vidore
baseline-results / Alibaba-NLP_gme-Qwen2-VL-7B-Instruct_metrics.json
QuentinJG's picture
Rename gme-qwen2-VL-7B_metrics.json to Alibaba-NLP_gme-Qwen2-VL-7B-Instruct_metrics.json
111cf45 verified
{"vidore/restaurant_esg_reports_beir": {
"ndcg_at_1": 0.64103,
"ndcg_at_3": 0.64687,
"ndcg_at_5": 0.65847,
"ndcg_at_10": 0.67679,
"ndcg_at_20": 0.71016,
"ndcg_at_50": 0.72701,
"ndcg_at_100": 0.73406,
"map_at_1": 0.46026,
"map_at_3": 0.57175,
"map_at_5": 0.59523,
"map_at_10": 0.60927,
"map_at_20": 0.62452,
"map_at_50": 0.63282,
"map_at_100": 0.63423,
"recall_at_1": 0.46026,
"recall_at_3": 0.63864,
"recall_at_5": 0.69602,
"recall_at_10": 0.74799,
"recall_at_20": 0.84622,
"recall_at_50": 0.89795,
"recall_at_100": 0.92823,
"precision_at_1": 0.65385,
"precision_at_3": 0.34615,
"precision_at_5": 0.24231,
"precision_at_10": 0.13846,
"precision_at_20": 0.08846,
"precision_at_50": 0.04154,
"precision_at_100": 0.02192,
"mrr_at_1": 0.6538461538461539,
"mrr_at_3": 0.6923076923076923,
"mrr_at_5": 0.7115384615384616,
"mrr_at_10": 0.7171474358974359,
"mrr_at_20": 0.7234039535026378,
"mrr_at_50": 0.7238847227334069,
"mrr_at_100": 0.7238847227334069,
"naucs_at_1_max": 0.23086039226273283,
"naucs_at_1_std": -0.016634064834066063,
"naucs_at_1_diff1": 0.60440655068253,
"naucs_at_3_max": 0.20374830655362478,
"naucs_at_3_std": 0.1517356986562186,
"naucs_at_3_diff1": 0.1431744010103715,
"naucs_at_5_max": 0.2679592702621421,
"naucs_at_5_std": 0.2521705461889716,
"naucs_at_5_diff1": -0.045946691724243385,
"naucs_at_10_max": 0.1870898752017782,
"naucs_at_10_std": 0.25497003931955176,
"naucs_at_10_diff1": -0.1785416897815052,
"naucs_at_20_max": 0.031666873348150236,
"naucs_at_20_std": 0.2722826958027661,
"naucs_at_20_diff1": -0.3138315596352517,
"naucs_at_50_max": -0.04799143250223134,
"naucs_at_50_std": 0.1803596755269399,
"naucs_at_50_diff1": -0.31674844717845624,
"naucs_at_100_max": -0.1026406780354188,
"naucs_at_100_std": 0.12833500881418947,
"naucs_at_100_diff1": -0.359625723150299
},
"vidore/synthetics_economics_macro_economy_2024_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.63793,
"ndcg_at_3": 0.60358,
"ndcg_at_5": 0.5617,
"ndcg_at_10": 0.54876,
"ndcg_at_20": 0.56065,
"ndcg_at_50": 0.62971,
"ndcg_at_100": 0.67396,
"map_at_1": 0.10999,
"map_at_3": 0.19136,
"map_at_5": 0.2376,
"map_at_10": 0.30609,
"map_at_20": 0.35432,
"map_at_50": 0.40973,
"map_at_100": 0.43775,
"recall_at_1": 0.10999,
"recall_at_3": 0.22603,
"recall_at_5": 0.30401,
"recall_at_10": 0.4303,
"recall_at_20": 0.55042,
"recall_at_50": 0.74983,
"recall_at_100": 0.88203,
"precision_at_1": 0.63793,
"precision_at_3": 0.54885,
"precision_at_5": 0.46897,
"precision_at_10": 0.38664,
"precision_at_20": 0.28427,
"precision_at_50": 0.18552,
"precision_at_100": 0.12345,
"mrr_at_1": 0.6379310344827587,
"mrr_at_3": 0.737787356321839,
"mrr_at_5": 0.7511494252873564,
"mrr_at_10": 0.7540400930487138,
"mrr_at_20": 0.756570482473086,
"mrr_at_50": 0.7575330442062826,
"mrr_at_100": 0.7575330442062826,
"naucs_at_1_max": 0.4257126936514001,
"naucs_at_1_std": 0.34640294199635524,
"naucs_at_1_diff1": 0.37250812182128895,
"naucs_at_3_max": 0.436869377310898,
"naucs_at_3_std": 0.3254658777431533,
"naucs_at_3_diff1": 0.18686449201114577,
"naucs_at_5_max": 0.45926648188576885,
"naucs_at_5_std": 0.35423096197625015,
"naucs_at_5_diff1": 0.12573019127851812,
"naucs_at_10_max": 0.44926493552514535,
"naucs_at_10_std": 0.3741317631320187,
"naucs_at_10_diff1": 0.07532540544382794,
"naucs_at_20_max": 0.36799714487565804,
"naucs_at_20_std": 0.36641452563256083,
"naucs_at_20_diff1": 0.010685735078934408,
"naucs_at_50_max": 0.26063240225409,
"naucs_at_50_std": 0.3039263594129317,
"naucs_at_50_diff1": -0.021803552442771462,
"naucs_at_100_max": 0.21506672441428243,
"naucs_at_100_std": 0.2735610974775767,
"naucs_at_100_diff1": -0.021701335107579767
},
"vidore/synthetic_axa_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.54167,
"ndcg_at_3": 0.54678,
"ndcg_at_5": 0.55352,
"ndcg_at_10": 0.57281,
"ndcg_at_20": 0.59252,
"ndcg_at_50": 0.66346,
"ndcg_at_100": 0.68529,
"map_at_1": 0.31073,
"map_at_3": 0.3756,
"map_at_5": 0.41287,
"map_at_10": 0.4587,
"map_at_20": 0.48045,
"map_at_50": 0.50712,
"map_at_100": 0.51369,
"recall_at_1": 0.31073,
"recall_at_3": 0.41064,
"recall_at_5": 0.49346,
"recall_at_10": 0.60971,
"recall_at_20": 0.68334,
"recall_at_50": 0.90884,
"recall_at_100": 0.97589,
"precision_at_1": 0.54167,
"precision_at_3": 0.35648,
"precision_at_5": 0.30278,
"precision_at_10": 0.22083,
"precision_at_20": 0.13681,
"precision_at_50": 0.07889,
"precision_at_100": 0.04486,
"mrr_at_1": 0.5416666666666666,
"mrr_at_3": 0.6412037037037037,
"mrr_at_5": 0.6599537037037037,
"mrr_at_10": 0.6656415343915344,
"mrr_at_20": 0.6674933862433863,
"mrr_at_50": 0.670311165327329,
"mrr_at_100": 0.670311165327329,
"naucs_at_1_max": -0.09759100819208395,
"naucs_at_1_std": -0.08519806914842944,
"naucs_at_1_diff1": 0.4528002784594793,
"naucs_at_3_max": 0.1594287481459452,
"naucs_at_3_std": 0.2562520943085768,
"naucs_at_3_diff1": 0.03918592900486676,
"naucs_at_5_max": 0.07827106986709774,
"naucs_at_5_std": 0.22597369682221727,
"naucs_at_5_diff1": -0.02847783770480081,
"naucs_at_10_max": -0.004198907072856447,
"naucs_at_10_std": 0.16878885222528184,
"naucs_at_10_diff1": -0.02732442585903392,
"naucs_at_20_max": -0.10164455853861176,
"naucs_at_20_std": 0.07367925373302965,
"naucs_at_20_diff1": 0.01263184714744086,
"naucs_at_50_max": -0.16844831805081548,
"naucs_at_50_std": 0.08562522976101725,
"naucs_at_50_diff1": -0.005775618813426175,
"naucs_at_100_max": -0.15639282554330194,
"naucs_at_100_std": 0.10767665269811233,
"naucs_at_100_diff1": -0.049551439044740866
},
"vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered": {
"ndcg_at_1": 0.6125,
"ndcg_at_3": 0.62922,
"ndcg_at_5": 0.64022,
"ndcg_at_10": 0.67793,
"ndcg_at_20": 0.69863,
"ndcg_at_50": 0.7253,
"ndcg_at_100": 0.73585,
"map_at_1": 0.38293,
"map_at_3": 0.51567,
"map_at_5": 0.55264,
"map_at_10": 0.58918,
"map_at_20": 0.60174,
"map_at_50": 0.61159,
"map_at_100": 0.61431,
"recall_at_1": 0.38293,
"recall_at_3": 0.58739,
"recall_at_5": 0.65315,
"recall_at_10": 0.76971,
"recall_at_20": 0.82782,
"recall_at_50": 0.90548,
"recall_at_100": 0.94091,
"precision_at_1": 0.6125,
"precision_at_3": 0.38958,
"precision_at_5": 0.28875,
"precision_at_10": 0.185,
"precision_at_20": 0.10531,
"precision_at_50": 0.05125,
"precision_at_100": 0.02819,
"mrr_at_1": 0.6125,
"mrr_at_3": 0.709375,
"mrr_at_5": 0.718125,
"mrr_at_10": 0.7282068452380952,
"mrr_at_20": 0.7292557962870463,
"mrr_at_50": 0.7306414011712925,
"mrr_at_100": 0.7306414011712925,
"naucs_at_1_max": 0.335261869985014,
"naucs_at_1_std": -0.4324032095908341,
"naucs_at_1_diff1": 0.5056338562513041,
"naucs_at_3_max": 0.20201103307583243,
"naucs_at_3_std": -0.1468269379237415,
"naucs_at_3_diff1": 0.001296082275490061,
"naucs_at_5_max": 0.14707400582846586,
"naucs_at_5_std": -0.050192104128278216,
"naucs_at_5_diff1": -0.07024868990211804,
"naucs_at_10_max": 0.10961197410842513,
"naucs_at_10_std": 0.07444076283774055,
"naucs_at_10_diff1": -0.17472590796866364,
"naucs_at_20_max": 0.05030964155750022,
"naucs_at_20_std": 0.1685571061631209,
"naucs_at_20_diff1": -0.22219600026128314,
"naucs_at_50_max": 0.0024275280377858447,
"naucs_at_50_std": 0.30314488535136513,
"naucs_at_50_diff1": -0.24616631170306078,
"naucs_at_100_max": -0.02497020999145598,
"naucs_at_100_std": 0.34401277035837946,
"naucs_at_100_diff1": -0.2607412653446645
},
"vidore/synthetic_rse_restaurant_filtered_v1.0": {
"ndcg_at_1": 0.52632,
"ndcg_at_3": 0.51159,
"ndcg_at_5": 0.54306,
"ndcg_at_10": 0.58832,
"ndcg_at_20": 0.61252,
"ndcg_at_50": 0.63797,
"ndcg_at_100": 0.65179,
"map_at_1": 0.25497,
"map_at_3": 0.38184,
"map_at_5": 0.42547,
"map_at_10": 0.46288,
"map_at_20": 0.48049,
"map_at_50": 0.49178,
"map_at_100": 0.49612,
"recall_at_1": 0.25497,
"recall_at_3": 0.46802,
"recall_at_5": 0.57162,
"recall_at_10": 0.71892,
"recall_at_20": 0.8038,
"recall_at_50": 0.8836,
"recall_at_100": 0.92832,
"precision_at_1": 0.52632,
"precision_at_3": 0.34503,
"precision_at_5": 0.27719,
"precision_at_10": 0.18772,
"precision_at_20": 0.11667,
"precision_at_50": 0.05825,
"precision_at_100": 0.03351,
"mrr_at_1": 0.5087719298245614,
"mrr_at_3": 0.6198830409356726,
"mrr_at_5": 0.6356725146198831,
"mrr_at_10": 0.6459551656920077,
"mrr_at_20": 0.6494489428699954,
"mrr_at_50": 0.6494489428699954,
"mrr_at_100": 0.6494489428699954,
"naucs_at_1_max": 0.20898501615503154,
"naucs_at_1_std": 0.19865951548817762,
"naucs_at_1_diff1": 0.12422954892869692,
"naucs_at_3_max": 0.13063907882379192,
"naucs_at_3_std": 0.1867268535200074,
"naucs_at_3_diff1": -0.010152821915993216,
"naucs_at_5_max": 0.12468822662836343,
"naucs_at_5_std": 0.24435864381053535,
"naucs_at_5_diff1": -0.14292016643588493,
"naucs_at_10_max": 0.09957214096099384,
"naucs_at_10_std": 0.1232178900347265,
"naucs_at_10_diff1": -0.18881049477524964,
"naucs_at_20_max": 0.10047824351341342,
"naucs_at_20_std": 0.050655985594202535,
"naucs_at_20_diff1": -0.18277757210761525,
"naucs_at_50_max": -0.0028577659697399107,
"naucs_at_50_std": 0.06785517706051693,
"naucs_at_50_diff1": -0.2805651145717775,
"naucs_at_100_max": -0.04677308705588518,
"naucs_at_100_std": 0.04887109949935036,
"naucs_at_100_diff1": -0.29480787024785843
},
"vidore/synthetic_rse_restaurant_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.55263,
"ndcg_at_3": 0.5385,
"ndcg_at_5": 0.5671,
"ndcg_at_10": 0.60965,
"ndcg_at_20": 0.63965,
"ndcg_at_50": 0.66076,
"ndcg_at_100": 0.67144,
"map_at_1": 0.27979,
"map_at_3": 0.403,
"map_at_5": 0.44628,
"map_at_10": 0.48694,
"map_at_20": 0.5079,
"map_at_50": 0.51864,
"map_at_100": 0.52233,
"recall_at_1": 0.27979,
"recall_at_3": 0.48229,
"recall_at_5": 0.59152,
"recall_at_10": 0.72148,
"recall_at_20": 0.82622,
"recall_at_50": 0.88972,
"recall_at_100": 0.92148,
"precision_at_1": 0.55263,
"precision_at_3": 0.35965,
"precision_at_5": 0.28246,
"precision_at_10": 0.19342,
"precision_at_20": 0.12105,
"precision_at_50": 0.05956,
"precision_at_100": 0.03346,
"mrr_at_1": 0.543859649122807,
"mrr_at_3": 0.6447368421052632,
"mrr_at_5": 0.6625000000000001,
"mrr_at_10": 0.6697176970203286,
"mrr_at_20": 0.6737127760154076,
"mrr_at_50": 0.6737127760154076,
"mrr_at_100": 0.6737127760154076,
"naucs_at_1_max": 0.2140943848106338,
"naucs_at_1_std": 0.23938427311315505,
"naucs_at_1_diff1": 0.1405473521309396,
"naucs_at_3_max": 0.1218141963405552,
"naucs_at_3_std": 0.18573191938786546,
"naucs_at_3_diff1": -0.008623274725985838,
"naucs_at_5_max": 0.12443788996710192,
"naucs_at_5_std": 0.19089201196588362,
"naucs_at_5_diff1": -0.10667362371792613,
"naucs_at_10_max": 0.06439702148586714,
"naucs_at_10_std": 0.08616401348743276,
"naucs_at_10_diff1": -0.14682566947769052,
"naucs_at_20_max": 0.011459892574334468,
"naucs_at_20_std": 0.011936648568131212,
"naucs_at_20_diff1": -0.17905730114267088,
"naucs_at_50_max": -0.039417393885098656,
"naucs_at_50_std": 0.01713010738519722,
"naucs_at_50_diff1": -0.25590773707473424,
"naucs_at_100_max": -0.07704226340939001,
"naucs_at_100_std": 0.00038016771434448113,
"naucs_at_100_diff1": -0.26337425866347924
},
"vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered_multilingual": {
"ndcg_at_1": 0.51406,
"ndcg_at_3": 0.52762,
"ndcg_at_5": 0.55104,
"ndcg_at_10": 0.58728,
"ndcg_at_20": 0.61616,
"ndcg_at_50": 0.64389,
"ndcg_at_100": 0.65766,
"map_at_1": 0.30278,
"map_at_3": 0.41955,
"map_at_5": 0.45948,
"map_at_10": 0.49458,
"map_at_20": 0.5094,
"map_at_50": 0.51865,
"map_at_100": 0.5216,
"recall_at_1": 0.30278,
"recall_at_3": 0.49228,
"recall_at_5": 0.58482,
"recall_at_10": 0.69499,
"recall_at_20": 0.78112,
"recall_at_50": 0.86891,
"recall_at_100": 0.92176,
"precision_at_1": 0.51406,
"precision_at_3": 0.33437,
"precision_at_5": 0.25813,
"precision_at_10": 0.16844,
"precision_at_20": 0.10031,
"precision_at_50": 0.04906,
"precision_at_100": 0.02725,
"mrr_at_1": 0.509375,
"mrr_at_3": 0.6041666666666667,
"mrr_at_5": 0.6216666666666667,
"mrr_at_10": 0.6328707837301588,
"mrr_at_20": 0.6363619415808972,
"mrr_at_50": 0.6378425889736316,
"mrr_at_100": 0.6379585768460532,
"naucs_at_1_max": 0.27523303979367053,
"naucs_at_1_std": -0.30760830303939224,
"naucs_at_1_diff1": 0.4221331299889948,
"naucs_at_3_max": 0.2864558859725673,
"naucs_at_3_std": -0.13868787899129562,
"naucs_at_3_diff1": 0.07688013246514978,
"naucs_at_5_max": 0.2631259003120805,
"naucs_at_5_std": -0.04478628121431216,
"naucs_at_5_diff1": 0.007887530373741765,
"naucs_at_10_max": 0.2318611860530291,
"naucs_at_10_std": 0.052621289911082725,
"naucs_at_10_diff1": -0.07510716470855315,
"naucs_at_20_max": 0.19912689437255834,
"naucs_at_20_std": 0.12677422040752656,
"naucs_at_20_diff1": -0.12302947838157144,
"naucs_at_50_max": 0.16880714786473955,
"naucs_at_50_std": 0.17376472772051615,
"naucs_at_50_diff1": -0.15388883088126787,
"naucs_at_100_max": 0.13818376420642023,
"naucs_at_100_std": 0.19026285189573108,
"naucs_at_100_diff1": -0.17332718731619928
},
"vidore/synthetic_axa_filtered_v1.0": {
"ndcg_at_1": 0.55556,
"ndcg_at_3": 0.61505,
"ndcg_at_5": 0.6072,
"ndcg_at_10": 0.60686,
"ndcg_at_20": 0.626,
"ndcg_at_50": 0.69007,
"ndcg_at_100": 0.71151,
"map_at_1": 0.29038,
"map_at_3": 0.39448,
"map_at_5": 0.4411,
"map_at_10": 0.48036,
"map_at_20": 0.50544,
"map_at_50": 0.53292,
"map_at_100": 0.53958,
"recall_at_1": 0.29038,
"recall_at_3": 0.44755,
"recall_at_5": 0.55414,
"recall_at_10": 0.6577,
"recall_at_20": 0.73138,
"recall_at_50": 0.91648,
"recall_at_100": 0.98291,
"precision_at_1": 0.55556,
"precision_at_3": 0.44444,
"precision_at_5": 0.35556,
"precision_at_10": 0.23333,
"precision_at_20": 0.14444,
"precision_at_50": 0.08111,
"precision_at_100": 0.04556,
"mrr_at_1": 0.5555555555555556,
"mrr_at_3": 0.6851851851851852,
"mrr_at_5": 0.6962962962962963,
"mrr_at_10": 0.7055555555555555,
"mrr_at_20": 0.7055555555555555,
"mrr_at_50": 0.7080808080808081,
"mrr_at_100": 0.7080808080808081,
"naucs_at_1_max": -0.1550749743136661,
"naucs_at_1_std": -0.2577900322133926,
"naucs_at_1_diff1": 0.3915048965349509,
"naucs_at_3_max": -0.009277695103048347,
"naucs_at_3_std": -0.013252529889708528,
"naucs_at_3_diff1": -0.14225756172879317,
"naucs_at_5_max": -0.019929062216098872,
"naucs_at_5_std": -0.016334128255812232,
"naucs_at_5_diff1": -0.07886984985074927,
"naucs_at_10_max": -0.12055344859502143,
"naucs_at_10_std": 0.10076623987025357,
"naucs_at_10_diff1": -0.058038138406430136,
"naucs_at_20_max": -0.20305200329246395,
"naucs_at_20_std": 0.04132903820914768,
"naucs_at_20_diff1": -0.051765679750717254,
"naucs_at_50_max": -0.260702587227321,
"naucs_at_50_std": 0.12470232476636918,
"naucs_at_50_diff1": -0.01753790576388771,
"naucs_at_100_max": -0.1821177255881445,
"naucs_at_100_std": 0.17569964744653446,
"naucs_at_100_diff1": -0.0027944975139212722
},
"vidore/synthetic_economics_macro_economy_2024_filtered_v1.0": {
"ndcg_at_1": 0.7069,
"ndcg_at_3": 0.67644,
"ndcg_at_5": 0.62879,
"ndcg_at_10": 0.61464,
"ndcg_at_20": 0.61321,
"ndcg_at_50": 0.67417,
"ndcg_at_100": 0.71849,
"map_at_1": 0.1217,
"map_at_3": 0.22011,
"map_at_5": 0.28081,
"map_at_10": 0.36154,
"map_at_20": 0.40785,
"map_at_50": 0.46311,
"map_at_100": 0.49362,
"recall_at_1": 0.1217,
"recall_at_3": 0.25759,
"recall_at_5": 0.34659,
"recall_at_10": 0.48032,
"recall_at_20": 0.58366,
"recall_at_50": 0.76561,
"recall_at_100": 0.8981,
"precision_at_1": 0.7069,
"precision_at_3": 0.62069,
"precision_at_5": 0.53103,
"precision_at_10": 0.43276,
"precision_at_20": 0.30172,
"precision_at_50": 0.19207,
"precision_at_100": 0.1281,
"mrr_at_1": 0.7068965517241379,
"mrr_at_3": 0.7931034482758621,
"mrr_at_5": 0.8051724137931034,
"mrr_at_10": 0.8073275862068966,
"mrr_at_20": 0.808764367816092,
"mrr_at_50": 0.8093588981371383,
"mrr_at_100": 0.8093588981371383,
"naucs_at_1_max": 0.4018993463844229,
"naucs_at_1_std": 0.1998840385213374,
"naucs_at_1_diff1": 0.41579715716899845,
"naucs_at_3_max": 0.481583737842091,
"naucs_at_3_std": 0.3483946306936104,
"naucs_at_3_diff1": 0.1634049527424186,
"naucs_at_5_max": 0.5683890127671222,
"naucs_at_5_std": 0.39505149464580597,
"naucs_at_5_diff1": 0.14631736355547817,
"naucs_at_10_max": 0.49930403622845404,
"naucs_at_10_std": 0.43506160583491765,
"naucs_at_10_diff1": -0.039974194143169446,
"naucs_at_20_max": 0.39595200496740807,
"naucs_at_20_std": 0.3766890283449539,
"naucs_at_20_diff1": -0.020975783016964022,
"naucs_at_50_max": 0.277708648482758,
"naucs_at_50_std": 0.3220573632505197,
"naucs_at_50_diff1": -0.05211198121096371,
"naucs_at_100_max": 0.19921588610583,
"naucs_at_100_std": 0.2616760394187849,
"naucs_at_100_diff1": -0.06514630531185855
}, "arxivqa_test_subsampled": {"ndcg_at_1": 0.83, "ndcg_at_3": 0.86645, "ndcg_at_5": 0.87575, "ndcg_at_10": 0.88357, "ndcg_at_20": 0.89031, "ndcg_at_50": 0.89361, "ndcg_at_100": 0.89491, "map_at_1": 0.83, "map_at_3": 0.857, "map_at_5": 0.8623, "map_at_10": 0.86557, "map_at_20": 0.86751, "map_at_50": 0.86809, "map_at_100": 0.86821, "recall_at_1": 0.83, "recall_at_3": 0.894, "recall_at_5": 0.916, "recall_at_10": 0.94, "recall_at_20": 0.966, "recall_at_50": 0.982, "recall_at_100": 0.99, "precision_at_1": 0.83, "precision_at_3": 0.298, "precision_at_5": 0.1832, "precision_at_10": 0.094, "precision_at_20": 0.0483, "precision_at_50": 0.01964, "precision_at_100": 0.0099, "mrr_at_1": 0.83, "mrr_at_3": 0.8569999999999998, "mrr_at_5": 0.8622999999999997, "mrr_at_10": 0.8655468253968253, "mrr_at_20": 0.86748706375324, "mrr_at_50": 0.8680725159027165, "mrr_at_100": 0.8681857764435326, "naucs_at_1_max": 0.858646311023393, "naucs_at_1_std": -0.4664247982817673, "naucs_at_1_diff1": 0.9168804783189187, "naucs_at_3_max": 0.8260919723834373, "naucs_at_3_std": -0.4909749214631783, "naucs_at_3_diff1": 0.8531405853433434, "naucs_at_5_max": 0.8605108710150751, "naucs_at_5_std": -0.5535658707927644, "naucs_at_5_diff1": 0.8712818460717628, "naucs_at_10_max": 0.8889666977902271, "naucs_at_10_std": -0.31691565515095205, "naucs_at_10_diff1": 0.8706037970743868, "naucs_at_20_max": 0.8959191519745157, "naucs_at_20_std": -0.38292964244521194, "naucs_at_20_diff1": 0.8699675948810928, "naucs_at_50_max": 0.9110384894698567, "naucs_at_50_std": -0.5361552028218665, "naucs_at_50_diff1": 0.8615001556178092, "naucs_at_100_max": 0.8921568627450854, "naucs_at_100_std": -0.12110177404293686, "naucs_at_100_diff1": 0.8585434173669326}, "docvqa_test_subsampled": {"ndcg_at_1": 0.48337, "ndcg_at_3": 0.54361, "ndcg_at_5": 0.56631, "ndcg_at_10": 0.5856, "ndcg_at_20": 0.6002, "ndcg_at_50": 0.61575, "ndcg_at_100": 0.62417, "map_at_1": 0.48337, "map_at_3": 0.52846, "map_at_5": 0.54098, "map_at_10": 0.5489, "map_at_20": 0.55291, "map_at_50": 0.55547, "map_at_100": 0.55625, "recall_at_1": 0.48337, "recall_at_3": 0.58758, "recall_at_5": 0.64302, "recall_at_10": 0.70288, "recall_at_20": 0.76053, "recall_at_50": 0.83814, "recall_at_100": 0.88914, "precision_at_1": 0.48337, "precision_at_3": 0.19586, "precision_at_5": 0.1286, "precision_at_10": 0.07029, "precision_at_20": 0.03803, "precision_at_50": 0.01676, "precision_at_100": 0.00889, "mrr_at_1": 0.48337028824833705, "mrr_at_3": 0.5291943828529196, "mrr_at_5": 0.5411677753141169, "mrr_at_10": 0.5491280399816985, "mrr_at_20": 0.5531358543364028, "mrr_at_50": 0.5556861079139531, "mrr_at_100": 0.5564702079607413, "naucs_at_1_max": 0.6938261034813209, "naucs_at_1_std": -0.44374096550677106, "naucs_at_1_diff1": 0.8515620837021114, "naucs_at_3_max": 0.6668377646579767, "naucs_at_3_std": -0.4652816060602179, "naucs_at_3_diff1": 0.7505735469305647, "naucs_at_5_max": 0.724457489986816, "naucs_at_5_std": -0.3958839295973761, "naucs_at_5_diff1": 0.7229459462048281, "naucs_at_10_max": 0.676802465865959, "naucs_at_10_std": -0.46150705683769994, "naucs_at_10_diff1": 0.6592299253063731, "naucs_at_20_max": 0.6600327112639377, "naucs_at_20_std": -0.4913360754229586, "naucs_at_20_diff1": 0.6173724312440265, "naucs_at_50_max": 0.7107726516332598, "naucs_at_50_std": -0.30342460889725203, "naucs_at_50_diff1": 0.5878153412566914, "naucs_at_100_max": 0.7701174556499546, "naucs_at_100_std": -0.20534787776527266, "naucs_at_100_diff1": 0.6057647818477818}, "infovqa_test_subsampled": {"ndcg_at_1": 0.88462, "ndcg_at_3": 0.91491, "ndcg_at_5": 0.92388, "ndcg_at_10": 0.92663, "ndcg_at_20": 0.92863, "ndcg_at_50": 0.93028, "ndcg_at_100": 0.93091, "map_at_1": 0.88462, "map_at_3": 0.90722, "map_at_5": 0.91208, "map_at_10": 0.9133, "map_at_20": 0.91382, "map_at_50": 0.91409, "map_at_100": 0.91414, "recall_at_1": 0.88462, "recall_at_3": 0.93725, "recall_at_5": 0.95951, "recall_at_10": 0.96761, "recall_at_20": 0.97571, "recall_at_50": 0.98381, "recall_at_100": 0.98785, "precision_at_1": 0.88462, "precision_at_3": 0.31242, "precision_at_5": 0.1919, "precision_at_10": 0.09676, "precision_at_20": 0.04879, "precision_at_50": 0.01968, "precision_at_100": 0.00988, "mrr_at_1": 0.8846153846153846, "mrr_at_3": 0.9072199730094463, "mrr_at_5": 0.9120782726045881, "mrr_at_10": 0.9132952573742044, "mrr_at_20": 0.9138156422643798, "mrr_at_50": 0.9140889280185405, "mrr_at_100": 0.9141383515097923, "naucs_at_1_max": 0.7812468890694119, "naucs_at_1_std": -0.08799174841521007, "naucs_at_1_diff1": 0.9456266707519869, "naucs_at_3_max": 0.8372432713037467, "naucs_at_3_std": 0.03838532830090231, "naucs_at_3_diff1": 0.9282000807550249, "naucs_at_5_max": 0.9295244758349281, "naucs_at_5_std": 0.1803114561450837, "naucs_at_5_diff1": 0.9665230450023843, "naucs_at_10_max": 0.9292640159035281, "naucs_at_10_std": 0.10340232903541587, "naucs_at_10_diff1": 0.9581538062529754, "naucs_at_20_max": 0.9659719840147728, "naucs_at_20_std": 0.2981831051495906, "naucs_at_20_diff1": 0.94420507500398, "naucs_at_50_max": 0.9836748182418981, "naucs_at_50_std": 0.36839391253502923, "naucs_at_50_diff1": 0.9326327942640528, "naucs_at_100_max": 0.9782330909892136, "naucs_at_100_std": 0.3783664086797387, "naucs_at_100_diff1": 0.9101770590187516}, "shiftproject_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.97262, "ndcg_at_5": 0.97262, "ndcg_at_10": 0.97262, "ndcg_at_20": 0.97518, "ndcg_at_50": 0.97518, "ndcg_at_100": 0.97518, "map_at_1": 0.95, "map_at_3": 0.96667, "map_at_5": 0.96667, "map_at_10": 0.96667, "map_at_20": 0.96738, "map_at_50": 0.96738, "map_at_100": 0.96738, "recall_at_1": 0.95, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.9666666666666667, "mrr_at_5": 0.9666666666666667, "mrr_at_10": 0.9666666666666667, "mrr_at_20": 0.9673809523809525, "mrr_at_50": 0.9673809523809525, "mrr_at_100": 0.9673809523809525, "naucs_at_1_max": 0.5981325863678774, "naucs_at_1_std": -0.3083099906629302, "naucs_at_1_diff1": 0.8921568627450971, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.35807656395889226, "naucs_at_3_diff1": 0.7222222222222157, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.35807656395891135, "naucs_at_5_diff1": 0.7222222222222276, "naucs_at_10_max": 1.0, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 0.7222222222222276, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.99, "ndcg_at_3": 0.99631, "ndcg_at_5": 0.99631, "ndcg_at_10": 0.99631, "ndcg_at_20": 0.99631, "ndcg_at_50": 0.99631, "ndcg_at_100": 0.99631, "map_at_1": 0.99, "map_at_3": 0.995, "map_at_5": 0.995, "map_at_10": 0.995, "map_at_20": 0.995, "map_at_50": 0.995, "map_at_100": 0.995, "recall_at_1": 0.99, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.995, "mrr_at_5": 0.995, "mrr_at_10": 0.995, "mrr_at_20": 0.995, "mrr_at_50": 0.995, "mrr_at_100": 0.995, "naucs_at_1_max": 1.0, "naucs_at_1_std": 0.8692810457516276, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.95893, "ndcg_at_10": 0.96831, "ndcg_at_20": 0.96831, "ndcg_at_50": 0.96831, "ndcg_at_100": 0.96831, "map_at_1": 0.94, "map_at_3": 0.955, "map_at_5": 0.955, "map_at_10": 0.95868, "map_at_20": 0.95868, "map_at_50": 0.95868, "map_at_100": 0.95868, "recall_at_1": 0.94, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.94, "mrr_at_3": 0.955, "mrr_at_5": 0.955, "mrr_at_10": 0.9586785714285714, "mrr_at_20": 0.9586785714285714, "mrr_at_50": 0.9586785714285714, "mrr_at_100": 0.9586785714285714, "naucs_at_1_max": 0.7394179894179895, "naucs_at_1_std": -0.7495331465919671, "naucs_at_1_diff1": 0.9346405228758177, "naucs_at_3_max": 1.0, "naucs_at_3_std": -1.1517273576097098, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 1.0, "naucs_at_5_std": -1.1517273576097071, "naucs_at_5_diff1": 0.9564270152505424, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.99, "ndcg_at_3": 0.995, "ndcg_at_5": 0.995, "ndcg_at_10": 0.995, "ndcg_at_20": 0.995, "ndcg_at_50": 0.995, "ndcg_at_100": 0.995, "map_at_1": 0.99, "map_at_3": 0.99333, "map_at_5": 0.99333, "map_at_10": 0.99333, "map_at_20": 0.99333, "map_at_50": 0.99333, "map_at_100": 0.99333, "recall_at_1": 0.99, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.9933333333333334, "mrr_at_5": 0.9933333333333334, "mrr_at_10": 0.9933333333333334, "mrr_at_20": 0.9933333333333334, "mrr_at_50": 0.9933333333333334, "mrr_at_100": 0.9933333333333334, "naucs_at_1_max": 0.7222222222222201, "naucs_at_1_std": -1.739962651727339, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.99, "ndcg_at_3": 0.99631, "ndcg_at_5": 0.99631, "ndcg_at_10": 0.99631, "ndcg_at_20": 0.99631, "ndcg_at_50": 0.99631, "ndcg_at_100": 0.99631, "map_at_1": 0.99, "map_at_3": 0.995, "map_at_5": 0.995, "map_at_10": 0.995, "map_at_20": 0.995, "map_at_50": 0.995, "map_at_100": 0.995, "recall_at_1": 0.99, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.995, "mrr_at_5": 0.995, "mrr_at_10": 0.995, "mrr_at_20": 0.995, "mrr_at_50": 0.995, "mrr_at_100": 0.995, "naucs_at_1_max": 1.0, "naucs_at_1_std": 0.7222222222222201, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.9, "ndcg_at_3": 0.9355, "ndcg_at_5": 0.9458, "ndcg_at_10": 0.94692, "ndcg_at_20": 0.94874, "ndcg_at_50": 0.94943, "ndcg_at_100": 0.94943, "map_at_1": 0.9, "map_at_3": 0.92679, "map_at_5": 0.9325, "map_at_10": 0.93295, "map_at_20": 0.93344, "map_at_50": 0.93355, "map_at_100": 0.93355, "recall_at_1": 0.9, "recall_at_3": 0.96071, "recall_at_5": 0.98571, "recall_at_10": 0.98929, "recall_at_20": 0.99643, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.32024, "precision_at_5": 0.19714, "precision_at_10": 0.09893, "precision_at_20": 0.04982, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.9, "mrr_at_3": 0.9267857142857143, "mrr_at_5": 0.9317857142857144, "mrr_at_10": 0.9328273809523809, "mrr_at_20": 0.9333253205128206, "mrr_at_50": 0.9334303625296272, "mrr_at_100": 0.9334303625296272, "naucs_at_1_max": 0.7993697478991594, "naucs_at_1_std": 0.48374349739895933, "naucs_at_1_diff1": 0.9092136854741905, "naucs_at_3_max": 0.9881164587046973, "naucs_at_3_std": 0.7182327476445171, "naucs_at_3_diff1": 0.9524658348187793, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.865196078431377, "naucs_at_5_diff1": 0.9346405228758147, "naucs_at_10_max": 1.0, "naucs_at_10_std": 0.9128540305011011, "naucs_at_10_diff1": 0.9128540305011011, "naucs_at_20_max": 1.0, "naucs_at_20_std": 0.8692810457516478, "naucs_at_20_diff1": 0.8692810457516478, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0}, "tatdqa_test": {"ndcg_at_1": 0.63913, "ndcg_at_3": 0.73348, "ndcg_at_5": 0.76119, "ndcg_at_10": 0.77795, "ndcg_at_20": 0.78597, "ndcg_at_50": 0.7922, "ndcg_at_100": 0.79398, "map_at_1": 0.63913, "map_at_3": 0.71061, "map_at_5": 0.72595, "map_at_10": 0.73302, "map_at_20": 0.73532, "map_at_50": 0.73641, "map_at_100": 0.73656, "recall_at_1": 0.63913, "recall_at_3": 0.79951, "recall_at_5": 0.86695, "recall_at_10": 0.91798, "recall_at_20": 0.94897, "recall_at_50": 0.97934, "recall_at_100": 0.99028, "precision_at_1": 0.63913, "precision_at_3": 0.2665, "precision_at_5": 0.17339, "precision_at_10": 0.0918, "precision_at_20": 0.04745, "precision_at_50": 0.01959, "precision_at_100": 0.0099, "mrr_at_1": 0.6391251518833536, "mrr_at_3": 0.7106115836371002, "mrr_at_5": 0.7256480356419612, "mrr_at_10": 0.7328699685625567, "mrr_at_20": 0.7350934149912067, "mrr_at_50": 0.7362113667635196, "mrr_at_100": 0.7363668461888006, "naucs_at_1_max": 0.5938210540556276, "naucs_at_1_std": -0.16049718029718266, "naucs_at_1_diff1": 0.7609838065385791, "naucs_at_3_max": 0.6431538299660706, "naucs_at_3_std": -0.08130035312037115, "naucs_at_3_diff1": 0.6646930087024925, "naucs_at_5_max": 0.6677283839470244, "naucs_at_5_std": -0.04183840532339394, "naucs_at_5_diff1": 0.6386845987046517, "naucs_at_10_max": 0.7170537401608961, "naucs_at_10_std": 0.029973785816548366, "naucs_at_10_diff1": 0.6180179387252528, "naucs_at_20_max": 0.7286213852092154, "naucs_at_20_std": 0.267131652053233, "naucs_at_20_diff1": 0.5728179001757068, "naucs_at_50_max": 0.8183455244679178, "naucs_at_50_std": 0.6268074134337968, "naucs_at_50_diff1": 0.540539174474304, "naucs_at_100_max": 0.9384258483213721, "naucs_at_100_std": 0.9047196439054298, "naucs_at_100_diff1": 0.6317586644999579}}