vidore
baseline-results / llamaindex_vdr-2b-multi-v1_metrics.json
QuentinJG's picture
Rename dse-llamaindex_metrics.json to llamaindex_vdr-2b-multi-v1_metrics.json
b87b597 verified
{
"metadata": {
"timestamp": "2025-03-12T16:10:17.229454",
"vidore_benchmark_version": "5.0.1.dev5+g1548c2d.d20250312"
},
"metrics": {
"vidore/restaurant_esg_reports_beir":{"ndcg_at_1": 0.51923, "ndcg_at_3": 0.58018, "ndcg_at_5": 0.63101, "ndcg_at_10": 0.65274, "ndcg_at_20": 0.67047, "ndcg_at_50": 0.69998, "ndcg_at_100": 0.70624, "map_at_1": 0.39872, "map_at_3": 0.50689, "map_at_5": 0.55161, "map_at_10": 0.57265, "map_at_20": 0.57995, "map_at_50": 0.58983, "map_at_100": 0.59139, "recall_at_1": 0.39872, "recall_at_3": 0.58333, "recall_at_5": 0.71781, "recall_at_10": 0.77967, "recall_at_20": 0.83563, "recall_at_50": 0.93341, "recall_at_100": 0.9592, "precision_at_1": 0.51923, "precision_at_3": 0.3141, "precision_at_5": 0.25385, "precision_at_10": 0.14423, "precision_at_20": 0.08173, "precision_at_50": 0.04154, "precision_at_100": 0.02192, "mrr_at_1": 0.5192307692307693, "mrr_at_3": 0.625, "mrr_at_5": 0.6567307692307692, "mrr_at_10": 0.6616147741147741, "mrr_at_20": 0.6659385753135751, "mrr_at_50": 0.6668543262293262, "mrr_at_100": 0.6668543262293262, "naucs_at_1_max": 0.5694483724614683, "naucs_at_1_std": 0.3722365643641625, "naucs_at_1_diff1": 0.6630923090626469, "naucs_at_3_max": 0.4284312717311841, "naucs_at_3_std": 0.3043188529887843, "naucs_at_3_diff1": 0.23222932871139657, "naucs_at_5_max": 0.1957258984917592, "naucs_at_5_std": 0.21172075191616438, "naucs_at_5_diff1": 0.12899378896152203, "naucs_at_10_max": 0.1911079264065253, "naucs_at_10_std": 0.18818758953070164, "naucs_at_10_diff1": 0.11397146879551144, "naucs_at_20_max": 0.12239979011786752, "naucs_at_20_std": 0.23716115395048135, "naucs_at_20_diff1": -0.004465965987469208, "naucs_at_50_max": 0.024713092790819645, "naucs_at_50_std": 0.23994428769410767, "naucs_at_50_diff1": -0.11651821615679771, "naucs_at_100_max": 0.0124502351274068, "naucs_at_100_std": 0.26711349464594386, "naucs_at_100_diff1": -0.1312476583819651},
"vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered": {
"ndcg_at_1": 0.5875,
"ndcg_at_3": 0.57886,
"ndcg_at_5": 0.60632,
"ndcg_at_10": 0.64636,
"ndcg_at_20": 0.67153,
"ndcg_at_50": 0.69612,
"ndcg_at_100": 0.70593,
"map_at_1": 0.35444,
"map_at_3": 0.47216,
"map_at_5": 0.51385,
"map_at_10": 0.5521,
"map_at_20": 0.56741,
"map_at_50": 0.57534,
"map_at_100": 0.57786,
"recall_at_1": 0.35444,
"recall_at_3": 0.53706,
"recall_at_5": 0.63063,
"recall_at_10": 0.746,
"recall_at_20": 0.81806,
"recall_at_50": 0.89598,
"recall_at_100": 0.92916,
"precision_at_1": 0.5875,
"precision_at_3": 0.35,
"precision_at_5": 0.27375,
"precision_at_10": 0.17938,
"precision_at_20": 0.10406,
"precision_at_50": 0.04975,
"precision_at_100": 0.02712,
"mrr_at_1": 0.5875,
"mrr_at_3": 0.6812499999999998,
"mrr_at_5": 0.6974999999999999,
"mrr_at_10": 0.7047966269841268,
"mrr_at_20": 0.7070646769166504,
"mrr_at_50": 0.708239325858248,
"mrr_at_100": 0.708239325858248,
"naucs_at_1_max": 0.31355746190850764,
"naucs_at_1_std": 0.04934974666642292,
"naucs_at_1_diff1": 0.32442245431764555,
"naucs_at_3_max": 0.25327223124789267,
"naucs_at_3_std": 0.05050118014897422,
"naucs_at_3_diff1": -0.06632845947133419,
"naucs_at_5_max": 0.1851610600189157,
"naucs_at_5_std": 0.058061097188027676,
"naucs_at_5_diff1": -0.22026563034766006,
"naucs_at_10_max": 0.15422295635424052,
"naucs_at_10_std": 0.14164030177658773,
"naucs_at_10_diff1": -0.27788248939794785,
"naucs_at_20_max": 0.16130977100298727,
"naucs_at_20_std": 0.18430855044448158,
"naucs_at_20_diff1": -0.280205660946133,
"naucs_at_50_max": 0.16350693053715493,
"naucs_at_50_std": 0.1988648228596469,
"naucs_at_50_diff1": -0.29375789506115757,
"naucs_at_100_max": 0.12559087058175492,
"naucs_at_100_std": 0.1908762696794704,
"naucs_at_100_diff1": -0.29440996763870275
},
"vidore/synthetic_economics_macro_economy_2024_filtered_v1.0": {
"ndcg_at_1": 0.63793,
"ndcg_at_3": 0.64059,
"ndcg_at_5": 0.61184,
"ndcg_at_10": 0.57183,
"ndcg_at_20": 0.57051,
"ndcg_at_50": 0.63089,
"ndcg_at_100": 0.6723,
"map_at_1": 0.07983,
"map_at_3": 0.17978,
"map_at_5": 0.2441,
"map_at_10": 0.31313,
"map_at_20": 0.35443,
"map_at_50": 0.40694,
"map_at_100": 0.43529,
"recall_at_1": 0.07983,
"recall_at_3": 0.22913,
"recall_at_5": 0.33164,
"recall_at_10": 0.46515,
"recall_at_20": 0.5659,
"recall_at_50": 0.75345,
"recall_at_100": 0.87683,
"precision_at_1": 0.63793,
"precision_at_3": 0.61494,
"precision_at_5": 0.55172,
"precision_at_10": 0.41552,
"precision_at_20": 0.29138,
"precision_at_50": 0.18586,
"precision_at_100": 0.12259,
"mrr_at_1": 0.6379310344827587,
"mrr_at_3": 0.7442528735632185,
"mrr_at_5": 0.7563218390804599,
"mrr_at_10": 0.7609195402298853,
"mrr_at_20": 0.7622458001768349,
"mrr_at_50": 0.7622458001768349,
"mrr_at_100": 0.7622458001768349,
"naucs_at_1_max": 0.419786473109018,
"naucs_at_1_std": 0.07500623692548537,
"naucs_at_1_diff1": 0.4755949058401351,
"naucs_at_3_max": 0.4608801956792379,
"naucs_at_3_std": 0.2349462202876775,
"naucs_at_3_diff1": 0.31316278322181995,
"naucs_at_5_max": 0.4690855783002276,
"naucs_at_5_std": 0.2973952058315693,
"naucs_at_5_diff1": 0.16569197052909665,
"naucs_at_10_max": 0.397719696512456,
"naucs_at_10_std": 0.20885173965188283,
"naucs_at_10_diff1": 0.1402583115094283,
"naucs_at_20_max": 0.32542799732914507,
"naucs_at_20_std": 0.2747074671470978,
"naucs_at_20_diff1": 0.13356908650626761,
"naucs_at_50_max": 0.23125915204560868,
"naucs_at_50_std": 0.1985419001386752,
"naucs_at_50_diff1": 0.1130455594831512,
"naucs_at_100_max": 0.1664221082762526,
"naucs_at_100_std": 0.10618063335238329,
"naucs_at_100_diff1": 0.1352860324631496
},
"vidore/synthetic_rse_restaurant_filtered_v1.0": {
"ndcg_at_1": 0.47368,
"ndcg_at_3": 0.47849,
"ndcg_at_5": 0.50343,
"ndcg_at_10": 0.5406,
"ndcg_at_20": 0.5836,
"ndcg_at_50": 0.61317,
"ndcg_at_100": 0.62789,
"map_at_1": 0.24712,
"map_at_3": 0.35036,
"map_at_5": 0.39048,
"map_at_10": 0.42533,
"map_at_20": 0.44764,
"map_at_50": 0.46192,
"map_at_100": 0.46709,
"recall_at_1": 0.24712,
"recall_at_3": 0.4477,
"recall_at_5": 0.534,
"recall_at_10": 0.65675,
"recall_at_20": 0.79217,
"recall_at_50": 0.87327,
"recall_at_100": 0.92452,
"precision_at_1": 0.47368,
"precision_at_3": 0.30994,
"precision_at_5": 0.24912,
"precision_at_10": 0.16667,
"precision_at_20": 0.10965,
"precision_at_50": 0.0586,
"precision_at_100": 0.03368,
"mrr_at_1": 0.47368421052631576,
"mrr_at_3": 0.5935672514619882,
"mrr_at_5": 0.6058479532163742,
"mrr_at_10": 0.6139376218323586,
"mrr_at_20": 0.618968190678717,
"mrr_at_50": 0.618968190678717,
"mrr_at_100": 0.6191509392167286,
"naucs_at_1_max": 0.22880110629191316,
"naucs_at_1_std": 0.4807654764575437,
"naucs_at_1_diff1": -0.010719521336880587,
"naucs_at_3_max": 0.22255470892079213,
"naucs_at_3_std": 0.26274414406533403,
"naucs_at_3_diff1": 0.19901703943731583,
"naucs_at_5_max": 0.08307200612000629,
"naucs_at_5_std": 0.3427446267225359,
"naucs_at_5_diff1": 0.03454799044188057,
"naucs_at_10_max": 0.006208463905274427,
"naucs_at_10_std": 0.26435566331213406,
"naucs_at_10_diff1": -0.0015237809655790574,
"naucs_at_20_max": 0.04503846394507633,
"naucs_at_20_std": 0.2139553060662944,
"naucs_at_20_diff1": 0.018201732846277442,
"naucs_at_50_max": 0.11296962958977845,
"naucs_at_50_std": 0.2632828869514633,
"naucs_at_50_diff1": -0.08837586390454706,
"naucs_at_100_max": 0.11868484014177574,
"naucs_at_100_std": 0.2734455942211179,
"naucs_at_100_diff1": -0.13438673026134765
},
"vidore/synthetic_axa_filtered_v1.0": {
"ndcg_at_1": 0.72222,
"ndcg_at_3": 0.72275,
"ndcg_at_5": 0.68826,
"ndcg_at_10": 0.69213,
"ndcg_at_20": 0.71771,
"ndcg_at_50": 0.77886,
"ndcg_at_100": 0.79636,
"map_at_1": 0.40655,
"map_at_3": 0.4985,
"map_at_5": 0.53149,
"map_at_10": 0.58441,
"map_at_20": 0.61212,
"map_at_50": 0.64312,
"map_at_100": 0.64864,
"recall_at_1": 0.40655,
"recall_at_3": 0.55187,
"recall_at_5": 0.6011,
"recall_at_10": 0.69041,
"recall_at_20": 0.76244,
"recall_at_50": 0.92875,
"recall_at_100": 0.99383,
"precision_at_1": 0.72222,
"precision_at_3": 0.46296,
"precision_at_5": 0.34444,
"precision_at_10": 0.23889,
"precision_at_20": 0.15556,
"precision_at_50": 0.08778,
"precision_at_100": 0.04722,
"mrr_at_1": 0.7222222222222222,
"mrr_at_3": 0.8333333333333334,
"mrr_at_5": 0.8333333333333334,
"mrr_at_10": 0.8333333333333334,
"mrr_at_20": 0.8333333333333334,
"mrr_at_50": 0.834625322997416,
"mrr_at_100": 0.834625322997416,
"naucs_at_1_max": 0.284337842431283,
"naucs_at_1_std": -0.5569014279726879,
"naucs_at_1_diff1": 0.9294297019639159,
"naucs_at_3_max": 0.4036627064656008,
"naucs_at_3_std": -0.258134268334631,
"naucs_at_3_diff1": 0.03319977085307361,
"naucs_at_5_max": 0.3391788551975161,
"naucs_at_5_std": -0.13525790110164213,
"naucs_at_5_diff1": -0.08545811027367776,
"naucs_at_10_max": 0.17205834819466667,
"naucs_at_10_std": -0.16161238583793947,
"naucs_at_10_diff1": -0.12294732613259658,
"naucs_at_20_max": 0.019221817231354917,
"naucs_at_20_std": -0.18190133460121213,
"naucs_at_20_diff1": -0.26196318223082454,
"naucs_at_50_max": -0.1775802959975011,
"naucs_at_50_std": -0.29278726428875046,
"naucs_at_50_diff1": -0.3578106924688144,
"naucs_at_100_max": -0.11947266465729971,
"naucs_at_100_std": -0.30339673690269625,
"naucs_at_100_diff1": -0.3387677917417156
},
"vidore/synthetic_rse_restaurant_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.47368,
"ndcg_at_3": 0.48112,
"ndcg_at_5": 0.51162,
"ndcg_at_10": 0.55587,
"ndcg_at_20": 0.593,
"ndcg_at_50": 0.62154,
"ndcg_at_100": 0.63537,
"map_at_1": 0.23657,
"map_at_3": 0.3516,
"map_at_5": 0.39581,
"map_at_10": 0.43424,
"map_at_20": 0.45458,
"map_at_50": 0.46799,
"map_at_100": 0.47342,
"recall_at_1": 0.23657,
"recall_at_3": 0.44464,
"recall_at_5": 0.5509,
"recall_at_10": 0.69046,
"recall_at_20": 0.80641,
"recall_at_50": 0.88589,
"recall_at_100": 0.92726,
"precision_at_1": 0.47368,
"precision_at_3": 0.31871,
"precision_at_5": 0.25526,
"precision_at_10": 0.17412,
"precision_at_20": 0.11096,
"precision_at_50": 0.05851,
"precision_at_100": 0.0339,
"mrr_at_1": 0.47368421052631576,
"mrr_at_3": 0.5921052631578947,
"mrr_at_5": 0.6070175438596491,
"mrr_at_10": 0.618662976886661,
"mrr_at_20": 0.623367953282814,
"mrr_at_50": 0.6237087980633872,
"mrr_at_100": 0.6237544851978901,
"naucs_at_1_max": 0.030121925389534895,
"naucs_at_1_std": 0.22985280707732192,
"naucs_at_1_diff1": 0.09815343782760363,
"naucs_at_3_max": 0.08222682574140788,
"naucs_at_3_std": 0.20242031488104395,
"naucs_at_3_diff1": 0.043651752642660774,
"naucs_at_5_max": 0.029502810692712066,
"naucs_at_5_std": 0.2552530112160377,
"naucs_at_5_diff1": -0.01488436104125662,
"naucs_at_10_max": 0.043798500718208115,
"naucs_at_10_std": 0.25656427779016705,
"naucs_at_10_diff1": -0.03213615749189139,
"naucs_at_20_max": 0.08483260560870946,
"naucs_at_20_std": 0.22759171353041793,
"naucs_at_20_diff1": -0.013226797212940839,
"naucs_at_50_max": 0.145727206325556,
"naucs_at_50_std": 0.2840662881602473,
"naucs_at_50_diff1": -0.12024084882915537,
"naucs_at_100_max": 0.16860092521171394,
"naucs_at_100_std": 0.324767526336977,
"naucs_at_100_diff1": -0.15998217261471917
},
"vidore/synthetic_axa_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.63889,
"ndcg_at_3": 0.62932,
"ndcg_at_5": 0.61003,
"ndcg_at_10": 0.62475,
"ndcg_at_20": 0.64803,
"ndcg_at_50": 0.71389,
"ndcg_at_100": 0.73221,
"map_at_1": 0.33774,
"map_at_3": 0.42553,
"map_at_5": 0.46128,
"map_at_10": 0.51109,
"map_at_20": 0.5344,
"map_at_50": 0.56198,
"map_at_100": 0.56769,
"recall_at_1": 0.33774,
"recall_at_3": 0.48038,
"recall_at_5": 0.55351,
"recall_at_10": 0.66626,
"recall_at_20": 0.72903,
"recall_at_50": 0.91514,
"recall_at_100": 0.97653,
"precision_at_1": 0.63889,
"precision_at_3": 0.40741,
"precision_at_5": 0.31111,
"precision_at_10": 0.21806,
"precision_at_20": 0.13889,
"precision_at_50": 0.08222,
"precision_at_100": 0.04542,
"mrr_at_1": 0.6388888888888888,
"mrr_at_3": 0.75,
"mrr_at_5": 0.7569444444444444,
"mrr_at_10": 0.7609126984126985,
"mrr_at_20": 0.7621753246753247,
"mrr_at_50": 0.7634990122224704,
"mrr_at_100": 0.7634990122224704,
"naucs_at_1_max": 0.4716487479189039,
"naucs_at_1_std": -0.006727437637346147,
"naucs_at_1_diff1": 0.6581008709633844,
"naucs_at_3_max": 0.4695980071561003,
"naucs_at_3_std": 0.07699949409760401,
"naucs_at_3_diff1": 0.37046512198112674,
"naucs_at_5_max": 0.39746806745579205,
"naucs_at_5_std": 0.08389923773194792,
"naucs_at_5_diff1": 0.2667449430996483,
"naucs_at_10_max": 0.27925046935465064,
"naucs_at_10_std": 0.04493147311300899,
"naucs_at_10_diff1": 0.14879018650447173,
"naucs_at_20_max": 0.1724123702015283,
"naucs_at_20_std": -0.0406874705309399,
"naucs_at_20_diff1": 0.005456808130550691,
"naucs_at_50_max": 0.053840240229350286,
"naucs_at_50_std": -0.15038250749984536,
"naucs_at_50_diff1": -0.15099178882024658,
"naucs_at_100_max": 0.017995420870417105,
"naucs_at_100_std": -0.1816075038440127,
"naucs_at_100_diff1": -0.18370322898832045
},
"vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered_multilingual": {
"ndcg_at_1": 0.53438,
"ndcg_at_3": 0.54593,
"ndcg_at_5": 0.56864,
"ndcg_at_10": 0.60603,
"ndcg_at_20": 0.63584,
"ndcg_at_50": 0.66155,
"ndcg_at_100": 0.67448,
"map_at_1": 0.3256,
"map_at_3": 0.43974,
"map_at_5": 0.47749,
"map_at_10": 0.51301,
"map_at_20": 0.52876,
"map_at_50": 0.53796,
"map_at_100": 0.54086,
"recall_at_1": 0.3256,
"recall_at_3": 0.50788,
"recall_at_5": 0.59866,
"recall_at_10": 0.70913,
"recall_at_20": 0.79605,
"recall_at_50": 0.87472,
"recall_at_100": 0.92428,
"precision_at_1": 0.53438,
"precision_at_3": 0.3349,
"precision_at_5": 0.2575,
"precision_at_10": 0.16875,
"precision_at_20": 0.1007,
"precision_at_50": 0.04887,
"precision_at_100": 0.027,
"mrr_at_1": 0.534375,
"mrr_at_3": 0.6351562499999994,
"mrr_at_5": 0.6516406249999992,
"mrr_at_10": 0.6609145585317455,
"mrr_at_20": 0.664847090359921,
"mrr_at_50": 0.6655434024683474,
"mrr_at_100": 0.6656955962320235,
"naucs_at_1_max": 0.289841750710978,
"naucs_at_1_std": -0.039288288171653626,
"naucs_at_1_diff1": 0.39128614768495107,
"naucs_at_3_max": 0.18877375406489166,
"naucs_at_3_std": 0.028383723285413855,
"naucs_at_3_diff1": -0.000027337444719206833,
"naucs_at_5_max": 0.15432591818453287,
"naucs_at_5_std": 0.02995110480740528,
"naucs_at_5_diff1": -0.11005803971334709,
"naucs_at_10_max": 0.13088890322554914,
"naucs_at_10_std": 0.06919596389023963,
"naucs_at_10_diff1": -0.17785912723881914,
"naucs_at_20_max": 0.1468487573626851,
"naucs_at_20_std": 0.14193307673103725,
"naucs_at_20_diff1": -0.20865497781935535,
"naucs_at_50_max": 0.13335503376393565,
"naucs_at_50_std": 0.13252192227124457,
"naucs_at_50_diff1": -0.22225698776995703,
"naucs_at_100_max": 0.0920178712104123,
"naucs_at_100_std": 0.11787379937051319,
"naucs_at_100_diff1": -0.22789195906486828
},
"vidore/synthetics_economics_macro_economy_2024_filtered_v1.0_multilingual": {
"ndcg_at_1": 0.57759,
"ndcg_at_3": 0.56007,
"ndcg_at_5": 0.52841,
"ndcg_at_10": 0.50961,
"ndcg_at_20": 0.52797,
"ndcg_at_50": 0.59657,
"ndcg_at_100": 0.63916,
"map_at_1": 0.07684,
"map_at_3": 0.16481,
"map_at_5": 0.20962,
"map_at_10": 0.26965,
"map_at_20": 0.31679,
"map_at_50": 0.3705,
"map_at_100": 0.39716,
"recall_at_1": 0.07684,
"recall_at_3": 0.20767,
"recall_at_5": 0.2827,
"recall_at_10": 0.40917,
"recall_at_20": 0.5326,
"recall_at_50": 0.73621,
"recall_at_100": 0.86845,
"precision_at_1": 0.57759,
"precision_at_3": 0.52586,
"precision_at_5": 0.46552,
"precision_at_10": 0.37241,
"precision_at_20": 0.27974,
"precision_at_50": 0.18276,
"precision_at_100": 0.12022,
"mrr_at_1": 0.5775862068965517,
"mrr_at_3": 0.682471264367816,
"mrr_at_5": 0.7003591954022986,
"mrr_at_10": 0.7084325396825395,
"mrr_at_20": 0.7095603211443726,
"mrr_at_50": 0.7103997141136704,
"mrr_at_100": 0.7104842306789173,
"naucs_at_1_max": 0.386719550550633,
"naucs_at_1_std": 0.03964188186567574,
"naucs_at_1_diff1": 0.29324581025971,
"naucs_at_3_max": 0.3940438135481905,
"naucs_at_3_std": 0.11916750908192153,
"naucs_at_3_diff1": 0.11789001603532652,
"naucs_at_5_max": 0.4355532765854707,
"naucs_at_5_std": 0.2551718488349167,
"naucs_at_5_diff1": 0.013367355141125142,
"naucs_at_10_max": 0.3421752314533559,
"naucs_at_10_std": 0.23601153039445807,
"naucs_at_10_diff1": -0.007635368428971648,
"naucs_at_20_max": 0.2981468248373431,
"naucs_at_20_std": 0.25464689427959064,
"naucs_at_20_diff1": -0.0018484213984256066,
"naucs_at_50_max": 0.20823391530171706,
"naucs_at_50_std": 0.18448677686850273,
"naucs_at_50_diff1": -0.03204255205680404,
"naucs_at_100_max": 0.16804217929784568,
"naucs_at_100_std": 0.13259088431947955,
"naucs_at_100_diff1": -0.017832844620359677
},
"vidore/arxivqa_test_subsampled": {
"ndcg_at_1": 0.776,
"ndcg_at_3": 0.83117,
"ndcg_at_5": 0.84184,
"ndcg_at_10": 0.8527,
"ndcg_at_20": 0.8598,
"ndcg_at_50": 0.86593,
"ndcg_at_100": 0.86754,
"map_at_1": 0.776,
"map_at_3": 0.81833,
"map_at_5": 0.82423,
"map_at_10": 0.82865,
"map_at_20": 0.83061,
"map_at_50": 0.83167,
"map_at_100": 0.83181,
"recall_at_1": 0.776,
"recall_at_3": 0.868,
"recall_at_5": 0.894,
"recall_at_10": 0.928,
"recall_at_20": 0.956,
"recall_at_50": 0.986,
"recall_at_100": 0.996,
"precision_at_1": 0.776,
"precision_at_3": 0.28933,
"precision_at_5": 0.1788,
"precision_at_10": 0.0928,
"precision_at_20": 0.0478,
"precision_at_50": 0.01972,
"precision_at_100": 0.00996,
"mrr_at_1": 0.776,
"mrr_at_3": 0.8183333333333332,
"mrr_at_5": 0.8242333333333332,
"mrr_at_10": 0.8286515873015872,
"mrr_at_20": 0.8306089112641741,
"mrr_at_50": 0.8316740970253331,
"mrr_at_100": 0.8318131621644544,
"naucs_at_1_max": 0.33058912801009327,
"naucs_at_1_std": -0.3471366403978715,
"naucs_at_1_diff1": 0.9033890402442812,
"naucs_at_3_max": 0.19704584636063602,
"naucs_at_3_std": -0.3672895421207954,
"naucs_at_3_diff1": 0.8800149022587156,
"naucs_at_5_max": 0.21286583958965521,
"naucs_at_5_std": -0.3741724792787036,
"naucs_at_5_diff1": 0.8638251424311815,
"naucs_at_10_max": 0.12077238302728274,
"naucs_at_10_std": -0.3839739599543562,
"naucs_at_10_diff1": 0.8619540408756106,
"naucs_at_20_max": 0.15711739241150968,
"naucs_at_20_std": -0.269353195823785,
"naucs_at_20_diff1": 0.9188311688311718,
"naucs_at_50_max": 0.012538348672796787,
"naucs_at_50_std": -0.17046818727492316,
"naucs_at_50_diff1": 0.9416433239962615,
"naucs_at_100_max": -0.3674136321195193,
"naucs_at_100_std": 0.21825396825390833,
"naucs_at_100_diff1": 1.0
},
"vidore/docvqa_test_subsampled": {
"ndcg_at_1": 0.47672,
"ndcg_at_3": 0.55403,
"ndcg_at_5": 0.57789,
"ndcg_at_10": 0.59341,
"ndcg_at_20": 0.60923,
"ndcg_at_50": 0.62426,
"ndcg_at_100": 0.63194,
"map_at_1": 0.47672,
"map_at_3": 0.53548,
"map_at_5": 0.54878,
"map_at_10": 0.55547,
"map_at_20": 0.55988,
"map_at_50": 0.56214,
"map_at_100": 0.56285,
"recall_at_1": 0.47672,
"recall_at_3": 0.60754,
"recall_at_5": 0.66519,
"recall_at_10": 0.71175,
"recall_at_20": 0.77384,
"recall_at_50": 0.85144,
"recall_at_100": 0.898,
"precision_at_1": 0.47672,
"precision_at_3": 0.20251,
"precision_at_5": 0.13304,
"precision_at_10": 0.07118,
"precision_at_20": 0.03869,
"precision_at_50": 0.01703,
"precision_at_100": 0.00898,
"mrr_at_1": 0.47671840354767187,
"mrr_at_3": 0.5354767184035478,
"mrr_at_5": 0.5487804878048781,
"mrr_at_10": 0.5554746066941187,
"mrr_at_20": 0.5598837982651386,
"mrr_at_50": 0.5621395811039165,
"mrr_at_100": 0.562853769260598,
"naucs_at_1_max": -0.34988732060075106,
"naucs_at_1_std": -0.0024823362084227295,
"naucs_at_1_diff1": 0.7709257756426611,
"naucs_at_3_max": -0.3848072440519266,
"naucs_at_3_std": 0.04900442240358232,
"naucs_at_3_diff1": 0.6684105207204192,
"naucs_at_5_max": -0.4729915658738238,
"naucs_at_5_std": 0.051502600367980475,
"naucs_at_5_diff1": 0.635314656852817,
"naucs_at_10_max": -0.4755655825410273,
"naucs_at_10_std": 0.14092635748512966,
"naucs_at_10_diff1": 0.6142413640583799,
"naucs_at_20_max": -0.5013121261464005,
"naucs_at_20_std": 0.26884721841803433,
"naucs_at_20_diff1": 0.5864223996381905,
"naucs_at_50_max": -0.46761445911554894,
"naucs_at_50_std": 0.5204115851881066,
"naucs_at_50_diff1": 0.5754658330685111,
"naucs_at_100_max": -0.45463665233128225,
"naucs_at_100_std": 0.6231821590075602,
"naucs_at_100_diff1": 0.5438810608835282
},
"vidore/infovqa_test_subsampled": {
"ndcg_at_1": 0.80162,
"ndcg_at_3": 0.86102,
"ndcg_at_5": 0.87252,
"ndcg_at_10": 0.88185,
"ndcg_at_20": 0.88587,
"ndcg_at_50": 0.88753,
"ndcg_at_100": 0.88887,
"map_at_1": 0.80162,
"map_at_3": 0.84717,
"map_at_5": 0.85344,
"map_at_10": 0.85739,
"map_at_20": 0.85845,
"map_at_50": 0.85874,
"map_at_100": 0.85886,
"recall_at_1": 0.80162,
"recall_at_3": 0.90081,
"recall_at_5": 0.92915,
"recall_at_10": 0.95749,
"recall_at_20": 0.97368,
"recall_at_50": 0.98178,
"recall_at_100": 0.98988,
"precision_at_1": 0.80162,
"precision_at_3": 0.30027,
"precision_at_5": 0.18583,
"precision_at_10": 0.09575,
"precision_at_20": 0.04868,
"precision_at_50": 0.01964,
"precision_at_100": 0.0099,
"mrr_at_1": 0.8016194331983806,
"mrr_at_3": 0.8471659919028339,
"mrr_at_5": 0.8534412955465585,
"mrr_at_10": 0.8573934837092728,
"mrr_at_20": 0.8584495144744448,
"mrr_at_50": 0.8587416976822032,
"mrr_at_100": 0.8588638098799428,
"naucs_at_1_max": 0.36644356235169917,
"naucs_at_1_std": -0.0621555111999374,
"naucs_at_1_diff1": 0.9156953149702299,
"naucs_at_3_max": 0.31921758147803636,
"naucs_at_3_std": -0.07179256055822544,
"naucs_at_3_diff1": 0.8529538901636119,
"naucs_at_5_max": 0.4367975086754174,
"naucs_at_5_std": 0.2066108147068303,
"naucs_at_5_diff1": 0.8413977765662696,
"naucs_at_10_max": 0.4577426306876829,
"naucs_at_10_std": 0.5149462893773337,
"naucs_at_10_diff1": 0.8478907857538679,
"naucs_at_20_max": 0.3736598448014174,
"naucs_at_20_std": 0.716083799779582,
"naucs_at_20_diff1": 0.8300268432831852,
"naucs_at_50_max": 0.38100826473331617,
"naucs_at_50_std": 0.9256067666719673,
"naucs_at_50_diff1": 0.8288764514037448,
"naucs_at_100_max": 0.380281489664749,
"naucs_at_100_std": 0.9738797091870569,
"naucs_at_100_diff1": 0.9183327616354471
},
"vidore/tabfquad_test_subsampled": {
"ndcg_at_1": 0.90714,
"ndcg_at_3": 0.94668,
"ndcg_at_5": 0.94806,
"ndcg_at_10": 0.95141,
"ndcg_at_20": 0.95329,
"ndcg_at_50": 0.95329,
"ndcg_at_100": 0.95388,
"map_at_1": 0.90714,
"map_at_3": 0.9369,
"map_at_5": 0.93762,
"map_at_10": 0.93893,
"map_at_20": 0.93948,
"map_at_50": 0.93948,
"map_at_100": 0.93954,
"recall_at_1": 0.90714,
"recall_at_3": 0.975,
"recall_at_5": 0.97857,
"recall_at_10": 0.98929,
"recall_at_20": 0.99643,
"recall_at_50": 0.99643,
"recall_at_100": 1.0,
"precision_at_1": 0.90714,
"precision_at_3": 0.325,
"precision_at_5": 0.19571,
"precision_at_10": 0.09893,
"precision_at_20": 0.04982,
"precision_at_50": 0.01993,
"precision_at_100": 0.01,
"mrr_at_1": 0.9071428571428571,
"mrr_at_3": 0.9369047619047618,
"mrr_at_5": 0.9376190476190477,
"mrr_at_10": 0.9389328231292515,
"mrr_at_20": 0.9394822736787022,
"mrr_at_50": 0.9394822736787022,
"mrr_at_100": 0.9395380772501308,
"naucs_at_1_max": 0.6988436400201107,
"naucs_at_1_std": 0.1637218990160183,
"naucs_at_1_diff1": 0.8848128995187822,
"naucs_at_3_max": 0.98132586367881,
"naucs_at_3_std": 0.2895158063225239,
"naucs_at_3_diff1": 0.9229691876750775,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 0.5297230003112448,
"naucs_at_5_diff1": 0.910130718954251,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 0.820261437908502,
"naucs_at_10_diff1": 0.9564270152505505,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 0.8692810457516478,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": 1.0,
"naucs_at_50_std": 0.8692810457515607,
"naucs_at_50_diff1": 1.0,
"naucs_at_100_max": 1.0,
"naucs_at_100_std": 1.0,
"naucs_at_100_diff1": 1.0
},
"vidore/tatdqa_test": {
"ndcg_at_1": 0.53463,
"ndcg_at_3": 0.63835,
"ndcg_at_5": 0.668,
"ndcg_at_10": 0.70153,
"ndcg_at_20": 0.71324,
"ndcg_at_50": 0.72125,
"ndcg_at_100": 0.72388,
"map_at_1": 0.53463,
"map_at_3": 0.6127,
"map_at_5": 0.62922,
"map_at_10": 0.64314,
"map_at_20": 0.64645,
"map_at_50": 0.64781,
"map_at_100": 0.64806,
"recall_at_1": 0.53463,
"recall_at_3": 0.71264,
"recall_at_5": 0.78433,
"recall_at_10": 0.88761,
"recall_at_20": 0.93317,
"recall_at_50": 0.97266,
"recall_at_100": 0.98846,
"precision_at_1": 0.53463,
"precision_at_3": 0.23755,
"precision_at_5": 0.15687,
"precision_at_10": 0.08876,
"precision_at_20": 0.04666,
"precision_at_50": 0.01945,
"precision_at_100": 0.00988,
"mrr_at_1": 0.5328068043742406,
"mrr_at_3": 0.6120899149453227,
"mrr_at_5": 0.628736330498179,
"mrr_at_10": 0.6424352446527422,
"mrr_at_20": 0.6457490447027197,
"mrr_at_50": 0.6471051191468972,
"mrr_at_100": 0.6473555641491229,
"naucs_at_1_max": 0.004770766180126278,
"naucs_at_1_std": -0.14754760705264022,
"naucs_at_1_diff1": 0.6935989861691055,
"naucs_at_3_max": -0.0016008692400043853,
"naucs_at_3_std": -0.13025322915899146,
"naucs_at_3_diff1": 0.6105721528628484,
"naucs_at_5_max": 0.010908860308784006,
"naucs_at_5_std": -0.07481967080015482,
"naucs_at_5_diff1": 0.5707165994173674,
"naucs_at_10_max": 0.13140741044161422,
"naucs_at_10_std": 0.05754247942547455,
"naucs_at_10_diff1": 0.5353376028377649,
"naucs_at_20_max": 0.22540165846494245,
"naucs_at_20_std": 0.2108123379388908,
"naucs_at_20_diff1": 0.51950176941338,
"naucs_at_50_max": 0.2798785039808347,
"naucs_at_50_std": 0.6091081627491584,
"naucs_at_50_diff1": 0.5097940423840385,
"naucs_at_100_max": 0.41272456649348505,
"naucs_at_100_std": 0.8595897297980766,
"naucs_at_100_diff1": 0.580463276628437
},
"vidore/shiftproject_test": {
"ndcg_at_1": 0.74,
"ndcg_at_3": 0.8394,
"ndcg_at_5": 0.85575,
"ndcg_at_10": 0.86589,
"ndcg_at_20": 0.86868,
"ndcg_at_50": 0.87054,
"ndcg_at_100": 0.87054,
"map_at_1": 0.74,
"map_at_3": 0.815,
"map_at_5": 0.824,
"map_at_10": 0.82844,
"map_at_20": 0.82935,
"map_at_50": 0.8296,
"map_at_100": 0.8296,
"recall_at_1": 0.74,
"recall_at_3": 0.91,
"recall_at_5": 0.95,
"recall_at_10": 0.98,
"recall_at_20": 0.99,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.74,
"precision_at_3": 0.30333,
"precision_at_5": 0.19,
"precision_at_10": 0.098,
"precision_at_20": 0.0495,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.74,
"mrr_at_3": 0.8149999999999998,
"mrr_at_5": 0.8240000000000001,
"mrr_at_10": 0.8284444444444445,
"mrr_at_20": 0.8293535353535354,
"mrr_at_50": 0.8296035353535354,
"mrr_at_100": 0.8296035353535354,
"naucs_at_1_max": 0.22610599732370926,
"naucs_at_1_std": -0.32918373139775386,
"naucs_at_1_diff1": 0.5869186164389112,
"naucs_at_3_max": 0.5146280734516033,
"naucs_at_3_std": -0.4240585122938067,
"naucs_at_3_diff1": 0.5019192862330117,
"naucs_at_5_max": 0.43902894491130423,
"naucs_at_5_std": -0.038001867413624234,
"naucs_at_5_diff1": 0.3900093370681662,
"naucs_at_10_max": 0.24042950513538955,
"naucs_at_10_std": 0.35807656395892185,
"naucs_at_10_diff1": 0.27544351073763346,
"naucs_at_20_max": 0.35807656395891135,
"naucs_at_20_std": 0.35807656395891135,
"naucs_at_20_diff1": -0.1713352007469681,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_artificial_intelligence_test": {
"ndcg_at_1": 0.93,
"ndcg_at_3": 0.97155,
"ndcg_at_5": 0.97155,
"ndcg_at_10": 0.97155,
"ndcg_at_20": 0.97155,
"ndcg_at_50": 0.97155,
"ndcg_at_100": 0.97155,
"map_at_1": 0.93,
"map_at_3": 0.96167,
"map_at_5": 0.96167,
"map_at_10": 0.96167,
"map_at_20": 0.96167,
"map_at_50": 0.96167,
"map_at_100": 0.96167,
"recall_at_1": 0.93,
"recall_at_3": 1.0,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.93,
"precision_at_3": 0.33333,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.93,
"mrr_at_3": 0.9616666666666667,
"mrr_at_5": 0.9616666666666667,
"mrr_at_10": 0.9616666666666667,
"mrr_at_20": 0.9616666666666667,
"mrr_at_50": 0.9616666666666667,
"mrr_at_100": 0.9616666666666667,
"naucs_at_1_max": 0.23656129118313848,
"naucs_at_1_std": -0.8155929038282005,
"naucs_at_1_diff1": 0.9813258636788056,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 1.0,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_energy_test": {
"ndcg_at_1": 0.86,
"ndcg_at_3": 0.90286,
"ndcg_at_5": 0.91921,
"ndcg_at_10": 0.92592,
"ndcg_at_20": 0.92592,
"ndcg_at_50": 0.92794,
"ndcg_at_100": 0.92794,
"map_at_1": 0.86,
"map_at_3": 0.89333,
"map_at_5": 0.90233,
"map_at_10": 0.90525,
"map_at_20": 0.90525,
"map_at_50": 0.90558,
"map_at_100": 0.90558,
"recall_at_1": 0.86,
"recall_at_3": 0.93,
"recall_at_5": 0.97,
"recall_at_10": 0.99,
"recall_at_20": 0.99,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.86,
"precision_at_3": 0.31,
"precision_at_5": 0.194,
"precision_at_10": 0.099,
"precision_at_20": 0.0495,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.86,
"mrr_at_3": 0.8933333333333334,
"mrr_at_5": 0.9023333333333333,
"mrr_at_10": 0.9052499999999999,
"mrr_at_20": 0.9052499999999999,
"mrr_at_50": 0.9055833333333332,
"mrr_at_100": 0.9055833333333332,
"naucs_at_1_max": 0.5179437439379241,
"naucs_at_1_std": -0.3843702369405563,
"naucs_at_1_diff1": 0.9381668283220175,
"naucs_at_3_max": 0.4529811924769899,
"naucs_at_3_std": -0.7859810590903017,
"naucs_at_3_diff1": 0.9416433239962636,
"naucs_at_5_max": 0.317149081854964,
"naucs_at_5_std": -0.2983193277310949,
"naucs_at_5_diff1": 0.9074074074074053,
"naucs_at_10_max": -0.1713352007469681,
"naucs_at_10_std": -1.7399626517273863,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": -0.1713352007469681,
"naucs_at_20_std": -1.7399626517273863,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_government_reports_test": {
"ndcg_at_1": 0.9,
"ndcg_at_3": 0.95417,
"ndcg_at_5": 0.95847,
"ndcg_at_10": 0.95847,
"ndcg_at_20": 0.95847,
"ndcg_at_50": 0.95847,
"ndcg_at_100": 0.95847,
"map_at_1": 0.9,
"map_at_3": 0.94167,
"map_at_5": 0.94417,
"map_at_10": 0.94417,
"map_at_20": 0.94417,
"map_at_50": 0.94417,
"map_at_100": 0.94417,
"recall_at_1": 0.9,
"recall_at_3": 0.99,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.9,
"precision_at_3": 0.33,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.9,
"mrr_at_3": 0.9416666666666668,
"mrr_at_5": 0.9441666666666667,
"mrr_at_10": 0.9441666666666667,
"mrr_at_20": 0.9441666666666667,
"mrr_at_50": 0.9441666666666667,
"mrr_at_100": 0.9441666666666667,
"naucs_at_1_max": 0.5605508870214744,
"naucs_at_1_std": -0.07072829131652612,
"naucs_at_1_diff1": 0.8389355742296911,
"naucs_at_3_max": 0.12278244631183229,
"naucs_at_3_std": -1.7399626517274398,
"naucs_at_3_diff1": 0.35807656395889226,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_healthcare_industry_test": {
"ndcg_at_1": 0.92,
"ndcg_at_3": 0.96917,
"ndcg_at_5": 0.96917,
"ndcg_at_10": 0.96917,
"ndcg_at_20": 0.96917,
"ndcg_at_50": 0.96917,
"ndcg_at_100": 0.96917,
"map_at_1": 0.92,
"map_at_3": 0.95833,
"map_at_5": 0.95833,
"map_at_10": 0.95833,
"map_at_20": 0.95833,
"map_at_50": 0.95833,
"map_at_100": 0.95833,
"recall_at_1": 0.92,
"recall_at_3": 1.0,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.92,
"precision_at_3": 0.33333,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.92,
"mrr_at_3": 0.9583333333333333,
"mrr_at_5": 0.9583333333333333,
"mrr_at_10": 0.9583333333333333,
"mrr_at_20": 0.9583333333333333,
"mrr_at_50": 0.9583333333333333,
"mrr_at_100": 0.9583333333333333,
"naucs_at_1_max": 0.7380368814192346,
"naucs_at_1_std": 0.03408029878618052,
"naucs_at_1_diff1": 0.9142156862745094,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 1.0,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
}
}
}