dse-phi35-vidore-ft / results.json
MrLight's picture
Create results.json
8f2f2a2 verified
{"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.746, "ndcg_at_3": 0.81469, "ndcg_at_5": 0.83466, "ndcg_at_10": 0.84241, "ndcg_at_20": 0.84886, "ndcg_at_100": 0.85551, "ndcg_at_1000": 0.85603, "map_at_1": 0.746, "map_at_3": 0.79767, "map_at_5": 0.80887, "map_at_10": 0.81205, "map_at_20": 0.81376, "map_at_100": 0.81481, "map_at_1000": 0.81483, "recall_at_1": 0.746, "recall_at_3": 0.864, "recall_at_5": 0.912, "recall_at_10": 0.936, "recall_at_20": 0.962, "recall_at_100": 0.996, "recall_at_1000": 1.0, "precision_at_1": 0.746, "precision_at_3": 0.288, "precision_at_5": 0.1824, "precision_at_10": 0.0936, "precision_at_20": 0.0481, "precision_at_100": 0.00996, "precision_at_1000": 0.001, "mrr_at_1": 0.744, "mrr_at_3": 0.7963333333333329, "mrr_at_5": 0.8080333333333326, "mrr_at_10": 0.8109023809523803, "mrr_at_20": 0.8127144610635315, "mrr_at_100": 0.8136796652301421, "mrr_at_1000": 0.813698878640845, "naucs_at_1_max": 0.3739404634477971, "naucs_at_1_std": -0.4966462646041429, "naucs_at_1_diff1": 0.8874070645413251, "naucs_at_3_max": 0.2799752517494459, "naucs_at_3_std": -0.5587202025374046, "naucs_at_3_diff1": 0.8067787449507883, "naucs_at_5_max": 0.40727654698242866, "naucs_at_5_std": -0.5171356421356439, "naucs_at_5_diff1": 0.8038154655801719, "naucs_at_10_max": 0.4166666666666667, "naucs_at_10_std": -0.4225461017740461, "naucs_at_10_diff1": 0.8349964985994394, "naucs_at_20_max": 0.4518649565089163, "naucs_at_20_std": -0.19976411617279305, "naucs_at_20_diff1": 0.8707553196717274, "naucs_at_100_max": 0.9346405228758466, "naucs_at_100_std": 0.9346405228758466, "naucs_at_100_diff1": 0.9346405228758466, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.46341, "ndcg_at_3": 0.551, "ndcg_at_5": 0.57399, "ndcg_at_10": 0.59775, "ndcg_at_20": 0.61089, "ndcg_at_100": 0.63575, "ndcg_at_1000": 0.64521, "map_at_1": 0.46341, "map_at_3": 0.52919, "map_at_5": 0.54205, "map_at_10": 0.55192, "map_at_20": 0.55566, "map_at_100": 0.55899, "map_at_1000": 0.5594, "recall_at_1": 0.46341, "recall_at_3": 0.61419, "recall_at_5": 0.66962, "recall_at_10": 0.74279, "recall_at_20": 0.79379, "recall_at_100": 0.92905, "recall_at_1000": 1.0, "precision_at_1": 0.46341, "precision_at_3": 0.20473, "precision_at_5": 0.13392, "precision_at_10": 0.07428, "precision_at_20": 0.03969, "precision_at_100": 0.00929, "precision_at_1000": 0.001, "mrr_at_1": 0.4634146341463415, "mrr_at_3": 0.5288248337028827, "mrr_at_5": 0.54079822616408, "mrr_at_10": 0.5510285784676029, "mrr_at_20": 0.5547003794245049, "mrr_at_100": 0.5581250079182053, "mrr_at_1000": 0.5585349970881556, "naucs_at_1_max": 0.2755459672041862, "naucs_at_1_std": -0.43175449758326734, "naucs_at_1_diff1": 0.7575636670161922, "naucs_at_3_max": 0.2871336202386352, "naucs_at_3_std": -0.42283801268921506, "naucs_at_3_diff1": 0.667328332989628, "naucs_at_5_max": 0.3310274785833846, "naucs_at_5_std": -0.3950881137736439, "naucs_at_5_diff1": 0.6243205294675613, "naucs_at_10_max": 0.3705995236350128, "naucs_at_10_std": -0.35046433172650254, "naucs_at_10_diff1": 0.5916980361796277, "naucs_at_20_max": 0.4194191572536321, "naucs_at_20_std": -0.26093541816229876, "naucs_at_20_diff1": 0.5756791544722945, "naucs_at_100_max": 0.497478530871693, "naucs_at_100_std": 0.1701221292331593, "naucs_at_100_diff1": 0.45064651890557395, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.79352, "ndcg_at_3": 0.85138, "ndcg_at_5": 0.86532, "ndcg_at_10": 0.87023, "ndcg_at_20": 0.87484, "ndcg_at_100": 0.87972, "ndcg_at_1000": 0.88184, "map_at_1": 0.79352, "map_at_3": 0.83772, "map_at_5": 0.84531, "map_at_10": 0.84754, "map_at_20": 0.84881, "map_at_100": 0.84947, "map_at_1000": 0.84956, "recall_at_1": 0.79352, "recall_at_3": 0.89069, "recall_at_5": 0.9251, "recall_at_10": 0.93927, "recall_at_20": 0.95749, "recall_at_100": 0.98381, "recall_at_1000": 1.0, "precision_at_1": 0.79352, "precision_at_3": 0.2969, "precision_at_5": 0.18502, "precision_at_10": 0.09393, "precision_at_20": 0.04787, "precision_at_100": 0.00984, "precision_at_1000": 0.001, "mrr_at_1": 0.7955465587044535, "mrr_at_3": 0.8383940620782724, "mrr_at_5": 0.8460863697705799, "mrr_at_10": 0.8483516483516479, "mrr_at_20": 0.8496270728584808, "mrr_at_100": 0.8502949338151693, "mrr_at_1000": 0.8503820385584274, "naucs_at_1_max": 0.3865361873053722, "naucs_at_1_std": -0.2858492575169539, "naucs_at_1_diff1": 0.8761387338349901, "naucs_at_3_max": 0.49322117975373037, "naucs_at_3_std": -0.15093654958109678, "naucs_at_3_diff1": 0.8277460954354272, "naucs_at_5_max": 0.626015520587777, "naucs_at_5_std": 0.03111710382863415, "naucs_at_5_diff1": 0.8259422073044183, "naucs_at_10_max": 0.6176985920427269, "naucs_at_10_std": 0.01636472088415303, "naucs_at_10_diff1": 0.8417137285401975, "naucs_at_20_max": 0.7210924296543608, "naucs_at_20_std": 0.19069592021139778, "naucs_at_20_diff1": 0.8562062222042085, "naucs_at_100_max": 0.6639331345365785, "naucs_at_100_std": 0.4682912968412802, "naucs_at_100_diff1": 0.7782720045335071, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.72857, "ndcg_at_3": 0.80455, "ndcg_at_5": 0.81792, "ndcg_at_10": 0.83766, "ndcg_at_20": 0.84485, "ndcg_at_100": 0.84907, "ndcg_at_1000": 0.84907, "map_at_1": 0.72857, "map_at_3": 0.78631, "map_at_5": 0.79381, "map_at_10": 0.80202, "map_at_20": 0.80397, "map_at_100": 0.80464, "map_at_1000": 0.80464, "recall_at_1": 0.72857, "recall_at_3": 0.85714, "recall_at_5": 0.88929, "recall_at_10": 0.95, "recall_at_20": 0.97857, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.72857, "precision_at_3": 0.28571, "precision_at_5": 0.17786, "precision_at_10": 0.095, "precision_at_20": 0.04893, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7285714285714285, "mrr_at_3": 0.7863095238095239, "mrr_at_5": 0.7938095238095237, "mrr_at_10": 0.8020564058956915, "mrr_at_20": 0.8040121882086169, "mrr_at_100": 0.8046834205000282, "mrr_at_1000": 0.8046834205000282, "naucs_at_1_max": 0.6764603081396976, "naucs_at_1_std": 0.07876855968459019, "naucs_at_1_diff1": 0.8757514749881165, "naucs_at_3_max": 0.7639334305150627, "naucs_at_3_std": 0.1721695821185624, "naucs_at_3_diff1": 0.7801263362487845, "naucs_at_5_max": 0.7293706720356309, "naucs_at_5_std": 0.19904822915713227, "naucs_at_5_diff1": 0.7365089533571261, "naucs_at_10_max": 0.7694411097772461, "naucs_at_10_std": 0.1360210750967062, "naucs_at_10_diff1": 0.7278911564625866, "naucs_at_20_max": 0.7443977591036421, "naucs_at_20_std": 0.35091814503579183, "naucs_at_20_diff1": 0.7288359788359783, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.5164, "ndcg_at_3": 0.63514, "ndcg_at_5": 0.6698, "ndcg_at_10": 0.70108, "ndcg_at_20": 0.71386, "ndcg_at_100": 0.72155, "ndcg_at_1000": 0.72292, "map_at_1": 0.5164, "map_at_3": 0.60622, "map_at_5": 0.62523, "map_at_10": 0.63828, "map_at_20": 0.64188, "map_at_100": 0.64299, "map_at_1000": 0.64307, "recall_at_1": 0.5164, "recall_at_3": 0.71871, "recall_at_5": 0.80377, "recall_at_10": 0.89976, "recall_at_20": 0.94957, "recall_at_100": 0.99028, "recall_at_1000": 1.0, "precision_at_1": 0.5164, "precision_at_3": 0.23957, "precision_at_5": 0.16075, "precision_at_10": 0.08998, "precision_at_20": 0.04748, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.5151883353584447, "mrr_at_3": 0.6054070473876072, "mrr_at_5": 0.6244532199270979, "mrr_at_10": 0.6378100349090636, "mrr_at_20": 0.6414463913727549, "mrr_at_100": 0.642542557898978, "mrr_at_1000": 0.6426108343127629, "naucs_at_1_max": 0.2373444266528672, "naucs_at_1_std": -0.23203158146645395, "naucs_at_1_diff1": 0.6928159855599241, "naucs_at_3_max": 0.2476003191487749, "naucs_at_3_std": -0.2023815192771237, "naucs_at_3_diff1": 0.5664637010612357, "naucs_at_5_max": 0.2910666373171799, "naucs_at_5_std": -0.1811597889590415, "naucs_at_5_diff1": 0.5303271314306698, "naucs_at_10_max": 0.3589963070878421, "naucs_at_10_std": -0.10632630300422154, "naucs_at_10_diff1": 0.5276877558316551, "naucs_at_20_max": 0.48407363833137856, "naucs_at_20_std": 0.19987584370485015, "naucs_at_20_diff1": 0.5532145893690829, "naucs_at_100_max": 0.5678114812546349, "naucs_at_100_std": 0.7994886012643277, "naucs_at_100_diff1": 0.5887807147011462, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.52, "ndcg_at_3": 0.67119, "ndcg_at_5": 0.71725, "ndcg_at_10": 0.73341, "ndcg_at_20": 0.74346, "ndcg_at_100": 0.74514, "ndcg_at_1000": 0.7481, "map_at_1": 0.52, "map_at_3": 0.63667, "map_at_5": 0.66267, "map_at_10": 0.66931, "map_at_20": 0.67203, "map_at_100": 0.67219, "map_at_1000": 0.67238, "recall_at_1": 0.52, "recall_at_3": 0.77, "recall_at_5": 0.88, "recall_at_10": 0.93, "recall_at_20": 0.97, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.52, "precision_at_3": 0.25667, "precision_at_5": 0.176, "precision_at_10": 0.093, "precision_at_20": 0.0485, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.53, "mrr_at_3": 0.6466666666666667, "mrr_at_5": 0.6701666666666668, "mrr_at_10": 0.6769920634920638, "mrr_at_20": 0.6797854269913097, "mrr_at_100": 0.6799467173138903, "mrr_at_1000": 0.680134548001721, "naucs_at_1_max": 0.27906850209478185, "naucs_at_1_std": -0.31487023233037714, "naucs_at_1_diff1": 0.5186217966157023, "naucs_at_3_max": 0.2773214565475257, "naucs_at_3_std": -0.524227300194713, "naucs_at_3_diff1": 0.4238490633602645, "naucs_at_5_max": 0.4284521158129184, "naucs_at_5_std": -0.5131641743557088, "naucs_at_5_diff1": 0.3310133630289543, "naucs_at_10_max": 0.47659063625450376, "naucs_at_10_std": -0.8099906629318334, "naucs_at_10_diff1": 0.25550220088035286, "naucs_at_20_max": 0.7027699968876432, "naucs_at_20_std": -0.35434173669467856, "naucs_at_20_diff1": 0.1722689075630234, "naucs_at_100_max": 0.7770774976657274, "naucs_at_100_std": 0.3384687208216665, "naucs_at_100_diff1": -0.10270774976657121, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.9, "ndcg_at_3": 0.94917, "ndcg_at_5": 0.9569, "ndcg_at_10": 0.9569, "ndcg_at_20": 0.9569, "ndcg_at_100": 0.9569, "ndcg_at_1000": 0.9569, "map_at_1": 0.9, "map_at_3": 0.93833, "map_at_5": 0.94233, "map_at_10": 0.94233, "map_at_20": 0.94233, "map_at_100": 0.94233, "map_at_1000": 0.94233, "recall_at_1": 0.9, "recall_at_3": 0.98, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.32667, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.9, "mrr_at_3": 0.9383333333333332, "mrr_at_5": 0.9423333333333334, "mrr_at_10": 0.9423333333333334, "mrr_at_20": 0.9423333333333334, "mrr_at_100": 0.9423333333333334, "mrr_at_1000": 0.9423333333333334, "naucs_at_1_max": 0.43408029878618104, "naucs_at_1_std": -0.557329598506071, "naucs_at_1_diff1": 0.8884220354808596, "naucs_at_3_max": 0.9346405228758099, "naucs_at_3_std": -0.14122315592904858, "naucs_at_3_diff1": 0.7770774976657261, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.91, "ndcg_at_3": 0.93893, "ndcg_at_5": 0.94323, "ndcg_at_10": 0.94323, "ndcg_at_20": 0.94323, "ndcg_at_100": 0.94766, "ndcg_at_1000": 0.94897, "map_at_1": 0.91, "map_at_3": 0.93167, "map_at_5": 0.93417, "map_at_10": 0.93417, "map_at_20": 0.93417, "map_at_100": 0.93508, "map_at_1000": 0.93513, "recall_at_1": 0.91, "recall_at_3": 0.96, "recall_at_5": 0.97, "recall_at_10": 0.97, "recall_at_20": 0.97, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.32, "precision_at_5": 0.194, "precision_at_10": 0.097, "precision_at_20": 0.0485, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.91, "mrr_at_3": 0.9316666666666668, "mrr_at_5": 0.9341666666666667, "mrr_at_10": 0.9341666666666667, "mrr_at_20": 0.9341666666666667, "mrr_at_100": 0.9350776397515529, "mrr_at_1000": 0.9351286601597161, "naucs_at_1_max": 0.12480547774665393, "naucs_at_1_std": -0.857609710550887, "naucs_at_1_diff1": 0.9237472766884529, "naucs_at_3_max": 0.31500933706816353, "naucs_at_3_std": -0.5880018674136294, "naucs_at_3_diff1": 0.9305555555555564, "naucs_at_5_max": 0.08667911609088073, "naucs_at_5_std": -0.8249299719887969, "naucs_at_5_diff1": 0.9074074074074053, "naucs_at_10_max": 0.08667911609088073, "naucs_at_10_std": -0.8249299719887969, "naucs_at_10_diff1": 0.9074074074074053, "naucs_at_20_max": 0.08667911609088073, "naucs_at_20_std": -0.8249299719887969, "naucs_at_20_diff1": 0.9074074074074053, "naucs_at_100_max": 1.0, "naucs_at_100_std": -0.17133520074697067, "naucs_at_100_diff1": 0.7222222222222041, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.9444, "ndcg_at_5": 0.94827, "ndcg_at_10": 0.94827, "ndcg_at_20": 0.94827, "ndcg_at_100": 0.94827, "ndcg_at_1000": 0.94827, "map_at_1": 0.87, "map_at_3": 0.92833, "map_at_5": 0.93033, "map_at_10": 0.93033, "map_at_20": 0.93033, "map_at_100": 0.93033, "map_at_1000": 0.93033, "recall_at_1": 0.87, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.89, "mrr_at_3": 0.9383333333333332, "mrr_at_5": 0.9403333333333334, "mrr_at_10": 0.9403333333333334, "mrr_at_20": 0.9403333333333334, "mrr_at_100": 0.9403333333333334, "mrr_at_1000": 0.9403333333333334, "naucs_at_1_max": 0.5261793675479518, "naucs_at_1_std": -0.5030363622898621, "naucs_at_1_diff1": 0.8894690068873585, "naucs_at_3_max": -0.1713352007469878, "naucs_at_3_std": -1.1517273576097802, "naucs_at_3_diff1": 0.8692810457516356, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.93, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.9628, "ndcg_at_10": 0.96613, "ndcg_at_20": 0.96613, "ndcg_at_100": 0.96613, "ndcg_at_1000": 0.96613, "map_at_1": 0.93, "map_at_3": 0.95167, "map_at_5": 0.95367, "map_at_10": 0.9551, "map_at_20": 0.9551, "map_at_100": 0.9551, "map_at_1000": 0.9551, "recall_at_1": 0.93, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.93, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.93, "mrr_at_3": 0.9516666666666665, "mrr_at_5": 0.9536666666666666, "mrr_at_10": 0.955095238095238, "mrr_at_20": 0.955095238095238, "mrr_at_100": 0.955095238095238, "mrr_at_1000": 0.955095238095238, "naucs_at_1_max": 0.42123516073095707, "naucs_at_1_std": -0.22322262238228857, "naucs_at_1_diff1": 0.8569427771108457, "naucs_at_3_max": -0.5144724556489392, "naucs_at_3_std": -1.151727357609717, "naucs_at_3_diff1": 0.6381886087768379, "naucs_at_5_max": -1.1517273576097316, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.5541549953314738, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}