Visual Document Retrieval
ColPali
Safetensors
English
vidore
vidore-experimental
QuentinJG commited on
Commit
4a87714
·
verified ·
1 Parent(s): bee760d

Upload results.json

Browse files
Files changed (1) hide show
  1. results.json +430 -430
results.json CHANGED
@@ -1,175 +1,175 @@
1
  {
2
  "metadata": {
3
- "timestamp": "2025-01-30T14:08:26.760026",
4
  "vidore_benchmark_version": "4.0.3.dev20+g2d72668"
5
  },
6
  "metrics": {
7
- "arxivqa_test_subsampled": {
8
- "ndcg_at_1": 0.83,
9
- "ndcg_at_3": 0.87855,
10
- "ndcg_at_5": 0.88517,
11
- "ndcg_at_10": 0.8947,
12
- "ndcg_at_20": 0.89981,
13
- "ndcg_at_50": 0.90189,
14
- "ndcg_at_100": 0.90285,
15
- "map_at_1": 0.83,
16
- "map_at_3": 0.86633,
17
- "map_at_5": 0.87003,
18
- "map_at_10": 0.87386,
19
- "map_at_20": 0.87529,
20
- "map_at_50": 0.87566,
21
- "map_at_100": 0.87574,
22
- "recall_at_1": 0.83,
23
- "recall_at_3": 0.914,
24
- "recall_at_5": 0.93,
25
- "recall_at_10": 0.96,
26
  "recall_at_20": 0.98,
27
  "recall_at_50": 0.99,
28
  "recall_at_100": 0.996,
29
- "precision_at_1": 0.83,
30
- "precision_at_3": 0.30467,
31
- "precision_at_5": 0.186,
32
- "precision_at_10": 0.096,
33
  "precision_at_20": 0.049,
34
  "precision_at_50": 0.0198,
35
  "precision_at_100": 0.00996,
36
- "mrr_at_1": 0.834,
37
- "mrr_at_3": 0.8696666666666666,
38
- "mrr_at_5": 0.8724666666666667,
39
- "mrr_at_10": 0.8768738095238096,
40
- "mrr_at_20": 0.8780439976689977,
41
- "mrr_at_50": 0.8784954053717398,
42
- "mrr_at_100": 0.8785747821152928,
43
- "naucs_at_1_max": 0.7814883612933191,
44
- "naucs_at_1_std": -0.014564346665118771,
45
- "naucs_at_1_diff1": 0.9431415800777861,
46
- "naucs_at_3_max": 0.7651293075369701,
47
- "naucs_at_3_std": 0.13162008989642363,
48
- "naucs_at_3_diff1": 0.876392417432091,
49
- "naucs_at_5_max": 0.7719887955182062,
50
- "naucs_at_5_std": 0.12731759370415013,
51
- "naucs_at_5_diff1": 0.8762838468720809,
52
- "naucs_at_10_max": 0.8234126984126983,
53
- "naucs_at_10_std": 0.03160597572361713,
54
- "naucs_at_10_diff1": 0.8921568627450972,
55
- "naucs_at_20_max": 0.8225957049486448,
56
- "naucs_at_20_std": -0.02110177404295123,
57
- "naucs_at_20_diff1": 0.9084967320261399,
58
- "naucs_at_50_max": 0.9738562091503188,
59
- "naucs_at_50_std": 0.3997198879551785,
60
- "naucs_at_50_diff1": 0.947712418300658,
61
  "naucs_at_100_max": 1.0,
62
  "naucs_at_100_std": 1.0,
63
- "naucs_at_100_diff1": 0.9346405228758466
64
  },
65
- "docvqa_test_subsampled": {
66
- "ndcg_at_1": 0.5255,
67
- "ndcg_at_3": 0.59582,
68
- "ndcg_at_5": 0.61852,
69
- "ndcg_at_10": 0.6347,
70
- "ndcg_at_20": 0.6492,
71
- "ndcg_at_50": 0.65706,
72
- "ndcg_at_100": 0.66361,
73
- "map_at_1": 0.5255,
74
- "map_at_3": 0.57871,
75
- "map_at_5": 0.59124,
76
- "map_at_10": 0.59815,
77
- "map_at_20": 0.6021,
78
- "map_at_50": 0.60333,
79
- "map_at_100": 0.60394,
80
- "recall_at_1": 0.5255,
81
- "recall_at_3": 0.64523,
82
- "recall_at_5": 0.70067,
83
- "recall_at_10": 0.74945,
84
- "recall_at_20": 0.8071,
85
- "recall_at_50": 0.84701,
86
- "recall_at_100": 0.88692,
87
- "precision_at_1": 0.5255,
88
- "precision_at_3": 0.21508,
89
- "precision_at_5": 0.14013,
90
- "precision_at_10": 0.07494,
91
- "precision_at_20": 0.04035,
92
- "precision_at_50": 0.01694,
93
- "precision_at_100": 0.00887,
94
- "mrr_at_1": 0.5254988913525499,
95
- "mrr_at_3": 0.5798226164079823,
96
- "mrr_at_5": 0.5909090909090909,
97
- "mrr_at_10": 0.5981936085594622,
98
- "mrr_at_20": 0.6018577072277826,
99
- "mrr_at_50": 0.603184631365057,
100
- "mrr_at_100": 0.6036950361798563,
101
- "naucs_at_1_max": 0.3862474572443775,
102
- "naucs_at_1_std": 0.6866980709008784,
103
- "naucs_at_1_diff1": 0.908866111398327,
104
- "naucs_at_3_max": 0.3232612877463118,
105
- "naucs_at_3_std": 0.7754733885431401,
106
- "naucs_at_3_diff1": 0.8488934990862631,
107
- "naucs_at_5_max": 0.26561580627789727,
108
- "naucs_at_5_std": 0.8294969802329596,
109
- "naucs_at_5_diff1": 0.843549438877749,
110
- "naucs_at_10_max": 0.21226897631460165,
111
- "naucs_at_10_std": 0.9004830786766392,
112
- "naucs_at_10_diff1": 0.8313389289681655,
113
- "naucs_at_20_max": 0.1442628756616487,
114
- "naucs_at_20_std": 0.9192991363569017,
115
- "naucs_at_20_diff1": 0.8284091873092273,
116
- "naucs_at_50_max": 0.04661475790032267,
117
- "naucs_at_50_std": 0.9356118889205624,
118
- "naucs_at_50_diff1": 0.8459735188892017,
119
- "naucs_at_100_max": -0.014764226166713695,
120
- "naucs_at_100_std": 0.9484296739649325,
121
- "naucs_at_100_diff1": 0.853153345512516
122
  },
123
- "syntheticDocQA_energy_test": {
124
- "ndcg_at_1": 0.95,
125
- "ndcg_at_3": 0.96131,
126
- "ndcg_at_5": 0.96131,
127
- "ndcg_at_10": 0.96487,
128
- "ndcg_at_20": 0.96757,
129
- "ndcg_at_50": 0.96952,
130
- "ndcg_at_100": 0.96952,
131
- "map_at_1": 0.95,
132
- "map_at_3": 0.95833,
133
- "map_at_5": 0.95833,
134
- "map_at_10": 0.96,
135
- "map_at_20": 0.96083,
136
- "map_at_50": 0.96113,
137
- "map_at_100": 0.96113,
138
- "recall_at_1": 0.95,
139
- "recall_at_3": 0.97,
140
- "recall_at_5": 0.97,
141
  "recall_at_10": 0.98,
142
  "recall_at_20": 0.99,
143
  "recall_at_50": 1.0,
144
  "recall_at_100": 1.0,
145
- "precision_at_1": 0.95,
146
- "precision_at_3": 0.32333,
147
- "precision_at_5": 0.194,
148
  "precision_at_10": 0.098,
149
  "precision_at_20": 0.0495,
150
  "precision_at_50": 0.02,
151
  "precision_at_100": 0.01,
152
- "mrr_at_1": 0.95,
153
- "mrr_at_3": 0.9583333333333333,
154
- "mrr_at_5": 0.9603333333333333,
155
- "mrr_at_10": 0.9603333333333333,
156
- "mrr_at_20": 0.9612424242424242,
157
- "mrr_at_50": 0.9615549242424243,
158
- "mrr_at_100": 0.9615549242424243,
159
- "naucs_at_1_max": 0.5612511671335213,
160
- "naucs_at_1_std": -0.8497665732959815,
161
- "naucs_at_1_diff1": 0.9477124183006508,
162
- "naucs_at_3_max": 0.807812013694371,
163
- "naucs_at_3_std": -0.6484593837535022,
164
- "naucs_at_3_diff1": 0.9564270152505466,
165
- "naucs_at_5_max": 0.807812013694364,
166
- "naucs_at_5_std": -0.6484593837535011,
167
- "naucs_at_5_diff1": 0.9564270152505424,
168
- "naucs_at_10_max": 0.9346405228758136,
169
- "naucs_at_10_std": -0.10270774976657283,
170
- "naucs_at_10_diff1": 0.9346405228758136,
171
- "naucs_at_20_max": 0.8692810457516413,
172
- "naucs_at_20_std": -0.5634920634920767,
173
  "naucs_at_20_diff1": 1.0,
174
  "naucs_at_50_max": null,
175
  "naucs_at_50_std": null,
@@ -178,161 +178,161 @@
178
  "naucs_at_100_std": null,
179
  "naucs_at_100_diff1": null
180
  },
181
- "tatdqa_test": {
182
- "ndcg_at_1": 0.69502,
183
- "ndcg_at_3": 0.78919,
184
- "ndcg_at_5": 0.80833,
185
- "ndcg_at_10": 0.82129,
186
- "ndcg_at_20": 0.82519,
187
- "ndcg_at_50": 0.82963,
188
- "ndcg_at_100": 0.83159,
189
- "map_at_1": 0.69502,
190
- "map_at_3": 0.76701,
191
- "map_at_5": 0.77755,
192
- "map_at_10": 0.78302,
193
- "map_at_20": 0.7842,
194
- "map_at_50": 0.78491,
195
- "map_at_100": 0.78508,
196
- "recall_at_1": 0.69502,
197
- "recall_at_3": 0.85298,
198
- "recall_at_5": 0.89976,
199
- "recall_at_10": 0.93925,
200
- "recall_at_20": 0.95383,
201
- "recall_at_50": 0.97631,
202
- "recall_at_100": 0.98846,
203
- "precision_at_1": 0.69502,
204
- "precision_at_3": 0.28433,
205
- "precision_at_5": 0.17995,
206
- "precision_at_10": 0.09392,
207
- "precision_at_20": 0.04769,
208
- "precision_at_50": 0.01953,
209
  "precision_at_100": 0.00988,
210
- "mrr_at_1": 0.6950182260024301,
211
- "mrr_at_3": 0.7656946132037261,
212
- "mrr_at_5": 0.7770554880518429,
213
- "mrr_at_10": 0.7827320681208895,
214
- "mrr_at_20": 0.783837406855329,
215
- "mrr_at_50": 0.784510459247072,
216
- "mrr_at_100": 0.7846961562032659,
217
- "naucs_at_1_max": 0.2747145592665206,
218
- "naucs_at_1_std": -0.09557716491043182,
219
- "naucs_at_1_diff1": 0.8084384046681937,
220
- "naucs_at_3_max": 0.3330280541363154,
221
- "naucs_at_3_std": -0.0389562599865292,
222
- "naucs_at_3_diff1": 0.7240601862039753,
223
- "naucs_at_5_max": 0.352919194006459,
224
- "naucs_at_5_std": 0.0514244344972758,
225
- "naucs_at_5_diff1": 0.6583624352004738,
226
- "naucs_at_10_max": 0.43229730811413347,
227
- "naucs_at_10_std": 0.2815741957749473,
228
- "naucs_at_10_diff1": 0.6007108794001051,
229
- "naucs_at_20_max": 0.4241317350379727,
230
- "naucs_at_20_std": 0.2971486230084223,
231
- "naucs_at_20_diff1": 0.5874614197126973,
232
- "naucs_at_50_max": 0.5140385840003181,
233
- "naucs_at_50_std": 0.518664544400799,
234
- "naucs_at_50_diff1": 0.5976629425860989,
235
- "naucs_at_100_max": 0.4955447990957944,
236
- "naucs_at_100_std": 0.5556756488737464,
237
- "naucs_at_100_diff1": 0.6696787538618733
238
  },
239
- "infovqa_test_subsampled": {
240
- "ndcg_at_1": 0.88664,
241
- "ndcg_at_3": 0.91826,
242
- "ndcg_at_5": 0.92479,
243
- "ndcg_at_10": 0.93028,
244
- "ndcg_at_20": 0.93185,
245
- "ndcg_at_50": 0.93365,
246
- "ndcg_at_100": 0.93435,
247
- "map_at_1": 0.88664,
248
- "map_at_3": 0.91093,
249
- "map_at_5": 0.91447,
250
- "map_at_10": 0.91689,
251
- "map_at_20": 0.91735,
252
- "map_at_50": 0.91756,
253
- "map_at_100": 0.91763,
254
- "recall_at_1": 0.88664,
255
  "recall_at_3": 0.93927,
256
  "recall_at_5": 0.95547,
257
- "recall_at_10": 0.97166,
258
- "recall_at_20": 0.97773,
259
  "recall_at_50": 0.98785,
260
- "recall_at_100": 0.9919,
261
- "precision_at_1": 0.88664,
262
  "precision_at_3": 0.31309,
263
  "precision_at_5": 0.19109,
264
- "precision_at_10": 0.09717,
265
- "precision_at_20": 0.04889,
266
  "precision_at_50": 0.01976,
267
- "precision_at_100": 0.00992,
268
  "mrr_at_1": 0.8846153846153846,
269
  "mrr_at_3": 0.9092442645074225,
270
- "mrr_at_5": 0.9127867746288799,
271
- "mrr_at_10": 0.9150801683696421,
272
- "mrr_at_20": 0.9155341004025214,
273
- "mrr_at_50": 0.9157652608527469,
274
- "mrr_at_100": 0.9158356239262059,
275
- "naucs_at_1_max": 0.6077965938527005,
276
- "naucs_at_1_std": -0.034859963613718514,
277
- "naucs_at_1_diff1": 0.9382491207886892,
278
- "naucs_at_3_max": 0.7083430308645834,
279
- "naucs_at_3_std": 0.09173878193665044,
280
- "naucs_at_3_diff1": 0.941058293792039,
281
- "naucs_at_5_max": 0.828261285914772,
282
- "naucs_at_5_std": 0.25360809635439513,
283
- "naucs_at_5_diff1": 0.9374342352707031,
284
- "naucs_at_10_max": 0.8969865162710334,
285
- "naucs_at_10_std": 0.5367916133086486,
286
- "naucs_at_10_diff1": 0.9521757785748344,
287
- "naucs_at_20_max": 0.927259949634811,
288
- "naucs_at_20_std": 0.6312197158498455,
289
- "naucs_at_20_diff1": 0.9643814216187027,
290
- "naucs_at_50_max": 0.9129323639568211,
291
- "naucs_at_50_std": 0.5625809313845725,
292
- "naucs_at_50_diff1": 0.9564661819783937,
293
- "naucs_at_100_max": 0.9346992729676393,
294
- "naucs_at_100_std": 0.7480140525249407,
295
- "naucs_at_100_diff1": 0.9673496364838197
296
  },
297
- "syntheticDocQA_healthcare_industry_test": {
298
- "ndcg_at_1": 0.97,
299
- "ndcg_at_3": 0.98762,
300
- "ndcg_at_5": 0.98762,
301
- "ndcg_at_10": 0.98762,
302
- "ndcg_at_20": 0.98762,
303
- "ndcg_at_50": 0.98762,
304
- "ndcg_at_100": 0.98762,
305
- "map_at_1": 0.97,
306
- "map_at_3": 0.98333,
307
- "map_at_5": 0.98333,
308
- "map_at_10": 0.98333,
309
- "map_at_20": 0.98333,
310
- "map_at_50": 0.98333,
311
- "map_at_100": 0.98333,
312
- "recall_at_1": 0.97,
313
  "recall_at_3": 1.0,
314
  "recall_at_5": 1.0,
315
  "recall_at_10": 1.0,
316
  "recall_at_20": 1.0,
317
  "recall_at_50": 1.0,
318
  "recall_at_100": 1.0,
319
- "precision_at_1": 0.97,
320
  "precision_at_3": 0.33333,
321
  "precision_at_5": 0.2,
322
  "precision_at_10": 0.1,
323
  "precision_at_20": 0.05,
324
  "precision_at_50": 0.02,
325
  "precision_at_100": 0.01,
326
- "mrr_at_1": 0.97,
327
- "mrr_at_3": 0.9833333333333333,
328
- "mrr_at_5": 0.9833333333333333,
329
- "mrr_at_10": 0.9833333333333333,
330
- "mrr_at_20": 0.9833333333333333,
331
- "mrr_at_50": 0.9833333333333333,
332
- "mrr_at_100": 0.9833333333333333,
333
- "naucs_at_1_max": 0.8202614379084989,
334
- "naucs_at_1_std": -0.2198879551820713,
335
- "naucs_at_1_diff1": 0.9564270152505465,
336
  "naucs_at_3_max": 1.0,
337
  "naucs_at_3_std": 1.0,
338
  "naucs_at_3_diff1": 1.0,
@@ -352,56 +352,56 @@
352
  "naucs_at_100_std": null,
353
  "naucs_at_100_diff1": null
354
  },
355
- "tabfquad_test_subsampled": {
356
- "ndcg_at_1": 0.83571,
357
- "ndcg_at_3": 0.88699,
358
- "ndcg_at_5": 0.89851,
359
- "ndcg_at_10": 0.90549,
360
- "ndcg_at_20": 0.91018,
361
- "ndcg_at_50": 0.9125,
362
- "ndcg_at_100": 0.9125,
363
- "map_at_1": 0.83571,
364
- "map_at_3": 0.875,
365
- "map_at_5": 0.88125,
366
- "map_at_10": 0.88416,
367
- "map_at_20": 0.88553,
368
- "map_at_50": 0.88598,
369
- "map_at_100": 0.88598,
370
- "recall_at_1": 0.83571,
371
- "recall_at_3": 0.92143,
372
- "recall_at_5": 0.95,
373
- "recall_at_10": 0.97143,
374
  "recall_at_20": 0.98929,
375
  "recall_at_50": 1.0,
376
  "recall_at_100": 1.0,
377
- "precision_at_1": 0.83571,
378
- "precision_at_3": 0.30714,
379
- "precision_at_5": 0.19,
380
- "precision_at_10": 0.09714,
381
  "precision_at_20": 0.04946,
382
  "precision_at_50": 0.02,
383
  "precision_at_100": 0.01,
384
- "mrr_at_1": 0.8392857142857143,
385
- "mrr_at_3": 0.8767857142857143,
386
- "mrr_at_5": 0.8830357142857143,
387
- "mrr_at_10": 0.8858517573696145,
388
- "mrr_at_20": 0.8872690305726021,
389
- "mrr_at_50": 0.8877252707609851,
390
- "mrr_at_100": 0.8877252707609851,
391
- "naucs_at_1_max": 0.4880524536858767,
392
- "naucs_at_1_std": 0.1966050571171859,
393
- "naucs_at_1_diff1": 0.9065802421597565,
394
- "naucs_at_3_max": 0.5683515830574649,
395
- "naucs_at_3_std": 0.33952975129445734,
396
- "naucs_at_3_diff1": 0.8935786435786452,
397
- "naucs_at_5_max": 0.7507669734560504,
398
- "naucs_at_5_std": 0.5118714152327615,
399
- "naucs_at_5_diff1": 0.9109643857543043,
400
- "naucs_at_10_max": 0.8150093370681609,
401
- "naucs_at_10_std": 0.5556722689075682,
402
- "naucs_at_10_diff1": 0.934640522875815,
403
- "naucs_at_20_max": 0.9128540305011011,
404
- "naucs_at_20_std": 0.807812013694365,
405
  "naucs_at_20_diff1": 0.9564270152505505,
406
  "naucs_at_50_max": 1.0,
407
  "naucs_at_50_std": 1.0,
@@ -410,50 +410,50 @@
410
  "naucs_at_100_std": 1.0,
411
  "naucs_at_100_diff1": 1.0
412
  },
413
- "syntheticDocQA_government_reports_test": {
414
- "ndcg_at_1": 0.91,
415
- "ndcg_at_3": 0.94393,
416
- "ndcg_at_5": 0.95254,
417
- "ndcg_at_10": 0.9561,
418
- "ndcg_at_20": 0.9561,
419
- "ndcg_at_50": 0.9561,
420
- "ndcg_at_100": 0.9561,
421
- "map_at_1": 0.91,
422
- "map_at_3": 0.935,
423
- "map_at_5": 0.94,
424
- "map_at_10": 0.94167,
425
- "map_at_20": 0.94167,
426
- "map_at_50": 0.94167,
427
- "map_at_100": 0.94167,
428
- "recall_at_1": 0.91,
429
- "recall_at_3": 0.97,
430
  "recall_at_5": 0.99,
431
  "recall_at_10": 1.0,
432
  "recall_at_20": 1.0,
433
  "recall_at_50": 1.0,
434
  "recall_at_100": 1.0,
435
- "precision_at_1": 0.91,
436
- "precision_at_3": 0.32333,
437
  "precision_at_5": 0.198,
438
  "precision_at_10": 0.1,
439
  "precision_at_20": 0.05,
440
  "precision_at_50": 0.02,
441
  "precision_at_100": 0.01,
442
- "mrr_at_1": 0.91,
443
- "mrr_at_3": 0.9383333333333332,
444
- "mrr_at_5": 0.9428333333333333,
445
- "mrr_at_10": 0.9428333333333333,
446
- "mrr_at_20": 0.9428333333333333,
447
- "mrr_at_50": 0.9428333333333333,
448
- "mrr_at_100": 0.9428333333333333,
449
- "naucs_at_1_max": 0.8246706089843343,
450
- "naucs_at_1_std": 0.4518103537711367,
451
- "naucs_at_1_diff1": 0.9564270152505436,
452
- "naucs_at_3_max": 0.7027699968876487,
453
- "naucs_at_3_std": 0.460939931528168,
454
- "naucs_at_3_diff1": 0.9564270152505466,
455
- "naucs_at_5_max": 0.5541549953314738,
456
- "naucs_at_5_std": -0.1713352007469681,
457
  "naucs_at_5_diff1": 1.0,
458
  "naucs_at_10_max": 1.0,
459
  "naucs_at_10_std": 1.0,
@@ -468,57 +468,57 @@
468
  "naucs_at_100_std": null,
469
  "naucs_at_100_diff1": null
470
  },
471
- "shiftproject_test": {
472
- "ndcg_at_1": 0.73,
473
- "ndcg_at_3": 0.85095,
474
- "ndcg_at_5": 0.85526,
475
- "ndcg_at_10": 0.86174,
476
- "ndcg_at_20": 0.86703,
477
- "ndcg_at_50": 0.87134,
478
- "ndcg_at_100": 0.87134,
479
- "map_at_1": 0.73,
480
- "map_at_3": 0.82333,
481
- "map_at_5": 0.82583,
482
- "map_at_10": 0.82851,
483
- "map_at_20": 0.83009,
484
- "map_at_50": 0.83092,
485
- "map_at_100": 0.83092,
486
- "recall_at_1": 0.73,
487
- "recall_at_3": 0.93,
488
  "recall_at_5": 0.94,
489
- "recall_at_10": 0.96,
490
- "recall_at_20": 0.98,
491
  "recall_at_50": 1.0,
492
  "recall_at_100": 1.0,
493
- "precision_at_1": 0.73,
494
- "precision_at_3": 0.31,
495
  "precision_at_5": 0.188,
496
- "precision_at_10": 0.096,
497
- "precision_at_20": 0.049,
498
  "precision_at_50": 0.02,
499
  "precision_at_100": 0.01,
500
- "mrr_at_1": 0.75,
501
- "mrr_at_3": 0.8316666666666667,
502
- "mrr_at_5": 0.8341666666666667,
503
- "mrr_at_10": 0.8380238095238096,
504
- "mrr_at_20": 0.8386904761904762,
505
- "mrr_at_50": 0.8395600414078674,
506
- "mrr_at_100": 0.8395600414078674,
507
- "naucs_at_1_max": -0.0662092826672433,
508
- "naucs_at_1_std": -0.3356071477717276,
509
- "naucs_at_1_diff1": 0.7686999941024986,
510
- "naucs_at_3_max": -0.05822328931572497,
511
- "naucs_at_3_std": -0.2665732959850601,
512
- "naucs_at_3_diff1": 0.7789782579698524,
513
- "naucs_at_5_max": 0.22206660441954526,
514
- "naucs_at_5_std": -0.33146591970121353,
515
- "naucs_at_5_diff1": 0.8883442265795238,
516
- "naucs_at_10_max": 0.05240429505135564,
517
- "naucs_at_10_std": -0.4458450046685252,
518
- "naucs_at_10_diff1": 0.8651960784313721,
519
- "naucs_at_20_max": -0.5088702147525547,
520
- "naucs_at_20_std": -0.5929038281979383,
521
- "naucs_at_20_diff1": 0.9346405228758136,
522
  "naucs_at_50_max": null,
523
  "naucs_at_50_std": null,
524
  "naucs_at_50_diff1": null,
@@ -526,44 +526,44 @@
526
  "naucs_at_100_std": null,
527
  "naucs_at_100_diff1": null
528
  },
529
- "syntheticDocQA_artificial_intelligence_test": {
530
- "ndcg_at_1": 0.98,
531
- "ndcg_at_3": 0.99262,
532
- "ndcg_at_5": 0.99262,
533
- "ndcg_at_10": 0.99262,
534
- "ndcg_at_20": 0.99262,
535
- "ndcg_at_50": 0.99262,
536
- "ndcg_at_100": 0.99262,
537
- "map_at_1": 0.98,
538
- "map_at_3": 0.99,
539
- "map_at_5": 0.99,
540
- "map_at_10": 0.99,
541
- "map_at_20": 0.99,
542
- "map_at_50": 0.99,
543
- "map_at_100": 0.99,
544
- "recall_at_1": 0.98,
545
  "recall_at_3": 1.0,
546
  "recall_at_5": 1.0,
547
  "recall_at_10": 1.0,
548
  "recall_at_20": 1.0,
549
  "recall_at_50": 1.0,
550
  "recall_at_100": 1.0,
551
- "precision_at_1": 0.98,
552
  "precision_at_3": 0.33333,
553
  "precision_at_5": 0.2,
554
  "precision_at_10": 0.1,
555
  "precision_at_20": 0.05,
556
  "precision_at_50": 0.02,
557
  "precision_at_100": 0.01,
558
- "mrr_at_1": 0.98,
559
- "mrr_at_3": 0.99,
560
- "mrr_at_5": 0.99,
561
- "mrr_at_10": 0.99,
562
- "mrr_at_20": 0.99,
563
- "mrr_at_50": 0.99,
564
- "mrr_at_100": 0.99,
565
- "naucs_at_1_max": 0.3489729225023353,
566
- "naucs_at_1_std": -0.2987861811391249,
567
  "naucs_at_1_diff1": 1.0,
568
  "naucs_at_3_max": 1.0,
569
  "naucs_at_3_std": 1.0,
 
1
  {
2
  "metadata": {
3
+ "timestamp": "2025-01-31T14:00:16.349261",
4
  "vidore_benchmark_version": "4.0.3.dev20+g2d72668"
5
  },
6
  "metrics": {
7
+ "data_dir/eval_vidore/arxivqa_test_subsampled": {
8
+ "ndcg_at_1": 0.856,
9
+ "ndcg_at_3": 0.88745,
10
+ "ndcg_at_5": 0.89227,
11
+ "ndcg_at_10": 0.90332,
12
+ "ndcg_at_20": 0.90941,
13
+ "ndcg_at_50": 0.91153,
14
+ "ndcg_at_100": 0.91252,
15
+ "map_at_1": 0.856,
16
+ "map_at_3": 0.87967,
17
+ "map_at_5": 0.88227,
18
+ "map_at_10": 0.88687,
19
+ "map_at_20": 0.88854,
20
+ "map_at_50": 0.88895,
21
+ "map_at_100": 0.88904,
22
+ "recall_at_1": 0.856,
23
+ "recall_at_3": 0.91,
24
+ "recall_at_5": 0.922,
25
+ "recall_at_10": 0.956,
26
  "recall_at_20": 0.98,
27
  "recall_at_50": 0.99,
28
  "recall_at_100": 0.996,
29
+ "precision_at_1": 0.856,
30
+ "precision_at_3": 0.30333,
31
+ "precision_at_5": 0.1844,
32
+ "precision_at_10": 0.0956,
33
  "precision_at_20": 0.049,
34
  "precision_at_50": 0.0198,
35
  "precision_at_100": 0.00996,
36
+ "mrr_at_1": 0.854,
37
+ "mrr_at_3": 0.8773333333333334,
38
+ "mrr_at_5": 0.8809333333333332,
39
+ "mrr_at_10": 0.8855674603174604,
40
+ "mrr_at_20": 0.8873665921453847,
41
+ "mrr_at_50": 0.8876643128304966,
42
+ "mrr_at_100": 0.887751797810069,
43
+ "naucs_at_1_max": 0.8146679814134421,
44
+ "naucs_at_1_std": 0.07396801383185674,
45
+ "naucs_at_1_diff1": 0.9423560082126644,
46
+ "naucs_at_3_max": 0.8066396929142029,
47
+ "naucs_at_3_std": 0.07559912854030806,
48
+ "naucs_at_3_diff1": 0.9181969083929873,
49
+ "naucs_at_5_max": 0.8034068328185975,
50
+ "naucs_at_5_std": 0.02020637314754663,
51
+ "naucs_at_5_diff1": 0.9160860925566815,
52
+ "naucs_at_10_max": 0.825566590272474,
53
+ "naucs_at_10_std": -0.15187590187589842,
54
+ "naucs_at_10_diff1": 0.9390968508615584,
55
+ "naucs_at_20_max": 0.8921568627450932,
56
+ "naucs_at_20_std": 0.0641923436041116,
57
+ "naucs_at_20_diff1": 0.9738562091503187,
58
+ "naucs_at_50_max": 0.947712418300658,
59
+ "naucs_at_50_std": 0.37030812324930756,
60
+ "naucs_at_50_diff1": 0.9738562091503188,
61
  "naucs_at_100_max": 1.0,
62
  "naucs_at_100_std": 1.0,
63
+ "naucs_at_100_diff1": 1.0
64
  },
65
+ "data_dir/eval_vidore/docvqa_test_subsampled": {
66
+ "ndcg_at_1": 0.55876,
67
+ "ndcg_at_3": 0.61485,
68
+ "ndcg_at_5": 0.63222,
69
+ "ndcg_at_10": 0.65493,
70
+ "ndcg_at_20": 0.66746,
71
+ "ndcg_at_50": 0.67657,
72
+ "ndcg_at_100": 0.68329,
73
+ "map_at_1": 0.55876,
74
+ "map_at_3": 0.60126,
75
+ "map_at_5": 0.6109,
76
+ "map_at_10": 0.62013,
77
+ "map_at_20": 0.62368,
78
+ "map_at_50": 0.62509,
79
+ "map_at_100": 0.62564,
80
+ "recall_at_1": 0.55876,
81
+ "recall_at_3": 0.6541,
82
+ "recall_at_5": 0.69623,
83
+ "recall_at_10": 0.76718,
84
+ "recall_at_20": 0.81596,
85
+ "recall_at_50": 0.86253,
86
+ "recall_at_100": 0.90466,
87
+ "precision_at_1": 0.55876,
88
+ "precision_at_3": 0.21803,
89
+ "precision_at_5": 0.13925,
90
+ "precision_at_10": 0.07672,
91
+ "precision_at_20": 0.0408,
92
+ "precision_at_50": 0.01725,
93
+ "precision_at_100": 0.00905,
94
+ "mrr_at_1": 0.5543237250554324,
95
+ "mrr_at_3": 0.599039172209904,
96
+ "mrr_at_5": 0.609349593495935,
97
+ "mrr_at_10": 0.6183401963889769,
98
+ "mrr_at_20": 0.6209344044461912,
99
+ "mrr_at_50": 0.6227106859869997,
100
+ "mrr_at_100": 0.6232889837626537,
101
+ "naucs_at_1_max": 0.29569387287927856,
102
+ "naucs_at_1_std": 0.6842470452244832,
103
+ "naucs_at_1_diff1": 0.9092758189268736,
104
+ "naucs_at_3_max": 0.21005633503267365,
105
+ "naucs_at_3_std": 0.805068083051039,
106
+ "naucs_at_3_diff1": 0.8754563618133548,
107
+ "naucs_at_5_max": 0.19259019824979384,
108
+ "naucs_at_5_std": 0.8409490067520664,
109
+ "naucs_at_5_diff1": 0.8726101181684977,
110
+ "naucs_at_10_max": 0.048646855770537796,
111
+ "naucs_at_10_std": 0.8833213018935924,
112
+ "naucs_at_10_diff1": 0.8643899050698751,
113
+ "naucs_at_20_max": -0.06225799483664109,
114
+ "naucs_at_20_std": 0.9127300541361831,
115
+ "naucs_at_20_diff1": 0.8597071895030789,
116
+ "naucs_at_50_max": -0.24442668101014947,
117
+ "naucs_at_50_std": 0.930294669379938,
118
+ "naucs_at_50_diff1": 0.8714057920484658,
119
+ "naucs_at_100_max": -0.3617257884810223,
120
+ "naucs_at_100_std": 0.9386292283530702,
121
+ "naucs_at_100_diff1": 0.8597379463433718
122
  },
123
+ "data_dir/eval_vidore/syntheticDocQA_energy_test": {
124
+ "ndcg_at_1": 0.93,
125
+ "ndcg_at_3": 0.95893,
126
+ "ndcg_at_5": 0.95893,
127
+ "ndcg_at_10": 0.95893,
128
+ "ndcg_at_20": 0.96163,
129
+ "ndcg_at_50": 0.96365,
130
+ "ndcg_at_100": 0.96365,
131
+ "map_at_1": 0.93,
132
+ "map_at_3": 0.95167,
133
+ "map_at_5": 0.95167,
134
+ "map_at_10": 0.95167,
135
+ "map_at_20": 0.9525,
136
+ "map_at_50": 0.95283,
137
+ "map_at_100": 0.95283,
138
+ "recall_at_1": 0.93,
139
+ "recall_at_3": 0.98,
140
+ "recall_at_5": 0.98,
141
  "recall_at_10": 0.98,
142
  "recall_at_20": 0.99,
143
  "recall_at_50": 1.0,
144
  "recall_at_100": 1.0,
145
+ "precision_at_1": 0.93,
146
+ "precision_at_3": 0.32667,
147
+ "precision_at_5": 0.196,
148
  "precision_at_10": 0.098,
149
  "precision_at_20": 0.0495,
150
  "precision_at_50": 0.02,
151
  "precision_at_100": 0.01,
152
+ "mrr_at_1": 0.93,
153
+ "mrr_at_3": 0.9516666666666667,
154
+ "mrr_at_5": 0.9516666666666667,
155
+ "mrr_at_10": 0.9516666666666667,
156
+ "mrr_at_20": 0.9525757575757575,
157
+ "mrr_at_50": 0.9529205851619644,
158
+ "mrr_at_100": 0.9529205851619644,
159
+ "naucs_at_1_max": 0.39482459650526885,
160
+ "naucs_at_1_std": -0.31419234360410914,
161
+ "naucs_at_1_diff1": 1.0,
162
+ "naucs_at_3_max": 0.6790382819794457,
163
+ "naucs_at_3_std": -0.9556489262371661,
164
+ "naucs_at_3_diff1": 1.0,
165
+ "naucs_at_5_max": 0.6790382819794609,
166
+ "naucs_at_5_std": -0.9556489262371534,
167
+ "naucs_at_5_diff1": 1.0,
168
+ "naucs_at_10_max": 0.6790382819794609,
169
+ "naucs_at_10_std": -0.9556489262371534,
170
+ "naucs_at_10_diff1": 1.0,
171
+ "naucs_at_20_max": 1.0,
172
+ "naucs_at_20_std": -0.1713352007469681,
173
  "naucs_at_20_diff1": 1.0,
174
  "naucs_at_50_max": null,
175
  "naucs_at_50_std": null,
 
178
  "naucs_at_100_std": null,
179
  "naucs_at_100_diff1": null
180
  },
181
+ "data_dir/eval_vidore/tatdqa_test": {
182
+ "ndcg_at_1": 0.69927,
183
+ "ndcg_at_3": 0.79372,
184
+ "ndcg_at_5": 0.81105,
185
+ "ndcg_at_10": 0.82459,
186
+ "ndcg_at_20": 0.82878,
187
+ "ndcg_at_50": 0.83303,
188
+ "ndcg_at_100": 0.83442,
189
+ "map_at_1": 0.69927,
190
+ "map_at_3": 0.77106,
191
+ "map_at_5": 0.78072,
192
+ "map_at_10": 0.78653,
193
+ "map_at_20": 0.7877,
194
+ "map_at_50": 0.78839,
195
+ "map_at_100": 0.78852,
196
+ "recall_at_1": 0.69927,
197
+ "recall_at_3": 0.85905,
198
+ "recall_at_5": 0.90097,
199
+ "recall_at_10": 0.94168,
200
+ "recall_at_20": 0.95808,
201
+ "recall_at_50": 0.97934,
202
+ "recall_at_100": 0.98785,
203
+ "precision_at_1": 0.69927,
204
+ "precision_at_3": 0.28635,
205
+ "precision_at_5": 0.18019,
206
+ "precision_at_10": 0.09417,
207
+ "precision_at_20": 0.0479,
208
+ "precision_at_50": 0.01959,
209
  "precision_at_100": 0.00988,
210
+ "mrr_at_1": 0.7004860267314702,
211
+ "mrr_at_3": 0.7722762251923856,
212
+ "mrr_at_5": 0.7813284730660186,
213
+ "mrr_at_10": 0.7874199598835079,
214
+ "mrr_at_20": 0.7885261582425689,
215
+ "mrr_at_50": 0.7892193634018624,
216
+ "mrr_at_100": 0.7893291443742743,
217
+ "naucs_at_1_max": 0.2682416909968819,
218
+ "naucs_at_1_std": -0.1346098281401034,
219
+ "naucs_at_1_diff1": 0.8296403667835969,
220
+ "naucs_at_3_max": 0.36050493227494845,
221
+ "naucs_at_3_std": -0.01834048113954263,
222
+ "naucs_at_3_diff1": 0.7445505884597561,
223
+ "naucs_at_5_max": 0.36523716935916267,
224
+ "naucs_at_5_std": 0.06489405058952001,
225
+ "naucs_at_5_diff1": 0.7142046612217674,
226
+ "naucs_at_10_max": 0.4293315802144752,
227
+ "naucs_at_10_std": 0.2564808038730297,
228
+ "naucs_at_10_diff1": 0.6873136665710184,
229
+ "naucs_at_20_max": 0.4688188620078513,
230
+ "naucs_at_20_std": 0.26917226500908054,
231
+ "naucs_at_20_diff1": 0.6720615167289586,
232
+ "naucs_at_50_max": 0.5671944664000176,
233
+ "naucs_at_50_std": 0.4964253483275758,
234
+ "naucs_at_50_diff1": 0.722603370462453,
235
+ "naucs_at_100_max": 0.6585255212623138,
236
+ "naucs_at_100_std": 0.5978336814194427,
237
+ "naucs_at_100_diff1": 0.8138609714332596
238
  },
239
+ "data_dir/eval_vidore/infovqa_test_subsampled": {
240
+ "ndcg_at_1": 0.88462,
241
+ "ndcg_at_3": 0.91698,
242
+ "ndcg_at_5": 0.92378,
243
+ "ndcg_at_10": 0.92838,
244
+ "ndcg_at_20": 0.93146,
245
+ "ndcg_at_50": 0.93263,
246
+ "ndcg_at_100": 0.93362,
247
+ "map_at_1": 0.88462,
248
+ "map_at_3": 0.90924,
249
+ "map_at_5": 0.91309,
250
+ "map_at_10": 0.91501,
251
+ "map_at_20": 0.91585,
252
+ "map_at_50": 0.91603,
253
+ "map_at_100": 0.91612,
254
+ "recall_at_1": 0.88462,
255
  "recall_at_3": 0.93927,
256
  "recall_at_5": 0.95547,
257
+ "recall_at_10": 0.96964,
258
+ "recall_at_20": 0.98178,
259
  "recall_at_50": 0.98785,
260
+ "recall_at_100": 0.99393,
261
+ "precision_at_1": 0.88462,
262
  "precision_at_3": 0.31309,
263
  "precision_at_5": 0.19109,
264
+ "precision_at_10": 0.09696,
265
+ "precision_at_20": 0.04909,
266
  "precision_at_50": 0.01976,
267
+ "precision_at_100": 0.00994,
268
  "mrr_at_1": 0.8846153846153846,
269
  "mrr_at_3": 0.9092442645074225,
270
+ "mrr_at_5": 0.9129892037786774,
271
+ "mrr_at_10": 0.9147717049032839,
272
+ "mrr_at_20": 0.9154333931688503,
273
+ "mrr_at_50": 0.9156112743493511,
274
+ "mrr_at_100": 0.915698522470477,
275
+ "naucs_at_1_max": 0.6002033777762705,
276
+ "naucs_at_1_std": -0.07102965572007143,
277
+ "naucs_at_1_diff1": 0.9544715151522033,
278
+ "naucs_at_3_max": 0.8025453360230541,
279
+ "naucs_at_3_std": 0.19885112605813188,
280
+ "naucs_at_3_diff1": 0.960819563780572,
281
+ "naucs_at_5_max": 0.9121856227472513,
282
+ "naucs_at_5_std": 0.3583912666662475,
283
+ "naucs_at_5_diff1": 0.9584449918884831,
284
+ "naucs_at_10_max": 0.9368484108193146,
285
+ "naucs_at_10_std": 0.44097650713388975,
286
+ "naucs_at_10_diff1": 0.9651729455827266,
287
+ "naucs_at_20_max": 0.9709774546522766,
288
+ "naucs_at_20_std": 0.6007213904253922,
289
+ "naucs_at_20_diff1": 0.9854887273261383,
290
+ "naucs_at_50_max": 0.9782330909892136,
291
+ "naucs_at_50_std": 0.7577252323561762,
292
+ "naucs_at_50_diff1": 0.9782330909892136,
293
+ "naucs_at_100_max": 1.0,
294
+ "naucs_at_100_std": 0.7075525547215259,
295
+ "naucs_at_100_diff1": 0.9564661819784259
296
  },
297
+ "data_dir/eval_vidore/syntheticDocQA_healthcare_industry_test": {
298
+ "ndcg_at_1": 0.95,
299
+ "ndcg_at_3": 0.97893,
300
+ "ndcg_at_5": 0.97893,
301
+ "ndcg_at_10": 0.97893,
302
+ "ndcg_at_20": 0.97893,
303
+ "ndcg_at_50": 0.97893,
304
+ "ndcg_at_100": 0.97893,
305
+ "map_at_1": 0.95,
306
+ "map_at_3": 0.97167,
307
+ "map_at_5": 0.97167,
308
+ "map_at_10": 0.97167,
309
+ "map_at_20": 0.97167,
310
+ "map_at_50": 0.97167,
311
+ "map_at_100": 0.97167,
312
+ "recall_at_1": 0.95,
313
  "recall_at_3": 1.0,
314
  "recall_at_5": 1.0,
315
  "recall_at_10": 1.0,
316
  "recall_at_20": 1.0,
317
  "recall_at_50": 1.0,
318
  "recall_at_100": 1.0,
319
+ "precision_at_1": 0.95,
320
  "precision_at_3": 0.33333,
321
  "precision_at_5": 0.2,
322
  "precision_at_10": 0.1,
323
  "precision_at_20": 0.05,
324
  "precision_at_50": 0.02,
325
  "precision_at_100": 0.01,
326
+ "mrr_at_1": 0.96,
327
+ "mrr_at_3": 0.9783333333333333,
328
+ "mrr_at_5": 0.9783333333333333,
329
+ "mrr_at_10": 0.9783333333333333,
330
+ "mrr_at_20": 0.9783333333333333,
331
+ "mrr_at_50": 0.9783333333333333,
332
+ "mrr_at_100": 0.9783333333333333,
333
+ "naucs_at_1_max": 0.7605042016806716,
334
+ "naucs_at_1_std": -0.35732959850606716,
335
+ "naucs_at_1_diff1": 0.9738562091503253,
336
  "naucs_at_3_max": 1.0,
337
  "naucs_at_3_std": 1.0,
338
  "naucs_at_3_diff1": 1.0,
 
352
  "naucs_at_100_std": null,
353
  "naucs_at_100_diff1": null
354
  },
355
+ "data_dir/eval_vidore/tabfquad_test_subsampled": {
356
+ "ndcg_at_1": 0.85714,
357
+ "ndcg_at_3": 0.90523,
358
+ "ndcg_at_5": 0.91122,
359
+ "ndcg_at_10": 0.91807,
360
+ "ndcg_at_20": 0.92163,
361
+ "ndcg_at_50": 0.92385,
362
+ "ndcg_at_100": 0.92385,
363
+ "map_at_1": 0.85714,
364
+ "map_at_3": 0.89345,
365
+ "map_at_5": 0.89685,
366
+ "map_at_10": 0.89962,
367
+ "map_at_20": 0.90057,
368
+ "map_at_50": 0.90097,
369
+ "map_at_100": 0.90097,
370
+ "recall_at_1": 0.85714,
371
+ "recall_at_3": 0.93929,
372
+ "recall_at_5": 0.95357,
373
+ "recall_at_10": 0.975,
374
  "recall_at_20": 0.98929,
375
  "recall_at_50": 1.0,
376
  "recall_at_100": 1.0,
377
+ "precision_at_1": 0.85714,
378
+ "precision_at_3": 0.3131,
379
+ "precision_at_5": 0.19071,
380
+ "precision_at_10": 0.0975,
381
  "precision_at_20": 0.04946,
382
  "precision_at_50": 0.02,
383
  "precision_at_100": 0.01,
384
+ "mrr_at_1": 0.8571428571428571,
385
+ "mrr_at_3": 0.8928571428571429,
386
+ "mrr_at_5": 0.8955357142857143,
387
+ "mrr_at_10": 0.8989866780045351,
388
+ "mrr_at_20": 0.8999608080411652,
389
+ "mrr_at_50": 0.9003545809349381,
390
+ "mrr_at_100": 0.9003545809349381,
391
+ "naucs_at_1_max": 0.44880952380952394,
392
+ "naucs_at_1_std": 0.06635082604470473,
393
+ "naucs_at_1_diff1": 0.9110301263362479,
394
+ "naucs_at_3_max": 0.7531443950129105,
395
+ "naucs_at_3_std": 0.4460647003899606,
396
+ "naucs_at_3_diff1": 0.9288735101883916,
397
+ "naucs_at_5_max": 0.8075127486892179,
398
+ "naucs_at_5_std": 0.529124470300943,
399
+ "naucs_at_5_diff1": 0.917043740573152,
400
+ "naucs_at_10_max": 0.7902494331065706,
401
+ "naucs_at_10_std": 0.6329198346005056,
402
+ "naucs_at_10_diff1": 0.9626517273576021,
403
+ "naucs_at_20_max": 0.9564270152505505,
404
+ "naucs_at_20_std": 0.8638344226579515,
405
  "naucs_at_20_diff1": 0.9564270152505505,
406
  "naucs_at_50_max": 1.0,
407
  "naucs_at_50_std": 1.0,
 
410
  "naucs_at_100_std": 1.0,
411
  "naucs_at_100_diff1": 1.0
412
  },
413
+ "data_dir/eval_vidore/syntheticDocQA_government_reports_test": {
414
+ "ndcg_at_1": 0.92,
415
+ "ndcg_at_3": 0.96417,
416
+ "ndcg_at_5": 0.96417,
417
+ "ndcg_at_10": 0.9675,
418
+ "ndcg_at_20": 0.9675,
419
+ "ndcg_at_50": 0.9675,
420
+ "ndcg_at_100": 0.9675,
421
+ "map_at_1": 0.92,
422
+ "map_at_3": 0.955,
423
+ "map_at_5": 0.955,
424
+ "map_at_10": 0.95643,
425
+ "map_at_20": 0.95643,
426
+ "map_at_50": 0.95643,
427
+ "map_at_100": 0.95643,
428
+ "recall_at_1": 0.92,
429
+ "recall_at_3": 0.99,
430
  "recall_at_5": 0.99,
431
  "recall_at_10": 1.0,
432
  "recall_at_20": 1.0,
433
  "recall_at_50": 1.0,
434
  "recall_at_100": 1.0,
435
+ "precision_at_1": 0.92,
436
+ "precision_at_3": 0.33,
437
  "precision_at_5": 0.198,
438
  "precision_at_10": 0.1,
439
  "precision_at_20": 0.05,
440
  "precision_at_50": 0.02,
441
  "precision_at_100": 0.01,
442
+ "mrr_at_1": 0.92,
443
+ "mrr_at_3": 0.955,
444
+ "mrr_at_5": 0.955,
445
+ "mrr_at_10": 0.9564285714285714,
446
+ "mrr_at_20": 0.9564285714285714,
447
+ "mrr_at_50": 0.9564285714285714,
448
+ "mrr_at_100": 0.9564285714285714,
449
+ "naucs_at_1_max": 0.8768674136321195,
450
+ "naucs_at_1_std": 0.41940943043884304,
451
+ "naucs_at_1_diff1": 0.9673202614379083,
452
+ "naucs_at_3_max": 1.0,
453
+ "naucs_at_3_std": 0.8692810457516356,
454
+ "naucs_at_3_diff1": 1.0,
455
+ "naucs_at_5_max": 1.0,
456
+ "naucs_at_5_std": 0.8692810457516413,
457
  "naucs_at_5_diff1": 1.0,
458
  "naucs_at_10_max": 1.0,
459
  "naucs_at_10_std": 1.0,
 
468
  "naucs_at_100_std": null,
469
  "naucs_at_100_diff1": null
470
  },
471
+ "data_dir/eval_vidore/shiftproject_test": {
472
+ "ndcg_at_1": 0.78,
473
+ "ndcg_at_3": 0.8644,
474
+ "ndcg_at_5": 0.87302,
475
+ "ndcg_at_10": 0.8828,
476
+ "ndcg_at_20": 0.88795,
477
+ "ndcg_at_50": 0.88997,
478
+ "ndcg_at_100": 0.88997,
479
+ "map_at_1": 0.78,
480
+ "map_at_3": 0.845,
481
+ "map_at_5": 0.85,
482
+ "map_at_10": 0.8541,
483
+ "map_at_20": 0.85555,
484
+ "map_at_50": 0.85589,
485
+ "map_at_100": 0.85589,
486
+ "recall_at_1": 0.78,
487
+ "recall_at_3": 0.92,
488
  "recall_at_5": 0.94,
489
+ "recall_at_10": 0.97,
490
+ "recall_at_20": 0.99,
491
  "recall_at_50": 1.0,
492
  "recall_at_100": 1.0,
493
+ "precision_at_1": 0.78,
494
+ "precision_at_3": 0.30667,
495
  "precision_at_5": 0.188,
496
+ "precision_at_10": 0.097,
497
+ "precision_at_20": 0.0495,
498
  "precision_at_50": 0.02,
499
  "precision_at_100": 0.01,
500
+ "mrr_at_1": 0.81,
501
+ "mrr_at_3": 0.8633333333333333,
502
+ "mrr_at_5": 0.8658333333333332,
503
+ "mrr_at_10": 0.8699285714285715,
504
+ "mrr_at_20": 0.871412087912088,
505
+ "mrr_at_50": 0.8717454212454213,
506
+ "mrr_at_100": 0.8717454212454213,
507
+ "naucs_at_1_max": -0.24755413363008374,
508
+ "naucs_at_1_std": -0.6164510594890337,
509
+ "naucs_at_1_diff1": 0.8162006769601703,
510
+ "naucs_at_3_max": 0.2243814192343627,
511
+ "naucs_at_3_std": -0.23225957049486393,
512
+ "naucs_at_3_diff1": 0.8544000933706815,
513
+ "naucs_at_5_max": -0.012371615312794324,
514
+ "naucs_at_5_std": -0.5504201680672273,
515
+ "naucs_at_5_diff1": 0.9128540305010897,
516
+ "naucs_at_10_max": -0.049486461251166146,
517
+ "naucs_at_10_std": -0.27591036414565706,
518
+ "naucs_at_10_diff1": 0.9128540305010848,
519
+ "naucs_at_20_max": 0.7222222222222276,
520
+ "naucs_at_20_std": 0.5541549953314738,
521
+ "naucs_at_20_diff1": 0.8692810457516413,
522
  "naucs_at_50_max": null,
523
  "naucs_at_50_std": null,
524
  "naucs_at_50_diff1": null,
 
526
  "naucs_at_100_std": null,
527
  "naucs_at_100_diff1": null
528
  },
529
+ "data_dir/eval_vidore/syntheticDocQA_artificial_intelligence_test": {
530
+ "ndcg_at_1": 0.99,
531
+ "ndcg_at_3": 0.99631,
532
+ "ndcg_at_5": 0.99631,
533
+ "ndcg_at_10": 0.99631,
534
+ "ndcg_at_20": 0.99631,
535
+ "ndcg_at_50": 0.99631,
536
+ "ndcg_at_100": 0.99631,
537
+ "map_at_1": 0.99,
538
+ "map_at_3": 0.995,
539
+ "map_at_5": 0.995,
540
+ "map_at_10": 0.995,
541
+ "map_at_20": 0.995,
542
+ "map_at_50": 0.995,
543
+ "map_at_100": 0.995,
544
+ "recall_at_1": 0.99,
545
  "recall_at_3": 1.0,
546
  "recall_at_5": 1.0,
547
  "recall_at_10": 1.0,
548
  "recall_at_20": 1.0,
549
  "recall_at_50": 1.0,
550
  "recall_at_100": 1.0,
551
+ "precision_at_1": 0.99,
552
  "precision_at_3": 0.33333,
553
  "precision_at_5": 0.2,
554
  "precision_at_10": 0.1,
555
  "precision_at_20": 0.05,
556
  "precision_at_50": 0.02,
557
  "precision_at_100": 0.01,
558
+ "mrr_at_1": 0.99,
559
+ "mrr_at_3": 0.995,
560
+ "mrr_at_5": 0.995,
561
+ "mrr_at_10": 0.995,
562
+ "mrr_at_20": 0.995,
563
+ "mrr_at_50": 0.995,
564
+ "mrr_at_100": 0.995,
565
+ "naucs_at_1_max": 0.8692810457516276,
566
+ "naucs_at_1_std": -0.5634920634920657,
567
  "naucs_at_1_diff1": 1.0,
568
  "naucs_at_3_max": 1.0,
569
  "naucs_at_3_std": 1.0,