wwydmanski commited on
Commit
cf4603d
·
verified ·
1 Parent(s): 5b01d6d

Create leaderboard.json

Browse files
Files changed (1) hide show
  1. leaderboard.json +338 -0
leaderboard.json ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "speakleash_Bielik-11B-v2.6-Instruct",
4
+ "Provider": "SpeakLeash",
5
+ "Quantization": "FP8",
6
+ "Parameters": 11,
7
+ "src_clf": 95.85,
8
+ "src_clf_time": 380,
9
+ "sum_rag": 98.62,
10
+ "sum_rag_v2": 95.38
11
+ },
12
+ {
13
+ "Model": "gpt-4o",
14
+ "Provider": "OpenAI",
15
+ "Quantization": "None",
16
+ "Parameters": null,
17
+ "src_clf": 96.05,
18
+ "src_clf_time": 0.0,
19
+ "sum_rag": 86.07,
20
+ "sum_rag_v2": 66.75
21
+ },
22
+ {
23
+ "Model": "gpt-4.1",
24
+ "Provider": "OpenAI",
25
+ "Quantization": "None",
26
+ "Parameters": null,
27
+ "src_clf": 96.39,
28
+ "src_clf_time": 0.0,
29
+ "sum_rag": 96.44,
30
+ "sum_rag_v2": 95.45
31
+ },
32
+ {
33
+ "Model": "gpt-4o-mini",
34
+ "Provider": "OpenAI",
35
+ "Quantization": "None",
36
+ "Parameters": null,
37
+ "src_clf": 95.8,
38
+ "src_clf_time": 0.0,
39
+ "sum_rag": 96.05,
40
+ "sum_rag_v2": 94.4
41
+ },
42
+ {
43
+ "Model": "gpt-5-nano",
44
+ "Provider": "OpenAI",
45
+ "Quantization": "None",
46
+ "Parameters": null,
47
+ "src_clf": 95.75,
48
+ "src_clf_time": 0.0,
49
+ "sum_rag": 93.83,
50
+ "sum_rag_v2": 53.08
51
+ },
52
+ {
53
+ "Model": "gpt-5-mini",
54
+ "Provider": "OpenAI",
55
+ "Quantization": "None",
56
+ "Parameters": null,
57
+ "src_clf": 96.71,
58
+ "src_clf_time": 0.0,
59
+ "sum_rag": 99.45,
60
+ "sum_rag_v2": 91.02
61
+ },
62
+ {
63
+ "Model": "gpt-5",
64
+ "Provider": "OpenAI",
65
+ "Quantization": "None",
66
+ "Parameters": null,
67
+ "src_clf": 95.18,
68
+ "src_clf_time": 0.0,
69
+ "sum_rag": 98.53,
70
+ "sum_rag_v2": 84.9
71
+ },
72
+ {
73
+ "Model": "claude-4-sonnet",
74
+ "Provider": "Anthropic",
75
+ "Quantization": "None",
76
+ "Parameters": null,
77
+ "src_clf": 95.88,
78
+ "src_clf_time": 0.0,
79
+ "sum_rag": 99.45,
80
+ "sum_rag_v2": 94.98
81
+ },
82
+ {
83
+ "Model": "openai_gpt-oss-120b",
84
+ "Provider": "OpenAI",
85
+ "Quantization": null,
86
+ "Parameters": 120,
87
+ "src_clf": 96.17,
88
+ "src_clf_time": 60,
89
+ "sum_rag": 97.87,
90
+ "sum_rag_v2": 96.37
91
+ },
92
+ {
93
+ "Model": "openai_gpt-oss-20b",
94
+ "Provider": "OpenAI",
95
+ "Quantization": null,
96
+ "Parameters": 20,
97
+ "src_clf": 95.78,
98
+ "src_clf_time": 178,
99
+ "sum_rag": 96.34,
100
+ "sum_rag_v2": 88.37
101
+ },
102
+ {
103
+ "Model": "zai-org_GLM-4.5",
104
+ "Provider": "Zhipu.AI",
105
+ "Quantization": null,
106
+ "Parameters": 355,
107
+ "src_clf": 90.24,
108
+ "src_clf_time": 687,
109
+ "sum_rag": 99.45,
110
+ "sum_rag_v2": 99.28
111
+ },
112
+ {
113
+ "Model": "moonshotai_Kimi-K2-Instruct",
114
+ "Provider": "MoonshotAI",
115
+ "Quantization": null,
116
+ "Parameters": 1000,
117
+ "src_clf": 95.91,
118
+ "src_clf_time": 87,
119
+ "sum_rag": 97.3,
120
+ "sum_rag_v2": 96.03
121
+ },
122
+ {
123
+ "Model": "Qwen_Qwen3-235B-A22B-Thinking-2507",
124
+ "Provider": "Qwen",
125
+ "Quantization": null,
126
+ "Parameters": 235,
127
+ "src_clf": 95.72,
128
+ "src_clf_time": 741,
129
+ "sum_rag": 100.0,
130
+ "sum_rag_v2": 98.41
131
+ },
132
+ {
133
+ "Model": "Qwen_Qwen3-235B-A22B-Instruct-2507",
134
+ "Provider": "Qwen",
135
+ "Quantization": null,
136
+ "Parameters": 235,
137
+ "src_clf": 96.21,
138
+ "src_clf_time": 239,
139
+ "sum_rag": 97.21,
140
+ "sum_rag_v2": 93.76
141
+ },
142
+ {
143
+ "Model": "Qwen_Qwen3-30B-A3B",
144
+ "Provider": "Qwen",
145
+ "Quantization": null,
146
+ "Parameters": 30,
147
+ "src_clf": 94.38,
148
+ "src_clf_time": 423,
149
+ "sum_rag": 98.06,
150
+ "sum_rag_v2": 98.46
151
+ },
152
+ {
153
+ "Model": "zai-org_GLM-4.5-Air",
154
+ "Provider": "Zhipu.AI",
155
+ "Quantization": null,
156
+ "Parameters": 106,
157
+ "src_clf": 88.81,
158
+ "src_clf_time": 743,
159
+ "sum_rag": 99.09,
160
+ "sum_rag_v2": 98.72
161
+ },
162
+ {
163
+ "Model": "Qwen_Qwen3-14B",
164
+ "Provider": "Qwen",
165
+ "Quantization": null,
166
+ "Parameters": 14,
167
+ "src_clf": 95.43,
168
+ "src_clf_time": 401,
169
+ "sum_rag": 98.62,
170
+ "sum_rag_v2": 98.27
171
+ },
172
+ {
173
+ "Model": "Qwen_Qwen3-32B",
174
+ "Provider": "Qwen",
175
+ "Quantization": null,
176
+ "Parameters": 32,
177
+ "src_clf": 95.86,
178
+ "src_clf_time": 343,
179
+ "sum_rag": 98.15,
180
+ "sum_rag_v2": 98.11
181
+ },
182
+ {
183
+ "Model": "deepseek-ai_DeepSeek-V3-0324",
184
+ "Provider": "DeepSeek",
185
+ "Quantization": null,
186
+ "Parameters": 671,
187
+ "src_clf": 96.03,
188
+ "src_clf_time": 129,
189
+ "sum_rag": 99.45,
190
+ "sum_rag_v2": 95.29
191
+ },
192
+ {
193
+ "Model": "google_gemma-3-12b-it",
194
+ "Provider": "Google",
195
+ "Quantization": null,
196
+ "Parameters": 12,
197
+ "src_clf": 93.98,
198
+ "src_clf_time": 92,
199
+ "sum_rag_v2": 94.22,
200
+ "sum_rag": 93.44
201
+ },
202
+ {
203
+ "Model": "google_gemma-3-27b-it",
204
+ "Provider": "Google",
205
+ "Quantization": null,
206
+ "Parameters": 27,
207
+ "src_clf": 95.65,
208
+ "src_clf_time": 135,
209
+ "sum_rag_v2": 95.19,
210
+ "sum_rag": 96.53
211
+ },
212
+ {
213
+ "Model": "google_gemma-3-4b-it",
214
+ "Provider": "Google",
215
+ "Quantization": null,
216
+ "Parameters": 4,
217
+ "src_clf": 89.78,
218
+ "src_clf_time": 59,
219
+ "sum_rag_v2": 89.04,
220
+ "sum_rag": 82.91
221
+ },
222
+ {
223
+ "Model": "meta-llama_Llama-4-Maverick-17B-128E-Instruct-Turbo",
224
+ "Provider": "Meta",
225
+ "Quantization": null,
226
+ "Parameters": 400,
227
+ "src_clf": 96.08,
228
+ "src_clf_time": 59,
229
+ "sum_rag": 97.68,
230
+ "sum_rag_v2": 95.34
231
+ },
232
+ {
233
+ "Model": "meta-llama_Llama-4-Scout-17B-16E-Instruct",
234
+ "Provider": "Meta",
235
+ "Quantization": null,
236
+ "Parameters": 109,
237
+ "src_clf": 95.86,
238
+ "src_clf_time": 69,
239
+ "sum_rag": 97.3,
240
+ "sum_rag_v2": 95.24
241
+ },
242
+ {
243
+ "Model": "speakleash_Bielik-11B-v2.5-Instruct-FP8-Dynamic",
244
+ "Provider": "SpeakLeash",
245
+ "Quantization": "FP8",
246
+ "Parameters": 11,
247
+ "sum_rag": 93.02,
248
+ "sum_rag_v2": 93.55,
249
+ "src_clf": 96.17,
250
+ "src_clf_time": 464
251
+ },
252
+ {
253
+ "Model": "meta-llama_Llama-4-Maverick-17B-128E-Instruct-FP8",
254
+ "Provider": null,
255
+ "Quantization": null,
256
+ "Parameters": null,
257
+ "sum_rag": 97.68,
258
+ "sum_rag_v2": 95.34
259
+ },
260
+ {
261
+ "Model": "speakleash_Bielik-11B-v2.6-Instruct-FP8-Dynamic",
262
+ "Provider": null,
263
+ "Quantization": null,
264
+ "Parameters": null,
265
+ "sum_rag": 97.3,
266
+ "sum_rag_v2": 94.58,
267
+ "src_clf": 95.85,
268
+ "src_clf_time": 451
269
+ },
270
+ {
271
+ "Model": "meta-llama_Llama-3.3-70B-Instruct-Turbo",
272
+ "Provider": "Meta",
273
+ "Quantization": null,
274
+ "Parameters": 70,
275
+ "src_clf": 95.84,
276
+ "src_clf_time": 151,
277
+ "sum_rag_v2": 92.74,
278
+ "sum_rag": 97.11
279
+ },
280
+ {
281
+ "Model": "speakleash_Bielik-11B-v2.5-Instruct",
282
+ "Provider": "SpeakLeash",
283
+ "Quantization": null,
284
+ "Parameters": 11,
285
+ "src_clf": 96.12,
286
+ "src_clf_time": 387,
287
+ "sum_rag": 89.16,
288
+ "sum_rag_v2": 94.42
289
+ },
290
+ {
291
+ "Model": "meta-llama_Llama-3.3-70B-Instruct",
292
+ "Provider": "Meta",
293
+ "Quantization": null,
294
+ "Parameters": 70,
295
+ "src_clf": 95.89,
296
+ "src_clf_time": 299,
297
+ "sum_rag": 96.24,
298
+ "sum_rag_v2": 92.98
299
+ },
300
+ {
301
+ "Model": "gaius-lex_llama3-8b-rag-pl-v0.8",
302
+ "Provider": "Gaius-Lex",
303
+ "Quantization": null,
304
+ "Parameters": 8,
305
+ "sum_rag": 93.33,
306
+ "src_clf": 78.73,
307
+ "src_clf_time": 255,
308
+ "sum_rag_v2": 71.37
309
+ },
310
+ {
311
+ "Model": "meta-llama_Meta-Llama-3.1-8B-Instruct",
312
+ "Provider": "Meta",
313
+ "Quantization": null,
314
+ "Parameters": 8,
315
+ "sum_rag": 83.17,
316
+ "src_clf": 91.81,
317
+ "src_clf_time": 41,
318
+ "sum_rag_v2": 90.12
319
+ },
320
+ {
321
+ "Model": "gaius-lex_llama3-8b-rag-pl-v0.6",
322
+ "Provider": "Gaius-Lex",
323
+ "Quantization": null,
324
+ "Parameters": 8,
325
+ "src_clf": 2.57,
326
+ "src_clf_time": 229
327
+ },
328
+ {
329
+ "Model": "meta-llama_Meta-Llama-3-8B-Instruct",
330
+ "Provider": "Meta",
331
+ "Quantization": null,
332
+ "Parameters": 8,
333
+ "sum_rag": 90.69,
334
+ "sum_rag_v2": 64.46,
335
+ "src_clf": 91.84,
336
+ "src_clf_time": 107
337
+ }
338
+ ]