Commit
·
80fc9d2
1
Parent(s):
90ec376
- src/lib/benchmarks/ index.ts +3 -3
- src/lib/benchmarks/qwen.ts +24 -24
src/lib/benchmarks/ index.ts
CHANGED
|
@@ -11,7 +11,7 @@ export const benchmarkData: Benchmark[] = [
|
|
| 11 |
...xaiBenchmarks,
|
| 12 |
...googleBenchmarks,
|
| 13 |
...anthropicBenchmarks,
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
...qwenBenchmarks,
|
| 17 |
];
|
|
|
|
| 11 |
...xaiBenchmarks,
|
| 12 |
...googleBenchmarks,
|
| 13 |
...anthropicBenchmarks,
|
| 14 |
+
...openaiBenchmarks,
|
| 15 |
+
...deepseekBenchmarks,
|
| 16 |
+
...qwenBenchmarks,
|
| 17 |
];
|
src/lib/benchmarks/qwen.ts
CHANGED
|
@@ -24,7 +24,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 24 |
mmmlu: 86.70,
|
| 25 |
////include: 73.46,
|
| 26 |
},
|
| 27 |
-
source: "",
|
| 28 |
},
|
| 29 |
{
|
| 30 |
model: "Qwen3-32B (Base Model)",
|
|
@@ -48,7 +48,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 48 |
mmmlu: 83.83,
|
| 49 |
//include: 67.87,
|
| 50 |
},
|
| 51 |
-
source: "",
|
| 52 |
},
|
| 53 |
{
|
| 54 |
model: "Qwen3-14B (Base Model)",
|
|
@@ -72,7 +72,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 72 |
mmmlu: 81.46,
|
| 73 |
//include: 64.55,
|
| 74 |
},
|
| 75 |
-
source: "",
|
| 76 |
},
|
| 77 |
{
|
| 78 |
model: "Qwen3-30B-A3B (Base Model)",
|
|
@@ -96,7 +96,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 96 |
mmmlu: 81.46,
|
| 97 |
//include: 67.00,
|
| 98 |
},
|
| 99 |
-
source: "",
|
| 100 |
},
|
| 101 |
{
|
| 102 |
model: "Qwen3-8B (Base Model)",
|
|
@@ -120,7 +120,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 120 |
mmmlu: 75.72,
|
| 121 |
//include: 59.40,
|
| 122 |
},
|
| 123 |
-
source: "",
|
| 124 |
},
|
| 125 |
{
|
| 126 |
model: "Qwen3-4B (Base Model)",
|
|
@@ -144,7 +144,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 144 |
mmmlu: 71.42,
|
| 145 |
//include: 56.29,
|
| 146 |
},
|
| 147 |
-
source: "",
|
| 148 |
},
|
| 149 |
{
|
| 150 |
model: "Qwen3-1.7B (Base Model)",
|
|
@@ -168,7 +168,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 168 |
mmmlu: 63.27,
|
| 169 |
//include: 45.57,
|
| 170 |
},
|
| 171 |
-
source: "",
|
| 172 |
},
|
| 173 |
{
|
| 174 |
model: "Qwen3-0.6B (Base Model)",
|
|
@@ -192,7 +192,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 192 |
mmmlu: 50.16,
|
| 193 |
//include: 34.26,
|
| 194 |
},
|
| 195 |
-
source: "",
|
| 196 |
},
|
| 197 |
{
|
| 198 |
model: "Qwen3-235B-A22B (Thinking Mode)",
|
|
@@ -226,7 +226,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 226 |
//poly//math: 54.7,
|
| 227 |
//mlogiqa: 77.1,
|
| 228 |
},
|
| 229 |
-
source: "",
|
| 230 |
},
|
| 231 |
{
|
| 232 |
model: "Qwen3-235B-A22B (Non-thinking Mode)",
|
|
@@ -261,7 +261,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 261 |
//poly//math: 27.0,
|
| 262 |
//mlogiqa: 67.6,
|
| 263 |
},
|
| 264 |
-
source: "",
|
| 265 |
},
|
| 266 |
{
|
| 267 |
model: "Qwen3-32B (Thinking Mode)",
|
|
@@ -294,7 +294,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 294 |
//poly//math: 47.4,
|
| 295 |
//mlogiqa: 76.3,
|
| 296 |
},
|
| 297 |
-
source: "",
|
| 298 |
},
|
| 299 |
{
|
| 300 |
model: "Qwen3-32B (Non-thinking Mode)",
|
|
@@ -328,7 +328,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 328 |
//poly//math: 22.5,
|
| 329 |
//mlogiqa: 62.9,
|
| 330 |
},
|
| 331 |
-
source: "",
|
| 332 |
},
|
| 333 |
{
|
| 334 |
model: "Qwen3-14B (Thinking Mode)",
|
|
@@ -361,7 +361,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 361 |
//poly//math: 45.8,
|
| 362 |
//mlogiqa: 71.1,
|
| 363 |
},
|
| 364 |
-
source: "",
|
| 365 |
},
|
| 366 |
{
|
| 367 |
model: "Qwen3-30B-A3B (Thinking Mode)",
|
|
@@ -394,7 +394,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 394 |
//poly//math: 46.1,
|
| 395 |
//mlogiqa: 70.1,
|
| 396 |
},
|
| 397 |
-
source: "",
|
| 398 |
},
|
| 399 |
{
|
| 400 |
model: "Qwen3-14B (Non-thinking Mode)",
|
|
@@ -427,7 +427,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 427 |
//poly//math: 22.0,
|
| 428 |
//mlogiqa: 58.9,
|
| 429 |
},
|
| 430 |
-
source: "",
|
| 431 |
},
|
| 432 |
{
|
| 433 |
model: "Qwen3-30B-A3B (Non-thinking Mode)",
|
|
@@ -460,7 +460,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 460 |
//poly//math: 23.3,
|
| 461 |
//mlogiqa: 53.3,
|
| 462 |
},
|
| 463 |
-
source: "",
|
| 464 |
},
|
| 465 |
{
|
| 466 |
model: "Qwen3-4B (Thinking Mode)",
|
|
@@ -493,7 +493,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 493 |
//poly//math: 40.0,
|
| 494 |
//mlogiqa: 65.9,
|
| 495 |
},
|
| 496 |
-
source: "",
|
| 497 |
},
|
| 498 |
{
|
| 499 |
model: "Qwen3-8B (Thinking Mode)",
|
|
@@ -526,7 +526,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 526 |
//poly//math: 42.7,
|
| 527 |
//mlogiqa: 69.0,
|
| 528 |
},
|
| 529 |
-
source: "",
|
| 530 |
},
|
| 531 |
{
|
| 532 |
model: "Qwen3-4B (Non-thinking Mode)",
|
|
@@ -559,7 +559,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 559 |
//poly//math: 16.6,
|
| 560 |
//mlogiqa: 49.9,
|
| 561 |
},
|
| 562 |
-
source: "",
|
| 563 |
},
|
| 564 |
{
|
| 565 |
model: "Qwen3-8B (Non-thinking Mode)",
|
|
@@ -592,7 +592,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 592 |
//poly//math: 18.8,
|
| 593 |
//mlogiqa: 51.4,
|
| 594 |
},
|
| 595 |
-
source: "",
|
| 596 |
},
|
| 597 |
{
|
| 598 |
model: "Qwen3-0.6B (Thinking Mode)",
|
|
@@ -623,7 +623,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 623 |
//poly//math: 11.4,
|
| 624 |
//mlogiqa: 40.9,
|
| 625 |
},
|
| 626 |
-
source: "",
|
| 627 |
},
|
| 628 |
{
|
| 629 |
model: "Qwen3-1.7B (Thinking Mode)",
|
|
@@ -654,7 +654,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 654 |
//poly//math: 25.2,
|
| 655 |
//mlogiqa: 56.0,
|
| 656 |
},
|
| 657 |
-
source: "",
|
| 658 |
},
|
| 659 |
{
|
| 660 |
model: "Qwen3-0.6B (Non-thinking Mode)",
|
|
@@ -685,7 +685,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 685 |
//poly//math: 4.6,
|
| 686 |
//mlogiqa: 37.3,
|
| 687 |
},
|
| 688 |
-
source: "",
|
| 689 |
},
|
| 690 |
{
|
| 691 |
model: "Qwen3-1.7B (Non-thinking Mode)",
|
|
@@ -716,5 +716,5 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
| 716 |
//poly//math: 10.3,
|
| 717 |
//mlogiqa: 41.1,
|
| 718 |
},
|
| 719 |
-
source: "",
|
| 720 |
},];
|
|
|
|
| 24 |
mmmlu: 86.70,
|
| 25 |
////include: 73.46,
|
| 26 |
},
|
| 27 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 28 |
},
|
| 29 |
{
|
| 30 |
model: "Qwen3-32B (Base Model)",
|
|
|
|
| 48 |
mmmlu: 83.83,
|
| 49 |
//include: 67.87,
|
| 50 |
},
|
| 51 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 52 |
},
|
| 53 |
{
|
| 54 |
model: "Qwen3-14B (Base Model)",
|
|
|
|
| 72 |
mmmlu: 81.46,
|
| 73 |
//include: 64.55,
|
| 74 |
},
|
| 75 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 76 |
},
|
| 77 |
{
|
| 78 |
model: "Qwen3-30B-A3B (Base Model)",
|
|
|
|
| 96 |
mmmlu: 81.46,
|
| 97 |
//include: 67.00,
|
| 98 |
},
|
| 99 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 100 |
},
|
| 101 |
{
|
| 102 |
model: "Qwen3-8B (Base Model)",
|
|
|
|
| 120 |
mmmlu: 75.72,
|
| 121 |
//include: 59.40,
|
| 122 |
},
|
| 123 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 124 |
},
|
| 125 |
{
|
| 126 |
model: "Qwen3-4B (Base Model)",
|
|
|
|
| 144 |
mmmlu: 71.42,
|
| 145 |
//include: 56.29,
|
| 146 |
},
|
| 147 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 148 |
},
|
| 149 |
{
|
| 150 |
model: "Qwen3-1.7B (Base Model)",
|
|
|
|
| 168 |
mmmlu: 63.27,
|
| 169 |
//include: 45.57,
|
| 170 |
},
|
| 171 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 172 |
},
|
| 173 |
{
|
| 174 |
model: "Qwen3-0.6B (Base Model)",
|
|
|
|
| 192 |
mmmlu: 50.16,
|
| 193 |
//include: 34.26,
|
| 194 |
},
|
| 195 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 196 |
},
|
| 197 |
{
|
| 198 |
model: "Qwen3-235B-A22B (Thinking Mode)",
|
|
|
|
| 226 |
//poly//math: 54.7,
|
| 227 |
//mlogiqa: 77.1,
|
| 228 |
},
|
| 229 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 230 |
},
|
| 231 |
{
|
| 232 |
model: "Qwen3-235B-A22B (Non-thinking Mode)",
|
|
|
|
| 261 |
//poly//math: 27.0,
|
| 262 |
//mlogiqa: 67.6,
|
| 263 |
},
|
| 264 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 265 |
},
|
| 266 |
{
|
| 267 |
model: "Qwen3-32B (Thinking Mode)",
|
|
|
|
| 294 |
//poly//math: 47.4,
|
| 295 |
//mlogiqa: 76.3,
|
| 296 |
},
|
| 297 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 298 |
},
|
| 299 |
{
|
| 300 |
model: "Qwen3-32B (Non-thinking Mode)",
|
|
|
|
| 328 |
//poly//math: 22.5,
|
| 329 |
//mlogiqa: 62.9,
|
| 330 |
},
|
| 331 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 332 |
},
|
| 333 |
{
|
| 334 |
model: "Qwen3-14B (Thinking Mode)",
|
|
|
|
| 361 |
//poly//math: 45.8,
|
| 362 |
//mlogiqa: 71.1,
|
| 363 |
},
|
| 364 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 365 |
},
|
| 366 |
{
|
| 367 |
model: "Qwen3-30B-A3B (Thinking Mode)",
|
|
|
|
| 394 |
//poly//math: 46.1,
|
| 395 |
//mlogiqa: 70.1,
|
| 396 |
},
|
| 397 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 398 |
},
|
| 399 |
{
|
| 400 |
model: "Qwen3-14B (Non-thinking Mode)",
|
|
|
|
| 427 |
//poly//math: 22.0,
|
| 428 |
//mlogiqa: 58.9,
|
| 429 |
},
|
| 430 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 431 |
},
|
| 432 |
{
|
| 433 |
model: "Qwen3-30B-A3B (Non-thinking Mode)",
|
|
|
|
| 460 |
//poly//math: 23.3,
|
| 461 |
//mlogiqa: 53.3,
|
| 462 |
},
|
| 463 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 464 |
},
|
| 465 |
{
|
| 466 |
model: "Qwen3-4B (Thinking Mode)",
|
|
|
|
| 493 |
//poly//math: 40.0,
|
| 494 |
//mlogiqa: 65.9,
|
| 495 |
},
|
| 496 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 497 |
},
|
| 498 |
{
|
| 499 |
model: "Qwen3-8B (Thinking Mode)",
|
|
|
|
| 526 |
//poly//math: 42.7,
|
| 527 |
//mlogiqa: 69.0,
|
| 528 |
},
|
| 529 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 530 |
},
|
| 531 |
{
|
| 532 |
model: "Qwen3-4B (Non-thinking Mode)",
|
|
|
|
| 559 |
//poly//math: 16.6,
|
| 560 |
//mlogiqa: 49.9,
|
| 561 |
},
|
| 562 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 563 |
},
|
| 564 |
{
|
| 565 |
model: "Qwen3-8B (Non-thinking Mode)",
|
|
|
|
| 592 |
//poly//math: 18.8,
|
| 593 |
//mlogiqa: 51.4,
|
| 594 |
},
|
| 595 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 596 |
},
|
| 597 |
{
|
| 598 |
model: "Qwen3-0.6B (Thinking Mode)",
|
|
|
|
| 623 |
//poly//math: 11.4,
|
| 624 |
//mlogiqa: 40.9,
|
| 625 |
},
|
| 626 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 627 |
},
|
| 628 |
{
|
| 629 |
model: "Qwen3-1.7B (Thinking Mode)",
|
|
|
|
| 654 |
//poly//math: 25.2,
|
| 655 |
//mlogiqa: 56.0,
|
| 656 |
},
|
| 657 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 658 |
},
|
| 659 |
{
|
| 660 |
model: "Qwen3-0.6B (Non-thinking Mode)",
|
|
|
|
| 685 |
//poly//math: 4.6,
|
| 686 |
//mlogiqa: 37.3,
|
| 687 |
},
|
| 688 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 689 |
},
|
| 690 |
{
|
| 691 |
model: "Qwen3-1.7B (Non-thinking Mode)",
|
|
|
|
| 716 |
//poly//math: 10.3,
|
| 717 |
//mlogiqa: 41.1,
|
| 718 |
},
|
| 719 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
| 720 |
},];
|