AmourWaltz
committed on
Commit
·
678f61f
1
Parent(s):
2874c7f
111
Browse files
- ReliableMath.tsv +3 -0
- app.py +1 -1
ReliableMath.tsv
CHANGED
@@ -7,5 +7,8 @@ deepseek-ai/DeepSeek-R1-Distill-Qwen-32B 32 Reliable 0.551 0.001 0.684 0.000 5.0
|
|
7 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B 14 Reliable 0.547 0.000 0.629 0.000 6.23k 0.465 0.001 11.00k
|
8 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B 7 Reliable 0.289 0.000 0.575 0.000 6.24k 0.003 0.000 6.60k
|
9 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 1.5 Reliable 0.198 0.000 0.396 0.000 9.37k 0.000 0.000 9.70k
|
|
|
|
|
|
|
10 |
Qwen/Qwen2.5-Math-7B-Instruct 7 Reliable 0.266 0.000 0.505 0.000 0.82k 0.027 0.000 0.90k
|
11 |
Qwen/Qwen2.5-Math-1.5B-Instruct 1.5 Reliable 0.218 0.000 0.422 0.000 0.74k 0.015 0.000 0.80k
|
|
|
7 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B 14 Reliable 0.547 0.000 0.629 0.000 6.23k 0.465 0.001 11.00k
|
8 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B 7 Reliable 0.289 0.000 0.575 0.000 6.24k 0.003 0.000 6.60k
|
9 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 1.5 Reliable 0.198 0.000 0.396 0.000 9.37k 0.000 0.000 9.70k
|
10 |
+
Qwen/Qwen3-235B-A22B 235 Reliable 0.621 0.001 0.767 0.000 5.64k 0.475 0.003 5.60k
|
11 |
+
Qwen/Qwen3-32B 32 Reliable 0.545 0.000 0.764 0.000 5.88k 0.326 0.000 6.00k
|
12 |
+
Qwen/Qwen3-14B 14 Reliable 0.573 0.002 0.748 0.003 5.87k 0.399 0.000 6.10k
|
13 |
Qwen/Qwen2.5-Math-7B-Instruct 7 Reliable 0.266 0.000 0.505 0.000 0.82k 0.027 0.000 0.90k
|
14 |
Qwen/Qwen2.5-Math-1.5B-Instruct 1.5 Reliable 0.218 0.000 0.422 0.000 0.74k 0.015 0.000 0.80k
|
app.py
CHANGED
@@ -26,7 +26,7 @@ df["Size_Display"] = df["Size"].apply(
|
|
26 |
|
27 |
model_types = {
|
28 |
"reasoning": ["deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "OpenAI/o3-mini"],
|
29 |
-
"instruction": ["OpenAI/GPT-4o", "deepseek-ai/DeepSeek-V3", "Qwen/Qwen2.5-Math-1.5B-Instruct", "Qwen/Qwen2.5-Math-7B-Instruct"]
|
30 |
}
|
31 |
|
32 |
# Add size category for filtering
|
|
|
26 |
|
27 |
model_types = {
|
28 |
"reasoning": ["deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "OpenAI/o3-mini"],
|
29 |
+
"instruction": ["OpenAI/GPT-4o", "deepseek-ai/DeepSeek-V3", "Qwen/Qwen2.5-Math-1.5B-Instruct", "Qwen/Qwen2.5-Math-7B-Instruct", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B", "Qwen/Qwen3-14B"]
|
30 |
}
|
31 |
|
32 |
# Add size category for filtering
|