AmourWaltz committed on
Commit
678f61f
·
1 Parent(s): 2874c7f
Files changed (2) hide show
  1. ReliableMath.tsv +3 -0
  2. app.py +1 -1
ReliableMath.tsv CHANGED
@@ -7,5 +7,8 @@ deepseek-ai/DeepSeek-R1-Distill-Qwen-32B 32 Reliable 0.551 0.001 0.684 0.000 5.0
7
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B 14 Reliable 0.547 0.000 0.629 0.000 6.23k 0.465 0.001 11.00k
8
  deepseek-ai/DeepSeek-R1-Distill-Qwen-7B 7 Reliable 0.289 0.000 0.575 0.000 6.24k 0.003 0.000 6.60k
9
  deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 1.5 Reliable 0.198 0.000 0.396 0.000 9.37k 0.000 0.000 9.70k
 
 
 
10
  Qwen/Qwen2.5-Math-7B-Instruct 7 Reliable 0.266 0.000 0.505 0.000 0.82k 0.027 0.000 0.90k
11
  Qwen/Qwen2.5-Math-1.5B-Instruct 1.5 Reliable 0.218 0.000 0.422 0.000 0.74k 0.015 0.000 0.80k
 
7
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B 14 Reliable 0.547 0.000 0.629 0.000 6.23k 0.465 0.001 11.00k
8
  deepseek-ai/DeepSeek-R1-Distill-Qwen-7B 7 Reliable 0.289 0.000 0.575 0.000 6.24k 0.003 0.000 6.60k
9
  deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 1.5 Reliable 0.198 0.000 0.396 0.000 9.37k 0.000 0.000 9.70k
10
+ Qwen/Qwen3-235B-A22B 235 Reliable 0.621 0.001 0.767 0.000 5.64k 0.475 0.003 5.60k
11
+ Qwen/Qwen3-32B 32 Reliable 0.545 0.000 0.764 0.000 5.88k 0.326 0.000 6.00k
12
+ Qwen/Qwen3-14B 14 Reliable 0.573 0.002 0.748 0.003 5.87k 0.399 0.000 6.10k
13
  Qwen/Qwen2.5-Math-7B-Instruct 7 Reliable 0.266 0.000 0.505 0.000 0.82k 0.027 0.000 0.90k
14
  Qwen/Qwen2.5-Math-1.5B-Instruct 1.5 Reliable 0.218 0.000 0.422 0.000 0.74k 0.015 0.000 0.80k
app.py CHANGED
@@ -26,7 +26,7 @@ df["Size_Display"] = df["Size"].apply(
26
 
27
  model_types = {
28
  "reasoning": ["deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "OpenAI/o3-mini"],
29
- "instruction": ["OpenAI/GPT-4o", "deepseek-ai/DeepSeek-V3", "Qwen/Qwen2.5-Math-1.5B-Instruct", "Qwen/Qwen2.5-Math-7B-Instruct"]
30
  }
31
 
32
  # Add size category for filtering
 
26
 
27
  model_types = {
28
  "reasoning": ["deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "OpenAI/o3-mini"],
29
+ "instruction": ["OpenAI/GPT-4o", "deepseek-ai/DeepSeek-V3", "Qwen/Qwen2.5-Math-1.5B-Instruct", "Qwen/Qwen2.5-Math-7B-Instruct", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B", "Qwen/Qwen3-14B"]
30
  }
31
 
32
  # Add size category for filtering