Adding Evaluation Results (#4)
Browse files
- Adding Evaluation Results (842a85493025972dd3883e334c466859d2a8a8fc)
README.md
CHANGED
@@ -13,7 +13,12 @@ tags:
|
|
13 |
- tool-use
|
14 |
base_model:
|
15 |
- Qwen/Qwen2.5-14B-Instruct
|
|
|
|
|
|
|
|
|
16 |
pipeline_tag: text-generation
|
|
|
17 |
model-index:
|
18 |
- name: miscii-14b-1028
|
19 |
results:
|
@@ -30,8 +35,7 @@ model-index:
|
|
30 |
value: 82.37
|
31 |
name: strict accuracy
|
32 |
source:
|
33 |
-
url:
|
34 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
35 |
name: Open LLM Leaderboard
|
36 |
- task:
|
37 |
type: text-generation
|
@@ -46,8 +50,7 @@ model-index:
|
|
46 |
value: 49.26
|
47 |
name: normalized accuracy
|
48 |
source:
|
49 |
-
url:
|
50 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
51 |
name: Open LLM Leaderboard
|
52 |
- task:
|
53 |
type: text-generation
|
@@ -62,8 +65,7 @@ model-index:
|
|
62 |
value: 6.34
|
63 |
name: exact match
|
64 |
source:
|
65 |
-
url:
|
66 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
67 |
name: Open LLM Leaderboard
|
68 |
- task:
|
69 |
type: text-generation
|
@@ -78,8 +80,7 @@ model-index:
|
|
78 |
value: 14.21
|
79 |
name: acc_norm
|
80 |
source:
|
81 |
-
url:
|
82 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
83 |
name: Open LLM Leaderboard
|
84 |
- task:
|
85 |
type: text-generation
|
@@ -94,8 +95,7 @@ model-index:
|
|
94 |
value: 12
|
95 |
name: acc_norm
|
96 |
source:
|
97 |
-
url:
|
98 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
99 |
name: Open LLM Leaderboard
|
100 |
- task:
|
101 |
type: text-generation
|
@@ -112,14 +112,8 @@ model-index:
|
|
112 |
value: 46.14
|
113 |
name: accuracy
|
114 |
source:
|
115 |
-
url:
|
116 |
-
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
117 |
name: Open LLM Leaderboard
|
118 |
-
datasets:
|
119 |
-
- nvidia/HelpSteer2
|
120 |
-
- google/Synthetic-Persona-Chat
|
121 |
-
- mlabonne/orpo-dpo-mix-40k
|
122 |
-
new_version: sthenno-com/miscii-14b-1225
|
123 |
---
|
124 |
|
125 |
# miscii-14b-1028
|
@@ -167,4 +161,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
|
|
167 |
|MuSR (0-shot) |12.00|
|
168 |
|MMLU-PRO (5-shot) |46.14|
|
169 |
|
170 |
-
$$\large{\text{There's nothing more to Show}}$$
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
- tool-use
|
14 |
base_model:
|
15 |
- Qwen/Qwen2.5-14B-Instruct
|
16 |
+
datasets:
|
17 |
+
- nvidia/HelpSteer2
|
18 |
+
- google/Synthetic-Persona-Chat
|
19 |
+
- mlabonne/orpo-dpo-mix-40k
|
20 |
pipeline_tag: text-generation
|
21 |
+
new_version: sthenno-com/miscii-14b-1225
|
22 |
model-index:
|
23 |
- name: miscii-14b-1028
|
24 |
results:
|
|
|
35 |
value: 82.37
|
36 |
name: strict accuracy
|
37 |
source:
|
38 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
39 |
name: Open LLM Leaderboard
|
40 |
- task:
|
41 |
type: text-generation
|
|
|
50 |
value: 49.26
|
51 |
name: normalized accuracy
|
52 |
source:
|
53 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
54 |
name: Open LLM Leaderboard
|
55 |
- task:
|
56 |
type: text-generation
|
|
|
65 |
value: 6.34
|
66 |
name: exact match
|
67 |
source:
|
68 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
69 |
name: Open LLM Leaderboard
|
70 |
- task:
|
71 |
type: text-generation
|
|
|
80 |
value: 14.21
|
81 |
name: acc_norm
|
82 |
source:
|
83 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
84 |
name: Open LLM Leaderboard
|
85 |
- task:
|
86 |
type: text-generation
|
|
|
95 |
value: 12
|
96 |
name: acc_norm
|
97 |
source:
|
98 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
99 |
name: Open LLM Leaderboard
|
100 |
- task:
|
101 |
type: text-generation
|
|
|
112 |
value: 46.14
|
113 |
name: accuracy
|
114 |
source:
|
115 |
+
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=sthenno-com/miscii-14b-1028
|
|
|
116 |
name: Open LLM Leaderboard
|
|
|
|
|
|
|
|
|
|
|
117 |
---
|
118 |
|
119 |
# miscii-14b-1028
|
|
|
161 |
|MuSR (0-shot) |12.00|
|
162 |
|MMLU-PRO (5-shot) |46.14|
|
163 |
|
164 |
+
$$\large{\text{There's nothing more to Show}}$$
|
165 |
+
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
|
166 |
+
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/sthenno-com__miscii-14b-1028-details)
|
167 |
+
|
168 |
+
| Metric |Value|
|
169 |
+
|-------------------|----:|
|
170 |
+
|Avg. |42.38|
|
171 |
+
|IFEval (0-Shot) |82.37|
|
172 |
+
|BBH (3-Shot) |49.26|
|
173 |
+
|MATH Lvl 5 (4-Shot)|50.30|
|
174 |
+
|GPQA (0-shot) |14.21|
|
175 |
+
|MuSR (0-shot) |12.00|
|
176 |
+
|MMLU-PRO (5-shot) |46.14|
|
177 |
+
|