Spaces:
Sleeping
Sleeping
Update ShoppingMMLU_overall.json
Browse files- ShoppingMMLU_overall.json +27 -37
ShoppingMMLU_overall.json
CHANGED
@@ -4,8 +4,7 @@
|
|
4 |
"Claude-2": {
|
5 |
"META": {
|
6 |
"Method": [
|
7 |
-
"Claude-2"
|
8 |
-
"https://aws.amazon.com/bedrock/claude/"
|
9 |
],
|
10 |
"Parameters": "",
|
11 |
"Org": "Anthropic",
|
@@ -67,8 +66,7 @@
|
|
67 |
"GPT-3.5-turbo": {
|
68 |
"META": {
|
69 |
"Method": [
|
70 |
-
"GPT-3.5-turbo"
|
71 |
-
"https://platform.openai.com/docs/models#gpt-3-5-turbo"
|
72 |
],
|
73 |
"Parameters": "",
|
74 |
"Org": "OpenAI",
|
@@ -76,62 +74,61 @@
|
|
76 |
"Verified": "Yes"
|
77 |
},
|
78 |
"MedQA": {
|
79 |
-
"Overall":
|
80 |
},
|
81 |
"MedMCQA": {
|
82 |
-
"Overall":
|
83 |
},
|
84 |
"MMLU-Medicine": {
|
85 |
-
"Overall":
|
86 |
},
|
87 |
"PubMedQA": {
|
88 |
-
"Overall":
|
89 |
},
|
90 |
"Referral QA": {
|
91 |
-
"Overall":
|
92 |
},
|
93 |
"Treat Recom.": {
|
94 |
-
"Overall":
|
95 |
},
|
96 |
"MIMIC": {
|
97 |
-
"Overall":
|
98 |
},
|
99 |
"IU-Xray": {
|
100 |
-
"Overall":
|
101 |
},
|
102 |
"Hospitaliz. Summari.": {
|
103 |
-
"Overall":
|
104 |
},
|
105 |
"Patient Education": {
|
106 |
-
"Overall":
|
107 |
},
|
108 |
"BC5": {
|
109 |
-
"Overall":
|
110 |
},
|
111 |
"NCBI": {
|
112 |
-
"Overall":
|
113 |
},
|
114 |
"DDI": {
|
115 |
-
"Overall":
|
116 |
},
|
117 |
"GAD": {
|
118 |
-
"Overall":
|
119 |
},
|
120 |
"HoC": {
|
121 |
-
"Overall":
|
122 |
},
|
123 |
"Pharma. QA": {
|
124 |
-
"Overall":
|
125 |
},
|
126 |
"Drug Inter.": {
|
127 |
-
"Overall":
|
128 |
}
|
129 |
},
|
130 |
"GPT-4": {
|
131 |
"META": {
|
132 |
"Method": [
|
133 |
-
"GPT-4"
|
134 |
-
"https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4"
|
135 |
],
|
136 |
"Parameters": "",
|
137 |
"Org": "OpenAI",
|
@@ -255,8 +252,7 @@
|
|
255 |
"Vicuna-7B-v1.5": {
|
256 |
"META": {
|
257 |
"Method": [
|
258 |
-
"Vicuna-7B-v1.5"
|
259 |
-
"https://huggingface.co/lmsys/vicuna-7b-v1.5"
|
260 |
],
|
261 |
"Parameters": "7B",
|
262 |
"Org": "LMSys",
|
@@ -279,8 +275,7 @@
|
|
279 |
"LLaMA2-7B-Chat": {
|
280 |
"META": {
|
281 |
"Method": [
|
282 |
-
"LLaMA2-7B-Chat"
|
283 |
-
"https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
|
284 |
],
|
285 |
"Parameters": "7B",
|
286 |
"Org": "Meta",
|
@@ -303,8 +298,7 @@
|
|
303 |
"Mistral-7B-Instruct": {
|
304 |
"META": {
|
305 |
"Method": [
|
306 |
-
"Mistral-7B-Instruct"
|
307 |
-
"https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
|
308 |
],
|
309 |
"Parameters": "7B",
|
310 |
"Org": "MistralAI",
|
@@ -327,8 +321,7 @@
|
|
327 |
"Vicuna-13B-v1.5": {
|
328 |
"META": {
|
329 |
"Method": [
|
330 |
-
"Vicuna-13B-v1.5"
|
331 |
-
"https://huggingface.co/lmsys/vicuna-13b-v1.5"
|
332 |
],
|
333 |
"Parameters": "13B",
|
334 |
"Org": "LMSys",
|
@@ -351,8 +344,7 @@
|
|
351 |
"LLaMA-2-13B-Chat": {
|
352 |
"META": {
|
353 |
"Method": [
|
354 |
-
"LLaMA-2-13B-Chat"
|
355 |
-
"https://huggingface.co/meta-llama/Llama-2-13b-chat-hf"
|
356 |
],
|
357 |
"Parameters": "13B",
|
358 |
"Org": "Meta",
|
@@ -375,8 +367,7 @@
|
|
375 |
"LLaMA-2-70B": {
|
376 |
"META": {
|
377 |
"Method": [
|
378 |
-
"LLaMA-2-70B"
|
379 |
-
"https://huggingface.co/meta-llama/Llama-2-70b-chat-hf"
|
380 |
],
|
381 |
"Parameters": "70B",
|
382 |
"Org": "Meta",
|
@@ -399,8 +390,7 @@
|
|
399 |
"LLaMA-3-70B": {
|
400 |
"META": {
|
401 |
"Method": [
|
402 |
-
"LLaMA-3-70B"
|
403 |
-
"https://huggingface.co/meta-llama/Meta-Llama-3-70B"
|
404 |
],
|
405 |
"Parameters": "70B",
|
406 |
"Org": "Meta",
|
|
|
4 |
"Claude-2": {
|
5 |
"META": {
|
6 |
"Method": [
|
7 |
+
"Claude-2"
|
|
|
8 |
],
|
9 |
"Parameters": "",
|
10 |
"Org": "Anthropic",
|
|
|
66 |
"GPT-3.5-turbo": {
|
67 |
"META": {
|
68 |
"Method": [
|
69 |
+
"GPT-3.5-turbo"
|
|
|
70 |
],
|
71 |
"Parameters": "",
|
72 |
"Org": "OpenAI",
|
|
|
74 |
"Verified": "Yes"
|
75 |
},
|
76 |
"MedQA": {
|
77 |
+
"Overall": 61.2
|
78 |
},
|
79 |
"MedMCQA": {
|
80 |
+
"Overall": 59.4
|
81 |
},
|
82 |
"MMLU-Medicine": {
|
83 |
+
"Overall": 73.5
|
84 |
},
|
85 |
"PubMedQA": {
|
86 |
+
"Overall": 70.2
|
87 |
},
|
88 |
"Referral QA": {
|
89 |
+
"Overall": 81.1
|
90 |
},
|
91 |
"Treat Recom.": {
|
92 |
+
"Overall": 7.3
|
93 |
},
|
94 |
"MIMIC": {
|
95 |
+
"Overall": 14.1
|
96 |
},
|
97 |
"IU-Xray": {
|
98 |
+
"Overall": 10.3
|
99 |
},
|
100 |
"Hospitaliz. Summari.": {
|
101 |
+
"Overall": 10.5
|
102 |
},
|
103 |
"Patient Education": {
|
104 |
+
"Overall": 9.2
|
105 |
},
|
106 |
"BC5": {
|
107 |
+
"Overall": 52.3
|
108 |
},
|
109 |
"NCBI": {
|
110 |
+
"Overall": 46.1
|
111 |
},
|
112 |
"DDI": {
|
113 |
+
"Overall": 49.3
|
114 |
},
|
115 |
"GAD": {
|
116 |
+
"Overall": 50.8
|
117 |
},
|
118 |
"HoC": {
|
119 |
+
"Overall": 66.4
|
120 |
},
|
121 |
"Pharma. QA": {
|
122 |
+
"Overall": 57.3
|
123 |
},
|
124 |
"Drug Inter.": {
|
125 |
+
"Overall": 47.0
|
126 |
}
|
127 |
},
|
128 |
"GPT-4": {
|
129 |
"META": {
|
130 |
"Method": [
|
131 |
+
"GPT-4"
|
|
|
132 |
],
|
133 |
"Parameters": "",
|
134 |
"Org": "OpenAI",
|
|
|
252 |
"Vicuna-7B-v1.5": {
|
253 |
"META": {
|
254 |
"Method": [
|
255 |
+
"Vicuna-7B-v1.5"
|
|
|
256 |
],
|
257 |
"Parameters": "7B",
|
258 |
"Org": "LMSys",
|
|
|
275 |
"LLaMA2-7B-Chat": {
|
276 |
"META": {
|
277 |
"Method": [
|
278 |
+
"LLaMA2-7B-Chat"
|
|
|
279 |
],
|
280 |
"Parameters": "7B",
|
281 |
"Org": "Meta",
|
|
|
298 |
"Mistral-7B-Instruct": {
|
299 |
"META": {
|
300 |
"Method": [
|
301 |
+
"Mistral-7B-Instruct"
|
|
|
302 |
],
|
303 |
"Parameters": "7B",
|
304 |
"Org": "MistralAI",
|
|
|
321 |
"Vicuna-13B-v1.5": {
|
322 |
"META": {
|
323 |
"Method": [
|
324 |
+
"Vicuna-13B-v1.5"
|
|
|
325 |
],
|
326 |
"Parameters": "13B",
|
327 |
"Org": "LMSys",
|
|
|
344 |
"LLaMA-2-13B-Chat": {
|
345 |
"META": {
|
346 |
"Method": [
|
347 |
+
"LLaMA-2-13B-Chat"
|
|
|
348 |
],
|
349 |
"Parameters": "13B",
|
350 |
"Org": "Meta",
|
|
|
367 |
"LLaMA-2-70B": {
|
368 |
"META": {
|
369 |
"Method": [
|
370 |
+
"LLaMA-2-70B"
|
|
|
371 |
],
|
372 |
"Parameters": "70B",
|
373 |
"Org": "Meta",
|
|
|
390 |
"LLaMA-3-70B": {
|
391 |
"META": {
|
392 |
"Method": [
|
393 |
+
"LLaMA-3-70B"
|
|
|
394 |
],
|
395 |
"Parameters": "70B",
|
396 |
"Org": "Meta",
|