fenglinliu committed on
Commit
916c491
·
verified ·
1 Parent(s): 600915c

Update ShoppingMMLU_overall.json

Browse files
Files changed (1) hide show
  1. ShoppingMMLU_overall.json +27 -37
ShoppingMMLU_overall.json CHANGED
@@ -4,8 +4,7 @@
4
  "Claude-2": {
5
  "META": {
6
  "Method": [
7
- "Claude-2",
8
- "https://aws.amazon.com/bedrock/claude/"
9
  ],
10
  "Parameters": "",
11
  "Org": "Anthropic",
@@ -67,8 +66,7 @@
67
  "GPT-3.5-turbo": {
68
  "META": {
69
  "Method": [
70
- "GPT-3.5-turbo",
71
- "https://platform.openai.com/docs/models#gpt-3-5-turbo"
72
  ],
73
  "Parameters": "",
74
  "Org": "OpenAI",
@@ -76,62 +74,61 @@
76
  "Verified": "Yes"
77
  },
78
  "MedQA": {
79
- "Overall": 80.75
80
  },
81
  "MedMCQA": {
82
- "Overall": 71.63
83
  },
84
  "MMLU-Medicine": {
85
- "Overall": 70.17
86
  },
87
  "PubMedQA": {
88
- "Overall": 67.76
89
  },
90
  "Referral QA": {
91
- "Overall": 67.76
92
  },
93
  "Treat Recom.": {
94
- "Overall": 67.76
95
  },
96
  "MIMIC": {
97
- "Overall": 67.76
98
  },
99
  "IU-Xray": {
100
- "Overall": 67.76
101
  },
102
  "Hospitaliz. Summari.": {
103
- "Overall": 67.76
104
  },
105
  "Patient Education": {
106
- "Overall": 67.76
107
  },
108
  "BC5": {
109
- "Overall": 67.76
110
  },
111
  "NCBI": {
112
- "Overall": 67.76
113
  },
114
  "DDI": {
115
- "Overall": 67.76
116
  },
117
  "GAD": {
118
- "Overall": 67.76
119
  },
120
  "HoC": {
121
- "Overall": 67.76
122
  },
123
  "Pharma. QA": {
124
- "Overall": 67.76
125
  },
126
  "Drug Inter.": {
127
- "Overall": 67.76
128
  }
129
  },
130
  "GPT-4": {
131
  "META": {
132
  "Method": [
133
- "GPT-4",
134
- "https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4"
135
  ],
136
  "Parameters": "",
137
  "Org": "OpenAI",
@@ -255,8 +252,7 @@
255
  "Vicuna-7B-v1.5": {
256
  "META": {
257
  "Method": [
258
- "Vicuna-7B-v1.5",
259
- "https://huggingface.co/lmsys/vicuna-7b-v1.5"
260
  ],
261
  "Parameters": "7B",
262
  "Org": "LMSys",
@@ -279,8 +275,7 @@
279
  "LLaMA2-7B-Chat": {
280
  "META": {
281
  "Method": [
282
- "LLaMA2-7B-Chat",
283
- "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
284
  ],
285
  "Parameters": "7B",
286
  "Org": "Meta",
@@ -303,8 +298,7 @@
303
  "Mistral-7B-Instruct": {
304
  "META": {
305
  "Method": [
306
- "Mistral-7B-Instruct",
307
- "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
308
  ],
309
  "Parameters": "7B",
310
  "Org": "MistralAI",
@@ -327,8 +321,7 @@
327
  "Vicuna-13B-v1.5": {
328
  "META": {
329
  "Method": [
330
- "Vicuna-13B-v1.5",
331
- "https://huggingface.co/lmsys/vicuna-13b-v1.5"
332
  ],
333
  "Parameters": "13B",
334
  "Org": "LMSys",
@@ -351,8 +344,7 @@
351
  "LLaMA-2-13B-Chat": {
352
  "META": {
353
  "Method": [
354
- "LLaMA-2-13B-Chat",
355
- "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf"
356
  ],
357
  "Parameters": "13B",
358
  "Org": "Meta",
@@ -375,8 +367,7 @@
375
  "LLaMA-2-70B": {
376
  "META": {
377
  "Method": [
378
- "LLaMA-2-70B",
379
- "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf"
380
  ],
381
  "Parameters": "70B",
382
  "Org": "Meta",
@@ -399,8 +390,7 @@
399
  "LLaMA-3-70B": {
400
  "META": {
401
  "Method": [
402
- "LLaMA-3-70B",
403
- "https://huggingface.co/meta-llama/Meta-Llama-3-70B"
404
  ],
405
  "Parameters": "70B",
406
  "Org": "Meta",
 
4
  "Claude-2": {
5
  "META": {
6
  "Method": [
7
+ "Claude-2"
 
8
  ],
9
  "Parameters": "",
10
  "Org": "Anthropic",
 
66
  "GPT-3.5-turbo": {
67
  "META": {
68
  "Method": [
69
+ "GPT-3.5-turbo"
 
70
  ],
71
  "Parameters": "",
72
  "Org": "OpenAI",
 
74
  "Verified": "Yes"
75
  },
76
  "MedQA": {
77
+ "Overall": 61.2
78
  },
79
  "MedMCQA": {
80
+ "Overall": 59.4
81
  },
82
  "MMLU-Medicine": {
83
+ "Overall": 73.5
84
  },
85
  "PubMedQA": {
86
+ "Overall": 70.2
87
  },
88
  "Referral QA": {
89
+ "Overall": 81.1
90
  },
91
  "Treat Recom.": {
92
+ "Overall": 7.3
93
  },
94
  "MIMIC": {
95
+ "Overall": 14.1
96
  },
97
  "IU-Xray": {
98
+ "Overall": 10.3
99
  },
100
  "Hospitaliz. Summari.": {
101
+ "Overall": 10.5
102
  },
103
  "Patient Education": {
104
+ "Overall": 9.2
105
  },
106
  "BC5": {
107
+ "Overall": 52.3
108
  },
109
  "NCBI": {
110
+ "Overall": 46.1
111
  },
112
  "DDI": {
113
+ "Overall": 49.3
114
  },
115
  "GAD": {
116
+ "Overall": 50.8
117
  },
118
  "HoC": {
119
+ "Overall": 66.4
120
  },
121
  "Pharma. QA": {
122
+ "Overall": 57.3
123
  },
124
  "Drug Inter.": {
125
+ "Overall": 47.0
126
  }
127
  },
128
  "GPT-4": {
129
  "META": {
130
  "Method": [
131
+ "GPT-4"
 
132
  ],
133
  "Parameters": "",
134
  "Org": "OpenAI",
 
252
  "Vicuna-7B-v1.5": {
253
  "META": {
254
  "Method": [
255
+ "Vicuna-7B-v1.5"
 
256
  ],
257
  "Parameters": "7B",
258
  "Org": "LMSys",
 
275
  "LLaMA2-7B-Chat": {
276
  "META": {
277
  "Method": [
278
+ "LLaMA2-7B-Chat"
 
279
  ],
280
  "Parameters": "7B",
281
  "Org": "Meta",
 
298
  "Mistral-7B-Instruct": {
299
  "META": {
300
  "Method": [
301
+ "Mistral-7B-Instruct"
 
302
  ],
303
  "Parameters": "7B",
304
  "Org": "MistralAI",
 
321
  "Vicuna-13B-v1.5": {
322
  "META": {
323
  "Method": [
324
+ "Vicuna-13B-v1.5"
 
325
  ],
326
  "Parameters": "13B",
327
  "Org": "LMSys",
 
344
  "LLaMA-2-13B-Chat": {
345
  "META": {
346
  "Method": [
347
+ "LLaMA-2-13B-Chat"
 
348
  ],
349
  "Parameters": "13B",
350
  "Org": "Meta",
 
367
  "LLaMA-2-70B": {
368
  "META": {
369
  "Method": [
370
+ "LLaMA-2-70B"
 
371
  ],
372
  "Parameters": "70B",
373
  "Org": "Meta",
 
390
  "LLaMA-3-70B": {
391
  "META": {
392
  "Method": [
393
+ "LLaMA-3-70B"
 
394
  ],
395
  "Parameters": "70B",
396
  "Org": "Meta",