fenglinliu commited on
Commit
ef2627d
·
verified ·
1 Parent(s): d0790e3

Update ShoppingMMLU_overall.json

Browse files
Files changed (1) hide show
  1. ShoppingMMLU_overall.json +101 -23
ShoppingMMLU_overall.json CHANGED
@@ -64,52 +64,130 @@
64
  "Overall": 67.76
65
  }
66
  },
67
- "Claude2": {
68
  "META": {
69
  "Method": [
70
- "Claude2",
71
- "https://aws.amazon.com/bedrock/claude/"
72
  ],
73
  "Parameters": "",
74
- "Org": "Anthropic",
75
  "OpenSource": "No",
76
  "Verified": "Yes"
77
  },
78
- "Shopping Concept Understanding": {
79
- "Overall": 75.46
80
  },
81
- "Shopping Knowledge Reasoning": {
82
- "Overall": 65.5
83
  },
84
- "User Behavior Alignment": {
85
- "Overall": 63.53
86
  },
87
- "Multi-lingual Abilities": {
88
- "Overall": 65.24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  },
91
- "ChatGPT": {
92
  "META": {
93
  "Method": [
94
- "ChatGPT",
95
- "https://platform.openai.com/docs/models#gpt-3-5-turbo"
96
  ],
97
  "Parameters": "",
98
  "Org": "OpenAI",
99
  "OpenSource": "No",
100
  "Verified": "Yes"
101
  },
102
- "Shopping Concept Understanding": {
103
- "Overall": 75.63
104
  },
105
- "Shopping Knowledge Reasoning": {
106
- "Overall": 64.97
107
  },
108
- "User Behavior Alignment": {
109
- "Overall": 59.79
110
  },
111
- "Multi-lingual Abilities": {
112
- "Overall": 60.81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
  },
115
  "LLaMA3-70B-Instruct": {
 
64
  "Overall": 67.76
65
  }
66
  },
67
+ "GPT-3.5-turbo": {
68
  "META": {
69
  "Method": [
70
+ "GPT-3.5-turbo",
71
+ "https://platform.openai.com/docs/models#gpt-3-5-turbo"
72
  ],
73
  "Parameters": "",
74
+ "Org": "OpenAI",
75
  "OpenSource": "No",
76
  "Verified": "Yes"
77
  },
78
+ "MedQA": {
79
+ "Overall": 80.75
80
  },
81
+ "MedMCQA": {
82
+ "Overall": 71.63
83
  },
84
+ "MMLU-Medicine": {
85
+ "Overall": 70.17
86
  },
87
+ "PubMedQA": {
88
+ "Overall": 67.76
89
+ },
90
+ "Referral QA": {
91
+ "Overall": 67.76
92
+ },
93
+ "Treat Recom.": {
94
+ "Overall": 67.76
95
+ },
96
+ "MIMIC": {
97
+ "Overall": 67.76
98
+ },
99
+ "IU-Xray": {
100
+ "Overall": 67.76
101
+ },
102
+ "Hospitaliz. Summari.": {
103
+ "Overall": 67.76
104
+ },
105
+ "Patient Education": {
106
+ "Overall": 67.76
107
+ },
108
+ "BC5": {
109
+ "Overall": 67.76
110
+ },
111
+ "NCBI": {
112
+ "Overall": 67.76
113
+ },
114
+ "DDI": {
115
+ "Overall": 67.76
116
+ },
117
+ "GAD": {
118
+ "Overall": 67.76
119
+ },
120
+ "HoC": {
121
+ "Overall": 67.76
122
+ },
123
+ "Pharma. QA": {
124
+ "Overall": 67.76
125
+ },
126
+ "Drug Inter.": {
127
+ "Overall": 67.76
128
  }
129
  },
130
+ "GPT-4": {
131
  "META": {
132
  "Method": [
133
+ "GPT-4",
134
+ "https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4"
135
  ],
136
  "Parameters": "",
137
  "Org": "OpenAI",
138
  "OpenSource": "No",
139
  "Verified": "Yes"
140
  },
141
+ "MedQA": {
142
+ "Overall": 80.75
143
  },
144
+ "MedMCQA": {
145
+ "Overall": 71.63
146
  },
147
+ "MMLU-Medicine": {
148
+ "Overall": 70.17
149
  },
150
+ "PubMedQA": {
151
+ "Overall": 67.76
152
+ },
153
+ "Referral QA": {
154
+ "Overall": 67.76
155
+ },
156
+ "Treat Recom.": {
157
+ "Overall": 67.76
158
+ },
159
+ "MIMIC": {
160
+ "Overall": 67.76
161
+ },
162
+ "IU-Xray": {
163
+ "Overall": 67.76
164
+ },
165
+ "Hospitaliz. Summari.": {
166
+ "Overall": 67.76
167
+ },
168
+ "Patient Education": {
169
+ "Overall": 67.76
170
+ },
171
+ "BC5": {
172
+ "Overall": 67.76
173
+ },
174
+ "NCBI": {
175
+ "Overall": 67.76
176
+ },
177
+ "DDI": {
178
+ "Overall": 67.76
179
+ },
180
+ "GAD": {
181
+ "Overall": 67.76
182
+ },
183
+ "HoC": {
184
+ "Overall": 67.76
185
+ },
186
+ "Pharma. QA": {
187
+ "Overall": 67.76
188
+ },
189
+ "Drug Inter.": {
190
+ "Overall": 67.76
191
  }
192
  },
193
  "LLaMA3-70B-Instruct": {