fenglinliu committed on
Commit
175683b
·
verified ·
1 Parent(s): 0aa10b9

Update ShoppingMMLU_overall.json

Browse files
Files changed (1) hide show
  1. ShoppingMMLU_overall.json +18 -18
ShoppingMMLU_overall.json CHANGED
@@ -13,55 +13,55 @@
13
  "Verified": "Yes"
14
  },
15
  "MedQA": {
16
- "Overall": 80.75
17
  },
18
  "MedMCQA": {
19
- "Overall": 71.63
20
  },
21
  "MMLU-Medicine": {
22
- "Overall": 70.17
23
  },
24
  "PubMedQA": {
25
- "Overall": 67.76
26
  },
27
  "Referral QA": {
28
- "Overall": 67.76
29
  },
30
  "Treat Recom.": {
31
- "Overall": 67.76
32
  },
33
  "MIMIC": {
34
- "Overall": 67.76
35
  },
36
  "IU-Xray": {
37
- "Overall": 67.76
38
  },
39
  "Hospitaliz. Summari.": {
40
- "Overall": 67.76
41
  },
42
  "Patient Education": {
43
- "Overall": 67.76
44
  },
45
  "BC5": {
46
- "Overall": 67.76
47
  },
48
  "NCBI": {
49
- "Overall": 67.76
50
  },
51
  "DDI": {
52
- "Overall": 67.76
53
  },
54
  "GAD": {
55
- "Overall": 67.76
56
  },
57
  "HoC": {
58
- "Overall": 67.76
59
  },
60
  "Pharma. QA": {
61
- "Overall": 67.76
62
  },
63
  "Drug Inter.": {
64
- "Overall": 67.76
65
  }
66
  },
67
  "GPT-3.5-turbo": {
@@ -1062,6 +1062,6 @@
1062
  "Multi-lingual Abilities": {
1063
  "Overall": 44.23
1064
  }
1065
- },
1066
  }
1067
  }
 
13
  "Verified": "Yes"
14
  },
15
  "MedQA": {
16
+ "Overall": 65.1
17
  },
18
  "MedMCQA": {
19
+ "Overall": 60.3
20
  },
21
  "MMLU-Medicine": {
22
+ "Overall": 78.7
23
  },
24
  "PubMedQA": {
25
+ "Overall": 70.8
26
  },
27
  "Referral QA": {
28
+ "Overall": 70.8
29
  },
30
  "Treat Recom.": {
31
+ "Overall": 9.1
32
  },
33
  "MIMIC": {
34
+ "Overall": 13.3
35
  },
36
  "IU-Xray": {
37
+ "Overall": 9.4
38
  },
39
  "Hospitaliz. Summari.": {
40
+ "Overall": 11.3
41
  },
42
  "Patient Education": {
43
+ "Overall": 8.4
44
  },
45
  "BC5": {
46
+ "Overall": 52.9
47
  },
48
  "NCBI": {
49
+ "Overall": 44.2
50
  },
51
  "DDI": {
52
+ "Overall": 50.4
53
  },
54
  "GAD": {
55
+ "Overall": 50.7
56
  },
57
  "HoC": {
58
+ "Overall": 70.8
59
  },
60
  "Pharma. QA": {
61
+ "Overall": 70.8
62
  },
63
  "Drug Inter.": {
64
+ "Overall": 51.5
65
  }
66
  },
67
  "GPT-3.5-turbo": {
 
1062
  "Multi-lingual Abilities": {
1063
  "Overall": 44.23
1064
  }
1065
+ }
1066
  }
1067
  }