fenglinliu committed on
Commit
8aaabf1
·
verified ·
1 Parent(s): 1986367

Update ShoppingMMLU_overall.json

Browse files
Files changed (1) hide show
  1. ShoppingMMLU_overall.json +47 -8
ShoppingMMLU_overall.json CHANGED
@@ -1,27 +1,66 @@
1
  {
2
- "time": "241031154353",
3
  "results": {
4
- "Claude3-Sonnet": {
5
  "META": {
6
  "Method": [
7
- "Claude3-Sonnet",
8
  "https://aws.amazon.com/bedrock/claude/"
9
  ],
10
  "Parameters": "",
11
  "Org": "Anthropic",
12
  "OpenSource": "No",
13
  "Verified": "Yes"
14
- },
15
- "Shopping Concept Understanding": {
16
  "Overall": 80.75
17
  },
18
- "Shopping Knowledge Reasoning": {
19
  "Overall": 71.63
20
  },
21
- "User Behavior Alignment": {
22
  "Overall": 70.17
23
  },
24
- "Multi-lingual Abilities": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "Overall": 67.76
26
  }
27
  },
 
1
  {
2
+ "time": "241111120000",
3
  "results": {
4
+ "Claude-2": {
5
  "META": {
6
  "Method": [
7
+ "Claude-2",
8
  "https://aws.amazon.com/bedrock/claude/"
9
  ],
10
  "Parameters": "",
11
  "Org": "Anthropic",
12
  "OpenSource": "No",
13
  "Verified": "Yes"
14
+ },
15
+ "MedQA": {
16
  "Overall": 80.75
17
  },
18
+ "MedMCQA": {
19
  "Overall": 71.63
20
  },
21
+ "MMLU-Medicine": {
22
  "Overall": 70.17
23
  },
24
+ "PubMedQA": {
25
+ "Overall": 67.76
26
+ },
27
+ "Referral QA": {
28
+ "Overall": 67.76
29
+ },
30
+ "Treat Recom.": {
31
+ "Overall": 67.76
32
+ },
33
+ "MIMIC": {
34
+ "Overall": 67.76
35
+ },
36
+ "IU-Xray": {
37
+ "Overall": 67.76
38
+ },
39
+ "Hospitaliz. Summari.": {
40
+ "Overall": 67.76
41
+ },
42
+ "Patient Education": {
43
+ "Overall": 67.76
44
+ },
45
+ "BC5": {
46
+ "Overall": 67.76
47
+ },
48
+ "NCBI": {
49
+ "Overall": 67.76
50
+ },
51
+ "DDI": {
52
+ "Overall": 67.76
53
+ },
54
+ "GAD": {
55
+ "Overall": 67.76
56
+ },
57
+ "HoC": {
58
+ "Overall": 67.76
59
+ },
60
+ "Pharma. QA": {
61
+ "Overall": 67.76
62
+ },
63
+ "Drug Inter.": {
64
  "Overall": 67.76
65
  }
66
  },