Spaces:
Sleeping
Sleeping
Update ShoppingMMLU_overall.json
Browse files- ShoppingMMLU_overall.json +47 -8
ShoppingMMLU_overall.json
CHANGED
@@ -1,27 +1,66 @@
|
|
1 |
{
|
2 |
-
"time": "
|
3 |
"results": {
|
4 |
-
"
|
5 |
"META": {
|
6 |
"Method": [
|
7 |
-
"
|
8 |
"https://aws.amazon.com/bedrock/claude/"
|
9 |
],
|
10 |
"Parameters": "",
|
11 |
"Org": "Anthropic",
|
12 |
"OpenSource": "No",
|
13 |
"Verified": "Yes"
|
14 |
-
},
|
15 |
-
|
16 |
"Overall": 80.75
|
17 |
},
|
18 |
-
"
|
19 |
"Overall": 71.63
|
20 |
},
|
21 |
-
"
|
22 |
"Overall": 70.17
|
23 |
},
|
24 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
"Overall": 67.76
|
26 |
}
|
27 |
},
|
|
|
1 |
{
|
2 |
+
"time": "241111120000",
|
3 |
"results": {
|
4 |
+
"Claude-2": {
|
5 |
"META": {
|
6 |
"Method": [
|
7 |
+
"Claude-2",
|
8 |
"https://aws.amazon.com/bedrock/claude/"
|
9 |
],
|
10 |
"Parameters": "",
|
11 |
"Org": "Anthropic",
|
12 |
"OpenSource": "No",
|
13 |
"Verified": "Yes"
|
14 |
+
},
|
15 |
+
"MedQA": {
|
16 |
"Overall": 80.75
|
17 |
},
|
18 |
+
"MedMCQA": {
|
19 |
"Overall": 71.63
|
20 |
},
|
21 |
+
"MMLU-Medicine": {
|
22 |
"Overall": 70.17
|
23 |
},
|
24 |
+
"PubMedQA": {
|
25 |
+
"Overall": 67.76
|
26 |
+
}
|
27 |
+
"Referral QA": {
|
28 |
+
"Overall": 67.76
|
29 |
+
}
|
30 |
+
"Treat Recom.": {
|
31 |
+
"Overall": 67.76
|
32 |
+
}
|
33 |
+
"MIMIC": {
|
34 |
+
"Overall": 67.76
|
35 |
+
}
|
36 |
+
"IU-Xray": {
|
37 |
+
"Overall": 67.76
|
38 |
+
}
|
39 |
+
"Hospitaliz. Summari.": {
|
40 |
+
"Overall": 67.76
|
41 |
+
}
|
42 |
+
"Patient Education": {
|
43 |
+
"Overall": 67.76
|
44 |
+
}
|
45 |
+
"BC5": {
|
46 |
+
"Overall": 67.76
|
47 |
+
}
|
48 |
+
"NCBI": {
|
49 |
+
"Overall": 67.76
|
50 |
+
}
|
51 |
+
"DDI": {
|
52 |
+
"Overall": 67.76
|
53 |
+
}
|
54 |
+
"GAD": {
|
55 |
+
"Overall": 67.76
|
56 |
+
}
|
57 |
+
"HoC": {
|
58 |
+
"Overall": 67.76
|
59 |
+
}
|
60 |
+
"Pharma. QA": {
|
61 |
+
"Overall": 67.76
|
62 |
+
}
|
63 |
+
"Drug Inter.": {
|
64 |
"Overall": 67.76
|
65 |
}
|
66 |
},
|