medical_llm_leaderboard

Sleeping

App Files Files Community

fenglinliu committed on Nov 11, 2024

Commit

ef2627d

verified ·

1 Parent(s): d0790e3

Update ShoppingMMLU_overall.json

Browse files

Files changed (1) hide show

ShoppingMMLU_overall.json +101 -23

ShoppingMMLU_overall.json CHANGED Viewed

@@ -64,52 +64,130 @@
                 "Overall": 67.76
             }
         },
-        "Claude2": {
             "META": {
                 "Method": [
-                    "Claude2",
-                    "https://aws.amazon.com/bedrock/claude/"
                 ],
                 "Parameters": "",
-                "Org": "Anthropic",
                 "OpenSource": "No",
                 "Verified": "Yes"
             },
-            "Shopping Concept Understanding": {
-                "Overall": 75.46
             },
-            "Shopping Knowledge Reasoning": {
-                "Overall": 65.5
             },
-            "User Behavior Alignment": {
-                "Overall": 63.53
             },
-            "Multi-lingual Abilities": {
-                "Overall": 65.24
             }
         },
-        "ChatGPT": {
             "META": {
                 "Method": [
-                    "ChatGPT",
-                    "https://platform.openai.com/docs/models#gpt-3-5-turbo"
                 ],
                 "Parameters": "",
                 "Org": "OpenAI",
                 "OpenSource": "No",
                 "Verified": "Yes"
             },
-            "Shopping Concept Understanding": {
-                "Overall": 75.63
             },
-            "Shopping Knowledge Reasoning": {
-                "Overall": 64.97
             },
-            "User Behavior Alignment": {
-                "Overall": 59.79
             },
-            "Multi-lingual Abilities": {
-                "Overall": 60.81
             }
         },
         "LLaMA3-70B-Instruct": {

                 "Overall": 67.76
             }
         },
+        "GPT-3.5-turbo": {
             "META": {
                 "Method": [
+                    "GPT-3.5-turbo",
+                    "https://platform.openai.com/docs/models#gpt-3-5-turbo"
                 ],
                 "Parameters": "",
+                "Org": "OpenAI",
                 "OpenSource": "No",
                 "Verified": "Yes"
             },
+            "MedQA": {
+                "Overall": 80.75
             },
+            "MedMCQA": {
+                "Overall": 71.63
             },
+            "MMLU-Medicine": {
+                "Overall": 70.17
             },
+            "PubMedQA": {
+                "Overall": 67.76
+            },
+            "Referral QA": {
+                "Overall": 67.76
+            },
+            "Treat Recom.": {
+                "Overall": 67.76
+            },
+            "MIMIC": {
+                "Overall": 67.76
+            },
+            "IU-Xray": {
+                "Overall": 67.76
+            },
+            "Hospitaliz. Summari.": {
+                "Overall": 67.76
+            },
+            "Patient Education": {
+                "Overall": 67.76
+            },
+            "BC5": {
+                "Overall": 67.76
+            },
+            "NCBI": {
+                "Overall": 67.76
+            },
+            "DDI": {
+                "Overall": 67.76
+            },
+            "GAD": {
+                "Overall": 67.76
+            },
+            "HoC": {
+                "Overall": 67.76
+            },
+            "Pharma. QA": {
+                "Overall": 67.76
+            },
+            "Drug Inter.": {
+                "Overall": 67.76
             }
         },
+        "GPT-4": {
             "META": {
                 "Method": [
+                    "GPT-4",
+                    "https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4"
                 ],
                 "Parameters": "",
                 "Org": "OpenAI",
                 "OpenSource": "No",
                 "Verified": "Yes"
             },
+            "MedQA": {
+                "Overall": 80.75
             },
+            "MedMCQA": {
+                "Overall": 71.63
             },
+            "MMLU-Medicine": {
+                "Overall": 70.17
             },
+            "PubMedQA": {
+                "Overall": 67.76
+            },
+            "Referral QA": {
+                "Overall": 67.76
+            },
+            "Treat Recom.": {
+                "Overall": 67.76
+            },
+            "MIMIC": {
+                "Overall": 67.76
+            },
+            "IU-Xray": {
+                "Overall": 67.76
+            },
+            "Hospitaliz. Summari.": {
+                "Overall": 67.76
+            },
+            "Patient Education": {
+                "Overall": 67.76
+            },
+            "BC5": {
+                "Overall": 67.76
+            },
+            "NCBI": {
+                "Overall": 67.76
+            },
+            "DDI": {
+                "Overall": 67.76
+            },
+            "GAD": {
+                "Overall": 67.76
+            },
+            "HoC": {
+                "Overall": 67.76
+            },
+            "Pharma. QA": {
+                "Overall": 67.76
+            },
+            "Drug Inter.": {
+                "Overall": 67.76
             }
         },
         "LLaMA3-70B-Instruct": {