Spaces:
Sleeping
Sleeping
Update clinicbench_result.json
Browse files- clinicbench_result.json +92 -23
clinicbench_result.json
CHANGED
@@ -8,7 +8,10 @@
|
|
8 |
],
|
9 |
"Parameters": "",
|
10 |
"Org": "",
|
11 |
-
"
|
|
|
|
|
|
|
12 |
"Verified": "Yes"
|
13 |
},
|
14 |
"MedQA": {
|
@@ -49,7 +52,10 @@
|
|
49 |
],
|
50 |
"Parameters": "",
|
51 |
"Org": "Anthropic",
|
52 |
-
"
|
|
|
|
|
|
|
53 |
"Verified": "Yes"
|
54 |
},
|
55 |
"MedQA": {
|
@@ -111,7 +117,10 @@
|
|
111 |
],
|
112 |
"Parameters": "",
|
113 |
"Org": "OpenAI",
|
114 |
-
"
|
|
|
|
|
|
|
115 |
"Verified": "Yes"
|
116 |
},
|
117 |
"MedQA": {
|
@@ -173,7 +182,10 @@
|
|
173 |
],
|
174 |
"Parameters": "",
|
175 |
"Org": "OpenAI",
|
176 |
-
"
|
|
|
|
|
|
|
177 |
"Verified": "Yes"
|
178 |
},
|
179 |
"MedQA": {
|
@@ -235,7 +247,10 @@
|
|
235 |
],
|
236 |
"Parameters": "7B",
|
237 |
"Org": "",
|
238 |
-
"
|
|
|
|
|
|
|
239 |
"Verified": "Yes"
|
240 |
},
|
241 |
"MedQA": {
|
@@ -297,7 +312,10 @@
|
|
297 |
],
|
298 |
"Parameters": "7B",
|
299 |
"Org": "LMSys",
|
300 |
-
"
|
|
|
|
|
|
|
301 |
"Verified": "Yes"
|
302 |
},
|
303 |
"MedQA": {
|
@@ -359,7 +377,10 @@
|
|
359 |
],
|
360 |
"Parameters": "7B",
|
361 |
"Org": "Meta",
|
362 |
-
"
|
|
|
|
|
|
|
363 |
"Verified": "Yes"
|
364 |
},
|
365 |
"MedQA": {
|
@@ -421,7 +442,10 @@
|
|
421 |
],
|
422 |
"Parameters": "7B",
|
423 |
"Org": "MistralAI",
|
424 |
-
"
|
|
|
|
|
|
|
425 |
"Verified": "Yes"
|
426 |
},
|
427 |
"MedQA": {
|
@@ -483,7 +507,10 @@
|
|
483 |
],
|
484 |
"Parameters": "13B",
|
485 |
"Org": "LMSys",
|
486 |
-
"
|
|
|
|
|
|
|
487 |
"Verified": "Yes"
|
488 |
},
|
489 |
"MedQA": {
|
@@ -545,7 +572,10 @@
|
|
545 |
],
|
546 |
"Parameters": "13B",
|
547 |
"Org": "Meta",
|
548 |
-
"
|
|
|
|
|
|
|
549 |
"Verified": "Yes"
|
550 |
},
|
551 |
"MedQA": {
|
@@ -607,7 +637,10 @@
|
|
607 |
],
|
608 |
"Parameters": "70B",
|
609 |
"Org": "Meta",
|
610 |
-
"
|
|
|
|
|
|
|
611 |
"Verified": "Yes"
|
612 |
},
|
613 |
"MedQA": {
|
@@ -669,7 +702,10 @@
|
|
669 |
],
|
670 |
"Parameters": "70B",
|
671 |
"Org": "Meta",
|
672 |
-
"
|
|
|
|
|
|
|
673 |
"Verified": "Yes"
|
674 |
},
|
675 |
"MedQA": {
|
@@ -731,7 +767,10 @@
|
|
731 |
],
|
732 |
"Parameters": "7B",
|
733 |
"Org": "",
|
734 |
-
"
|
|
|
|
|
|
|
735 |
"Verified": "Yes"
|
736 |
},
|
737 |
"MedQA": {
|
@@ -793,7 +832,10 @@
|
|
793 |
],
|
794 |
"Parameters": "7B",
|
795 |
"Org": "",
|
796 |
-
"
|
|
|
|
|
|
|
797 |
"Verified": "Yes"
|
798 |
},
|
799 |
"MedQA": {
|
@@ -855,7 +897,10 @@
|
|
855 |
],
|
856 |
"Parameters": "7B",
|
857 |
"Org": "",
|
858 |
-
"
|
|
|
|
|
|
|
859 |
"Verified": "Yes"
|
860 |
},
|
861 |
"MedQA": {
|
@@ -917,7 +962,10 @@
|
|
917 |
],
|
918 |
"Parameters": "7B",
|
919 |
"Org": "",
|
920 |
-
"
|
|
|
|
|
|
|
921 |
"Verified": "Yes"
|
922 |
},
|
923 |
"MedQA": {
|
@@ -979,7 +1027,10 @@
|
|
979 |
],
|
980 |
"Parameters": "7B",
|
981 |
"Org": "",
|
982 |
-
"
|
|
|
|
|
|
|
983 |
"Verified": "Yes"
|
984 |
},
|
985 |
"MedQA": {
|
@@ -1041,7 +1092,10 @@
|
|
1041 |
],
|
1042 |
"Parameters": "7B",
|
1043 |
"Org": "",
|
1044 |
-
"
|
|
|
|
|
|
|
1045 |
"Verified": "Yes"
|
1046 |
},
|
1047 |
"MedQA": {
|
@@ -1103,7 +1157,10 @@
|
|
1103 |
],
|
1104 |
"Parameters": "7B",
|
1105 |
"Org": "",
|
1106 |
-
"
|
|
|
|
|
|
|
1107 |
"Verified": "Yes"
|
1108 |
},
|
1109 |
"MedQA": {
|
@@ -1165,7 +1222,10 @@
|
|
1165 |
],
|
1166 |
"Parameters": "13B",
|
1167 |
"Org": "",
|
1168 |
-
"
|
|
|
|
|
|
|
1169 |
"Verified": "Yes"
|
1170 |
},
|
1171 |
"MedQA": {
|
@@ -1227,7 +1287,10 @@
|
|
1227 |
],
|
1228 |
"Parameters": "13B",
|
1229 |
"Org": "",
|
1230 |
-
"
|
|
|
|
|
|
|
1231 |
"Verified": "Yes"
|
1232 |
},
|
1233 |
"MedQA": {
|
@@ -1289,7 +1352,10 @@
|
|
1289 |
],
|
1290 |
"Parameters": "70B",
|
1291 |
"Org": "",
|
1292 |
-
"
|
|
|
|
|
|
|
1293 |
"Verified": "Yes"
|
1294 |
},
|
1295 |
"MedQA": {
|
@@ -1351,7 +1417,10 @@
|
|
1351 |
],
|
1352 |
"Parameters": "70B",
|
1353 |
"Org": "",
|
1354 |
-
"
|
|
|
|
|
|
|
1355 |
"Verified": "Yes"
|
1356 |
},
|
1357 |
"MedQA": {
|
|
|
8 |
],
|
9 |
"Parameters": "",
|
10 |
"Org": "",
|
11 |
+
"Commercial LLMs": "No",
|
12 |
+
"General LLMs": "No",
|
13 |
+
"Medical LLMs": "No",
|
14 |
+
"SOTA": "Yes",
|
15 |
"Verified": "Yes"
|
16 |
},
|
17 |
"MedQA": {
|
|
|
52 |
],
|
53 |
"Parameters": "",
|
54 |
"Org": "Anthropic",
|
55 |
+
"Commercial LLMs": "Yes",
|
56 |
+
"General LLMs": "Yes",
|
57 |
+
"Medical LLMs": "No",
|
58 |
+
"SOTA": "No",
|
59 |
"Verified": "Yes"
|
60 |
},
|
61 |
"MedQA": {
|
|
|
117 |
],
|
118 |
"Parameters": "",
|
119 |
"Org": "OpenAI",
|
120 |
+
"Commercial LLMs": "Yes",
|
121 |
+
"General LLMs": "Yes",
|
122 |
+
"Medical LLMs": "No",
|
123 |
+
"SOTA": "No",
|
124 |
"Verified": "Yes"
|
125 |
},
|
126 |
"MedQA": {
|
|
|
182 |
],
|
183 |
"Parameters": "",
|
184 |
"Org": "OpenAI",
|
185 |
+
"Commercial LLMs": "Yes",
|
186 |
+
"General LLMs": "Yes",
|
187 |
+
"Medical LLMs": "No",
|
188 |
+
"SOTA": "No",
|
189 |
"Verified": "Yes"
|
190 |
},
|
191 |
"MedQA": {
|
|
|
247 |
],
|
248 |
"Parameters": "7B",
|
249 |
"Org": "",
|
250 |
+
"Commercial LLMs": "No",
|
251 |
+
"General LLMs": "Yes",
|
252 |
+
"Medical LLMs": "No",
|
253 |
+
"SOTA": "No",
|
254 |
"Verified": "Yes"
|
255 |
},
|
256 |
"MedQA": {
|
|
|
312 |
],
|
313 |
"Parameters": "7B",
|
314 |
"Org": "LMSys",
|
315 |
+
"Commercial LLMs": "No",
|
316 |
+
"General LLMs": "Yes",
|
317 |
+
"Medical LLMs": "No",
|
318 |
+
"SOTA": "No",
|
319 |
"Verified": "Yes"
|
320 |
},
|
321 |
"MedQA": {
|
|
|
377 |
],
|
378 |
"Parameters": "7B",
|
379 |
"Org": "Meta",
|
380 |
+
"Commercial LLMs": "No",
|
381 |
+
"General LLMs": "Yes",
|
382 |
+
"Medical LLMs": "No",
|
383 |
+
"SOTA": "No",
|
384 |
"Verified": "Yes"
|
385 |
},
|
386 |
"MedQA": {
|
|
|
442 |
],
|
443 |
"Parameters": "7B",
|
444 |
"Org": "MistralAI",
|
445 |
+
"Commercial LLMs": "No",
|
446 |
+
"General LLMs": "Yes",
|
447 |
+
"Medical LLMs": "No",
|
448 |
+
"SOTA": "No",
|
449 |
"Verified": "Yes"
|
450 |
},
|
451 |
"MedQA": {
|
|
|
507 |
],
|
508 |
"Parameters": "13B",
|
509 |
"Org": "LMSys",
|
510 |
+
"Commercial LLMs": "No",
|
511 |
+
"General LLMs": "Yes",
|
512 |
+
"Medical LLMs": "No",
|
513 |
+
"SOTA": "No",
|
514 |
"Verified": "Yes"
|
515 |
},
|
516 |
"MedQA": {
|
|
|
572 |
],
|
573 |
"Parameters": "13B",
|
574 |
"Org": "Meta",
|
575 |
+
"Commercial LLMs": "No",
|
576 |
+
"General LLMs": "Yes",
|
577 |
+
"Medical LLMs": "No",
|
578 |
+
"SOTA": "No",
|
579 |
"Verified": "Yes"
|
580 |
},
|
581 |
"MedQA": {
|
|
|
637 |
],
|
638 |
"Parameters": "70B",
|
639 |
"Org": "Meta",
|
640 |
+
"Commercial LLMs": "No",
|
641 |
+
"General LLMs": "Yes",
|
642 |
+
"Medical LLMs": "No",
|
643 |
+
"SOTA": "No",
|
644 |
"Verified": "Yes"
|
645 |
},
|
646 |
"MedQA": {
|
|
|
702 |
],
|
703 |
"Parameters": "70B",
|
704 |
"Org": "Meta",
|
705 |
+
"Commercial LLMs": "No",
|
706 |
+
"General LLMs": "Yes",
|
707 |
+
"Medical LLMs": "No",
|
708 |
+
"SOTA": "No",
|
709 |
"Verified": "Yes"
|
710 |
},
|
711 |
"MedQA": {
|
|
|
767 |
],
|
768 |
"Parameters": "7B",
|
769 |
"Org": "",
|
770 |
+
"Commercial LLMs": "No",
|
771 |
+
"General LLMs": "No",
|
772 |
+
"Medical LLMs": "Yes",
|
773 |
+
"SOTA": "No",
|
774 |
"Verified": "Yes"
|
775 |
},
|
776 |
"MedQA": {
|
|
|
832 |
],
|
833 |
"Parameters": "7B",
|
834 |
"Org": "",
|
835 |
+
"Commercial LLMs": "No",
|
836 |
+
"General LLMs": "No",
|
837 |
+
"Medical LLMs": "Yes",
|
838 |
+
"SOTA": "No",
|
839 |
"Verified": "Yes"
|
840 |
},
|
841 |
"MedQA": {
|
|
|
897 |
],
|
898 |
"Parameters": "7B",
|
899 |
"Org": "",
|
900 |
+
"Commercial LLMs": "No",
|
901 |
+
"General LLMs": "No",
|
902 |
+
"Medical LLMs": "Yes",
|
903 |
+
"SOTA": "No",
|
904 |
"Verified": "Yes"
|
905 |
},
|
906 |
"MedQA": {
|
|
|
962 |
],
|
963 |
"Parameters": "7B",
|
964 |
"Org": "",
|
965 |
+
"Commercial LLMs": "No",
|
966 |
+
"General LLMs": "No",
|
967 |
+
"Medical LLMs": "Yes",
|
968 |
+
"SOTA": "No",
|
969 |
"Verified": "Yes"
|
970 |
},
|
971 |
"MedQA": {
|
|
|
1027 |
],
|
1028 |
"Parameters": "7B",
|
1029 |
"Org": "",
|
1030 |
+
"Commercial LLMs": "No",
|
1031 |
+
"General LLMs": "No",
|
1032 |
+
"Medical LLMs": "Yes",
|
1033 |
+
"SOTA": "No",
|
1034 |
"Verified": "Yes"
|
1035 |
},
|
1036 |
"MedQA": {
|
|
|
1092 |
],
|
1093 |
"Parameters": "7B",
|
1094 |
"Org": "",
|
1095 |
+
"Commercial LLMs": "No",
|
1096 |
+
"General LLMs": "No",
|
1097 |
+
"Medical LLMs": "Yes",
|
1098 |
+
"SOTA": "No",
|
1099 |
"Verified": "Yes"
|
1100 |
},
|
1101 |
"MedQA": {
|
|
|
1157 |
],
|
1158 |
"Parameters": "7B",
|
1159 |
"Org": "",
|
1160 |
+
"Commercial LLMs": "No",
|
1161 |
+
"General LLMs": "No",
|
1162 |
+
"Medical LLMs": "Yes",
|
1163 |
+
"SOTA": "No",
|
1164 |
"Verified": "Yes"
|
1165 |
},
|
1166 |
"MedQA": {
|
|
|
1222 |
],
|
1223 |
"Parameters": "13B",
|
1224 |
"Org": "",
|
1225 |
+
"Commercial LLMs": "No",
|
1226 |
+
"General LLMs": "No",
|
1227 |
+
"Medical LLMs": "Yes",
|
1228 |
+
"SOTA": "No",
|
1229 |
"Verified": "Yes"
|
1230 |
},
|
1231 |
"MedQA": {
|
|
|
1287 |
],
|
1288 |
"Parameters": "13B",
|
1289 |
"Org": "",
|
1290 |
+
"Commercial LLMs": "No",
|
1291 |
+
"General LLMs": "No",
|
1292 |
+
"Medical LLMs": "Yes",
|
1293 |
+
"SOTA": "No",
|
1294 |
"Verified": "Yes"
|
1295 |
},
|
1296 |
"MedQA": {
|
|
|
1352 |
],
|
1353 |
"Parameters": "70B",
|
1354 |
"Org": "",
|
1355 |
+
"Commercial LLMs": "No",
|
1356 |
+
"General LLMs": "No",
|
1357 |
+
"Medical LLMs": "Yes",
|
1358 |
+
"SOTA": "No",
|
1359 |
"Verified": "Yes"
|
1360 |
},
|
1361 |
"MedQA": {
|
|
|
1417 |
],
|
1418 |
"Parameters": "70B",
|
1419 |
"Org": "",
|
1420 |
+
"Commercial LLMs": "No",
|
1421 |
+
"General LLMs": "No",
|
1422 |
+
"Medical LLMs": "Yes",
|
1423 |
+
"SOTA": "No",
|
1424 |
"Verified": "Yes"
|
1425 |
},
|
1426 |
"MedQA": {
|