Spaces:
Sleeping
Sleeping
Update clinicbench_result.json
Browse files- clinicbench_result.json +48 -45
clinicbench_result.json
CHANGED
@@ -17,12 +17,15 @@
|
|
17 |
"MedMCQA": {
|
18 |
"Overall": 43.0
|
19 |
},
|
20 |
-
"MMLU-Medicine": {
|
21 |
-
"Overall": 60.2
|
22 |
-
},
|
23 |
"PubMedQA": {
|
24 |
"Overall": 60.2
|
25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
"BC5": {
|
27 |
"Overall": 90.0
|
28 |
},
|
@@ -304,7 +307,7 @@
|
|
304 |
"Overall": 33.4
|
305 |
},
|
306 |
"MMLU-Medicine": {
|
307 |
-
"Overall":
|
308 |
},
|
309 |
"PubMedQA": {
|
310 |
"Overall": 64.8
|
@@ -366,7 +369,7 @@
|
|
366 |
"Overall": 30.6
|
367 |
},
|
368 |
"MMLU-Medicine": {
|
369 |
-
"Overall": 42.
|
370 |
},
|
371 |
"PubMedQA": {
|
372 |
"Overall": 63.4
|
@@ -381,10 +384,10 @@
|
|
381 |
"Overall": 12.3
|
382 |
},
|
383 |
"IU-Xray": {
|
384 |
-
"Overall":
|
385 |
},
|
386 |
"Hospitaliz. Summari.": {
|
387 |
-
"Overall": 4.
|
388 |
},
|
389 |
"Patient Education": {
|
390 |
"Overall": 4.6
|
@@ -393,7 +396,7 @@
|
|
393 |
"Overall": 40.1
|
394 |
},
|
395 |
"NCBI": {
|
396 |
-
"Overall":
|
397 |
},
|
398 |
"DDI": {
|
399 |
"Overall": 37.9
|
@@ -428,7 +431,7 @@
|
|
428 |
"Overall": 37.8
|
429 |
},
|
430 |
"MMLU-Medicine": {
|
431 |
-
"Overall": 46.
|
432 |
},
|
433 |
"PubMedQA": {
|
434 |
"Overall": 69.4
|
@@ -552,7 +555,7 @@
|
|
552 |
"Overall": 35.5
|
553 |
},
|
554 |
"MMLU-Medicine": {
|
555 |
-
"Overall": 46.
|
556 |
},
|
557 |
"PubMedQA": {
|
558 |
"Overall": 66.8
|
@@ -614,7 +617,7 @@
|
|
614 |
"Overall": 42.7
|
615 |
},
|
616 |
"MMLU-Medicine": {
|
617 |
-
"Overall":
|
618 |
},
|
619 |
"PubMedQA": {
|
620 |
"Overall": 67.4
|
@@ -676,7 +679,7 @@
|
|
676 |
"Overall": 74.7
|
677 |
},
|
678 |
"MMLU-Medicine": {
|
679 |
-
"Overall":
|
680 |
},
|
681 |
"PubMedQA": {
|
682 |
"Overall": 77.4
|
@@ -797,19 +800,19 @@
|
|
797 |
"Overall": 33.2
|
798 |
},
|
799 |
"MedMCQA": {
|
800 |
-
"Overall":
|
801 |
},
|
802 |
"MMLU-Medicine": {
|
803 |
-
"Overall":
|
804 |
},
|
805 |
"PubMedQA": {
|
806 |
"Overall": 63.8
|
807 |
},
|
808 |
"Referral QA": {
|
809 |
-
"Overall": 73.
|
810 |
},
|
811 |
"Treat Recom.": {
|
812 |
-
"Overall": 3
|
813 |
},
|
814 |
"MIMIC": {
|
815 |
"Overall": 8.9
|
@@ -827,13 +830,13 @@
|
|
827 |
"Overall": 45.8
|
828 |
},
|
829 |
"NCBI": {
|
830 |
-
"Overall":
|
831 |
},
|
832 |
"DDI": {
|
833 |
-
"Overall":
|
834 |
},
|
835 |
"GAD": {
|
836 |
-
"Overall":
|
837 |
},
|
838 |
"HoC": {
|
839 |
"Overall": 55.7
|
@@ -842,7 +845,7 @@
|
|
842 |
"Overall": 42.7
|
843 |
},
|
844 |
"Drug Inter.": {
|
845 |
-
"Overall":
|
846 |
}
|
847 |
},
|
848 |
"PMC-LLaMA-7B": {
|
@@ -862,7 +865,7 @@
|
|
862 |
"Overall": 29.8
|
863 |
},
|
864 |
"MMLU-Medicine": {
|
865 |
-
"Overall": 39.
|
866 |
},
|
867 |
"PubMedQA": {
|
868 |
"Overall": 60.2
|
@@ -904,7 +907,7 @@
|
|
904 |
"Overall": 45.5
|
905 |
},
|
906 |
"Drug Inter.": {
|
907 |
-
"Overall":
|
908 |
}
|
909 |
},
|
910 |
"Baize-Healthcare": {
|
@@ -918,13 +921,13 @@
|
|
918 |
"Verified": "Yes"
|
919 |
},
|
920 |
"MedQA": {
|
921 |
-
"Overall":
|
922 |
},
|
923 |
"MedMCQA": {
|
924 |
"Overall": 31.3
|
925 |
},
|
926 |
"MMLU-Medicine": {
|
927 |
-
"Overall":
|
928 |
},
|
929 |
"PubMedQA": {
|
930 |
"Overall": 64.4
|
@@ -942,7 +945,7 @@
|
|
942 |
"Overall": 4.4
|
943 |
},
|
944 |
"Hospitaliz. Summari.": {
|
945 |
-
"Overall": 3
|
946 |
},
|
947 |
"Patient Education": {
|
948 |
"Overall": 1.8
|
@@ -960,13 +963,13 @@
|
|
960 |
"Overall": 45.8
|
961 |
},
|
962 |
"HoC": {
|
963 |
-
"Overall":
|
964 |
},
|
965 |
"Pharma. QA": {
|
966 |
-
"Overall":
|
967 |
},
|
968 |
"Drug Inter.": {
|
969 |
-
"Overall":
|
970 |
}
|
971 |
},
|
972 |
"MedAlpaca-7B": {
|
@@ -983,16 +986,16 @@
|
|
983 |
"Overall": 35.1
|
984 |
},
|
985 |
"MedMCQA": {
|
986 |
-
"Overall": 32.
|
987 |
},
|
988 |
"MMLU-Medicine": {
|
989 |
-
"Overall":
|
990 |
},
|
991 |
"PubMedQA": {
|
992 |
"Overall": 62.4
|
993 |
},
|
994 |
"Referral QA": {
|
995 |
-
"Overall":
|
996 |
},
|
997 |
"Treat Recom.": {
|
998 |
"Overall": 4.8
|
@@ -1013,22 +1016,22 @@
|
|
1013 |
"Overall": 47.3
|
1014 |
},
|
1015 |
"NCBI": {
|
1016 |
-
"Overall": 39.
|
1017 |
},
|
1018 |
"DDI": {
|
1019 |
-
"Overall": 43.
|
1020 |
},
|
1021 |
"GAD": {
|
1022 |
-
"Overall":
|
1023 |
},
|
1024 |
"HoC": {
|
1025 |
-
"Overall":
|
1026 |
},
|
1027 |
"Pharma. QA": {
|
1028 |
-
"Overall": 47.
|
1029 |
},
|
1030 |
"Drug Inter.": {
|
1031 |
-
"Overall":
|
1032 |
}
|
1033 |
},
|
1034 |
"Meditron-7B": {
|
@@ -1042,16 +1045,16 @@
|
|
1042 |
"Verified": "Yes"
|
1043 |
},
|
1044 |
"MedQA": {
|
1045 |
-
"Overall":
|
1046 |
},
|
1047 |
"MedMCQA": {
|
1048 |
"Overall": 31.1
|
1049 |
},
|
1050 |
"MMLU-Medicine": {
|
1051 |
-
"Overall":
|
1052 |
},
|
1053 |
"PubMedQA": {
|
1054 |
-
"Overall": 61.
|
1055 |
},
|
1056 |
"Referral QA": {
|
1057 |
"Overall": 74.9
|
@@ -1066,7 +1069,7 @@
|
|
1066 |
"Overall": 7.8
|
1067 |
},
|
1068 |
"Hospitaliz. Summari.": {
|
1069 |
-
"Overall":
|
1070 |
},
|
1071 |
"Patient Education": {
|
1072 |
"Overall": 5.9
|
@@ -1110,7 +1113,7 @@
|
|
1110 |
"Overall": 34.8
|
1111 |
},
|
1112 |
"MMLU-Medicine": {
|
1113 |
-
"Overall":
|
1114 |
},
|
1115 |
"PubMedQA": {
|
1116 |
"Overall": 66.4
|
@@ -1234,7 +1237,7 @@
|
|
1234 |
"Overall": 35.7
|
1235 |
},
|
1236 |
"MMLU-Medicine": {
|
1237 |
-
"Overall":
|
1238 |
},
|
1239 |
"PubMedQA": {
|
1240 |
"Overall": 65.6
|
@@ -1267,7 +1270,7 @@
|
|
1267 |
"Overall": 44.1
|
1268 |
},
|
1269 |
"GAD": {
|
1270 |
-
"Overall": 44.
|
1271 |
},
|
1272 |
"HoC": {
|
1273 |
"Overall": 59.4
|
@@ -1296,7 +1299,7 @@
|
|
1296 |
"Overall": 45.8
|
1297 |
},
|
1298 |
"MMLU-Medicine": {
|
1299 |
-
"Overall":
|
1300 |
},
|
1301 |
"PubMedQA": {
|
1302 |
"Overall": 71.0
|
|
|
17 |
"MedMCQA": {
|
18 |
"Overall": 43.0
|
19 |
},
|
|
|
|
|
|
|
20 |
"PubMedQA": {
|
21 |
"Overall": 60.2
|
22 |
},
|
23 |
+
"MIMIC": {
|
24 |
+
"Overall": 46.1
|
25 |
+
},
|
26 |
+
"IU-Xray": {
|
27 |
+
"Overall": 67.9
|
28 |
+
},
|
29 |
"BC5": {
|
30 |
"Overall": 90.0
|
31 |
},
|
|
|
307 |
"Overall": 33.4
|
308 |
},
|
309 |
"MMLU-Medicine": {
|
310 |
+
"Overall": 43.4
|
311 |
},
|
312 |
"PubMedQA": {
|
313 |
"Overall": 64.8
|
|
|
369 |
"Overall": 30.6
|
370 |
},
|
371 |
"MMLU-Medicine": {
|
372 |
+
"Overall": 42.3
|
373 |
},
|
374 |
"PubMedQA": {
|
375 |
"Overall": 63.4
|
|
|
384 |
"Overall": 12.3
|
385 |
},
|
386 |
"IU-Xray": {
|
387 |
+
"Overall": 8.6
|
388 |
},
|
389 |
"Hospitaliz. Summari.": {
|
390 |
+
"Overall": 4.9
|
391 |
},
|
392 |
"Patient Education": {
|
393 |
"Overall": 4.6
|
|
|
396 |
"Overall": 40.1
|
397 |
},
|
398 |
"NCBI": {
|
399 |
+
"Overall": 34.8
|
400 |
},
|
401 |
"DDI": {
|
402 |
"Overall": 37.9
|
|
|
431 |
"Overall": 37.8
|
432 |
},
|
433 |
"MMLU-Medicine": {
|
434 |
+
"Overall": 46.3
|
435 |
},
|
436 |
"PubMedQA": {
|
437 |
"Overall": 69.4
|
|
|
555 |
"Overall": 35.5
|
556 |
},
|
557 |
"MMLU-Medicine": {
|
558 |
+
"Overall": 46.0
|
559 |
},
|
560 |
"PubMedQA": {
|
561 |
"Overall": 66.8
|
|
|
617 |
"Overall": 42.7
|
618 |
},
|
619 |
"MMLU-Medicine": {
|
620 |
+
"Overall": 54.0
|
621 |
},
|
622 |
"PubMedQA": {
|
623 |
"Overall": 67.4
|
|
|
679 |
"Overall": 74.7
|
680 |
},
|
681 |
"MMLU-Medicine": {
|
682 |
+
"Overall": 86.4
|
683 |
},
|
684 |
"PubMedQA": {
|
685 |
"Overall": 77.4
|
|
|
800 |
"Overall": 33.2
|
801 |
},
|
802 |
"MedMCQA": {
|
803 |
+
"Overall": 31.5
|
804 |
},
|
805 |
"MMLU-Medicine": {
|
806 |
+
"Overall": 40.4
|
807 |
},
|
808 |
"PubMedQA": {
|
809 |
"Overall": 63.8
|
810 |
},
|
811 |
"Referral QA": {
|
812 |
+
"Overall": 73.7
|
813 |
},
|
814 |
"Treat Recom.": {
|
815 |
+
"Overall": 5.3
|
816 |
},
|
817 |
"MIMIC": {
|
818 |
"Overall": 8.9
|
|
|
830 |
"Overall": 45.8
|
831 |
},
|
832 |
"NCBI": {
|
833 |
+
"Overall": 40.9
|
834 |
},
|
835 |
"DDI": {
|
836 |
+
"Overall": 41.2
|
837 |
},
|
838 |
"GAD": {
|
839 |
+
"Overall": 40.1
|
840 |
},
|
841 |
"HoC": {
|
842 |
"Overall": 55.7
|
|
|
845 |
"Overall": 42.7
|
846 |
},
|
847 |
"Drug Inter.": {
|
848 |
+
"Overall": 48.5
|
849 |
}
|
850 |
},
|
851 |
"PMC-LLaMA-7B": {
|
|
|
865 |
"Overall": 29.8
|
866 |
},
|
867 |
"MMLU-Medicine": {
|
868 |
+
"Overall": 39.0
|
869 |
},
|
870 |
"PubMedQA": {
|
871 |
"Overall": 60.2
|
|
|
907 |
"Overall": 45.5
|
908 |
},
|
909 |
"Drug Inter.": {
|
910 |
+
"Overall": 51.0
|
911 |
}
|
912 |
},
|
913 |
"Baize-Healthcare": {
|
|
|
921 |
"Verified": "Yes"
|
922 |
},
|
923 |
"MedQA": {
|
924 |
+
"Overall": 34.9
|
925 |
},
|
926 |
"MedMCQA": {
|
927 |
"Overall": 31.3
|
928 |
},
|
929 |
"MMLU-Medicine": {
|
930 |
+
"Overall": 41.9
|
931 |
},
|
932 |
"PubMedQA": {
|
933 |
"Overall": 64.4
|
|
|
945 |
"Overall": 4.4
|
946 |
},
|
947 |
"Hospitaliz. Summari.": {
|
948 |
+
"Overall": 4.3
|
949 |
},
|
950 |
"Patient Education": {
|
951 |
"Overall": 1.8
|
|
|
963 |
"Overall": 45.8
|
964 |
},
|
965 |
"HoC": {
|
966 |
+
"Overall": 54.5
|
967 |
},
|
968 |
"Pharma. QA": {
|
969 |
+
"Overall": 46.9
|
970 |
},
|
971 |
"Drug Inter.": {
|
972 |
+
"Overall": 50.5
|
973 |
}
|
974 |
},
|
975 |
"MedAlpaca-7B": {
|
|
|
986 |
"Overall": 35.1
|
987 |
},
|
988 |
"MedMCQA": {
|
989 |
+
"Overall": 32.9
|
990 |
},
|
991 |
"MMLU-Medicine": {
|
992 |
+
"Overall": 48.5
|
993 |
},
|
994 |
"PubMedQA": {
|
995 |
"Overall": 62.4
|
996 |
},
|
997 |
"Referral QA": {
|
998 |
+
"Overall": 75.3
|
999 |
},
|
1000 |
"Treat Recom.": {
|
1001 |
"Overall": 4.8
|
|
|
1016 |
"Overall": 47.3
|
1017 |
},
|
1018 |
"NCBI": {
|
1019 |
+
"Overall": 39.0
|
1020 |
},
|
1021 |
"DDI": {
|
1022 |
+
"Overall": 43.5
|
1023 |
},
|
1024 |
"GAD": {
|
1025 |
+
"Overall": 44.0
|
1026 |
},
|
1027 |
"HoC": {
|
1028 |
+
"Overall": 58.7
|
1029 |
},
|
1030 |
"Pharma. QA": {
|
1031 |
+
"Overall": 47.9
|
1032 |
},
|
1033 |
"Drug Inter.": {
|
1034 |
+
"Overall": 48.0
|
1035 |
}
|
1036 |
},
|
1037 |
"Meditron-7B": {
|
|
|
1045 |
"Verified": "Yes"
|
1046 |
},
|
1047 |
"MedQA": {
|
1048 |
+
"Overall": 33.5
|
1049 |
},
|
1050 |
"MedMCQA": {
|
1051 |
"Overall": 31.1
|
1052 |
},
|
1053 |
"MMLU-Medicine": {
|
1054 |
+
"Overall": 45.2
|
1055 |
},
|
1056 |
"PubMedQA": {
|
1057 |
+
"Overall": 61.6
|
1058 |
},
|
1059 |
"Referral QA": {
|
1060 |
"Overall": 74.9
|
|
|
1069 |
"Overall": 7.8
|
1070 |
},
|
1071 |
"Hospitaliz. Summari.": {
|
1072 |
+
"Overall": 6.8
|
1073 |
},
|
1074 |
"Patient Education": {
|
1075 |
"Overall": 5.9
|
|
|
1113 |
"Overall": 34.8
|
1114 |
},
|
1115 |
"MMLU-Medicine": {
|
1116 |
+
"Overall": 52.6
|
1117 |
},
|
1118 |
"PubMedQA": {
|
1119 |
"Overall": 66.4
|
|
|
1237 |
"Overall": 35.7
|
1238 |
},
|
1239 |
"MMLU-Medicine": {
|
1240 |
+
"Overall": 51.5
|
1241 |
},
|
1242 |
"PubMedQA": {
|
1243 |
"Overall": 65.6
|
|
|
1270 |
"Overall": 44.1
|
1271 |
},
|
1272 |
"GAD": {
|
1273 |
+
"Overall": 44.5
|
1274 |
},
|
1275 |
"HoC": {
|
1276 |
"Overall": 59.4
|
|
|
1299 |
"Overall": 45.8
|
1300 |
},
|
1301 |
"MMLU-Medicine": {
|
1302 |
+
"Overall": 68.4
|
1303 |
},
|
1304 |
"PubMedQA": {
|
1305 |
"Overall": 71.0
|