fenglinliu committed on
Commit
c95ea14
·
verified ·
1 Parent(s): 5e5765f

Update clinicbench_result.json

Browse files
Files changed (1) hide show
  1. clinicbench_result.json +48 -45
clinicbench_result.json CHANGED
@@ -17,12 +17,15 @@
17
  "MedMCQA": {
18
  "Overall": 43.0
19
  },
20
- "MMLU-Medicine": {
21
- "Overall": 60.2
22
- },
23
  "PubMedQA": {
24
  "Overall": 60.2
25
  },
 
 
 
 
 
 
26
  "BC5": {
27
  "Overall": 90.0
28
  },
@@ -304,7 +307,7 @@
304
  "Overall": 33.4
305
  },
306
  "MMLU-Medicine": {
307
- "Overall": 37.3
308
  },
309
  "PubMedQA": {
310
  "Overall": 64.8
@@ -366,7 +369,7 @@
366
  "Overall": 30.6
367
  },
368
  "MMLU-Medicine": {
369
- "Overall": 42.4
370
  },
371
  "PubMedQA": {
372
  "Overall": 63.4
@@ -381,10 +384,10 @@
381
  "Overall": 12.3
382
  },
383
  "IU-Xray": {
384
- "Overall": 4.9
385
  },
386
  "Hospitaliz. Summari.": {
387
- "Overall": 4.6
388
  },
389
  "Patient Education": {
390
  "Overall": 4.6
@@ -393,7 +396,7 @@
393
  "Overall": 40.1
394
  },
395
  "NCBI": {
396
- "Overall": 38.4
397
  },
398
  "DDI": {
399
  "Overall": 37.9
@@ -428,7 +431,7 @@
428
  "Overall": 37.8
429
  },
430
  "MMLU-Medicine": {
431
- "Overall": 46.9
432
  },
433
  "PubMedQA": {
434
  "Overall": 69.4
@@ -552,7 +555,7 @@
552
  "Overall": 35.5
553
  },
554
  "MMLU-Medicine": {
555
- "Overall": 46.4
556
  },
557
  "PubMedQA": {
558
  "Overall": 66.8
@@ -614,7 +617,7 @@
614
  "Overall": 42.7
615
  },
616
  "MMLU-Medicine": {
617
- "Overall": 67.4
618
  },
619
  "PubMedQA": {
620
  "Overall": 67.4
@@ -676,7 +679,7 @@
676
  "Overall": 74.7
677
  },
678
  "MMLU-Medicine": {
679
- "Overall": 83.4
680
  },
681
  "PubMedQA": {
682
  "Overall": 77.4
@@ -797,19 +800,19 @@
797
  "Overall": 33.2
798
  },
799
  "MedMCQA": {
800
- "Overall": 29.3
801
  },
802
  "MMLU-Medicine": {
803
- "Overall": 37.0
804
  },
805
  "PubMedQA": {
806
  "Overall": 63.8
807
  },
808
  "Referral QA": {
809
- "Overall": 73.3
810
  },
811
  "Treat Recom.": {
812
- "Overall": 3.8
813
  },
814
  "MIMIC": {
815
  "Overall": 8.9
@@ -827,13 +830,13 @@
827
  "Overall": 45.8
828
  },
829
  "NCBI": {
830
- "Overall": 39.0
831
  },
832
  "DDI": {
833
- "Overall": 40.4
834
  },
835
  "GAD": {
836
- "Overall": 38.1
837
  },
838
  "HoC": {
839
  "Overall": 55.7
@@ -842,7 +845,7 @@
842
  "Overall": 42.7
843
  },
844
  "Drug Inter.": {
845
- "Overall": 51.0
846
  }
847
  },
848
  "PMC-LLaMA-7B": {
@@ -862,7 +865,7 @@
862
  "Overall": 29.8
863
  },
864
  "MMLU-Medicine": {
865
- "Overall": 39.5
866
  },
867
  "PubMedQA": {
868
  "Overall": 60.2
@@ -904,7 +907,7 @@
904
  "Overall": 45.5
905
  },
906
  "Drug Inter.": {
907
- "Overall": 47.8
908
  }
909
  },
910
  "Baize-Healthcare": {
@@ -918,13 +921,13 @@
918
  "Verified": "Yes"
919
  },
920
  "MedQA": {
921
- "Overall": 29.4
922
  },
923
  "MedMCQA": {
924
  "Overall": 31.3
925
  },
926
  "MMLU-Medicine": {
927
- "Overall": 39.9
928
  },
929
  "PubMedQA": {
930
  "Overall": 64.4
@@ -942,7 +945,7 @@
942
  "Overall": 4.4
943
  },
944
  "Hospitaliz. Summari.": {
945
- "Overall": 3.0
946
  },
947
  "Patient Education": {
948
  "Overall": 1.8
@@ -960,13 +963,13 @@
960
  "Overall": 45.8
961
  },
962
  "HoC": {
963
- "Overall": 56.9
964
  },
965
  "Pharma. QA": {
966
- "Overall": 50.4
967
  },
968
  "Drug Inter.": {
969
- "Overall": 49.5
970
  }
971
  },
972
  "MedAlpaca-7B": {
@@ -983,16 +986,16 @@
983
  "Overall": 35.1
984
  },
985
  "MedMCQA": {
986
- "Overall": 32.3
987
  },
988
  "MMLU-Medicine": {
989
- "Overall": 41.9
990
  },
991
  "PubMedQA": {
992
  "Overall": 62.4
993
  },
994
  "Referral QA": {
995
- "Overall": 74.5
996
  },
997
  "Treat Recom.": {
998
  "Overall": 4.8
@@ -1013,22 +1016,22 @@
1013
  "Overall": 47.3
1014
  },
1015
  "NCBI": {
1016
- "Overall": 39.3
1017
  },
1018
  "DDI": {
1019
- "Overall": 43.4
1020
  },
1021
  "GAD": {
1022
- "Overall": 45.4
1023
  },
1024
  "HoC": {
1025
- "Overall": 59.0
1026
  },
1027
  "Pharma. QA": {
1028
- "Overall": 47.7
1029
  },
1030
  "Drug Inter.": {
1031
- "Overall": 52.7
1032
  }
1033
  },
1034
  "Meditron-7B": {
@@ -1042,16 +1045,16 @@
1042
  "Verified": "Yes"
1043
  },
1044
  "MedQA": {
1045
- "Overall": 35.3
1046
  },
1047
  "MedMCQA": {
1048
  "Overall": 31.1
1049
  },
1050
  "MMLU-Medicine": {
1051
- "Overall": 41.3
1052
  },
1053
  "PubMedQA": {
1054
- "Overall": 61.4
1055
  },
1056
  "Referral QA": {
1057
  "Overall": 74.9
@@ -1066,7 +1069,7 @@
1066
  "Overall": 7.8
1067
  },
1068
  "Hospitaliz. Summari.": {
1069
- "Overall": 4.8
1070
  },
1071
  "Patient Education": {
1072
  "Overall": 5.9
@@ -1110,7 +1113,7 @@
1110
  "Overall": 34.8
1111
  },
1112
  "MMLU-Medicine": {
1113
- "Overall": 41.6
1114
  },
1115
  "PubMedQA": {
1116
  "Overall": 66.4
@@ -1234,7 +1237,7 @@
1234
  "Overall": 35.7
1235
  },
1236
  "MMLU-Medicine": {
1237
- "Overall": 53.6
1238
  },
1239
  "PubMedQA": {
1240
  "Overall": 65.6
@@ -1267,7 +1270,7 @@
1267
  "Overall": 44.1
1268
  },
1269
  "GAD": {
1270
- "Overall": 44.4
1271
  },
1272
  "HoC": {
1273
  "Overall": 59.4
@@ -1296,7 +1299,7 @@
1296
  "Overall": 45.8
1297
  },
1298
  "MMLU-Medicine": {
1299
- "Overall": 65.4
1300
  },
1301
  "PubMedQA": {
1302
  "Overall": 71.0
 
17
  "MedMCQA": {
18
  "Overall": 43.0
19
  },
 
 
 
20
  "PubMedQA": {
21
  "Overall": 60.2
22
  },
23
+ "MIMIC": {
24
+ "Overall": 46.1
25
+ },
26
+ "IU-Xray": {
27
+ "Overall": 67.9
28
+ },
29
  "BC5": {
30
  "Overall": 90.0
31
  },
 
307
  "Overall": 33.4
308
  },
309
  "MMLU-Medicine": {
310
+ "Overall": 43.4
311
  },
312
  "PubMedQA": {
313
  "Overall": 64.8
 
369
  "Overall": 30.6
370
  },
371
  "MMLU-Medicine": {
372
+ "Overall": 42.3
373
  },
374
  "PubMedQA": {
375
  "Overall": 63.4
 
384
  "Overall": 12.3
385
  },
386
  "IU-Xray": {
387
+ "Overall": 8.6
388
  },
389
  "Hospitaliz. Summari.": {
390
+ "Overall": 4.9
391
  },
392
  "Patient Education": {
393
  "Overall": 4.6
 
396
  "Overall": 40.1
397
  },
398
  "NCBI": {
399
+ "Overall": 34.8
400
  },
401
  "DDI": {
402
  "Overall": 37.9
 
431
  "Overall": 37.8
432
  },
433
  "MMLU-Medicine": {
434
+ "Overall": 46.3
435
  },
436
  "PubMedQA": {
437
  "Overall": 69.4
 
555
  "Overall": 35.5
556
  },
557
  "MMLU-Medicine": {
558
+ "Overall": 46.0
559
  },
560
  "PubMedQA": {
561
  "Overall": 66.8
 
617
  "Overall": 42.7
618
  },
619
  "MMLU-Medicine": {
620
+ "Overall": 54.0
621
  },
622
  "PubMedQA": {
623
  "Overall": 67.4
 
679
  "Overall": 74.7
680
  },
681
  "MMLU-Medicine": {
682
+ "Overall": 86.4
683
  },
684
  "PubMedQA": {
685
  "Overall": 77.4
 
800
  "Overall": 33.2
801
  },
802
  "MedMCQA": {
803
+ "Overall": 31.5
804
  },
805
  "MMLU-Medicine": {
806
+ "Overall": 40.4
807
  },
808
  "PubMedQA": {
809
  "Overall": 63.8
810
  },
811
  "Referral QA": {
812
+ "Overall": 73.7
813
  },
814
  "Treat Recom.": {
815
+ "Overall": 5.3
816
  },
817
  "MIMIC": {
818
  "Overall": 8.9
 
830
  "Overall": 45.8
831
  },
832
  "NCBI": {
833
+ "Overall": 40.9
834
  },
835
  "DDI": {
836
+ "Overall": 41.2
837
  },
838
  "GAD": {
839
+ "Overall": 40.1
840
  },
841
  "HoC": {
842
  "Overall": 55.7
 
845
  "Overall": 42.7
846
  },
847
  "Drug Inter.": {
848
+ "Overall": 48.5
849
  }
850
  },
851
  "PMC-LLaMA-7B": {
 
865
  "Overall": 29.8
866
  },
867
  "MMLU-Medicine": {
868
+ "Overall": 39.0
869
  },
870
  "PubMedQA": {
871
  "Overall": 60.2
 
907
  "Overall": 45.5
908
  },
909
  "Drug Inter.": {
910
+ "Overall": 51.0
911
  }
912
  },
913
  "Baize-Healthcare": {
 
921
  "Verified": "Yes"
922
  },
923
  "MedQA": {
924
+ "Overall": 34.9
925
  },
926
  "MedMCQA": {
927
  "Overall": 31.3
928
  },
929
  "MMLU-Medicine": {
930
+ "Overall": 41.9
931
  },
932
  "PubMedQA": {
933
  "Overall": 64.4
 
945
  "Overall": 4.4
946
  },
947
  "Hospitaliz. Summari.": {
948
+ "Overall": 4.3
949
  },
950
  "Patient Education": {
951
  "Overall": 1.8
 
963
  "Overall": 45.8
964
  },
965
  "HoC": {
966
+ "Overall": 54.5
967
  },
968
  "Pharma. QA": {
969
+ "Overall": 46.9
970
  },
971
  "Drug Inter.": {
972
+ "Overall": 50.5
973
  }
974
  },
975
  "MedAlpaca-7B": {
 
986
  "Overall": 35.1
987
  },
988
  "MedMCQA": {
989
+ "Overall": 32.9
990
  },
991
  "MMLU-Medicine": {
992
+ "Overall": 48.5
993
  },
994
  "PubMedQA": {
995
  "Overall": 62.4
996
  },
997
  "Referral QA": {
998
+ "Overall": 75.3
999
  },
1000
  "Treat Recom.": {
1001
  "Overall": 4.8
 
1016
  "Overall": 47.3
1017
  },
1018
  "NCBI": {
1019
+ "Overall": 39.0
1020
  },
1021
  "DDI": {
1022
+ "Overall": 43.5
1023
  },
1024
  "GAD": {
1025
+ "Overall": 44.0
1026
  },
1027
  "HoC": {
1028
+ "Overall": 58.7
1029
  },
1030
  "Pharma. QA": {
1031
+ "Overall": 47.9
1032
  },
1033
  "Drug Inter.": {
1034
+ "Overall": 48.0
1035
  }
1036
  },
1037
  "Meditron-7B": {
 
1045
  "Verified": "Yes"
1046
  },
1047
  "MedQA": {
1048
+ "Overall": 33.5
1049
  },
1050
  "MedMCQA": {
1051
  "Overall": 31.1
1052
  },
1053
  "MMLU-Medicine": {
1054
+ "Overall": 45.2
1055
  },
1056
  "PubMedQA": {
1057
+ "Overall": 61.6
1058
  },
1059
  "Referral QA": {
1060
  "Overall": 74.9
 
1069
  "Overall": 7.8
1070
  },
1071
  "Hospitaliz. Summari.": {
1072
+ "Overall": 6.8
1073
  },
1074
  "Patient Education": {
1075
  "Overall": 5.9
 
1113
  "Overall": 34.8
1114
  },
1115
  "MMLU-Medicine": {
1116
+ "Overall": 52.6
1117
  },
1118
  "PubMedQA": {
1119
  "Overall": 66.4
 
1237
  "Overall": 35.7
1238
  },
1239
  "MMLU-Medicine": {
1240
+ "Overall": 51.5
1241
  },
1242
  "PubMedQA": {
1243
  "Overall": 65.6
 
1270
  "Overall": 44.1
1271
  },
1272
  "GAD": {
1273
+ "Overall": 44.5
1274
  },
1275
  "HoC": {
1276
  "Overall": 59.4
 
1299
  "Overall": 45.8
1300
  },
1301
  "MMLU-Medicine": {
1302
+ "Overall": 68.4
1303
  },
1304
  "PubMedQA": {
1305
  "Overall": 71.0