fenglinliu commited on
Commit
8c6a180
·
verified ·
1 Parent(s): 8a53052

Update ShoppingMMLU_overall.json

Browse files
Files changed (1) hide show
  1. ShoppingMMLU_overall.json +642 -330
ShoppingMMLU_overall.json CHANGED
@@ -11,7 +11,7 @@
11
  "OpenSource": "No",
12
  "Verified": "Yes"
13
  },
14
- "MedQA": {
15
  "Overall": 65.1
16
  },
17
  "MedMCQA": {
@@ -24,7 +24,7 @@
24
  "Overall": 70.8
25
  },
26
  "Referral QA": {
27
- "Overall": 70.8
28
  },
29
  "Treat Recom.": {
30
  "Overall": 9.1
@@ -57,7 +57,7 @@
57
  "Overall": 70.8
58
  },
59
  "Pharma. QA": {
60
- "Overall": 70.8
61
  },
62
  "Drug Inter.": {
63
  "Overall": 51.5
@@ -73,7 +73,7 @@
73
  "OpenSource": "No",
74
  "Verified": "Yes"
75
  },
76
- "MedQA": {
77
  "Overall": 61.2
78
  },
79
  "MedMCQA": {
@@ -89,7 +89,7 @@
89
  "Overall": 81.1
90
  },
91
  "Treat Recom.": {
92
- "Overall": 7.3
93
  },
94
  "MIMIC": {
95
  "Overall": 14.1
@@ -98,7 +98,7 @@
98
  "Overall": 10.3
99
  },
100
  "Hospitaliz. Summari.": {
101
- "Overall": 10.5
102
  },
103
  "Patient Education": {
104
  "Overall": 9.2
@@ -116,7 +116,7 @@
116
  "Overall": 50.8
117
  },
118
  "HoC": {
119
- "Overall": 66.4
120
  },
121
  "Pharma. QA": {
122
  "Overall": 57.3
@@ -135,56 +135,56 @@
135
  "OpenSource": "No",
136
  "Verified": "Yes"
137
  },
138
- "MedQA": {
139
- "Overall": 80.75
140
  },
141
  "MedMCQA": {
142
- "Overall": 71.63
143
  },
144
  "MMLU-Medicine": {
145
- "Overall": 70.17
146
  },
147
  "PubMedQA": {
148
- "Overall": 67.76
149
  },
150
  "Referral QA": {
151
- "Overall": 67.76
152
  },
153
  "Treat Recom.": {
154
- "Overall": 67.76
155
  },
156
  "MIMIC": {
157
- "Overall": 67.76
158
  },
159
  "IU-Xray": {
160
- "Overall": 67.76
161
  },
162
  "Hospitaliz. Summari.": {
163
- "Overall": 67.76
164
  },
165
  "Patient Education": {
166
- "Overall": 67.76
167
  },
168
  "BC5": {
169
- "Overall": 67.76
170
  },
171
  "NCBI": {
172
- "Overall": 67.76
173
  },
174
  "DDI": {
175
- "Overall": 67.76
176
  },
177
  "GAD": {
178
- "Overall": 67.76
179
  },
180
  "HoC": {
181
- "Overall": 67.76
182
  },
183
  "Pharma. QA": {
184
- "Overall": 67.76
185
  },
186
  "Drug Inter.": {
187
- "Overall": 67.76
188
  }
189
  },
190
  "Alpaca": {
@@ -193,175 +193,370 @@
193
  "Alpaca"
194
  ],
195
  "Parameters": "7B",
196
- "Org": "OpenAI",
197
- "OpenSource": "No",
198
  "Verified": "Yes"
199
  },
200
- "MedQA": {
201
- "Overall": 80.75
202
  },
203
  "MedMCQA": {
204
- "Overall": 71.63
205
  },
206
  "MMLU-Medicine": {
207
- "Overall": 70.17
208
  },
209
  "PubMedQA": {
210
- "Overall": 67.76
211
  },
212
  "Referral QA": {
213
- "Overall": 67.76
214
  },
215
  "Treat Recom.": {
216
- "Overall": 67.76
217
  },
218
  "MIMIC": {
219
- "Overall": 67.76
220
  },
221
  "IU-Xray": {
222
- "Overall": 67.76
223
  },
224
  "Hospitaliz. Summari.": {
225
- "Overall": 67.76
226
  },
227
  "Patient Education": {
228
- "Overall": 67.76
229
  },
230
  "BC5": {
231
- "Overall": 67.76
232
  },
233
  "NCBI": {
234
- "Overall": 67.76
235
  },
236
  "DDI": {
237
- "Overall": 67.76
238
  },
239
  "GAD": {
240
- "Overall": 67.76
241
  },
242
  "HoC": {
243
- "Overall": 67.76
244
  },
245
  "Pharma. QA": {
246
- "Overall": 67.76
247
  },
248
  "Drug Inter.": {
249
- "Overall": 67.76
250
  }
251
  },
252
- "Vicuna-7B-v1.5": {
253
  "META": {
254
  "Method": [
255
- "Vicuna-7B-v1.5"
256
  ],
257
  "Parameters": "7B",
258
  "Org": "LMSys",
259
  "OpenSource": "Yes",
260
  "Verified": "Yes"
261
  },
262
- "Shopping Concept Understanding": {
263
- "Overall": 53.46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  },
265
- "Shopping Knowledge Reasoning": {
266
- "Overall": 45.06
267
  },
268
- "User Behavior Alignment": {
269
- "Overall": 41.11
270
  },
271
- "Multi-lingual Abilities": {
272
- "Overall": 43.82
 
 
 
 
 
 
273
  }
274
  },
275
- "LLaMA2-7B-Chat": {
276
  "META": {
277
  "Method": [
278
- "LLaMA2-7B-Chat"
279
  ],
280
  "Parameters": "7B",
281
  "Org": "Meta",
282
  "OpenSource": "Yes",
283
  "Verified": "Yes"
284
  },
285
- "Shopping Concept Understanding": {
286
- "Overall": 51.67
287
  },
288
- "Shopping Knowledge Reasoning": {
289
- "Overall": 43.48
290
  },
291
- "User Behavior Alignment": {
292
- "Overall": 41.42
293
  },
294
- "Multi-lingual Abilities": {
295
- "Overall": 40.43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  }
297
  },
298
- "Mistral-7B-Instruct": {
299
  "META": {
300
  "Method": [
301
- "Mistral-7B-Instruct"
302
  ],
303
  "Parameters": "7B",
304
  "Org": "MistralAI",
305
  "OpenSource": "Yes",
306
  "Verified": "Yes"
307
  },
308
- "Shopping Concept Understanding": {
309
- "Overall": 62.03
310
  },
311
- "Shopping Knowledge Reasoning": {
312
- "Overall": 46.36
 
 
 
 
 
 
 
 
 
313
  },
314
- "User Behavior Alignment": {
315
- "Overall": 42.21
316
  },
317
- "Multi-lingual Abilities": {
318
- "Overall": 43.32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  }
320
  },
321
- "Vicuna-13B-v1.5": {
322
  "META": {
323
  "Method": [
324
- "Vicuna-13B-v1.5"
325
  ],
326
  "Parameters": "13B",
327
  "Org": "LMSys",
328
  "OpenSource": "Yes",
329
  "Verified": "Yes"
330
  },
331
- "Shopping Concept Understanding": {
332
- "Overall": 59.64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  },
334
- "Shopping Knowledge Reasoning": {
335
- "Overall": 52.63
336
  },
337
- "User Behavior Alignment": {
338
- "Overall": 49.81
339
  },
340
- "Multi-lingual Abilities": {
341
- "Overall": 49.64
 
 
 
342
  }
343
  },
344
- "LLaMA-2-13B-Chat": {
345
  "META": {
346
  "Method": [
347
- "LLaMA-2-13B-Chat"
348
  ],
349
  "Parameters": "13B",
350
  "Org": "Meta",
351
  "OpenSource": "Yes",
352
  "Verified": "Yes"
353
  },
354
- "Shopping Concept Understanding": {
355
- "Overall": 51.79
356
  },
357
- "Shopping Knowledge Reasoning": {
358
- "Overall": 45.01
359
  },
360
- "User Behavior Alignment": {
361
- "Overall": 39.95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  },
363
- "Multi-lingual Abilities": {
364
- "Overall": 42.99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  }
366
  },
367
  "LLaMA-2-70B": {
@@ -374,17 +569,56 @@
374
  "OpenSource": "Yes",
375
  "Verified": "Yes"
376
  },
377
- "Shopping Concept Understanding": {
378
- "Overall": 61.84
379
  },
380
- "Shopping Knowledge Reasoning": {
381
- "Overall": 40.73
382
  },
383
- "User Behavior Alignment": {
384
- "Overall": 44.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  },
386
- "Multi-lingual Abilities": {
387
- "Overall": 47.04
 
 
 
388
  }
389
  },
390
  "LLaMA-3-70B": {
@@ -397,17 +631,56 @@
397
  "OpenSource": "Yes",
398
  "Verified": "Yes"
399
  },
400
- "Shopping Concept Understanding": {
401
- "Overall": 69.59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  },
403
- "Shopping Knowledge Reasoning": {
404
- "Overall": 63.56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  },
406
- "User Behavior Alignment": {
407
- "Overall": 55.77
408
  },
409
- "Multi-lingual Abilities": {
410
- "Overall": 58.95
411
  }
412
  },
413
  "Huatuo": {
@@ -415,61 +688,61 @@
415
  "Method": [
416
  "Huatuo"
417
  ],
418
- "Parameters": "",
419
- "Org": "OpenAI",
420
  "OpenSource": "No",
421
  "Verified": "Yes"
422
  },
423
- "MedQA": {
424
- "Overall": 80.75
425
  },
426
  "MedMCQA": {
427
- "Overall": 71.63
428
  },
429
  "MMLU-Medicine": {
430
- "Overall": 70.17
431
  },
432
  "PubMedQA": {
433
- "Overall": 67.76
434
  },
435
  "Referral QA": {
436
- "Overall": 67.76
437
  },
438
  "Treat Recom.": {
439
- "Overall": 67.76
440
  },
441
  "MIMIC": {
442
- "Overall": 67.76
443
  },
444
  "IU-Xray": {
445
- "Overall": 67.76
446
  },
447
  "Hospitaliz. Summari.": {
448
- "Overall": 67.76
449
  },
450
  "Patient Education": {
451
- "Overall": 67.76
452
  },
453
  "BC5": {
454
- "Overall": 67.76
455
  },
456
  "NCBI": {
457
- "Overall": 67.76
458
  },
459
  "DDI": {
460
- "Overall": 67.76
461
  },
462
  "GAD": {
463
- "Overall": 67.76
464
  },
465
  "HoC": {
466
- "Overall": 67.76
467
  },
468
  "Pharma. QA": {
469
- "Overall": 67.76
470
  },
471
  "Drug Inter.": {
472
- "Overall": 67.76
473
  }
474
  },
475
  "ChatDoctor": {
@@ -477,123 +750,123 @@
477
  "Method": [
478
  "ChatDoctor"
479
  ],
480
- "Parameters": "",
481
- "Org": "OpenAI",
482
  "OpenSource": "No",
483
  "Verified": "Yes"
484
  },
485
- "MedQA": {
486
- "Overall": 80.75
487
  },
488
  "MedMCQA": {
489
- "Overall": 71.63
490
  },
491
  "MMLU-Medicine": {
492
- "Overall": 70.17
493
  },
494
  "PubMedQA": {
495
- "Overall": 67.76
496
  },
497
  "Referral QA": {
498
- "Overall": 67.76
499
  },
500
  "Treat Recom.": {
501
- "Overall": 67.76
502
  },
503
  "MIMIC": {
504
- "Overall": 67.76
505
  },
506
  "IU-Xray": {
507
- "Overall": 67.76
508
  },
509
  "Hospitaliz. Summari.": {
510
- "Overall": 67.76
511
  },
512
  "Patient Education": {
513
- "Overall": 67.76
514
  },
515
  "BC5": {
516
- "Overall": 67.76
517
  },
518
  "NCBI": {
519
- "Overall": 67.76
520
  },
521
  "DDI": {
522
- "Overall": 67.76
523
  },
524
  "GAD": {
525
- "Overall": 67.76
526
  },
527
  "HoC": {
528
- "Overall": 67.76
529
  },
530
  "Pharma. QA": {
531
- "Overall": 67.76
532
  },
533
  "Drug Inter.": {
534
- "Overall": 67.76
535
  }
536
  },
537
- "PMC-LLaMA": {
538
  "META": {
539
  "Method": [
540
- "PMC-LLaMA"
541
  ],
542
  "Parameters": "7B",
543
- "Org": "OpenAI",
544
  "OpenSource": "No",
545
  "Verified": "Yes"
546
  },
547
- "MedQA": {
548
- "Overall": 80.75
549
  },
550
  "MedMCQA": {
551
- "Overall": 71.63
552
  },
553
  "MMLU-Medicine": {
554
- "Overall": 70.17
555
  },
556
  "PubMedQA": {
557
- "Overall": 67.76
558
  },
559
  "Referral QA": {
560
- "Overall": 67.76
561
  },
562
  "Treat Recom.": {
563
- "Overall": 67.76
564
  },
565
  "MIMIC": {
566
- "Overall": 67.76
567
  },
568
  "IU-Xray": {
569
- "Overall": 67.76
570
  },
571
  "Hospitaliz. Summari.": {
572
- "Overall": 67.76
573
  },
574
  "Patient Education": {
575
- "Overall": 67.76
576
  },
577
  "BC5": {
578
- "Overall": 67.76
579
  },
580
  "NCBI": {
581
- "Overall": 67.76
582
  },
583
  "DDI": {
584
- "Overall": 67.76
585
  },
586
  "GAD": {
587
- "Overall": 67.76
588
  },
589
  "HoC": {
590
- "Overall": 67.76
591
  },
592
  "Pharma. QA": {
593
- "Overall": 67.76
594
  },
595
  "Drug Inter.": {
596
- "Overall": 67.76
597
  }
598
  },
599
  "Baize-Healthcare": {
@@ -602,184 +875,184 @@
602
  "Baize-Healthcare"
603
  ],
604
  "Parameters": "7B",
605
- "Org": "OpenAI",
606
  "OpenSource": "No",
607
  "Verified": "Yes"
608
  },
609
- "MedQA": {
610
- "Overall": 80.75
611
  },
612
  "MedMCQA": {
613
- "Overall": 71.63
614
  },
615
  "MMLU-Medicine": {
616
- "Overall": 70.17
617
  },
618
  "PubMedQA": {
619
- "Overall": 67.76
620
  },
621
  "Referral QA": {
622
- "Overall": 67.76
623
  },
624
  "Treat Recom.": {
625
- "Overall": 67.76
626
  },
627
  "MIMIC": {
628
- "Overall": 67.76
629
  },
630
  "IU-Xray": {
631
- "Overall": 67.76
632
  },
633
  "Hospitaliz. Summari.": {
634
- "Overall": 67.76
635
  },
636
  "Patient Education": {
637
- "Overall": 67.76
638
  },
639
  "BC5": {
640
- "Overall": 67.76
641
  },
642
  "NCBI": {
643
- "Overall": 67.76
644
  },
645
  "DDI": {
646
- "Overall": 67.76
647
  },
648
  "GAD": {
649
- "Overall": 67.76
650
  },
651
  "HoC": {
652
- "Overall": 67.76
653
  },
654
  "Pharma. QA": {
655
- "Overall": 67.76
656
  },
657
  "Drug Inter.": {
658
- "Overall": 67.76
659
  }
660
  },
661
- "MedAlpaca": {
662
  "META": {
663
  "Method": [
664
- "MedAlpaca"
665
  ],
666
  "Parameters": "7B",
667
- "Org": "OpenAI",
668
  "OpenSource": "No",
669
  "Verified": "Yes"
670
  },
671
- "MedQA": {
672
- "Overall": 80.75
673
  },
674
  "MedMCQA": {
675
- "Overall": 71.63
676
  },
677
  "MMLU-Medicine": {
678
- "Overall": 70.17
679
  },
680
  "PubMedQA": {
681
- "Overall": 67.76
682
  },
683
  "Referral QA": {
684
- "Overall": 67.76
685
  },
686
  "Treat Recom.": {
687
- "Overall": 67.76
688
  },
689
  "MIMIC": {
690
- "Overall": 67.76
691
  },
692
  "IU-Xray": {
693
- "Overall": 67.76
694
  },
695
  "Hospitaliz. Summari.": {
696
- "Overall": 67.76
697
  },
698
  "Patient Education": {
699
- "Overall": 67.76
700
  },
701
  "BC5": {
702
- "Overall": 67.76
703
  },
704
  "NCBI": {
705
- "Overall": 67.76
706
  },
707
  "DDI": {
708
- "Overall": 67.76
709
  },
710
  "GAD": {
711
- "Overall": 67.76
712
  },
713
  "HoC": {
714
- "Overall": 67.76
715
  },
716
  "Pharma. QA": {
717
- "Overall": 67.76
718
  },
719
  "Drug Inter.": {
720
- "Overall": 67.76
721
  }
722
  },
723
- "Meditron": {
724
  "META": {
725
  "Method": [
726
- "Meditron"
727
  ],
728
- "Parameters": "",
729
- "Org": "OpenAI",
730
  "OpenSource": "No",
731
  "Verified": "Yes"
732
  },
733
- "MedQA": {
734
- "Overall": 80.75
735
  },
736
  "MedMCQA": {
737
- "Overall": 71.63
738
  },
739
  "MMLU-Medicine": {
740
- "Overall": 70.17
741
  },
742
  "PubMedQA": {
743
- "Overall": 67.76
744
  },
745
  "Referral QA": {
746
- "Overall": 67.76
747
  },
748
  "Treat Recom.": {
749
- "Overall": 67.76
750
  },
751
  "MIMIC": {
752
- "Overall": 67.76
753
  },
754
  "IU-Xray": {
755
- "Overall": 67.76
756
  },
757
  "Hospitaliz. Summari.": {
758
- "Overall": 67.76
759
  },
760
  "Patient Education": {
761
- "Overall": 67.76
762
  },
763
  "BC5": {
764
- "Overall": 67.76
765
  },
766
  "NCBI": {
767
- "Overall": 67.76
768
  },
769
  "DDI": {
770
- "Overall": 67.76
771
  },
772
  "GAD": {
773
- "Overall": 67.76
774
  },
775
  "HoC": {
776
- "Overall": 67.76
777
  },
778
  "Pharma. QA": {
779
- "Overall": 67.76
780
  },
781
  "Drug Inter.": {
782
- "Overall": 67.76
783
  }
784
  },
785
  "BioMistral": {
@@ -787,185 +1060,185 @@
787
  "Method": [
788
  "BioMistral"
789
  ],
790
- "Parameters": "",
791
- "Org": "OpenAI",
792
  "OpenSource": "No",
793
  "Verified": "Yes"
794
  },
795
- "MedQA": {
796
- "Overall": 80.75
797
  },
798
  "MedMCQA": {
799
- "Overall": 71.63
800
  },
801
  "MMLU-Medicine": {
802
- "Overall": 70.17
803
  },
804
  "PubMedQA": {
805
- "Overall": 67.76
806
  },
807
  "Referral QA": {
808
- "Overall": 67.76
809
  },
810
  "Treat Recom.": {
811
- "Overall": 67.76
812
  },
813
  "MIMIC": {
814
- "Overall": 67.76
815
  },
816
  "IU-Xray": {
817
- "Overall": 67.76
818
  },
819
  "Hospitaliz. Summari.": {
820
- "Overall": 67.76
821
  },
822
  "Patient Education": {
823
- "Overall": 67.76
824
  },
825
  "BC5": {
826
- "Overall": 67.76
827
  },
828
  "NCBI": {
829
- "Overall": 67.76
830
  },
831
  "DDI": {
832
- "Overall": 67.76
833
  },
834
  "GAD": {
835
- "Overall": 67.76
836
  },
837
  "HoC": {
838
- "Overall": 67.76
839
  },
840
  "Pharma. QA": {
841
- "Overall": 67.76
842
  },
843
  "Drug Inter.": {
844
- "Overall": 67.76
845
  }
846
  },
847
- "PMC-LLaMA": {
848
  "META": {
849
  "Method": [
850
- "PMC-LLaMA"
851
  ],
852
- "Parameters": "",
853
- "Org": "OpenAI",
854
  "OpenSource": "No",
855
  "Verified": "Yes"
856
  },
857
- "MedQA": {
858
- "Overall": 80.75
859
  },
860
  "MedMCQA": {
861
- "Overall": 71.63
862
  },
863
  "MMLU-Medicine": {
864
- "Overall": 70.17
865
  },
866
  "PubMedQA": {
867
- "Overall": 67.76
868
  },
869
  "Referral QA": {
870
- "Overall": 67.76
871
  },
872
  "Treat Recom.": {
873
- "Overall": 67.76
874
  },
875
  "MIMIC": {
876
- "Overall": 67.76
877
  },
878
  "IU-Xray": {
879
- "Overall": 67.76
880
  },
881
  "Hospitaliz. Summari.": {
882
- "Overall": 67.76
883
  },
884
  "Patient Education": {
885
- "Overall": 67.76
886
  },
887
  "BC5": {
888
- "Overall": 67.76
889
  },
890
  "NCBI": {
891
- "Overall": 67.76
892
  },
893
  "DDI": {
894
- "Overall": 67.76
895
  },
896
  "GAD": {
897
- "Overall": 67.76
898
  },
899
  "HoC": {
900
- "Overall": 67.76
901
  },
902
  "Pharma. QA": {
903
- "Overall": 67.76
904
  },
905
  "Drug Inter.": {
906
- "Overall": 67.76
907
  }
908
  },
909
- "MedAlpaca": {
910
  "META": {
911
  "Method": [
912
- "MedAlpaca"
913
  ],
914
- "Parameters": "7B",
915
- "Org": "OpenAI",
916
  "OpenSource": "No",
917
  "Verified": "Yes"
918
  },
919
- "MedQA": {
920
- "Overall": 80.75
921
  },
922
  "MedMCQA": {
923
- "Overall": 71.63
924
  },
925
  "MMLU-Medicine": {
926
- "Overall": 70.17
927
  },
928
  "PubMedQA": {
929
- "Overall": 67.76
930
  },
931
  "Referral QA": {
932
- "Overall": 67.76
933
  },
934
  "Treat Recom.": {
935
- "Overall": 67.76
936
  },
937
  "MIMIC": {
938
- "Overall": 67.76
939
  },
940
  "IU-Xray": {
941
- "Overall": 67.76
942
  },
943
  "Hospitaliz. Summari.": {
944
- "Overall": 67.76
945
  },
946
  "Patient Education": {
947
- "Overall": 67.76
948
  },
949
  "BC5": {
950
- "Overall": 67.76
951
  },
952
  "NCBI": {
953
- "Overall": 67.76
954
  },
955
  "DDI": {
956
- "Overall": 67.76
957
  },
958
  "GAD": {
959
- "Overall": 67.76
960
  },
961
  "HoC": {
962
- "Overall": 67.76
963
  },
964
  "Pharma. QA": {
965
- "Overall": 67.76
966
  },
967
  "Drug Inter.": {
968
- "Overall": 67.76
969
  }
970
  },
971
  "ClinicalCamel": {
@@ -973,61 +1246,61 @@
973
  "Method": [
974
  "ClinicalCamel"
975
  ],
976
- "Parameters": "",
977
- "Org": "OpenAI",
978
  "OpenSource": "No",
979
  "Verified": "Yes"
980
  },
981
- "MedQA": {
982
- "Overall": 80.75
983
  },
984
  "MedMCQA": {
985
- "Overall": 71.63
986
  },
987
  "MMLU-Medicine": {
988
- "Overall": 70.17
989
  },
990
  "PubMedQA": {
991
- "Overall": 67.76
992
  },
993
  "Referral QA": {
994
- "Overall": 67.76
995
  },
996
  "Treat Recom.": {
997
- "Overall": 67.76
998
  },
999
  "MIMIC": {
1000
- "Overall": 67.76
1001
  },
1002
  "IU-Xray": {
1003
- "Overall": 67.76
1004
  },
1005
  "Hospitaliz. Summari.": {
1006
- "Overall": 67.76
1007
  },
1008
  "Patient Education": {
1009
- "Overall": 67.76
1010
  },
1011
  "BC5": {
1012
- "Overall": 67.76
1013
  },
1014
  "NCBI": {
1015
- "Overall": 67.76
1016
  },
1017
  "DDI": {
1018
- "Overall": 67.76
1019
  },
1020
  "GAD": {
1021
- "Overall": 67.76
1022
  },
1023
  "HoC": {
1024
- "Overall": 67.76
1025
  },
1026
  "Pharma. QA": {
1027
- "Overall": 67.76
1028
  },
1029
  "Drug Inter.": {
1030
- "Overall": 67.76
1031
  }
1032
  },
1033
  "Meditron-70B": {
@@ -1035,23 +1308,62 @@
1035
  "Method": [
1036
  "Meditron-70B"
1037
  ],
1038
- "Parameters": "13B",
1039
- "Org": "Meta",
1040
- "OpenSource": "Yes",
1041
  "Verified": "Yes"
1042
  },
1043
- "Shopping Concept Understanding": {
1044
- "Overall": 45.86
 
 
 
 
 
 
 
 
 
1045
  },
1046
- "Shopping Knowledge Reasoning": {
1047
- "Overall": 39.47
 
 
 
1048
  },
1049
- "User Behavior Alignment": {
1050
- "Overall": 39.43
 
 
 
1051
  },
1052
- "Multi-lingual Abilities": {
1053
- "Overall": 44.23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  }
1055
  }
1056
  }
1057
- }
 
11
  "OpenSource": "No",
12
  "Verified": "Yes"
13
  },
14
+ "MedQA": {
15
  "Overall": 65.1
16
  },
17
  "MedMCQA": {
 
24
  "Overall": 70.8
25
  },
26
  "Referral QA": {
27
+ "Overall": 80.5
28
  },
29
  "Treat Recom.": {
30
  "Overall": 9.1
 
57
  "Overall": 70.8
58
  },
59
  "Pharma. QA": {
60
+ "Overall": 60.6
61
  },
62
  "Drug Inter.": {
63
  "Overall": 51.5
 
73
  "OpenSource": "No",
74
  "Verified": "Yes"
75
  },
76
+ "MedQA": {
77
  "Overall": 61.2
78
  },
79
  "MedMCQA": {
 
89
  "Overall": 81.1
90
  },
91
  "Treat Recom.": {
92
+ "Overall": 7.3
93
  },
94
  "MIMIC": {
95
  "Overall": 14.1
 
98
  "Overall": 10.3
99
  },
100
  "Hospitaliz. Summari.": {
101
+ "Overall": 10.5
102
  },
103
  "Patient Education": {
104
  "Overall": 9.2
 
116
  "Overall": 50.8
117
  },
118
  "HoC": {
119
+ "Overall": 66.4
120
  },
121
  "Pharma. QA": {
122
  "Overall": 57.3
 
135
  "OpenSource": "No",
136
  "Verified": "Yes"
137
  },
138
+ "MedQA": {
139
+ "Overall": 83.4
140
  },
141
  "MedMCQA": {
142
+ "Overall": 78.2
143
  },
144
  "MMLU-Medicine": {
145
+ "Overall": 92.3
146
  },
147
  "PubMedQA": {
148
+ "Overall": 80.0
149
  },
150
  "Referral QA": {
151
+ "Overall": 83.2
152
  },
153
  "Treat Recom.": {
154
+ "Overall": 18.6
155
  },
156
  "MIMIC": {
157
+ "Overall": 20.7
158
  },
159
  "IU-Xray": {
160
+ "Overall": 18.6
161
  },
162
  "Hospitaliz. Summari.": {
163
+ "Overall": 14.2
164
  },
165
  "Patient Education": {
166
+ "Overall": 12.7
167
  },
168
  "BC5": {
169
+ "Overall": 71.3
170
  },
171
  "NCBI": {
172
+ "Overall": 58.4
173
  },
174
  "DDI": {
175
+ "Overall": 64.6
176
  },
177
  "GAD": {
178
+ "Overall": 68.2
179
  },
180
  "HoC": {
181
+ "Overall": 83.6
182
  },
183
  "Pharma. QA": {
184
+ "Overall": 63.8
185
  },
186
  "Drug Inter.": {
187
+ "Overall": 56.5
188
  }
189
  },
190
  "Alpaca": {
 
193
  "Alpaca"
194
  ],
195
  "Parameters": "7B",
196
+ "Org": "",
197
+ "OpenSource": "Yes",
198
  "Verified": "Yes"
199
  },
200
+ "MedQA": {
201
+ "Overall": 34.2
202
  },
203
  "MedMCQA": {
204
+ "Overall": 30.1
205
  },
206
  "MMLU-Medicine": {
207
+ "Overall": 40.8
208
  },
209
  "PubMedQA": {
210
+ "Overall": 65.2
211
  },
212
  "Referral QA": {
213
+ "Overall": 74.8
214
  },
215
  "Treat Recom.": {
216
+ "Overall": 3.5
217
  },
218
  "MIMIC": {
219
+ "Overall": 12.6
220
  },
221
  "IU-Xray": {
222
+ "Overall": 8.7
223
  },
224
  "Hospitaliz. Summari.": {
225
+ "Overall": 4.1
226
  },
227
  "Patient Education": {
228
+ "Overall": 2.9
229
  },
230
  "BC5": {
231
+ "Overall": 41.2
232
  },
233
  "NCBI": {
234
+ "Overall": 36.5
235
  },
236
  "DDI": {
237
+ "Overall": 37.4
238
  },
239
  "GAD": {
240
+ "Overall": 36.9
241
  },
242
  "HoC": {
243
+ "Overall": 52.6
244
  },
245
  "Pharma. QA": {
246
+ "Overall": 41.3
247
  },
248
  "Drug Inter.": {
249
+ "Overall": 47.5
250
  }
251
  },
252
+ "Vicuna-7B": {
253
  "META": {
254
  "Method": [
255
+ "Vicuna-7B"
256
  ],
257
  "Parameters": "7B",
258
  "Org": "LMSys",
259
  "OpenSource": "Yes",
260
  "Verified": "Yes"
261
  },
262
+ "MedQA": {
263
+ "Overall": 34.5
264
+ },
265
+ "MedMCQA": {
266
+ "Overall": 33.4
267
+ },
268
+ "MMLU-Medicine": {
269
+ "Overall": 37.3
270
+ },
271
+ "PubMedQA": {
272
+ "Overall": 64.8
273
+ },
274
+ "Referral QA": {
275
+ "Overall": 76.4
276
+ },
277
+ "Treat Recom.": {
278
+ "Overall": 2.6
279
+ },
280
+ "MIMIC": {
281
+ "Overall": 13.8
282
+ },
283
+ "IU-Xray": {
284
+ "Overall": 8.2
285
+ },
286
+ "Hospitaliz. Summari.": {
287
+ "Overall": 4.5
288
+ },
289
+ "Patient Education": {
290
+ "Overall": 3.1
291
+ },
292
+ "BC5": {
293
+ "Overall": 44.5
294
+ },
295
+ "NCBI": {
296
+ "Overall": 37.0
297
  },
298
+ "DDI": {
299
+ "Overall": 39.4
300
  },
301
+ "GAD": {
302
+ "Overall": 41.2
303
  },
304
+ "HoC": {
305
+ "Overall": 53.8
306
+ },
307
+ "Pharma. QA": {
308
+ "Overall": 42.3
309
+ },
310
+ "Drug Inter.": {
311
+ "Overall": 45.5
312
  }
313
  },
314
+ "LLaMA-2-7B": {
315
  "META": {
316
  "Method": [
317
+ "LLaMA-2-7B"
318
  ],
319
  "Parameters": "7B",
320
  "Org": "Meta",
321
  "OpenSource": "Yes",
322
  "Verified": "Yes"
323
  },
324
+ "MedQA": {
325
+ "Overall": 32.9
326
  },
327
+ "MedMCQA": {
328
+ "Overall": 30.6
329
  },
330
+ "MMLU-Medicine": {
331
+ "Overall": 42.4
332
  },
333
+ "PubMedQA": {
334
+ "Overall": 63.4
335
+ },
336
+ "Referral QA": {
337
+ "Overall": 74.5
338
+ },
339
+ "Treat Recom.": {
340
+ "Overall": 3.3
341
+ },
342
+ "MIMIC": {
343
+ "Overall": 12.3
344
+ },
345
+ "IU-Xray": {
346
+ "Overall": 4.9
347
+ },
348
+ "Hospitaliz. Summari.": {
349
+ "Overall": 4.6
350
+ },
351
+ "Patient Education": {
352
+ "Overall": 4.6
353
+ },
354
+ "BC5": {
355
+ "Overall": 40.1
356
+ },
357
+ "NCBI": {
358
+ "Overall": 38.4
359
+ },
360
+ "DDI": {
361
+ "Overall": 37.9
362
+ },
363
+ "GAD": {
364
+ "Overall": 39.3
365
+ },
366
+ "HoC": {
367
+ "Overall": 48.6
368
+ },
369
+ "Pharma. QA": {
370
+ "Overall": 46.5
371
+ },
372
+ "Drug Inter.": {
373
+ "Overall": 48.0
374
  }
375
  },
376
+ "Mistral": {
377
  "META": {
378
  "Method": [
379
+ "Mistral"
380
  ],
381
  "Parameters": "7B",
382
  "Org": "MistralAI",
383
  "OpenSource": "Yes",
384
  "Verified": "Yes"
385
  },
386
+ "MedQA": {
387
+ "Overall": 35.7
388
  },
389
+ "MedMCQA": {
390
+ "Overall": 37.8
391
+ },
392
+ "MMLU-Medicine": {
393
+ "Overall": 46.9
394
+ },
395
+ "PubMedQA": {
396
+ "Overall": 69.4
397
+ },
398
+ "Referral QA": {
399
+ "Overall": 77.7
400
  },
401
+ "Treat Recom.": {
402
+ "Overall": 5.0
403
  },
404
+ "MIMIC": {
405
+ "Overall": 13.2
406
+ },
407
+ "IU-Xray": {
408
+ "Overall": 7.9
409
+ },
410
+ "Hospitaliz. Summari.": {
411
+ "Overall": 6.1
412
+ },
413
+ "Patient Education": {
414
+ "Overall": 5.3
415
+ },
416
+ "BC5": {
417
+ "Overall": 46.8
418
+ },
419
+ "NCBI": {
420
+ "Overall": 39.9
421
+ },
422
+ "DDI": {
423
+ "Overall": 43.5
424
+ },
425
+ "GAD": {
426
+ "Overall": 44.3
427
+ },
428
+ "HoC": {
429
+ "Overall": 59.6
430
+ },
431
+ "Pharma. QA": {
432
+ "Overall": 51.2
433
+ },
434
+ "Drug Inter.": {
435
+ "Overall": 53.0
436
  }
437
  },
438
+ "Vicuna-13B": {
439
  "META": {
440
  "Method": [
441
+ "Vicuna-13B"
442
  ],
443
  "Parameters": "13B",
444
  "Org": "LMSys",
445
  "OpenSource": "Yes",
446
  "Verified": "Yes"
447
  },
448
+ "MedQA": {
449
+ "Overall": 38.0
450
+ },
451
+ "MedMCQA": {
452
+ "Overall": 36.4
453
+ },
454
+ "MMLU-Medicine": {
455
+ "Overall": 45.6
456
+ },
457
+ "PubMedQA": {
458
+ "Overall": 66.2
459
+ },
460
+ "Referral QA": {
461
+ "Overall": 76.8
462
+ },
463
+ "Treat Recom.": {
464
+ "Overall": 4.6
465
+ },
466
+ "MIMIC": {
467
+ "Overall": 14.5
468
+ },
469
+ "IU-Xray": {
470
+ "Overall": 9.4
471
+ },
472
+ "Hospitaliz. Summari.": {
473
+ "Overall": 6.2
474
+ },
475
+ "Patient Education": {
476
+ "Overall": 4.7
477
+ },
478
+ "BC5": {
479
+ "Overall": 46.2
480
+ },
481
+ "NCBI": {
482
+ "Overall": 39.0
483
+ },
484
+ "DDI": {
485
+ "Overall": 41.3
486
  },
487
+ "GAD": {
488
+ "Overall": 43.5
489
  },
490
+ "HoC": {
491
+ "Overall": 56.7
492
  },
493
+ "Pharma. QA": {
494
+ "Overall": 45.1
495
+ },
496
+ "Drug Inter.": {
497
+ "Overall": 46.0
498
  }
499
  },
500
+ "LLaMA-2-13B": {
501
  "META": {
502
  "Method": [
503
+ "LLaMA-2-13B"
504
  ],
505
  "Parameters": "13B",
506
  "Org": "Meta",
507
  "OpenSource": "Yes",
508
  "Verified": "Yes"
509
  },
510
+ "MedQA": {
511
+ "Overall": 38.1
512
  },
513
+ "MedMCQA": {
514
+ "Overall": 35.5
515
  },
516
+ "MMLU-Medicine": {
517
+ "Overall": 46.4
518
+ },
519
+ "PubMedQA": {
520
+ "Overall": 66.8
521
+ },
522
+ "Referral QA": {
523
+ "Overall": 77.1
524
+ },
525
+ "Treat Recom.": {
526
+ "Overall": 4.8
527
+ },
528
+ "MIMIC": {
529
+ "Overall": 12.0
530
+ },
531
+ "IU-Xray": {
532
+ "Overall": 9.1
533
+ },
534
+ "Hospitaliz. Summari.": {
535
+ "Overall": 6.4
536
+ },
537
+ "Patient Education": {
538
+ "Overall": 5.6
539
+ },
540
+ "BC5": {
541
+ "Overall": 46.6
542
  },
543
+ "NCBI": {
544
+ "Overall": 38.3
545
+ },
546
+ "DDI": {
547
+ "Overall": 39.7
548
+ },
549
+ "GAD": {
550
+ "Overall": 41.2
551
+ },
552
+ "HoC": {
553
+ "Overall": 55.9
554
+ },
555
+ "Pharma. QA": {
556
+ "Overall": 46.9
557
+ },
558
+ "Drug Inter.": {
559
+ "Overall": 47.5
560
  }
561
  },
562
  "LLaMA-2-70B": {
 
569
  "OpenSource": "Yes",
570
  "Verified": "Yes"
571
  },
572
+ "MedQA": {
573
+ "Overall": 45.8
574
  },
575
+ "MedMCQA": {
576
+ "Overall": 42.7
577
  },
578
+ "MMLU-Medicine": {
579
+ "Overall": 67.4
580
+ },
581
+ "PubMedQA": {
582
+ "Overall": 67.4
583
+ },
584
+ "Referral QA": {
585
+ "Overall": 78.9
586
+ },
587
+ "Treat Recom.": {
588
+ "Overall": 5.5
589
+ },
590
+ "MIMIC": {
591
+ "Overall": 13.9
592
+ },
593
+ "IU-Xray": {
594
+ "Overall": 8.0
595
+ },
596
+ "Hospitaliz. Summari.": {
597
+ "Overall": 8.3
598
+ },
599
+ "Patient Education": {
600
+ "Overall": 6.8
601
+ },
602
+ "BC5": {
603
+ "Overall": 47.8
604
+ },
605
+ "NCBI": {
606
+ "Overall": 41.5
607
+ },
608
+ "DDI": {
609
+ "Overall": 45.6
610
+ },
611
+ "GAD": {
612
+ "Overall": 44.7
613
+ },
614
+ "HoC": {
615
+ "Overall": 63.2
616
  },
617
+ "Pharma. QA": {
618
+ "Overall": 49.3
619
+ },
620
+ "Drug Inter.": {
621
+ "Overall": 51.5
622
  }
623
  },
624
  "LLaMA-3-70B": {
 
631
  "OpenSource": "Yes",
632
  "Verified": "Yes"
633
  },
634
+ "MedQA": {
635
+ "Overall": 78.8
636
+ },
637
+ "MedMCQA": {
638
+ "Overall": 74.7
639
+ },
640
+ "MMLU-Medicine": {
641
+ "Overall": 83.4
642
+ },
643
+ "PubMedQA": {
644
+ "Overall": 77.4
645
+ },
646
+ "Referral QA": {
647
+ "Overall": 82.4
648
+ },
649
+ "Treat Recom.": {
650
+ "Overall": 10.2
651
  },
652
+ "MIMIC": {
653
+ "Overall": 18.4
654
+ },
655
+ "IU-Xray": {
656
+ "Overall": 15.5
657
+ },
658
+ "Hospitaliz. Summari.": {
659
+ "Overall": 10.9
660
+ },
661
+ "Patient Education": {
662
+ "Overall": 10.1
663
+ },
664
+ "BC5": {
665
+ "Overall": 63.7
666
+ },
667
+ "NCBI": {
668
+ "Overall": 50.2
669
+ },
670
+ "DDI": {
671
+ "Overall": 59.7
672
+ },
673
+ "GAD": {
674
+ "Overall": 63.1
675
+ },
676
+ "HoC": {
677
+ "Overall": 79.0
678
  },
679
+ "Pharma. QA": {
680
+ "Overall": 62.4
681
  },
682
+ "Drug Inter.": {
683
+ "Overall": 53.0
684
  }
685
  },
686
  "Huatuo": {
 
688
  "Method": [
689
  "Huatuo"
690
  ],
691
+ "Parameters": "7B",
692
+ "Org": "",
693
  "OpenSource": "No",
694
  "Verified": "Yes"
695
  },
696
+ "MedQA": {
697
+ "Overall": 28.4
698
  },
699
  "MedMCQA": {
700
+ "Overall": 24.8
701
  },
702
  "MMLU-Medicine": {
703
+ "Overall": 31.6
704
  },
705
  "PubMedQA": {
706
+ "Overall": 61.0
707
  },
708
  "Referral QA": {
709
+ "Overall": 69.3
710
  },
711
  "Treat Recom.": {
712
+ "Overall": 3.8
713
  },
714
  "MIMIC": {
715
+ "Overall": 8.7
716
  },
717
  "IU-Xray": {
718
+ "Overall": 3.8
719
  },
720
  "Hospitaliz. Summari.": {
721
+ "Overall": 2.2
722
  },
723
  "Patient Education": {
724
+ "Overall": 1.4
725
  },
726
  "BC5": {
727
+ "Overall": 43.6
728
  },
729
  "NCBI": {
730
+ "Overall": 37.5
731
  },
732
  "DDI": {
733
+ "Overall": 40.1
734
  },
735
  "GAD": {
736
+ "Overall": 38.2
737
  },
738
  "HoC": {
739
+ "Overall": 50.2
740
  },
741
  "Pharma. QA": {
742
+ "Overall": 44.1
743
  },
744
  "Drug Inter.": {
745
+ "Overall": 49.5
746
  }
747
  },
748
  "ChatDoctor": {
 
750
  "Method": [
751
  "ChatDoctor"
752
  ],
753
+ "Parameters": "7B",
754
+ "Org": "",
755
  "OpenSource": "No",
756
  "Verified": "Yes"
757
  },
758
+ "MedQA": {
759
+ "Overall": 33.2
760
  },
761
  "MedMCQA": {
762
+ "Overall": 29.3
763
  },
764
  "MMLU-Medicine": {
765
+ "Overall": 37.0
766
  },
767
  "PubMedQA": {
768
+ "Overall": 63.8
769
  },
770
  "Referral QA": {
771
+ "Overall": 73.3
772
  },
773
  "Treat Recom.": {
774
+ "Overall": 3.8
775
  },
776
  "MIMIC": {
777
+ "Overall": 8.9
778
  },
779
  "IU-Xray": {
780
+ "Overall": 4.2
781
  },
782
  "Hospitaliz. Summari.": {
783
+ "Overall": 2.8
784
  },
785
  "Patient Education": {
786
+ "Overall": 1.7
787
  },
788
  "BC5": {
789
+ "Overall": 45.8
790
  },
791
  "NCBI": {
792
+ "Overall": 39.0
793
  },
794
  "DDI": {
795
+ "Overall": 40.4
796
  },
797
  "GAD": {
798
+ "Overall": 38.1
799
  },
800
  "HoC": {
801
+ "Overall": 55.7
802
  },
803
  "Pharma. QA": {
804
+ "Overall": 42.7
805
  },
806
  "Drug Inter.": {
807
+ "Overall": 51.0
808
  }
809
  },
810
+ "PMC-LLaMA-7B": {
811
  "META": {
812
  "Method": [
813
+ "PMC-LLaMA-7B"
814
  ],
815
  "Parameters": "7B",
816
+ "Org": "",
817
  "OpenSource": "No",
818
  "Verified": "Yes"
819
  },
820
+ "MedQA": {
821
+ "Overall": 28.7
822
  },
823
  "MedMCQA": {
824
+ "Overall": 29.8
825
  },
826
  "MMLU-Medicine": {
827
+ "Overall": 39.5
828
  },
829
  "PubMedQA": {
830
+ "Overall": 60.2
831
  },
832
  "Referral QA": {
833
+ "Overall": 70.2
834
  },
835
  "Treat Recom.": {
836
+ "Overall": 4.0
837
  },
838
  "MIMIC": {
839
+ "Overall": 7.6
840
  },
841
  "IU-Xray": {
842
+ "Overall": 4.0
843
  },
844
  "Hospitaliz. Summari.": {
845
+ "Overall": 3.6
846
  },
847
  "Patient Education": {
848
+ "Overall": 1.5
849
  },
850
  "BC5": {
851
+ "Overall": 45.2
852
  },
853
  "NCBI": {
854
+ "Overall": 37.8
855
  },
856
  "DDI": {
857
+ "Overall": 40.8
858
  },
859
  "GAD": {
860
+ "Overall": 42.0
861
  },
862
  "HoC": {
863
+ "Overall": 55.6
864
  },
865
  "Pharma. QA": {
866
+ "Overall": 45.5
867
  },
868
  "Drug Inter.": {
869
+ "Overall": 47.8
870
  }
871
  },
872
  "Baize-Healthcare": {
 
875
  "Baize-Healthcare"
876
  ],
877
  "Parameters": "7B",
878
+ "Org": "",
879
  "OpenSource": "No",
880
  "Verified": "Yes"
881
  },
882
+ "MedQA": {
883
+ "Overall": 29.4
884
  },
885
  "MedMCQA": {
886
+ "Overall": 31.3
887
  },
888
  "MMLU-Medicine": {
889
+ "Overall": 39.9
890
  },
891
  "PubMedQA": {
892
+ "Overall": 64.4
893
  },
894
  "Referral QA": {
895
+ "Overall": 74.0
896
  },
897
  "Treat Recom.": {
898
+ "Overall": 4.7
899
  },
900
  "MIMIC": {
901
+ "Overall": 9.8
902
  },
903
  "IU-Xray": {
904
+ "Overall": 4.4
905
  },
906
  "Hospitaliz. Summari.": {
907
+ "Overall": 3.0
908
  },
909
  "Patient Education": {
910
+ "Overall": 1.8
911
  },
912
  "BC5": {
913
+ "Overall": 44.4
914
  },
915
  "NCBI": {
916
+ "Overall": 38.5
917
  },
918
  "DDI": {
919
+ "Overall": 41.9
920
  },
921
  "GAD": {
922
+ "Overall": 45.8
923
  },
924
  "HoC": {
925
+ "Overall": 56.9
926
  },
927
  "Pharma. QA": {
928
+ "Overall": 50.4
929
  },
930
  "Drug Inter.": {
931
+ "Overall": 49.5
932
  }
933
  },
934
+ "MedAlpaca-7B": {
935
  "META": {
936
  "Method": [
937
+ "MedAlpaca-7B"
938
  ],
939
  "Parameters": "7B",
940
+ "Org": "",
941
  "OpenSource": "No",
942
  "Verified": "Yes"
943
  },
944
+ "MedQA": {
945
+ "Overall": 35.1
946
  },
947
  "MedMCQA": {
948
+ "Overall": 32.3
949
  },
950
  "MMLU-Medicine": {
951
+ "Overall": 41.9
952
  },
953
  "PubMedQA": {
954
+ "Overall": 62.4
955
  },
956
  "Referral QA": {
957
+ "Overall": 74.5
958
  },
959
  "Treat Recom.": {
960
+ "Overall": 4.8
961
  },
962
  "MIMIC": {
963
+ "Overall": 10.4
964
  },
965
  "IU-Xray": {
966
+ "Overall": 7.6
967
  },
968
  "Hospitaliz. Summari.": {
969
+ "Overall": 4.5
970
  },
971
  "Patient Education": {
972
+ "Overall": 2.7
973
  },
974
  "BC5": {
975
+ "Overall": 47.3
976
  },
977
  "NCBI": {
978
+ "Overall": 39.3
979
  },
980
  "DDI": {
981
+ "Overall": 43.4
982
  },
983
  "GAD": {
984
+ "Overall": 45.4
985
  },
986
  "HoC": {
987
+ "Overall": 59.0
988
  },
989
  "Pharma. QA": {
990
+ "Overall": 47.7
991
  },
992
  "Drug Inter.": {
993
+ "Overall": 52.7
994
  }
995
  },
996
+ "Meditron-7B": {
997
  "META": {
998
  "Method": [
999
+ "Meditron-7B"
1000
  ],
1001
+ "Parameters": "7B",
1002
+ "Org": "",
1003
  "OpenSource": "No",
1004
  "Verified": "Yes"
1005
  },
1006
+ "MedQA": {
1007
+ "Overall": 35.3
1008
  },
1009
  "MedMCQA": {
1010
+ "Overall": 31.1
1011
  },
1012
  "MMLU-Medicine": {
1013
+ "Overall": 41.3
1014
  },
1015
  "PubMedQA": {
1016
+ "Overall": 61.4
1017
  },
1018
  "Referral QA": {
1019
+ "Overall": 74.9
1020
  },
1021
  "Treat Recom.": {
1022
+ "Overall": 5.8
1023
  },
1024
  "MIMIC": {
1025
+ "Overall": 12.5
1026
  },
1027
  "IU-Xray": {
1028
+ "Overall": 7.8
1029
  },
1030
  "Hospitaliz. Summari.": {
1031
+ "Overall": 4.8
1032
  },
1033
  "Patient Education": {
1034
+ "Overall": 5.9
1035
  },
1036
  "BC5": {
1037
+ "Overall": 46.5
1038
  },
1039
  "NCBI": {
1040
+ "Overall": 39.2
1041
  },
1042
  "DDI": {
1043
+ "Overall": 42.7
1044
  },
1045
  "GAD": {
1046
+ "Overall": 43.3
1047
  },
1048
  "HoC": {
1049
+ "Overall": 57.9
1050
  },
1051
  "Pharma. QA": {
1052
+ "Overall": 50.7
1053
  },
1054
  "Drug Inter.": {
1055
+ "Overall": 52.0
1056
  }
1057
  },
1058
  "BioMistral": {
 
1060
  "Method": [
1061
  "BioMistral"
1062
  ],
1063
+ "Parameters": "7B",
1064
+ "Org": "",
1065
  "OpenSource": "No",
1066
  "Verified": "Yes"
1067
  },
1068
+ "MedQA": {
1069
+ "Overall": 35.4
1070
  },
1071
  "MedMCQA": {
1072
+ "Overall": 34.8
1073
  },
1074
  "MMLU-Medicine": {
1075
+ "Overall": 41.6
1076
  },
1077
  "PubMedQA": {
1078
+ "Overall": 66.4
1079
  },
1080
  "Referral QA": {
1081
+ "Overall": 77.0
1082
  },
1083
  "Treat Recom.": {
1084
+ "Overall": 7.6
1085
  },
1086
  "MIMIC": {
1087
+ "Overall": 14.2
1088
  },
1089
  "IU-Xray": {
1090
+ "Overall": 8.5
1091
  },
1092
  "Hospitaliz. Summari.": {
1093
+ "Overall": 7.5
1094
  },
1095
  "Patient Education": {
1096
+ "Overall": 6.6
1097
  },
1098
  "BC5": {
1099
+ "Overall": 48.8
1100
  },
1101
  "NCBI": {
1102
+ "Overall": 40.4
1103
  },
1104
  "DDI": {
1105
+ "Overall": 46.0
1106
  },
1107
  "GAD": {
1108
+ "Overall": 48.5
1109
  },
1110
  "HoC": {
1111
+ "Overall": 64.3
1112
  },
1113
  "Pharma. QA": {
1114
+ "Overall": 54.5
1115
  },
1116
  "Drug Inter.": {
1117
+ "Overall": 54.0
1118
  }
1119
  },
1120
+ "PMC-LLaMA-13B": {
1121
  "META": {
1122
  "Method": [
1123
+ "PMC-LLaMA-13B"
1124
  ],
1125
+ "Parameters": "13B",
1126
+ "Org": "",
1127
  "OpenSource": "No",
1128
  "Verified": "Yes"
1129
  },
1130
+ "MedQA": {
1131
+ "Overall": 39.6
1132
  },
1133
  "MedMCQA": {
1134
+ "Overall": 37.7
1135
  },
1136
  "MMLU-Medicine": {
1137
+ "Overall": 56.3
1138
  },
1139
  "PubMedQA": {
1140
+ "Overall": 67.0
1141
  },
1142
  "Referral QA": {
1143
+ "Overall": 77.6
1144
  },
1145
  "Treat Recom.": {
1146
+ "Overall": 4.9
1147
  },
1148
  "MIMIC": {
1149
+ "Overall": 9.4
1150
  },
1151
  "IU-Xray": {
1152
+ "Overall": 5.9
1153
  },
1154
  "Hospitaliz. Summari.": {
1155
+ "Overall": 4.2
1156
  },
1157
  "Patient Education": {
1158
+ "Overall": 2.7
1159
  },
1160
  "BC5": {
1161
+ "Overall": 51.5
1162
  },
1163
  "NCBI": {
1164
+ "Overall": 43.1
1165
  },
1166
  "DDI": {
1167
+ "Overall": 48.4
1168
  },
1169
  "GAD": {
1170
+ "Overall": 48.7
1171
  },
1172
  "HoC": {
1173
+ "Overall": 65.3
1174
  },
1175
  "Pharma. QA": {
1176
+ "Overall": 48.8
1177
  },
1178
  "Drug Inter.": {
1179
+ "Overall": 51.5
1180
  }
1181
  },
1182
+ "MedAlpaca-13B": {
1183
  "META": {
1184
  "Method": [
1185
+ "MedAlpaca-13B"
1186
  ],
1187
+ "Parameters": "13B",
1188
+ "Org": "",
1189
  "OpenSource": "No",
1190
  "Verified": "Yes"
1191
  },
1192
+ "MedQA": {
1193
+ "Overall": 37.3
1194
  },
1195
  "MedMCQA": {
1196
+ "Overall": 35.7
1197
  },
1198
  "MMLU-Medicine": {
1199
+ "Overall": 53.6
1200
  },
1201
  "PubMedQA": {
1202
+ "Overall": 65.6
1203
  },
1204
  "Referral QA": {
1205
+ "Overall": 77.4
1206
  },
1207
  "Treat Recom.": {
1208
+ "Overall": 5.1
1209
  },
1210
  "MIMIC": {
1211
+ "Overall": 11.7
1212
  },
1213
  "IU-Xray": {
1214
+ "Overall": 8.6
1215
  },
1216
  "Hospitaliz. Summari.": {
1217
+ "Overall": 5.0
1218
  },
1219
  "Patient Education": {
1220
+ "Overall": 3.5
1221
  },
1222
  "BC5": {
1223
+ "Overall": 49.2
1224
  },
1225
  "NCBI": {
1226
+ "Overall": 41.6
1227
  },
1228
  "DDI": {
1229
+ "Overall": 44.1
1230
  },
1231
  "GAD": {
1232
+ "Overall": 44.4
1233
  },
1234
  "HoC": {
1235
+ "Overall": 59.4
1236
  },
1237
  "Pharma. QA": {
1238
+ "Overall": 51.6
1239
  },
1240
  "Drug Inter.": {
1241
+ "Overall": 50.0
1242
  }
1243
  },
1244
  "ClinicalCamel": {
 
1246
  "Method": [
1247
  "ClinicalCamel"
1248
  ],
1249
+ "Parameters": "70B",
1250
+ "Org": "",
1251
  "OpenSource": "No",
1252
  "Verified": "Yes"
1253
  },
1254
+ "MedQA": {
1255
+ "Overall": 46.4
1256
  },
1257
  "MedMCQA": {
1258
+ "Overall": 45.8
1259
  },
1260
  "MMLU-Medicine": {
1261
+ "Overall": 65.4
1262
  },
1263
  "PubMedQA": {
1264
+ "Overall": 71.0
1265
  },
1266
  "Referral QA": {
1267
+ "Overall": 79.8
1268
  },
1269
  "Treat Recom.": {
1270
+ "Overall": 8.4
1271
  },
1272
  "MIMIC": {
1273
+ "Overall": 13.0
1274
  },
1275
  "IU-Xray": {
1276
+ "Overall": 9.6
1277
  },
1278
  "Hospitaliz. Summari.": {
1279
+ "Overall": 7.9
1280
  },
1281
  "Patient Education": {
1282
+ "Overall": 7.2
1283
  },
1284
  "BC5": {
1285
+ "Overall": 51.2
1286
  },
1287
  "NCBI": {
1288
+ "Overall": 43.7
1289
  },
1290
  "DDI": {
1291
+ "Overall": 47.6
1292
  },
1293
  "GAD": {
1294
+ "Overall": 47.2
1295
  },
1296
  "HoC": {
1297
+ "Overall": 64.8
1298
  },
1299
  "Pharma. QA": {
1300
+ "Overall": 52.6
1301
  },
1302
  "Drug Inter.": {
1303
+ "Overall": 52.5
1304
  }
1305
  },
1306
  "Meditron-70B": {
 
1308
  "Method": [
1309
  "Meditron-70B"
1310
  ],
1311
+ "Parameters": "70B",
1312
+ "Org": "",
1313
+ "OpenSource": "No",
1314
  "Verified": "Yes"
1315
  },
1316
+ "MedQA": {
1317
+ "Overall": 45.7
1318
+ },
1319
+ "MedMCQA": {
1320
+ "Overall": 44.9
1321
+ },
1322
+ "MMLU-Medicine": {
1323
+ "Overall": 65.1
1324
+ },
1325
+ "PubMedQA": {
1326
+ "Overall": 70.6
1327
  },
1328
+ "Referral QA": {
1329
+ "Overall": 78.6
1330
+ },
1331
+ "Treat Recom.": {
1332
+ "Overall": 8.9
1333
  },
1334
+ "MIMIC": {
1335
+ "Overall": 13.3
1336
+ },
1337
+ "IU-Xray": {
1338
+ "Overall": 8.0
1339
  },
1340
+ "Hospitaliz. Summari.": {
1341
+ "Overall": 9.6
1342
+ },
1343
+ "Patient Education": {
1344
+ "Overall": 7.7
1345
+ },
1346
+ "BC5": {
1347
+ "Overall": 54.3
1348
+ },
1349
+ "NCBI": {
1350
+ "Overall": 45.7
1351
+ },
1352
+ "DDI": {
1353
+ "Overall": 51.2
1354
+ },
1355
+ "GAD": {
1356
+ "Overall": 49.6
1357
+ },
1358
+ "HoC": {
1359
+ "Overall": 69.6
1360
+ },
1361
+ "Pharma. QA": {
1362
+ "Overall": 58.7
1363
+ },
1364
+ "Drug Inter.": {
1365
+ "Overall": 54.5
1366
  }
1367
  }
1368
  }
1369
+ }