MLLM_leaderboard / eval-results /chaoyi-wu /MedLLaMA_13B /results_2023-07-24T13-04-01.266274.json
Wwwduojin's picture
Upload 1529 files
a312f2f
{
"results": {
"harness|arc:challenge|25": {
"acc": 0.5102389078498294,
"acc_stderr": 0.014608326906285012,
"acc_norm": 0.5426621160409556,
"acc_norm_stderr": 0.014558106543924065
},
"harness|hellaswag|10": {
"acc": 0.5862378012348137,
"acc_stderr": 0.004915003499517829,
"acc_norm": 0.7853017327225652,
"acc_norm_stderr": 0.004097736838432052
},
"harness|hendrycksTest-abstract_algebra|5": {
"acc": 0.31,
"acc_stderr": 0.04648231987117316,
"acc_norm": 0.31,
"acc_norm_stderr": 0.04648231987117316
},
"harness|hendrycksTest-anatomy|5": {
"acc": 0.5259259259259259,
"acc_stderr": 0.04313531696750575,
"acc_norm": 0.5259259259259259,
"acc_norm_stderr": 0.04313531696750575
},
"harness|hendrycksTest-astronomy|5": {
"acc": 0.48026315789473684,
"acc_stderr": 0.040657710025626036,
"acc_norm": 0.48026315789473684,
"acc_norm_stderr": 0.040657710025626036
},
"harness|hendrycksTest-business_ethics|5": {
"acc": 0.44,
"acc_stderr": 0.04988876515698589,
"acc_norm": 0.44,
"acc_norm_stderr": 0.04988876515698589
},
"harness|hendrycksTest-clinical_knowledge|5": {
"acc": 0.49056603773584906,
"acc_stderr": 0.0307673947078081,
"acc_norm": 0.49056603773584906,
"acc_norm_stderr": 0.0307673947078081
},
"harness|hendrycksTest-college_biology|5": {
"acc": 0.4791666666666667,
"acc_stderr": 0.041775789507399935,
"acc_norm": 0.4791666666666667,
"acc_norm_stderr": 0.041775789507399935
},
"harness|hendrycksTest-college_chemistry|5": {
"acc": 0.35,
"acc_stderr": 0.047937248544110196,
"acc_norm": 0.35,
"acc_norm_stderr": 0.047937248544110196
},
"harness|hendrycksTest-college_computer_science|5": {
"acc": 0.38,
"acc_stderr": 0.048783173121456316,
"acc_norm": 0.38,
"acc_norm_stderr": 0.048783173121456316
},
"harness|hendrycksTest-college_mathematics|5": {
"acc": 0.35,
"acc_stderr": 0.0479372485441102,
"acc_norm": 0.35,
"acc_norm_stderr": 0.0479372485441102
},
"harness|hendrycksTest-college_medicine|5": {
"acc": 0.42196531791907516,
"acc_stderr": 0.03765746693865151,
"acc_norm": 0.42196531791907516,
"acc_norm_stderr": 0.03765746693865151
},
"harness|hendrycksTest-college_physics|5": {
"acc": 0.21568627450980393,
"acc_stderr": 0.04092563958237657,
"acc_norm": 0.21568627450980393,
"acc_norm_stderr": 0.04092563958237657
},
"harness|hendrycksTest-computer_security|5": {
"acc": 0.58,
"acc_stderr": 0.049604496374885836,
"acc_norm": 0.58,
"acc_norm_stderr": 0.049604496374885836
},
"harness|hendrycksTest-conceptual_physics|5": {
"acc": 0.4,
"acc_stderr": 0.03202563076101737,
"acc_norm": 0.4,
"acc_norm_stderr": 0.03202563076101737
},
"harness|hendrycksTest-econometrics|5": {
"acc": 0.22807017543859648,
"acc_stderr": 0.03947152782669415,
"acc_norm": 0.22807017543859648,
"acc_norm_stderr": 0.03947152782669415
},
"harness|hendrycksTest-electrical_engineering|5": {
"acc": 0.3793103448275862,
"acc_stderr": 0.04043461861916747,
"acc_norm": 0.3793103448275862,
"acc_norm_stderr": 0.04043461861916747
},
"harness|hendrycksTest-elementary_mathematics|5": {
"acc": 0.23809523809523808,
"acc_stderr": 0.021935878081184766,
"acc_norm": 0.23809523809523808,
"acc_norm_stderr": 0.021935878081184766
},
"harness|hendrycksTest-formal_logic|5": {
"acc": 0.3333333333333333,
"acc_stderr": 0.04216370213557835,
"acc_norm": 0.3333333333333333,
"acc_norm_stderr": 0.04216370213557835
},
"harness|hendrycksTest-global_facts|5": {
"acc": 0.35,
"acc_stderr": 0.047937248544110196,
"acc_norm": 0.35,
"acc_norm_stderr": 0.047937248544110196
},
"harness|hendrycksTest-high_school_biology|5": {
"acc": 0.5129032258064516,
"acc_stderr": 0.028434533152681855,
"acc_norm": 0.5129032258064516,
"acc_norm_stderr": 0.028434533152681855
},
"harness|hendrycksTest-high_school_chemistry|5": {
"acc": 0.28078817733990147,
"acc_stderr": 0.0316185633535861,
"acc_norm": 0.28078817733990147,
"acc_norm_stderr": 0.0316185633535861
},
"harness|hendrycksTest-high_school_computer_science|5": {
"acc": 0.44,
"acc_stderr": 0.04988876515698589,
"acc_norm": 0.44,
"acc_norm_stderr": 0.04988876515698589
},
"harness|hendrycksTest-high_school_european_history|5": {
"acc": 0.5757575757575758,
"acc_stderr": 0.038592681420702636,
"acc_norm": 0.5757575757575758,
"acc_norm_stderr": 0.038592681420702636
},
"harness|hendrycksTest-high_school_geography|5": {
"acc": 0.5151515151515151,
"acc_stderr": 0.03560716516531061,
"acc_norm": 0.5151515151515151,
"acc_norm_stderr": 0.03560716516531061
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"acc": 0.6580310880829016,
"acc_stderr": 0.03423465100104283,
"acc_norm": 0.6580310880829016,
"acc_norm_stderr": 0.03423465100104283
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"acc": 0.43846153846153846,
"acc_stderr": 0.025158266016868575,
"acc_norm": 0.43846153846153846,
"acc_norm_stderr": 0.025158266016868575
},
"harness|hendrycksTest-high_school_mathematics|5": {
"acc": 0.2962962962962963,
"acc_stderr": 0.027840811495871927,
"acc_norm": 0.2962962962962963,
"acc_norm_stderr": 0.027840811495871927
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"acc": 0.44537815126050423,
"acc_stderr": 0.0322841062671639,
"acc_norm": 0.44537815126050423,
"acc_norm_stderr": 0.0322841062671639
},
"harness|hendrycksTest-high_school_physics|5": {
"acc": 0.33774834437086093,
"acc_stderr": 0.038615575462551684,
"acc_norm": 0.33774834437086093,
"acc_norm_stderr": 0.038615575462551684
},
"harness|hendrycksTest-high_school_psychology|5": {
"acc": 0.5871559633027523,
"acc_stderr": 0.021109128133413913,
"acc_norm": 0.5871559633027523,
"acc_norm_stderr": 0.021109128133413913
},
"harness|hendrycksTest-high_school_statistics|5": {
"acc": 0.3472222222222222,
"acc_stderr": 0.032468872436376486,
"acc_norm": 0.3472222222222222,
"acc_norm_stderr": 0.032468872436376486
},
"harness|hendrycksTest-high_school_us_history|5": {
"acc": 0.5294117647058824,
"acc_stderr": 0.03503235296367992,
"acc_norm": 0.5294117647058824,
"acc_norm_stderr": 0.03503235296367992
},
"harness|hendrycksTest-high_school_world_history|5": {
"acc": 0.6244725738396625,
"acc_stderr": 0.03152256243091156,
"acc_norm": 0.6244725738396625,
"acc_norm_stderr": 0.03152256243091156
},
"harness|hendrycksTest-human_aging|5": {
"acc": 0.5291479820627802,
"acc_stderr": 0.03350073248773404,
"acc_norm": 0.5291479820627802,
"acc_norm_stderr": 0.03350073248773404
},
"harness|hendrycksTest-human_sexuality|5": {
"acc": 0.5343511450381679,
"acc_stderr": 0.043749285605997376,
"acc_norm": 0.5343511450381679,
"acc_norm_stderr": 0.043749285605997376
},
"harness|hendrycksTest-international_law|5": {
"acc": 0.6528925619834711,
"acc_stderr": 0.04345724570292534,
"acc_norm": 0.6528925619834711,
"acc_norm_stderr": 0.04345724570292534
},
"harness|hendrycksTest-jurisprudence|5": {
"acc": 0.49074074074074076,
"acc_stderr": 0.04832853553437055,
"acc_norm": 0.49074074074074076,
"acc_norm_stderr": 0.04832853553437055
},
"harness|hendrycksTest-logical_fallacies|5": {
"acc": 0.4294478527607362,
"acc_stderr": 0.03889066619112722,
"acc_norm": 0.4294478527607362,
"acc_norm_stderr": 0.03889066619112722
},
"harness|hendrycksTest-machine_learning|5": {
"acc": 0.36607142857142855,
"acc_stderr": 0.045723723587374296,
"acc_norm": 0.36607142857142855,
"acc_norm_stderr": 0.045723723587374296
},
"harness|hendrycksTest-management|5": {
"acc": 0.5922330097087378,
"acc_stderr": 0.0486577757041077,
"acc_norm": 0.5922330097087378,
"acc_norm_stderr": 0.0486577757041077
},
"harness|hendrycksTest-marketing|5": {
"acc": 0.6495726495726496,
"acc_stderr": 0.0312561082442188,
"acc_norm": 0.6495726495726496,
"acc_norm_stderr": 0.0312561082442188
},
"harness|hendrycksTest-medical_genetics|5": {
"acc": 0.52,
"acc_stderr": 0.050211673156867795,
"acc_norm": 0.52,
"acc_norm_stderr": 0.050211673156867795
},
"harness|hendrycksTest-miscellaneous|5": {
"acc": 0.6206896551724138,
"acc_stderr": 0.01735126811754445,
"acc_norm": 0.6206896551724138,
"acc_norm_stderr": 0.01735126811754445
},
"harness|hendrycksTest-moral_disputes|5": {
"acc": 0.5028901734104047,
"acc_stderr": 0.02691864538323901,
"acc_norm": 0.5028901734104047,
"acc_norm_stderr": 0.02691864538323901
},
"harness|hendrycksTest-moral_scenarios|5": {
"acc": 0.2558659217877095,
"acc_stderr": 0.014593620923210756,
"acc_norm": 0.2558659217877095,
"acc_norm_stderr": 0.014593620923210756
},
"harness|hendrycksTest-nutrition|5": {
"acc": 0.545751633986928,
"acc_stderr": 0.028509807802626592,
"acc_norm": 0.545751633986928,
"acc_norm_stderr": 0.028509807802626592
},
"harness|hendrycksTest-philosophy|5": {
"acc": 0.49517684887459806,
"acc_stderr": 0.028396770444111298,
"acc_norm": 0.49517684887459806,
"acc_norm_stderr": 0.028396770444111298
},
"harness|hendrycksTest-prehistory|5": {
"acc": 0.5030864197530864,
"acc_stderr": 0.027820214158594377,
"acc_norm": 0.5030864197530864,
"acc_norm_stderr": 0.027820214158594377
},
"harness|hendrycksTest-professional_accounting|5": {
"acc": 0.3546099290780142,
"acc_stderr": 0.028538650028878638,
"acc_norm": 0.3546099290780142,
"acc_norm_stderr": 0.028538650028878638
},
"harness|hendrycksTest-professional_law|5": {
"acc": 0.3324641460234681,
"acc_stderr": 0.01203202233226051,
"acc_norm": 0.3324641460234681,
"acc_norm_stderr": 0.01203202233226051
},
"harness|hendrycksTest-professional_medicine|5": {
"acc": 0.5257352941176471,
"acc_stderr": 0.03033257809455502,
"acc_norm": 0.5257352941176471,
"acc_norm_stderr": 0.03033257809455502
},
"harness|hendrycksTest-professional_psychology|5": {
"acc": 0.46895424836601307,
"acc_stderr": 0.020188804456361887,
"acc_norm": 0.46895424836601307,
"acc_norm_stderr": 0.020188804456361887
},
"harness|hendrycksTest-public_relations|5": {
"acc": 0.5636363636363636,
"acc_stderr": 0.04750185058907296,
"acc_norm": 0.5636363636363636,
"acc_norm_stderr": 0.04750185058907296
},
"harness|hendrycksTest-security_studies|5": {
"acc": 0.5387755102040817,
"acc_stderr": 0.031912820526692774,
"acc_norm": 0.5387755102040817,
"acc_norm_stderr": 0.031912820526692774
},
"harness|hendrycksTest-sociology|5": {
"acc": 0.6318407960199005,
"acc_stderr": 0.03410410565495302,
"acc_norm": 0.6318407960199005,
"acc_norm_stderr": 0.03410410565495302
},
"harness|hendrycksTest-us_foreign_policy|5": {
"acc": 0.72,
"acc_stderr": 0.04512608598542127,
"acc_norm": 0.72,
"acc_norm_stderr": 0.04512608598542127
},
"harness|hendrycksTest-virology|5": {
"acc": 0.42771084337349397,
"acc_stderr": 0.038515976837185335,
"acc_norm": 0.42771084337349397,
"acc_norm_stderr": 0.038515976837185335
},
"harness|hendrycksTest-world_religions|5": {
"acc": 0.6549707602339181,
"acc_stderr": 0.03645981377388806,
"acc_norm": 0.6549707602339181,
"acc_norm_stderr": 0.03645981377388806
},
"harness|truthfulqa:mc|0": {
"mc1": 0.2582619339045288,
"mc1_stderr": 0.0153218216884762,
"mc2": 0.4053787386286284,
"mc2_stderr": 0.013893490031868357
},
"all": {
"acc": 0.46685175478824187,
"acc_stderr": 0.03531409019484935,
"acc_norm": 0.47077526563025673,
"acc_norm_stderr": 0.035299387024960424,
"mc1": 0.2582619339045288,
"mc1_stderr": 0.0153218216884762,
"mc2": 0.4053787386286284,
"mc2_stderr": 0.013893490031868357
}
},
"versions": {
"harness|arc:challenge|25": 0,
"harness|hellaswag|10": 0,
"harness|hendrycksTest-abstract_algebra|5": 1,
"harness|hendrycksTest-anatomy|5": 1,
"harness|hendrycksTest-astronomy|5": 1,
"harness|hendrycksTest-business_ethics|5": 1,
"harness|hendrycksTest-clinical_knowledge|5": 1,
"harness|hendrycksTest-college_biology|5": 1,
"harness|hendrycksTest-college_chemistry|5": 1,
"harness|hendrycksTest-college_computer_science|5": 1,
"harness|hendrycksTest-college_mathematics|5": 1,
"harness|hendrycksTest-college_medicine|5": 1,
"harness|hendrycksTest-college_physics|5": 1,
"harness|hendrycksTest-computer_security|5": 1,
"harness|hendrycksTest-conceptual_physics|5": 1,
"harness|hendrycksTest-econometrics|5": 1,
"harness|hendrycksTest-electrical_engineering|5": 1,
"harness|hendrycksTest-elementary_mathematics|5": 1,
"harness|hendrycksTest-formal_logic|5": 1,
"harness|hendrycksTest-global_facts|5": 1,
"harness|hendrycksTest-high_school_biology|5": 1,
"harness|hendrycksTest-high_school_chemistry|5": 1,
"harness|hendrycksTest-high_school_computer_science|5": 1,
"harness|hendrycksTest-high_school_european_history|5": 1,
"harness|hendrycksTest-high_school_geography|5": 1,
"harness|hendrycksTest-high_school_government_and_politics|5": 1,
"harness|hendrycksTest-high_school_macroeconomics|5": 1,
"harness|hendrycksTest-high_school_mathematics|5": 1,
"harness|hendrycksTest-high_school_microeconomics|5": 1,
"harness|hendrycksTest-high_school_physics|5": 1,
"harness|hendrycksTest-high_school_psychology|5": 1,
"harness|hendrycksTest-high_school_statistics|5": 1,
"harness|hendrycksTest-high_school_us_history|5": 1,
"harness|hendrycksTest-high_school_world_history|5": 1,
"harness|hendrycksTest-human_aging|5": 1,
"harness|hendrycksTest-human_sexuality|5": 1,
"harness|hendrycksTest-international_law|5": 1,
"harness|hendrycksTest-jurisprudence|5": 1,
"harness|hendrycksTest-logical_fallacies|5": 1,
"harness|hendrycksTest-machine_learning|5": 1,
"harness|hendrycksTest-management|5": 1,
"harness|hendrycksTest-marketing|5": 1,
"harness|hendrycksTest-medical_genetics|5": 1,
"harness|hendrycksTest-miscellaneous|5": 1,
"harness|hendrycksTest-moral_disputes|5": 1,
"harness|hendrycksTest-moral_scenarios|5": 1,
"harness|hendrycksTest-nutrition|5": 1,
"harness|hendrycksTest-philosophy|5": 1,
"harness|hendrycksTest-prehistory|5": 1,
"harness|hendrycksTest-professional_accounting|5": 1,
"harness|hendrycksTest-professional_law|5": 1,
"harness|hendrycksTest-professional_medicine|5": 1,
"harness|hendrycksTest-professional_psychology|5": 1,
"harness|hendrycksTest-public_relations|5": 1,
"harness|hendrycksTest-security_studies|5": 1,
"harness|hendrycksTest-sociology|5": 1,
"harness|hendrycksTest-us_foreign_policy|5": 1,
"harness|hendrycksTest-virology|5": 1,
"harness|hendrycksTest-world_religions|5": 1,
"harness|truthfulqa:mc|0": 1,
"all": 0
},
"config_general": {
"model_name": "chaoyi-wu/MedLLaMA_13B",
"model_sha": "893557ef32f98cd01deb1c5d063be6d640ffa657",
"model_dtype": "torch.float16",
"lighteval_sha": "03c2fad20ff7f5334c33cfee459024b8d7e4a109",
"num_few_shot_default": 0,
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null
},
"config_tasks": {
"harness|arc:challenge": "LM Harness task",
"harness|hellaswag": "LM Harness task",
"harness|hendrycksTest-abstract_algebra": "LM Harness task",
"harness|hendrycksTest-anatomy": "LM Harness task",
"harness|hendrycksTest-astronomy": "LM Harness task",
"harness|hendrycksTest-business_ethics": "LM Harness task",
"harness|hendrycksTest-clinical_knowledge": "LM Harness task",
"harness|hendrycksTest-college_biology": "LM Harness task",
"harness|hendrycksTest-college_chemistry": "LM Harness task",
"harness|hendrycksTest-college_computer_science": "LM Harness task",
"harness|hendrycksTest-college_mathematics": "LM Harness task",
"harness|hendrycksTest-college_medicine": "LM Harness task",
"harness|hendrycksTest-college_physics": "LM Harness task",
"harness|hendrycksTest-computer_security": "LM Harness task",
"harness|hendrycksTest-conceptual_physics": "LM Harness task",
"harness|hendrycksTest-econometrics": "LM Harness task",
"harness|hendrycksTest-electrical_engineering": "LM Harness task",
"harness|hendrycksTest-elementary_mathematics": "LM Harness task",
"harness|hendrycksTest-formal_logic": "LM Harness task",
"harness|hendrycksTest-global_facts": "LM Harness task",
"harness|hendrycksTest-high_school_biology": "LM Harness task",
"harness|hendrycksTest-high_school_chemistry": "LM Harness task",
"harness|hendrycksTest-high_school_computer_science": "LM Harness task",
"harness|hendrycksTest-high_school_european_history": "LM Harness task",
"harness|hendrycksTest-high_school_geography": "LM Harness task",
"harness|hendrycksTest-high_school_government_and_politics": "LM Harness task",
"harness|hendrycksTest-high_school_macroeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_mathematics": "LM Harness task",
"harness|hendrycksTest-high_school_microeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_physics": "LM Harness task",
"harness|hendrycksTest-high_school_psychology": "LM Harness task",
"harness|hendrycksTest-high_school_statistics": "LM Harness task",
"harness|hendrycksTest-high_school_us_history": "LM Harness task",
"harness|hendrycksTest-high_school_world_history": "LM Harness task",
"harness|hendrycksTest-human_aging": "LM Harness task",
"harness|hendrycksTest-human_sexuality": "LM Harness task",
"harness|hendrycksTest-international_law": "LM Harness task",
"harness|hendrycksTest-jurisprudence": "LM Harness task",
"harness|hendrycksTest-logical_fallacies": "LM Harness task",
"harness|hendrycksTest-machine_learning": "LM Harness task",
"harness|hendrycksTest-management": "LM Harness task",
"harness|hendrycksTest-marketing": "LM Harness task",
"harness|hendrycksTest-medical_genetics": "LM Harness task",
"harness|hendrycksTest-miscellaneous": "LM Harness task",
"harness|hendrycksTest-moral_disputes": "LM Harness task",
"harness|hendrycksTest-moral_scenarios": "LM Harness task",
"harness|hendrycksTest-nutrition": "LM Harness task",
"harness|hendrycksTest-philosophy": "LM Harness task",
"harness|hendrycksTest-prehistory": "LM Harness task",
"harness|hendrycksTest-professional_accounting": "LM Harness task",
"harness|hendrycksTest-professional_law": "LM Harness task",
"harness|hendrycksTest-professional_medicine": "LM Harness task",
"harness|hendrycksTest-professional_psychology": "LM Harness task",
"harness|hendrycksTest-public_relations": "LM Harness task",
"harness|hendrycksTest-security_studies": "LM Harness task",
"harness|hendrycksTest-sociology": "LM Harness task",
"harness|hendrycksTest-us_foreign_policy": "LM Harness task",
"harness|hendrycksTest-virology": "LM Harness task",
"harness|hendrycksTest-world_religions": "LM Harness task",
"harness|truthfulqa:mc": "LM Harness task"
},
"summary_tasks": {
"harness|arc:challenge|25": {
"hashes": {
"hash_examples": "17b0cae357c0259e",
"hash_full_prompts": "045cbb916e5145c6",
"hash_input_tokens": "2b0e07d4cdd3b0fe",
"hash_cont_tokens": "52204555b6e39a6e"
},
"truncated": 0,
"non-truncated": 4687,
"padded": 4687,
"non-padded": 0,
"effective_few_shots": 25.0,
"num_truncated_few_shots": 0
},
"harness|hellaswag|10": {
"hashes": {
"hash_examples": "e1768ecb99d7ecf0",
"hash_full_prompts": "0b4c16983130f84f",
"hash_input_tokens": "578edd77107cb2c3",
"hash_cont_tokens": "25c49737526d9f80"
},
"truncated": 0,
"non-truncated": 40168,
"padded": 40113,
"non-padded": 55,
"effective_few_shots": 10.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-abstract_algebra|5": {
"hashes": {
"hash_examples": "280f9f325b40559a",
"hash_full_prompts": "2f776a367d23aea2",
"hash_input_tokens": "6a95a1511f8da075",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-anatomy|5": {
"hashes": {
"hash_examples": "2f83a4f1cab4ba18",
"hash_full_prompts": "516f74bef25df620",
"hash_input_tokens": "24a78edc4d9a93aa",
"hash_cont_tokens": "f11971a765cb609f"
},
"truncated": 0,
"non-truncated": 540,
"padded": 540,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-astronomy|5": {
"hashes": {
"hash_examples": "7d587b908da4d762",
"hash_full_prompts": "faf4e80f65de93ca",
"hash_input_tokens": "b11106668d6c0974",
"hash_cont_tokens": "ebed26cf74a85815"
},
"truncated": 0,
"non-truncated": 608,
"padded": 608,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-business_ethics|5": {
"hashes": {
"hash_examples": "33e51740670de686",
"hash_full_prompts": "db01c3ef8e1479d4",
"hash_input_tokens": "10180ba12a075cb0",
"hash_cont_tokens": "6898ac348a7ae442"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-clinical_knowledge|5": {
"hashes": {
"hash_examples": "f3366dbe7eefffa4",
"hash_full_prompts": "49654f71d94b65c3",
"hash_input_tokens": "73351ef4968750a2",
"hash_cont_tokens": "34a058958a45af94"
},
"truncated": 0,
"non-truncated": 1060,
"padded": 1060,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_biology|5": {
"hashes": {
"hash_examples": "ca2b6753a0193e7f",
"hash_full_prompts": "2b460b75f1fdfefd",
"hash_input_tokens": "a539150af234c668",
"hash_cont_tokens": "875cde3af7a0ee14"
},
"truncated": 0,
"non-truncated": 576,
"padded": 576,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_chemistry|5": {
"hashes": {
"hash_examples": "22ff85f1d34f42d1",
"hash_full_prompts": "242c9be6da583e95",
"hash_input_tokens": "52e12e5a43bcee35",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_computer_science|5": {
"hashes": {
"hash_examples": "30318289d717a5cf",
"hash_full_prompts": "ed2bdb4e87c4b371",
"hash_input_tokens": "d1f3721a5659f7ee",
"hash_cont_tokens": "da408cb12ab08288"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_mathematics|5": {
"hashes": {
"hash_examples": "4944d1f0b6b5d911",
"hash_full_prompts": "770bc4281c973190",
"hash_input_tokens": "f2d78f546b5595c2",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_medicine|5": {
"hashes": {
"hash_examples": "dd69cc33381275af",
"hash_full_prompts": "ad2a53e5250ab46e",
"hash_input_tokens": "c9cc19179f63d1d6",
"hash_cont_tokens": "370a1a0ab68d15cd"
},
"truncated": 0,
"non-truncated": 692,
"padded": 692,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_physics|5": {
"hashes": {
"hash_examples": "875dd26d22655b0d",
"hash_full_prompts": "833a0d7b55aed500",
"hash_input_tokens": "5046144e67e992e8",
"hash_cont_tokens": "f7b8097afc16a47c"
},
"truncated": 0,
"non-truncated": 408,
"padded": 408,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-computer_security|5": {
"hashes": {
"hash_examples": "006451eedc0ededb",
"hash_full_prompts": "94034c97e85d8f46",
"hash_input_tokens": "4b14581ba4fc06fc",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-conceptual_physics|5": {
"hashes": {
"hash_examples": "8874ece872d2ca4c",
"hash_full_prompts": "e40d15a34640d6fa",
"hash_input_tokens": "1ee52c413b5b4cc4",
"hash_cont_tokens": "aa0e8bc655f2f641"
},
"truncated": 0,
"non-truncated": 940,
"padded": 940,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-econometrics|5": {
"hashes": {
"hash_examples": "64d3623b0bfaa43f",
"hash_full_prompts": "612f340fae41338d",
"hash_input_tokens": "2914077c4dd3090a",
"hash_cont_tokens": "80dea4d59245cf01"
},
"truncated": 0,
"non-truncated": 456,
"padded": 456,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-electrical_engineering|5": {
"hashes": {
"hash_examples": "e98f51780c674d7e",
"hash_full_prompts": "10275b312d812ae6",
"hash_input_tokens": "0f88a874342378de",
"hash_cont_tokens": "2425a3f084a591ef"
},
"truncated": 0,
"non-truncated": 580,
"padded": 580,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-elementary_mathematics|5": {
"hashes": {
"hash_examples": "fc48208a5ac1c0ce",
"hash_full_prompts": "5ec274c6c82aca23",
"hash_input_tokens": "9889933f1dd02a23",
"hash_cont_tokens": "309bef1803097408"
},
"truncated": 0,
"non-truncated": 1512,
"padded": 1512,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-formal_logic|5": {
"hashes": {
"hash_examples": "5a6525665f63ea72",
"hash_full_prompts": "07b92638c4a6b500",
"hash_input_tokens": "dc309a94c4bfdd2f",
"hash_cont_tokens": "5105a3bd1b39b785"
},
"truncated": 0,
"non-truncated": 504,
"padded": 504,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-global_facts|5": {
"hashes": {
"hash_examples": "371d70d743b2b89b",
"hash_full_prompts": "332fdee50a1921b4",
"hash_input_tokens": "0801a0aebec3ba8c",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_biology|5": {
"hashes": {
"hash_examples": "a79e1018b1674052",
"hash_full_prompts": "e624e26ede922561",
"hash_input_tokens": "5bc4aca8831d9c05",
"hash_cont_tokens": "205c5deee1581b02"
},
"truncated": 0,
"non-truncated": 1240,
"padded": 1240,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_chemistry|5": {
"hashes": {
"hash_examples": "44bfc25c389f0e03",
"hash_full_prompts": "0e3e5f5d9246482a",
"hash_input_tokens": "b92bd6b06fc3464c",
"hash_cont_tokens": "272d28867e0ff046"
},
"truncated": 0,
"non-truncated": 812,
"padded": 812,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_computer_science|5": {
"hashes": {
"hash_examples": "8b8cdb1084f24169",
"hash_full_prompts": "c00487e67c1813cc",
"hash_input_tokens": "a549346cde8165e9",
"hash_cont_tokens": "98b3bf311aa83f0d"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_european_history|5": {
"hashes": {
"hash_examples": "11cd32d0ef440171",
"hash_full_prompts": "318f4513c537c6bf",
"hash_input_tokens": "f1f73dd687da18d7",
"hash_cont_tokens": "674fc454bdc5ac93"
},
"truncated": 660,
"non-truncated": 0,
"padded": 0,
"non-padded": 660,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_geography|5": {
"hashes": {
"hash_examples": "b60019b9e80b642f",
"hash_full_prompts": "ee5789fcc1a81b1e",
"hash_input_tokens": "e7e9cf91f9d6a081",
"hash_cont_tokens": "03a5012b916274ea"
},
"truncated": 0,
"non-truncated": 792,
"padded": 792,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "d221ec983d143dc3",
"hash_full_prompts": "ac42d888e1ce1155",
"hash_input_tokens": "a61a1670f854d9e1",
"hash_cont_tokens": "d9e66fc7c702b795"
},
"truncated": 0,
"non-truncated": 772,
"padded": 772,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "59c2915cacfd3fbb",
"hash_full_prompts": "c6bd9d25158abd0e",
"hash_input_tokens": "8a77cb7763f28110",
"hash_cont_tokens": "c583432ad27fcfe0"
},
"truncated": 0,
"non-truncated": 1560,
"padded": 1560,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_mathematics|5": {
"hashes": {
"hash_examples": "1f8ac897608de342",
"hash_full_prompts": "5d88f41fc2d643a8",
"hash_input_tokens": "fcfcfae391f8faa1",
"hash_cont_tokens": "d4b1936084c060e0"
},
"truncated": 0,
"non-truncated": 1080,
"padded": 1080,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"hashes": {
"hash_examples": "ead6a0f2f6c83370",
"hash_full_prompts": "bfc393381298609e",
"hash_input_tokens": "a29454cc1feb23ef",
"hash_cont_tokens": "f47f041de50333b9"
},
"truncated": 0,
"non-truncated": 952,
"padded": 952,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_physics|5": {
"hashes": {
"hash_examples": "c3f2025990afec64",
"hash_full_prompts": "fc78b4997e436734",
"hash_input_tokens": "b6734a25556d75dc",
"hash_cont_tokens": "2bf9921a39e901d9"
},
"truncated": 0,
"non-truncated": 604,
"padded": 604,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_psychology|5": {
"hashes": {
"hash_examples": "21f8aab618f6d636",
"hash_full_prompts": "d5c76aa40b9dbc43",
"hash_input_tokens": "5720438e29473426",
"hash_cont_tokens": "cab8b16be9576360"
},
"truncated": 0,
"non-truncated": 2180,
"padded": 2180,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_statistics|5": {
"hashes": {
"hash_examples": "2386a60a11fc5de3",
"hash_full_prompts": "4c5c8be5aafac432",
"hash_input_tokens": "486321d5858de240",
"hash_cont_tokens": "1c34fbe5a59f1ed1"
},
"truncated": 0,
"non-truncated": 864,
"padded": 864,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_us_history|5": {
"hashes": {
"hash_examples": "74961543be40f04f",
"hash_full_prompts": "5d5ca4840131ba21",
"hash_input_tokens": "50c9ff438c85a69e",
"hash_cont_tokens": "cdd0b3dc06d933e5"
},
"truncated": 816,
"non-truncated": 0,
"padded": 0,
"non-padded": 816,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_world_history|5": {
"hashes": {
"hash_examples": "2ad2f6b7198b2234",
"hash_full_prompts": "11845057459afd72",
"hash_input_tokens": "473919e64d1b8c80",
"hash_cont_tokens": "ebd714885a59ef55"
},
"truncated": 8,
"non-truncated": 940,
"padded": 940,
"non-padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_aging|5": {
"hashes": {
"hash_examples": "1a7199dc733e779b",
"hash_full_prompts": "756b9096b8eaf892",
"hash_input_tokens": "47a65c81fd7ed010",
"hash_cont_tokens": "142a4a8a1138a214"
},
"truncated": 0,
"non-truncated": 892,
"padded": 892,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_sexuality|5": {
"hashes": {
"hash_examples": "7acb8fdad97f88a6",
"hash_full_prompts": "731a52ff15b8cfdb",
"hash_input_tokens": "aedfcd41cbd2fcc9",
"hash_cont_tokens": "bc54813e809b796d"
},
"truncated": 0,
"non-truncated": 524,
"padded": 524,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-international_law|5": {
"hashes": {
"hash_examples": "1300bfd0dfc59114",
"hash_full_prompts": "db2aefbff5eec996",
"hash_input_tokens": "ed5f2414144d7b72",
"hash_cont_tokens": "aac52fa6a519223b"
},
"truncated": 0,
"non-truncated": 484,
"padded": 484,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-jurisprudence|5": {
"hashes": {
"hash_examples": "083b1e4904c48dc2",
"hash_full_prompts": "0f89ee3fe03d6a21",
"hash_input_tokens": "692eaacb5b747264",
"hash_cont_tokens": "e3a8cd951b6e3469"
},
"truncated": 0,
"non-truncated": 432,
"padded": 432,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-logical_fallacies|5": {
"hashes": {
"hash_examples": "709128f9926a634c",
"hash_full_prompts": "98a04b1f8f841069",
"hash_input_tokens": "2cbce4edca937588",
"hash_cont_tokens": "697179a0dd47c5c0"
},
"truncated": 0,
"non-truncated": 652,
"padded": 648,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-machine_learning|5": {
"hashes": {
"hash_examples": "88f22a636029ae47",
"hash_full_prompts": "2e1c8d4b1e0cc921",
"hash_input_tokens": "c2f38b19bab1aa2c",
"hash_cont_tokens": "9b19898e3ecb527f"
},
"truncated": 0,
"non-truncated": 448,
"padded": 448,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-management|5": {
"hashes": {
"hash_examples": "8c8a1e07a2151dca",
"hash_full_prompts": "f51611f514b265b0",
"hash_input_tokens": "fde277bc547bc3d8",
"hash_cont_tokens": "a01d6d39a83c4597"
},
"truncated": 0,
"non-truncated": 412,
"padded": 412,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-marketing|5": {
"hashes": {
"hash_examples": "2668953431f91e96",
"hash_full_prompts": "77562bef997c7650",
"hash_input_tokens": "87b232bbebce39db",
"hash_cont_tokens": "6aeaed4d823c98aa"
},
"truncated": 0,
"non-truncated": 936,
"padded": 936,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-medical_genetics|5": {
"hashes": {
"hash_examples": "9c2dda34a2ea4fd2",
"hash_full_prompts": "202139046daa118f",
"hash_input_tokens": "58c21af9da3e126e",
"hash_cont_tokens": "50421e30bef398f9"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-miscellaneous|5": {
"hashes": {
"hash_examples": "41adb694024809c2",
"hash_full_prompts": "bffec9fc237bcf93",
"hash_input_tokens": "d1f5c770d368e9c6",
"hash_cont_tokens": "9b0ab02a64603081"
},
"truncated": 0,
"non-truncated": 3132,
"padded": 3132,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_disputes|5": {
"hashes": {
"hash_examples": "3171c13ba3c594c4",
"hash_full_prompts": "170831fc36f1d59e",
"hash_input_tokens": "98d6db15a50aaa8e",
"hash_cont_tokens": "1e30d7dedc7588c0"
},
"truncated": 0,
"non-truncated": 1384,
"padded": 1354,
"non-padded": 30,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_scenarios|5": {
"hashes": {
"hash_examples": "9873e077e83e0546",
"hash_full_prompts": "08f4ceba3131a068",
"hash_input_tokens": "2aabd8c7337502f8",
"hash_cont_tokens": "ceee291786cbb123"
},
"truncated": 0,
"non-truncated": 3580,
"padded": 3580,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-nutrition|5": {
"hashes": {
"hash_examples": "7db1d8142ec14323",
"hash_full_prompts": "4c0e68e3586cb453",
"hash_input_tokens": "17f8c8f2d4a0a9b1",
"hash_cont_tokens": "484df4c25a5460bd"
},
"truncated": 0,
"non-truncated": 1224,
"padded": 1224,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-philosophy|5": {
"hashes": {
"hash_examples": "9b455b7d72811cc8",
"hash_full_prompts": "e467f822d8a0d3ff",
"hash_input_tokens": "dfc6df491d991966",
"hash_cont_tokens": "9f6ff69d23a48783"
},
"truncated": 0,
"non-truncated": 1244,
"padded": 1244,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-prehistory|5": {
"hashes": {
"hash_examples": "8be90d0f538f1560",
"hash_full_prompts": "152187949bcd0921",
"hash_input_tokens": "cffe8139e00da9dd",
"hash_cont_tokens": "85a9de6c685b7035"
},
"truncated": 0,
"non-truncated": 1296,
"padded": 1296,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_accounting|5": {
"hashes": {
"hash_examples": "8d377597916cd07e",
"hash_full_prompts": "0eb7345d6144ee0d",
"hash_input_tokens": "4a69ed6ee55918fb",
"hash_cont_tokens": "ad7b5a040535bdcf"
},
"truncated": 0,
"non-truncated": 1128,
"padded": 1128,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_law|5": {
"hashes": {
"hash_examples": "cd9dbc52b3c932d6",
"hash_full_prompts": "36ac764272bfb182",
"hash_input_tokens": "6cc713f12b5890de",
"hash_cont_tokens": "2e590029ef41fbcd"
},
"truncated": 604,
"non-truncated": 5532,
"padded": 5524,
"non-padded": 612,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_medicine|5": {
"hashes": {
"hash_examples": "b20e4e816c1e383e",
"hash_full_prompts": "7b8d69ea2acaf2f7",
"hash_input_tokens": "b4044fc92756c377",
"hash_cont_tokens": "0b7b5aaef574dc78"
},
"truncated": 0,
"non-truncated": 1088,
"padded": 1088,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_psychology|5": {
"hashes": {
"hash_examples": "d45b73b22f9cc039",
"hash_full_prompts": "fe8937e9ffc99771",
"hash_input_tokens": "b019784da8db089a",
"hash_cont_tokens": "63a651778e8d72d2"
},
"truncated": 0,
"non-truncated": 2448,
"padded": 2448,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-public_relations|5": {
"hashes": {
"hash_examples": "0d25072e1761652a",
"hash_full_prompts": "f9adc39cfa9f42ba",
"hash_input_tokens": "f47f37c7c9bfc601",
"hash_cont_tokens": "841583ab707b25d7"
},
"truncated": 0,
"non-truncated": 440,
"padded": 440,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-security_studies|5": {
"hashes": {
"hash_examples": "62bb8197e63d60d4",
"hash_full_prompts": "869c9c3ae196b7c3",
"hash_input_tokens": "4d282718d6142410",
"hash_cont_tokens": "9c2c01d3214f66b8"
},
"truncated": 0,
"non-truncated": 980,
"padded": 980,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-sociology|5": {
"hashes": {
"hash_examples": "e7959df87dea8672",
"hash_full_prompts": "1a1fc00e17b3a52a",
"hash_input_tokens": "fbc6026e500537bc",
"hash_cont_tokens": "c3a3bdfd177eed5b"
},
"truncated": 0,
"non-truncated": 804,
"padded": 804,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-us_foreign_policy|5": {
"hashes": {
"hash_examples": "4a56a01ddca44dca",
"hash_full_prompts": "0c7a7081c71c07b6",
"hash_input_tokens": "150dd1ff81ff642e",
"hash_cont_tokens": "96353c5969a9028a"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-virology|5": {
"hashes": {
"hash_examples": "451cc86a8c4f4fe9",
"hash_full_prompts": "01e95325d8b738e4",
"hash_input_tokens": "fcbac3e735545969",
"hash_cont_tokens": "a1f8901800ac9b68"
},
"truncated": 0,
"non-truncated": 664,
"padded": 664,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-world_religions|5": {
"hashes": {
"hash_examples": "3b29cfaf1a81c379",
"hash_full_prompts": "e0d79a15083dfdff",
"hash_input_tokens": "ffc962a38441ef13",
"hash_cont_tokens": "08c0be345e5f1c12"
},
"truncated": 0,
"non-truncated": 684,
"padded": 684,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|truthfulqa:mc|0": {
"hashes": {
"hash_examples": "23176c0531c7b867",
"hash_full_prompts": "36a6d90e75d92d4a",
"hash_input_tokens": "9ffb65d225ae550f",
"hash_cont_tokens": "16c760a491c6f26e"
},
"truncated": 0,
"non-truncated": 9996,
"padded": 9996,
"non-padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "d84d18e9a963753d",
"hash_full_prompts": "12b540783521a8e6",
"hash_input_tokens": "1c61d6705b299f5c",
"hash_cont_tokens": "868d6f1055fbd51d"
},
"total_evaluation_time_secondes": "3780.4133019447327",
"truncated": 2088,
"non-truncated": 108931,
"padded": 108834,
"non-padded": 2185,
"num_truncated_few_shots": 0
}
}