{ "arc_easy": { "alias": "arc_easy", "acc,none": 0.3207070707070707, "acc_stderr,none": 0.009577474571108819, "acc_norm,none": 0.30892255892255893, "acc_norm_stderr,none": 0.009481048387761367 }, "blimp": { "acc,none": 0.6446268656716417, "acc_stderr,none": 0.0016889805457528922, "alias": "blimp" }, "blimp_adjunct_island": { "alias": " - blimp_adjunct_island", "acc,none": 0.925, "acc_stderr,none": 0.008333333333333333 }, "blimp_anaphor_gender_agreement": { "alias": " - blimp_anaphor_gender_agreement", "acc,none": 0.841, "acc_stderr,none": 0.011569479368271195 }, "blimp_anaphor_number_agreement": { "alias": " - blimp_anaphor_number_agreement", "acc,none": 0.919, "acc_stderr,none": 0.008632121032139986 }, "blimp_animate_subject_passive": { "alias": " - blimp_animate_subject_passive", "acc,none": 0.678, "acc_stderr,none": 0.014782913600996745 }, "blimp_animate_subject_trans": { "alias": " - blimp_animate_subject_trans", "acc,none": 0.39, "acc_stderr,none": 0.01543172505386673 }, "blimp_causative": { "alias": " - blimp_causative", "acc,none": 0.536, "acc_stderr,none": 0.015778243024904673 }, "blimp_complex_NP_island": { "alias": " - blimp_complex_NP_island", "acc,none": 0.632, "acc_stderr,none": 0.015258073561521743 }, "blimp_coordinate_structure_constraint_complex_left_branch": { "alias": " - blimp_coordinate_structure_constraint_complex_left_branch", "acc,none": 0.6, "acc_stderr,none": 0.015499685165842571 }, "blimp_coordinate_structure_constraint_object_extraction": { "alias": " - blimp_coordinate_structure_constraint_object_extraction", "acc,none": 0.517, "acc_stderr,none": 0.015810153729833274 }, "blimp_determiner_noun_agreement_1": { "alias": " - blimp_determiner_noun_agreement_1", "acc,none": 0.912, "acc_stderr,none": 0.008963053962592012 }, "blimp_determiner_noun_agreement_2": { "alias": " - blimp_determiner_noun_agreement_2", "acc,none": 0.831, "acc_stderr,none": 0.011856625977890152 }, "blimp_determiner_noun_agreement_irregular_1": { "alias": " - blimp_determiner_noun_agreement_irregular_1", "acc,none": 0.85, "acc_stderr,none": 0.011297239823409418 }, "blimp_determiner_noun_agreement_irregular_2": { "alias": " - blimp_determiner_noun_agreement_irregular_2", "acc,none": 0.803, "acc_stderr,none": 0.012583693787968196 }, "blimp_determiner_noun_agreement_with_adj_2": { "alias": " - blimp_determiner_noun_agreement_with_adj_2", "acc,none": 0.748, "acc_stderr,none": 0.013736254390651213 }, "blimp_determiner_noun_agreement_with_adj_irregular_1": { "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_1", "acc,none": 0.735, "acc_stderr,none": 0.013963164754810064 }, "blimp_determiner_noun_agreement_with_adj_irregular_2": { "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_2", "acc,none": 0.677, "acc_stderr,none": 0.01479492784334854 }, "blimp_determiner_noun_agreement_with_adjective_1": { "alias": " - blimp_determiner_noun_agreement_with_adjective_1", "acc,none": 0.816, "acc_stderr,none": 0.01225945734093864 }, "blimp_distractor_agreement_relational_noun": { "alias": " - blimp_distractor_agreement_relational_noun", "acc,none": 0.604, "acc_stderr,none": 0.015473313265859495 }, "blimp_distractor_agreement_relative_clause": { "alias": " - blimp_distractor_agreement_relative_clause", "acc,none": 0.553, "acc_stderr,none": 0.015730176046009098 }, "blimp_drop_argument": { "alias": " - blimp_drop_argument", "acc,none": 0.741, "acc_stderr,none": 0.013860415257527861 }, "blimp_ellipsis_n_bar_1": { "alias": " - blimp_ellipsis_n_bar_1", "acc,none": 0.928, "acc_stderr,none": 0.008178195576218687 }, "blimp_ellipsis_n_bar_2": { "alias": " - blimp_ellipsis_n_bar_2", "acc,none": 0.811, "acc_stderr,none": 0.01238678458811779 }, "blimp_existential_there_object_raising": { "alias": " - blimp_existential_there_object_raising", "acc,none": 0.47, "acc_stderr,none": 0.015790799515836725 }, "blimp_existential_there_quantifiers_1": { "alias": " - blimp_existential_there_quantifiers_1", "acc,none": 0.772, "acc_stderr,none": 0.013273740700804434 }, "blimp_existential_there_quantifiers_2": { "alias": " - blimp_existential_there_quantifiers_2", "acc,none": 0.505, "acc_stderr,none": 0.015818508944436743 }, "blimp_existential_there_subject_raising": { "alias": " - blimp_existential_there_subject_raising", "acc,none": 0.39, "acc_stderr,none": 0.01543172505386673 }, "blimp_expletive_it_object_raising": { "alias": " - blimp_expletive_it_object_raising", "acc,none": 0.453, "acc_stderr,none": 0.015749255189977683 }, "blimp_inchoative": { "alias": " - blimp_inchoative", "acc,none": 0.747, "acc_stderr,none": 0.013754278613587126 }, "blimp_intransitive": { "alias": " - blimp_intransitive", "acc,none": 0.857, "acc_stderr,none": 0.011075814808567074 }, "blimp_irregular_past_participle_adjectives": { "alias": " - blimp_irregular_past_participle_adjectives", "acc,none": 0.816, "acc_stderr,none": 0.01225945734093864 }, "blimp_irregular_past_participle_verbs": { "alias": " - blimp_irregular_past_participle_verbs", "acc,none": 0.637, "acc_stderr,none": 0.01521389044467136 }, "blimp_irregular_plural_subject_verb_agreement_1": { "alias": " - blimp_irregular_plural_subject_verb_agreement_1", "acc,none": 0.728, "acc_stderr,none": 0.014078856992462642 }, "blimp_irregular_plural_subject_verb_agreement_2": { "alias": " - blimp_irregular_plural_subject_verb_agreement_2", "acc,none": 0.685, "acc_stderr,none": 0.014696631960792617 }, "blimp_left_branch_island_echo_question": { "alias": " - blimp_left_branch_island_echo_question", "acc,none": 0.099, "acc_stderr,none": 0.009449248027662746 }, "blimp_left_branch_island_simple_question": { "alias": " - blimp_left_branch_island_simple_question", "acc,none": 0.494, "acc_stderr,none": 0.015818160898606836 }, "blimp_matrix_question_npi_licensor_present": { "alias": " - blimp_matrix_question_npi_licensor_present", "acc,none": 0.875, "acc_stderr,none": 0.010463483381956722 }, "blimp_npi_present_1": { "alias": " - blimp_npi_present_1", "acc,none": 0.202, "acc_stderr,none": 0.01270265158765513 }, "blimp_npi_present_2": { "alias": " - blimp_npi_present_2", "acc,none": 0.207, "acc_stderr,none": 0.012818553557844009 }, "blimp_only_npi_licensor_present": { "alias": " - blimp_only_npi_licensor_present", "acc,none": 0.708, "acc_stderr,none": 0.014385511563477433 }, "blimp_only_npi_scope": { "alias": " - blimp_only_npi_scope", "acc,none": 0.582, "acc_stderr,none": 0.015605111967541904 }, "blimp_passive_1": { "alias": " - blimp_passive_1", "acc,none": 0.558, "acc_stderr,none": 0.01571250721186415 }, "blimp_passive_2": { "alias": " - blimp_passive_2", "acc,none": 0.742, "acc_stderr,none": 0.013842963108656601 }, "blimp_principle_A_c_command": { "alias": " - blimp_principle_A_c_command", "acc,none": 0.616, "acc_stderr,none": 0.015387682761896958 }, "blimp_principle_A_case_1": { "alias": " - blimp_principle_A_case_1", "acc,none": 0.775, "acc_stderr,none": 0.013211720158614833 }, "blimp_principle_A_case_2": { "alias": " - blimp_principle_A_case_2", "acc,none": 0.506, "acc_stderr,none": 0.015818160898606836 }, "blimp_principle_A_domain_1": { "alias": " - blimp_principle_A_domain_1", "acc,none": 0.957, "acc_stderr,none": 0.006418114379799739 }, "blimp_principle_A_domain_2": { "alias": " - blimp_principle_A_domain_2", "acc,none": 0.728, "acc_stderr,none": 0.014078856992462642 }, "blimp_principle_A_domain_3": { "alias": " - blimp_principle_A_domain_3", "acc,none": 0.591, "acc_stderr,none": 0.015555094373257946 }, "blimp_principle_A_reconstruction": { "alias": " - blimp_principle_A_reconstruction", "acc,none": 0.284, "acc_stderr,none": 0.014267009061031241 }, "blimp_regular_plural_subject_verb_agreement_1": { "alias": " - blimp_regular_plural_subject_verb_agreement_1", "acc,none": 0.569, "acc_stderr,none": 0.015667944488173442 }, "blimp_regular_plural_subject_verb_agreement_2": { "alias": " - blimp_regular_plural_subject_verb_agreement_2", "acc,none": 0.693, "acc_stderr,none": 0.01459328489285274 }, "blimp_sentential_negation_npi_licensor_present": { "alias": " - blimp_sentential_negation_npi_licensor_present", "acc,none": 0.865, "acc_stderr,none": 0.010811655372416006 }, "blimp_sentential_negation_npi_scope": { "alias": " - blimp_sentential_negation_npi_scope", "acc,none": 0.576, "acc_stderr,none": 0.015635487471405186 }, "blimp_sentential_subject_island": { "alias": " - blimp_sentential_subject_island", "acc,none": 0.158, "acc_stderr,none": 0.011539894677559635 }, "blimp_superlative_quantifiers_1": { "alias": " - blimp_superlative_quantifiers_1", "acc,none": 0.586, "acc_stderr,none": 0.01558354410417755 }, "blimp_superlative_quantifiers_2": { "alias": " - blimp_superlative_quantifiers_2", "acc,none": 0.424, "acc_stderr,none": 0.015635487471405186 }, "blimp_tough_vs_raising_1": { "alias": " - blimp_tough_vs_raising_1", "acc,none": 0.898, "acc_stderr,none": 0.009575368801653944 }, "blimp_tough_vs_raising_2": { "alias": " - blimp_tough_vs_raising_2", "acc,none": 0.419, "acc_stderr,none": 0.015610338967577951 }, "blimp_transitive": { "alias": " - blimp_transitive", "acc,none": 0.563, "acc_stderr,none": 0.01569322392873043 }, "blimp_wh_island": { "alias": " - blimp_wh_island", "acc,none": 0.673, "acc_stderr,none": 0.014842213153411162 }, "blimp_wh_questions_object_gap": { "alias": " - blimp_wh_questions_object_gap", "acc,none": 0.664, "acc_stderr,none": 0.014944140233794895 }, "blimp_wh_questions_subject_gap": { "alias": " - blimp_wh_questions_subject_gap", "acc,none": 0.701, "acc_stderr,none": 0.014484778521220433 }, "blimp_wh_questions_subject_gap_long_distance": { "alias": " - blimp_wh_questions_subject_gap_long_distance", "acc,none": 0.682, "acc_stderr,none": 0.014734079309311903 }, "blimp_wh_vs_that_no_gap": { "alias": " - blimp_wh_vs_that_no_gap", "acc,none": 0.457, "acc_stderr,none": 0.0157606915901365 }, "blimp_wh_vs_that_no_gap_long_distance": { "alias": " - blimp_wh_vs_that_no_gap_long_distance", "acc,none": 0.616, "acc_stderr,none": 0.015387682761896958 }, "blimp_wh_vs_that_with_gap": { "alias": " - blimp_wh_vs_that_with_gap", "acc,none": 0.927, "acc_stderr,none": 0.008230354715243976 }, "blimp_wh_vs_that_with_gap_long_distance": { "alias": " - blimp_wh_vs_that_with_gap_long_distance", "acc,none": 0.688, "acc_stderr,none": 0.014658474370509057 }, "c4": { "alias": "c4", "word_perplexity,none": 657.6550058046406, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 2.9567365779174586, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.5640057163493992, "bits_per_byte_stderr,none": "N/A" }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.2545309699263095, "acc_stderr,none": 0.0043470700195274055, "acc_norm,none": 0.26269667396932883, "acc_norm_stderr,none": 0.0043919956375421555 }, "piqa": { "alias": "piqa", "acc,none": 0.5701849836779108, "acc_stderr,none": 0.01155032226869392, "acc_norm,none": 0.55930359085963, "acc_norm_stderr,none": 0.011583478090656981 }, "wikitext": { "alias": "wikitext", "word_perplexity,none": 566.8611905392944, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 3.27275813757744, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.7105069886916633, "bits_per_byte_stderr,none": "N/A" } }