{ "arc_easy": { "alias": "arc_easy", "acc,none": 0.3501683501683502, "acc_stderr,none": 0.00978829541009327, "acc_norm,none": 0.3333333333333333, "acc_norm_stderr,none": 0.009673016668133489 }, "blimp": { "acc,none": 0.6717313432835821, "acc_stderr,none": 0.0016479510604007095, "alias": "blimp" }, "blimp_adjunct_island": { "alias": " - blimp_adjunct_island", "acc,none": 0.8, "acc_stderr,none": 0.012655439943366653 }, "blimp_anaphor_gender_agreement": { "alias": " - blimp_anaphor_gender_agreement", "acc,none": 0.877, "acc_stderr,none": 0.010391293421849803 }, "blimp_anaphor_number_agreement": { "alias": " - blimp_anaphor_number_agreement", "acc,none": 0.947, "acc_stderr,none": 0.00708810561724649 }, "blimp_animate_subject_passive": { "alias": " - blimp_animate_subject_passive", "acc,none": 0.725, "acc_stderr,none": 0.014127086556490498 }, "blimp_animate_subject_trans": { "alias": " - blimp_animate_subject_trans", "acc,none": 0.802, "acc_stderr,none": 0.012607733934175283 }, "blimp_causative": { "alias": " - blimp_causative", "acc,none": 0.739, "acc_stderr,none": 0.013895037677965183 }, "blimp_complex_NP_island": { "alias": " - blimp_complex_NP_island", "acc,none": 0.627, "acc_stderr,none": 0.015300493622922927 }, "blimp_coordinate_structure_constraint_complex_left_branch": { "alias": " - blimp_coordinate_structure_constraint_complex_left_branch", "acc,none": 0.424, "acc_stderr,none": 0.015635487471405186 }, "blimp_coordinate_structure_constraint_object_extraction": { "alias": " - blimp_coordinate_structure_constraint_object_extraction", "acc,none": 0.78, "acc_stderr,none": 0.013106173040661862 }, "blimp_determiner_noun_agreement_1": { "alias": " - blimp_determiner_noun_agreement_1", "acc,none": 0.966, "acc_stderr,none": 0.0057338361396953854 }, "blimp_determiner_noun_agreement_2": { "alias": " - blimp_determiner_noun_agreement_2", "acc,none": 0.886, "acc_stderr,none": 0.010055103435823278 }, "blimp_determiner_noun_agreement_irregular_1": { "alias": " - blimp_determiner_noun_agreement_irregular_1", "acc,none": 0.864, "acc_stderr,none": 0.01084535023047304 }, "blimp_determiner_noun_agreement_irregular_2": { "alias": " - blimp_determiner_noun_agreement_irregular_2", "acc,none": 0.854, "acc_stderr,none": 0.011171786285496551 }, "blimp_determiner_noun_agreement_with_adj_2": { "alias": " - blimp_determiner_noun_agreement_with_adj_2", "acc,none": 0.826, "acc_stderr,none": 0.011994493230973447 }, "blimp_determiner_noun_agreement_with_adj_irregular_1": { "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_1", "acc,none": 0.766, "acc_stderr,none": 0.01339490288966006 }, "blimp_determiner_noun_agreement_with_adj_irregular_2": { "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_2", "acc,none": 0.775, "acc_stderr,none": 0.013211720158614833 }, "blimp_determiner_noun_agreement_with_adjective_1": { "alias": " - blimp_determiner_noun_agreement_with_adjective_1", "acc,none": 0.9, "acc_stderr,none": 0.00949157995752499 }, "blimp_distractor_agreement_relational_noun": { "alias": " - blimp_distractor_agreement_relational_noun", "acc,none": 0.578, "acc_stderr,none": 0.015625625112620622 }, "blimp_distractor_agreement_relative_clause": { "alias": " - blimp_distractor_agreement_relative_clause", "acc,none": 0.508, "acc_stderr,none": 0.015817274929209084 }, "blimp_drop_argument": { "alias": " - blimp_drop_argument", "acc,none": 0.677, "acc_stderr,none": 0.01479492784334854 }, "blimp_ellipsis_n_bar_1": { "alias": " - blimp_ellipsis_n_bar_1", "acc,none": 0.767, "acc_stderr,none": 0.01337497251921999 }, "blimp_ellipsis_n_bar_2": { "alias": " - blimp_ellipsis_n_bar_2", "acc,none": 0.784, "acc_stderr,none": 0.013019735539307761 }, "blimp_existential_there_object_raising": { "alias": " - blimp_existential_there_object_raising", "acc,none": 0.758, "acc_stderr,none": 0.013550631705556003 }, "blimp_existential_there_quantifiers_1": { "alias": " - blimp_existential_there_quantifiers_1", "acc,none": 0.728, "acc_stderr,none": 0.014078856992462642 }, "blimp_existential_there_quantifiers_2": { "alias": " - blimp_existential_there_quantifiers_2", "acc,none": 0.224, "acc_stderr,none": 0.013190830072364589 }, "blimp_existential_there_subject_raising": { "alias": " - blimp_existential_there_subject_raising", "acc,none": 0.66, "acc_stderr,none": 0.014987482264363972 }, "blimp_expletive_it_object_raising": { "alias": " - blimp_expletive_it_object_raising", "acc,none": 0.67, "acc_stderr,none": 0.014876872027456623 }, "blimp_inchoative": { "alias": " - blimp_inchoative", "acc,none": 0.73, "acc_stderr,none": 0.01404625563263382 }, "blimp_intransitive": { "alias": " - blimp_intransitive", "acc,none": 0.824, "acc_stderr,none": 0.012048616898597498 }, "blimp_irregular_past_participle_adjectives": { "alias": " - blimp_irregular_past_participle_adjectives", "acc,none": 0.942, "acc_stderr,none": 0.0073953154557929906 }, "blimp_irregular_past_participle_verbs": { "alias": " - blimp_irregular_past_participle_verbs", "acc,none": 0.777, "acc_stderr,none": 0.013169830843425608 }, "blimp_irregular_plural_subject_verb_agreement_1": { "alias": " - blimp_irregular_plural_subject_verb_agreement_1", "acc,none": 0.767, "acc_stderr,none": 0.01337497251921999 }, "blimp_irregular_plural_subject_verb_agreement_2": { "alias": " - blimp_irregular_plural_subject_verb_agreement_2", "acc,none": 0.813, "acc_stderr,none": 0.012336254828074168 }, "blimp_left_branch_island_echo_question": { "alias": " - blimp_left_branch_island_echo_question", "acc,none": 0.323, "acc_stderr,none": 0.014794927843348541 }, "blimp_left_branch_island_simple_question": { "alias": " - blimp_left_branch_island_simple_question", "acc,none": 0.474, "acc_stderr,none": 0.015797897758042797 }, "blimp_matrix_question_npi_licensor_present": { "alias": " - blimp_matrix_question_npi_licensor_present", "acc,none": 0.64, "acc_stderr,none": 0.015186527932039985 }, "blimp_npi_present_1": { "alias": " - blimp_npi_present_1", "acc,none": 0.192, "acc_stderr,none": 0.01246159264666003 }, "blimp_npi_present_2": { "alias": " - blimp_npi_present_2", "acc,none": 0.213, "acc_stderr,none": 0.012953717566737312 }, "blimp_only_npi_licensor_present": { "alias": " - blimp_only_npi_licensor_present", "acc,none": 0.778, "acc_stderr,none": 0.013148721948877349 }, "blimp_only_npi_scope": { "alias": " - blimp_only_npi_scope", "acc,none": 0.545, "acc_stderr,none": 0.015755101498347232 }, "blimp_passive_1": { "alias": " - blimp_passive_1", "acc,none": 0.745, "acc_stderr,none": 0.013790038620872863 }, "blimp_passive_2": { "alias": " - blimp_passive_2", "acc,none": 0.713, "acc_stderr,none": 0.014312087053810001 }, "blimp_principle_A_c_command": { "alias": " - blimp_principle_A_c_command", "acc,none": 0.525, "acc_stderr,none": 0.015799513429996023 }, "blimp_principle_A_case_1": { "alias": " - blimp_principle_A_case_1", "acc,none": 1.0, "acc_stderr,none": 0.0 }, "blimp_principle_A_case_2": { "alias": " - blimp_principle_A_case_2", "acc,none": 0.883, "acc_stderr,none": 0.010169287802713345 }, "blimp_principle_A_domain_1": { "alias": " - blimp_principle_A_domain_1", "acc,none": 0.806, "acc_stderr,none": 0.012510816141264347 }, "blimp_principle_A_domain_2": { "alias": " - blimp_principle_A_domain_2", "acc,none": 0.661, "acc_stderr,none": 0.014976758771620224 }, "blimp_principle_A_domain_3": { "alias": " - blimp_principle_A_domain_3", "acc,none": 0.559, "acc_stderr,none": 0.015708779894242766 }, "blimp_principle_A_reconstruction": { "alias": " - blimp_principle_A_reconstruction", "acc,none": 0.595, "acc_stderr,none": 0.01553113699045296 }, "blimp_regular_plural_subject_verb_agreement_1": { "alias": " - blimp_regular_plural_subject_verb_agreement_1", "acc,none": 0.799, "acc_stderr,none": 0.012679107214617274 }, "blimp_regular_plural_subject_verb_agreement_2": { "alias": " - blimp_regular_plural_subject_verb_agreement_2", "acc,none": 0.834, "acc_stderr,none": 0.011772110370812133 }, "blimp_sentential_negation_npi_licensor_present": { "alias": " - blimp_sentential_negation_npi_licensor_present", "acc,none": 0.941, "acc_stderr,none": 0.007454835650406693 }, "blimp_sentential_negation_npi_scope": { "alias": " - blimp_sentential_negation_npi_scope", "acc,none": 0.278, "acc_stderr,none": 0.014174516461485272 }, "blimp_sentential_subject_island": { "alias": " - blimp_sentential_subject_island", "acc,none": 0.298, "acc_stderr,none": 0.014470846741134585 }, "blimp_superlative_quantifiers_1": { "alias": " - blimp_superlative_quantifiers_1", "acc,none": 0.499, "acc_stderr,none": 0.015819268290576817 }, "blimp_superlative_quantifiers_2": { "alias": " - blimp_superlative_quantifiers_2", "acc,none": 0.245, "acc_stderr,none": 0.01360735683959821 }, "blimp_tough_vs_raising_1": { "alias": " - blimp_tough_vs_raising_1", "acc,none": 0.711, "acc_stderr,none": 0.014341711358296287 }, "blimp_tough_vs_raising_2": { "alias": " - blimp_tough_vs_raising_2", "acc,none": 0.564, "acc_stderr,none": 0.015689173023144022 }, "blimp_transitive": { "alias": " - blimp_transitive", "acc,none": 0.711, "acc_stderr,none": 0.014341711358296287 }, "blimp_wh_island": { "alias": " - blimp_wh_island", "acc,none": 0.531, "acc_stderr,none": 0.015788865959538965 }, "blimp_wh_questions_object_gap": { "alias": " - blimp_wh_questions_object_gap", "acc,none": 0.45, "acc_stderr,none": 0.015740004693383918 }, "blimp_wh_questions_subject_gap": { "alias": " - blimp_wh_questions_subject_gap", "acc,none": 0.482, "acc_stderr,none": 0.01580904569940659 }, "blimp_wh_questions_subject_gap_long_distance": { "alias": " - blimp_wh_questions_subject_gap_long_distance", "acc,none": 0.648, "acc_stderr,none": 0.015110404505648562 }, "blimp_wh_vs_that_no_gap": { "alias": " - blimp_wh_vs_that_no_gap", "acc,none": 0.691, "acc_stderr,none": 0.014619600977206347 }, "blimp_wh_vs_that_no_gap_long_distance": { "alias": " - blimp_wh_vs_that_no_gap_long_distance", "acc,none": 0.765, "acc_stderr,none": 0.013414729030247124 }, "blimp_wh_vs_that_with_gap": { "alias": " - blimp_wh_vs_that_with_gap", "acc,none": 0.706, "acc_stderr,none": 0.014414290540008265 }, "blimp_wh_vs_that_with_gap_long_distance": { "alias": " - blimp_wh_vs_that_with_gap_long_distance", "acc,none": 0.436, "acc_stderr,none": 0.015689173023144022 }, "c4": { "alias": "c4", "word_perplexity,none": 1163.1545771571277, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 3.252269929955642, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.701447002322918, "bits_per_byte_stderr,none": "N/A" }, "hellaswag": { "alias": "hellaswag", "acc,none": 0.27325234017128064, "acc_stderr,none": 0.004447185883327611, "acc_norm,none": 0.29396534554869547, "acc_norm_stderr,none": 0.004546451825028415 }, "piqa": { "alias": "piqa", "acc,none": 0.6126224156692056, "acc_stderr,none": 0.011366038083435972, "acc_norm,none": 0.6147986942328618, "acc_norm_stderr,none": 0.011354179751257222 }, "wikitext": { "alias": "wikitext", "word_perplexity,none": 765.7105583499606, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 3.4620593087389477, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.7916304398087262, "bits_per_byte_stderr,none": "N/A" } }