Muennighoff commited on
Commit
066fc61
·
1 Parent(s): 9d1ea06
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +9 -0
  2. xp3capmixnewcodelonglossseq_global_step1245/Context_Section_Type/results.json +9 -0
  3. xp3capmixnewcodelonglossseq_global_step1245/config.json +25 -0
  4. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_body_title_to_star/results.json +9 -0
  5. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_review_to_star/results.json +9 -0
  6. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_title_to_star/results.json +9 -0
  7. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_body_title_to_star/results.json +9 -0
  8. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_review_to_star/results.json +9 -0
  9. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_title_to_star/results.json +9 -0
  10. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json +9 -0
  11. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_review_to_star/results.json +9 -0
  12. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_title_to_star/results.json +9 -0
  13. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json +9 -0
  14. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_review_to_star/results.json +9 -0
  15. xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_title_to_star/results.json +9 -0
  16. xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/Answer_questions_from_options/results.json +9 -0
  17. xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/answer_quiz/results.json +9 -0
  18. xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/select_the_best_option/results.json +9 -0
  19. xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis/results.json +9 -0
  20. xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_believable/results.json +9 -0
  21. xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_desc/results.json +9 -0
  22. xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_likely/results.json +9 -0
  23. xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_options/results.json +9 -0
  24. xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/direct_to_which_department/results.json +9 -0
  25. xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/help_page_topic/results.json +9 -0
  26. xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/rephrase_as_banking_term/results.json +9 -0
  27. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/classify/results.json +9 -0
  28. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/multi-choice/results.json +9 -0
  29. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/premise_context_first/results.json +9 -0
  30. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_between_1_2/results.json +9 -0
  31. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_between_A_B/results.json +9 -0
  32. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_which_one_1_2/results.json +9 -0
  33. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/single_sentence_bad_yes_no/results.json +9 -0
  34. xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/single_sentence_good_yes_no/results.json +9 -0
  35. xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/claim_and_all_supporting_evidences/results.json +9 -0
  36. xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/fifth_evidence_and_claim_itemization/results.json +9 -0
  37. xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/first_evidence_and_claim_itemization/results.json +9 -0
  38. xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/second_evidence_and_claim_itemization/results.json +9 -0
  39. xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/third_evidence_claim_pair/results.json +9 -0
  40. xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json +9 -0
  41. xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json +9 -0
  42. xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json +9 -0
  43. xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/answer_given_question_without_options/results.json +9 -0
  44. xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/most_suitable_answer/results.json +9 -0
  45. xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/question_answering/results.json +9 -0
  46. xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/ambiguous/results.json +9 -0
  47. xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/clarification_needed/results.json +9 -0
  48. xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/directly_answer/results.json +9 -0
  49. xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/score_give_number/results.json +9 -0
  50. xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/score_how_much/results.json +9 -0
.gitattributes CHANGED
@@ -30,3 +30,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
34
+ xp3capmixnewcodelonglossseq_global_step1992/tokenizer.json filter=lfs diff=lfs merge=lfs -text
35
+ xp3capmixnewcodelonglossseq_global_step2114/tokenizer.json filter=lfs diff=lfs merge=lfs -text
36
+ xp3capmixnewcodelonglossseq_global_step249/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ xp3capmixnewcodelonglossseq_global_step747/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ xp3capmixnewcodelonglossseq_global_step996/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ xp3capmixnewcodelonglossseq_global_step1245/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ xp3capmixnewcodelonglossseq_global_step1494/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ xp3capmixnewcodelonglossseq_global_step1743/tokenizer.json filter=lfs diff=lfs merge=lfs -text
xp3capmixnewcodelonglossseq_global_step1245/Context_Section_Type/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "pubmed_qa",
3
+ "dataset_config_name": "pqa_labeled",
4
+ "template_name": "Context Section Type",
5
+ "evaluation": {
6
+ "accuracy": 0.607
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='pqa_labeled', dataset_name='pubmed_qa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='Context Section Type', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_residual_connection_post_layernorm": false,
3
+ "attention_dropout": 0.0,
4
+ "architectures": [
5
+ "BloomModel"
6
+ ],
7
+ "attention_softmax_in_fp32": true,
8
+ "seq_length": 2048,
9
+ "pad_token_id": 3,
10
+ "bos_token_id": 1,
11
+ "eos_token_id": 2,
12
+ "hidden_dropout": 0.0,
13
+ "initializer_range": 0.02,
14
+ "layer_norm_epsilon": 1e-05,
15
+ "masked_softmax_fusion": true,
16
+ "model_type": "bloom",
17
+ "n_embed": 14336,
18
+ "n_layer": 70,
19
+ "num_attention_heads": 112,
20
+ "pretraining_tp": 4,
21
+ "slow_but_exact": false,
22
+ "transformers_version": "4.21.0",
23
+ "use_cache": true,
24
+ "vocab_size": 250880
25
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_body_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "en",
4
+ "template_name": "prompt_body_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.572
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_review_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "en",
4
+ "template_name": "prompt_review_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.528
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/en/prompt_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "en",
4
+ "template_name": "prompt_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4044
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_body_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "es",
4
+ "template_name": "prompt_body_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4612
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_review_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "es",
4
+ "template_name": "prompt_review_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4356
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/es/prompt_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "es",
4
+ "template_name": "prompt_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.2894
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "prompt_body_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4738
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_review_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "prompt_review_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.458
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/fr/prompt_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "prompt_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.3222
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "prompt_body_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4434
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_review_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "prompt_review_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.4262
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/amazon_reviews_multi/zh/prompt_title_to_star/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "amazon_reviews_multi",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "prompt_title_to_star",
5
+ "evaluation": {
6
+ "accuracy": 0.2988
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/Answer_questions_from_options/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "aqua_rat",
3
+ "dataset_config_name": "raw",
4
+ "template_name": "Answer questions from options",
5
+ "evaluation": {
6
+ "accuracy": 0.2125984251968504
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer questions from options', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/answer_quiz/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "aqua_rat",
3
+ "dataset_config_name": "raw",
4
+ "template_name": "answer_quiz",
5
+ "evaluation": {
6
+ "accuracy": 0.22440944881889763
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_quiz', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/aqua_rat/raw/select_the_best_option/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "aqua_rat",
3
+ "dataset_config_name": "raw",
4
+ "template_name": "select_the_best_option",
5
+ "evaluation": {
6
+ "accuracy": 0.2204724409448819
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='select_the_best_option', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "art",
3
+ "dataset_config_name": null,
4
+ "template_name": "choose_hypothesis",
5
+ "evaluation": {
6
+ "accuracy": 0.7003916449086162
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_believable/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "art",
3
+ "dataset_config_name": null,
4
+ "template_name": "choose_hypothesis_believable",
5
+ "evaluation": {
6
+ "accuracy": 0.70822454308094
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_believable', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_desc/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "art",
3
+ "dataset_config_name": null,
4
+ "template_name": "choose_hypothesis_desc",
5
+ "evaluation": {
6
+ "accuracy": 0.5802872062663186
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_desc', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_likely/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "art",
3
+ "dataset_config_name": null,
4
+ "template_name": "choose_hypothesis_likely",
5
+ "evaluation": {
6
+ "accuracy": 0.5783289817232375
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_likely', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/art/choose_hypothesis_options/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "art",
3
+ "dataset_config_name": null,
4
+ "template_name": "choose_hypothesis_options",
5
+ "evaluation": {
6
+ "accuracy": 0.7075718015665796
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_options', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/direct_to_which_department/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "banking77",
3
+ "dataset_config_name": null,
4
+ "template_name": "direct_to_which_department",
5
+ "evaluation": {
6
+ "accuracy": 0.19805194805194806
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='direct_to_which_department', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/help_page_topic/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "banking77",
3
+ "dataset_config_name": null,
4
+ "template_name": "help_page_topic",
5
+ "evaluation": {
6
+ "accuracy": 0.32045454545454544
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='help_page_topic', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/banking77/rephrase_as_banking_term/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "banking77",
3
+ "dataset_config_name": null,
4
+ "template_name": "rephrase_as_banking_term",
5
+ "evaluation": {
6
+ "accuracy": 0.26266233766233765
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=1, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='rephrase_as_banking_term', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/classify/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blbooksgenre",
3
+ "dataset_config_name": "title_genre_classifiction",
4
+ "template_name": "classify",
5
+ "evaluation": {
6
+ "accuracy": 0.33064516129032256
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='classify', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/multi-choice/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blbooksgenre",
3
+ "dataset_config_name": "title_genre_classifiction",
4
+ "template_name": "multi-choice",
5
+ "evaluation": {
6
+ "accuracy": 0.8467741935483871
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='multi-choice', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blbooksgenre/title_genre_classifiction/premise_context_first/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blbooksgenre",
3
+ "dataset_config_name": "title_genre_classifiction",
4
+ "template_name": "premise_context_first",
5
+ "evaluation": {
6
+ "accuracy": 0.8225806451612904
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='premise_context_first', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_between_1_2/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blimp",
3
+ "dataset_config_name": "adjunct_island",
4
+ "template_name": "grammatical_between_1_2",
5
+ "evaluation": {
6
+ "accuracy": 0.512
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_between_A_B/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blimp",
3
+ "dataset_config_name": "adjunct_island",
4
+ "template_name": "grammatical_between_A_B",
5
+ "evaluation": {
6
+ "accuracy": 0.475
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_A_B', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/grammatical_which_one_1_2/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blimp",
3
+ "dataset_config_name": "adjunct_island",
4
+ "template_name": "grammatical_which_one_1_2",
5
+ "evaluation": {
6
+ "accuracy": 0.528
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_which_one_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/single_sentence_bad_yes_no/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blimp",
3
+ "dataset_config_name": "adjunct_island",
4
+ "template_name": "single_sentence_bad_yes_no",
5
+ "evaluation": {
6
+ "accuracy": 0.514
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_bad_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/blimp/adjunct_island/single_sentence_good_yes_no/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "blimp",
3
+ "dataset_config_name": "adjunct_island",
4
+ "template_name": "single_sentence_good_yes_no",
5
+ "evaluation": {
6
+ "accuracy": 0.488
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_good_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/claim_and_all_supporting_evidences/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "climate_fever",
3
+ "dataset_config_name": null,
4
+ "template_name": "claim_and_all_supporting_evidences",
5
+ "evaluation": {
6
+ "accuracy": 0.35309446254071664
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='claim_and_all_supporting_evidences', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/fifth_evidence_and_claim_itemization/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "climate_fever",
3
+ "dataset_config_name": null,
4
+ "template_name": "fifth_evidence_and_claim_itemization",
5
+ "evaluation": {
6
+ "accuracy": 0.650814332247557
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='fifth_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/first_evidence_and_claim_itemization/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "climate_fever",
3
+ "dataset_config_name": null,
4
+ "template_name": "first_evidence_and_claim_itemization",
5
+ "evaluation": {
6
+ "accuracy": 0.5576547231270358
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='first_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/second_evidence_and_claim_itemization/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "climate_fever",
3
+ "dataset_config_name": null,
4
+ "template_name": "second_evidence_and_claim_itemization",
5
+ "evaluation": {
6
+ "accuracy": 0.5785016286644951
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='second_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/climate_fever/third_evidence_claim_pair/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "climate_fever",
3
+ "dataset_config_name": null,
4
+ "template_name": "third_evidence_claim_pair",
5
+ "evaluation": {
6
+ "accuracy": 0.6006514657980456
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='third_evidence_claim_pair', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "codah",
3
+ "dataset_config_name": "codah",
4
+ "template_name": "affirmative_instruction_after_sentence_and_choices",
5
+ "evaluation": {
6
+ "accuracy": 0.8393371757925072
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "codah",
3
+ "dataset_config_name": "codah",
4
+ "template_name": "affirmative_instruction_before_sentence_and_choices",
5
+ "evaluation": {
6
+ "accuracy": 0.8353746397694525
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_before_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "codah",
3
+ "dataset_config_name": "codah",
4
+ "template_name": "interrogative_instruction_after_sentence_and_choices",
5
+ "evaluation": {
6
+ "accuracy": 0.840778097982709
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='interrogative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/answer_given_question_without_options/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "commonsense_qa",
3
+ "dataset_config_name": null,
4
+ "template_name": "answer_given_question_without_options",
5
+ "evaluation": {
6
+ "accuracy": 0.7207207207207207
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_given_question_without_options', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/most_suitable_answer/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "commonsense_qa",
3
+ "dataset_config_name": null,
4
+ "template_name": "most_suitable_answer",
5
+ "evaluation": {
6
+ "accuracy": 0.9205569205569205
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='most_suitable_answer', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/commonsense_qa/question_answering/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "commonsense_qa",
3
+ "dataset_config_name": null,
4
+ "template_name": "question_answering",
5
+ "evaluation": {
6
+ "accuracy": 0.9123669123669124
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='question_answering', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/ambiguous/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "conv_ai_3",
3
+ "dataset_config_name": null,
4
+ "template_name": "ambiguous",
5
+ "evaluation": {
6
+ "accuracy": 0.39040207522697795
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='ambiguous', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/clarification_needed/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "conv_ai_3",
3
+ "dataset_config_name": null,
4
+ "template_name": "clarification_needed",
5
+ "evaluation": {
6
+ "accuracy": 0.40077821011673154
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='clarification_needed', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/directly_answer/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "conv_ai_3",
3
+ "dataset_config_name": null,
4
+ "template_name": "directly_answer",
5
+ "evaluation": {
6
+ "accuracy": 0.5940337224383917
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='directly_answer', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/score_give_number/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "conv_ai_3",
3
+ "dataset_config_name": null,
4
+ "template_name": "score_give_number",
5
+ "evaluation": {
6
+ "accuracy": 0.395157803718115
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_give_number', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
xp3capmixnewcodelonglossseq_global_step1245/evaluation/conv_ai_3/score_how_much/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "conv_ai_3",
3
+ "dataset_config_name": null,
4
+ "template_name": "score_how_much",
5
+ "evaluation": {
6
+ "accuracy": 0.3584089926502378
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/xp3capmixnewcodelonglossseq_global_step1245/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_how_much', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }