amy-hyunji-lee commited on
Commit
c7de868
·
verified ·
1 Parent(s): 1760bab

Add files using upload-large-folder tool

Browse files
Files changed (21) hide show
  1. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_0_step_100.pt +3 -0
  2. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_0_step_791.pt +3 -0
  3. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_1_step_1583.pt +3 -0
  4. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_2_step_2375.pt +3 -0
  5. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_3_step_3167.pt +3 -0
  6. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_4_step_3959.pt +3 -0
  7. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/fintune_ssm_config.json +1 -0
  8. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_0_step_100.pt +3 -0
  9. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_0_step_474.pt +3 -0
  10. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_1_step_949.pt +3 -0
  11. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_2_step_1424.pt +3 -0
  12. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_3_step_1899.pt +3 -0
  13. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_4_step_2374.pt +3 -0
  14. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/fintune_ssm_config.json +1 -0
  15. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_0_step_100.pt +3 -0
  16. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_0_step_633.pt +3 -0
  17. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_1_step_1267.pt +3 -0
  18. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_2_step_1901.pt +3 -0
  19. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_3_step_2535.pt +3 -0
  20. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_4_step_3169.pt +3 -0
  21. finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/fintune_ssm_config.json +1 -0
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_0_step_100.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e01605069ebd705d7a3d0bf0d1420af89cbca5a9bbf7f6dfcf61e63fa9343d03
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_0_step_791.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:597570084937bd95d7cdaf6cf75f684a40b8ecf9d21714b54a8677424aa2a23d
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_1_step_1583.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdf0cdc9b2f866c9acb05c0aa3e01251847aca3b1027a2af690b09bd8da613e
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_2_step_2375.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7367782e4cfb443947abb80d8818800973726c45459c111990afe7b74e96ff16
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_3_step_3167.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6ea2ca32084ed4a58e0c5c077890883c1280902c16a8d81bbc2de94ec006fdf
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/epoch_4_step_3959.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f40ddca6b8e9774129f1b539af789739594b6a8179be551c590920515f616824
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_106940015/fintune_ssm_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/new_tsz512x4k_100B_Samba_1.3B.crossattn/finetune_992000", "cache_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/hf_cache", "activate_logging": true, "wandb_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/wandb", "run_name_addon": "", "record_debug_params": false, "recover_step": null, "eval_mode": false, "use_finetuned_model": false, "load_cp": null, "clip_grad": true, "clip_grad_max_norm": 1, "seed": 123, "lr_sched_type": "const", "save_steps": 100, "eval_steps": 100, "grad_flow_steps": 10, "max_step": 20000, "epochs": 5, "model_device": "cuda:0", "dataset": "converted_slimpajama", "train_set_size": 10000, "eval_set_size": 20, "eval_samples_to_log": 10, "log_eval_predictions_steps": 10, "eval_max_len": 50, "max_train_input_len": 20000, "niah_train_set_size": 6144, "niah_context_len_train": 2000, "niah_needle_depths_eval": [0, 0.25, 0.5, 0.75, 1], "niah_context_lens_eval": [1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000], "ppl_test_context_len_train": 2000, "ppl_test_num_windows_per_context_len_eval": 10, "ppl_test_context_lens_eval": [1000, 2000, 4000, 10000, 20000, 30000, 40000, 50000, 60000], "ppl_test_pred_len": 100, "deci_num_chunks": 2, "multidoc_num_noise_docs_train": 11, "multidoc_num_noise_docs_eval": [10, 20, 40], "multidoc_noise_injection_policy": "random_loc", "activate_decimation": false, "decimation_type": "max_p", "decimation_beta": 1, "decimating_layers": [], "decimation_min_seq_len": 0, "decimation_max_p_L_base": -1, "find_deci_layer": false, "lr": 0.0001, "weight_decay": 0.1, "grad_accum_steps": 32}
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_0_step_100.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4f60ae80d1d2b442ffb0c9999449d5bce70c4fd072949e7fb2fe6428a916967
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_0_step_474.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a97ea94236f782b043c3691d2379e2ab98e3b411afbbcaf99fa2e35c22e121ff
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_1_step_949.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a21b27ecea5c353fa201d980d784a159ac9d56fcff8d27388aa88165671f895
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_2_step_1424.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b316ee77d92c09dfe417c0c53f7042b42fd4967d5dc0be2db3011ff1abce39
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_3_step_1899.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7536b82cea3155bbddd614444f8645fc95f0892af98aeb863570d03563060f
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/epoch_4_step_2374.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570de982f6c66fc96c69fa32585e65da99f737233aea30492b6582102b64917d
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_64146601/fintune_ssm_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/new_tsz512x4k_100B_Samba_1.3B.crossattn/finetune_992000", "cache_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/hf_cache", "activate_logging": true, "wandb_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/wandb", "run_name_addon": "", "record_debug_params": false, "recover_step": null, "eval_mode": false, "use_finetuned_model": false, "load_cp": null, "clip_grad": true, "clip_grad_max_norm": 1, "seed": 123, "lr_sched_type": "const", "save_steps": 100, "eval_steps": 100, "grad_flow_steps": 10, "max_step": 20000, "epochs": 5, "model_device": "cuda:0", "dataset": "converted_slimpajama", "train_set_size": 10000, "eval_set_size": 20, "eval_samples_to_log": 10, "log_eval_predictions_steps": 10, "eval_max_len": 50, "max_train_input_len": 20000, "niah_train_set_size": 6144, "niah_context_len_train": 2000, "niah_needle_depths_eval": [0, 0.25, 0.5, 0.75, 1], "niah_context_lens_eval": [1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000], "ppl_test_context_len_train": 2000, "ppl_test_num_windows_per_context_len_eval": 10, "ppl_test_context_lens_eval": [1000, 2000, 4000, 10000, 20000, 30000, 40000, 50000, 60000], "ppl_test_pred_len": 100, "deci_num_chunks": 2, "multidoc_num_noise_docs_train": 11, "multidoc_num_noise_docs_eval": [10, 20, 40], "multidoc_noise_injection_policy": "random_loc", "activate_decimation": false, "decimation_type": "max_p", "decimation_beta": 1, "decimating_layers": [], "decimation_min_seq_len": 0, "decimation_max_p_L_base": -1, "find_deci_layer": false, "lr": 0.0001, "weight_decay": 0.1, "grad_accum_steps": 32}
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_0_step_100.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d33eec5a5e4adb104bf6cd11a0df4bf947e8550f183d138ded073461df034fc
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_0_step_633.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8572dc1a8e511918ab2a07bc72f4ebfb2e71386fe7dc77972165c9a5d6e344d5
3
+ size 2868618811
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_1_step_1267.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edc96a27450801c8863006eb88ca84ae2dc527c8d437493ad3cb5772dbcb7a3d
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_2_step_1901.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcd2416df60c690052f32951d1d325a650e7b6d772ce3eeef423234b2be7057c
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_3_step_2535.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a20853a15740bd43912d4468dc90d5158f8d0692e8a94f21af683ba10bf91db7
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/epoch_4_step_3169.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caa8b99a24fbd33165cd72def63b4aee600a31a2490e6512e88a6e6da155f905
3
+ size 2868619058
finetune_992000/finetune_converted_slimpajama.training_inst_-1.all_loss.block_size_4096.token_cnt_85548320/fintune_ssm_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/new_tsz512x4k_100B_Samba_1.3B.crossattn/finetune_992000", "cache_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/hf_cache", "activate_logging": true, "wandb_dir": "/apdcephfs_sh2/share_300000800/user/kaixinma/amylee/t_samba/converted_slimpajama/wandb", "run_name_addon": "", "record_debug_params": false, "recover_step": null, "eval_mode": false, "use_finetuned_model": false, "load_cp": null, "clip_grad": true, "clip_grad_max_norm": 1, "seed": 123, "lr_sched_type": "const", "save_steps": 100, "eval_steps": 100, "grad_flow_steps": 10, "max_step": 20000, "epochs": 5, "model_device": "cuda:0", "dataset": "converted_slimpajama", "train_set_size": 10000, "eval_set_size": 20, "eval_samples_to_log": 10, "log_eval_predictions_steps": 10, "eval_max_len": 50, "max_train_input_len": 20000, "niah_train_set_size": 6144, "niah_context_len_train": 2000, "niah_needle_depths_eval": [0, 0.25, 0.5, 0.75, 1], "niah_context_lens_eval": [1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000], "ppl_test_context_len_train": 2000, "ppl_test_num_windows_per_context_len_eval": 10, "ppl_test_context_lens_eval": [1000, 2000, 4000, 10000, 20000, 30000, 40000, 50000, 60000], "ppl_test_pred_len": 100, "deci_num_chunks": 2, "multidoc_num_noise_docs_train": 11, "multidoc_num_noise_docs_eval": [10, 20, 40], "multidoc_noise_injection_policy": "random_loc", "activate_decimation": false, "decimation_type": "max_p", "decimation_beta": 1, "decimating_layers": [], "decimation_min_seq_len": 0, "decimation_max_p_L_base": -1, "find_deci_layer": false, "lr": 0.0001, "weight_decay": 0.1, "grad_accum_steps": 32}