bhuvanmdev commited on
Commit
d381d35
·
verified ·
1 Parent(s): cf805be

Training in progress, step 1080, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83db771df950e62ad2023cec1215f51219493e18859b93162135ea811647b4ee
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bad098bfc3580a7ad640c4130e7a07c7506695bf24af2b17c66a370a46a96ea
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b7dbda1b23eed1009be33bc92a27f23738750bb760895a3877eb8413e63752
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1678b1a77440b4735ea8dca5d633374798805380e60f92032e0e8dfb0faedde
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7097ad8e44eb73dccc917ee5a890bd970cf5e65b02ff96ff5daa468616d769c6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5f93394ec4ac2c9202e96d8f2c28e904b14601df40e2aa991afd31d35337d4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2ed300d9a9b5101b0a51972c41bba244062680bd71a8f222001fe6d7c1bf120
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dee088d40e7a433523254925268530a7e4fe462e4869c9eeb52cf75236d3eadf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.37924865831842575,
5
  "eval_steps": 500,
6
- "global_step": 1060,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -855,14 +855,30 @@
855
  "loss": 0.4317,
856
  "num_input_tokens_seen": 710659,
857
  "step": 1060
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  }
859
  ],
860
  "logging_steps": 10,
861
  "max_steps": 2795,
862
- "num_input_tokens_seen": 710659,
863
  "num_train_epochs": 1,
864
  "save_steps": 20,
865
- "total_flos": 1.5980211294492672e+16,
866
  "train_batch_size": 1,
867
  "trial_name": null,
868
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.38640429338103754,
5
  "eval_steps": 500,
6
+ "global_step": 1080,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
855
  "loss": 0.4317,
856
  "num_input_tokens_seen": 710659,
857
  "step": 1060
858
+ },
859
+ {
860
+ "epoch": 0.3828264758497317,
861
+ "grad_norm": 0.31499966979026794,
862
+ "learning_rate": 0.00012343470483005368,
863
+ "loss": 0.4357,
864
+ "num_input_tokens_seen": 717932,
865
+ "step": 1070
866
+ },
867
+ {
868
+ "epoch": 0.38640429338103754,
869
+ "grad_norm": 0.4409402012825012,
870
+ "learning_rate": 0.0001227191413237925,
871
+ "loss": 0.408,
872
+ "num_input_tokens_seen": 724716,
873
+ "step": 1080
874
  }
875
  ],
876
  "logging_steps": 10,
877
  "max_steps": 2795,
878
+ "num_input_tokens_seen": 724716,
879
  "num_train_epochs": 1,
880
  "save_steps": 20,
881
+ "total_flos": 1.6296303583715328e+16,
882
  "train_batch_size": 1,
883
  "trial_name": null,
884
  "trial_params": null