bhuvanmdev commited on
Commit
a8c1bb9
·
verified ·
1 Parent(s): 5acd4e8

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf7ba55347be3009d65b0f6e45243c095cb0115dd9730e83831a737d420ed7d8
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a978d2ea4c2ef178cab56e079e834d41ad51771ce490d4892a323b4ec39be6be
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ce7a1b50c1cfa308c27e5cb3651542491d426a9f882fb92cb544cabeb70bb31
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:469403812ebd6b0168f1cfd10dc9051127cd1573ac3bdf23aca7e0e5a8a0418b
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb8413f834d7aea22eaaf4929929d97571a7bcc98ba8e7df76bc3dfd61eff41
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c4a6bdcee541f4bc227f467016d9bc346f4f8483027f6990cb0f3a9b7a1f71e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db27570aff99c68af4609f5f7c05632809723cb9d1da7a19356fc2b24a3fe3ec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d836f79887d280f068e10ec79168f2ea91f57c6fbb35eab33b10757b0316f93b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4221824686940966,
5
  "eval_steps": 500,
6
- "global_step": 1180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -951,14 +951,30 @@
951
  "loss": 0.4196,
952
  "num_input_tokens_seen": 794103,
953
  "step": 1180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
954
  }
955
  ],
956
  "logging_steps": 10,
957
  "max_steps": 2795,
958
- "num_input_tokens_seen": 794103,
959
  "num_train_epochs": 1,
960
  "save_steps": 20,
961
- "total_flos": 1.7856572181018624e+16,
962
  "train_batch_size": 1,
963
  "trial_name": null,
964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4293381037567084,
5
  "eval_steps": 500,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
951
  "loss": 0.4196,
952
  "num_input_tokens_seen": 794103,
953
  "step": 1180
954
+ },
955
+ {
956
+ "epoch": 0.4257602862254025,
957
+ "grad_norm": 0.3390592038631439,
958
+ "learning_rate": 0.00011484794275491951,
959
+ "loss": 0.4073,
960
+ "num_input_tokens_seen": 800033,
961
+ "step": 1190
962
+ },
963
+ {
964
+ "epoch": 0.4293381037567084,
965
+ "grad_norm": 0.35398080945014954,
966
+ "learning_rate": 0.00011413237924865831,
967
+ "loss": 0.4032,
968
+ "num_input_tokens_seen": 807607,
969
+ "step": 1200
970
  }
971
  ],
972
  "logging_steps": 10,
973
  "max_steps": 2795,
974
+ "num_input_tokens_seen": 807607,
975
  "num_train_epochs": 1,
976
  "save_steps": 20,
977
+ "total_flos": 1.8160229453101056e+16,
978
  "train_batch_size": 1,
979
  "trial_name": null,
980
  "trial_params": null