bhuvanmdev committed
Commit 0cc5f82 · verified · 1 parent: 622d54b

Training in progress, step 1440, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aad36523f653c0b77cbc992ea91311b49e96a68b9bac8b5fc5e7ff63686ac900
+oid sha256:64d6c5c98da7d7fc2308ce4cce8c077dacdaefc9c3e45997ff2a0b612a1ad5fb
 size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eae06a37ddc2a57f0d2662fd5d5d2cdd831ea69bc665ae17cf29a05b11e5d13
+oid sha256:bdf5fd6b5c41b0e5fd63b47580a5cc765e806b828011f641bf5fad58475b4f01
 size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:611ff9c30a238fa31d0058e6fc82212ffbe063da1a3832f3d98bad700efec653
+oid sha256:5db1fc58a2a72dbad3c41b2357780fb02b9c10ea5bf856d5df562a584278bb4f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ee649f42236fc3525c15f4fc06e3387d0e6ad85bc6902d30e2ef469b193aea2
+oid sha256:c0fed88b87dca8914fe3b96a952dda114e542b0650af3a03c544afb4eea3bd4e
 size 1064
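
The four binary checkpoint files above are tracked with Git LFS, so each diff only touches the pointer file: the `oid sha256` (the SHA-256 digest of the file contents) changes while the byte `size` stays the same. A minimal sketch for recomputing those two pointer fields for a local file, with the path being a hypothetical example rather than anything taken from this commit:

```python
import hashlib
import os

def lfs_pointer_fields(path: str) -> dict:
    """Compute the oid/size fields that a Git LFS pointer records for a file."""
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoint files are not loaded into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return {"oid": f"sha256:{sha.hexdigest()}", "size": os.path.getsize(path)}

# Hypothetical local path; compare the output with the pointer shown in the diff above.
print(lfs_pointer_fields("last-checkpoint/adapter_model.safetensors"))
```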
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5080500894454383,
+  "epoch": 0.5152057245080501,
   "eval_steps": 500,
-  "global_step": 1420,
+  "global_step": 1440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1143,14 +1143,30 @@
       "loss": 0.3953,
       "num_input_tokens_seen": 955261,
       "step": 1420
+    },
+    {
+      "epoch": 0.5116279069767442,
+      "grad_norm": 0.3044353723526001,
+      "learning_rate": 9.767441860465116e-05,
+      "loss": 0.4226,
+      "num_input_tokens_seen": 961797,
+      "step": 1430
+    },
+    {
+      "epoch": 0.5152057245080501,
+      "grad_norm": 0.4201965928077698,
+      "learning_rate": 9.695885509838999e-05,
+      "loss": 0.4031,
+      "num_input_tokens_seen": 967800,
+      "step": 1440
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
-  "num_input_tokens_seen": 955261,
+  "num_input_tokens_seen": 967800,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 2.148044648894669e+16,
+  "total_flos": 2.17624043188224e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null