bhuvanmdev committed on
Commit
8ddbc12
·
verified ·
1 Parent(s): 8ea19fd

Training in progress, step 1420, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:070555ab40db51d3a5840454fa269b784df6d102a3f8624f43e74d58478c8951
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad36523f653c0b77cbc992ea91311b49e96a68b9bac8b5fc5e7ff63686ac900
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ca946250efe75fef35aba1dbf1b03c113298722b5d7da5cedd58c3eb48dddaf
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eae06a37ddc2a57f0d2662fd5d5d2cdd831ea69bc665ae17cf29a05b11e5d13
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:354c3e84f6b2a094e741c8f1f43bff92126887859b832aecb38252864730d160
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611ff9c30a238fa31d0058e6fc82212ffbe063da1a3832f3d98bad700efec653
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ded0d7f7cfcac928a75f77d93dbcaf43644e4251cba1e72644318143074c5e26
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ee649f42236fc3525c15f4fc06e3387d0e6ad85bc6902d30e2ef469b193aea2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5008944543828264,
5
  "eval_steps": 500,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1127,14 +1127,30 @@
1127
  "loss": 0.3776,
1128
  "num_input_tokens_seen": 938905,
1129
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1130
  }
1131
  ],
1132
  "logging_steps": 10,
1133
  "max_steps": 2795,
1134
- "num_input_tokens_seen": 938905,
1135
  "num_train_epochs": 1,
1136
  "save_steps": 20,
1137
- "total_flos": 2.111265780839424e+16,
1138
  "train_batch_size": 1,
1139
  "trial_name": null,
1140
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5080500894454383,
5
  "eval_steps": 500,
6
+ "global_step": 1420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1127
  "loss": 0.3776,
1128
  "num_input_tokens_seen": 938905,
1129
  "step": 1400
1130
+ },
1131
+ {
1132
+ "epoch": 0.5044722719141324,
1133
+ "grad_norm": 0.2043834775686264,
1134
+ "learning_rate": 9.910554561717353e-05,
1135
+ "loss": 0.4206,
1136
+ "num_input_tokens_seen": 947665,
1137
+ "step": 1410
1138
+ },
1139
+ {
1140
+ "epoch": 0.5080500894454383,
1141
+ "grad_norm": 0.3463163673877716,
1142
+ "learning_rate": 9.838998211091235e-05,
1143
+ "loss": 0.3953,
1144
+ "num_input_tokens_seen": 955261,
1145
+ "step": 1420
1146
  }
1147
  ],
1148
  "logging_steps": 10,
1149
  "max_steps": 2795,
1150
+ "num_input_tokens_seen": 955261,
1151
  "num_train_epochs": 1,
1152
  "save_steps": 20,
1153
+ "total_flos": 2.148044648894669e+16,
1154
  "train_batch_size": 1,
1155
  "trial_name": null,
1156
  "trial_params": null