sandernotenbaert commited on
Commit
a337101
·
verified ·
1 Parent(s): 3fad091

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48ab6fbe729d04037d979e3727d665bb8a3d92ea87dfd2ed84a826f73114e2ff
3
  size 1783055976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f672218c5e4e6a2121de04b65360eb70212979671319a6b7ccc699db76402d01
3
  size 1783055976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34887632667319441e15da8fbef52c84ef86e33be3a881d2c79ccd5c4dfd7e52
3
  size 3566173562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2097d5365d132bf9645baefe54a0be746abe632499a074cdbc93777d0e5c34c3
3
  size 3566173562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cfe3540f24c66f9bc38a546448a9e5d9989705fafc8d4b37aa5dafd0c7460f2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f83fe17529e572dff2734bb21512b28dd7cf5d20ef0e84688f5068ffbf24e765
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a43c200beec982be6ba6814f2f475edee50b971e23470ab6b587b0a72dccb9b7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97bba99094cb6ba509984c3fb752cc4387fde3be7cca9c123af30577d2dd911a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af2924a0eb31db835ecc1090391e433a4e9097b02bdc25d6cb879aecfffdd0e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45cef01a0c03a5fc76309b06f41731c4ef0f05769be4b933a78626a0c047135
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.016775014622221078,
6
  "eval_steps": 500,
7
- "global_step": 7500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1178,6 +1178,84 @@
1178
  "eval_samples_per_second": 6.218,
1179
  "eval_steps_per_second": 6.218,
1180
  "step": 7500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1181
  }
1182
  ],
1183
  "logging_steps": 50,
@@ -1197,7 +1275,7 @@
1197
  "attributes": {}
1198
  }
1199
  },
1200
- "total_flos": 8.206216593408e+16,
1201
  "train_batch_size": 1,
1202
  "trial_name": null,
1203
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.01789334893036915,
6
  "eval_steps": 500,
7
+ "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1178
  "eval_samples_per_second": 6.218,
1179
  "eval_steps_per_second": 6.218,
1180
  "step": 7500
1181
+ },
1182
+ {
1183
+ "epoch": 0.016886848053035888,
1184
+ "grad_norm": 1.009393334388733,
1185
+ "learning_rate": 9.380203285369915e-06,
1186
+ "loss": 0.3244,
1187
+ "step": 7550
1188
+ },
1189
+ {
1190
+ "epoch": 0.016998681483850694,
1191
+ "grad_norm": 1.1191552877426147,
1192
+ "learning_rate": 9.442332065906211e-06,
1193
+ "loss": 0.325,
1194
+ "step": 7600
1195
+ },
1196
+ {
1197
+ "epoch": 0.0171105149146655,
1198
+ "grad_norm": 1.2570810317993164,
1199
+ "learning_rate": 9.504460846442507e-06,
1200
+ "loss": 0.3417,
1201
+ "step": 7650
1202
+ },
1203
+ {
1204
+ "epoch": 0.01722234834548031,
1205
+ "grad_norm": 0.9457122683525085,
1206
+ "learning_rate": 9.566589626978801e-06,
1207
+ "loss": 0.3141,
1208
+ "step": 7700
1209
+ },
1210
+ {
1211
+ "epoch": 0.017334181776295116,
1212
+ "grad_norm": 0.85469651222229,
1213
+ "learning_rate": 9.628718407515097e-06,
1214
+ "loss": 0.3235,
1215
+ "step": 7750
1216
+ },
1217
+ {
1218
+ "epoch": 0.01744601520710992,
1219
+ "grad_norm": 1.2483268976211548,
1220
+ "learning_rate": 9.690847188051395e-06,
1221
+ "loss": 0.3207,
1222
+ "step": 7800
1223
+ },
1224
+ {
1225
+ "epoch": 0.017557848637924728,
1226
+ "grad_norm": 0.8644481897354126,
1227
+ "learning_rate": 9.752975968587689e-06,
1228
+ "loss": 0.3203,
1229
+ "step": 7850
1230
+ },
1231
+ {
1232
+ "epoch": 0.017669682068739537,
1233
+ "grad_norm": 0.9396805763244629,
1234
+ "learning_rate": 9.815104749123985e-06,
1235
+ "loss": 0.3317,
1236
+ "step": 7900
1237
+ },
1238
+ {
1239
+ "epoch": 0.017781515499554344,
1240
+ "grad_norm": 1.322040319442749,
1241
+ "learning_rate": 9.877233529660281e-06,
1242
+ "loss": 0.3333,
1243
+ "step": 7950
1244
+ },
1245
+ {
1246
+ "epoch": 0.01789334893036915,
1247
+ "grad_norm": 0.8326611518859863,
1248
+ "learning_rate": 9.939362310196577e-06,
1249
+ "loss": 0.3115,
1250
+ "step": 8000
1251
+ },
1252
+ {
1253
+ "epoch": 0.01789334893036915,
1254
+ "eval_loss": 0.3215126693248749,
1255
+ "eval_runtime": 1461.5546,
1256
+ "eval_samples_per_second": 6.18,
1257
+ "eval_steps_per_second": 6.18,
1258
+ "step": 8000
1259
  }
1260
  ],
1261
  "logging_steps": 50,
 
1275
  "attributes": {}
1276
  }
1277
  },
1278
+ "total_flos": 8.7532976996352e+16,
1279
  "train_batch_size": 1,
1280
  "trial_name": null,
1281
  "trial_params": null