sandernotenbaert commited on
Commit
b2ad7d8
·
verified ·
1 Parent(s): 9e79033

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f672218c5e4e6a2121de04b65360eb70212979671319a6b7ccc699db76402d01
3
  size 1783055976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac1a75e495e37659f935a94a153e5ae964ac453c26fb6a9e6d16dca433447a46
3
  size 1783055976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2097d5365d132bf9645baefe54a0be746abe632499a074cdbc93777d0e5c34c3
3
  size 3566173562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30fdbb96dddd647ac7cab10b09e0ee2785b645bae4fc61cc48fe5171deae209
3
  size 3566173562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f83fe17529e572dff2734bb21512b28dd7cf5d20ef0e84688f5068ffbf24e765
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9213080fe2b45399b87036ca9ff9164533abe6b368e5c828136ee184486749d4
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97bba99094cb6ba509984c3fb752cc4387fde3be7cca9c123af30577d2dd911a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68f6c298533d8c912a29bc1b5e945c92e9b2f851b00f21834ca034f7df9e34ae
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d45cef01a0c03a5fc76309b06f41731c4ef0f05769be4b933a78626a0c047135
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3965fdcf65adda4a516b1c5cd32d581e539d552e45f86bb9fe1a08df9bbf9fad
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01789334893036915,
6
  "eval_steps": 500,
7
- "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1256,10 +1256,88 @@
1256
  "eval_samples_per_second": 6.18,
1257
  "eval_steps_per_second": 6.18,
1258
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1259
  }
1260
  ],
1261
  "logging_steps": 50,
1262
- "max_steps": 1341282,
1263
  "num_input_tokens_seen": 0,
1264
  "num_train_epochs": 3,
1265
  "save_steps": 500,
@@ -1275,7 +1353,7 @@
1275
  "attributes": {}
1276
  }
1277
  },
1278
- "total_flos": 8.7532976996352e+16,
1279
  "train_batch_size": 1,
1280
  "trial_name": null,
1281
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07604673295406889,
6
  "eval_steps": 500,
7
+ "global_step": 8500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1256
  "eval_samples_per_second": 6.18,
1257
  "eval_steps_per_second": 6.18,
1258
  "step": 8000
1259
+ },
1260
+ {
1261
+ "epoch": 0.07202072944473584,
1262
+ "grad_norm": 0.6482174396514893,
1263
+ "learning_rate": 4.0004970178926446e-05,
1264
+ "loss": 0.3199,
1265
+ "step": 8050
1266
+ },
1267
+ {
1268
+ "epoch": 0.07246806316799506,
1269
+ "grad_norm": 0.560840368270874,
1270
+ "learning_rate": 4.025347912524851e-05,
1271
+ "loss": 0.3203,
1272
+ "step": 8100
1273
+ },
1274
+ {
1275
+ "epoch": 0.07291539689125429,
1276
+ "grad_norm": 0.47107866406440735,
1277
+ "learning_rate": 4.050198807157058e-05,
1278
+ "loss": 0.3277,
1279
+ "step": 8150
1280
+ },
1281
+ {
1282
+ "epoch": 0.07336273061451352,
1283
+ "grad_norm": 0.6409516334533691,
1284
+ "learning_rate": 4.075049701789265e-05,
1285
+ "loss": 0.3141,
1286
+ "step": 8200
1287
+ },
1288
+ {
1289
+ "epoch": 0.07381006433777275,
1290
+ "grad_norm": 0.5527054667472839,
1291
+ "learning_rate": 4.0999005964214716e-05,
1292
+ "loss": 0.3121,
1293
+ "step": 8250
1294
+ },
1295
+ {
1296
+ "epoch": 0.07425739806103197,
1297
+ "grad_norm": 0.5012880563735962,
1298
+ "learning_rate": 4.124751491053678e-05,
1299
+ "loss": 0.3015,
1300
+ "step": 8300
1301
+ },
1302
+ {
1303
+ "epoch": 0.07470473178429121,
1304
+ "grad_norm": 0.44512906670570374,
1305
+ "learning_rate": 4.149602385685885e-05,
1306
+ "loss": 0.3145,
1307
+ "step": 8350
1308
+ },
1309
+ {
1310
+ "epoch": 0.07515206550755044,
1311
+ "grad_norm": 0.5861555337905884,
1312
+ "learning_rate": 4.174453280318092e-05,
1313
+ "loss": 0.3077,
1314
+ "step": 8400
1315
+ },
1316
+ {
1317
+ "epoch": 0.07559939923080966,
1318
+ "grad_norm": 0.5449799299240112,
1319
+ "learning_rate": 4.199304174950298e-05,
1320
+ "loss": 0.3056,
1321
+ "step": 8450
1322
+ },
1323
+ {
1324
+ "epoch": 0.07604673295406889,
1325
+ "grad_norm": 0.6001898646354675,
1326
+ "learning_rate": 4.224155069582505e-05,
1327
+ "loss": 0.3066,
1328
+ "step": 8500
1329
+ },
1330
+ {
1331
+ "epoch": 0.07604673295406889,
1332
+ "eval_loss": 0.3047943115234375,
1333
+ "eval_runtime": 226.5226,
1334
+ "eval_samples_per_second": 39.877,
1335
+ "eval_steps_per_second": 4.988,
1336
+ "step": 8500
1337
  }
1338
  ],
1339
  "logging_steps": 50,
1340
+ "max_steps": 335322,
1341
  "num_input_tokens_seen": 0,
1342
  "num_train_epochs": 3,
1343
  "save_steps": 500,
 
1353
  "attributes": {}
1354
  }
1355
  },
1356
+ "total_flos": 1.0941622124544e+17,
1357
  "train_batch_size": 1,
1358
  "trial_name": null,
1359
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e104d5d3fed57e888a9b365bb5a58dcb175d2c55ee60c594628217a2bfea4cd
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:405d1c439695043016bcffc14d4eaca75fd1542cf769a56c446e9d029109e3c1
3
  size 5432