sandernotenbaert commited on
Commit
5f24d3a
·
verified ·
1 Parent(s): 8583143

Training in progress, step 9500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a965268f22c4e79d69dbf95ac68df18131ac4a28a88a5d81432e568fd272e719
3
  size 1783055976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412f4a8e77fea4f91ace0373bc44858601f6fee74390562d707e1b0da5b734e5
3
  size 1783055976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b417a00b6bce03f65479ff59686f2d226987f315fc5c6151693c6b7ec367ec9
3
  size 3566173562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2671390599f002f23fed81ebff0d7fc668bf97b893c07fdecb013bd970be245a
3
  size 3566173562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86a2aa7bca84d2b8e7dd4e04a286714ba9169af11c46c739950a52df4c45259f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82be7d57b17f6620866913ecd4ff8b7c828f1f4bbbb44f8426b657efc6ab9fa7
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb8278e710e71ee2631a6ab6373f5d2bf3048e7d5a86c6ca348c577a5e8dd794
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c32cb22d4c11005c45aa3db5dc6b95531a493184497cbddb0017d1404b4885b5
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:695726e815b6f66e1dd703432b138d1f45b6d5a74f2a6894ef9031edc67f71e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb61bd1c310810ff79b63ed62b4ac17e06120cede8cab76720775afb5619a32e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08052007018666119,
6
  "eval_steps": 500,
7
- "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1412,6 +1412,84 @@
1412
  "eval_samples_per_second": 39.984,
1413
  "eval_steps_per_second": 5.002,
1414
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1415
  }
1416
  ],
1417
  "logging_steps": 50,
@@ -1431,7 +1509,7 @@
1431
  "attributes": {}
1432
  }
1433
  },
1434
- "total_flos": 1.31299465494528e+17,
1435
  "train_batch_size": 1,
1436
  "trial_name": null,
1437
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08499340741925347,
6
  "eval_steps": 500,
7
+ "global_step": 9500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1412
  "eval_samples_per_second": 39.984,
1413
  "eval_steps_per_second": 5.002,
1414
  "step": 9000
1415
+ },
1416
+ {
1417
+ "epoch": 0.08096740390992041,
1418
+ "grad_norm": 0.4358559548854828,
1419
+ "learning_rate": 4.49751491053678e-05,
1420
+ "loss": 0.2933,
1421
+ "step": 9050
1422
+ },
1423
+ {
1424
+ "epoch": 0.08141473763317963,
1425
+ "grad_norm": 0.4459361732006073,
1426
+ "learning_rate": 4.5223658051689864e-05,
1427
+ "loss": 0.293,
1428
+ "step": 9100
1429
+ },
1430
+ {
1431
+ "epoch": 0.08186207135643886,
1432
+ "grad_norm": 0.5491781234741211,
1433
+ "learning_rate": 4.547216699801193e-05,
1434
+ "loss": 0.2874,
1435
+ "step": 9150
1436
+ },
1437
+ {
1438
+ "epoch": 0.0823094050796981,
1439
+ "grad_norm": 0.38162198662757874,
1440
+ "learning_rate": 4.5720675944333996e-05,
1441
+ "loss": 0.2864,
1442
+ "step": 9200
1443
+ },
1444
+ {
1445
+ "epoch": 0.08275673880295732,
1446
+ "grad_norm": 0.39891770482063293,
1447
+ "learning_rate": 4.596918489065606e-05,
1448
+ "loss": 0.3036,
1449
+ "step": 9250
1450
+ },
1451
+ {
1452
+ "epoch": 0.08320407252621655,
1453
+ "grad_norm": 0.461846262216568,
1454
+ "learning_rate": 4.6217693836978135e-05,
1455
+ "loss": 0.2906,
1456
+ "step": 9300
1457
+ },
1458
+ {
1459
+ "epoch": 0.08365140624947578,
1460
+ "grad_norm": 0.4161681830883026,
1461
+ "learning_rate": 4.64662027833002e-05,
1462
+ "loss": 0.2825,
1463
+ "step": 9350
1464
+ },
1465
+ {
1466
+ "epoch": 0.08409873997273501,
1467
+ "grad_norm": 0.41477343440055847,
1468
+ "learning_rate": 4.671471172962227e-05,
1469
+ "loss": 0.2899,
1470
+ "step": 9400
1471
+ },
1472
+ {
1473
+ "epoch": 0.08454607369599423,
1474
+ "grad_norm": 0.3996387720108032,
1475
+ "learning_rate": 4.696322067594434e-05,
1476
+ "loss": 0.2806,
1477
+ "step": 9450
1478
+ },
1479
+ {
1480
+ "epoch": 0.08499340741925347,
1481
+ "grad_norm": 0.3911084830760956,
1482
+ "learning_rate": 4.7211729622266405e-05,
1483
+ "loss": 0.283,
1484
+ "step": 9500
1485
+ },
1486
+ {
1487
+ "epoch": 0.08499340741925347,
1488
+ "eval_loss": 0.2821110785007477,
1489
+ "eval_runtime": 226.0823,
1490
+ "eval_samples_per_second": 39.954,
1491
+ "eval_steps_per_second": 4.998,
1492
+ "step": 9500
1493
  }
1494
  ],
1495
  "logging_steps": 50,
 
1509
  "attributes": {}
1510
  }
1511
  },
1512
+ "total_flos": 1.53182709743616e+17,
1513
  "train_batch_size": 1,
1514
  "trial_name": null,
1515
  "trial_params": null