HiAmNear commited on
Commit
590ad7e
·
verified ·
1 Parent(s): 9524e4c

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3c64ce65f09d500bf0cfae9c6075a92da4f200db8fbfbab3524bc47aa8f4f3
3
  size 540033204
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff33ba1981fd23c9a70194ad44249bad6efd85e3f45f87477a5679eb64fba9d6
3
  size 540033204
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e55ca99603e17f395aaf1f9b2636edb297cd89ef45162c1dd9b3edd9d5f8e92
3
  size 1080178938
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce259efefbd0a669e092d069f8b7d0abeebece2ba65572da40dbc9cb4f2c7f00
3
  size 1080178938
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed735e3f99132bb65fd1fb7add5b0b7a7d8becc0ddeb5d97dfd35319e0195378
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b884a31436e5a926c6a4b2ab322392a191669d607ca1d807ba3a04c0729344f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e763bac2f027799c42d03eb657c76226e9c49c441f5c57ed896d4ba286bc8c37
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1a5a0490d673c1b146e0e96e5387b572aaad76740cc2aa549f781d311acc2b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 64,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -519,6 +519,70 @@
519
  "eval_samples_per_second": 29.974,
520
  "eval_steps_per_second": 0.307,
521
  "step": 64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  }
523
  ],
524
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.0,
5
  "eval_steps": 500,
6
+ "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
519
  "eval_samples_per_second": 29.974,
520
  "eval_steps_per_second": 0.307,
521
  "step": 64
522
+ },
523
+ {
524
+ "epoch": 8.125,
525
+ "grad_norm": 48.37006759643555,
526
+ "learning_rate": 1.0207286432160806e-07,
527
+ "loss": 3.8137,
528
+ "step": 65
529
+ },
530
+ {
531
+ "epoch": 8.25,
532
+ "grad_norm": 47.9218635559082,
533
+ "learning_rate": 1.0364321608040202e-07,
534
+ "loss": 3.7862,
535
+ "step": 66
536
+ },
537
+ {
538
+ "epoch": 8.375,
539
+ "grad_norm": 48.484378814697266,
540
+ "learning_rate": 1.05213567839196e-07,
541
+ "loss": 3.789,
542
+ "step": 67
543
+ },
544
+ {
545
+ "epoch": 8.5,
546
+ "grad_norm": 48.121212005615234,
547
+ "learning_rate": 1.0678391959798996e-07,
548
+ "loss": 3.7609,
549
+ "step": 68
550
+ },
551
+ {
552
+ "epoch": 8.625,
553
+ "grad_norm": 48.182281494140625,
554
+ "learning_rate": 1.0835427135678393e-07,
555
+ "loss": 3.7594,
556
+ "step": 69
557
+ },
558
+ {
559
+ "epoch": 8.75,
560
+ "grad_norm": 48.09780502319336,
561
+ "learning_rate": 1.099246231155779e-07,
562
+ "loss": 3.6978,
563
+ "step": 70
564
+ },
565
+ {
566
+ "epoch": 8.875,
567
+ "grad_norm": 47.829345703125,
568
+ "learning_rate": 1.1149497487437187e-07,
569
+ "loss": 3.7101,
570
+ "step": 71
571
+ },
572
+ {
573
+ "epoch": 9.0,
574
+ "grad_norm": 47.34735107421875,
575
+ "learning_rate": 1.1306532663316584e-07,
576
+ "loss": 3.6672,
577
+ "step": 72
578
+ },
579
+ {
580
+ "epoch": 9.0,
581
+ "eval_loss": 3.3924500942230225,
582
+ "eval_runtime": 42.4081,
583
+ "eval_samples_per_second": 32.211,
584
+ "eval_steps_per_second": 0.33,
585
+ "step": 72
586
  }
587
  ],
588
  "logging_steps": 1,