pszemraj commited on
Commit
53ee6cd
·
verified ·
1 Parent(s): 9e8ed31

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-30000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625b973406cfb09789707dac6792e8677f50f529869cfbfc085d35af787b6b57
3
+ size 1202681712
checkpoints/checkpoint-pt-30000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -587,3 +587,125 @@ Mixed precision type: bf16
587
  [2024-08-10 14:06:20,974][Main][INFO] - [train] Step 26900 out of 80000 | Loss --> 2.107 | Grad_l2 --> 0.358 | Weights_l2 --> 8988.213 | Lr --> 0.007 | Seconds_per_step --> 3.412 |
588
  [2024-08-10 14:09:10,794][Main][INFO] - [train] Step 26950 out of 80000 | Loss --> 2.115 | Grad_l2 --> 0.355 | Weights_l2 --> 8988.873 | Lr --> 0.007 | Seconds_per_step --> 3.396 |
589
  [2024-08-10 14:11:59,378][Main][INFO] - [train] Step 27000 out of 80000 | Loss --> 2.114 | Grad_l2 --> 0.356 | Weights_l2 --> 8989.515 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  [2024-08-10 14:06:20,974][Main][INFO] - [train] Step 26900 out of 80000 | Loss --> 2.107 | Grad_l2 --> 0.358 | Weights_l2 --> 8988.213 | Lr --> 0.007 | Seconds_per_step --> 3.412 |
588
  [2024-08-10 14:09:10,794][Main][INFO] - [train] Step 26950 out of 80000 | Loss --> 2.115 | Grad_l2 --> 0.355 | Weights_l2 --> 8988.873 | Lr --> 0.007 | Seconds_per_step --> 3.396 |
589
  [2024-08-10 14:11:59,378][Main][INFO] - [train] Step 27000 out of 80000 | Loss --> 2.114 | Grad_l2 --> 0.356 | Weights_l2 --> 8989.515 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
590
+ [2024-08-10 14:14:49,113][Main][INFO] - [train] Step 27050 out of 80000 | Loss --> 2.108 | Grad_l2 --> 0.358 | Weights_l2 --> 8990.140 | Lr --> 0.007 | Seconds_per_step --> 3.395 |
591
+ [2024-08-10 14:17:39,074][Main][INFO] - [train] Step 27100 out of 80000 | Loss --> 2.115 | Grad_l2 --> 0.356 | Weights_l2 --> 8990.787 | Lr --> 0.007 | Seconds_per_step --> 3.399 |
592
+ [2024-08-10 14:20:28,802][Main][INFO] - [train] Step 27150 out of 80000 | Loss --> 2.108 | Grad_l2 --> 0.359 | Weights_l2 --> 8991.446 | Lr --> 0.007 | Seconds_per_step --> 3.395 |
593
+ [2024-08-10 14:23:18,386][Main][INFO] - [train] Step 27200 out of 80000 | Loss --> 2.119 | Grad_l2 --> 0.355 | Weights_l2 --> 8992.082 | Lr --> 0.007 | Seconds_per_step --> 3.392 |
594
+ [2024-08-10 14:26:07,414][Main][INFO] - [train] Step 27250 out of 80000 | Loss --> 2.102 | Grad_l2 --> 0.356 | Weights_l2 --> 8992.722 | Lr --> 0.007 | Seconds_per_step --> 3.381 |
595
+ [2024-08-10 14:28:57,816][Main][INFO] - [train] Step 27300 out of 80000 | Loss --> 2.099 | Grad_l2 --> 0.355 | Weights_l2 --> 8993.348 | Lr --> 0.007 | Seconds_per_step --> 3.408 |
596
+ [2024-08-10 14:31:46,143][Main][INFO] - [train] Step 27350 out of 80000 | Loss --> 2.099 | Grad_l2 --> 0.356 | Weights_l2 --> 8993.988 | Lr --> 0.007 | Seconds_per_step --> 3.367 |
597
+ [2024-08-10 14:34:34,674][Main][INFO] - [train] Step 27400 out of 80000 | Loss --> 2.106 | Grad_l2 --> 0.352 | Weights_l2 --> 8994.615 | Lr --> 0.007 | Seconds_per_step --> 3.371 |
598
+ [2024-08-10 14:37:23,575][Main][INFO] - [train] Step 27450 out of 80000 | Loss --> 2.092 | Grad_l2 --> 0.350 | Weights_l2 --> 8995.217 | Lr --> 0.007 | Seconds_per_step --> 3.378 |
599
+ [2024-08-10 14:40:13,983][Main][INFO] - [train] Step 27500 out of 80000 | Loss --> 2.097 | Grad_l2 --> 0.356 | Weights_l2 --> 8995.840 | Lr --> 0.007 | Seconds_per_step --> 3.408 |
600
+ [2024-08-10 14:43:03,622][Main][INFO] - [train] Step 27550 out of 80000 | Loss --> 2.096 | Grad_l2 --> 0.358 | Weights_l2 --> 8996.451 | Lr --> 0.007 | Seconds_per_step --> 3.393 |
601
+ [2024-08-10 14:45:52,968][Main][INFO] - [train] Step 27600 out of 80000 | Loss --> 2.099 | Grad_l2 --> 0.351 | Weights_l2 --> 8997.061 | Lr --> 0.007 | Seconds_per_step --> 3.387 |
602
+ [2024-08-10 14:48:41,052][Main][INFO] - [train] Step 27650 out of 80000 | Loss --> 2.096 | Grad_l2 --> 0.355 | Weights_l2 --> 8997.689 | Lr --> 0.007 | Seconds_per_step --> 3.362 |
603
+ [2024-08-10 14:51:30,840][Main][INFO] - [train] Step 27700 out of 80000 | Loss --> 2.093 | Grad_l2 --> 0.353 | Weights_l2 --> 8998.303 | Lr --> 0.007 | Seconds_per_step --> 3.396 |
604
+ [2024-08-10 14:54:19,140][Main][INFO] - [train] Step 27750 out of 80000 | Loss --> 2.095 | Grad_l2 --> 0.351 | Weights_l2 --> 8998.943 | Lr --> 0.007 | Seconds_per_step --> 3.366 |
605
+ [2024-08-10 14:57:08,364][Main][INFO] - [train] Step 27800 out of 80000 | Loss --> 2.107 | Grad_l2 --> 0.350 | Weights_l2 --> 8999.548 | Lr --> 0.007 | Seconds_per_step --> 3.384 |
606
+ [2024-08-10 14:59:56,874][Main][INFO] - [train] Step 27850 out of 80000 | Loss --> 2.103 | Grad_l2 --> 0.352 | Weights_l2 --> 9000.166 | Lr --> 0.007 | Seconds_per_step --> 3.370 |
607
+ [2024-08-10 15:02:45,245][Main][INFO] - [train] Step 27900 out of 80000 | Loss --> 2.105 | Grad_l2 --> 0.351 | Weights_l2 --> 9000.752 | Lr --> 0.007 | Seconds_per_step --> 3.367 |
608
+ [2024-08-10 15:05:34,878][Main][INFO] - [train] Step 27950 out of 80000 | Loss --> 2.090 | Grad_l2 --> 0.351 | Weights_l2 --> 9001.367 | Lr --> 0.007 | Seconds_per_step --> 3.393 |
609
+ [2024-08-10 15:08:27,943][Main][INFO] - [train] Step 28000 out of 80000 | Loss --> 2.092 | Grad_l2 --> 0.353 | Weights_l2 --> 9001.952 | Lr --> 0.007 | Seconds_per_step --> 3.461 |
610
+ [2024-08-10 15:11:16,686][Main][INFO] - [train] Step 28050 out of 80000 | Loss --> 2.095 | Grad_l2 --> 0.347 | Weights_l2 --> 9002.530 | Lr --> 0.007 | Seconds_per_step --> 3.375 |
611
+ [2024-08-10 15:14:05,413][Main][INFO] - [train] Step 28100 out of 80000 | Loss --> 2.091 | Grad_l2 --> 0.351 | Weights_l2 --> 9003.118 | Lr --> 0.007 | Seconds_per_step --> 3.375 |
612
+ [2024-08-10 15:16:54,575][Main][INFO] - [train] Step 28150 out of 80000 | Loss --> 2.083 | Grad_l2 --> 0.352 | Weights_l2 --> 9003.707 | Lr --> 0.007 | Seconds_per_step --> 3.383 |
613
+ [2024-08-10 15:19:42,657][Main][INFO] - [train] Step 28200 out of 80000 | Loss --> 2.089 | Grad_l2 --> 0.348 | Weights_l2 --> 9004.297 | Lr --> 0.007 | Seconds_per_step --> 3.362 |
614
+ [2024-08-10 15:22:30,942][Main][INFO] - [train] Step 28250 out of 80000 | Loss --> 2.078 | Grad_l2 --> 0.352 | Weights_l2 --> 9004.874 | Lr --> 0.007 | Seconds_per_step --> 3.366 |
615
+ [2024-08-10 15:25:19,094][Main][INFO] - [train] Step 28300 out of 80000 | Loss --> 2.085 | Grad_l2 --> 0.349 | Weights_l2 --> 9005.468 | Lr --> 0.007 | Seconds_per_step --> 3.363 |
616
+ [2024-08-10 15:28:08,763][Main][INFO] - [train] Step 28350 out of 80000 | Loss --> 2.084 | Grad_l2 --> 0.348 | Weights_l2 --> 9006.043 | Lr --> 0.007 | Seconds_per_step --> 3.393 |
617
+ [2024-08-10 15:30:57,721][Main][INFO] - [train] Step 28400 out of 80000 | Loss --> 2.084 | Grad_l2 --> 0.347 | Weights_l2 --> 9006.633 | Lr --> 0.007 | Seconds_per_step --> 3.379 |
618
+ [2024-08-10 15:33:46,251][Main][INFO] - [train] Step 28450 out of 80000 | Loss --> 2.076 | Grad_l2 --> 0.351 | Weights_l2 --> 9007.186 | Lr --> 0.007 | Seconds_per_step --> 3.371 |
619
+ [2024-08-10 15:36:34,855][Main][INFO] - [train] Step 28500 out of 80000 | Loss --> 2.074 | Grad_l2 --> 0.357 | Weights_l2 --> 9007.759 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
620
+ [2024-08-10 15:39:24,573][Main][INFO] - [train] Step 28550 out of 80000 | Loss --> 2.073 | Grad_l2 --> 0.350 | Weights_l2 --> 9008.346 | Lr --> 0.007 | Seconds_per_step --> 3.394 |
621
+ [2024-08-10 15:42:13,485][Main][INFO] - [train] Step 28600 out of 80000 | Loss --> 2.068 | Grad_l2 --> 0.349 | Weights_l2 --> 9008.929 | Lr --> 0.007 | Seconds_per_step --> 3.378 |
622
+ [2024-08-10 15:45:01,743][Main][INFO] - [train] Step 28650 out of 80000 | Loss --> 2.068 | Grad_l2 --> 0.348 | Weights_l2 --> 9009.504 | Lr --> 0.007 | Seconds_per_step --> 3.365 |
623
+ [2024-08-10 15:47:50,368][Main][INFO] - [train] Step 28700 out of 80000 | Loss --> 2.072 | Grad_l2 --> 0.348 | Weights_l2 --> 9010.089 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
624
+ [2024-08-10 15:50:39,698][Main][INFO] - [train] Step 28750 out of 80000 | Loss --> 2.071 | Grad_l2 --> 0.345 | Weights_l2 --> 9010.647 | Lr --> 0.007 | Seconds_per_step --> 3.387 |
625
+ [2024-08-10 15:53:28,221][Main][INFO] - [train] Step 28800 out of 80000 | Loss --> 2.069 | Grad_l2 --> 0.349 | Weights_l2 --> 9011.202 | Lr --> 0.007 | Seconds_per_step --> 3.370 |
626
+ [2024-08-10 15:56:16,912][Main][INFO] - [train] Step 28850 out of 80000 | Loss --> 2.058 | Grad_l2 --> 0.344 | Weights_l2 --> 9011.796 | Lr --> 0.007 | Seconds_per_step --> 3.374 |
627
+ [2024-08-10 15:59:05,519][Main][INFO] - [train] Step 28900 out of 80000 | Loss --> 2.063 | Grad_l2 --> 0.349 | Weights_l2 --> 9012.353 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
628
+ [2024-08-10 16:01:55,321][Main][INFO] - [train] Step 28950 out of 80000 | Loss --> 2.067 | Grad_l2 --> 0.347 | Weights_l2 --> 9012.927 | Lr --> 0.007 | Seconds_per_step --> 3.396 |
629
+ [2024-08-10 16:04:43,967][Main][INFO] - [train] Step 29000 out of 80000 | Loss --> 2.064 | Grad_l2 --> 0.342 | Weights_l2 --> 9013.466 | Lr --> 0.007 | Seconds_per_step --> 3.373 |
630
+ [2024-08-10 16:07:32,200][Main][INFO] - [train] Step 29050 out of 80000 | Loss --> 2.065 | Grad_l2 --> 0.349 | Weights_l2 --> 9014.025 | Lr --> 0.007 | Seconds_per_step --> 3.365 |
631
+ [2024-08-10 16:10:20,259][Main][INFO] - [train] Step 29100 out of 80000 | Loss --> 2.068 | Grad_l2 --> 0.345 | Weights_l2 --> 9014.577 | Lr --> 0.007 | Seconds_per_step --> 3.361 |
632
+ [2024-08-10 16:13:08,836][Main][INFO] - [train] Step 29150 out of 80000 | Loss --> 2.057 | Grad_l2 --> 0.343 | Weights_l2 --> 9015.114 | Lr --> 0.007 | Seconds_per_step --> 3.372 |
633
+ [2024-08-10 16:15:57,855][Main][INFO] - [train] Step 29200 out of 80000 | Loss --> 2.056 | Grad_l2 --> 0.344 | Weights_l2 --> 9015.652 | Lr --> 0.007 | Seconds_per_step --> 3.380 |
634
+ [2024-08-10 16:18:46,380][Main][INFO] - [train] Step 29250 out of 80000 | Loss --> 2.052 | Grad_l2 --> 0.344 | Weights_l2 --> 9016.179 | Lr --> 0.007 | Seconds_per_step --> 3.370 |
635
+ [2024-08-10 16:21:35,062][Main][INFO] - [train] Step 29300 out of 80000 | Loss --> 2.049 | Grad_l2 --> 0.344 | Weights_l2 --> 9016.760 | Lr --> 0.007 | Seconds_per_step --> 3.374 |
636
+ [2024-08-10 16:24:23,369][Main][INFO] - [train] Step 29350 out of 80000 | Loss --> 2.059 | Grad_l2 --> 0.346 | Weights_l2 --> 9017.287 | Lr --> 0.007 | Seconds_per_step --> 3.366 |
637
+ [2024-08-10 16:27:12,911][Main][INFO] - [train] Step 29400 out of 80000 | Loss --> 2.062 | Grad_l2 --> 0.345 | Weights_l2 --> 9017.828 | Lr --> 0.007 | Seconds_per_step --> 3.391 |
638
+ [2024-08-10 16:30:01,803][Main][INFO] - [train] Step 29450 out of 80000 | Loss --> 2.050 | Grad_l2 --> 0.341 | Weights_l2 --> 9018.397 | Lr --> 0.007 | Seconds_per_step --> 3.378 |
639
+ [2024-08-10 16:32:50,350][Main][INFO] - [train] Step 29500 out of 80000 | Loss --> 2.042 | Grad_l2 --> 0.348 | Weights_l2 --> 9018.920 | Lr --> 0.007 | Seconds_per_step --> 3.371 |
640
+ [2024-08-10 16:35:38,777][Main][INFO] - [train] Step 29550 out of 80000 | Loss --> 2.054 | Grad_l2 --> 0.340 | Weights_l2 --> 9019.463 | Lr --> 0.007 | Seconds_per_step --> 3.369 |
641
+ [2024-08-10 16:38:27,972][Main][INFO] - [train] Step 29600 out of 80000 | Loss --> 2.051 | Grad_l2 --> 0.342 | Weights_l2 --> 9020.006 | Lr --> 0.007 | Seconds_per_step --> 3.384 |
642
+ [2024-08-10 16:41:16,224][Main][INFO] - [train] Step 29650 out of 80000 | Loss --> 2.047 | Grad_l2 --> 0.340 | Weights_l2 --> 9020.522 | Lr --> 0.007 | Seconds_per_step --> 3.365 |
643
+ [2024-08-10 16:44:04,496][Main][INFO] - [train] Step 29700 out of 80000 | Loss --> 2.045 | Grad_l2 --> 0.341 | Weights_l2 --> 9021.050 | Lr --> 0.007 | Seconds_per_step --> 3.365 |
644
+ [2024-08-10 16:46:53,269][Main][INFO] - [train] Step 29750 out of 80000 | Loss --> 2.049 | Grad_l2 --> 0.341 | Weights_l2 --> 9021.579 | Lr --> 0.007 | Seconds_per_step --> 3.375 |
645
+ [2024-08-10 16:49:48,011][Main][INFO] - [train] Step 29800 out of 80000 | Loss --> 2.040 | Grad_l2 --> 0.340 | Weights_l2 --> 9022.114 | Lr --> 0.007 | Seconds_per_step --> 3.495 |
646
+ [2024-08-10 16:52:36,978][Main][INFO] - [train] Step 29850 out of 80000 | Loss --> 2.048 | Grad_l2 --> 0.339 | Weights_l2 --> 9022.615 | Lr --> 0.007 | Seconds_per_step --> 3.379 |
647
+ [2024-08-10 16:55:25,738][Main][INFO] - [train] Step 29900 out of 80000 | Loss --> 2.038 | Grad_l2 --> 0.339 | Weights_l2 --> 9023.139 | Lr --> 0.007 | Seconds_per_step --> 3.375 |
648
+ [2024-08-10 16:58:14,275][Main][INFO] - [train] Step 29950 out of 80000 | Loss --> 2.042 | Grad_l2 --> 0.340 | Weights_l2 --> 9023.652 | Lr --> 0.007 | Seconds_per_step --> 3.371 |
649
+ [2024-08-10 17:01:03,660][Main][INFO] - [train] Step 30000 out of 80000 | Loss --> 2.038 | Grad_l2 --> 0.341 | Weights_l2 --> 9024.174 | Lr --> 0.006 | Seconds_per_step --> 3.388 |
650
+ [2024-08-10 17:01:03,660][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-30000
651
+ [2024-08-10 17:01:03,664][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
652
+ [2024-08-10 17:01:05,567][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-30000/model.safetensors
653
+ [2024-08-10 17:01:08,375][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-30000/optimizer.bin
654
+ [2024-08-10 17:01:08,375][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-30000/scheduler.bin
655
+ [2024-08-10 17:01:08,375][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-30000/sampler.bin
656
+ [2024-08-10 17:01:08,375][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-30000/sampler_1.bin
657
+ [2024-08-10 17:01:08,376][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-30000/random_states_0.pkl
658
+ [2024-08-10 17:03:56,367][Main][INFO] - [train] Step 30050 out of 80000 | Loss --> 2.033 | Grad_l2 --> 0.340 | Weights_l2 --> 9024.673 | Lr --> 0.006 | Seconds_per_step --> 3.454 |
659
+ [2024-08-10 17:06:45,781][Main][INFO] - [train] Step 30100 out of 80000 | Loss --> 2.026 | Grad_l2 --> 0.336 | Weights_l2 --> 9025.193 | Lr --> 0.006 | Seconds_per_step --> 3.388 |
660
+ [2024-08-10 17:09:35,059][Main][INFO] - [train] Step 30150 out of 80000 | Loss --> 2.029 | Grad_l2 --> 0.339 | Weights_l2 --> 9025.688 | Lr --> 0.006 | Seconds_per_step --> 3.386 |
661
+ [2024-08-10 17:12:24,449][Main][INFO] - [train] Step 30200 out of 80000 | Loss --> 2.029 | Grad_l2 --> 0.337 | Weights_l2 --> 9026.206 | Lr --> 0.006 | Seconds_per_step --> 3.388 |
662
+ [2024-08-10 17:15:13,224][Main][INFO] - [train] Step 30250 out of 80000 | Loss --> 2.030 | Grad_l2 --> 0.338 | Weights_l2 --> 9026.710 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
663
+ [2024-08-10 17:18:01,694][Main][INFO] - [train] Step 30300 out of 80000 | Loss --> 2.025 | Grad_l2 --> 0.339 | Weights_l2 --> 9027.226 | Lr --> 0.006 | Seconds_per_step --> 3.369 |
664
+ [2024-08-10 17:20:50,461][Main][INFO] - [train] Step 30350 out of 80000 | Loss --> 2.023 | Grad_l2 --> 0.338 | Weights_l2 --> 9027.736 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
665
+ [2024-08-10 17:23:39,856][Main][INFO] - [train] Step 30400 out of 80000 | Loss --> 2.025 | Grad_l2 --> 0.336 | Weights_l2 --> 9028.239 | Lr --> 0.006 | Seconds_per_step --> 3.388 |
666
+ [2024-08-10 17:26:28,410][Main][INFO] - [train] Step 30450 out of 80000 | Loss --> 2.028 | Grad_l2 --> 0.337 | Weights_l2 --> 9028.741 | Lr --> 0.006 | Seconds_per_step --> 3.371 |
667
+ [2024-08-10 17:29:17,353][Main][INFO] - [train] Step 30500 out of 80000 | Loss --> 2.020 | Grad_l2 --> 0.337 | Weights_l2 --> 9029.242 | Lr --> 0.006 | Seconds_per_step --> 3.379 |
668
+ [2024-08-10 17:32:06,838][Main][INFO] - [train] Step 30550 out of 80000 | Loss --> 2.022 | Grad_l2 --> 0.336 | Weights_l2 --> 9029.748 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
669
+ [2024-08-10 17:34:56,133][Main][INFO] - [train] Step 30600 out of 80000 | Loss --> 2.020 | Grad_l2 --> 0.338 | Weights_l2 --> 9030.233 | Lr --> 0.006 | Seconds_per_step --> 3.386 |
670
+ [2024-08-10 17:37:44,605][Main][INFO] - [train] Step 30650 out of 80000 | Loss --> 2.020 | Grad_l2 --> 0.335 | Weights_l2 --> 9030.723 | Lr --> 0.006 | Seconds_per_step --> 3.369 |
671
+ [2024-08-10 17:40:36,273][Main][INFO] - [train] Step 30700 out of 80000 | Loss --> 2.022 | Grad_l2 --> 0.336 | Weights_l2 --> 9031.205 | Lr --> 0.006 | Seconds_per_step --> 3.433 |
672
+ [2024-08-10 17:43:24,760][Main][INFO] - [train] Step 30750 out of 80000 | Loss --> 2.009 | Grad_l2 --> 0.338 | Weights_l2 --> 9031.702 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
673
+ [2024-08-10 17:46:13,103][Main][INFO] - [train] Step 30800 out of 80000 | Loss --> 2.010 | Grad_l2 --> 0.336 | Weights_l2 --> 9032.180 | Lr --> 0.006 | Seconds_per_step --> 3.367 |
674
+ [2024-08-10 17:49:02,171][Main][INFO] - [train] Step 30850 out of 80000 | Loss --> 2.016 | Grad_l2 --> 0.337 | Weights_l2 --> 9032.684 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
675
+ [2024-08-10 17:51:50,797][Main][INFO] - [train] Step 30900 out of 80000 | Loss --> 2.008 | Grad_l2 --> 0.333 | Weights_l2 --> 9033.166 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
676
+ [2024-08-10 17:54:39,186][Main][INFO] - [train] Step 30950 out of 80000 | Loss --> 2.004 | Grad_l2 --> 0.335 | Weights_l2 --> 9033.641 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
677
+ [2024-08-10 17:57:28,172][Main][INFO] - [train] Step 31000 out of 80000 | Loss --> 2.003 | Grad_l2 --> 0.333 | Weights_l2 --> 9034.106 | Lr --> 0.006 | Seconds_per_step --> 3.380 |
678
+ [2024-08-10 18:00:17,752][Main][INFO] - [train] Step 31050 out of 80000 | Loss --> 2.001 | Grad_l2 --> 0.334 | Weights_l2 --> 9034.578 | Lr --> 0.006 | Seconds_per_step --> 3.392 |
679
+ [2024-08-10 18:03:05,833][Main][INFO] - [train] Step 31100 out of 80000 | Loss --> 1.995 | Grad_l2 --> 0.333 | Weights_l2 --> 9035.048 | Lr --> 0.006 | Seconds_per_step --> 3.362 |
680
+ [2024-08-10 18:05:57,136][Main][INFO] - [train] Step 31150 out of 80000 | Loss --> 2.004 | Grad_l2 --> 0.330 | Weights_l2 --> 9035.529 | Lr --> 0.006 | Seconds_per_step --> 3.426 |
681
+ [2024-08-10 18:08:59,939][Main][INFO] - [train] Step 31200 out of 80000 | Loss --> 1.997 | Grad_l2 --> 0.336 | Weights_l2 --> 9036.006 | Lr --> 0.006 | Seconds_per_step --> 3.656 |
682
+ [2024-08-10 18:11:52,395][Main][INFO] - [train] Step 31250 out of 80000 | Loss --> 1.994 | Grad_l2 --> 0.333 | Weights_l2 --> 9036.471 | Lr --> 0.006 | Seconds_per_step --> 3.449 |
683
+ [2024-08-10 18:14:40,969][Main][INFO] - [train] Step 31300 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.333 | Weights_l2 --> 9036.920 | Lr --> 0.006 | Seconds_per_step --> 3.371 |
684
+ [2024-08-10 18:17:29,783][Main][INFO] - [train] Step 31350 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.330 | Weights_l2 --> 9037.357 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
685
+ [2024-08-10 18:20:21,690][Main][INFO] - [train] Step 31400 out of 80000 | Loss --> 2.000 | Grad_l2 --> 0.329 | Weights_l2 --> 9037.811 | Lr --> 0.006 | Seconds_per_step --> 3.438 |
686
+ [2024-08-10 18:23:11,088][Main][INFO] - [train] Step 31450 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.333 | Weights_l2 --> 9038.266 | Lr --> 0.006 | Seconds_per_step --> 3.388 |
687
+ [2024-08-10 18:26:00,153][Main][INFO] - [train] Step 31500 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.330 | Weights_l2 --> 9038.727 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
688
+ [2024-08-10 18:28:53,003][Main][INFO] - [train] Step 31550 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.328 | Weights_l2 --> 9039.170 | Lr --> 0.006 | Seconds_per_step --> 3.457 |
689
+ [2024-08-10 18:32:18,339][Main][INFO] - [train] Step 31600 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.334 | Weights_l2 --> 9039.624 | Lr --> 0.006 | Seconds_per_step --> 4.107 |
690
+ [2024-08-10 18:35:13,944][Main][INFO] - [train] Step 31650 out of 80000 | Loss --> 1.979 | Grad_l2 --> 0.331 | Weights_l2 --> 9040.077 | Lr --> 0.006 | Seconds_per_step --> 3.512 |
691
+ [2024-08-10 18:38:16,891][Main][INFO] - [train] Step 31700 out of 80000 | Loss --> 1.982 | Grad_l2 --> 0.333 | Weights_l2 --> 9040.502 | Lr --> 0.006 | Seconds_per_step --> 3.659 |
692
+ [2024-08-10 18:41:07,083][Main][INFO] - [train] Step 31750 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.330 | Weights_l2 --> 9040.956 | Lr --> 0.006 | Seconds_per_step --> 3.404 |
693
+ [2024-08-10 18:44:09,382][Main][INFO] - [train] Step 31800 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.331 | Weights_l2 --> 9041.401 | Lr --> 0.006 | Seconds_per_step --> 3.646 |
694
+ [2024-08-10 18:47:08,204][Main][INFO] - [train] Step 31850 out of 80000 | Loss --> 1.977 | Grad_l2 --> 0.327 | Weights_l2 --> 9041.844 | Lr --> 0.006 | Seconds_per_step --> 3.576 |
695
+ [2024-08-10 18:49:56,651][Main][INFO] - [train] Step 31900 out of 80000 | Loss --> 1.979 | Grad_l2 --> 0.327 | Weights_l2 --> 9042.293 | Lr --> 0.006 | Seconds_per_step --> 3.369 |
696
+ [2024-08-10 18:52:45,087][Main][INFO] - [train] Step 31950 out of 80000 | Loss --> 1.983 | Grad_l2 --> 0.328 | Weights_l2 --> 9042.741 | Lr --> 0.006 | Seconds_per_step --> 3.369 |
697
+ [2024-08-10 18:55:33,129][Main][INFO] - [train] Step 32000 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.328 | Weights_l2 --> 9043.156 | Lr --> 0.006 | Seconds_per_step --> 3.361 |
698
+ [2024-08-10 18:58:21,966][Main][INFO] - [train] Step 32050 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.332 | Weights_l2 --> 9043.615 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
699
+ [2024-08-10 19:01:10,644][Main][INFO] - [train] Step 32100 out of 80000 | Loss --> 1.971 | Grad_l2 --> 0.336 | Weights_l2 --> 9044.048 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
700
+ [2024-08-10 19:03:59,925][Main][INFO] - [train] Step 32150 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.329 | Weights_l2 --> 9044.462 | Lr --> 0.006 | Seconds_per_step --> 3.386 |
701
+ [2024-08-10 19:06:48,343][Main][INFO] - [train] Step 32200 out of 80000 | Loss --> 1.979 | Grad_l2 --> 0.329 | Weights_l2 --> 9044.883 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
702
+ [2024-08-10 19:09:37,807][Main][INFO] - [train] Step 32250 out of 80000 | Loss --> 1.966 | Grad_l2 --> 0.329 | Weights_l2 --> 9045.314 | Lr --> 0.006 | Seconds_per_step --> 3.389 |
703
+ [2024-08-10 19:12:25,582][Main][INFO] - [train] Step 32300 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.326 | Weights_l2 --> 9045.752 | Lr --> 0.006 | Seconds_per_step --> 3.355 |
704
+ [2024-08-10 19:15:14,152][Main][INFO] - [train] Step 32350 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9046.174 | Lr --> 0.006 | Seconds_per_step --> 3.371 |
705
+ [2024-08-10 19:18:02,528][Main][INFO] - [train] Step 32400 out of 80000 | Loss --> 1.971 | Grad_l2 --> 0.326 | Weights_l2 --> 9046.586 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
706
+ [2024-08-10 19:20:51,357][Main][INFO] - [train] Step 32450 out of 80000 | Loss --> 1.966 | Grad_l2 --> 0.328 | Weights_l2 --> 9046.976 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
707
+ [2024-08-10 19:23:41,166][Main][INFO] - [train] Step 32500 out of 80000 | Loss --> 1.976 | Grad_l2 --> 0.332 | Weights_l2 --> 9047.423 | Lr --> 0.006 | Seconds_per_step --> 3.396 |
708
+ [2024-08-10 19:26:30,513][Main][INFO] - [train] Step 32550 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.325 | Weights_l2 --> 9047.840 | Lr --> 0.006 | Seconds_per_step --> 3.387 |
709
+ [2024-08-10 19:29:18,382][Main][INFO] - [train] Step 32600 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.327 | Weights_l2 --> 9048.263 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
710
+ [2024-08-10 19:32:06,781][Main][INFO] - [train] Step 32650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9048.703 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
711
+ [2024-08-10 19:34:55,944][Main][INFO] - [train] Step 32700 out of 80000 | Loss --> 1.968 | Grad_l2 --> 0.326 | Weights_l2 --> 9049.119 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -526,3 +526,130 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
526
  "2024-08-10 13:29:35,823",26250,2.121,0.36,8979.595,0.007,3.39
527
  "2024-08-10 13:32:25,900",26300,2.117,0.36,8980.264,0.007,3.402
528
  "2024-08-10 13:35:14,993",26350,2.111,0.364,8980.936,0.007,3.382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  "2024-08-10 13:29:35,823",26250,2.121,0.36,8979.595,0.007,3.39
527
  "2024-08-10 13:32:25,900",26300,2.117,0.36,8980.264,0.007,3.402
528
  "2024-08-10 13:35:14,993",26350,2.111,0.364,8980.936,0.007,3.382
529
+ "2024-08-10 13:38:03,528",26400,2.123,0.364,8981.629,0.007,3.371
530
+ "2024-08-10 13:40:55,925",26450,2.111,0.363,8982.292,0.007,3.448
531
+ "2024-08-10 13:43:46,071",26500,2.122,0.36,8982.97,0.007,3.403
532
+ "2024-08-10 13:46:36,135",26550,2.118,0.362,8983.635,0.007,3.401
533
+ "2024-08-10 13:49:25,847",26600,2.119,0.359,8984.271,0.007,3.394
534
+ "2024-08-10 13:52:14,646",26650,2.121,0.359,8984.935,0.007,3.376
535
+ "2024-08-10 13:55:04,238",26700,2.119,0.361,8985.6,0.007,3.392
536
+ "2024-08-10 13:57:52,429",26750,2.117,0.358,8986.254,0.007,3.364
537
+ "2024-08-10 14:00:40,663",26800,2.12,0.358,8986.901,0.007,3.365
538
+ "2024-08-10 14:03:30,358",26850,2.114,0.356,8987.561,0.007,3.394
539
+ "2024-08-10 14:06:20,974",26900,2.107,0.358,8988.213,0.007,3.412
540
+ "2024-08-10 14:09:10,794",26950,2.115,0.355,8988.873,0.007,3.396
541
+ "2024-08-10 14:11:59,378",27000,2.114,0.356,8989.515,0.007,3.372
542
+ "2024-08-10 14:14:49,113",27050,2.108,0.358,8990.14,0.007,3.395
543
+ "2024-08-10 14:17:39,074",27100,2.115,0.356,8990.787,0.007,3.399
544
+ "2024-08-10 14:20:28,802",27150,2.108,0.359,8991.446,0.007,3.395
545
+ "2024-08-10 14:23:18,386",27200,2.119,0.355,8992.082,0.007,3.392
546
+ "2024-08-10 14:26:07,414",27250,2.102,0.356,8992.722,0.007,3.381
547
+ "2024-08-10 14:28:57,816",27300,2.099,0.355,8993.348,0.007,3.408
548
+ "2024-08-10 14:31:46,143",27350,2.099,0.356,8993.988,0.007,3.367
549
+ "2024-08-10 14:34:34,674",27400,2.106,0.352,8994.615,0.007,3.371
550
+ "2024-08-10 14:37:23,575",27450,2.092,0.35,8995.217,0.007,3.378
551
+ "2024-08-10 14:40:13,983",27500,2.097,0.356,8995.84,0.007,3.408
552
+ "2024-08-10 14:43:03,622",27550,2.096,0.358,8996.451,0.007,3.393
553
+ "2024-08-10 14:45:52,968",27600,2.099,0.351,8997.061,0.007,3.387
554
+ "2024-08-10 14:48:41,052",27650,2.096,0.355,8997.689,0.007,3.362
555
+ "2024-08-10 14:51:30,840",27700,2.093,0.353,8998.303,0.007,3.396
556
+ "2024-08-10 14:54:19,140",27750,2.095,0.351,8998.943,0.007,3.366
557
+ "2024-08-10 14:57:08,364",27800,2.107,0.35,8999.548,0.007,3.384
558
+ "2024-08-10 14:59:56,874",27850,2.103,0.352,9000.166,0.007,3.37
559
+ "2024-08-10 15:02:45,245",27900,2.105,0.351,9000.752,0.007,3.367
560
+ "2024-08-10 15:05:34,878",27950,2.09,0.351,9001.367,0.007,3.393
561
+ "2024-08-10 15:08:27,943",28000,2.092,0.353,9001.952,0.007,3.461
562
+ "2024-08-10 15:11:16,686",28050,2.095,0.347,9002.53,0.007,3.375
563
+ "2024-08-10 15:14:05,413",28100,2.091,0.351,9003.118,0.007,3.375
564
+ "2024-08-10 15:16:54,575",28150,2.083,0.352,9003.707,0.007,3.383
565
+ "2024-08-10 15:19:42,657",28200,2.089,0.348,9004.297,0.007,3.362
566
+ "2024-08-10 15:22:30,942",28250,2.078,0.352,9004.874,0.007,3.366
567
+ "2024-08-10 15:25:19,094",28300,2.085,0.349,9005.468,0.007,3.363
568
+ "2024-08-10 15:28:08,763",28350,2.084,0.348,9006.043,0.007,3.393
569
+ "2024-08-10 15:30:57,721",28400,2.084,0.347,9006.633,0.007,3.379
570
+ "2024-08-10 15:33:46,251",28450,2.076,0.351,9007.186,0.007,3.371
571
+ "2024-08-10 15:36:34,855",28500,2.074,0.357,9007.759,0.007,3.372
572
+ "2024-08-10 15:39:24,573",28550,2.073,0.35,9008.346,0.007,3.394
573
+ "2024-08-10 15:42:13,485",28600,2.068,0.349,9008.929,0.007,3.378
574
+ "2024-08-10 15:45:01,743",28650,2.068,0.348,9009.504,0.007,3.365
575
+ "2024-08-10 15:47:50,368",28700,2.072,0.348,9010.089,0.007,3.372
576
+ "2024-08-10 15:50:39,698",28750,2.071,0.345,9010.647,0.007,3.387
577
+ "2024-08-10 15:53:28,221",28800,2.069,0.349,9011.202,0.007,3.37
578
+ "2024-08-10 15:56:16,912",28850,2.058,0.344,9011.796,0.007,3.374
579
+ "2024-08-10 15:59:05,519",28900,2.063,0.349,9012.353,0.007,3.372
580
+ "2024-08-10 16:01:55,321",28950,2.067,0.347,9012.927,0.007,3.396
581
+ "2024-08-10 16:04:43,967",29000,2.064,0.342,9013.466,0.007,3.373
582
+ "2024-08-10 16:07:32,200",29050,2.065,0.349,9014.025,0.007,3.365
583
+ "2024-08-10 16:10:20,259",29100,2.068,0.345,9014.577,0.007,3.361
584
+ "2024-08-10 16:13:08,836",29150,2.057,0.343,9015.114,0.007,3.372
585
+ "2024-08-10 16:15:57,855",29200,2.056,0.344,9015.652,0.007,3.38
586
+ "2024-08-10 16:18:46,380",29250,2.052,0.344,9016.179,0.007,3.37
587
+ "2024-08-10 16:21:35,062",29300,2.049,0.344,9016.76,0.007,3.374
588
+ "2024-08-10 16:24:23,369",29350,2.059,0.346,9017.287,0.007,3.366
589
+ "2024-08-10 16:27:12,911",29400,2.062,0.345,9017.828,0.007,3.391
590
+ "2024-08-10 16:30:01,803",29450,2.05,0.341,9018.397,0.007,3.378
591
+ "2024-08-10 16:32:50,350",29500,2.042,0.348,9018.92,0.007,3.371
592
+ "2024-08-10 16:35:38,777",29550,2.054,0.34,9019.463,0.007,3.369
593
+ "2024-08-10 16:38:27,972",29600,2.051,0.342,9020.006,0.007,3.384
594
+ "2024-08-10 16:41:16,224",29650,2.047,0.34,9020.522,0.007,3.365
595
+ "2024-08-10 16:44:04,496",29700,2.045,0.341,9021.05,0.007,3.365
596
+ "2024-08-10 16:46:53,269",29750,2.049,0.341,9021.579,0.007,3.375
597
+ "2024-08-10 16:49:48,011",29800,2.04,0.34,9022.114,0.007,3.495
598
+ "2024-08-10 16:52:36,978",29850,2.048,0.339,9022.615,0.007,3.379
599
+ "2024-08-10 16:55:25,738",29900,2.038,0.339,9023.139,0.007,3.375
600
+ "2024-08-10 16:58:14,275",29950,2.042,0.34,9023.652,0.007,3.371
601
+ "2024-08-10 17:01:03,660",30000,2.038,0.341,9024.174,0.006,3.388
602
+ "2024-08-10 17:03:56,367",30050,2.033,0.34,9024.673,0.006,3.454
603
+ "2024-08-10 17:06:45,781",30100,2.026,0.336,9025.193,0.006,3.388
604
+ "2024-08-10 17:09:35,059",30150,2.029,0.339,9025.688,0.006,3.386
605
+ "2024-08-10 17:12:24,449",30200,2.029,0.337,9026.206,0.006,3.388
606
+ "2024-08-10 17:15:13,224",30250,2.03,0.338,9026.71,0.006,3.375
607
+ "2024-08-10 17:18:01,694",30300,2.025,0.339,9027.226,0.006,3.369
608
+ "2024-08-10 17:20:50,461",30350,2.023,0.338,9027.736,0.006,3.375
609
+ "2024-08-10 17:23:39,856",30400,2.025,0.336,9028.239,0.006,3.388
610
+ "2024-08-10 17:26:28,410",30450,2.028,0.337,9028.741,0.006,3.371
611
+ "2024-08-10 17:29:17,353",30500,2.02,0.337,9029.242,0.006,3.379
612
+ "2024-08-10 17:32:06,838",30550,2.022,0.336,9029.748,0.006,3.39
613
+ "2024-08-10 17:34:56,133",30600,2.02,0.338,9030.233,0.006,3.386
614
+ "2024-08-10 17:37:44,605",30650,2.02,0.335,9030.723,0.006,3.369
615
+ "2024-08-10 17:40:36,273",30700,2.022,0.336,9031.205,0.006,3.433
616
+ "2024-08-10 17:43:24,760",30750,2.009,0.338,9031.702,0.006,3.37
617
+ "2024-08-10 17:46:13,103",30800,2.01,0.336,9032.18,0.006,3.367
618
+ "2024-08-10 17:49:02,171",30850,2.016,0.337,9032.684,0.006,3.381
619
+ "2024-08-10 17:51:50,797",30900,2.008,0.333,9033.166,0.006,3.373
620
+ "2024-08-10 17:54:39,186",30950,2.004,0.335,9033.641,0.006,3.368
621
+ "2024-08-10 17:57:28,172",31000,2.003,0.333,9034.106,0.006,3.38
622
+ "2024-08-10 18:00:17,752",31050,2.001,0.334,9034.578,0.006,3.392
623
+ "2024-08-10 18:03:05,833",31100,1.995,0.333,9035.048,0.006,3.362
624
+ "2024-08-10 18:05:57,136",31150,2.004,0.33,9035.529,0.006,3.426
625
+ "2024-08-10 18:08:59,939",31200,1.997,0.336,9036.006,0.006,3.656
626
+ "2024-08-10 18:11:52,395",31250,1.994,0.333,9036.471,0.006,3.449
627
+ "2024-08-10 18:14:40,969",31300,1.985,0.333,9036.92,0.006,3.371
628
+ "2024-08-10 18:17:29,783",31350,1.987,0.33,9037.357,0.006,3.376
629
+ "2024-08-10 18:20:21,690",31400,2.0,0.329,9037.811,0.006,3.438
630
+ "2024-08-10 18:23:11,088",31450,1.986,0.333,9038.266,0.006,3.388
631
+ "2024-08-10 18:26:00,153",31500,1.989,0.33,9038.727,0.006,3.381
632
+ "2024-08-10 18:28:53,003",31550,1.992,0.328,9039.17,0.006,3.457
633
+ "2024-08-10 18:32:18,339",31600,1.98,0.334,9039.624,0.006,4.107
634
+ "2024-08-10 18:35:13,944",31650,1.979,0.331,9040.077,0.006,3.512
635
+ "2024-08-10 18:38:16,891",31700,1.982,0.333,9040.502,0.006,3.659
636
+ "2024-08-10 18:41:07,083",31750,1.974,0.33,9040.956,0.006,3.404
637
+ "2024-08-10 18:44:09,382",31800,1.975,0.331,9041.401,0.006,3.646
638
+ "2024-08-10 18:47:08,204",31850,1.977,0.327,9041.844,0.006,3.576
639
+ "2024-08-10 18:49:56,651",31900,1.979,0.327,9042.293,0.006,3.369
640
+ "2024-08-10 18:52:45,087",31950,1.983,0.328,9042.741,0.006,3.369
641
+ "2024-08-10 18:55:33,129",32000,1.972,0.328,9043.156,0.006,3.361
642
+ "2024-08-10 18:58:21,966",32050,1.974,0.332,9043.615,0.006,3.377
643
+ "2024-08-10 19:01:10,644",32100,1.971,0.336,9044.048,0.006,3.374
644
+ "2024-08-10 19:03:59,925",32150,1.975,0.329,9044.462,0.006,3.386
645
+ "2024-08-10 19:06:48,343",32200,1.979,0.329,9044.883,0.006,3.368
646
+ "2024-08-10 19:09:37,807",32250,1.966,0.329,9045.314,0.006,3.389
647
+ "2024-08-10 19:12:25,582",32300,1.975,0.326,9045.752,0.006,3.355
648
+ "2024-08-10 19:15:14,152",32350,1.972,0.332,9046.174,0.006,3.371
649
+ "2024-08-10 19:18:02,528",32400,1.971,0.326,9046.586,0.006,3.368
650
+ "2024-08-10 19:20:51,357",32450,1.966,0.328,9046.976,0.006,3.377
651
+ "2024-08-10 19:23:41,166",32500,1.976,0.332,9047.423,0.006,3.396
652
+ "2024-08-10 19:26:30,513",32550,1.962,0.325,9047.84,0.006,3.387
653
+ "2024-08-10 19:29:18,382",32600,1.957,0.327,9048.263,0.006,3.357
654
+ "2024-08-10 19:32:06,781",32650,1.972,0.332,9048.703,0.006,3.368
655
+ "2024-08-10 19:34:55,944",32700,1.968,0.326,9049.119,0.006,3.383
checkpoints/weights_l2_over_steps.png CHANGED