pszemraj committed on
Commit
a2dab05
·
verified ·
1 Parent(s): 53ee6cd

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-35000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddfcdae4c5fcf1287bc30f6b62841ba8316d8741b93707a462e86a0d18d08ce
3
+ size 1202681712
checkpoints/checkpoint-pt-35000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -709,3 +709,116 @@ Mixed precision type: bf16
709
  [2024-08-10 19:29:18,382][Main][INFO] - [train] Step 32600 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.327 | Weights_l2 --> 9048.263 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
710
  [2024-08-10 19:32:06,781][Main][INFO] - [train] Step 32650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9048.703 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
711
  [2024-08-10 19:34:55,944][Main][INFO] - [train] Step 32700 out of 80000 | Loss --> 1.968 | Grad_l2 --> 0.326 | Weights_l2 --> 9049.119 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  [2024-08-10 19:29:18,382][Main][INFO] - [train] Step 32600 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.327 | Weights_l2 --> 9048.263 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
710
  [2024-08-10 19:32:06,781][Main][INFO] - [train] Step 32650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9048.703 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
711
  [2024-08-10 19:34:55,944][Main][INFO] - [train] Step 32700 out of 80000 | Loss --> 1.968 | Grad_l2 --> 0.326 | Weights_l2 --> 9049.119 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
712
+ [2024-08-10 19:37:44,560][Main][INFO] - [train] Step 32750 out of 80000 | Loss --> 1.971 | Grad_l2 --> 0.331 | Weights_l2 --> 9049.551 | Lr --> 0.006 | Seconds_per_step --> 3.372 |
713
+ [2024-08-10 19:40:33,293][Main][INFO] - [train] Step 32800 out of 80000 | Loss --> 1.967 | Grad_l2 --> 0.327 | Weights_l2 --> 9049.957 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
714
+ [2024-08-10 19:43:22,355][Main][INFO] - [train] Step 32850 out of 80000 | Loss --> 1.964 | Grad_l2 --> 0.328 | Weights_l2 --> 9050.372 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
715
+ [2024-08-10 19:46:12,626][Main][INFO] - [train] Step 32900 out of 80000 | Loss --> 1.964 | Grad_l2 --> 0.326 | Weights_l2 --> 9050.775 | Lr --> 0.006 | Seconds_per_step --> 3.405 |
716
+ [2024-08-10 19:49:01,476][Main][INFO] - [train] Step 32950 out of 80000 | Loss --> 1.965 | Grad_l2 --> 0.327 | Weights_l2 --> 9051.194 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
717
+ [2024-08-10 19:51:50,548][Main][INFO] - [train] Step 33000 out of 80000 | Loss --> 1.960 | Grad_l2 --> 0.325 | Weights_l2 --> 9051.604 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
718
+ [2024-08-10 19:54:38,786][Main][INFO] - [train] Step 33050 out of 80000 | Loss --> 1.958 | Grad_l2 --> 0.326 | Weights_l2 --> 9052.006 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
719
+ [2024-08-10 19:57:28,433][Main][INFO] - [train] Step 33100 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.326 | Weights_l2 --> 9052.404 | Lr --> 0.006 | Seconds_per_step --> 3.393 |
720
+ [2024-08-10 20:00:16,265][Main][INFO] - [train] Step 33150 out of 80000 | Loss --> 1.955 | Grad_l2 --> 0.324 | Weights_l2 --> 9052.805 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
721
+ [2024-08-10 20:03:05,196][Main][INFO] - [train] Step 33200 out of 80000 | Loss --> 1.955 | Grad_l2 --> 0.328 | Weights_l2 --> 9053.209 | Lr --> 0.006 | Seconds_per_step --> 3.379 |
722
+ [2024-08-10 20:05:53,340][Main][INFO] - [train] Step 33250 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.328 | Weights_l2 --> 9053.604 | Lr --> 0.006 | Seconds_per_step --> 3.363 |
723
+ [2024-08-10 20:08:41,826][Main][INFO] - [train] Step 33300 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.325 | Weights_l2 --> 9054.000 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
724
+ [2024-08-10 20:11:30,678][Main][INFO] - [train] Step 33350 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.326 | Weights_l2 --> 9054.393 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
725
+ [2024-08-10 20:14:19,885][Main][INFO] - [train] Step 33400 out of 80000 | Loss --> 1.943 | Grad_l2 --> 0.326 | Weights_l2 --> 9054.797 | Lr --> 0.006 | Seconds_per_step --> 3.384 |
726
+ [2024-08-10 20:17:08,133][Main][INFO] - [train] Step 33450 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.323 | Weights_l2 --> 9055.182 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
727
+ [2024-08-10 20:19:57,277][Main][INFO] - [train] Step 33500 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.323 | Weights_l2 --> 9055.580 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
728
+ [2024-08-10 20:22:45,534][Main][INFO] - [train] Step 33550 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.325 | Weights_l2 --> 9055.965 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
729
+ [2024-08-10 20:25:34,304][Main][INFO] - [train] Step 33600 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.322 | Weights_l2 --> 9056.338 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
730
+ [2024-08-10 20:28:22,563][Main][INFO] - [train] Step 33650 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.326 | Weights_l2 --> 9056.705 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
731
+ [2024-08-10 20:31:11,603][Main][INFO] - [train] Step 33700 out of 80000 | Loss --> 1.956 | Grad_l2 --> 0.323 | Weights_l2 --> 9057.105 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
732
+ [2024-08-10 20:34:00,728][Main][INFO] - [train] Step 33750 out of 80000 | Loss --> 1.942 | Grad_l2 --> 0.324 | Weights_l2 --> 9057.475 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
733
+ [2024-08-10 20:36:50,006][Main][INFO] - [train] Step 33800 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.327 | Weights_l2 --> 9057.854 | Lr --> 0.006 | Seconds_per_step --> 3.386 |
734
+ [2024-08-10 20:39:38,800][Main][INFO] - [train] Step 33850 out of 80000 | Loss --> 1.958 | Grad_l2 --> 0.324 | Weights_l2 --> 9058.234 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
735
+ [2024-08-10 20:42:27,584][Main][INFO] - [train] Step 33900 out of 80000 | Loss --> 1.946 | Grad_l2 --> 0.326 | Weights_l2 --> 9058.594 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
736
+ [2024-08-10 20:45:16,609][Main][INFO] - [train] Step 33950 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.328 | Weights_l2 --> 9058.969 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
737
+ [2024-08-10 20:48:07,179][Main][INFO] - [train] Step 34000 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.324 | Weights_l2 --> 9059.358 | Lr --> 0.006 | Seconds_per_step --> 3.411 |
738
+ [2024-08-10 20:50:56,670][Main][INFO] - [train] Step 34050 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.325 | Weights_l2 --> 9059.744 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
739
+ [2024-08-10 20:53:57,081][Main][INFO] - [train] Step 34100 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.323 | Weights_l2 --> 9060.114 | Lr --> 0.006 | Seconds_per_step --> 3.608 |
740
+ [2024-08-10 20:57:04,451][Main][INFO] - [train] Step 34150 out of 80000 | Loss --> 1.945 | Grad_l2 --> 0.322 | Weights_l2 --> 9060.487 | Lr --> 0.006 | Seconds_per_step --> 3.747 |
741
+ [2024-08-10 20:59:52,938][Main][INFO] - [train] Step 34200 out of 80000 | Loss --> 1.944 | Grad_l2 --> 0.323 | Weights_l2 --> 9060.857 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
742
+ [2024-08-10 21:02:41,472][Main][INFO] - [train] Step 34250 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.327 | Weights_l2 --> 9061.209 | Lr --> 0.006 | Seconds_per_step --> 3.371 |
743
+ [2024-08-10 21:05:30,147][Main][INFO] - [train] Step 34300 out of 80000 | Loss --> 1.943 | Grad_l2 --> 0.323 | Weights_l2 --> 9061.579 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
744
+ [2024-08-10 21:08:19,894][Main][INFO] - [train] Step 34350 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.323 | Weights_l2 --> 9061.930 | Lr --> 0.006 | Seconds_per_step --> 3.395 |
745
+ [2024-08-10 21:11:08,680][Main][INFO] - [train] Step 34400 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.324 | Weights_l2 --> 9062.305 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
746
+ [2024-08-10 21:13:58,108][Main][INFO] - [train] Step 34450 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.323 | Weights_l2 --> 9062.682 | Lr --> 0.006 | Seconds_per_step --> 3.389 |
747
+ [2024-08-10 21:16:57,011][Main][INFO] - [train] Step 34500 out of 80000 | Loss --> 1.953 | Grad_l2 --> 0.322 | Weights_l2 --> 9063.034 | Lr --> 0.006 | Seconds_per_step --> 3.578 |
748
+ [2024-08-10 21:20:03,124][Main][INFO] - [train] Step 34550 out of 80000 | Loss --> 1.956 | Grad_l2 --> 0.323 | Weights_l2 --> 9063.400 | Lr --> 0.006 | Seconds_per_step --> 3.722 |
749
+ [2024-08-10 21:22:52,395][Main][INFO] - [train] Step 34600 out of 80000 | Loss --> 1.951 | Grad_l2 --> 0.322 | Weights_l2 --> 9063.733 | Lr --> 0.006 | Seconds_per_step --> 3.385 |
750
+ [2024-08-10 21:25:41,187][Main][INFO] - [train] Step 34650 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.326 | Weights_l2 --> 9064.106 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
751
+ [2024-08-10 21:28:30,675][Main][INFO] - [train] Step 34700 out of 80000 | Loss --> 1.965 | Grad_l2 --> 0.326 | Weights_l2 --> 9064.465 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
752
+ [2024-08-10 21:31:20,544][Main][INFO] - [train] Step 34750 out of 80000 | Loss --> 1.953 | Grad_l2 --> 0.322 | Weights_l2 --> 9064.833 | Lr --> 0.006 | Seconds_per_step --> 3.397 |
753
+ [2024-08-10 21:34:09,343][Main][INFO] - [train] Step 34800 out of 80000 | Loss --> 1.959 | Grad_l2 --> 0.323 | Weights_l2 --> 9065.209 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
754
+ [2024-08-10 21:37:00,873][Main][INFO] - [train] Step 34850 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.326 | Weights_l2 --> 9065.547 | Lr --> 0.006 | Seconds_per_step --> 3.431 |
755
+ [2024-08-10 21:39:50,852][Main][INFO] - [train] Step 34900 out of 80000 | Loss --> 1.970 | Grad_l2 --> 0.324 | Weights_l2 --> 9065.911 | Lr --> 0.006 | Seconds_per_step --> 3.400 |
756
+ [2024-08-10 21:42:40,224][Main][INFO] - [train] Step 34950 out of 80000 | Loss --> 1.959 | Grad_l2 --> 0.324 | Weights_l2 --> 9066.270 | Lr --> 0.006 | Seconds_per_step --> 3.387 |
757
+ [2024-08-10 21:45:37,170][Main][INFO] - [train] Step 35000 out of 80000 | Loss --> 1.963 | Grad_l2 --> 0.322 | Weights_l2 --> 9066.630 | Lr --> 0.006 | Seconds_per_step --> 3.539 |
758
+ [2024-08-10 21:45:37,170][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-35000
759
+ [2024-08-10 21:45:37,174][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
760
+ [2024-08-10 21:45:39,174][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-35000/model.safetensors
761
+ [2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-35000/optimizer.bin
762
+ [2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-35000/scheduler.bin
763
+ [2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-35000/sampler.bin
764
+ [2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-35000/sampler_1.bin
765
+ [2024-08-10 21:45:41,921][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-35000/random_states_0.pkl
766
+ [2024-08-10 21:48:30,259][Main][INFO] - [train] Step 35050 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.328 | Weights_l2 --> 9066.983 | Lr --> 0.006 | Seconds_per_step --> 3.462 |
767
+ [2024-08-10 21:51:18,768][Main][INFO] - [train] Step 35100 out of 80000 | Loss --> 1.976 | Grad_l2 --> 0.324 | Weights_l2 --> 9067.320 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
768
+ [2024-08-10 21:54:07,440][Main][INFO] - [train] Step 35150 out of 80000 | Loss --> 1.973 | Grad_l2 --> 0.322 | Weights_l2 --> 9067.649 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
769
+ [2024-08-10 21:56:56,909][Main][INFO] - [train] Step 35200 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.327 | Weights_l2 --> 9068.010 | Lr --> 0.006 | Seconds_per_step --> 3.389 |
770
+ [2024-08-10 21:59:46,025][Main][INFO] - [train] Step 35250 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.325 | Weights_l2 --> 9068.355 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
771
+ [2024-08-10 22:02:34,755][Main][INFO] - [train] Step 35300 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.325 | Weights_l2 --> 9068.705 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
772
+ [2024-08-10 22:05:24,758][Main][INFO] - [train] Step 35350 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.327 | Weights_l2 --> 9069.051 | Lr --> 0.006 | Seconds_per_step --> 3.400 |
773
+ [2024-08-10 22:08:13,479][Main][INFO] - [train] Step 35400 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.324 | Weights_l2 --> 9069.396 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
774
+ [2024-08-10 22:11:02,521][Main][INFO] - [train] Step 35450 out of 80000 | Loss --> 1.969 | Grad_l2 --> 0.323 | Weights_l2 --> 9069.720 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
775
+ [2024-08-10 22:13:50,439][Main][INFO] - [train] Step 35500 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.322 | Weights_l2 --> 9070.047 | Lr --> 0.006 | Seconds_per_step --> 3.358 |
776
+ [2024-08-10 22:16:39,106][Main][INFO] - [train] Step 35550 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.324 | Weights_l2 --> 9070.389 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
777
+ [2024-08-10 22:19:27,802][Main][INFO] - [train] Step 35600 out of 80000 | Loss --> 1.979 | Grad_l2 --> 0.327 | Weights_l2 --> 9070.728 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
778
+ [2024-08-10 22:22:16,572][Main][INFO] - [train] Step 35650 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.325 | Weights_l2 --> 9071.049 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
779
+ [2024-08-10 22:25:04,796][Main][INFO] - [train] Step 35700 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.325 | Weights_l2 --> 9071.379 | Lr --> 0.006 | Seconds_per_step --> 3.364 |
780
+ [2024-08-10 22:27:51,592][Main][INFO] - [train] Step 35750 out of 80000 | Loss --> 1.990 | Grad_l2 --> 0.322 | Weights_l2 --> 9071.695 | Lr --> 0.006 | Seconds_per_step --> 3.336 |
781
+ [2024-08-10 22:30:40,355][Main][INFO] - [train] Step 35800 out of 80000 | Loss --> 1.978 | Grad_l2 --> 0.323 | Weights_l2 --> 9072.031 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
782
+ [2024-08-10 22:33:28,954][Main][INFO] - [train] Step 35850 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.320 | Weights_l2 --> 9072.335 | Lr --> 0.006 | Seconds_per_step --> 3.372 |
783
+ [2024-08-10 22:36:18,068][Main][INFO] - [train] Step 35900 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.324 | Weights_l2 --> 9072.654 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
784
+ [2024-08-10 22:39:07,221][Main][INFO] - [train] Step 35950 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.321 | Weights_l2 --> 9072.975 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
785
+ [2024-08-10 22:41:56,739][Main][INFO] - [train] Step 36000 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.320 | Weights_l2 --> 9073.295 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
786
+ [2024-08-10 22:44:45,142][Main][INFO] - [train] Step 36050 out of 80000 | Loss --> 1.982 | Grad_l2 --> 0.324 | Weights_l2 --> 9073.610 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
787
+ [2024-08-10 22:47:33,362][Main][INFO] - [train] Step 36100 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.321 | Weights_l2 --> 9073.917 | Lr --> 0.006 | Seconds_per_step --> 3.364 |
788
+ [2024-08-10 22:50:22,054][Main][INFO] - [train] Step 36150 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.323 | Weights_l2 --> 9074.221 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
789
+ [2024-08-10 22:53:11,318][Main][INFO] - [train] Step 36200 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.326 | Weights_l2 --> 9074.538 | Lr --> 0.006 | Seconds_per_step --> 3.385 |
790
+ [2024-08-10 22:55:59,443][Main][INFO] - [train] Step 36250 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.323 | Weights_l2 --> 9074.854 | Lr --> 0.006 | Seconds_per_step --> 3.363 |
791
+ [2024-08-10 22:58:47,752][Main][INFO] - [train] Step 36300 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.322 | Weights_l2 --> 9075.159 | Lr --> 0.006 | Seconds_per_step --> 3.366 |
792
+ [2024-08-10 23:01:36,450][Main][INFO] - [train] Step 36350 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.320 | Weights_l2 --> 9075.441 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
793
+ [2024-08-10 23:04:25,574][Main][INFO] - [train] Step 36400 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.321 | Weights_l2 --> 9075.748 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
794
+ [2024-08-10 23:07:14,442][Main][INFO] - [train] Step 36450 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.319 | Weights_l2 --> 9076.036 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
795
+ [2024-08-10 23:10:04,537][Main][INFO] - [train] Step 36500 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.323 | Weights_l2 --> 9076.326 | Lr --> 0.005 | Seconds_per_step --> 3.402 |
796
+ [2024-08-10 23:12:52,857][Main][INFO] - [train] Step 36550 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.325 | Weights_l2 --> 9076.642 | Lr --> 0.005 | Seconds_per_step --> 3.366 |
797
+ [2024-08-10 23:15:41,926][Main][INFO] - [train] Step 36600 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.938 | Lr --> 0.005 | Seconds_per_step --> 3.381 |
798
+ [2024-08-10 23:18:31,293][Main][INFO] - [train] Step 36650 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.321 | Weights_l2 --> 9077.225 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
799
+ [2024-08-10 23:21:20,030][Main][INFO] - [train] Step 36700 out of 80000 | Loss --> 1.996 | Grad_l2 --> 0.318 | Weights_l2 --> 9077.501 | Lr --> 0.005 | Seconds_per_step --> 3.375 |
800
+ [2024-08-10 23:24:08,655][Main][INFO] - [train] Step 36750 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.321 | Weights_l2 --> 9077.785 | Lr --> 0.005 | Seconds_per_step --> 3.372 |
801
+ [2024-08-10 23:26:59,534][Main][INFO] - [train] Step 36800 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.326 | Weights_l2 --> 9078.068 | Lr --> 0.005 | Seconds_per_step --> 3.418 |
802
+ [2024-08-10 23:29:48,887][Main][INFO] - [train] Step 36850 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.353 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
803
+ [2024-08-10 23:32:38,332][Main][INFO] - [train] Step 36900 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.633 | Lr --> 0.005 | Seconds_per_step --> 3.389 |
804
+ [2024-08-10 23:35:27,561][Main][INFO] - [train] Step 36950 out of 80000 | Loss --> 1.997 | Grad_l2 --> 0.322 | Weights_l2 --> 9078.910 | Lr --> 0.005 | Seconds_per_step --> 3.385 |
805
+ [2024-08-10 23:38:17,143][Main][INFO] - [train] Step 37000 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.319 | Weights_l2 --> 9079.184 | Lr --> 0.005 | Seconds_per_step --> 3.392 |
806
+ [2024-08-10 23:41:05,847][Main][INFO] - [train] Step 37050 out of 80000 | Loss --> 1.978 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.459 | Lr --> 0.005 | Seconds_per_step --> 3.374 |
807
+ [2024-08-10 23:43:54,811][Main][INFO] - [train] Step 37100 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.320 | Weights_l2 --> 9079.752 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
808
+ [2024-08-10 23:46:43,768][Main][INFO] - [train] Step 37150 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.321 | Weights_l2 --> 9080.034 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
809
+ [2024-08-10 23:49:32,160][Main][INFO] - [train] Step 37200 out of 80000 | Loss --> 1.994 | Grad_l2 --> 0.323 | Weights_l2 --> 9080.289 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
810
+ [2024-08-10 23:52:20,720][Main][INFO] - [train] Step 37250 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.315 | Weights_l2 --> 9080.560 | Lr --> 0.005 | Seconds_per_step --> 3.371 |
811
+ [2024-08-10 23:55:08,868][Main][INFO] - [train] Step 37300 out of 80000 | Loss --> 1.996 | Grad_l2 --> 0.321 | Weights_l2 --> 9080.819 | Lr --> 0.005 | Seconds_per_step --> 3.363 |
812
+ [2024-08-10 23:57:58,146][Main][INFO] - [train] Step 37350 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.084 | Lr --> 0.005 | Seconds_per_step --> 3.386 |
813
+ [2024-08-11 00:00:46,523][Main][INFO] - [train] Step 37400 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.347 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
814
+ [2024-08-11 00:03:35,784][Main][INFO] - [train] Step 37450 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.604 | Lr --> 0.005 | Seconds_per_step --> 3.385 |
815
+ [2024-08-11 00:06:24,722][Main][INFO] - [train] Step 37500 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.851 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
816
+ [2024-08-11 00:09:13,944][Main][INFO] - [train] Step 37550 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.319 | Weights_l2 --> 9082.097 | Lr --> 0.005 | Seconds_per_step --> 3.384 |
817
+ [2024-08-11 00:12:02,407][Main][INFO] - [train] Step 37600 out of 80000 | Loss --> 1.995 | Grad_l2 --> 0.316 | Weights_l2 --> 9082.342 | Lr --> 0.005 | Seconds_per_step --> 3.369 |
818
+ [2024-08-11 00:14:51,432][Main][INFO] - [train] Step 37650 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.318 | Weights_l2 --> 9082.588 | Lr --> 0.005 | Seconds_per_step --> 3.380 |
819
+ [2024-08-11 00:17:39,935][Main][INFO] - [train] Step 37700 out of 80000 | Loss --> 1.994 | Grad_l2 --> 0.315 | Weights_l2 --> 9082.847 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
820
+ [2024-08-11 00:20:28,827][Main][INFO] - [train] Step 37750 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.095 | Lr --> 0.005 | Seconds_per_step --> 3.378 |
821
+ [2024-08-11 00:23:16,528][Main][INFO] - [train] Step 37800 out of 80000 | Loss --> 1.995 | Grad_l2 --> 0.318 | Weights_l2 --> 9083.336 | Lr --> 0.005 | Seconds_per_step --> 3.354 |
822
+ [2024-08-11 00:26:04,576][Main][INFO] - [train] Step 37850 out of 80000 | Loss --> 1.993 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.585 | Lr --> 0.005 | Seconds_per_step --> 3.361 |
823
+ [2024-08-11 00:28:53,091][Main][INFO] - [train] Step 37900 out of 80000 | Loss --> 1.991 | Grad_l2 --> 0.317 | Weights_l2 --> 9083.812 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
824
+ [2024-08-11 00:31:41,481][Main][INFO] - [train] Step 37950 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.061 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -653,3 +653,107 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
653
  "2024-08-10 19:29:18,382",32600,1.957,0.327,9048.263,0.006,3.357
654
  "2024-08-10 19:32:06,781",32650,1.972,0.332,9048.703,0.006,3.368
655
  "2024-08-10 19:34:55,944",32700,1.968,0.326,9049.119,0.006,3.383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  "2024-08-10 19:29:18,382",32600,1.957,0.327,9048.263,0.006,3.357
654
  "2024-08-10 19:32:06,781",32650,1.972,0.332,9048.703,0.006,3.368
655
  "2024-08-10 19:34:55,944",32700,1.968,0.326,9049.119,0.006,3.383
656
+ "2024-08-10 19:37:44,560",32750,1.971,0.331,9049.551,0.006,3.372
657
+ "2024-08-10 19:40:33,293",32800,1.967,0.327,9049.957,0.006,3.375
658
+ "2024-08-10 19:43:22,355",32850,1.964,0.328,9050.372,0.006,3.381
659
+ "2024-08-10 19:46:12,626",32900,1.964,0.326,9050.775,0.006,3.405
660
+ "2024-08-10 19:49:01,476",32950,1.965,0.327,9051.194,0.006,3.377
661
+ "2024-08-10 19:51:50,548",33000,1.96,0.325,9051.604,0.006,3.381
662
+ "2024-08-10 19:54:38,786",33050,1.958,0.326,9052.006,0.006,3.365
663
+ "2024-08-10 19:57:28,433",33100,1.957,0.326,9052.404,0.006,3.393
664
+ "2024-08-10 20:00:16,265",33150,1.955,0.324,9052.805,0.006,3.357
665
+ "2024-08-10 20:03:05,196",33200,1.955,0.328,9053.209,0.006,3.379
666
+ "2024-08-10 20:05:53,340",33250,1.949,0.328,9053.604,0.006,3.363
667
+ "2024-08-10 20:08:41,826",33300,1.952,0.325,9054.0,0.006,3.37
668
+ "2024-08-10 20:11:30,678",33350,1.948,0.326,9054.393,0.006,3.377
669
+ "2024-08-10 20:14:19,885",33400,1.943,0.326,9054.797,0.006,3.384
670
+ "2024-08-10 20:17:08,133",33450,1.952,0.323,9055.182,0.006,3.365
671
+ "2024-08-10 20:19:57,277",33500,1.954,0.323,9055.58,0.006,3.383
672
+ "2024-08-10 20:22:45,534",33550,1.948,0.325,9055.965,0.006,3.365
673
+ "2024-08-10 20:25:34,304",33600,1.952,0.322,9056.338,0.006,3.375
674
+ "2024-08-10 20:28:22,563",33650,1.948,0.326,9056.705,0.006,3.365
675
+ "2024-08-10 20:31:11,603",33700,1.956,0.323,9057.105,0.006,3.381
676
+ "2024-08-10 20:34:00,728",33750,1.942,0.324,9057.475,0.006,3.383
677
+ "2024-08-10 20:36:50,006",33800,1.954,0.327,9057.854,0.006,3.386
678
+ "2024-08-10 20:39:38,800",33850,1.958,0.324,9058.234,0.006,3.376
679
+ "2024-08-10 20:42:27,584",33900,1.946,0.326,9058.594,0.006,3.376
680
+ "2024-08-10 20:45:16,609",33950,1.95,0.328,9058.969,0.006,3.381
681
+ "2024-08-10 20:48:07,179",34000,1.962,0.324,9059.358,0.006,3.411
682
+ "2024-08-10 20:50:56,670",34050,1.948,0.325,9059.744,0.006,3.39
683
+ "2024-08-10 20:53:57,081",34100,1.95,0.323,9060.114,0.006,3.608
684
+ "2024-08-10 20:57:04,451",34150,1.945,0.322,9060.487,0.006,3.747
685
+ "2024-08-10 20:59:52,938",34200,1.944,0.323,9060.857,0.006,3.37
686
+ "2024-08-10 21:02:41,472",34250,1.948,0.327,9061.209,0.006,3.371
687
+ "2024-08-10 21:05:30,147",34300,1.943,0.323,9061.579,0.006,3.373
688
+ "2024-08-10 21:08:19,894",34350,1.949,0.323,9061.93,0.006,3.395
689
+ "2024-08-10 21:11:08,680",34400,1.954,0.324,9062.305,0.006,3.376
690
+ "2024-08-10 21:13:58,108",34450,1.95,0.323,9062.682,0.006,3.389
691
+ "2024-08-10 21:16:57,011",34500,1.953,0.322,9063.034,0.006,3.578
692
+ "2024-08-10 21:20:03,124",34550,1.956,0.323,9063.4,0.006,3.722
693
+ "2024-08-10 21:22:52,395",34600,1.951,0.322,9063.733,0.006,3.385
694
+ "2024-08-10 21:25:41,187",34650,1.949,0.326,9064.106,0.006,3.376
695
+ "2024-08-10 21:28:30,675",34700,1.965,0.326,9064.465,0.006,3.39
696
+ "2024-08-10 21:31:20,544",34750,1.953,0.322,9064.833,0.006,3.397
697
+ "2024-08-10 21:34:09,343",34800,1.959,0.323,9065.209,0.006,3.376
698
+ "2024-08-10 21:37:00,873",34850,1.962,0.326,9065.547,0.006,3.431
699
+ "2024-08-10 21:39:50,852",34900,1.97,0.324,9065.911,0.006,3.4
700
+ "2024-08-10 21:42:40,224",34950,1.959,0.324,9066.27,0.006,3.387
701
+ "2024-08-10 21:45:37,170",35000,1.963,0.322,9066.63,0.006,3.539
702
+ "2024-08-10 21:48:30,259",35050,1.962,0.328,9066.983,0.006,3.462
703
+ "2024-08-10 21:51:18,768",35100,1.976,0.324,9067.32,0.006,3.37
704
+ "2024-08-10 21:54:07,440",35150,1.973,0.322,9067.649,0.006,3.373
705
+ "2024-08-10 21:56:56,909",35200,1.974,0.327,9068.01,0.006,3.389
706
+ "2024-08-10 21:59:46,025",35250,1.975,0.325,9068.355,0.006,3.382
707
+ "2024-08-10 22:02:34,755",35300,1.974,0.325,9068.705,0.006,3.375
708
+ "2024-08-10 22:05:24,758",35350,1.972,0.327,9069.051,0.006,3.4
709
+ "2024-08-10 22:08:13,479",35400,1.974,0.324,9069.396,0.006,3.374
710
+ "2024-08-10 22:11:02,521",35450,1.969,0.323,9069.72,0.006,3.381
711
+ "2024-08-10 22:13:50,439",35500,1.972,0.322,9070.047,0.006,3.358
712
+ "2024-08-10 22:16:39,106",35550,1.985,0.324,9070.389,0.006,3.373
713
+ "2024-08-10 22:19:27,802",35600,1.979,0.327,9070.728,0.006,3.374
714
+ "2024-08-10 22:22:16,572",35650,1.981,0.325,9071.049,0.006,3.375
715
+ "2024-08-10 22:25:04,796",35700,1.984,0.325,9071.379,0.006,3.364
716
+ "2024-08-10 22:27:51,592",35750,1.99,0.322,9071.695,0.006,3.336
717
+ "2024-08-10 22:30:40,355",35800,1.978,0.323,9072.031,0.006,3.375
718
+ "2024-08-10 22:33:28,954",35850,1.988,0.32,9072.335,0.006,3.372
719
+ "2024-08-10 22:36:18,068",35900,1.985,0.324,9072.654,0.006,3.382
720
+ "2024-08-10 22:39:07,221",35950,1.987,0.321,9072.975,0.006,3.383
721
+ "2024-08-10 22:41:56,739",36000,1.981,0.32,9073.295,0.006,3.39
722
+ "2024-08-10 22:44:45,142",36050,1.982,0.324,9073.61,0.006,3.368
723
+ "2024-08-10 22:47:33,362",36100,1.981,0.321,9073.917,0.006,3.364
724
+ "2024-08-10 22:50:22,054",36150,1.984,0.323,9074.221,0.006,3.374
725
+ "2024-08-10 22:53:11,318",36200,1.989,0.326,9074.538,0.006,3.385
726
+ "2024-08-10 22:55:59,443",36250,1.98,0.323,9074.854,0.006,3.363
727
+ "2024-08-10 22:58:47,752",36300,1.98,0.322,9075.159,0.006,3.366
728
+ "2024-08-10 23:01:36,450",36350,1.992,0.32,9075.441,0.006,3.374
729
+ "2024-08-10 23:04:25,574",36400,1.985,0.321,9075.748,0.006,3.382
730
+ "2024-08-10 23:07:14,442",36450,1.987,0.319,9076.036,0.006,3.377
731
+ "2024-08-10 23:10:04,537",36500,1.992,0.323,9076.326,0.005,3.402
732
+ "2024-08-10 23:12:52,857",36550,1.986,0.325,9076.642,0.005,3.366
733
+ "2024-08-10 23:15:41,926",36600,1.989,0.322,9076.938,0.005,3.381
734
+ "2024-08-10 23:18:31,293",36650,1.986,0.321,9077.225,0.005,3.387
735
+ "2024-08-10 23:21:20,030",36700,1.996,0.318,9077.501,0.005,3.375
736
+ "2024-08-10 23:24:08,655",36750,1.985,0.321,9077.785,0.005,3.372
737
+ "2024-08-10 23:26:59,534",36800,1.989,0.326,9078.068,0.005,3.418
738
+ "2024-08-10 23:29:48,887",36850,1.988,0.321,9078.353,0.005,3.387
739
+ "2024-08-10 23:32:38,332",36900,1.989,0.321,9078.633,0.005,3.389
740
+ "2024-08-10 23:35:27,561",36950,1.997,0.322,9078.91,0.005,3.385
741
+ "2024-08-10 23:38:17,143",37000,1.987,0.319,9079.184,0.005,3.392
742
+ "2024-08-10 23:41:05,847",37050,1.978,0.318,9079.459,0.005,3.374
743
+ "2024-08-10 23:43:54,811",37100,1.989,0.32,9079.752,0.005,3.379
744
+ "2024-08-10 23:46:43,768",37150,1.987,0.321,9080.034,0.005,3.379
745
+ "2024-08-10 23:49:32,160",37200,1.994,0.323,9080.289,0.005,3.368
746
+ "2024-08-10 23:52:20,720",37250,1.986,0.315,9080.56,0.005,3.371
747
+ "2024-08-10 23:55:08,868",37300,1.996,0.321,9080.819,0.005,3.363
748
+ "2024-08-10 23:57:58,146",37350,1.98,0.318,9081.084,0.005,3.386
749
+ "2024-08-11 00:00:46,523",37400,1.985,0.318,9081.347,0.005,3.368
750
+ "2024-08-11 00:03:35,784",37450,1.988,0.318,9081.604,0.005,3.385
751
+ "2024-08-11 00:06:24,722",37500,1.987,0.318,9081.851,0.005,3.379
752
+ "2024-08-11 00:09:13,944",37550,1.98,0.319,9082.097,0.005,3.384
753
+ "2024-08-11 00:12:02,407",37600,1.995,0.316,9082.342,0.005,3.369
754
+ "2024-08-11 00:14:51,432",37650,1.992,0.318,9082.588,0.005,3.38
755
+ "2024-08-11 00:17:39,935",37700,1.994,0.315,9082.847,0.005,3.37
756
+ "2024-08-11 00:20:28,827",37750,1.987,0.32,9083.095,0.005,3.378
757
+ "2024-08-11 00:23:16,528",37800,1.995,0.318,9083.336,0.005,3.354
758
+ "2024-08-11 00:26:04,576",37850,1.993,0.32,9083.585,0.005,3.361
759
+ "2024-08-11 00:28:53,091",37900,1.991,0.317,9083.812,0.005,3.37
checkpoints/weights_l2_over_steps.png CHANGED