Upload folder using huggingface_hub
Browse files- checkpoints/checkpoint-pt-35000/model.safetensors +3 -0
- checkpoints/checkpoint-pt-35000/random_states_0.pkl +3 -0
- checkpoints/grad_l2_over_steps.png +0 -0
- checkpoints/loss_over_steps.png +0 -0
- checkpoints/lr_over_steps.png +0 -0
- checkpoints/main.log +113 -0
- checkpoints/seconds_per_step_over_steps.png +0 -0
- checkpoints/training_metrics.csv +104 -0
- checkpoints/weights_l2_over_steps.png +0 -0
checkpoints/checkpoint-pt-35000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ddfcdae4c5fcf1287bc30f6b62841ba8316d8741b93707a462e86a0d18d08ce
|
3 |
+
size 1202681712
|
checkpoints/checkpoint-pt-35000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
|
3 |
+
size 14344
|
checkpoints/grad_l2_over_steps.png
CHANGED
checkpoints/loss_over_steps.png
CHANGED
checkpoints/lr_over_steps.png
CHANGED
checkpoints/main.log
CHANGED
@@ -709,3 +709,116 @@ Mixed precision type: bf16
|
|
709 |
[2024-08-10 19:29:18,382][Main][INFO] - [train] Step 32600 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.327 | Weights_l2 --> 9048.263 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
|
710 |
[2024-08-10 19:32:06,781][Main][INFO] - [train] Step 32650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9048.703 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
|
711 |
[2024-08-10 19:34:55,944][Main][INFO] - [train] Step 32700 out of 80000 | Loss --> 1.968 | Grad_l2 --> 0.326 | Weights_l2 --> 9049.119 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
709 |
[2024-08-10 19:29:18,382][Main][INFO] - [train] Step 32600 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.327 | Weights_l2 --> 9048.263 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
|
710 |
[2024-08-10 19:32:06,781][Main][INFO] - [train] Step 32650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.332 | Weights_l2 --> 9048.703 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
|
711 |
[2024-08-10 19:34:55,944][Main][INFO] - [train] Step 32700 out of 80000 | Loss --> 1.968 | Grad_l2 --> 0.326 | Weights_l2 --> 9049.119 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
|
712 |
+
[2024-08-10 19:37:44,560][Main][INFO] - [train] Step 32750 out of 80000 | Loss --> 1.971 | Grad_l2 --> 0.331 | Weights_l2 --> 9049.551 | Lr --> 0.006 | Seconds_per_step --> 3.372 |
|
713 |
+
[2024-08-10 19:40:33,293][Main][INFO] - [train] Step 32800 out of 80000 | Loss --> 1.967 | Grad_l2 --> 0.327 | Weights_l2 --> 9049.957 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
|
714 |
+
[2024-08-10 19:43:22,355][Main][INFO] - [train] Step 32850 out of 80000 | Loss --> 1.964 | Grad_l2 --> 0.328 | Weights_l2 --> 9050.372 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
|
715 |
+
[2024-08-10 19:46:12,626][Main][INFO] - [train] Step 32900 out of 80000 | Loss --> 1.964 | Grad_l2 --> 0.326 | Weights_l2 --> 9050.775 | Lr --> 0.006 | Seconds_per_step --> 3.405 |
|
716 |
+
[2024-08-10 19:49:01,476][Main][INFO] - [train] Step 32950 out of 80000 | Loss --> 1.965 | Grad_l2 --> 0.327 | Weights_l2 --> 9051.194 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
|
717 |
+
[2024-08-10 19:51:50,548][Main][INFO] - [train] Step 33000 out of 80000 | Loss --> 1.960 | Grad_l2 --> 0.325 | Weights_l2 --> 9051.604 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
|
718 |
+
[2024-08-10 19:54:38,786][Main][INFO] - [train] Step 33050 out of 80000 | Loss --> 1.958 | Grad_l2 --> 0.326 | Weights_l2 --> 9052.006 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
|
719 |
+
[2024-08-10 19:57:28,433][Main][INFO] - [train] Step 33100 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.326 | Weights_l2 --> 9052.404 | Lr --> 0.006 | Seconds_per_step --> 3.393 |
|
720 |
+
[2024-08-10 20:00:16,265][Main][INFO] - [train] Step 33150 out of 80000 | Loss --> 1.955 | Grad_l2 --> 0.324 | Weights_l2 --> 9052.805 | Lr --> 0.006 | Seconds_per_step --> 3.357 |
|
721 |
+
[2024-08-10 20:03:05,196][Main][INFO] - [train] Step 33200 out of 80000 | Loss --> 1.955 | Grad_l2 --> 0.328 | Weights_l2 --> 9053.209 | Lr --> 0.006 | Seconds_per_step --> 3.379 |
|
722 |
+
[2024-08-10 20:05:53,340][Main][INFO] - [train] Step 33250 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.328 | Weights_l2 --> 9053.604 | Lr --> 0.006 | Seconds_per_step --> 3.363 |
|
723 |
+
[2024-08-10 20:08:41,826][Main][INFO] - [train] Step 33300 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.325 | Weights_l2 --> 9054.000 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
|
724 |
+
[2024-08-10 20:11:30,678][Main][INFO] - [train] Step 33350 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.326 | Weights_l2 --> 9054.393 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
|
725 |
+
[2024-08-10 20:14:19,885][Main][INFO] - [train] Step 33400 out of 80000 | Loss --> 1.943 | Grad_l2 --> 0.326 | Weights_l2 --> 9054.797 | Lr --> 0.006 | Seconds_per_step --> 3.384 |
|
726 |
+
[2024-08-10 20:17:08,133][Main][INFO] - [train] Step 33450 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.323 | Weights_l2 --> 9055.182 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
|
727 |
+
[2024-08-10 20:19:57,277][Main][INFO] - [train] Step 33500 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.323 | Weights_l2 --> 9055.580 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
|
728 |
+
[2024-08-10 20:22:45,534][Main][INFO] - [train] Step 33550 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.325 | Weights_l2 --> 9055.965 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
|
729 |
+
[2024-08-10 20:25:34,304][Main][INFO] - [train] Step 33600 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.322 | Weights_l2 --> 9056.338 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
|
730 |
+
[2024-08-10 20:28:22,563][Main][INFO] - [train] Step 33650 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.326 | Weights_l2 --> 9056.705 | Lr --> 0.006 | Seconds_per_step --> 3.365 |
|
731 |
+
[2024-08-10 20:31:11,603][Main][INFO] - [train] Step 33700 out of 80000 | Loss --> 1.956 | Grad_l2 --> 0.323 | Weights_l2 --> 9057.105 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
|
732 |
+
[2024-08-10 20:34:00,728][Main][INFO] - [train] Step 33750 out of 80000 | Loss --> 1.942 | Grad_l2 --> 0.324 | Weights_l2 --> 9057.475 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
|
733 |
+
[2024-08-10 20:36:50,006][Main][INFO] - [train] Step 33800 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.327 | Weights_l2 --> 9057.854 | Lr --> 0.006 | Seconds_per_step --> 3.386 |
|
734 |
+
[2024-08-10 20:39:38,800][Main][INFO] - [train] Step 33850 out of 80000 | Loss --> 1.958 | Grad_l2 --> 0.324 | Weights_l2 --> 9058.234 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
|
735 |
+
[2024-08-10 20:42:27,584][Main][INFO] - [train] Step 33900 out of 80000 | Loss --> 1.946 | Grad_l2 --> 0.326 | Weights_l2 --> 9058.594 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
|
736 |
+
[2024-08-10 20:45:16,609][Main][INFO] - [train] Step 33950 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.328 | Weights_l2 --> 9058.969 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
|
737 |
+
[2024-08-10 20:48:07,179][Main][INFO] - [train] Step 34000 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.324 | Weights_l2 --> 9059.358 | Lr --> 0.006 | Seconds_per_step --> 3.411 |
|
738 |
+
[2024-08-10 20:50:56,670][Main][INFO] - [train] Step 34050 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.325 | Weights_l2 --> 9059.744 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
|
739 |
+
[2024-08-10 20:53:57,081][Main][INFO] - [train] Step 34100 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.323 | Weights_l2 --> 9060.114 | Lr --> 0.006 | Seconds_per_step --> 3.608 |
|
740 |
+
[2024-08-10 20:57:04,451][Main][INFO] - [train] Step 34150 out of 80000 | Loss --> 1.945 | Grad_l2 --> 0.322 | Weights_l2 --> 9060.487 | Lr --> 0.006 | Seconds_per_step --> 3.747 |
|
741 |
+
[2024-08-10 20:59:52,938][Main][INFO] - [train] Step 34200 out of 80000 | Loss --> 1.944 | Grad_l2 --> 0.323 | Weights_l2 --> 9060.857 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
|
742 |
+
[2024-08-10 21:02:41,472][Main][INFO] - [train] Step 34250 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.327 | Weights_l2 --> 9061.209 | Lr --> 0.006 | Seconds_per_step --> 3.371 |
|
743 |
+
[2024-08-10 21:05:30,147][Main][INFO] - [train] Step 34300 out of 80000 | Loss --> 1.943 | Grad_l2 --> 0.323 | Weights_l2 --> 9061.579 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
|
744 |
+
[2024-08-10 21:08:19,894][Main][INFO] - [train] Step 34350 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.323 | Weights_l2 --> 9061.930 | Lr --> 0.006 | Seconds_per_step --> 3.395 |
|
745 |
+
[2024-08-10 21:11:08,680][Main][INFO] - [train] Step 34400 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.324 | Weights_l2 --> 9062.305 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
|
746 |
+
[2024-08-10 21:13:58,108][Main][INFO] - [train] Step 34450 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.323 | Weights_l2 --> 9062.682 | Lr --> 0.006 | Seconds_per_step --> 3.389 |
|
747 |
+
[2024-08-10 21:16:57,011][Main][INFO] - [train] Step 34500 out of 80000 | Loss --> 1.953 | Grad_l2 --> 0.322 | Weights_l2 --> 9063.034 | Lr --> 0.006 | Seconds_per_step --> 3.578 |
|
748 |
+
[2024-08-10 21:20:03,124][Main][INFO] - [train] Step 34550 out of 80000 | Loss --> 1.956 | Grad_l2 --> 0.323 | Weights_l2 --> 9063.400 | Lr --> 0.006 | Seconds_per_step --> 3.722 |
|
749 |
+
[2024-08-10 21:22:52,395][Main][INFO] - [train] Step 34600 out of 80000 | Loss --> 1.951 | Grad_l2 --> 0.322 | Weights_l2 --> 9063.733 | Lr --> 0.006 | Seconds_per_step --> 3.385 |
|
750 |
+
[2024-08-10 21:25:41,187][Main][INFO] - [train] Step 34650 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.326 | Weights_l2 --> 9064.106 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
|
751 |
+
[2024-08-10 21:28:30,675][Main][INFO] - [train] Step 34700 out of 80000 | Loss --> 1.965 | Grad_l2 --> 0.326 | Weights_l2 --> 9064.465 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
|
752 |
+
[2024-08-10 21:31:20,544][Main][INFO] - [train] Step 34750 out of 80000 | Loss --> 1.953 | Grad_l2 --> 0.322 | Weights_l2 --> 9064.833 | Lr --> 0.006 | Seconds_per_step --> 3.397 |
|
753 |
+
[2024-08-10 21:34:09,343][Main][INFO] - [train] Step 34800 out of 80000 | Loss --> 1.959 | Grad_l2 --> 0.323 | Weights_l2 --> 9065.209 | Lr --> 0.006 | Seconds_per_step --> 3.376 |
|
754 |
+
[2024-08-10 21:37:00,873][Main][INFO] - [train] Step 34850 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.326 | Weights_l2 --> 9065.547 | Lr --> 0.006 | Seconds_per_step --> 3.431 |
|
755 |
+
[2024-08-10 21:39:50,852][Main][INFO] - [train] Step 34900 out of 80000 | Loss --> 1.970 | Grad_l2 --> 0.324 | Weights_l2 --> 9065.911 | Lr --> 0.006 | Seconds_per_step --> 3.400 |
|
756 |
+
[2024-08-10 21:42:40,224][Main][INFO] - [train] Step 34950 out of 80000 | Loss --> 1.959 | Grad_l2 --> 0.324 | Weights_l2 --> 9066.270 | Lr --> 0.006 | Seconds_per_step --> 3.387 |
|
757 |
+
[2024-08-10 21:45:37,170][Main][INFO] - [train] Step 35000 out of 80000 | Loss --> 1.963 | Grad_l2 --> 0.322 | Weights_l2 --> 9066.630 | Lr --> 0.006 | Seconds_per_step --> 3.539 |
|
758 |
+
[2024-08-10 21:45:37,170][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-35000
|
759 |
+
[2024-08-10 21:45:37,174][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
760 |
+
[2024-08-10 21:45:39,174][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-35000/model.safetensors
|
761 |
+
[2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-35000/optimizer.bin
|
762 |
+
[2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-35000/scheduler.bin
|
763 |
+
[2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-35000/sampler.bin
|
764 |
+
[2024-08-10 21:45:41,920][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-35000/sampler_1.bin
|
765 |
+
[2024-08-10 21:45:41,921][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-35000/random_states_0.pkl
|
766 |
+
[2024-08-10 21:48:30,259][Main][INFO] - [train] Step 35050 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.328 | Weights_l2 --> 9066.983 | Lr --> 0.006 | Seconds_per_step --> 3.462 |
|
767 |
+
[2024-08-10 21:51:18,768][Main][INFO] - [train] Step 35100 out of 80000 | Loss --> 1.976 | Grad_l2 --> 0.324 | Weights_l2 --> 9067.320 | Lr --> 0.006 | Seconds_per_step --> 3.370 |
|
768 |
+
[2024-08-10 21:54:07,440][Main][INFO] - [train] Step 35150 out of 80000 | Loss --> 1.973 | Grad_l2 --> 0.322 | Weights_l2 --> 9067.649 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
|
769 |
+
[2024-08-10 21:56:56,909][Main][INFO] - [train] Step 35200 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.327 | Weights_l2 --> 9068.010 | Lr --> 0.006 | Seconds_per_step --> 3.389 |
|
770 |
+
[2024-08-10 21:59:46,025][Main][INFO] - [train] Step 35250 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.325 | Weights_l2 --> 9068.355 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
|
771 |
+
[2024-08-10 22:02:34,755][Main][INFO] - [train] Step 35300 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.325 | Weights_l2 --> 9068.705 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
|
772 |
+
[2024-08-10 22:05:24,758][Main][INFO] - [train] Step 35350 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.327 | Weights_l2 --> 9069.051 | Lr --> 0.006 | Seconds_per_step --> 3.400 |
|
773 |
+
[2024-08-10 22:08:13,479][Main][INFO] - [train] Step 35400 out of 80000 | Loss --> 1.974 | Grad_l2 --> 0.324 | Weights_l2 --> 9069.396 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
|
774 |
+
[2024-08-10 22:11:02,521][Main][INFO] - [train] Step 35450 out of 80000 | Loss --> 1.969 | Grad_l2 --> 0.323 | Weights_l2 --> 9069.720 | Lr --> 0.006 | Seconds_per_step --> 3.381 |
|
775 |
+
[2024-08-10 22:13:50,439][Main][INFO] - [train] Step 35500 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.322 | Weights_l2 --> 9070.047 | Lr --> 0.006 | Seconds_per_step --> 3.358 |
|
776 |
+
[2024-08-10 22:16:39,106][Main][INFO] - [train] Step 35550 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.324 | Weights_l2 --> 9070.389 | Lr --> 0.006 | Seconds_per_step --> 3.373 |
|
777 |
+
[2024-08-10 22:19:27,802][Main][INFO] - [train] Step 35600 out of 80000 | Loss --> 1.979 | Grad_l2 --> 0.327 | Weights_l2 --> 9070.728 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
|
778 |
+
[2024-08-10 22:22:16,572][Main][INFO] - [train] Step 35650 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.325 | Weights_l2 --> 9071.049 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
|
779 |
+
[2024-08-10 22:25:04,796][Main][INFO] - [train] Step 35700 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.325 | Weights_l2 --> 9071.379 | Lr --> 0.006 | Seconds_per_step --> 3.364 |
|
780 |
+
[2024-08-10 22:27:51,592][Main][INFO] - [train] Step 35750 out of 80000 | Loss --> 1.990 | Grad_l2 --> 0.322 | Weights_l2 --> 9071.695 | Lr --> 0.006 | Seconds_per_step --> 3.336 |
|
781 |
+
[2024-08-10 22:30:40,355][Main][INFO] - [train] Step 35800 out of 80000 | Loss --> 1.978 | Grad_l2 --> 0.323 | Weights_l2 --> 9072.031 | Lr --> 0.006 | Seconds_per_step --> 3.375 |
|
782 |
+
[2024-08-10 22:33:28,954][Main][INFO] - [train] Step 35850 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.320 | Weights_l2 --> 9072.335 | Lr --> 0.006 | Seconds_per_step --> 3.372 |
|
783 |
+
[2024-08-10 22:36:18,068][Main][INFO] - [train] Step 35900 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.324 | Weights_l2 --> 9072.654 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
|
784 |
+
[2024-08-10 22:39:07,221][Main][INFO] - [train] Step 35950 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.321 | Weights_l2 --> 9072.975 | Lr --> 0.006 | Seconds_per_step --> 3.383 |
|
785 |
+
[2024-08-10 22:41:56,739][Main][INFO] - [train] Step 36000 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.320 | Weights_l2 --> 9073.295 | Lr --> 0.006 | Seconds_per_step --> 3.390 |
|
786 |
+
[2024-08-10 22:44:45,142][Main][INFO] - [train] Step 36050 out of 80000 | Loss --> 1.982 | Grad_l2 --> 0.324 | Weights_l2 --> 9073.610 | Lr --> 0.006 | Seconds_per_step --> 3.368 |
|
787 |
+
[2024-08-10 22:47:33,362][Main][INFO] - [train] Step 36100 out of 80000 | Loss --> 1.981 | Grad_l2 --> 0.321 | Weights_l2 --> 9073.917 | Lr --> 0.006 | Seconds_per_step --> 3.364 |
|
788 |
+
[2024-08-10 22:50:22,054][Main][INFO] - [train] Step 36150 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.323 | Weights_l2 --> 9074.221 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
|
789 |
+
[2024-08-10 22:53:11,318][Main][INFO] - [train] Step 36200 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.326 | Weights_l2 --> 9074.538 | Lr --> 0.006 | Seconds_per_step --> 3.385 |
|
790 |
+
[2024-08-10 22:55:59,443][Main][INFO] - [train] Step 36250 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.323 | Weights_l2 --> 9074.854 | Lr --> 0.006 | Seconds_per_step --> 3.363 |
|
791 |
+
[2024-08-10 22:58:47,752][Main][INFO] - [train] Step 36300 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.322 | Weights_l2 --> 9075.159 | Lr --> 0.006 | Seconds_per_step --> 3.366 |
|
792 |
+
[2024-08-10 23:01:36,450][Main][INFO] - [train] Step 36350 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.320 | Weights_l2 --> 9075.441 | Lr --> 0.006 | Seconds_per_step --> 3.374 |
|
793 |
+
[2024-08-10 23:04:25,574][Main][INFO] - [train] Step 36400 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.321 | Weights_l2 --> 9075.748 | Lr --> 0.006 | Seconds_per_step --> 3.382 |
|
794 |
+
[2024-08-10 23:07:14,442][Main][INFO] - [train] Step 36450 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.319 | Weights_l2 --> 9076.036 | Lr --> 0.006 | Seconds_per_step --> 3.377 |
|
795 |
+
[2024-08-10 23:10:04,537][Main][INFO] - [train] Step 36500 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.323 | Weights_l2 --> 9076.326 | Lr --> 0.005 | Seconds_per_step --> 3.402 |
|
796 |
+
[2024-08-10 23:12:52,857][Main][INFO] - [train] Step 36550 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.325 | Weights_l2 --> 9076.642 | Lr --> 0.005 | Seconds_per_step --> 3.366 |
|
797 |
+
[2024-08-10 23:15:41,926][Main][INFO] - [train] Step 36600 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.938 | Lr --> 0.005 | Seconds_per_step --> 3.381 |
|
798 |
+
[2024-08-10 23:18:31,293][Main][INFO] - [train] Step 36650 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.321 | Weights_l2 --> 9077.225 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
|
799 |
+
[2024-08-10 23:21:20,030][Main][INFO] - [train] Step 36700 out of 80000 | Loss --> 1.996 | Grad_l2 --> 0.318 | Weights_l2 --> 9077.501 | Lr --> 0.005 | Seconds_per_step --> 3.375 |
|
800 |
+
[2024-08-10 23:24:08,655][Main][INFO] - [train] Step 36750 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.321 | Weights_l2 --> 9077.785 | Lr --> 0.005 | Seconds_per_step --> 3.372 |
|
801 |
+
[2024-08-10 23:26:59,534][Main][INFO] - [train] Step 36800 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.326 | Weights_l2 --> 9078.068 | Lr --> 0.005 | Seconds_per_step --> 3.418 |
|
802 |
+
[2024-08-10 23:29:48,887][Main][INFO] - [train] Step 36850 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.353 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
|
803 |
+
[2024-08-10 23:32:38,332][Main][INFO] - [train] Step 36900 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.633 | Lr --> 0.005 | Seconds_per_step --> 3.389 |
|
804 |
+
[2024-08-10 23:35:27,561][Main][INFO] - [train] Step 36950 out of 80000 | Loss --> 1.997 | Grad_l2 --> 0.322 | Weights_l2 --> 9078.910 | Lr --> 0.005 | Seconds_per_step --> 3.385 |
|
805 |
+
[2024-08-10 23:38:17,143][Main][INFO] - [train] Step 37000 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.319 | Weights_l2 --> 9079.184 | Lr --> 0.005 | Seconds_per_step --> 3.392 |
|
806 |
+
[2024-08-10 23:41:05,847][Main][INFO] - [train] Step 37050 out of 80000 | Loss --> 1.978 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.459 | Lr --> 0.005 | Seconds_per_step --> 3.374 |
|
807 |
+
[2024-08-10 23:43:54,811][Main][INFO] - [train] Step 37100 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.320 | Weights_l2 --> 9079.752 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
|
808 |
+
[2024-08-10 23:46:43,768][Main][INFO] - [train] Step 37150 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.321 | Weights_l2 --> 9080.034 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
|
809 |
+
[2024-08-10 23:49:32,160][Main][INFO] - [train] Step 37200 out of 80000 | Loss --> 1.994 | Grad_l2 --> 0.323 | Weights_l2 --> 9080.289 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
|
810 |
+
[2024-08-10 23:52:20,720][Main][INFO] - [train] Step 37250 out of 80000 | Loss --> 1.986 | Grad_l2 --> 0.315 | Weights_l2 --> 9080.560 | Lr --> 0.005 | Seconds_per_step --> 3.371 |
|
811 |
+
[2024-08-10 23:55:08,868][Main][INFO] - [train] Step 37300 out of 80000 | Loss --> 1.996 | Grad_l2 --> 0.321 | Weights_l2 --> 9080.819 | Lr --> 0.005 | Seconds_per_step --> 3.363 |
|
812 |
+
[2024-08-10 23:57:58,146][Main][INFO] - [train] Step 37350 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.084 | Lr --> 0.005 | Seconds_per_step --> 3.386 |
|
813 |
+
[2024-08-11 00:00:46,523][Main][INFO] - [train] Step 37400 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.347 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
|
814 |
+
[2024-08-11 00:03:35,784][Main][INFO] - [train] Step 37450 out of 80000 | Loss --> 1.988 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.604 | Lr --> 0.005 | Seconds_per_step --> 3.385 |
|
815 |
+
[2024-08-11 00:06:24,722][Main][INFO] - [train] Step 37500 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.318 | Weights_l2 --> 9081.851 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
|
816 |
+
[2024-08-11 00:09:13,944][Main][INFO] - [train] Step 37550 out of 80000 | Loss --> 1.980 | Grad_l2 --> 0.319 | Weights_l2 --> 9082.097 | Lr --> 0.005 | Seconds_per_step --> 3.384 |
|
817 |
+
[2024-08-11 00:12:02,407][Main][INFO] - [train] Step 37600 out of 80000 | Loss --> 1.995 | Grad_l2 --> 0.316 | Weights_l2 --> 9082.342 | Lr --> 0.005 | Seconds_per_step --> 3.369 |
|
818 |
+
[2024-08-11 00:14:51,432][Main][INFO] - [train] Step 37650 out of 80000 | Loss --> 1.992 | Grad_l2 --> 0.318 | Weights_l2 --> 9082.588 | Lr --> 0.005 | Seconds_per_step --> 3.380 |
|
819 |
+
[2024-08-11 00:17:39,935][Main][INFO] - [train] Step 37700 out of 80000 | Loss --> 1.994 | Grad_l2 --> 0.315 | Weights_l2 --> 9082.847 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
|
820 |
+
[2024-08-11 00:20:28,827][Main][INFO] - [train] Step 37750 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.095 | Lr --> 0.005 | Seconds_per_step --> 3.378 |
|
821 |
+
[2024-08-11 00:23:16,528][Main][INFO] - [train] Step 37800 out of 80000 | Loss --> 1.995 | Grad_l2 --> 0.318 | Weights_l2 --> 9083.336 | Lr --> 0.005 | Seconds_per_step --> 3.354 |
|
822 |
+
[2024-08-11 00:26:04,576][Main][INFO] - [train] Step 37850 out of 80000 | Loss --> 1.993 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.585 | Lr --> 0.005 | Seconds_per_step --> 3.361 |
|
823 |
+
[2024-08-11 00:28:53,091][Main][INFO] - [train] Step 37900 out of 80000 | Loss --> 1.991 | Grad_l2 --> 0.317 | Weights_l2 --> 9083.812 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
|
824 |
+
[2024-08-11 00:31:41,481][Main][INFO] - [train] Step 37950 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.061 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
|
checkpoints/seconds_per_step_over_steps.png
CHANGED
checkpoints/training_metrics.csv
CHANGED
@@ -653,3 +653,107 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
|
|
653 |
"2024-08-10 19:29:18,382",32600,1.957,0.327,9048.263,0.006,3.357
|
654 |
"2024-08-10 19:32:06,781",32650,1.972,0.332,9048.703,0.006,3.368
|
655 |
"2024-08-10 19:34:55,944",32700,1.968,0.326,9049.119,0.006,3.383
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
653 |
"2024-08-10 19:29:18,382",32600,1.957,0.327,9048.263,0.006,3.357
|
654 |
"2024-08-10 19:32:06,781",32650,1.972,0.332,9048.703,0.006,3.368
|
655 |
"2024-08-10 19:34:55,944",32700,1.968,0.326,9049.119,0.006,3.383
|
656 |
+
"2024-08-10 19:37:44,560",32750,1.971,0.331,9049.551,0.006,3.372
|
657 |
+
"2024-08-10 19:40:33,293",32800,1.967,0.327,9049.957,0.006,3.375
|
658 |
+
"2024-08-10 19:43:22,355",32850,1.964,0.328,9050.372,0.006,3.381
|
659 |
+
"2024-08-10 19:46:12,626",32900,1.964,0.326,9050.775,0.006,3.405
|
660 |
+
"2024-08-10 19:49:01,476",32950,1.965,0.327,9051.194,0.006,3.377
|
661 |
+
"2024-08-10 19:51:50,548",33000,1.96,0.325,9051.604,0.006,3.381
|
662 |
+
"2024-08-10 19:54:38,786",33050,1.958,0.326,9052.006,0.006,3.365
|
663 |
+
"2024-08-10 19:57:28,433",33100,1.957,0.326,9052.404,0.006,3.393
|
664 |
+
"2024-08-10 20:00:16,265",33150,1.955,0.324,9052.805,0.006,3.357
|
665 |
+
"2024-08-10 20:03:05,196",33200,1.955,0.328,9053.209,0.006,3.379
|
666 |
+
"2024-08-10 20:05:53,340",33250,1.949,0.328,9053.604,0.006,3.363
|
667 |
+
"2024-08-10 20:08:41,826",33300,1.952,0.325,9054.0,0.006,3.37
|
668 |
+
"2024-08-10 20:11:30,678",33350,1.948,0.326,9054.393,0.006,3.377
|
669 |
+
"2024-08-10 20:14:19,885",33400,1.943,0.326,9054.797,0.006,3.384
|
670 |
+
"2024-08-10 20:17:08,133",33450,1.952,0.323,9055.182,0.006,3.365
|
671 |
+
"2024-08-10 20:19:57,277",33500,1.954,0.323,9055.58,0.006,3.383
|
672 |
+
"2024-08-10 20:22:45,534",33550,1.948,0.325,9055.965,0.006,3.365
|
673 |
+
"2024-08-10 20:25:34,304",33600,1.952,0.322,9056.338,0.006,3.375
|
674 |
+
"2024-08-10 20:28:22,563",33650,1.948,0.326,9056.705,0.006,3.365
|
675 |
+
"2024-08-10 20:31:11,603",33700,1.956,0.323,9057.105,0.006,3.381
|
676 |
+
"2024-08-10 20:34:00,728",33750,1.942,0.324,9057.475,0.006,3.383
|
677 |
+
"2024-08-10 20:36:50,006",33800,1.954,0.327,9057.854,0.006,3.386
|
678 |
+
"2024-08-10 20:39:38,800",33850,1.958,0.324,9058.234,0.006,3.376
|
679 |
+
"2024-08-10 20:42:27,584",33900,1.946,0.326,9058.594,0.006,3.376
|
680 |
+
"2024-08-10 20:45:16,609",33950,1.95,0.328,9058.969,0.006,3.381
|
681 |
+
"2024-08-10 20:48:07,179",34000,1.962,0.324,9059.358,0.006,3.411
|
682 |
+
"2024-08-10 20:50:56,670",34050,1.948,0.325,9059.744,0.006,3.39
|
683 |
+
"2024-08-10 20:53:57,081",34100,1.95,0.323,9060.114,0.006,3.608
|
684 |
+
"2024-08-10 20:57:04,451",34150,1.945,0.322,9060.487,0.006,3.747
|
685 |
+
"2024-08-10 20:59:52,938",34200,1.944,0.323,9060.857,0.006,3.37
|
686 |
+
"2024-08-10 21:02:41,472",34250,1.948,0.327,9061.209,0.006,3.371
|
687 |
+
"2024-08-10 21:05:30,147",34300,1.943,0.323,9061.579,0.006,3.373
|
688 |
+
"2024-08-10 21:08:19,894",34350,1.949,0.323,9061.93,0.006,3.395
|
689 |
+
"2024-08-10 21:11:08,680",34400,1.954,0.324,9062.305,0.006,3.376
|
690 |
+
"2024-08-10 21:13:58,108",34450,1.95,0.323,9062.682,0.006,3.389
|
691 |
+
"2024-08-10 21:16:57,011",34500,1.953,0.322,9063.034,0.006,3.578
|
692 |
+
"2024-08-10 21:20:03,124",34550,1.956,0.323,9063.4,0.006,3.722
|
693 |
+
"2024-08-10 21:22:52,395",34600,1.951,0.322,9063.733,0.006,3.385
|
694 |
+
"2024-08-10 21:25:41,187",34650,1.949,0.326,9064.106,0.006,3.376
|
695 |
+
"2024-08-10 21:28:30,675",34700,1.965,0.326,9064.465,0.006,3.39
|
696 |
+
"2024-08-10 21:31:20,544",34750,1.953,0.322,9064.833,0.006,3.397
|
697 |
+
"2024-08-10 21:34:09,343",34800,1.959,0.323,9065.209,0.006,3.376
|
698 |
+
"2024-08-10 21:37:00,873",34850,1.962,0.326,9065.547,0.006,3.431
|
699 |
+
"2024-08-10 21:39:50,852",34900,1.97,0.324,9065.911,0.006,3.4
|
700 |
+
"2024-08-10 21:42:40,224",34950,1.959,0.324,9066.27,0.006,3.387
|
701 |
+
"2024-08-10 21:45:37,170",35000,1.963,0.322,9066.63,0.006,3.539
|
702 |
+
"2024-08-10 21:48:30,259",35050,1.962,0.328,9066.983,0.006,3.462
|
703 |
+
"2024-08-10 21:51:18,768",35100,1.976,0.324,9067.32,0.006,3.37
|
704 |
+
"2024-08-10 21:54:07,440",35150,1.973,0.322,9067.649,0.006,3.373
|
705 |
+
"2024-08-10 21:56:56,909",35200,1.974,0.327,9068.01,0.006,3.389
|
706 |
+
"2024-08-10 21:59:46,025",35250,1.975,0.325,9068.355,0.006,3.382
|
707 |
+
"2024-08-10 22:02:34,755",35300,1.974,0.325,9068.705,0.006,3.375
|
708 |
+
"2024-08-10 22:05:24,758",35350,1.972,0.327,9069.051,0.006,3.4
|
709 |
+
"2024-08-10 22:08:13,479",35400,1.974,0.324,9069.396,0.006,3.374
|
710 |
+
"2024-08-10 22:11:02,521",35450,1.969,0.323,9069.72,0.006,3.381
|
711 |
+
"2024-08-10 22:13:50,439",35500,1.972,0.322,9070.047,0.006,3.358
|
712 |
+
"2024-08-10 22:16:39,106",35550,1.985,0.324,9070.389,0.006,3.373
|
713 |
+
"2024-08-10 22:19:27,802",35600,1.979,0.327,9070.728,0.006,3.374
|
714 |
+
"2024-08-10 22:22:16,572",35650,1.981,0.325,9071.049,0.006,3.375
|
715 |
+
"2024-08-10 22:25:04,796",35700,1.984,0.325,9071.379,0.006,3.364
|
716 |
+
"2024-08-10 22:27:51,592",35750,1.99,0.322,9071.695,0.006,3.336
|
717 |
+
"2024-08-10 22:30:40,355",35800,1.978,0.323,9072.031,0.006,3.375
|
718 |
+
"2024-08-10 22:33:28,954",35850,1.988,0.32,9072.335,0.006,3.372
|
719 |
+
"2024-08-10 22:36:18,068",35900,1.985,0.324,9072.654,0.006,3.382
|
720 |
+
"2024-08-10 22:39:07,221",35950,1.987,0.321,9072.975,0.006,3.383
|
721 |
+
"2024-08-10 22:41:56,739",36000,1.981,0.32,9073.295,0.006,3.39
|
722 |
+
"2024-08-10 22:44:45,142",36050,1.982,0.324,9073.61,0.006,3.368
|
723 |
+
"2024-08-10 22:47:33,362",36100,1.981,0.321,9073.917,0.006,3.364
|
724 |
+
"2024-08-10 22:50:22,054",36150,1.984,0.323,9074.221,0.006,3.374
|
725 |
+
"2024-08-10 22:53:11,318",36200,1.989,0.326,9074.538,0.006,3.385
|
726 |
+
"2024-08-10 22:55:59,443",36250,1.98,0.323,9074.854,0.006,3.363
|
727 |
+
"2024-08-10 22:58:47,752",36300,1.98,0.322,9075.159,0.006,3.366
|
728 |
+
"2024-08-10 23:01:36,450",36350,1.992,0.32,9075.441,0.006,3.374
|
729 |
+
"2024-08-10 23:04:25,574",36400,1.985,0.321,9075.748,0.006,3.382
|
730 |
+
"2024-08-10 23:07:14,442",36450,1.987,0.319,9076.036,0.006,3.377
|
731 |
+
"2024-08-10 23:10:04,537",36500,1.992,0.323,9076.326,0.005,3.402
|
732 |
+
"2024-08-10 23:12:52,857",36550,1.986,0.325,9076.642,0.005,3.366
|
733 |
+
"2024-08-10 23:15:41,926",36600,1.989,0.322,9076.938,0.005,3.381
|
734 |
+
"2024-08-10 23:18:31,293",36650,1.986,0.321,9077.225,0.005,3.387
|
735 |
+
"2024-08-10 23:21:20,030",36700,1.996,0.318,9077.501,0.005,3.375
|
736 |
+
"2024-08-10 23:24:08,655",36750,1.985,0.321,9077.785,0.005,3.372
|
737 |
+
"2024-08-10 23:26:59,534",36800,1.989,0.326,9078.068,0.005,3.418
|
738 |
+
"2024-08-10 23:29:48,887",36850,1.988,0.321,9078.353,0.005,3.387
|
739 |
+
"2024-08-10 23:32:38,332",36900,1.989,0.321,9078.633,0.005,3.389
|
740 |
+
"2024-08-10 23:35:27,561",36950,1.997,0.322,9078.91,0.005,3.385
|
741 |
+
"2024-08-10 23:38:17,143",37000,1.987,0.319,9079.184,0.005,3.392
|
742 |
+
"2024-08-10 23:41:05,847",37050,1.978,0.318,9079.459,0.005,3.374
|
743 |
+
"2024-08-10 23:43:54,811",37100,1.989,0.32,9079.752,0.005,3.379
|
744 |
+
"2024-08-10 23:46:43,768",37150,1.987,0.321,9080.034,0.005,3.379
|
745 |
+
"2024-08-10 23:49:32,160",37200,1.994,0.323,9080.289,0.005,3.368
|
746 |
+
"2024-08-10 23:52:20,720",37250,1.986,0.315,9080.56,0.005,3.371
|
747 |
+
"2024-08-10 23:55:08,868",37300,1.996,0.321,9080.819,0.005,3.363
|
748 |
+
"2024-08-10 23:57:58,146",37350,1.98,0.318,9081.084,0.005,3.386
|
749 |
+
"2024-08-11 00:00:46,523",37400,1.985,0.318,9081.347,0.005,3.368
|
750 |
+
"2024-08-11 00:03:35,784",37450,1.988,0.318,9081.604,0.005,3.385
|
751 |
+
"2024-08-11 00:06:24,722",37500,1.987,0.318,9081.851,0.005,3.379
|
752 |
+
"2024-08-11 00:09:13,944",37550,1.98,0.319,9082.097,0.005,3.384
|
753 |
+
"2024-08-11 00:12:02,407",37600,1.995,0.316,9082.342,0.005,3.369
|
754 |
+
"2024-08-11 00:14:51,432",37650,1.992,0.318,9082.588,0.005,3.38
|
755 |
+
"2024-08-11 00:17:39,935",37700,1.994,0.315,9082.847,0.005,3.37
|
756 |
+
"2024-08-11 00:20:28,827",37750,1.987,0.32,9083.095,0.005,3.378
|
757 |
+
"2024-08-11 00:23:16,528",37800,1.995,0.318,9083.336,0.005,3.354
|
758 |
+
"2024-08-11 00:26:04,576",37850,1.993,0.32,9083.585,0.005,3.361
|
759 |
+
"2024-08-11 00:28:53,091",37900,1.991,0.317,9083.812,0.005,3.37
|
checkpoints/weights_l2_over_steps.png
CHANGED