pszemraj commited on
Commit
ca9edf0
·
verified ·
1 Parent(s): a2dab05

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba7a3e86feefdc3d4333ca63cbb2e21d655e383d8a3d0b2ae8ecf3e0a93a34d
3
+ size 1202681712
checkpoints/checkpoint-pt-40000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -822,3 +822,122 @@ Mixed precision type: bf16
822
  [2024-08-11 00:26:04,576][Main][INFO] - [train] Step 37850 out of 80000 | Loss --> 1.993 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.585 | Lr --> 0.005 | Seconds_per_step --> 3.361 |
823
  [2024-08-11 00:28:53,091][Main][INFO] - [train] Step 37900 out of 80000 | Loss --> 1.991 | Grad_l2 --> 0.317 | Weights_l2 --> 9083.812 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
824
  [2024-08-11 00:31:41,481][Main][INFO] - [train] Step 37950 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.061 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  [2024-08-11 00:26:04,576][Main][INFO] - [train] Step 37850 out of 80000 | Loss --> 1.993 | Grad_l2 --> 0.320 | Weights_l2 --> 9083.585 | Lr --> 0.005 | Seconds_per_step --> 3.361 |
823
  [2024-08-11 00:28:53,091][Main][INFO] - [train] Step 37900 out of 80000 | Loss --> 1.991 | Grad_l2 --> 0.317 | Weights_l2 --> 9083.812 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
824
  [2024-08-11 00:31:41,481][Main][INFO] - [train] Step 37950 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.061 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
825
+ [2024-08-11 00:34:29,788][Main][INFO] - [train] Step 38000 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.285 | Lr --> 0.005 | Seconds_per_step --> 3.366 |
826
+ [2024-08-11 00:37:19,033][Main][INFO] - [train] Step 38050 out of 80000 | Loss --> 1.985 | Grad_l2 --> 0.315 | Weights_l2 --> 9084.521 | Lr --> 0.005 | Seconds_per_step --> 3.385 |
827
+ [2024-08-11 00:40:06,692][Main][INFO] - [train] Step 38100 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.314 | Weights_l2 --> 9084.736 | Lr --> 0.005 | Seconds_per_step --> 3.353 |
828
+ [2024-08-11 00:42:54,429][Main][INFO] - [train] Step 38150 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.317 | Weights_l2 --> 9084.960 | Lr --> 0.005 | Seconds_per_step --> 3.355 |
829
+ [2024-08-11 00:45:43,227][Main][INFO] - [train] Step 38200 out of 80000 | Loss --> 1.978 | Grad_l2 --> 0.315 | Weights_l2 --> 9085.192 | Lr --> 0.005 | Seconds_per_step --> 3.376 |
830
+ [2024-08-11 00:48:31,813][Main][INFO] - [train] Step 38250 out of 80000 | Loss --> 1.989 | Grad_l2 --> 0.316 | Weights_l2 --> 9085.401 | Lr --> 0.005 | Seconds_per_step --> 3.372 |
831
+ [2024-08-11 00:51:19,799][Main][INFO] - [train] Step 38300 out of 80000 | Loss --> 1.987 | Grad_l2 --> 0.316 | Weights_l2 --> 9085.644 | Lr --> 0.005 | Seconds_per_step --> 3.360 |
832
+ [2024-08-11 00:54:08,857][Main][INFO] - [train] Step 38350 out of 80000 | Loss --> 1.983 | Grad_l2 --> 0.315 | Weights_l2 --> 9085.876 | Lr --> 0.005 | Seconds_per_step --> 3.381 |
833
+ [2024-08-11 00:56:56,916][Main][INFO] - [train] Step 38400 out of 80000 | Loss --> 1.984 | Grad_l2 --> 0.313 | Weights_l2 --> 9086.108 | Lr --> 0.005 | Seconds_per_step --> 3.361 |
834
+ [2024-08-11 00:59:46,820][Main][INFO] - [train] Step 38450 out of 80000 | Loss --> 1.976 | Grad_l2 --> 0.313 | Weights_l2 --> 9086.323 | Lr --> 0.005 | Seconds_per_step --> 3.398 |
835
+ [2024-08-11 01:02:36,195][Main][INFO] - [train] Step 38500 out of 80000 | Loss --> 1.969 | Grad_l2 --> 0.315 | Weights_l2 --> 9086.523 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
836
+ [2024-08-11 01:05:25,092][Main][INFO] - [train] Step 38550 out of 80000 | Loss --> 1.971 | Grad_l2 --> 0.313 | Weights_l2 --> 9086.744 | Lr --> 0.005 | Seconds_per_step --> 3.378 |
837
+ [2024-08-11 01:08:13,931][Main][INFO] - [train] Step 38600 out of 80000 | Loss --> 1.969 | Grad_l2 --> 0.313 | Weights_l2 --> 9086.958 | Lr --> 0.005 | Seconds_per_step --> 3.377 |
838
+ [2024-08-11 01:11:03,451][Main][INFO] - [train] Step 38650 out of 80000 | Loss --> 1.972 | Grad_l2 --> 0.310 | Weights_l2 --> 9087.155 | Lr --> 0.005 | Seconds_per_step --> 3.390 |
839
+ [2024-08-11 01:13:52,933][Main][INFO] - [train] Step 38700 out of 80000 | Loss --> 1.969 | Grad_l2 --> 0.311 | Weights_l2 --> 9087.358 | Lr --> 0.005 | Seconds_per_step --> 3.390 |
840
+ [2024-08-11 01:16:41,729][Main][INFO] - [train] Step 38750 out of 80000 | Loss --> 1.965 | Grad_l2 --> 0.312 | Weights_l2 --> 9087.569 | Lr --> 0.005 | Seconds_per_step --> 3.376 |
841
+ [2024-08-11 01:19:30,203][Main][INFO] - [train] Step 38800 out of 80000 | Loss --> 1.975 | Grad_l2 --> 0.312 | Weights_l2 --> 9087.770 | Lr --> 0.005 | Seconds_per_step --> 3.369 |
842
+ [2024-08-11 01:22:18,941][Main][INFO] - [train] Step 38850 out of 80000 | Loss --> 1.961 | Grad_l2 --> 0.312 | Weights_l2 --> 9087.988 | Lr --> 0.005 | Seconds_per_step --> 3.375 |
843
+ [2024-08-11 01:25:08,309][Main][INFO] - [train] Step 38900 out of 80000 | Loss --> 1.955 | Grad_l2 --> 0.311 | Weights_l2 --> 9088.181 | Lr --> 0.005 | Seconds_per_step --> 3.387 |
844
+ [2024-08-11 01:27:57,134][Main][INFO] - [train] Step 38950 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.311 | Weights_l2 --> 9088.378 | Lr --> 0.005 | Seconds_per_step --> 3.376 |
845
+ [2024-08-11 01:30:46,945][Main][INFO] - [train] Step 39000 out of 80000 | Loss --> 1.964 | Grad_l2 --> 0.311 | Weights_l2 --> 9088.599 | Lr --> 0.005 | Seconds_per_step --> 3.396 |
846
+ [2024-08-11 01:33:35,903][Main][INFO] - [train] Step 39050 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.311 | Weights_l2 --> 9088.786 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
847
+ [2024-08-11 01:36:25,983][Main][INFO] - [train] Step 39100 out of 80000 | Loss --> 1.963 | Grad_l2 --> 0.312 | Weights_l2 --> 9088.981 | Lr --> 0.005 | Seconds_per_step --> 3.402 |
848
+ [2024-08-11 01:39:15,477][Main][INFO] - [train] Step 39150 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.310 | Weights_l2 --> 9089.179 | Lr --> 0.005 | Seconds_per_step --> 3.390 |
849
+ [2024-08-11 01:42:04,924][Main][INFO] - [train] Step 39200 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.314 | Weights_l2 --> 9089.373 | Lr --> 0.005 | Seconds_per_step --> 3.389 |
850
+ [2024-08-11 01:44:54,356][Main][INFO] - [train] Step 39250 out of 80000 | Loss --> 1.954 | Grad_l2 --> 0.310 | Weights_l2 --> 9089.562 | Lr --> 0.005 | Seconds_per_step --> 3.389 |
851
+ [2024-08-11 01:47:45,286][Main][INFO] - [train] Step 39300 out of 80000 | Loss --> 1.949 | Grad_l2 --> 0.309 | Weights_l2 --> 9089.747 | Lr --> 0.005 | Seconds_per_step --> 3.419 |
852
+ [2024-08-11 01:50:34,944][Main][INFO] - [train] Step 39350 out of 80000 | Loss --> 1.957 | Grad_l2 --> 0.312 | Weights_l2 --> 9089.936 | Lr --> 0.005 | Seconds_per_step --> 3.393 |
853
+ [2024-08-11 01:53:24,603][Main][INFO] - [train] Step 39400 out of 80000 | Loss --> 1.962 | Grad_l2 --> 0.312 | Weights_l2 --> 9090.121 | Lr --> 0.005 | Seconds_per_step --> 3.393 |
854
+ [2024-08-11 01:56:14,325][Main][INFO] - [train] Step 39450 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.314 | Weights_l2 --> 9090.300 | Lr --> 0.005 | Seconds_per_step --> 3.394 |
855
+ [2024-08-11 01:59:04,661][Main][INFO] - [train] Step 39500 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.310 | Weights_l2 --> 9090.498 | Lr --> 0.005 | Seconds_per_step --> 3.407 |
856
+ [2024-08-11 02:01:53,842][Main][INFO] - [train] Step 39550 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.315 | Weights_l2 --> 9090.693 | Lr --> 0.005 | Seconds_per_step --> 3.384 |
857
+ [2024-08-11 02:04:42,792][Main][INFO] - [train] Step 39600 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.310 | Weights_l2 --> 9090.871 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
858
+ [2024-08-11 02:07:31,737][Main][INFO] - [train] Step 39650 out of 80000 | Loss --> 1.950 | Grad_l2 --> 0.311 | Weights_l2 --> 9091.055 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
859
+ [2024-08-11 02:10:22,230][Main][INFO] - [train] Step 39700 out of 80000 | Loss --> 1.947 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.222 | Lr --> 0.005 | Seconds_per_step --> 3.410 |
860
+ [2024-08-11 02:13:12,097][Main][INFO] - [train] Step 39750 out of 80000 | Loss --> 1.947 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.387 | Lr --> 0.005 | Seconds_per_step --> 3.397 |
861
+ [2024-08-11 02:16:01,033][Main][INFO] - [train] Step 39800 out of 80000 | Loss --> 1.952 | Grad_l2 --> 0.314 | Weights_l2 --> 9091.557 | Lr --> 0.005 | Seconds_per_step --> 3.379 |
862
+ [2024-08-11 02:18:49,894][Main][INFO] - [train] Step 39850 out of 80000 | Loss --> 1.946 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.734 | Lr --> 0.005 | Seconds_per_step --> 3.377 |
863
+ [2024-08-11 02:21:39,391][Main][INFO] - [train] Step 39900 out of 80000 | Loss --> 1.947 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.896 | Lr --> 0.005 | Seconds_per_step --> 3.390 |
864
+ [2024-08-11 02:24:29,042][Main][INFO] - [train] Step 39950 out of 80000 | Loss --> 1.948 | Grad_l2 --> 0.309 | Weights_l2 --> 9092.072 | Lr --> 0.005 | Seconds_per_step --> 3.393 |
865
+ [2024-08-11 02:27:18,462][Main][INFO] - [train] Step 40000 out of 80000 | Loss --> 1.945 | Grad_l2 --> 0.308 | Weights_l2 --> 9092.238 | Lr --> 0.005 | Seconds_per_step --> 3.388 |
866
+ [2024-08-11 02:27:18,462][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-40000
867
+ [2024-08-11 02:27:18,465][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
868
+ [2024-08-11 02:27:20,578][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-40000/model.safetensors
869
+ [2024-08-11 02:27:23,421][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-40000/optimizer.bin
870
+ [2024-08-11 02:27:23,421][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-40000/scheduler.bin
871
+ [2024-08-11 02:27:23,421][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-40000/sampler.bin
872
+ [2024-08-11 02:27:23,421][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-40000/sampler_1.bin
873
+ [2024-08-11 02:27:23,422][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-40000/random_states_0.pkl
874
+ [2024-08-11 02:30:12,274][Main][INFO] - [train] Step 40050 out of 80000 | Loss --> 1.938 | Grad_l2 --> 0.311 | Weights_l2 --> 9092.394 | Lr --> 0.005 | Seconds_per_step --> 3.476 |
875
+ [2024-08-11 02:33:02,978][Main][INFO] - [train] Step 40100 out of 80000 | Loss --> 1.936 | Grad_l2 --> 0.313 | Weights_l2 --> 9092.562 | Lr --> 0.005 | Seconds_per_step --> 3.414 |
876
+ [2024-08-11 02:35:52,303][Main][INFO] - [train] Step 40150 out of 80000 | Loss --> 1.945 | Grad_l2 --> 0.309 | Weights_l2 --> 9092.724 | Lr --> 0.005 | Seconds_per_step --> 3.386 |
877
+ [2024-08-11 02:38:41,408][Main][INFO] - [train] Step 40200 out of 80000 | Loss --> 1.940 | Grad_l2 --> 0.310 | Weights_l2 --> 9092.882 | Lr --> 0.005 | Seconds_per_step --> 3.382 |
878
+ [2024-08-11 02:41:29,521][Main][INFO] - [train] Step 40250 out of 80000 | Loss --> 1.935 | Grad_l2 --> 0.310 | Weights_l2 --> 9093.039 | Lr --> 0.005 | Seconds_per_step --> 3.362 |
879
+ [2024-08-11 02:44:17,817][Main][INFO] - [train] Step 40300 out of 80000 | Loss --> 1.928 | Grad_l2 --> 0.311 | Weights_l2 --> 9093.200 | Lr --> 0.005 | Seconds_per_step --> 3.366 |
880
+ [2024-08-11 02:47:06,404][Main][INFO] - [train] Step 40350 out of 80000 | Loss --> 1.934 | Grad_l2 --> 0.309 | Weights_l2 --> 9093.363 | Lr --> 0.005 | Seconds_per_step --> 3.372 |
881
+ [2024-08-11 02:49:54,901][Main][INFO] - [train] Step 40400 out of 80000 | Loss --> 1.930 | Grad_l2 --> 0.309 | Weights_l2 --> 9093.498 | Lr --> 0.005 | Seconds_per_step --> 3.370 |
882
+ [2024-08-11 02:52:45,475][Main][INFO] - [train] Step 40450 out of 80000 | Loss --> 1.936 | Grad_l2 --> 0.311 | Weights_l2 --> 9093.665 | Lr --> 0.005 | Seconds_per_step --> 3.411 |
883
+ [2024-08-11 02:55:35,053][Main][INFO] - [train] Step 40500 out of 80000 | Loss --> 1.919 | Grad_l2 --> 0.309 | Weights_l2 --> 9093.834 | Lr --> 0.005 | Seconds_per_step --> 3.392 |
884
+ [2024-08-11 02:58:25,674][Main][INFO] - [train] Step 40550 out of 80000 | Loss --> 1.927 | Grad_l2 --> 0.308 | Weights_l2 --> 9093.998 | Lr --> 0.005 | Seconds_per_step --> 3.412 |
885
+ [2024-08-11 03:01:14,558][Main][INFO] - [train] Step 40600 out of 80000 | Loss --> 1.933 | Grad_l2 --> 0.306 | Weights_l2 --> 9094.139 | Lr --> 0.005 | Seconds_per_step --> 3.378 |
886
+ [2024-08-11 03:04:03,662][Main][INFO] - [train] Step 40650 out of 80000 | Loss --> 1.927 | Grad_l2 --> 0.308 | Weights_l2 --> 9094.289 | Lr --> 0.005 | Seconds_per_step --> 3.382 |
887
+ [2024-08-11 03:06:52,826][Main][INFO] - [train] Step 40700 out of 80000 | Loss --> 1.912 | Grad_l2 --> 0.308 | Weights_l2 --> 9094.446 | Lr --> 0.005 | Seconds_per_step --> 3.383 |
888
+ [2024-08-11 03:09:42,887][Main][INFO] - [train] Step 40750 out of 80000 | Loss --> 1.911 | Grad_l2 --> 0.309 | Weights_l2 --> 9094.589 | Lr --> 0.005 | Seconds_per_step --> 3.401 |
889
+ [2024-08-11 03:12:31,609][Main][INFO] - [train] Step 40800 out of 80000 | Loss --> 1.918 | Grad_l2 --> 0.311 | Weights_l2 --> 9094.736 | Lr --> 0.005 | Seconds_per_step --> 3.374 |
890
+ [2024-08-11 03:15:18,925][Main][INFO] - [train] Step 40850 out of 80000 | Loss --> 1.915 | Grad_l2 --> 0.309 | Weights_l2 --> 9094.879 | Lr --> 0.005 | Seconds_per_step --> 3.346 |
891
+ [2024-08-11 03:18:07,730][Main][INFO] - [train] Step 40900 out of 80000 | Loss --> 1.910 | Grad_l2 --> 0.306 | Weights_l2 --> 9095.030 | Lr --> 0.005 | Seconds_per_step --> 3.376 |
892
+ [2024-08-11 03:21:01,061][Main][INFO] - [train] Step 40950 out of 80000 | Loss --> 1.916 | Grad_l2 --> 0.304 | Weights_l2 --> 9095.165 | Lr --> 0.005 | Seconds_per_step --> 3.467 |
893
+ [2024-08-11 03:23:50,091][Main][INFO] - [train] Step 41000 out of 80000 | Loss --> 1.912 | Grad_l2 --> 0.307 | Weights_l2 --> 9095.298 | Lr --> 0.005 | Seconds_per_step --> 3.381 |
894
+ [2024-08-11 03:26:38,484][Main][INFO] - [train] Step 41050 out of 80000 | Loss --> 1.926 | Grad_l2 --> 0.308 | Weights_l2 --> 9095.436 | Lr --> 0.005 | Seconds_per_step --> 3.368 |
895
+ [2024-08-11 03:29:26,707][Main][INFO] - [train] Step 41100 out of 80000 | Loss --> 1.916 | Grad_l2 --> 0.309 | Weights_l2 --> 9095.571 | Lr --> 0.005 | Seconds_per_step --> 3.364 |
896
+ [2024-08-11 03:32:17,300][Main][INFO] - [train] Step 41150 out of 80000 | Loss --> 1.914 | Grad_l2 --> 0.308 | Weights_l2 --> 9095.706 | Lr --> 0.005 | Seconds_per_step --> 3.412 |
897
+ [2024-08-11 03:35:06,978][Main][INFO] - [train] Step 41200 out of 80000 | Loss --> 1.914 | Grad_l2 --> 0.306 | Weights_l2 --> 9095.831 | Lr --> 0.005 | Seconds_per_step --> 3.394 |
898
+ [2024-08-11 03:37:56,000][Main][INFO] - [train] Step 41250 out of 80000 | Loss --> 1.914 | Grad_l2 --> 0.306 | Weights_l2 --> 9095.964 | Lr --> 0.005 | Seconds_per_step --> 3.380 |
899
+ [2024-08-11 03:40:45,631][Main][INFO] - [train] Step 41300 out of 80000 | Loss --> 1.912 | Grad_l2 --> 0.306 | Weights_l2 --> 9096.088 | Lr --> 0.005 | Seconds_per_step --> 3.393 |
900
+ [2024-08-11 03:43:35,918][Main][INFO] - [train] Step 41350 out of 80000 | Loss --> 1.898 | Grad_l2 --> 0.304 | Weights_l2 --> 9096.215 | Lr --> 0.005 | Seconds_per_step --> 3.406 |
901
+ [2024-08-11 03:46:26,811][Main][INFO] - [train] Step 41400 out of 80000 | Loss --> 1.899 | Grad_l2 --> 0.306 | Weights_l2 --> 9096.371 | Lr --> 0.005 | Seconds_per_step --> 3.418 |
902
+ [2024-08-11 03:49:16,649][Main][INFO] - [train] Step 41450 out of 80000 | Loss --> 1.909 | Grad_l2 --> 0.305 | Weights_l2 --> 9096.494 | Lr --> 0.005 | Seconds_per_step --> 3.397 |
903
+ [2024-08-11 03:52:06,340][Main][INFO] - [train] Step 41500 out of 80000 | Loss --> 1.900 | Grad_l2 --> 0.304 | Weights_l2 --> 9096.611 | Lr --> 0.005 | Seconds_per_step --> 3.394 |
904
+ [2024-08-11 03:54:56,488][Main][INFO] - [train] Step 41550 out of 80000 | Loss --> 1.900 | Grad_l2 --> 0.305 | Weights_l2 --> 9096.724 | Lr --> 0.005 | Seconds_per_step --> 3.403 |
905
+ [2024-08-11 03:57:46,246][Main][INFO] - [train] Step 41600 out of 80000 | Loss --> 1.904 | Grad_l2 --> 0.306 | Weights_l2 --> 9096.859 | Lr --> 0.005 | Seconds_per_step --> 3.395 |
906
+ [2024-08-11 04:00:36,209][Main][INFO] - [train] Step 41650 out of 80000 | Loss --> 1.900 | Grad_l2 --> 0.305 | Weights_l2 --> 9096.976 | Lr --> 0.005 | Seconds_per_step --> 3.399 |
907
+ [2024-08-11 04:03:26,087][Main][INFO] - [train] Step 41700 out of 80000 | Loss --> 1.896 | Grad_l2 --> 0.306 | Weights_l2 --> 9097.099 | Lr --> 0.005 | Seconds_per_step --> 3.398 |
908
+ [2024-08-11 04:06:16,494][Main][INFO] - [train] Step 41750 out of 80000 | Loss --> 1.906 | Grad_l2 --> 0.305 | Weights_l2 --> 9097.204 | Lr --> 0.005 | Seconds_per_step --> 3.408 |
909
+ [2024-08-11 04:09:06,107][Main][INFO] - [train] Step 41800 out of 80000 | Loss --> 1.902 | Grad_l2 --> 0.304 | Weights_l2 --> 9097.322 | Lr --> 0.005 | Seconds_per_step --> 3.392 |
910
+ [2024-08-11 04:11:54,877][Main][INFO] - [train] Step 41850 out of 80000 | Loss --> 1.892 | Grad_l2 --> 0.305 | Weights_l2 --> 9097.438 | Lr --> 0.005 | Seconds_per_step --> 3.375 |
911
+ [2024-08-11 04:14:45,250][Main][INFO] - [train] Step 41900 out of 80000 | Loss --> 1.893 | Grad_l2 --> 0.304 | Weights_l2 --> 9097.555 | Lr --> 0.005 | Seconds_per_step --> 3.407 |
912
+ [2024-08-11 04:17:33,003][Main][INFO] - [train] Step 41950 out of 80000 | Loss --> 1.886 | Grad_l2 --> 0.305 | Weights_l2 --> 9097.670 | Lr --> 0.005 | Seconds_per_step --> 3.355 |
913
+ [2024-08-11 04:20:23,408][Main][INFO] - [train] Step 42000 out of 80000 | Loss --> 1.895 | Grad_l2 --> 0.304 | Weights_l2 --> 9097.786 | Lr --> 0.005 | Seconds_per_step --> 3.408 |
914
+ [2024-08-11 04:23:12,871][Main][INFO] - [train] Step 42050 out of 80000 | Loss --> 1.890 | Grad_l2 --> 0.303 | Weights_l2 --> 9097.903 | Lr --> 0.005 | Seconds_per_step --> 3.389 |
915
+ [2024-08-11 04:26:10,451][Main][INFO] - [train] Step 42100 out of 80000 | Loss --> 1.887 | Grad_l2 --> 0.304 | Weights_l2 --> 9097.995 | Lr --> 0.005 | Seconds_per_step --> 3.552 |
916
+ [2024-08-11 04:29:00,301][Main][INFO] - [train] Step 42150 out of 80000 | Loss --> 1.886 | Grad_l2 --> 0.303 | Weights_l2 --> 9098.083 | Lr --> 0.005 | Seconds_per_step --> 3.397 |
917
+ [2024-08-11 04:31:50,543][Main][INFO] - [train] Step 42200 out of 80000 | Loss --> 1.885 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.170 | Lr --> 0.005 | Seconds_per_step --> 3.405 |
918
+ [2024-08-11 04:34:40,218][Main][INFO] - [train] Step 42250 out of 80000 | Loss --> 1.885 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.272 | Lr --> 0.004 | Seconds_per_step --> 3.393 |
919
+ [2024-08-11 04:37:29,693][Main][INFO] - [train] Step 42300 out of 80000 | Loss --> 1.879 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.378 | Lr --> 0.004 | Seconds_per_step --> 3.389 |
920
+ [2024-08-11 04:40:18,841][Main][INFO] - [train] Step 42350 out of 80000 | Loss --> 1.884 | Grad_l2 --> 0.306 | Weights_l2 --> 9098.505 | Lr --> 0.004 | Seconds_per_step --> 3.383 |
921
+ [2024-08-11 04:43:07,106][Main][INFO] - [train] Step 42400 out of 80000 | Loss --> 1.887 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.599 | Lr --> 0.004 | Seconds_per_step --> 3.365 |
922
+ [2024-08-11 04:45:54,958][Main][INFO] - [train] Step 42450 out of 80000 | Loss --> 1.890 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.697 | Lr --> 0.004 | Seconds_per_step --> 3.357 |
923
+ [2024-08-11 04:48:42,930][Main][INFO] - [train] Step 42500 out of 80000 | Loss --> 1.889 | Grad_l2 --> 0.301 | Weights_l2 --> 9098.800 | Lr --> 0.004 | Seconds_per_step --> 3.359 |
924
+ [2024-08-11 04:51:31,338][Main][INFO] - [train] Step 42550 out of 80000 | Loss --> 1.888 | Grad_l2 --> 0.302 | Weights_l2 --> 9098.884 | Lr --> 0.004 | Seconds_per_step --> 3.368 |
925
+ [2024-08-11 04:54:20,337][Main][INFO] - [train] Step 42600 out of 80000 | Loss --> 1.885 | Grad_l2 --> 0.304 | Weights_l2 --> 9098.972 | Lr --> 0.004 | Seconds_per_step --> 3.380 |
926
+ [2024-08-11 04:57:07,121][Main][INFO] - [train] Step 42650 out of 80000 | Loss --> 1.881 | Grad_l2 --> 0.303 | Weights_l2 --> 9099.064 | Lr --> 0.004 | Seconds_per_step --> 3.336 |
927
+ [2024-08-11 04:59:54,453][Main][INFO] - [train] Step 42700 out of 80000 | Loss --> 1.886 | Grad_l2 --> 0.308 | Weights_l2 --> 9099.155 | Lr --> 0.004 | Seconds_per_step --> 3.347 |
928
+ [2024-08-11 05:02:44,319][Main][INFO] - [train] Step 42750 out of 80000 | Loss --> 1.883 | Grad_l2 --> 0.301 | Weights_l2 --> 9099.248 | Lr --> 0.004 | Seconds_per_step --> 3.397 |
929
+ [2024-08-11 05:05:34,713][Main][INFO] - [train] Step 42800 out of 80000 | Loss --> 1.889 | Grad_l2 --> 0.305 | Weights_l2 --> 9099.338 | Lr --> 0.004 | Seconds_per_step --> 3.408 |
930
+ [2024-08-11 05:08:25,333][Main][INFO] - [train] Step 42850 out of 80000 | Loss --> 1.899 | Grad_l2 --> 0.304 | Weights_l2 --> 9099.447 | Lr --> 0.004 | Seconds_per_step --> 3.412 |
931
+ [2024-08-11 05:11:15,256][Main][INFO] - [train] Step 42900 out of 80000 | Loss --> 1.881 | Grad_l2 --> 0.304 | Weights_l2 --> 9099.536 | Lr --> 0.004 | Seconds_per_step --> 3.398 |
932
+ [2024-08-11 05:14:05,070][Main][INFO] - [train] Step 42950 out of 80000 | Loss --> 1.886 | Grad_l2 --> 0.303 | Weights_l2 --> 9099.635 | Lr --> 0.004 | Seconds_per_step --> 3.396 |
933
+ [2024-08-11 05:16:55,568][Main][INFO] - [train] Step 43000 out of 80000 | Loss --> 1.893 | Grad_l2 --> 0.303 | Weights_l2 --> 9099.732 | Lr --> 0.004 | Seconds_per_step --> 3.410 |
934
+ [2024-08-11 05:19:44,354][Main][INFO] - [train] Step 43050 out of 80000 | Loss --> 1.886 | Grad_l2 --> 0.301 | Weights_l2 --> 9099.800 | Lr --> 0.004 | Seconds_per_step --> 3.376 |
935
+ [2024-08-11 05:22:33,664][Main][INFO] - [train] Step 43100 out of 80000 | Loss --> 1.882 | Grad_l2 --> 0.301 | Weights_l2 --> 9099.887 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
936
+ [2024-08-11 05:25:22,103][Main][INFO] - [train] Step 43150 out of 80000 | Loss --> 1.877 | Grad_l2 --> 0.306 | Weights_l2 --> 9099.968 | Lr --> 0.004 | Seconds_per_step --> 3.369 |
937
+ [2024-08-11 05:28:12,205][Main][INFO] - [train] Step 43200 out of 80000 | Loss --> 1.881 | Grad_l2 --> 0.307 | Weights_l2 --> 9100.053 | Lr --> 0.004 | Seconds_per_step --> 3.402 |
938
+ [2024-08-11 05:31:02,600][Main][INFO] - [train] Step 43250 out of 80000 | Loss --> 1.875 | Grad_l2 --> 0.307 | Weights_l2 --> 9100.120 | Lr --> 0.004 | Seconds_per_step --> 3.408 |
939
+ [2024-08-11 05:33:51,662][Main][INFO] - [train] Step 43300 out of 80000 | Loss --> 1.885 | Grad_l2 --> 0.302 | Weights_l2 --> 9100.187 | Lr --> 0.004 | Seconds_per_step --> 3.381 |
940
+ [2024-08-11 05:36:40,598][Main][INFO] - [train] Step 43350 out of 80000 | Loss --> 1.884 | Grad_l2 --> 0.304 | Weights_l2 --> 9100.247 | Lr --> 0.004 | Seconds_per_step --> 3.379 |
941
+ [2024-08-11 05:39:30,749][Main][INFO] - [train] Step 43400 out of 80000 | Loss --> 1.873 | Grad_l2 --> 0.306 | Weights_l2 --> 9100.324 | Lr --> 0.004 | Seconds_per_step --> 3.403 |
942
+ [2024-08-11 05:42:19,147][Main][INFO] - [train] Step 43450 out of 80000 | Loss --> 1.870 | Grad_l2 --> 0.303 | Weights_l2 --> 9100.419 | Lr --> 0.004 | Seconds_per_step --> 3.368 |
943
+ [2024-08-11 05:45:08,526][Main][INFO] - [train] Step 43500 out of 80000 | Loss --> 1.873 | Grad_l2 --> 0.303 | Weights_l2 --> 9100.487 | Lr --> 0.004 | Seconds_per_step --> 3.388 |
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -757,3 +757,114 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
757
  "2024-08-11 00:23:16,528",37800,1.995,0.318,9083.336,0.005,3.354
758
  "2024-08-11 00:26:04,576",37850,1.993,0.32,9083.585,0.005,3.361
759
  "2024-08-11 00:28:53,091",37900,1.991,0.317,9083.812,0.005,3.37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757
  "2024-08-11 00:23:16,528",37800,1.995,0.318,9083.336,0.005,3.354
758
  "2024-08-11 00:26:04,576",37850,1.993,0.32,9083.585,0.005,3.361
759
  "2024-08-11 00:28:53,091",37900,1.991,0.317,9083.812,0.005,3.37
760
+ "2024-08-11 00:31:41,481",37950,1.985,0.315,9084.061,0.005,3.368
761
+ "2024-08-11 00:34:29,788",38000,1.987,0.315,9084.285,0.005,3.366
762
+ "2024-08-11 00:37:19,033",38050,1.985,0.315,9084.521,0.005,3.385
763
+ "2024-08-11 00:40:06,692",38100,1.989,0.314,9084.736,0.005,3.353
764
+ "2024-08-11 00:42:54,429",38150,1.984,0.317,9084.96,0.005,3.355
765
+ "2024-08-11 00:45:43,227",38200,1.978,0.315,9085.192,0.005,3.376
766
+ "2024-08-11 00:48:31,813",38250,1.989,0.316,9085.401,0.005,3.372
767
+ "2024-08-11 00:51:19,799",38300,1.987,0.316,9085.644,0.005,3.36
768
+ "2024-08-11 00:54:08,857",38350,1.983,0.315,9085.876,0.005,3.381
769
+ "2024-08-11 00:56:56,916",38400,1.984,0.313,9086.108,0.005,3.361
770
+ "2024-08-11 00:59:46,820",38450,1.976,0.313,9086.323,0.005,3.398
771
+ "2024-08-11 01:02:36,195",38500,1.969,0.315,9086.523,0.005,3.387
772
+ "2024-08-11 01:05:25,092",38550,1.971,0.313,9086.744,0.005,3.378
773
+ "2024-08-11 01:08:13,931",38600,1.969,0.313,9086.958,0.005,3.377
774
+ "2024-08-11 01:11:03,451",38650,1.972,0.31,9087.155,0.005,3.39
775
+ "2024-08-11 01:13:52,933",38700,1.969,0.311,9087.358,0.005,3.39
776
+ "2024-08-11 01:16:41,729",38750,1.965,0.312,9087.569,0.005,3.376
777
+ "2024-08-11 01:19:30,203",38800,1.975,0.312,9087.77,0.005,3.369
778
+ "2024-08-11 01:22:18,941",38850,1.961,0.312,9087.988,0.005,3.375
779
+ "2024-08-11 01:25:08,309",38900,1.955,0.311,9088.181,0.005,3.387
780
+ "2024-08-11 01:27:57,134",38950,1.954,0.311,9088.378,0.005,3.376
781
+ "2024-08-11 01:30:46,945",39000,1.964,0.311,9088.599,0.005,3.396
782
+ "2024-08-11 01:33:35,903",39050,1.952,0.311,9088.786,0.005,3.379
783
+ "2024-08-11 01:36:25,983",39100,1.963,0.312,9088.981,0.005,3.402
784
+ "2024-08-11 01:39:15,477",39150,1.957,0.31,9089.179,0.005,3.39
785
+ "2024-08-11 01:42:04,924",39200,1.962,0.314,9089.373,0.005,3.389
786
+ "2024-08-11 01:44:54,356",39250,1.954,0.31,9089.562,0.005,3.389
787
+ "2024-08-11 01:47:45,286",39300,1.949,0.309,9089.747,0.005,3.419
788
+ "2024-08-11 01:50:34,944",39350,1.957,0.312,9089.936,0.005,3.393
789
+ "2024-08-11 01:53:24,603",39400,1.962,0.312,9090.121,0.005,3.393
790
+ "2024-08-11 01:56:14,325",39450,1.95,0.314,9090.3,0.005,3.394
791
+ "2024-08-11 01:59:04,661",39500,1.948,0.31,9090.498,0.005,3.407
792
+ "2024-08-11 02:01:53,842",39550,1.948,0.315,9090.693,0.005,3.384
793
+ "2024-08-11 02:04:42,792",39600,1.952,0.31,9090.871,0.005,3.379
794
+ "2024-08-11 02:07:31,737",39650,1.95,0.311,9091.055,0.005,3.379
795
+ "2024-08-11 02:10:22,230",39700,1.947,0.309,9091.222,0.005,3.41
796
+ "2024-08-11 02:13:12,097",39750,1.947,0.309,9091.387,0.005,3.397
797
+ "2024-08-11 02:16:01,033",39800,1.952,0.314,9091.557,0.005,3.379
798
+ "2024-08-11 02:18:49,894",39850,1.946,0.309,9091.734,0.005,3.377
799
+ "2024-08-11 02:21:39,391",39900,1.947,0.309,9091.896,0.005,3.39
800
+ "2024-08-11 02:24:29,042",39950,1.948,0.309,9092.072,0.005,3.393
801
+ "2024-08-11 02:27:18,462",40000,1.945,0.308,9092.238,0.005,3.388
802
+ "2024-08-11 02:30:12,274",40050,1.938,0.311,9092.394,0.005,3.476
803
+ "2024-08-11 02:33:02,978",40100,1.936,0.313,9092.562,0.005,3.414
804
+ "2024-08-11 02:35:52,303",40150,1.945,0.309,9092.724,0.005,3.386
805
+ "2024-08-11 02:38:41,408",40200,1.94,0.31,9092.882,0.005,3.382
806
+ "2024-08-11 02:41:29,521",40250,1.935,0.31,9093.039,0.005,3.362
807
+ "2024-08-11 02:44:17,817",40300,1.928,0.311,9093.2,0.005,3.366
808
+ "2024-08-11 02:47:06,404",40350,1.934,0.309,9093.363,0.005,3.372
809
+ "2024-08-11 02:49:54,901",40400,1.93,0.309,9093.498,0.005,3.37
810
+ "2024-08-11 02:52:45,475",40450,1.936,0.311,9093.665,0.005,3.411
811
+ "2024-08-11 02:55:35,053",40500,1.919,0.309,9093.834,0.005,3.392
812
+ "2024-08-11 02:58:25,674",40550,1.927,0.308,9093.998,0.005,3.412
813
+ "2024-08-11 03:01:14,558",40600,1.933,0.306,9094.139,0.005,3.378
814
+ "2024-08-11 03:04:03,662",40650,1.927,0.308,9094.289,0.005,3.382
815
+ "2024-08-11 03:06:52,826",40700,1.912,0.308,9094.446,0.005,3.383
816
+ "2024-08-11 03:09:42,887",40750,1.911,0.309,9094.589,0.005,3.401
817
+ "2024-08-11 03:12:31,609",40800,1.918,0.311,9094.736,0.005,3.374
818
+ "2024-08-11 03:15:18,925",40850,1.915,0.309,9094.879,0.005,3.346
819
+ "2024-08-11 03:18:07,730",40900,1.91,0.306,9095.03,0.005,3.376
820
+ "2024-08-11 03:21:01,061",40950,1.916,0.304,9095.165,0.005,3.467
821
+ "2024-08-11 03:23:50,091",41000,1.912,0.307,9095.298,0.005,3.381
822
+ "2024-08-11 03:26:38,484",41050,1.926,0.308,9095.436,0.005,3.368
823
+ "2024-08-11 03:29:26,707",41100,1.916,0.309,9095.571,0.005,3.364
824
+ "2024-08-11 03:32:17,300",41150,1.914,0.308,9095.706,0.005,3.412
825
+ "2024-08-11 03:35:06,978",41200,1.914,0.306,9095.831,0.005,3.394
826
+ "2024-08-11 03:37:56,000",41250,1.914,0.306,9095.964,0.005,3.38
827
+ "2024-08-11 03:40:45,631",41300,1.912,0.306,9096.088,0.005,3.393
828
+ "2024-08-11 03:43:35,918",41350,1.898,0.304,9096.215,0.005,3.406
829
+ "2024-08-11 03:46:26,811",41400,1.899,0.306,9096.371,0.005,3.418
830
+ "2024-08-11 03:49:16,649",41450,1.909,0.305,9096.494,0.005,3.397
831
+ "2024-08-11 03:52:06,340",41500,1.9,0.304,9096.611,0.005,3.394
832
+ "2024-08-11 03:54:56,488",41550,1.9,0.305,9096.724,0.005,3.403
833
+ "2024-08-11 03:57:46,246",41600,1.904,0.306,9096.859,0.005,3.395
834
+ "2024-08-11 04:00:36,209",41650,1.9,0.305,9096.976,0.005,3.399
835
+ "2024-08-11 04:03:26,087",41700,1.896,0.306,9097.099,0.005,3.398
836
+ "2024-08-11 04:06:16,494",41750,1.906,0.305,9097.204,0.005,3.408
837
+ "2024-08-11 04:09:06,107",41800,1.902,0.304,9097.322,0.005,3.392
838
+ "2024-08-11 04:11:54,877",41850,1.892,0.305,9097.438,0.005,3.375
839
+ "2024-08-11 04:14:45,250",41900,1.893,0.304,9097.555,0.005,3.407
840
+ "2024-08-11 04:17:33,003",41950,1.886,0.305,9097.67,0.005,3.355
841
+ "2024-08-11 04:20:23,408",42000,1.895,0.304,9097.786,0.005,3.408
842
+ "2024-08-11 04:23:12,871",42050,1.89,0.303,9097.903,0.005,3.389
843
+ "2024-08-11 04:26:10,451",42100,1.887,0.304,9097.995,0.005,3.552
844
+ "2024-08-11 04:29:00,301",42150,1.886,0.303,9098.083,0.005,3.397
845
+ "2024-08-11 04:31:50,543",42200,1.885,0.304,9098.17,0.005,3.405
846
+ "2024-08-11 04:34:40,218",42250,1.885,0.304,9098.272,0.004,3.393
847
+ "2024-08-11 04:37:29,693",42300,1.879,0.304,9098.378,0.004,3.389
848
+ "2024-08-11 04:40:18,841",42350,1.884,0.306,9098.505,0.004,3.383
849
+ "2024-08-11 04:43:07,106",42400,1.887,0.304,9098.599,0.004,3.365
850
+ "2024-08-11 04:45:54,958",42450,1.89,0.304,9098.697,0.004,3.357
851
+ "2024-08-11 04:48:42,930",42500,1.889,0.301,9098.8,0.004,3.359
852
+ "2024-08-11 04:51:31,338",42550,1.888,0.302,9098.884,0.004,3.368
853
+ "2024-08-11 04:54:20,337",42600,1.885,0.304,9098.972,0.004,3.38
854
+ "2024-08-11 04:57:07,121",42650,1.881,0.303,9099.064,0.004,3.336
855
+ "2024-08-11 04:59:54,453",42700,1.886,0.308,9099.155,0.004,3.347
856
+ "2024-08-11 05:02:44,319",42750,1.883,0.301,9099.248,0.004,3.397
857
+ "2024-08-11 05:05:34,713",42800,1.889,0.305,9099.338,0.004,3.408
858
+ "2024-08-11 05:08:25,333",42850,1.899,0.304,9099.447,0.004,3.412
859
+ "2024-08-11 05:11:15,256",42900,1.881,0.304,9099.536,0.004,3.398
860
+ "2024-08-11 05:14:05,070",42950,1.886,0.303,9099.635,0.004,3.396
861
+ "2024-08-11 05:16:55,568",43000,1.893,0.303,9099.732,0.004,3.41
862
+ "2024-08-11 05:19:44,354",43050,1.886,0.301,9099.8,0.004,3.376
863
+ "2024-08-11 05:22:33,664",43100,1.882,0.301,9099.887,0.004,3.386
864
+ "2024-08-11 05:25:22,103",43150,1.877,0.306,9099.968,0.004,3.369
865
+ "2024-08-11 05:28:12,205",43200,1.881,0.307,9100.053,0.004,3.402
866
+ "2024-08-11 05:31:02,600",43250,1.875,0.307,9100.12,0.004,3.408
867
+ "2024-08-11 05:33:51,662",43300,1.885,0.302,9100.187,0.004,3.381
868
+ "2024-08-11 05:36:40,598",43350,1.884,0.304,9100.247,0.004,3.379
869
+ "2024-08-11 05:39:30,749",43400,1.873,0.306,9100.324,0.004,3.403
870
+ "2024-08-11 05:42:19,147",43450,1.87,0.303,9100.419,0.004,3.368
checkpoints/weights_l2_over_steps.png CHANGED