Upload folder using huggingface_hub
Browse files- checkpoints/checkpoint-pt-50000/model.safetensors +3 -0
- checkpoints/checkpoint-pt-50000/random_states_0.pkl +3 -0
- checkpoints/grad_l2_over_steps.png +0 -0
- checkpoints/loss_over_steps.png +0 -0
- checkpoints/lr_over_steps.png +0 -0
- checkpoints/main.log +93 -0
- checkpoints/seconds_per_step_over_steps.png +0 -0
- checkpoints/training_metrics.csv +86 -0
- checkpoints/weights_l2_over_steps.png +0 -0
checkpoints/checkpoint-pt-50000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07bf1f91330d84f49ff94b792c3b178dcd5b630796a8477dadc1c27d71373985
|
3 |
+
size 1202681712
|
checkpoints/checkpoint-pt-50000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
|
3 |
+
size 14344
|
checkpoints/grad_l2_over_steps.png
CHANGED
checkpoints/loss_over_steps.png
CHANGED
checkpoints/lr_over_steps.png
CHANGED
checkpoints/main.log
CHANGED
@@ -998,3 +998,96 @@ Mixed precision type: bf16
|
|
998 |
[2024-08-11 07:57:37,200][Main][INFO] - [train] Step 45850 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.643 | Lr --> 0.004 | Seconds_per_step --> 3.372 |
|
999 |
[2024-08-11 08:00:26,936][Main][INFO] - [train] Step 45900 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.666 | Lr --> 0.004 | Seconds_per_step --> 3.395 |
|
1000 |
[2024-08-11 08:03:16,131][Main][INFO] - [train] Step 45950 out of 80000 | Loss --> 1.859 | Grad_l2 --> 0.305 | Weights_l2 --> 9102.691 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
998 |
[2024-08-11 07:57:37,200][Main][INFO] - [train] Step 45850 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.643 | Lr --> 0.004 | Seconds_per_step --> 3.372 |
|
999 |
[2024-08-11 08:00:26,936][Main][INFO] - [train] Step 45900 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.666 | Lr --> 0.004 | Seconds_per_step --> 3.395 |
|
1000 |
[2024-08-11 08:03:16,131][Main][INFO] - [train] Step 45950 out of 80000 | Loss --> 1.859 | Grad_l2 --> 0.305 | Weights_l2 --> 9102.691 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
|
1001 |
+
[2024-08-11 08:06:04,985][Main][INFO] - [train] Step 46000 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.708 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
|
1002 |
+
[2024-08-11 08:08:53,975][Main][INFO] - [train] Step 46050 out of 80000 | Loss --> 1.847 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.731 | Lr --> 0.004 | Seconds_per_step --> 3.380 |
|
1003 |
+
[2024-08-11 08:11:43,887][Main][INFO] - [train] Step 46100 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.751 | Lr --> 0.004 | Seconds_per_step --> 3.398 |
|
1004 |
+
[2024-08-11 08:14:32,925][Main][INFO] - [train] Step 46150 out of 80000 | Loss --> 1.854 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.767 | Lr --> 0.004 | Seconds_per_step --> 3.381 |
|
1005 |
+
[2024-08-11 08:17:21,782][Main][INFO] - [train] Step 46200 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.776 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
|
1006 |
+
[2024-08-11 08:20:11,070][Main][INFO] - [train] Step 46250 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.791 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
|
1007 |
+
[2024-08-11 08:23:00,985][Main][INFO] - [train] Step 46300 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.809 | Lr --> 0.004 | Seconds_per_step --> 3.398 |
|
1008 |
+
[2024-08-11 08:25:48,354][Main][INFO] - [train] Step 46350 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.822 | Lr --> 0.004 | Seconds_per_step --> 3.347 |
|
1009 |
+
[2024-08-11 08:28:37,651][Main][INFO] - [train] Step 46400 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.831 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
|
1010 |
+
[2024-08-11 08:31:27,592][Main][INFO] - [train] Step 46450 out of 80000 | Loss --> 1.854 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.837 | Lr --> 0.004 | Seconds_per_step --> 3.399 |
|
1011 |
+
[2024-08-11 08:34:17,583][Main][INFO] - [train] Step 46500 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.850 | Lr --> 0.004 | Seconds_per_step --> 3.400 |
|
1012 |
+
[2024-08-11 08:37:06,886][Main][INFO] - [train] Step 46550 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.858 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
|
1013 |
+
[2024-08-11 08:39:56,545][Main][INFO] - [train] Step 46600 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.855 | Lr --> 0.004 | Seconds_per_step --> 3.393 |
|
1014 |
+
[2024-08-11 08:42:45,502][Main][INFO] - [train] Step 46650 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.857 | Lr --> 0.004 | Seconds_per_step --> 3.379 |
|
1015 |
+
[2024-08-11 08:45:34,652][Main][INFO] - [train] Step 46700 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.866 | Lr --> 0.004 | Seconds_per_step --> 3.383 |
|
1016 |
+
[2024-08-11 08:48:24,024][Main][INFO] - [train] Step 46750 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.869 | Lr --> 0.004 | Seconds_per_step --> 3.387 |
|
1017 |
+
[2024-08-11 08:51:12,848][Main][INFO] - [train] Step 46800 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.865 | Lr --> 0.004 | Seconds_per_step --> 3.376 |
|
1018 |
+
[2024-08-11 08:54:02,438][Main][INFO] - [train] Step 46850 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.867 | Lr --> 0.004 | Seconds_per_step --> 3.392 |
|
1019 |
+
[2024-08-11 08:56:51,614][Main][INFO] - [train] Step 46900 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.872 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
|
1020 |
+
[2024-08-11 08:59:41,654][Main][INFO] - [train] Step 46950 out of 80000 | Loss --> 1.861 | Grad_l2 --> 0.306 | Weights_l2 --> 9102.882 | Lr --> 0.004 | Seconds_per_step --> 3.401 |
|
1021 |
+
[2024-08-11 09:02:30,942][Main][INFO] - [train] Step 47000 out of 80000 | Loss --> 1.866 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.872 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
|
1022 |
+
[2024-08-11 09:05:19,246][Main][INFO] - [train] Step 47050 out of 80000 | Loss --> 1.868 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.874 | Lr --> 0.004 | Seconds_per_step --> 3.366 |
|
1023 |
+
[2024-08-11 09:08:08,518][Main][INFO] - [train] Step 47100 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.877 | Lr --> 0.004 | Seconds_per_step --> 3.385 |
|
1024 |
+
[2024-08-11 09:10:58,772][Main][INFO] - [train] Step 47150 out of 80000 | Loss --> 1.870 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.867 | Lr --> 0.004 | Seconds_per_step --> 3.405 |
|
1025 |
+
[2024-08-11 09:13:49,754][Main][INFO] - [train] Step 47200 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.854 | Lr --> 0.004 | Seconds_per_step --> 3.420 |
|
1026 |
+
[2024-08-11 09:16:39,033][Main][INFO] - [train] Step 47250 out of 80000 | Loss --> 1.867 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.846 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
|
1027 |
+
[2024-08-11 09:19:28,134][Main][INFO] - [train] Step 47300 out of 80000 | Loss --> 1.863 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.850 | Lr --> 0.004 | Seconds_per_step --> 3.382 |
|
1028 |
+
[2024-08-11 09:22:18,500][Main][INFO] - [train] Step 47350 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.833 | Lr --> 0.004 | Seconds_per_step --> 3.407 |
|
1029 |
+
[2024-08-11 09:25:07,928][Main][INFO] - [train] Step 47400 out of 80000 | Loss --> 1.865 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.834 | Lr --> 0.004 | Seconds_per_step --> 3.389 |
|
1030 |
+
[2024-08-11 09:27:57,487][Main][INFO] - [train] Step 47450 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.833 | Lr --> 0.004 | Seconds_per_step --> 3.391 |
|
1031 |
+
[2024-08-11 09:30:46,725][Main][INFO] - [train] Step 47500 out of 80000 | Loss --> 1.865 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.813 | Lr --> 0.004 | Seconds_per_step --> 3.385 |
|
1032 |
+
[2024-08-11 09:33:36,752][Main][INFO] - [train] Step 47550 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.811 | Lr --> 0.004 | Seconds_per_step --> 3.401 |
|
1033 |
+
[2024-08-11 09:36:24,674][Main][INFO] - [train] Step 47600 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.806 | Lr --> 0.004 | Seconds_per_step --> 3.358 |
|
1034 |
+
[2024-08-11 09:39:13,509][Main][INFO] - [train] Step 47650 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.793 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
|
1035 |
+
[2024-08-11 09:42:02,601][Main][INFO] - [train] Step 47700 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.780 | Lr --> 0.004 | Seconds_per_step --> 3.382 |
|
1036 |
+
[2024-08-11 09:44:50,911][Main][INFO] - [train] Step 47750 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.776 | Lr --> 0.004 | Seconds_per_step --> 3.366 |
|
1037 |
+
[2024-08-11 09:47:37,616][Main][INFO] - [train] Step 47800 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.762 | Lr --> 0.004 | Seconds_per_step --> 3.334 |
|
1038 |
+
[2024-08-11 09:50:26,867][Main][INFO] - [train] Step 47850 out of 80000 | Loss --> 1.864 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.746 | Lr --> 0.003 | Seconds_per_step --> 3.385 |
|
1039 |
+
[2024-08-11 09:53:16,815][Main][INFO] - [train] Step 47900 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.723 | Lr --> 0.003 | Seconds_per_step --> 3.399 |
|
1040 |
+
[2024-08-11 09:56:06,163][Main][INFO] - [train] Step 47950 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.696 | Lr --> 0.003 | Seconds_per_step --> 3.387 |
|
1041 |
+
[2024-08-11 09:58:56,126][Main][INFO] - [train] Step 48000 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.680 | Lr --> 0.003 | Seconds_per_step --> 3.399 |
|
1042 |
+
[2024-08-11 10:01:46,482][Main][INFO] - [train] Step 48050 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.658 | Lr --> 0.003 | Seconds_per_step --> 3.407 |
|
1043 |
+
[2024-08-11 10:04:36,355][Main][INFO] - [train] Step 48100 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.643 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
|
1044 |
+
[2024-08-11 10:07:26,163][Main][INFO] - [train] Step 48150 out of 80000 | Loss --> 1.863 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.618 | Lr --> 0.003 | Seconds_per_step --> 3.396 |
|
1045 |
+
[2024-08-11 10:10:15,109][Main][INFO] - [train] Step 48200 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.598 | Lr --> 0.003 | Seconds_per_step --> 3.379 |
|
1046 |
+
[2024-08-11 10:13:04,283][Main][INFO] - [train] Step 48250 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.579 | Lr --> 0.003 | Seconds_per_step --> 3.383 |
|
1047 |
+
[2024-08-11 10:15:56,254][Main][INFO] - [train] Step 48300 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.561 | Lr --> 0.003 | Seconds_per_step --> 3.439 |
|
1048 |
+
[2024-08-11 10:18:46,081][Main][INFO] - [train] Step 48350 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.531 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
|
1049 |
+
[2024-08-11 10:21:34,933][Main][INFO] - [train] Step 48400 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.506 | Lr --> 0.003 | Seconds_per_step --> 3.377 |
|
1050 |
+
[2024-08-11 10:24:21,068][Main][INFO] - [train] Step 48450 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.483 | Lr --> 0.003 | Seconds_per_step --> 3.323 |
|
1051 |
+
[2024-08-11 10:27:09,253][Main][INFO] - [train] Step 48500 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.456 | Lr --> 0.003 | Seconds_per_step --> 3.364 |
|
1052 |
+
[2024-08-11 10:29:57,885][Main][INFO] - [train] Step 48550 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.436 | Lr --> 0.003 | Seconds_per_step --> 3.373 |
|
1053 |
+
[2024-08-11 10:32:48,240][Main][INFO] - [train] Step 48600 out of 80000 | Loss --> 1.846 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.391 | Lr --> 0.003 | Seconds_per_step --> 3.407 |
|
1054 |
+
[2024-08-11 10:35:37,262][Main][INFO] - [train] Step 48650 out of 80000 | Loss --> 1.827 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.374 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
|
1055 |
+
[2024-08-11 10:38:26,196][Main][INFO] - [train] Step 48700 out of 80000 | Loss --> 1.840 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.333 | Lr --> 0.003 | Seconds_per_step --> 3.379 |
|
1056 |
+
[2024-08-11 10:41:14,907][Main][INFO] - [train] Step 48750 out of 80000 | Loss --> 1.835 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.312 | Lr --> 0.003 | Seconds_per_step --> 3.374 |
|
1057 |
+
[2024-08-11 10:44:04,940][Main][INFO] - [train] Step 48800 out of 80000 | Loss --> 1.830 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.293 | Lr --> 0.003 | Seconds_per_step --> 3.401 |
|
1058 |
+
[2024-08-11 10:46:53,292][Main][INFO] - [train] Step 48850 out of 80000 | Loss --> 1.825 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.255 | Lr --> 0.003 | Seconds_per_step --> 3.367 |
|
1059 |
+
[2024-08-11 10:49:42,903][Main][INFO] - [train] Step 48900 out of 80000 | Loss --> 1.823 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.212 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
1060 |
+
[2024-08-11 10:52:33,141][Main][INFO] - [train] Step 48950 out of 80000 | Loss --> 1.821 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.190 | Lr --> 0.003 | Seconds_per_step --> 3.405 |
|
1061 |
+
[2024-08-11 10:55:23,302][Main][INFO] - [train] Step 49000 out of 80000 | Loss --> 1.821 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.157 | Lr --> 0.003 | Seconds_per_step --> 3.403 |
|
1062 |
+
[2024-08-11 10:58:12,503][Main][INFO] - [train] Step 49050 out of 80000 | Loss --> 1.823 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.124 | Lr --> 0.003 | Seconds_per_step --> 3.384 |
|
1063 |
+
[2024-08-11 11:01:02,175][Main][INFO] - [train] Step 49100 out of 80000 | Loss --> 1.828 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.096 | Lr --> 0.003 | Seconds_per_step --> 3.393 |
|
1064 |
+
[2024-08-11 11:03:51,767][Main][INFO] - [train] Step 49150 out of 80000 | Loss --> 1.834 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.049 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
1065 |
+
[2024-08-11 11:06:42,443][Main][INFO] - [train] Step 49200 out of 80000 | Loss --> 1.830 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.013 | Lr --> 0.003 | Seconds_per_step --> 3.414 |
|
1066 |
+
[2024-08-11 11:09:31,684][Main][INFO] - [train] Step 49250 out of 80000 | Loss --> 1.833 | Grad_l2 --> 0.302 | Weights_l2 --> 9101.972 | Lr --> 0.003 | Seconds_per_step --> 3.385 |
|
1067 |
+
[2024-08-11 11:12:20,327][Main][INFO] - [train] Step 49300 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.928 | Lr --> 0.003 | Seconds_per_step --> 3.373 |
|
1068 |
+
[2024-08-11 11:15:09,324][Main][INFO] - [train] Step 49350 out of 80000 | Loss --> 1.846 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.889 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
|
1069 |
+
[2024-08-11 11:18:00,030][Main][INFO] - [train] Step 49400 out of 80000 | Loss --> 1.838 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.850 | Lr --> 0.003 | Seconds_per_step --> 3.414 |
|
1070 |
+
[2024-08-11 11:20:49,651][Main][INFO] - [train] Step 49450 out of 80000 | Loss --> 1.835 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.804 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
1071 |
+
[2024-08-11 11:23:37,817][Main][INFO] - [train] Step 49500 out of 80000 | Loss --> 1.843 | Grad_l2 --> 0.302 | Weights_l2 --> 9101.766 | Lr --> 0.003 | Seconds_per_step --> 3.363 |
|
1072 |
+
[2024-08-11 11:26:26,286][Main][INFO] - [train] Step 49550 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.726 | Lr --> 0.003 | Seconds_per_step --> 3.369 |
|
1073 |
+
[2024-08-11 11:29:16,394][Main][INFO] - [train] Step 49600 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.673 | Lr --> 0.003 | Seconds_per_step --> 3.402 |
|
1074 |
+
[2024-08-11 11:32:04,951][Main][INFO] - [train] Step 49650 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.301 | Weights_l2 --> 9101.629 | Lr --> 0.003 | Seconds_per_step --> 3.371 |
|
1075 |
+
[2024-08-11 11:34:54,813][Main][INFO] - [train] Step 49700 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.585 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
|
1076 |
+
[2024-08-11 11:37:44,289][Main][INFO] - [train] Step 49750 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.306 | Weights_l2 --> 9101.539 | Lr --> 0.003 | Seconds_per_step --> 3.390 |
|
1077 |
+
[2024-08-11 11:40:33,889][Main][INFO] - [train] Step 49800 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.493 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
1078 |
+
[2024-08-11 11:43:23,944][Main][INFO] - [train] Step 49850 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.449 | Lr --> 0.003 | Seconds_per_step --> 3.401 |
|
1079 |
+
[2024-08-11 11:46:13,525][Main][INFO] - [train] Step 49900 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.408 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
1080 |
+
[2024-08-11 11:49:03,443][Main][INFO] - [train] Step 49950 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.359 | Lr --> 0.003 | Seconds_per_step --> 3.398 |
|
1081 |
+
[2024-08-11 11:51:53,359][Main][INFO] - [train] Step 50000 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.306 | Weights_l2 --> 9101.317 | Lr --> 0.003 | Seconds_per_step --> 3.398 |
|
1082 |
+
[2024-08-11 11:51:53,359][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-50000
|
1083 |
+
[2024-08-11 11:51:53,362][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
1084 |
+
[2024-08-11 11:51:55,493][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-50000/model.safetensors
|
1085 |
+
[2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-50000/optimizer.bin
|
1086 |
+
[2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-50000/scheduler.bin
|
1087 |
+
[2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-50000/sampler.bin
|
1088 |
+
[2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-50000/sampler_1.bin
|
1089 |
+
[2024-08-11 11:51:58,611][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-50000/random_states_0.pkl
|
1090 |
+
[2024-08-11 11:54:48,709][Main][INFO] - [train] Step 50050 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.280 | Lr --> 0.003 | Seconds_per_step --> 3.507 |
|
1091 |
+
[2024-08-11 11:57:38,118][Main][INFO] - [train] Step 50100 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.305 | Weights_l2 --> 9101.232 | Lr --> 0.003 | Seconds_per_step --> 3.388 |
|
1092 |
+
[2024-08-11 12:00:27,137][Main][INFO] - [train] Step 50150 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.305 | Weights_l2 --> 9101.189 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
|
1093 |
+
[2024-08-11 12:03:16,714][Main][INFO] - [train] Step 50200 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.138 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
|
checkpoints/seconds_per_step_over_steps.png
CHANGED
checkpoints/training_metrics.csv
CHANGED
@@ -917,3 +917,89 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
|
|
917 |
"2024-08-11 07:54:48,582",45800,1.861,0.301,9102.628,0.004,3.376
|
918 |
"2024-08-11 07:57:37,200",45850,1.856,0.304,9102.643,0.004,3.372
|
919 |
"2024-08-11 08:00:26,936",45900,1.857,0.302,9102.666,0.004,3.395
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
917 |
"2024-08-11 07:54:48,582",45800,1.861,0.301,9102.628,0.004,3.376
|
918 |
"2024-08-11 07:57:37,200",45850,1.856,0.304,9102.643,0.004,3.372
|
919 |
"2024-08-11 08:00:26,936",45900,1.857,0.302,9102.666,0.004,3.395
|
920 |
+
"2024-08-11 08:03:16,131",45950,1.859,0.305,9102.691,0.004,3.384
|
921 |
+
"2024-08-11 08:06:04,985",46000,1.857,0.302,9102.708,0.004,3.377
|
922 |
+
"2024-08-11 08:08:53,975",46050,1.847,0.304,9102.731,0.004,3.38
|
923 |
+
"2024-08-11 08:11:43,887",46100,1.857,0.302,9102.751,0.004,3.398
|
924 |
+
"2024-08-11 08:14:32,925",46150,1.854,0.3,9102.767,0.004,3.381
|
925 |
+
"2024-08-11 08:17:21,782",46200,1.851,0.302,9102.776,0.004,3.377
|
926 |
+
"2024-08-11 08:20:11,070",46250,1.85,0.302,9102.791,0.004,3.386
|
927 |
+
"2024-08-11 08:23:00,985",46300,1.85,0.302,9102.809,0.004,3.398
|
928 |
+
"2024-08-11 08:25:48,354",46350,1.851,0.302,9102.822,0.004,3.347
|
929 |
+
"2024-08-11 08:28:37,651",46400,1.848,0.3,9102.831,0.004,3.386
|
930 |
+
"2024-08-11 08:31:27,592",46450,1.854,0.299,9102.837,0.004,3.399
|
931 |
+
"2024-08-11 08:34:17,583",46500,1.841,0.301,9102.85,0.004,3.4
|
932 |
+
"2024-08-11 08:37:06,886",46550,1.849,0.302,9102.858,0.004,3.386
|
933 |
+
"2024-08-11 08:39:56,545",46600,1.845,0.303,9102.855,0.004,3.393
|
934 |
+
"2024-08-11 08:42:45,502",46650,1.845,0.301,9102.857,0.004,3.379
|
935 |
+
"2024-08-11 08:45:34,652",46700,1.858,0.3,9102.866,0.004,3.383
|
936 |
+
"2024-08-11 08:48:24,024",46750,1.857,0.3,9102.869,0.004,3.387
|
937 |
+
"2024-08-11 08:51:12,848",46800,1.856,0.304,9102.865,0.004,3.376
|
938 |
+
"2024-08-11 08:54:02,438",46850,1.852,0.302,9102.867,0.004,3.392
|
939 |
+
"2024-08-11 08:56:51,614",46900,1.858,0.301,9102.872,0.004,3.384
|
940 |
+
"2024-08-11 08:59:41,654",46950,1.861,0.306,9102.882,0.004,3.401
|
941 |
+
"2024-08-11 09:02:30,942",47000,1.866,0.304,9102.872,0.004,3.386
|
942 |
+
"2024-08-11 09:05:19,246",47050,1.868,0.301,9102.874,0.004,3.366
|
943 |
+
"2024-08-11 09:08:08,518",47100,1.86,0.303,9102.877,0.004,3.385
|
944 |
+
"2024-08-11 09:10:58,772",47150,1.87,0.301,9102.867,0.004,3.405
|
945 |
+
"2024-08-11 09:13:49,754",47200,1.853,0.302,9102.854,0.004,3.42
|
946 |
+
"2024-08-11 09:16:39,033",47250,1.867,0.303,9102.846,0.004,3.386
|
947 |
+
"2024-08-11 09:19:28,134",47300,1.863,0.303,9102.85,0.004,3.382
|
948 |
+
"2024-08-11 09:22:18,500",47350,1.862,0.303,9102.833,0.004,3.407
|
949 |
+
"2024-08-11 09:25:07,928",47400,1.865,0.3,9102.834,0.004,3.389
|
950 |
+
"2024-08-11 09:27:57,487",47450,1.86,0.302,9102.833,0.004,3.391
|
951 |
+
"2024-08-11 09:30:46,725",47500,1.865,0.302,9102.813,0.004,3.385
|
952 |
+
"2024-08-11 09:33:36,752",47550,1.862,0.303,9102.811,0.004,3.401
|
953 |
+
"2024-08-11 09:36:24,674",47600,1.86,0.302,9102.806,0.004,3.358
|
954 |
+
"2024-08-11 09:39:13,509",47650,1.856,0.301,9102.793,0.004,3.377
|
955 |
+
"2024-08-11 09:42:02,601",47700,1.862,0.302,9102.78,0.004,3.382
|
956 |
+
"2024-08-11 09:44:50,911",47750,1.857,0.302,9102.776,0.004,3.366
|
957 |
+
"2024-08-11 09:47:37,616",47800,1.862,0.303,9102.762,0.004,3.334
|
958 |
+
"2024-08-11 09:50:26,867",47850,1.864,0.303,9102.746,0.003,3.385
|
959 |
+
"2024-08-11 09:53:16,815",47900,1.851,0.302,9102.723,0.003,3.399
|
960 |
+
"2024-08-11 09:56:06,163",47950,1.855,0.3,9102.696,0.003,3.387
|
961 |
+
"2024-08-11 09:58:56,126",48000,1.852,0.301,9102.68,0.003,3.399
|
962 |
+
"2024-08-11 10:01:46,482",48050,1.858,0.301,9102.658,0.003,3.407
|
963 |
+
"2024-08-11 10:04:36,355",48100,1.855,0.299,9102.643,0.003,3.397
|
964 |
+
"2024-08-11 10:07:26,163",48150,1.863,0.302,9102.618,0.003,3.396
|
965 |
+
"2024-08-11 10:10:15,109",48200,1.857,0.303,9102.598,0.003,3.379
|
966 |
+
"2024-08-11 10:13:04,283",48250,1.852,0.302,9102.579,0.003,3.383
|
967 |
+
"2024-08-11 10:15:56,254",48300,1.856,0.304,9102.561,0.003,3.439
|
968 |
+
"2024-08-11 10:18:46,081",48350,1.849,0.3,9102.531,0.003,3.397
|
969 |
+
"2024-08-11 10:21:34,933",48400,1.851,0.303,9102.506,0.003,3.377
|
970 |
+
"2024-08-11 10:24:21,068",48450,1.852,0.301,9102.483,0.003,3.323
|
971 |
+
"2024-08-11 10:27:09,253",48500,1.842,0.303,9102.456,0.003,3.364
|
972 |
+
"2024-08-11 10:29:57,885",48550,1.845,0.303,9102.436,0.003,3.373
|
973 |
+
"2024-08-11 10:32:48,240",48600,1.846,0.301,9102.391,0.003,3.407
|
974 |
+
"2024-08-11 10:35:37,262",48650,1.827,0.301,9102.374,0.003,3.38
|
975 |
+
"2024-08-11 10:38:26,196",48700,1.84,0.3,9102.333,0.003,3.379
|
976 |
+
"2024-08-11 10:41:14,907",48750,1.835,0.302,9102.312,0.003,3.374
|
977 |
+
"2024-08-11 10:44:04,940",48800,1.83,0.299,9102.293,0.003,3.401
|
978 |
+
"2024-08-11 10:46:53,292",48850,1.825,0.301,9102.255,0.003,3.367
|
979 |
+
"2024-08-11 10:49:42,903",48900,1.823,0.302,9102.212,0.003,3.392
|
980 |
+
"2024-08-11 10:52:33,141",48950,1.821,0.299,9102.19,0.003,3.405
|
981 |
+
"2024-08-11 10:55:23,302",49000,1.821,0.301,9102.157,0.003,3.403
|
982 |
+
"2024-08-11 10:58:12,503",49050,1.823,0.301,9102.124,0.003,3.384
|
983 |
+
"2024-08-11 11:01:02,175",49100,1.828,0.3,9102.096,0.003,3.393
|
984 |
+
"2024-08-11 11:03:51,767",49150,1.834,0.304,9102.049,0.003,3.392
|
985 |
+
"2024-08-11 11:06:42,443",49200,1.83,0.301,9102.013,0.003,3.414
|
986 |
+
"2024-08-11 11:09:31,684",49250,1.833,0.302,9101.972,0.003,3.385
|
987 |
+
"2024-08-11 11:12:20,327",49300,1.842,0.303,9101.928,0.003,3.373
|
988 |
+
"2024-08-11 11:15:09,324",49350,1.846,0.304,9101.889,0.003,3.38
|
989 |
+
"2024-08-11 11:18:00,030",49400,1.838,0.304,9101.85,0.003,3.414
|
990 |
+
"2024-08-11 11:20:49,651",49450,1.835,0.303,9101.804,0.003,3.392
|
991 |
+
"2024-08-11 11:23:37,817",49500,1.843,0.302,9101.766,0.003,3.363
|
992 |
+
"2024-08-11 11:26:26,286",49550,1.842,0.304,9101.726,0.003,3.369
|
993 |
+
"2024-08-11 11:29:16,394",49600,1.848,0.304,9101.673,0.003,3.402
|
994 |
+
"2024-08-11 11:32:04,951",49650,1.841,0.301,9101.629,0.003,3.371
|
995 |
+
"2024-08-11 11:34:54,813",49700,1.852,0.303,9101.585,0.003,3.397
|
996 |
+
"2024-08-11 11:37:44,289",49750,1.855,0.306,9101.539,0.003,3.39
|
997 |
+
"2024-08-11 11:40:33,889",49800,1.849,0.304,9101.493,0.003,3.392
|
998 |
+
"2024-08-11 11:43:23,944",49850,1.841,0.304,9101.449,0.003,3.401
|
999 |
+
"2024-08-11 11:46:13,525",49900,1.85,0.303,9101.408,0.003,3.392
|
1000 |
+
"2024-08-11 11:49:03,443",49950,1.849,0.304,9101.359,0.003,3.398
|
1001 |
+
"2024-08-11 11:51:53,359",50000,1.849,0.306,9101.317,0.003,3.398
|
1002 |
+
"2024-08-11 11:54:48,709",50050,1.848,0.304,9101.28,0.003,3.507
|
1003 |
+
"2024-08-11 11:57:38,118",50100,1.853,0.305,9101.232,0.003,3.388
|
1004 |
+
"2024-08-11 12:00:27,137",50150,1.858,0.305,9101.189,0.003,3.38
|
1005 |
+
"2024-08-11 12:03:16,714",50200,1.853,0.304,9101.138,0.003,3.392
|
checkpoints/weights_l2_over_steps.png
CHANGED