pszemraj commited on
Commit
b624068
·
verified ·
1 Parent(s): 1cc5f67

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-50000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07bf1f91330d84f49ff94b792c3b178dcd5b630796a8477dadc1c27d71373985
3
+ size 1202681712
checkpoints/checkpoint-pt-50000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -998,3 +998,96 @@ Mixed precision type: bf16
998
  [2024-08-11 07:57:37,200][Main][INFO] - [train] Step 45850 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.643 | Lr --> 0.004 | Seconds_per_step --> 3.372 |
999
  [2024-08-11 08:00:26,936][Main][INFO] - [train] Step 45900 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.666 | Lr --> 0.004 | Seconds_per_step --> 3.395 |
1000
  [2024-08-11 08:03:16,131][Main][INFO] - [train] Step 45950 out of 80000 | Loss --> 1.859 | Grad_l2 --> 0.305 | Weights_l2 --> 9102.691 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  [2024-08-11 07:57:37,200][Main][INFO] - [train] Step 45850 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.643 | Lr --> 0.004 | Seconds_per_step --> 3.372 |
999
  [2024-08-11 08:00:26,936][Main][INFO] - [train] Step 45900 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.666 | Lr --> 0.004 | Seconds_per_step --> 3.395 |
1000
  [2024-08-11 08:03:16,131][Main][INFO] - [train] Step 45950 out of 80000 | Loss --> 1.859 | Grad_l2 --> 0.305 | Weights_l2 --> 9102.691 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
1001
+ [2024-08-11 08:06:04,985][Main][INFO] - [train] Step 46000 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.708 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
1002
+ [2024-08-11 08:08:53,975][Main][INFO] - [train] Step 46050 out of 80000 | Loss --> 1.847 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.731 | Lr --> 0.004 | Seconds_per_step --> 3.380 |
1003
+ [2024-08-11 08:11:43,887][Main][INFO] - [train] Step 46100 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.751 | Lr --> 0.004 | Seconds_per_step --> 3.398 |
1004
+ [2024-08-11 08:14:32,925][Main][INFO] - [train] Step 46150 out of 80000 | Loss --> 1.854 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.767 | Lr --> 0.004 | Seconds_per_step --> 3.381 |
1005
+ [2024-08-11 08:17:21,782][Main][INFO] - [train] Step 46200 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.776 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
1006
+ [2024-08-11 08:20:11,070][Main][INFO] - [train] Step 46250 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.791 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
1007
+ [2024-08-11 08:23:00,985][Main][INFO] - [train] Step 46300 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.809 | Lr --> 0.004 | Seconds_per_step --> 3.398 |
1008
+ [2024-08-11 08:25:48,354][Main][INFO] - [train] Step 46350 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.822 | Lr --> 0.004 | Seconds_per_step --> 3.347 |
1009
+ [2024-08-11 08:28:37,651][Main][INFO] - [train] Step 46400 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.831 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
1010
+ [2024-08-11 08:31:27,592][Main][INFO] - [train] Step 46450 out of 80000 | Loss --> 1.854 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.837 | Lr --> 0.004 | Seconds_per_step --> 3.399 |
1011
+ [2024-08-11 08:34:17,583][Main][INFO] - [train] Step 46500 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.850 | Lr --> 0.004 | Seconds_per_step --> 3.400 |
1012
+ [2024-08-11 08:37:06,886][Main][INFO] - [train] Step 46550 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.858 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
1013
+ [2024-08-11 08:39:56,545][Main][INFO] - [train] Step 46600 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.855 | Lr --> 0.004 | Seconds_per_step --> 3.393 |
1014
+ [2024-08-11 08:42:45,502][Main][INFO] - [train] Step 46650 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.857 | Lr --> 0.004 | Seconds_per_step --> 3.379 |
1015
+ [2024-08-11 08:45:34,652][Main][INFO] - [train] Step 46700 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.866 | Lr --> 0.004 | Seconds_per_step --> 3.383 |
1016
+ [2024-08-11 08:48:24,024][Main][INFO] - [train] Step 46750 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.869 | Lr --> 0.004 | Seconds_per_step --> 3.387 |
1017
+ [2024-08-11 08:51:12,848][Main][INFO] - [train] Step 46800 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.865 | Lr --> 0.004 | Seconds_per_step --> 3.376 |
1018
+ [2024-08-11 08:54:02,438][Main][INFO] - [train] Step 46850 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.867 | Lr --> 0.004 | Seconds_per_step --> 3.392 |
1019
+ [2024-08-11 08:56:51,614][Main][INFO] - [train] Step 46900 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.872 | Lr --> 0.004 | Seconds_per_step --> 3.384 |
1020
+ [2024-08-11 08:59:41,654][Main][INFO] - [train] Step 46950 out of 80000 | Loss --> 1.861 | Grad_l2 --> 0.306 | Weights_l2 --> 9102.882 | Lr --> 0.004 | Seconds_per_step --> 3.401 |
1021
+ [2024-08-11 09:02:30,942][Main][INFO] - [train] Step 47000 out of 80000 | Loss --> 1.866 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.872 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
1022
+ [2024-08-11 09:05:19,246][Main][INFO] - [train] Step 47050 out of 80000 | Loss --> 1.868 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.874 | Lr --> 0.004 | Seconds_per_step --> 3.366 |
1023
+ [2024-08-11 09:08:08,518][Main][INFO] - [train] Step 47100 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.877 | Lr --> 0.004 | Seconds_per_step --> 3.385 |
1024
+ [2024-08-11 09:10:58,772][Main][INFO] - [train] Step 47150 out of 80000 | Loss --> 1.870 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.867 | Lr --> 0.004 | Seconds_per_step --> 3.405 |
1025
+ [2024-08-11 09:13:49,754][Main][INFO] - [train] Step 47200 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.854 | Lr --> 0.004 | Seconds_per_step --> 3.420 |
1026
+ [2024-08-11 09:16:39,033][Main][INFO] - [train] Step 47250 out of 80000 | Loss --> 1.867 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.846 | Lr --> 0.004 | Seconds_per_step --> 3.386 |
1027
+ [2024-08-11 09:19:28,134][Main][INFO] - [train] Step 47300 out of 80000 | Loss --> 1.863 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.850 | Lr --> 0.004 | Seconds_per_step --> 3.382 |
1028
+ [2024-08-11 09:22:18,500][Main][INFO] - [train] Step 47350 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.833 | Lr --> 0.004 | Seconds_per_step --> 3.407 |
1029
+ [2024-08-11 09:25:07,928][Main][INFO] - [train] Step 47400 out of 80000 | Loss --> 1.865 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.834 | Lr --> 0.004 | Seconds_per_step --> 3.389 |
1030
+ [2024-08-11 09:27:57,487][Main][INFO] - [train] Step 47450 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.833 | Lr --> 0.004 | Seconds_per_step --> 3.391 |
1031
+ [2024-08-11 09:30:46,725][Main][INFO] - [train] Step 47500 out of 80000 | Loss --> 1.865 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.813 | Lr --> 0.004 | Seconds_per_step --> 3.385 |
1032
+ [2024-08-11 09:33:36,752][Main][INFO] - [train] Step 47550 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.811 | Lr --> 0.004 | Seconds_per_step --> 3.401 |
1033
+ [2024-08-11 09:36:24,674][Main][INFO] - [train] Step 47600 out of 80000 | Loss --> 1.860 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.806 | Lr --> 0.004 | Seconds_per_step --> 3.358 |
1034
+ [2024-08-11 09:39:13,509][Main][INFO] - [train] Step 47650 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.793 | Lr --> 0.004 | Seconds_per_step --> 3.377 |
1035
+ [2024-08-11 09:42:02,601][Main][INFO] - [train] Step 47700 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.780 | Lr --> 0.004 | Seconds_per_step --> 3.382 |
1036
+ [2024-08-11 09:44:50,911][Main][INFO] - [train] Step 47750 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.776 | Lr --> 0.004 | Seconds_per_step --> 3.366 |
1037
+ [2024-08-11 09:47:37,616][Main][INFO] - [train] Step 47800 out of 80000 | Loss --> 1.862 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.762 | Lr --> 0.004 | Seconds_per_step --> 3.334 |
1038
+ [2024-08-11 09:50:26,867][Main][INFO] - [train] Step 47850 out of 80000 | Loss --> 1.864 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.746 | Lr --> 0.003 | Seconds_per_step --> 3.385 |
1039
+ [2024-08-11 09:53:16,815][Main][INFO] - [train] Step 47900 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.723 | Lr --> 0.003 | Seconds_per_step --> 3.399 |
1040
+ [2024-08-11 09:56:06,163][Main][INFO] - [train] Step 47950 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.696 | Lr --> 0.003 | Seconds_per_step --> 3.387 |
1041
+ [2024-08-11 09:58:56,126][Main][INFO] - [train] Step 48000 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.680 | Lr --> 0.003 | Seconds_per_step --> 3.399 |
1042
+ [2024-08-11 10:01:46,482][Main][INFO] - [train] Step 48050 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.658 | Lr --> 0.003 | Seconds_per_step --> 3.407 |
1043
+ [2024-08-11 10:04:36,355][Main][INFO] - [train] Step 48100 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.643 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
1044
+ [2024-08-11 10:07:26,163][Main][INFO] - [train] Step 48150 out of 80000 | Loss --> 1.863 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.618 | Lr --> 0.003 | Seconds_per_step --> 3.396 |
1045
+ [2024-08-11 10:10:15,109][Main][INFO] - [train] Step 48200 out of 80000 | Loss --> 1.857 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.598 | Lr --> 0.003 | Seconds_per_step --> 3.379 |
1046
+ [2024-08-11 10:13:04,283][Main][INFO] - [train] Step 48250 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.579 | Lr --> 0.003 | Seconds_per_step --> 3.383 |
1047
+ [2024-08-11 10:15:56,254][Main][INFO] - [train] Step 48300 out of 80000 | Loss --> 1.856 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.561 | Lr --> 0.003 | Seconds_per_step --> 3.439 |
1048
+ [2024-08-11 10:18:46,081][Main][INFO] - [train] Step 48350 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.531 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
1049
+ [2024-08-11 10:21:34,933][Main][INFO] - [train] Step 48400 out of 80000 | Loss --> 1.851 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.506 | Lr --> 0.003 | Seconds_per_step --> 3.377 |
1050
+ [2024-08-11 10:24:21,068][Main][INFO] - [train] Step 48450 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.483 | Lr --> 0.003 | Seconds_per_step --> 3.323 |
1051
+ [2024-08-11 10:27:09,253][Main][INFO] - [train] Step 48500 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.456 | Lr --> 0.003 | Seconds_per_step --> 3.364 |
1052
+ [2024-08-11 10:29:57,885][Main][INFO] - [train] Step 48550 out of 80000 | Loss --> 1.845 | Grad_l2 --> 0.303 | Weights_l2 --> 9102.436 | Lr --> 0.003 | Seconds_per_step --> 3.373 |
1053
+ [2024-08-11 10:32:48,240][Main][INFO] - [train] Step 48600 out of 80000 | Loss --> 1.846 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.391 | Lr --> 0.003 | Seconds_per_step --> 3.407 |
1054
+ [2024-08-11 10:35:37,262][Main][INFO] - [train] Step 48650 out of 80000 | Loss --> 1.827 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.374 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
1055
+ [2024-08-11 10:38:26,196][Main][INFO] - [train] Step 48700 out of 80000 | Loss --> 1.840 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.333 | Lr --> 0.003 | Seconds_per_step --> 3.379 |
1056
+ [2024-08-11 10:41:14,907][Main][INFO] - [train] Step 48750 out of 80000 | Loss --> 1.835 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.312 | Lr --> 0.003 | Seconds_per_step --> 3.374 |
1057
+ [2024-08-11 10:44:04,940][Main][INFO] - [train] Step 48800 out of 80000 | Loss --> 1.830 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.293 | Lr --> 0.003 | Seconds_per_step --> 3.401 |
1058
+ [2024-08-11 10:46:53,292][Main][INFO] - [train] Step 48850 out of 80000 | Loss --> 1.825 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.255 | Lr --> 0.003 | Seconds_per_step --> 3.367 |
1059
+ [2024-08-11 10:49:42,903][Main][INFO] - [train] Step 48900 out of 80000 | Loss --> 1.823 | Grad_l2 --> 0.302 | Weights_l2 --> 9102.212 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
1060
+ [2024-08-11 10:52:33,141][Main][INFO] - [train] Step 48950 out of 80000 | Loss --> 1.821 | Grad_l2 --> 0.299 | Weights_l2 --> 9102.190 | Lr --> 0.003 | Seconds_per_step --> 3.405 |
1061
+ [2024-08-11 10:55:23,302][Main][INFO] - [train] Step 49000 out of 80000 | Loss --> 1.821 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.157 | Lr --> 0.003 | Seconds_per_step --> 3.403 |
1062
+ [2024-08-11 10:58:12,503][Main][INFO] - [train] Step 49050 out of 80000 | Loss --> 1.823 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.124 | Lr --> 0.003 | Seconds_per_step --> 3.384 |
1063
+ [2024-08-11 11:01:02,175][Main][INFO] - [train] Step 49100 out of 80000 | Loss --> 1.828 | Grad_l2 --> 0.300 | Weights_l2 --> 9102.096 | Lr --> 0.003 | Seconds_per_step --> 3.393 |
1064
+ [2024-08-11 11:03:51,767][Main][INFO] - [train] Step 49150 out of 80000 | Loss --> 1.834 | Grad_l2 --> 0.304 | Weights_l2 --> 9102.049 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
1065
+ [2024-08-11 11:06:42,443][Main][INFO] - [train] Step 49200 out of 80000 | Loss --> 1.830 | Grad_l2 --> 0.301 | Weights_l2 --> 9102.013 | Lr --> 0.003 | Seconds_per_step --> 3.414 |
1066
+ [2024-08-11 11:09:31,684][Main][INFO] - [train] Step 49250 out of 80000 | Loss --> 1.833 | Grad_l2 --> 0.302 | Weights_l2 --> 9101.972 | Lr --> 0.003 | Seconds_per_step --> 3.385 |
1067
+ [2024-08-11 11:12:20,327][Main][INFO] - [train] Step 49300 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.928 | Lr --> 0.003 | Seconds_per_step --> 3.373 |
1068
+ [2024-08-11 11:15:09,324][Main][INFO] - [train] Step 49350 out of 80000 | Loss --> 1.846 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.889 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
1069
+ [2024-08-11 11:18:00,030][Main][INFO] - [train] Step 49400 out of 80000 | Loss --> 1.838 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.850 | Lr --> 0.003 | Seconds_per_step --> 3.414 |
1070
+ [2024-08-11 11:20:49,651][Main][INFO] - [train] Step 49450 out of 80000 | Loss --> 1.835 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.804 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
1071
+ [2024-08-11 11:23:37,817][Main][INFO] - [train] Step 49500 out of 80000 | Loss --> 1.843 | Grad_l2 --> 0.302 | Weights_l2 --> 9101.766 | Lr --> 0.003 | Seconds_per_step --> 3.363 |
1072
+ [2024-08-11 11:26:26,286][Main][INFO] - [train] Step 49550 out of 80000 | Loss --> 1.842 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.726 | Lr --> 0.003 | Seconds_per_step --> 3.369 |
1073
+ [2024-08-11 11:29:16,394][Main][INFO] - [train] Step 49600 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.673 | Lr --> 0.003 | Seconds_per_step --> 3.402 |
1074
+ [2024-08-11 11:32:04,951][Main][INFO] - [train] Step 49650 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.301 | Weights_l2 --> 9101.629 | Lr --> 0.003 | Seconds_per_step --> 3.371 |
1075
+ [2024-08-11 11:34:54,813][Main][INFO] - [train] Step 49700 out of 80000 | Loss --> 1.852 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.585 | Lr --> 0.003 | Seconds_per_step --> 3.397 |
1076
+ [2024-08-11 11:37:44,289][Main][INFO] - [train] Step 49750 out of 80000 | Loss --> 1.855 | Grad_l2 --> 0.306 | Weights_l2 --> 9101.539 | Lr --> 0.003 | Seconds_per_step --> 3.390 |
1077
+ [2024-08-11 11:40:33,889][Main][INFO] - [train] Step 49800 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.493 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
1078
+ [2024-08-11 11:43:23,944][Main][INFO] - [train] Step 49850 out of 80000 | Loss --> 1.841 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.449 | Lr --> 0.003 | Seconds_per_step --> 3.401 |
1079
+ [2024-08-11 11:46:13,525][Main][INFO] - [train] Step 49900 out of 80000 | Loss --> 1.850 | Grad_l2 --> 0.303 | Weights_l2 --> 9101.408 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
1080
+ [2024-08-11 11:49:03,443][Main][INFO] - [train] Step 49950 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.359 | Lr --> 0.003 | Seconds_per_step --> 3.398 |
1081
+ [2024-08-11 11:51:53,359][Main][INFO] - [train] Step 50000 out of 80000 | Loss --> 1.849 | Grad_l2 --> 0.306 | Weights_l2 --> 9101.317 | Lr --> 0.003 | Seconds_per_step --> 3.398 |
1082
+ [2024-08-11 11:51:53,359][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-50000
1083
+ [2024-08-11 11:51:53,362][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
1084
+ [2024-08-11 11:51:55,493][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-50000/model.safetensors
1085
+ [2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-50000/optimizer.bin
1086
+ [2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-50000/scheduler.bin
1087
+ [2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-50000/sampler.bin
1088
+ [2024-08-11 11:51:58,610][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-50000/sampler_1.bin
1089
+ [2024-08-11 11:51:58,611][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-50000/random_states_0.pkl
1090
+ [2024-08-11 11:54:48,709][Main][INFO] - [train] Step 50050 out of 80000 | Loss --> 1.848 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.280 | Lr --> 0.003 | Seconds_per_step --> 3.507 |
1091
+ [2024-08-11 11:57:38,118][Main][INFO] - [train] Step 50100 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.305 | Weights_l2 --> 9101.232 | Lr --> 0.003 | Seconds_per_step --> 3.388 |
1092
+ [2024-08-11 12:00:27,137][Main][INFO] - [train] Step 50150 out of 80000 | Loss --> 1.858 | Grad_l2 --> 0.305 | Weights_l2 --> 9101.189 | Lr --> 0.003 | Seconds_per_step --> 3.380 |
1093
+ [2024-08-11 12:03:16,714][Main][INFO] - [train] Step 50200 out of 80000 | Loss --> 1.853 | Grad_l2 --> 0.304 | Weights_l2 --> 9101.138 | Lr --> 0.003 | Seconds_per_step --> 3.392 |
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -917,3 +917,89 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
917
  "2024-08-11 07:54:48,582",45800,1.861,0.301,9102.628,0.004,3.376
918
  "2024-08-11 07:57:37,200",45850,1.856,0.304,9102.643,0.004,3.372
919
  "2024-08-11 08:00:26,936",45900,1.857,0.302,9102.666,0.004,3.395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
  "2024-08-11 07:54:48,582",45800,1.861,0.301,9102.628,0.004,3.376
918
  "2024-08-11 07:57:37,200",45850,1.856,0.304,9102.643,0.004,3.372
919
  "2024-08-11 08:00:26,936",45900,1.857,0.302,9102.666,0.004,3.395
920
+ "2024-08-11 08:03:16,131",45950,1.859,0.305,9102.691,0.004,3.384
921
+ "2024-08-11 08:06:04,985",46000,1.857,0.302,9102.708,0.004,3.377
922
+ "2024-08-11 08:08:53,975",46050,1.847,0.304,9102.731,0.004,3.38
923
+ "2024-08-11 08:11:43,887",46100,1.857,0.302,9102.751,0.004,3.398
924
+ "2024-08-11 08:14:32,925",46150,1.854,0.3,9102.767,0.004,3.381
925
+ "2024-08-11 08:17:21,782",46200,1.851,0.302,9102.776,0.004,3.377
926
+ "2024-08-11 08:20:11,070",46250,1.85,0.302,9102.791,0.004,3.386
927
+ "2024-08-11 08:23:00,985",46300,1.85,0.302,9102.809,0.004,3.398
928
+ "2024-08-11 08:25:48,354",46350,1.851,0.302,9102.822,0.004,3.347
929
+ "2024-08-11 08:28:37,651",46400,1.848,0.3,9102.831,0.004,3.386
930
+ "2024-08-11 08:31:27,592",46450,1.854,0.299,9102.837,0.004,3.399
931
+ "2024-08-11 08:34:17,583",46500,1.841,0.301,9102.85,0.004,3.4
932
+ "2024-08-11 08:37:06,886",46550,1.849,0.302,9102.858,0.004,3.386
933
+ "2024-08-11 08:39:56,545",46600,1.845,0.303,9102.855,0.004,3.393
934
+ "2024-08-11 08:42:45,502",46650,1.845,0.301,9102.857,0.004,3.379
935
+ "2024-08-11 08:45:34,652",46700,1.858,0.3,9102.866,0.004,3.383
936
+ "2024-08-11 08:48:24,024",46750,1.857,0.3,9102.869,0.004,3.387
937
+ "2024-08-11 08:51:12,848",46800,1.856,0.304,9102.865,0.004,3.376
938
+ "2024-08-11 08:54:02,438",46850,1.852,0.302,9102.867,0.004,3.392
939
+ "2024-08-11 08:56:51,614",46900,1.858,0.301,9102.872,0.004,3.384
940
+ "2024-08-11 08:59:41,654",46950,1.861,0.306,9102.882,0.004,3.401
941
+ "2024-08-11 09:02:30,942",47000,1.866,0.304,9102.872,0.004,3.386
942
+ "2024-08-11 09:05:19,246",47050,1.868,0.301,9102.874,0.004,3.366
943
+ "2024-08-11 09:08:08,518",47100,1.86,0.303,9102.877,0.004,3.385
944
+ "2024-08-11 09:10:58,772",47150,1.87,0.301,9102.867,0.004,3.405
945
+ "2024-08-11 09:13:49,754",47200,1.853,0.302,9102.854,0.004,3.42
946
+ "2024-08-11 09:16:39,033",47250,1.867,0.303,9102.846,0.004,3.386
947
+ "2024-08-11 09:19:28,134",47300,1.863,0.303,9102.85,0.004,3.382
948
+ "2024-08-11 09:22:18,500",47350,1.862,0.303,9102.833,0.004,3.407
949
+ "2024-08-11 09:25:07,928",47400,1.865,0.3,9102.834,0.004,3.389
950
+ "2024-08-11 09:27:57,487",47450,1.86,0.302,9102.833,0.004,3.391
951
+ "2024-08-11 09:30:46,725",47500,1.865,0.302,9102.813,0.004,3.385
952
+ "2024-08-11 09:33:36,752",47550,1.862,0.303,9102.811,0.004,3.401
953
+ "2024-08-11 09:36:24,674",47600,1.86,0.302,9102.806,0.004,3.358
954
+ "2024-08-11 09:39:13,509",47650,1.856,0.301,9102.793,0.004,3.377
955
+ "2024-08-11 09:42:02,601",47700,1.862,0.302,9102.78,0.004,3.382
956
+ "2024-08-11 09:44:50,911",47750,1.857,0.302,9102.776,0.004,3.366
957
+ "2024-08-11 09:47:37,616",47800,1.862,0.303,9102.762,0.004,3.334
958
+ "2024-08-11 09:50:26,867",47850,1.864,0.303,9102.746,0.003,3.385
959
+ "2024-08-11 09:53:16,815",47900,1.851,0.302,9102.723,0.003,3.399
960
+ "2024-08-11 09:56:06,163",47950,1.855,0.3,9102.696,0.003,3.387
961
+ "2024-08-11 09:58:56,126",48000,1.852,0.301,9102.68,0.003,3.399
962
+ "2024-08-11 10:01:46,482",48050,1.858,0.301,9102.658,0.003,3.407
963
+ "2024-08-11 10:04:36,355",48100,1.855,0.299,9102.643,0.003,3.397
964
+ "2024-08-11 10:07:26,163",48150,1.863,0.302,9102.618,0.003,3.396
965
+ "2024-08-11 10:10:15,109",48200,1.857,0.303,9102.598,0.003,3.379
966
+ "2024-08-11 10:13:04,283",48250,1.852,0.302,9102.579,0.003,3.383
967
+ "2024-08-11 10:15:56,254",48300,1.856,0.304,9102.561,0.003,3.439
968
+ "2024-08-11 10:18:46,081",48350,1.849,0.3,9102.531,0.003,3.397
969
+ "2024-08-11 10:21:34,933",48400,1.851,0.303,9102.506,0.003,3.377
970
+ "2024-08-11 10:24:21,068",48450,1.852,0.301,9102.483,0.003,3.323
971
+ "2024-08-11 10:27:09,253",48500,1.842,0.303,9102.456,0.003,3.364
972
+ "2024-08-11 10:29:57,885",48550,1.845,0.303,9102.436,0.003,3.373
973
+ "2024-08-11 10:32:48,240",48600,1.846,0.301,9102.391,0.003,3.407
974
+ "2024-08-11 10:35:37,262",48650,1.827,0.301,9102.374,0.003,3.38
975
+ "2024-08-11 10:38:26,196",48700,1.84,0.3,9102.333,0.003,3.379
976
+ "2024-08-11 10:41:14,907",48750,1.835,0.302,9102.312,0.003,3.374
977
+ "2024-08-11 10:44:04,940",48800,1.83,0.299,9102.293,0.003,3.401
978
+ "2024-08-11 10:46:53,292",48850,1.825,0.301,9102.255,0.003,3.367
979
+ "2024-08-11 10:49:42,903",48900,1.823,0.302,9102.212,0.003,3.392
980
+ "2024-08-11 10:52:33,141",48950,1.821,0.299,9102.19,0.003,3.405
981
+ "2024-08-11 10:55:23,302",49000,1.821,0.301,9102.157,0.003,3.403
982
+ "2024-08-11 10:58:12,503",49050,1.823,0.301,9102.124,0.003,3.384
983
+ "2024-08-11 11:01:02,175",49100,1.828,0.3,9102.096,0.003,3.393
984
+ "2024-08-11 11:03:51,767",49150,1.834,0.304,9102.049,0.003,3.392
985
+ "2024-08-11 11:06:42,443",49200,1.83,0.301,9102.013,0.003,3.414
986
+ "2024-08-11 11:09:31,684",49250,1.833,0.302,9101.972,0.003,3.385
987
+ "2024-08-11 11:12:20,327",49300,1.842,0.303,9101.928,0.003,3.373
988
+ "2024-08-11 11:15:09,324",49350,1.846,0.304,9101.889,0.003,3.38
989
+ "2024-08-11 11:18:00,030",49400,1.838,0.304,9101.85,0.003,3.414
990
+ "2024-08-11 11:20:49,651",49450,1.835,0.303,9101.804,0.003,3.392
991
+ "2024-08-11 11:23:37,817",49500,1.843,0.302,9101.766,0.003,3.363
992
+ "2024-08-11 11:26:26,286",49550,1.842,0.304,9101.726,0.003,3.369
993
+ "2024-08-11 11:29:16,394",49600,1.848,0.304,9101.673,0.003,3.402
994
+ "2024-08-11 11:32:04,951",49650,1.841,0.301,9101.629,0.003,3.371
995
+ "2024-08-11 11:34:54,813",49700,1.852,0.303,9101.585,0.003,3.397
996
+ "2024-08-11 11:37:44,289",49750,1.855,0.306,9101.539,0.003,3.39
997
+ "2024-08-11 11:40:33,889",49800,1.849,0.304,9101.493,0.003,3.392
998
+ "2024-08-11 11:43:23,944",49850,1.841,0.304,9101.449,0.003,3.401
999
+ "2024-08-11 11:46:13,525",49900,1.85,0.303,9101.408,0.003,3.392
1000
+ "2024-08-11 11:49:03,443",49950,1.849,0.304,9101.359,0.003,3.398
1001
+ "2024-08-11 11:51:53,359",50000,1.849,0.306,9101.317,0.003,3.398
1002
+ "2024-08-11 11:54:48,709",50050,1.848,0.304,9101.28,0.003,3.507
1003
+ "2024-08-11 11:57:38,118",50100,1.853,0.305,9101.232,0.003,3.388
1004
+ "2024-08-11 12:00:27,137",50150,1.858,0.305,9101.189,0.003,3.38
1005
+ "2024-08-11 12:03:16,714",50200,1.853,0.304,9101.138,0.003,3.392
checkpoints/weights_l2_over_steps.png CHANGED