Upload folder using huggingface_hub
Browse files- checkpoints/checkpoint-pt-60000/model.safetensors +3 -0
- checkpoints/checkpoint-pt-60000/random_states_0.pkl +3 -0
- checkpoints/grad_l2_over_steps.png +0 -0
- checkpoints/loss_over_steps.png +0 -0
- checkpoints/lr_over_steps.png +0 -0
- checkpoints/main.log +84 -0
- checkpoints/seconds_per_step_over_steps.png +0 -0
- checkpoints/training_metrics.csv +70 -0
- checkpoints/weights_l2_over_steps.png +0 -0
checkpoints/checkpoint-pt-60000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41e3a6fa014436dd41f74392fb503453c1797f1703d6582360226237a064b9f7
|
3 |
+
size 1202681712
|
checkpoints/checkpoint-pt-60000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
|
3 |
+
size 14344
|
checkpoints/grad_l2_over_steps.png
CHANGED
checkpoints/loss_over_steps.png
CHANGED
checkpoints/lr_over_steps.png
CHANGED
checkpoints/main.log
CHANGED
@@ -1225,3 +1225,87 @@ Mixed precision type: bf16
|
|
1225 |
[2024-08-11 19:03:49,099][Main][INFO] - [train] Step 56400 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.329 | Lr --> 0.002 | Seconds_per_step --> 4.804 |
|
1226 |
[2024-08-11 19:08:07,847][Main][INFO] - [train] Step 56450 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.234 | Lr --> 0.002 | Seconds_per_step --> 5.175 |
|
1227 |
[2024-08-11 19:12:12,785][Main][INFO] - [train] Step 56500 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.130 | Lr --> 0.002 | Seconds_per_step --> 4.899 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1225 |
[2024-08-11 19:03:49,099][Main][INFO] - [train] Step 56400 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.329 | Lr --> 0.002 | Seconds_per_step --> 4.804 |
|
1226 |
[2024-08-11 19:08:07,847][Main][INFO] - [train] Step 56450 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.234 | Lr --> 0.002 | Seconds_per_step --> 5.175 |
|
1227 |
[2024-08-11 19:12:12,785][Main][INFO] - [train] Step 56500 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.130 | Lr --> 0.002 | Seconds_per_step --> 4.899 |
|
1228 |
+
[2024-08-11 19:16:07,111][Main][INFO] - [train] Step 56550 out of 80000 | Loss --> 1.809 | Grad_l2 --> 0.307 | Weights_l2 --> 9091.031 | Lr --> 0.002 | Seconds_per_step --> 4.687 |
|
1229 |
+
[2024-08-11 19:20:17,900][Main][INFO] - [train] Step 56600 out of 80000 | Loss --> 1.807 | Grad_l2 --> 0.306 | Weights_l2 --> 9090.943 | Lr --> 0.002 | Seconds_per_step --> 5.016 |
|
1230 |
+
[2024-08-11 19:24:29,336][Main][INFO] - [train] Step 56650 out of 80000 | Loss --> 1.816 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.840 | Lr --> 0.002 | Seconds_per_step --> 5.029 |
|
1231 |
+
[2024-08-11 19:28:33,570][Main][INFO] - [train] Step 56700 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.311 | Weights_l2 --> 9090.737 | Lr --> 0.002 | Seconds_per_step --> 4.885 |
|
1232 |
+
[2024-08-11 19:32:34,870][Main][INFO] - [train] Step 56750 out of 80000 | Loss --> 1.807 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.642 | Lr --> 0.002 | Seconds_per_step --> 4.826 |
|
1233 |
+
[2024-08-11 19:36:48,798][Main][INFO] - [train] Step 56800 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.549 | Lr --> 0.002 | Seconds_per_step --> 5.079 |
|
1234 |
+
[2024-08-11 19:40:53,609][Main][INFO] - [train] Step 56850 out of 80000 | Loss --> 1.799 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.450 | Lr --> 0.002 | Seconds_per_step --> 4.896 |
|
1235 |
+
[2024-08-11 19:44:48,784][Main][INFO] - [train] Step 56900 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.309 | Weights_l2 --> 9090.349 | Lr --> 0.002 | Seconds_per_step --> 4.703 |
|
1236 |
+
[2024-08-11 19:48:55,965][Main][INFO] - [train] Step 56950 out of 80000 | Loss --> 1.799 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.256 | Lr --> 0.002 | Seconds_per_step --> 4.944 |
|
1237 |
+
[2024-08-11 19:53:02,054][Main][INFO] - [train] Step 57000 out of 80000 | Loss --> 1.797 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.160 | Lr --> 0.002 | Seconds_per_step --> 4.922 |
|
1238 |
+
[2024-08-11 19:56:59,854][Main][INFO] - [train] Step 57050 out of 80000 | Loss --> 1.795 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.065 | Lr --> 0.002 | Seconds_per_step --> 4.756 |
|
1239 |
+
[2024-08-11 19:57:13,264][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 425286f4-04eb-4af4-9171-eff7b1e97f3d)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
|
1240 |
+
[2024-08-11 19:57:13,265][huggingface_hub.utils._http][WARNING] - Retrying in 1s [Retry 1/5].
|
1241 |
+
[2024-08-11 19:57:24,310][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 66d3c9a6-7e72-41be-9ff4-83977d484f23)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
|
1242 |
+
[2024-08-11 19:57:24,313][huggingface_hub.utils._http][WARNING] - Retrying in 2s [Retry 2/5].
|
1243 |
+
[2024-08-11 19:57:36,430][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 1856b455-849b-45df-b1c0-271375bee1dd)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
|
1244 |
+
[2024-08-11 19:57:36,433][huggingface_hub.utils._http][WARNING] - Retrying in 4s [Retry 3/5].
|
1245 |
+
[2024-08-11 20:01:49,199][Main][INFO] - [train] Step 57100 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.959 | Lr --> 0.002 | Seconds_per_step --> 5.787 |
|
1246 |
+
[2024-08-11 20:05:56,117][Main][INFO] - [train] Step 57150 out of 80000 | Loss --> 1.792 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.859 | Lr --> 0.002 | Seconds_per_step --> 4.938 |
|
1247 |
+
[2024-08-11 20:09:54,672][Main][INFO] - [train] Step 57200 out of 80000 | Loss --> 1.787 | Grad_l2 --> 0.305 | Weights_l2 --> 9089.765 | Lr --> 0.002 | Seconds_per_step --> 4.771 |
|
1248 |
+
[2024-08-11 20:13:52,764][Main][INFO] - [train] Step 57250 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.666 | Lr --> 0.002 | Seconds_per_step --> 4.762 |
|
1249 |
+
[2024-08-11 20:17:56,117][Main][INFO] - [train] Step 57300 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.561 | Lr --> 0.002 | Seconds_per_step --> 4.867 |
|
1250 |
+
[2024-08-11 20:21:39,065][Main][INFO] - [train] Step 57350 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.458 | Lr --> 0.002 | Seconds_per_step --> 4.459 |
|
1251 |
+
[2024-08-11 20:25:23,468][Main][INFO] - [train] Step 57400 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.306 | Weights_l2 --> 9089.348 | Lr --> 0.002 | Seconds_per_step --> 4.488 |
|
1252 |
+
[2024-08-11 20:29:16,922][Main][INFO] - [train] Step 57450 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.251 | Lr --> 0.002 | Seconds_per_step --> 4.669 |
|
1253 |
+
[2024-08-11 20:33:07,082][Main][INFO] - [train] Step 57500 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.152 | Lr --> 0.002 | Seconds_per_step --> 4.603 |
|
1254 |
+
[2024-08-11 20:36:55,672][Main][INFO] - [train] Step 57550 out of 80000 | Loss --> 1.784 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.054 | Lr --> 0.002 | Seconds_per_step --> 4.572 |
|
1255 |
+
[2024-08-11 20:40:43,035][Main][INFO] - [train] Step 57600 out of 80000 | Loss --> 1.782 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.953 | Lr --> 0.002 | Seconds_per_step --> 4.547 |
|
1256 |
+
[2024-08-11 20:44:33,501][Main][INFO] - [train] Step 57650 out of 80000 | Loss --> 1.786 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.842 | Lr --> 0.002 | Seconds_per_step --> 4.609 |
|
1257 |
+
[2024-08-11 20:48:25,676][Main][INFO] - [train] Step 57700 out of 80000 | Loss --> 1.779 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.733 | Lr --> 0.002 | Seconds_per_step --> 4.643 |
|
1258 |
+
[2024-08-11 20:52:15,588][Main][INFO] - [train] Step 57750 out of 80000 | Loss --> 1.781 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.639 | Lr --> 0.002 | Seconds_per_step --> 4.598 |
|
1259 |
+
[2024-08-11 20:56:06,157][Main][INFO] - [train] Step 57800 out of 80000 | Loss --> 1.778 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.536 | Lr --> 0.002 | Seconds_per_step --> 4.611 |
|
1260 |
+
[2024-08-11 20:59:53,337][Main][INFO] - [train] Step 57850 out of 80000 | Loss --> 1.776 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.436 | Lr --> 0.002 | Seconds_per_step --> 4.544 |
|
1261 |
+
[2024-08-11 21:03:44,489][Main][INFO] - [train] Step 57900 out of 80000 | Loss --> 1.778 | Grad_l2 --> 0.309 | Weights_l2 --> 9088.328 | Lr --> 0.002 | Seconds_per_step --> 4.623 |
|
1262 |
+
[2024-08-11 21:07:36,703][Main][INFO] - [train] Step 57950 out of 80000 | Loss --> 1.780 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.214 | Lr --> 0.002 | Seconds_per_step --> 4.644 |
|
1263 |
+
[2024-08-11 21:11:29,888][Main][INFO] - [train] Step 58000 out of 80000 | Loss --> 1.781 | Grad_l2 --> 0.308 | Weights_l2 --> 9088.110 | Lr --> 0.002 | Seconds_per_step --> 4.664 |
|
1264 |
+
[2024-08-11 21:15:15,006][Main][INFO] - [train] Step 58050 out of 80000 | Loss --> 1.767 | Grad_l2 --> 0.309 | Weights_l2 --> 9088.006 | Lr --> 0.002 | Seconds_per_step --> 4.502 |
|
1265 |
+
[2024-08-11 21:19:01,376][Main][INFO] - [train] Step 58100 out of 80000 | Loss --> 1.774 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.903 | Lr --> 0.002 | Seconds_per_step --> 4.527 |
|
1266 |
+
[2024-08-11 21:22:51,140][Main][INFO] - [train] Step 58150 out of 80000 | Loss --> 1.777 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.793 | Lr --> 0.002 | Seconds_per_step --> 4.595 |
|
1267 |
+
[2024-08-11 21:26:35,859][Main][INFO] - [train] Step 58200 out of 80000 | Loss --> 1.775 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.692 | Lr --> 0.002 | Seconds_per_step --> 4.494 |
|
1268 |
+
[2024-08-11 21:30:24,002][Main][INFO] - [train] Step 58250 out of 80000 | Loss --> 1.771 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.588 | Lr --> 0.002 | Seconds_per_step --> 4.563 |
|
1269 |
+
[2024-08-11 21:34:15,810][Main][INFO] - [train] Step 58300 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.486 | Lr --> 0.002 | Seconds_per_step --> 4.636 |
|
1270 |
+
[2024-08-11 21:38:04,254][Main][INFO] - [train] Step 58350 out of 80000 | Loss --> 1.770 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.387 | Lr --> 0.002 | Seconds_per_step --> 4.569 |
|
1271 |
+
[2024-08-11 21:41:45,046][Main][INFO] - [train] Step 58400 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.285 | Lr --> 0.002 | Seconds_per_step --> 4.416 |
|
1272 |
+
[2024-08-11 21:45:29,763][Main][INFO] - [train] Step 58450 out of 80000 | Loss --> 1.762 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.180 | Lr --> 0.002 | Seconds_per_step --> 4.494 |
|
1273 |
+
[2024-08-11 21:49:16,119][Main][INFO] - [train] Step 58500 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.067 | Lr --> 0.002 | Seconds_per_step --> 4.527 |
|
1274 |
+
[2024-08-11 21:52:58,696][Main][INFO] - [train] Step 58550 out of 80000 | Loss --> 1.766 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.963 | Lr --> 0.002 | Seconds_per_step --> 4.452 |
|
1275 |
+
[2024-08-11 21:56:46,334][Main][INFO] - [train] Step 58600 out of 80000 | Loss --> 1.762 | Grad_l2 --> 0.310 | Weights_l2 --> 9086.868 | Lr --> 0.002 | Seconds_per_step --> 4.553 |
|
1276 |
+
[2024-08-11 22:00:27,399][Main][INFO] - [train] Step 58650 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.310 | Weights_l2 --> 9086.770 | Lr --> 0.002 | Seconds_per_step --> 4.421 |
|
1277 |
+
[2024-08-11 22:04:12,722][Main][INFO] - [train] Step 58700 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.307 | Weights_l2 --> 9086.661 | Lr --> 0.002 | Seconds_per_step --> 4.506 |
|
1278 |
+
[2024-08-11 22:08:00,160][Main][INFO] - [train] Step 58750 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.563 | Lr --> 0.002 | Seconds_per_step --> 4.549 |
|
1279 |
+
[2024-08-11 22:11:44,169][Main][INFO] - [train] Step 58800 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.309 | Weights_l2 --> 9086.458 | Lr --> 0.002 | Seconds_per_step --> 4.480 |
|
1280 |
+
[2024-08-11 22:15:28,355][Main][INFO] - [train] Step 58850 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.307 | Weights_l2 --> 9086.355 | Lr --> 0.002 | Seconds_per_step --> 4.484 |
|
1281 |
+
[2024-08-11 22:19:13,149][Main][INFO] - [train] Step 58900 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.253 | Lr --> 0.002 | Seconds_per_step --> 4.496 |
|
1282 |
+
[2024-08-11 22:22:54,103][Main][INFO] - [train] Step 58950 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.151 | Lr --> 0.002 | Seconds_per_step --> 4.419 |
|
1283 |
+
[2024-08-11 22:26:42,100][Main][INFO] - [train] Step 59000 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.051 | Lr --> 0.002 | Seconds_per_step --> 4.560 |
|
1284 |
+
[2024-08-11 22:30:30,714][Main][INFO] - [train] Step 59050 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.308 | Weights_l2 --> 9085.948 | Lr --> 0.002 | Seconds_per_step --> 4.572 |
|
1285 |
+
[2024-08-11 22:34:12,979][Main][INFO] - [train] Step 59100 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.851 | Lr --> 0.002 | Seconds_per_step --> 4.445 |
|
1286 |
+
[2024-08-11 22:38:00,619][Main][INFO] - [train] Step 59150 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.308 | Weights_l2 --> 9085.755 | Lr --> 0.002 | Seconds_per_step --> 4.553 |
|
1287 |
+
[2024-08-11 22:41:41,913][Main][INFO] - [train] Step 59200 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.647 | Lr --> 0.002 | Seconds_per_step --> 4.426 |
|
1288 |
+
[2024-08-11 22:45:34,811][Main][INFO] - [train] Step 59250 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.551 | Lr --> 0.002 | Seconds_per_step --> 4.658 |
|
1289 |
+
[2024-08-11 22:49:19,551][Main][INFO] - [train] Step 59300 out of 80000 | Loss --> 1.753 | Grad_l2 --> 0.309 | Weights_l2 --> 9085.452 | Lr --> 0.002 | Seconds_per_step --> 4.495 |
|
1290 |
+
[2024-08-11 22:53:00,772][Main][INFO] - [train] Step 59350 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.363 | Lr --> 0.002 | Seconds_per_step --> 4.424 |
|
1291 |
+
[2024-08-11 22:56:45,139][Main][INFO] - [train] Step 59400 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.266 | Lr --> 0.002 | Seconds_per_step --> 4.487 |
|
1292 |
+
[2024-08-11 23:00:34,173][Main][INFO] - [train] Step 59450 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.158 | Lr --> 0.002 | Seconds_per_step --> 4.581 |
|
1293 |
+
[2024-08-11 23:04:21,635][Main][INFO] - [train] Step 59500 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.065 | Lr --> 0.002 | Seconds_per_step --> 4.549 |
|
1294 |
+
[2024-08-11 23:08:03,486][Main][INFO] - [train] Step 59550 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.310 | Weights_l2 --> 9084.969 | Lr --> 0.002 | Seconds_per_step --> 4.437 |
|
1295 |
+
[2024-08-11 23:11:45,006][Main][INFO] - [train] Step 59600 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.313 | Weights_l2 --> 9084.871 | Lr --> 0.002 | Seconds_per_step --> 4.430 |
|
1296 |
+
[2024-08-11 23:15:23,509][Main][INFO] - [train] Step 59650 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.777 | Lr --> 0.002 | Seconds_per_step --> 4.370 |
|
1297 |
+
[2024-08-11 23:19:01,925][Main][INFO] - [train] Step 59700 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.680 | Lr --> 0.002 | Seconds_per_step --> 4.368 |
|
1298 |
+
[2024-08-11 23:22:43,911][Main][INFO] - [train] Step 59750 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.580 | Lr --> 0.002 | Seconds_per_step --> 4.440 |
|
1299 |
+
[2024-08-11 23:26:25,067][Main][INFO] - [train] Step 59800 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.489 | Lr --> 0.002 | Seconds_per_step --> 4.423 |
|
1300 |
+
[2024-08-11 23:30:03,875][Main][INFO] - [train] Step 59850 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.392 | Lr --> 0.002 | Seconds_per_step --> 4.376 |
|
1301 |
+
[2024-08-11 23:33:42,430][Main][INFO] - [train] Step 59900 out of 80000 | Loss --> 1.761 | Grad_l2 --> 0.312 | Weights_l2 --> 9084.295 | Lr --> 0.002 | Seconds_per_step --> 4.371 |
|
1302 |
+
[2024-08-11 23:37:30,256][Main][INFO] - [train] Step 59950 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.313 | Weights_l2 --> 9084.198 | Lr --> 0.002 | Seconds_per_step --> 4.556 |
|
1303 |
+
[2024-08-11 23:41:15,929][Main][INFO] - [train] Step 60000 out of 80000 | Loss --> 1.763 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.104 | Lr --> 0.002 | Seconds_per_step --> 4.513 |
|
1304 |
+
[2024-08-11 23:41:15,929][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-60000
|
1305 |
+
[2024-08-11 23:41:15,933][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
1306 |
+
[2024-08-11 23:41:18,954][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-60000/model.safetensors
|
1307 |
+
[2024-08-11 23:41:22,600][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-60000/optimizer.bin
|
1308 |
+
[2024-08-11 23:41:22,600][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-60000/scheduler.bin
|
1309 |
+
[2024-08-11 23:41:22,601][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-60000/sampler.bin
|
1310 |
+
[2024-08-11 23:41:22,601][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-60000/sampler_1.bin
|
1311 |
+
[2024-08-11 23:41:22,602][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-60000/random_states_0.pkl
|
checkpoints/seconds_per_step_over_steps.png
CHANGED
checkpoints/training_metrics.csv
CHANGED
@@ -1129,3 +1129,73 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
|
|
1129 |
"2024-08-11 19:03:49,099",56400,1.803,0.31,9091.329,0.002,4.804
|
1130 |
"2024-08-11 19:08:07,847",56450,1.806,0.309,9091.234,0.002,5.175
|
1131 |
"2024-08-11 19:12:12,785",56500,1.804,0.31,9091.13,0.002,4.899
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1129 |
"2024-08-11 19:03:49,099",56400,1.803,0.31,9091.329,0.002,4.804
|
1130 |
"2024-08-11 19:08:07,847",56450,1.806,0.309,9091.234,0.002,5.175
|
1131 |
"2024-08-11 19:12:12,785",56500,1.804,0.31,9091.13,0.002,4.899
|
1132 |
+
"2024-08-11 19:16:07,111",56550,1.809,0.307,9091.031,0.002,4.687
|
1133 |
+
"2024-08-11 19:20:17,900",56600,1.807,0.306,9090.943,0.002,5.016
|
1134 |
+
"2024-08-11 19:24:29,336",56650,1.816,0.307,9090.84,0.002,5.029
|
1135 |
+
"2024-08-11 19:28:33,570",56700,1.804,0.311,9090.737,0.002,4.885
|
1136 |
+
"2024-08-11 19:32:34,870",56750,1.807,0.308,9090.642,0.002,4.826
|
1137 |
+
"2024-08-11 19:36:48,798",56800,1.806,0.307,9090.549,0.002,5.079
|
1138 |
+
"2024-08-11 19:40:53,609",56850,1.799,0.308,9090.45,0.002,4.896
|
1139 |
+
"2024-08-11 19:44:48,784",56900,1.803,0.309,9090.349,0.002,4.703
|
1140 |
+
"2024-08-11 19:48:55,965",56950,1.799,0.307,9090.256,0.002,4.944
|
1141 |
+
"2024-08-11 19:53:02,054",57000,1.797,0.308,9090.16,0.002,4.922
|
1142 |
+
"2024-08-11 19:56:59,854",57050,1.795,0.308,9090.065,0.002,4.756
|
1143 |
+
"2024-08-11 20:01:49,199",57100,1.788,0.307,9089.959,0.002,5.787
|
1144 |
+
"2024-08-11 20:05:56,117",57150,1.792,0.308,9089.859,0.002,4.938
|
1145 |
+
"2024-08-11 20:09:54,672",57200,1.787,0.305,9089.765,0.002,4.771
|
1146 |
+
"2024-08-11 20:13:52,764",57250,1.804,0.307,9089.666,0.002,4.762
|
1147 |
+
"2024-08-11 20:17:56,117",57300,1.79,0.308,9089.561,0.002,4.867
|
1148 |
+
"2024-08-11 20:21:39,065",57350,1.788,0.307,9089.458,0.002,4.459
|
1149 |
+
"2024-08-11 20:25:23,468",57400,1.79,0.306,9089.348,0.002,4.488
|
1150 |
+
"2024-08-11 20:29:16,922",57450,1.79,0.308,9089.251,0.002,4.669
|
1151 |
+
"2024-08-11 20:33:07,082",57500,1.788,0.307,9089.152,0.002,4.603
|
1152 |
+
"2024-08-11 20:36:55,672",57550,1.784,0.307,9089.054,0.002,4.572
|
1153 |
+
"2024-08-11 20:40:43,035",57600,1.782,0.307,9088.953,0.002,4.547
|
1154 |
+
"2024-08-11 20:44:33,501",57650,1.786,0.307,9088.842,0.002,4.609
|
1155 |
+
"2024-08-11 20:48:25,676",57700,1.779,0.307,9088.733,0.002,4.643
|
1156 |
+
"2024-08-11 20:52:15,588",57750,1.781,0.307,9088.639,0.002,4.598
|
1157 |
+
"2024-08-11 20:56:06,157",57800,1.778,0.307,9088.536,0.002,4.611
|
1158 |
+
"2024-08-11 20:59:53,337",57850,1.776,0.307,9088.436,0.002,4.544
|
1159 |
+
"2024-08-11 21:03:44,489",57900,1.778,0.309,9088.328,0.002,4.623
|
1160 |
+
"2024-08-11 21:07:36,703",57950,1.78,0.307,9088.214,0.002,4.644
|
1161 |
+
"2024-08-11 21:11:29,888",58000,1.781,0.308,9088.11,0.002,4.664
|
1162 |
+
"2024-08-11 21:15:15,006",58050,1.767,0.309,9088.006,0.002,4.502
|
1163 |
+
"2024-08-11 21:19:01,376",58100,1.774,0.308,9087.903,0.002,4.527
|
1164 |
+
"2024-08-11 21:22:51,140",58150,1.777,0.309,9087.793,0.002,4.595
|
1165 |
+
"2024-08-11 21:26:35,859",58200,1.775,0.308,9087.692,0.002,4.494
|
1166 |
+
"2024-08-11 21:30:24,002",58250,1.771,0.309,9087.588,0.002,4.563
|
1167 |
+
"2024-08-11 21:34:15,810",58300,1.764,0.308,9087.486,0.002,4.636
|
1168 |
+
"2024-08-11 21:38:04,254",58350,1.77,0.309,9087.387,0.002,4.569
|
1169 |
+
"2024-08-11 21:41:45,046",58400,1.759,0.309,9087.285,0.002,4.416
|
1170 |
+
"2024-08-11 21:45:29,763",58450,1.762,0.308,9087.18,0.002,4.494
|
1171 |
+
"2024-08-11 21:49:16,119",58500,1.764,0.308,9087.067,0.002,4.527
|
1172 |
+
"2024-08-11 21:52:58,696",58550,1.766,0.308,9086.963,0.002,4.452
|
1173 |
+
"2024-08-11 21:56:46,334",58600,1.762,0.31,9086.868,0.002,4.553
|
1174 |
+
"2024-08-11 22:00:27,399",58650,1.755,0.31,9086.77,0.002,4.421
|
1175 |
+
"2024-08-11 22:04:12,722",58700,1.757,0.307,9086.661,0.002,4.506
|
1176 |
+
"2024-08-11 22:08:00,160",58750,1.751,0.308,9086.563,0.002,4.549
|
1177 |
+
"2024-08-11 22:11:44,169",58800,1.752,0.309,9086.458,0.002,4.48
|
1178 |
+
"2024-08-11 22:15:28,355",58850,1.743,0.307,9086.355,0.002,4.484
|
1179 |
+
"2024-08-11 22:19:13,149",58900,1.745,0.308,9086.253,0.002,4.496
|
1180 |
+
"2024-08-11 22:22:54,103",58950,1.743,0.308,9086.151,0.002,4.419
|
1181 |
+
"2024-08-11 22:26:42,100",59000,1.755,0.308,9086.051,0.002,4.56
|
1182 |
+
"2024-08-11 22:30:30,714",59050,1.749,0.308,9085.948,0.002,4.572
|
1183 |
+
"2024-08-11 22:34:12,979",59100,1.759,0.31,9085.851,0.002,4.445
|
1184 |
+
"2024-08-11 22:38:00,619",59150,1.752,0.308,9085.755,0.002,4.553
|
1185 |
+
"2024-08-11 22:41:41,913",59200,1.755,0.31,9085.647,0.002,4.426
|
1186 |
+
"2024-08-11 22:45:34,811",59250,1.759,0.31,9085.551,0.002,4.658
|
1187 |
+
"2024-08-11 22:49:19,551",59300,1.753,0.309,9085.452,0.002,4.495
|
1188 |
+
"2024-08-11 22:53:00,772",59350,1.752,0.311,9085.363,0.002,4.424
|
1189 |
+
"2024-08-11 22:56:45,139",59400,1.76,0.311,9085.266,0.002,4.487
|
1190 |
+
"2024-08-11 23:00:34,173",59450,1.757,0.311,9085.158,0.002,4.581
|
1191 |
+
"2024-08-11 23:04:21,635",59500,1.754,0.31,9085.065,0.002,4.549
|
1192 |
+
"2024-08-11 23:08:03,486",59550,1.749,0.31,9084.969,0.002,4.437
|
1193 |
+
"2024-08-11 23:11:45,006",59600,1.764,0.313,9084.871,0.002,4.43
|
1194 |
+
"2024-08-11 23:15:23,509",59650,1.757,0.311,9084.777,0.002,4.37
|
1195 |
+
"2024-08-11 23:19:01,925",59700,1.76,0.311,9084.68,0.002,4.368
|
1196 |
+
"2024-08-11 23:22:43,911",59750,1.755,0.311,9084.58,0.002,4.44
|
1197 |
+
"2024-08-11 23:26:25,067",59800,1.748,0.311,9084.489,0.002,4.423
|
1198 |
+
"2024-08-11 23:30:03,875",59850,1.749,0.311,9084.392,0.002,4.376
|
1199 |
+
"2024-08-11 23:33:42,430",59900,1.761,0.312,9084.295,0.002,4.371
|
1200 |
+
"2024-08-11 23:37:30,256",59950,1.749,0.313,9084.198,0.002,4.556
|
1201 |
+
"2024-08-11 23:41:15,929",60000,1.763,0.311,9084.104,0.002,4.513
|
checkpoints/weights_l2_over_steps.png
CHANGED