pszemraj commited on
Commit
39bce54
·
verified ·
1 Parent(s): 0364a6e

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-60000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41e3a6fa014436dd41f74392fb503453c1797f1703d6582360226237a064b9f7
3
+ size 1202681712
checkpoints/checkpoint-pt-60000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -1225,3 +1225,87 @@ Mixed precision type: bf16
1225
  [2024-08-11 19:03:49,099][Main][INFO] - [train] Step 56400 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.329 | Lr --> 0.002 | Seconds_per_step --> 4.804 |
1226
  [2024-08-11 19:08:07,847][Main][INFO] - [train] Step 56450 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.234 | Lr --> 0.002 | Seconds_per_step --> 5.175 |
1227
  [2024-08-11 19:12:12,785][Main][INFO] - [train] Step 56500 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.130 | Lr --> 0.002 | Seconds_per_step --> 4.899 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1225
  [2024-08-11 19:03:49,099][Main][INFO] - [train] Step 56400 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.329 | Lr --> 0.002 | Seconds_per_step --> 4.804 |
1226
  [2024-08-11 19:08:07,847][Main][INFO] - [train] Step 56450 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.309 | Weights_l2 --> 9091.234 | Lr --> 0.002 | Seconds_per_step --> 5.175 |
1227
  [2024-08-11 19:12:12,785][Main][INFO] - [train] Step 56500 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.310 | Weights_l2 --> 9091.130 | Lr --> 0.002 | Seconds_per_step --> 4.899 |
1228
+ [2024-08-11 19:16:07,111][Main][INFO] - [train] Step 56550 out of 80000 | Loss --> 1.809 | Grad_l2 --> 0.307 | Weights_l2 --> 9091.031 | Lr --> 0.002 | Seconds_per_step --> 4.687 |
1229
+ [2024-08-11 19:20:17,900][Main][INFO] - [train] Step 56600 out of 80000 | Loss --> 1.807 | Grad_l2 --> 0.306 | Weights_l2 --> 9090.943 | Lr --> 0.002 | Seconds_per_step --> 5.016 |
1230
+ [2024-08-11 19:24:29,336][Main][INFO] - [train] Step 56650 out of 80000 | Loss --> 1.816 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.840 | Lr --> 0.002 | Seconds_per_step --> 5.029 |
1231
+ [2024-08-11 19:28:33,570][Main][INFO] - [train] Step 56700 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.311 | Weights_l2 --> 9090.737 | Lr --> 0.002 | Seconds_per_step --> 4.885 |
1232
+ [2024-08-11 19:32:34,870][Main][INFO] - [train] Step 56750 out of 80000 | Loss --> 1.807 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.642 | Lr --> 0.002 | Seconds_per_step --> 4.826 |
1233
+ [2024-08-11 19:36:48,798][Main][INFO] - [train] Step 56800 out of 80000 | Loss --> 1.806 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.549 | Lr --> 0.002 | Seconds_per_step --> 5.079 |
1234
+ [2024-08-11 19:40:53,609][Main][INFO] - [train] Step 56850 out of 80000 | Loss --> 1.799 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.450 | Lr --> 0.002 | Seconds_per_step --> 4.896 |
1235
+ [2024-08-11 19:44:48,784][Main][INFO] - [train] Step 56900 out of 80000 | Loss --> 1.803 | Grad_l2 --> 0.309 | Weights_l2 --> 9090.349 | Lr --> 0.002 | Seconds_per_step --> 4.703 |
1236
+ [2024-08-11 19:48:55,965][Main][INFO] - [train] Step 56950 out of 80000 | Loss --> 1.799 | Grad_l2 --> 0.307 | Weights_l2 --> 9090.256 | Lr --> 0.002 | Seconds_per_step --> 4.944 |
1237
+ [2024-08-11 19:53:02,054][Main][INFO] - [train] Step 57000 out of 80000 | Loss --> 1.797 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.160 | Lr --> 0.002 | Seconds_per_step --> 4.922 |
1238
+ [2024-08-11 19:56:59,854][Main][INFO] - [train] Step 57050 out of 80000 | Loss --> 1.795 | Grad_l2 --> 0.308 | Weights_l2 --> 9090.065 | Lr --> 0.002 | Seconds_per_step --> 4.756 |
1239
+ [2024-08-11 19:57:13,264][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 425286f4-04eb-4af4-9171-eff7b1e97f3d)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
1240
+ [2024-08-11 19:57:13,265][huggingface_hub.utils._http][WARNING] - Retrying in 1s [Retry 1/5].
1241
+ [2024-08-11 19:57:24,310][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 66d3c9a6-7e72-41be-9ff4-83977d484f23)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
1242
+ [2024-08-11 19:57:24,313][huggingface_hub.utils._http][WARNING] - Retrying in 2s [Retry 2/5].
1243
+ [2024-08-11 19:57:36,430][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 1856b455-849b-45df-b1c0-271375bee1dd)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00193-of-00234.parquet
1244
+ [2024-08-11 19:57:36,433][huggingface_hub.utils._http][WARNING] - Retrying in 4s [Retry 3/5].
1245
+ [2024-08-11 20:01:49,199][Main][INFO] - [train] Step 57100 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.959 | Lr --> 0.002 | Seconds_per_step --> 5.787 |
1246
+ [2024-08-11 20:05:56,117][Main][INFO] - [train] Step 57150 out of 80000 | Loss --> 1.792 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.859 | Lr --> 0.002 | Seconds_per_step --> 4.938 |
1247
+ [2024-08-11 20:09:54,672][Main][INFO] - [train] Step 57200 out of 80000 | Loss --> 1.787 | Grad_l2 --> 0.305 | Weights_l2 --> 9089.765 | Lr --> 0.002 | Seconds_per_step --> 4.771 |
1248
+ [2024-08-11 20:13:52,764][Main][INFO] - [train] Step 57250 out of 80000 | Loss --> 1.804 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.666 | Lr --> 0.002 | Seconds_per_step --> 4.762 |
1249
+ [2024-08-11 20:17:56,117][Main][INFO] - [train] Step 57300 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.561 | Lr --> 0.002 | Seconds_per_step --> 4.867 |
1250
+ [2024-08-11 20:21:39,065][Main][INFO] - [train] Step 57350 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.458 | Lr --> 0.002 | Seconds_per_step --> 4.459 |
1251
+ [2024-08-11 20:25:23,468][Main][INFO] - [train] Step 57400 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.306 | Weights_l2 --> 9089.348 | Lr --> 0.002 | Seconds_per_step --> 4.488 |
1252
+ [2024-08-11 20:29:16,922][Main][INFO] - [train] Step 57450 out of 80000 | Loss --> 1.790 | Grad_l2 --> 0.308 | Weights_l2 --> 9089.251 | Lr --> 0.002 | Seconds_per_step --> 4.669 |
1253
+ [2024-08-11 20:33:07,082][Main][INFO] - [train] Step 57500 out of 80000 | Loss --> 1.788 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.152 | Lr --> 0.002 | Seconds_per_step --> 4.603 |
1254
+ [2024-08-11 20:36:55,672][Main][INFO] - [train] Step 57550 out of 80000 | Loss --> 1.784 | Grad_l2 --> 0.307 | Weights_l2 --> 9089.054 | Lr --> 0.002 | Seconds_per_step --> 4.572 |
1255
+ [2024-08-11 20:40:43,035][Main][INFO] - [train] Step 57600 out of 80000 | Loss --> 1.782 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.953 | Lr --> 0.002 | Seconds_per_step --> 4.547 |
1256
+ [2024-08-11 20:44:33,501][Main][INFO] - [train] Step 57650 out of 80000 | Loss --> 1.786 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.842 | Lr --> 0.002 | Seconds_per_step --> 4.609 |
1257
+ [2024-08-11 20:48:25,676][Main][INFO] - [train] Step 57700 out of 80000 | Loss --> 1.779 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.733 | Lr --> 0.002 | Seconds_per_step --> 4.643 |
1258
+ [2024-08-11 20:52:15,588][Main][INFO] - [train] Step 57750 out of 80000 | Loss --> 1.781 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.639 | Lr --> 0.002 | Seconds_per_step --> 4.598 |
1259
+ [2024-08-11 20:56:06,157][Main][INFO] - [train] Step 57800 out of 80000 | Loss --> 1.778 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.536 | Lr --> 0.002 | Seconds_per_step --> 4.611 |
1260
+ [2024-08-11 20:59:53,337][Main][INFO] - [train] Step 57850 out of 80000 | Loss --> 1.776 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.436 | Lr --> 0.002 | Seconds_per_step --> 4.544 |
1261
+ [2024-08-11 21:03:44,489][Main][INFO] - [train] Step 57900 out of 80000 | Loss --> 1.778 | Grad_l2 --> 0.309 | Weights_l2 --> 9088.328 | Lr --> 0.002 | Seconds_per_step --> 4.623 |
1262
+ [2024-08-11 21:07:36,703][Main][INFO] - [train] Step 57950 out of 80000 | Loss --> 1.780 | Grad_l2 --> 0.307 | Weights_l2 --> 9088.214 | Lr --> 0.002 | Seconds_per_step --> 4.644 |
1263
+ [2024-08-11 21:11:29,888][Main][INFO] - [train] Step 58000 out of 80000 | Loss --> 1.781 | Grad_l2 --> 0.308 | Weights_l2 --> 9088.110 | Lr --> 0.002 | Seconds_per_step --> 4.664 |
1264
+ [2024-08-11 21:15:15,006][Main][INFO] - [train] Step 58050 out of 80000 | Loss --> 1.767 | Grad_l2 --> 0.309 | Weights_l2 --> 9088.006 | Lr --> 0.002 | Seconds_per_step --> 4.502 |
1265
+ [2024-08-11 21:19:01,376][Main][INFO] - [train] Step 58100 out of 80000 | Loss --> 1.774 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.903 | Lr --> 0.002 | Seconds_per_step --> 4.527 |
1266
+ [2024-08-11 21:22:51,140][Main][INFO] - [train] Step 58150 out of 80000 | Loss --> 1.777 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.793 | Lr --> 0.002 | Seconds_per_step --> 4.595 |
1267
+ [2024-08-11 21:26:35,859][Main][INFO] - [train] Step 58200 out of 80000 | Loss --> 1.775 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.692 | Lr --> 0.002 | Seconds_per_step --> 4.494 |
1268
+ [2024-08-11 21:30:24,002][Main][INFO] - [train] Step 58250 out of 80000 | Loss --> 1.771 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.588 | Lr --> 0.002 | Seconds_per_step --> 4.563 |
1269
+ [2024-08-11 21:34:15,810][Main][INFO] - [train] Step 58300 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.486 | Lr --> 0.002 | Seconds_per_step --> 4.636 |
1270
+ [2024-08-11 21:38:04,254][Main][INFO] - [train] Step 58350 out of 80000 | Loss --> 1.770 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.387 | Lr --> 0.002 | Seconds_per_step --> 4.569 |
1271
+ [2024-08-11 21:41:45,046][Main][INFO] - [train] Step 58400 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.309 | Weights_l2 --> 9087.285 | Lr --> 0.002 | Seconds_per_step --> 4.416 |
1272
+ [2024-08-11 21:45:29,763][Main][INFO] - [train] Step 58450 out of 80000 | Loss --> 1.762 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.180 | Lr --> 0.002 | Seconds_per_step --> 4.494 |
1273
+ [2024-08-11 21:49:16,119][Main][INFO] - [train] Step 58500 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.308 | Weights_l2 --> 9087.067 | Lr --> 0.002 | Seconds_per_step --> 4.527 |
1274
+ [2024-08-11 21:52:58,696][Main][INFO] - [train] Step 58550 out of 80000 | Loss --> 1.766 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.963 | Lr --> 0.002 | Seconds_per_step --> 4.452 |
1275
+ [2024-08-11 21:56:46,334][Main][INFO] - [train] Step 58600 out of 80000 | Loss --> 1.762 | Grad_l2 --> 0.310 | Weights_l2 --> 9086.868 | Lr --> 0.002 | Seconds_per_step --> 4.553 |
1276
+ [2024-08-11 22:00:27,399][Main][INFO] - [train] Step 58650 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.310 | Weights_l2 --> 9086.770 | Lr --> 0.002 | Seconds_per_step --> 4.421 |
1277
+ [2024-08-11 22:04:12,722][Main][INFO] - [train] Step 58700 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.307 | Weights_l2 --> 9086.661 | Lr --> 0.002 | Seconds_per_step --> 4.506 |
1278
+ [2024-08-11 22:08:00,160][Main][INFO] - [train] Step 58750 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.563 | Lr --> 0.002 | Seconds_per_step --> 4.549 |
1279
+ [2024-08-11 22:11:44,169][Main][INFO] - [train] Step 58800 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.309 | Weights_l2 --> 9086.458 | Lr --> 0.002 | Seconds_per_step --> 4.480 |
1280
+ [2024-08-11 22:15:28,355][Main][INFO] - [train] Step 58850 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.307 | Weights_l2 --> 9086.355 | Lr --> 0.002 | Seconds_per_step --> 4.484 |
1281
+ [2024-08-11 22:19:13,149][Main][INFO] - [train] Step 58900 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.253 | Lr --> 0.002 | Seconds_per_step --> 4.496 |
1282
+ [2024-08-11 22:22:54,103][Main][INFO] - [train] Step 58950 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.151 | Lr --> 0.002 | Seconds_per_step --> 4.419 |
1283
+ [2024-08-11 22:26:42,100][Main][INFO] - [train] Step 59000 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.308 | Weights_l2 --> 9086.051 | Lr --> 0.002 | Seconds_per_step --> 4.560 |
1284
+ [2024-08-11 22:30:30,714][Main][INFO] - [train] Step 59050 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.308 | Weights_l2 --> 9085.948 | Lr --> 0.002 | Seconds_per_step --> 4.572 |
1285
+ [2024-08-11 22:34:12,979][Main][INFO] - [train] Step 59100 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.851 | Lr --> 0.002 | Seconds_per_step --> 4.445 |
1286
+ [2024-08-11 22:38:00,619][Main][INFO] - [train] Step 59150 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.308 | Weights_l2 --> 9085.755 | Lr --> 0.002 | Seconds_per_step --> 4.553 |
1287
+ [2024-08-11 22:41:41,913][Main][INFO] - [train] Step 59200 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.647 | Lr --> 0.002 | Seconds_per_step --> 4.426 |
1288
+ [2024-08-11 22:45:34,811][Main][INFO] - [train] Step 59250 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.551 | Lr --> 0.002 | Seconds_per_step --> 4.658 |
1289
+ [2024-08-11 22:49:19,551][Main][INFO] - [train] Step 59300 out of 80000 | Loss --> 1.753 | Grad_l2 --> 0.309 | Weights_l2 --> 9085.452 | Lr --> 0.002 | Seconds_per_step --> 4.495 |
1290
+ [2024-08-11 22:53:00,772][Main][INFO] - [train] Step 59350 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.363 | Lr --> 0.002 | Seconds_per_step --> 4.424 |
1291
+ [2024-08-11 22:56:45,139][Main][INFO] - [train] Step 59400 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.266 | Lr --> 0.002 | Seconds_per_step --> 4.487 |
1292
+ [2024-08-11 23:00:34,173][Main][INFO] - [train] Step 59450 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.311 | Weights_l2 --> 9085.158 | Lr --> 0.002 | Seconds_per_step --> 4.581 |
1293
+ [2024-08-11 23:04:21,635][Main][INFO] - [train] Step 59500 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.310 | Weights_l2 --> 9085.065 | Lr --> 0.002 | Seconds_per_step --> 4.549 |
1294
+ [2024-08-11 23:08:03,486][Main][INFO] - [train] Step 59550 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.310 | Weights_l2 --> 9084.969 | Lr --> 0.002 | Seconds_per_step --> 4.437 |
1295
+ [2024-08-11 23:11:45,006][Main][INFO] - [train] Step 59600 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.313 | Weights_l2 --> 9084.871 | Lr --> 0.002 | Seconds_per_step --> 4.430 |
1296
+ [2024-08-11 23:15:23,509][Main][INFO] - [train] Step 59650 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.777 | Lr --> 0.002 | Seconds_per_step --> 4.370 |
1297
+ [2024-08-11 23:19:01,925][Main][INFO] - [train] Step 59700 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.680 | Lr --> 0.002 | Seconds_per_step --> 4.368 |
1298
+ [2024-08-11 23:22:43,911][Main][INFO] - [train] Step 59750 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.580 | Lr --> 0.002 | Seconds_per_step --> 4.440 |
1299
+ [2024-08-11 23:26:25,067][Main][INFO] - [train] Step 59800 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.489 | Lr --> 0.002 | Seconds_per_step --> 4.423 |
1300
+ [2024-08-11 23:30:03,875][Main][INFO] - [train] Step 59850 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.392 | Lr --> 0.002 | Seconds_per_step --> 4.376 |
1301
+ [2024-08-11 23:33:42,430][Main][INFO] - [train] Step 59900 out of 80000 | Loss --> 1.761 | Grad_l2 --> 0.312 | Weights_l2 --> 9084.295 | Lr --> 0.002 | Seconds_per_step --> 4.371 |
1302
+ [2024-08-11 23:37:30,256][Main][INFO] - [train] Step 59950 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.313 | Weights_l2 --> 9084.198 | Lr --> 0.002 | Seconds_per_step --> 4.556 |
1303
+ [2024-08-11 23:41:15,929][Main][INFO] - [train] Step 60000 out of 80000 | Loss --> 1.763 | Grad_l2 --> 0.311 | Weights_l2 --> 9084.104 | Lr --> 0.002 | Seconds_per_step --> 4.513 |
1304
+ [2024-08-11 23:41:15,929][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-60000
1305
+ [2024-08-11 23:41:15,933][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
1306
+ [2024-08-11 23:41:18,954][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-60000/model.safetensors
1307
+ [2024-08-11 23:41:22,600][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-60000/optimizer.bin
1308
+ [2024-08-11 23:41:22,600][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-60000/scheduler.bin
1309
+ [2024-08-11 23:41:22,601][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-60000/sampler.bin
1310
+ [2024-08-11 23:41:22,601][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-60000/sampler_1.bin
1311
+ [2024-08-11 23:41:22,602][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-60000/random_states_0.pkl
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -1129,3 +1129,73 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
1129
  "2024-08-11 19:03:49,099",56400,1.803,0.31,9091.329,0.002,4.804
1130
  "2024-08-11 19:08:07,847",56450,1.806,0.309,9091.234,0.002,5.175
1131
  "2024-08-11 19:12:12,785",56500,1.804,0.31,9091.13,0.002,4.899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1129
  "2024-08-11 19:03:49,099",56400,1.803,0.31,9091.329,0.002,4.804
1130
  "2024-08-11 19:08:07,847",56450,1.806,0.309,9091.234,0.002,5.175
1131
  "2024-08-11 19:12:12,785",56500,1.804,0.31,9091.13,0.002,4.899
1132
+ "2024-08-11 19:16:07,111",56550,1.809,0.307,9091.031,0.002,4.687
1133
+ "2024-08-11 19:20:17,900",56600,1.807,0.306,9090.943,0.002,5.016
1134
+ "2024-08-11 19:24:29,336",56650,1.816,0.307,9090.84,0.002,5.029
1135
+ "2024-08-11 19:28:33,570",56700,1.804,0.311,9090.737,0.002,4.885
1136
+ "2024-08-11 19:32:34,870",56750,1.807,0.308,9090.642,0.002,4.826
1137
+ "2024-08-11 19:36:48,798",56800,1.806,0.307,9090.549,0.002,5.079
1138
+ "2024-08-11 19:40:53,609",56850,1.799,0.308,9090.45,0.002,4.896
1139
+ "2024-08-11 19:44:48,784",56900,1.803,0.309,9090.349,0.002,4.703
1140
+ "2024-08-11 19:48:55,965",56950,1.799,0.307,9090.256,0.002,4.944
1141
+ "2024-08-11 19:53:02,054",57000,1.797,0.308,9090.16,0.002,4.922
1142
+ "2024-08-11 19:56:59,854",57050,1.795,0.308,9090.065,0.002,4.756
1143
+ "2024-08-11 20:01:49,199",57100,1.788,0.307,9089.959,0.002,5.787
1144
+ "2024-08-11 20:05:56,117",57150,1.792,0.308,9089.859,0.002,4.938
1145
+ "2024-08-11 20:09:54,672",57200,1.787,0.305,9089.765,0.002,4.771
1146
+ "2024-08-11 20:13:52,764",57250,1.804,0.307,9089.666,0.002,4.762
1147
+ "2024-08-11 20:17:56,117",57300,1.79,0.308,9089.561,0.002,4.867
1148
+ "2024-08-11 20:21:39,065",57350,1.788,0.307,9089.458,0.002,4.459
1149
+ "2024-08-11 20:25:23,468",57400,1.79,0.306,9089.348,0.002,4.488
1150
+ "2024-08-11 20:29:16,922",57450,1.79,0.308,9089.251,0.002,4.669
1151
+ "2024-08-11 20:33:07,082",57500,1.788,0.307,9089.152,0.002,4.603
1152
+ "2024-08-11 20:36:55,672",57550,1.784,0.307,9089.054,0.002,4.572
1153
+ "2024-08-11 20:40:43,035",57600,1.782,0.307,9088.953,0.002,4.547
1154
+ "2024-08-11 20:44:33,501",57650,1.786,0.307,9088.842,0.002,4.609
1155
+ "2024-08-11 20:48:25,676",57700,1.779,0.307,9088.733,0.002,4.643
1156
+ "2024-08-11 20:52:15,588",57750,1.781,0.307,9088.639,0.002,4.598
1157
+ "2024-08-11 20:56:06,157",57800,1.778,0.307,9088.536,0.002,4.611
1158
+ "2024-08-11 20:59:53,337",57850,1.776,0.307,9088.436,0.002,4.544
1159
+ "2024-08-11 21:03:44,489",57900,1.778,0.309,9088.328,0.002,4.623
1160
+ "2024-08-11 21:07:36,703",57950,1.78,0.307,9088.214,0.002,4.644
1161
+ "2024-08-11 21:11:29,888",58000,1.781,0.308,9088.11,0.002,4.664
1162
+ "2024-08-11 21:15:15,006",58050,1.767,0.309,9088.006,0.002,4.502
1163
+ "2024-08-11 21:19:01,376",58100,1.774,0.308,9087.903,0.002,4.527
1164
+ "2024-08-11 21:22:51,140",58150,1.777,0.309,9087.793,0.002,4.595
1165
+ "2024-08-11 21:26:35,859",58200,1.775,0.308,9087.692,0.002,4.494
1166
+ "2024-08-11 21:30:24,002",58250,1.771,0.309,9087.588,0.002,4.563
1167
+ "2024-08-11 21:34:15,810",58300,1.764,0.308,9087.486,0.002,4.636
1168
+ "2024-08-11 21:38:04,254",58350,1.77,0.309,9087.387,0.002,4.569
1169
+ "2024-08-11 21:41:45,046",58400,1.759,0.309,9087.285,0.002,4.416
1170
+ "2024-08-11 21:45:29,763",58450,1.762,0.308,9087.18,0.002,4.494
1171
+ "2024-08-11 21:49:16,119",58500,1.764,0.308,9087.067,0.002,4.527
1172
+ "2024-08-11 21:52:58,696",58550,1.766,0.308,9086.963,0.002,4.452
1173
+ "2024-08-11 21:56:46,334",58600,1.762,0.31,9086.868,0.002,4.553
1174
+ "2024-08-11 22:00:27,399",58650,1.755,0.31,9086.77,0.002,4.421
1175
+ "2024-08-11 22:04:12,722",58700,1.757,0.307,9086.661,0.002,4.506
1176
+ "2024-08-11 22:08:00,160",58750,1.751,0.308,9086.563,0.002,4.549
1177
+ "2024-08-11 22:11:44,169",58800,1.752,0.309,9086.458,0.002,4.48
1178
+ "2024-08-11 22:15:28,355",58850,1.743,0.307,9086.355,0.002,4.484
1179
+ "2024-08-11 22:19:13,149",58900,1.745,0.308,9086.253,0.002,4.496
1180
+ "2024-08-11 22:22:54,103",58950,1.743,0.308,9086.151,0.002,4.419
1181
+ "2024-08-11 22:26:42,100",59000,1.755,0.308,9086.051,0.002,4.56
1182
+ "2024-08-11 22:30:30,714",59050,1.749,0.308,9085.948,0.002,4.572
1183
+ "2024-08-11 22:34:12,979",59100,1.759,0.31,9085.851,0.002,4.445
1184
+ "2024-08-11 22:38:00,619",59150,1.752,0.308,9085.755,0.002,4.553
1185
+ "2024-08-11 22:41:41,913",59200,1.755,0.31,9085.647,0.002,4.426
1186
+ "2024-08-11 22:45:34,811",59250,1.759,0.31,9085.551,0.002,4.658
1187
+ "2024-08-11 22:49:19,551",59300,1.753,0.309,9085.452,0.002,4.495
1188
+ "2024-08-11 22:53:00,772",59350,1.752,0.311,9085.363,0.002,4.424
1189
+ "2024-08-11 22:56:45,139",59400,1.76,0.311,9085.266,0.002,4.487
1190
+ "2024-08-11 23:00:34,173",59450,1.757,0.311,9085.158,0.002,4.581
1191
+ "2024-08-11 23:04:21,635",59500,1.754,0.31,9085.065,0.002,4.549
1192
+ "2024-08-11 23:08:03,486",59550,1.749,0.31,9084.969,0.002,4.437
1193
+ "2024-08-11 23:11:45,006",59600,1.764,0.313,9084.871,0.002,4.43
1194
+ "2024-08-11 23:15:23,509",59650,1.757,0.311,9084.777,0.002,4.37
1195
+ "2024-08-11 23:19:01,925",59700,1.76,0.311,9084.68,0.002,4.368
1196
+ "2024-08-11 23:22:43,911",59750,1.755,0.311,9084.58,0.002,4.44
1197
+ "2024-08-11 23:26:25,067",59800,1.748,0.311,9084.489,0.002,4.423
1198
+ "2024-08-11 23:30:03,875",59850,1.749,0.311,9084.392,0.002,4.376
1199
+ "2024-08-11 23:33:42,430",59900,1.761,0.312,9084.295,0.002,4.371
1200
+ "2024-08-11 23:37:30,256",59950,1.749,0.313,9084.198,0.002,4.556
1201
+ "2024-08-11 23:41:15,929",60000,1.763,0.311,9084.104,0.002,4.513
checkpoints/weights_l2_over_steps.png CHANGED