Upload folder using huggingface_hub
Browse files
checkpoints/grad_l2_over_steps.png
CHANGED
![]() |
![]() |
checkpoints/loss_over_steps.png
CHANGED
![]() |
![]() |
checkpoints/lr_over_steps.png
CHANGED
![]() |
![]() |
checkpoints/main.log
CHANGED
@@ -1359,3 +1359,52 @@ Mixed precision type: bf16
|
|
1359 |
[2024-08-12 02:37:37,466][Main][INFO] - [train] Step 62400 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.775 | Lr --> 0.001 | Seconds_per_step --> 4.425 |
|
1360 |
[2024-08-12 02:41:13,861][Main][INFO] - [train] Step 62450 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.691 | Lr --> 0.001 | Seconds_per_step --> 4.328 |
|
1361 |
[2024-08-12 02:44:48,098][Main][INFO] - [train] Step 62500 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.316 | Weights_l2 --> 9079.610 | Lr --> 0.001 | Seconds_per_step --> 4.285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1359 |
[2024-08-12 02:37:37,466][Main][INFO] - [train] Step 62400 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.775 | Lr --> 0.001 | Seconds_per_step --> 4.425 |
|
1360 |
[2024-08-12 02:41:13,861][Main][INFO] - [train] Step 62450 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.691 | Lr --> 0.001 | Seconds_per_step --> 4.328 |
|
1361 |
[2024-08-12 02:44:48,098][Main][INFO] - [train] Step 62500 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.316 | Weights_l2 --> 9079.610 | Lr --> 0.001 | Seconds_per_step --> 4.285 |
|
1362 |
+
[2024-08-12 02:48:27,232][Main][INFO] - [train] Step 62550 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.317 | Weights_l2 --> 9079.519 | Lr --> 0.001 | Seconds_per_step --> 4.383 |
|
1363 |
+
[2024-08-12 02:52:03,675][Main][INFO] - [train] Step 62600 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.317 | Weights_l2 --> 9079.435 | Lr --> 0.001 | Seconds_per_step --> 4.329 |
|
1364 |
+
[2024-08-12 02:55:36,853][Main][INFO] - [train] Step 62650 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.318 | Weights_l2 --> 9079.354 | Lr --> 0.001 | Seconds_per_step --> 4.264 |
|
1365 |
+
[2024-08-12 02:59:13,236][Main][INFO] - [train] Step 62700 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.317 | Weights_l2 --> 9079.273 | Lr --> 0.001 | Seconds_per_step --> 4.328 |
|
1366 |
+
[2024-08-12 03:02:51,873][Main][INFO] - [train] Step 62750 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.316 | Weights_l2 --> 9079.186 | Lr --> 0.001 | Seconds_per_step --> 4.373 |
|
1367 |
+
[2024-08-12 03:06:17,555][Main][INFO] - [train] Step 62800 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.316 | Weights_l2 --> 9079.101 | Lr --> 0.001 | Seconds_per_step --> 4.114 |
|
1368 |
+
[2024-08-12 03:09:51,048][Main][INFO] - [train] Step 62850 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.317 | Weights_l2 --> 9079.013 | Lr --> 0.001 | Seconds_per_step --> 4.270 |
|
1369 |
+
[2024-08-12 03:13:26,624][Main][INFO] - [train] Step 62900 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.929 | Lr --> 0.001 | Seconds_per_step --> 4.311 |
|
1370 |
+
[2024-08-12 03:16:59,358][Main][INFO] - [train] Step 62950 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.316 | Weights_l2 --> 9078.845 | Lr --> 0.001 | Seconds_per_step --> 4.255 |
|
1371 |
+
[2024-08-12 03:20:28,856][Main][INFO] - [train] Step 63000 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.761 | Lr --> 0.001 | Seconds_per_step --> 4.190 |
|
1372 |
+
[2024-08-12 03:24:04,310][Main][INFO] - [train] Step 63050 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.679 | Lr --> 0.001 | Seconds_per_step --> 4.309 |
|
1373 |
+
[2024-08-12 03:27:41,598][Main][INFO] - [train] Step 63100 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.319 | Weights_l2 --> 9078.598 | Lr --> 0.001 | Seconds_per_step --> 4.346 |
|
1374 |
+
[2024-08-12 03:31:16,710][Main][INFO] - [train] Step 63150 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.515 | Lr --> 0.001 | Seconds_per_step --> 4.302 |
|
1375 |
+
[2024-08-12 03:34:45,673][Main][INFO] - [train] Step 63200 out of 80000 | Loss --> 1.763 | Grad_l2 --> 0.323 | Weights_l2 --> 9078.436 | Lr --> 0.001 | Seconds_per_step --> 4.179 |
|
1376 |
+
[2024-08-12 03:38:25,800][Main][INFO] - [train] Step 63250 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.355 | Lr --> 0.001 | Seconds_per_step --> 4.403 |
|
1377 |
+
[2024-08-12 03:41:57,982][Main][INFO] - [train] Step 63300 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.275 | Lr --> 0.001 | Seconds_per_step --> 4.244 |
|
1378 |
+
[2024-08-12 03:45:31,669][Main][INFO] - [train] Step 63350 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.316 | Weights_l2 --> 9078.195 | Lr --> 0.001 | Seconds_per_step --> 4.274 |
|
1379 |
+
[2024-08-12 03:48:59,956][Main][INFO] - [train] Step 63400 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.318 | Weights_l2 --> 9078.112 | Lr --> 0.001 | Seconds_per_step --> 4.166 |
|
1380 |
+
[2024-08-12 03:52:32,360][Main][INFO] - [train] Step 63450 out of 80000 | Loss --> 1.767 | Grad_l2 --> 0.321 | Weights_l2 --> 9078.029 | Lr --> 0.001 | Seconds_per_step --> 4.248 |
|
1381 |
+
[2024-08-12 03:56:03,506][Main][INFO] - [train] Step 63500 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.322 | Weights_l2 --> 9077.949 | Lr --> 0.001 | Seconds_per_step --> 4.223 |
|
1382 |
+
[2024-08-12 03:59:28,970][Main][INFO] - [train] Step 63550 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.320 | Weights_l2 --> 9077.869 | Lr --> 0.001 | Seconds_per_step --> 4.109 |
|
1383 |
+
[2024-08-12 04:02:55,079][Main][INFO] - [train] Step 63600 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.317 | Weights_l2 --> 9077.787 | Lr --> 0.001 | Seconds_per_step --> 4.122 |
|
1384 |
+
[2024-08-12 04:06:21,350][Main][INFO] - [train] Step 63650 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.321 | Weights_l2 --> 9077.704 | Lr --> 0.001 | Seconds_per_step --> 4.125 |
|
1385 |
+
[2024-08-12 04:09:46,832][Main][INFO] - [train] Step 63700 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.620 | Lr --> 0.001 | Seconds_per_step --> 4.110 |
|
1386 |
+
[2024-08-12 04:13:18,253][Main][INFO] - [train] Step 63750 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.542 | Lr --> 0.001 | Seconds_per_step --> 4.228 |
|
1387 |
+
[2024-08-12 04:16:46,689][Main][INFO] - [train] Step 63800 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.320 | Weights_l2 --> 9077.465 | Lr --> 0.001 | Seconds_per_step --> 4.169 |
|
1388 |
+
[2024-08-12 04:20:17,181][Main][INFO] - [train] Step 63850 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.385 | Lr --> 0.001 | Seconds_per_step --> 4.210 |
|
1389 |
+
[2024-08-12 04:23:44,511][Main][INFO] - [train] Step 63900 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.320 | Weights_l2 --> 9077.307 | Lr --> 0.001 | Seconds_per_step --> 4.147 |
|
1390 |
+
[2024-08-12 04:27:16,494][Main][INFO] - [train] Step 63950 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.231 | Lr --> 0.001 | Seconds_per_step --> 4.240 |
|
1391 |
+
[2024-08-12 04:30:45,204][Main][INFO] - [train] Step 64000 out of 80000 | Loss --> 1.735 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.153 | Lr --> 0.001 | Seconds_per_step --> 4.174 |
|
1392 |
+
[2024-08-12 04:34:13,330][Main][INFO] - [train] Step 64050 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.319 | Weights_l2 --> 9077.076 | Lr --> 0.001 | Seconds_per_step --> 4.163 |
|
1393 |
+
[2024-08-12 04:37:45,142][Main][INFO] - [train] Step 64100 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.996 | Lr --> 0.001 | Seconds_per_step --> 4.236 |
|
1394 |
+
[2024-08-12 04:41:18,674][Main][INFO] - [train] Step 64150 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.921 | Lr --> 0.001 | Seconds_per_step --> 4.271 |
|
1395 |
+
[2024-08-12 04:44:48,178][Main][INFO] - [train] Step 64200 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.843 | Lr --> 0.001 | Seconds_per_step --> 4.190 |
|
1396 |
+
[2024-08-12 04:48:19,842][Main][INFO] - [train] Step 64250 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.321 | Weights_l2 --> 9076.764 | Lr --> 0.001 | Seconds_per_step --> 4.233 |
|
1397 |
+
[2024-08-12 04:51:52,645][Main][INFO] - [train] Step 64300 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.690 | Lr --> 0.001 | Seconds_per_step --> 4.256 |
|
1398 |
+
[2024-08-12 04:55:21,007][Main][INFO] - [train] Step 64350 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.321 | Weights_l2 --> 9076.614 | Lr --> 0.001 | Seconds_per_step --> 4.167 |
|
1399 |
+
[2024-08-12 04:58:49,316][Main][INFO] - [train] Step 64400 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.320 | Weights_l2 --> 9076.537 | Lr --> 0.001 | Seconds_per_step --> 4.166 |
|
1400 |
+
[2024-08-12 05:02:19,749][Main][INFO] - [train] Step 64450 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.321 | Weights_l2 --> 9076.460 | Lr --> 0.001 | Seconds_per_step --> 4.209 |
|
1401 |
+
[2024-08-12 05:05:46,590][Main][INFO] - [train] Step 64500 out of 80000 | Loss --> 1.731 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.389 | Lr --> 0.001 | Seconds_per_step --> 4.137 |
|
1402 |
+
[2024-08-12 05:09:15,338][Main][INFO] - [train] Step 64550 out of 80000 | Loss --> 1.733 | Grad_l2 --> 0.323 | Weights_l2 --> 9076.316 | Lr --> 0.001 | Seconds_per_step --> 4.175 |
|
1403 |
+
[2024-08-12 05:12:48,312][Main][INFO] - [train] Step 64600 out of 80000 | Loss --> 1.734 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.241 | Lr --> 0.001 | Seconds_per_step --> 4.259 |
|
1404 |
+
[2024-08-12 05:16:15,861][Main][INFO] - [train] Step 64650 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.324 | Weights_l2 --> 9076.163 | Lr --> 0.001 | Seconds_per_step --> 4.151 |
|
1405 |
+
[2024-08-12 05:19:47,700][Main][INFO] - [train] Step 64700 out of 80000 | Loss --> 1.731 | Grad_l2 --> 0.322 | Weights_l2 --> 9076.093 | Lr --> 0.001 | Seconds_per_step --> 4.237 |
|
1406 |
+
[2024-08-12 05:23:19,101][Main][INFO] - [train] Step 64750 out of 80000 | Loss --> 1.732 | Grad_l2 --> 0.324 | Weights_l2 --> 9076.017 | Lr --> 0.001 | Seconds_per_step --> 4.228 |
|
1407 |
+
[2024-08-12 05:26:53,820][Main][INFO] - [train] Step 64800 out of 80000 | Loss --> 1.730 | Grad_l2 --> 0.323 | Weights_l2 --> 9075.943 | Lr --> 0.001 | Seconds_per_step --> 4.294 |
|
1408 |
+
[2024-08-12 05:30:25,765][Main][INFO] - [train] Step 64850 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.323 | Weights_l2 --> 9075.868 | Lr --> 0.001 | Seconds_per_step --> 4.239 |
|
1409 |
+
[2024-08-12 05:33:57,856][Main][INFO] - [train] Step 64900 out of 80000 | Loss --> 1.729 | Grad_l2 --> 0.322 | Weights_l2 --> 9075.794 | Lr --> 0.001 | Seconds_per_step --> 4.242 |
|
1410 |
+
[2024-08-12 05:37:34,949][Main][INFO] - [train] Step 64950 out of 80000 | Loss --> 1.727 | Grad_l2 --> 0.324 | Weights_l2 --> 9075.723 | Lr --> 0.001 | Seconds_per_step --> 4.342 |
|
checkpoints/seconds_per_step_over_steps.png
CHANGED
![]() |
![]() |
checkpoints/training_metrics.csv
CHANGED
@@ -1249,3 +1249,49 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
|
|
1249 |
"2024-08-12 02:37:37,466",62400,1.758,0.318,9079.775,0.001,4.425
|
1250 |
"2024-08-12 02:41:13,861",62450,1.756,0.318,9079.691,0.001,4.328
|
1251 |
"2024-08-12 02:44:48,098",62500,1.754,0.316,9079.61,0.001,4.285
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1249 |
"2024-08-12 02:37:37,466",62400,1.758,0.318,9079.775,0.001,4.425
|
1250 |
"2024-08-12 02:41:13,861",62450,1.756,0.318,9079.691,0.001,4.328
|
1251 |
"2024-08-12 02:44:48,098",62500,1.754,0.316,9079.61,0.001,4.285
|
1252 |
+
"2024-08-12 02:48:27,232",62550,1.756,0.317,9079.519,0.001,4.383
|
1253 |
+
"2024-08-12 02:52:03,675",62600,1.751,0.317,9079.435,0.001,4.329
|
1254 |
+
"2024-08-12 02:55:36,853",62650,1.744,0.318,9079.354,0.001,4.264
|
1255 |
+
"2024-08-12 02:59:13,236",62700,1.755,0.317,9079.273,0.001,4.328
|
1256 |
+
"2024-08-12 03:02:51,873",62750,1.752,0.316,9079.186,0.001,4.373
|
1257 |
+
"2024-08-12 03:06:17,555",62800,1.756,0.316,9079.101,0.001,4.114
|
1258 |
+
"2024-08-12 03:09:51,048",62850,1.744,0.317,9079.013,0.001,4.27
|
1259 |
+
"2024-08-12 03:13:26,624",62900,1.747,0.318,9078.929,0.001,4.311
|
1260 |
+
"2024-08-12 03:16:59,358",62950,1.74,0.316,9078.845,0.001,4.255
|
1261 |
+
"2024-08-12 03:20:28,856",63000,1.749,0.318,9078.761,0.001,4.19
|
1262 |
+
"2024-08-12 03:24:04,310",63050,1.755,0.318,9078.679,0.001,4.309
|
1263 |
+
"2024-08-12 03:27:41,598",63100,1.755,0.319,9078.598,0.001,4.346
|
1264 |
+
"2024-08-12 03:31:16,710",63150,1.75,0.318,9078.515,0.001,4.302
|
1265 |
+
"2024-08-12 03:34:45,673",63200,1.763,0.323,9078.436,0.001,4.179
|
1266 |
+
"2024-08-12 03:38:25,800",63250,1.752,0.318,9078.355,0.001,4.403
|
1267 |
+
"2024-08-12 03:41:57,982",63300,1.76,0.321,9078.275,0.001,4.244
|
1268 |
+
"2024-08-12 03:45:31,669",63350,1.754,0.316,9078.195,0.001,4.274
|
1269 |
+
"2024-08-12 03:48:59,956",63400,1.758,0.318,9078.112,0.001,4.166
|
1270 |
+
"2024-08-12 03:52:32,360",63450,1.767,0.321,9078.029,0.001,4.248
|
1271 |
+
"2024-08-12 03:56:03,506",63500,1.752,0.322,9077.949,0.001,4.223
|
1272 |
+
"2024-08-12 03:59:28,970",63550,1.756,0.32,9077.869,0.001,4.109
|
1273 |
+
"2024-08-12 04:02:55,079",63600,1.754,0.317,9077.787,0.001,4.122
|
1274 |
+
"2024-08-12 04:06:21,350",63650,1.748,0.321,9077.704,0.001,4.125
|
1275 |
+
"2024-08-12 04:09:46,832",63700,1.744,0.319,9077.62,0.001,4.11
|
1276 |
+
"2024-08-12 04:13:18,253",63750,1.755,0.319,9077.542,0.001,4.228
|
1277 |
+
"2024-08-12 04:16:46,689",63800,1.748,0.32,9077.465,0.001,4.169
|
1278 |
+
"2024-08-12 04:20:17,181",63850,1.742,0.319,9077.385,0.001,4.21
|
1279 |
+
"2024-08-12 04:23:44,511",63900,1.751,0.32,9077.307,0.001,4.147
|
1280 |
+
"2024-08-12 04:27:16,494",63950,1.736,0.319,9077.231,0.001,4.24
|
1281 |
+
"2024-08-12 04:30:45,204",64000,1.735,0.319,9077.153,0.001,4.174
|
1282 |
+
"2024-08-12 04:34:13,330",64050,1.745,0.319,9077.076,0.001,4.163
|
1283 |
+
"2024-08-12 04:37:45,142",64100,1.738,0.322,9076.996,0.001,4.236
|
1284 |
+
"2024-08-12 04:41:18,674",64150,1.744,0.322,9076.921,0.001,4.271
|
1285 |
+
"2024-08-12 04:44:48,178",64200,1.744,0.322,9076.843,0.001,4.19
|
1286 |
+
"2024-08-12 04:48:19,842",64250,1.744,0.321,9076.764,0.001,4.233
|
1287 |
+
"2024-08-12 04:51:52,645",64300,1.745,0.322,9076.69,0.001,4.256
|
1288 |
+
"2024-08-12 04:55:21,007",64350,1.742,0.321,9076.614,0.001,4.167
|
1289 |
+
"2024-08-12 04:58:49,316",64400,1.726,0.32,9076.537,0.001,4.166
|
1290 |
+
"2024-08-12 05:02:19,749",64450,1.738,0.321,9076.46,0.001,4.209
|
1291 |
+
"2024-08-12 05:05:46,590",64500,1.731,0.322,9076.389,0.001,4.137
|
1292 |
+
"2024-08-12 05:09:15,338",64550,1.733,0.323,9076.316,0.001,4.175
|
1293 |
+
"2024-08-12 05:12:48,312",64600,1.734,0.322,9076.241,0.001,4.259
|
1294 |
+
"2024-08-12 05:16:15,861",64650,1.742,0.324,9076.163,0.001,4.151
|
1295 |
+
"2024-08-12 05:19:47,700",64700,1.731,0.322,9076.093,0.001,4.237
|
1296 |
+
"2024-08-12 05:23:19,101",64750,1.732,0.324,9076.017,0.001,4.228
|
1297 |
+
"2024-08-12 05:26:53,820",64800,1.73,0.323,9075.943,0.001,4.294
|
checkpoints/weights_l2_over_steps.png
CHANGED
![]() |
![]() |