pszemraj commited on
Commit
4d8a2ca
·
verified ·
1 Parent(s): 6a5f4a4

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-pt-70000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f02790ca1e08ed5869eaa4e2abfb7349b923308591d4557a1e629abbe3de0fb
3
+ size 1202681712
checkpoints/checkpoint-pt-70000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/checkpoint-pt-75000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd5e36c3e95a195e8f3dc003e55d73162c7abfed056529a549373e98b293010
3
+ size 1202681712
checkpoints/checkpoint-pt-75000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634ae87ad9ec14553a807f970f4e595e3fef7b62fd4afaddf671a76426ff94ed
3
+ size 14344
checkpoints/grad_l2_over_steps.png CHANGED
checkpoints/loss_over_steps.png CHANGED
checkpoints/lr_over_steps.png CHANGED
checkpoints/main.log CHANGED
@@ -1453,3 +1453,251 @@ Mixed precision type: bf16
1453
  [2024-08-12 07:56:36,923][Main][INFO] - [train] Step 66700 out of 80000 | Loss --> 1.768 | Grad_l2 --> 0.329 | Weights_l2 --> 9073.215 | Lr --> 0.001 | Seconds_per_step --> 4.601 |
1454
  [2024-08-12 08:00:18,881][Main][INFO] - [train] Step 66750 out of 80000 | Loss --> 1.770 | Grad_l2 --> 0.331 | Weights_l2 --> 9073.141 | Lr --> 0.001 | Seconds_per_step --> 4.439 |
1455
  [2024-08-12 08:04:03,533][Main][INFO] - [train] Step 66800 out of 80000 | Loss --> 1.769 | Grad_l2 --> 0.330 | Weights_l2 --> 9073.071 | Lr --> 0.001 | Seconds_per_step --> 4.493 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  [2024-08-12 07:56:36,923][Main][INFO] - [train] Step 66700 out of 80000 | Loss --> 1.768 | Grad_l2 --> 0.329 | Weights_l2 --> 9073.215 | Lr --> 0.001 | Seconds_per_step --> 4.601 |
1454
  [2024-08-12 08:00:18,881][Main][INFO] - [train] Step 66750 out of 80000 | Loss --> 1.770 | Grad_l2 --> 0.331 | Weights_l2 --> 9073.141 | Lr --> 0.001 | Seconds_per_step --> 4.439 |
1455
  [2024-08-12 08:04:03,533][Main][INFO] - [train] Step 66800 out of 80000 | Loss --> 1.769 | Grad_l2 --> 0.330 | Weights_l2 --> 9073.071 | Lr --> 0.001 | Seconds_per_step --> 4.493 |
1456
+ [2024-08-12 08:07:50,500][Main][INFO] - [train] Step 66850 out of 80000 | Loss --> 1.769 | Grad_l2 --> 0.331 | Weights_l2 --> 9073.004 | Lr --> 0.001 | Seconds_per_step --> 4.539 |
1457
+ [2024-08-12 08:11:49,816][Main][INFO] - [train] Step 66900 out of 80000 | Loss --> 1.768 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.935 | Lr --> 0.001 | Seconds_per_step --> 4.786 |
1458
+ [2024-08-12 08:15:56,432][Main][INFO] - [train] Step 66950 out of 80000 | Loss --> 1.768 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.867 | Lr --> 0.001 | Seconds_per_step --> 4.932 |
1459
+ [2024-08-12 08:20:02,525][Main][INFO] - [train] Step 67000 out of 80000 | Loss --> 1.779 | Grad_l2 --> 0.332 | Weights_l2 --> 9072.797 | Lr --> 0.001 | Seconds_per_step --> 4.922 |
1460
+ [2024-08-12 08:23:53,330][Main][INFO] - [train] Step 67050 out of 80000 | Loss --> 1.771 | Grad_l2 --> 0.333 | Weights_l2 --> 9072.730 | Lr --> 0.001 | Seconds_per_step --> 4.616 |
1461
+ [2024-08-12 08:27:56,587][Main][INFO] - [train] Step 67100 out of 80000 | Loss --> 1.774 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.661 | Lr --> 0.001 | Seconds_per_step --> 4.865 |
1462
+ [2024-08-12 08:32:02,097][Main][INFO] - [train] Step 67150 out of 80000 | Loss --> 1.772 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.592 | Lr --> 0.001 | Seconds_per_step --> 4.910 |
1463
+ [2024-08-12 08:36:03,847][Main][INFO] - [train] Step 67200 out of 80000 | Loss --> 1.774 | Grad_l2 --> 0.332 | Weights_l2 --> 9072.521 | Lr --> 0.001 | Seconds_per_step --> 4.835 |
1464
+ [2024-08-12 08:40:03,755][Main][INFO] - [train] Step 67250 out of 80000 | Loss --> 1.763 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.457 | Lr --> 0.001 | Seconds_per_step --> 4.798 |
1465
+ [2024-08-12 08:44:12,833][Main][INFO] - [train] Step 67300 out of 80000 | Loss --> 1.769 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.387 | Lr --> 0.001 | Seconds_per_step --> 4.982 |
1466
+ [2024-08-12 08:48:15,824][Main][INFO] - [train] Step 67350 out of 80000 | Loss --> 1.760 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.319 | Lr --> 0.001 | Seconds_per_step --> 4.860 |
1467
+ [2024-08-12 08:52:17,176][Main][INFO] - [train] Step 67400 out of 80000 | Loss --> 1.766 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.248 | Lr --> 0.001 | Seconds_per_step --> 4.827 |
1468
+ [2024-08-12 08:56:26,912][Main][INFO] - [train] Step 67450 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.332 | Weights_l2 --> 9072.181 | Lr --> 0.001 | Seconds_per_step --> 4.995 |
1469
+ [2024-08-12 09:00:28,981][Main][INFO] - [train] Step 67500 out of 80000 | Loss --> 1.772 | Grad_l2 --> 0.331 | Weights_l2 --> 9072.113 | Lr --> 0.001 | Seconds_per_step --> 4.841 |
1470
+ [2024-08-12 09:04:36,172][Main][INFO] - [train] Step 67550 out of 80000 | Loss --> 1.770 | Grad_l2 --> 0.335 | Weights_l2 --> 9072.048 | Lr --> 0.001 | Seconds_per_step --> 4.944 |
1471
+ [2024-08-12 09:08:49,679][Main][INFO] - [train] Step 67600 out of 80000 | Loss --> 1.766 | Grad_l2 --> 0.335 | Weights_l2 --> 9071.978 | Lr --> 0.001 | Seconds_per_step --> 5.070 |
1472
+ [2024-08-12 09:12:58,709][Main][INFO] - [train] Step 67650 out of 80000 | Loss --> 1.764 | Grad_l2 --> 0.331 | Weights_l2 --> 9071.910 | Lr --> 0.001 | Seconds_per_step --> 4.981 |
1473
+ [2024-08-12 09:17:14,413][Main][INFO] - [train] Step 67700 out of 80000 | Loss --> 1.765 | Grad_l2 --> 0.331 | Weights_l2 --> 9071.843 | Lr --> 0.001 | Seconds_per_step --> 5.114 |
1474
+ [2024-08-12 09:21:11,505][Main][INFO] - [train] Step 67750 out of 80000 | Loss --> 1.765 | Grad_l2 --> 0.331 | Weights_l2 --> 9071.774 | Lr --> 0.001 | Seconds_per_step --> 4.742 |
1475
+ [2024-08-12 09:25:15,107][Main][INFO] - [train] Step 67800 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.332 | Weights_l2 --> 9071.709 | Lr --> 0.001 | Seconds_per_step --> 4.872 |
1476
+ [2024-08-12 09:29:20,556][Main][INFO] - [train] Step 67850 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.330 | Weights_l2 --> 9071.643 | Lr --> 0.001 | Seconds_per_step --> 4.909 |
1477
+ [2024-08-12 09:33:24,433][Main][INFO] - [train] Step 67900 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.334 | Weights_l2 --> 9071.575 | Lr --> 0.001 | Seconds_per_step --> 4.878 |
1478
+ [2024-08-12 09:37:21,053][Main][INFO] - [train] Step 67950 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.335 | Weights_l2 --> 9071.510 | Lr --> 0.001 | Seconds_per_step --> 4.732 |
1479
+ [2024-08-12 09:41:30,689][Main][INFO] - [train] Step 68000 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.331 | Weights_l2 --> 9071.446 | Lr --> 0.001 | Seconds_per_step --> 4.993 |
1480
+ [2024-08-12 09:45:33,925][Main][INFO] - [train] Step 68050 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.333 | Weights_l2 --> 9071.382 | Lr --> 0.001 | Seconds_per_step --> 4.865 |
1481
+ [2024-08-12 09:49:32,988][Main][INFO] - [train] Step 68100 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.331 | Weights_l2 --> 9071.320 | Lr --> 0.001 | Seconds_per_step --> 4.781 |
1482
+ [2024-08-12 09:53:35,684][Main][INFO] - [train] Step 68150 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.334 | Weights_l2 --> 9071.255 | Lr --> 0.001 | Seconds_per_step --> 4.854 |
1483
+ [2024-08-12 09:57:46,641][Main][INFO] - [train] Step 68200 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.333 | Weights_l2 --> 9071.196 | Lr --> 0.001 | Seconds_per_step --> 5.019 |
1484
+ [2024-08-12 10:01:44,603][Main][INFO] - [train] Step 68250 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.333 | Weights_l2 --> 9071.133 | Lr --> 0.001 | Seconds_per_step --> 4.759 |
1485
+ [2024-08-12 10:05:41,867][Main][INFO] - [train] Step 68300 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.334 | Weights_l2 --> 9071.075 | Lr --> 0.001 | Seconds_per_step --> 4.745 |
1486
+ [2024-08-12 10:09:51,335][Main][INFO] - [train] Step 68350 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.333 | Weights_l2 --> 9071.016 | Lr --> 0.001 | Seconds_per_step --> 4.989 |
1487
+ [2024-08-12 10:14:03,242][Main][INFO] - [train] Step 68400 out of 80000 | Loss --> 1.762 | Grad_l2 --> 0.334 | Weights_l2 --> 9070.960 | Lr --> 0.001 | Seconds_per_step --> 5.038 |
1488
+ [2024-08-12 10:17:57,549][Main][INFO] - [train] Step 68450 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.902 | Lr --> 0.001 | Seconds_per_step --> 4.686 |
1489
+ [2024-08-12 10:22:04,017][Main][INFO] - [train] Step 68500 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.845 | Lr --> 0.001 | Seconds_per_step --> 4.929 |
1490
+ [2024-08-12 10:26:15,511][Main][INFO] - [train] Step 68550 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.785 | Lr --> 0.001 | Seconds_per_step --> 5.030 |
1491
+ [2024-08-12 10:30:12,459][Main][INFO] - [train] Step 68600 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.334 | Weights_l2 --> 9070.729 | Lr --> 0.001 | Seconds_per_step --> 4.739 |
1492
+ [2024-08-12 10:34:09,711][Main][INFO] - [train] Step 68650 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.674 | Lr --> 0.001 | Seconds_per_step --> 4.745 |
1493
+ [2024-08-12 10:38:15,758][Main][INFO] - [train] Step 68700 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.620 | Lr --> 0.001 | Seconds_per_step --> 4.921 |
1494
+ [2024-08-12 10:42:31,275][Main][INFO] - [train] Step 68750 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.334 | Weights_l2 --> 9070.565 | Lr --> 0.001 | Seconds_per_step --> 5.110 |
1495
+ [2024-08-12 10:46:29,239][Main][INFO] - [train] Step 68800 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.336 | Weights_l2 --> 9070.513 | Lr --> 0.001 | Seconds_per_step --> 4.759 |
1496
+ [2024-08-12 10:50:35,687][Main][INFO] - [train] Step 68850 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.336 | Weights_l2 --> 9070.463 | Lr --> 0.000 | Seconds_per_step --> 4.929 |
1497
+ [2024-08-12 10:54:45,439][Main][INFO] - [train] Step 68900 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.334 | Weights_l2 --> 9070.413 | Lr --> 0.000 | Seconds_per_step --> 4.995 |
1498
+ [2024-08-12 10:58:49,957][Main][INFO] - [train] Step 68950 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.362 | Lr --> 0.000 | Seconds_per_step --> 4.890 |
1499
+ [2024-08-12 11:02:50,584][Main][INFO] - [train] Step 69000 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.333 | Weights_l2 --> 9070.312 | Lr --> 0.000 | Seconds_per_step --> 4.813 |
1500
+ [2024-08-12 11:07:04,656][Main][INFO] - [train] Step 69050 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.336 | Weights_l2 --> 9070.263 | Lr --> 0.000 | Seconds_per_step --> 5.081 |
1501
+ [2024-08-12 11:11:13,715][Main][INFO] - [train] Step 69100 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.336 | Weights_l2 --> 9070.214 | Lr --> 0.000 | Seconds_per_step --> 4.981 |
1502
+ [2024-08-12 11:15:08,470][Main][INFO] - [train] Step 69150 out of 80000 | Loss --> 1.737 | Grad_l2 --> 0.335 | Weights_l2 --> 9070.167 | Lr --> 0.000 | Seconds_per_step --> 4.695 |
1503
+ [2024-08-12 11:19:18,203][Main][INFO] - [train] Step 69200 out of 80000 | Loss --> 1.739 | Grad_l2 --> 0.336 | Weights_l2 --> 9070.119 | Lr --> 0.000 | Seconds_per_step --> 4.995 |
1504
+ [2024-08-12 11:23:36,177][Main][INFO] - [train] Step 69250 out of 80000 | Loss --> 1.735 | Grad_l2 --> 0.334 | Weights_l2 --> 9070.077 | Lr --> 0.000 | Seconds_per_step --> 5.159 |
1505
+ [2024-08-12 11:27:32,982][Main][INFO] - [train] Step 69300 out of 80000 | Loss --> 1.731 | Grad_l2 --> 0.335 | Weights_l2 --> 9070.031 | Lr --> 0.000 | Seconds_per_step --> 4.736 |
1506
+ [2024-08-12 11:31:35,613][Main][INFO] - [train] Step 69350 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.336 | Weights_l2 --> 9069.988 | Lr --> 0.000 | Seconds_per_step --> 4.853 |
1507
+ [2024-08-12 11:35:44,767][Main][INFO] - [train] Step 69400 out of 80000 | Loss --> 1.733 | Grad_l2 --> 0.336 | Weights_l2 --> 9069.946 | Lr --> 0.000 | Seconds_per_step --> 4.983 |
1508
+ [2024-08-12 11:39:44,712][Main][INFO] - [train] Step 69450 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.336 | Weights_l2 --> 9069.900 | Lr --> 0.000 | Seconds_per_step --> 4.799 |
1509
+ [2024-08-12 11:43:39,145][Main][INFO] - [train] Step 69500 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.338 | Weights_l2 --> 9069.857 | Lr --> 0.000 | Seconds_per_step --> 4.689 |
1510
+ [2024-08-12 11:47:43,420][Main][INFO] - [train] Step 69550 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.815 | Lr --> 0.000 | Seconds_per_step --> 4.885 |
1511
+ [2024-08-12 11:51:55,140][Main][INFO] - [train] Step 69600 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.336 | Weights_l2 --> 9069.774 | Lr --> 0.000 | Seconds_per_step --> 5.034 |
1512
+ [2024-08-12 11:55:50,294][Main][INFO] - [train] Step 69650 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.734 | Lr --> 0.000 | Seconds_per_step --> 4.703 |
1513
+ [2024-08-12 11:59:50,709][Main][INFO] - [train] Step 69700 out of 80000 | Loss --> 1.739 | Grad_l2 --> 0.336 | Weights_l2 --> 9069.695 | Lr --> 0.000 | Seconds_per_step --> 4.808 |
1514
+ [2024-08-12 12:03:56,289][Main][INFO] - [train] Step 69750 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.657 | Lr --> 0.000 | Seconds_per_step --> 4.912 |
1515
+ [2024-08-12 12:08:00,668][Main][INFO] - [train] Step 69800 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.338 | Weights_l2 --> 9069.619 | Lr --> 0.000 | Seconds_per_step --> 4.888 |
1516
+ [2024-08-12 12:11:55,491][Main][INFO] - [train] Step 69850 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.580 | Lr --> 0.000 | Seconds_per_step --> 4.696 |
1517
+ [2024-08-12 12:15:58,225][Main][INFO] - [train] Step 69900 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.544 | Lr --> 0.000 | Seconds_per_step --> 4.855 |
1518
+ [2024-08-12 12:20:07,222][Main][INFO] - [train] Step 69950 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.507 | Lr --> 0.000 | Seconds_per_step --> 4.980 |
1519
+ [2024-08-12 12:24:09,812][Main][INFO] - [train] Step 70000 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.338 | Weights_l2 --> 9069.472 | Lr --> 0.000 | Seconds_per_step --> 4.852 |
1520
+ [2024-08-12 12:24:09,812][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-70000
1521
+ [2024-08-12 12:24:09,816][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
1522
+ [2024-08-12 12:24:13,019][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-70000/model.safetensors
1523
+ [2024-08-12 12:24:16,995][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-70000/optimizer.bin
1524
+ [2024-08-12 12:24:16,996][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-70000/scheduler.bin
1525
+ [2024-08-12 12:24:16,996][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-70000/sampler.bin
1526
+ [2024-08-12 12:24:16,996][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-70000/sampler_1.bin
1527
+ [2024-08-12 12:24:16,997][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-70000/random_states_0.pkl
1528
+ [2024-08-12 12:28:17,924][Main][INFO] - [train] Step 70050 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.436 | Lr --> 0.000 | Seconds_per_step --> 4.962 |
1529
+ [2024-08-12 12:32:24,318][Main][INFO] - [train] Step 70100 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.338 | Weights_l2 --> 9069.403 | Lr --> 0.000 | Seconds_per_step --> 4.928 |
1530
+ [2024-08-12 12:36:36,478][Main][INFO] - [train] Step 70150 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.369 | Lr --> 0.000 | Seconds_per_step --> 5.043 |
1531
+ [2024-08-12 12:40:33,276][Main][INFO] - [train] Step 70200 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.337 | Weights_l2 --> 9069.335 | Lr --> 0.000 | Seconds_per_step --> 4.736 |
1532
+ [2024-08-12 12:44:33,787][Main][INFO] - [train] Step 70250 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.302 | Lr --> 0.000 | Seconds_per_step --> 4.810 |
1533
+ [2024-08-12 12:48:49,628][Main][INFO] - [train] Step 70300 out of 80000 | Loss --> 1.737 | Grad_l2 --> 0.340 | Weights_l2 --> 9069.272 | Lr --> 0.000 | Seconds_per_step --> 5.117 |
1534
+ [2024-08-12 12:52:50,330][Main][INFO] - [train] Step 70350 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.241 | Lr --> 0.000 | Seconds_per_step --> 4.814 |
1535
+ [2024-08-12 12:56:47,628][Main][INFO] - [train] Step 70400 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.340 | Weights_l2 --> 9069.210 | Lr --> 0.000 | Seconds_per_step --> 4.746 |
1536
+ [2024-08-12 13:00:53,896][Main][INFO] - [train] Step 70450 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.181 | Lr --> 0.000 | Seconds_per_step --> 4.925 |
1537
+ [2024-08-12 13:05:13,889][Main][INFO] - [train] Step 70500 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.340 | Weights_l2 --> 9069.153 | Lr --> 0.000 | Seconds_per_step --> 5.200 |
1538
+ [2024-08-12 13:09:07,510][Main][INFO] - [train] Step 70550 out of 80000 | Loss --> 1.732 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.124 | Lr --> 0.000 | Seconds_per_step --> 4.672 |
1539
+ [2024-08-12 13:13:07,689][Main][INFO] - [train] Step 70600 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.340 | Weights_l2 --> 9069.095 | Lr --> 0.000 | Seconds_per_step --> 4.804 |
1540
+ [2024-08-12 13:17:20,202][Main][INFO] - [train] Step 70650 out of 80000 | Loss --> 1.739 | Grad_l2 --> 0.339 | Weights_l2 --> 9069.067 | Lr --> 0.000 | Seconds_per_step --> 5.050 |
1541
+ [2024-08-12 13:21:31,602][Main][INFO] - [train] Step 70700 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.342 | Weights_l2 --> 9069.040 | Lr --> 0.000 | Seconds_per_step --> 5.028 |
1542
+ [2024-08-12 13:25:28,721][Main][INFO] - [train] Step 70750 out of 80000 | Loss --> 1.739 | Grad_l2 --> 0.341 | Weights_l2 --> 9069.015 | Lr --> 0.000 | Seconds_per_step --> 4.742 |
1543
+ [2024-08-12 13:29:41,170][Main][INFO] - [train] Step 70800 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.990 | Lr --> 0.000 | Seconds_per_step --> 5.049 |
1544
+ [2024-08-12 13:33:45,475][Main][INFO] - [train] Step 70850 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.339 | Weights_l2 --> 9068.965 | Lr --> 0.000 | Seconds_per_step --> 4.886 |
1545
+ [2024-08-12 13:37:50,507][Main][INFO] - [train] Step 70900 out of 80000 | Loss --> 1.729 | Grad_l2 --> 0.338 | Weights_l2 --> 9068.942 | Lr --> 0.000 | Seconds_per_step --> 4.901 |
1546
+ [2024-08-12 13:41:51,338][Main][INFO] - [train] Step 70950 out of 80000 | Loss --> 1.729 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.918 | Lr --> 0.000 | Seconds_per_step --> 4.817 |
1547
+ [2024-08-12 13:46:05,634][Main][INFO] - [train] Step 71000 out of 80000 | Loss --> 1.721 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.894 | Lr --> 0.000 | Seconds_per_step --> 5.086 |
1548
+ [2024-08-12 13:50:09,098][Main][INFO] - [train] Step 71050 out of 80000 | Loss --> 1.729 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.871 | Lr --> 0.000 | Seconds_per_step --> 4.869 |
1549
+ [2024-08-12 13:54:09,531][Main][INFO] - [train] Step 71100 out of 80000 | Loss --> 1.728 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.849 | Lr --> 0.000 | Seconds_per_step --> 4.809 |
1550
+ [2024-08-12 13:58:21,788][Main][INFO] - [train] Step 71150 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.829 | Lr --> 0.000 | Seconds_per_step --> 5.045 |
1551
+ [2024-08-12 14:02:21,447][Main][INFO] - [train] Step 71200 out of 80000 | Loss --> 1.725 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.809 | Lr --> 0.000 | Seconds_per_step --> 4.793 |
1552
+ [2024-08-12 14:06:20,014][Main][INFO] - [train] Step 71250 out of 80000 | Loss --> 1.725 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.789 | Lr --> 0.000 | Seconds_per_step --> 4.771 |
1553
+ [2024-08-12 14:10:32,195][Main][INFO] - [train] Step 71300 out of 80000 | Loss --> 1.713 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.770 | Lr --> 0.000 | Seconds_per_step --> 5.044 |
1554
+ [2024-08-12 14:14:53,155][Main][INFO] - [train] Step 71350 out of 80000 | Loss --> 1.712 | Grad_l2 --> 0.344 | Weights_l2 --> 9068.751 | Lr --> 0.000 | Seconds_per_step --> 5.219 |
1555
+ [2024-08-12 14:18:49,772][Main][INFO] - [train] Step 71400 out of 80000 | Loss --> 1.714 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.734 | Lr --> 0.000 | Seconds_per_step --> 4.732 |
1556
+ [2024-08-12 14:22:50,205][Main][INFO] - [train] Step 71450 out of 80000 | Loss --> 1.708 | Grad_l2 --> 0.340 | Weights_l2 --> 9068.718 | Lr --> 0.000 | Seconds_per_step --> 4.809 |
1557
+ [2024-08-12 14:26:37,544][Main][INFO] - [train] Step 71500 out of 80000 | Loss --> 1.713 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.702 | Lr --> 0.000 | Seconds_per_step --> 4.547 |
1558
+ [2024-08-12 14:30:43,794][Main][INFO] - [train] Step 71550 out of 80000 | Loss --> 1.703 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.686 | Lr --> 0.000 | Seconds_per_step --> 4.925 |
1559
+ [2024-08-12 14:34:43,687][Main][INFO] - [train] Step 71600 out of 80000 | Loss --> 1.705 | Grad_l2 --> 0.340 | Weights_l2 --> 9068.668 | Lr --> 0.000 | Seconds_per_step --> 4.798 |
1560
+ [2024-08-12 14:38:41,113][Main][INFO] - [train] Step 71650 out of 80000 | Loss --> 1.705 | Grad_l2 --> 0.340 | Weights_l2 --> 9068.651 | Lr --> 0.000 | Seconds_per_step --> 4.748 |
1561
+ [2024-08-12 14:42:57,491][Main][INFO] - [train] Step 71700 out of 80000 | Loss --> 1.705 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.634 | Lr --> 0.000 | Seconds_per_step --> 5.128 |
1562
+ [2024-08-12 14:46:57,538][Main][INFO] - [train] Step 71750 out of 80000 | Loss --> 1.710 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.619 | Lr --> 0.000 | Seconds_per_step --> 4.801 |
1563
+ [2024-08-12 14:50:54,553][Main][INFO] - [train] Step 71800 out of 80000 | Loss --> 1.705 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.603 | Lr --> 0.000 | Seconds_per_step --> 4.740 |
1564
+ [2024-08-12 14:55:04,238][Main][INFO] - [train] Step 71850 out of 80000 | Loss --> 1.703 | Grad_l2 --> 0.342 | Weights_l2 --> 9068.590 | Lr --> 0.000 | Seconds_per_step --> 4.994 |
1565
+ [2024-08-12 14:59:14,111][Main][INFO] - [train] Step 71900 out of 80000 | Loss --> 1.702 | Grad_l2 --> 0.341 | Weights_l2 --> 9068.573 | Lr --> 0.000 | Seconds_per_step --> 4.997 |
1566
+ [2024-08-12 15:00:26,188][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: c139f443-e606-47ad-b955-2e73792b3841)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00103-of-00234.parquet
1567
+ [2024-08-12 15:00:26,189][huggingface_hub.utils._http][WARNING] - Retrying in 1s [Retry 1/5].
1568
+ [2024-08-12 15:00:37,239][huggingface_hub.utils._http][WARNING] - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 6d4c6bb1-c809-4736-966e-a86e5016b21c)')' thrown while requesting GET https://huggingface.co/datasets/HuggingFaceTB/smollm-corpus/resolve/c074f3d3783ef8c321b40fd89088e5955cd05bad/fineweb-edu-dedup/train-00103-of-00234.parquet
1569
+ [2024-08-12 15:00:37,240][huggingface_hub.utils._http][WARNING] - Retrying in 2s [Retry 2/5].
1570
+ [2024-08-12 15:02:31,253][Main][INFO] - [train] Step 71950 out of 80000 | Loss --> 1.703 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.559 | Lr --> 0.000 | Seconds_per_step --> 3.943 |
1571
+ [2024-08-12 15:05:20,828][Main][INFO] - [train] Step 72000 out of 80000 | Loss --> 1.712 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.543 | Lr --> 0.000 | Seconds_per_step --> 3.391 |
1572
+ [2024-08-12 15:08:10,019][Main][INFO] - [train] Step 72050 out of 80000 | Loss --> 1.707 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.528 | Lr --> 0.000 | Seconds_per_step --> 3.384 |
1573
+ [2024-08-12 15:10:59,105][Main][INFO] - [train] Step 72100 out of 80000 | Loss --> 1.702 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.514 | Lr --> 0.000 | Seconds_per_step --> 3.382 |
1574
+ [2024-08-12 15:13:49,571][Main][INFO] - [train] Step 72150 out of 80000 | Loss --> 1.711 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.498 | Lr --> 0.000 | Seconds_per_step --> 3.409 |
1575
+ [2024-08-12 15:16:42,032][Main][INFO] - [train] Step 72200 out of 80000 | Loss --> 1.701 | Grad_l2 --> 0.344 | Weights_l2 --> 9068.483 | Lr --> 0.000 | Seconds_per_step --> 3.449 |
1576
+ [2024-08-12 15:19:30,656][Main][INFO] - [train] Step 72250 out of 80000 | Loss --> 1.716 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.470 | Lr --> 0.000 | Seconds_per_step --> 3.372 |
1577
+ [2024-08-12 15:22:20,433][Main][INFO] - [train] Step 72300 out of 80000 | Loss --> 1.712 | Grad_l2 --> 0.344 | Weights_l2 --> 9068.455 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1578
+ [2024-08-12 15:25:11,089][Main][INFO] - [train] Step 72350 out of 80000 | Loss --> 1.716 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.440 | Lr --> 0.000 | Seconds_per_step --> 3.413 |
1579
+ [2024-08-12 15:28:01,003][Main][INFO] - [train] Step 72400 out of 80000 | Loss --> 1.717 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.426 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1580
+ [2024-08-12 15:30:50,569][Main][INFO] - [train] Step 72450 out of 80000 | Loss --> 1.713 | Grad_l2 --> 0.343 | Weights_l2 --> 9068.415 | Lr --> 0.000 | Seconds_per_step --> 3.391 |
1581
+ [2024-08-12 15:33:39,952][Main][INFO] - [train] Step 72500 out of 80000 | Loss --> 1.718 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.401 | Lr --> 0.000 | Seconds_per_step --> 3.388 |
1582
+ [2024-08-12 15:36:30,135][Main][INFO] - [train] Step 72550 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.388 | Lr --> 0.000 | Seconds_per_step --> 3.404 |
1583
+ [2024-08-12 15:39:19,623][Main][INFO] - [train] Step 72600 out of 80000 | Loss --> 1.719 | Grad_l2 --> 0.345 | Weights_l2 --> 9068.373 | Lr --> 0.000 | Seconds_per_step --> 3.390 |
1584
+ [2024-08-12 15:42:09,023][Main][INFO] - [train] Step 72650 out of 80000 | Loss --> 1.733 | Grad_l2 --> 0.344 | Weights_l2 --> 9068.360 | Lr --> 0.000 | Seconds_per_step --> 3.388 |
1585
+ [2024-08-12 15:44:58,509][Main][INFO] - [train] Step 72700 out of 80000 | Loss --> 1.734 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.347 | Lr --> 0.000 | Seconds_per_step --> 3.390 |
1586
+ [2024-08-12 15:47:49,181][Main][INFO] - [train] Step 72750 out of 80000 | Loss --> 1.721 | Grad_l2 --> 0.344 | Weights_l2 --> 9068.333 | Lr --> 0.000 | Seconds_per_step --> 3.413 |
1587
+ [2024-08-12 15:50:38,995][Main][INFO] - [train] Step 72800 out of 80000 | Loss --> 1.735 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.322 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1588
+ [2024-08-12 15:53:28,892][Main][INFO] - [train] Step 72850 out of 80000 | Loss --> 1.730 | Grad_l2 --> 0.346 | Weights_l2 --> 9068.310 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1589
+ [2024-08-12 15:56:17,941][Main][INFO] - [train] Step 72900 out of 80000 | Loss --> 1.737 | Grad_l2 --> 0.346 | Weights_l2 --> 9068.298 | Lr --> 0.000 | Seconds_per_step --> 3.381 |
1590
+ [2024-08-12 15:59:12,501][Main][INFO] - [train] Step 72950 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.285 | Lr --> 0.000 | Seconds_per_step --> 3.491 |
1591
+ [2024-08-12 16:02:37,614][Main][INFO] - [train] Step 73000 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.272 | Lr --> 0.000 | Seconds_per_step --> 4.102 |
1592
+ [2024-08-12 16:06:37,371][Main][INFO] - [train] Step 73050 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.346 | Weights_l2 --> 9068.260 | Lr --> 0.000 | Seconds_per_step --> 4.795 |
1593
+ [2024-08-12 16:10:50,188][Main][INFO] - [train] Step 73100 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.247 | Lr --> 0.000 | Seconds_per_step --> 5.056 |
1594
+ [2024-08-12 16:14:47,653][Main][INFO] - [train] Step 73150 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.236 | Lr --> 0.000 | Seconds_per_step --> 4.749 |
1595
+ [2024-08-12 16:18:47,203][Main][INFO] - [train] Step 73200 out of 80000 | Loss --> 1.763 | Grad_l2 --> 0.350 | Weights_l2 --> 9068.224 | Lr --> 0.000 | Seconds_per_step --> 4.791 |
1596
+ [2024-08-12 16:22:52,223][Main][INFO] - [train] Step 73250 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.212 | Lr --> 0.000 | Seconds_per_step --> 4.900 |
1597
+ [2024-08-12 16:26:58,574][Main][INFO] - [train] Step 73300 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.200 | Lr --> 0.000 | Seconds_per_step --> 4.927 |
1598
+ [2024-08-12 16:30:52,333][Main][INFO] - [train] Step 73350 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.191 | Lr --> 0.000 | Seconds_per_step --> 4.675 |
1599
+ [2024-08-12 16:35:02,793][Main][INFO] - [train] Step 73400 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.351 | Weights_l2 --> 9068.179 | Lr --> 0.000 | Seconds_per_step --> 5.009 |
1600
+ [2024-08-12 16:39:12,998][Main][INFO] - [train] Step 73450 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.169 | Lr --> 0.000 | Seconds_per_step --> 5.004 |
1601
+ [2024-08-12 16:43:03,001][Main][INFO] - [train] Step 73500 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.158 | Lr --> 0.000 | Seconds_per_step --> 4.600 |
1602
+ [2024-08-12 16:47:03,618][Main][INFO] - [train] Step 73550 out of 80000 | Loss --> 1.759 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.147 | Lr --> 0.000 | Seconds_per_step --> 4.812 |
1603
+ [2024-08-12 16:51:16,923][Main][INFO] - [train] Step 73600 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.136 | Lr --> 0.000 | Seconds_per_step --> 5.066 |
1604
+ [2024-08-12 16:55:17,319][Main][INFO] - [train] Step 73650 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.127 | Lr --> 0.000 | Seconds_per_step --> 4.808 |
1605
+ [2024-08-12 16:59:11,488][Main][INFO] - [train] Step 73700 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.116 | Lr --> 0.000 | Seconds_per_step --> 4.683 |
1606
+ [2024-08-12 17:03:14,319][Main][INFO] - [train] Step 73750 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.107 | Lr --> 0.000 | Seconds_per_step --> 4.857 |
1607
+ [2024-08-12 17:07:27,659][Main][INFO] - [train] Step 73800 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.097 | Lr --> 0.000 | Seconds_per_step --> 5.067 |
1608
+ [2024-08-12 17:11:27,086][Main][INFO] - [train] Step 73850 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.348 | Weights_l2 --> 9068.087 | Lr --> 0.000 | Seconds_per_step --> 4.789 |
1609
+ [2024-08-12 17:15:20,740][Main][INFO] - [train] Step 73900 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.350 | Weights_l2 --> 9068.078 | Lr --> 0.000 | Seconds_per_step --> 4.673 |
1610
+ [2024-08-12 17:19:26,343][Main][INFO] - [train] Step 73950 out of 80000 | Loss --> 1.753 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.068 | Lr --> 0.000 | Seconds_per_step --> 4.912 |
1611
+ [2024-08-12 17:23:30,087][Main][INFO] - [train] Step 74000 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.059 | Lr --> 0.000 | Seconds_per_step --> 4.875 |
1612
+ [2024-08-12 17:27:25,018][Main][INFO] - [train] Step 74050 out of 80000 | Loss --> 1.756 | Grad_l2 --> 0.350 | Weights_l2 --> 9068.050 | Lr --> 0.000 | Seconds_per_step --> 4.699 |
1613
+ [2024-08-12 17:31:28,610][Main][INFO] - [train] Step 74100 out of 80000 | Loss --> 1.758 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.040 | Lr --> 0.000 | Seconds_per_step --> 4.872 |
1614
+ [2024-08-12 17:35:36,982][Main][INFO] - [train] Step 74150 out of 80000 | Loss --> 1.752 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.032 | Lr --> 0.000 | Seconds_per_step --> 4.967 |
1615
+ [2024-08-12 17:38:39,898][Main][INFO] - [train] Step 74200 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.347 | Weights_l2 --> 9068.024 | Lr --> 0.000 | Seconds_per_step --> 3.658 |
1616
+ [2024-08-12 17:41:29,774][Main][INFO] - [train] Step 74250 out of 80000 | Loss --> 1.753 | Grad_l2 --> 0.350 | Weights_l2 --> 9068.015 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1617
+ [2024-08-12 17:44:19,599][Main][INFO] - [train] Step 74300 out of 80000 | Loss --> 1.755 | Grad_l2 --> 0.349 | Weights_l2 --> 9068.006 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1618
+ [2024-08-12 17:47:09,578][Main][INFO] - [train] Step 74350 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.997 | Lr --> 0.000 | Seconds_per_step --> 3.400 |
1619
+ [2024-08-12 17:50:00,007][Main][INFO] - [train] Step 74400 out of 80000 | Loss --> 1.757 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.989 | Lr --> 0.000 | Seconds_per_step --> 3.409 |
1620
+ [2024-08-12 17:52:48,380][Main][INFO] - [train] Step 74450 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.980 | Lr --> 0.000 | Seconds_per_step --> 3.367 |
1621
+ [2024-08-12 17:55:37,403][Main][INFO] - [train] Step 74500 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.347 | Weights_l2 --> 9067.972 | Lr --> 0.000 | Seconds_per_step --> 3.380 |
1622
+ [2024-08-12 17:58:27,279][Main][INFO] - [train] Step 74550 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.965 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1623
+ [2024-08-12 18:01:17,245][Main][INFO] - [train] Step 74600 out of 80000 | Loss --> 1.749 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.957 | Lr --> 0.000 | Seconds_per_step --> 3.399 |
1624
+ [2024-08-12 18:04:05,815][Main][INFO] - [train] Step 74650 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.950 | Lr --> 0.000 | Seconds_per_step --> 3.371 |
1625
+ [2024-08-12 18:06:55,310][Main][INFO] - [train] Step 74700 out of 80000 | Loss --> 1.743 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.942 | Lr --> 0.000 | Seconds_per_step --> 3.390 |
1626
+ [2024-08-12 18:09:44,130][Main][INFO] - [train] Step 74750 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.934 | Lr --> 0.000 | Seconds_per_step --> 3.376 |
1627
+ [2024-08-12 18:12:34,080][Main][INFO] - [train] Step 74800 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.926 | Lr --> 0.000 | Seconds_per_step --> 3.399 |
1628
+ [2024-08-12 18:15:24,419][Main][INFO] - [train] Step 74850 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.919 | Lr --> 0.000 | Seconds_per_step --> 3.407 |
1629
+ [2024-08-12 18:18:12,739][Main][INFO] - [train] Step 74900 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.912 | Lr --> 0.000 | Seconds_per_step --> 3.366 |
1630
+ [2024-08-12 18:21:02,709][Main][INFO] - [train] Step 74950 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.905 | Lr --> 0.000 | Seconds_per_step --> 3.399 |
1631
+ [2024-08-12 18:23:52,761][Main][INFO] - [train] Step 75000 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.898 | Lr --> 0.000 | Seconds_per_step --> 3.401 |
1632
+ [2024-08-12 18:23:52,762][accelerate.accelerator][INFO] - Saving current state to checkpoint-pt-75000
1633
+ [2024-08-12 18:23:52,765][accelerate.utils.other][WARNING] - Removed shared tensor {'encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
1634
+ [2024-08-12 18:23:55,432][accelerate.checkpointing][INFO] - Model weights saved in checkpoint-pt-75000/model.safetensors
1635
+ [2024-08-12 18:23:58,451][accelerate.checkpointing][INFO] - Optimizer state saved in checkpoint-pt-75000/optimizer.bin
1636
+ [2024-08-12 18:23:58,451][accelerate.checkpointing][INFO] - Scheduler state saved in checkpoint-pt-75000/scheduler.bin
1637
+ [2024-08-12 18:23:58,452][accelerate.checkpointing][INFO] - Sampler state for dataloader 0 saved in checkpoint-pt-75000/sampler.bin
1638
+ [2024-08-12 18:23:58,452][accelerate.checkpointing][INFO] - Sampler state for dataloader 1 saved in checkpoint-pt-75000/sampler_1.bin
1639
+ [2024-08-12 18:23:58,452][accelerate.checkpointing][INFO] - Random states saved in checkpoint-pt-75000/random_states_0.pkl
1640
+ [2024-08-12 18:26:49,045][Main][INFO] - [train] Step 75050 out of 80000 | Loss --> 1.754 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.891 | Lr --> 0.000 | Seconds_per_step --> 3.526 |
1641
+ [2024-08-12 18:29:38,847][Main][INFO] - [train] Step 75100 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.885 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1642
+ [2024-08-12 18:32:29,593][Main][INFO] - [train] Step 75150 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.879 | Lr --> 0.000 | Seconds_per_step --> 3.415 |
1643
+ [2024-08-12 18:35:19,147][Main][INFO] - [train] Step 75200 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.873 | Lr --> 0.000 | Seconds_per_step --> 3.391 |
1644
+ [2024-08-12 18:38:08,943][Main][INFO] - [train] Step 75250 out of 80000 | Loss --> 1.737 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.866 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1645
+ [2024-08-12 18:40:57,335][Main][INFO] - [train] Step 75300 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.860 | Lr --> 0.000 | Seconds_per_step --> 3.368 |
1646
+ [2024-08-12 18:43:47,536][Main][INFO] - [train] Step 75350 out of 80000 | Loss --> 1.746 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.854 | Lr --> 0.000 | Seconds_per_step --> 3.404 |
1647
+ [2024-08-12 18:46:36,557][Main][INFO] - [train] Step 75400 out of 80000 | Loss --> 1.744 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.849 | Lr --> 0.000 | Seconds_per_step --> 3.380 |
1648
+ [2024-08-12 18:49:26,702][Main][INFO] - [train] Step 75450 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.843 | Lr --> 0.000 | Seconds_per_step --> 3.403 |
1649
+ [2024-08-12 18:52:16,129][Main][INFO] - [train] Step 75500 out of 80000 | Loss --> 1.747 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.838 | Lr --> 0.000 | Seconds_per_step --> 3.389 |
1650
+ [2024-08-12 18:55:06,083][Main][INFO] - [train] Step 75550 out of 80000 | Loss --> 1.750 | Grad_l2 --> 0.352 | Weights_l2 --> 9067.831 | Lr --> 0.000 | Seconds_per_step --> 3.399 |
1651
+ [2024-08-12 18:57:56,288][Main][INFO] - [train] Step 75600 out of 80000 | Loss --> 1.751 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.826 | Lr --> 0.000 | Seconds_per_step --> 3.404 |
1652
+ [2024-08-12 19:00:47,552][Main][INFO] - [train] Step 75650 out of 80000 | Loss --> 1.745 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.821 | Lr --> 0.000 | Seconds_per_step --> 3.425 |
1653
+ [2024-08-12 19:03:37,554][Main][INFO] - [train] Step 75700 out of 80000 | Loss --> 1.748 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.816 | Lr --> 0.000 | Seconds_per_step --> 3.400 |
1654
+ [2024-08-12 19:06:26,390][Main][INFO] - [train] Step 75750 out of 80000 | Loss --> 1.735 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.810 | Lr --> 0.000 | Seconds_per_step --> 3.377 |
1655
+ [2024-08-12 19:09:15,513][Main][INFO] - [train] Step 75800 out of 80000 | Loss --> 1.741 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.806 | Lr --> 0.000 | Seconds_per_step --> 3.382 |
1656
+ [2024-08-12 19:12:04,817][Main][INFO] - [train] Step 75850 out of 80000 | Loss --> 1.742 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.800 | Lr --> 0.000 | Seconds_per_step --> 3.386 |
1657
+ [2024-08-12 19:14:54,908][Main][INFO] - [train] Step 75900 out of 80000 | Loss --> 1.736 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.795 | Lr --> 0.000 | Seconds_per_step --> 3.402 |
1658
+ [2024-08-12 19:17:44,997][Main][INFO] - [train] Step 75950 out of 80000 | Loss --> 1.738 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.791 | Lr --> 0.000 | Seconds_per_step --> 3.402 |
1659
+ [2024-08-12 19:20:35,427][Main][INFO] - [train] Step 76000 out of 80000 | Loss --> 1.740 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.786 | Lr --> 0.000 | Seconds_per_step --> 3.409 |
1660
+ [2024-08-12 19:23:26,015][Main][INFO] - [train] Step 76050 out of 80000 | Loss --> 1.735 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.781 | Lr --> 0.000 | Seconds_per_step --> 3.412 |
1661
+ [2024-08-12 19:26:15,433][Main][INFO] - [train] Step 76100 out of 80000 | Loss --> 1.731 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.776 | Lr --> 0.000 | Seconds_per_step --> 3.388 |
1662
+ [2024-08-12 19:29:03,758][Main][INFO] - [train] Step 76150 out of 80000 | Loss --> 1.734 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.771 | Lr --> 0.000 | Seconds_per_step --> 3.366 |
1663
+ [2024-08-12 19:31:52,080][Main][INFO] - [train] Step 76200 out of 80000 | Loss --> 1.739 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.767 | Lr --> 0.000 | Seconds_per_step --> 3.366 |
1664
+ [2024-08-12 19:34:45,455][Main][INFO] - [train] Step 76250 out of 80000 | Loss --> 1.733 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.763 | Lr --> 0.000 | Seconds_per_step --> 3.467 |
1665
+ [2024-08-12 19:37:34,591][Main][INFO] - [train] Step 76300 out of 80000 | Loss --> 1.723 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.759 | Lr --> 0.000 | Seconds_per_step --> 3.383 |
1666
+ [2024-08-12 19:40:23,592][Main][INFO] - [train] Step 76350 out of 80000 | Loss --> 1.734 | Grad_l2 --> 0.352 | Weights_l2 --> 9067.755 | Lr --> 0.000 | Seconds_per_step --> 3.380 |
1667
+ [2024-08-12 19:43:12,814][Main][INFO] - [train] Step 76400 out of 80000 | Loss --> 1.732 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.751 | Lr --> 0.000 | Seconds_per_step --> 3.384 |
1668
+ [2024-08-12 19:46:01,456][Main][INFO] - [train] Step 76450 out of 80000 | Loss --> 1.723 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.747 | Lr --> 0.000 | Seconds_per_step --> 3.373 |
1669
+ [2024-08-12 19:48:51,400][Main][INFO] - [train] Step 76500 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.743 | Lr --> 0.000 | Seconds_per_step --> 3.399 |
1670
+ [2024-08-12 19:51:40,649][Main][INFO] - [train] Step 76550 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.740 | Lr --> 0.000 | Seconds_per_step --> 3.385 |
1671
+ [2024-08-12 19:54:29,691][Main][INFO] - [train] Step 76600 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.736 | Lr --> 0.000 | Seconds_per_step --> 3.381 |
1672
+ [2024-08-12 19:57:19,230][Main][INFO] - [train] Step 76650 out of 80000 | Loss --> 1.717 | Grad_l2 --> 0.347 | Weights_l2 --> 9067.733 | Lr --> 0.000 | Seconds_per_step --> 3.391 |
1673
+ [2024-08-12 20:00:09,385][Main][INFO] - [train] Step 76700 out of 80000 | Loss --> 1.717 | Grad_l2 --> 0.347 | Weights_l2 --> 9067.729 | Lr --> 0.000 | Seconds_per_step --> 3.403 |
1674
+ [2024-08-12 20:02:57,964][Main][INFO] - [train] Step 76750 out of 80000 | Loss --> 1.715 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.726 | Lr --> 0.000 | Seconds_per_step --> 3.372 |
1675
+ [2024-08-12 20:05:47,101][Main][INFO] - [train] Step 76800 out of 80000 | Loss --> 1.710 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.722 | Lr --> 0.000 | Seconds_per_step --> 3.383 |
1676
+ [2024-08-12 20:08:37,921][Main][INFO] - [train] Step 76850 out of 80000 | Loss --> 1.725 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.719 | Lr --> 0.000 | Seconds_per_step --> 3.416 |
1677
+ [2024-08-12 20:11:28,368][Main][INFO] - [train] Step 76900 out of 80000 | Loss --> 1.716 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.716 | Lr --> 0.000 | Seconds_per_step --> 3.409 |
1678
+ [2024-08-12 20:14:18,240][Main][INFO] - [train] Step 76950 out of 80000 | Loss --> 1.722 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.713 | Lr --> 0.000 | Seconds_per_step --> 3.397 |
1679
+ [2024-08-12 20:17:07,126][Main][INFO] - [train] Step 77000 out of 80000 | Loss --> 1.727 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.710 | Lr --> 0.000 | Seconds_per_step --> 3.378 |
1680
+ [2024-08-12 20:19:57,021][Main][INFO] - [train] Step 77050 out of 80000 | Loss --> 1.727 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.707 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1681
+ [2024-08-12 20:22:47,079][Main][INFO] - [train] Step 77100 out of 80000 | Loss --> 1.726 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.705 | Lr --> 0.000 | Seconds_per_step --> 3.401 |
1682
+ [2024-08-12 20:25:36,117][Main][INFO] - [train] Step 77150 out of 80000 | Loss --> 1.720 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.702 | Lr --> 0.000 | Seconds_per_step --> 3.381 |
1683
+ [2024-08-12 20:28:24,552][Main][INFO] - [train] Step 77200 out of 80000 | Loss --> 1.716 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.699 | Lr --> 0.000 | Seconds_per_step --> 3.369 |
1684
+ [2024-08-12 20:31:13,686][Main][INFO] - [train] Step 77250 out of 80000 | Loss --> 1.723 | Grad_l2 --> 0.348 | Weights_l2 --> 9067.696 | Lr --> 0.000 | Seconds_per_step --> 3.383 |
1685
+ [2024-08-12 20:34:04,979][Main][INFO] - [train] Step 77300 out of 80000 | Loss --> 1.724 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.694 | Lr --> 0.000 | Seconds_per_step --> 3.426 |
1686
+ [2024-08-12 20:36:54,247][Main][INFO] - [train] Step 77350 out of 80000 | Loss --> 1.724 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.691 | Lr --> 0.000 | Seconds_per_step --> 3.385 |
1687
+ [2024-08-12 20:39:44,072][Main][INFO] - [train] Step 77400 out of 80000 | Loss --> 1.717 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.689 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1688
+ [2024-08-12 20:42:33,256][Main][INFO] - [train] Step 77450 out of 80000 | Loss --> 1.721 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.686 | Lr --> 0.000 | Seconds_per_step --> 3.384 |
1689
+ [2024-08-12 20:45:23,400][Main][INFO] - [train] Step 77500 out of 80000 | Loss --> 1.725 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.684 | Lr --> 0.000 | Seconds_per_step --> 3.403 |
1690
+ [2024-08-12 20:48:13,007][Main][INFO] - [train] Step 77550 out of 80000 | Loss --> 1.723 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.682 | Lr --> 0.000 | Seconds_per_step --> 3.392 |
1691
+ [2024-08-12 20:51:01,893][Main][INFO] - [train] Step 77600 out of 80000 | Loss --> 1.711 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.680 | Lr --> 0.000 | Seconds_per_step --> 3.378 |
1692
+ [2024-08-12 20:53:51,688][Main][INFO] - [train] Step 77650 out of 80000 | Loss --> 1.719 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.678 | Lr --> 0.000 | Seconds_per_step --> 3.396 |
1693
+ [2024-08-12 20:56:42,523][Main][INFO] - [train] Step 77700 out of 80000 | Loss --> 1.718 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.676 | Lr --> 0.000 | Seconds_per_step --> 3.417 |
1694
+ [2024-08-12 20:59:36,305][Main][INFO] - [train] Step 77750 out of 80000 | Loss --> 1.717 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.674 | Lr --> 0.000 | Seconds_per_step --> 3.476 |
1695
+ [2024-08-12 21:02:26,051][Main][INFO] - [train] Step 77800 out of 80000 | Loss --> 1.714 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.672 | Lr --> 0.000 | Seconds_per_step --> 3.395 |
1696
+ [2024-08-12 21:05:15,893][Main][INFO] - [train] Step 77850 out of 80000 | Loss --> 1.719 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.670 | Lr --> 0.000 | Seconds_per_step --> 3.397 |
1697
+ [2024-08-12 21:08:06,462][Main][INFO] - [train] Step 77900 out of 80000 | Loss --> 1.719 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.668 | Lr --> 0.000 | Seconds_per_step --> 3.411 |
1698
+ [2024-08-12 21:10:56,190][Main][INFO] - [train] Step 77950 out of 80000 | Loss --> 1.714 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.666 | Lr --> 0.000 | Seconds_per_step --> 3.395 |
1699
+ [2024-08-12 21:13:44,945][Main][INFO] - [train] Step 78000 out of 80000 | Loss --> 1.714 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.665 | Lr --> 0.000 | Seconds_per_step --> 3.375 |
1700
+ [2024-08-12 21:16:34,160][Main][INFO] - [train] Step 78050 out of 80000 | Loss --> 1.710 | Grad_l2 --> 0.351 | Weights_l2 --> 9067.663 | Lr --> 0.000 | Seconds_per_step --> 3.384 |
1701
+ [2024-08-12 21:19:24,074][Main][INFO] - [train] Step 78100 out of 80000 | Loss --> 1.707 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.661 | Lr --> 0.000 | Seconds_per_step --> 3.398 |
1702
+ [2024-08-12 21:22:14,845][Main][INFO] - [train] Step 78150 out of 80000 | Loss --> 1.701 | Grad_l2 --> 0.349 | Weights_l2 --> 9067.660 | Lr --> 0.000 | Seconds_per_step --> 3.415 |
1703
+ [2024-08-12 21:25:04,253][Main][INFO] - [train] Step 78200 out of 80000 | Loss --> 1.707 | Grad_l2 --> 0.350 | Weights_l2 --> 9067.658 | Lr --> 0.000 | Seconds_per_step --> 3.388 |
checkpoints/seconds_per_step_over_steps.png CHANGED
checkpoints/training_metrics.csv CHANGED
@@ -1334,3 +1334,231 @@ timestamp,step,loss,grad_l2,weights_l2,lr,seconds_per_step
1334
  "2024-08-12 07:52:46,860",66650,1.768,0.33,9073.285,0.001,4.804
1335
  "2024-08-12 07:56:36,923",66700,1.768,0.329,9073.215,0.001,4.601
1336
  "2024-08-12 08:00:18,881",66750,1.77,0.331,9073.141,0.001,4.439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1334
  "2024-08-12 07:52:46,860",66650,1.768,0.33,9073.285,0.001,4.804
1335
  "2024-08-12 07:56:36,923",66700,1.768,0.329,9073.215,0.001,4.601
1336
  "2024-08-12 08:00:18,881",66750,1.77,0.331,9073.141,0.001,4.439
1337
+ "2024-08-12 08:04:03,533",66800,1.769,0.33,9073.071,0.001,4.493
1338
+ "2024-08-12 08:07:50,500",66850,1.769,0.331,9073.004,0.001,4.539
1339
+ "2024-08-12 08:11:49,816",66900,1.768,0.331,9072.935,0.001,4.786
1340
+ "2024-08-12 08:15:56,432",66950,1.768,0.331,9072.867,0.001,4.932
1341
+ "2024-08-12 08:20:02,525",67000,1.779,0.332,9072.797,0.001,4.922
1342
+ "2024-08-12 08:23:53,330",67050,1.771,0.333,9072.73,0.001,4.616
1343
+ "2024-08-12 08:27:56,587",67100,1.774,0.331,9072.661,0.001,4.865
1344
+ "2024-08-12 08:32:02,097",67150,1.772,0.331,9072.592,0.001,4.91
1345
+ "2024-08-12 08:36:03,847",67200,1.774,0.332,9072.521,0.001,4.835
1346
+ "2024-08-12 08:40:03,755",67250,1.763,0.331,9072.457,0.001,4.798
1347
+ "2024-08-12 08:44:12,833",67300,1.769,0.331,9072.387,0.001,4.982
1348
+ "2024-08-12 08:48:15,824",67350,1.76,0.331,9072.319,0.001,4.86
1349
+ "2024-08-12 08:52:17,176",67400,1.766,0.331,9072.248,0.001,4.827
1350
+ "2024-08-12 08:56:26,912",67450,1.759,0.332,9072.181,0.001,4.995
1351
+ "2024-08-12 09:00:28,981",67500,1.772,0.331,9072.113,0.001,4.841
1352
+ "2024-08-12 09:04:36,172",67550,1.77,0.335,9072.048,0.001,4.944
1353
+ "2024-08-12 09:08:49,679",67600,1.766,0.335,9071.978,0.001,5.07
1354
+ "2024-08-12 09:12:58,709",67650,1.764,0.331,9071.91,0.001,4.981
1355
+ "2024-08-12 09:17:14,413",67700,1.765,0.331,9071.843,0.001,5.114
1356
+ "2024-08-12 09:21:11,505",67750,1.765,0.331,9071.774,0.001,4.742
1357
+ "2024-08-12 09:25:15,107",67800,1.755,0.332,9071.709,0.001,4.872
1358
+ "2024-08-12 09:29:20,556",67850,1.752,0.33,9071.643,0.001,4.909
1359
+ "2024-08-12 09:33:24,433",67900,1.751,0.334,9071.575,0.001,4.878
1360
+ "2024-08-12 09:37:21,053",67950,1.749,0.335,9071.51,0.001,4.732
1361
+ "2024-08-12 09:41:30,689",68000,1.755,0.331,9071.446,0.001,4.993
1362
+ "2024-08-12 09:45:33,925",68050,1.751,0.333,9071.382,0.001,4.865
1363
+ "2024-08-12 09:49:32,988",68100,1.751,0.331,9071.32,0.001,4.781
1364
+ "2024-08-12 09:53:35,684",68150,1.75,0.334,9071.255,0.001,4.854
1365
+ "2024-08-12 09:57:46,641",68200,1.746,0.333,9071.196,0.001,5.019
1366
+ "2024-08-12 10:01:44,603",68250,1.744,0.333,9071.133,0.001,4.759
1367
+ "2024-08-12 10:05:41,867",68300,1.738,0.334,9071.075,0.001,4.745
1368
+ "2024-08-12 10:09:51,335",68350,1.748,0.333,9071.016,0.001,4.989
1369
+ "2024-08-12 10:14:03,242",68400,1.762,0.334,9070.96,0.001,5.038
1370
+ "2024-08-12 10:17:57,549",68450,1.751,0.333,9070.902,0.001,4.686
1371
+ "2024-08-12 10:22:04,017",68500,1.754,0.333,9070.845,0.001,4.929
1372
+ "2024-08-12 10:26:15,511",68550,1.744,0.333,9070.785,0.001,5.03
1373
+ "2024-08-12 10:30:12,459",68600,1.748,0.334,9070.729,0.001,4.739
1374
+ "2024-08-12 10:34:09,711",68650,1.744,0.333,9070.674,0.001,4.745
1375
+ "2024-08-12 10:38:15,758",68700,1.747,0.333,9070.62,0.001,4.921
1376
+ "2024-08-12 10:42:31,275",68750,1.752,0.334,9070.565,0.001,5.11
1377
+ "2024-08-12 10:46:29,239",68800,1.756,0.336,9070.513,0.001,4.759
1378
+ "2024-08-12 10:50:35,687",68850,1.747,0.336,9070.463,0.0,4.929
1379
+ "2024-08-12 10:54:45,439",68900,1.746,0.334,9070.413,0.0,4.995
1380
+ "2024-08-12 10:58:49,957",68950,1.741,0.333,9070.362,0.0,4.89
1381
+ "2024-08-12 11:02:50,584",69000,1.74,0.333,9070.312,0.0,4.813
1382
+ "2024-08-12 11:07:04,656",69050,1.744,0.336,9070.263,0.0,5.081
1383
+ "2024-08-12 11:11:13,715",69100,1.738,0.336,9070.214,0.0,4.981
1384
+ "2024-08-12 11:15:08,470",69150,1.737,0.335,9070.167,0.0,4.695
1385
+ "2024-08-12 11:19:18,203",69200,1.739,0.336,9070.119,0.0,4.995
1386
+ "2024-08-12 11:23:36,177",69250,1.735,0.334,9070.077,0.0,5.159
1387
+ "2024-08-12 11:27:32,982",69300,1.731,0.335,9070.031,0.0,4.736
1388
+ "2024-08-12 11:31:35,613",69350,1.736,0.336,9069.988,0.0,4.853
1389
+ "2024-08-12 11:35:44,767",69400,1.733,0.336,9069.946,0.0,4.983
1390
+ "2024-08-12 11:39:44,712",69450,1.74,0.336,9069.9,0.0,4.799
1391
+ "2024-08-12 11:43:39,145",69500,1.742,0.338,9069.857,0.0,4.689
1392
+ "2024-08-12 11:47:43,420",69550,1.736,0.337,9069.815,0.0,4.885
1393
+ "2024-08-12 11:51:55,140",69600,1.741,0.336,9069.774,0.0,5.034
1394
+ "2024-08-12 11:55:50,294",69650,1.744,0.337,9069.734,0.0,4.703
1395
+ "2024-08-12 11:59:50,709",69700,1.739,0.336,9069.695,0.0,4.808
1396
+ "2024-08-12 12:03:56,289",69750,1.746,0.337,9069.657,0.0,4.912
1397
+ "2024-08-12 12:08:00,668",69800,1.745,0.338,9069.619,0.0,4.888
1398
+ "2024-08-12 12:11:55,491",69850,1.743,0.339,9069.58,0.0,4.696
1399
+ "2024-08-12 12:15:58,225",69900,1.746,0.337,9069.544,0.0,4.855
1400
+ "2024-08-12 12:20:07,222",69950,1.74,0.337,9069.507,0.0,4.98
1401
+ "2024-08-12 12:24:09,812",70000,1.74,0.338,9069.472,0.0,4.852
1402
+ "2024-08-12 12:28:17,924",70050,1.747,0.337,9069.436,0.0,4.962
1403
+ "2024-08-12 12:32:24,318",70100,1.738,0.338,9069.403,0.0,4.928
1404
+ "2024-08-12 12:36:36,478",70150,1.752,0.339,9069.369,0.0,5.043
1405
+ "2024-08-12 12:40:33,276",70200,1.736,0.337,9069.335,0.0,4.736
1406
+ "2024-08-12 12:44:33,787",70250,1.746,0.339,9069.302,0.0,4.81
1407
+ "2024-08-12 12:48:49,628",70300,1.737,0.34,9069.272,0.0,5.117
1408
+ "2024-08-12 12:52:50,330",70350,1.741,0.339,9069.241,0.0,4.814
1409
+ "2024-08-12 12:56:47,628",70400,1.747,0.34,9069.21,0.0,4.746
1410
+ "2024-08-12 13:00:53,896",70450,1.745,0.339,9069.181,0.0,4.925
1411
+ "2024-08-12 13:05:13,889",70500,1.746,0.34,9069.153,0.0,5.2
1412
+ "2024-08-12 13:09:07,510",70550,1.732,0.339,9069.124,0.0,4.672
1413
+ "2024-08-12 13:13:07,689",70600,1.736,0.34,9069.095,0.0,4.804
1414
+ "2024-08-12 13:17:20,202",70650,1.739,0.339,9069.067,0.0,5.05
1415
+ "2024-08-12 13:21:31,602",70700,1.741,0.342,9069.04,0.0,5.028
1416
+ "2024-08-12 13:25:28,721",70750,1.739,0.341,9069.015,0.0,4.742
1417
+ "2024-08-12 13:29:41,170",70800,1.748,0.343,9068.99,0.0,5.049
1418
+ "2024-08-12 13:33:45,475",70850,1.74,0.339,9068.965,0.0,4.886
1419
+ "2024-08-12 13:37:50,507",70900,1.729,0.338,9068.942,0.0,4.901
1420
+ "2024-08-12 13:41:51,338",70950,1.729,0.341,9068.918,0.0,4.817
1421
+ "2024-08-12 13:46:05,634",71000,1.721,0.341,9068.894,0.0,5.086
1422
+ "2024-08-12 13:50:09,098",71050,1.729,0.341,9068.871,0.0,4.869
1423
+ "2024-08-12 13:54:09,531",71100,1.728,0.343,9068.849,0.0,4.809
1424
+ "2024-08-12 13:58:21,788",71150,1.726,0.341,9068.829,0.0,5.045
1425
+ "2024-08-12 14:02:21,447",71200,1.725,0.341,9068.809,0.0,4.793
1426
+ "2024-08-12 14:06:20,014",71250,1.725,0.341,9068.789,0.0,4.771
1427
+ "2024-08-12 14:10:32,195",71300,1.713,0.342,9068.77,0.0,5.044
1428
+ "2024-08-12 14:14:53,155",71350,1.712,0.344,9068.751,0.0,5.219
1429
+ "2024-08-12 14:18:49,772",71400,1.714,0.342,9068.734,0.0,4.732
1430
+ "2024-08-12 14:22:50,205",71450,1.708,0.34,9068.718,0.0,4.809
1431
+ "2024-08-12 14:26:37,544",71500,1.713,0.341,9068.702,0.0,4.547
1432
+ "2024-08-12 14:30:43,794",71550,1.703,0.342,9068.686,0.0,4.925
1433
+ "2024-08-12 14:34:43,687",71600,1.705,0.34,9068.668,0.0,4.798
1434
+ "2024-08-12 14:38:41,113",71650,1.705,0.34,9068.651,0.0,4.748
1435
+ "2024-08-12 14:42:57,491",71700,1.705,0.342,9068.634,0.0,5.128
1436
+ "2024-08-12 14:46:57,538",71750,1.71,0.342,9068.619,0.0,4.801
1437
+ "2024-08-12 14:50:54,553",71800,1.705,0.343,9068.603,0.0,4.74
1438
+ "2024-08-12 14:55:04,238",71850,1.703,0.342,9068.59,0.0,4.994
1439
+ "2024-08-12 14:59:14,111",71900,1.702,0.341,9068.573,0.0,4.997
1440
+ "2024-08-12 15:02:31,253",71950,1.703,0.343,9068.559,0.0,3.943
1441
+ "2024-08-12 15:05:20,828",72000,1.712,0.343,9068.543,0.0,3.391
1442
+ "2024-08-12 15:08:10,019",72050,1.707,0.343,9068.528,0.0,3.384
1443
+ "2024-08-12 15:10:59,105",72100,1.702,0.343,9068.514,0.0,3.382
1444
+ "2024-08-12 15:13:49,571",72150,1.711,0.343,9068.498,0.0,3.409
1445
+ "2024-08-12 15:16:42,032",72200,1.701,0.344,9068.483,0.0,3.449
1446
+ "2024-08-12 15:19:30,656",72250,1.716,0.345,9068.47,0.0,3.372
1447
+ "2024-08-12 15:22:20,433",72300,1.712,0.344,9068.455,0.0,3.396
1448
+ "2024-08-12 15:25:11,089",72350,1.716,0.345,9068.44,0.0,3.413
1449
+ "2024-08-12 15:28:01,003",72400,1.717,0.345,9068.426,0.0,3.398
1450
+ "2024-08-12 15:30:50,569",72450,1.713,0.343,9068.415,0.0,3.391
1451
+ "2024-08-12 15:33:39,952",72500,1.718,0.345,9068.401,0.0,3.388
1452
+ "2024-08-12 15:36:30,135",72550,1.726,0.345,9068.388,0.0,3.404
1453
+ "2024-08-12 15:39:19,623",72600,1.719,0.345,9068.373,0.0,3.39
1454
+ "2024-08-12 15:42:09,023",72650,1.733,0.344,9068.36,0.0,3.388
1455
+ "2024-08-12 15:44:58,509",72700,1.734,0.348,9068.347,0.0,3.39
1456
+ "2024-08-12 15:47:49,181",72750,1.721,0.344,9068.333,0.0,3.413
1457
+ "2024-08-12 15:50:38,995",72800,1.735,0.347,9068.322,0.0,3.396
1458
+ "2024-08-12 15:53:28,892",72850,1.73,0.346,9068.31,0.0,3.398
1459
+ "2024-08-12 15:56:17,941",72900,1.737,0.346,9068.298,0.0,3.381
1460
+ "2024-08-12 15:59:12,501",72950,1.741,0.347,9068.285,0.0,3.491
1461
+ "2024-08-12 16:02:37,614",73000,1.742,0.348,9068.272,0.0,4.102
1462
+ "2024-08-12 16:06:37,371",73050,1.75,0.346,9068.26,0.0,4.795
1463
+ "2024-08-12 16:10:50,188",73100,1.745,0.348,9068.247,0.0,5.056
1464
+ "2024-08-12 16:14:47,653",73150,1.752,0.348,9068.236,0.0,4.749
1465
+ "2024-08-12 16:18:47,203",73200,1.763,0.35,9068.224,0.0,4.791
1466
+ "2024-08-12 16:22:52,223",73250,1.755,0.349,9068.212,0.0,4.9
1467
+ "2024-08-12 16:26:58,574",73300,1.755,0.348,9068.2,0.0,4.927
1468
+ "2024-08-12 16:30:52,333",73350,1.757,0.349,9068.191,0.0,4.675
1469
+ "2024-08-12 16:35:02,793",73400,1.757,0.351,9068.179,0.0,5.009
1470
+ "2024-08-12 16:39:12,998",73450,1.749,0.349,9068.169,0.0,5.004
1471
+ "2024-08-12 16:43:03,001",73500,1.757,0.348,9068.158,0.0,4.6
1472
+ "2024-08-12 16:47:03,618",73550,1.759,0.347,9068.147,0.0,4.812
1473
+ "2024-08-12 16:51:16,923",73600,1.758,0.349,9068.136,0.0,5.066
1474
+ "2024-08-12 16:55:17,319",73650,1.756,0.349,9068.127,0.0,4.808
1475
+ "2024-08-12 16:59:11,488",73700,1.757,0.349,9068.116,0.0,4.683
1476
+ "2024-08-12 17:03:14,319",73750,1.75,0.347,9068.107,0.0,4.857
1477
+ "2024-08-12 17:07:27,659",73800,1.749,0.348,9068.097,0.0,5.067
1478
+ "2024-08-12 17:11:27,086",73850,1.755,0.348,9068.087,0.0,4.789
1479
+ "2024-08-12 17:15:20,740",73900,1.758,0.35,9068.078,0.0,4.673
1480
+ "2024-08-12 17:19:26,343",73950,1.753,0.349,9068.068,0.0,4.912
1481
+ "2024-08-12 17:23:30,087",74000,1.755,0.349,9068.059,0.0,4.875
1482
+ "2024-08-12 17:27:25,018",74050,1.756,0.35,9068.05,0.0,4.699
1483
+ "2024-08-12 17:31:28,610",74100,1.758,0.349,9068.04,0.0,4.872
1484
+ "2024-08-12 17:35:36,982",74150,1.752,0.347,9068.032,0.0,4.967
1485
+ "2024-08-12 17:38:39,898",74200,1.749,0.347,9068.024,0.0,3.658
1486
+ "2024-08-12 17:41:29,774",74250,1.753,0.35,9068.015,0.0,3.398
1487
+ "2024-08-12 17:44:19,599",74300,1.755,0.349,9068.006,0.0,3.396
1488
+ "2024-08-12 17:47:09,578",74350,1.751,0.35,9067.997,0.0,3.4
1489
+ "2024-08-12 17:50:00,007",74400,1.757,0.349,9067.989,0.0,3.409
1490
+ "2024-08-12 17:52:48,380",74450,1.751,0.35,9067.98,0.0,3.367
1491
+ "2024-08-12 17:55:37,403",74500,1.75,0.347,9067.972,0.0,3.38
1492
+ "2024-08-12 17:58:27,279",74550,1.754,0.348,9067.965,0.0,3.398
1493
+ "2024-08-12 18:01:17,245",74600,1.749,0.35,9067.957,0.0,3.399
1494
+ "2024-08-12 18:04:05,815",74650,1.75,0.351,9067.95,0.0,3.371
1495
+ "2024-08-12 18:06:55,310",74700,1.743,0.349,9067.942,0.0,3.39
1496
+ "2024-08-12 18:09:44,130",74750,1.751,0.35,9067.934,0.0,3.376
1497
+ "2024-08-12 18:12:34,080",74800,1.746,0.348,9067.926,0.0,3.399
1498
+ "2024-08-12 18:15:24,419",74850,1.744,0.349,9067.919,0.0,3.407
1499
+ "2024-08-12 18:18:12,739",74900,1.747,0.349,9067.912,0.0,3.366
1500
+ "2024-08-12 18:21:02,709",74950,1.754,0.349,9067.905,0.0,3.399
1501
+ "2024-08-12 18:23:52,761",75000,1.75,0.35,9067.898,0.0,3.401
1502
+ "2024-08-12 18:26:49,045",75050,1.754,0.349,9067.891,0.0,3.526
1503
+ "2024-08-12 18:29:38,847",75100,1.746,0.351,9067.885,0.0,3.396
1504
+ "2024-08-12 18:32:29,593",75150,1.745,0.35,9067.879,0.0,3.415
1505
+ "2024-08-12 18:35:19,147",75200,1.748,0.35,9067.873,0.0,3.391
1506
+ "2024-08-12 18:38:08,943",75250,1.737,0.35,9067.866,0.0,3.396
1507
+ "2024-08-12 18:40:57,335",75300,1.75,0.35,9067.86,0.0,3.368
1508
+ "2024-08-12 18:43:47,536",75350,1.746,0.351,9067.854,0.0,3.404
1509
+ "2024-08-12 18:46:36,557",75400,1.744,0.348,9067.849,0.0,3.38
1510
+ "2024-08-12 18:49:26,702",75450,1.742,0.349,9067.843,0.0,3.403
1511
+ "2024-08-12 18:52:16,129",75500,1.747,0.349,9067.838,0.0,3.389
1512
+ "2024-08-12 18:55:06,083",75550,1.75,0.352,9067.831,0.0,3.399
1513
+ "2024-08-12 18:57:56,288",75600,1.751,0.35,9067.826,0.0,3.404
1514
+ "2024-08-12 19:00:47,552",75650,1.745,0.349,9067.821,0.0,3.425
1515
+ "2024-08-12 19:03:37,554",75700,1.748,0.351,9067.816,0.0,3.4
1516
+ "2024-08-12 19:06:26,390",75750,1.735,0.35,9067.81,0.0,3.377
1517
+ "2024-08-12 19:09:15,513",75800,1.741,0.35,9067.806,0.0,3.382
1518
+ "2024-08-12 19:12:04,817",75850,1.742,0.351,9067.8,0.0,3.386
1519
+ "2024-08-12 19:14:54,908",75900,1.736,0.35,9067.795,0.0,3.402
1520
+ "2024-08-12 19:17:44,997",75950,1.738,0.35,9067.791,0.0,3.402
1521
+ "2024-08-12 19:20:35,427",76000,1.74,0.349,9067.786,0.0,3.409
1522
+ "2024-08-12 19:23:26,015",76050,1.735,0.349,9067.781,0.0,3.412
1523
+ "2024-08-12 19:26:15,433",76100,1.731,0.35,9067.776,0.0,3.388
1524
+ "2024-08-12 19:29:03,758",76150,1.734,0.35,9067.771,0.0,3.366
1525
+ "2024-08-12 19:31:52,080",76200,1.739,0.349,9067.767,0.0,3.366
1526
+ "2024-08-12 19:34:45,455",76250,1.733,0.349,9067.763,0.0,3.467
1527
+ "2024-08-12 19:37:34,591",76300,1.723,0.349,9067.759,0.0,3.383
1528
+ "2024-08-12 19:40:23,592",76350,1.734,0.352,9067.755,0.0,3.38
1529
+ "2024-08-12 19:43:12,814",76400,1.732,0.35,9067.751,0.0,3.384
1530
+ "2024-08-12 19:46:01,456",76450,1.723,0.35,9067.747,0.0,3.373
1531
+ "2024-08-12 19:48:51,400",76500,1.726,0.349,9067.743,0.0,3.399
1532
+ "2024-08-12 19:51:40,649",76550,1.726,0.35,9067.74,0.0,3.385
1533
+ "2024-08-12 19:54:29,691",76600,1.726,0.35,9067.736,0.0,3.381
1534
+ "2024-08-12 19:57:19,230",76650,1.717,0.347,9067.733,0.0,3.391
1535
+ "2024-08-12 20:00:09,385",76700,1.717,0.347,9067.729,0.0,3.403
1536
+ "2024-08-12 20:02:57,964",76750,1.715,0.351,9067.726,0.0,3.372
1537
+ "2024-08-12 20:05:47,101",76800,1.71,0.351,9067.722,0.0,3.383
1538
+ "2024-08-12 20:08:37,921",76850,1.725,0.351,9067.719,0.0,3.416
1539
+ "2024-08-12 20:11:28,368",76900,1.716,0.35,9067.716,0.0,3.409
1540
+ "2024-08-12 20:14:18,240",76950,1.722,0.35,9067.713,0.0,3.397
1541
+ "2024-08-12 20:17:07,126",77000,1.727,0.348,9067.71,0.0,3.378
1542
+ "2024-08-12 20:19:57,021",77050,1.727,0.348,9067.707,0.0,3.398
1543
+ "2024-08-12 20:22:47,079",77100,1.726,0.349,9067.705,0.0,3.401
1544
+ "2024-08-12 20:25:36,117",77150,1.72,0.349,9067.702,0.0,3.381
1545
+ "2024-08-12 20:28:24,552",77200,1.716,0.351,9067.699,0.0,3.369
1546
+ "2024-08-12 20:31:13,686",77250,1.723,0.348,9067.696,0.0,3.383
1547
+ "2024-08-12 20:34:04,979",77300,1.724,0.349,9067.694,0.0,3.426
1548
+ "2024-08-12 20:36:54,247",77350,1.724,0.349,9067.691,0.0,3.385
1549
+ "2024-08-12 20:39:44,072",77400,1.717,0.35,9067.689,0.0,3.396
1550
+ "2024-08-12 20:42:33,256",77450,1.721,0.35,9067.686,0.0,3.384
1551
+ "2024-08-12 20:45:23,400",77500,1.725,0.351,9067.684,0.0,3.403
1552
+ "2024-08-12 20:48:13,007",77550,1.723,0.35,9067.682,0.0,3.392
1553
+ "2024-08-12 20:51:01,893",77600,1.711,0.349,9067.68,0.0,3.378
1554
+ "2024-08-12 20:53:51,688",77650,1.719,0.351,9067.678,0.0,3.396
1555
+ "2024-08-12 20:56:42,523",77700,1.718,0.349,9067.676,0.0,3.417
1556
+ "2024-08-12 20:59:36,305",77750,1.717,0.35,9067.674,0.0,3.476
1557
+ "2024-08-12 21:02:26,051",77800,1.714,0.349,9067.672,0.0,3.395
1558
+ "2024-08-12 21:05:15,893",77850,1.719,0.349,9067.67,0.0,3.397
1559
+ "2024-08-12 21:08:06,462",77900,1.719,0.35,9067.668,0.0,3.411
1560
+ "2024-08-12 21:10:56,190",77950,1.714,0.349,9067.666,0.0,3.395
1561
+ "2024-08-12 21:13:44,945",78000,1.714,0.351,9067.665,0.0,3.375
1562
+ "2024-08-12 21:16:34,160",78050,1.71,0.351,9067.663,0.0,3.384
1563
+ "2024-08-12 21:19:24,074",78100,1.707,0.349,9067.661,0.0,3.398
1564
+ "2024-08-12 21:22:14,845",78150,1.701,0.349,9067.66,0.0,3.415
checkpoints/weights_l2_over_steps.png CHANGED