error577 committed on
Commit
6ace8bb
·
verified ·
1 Parent(s): b093c29

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6a807852768ce2ccfda9549fd58cf83073aeb8e0dcfc544e8b523b0a5212bca
3
  size 500770656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b091bf5f1ce9e8388e64b336bdb4cf8f02f0eb007585067a4b0747d3b743c3aa
3
  size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1031a80aa022fef633fe2a9b86c4674285e249960c92983d151a972bb23abbc8
3
  size 254917780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d24339e9d80ca13375893d5df939b469a5841bd9782322090d796d0025d923d0
3
  size 254917780
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90dbd62f0b1c6fdca03988ec1ba0af067e521a02fdd2e714aba518c99f3bbc7c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98fa1f833b77a5bef15319c574c6083893d7c2840ec5da7147454424b67d975e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c822dec639f0641927f6a0448fd2ae65913fdbdae3d08ed0701aa491ca071f0a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477e1ab9e7e387f392e0bb68fb7cd86779a760a788b2ed973ec470f1c83dd5f7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4887339770793915,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 0.06457992775120583,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
1447
  "eval_samples_per_second": 2.678,
1448
  "eval_steps_per_second": 2.678,
1449
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  }
1451
  ],
1452
  "logging_steps": 1,
@@ -1475,7 +1833,7 @@
1475
  "attributes": {}
1476
  }
1477
  },
1478
- "total_flos": 1.1792581360091136e+17,
1479
  "train_batch_size": 1,
1480
  "trial_name": null,
1481
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.4805048406124115,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 0.08072490968900728,
5
  "eval_steps": 50,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1447
  "eval_samples_per_second": 2.678,
1448
  "eval_steps_per_second": 2.678,
1449
  "step": 200
1450
+ },
1451
+ {
1452
+ "epoch": 0.06490282738996185,
1453
+ "grad_norm": 0.318142294883728,
1454
+ "learning_rate": 7.830721146206451e-05,
1455
+ "loss": 0.5384,
1456
+ "step": 201
1457
+ },
1458
+ {
1459
+ "epoch": 0.06522572702871789,
1460
+ "grad_norm": 0.288631409406662,
1461
+ "learning_rate": 7.688410249570214e-05,
1462
+ "loss": 0.5078,
1463
+ "step": 202
1464
+ },
1465
+ {
1466
+ "epoch": 0.06554862666747392,
1467
+ "grad_norm": 0.280100554227829,
1468
+ "learning_rate": 7.54695740040912e-05,
1469
+ "loss": 0.4788,
1470
+ "step": 203
1471
+ },
1472
+ {
1473
+ "epoch": 0.06587152630622994,
1474
+ "grad_norm": 0.279681533575058,
1475
+ "learning_rate": 7.406379198842189e-05,
1476
+ "loss": 0.4447,
1477
+ "step": 204
1478
+ },
1479
+ {
1480
+ "epoch": 0.06619442594498598,
1481
+ "grad_norm": 0.2892783284187317,
1482
+ "learning_rate": 7.266692142344672e-05,
1483
+ "loss": 0.4932,
1484
+ "step": 205
1485
+ },
1486
+ {
1487
+ "epoch": 0.066517325583742,
1488
+ "grad_norm": 0.2658500075340271,
1489
+ "learning_rate": 7.127912623811993e-05,
1490
+ "loss": 0.4682,
1491
+ "step": 206
1492
+ },
1493
+ {
1494
+ "epoch": 0.06684022522249804,
1495
+ "grad_norm": 0.2946866452693939,
1496
+ "learning_rate": 6.990056929635957e-05,
1497
+ "loss": 0.4838,
1498
+ "step": 207
1499
+ },
1500
+ {
1501
+ "epoch": 0.06716312486125406,
1502
+ "grad_norm": 0.2683822214603424,
1503
+ "learning_rate": 6.853141237793506e-05,
1504
+ "loss": 0.4408,
1505
+ "step": 208
1506
+ },
1507
+ {
1508
+ "epoch": 0.0674860245000101,
1509
+ "grad_norm": 0.3225007653236389,
1510
+ "learning_rate": 6.717181615948126e-05,
1511
+ "loss": 0.4949,
1512
+ "step": 209
1513
+ },
1514
+ {
1515
+ "epoch": 0.06780892413876612,
1516
+ "grad_norm": 0.25332513451576233,
1517
+ "learning_rate": 6.582194019564266e-05,
1518
+ "loss": 0.4141,
1519
+ "step": 210
1520
+ },
1521
+ {
1522
+ "epoch": 0.06813182377752215,
1523
+ "grad_norm": 0.2799530625343323,
1524
+ "learning_rate": 6.448194290034848e-05,
1525
+ "loss": 0.4445,
1526
+ "step": 211
1527
+ },
1528
+ {
1529
+ "epoch": 0.06845472341627817,
1530
+ "grad_norm": 0.27327555418014526,
1531
+ "learning_rate": 6.315198152822272e-05,
1532
+ "loss": 0.4138,
1533
+ "step": 212
1534
+ },
1535
+ {
1536
+ "epoch": 0.06877762305503421,
1537
+ "grad_norm": 0.3778553903102875,
1538
+ "learning_rate": 6.183221215612904e-05,
1539
+ "loss": 0.4804,
1540
+ "step": 213
1541
+ },
1542
+ {
1543
+ "epoch": 0.06910052269379023,
1544
+ "grad_norm": 0.3077884614467621,
1545
+ "learning_rate": 6.052278966485491e-05,
1546
+ "loss": 0.4657,
1547
+ "step": 214
1548
+ },
1549
+ {
1550
+ "epoch": 0.06942342233254627,
1551
+ "grad_norm": 0.29660362005233765,
1552
+ "learning_rate": 5.922386772093526e-05,
1553
+ "loss": 0.4297,
1554
+ "step": 215
1555
+ },
1556
+ {
1557
+ "epoch": 0.06974632197130229,
1558
+ "grad_norm": 0.3540116548538208,
1559
+ "learning_rate": 5.793559875861938e-05,
1560
+ "loss": 0.466,
1561
+ "step": 216
1562
+ },
1563
+ {
1564
+ "epoch": 0.07006922161005832,
1565
+ "grad_norm": 0.2957676351070404,
1566
+ "learning_rate": 5.6658133961981894e-05,
1567
+ "loss": 0.4421,
1568
+ "step": 217
1569
+ },
1570
+ {
1571
+ "epoch": 0.07039212124881435,
1572
+ "grad_norm": 0.3042965233325958,
1573
+ "learning_rate": 5.5391623247180744e-05,
1574
+ "loss": 0.441,
1575
+ "step": 218
1576
+ },
1577
+ {
1578
+ "epoch": 0.07071502088757038,
1579
+ "grad_norm": 0.36982765793800354,
1580
+ "learning_rate": 5.413621524486363e-05,
1581
+ "loss": 0.4114,
1582
+ "step": 219
1583
+ },
1584
+ {
1585
+ "epoch": 0.07103792052632642,
1586
+ "grad_norm": 0.3452307879924774,
1587
+ "learning_rate": 5.289205728272586e-05,
1588
+ "loss": 0.4562,
1589
+ "step": 220
1590
+ },
1591
+ {
1592
+ "epoch": 0.07136082016508244,
1593
+ "grad_norm": 0.3854043483734131,
1594
+ "learning_rate": 5.165929536822059e-05,
1595
+ "loss": 0.5003,
1596
+ "step": 221
1597
+ },
1598
+ {
1599
+ "epoch": 0.07168371980383847,
1600
+ "grad_norm": 0.3237496018409729,
1601
+ "learning_rate": 5.043807417142436e-05,
1602
+ "loss": 0.4592,
1603
+ "step": 222
1604
+ },
1605
+ {
1606
+ "epoch": 0.0720066194425945,
1607
+ "grad_norm": 0.32223159074783325,
1608
+ "learning_rate": 4.922853700805909e-05,
1609
+ "loss": 0.4553,
1610
+ "step": 223
1611
+ },
1612
+ {
1613
+ "epoch": 0.07232951908135053,
1614
+ "grad_norm": 0.40129488706588745,
1615
+ "learning_rate": 4.8030825822673814e-05,
1616
+ "loss": 0.4276,
1617
+ "step": 224
1618
+ },
1619
+ {
1620
+ "epoch": 0.07265241872010655,
1621
+ "grad_norm": 0.34809187054634094,
1622
+ "learning_rate": 4.684508117198648e-05,
1623
+ "loss": 0.4856,
1624
+ "step": 225
1625
+ },
1626
+ {
1627
+ "epoch": 0.07297531835886259,
1628
+ "grad_norm": 0.3367185592651367,
1629
+ "learning_rate": 4.567144220838923e-05,
1630
+ "loss": 0.4555,
1631
+ "step": 226
1632
+ },
1633
+ {
1634
+ "epoch": 0.07329821799761861,
1635
+ "grad_norm": 0.35933539271354675,
1636
+ "learning_rate": 4.4510046663617996e-05,
1637
+ "loss": 0.4837,
1638
+ "step": 227
1639
+ },
1640
+ {
1641
+ "epoch": 0.07362111763637465,
1642
+ "grad_norm": 0.3718101382255554,
1643
+ "learning_rate": 4.336103083258942e-05,
1644
+ "loss": 0.4789,
1645
+ "step": 228
1646
+ },
1647
+ {
1648
+ "epoch": 0.07394401727513067,
1649
+ "grad_norm": 0.3542415201663971,
1650
+ "learning_rate": 4.2224529557405645e-05,
1651
+ "loss": 0.5075,
1652
+ "step": 229
1653
+ },
1654
+ {
1655
+ "epoch": 0.0742669169138867,
1656
+ "grad_norm": 0.3407626748085022,
1657
+ "learning_rate": 4.1100676211530404e-05,
1658
+ "loss": 0.4803,
1659
+ "step": 230
1660
+ },
1661
+ {
1662
+ "epoch": 0.07458981655264273,
1663
+ "grad_norm": 0.39396294951438904,
1664
+ "learning_rate": 3.998960268413666e-05,
1665
+ "loss": 0.5117,
1666
+ "step": 231
1667
+ },
1668
+ {
1669
+ "epoch": 0.07491271619139876,
1670
+ "grad_norm": 0.3785285949707031,
1671
+ "learning_rate": 3.889143936462914e-05,
1672
+ "loss": 0.4925,
1673
+ "step": 232
1674
+ },
1675
+ {
1676
+ "epoch": 0.07523561583015478,
1677
+ "grad_norm": 0.36613747477531433,
1678
+ "learning_rate": 3.780631512734241e-05,
1679
+ "loss": 0.4434,
1680
+ "step": 233
1681
+ },
1682
+ {
1683
+ "epoch": 0.07555851546891082,
1684
+ "grad_norm": 0.3978104591369629,
1685
+ "learning_rate": 3.673435731641691e-05,
1686
+ "loss": 0.4613,
1687
+ "step": 234
1688
+ },
1689
+ {
1690
+ "epoch": 0.07588141510766684,
1691
+ "grad_norm": 0.43552708625793457,
1692
+ "learning_rate": 3.567569173085454e-05,
1693
+ "loss": 0.4177,
1694
+ "step": 235
1695
+ },
1696
+ {
1697
+ "epoch": 0.07620431474642288,
1698
+ "grad_norm": 0.3718654215335846,
1699
+ "learning_rate": 3.463044260975566e-05,
1700
+ "loss": 0.4611,
1701
+ "step": 236
1702
+ },
1703
+ {
1704
+ "epoch": 0.07652721438517891,
1705
+ "grad_norm": 0.41485676169395447,
1706
+ "learning_rate": 3.3598732617739036e-05,
1707
+ "loss": 0.5586,
1708
+ "step": 237
1709
+ },
1710
+ {
1711
+ "epoch": 0.07685011402393493,
1712
+ "grad_norm": 0.37860673666000366,
1713
+ "learning_rate": 3.258068283054666e-05,
1714
+ "loss": 0.4256,
1715
+ "step": 238
1716
+ },
1717
+ {
1718
+ "epoch": 0.07717301366269097,
1719
+ "grad_norm": 0.4362449645996094,
1720
+ "learning_rate": 3.1576412720834746e-05,
1721
+ "loss": 0.5763,
1722
+ "step": 239
1723
+ },
1724
+ {
1725
+ "epoch": 0.07749591330144699,
1726
+ "grad_norm": 0.3914451003074646,
1727
+ "learning_rate": 3.058604014415343e-05,
1728
+ "loss": 0.4739,
1729
+ "step": 240
1730
+ },
1731
+ {
1732
+ "epoch": 0.07781881294020303,
1733
+ "grad_norm": 0.3677349388599396,
1734
+ "learning_rate": 2.960968132511567e-05,
1735
+ "loss": 0.4716,
1736
+ "step": 241
1737
+ },
1738
+ {
1739
+ "epoch": 0.07814171257895905,
1740
+ "grad_norm": 0.3888345956802368,
1741
+ "learning_rate": 2.8647450843757897e-05,
1742
+ "loss": 0.5218,
1743
+ "step": 242
1744
+ },
1745
+ {
1746
+ "epoch": 0.07846461221771509,
1747
+ "grad_norm": 0.37700045108795166,
1748
+ "learning_rate": 2.7699461622093304e-05,
1749
+ "loss": 0.4978,
1750
+ "step": 243
1751
+ },
1752
+ {
1753
+ "epoch": 0.0787875118564711,
1754
+ "grad_norm": 0.41537439823150635,
1755
+ "learning_rate": 2.67658249108603e-05,
1756
+ "loss": 0.4907,
1757
+ "step": 244
1758
+ },
1759
+ {
1760
+ "epoch": 0.07911041149522714,
1761
+ "grad_norm": 0.40000054240226746,
1762
+ "learning_rate": 2.584665027646643e-05,
1763
+ "loss": 0.488,
1764
+ "step": 245
1765
+ },
1766
+ {
1767
+ "epoch": 0.07943331113398316,
1768
+ "grad_norm": 0.395548552274704,
1769
+ "learning_rate": 2.49420455881305e-05,
1770
+ "loss": 0.4847,
1771
+ "step": 246
1772
+ },
1773
+ {
1774
+ "epoch": 0.0797562107727392,
1775
+ "grad_norm": 0.4183206558227539,
1776
+ "learning_rate": 2.4052117005223455e-05,
1777
+ "loss": 0.5261,
1778
+ "step": 247
1779
+ },
1780
+ {
1781
+ "epoch": 0.08007911041149522,
1782
+ "grad_norm": 0.37241002917289734,
1783
+ "learning_rate": 2.317696896481024e-05,
1784
+ "loss": 0.499,
1785
+ "step": 248
1786
+ },
1787
+ {
1788
+ "epoch": 0.08040201005025126,
1789
+ "grad_norm": 0.4700750410556793,
1790
+ "learning_rate": 2.231670416939364e-05,
1791
+ "loss": 0.435,
1792
+ "step": 249
1793
+ },
1794
+ {
1795
+ "epoch": 0.08072490968900728,
1796
+ "grad_norm": 0.47890686988830566,
1797
+ "learning_rate": 2.147142357486164e-05,
1798
+ "loss": 0.6928,
1799
+ "step": 250
1800
+ },
1801
+ {
1802
+ "epoch": 0.08072490968900728,
1803
+ "eval_loss": 0.4805048406124115,
1804
+ "eval_runtime": 93.118,
1805
+ "eval_samples_per_second": 2.674,
1806
+ "eval_steps_per_second": 2.674,
1807
+ "step": 250
1808
  }
1809
  ],
1810
  "logging_steps": 1,
 
1833
  "attributes": {}
1834
  }
1835
  },
1836
+ "total_flos": 1.4707264776044544e+17,
1837
  "train_batch_size": 1,
1838
  "trial_name": null,
1839
  "trial_params": null