schnell committed on
Commit c9b77c6
1 Parent(s): 9996fb2

Training in progress, epoch 6

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:22087826d4ba83ff43aa92646474a7d738bab2dd63e2f35df5258bba24feb37a
+ oid sha256:27f3648a6badaba6668f8c6d4db4c2b9896ababb988525769298b2731863da37
  size 236470789
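
Note: every checkpoint file in this commit is tracked with Git LFS, so the diffs here only show the pointer files changing: the oid line is the SHA-256 of the real blob and size is its byte count. As a side note, a minimal sketch of how such a pointer can be checked against a locally pulled file; the helper name is illustrative, not part of this repo:

import hashlib
from pathlib import Path

def check_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Compare a Git LFS pointer (version/oid/size lines) against a local file."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]      # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])
    blob = Path(blob_path)
    actual_oid = hashlib.sha256(blob.read_bytes()).hexdigest()
    return blob.stat().st_size == expected_size and actual_oid == expected_oid

# Example with the new optimizer.pt pointer from this commit
# (assumes the LFS blob has already been pulled to this path):
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:27f3648a6badaba6668f8c6d4db4c2b9896ababb988525769298b2731863da37
size 236470789"""
print(check_lfs_pointer(pointer, "last-checkpoint/optimizer.pt"))
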
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a700a014c14e45bba7ac4d9c3addf426338c4babdc7d46b58792c086a371be15
+ oid sha256:c3d71c32019041a99199225d8bb52225cec92c140618e167aad1e1ccb4d0d934
  size 118243218
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c826109d4f8c0572f569da300df250d13a18926f54ce1fa15e1b255c8c485c19
+ oid sha256:ce5c3856f1342734c9ff443026f78f9d74949332f5c5b796847ea499dbf0c080
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c8745b7d4dcff3c828a3250fc0904c3946b34c082c34c9ff77e31f739752a438
+ oid sha256:b278cd6e09360f31a3d837f80dee4c2ce4d9c9d186a939ecf157e1a0deb793f3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:709c9f3a894f05e4848baff4ab73816348c18f139d1395092b46622df0fe9bf1
+ oid sha256:b1f64a9b985406894ef65cdb08cec8746d6a7f750e0466984f5ddbc1f0df99b9
  size 627
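
Note: optimizer.pt, pytorch_model.bin, rng_state.pth, scaler.pt and scheduler.pt above, together with the trainer_state.json diffed below, form a complete last-checkpoint directory that a Hugging Face Transformers Trainer can resume from via trainer.train(resume_from_checkpoint="last-checkpoint"). A small runnable sketch that just inspects where the resumed run stands, assuming the repo has been cloned and the LFS files pulled; the key names match what appears in the diff below:

import json
from pathlib import Path

# Path as committed in this repo.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print(state["epoch"], state["global_step"], state["max_steps"])  # 6.0 137640 321160
print(state["log_history"][-1])  # the epoch-6 eval record appended in the diff below
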
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 5.0,
- "global_step": 114700,
+ "epoch": 6.0,
+ "global_step": 137640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1425,11 +1425,296 @@
  "eval_samples_per_second": 496.191,
  "eval_steps_per_second": 31.012,
  "step": 114700
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 6.485651741794256e-05,
+ "loss": 1.9898,
+ "step": 115000
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 6.469957351516601e-05,
+ "loss": 1.988,
+ "step": 115500
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 6.454231509555022e-05,
+ "loss": 1.9881,
+ "step": 116000
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 6.438505667593443e-05,
+ "loss": 1.9849,
+ "step": 116500
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 6.422779825631865e-05,
+ "loss": 1.9872,
+ "step": 117000
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 6.407085435354208e-05,
+ "loss": 1.9834,
+ "step": 117500
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 6.391359593392631e-05,
+ "loss": 1.9814,
+ "step": 118000
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 6.375633751431052e-05,
+ "loss": 1.978,
+ "step": 118500
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 6.359907909469474e-05,
+ "loss": 1.9857,
+ "step": 119000
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 6.344213519191819e-05,
+ "loss": 1.9816,
+ "step": 119500
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 6.32848767723024e-05,
+ "loss": 1.9817,
+ "step": 120000
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 6.312761835268661e-05,
+ "loss": 1.9815,
+ "step": 120500
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 6.297035993307082e-05,
+ "loss": 1.9796,
+ "step": 121000
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 6.281341603029426e-05,
+ "loss": 1.9842,
+ "step": 121500
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 6.265615761067847e-05,
+ "loss": 1.9816,
+ "step": 122000
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 6.249889919106268e-05,
+ "loss": 1.976,
+ "step": 122500
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 6.234164077144691e-05,
+ "loss": 1.9798,
+ "step": 123000
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 6.218469686867035e-05,
+ "loss": 1.9768,
+ "step": 123500
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 6.202743844905456e-05,
+ "loss": 1.9786,
+ "step": 124000
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 6.187018002943879e-05,
+ "loss": 1.9753,
+ "step": 124500
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 6.1712921609823e-05,
+ "loss": 1.9755,
+ "step": 125000
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 6.155597770704644e-05,
+ "loss": 1.9771,
+ "step": 125500
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 6.139871928743065e-05,
+ "loss": 1.9736,
+ "step": 126000
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 6.124146086781488e-05,
+ "loss": 1.9729,
+ "step": 126500
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 6.108420244819907e-05,
+ "loss": 1.9692,
+ "step": 127000
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 6.0927258545422526e-05,
+ "loss": 1.9702,
+ "step": 127500
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 6.077000012580674e-05,
+ "loss": 1.9709,
+ "step": 128000
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 6.0612741706190954e-05,
+ "loss": 1.9712,
+ "step": 128500
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 6.0455483286575164e-05,
+ "loss": 1.9714,
+ "step": 129000
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 6.0298539383798616e-05,
+ "loss": 1.9736,
+ "step": 129500
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 6.014128096418282e-05,
+ "loss": 1.9677,
+ "step": 130000
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 5.9984022544567044e-05,
+ "loss": 1.9626,
+ "step": 130500
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 5.9826764124951254e-05,
+ "loss": 1.966,
+ "step": 131000
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 5.96698202221747e-05,
+ "loss": 1.9626,
+ "step": 131500
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 5.951256180255891e-05,
+ "loss": 1.9652,
+ "step": 132000
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 5.935530338294313e-05,
+ "loss": 1.9652,
+ "step": 132500
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 5.919804496332734e-05,
+ "loss": 1.9638,
+ "step": 133000
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 5.9040786543711554e-05,
+ "loss": 1.9591,
+ "step": 133500
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 5.888384264093499e-05,
+ "loss": 1.9648,
+ "step": 134000
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 5.872658422131922e-05,
+ "loss": 1.9617,
+ "step": 134500
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 5.856932580170342e-05,
+ "loss": 1.9626,
+ "step": 135000
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 5.8412067382087644e-05,
+ "loss": 1.9597,
+ "step": 135500
+ },
+ {
+ "epoch": 5.93,
+ "learning_rate": 5.825512347931108e-05,
+ "loss": 1.9597,
+ "step": 136000
+ },
+ {
+ "epoch": 5.95,
+ "learning_rate": 5.80978650596953e-05,
+ "loss": 1.962,
+ "step": 136500
+ },
+ {
+ "epoch": 5.97,
+ "learning_rate": 5.794060664007951e-05,
+ "loss": 1.9557,
+ "step": 137000
+ },
+ {
+ "epoch": 5.99,
+ "learning_rate": 5.778334822046373e-05,
+ "loss": 1.9576,
+ "step": 137500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.6267606171222161,
+ "eval_loss": 1.8356587886810303,
+ "eval_runtime": 358.323,
+ "eval_samples_per_second": 496.625,
+ "eval_steps_per_second": 31.039,
+ "step": 137640
  }
  ],
  "max_steps": 321160,
  "num_train_epochs": 14,
- "total_flos": 8.678421440109158e+17,
+ "total_flos": 1.041429725631959e+18,
  "trial_name": null,
  "trial_params": null
  }
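
Note: the changed values in this hunk are internally consistent, which makes for a quick sanity check after an epoch of training. A small arithmetic sketch using only numbers visible in the diff above:

# Values taken directly from the trainer_state.json diff above.
old_step, new_step = 114700, 137640
max_steps, num_epochs = 321160, 14
old_flos, new_flos = 8.678421440109158e+17, 1.041429725631959e+18

steps_per_epoch = new_step - old_step               # 22940 steps per epoch
assert new_step == steps_per_epoch * 6              # 137640 = 22940 * 6
assert max_steps == steps_per_epoch * num_epochs    # 321160 = 22940 * 14

# total_flos grows by roughly one epoch's worth of compute:
print(new_flos - old_flos)      # ~1.736e+17, close to old_flos / 5 epochs
print(old_flos / 5)

# The eval throughput implies an eval batch of about 16 samples per step
# and an eval split of roughly 178k examples:
print(496.625 / 31.039)         # ~16.0
print(358.323 * 496.625)        # ~177,952 samples
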
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a700a014c14e45bba7ac4d9c3addf426338c4babdc7d46b58792c086a371be15
+ oid sha256:c3d71c32019041a99199225d8bb52225cec92c140618e167aad1e1ccb4d0d934
  size 118243218
runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cecb95435b971ee3e9da76c61cbc5907364083429abba73e46350de6431f6ee5
- size 41941
+ oid sha256:421928b94b23f10aa10b0c399afa84e783ed1e8b949a4822fbce947b89041721
+ size 49630
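
Note: the TensorBoard event file grows from 41941 to 49630 bytes because the epoch-6 scalars (loss, learning rate, eval metrics) were appended to it. To read those scalars outside TensorBoard, a sketch along these lines should work, assuming the tensorboard package is installed; the exact tag names depend on the Trainer version:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Path as committed in this repo.
path = "runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0"
acc = EventAccumulator(path)
acc.Reload()

print(acc.Tags()["scalars"])          # list the scalar tags actually present
for tag in acc.Tags()["scalars"]:
    last = acc.Scalars(tag)[-1]
    print(tag, last.step, last.value)  # latest logged point per scalar
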