schnell committed
Commit 680cf14
1 parent: df543e6

Training in progress, epoch 6

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0c89b45e2ac69f6293669df6313ee21058cc2af394a4390bba808d002466ccd2
+ oid sha256:3c204c5739cb89e423854dc482b604d68f1fe7777bddd32bee3e42c27396f835
  size 236491269
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
+ oid sha256:8f1368949b8a3ca0194b62ba5c01ad414ec8388ceb72de5c960a6415ef0bc7eb
  size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3214d2294320f690f0de7c994db816bb6b393e81c203b4bd95ac5070d6787ff6
+ oid sha256:b2804a769785bd005d311fa5211b59d4c5e43c5e9f11eb9bdc8f5d8e3bbbcfcc
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:274b42d0611ce03f64f3695a4574be0aa6d9ba137add81127f546ea0347ddbe2
+ oid sha256:249568bbffd1228f6946ea7e8e37b3e1003da8fddc10b6cbe9e7db83b6052d3f
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e06a18a940ba98c5f1a42737f61af8460d78a917521a83b1e094435306c8218
+ oid sha256:b1f64a9b985406894ef65cdb08cec8746d6a7f750e0466984f5ddbc1f0df99b9
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 5.0,
- "global_step": 114700,
+ "epoch": 6.0,
+ "global_step": 137640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1425,11 +1425,296 @@
  "eval_samples_per_second": 603.782,
  "eval_steps_per_second": 37.736,
  "step": 114700
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 6.48568319347818e-05,
+ "loss": 1.714,
+ "step": 115000
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 6.469957351516601e-05,
+ "loss": 1.7098,
+ "step": 115500
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 6.454231509555022e-05,
+ "loss": 1.7095,
+ "step": 116000
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 6.438537119277366e-05,
+ "loss": 1.7098,
+ "step": 116500
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 6.422811277315787e-05,
+ "loss": 1.711,
+ "step": 117000
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 6.407085435354208e-05,
+ "loss": 1.7108,
+ "step": 117500
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 6.391359593392631e-05,
+ "loss": 1.7088,
+ "step": 118000
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 6.375665203114975e-05,
+ "loss": 1.7073,
+ "step": 118500
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 6.359939361153396e-05,
+ "loss": 1.7099,
+ "step": 119000
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 6.344213519191819e-05,
+ "loss": 1.7051,
+ "step": 119500
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 6.32848767723024e-05,
+ "loss": 1.7083,
+ "step": 120000
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 6.312793286952584e-05,
+ "loss": 1.7101,
+ "step": 120500
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 6.297067444991005e-05,
+ "loss": 1.7033,
+ "step": 121000
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 6.281341603029426e-05,
+ "loss": 1.7058,
+ "step": 121500
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 6.265615761067847e-05,
+ "loss": 1.7041,
+ "step": 122000
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 6.249921370790193e-05,
+ "loss": 1.7036,
+ "step": 122500
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 6.234195528828613e-05,
+ "loss": 1.7024,
+ "step": 123000
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 6.218469686867035e-05,
+ "loss": 1.703,
+ "step": 123500
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 6.202743844905456e-05,
+ "loss": 1.7014,
+ "step": 124000
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 6.187049454627801e-05,
+ "loss": 1.7043,
+ "step": 124500
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 6.171323612666222e-05,
+ "loss": 1.703,
+ "step": 125000
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 6.155597770704644e-05,
+ "loss": 1.6996,
+ "step": 125500
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 6.139871928743065e-05,
+ "loss": 1.7036,
+ "step": 126000
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 6.12417753846541e-05,
+ "loss": 1.6985,
+ "step": 126500
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 6.108451696503831e-05,
+ "loss": 1.6983,
+ "step": 127000
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 6.0927258545422526e-05,
+ "loss": 1.7001,
+ "step": 127500
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 6.077000012580674e-05,
+ "loss": 1.6959,
+ "step": 128000
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 6.061305622303018e-05,
+ "loss": 1.697,
+ "step": 128500
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 6.045579780341439e-05,
+ "loss": 1.6969,
+ "step": 129000
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 6.0298539383798616e-05,
+ "loss": 1.6922,
+ "step": 129500
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 6.014128096418282e-05,
+ "loss": 1.6976,
+ "step": 130000
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 5.998433706140627e-05,
+ "loss": 1.6929,
+ "step": 130500
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 5.982707864179048e-05,
+ "loss": 1.6945,
+ "step": 131000
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 5.96698202221747e-05,
+ "loss": 1.6906,
+ "step": 131500
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 5.951256180255891e-05,
+ "loss": 1.6941,
+ "step": 132000
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 5.9355617899782356e-05,
+ "loss": 1.695,
+ "step": 132500
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 5.919835948016658e-05,
+ "loss": 1.6897,
+ "step": 133000
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 5.904110106055078e-05,
+ "loss": 1.6877,
+ "step": 133500
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 5.888384264093499e-05,
+ "loss": 1.6901,
+ "step": 134000
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 5.8726898738158446e-05,
+ "loss": 1.6896,
+ "step": 134500
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 5.856964031854266e-05,
+ "loss": 1.6925,
+ "step": 135000
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 5.841238189892687e-05,
+ "loss": 1.685,
+ "step": 135500
+ },
+ {
+ "epoch": 5.93,
+ "learning_rate": 5.825512347931108e-05,
+ "loss": 1.6871,
+ "step": 136000
+ },
+ {
+ "epoch": 5.95,
+ "learning_rate": 5.809817957653453e-05,
+ "loss": 1.69,
+ "step": 136500
+ },
+ {
+ "epoch": 5.97,
+ "learning_rate": 5.7940921156918746e-05,
+ "loss": 1.6874,
+ "step": 137000
+ },
+ {
+ "epoch": 5.99,
+ "learning_rate": 5.7783662737302956e-05,
+ "loss": 1.6883,
+ "step": 137500
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.6772348720266682,
+ "eval_loss": 1.5559165477752686,
+ "eval_runtime": 294.9687,
+ "eval_samples_per_second": 603.291,
+ "eval_steps_per_second": 37.706,
+ "step": 137640
  }
  ],
  "max_steps": 321160,
  "num_train_epochs": 14,
- "total_flos": 8.721147965075726e+17,
+ "total_flos": 1.0465574145188712e+18,
  "trial_name": null,
  "trial_params": null
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
+ oid sha256:8f1368949b8a3ca0194b62ba5c01ad414ec8388ceb72de5c960a6415ef0bc7eb
  size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:87287f5853697bd232c1d288f5654c67eba5514d42ef65d845face5a9d732001
- size 41957
+ oid sha256:90f59812f8ad810e2c50c9e69c1d3b4459e7d488705ec40983855bea3b4f8d6b
+ size 49646