schnell commited on
Commit
dd9e7a2
1 Parent(s): 680cf14

Training in progress, epoch 7

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c204c5739cb89e423854dc482b604d68f1fe7777bddd32bee3e42c27396f835
3
  size 236491269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49ec4140754981eb351649fecf5d3e3d44b0e29fa9e01bf8460a2dcc5b91392
3
  size 236491269
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f1368949b8a3ca0194b62ba5c01ad414ec8388ceb72de5c960a6415ef0bc7eb
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
3
  size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2804a769785bd005d311fa5211b59d4c5e43c5e9f11eb9bdc8f5d8e3bbbcfcc
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2593a21d27b6d3490c2b6104d1f46ccef142af342ac4030549c5bf8e21edca72
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:249568bbffd1228f6946ea7e8e37b3e1003da8fddc10b6cbe9e7db83b6052d3f
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74546aa0cb21fe7508cf9d0a3ed65e894eded209c32829312f983360c4339967
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1f64a9b985406894ef65cdb08cec8746d6a7f750e0466984f5ddbc1f0df99b9
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1981182cf21e486b0f1de0f86d848f914d636f6e137316378a492b50ad1a4d9c
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.0,
5
- "global_step": 137640,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1710,11 +1710,296 @@
1710
  "eval_samples_per_second": 603.291,
1711
  "eval_steps_per_second": 37.706,
1712
  "step": 137640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1713
  }
1714
  ],
1715
  "max_steps": 321160,
1716
  "num_train_epochs": 14,
1717
- "total_flos": 1.0465574145188712e+18,
1718
  "trial_name": null,
1719
  "trial_params": null
1720
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.0,
5
+ "global_step": 160580,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1710
  "eval_samples_per_second": 603.291,
1711
  "eval_steps_per_second": 37.706,
1712
  "step": 137640
1713
+ },
1714
+ {
1715
+ "epoch": 6.02,
1716
+ "learning_rate": 5.762640431768718e-05,
1717
+ "loss": 1.6789,
1718
+ "step": 138000
1719
+ },
1720
+ {
1721
+ "epoch": 6.04,
1722
+ "learning_rate": 5.746946041491061e-05,
1723
+ "loss": 1.6869,
1724
+ "step": 138500
1725
+ },
1726
+ {
1727
+ "epoch": 6.06,
1728
+ "learning_rate": 5.7312201995294836e-05,
1729
+ "loss": 1.6766,
1730
+ "step": 139000
1731
+ },
1732
+ {
1733
+ "epoch": 6.08,
1734
+ "learning_rate": 5.7154943575679046e-05,
1735
+ "loss": 1.6819,
1736
+ "step": 139500
1737
+ },
1738
+ {
1739
+ "epoch": 6.1,
1740
+ "learning_rate": 5.699768515606326e-05,
1741
+ "loss": 1.6812,
1742
+ "step": 140000
1743
+ },
1744
+ {
1745
+ "epoch": 6.12,
1746
+ "learning_rate": 5.68407412532867e-05,
1747
+ "loss": 1.6802,
1748
+ "step": 140500
1749
+ },
1750
+ {
1751
+ "epoch": 6.15,
1752
+ "learning_rate": 5.668348283367092e-05,
1753
+ "loss": 1.6788,
1754
+ "step": 141000
1755
+ },
1756
+ {
1757
+ "epoch": 6.17,
1758
+ "learning_rate": 5.652622441405513e-05,
1759
+ "loss": 1.6786,
1760
+ "step": 141500
1761
+ },
1762
+ {
1763
+ "epoch": 6.19,
1764
+ "learning_rate": 5.6368965994439346e-05,
1765
+ "loss": 1.6798,
1766
+ "step": 142000
1767
+ },
1768
+ {
1769
+ "epoch": 6.21,
1770
+ "learning_rate": 5.6212022091662785e-05,
1771
+ "loss": 1.6758,
1772
+ "step": 142500
1773
+ },
1774
+ {
1775
+ "epoch": 6.23,
1776
+ "learning_rate": 5.605476367204701e-05,
1777
+ "loss": 1.6775,
1778
+ "step": 143000
1779
+ },
1780
+ {
1781
+ "epoch": 6.26,
1782
+ "learning_rate": 5.589750525243121e-05,
1783
+ "loss": 1.6764,
1784
+ "step": 143500
1785
+ },
1786
+ {
1787
+ "epoch": 6.28,
1788
+ "learning_rate": 5.5740246832815436e-05,
1789
+ "loss": 1.6735,
1790
+ "step": 144000
1791
+ },
1792
+ {
1793
+ "epoch": 6.3,
1794
+ "learning_rate": 5.5583302930038875e-05,
1795
+ "loss": 1.6758,
1796
+ "step": 144500
1797
+ },
1798
+ {
1799
+ "epoch": 6.32,
1800
+ "learning_rate": 5.542604451042309e-05,
1801
+ "loss": 1.6746,
1802
+ "step": 145000
1803
+ },
1804
+ {
1805
+ "epoch": 6.34,
1806
+ "learning_rate": 5.52687860908073e-05,
1807
+ "loss": 1.678,
1808
+ "step": 145500
1809
+ },
1810
+ {
1811
+ "epoch": 6.36,
1812
+ "learning_rate": 5.511152767119152e-05,
1813
+ "loss": 1.6724,
1814
+ "step": 146000
1815
+ },
1816
+ {
1817
+ "epoch": 6.39,
1818
+ "learning_rate": 5.495458376841497e-05,
1819
+ "loss": 1.6727,
1820
+ "step": 146500
1821
+ },
1822
+ {
1823
+ "epoch": 6.41,
1824
+ "learning_rate": 5.4797325348799175e-05,
1825
+ "loss": 1.6726,
1826
+ "step": 147000
1827
+ },
1828
+ {
1829
+ "epoch": 6.43,
1830
+ "learning_rate": 5.4640066929183386e-05,
1831
+ "loss": 1.6726,
1832
+ "step": 147500
1833
+ },
1834
+ {
1835
+ "epoch": 6.45,
1836
+ "learning_rate": 5.448280850956761e-05,
1837
+ "loss": 1.6735,
1838
+ "step": 148000
1839
+ },
1840
+ {
1841
+ "epoch": 6.47,
1842
+ "learning_rate": 5.4325864606791055e-05,
1843
+ "loss": 1.6722,
1844
+ "step": 148500
1845
+ },
1846
+ {
1847
+ "epoch": 6.5,
1848
+ "learning_rate": 5.4168606187175265e-05,
1849
+ "loss": 1.6672,
1850
+ "step": 149000
1851
+ },
1852
+ {
1853
+ "epoch": 6.52,
1854
+ "learning_rate": 5.4011347767559475e-05,
1855
+ "loss": 1.6716,
1856
+ "step": 149500
1857
+ },
1858
+ {
1859
+ "epoch": 6.54,
1860
+ "learning_rate": 5.385408934794369e-05,
1861
+ "loss": 1.6746,
1862
+ "step": 150000
1863
+ },
1864
+ {
1865
+ "epoch": 6.56,
1866
+ "learning_rate": 5.369714544516714e-05,
1867
+ "loss": 1.6714,
1868
+ "step": 150500
1869
+ },
1870
+ {
1871
+ "epoch": 6.58,
1872
+ "learning_rate": 5.353988702555135e-05,
1873
+ "loss": 1.6631,
1874
+ "step": 151000
1875
+ },
1876
+ {
1877
+ "epoch": 6.6,
1878
+ "learning_rate": 5.338262860593557e-05,
1879
+ "loss": 1.667,
1880
+ "step": 151500
1881
+ },
1882
+ {
1883
+ "epoch": 6.63,
1884
+ "learning_rate": 5.3225370186319776e-05,
1885
+ "loss": 1.6716,
1886
+ "step": 152000
1887
+ },
1888
+ {
1889
+ "epoch": 6.65,
1890
+ "learning_rate": 5.306842628354323e-05,
1891
+ "loss": 1.6653,
1892
+ "step": 152500
1893
+ },
1894
+ {
1895
+ "epoch": 6.67,
1896
+ "learning_rate": 5.291116786392744e-05,
1897
+ "loss": 1.6648,
1898
+ "step": 153000
1899
+ },
1900
+ {
1901
+ "epoch": 6.69,
1902
+ "learning_rate": 5.2753909444311655e-05,
1903
+ "loss": 1.6645,
1904
+ "step": 153500
1905
+ },
1906
+ {
1907
+ "epoch": 6.71,
1908
+ "learning_rate": 5.2596651024695866e-05,
1909
+ "loss": 1.6682,
1910
+ "step": 154000
1911
+ },
1912
+ {
1913
+ "epoch": 6.73,
1914
+ "learning_rate": 5.243970712191931e-05,
1915
+ "loss": 1.6631,
1916
+ "step": 154500
1917
+ },
1918
+ {
1919
+ "epoch": 6.76,
1920
+ "learning_rate": 5.228244870230352e-05,
1921
+ "loss": 1.6637,
1922
+ "step": 155000
1923
+ },
1924
+ {
1925
+ "epoch": 6.78,
1926
+ "learning_rate": 5.212519028268774e-05,
1927
+ "loss": 1.664,
1928
+ "step": 155500
1929
+ },
1930
+ {
1931
+ "epoch": 6.8,
1932
+ "learning_rate": 5.196793186307195e-05,
1933
+ "loss": 1.6665,
1934
+ "step": 156000
1935
+ },
1936
+ {
1937
+ "epoch": 6.82,
1938
+ "learning_rate": 5.18109879602954e-05,
1939
+ "loss": 1.6624,
1940
+ "step": 156500
1941
+ },
1942
+ {
1943
+ "epoch": 6.84,
1944
+ "learning_rate": 5.1653729540679605e-05,
1945
+ "loss": 1.6611,
1946
+ "step": 157000
1947
+ },
1948
+ {
1949
+ "epoch": 6.87,
1950
+ "learning_rate": 5.149647112106383e-05,
1951
+ "loss": 1.6642,
1952
+ "step": 157500
1953
+ },
1954
+ {
1955
+ "epoch": 6.89,
1956
+ "learning_rate": 5.133921270144804e-05,
1957
+ "loss": 1.6595,
1958
+ "step": 158000
1959
+ },
1960
+ {
1961
+ "epoch": 6.91,
1962
+ "learning_rate": 5.1182268798671485e-05,
1963
+ "loss": 1.6607,
1964
+ "step": 158500
1965
+ },
1966
+ {
1967
+ "epoch": 6.93,
1968
+ "learning_rate": 5.1025010379055695e-05,
1969
+ "loss": 1.663,
1970
+ "step": 159000
1971
+ },
1972
+ {
1973
+ "epoch": 6.95,
1974
+ "learning_rate": 5.086775195943991e-05,
1975
+ "loss": 1.6598,
1976
+ "step": 159500
1977
+ },
1978
+ {
1979
+ "epoch": 6.97,
1980
+ "learning_rate": 5.071049353982412e-05,
1981
+ "loss": 1.662,
1982
+ "step": 160000
1983
+ },
1984
+ {
1985
+ "epoch": 7.0,
1986
+ "learning_rate": 5.055354963704757e-05,
1987
+ "loss": 1.658,
1988
+ "step": 160500
1989
+ },
1990
+ {
1991
+ "epoch": 7.0,
1992
+ "eval_accuracy": 0.681177174568378,
1993
+ "eval_loss": 1.5331339836120605,
1994
+ "eval_runtime": 342.8004,
1995
+ "eval_samples_per_second": 519.113,
1996
+ "eval_steps_per_second": 32.445,
1997
+ "step": 160580
1998
  }
1999
  ],
2000
  "max_steps": 321160,
2001
  "num_train_epochs": 14,
2002
+ "total_flos": 1.2209738648603072e+18,
2003
  "trial_name": null,
2004
  "trial_params": null
2005
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f1368949b8a3ca0194b62ba5c01ad414ec8388ceb72de5c960a6415ef0bc7eb
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
3
  size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90f59812f8ad810e2c50c9e69c1d3b4459e7d488705ec40983855bea3b4f8d6b
3
- size 49646
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be416dbc50f0cb836aa31ee963aba13302069cd1befb9ceaf72286a0cd5d4676
3
+ size 57335