Training in progress, epoch 7
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 236470789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2266edb0b6b0e74ee02fbe5aa2f5218baeeafe1a239137bf990ae0aeab9a119
|
3 |
size 236470789
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118243218
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c433f5760508a1599460a99e03b20a531880dfdad44d71ab00bd1c682027dadd
|
3 |
size 118243218
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15597
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb85f55f522538f02d3eea0fa023913981174e2e0027d28652cb76e91ebd4d8d
|
3 |
size 15597
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1a10f0e22563a2ad91f9f31ad1fc6a7a42e9711d892d03058453301106a5f72
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8f3332503ed7c858b6a78cb5232c8214dfa941a5425ab04fab1ad9da09e728b
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1710,11 +1710,296 @@
|
|
1710 |
"eval_samples_per_second": 496.625,
|
1711 |
"eval_steps_per_second": 31.039,
|
1712 |
"step": 137640
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1713 |
}
|
1714 |
],
|
1715 |
"max_steps": 321160,
|
1716 |
"num_train_epochs": 14,
|
1717 |
-
"total_flos": 1.
|
1718 |
"trial_name": null,
|
1719 |
"trial_params": null
|
1720 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.0,
|
5 |
+
"global_step": 160580,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1710 |
"eval_samples_per_second": 496.625,
|
1711 |
"eval_steps_per_second": 31.039,
|
1712 |
"step": 137640
|
1713 |
+
},
|
1714 |
+
{
|
1715 |
+
"epoch": 6.02,
|
1716 |
+
"learning_rate": 5.762640431768718e-05,
|
1717 |
+
"loss": 1.9502,
|
1718 |
+
"step": 138000
|
1719 |
+
},
|
1720 |
+
{
|
1721 |
+
"epoch": 6.04,
|
1722 |
+
"learning_rate": 5.746914589807138e-05,
|
1723 |
+
"loss": 1.9521,
|
1724 |
+
"step": 138500
|
1725 |
+
},
|
1726 |
+
{
|
1727 |
+
"epoch": 6.06,
|
1728 |
+
"learning_rate": 5.7311887478455593e-05,
|
1729 |
+
"loss": 1.9533,
|
1730 |
+
"step": 139000
|
1731 |
+
},
|
1732 |
+
{
|
1733 |
+
"epoch": 6.08,
|
1734 |
+
"learning_rate": 5.715462905883982e-05,
|
1735 |
+
"loss": 1.9533,
|
1736 |
+
"step": 139500
|
1737 |
+
},
|
1738 |
+
{
|
1739 |
+
"epoch": 6.1,
|
1740 |
+
"learning_rate": 5.699768515606326e-05,
|
1741 |
+
"loss": 1.9513,
|
1742 |
+
"step": 140000
|
1743 |
+
},
|
1744 |
+
{
|
1745 |
+
"epoch": 6.12,
|
1746 |
+
"learning_rate": 5.684042673644747e-05,
|
1747 |
+
"loss": 1.9491,
|
1748 |
+
"step": 140500
|
1749 |
+
},
|
1750 |
+
{
|
1751 |
+
"epoch": 6.15,
|
1752 |
+
"learning_rate": 5.668316831683168e-05,
|
1753 |
+
"loss": 1.9514,
|
1754 |
+
"step": 141000
|
1755 |
+
},
|
1756 |
+
{
|
1757 |
+
"epoch": 6.17,
|
1758 |
+
"learning_rate": 5.65259098972159e-05,
|
1759 |
+
"loss": 1.9492,
|
1760 |
+
"step": 141500
|
1761 |
+
},
|
1762 |
+
{
|
1763 |
+
"epoch": 6.19,
|
1764 |
+
"learning_rate": 5.6368965994439346e-05,
|
1765 |
+
"loss": 1.947,
|
1766 |
+
"step": 142000
|
1767 |
+
},
|
1768 |
+
{
|
1769 |
+
"epoch": 6.21,
|
1770 |
+
"learning_rate": 5.6211707574823556e-05,
|
1771 |
+
"loss": 1.945,
|
1772 |
+
"step": 142500
|
1773 |
+
},
|
1774 |
+
{
|
1775 |
+
"epoch": 6.23,
|
1776 |
+
"learning_rate": 5.605444915520778e-05,
|
1777 |
+
"loss": 1.9505,
|
1778 |
+
"step": 143000
|
1779 |
+
},
|
1780 |
+
{
|
1781 |
+
"epoch": 6.26,
|
1782 |
+
"learning_rate": 5.5897190735591984e-05,
|
1783 |
+
"loss": 1.9488,
|
1784 |
+
"step": 143500
|
1785 |
+
},
|
1786 |
+
{
|
1787 |
+
"epoch": 6.28,
|
1788 |
+
"learning_rate": 5.5740246832815436e-05,
|
1789 |
+
"loss": 1.9451,
|
1790 |
+
"step": 144000
|
1791 |
+
},
|
1792 |
+
{
|
1793 |
+
"epoch": 6.3,
|
1794 |
+
"learning_rate": 5.5582988413199646e-05,
|
1795 |
+
"loss": 1.9416,
|
1796 |
+
"step": 144500
|
1797 |
+
},
|
1798 |
+
{
|
1799 |
+
"epoch": 6.32,
|
1800 |
+
"learning_rate": 5.542572999358386e-05,
|
1801 |
+
"loss": 1.9487,
|
1802 |
+
"step": 145000
|
1803 |
+
},
|
1804 |
+
{
|
1805 |
+
"epoch": 6.34,
|
1806 |
+
"learning_rate": 5.5268471573968074e-05,
|
1807 |
+
"loss": 1.9394,
|
1808 |
+
"step": 145500
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 6.36,
|
1812 |
+
"learning_rate": 5.511152767119152e-05,
|
1813 |
+
"loss": 1.9463,
|
1814 |
+
"step": 146000
|
1815 |
+
},
|
1816 |
+
{
|
1817 |
+
"epoch": 6.39,
|
1818 |
+
"learning_rate": 5.495426925157573e-05,
|
1819 |
+
"loss": 1.947,
|
1820 |
+
"step": 146500
|
1821 |
+
},
|
1822 |
+
{
|
1823 |
+
"epoch": 6.41,
|
1824 |
+
"learning_rate": 5.4797010831959947e-05,
|
1825 |
+
"loss": 1.9424,
|
1826 |
+
"step": 147000
|
1827 |
+
},
|
1828 |
+
{
|
1829 |
+
"epoch": 6.43,
|
1830 |
+
"learning_rate": 5.463975241234416e-05,
|
1831 |
+
"loss": 1.9433,
|
1832 |
+
"step": 147500
|
1833 |
+
},
|
1834 |
+
{
|
1835 |
+
"epoch": 6.45,
|
1836 |
+
"learning_rate": 5.448280850956761e-05,
|
1837 |
+
"loss": 1.9416,
|
1838 |
+
"step": 148000
|
1839 |
+
},
|
1840 |
+
{
|
1841 |
+
"epoch": 6.47,
|
1842 |
+
"learning_rate": 5.432555008995181e-05,
|
1843 |
+
"loss": 1.9443,
|
1844 |
+
"step": 148500
|
1845 |
+
},
|
1846 |
+
{
|
1847 |
+
"epoch": 6.5,
|
1848 |
+
"learning_rate": 5.4168291670336036e-05,
|
1849 |
+
"loss": 1.9422,
|
1850 |
+
"step": 149000
|
1851 |
+
},
|
1852 |
+
{
|
1853 |
+
"epoch": 6.52,
|
1854 |
+
"learning_rate": 5.401103325072025e-05,
|
1855 |
+
"loss": 1.9421,
|
1856 |
+
"step": 149500
|
1857 |
+
},
|
1858 |
+
{
|
1859 |
+
"epoch": 6.54,
|
1860 |
+
"learning_rate": 5.385408934794369e-05,
|
1861 |
+
"loss": 1.9412,
|
1862 |
+
"step": 150000
|
1863 |
+
},
|
1864 |
+
{
|
1865 |
+
"epoch": 6.56,
|
1866 |
+
"learning_rate": 5.36968309283279e-05,
|
1867 |
+
"loss": 1.9411,
|
1868 |
+
"step": 150500
|
1869 |
+
},
|
1870 |
+
{
|
1871 |
+
"epoch": 6.58,
|
1872 |
+
"learning_rate": 5.353957250871212e-05,
|
1873 |
+
"loss": 1.9375,
|
1874 |
+
"step": 151000
|
1875 |
+
},
|
1876 |
+
{
|
1877 |
+
"epoch": 6.6,
|
1878 |
+
"learning_rate": 5.338231408909633e-05,
|
1879 |
+
"loss": 1.9399,
|
1880 |
+
"step": 151500
|
1881 |
+
},
|
1882 |
+
{
|
1883 |
+
"epoch": 6.63,
|
1884 |
+
"learning_rate": 5.3225370186319776e-05,
|
1885 |
+
"loss": 1.9344,
|
1886 |
+
"step": 152000
|
1887 |
+
},
|
1888 |
+
{
|
1889 |
+
"epoch": 6.65,
|
1890 |
+
"learning_rate": 5.3068111766703986e-05,
|
1891 |
+
"loss": 1.9419,
|
1892 |
+
"step": 152500
|
1893 |
+
},
|
1894 |
+
{
|
1895 |
+
"epoch": 6.67,
|
1896 |
+
"learning_rate": 5.291085334708821e-05,
|
1897 |
+
"loss": 1.9353,
|
1898 |
+
"step": 153000
|
1899 |
+
},
|
1900 |
+
{
|
1901 |
+
"epoch": 6.69,
|
1902 |
+
"learning_rate": 5.275359492747241e-05,
|
1903 |
+
"loss": 1.9386,
|
1904 |
+
"step": 153500
|
1905 |
+
},
|
1906 |
+
{
|
1907 |
+
"epoch": 6.71,
|
1908 |
+
"learning_rate": 5.259633650785664e-05,
|
1909 |
+
"loss": 1.9403,
|
1910 |
+
"step": 154000
|
1911 |
+
},
|
1912 |
+
{
|
1913 |
+
"epoch": 6.73,
|
1914 |
+
"learning_rate": 5.2439392605080076e-05,
|
1915 |
+
"loss": 1.9336,
|
1916 |
+
"step": 154500
|
1917 |
+
},
|
1918 |
+
{
|
1919 |
+
"epoch": 6.76,
|
1920 |
+
"learning_rate": 5.228213418546429e-05,
|
1921 |
+
"loss": 1.934,
|
1922 |
+
"step": 155000
|
1923 |
+
},
|
1924 |
+
{
|
1925 |
+
"epoch": 6.78,
|
1926 |
+
"learning_rate": 5.21248757658485e-05,
|
1927 |
+
"loss": 1.9322,
|
1928 |
+
"step": 155500
|
1929 |
+
},
|
1930 |
+
{
|
1931 |
+
"epoch": 6.8,
|
1932 |
+
"learning_rate": 5.196761734623272e-05,
|
1933 |
+
"loss": 1.9316,
|
1934 |
+
"step": 156000
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 6.82,
|
1938 |
+
"learning_rate": 5.181067344345617e-05,
|
1939 |
+
"loss": 1.9319,
|
1940 |
+
"step": 156500
|
1941 |
+
},
|
1942 |
+
{
|
1943 |
+
"epoch": 6.84,
|
1944 |
+
"learning_rate": 5.1653415023840376e-05,
|
1945 |
+
"loss": 1.937,
|
1946 |
+
"step": 157000
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"epoch": 6.87,
|
1950 |
+
"learning_rate": 5.1496156604224586e-05,
|
1951 |
+
"loss": 1.9324,
|
1952 |
+
"step": 157500
|
1953 |
+
},
|
1954 |
+
{
|
1955 |
+
"epoch": 6.89,
|
1956 |
+
"learning_rate": 5.133889818460881e-05,
|
1957 |
+
"loss": 1.9305,
|
1958 |
+
"step": 158000
|
1959 |
+
},
|
1960 |
+
{
|
1961 |
+
"epoch": 6.91,
|
1962 |
+
"learning_rate": 5.1181954281832256e-05,
|
1963 |
+
"loss": 1.932,
|
1964 |
+
"step": 158500
|
1965 |
+
},
|
1966 |
+
{
|
1967 |
+
"epoch": 6.93,
|
1968 |
+
"learning_rate": 5.1024695862216466e-05,
|
1969 |
+
"loss": 1.9298,
|
1970 |
+
"step": 159000
|
1971 |
+
},
|
1972 |
+
{
|
1973 |
+
"epoch": 6.95,
|
1974 |
+
"learning_rate": 5.0867437442600676e-05,
|
1975 |
+
"loss": 1.9289,
|
1976 |
+
"step": 159500
|
1977 |
+
},
|
1978 |
+
{
|
1979 |
+
"epoch": 6.97,
|
1980 |
+
"learning_rate": 5.071017902298489e-05,
|
1981 |
+
"loss": 1.9263,
|
1982 |
+
"step": 160000
|
1983 |
+
},
|
1984 |
+
{
|
1985 |
+
"epoch": 7.0,
|
1986 |
+
"learning_rate": 5.055323512020834e-05,
|
1987 |
+
"loss": 1.9313,
|
1988 |
+
"step": 160500
|
1989 |
+
},
|
1990 |
+
{
|
1991 |
+
"epoch": 7.0,
|
1992 |
+
"eval_accuracy": 0.631738439030596,
|
1993 |
+
"eval_loss": 1.8091248273849487,
|
1994 |
+
"eval_runtime": 359.593,
|
1995 |
+
"eval_samples_per_second": 494.871,
|
1996 |
+
"eval_steps_per_second": 30.929,
|
1997 |
+
"step": 160580
|
1998 |
}
|
1999 |
],
|
2000 |
"max_steps": 321160,
|
2001 |
"num_train_epochs": 14,
|
2002 |
+
"total_flos": 1.2150058886378496e+18,
|
2003 |
"trial_name": null,
|
2004 |
"trial_params": null
|
2005 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118243218
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c433f5760508a1599460a99e03b20a531880dfdad44d71ab00bd1c682027dadd
|
3 |
size 118243218
|
runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76ccf92c99516f4744f2ccb27d9c0dd34d687200b637d2a59ebecfb67ba42c1f
|
3 |
+
size 57319
|