Training in progress, epoch 7
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a49ec4140754981eb351649fecf5d3e3d44b0e29fa9e01bf8460a2dcc5b91392
 size 236491269
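Each binary artifact in this commit is tracked with Git LFS, so the diff only touches the three-line pointer file (version, oid, size); the previous oid values are truncated in this view. Below is a minimal sketch, not code from this repo, of reading such a pointer, assuming the checked-out file still contains the raw pointer text rather than the smudged binary; the `parse_lfs_pointer` helper is hypothetical.

```python
def parse_lfs_pointer(path):
    """Read a Git LFS pointer file into a dict of its key/value fields."""
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Each pointer line is "<key> <value>", e.g. "size 236491269".
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Hypothetical usage against the pointer committed above (not the real tensors):
info = parse_lfs_pointer("last-checkpoint/optimizer.pt")
print(info["oid"], int(info["size"]))  # sha256:a49ec414..., 236491269
```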
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
 size 118253458
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2593a21d27b6d3490c2b6104d1f46ccef142af342ac4030549c5bf8e21edca72
 size 15597
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:74546aa0cb21fe7508cf9d0a3ed65e894eded209c32829312f983360c4339967
 size 557
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1981182cf21e486b0f1de0f86d848f914d636f6e137316378a492b50ad1a4d9c
 size 627
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
-  "global_step":
+  "epoch": 7.0,
+  "global_step": 160580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1710,11 +1710,296 @@
       "eval_samples_per_second": 603.291,
       "eval_steps_per_second": 37.706,
       "step": 137640
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 5.762640431768718e-05,
+      "loss": 1.6789,
+      "step": 138000
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 5.746946041491061e-05,
+      "loss": 1.6869,
+      "step": 138500
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 5.7312201995294836e-05,
+      "loss": 1.6766,
+      "step": 139000
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 5.7154943575679046e-05,
+      "loss": 1.6819,
+      "step": 139500
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 5.699768515606326e-05,
+      "loss": 1.6812,
+      "step": 140000
+    },
+    {
+      "epoch": 6.12,
+      "learning_rate": 5.68407412532867e-05,
+      "loss": 1.6802,
+      "step": 140500
+    },
+    {
+      "epoch": 6.15,
+      "learning_rate": 5.668348283367092e-05,
+      "loss": 1.6788,
+      "step": 141000
+    },
+    {
+      "epoch": 6.17,
+      "learning_rate": 5.652622441405513e-05,
+      "loss": 1.6786,
+      "step": 141500
+    },
+    {
+      "epoch": 6.19,
+      "learning_rate": 5.6368965994439346e-05,
+      "loss": 1.6798,
+      "step": 142000
+    },
+    {
+      "epoch": 6.21,
+      "learning_rate": 5.6212022091662785e-05,
+      "loss": 1.6758,
+      "step": 142500
+    },
+    {
+      "epoch": 6.23,
+      "learning_rate": 5.605476367204701e-05,
+      "loss": 1.6775,
+      "step": 143000
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 5.589750525243121e-05,
+      "loss": 1.6764,
+      "step": 143500
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 5.5740246832815436e-05,
+      "loss": 1.6735,
+      "step": 144000
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 5.5583302930038875e-05,
+      "loss": 1.6758,
+      "step": 144500
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 5.542604451042309e-05,
+      "loss": 1.6746,
+      "step": 145000
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 5.52687860908073e-05,
+      "loss": 1.678,
+      "step": 145500
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 5.511152767119152e-05,
+      "loss": 1.6724,
+      "step": 146000
+    },
+    {
+      "epoch": 6.39,
+      "learning_rate": 5.495458376841497e-05,
+      "loss": 1.6727,
+      "step": 146500
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 5.4797325348799175e-05,
+      "loss": 1.6726,
+      "step": 147000
+    },
+    {
+      "epoch": 6.43,
+      "learning_rate": 5.4640066929183386e-05,
+      "loss": 1.6726,
+      "step": 147500
+    },
+    {
+      "epoch": 6.45,
+      "learning_rate": 5.448280850956761e-05,
+      "loss": 1.6735,
+      "step": 148000
+    },
+    {
+      "epoch": 6.47,
+      "learning_rate": 5.4325864606791055e-05,
+      "loss": 1.6722,
+      "step": 148500
+    },
+    {
+      "epoch": 6.5,
+      "learning_rate": 5.4168606187175265e-05,
+      "loss": 1.6672,
+      "step": 149000
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 5.4011347767559475e-05,
+      "loss": 1.6716,
+      "step": 149500
+    },
+    {
+      "epoch": 6.54,
+      "learning_rate": 5.385408934794369e-05,
+      "loss": 1.6746,
+      "step": 150000
+    },
+    {
+      "epoch": 6.56,
+      "learning_rate": 5.369714544516714e-05,
+      "loss": 1.6714,
+      "step": 150500
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 5.353988702555135e-05,
+      "loss": 1.6631,
+      "step": 151000
+    },
+    {
+      "epoch": 6.6,
+      "learning_rate": 5.338262860593557e-05,
+      "loss": 1.667,
+      "step": 151500
+    },
+    {
+      "epoch": 6.63,
+      "learning_rate": 5.3225370186319776e-05,
+      "loss": 1.6716,
+      "step": 152000
+    },
+    {
+      "epoch": 6.65,
+      "learning_rate": 5.306842628354323e-05,
+      "loss": 1.6653,
+      "step": 152500
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 5.291116786392744e-05,
+      "loss": 1.6648,
+      "step": 153000
+    },
+    {
+      "epoch": 6.69,
+      "learning_rate": 5.2753909444311655e-05,
+      "loss": 1.6645,
+      "step": 153500
+    },
+    {
+      "epoch": 6.71,
+      "learning_rate": 5.2596651024695866e-05,
+      "loss": 1.6682,
+      "step": 154000
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 5.243970712191931e-05,
+      "loss": 1.6631,
+      "step": 154500
+    },
+    {
+      "epoch": 6.76,
+      "learning_rate": 5.228244870230352e-05,
+      "loss": 1.6637,
+      "step": 155000
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 5.212519028268774e-05,
+      "loss": 1.664,
+      "step": 155500
+    },
+    {
+      "epoch": 6.8,
+      "learning_rate": 5.196793186307195e-05,
+      "loss": 1.6665,
+      "step": 156000
+    },
+    {
+      "epoch": 6.82,
+      "learning_rate": 5.18109879602954e-05,
+      "loss": 1.6624,
+      "step": 156500
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 5.1653729540679605e-05,
+      "loss": 1.6611,
+      "step": 157000
+    },
+    {
+      "epoch": 6.87,
+      "learning_rate": 5.149647112106383e-05,
+      "loss": 1.6642,
+      "step": 157500
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 5.133921270144804e-05,
+      "loss": 1.6595,
+      "step": 158000
+    },
+    {
+      "epoch": 6.91,
+      "learning_rate": 5.1182268798671485e-05,
+      "loss": 1.6607,
+      "step": 158500
+    },
+    {
+      "epoch": 6.93,
+      "learning_rate": 5.1025010379055695e-05,
+      "loss": 1.663,
+      "step": 159000
+    },
+    {
+      "epoch": 6.95,
+      "learning_rate": 5.086775195943991e-05,
+      "loss": 1.6598,
+      "step": 159500
+    },
+    {
+      "epoch": 6.97,
+      "learning_rate": 5.071049353982412e-05,
+      "loss": 1.662,
+      "step": 160000
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 5.055354963704757e-05,
+      "loss": 1.658,
+      "step": 160500
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.681177174568378,
+      "eval_loss": 1.5331339836120605,
+      "eval_runtime": 342.8004,
+      "eval_samples_per_second": 519.113,
+      "eval_steps_per_second": 32.445,
+      "step": 160580
     }
   ],
   "max_steps": 321160,
   "num_train_epochs": 14,
-  "total_flos": 1.
+  "total_flos": 1.2209738648603072e+18,
   "trial_name": null,
   "trial_params": null
 }
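The trainer_state.json diff appends one log_history record per 500 training steps plus a final evaluation record, and advances the checkpoint to epoch 7.0 at global step 160580. A hedged sketch, using only the standard library and not code from this repo, of inspecting the committed file; the keys match the entries visible in the diff above.

```python
import json

# Load the checkpoint state committed in this change.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 7.0 160580

# log_history interleaves training records (with "loss") and
# evaluation records (with "eval_loss"), as in the diff above.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(train_logs[-1]["loss"])           # 1.658 at step 160500
print(eval_logs[-1]["eval_accuracy"])   # 0.681177174568378 at step 160580
```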
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
 size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:be416dbc50f0cb836aa31ee963aba13302069cd1befb9ceaf72286a0cd5d4676
+size 57335
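The updated events.out.tfevents file is the TensorBoard log for this run. A sketch of reading it back, assuming the `tensorboard` package is installed; the "train/loss" tag name is an assumption about what this Trainer logged, so the Tags() call is used first to discover the real names.

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the run directory committed above.
acc = EventAccumulator("runs/Feb20_18-29-06_ubuntu-2004")
acc.Reload()  # parse the events file(s) found in that directory

print(acc.Tags()["scalars"])  # list the scalar tags actually logged

# Assumed tag name; substitute one printed above if it differs.
for event in acc.Scalars("train/loss"):
    print(event.step, event.value)
```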