Training in progress, step 7000, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4aa66ee572d638da161b0876a24a0495b141e6a283ebe22e54c7bb4b5cecc85d
 size 966995080
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3095de81d8b920e91d85b8e2ee04f326ef5aa3917fee9ca74a8d0a152e8b3447
 size 1925064044
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ada5f6f7cb1b6a49d79d11cd5642321498733c76d6eb8ca5030fe74fa4bc331
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e217ad856cdf0ac7c67db1e21b2cb21b2f44c6c7063ade06b9a1720236888449
 size 1064
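Each of the four files above is a Git LFS pointer: the repository itself stores only the version, oid, and size lines, while the binary payload lives in LFS storage. As a minimal sketch (the local path in the usage line is an assumption based on the filenames in this commit, not something the commit guarantees), a downloaded blob can be checked against its pointer's sha256 and size like this:

import hashlib
from pathlib import Path

def verify_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded blob against the oid/size recorded in its LFS pointer."""
    blob = Path(blob_path)
    # Cheap size check first; a size mismatch means the hash cannot match either.
    if blob.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        # Stream in 1 MiB chunks so multi-GB checkpoints do not load into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the model.safetensors pointer above; the path is hypothetical.
print(verify_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "4aa66ee572d638da161b0876a24a0495b141e6a283ebe22e54c7bb4b5cecc85d",
    966995080,
))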
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric":
-  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-
-  "epoch": 2.
+  "best_metric": 47.53661784287617,
+  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
+  "epoch": 2.8,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1741,6 +1741,295 @@
       "eval_samples_per_second": 2.22,
       "eval_steps_per_second": 0.278,
       "step": 6000
+    },
+    {
+      "epoch": 2.41,
+      "grad_norm": 12.415263175964355,
+      "learning_rate": 3.67283950617284e-06,
+      "loss": 0.6038,
+      "step": 6025
+    },
+    {
+      "epoch": 2.42,
+      "grad_norm": 12.250804901123047,
+      "learning_rate": 3.641975308641976e-06,
+      "loss": 0.6355,
+      "step": 6050
+    },
+    {
+      "epoch": 2.43,
+      "grad_norm": 10.84643840789795,
+      "learning_rate": 3.6111111111111115e-06,
+      "loss": 0.5963,
+      "step": 6075
+    },
+    {
+      "epoch": 2.44,
+      "grad_norm": 9.677035331726074,
+      "learning_rate": 3.580246913580247e-06,
+      "loss": 0.5596,
+      "step": 6100
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 12.466174125671387,
+      "learning_rate": 3.549382716049383e-06,
+      "loss": 0.6114,
+      "step": 6125
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 10.494367599487305,
+      "learning_rate": 3.5185185185185187e-06,
+      "loss": 0.5983,
+      "step": 6150
+    },
+    {
+      "epoch": 2.4699999999999998,
+      "grad_norm": 10.007222175598145,
+      "learning_rate": 3.4876543209876544e-06,
+      "loss": 0.5739,
+      "step": 6175
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 10.590246200561523,
+      "learning_rate": 3.4567901234567904e-06,
+      "loss": 0.6105,
+      "step": 6200
+    },
+    {
+      "epoch": 2.49,
+      "grad_norm": 11.260624885559082,
+      "learning_rate": 3.4259259259259265e-06,
+      "loss": 0.5963,
+      "step": 6225
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 12.293840408325195,
+      "learning_rate": 3.395061728395062e-06,
+      "loss": 0.6278,
+      "step": 6250
+    },
+    {
+      "epoch": 2.51,
+      "grad_norm": 12.284423828125,
+      "learning_rate": 3.3641975308641977e-06,
+      "loss": 0.613,
+      "step": 6275
+    },
+    {
+      "epoch": 2.52,
+      "grad_norm": 10.285521507263184,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 0.5841,
+      "step": 6300
+    },
+    {
+      "epoch": 2.5300000000000002,
+      "grad_norm": 11.444025039672852,
+      "learning_rate": 3.30246913580247e-06,
+      "loss": 0.6789,
+      "step": 6325
+    },
+    {
+      "epoch": 2.54,
+      "grad_norm": 9.65517520904541,
+      "learning_rate": 3.2716049382716054e-06,
+      "loss": 0.5777,
+      "step": 6350
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 11.333357810974121,
+      "learning_rate": 3.240740740740741e-06,
+      "loss": 0.6097,
+      "step": 6375
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 9.03528118133545,
+      "learning_rate": 3.2098765432098767e-06,
+      "loss": 0.5724,
+      "step": 6400
+    },
+    {
+      "epoch": 2.57,
+      "grad_norm": 11.75942325592041,
+      "learning_rate": 3.1790123456790127e-06,
+      "loss": 0.6378,
+      "step": 6425
+    },
+    {
+      "epoch": 2.58,
+      "grad_norm": 11.017098426818848,
+      "learning_rate": 3.1481481481481483e-06,
+      "loss": 0.6758,
+      "step": 6450
+    },
+    {
+      "epoch": 2.59,
+      "grad_norm": 12.29273509979248,
+      "learning_rate": 3.1172839506172844e-06,
+      "loss": 0.5915,
+      "step": 6475
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 12.807594299316406,
+      "learning_rate": 3.08641975308642e-06,
+      "loss": 0.6041,
+      "step": 6500
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 12.91454029083252,
+      "learning_rate": 3.055555555555556e-06,
+      "loss": 0.5537,
+      "step": 6525
+    },
+    {
+      "epoch": 2.62,
+      "grad_norm": 12.020458221435547,
+      "learning_rate": 3.0246913580246917e-06,
+      "loss": 0.6154,
+      "step": 6550
+    },
+    {
+      "epoch": 2.63,
+      "grad_norm": 10.018027305603027,
+      "learning_rate": 2.9938271604938273e-06,
+      "loss": 0.5778,
+      "step": 6575
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 10.63597297668457,
+      "learning_rate": 2.962962962962963e-06,
+      "loss": 0.5592,
+      "step": 6600
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 13.188393592834473,
+      "learning_rate": 2.9320987654320994e-06,
+      "loss": 0.582,
+      "step": 6625
+    },
+    {
+      "epoch": 2.66,
+      "grad_norm": 10.43333625793457,
+      "learning_rate": 2.901234567901235e-06,
+      "loss": 0.621,
+      "step": 6650
+    },
+    {
+      "epoch": 2.67,
+      "grad_norm": 11.198952674865723,
+      "learning_rate": 2.8703703703703706e-06,
+      "loss": 0.593,
+      "step": 6675
+    },
+    {
+      "epoch": 2.68,
+      "grad_norm": 12.264167785644531,
+      "learning_rate": 2.8395061728395062e-06,
+      "loss": 0.587,
+      "step": 6700
+    },
+    {
+      "epoch": 2.69,
+      "grad_norm": 12.704331398010254,
+      "learning_rate": 2.8086419753086423e-06,
+      "loss": 0.6146,
+      "step": 6725
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 10.604636192321777,
+      "learning_rate": 2.7777777777777783e-06,
+      "loss": 0.6201,
+      "step": 6750
+    },
+    {
+      "epoch": 2.71,
+      "grad_norm": 14.896036148071289,
+      "learning_rate": 2.746913580246914e-06,
+      "loss": 0.5932,
+      "step": 6775
+    },
+    {
+      "epoch": 2.7199999999999998,
+      "grad_norm": 11.566553115844727,
+      "learning_rate": 2.7160493827160496e-06,
+      "loss": 0.55,
+      "step": 6800
+    },
+    {
+      "epoch": 2.73,
+      "grad_norm": 12.04855728149414,
+      "learning_rate": 2.6851851851851856e-06,
+      "loss": 0.6556,
+      "step": 6825
+    },
+    {
+      "epoch": 2.74,
+      "grad_norm": 10.649229049682617,
+      "learning_rate": 2.6543209876543212e-06,
+      "loss": 0.5829,
+      "step": 6850
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 9.8768310546875,
+      "learning_rate": 2.623456790123457e-06,
+      "loss": 0.5403,
+      "step": 6875
+    },
+    {
+      "epoch": 2.76,
+      "grad_norm": 11.587966918945312,
+      "learning_rate": 2.5925925925925925e-06,
+      "loss": 0.579,
+      "step": 6900
+    },
+    {
+      "epoch": 2.77,
+      "grad_norm": 9.96321964263916,
+      "learning_rate": 2.561728395061729e-06,
+      "loss": 0.61,
+      "step": 6925
+    },
+    {
+      "epoch": 2.7800000000000002,
+      "grad_norm": 11.546381950378418,
+      "learning_rate": 2.5308641975308646e-06,
+      "loss": 0.5558,
+      "step": 6950
+    },
+    {
+      "epoch": 2.79,
+      "grad_norm": 13.616846084594727,
+      "learning_rate": 2.5e-06,
+      "loss": 0.5776,
+      "step": 6975
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 11.600656509399414,
+      "learning_rate": 2.469135802469136e-06,
+      "loss": 0.5584,
+      "step": 7000
+    },
+    {
+      "epoch": 2.8,
+      "eval_cer": 47.53661784287617,
+      "eval_loss": 0.896188497543335,
+      "eval_runtime": 1733.8859,
+      "eval_samples_per_second": 2.27,
+      "eval_steps_per_second": 0.284,
+      "step": 7000
     }
   ],
   "logging_steps": 25,
@@ -1748,7 +2037,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
-  "total_flos":
+  "total_flos": 3.232156483584e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
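The trainer_state.json above is the bookkeeping the Hugging Face Trainer writes alongside each checkpoint: it records that step 7000 is the best checkpoint so far (eval CER 47.54 at epoch 2.8 of 4). A minimal sketch of inspecting it, assuming the file has been downloaded to the path shown in the diff (the path is an assumption, not part of the commit):

import json

# Path matches the diff above; adjust to wherever the checkpoint was downloaded.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 47.53661784287617 (eval CER at step 7000)
print(state["best_model_checkpoint"])  # ./whisper-small-taiwanese/checkpoint-7000
print(state["global_step"], "of", state["num_train_epochs"], "epochs' worth of steps")

# Evaluation entries in log_history are the ones carrying eval_* keys;
# the last of them holds the step-7000 metrics shown in the diff.
last_eval = [e for e in state["log_history"] if "eval_cer" in e][-1]
print(last_eval["eval_cer"], last_eval["eval_loss"])

Resuming the interrupted run from here is then the Trainer's standard mechanism: passing resume_from_checkpoint="./whisper-small-taiwanese/checkpoint-7000" to trainer.train() restores the weights together with optimizer.pt, scheduler.pt, rng_state.pth, and this trainer_state.json, and continues from global step 7000.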