yuweiiizz committed
Commit 8243704 · verified · 1 Parent(s): 325b01c

Training in progress, step 7000, checkpoint

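The `last-checkpoint/` directory updated below follows the layout the `transformers` `Trainer` writes on each save: model weights (`model.safetensors`), optimizer and scheduler state, RNG state, and `trainer_state.json`. An interrupted run can normally be resumed from such a directory. A minimal sketch, assuming the original `Seq2SeqTrainer` setup for this Whisper fine-tune (none of that setup is part of this commit):

```python
from transformers import WhisperForConditionalGeneration

# The checkpoint directory saved by Trainer includes the model config next to
# model.safetensors, so the weights alone can be loaded directly:
model = WhisperForConditionalGeneration.from_pretrained("last-checkpoint")

# Resuming the full training state (optimizer, scheduler, RNG, global step)
# goes through the original trainer object, e.g.:
#   trainer.train(resume_from_checkpoint="last-checkpoint")
```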
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ec0246d4e2aea1b71a33338e4420dd5d8c26630b4c1753f038e7f2036aad545
+oid sha256:4aa66ee572d638da161b0876a24a0495b141e6a283ebe22e54c7bb4b5cecc85d
 size 966995080
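Only the oid changes here; the payload size stays 966995080 bytes, as expected when fixed-shape weights are overwritten in place. A quick sanity check of a downloaded weights file, using the standard `safetensors` API (the fp32 byte arithmetic in the comment is an assumption about this checkpoint, not something the diff states):

```python
from safetensors.torch import load_file

# Load the checkpoint weights; for fp32 tensors the file holds roughly
# 4 bytes per parameter plus a small JSON header.
state_dict = load_file("last-checkpoint/model.safetensors")
n_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {n_params:,} parameters")
```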
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdc53657d0d3de6712008710ba891fd0a388a380e3678a28f24c312f466e7db5
+oid sha256:3095de81d8b920e91d85b8e2ee04f326ef5aa3917fee9ca74a8d0a152e8b3447
 size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01936a26df76d30ee6550fdbb203f4526dab703ccbf83b9464caef2a32f84a5b
+oid sha256:4ada5f6f7cb1b6a49d79d11cd5642321498733c76d6eb8ca5030fe74fa4bc331
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bdbe69a1efdf2a6b7b8df096446c782edc1c5607aae343868c0f0cf62a3941a
+oid sha256:e217ad856cdf0ac7c67db1e21b2cb21b2f44c6c7063ade06b9a1720236888449
 size 1064
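Each binary file above is stored as a Git LFS pointer: a three-line stub recording a spec version, the sha256 of the real payload, and its byte size. In every case the size is unchanged across the commit and only the oid moves, which is what overwriting fixed-shape training state looks like at the pointer level. A minimal sketch of checking a downloaded blob against its pointer (the helper is illustrative, not part of git or `huggingface_hub`):

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Return True if the blob's size and sha256 match the LFS pointer stub."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid
```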
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 49.85901151405969,
-  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-6000",
-  "epoch": 2.4,
+  "best_metric": 47.53661784287617,
+  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
+  "epoch": 2.8,
   "eval_steps": 1000,
-  "global_step": 6000,
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1741,6 +1741,295 @@
       "eval_samples_per_second": 2.22,
       "eval_steps_per_second": 0.278,
       "step": 6000
+    },
+    {
+      "epoch": 2.41,
+      "grad_norm": 12.415263175964355,
+      "learning_rate": 3.67283950617284e-06,
+      "loss": 0.6038,
+      "step": 6025
+    },
+    {
+      "epoch": 2.42,
+      "grad_norm": 12.250804901123047,
+      "learning_rate": 3.641975308641976e-06,
+      "loss": 0.6355,
+      "step": 6050
+    },
+    {
+      "epoch": 2.43,
+      "grad_norm": 10.84643840789795,
+      "learning_rate": 3.6111111111111115e-06,
+      "loss": 0.5963,
+      "step": 6075
+    },
+    {
+      "epoch": 2.44,
+      "grad_norm": 9.677035331726074,
+      "learning_rate": 3.580246913580247e-06,
+      "loss": 0.5596,
+      "step": 6100
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 12.466174125671387,
+      "learning_rate": 3.549382716049383e-06,
+      "loss": 0.6114,
+      "step": 6125
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 10.494367599487305,
+      "learning_rate": 3.5185185185185187e-06,
+      "loss": 0.5983,
+      "step": 6150
+    },
+    {
+      "epoch": 2.4699999999999998,
+      "grad_norm": 10.007222175598145,
+      "learning_rate": 3.4876543209876544e-06,
+      "loss": 0.5739,
+      "step": 6175
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 10.590246200561523,
+      "learning_rate": 3.4567901234567904e-06,
+      "loss": 0.6105,
+      "step": 6200
+    },
+    {
+      "epoch": 2.49,
+      "grad_norm": 11.260624885559082,
+      "learning_rate": 3.4259259259259265e-06,
+      "loss": 0.5963,
+      "step": 6225
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 12.293840408325195,
+      "learning_rate": 3.395061728395062e-06,
+      "loss": 0.6278,
+      "step": 6250
+    },
+    {
+      "epoch": 2.51,
+      "grad_norm": 12.284423828125,
+      "learning_rate": 3.3641975308641977e-06,
+      "loss": 0.613,
+      "step": 6275
+    },
+    {
+      "epoch": 2.52,
+      "grad_norm": 10.285521507263184,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 0.5841,
+      "step": 6300
+    },
+    {
+      "epoch": 2.5300000000000002,
+      "grad_norm": 11.444025039672852,
+      "learning_rate": 3.30246913580247e-06,
+      "loss": 0.6789,
+      "step": 6325
+    },
+    {
+      "epoch": 2.54,
+      "grad_norm": 9.65517520904541,
+      "learning_rate": 3.2716049382716054e-06,
+      "loss": 0.5777,
+      "step": 6350
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 11.333357810974121,
+      "learning_rate": 3.240740740740741e-06,
+      "loss": 0.6097,
+      "step": 6375
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 9.03528118133545,
+      "learning_rate": 3.2098765432098767e-06,
+      "loss": 0.5724,
+      "step": 6400
+    },
+    {
+      "epoch": 2.57,
+      "grad_norm": 11.75942325592041,
+      "learning_rate": 3.1790123456790127e-06,
+      "loss": 0.6378,
+      "step": 6425
+    },
+    {
+      "epoch": 2.58,
+      "grad_norm": 11.017098426818848,
+      "learning_rate": 3.1481481481481483e-06,
+      "loss": 0.6758,
+      "step": 6450
+    },
+    {
+      "epoch": 2.59,
+      "grad_norm": 12.29273509979248,
+      "learning_rate": 3.1172839506172844e-06,
+      "loss": 0.5915,
+      "step": 6475
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 12.807594299316406,
+      "learning_rate": 3.08641975308642e-06,
+      "loss": 0.6041,
+      "step": 6500
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 12.91454029083252,
+      "learning_rate": 3.055555555555556e-06,
+      "loss": 0.5537,
+      "step": 6525
+    },
+    {
+      "epoch": 2.62,
+      "grad_norm": 12.020458221435547,
+      "learning_rate": 3.0246913580246917e-06,
+      "loss": 0.6154,
+      "step": 6550
+    },
+    {
+      "epoch": 2.63,
+      "grad_norm": 10.018027305603027,
+      "learning_rate": 2.9938271604938273e-06,
+      "loss": 0.5778,
+      "step": 6575
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 10.63597297668457,
+      "learning_rate": 2.962962962962963e-06,
+      "loss": 0.5592,
+      "step": 6600
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 13.188393592834473,
+      "learning_rate": 2.9320987654320994e-06,
+      "loss": 0.582,
+      "step": 6625
+    },
+    {
+      "epoch": 2.66,
+      "grad_norm": 10.43333625793457,
+      "learning_rate": 2.901234567901235e-06,
+      "loss": 0.621,
+      "step": 6650
+    },
+    {
+      "epoch": 2.67,
+      "grad_norm": 11.198952674865723,
+      "learning_rate": 2.8703703703703706e-06,
+      "loss": 0.593,
+      "step": 6675
+    },
+    {
+      "epoch": 2.68,
+      "grad_norm": 12.264167785644531,
+      "learning_rate": 2.8395061728395062e-06,
+      "loss": 0.587,
+      "step": 6700
+    },
+    {
+      "epoch": 2.69,
+      "grad_norm": 12.704331398010254,
+      "learning_rate": 2.8086419753086423e-06,
+      "loss": 0.6146,
+      "step": 6725
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 10.604636192321777,
+      "learning_rate": 2.7777777777777783e-06,
+      "loss": 0.6201,
+      "step": 6750
+    },
+    {
+      "epoch": 2.71,
+      "grad_norm": 14.896036148071289,
+      "learning_rate": 2.746913580246914e-06,
+      "loss": 0.5932,
+      "step": 6775
+    },
+    {
+      "epoch": 2.7199999999999998,
+      "grad_norm": 11.566553115844727,
+      "learning_rate": 2.7160493827160496e-06,
+      "loss": 0.55,
+      "step": 6800
+    },
+    {
+      "epoch": 2.73,
+      "grad_norm": 12.04855728149414,
+      "learning_rate": 2.6851851851851856e-06,
+      "loss": 0.6556,
+      "step": 6825
+    },
+    {
+      "epoch": 2.74,
+      "grad_norm": 10.649229049682617,
+      "learning_rate": 2.6543209876543212e-06,
+      "loss": 0.5829,
+      "step": 6850
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 9.8768310546875,
+      "learning_rate": 2.623456790123457e-06,
+      "loss": 0.5403,
+      "step": 6875
+    },
+    {
+      "epoch": 2.76,
+      "grad_norm": 11.587966918945312,
+      "learning_rate": 2.5925925925925925e-06,
+      "loss": 0.579,
+      "step": 6900
+    },
+    {
+      "epoch": 2.77,
+      "grad_norm": 9.96321964263916,
+      "learning_rate": 2.561728395061729e-06,
+      "loss": 0.61,
+      "step": 6925
+    },
+    {
+      "epoch": 2.7800000000000002,
+      "grad_norm": 11.546381950378418,
+      "learning_rate": 2.5308641975308646e-06,
+      "loss": 0.5558,
+      "step": 6950
+    },
+    {
+      "epoch": 2.79,
+      "grad_norm": 13.616846084594727,
+      "learning_rate": 2.5e-06,
+      "loss": 0.5776,
+      "step": 6975
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 11.600656509399414,
+      "learning_rate": 2.469135802469136e-06,
+      "loss": 0.5584,
+      "step": 7000
+    },
+    {
+      "epoch": 2.8,
+      "eval_cer": 47.53661784287617,
+      "eval_loss": 0.896188497543335,
+      "eval_runtime": 1733.8859,
+      "eval_samples_per_second": 2.27,
+      "eval_steps_per_second": 0.284,
+      "step": 7000
     }
   ],
   "logging_steps": 25,
@@ -1748,7 +2037,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
-  "total_flos": 2.770419843072e+19,
+  "total_flos": 3.232156483584e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null