lesso17 commited on
Commit
782e33d
·
verified ·
1 Parent(s): a42e565

Training in progress, step 14791, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9121bb13cac348f8255b1c8606cfd2bf5e0fa53792bec74f0ad42ad9a5892b13
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90032cf0dc9c856d41890f86f4685dace3e1d08c43dcf395cfb0abeb905dacff
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24729a35a7964a4ce3387d5638f82b3ff50a36ca8ace89c74d75aa6da5527dab
3
  size 401098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad103e7733612d5e8f27af84dac1a020ea6533850754af991474ed8c8362f9d
3
  size 401098
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72185d8f2fc683486e6ab10ae5b4e33251dc4710b2a632161220da3a79c49509
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7125b2b44ded5a58c206b318b0c5bdacc79616c40eb566c3d2a91235dce6447
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2808572b68b68af50d7aa6557fbee04cc697274c3d6f082e519062d2da20d671
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:342aa3cf2d92cb0b41d8671fb70ba6c07fc707a0a46c98b22d4a2df487835f64
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 10.318995475769043,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-12658",
4
- "epoch": 8.557762190484238,
5
  "eval_steps": 12658,
6
- "global_step": 12658,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1794,6 +1794,300 @@
1794
  "eval_samples_per_second": 216.168,
1795
  "eval_steps_per_second": 54.064,
1796
  "step": 12658
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1797
  }
1798
  ],
1799
  "logging_steps": 50,
@@ -1817,12 +2111,12 @@
1817
  "should_evaluate": false,
1818
  "should_log": false,
1819
  "should_save": true,
1820
- "should_training_stop": false
1821
  },
1822
  "attributes": {}
1823
  }
1824
  },
1825
- "total_flos": 1848296839446528.0,
1826
  "train_batch_size": 4,
1827
  "trial_name": null,
1828
  "trial_params": null
 
1
  {
2
  "best_metric": 10.318995475769043,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-12658",
4
+ "epoch": 9.999830981154398,
5
  "eval_steps": 12658,
6
+ "global_step": 14791,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1794
  "eval_samples_per_second": 216.168,
1795
  "eval_steps_per_second": 54.064,
1796
  "step": 12658
1797
+ },
1798
+ {
1799
+ "epoch": 8.586157356545256,
1800
+ "grad_norm": 0.04512249305844307,
1801
+ "learning_rate": 1.0667350889148373e-05,
1802
+ "loss": 10.3284,
1803
+ "step": 12700
1804
+ },
1805
+ {
1806
+ "epoch": 8.619961125665512,
1807
+ "grad_norm": 0.04324914887547493,
1808
+ "learning_rate": 1.0171325835719605e-05,
1809
+ "loss": 10.328,
1810
+ "step": 12750
1811
+ },
1812
+ {
1813
+ "epoch": 8.653764894785768,
1814
+ "grad_norm": 0.04108593612909317,
1815
+ "learning_rate": 9.686541992842219e-06,
1816
+ "loss": 10.3283,
1817
+ "step": 12800
1818
+ },
1819
+ {
1820
+ "epoch": 8.687568663906026,
1821
+ "grad_norm": 0.04161646589636803,
1822
+ "learning_rate": 9.213054782368384e-06,
1823
+ "loss": 10.3278,
1824
+ "step": 12850
1825
+ },
1826
+ {
1827
+ "epoch": 8.721372433026282,
1828
+ "grad_norm": 0.0430586077272892,
1829
+ "learning_rate": 8.750918334687578e-06,
1830
+ "loss": 10.3283,
1831
+ "step": 12900
1832
+ },
1833
+ {
1834
+ "epoch": 8.75517620214654,
1835
+ "grad_norm": 0.042888566851615906,
1836
+ "learning_rate": 8.300185482538152e-06,
1837
+ "loss": 10.3285,
1838
+ "step": 12950
1839
+ },
1840
+ {
1841
+ "epoch": 8.788979971266796,
1842
+ "grad_norm": 0.041527193039655685,
1843
+ "learning_rate": 7.860907754967288e-06,
1844
+ "loss": 10.3281,
1845
+ "step": 13000
1846
+ },
1847
+ {
1848
+ "epoch": 8.822783740387052,
1849
+ "grad_norm": 0.04209504276514053,
1850
+ "learning_rate": 7.433135371440134e-06,
1851
+ "loss": 10.3281,
1852
+ "step": 13050
1853
+ },
1854
+ {
1855
+ "epoch": 8.85658750950731,
1856
+ "grad_norm": 0.04120711609721184,
1857
+ "learning_rate": 7.016917236098581e-06,
1858
+ "loss": 10.3284,
1859
+ "step": 13100
1860
+ },
1861
+ {
1862
+ "epoch": 8.890391278627567,
1863
+ "grad_norm": 0.0413849912583828,
1864
+ "learning_rate": 6.612300932170353e-06,
1865
+ "loss": 10.3283,
1866
+ "step": 13150
1867
+ },
1868
+ {
1869
+ "epoch": 8.924195047747824,
1870
+ "grad_norm": 0.04532284289598465,
1871
+ "learning_rate": 6.21933271652917e-06,
1872
+ "loss": 10.3282,
1873
+ "step": 13200
1874
+ },
1875
+ {
1876
+ "epoch": 8.95799881686808,
1877
+ "grad_norm": 0.04391855373978615,
1878
+ "learning_rate": 5.8380575144065715e-06,
1879
+ "loss": 10.3282,
1880
+ "step": 13250
1881
+ },
1882
+ {
1883
+ "epoch": 8.991802585988339,
1884
+ "grad_norm": 0.04782425984740257,
1885
+ "learning_rate": 5.4685189142558565e-06,
1886
+ "loss": 10.3278,
1887
+ "step": 13300
1888
+ },
1889
+ {
1890
+ "epoch": 9.025606355108595,
1891
+ "grad_norm": 0.06290362030267715,
1892
+ "learning_rate": 5.1107591627690535e-06,
1893
+ "loss": 10.5103,
1894
+ "step": 13350
1895
+ },
1896
+ {
1897
+ "epoch": 9.059410124228851,
1898
+ "grad_norm": 0.058358874171972275,
1899
+ "learning_rate": 4.764819160047029e-06,
1900
+ "loss": 10.331,
1901
+ "step": 13400
1902
+ },
1903
+ {
1904
+ "epoch": 9.093213893349109,
1905
+ "grad_norm": 0.061768610030412674,
1906
+ "learning_rate": 4.4307384549237645e-06,
1907
+ "loss": 10.3337,
1908
+ "step": 13450
1909
+ },
1910
+ {
1911
+ "epoch": 9.127017662469365,
1912
+ "grad_norm": 0.05595018342137337,
1913
+ "learning_rate": 4.108555240444934e-06,
1914
+ "loss": 10.3171,
1915
+ "step": 13500
1916
+ },
1917
+ {
1918
+ "epoch": 9.160821431589623,
1919
+ "grad_norm": 0.05736524239182472,
1920
+ "learning_rate": 3.7983063495017033e-06,
1921
+ "loss": 10.3269,
1922
+ "step": 13550
1923
+ },
1924
+ {
1925
+ "epoch": 9.19462520070988,
1926
+ "grad_norm": 0.05978742614388466,
1927
+ "learning_rate": 3.500027250619767e-06,
1928
+ "loss": 10.3274,
1929
+ "step": 13600
1930
+ },
1931
+ {
1932
+ "epoch": 9.228428969830135,
1933
+ "grad_norm": 0.06201018765568733,
1934
+ "learning_rate": 3.2137520439045086e-06,
1935
+ "loss": 10.3289,
1936
+ "step": 13650
1937
+ },
1938
+ {
1939
+ "epoch": 9.262232738950393,
1940
+ "grad_norm": 0.057245831936597824,
1941
+ "learning_rate": 2.9395134571426653e-06,
1942
+ "loss": 10.328,
1943
+ "step": 13700
1944
+ },
1945
+ {
1946
+ "epoch": 9.29603650807065,
1947
+ "grad_norm": 0.05930756404995918,
1948
+ "learning_rate": 2.6773428420606484e-06,
1949
+ "loss": 10.3296,
1950
+ "step": 13750
1951
+ },
1952
+ {
1953
+ "epoch": 9.329840277190907,
1954
+ "grad_norm": 0.0608067624270916,
1955
+ "learning_rate": 2.4272701707404724e-06,
1956
+ "loss": 10.3306,
1957
+ "step": 13800
1958
+ },
1959
+ {
1960
+ "epoch": 9.363644046311164,
1961
+ "grad_norm": 0.05809257924556732,
1962
+ "learning_rate": 2.1893240321931975e-06,
1963
+ "loss": 10.3233,
1964
+ "step": 13850
1965
+ },
1966
+ {
1967
+ "epoch": 9.39744781543142,
1968
+ "grad_norm": 0.061754010617733,
1969
+ "learning_rate": 1.963531629090508e-06,
1970
+ "loss": 10.332,
1971
+ "step": 13900
1972
+ },
1973
+ {
1974
+ "epoch": 9.431251584551678,
1975
+ "grad_norm": 0.06163324415683746,
1976
+ "learning_rate": 1.749918774654905e-06,
1977
+ "loss": 10.3353,
1978
+ "step": 13950
1979
+ },
1980
+ {
1981
+ "epoch": 9.465055353671934,
1982
+ "grad_norm": 0.06020689383149147,
1983
+ "learning_rate": 1.548509889708657e-06,
1984
+ "loss": 10.3174,
1985
+ "step": 14000
1986
+ },
1987
+ {
1988
+ "epoch": 9.498859122792192,
1989
+ "grad_norm": 0.05851924419403076,
1990
+ "learning_rate": 1.3593279998818836e-06,
1991
+ "loss": 10.3277,
1992
+ "step": 14050
1993
+ },
1994
+ {
1995
+ "epoch": 9.532662891912448,
1996
+ "grad_norm": 0.054948098957538605,
1997
+ "learning_rate": 1.182394732980232e-06,
1998
+ "loss": 10.3299,
1999
+ "step": 14100
2000
+ },
2001
+ {
2002
+ "epoch": 9.566466661032706,
2003
+ "grad_norm": 0.06071977689862251,
2004
+ "learning_rate": 1.0177303165123553e-06,
2005
+ "loss": 10.3287,
2006
+ "step": 14150
2007
+ },
2008
+ {
2009
+ "epoch": 9.600270430152962,
2010
+ "grad_norm": 0.06051783263683319,
2011
+ "learning_rate": 8.653535753774341e-07,
2012
+ "loss": 10.3278,
2013
+ "step": 14200
2014
+ },
2015
+ {
2016
+ "epoch": 9.634074199273218,
2017
+ "grad_norm": 0.060153182595968246,
2018
+ "learning_rate": 7.252819297130155e-07,
2019
+ "loss": 10.3266,
2020
+ "step": 14250
2021
+ },
2022
+ {
2023
+ "epoch": 9.667877968393476,
2024
+ "grad_norm": 0.06272024661302567,
2025
+ "learning_rate": 5.975313929035335e-07,
2026
+ "loss": 10.339,
2027
+ "step": 14300
2028
+ },
2029
+ {
2030
+ "epoch": 9.701681737513733,
2031
+ "grad_norm": 0.06360357999801636,
2032
+ "learning_rate": 4.821165697496309e-07,
2033
+ "loss": 10.3309,
2034
+ "step": 14350
2035
+ },
2036
+ {
2037
+ "epoch": 9.73548550663399,
2038
+ "grad_norm": 0.059037111699581146,
2039
+ "learning_rate": 3.7905065479846173e-07,
2040
+ "loss": 10.3234,
2041
+ "step": 14400
2042
+ },
2043
+ {
2044
+ "epoch": 9.769289275754247,
2045
+ "grad_norm": 0.0558004267513752,
2046
+ "learning_rate": 2.883454308352653e-07,
2047
+ "loss": 10.3263,
2048
+ "step": 14450
2049
+ },
2050
+ {
2051
+ "epoch": 9.803093044874503,
2052
+ "grad_norm": 0.05941386893391609,
2053
+ "learning_rate": 2.100112675363192e-07,
2054
+ "loss": 10.3226,
2055
+ "step": 14500
2056
+ },
2057
+ {
2058
+ "epoch": 9.83689681399476,
2059
+ "grad_norm": 0.0531686469912529,
2060
+ "learning_rate": 1.4405712028346407e-07,
2061
+ "loss": 10.3312,
2062
+ "step": 14550
2063
+ },
2064
+ {
2065
+ "epoch": 9.870700583115017,
2066
+ "grad_norm": 0.05972735211253166,
2067
+ "learning_rate": 9.049052914030886e-08,
2068
+ "loss": 10.328,
2069
+ "step": 14600
2070
+ },
2071
+ {
2072
+ "epoch": 9.904504352235275,
2073
+ "grad_norm": 0.06279122829437256,
2074
+ "learning_rate": 4.93176179901767e-08,
2075
+ "loss": 10.3326,
2076
+ "step": 14650
2077
+ },
2078
+ {
2079
+ "epoch": 9.938308121355531,
2080
+ "grad_norm": 0.05874359980225563,
2081
+ "learning_rate": 2.054309383605618e-08,
2082
+ "loss": 10.3212,
2083
+ "step": 14700
2084
+ },
2085
+ {
2086
+ "epoch": 9.972111890475787,
2087
+ "grad_norm": 0.060345474630594254,
2088
+ "learning_rate": 4.170246262474031e-09,
2089
+ "loss": 10.3283,
2090
+ "step": 14750
2091
  }
2092
  ],
2093
  "logging_steps": 50,
 
2111
  "should_evaluate": false,
2112
  "should_log": false,
2113
  "should_save": true,
2114
+ "should_training_stop": true
2115
  },
2116
  "attributes": {}
2117
  }
2118
  },
2119
+ "total_flos": 2158745813876736.0,
2120
  "train_batch_size": 4,
2121
  "trial_name": null,
2122
  "trial_params": null