{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1370,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.072992700729927,
"grad_norm": 5.749124526977539,
"learning_rate": 1.4492753623188407e-05,
"loss": 0.9214,
"step": 10
},
{
"epoch": 0.145985401459854,
"grad_norm": 2.569692611694336,
"learning_rate": 2.8985507246376814e-05,
"loss": 0.506,
"step": 20
},
{
"epoch": 0.21897810218978103,
"grad_norm": 1.676221489906311,
"learning_rate": 4.347826086956522e-05,
"loss": 0.3337,
"step": 30
},
{
"epoch": 0.291970802919708,
"grad_norm": 1.1421884298324585,
"learning_rate": 5.797101449275363e-05,
"loss": 0.2416,
"step": 40
},
{
"epoch": 0.36496350364963503,
"grad_norm": 0.7032585144042969,
"learning_rate": 7.246376811594203e-05,
"loss": 0.1709,
"step": 50
},
{
"epoch": 0.43795620437956206,
"grad_norm": 0.8790648579597473,
"learning_rate": 8.695652173913044e-05,
"loss": 0.1541,
"step": 60
},
{
"epoch": 0.5109489051094891,
"grad_norm": 0.9456900358200073,
"learning_rate": 9.999985422436231e-05,
"loss": 0.1368,
"step": 70
},
{
"epoch": 0.583941605839416,
"grad_norm": 0.8083729147911072,
"learning_rate": 9.998236217634196e-05,
"loss": 0.1253,
"step": 80
},
{
"epoch": 0.656934306569343,
"grad_norm": 0.5535995960235596,
"learning_rate": 9.993572668745786e-05,
"loss": 0.1117,
"step": 90
},
{
"epoch": 0.7299270072992701,
"grad_norm": 0.7210536599159241,
"learning_rate": 9.985997494967441e-05,
"loss": 0.0905,
"step": 100
},
{
"epoch": 0.8029197080291971,
"grad_norm": 0.5236138105392456,
"learning_rate": 9.975515113189827e-05,
"loss": 0.0917,
"step": 110
},
{
"epoch": 0.8759124087591241,
"grad_norm": 0.4034636318683624,
"learning_rate": 9.962131635422462e-05,
"loss": 0.0802,
"step": 120
},
{
"epoch": 0.948905109489051,
"grad_norm": 0.6598319411277771,
"learning_rate": 9.945854865229965e-05,
"loss": 0.0744,
"step": 130
},
{
"epoch": 1.0218978102189782,
"grad_norm": 0.6162580847740173,
"learning_rate": 9.926694293181986e-05,
"loss": 0.0808,
"step": 140
},
{
"epoch": 1.094890510948905,
"grad_norm": 0.5971336960792542,
"learning_rate": 9.904661091319503e-05,
"loss": 0.0765,
"step": 150
},
{
"epoch": 1.167883211678832,
"grad_norm": 0.7087730765342712,
"learning_rate": 9.879768106640687e-05,
"loss": 0.0713,
"step": 160
},
{
"epoch": 1.2408759124087592,
"grad_norm": 0.6969349980354309,
"learning_rate": 9.852029853610148e-05,
"loss": 0.0665,
"step": 170
},
{
"epoch": 1.313868613138686,
"grad_norm": 0.6811894178390503,
"learning_rate": 9.821462505695917e-05,
"loss": 0.0631,
"step": 180
},
{
"epoch": 1.3868613138686132,
"grad_norm": 0.34767240285873413,
"learning_rate": 9.788083885939116e-05,
"loss": 0.064,
"step": 190
},
{
"epoch": 1.4598540145985401,
"grad_norm": 0.3470080494880676,
"learning_rate": 9.75191345656179e-05,
"loss": 0.0557,
"step": 200
},
{
"epoch": 1.5328467153284673,
"grad_norm": 0.39414817094802856,
"learning_rate": 9.712972307618981e-05,
"loss": 0.0627,
"step": 210
},
{
"epoch": 1.6058394160583942,
"grad_norm": 0.5173900723457336,
"learning_rate": 9.671283144701663e-05,
"loss": 0.0573,
"step": 220
},
{
"epoch": 1.6788321167883211,
"grad_norm": 0.34683912992477417,
"learning_rate": 9.626870275697682e-05,
"loss": 0.0603,
"step": 230
},
{
"epoch": 1.7518248175182483,
"grad_norm": 0.5500655174255371,
"learning_rate": 9.579759596618454e-05,
"loss": 0.062,
"step": 240
},
{
"epoch": 1.8248175182481752,
"grad_norm": 0.3647141456604004,
"learning_rate": 9.529978576499652e-05,
"loss": 0.0504,
"step": 250
},
{
"epoch": 1.897810218978102,
"grad_norm": 0.3856804072856903,
"learning_rate": 9.477556241384724e-05,
"loss": 0.0543,
"step": 260
},
{
"epoch": 1.9708029197080292,
"grad_norm": 0.35589075088500977,
"learning_rate": 9.422523157400533e-05,
"loss": 0.0496,
"step": 270
},
{
"epoch": 2.0437956204379564,
"grad_norm": 0.37966451048851013,
"learning_rate": 9.36491141293504e-05,
"loss": 0.0475,
"step": 280
},
{
"epoch": 2.116788321167883,
"grad_norm": 0.4046926200389862,
"learning_rate": 9.304754599927377e-05,
"loss": 0.0466,
"step": 290
},
{
"epoch": 2.18978102189781,
"grad_norm": 0.5988451242446899,
"learning_rate": 9.242087794281243e-05,
"loss": 0.0529,
"step": 300
},
{
"epoch": 2.2627737226277373,
"grad_norm": 0.3989510238170624,
"learning_rate": 9.176947535413046e-05,
"loss": 0.0509,
"step": 310
},
{
"epoch": 2.335766423357664,
"grad_norm": 0.4888112246990204,
"learning_rate": 9.10937180494669e-05,
"loss": 0.0405,
"step": 320
},
{
"epoch": 2.408759124087591,
"grad_norm": 0.5207938551902771,
"learning_rate": 9.039400004567469e-05,
"loss": 0.0472,
"step": 330
},
{
"epoch": 2.4817518248175183,
"grad_norm": 0.41739702224731445,
"learning_rate": 8.967072933047945e-05,
"loss": 0.0523,
"step": 340
},
{
"epoch": 2.554744525547445,
"grad_norm": 0.5469692349433899,
"learning_rate": 8.892432762459221e-05,
"loss": 0.0466,
"step": 350
},
{
"epoch": 2.627737226277372,
"grad_norm": 0.44405660033226013,
"learning_rate": 8.815523013581488e-05,
"loss": 0.0475,
"step": 360
},
{
"epoch": 2.7007299270072993,
"grad_norm": 0.42182251811027527,
"learning_rate": 8.736388530528162e-05,
"loss": 0.0423,
"step": 370
},
{
"epoch": 2.7737226277372264,
"grad_norm": 0.323812335729599,
"learning_rate": 8.655075454598426e-05,
"loss": 0.0421,
"step": 380
},
{
"epoch": 2.846715328467153,
"grad_norm": 0.44694793224334717,
"learning_rate": 8.571631197373422e-05,
"loss": 0.0447,
"step": 390
},
{
"epoch": 2.9197080291970803,
"grad_norm": 0.2432471662759781,
"learning_rate": 8.486104413071755e-05,
"loss": 0.0438,
"step": 400
},
{
"epoch": 2.9927007299270074,
"grad_norm": 0.42758315801620483,
"learning_rate": 8.398544970180469e-05,
"loss": 0.0466,
"step": 410
},
{
"epoch": 3.065693430656934,
"grad_norm": 0.3887377679347992,
"learning_rate": 8.309003922377996e-05,
"loss": 0.04,
"step": 420
},
{
"epoch": 3.1386861313868613,
"grad_norm": 0.33644479513168335,
"learning_rate": 8.217533478766068e-05,
"loss": 0.0455,
"step": 430
},
{
"epoch": 3.2116788321167884,
"grad_norm": 0.29482313990592957,
"learning_rate": 8.124186973427911e-05,
"loss": 0.0398,
"step": 440
},
{
"epoch": 3.2846715328467155,
"grad_norm": 0.46486881375312805,
"learning_rate": 8.029018834330506e-05,
"loss": 0.0432,
"step": 450
},
{
"epoch": 3.3576642335766422,
"grad_norm": 0.4706019163131714,
"learning_rate": 7.932084551589027e-05,
"loss": 0.036,
"step": 460
},
{
"epoch": 3.4306569343065694,
"grad_norm": 0.502895176410675,
"learning_rate": 7.833440645111975e-05,
"loss": 0.0372,
"step": 470
},
{
"epoch": 3.5036496350364965,
"grad_norm": 0.37358635663986206,
"learning_rate": 7.733144631645852e-05,
"loss": 0.0415,
"step": 480
},
{
"epoch": 3.576642335766423,
"grad_norm": 0.38203373551368713,
"learning_rate": 7.631254991238621e-05,
"loss": 0.0362,
"step": 490
},
{
"epoch": 3.6496350364963503,
"grad_norm": 0.38717857003211975,
"learning_rate": 7.527831133141476e-05,
"loss": 0.0416,
"step": 500
},
{
"epoch": 3.7226277372262775,
"grad_norm": 0.32451218366622925,
"learning_rate": 7.422933361168825e-05,
"loss": 0.0378,
"step": 510
},
{
"epoch": 3.795620437956204,
"grad_norm": 0.3965657651424408,
"learning_rate": 7.316622838536673e-05,
"loss": 0.0374,
"step": 520
},
{
"epoch": 3.8686131386861313,
"grad_norm": 0.5710194110870361,
"learning_rate": 7.208961552199913e-05,
"loss": 0.0372,
"step": 530
},
{
"epoch": 3.9416058394160585,
"grad_norm": 0.3017335832118988,
"learning_rate": 7.100012276709302e-05,
"loss": 0.0411,
"step": 540
},
{
"epoch": 4.014598540145985,
"grad_norm": 0.42834600806236267,
"learning_rate": 6.98983853760924e-05,
"loss": 0.0324,
"step": 550
},
{
"epoch": 4.087591240875913,
"grad_norm": 0.2624529302120209,
"learning_rate": 6.878504574397626e-05,
"loss": 0.0362,
"step": 560
},
{
"epoch": 4.160583941605839,
"grad_norm": 0.3475762605667114,
"learning_rate": 6.766075303069459e-05,
"loss": 0.0398,
"step": 570
},
{
"epoch": 4.233576642335766,
"grad_norm": 0.5206601023674011,
"learning_rate": 6.65261627826597e-05,
"loss": 0.0367,
"step": 580
},
{
"epoch": 4.306569343065694,
"grad_norm": 0.4349361062049866,
"learning_rate": 6.538193655051381e-05,
"loss": 0.0365,
"step": 590
},
{
"epoch": 4.37956204379562,
"grad_norm": 0.5204529166221619,
"learning_rate": 6.422874150339579e-05,
"loss": 0.0358,
"step": 600
},
{
"epoch": 4.452554744525547,
"grad_norm": 0.27471956610679626,
"learning_rate": 6.30672500399318e-05,
"loss": 0.0336,
"step": 610
},
{
"epoch": 4.525547445255475,
"grad_norm": 0.3909718990325928,
"learning_rate": 6.189813939617682e-05,
"loss": 0.0363,
"step": 620
},
{
"epoch": 4.598540145985401,
"grad_norm": 0.24090121686458588,
"learning_rate": 6.072209125073561e-05,
"loss": 0.0363,
"step": 630
},
{
"epoch": 4.671532846715328,
"grad_norm": 0.3554544448852539,
"learning_rate": 5.95397913272932e-05,
"loss": 0.0359,
"step": 640
},
{
"epoch": 4.744525547445256,
"grad_norm": 0.31269019842147827,
"learning_rate": 5.8351928994787006e-05,
"loss": 0.0405,
"step": 650
},
{
"epoch": 4.817518248175182,
"grad_norm": 0.40918299555778503,
"learning_rate": 5.7159196865453294e-05,
"loss": 0.0361,
"step": 660
},
{
"epoch": 4.89051094890511,
"grad_norm": 0.522287130355835,
"learning_rate": 5.596229039098271e-05,
"loss": 0.0336,
"step": 670
},
{
"epoch": 4.963503649635037,
"grad_norm": 0.48848339915275574,
"learning_rate": 5.4761907457020077e-05,
"loss": 0.0317,
"step": 680
},
{
"epoch": 5.036496350364963,
"grad_norm": 0.4906352162361145,
"learning_rate": 5.355874797624515e-05,
"loss": 0.0295,
"step": 690
},
{
"epoch": 5.109489051094891,
"grad_norm": 0.24832546710968018,
"learning_rate": 5.235351348027129e-05,
"loss": 0.0299,
"step": 700
},
{
"epoch": 5.182481751824818,
"grad_norm": 0.3912707567214966,
"learning_rate": 5.1146906710600306e-05,
"loss": 0.0306,
"step": 710
},
{
"epoch": 5.255474452554744,
"grad_norm": 0.23428483307361603,
"learning_rate": 4.993963120887183e-05,
"loss": 0.0312,
"step": 720
},
{
"epoch": 5.328467153284672,
"grad_norm": 0.39608365297317505,
"learning_rate": 4.8732390906646097e-05,
"loss": 0.0323,
"step": 730
},
{
"epoch": 5.401459854014599,
"grad_norm": 0.24625132977962494,
"learning_rate": 4.75258897149594e-05,
"loss": 0.0346,
"step": 740
},
{
"epoch": 5.474452554744525,
"grad_norm": 0.4109422564506531,
"learning_rate": 4.632083111389153e-05,
"loss": 0.0305,
"step": 750
},
{
"epoch": 5.547445255474453,
"grad_norm": 0.4575289487838745,
"learning_rate": 4.5117917742384456e-05,
"loss": 0.0335,
"step": 760
},
{
"epoch": 5.62043795620438,
"grad_norm": 0.3231428265571594,
"learning_rate": 4.391785098855156e-05,
"loss": 0.032,
"step": 770
},
{
"epoch": 5.693430656934306,
"grad_norm": 0.22695614397525787,
"learning_rate": 4.272133058071595e-05,
"loss": 0.0341,
"step": 780
},
{
"epoch": 5.766423357664234,
"grad_norm": 0.3635699450969696,
"learning_rate": 4.1529054179416875e-05,
"loss": 0.0303,
"step": 790
},
{
"epoch": 5.839416058394161,
"grad_norm": 0.44931599497795105,
"learning_rate": 4.034171697062157e-05,
"loss": 0.0312,
"step": 800
},
{
"epoch": 5.912408759124087,
"grad_norm": 0.38062986731529236,
"learning_rate": 3.916001126038008e-05,
"loss": 0.0298,
"step": 810
},
{
"epoch": 5.985401459854015,
"grad_norm": 0.44949817657470703,
"learning_rate": 3.7984626071159224e-05,
"loss": 0.0301,
"step": 820
},
{
"epoch": 6.0583941605839415,
"grad_norm": 0.2569969892501831,
"learning_rate": 3.681624674009121e-05,
"loss": 0.03,
"step": 830
},
{
"epoch": 6.131386861313868,
"grad_norm": 0.3157390356063843,
"learning_rate": 3.5655554519370956e-05,
"loss": 0.0303,
"step": 840
},
{
"epoch": 6.204379562043796,
"grad_norm": 0.27934950590133667,
"learning_rate": 3.450322617903543e-05,
"loss": 0.0281,
"step": 850
},
{
"epoch": 6.2773722627737225,
"grad_norm": 0.2728220224380493,
"learning_rate": 3.3359933612356156e-05,
"loss": 0.0287,
"step": 860
},
{
"epoch": 6.350364963503649,
"grad_norm": 0.27907413244247437,
"learning_rate": 3.2226343444075465e-05,
"loss": 0.0268,
"step": 870
},
{
"epoch": 6.423357664233577,
"grad_norm": 0.2804538309574127,
"learning_rate": 3.110311664171458e-05,
"loss": 0.03,
"step": 880
},
{
"epoch": 6.4963503649635035,
"grad_norm": 0.5569368600845337,
"learning_rate": 2.999090813018035e-05,
"loss": 0.028,
"step": 890
},
{
"epoch": 6.569343065693431,
"grad_norm": 0.7178159952163696,
"learning_rate": 2.8890366409895148e-05,
"loss": 0.0325,
"step": 900
},
{
"epoch": 6.642335766423358,
"grad_norm": 0.19943037629127502,
"learning_rate": 2.780213317867292e-05,
"loss": 0.025,
"step": 910
},
{
"epoch": 6.7153284671532845,
"grad_norm": 0.1796846240758896,
"learning_rate": 2.672684295756147e-05,
"loss": 0.0257,
"step": 920
},
{
"epoch": 6.788321167883212,
"grad_norm": 0.23872807621955872,
"learning_rate": 2.566512272086945e-05,
"loss": 0.0304,
"step": 930
},
{
"epoch": 6.861313868613139,
"grad_norm": 0.2793905735015869,
"learning_rate": 2.4617591530593613e-05,
"loss": 0.0245,
"step": 940
},
{
"epoch": 6.934306569343065,
"grad_norm": 0.2569703161716461,
"learning_rate": 2.3584860175459584e-05,
"loss": 0.0269,
"step": 950
},
{
"epoch": 7.007299270072993,
"grad_norm": 0.27794763445854187,
"learning_rate": 2.2567530814786463e-05,
"loss": 0.0287,
"step": 960
},
{
"epoch": 7.08029197080292,
"grad_norm": 0.237870454788208,
"learning_rate": 2.156619662738319e-05,
"loss": 0.0261,
"step": 970
},
{
"epoch": 7.153284671532846,
"grad_norm": 0.3608488142490387,
"learning_rate": 2.0581441465680986e-05,
"loss": 0.0259,
"step": 980
},
{
"epoch": 7.226277372262774,
"grad_norm": 0.3045063316822052,
"learning_rate": 1.961383951530394e-05,
"loss": 0.0286,
"step": 990
},
{
"epoch": 7.299270072992701,
"grad_norm": 0.2131444364786148,
"learning_rate": 1.866395496027602e-05,
"loss": 0.0254,
"step": 1000
},
{
"epoch": 7.372262773722627,
"grad_norm": 0.25216737389564514,
"learning_rate": 1.7732341654059785e-05,
"loss": 0.0222,
"step": 1010
},
{
"epoch": 7.445255474452555,
"grad_norm": 0.2963661849498749,
"learning_rate": 1.6819542796618487e-05,
"loss": 0.0292,
"step": 1020
},
{
"epoch": 7.518248175182482,
"grad_norm": 0.4280974864959717,
"learning_rate": 1.592609061769004e-05,
"loss": 0.0263,
"step": 1030
},
{
"epoch": 7.591240875912408,
"grad_norm": 0.23306907713413239,
"learning_rate": 1.5052506066457461e-05,
"loss": 0.0211,
"step": 1040
},
{
"epoch": 7.664233576642336,
"grad_norm": 0.2898852825164795,
"learning_rate": 1.4199298507796698e-05,
"loss": 0.0209,
"step": 1050
},
{
"epoch": 7.737226277372263,
"grad_norm": 0.22745266556739807,
"learning_rate": 1.3366965425278899e-05,
"loss": 0.0218,
"step": 1060
},
{
"epoch": 7.81021897810219,
"grad_norm": 0.244761660695076,
"learning_rate": 1.2555992131100457e-05,
"loss": 0.0213,
"step": 1070
},
{
"epoch": 7.883211678832117,
"grad_norm": 0.3487051725387573,
"learning_rate": 1.1766851483109858e-05,
"loss": 0.0251,
"step": 1080
},
{
"epoch": 7.956204379562044,
"grad_norm": 0.38351985812187195,
"learning_rate": 1.1000003609096337e-05,
"loss": 0.0233,
"step": 1090
},
{
"epoch": 8.02919708029197,
"grad_norm": 0.2925453782081604,
"learning_rate": 1.0255895638501045e-05,
"loss": 0.0258,
"step": 1100
},
{
"epoch": 8.102189781021897,
"grad_norm": 0.2964305281639099,
"learning_rate": 9.534961441707307e-06,
"loss": 0.0193,
"step": 1110
},
{
"epoch": 8.175182481751825,
"grad_norm": 0.4270758628845215,
"learning_rate": 8.837621377061877e-06,
"loss": 0.0223,
"step": 1120
},
{
"epoch": 8.248175182481752,
"grad_norm": 0.22104066610336304,
"learning_rate": 8.16428204577468e-06,
"loss": 0.0217,
"step": 1130
},
{
"epoch": 8.321167883211679,
"grad_norm": 0.24666981399059296,
"learning_rate": 7.515336054840022e-06,
"loss": 0.0254,
"step": 1140
},
{
"epoch": 8.394160583941606,
"grad_norm": 0.16614589095115662,
"learning_rate": 6.8911617881174725e-06,
"loss": 0.02,
"step": 1150
},
{
"epoch": 8.467153284671532,
"grad_norm": 0.2146356999874115,
"learning_rate": 6.292123185705867e-06,
"loss": 0.024,
"step": 1160
},
{
"epoch": 8.540145985401459,
"grad_norm": 0.1602909117937088,
"learning_rate": 5.718569531739154e-06,
"loss": 0.0267,
"step": 1170
},
{
"epoch": 8.613138686131387,
"grad_norm": 0.4400380849838257,
"learning_rate": 5.170835250727663e-06,
"loss": 0.0231,
"step": 1180
},
{
"epoch": 8.686131386861314,
"grad_norm": 0.18121108412742615,
"learning_rate": 4.6492397125637525e-06,
"loss": 0.0192,
"step": 1190
},
{
"epoch": 8.75912408759124,
"grad_norm": 0.26767614483833313,
"learning_rate": 4.154087046305322e-06,
"loss": 0.0249,
"step": 1200
},
{
"epoch": 8.832116788321168,
"grad_norm": 0.2408941090106964,
"learning_rate": 3.6856659628459912e-06,
"loss": 0.0214,
"step": 1210
},
{
"epoch": 8.905109489051094,
"grad_norm": 0.2535434663295746,
"learning_rate": 3.244249586575038e-06,
"loss": 0.0203,
"step": 1220
},
{
"epoch": 8.978102189781023,
"grad_norm": 0.6523481011390686,
"learning_rate": 2.830095296125612e-06,
"loss": 0.0222,
"step": 1230
},
{
"epoch": 9.05109489051095,
"grad_norm": 0.16764762997627258,
"learning_rate": 2.4434445743037713e-06,
"loss": 0.0225,
"step": 1240
},
{
"epoch": 9.124087591240876,
"grad_norm": 0.35821276903152466,
"learning_rate": 2.0845228672860538e-06,
"loss": 0.0206,
"step": 1250
},
{
"epoch": 9.197080291970803,
"grad_norm": 0.38857728242874146,
"learning_rate": 1.7535394531675187e-06,
"loss": 0.0222,
"step": 1260
},
{
"epoch": 9.27007299270073,
"grad_norm": 0.2351900190114975,
"learning_rate": 1.4506873199370497e-06,
"loss": 0.021,
"step": 1270
},
{
"epoch": 9.343065693430656,
"grad_norm": 0.33225876092910767,
"learning_rate": 1.1761430529509899e-06,
"loss": 0.0258,
"step": 1280
},
{
"epoch": 9.416058394160585,
"grad_norm": 0.2640361785888672,
"learning_rate": 9.300667319706857e-07,
"loss": 0.0202,
"step": 1290
},
{
"epoch": 9.489051094890511,
"grad_norm": 0.15353083610534668,
"learning_rate": 7.126018378241062e-07,
"loss": 0.0215,
"step": 1300
},
{
"epoch": 9.562043795620438,
"grad_norm": 0.33280253410339355,
"learning_rate": 5.238751687458021e-07,
"loss": 0.0217,
"step": 1310
},
{
"epoch": 9.635036496350365,
"grad_norm": 0.18690501153469086,
"learning_rate": 3.639967664440802e-07,
"loss": 0.0194,
"step": 1320
},
{
"epoch": 9.708029197080291,
"grad_norm": 0.6559237837791443,
"learning_rate": 2.3305985193852742e-07,
"loss": 0.0242,
"step": 1330
},
{
"epoch": 9.78102189781022,
"grad_norm": 0.3035587966442108,
"learning_rate": 1.3114077120517376e-07,
"loss": 0.0271,
"step": 1340
},
{
"epoch": 9.854014598540147,
"grad_norm": 0.30467328429222107,
"learning_rate": 5.8298950661112017e-08,
"loss": 0.0187,
"step": 1350
},
{
"epoch": 9.927007299270073,
"grad_norm": 0.22451713681221008,
"learning_rate": 1.4576862514487089e-08,
"loss": 0.022,
"step": 1360
},
{
"epoch": 10.0,
"grad_norm": 0.33209139108657837,
"learning_rate": 0.0,
"loss": 0.0227,
"step": 1370
},
{
"epoch": 10.0,
"step": 1370,
"total_flos": 1.9634851190645568e+17,
"train_loss": 0.05370959477485531,
"train_runtime": 1800.9203,
"train_samples_per_second": 48.631,
"train_steps_per_second": 0.761
}
],
"logging_steps": 10,
"max_steps": 1370,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9634851190645568e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}