Alvin-Nahabwe's picture
End of training
6e108a7 verified
raw
history blame
45.1 kB
{
"best_global_step": 286140,
"best_metric": 0.1884266993162269,
"best_model_checkpoint": "wav2vec2-asr-africa-base-fintuned-luganda-400hrs-v0.1/checkpoint-286140",
"epoch": 99.0,
"eval_steps": 500,
"global_step": 298188,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 7.066422462463379,
"learning_rate": 6.98140770252324e-05,
"loss": 3.2575,
"step": 3012
},
{
"epoch": 1.0,
"eval_cer": 0.10930492502565166,
"eval_loss": 0.37255266308784485,
"eval_runtime": 151.5408,
"eval_samples_per_second": 239.869,
"eval_steps_per_second": 7.496,
"eval_wer": 0.534090083733671,
"step": 3012
},
{
"epoch": 2.0,
"grad_norm": 15.221166610717773,
"learning_rate": 6.929480730277542e-05,
"loss": 0.8396,
"step": 6024
},
{
"epoch": 2.0,
"eval_cer": 0.07810481279107405,
"eval_loss": 0.2538328468799591,
"eval_runtime": 152.0166,
"eval_samples_per_second": 239.119,
"eval_steps_per_second": 7.473,
"eval_wer": 0.3983557626127949,
"step": 6024
},
{
"epoch": 3.0,
"grad_norm": 7.060754299163818,
"learning_rate": 6.858773659570472e-05,
"loss": 0.7487,
"step": 9036
},
{
"epoch": 3.0,
"eval_cer": 0.0682448584368034,
"eval_loss": 0.22769133746623993,
"eval_runtime": 153.0969,
"eval_samples_per_second": 237.431,
"eval_steps_per_second": 7.42,
"eval_wer": 0.35308562554689743,
"step": 9036
},
{
"epoch": 4.0,
"grad_norm": 9.472733497619629,
"learning_rate": 6.788090063986478e-05,
"loss": 0.7226,
"step": 12048
},
{
"epoch": 4.0,
"eval_cer": 0.06425062788093966,
"eval_loss": 0.21326717734336853,
"eval_runtime": 171.3308,
"eval_samples_per_second": 212.163,
"eval_steps_per_second": 6.63,
"eval_wer": 0.33642189872410777,
"step": 12048
},
{
"epoch": 5.0,
"grad_norm": 23.48148536682129,
"learning_rate": 6.717429943525561e-05,
"loss": 0.7096,
"step": 15060
},
{
"epoch": 5.0,
"eval_cer": 0.06122744648913919,
"eval_loss": 0.20842401683330536,
"eval_runtime": 174.4012,
"eval_samples_per_second": 208.427,
"eval_steps_per_second": 6.514,
"eval_wer": 0.3211001241651708,
"step": 15060
},
{
"epoch": 6.0,
"grad_norm": 7.9525041580200195,
"learning_rate": 6.646746347941567e-05,
"loss": 0.6979,
"step": 18072
},
{
"epoch": 6.0,
"eval_cer": 0.06395399482657282,
"eval_loss": 0.20914477109909058,
"eval_runtime": 153.4786,
"eval_samples_per_second": 236.841,
"eval_steps_per_second": 7.402,
"eval_wer": 0.3287402596055075,
"step": 18072
},
{
"epoch": 7.0,
"grad_norm": 43.115169525146484,
"learning_rate": 6.57608622748065e-05,
"loss": 0.6899,
"step": 21084
},
{
"epoch": 7.0,
"eval_cer": 0.06076026089037598,
"eval_loss": 0.20185638964176178,
"eval_runtime": 173.3256,
"eval_samples_per_second": 209.721,
"eval_steps_per_second": 6.554,
"eval_wer": 0.31623727652863237,
"step": 21084
},
{
"epoch": 8.0,
"grad_norm": 4.6848602294921875,
"learning_rate": 6.505426107019733e-05,
"loss": 0.6765,
"step": 24096
},
{
"epoch": 8.0,
"eval_cer": 0.060137193933831115,
"eval_loss": 0.19728189706802368,
"eval_runtime": 154.1162,
"eval_samples_per_second": 235.861,
"eval_steps_per_second": 7.371,
"eval_wer": 0.31059969356450884,
"step": 24096
},
{
"epoch": 9.0,
"grad_norm": 5.830124378204346,
"learning_rate": 6.434719036312661e-05,
"loss": 0.6701,
"step": 27108
},
{
"epoch": 9.0,
"eval_cer": 0.05820976679231927,
"eval_loss": 0.19281432032585144,
"eval_runtime": 156.5256,
"eval_samples_per_second": 232.23,
"eval_steps_per_second": 7.258,
"eval_wer": 0.30471654717065966,
"step": 27108
},
{
"epoch": 10.0,
"grad_norm": Infinity,
"learning_rate": 6.364058915851744e-05,
"loss": 0.6621,
"step": 30120
},
{
"epoch": 10.0,
"eval_cer": 0.058245986284274416,
"eval_loss": 0.19237777590751648,
"eval_runtime": 155.0055,
"eval_samples_per_second": 234.508,
"eval_steps_per_second": 7.329,
"eval_wer": 0.3038691804061135,
"step": 30120
},
{
"epoch": 11.0,
"grad_norm": 5.588762283325195,
"learning_rate": 6.29337532026775e-05,
"loss": 0.6554,
"step": 33132
},
{
"epoch": 11.0,
"eval_cer": 0.05662298626501848,
"eval_loss": 0.18665704131126404,
"eval_runtime": 153.969,
"eval_samples_per_second": 236.087,
"eval_steps_per_second": 7.378,
"eval_wer": 0.2982627252006876,
"step": 33132
},
{
"epoch": 12.0,
"grad_norm": 5.955714702606201,
"learning_rate": 6.222691724683756e-05,
"loss": 0.6475,
"step": 36144
},
{
"epoch": 12.0,
"eval_cer": 0.05515495065438077,
"eval_loss": 0.1829417496919632,
"eval_runtime": 155.1287,
"eval_samples_per_second": 234.322,
"eval_steps_per_second": 7.323,
"eval_wer": 0.2873610923768119,
"step": 36144
},
{
"epoch": 13.0,
"grad_norm": 31.985713958740234,
"learning_rate": 6.152008129099762e-05,
"loss": 0.6429,
"step": 39156
},
{
"epoch": 13.0,
"eval_cer": 0.05419032013446143,
"eval_loss": 0.1801947057247162,
"eval_runtime": 168.8898,
"eval_samples_per_second": 215.229,
"eval_steps_per_second": 6.726,
"eval_wer": 0.28527553254407173,
"step": 39156
},
{
"epoch": 14.0,
"grad_norm": 6.557770252227783,
"learning_rate": 6.081324533515768e-05,
"loss": 0.6351,
"step": 42168
},
{
"epoch": 14.0,
"eval_cer": 0.055327337097104,
"eval_loss": 0.18261073529720306,
"eval_runtime": 159.2183,
"eval_samples_per_second": 228.303,
"eval_steps_per_second": 7.135,
"eval_wer": 0.2872746263804296,
"step": 42168
},
{
"epoch": 15.0,
"grad_norm": 8.820505142211914,
"learning_rate": 6.010640937931774e-05,
"loss": 0.6319,
"step": 45180
},
{
"epoch": 15.0,
"eval_cer": 0.05439250742499585,
"eval_loss": 0.17926117777824402,
"eval_runtime": 152.0308,
"eval_samples_per_second": 239.096,
"eval_steps_per_second": 7.472,
"eval_wer": 0.28315884495263394,
"step": 45180
},
{
"epoch": 16.0,
"grad_norm": 8.058792114257812,
"learning_rate": 5.93995734234778e-05,
"loss": 0.6251,
"step": 48192
},
{
"epoch": 16.0,
"eval_cer": 0.054798715904391546,
"eval_loss": 0.1785019189119339,
"eval_runtime": 154.575,
"eval_samples_per_second": 235.161,
"eval_steps_per_second": 7.349,
"eval_wer": 0.283826362444705,
"step": 48192
},
{
"epoch": 17.0,
"grad_norm": 7.038857936859131,
"learning_rate": 5.86925027164071e-05,
"loss": 0.6172,
"step": 51204
},
{
"epoch": 17.0,
"eval_cer": 0.051710431121988164,
"eval_loss": 0.17091116309165955,
"eval_runtime": 154.6152,
"eval_samples_per_second": 235.1,
"eval_steps_per_second": 7.347,
"eval_wer": 0.27192518270265037,
"step": 51204
},
{
"epoch": 18.0,
"grad_norm": NaN,
"learning_rate": 5.7985901511797926e-05,
"loss": 0.6122,
"step": 54216
},
{
"epoch": 18.0,
"eval_cer": 0.05208454638066411,
"eval_loss": 0.1720370054244995,
"eval_runtime": 154.3969,
"eval_samples_per_second": 235.432,
"eval_steps_per_second": 7.358,
"eval_wer": 0.27160698783596365,
"step": 54216
},
{
"epoch": 19.0,
"grad_norm": 4.123114109039307,
"learning_rate": 5.727930030718875e-05,
"loss": 0.6068,
"step": 57228
},
{
"epoch": 19.0,
"eval_cer": 0.0505266497520111,
"eval_loss": 0.16939722001552582,
"eval_runtime": 154.2835,
"eval_samples_per_second": 235.605,
"eval_steps_per_second": 7.363,
"eval_wer": 0.26646744901100194,
"step": 57228
},
{
"epoch": 20.0,
"grad_norm": 11.014168739318848,
"learning_rate": 5.657222960011804e-05,
"loss": 0.6035,
"step": 60240
},
{
"epoch": 20.0,
"eval_cer": 0.049698644657441546,
"eval_loss": 0.1669510304927826,
"eval_runtime": 155.9471,
"eval_samples_per_second": 233.092,
"eval_steps_per_second": 7.285,
"eval_wer": 0.26278053892526226,
"step": 60240
},
{
"epoch": 21.0,
"grad_norm": 7.567544937133789,
"learning_rate": 5.5865158893047335e-05,
"loss": 0.5957,
"step": 63252
},
{
"epoch": 21.0,
"eval_cer": 0.050415698903237105,
"eval_loss": 0.1704263538122177,
"eval_runtime": 155.0829,
"eval_samples_per_second": 234.391,
"eval_steps_per_second": 7.325,
"eval_wer": 0.2643818891782618,
"step": 63252
},
{
"epoch": 22.0,
"grad_norm": 3.353114366531372,
"learning_rate": 5.5158557688438164e-05,
"loss": 0.5909,
"step": 66264
},
{
"epoch": 22.0,
"eval_cer": 0.049318569229203364,
"eval_loss": 0.16528591513633728,
"eval_runtime": 155.149,
"eval_samples_per_second": 234.291,
"eval_steps_per_second": 7.322,
"eval_wer": 0.25990640920551583,
"step": 66264
},
{
"epoch": 23.0,
"grad_norm": 5.273142337799072,
"learning_rate": 5.445172173259822e-05,
"loss": 0.5879,
"step": 69276
},
{
"epoch": 23.0,
"eval_cer": 0.048735389561267335,
"eval_loss": 0.16745983064174652,
"eval_runtime": 155.8132,
"eval_samples_per_second": 233.292,
"eval_steps_per_second": 7.291,
"eval_wer": 0.2573400984328903,
"step": 69276
},
{
"epoch": 24.0,
"grad_norm": 12.098519325256348,
"learning_rate": 5.374512052798905e-05,
"loss": 0.5966,
"step": 72288
},
{
"epoch": 24.0,
"eval_cer": 0.05103463958854657,
"eval_loss": 0.19431033730506897,
"eval_runtime": 154.0761,
"eval_samples_per_second": 235.922,
"eval_steps_per_second": 7.373,
"eval_wer": 0.2738551037419025,
"step": 72288
},
{
"epoch": 25.0,
"grad_norm": 12.023294448852539,
"learning_rate": 5.3038519323379875e-05,
"loss": 0.6444,
"step": 75300
},
{
"epoch": 25.0,
"eval_cer": 0.05154996501838942,
"eval_loss": 0.1868334412574768,
"eval_runtime": 152.5369,
"eval_samples_per_second": 238.303,
"eval_steps_per_second": 7.447,
"eval_wer": 0.27229871580702175,
"step": 75300
},
{
"epoch": 26.0,
"grad_norm": 9.066435813903809,
"learning_rate": 5.2331448616309165e-05,
"loss": 0.5999,
"step": 78312
},
{
"epoch": 26.0,
"eval_cer": 0.04910904634536157,
"eval_loss": 0.16771361231803894,
"eval_runtime": 154.4851,
"eval_samples_per_second": 235.298,
"eval_steps_per_second": 7.353,
"eval_wer": 0.25782430801263095,
"step": 78312
},
{
"epoch": 27.0,
"grad_norm": 9.274683952331543,
"learning_rate": 5.1624847411699994e-05,
"loss": 0.5911,
"step": 81324
},
{
"epoch": 27.0,
"eval_cer": 0.04738747429103783,
"eval_loss": 0.16794191300868988,
"eval_runtime": 154.6471,
"eval_samples_per_second": 235.051,
"eval_steps_per_second": 7.346,
"eval_wer": 0.25102462205712983,
"step": 81324
},
{
"epoch": 28.0,
"grad_norm": 5.650504112243652,
"learning_rate": 5.091777670462929e-05,
"loss": 0.586,
"step": 84336
},
{
"epoch": 28.0,
"eval_cer": 0.04840666328618075,
"eval_loss": 0.1722731739282608,
"eval_runtime": 153.0438,
"eval_samples_per_second": 237.514,
"eval_steps_per_second": 7.423,
"eval_wer": 0.25386416537832335,
"step": 84336
},
{
"epoch": 29.0,
"grad_norm": 18.539613723754883,
"learning_rate": 5.021070599755859e-05,
"loss": 0.5816,
"step": 87348
},
{
"epoch": 29.0,
"eval_cer": 0.04769969548118283,
"eval_loss": 0.16775010526180267,
"eval_runtime": 156.0962,
"eval_samples_per_second": 232.869,
"eval_steps_per_second": 7.278,
"eval_wer": 0.25264326550940575,
"step": 87348
},
{
"epoch": 30.0,
"grad_norm": 4.343358039855957,
"learning_rate": 4.950457429541094e-05,
"loss": 0.5886,
"step": 90360
},
{
"epoch": 30.0,
"eval_cer": 0.04993246669411401,
"eval_loss": 0.18236766755580902,
"eval_runtime": 151.7294,
"eval_samples_per_second": 239.571,
"eval_steps_per_second": 7.487,
"eval_wer": 0.2629396363586056,
"step": 90360
},
{
"epoch": 31.0,
"grad_norm": 13.675621032714844,
"learning_rate": 4.879773833957101e-05,
"loss": 0.5978,
"step": 93372
},
{
"epoch": 31.0,
"eval_cer": 0.04701886072734242,
"eval_loss": 0.16201142966747284,
"eval_runtime": 152.4808,
"eval_samples_per_second": 238.391,
"eval_steps_per_second": 7.45,
"eval_wer": 0.24908778373816712,
"step": 93372
},
{
"epoch": 32.0,
"grad_norm": 5.842775821685791,
"learning_rate": 4.809066763250029e-05,
"loss": 0.5722,
"step": 96384
},
{
"epoch": 32.0,
"eval_cer": 0.04652920987407537,
"eval_loss": 0.15837915241718292,
"eval_runtime": 153.9577,
"eval_samples_per_second": 236.104,
"eval_steps_per_second": 7.379,
"eval_wer": 0.24719590773732322,
"step": 96384
},
{
"epoch": 33.0,
"grad_norm": 12.179231643676758,
"learning_rate": 4.738359692542959e-05,
"loss": 0.5615,
"step": 99396
},
{
"epoch": 33.0,
"eval_cer": 0.046122542920097966,
"eval_loss": 0.15639054775238037,
"eval_runtime": 165.4748,
"eval_samples_per_second": 219.671,
"eval_steps_per_second": 6.865,
"eval_wer": 0.2421047898703356,
"step": 99396
},
{
"epoch": 34.0,
"grad_norm": 11.811565399169922,
"learning_rate": 4.6676760969589654e-05,
"loss": 0.5566,
"step": 102408
},
{
"epoch": 34.0,
"eval_cer": 0.04475858103950859,
"eval_loss": 0.15303878486156464,
"eval_runtime": 151.973,
"eval_samples_per_second": 239.187,
"eval_steps_per_second": 7.475,
"eval_wer": 0.23680615361203053,
"step": 102408
},
{
"epoch": 35.0,
"grad_norm": 27.314680099487305,
"learning_rate": 4.5970394516211246e-05,
"loss": 0.5514,
"step": 105420
},
{
"epoch": 35.0,
"eval_cer": 0.04322590051284967,
"eval_loss": 0.14988180994987488,
"eval_runtime": 152.0916,
"eval_samples_per_second": 239.001,
"eval_steps_per_second": 7.469,
"eval_wer": 0.2308780449000626,
"step": 105420
},
{
"epoch": 36.0,
"grad_norm": 6.874896049499512,
"learning_rate": 4.526332380914054e-05,
"loss": 0.5485,
"step": 108432
},
{
"epoch": 36.0,
"eval_cer": 0.043550500516700855,
"eval_loss": 0.15108104050159454,
"eval_runtime": 156.7097,
"eval_samples_per_second": 231.958,
"eval_steps_per_second": 7.249,
"eval_wer": 0.23083308258194382,
"step": 108432
},
{
"epoch": 37.0,
"grad_norm": 18.988279342651367,
"learning_rate": 4.4556722604531365e-05,
"loss": 0.5451,
"step": 111444
},
{
"epoch": 37.0,
"eval_cer": 0.04385080136772137,
"eval_loss": 0.15071320533752441,
"eval_runtime": 172.1252,
"eval_samples_per_second": 211.183,
"eval_steps_per_second": 6.6,
"eval_wer": 0.23185338133925454,
"step": 111444
},
{
"epoch": 38.0,
"grad_norm": 28.172042846679688,
"learning_rate": 4.384988664869143e-05,
"loss": 0.5433,
"step": 114456
},
{
"epoch": 38.0,
"eval_cer": 0.0433771971248142,
"eval_loss": 0.14824804663658142,
"eval_runtime": 152.6042,
"eval_samples_per_second": 238.198,
"eval_steps_per_second": 7.444,
"eval_wer": 0.23122390888559166,
"step": 114456
},
{
"epoch": 39.0,
"grad_norm": 4.653916835784912,
"learning_rate": 4.3143050692851484e-05,
"loss": 0.5391,
"step": 117468
},
{
"epoch": 39.0,
"eval_cer": 0.04353903865215809,
"eval_loss": 0.14684619009494781,
"eval_runtime": 156.6046,
"eval_samples_per_second": 232.113,
"eval_steps_per_second": 7.254,
"eval_wer": 0.22910722129415387,
"step": 117468
},
{
"epoch": 40.0,
"grad_norm": 5.8400702476501465,
"learning_rate": 4.243621473701154e-05,
"loss": 0.5347,
"step": 120480
},
{
"epoch": 40.0,
"eval_cer": 0.042970530170836796,
"eval_loss": 0.1462726891040802,
"eval_runtime": 178.4468,
"eval_samples_per_second": 203.702,
"eval_steps_per_second": 6.366,
"eval_wer": 0.22744707416361443,
"step": 120480
},
{
"epoch": 41.0,
"grad_norm": 16.060422897338867,
"learning_rate": 4.172914402994084e-05,
"loss": 0.5313,
"step": 123492
},
{
"epoch": 41.0,
"eval_cer": 0.04216269795786252,
"eval_loss": 0.14503081142902374,
"eval_runtime": 152.5692,
"eval_samples_per_second": 238.253,
"eval_steps_per_second": 7.446,
"eval_wer": 0.22400226886774507,
"step": 123492
},
{
"epoch": 42.0,
"grad_norm": 8.755998611450195,
"learning_rate": 4.102254282533167e-05,
"loss": 0.5291,
"step": 126504
},
{
"epoch": 42.0,
"eval_cer": 0.04194263015864137,
"eval_loss": 0.1446152627468109,
"eval_runtime": 152.4116,
"eval_samples_per_second": 238.499,
"eval_steps_per_second": 7.454,
"eval_wer": 0.224061065745285,
"step": 126504
},
{
"epoch": 43.0,
"grad_norm": 9.50841999053955,
"learning_rate": 4.031570686949173e-05,
"loss": 0.5269,
"step": 129516
},
{
"epoch": 43.0,
"eval_cer": 0.04270048864220919,
"eval_loss": 0.14530107378959656,
"eval_runtime": 153.731,
"eval_samples_per_second": 236.452,
"eval_steps_per_second": 7.39,
"eval_wer": 0.22547219080624353,
"step": 129516
},
{
"epoch": 44.0,
"grad_norm": 28.027828216552734,
"learning_rate": 3.960887091365178e-05,
"loss": 0.5253,
"step": 132528
},
{
"epoch": 44.0,
"eval_cer": 0.042549192030244654,
"eval_loss": 0.14459766447544098,
"eval_runtime": 179.3972,
"eval_samples_per_second": 202.623,
"eval_steps_per_second": 6.332,
"eval_wer": 0.22531309337290018,
"step": 132528
},
{
"epoch": 45.0,
"grad_norm": 9.71485710144043,
"learning_rate": 3.890226970904261e-05,
"loss": 0.523,
"step": 135540
},
{
"epoch": 45.0,
"eval_cer": 0.041189356420890666,
"eval_loss": 0.1429988592863083,
"eval_runtime": 151.6718,
"eval_samples_per_second": 239.662,
"eval_steps_per_second": 7.49,
"eval_wer": 0.22018738910735963,
"step": 135540
},
{
"epoch": 46.0,
"grad_norm": 13.113300323486328,
"learning_rate": 3.819519900197191e-05,
"loss": 0.5192,
"step": 138552
},
{
"epoch": 46.0,
"eval_cer": 0.040866590315366325,
"eval_loss": 0.14137160778045654,
"eval_runtime": 152.9576,
"eval_samples_per_second": 237.648,
"eval_steps_per_second": 7.427,
"eval_wer": 0.21718528971296747,
"step": 138552
},
{
"epoch": 47.0,
"grad_norm": 15.17225456237793,
"learning_rate": 3.7488597797362736e-05,
"loss": 0.518,
"step": 141564
},
{
"epoch": 47.0,
"eval_cer": 0.040502561497488015,
"eval_loss": 0.14037571847438812,
"eval_runtime": 151.9659,
"eval_samples_per_second": 239.198,
"eval_steps_per_second": 7.475,
"eval_wer": 0.21598168304332638,
"step": 141564
},
{
"epoch": 48.0,
"grad_norm": 5.159524917602539,
"learning_rate": 3.678152709029203e-05,
"loss": 0.5139,
"step": 144576
},
{
"epoch": 48.0,
"eval_cer": 0.04006930301777139,
"eval_loss": 0.13999390602111816,
"eval_runtime": 161.9319,
"eval_samples_per_second": 224.477,
"eval_steps_per_second": 7.015,
"eval_wer": 0.2143319118323528,
"step": 144576
},
{
"epoch": 49.0,
"grad_norm": 5.24137020111084,
"learning_rate": 3.6074925885682855e-05,
"loss": 0.5133,
"step": 147588
},
{
"epoch": 49.0,
"eval_cer": 0.04118523014965527,
"eval_loss": 0.14138683676719666,
"eval_runtime": 154.3411,
"eval_samples_per_second": 235.517,
"eval_steps_per_second": 7.36,
"eval_wer": 0.21796694232026315,
"step": 147588
},
{
"epoch": 50.0,
"grad_norm": 4.531148910522461,
"learning_rate": 3.5367855178612144e-05,
"loss": 0.5114,
"step": 150600
},
{
"epoch": 50.0,
"eval_cer": 0.04037235471628217,
"eval_loss": 0.14019279181957245,
"eval_runtime": 152.1041,
"eval_samples_per_second": 238.981,
"eval_steps_per_second": 7.469,
"eval_wer": 0.21485070781064639,
"step": 150600
},
{
"epoch": 51.0,
"grad_norm": 6.761490821838379,
"learning_rate": 3.4661019222772204e-05,
"loss": 0.5087,
"step": 153612
},
{
"epoch": 51.0,
"eval_cer": 0.04063826997367439,
"eval_loss": 0.14041763544082642,
"eval_runtime": 154.2132,
"eval_samples_per_second": 235.713,
"eval_steps_per_second": 7.366,
"eval_wer": 0.21654889997959403,
"step": 153612
},
{
"epoch": 52.0,
"grad_norm": 4.379857540130615,
"learning_rate": 3.395441801816303e-05,
"loss": 0.5066,
"step": 156624
},
{
"epoch": 52.0,
"eval_cer": 0.04042691319150575,
"eval_loss": 0.13891662657260895,
"eval_runtime": 159.6511,
"eval_samples_per_second": 227.684,
"eval_steps_per_second": 7.116,
"eval_wer": 0.215715367774469,
"step": 156624
},
{
"epoch": 53.0,
"grad_norm": 19.332077026367188,
"learning_rate": 3.324734731109233e-05,
"loss": 0.5037,
"step": 159636
},
{
"epoch": 53.0,
"eval_cer": 0.03977129453965943,
"eval_loss": 0.1375364065170288,
"eval_runtime": 164.6123,
"eval_samples_per_second": 220.822,
"eval_steps_per_second": 6.901,
"eval_wer": 0.2132078538793834,
"step": 159636
},
{
"epoch": 54.0,
"grad_norm": 11.953184127807617,
"learning_rate": 3.254098085771392e-05,
"loss": 0.5024,
"step": 162648
},
{
"epoch": 54.0,
"eval_cer": 0.039842816574406296,
"eval_loss": 0.13721118867397308,
"eval_runtime": 156.3278,
"eval_samples_per_second": 232.524,
"eval_steps_per_second": 7.267,
"eval_wer": 0.21213221688438805,
"step": 162648
},
{
"epoch": 55.0,
"grad_norm": 7.574815273284912,
"learning_rate": 3.183391015064322e-05,
"loss": 0.5,
"step": 165660
},
{
"epoch": 55.0,
"eval_cer": 0.04010139623849114,
"eval_loss": 0.13785392045974731,
"eval_runtime": 152.1653,
"eval_samples_per_second": 238.885,
"eval_steps_per_second": 7.466,
"eval_wer": 0.2131732674808305,
"step": 165660
},
{
"epoch": 56.0,
"grad_norm": 11.841280937194824,
"learning_rate": 3.112707419480328e-05,
"loss": 0.4976,
"step": 168672
},
{
"epoch": 56.0,
"eval_cer": 0.038647114865304755,
"eval_loss": 0.13485907018184662,
"eval_runtime": 153.6401,
"eval_samples_per_second": 236.592,
"eval_steps_per_second": 7.394,
"eval_wer": 0.20721403101016495,
"step": 168672
},
{
"epoch": 57.0,
"grad_norm": 14.78765869140625,
"learning_rate": 3.0420238238963334e-05,
"loss": 0.4948,
"step": 171684
},
{
"epoch": 57.0,
"eval_cer": 0.0392779758897387,
"eval_loss": 0.13624149560928345,
"eval_runtime": 152.0309,
"eval_samples_per_second": 239.096,
"eval_steps_per_second": 7.472,
"eval_wer": 0.2102472581632547,
"step": 171684
},
{
"epoch": 58.0,
"grad_norm": 7.023338794708252,
"learning_rate": 2.9713402283123397e-05,
"loss": 0.4933,
"step": 174696
},
{
"epoch": 58.0,
"eval_cer": 0.038942372495926456,
"eval_loss": 0.13551433384418488,
"eval_runtime": 151.9877,
"eval_samples_per_second": 239.164,
"eval_steps_per_second": 7.474,
"eval_wer": 0.20676094918912188,
"step": 174696
},
{
"epoch": 59.0,
"grad_norm": 6.568221092224121,
"learning_rate": 2.9006801078514222e-05,
"loss": 0.4924,
"step": 177708
},
{
"epoch": 59.0,
"eval_cer": 0.03848756571086942,
"eval_loss": 0.13611619174480438,
"eval_runtime": 177.5315,
"eval_samples_per_second": 204.752,
"eval_steps_per_second": 6.399,
"eval_wer": 0.20549508700208555,
"step": 177708
},
{
"epoch": 60.0,
"grad_norm": 23.931304931640625,
"learning_rate": 2.8300199873905052e-05,
"loss": 0.4901,
"step": 180720
},
{
"epoch": 60.0,
"eval_cer": 0.03840274791325294,
"eval_loss": 0.13464532792568207,
"eval_runtime": 153.1916,
"eval_samples_per_second": 237.285,
"eval_steps_per_second": 7.416,
"eval_wer": 0.2053671173274398,
"step": 180720
},
{
"epoch": 61.0,
"grad_norm": 18.084495544433594,
"learning_rate": 2.759312916683434e-05,
"loss": 0.4898,
"step": 183732
},
{
"epoch": 61.0,
"eval_cer": 0.038370654692533195,
"eval_loss": 0.13341517746448517,
"eval_runtime": 151.6501,
"eval_samples_per_second": 239.696,
"eval_steps_per_second": 7.491,
"eval_wer": 0.2050074187824896,
"step": 183732
},
{
"epoch": 62.0,
"grad_norm": 2.890596866607666,
"learning_rate": 2.6886293210994404e-05,
"loss": 0.4873,
"step": 186744
},
{
"epoch": 62.0,
"eval_cer": 0.038351857234683054,
"eval_loss": 0.1341981142759323,
"eval_runtime": 150.7747,
"eval_samples_per_second": 241.088,
"eval_steps_per_second": 7.534,
"eval_wer": 0.20600696570066857,
"step": 186744
},
{
"epoch": 63.0,
"grad_norm": 11.759881973266602,
"learning_rate": 2.617969200638523e-05,
"loss": 0.4865,
"step": 189756
},
{
"epoch": 63.0,
"eval_cer": 0.03869296232347583,
"eval_loss": 0.13458400964736938,
"eval_runtime": 152.6729,
"eval_samples_per_second": 238.091,
"eval_steps_per_second": 7.441,
"eval_wer": 0.20699613669928163,
"step": 189756
},
{
"epoch": 64.0,
"grad_norm": 13.245360374450684,
"learning_rate": 2.547309080177606e-05,
"loss": 0.4842,
"step": 192768
},
{
"epoch": 64.0,
"eval_cer": 0.03874110215455545,
"eval_loss": 0.13456492125988007,
"eval_runtime": 153.3987,
"eval_samples_per_second": 236.964,
"eval_steps_per_second": 7.406,
"eval_wer": 0.2072278655695861,
"step": 192768
},
{
"epoch": 65.0,
"grad_norm": 11.684355735778809,
"learning_rate": 2.4766020094705352e-05,
"loss": 0.4822,
"step": 195780
},
{
"epoch": 65.0,
"eval_cer": 0.03811941062175572,
"eval_loss": 0.13252592086791992,
"eval_runtime": 156.7414,
"eval_samples_per_second": 231.911,
"eval_steps_per_second": 7.248,
"eval_wer": 0.20395599226648128,
"step": 195780
},
{
"epoch": 66.0,
"grad_norm": 25.19974708557129,
"learning_rate": 2.405918413886541e-05,
"loss": 0.4814,
"step": 198792
},
{
"epoch": 66.0,
"eval_cer": 0.037090135185815165,
"eval_loss": 0.13119570910930634,
"eval_runtime": 154.348,
"eval_samples_per_second": 235.507,
"eval_steps_per_second": 7.36,
"eval_wer": 0.19890983671761242,
"step": 198792
},
{
"epoch": 67.0,
"grad_norm": 12.580814361572266,
"learning_rate": 2.335234818302547e-05,
"loss": 0.4796,
"step": 201804
},
{
"epoch": 67.0,
"eval_cer": 0.03740739959635898,
"eval_loss": 0.13117973506450653,
"eval_runtime": 162.9804,
"eval_samples_per_second": 223.033,
"eval_steps_per_second": 6.97,
"eval_wer": 0.1999750977930419,
"step": 201804
},
{
"epoch": 68.0,
"grad_norm": 11.110360145568848,
"learning_rate": 2.2645746978416297e-05,
"loss": 0.4771,
"step": 204816
},
{
"epoch": 68.0,
"eval_cer": 0.037213006373713636,
"eval_loss": 0.1303921490907669,
"eval_runtime": 152.708,
"eval_samples_per_second": 238.036,
"eval_steps_per_second": 7.439,
"eval_wer": 0.1997191584437504,
"step": 204816
},
{
"epoch": 69.0,
"grad_norm": 4.782271385192871,
"learning_rate": 2.193891102257636e-05,
"loss": 0.4756,
"step": 207828
},
{
"epoch": 69.0,
"eval_cer": 0.037708617396542916,
"eval_loss": 0.13083402812480927,
"eval_runtime": 152.8061,
"eval_samples_per_second": 237.883,
"eval_steps_per_second": 7.434,
"eval_wer": 0.20086396823585156,
"step": 207828
},
{
"epoch": 70.0,
"grad_norm": 7.983453750610352,
"learning_rate": 2.1232544569197956e-05,
"loss": 0.4745,
"step": 210840
},
{
"epoch": 70.0,
"eval_cer": 0.0370488724734612,
"eval_loss": 0.13116249442100525,
"eval_runtime": 151.9447,
"eval_samples_per_second": 239.232,
"eval_steps_per_second": 7.476,
"eval_wer": 0.19823886058568607,
"step": 210840
},
{
"epoch": 71.0,
"grad_norm": 23.63794708251953,
"learning_rate": 2.052547386212725e-05,
"loss": 0.4738,
"step": 213852
},
{
"epoch": 71.0,
"eval_cer": 0.037366136884005016,
"eval_loss": 0.1306936889886856,
"eval_runtime": 154.0224,
"eval_samples_per_second": 236.005,
"eval_steps_per_second": 7.376,
"eval_wer": 0.20006848106913475,
"step": 213852
},
{
"epoch": 72.0,
"grad_norm": 10.838956832885742,
"learning_rate": 1.9818637906287305e-05,
"loss": 0.473,
"step": 216864
},
{
"epoch": 72.0,
"eval_cer": 0.0372285945094918,
"eval_loss": 0.13071005046367645,
"eval_runtime": 154.8642,
"eval_samples_per_second": 234.722,
"eval_steps_per_second": 7.335,
"eval_wer": 0.19911043782921928,
"step": 216864
},
{
"epoch": 73.0,
"grad_norm": 11.969744682312012,
"learning_rate": 1.9111801950447367e-05,
"loss": 0.472,
"step": 219876
},
{
"epoch": 73.0,
"eval_cer": 0.03662890975661418,
"eval_loss": 0.12924158573150635,
"eval_runtime": 154.3055,
"eval_samples_per_second": 235.572,
"eval_steps_per_second": 7.362,
"eval_wer": 0.19607375203627422,
"step": 219876
},
{
"epoch": 74.0,
"grad_norm": 6.115599155426025,
"learning_rate": 1.840496599460743e-05,
"loss": 0.4693,
"step": 222888
},
{
"epoch": 74.0,
"eval_cer": 0.036412051279465014,
"eval_loss": 0.12866230309009552,
"eval_runtime": 157.4725,
"eval_samples_per_second": 230.834,
"eval_steps_per_second": 7.214,
"eval_wer": 0.19521600935216216,
"step": 222888
},
{
"epoch": 75.0,
"grad_norm": 44.51272964477539,
"learning_rate": 1.7698130038767486e-05,
"loss": 0.4693,
"step": 225900
},
{
"epoch": 75.0,
"eval_cer": 0.03628138602367746,
"eval_loss": 0.12844808399677277,
"eval_runtime": 164.6927,
"eval_samples_per_second": 220.714,
"eval_steps_per_second": 6.898,
"eval_wer": 0.1944724017832747,
"step": 225900
},
{
"epoch": 76.0,
"grad_norm": 9.162590026855469,
"learning_rate": 1.6991528834158312e-05,
"loss": 0.4664,
"step": 228912
},
{
"epoch": 76.0,
"eval_cer": 0.03683797416587427,
"eval_loss": 0.12876588106155396,
"eval_runtime": 152.2551,
"eval_samples_per_second": 238.744,
"eval_steps_per_second": 7.461,
"eval_wer": 0.19688999104212276,
"step": 228912
},
{
"epoch": 77.0,
"grad_norm": 3.896597146987915,
"learning_rate": 1.6284692878318375e-05,
"loss": 0.4651,
"step": 231924
},
{
"epoch": 77.0,
"eval_cer": 0.03683384789463887,
"eval_loss": 0.12869854271411896,
"eval_runtime": 168.0203,
"eval_samples_per_second": 216.343,
"eval_steps_per_second": 6.761,
"eval_wer": 0.1971009680732955,
"step": 231924
},
{
"epoch": 78.0,
"grad_norm": 15.388148307800293,
"learning_rate": 1.5577856922478434e-05,
"loss": 0.4641,
"step": 234936
},
{
"epoch": 78.0,
"eval_cer": 0.03656747416266495,
"eval_loss": 0.1286703646183014,
"eval_runtime": 154.3849,
"eval_samples_per_second": 235.45,
"eval_steps_per_second": 7.358,
"eval_wer": 0.1952090920724516,
"step": 234936
},
{
"epoch": 79.0,
"grad_norm": 2.635507345199585,
"learning_rate": 1.4871020966638496e-05,
"loss": 0.462,
"step": 237948
},
{
"epoch": 79.0,
"eval_cer": 0.03641342670321015,
"eval_loss": 0.12868022918701172,
"eval_runtime": 152.2309,
"eval_samples_per_second": 238.782,
"eval_steps_per_second": 7.462,
"eval_wer": 0.19447586042313,
"step": 237948
},
{
"epoch": 80.0,
"grad_norm": 5.287237167358398,
"learning_rate": 1.4163950259567787e-05,
"loss": 0.4608,
"step": 240960
},
{
"epoch": 80.0,
"eval_cer": 0.036332276702247354,
"eval_loss": 0.12745150923728943,
"eval_runtime": 154.8487,
"eval_samples_per_second": 234.745,
"eval_steps_per_second": 7.336,
"eval_wer": 0.19517796431375398,
"step": 240960
},
{
"epoch": 81.0,
"grad_norm": 11.394750595092773,
"learning_rate": 1.3457349054958616e-05,
"loss": 0.4594,
"step": 243972
},
{
"epoch": 81.0,
"eval_cer": 0.0360989131401566,
"eval_loss": 0.12770119309425354,
"eval_runtime": 152.6766,
"eval_samples_per_second": 238.085,
"eval_steps_per_second": 7.441,
"eval_wer": 0.19389480892744118,
"step": 243972
},
{
"epoch": 82.0,
"grad_norm": 9.252601623535156,
"learning_rate": 1.2750278347887909e-05,
"loss": 0.4595,
"step": 246984
},
{
"epoch": 82.0,
"eval_cer": 0.03594715805361035,
"eval_loss": 0.12681059539318085,
"eval_runtime": 152.0339,
"eval_samples_per_second": 239.091,
"eval_steps_per_second": 7.472,
"eval_wer": 0.19371841829482137,
"step": 246984
},
{
"epoch": 83.0,
"grad_norm": 16.776588439941406,
"learning_rate": 1.2043677143278735e-05,
"loss": 0.4575,
"step": 249996
},
{
"epoch": 83.0,
"eval_cer": 0.03615393008996188,
"eval_loss": 0.12722131609916687,
"eval_runtime": 152.0731,
"eval_samples_per_second": 239.03,
"eval_steps_per_second": 7.47,
"eval_wer": 0.1942475901926808,
"step": 249996
},
{
"epoch": 84.0,
"grad_norm": 8.937053680419922,
"learning_rate": 1.1336606436208028e-05,
"loss": 0.4569,
"step": 253008
},
{
"epoch": 84.0,
"eval_cer": 0.03605260720740382,
"eval_loss": 0.12680456042289734,
"eval_runtime": 152.9563,
"eval_samples_per_second": 237.65,
"eval_steps_per_second": 7.427,
"eval_wer": 0.19341059934770052,
"step": 253008
},
{
"epoch": 85.0,
"grad_norm": 13.364398956298828,
"learning_rate": 1.0630005231598857e-05,
"loss": 0.4552,
"step": 256020
},
{
"epoch": 85.0,
"eval_cer": 0.035670697880838785,
"eval_loss": 0.12619073688983917,
"eval_runtime": 156.3815,
"eval_samples_per_second": 232.444,
"eval_steps_per_second": 7.264,
"eval_wer": 0.1916293998222259,
"step": 256020
},
{
"epoch": 86.0,
"grad_norm": 9.33234691619873,
"learning_rate": 9.923404026989685e-06,
"loss": 0.4538,
"step": 259032
},
{
"epoch": 86.0,
"eval_cer": 0.03549051737022648,
"eval_loss": 0.12592804431915283,
"eval_runtime": 156.5096,
"eval_samples_per_second": 232.254,
"eval_steps_per_second": 7.258,
"eval_wer": 0.19070940162071864,
"step": 259032
},
{
"epoch": 87.0,
"grad_norm": 4.286988735198975,
"learning_rate": 9.216568071149744e-06,
"loss": 0.4532,
"step": 262044
},
{
"epoch": 87.0,
"eval_cer": 0.03551573347222057,
"eval_loss": 0.12575581669807434,
"eval_runtime": 155.6329,
"eval_samples_per_second": 233.562,
"eval_steps_per_second": 7.299,
"eval_wer": 0.19122473895915693,
"step": 262044
},
{
"epoch": 88.0,
"grad_norm": 7.920403957366943,
"learning_rate": 8.509732115309804e-06,
"loss": 0.4524,
"step": 265056
},
{
"epoch": 88.0,
"eval_cer": 0.03555103601501229,
"eval_loss": 0.1259673833847046,
"eval_runtime": 166.3365,
"eval_samples_per_second": 218.533,
"eval_steps_per_second": 6.83,
"eval_wer": 0.19095150641058897,
"step": 265056
},
{
"epoch": 89.0,
"grad_norm": 9.81010913848877,
"learning_rate": 7.802896159469865e-06,
"loss": 0.4501,
"step": 268068
},
{
"epoch": 89.0,
"eval_cer": 0.03596458008771536,
"eval_loss": 0.12655647099018097,
"eval_runtime": 155.3246,
"eval_samples_per_second": 234.026,
"eval_steps_per_second": 7.314,
"eval_wer": 0.19276729233461648,
"step": 268068
},
{
"epoch": 90.0,
"grad_norm": 10.394911766052246,
"learning_rate": 7.096060203629924e-06,
"loss": 0.4491,
"step": 271080
},
{
"epoch": 90.0,
"eval_cer": 0.03546667669197752,
"eval_loss": 0.12519720196723938,
"eval_runtime": 177.0207,
"eval_samples_per_second": 205.343,
"eval_steps_per_second": 6.417,
"eval_wer": 0.19042579315258482,
"step": 271080
},
{
"epoch": 91.0,
"grad_norm": 9.381885528564453,
"learning_rate": 6.3892242477899846e-06,
"loss": 0.4486,
"step": 274092
},
{
"epoch": 91.0,
"eval_cer": 0.03518288092589859,
"eval_loss": 0.12525735795497894,
"eval_runtime": 173.742,
"eval_samples_per_second": 209.218,
"eval_steps_per_second": 6.538,
"eval_wer": 0.18893166073509932,
"step": 274092
},
{
"epoch": 92.0,
"grad_norm": 26.816091537475586,
"learning_rate": 5.682623043180811e-06,
"loss": 0.4487,
"step": 277104
},
{
"epoch": 92.0,
"eval_cer": 0.035369480080654846,
"eval_loss": 0.12525933980941772,
"eval_runtime": 153.4339,
"eval_samples_per_second": 236.91,
"eval_steps_per_second": 7.404,
"eval_wer": 0.190249402519965,
"step": 277104
},
{
"epoch": 93.0,
"grad_norm": 12.131733894348145,
"learning_rate": 4.975552336110105e-06,
"loss": 0.4471,
"step": 280116
},
{
"epoch": 93.0,
"eval_cer": 0.03521634957036347,
"eval_loss": 0.1251526027917862,
"eval_runtime": 176.0621,
"eval_samples_per_second": 206.461,
"eval_steps_per_second": 6.452,
"eval_wer": 0.1893743666365765,
"step": 280116
},
{
"epoch": 94.0,
"grad_norm": 14.601805686950684,
"learning_rate": 4.2687163802701646e-06,
"loss": 0.4458,
"step": 283128
},
{
"epoch": 94.0,
"eval_cer": 0.0351705021121924,
"eval_loss": 0.1253127008676529,
"eval_runtime": 177.0148,
"eval_samples_per_second": 205.35,
"eval_steps_per_second": 6.418,
"eval_wer": 0.18914263776627205,
"step": 283128
},
{
"epoch": 95.0,
"grad_norm": 3.8078722953796387,
"learning_rate": 3.562115175660992e-06,
"loss": 0.4449,
"step": 286140
},
{
"epoch": 95.0,
"eval_cer": 0.035079265670431965,
"eval_loss": 0.12475291639566422,
"eval_runtime": 168.7285,
"eval_samples_per_second": 215.435,
"eval_steps_per_second": 6.733,
"eval_wer": 0.1884266993162269,
"step": 286140
},
{
"epoch": 96.0,
"grad_norm": 4.562527656555176,
"learning_rate": 2.855044468590285e-06,
"loss": 0.4434,
"step": 289152
},
{
"epoch": 96.0,
"eval_cer": 0.035126030077766456,
"eval_loss": 0.1246921494603157,
"eval_runtime": 179.6539,
"eval_samples_per_second": 202.333,
"eval_steps_per_second": 6.323,
"eval_wer": 0.18909421680829797,
"step": 289152
},
{
"epoch": 97.0,
"grad_norm": 8.47780990600586,
"learning_rate": 2.1482085127503454e-06,
"loss": 0.4435,
"step": 292164
},
{
"epoch": 97.0,
"eval_cer": 0.035188841095460825,
"eval_loss": 0.12471602112054825,
"eval_runtime": 152.6845,
"eval_samples_per_second": 238.073,
"eval_steps_per_second": 7.44,
"eval_wer": 0.18912880320685088,
"step": 292164
},
{
"epoch": 98.0,
"grad_norm": 16.65981101989746,
"learning_rate": 1.4413725569104053e-06,
"loss": 0.4444,
"step": 295176
},
{
"epoch": 98.0,
"eval_cer": 0.03511594363696882,
"eval_loss": 0.12448572367429733,
"eval_runtime": 159.0007,
"eval_samples_per_second": 228.615,
"eval_steps_per_second": 7.145,
"eval_wer": 0.1887518114626242,
"step": 295176
},
{
"epoch": 99.0,
"grad_norm": 10.248809814453125,
"learning_rate": 7.345366010704655e-07,
"loss": 0.4429,
"step": 298188
},
{
"epoch": 99.0,
"eval_cer": 0.03512648855234817,
"eval_loss": 0.12444119900465012,
"eval_runtime": 155.6325,
"eval_samples_per_second": 233.563,
"eval_steps_per_second": 7.299,
"eval_wer": 0.18870684914450545,
"step": 298188
}
],
"logging_steps": 500,
"max_steps": 301200,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 8
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0954167922548843e+21,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}