{ "best_global_step": 286140, "best_metric": 0.1884266993162269, "best_model_checkpoint": "wav2vec2-asr-africa-base-fintuned-luganda-400hrs-v0.1/checkpoint-286140", "epoch": 99.0, "eval_steps": 500, "global_step": 298188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.066422462463379, "learning_rate": 6.98140770252324e-05, "loss": 3.2575, "step": 3012 }, { "epoch": 1.0, "eval_cer": 0.10930492502565166, "eval_loss": 0.37255266308784485, "eval_runtime": 151.5408, "eval_samples_per_second": 239.869, "eval_steps_per_second": 7.496, "eval_wer": 0.534090083733671, "step": 3012 }, { "epoch": 2.0, "grad_norm": 15.221166610717773, "learning_rate": 6.929480730277542e-05, "loss": 0.8396, "step": 6024 }, { "epoch": 2.0, "eval_cer": 0.07810481279107405, "eval_loss": 0.2538328468799591, "eval_runtime": 152.0166, "eval_samples_per_second": 239.119, "eval_steps_per_second": 7.473, "eval_wer": 0.3983557626127949, "step": 6024 }, { "epoch": 3.0, "grad_norm": 7.060754299163818, "learning_rate": 6.858773659570472e-05, "loss": 0.7487, "step": 9036 }, { "epoch": 3.0, "eval_cer": 0.0682448584368034, "eval_loss": 0.22769133746623993, "eval_runtime": 153.0969, "eval_samples_per_second": 237.431, "eval_steps_per_second": 7.42, "eval_wer": 0.35308562554689743, "step": 9036 }, { "epoch": 4.0, "grad_norm": 9.472733497619629, "learning_rate": 6.788090063986478e-05, "loss": 0.7226, "step": 12048 }, { "epoch": 4.0, "eval_cer": 0.06425062788093966, "eval_loss": 0.21326717734336853, "eval_runtime": 171.3308, "eval_samples_per_second": 212.163, "eval_steps_per_second": 6.63, "eval_wer": 0.33642189872410777, "step": 12048 }, { "epoch": 5.0, "grad_norm": 23.48148536682129, "learning_rate": 6.717429943525561e-05, "loss": 0.7096, "step": 15060 }, { "epoch": 5.0, "eval_cer": 0.06122744648913919, "eval_loss": 0.20842401683330536, "eval_runtime": 174.4012, "eval_samples_per_second": 208.427, "eval_steps_per_second": 6.514, "eval_wer": 0.3211001241651708, "step": 15060 }, { "epoch": 6.0, "grad_norm": 7.9525041580200195, "learning_rate": 6.646746347941567e-05, "loss": 0.6979, "step": 18072 }, { "epoch": 6.0, "eval_cer": 0.06395399482657282, "eval_loss": 0.20914477109909058, "eval_runtime": 153.4786, "eval_samples_per_second": 236.841, "eval_steps_per_second": 7.402, "eval_wer": 0.3287402596055075, "step": 18072 }, { "epoch": 7.0, "grad_norm": 43.115169525146484, "learning_rate": 6.57608622748065e-05, "loss": 0.6899, "step": 21084 }, { "epoch": 7.0, "eval_cer": 0.06076026089037598, "eval_loss": 0.20185638964176178, "eval_runtime": 173.3256, "eval_samples_per_second": 209.721, "eval_steps_per_second": 6.554, "eval_wer": 0.31623727652863237, "step": 21084 }, { "epoch": 8.0, "grad_norm": 4.6848602294921875, "learning_rate": 6.505426107019733e-05, "loss": 0.6765, "step": 24096 }, { "epoch": 8.0, "eval_cer": 0.060137193933831115, "eval_loss": 0.19728189706802368, "eval_runtime": 154.1162, "eval_samples_per_second": 235.861, "eval_steps_per_second": 7.371, "eval_wer": 0.31059969356450884, "step": 24096 }, { "epoch": 9.0, "grad_norm": 5.830124378204346, "learning_rate": 6.434719036312661e-05, "loss": 0.6701, "step": 27108 }, { "epoch": 9.0, "eval_cer": 0.05820976679231927, "eval_loss": 0.19281432032585144, "eval_runtime": 156.5256, "eval_samples_per_second": 232.23, "eval_steps_per_second": 7.258, "eval_wer": 0.30471654717065966, "step": 27108 }, { "epoch": 10.0, "grad_norm": Infinity, "learning_rate": 6.364058915851744e-05, "loss": 0.6621, "step": 30120 }, { "epoch": 10.0, "eval_cer": 0.058245986284274416, "eval_loss": 0.19237777590751648, "eval_runtime": 155.0055, "eval_samples_per_second": 234.508, "eval_steps_per_second": 7.329, "eval_wer": 0.3038691804061135, "step": 30120 }, { "epoch": 11.0, "grad_norm": 5.588762283325195, "learning_rate": 6.29337532026775e-05, "loss": 0.6554, "step": 33132 }, { "epoch": 11.0, "eval_cer": 0.05662298626501848, "eval_loss": 0.18665704131126404, "eval_runtime": 153.969, "eval_samples_per_second": 236.087, "eval_steps_per_second": 7.378, "eval_wer": 0.2982627252006876, "step": 33132 }, { "epoch": 12.0, "grad_norm": 5.955714702606201, "learning_rate": 6.222691724683756e-05, "loss": 0.6475, "step": 36144 }, { "epoch": 12.0, "eval_cer": 0.05515495065438077, "eval_loss": 0.1829417496919632, "eval_runtime": 155.1287, "eval_samples_per_second": 234.322, "eval_steps_per_second": 7.323, "eval_wer": 0.2873610923768119, "step": 36144 }, { "epoch": 13.0, "grad_norm": 31.985713958740234, "learning_rate": 6.152008129099762e-05, "loss": 0.6429, "step": 39156 }, { "epoch": 13.0, "eval_cer": 0.05419032013446143, "eval_loss": 0.1801947057247162, "eval_runtime": 168.8898, "eval_samples_per_second": 215.229, "eval_steps_per_second": 6.726, "eval_wer": 0.28527553254407173, "step": 39156 }, { "epoch": 14.0, "grad_norm": 6.557770252227783, "learning_rate": 6.081324533515768e-05, "loss": 0.6351, "step": 42168 }, { "epoch": 14.0, "eval_cer": 0.055327337097104, "eval_loss": 0.18261073529720306, "eval_runtime": 159.2183, "eval_samples_per_second": 228.303, "eval_steps_per_second": 7.135, "eval_wer": 0.2872746263804296, "step": 42168 }, { "epoch": 15.0, "grad_norm": 8.820505142211914, "learning_rate": 6.010640937931774e-05, "loss": 0.6319, "step": 45180 }, { "epoch": 15.0, "eval_cer": 0.05439250742499585, "eval_loss": 0.17926117777824402, "eval_runtime": 152.0308, "eval_samples_per_second": 239.096, "eval_steps_per_second": 7.472, "eval_wer": 0.28315884495263394, "step": 45180 }, { "epoch": 16.0, "grad_norm": 8.058792114257812, "learning_rate": 5.93995734234778e-05, "loss": 0.6251, "step": 48192 }, { "epoch": 16.0, "eval_cer": 0.054798715904391546, "eval_loss": 0.1785019189119339, "eval_runtime": 154.575, "eval_samples_per_second": 235.161, "eval_steps_per_second": 7.349, "eval_wer": 0.283826362444705, "step": 48192 }, { "epoch": 17.0, "grad_norm": 7.038857936859131, "learning_rate": 5.86925027164071e-05, "loss": 0.6172, "step": 51204 }, { "epoch": 17.0, "eval_cer": 0.051710431121988164, "eval_loss": 0.17091116309165955, "eval_runtime": 154.6152, "eval_samples_per_second": 235.1, "eval_steps_per_second": 7.347, "eval_wer": 0.27192518270265037, "step": 51204 }, { "epoch": 18.0, "grad_norm": NaN, "learning_rate": 5.7985901511797926e-05, "loss": 0.6122, "step": 54216 }, { "epoch": 18.0, "eval_cer": 0.05208454638066411, "eval_loss": 0.1720370054244995, "eval_runtime": 154.3969, "eval_samples_per_second": 235.432, "eval_steps_per_second": 7.358, "eval_wer": 0.27160698783596365, "step": 54216 }, { "epoch": 19.0, "grad_norm": 4.123114109039307, "learning_rate": 5.727930030718875e-05, "loss": 0.6068, "step": 57228 }, { "epoch": 19.0, "eval_cer": 0.0505266497520111, "eval_loss": 0.16939722001552582, "eval_runtime": 154.2835, "eval_samples_per_second": 235.605, "eval_steps_per_second": 7.363, "eval_wer": 0.26646744901100194, "step": 57228 }, { "epoch": 20.0, "grad_norm": 11.014168739318848, "learning_rate": 5.657222960011804e-05, "loss": 0.6035, "step": 60240 }, { "epoch": 20.0, "eval_cer": 0.049698644657441546, "eval_loss": 0.1669510304927826, "eval_runtime": 155.9471, "eval_samples_per_second": 233.092, "eval_steps_per_second": 7.285, "eval_wer": 0.26278053892526226, "step": 60240 }, { "epoch": 21.0, "grad_norm": 7.567544937133789, "learning_rate": 5.5865158893047335e-05, "loss": 0.5957, "step": 63252 }, { "epoch": 21.0, "eval_cer": 0.050415698903237105, "eval_loss": 0.1704263538122177, "eval_runtime": 155.0829, "eval_samples_per_second": 234.391, "eval_steps_per_second": 7.325, "eval_wer": 0.2643818891782618, "step": 63252 }, { "epoch": 22.0, "grad_norm": 3.353114366531372, "learning_rate": 5.5158557688438164e-05, "loss": 0.5909, "step": 66264 }, { "epoch": 22.0, "eval_cer": 0.049318569229203364, "eval_loss": 0.16528591513633728, "eval_runtime": 155.149, "eval_samples_per_second": 234.291, "eval_steps_per_second": 7.322, "eval_wer": 0.25990640920551583, "step": 66264 }, { "epoch": 23.0, "grad_norm": 5.273142337799072, "learning_rate": 5.445172173259822e-05, "loss": 0.5879, "step": 69276 }, { "epoch": 23.0, "eval_cer": 0.048735389561267335, "eval_loss": 0.16745983064174652, "eval_runtime": 155.8132, "eval_samples_per_second": 233.292, "eval_steps_per_second": 7.291, "eval_wer": 0.2573400984328903, "step": 69276 }, { "epoch": 24.0, "grad_norm": 12.098519325256348, "learning_rate": 5.374512052798905e-05, "loss": 0.5966, "step": 72288 }, { "epoch": 24.0, "eval_cer": 0.05103463958854657, "eval_loss": 0.19431033730506897, "eval_runtime": 154.0761, "eval_samples_per_second": 235.922, "eval_steps_per_second": 7.373, "eval_wer": 0.2738551037419025, "step": 72288 }, { "epoch": 25.0, "grad_norm": 12.023294448852539, "learning_rate": 5.3038519323379875e-05, "loss": 0.6444, "step": 75300 }, { "epoch": 25.0, "eval_cer": 0.05154996501838942, "eval_loss": 0.1868334412574768, "eval_runtime": 152.5369, "eval_samples_per_second": 238.303, "eval_steps_per_second": 7.447, "eval_wer": 0.27229871580702175, "step": 75300 }, { "epoch": 26.0, "grad_norm": 9.066435813903809, "learning_rate": 5.2331448616309165e-05, "loss": 0.5999, "step": 78312 }, { "epoch": 26.0, "eval_cer": 0.04910904634536157, "eval_loss": 0.16771361231803894, "eval_runtime": 154.4851, "eval_samples_per_second": 235.298, "eval_steps_per_second": 7.353, "eval_wer": 0.25782430801263095, "step": 78312 }, { "epoch": 27.0, "grad_norm": 9.274683952331543, "learning_rate": 5.1624847411699994e-05, "loss": 0.5911, "step": 81324 }, { "epoch": 27.0, "eval_cer": 0.04738747429103783, "eval_loss": 0.16794191300868988, "eval_runtime": 154.6471, "eval_samples_per_second": 235.051, "eval_steps_per_second": 7.346, "eval_wer": 0.25102462205712983, "step": 81324 }, { "epoch": 28.0, "grad_norm": 5.650504112243652, "learning_rate": 5.091777670462929e-05, "loss": 0.586, "step": 84336 }, { "epoch": 28.0, "eval_cer": 0.04840666328618075, "eval_loss": 0.1722731739282608, "eval_runtime": 153.0438, "eval_samples_per_second": 237.514, "eval_steps_per_second": 7.423, "eval_wer": 0.25386416537832335, "step": 84336 }, { "epoch": 29.0, "grad_norm": 18.539613723754883, "learning_rate": 5.021070599755859e-05, "loss": 0.5816, "step": 87348 }, { "epoch": 29.0, "eval_cer": 0.04769969548118283, "eval_loss": 0.16775010526180267, "eval_runtime": 156.0962, "eval_samples_per_second": 232.869, "eval_steps_per_second": 7.278, "eval_wer": 0.25264326550940575, "step": 87348 }, { "epoch": 30.0, "grad_norm": 4.343358039855957, "learning_rate": 4.950457429541094e-05, "loss": 0.5886, "step": 90360 }, { "epoch": 30.0, "eval_cer": 0.04993246669411401, "eval_loss": 0.18236766755580902, "eval_runtime": 151.7294, "eval_samples_per_second": 239.571, "eval_steps_per_second": 7.487, "eval_wer": 0.2629396363586056, "step": 90360 }, { "epoch": 31.0, "grad_norm": 13.675621032714844, "learning_rate": 4.879773833957101e-05, "loss": 0.5978, "step": 93372 }, { "epoch": 31.0, "eval_cer": 0.04701886072734242, "eval_loss": 0.16201142966747284, "eval_runtime": 152.4808, "eval_samples_per_second": 238.391, "eval_steps_per_second": 7.45, "eval_wer": 0.24908778373816712, "step": 93372 }, { "epoch": 32.0, "grad_norm": 5.842775821685791, "learning_rate": 4.809066763250029e-05, "loss": 0.5722, "step": 96384 }, { "epoch": 32.0, "eval_cer": 0.04652920987407537, "eval_loss": 0.15837915241718292, "eval_runtime": 153.9577, "eval_samples_per_second": 236.104, "eval_steps_per_second": 7.379, "eval_wer": 0.24719590773732322, "step": 96384 }, { "epoch": 33.0, "grad_norm": 12.179231643676758, "learning_rate": 4.738359692542959e-05, "loss": 0.5615, "step": 99396 }, { "epoch": 33.0, "eval_cer": 0.046122542920097966, "eval_loss": 0.15639054775238037, "eval_runtime": 165.4748, "eval_samples_per_second": 219.671, "eval_steps_per_second": 6.865, "eval_wer": 0.2421047898703356, "step": 99396 }, { "epoch": 34.0, "grad_norm": 11.811565399169922, "learning_rate": 4.6676760969589654e-05, "loss": 0.5566, "step": 102408 }, { "epoch": 34.0, "eval_cer": 0.04475858103950859, "eval_loss": 0.15303878486156464, "eval_runtime": 151.973, "eval_samples_per_second": 239.187, "eval_steps_per_second": 7.475, "eval_wer": 0.23680615361203053, "step": 102408 }, { "epoch": 35.0, "grad_norm": 27.314680099487305, "learning_rate": 4.5970394516211246e-05, "loss": 0.5514, "step": 105420 }, { "epoch": 35.0, "eval_cer": 0.04322590051284967, "eval_loss": 0.14988180994987488, "eval_runtime": 152.0916, "eval_samples_per_second": 239.001, "eval_steps_per_second": 7.469, "eval_wer": 0.2308780449000626, "step": 105420 }, { "epoch": 36.0, "grad_norm": 6.874896049499512, "learning_rate": 4.526332380914054e-05, "loss": 0.5485, "step": 108432 }, { "epoch": 36.0, "eval_cer": 0.043550500516700855, "eval_loss": 0.15108104050159454, "eval_runtime": 156.7097, "eval_samples_per_second": 231.958, "eval_steps_per_second": 7.249, "eval_wer": 0.23083308258194382, "step": 108432 }, { "epoch": 37.0, "grad_norm": 18.988279342651367, "learning_rate": 4.4556722604531365e-05, "loss": 0.5451, "step": 111444 }, { "epoch": 37.0, "eval_cer": 0.04385080136772137, "eval_loss": 0.15071320533752441, "eval_runtime": 172.1252, "eval_samples_per_second": 211.183, "eval_steps_per_second": 6.6, "eval_wer": 0.23185338133925454, "step": 111444 }, { "epoch": 38.0, "grad_norm": 28.172042846679688, "learning_rate": 4.384988664869143e-05, "loss": 0.5433, "step": 114456 }, { "epoch": 38.0, "eval_cer": 0.0433771971248142, "eval_loss": 0.14824804663658142, "eval_runtime": 152.6042, "eval_samples_per_second": 238.198, "eval_steps_per_second": 7.444, "eval_wer": 0.23122390888559166, "step": 114456 }, { "epoch": 39.0, "grad_norm": 4.653916835784912, "learning_rate": 4.3143050692851484e-05, "loss": 0.5391, "step": 117468 }, { "epoch": 39.0, "eval_cer": 0.04353903865215809, "eval_loss": 0.14684619009494781, "eval_runtime": 156.6046, "eval_samples_per_second": 232.113, "eval_steps_per_second": 7.254, "eval_wer": 0.22910722129415387, "step": 117468 }, { "epoch": 40.0, "grad_norm": 5.8400702476501465, "learning_rate": 4.243621473701154e-05, "loss": 0.5347, "step": 120480 }, { "epoch": 40.0, "eval_cer": 0.042970530170836796, "eval_loss": 0.1462726891040802, "eval_runtime": 178.4468, "eval_samples_per_second": 203.702, "eval_steps_per_second": 6.366, "eval_wer": 0.22744707416361443, "step": 120480 }, { "epoch": 41.0, "grad_norm": 16.060422897338867, "learning_rate": 4.172914402994084e-05, "loss": 0.5313, "step": 123492 }, { "epoch": 41.0, "eval_cer": 0.04216269795786252, "eval_loss": 0.14503081142902374, "eval_runtime": 152.5692, "eval_samples_per_second": 238.253, "eval_steps_per_second": 7.446, "eval_wer": 0.22400226886774507, "step": 123492 }, { "epoch": 42.0, "grad_norm": 8.755998611450195, "learning_rate": 4.102254282533167e-05, "loss": 0.5291, "step": 126504 }, { "epoch": 42.0, "eval_cer": 0.04194263015864137, "eval_loss": 0.1446152627468109, "eval_runtime": 152.4116, "eval_samples_per_second": 238.499, "eval_steps_per_second": 7.454, "eval_wer": 0.224061065745285, "step": 126504 }, { "epoch": 43.0, "grad_norm": 9.50841999053955, "learning_rate": 4.031570686949173e-05, "loss": 0.5269, "step": 129516 }, { "epoch": 43.0, "eval_cer": 0.04270048864220919, "eval_loss": 0.14530107378959656, "eval_runtime": 153.731, "eval_samples_per_second": 236.452, "eval_steps_per_second": 7.39, "eval_wer": 0.22547219080624353, "step": 129516 }, { "epoch": 44.0, "grad_norm": 28.027828216552734, "learning_rate": 3.960887091365178e-05, "loss": 0.5253, "step": 132528 }, { "epoch": 44.0, "eval_cer": 0.042549192030244654, "eval_loss": 0.14459766447544098, "eval_runtime": 179.3972, "eval_samples_per_second": 202.623, "eval_steps_per_second": 6.332, "eval_wer": 0.22531309337290018, "step": 132528 }, { "epoch": 45.0, "grad_norm": 9.71485710144043, "learning_rate": 3.890226970904261e-05, "loss": 0.523, "step": 135540 }, { "epoch": 45.0, "eval_cer": 0.041189356420890666, "eval_loss": 0.1429988592863083, "eval_runtime": 151.6718, "eval_samples_per_second": 239.662, "eval_steps_per_second": 7.49, "eval_wer": 0.22018738910735963, "step": 135540 }, { "epoch": 46.0, "grad_norm": 13.113300323486328, "learning_rate": 3.819519900197191e-05, "loss": 0.5192, "step": 138552 }, { "epoch": 46.0, "eval_cer": 0.040866590315366325, "eval_loss": 0.14137160778045654, "eval_runtime": 152.9576, "eval_samples_per_second": 237.648, "eval_steps_per_second": 7.427, "eval_wer": 0.21718528971296747, "step": 138552 }, { "epoch": 47.0, "grad_norm": 15.17225456237793, "learning_rate": 3.7488597797362736e-05, "loss": 0.518, "step": 141564 }, { "epoch": 47.0, "eval_cer": 0.040502561497488015, "eval_loss": 0.14037571847438812, "eval_runtime": 151.9659, "eval_samples_per_second": 239.198, "eval_steps_per_second": 7.475, "eval_wer": 0.21598168304332638, "step": 141564 }, { "epoch": 48.0, "grad_norm": 5.159524917602539, "learning_rate": 3.678152709029203e-05, "loss": 0.5139, "step": 144576 }, { "epoch": 48.0, "eval_cer": 0.04006930301777139, "eval_loss": 0.13999390602111816, "eval_runtime": 161.9319, "eval_samples_per_second": 224.477, "eval_steps_per_second": 7.015, "eval_wer": 0.2143319118323528, "step": 144576 }, { "epoch": 49.0, "grad_norm": 5.24137020111084, "learning_rate": 3.6074925885682855e-05, "loss": 0.5133, "step": 147588 }, { "epoch": 49.0, "eval_cer": 0.04118523014965527, "eval_loss": 0.14138683676719666, "eval_runtime": 154.3411, "eval_samples_per_second": 235.517, "eval_steps_per_second": 7.36, "eval_wer": 0.21796694232026315, "step": 147588 }, { "epoch": 50.0, "grad_norm": 4.531148910522461, "learning_rate": 3.5367855178612144e-05, "loss": 0.5114, "step": 150600 }, { "epoch": 50.0, "eval_cer": 0.04037235471628217, "eval_loss": 0.14019279181957245, "eval_runtime": 152.1041, "eval_samples_per_second": 238.981, "eval_steps_per_second": 7.469, "eval_wer": 0.21485070781064639, "step": 150600 }, { "epoch": 51.0, "grad_norm": 6.761490821838379, "learning_rate": 3.4661019222772204e-05, "loss": 0.5087, "step": 153612 }, { "epoch": 51.0, "eval_cer": 0.04063826997367439, "eval_loss": 0.14041763544082642, "eval_runtime": 154.2132, "eval_samples_per_second": 235.713, "eval_steps_per_second": 7.366, "eval_wer": 0.21654889997959403, "step": 153612 }, { "epoch": 52.0, "grad_norm": 4.379857540130615, "learning_rate": 3.395441801816303e-05, "loss": 0.5066, "step": 156624 }, { "epoch": 52.0, "eval_cer": 0.04042691319150575, "eval_loss": 0.13891662657260895, "eval_runtime": 159.6511, "eval_samples_per_second": 227.684, "eval_steps_per_second": 7.116, "eval_wer": 0.215715367774469, "step": 156624 }, { "epoch": 53.0, "grad_norm": 19.332077026367188, "learning_rate": 3.324734731109233e-05, "loss": 0.5037, "step": 159636 }, { "epoch": 53.0, "eval_cer": 0.03977129453965943, "eval_loss": 0.1375364065170288, "eval_runtime": 164.6123, "eval_samples_per_second": 220.822, "eval_steps_per_second": 6.901, "eval_wer": 0.2132078538793834, "step": 159636 }, { "epoch": 54.0, "grad_norm": 11.953184127807617, "learning_rate": 3.254098085771392e-05, "loss": 0.5024, "step": 162648 }, { "epoch": 54.0, "eval_cer": 0.039842816574406296, "eval_loss": 0.13721118867397308, "eval_runtime": 156.3278, "eval_samples_per_second": 232.524, "eval_steps_per_second": 7.267, "eval_wer": 0.21213221688438805, "step": 162648 }, { "epoch": 55.0, "grad_norm": 7.574815273284912, "learning_rate": 3.183391015064322e-05, "loss": 0.5, "step": 165660 }, { "epoch": 55.0, "eval_cer": 0.04010139623849114, "eval_loss": 0.13785392045974731, "eval_runtime": 152.1653, "eval_samples_per_second": 238.885, "eval_steps_per_second": 7.466, "eval_wer": 0.2131732674808305, "step": 165660 }, { "epoch": 56.0, "grad_norm": 11.841280937194824, "learning_rate": 3.112707419480328e-05, "loss": 0.4976, "step": 168672 }, { "epoch": 56.0, "eval_cer": 0.038647114865304755, "eval_loss": 0.13485907018184662, "eval_runtime": 153.6401, "eval_samples_per_second": 236.592, "eval_steps_per_second": 7.394, "eval_wer": 0.20721403101016495, "step": 168672 }, { "epoch": 57.0, "grad_norm": 14.78765869140625, "learning_rate": 3.0420238238963334e-05, "loss": 0.4948, "step": 171684 }, { "epoch": 57.0, "eval_cer": 0.0392779758897387, "eval_loss": 0.13624149560928345, "eval_runtime": 152.0309, "eval_samples_per_second": 239.096, "eval_steps_per_second": 7.472, "eval_wer": 0.2102472581632547, "step": 171684 }, { "epoch": 58.0, "grad_norm": 7.023338794708252, "learning_rate": 2.9713402283123397e-05, "loss": 0.4933, "step": 174696 }, { "epoch": 58.0, "eval_cer": 0.038942372495926456, "eval_loss": 0.13551433384418488, "eval_runtime": 151.9877, "eval_samples_per_second": 239.164, "eval_steps_per_second": 7.474, "eval_wer": 0.20676094918912188, "step": 174696 }, { "epoch": 59.0, "grad_norm": 6.568221092224121, "learning_rate": 2.9006801078514222e-05, "loss": 0.4924, "step": 177708 }, { "epoch": 59.0, "eval_cer": 0.03848756571086942, "eval_loss": 0.13611619174480438, "eval_runtime": 177.5315, "eval_samples_per_second": 204.752, "eval_steps_per_second": 6.399, "eval_wer": 0.20549508700208555, "step": 177708 }, { "epoch": 60.0, "grad_norm": 23.931304931640625, "learning_rate": 2.8300199873905052e-05, "loss": 0.4901, "step": 180720 }, { "epoch": 60.0, "eval_cer": 0.03840274791325294, "eval_loss": 0.13464532792568207, "eval_runtime": 153.1916, "eval_samples_per_second": 237.285, "eval_steps_per_second": 7.416, "eval_wer": 0.2053671173274398, "step": 180720 }, { "epoch": 61.0, "grad_norm": 18.084495544433594, "learning_rate": 2.759312916683434e-05, "loss": 0.4898, "step": 183732 }, { "epoch": 61.0, "eval_cer": 0.038370654692533195, "eval_loss": 0.13341517746448517, "eval_runtime": 151.6501, "eval_samples_per_second": 239.696, "eval_steps_per_second": 7.491, "eval_wer": 0.2050074187824896, "step": 183732 }, { "epoch": 62.0, "grad_norm": 2.890596866607666, "learning_rate": 2.6886293210994404e-05, "loss": 0.4873, "step": 186744 }, { "epoch": 62.0, "eval_cer": 0.038351857234683054, "eval_loss": 0.1341981142759323, "eval_runtime": 150.7747, "eval_samples_per_second": 241.088, "eval_steps_per_second": 7.534, "eval_wer": 0.20600696570066857, "step": 186744 }, { "epoch": 63.0, "grad_norm": 11.759881973266602, "learning_rate": 2.617969200638523e-05, "loss": 0.4865, "step": 189756 }, { "epoch": 63.0, "eval_cer": 0.03869296232347583, "eval_loss": 0.13458400964736938, "eval_runtime": 152.6729, "eval_samples_per_second": 238.091, "eval_steps_per_second": 7.441, "eval_wer": 0.20699613669928163, "step": 189756 }, { "epoch": 64.0, "grad_norm": 13.245360374450684, "learning_rate": 2.547309080177606e-05, "loss": 0.4842, "step": 192768 }, { "epoch": 64.0, "eval_cer": 0.03874110215455545, "eval_loss": 0.13456492125988007, "eval_runtime": 153.3987, "eval_samples_per_second": 236.964, "eval_steps_per_second": 7.406, "eval_wer": 0.2072278655695861, "step": 192768 }, { "epoch": 65.0, "grad_norm": 11.684355735778809, "learning_rate": 2.4766020094705352e-05, "loss": 0.4822, "step": 195780 }, { "epoch": 65.0, "eval_cer": 0.03811941062175572, "eval_loss": 0.13252592086791992, "eval_runtime": 156.7414, "eval_samples_per_second": 231.911, "eval_steps_per_second": 7.248, "eval_wer": 0.20395599226648128, "step": 195780 }, { "epoch": 66.0, "grad_norm": 25.19974708557129, "learning_rate": 2.405918413886541e-05, "loss": 0.4814, "step": 198792 }, { "epoch": 66.0, "eval_cer": 0.037090135185815165, "eval_loss": 0.13119570910930634, "eval_runtime": 154.348, "eval_samples_per_second": 235.507, "eval_steps_per_second": 7.36, "eval_wer": 0.19890983671761242, "step": 198792 }, { "epoch": 67.0, "grad_norm": 12.580814361572266, "learning_rate": 2.335234818302547e-05, "loss": 0.4796, "step": 201804 }, { "epoch": 67.0, "eval_cer": 0.03740739959635898, "eval_loss": 0.13117973506450653, "eval_runtime": 162.9804, "eval_samples_per_second": 223.033, "eval_steps_per_second": 6.97, "eval_wer": 0.1999750977930419, "step": 201804 }, { "epoch": 68.0, "grad_norm": 11.110360145568848, "learning_rate": 2.2645746978416297e-05, "loss": 0.4771, "step": 204816 }, { "epoch": 68.0, "eval_cer": 0.037213006373713636, "eval_loss": 0.1303921490907669, "eval_runtime": 152.708, "eval_samples_per_second": 238.036, "eval_steps_per_second": 7.439, "eval_wer": 0.1997191584437504, "step": 204816 }, { "epoch": 69.0, "grad_norm": 4.782271385192871, "learning_rate": 2.193891102257636e-05, "loss": 0.4756, "step": 207828 }, { "epoch": 69.0, "eval_cer": 0.037708617396542916, "eval_loss": 0.13083402812480927, "eval_runtime": 152.8061, "eval_samples_per_second": 237.883, "eval_steps_per_second": 7.434, "eval_wer": 0.20086396823585156, "step": 207828 }, { "epoch": 70.0, "grad_norm": 7.983453750610352, "learning_rate": 2.1232544569197956e-05, "loss": 0.4745, "step": 210840 }, { "epoch": 70.0, "eval_cer": 0.0370488724734612, "eval_loss": 0.13116249442100525, "eval_runtime": 151.9447, "eval_samples_per_second": 239.232, "eval_steps_per_second": 7.476, "eval_wer": 0.19823886058568607, "step": 210840 }, { "epoch": 71.0, "grad_norm": 23.63794708251953, "learning_rate": 2.052547386212725e-05, "loss": 0.4738, "step": 213852 }, { "epoch": 71.0, "eval_cer": 0.037366136884005016, "eval_loss": 0.1306936889886856, "eval_runtime": 154.0224, "eval_samples_per_second": 236.005, "eval_steps_per_second": 7.376, "eval_wer": 0.20006848106913475, "step": 213852 }, { "epoch": 72.0, "grad_norm": 10.838956832885742, "learning_rate": 1.9818637906287305e-05, "loss": 0.473, "step": 216864 }, { "epoch": 72.0, "eval_cer": 0.0372285945094918, "eval_loss": 0.13071005046367645, "eval_runtime": 154.8642, "eval_samples_per_second": 234.722, "eval_steps_per_second": 7.335, "eval_wer": 0.19911043782921928, "step": 216864 }, { "epoch": 73.0, "grad_norm": 11.969744682312012, "learning_rate": 1.9111801950447367e-05, "loss": 0.472, "step": 219876 }, { "epoch": 73.0, "eval_cer": 0.03662890975661418, "eval_loss": 0.12924158573150635, "eval_runtime": 154.3055, "eval_samples_per_second": 235.572, "eval_steps_per_second": 7.362, "eval_wer": 0.19607375203627422, "step": 219876 }, { "epoch": 74.0, "grad_norm": 6.115599155426025, "learning_rate": 1.840496599460743e-05, "loss": 0.4693, "step": 222888 }, { "epoch": 74.0, "eval_cer": 0.036412051279465014, "eval_loss": 0.12866230309009552, "eval_runtime": 157.4725, "eval_samples_per_second": 230.834, "eval_steps_per_second": 7.214, "eval_wer": 0.19521600935216216, "step": 222888 }, { "epoch": 75.0, "grad_norm": 44.51272964477539, "learning_rate": 1.7698130038767486e-05, "loss": 0.4693, "step": 225900 }, { "epoch": 75.0, "eval_cer": 0.03628138602367746, "eval_loss": 0.12844808399677277, "eval_runtime": 164.6927, "eval_samples_per_second": 220.714, "eval_steps_per_second": 6.898, "eval_wer": 0.1944724017832747, "step": 225900 }, { "epoch": 76.0, "grad_norm": 9.162590026855469, "learning_rate": 1.6991528834158312e-05, "loss": 0.4664, "step": 228912 }, { "epoch": 76.0, "eval_cer": 0.03683797416587427, "eval_loss": 0.12876588106155396, "eval_runtime": 152.2551, "eval_samples_per_second": 238.744, "eval_steps_per_second": 7.461, "eval_wer": 0.19688999104212276, "step": 228912 }, { "epoch": 77.0, "grad_norm": 3.896597146987915, "learning_rate": 1.6284692878318375e-05, "loss": 0.4651, "step": 231924 }, { "epoch": 77.0, "eval_cer": 0.03683384789463887, "eval_loss": 0.12869854271411896, "eval_runtime": 168.0203, "eval_samples_per_second": 216.343, "eval_steps_per_second": 6.761, "eval_wer": 0.1971009680732955, "step": 231924 }, { "epoch": 78.0, "grad_norm": 15.388148307800293, "learning_rate": 1.5577856922478434e-05, "loss": 0.4641, "step": 234936 }, { "epoch": 78.0, "eval_cer": 0.03656747416266495, "eval_loss": 0.1286703646183014, "eval_runtime": 154.3849, "eval_samples_per_second": 235.45, "eval_steps_per_second": 7.358, "eval_wer": 0.1952090920724516, "step": 234936 }, { "epoch": 79.0, "grad_norm": 2.635507345199585, "learning_rate": 1.4871020966638496e-05, "loss": 0.462, "step": 237948 }, { "epoch": 79.0, "eval_cer": 0.03641342670321015, "eval_loss": 0.12868022918701172, "eval_runtime": 152.2309, "eval_samples_per_second": 238.782, "eval_steps_per_second": 7.462, "eval_wer": 0.19447586042313, "step": 237948 }, { "epoch": 80.0, "grad_norm": 5.287237167358398, "learning_rate": 1.4163950259567787e-05, "loss": 0.4608, "step": 240960 }, { "epoch": 80.0, "eval_cer": 0.036332276702247354, "eval_loss": 0.12745150923728943, "eval_runtime": 154.8487, "eval_samples_per_second": 234.745, "eval_steps_per_second": 7.336, "eval_wer": 0.19517796431375398, "step": 240960 }, { "epoch": 81.0, "grad_norm": 11.394750595092773, "learning_rate": 1.3457349054958616e-05, "loss": 0.4594, "step": 243972 }, { "epoch": 81.0, "eval_cer": 0.0360989131401566, "eval_loss": 0.12770119309425354, "eval_runtime": 152.6766, "eval_samples_per_second": 238.085, "eval_steps_per_second": 7.441, "eval_wer": 0.19389480892744118, "step": 243972 }, { "epoch": 82.0, "grad_norm": 9.252601623535156, "learning_rate": 1.2750278347887909e-05, "loss": 0.4595, "step": 246984 }, { "epoch": 82.0, "eval_cer": 0.03594715805361035, "eval_loss": 0.12681059539318085, "eval_runtime": 152.0339, "eval_samples_per_second": 239.091, "eval_steps_per_second": 7.472, "eval_wer": 0.19371841829482137, "step": 246984 }, { "epoch": 83.0, "grad_norm": 16.776588439941406, "learning_rate": 1.2043677143278735e-05, "loss": 0.4575, "step": 249996 }, { "epoch": 83.0, "eval_cer": 0.03615393008996188, "eval_loss": 0.12722131609916687, "eval_runtime": 152.0731, "eval_samples_per_second": 239.03, "eval_steps_per_second": 7.47, "eval_wer": 0.1942475901926808, "step": 249996 }, { "epoch": 84.0, "grad_norm": 8.937053680419922, "learning_rate": 1.1336606436208028e-05, "loss": 0.4569, "step": 253008 }, { "epoch": 84.0, "eval_cer": 0.03605260720740382, "eval_loss": 0.12680456042289734, "eval_runtime": 152.9563, "eval_samples_per_second": 237.65, "eval_steps_per_second": 7.427, "eval_wer": 0.19341059934770052, "step": 253008 }, { "epoch": 85.0, "grad_norm": 13.364398956298828, "learning_rate": 1.0630005231598857e-05, "loss": 0.4552, "step": 256020 }, { "epoch": 85.0, "eval_cer": 0.035670697880838785, "eval_loss": 0.12619073688983917, "eval_runtime": 156.3815, "eval_samples_per_second": 232.444, "eval_steps_per_second": 7.264, "eval_wer": 0.1916293998222259, "step": 256020 }, { "epoch": 86.0, "grad_norm": 9.33234691619873, "learning_rate": 9.923404026989685e-06, "loss": 0.4538, "step": 259032 }, { "epoch": 86.0, "eval_cer": 0.03549051737022648, "eval_loss": 0.12592804431915283, "eval_runtime": 156.5096, "eval_samples_per_second": 232.254, "eval_steps_per_second": 7.258, "eval_wer": 0.19070940162071864, "step": 259032 }, { "epoch": 87.0, "grad_norm": 4.286988735198975, "learning_rate": 9.216568071149744e-06, "loss": 0.4532, "step": 262044 }, { "epoch": 87.0, "eval_cer": 0.03551573347222057, "eval_loss": 0.12575581669807434, "eval_runtime": 155.6329, "eval_samples_per_second": 233.562, "eval_steps_per_second": 7.299, "eval_wer": 0.19122473895915693, "step": 262044 }, { "epoch": 88.0, "grad_norm": 7.920403957366943, "learning_rate": 8.509732115309804e-06, "loss": 0.4524, "step": 265056 }, { "epoch": 88.0, "eval_cer": 0.03555103601501229, "eval_loss": 0.1259673833847046, "eval_runtime": 166.3365, "eval_samples_per_second": 218.533, "eval_steps_per_second": 6.83, "eval_wer": 0.19095150641058897, "step": 265056 }, { "epoch": 89.0, "grad_norm": 9.81010913848877, "learning_rate": 7.802896159469865e-06, "loss": 0.4501, "step": 268068 }, { "epoch": 89.0, "eval_cer": 0.03596458008771536, "eval_loss": 0.12655647099018097, "eval_runtime": 155.3246, "eval_samples_per_second": 234.026, "eval_steps_per_second": 7.314, "eval_wer": 0.19276729233461648, "step": 268068 }, { "epoch": 90.0, "grad_norm": 10.394911766052246, "learning_rate": 7.096060203629924e-06, "loss": 0.4491, "step": 271080 }, { "epoch": 90.0, "eval_cer": 0.03546667669197752, "eval_loss": 0.12519720196723938, "eval_runtime": 177.0207, "eval_samples_per_second": 205.343, "eval_steps_per_second": 6.417, "eval_wer": 0.19042579315258482, "step": 271080 }, { "epoch": 91.0, "grad_norm": 9.381885528564453, "learning_rate": 6.3892242477899846e-06, "loss": 0.4486, "step": 274092 }, { "epoch": 91.0, "eval_cer": 0.03518288092589859, "eval_loss": 0.12525735795497894, "eval_runtime": 173.742, "eval_samples_per_second": 209.218, "eval_steps_per_second": 6.538, "eval_wer": 0.18893166073509932, "step": 274092 }, { "epoch": 92.0, "grad_norm": 26.816091537475586, "learning_rate": 5.682623043180811e-06, "loss": 0.4487, "step": 277104 }, { "epoch": 92.0, "eval_cer": 0.035369480080654846, "eval_loss": 0.12525933980941772, "eval_runtime": 153.4339, "eval_samples_per_second": 236.91, "eval_steps_per_second": 7.404, "eval_wer": 0.190249402519965, "step": 277104 }, { "epoch": 93.0, "grad_norm": 12.131733894348145, "learning_rate": 4.975552336110105e-06, "loss": 0.4471, "step": 280116 }, { "epoch": 93.0, "eval_cer": 0.03521634957036347, "eval_loss": 0.1251526027917862, "eval_runtime": 176.0621, "eval_samples_per_second": 206.461, "eval_steps_per_second": 6.452, "eval_wer": 0.1893743666365765, "step": 280116 }, { "epoch": 94.0, "grad_norm": 14.601805686950684, "learning_rate": 4.2687163802701646e-06, "loss": 0.4458, "step": 283128 }, { "epoch": 94.0, "eval_cer": 0.0351705021121924, "eval_loss": 0.1253127008676529, "eval_runtime": 177.0148, "eval_samples_per_second": 205.35, "eval_steps_per_second": 6.418, "eval_wer": 0.18914263776627205, "step": 283128 }, { "epoch": 95.0, "grad_norm": 3.8078722953796387, "learning_rate": 3.562115175660992e-06, "loss": 0.4449, "step": 286140 }, { "epoch": 95.0, "eval_cer": 0.035079265670431965, "eval_loss": 0.12475291639566422, "eval_runtime": 168.7285, "eval_samples_per_second": 215.435, "eval_steps_per_second": 6.733, "eval_wer": 0.1884266993162269, "step": 286140 }, { "epoch": 96.0, "grad_norm": 4.562527656555176, "learning_rate": 2.855044468590285e-06, "loss": 0.4434, "step": 289152 }, { "epoch": 96.0, "eval_cer": 0.035126030077766456, "eval_loss": 0.1246921494603157, "eval_runtime": 179.6539, "eval_samples_per_second": 202.333, "eval_steps_per_second": 6.323, "eval_wer": 0.18909421680829797, "step": 289152 }, { "epoch": 97.0, "grad_norm": 8.47780990600586, "learning_rate": 2.1482085127503454e-06, "loss": 0.4435, "step": 292164 }, { "epoch": 97.0, "eval_cer": 0.035188841095460825, "eval_loss": 0.12471602112054825, "eval_runtime": 152.6845, "eval_samples_per_second": 238.073, "eval_steps_per_second": 7.44, "eval_wer": 0.18912880320685088, "step": 292164 }, { "epoch": 98.0, "grad_norm": 16.65981101989746, "learning_rate": 1.4413725569104053e-06, "loss": 0.4444, "step": 295176 }, { "epoch": 98.0, "eval_cer": 0.03511594363696882, "eval_loss": 0.12448572367429733, "eval_runtime": 159.0007, "eval_samples_per_second": 228.615, "eval_steps_per_second": 7.145, "eval_wer": 0.1887518114626242, "step": 295176 }, { "epoch": 99.0, "grad_norm": 10.248809814453125, "learning_rate": 7.345366010704655e-07, "loss": 0.4429, "step": 298188 }, { "epoch": 99.0, "eval_cer": 0.03512648855234817, "eval_loss": 0.12444119900465012, "eval_runtime": 155.6325, "eval_samples_per_second": 233.563, "eval_steps_per_second": 7.299, "eval_wer": 0.18870684914450545, "step": 298188 } ], "logging_steps": 500, "max_steps": 301200, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 8 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0954167922548843e+21, "train_batch_size": 64, "trial_name": null, "trial_params": null }