|
{ |
|
"best_global_step": 286140, |
|
"best_metric": 0.1884266993162269, |
|
"best_model_checkpoint": "wav2vec2-asr-africa-base-fintuned-luganda-400hrs-v0.1/checkpoint-286140", |
|
"epoch": 99.0, |
|
"eval_steps": 500, |
|
"global_step": 298188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.066422462463379, |
|
"learning_rate": 6.98140770252324e-05, |
|
"loss": 3.2575, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.10930492502565166, |
|
"eval_loss": 0.37255266308784485, |
|
"eval_runtime": 151.5408, |
|
"eval_samples_per_second": 239.869, |
|
"eval_steps_per_second": 7.496, |
|
"eval_wer": 0.534090083733671, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 15.221166610717773, |
|
"learning_rate": 6.929480730277542e-05, |
|
"loss": 0.8396, |
|
"step": 6024 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.07810481279107405, |
|
"eval_loss": 0.2538328468799591, |
|
"eval_runtime": 152.0166, |
|
"eval_samples_per_second": 239.119, |
|
"eval_steps_per_second": 7.473, |
|
"eval_wer": 0.3983557626127949, |
|
"step": 6024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.060754299163818, |
|
"learning_rate": 6.858773659570472e-05, |
|
"loss": 0.7487, |
|
"step": 9036 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.0682448584368034, |
|
"eval_loss": 0.22769133746623993, |
|
"eval_runtime": 153.0969, |
|
"eval_samples_per_second": 237.431, |
|
"eval_steps_per_second": 7.42, |
|
"eval_wer": 0.35308562554689743, |
|
"step": 9036 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 9.472733497619629, |
|
"learning_rate": 6.788090063986478e-05, |
|
"loss": 0.7226, |
|
"step": 12048 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.06425062788093966, |
|
"eval_loss": 0.21326717734336853, |
|
"eval_runtime": 171.3308, |
|
"eval_samples_per_second": 212.163, |
|
"eval_steps_per_second": 6.63, |
|
"eval_wer": 0.33642189872410777, |
|
"step": 12048 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 23.48148536682129, |
|
"learning_rate": 6.717429943525561e-05, |
|
"loss": 0.7096, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.06122744648913919, |
|
"eval_loss": 0.20842401683330536, |
|
"eval_runtime": 174.4012, |
|
"eval_samples_per_second": 208.427, |
|
"eval_steps_per_second": 6.514, |
|
"eval_wer": 0.3211001241651708, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 7.9525041580200195, |
|
"learning_rate": 6.646746347941567e-05, |
|
"loss": 0.6979, |
|
"step": 18072 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.06395399482657282, |
|
"eval_loss": 0.20914477109909058, |
|
"eval_runtime": 153.4786, |
|
"eval_samples_per_second": 236.841, |
|
"eval_steps_per_second": 7.402, |
|
"eval_wer": 0.3287402596055075, |
|
"step": 18072 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 43.115169525146484, |
|
"learning_rate": 6.57608622748065e-05, |
|
"loss": 0.6899, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.06076026089037598, |
|
"eval_loss": 0.20185638964176178, |
|
"eval_runtime": 173.3256, |
|
"eval_samples_per_second": 209.721, |
|
"eval_steps_per_second": 6.554, |
|
"eval_wer": 0.31623727652863237, |
|
"step": 21084 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.6848602294921875, |
|
"learning_rate": 6.505426107019733e-05, |
|
"loss": 0.6765, |
|
"step": 24096 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.060137193933831115, |
|
"eval_loss": 0.19728189706802368, |
|
"eval_runtime": 154.1162, |
|
"eval_samples_per_second": 235.861, |
|
"eval_steps_per_second": 7.371, |
|
"eval_wer": 0.31059969356450884, |
|
"step": 24096 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.830124378204346, |
|
"learning_rate": 6.434719036312661e-05, |
|
"loss": 0.6701, |
|
"step": 27108 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.05820976679231927, |
|
"eval_loss": 0.19281432032585144, |
|
"eval_runtime": 156.5256, |
|
"eval_samples_per_second": 232.23, |
|
"eval_steps_per_second": 7.258, |
|
"eval_wer": 0.30471654717065966, |
|
"step": 27108 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": null,
|
"learning_rate": 6.364058915851744e-05, |
|
"loss": 0.6621, |
|
"step": 30120 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.058245986284274416, |
|
"eval_loss": 0.19237777590751648, |
|
"eval_runtime": 155.0055, |
|
"eval_samples_per_second": 234.508, |
|
"eval_steps_per_second": 7.329, |
|
"eval_wer": 0.3038691804061135, |
|
"step": 30120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 5.588762283325195, |
|
"learning_rate": 6.29337532026775e-05, |
|
"loss": 0.6554, |
|
"step": 33132 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.05662298626501848, |
|
"eval_loss": 0.18665704131126404, |
|
"eval_runtime": 153.969, |
|
"eval_samples_per_second": 236.087, |
|
"eval_steps_per_second": 7.378, |
|
"eval_wer": 0.2982627252006876, |
|
"step": 33132 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.955714702606201, |
|
"learning_rate": 6.222691724683756e-05, |
|
"loss": 0.6475, |
|
"step": 36144 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.05515495065438077, |
|
"eval_loss": 0.1829417496919632, |
|
"eval_runtime": 155.1287, |
|
"eval_samples_per_second": 234.322, |
|
"eval_steps_per_second": 7.323, |
|
"eval_wer": 0.2873610923768119, |
|
"step": 36144 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 31.985713958740234, |
|
"learning_rate": 6.152008129099762e-05, |
|
"loss": 0.6429, |
|
"step": 39156 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.05419032013446143, |
|
"eval_loss": 0.1801947057247162, |
|
"eval_runtime": 168.8898, |
|
"eval_samples_per_second": 215.229, |
|
"eval_steps_per_second": 6.726, |
|
"eval_wer": 0.28527553254407173, |
|
"step": 39156 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.557770252227783, |
|
"learning_rate": 6.081324533515768e-05, |
|
"loss": 0.6351, |
|
"step": 42168 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.055327337097104, |
|
"eval_loss": 0.18261073529720306, |
|
"eval_runtime": 159.2183, |
|
"eval_samples_per_second": 228.303, |
|
"eval_steps_per_second": 7.135, |
|
"eval_wer": 0.2872746263804296, |
|
"step": 42168 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 8.820505142211914, |
|
"learning_rate": 6.010640937931774e-05, |
|
"loss": 0.6319, |
|
"step": 45180 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.05439250742499585, |
|
"eval_loss": 0.17926117777824402, |
|
"eval_runtime": 152.0308, |
|
"eval_samples_per_second": 239.096, |
|
"eval_steps_per_second": 7.472, |
|
"eval_wer": 0.28315884495263394, |
|
"step": 45180 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 8.058792114257812, |
|
"learning_rate": 5.93995734234778e-05, |
|
"loss": 0.6251, |
|
"step": 48192 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.054798715904391546, |
|
"eval_loss": 0.1785019189119339, |
|
"eval_runtime": 154.575, |
|
"eval_samples_per_second": 235.161, |
|
"eval_steps_per_second": 7.349, |
|
"eval_wer": 0.283826362444705, |
|
"step": 48192 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.038857936859131, |
|
"learning_rate": 5.86925027164071e-05, |
|
"loss": 0.6172, |
|
"step": 51204 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.051710431121988164, |
|
"eval_loss": 0.17091116309165955, |
|
"eval_runtime": 154.6152, |
|
"eval_samples_per_second": 235.1, |
|
"eval_steps_per_second": 7.347, |
|
"eval_wer": 0.27192518270265037, |
|
"step": 51204 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": null,
|
"learning_rate": 5.7985901511797926e-05, |
|
"loss": 0.6122, |
|
"step": 54216 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.05208454638066411, |
|
"eval_loss": 0.1720370054244995, |
|
"eval_runtime": 154.3969, |
|
"eval_samples_per_second": 235.432, |
|
"eval_steps_per_second": 7.358, |
|
"eval_wer": 0.27160698783596365, |
|
"step": 54216 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.123114109039307, |
|
"learning_rate": 5.727930030718875e-05, |
|
"loss": 0.6068, |
|
"step": 57228 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.0505266497520111, |
|
"eval_loss": 0.16939722001552582, |
|
"eval_runtime": 154.2835, |
|
"eval_samples_per_second": 235.605, |
|
"eval_steps_per_second": 7.363, |
|
"eval_wer": 0.26646744901100194, |
|
"step": 57228 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 11.014168739318848, |
|
"learning_rate": 5.657222960011804e-05, |
|
"loss": 0.6035, |
|
"step": 60240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.049698644657441546, |
|
"eval_loss": 0.1669510304927826, |
|
"eval_runtime": 155.9471, |
|
"eval_samples_per_second": 233.092, |
|
"eval_steps_per_second": 7.285, |
|
"eval_wer": 0.26278053892526226, |
|
"step": 60240 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 7.567544937133789, |
|
"learning_rate": 5.5865158893047335e-05, |
|
"loss": 0.5957, |
|
"step": 63252 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.050415698903237105, |
|
"eval_loss": 0.1704263538122177, |
|
"eval_runtime": 155.0829, |
|
"eval_samples_per_second": 234.391, |
|
"eval_steps_per_second": 7.325, |
|
"eval_wer": 0.2643818891782618, |
|
"step": 63252 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 3.353114366531372, |
|
"learning_rate": 5.5158557688438164e-05, |
|
"loss": 0.5909, |
|
"step": 66264 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.049318569229203364, |
|
"eval_loss": 0.16528591513633728, |
|
"eval_runtime": 155.149, |
|
"eval_samples_per_second": 234.291, |
|
"eval_steps_per_second": 7.322, |
|
"eval_wer": 0.25990640920551583, |
|
"step": 66264 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 5.273142337799072, |
|
"learning_rate": 5.445172173259822e-05, |
|
"loss": 0.5879, |
|
"step": 69276 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.048735389561267335, |
|
"eval_loss": 0.16745983064174652, |
|
"eval_runtime": 155.8132, |
|
"eval_samples_per_second": 233.292, |
|
"eval_steps_per_second": 7.291, |
|
"eval_wer": 0.2573400984328903, |
|
"step": 69276 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 12.098519325256348, |
|
"learning_rate": 5.374512052798905e-05, |
|
"loss": 0.5966, |
|
"step": 72288 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.05103463958854657, |
|
"eval_loss": 0.19431033730506897, |
|
"eval_runtime": 154.0761, |
|
"eval_samples_per_second": 235.922, |
|
"eval_steps_per_second": 7.373, |
|
"eval_wer": 0.2738551037419025, |
|
"step": 72288 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 12.023294448852539, |
|
"learning_rate": 5.3038519323379875e-05, |
|
"loss": 0.6444, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.05154996501838942, |
|
"eval_loss": 0.1868334412574768, |
|
"eval_runtime": 152.5369, |
|
"eval_samples_per_second": 238.303, |
|
"eval_steps_per_second": 7.447, |
|
"eval_wer": 0.27229871580702175, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 9.066435813903809, |
|
"learning_rate": 5.2331448616309165e-05, |
|
"loss": 0.5999, |
|
"step": 78312 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.04910904634536157, |
|
"eval_loss": 0.16771361231803894, |
|
"eval_runtime": 154.4851, |
|
"eval_samples_per_second": 235.298, |
|
"eval_steps_per_second": 7.353, |
|
"eval_wer": 0.25782430801263095, |
|
"step": 78312 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 9.274683952331543, |
|
"learning_rate": 5.1624847411699994e-05, |
|
"loss": 0.5911, |
|
"step": 81324 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 0.04738747429103783, |
|
"eval_loss": 0.16794191300868988, |
|
"eval_runtime": 154.6471, |
|
"eval_samples_per_second": 235.051, |
|
"eval_steps_per_second": 7.346, |
|
"eval_wer": 0.25102462205712983, |
|
"step": 81324 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 5.650504112243652, |
|
"learning_rate": 5.091777670462929e-05, |
|
"loss": 0.586, |
|
"step": 84336 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.04840666328618075, |
|
"eval_loss": 0.1722731739282608, |
|
"eval_runtime": 153.0438, |
|
"eval_samples_per_second": 237.514, |
|
"eval_steps_per_second": 7.423, |
|
"eval_wer": 0.25386416537832335, |
|
"step": 84336 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 18.539613723754883, |
|
"learning_rate": 5.021070599755859e-05, |
|
"loss": 0.5816, |
|
"step": 87348 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 0.04769969548118283, |
|
"eval_loss": 0.16775010526180267, |
|
"eval_runtime": 156.0962, |
|
"eval_samples_per_second": 232.869, |
|
"eval_steps_per_second": 7.278, |
|
"eval_wer": 0.25264326550940575, |
|
"step": 87348 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 4.343358039855957, |
|
"learning_rate": 4.950457429541094e-05, |
|
"loss": 0.5886, |
|
"step": 90360 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 0.04993246669411401, |
|
"eval_loss": 0.18236766755580902, |
|
"eval_runtime": 151.7294, |
|
"eval_samples_per_second": 239.571, |
|
"eval_steps_per_second": 7.487, |
|
"eval_wer": 0.2629396363586056, |
|
"step": 90360 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 13.675621032714844, |
|
"learning_rate": 4.879773833957101e-05, |
|
"loss": 0.5978, |
|
"step": 93372 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 0.04701886072734242, |
|
"eval_loss": 0.16201142966747284, |
|
"eval_runtime": 152.4808, |
|
"eval_samples_per_second": 238.391, |
|
"eval_steps_per_second": 7.45, |
|
"eval_wer": 0.24908778373816712, |
|
"step": 93372 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 5.842775821685791, |
|
"learning_rate": 4.809066763250029e-05, |
|
"loss": 0.5722, |
|
"step": 96384 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 0.04652920987407537, |
|
"eval_loss": 0.15837915241718292, |
|
"eval_runtime": 153.9577, |
|
"eval_samples_per_second": 236.104, |
|
"eval_steps_per_second": 7.379, |
|
"eval_wer": 0.24719590773732322, |
|
"step": 96384 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 12.179231643676758, |
|
"learning_rate": 4.738359692542959e-05, |
|
"loss": 0.5615, |
|
"step": 99396 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 0.046122542920097966, |
|
"eval_loss": 0.15639054775238037, |
|
"eval_runtime": 165.4748, |
|
"eval_samples_per_second": 219.671, |
|
"eval_steps_per_second": 6.865, |
|
"eval_wer": 0.2421047898703356, |
|
"step": 99396 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 11.811565399169922, |
|
"learning_rate": 4.6676760969589654e-05, |
|
"loss": 0.5566, |
|
"step": 102408 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 0.04475858103950859, |
|
"eval_loss": 0.15303878486156464, |
|
"eval_runtime": 151.973, |
|
"eval_samples_per_second": 239.187, |
|
"eval_steps_per_second": 7.475, |
|
"eval_wer": 0.23680615361203053, |
|
"step": 102408 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 27.314680099487305, |
|
"learning_rate": 4.5970394516211246e-05, |
|
"loss": 0.5514, |
|
"step": 105420 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 0.04322590051284967, |
|
"eval_loss": 0.14988180994987488, |
|
"eval_runtime": 152.0916, |
|
"eval_samples_per_second": 239.001, |
|
"eval_steps_per_second": 7.469, |
|
"eval_wer": 0.2308780449000626, |
|
"step": 105420 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 6.874896049499512, |
|
"learning_rate": 4.526332380914054e-05, |
|
"loss": 0.5485, |
|
"step": 108432 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 0.043550500516700855, |
|
"eval_loss": 0.15108104050159454, |
|
"eval_runtime": 156.7097, |
|
"eval_samples_per_second": 231.958, |
|
"eval_steps_per_second": 7.249, |
|
"eval_wer": 0.23083308258194382, |
|
"step": 108432 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 18.988279342651367, |
|
"learning_rate": 4.4556722604531365e-05, |
|
"loss": 0.5451, |
|
"step": 111444 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 0.04385080136772137, |
|
"eval_loss": 0.15071320533752441, |
|
"eval_runtime": 172.1252, |
|
"eval_samples_per_second": 211.183, |
|
"eval_steps_per_second": 6.6, |
|
"eval_wer": 0.23185338133925454, |
|
"step": 111444 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 28.172042846679688, |
|
"learning_rate": 4.384988664869143e-05, |
|
"loss": 0.5433, |
|
"step": 114456 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 0.0433771971248142, |
|
"eval_loss": 0.14824804663658142, |
|
"eval_runtime": 152.6042, |
|
"eval_samples_per_second": 238.198, |
|
"eval_steps_per_second": 7.444, |
|
"eval_wer": 0.23122390888559166, |
|
"step": 114456 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"grad_norm": 4.653916835784912, |
|
"learning_rate": 4.3143050692851484e-05, |
|
"loss": 0.5391, |
|
"step": 117468 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 0.04353903865215809, |
|
"eval_loss": 0.14684619009494781, |
|
"eval_runtime": 156.6046, |
|
"eval_samples_per_second": 232.113, |
|
"eval_steps_per_second": 7.254, |
|
"eval_wer": 0.22910722129415387, |
|
"step": 117468 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 5.8400702476501465, |
|
"learning_rate": 4.243621473701154e-05, |
|
"loss": 0.5347, |
|
"step": 120480 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_cer": 0.042970530170836796, |
|
"eval_loss": 0.1462726891040802, |
|
"eval_runtime": 178.4468, |
|
"eval_samples_per_second": 203.702, |
|
"eval_steps_per_second": 6.366, |
|
"eval_wer": 0.22744707416361443, |
|
"step": 120480 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"grad_norm": 16.060422897338867, |
|
"learning_rate": 4.172914402994084e-05, |
|
"loss": 0.5313, |
|
"step": 123492 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_cer": 0.04216269795786252, |
|
"eval_loss": 0.14503081142902374, |
|
"eval_runtime": 152.5692, |
|
"eval_samples_per_second": 238.253, |
|
"eval_steps_per_second": 7.446, |
|
"eval_wer": 0.22400226886774507, |
|
"step": 123492 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 8.755998611450195, |
|
"learning_rate": 4.102254282533167e-05, |
|
"loss": 0.5291, |
|
"step": 126504 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_cer": 0.04194263015864137, |
|
"eval_loss": 0.1446152627468109, |
|
"eval_runtime": 152.4116, |
|
"eval_samples_per_second": 238.499, |
|
"eval_steps_per_second": 7.454, |
|
"eval_wer": 0.224061065745285, |
|
"step": 126504 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"grad_norm": 9.50841999053955, |
|
"learning_rate": 4.031570686949173e-05, |
|
"loss": 0.5269, |
|
"step": 129516 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_cer": 0.04270048864220919, |
|
"eval_loss": 0.14530107378959656, |
|
"eval_runtime": 153.731, |
|
"eval_samples_per_second": 236.452, |
|
"eval_steps_per_second": 7.39, |
|
"eval_wer": 0.22547219080624353, |
|
"step": 129516 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 28.027828216552734, |
|
"learning_rate": 3.960887091365178e-05, |
|
"loss": 0.5253, |
|
"step": 132528 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_cer": 0.042549192030244654, |
|
"eval_loss": 0.14459766447544098, |
|
"eval_runtime": 179.3972, |
|
"eval_samples_per_second": 202.623, |
|
"eval_steps_per_second": 6.332, |
|
"eval_wer": 0.22531309337290018, |
|
"step": 132528 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 9.71485710144043, |
|
"learning_rate": 3.890226970904261e-05, |
|
"loss": 0.523, |
|
"step": 135540 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_cer": 0.041189356420890666, |
|
"eval_loss": 0.1429988592863083, |
|
"eval_runtime": 151.6718, |
|
"eval_samples_per_second": 239.662, |
|
"eval_steps_per_second": 7.49, |
|
"eval_wer": 0.22018738910735963, |
|
"step": 135540 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 13.113300323486328, |
|
"learning_rate": 3.819519900197191e-05, |
|
"loss": 0.5192, |
|
"step": 138552 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_cer": 0.040866590315366325, |
|
"eval_loss": 0.14137160778045654, |
|
"eval_runtime": 152.9576, |
|
"eval_samples_per_second": 237.648, |
|
"eval_steps_per_second": 7.427, |
|
"eval_wer": 0.21718528971296747, |
|
"step": 138552 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"grad_norm": 15.17225456237793, |
|
"learning_rate": 3.7488597797362736e-05, |
|
"loss": 0.518, |
|
"step": 141564 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_cer": 0.040502561497488015, |
|
"eval_loss": 0.14037571847438812, |
|
"eval_runtime": 151.9659, |
|
"eval_samples_per_second": 239.198, |
|
"eval_steps_per_second": 7.475, |
|
"eval_wer": 0.21598168304332638, |
|
"step": 141564 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 5.159524917602539, |
|
"learning_rate": 3.678152709029203e-05, |
|
"loss": 0.5139, |
|
"step": 144576 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_cer": 0.04006930301777139, |
|
"eval_loss": 0.13999390602111816, |
|
"eval_runtime": 161.9319, |
|
"eval_samples_per_second": 224.477, |
|
"eval_steps_per_second": 7.015, |
|
"eval_wer": 0.2143319118323528, |
|
"step": 144576 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"grad_norm": 5.24137020111084, |
|
"learning_rate": 3.6074925885682855e-05, |
|
"loss": 0.5133, |
|
"step": 147588 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_cer": 0.04118523014965527, |
|
"eval_loss": 0.14138683676719666, |
|
"eval_runtime": 154.3411, |
|
"eval_samples_per_second": 235.517, |
|
"eval_steps_per_second": 7.36, |
|
"eval_wer": 0.21796694232026315, |
|
"step": 147588 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 4.531148910522461, |
|
"learning_rate": 3.5367855178612144e-05, |
|
"loss": 0.5114, |
|
"step": 150600 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_cer": 0.04037235471628217, |
|
"eval_loss": 0.14019279181957245, |
|
"eval_runtime": 152.1041, |
|
"eval_samples_per_second": 238.981, |
|
"eval_steps_per_second": 7.469, |
|
"eval_wer": 0.21485070781064639, |
|
"step": 150600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"grad_norm": 6.761490821838379, |
|
"learning_rate": 3.4661019222772204e-05, |
|
"loss": 0.5087, |
|
"step": 153612 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_cer": 0.04063826997367439, |
|
"eval_loss": 0.14041763544082642, |
|
"eval_runtime": 154.2132, |
|
"eval_samples_per_second": 235.713, |
|
"eval_steps_per_second": 7.366, |
|
"eval_wer": 0.21654889997959403, |
|
"step": 153612 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 4.379857540130615, |
|
"learning_rate": 3.395441801816303e-05, |
|
"loss": 0.5066, |
|
"step": 156624 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_cer": 0.04042691319150575, |
|
"eval_loss": 0.13891662657260895, |
|
"eval_runtime": 159.6511, |
|
"eval_samples_per_second": 227.684, |
|
"eval_steps_per_second": 7.116, |
|
"eval_wer": 0.215715367774469, |
|
"step": 156624 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"grad_norm": 19.332077026367188, |
|
"learning_rate": 3.324734731109233e-05, |
|
"loss": 0.5037, |
|
"step": 159636 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_cer": 0.03977129453965943, |
|
"eval_loss": 0.1375364065170288, |
|
"eval_runtime": 164.6123, |
|
"eval_samples_per_second": 220.822, |
|
"eval_steps_per_second": 6.901, |
|
"eval_wer": 0.2132078538793834, |
|
"step": 159636 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"grad_norm": 11.953184127807617, |
|
"learning_rate": 3.254098085771392e-05, |
|
"loss": 0.5024, |
|
"step": 162648 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_cer": 0.039842816574406296, |
|
"eval_loss": 0.13721118867397308, |
|
"eval_runtime": 156.3278, |
|
"eval_samples_per_second": 232.524, |
|
"eval_steps_per_second": 7.267, |
|
"eval_wer": 0.21213221688438805, |
|
"step": 162648 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 7.574815273284912, |
|
"learning_rate": 3.183391015064322e-05, |
|
"loss": 0.5, |
|
"step": 165660 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_cer": 0.04010139623849114, |
|
"eval_loss": 0.13785392045974731, |
|
"eval_runtime": 152.1653, |
|
"eval_samples_per_second": 238.885, |
|
"eval_steps_per_second": 7.466, |
|
"eval_wer": 0.2131732674808305, |
|
"step": 165660 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 11.841280937194824, |
|
"learning_rate": 3.112707419480328e-05, |
|
"loss": 0.4976, |
|
"step": 168672 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_cer": 0.038647114865304755, |
|
"eval_loss": 0.13485907018184662, |
|
"eval_runtime": 153.6401, |
|
"eval_samples_per_second": 236.592, |
|
"eval_steps_per_second": 7.394, |
|
"eval_wer": 0.20721403101016495, |
|
"step": 168672 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"grad_norm": 14.78765869140625, |
|
"learning_rate": 3.0420238238963334e-05, |
|
"loss": 0.4948, |
|
"step": 171684 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_cer": 0.0392779758897387, |
|
"eval_loss": 0.13624149560928345, |
|
"eval_runtime": 152.0309, |
|
"eval_samples_per_second": 239.096, |
|
"eval_steps_per_second": 7.472, |
|
"eval_wer": 0.2102472581632547, |
|
"step": 171684 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"grad_norm": 7.023338794708252, |
|
"learning_rate": 2.9713402283123397e-05, |
|
"loss": 0.4933, |
|
"step": 174696 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_cer": 0.038942372495926456, |
|
"eval_loss": 0.13551433384418488, |
|
"eval_runtime": 151.9877, |
|
"eval_samples_per_second": 239.164, |
|
"eval_steps_per_second": 7.474, |
|
"eval_wer": 0.20676094918912188, |
|
"step": 174696 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"grad_norm": 6.568221092224121, |
|
"learning_rate": 2.9006801078514222e-05, |
|
"loss": 0.4924, |
|
"step": 177708 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_cer": 0.03848756571086942, |
|
"eval_loss": 0.13611619174480438, |
|
"eval_runtime": 177.5315, |
|
"eval_samples_per_second": 204.752, |
|
"eval_steps_per_second": 6.399, |
|
"eval_wer": 0.20549508700208555, |
|
"step": 177708 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 23.931304931640625, |
|
"learning_rate": 2.8300199873905052e-05, |
|
"loss": 0.4901, |
|
"step": 180720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_cer": 0.03840274791325294, |
|
"eval_loss": 0.13464532792568207, |
|
"eval_runtime": 153.1916, |
|
"eval_samples_per_second": 237.285, |
|
"eval_steps_per_second": 7.416, |
|
"eval_wer": 0.2053671173274398, |
|
"step": 180720 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"grad_norm": 18.084495544433594, |
|
"learning_rate": 2.759312916683434e-05, |
|
"loss": 0.4898, |
|
"step": 183732 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_cer": 0.038370654692533195, |
|
"eval_loss": 0.13341517746448517, |
|
"eval_runtime": 151.6501, |
|
"eval_samples_per_second": 239.696, |
|
"eval_steps_per_second": 7.491, |
|
"eval_wer": 0.2050074187824896, |
|
"step": 183732 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"grad_norm": 2.890596866607666, |
|
"learning_rate": 2.6886293210994404e-05, |
|
"loss": 0.4873, |
|
"step": 186744 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_cer": 0.038351857234683054, |
|
"eval_loss": 0.1341981142759323, |
|
"eval_runtime": 150.7747, |
|
"eval_samples_per_second": 241.088, |
|
"eval_steps_per_second": 7.534, |
|
"eval_wer": 0.20600696570066857, |
|
"step": 186744 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"grad_norm": 11.759881973266602, |
|
"learning_rate": 2.617969200638523e-05, |
|
"loss": 0.4865, |
|
"step": 189756 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_cer": 0.03869296232347583, |
|
"eval_loss": 0.13458400964736938, |
|
"eval_runtime": 152.6729, |
|
"eval_samples_per_second": 238.091, |
|
"eval_steps_per_second": 7.441, |
|
"eval_wer": 0.20699613669928163, |
|
"step": 189756 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 13.245360374450684, |
|
"learning_rate": 2.547309080177606e-05, |
|
"loss": 0.4842, |
|
"step": 192768 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_cer": 0.03874110215455545, |
|
"eval_loss": 0.13456492125988007, |
|
"eval_runtime": 153.3987, |
|
"eval_samples_per_second": 236.964, |
|
"eval_steps_per_second": 7.406, |
|
"eval_wer": 0.2072278655695861, |
|
"step": 192768 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 11.684355735778809, |
|
"learning_rate": 2.4766020094705352e-05, |
|
"loss": 0.4822, |
|
"step": 195780 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_cer": 0.03811941062175572, |
|
"eval_loss": 0.13252592086791992, |
|
"eval_runtime": 156.7414, |
|
"eval_samples_per_second": 231.911, |
|
"eval_steps_per_second": 7.248, |
|
"eval_wer": 0.20395599226648128, |
|
"step": 195780 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"grad_norm": 25.19974708557129, |
|
"learning_rate": 2.405918413886541e-05, |
|
"loss": 0.4814, |
|
"step": 198792 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_cer": 0.037090135185815165, |
|
"eval_loss": 0.13119570910930634, |
|
"eval_runtime": 154.348, |
|
"eval_samples_per_second": 235.507, |
|
"eval_steps_per_second": 7.36, |
|
"eval_wer": 0.19890983671761242, |
|
"step": 198792 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"grad_norm": 12.580814361572266, |
|
"learning_rate": 2.335234818302547e-05, |
|
"loss": 0.4796, |
|
"step": 201804 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_cer": 0.03740739959635898, |
|
"eval_loss": 0.13117973506450653, |
|
"eval_runtime": 162.9804, |
|
"eval_samples_per_second": 223.033, |
|
"eval_steps_per_second": 6.97, |
|
"eval_wer": 0.1999750977930419, |
|
"step": 201804 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"grad_norm": 11.110360145568848, |
|
"learning_rate": 2.2645746978416297e-05, |
|
"loss": 0.4771, |
|
"step": 204816 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_cer": 0.037213006373713636, |
|
"eval_loss": 0.1303921490907669, |
|
"eval_runtime": 152.708, |
|
"eval_samples_per_second": 238.036, |
|
"eval_steps_per_second": 7.439, |
|
"eval_wer": 0.1997191584437504, |
|
"step": 204816 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"grad_norm": 4.782271385192871, |
|
"learning_rate": 2.193891102257636e-05, |
|
"loss": 0.4756, |
|
"step": 207828 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_cer": 0.037708617396542916, |
|
"eval_loss": 0.13083402812480927, |
|
"eval_runtime": 152.8061, |
|
"eval_samples_per_second": 237.883, |
|
"eval_steps_per_second": 7.434, |
|
"eval_wer": 0.20086396823585156, |
|
"step": 207828 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 7.983453750610352, |
|
"learning_rate": 2.1232544569197956e-05, |
|
"loss": 0.4745, |
|
"step": 210840 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_cer": 0.0370488724734612, |
|
"eval_loss": 0.13116249442100525, |
|
"eval_runtime": 151.9447, |
|
"eval_samples_per_second": 239.232, |
|
"eval_steps_per_second": 7.476, |
|
"eval_wer": 0.19823886058568607, |
|
"step": 210840 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"grad_norm": 23.63794708251953, |
|
"learning_rate": 2.052547386212725e-05, |
|
"loss": 0.4738, |
|
"step": 213852 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_cer": 0.037366136884005016, |
|
"eval_loss": 0.1306936889886856, |
|
"eval_runtime": 154.0224, |
|
"eval_samples_per_second": 236.005, |
|
"eval_steps_per_second": 7.376, |
|
"eval_wer": 0.20006848106913475, |
|
"step": 213852 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 10.838956832885742, |
|
"learning_rate": 1.9818637906287305e-05, |
|
"loss": 0.473, |
|
"step": 216864 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_cer": 0.0372285945094918, |
|
"eval_loss": 0.13071005046367645, |
|
"eval_runtime": 154.8642, |
|
"eval_samples_per_second": 234.722, |
|
"eval_steps_per_second": 7.335, |
|
"eval_wer": 0.19911043782921928, |
|
"step": 216864 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"grad_norm": 11.969744682312012, |
|
"learning_rate": 1.9111801950447367e-05, |
|
"loss": 0.472, |
|
"step": 219876 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_cer": 0.03662890975661418, |
|
"eval_loss": 0.12924158573150635, |
|
"eval_runtime": 154.3055, |
|
"eval_samples_per_second": 235.572, |
|
"eval_steps_per_second": 7.362, |
|
"eval_wer": 0.19607375203627422, |
|
"step": 219876 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"grad_norm": 6.115599155426025, |
|
"learning_rate": 1.840496599460743e-05, |
|
"loss": 0.4693, |
|
"step": 222888 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_cer": 0.036412051279465014, |
|
"eval_loss": 0.12866230309009552, |
|
"eval_runtime": 157.4725, |
|
"eval_samples_per_second": 230.834, |
|
"eval_steps_per_second": 7.214, |
|
"eval_wer": 0.19521600935216216, |
|
"step": 222888 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 44.51272964477539, |
|
"learning_rate": 1.7698130038767486e-05, |
|
"loss": 0.4693, |
|
"step": 225900 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_cer": 0.03628138602367746, |
|
"eval_loss": 0.12844808399677277, |
|
"eval_runtime": 164.6927, |
|
"eval_samples_per_second": 220.714, |
|
"eval_steps_per_second": 6.898, |
|
"eval_wer": 0.1944724017832747, |
|
"step": 225900 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"grad_norm": 9.162590026855469, |
|
"learning_rate": 1.6991528834158312e-05, |
|
"loss": 0.4664, |
|
"step": 228912 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_cer": 0.03683797416587427, |
|
"eval_loss": 0.12876588106155396, |
|
"eval_runtime": 152.2551, |
|
"eval_samples_per_second": 238.744, |
|
"eval_steps_per_second": 7.461, |
|
"eval_wer": 0.19688999104212276, |
|
"step": 228912 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"grad_norm": 3.896597146987915, |
|
"learning_rate": 1.6284692878318375e-05, |
|
"loss": 0.4651, |
|
"step": 231924 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_cer": 0.03683384789463887, |
|
"eval_loss": 0.12869854271411896, |
|
"eval_runtime": 168.0203, |
|
"eval_samples_per_second": 216.343, |
|
"eval_steps_per_second": 6.761, |
|
"eval_wer": 0.1971009680732955, |
|
"step": 231924 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"grad_norm": 15.388148307800293, |
|
"learning_rate": 1.5577856922478434e-05, |
|
"loss": 0.4641, |
|
"step": 234936 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_cer": 0.03656747416266495, |
|
"eval_loss": 0.1286703646183014, |
|
"eval_runtime": 154.3849, |
|
"eval_samples_per_second": 235.45, |
|
"eval_steps_per_second": 7.358, |
|
"eval_wer": 0.1952090920724516, |
|
"step": 234936 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"grad_norm": 2.635507345199585, |
|
"learning_rate": 1.4871020966638496e-05, |
|
"loss": 0.462, |
|
"step": 237948 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_cer": 0.03641342670321015, |
|
"eval_loss": 0.12868022918701172, |
|
"eval_runtime": 152.2309, |
|
"eval_samples_per_second": 238.782, |
|
"eval_steps_per_second": 7.462, |
|
"eval_wer": 0.19447586042313, |
|
"step": 237948 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 5.287237167358398, |
|
"learning_rate": 1.4163950259567787e-05, |
|
"loss": 0.4608, |
|
"step": 240960 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_cer": 0.036332276702247354, |
|
"eval_loss": 0.12745150923728943, |
|
"eval_runtime": 154.8487, |
|
"eval_samples_per_second": 234.745, |
|
"eval_steps_per_second": 7.336, |
|
"eval_wer": 0.19517796431375398, |
|
"step": 240960 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"grad_norm": 11.394750595092773, |
|
"learning_rate": 1.3457349054958616e-05, |
|
"loss": 0.4594, |
|
"step": 243972 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_cer": 0.0360989131401566, |
|
"eval_loss": 0.12770119309425354, |
|
"eval_runtime": 152.6766, |
|
"eval_samples_per_second": 238.085, |
|
"eval_steps_per_second": 7.441, |
|
"eval_wer": 0.19389480892744118, |
|
"step": 243972 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"grad_norm": 9.252601623535156, |
|
"learning_rate": 1.2750278347887909e-05, |
|
"loss": 0.4595, |
|
"step": 246984 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_cer": 0.03594715805361035, |
|
"eval_loss": 0.12681059539318085, |
|
"eval_runtime": 152.0339, |
|
"eval_samples_per_second": 239.091, |
|
"eval_steps_per_second": 7.472, |
|
"eval_wer": 0.19371841829482137, |
|
"step": 246984 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"grad_norm": 16.776588439941406, |
|
"learning_rate": 1.2043677143278735e-05, |
|
"loss": 0.4575, |
|
"step": 249996 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_cer": 0.03615393008996188, |
|
"eval_loss": 0.12722131609916687, |
|
"eval_runtime": 152.0731, |
|
"eval_samples_per_second": 239.03, |
|
"eval_steps_per_second": 7.47, |
|
"eval_wer": 0.1942475901926808, |
|
"step": 249996 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"grad_norm": 8.937053680419922, |
|
"learning_rate": 1.1336606436208028e-05, |
|
"loss": 0.4569, |
|
"step": 253008 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_cer": 0.03605260720740382, |
|
"eval_loss": 0.12680456042289734, |
|
"eval_runtime": 152.9563, |
|
"eval_samples_per_second": 237.65, |
|
"eval_steps_per_second": 7.427, |
|
"eval_wer": 0.19341059934770052, |
|
"step": 253008 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 13.364398956298828, |
|
"learning_rate": 1.0630005231598857e-05, |
|
"loss": 0.4552, |
|
"step": 256020 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_cer": 0.035670697880838785, |
|
"eval_loss": 0.12619073688983917, |
|
"eval_runtime": 156.3815, |
|
"eval_samples_per_second": 232.444, |
|
"eval_steps_per_second": 7.264, |
|
"eval_wer": 0.1916293998222259, |
|
"step": 256020 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"grad_norm": 9.33234691619873, |
|
"learning_rate": 9.923404026989685e-06, |
|
"loss": 0.4538, |
|
"step": 259032 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_cer": 0.03549051737022648, |
|
"eval_loss": 0.12592804431915283, |
|
"eval_runtime": 156.5096, |
|
"eval_samples_per_second": 232.254, |
|
"eval_steps_per_second": 7.258, |
|
"eval_wer": 0.19070940162071864, |
|
"step": 259032 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"grad_norm": 4.286988735198975, |
|
"learning_rate": 9.216568071149744e-06, |
|
"loss": 0.4532, |
|
"step": 262044 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_cer": 0.03551573347222057, |
|
"eval_loss": 0.12575581669807434, |
|
"eval_runtime": 155.6329, |
|
"eval_samples_per_second": 233.562, |
|
"eval_steps_per_second": 7.299, |
|
"eval_wer": 0.19122473895915693, |
|
"step": 262044 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"grad_norm": 7.920403957366943, |
|
"learning_rate": 8.509732115309804e-06, |
|
"loss": 0.4524, |
|
"step": 265056 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_cer": 0.03555103601501229, |
|
"eval_loss": 0.1259673833847046, |
|
"eval_runtime": 166.3365, |
|
"eval_samples_per_second": 218.533, |
|
"eval_steps_per_second": 6.83, |
|
"eval_wer": 0.19095150641058897, |
|
"step": 265056 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"grad_norm": 9.81010913848877, |
|
"learning_rate": 7.802896159469865e-06, |
|
"loss": 0.4501, |
|
"step": 268068 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_cer": 0.03596458008771536, |
|
"eval_loss": 0.12655647099018097, |
|
"eval_runtime": 155.3246, |
|
"eval_samples_per_second": 234.026, |
|
"eval_steps_per_second": 7.314, |
|
"eval_wer": 0.19276729233461648, |
|
"step": 268068 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 10.394911766052246, |
|
"learning_rate": 7.096060203629924e-06, |
|
"loss": 0.4491, |
|
"step": 271080 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_cer": 0.03546667669197752, |
|
"eval_loss": 0.12519720196723938, |
|
"eval_runtime": 177.0207, |
|
"eval_samples_per_second": 205.343, |
|
"eval_steps_per_second": 6.417, |
|
"eval_wer": 0.19042579315258482, |
|
"step": 271080 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"grad_norm": 9.381885528564453, |
|
"learning_rate": 6.3892242477899846e-06, |
|
"loss": 0.4486, |
|
"step": 274092 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_cer": 0.03518288092589859, |
|
"eval_loss": 0.12525735795497894, |
|
"eval_runtime": 173.742, |
|
"eval_samples_per_second": 209.218, |
|
"eval_steps_per_second": 6.538, |
|
"eval_wer": 0.18893166073509932, |
|
"step": 274092 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"grad_norm": 26.816091537475586, |
|
"learning_rate": 5.682623043180811e-06, |
|
"loss": 0.4487, |
|
"step": 277104 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_cer": 0.035369480080654846, |
|
"eval_loss": 0.12525933980941772, |
|
"eval_runtime": 153.4339, |
|
"eval_samples_per_second": 236.91, |
|
"eval_steps_per_second": 7.404, |
|
"eval_wer": 0.190249402519965, |
|
"step": 277104 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"grad_norm": 12.131733894348145, |
|
"learning_rate": 4.975552336110105e-06, |
|
"loss": 0.4471, |
|
"step": 280116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_cer": 0.03521634957036347, |
|
"eval_loss": 0.1251526027917862, |
|
"eval_runtime": 176.0621, |
|
"eval_samples_per_second": 206.461, |
|
"eval_steps_per_second": 6.452, |
|
"eval_wer": 0.1893743666365765, |
|
"step": 280116 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"grad_norm": 14.601805686950684, |
|
"learning_rate": 4.2687163802701646e-06, |
|
"loss": 0.4458, |
|
"step": 283128 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_cer": 0.0351705021121924, |
|
"eval_loss": 0.1253127008676529, |
|
"eval_runtime": 177.0148, |
|
"eval_samples_per_second": 205.35, |
|
"eval_steps_per_second": 6.418, |
|
"eval_wer": 0.18914263776627205, |
|
"step": 283128 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 3.8078722953796387, |
|
"learning_rate": 3.562115175660992e-06, |
|
"loss": 0.4449, |
|
"step": 286140 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_cer": 0.035079265670431965, |
|
"eval_loss": 0.12475291639566422, |
|
"eval_runtime": 168.7285, |
|
"eval_samples_per_second": 215.435, |
|
"eval_steps_per_second": 6.733, |
|
"eval_wer": 0.1884266993162269, |
|
"step": 286140 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"grad_norm": 4.562527656555176, |
|
"learning_rate": 2.855044468590285e-06, |
|
"loss": 0.4434, |
|
"step": 289152 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_cer": 0.035126030077766456, |
|
"eval_loss": 0.1246921494603157, |
|
"eval_runtime": 179.6539, |
|
"eval_samples_per_second": 202.333, |
|
"eval_steps_per_second": 6.323, |
|
"eval_wer": 0.18909421680829797, |
|
"step": 289152 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"grad_norm": 8.47780990600586, |
|
"learning_rate": 2.1482085127503454e-06, |
|
"loss": 0.4435, |
|
"step": 292164 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_cer": 0.035188841095460825, |
|
"eval_loss": 0.12471602112054825, |
|
"eval_runtime": 152.6845, |
|
"eval_samples_per_second": 238.073, |
|
"eval_steps_per_second": 7.44, |
|
"eval_wer": 0.18912880320685088, |
|
"step": 292164 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"grad_norm": 16.65981101989746, |
|
"learning_rate": 1.4413725569104053e-06, |
|
"loss": 0.4444, |
|
"step": 295176 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_cer": 0.03511594363696882, |
|
"eval_loss": 0.12448572367429733, |
|
"eval_runtime": 159.0007, |
|
"eval_samples_per_second": 228.615, |
|
"eval_steps_per_second": 7.145, |
|
"eval_wer": 0.1887518114626242, |
|
"step": 295176 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"grad_norm": 10.248809814453125, |
|
"learning_rate": 7.345366010704655e-07, |
|
"loss": 0.4429, |
|
"step": 298188 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_cer": 0.03512648855234817, |
|
"eval_loss": 0.12444119900465012, |
|
"eval_runtime": 155.6325, |
|
"eval_samples_per_second": 233.563, |
|
"eval_steps_per_second": 7.299, |
|
"eval_wer": 0.18870684914450545, |
|
"step": 298188 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 301200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 8 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0954167922548843e+21, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|