wav2vec2-xls-r-300m-fula / trainer_state.json
{
"best_metric": 0.46308374404907227,
"best_model_checkpoint": "wav2vec2-xls-r-300m-fula/checkpoint-4000",
"epoch": 5.154639175257732,
"eval_steps": 2000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01288659793814433,
"grad_norm": 10.90772533416748,
"learning_rate": 3.4499999999999996e-06,
"loss": 19.1295,
"step": 25
},
{
"epoch": 0.02577319587628866,
"grad_norm": 12.727262496948242,
"learning_rate": 7.2e-06,
"loss": 16.6655,
"step": 50
},
{
"epoch": 0.03865979381443299,
"grad_norm": 17.578950881958008,
"learning_rate": 1.0949999999999998e-05,
"loss": 13.462,
"step": 75
},
{
"epoch": 0.05154639175257732,
"grad_norm": 14.574068069458008,
"learning_rate": 1.47e-05,
"loss": 7.4496,
"step": 100
},
{
"epoch": 0.06443298969072164,
"grad_norm": 11.906006813049316,
"learning_rate": 1.8449999999999998e-05,
"loss": 6.1341,
"step": 125
},
{
"epoch": 0.07731958762886598,
"grad_norm": 10.743711471557617,
"learning_rate": 2.2199999999999998e-05,
"loss": 5.0479,
"step": 150
},
{
"epoch": 0.09020618556701031,
"grad_norm": 9.327588081359863,
"learning_rate": 2.5949999999999997e-05,
"loss": 4.86,
"step": 175
},
{
"epoch": 0.10309278350515463,
"grad_norm": 8.118748664855957,
"learning_rate": 2.97e-05,
"loss": 4.3188,
"step": 200
},
{
"epoch": 0.11597938144329897,
"grad_norm": 4.968245983123779,
"learning_rate": 3.345e-05,
"loss": 4.142,
"step": 225
},
{
"epoch": 0.12886597938144329,
"grad_norm": 3.5491788387298584,
"learning_rate": 3.7199999999999996e-05,
"loss": 3.7329,
"step": 250
},
{
"epoch": 0.14175257731958762,
"grad_norm": 2.228337049484253,
"learning_rate": 4.095e-05,
"loss": 3.5432,
"step": 275
},
{
"epoch": 0.15463917525773196,
"grad_norm": 1.3472025394439697,
"learning_rate": 4.4699999999999996e-05,
"loss": 3.3229,
"step": 300
},
{
"epoch": 0.16752577319587628,
"grad_norm": 0.8919354677200317,
"learning_rate": 4.845e-05,
"loss": 3.1852,
"step": 325
},
{
"epoch": 0.18041237113402062,
"grad_norm": 0.9615734815597534,
"learning_rate": 5.2199999999999995e-05,
"loss": 3.0339,
"step": 350
},
{
"epoch": 0.19329896907216496,
"grad_norm": 1.0601171255111694,
"learning_rate": 5.595e-05,
"loss": 2.9307,
"step": 375
},
{
"epoch": 0.20618556701030927,
"grad_norm": 1.2133561372756958,
"learning_rate": 5.97e-05,
"loss": 2.7395,
"step": 400
},
{
"epoch": 0.2190721649484536,
"grad_norm": 0.922024130821228,
"learning_rate": 6.345e-05,
"loss": 2.5765,
"step": 425
},
{
"epoch": 0.23195876288659795,
"grad_norm": 1.0528242588043213,
"learning_rate": 6.72e-05,
"loss": 2.2256,
"step": 450
},
{
"epoch": 0.24484536082474226,
"grad_norm": 1.39167058467865,
"learning_rate": 7.094999999999999e-05,
"loss": 1.7335,
"step": 475
},
{
"epoch": 0.25773195876288657,
"grad_norm": 1.182673692703247,
"learning_rate": 7.47e-05,
"loss": 1.2321,
"step": 500
},
{
"epoch": 0.2706185567010309,
"grad_norm": 0.9412080645561218,
"learning_rate": 7.845e-05,
"loss": 0.9694,
"step": 525
},
{
"epoch": 0.28350515463917525,
"grad_norm": 1.3718018531799316,
"learning_rate": 8.22e-05,
"loss": 0.9061,
"step": 550
},
{
"epoch": 0.2963917525773196,
"grad_norm": 0.7417821884155273,
"learning_rate": 8.594999999999999e-05,
"loss": 0.8102,
"step": 575
},
{
"epoch": 0.30927835051546393,
"grad_norm": 0.9690466523170471,
"learning_rate": 8.969999999999998e-05,
"loss": 0.7434,
"step": 600
},
{
"epoch": 0.32216494845360827,
"grad_norm": 0.8293886780738831,
"learning_rate": 9.345e-05,
"loss": 0.7381,
"step": 625
},
{
"epoch": 0.33505154639175255,
"grad_norm": 0.8486959934234619,
"learning_rate": 9.719999999999999e-05,
"loss": 0.6224,
"step": 650
},
{
"epoch": 0.3479381443298969,
"grad_norm": 0.7055938243865967,
"learning_rate": 0.00010095,
"loss": 0.6925,
"step": 675
},
{
"epoch": 0.36082474226804123,
"grad_norm": 0.7605512142181396,
"learning_rate": 0.00010469999999999998,
"loss": 0.6136,
"step": 700
},
{
"epoch": 0.37371134020618557,
"grad_norm": 0.6695568561553955,
"learning_rate": 0.00010845,
"loss": 0.6419,
"step": 725
},
{
"epoch": 0.3865979381443299,
"grad_norm": 0.8840702772140503,
"learning_rate": 0.00011219999999999999,
"loss": 0.6667,
"step": 750
},
{
"epoch": 0.39948453608247425,
"grad_norm": 0.7186158299446106,
"learning_rate": 0.00011595,
"loss": 0.6397,
"step": 775
},
{
"epoch": 0.41237113402061853,
"grad_norm": 0.6683000922203064,
"learning_rate": 0.0001197,
"loss": 0.5647,
"step": 800
},
{
"epoch": 0.4252577319587629,
"grad_norm": 0.8537669777870178,
"learning_rate": 0.00012345,
"loss": 0.5624,
"step": 825
},
{
"epoch": 0.4381443298969072,
"grad_norm": 1.0267616510391235,
"learning_rate": 0.00012719999999999997,
"loss": 0.624,
"step": 850
},
{
"epoch": 0.45103092783505155,
"grad_norm": 0.6453070640563965,
"learning_rate": 0.00013094999999999998,
"loss": 0.5539,
"step": 875
},
{
"epoch": 0.4639175257731959,
"grad_norm": 1.0308513641357422,
"learning_rate": 0.0001347,
"loss": 0.5728,
"step": 900
},
{
"epoch": 0.47680412371134023,
"grad_norm": 0.560512125492096,
"learning_rate": 0.00013845,
"loss": 0.5305,
"step": 925
},
{
"epoch": 0.4896907216494845,
"grad_norm": 0.7196568250656128,
"learning_rate": 0.0001422,
"loss": 0.5669,
"step": 950
},
{
"epoch": 0.5025773195876289,
"grad_norm": 0.5675578713417053,
"learning_rate": 0.00014595,
"loss": 0.5093,
"step": 975
},
{
"epoch": 0.5154639175257731,
"grad_norm": 0.8834163546562195,
"learning_rate": 0.00014969999999999998,
"loss": 0.5757,
"step": 1000
},
{
"epoch": 0.5283505154639175,
"grad_norm": 0.5083448886871338,
"learning_rate": 0.00015344999999999996,
"loss": 0.5412,
"step": 1025
},
{
"epoch": 0.5412371134020618,
"grad_norm": 0.747595489025116,
"learning_rate": 0.0001572,
"loss": 0.5849,
"step": 1050
},
{
"epoch": 0.5541237113402062,
"grad_norm": 0.5015640258789062,
"learning_rate": 0.00016094999999999998,
"loss": 0.5049,
"step": 1075
},
{
"epoch": 0.5670103092783505,
"grad_norm": 0.7311388850212097,
"learning_rate": 0.0001647,
"loss": 0.4726,
"step": 1100
},
{
"epoch": 0.5798969072164949,
"grad_norm": 0.6942028999328613,
"learning_rate": 0.00016844999999999997,
"loss": 0.4934,
"step": 1125
},
{
"epoch": 0.5927835051546392,
"grad_norm": 0.7268182635307312,
"learning_rate": 0.00017219999999999998,
"loss": 0.5074,
"step": 1150
},
{
"epoch": 0.6056701030927835,
"grad_norm": 0.5172975063323975,
"learning_rate": 0.00017595,
"loss": 0.5293,
"step": 1175
},
{
"epoch": 0.6185567010309279,
"grad_norm": 0.5973320603370667,
"learning_rate": 0.00017969999999999998,
"loss": 0.5018,
"step": 1200
},
{
"epoch": 0.6314432989690721,
"grad_norm": 0.6601810455322266,
"learning_rate": 0.00018345,
"loss": 0.4953,
"step": 1225
},
{
"epoch": 0.6443298969072165,
"grad_norm": 0.7992896437644958,
"learning_rate": 0.0001872,
"loss": 0.4748,
"step": 1250
},
{
"epoch": 0.6572164948453608,
"grad_norm": 0.5774939060211182,
"learning_rate": 0.00019094999999999998,
"loss": 0.4659,
"step": 1275
},
{
"epoch": 0.6701030927835051,
"grad_norm": 0.7045702338218689,
"learning_rate": 0.0001947,
"loss": 0.4433,
"step": 1300
},
{
"epoch": 0.6829896907216495,
"grad_norm": 0.6067873239517212,
"learning_rate": 0.00019844999999999997,
"loss": 0.4872,
"step": 1325
},
{
"epoch": 0.6958762886597938,
"grad_norm": 1.0101662874221802,
"learning_rate": 0.0002022,
"loss": 0.4933,
"step": 1350
},
{
"epoch": 0.7087628865979382,
"grad_norm": 0.4532999098300934,
"learning_rate": 0.00020595,
"loss": 0.474,
"step": 1375
},
{
"epoch": 0.7216494845360825,
"grad_norm": 0.8769963383674622,
"learning_rate": 0.00020969999999999997,
"loss": 0.4923,
"step": 1400
},
{
"epoch": 0.7345360824742269,
"grad_norm": 0.38735705614089966,
"learning_rate": 0.00021344999999999998,
"loss": 0.5358,
"step": 1425
},
{
"epoch": 0.7474226804123711,
"grad_norm": 0.5298680663108826,
"learning_rate": 0.00021719999999999997,
"loss": 0.4961,
"step": 1450
},
{
"epoch": 0.7603092783505154,
"grad_norm": 0.6393166780471802,
"learning_rate": 0.00022095,
"loss": 0.5057,
"step": 1475
},
{
"epoch": 0.7731958762886598,
"grad_norm": 1.908589243888855,
"learning_rate": 0.0002247,
"loss": 0.5043,
"step": 1500
},
{
"epoch": 0.7860824742268041,
"grad_norm": 0.5039921402931213,
"learning_rate": 0.00022844999999999997,
"loss": 0.4767,
"step": 1525
},
{
"epoch": 0.7989690721649485,
"grad_norm": 0.5750266909599304,
"learning_rate": 0.00023219999999999998,
"loss": 0.5002,
"step": 1550
},
{
"epoch": 0.8118556701030928,
"grad_norm": 0.5906339883804321,
"learning_rate": 0.00023594999999999996,
"loss": 0.4776,
"step": 1575
},
{
"epoch": 0.8247422680412371,
"grad_norm": 0.5718595385551453,
"learning_rate": 0.0002397,
"loss": 0.4934,
"step": 1600
},
{
"epoch": 0.8376288659793815,
"grad_norm": 0.5280390381813049,
"learning_rate": 0.00024344999999999998,
"loss": 0.4533,
"step": 1625
},
{
"epoch": 0.8505154639175257,
"grad_norm": 1.016766905784607,
"learning_rate": 0.0002472,
"loss": 0.4795,
"step": 1650
},
{
"epoch": 0.8634020618556701,
"grad_norm": 0.4700835943222046,
"learning_rate": 0.00025095,
"loss": 0.4399,
"step": 1675
},
{
"epoch": 0.8762886597938144,
"grad_norm": 0.7027618885040283,
"learning_rate": 0.00025469999999999996,
"loss": 0.4801,
"step": 1700
},
{
"epoch": 0.8891752577319587,
"grad_norm": 0.41857969760894775,
"learning_rate": 0.00025844999999999997,
"loss": 0.4449,
"step": 1725
},
{
"epoch": 0.9020618556701031,
"grad_norm": 0.7206704020500183,
"learning_rate": 0.0002622,
"loss": 0.4648,
"step": 1750
},
{
"epoch": 0.9149484536082474,
"grad_norm": 0.485895037651062,
"learning_rate": 0.00026595,
"loss": 0.4598,
"step": 1775
},
{
"epoch": 0.9278350515463918,
"grad_norm": 0.7267158031463623,
"learning_rate": 0.0002697,
"loss": 0.462,
"step": 1800
},
{
"epoch": 0.9407216494845361,
"grad_norm": 0.4129009246826172,
"learning_rate": 0.00027344999999999995,
"loss": 0.4585,
"step": 1825
},
{
"epoch": 0.9536082474226805,
"grad_norm": 0.6467506885528564,
"learning_rate": 0.0002772,
"loss": 0.4539,
"step": 1850
},
{
"epoch": 0.9664948453608248,
"grad_norm": 0.6241980791091919,
"learning_rate": 0.00028094999999999997,
"loss": 0.4917,
"step": 1875
},
{
"epoch": 0.979381443298969,
"grad_norm": 0.817642867565155,
"learning_rate": 0.0002847,
"loss": 0.4337,
"step": 1900
},
{
"epoch": 0.9922680412371134,
"grad_norm": 1.18275785446167,
"learning_rate": 0.00028845,
"loss": 0.4591,
"step": 1925
},
{
"epoch": 1.0051546391752577,
"grad_norm": 0.5338313579559326,
"learning_rate": 0.00029219999999999995,
"loss": 0.5187,
"step": 1950
},
{
"epoch": 1.018041237113402,
"grad_norm": 0.40176838636398315,
"learning_rate": 0.00029595,
"loss": 0.4012,
"step": 1975
},
{
"epoch": 1.0309278350515463,
"grad_norm": 0.6469115018844604,
"learning_rate": 0.00029969999999999997,
"loss": 0.4752,
"step": 2000
},
{
"epoch": 1.0309278350515463,
"eval_loss": 0.59544438123703,
"eval_runtime": 196.5387,
"eval_samples_per_second": 6.233,
"eval_steps_per_second": 0.784,
"eval_wer": 1.0,
"step": 2000
},
{
"epoch": 1.0438144329896908,
"grad_norm": 0.6261550188064575,
"learning_rate": 0.00029987722419928825,
"loss": 0.4102,
"step": 2025
},
{
"epoch": 1.056701030927835,
"grad_norm": 0.8379774689674377,
"learning_rate": 0.00029974377224199284,
"loss": 0.4658,
"step": 2050
},
{
"epoch": 1.0695876288659794,
"grad_norm": 0.503239631652832,
"learning_rate": 0.0002996103202846975,
"loss": 0.4167,
"step": 2075
},
{
"epoch": 1.0824742268041236,
"grad_norm": 0.3943336606025696,
"learning_rate": 0.00029947686832740213,
"loss": 0.4686,
"step": 2100
},
{
"epoch": 1.0953608247422681,
"grad_norm": 0.4782889783382416,
"learning_rate": 0.0002993434163701067,
"loss": 0.4003,
"step": 2125
},
{
"epoch": 1.1082474226804124,
"grad_norm": 0.37270641326904297,
"learning_rate": 0.00029920996441281137,
"loss": 0.4578,
"step": 2150
},
{
"epoch": 1.1211340206185567,
"grad_norm": 0.5629658699035645,
"learning_rate": 0.000299076512455516,
"loss": 0.3687,
"step": 2175
},
{
"epoch": 1.134020618556701,
"grad_norm": 0.3787396252155304,
"learning_rate": 0.0002989430604982206,
"loss": 0.4322,
"step": 2200
},
{
"epoch": 1.1469072164948453,
"grad_norm": 0.6377401947975159,
"learning_rate": 0.00029880960854092526,
"loss": 0.4403,
"step": 2225
},
{
"epoch": 1.1597938144329896,
"grad_norm": 0.37100082635879517,
"learning_rate": 0.00029867615658362985,
"loss": 0.453,
"step": 2250
},
{
"epoch": 1.172680412371134,
"grad_norm": 0.7276923060417175,
"learning_rate": 0.0002985427046263345,
"loss": 0.4145,
"step": 2275
},
{
"epoch": 1.1855670103092784,
"grad_norm": 0.8605408668518066,
"learning_rate": 0.00029840925266903914,
"loss": 0.4539,
"step": 2300
},
{
"epoch": 1.1984536082474226,
"grad_norm": 0.45449143648147583,
"learning_rate": 0.00029827580071174373,
"loss": 0.4031,
"step": 2325
},
{
"epoch": 1.211340206185567,
"grad_norm": 0.36433449387550354,
"learning_rate": 0.0002981423487544484,
"loss": 0.4548,
"step": 2350
},
{
"epoch": 1.2242268041237114,
"grad_norm": 0.48323678970336914,
"learning_rate": 0.000298008896797153,
"loss": 0.4236,
"step": 2375
},
{
"epoch": 1.2371134020618557,
"grad_norm": 0.6236255764961243,
"learning_rate": 0.0002978754448398576,
"loss": 0.438,
"step": 2400
},
{
"epoch": 1.25,
"grad_norm": 0.721978485584259,
"learning_rate": 0.00029774199288256227,
"loss": 0.3803,
"step": 2425
},
{
"epoch": 1.2628865979381443,
"grad_norm": 0.33121246099472046,
"learning_rate": 0.00029760854092526686,
"loss": 0.4759,
"step": 2450
},
{
"epoch": 1.2757731958762886,
"grad_norm": 0.6318331360816956,
"learning_rate": 0.0002974750889679715,
"loss": 0.3616,
"step": 2475
},
{
"epoch": 1.2886597938144329,
"grad_norm": 0.30613037943840027,
"learning_rate": 0.0002973416370106761,
"loss": 0.4355,
"step": 2500
},
{
"epoch": 1.3015463917525774,
"grad_norm": 0.6804624199867249,
"learning_rate": 0.00029720818505338075,
"loss": 0.4296,
"step": 2525
},
{
"epoch": 1.3144329896907216,
"grad_norm": 0.5847098231315613,
"learning_rate": 0.0002970747330960854,
"loss": 0.4774,
"step": 2550
},
{
"epoch": 1.327319587628866,
"grad_norm": 0.43900150060653687,
"learning_rate": 0.00029694128113879,
"loss": 0.4249,
"step": 2575
},
{
"epoch": 1.3402061855670104,
"grad_norm": 0.26748043298721313,
"learning_rate": 0.00029680782918149463,
"loss": 0.4713,
"step": 2600
},
{
"epoch": 1.3530927835051547,
"grad_norm": 0.5893319845199585,
"learning_rate": 0.0002966743772241993,
"loss": 0.3566,
"step": 2625
},
{
"epoch": 1.365979381443299,
"grad_norm": 0.2939490079879761,
"learning_rate": 0.00029654092526690387,
"loss": 0.427,
"step": 2650
},
{
"epoch": 1.3788659793814433,
"grad_norm": 0.6302582025527954,
"learning_rate": 0.0002964074733096085,
"loss": 0.413,
"step": 2675
},
{
"epoch": 1.3917525773195876,
"grad_norm": 0.5481074452400208,
"learning_rate": 0.0002962740213523131,
"loss": 0.5027,
"step": 2700
},
{
"epoch": 1.4046391752577319,
"grad_norm": 0.41078057885169983,
"learning_rate": 0.00029614056939501776,
"loss": 0.3935,
"step": 2725
},
{
"epoch": 1.4175257731958764,
"grad_norm": 0.5955342054367065,
"learning_rate": 0.0002960071174377224,
"loss": 0.4454,
"step": 2750
},
{
"epoch": 1.4304123711340206,
"grad_norm": 0.47366246581077576,
"learning_rate": 0.000295873665480427,
"loss": 0.3861,
"step": 2775
},
{
"epoch": 1.443298969072165,
"grad_norm": 0.5429800748825073,
"learning_rate": 0.00029574021352313164,
"loss": 0.4525,
"step": 2800
},
{
"epoch": 1.4561855670103092,
"grad_norm": 0.5089389681816101,
"learning_rate": 0.00029560676156583623,
"loss": 0.3727,
"step": 2825
},
{
"epoch": 1.4690721649484537,
"grad_norm": 0.3353477716445923,
"learning_rate": 0.0002954733096085409,
"loss": 0.4855,
"step": 2850
},
{
"epoch": 1.481958762886598,
"grad_norm": 0.6160407662391663,
"learning_rate": 0.0002953398576512455,
"loss": 0.3635,
"step": 2875
},
{
"epoch": 1.4948453608247423,
"grad_norm": 0.3237319886684418,
"learning_rate": 0.0002952064056939501,
"loss": 0.4285,
"step": 2900
},
{
"epoch": 1.5077319587628866,
"grad_norm": 0.450309693813324,
"learning_rate": 0.00029507295373665477,
"loss": 0.3759,
"step": 2925
},
{
"epoch": 1.5206185567010309,
"grad_norm": 0.3550674319267273,
"learning_rate": 0.0002949395017793594,
"loss": 0.4442,
"step": 2950
},
{
"epoch": 1.5335051546391751,
"grad_norm": 0.3989160656929016,
"learning_rate": 0.000294806049822064,
"loss": 0.4255,
"step": 2975
},
{
"epoch": 1.5463917525773194,
"grad_norm": 0.5774266719818115,
"learning_rate": 0.00029467259786476865,
"loss": 0.4526,
"step": 3000
},
{
"epoch": 1.559278350515464,
"grad_norm": 0.6264383792877197,
"learning_rate": 0.0002945391459074733,
"loss": 0.4097,
"step": 3025
},
{
"epoch": 1.5721649484536082,
"grad_norm": 0.33969295024871826,
"learning_rate": 0.0002944056939501779,
"loss": 0.4241,
"step": 3050
},
{
"epoch": 1.5850515463917527,
"grad_norm": 0.39299994707107544,
"learning_rate": 0.00029427224199288254,
"loss": 0.3778,
"step": 3075
},
{
"epoch": 1.597938144329897,
"grad_norm": 0.42388004064559937,
"learning_rate": 0.0002941387900355872,
"loss": 0.4004,
"step": 3100
},
{
"epoch": 1.6108247422680413,
"grad_norm": 0.9131516814231873,
"learning_rate": 0.0002940053380782918,
"loss": 0.3531,
"step": 3125
},
{
"epoch": 1.6237113402061856,
"grad_norm": 0.296908974647522,
"learning_rate": 0.0002938718861209964,
"loss": 0.4613,
"step": 3150
},
{
"epoch": 1.6365979381443299,
"grad_norm": 0.4583122134208679,
"learning_rate": 0.000293738434163701,
"loss": 0.3833,
"step": 3175
},
{
"epoch": 1.6494845360824741,
"grad_norm": 0.35052600502967834,
"learning_rate": 0.00029360498220640566,
"loss": 0.4432,
"step": 3200
},
{
"epoch": 1.6623711340206184,
"grad_norm": 0.532720685005188,
"learning_rate": 0.0002934715302491103,
"loss": 0.3635,
"step": 3225
},
{
"epoch": 1.675257731958763,
"grad_norm": 0.3807854652404785,
"learning_rate": 0.0002933380782918149,
"loss": 0.43,
"step": 3250
},
{
"epoch": 1.6881443298969072,
"grad_norm": 0.5288501381874084,
"learning_rate": 0.00029320462633451955,
"loss": 0.3449,
"step": 3275
},
{
"epoch": 1.7010309278350515,
"grad_norm": 0.3881712853908539,
"learning_rate": 0.0002930711743772242,
"loss": 0.4406,
"step": 3300
},
{
"epoch": 1.713917525773196,
"grad_norm": 0.42132484912872314,
"learning_rate": 0.0002929377224199288,
"loss": 0.3888,
"step": 3325
},
{
"epoch": 1.7268041237113403,
"grad_norm": 0.3974430561065674,
"learning_rate": 0.00029280427046263343,
"loss": 0.4265,
"step": 3350
},
{
"epoch": 1.7396907216494846,
"grad_norm": 0.4618494212627411,
"learning_rate": 0.0002926708185053381,
"loss": 0.3537,
"step": 3375
},
{
"epoch": 1.7525773195876289,
"grad_norm": 0.347777783870697,
"learning_rate": 0.00029253736654804267,
"loss": 0.4282,
"step": 3400
},
{
"epoch": 1.7654639175257731,
"grad_norm": 0.5885565280914307,
"learning_rate": 0.0002924039145907473,
"loss": 0.3694,
"step": 3425
},
{
"epoch": 1.7783505154639174,
"grad_norm": 0.5113171935081482,
"learning_rate": 0.00029227046263345197,
"loss": 0.5256,
"step": 3450
},
{
"epoch": 1.7912371134020617,
"grad_norm": 0.7724625468254089,
"learning_rate": 0.00029213701067615656,
"loss": 0.3649,
"step": 3475
},
{
"epoch": 1.8041237113402062,
"grad_norm": 0.44778281450271606,
"learning_rate": 0.0002920035587188612,
"loss": 0.4313,
"step": 3500
},
{
"epoch": 1.8170103092783505,
"grad_norm": 0.8251272439956665,
"learning_rate": 0.00029187010676156585,
"loss": 0.3839,
"step": 3525
},
{
"epoch": 1.829896907216495,
"grad_norm": 0.4858299493789673,
"learning_rate": 0.00029173665480427044,
"loss": 0.4489,
"step": 3550
},
{
"epoch": 1.8427835051546393,
"grad_norm": 0.5172144770622253,
"learning_rate": 0.0002916032028469751,
"loss": 0.409,
"step": 3575
},
{
"epoch": 1.8556701030927836,
"grad_norm": 0.34371522068977356,
"learning_rate": 0.0002914697508896797,
"loss": 0.4365,
"step": 3600
},
{
"epoch": 1.8685567010309279,
"grad_norm": 0.5957440137863159,
"learning_rate": 0.00029133629893238433,
"loss": 0.3611,
"step": 3625
},
{
"epoch": 1.8814432989690721,
"grad_norm": 0.3856901526451111,
"learning_rate": 0.000291202846975089,
"loss": 0.451,
"step": 3650
},
{
"epoch": 1.8943298969072164,
"grad_norm": 0.5961311459541321,
"learning_rate": 0.00029106939501779357,
"loss": 0.3934,
"step": 3675
},
{
"epoch": 1.9072164948453607,
"grad_norm": 0.4491939842700958,
"learning_rate": 0.0002909359430604982,
"loss": 0.4466,
"step": 3700
},
{
"epoch": 1.920103092783505,
"grad_norm": 0.41777607798576355,
"learning_rate": 0.0002908024911032028,
"loss": 0.3754,
"step": 3725
},
{
"epoch": 1.9329896907216495,
"grad_norm": 0.42550450563430786,
"learning_rate": 0.00029066903914590745,
"loss": 0.4027,
"step": 3750
},
{
"epoch": 1.9458762886597938,
"grad_norm": 0.427433043718338,
"learning_rate": 0.0002905355871886121,
"loss": 0.3603,
"step": 3775
},
{
"epoch": 1.9587628865979383,
"grad_norm": 0.8279537558555603,
"learning_rate": 0.0002904021352313167,
"loss": 0.4098,
"step": 3800
},
{
"epoch": 1.9716494845360826,
"grad_norm": 0.6759387850761414,
"learning_rate": 0.00029026868327402134,
"loss": 0.3756,
"step": 3825
},
{
"epoch": 1.9845360824742269,
"grad_norm": 0.29660704731941223,
"learning_rate": 0.000290135231316726,
"loss": 0.4591,
"step": 3850
},
{
"epoch": 1.9974226804123711,
"grad_norm": 0.4846726655960083,
"learning_rate": 0.0002900017793594306,
"loss": 0.3944,
"step": 3875
},
{
"epoch": 2.0103092783505154,
"grad_norm": 0.4863591492176056,
"learning_rate": 0.0002898683274021352,
"loss": 0.3813,
"step": 3900
},
{
"epoch": 2.0231958762886597,
"grad_norm": 0.37275585532188416,
"learning_rate": 0.0002897348754448398,
"loss": 0.3753,
"step": 3925
},
{
"epoch": 2.036082474226804,
"grad_norm": 0.36628881096839905,
"learning_rate": 0.00028960142348754446,
"loss": 0.403,
"step": 3950
},
{
"epoch": 2.0489690721649483,
"grad_norm": 0.4523802697658539,
"learning_rate": 0.0002894679715302491,
"loss": 0.3601,
"step": 3975
},
{
"epoch": 2.0618556701030926,
"grad_norm": 0.4722582697868347,
"learning_rate": 0.0002893345195729537,
"loss": 0.3858,
"step": 4000
},
{
"epoch": 2.0618556701030926,
"eval_loss": 0.46308374404907227,
"eval_runtime": 201.4429,
"eval_samples_per_second": 6.081,
"eval_steps_per_second": 0.764,
"eval_wer": 1.0057142857142858,
"step": 4000
},
{
"epoch": 2.0747422680412373,
"grad_norm": 0.5041220784187317,
"learning_rate": 0.00028920106761565835,
"loss": 0.3518,
"step": 4025
},
{
"epoch": 2.0876288659793816,
"grad_norm": 0.48610439896583557,
"learning_rate": 0.00028906761565836294,
"loss": 0.3818,
"step": 4050
},
{
"epoch": 2.100515463917526,
"grad_norm": 0.6834833025932312,
"learning_rate": 0.0002889341637010676,
"loss": 0.3778,
"step": 4075
},
{
"epoch": 2.11340206185567,
"grad_norm": 0.687245786190033,
"learning_rate": 0.00028880071174377224,
"loss": 0.3771,
"step": 4100
},
{
"epoch": 2.1262886597938144,
"grad_norm": 0.706832230091095,
"learning_rate": 0.00028866725978647683,
"loss": 0.3868,
"step": 4125
},
{
"epoch": 2.1391752577319587,
"grad_norm": 0.7049499154090881,
"learning_rate": 0.0002885338078291815,
"loss": 0.3646,
"step": 4150
},
{
"epoch": 2.152061855670103,
"grad_norm": 0.40853607654571533,
"learning_rate": 0.00028840035587188607,
"loss": 0.3235,
"step": 4175
},
{
"epoch": 2.1649484536082473,
"grad_norm": 0.3675331771373749,
"learning_rate": 0.0002882669039145907,
"loss": 0.4079,
"step": 4200
},
{
"epoch": 2.1778350515463916,
"grad_norm": 1.3320142030715942,
"learning_rate": 0.00028813345195729536,
"loss": 0.3031,
"step": 4225
},
{
"epoch": 2.1907216494845363,
"grad_norm": 0.6416336297988892,
"learning_rate": 0.00028799999999999995,
"loss": 0.4087,
"step": 4250
},
{
"epoch": 2.2036082474226806,
"grad_norm": 0.43675994873046875,
"learning_rate": 0.0002878665480427046,
"loss": 0.3577,
"step": 4275
},
{
"epoch": 2.216494845360825,
"grad_norm": 0.3541963994503021,
"learning_rate": 0.00028773309608540925,
"loss": 0.3706,
"step": 4300
},
{
"epoch": 2.229381443298969,
"grad_norm": 0.4469320476055145,
"learning_rate": 0.00028759964412811384,
"loss": 0.3692,
"step": 4325
},
{
"epoch": 2.2422680412371134,
"grad_norm": 0.4056352376937866,
"learning_rate": 0.0002874661921708185,
"loss": 0.3659,
"step": 4350
},
{
"epoch": 2.2551546391752577,
"grad_norm": 0.3547820746898651,
"learning_rate": 0.0002873327402135231,
"loss": 0.3564,
"step": 4375
},
{
"epoch": 2.268041237113402,
"grad_norm": 0.32645678520202637,
"learning_rate": 0.0002871992882562277,
"loss": 0.3346,
"step": 4400
},
{
"epoch": 2.2809278350515463,
"grad_norm": 0.7449667453765869,
"learning_rate": 0.00028706583629893237,
"loss": 0.3696,
"step": 4425
},
{
"epoch": 2.2938144329896906,
"grad_norm": 0.4612623155117035,
"learning_rate": 0.00028693238434163696,
"loss": 0.3925,
"step": 4450
},
{
"epoch": 2.306701030927835,
"grad_norm": 0.6325463056564331,
"learning_rate": 0.0002867989323843416,
"loss": 0.3512,
"step": 4475
},
{
"epoch": 2.319587628865979,
"grad_norm": 0.29471156001091003,
"learning_rate": 0.0002866654804270462,
"loss": 0.3569,
"step": 4500
},
{
"epoch": 2.332474226804124,
"grad_norm": 1.076217770576477,
"learning_rate": 0.00028653202846975085,
"loss": 0.3375,
"step": 4525
},
{
"epoch": 2.345360824742268,
"grad_norm": 0.614162027835846,
"learning_rate": 0.0002863985765124555,
"loss": 0.4078,
"step": 4550
},
{
"epoch": 2.3582474226804124,
"grad_norm": 0.4514384865760803,
"learning_rate": 0.0002862651245551601,
"loss": 0.3758,
"step": 4575
},
{
"epoch": 2.3711340206185567,
"grad_norm": 0.314336359500885,
"learning_rate": 0.00028613167259786473,
"loss": 0.3765,
"step": 4600
},
{
"epoch": 2.384020618556701,
"grad_norm": 0.5032577514648438,
"learning_rate": 0.0002859982206405694,
"loss": 0.3226,
"step": 4625
},
{
"epoch": 2.3969072164948453,
"grad_norm": 0.6402880549430847,
"learning_rate": 0.000285864768683274,
"loss": 0.3986,
"step": 4650
},
{
"epoch": 2.4097938144329896,
"grad_norm": 0.5510269403457642,
"learning_rate": 0.0002857313167259786,
"loss": 0.3757,
"step": 4675
},
{
"epoch": 2.422680412371134,
"grad_norm": 0.5594025254249573,
"learning_rate": 0.0002855978647686832,
"loss": 0.4297,
"step": 4700
},
{
"epoch": 2.4355670103092786,
"grad_norm": 0.3621445894241333,
"learning_rate": 0.00028546441281138786,
"loss": 0.3336,
"step": 4725
},
{
"epoch": 2.448453608247423,
"grad_norm": 0.5683943033218384,
"learning_rate": 0.0002853309608540925,
"loss": 0.3395,
"step": 4750
},
{
"epoch": 2.461340206185567,
"grad_norm": 0.7661644816398621,
"learning_rate": 0.0002851975088967971,
"loss": 0.377,
"step": 4775
},
{
"epoch": 2.4742268041237114,
"grad_norm": 0.456636905670166,
"learning_rate": 0.00028506405693950175,
"loss": 0.3645,
"step": 4800
},
{
"epoch": 2.4871134020618557,
"grad_norm": 0.49251827597618103,
"learning_rate": 0.0002849306049822064,
"loss": 0.34,
"step": 4825
},
{
"epoch": 2.5,
"grad_norm": 0.32308971881866455,
"learning_rate": 0.000284797153024911,
"loss": 0.4092,
"step": 4850
},
{
"epoch": 2.5128865979381443,
"grad_norm": 0.9722476005554199,
"learning_rate": 0.00028466370106761563,
"loss": 0.3651,
"step": 4875
},
{
"epoch": 2.5257731958762886,
"grad_norm": 0.4552549123764038,
"learning_rate": 0.0002845302491103203,
"loss": 0.3837,
"step": 4900
},
{
"epoch": 2.538659793814433,
"grad_norm": 0.4148350656032562,
"learning_rate": 0.00028439679715302487,
"loss": 0.3931,
"step": 4925
},
{
"epoch": 2.551546391752577,
"grad_norm": 0.41644537448883057,
"learning_rate": 0.0002842633451957295,
"loss": 0.3677,
"step": 4950
},
{
"epoch": 2.5644329896907214,
"grad_norm": 0.3989255428314209,
"learning_rate": 0.00028412989323843416,
"loss": 0.3759,
"step": 4975
},
{
"epoch": 2.5773195876288657,
"grad_norm": 0.472091943025589,
"learning_rate": 0.00028399644128113876,
"loss": 0.4008,
"step": 5000
},
{
"epoch": 2.5902061855670104,
"grad_norm": 0.9913691282272339,
"learning_rate": 0.0002838629893238434,
"loss": 0.3555,
"step": 5025
},
{
"epoch": 2.6030927835051547,
"grad_norm": 0.425589382648468,
"learning_rate": 0.000283729537366548,
"loss": 0.3774,
"step": 5050
},
{
"epoch": 2.615979381443299,
"grad_norm": 0.7836791276931763,
"learning_rate": 0.00028359608540925264,
"loss": 0.317,
"step": 5075
},
{
"epoch": 2.6288659793814433,
"grad_norm": 0.7071629166603088,
"learning_rate": 0.0002834626334519573,
"loss": 0.4039,
"step": 5100
},
{
"epoch": 2.6417525773195876,
"grad_norm": 0.5756880640983582,
"learning_rate": 0.0002833291814946619,
"loss": 0.342,
"step": 5125
},
{
"epoch": 2.654639175257732,
"grad_norm": 0.425029993057251,
"learning_rate": 0.0002831957295373665,
"loss": 0.3765,
"step": 5150
},
{
"epoch": 2.667525773195876,
"grad_norm": 0.5212023854255676,
"learning_rate": 0.0002830622775800712,
"loss": 0.3681,
"step": 5175
},
{
"epoch": 2.680412371134021,
"grad_norm": 0.39988288283348083,
"learning_rate": 0.00028292882562277577,
"loss": 0.3525,
"step": 5200
},
{
"epoch": 2.693298969072165,
"grad_norm": 0.31254854798316956,
"learning_rate": 0.0002827953736654804,
"loss": 0.3521,
"step": 5225
},
{
"epoch": 2.7061855670103094,
"grad_norm": 0.5564956665039062,
"learning_rate": 0.00028266192170818506,
"loss": 0.3602,
"step": 5250
},
{
"epoch": 2.7190721649484537,
"grad_norm": 0.45266616344451904,
"learning_rate": 0.00028252846975088965,
"loss": 0.3462,
"step": 5275
},
{
"epoch": 2.731958762886598,
"grad_norm": 0.26552554965019226,
"learning_rate": 0.0002823950177935943,
"loss": 0.3787,
"step": 5300
},
{
"epoch": 2.7448453608247423,
"grad_norm": 0.5850217938423157,
"learning_rate": 0.00028226156583629894,
"loss": 0.3289,
"step": 5325
},
{
"epoch": 2.7577319587628866,
"grad_norm": 0.47871604561805725,
"learning_rate": 0.00028212811387900354,
"loss": 0.4246,
"step": 5350
},
{
"epoch": 2.770618556701031,
"grad_norm": 0.5951977372169495,
"learning_rate": 0.0002819946619217082,
"loss": 0.3326,
"step": 5375
},
{
"epoch": 2.783505154639175,
"grad_norm": 0.5806294083595276,
"learning_rate": 0.0002818612099644128,
"loss": 0.3952,
"step": 5400
},
{
"epoch": 2.7963917525773194,
"grad_norm": 0.6218500733375549,
"learning_rate": 0.0002817277580071174,
"loss": 0.3222,
"step": 5425
},
{
"epoch": 2.8092783505154637,
"grad_norm": 0.38604310154914856,
"learning_rate": 0.00028159430604982207,
"loss": 0.4408,
"step": 5450
},
{
"epoch": 2.822164948453608,
"grad_norm": 2.576910972595215,
"learning_rate": 0.00028146085409252666,
"loss": 0.355,
"step": 5475
},
{
"epoch": 2.8350515463917527,
"grad_norm": 0.28045961260795593,
"learning_rate": 0.0002813274021352313,
"loss": 0.3479,
"step": 5500
},
{
"epoch": 2.847938144329897,
"grad_norm": 0.6178203225135803,
"learning_rate": 0.00028119395017793596,
"loss": 0.3097,
"step": 5525
},
{
"epoch": 2.8608247422680413,
"grad_norm": 0.36241769790649414,
"learning_rate": 0.00028106049822064055,
"loss": 0.3762,
"step": 5550
},
{
"epoch": 2.8737113402061856,
"grad_norm": 0.6459288597106934,
"learning_rate": 0.0002809270462633452,
"loss": 0.3261,
"step": 5575
},
{
"epoch": 2.88659793814433,
"grad_norm": 0.458281546831131,
"learning_rate": 0.0002807935943060498,
"loss": 0.378,
"step": 5600
},
{
"epoch": 2.899484536082474,
"grad_norm": 0.4269348382949829,
"learning_rate": 0.00028066014234875443,
"loss": 0.3483,
"step": 5625
},
{
"epoch": 2.9123711340206184,
"grad_norm": 0.36018800735473633,
"learning_rate": 0.0002805266903914591,
"loss": 0.3592,
"step": 5650
},
{
"epoch": 2.925257731958763,
"grad_norm": 0.6075245141983032,
"learning_rate": 0.00028039323843416367,
"loss": 0.3,
"step": 5675
},
{
"epoch": 2.9381443298969074,
"grad_norm": 0.3082279860973358,
"learning_rate": 0.0002802597864768683,
"loss": 0.4306,
"step": 5700
},
{
"epoch": 2.9510309278350517,
"grad_norm": 0.6344396471977234,
"learning_rate": 0.0002801263345195729,
"loss": 0.3795,
"step": 5725
},
{
"epoch": 2.963917525773196,
"grad_norm": 0.3072182536125183,
"learning_rate": 0.00027999288256227756,
"loss": 0.3425,
"step": 5750
},
{
"epoch": 2.9768041237113403,
"grad_norm": 0.5076513886451721,
"learning_rate": 0.0002798594306049822,
"loss": 0.3214,
"step": 5775
},
{
"epoch": 2.9896907216494846,
"grad_norm": 0.34852054715156555,
"learning_rate": 0.0002797259786476868,
"loss": 0.3678,
"step": 5800
},
{
"epoch": 3.002577319587629,
"grad_norm": 0.36334386467933655,
"learning_rate": 0.00027959252669039144,
"loss": 0.3984,
"step": 5825
},
{
"epoch": 3.015463917525773,
"grad_norm": 0.46211138367652893,
"learning_rate": 0.00027945907473309604,
"loss": 0.2912,
"step": 5850
},
{
"epoch": 3.0283505154639174,
"grad_norm": 0.3730103373527527,
"learning_rate": 0.0002793256227758007,
"loss": 0.337,
"step": 5875
},
{
"epoch": 3.0412371134020617,
"grad_norm": 0.37066757678985596,
"learning_rate": 0.00027919217081850533,
"loss": 0.2959,
"step": 5900
},
{
"epoch": 3.054123711340206,
"grad_norm": 0.5712897181510925,
"learning_rate": 0.0002790587188612099,
"loss": 0.3279,
"step": 5925
},
{
"epoch": 3.0670103092783507,
"grad_norm": 0.39846915006637573,
"learning_rate": 0.00027892526690391457,
"loss": 0.3532,
"step": 5950
},
{
"epoch": 3.079896907216495,
"grad_norm": 0.3366047143936157,
"learning_rate": 0.0002787918149466192,
"loss": 0.3486,
"step": 5975
},
{
"epoch": 3.0927835051546393,
"grad_norm": 0.5053852796554565,
"learning_rate": 0.0002786583629893238,
"loss": 0.3571,
"step": 6000
},
{
"epoch": 3.0927835051546393,
"eval_loss": 0.48639050126075745,
"eval_runtime": 195.6249,
"eval_samples_per_second": 6.262,
"eval_steps_per_second": 0.787,
"eval_wer": 1.0073469387755103,
"step": 6000
},
{
"epoch": 3.1056701030927836,
"grad_norm": 0.35252827405929565,
"learning_rate": 0.00027852491103202845,
"loss": 0.3401,
"step": 6025
},
{
"epoch": 3.118556701030928,
"grad_norm": 0.3530094027519226,
"learning_rate": 0.00027839145907473305,
"loss": 0.3094,
"step": 6050
},
{
"epoch": 3.131443298969072,
"grad_norm": 0.9595320224761963,
"learning_rate": 0.0002782580071174377,
"loss": 0.3692,
"step": 6075
},
{
"epoch": 3.1443298969072164,
"grad_norm": 0.3585176467895508,
"learning_rate": 0.00027812455516014234,
"loss": 0.3351,
"step": 6100
},
{
"epoch": 3.1572164948453607,
"grad_norm": 0.4432651996612549,
"learning_rate": 0.00027799110320284693,
"loss": 0.3494,
"step": 6125
},
{
"epoch": 3.170103092783505,
"grad_norm": 0.5367820858955383,
"learning_rate": 0.0002778576512455516,
"loss": 0.3311,
"step": 6150
},
{
"epoch": 3.1829896907216493,
"grad_norm": 0.3430980443954468,
"learning_rate": 0.00027772419928825617,
"loss": 0.3357,
"step": 6175
},
{
"epoch": 3.195876288659794,
"grad_norm": 1.0889408588409424,
"learning_rate": 0.0002775907473309608,
"loss": 0.3056,
"step": 6200
},
{
"epoch": 3.2087628865979383,
"grad_norm": 0.3883308172225952,
"learning_rate": 0.00027745729537366546,
"loss": 0.3251,
"step": 6225
},
{
"epoch": 3.2216494845360826,
"grad_norm": 0.41912856698036194,
"learning_rate": 0.00027732384341637006,
"loss": 0.3292,
"step": 6250
},
{
"epoch": 3.234536082474227,
"grad_norm": 0.45084699988365173,
"learning_rate": 0.0002771903914590747,
"loss": 0.3516,
"step": 6275
},
{
"epoch": 3.247422680412371,
"grad_norm": 0.4294661581516266,
"learning_rate": 0.00027705693950177935,
"loss": 0.3297,
"step": 6300
},
{
"epoch": 3.2603092783505154,
"grad_norm": 0.25462472438812256,
"learning_rate": 0.00027692348754448394,
"loss": 0.3513,
"step": 6325
},
{
"epoch": 3.2731958762886597,
"grad_norm": 0.2990482747554779,
"learning_rate": 0.0002767900355871886,
"loss": 0.3472,
"step": 6350
},
{
"epoch": 3.286082474226804,
"grad_norm": 0.5474823713302612,
"learning_rate": 0.0002766565836298932,
"loss": 0.3404,
"step": 6375
},
{
"epoch": 3.2989690721649483,
"grad_norm": 0.31416311860084534,
"learning_rate": 0.00027652313167259783,
"loss": 0.3052,
"step": 6400
},
{
"epoch": 3.3118556701030926,
"grad_norm": 0.6990143656730652,
"learning_rate": 0.0002763896797153025,
"loss": 0.3661,
"step": 6425
},
{
"epoch": 3.3247422680412373,
"grad_norm": 0.4336131811141968,
"learning_rate": 0.00027625622775800707,
"loss": 0.2757,
"step": 6450
},
{
"epoch": 3.3376288659793816,
"grad_norm": 0.35333672165870667,
"learning_rate": 0.0002761227758007117,
"loss": 0.3275,
"step": 6475
},
{
"epoch": 3.350515463917526,
"grad_norm": 0.7743633985519409,
"learning_rate": 0.0002759893238434163,
"loss": 0.2855,
"step": 6500
},
{
"epoch": 3.36340206185567,
"grad_norm": 0.5316669344902039,
"learning_rate": 0.00027585587188612095,
"loss": 0.3742,
"step": 6525
},
{
"epoch": 3.3762886597938144,
"grad_norm": 0.3642534017562866,
"learning_rate": 0.0002757277580071174,
"loss": 0.3676,
"step": 6550
},
{
"epoch": 3.3891752577319587,
"grad_norm": 0.6164928078651428,
"learning_rate": 0.00027559430604982203,
"loss": 0.3658,
"step": 6575
},
{
"epoch": 3.402061855670103,
"grad_norm": 0.3114078938961029,
"learning_rate": 0.0002754608540925266,
"loss": 0.2681,
"step": 6600
},
{
"epoch": 3.4149484536082473,
"grad_norm": 0.8610777258872986,
"learning_rate": 0.00027532740213523127,
"loss": 0.3607,
"step": 6625
},
{
"epoch": 3.4278350515463916,
"grad_norm": 0.49727797508239746,
"learning_rate": 0.0002751939501779359,
"loss": 0.3306,
"step": 6650
},
{
"epoch": 3.4407216494845363,
"grad_norm": 0.3058234751224518,
"learning_rate": 0.0002750604982206405,
"loss": 0.3313,
"step": 6675
},
{
"epoch": 3.4536082474226806,
"grad_norm": 0.5317339301109314,
"learning_rate": 0.00027492704626334516,
"loss": 0.3316,
"step": 6700
},
{
"epoch": 3.466494845360825,
"grad_norm": 0.4639209806919098,
"learning_rate": 0.0002747935943060498,
"loss": 0.3773,
"step": 6725
},
{
"epoch": 3.479381443298969,
"grad_norm": 0.28237494826316833,
"learning_rate": 0.0002746601423487544,
"loss": 0.3275,
"step": 6750
},
{
"epoch": 3.4922680412371134,
"grad_norm": 0.44046372175216675,
"learning_rate": 0.00027452669039145904,
"loss": 0.3777,
"step": 6775
},
{
"epoch": 3.5051546391752577,
"grad_norm": 0.40542760491371155,
"learning_rate": 0.0002743932384341637,
"loss": 0.3197,
"step": 6800
},
{
"epoch": 3.518041237113402,
"grad_norm": 0.587062418460846,
"learning_rate": 0.0002742597864768683,
"loss": 0.3441,
"step": 6825
},
{
"epoch": 3.5309278350515463,
"grad_norm": 0.34003278613090515,
"learning_rate": 0.00027412633451957293,
"loss": 0.3189,
"step": 6850
},
{
"epoch": 3.5438144329896906,
"grad_norm": 0.40320533514022827,
"learning_rate": 0.0002739928825622776,
"loss": 0.3115,
"step": 6875
},
{
"epoch": 3.556701030927835,
"grad_norm": 0.34437236189842224,
"learning_rate": 0.00027385943060498217,
"loss": 0.3344,
"step": 6900
},
{
"epoch": 3.569587628865979,
"grad_norm": 0.42826181650161743,
"learning_rate": 0.0002737259786476868,
"loss": 0.3716,
"step": 6925
},
{
"epoch": 3.582474226804124,
"grad_norm": 0.4200308620929718,
"learning_rate": 0.00027359252669039146,
"loss": 0.3511,
"step": 6950
},
{
"epoch": 3.595360824742268,
"grad_norm": 0.3897481858730316,
"learning_rate": 0.00027345907473309605,
"loss": 0.3651,
"step": 6975
},
{
"epoch": 3.6082474226804124,
"grad_norm": 0.3066980838775635,
"learning_rate": 0.0002733256227758007,
"loss": 0.314,
"step": 7000
},
{
"epoch": 3.6211340206185567,
"grad_norm": 0.39002224802970886,
"learning_rate": 0.0002731921708185053,
"loss": 0.411,
"step": 7025
},
{
"epoch": 3.634020618556701,
"grad_norm": 0.8328560590744019,
"learning_rate": 0.00027305871886120994,
"loss": 0.3154,
"step": 7050
},
{
"epoch": 3.6469072164948453,
"grad_norm": 0.43831050395965576,
"learning_rate": 0.0002729252669039146,
"loss": 0.396,
"step": 7075
},
{
"epoch": 3.6597938144329896,
"grad_norm": 0.3992260694503784,
"learning_rate": 0.0002727918149466192,
"loss": 0.3142,
"step": 7100
},
{
"epoch": 3.6726804123711343,
"grad_norm": 0.2941119074821472,
"learning_rate": 0.0002726583629893238,
"loss": 0.3527,
"step": 7125
},
{
"epoch": 3.6855670103092786,
"grad_norm": 0.4881301522254944,
"learning_rate": 0.00027252491103202847,
"loss": 0.3171,
"step": 7150
},
{
"epoch": 3.698453608247423,
"grad_norm": 0.3898142874240875,
"learning_rate": 0.00027239145907473306,
"loss": 0.3817,
"step": 7175
},
{
"epoch": 3.711340206185567,
"grad_norm": 0.31580379605293274,
"learning_rate": 0.0002722580071174377,
"loss": 0.2595,
"step": 7200
},
{
"epoch": 3.7242268041237114,
"grad_norm": 0.3299170136451721,
"learning_rate": 0.00027212455516014236,
"loss": 0.3416,
"step": 7225
},
{
"epoch": 3.7371134020618557,
"grad_norm": 0.6964473724365234,
"learning_rate": 0.00027199110320284695,
"loss": 0.2942,
"step": 7250
},
{
"epoch": 3.75,
"grad_norm": 0.3565264046192169,
"learning_rate": 0.0002718576512455516,
"loss": 0.3283,
"step": 7275
},
{
"epoch": 3.7628865979381443,
"grad_norm": 0.28134214878082275,
"learning_rate": 0.00027172419928825624,
"loss": 0.3203,
"step": 7300
},
{
"epoch": 3.7757731958762886,
"grad_norm": 0.4196174442768097,
"learning_rate": 0.00027159074733096083,
"loss": 0.3396,
"step": 7325
},
{
"epoch": 3.788659793814433,
"grad_norm": 0.4770311415195465,
"learning_rate": 0.0002714572953736655,
"loss": 0.3144,
"step": 7350
},
{
"epoch": 3.801546391752577,
"grad_norm": 0.29138994216918945,
"learning_rate": 0.0002713238434163701,
"loss": 0.3403,
"step": 7375
},
{
"epoch": 3.8144329896907214,
"grad_norm": 0.47035351395606995,
"learning_rate": 0.0002711903914590747,
"loss": 0.2623,
"step": 7400
},
{
"epoch": 3.8273195876288657,
"grad_norm": 0.32780078053474426,
"learning_rate": 0.00027105693950177937,
"loss": 0.3863,
"step": 7425
},
{
"epoch": 3.8402061855670104,
"grad_norm": 0.4419979751110077,
"learning_rate": 0.00027092348754448396,
"loss": 0.3207,
"step": 7450
},
{
"epoch": 3.8530927835051547,
"grad_norm": 0.28167861700057983,
"learning_rate": 0.0002707900355871886,
"loss": 0.3308,
"step": 7475
},
{
"epoch": 3.865979381443299,
"grad_norm": 0.3411625921726227,
"learning_rate": 0.00027065658362989325,
"loss": 0.3387,
"step": 7500
},
{
"epoch": 3.8788659793814433,
"grad_norm": 0.31229549646377563,
"learning_rate": 0.00027052313167259785,
"loss": 0.408,
"step": 7525
},
{
"epoch": 3.8917525773195876,
"grad_norm": 0.3603403866291046,
"learning_rate": 0.0002703896797153025,
"loss": 0.3259,
"step": 7550
},
{
"epoch": 3.904639175257732,
"grad_norm": 0.2832438051700592,
"learning_rate": 0.0002702562277580071,
"loss": 0.3423,
"step": 7575
},
{
"epoch": 3.917525773195876,
"grad_norm": 0.3435526192188263,
"learning_rate": 0.00027012277580071173,
"loss": 0.2835,
"step": 7600
},
{
"epoch": 3.930412371134021,
"grad_norm": 0.3645428419113159,
"learning_rate": 0.0002699893238434164,
"loss": 0.3483,
"step": 7625
},
{
"epoch": 3.943298969072165,
"grad_norm": 0.7062532901763916,
"learning_rate": 0.00026985587188612097,
"loss": 0.2586,
"step": 7650
},
{
"epoch": 3.9561855670103094,
"grad_norm": 0.4606476426124573,
"learning_rate": 0.0002697224199288256,
"loss": 0.3939,
"step": 7675
},
{
"epoch": 3.9690721649484537,
"grad_norm": 0.4727219045162201,
"learning_rate": 0.0002695889679715302,
"loss": 0.3151,
"step": 7700
},
{
"epoch": 3.981958762886598,
"grad_norm": 0.2774180471897125,
"learning_rate": 0.00026945551601423486,
"loss": 0.3519,
"step": 7725
},
{
"epoch": 3.9948453608247423,
"grad_norm": 0.4793704152107239,
"learning_rate": 0.0002693220640569395,
"loss": 0.3023,
"step": 7750
},
{
"epoch": 4.007731958762887,
"grad_norm": 0.3699122965335846,
"learning_rate": 0.0002691886120996441,
"loss": 0.3664,
"step": 7775
},
{
"epoch": 4.020618556701031,
"grad_norm": 0.6096347570419312,
"learning_rate": 0.00026905516014234874,
"loss": 0.2579,
"step": 7800
},
{
"epoch": 4.033505154639175,
"grad_norm": 0.2851110100746155,
"learning_rate": 0.00026892170818505333,
"loss": 0.3347,
"step": 7825
},
{
"epoch": 4.046391752577319,
"grad_norm": 0.5487604141235352,
"learning_rate": 0.000268788256227758,
"loss": 0.2676,
"step": 7850
},
{
"epoch": 4.059278350515464,
"grad_norm": 0.29913634061813354,
"learning_rate": 0.0002686548042704626,
"loss": 0.3061,
"step": 7875
},
{
"epoch": 4.072164948453608,
"grad_norm": 0.3672349750995636,
"learning_rate": 0.0002685213523131672,
"loss": 0.2632,
"step": 7900
},
{
"epoch": 4.085051546391752,
"grad_norm": 0.5969862341880798,
"learning_rate": 0.00026838790035587187,
"loss": 0.2868,
"step": 7925
},
{
"epoch": 4.097938144329897,
"grad_norm": 0.4409433901309967,
"learning_rate": 0.0002682544483985765,
"loss": 0.2907,
"step": 7950
},
{
"epoch": 4.110824742268041,
"grad_norm": 0.4578838050365448,
"learning_rate": 0.0002681209964412811,
"loss": 0.3377,
"step": 7975
},
{
"epoch": 4.123711340206185,
"grad_norm": 0.302491694688797,
"learning_rate": 0.00026798754448398575,
"loss": 0.2857,
"step": 8000
},
{
"epoch": 4.123711340206185,
"eval_loss": 0.4647158980369568,
"eval_runtime": 189.5598,
"eval_samples_per_second": 6.462,
"eval_steps_per_second": 0.812,
"eval_wer": 1.0008163265306123,
"step": 8000
},
{
"epoch": 4.13659793814433,
"grad_norm": 0.5263584852218628,
"learning_rate": 0.00026785409252669034,
"loss": 0.3246,
"step": 8025
},
{
"epoch": 4.149484536082475,
"grad_norm": 0.48449578881263733,
"learning_rate": 0.000267720640569395,
"loss": 0.3163,
"step": 8050
},
{
"epoch": 4.162371134020619,
"grad_norm": 0.3020855784416199,
"learning_rate": 0.00026758718861209964,
"loss": 0.3248,
"step": 8075
},
{
"epoch": 4.175257731958763,
"grad_norm": 0.3554665148258209,
"learning_rate": 0.00026745373665480423,
"loss": 0.2863,
"step": 8100
},
{
"epoch": 4.188144329896907,
"grad_norm": 0.4055439531803131,
"learning_rate": 0.0002673202846975089,
"loss": 0.3289,
"step": 8125
},
{
"epoch": 4.201030927835052,
"grad_norm": 0.6498619914054871,
"learning_rate": 0.00026718683274021347,
"loss": 0.289,
"step": 8150
},
{
"epoch": 4.213917525773196,
"grad_norm": 0.23331980407238007,
"learning_rate": 0.0002670533807829181,
"loss": 0.2744,
"step": 8175
},
{
"epoch": 4.22680412371134,
"grad_norm": 0.5142727494239807,
"learning_rate": 0.00026691992882562276,
"loss": 0.3139,
"step": 8200
},
{
"epoch": 4.239690721649485,
"grad_norm": 0.4468501806259155,
"learning_rate": 0.00026678647686832735,
"loss": 0.3392,
"step": 8225
},
{
"epoch": 4.252577319587629,
"grad_norm": 0.36652007699012756,
"learning_rate": 0.000266653024911032,
"loss": 0.2499,
"step": 8250
},
{
"epoch": 4.265463917525773,
"grad_norm": 0.3004089891910553,
"learning_rate": 0.0002665195729537366,
"loss": 0.31,
"step": 8275
},
{
"epoch": 4.278350515463917,
"grad_norm": 0.4099670946598053,
"learning_rate": 0.00026638612099644124,
"loss": 0.2798,
"step": 8300
},
{
"epoch": 4.291237113402062,
"grad_norm": 0.26817041635513306,
"learning_rate": 0.0002662526690391459,
"loss": 0.3055,
"step": 8325
},
{
"epoch": 4.304123711340206,
"grad_norm": 0.4156699478626251,
"learning_rate": 0.0002661192170818505,
"loss": 0.2942,
"step": 8350
},
{
"epoch": 4.31701030927835,
"grad_norm": 2.611400842666626,
"learning_rate": 0.0002659857651245551,
"loss": 0.3659,
"step": 8375
},
{
"epoch": 4.329896907216495,
"grad_norm": 0.46959781646728516,
"learning_rate": 0.00026585231316725977,
"loss": 0.2982,
"step": 8400
},
{
"epoch": 4.342783505154639,
"grad_norm": 0.2867629826068878,
"learning_rate": 0.00026571886120996436,
"loss": 0.3511,
"step": 8425
},
{
"epoch": 4.355670103092783,
"grad_norm": 0.38449224829673767,
"learning_rate": 0.000265585409252669,
"loss": 0.2826,
"step": 8450
},
{
"epoch": 4.368556701030927,
"grad_norm": 0.4524473249912262,
"learning_rate": 0.0002654519572953736,
"loss": 0.313,
"step": 8475
},
{
"epoch": 4.381443298969073,
"grad_norm": 0.3989889919757843,
"learning_rate": 0.00026531850533807825,
"loss": 0.2825,
"step": 8500
},
{
"epoch": 4.394329896907217,
"grad_norm": 0.3007287085056305,
"learning_rate": 0.0002651850533807829,
"loss": 0.3369,
"step": 8525
},
{
"epoch": 4.407216494845361,
"grad_norm": 0.7760800123214722,
"learning_rate": 0.0002650516014234875,
"loss": 0.2835,
"step": 8550
},
{
"epoch": 4.420103092783505,
"grad_norm": 0.27871614694595337,
"learning_rate": 0.00026491814946619214,
"loss": 0.3172,
"step": 8575
},
{
"epoch": 4.43298969072165,
"grad_norm": 0.48607179522514343,
"learning_rate": 0.0002647846975088968,
"loss": 0.303,
"step": 8600
},
{
"epoch": 4.445876288659794,
"grad_norm": 0.3540396988391876,
"learning_rate": 0.0002646512455516014,
"loss": 0.355,
"step": 8625
},
{
"epoch": 4.458762886597938,
"grad_norm": 1.2004367113113403,
"learning_rate": 0.000264517793594306,
"loss": 0.3197,
"step": 8650
},
{
"epoch": 4.471649484536083,
"grad_norm": 0.2405807226896286,
"learning_rate": 0.00026438434163701067,
"loss": 0.3179,
"step": 8675
},
{
"epoch": 4.484536082474227,
"grad_norm": 0.36553826928138733,
"learning_rate": 0.00026425088967971526,
"loss": 0.2793,
"step": 8700
},
{
"epoch": 4.497422680412371,
"grad_norm": 0.26435115933418274,
"learning_rate": 0.0002641174377224199,
"loss": 0.2849,
"step": 8725
},
{
"epoch": 4.510309278350515,
"grad_norm": 0.6196132302284241,
"learning_rate": 0.00026398398576512455,
"loss": 0.3267,
"step": 8750
},
{
"epoch": 4.52319587628866,
"grad_norm": 0.2987557649612427,
"learning_rate": 0.00026385053380782915,
"loss": 0.3152,
"step": 8775
},
{
"epoch": 4.536082474226804,
"grad_norm": 0.4445992112159729,
"learning_rate": 0.0002637170818505338,
"loss": 0.3,
"step": 8800
},
{
"epoch": 4.548969072164948,
"grad_norm": 0.4059930741786957,
"learning_rate": 0.00026358362989323844,
"loss": 0.3395,
"step": 8825
},
{
"epoch": 4.561855670103093,
"grad_norm": 0.522637665271759,
"learning_rate": 0.00026345017793594303,
"loss": 0.2783,
"step": 8850
},
{
"epoch": 4.574742268041237,
"grad_norm": 0.1963280737400055,
"learning_rate": 0.0002633167259786477,
"loss": 0.2979,
"step": 8875
},
{
"epoch": 4.587628865979381,
"grad_norm": 0.7622120976448059,
"learning_rate": 0.00026318327402135227,
"loss": 0.3204,
"step": 8900
},
{
"epoch": 4.600515463917525,
"grad_norm": 0.2749217450618744,
"learning_rate": 0.0002630498220640569,
"loss": 0.3365,
"step": 8925
},
{
"epoch": 4.61340206185567,
"grad_norm": 0.5664824843406677,
"learning_rate": 0.00026291637010676156,
"loss": 0.2772,
"step": 8950
},
{
"epoch": 4.626288659793815,
"grad_norm": 0.3661792278289795,
"learning_rate": 0.00026278291814946616,
"loss": 0.3314,
"step": 8975
},
{
"epoch": 4.639175257731958,
"grad_norm": 0.37070122361183167,
"learning_rate": 0.0002626494661921708,
"loss": 0.2991,
"step": 9000
},
{
"epoch": 4.652061855670103,
"grad_norm": 0.4963271915912628,
"learning_rate": 0.00026251601423487545,
"loss": 0.3312,
"step": 9025
},
{
"epoch": 4.664948453608248,
"grad_norm": 0.5193173885345459,
"learning_rate": 0.00026238256227758004,
"loss": 0.2914,
"step": 9050
},
{
"epoch": 4.677835051546392,
"grad_norm": 0.3158729076385498,
"learning_rate": 0.0002622491103202847,
"loss": 0.3317,
"step": 9075
},
{
"epoch": 4.690721649484536,
"grad_norm": 0.37290090322494507,
"learning_rate": 0.00026211565836298934,
"loss": 0.2612,
"step": 9100
},
{
"epoch": 4.703608247422681,
"grad_norm": 0.334089070558548,
"learning_rate": 0.00026198220640569393,
"loss": 0.3235,
"step": 9125
},
{
"epoch": 4.716494845360825,
"grad_norm": 0.3259222209453583,
"learning_rate": 0.0002618487544483986,
"loss": 0.2952,
"step": 9150
},
{
"epoch": 4.729381443298969,
"grad_norm": 0.2776556611061096,
"learning_rate": 0.0002617153024911032,
"loss": 0.3405,
"step": 9175
},
{
"epoch": 4.742268041237113,
"grad_norm": 0.33884450793266296,
"learning_rate": 0.0002615818505338078,
"loss": 0.2813,
"step": 9200
},
{
"epoch": 4.755154639175258,
"grad_norm": 0.48323071002960205,
"learning_rate": 0.00026144839857651246,
"loss": 0.3044,
"step": 9225
},
{
"epoch": 4.768041237113402,
"grad_norm": 0.32996150851249695,
"learning_rate": 0.00026131494661921705,
"loss": 0.2817,
"step": 9250
},
{
"epoch": 4.780927835051546,
"grad_norm": 0.3732437491416931,
"learning_rate": 0.0002611814946619217,
"loss": 0.3165,
"step": 9275
},
{
"epoch": 4.793814432989691,
"grad_norm": 0.5148000717163086,
"learning_rate": 0.00026104804270462635,
"loss": 0.2636,
"step": 9300
},
{
"epoch": 4.806701030927835,
"grad_norm": 0.3256881833076477,
"learning_rate": 0.00026091459074733094,
"loss": 0.3677,
"step": 9325
},
{
"epoch": 4.819587628865979,
"grad_norm": 0.5248320698738098,
"learning_rate": 0.0002607811387900356,
"loss": 0.2959,
"step": 9350
},
{
"epoch": 4.832474226804123,
"grad_norm": 0.25363728404045105,
"learning_rate": 0.0002606476868327402,
"loss": 0.355,
"step": 9375
},
{
"epoch": 4.845360824742268,
"grad_norm": 1.110967755317688,
"learning_rate": 0.0002605142348754448,
"loss": 0.3208,
"step": 9400
},
{
"epoch": 4.858247422680412,
"grad_norm": 0.3510916531085968,
"learning_rate": 0.00026038078291814947,
"loss": 0.3421,
"step": 9425
},
{
"epoch": 4.871134020618557,
"grad_norm": 0.33312925696372986,
"learning_rate": 0.00026024733096085406,
"loss": 0.3046,
"step": 9450
},
{
"epoch": 4.8840206185567006,
"grad_norm": 0.32453814148902893,
"learning_rate": 0.0002601138790035587,
"loss": 0.3294,
"step": 9475
},
{
"epoch": 4.896907216494846,
"grad_norm": 0.3632301092147827,
"learning_rate": 0.0002599804270462633,
"loss": 0.3261,
"step": 9500
},
{
"epoch": 4.90979381443299,
"grad_norm": 0.3007452189922333,
"learning_rate": 0.00025984697508896795,
"loss": 0.3524,
"step": 9525
},
{
"epoch": 4.922680412371134,
"grad_norm": 0.47182586789131165,
"learning_rate": 0.0002597135231316726,
"loss": 0.3287,
"step": 9550
},
{
"epoch": 4.935567010309279,
"grad_norm": 0.3959861397743225,
"learning_rate": 0.0002595800711743772,
"loss": 0.3134,
"step": 9575
},
{
"epoch": 4.948453608247423,
"grad_norm": 0.28187814354896545,
"learning_rate": 0.00025944661921708183,
"loss": 0.3154,
"step": 9600
},
{
"epoch": 4.961340206185567,
"grad_norm": 0.32686540484428406,
"learning_rate": 0.0002593131672597865,
"loss": 0.3464,
"step": 9625
},
{
"epoch": 4.974226804123711,
"grad_norm": 0.46848079562187195,
"learning_rate": 0.0002591797153024911,
"loss": 0.2753,
"step": 9650
},
{
"epoch": 4.987113402061856,
"grad_norm": 0.25264793634414673,
"learning_rate": 0.0002590462633451957,
"loss": 0.3403,
"step": 9675
},
{
"epoch": 5.0,
"grad_norm": 0.4163696765899658,
"learning_rate": 0.0002589128113879003,
"loss": 0.2669,
"step": 9700
},
{
"epoch": 5.012886597938144,
"grad_norm": 0.29742294549942017,
"learning_rate": 0.00025877935943060496,
"loss": 0.3203,
"step": 9725
},
{
"epoch": 5.025773195876289,
"grad_norm": 0.8651419281959534,
"learning_rate": 0.0002586459074733096,
"loss": 0.2589,
"step": 9750
},
{
"epoch": 5.038659793814433,
"grad_norm": 0.2916441261768341,
"learning_rate": 0.0002585124555160142,
"loss": 0.3,
"step": 9775
},
{
"epoch": 5.051546391752577,
"grad_norm": 1.4987293481826782,
"learning_rate": 0.00025837900355871884,
"loss": 0.3148,
"step": 9800
},
{
"epoch": 5.064432989690721,
"grad_norm": 0.46689650416374207,
"learning_rate": 0.00025824555160142344,
"loss": 0.2935,
"step": 9825
},
{
"epoch": 5.077319587628866,
"grad_norm": 0.3726617991924286,
"learning_rate": 0.0002581120996441281,
"loss": 0.2871,
"step": 9850
},
{
"epoch": 5.09020618556701,
"grad_norm": 0.29355180263519287,
"learning_rate": 0.00025797864768683273,
"loss": 0.2422,
"step": 9875
},
{
"epoch": 5.103092783505154,
"grad_norm": 0.6249547600746155,
"learning_rate": 0.0002578451957295373,
"loss": 0.2749,
"step": 9900
},
{
"epoch": 5.115979381443299,
"grad_norm": 0.42613521218299866,
"learning_rate": 0.00025771174377224197,
"loss": 0.2998,
"step": 9925
},
{
"epoch": 5.128865979381443,
"grad_norm": 0.4285449981689453,
"learning_rate": 0.00025757829181494656,
"loss": 0.3241,
"step": 9950
},
{
"epoch": 5.141752577319588,
"grad_norm": 0.2756471037864685,
"learning_rate": 0.0002574448398576512,
"loss": 0.2788,
"step": 9975
},
{
"epoch": 5.154639175257732,
"grad_norm": 0.650787889957428,
"learning_rate": 0.00025731138790035586,
"loss": 0.2628,
"step": 10000
},
{
"epoch": 5.154639175257732,
"eval_loss": 0.5844400525093079,
"eval_runtime": 196.2011,
"eval_samples_per_second": 6.244,
"eval_steps_per_second": 0.785,
"eval_wer": 1.006530612244898,
"step": 10000
},
{
"epoch": 5.154639175257732,
"step": 10000,
"total_flos": 5.52048529474528e+19,
"train_loss": 0.6429409675121307,
"train_runtime": 62307.8234,
"train_samples_per_second": 22.413,
"train_steps_per_second": 0.934
}
],
"logging_steps": 25,
"max_steps": 58200,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 2000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.52048529474528e+19,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}
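
A minimal sketch of how this log could be inspected offline, assuming the JSON above is saved locally as trainer_state.json (the path is illustrative, not part of the original file). It reads only keys that actually appear here: best_metric, best_model_checkpoint, and the eval_* entries inside log_history.

import json

# Load the state file written by transformers.Trainer (path is illustrative).
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

print("best eval_loss:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# log_history mixes training and evaluation records; evaluation entries are the
# ones carrying "eval_loss" (logged every eval_steps = 2000 steps in this run).
evals = [entry for entry in state["log_history"] if "eval_loss" in entry]
for entry in evals:
    print(f'step {entry["step"]:>6}: '
          f'eval_loss={entry["eval_loss"]:.4f}, eval_wer={entry["eval_wer"]:.4f}')

Running this against the file above would list the four evaluation points (steps 2000, 4000, 6000, 10000) and confirm that checkpoint-4000 holds the best eval_loss of about 0.463.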