{ "best_metric": 0.21830901145782886, "best_model_checkpoint": "./checkpoints/w2v-multilingual-v1.3/checkpoint-48000", "epoch": 4.9430470123022845, "eval_steps": 2000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001647627416520211, "grad_norm": 0.2767059803009033, "learning_rate": 4.9431537320810675e-08, "loss": 0.1923, "step": 20 }, { "epoch": 0.003295254833040422, "grad_norm": 2.006911039352417, "learning_rate": 1.977261492832427e-07, "loss": 0.2947, "step": 40 }, { "epoch": 0.004942882249560633, "grad_norm": 0.3714193105697632, "learning_rate": 3.624979403526116e-07, "loss": 0.2955, "step": 60 }, { "epoch": 0.006590509666080844, "grad_norm": 0.3106062412261963, "learning_rate": 5.272697314219806e-07, "loss": 0.2754, "step": 80 }, { "epoch": 0.008238137082601054, "grad_norm": 3.2078254222869873, "learning_rate": 6.838029329378811e-07, "loss": 0.3177, "step": 100 }, { "epoch": 0.009885764499121265, "grad_norm": 0.38503074645996094, "learning_rate": 8.485747240072501e-07, "loss": 0.1905, "step": 120 }, { "epoch": 0.011533391915641476, "grad_norm": 0.15724419057369232, "learning_rate": 1.013346515076619e-06, "loss": 0.2942, "step": 140 }, { "epoch": 0.013181019332161687, "grad_norm": 0.5840986967086792, "learning_rate": 1.1781183061459877e-06, "loss": 0.2907, "step": 160 }, { "epoch": 0.014828646748681899, "grad_norm": 0.23122897744178772, "learning_rate": 1.3346515076618883e-06, "loss": 0.2811, "step": 180 }, { "epoch": 0.016476274165202108, "grad_norm": 2.7248637676239014, "learning_rate": 1.4994232987312573e-06, "loss": 0.3168, "step": 200 }, { "epoch": 0.01812390158172232, "grad_norm": 1.0846350193023682, "learning_rate": 1.6641950898006263e-06, "loss": 0.1979, "step": 220 }, { "epoch": 0.01977152899824253, "grad_norm": 1.7905633449554443, "learning_rate": 1.828966880869995e-06, "loss": 0.2656, "step": 240 }, { "epoch": 0.021419156414762743, "grad_norm": 0.31305554509162903, "learning_rate": 1.993738671939364e-06, "loss": 0.272, "step": 260 }, { "epoch": 0.023066783831282953, "grad_norm": 0.31258106231689453, "learning_rate": 2.158510463008733e-06, "loss": 0.2693, "step": 280 }, { "epoch": 0.024714411247803162, "grad_norm": 1.783349871635437, "learning_rate": 2.323282254078102e-06, "loss": 0.3116, "step": 300 }, { "epoch": 0.026362038664323375, "grad_norm": 0.6936488747596741, "learning_rate": 2.488054045147471e-06, "loss": 0.1931, "step": 320 }, { "epoch": 0.028009666080843584, "grad_norm": 0.6185577511787415, "learning_rate": 2.65282583621684e-06, "loss": 0.258, "step": 340 }, { "epoch": 0.029657293497363797, "grad_norm": 0.3592207729816437, "learning_rate": 2.8175976272862085e-06, "loss": 0.2401, "step": 360 }, { "epoch": 0.03130492091388401, "grad_norm": 1.2324920892715454, "learning_rate": 2.982369418355578e-06, "loss": 0.2371, "step": 380 }, { "epoch": 0.032952548330404216, "grad_norm": 1.5560214519500732, "learning_rate": 3.147141209424947e-06, "loss": 0.2414, "step": 400 }, { "epoch": 0.03460017574692443, "grad_norm": 0.8133947253227234, "learning_rate": 3.3119130004943154e-06, "loss": 0.1744, "step": 420 }, { "epoch": 0.03624780316344464, "grad_norm": 0.8599107265472412, "learning_rate": 3.4766847915636844e-06, "loss": 0.2536, "step": 440 }, { "epoch": 0.03789543057996485, "grad_norm": 0.3699595034122467, "learning_rate": 3.641456582633054e-06, "loss": 0.2552, "step": 460 }, { "epoch": 0.03954305799648506, "grad_norm": 1.2955116033554077, "learning_rate": 3.797989784148954e-06, "loss": 0.2532, "step": 480 }, { "epoch": 0.041190685413005274, "grad_norm": 11.518170356750488, "learning_rate": 3.962761575218322e-06, "loss": 0.2507, "step": 500 }, { "epoch": 0.042838312829525486, "grad_norm": 0.9779248833656311, "learning_rate": 4.127533366287692e-06, "loss": 0.1733, "step": 520 }, { "epoch": 0.04448594024604569, "grad_norm": 0.5386682152748108, "learning_rate": 4.29230515735706e-06, "loss": 0.2415, "step": 540 }, { "epoch": 0.046133567662565905, "grad_norm": 2.7366793155670166, "learning_rate": 4.457076948426429e-06, "loss": 0.2409, "step": 560 }, { "epoch": 0.04778119507908612, "grad_norm": 1.9912066459655762, "learning_rate": 4.621848739495799e-06, "loss": 0.2134, "step": 580 }, { "epoch": 0.049428822495606324, "grad_norm": 4.058573246002197, "learning_rate": 4.786620530565167e-06, "loss": 0.2456, "step": 600 }, { "epoch": 0.05107644991212654, "grad_norm": 0.37456750869750977, "learning_rate": 4.951392321634536e-06, "loss": 0.1589, "step": 620 }, { "epoch": 0.05272407732864675, "grad_norm": 0.7950440645217896, "learning_rate": 5.116164112703905e-06, "loss": 0.2485, "step": 640 }, { "epoch": 0.05437170474516696, "grad_norm": 1.884665846824646, "learning_rate": 5.280935903773274e-06, "loss": 0.2197, "step": 660 }, { "epoch": 0.05601933216168717, "grad_norm": 0.32170844078063965, "learning_rate": 5.445707694842643e-06, "loss": 0.2111, "step": 680 }, { "epoch": 0.05766695957820738, "grad_norm": 1.8230172395706177, "learning_rate": 5.610479485912012e-06, "loss": 0.2379, "step": 700 }, { "epoch": 0.059314586994727594, "grad_norm": 1.2472524642944336, "learning_rate": 5.775251276981381e-06, "loss": 0.1684, "step": 720 }, { "epoch": 0.0609622144112478, "grad_norm": 0.29022061824798584, "learning_rate": 5.94002306805075e-06, "loss": 0.2201, "step": 740 }, { "epoch": 0.06260984182776802, "grad_norm": 0.49721184372901917, "learning_rate": 6.104794859120118e-06, "loss": 0.2297, "step": 760 }, { "epoch": 0.06425746924428823, "grad_norm": 0.6133716106414795, "learning_rate": 6.269566650189487e-06, "loss": 0.3173, "step": 780 }, { "epoch": 0.06590509666080843, "grad_norm": 0.9667792320251465, "learning_rate": 6.434338441258857e-06, "loss": 0.2349, "step": 800 }, { "epoch": 0.06755272407732865, "grad_norm": 0.3177216053009033, "learning_rate": 6.599110232328226e-06, "loss": 0.1635, "step": 820 }, { "epoch": 0.06920035149384886, "grad_norm": 0.8457621335983276, "learning_rate": 6.763882023397594e-06, "loss": 0.2777, "step": 840 }, { "epoch": 0.07084797891036906, "grad_norm": 0.3330087661743164, "learning_rate": 6.928653814466963e-06, "loss": 0.2154, "step": 860 }, { "epoch": 0.07249560632688928, "grad_norm": 0.5845814943313599, "learning_rate": 7.093425605536333e-06, "loss": 0.2164, "step": 880 }, { "epoch": 0.07414323374340949, "grad_norm": 2.325303554534912, "learning_rate": 7.258197396605701e-06, "loss": 0.2068, "step": 900 }, { "epoch": 0.0757908611599297, "grad_norm": 0.21893823146820068, "learning_rate": 7.414730598121602e-06, "loss": 0.1621, "step": 920 }, { "epoch": 0.07743848857644992, "grad_norm": 0.5854327082633972, "learning_rate": 7.5795023891909705e-06, "loss": 0.2281, "step": 940 }, { "epoch": 0.07908611599297012, "grad_norm": 0.2406030148267746, "learning_rate": 7.74427418026034e-06, "loss": 0.2342, "step": 960 }, { "epoch": 0.08073374340949033, "grad_norm": 1.3764126300811768, "learning_rate": 7.90904597132971e-06, "loss": 0.2018, "step": 980 }, { "epoch": 0.08238137082601055, "grad_norm": 0.9587862491607666, "learning_rate": 8.073817762399077e-06, "loss": 0.2489, "step": 1000 }, { "epoch": 0.08402899824253075, "grad_norm": 0.9726558923721313, "learning_rate": 8.238589553468447e-06, "loss": 0.154, "step": 1020 }, { "epoch": 0.08567662565905097, "grad_norm": 1.9828400611877441, "learning_rate": 8.403361344537817e-06, "loss": 0.2336, "step": 1040 }, { "epoch": 0.08732425307557118, "grad_norm": 0.14761961996555328, "learning_rate": 8.568133135607183e-06, "loss": 0.1949, "step": 1060 }, { "epoch": 0.08897188049209138, "grad_norm": 0.39077144861221313, "learning_rate": 8.732904926676553e-06, "loss": 0.2067, "step": 1080 }, { "epoch": 0.0906195079086116, "grad_norm": 2.3257837295532227, "learning_rate": 8.897676717745921e-06, "loss": 0.2184, "step": 1100 }, { "epoch": 0.09226713532513181, "grad_norm": 1.0966060161590576, "learning_rate": 9.062448508815291e-06, "loss": 0.1655, "step": 1120 }, { "epoch": 0.09391476274165202, "grad_norm": 0.5648412704467773, "learning_rate": 9.227220299884661e-06, "loss": 0.2097, "step": 1140 }, { "epoch": 0.09556239015817224, "grad_norm": 0.49816444516181946, "learning_rate": 9.39199209095403e-06, "loss": 0.2052, "step": 1160 }, { "epoch": 0.09721001757469244, "grad_norm": 1.1164054870605469, "learning_rate": 9.556763882023397e-06, "loss": 0.2298, "step": 1180 }, { "epoch": 0.09885764499121265, "grad_norm": 0.9453270435333252, "learning_rate": 9.721535673092767e-06, "loss": 0.2157, "step": 1200 }, { "epoch": 0.10050527240773287, "grad_norm": 0.4588276743888855, "learning_rate": 9.886307464162135e-06, "loss": 0.1433, "step": 1220 }, { "epoch": 0.10215289982425307, "grad_norm": 0.47535696625709534, "learning_rate": 1.0051079255231505e-05, "loss": 0.2115, "step": 1240 }, { "epoch": 0.10380052724077328, "grad_norm": 0.585959792137146, "learning_rate": 1.0215851046300875e-05, "loss": 0.2245, "step": 1260 }, { "epoch": 0.1054481546572935, "grad_norm": 0.24861204624176025, "learning_rate": 1.0380622837370241e-05, "loss": 0.1981, "step": 1280 }, { "epoch": 0.1070957820738137, "grad_norm": 31.602128982543945, "learning_rate": 1.0545394628439611e-05, "loss": 0.2265, "step": 1300 }, { "epoch": 0.10874340949033393, "grad_norm": 0.6234269142150879, "learning_rate": 1.0710166419508981e-05, "loss": 0.154, "step": 1320 }, { "epoch": 0.11039103690685413, "grad_norm": 1.2423540353775024, "learning_rate": 1.087493821057835e-05, "loss": 0.2062, "step": 1340 }, { "epoch": 0.11203866432337434, "grad_norm": 0.2090279757976532, "learning_rate": 1.1039710001647719e-05, "loss": 0.2214, "step": 1360 }, { "epoch": 0.11368629173989456, "grad_norm": 0.6145613193511963, "learning_rate": 1.1204481792717087e-05, "loss": 0.1942, "step": 1380 }, { "epoch": 0.11533391915641476, "grad_norm": 0.9004138708114624, "learning_rate": 1.1369253583786455e-05, "loss": 0.2271, "step": 1400 }, { "epoch": 0.11698154657293497, "grad_norm": 1.609165906906128, "learning_rate": 1.1534025374855825e-05, "loss": 0.1606, "step": 1420 }, { "epoch": 0.11862917398945519, "grad_norm": 0.8725568652153015, "learning_rate": 1.1698797165925195e-05, "loss": 0.204, "step": 1440 }, { "epoch": 0.1202768014059754, "grad_norm": 1.8169455528259277, "learning_rate": 1.1863568956994563e-05, "loss": 0.207, "step": 1460 }, { "epoch": 0.1219244288224956, "grad_norm": 0.37334388494491577, "learning_rate": 1.2028340748063933e-05, "loss": 0.2033, "step": 1480 }, { "epoch": 0.12357205623901582, "grad_norm": 15.727474212646484, "learning_rate": 1.2193112539133301e-05, "loss": 0.3298, "step": 1500 }, { "epoch": 0.12521968365553604, "grad_norm": 4.70903205871582, "learning_rate": 1.2357884330202669e-05, "loss": 0.159, "step": 1520 }, { "epoch": 0.12686731107205623, "grad_norm": 0.4843326807022095, "learning_rate": 1.2522656121272039e-05, "loss": 0.2141, "step": 1540 }, { "epoch": 0.12851493848857645, "grad_norm": 0.572084367275238, "learning_rate": 1.2687427912341407e-05, "loss": 0.2182, "step": 1560 }, { "epoch": 0.13016256590509667, "grad_norm": 0.31078797578811646, "learning_rate": 1.2852199703410777e-05, "loss": 0.1994, "step": 1580 }, { "epoch": 0.13181019332161686, "grad_norm": 9.014205932617188, "learning_rate": 1.3016971494480145e-05, "loss": 0.2089, "step": 1600 }, { "epoch": 0.13345782073813708, "grad_norm": 1.0150245428085327, "learning_rate": 1.3181743285549513e-05, "loss": 0.1521, "step": 1620 }, { "epoch": 0.1351054481546573, "grad_norm": 0.25271451473236084, "learning_rate": 1.3346515076618885e-05, "loss": 0.1996, "step": 1640 }, { "epoch": 0.1367530755711775, "grad_norm": 0.47118502855300903, "learning_rate": 1.3511286867688253e-05, "loss": 0.2059, "step": 1660 }, { "epoch": 0.13840070298769772, "grad_norm": 0.5134350657463074, "learning_rate": 1.3676058658757621e-05, "loss": 0.1935, "step": 1680 }, { "epoch": 0.14004833040421794, "grad_norm": 1.0354816913604736, "learning_rate": 1.384083044982699e-05, "loss": 0.2103, "step": 1700 }, { "epoch": 0.14169595782073813, "grad_norm": 0.5588876605033875, "learning_rate": 1.4005602240896359e-05, "loss": 0.1598, "step": 1720 }, { "epoch": 0.14334358523725835, "grad_norm": 0.7309175133705139, "learning_rate": 1.4170374031965727e-05, "loss": 0.2204, "step": 1740 }, { "epoch": 0.14499121265377857, "grad_norm": 0.6155902743339539, "learning_rate": 1.4335145823035099e-05, "loss": 0.2133, "step": 1760 }, { "epoch": 0.14663884007029876, "grad_norm": 0.7660940885543823, "learning_rate": 1.4499917614104467e-05, "loss": 0.2065, "step": 1780 }, { "epoch": 0.14828646748681898, "grad_norm": 1.1954026222229004, "learning_rate": 1.4664689405173835e-05, "loss": 0.2147, "step": 1800 }, { "epoch": 0.1499340949033392, "grad_norm": 0.4249323606491089, "learning_rate": 1.4829461196243205e-05, "loss": 0.1553, "step": 1820 }, { "epoch": 0.1515817223198594, "grad_norm": 2.9014129638671875, "learning_rate": 1.4994232987312573e-05, "loss": 0.2208, "step": 1840 }, { "epoch": 0.1532293497363796, "grad_norm": 1.6474498510360718, "learning_rate": 1.5159004778381941e-05, "loss": 0.209, "step": 1860 }, { "epoch": 0.15487697715289983, "grad_norm": 0.1585623174905777, "learning_rate": 1.532377656945131e-05, "loss": 0.1873, "step": 1880 }, { "epoch": 0.15652460456942002, "grad_norm": 1.171941876411438, "learning_rate": 1.548854836052068e-05, "loss": 0.2389, "step": 1900 }, { "epoch": 0.15817223198594024, "grad_norm": 0.48890382051467896, "learning_rate": 1.5653320151590047e-05, "loss": 0.1679, "step": 1920 }, { "epoch": 0.15981985940246046, "grad_norm": 0.5568016767501831, "learning_rate": 1.581809194265942e-05, "loss": 0.1968, "step": 1940 }, { "epoch": 0.16146748681898065, "grad_norm": 0.9775394797325134, "learning_rate": 1.5982863733728787e-05, "loss": 0.2208, "step": 1960 }, { "epoch": 0.16311511423550087, "grad_norm": 0.60302734375, "learning_rate": 1.6147635524798155e-05, "loss": 0.1929, "step": 1980 }, { "epoch": 0.1647627416520211, "grad_norm": 1.7513552904129028, "learning_rate": 1.6312407315867526e-05, "loss": 0.2055, "step": 2000 }, { "epoch": 0.1647627416520211, "eval_loss": 0.5699400305747986, "eval_runtime": 686.8117, "eval_samples_per_second": 31.725, "eval_steps_per_second": 7.932, "eval_wer": 0.23469093535410654, "step": 2000 }, { "epoch": 0.16641036906854131, "grad_norm": 1.049734354019165, "learning_rate": 1.6477179106936894e-05, "loss": 0.1608, "step": 2020 }, { "epoch": 0.1680579964850615, "grad_norm": 1.7113618850708008, "learning_rate": 1.6641950898006263e-05, "loss": 0.2008, "step": 2040 }, { "epoch": 0.16970562390158173, "grad_norm": 0.3202134370803833, "learning_rate": 1.6806722689075634e-05, "loss": 0.1973, "step": 2060 }, { "epoch": 0.17135325131810195, "grad_norm": 0.3612610697746277, "learning_rate": 1.6971494480145e-05, "loss": 0.1732, "step": 2080 }, { "epoch": 0.17300087873462214, "grad_norm": 1.8115849494934082, "learning_rate": 1.7136266271214367e-05, "loss": 0.2138, "step": 2100 }, { "epoch": 0.17464850615114236, "grad_norm": 0.7046949863433838, "learning_rate": 1.7301038062283735e-05, "loss": 0.1571, "step": 2120 }, { "epoch": 0.17629613356766258, "grad_norm": 0.5983096957206726, "learning_rate": 1.7465809853353107e-05, "loss": 0.2092, "step": 2140 }, { "epoch": 0.17794376098418277, "grad_norm": 0.17064958810806274, "learning_rate": 1.7630581644422475e-05, "loss": 0.2083, "step": 2160 }, { "epoch": 0.179591388400703, "grad_norm": 1.422013759613037, "learning_rate": 1.7795353435491843e-05, "loss": 0.2087, "step": 2180 }, { "epoch": 0.1812390158172232, "grad_norm": 0.982097864151001, "learning_rate": 1.7960125226561214e-05, "loss": 0.2161, "step": 2200 }, { "epoch": 0.1828866432337434, "grad_norm": 0.2690947949886322, "learning_rate": 1.8124897017630583e-05, "loss": 0.1693, "step": 2220 }, { "epoch": 0.18453427065026362, "grad_norm": 0.24867244064807892, "learning_rate": 1.828966880869995e-05, "loss": 0.2058, "step": 2240 }, { "epoch": 0.18618189806678384, "grad_norm": 0.9435555934906006, "learning_rate": 1.8454440599769322e-05, "loss": 0.1991, "step": 2260 }, { "epoch": 0.18782952548330403, "grad_norm": 0.23964335024356842, "learning_rate": 1.861921239083869e-05, "loss": 0.1932, "step": 2280 }, { "epoch": 0.18947715289982425, "grad_norm": 2.1007418632507324, "learning_rate": 1.878398418190806e-05, "loss": 0.2075, "step": 2300 }, { "epoch": 0.19112478031634447, "grad_norm": 0.31368857622146606, "learning_rate": 1.8948755972977427e-05, "loss": 0.1557, "step": 2320 }, { "epoch": 0.19277240773286466, "grad_norm": 0.4029647409915924, "learning_rate": 1.9113527764046795e-05, "loss": 0.1952, "step": 2340 }, { "epoch": 0.19442003514938488, "grad_norm": 0.38545289635658264, "learning_rate": 1.9278299555116163e-05, "loss": 0.1998, "step": 2360 }, { "epoch": 0.1960676625659051, "grad_norm": 0.44573166966438293, "learning_rate": 1.9443071346185534e-05, "loss": 0.2078, "step": 2380 }, { "epoch": 0.1977152899824253, "grad_norm": 9.132265090942383, "learning_rate": 1.9607843137254903e-05, "loss": 0.2175, "step": 2400 }, { "epoch": 0.19936291739894552, "grad_norm": 0.42929643392562866, "learning_rate": 1.977261492832427e-05, "loss": 0.1762, "step": 2420 }, { "epoch": 0.20101054481546574, "grad_norm": 0.6267173886299133, "learning_rate": 1.9937386719393642e-05, "loss": 0.2101, "step": 2440 }, { "epoch": 0.20265817223198593, "grad_norm": 9.70997142791748, "learning_rate": 2.010215851046301e-05, "loss": 0.2101, "step": 2460 }, { "epoch": 0.20430579964850615, "grad_norm": 0.47748956084251404, "learning_rate": 2.026693030153238e-05, "loss": 0.2039, "step": 2480 }, { "epoch": 0.20595342706502637, "grad_norm": 1.3222582340240479, "learning_rate": 2.043170209260175e-05, "loss": 0.213, "step": 2500 }, { "epoch": 0.20760105448154656, "grad_norm": 0.4152863919734955, "learning_rate": 2.0596473883671115e-05, "loss": 0.148, "step": 2520 }, { "epoch": 0.20924868189806678, "grad_norm": 0.7384160757064819, "learning_rate": 2.0761245674740483e-05, "loss": 0.2138, "step": 2540 }, { "epoch": 0.210896309314587, "grad_norm": 0.27651092410087585, "learning_rate": 2.0926017465809854e-05, "loss": 0.2046, "step": 2560 }, { "epoch": 0.21254393673110722, "grad_norm": 0.226897194981575, "learning_rate": 2.1090789256879222e-05, "loss": 0.1904, "step": 2580 }, { "epoch": 0.2141915641476274, "grad_norm": 1.2391464710235596, "learning_rate": 2.125556104794859e-05, "loss": 0.204, "step": 2600 }, { "epoch": 0.21583919156414763, "grad_norm": 1.6048617362976074, "learning_rate": 2.1420332839017962e-05, "loss": 0.1548, "step": 2620 }, { "epoch": 0.21748681898066785, "grad_norm": 0.28409889340400696, "learning_rate": 2.158510463008733e-05, "loss": 0.201, "step": 2640 }, { "epoch": 0.21913444639718804, "grad_norm": 0.354885995388031, "learning_rate": 2.17498764211567e-05, "loss": 0.2083, "step": 2660 }, { "epoch": 0.22078207381370826, "grad_norm": 0.2778099775314331, "learning_rate": 2.191464821222607e-05, "loss": 0.1891, "step": 2680 }, { "epoch": 0.22242970123022848, "grad_norm": 1.007686734199524, "learning_rate": 2.2079420003295438e-05, "loss": 0.2152, "step": 2700 }, { "epoch": 0.22407732864674867, "grad_norm": 0.9725649952888489, "learning_rate": 2.2244191794364806e-05, "loss": 0.1581, "step": 2720 }, { "epoch": 0.2257249560632689, "grad_norm": 0.2451123297214508, "learning_rate": 2.2408963585434174e-05, "loss": 0.2019, "step": 2740 }, { "epoch": 0.22737258347978911, "grad_norm": 0.3667006194591522, "learning_rate": 2.2573735376503542e-05, "loss": 0.2083, "step": 2760 }, { "epoch": 0.2290202108963093, "grad_norm": 3.1283884048461914, "learning_rate": 2.273850716757291e-05, "loss": 0.194, "step": 2780 }, { "epoch": 0.23066783831282953, "grad_norm": 0.7148507237434387, "learning_rate": 2.2903278958642282e-05, "loss": 0.1931, "step": 2800 }, { "epoch": 0.23231546572934975, "grad_norm": 0.5805519223213196, "learning_rate": 2.306805074971165e-05, "loss": 0.1552, "step": 2820 }, { "epoch": 0.23396309314586994, "grad_norm": 0.8168196082115173, "learning_rate": 2.323282254078102e-05, "loss": 0.2107, "step": 2840 }, { "epoch": 0.23561072056239016, "grad_norm": 0.17171867191791534, "learning_rate": 2.339759433185039e-05, "loss": 0.2195, "step": 2860 }, { "epoch": 0.23725834797891038, "grad_norm": 0.6692082285881042, "learning_rate": 2.3562366122919758e-05, "loss": 0.1975, "step": 2880 }, { "epoch": 0.23890597539543057, "grad_norm": 1.5185160636901855, "learning_rate": 2.3727137913989126e-05, "loss": 0.2086, "step": 2900 }, { "epoch": 0.2405536028119508, "grad_norm": 0.8978987336158752, "learning_rate": 2.3891909705058498e-05, "loss": 0.1537, "step": 2920 }, { "epoch": 0.242201230228471, "grad_norm": 1.1462221145629883, "learning_rate": 2.4056681496127866e-05, "loss": 0.215, "step": 2940 }, { "epoch": 0.2438488576449912, "grad_norm": 0.2455727905035019, "learning_rate": 2.422145328719723e-05, "loss": 0.2137, "step": 2960 }, { "epoch": 0.24549648506151142, "grad_norm": 0.21464231610298157, "learning_rate": 2.4386225078266602e-05, "loss": 0.203, "step": 2980 }, { "epoch": 0.24714411247803164, "grad_norm": 0.8478316068649292, "learning_rate": 2.455099686933597e-05, "loss": 0.2085, "step": 3000 }, { "epoch": 0.24879173989455183, "grad_norm": 0.37225690484046936, "learning_rate": 2.4715768660405338e-05, "loss": 0.1645, "step": 3020 }, { "epoch": 0.2504393673110721, "grad_norm": 1.3999593257904053, "learning_rate": 2.488054045147471e-05, "loss": 0.2129, "step": 3040 }, { "epoch": 0.2520869947275923, "grad_norm": 2.0909199714660645, "learning_rate": 2.5045312242544078e-05, "loss": 0.1926, "step": 3060 }, { "epoch": 0.25373462214411246, "grad_norm": 0.19655053317546844, "learning_rate": 2.5210084033613446e-05, "loss": 0.2007, "step": 3080 }, { "epoch": 0.2553822495606327, "grad_norm": 1.2680870294570923, "learning_rate": 2.5374855824682814e-05, "loss": 0.2107, "step": 3100 }, { "epoch": 0.2570298769771529, "grad_norm": 0.19821316003799438, "learning_rate": 2.5539627615752182e-05, "loss": 0.1511, "step": 3120 }, { "epoch": 0.2586775043936731, "grad_norm": 0.22427937388420105, "learning_rate": 2.5704399406821554e-05, "loss": 0.2139, "step": 3140 }, { "epoch": 0.26032513181019334, "grad_norm": 0.4531656503677368, "learning_rate": 2.5869171197890922e-05, "loss": 0.1995, "step": 3160 }, { "epoch": 0.26197275922671354, "grad_norm": 0.3967747986316681, "learning_rate": 2.603394298896029e-05, "loss": 0.1981, "step": 3180 }, { "epoch": 0.26362038664323373, "grad_norm": 1.0957462787628174, "learning_rate": 2.6198714780029658e-05, "loss": 0.2092, "step": 3200 }, { "epoch": 0.265268014059754, "grad_norm": 0.5567193627357483, "learning_rate": 2.6363486571099026e-05, "loss": 0.1642, "step": 3220 }, { "epoch": 0.26691564147627417, "grad_norm": 0.3523741066455841, "learning_rate": 2.6528258362168395e-05, "loss": 0.2059, "step": 3240 }, { "epoch": 0.26856326889279436, "grad_norm": 0.40257710218429565, "learning_rate": 2.669303015323777e-05, "loss": 0.195, "step": 3260 }, { "epoch": 0.2702108963093146, "grad_norm": 0.3187640309333801, "learning_rate": 2.6849563354753665e-05, "loss": 0.1827, "step": 3280 }, { "epoch": 0.2718585237258348, "grad_norm": 0.7375414967536926, "learning_rate": 2.701433514582304e-05, "loss": 0.2247, "step": 3300 }, { "epoch": 0.273506151142355, "grad_norm": 0.45597076416015625, "learning_rate": 2.7179106936892408e-05, "loss": 0.1654, "step": 3320 }, { "epoch": 0.27515377855887524, "grad_norm": 0.21507132053375244, "learning_rate": 2.7343878727961776e-05, "loss": 0.1858, "step": 3340 }, { "epoch": 0.27680140597539543, "grad_norm": 0.7203060388565063, "learning_rate": 2.7508650519031144e-05, "loss": 0.1908, "step": 3360 }, { "epoch": 0.2784490333919156, "grad_norm": 0.8007901906967163, "learning_rate": 2.7673422310100512e-05, "loss": 0.1793, "step": 3380 }, { "epoch": 0.28009666080843587, "grad_norm": 3.210064649581909, "learning_rate": 2.783819410116988e-05, "loss": 0.206, "step": 3400 }, { "epoch": 0.28174428822495606, "grad_norm": 0.8950255513191223, "learning_rate": 2.8002965892239252e-05, "loss": 0.1531, "step": 3420 }, { "epoch": 0.28339191564147626, "grad_norm": 0.4942973256111145, "learning_rate": 2.816773768330862e-05, "loss": 0.1916, "step": 3440 }, { "epoch": 0.2850395430579965, "grad_norm": 0.31426137685775757, "learning_rate": 2.8332509474377988e-05, "loss": 0.2013, "step": 3460 }, { "epoch": 0.2866871704745167, "grad_norm": 0.47154414653778076, "learning_rate": 2.8497281265447356e-05, "loss": 0.1831, "step": 3480 }, { "epoch": 0.2883347978910369, "grad_norm": 0.8456603288650513, "learning_rate": 2.8662053056516724e-05, "loss": 0.2043, "step": 3500 }, { "epoch": 0.28998242530755713, "grad_norm": 0.29031482338905334, "learning_rate": 2.8826824847586092e-05, "loss": 0.1607, "step": 3520 }, { "epoch": 0.2916300527240773, "grad_norm": 0.3170378804206848, "learning_rate": 2.8991596638655467e-05, "loss": 0.1916, "step": 3540 }, { "epoch": 0.2932776801405975, "grad_norm": 0.3800877332687378, "learning_rate": 2.9156368429724836e-05, "loss": 0.2051, "step": 3560 }, { "epoch": 0.29492530755711777, "grad_norm": 0.5847609639167786, "learning_rate": 2.9321140220794204e-05, "loss": 0.196, "step": 3580 }, { "epoch": 0.29657293497363796, "grad_norm": 1.0933667421340942, "learning_rate": 2.9485912011863572e-05, "loss": 0.2154, "step": 3600 }, { "epoch": 0.29822056239015815, "grad_norm": 4.349573135375977, "learning_rate": 2.9650683802932937e-05, "loss": 0.1606, "step": 3620 }, { "epoch": 0.2998681898066784, "grad_norm": 0.4264489710330963, "learning_rate": 2.9815455594002305e-05, "loss": 0.2117, "step": 3640 }, { "epoch": 0.3015158172231986, "grad_norm": 0.47935691475868225, "learning_rate": 2.998022738507168e-05, "loss": 0.1901, "step": 3660 }, { "epoch": 0.3031634446397188, "grad_norm": 0.7258153557777405, "learning_rate": 3.0144999176141048e-05, "loss": 0.189, "step": 3680 }, { "epoch": 0.30481107205623903, "grad_norm": 2.0093941688537598, "learning_rate": 3.0309770967210416e-05, "loss": 0.2104, "step": 3700 }, { "epoch": 0.3064586994727592, "grad_norm": 1.1718699932098389, "learning_rate": 3.0474542758279784e-05, "loss": 0.1577, "step": 3720 }, { "epoch": 0.3081063268892794, "grad_norm": 0.19388867914676666, "learning_rate": 3.063931454934915e-05, "loss": 0.2011, "step": 3740 }, { "epoch": 0.30975395430579966, "grad_norm": 0.2112320065498352, "learning_rate": 3.080408634041852e-05, "loss": 0.2077, "step": 3760 }, { "epoch": 0.31140158172231985, "grad_norm": 1.9554697275161743, "learning_rate": 3.096885813148789e-05, "loss": 0.1861, "step": 3780 }, { "epoch": 0.31304920913884005, "grad_norm": 0.7065563201904297, "learning_rate": 3.113362992255726e-05, "loss": 0.2071, "step": 3800 }, { "epoch": 0.3146968365553603, "grad_norm": 0.4599238634109497, "learning_rate": 3.129840171362663e-05, "loss": 0.1655, "step": 3820 }, { "epoch": 0.3163444639718805, "grad_norm": 0.5441445708274841, "learning_rate": 3.1463173504695996e-05, "loss": 0.1971, "step": 3840 }, { "epoch": 0.3179920913884007, "grad_norm": 1.1993862390518188, "learning_rate": 3.1627945295765364e-05, "loss": 0.2048, "step": 3860 }, { "epoch": 0.3196397188049209, "grad_norm": 0.3095191717147827, "learning_rate": 3.179271708683473e-05, "loss": 0.2009, "step": 3880 }, { "epoch": 0.3212873462214411, "grad_norm": 1.0743999481201172, "learning_rate": 3.195748887790411e-05, "loss": 0.2371, "step": 3900 }, { "epoch": 0.3229349736379613, "grad_norm": 0.5000220537185669, "learning_rate": 3.2122260668973475e-05, "loss": 0.1722, "step": 3920 }, { "epoch": 0.32458260105448156, "grad_norm": 1.1417018175125122, "learning_rate": 3.2287032460042844e-05, "loss": 0.2079, "step": 3940 }, { "epoch": 0.32623022847100175, "grad_norm": 1.099433422088623, "learning_rate": 3.245180425111221e-05, "loss": 0.2009, "step": 3960 }, { "epoch": 0.32787785588752194, "grad_norm": 0.3827146589756012, "learning_rate": 3.261657604218158e-05, "loss": 0.2171, "step": 3980 }, { "epoch": 0.3295254833040422, "grad_norm": 1.1845418214797974, "learning_rate": 3.278134783325095e-05, "loss": 0.2195, "step": 4000 }, { "epoch": 0.3295254833040422, "eval_loss": 0.5868579149246216, "eval_runtime": 260.92, "eval_samples_per_second": 83.508, "eval_steps_per_second": 20.88, "eval_wer": 0.2350240606008012, "step": 4000 }, { "epoch": 0.3311731107205624, "grad_norm": 1.6073412895202637, "learning_rate": 3.2946119624320316e-05, "loss": 0.1695, "step": 4020 }, { "epoch": 0.33282073813708263, "grad_norm": 2.9597036838531494, "learning_rate": 3.311089141538969e-05, "loss": 0.2484, "step": 4040 }, { "epoch": 0.3344683655536028, "grad_norm": 1.4150543212890625, "learning_rate": 3.327566320645906e-05, "loss": 0.2087, "step": 4060 }, { "epoch": 0.336115992970123, "grad_norm": 0.255397230386734, "learning_rate": 3.344043499752843e-05, "loss": 0.2048, "step": 4080 }, { "epoch": 0.33776362038664326, "grad_norm": 1.9232691526412964, "learning_rate": 3.3605206788597795e-05, "loss": 0.2274, "step": 4100 }, { "epoch": 0.33941124780316345, "grad_norm": 1.0172194242477417, "learning_rate": 3.3769978579667164e-05, "loss": 0.1779, "step": 4120 }, { "epoch": 0.34105887521968364, "grad_norm": 1.3862395286560059, "learning_rate": 3.393475037073653e-05, "loss": 0.2095, "step": 4140 }, { "epoch": 0.3427065026362039, "grad_norm": 0.3353387117385864, "learning_rate": 3.40995221618059e-05, "loss": 0.2021, "step": 4160 }, { "epoch": 0.3443541300527241, "grad_norm": 0.9549083709716797, "learning_rate": 3.426429395287527e-05, "loss": 0.1997, "step": 4180 }, { "epoch": 0.3460017574692443, "grad_norm": 1.6077580451965332, "learning_rate": 3.4429065743944636e-05, "loss": 0.2562, "step": 4200 }, { "epoch": 0.3476493848857645, "grad_norm": 5.387716770172119, "learning_rate": 3.4593837535014004e-05, "loss": 0.1726, "step": 4220 }, { "epoch": 0.3492970123022847, "grad_norm": 0.5455642342567444, "learning_rate": 3.475860932608337e-05, "loss": 0.2342, "step": 4240 }, { "epoch": 0.3509446397188049, "grad_norm": 0.18990729749202728, "learning_rate": 3.492338111715274e-05, "loss": 0.2334, "step": 4260 }, { "epoch": 0.35259226713532515, "grad_norm": 0.4878564476966858, "learning_rate": 3.5088152908222115e-05, "loss": 0.2065, "step": 4280 }, { "epoch": 0.35423989455184535, "grad_norm": 1.3400063514709473, "learning_rate": 3.5252924699291483e-05, "loss": 0.237, "step": 4300 }, { "epoch": 0.35588752196836554, "grad_norm": 0.5822551250457764, "learning_rate": 3.541769649036085e-05, "loss": 0.1787, "step": 4320 }, { "epoch": 0.3575351493848858, "grad_norm": 0.4629223644733429, "learning_rate": 3.558246828143022e-05, "loss": 0.2129, "step": 4340 }, { "epoch": 0.359182776801406, "grad_norm": 1.4195072650909424, "learning_rate": 3.574724007249959e-05, "loss": 0.2215, "step": 4360 }, { "epoch": 0.36083040421792617, "grad_norm": 0.3443647623062134, "learning_rate": 3.5912011863568956e-05, "loss": 0.2002, "step": 4380 }, { "epoch": 0.3624780316344464, "grad_norm": 3.2932443618774414, "learning_rate": 3.607678365463833e-05, "loss": 0.2172, "step": 4400 }, { "epoch": 0.3641256590509666, "grad_norm": 0.4463866055011749, "learning_rate": 3.62415554457077e-05, "loss": 0.1889, "step": 4420 }, { "epoch": 0.3657732864674868, "grad_norm": 0.4873151183128357, "learning_rate": 3.640632723677707e-05, "loss": 0.2139, "step": 4440 }, { "epoch": 0.36742091388400705, "grad_norm": 1.6354761123657227, "learning_rate": 3.6571099027846435e-05, "loss": 0.1986, "step": 4460 }, { "epoch": 0.36906854130052724, "grad_norm": 0.5571808815002441, "learning_rate": 3.6735870818915803e-05, "loss": 0.2047, "step": 4480 }, { "epoch": 0.37071616871704743, "grad_norm": 0.7461993098258972, "learning_rate": 3.690064260998517e-05, "loss": 0.2196, "step": 4500 }, { "epoch": 0.3723637961335677, "grad_norm": 0.2061534970998764, "learning_rate": 3.7065414401054546e-05, "loss": 0.1657, "step": 4520 }, { "epoch": 0.3740114235500879, "grad_norm": 1.0977954864501953, "learning_rate": 3.7230186192123915e-05, "loss": 0.2062, "step": 4540 }, { "epoch": 0.37565905096660807, "grad_norm": 2.38232684135437, "learning_rate": 3.739495798319328e-05, "loss": 0.2319, "step": 4560 }, { "epoch": 0.3773066783831283, "grad_norm": 0.6182531118392944, "learning_rate": 3.755972977426265e-05, "loss": 0.202, "step": 4580 }, { "epoch": 0.3789543057996485, "grad_norm": 28.744009017944336, "learning_rate": 3.772450156533201e-05, "loss": 0.2235, "step": 4600 }, { "epoch": 0.3806019332161687, "grad_norm": 0.5899057984352112, "learning_rate": 3.788927335640138e-05, "loss": 0.1577, "step": 4620 }, { "epoch": 0.38224956063268895, "grad_norm": 0.5423290133476257, "learning_rate": 3.8054045147470755e-05, "loss": 0.2098, "step": 4640 }, { "epoch": 0.38389718804920914, "grad_norm": 0.23139849305152893, "learning_rate": 3.8218816938540123e-05, "loss": 0.1989, "step": 4660 }, { "epoch": 0.38554481546572933, "grad_norm": 0.3539600670337677, "learning_rate": 3.838358872960949e-05, "loss": 0.1968, "step": 4680 }, { "epoch": 0.3871924428822496, "grad_norm": 0.7127693295478821, "learning_rate": 3.854836052067886e-05, "loss": 0.2225, "step": 4700 }, { "epoch": 0.38884007029876977, "grad_norm": 0.3489459753036499, "learning_rate": 3.871313231174823e-05, "loss": 0.1606, "step": 4720 }, { "epoch": 0.39048769771528996, "grad_norm": 0.27798184752464294, "learning_rate": 3.886966551326413e-05, "loss": 0.2096, "step": 4740 }, { "epoch": 0.3921353251318102, "grad_norm": 0.4229481816291809, "learning_rate": 3.90344373043335e-05, "loss": 0.2195, "step": 4760 }, { "epoch": 0.3937829525483304, "grad_norm": 0.25523656606674194, "learning_rate": 3.9199209095402866e-05, "loss": 0.2106, "step": 4780 }, { "epoch": 0.3954305799648506, "grad_norm": 0.9676795601844788, "learning_rate": 3.936398088647224e-05, "loss": 0.2267, "step": 4800 }, { "epoch": 0.39707820738137084, "grad_norm": 0.40875479578971863, "learning_rate": 3.952875267754161e-05, "loss": 0.155, "step": 4820 }, { "epoch": 0.39872583479789103, "grad_norm": 0.335033655166626, "learning_rate": 3.969352446861098e-05, "loss": 0.2081, "step": 4840 }, { "epoch": 0.4003734622144112, "grad_norm": 0.3303823173046112, "learning_rate": 3.9858296259680345e-05, "loss": 0.2282, "step": 4860 }, { "epoch": 0.40202108963093147, "grad_norm": 0.20934663712978363, "learning_rate": 4.0023068050749714e-05, "loss": 0.1965, "step": 4880 }, { "epoch": 0.40366871704745166, "grad_norm": 1.4093564748764038, "learning_rate": 4.018783984181908e-05, "loss": 0.22, "step": 4900 }, { "epoch": 0.40531634446397186, "grad_norm": 0.4276560842990875, "learning_rate": 4.035261163288846e-05, "loss": 0.1705, "step": 4920 }, { "epoch": 0.4069639718804921, "grad_norm": 0.4243983030319214, "learning_rate": 4.0517383423957825e-05, "loss": 0.2285, "step": 4940 }, { "epoch": 0.4086115992970123, "grad_norm": 0.16951896250247955, "learning_rate": 4.068215521502719e-05, "loss": 0.2234, "step": 4960 }, { "epoch": 0.4102592267135325, "grad_norm": 0.30336254835128784, "learning_rate": 4.084692700609656e-05, "loss": 0.2152, "step": 4980 }, { "epoch": 0.41190685413005274, "grad_norm": 1.0761586427688599, "learning_rate": 4.101169879716593e-05, "loss": 0.2093, "step": 5000 }, { "epoch": 0.4135544815465729, "grad_norm": 0.7221740484237671, "learning_rate": 4.11764705882353e-05, "loss": 0.1538, "step": 5020 }, { "epoch": 0.4152021089630931, "grad_norm": 0.4801746904850006, "learning_rate": 4.1341242379304665e-05, "loss": 0.1911, "step": 5040 }, { "epoch": 0.41684973637961337, "grad_norm": 0.311234712600708, "learning_rate": 4.1506014170374034e-05, "loss": 0.2015, "step": 5060 }, { "epoch": 0.41849736379613356, "grad_norm": 0.6403760313987732, "learning_rate": 4.16707859614434e-05, "loss": 0.2014, "step": 5080 }, { "epoch": 0.42014499121265375, "grad_norm": 1.2653217315673828, "learning_rate": 4.183555775251277e-05, "loss": 0.2388, "step": 5100 }, { "epoch": 0.421792618629174, "grad_norm": 0.5536401867866516, "learning_rate": 4.200032954358214e-05, "loss": 0.1553, "step": 5120 }, { "epoch": 0.4234402460456942, "grad_norm": 0.4605076014995575, "learning_rate": 4.2165101334651506e-05, "loss": 0.2105, "step": 5140 }, { "epoch": 0.42508787346221444, "grad_norm": 0.28758004307746887, "learning_rate": 4.232987312572088e-05, "loss": 0.2067, "step": 5160 }, { "epoch": 0.42673550087873463, "grad_norm": 0.36655622720718384, "learning_rate": 4.249464491679025e-05, "loss": 0.1976, "step": 5180 }, { "epoch": 0.4283831282952548, "grad_norm": 0.9053062796592712, "learning_rate": 4.265941670785962e-05, "loss": 0.2102, "step": 5200 }, { "epoch": 0.43003075571177507, "grad_norm": 0.5088081359863281, "learning_rate": 4.2824188498928985e-05, "loss": 0.1639, "step": 5220 }, { "epoch": 0.43167838312829526, "grad_norm": 0.7981218695640564, "learning_rate": 4.2988960289998354e-05, "loss": 0.2024, "step": 5240 }, { "epoch": 0.43332601054481545, "grad_norm": 0.7993011474609375, "learning_rate": 4.315373208106772e-05, "loss": 0.204, "step": 5260 }, { "epoch": 0.4349736379613357, "grad_norm": 0.5183406472206116, "learning_rate": 4.331850387213709e-05, "loss": 0.2041, "step": 5280 }, { "epoch": 0.4366212653778559, "grad_norm": 1.2263312339782715, "learning_rate": 4.3483275663206465e-05, "loss": 0.2182, "step": 5300 }, { "epoch": 0.4382688927943761, "grad_norm": 0.7018775343894958, "learning_rate": 4.364804745427583e-05, "loss": 0.1685, "step": 5320 }, { "epoch": 0.43991652021089633, "grad_norm": 0.7381752133369446, "learning_rate": 4.38128192453452e-05, "loss": 0.2073, "step": 5340 }, { "epoch": 0.4415641476274165, "grad_norm": 0.4658122956752777, "learning_rate": 4.397759103641457e-05, "loss": 0.2031, "step": 5360 }, { "epoch": 0.4432117750439367, "grad_norm": 0.34789761900901794, "learning_rate": 4.414236282748394e-05, "loss": 0.2023, "step": 5380 }, { "epoch": 0.44485940246045697, "grad_norm": 0.9787063598632812, "learning_rate": 4.4307134618553305e-05, "loss": 0.2264, "step": 5400 }, { "epoch": 0.44650702987697716, "grad_norm": 0.3786025047302246, "learning_rate": 4.4471906409622673e-05, "loss": 0.1656, "step": 5420 }, { "epoch": 0.44815465729349735, "grad_norm": 0.4397692084312439, "learning_rate": 4.463667820069204e-05, "loss": 0.2023, "step": 5440 }, { "epoch": 0.4498022847100176, "grad_norm": 0.20323756337165833, "learning_rate": 4.480144999176141e-05, "loss": 0.2146, "step": 5460 }, { "epoch": 0.4514499121265378, "grad_norm": 0.2108180820941925, "learning_rate": 4.496622178283078e-05, "loss": 0.2012, "step": 5480 }, { "epoch": 0.453097539543058, "grad_norm": 1.6603738069534302, "learning_rate": 4.5130993573900146e-05, "loss": 0.2257, "step": 5500 }, { "epoch": 0.45474516695957823, "grad_norm": 1.6646907329559326, "learning_rate": 4.5295765364969514e-05, "loss": 0.1636, "step": 5520 }, { "epoch": 0.4563927943760984, "grad_norm": 0.98222416639328, "learning_rate": 4.546053715603889e-05, "loss": 0.2018, "step": 5540 }, { "epoch": 0.4580404217926186, "grad_norm": 0.6065666079521179, "learning_rate": 4.562530894710826e-05, "loss": 0.2351, "step": 5560 }, { "epoch": 0.45968804920913886, "grad_norm": 0.4321737587451935, "learning_rate": 4.5790080738177625e-05, "loss": 0.2139, "step": 5580 }, { "epoch": 0.46133567662565905, "grad_norm": 2.530203342437744, "learning_rate": 4.5954852529246993e-05, "loss": 0.232, "step": 5600 }, { "epoch": 0.46298330404217924, "grad_norm": 0.8252795934677124, "learning_rate": 4.611962432031636e-05, "loss": 0.1792, "step": 5620 }, { "epoch": 0.4646309314586995, "grad_norm": 4.3282880783081055, "learning_rate": 4.628439611138573e-05, "loss": 0.2079, "step": 5640 }, { "epoch": 0.4662785588752197, "grad_norm": 0.2798108756542206, "learning_rate": 4.6449167902455105e-05, "loss": 0.2114, "step": 5660 }, { "epoch": 0.4679261862917399, "grad_norm": 0.1499057412147522, "learning_rate": 4.661393969352447e-05, "loss": 0.2026, "step": 5680 }, { "epoch": 0.4695738137082601, "grad_norm": 0.8664823770523071, "learning_rate": 4.677871148459384e-05, "loss": 0.2208, "step": 5700 }, { "epoch": 0.4712214411247803, "grad_norm": 0.32247450947761536, "learning_rate": 4.694348327566321e-05, "loss": 0.175, "step": 5720 }, { "epoch": 0.4728690685413005, "grad_norm": 0.4217327833175659, "learning_rate": 4.710825506673258e-05, "loss": 0.2207, "step": 5740 }, { "epoch": 0.47451669595782076, "grad_norm": 0.9544996023178101, "learning_rate": 4.7273026857801945e-05, "loss": 0.2269, "step": 5760 }, { "epoch": 0.47616432337434095, "grad_norm": 0.3182899057865143, "learning_rate": 4.743779864887132e-05, "loss": 0.224, "step": 5780 }, { "epoch": 0.47781195079086114, "grad_norm": 0.669552743434906, "learning_rate": 4.760257043994069e-05, "loss": 0.2332, "step": 5800 }, { "epoch": 0.4794595782073814, "grad_norm": 0.5297145247459412, "learning_rate": 4.7767342231010056e-05, "loss": 0.2233, "step": 5820 }, { "epoch": 0.4811072056239016, "grad_norm": 0.5998116731643677, "learning_rate": 4.7932114022079425e-05, "loss": 0.2282, "step": 5840 }, { "epoch": 0.48275483304042177, "grad_norm": 0.4391906261444092, "learning_rate": 4.809688581314879e-05, "loss": 0.2094, "step": 5860 }, { "epoch": 0.484402460456942, "grad_norm": 0.559304416179657, "learning_rate": 4.826165760421816e-05, "loss": 0.2106, "step": 5880 }, { "epoch": 0.4860500878734622, "grad_norm": 2.914066791534424, "learning_rate": 4.842642939528753e-05, "loss": 0.2484, "step": 5900 }, { "epoch": 0.4876977152899824, "grad_norm": 0.29989850521087646, "learning_rate": 4.85912011863569e-05, "loss": 0.1631, "step": 5920 }, { "epoch": 0.48934534270650265, "grad_norm": 2.1414361000061035, "learning_rate": 4.8755972977426265e-05, "loss": 0.2315, "step": 5940 }, { "epoch": 0.49099297012302284, "grad_norm": 0.5668668746948242, "learning_rate": 4.892074476849563e-05, "loss": 0.218, "step": 5960 }, { "epoch": 0.49264059753954303, "grad_norm": 0.34356266260147095, "learning_rate": 4.9077277970011535e-05, "loss": 0.2121, "step": 5980 }, { "epoch": 0.4942882249560633, "grad_norm": 0.7654836773872375, "learning_rate": 4.9242049761080904e-05, "loss": 0.212, "step": 6000 }, { "epoch": 0.4942882249560633, "eval_loss": 0.5638302564620972, "eval_runtime": 264.9088, "eval_samples_per_second": 82.251, "eval_steps_per_second": 20.566, "eval_wer": 0.24026168527782366, "step": 6000 }, { "epoch": 0.4959358523725835, "grad_norm": 1.2249395847320557, "learning_rate": 4.940682155215027e-05, "loss": 0.1786, "step": 6020 }, { "epoch": 0.49758347978910367, "grad_norm": 0.834241509437561, "learning_rate": 4.957159334321964e-05, "loss": 0.2193, "step": 6040 }, { "epoch": 0.4992311072056239, "grad_norm": 1.9017058610916138, "learning_rate": 4.9736365134289015e-05, "loss": 0.2338, "step": 6060 }, { "epoch": 0.5008787346221442, "grad_norm": 3.1669952869415283, "learning_rate": 4.990113692535838e-05, "loss": 0.2245, "step": 6080 }, { "epoch": 0.5025263620386643, "grad_norm": 2.9794270992279053, "learning_rate": 4.9999997353508944e-05, "loss": 0.239, "step": 6100 }, { "epoch": 0.5041739894551845, "grad_norm": 1.2019259929656982, "learning_rate": 4.999996758049098e-05, "loss": 0.1871, "step": 6120 }, { "epoch": 0.5058216168717048, "grad_norm": 2.175334930419922, "learning_rate": 4.999990472638076e-05, "loss": 0.2265, "step": 6140 }, { "epoch": 0.5074692442882249, "grad_norm": 1.4895635843276978, "learning_rate": 4.999980879126146e-05, "loss": 0.2259, "step": 6160 }, { "epoch": 0.5091168717047452, "grad_norm": 3.355076789855957, "learning_rate": 4.9999679775260015e-05, "loss": 0.2316, "step": 6180 }, { "epoch": 0.5107644991212654, "grad_norm": 3.545771598815918, "learning_rate": 4.999951767854715e-05, "loss": 0.2426, "step": 6200 }, { "epoch": 0.5124121265377856, "grad_norm": 2.085190773010254, "learning_rate": 4.999932250133736e-05, "loss": 0.1884, "step": 6220 }, { "epoch": 0.5140597539543058, "grad_norm": 1.6277663707733154, "learning_rate": 4.999909424388892e-05, "loss": 0.2332, "step": 6240 }, { "epoch": 0.5157073813708261, "grad_norm": 3.9234516620635986, "learning_rate": 4.9998832906503856e-05, "loss": 0.2346, "step": 6260 }, { "epoch": 0.5173550087873462, "grad_norm": 1.7027534246444702, "learning_rate": 4.9998538489527984e-05, "loss": 0.2339, "step": 6280 }, { "epoch": 0.5190026362038664, "grad_norm": 2.7195184230804443, "learning_rate": 4.9998210993350895e-05, "loss": 0.2511, "step": 6300 }, { "epoch": 0.5206502636203867, "grad_norm": 0.7267903089523315, "learning_rate": 4.9997850418405945e-05, "loss": 0.1765, "step": 6320 }, { "epoch": 0.5222978910369068, "grad_norm": 0.751308798789978, "learning_rate": 4.999745676517027e-05, "loss": 0.2393, "step": 6340 }, { "epoch": 0.5239455184534271, "grad_norm": 1.0236356258392334, "learning_rate": 4.999703003416476e-05, "loss": 0.2292, "step": 6360 }, { "epoch": 0.5255931458699473, "grad_norm": 0.7693842649459839, "learning_rate": 4.999657022595409e-05, "loss": 0.2285, "step": 6380 }, { "epoch": 0.5272407732864675, "grad_norm": 3.4103684425354004, "learning_rate": 4.999607734114669e-05, "loss": 0.2635, "step": 6400 }, { "epoch": 0.5288884007029877, "grad_norm": 0.5887142419815063, "learning_rate": 4.999555138039478e-05, "loss": 0.1938, "step": 6420 }, { "epoch": 0.530536028119508, "grad_norm": 1.6326848268508911, "learning_rate": 4.999499234439433e-05, "loss": 0.2112, "step": 6440 }, { "epoch": 0.5321836555360281, "grad_norm": 0.4003719985485077, "learning_rate": 4.9994400233885086e-05, "loss": 0.2011, "step": 6460 }, { "epoch": 0.5338312829525483, "grad_norm": 1.0633249282836914, "learning_rate": 4.999377504965055e-05, "loss": 0.2388, "step": 6480 }, { "epoch": 0.5354789103690686, "grad_norm": 3.87857985496521, "learning_rate": 4.999311679251799e-05, "loss": 0.2222, "step": 6500 }, { "epoch": 0.5371265377855887, "grad_norm": 1.1982243061065674, "learning_rate": 4.999242546335845e-05, "loss": 0.1736, "step": 6520 }, { "epoch": 0.538774165202109, "grad_norm": 1.4149129390716553, "learning_rate": 4.999170106308673e-05, "loss": 0.2044, "step": 6540 }, { "epoch": 0.5404217926186292, "grad_norm": 0.9540938138961792, "learning_rate": 4.999094359266138e-05, "loss": 0.2343, "step": 6560 }, { "epoch": 0.5420694200351494, "grad_norm": 0.8150781989097595, "learning_rate": 4.999015305308472e-05, "loss": 0.2027, "step": 6580 }, { "epoch": 0.5437170474516696, "grad_norm": 2.2410573959350586, "learning_rate": 4.998932944540284e-05, "loss": 0.2191, "step": 6600 }, { "epoch": 0.5453646748681898, "grad_norm": 2.924471616744995, "learning_rate": 4.998847277070556e-05, "loss": 0.1776, "step": 6620 }, { "epoch": 0.54701230228471, "grad_norm": 2.5102221965789795, "learning_rate": 4.9987583030126484e-05, "loss": 0.2172, "step": 6640 }, { "epoch": 0.5486599297012302, "grad_norm": 1.2676066160202026, "learning_rate": 4.998666022484295e-05, "loss": 0.2235, "step": 6660 }, { "epoch": 0.5503075571177505, "grad_norm": 1.7459567785263062, "learning_rate": 4.998570435607605e-05, "loss": 0.2114, "step": 6680 }, { "epoch": 0.5519551845342706, "grad_norm": 2.4994170665740967, "learning_rate": 4.998476565684759e-05, "loss": 0.2321, "step": 6700 }, { "epoch": 0.5536028119507909, "grad_norm": 1.436635136604309, "learning_rate": 4.998374531796601e-05, "loss": 0.1715, "step": 6720 }, { "epoch": 0.5552504393673111, "grad_norm": 5.170281410217285, "learning_rate": 4.9982691919458215e-05, "loss": 0.2227, "step": 6740 }, { "epoch": 0.5568980667838312, "grad_norm": 2.329806327819824, "learning_rate": 4.9981605462718097e-05, "loss": 0.2103, "step": 6760 }, { "epoch": 0.5585456942003515, "grad_norm": 0.5105818510055542, "learning_rate": 4.998048594918331e-05, "loss": 0.2297, "step": 6780 }, { "epoch": 0.5601933216168717, "grad_norm": 2.74815034866333, "learning_rate": 4.997933338033525e-05, "loss": 0.2454, "step": 6800 }, { "epoch": 0.5618409490333919, "grad_norm": 1.0433367490768433, "learning_rate": 4.997814775769904e-05, "loss": 0.175, "step": 6820 }, { "epoch": 0.5634885764499121, "grad_norm": 0.5902445316314697, "learning_rate": 4.997692908284356e-05, "loss": 0.2153, "step": 6840 }, { "epoch": 0.5651362038664324, "grad_norm": 0.9332594275474548, "learning_rate": 4.997567735738141e-05, "loss": 0.2246, "step": 6860 }, { "epoch": 0.5667838312829525, "grad_norm": 1.2199161052703857, "learning_rate": 4.9974392582968934e-05, "loss": 0.2281, "step": 6880 }, { "epoch": 0.5684314586994728, "grad_norm": 2.2492709159851074, "learning_rate": 4.9973074761306186e-05, "loss": 0.238, "step": 6900 }, { "epoch": 0.570079086115993, "grad_norm": 9.928802490234375, "learning_rate": 4.997172389413699e-05, "loss": 0.1731, "step": 6920 }, { "epoch": 0.5717267135325131, "grad_norm": 1.4187493324279785, "learning_rate": 4.997033998324886e-05, "loss": 0.2267, "step": 6940 }, { "epoch": 0.5733743409490334, "grad_norm": 1.0871655941009521, "learning_rate": 4.996892303047306e-05, "loss": 0.2478, "step": 6960 }, { "epoch": 0.5750219683655536, "grad_norm": 0.9939442873001099, "learning_rate": 4.996747303768456e-05, "loss": 0.1992, "step": 6980 }, { "epoch": 0.5766695957820738, "grad_norm": 48.66152572631836, "learning_rate": 4.996599000680206e-05, "loss": 0.2341, "step": 7000 }, { "epoch": 0.578317223198594, "grad_norm": 0.8890938758850098, "learning_rate": 4.996447393978797e-05, "loss": 0.1687, "step": 7020 }, { "epoch": 0.5799648506151143, "grad_norm": 0.8341367840766907, "learning_rate": 4.996292483864843e-05, "loss": 0.2175, "step": 7040 }, { "epoch": 0.5816124780316344, "grad_norm": 1.6562838554382324, "learning_rate": 4.996134270543326e-05, "loss": 0.2136, "step": 7060 }, { "epoch": 0.5832601054481547, "grad_norm": 0.6537133455276489, "learning_rate": 4.9959727542236025e-05, "loss": 0.2139, "step": 7080 }, { "epoch": 0.5849077328646749, "grad_norm": 2.101271390914917, "learning_rate": 4.9958079351193976e-05, "loss": 0.2359, "step": 7100 }, { "epoch": 0.586555360281195, "grad_norm": 0.9616307616233826, "learning_rate": 4.995639813448808e-05, "loss": 0.1774, "step": 7120 }, { "epoch": 0.5882029876977153, "grad_norm": 1.0992920398712158, "learning_rate": 4.9954683894343e-05, "loss": 0.1962, "step": 7140 }, { "epoch": 0.5898506151142355, "grad_norm": 0.6329948902130127, "learning_rate": 4.995293663302709e-05, "loss": 0.2145, "step": 7160 }, { "epoch": 0.5914982425307557, "grad_norm": 1.0156137943267822, "learning_rate": 4.9951156352852415e-05, "loss": 0.2161, "step": 7180 }, { "epoch": 0.5931458699472759, "grad_norm": 2.6131973266601562, "learning_rate": 4.994943450511368e-05, "loss": 0.2234, "step": 7200 }, { "epoch": 0.5947934973637962, "grad_norm": 3.4186134338378906, "learning_rate": 4.99475898449799e-05, "loss": 0.171, "step": 7220 }, { "epoch": 0.5964411247803163, "grad_norm": 4.896924018859863, "learning_rate": 4.9945712173062477e-05, "loss": 0.2073, "step": 7240 }, { "epoch": 0.5980887521968365, "grad_norm": 1.2636560201644897, "learning_rate": 4.994380149184601e-05, "loss": 0.2144, "step": 7260 }, { "epoch": 0.5997363796133568, "grad_norm": 1.4861242771148682, "learning_rate": 4.99418578038588e-05, "loss": 0.2024, "step": 7280 }, { "epoch": 0.6013840070298769, "grad_norm": 6.527903079986572, "learning_rate": 4.993988111167284e-05, "loss": 0.2172, "step": 7300 }, { "epoch": 0.6030316344463972, "grad_norm": 1.9219621419906616, "learning_rate": 4.993787141790375e-05, "loss": 0.1865, "step": 7320 }, { "epoch": 0.6046792618629174, "grad_norm": 0.7625167369842529, "learning_rate": 4.9935828725210874e-05, "loss": 0.2238, "step": 7340 }, { "epoch": 0.6063268892794376, "grad_norm": 1.3375577926635742, "learning_rate": 4.9933753036297196e-05, "loss": 0.1878, "step": 7360 }, { "epoch": 0.6079745166959578, "grad_norm": 1.1067700386047363, "learning_rate": 4.993164435390935e-05, "loss": 0.2111, "step": 7380 }, { "epoch": 0.6096221441124781, "grad_norm": 4.094808578491211, "learning_rate": 4.992950268083764e-05, "loss": 0.2073, "step": 7400 }, { "epoch": 0.6112697715289982, "grad_norm": 2.683678150177002, "learning_rate": 4.992732801991602e-05, "loss": 0.172, "step": 7420 }, { "epoch": 0.6129173989455184, "grad_norm": 0.9752517342567444, "learning_rate": 4.992512037402212e-05, "loss": 0.1924, "step": 7440 }, { "epoch": 0.6145650263620387, "grad_norm": 0.8463476896286011, "learning_rate": 4.9922879746077176e-05, "loss": 0.2021, "step": 7460 }, { "epoch": 0.6162126537785588, "grad_norm": 3.053812026977539, "learning_rate": 4.992060613904611e-05, "loss": 0.2086, "step": 7480 }, { "epoch": 0.6178602811950791, "grad_norm": 2.9483275413513184, "learning_rate": 4.991829955593744e-05, "loss": 0.2154, "step": 7500 }, { "epoch": 0.6195079086115993, "grad_norm": 1.3794467449188232, "learning_rate": 4.9915959999803365e-05, "loss": 0.1599, "step": 7520 }, { "epoch": 0.6211555360281195, "grad_norm": 1.0529364347457886, "learning_rate": 4.9913587473739666e-05, "loss": 0.1891, "step": 7540 }, { "epoch": 0.6228031634446397, "grad_norm": 0.9026776552200317, "learning_rate": 4.991118198088579e-05, "loss": 0.2074, "step": 7560 }, { "epoch": 0.62445079086116, "grad_norm": 0.4504956603050232, "learning_rate": 4.9908743524424806e-05, "loss": 0.1931, "step": 7580 }, { "epoch": 0.6260984182776801, "grad_norm": 4.0763726234436035, "learning_rate": 4.9906272107583366e-05, "loss": 0.2192, "step": 7600 }, { "epoch": 0.6277460456942003, "grad_norm": 1.012601375579834, "learning_rate": 4.990376773363178e-05, "loss": 0.1604, "step": 7620 }, { "epoch": 0.6293936731107206, "grad_norm": 0.6183291673660278, "learning_rate": 4.990123040588395e-05, "loss": 0.2015, "step": 7640 }, { "epoch": 0.6310413005272407, "grad_norm": 0.718015730381012, "learning_rate": 4.989866012769736e-05, "loss": 0.2155, "step": 7660 }, { "epoch": 0.632688927943761, "grad_norm": 1.5611259937286377, "learning_rate": 4.989605690247315e-05, "loss": 0.1975, "step": 7680 }, { "epoch": 0.6343365553602812, "grad_norm": 3.2936527729034424, "learning_rate": 4.9893420733656e-05, "loss": 0.2305, "step": 7700 }, { "epoch": 0.6359841827768014, "grad_norm": 1.3771491050720215, "learning_rate": 4.9890751624734225e-05, "loss": 0.1575, "step": 7720 }, { "epoch": 0.6376318101933216, "grad_norm": 1.0577577352523804, "learning_rate": 4.98880495792397e-05, "loss": 0.2045, "step": 7740 }, { "epoch": 0.6392794376098418, "grad_norm": 3.6020660400390625, "learning_rate": 4.988531460074791e-05, "loss": 0.2034, "step": 7760 }, { "epoch": 0.640927065026362, "grad_norm": 0.7804440855979919, "learning_rate": 4.9882546692877885e-05, "loss": 0.1953, "step": 7780 }, { "epoch": 0.6425746924428822, "grad_norm": 57.011390686035156, "learning_rate": 4.987974585929226e-05, "loss": 0.2207, "step": 7800 }, { "epoch": 0.6442223198594025, "grad_norm": 1.5725603103637695, "learning_rate": 4.987691210369721e-05, "loss": 0.1664, "step": 7820 }, { "epoch": 0.6458699472759226, "grad_norm": 0.6347635984420776, "learning_rate": 4.98740454298425e-05, "loss": 0.2134, "step": 7840 }, { "epoch": 0.6475175746924429, "grad_norm": 1.3581891059875488, "learning_rate": 4.987114584152145e-05, "loss": 0.217, "step": 7860 }, { "epoch": 0.6491652021089631, "grad_norm": 5.659873008728027, "learning_rate": 4.986821334257091e-05, "loss": 0.1977, "step": 7880 }, { "epoch": 0.6508128295254832, "grad_norm": 11.7850980758667, "learning_rate": 4.986524793687131e-05, "loss": 0.218, "step": 7900 }, { "epoch": 0.6524604569420035, "grad_norm": 1.2856826782226562, "learning_rate": 4.986224962834659e-05, "loss": 0.1675, "step": 7920 }, { "epoch": 0.6541080843585237, "grad_norm": 1.055503249168396, "learning_rate": 4.985921842096427e-05, "loss": 0.2113, "step": 7940 }, { "epoch": 0.6557557117750439, "grad_norm": 1.5587449073791504, "learning_rate": 4.9856154318735374e-05, "loss": 0.1939, "step": 7960 }, { "epoch": 0.6574033391915641, "grad_norm": 2.40641450881958, "learning_rate": 4.985305732571446e-05, "loss": 0.206, "step": 7980 }, { "epoch": 0.6590509666080844, "grad_norm": 4.06686544418335, "learning_rate": 4.98499274459996e-05, "loss": 0.2107, "step": 8000 }, { "epoch": 0.6590509666080844, "eval_loss": 0.5655311346054077, "eval_runtime": 835.4807, "eval_samples_per_second": 26.08, "eval_steps_per_second": 6.521, "eval_wer": 0.2457764477651215, "step": 8000 }, { "epoch": 0.6606985940246046, "grad_norm": 0.8585990071296692, "learning_rate": 4.984676468373241e-05, "loss": 0.1667, "step": 8020 }, { "epoch": 0.6623462214411248, "grad_norm": 1.2851459980010986, "learning_rate": 4.984356904309799e-05, "loss": 0.2184, "step": 8040 }, { "epoch": 0.663993848857645, "grad_norm": 0.8445897698402405, "learning_rate": 4.984034052832496e-05, "loss": 0.2169, "step": 8060 }, { "epoch": 0.6656414762741653, "grad_norm": 1.4265161752700806, "learning_rate": 4.983707914368544e-05, "loss": 0.2004, "step": 8080 }, { "epoch": 0.6672891036906854, "grad_norm": 3.0620169639587402, "learning_rate": 4.983378489349504e-05, "loss": 0.2467, "step": 8100 }, { "epoch": 0.6689367311072056, "grad_norm": 1.1079747676849365, "learning_rate": 4.983045778211286e-05, "loss": 0.1587, "step": 8120 }, { "epoch": 0.6705843585237259, "grad_norm": 0.8565286993980408, "learning_rate": 4.982709781394148e-05, "loss": 0.2101, "step": 8140 }, { "epoch": 0.672231985940246, "grad_norm": 2.957345962524414, "learning_rate": 4.982370499342698e-05, "loss": 0.212, "step": 8160 }, { "epoch": 0.6738796133567663, "grad_norm": 1.744805932044983, "learning_rate": 4.982027932505887e-05, "loss": 0.209, "step": 8180 }, { "epoch": 0.6755272407732865, "grad_norm": 3.963977336883545, "learning_rate": 4.9816820813370166e-05, "loss": 0.214, "step": 8200 }, { "epoch": 0.6771748681898067, "grad_norm": 1.3982582092285156, "learning_rate": 4.981332946293733e-05, "loss": 0.1671, "step": 8220 }, { "epoch": 0.6788224956063269, "grad_norm": 0.5959907174110413, "learning_rate": 4.9809805278380264e-05, "loss": 0.2061, "step": 8240 }, { "epoch": 0.6804701230228472, "grad_norm": 1.2237777709960938, "learning_rate": 4.980624826436233e-05, "loss": 0.2188, "step": 8260 }, { "epoch": 0.6821177504393673, "grad_norm": 2.6652753353118896, "learning_rate": 4.9802658425590344e-05, "loss": 0.1964, "step": 8280 }, { "epoch": 0.6837653778558875, "grad_norm": 31.890893936157227, "learning_rate": 4.979903576681453e-05, "loss": 0.2133, "step": 8300 }, { "epoch": 0.6854130052724078, "grad_norm": 1.1191130876541138, "learning_rate": 4.979538029282855e-05, "loss": 0.1623, "step": 8320 }, { "epoch": 0.6870606326889279, "grad_norm": 1.4404329061508179, "learning_rate": 4.9791692008469514e-05, "loss": 0.2087, "step": 8340 }, { "epoch": 0.6887082601054482, "grad_norm": 1.4656809568405151, "learning_rate": 4.9787970918617914e-05, "loss": 0.2134, "step": 8360 }, { "epoch": 0.6903558875219684, "grad_norm": 2.310316801071167, "learning_rate": 4.978421702819767e-05, "loss": 0.215, "step": 8380 }, { "epoch": 0.6920035149384886, "grad_norm": 21.141889572143555, "learning_rate": 4.978043034217609e-05, "loss": 0.2206, "step": 8400 }, { "epoch": 0.6936511423550088, "grad_norm": 1.2799972295761108, "learning_rate": 4.977680261809319e-05, "loss": 0.1553, "step": 8420 }, { "epoch": 0.695298769771529, "grad_norm": 0.927029550075531, "learning_rate": 4.97729519951006e-05, "loss": 0.2205, "step": 8440 }, { "epoch": 0.6969463971880492, "grad_norm": 1.8685028553009033, "learning_rate": 4.976906859141309e-05, "loss": 0.1938, "step": 8460 }, { "epoch": 0.6985940246045694, "grad_norm": 0.38425010442733765, "learning_rate": 4.976515241216936e-05, "loss": 0.1964, "step": 8480 }, { "epoch": 0.7002416520210897, "grad_norm": 2.6303980350494385, "learning_rate": 4.976120346255146e-05, "loss": 0.1984, "step": 8500 }, { "epoch": 0.7018892794376098, "grad_norm": 7.791947841644287, "learning_rate": 4.975722174778482e-05, "loss": 0.1678, "step": 8520 }, { "epoch": 0.7035369068541301, "grad_norm": 1.193328857421875, "learning_rate": 4.9753207273138245e-05, "loss": 0.2182, "step": 8540 }, { "epoch": 0.7051845342706503, "grad_norm": 1.1064016819000244, "learning_rate": 4.974916004392385e-05, "loss": 0.2065, "step": 8560 }, { "epoch": 0.7068321616871704, "grad_norm": 3.304832696914673, "learning_rate": 4.974508006549711e-05, "loss": 0.1872, "step": 8580 }, { "epoch": 0.7084797891036907, "grad_norm": 2.5478882789611816, "learning_rate": 4.974096734325686e-05, "loss": 0.2295, "step": 8600 }, { "epoch": 0.7101274165202109, "grad_norm": 14.596063613891602, "learning_rate": 4.9736821882645226e-05, "loss": 0.1628, "step": 8620 }, { "epoch": 0.7117750439367311, "grad_norm": 0.9999619126319885, "learning_rate": 4.973264368914766e-05, "loss": 0.2107, "step": 8640 }, { "epoch": 0.7134226713532513, "grad_norm": 1.053412675857544, "learning_rate": 4.972843276829296e-05, "loss": 0.2085, "step": 8660 }, { "epoch": 0.7150702987697716, "grad_norm": 2.1676464080810547, "learning_rate": 4.9724189125653195e-05, "loss": 0.2048, "step": 8680 }, { "epoch": 0.7167179261862917, "grad_norm": 10.32970142364502, "learning_rate": 4.9719912766843746e-05, "loss": 0.2224, "step": 8700 }, { "epoch": 0.718365553602812, "grad_norm": 1.9829623699188232, "learning_rate": 4.971560369752328e-05, "loss": 0.1686, "step": 8720 }, { "epoch": 0.7200131810193322, "grad_norm": 2.7383005619049072, "learning_rate": 4.971126192339377e-05, "loss": 0.2088, "step": 8740 }, { "epoch": 0.7216608084358523, "grad_norm": 0.6413734555244446, "learning_rate": 4.970688745020043e-05, "loss": 0.2111, "step": 8760 }, { "epoch": 0.7233084358523726, "grad_norm": 0.8469798564910889, "learning_rate": 4.970248028373178e-05, "loss": 0.2057, "step": 8780 }, { "epoch": 0.7249560632688928, "grad_norm": 8.44117259979248, "learning_rate": 4.969804042981956e-05, "loss": 0.2179, "step": 8800 }, { "epoch": 0.726603690685413, "grad_norm": 1.40958571434021, "learning_rate": 4.969356789433881e-05, "loss": 0.1604, "step": 8820 }, { "epoch": 0.7282513181019332, "grad_norm": 5.328885078430176, "learning_rate": 4.968906268320777e-05, "loss": 0.2075, "step": 8840 }, { "epoch": 0.7298989455184535, "grad_norm": 0.6396345496177673, "learning_rate": 4.9684524802387956e-05, "loss": 0.2057, "step": 8860 }, { "epoch": 0.7315465729349736, "grad_norm": 0.7661327123641968, "learning_rate": 4.967995425788409e-05, "loss": 0.2064, "step": 8880 }, { "epoch": 0.7331942003514939, "grad_norm": 2.8300130367279053, "learning_rate": 4.9675351055744134e-05, "loss": 0.2109, "step": 8900 }, { "epoch": 0.7348418277680141, "grad_norm": 1.3380045890808105, "learning_rate": 4.9670715202059235e-05, "loss": 0.1492, "step": 8920 }, { "epoch": 0.7364894551845342, "grad_norm": 1.478825569152832, "learning_rate": 4.9666046702963784e-05, "loss": 0.2133, "step": 8940 }, { "epoch": 0.7381370826010545, "grad_norm": 1.3596333265304565, "learning_rate": 4.9661345564635356e-05, "loss": 0.1999, "step": 8960 }, { "epoch": 0.7397847100175747, "grad_norm": 0.3950871229171753, "learning_rate": 4.965661179329468e-05, "loss": 0.1925, "step": 8980 }, { "epoch": 0.7414323374340949, "grad_norm": 3.47658634185791, "learning_rate": 4.965184539520572e-05, "loss": 0.2016, "step": 9000 }, { "epoch": 0.7430799648506151, "grad_norm": 1.6241281032562256, "learning_rate": 4.9647046376675586e-05, "loss": 0.1522, "step": 9020 }, { "epoch": 0.7447275922671354, "grad_norm": 0.582032322883606, "learning_rate": 4.964221474405456e-05, "loss": 0.2161, "step": 9040 }, { "epoch": 0.7463752196836555, "grad_norm": 1.1760663986206055, "learning_rate": 4.963735050373608e-05, "loss": 0.1986, "step": 9060 }, { "epoch": 0.7480228471001757, "grad_norm": 1.9206633567810059, "learning_rate": 4.963245366215672e-05, "loss": 0.195, "step": 9080 }, { "epoch": 0.749670474516696, "grad_norm": 2.0300755500793457, "learning_rate": 4.9627524225796206e-05, "loss": 0.2044, "step": 9100 }, { "epoch": 0.7513181019332161, "grad_norm": 0.91159987449646, "learning_rate": 4.962256220117739e-05, "loss": 0.156, "step": 9120 }, { "epoch": 0.7529657293497364, "grad_norm": 0.6553214192390442, "learning_rate": 4.961756759486625e-05, "loss": 0.1918, "step": 9140 }, { "epoch": 0.7546133567662566, "grad_norm": 2.1197195053100586, "learning_rate": 4.961254041347189e-05, "loss": 0.1942, "step": 9160 }, { "epoch": 0.7562609841827768, "grad_norm": 0.5275347232818604, "learning_rate": 4.9607480663646487e-05, "loss": 0.1975, "step": 9180 }, { "epoch": 0.757908611599297, "grad_norm": 2.276308536529541, "learning_rate": 4.9602388352085337e-05, "loss": 0.2035, "step": 9200 }, { "epoch": 0.7595562390158173, "grad_norm": 1.4071913957595825, "learning_rate": 4.9597263485526826e-05, "loss": 0.1636, "step": 9220 }, { "epoch": 0.7612038664323374, "grad_norm": 5.9329023361206055, "learning_rate": 4.959210607075239e-05, "loss": 0.1954, "step": 9240 }, { "epoch": 0.7628514938488576, "grad_norm": 1.0558881759643555, "learning_rate": 4.958691611458657e-05, "loss": 0.2089, "step": 9260 }, { "epoch": 0.7644991212653779, "grad_norm": 1.1962648630142212, "learning_rate": 4.958169362389695e-05, "loss": 0.1966, "step": 9280 }, { "epoch": 0.766146748681898, "grad_norm": 27.050457000732422, "learning_rate": 4.957643860559417e-05, "loss": 0.2254, "step": 9300 }, { "epoch": 0.7677943760984183, "grad_norm": 1.5262819528579712, "learning_rate": 4.95711510666319e-05, "loss": 0.1672, "step": 9320 }, { "epoch": 0.7694420035149385, "grad_norm": 1.5432629585266113, "learning_rate": 4.956583101400685e-05, "loss": 0.2201, "step": 9340 }, { "epoch": 0.7710896309314587, "grad_norm": 1.4080497026443481, "learning_rate": 4.956047845475877e-05, "loss": 0.2034, "step": 9360 }, { "epoch": 0.7727372583479789, "grad_norm": 1.6339974403381348, "learning_rate": 4.9555093395970396e-05, "loss": 0.1918, "step": 9380 }, { "epoch": 0.7743848857644992, "grad_norm": 3.812486410140991, "learning_rate": 4.954967584476748e-05, "loss": 0.2221, "step": 9400 }, { "epoch": 0.7760325131810193, "grad_norm": 2.0563764572143555, "learning_rate": 4.954422580831879e-05, "loss": 0.1611, "step": 9420 }, { "epoch": 0.7776801405975395, "grad_norm": 0.7051644325256348, "learning_rate": 4.9538743293836046e-05, "loss": 0.2053, "step": 9440 }, { "epoch": 0.7793277680140598, "grad_norm": 0.837563157081604, "learning_rate": 4.9533228308573966e-05, "loss": 0.2078, "step": 9460 }, { "epoch": 0.7809753954305799, "grad_norm": 1.1410398483276367, "learning_rate": 4.952768085983023e-05, "loss": 0.2071, "step": 9480 }, { "epoch": 0.7826230228471002, "grad_norm": 2.372843027114868, "learning_rate": 4.952210095494546e-05, "loss": 0.2154, "step": 9500 }, { "epoch": 0.7842706502636204, "grad_norm": 25.939308166503906, "learning_rate": 4.9516488601303255e-05, "loss": 0.1618, "step": 9520 }, { "epoch": 0.7859182776801406, "grad_norm": 1.780004858970642, "learning_rate": 4.951084380633013e-05, "loss": 0.1993, "step": 9540 }, { "epoch": 0.7875659050966608, "grad_norm": 1.1589267253875732, "learning_rate": 4.9505166577495546e-05, "loss": 0.1939, "step": 9560 }, { "epoch": 0.789213532513181, "grad_norm": 1.0380936861038208, "learning_rate": 4.949945692231185e-05, "loss": 0.1863, "step": 9580 }, { "epoch": 0.7908611599297012, "grad_norm": 3.860309600830078, "learning_rate": 4.949371484833433e-05, "loss": 0.2199, "step": 9600 }, { "epoch": 0.7925087873462214, "grad_norm": 2.6430704593658447, "learning_rate": 4.9487940363161155e-05, "loss": 0.161, "step": 9620 }, { "epoch": 0.7941564147627417, "grad_norm": 2.1502444744110107, "learning_rate": 4.948213347443339e-05, "loss": 0.2101, "step": 9640 }, { "epoch": 0.7958040421792618, "grad_norm": 0.9621193408966064, "learning_rate": 4.9476294189834974e-05, "loss": 0.2105, "step": 9660 }, { "epoch": 0.7974516695957821, "grad_norm": 0.8426349759101868, "learning_rate": 4.9470422517092696e-05, "loss": 0.1821, "step": 9680 }, { "epoch": 0.7990992970123023, "grad_norm": 2.1861345767974854, "learning_rate": 4.9464518463976246e-05, "loss": 0.2157, "step": 9700 }, { "epoch": 0.8007469244288224, "grad_norm": 1.074504017829895, "learning_rate": 4.945858203829812e-05, "loss": 0.1593, "step": 9720 }, { "epoch": 0.8023945518453427, "grad_norm": 1.4774161577224731, "learning_rate": 4.945261324791367e-05, "loss": 0.1981, "step": 9740 }, { "epoch": 0.8040421792618629, "grad_norm": 0.9464648962020874, "learning_rate": 4.944661210072107e-05, "loss": 0.2107, "step": 9760 }, { "epoch": 0.8056898066783831, "grad_norm": 0.34157031774520874, "learning_rate": 4.94405786046613e-05, "loss": 0.209, "step": 9780 }, { "epoch": 0.8073374340949033, "grad_norm": 3.6989963054656982, "learning_rate": 4.943451276771818e-05, "loss": 0.221, "step": 9800 }, { "epoch": 0.8089850615114236, "grad_norm": 0.6388671398162842, "learning_rate": 4.942841459791828e-05, "loss": 0.1661, "step": 9820 }, { "epoch": 0.8106326889279437, "grad_norm": 0.6647291779518127, "learning_rate": 4.9422284103330985e-05, "loss": 0.192, "step": 9840 }, { "epoch": 0.812280316344464, "grad_norm": 0.540134847164154, "learning_rate": 4.941612129206844e-05, "loss": 0.2126, "step": 9860 }, { "epoch": 0.8139279437609842, "grad_norm": 0.5511148571968079, "learning_rate": 4.940992617228556e-05, "loss": 0.2018, "step": 9880 }, { "epoch": 0.8155755711775043, "grad_norm": 2.1634738445281982, "learning_rate": 4.9403698752180006e-05, "loss": 0.2087, "step": 9900 }, { "epoch": 0.8172231985940246, "grad_norm": 1.3764668703079224, "learning_rate": 4.939743903999218e-05, "loss": 0.1561, "step": 9920 }, { "epoch": 0.8188708260105448, "grad_norm": 0.8314900398254395, "learning_rate": 4.939114704400523e-05, "loss": 0.2226, "step": 9940 }, { "epoch": 0.820518453427065, "grad_norm": 1.0714060068130493, "learning_rate": 4.9384822772544994e-05, "loss": 0.191, "step": 9960 }, { "epoch": 0.8221660808435852, "grad_norm": 0.5301602482795715, "learning_rate": 4.937846623398003e-05, "loss": 0.2069, "step": 9980 }, { "epoch": 0.8238137082601055, "grad_norm": 2.4229841232299805, "learning_rate": 4.9372077436721634e-05, "loss": 0.2132, "step": 10000 }, { "epoch": 0.8238137082601055, "eval_loss": 0.5383469462394714, "eval_runtime": 248.9799, "eval_samples_per_second": 87.513, "eval_steps_per_second": 21.881, "eval_wer": 0.23184117484036582, "step": 10000 }, { "epoch": 0.8254613356766256, "grad_norm": 1.1694583892822266, "learning_rate": 4.936565638922372e-05, "loss": 0.1624, "step": 10020 }, { "epoch": 0.8271089630931459, "grad_norm": 2.124119281768799, "learning_rate": 4.9359203099982924e-05, "loss": 0.2062, "step": 10040 }, { "epoch": 0.8287565905096661, "grad_norm": 0.7251669764518738, "learning_rate": 4.935271757753852e-05, "loss": 0.1981, "step": 10060 }, { "epoch": 0.8304042179261862, "grad_norm": 1.2785292863845825, "learning_rate": 4.934619983047246e-05, "loss": 0.2094, "step": 10080 }, { "epoch": 0.8320518453427065, "grad_norm": 3.0615155696868896, "learning_rate": 4.933964986740931e-05, "loss": 0.2159, "step": 10100 }, { "epoch": 0.8336994727592267, "grad_norm": 2.058302402496338, "learning_rate": 4.933306769701629e-05, "loss": 0.1585, "step": 10120 }, { "epoch": 0.8353471001757469, "grad_norm": 1.0357364416122437, "learning_rate": 4.9326453328003217e-05, "loss": 0.187, "step": 10140 }, { "epoch": 0.8369947275922671, "grad_norm": 0.6068626642227173, "learning_rate": 4.931980676912252e-05, "loss": 0.1971, "step": 10160 }, { "epoch": 0.8386423550087874, "grad_norm": 3.377023220062256, "learning_rate": 4.931312802916925e-05, "loss": 0.1999, "step": 10180 }, { "epoch": 0.8402899824253075, "grad_norm": 3.0354530811309814, "learning_rate": 4.9306417116980996e-05, "loss": 0.2008, "step": 10200 }, { "epoch": 0.8419376098418277, "grad_norm": 2.531010150909424, "learning_rate": 4.929967404143796e-05, "loss": 0.1688, "step": 10220 }, { "epoch": 0.843585237258348, "grad_norm": 0.526393711566925, "learning_rate": 4.929289881146286e-05, "loss": 0.1844, "step": 10240 }, { "epoch": 0.8452328646748682, "grad_norm": 0.9323037266731262, "learning_rate": 4.9286091436021015e-05, "loss": 0.1968, "step": 10260 }, { "epoch": 0.8468804920913884, "grad_norm": 1.0275589227676392, "learning_rate": 4.927925192412024e-05, "loss": 0.182, "step": 10280 }, { "epoch": 0.8485281195079086, "grad_norm": 4.738681793212891, "learning_rate": 4.927238028481089e-05, "loss": 0.214, "step": 10300 }, { "epoch": 0.8501757469244289, "grad_norm": 1.584230899810791, "learning_rate": 4.926547652718583e-05, "loss": 0.1582, "step": 10320 }, { "epoch": 0.851823374340949, "grad_norm": 0.9227916598320007, "learning_rate": 4.9258540660380434e-05, "loss": 0.1921, "step": 10340 }, { "epoch": 0.8534710017574693, "grad_norm": 7.2498250007629395, "learning_rate": 4.925157269357254e-05, "loss": 0.2197, "step": 10360 }, { "epoch": 0.8551186291739895, "grad_norm": 0.8496150970458984, "learning_rate": 4.924457263598248e-05, "loss": 0.1798, "step": 10380 }, { "epoch": 0.8567662565905096, "grad_norm": 9.715910911560059, "learning_rate": 4.9237540496873064e-05, "loss": 0.2241, "step": 10400 }, { "epoch": 0.8584138840070299, "grad_norm": 2.707458257675171, "learning_rate": 4.923047628554952e-05, "loss": 0.1581, "step": 10420 }, { "epoch": 0.8600615114235501, "grad_norm": 0.9239126443862915, "learning_rate": 4.9223380011359544e-05, "loss": 0.1964, "step": 10440 }, { "epoch": 0.8617091388400703, "grad_norm": 1.0885734558105469, "learning_rate": 4.9216251683693246e-05, "loss": 0.2139, "step": 10460 }, { "epoch": 0.8633567662565905, "grad_norm": 0.8583139777183533, "learning_rate": 4.920909131198315e-05, "loss": 0.2007, "step": 10480 }, { "epoch": 0.8650043936731108, "grad_norm": 2.6485893726348877, "learning_rate": 4.920189890570419e-05, "loss": 0.2065, "step": 10500 }, { "epoch": 0.8666520210896309, "grad_norm": 1.3636205196380615, "learning_rate": 4.919467447437368e-05, "loss": 0.1583, "step": 10520 }, { "epoch": 0.8682996485061512, "grad_norm": 1.2751201391220093, "learning_rate": 4.918741802755132e-05, "loss": 0.2044, "step": 10540 }, { "epoch": 0.8699472759226714, "grad_norm": 0.8484062552452087, "learning_rate": 4.918012957483916e-05, "loss": 0.192, "step": 10560 }, { "epoch": 0.8715949033391915, "grad_norm": 0.5631062984466553, "learning_rate": 4.917280912588163e-05, "loss": 0.1997, "step": 10580 }, { "epoch": 0.8732425307557118, "grad_norm": 2.789642572402954, "learning_rate": 4.916545669036545e-05, "loss": 0.2309, "step": 10600 }, { "epoch": 0.874890158172232, "grad_norm": 7.149659156799316, "learning_rate": 4.915807227801973e-05, "loss": 0.1559, "step": 10620 }, { "epoch": 0.8765377855887522, "grad_norm": 1.080868124961853, "learning_rate": 4.915065589861584e-05, "loss": 0.2, "step": 10640 }, { "epoch": 0.8781854130052724, "grad_norm": 0.49652764201164246, "learning_rate": 4.914320756196748e-05, "loss": 0.2034, "step": 10660 }, { "epoch": 0.8798330404217927, "grad_norm": 0.9888033270835876, "learning_rate": 4.913572727793062e-05, "loss": 0.1952, "step": 10680 }, { "epoch": 0.8814806678383128, "grad_norm": 13.734339714050293, "learning_rate": 4.9128215056403507e-05, "loss": 0.2042, "step": 10700 }, { "epoch": 0.883128295254833, "grad_norm": 0.7586619257926941, "learning_rate": 4.912067090732667e-05, "loss": 0.1449, "step": 10720 }, { "epoch": 0.8847759226713533, "grad_norm": 0.673252284526825, "learning_rate": 4.911309484068285e-05, "loss": 0.2138, "step": 10740 }, { "epoch": 0.8864235500878734, "grad_norm": 1.2692677974700928, "learning_rate": 4.910548686649706e-05, "loss": 0.1994, "step": 10760 }, { "epoch": 0.8880711775043937, "grad_norm": 0.7501096725463867, "learning_rate": 4.9097846994836505e-05, "loss": 0.1949, "step": 10780 }, { "epoch": 0.8897188049209139, "grad_norm": 3.0218920707702637, "learning_rate": 4.909017523581062e-05, "loss": 0.2137, "step": 10800 }, { "epoch": 0.8913664323374341, "grad_norm": 1.1619638204574585, "learning_rate": 4.9082471599571015e-05, "loss": 0.1509, "step": 10820 }, { "epoch": 0.8930140597539543, "grad_norm": 1.3160550594329834, "learning_rate": 4.907512362815835e-05, "loss": 0.1879, "step": 10840 }, { "epoch": 0.8946616871704746, "grad_norm": 2.0613820552825928, "learning_rate": 4.9067357860710327e-05, "loss": 0.2094, "step": 10860 }, { "epoch": 0.8963093145869947, "grad_norm": 0.6547063589096069, "learning_rate": 4.905956024624158e-05, "loss": 0.1879, "step": 10880 }, { "epoch": 0.897956942003515, "grad_norm": 4.540472507476807, "learning_rate": 4.905173079507026e-05, "loss": 0.2022, "step": 10900 }, { "epoch": 0.8996045694200352, "grad_norm": 2.7642438411712646, "learning_rate": 4.904386951755665e-05, "loss": 0.1566, "step": 10920 }, { "epoch": 0.9012521968365553, "grad_norm": 1.2250425815582275, "learning_rate": 4.903597642410316e-05, "loss": 0.2049, "step": 10940 }, { "epoch": 0.9028998242530756, "grad_norm": 0.6964439749717712, "learning_rate": 4.902805152515427e-05, "loss": 0.2003, "step": 10960 }, { "epoch": 0.9045474516695958, "grad_norm": 0.8881003260612488, "learning_rate": 4.902049342086357e-05, "loss": 0.1969, "step": 10980 }, { "epoch": 0.906195079086116, "grad_norm": 3.828152656555176, "learning_rate": 4.901250653139905e-05, "loss": 0.2199, "step": 11000 }, { "epoch": 0.9078427065026362, "grad_norm": 1.2089974880218506, "learning_rate": 4.900448786749557e-05, "loss": 0.157, "step": 11020 }, { "epoch": 0.9094903339191565, "grad_norm": 1.148004412651062, "learning_rate": 4.8996437439763784e-05, "loss": 0.2013, "step": 11040 }, { "epoch": 0.9111379613356766, "grad_norm": 1.6459652185440063, "learning_rate": 4.89883552588564e-05, "loss": 0.1928, "step": 11060 }, { "epoch": 0.9127855887521968, "grad_norm": 0.7444145083427429, "learning_rate": 4.898024133546811e-05, "loss": 0.1965, "step": 11080 }, { "epoch": 0.9144332161687171, "grad_norm": 7.0522260665893555, "learning_rate": 4.897209568033564e-05, "loss": 0.2181, "step": 11100 }, { "epoch": 0.9160808435852372, "grad_norm": 1.5349088907241821, "learning_rate": 4.896391830423768e-05, "loss": 0.1575, "step": 11120 }, { "epoch": 0.9177284710017575, "grad_norm": 0.5103209018707275, "learning_rate": 4.895570921799491e-05, "loss": 0.2057, "step": 11140 }, { "epoch": 0.9193760984182777, "grad_norm": 0.4984442889690399, "learning_rate": 4.8947468432469955e-05, "loss": 0.1897, "step": 11160 }, { "epoch": 0.9210237258347979, "grad_norm": 0.5621454119682312, "learning_rate": 4.893919595856742e-05, "loss": 0.1912, "step": 11180 }, { "epoch": 0.9226713532513181, "grad_norm": 5.476040363311768, "learning_rate": 4.8930891807233794e-05, "loss": 0.2166, "step": 11200 }, { "epoch": 0.9243189806678384, "grad_norm": 1.3259927034378052, "learning_rate": 4.892255598945753e-05, "loss": 0.1561, "step": 11220 }, { "epoch": 0.9259666080843585, "grad_norm": 0.8870605230331421, "learning_rate": 4.891418851626893e-05, "loss": 0.2003, "step": 11240 }, { "epoch": 0.9276142355008787, "grad_norm": 16.381240844726562, "learning_rate": 4.890578939874025e-05, "loss": 0.1916, "step": 11260 }, { "epoch": 0.929261862917399, "grad_norm": 1.6785776615142822, "learning_rate": 4.889735864798556e-05, "loss": 0.1825, "step": 11280 }, { "epoch": 0.9309094903339191, "grad_norm": 5.951013565063477, "learning_rate": 4.8888896275160816e-05, "loss": 0.2045, "step": 11300 }, { "epoch": 0.9325571177504394, "grad_norm": 3.8519906997680664, "learning_rate": 4.888040229146382e-05, "loss": 0.1573, "step": 11320 }, { "epoch": 0.9342047451669596, "grad_norm": 1.0654627084732056, "learning_rate": 4.887187670813419e-05, "loss": 0.223, "step": 11340 }, { "epoch": 0.9358523725834798, "grad_norm": 1.6851961612701416, "learning_rate": 4.8863319536453364e-05, "loss": 0.2058, "step": 11360 }, { "epoch": 0.9375, "grad_norm": 0.6880635023117065, "learning_rate": 4.88547307877446e-05, "loss": 0.1974, "step": 11380 }, { "epoch": 0.9391476274165202, "grad_norm": 5.7095866203308105, "learning_rate": 4.884611047337289e-05, "loss": 0.2169, "step": 11400 }, { "epoch": 0.9407952548330404, "grad_norm": 8.853272438049316, "learning_rate": 4.883745860474505e-05, "loss": 0.1584, "step": 11420 }, { "epoch": 0.9424428822495606, "grad_norm": 1.7952899932861328, "learning_rate": 4.882877519330961e-05, "loss": 0.1937, "step": 11440 }, { "epoch": 0.9440905096660809, "grad_norm": 5.514512538909912, "learning_rate": 4.882006025055685e-05, "loss": 0.1989, "step": 11460 }, { "epoch": 0.945738137082601, "grad_norm": 0.776798665523529, "learning_rate": 4.88113137880188e-05, "loss": 0.1862, "step": 11480 }, { "epoch": 0.9473857644991213, "grad_norm": 2.824843645095825, "learning_rate": 4.880253581726916e-05, "loss": 0.2168, "step": 11500 }, { "epoch": 0.9490333919156415, "grad_norm": 0.9011817574501038, "learning_rate": 4.879372634992335e-05, "loss": 0.1601, "step": 11520 }, { "epoch": 0.9506810193321616, "grad_norm": 5.290555477142334, "learning_rate": 4.8784885397638445e-05, "loss": 0.1892, "step": 11540 }, { "epoch": 0.9523286467486819, "grad_norm": 1.164330005645752, "learning_rate": 4.87760129721132e-05, "loss": 0.2112, "step": 11560 }, { "epoch": 0.9539762741652021, "grad_norm": 0.8354160189628601, "learning_rate": 4.876710908508801e-05, "loss": 0.2176, "step": 11580 }, { "epoch": 0.9556239015817223, "grad_norm": 3.980334758758545, "learning_rate": 4.8758173748344904e-05, "loss": 0.2484, "step": 11600 }, { "epoch": 0.9572715289982425, "grad_norm": 1.7610282897949219, "learning_rate": 4.874920697370753e-05, "loss": 0.1618, "step": 11620 }, { "epoch": 0.9589191564147628, "grad_norm": 1.3336617946624756, "learning_rate": 4.874020877304113e-05, "loss": 0.1895, "step": 11640 }, { "epoch": 0.9605667838312829, "grad_norm": 0.9175180792808533, "learning_rate": 4.873117915825252e-05, "loss": 0.1945, "step": 11660 }, { "epoch": 0.9622144112478032, "grad_norm": 0.49205484986305237, "learning_rate": 4.8722118141290105e-05, "loss": 0.1935, "step": 11680 }, { "epoch": 0.9638620386643234, "grad_norm": 3.554715394973755, "learning_rate": 4.871302573414384e-05, "loss": 0.2135, "step": 11700 }, { "epoch": 0.9655096660808435, "grad_norm": 1.0651150941848755, "learning_rate": 4.8703901948845205e-05, "loss": 0.1564, "step": 11720 }, { "epoch": 0.9671572934973638, "grad_norm": 1.88383150100708, "learning_rate": 4.869474679746721e-05, "loss": 0.1994, "step": 11740 }, { "epoch": 0.968804920913884, "grad_norm": 0.6271897554397583, "learning_rate": 4.868556029212435e-05, "loss": 0.2043, "step": 11760 }, { "epoch": 0.9704525483304042, "grad_norm": 0.616521954536438, "learning_rate": 4.867634244497265e-05, "loss": 0.2059, "step": 11780 }, { "epoch": 0.9721001757469244, "grad_norm": 3.2947463989257812, "learning_rate": 4.8667093268209575e-05, "loss": 0.205, "step": 11800 }, { "epoch": 0.9737478031634447, "grad_norm": 0.7675888538360596, "learning_rate": 4.865781277407405e-05, "loss": 0.1436, "step": 11820 }, { "epoch": 0.9753954305799648, "grad_norm": 1.3581780195236206, "learning_rate": 4.8648500974846445e-05, "loss": 0.1964, "step": 11840 }, { "epoch": 0.977043057996485, "grad_norm": 0.7175058722496033, "learning_rate": 4.8639157882848566e-05, "loss": 0.197, "step": 11860 }, { "epoch": 0.9786906854130053, "grad_norm": 0.7383044362068176, "learning_rate": 4.8629783510443616e-05, "loss": 0.1904, "step": 11880 }, { "epoch": 0.9803383128295254, "grad_norm": 5.0527777671813965, "learning_rate": 4.8620377870036184e-05, "loss": 0.2117, "step": 11900 }, { "epoch": 0.9819859402460457, "grad_norm": 1.5865782499313354, "learning_rate": 4.861094097407224e-05, "loss": 0.1515, "step": 11920 }, { "epoch": 0.9836335676625659, "grad_norm": 0.5650818943977356, "learning_rate": 4.860147283503912e-05, "loss": 0.1926, "step": 11940 }, { "epoch": 0.9852811950790861, "grad_norm": 1.3663684129714966, "learning_rate": 4.8591973465465493e-05, "loss": 0.2066, "step": 11960 }, { "epoch": 0.9869288224956063, "grad_norm": 1.2778149843215942, "learning_rate": 4.8582442877921344e-05, "loss": 0.2022, "step": 11980 }, { "epoch": 0.9885764499121266, "grad_norm": 4.921967029571533, "learning_rate": 4.8572881085017996e-05, "loss": 0.2158, "step": 12000 }, { "epoch": 0.9885764499121266, "eval_loss": 0.5375325679779053, "eval_runtime": 252.7679, "eval_samples_per_second": 86.202, "eval_steps_per_second": 21.553, "eval_wer": 0.2286638878235724, "step": 12000 }, { "epoch": 0.9902240773286467, "grad_norm": 1.044043779373169, "learning_rate": 4.856328809940804e-05, "loss": 0.1584, "step": 12020 }, { "epoch": 0.991871704745167, "grad_norm": 2.4731016159057617, "learning_rate": 4.855366393378535e-05, "loss": 0.2071, "step": 12040 }, { "epoch": 0.9935193321616872, "grad_norm": 0.6640995144844055, "learning_rate": 4.854400860088506e-05, "loss": 0.2119, "step": 12060 }, { "epoch": 0.9951669595782073, "grad_norm": 0.43121984601020813, "learning_rate": 4.853432211348356e-05, "loss": 0.1918, "step": 12080 }, { "epoch": 0.9968145869947276, "grad_norm": 5.402212619781494, "learning_rate": 4.8524604484398414e-05, "loss": 0.2181, "step": 12100 }, { "epoch": 0.9984622144112478, "grad_norm": 1.417004108428955, "learning_rate": 4.851485572648846e-05, "loss": 0.1493, "step": 12120 }, { "epoch": 1.000109841827768, "grad_norm": 0.9582122564315796, "learning_rate": 4.8505075852653684e-05, "loss": 0.207, "step": 12140 }, { "epoch": 1.0017574692442883, "grad_norm": 1.74813973903656, "learning_rate": 4.8495264875835264e-05, "loss": 0.1468, "step": 12160 }, { "epoch": 1.0034050966608083, "grad_norm": 3.2926461696624756, "learning_rate": 4.8485422809015535e-05, "loss": 0.2009, "step": 12180 }, { "epoch": 1.0050527240773286, "grad_norm": 2.141998291015625, "learning_rate": 4.8475549665217956e-05, "loss": 0.1878, "step": 12200 }, { "epoch": 1.0067003514938488, "grad_norm": 1.7473055124282837, "learning_rate": 4.846564545750712e-05, "loss": 0.1916, "step": 12220 }, { "epoch": 1.008347978910369, "grad_norm": 0.20854823291301727, "learning_rate": 4.845571019898874e-05, "loss": 0.19, "step": 12240 }, { "epoch": 1.0099956063268893, "grad_norm": 1.6081265211105347, "learning_rate": 4.844574390280958e-05, "loss": 0.1623, "step": 12260 }, { "epoch": 1.0116432337434096, "grad_norm": 2.013803482055664, "learning_rate": 4.843574658215751e-05, "loss": 0.1916, "step": 12280 }, { "epoch": 1.0132908611599296, "grad_norm": 1.2559956312179565, "learning_rate": 4.842571825026143e-05, "loss": 0.1759, "step": 12300 }, { "epoch": 1.0149384885764499, "grad_norm": 1.103947401046753, "learning_rate": 4.8415658920391296e-05, "loss": 0.202, "step": 12320 }, { "epoch": 1.01658611599297, "grad_norm": 0.5012603998184204, "learning_rate": 4.840556860585805e-05, "loss": 0.198, "step": 12340 }, { "epoch": 1.0182337434094904, "grad_norm": 1.4309896230697632, "learning_rate": 4.839544732001368e-05, "loss": 0.163, "step": 12360 }, { "epoch": 1.0198813708260106, "grad_norm": 0.6756483912467957, "learning_rate": 4.838529507625111e-05, "loss": 0.1831, "step": 12380 }, { "epoch": 1.0215289982425309, "grad_norm": 1.141714096069336, "learning_rate": 4.8375111888004263e-05, "loss": 0.1888, "step": 12400 }, { "epoch": 1.0231766256590509, "grad_norm": 0.471711665391922, "learning_rate": 4.836489776874799e-05, "loss": 0.1892, "step": 12420 }, { "epoch": 1.0248242530755711, "grad_norm": 0.28789934515953064, "learning_rate": 4.835465273199807e-05, "loss": 0.1795, "step": 12440 }, { "epoch": 1.0264718804920914, "grad_norm": 1.1327400207519531, "learning_rate": 4.834437679131121e-05, "loss": 0.169, "step": 12460 }, { "epoch": 1.0281195079086116, "grad_norm": 1.0975223779678345, "learning_rate": 4.8334069960285e-05, "loss": 0.1875, "step": 12480 }, { "epoch": 1.0297671353251319, "grad_norm": 0.5748992562294006, "learning_rate": 4.832373225255791e-05, "loss": 0.1888, "step": 12500 }, { "epoch": 1.0314147627416521, "grad_norm": 0.5393682718276978, "learning_rate": 4.831336368180927e-05, "loss": 0.1808, "step": 12520 }, { "epoch": 1.0330623901581721, "grad_norm": 0.22509616613388062, "learning_rate": 4.830296426175922e-05, "loss": 0.1803, "step": 12540 }, { "epoch": 1.0347100175746924, "grad_norm": 1.0403659343719482, "learning_rate": 4.829253400616876e-05, "loss": 0.1594, "step": 12560 }, { "epoch": 1.0363576449912126, "grad_norm": 1.5973280668258667, "learning_rate": 4.828207292883968e-05, "loss": 0.1907, "step": 12580 }, { "epoch": 1.0380052724077329, "grad_norm": 0.9520076513290405, "learning_rate": 4.8271581043614555e-05, "loss": 0.1852, "step": 12600 }, { "epoch": 1.0396528998242531, "grad_norm": 3.2169032096862793, "learning_rate": 4.826105836437672e-05, "loss": 0.1796, "step": 12620 }, { "epoch": 1.0413005272407734, "grad_norm": 0.3701666295528412, "learning_rate": 4.825050490505025e-05, "loss": 0.1894, "step": 12640 }, { "epoch": 1.0429481546572934, "grad_norm": 1.573367953300476, "learning_rate": 4.823992067959998e-05, "loss": 0.1565, "step": 12660 }, { "epoch": 1.0445957820738137, "grad_norm": 0.6586406230926514, "learning_rate": 4.8229305702031426e-05, "loss": 0.183, "step": 12680 }, { "epoch": 1.046243409490334, "grad_norm": 1.1278094053268433, "learning_rate": 4.821865998639081e-05, "loss": 0.175, "step": 12700 }, { "epoch": 1.0478910369068541, "grad_norm": 0.9823224544525146, "learning_rate": 4.820798354676504e-05, "loss": 0.1939, "step": 12720 }, { "epoch": 1.0495386643233744, "grad_norm": 0.6462484002113342, "learning_rate": 4.819727639728164e-05, "loss": 0.1652, "step": 12740 }, { "epoch": 1.0511862917398946, "grad_norm": 1.3370243310928345, "learning_rate": 4.818653855210882e-05, "loss": 0.1605, "step": 12760 }, { "epoch": 1.0528339191564147, "grad_norm": 0.7861006259918213, "learning_rate": 4.817577002545538e-05, "loss": 0.1858, "step": 12780 }, { "epoch": 1.054481546572935, "grad_norm": 2.107189655303955, "learning_rate": 4.816497083157071e-05, "loss": 0.1933, "step": 12800 }, { "epoch": 1.0561291739894552, "grad_norm": 0.9943327307701111, "learning_rate": 4.81541409847448e-05, "loss": 0.1949, "step": 12820 }, { "epoch": 1.0577768014059754, "grad_norm": 0.5097872018814087, "learning_rate": 4.8143280499308205e-05, "loss": 0.1879, "step": 12840 }, { "epoch": 1.0594244288224957, "grad_norm": 2.121750593185425, "learning_rate": 4.8132389389631995e-05, "loss": 0.1616, "step": 12860 }, { "epoch": 1.061072056239016, "grad_norm": 0.86652010679245, "learning_rate": 4.81214676701278e-05, "loss": 0.2113, "step": 12880 }, { "epoch": 1.062719683655536, "grad_norm": 2.3862969875335693, "learning_rate": 4.811051535524772e-05, "loss": 0.1904, "step": 12900 }, { "epoch": 1.0643673110720562, "grad_norm": 0.5471189618110657, "learning_rate": 4.8099532459484356e-05, "loss": 0.182, "step": 12920 }, { "epoch": 1.0660149384885764, "grad_norm": 1.5992333889007568, "learning_rate": 4.8088518997370794e-05, "loss": 0.1821, "step": 12940 }, { "epoch": 1.0676625659050967, "grad_norm": 0.6893438696861267, "learning_rate": 4.807747498348052e-05, "loss": 0.1648, "step": 12960 }, { "epoch": 1.069310193321617, "grad_norm": 4.395170211791992, "learning_rate": 4.806640043242748e-05, "loss": 0.1759, "step": 12980 }, { "epoch": 1.0709578207381372, "grad_norm": 0.5166418552398682, "learning_rate": 4.805529535886605e-05, "loss": 0.1956, "step": 13000 }, { "epoch": 1.0726054481546572, "grad_norm": 0.8624104261398315, "learning_rate": 4.804415977749094e-05, "loss": 0.1839, "step": 13020 }, { "epoch": 1.0742530755711774, "grad_norm": 0.6681638360023499, "learning_rate": 4.803299370303725e-05, "loss": 0.2012, "step": 13040 }, { "epoch": 1.0759007029876977, "grad_norm": 1.1625574827194214, "learning_rate": 4.802179715028047e-05, "loss": 0.165, "step": 13060 }, { "epoch": 1.077548330404218, "grad_norm": 3.3364834785461426, "learning_rate": 4.801057013403636e-05, "loss": 0.1905, "step": 13080 }, { "epoch": 1.0791959578207382, "grad_norm": 0.9237430691719055, "learning_rate": 4.799931266916103e-05, "loss": 0.1923, "step": 13100 }, { "epoch": 1.0808435852372584, "grad_norm": 2.341661214828491, "learning_rate": 4.798858988805215e-05, "loss": 0.1918, "step": 13120 }, { "epoch": 1.0824912126537785, "grad_norm": 0.2139461189508438, "learning_rate": 4.797727309122835e-05, "loss": 0.2039, "step": 13140 }, { "epoch": 1.0841388400702987, "grad_norm": 1.3084605932235718, "learning_rate": 4.796592588983348e-05, "loss": 0.1808, "step": 13160 }, { "epoch": 1.085786467486819, "grad_norm": 0.5879707336425781, "learning_rate": 4.7954548298882685e-05, "loss": 0.1879, "step": 13180 }, { "epoch": 1.0874340949033392, "grad_norm": 9.557912826538086, "learning_rate": 4.794314033343129e-05, "loss": 0.1667, "step": 13200 }, { "epoch": 1.0890817223198594, "grad_norm": 2.7290220260620117, "learning_rate": 4.793170200857485e-05, "loss": 0.1848, "step": 13220 }, { "epoch": 1.0907293497363797, "grad_norm": 0.4870263338088989, "learning_rate": 4.792023333944907e-05, "loss": 0.183, "step": 13240 }, { "epoch": 1.0923769771528997, "grad_norm": 1.1112992763519287, "learning_rate": 4.790873434122982e-05, "loss": 0.1628, "step": 13260 }, { "epoch": 1.09402460456942, "grad_norm": 1.2795594930648804, "learning_rate": 4.789720502913309e-05, "loss": 0.1687, "step": 13280 }, { "epoch": 1.0956722319859402, "grad_norm": 1.0361709594726562, "learning_rate": 4.788564541841498e-05, "loss": 0.196, "step": 13300 }, { "epoch": 1.0973198594024605, "grad_norm": 1.4173661470413208, "learning_rate": 4.787405552437171e-05, "loss": 0.191, "step": 13320 }, { "epoch": 1.0989674868189807, "grad_norm": 3.3120641708374023, "learning_rate": 4.786243536233954e-05, "loss": 0.1881, "step": 13340 }, { "epoch": 1.100615114235501, "grad_norm": 1.3123334646224976, "learning_rate": 4.785078494769481e-05, "loss": 0.1701, "step": 13360 }, { "epoch": 1.1022627416520212, "grad_norm": 1.0791329145431519, "learning_rate": 4.783910429585386e-05, "loss": 0.1883, "step": 13380 }, { "epoch": 1.1039103690685412, "grad_norm": 4.972422122955322, "learning_rate": 4.782739342227308e-05, "loss": 0.1977, "step": 13400 }, { "epoch": 1.1055579964850615, "grad_norm": 1.7628355026245117, "learning_rate": 4.781565234244881e-05, "loss": 0.1828, "step": 13420 }, { "epoch": 1.1072056239015817, "grad_norm": 0.7487472295761108, "learning_rate": 4.7803881071917386e-05, "loss": 0.1903, "step": 13440 }, { "epoch": 1.108853251318102, "grad_norm": 1.0923112630844116, "learning_rate": 4.77920796262551e-05, "loss": 0.1597, "step": 13460 }, { "epoch": 1.1105008787346222, "grad_norm": 2.634671211242676, "learning_rate": 4.778024802107814e-05, "loss": 0.201, "step": 13480 }, { "epoch": 1.1121485061511422, "grad_norm": 0.6952950358390808, "learning_rate": 4.7768386272042646e-05, "loss": 0.1887, "step": 13500 }, { "epoch": 1.1137961335676625, "grad_norm": 1.2554984092712402, "learning_rate": 4.77564943948446e-05, "loss": 0.1862, "step": 13520 }, { "epoch": 1.1154437609841827, "grad_norm": 0.42830732464790344, "learning_rate": 4.774457240521989e-05, "loss": 0.1892, "step": 13540 }, { "epoch": 1.117091388400703, "grad_norm": 1.1879973411560059, "learning_rate": 4.7732620318944224e-05, "loss": 0.1576, "step": 13560 }, { "epoch": 1.1187390158172232, "grad_norm": 0.9063071012496948, "learning_rate": 4.772063815183315e-05, "loss": 0.1941, "step": 13580 }, { "epoch": 1.1203866432337435, "grad_norm": 1.071677803993225, "learning_rate": 4.770862591974202e-05, "loss": 0.1779, "step": 13600 }, { "epoch": 1.1220342706502637, "grad_norm": 1.2179434299468994, "learning_rate": 4.769658363856595e-05, "loss": 0.1838, "step": 13620 }, { "epoch": 1.1236818980667838, "grad_norm": 0.8977575898170471, "learning_rate": 4.768451132423985e-05, "loss": 0.2017, "step": 13640 }, { "epoch": 1.125329525483304, "grad_norm": 1.6982157230377197, "learning_rate": 4.767240899273835e-05, "loss": 0.1608, "step": 13660 }, { "epoch": 1.1269771528998243, "grad_norm": 0.7634503245353699, "learning_rate": 4.7660276660075804e-05, "loss": 0.1718, "step": 13680 }, { "epoch": 1.1286247803163445, "grad_norm": 0.8203310966491699, "learning_rate": 4.764811434230626e-05, "loss": 0.1831, "step": 13700 }, { "epoch": 1.1302724077328647, "grad_norm": 0.9817870259284973, "learning_rate": 4.763592205552345e-05, "loss": 0.1858, "step": 13720 }, { "epoch": 1.1319200351493848, "grad_norm": 0.333933562040329, "learning_rate": 4.762369981586077e-05, "loss": 0.1785, "step": 13740 }, { "epoch": 1.133567662565905, "grad_norm": 1.1885998249053955, "learning_rate": 4.761144763949124e-05, "loss": 0.1508, "step": 13760 }, { "epoch": 1.1352152899824253, "grad_norm": 4.4533820152282715, "learning_rate": 4.759916554262749e-05, "loss": 0.1855, "step": 13780 }, { "epoch": 1.1368629173989455, "grad_norm": 0.6248905062675476, "learning_rate": 4.7586853541521744e-05, "loss": 0.1938, "step": 13800 }, { "epoch": 1.1385105448154658, "grad_norm": 0.7727264761924744, "learning_rate": 4.757451165246583e-05, "loss": 0.1809, "step": 13820 }, { "epoch": 1.140158172231986, "grad_norm": 0.3115682005882263, "learning_rate": 4.7562139891791046e-05, "loss": 0.188, "step": 13840 }, { "epoch": 1.1418057996485063, "grad_norm": 3.9221365451812744, "learning_rate": 4.754973827586832e-05, "loss": 0.1556, "step": 13860 }, { "epoch": 1.1434534270650263, "grad_norm": 0.8228521347045898, "learning_rate": 4.7537306821108e-05, "loss": 0.185, "step": 13880 }, { "epoch": 1.1451010544815465, "grad_norm": 3.115772247314453, "learning_rate": 4.752484554395995e-05, "loss": 0.1786, "step": 13900 }, { "epoch": 1.1467486818980668, "grad_norm": 0.7074133157730103, "learning_rate": 4.7512354460913524e-05, "loss": 0.175, "step": 13920 }, { "epoch": 1.148396309314587, "grad_norm": 0.4195045828819275, "learning_rate": 4.7499833588497464e-05, "loss": 0.1868, "step": 13940 }, { "epoch": 1.1500439367311073, "grad_norm": 1.2703948020935059, "learning_rate": 4.7487282943279965e-05, "loss": 0.1517, "step": 13960 }, { "epoch": 1.1516915641476273, "grad_norm": 2.4614219665527344, "learning_rate": 4.747470254186862e-05, "loss": 0.1926, "step": 13980 }, { "epoch": 1.1533391915641475, "grad_norm": 2.0017571449279785, "learning_rate": 4.746209240091038e-05, "loss": 0.1852, "step": 14000 }, { "epoch": 1.1533391915641475, "eval_loss": 0.5445581078529358, "eval_runtime": 260.7987, "eval_samples_per_second": 83.547, "eval_steps_per_second": 20.89, "eval_wer": 0.22884024824829308, "step": 14000 }, { "epoch": 1.1550417398945518, "grad_norm": 0.7891082763671875, "learning_rate": 4.744945253709156e-05, "loss": 0.1683, "step": 14020 }, { "epoch": 1.156689367311072, "grad_norm": 1.2621268033981323, "learning_rate": 4.743678296713779e-05, "loss": 0.2037, "step": 14040 }, { "epoch": 1.1583369947275923, "grad_norm": 1.7061976194381714, "learning_rate": 4.7424083707814035e-05, "loss": 0.18, "step": 14060 }, { "epoch": 1.1599846221441124, "grad_norm": 4.809337615966797, "learning_rate": 4.7411354775924523e-05, "loss": 0.1772, "step": 14080 }, { "epoch": 1.1616322495606326, "grad_norm": 4.00537109375, "learning_rate": 4.739859618831276e-05, "loss": 0.2029, "step": 14100 }, { "epoch": 1.1632798769771528, "grad_norm": 3.184988498687744, "learning_rate": 4.7385807961861486e-05, "loss": 0.1857, "step": 14120 }, { "epoch": 1.164927504393673, "grad_norm": 0.24155808985233307, "learning_rate": 4.737299011349265e-05, "loss": 0.1808, "step": 14140 }, { "epoch": 1.1665751318101933, "grad_norm": 1.2904281616210938, "learning_rate": 4.736014266016743e-05, "loss": 0.1728, "step": 14160 }, { "epoch": 1.1682227592267136, "grad_norm": 0.8972460031509399, "learning_rate": 4.734726561888616e-05, "loss": 0.1712, "step": 14180 }, { "epoch": 1.1698703866432338, "grad_norm": 0.48299163579940796, "learning_rate": 4.7334359006688314e-05, "loss": 0.1872, "step": 14200 }, { "epoch": 1.1715180140597539, "grad_norm": 1.942762017250061, "learning_rate": 4.732142284065251e-05, "loss": 0.1742, "step": 14220 }, { "epoch": 1.1731656414762741, "grad_norm": 0.8175634741783142, "learning_rate": 4.730845713789647e-05, "loss": 0.175, "step": 14240 }, { "epoch": 1.1748132688927944, "grad_norm": 3.179103136062622, "learning_rate": 4.7295461915577e-05, "loss": 0.1544, "step": 14260 }, { "epoch": 1.1764608963093146, "grad_norm": 0.7264633178710938, "learning_rate": 4.7282437190889975e-05, "loss": 0.1854, "step": 14280 }, { "epoch": 1.1781085237258349, "grad_norm": 1.372662901878357, "learning_rate": 4.7269382981070296e-05, "loss": 0.193, "step": 14300 }, { "epoch": 1.179756151142355, "grad_norm": 1.8890334367752075, "learning_rate": 4.7256299303391895e-05, "loss": 0.1908, "step": 14320 }, { "epoch": 1.1814037785588751, "grad_norm": 0.4149373769760132, "learning_rate": 4.724318617516768e-05, "loss": 0.1838, "step": 14340 }, { "epoch": 1.1830514059753954, "grad_norm": 2.029543876647949, "learning_rate": 4.723004361374953e-05, "loss": 0.1656, "step": 14360 }, { "epoch": 1.1846990333919156, "grad_norm": 10.552154541015625, "learning_rate": 4.721687163652829e-05, "loss": 0.1828, "step": 14380 }, { "epoch": 1.1863466608084359, "grad_norm": 0.5331339240074158, "learning_rate": 4.7203670260933725e-05, "loss": 0.1807, "step": 14400 }, { "epoch": 1.1879942882249561, "grad_norm": 2.227116584777832, "learning_rate": 4.719043950443448e-05, "loss": 0.1903, "step": 14420 }, { "epoch": 1.1896419156414764, "grad_norm": 0.36228641867637634, "learning_rate": 4.717717938453811e-05, "loss": 0.1775, "step": 14440 }, { "epoch": 1.1912895430579964, "grad_norm": 2.8174803256988525, "learning_rate": 4.7163889918790984e-05, "loss": 0.1672, "step": 14460 }, { "epoch": 1.1929371704745166, "grad_norm": 1.1632130146026611, "learning_rate": 4.7150571124778344e-05, "loss": 0.1798, "step": 14480 }, { "epoch": 1.1945847978910369, "grad_norm": 2.7487854957580566, "learning_rate": 4.713722302012421e-05, "loss": 0.1833, "step": 14500 }, { "epoch": 1.1962324253075571, "grad_norm": 0.7984181642532349, "learning_rate": 4.712384562249141e-05, "loss": 0.1934, "step": 14520 }, { "epoch": 1.1978800527240774, "grad_norm": 0.22223146259784698, "learning_rate": 4.711043894958153e-05, "loss": 0.1883, "step": 14540 }, { "epoch": 1.1995276801405976, "grad_norm": 5.944990158081055, "learning_rate": 4.7097003019134876e-05, "loss": 0.1584, "step": 14560 }, { "epoch": 1.2011753075571177, "grad_norm": 0.5789378881454468, "learning_rate": 4.708353784893049e-05, "loss": 0.1837, "step": 14580 }, { "epoch": 1.202822934973638, "grad_norm": 0.4476211369037628, "learning_rate": 4.707004345678609e-05, "loss": 0.1878, "step": 14600 }, { "epoch": 1.2044705623901582, "grad_norm": 1.0182552337646484, "learning_rate": 4.705651986055807e-05, "loss": 0.1968, "step": 14620 }, { "epoch": 1.2061181898066784, "grad_norm": 0.5336174368858337, "learning_rate": 4.7042967078141466e-05, "loss": 0.1849, "step": 14640 }, { "epoch": 1.2077658172231986, "grad_norm": 0.6163532733917236, "learning_rate": 4.702938512746994e-05, "loss": 0.1571, "step": 14660 }, { "epoch": 1.209413444639719, "grad_norm": 1.7392462491989136, "learning_rate": 4.701577402651574e-05, "loss": 0.205, "step": 14680 }, { "epoch": 1.2110610720562391, "grad_norm": 0.9206298589706421, "learning_rate": 4.70021337932897e-05, "loss": 0.1872, "step": 14700 }, { "epoch": 1.2127086994727592, "grad_norm": 1.0826961994171143, "learning_rate": 4.6988464445841186e-05, "loss": 0.185, "step": 14720 }, { "epoch": 1.2143563268892794, "grad_norm": 0.3322293758392334, "learning_rate": 4.6974766002258105e-05, "loss": 0.1746, "step": 14740 }, { "epoch": 1.2160039543057997, "grad_norm": 1.4564270973205566, "learning_rate": 4.696103848066686e-05, "loss": 0.1581, "step": 14760 }, { "epoch": 1.21765158172232, "grad_norm": 0.7961132526397705, "learning_rate": 4.6947281899232333e-05, "loss": 0.1774, "step": 14780 }, { "epoch": 1.2192992091388402, "grad_norm": 2.5025506019592285, "learning_rate": 4.693349627615784e-05, "loss": 0.2024, "step": 14800 }, { "epoch": 1.2209468365553602, "grad_norm": 0.9771924614906311, "learning_rate": 4.692037305103247e-05, "loss": 0.1857, "step": 14820 }, { "epoch": 1.2225944639718804, "grad_norm": 0.3456748127937317, "learning_rate": 4.690653084926293e-05, "loss": 0.1828, "step": 14840 }, { "epoch": 1.2242420913884007, "grad_norm": 0.9823417067527771, "learning_rate": 4.689265965977708e-05, "loss": 0.1599, "step": 14860 }, { "epoch": 1.225889718804921, "grad_norm": 1.0269907712936401, "learning_rate": 4.68787595009299e-05, "loss": 0.1797, "step": 14880 }, { "epoch": 1.2275373462214412, "grad_norm": 0.7121568918228149, "learning_rate": 4.686483039111472e-05, "loss": 0.1851, "step": 14900 }, { "epoch": 1.2291849736379614, "grad_norm": 0.5829301476478577, "learning_rate": 4.6850872348763166e-05, "loss": 0.1816, "step": 14920 }, { "epoch": 1.2308326010544817, "grad_norm": 0.23825562000274658, "learning_rate": 4.683688539234515e-05, "loss": 0.1687, "step": 14940 }, { "epoch": 1.2324802284710017, "grad_norm": 1.7142807245254517, "learning_rate": 4.682286954036886e-05, "loss": 0.1618, "step": 14960 }, { "epoch": 1.234127855887522, "grad_norm": 2.196317195892334, "learning_rate": 4.6808824811380704e-05, "loss": 0.1868, "step": 14980 }, { "epoch": 1.2357754833040422, "grad_norm": 0.6108691096305847, "learning_rate": 4.67947512239653e-05, "loss": 0.195, "step": 15000 }, { "epoch": 1.2374231107205624, "grad_norm": 0.5913376808166504, "learning_rate": 4.678135460276387e-05, "loss": 0.1855, "step": 15020 }, { "epoch": 1.2390707381370827, "grad_norm": 0.6738501191139221, "learning_rate": 4.676722479501395e-05, "loss": 0.1843, "step": 15040 }, { "epoch": 1.2407183655536027, "grad_norm": 1.7320432662963867, "learning_rate": 4.675306618388381e-05, "loss": 0.1567, "step": 15060 }, { "epoch": 1.242365992970123, "grad_norm": 1.1772425174713135, "learning_rate": 4.6738878788108776e-05, "loss": 0.1927, "step": 15080 }, { "epoch": 1.2440136203866432, "grad_norm": 0.8685306906700134, "learning_rate": 4.672466262646225e-05, "loss": 0.2057, "step": 15100 }, { "epoch": 1.2456612478031635, "grad_norm": 0.5060691833496094, "learning_rate": 4.6710417717755695e-05, "loss": 0.1846, "step": 15120 }, { "epoch": 1.2473088752196837, "grad_norm": 0.2376640886068344, "learning_rate": 4.6696144080838624e-05, "loss": 0.1835, "step": 15140 }, { "epoch": 1.248956502636204, "grad_norm": 1.7704603672027588, "learning_rate": 4.6681841734598576e-05, "loss": 0.1661, "step": 15160 }, { "epoch": 1.2506041300527242, "grad_norm": 0.9057344198226929, "learning_rate": 4.6667510697961045e-05, "loss": 0.1942, "step": 15180 }, { "epoch": 1.2522517574692442, "grad_norm": 0.7479068636894226, "learning_rate": 4.6653150989889525e-05, "loss": 0.1807, "step": 15200 }, { "epoch": 1.2538993848857645, "grad_norm": 0.8971445560455322, "learning_rate": 4.663876262938543e-05, "loss": 0.192, "step": 15220 }, { "epoch": 1.2555470123022847, "grad_norm": 0.6187984347343445, "learning_rate": 4.6624345635488085e-05, "loss": 0.1781, "step": 15240 }, { "epoch": 1.257194639718805, "grad_norm": 1.5628702640533447, "learning_rate": 4.6609900027274714e-05, "loss": 0.1511, "step": 15260 }, { "epoch": 1.2588422671353252, "grad_norm": 1.0656757354736328, "learning_rate": 4.659542582386041e-05, "loss": 0.1888, "step": 15280 }, { "epoch": 1.2604898945518452, "grad_norm": 0.9140082597732544, "learning_rate": 4.658092304439808e-05, "loss": 0.1791, "step": 15300 }, { "epoch": 1.2621375219683655, "grad_norm": 0.7536066770553589, "learning_rate": 4.6566391708078484e-05, "loss": 0.1945, "step": 15320 }, { "epoch": 1.2637851493848857, "grad_norm": 0.6287294030189514, "learning_rate": 4.655183183413012e-05, "loss": 0.1844, "step": 15340 }, { "epoch": 1.265432776801406, "grad_norm": 1.3471739292144775, "learning_rate": 4.653724344181929e-05, "loss": 0.1718, "step": 15360 }, { "epoch": 1.2670804042179262, "grad_norm": 0.8668192625045776, "learning_rate": 4.652262655045001e-05, "loss": 0.1816, "step": 15380 }, { "epoch": 1.2687280316344465, "grad_norm": 0.7043120861053467, "learning_rate": 4.6507981179364015e-05, "loss": 0.1835, "step": 15400 }, { "epoch": 1.2703756590509667, "grad_norm": 2.341987133026123, "learning_rate": 4.6493307347940735e-05, "loss": 0.1779, "step": 15420 }, { "epoch": 1.2720232864674867, "grad_norm": 0.7353787422180176, "learning_rate": 4.6478605075597236e-05, "loss": 0.1995, "step": 15440 }, { "epoch": 1.273670913884007, "grad_norm": 1.131020188331604, "learning_rate": 4.646387438178824e-05, "loss": 0.1626, "step": 15460 }, { "epoch": 1.2753185413005272, "grad_norm": 5.1669206619262695, "learning_rate": 4.644911528600607e-05, "loss": 0.1739, "step": 15480 }, { "epoch": 1.2769661687170475, "grad_norm": 1.9862631559371948, "learning_rate": 4.643432780778064e-05, "loss": 0.1811, "step": 15500 }, { "epoch": 1.2786137961335677, "grad_norm": 4.627335548400879, "learning_rate": 4.641951196667939e-05, "loss": 0.1755, "step": 15520 }, { "epoch": 1.2802614235500878, "grad_norm": 1.6128586530685425, "learning_rate": 4.6404667782307334e-05, "loss": 0.1758, "step": 15540 }, { "epoch": 1.281909050966608, "grad_norm": 0.8337876200675964, "learning_rate": 4.638979527430696e-05, "loss": 0.1543, "step": 15560 }, { "epoch": 1.2835566783831283, "grad_norm": 1.7509500980377197, "learning_rate": 4.6374894462358246e-05, "loss": 0.1866, "step": 15580 }, { "epoch": 1.2852043057996485, "grad_norm": 1.9350312948226929, "learning_rate": 4.635996536617863e-05, "loss": 0.1898, "step": 15600 }, { "epoch": 1.2868519332161688, "grad_norm": 0.9639732241630554, "learning_rate": 4.6345008005522966e-05, "loss": 0.1757, "step": 15620 }, { "epoch": 1.288499560632689, "grad_norm": 0.22245891392230988, "learning_rate": 4.633002240018351e-05, "loss": 0.1725, "step": 15640 }, { "epoch": 1.2901471880492092, "grad_norm": 1.5688083171844482, "learning_rate": 4.6315008569989905e-05, "loss": 0.1682, "step": 15660 }, { "epoch": 1.2917948154657293, "grad_norm": 0.9785603880882263, "learning_rate": 4.629996653480912e-05, "loss": 0.184, "step": 15680 }, { "epoch": 1.2934424428822495, "grad_norm": 1.3254144191741943, "learning_rate": 4.628489631454548e-05, "loss": 0.1862, "step": 15700 }, { "epoch": 1.2950900702987698, "grad_norm": 2.9795165061950684, "learning_rate": 4.6269797929140554e-05, "loss": 0.1836, "step": 15720 }, { "epoch": 1.29673769771529, "grad_norm": 0.510050356388092, "learning_rate": 4.625467139857323e-05, "loss": 0.194, "step": 15740 }, { "epoch": 1.2983853251318103, "grad_norm": 1.456040859222412, "learning_rate": 4.6239516742859625e-05, "loss": 0.1679, "step": 15760 }, { "epoch": 1.3000329525483303, "grad_norm": 0.7667500376701355, "learning_rate": 4.622433398205305e-05, "loss": 0.1955, "step": 15780 }, { "epoch": 1.3016805799648505, "grad_norm": 5.522034168243408, "learning_rate": 4.620912313624404e-05, "loss": 0.1766, "step": 15800 }, { "epoch": 1.3033282073813708, "grad_norm": 1.5902382135391235, "learning_rate": 4.619388422556026e-05, "loss": 0.1843, "step": 15820 }, { "epoch": 1.304975834797891, "grad_norm": 0.3163457214832306, "learning_rate": 4.617861727016655e-05, "loss": 0.1876, "step": 15840 }, { "epoch": 1.3066234622144113, "grad_norm": 1.0746263265609741, "learning_rate": 4.616332229026482e-05, "loss": 0.1727, "step": 15860 }, { "epoch": 1.3082710896309315, "grad_norm": 7.499575614929199, "learning_rate": 4.6147999306094095e-05, "loss": 0.191, "step": 15880 }, { "epoch": 1.3099187170474518, "grad_norm": 1.3614699840545654, "learning_rate": 4.613264833793044e-05, "loss": 0.1979, "step": 15900 }, { "epoch": 1.3115663444639718, "grad_norm": 2.371039390563965, "learning_rate": 4.611726940608694e-05, "loss": 0.1868, "step": 15920 }, { "epoch": 1.313213971880492, "grad_norm": 0.4890645444393158, "learning_rate": 4.6101862530913715e-05, "loss": 0.181, "step": 15940 }, { "epoch": 1.3148615992970123, "grad_norm": 1.2924984693527222, "learning_rate": 4.6086427732797836e-05, "loss": 0.1826, "step": 15960 }, { "epoch": 1.3165092267135325, "grad_norm": 0.8311454653739929, "learning_rate": 4.6070965032163316e-05, "loss": 0.1757, "step": 15980 }, { "epoch": 1.3181568541300528, "grad_norm": 4.735937595367432, "learning_rate": 4.605547444947112e-05, "loss": 0.1934, "step": 16000 }, { "epoch": 1.3181568541300528, "eval_loss": 0.5345502495765686, "eval_runtime": 711.0231, "eval_samples_per_second": 30.645, "eval_steps_per_second": 7.662, "eval_wer": 0.23376714265318863, "step": 16000 }, { "epoch": 1.3198044815465728, "grad_norm": 0.9712895750999451, "learning_rate": 4.603995600521908e-05, "loss": 0.1792, "step": 16020 }, { "epoch": 1.321452108963093, "grad_norm": 0.3473972976207733, "learning_rate": 4.6024409719941905e-05, "loss": 0.1926, "step": 16040 }, { "epoch": 1.3230997363796133, "grad_norm": 1.2832224369049072, "learning_rate": 4.600883561421116e-05, "loss": 0.1696, "step": 16060 }, { "epoch": 1.3247473637961336, "grad_norm": 1.0073579549789429, "learning_rate": 4.599401446384175e-05, "loss": 0.1816, "step": 16080 }, { "epoch": 1.3263949912126538, "grad_norm": 1.0329769849777222, "learning_rate": 4.597838616753484e-05, "loss": 0.1834, "step": 16100 }, { "epoch": 1.328042618629174, "grad_norm": 2.175204038619995, "learning_rate": 4.596273011167479e-05, "loss": 0.1936, "step": 16120 }, { "epoch": 1.3296902460456943, "grad_norm": 0.29955798387527466, "learning_rate": 4.594704631697842e-05, "loss": 0.1735, "step": 16140 }, { "epoch": 1.3313378734622145, "grad_norm": 5.876577377319336, "learning_rate": 4.593133480419924e-05, "loss": 0.1642, "step": 16160 }, { "epoch": 1.3329855008787346, "grad_norm": 1.0501422882080078, "learning_rate": 4.591559559412743e-05, "loss": 0.2176, "step": 16180 }, { "epoch": 1.3346331282952548, "grad_norm": 2.9819207191467285, "learning_rate": 4.5899828707589844e-05, "loss": 0.1841, "step": 16200 }, { "epoch": 1.336280755711775, "grad_norm": 0.8568416237831116, "learning_rate": 4.588403416544992e-05, "loss": 0.1916, "step": 16220 }, { "epoch": 1.3379283831282953, "grad_norm": 1.058608889579773, "learning_rate": 4.586821198860772e-05, "loss": 0.1881, "step": 16240 }, { "epoch": 1.3395760105448153, "grad_norm": 1.0041903257369995, "learning_rate": 4.585236219799988e-05, "loss": 0.1696, "step": 16260 }, { "epoch": 1.3412236379613356, "grad_norm": 1.351030945777893, "learning_rate": 4.5836484814599544e-05, "loss": 0.183, "step": 16280 }, { "epoch": 1.3428712653778558, "grad_norm": 0.7650487422943115, "learning_rate": 4.58205798594164e-05, "loss": 0.1775, "step": 16300 }, { "epoch": 1.344518892794376, "grad_norm": 1.0618997812271118, "learning_rate": 4.5804647353496604e-05, "loss": 0.1846, "step": 16320 }, { "epoch": 1.3461665202108963, "grad_norm": 1.2706764936447144, "learning_rate": 4.578868731792277e-05, "loss": 0.1994, "step": 16340 }, { "epoch": 1.3478141476274166, "grad_norm": 0.9554028511047363, "learning_rate": 4.5772699773813936e-05, "loss": 0.1564, "step": 16360 }, { "epoch": 1.3494617750439368, "grad_norm": 0.8473567366600037, "learning_rate": 4.575668474232556e-05, "loss": 0.1924, "step": 16380 }, { "epoch": 1.351109402460457, "grad_norm": 1.2312932014465332, "learning_rate": 4.574064224464944e-05, "loss": 0.194, "step": 16400 }, { "epoch": 1.352757029876977, "grad_norm": 0.5687373280525208, "learning_rate": 4.5724572302013754e-05, "loss": 0.1828, "step": 16420 }, { "epoch": 1.3544046572934973, "grad_norm": 0.2973005473613739, "learning_rate": 4.570847493568297e-05, "loss": 0.1774, "step": 16440 }, { "epoch": 1.3560522847100176, "grad_norm": 22.61744499206543, "learning_rate": 4.5692350166957864e-05, "loss": 0.16, "step": 16460 }, { "epoch": 1.3576999121265378, "grad_norm": 0.5010253190994263, "learning_rate": 4.567619801717545e-05, "loss": 0.1842, "step": 16480 }, { "epoch": 1.3593475395430579, "grad_norm": 1.405232548713684, "learning_rate": 4.566001850770899e-05, "loss": 0.194, "step": 16500 }, { "epoch": 1.3609951669595781, "grad_norm": 0.8372935652732849, "learning_rate": 4.564381165996796e-05, "loss": 0.1866, "step": 16520 }, { "epoch": 1.3626427943760984, "grad_norm": 0.2158607393503189, "learning_rate": 4.5627577495397974e-05, "loss": 0.1802, "step": 16540 }, { "epoch": 1.3642904217926186, "grad_norm": 7.98114013671875, "learning_rate": 4.561131603548082e-05, "loss": 0.1832, "step": 16560 }, { "epoch": 1.3659380492091389, "grad_norm": 0.6528833508491516, "learning_rate": 4.5595027301734413e-05, "loss": 0.1919, "step": 16580 }, { "epoch": 1.367585676625659, "grad_norm": 0.5290044546127319, "learning_rate": 4.557871131571274e-05, "loss": 0.1693, "step": 16600 }, { "epoch": 1.3692333040421794, "grad_norm": 1.5808736085891724, "learning_rate": 4.5562368099005854e-05, "loss": 0.1813, "step": 16620 }, { "epoch": 1.3708809314586996, "grad_norm": 0.17612561583518982, "learning_rate": 4.554599767323984e-05, "loss": 0.1879, "step": 16640 }, { "epoch": 1.3725285588752196, "grad_norm": 1.369644284248352, "learning_rate": 4.552960006007678e-05, "loss": 0.1602, "step": 16660 }, { "epoch": 1.3741761862917399, "grad_norm": 1.1678745746612549, "learning_rate": 4.551317528121476e-05, "loss": 0.1897, "step": 16680 }, { "epoch": 1.3758238137082601, "grad_norm": 0.5510971546173096, "learning_rate": 4.549672335838777e-05, "loss": 0.196, "step": 16700 }, { "epoch": 1.3774714411247804, "grad_norm": 0.9784049391746521, "learning_rate": 4.548024431336576e-05, "loss": 0.1865, "step": 16720 }, { "epoch": 1.3791190685413004, "grad_norm": 0.2733114957809448, "learning_rate": 4.546373816795455e-05, "loss": 0.1811, "step": 16740 }, { "epoch": 1.3807666959578206, "grad_norm": 1.3034899234771729, "learning_rate": 4.5447204943995816e-05, "loss": 0.1528, "step": 16760 }, { "epoch": 1.382414323374341, "grad_norm": 1.3286523818969727, "learning_rate": 4.543064466336706e-05, "loss": 0.1832, "step": 16780 }, { "epoch": 1.3840619507908611, "grad_norm": 0.747806191444397, "learning_rate": 4.541405734798161e-05, "loss": 0.1672, "step": 16800 }, { "epoch": 1.3857095782073814, "grad_norm": 0.5867149233818054, "learning_rate": 4.539744301978856e-05, "loss": 0.1814, "step": 16820 }, { "epoch": 1.3873572056239016, "grad_norm": 0.43317413330078125, "learning_rate": 4.5380801700772726e-05, "loss": 0.1825, "step": 16840 }, { "epoch": 1.3890048330404219, "grad_norm": 1.011514663696289, "learning_rate": 4.536413341295467e-05, "loss": 0.1553, "step": 16860 }, { "epoch": 1.3906524604569421, "grad_norm": 0.7447202205657959, "learning_rate": 4.5347438178390624e-05, "loss": 0.1832, "step": 16880 }, { "epoch": 1.3923000878734622, "grad_norm": 4.756796360015869, "learning_rate": 4.533071601917248e-05, "loss": 0.1923, "step": 16900 }, { "epoch": 1.3939477152899824, "grad_norm": 1.5507986545562744, "learning_rate": 4.5313966957427755e-05, "loss": 0.1754, "step": 16920 }, { "epoch": 1.3955953427065027, "grad_norm": 0.557498037815094, "learning_rate": 4.529719101531957e-05, "loss": 0.178, "step": 16940 }, { "epoch": 1.397242970123023, "grad_norm": 1.611181378364563, "learning_rate": 4.5280388215046624e-05, "loss": 0.1557, "step": 16960 }, { "epoch": 1.398890597539543, "grad_norm": 0.6798775792121887, "learning_rate": 4.5263558578843136e-05, "loss": 0.1741, "step": 16980 }, { "epoch": 1.4005382249560632, "grad_norm": 10.896245956420898, "learning_rate": 4.524670212897884e-05, "loss": 0.2016, "step": 17000 }, { "epoch": 1.4021858523725834, "grad_norm": 1.8802106380462646, "learning_rate": 4.5229818887758957e-05, "loss": 0.177, "step": 17020 }, { "epoch": 1.4038334797891037, "grad_norm": 0.2604859471321106, "learning_rate": 4.5212908877524165e-05, "loss": 0.1791, "step": 17040 }, { "epoch": 1.405481107205624, "grad_norm": 1.834112524986267, "learning_rate": 4.519597212065056e-05, "loss": 0.1664, "step": 17060 }, { "epoch": 1.4071287346221442, "grad_norm": 0.9913097620010376, "learning_rate": 4.517900863954962e-05, "loss": 0.1955, "step": 17080 }, { "epoch": 1.4087763620386644, "grad_norm": 2.1117942333221436, "learning_rate": 4.51620184566682e-05, "loss": 0.1798, "step": 17100 }, { "epoch": 1.4104239894551847, "grad_norm": 5.098875999450684, "learning_rate": 4.5145001594488466e-05, "loss": 0.1908, "step": 17120 }, { "epoch": 1.4120716168717047, "grad_norm": 0.7090994715690613, "learning_rate": 4.512795807552792e-05, "loss": 0.1727, "step": 17140 }, { "epoch": 1.413719244288225, "grad_norm": 3.752267599105835, "learning_rate": 4.511088792233932e-05, "loss": 0.1542, "step": 17160 }, { "epoch": 1.4153668717047452, "grad_norm": 1.5449800491333008, "learning_rate": 4.5093791157510656e-05, "loss": 0.1651, "step": 17180 }, { "epoch": 1.4170144991212654, "grad_norm": 0.9400858283042908, "learning_rate": 4.507666780366514e-05, "loss": 0.1844, "step": 17200 }, { "epoch": 1.4186621265377855, "grad_norm": 0.37838345766067505, "learning_rate": 4.5059517883461196e-05, "loss": 0.1733, "step": 17220 }, { "epoch": 1.4203097539543057, "grad_norm": 0.239340141415596, "learning_rate": 4.50432008728471e-05, "loss": 0.1855, "step": 17240 }, { "epoch": 1.421957381370826, "grad_norm": 1.4485529661178589, "learning_rate": 4.502599921354852e-05, "loss": 0.1515, "step": 17260 }, { "epoch": 1.4236050087873462, "grad_norm": 0.6260993480682373, "learning_rate": 4.500877105493846e-05, "loss": 0.1781, "step": 17280 }, { "epoch": 1.4252526362038664, "grad_norm": 2.114168882369995, "learning_rate": 4.499151641981402e-05, "loss": 0.1732, "step": 17300 }, { "epoch": 1.4269002636203867, "grad_norm": 0.7402538061141968, "learning_rate": 4.497423533100732e-05, "loss": 0.1769, "step": 17320 }, { "epoch": 1.428547891036907, "grad_norm": 0.7155308723449707, "learning_rate": 4.4956927811385475e-05, "loss": 0.1649, "step": 17340 }, { "epoch": 1.4301955184534272, "grad_norm": 0.8413863182067871, "learning_rate": 4.4939593883850576e-05, "loss": 0.1563, "step": 17360 }, { "epoch": 1.4318431458699472, "grad_norm": 2.0796799659729004, "learning_rate": 4.492223357133967e-05, "loss": 0.1797, "step": 17380 }, { "epoch": 1.4334907732864675, "grad_norm": 1.1681219339370728, "learning_rate": 4.490484689682471e-05, "loss": 0.1819, "step": 17400 }, { "epoch": 1.4351384007029877, "grad_norm": 1.0417784452438354, "learning_rate": 4.488743388331254e-05, "loss": 0.1861, "step": 17420 }, { "epoch": 1.436786028119508, "grad_norm": 0.17414021492004395, "learning_rate": 4.486999455384483e-05, "loss": 0.1734, "step": 17440 }, { "epoch": 1.4384336555360282, "grad_norm": 1.723633885383606, "learning_rate": 4.485252893149812e-05, "loss": 0.1629, "step": 17460 }, { "epoch": 1.4400812829525482, "grad_norm": 0.5598846673965454, "learning_rate": 4.4835037039383686e-05, "loss": 0.1749, "step": 17480 }, { "epoch": 1.4417289103690685, "grad_norm": 0.617661714553833, "learning_rate": 4.481751890064761e-05, "loss": 0.1734, "step": 17500 }, { "epoch": 1.4433765377855887, "grad_norm": 0.4917828440666199, "learning_rate": 4.479997453847069e-05, "loss": 0.1724, "step": 17520 }, { "epoch": 1.445024165202109, "grad_norm": 0.27072271704673767, "learning_rate": 4.4782403976068417e-05, "loss": 0.1664, "step": 17540 }, { "epoch": 1.4466717926186292, "grad_norm": 1.5086781978607178, "learning_rate": 4.476480723669095e-05, "loss": 0.1614, "step": 17560 }, { "epoch": 1.4483194200351495, "grad_norm": 1.092875599861145, "learning_rate": 4.4747184343623113e-05, "loss": 0.1827, "step": 17580 }, { "epoch": 1.4499670474516697, "grad_norm": 0.9140943288803101, "learning_rate": 4.4729535320184304e-05, "loss": 0.1865, "step": 17600 }, { "epoch": 1.4516146748681897, "grad_norm": 0.9031366109848022, "learning_rate": 4.471186018972852e-05, "loss": 0.1767, "step": 17620 }, { "epoch": 1.45326230228471, "grad_norm": 0.4150620102882385, "learning_rate": 4.46941589756443e-05, "loss": 0.1814, "step": 17640 }, { "epoch": 1.4549099297012302, "grad_norm": 2.178504228591919, "learning_rate": 4.467643170135467e-05, "loss": 0.1608, "step": 17660 }, { "epoch": 1.4565575571177505, "grad_norm": 1.05979323387146, "learning_rate": 4.4658678390317186e-05, "loss": 0.175, "step": 17680 }, { "epoch": 1.4582051845342707, "grad_norm": 1.3193062543869019, "learning_rate": 4.464089906602383e-05, "loss": 0.2092, "step": 17700 }, { "epoch": 1.4598528119507908, "grad_norm": 0.6934455037117004, "learning_rate": 4.462309375200101e-05, "loss": 0.1695, "step": 17720 }, { "epoch": 1.461500439367311, "grad_norm": 0.34769323468208313, "learning_rate": 4.460526247180953e-05, "loss": 0.1844, "step": 17740 }, { "epoch": 1.4631480667838312, "grad_norm": 2.567417621612549, "learning_rate": 4.4587405249044546e-05, "loss": 0.1657, "step": 17760 }, { "epoch": 1.4647956942003515, "grad_norm": 0.7568791508674622, "learning_rate": 4.4569522107335535e-05, "loss": 0.1794, "step": 17780 }, { "epoch": 1.4664433216168717, "grad_norm": 0.6661089062690735, "learning_rate": 4.455161307034631e-05, "loss": 0.1812, "step": 17800 }, { "epoch": 1.468090949033392, "grad_norm": 0.6958649754524231, "learning_rate": 4.4533678161774894e-05, "loss": 0.1714, "step": 17820 }, { "epoch": 1.4697385764499122, "grad_norm": 0.3765020966529846, "learning_rate": 4.45157174053536e-05, "loss": 0.176, "step": 17840 }, { "epoch": 1.4713862038664323, "grad_norm": 1.378657579421997, "learning_rate": 4.4497730824848885e-05, "loss": 0.1645, "step": 17860 }, { "epoch": 1.4730338312829525, "grad_norm": 0.8017684817314148, "learning_rate": 4.447971844406143e-05, "loss": 0.1795, "step": 17880 }, { "epoch": 1.4746814586994728, "grad_norm": 1.0388306379318237, "learning_rate": 4.4461680286826024e-05, "loss": 0.1941, "step": 17900 }, { "epoch": 1.476329086115993, "grad_norm": 0.5598440766334534, "learning_rate": 4.4443616377011586e-05, "loss": 0.1879, "step": 17920 }, { "epoch": 1.4779767135325133, "grad_norm": 0.18948721885681152, "learning_rate": 4.4425526738521096e-05, "loss": 0.1837, "step": 17940 }, { "epoch": 1.4796243409490333, "grad_norm": 1.4350675344467163, "learning_rate": 4.440741139529159e-05, "loss": 0.1578, "step": 17960 }, { "epoch": 1.4812719683655535, "grad_norm": 0.7614567279815674, "learning_rate": 4.4389270371294104e-05, "loss": 0.1878, "step": 17980 }, { "epoch": 1.4829195957820738, "grad_norm": 0.5294657945632935, "learning_rate": 4.437110369053368e-05, "loss": 0.1792, "step": 18000 }, { "epoch": 1.4829195957820738, "eval_loss": 0.539612352848053, "eval_runtime": 824.8651, "eval_samples_per_second": 26.415, "eval_steps_per_second": 6.605, "eval_wer": 0.2317739899166627, "step": 18000 }, { "epoch": 1.484567223198594, "grad_norm": 0.9461752772331238, "learning_rate": 4.4352911377049266e-05, "loss": 0.1759, "step": 18020 }, { "epoch": 1.4862148506151143, "grad_norm": 0.35723596811294556, "learning_rate": 4.433469345491379e-05, "loss": 0.1883, "step": 18040 }, { "epoch": 1.4878624780316345, "grad_norm": 1.2138187885284424, "learning_rate": 4.4316449948234007e-05, "loss": 0.1486, "step": 18060 }, { "epoch": 1.4895101054481548, "grad_norm": 0.9856829643249512, "learning_rate": 4.4298180881150575e-05, "loss": 0.1985, "step": 18080 }, { "epoch": 1.4911577328646748, "grad_norm": 0.697496771812439, "learning_rate": 4.4279886277837945e-05, "loss": 0.1799, "step": 18100 }, { "epoch": 1.492805360281195, "grad_norm": 0.8957895040512085, "learning_rate": 4.426156616250436e-05, "loss": 0.1755, "step": 18120 }, { "epoch": 1.4944529876977153, "grad_norm": 0.3005205988883972, "learning_rate": 4.424322055939183e-05, "loss": 0.1754, "step": 18140 }, { "epoch": 1.4961006151142355, "grad_norm": 0.5195316672325134, "learning_rate": 4.42248494927761e-05, "loss": 0.1566, "step": 18160 }, { "epoch": 1.4977482425307558, "grad_norm": 1.898149013519287, "learning_rate": 4.42064529869666e-05, "loss": 0.177, "step": 18180 }, { "epoch": 1.4993958699472758, "grad_norm": 2.6191961765289307, "learning_rate": 4.4188031066306406e-05, "loss": 0.1794, "step": 18200 }, { "epoch": 1.501043497363796, "grad_norm": 0.5942957997322083, "learning_rate": 4.416958375517225e-05, "loss": 0.1805, "step": 18220 }, { "epoch": 1.5026911247803163, "grad_norm": 0.2722390294075012, "learning_rate": 4.415111107797445e-05, "loss": 0.1704, "step": 18240 }, { "epoch": 1.5043387521968365, "grad_norm": 1.5523074865341187, "learning_rate": 4.41326130591569e-05, "loss": 0.1683, "step": 18260 }, { "epoch": 1.5059863796133568, "grad_norm": 0.6534819006919861, "learning_rate": 4.4114089723197e-05, "loss": 0.1734, "step": 18280 }, { "epoch": 1.507634007029877, "grad_norm": 0.3791787922382355, "learning_rate": 4.409554109460568e-05, "loss": 0.1713, "step": 18300 }, { "epoch": 1.5092816344463973, "grad_norm": 0.8552284836769104, "learning_rate": 4.407696719792734e-05, "loss": 0.1816, "step": 18320 }, { "epoch": 1.5109292618629175, "grad_norm": 1.9957807064056396, "learning_rate": 4.405836805773979e-05, "loss": 0.1676, "step": 18340 }, { "epoch": 1.5125768892794376, "grad_norm": 3.502870559692383, "learning_rate": 4.4039743698654244e-05, "loss": 0.1557, "step": 18360 }, { "epoch": 1.5142245166959578, "grad_norm": 2.0844368934631348, "learning_rate": 4.402109414531532e-05, "loss": 0.1744, "step": 18380 }, { "epoch": 1.515872144112478, "grad_norm": 2.32145357131958, "learning_rate": 4.4002419422400954e-05, "loss": 0.1853, "step": 18400 }, { "epoch": 1.517519771528998, "grad_norm": 4.515986919403076, "learning_rate": 4.398371955462237e-05, "loss": 0.1737, "step": 18420 }, { "epoch": 1.5191673989455183, "grad_norm": 0.3069842457771301, "learning_rate": 4.396499456672409e-05, "loss": 0.1863, "step": 18440 }, { "epoch": 1.5208150263620386, "grad_norm": 2.3054933547973633, "learning_rate": 4.394624448348388e-05, "loss": 0.1503, "step": 18460 }, { "epoch": 1.5224626537785588, "grad_norm": 0.7396153211593628, "learning_rate": 4.392746932971269e-05, "loss": 0.1882, "step": 18480 }, { "epoch": 1.524110281195079, "grad_norm": 0.6996922492980957, "learning_rate": 4.390866913025466e-05, "loss": 0.1812, "step": 18500 }, { "epoch": 1.5257579086115993, "grad_norm": 0.8116775751113892, "learning_rate": 4.3889843909987064e-05, "loss": 0.182, "step": 18520 }, { "epoch": 1.5274055360281196, "grad_norm": 0.23936544358730316, "learning_rate": 4.387099369382029e-05, "loss": 0.1837, "step": 18540 }, { "epoch": 1.5290531634446398, "grad_norm": 0.7272351980209351, "learning_rate": 4.3852118506697795e-05, "loss": 0.1601, "step": 18560 }, { "epoch": 1.53070079086116, "grad_norm": 0.8234082460403442, "learning_rate": 4.3833218373596094e-05, "loss": 0.1628, "step": 18580 }, { "epoch": 1.53234841827768, "grad_norm": 0.8772994875907898, "learning_rate": 4.38142933195247e-05, "loss": 0.1683, "step": 18600 }, { "epoch": 1.5339960456942003, "grad_norm": 0.4307045340538025, "learning_rate": 4.379534336952609e-05, "loss": 0.1983, "step": 18620 }, { "epoch": 1.5356436731107206, "grad_norm": 0.24042223393917084, "learning_rate": 4.377636854867572e-05, "loss": 0.163, "step": 18640 }, { "epoch": 1.5372913005272406, "grad_norm": 1.3200021982192993, "learning_rate": 4.3757368882081905e-05, "loss": 0.1574, "step": 18660 }, { "epoch": 1.5389389279437609, "grad_norm": 0.6988914608955383, "learning_rate": 4.37383443948859e-05, "loss": 0.1656, "step": 18680 }, { "epoch": 1.540586555360281, "grad_norm": 0.46211034059524536, "learning_rate": 4.371929511226175e-05, "loss": 0.1843, "step": 18700 }, { "epoch": 1.5422341827768014, "grad_norm": 0.7007827758789062, "learning_rate": 4.370022105941633e-05, "loss": 0.1642, "step": 18720 }, { "epoch": 1.5438818101933216, "grad_norm": 0.3869803249835968, "learning_rate": 4.3681122261589305e-05, "loss": 0.1561, "step": 18740 }, { "epoch": 1.5455294376098418, "grad_norm": 0.945253312587738, "learning_rate": 4.366199874405306e-05, "loss": 0.161, "step": 18760 }, { "epoch": 1.547177065026362, "grad_norm": 1.2405415773391724, "learning_rate": 4.36428505321127e-05, "loss": 0.1671, "step": 18780 }, { "epoch": 1.5488246924428823, "grad_norm": 0.9262678027153015, "learning_rate": 4.3623677651106025e-05, "loss": 0.1819, "step": 18800 }, { "epoch": 1.5504723198594026, "grad_norm": 0.5617028474807739, "learning_rate": 4.360448012640345e-05, "loss": 0.1823, "step": 18820 }, { "epoch": 1.5521199472759226, "grad_norm": 0.24927659332752228, "learning_rate": 4.358525798340801e-05, "loss": 0.1802, "step": 18840 }, { "epoch": 1.5537675746924429, "grad_norm": 1.1216681003570557, "learning_rate": 4.356601124755532e-05, "loss": 0.142, "step": 18860 }, { "epoch": 1.5554152021089631, "grad_norm": 1.321250319480896, "learning_rate": 4.3546739944313544e-05, "loss": 0.1849, "step": 18880 }, { "epoch": 1.5570628295254831, "grad_norm": 1.2743161916732788, "learning_rate": 4.352744409918332e-05, "loss": 0.1802, "step": 18900 }, { "epoch": 1.5587104569420034, "grad_norm": 0.6473543047904968, "learning_rate": 4.350812373769781e-05, "loss": 0.1807, "step": 18920 }, { "epoch": 1.5603580843585236, "grad_norm": 0.6407447457313538, "learning_rate": 4.348877888542259e-05, "loss": 0.1825, "step": 18940 }, { "epoch": 1.5620057117750439, "grad_norm": 1.5329558849334717, "learning_rate": 4.346940956795564e-05, "loss": 0.1555, "step": 18960 }, { "epoch": 1.5636533391915641, "grad_norm": 0.9263864755630493, "learning_rate": 4.345001581092733e-05, "loss": 0.1847, "step": 18980 }, { "epoch": 1.5653009666080844, "grad_norm": 0.6662928462028503, "learning_rate": 4.343059764000035e-05, "loss": 0.1947, "step": 19000 }, { "epoch": 1.5669485940246046, "grad_norm": 3.533243417739868, "learning_rate": 4.3411155080869704e-05, "loss": 0.189, "step": 19020 }, { "epoch": 1.5685962214411249, "grad_norm": 0.34724509716033936, "learning_rate": 4.339168815926268e-05, "loss": 0.1751, "step": 19040 }, { "epoch": 1.5702438488576451, "grad_norm": 1.1154448986053467, "learning_rate": 4.337219690093879e-05, "loss": 0.1615, "step": 19060 }, { "epoch": 1.5718914762741654, "grad_norm": 2.445345163345337, "learning_rate": 4.335268133168974e-05, "loss": 0.1927, "step": 19080 }, { "epoch": 1.5735391036906854, "grad_norm": 1.5590624809265137, "learning_rate": 4.3333141477339426e-05, "loss": 0.1985, "step": 19100 }, { "epoch": 1.5751867311072056, "grad_norm": 1.7810287475585938, "learning_rate": 4.3313577363743874e-05, "loss": 0.1825, "step": 19120 }, { "epoch": 1.5768343585237259, "grad_norm": 0.18706871569156647, "learning_rate": 4.32939890167912e-05, "loss": 0.1884, "step": 19140 }, { "epoch": 1.578481985940246, "grad_norm": 5.887953281402588, "learning_rate": 4.327437646240161e-05, "loss": 0.1584, "step": 19160 }, { "epoch": 1.5801296133567662, "grad_norm": 0.7244459390640259, "learning_rate": 4.32547397265273e-05, "loss": 0.1821, "step": 19180 }, { "epoch": 1.5817772407732864, "grad_norm": 0.7429137229919434, "learning_rate": 4.323507883515252e-05, "loss": 0.18, "step": 19200 }, { "epoch": 1.5834248681898067, "grad_norm": 0.573156476020813, "learning_rate": 4.321539381429343e-05, "loss": 0.1777, "step": 19220 }, { "epoch": 1.585072495606327, "grad_norm": 0.19367003440856934, "learning_rate": 4.3195684689998155e-05, "loss": 0.1765, "step": 19240 }, { "epoch": 1.5867201230228472, "grad_norm": 1.1158256530761719, "learning_rate": 4.31759514883467e-05, "loss": 0.1667, "step": 19260 }, { "epoch": 1.5883677504393674, "grad_norm": 0.7506932616233826, "learning_rate": 4.3156194235450936e-05, "loss": 0.1657, "step": 19280 }, { "epoch": 1.5900153778558876, "grad_norm": 4.906134605407715, "learning_rate": 4.313641295745455e-05, "loss": 0.1837, "step": 19300 }, { "epoch": 1.591663005272408, "grad_norm": 2.754065990447998, "learning_rate": 4.311660768053304e-05, "loss": 0.1786, "step": 19320 }, { "epoch": 1.593310632688928, "grad_norm": 0.19174990057945251, "learning_rate": 4.309677843089364e-05, "loss": 0.1813, "step": 19340 }, { "epoch": 1.5949582601054482, "grad_norm": 4.260095119476318, "learning_rate": 4.307692523477531e-05, "loss": 0.1542, "step": 19360 }, { "epoch": 1.5966058875219684, "grad_norm": 1.4479669332504272, "learning_rate": 4.305704811844871e-05, "loss": 0.1803, "step": 19380 }, { "epoch": 1.5982535149384884, "grad_norm": 1.0454448461532593, "learning_rate": 4.303714710821613e-05, "loss": 0.1877, "step": 19400 }, { "epoch": 1.5999011423550087, "grad_norm": 1.397001028060913, "learning_rate": 4.301722223041151e-05, "loss": 0.182, "step": 19420 }, { "epoch": 1.601548769771529, "grad_norm": 0.5425103902816772, "learning_rate": 4.299827151317217e-05, "loss": 0.1793, "step": 19440 }, { "epoch": 1.6031963971880492, "grad_norm": 1.6720526218414307, "learning_rate": 4.297830016946455e-05, "loss": 0.1804, "step": 19460 }, { "epoch": 1.6048440246045694, "grad_norm": 0.6204736232757568, "learning_rate": 4.295830503605383e-05, "loss": 0.1906, "step": 19480 }, { "epoch": 1.6064916520210897, "grad_norm": 2.814265727996826, "learning_rate": 4.293828613939845e-05, "loss": 0.1687, "step": 19500 }, { "epoch": 1.60813927943761, "grad_norm": 2.273043394088745, "learning_rate": 4.2918243505988354e-05, "loss": 0.1837, "step": 19520 }, { "epoch": 1.6097869068541302, "grad_norm": 1.504699945449829, "learning_rate": 4.289817716234485e-05, "loss": 0.1715, "step": 19540 }, { "epoch": 1.6114345342706504, "grad_norm": 2.6365151405334473, "learning_rate": 4.287808713502063e-05, "loss": 0.1655, "step": 19560 }, { "epoch": 1.6130821616871704, "grad_norm": 0.9557230472564697, "learning_rate": 4.2857973450599744e-05, "loss": 0.1628, "step": 19580 }, { "epoch": 1.6147297891036907, "grad_norm": 0.9700766801834106, "learning_rate": 4.283783613569752e-05, "loss": 0.1912, "step": 19600 }, { "epoch": 1.616377416520211, "grad_norm": 1.2748225927352905, "learning_rate": 4.281767521696057e-05, "loss": 0.1788, "step": 19620 }, { "epoch": 1.618025043936731, "grad_norm": 0.2875102460384369, "learning_rate": 4.279749072106675e-05, "loss": 0.1726, "step": 19640 }, { "epoch": 1.6196726713532512, "grad_norm": 2.1846017837524414, "learning_rate": 4.2777282674725086e-05, "loss": 0.1544, "step": 19660 }, { "epoch": 1.6213202987697715, "grad_norm": 1.5533403158187866, "learning_rate": 4.2757051104675786e-05, "loss": 0.1621, "step": 19680 }, { "epoch": 1.6229679261862917, "grad_norm": 6.069032669067383, "learning_rate": 4.273780934867807e-05, "loss": 0.1855, "step": 19700 }, { "epoch": 1.624615553602812, "grad_norm": 1.1619160175323486, "learning_rate": 4.271753198442825e-05, "loss": 0.1821, "step": 19720 }, { "epoch": 1.6262631810193322, "grad_norm": 0.8923582434654236, "learning_rate": 4.269723117553562e-05, "loss": 0.1861, "step": 19740 }, { "epoch": 1.6279108084358525, "grad_norm": 4.761159420013428, "learning_rate": 4.2676906948863134e-05, "loss": 0.147, "step": 19760 }, { "epoch": 1.6295584358523727, "grad_norm": 3.785452127456665, "learning_rate": 4.265655933130473e-05, "loss": 0.1789, "step": 19780 }, { "epoch": 1.631206063268893, "grad_norm": 7.1180806159973145, "learning_rate": 4.26361883497853e-05, "loss": 0.1867, "step": 19800 }, { "epoch": 1.632853690685413, "grad_norm": 0.7360396981239319, "learning_rate": 4.2615794031260655e-05, "loss": 0.1789, "step": 19820 }, { "epoch": 1.6345013181019332, "grad_norm": 0.38821306824684143, "learning_rate": 4.259537640271749e-05, "loss": 0.1824, "step": 19840 }, { "epoch": 1.6361489455184535, "grad_norm": 1.545905590057373, "learning_rate": 4.2574935491173325e-05, "loss": 0.1436, "step": 19860 }, { "epoch": 1.6377965729349735, "grad_norm": 2.462200880050659, "learning_rate": 4.2554471323676505e-05, "loss": 0.1876, "step": 19880 }, { "epoch": 1.6394442003514937, "grad_norm": 1.7179991006851196, "learning_rate": 4.253398392730614e-05, "loss": 0.1744, "step": 19900 }, { "epoch": 1.641091827768014, "grad_norm": 1.1609716415405273, "learning_rate": 4.2513473329172106e-05, "loss": 0.1865, "step": 19920 }, { "epoch": 1.6427394551845342, "grad_norm": 0.445671945810318, "learning_rate": 4.249293955641493e-05, "loss": 0.1842, "step": 19940 }, { "epoch": 1.6443870826010545, "grad_norm": 1.0819984674453735, "learning_rate": 4.2472382636205845e-05, "loss": 0.1583, "step": 19960 }, { "epoch": 1.6460347100175747, "grad_norm": 2.5188121795654297, "learning_rate": 4.24518025957467e-05, "loss": 0.1861, "step": 19980 }, { "epoch": 1.647682337434095, "grad_norm": 0.8718958497047424, "learning_rate": 4.243119946226996e-05, "loss": 0.1842, "step": 20000 }, { "epoch": 1.647682337434095, "eval_loss": 0.5287063121795654, "eval_runtime": 251.8074, "eval_samples_per_second": 86.53, "eval_steps_per_second": 21.636, "eval_wer": 0.23113573314148306, "step": 20000 }, { "epoch": 1.6493299648506152, "grad_norm": 1.2543200254440308, "learning_rate": 4.2410573263038583e-05, "loss": 0.1729, "step": 20020 }, { "epoch": 1.6509775922671355, "grad_norm": 0.2878034710884094, "learning_rate": 4.238992402534613e-05, "loss": 0.1789, "step": 20040 }, { "epoch": 1.6526252196836555, "grad_norm": 2.4613733291625977, "learning_rate": 4.236925177651658e-05, "loss": 0.1631, "step": 20060 }, { "epoch": 1.6542728471001757, "grad_norm": 0.9280514121055603, "learning_rate": 4.234855654390442e-05, "loss": 0.1741, "step": 20080 }, { "epoch": 1.655920474516696, "grad_norm": 1.553101897239685, "learning_rate": 4.232783835489451e-05, "loss": 0.1665, "step": 20100 }, { "epoch": 1.657568101933216, "grad_norm": 0.5047245621681213, "learning_rate": 4.23070972369021e-05, "loss": 0.1882, "step": 20120 }, { "epoch": 1.6592157293497363, "grad_norm": 0.32549336552619934, "learning_rate": 4.228633321737276e-05, "loss": 0.1716, "step": 20140 }, { "epoch": 1.6608633567662565, "grad_norm": 2.7314305305480957, "learning_rate": 4.226554632378241e-05, "loss": 0.1551, "step": 20160 }, { "epoch": 1.6625109841827768, "grad_norm": 1.0868523120880127, "learning_rate": 4.2244736583637214e-05, "loss": 0.1674, "step": 20180 }, { "epoch": 1.664158611599297, "grad_norm": 0.6131607294082642, "learning_rate": 4.222390402447353e-05, "loss": 0.1832, "step": 20200 }, { "epoch": 1.6658062390158173, "grad_norm": 0.8340827226638794, "learning_rate": 4.220304867385798e-05, "loss": 0.1695, "step": 20220 }, { "epoch": 1.6674538664323375, "grad_norm": 0.22361576557159424, "learning_rate": 4.21821705593873e-05, "loss": 0.1935, "step": 20240 }, { "epoch": 1.6691014938488578, "grad_norm": 1.1089187860488892, "learning_rate": 4.216126970868836e-05, "loss": 0.1536, "step": 20260 }, { "epoch": 1.670749121265378, "grad_norm": 0.504047691822052, "learning_rate": 4.214034614941811e-05, "loss": 0.1758, "step": 20280 }, { "epoch": 1.672396748681898, "grad_norm": 0.8742786049842834, "learning_rate": 4.211939990926355e-05, "loss": 0.1843, "step": 20300 }, { "epoch": 1.6740443760984183, "grad_norm": 0.856685221195221, "learning_rate": 4.209843101594171e-05, "loss": 0.1708, "step": 20320 }, { "epoch": 1.6756920035149385, "grad_norm": 0.20526465773582458, "learning_rate": 4.2077439497199576e-05, "loss": 0.1699, "step": 20340 }, { "epoch": 1.6773396309314585, "grad_norm": 0.9350545406341553, "learning_rate": 4.2056425380814074e-05, "loss": 0.1503, "step": 20360 }, { "epoch": 1.6789872583479788, "grad_norm": 2.845334529876709, "learning_rate": 4.203538869459204e-05, "loss": 0.1708, "step": 20380 }, { "epoch": 1.680634885764499, "grad_norm": 1.2838850021362305, "learning_rate": 4.2014329466370176e-05, "loss": 0.18, "step": 20400 }, { "epoch": 1.6822825131810193, "grad_norm": 1.8057399988174438, "learning_rate": 4.1993247724015e-05, "loss": 0.173, "step": 20420 }, { "epoch": 1.6839301405975395, "grad_norm": 0.3127855658531189, "learning_rate": 4.1972143495422844e-05, "loss": 0.1728, "step": 20440 }, { "epoch": 1.6855777680140598, "grad_norm": 2.700399398803711, "learning_rate": 4.195101680851977e-05, "loss": 0.1578, "step": 20460 }, { "epoch": 1.68722539543058, "grad_norm": 0.7250802516937256, "learning_rate": 4.192986769126158e-05, "loss": 0.1823, "step": 20480 }, { "epoch": 1.6888730228471003, "grad_norm": 1.1070244312286377, "learning_rate": 4.1908696171633734e-05, "loss": 0.189, "step": 20500 }, { "epoch": 1.6905206502636205, "grad_norm": 1.0246353149414062, "learning_rate": 4.188750227765135e-05, "loss": 0.1902, "step": 20520 }, { "epoch": 1.6921682776801406, "grad_norm": 0.6496437191963196, "learning_rate": 4.186628603735916e-05, "loss": 0.1747, "step": 20540 }, { "epoch": 1.6938159050966608, "grad_norm": 0.8663806319236755, "learning_rate": 4.184504747883145e-05, "loss": 0.1519, "step": 20560 }, { "epoch": 1.695463532513181, "grad_norm": 0.7036964893341064, "learning_rate": 4.182378663017205e-05, "loss": 0.1829, "step": 20580 }, { "epoch": 1.697111159929701, "grad_norm": 0.7158936858177185, "learning_rate": 4.180250351951427e-05, "loss": 0.1672, "step": 20600 }, { "epoch": 1.6987587873462213, "grad_norm": 0.5382769703865051, "learning_rate": 4.17811981750209e-05, "loss": 0.1804, "step": 20620 }, { "epoch": 1.7004064147627416, "grad_norm": 0.5654181838035583, "learning_rate": 4.175987062488413e-05, "loss": 0.1585, "step": 20640 }, { "epoch": 1.7020540421792618, "grad_norm": 1.4323805570602417, "learning_rate": 4.1738520897325544e-05, "loss": 0.1532, "step": 20660 }, { "epoch": 1.703701669595782, "grad_norm": 0.7951692938804626, "learning_rate": 4.171714902059608e-05, "loss": 0.1755, "step": 20680 }, { "epoch": 1.7053492970123023, "grad_norm": 0.7378861308097839, "learning_rate": 4.169575502297597e-05, "loss": 0.1814, "step": 20700 }, { "epoch": 1.7069969244288226, "grad_norm": 0.6902081966400146, "learning_rate": 4.1674338932774724e-05, "loss": 0.1639, "step": 20720 }, { "epoch": 1.7086445518453428, "grad_norm": 0.16693931818008423, "learning_rate": 4.1652900778331076e-05, "loss": 0.1788, "step": 20740 }, { "epoch": 1.710292179261863, "grad_norm": 2.9214699268341064, "learning_rate": 4.1631440588012974e-05, "loss": 0.1562, "step": 20760 }, { "epoch": 1.711939806678383, "grad_norm": 0.8093401193618774, "learning_rate": 4.1609958390217516e-05, "loss": 0.1714, "step": 20780 }, { "epoch": 1.7135874340949033, "grad_norm": 1.0199005603790283, "learning_rate": 4.158845421337093e-05, "loss": 0.1869, "step": 20800 }, { "epoch": 1.7152350615114236, "grad_norm": 0.5902179479598999, "learning_rate": 4.1566928085928504e-05, "loss": 0.1868, "step": 20820 }, { "epoch": 1.7168826889279436, "grad_norm": 0.30310845375061035, "learning_rate": 4.154538003637459e-05, "loss": 0.1851, "step": 20840 }, { "epoch": 1.7185303163444638, "grad_norm": 4.026705741882324, "learning_rate": 4.1523810093222547e-05, "loss": 0.1667, "step": 20860 }, { "epoch": 1.720177943760984, "grad_norm": 0.82432621717453, "learning_rate": 4.1502218285014714e-05, "loss": 0.1785, "step": 20880 }, { "epoch": 1.7218255711775043, "grad_norm": 1.0857402086257935, "learning_rate": 4.148060464032234e-05, "loss": 0.1867, "step": 20900 }, { "epoch": 1.7234731985940246, "grad_norm": 1.142042636871338, "learning_rate": 4.145896918774558e-05, "loss": 0.1744, "step": 20920 }, { "epoch": 1.7251208260105448, "grad_norm": 0.4160640835762024, "learning_rate": 4.143731195591345e-05, "loss": 0.1801, "step": 20940 }, { "epoch": 1.726768453427065, "grad_norm": 1.8969520330429077, "learning_rate": 4.141563297348378e-05, "loss": 0.1536, "step": 20960 }, { "epoch": 1.7284160808435853, "grad_norm": 0.7605903744697571, "learning_rate": 4.1393932269143206e-05, "loss": 0.1777, "step": 20980 }, { "epoch": 1.7300637082601056, "grad_norm": 1.4995478391647339, "learning_rate": 4.137220987160705e-05, "loss": 0.184, "step": 21000 }, { "epoch": 1.7317113356766256, "grad_norm": 1.7079527378082275, "learning_rate": 4.13504658096194e-05, "loss": 0.1799, "step": 21020 }, { "epoch": 1.7333589630931459, "grad_norm": 0.3677631616592407, "learning_rate": 4.132870011195298e-05, "loss": 0.1718, "step": 21040 }, { "epoch": 1.735006590509666, "grad_norm": 5.215697288513184, "learning_rate": 4.1306912807409134e-05, "loss": 0.1515, "step": 21060 }, { "epoch": 1.7366542179261861, "grad_norm": 2.6421728134155273, "learning_rate": 4.1285103924817836e-05, "loss": 0.1877, "step": 21080 }, { "epoch": 1.7383018453427064, "grad_norm": 2.3778457641601562, "learning_rate": 4.126327349303758e-05, "loss": 0.1818, "step": 21100 }, { "epoch": 1.7399494727592266, "grad_norm": 2.7478296756744385, "learning_rate": 4.124142154095539e-05, "loss": 0.1791, "step": 21120 }, { "epoch": 1.7415971001757469, "grad_norm": 0.5207391381263733, "learning_rate": 4.121954809748675e-05, "loss": 0.1644, "step": 21140 }, { "epoch": 1.7432447275922671, "grad_norm": 3.1791322231292725, "learning_rate": 4.119765319157559e-05, "loss": 0.1652, "step": 21160 }, { "epoch": 1.7448923550087874, "grad_norm": 4.9425764083862305, "learning_rate": 4.117573685219427e-05, "loss": 0.1841, "step": 21180 }, { "epoch": 1.7465399824253076, "grad_norm": 0.698239266872406, "learning_rate": 4.115379910834345e-05, "loss": 0.1775, "step": 21200 }, { "epoch": 1.7481876098418279, "grad_norm": 3.20509934425354, "learning_rate": 4.113183998905218e-05, "loss": 0.1703, "step": 21220 }, { "epoch": 1.749835237258348, "grad_norm": 0.2055424153804779, "learning_rate": 4.110985952337775e-05, "loss": 0.1679, "step": 21240 }, { "epoch": 1.7514828646748684, "grad_norm": 2.4991447925567627, "learning_rate": 4.108785774040571e-05, "loss": 0.1528, "step": 21260 }, { "epoch": 1.7531304920913884, "grad_norm": 1.0346381664276123, "learning_rate": 4.106583466924981e-05, "loss": 0.1735, "step": 21280 }, { "epoch": 1.7547781195079086, "grad_norm": 1.605350136756897, "learning_rate": 4.104379033905199e-05, "loss": 0.1765, "step": 21300 }, { "epoch": 1.7564257469244289, "grad_norm": 0.4309673309326172, "learning_rate": 4.102172477898232e-05, "loss": 0.1841, "step": 21320 }, { "epoch": 1.758073374340949, "grad_norm": 0.5647534132003784, "learning_rate": 4.099963801823892e-05, "loss": 0.1606, "step": 21340 }, { "epoch": 1.7597210017574691, "grad_norm": 2.7488176822662354, "learning_rate": 4.0977530086048025e-05, "loss": 0.1562, "step": 21360 }, { "epoch": 1.7613686291739894, "grad_norm": 0.6944543719291687, "learning_rate": 4.095540101166383e-05, "loss": 0.1757, "step": 21380 }, { "epoch": 1.7630162565905096, "grad_norm": 2.5942211151123047, "learning_rate": 4.093325082436854e-05, "loss": 0.1852, "step": 21400 }, { "epoch": 1.76466388400703, "grad_norm": 0.6985669732093811, "learning_rate": 4.09110795534723e-05, "loss": 0.1801, "step": 21420 }, { "epoch": 1.7663115114235501, "grad_norm": 0.5788651704788208, "learning_rate": 4.088888722831312e-05, "loss": 0.1841, "step": 21440 }, { "epoch": 1.7679591388400704, "grad_norm": 1.5852128267288208, "learning_rate": 4.0866673878256925e-05, "loss": 0.1632, "step": 21460 }, { "epoch": 1.7696067662565906, "grad_norm": 1.0001317262649536, "learning_rate": 4.0844439532697386e-05, "loss": 0.1828, "step": 21480 }, { "epoch": 1.7712543936731109, "grad_norm": 3.1319148540496826, "learning_rate": 4.0822184221056036e-05, "loss": 0.1797, "step": 21500 }, { "epoch": 1.772902021089631, "grad_norm": 1.2414968013763428, "learning_rate": 4.0799907972782105e-05, "loss": 0.172, "step": 21520 }, { "epoch": 1.7745496485061512, "grad_norm": 0.37049078941345215, "learning_rate": 4.077761081735252e-05, "loss": 0.1836, "step": 21540 }, { "epoch": 1.7761972759226714, "grad_norm": 2.273507595062256, "learning_rate": 4.0755292784271896e-05, "loss": 0.1663, "step": 21560 }, { "epoch": 1.7778449033391914, "grad_norm": 1.669340968132019, "learning_rate": 4.073295390307248e-05, "loss": 0.1789, "step": 21580 }, { "epoch": 1.7794925307557117, "grad_norm": 0.8448110222816467, "learning_rate": 4.071059420331408e-05, "loss": 0.1927, "step": 21600 }, { "epoch": 1.781140158172232, "grad_norm": 2.4922406673431396, "learning_rate": 4.0688213714584076e-05, "loss": 0.1852, "step": 21620 }, { "epoch": 1.7827877855887522, "grad_norm": 0.36666399240493774, "learning_rate": 4.066581246649734e-05, "loss": 0.1869, "step": 21640 }, { "epoch": 1.7844354130052724, "grad_norm": 1.3697789907455444, "learning_rate": 4.0643390488696225e-05, "loss": 0.159, "step": 21660 }, { "epoch": 1.7860830404217927, "grad_norm": 0.6621468663215637, "learning_rate": 4.062094781085051e-05, "loss": 0.1827, "step": 21680 }, { "epoch": 1.787730667838313, "grad_norm": 3.3689136505126953, "learning_rate": 4.059848446265736e-05, "loss": 0.1829, "step": 21700 }, { "epoch": 1.7893782952548332, "grad_norm": 1.2748357057571411, "learning_rate": 4.0576000473841295e-05, "loss": 0.1814, "step": 21720 }, { "epoch": 1.7910259226713534, "grad_norm": 0.7753732800483704, "learning_rate": 4.0553495874154167e-05, "loss": 0.1837, "step": 21740 }, { "epoch": 1.7926735500878734, "grad_norm": 4.699821472167969, "learning_rate": 4.053097069337507e-05, "loss": 0.1608, "step": 21760 }, { "epoch": 1.7943211775043937, "grad_norm": 0.6081763505935669, "learning_rate": 4.050842496131034e-05, "loss": 0.1802, "step": 21780 }, { "epoch": 1.795968804920914, "grad_norm": 1.5527398586273193, "learning_rate": 4.0485858707793534e-05, "loss": 0.186, "step": 21800 }, { "epoch": 1.797616432337434, "grad_norm": 0.7068532109260559, "learning_rate": 4.046327196268533e-05, "loss": 0.1653, "step": 21820 }, { "epoch": 1.7992640597539542, "grad_norm": 1.8269671201705933, "learning_rate": 4.044066475587353e-05, "loss": 0.1799, "step": 21840 }, { "epoch": 1.8009116871704745, "grad_norm": 3.3052139282226562, "learning_rate": 4.041803711727303e-05, "loss": 0.1502, "step": 21860 }, { "epoch": 1.8025593145869947, "grad_norm": 0.9614394307136536, "learning_rate": 4.039538907682574e-05, "loss": 0.1739, "step": 21880 }, { "epoch": 1.804206942003515, "grad_norm": 1.014877438545227, "learning_rate": 4.0372720664500575e-05, "loss": 0.1829, "step": 21900 }, { "epoch": 1.8058545694200352, "grad_norm": 1.322003960609436, "learning_rate": 4.035003191029341e-05, "loss": 0.1935, "step": 21920 }, { "epoch": 1.8075021968365554, "grad_norm": 0.6618562340736389, "learning_rate": 4.0328458779473224e-05, "loss": 0.1741, "step": 21940 }, { "epoch": 1.8091498242530757, "grad_norm": 1.402084469795227, "learning_rate": 4.0305730444973686e-05, "loss": 0.1621, "step": 21960 }, { "epoch": 1.810797451669596, "grad_norm": 1.3006558418273926, "learning_rate": 4.0282981857236654e-05, "loss": 0.1674, "step": 21980 }, { "epoch": 1.812445079086116, "grad_norm": 1.6449670791625977, "learning_rate": 4.026021304636408e-05, "loss": 0.1845, "step": 22000 }, { "epoch": 1.812445079086116, "eval_loss": 0.5395076274871826, "eval_runtime": 246.9697, "eval_samples_per_second": 88.225, "eval_steps_per_second": 22.059, "eval_wer": 0.2258113279380107, "step": 22000 }, { "epoch": 1.8140927065026362, "grad_norm": 0.5984182953834534, "learning_rate": 4.0237424042484694e-05, "loss": 0.18, "step": 22020 }, { "epoch": 1.8157403339191565, "grad_norm": 1.0855306386947632, "learning_rate": 4.0214614875753955e-05, "loss": 0.1651, "step": 22040 }, { "epoch": 1.8173879613356765, "grad_norm": 1.4949283599853516, "learning_rate": 4.0191785576353956e-05, "loss": 0.1563, "step": 22060 }, { "epoch": 1.8190355887521967, "grad_norm": 0.9411268830299377, "learning_rate": 4.016893617449349e-05, "loss": 0.1807, "step": 22080 }, { "epoch": 1.820683216168717, "grad_norm": 3.1195433139801025, "learning_rate": 4.014606670040791e-05, "loss": 0.1611, "step": 22100 }, { "epoch": 1.8223308435852372, "grad_norm": 1.749937653541565, "learning_rate": 4.012317718435915e-05, "loss": 0.1809, "step": 22120 }, { "epoch": 1.8239784710017575, "grad_norm": 0.41568905115127563, "learning_rate": 4.010026765663566e-05, "loss": 0.1754, "step": 22140 }, { "epoch": 1.8256260984182777, "grad_norm": 1.6782418489456177, "learning_rate": 4.007733814755235e-05, "loss": 0.1577, "step": 22160 }, { "epoch": 1.827273725834798, "grad_norm": 1.470269799232483, "learning_rate": 4.005438868745059e-05, "loss": 0.1804, "step": 22180 }, { "epoch": 1.8289213532513182, "grad_norm": 0.8983897566795349, "learning_rate": 4.003141930669818e-05, "loss": 0.1771, "step": 22200 }, { "epoch": 1.8305689806678385, "grad_norm": 0.6398609280586243, "learning_rate": 4.000843003568921e-05, "loss": 0.1937, "step": 22220 }, { "epoch": 1.8322166080843585, "grad_norm": 0.20628367364406586, "learning_rate": 3.998542090484414e-05, "loss": 0.1804, "step": 22240 }, { "epoch": 1.8338642355008787, "grad_norm": 1.640540361404419, "learning_rate": 3.9962391944609704e-05, "loss": 0.1576, "step": 22260 }, { "epoch": 1.835511862917399, "grad_norm": 0.5554460883140564, "learning_rate": 3.9939343185458856e-05, "loss": 0.1629, "step": 22280 }, { "epoch": 1.837159490333919, "grad_norm": 0.5767085552215576, "learning_rate": 3.991627465789076e-05, "loss": 0.1779, "step": 22300 }, { "epoch": 1.8388071177504393, "grad_norm": 0.3550276458263397, "learning_rate": 3.989318639243075e-05, "loss": 0.1812, "step": 22320 }, { "epoch": 1.8404547451669595, "grad_norm": 0.23671790957450867, "learning_rate": 3.9870078419630276e-05, "loss": 0.1717, "step": 22340 }, { "epoch": 1.8421023725834798, "grad_norm": 1.2688475847244263, "learning_rate": 3.984695077006683e-05, "loss": 0.1606, "step": 22360 }, { "epoch": 1.84375, "grad_norm": 2.13102388381958, "learning_rate": 3.982380347434398e-05, "loss": 0.1652, "step": 22380 }, { "epoch": 1.8453976274165202, "grad_norm": 0.6820785403251648, "learning_rate": 3.980063656309128e-05, "loss": 0.1832, "step": 22400 }, { "epoch": 1.8470452548330405, "grad_norm": 1.0797230005264282, "learning_rate": 3.9777450066964236e-05, "loss": 0.1673, "step": 22420 }, { "epoch": 1.8486928822495607, "grad_norm": 0.22749900817871094, "learning_rate": 3.9754244016644286e-05, "loss": 0.1935, "step": 22440 }, { "epoch": 1.850340509666081, "grad_norm": 21.42913246154785, "learning_rate": 3.973101844283871e-05, "loss": 0.1616, "step": 22460 }, { "epoch": 1.851988137082601, "grad_norm": 1.0109410285949707, "learning_rate": 3.970777337628066e-05, "loss": 0.1733, "step": 22480 }, { "epoch": 1.8536357644991213, "grad_norm": 1.8864178657531738, "learning_rate": 3.968450884772905e-05, "loss": 0.191, "step": 22500 }, { "epoch": 1.8552833919156415, "grad_norm": 1.0088257789611816, "learning_rate": 3.9661224887968564e-05, "loss": 0.179, "step": 22520 }, { "epoch": 1.8569310193321615, "grad_norm": 0.27392879128456116, "learning_rate": 3.9637921527809594e-05, "loss": 0.1883, "step": 22540 }, { "epoch": 1.8585786467486818, "grad_norm": 1.0473995208740234, "learning_rate": 3.9614598798088205e-05, "loss": 0.1568, "step": 22560 }, { "epoch": 1.860226274165202, "grad_norm": 2.516446352005005, "learning_rate": 3.959125672966609e-05, "loss": 0.179, "step": 22580 }, { "epoch": 1.8618739015817223, "grad_norm": 0.5339066982269287, "learning_rate": 3.956789535343054e-05, "loss": 0.1886, "step": 22600 }, { "epoch": 1.8635215289982425, "grad_norm": 3.4075698852539062, "learning_rate": 3.954451470029439e-05, "loss": 0.1723, "step": 22620 }, { "epoch": 1.8651691564147628, "grad_norm": 0.4262460172176361, "learning_rate": 3.952111480119597e-05, "loss": 0.1774, "step": 22640 }, { "epoch": 1.866816783831283, "grad_norm": 1.0418535470962524, "learning_rate": 3.94976956870991e-05, "loss": 0.1613, "step": 22660 }, { "epoch": 1.8684644112478033, "grad_norm": 1.1704654693603516, "learning_rate": 3.947425738899302e-05, "loss": 0.1879, "step": 22680 }, { "epoch": 1.8701120386643235, "grad_norm": 1.1728986501693726, "learning_rate": 3.9450799937892324e-05, "loss": 0.1655, "step": 22700 }, { "epoch": 1.8717596660808435, "grad_norm": 1.5375456809997559, "learning_rate": 3.9427323364837e-05, "loss": 0.1865, "step": 22720 }, { "epoch": 1.8734072934973638, "grad_norm": 0.16443948447704315, "learning_rate": 3.940382770089232e-05, "loss": 0.179, "step": 22740 }, { "epoch": 1.875054920913884, "grad_norm": 13.896778106689453, "learning_rate": 3.9380312977148795e-05, "loss": 0.1495, "step": 22760 }, { "epoch": 1.876702548330404, "grad_norm": 2.690234422683716, "learning_rate": 3.935677922472218e-05, "loss": 0.1715, "step": 22780 }, { "epoch": 1.8783501757469243, "grad_norm": 2.224005937576294, "learning_rate": 3.933322647475342e-05, "loss": 0.1753, "step": 22800 }, { "epoch": 1.8799978031634446, "grad_norm": 1.617680311203003, "learning_rate": 3.9309654758408556e-05, "loss": 0.1773, "step": 22820 }, { "epoch": 1.8816454305799648, "grad_norm": 0.17472465336322784, "learning_rate": 3.928606410687877e-05, "loss": 0.174, "step": 22840 }, { "epoch": 1.883293057996485, "grad_norm": 2.1024489402770996, "learning_rate": 3.926245455138029e-05, "loss": 0.1486, "step": 22860 }, { "epoch": 1.8849406854130053, "grad_norm": 3.535928726196289, "learning_rate": 3.923882612315434e-05, "loss": 0.1917, "step": 22880 }, { "epoch": 1.8865883128295255, "grad_norm": 1.4998210668563843, "learning_rate": 3.9215178853467135e-05, "loss": 0.1786, "step": 22900 }, { "epoch": 1.8882359402460458, "grad_norm": 1.4746694564819336, "learning_rate": 3.9191512773609806e-05, "loss": 0.1731, "step": 22920 }, { "epoch": 1.889883567662566, "grad_norm": 0.23350854218006134, "learning_rate": 3.91678279148984e-05, "loss": 0.1719, "step": 22940 }, { "epoch": 1.891531195079086, "grad_norm": 1.7336878776550293, "learning_rate": 3.9144124308673784e-05, "loss": 0.1495, "step": 22960 }, { "epoch": 1.8931788224956063, "grad_norm": 1.0310170650482178, "learning_rate": 3.912040198630166e-05, "loss": 0.1682, "step": 22980 }, { "epoch": 1.8948264499121266, "grad_norm": 0.8213064074516296, "learning_rate": 3.9096660979172464e-05, "loss": 0.1812, "step": 23000 }, { "epoch": 1.8964740773286466, "grad_norm": 0.4071774184703827, "learning_rate": 3.907290131870139e-05, "loss": 0.1697, "step": 23020 }, { "epoch": 1.8981217047451668, "grad_norm": 0.33335015177726746, "learning_rate": 3.9049123036328296e-05, "loss": 0.1645, "step": 23040 }, { "epoch": 1.899769332161687, "grad_norm": 1.0531188249588013, "learning_rate": 3.9025326163517676e-05, "loss": 0.1484, "step": 23060 }, { "epoch": 1.9014169595782073, "grad_norm": 1.8591293096542358, "learning_rate": 3.900151073175864e-05, "loss": 0.1911, "step": 23080 }, { "epoch": 1.9030645869947276, "grad_norm": 1.4219475984573364, "learning_rate": 3.897767677256485e-05, "loss": 0.1809, "step": 23100 }, { "epoch": 1.9047122144112478, "grad_norm": 1.1440541744232178, "learning_rate": 3.8955017379019396e-05, "loss": 0.174, "step": 23120 }, { "epoch": 1.906359841827768, "grad_norm": 0.2897416353225708, "learning_rate": 3.8931147382061806e-05, "loss": 0.176, "step": 23140 }, { "epoch": 1.9080074692442883, "grad_norm": 1.9240251779556274, "learning_rate": 3.890725895077744e-05, "loss": 0.1549, "step": 23160 }, { "epoch": 1.9096550966608086, "grad_norm": 0.8471350073814392, "learning_rate": 3.888335211677656e-05, "loss": 0.1813, "step": 23180 }, { "epoch": 1.9113027240773286, "grad_norm": 1.2786263227462769, "learning_rate": 3.8859426911693756e-05, "loss": 0.1754, "step": 23200 }, { "epoch": 1.9129503514938488, "grad_norm": 2.0185673236846924, "learning_rate": 3.8835483367187974e-05, "loss": 0.1832, "step": 23220 }, { "epoch": 1.914597978910369, "grad_norm": 0.2411476969718933, "learning_rate": 3.8811521514942374e-05, "loss": 0.1714, "step": 23240 }, { "epoch": 1.9162456063268891, "grad_norm": 1.0712230205535889, "learning_rate": 3.8787541386664374e-05, "loss": 0.1563, "step": 23260 }, { "epoch": 1.9178932337434094, "grad_norm": 0.9587976932525635, "learning_rate": 3.876354301408558e-05, "loss": 0.178, "step": 23280 }, { "epoch": 1.9195408611599296, "grad_norm": 1.298622965812683, "learning_rate": 3.873952642896171e-05, "loss": 0.1695, "step": 23300 }, { "epoch": 1.9211884885764499, "grad_norm": 4.192885398864746, "learning_rate": 3.871549166307261e-05, "loss": 0.1721, "step": 23320 }, { "epoch": 1.92283611599297, "grad_norm": 0.1895458996295929, "learning_rate": 3.869143874822218e-05, "loss": 0.1775, "step": 23340 }, { "epoch": 1.9244837434094904, "grad_norm": 1.721372127532959, "learning_rate": 3.8667367716238326e-05, "loss": 0.1597, "step": 23360 }, { "epoch": 1.9261313708260106, "grad_norm": 4.6658525466918945, "learning_rate": 3.864327859897291e-05, "loss": 0.1728, "step": 23380 }, { "epoch": 1.9277789982425309, "grad_norm": 0.3837490379810333, "learning_rate": 3.861917142830177e-05, "loss": 0.1788, "step": 23400 }, { "epoch": 1.929426625659051, "grad_norm": 0.4308772087097168, "learning_rate": 3.859504623612461e-05, "loss": 0.1747, "step": 23420 }, { "epoch": 1.9310742530755711, "grad_norm": 0.3830674886703491, "learning_rate": 3.857090305436496e-05, "loss": 0.1703, "step": 23440 }, { "epoch": 1.9327218804920914, "grad_norm": 0.9429712891578674, "learning_rate": 3.854674191497019e-05, "loss": 0.1552, "step": 23460 }, { "epoch": 1.9343695079086116, "grad_norm": 0.5584468245506287, "learning_rate": 3.85225628499114e-05, "loss": 0.1817, "step": 23480 }, { "epoch": 1.9360171353251316, "grad_norm": 1.4192808866500854, "learning_rate": 3.849836589118345e-05, "loss": 0.1776, "step": 23500 }, { "epoch": 1.937664762741652, "grad_norm": 0.453254371881485, "learning_rate": 3.847415107080485e-05, "loss": 0.1721, "step": 23520 }, { "epoch": 1.9393123901581721, "grad_norm": 0.37701448798179626, "learning_rate": 3.8449918420817734e-05, "loss": 0.1724, "step": 23540 }, { "epoch": 1.9409600175746924, "grad_norm": 0.9709746241569519, "learning_rate": 3.8425667973287847e-05, "loss": 0.1512, "step": 23560 }, { "epoch": 1.9426076449912126, "grad_norm": 3.227597951889038, "learning_rate": 3.8401399760304504e-05, "loss": 0.1702, "step": 23580 }, { "epoch": 1.9442552724077329, "grad_norm": 1.5262757539749146, "learning_rate": 3.837711381398048e-05, "loss": 0.1773, "step": 23600 }, { "epoch": 1.9459028998242531, "grad_norm": 0.9164407849311829, "learning_rate": 3.835281016645206e-05, "loss": 0.168, "step": 23620 }, { "epoch": 1.9475505272407734, "grad_norm": 0.3824036121368408, "learning_rate": 3.832848884987892e-05, "loss": 0.178, "step": 23640 }, { "epoch": 1.9491981546572936, "grad_norm": 0.8727893233299255, "learning_rate": 3.8304149896444145e-05, "loss": 0.1525, "step": 23660 }, { "epoch": 1.9508457820738139, "grad_norm": 1.189348578453064, "learning_rate": 3.827979333835413e-05, "loss": 0.1655, "step": 23680 }, { "epoch": 1.952493409490334, "grad_norm": 0.6089633107185364, "learning_rate": 3.825541920783858e-05, "loss": 0.179, "step": 23700 }, { "epoch": 1.9541410369068541, "grad_norm": 5.746613025665283, "learning_rate": 3.8231027537150464e-05, "loss": 0.1729, "step": 23720 }, { "epoch": 1.9557886643233744, "grad_norm": 0.3469756841659546, "learning_rate": 3.820661835856594e-05, "loss": 0.2064, "step": 23740 }, { "epoch": 1.9574362917398944, "grad_norm": 1.2260953187942505, "learning_rate": 3.818219170438434e-05, "loss": 0.1463, "step": 23760 }, { "epoch": 1.9590839191564147, "grad_norm": 0.5917554497718811, "learning_rate": 3.815774760692813e-05, "loss": 0.1633, "step": 23780 }, { "epoch": 1.960731546572935, "grad_norm": 0.7345995903015137, "learning_rate": 3.8133286098542835e-05, "loss": 0.1662, "step": 23800 }, { "epoch": 1.9623791739894552, "grad_norm": 0.6015161275863647, "learning_rate": 3.810880721159704e-05, "loss": 0.1782, "step": 23820 }, { "epoch": 1.9640268014059754, "grad_norm": 0.1269834190607071, "learning_rate": 3.808431097848233e-05, "loss": 0.177, "step": 23840 }, { "epoch": 1.9656744288224957, "grad_norm": 3.330901861190796, "learning_rate": 3.805979743161322e-05, "loss": 0.1551, "step": 23860 }, { "epoch": 1.967322056239016, "grad_norm": 1.1339266300201416, "learning_rate": 3.8035266603427144e-05, "loss": 0.1705, "step": 23880 }, { "epoch": 1.9689696836555362, "grad_norm": 0.560189962387085, "learning_rate": 3.801071852638443e-05, "loss": 0.1862, "step": 23900 }, { "epoch": 1.9706173110720564, "grad_norm": 0.4056016206741333, "learning_rate": 3.798615323296819e-05, "loss": 0.1811, "step": 23920 }, { "epoch": 1.9722649384885764, "grad_norm": 0.24118812382221222, "learning_rate": 3.796157075568433e-05, "loss": 0.1726, "step": 23940 }, { "epoch": 1.9739125659050967, "grad_norm": 1.474798560142517, "learning_rate": 3.793697112706153e-05, "loss": 0.1446, "step": 23960 }, { "epoch": 1.975560193321617, "grad_norm": 1.19919753074646, "learning_rate": 3.791235437965109e-05, "loss": 0.1664, "step": 23980 }, { "epoch": 1.977207820738137, "grad_norm": 0.38849976658821106, "learning_rate": 3.788772054602705e-05, "loss": 0.1769, "step": 24000 }, { "epoch": 1.977207820738137, "eval_loss": 0.5249012112617493, "eval_runtime": 254.2293, "eval_samples_per_second": 85.706, "eval_steps_per_second": 21.429, "eval_wer": 0.22245208175285466, "step": 24000 }, { "epoch": 1.9788554481546572, "grad_norm": 0.323760062456131, "learning_rate": 3.7863069658786e-05, "loss": 0.1633, "step": 24020 }, { "epoch": 1.9805030755711774, "grad_norm": 0.3223556876182556, "learning_rate": 3.7838401750547115e-05, "loss": 0.1713, "step": 24040 }, { "epoch": 1.9821507029876977, "grad_norm": 1.7246328592300415, "learning_rate": 3.7813716853952086e-05, "loss": 0.1459, "step": 24060 }, { "epoch": 1.983798330404218, "grad_norm": 0.7763593196868896, "learning_rate": 3.77890150016651e-05, "loss": 0.1636, "step": 24080 }, { "epoch": 1.9854459578207382, "grad_norm": 0.703742265701294, "learning_rate": 3.7764296226372756e-05, "loss": 0.1739, "step": 24100 }, { "epoch": 1.9870935852372584, "grad_norm": 0.5700841546058655, "learning_rate": 3.773956056078407e-05, "loss": 0.1784, "step": 24120 }, { "epoch": 1.9887412126537787, "grad_norm": 0.1853460967540741, "learning_rate": 3.7714808037630396e-05, "loss": 0.173, "step": 24140 }, { "epoch": 1.990388840070299, "grad_norm": 0.5716795921325684, "learning_rate": 3.76900386896654e-05, "loss": 0.1502, "step": 24160 }, { "epoch": 1.992036467486819, "grad_norm": 0.789965033531189, "learning_rate": 3.766525254966499e-05, "loss": 0.1886, "step": 24180 }, { "epoch": 1.9936840949033392, "grad_norm": 2.330145835876465, "learning_rate": 3.764044965042734e-05, "loss": 0.1851, "step": 24200 }, { "epoch": 1.9953317223198594, "grad_norm": 4.897230625152588, "learning_rate": 3.761563002477274e-05, "loss": 0.1855, "step": 24220 }, { "epoch": 1.9969793497363795, "grad_norm": 0.2082502692937851, "learning_rate": 3.759203591747027e-05, "loss": 0.1807, "step": 24240 }, { "epoch": 1.9986269771528997, "grad_norm": 1.0545985698699951, "learning_rate": 3.7567183769785896e-05, "loss": 0.1403, "step": 24260 }, { "epoch": 2.00027460456942, "grad_norm": 0.38820046186447144, "learning_rate": 3.754231499263332e-05, "loss": 0.1762, "step": 24280 }, { "epoch": 2.00192223198594, "grad_norm": 1.5918121337890625, "learning_rate": 3.751742961892006e-05, "loss": 0.154, "step": 24300 }, { "epoch": 2.0035698594024605, "grad_norm": 1.0402336120605469, "learning_rate": 3.749252768157555e-05, "loss": 0.1637, "step": 24320 }, { "epoch": 2.0052174868189807, "grad_norm": 0.6094750761985779, "learning_rate": 3.746760921355119e-05, "loss": 0.1579, "step": 24340 }, { "epoch": 2.006865114235501, "grad_norm": 0.7969854474067688, "learning_rate": 3.744267424782019e-05, "loss": 0.1586, "step": 24360 }, { "epoch": 2.008512741652021, "grad_norm": 0.7175785899162292, "learning_rate": 3.741772281737766e-05, "loss": 0.1691, "step": 24380 }, { "epoch": 2.0101603690685415, "grad_norm": 2.261885643005371, "learning_rate": 3.739275495524046e-05, "loss": 0.1655, "step": 24400 }, { "epoch": 2.0118079964850617, "grad_norm": 0.9677129983901978, "learning_rate": 3.736777069444719e-05, "loss": 0.1628, "step": 24420 }, { "epoch": 2.0134556239015815, "grad_norm": 0.5170992016792297, "learning_rate": 3.7342770068058164e-05, "loss": 0.1736, "step": 24440 }, { "epoch": 2.0151032513181018, "grad_norm": 1.3504503965377808, "learning_rate": 3.731775310915534e-05, "loss": 0.1666, "step": 24460 }, { "epoch": 2.016750878734622, "grad_norm": 0.5912557244300842, "learning_rate": 3.729271985084231e-05, "loss": 0.1591, "step": 24480 }, { "epoch": 2.0183985061511422, "grad_norm": 2.212615728378296, "learning_rate": 3.72676703262442e-05, "loss": 0.1522, "step": 24500 }, { "epoch": 2.0200461335676625, "grad_norm": 0.7549368143081665, "learning_rate": 3.724260456850769e-05, "loss": 0.162, "step": 24520 }, { "epoch": 2.0216937609841827, "grad_norm": 0.7109091281890869, "learning_rate": 3.721752261080093e-05, "loss": 0.1605, "step": 24540 }, { "epoch": 2.023341388400703, "grad_norm": 0.46456268429756165, "learning_rate": 3.719242448631349e-05, "loss": 0.1597, "step": 24560 }, { "epoch": 2.0249890158172232, "grad_norm": 0.6666226387023926, "learning_rate": 3.716731022825636e-05, "loss": 0.1527, "step": 24580 }, { "epoch": 2.0266366432337435, "grad_norm": 1.9217432737350464, "learning_rate": 3.714217986986187e-05, "loss": 0.1519, "step": 24600 }, { "epoch": 2.0282842706502637, "grad_norm": 0.7165806889533997, "learning_rate": 3.711703344438364e-05, "loss": 0.1475, "step": 24620 }, { "epoch": 2.029931898066784, "grad_norm": 3.279149055480957, "learning_rate": 3.7091870985096574e-05, "loss": 0.1805, "step": 24640 }, { "epoch": 2.0315795254833042, "grad_norm": 0.8710042238235474, "learning_rate": 3.7066692525296766e-05, "loss": 0.1779, "step": 24660 }, { "epoch": 2.0332271528998245, "grad_norm": 0.5144171714782715, "learning_rate": 3.704149809830151e-05, "loss": 0.1779, "step": 24680 }, { "epoch": 2.0348747803163443, "grad_norm": 3.780592441558838, "learning_rate": 3.701628773744921e-05, "loss": 0.1646, "step": 24700 }, { "epoch": 2.0365224077328645, "grad_norm": 2.9000730514526367, "learning_rate": 3.699106147609936e-05, "loss": 0.162, "step": 24720 }, { "epoch": 2.0381700351493848, "grad_norm": 0.6916472315788269, "learning_rate": 3.69658193476325e-05, "loss": 0.1755, "step": 24740 }, { "epoch": 2.039817662565905, "grad_norm": 1.2293349504470825, "learning_rate": 3.694056138545015e-05, "loss": 0.1679, "step": 24760 }, { "epoch": 2.0414652899824253, "grad_norm": 0.5820478200912476, "learning_rate": 3.691528762297481e-05, "loss": 0.174, "step": 24780 }, { "epoch": 2.0431129173989455, "grad_norm": 1.158288598060608, "learning_rate": 3.6889998093649855e-05, "loss": 0.159, "step": 24800 }, { "epoch": 2.0447605448154658, "grad_norm": 1.0274357795715332, "learning_rate": 3.686469283093954e-05, "loss": 0.1652, "step": 24820 }, { "epoch": 2.046408172231986, "grad_norm": 0.7709755897521973, "learning_rate": 3.683937186832894e-05, "loss": 0.1699, "step": 24840 }, { "epoch": 2.0480557996485063, "grad_norm": 1.5248311758041382, "learning_rate": 3.681403523932391e-05, "loss": 0.161, "step": 24860 }, { "epoch": 2.0497034270650265, "grad_norm": 0.45869287848472595, "learning_rate": 3.6788682977451016e-05, "loss": 0.1563, "step": 24880 }, { "epoch": 2.0513510544815468, "grad_norm": 1.448834776878357, "learning_rate": 3.6763315116257524e-05, "loss": 0.1497, "step": 24900 }, { "epoch": 2.052998681898067, "grad_norm": 0.9293612837791443, "learning_rate": 3.6737931689311344e-05, "loss": 0.1546, "step": 24920 }, { "epoch": 2.054646309314587, "grad_norm": 1.0912532806396484, "learning_rate": 3.671253273020098e-05, "loss": 0.1598, "step": 24940 }, { "epoch": 2.056293936731107, "grad_norm": 0.8708963990211487, "learning_rate": 3.668711827253548e-05, "loss": 0.1656, "step": 24960 }, { "epoch": 2.0579415641476273, "grad_norm": 0.5793485045433044, "learning_rate": 3.666168834994442e-05, "loss": 0.1746, "step": 24980 }, { "epoch": 2.0595891915641475, "grad_norm": 1.7373342514038086, "learning_rate": 3.663624299607782e-05, "loss": 0.1607, "step": 25000 }, { "epoch": 2.061236818980668, "grad_norm": 0.9185656309127808, "learning_rate": 3.661078224460613e-05, "loss": 0.1708, "step": 25020 }, { "epoch": 2.062884446397188, "grad_norm": 1.290109634399414, "learning_rate": 3.6585306129220175e-05, "loss": 0.1598, "step": 25040 }, { "epoch": 2.0645320738137083, "grad_norm": 1.8787568807601929, "learning_rate": 3.6559814683631096e-05, "loss": 0.1665, "step": 25060 }, { "epoch": 2.0661797012302285, "grad_norm": 0.3839080333709717, "learning_rate": 3.653430794157035e-05, "loss": 0.1739, "step": 25080 }, { "epoch": 2.067827328646749, "grad_norm": 1.4831798076629639, "learning_rate": 3.6508785936789604e-05, "loss": 0.157, "step": 25100 }, { "epoch": 2.069474956063269, "grad_norm": 2.0582001209259033, "learning_rate": 3.648324870306073e-05, "loss": 0.158, "step": 25120 }, { "epoch": 2.0711225834797893, "grad_norm": 9.493792533874512, "learning_rate": 3.645769627417577e-05, "loss": 0.1641, "step": 25140 }, { "epoch": 2.0727702108963095, "grad_norm": 0.920673668384552, "learning_rate": 3.643212868394684e-05, "loss": 0.161, "step": 25160 }, { "epoch": 2.0744178383128293, "grad_norm": 1.4923367500305176, "learning_rate": 3.6406545966206154e-05, "loss": 0.1646, "step": 25180 }, { "epoch": 2.0760654657293496, "grad_norm": 2.3396904468536377, "learning_rate": 3.638094815480592e-05, "loss": 0.1602, "step": 25200 }, { "epoch": 2.07771309314587, "grad_norm": 1.5938606262207031, "learning_rate": 3.635533528361832e-05, "loss": 0.1858, "step": 25220 }, { "epoch": 2.07936072056239, "grad_norm": 0.813642680644989, "learning_rate": 3.632970738653546e-05, "loss": 0.1671, "step": 25240 }, { "epoch": 2.0810083479789103, "grad_norm": 0.6709133982658386, "learning_rate": 3.630406449746935e-05, "loss": 0.1515, "step": 25260 }, { "epoch": 2.0826559753954306, "grad_norm": 0.36236572265625, "learning_rate": 3.6278406650351834e-05, "loss": 0.1755, "step": 25280 }, { "epoch": 2.084303602811951, "grad_norm": 1.213829517364502, "learning_rate": 3.6252733879134516e-05, "loss": 0.158, "step": 25300 }, { "epoch": 2.085951230228471, "grad_norm": 3.2344813346862793, "learning_rate": 3.622704621778879e-05, "loss": 0.1657, "step": 25320 }, { "epoch": 2.0875988576449913, "grad_norm": 2.512044668197632, "learning_rate": 3.6201343700305725e-05, "loss": 0.1677, "step": 25340 }, { "epoch": 2.0892464850615116, "grad_norm": 1.1710649728775024, "learning_rate": 3.6175626360696076e-05, "loss": 0.1755, "step": 25360 }, { "epoch": 2.090894112478032, "grad_norm": 0.6621778607368469, "learning_rate": 3.6149894232990176e-05, "loss": 0.1591, "step": 25380 }, { "epoch": 2.092541739894552, "grad_norm": 9.004714012145996, "learning_rate": 3.612414735123795e-05, "loss": 0.1559, "step": 25400 }, { "epoch": 2.094189367311072, "grad_norm": 6.198091983795166, "learning_rate": 3.609838574950885e-05, "loss": 0.1685, "step": 25420 }, { "epoch": 2.095836994727592, "grad_norm": 0.4765397012233734, "learning_rate": 3.6072609461891785e-05, "loss": 0.1748, "step": 25440 }, { "epoch": 2.0974846221441124, "grad_norm": 3.423109531402588, "learning_rate": 3.604681852249512e-05, "loss": 0.1725, "step": 25460 }, { "epoch": 2.0991322495606326, "grad_norm": 0.9131501317024231, "learning_rate": 3.6021012965446594e-05, "loss": 0.1718, "step": 25480 }, { "epoch": 2.100779876977153, "grad_norm": 0.9718762636184692, "learning_rate": 3.599519282489329e-05, "loss": 0.1683, "step": 25500 }, { "epoch": 2.102427504393673, "grad_norm": 0.8705006837844849, "learning_rate": 3.596935813500159e-05, "loss": 0.1727, "step": 25520 }, { "epoch": 2.1040751318101933, "grad_norm": 0.5502598881721497, "learning_rate": 3.5943508929957115e-05, "loss": 0.1855, "step": 25540 }, { "epoch": 2.1057227592267136, "grad_norm": 1.4949660301208496, "learning_rate": 3.5917645243964726e-05, "loss": 0.1705, "step": 25560 }, { "epoch": 2.107370386643234, "grad_norm": 0.46673768758773804, "learning_rate": 3.589176711124843e-05, "loss": 0.1585, "step": 25580 }, { "epoch": 2.109018014059754, "grad_norm": 1.2299778461456299, "learning_rate": 3.586587456605133e-05, "loss": 0.1541, "step": 25600 }, { "epoch": 2.1106656414762743, "grad_norm": 0.7118710875511169, "learning_rate": 3.5839967642635636e-05, "loss": 0.1755, "step": 25620 }, { "epoch": 2.1123132688927946, "grad_norm": 0.45549798011779785, "learning_rate": 3.5814046375282554e-05, "loss": 0.1689, "step": 25640 }, { "epoch": 2.1139608963093144, "grad_norm": 1.2367688417434692, "learning_rate": 3.578811079829227e-05, "loss": 0.1671, "step": 25660 }, { "epoch": 2.1156085237258346, "grad_norm": 0.7065935730934143, "learning_rate": 3.5762160945983944e-05, "loss": 0.1671, "step": 25680 }, { "epoch": 2.117256151142355, "grad_norm": 1.5369142293930054, "learning_rate": 3.573619685269558e-05, "loss": 0.1499, "step": 25700 }, { "epoch": 2.118903778558875, "grad_norm": 0.8731626272201538, "learning_rate": 3.571021855278405e-05, "loss": 0.1631, "step": 25720 }, { "epoch": 2.1205514059753954, "grad_norm": 0.6267048120498657, "learning_rate": 3.568422608062502e-05, "loss": 0.1768, "step": 25740 }, { "epoch": 2.1221990333919156, "grad_norm": 0.6229351758956909, "learning_rate": 3.565821947061291e-05, "loss": 0.1702, "step": 25760 }, { "epoch": 2.123846660808436, "grad_norm": 0.5864184498786926, "learning_rate": 3.563219875716084e-05, "loss": 0.1669, "step": 25780 }, { "epoch": 2.125494288224956, "grad_norm": 1.3878580331802368, "learning_rate": 3.5606163974700616e-05, "loss": 0.1563, "step": 25800 }, { "epoch": 2.1271419156414764, "grad_norm": 1.8174159526824951, "learning_rate": 3.558011515768264e-05, "loss": 0.1583, "step": 25820 }, { "epoch": 2.1287895430579966, "grad_norm": 0.8748142123222351, "learning_rate": 3.5554052340575876e-05, "loss": 0.1836, "step": 25840 }, { "epoch": 2.130437170474517, "grad_norm": 0.6941066384315491, "learning_rate": 3.552797555786784e-05, "loss": 0.1671, "step": 25860 }, { "epoch": 2.132084797891037, "grad_norm": 0.7565058469772339, "learning_rate": 3.5501884844064506e-05, "loss": 0.1663, "step": 25880 }, { "epoch": 2.133732425307557, "grad_norm": 2.014808177947998, "learning_rate": 3.54757802336903e-05, "loss": 0.1577, "step": 25900 }, { "epoch": 2.135380052724077, "grad_norm": 1.0949345827102661, "learning_rate": 3.544966176128802e-05, "loss": 0.1474, "step": 25920 }, { "epoch": 2.1370276801405974, "grad_norm": 1.0987663269042969, "learning_rate": 3.5423529461418816e-05, "loss": 0.1705, "step": 25940 }, { "epoch": 2.1386753075571177, "grad_norm": 0.8172488212585449, "learning_rate": 3.539738336866214e-05, "loss": 0.163, "step": 25960 }, { "epoch": 2.140322934973638, "grad_norm": 0.37782424688339233, "learning_rate": 3.537122351761568e-05, "loss": 0.178, "step": 25980 }, { "epoch": 2.141970562390158, "grad_norm": 1.336592435836792, "learning_rate": 3.534504994289535e-05, "loss": 0.1706, "step": 26000 }, { "epoch": 2.141970562390158, "eval_loss": 0.5332348942756653, "eval_runtime": 245.7071, "eval_samples_per_second": 88.679, "eval_steps_per_second": 22.173, "eval_wer": 0.2268666911145139, "step": 26000 }, { "epoch": 2.1436181898066784, "grad_norm": 0.7879346609115601, "learning_rate": 3.53188626791352e-05, "loss": 0.1631, "step": 26020 }, { "epoch": 2.1452658172231986, "grad_norm": 0.4960474967956543, "learning_rate": 3.529266176098741e-05, "loss": 0.1843, "step": 26040 }, { "epoch": 2.146913444639719, "grad_norm": 0.8829359412193298, "learning_rate": 3.5266447223122234e-05, "loss": 0.166, "step": 26060 }, { "epoch": 2.148561072056239, "grad_norm": 2.7551321983337402, "learning_rate": 3.524021910022793e-05, "loss": 0.1662, "step": 26080 }, { "epoch": 2.1502086994727594, "grad_norm": 2.773979663848877, "learning_rate": 3.5213977427010744e-05, "loss": 0.1536, "step": 26100 }, { "epoch": 2.1518563268892796, "grad_norm": 1.1753557920455933, "learning_rate": 3.518772223819485e-05, "loss": 0.1671, "step": 26120 }, { "epoch": 2.1535039543057994, "grad_norm": 0.704517662525177, "learning_rate": 3.51614535685223e-05, "loss": 0.1834, "step": 26140 }, { "epoch": 2.1551515817223197, "grad_norm": 1.0736467838287354, "learning_rate": 3.5135171452753e-05, "loss": 0.1673, "step": 26160 }, { "epoch": 2.15679920913884, "grad_norm": 0.44670116901397705, "learning_rate": 3.510887592566463e-05, "loss": 0.1579, "step": 26180 }, { "epoch": 2.15844683655536, "grad_norm": 1.965410828590393, "learning_rate": 3.508256702205263e-05, "loss": 0.1622, "step": 26200 }, { "epoch": 2.1600944639718804, "grad_norm": 0.6488901972770691, "learning_rate": 3.505624477673013e-05, "loss": 0.1706, "step": 26220 }, { "epoch": 2.1617420913884007, "grad_norm": 1.937798023223877, "learning_rate": 3.5029909224527936e-05, "loss": 0.1686, "step": 26240 }, { "epoch": 2.163389718804921, "grad_norm": 0.9314224720001221, "learning_rate": 3.500356040029443e-05, "loss": 0.1586, "step": 26260 }, { "epoch": 2.165037346221441, "grad_norm": 0.41909438371658325, "learning_rate": 3.497719833889558e-05, "loss": 0.158, "step": 26280 }, { "epoch": 2.1666849736379614, "grad_norm": 1.581127643585205, "learning_rate": 3.495082307521486e-05, "loss": 0.1477, "step": 26300 }, { "epoch": 2.1683326010544817, "grad_norm": 3.524155855178833, "learning_rate": 3.492443464415322e-05, "loss": 0.1651, "step": 26320 }, { "epoch": 2.169980228471002, "grad_norm": 0.7497128248214722, "learning_rate": 3.489803308062902e-05, "loss": 0.1726, "step": 26340 }, { "epoch": 2.171627855887522, "grad_norm": 0.5789768695831299, "learning_rate": 3.4871618419578024e-05, "loss": 0.1636, "step": 26360 }, { "epoch": 2.1732754833040424, "grad_norm": 0.4211706221103668, "learning_rate": 3.4845190695953294e-05, "loss": 0.1604, "step": 26380 }, { "epoch": 2.174923110720562, "grad_norm": 5.170593738555908, "learning_rate": 3.4818749944725214e-05, "loss": 0.1465, "step": 26400 }, { "epoch": 2.1765707381370825, "grad_norm": 0.47005754709243774, "learning_rate": 3.479229620088137e-05, "loss": 0.1585, "step": 26420 }, { "epoch": 2.1782183655536027, "grad_norm": 0.8993769884109497, "learning_rate": 3.4765829499426554e-05, "loss": 0.1969, "step": 26440 }, { "epoch": 2.179865992970123, "grad_norm": 0.8027138710021973, "learning_rate": 3.473934987538273e-05, "loss": 0.1534, "step": 26460 }, { "epoch": 2.181513620386643, "grad_norm": 0.3248322308063507, "learning_rate": 3.471285736378891e-05, "loss": 0.1519, "step": 26480 }, { "epoch": 2.1831612478031635, "grad_norm": 0.6821154356002808, "learning_rate": 3.468635199970121e-05, "loss": 0.145, "step": 26500 }, { "epoch": 2.1848088752196837, "grad_norm": 1.0828319787979126, "learning_rate": 3.465983381819272e-05, "loss": 0.1639, "step": 26520 }, { "epoch": 2.186456502636204, "grad_norm": 0.5787882804870605, "learning_rate": 3.4633302854353505e-05, "loss": 0.175, "step": 26540 }, { "epoch": 2.188104130052724, "grad_norm": 1.9811298847198486, "learning_rate": 3.460675914329055e-05, "loss": 0.1739, "step": 26560 }, { "epoch": 2.1897517574692444, "grad_norm": 0.5545487403869629, "learning_rate": 3.458020272012768e-05, "loss": 0.1542, "step": 26580 }, { "epoch": 2.1913993848857647, "grad_norm": 2.1951963901519775, "learning_rate": 3.455363362000558e-05, "loss": 0.1532, "step": 26600 }, { "epoch": 2.1930470123022845, "grad_norm": 0.9359253644943237, "learning_rate": 3.452705187808167e-05, "loss": 0.1622, "step": 26620 }, { "epoch": 2.1946946397188047, "grad_norm": 0.5310496687889099, "learning_rate": 3.450045752953012e-05, "loss": 0.175, "step": 26640 }, { "epoch": 2.196342267135325, "grad_norm": 0.44132381677627563, "learning_rate": 3.44738506095418e-05, "loss": 0.1718, "step": 26660 }, { "epoch": 2.1979898945518452, "grad_norm": 0.6888401508331299, "learning_rate": 3.444723115332418e-05, "loss": 0.1837, "step": 26680 }, { "epoch": 2.1996375219683655, "grad_norm": 1.194044589996338, "learning_rate": 3.442059919610133e-05, "loss": 0.1585, "step": 26700 }, { "epoch": 2.2012851493848857, "grad_norm": 0.7281453013420105, "learning_rate": 3.439395477311388e-05, "loss": 0.1678, "step": 26720 }, { "epoch": 2.202932776801406, "grad_norm": 0.5641632676124573, "learning_rate": 3.4367297919618925e-05, "loss": 0.1839, "step": 26740 }, { "epoch": 2.2045804042179262, "grad_norm": 10.77109146118164, "learning_rate": 3.434062867089003e-05, "loss": 0.1674, "step": 26760 }, { "epoch": 2.2062280316344465, "grad_norm": 0.7375435829162598, "learning_rate": 3.431394706221715e-05, "loss": 0.1657, "step": 26780 }, { "epoch": 2.2078756590509667, "grad_norm": 2.0869717597961426, "learning_rate": 3.4288588117737054e-05, "loss": 0.158, "step": 26800 }, { "epoch": 2.209523286467487, "grad_norm": 1.758624792098999, "learning_rate": 3.4261882508738067e-05, "loss": 0.1579, "step": 26820 }, { "epoch": 2.211170913884007, "grad_norm": 0.6693238019943237, "learning_rate": 3.4235164643995594e-05, "loss": 0.1886, "step": 26840 }, { "epoch": 2.2128185413005275, "grad_norm": 1.1407721042633057, "learning_rate": 3.4208434558863925e-05, "loss": 0.1755, "step": 26860 }, { "epoch": 2.2144661687170473, "grad_norm": 1.6515735387802124, "learning_rate": 3.418169228871353e-05, "loss": 0.1667, "step": 26880 }, { "epoch": 2.2161137961335675, "grad_norm": 3.605949878692627, "learning_rate": 3.4154937868931e-05, "loss": 0.1542, "step": 26900 }, { "epoch": 2.2177614235500878, "grad_norm": 0.5841062068939209, "learning_rate": 3.4128171334918986e-05, "loss": 0.154, "step": 26920 }, { "epoch": 2.219409050966608, "grad_norm": 0.9051983952522278, "learning_rate": 3.410139272209619e-05, "loss": 0.157, "step": 26940 }, { "epoch": 2.2210566783831283, "grad_norm": 1.0805680751800537, "learning_rate": 3.407460206589728e-05, "loss": 0.1773, "step": 26960 }, { "epoch": 2.2227043057996485, "grad_norm": 2.126185655593872, "learning_rate": 3.4047799401772874e-05, "loss": 0.1693, "step": 26980 }, { "epoch": 2.2243519332161688, "grad_norm": 1.8613393306732178, "learning_rate": 3.402098476518947e-05, "loss": 0.1583, "step": 27000 }, { "epoch": 2.225999560632689, "grad_norm": 1.6910747289657593, "learning_rate": 3.399415819162941e-05, "loss": 0.1845, "step": 27020 }, { "epoch": 2.2276471880492092, "grad_norm": 1.2789907455444336, "learning_rate": 3.396866192245471e-05, "loss": 0.1829, "step": 27040 }, { "epoch": 2.2292948154657295, "grad_norm": 1.3591282367706299, "learning_rate": 3.394181217390606e-05, "loss": 0.1657, "step": 27060 }, { "epoch": 2.2309424428822497, "grad_norm": 0.36001384258270264, "learning_rate": 3.3914950593145534e-05, "loss": 0.1608, "step": 27080 }, { "epoch": 2.2325900702987695, "grad_norm": 3.2652955055236816, "learning_rate": 3.388807721571758e-05, "loss": 0.1536, "step": 27100 }, { "epoch": 2.23423769771529, "grad_norm": 1.4743667840957642, "learning_rate": 3.3861192077182284e-05, "loss": 0.1593, "step": 27120 }, { "epoch": 2.23588532513181, "grad_norm": 4.98727560043335, "learning_rate": 3.3834295213115274e-05, "loss": 0.1608, "step": 27140 }, { "epoch": 2.2375329525483303, "grad_norm": 2.447120189666748, "learning_rate": 3.380738665910771e-05, "loss": 0.1712, "step": 27160 }, { "epoch": 2.2391805799648505, "grad_norm": 1.3015066385269165, "learning_rate": 3.3780466450766215e-05, "loss": 0.1785, "step": 27180 }, { "epoch": 2.240828207381371, "grad_norm": 2.257363796234131, "learning_rate": 3.375353462371281e-05, "loss": 0.1678, "step": 27200 }, { "epoch": 2.242475834797891, "grad_norm": 1.4966301918029785, "learning_rate": 3.372659121358493e-05, "loss": 0.1719, "step": 27220 }, { "epoch": 2.2441234622144113, "grad_norm": 0.6006611585617065, "learning_rate": 3.369963625603533e-05, "loss": 0.1759, "step": 27240 }, { "epoch": 2.2457710896309315, "grad_norm": 1.252107858657837, "learning_rate": 3.367266978673201e-05, "loss": 0.1698, "step": 27260 }, { "epoch": 2.2474187170474518, "grad_norm": 1.2595499753952026, "learning_rate": 3.364569184135824e-05, "loss": 0.1687, "step": 27280 }, { "epoch": 2.249066344463972, "grad_norm": 1.3134331703186035, "learning_rate": 3.361870245561244e-05, "loss": 0.1586, "step": 27300 }, { "epoch": 2.2507139718804923, "grad_norm": 1.1949400901794434, "learning_rate": 3.359170166520822e-05, "loss": 0.1564, "step": 27320 }, { "epoch": 2.2523615992970125, "grad_norm": 1.6761950254440308, "learning_rate": 3.3564689505874236e-05, "loss": 0.1755, "step": 27340 }, { "epoch": 2.2540092267135323, "grad_norm": 0.8743230104446411, "learning_rate": 3.3537666013354216e-05, "loss": 0.1771, "step": 27360 }, { "epoch": 2.2556568541300526, "grad_norm": 2.021210193634033, "learning_rate": 3.351063122340686e-05, "loss": 0.1639, "step": 27380 }, { "epoch": 2.257304481546573, "grad_norm": 1.8056607246398926, "learning_rate": 3.3483585171805825e-05, "loss": 0.1647, "step": 27400 }, { "epoch": 2.258952108963093, "grad_norm": 4.348305702209473, "learning_rate": 3.345652789433969e-05, "loss": 0.1604, "step": 27420 }, { "epoch": 2.2605997363796133, "grad_norm": 1.9006876945495605, "learning_rate": 3.342945942681186e-05, "loss": 0.1757, "step": 27440 }, { "epoch": 2.2622473637961336, "grad_norm": 3.284151315689087, "learning_rate": 3.340237980504058e-05, "loss": 0.1914, "step": 27460 }, { "epoch": 2.263894991212654, "grad_norm": 1.1403635740280151, "learning_rate": 3.337528906485881e-05, "loss": 0.1663, "step": 27480 }, { "epoch": 2.265542618629174, "grad_norm": 1.159006953239441, "learning_rate": 3.334818724211426e-05, "loss": 0.1658, "step": 27500 }, { "epoch": 2.2671902460456943, "grad_norm": 0.8393998742103577, "learning_rate": 3.33210743726693e-05, "loss": 0.1664, "step": 27520 }, { "epoch": 2.2688378734622145, "grad_norm": 0.6777483224868774, "learning_rate": 3.329395049240089e-05, "loss": 0.1835, "step": 27540 }, { "epoch": 2.270485500878735, "grad_norm": 1.037467360496521, "learning_rate": 3.3266815637200605e-05, "loss": 0.1755, "step": 27560 }, { "epoch": 2.2721331282952546, "grad_norm": 0.3177587389945984, "learning_rate": 3.3239669842974495e-05, "loss": 0.1611, "step": 27580 }, { "epoch": 2.2737807557117753, "grad_norm": 3.4033076763153076, "learning_rate": 3.321251314564312e-05, "loss": 0.1668, "step": 27600 }, { "epoch": 2.275428383128295, "grad_norm": 0.5792399048805237, "learning_rate": 3.318534558114146e-05, "loss": 0.1765, "step": 27620 }, { "epoch": 2.2770760105448153, "grad_norm": 0.6022660732269287, "learning_rate": 3.3158167185418846e-05, "loss": 0.1721, "step": 27640 }, { "epoch": 2.2787236379613356, "grad_norm": 2.6053919792175293, "learning_rate": 3.313097799443898e-05, "loss": 0.1774, "step": 27660 }, { "epoch": 2.280371265377856, "grad_norm": 1.1545339822769165, "learning_rate": 3.310377804417985e-05, "loss": 0.1595, "step": 27680 }, { "epoch": 2.282018892794376, "grad_norm": 1.8960176706314087, "learning_rate": 3.3076567370633636e-05, "loss": 0.1611, "step": 27700 }, { "epoch": 2.2836665202108963, "grad_norm": 2.5447661876678467, "learning_rate": 3.3049346009806766e-05, "loss": 0.1634, "step": 27720 }, { "epoch": 2.2853141476274166, "grad_norm": 3.0229389667510986, "learning_rate": 3.302211399771975e-05, "loss": 0.1873, "step": 27740 }, { "epoch": 2.286961775043937, "grad_norm": 1.5011436939239502, "learning_rate": 3.299487137040724e-05, "loss": 0.1653, "step": 27760 }, { "epoch": 2.288609402460457, "grad_norm": 3.6717607975006104, "learning_rate": 3.2967618163917926e-05, "loss": 0.1905, "step": 27780 }, { "epoch": 2.2902570298769773, "grad_norm": 1.347084403038025, "learning_rate": 3.2940354414314465e-05, "loss": 0.157, "step": 27800 }, { "epoch": 2.2919046572934976, "grad_norm": 1.2442917823791504, "learning_rate": 3.2913080157673516e-05, "loss": 0.156, "step": 27820 }, { "epoch": 2.2935522847100174, "grad_norm": 1.3263580799102783, "learning_rate": 3.288579543008559e-05, "loss": 0.1801, "step": 27840 }, { "epoch": 2.2951999121265376, "grad_norm": 0.6720674633979797, "learning_rate": 3.285850026765509e-05, "loss": 0.176, "step": 27860 }, { "epoch": 2.296847539543058, "grad_norm": 1.0861464738845825, "learning_rate": 3.2831194706500215e-05, "loss": 0.1623, "step": 27880 }, { "epoch": 2.298495166959578, "grad_norm": 0.9440199136734009, "learning_rate": 3.2803878782752904e-05, "loss": 0.1497, "step": 27900 }, { "epoch": 2.3001427943760984, "grad_norm": 2.2923052310943604, "learning_rate": 3.277655253255886e-05, "loss": 0.17, "step": 27920 }, { "epoch": 2.3017904217926186, "grad_norm": 0.5938337445259094, "learning_rate": 3.2749215992077394e-05, "loss": 0.184, "step": 27940 }, { "epoch": 2.303438049209139, "grad_norm": 0.8251938223838806, "learning_rate": 3.272186919748147e-05, "loss": 0.1765, "step": 27960 }, { "epoch": 2.305085676625659, "grad_norm": 2.4454212188720703, "learning_rate": 3.2694512184957596e-05, "loss": 0.1649, "step": 27980 }, { "epoch": 2.3067333040421794, "grad_norm": 11.248618125915527, "learning_rate": 3.266714499070582e-05, "loss": 0.1558, "step": 28000 }, { "epoch": 2.3067333040421794, "eval_loss": 0.5309145450592041, "eval_runtime": 249.2844, "eval_samples_per_second": 87.406, "eval_steps_per_second": 21.855, "eval_wer": 0.22905859925032823, "step": 28000 }, { "epoch": 2.3083809314586996, "grad_norm": 2.0679521560668945, "learning_rate": 3.2639767650939665e-05, "loss": 0.1621, "step": 28020 }, { "epoch": 2.31002855887522, "grad_norm": 1.7116316556930542, "learning_rate": 3.261238020188605e-05, "loss": 0.1709, "step": 28040 }, { "epoch": 2.3116761862917397, "grad_norm": 1.4451134204864502, "learning_rate": 3.258498267978531e-05, "loss": 0.1652, "step": 28060 }, { "epoch": 2.3133238137082603, "grad_norm": 0.7450625896453857, "learning_rate": 3.255757512089108e-05, "loss": 0.1599, "step": 28080 }, { "epoch": 2.31497144112478, "grad_norm": 2.857830762863159, "learning_rate": 3.2530157561470295e-05, "loss": 0.1548, "step": 28100 }, { "epoch": 2.3166190685413004, "grad_norm": 1.0902551412582397, "learning_rate": 3.25027300378031e-05, "loss": 0.1593, "step": 28120 }, { "epoch": 2.3182666959578206, "grad_norm": 1.3042412996292114, "learning_rate": 3.247529258618285e-05, "loss": 0.1732, "step": 28140 }, { "epoch": 2.319914323374341, "grad_norm": 0.9760801196098328, "learning_rate": 3.244784524291603e-05, "loss": 0.1776, "step": 28160 }, { "epoch": 2.321561950790861, "grad_norm": 0.6647989749908447, "learning_rate": 3.2420388044322204e-05, "loss": 0.1681, "step": 28180 }, { "epoch": 2.3232095782073814, "grad_norm": 2.4924325942993164, "learning_rate": 3.239292102673398e-05, "loss": 0.1659, "step": 28200 }, { "epoch": 2.3248572056239016, "grad_norm": 1.96873939037323, "learning_rate": 3.2365444226496977e-05, "loss": 0.174, "step": 28220 }, { "epoch": 2.326504833040422, "grad_norm": 1.3874828815460205, "learning_rate": 3.2337957679969736e-05, "loss": 0.1817, "step": 28240 }, { "epoch": 2.328152460456942, "grad_norm": 1.0978938341140747, "learning_rate": 3.2310461423523706e-05, "loss": 0.1703, "step": 28260 }, { "epoch": 2.3298000878734624, "grad_norm": 0.40774619579315186, "learning_rate": 3.228295549354317e-05, "loss": 0.1709, "step": 28280 }, { "epoch": 2.3314477152899826, "grad_norm": 2.165424346923828, "learning_rate": 3.225543992642524e-05, "loss": 0.1606, "step": 28300 }, { "epoch": 2.3330953427065024, "grad_norm": 1.4774760007858276, "learning_rate": 3.222791475857976e-05, "loss": 0.1602, "step": 28320 }, { "epoch": 2.3347429701230227, "grad_norm": 0.60544353723526, "learning_rate": 3.2200380026429274e-05, "loss": 0.1806, "step": 28340 }, { "epoch": 2.336390597539543, "grad_norm": 0.8085589408874512, "learning_rate": 3.2172835766409e-05, "loss": 0.1744, "step": 28360 }, { "epoch": 2.338038224956063, "grad_norm": 0.43836772441864014, "learning_rate": 3.2145282014966736e-05, "loss": 0.1671, "step": 28380 }, { "epoch": 2.3396858523725834, "grad_norm": 2.028322219848633, "learning_rate": 3.211771880856288e-05, "loss": 0.1567, "step": 28400 }, { "epoch": 2.3413334797891037, "grad_norm": 0.6231321692466736, "learning_rate": 3.2090146183670305e-05, "loss": 0.1718, "step": 28420 }, { "epoch": 2.342981107205624, "grad_norm": 1.3398654460906982, "learning_rate": 3.206256417677437e-05, "loss": 0.1705, "step": 28440 }, { "epoch": 2.344628734622144, "grad_norm": 1.2689118385314941, "learning_rate": 3.203497282437282e-05, "loss": 0.1622, "step": 28460 }, { "epoch": 2.3462763620386644, "grad_norm": 0.6729577779769897, "learning_rate": 3.2007372162975805e-05, "loss": 0.1616, "step": 28480 }, { "epoch": 2.3479239894551847, "grad_norm": 2.0656583309173584, "learning_rate": 3.197976222910577e-05, "loss": 0.1654, "step": 28500 }, { "epoch": 2.349571616871705, "grad_norm": 1.439696192741394, "learning_rate": 3.195214305929742e-05, "loss": 0.166, "step": 28520 }, { "epoch": 2.3512192442882247, "grad_norm": 0.6270071268081665, "learning_rate": 3.1924514690097706e-05, "loss": 0.183, "step": 28540 }, { "epoch": 2.3528668717047454, "grad_norm": 0.5838283896446228, "learning_rate": 3.1896877158065747e-05, "loss": 0.1716, "step": 28560 }, { "epoch": 2.354514499121265, "grad_norm": 0.424365371465683, "learning_rate": 3.186923049977276e-05, "loss": 0.1776, "step": 28580 }, { "epoch": 2.3561621265377855, "grad_norm": 1.257899284362793, "learning_rate": 3.1841574751802076e-05, "loss": 0.1535, "step": 28600 }, { "epoch": 2.3578097539543057, "grad_norm": 1.2399417161941528, "learning_rate": 3.181390995074903e-05, "loss": 0.1676, "step": 28620 }, { "epoch": 2.359457381370826, "grad_norm": 1.031891942024231, "learning_rate": 3.178623613322094e-05, "loss": 0.1682, "step": 28640 }, { "epoch": 2.361105008787346, "grad_norm": 2.5640039443969727, "learning_rate": 3.175855333583706e-05, "loss": 0.1656, "step": 28660 }, { "epoch": 2.3627526362038664, "grad_norm": 0.32436710596084595, "learning_rate": 3.173086159522853e-05, "loss": 0.1617, "step": 28680 }, { "epoch": 2.3644002636203867, "grad_norm": 1.4761956930160522, "learning_rate": 3.1703160948038315e-05, "loss": 0.1558, "step": 28700 }, { "epoch": 2.366047891036907, "grad_norm": 1.316041350364685, "learning_rate": 3.1675451430921166e-05, "loss": 0.1596, "step": 28720 }, { "epoch": 2.367695518453427, "grad_norm": 0.48719462752342224, "learning_rate": 3.164773308054357e-05, "loss": 0.176, "step": 28740 }, { "epoch": 2.3693431458699474, "grad_norm": 0.4793241322040558, "learning_rate": 3.162000593358372e-05, "loss": 0.162, "step": 28760 }, { "epoch": 2.3709907732864677, "grad_norm": 0.25977346301078796, "learning_rate": 3.159227002673143e-05, "loss": 0.1477, "step": 28780 }, { "epoch": 2.3726384007029875, "grad_norm": 3.16365385055542, "learning_rate": 3.156452539668811e-05, "loss": 0.1599, "step": 28800 }, { "epoch": 2.3742860281195077, "grad_norm": 0.5996500849723816, "learning_rate": 3.153677208016672e-05, "loss": 0.1545, "step": 28820 }, { "epoch": 2.375933655536028, "grad_norm": 1.808449387550354, "learning_rate": 3.15090101138917e-05, "loss": 0.1565, "step": 28840 }, { "epoch": 2.3775812829525482, "grad_norm": 0.822944700717926, "learning_rate": 3.148123953459894e-05, "loss": 0.1655, "step": 28860 }, { "epoch": 2.3792289103690685, "grad_norm": 0.3074828088283539, "learning_rate": 3.145346037903574e-05, "loss": 0.1617, "step": 28880 }, { "epoch": 2.3808765377855887, "grad_norm": 3.4006128311157227, "learning_rate": 3.142567268396075e-05, "loss": 0.1755, "step": 28900 }, { "epoch": 2.382524165202109, "grad_norm": 0.9167118668556213, "learning_rate": 3.1397876486143895e-05, "loss": 0.1648, "step": 28920 }, { "epoch": 2.384171792618629, "grad_norm": 0.6109206676483154, "learning_rate": 3.137007182236637e-05, "loss": 0.1637, "step": 28940 }, { "epoch": 2.3858194200351495, "grad_norm": 0.45387306809425354, "learning_rate": 3.1342258729420564e-05, "loss": 0.1736, "step": 28960 }, { "epoch": 2.3874670474516697, "grad_norm": 0.3555924892425537, "learning_rate": 3.131443724411003e-05, "loss": 0.1664, "step": 28980 }, { "epoch": 2.38911467486819, "grad_norm": 1.275738000869751, "learning_rate": 3.128660740324943e-05, "loss": 0.1553, "step": 29000 }, { "epoch": 2.39076230228471, "grad_norm": 0.5454599261283875, "learning_rate": 3.125876924366445e-05, "loss": 0.163, "step": 29020 }, { "epoch": 2.3924099297012305, "grad_norm": 0.4194986820220947, "learning_rate": 3.12323153203915e-05, "loss": 0.1833, "step": 29040 }, { "epoch": 2.3940575571177503, "grad_norm": 0.4429135024547577, "learning_rate": 3.1204461045255604e-05, "loss": 0.1668, "step": 29060 }, { "epoch": 2.3957051845342705, "grad_norm": 0.3895147442817688, "learning_rate": 3.117659856009512e-05, "loss": 0.1609, "step": 29080 }, { "epoch": 2.3973528119507908, "grad_norm": 3.838486909866333, "learning_rate": 3.114872790177897e-05, "loss": 0.157, "step": 29100 }, { "epoch": 2.399000439367311, "grad_norm": 0.43187758326530457, "learning_rate": 3.112084910718688e-05, "loss": 0.1643, "step": 29120 }, { "epoch": 2.4006480667838312, "grad_norm": 0.3776147663593292, "learning_rate": 3.109296221320932e-05, "loss": 0.1791, "step": 29140 }, { "epoch": 2.4022956942003515, "grad_norm": 0.5783634185791016, "learning_rate": 3.1065067256747495e-05, "loss": 0.1644, "step": 29160 }, { "epoch": 2.4039433216168717, "grad_norm": 0.8182345032691956, "learning_rate": 3.1037164274713286e-05, "loss": 0.1492, "step": 29180 }, { "epoch": 2.405590949033392, "grad_norm": 2.9094724655151367, "learning_rate": 3.100925330402919e-05, "loss": 0.1555, "step": 29200 }, { "epoch": 2.4072385764499122, "grad_norm": 0.932719349861145, "learning_rate": 3.0981334381628256e-05, "loss": 0.1575, "step": 29220 }, { "epoch": 2.4088862038664325, "grad_norm": 0.6541189551353455, "learning_rate": 3.095340754445409e-05, "loss": 0.1681, "step": 29240 }, { "epoch": 2.4105338312829527, "grad_norm": 1.609031319618225, "learning_rate": 3.092547282946073e-05, "loss": 0.1605, "step": 29260 }, { "epoch": 2.4121814586994725, "grad_norm": 0.24770651757717133, "learning_rate": 3.089753027361267e-05, "loss": 0.1618, "step": 29280 }, { "epoch": 2.413829086115993, "grad_norm": 5.404471397399902, "learning_rate": 3.0869579913884775e-05, "loss": 0.1633, "step": 29300 }, { "epoch": 2.415476713532513, "grad_norm": 2.0868561267852783, "learning_rate": 3.0841621787262213e-05, "loss": 0.1533, "step": 29320 }, { "epoch": 2.4171243409490333, "grad_norm": 1.2306647300720215, "learning_rate": 3.0813655930740466e-05, "loss": 0.1672, "step": 29340 }, { "epoch": 2.4187719683655535, "grad_norm": 1.3059163093566895, "learning_rate": 3.078568238132521e-05, "loss": 0.163, "step": 29360 }, { "epoch": 2.4204195957820738, "grad_norm": 0.6795551180839539, "learning_rate": 3.075770117603232e-05, "loss": 0.1696, "step": 29380 }, { "epoch": 2.422067223198594, "grad_norm": 3.5448899269104004, "learning_rate": 3.07297123518878e-05, "loss": 0.1523, "step": 29400 }, { "epoch": 2.4237148506151143, "grad_norm": 0.5173410177230835, "learning_rate": 3.070171594592773e-05, "loss": 0.1685, "step": 29420 }, { "epoch": 2.4253624780316345, "grad_norm": 0.46082064509391785, "learning_rate": 3.067371199519822e-05, "loss": 0.1729, "step": 29440 }, { "epoch": 2.4270101054481548, "grad_norm": 0.4469384551048279, "learning_rate": 3.0645700536755384e-05, "loss": 0.1704, "step": 29460 }, { "epoch": 2.428657732864675, "grad_norm": 0.2371663600206375, "learning_rate": 3.0617681607665256e-05, "loss": 0.1697, "step": 29480 }, { "epoch": 2.4303053602811953, "grad_norm": 2.3724477291107178, "learning_rate": 3.058965524500374e-05, "loss": 0.171, "step": 29500 }, { "epoch": 2.4319529876977155, "grad_norm": 0.954786479473114, "learning_rate": 3.056162148585659e-05, "loss": 0.1641, "step": 29520 }, { "epoch": 2.4336006151142353, "grad_norm": 0.5289298892021179, "learning_rate": 3.053358036731937e-05, "loss": 0.1664, "step": 29540 }, { "epoch": 2.4352482425307556, "grad_norm": 0.7976502180099487, "learning_rate": 3.0505531926497343e-05, "loss": 0.1745, "step": 29560 }, { "epoch": 2.436895869947276, "grad_norm": 0.3595167100429535, "learning_rate": 3.047747620050549e-05, "loss": 0.151, "step": 29580 }, { "epoch": 2.438543497363796, "grad_norm": 19.019508361816406, "learning_rate": 3.0449413226468417e-05, "loss": 0.1803, "step": 29600 }, { "epoch": 2.4401911247803163, "grad_norm": 7.179177761077881, "learning_rate": 3.0421343041520322e-05, "loss": 0.1552, "step": 29620 }, { "epoch": 2.4418387521968365, "grad_norm": 0.5879939794540405, "learning_rate": 3.039326568280495e-05, "loss": 0.1721, "step": 29640 }, { "epoch": 2.443486379613357, "grad_norm": 0.8371209502220154, "learning_rate": 3.036518118747554e-05, "loss": 0.1691, "step": 29660 }, { "epoch": 2.445134007029877, "grad_norm": 0.41010937094688416, "learning_rate": 3.0337089592694772e-05, "loss": 0.1709, "step": 29680 }, { "epoch": 2.4467816344463973, "grad_norm": 1.2957394123077393, "learning_rate": 3.0308990935634718e-05, "loss": 0.1466, "step": 29700 }, { "epoch": 2.4484292618629175, "grad_norm": 0.7702599167823792, "learning_rate": 3.0280885253476797e-05, "loss": 0.1652, "step": 29720 }, { "epoch": 2.450076889279438, "grad_norm": 0.7201776504516602, "learning_rate": 3.025277258341172e-05, "loss": 0.1675, "step": 29740 }, { "epoch": 2.4517245166959576, "grad_norm": 0.37506094574928284, "learning_rate": 3.0224652962639454e-05, "loss": 0.163, "step": 29760 }, { "epoch": 2.4533721441124783, "grad_norm": 0.3698022961616516, "learning_rate": 3.0196526428369155e-05, "loss": 0.1634, "step": 29780 }, { "epoch": 2.455019771528998, "grad_norm": 2.1484596729278564, "learning_rate": 3.016839301781913e-05, "loss": 0.1422, "step": 29800 }, { "epoch": 2.4566673989455183, "grad_norm": 1.094212293624878, "learning_rate": 3.014025276821679e-05, "loss": 0.1539, "step": 29820 }, { "epoch": 2.4583150263620386, "grad_norm": 1.0083136558532715, "learning_rate": 3.011210571679859e-05, "loss": 0.1652, "step": 29840 }, { "epoch": 2.459962653778559, "grad_norm": 0.9524624943733215, "learning_rate": 3.0083951900809983e-05, "loss": 0.1549, "step": 29860 }, { "epoch": 2.461610281195079, "grad_norm": 0.9156121611595154, "learning_rate": 3.005579135750538e-05, "loss": 0.1493, "step": 29880 }, { "epoch": 2.4632579086115993, "grad_norm": 1.3590614795684814, "learning_rate": 3.002762412414808e-05, "loss": 0.1754, "step": 29900 }, { "epoch": 2.4649055360281196, "grad_norm": 0.9748837947845459, "learning_rate": 2.9999450238010264e-05, "loss": 0.1662, "step": 29920 }, { "epoch": 2.46655316344464, "grad_norm": 0.43516382575035095, "learning_rate": 2.9971269736372874e-05, "loss": 0.1816, "step": 29940 }, { "epoch": 2.46820079086116, "grad_norm": 0.7476974129676819, "learning_rate": 2.994308265652565e-05, "loss": 0.1624, "step": 29960 }, { "epoch": 2.4698484182776803, "grad_norm": 0.2631094455718994, "learning_rate": 2.9914889035767014e-05, "loss": 0.1598, "step": 29980 }, { "epoch": 2.4714960456942006, "grad_norm": 7.102409839630127, "learning_rate": 2.9886688911404033e-05, "loss": 0.1574, "step": 30000 }, { "epoch": 2.4714960456942006, "eval_loss": 0.5125442147254944, "eval_runtime": 246.1596, "eval_samples_per_second": 88.516, "eval_steps_per_second": 22.132, "eval_wer": 0.22153388779557867, "step": 30000 }, { "epoch": 2.4731436731107204, "grad_norm": 0.5292249917984009, "learning_rate": 2.98584823207524e-05, "loss": 0.1598, "step": 30020 }, { "epoch": 2.4747913005272406, "grad_norm": 0.8891049027442932, "learning_rate": 2.983026930113635e-05, "loss": 0.1769, "step": 30040 }, { "epoch": 2.476438927943761, "grad_norm": 0.5276570916175842, "learning_rate": 2.9802049889888638e-05, "loss": 0.156, "step": 30060 }, { "epoch": 2.478086555360281, "grad_norm": 0.30637362599372864, "learning_rate": 2.977382412435047e-05, "loss": 0.1549, "step": 30080 }, { "epoch": 2.4797341827768014, "grad_norm": 7.410438537597656, "learning_rate": 2.974559204187146e-05, "loss": 0.1503, "step": 30100 }, { "epoch": 2.4813818101933216, "grad_norm": 3.507681369781494, "learning_rate": 2.9717353679809584e-05, "loss": 0.1577, "step": 30120 }, { "epoch": 2.483029437609842, "grad_norm": 0.5745053291320801, "learning_rate": 2.968910907553112e-05, "loss": 0.164, "step": 30140 }, { "epoch": 2.484677065026362, "grad_norm": 0.6689228415489197, "learning_rate": 2.9660858266410613e-05, "loss": 0.1603, "step": 30160 }, { "epoch": 2.4863246924428823, "grad_norm": 0.7255517244338989, "learning_rate": 2.963260128983081e-05, "loss": 0.1567, "step": 30180 }, { "epoch": 2.4879723198594026, "grad_norm": 5.331938743591309, "learning_rate": 2.9604338183182633e-05, "loss": 0.1504, "step": 30200 }, { "epoch": 2.489619947275923, "grad_norm": 3.504755973815918, "learning_rate": 2.957606898386511e-05, "loss": 0.1538, "step": 30220 }, { "epoch": 2.4912675746924426, "grad_norm": 0.5787059664726257, "learning_rate": 2.9547793729285328e-05, "loss": 0.1868, "step": 30240 }, { "epoch": 2.4929152021089633, "grad_norm": 0.596563458442688, "learning_rate": 2.9519512456858384e-05, "loss": 0.1732, "step": 30260 }, { "epoch": 2.494562829525483, "grad_norm": 0.5609906315803528, "learning_rate": 2.9491225204007338e-05, "loss": 0.1564, "step": 30280 }, { "epoch": 2.4962104569420034, "grad_norm": 17.780019760131836, "learning_rate": 2.9462932008163175e-05, "loss": 0.1568, "step": 30300 }, { "epoch": 2.4978580843585236, "grad_norm": 0.8004500865936279, "learning_rate": 2.9434632906764737e-05, "loss": 0.1605, "step": 30320 }, { "epoch": 2.499505711775044, "grad_norm": 1.081133246421814, "learning_rate": 2.9406327937258677e-05, "loss": 0.1782, "step": 30340 }, { "epoch": 2.501153339191564, "grad_norm": 3.350252389907837, "learning_rate": 2.9378017137099417e-05, "loss": 0.1673, "step": 30360 }, { "epoch": 2.5028009666080844, "grad_norm": 0.6096969842910767, "learning_rate": 2.935111651042648e-05, "loss": 0.1636, "step": 30380 }, { "epoch": 2.5044485940246046, "grad_norm": 4.936652660369873, "learning_rate": 2.932279444825093e-05, "loss": 0.1585, "step": 30400 }, { "epoch": 2.506096221441125, "grad_norm": 1.6837114095687866, "learning_rate": 2.9294466665957475e-05, "loss": 0.1644, "step": 30420 }, { "epoch": 2.507743848857645, "grad_norm": 0.9502339959144592, "learning_rate": 2.926613320103074e-05, "loss": 0.1791, "step": 30440 }, { "epoch": 2.5093914762741654, "grad_norm": 1.6495269536972046, "learning_rate": 2.9237794090962834e-05, "loss": 0.1724, "step": 30460 }, { "epoch": 2.5110391036906856, "grad_norm": 1.6112371683120728, "learning_rate": 2.9209449373253372e-05, "loss": 0.1719, "step": 30480 }, { "epoch": 2.5126867311072054, "grad_norm": 1.7970635890960693, "learning_rate": 2.918109908540937e-05, "loss": 0.1553, "step": 30500 }, { "epoch": 2.514334358523726, "grad_norm": 0.9640928506851196, "learning_rate": 2.9152743264945205e-05, "loss": 0.1651, "step": 30520 }, { "epoch": 2.515981985940246, "grad_norm": 1.5328330993652344, "learning_rate": 2.9124381949382584e-05, "loss": 0.1851, "step": 30540 }, { "epoch": 2.517629613356766, "grad_norm": 1.3880538940429688, "learning_rate": 2.909601517625051e-05, "loss": 0.1693, "step": 30560 }, { "epoch": 2.5192772407732864, "grad_norm": 0.763766348361969, "learning_rate": 2.906764298308516e-05, "loss": 0.1702, "step": 30580 }, { "epoch": 2.5209248681898067, "grad_norm": 1.9157522916793823, "learning_rate": 2.9039265407429928e-05, "loss": 0.1686, "step": 30600 }, { "epoch": 2.522572495606327, "grad_norm": 1.0073531866073608, "learning_rate": 2.9010882486835288e-05, "loss": 0.1687, "step": 30620 }, { "epoch": 2.524220123022847, "grad_norm": 0.8558169007301331, "learning_rate": 2.8982494258858828e-05, "loss": 0.1721, "step": 30640 }, { "epoch": 2.5258677504393674, "grad_norm": 2.5032193660736084, "learning_rate": 2.895410076106514e-05, "loss": 0.1641, "step": 30660 }, { "epoch": 2.5275153778558876, "grad_norm": 0.6763248443603516, "learning_rate": 2.8925702031025775e-05, "loss": 0.168, "step": 30680 }, { "epoch": 2.529163005272408, "grad_norm": 5.686913013458252, "learning_rate": 2.8897298106319243e-05, "loss": 0.1576, "step": 30700 }, { "epoch": 2.5308106326889277, "grad_norm": 0.7178692817687988, "learning_rate": 2.8868889024530888e-05, "loss": 0.1597, "step": 30720 }, { "epoch": 2.5324582601054484, "grad_norm": 1.8655472993850708, "learning_rate": 2.88404748232529e-05, "loss": 0.1634, "step": 30740 }, { "epoch": 2.534105887521968, "grad_norm": 0.9312283396720886, "learning_rate": 2.881205554008425e-05, "loss": 0.1678, "step": 30760 }, { "epoch": 2.5357535149384884, "grad_norm": 1.1616804599761963, "learning_rate": 2.8783631212630618e-05, "loss": 0.1518, "step": 30780 }, { "epoch": 2.5374011423550087, "grad_norm": 2.426833391189575, "learning_rate": 2.8755201878504363e-05, "loss": 0.1612, "step": 30800 }, { "epoch": 2.539048769771529, "grad_norm": 0.6256110072135925, "learning_rate": 2.872676757532447e-05, "loss": 0.15, "step": 30820 }, { "epoch": 2.540696397188049, "grad_norm": 0.48603343963623047, "learning_rate": 2.869832834071651e-05, "loss": 0.1692, "step": 30840 }, { "epoch": 2.5423440246045694, "grad_norm": 1.9028749465942383, "learning_rate": 2.866988421231256e-05, "loss": 0.1625, "step": 30860 }, { "epoch": 2.5439916520210897, "grad_norm": 0.6048976182937622, "learning_rate": 2.864143522775118e-05, "loss": 0.1539, "step": 30880 }, { "epoch": 2.54563927943761, "grad_norm": 0.9251933693885803, "learning_rate": 2.861298142467737e-05, "loss": 0.1484, "step": 30900 }, { "epoch": 2.54728690685413, "grad_norm": 2.2003366947174072, "learning_rate": 2.8584522840742485e-05, "loss": 0.1696, "step": 30920 }, { "epoch": 2.5489345342706504, "grad_norm": 0.9549096822738647, "learning_rate": 2.8556059513604215e-05, "loss": 0.1821, "step": 30940 }, { "epoch": 2.5505821616871707, "grad_norm": 1.7979339361190796, "learning_rate": 2.852759148092653e-05, "loss": 0.1712, "step": 30960 }, { "epoch": 2.5522297891036905, "grad_norm": 0.9172550439834595, "learning_rate": 2.8499118780379625e-05, "loss": 0.1699, "step": 30980 }, { "epoch": 2.553877416520211, "grad_norm": 1.2865842580795288, "learning_rate": 2.8470641449639868e-05, "loss": 0.1538, "step": 31000 }, { "epoch": 2.555525043936731, "grad_norm": 1.5538517236709595, "learning_rate": 2.8442159526389755e-05, "loss": 0.1582, "step": 31020 }, { "epoch": 2.557172671353251, "grad_norm": 0.45951759815216064, "learning_rate": 2.841367304831786e-05, "loss": 0.1701, "step": 31040 }, { "epoch": 2.5588202987697715, "grad_norm": 1.1140258312225342, "learning_rate": 2.8385182053118793e-05, "loss": 0.1665, "step": 31060 }, { "epoch": 2.5604679261862917, "grad_norm": 0.5959802269935608, "learning_rate": 2.8356686578493112e-05, "loss": 0.1731, "step": 31080 }, { "epoch": 2.562115553602812, "grad_norm": 0.9773458242416382, "learning_rate": 2.8328186662147345e-05, "loss": 0.1665, "step": 31100 }, { "epoch": 2.563763181019332, "grad_norm": 1.550663948059082, "learning_rate": 2.829968234179386e-05, "loss": 0.1703, "step": 31120 }, { "epoch": 2.5654108084358525, "grad_norm": 1.0775203704833984, "learning_rate": 2.8271173655150878e-05, "loss": 0.1783, "step": 31140 }, { "epoch": 2.5670584358523727, "grad_norm": 0.7373536825180054, "learning_rate": 2.8242660639942388e-05, "loss": 0.1727, "step": 31160 }, { "epoch": 2.568706063268893, "grad_norm": 1.5068947076797485, "learning_rate": 2.8215569300525145e-05, "loss": 0.1738, "step": 31180 }, { "epoch": 2.5703536906854128, "grad_norm": 1.7037030458450317, "learning_rate": 2.8187047953139167e-05, "loss": 0.1617, "step": 31200 }, { "epoch": 2.5720013181019334, "grad_norm": 3.2985782623291016, "learning_rate": 2.8158522388506632e-05, "loss": 0.173, "step": 31220 }, { "epoch": 2.5736489455184532, "grad_norm": 0.8943614363670349, "learning_rate": 2.812999264437386e-05, "loss": 0.1845, "step": 31240 }, { "epoch": 2.5752965729349735, "grad_norm": 2.3433315753936768, "learning_rate": 2.810145875849269e-05, "loss": 0.1716, "step": 31260 }, { "epoch": 2.5769442003514937, "grad_norm": 0.7237234711647034, "learning_rate": 2.807292076862047e-05, "loss": 0.1827, "step": 31280 }, { "epoch": 2.578591827768014, "grad_norm": 1.9707437753677368, "learning_rate": 2.804437871251996e-05, "loss": 0.1644, "step": 31300 }, { "epoch": 2.5802394551845342, "grad_norm": 5.560729503631592, "learning_rate": 2.8015832627959298e-05, "loss": 0.1614, "step": 31320 }, { "epoch": 2.5818870826010545, "grad_norm": 1.6304755210876465, "learning_rate": 2.7987282552711973e-05, "loss": 0.1732, "step": 31340 }, { "epoch": 2.5835347100175747, "grad_norm": 0.6963209509849548, "learning_rate": 2.7958728524556727e-05, "loss": 0.1793, "step": 31360 }, { "epoch": 2.585182337434095, "grad_norm": 0.8520829677581787, "learning_rate": 2.793017058127756e-05, "loss": 0.1813, "step": 31380 }, { "epoch": 2.5868299648506152, "grad_norm": 1.0092051029205322, "learning_rate": 2.7901608760663623e-05, "loss": 0.1655, "step": 31400 }, { "epoch": 2.5884775922671355, "grad_norm": 1.0574760437011719, "learning_rate": 2.7873043100509228e-05, "loss": 0.1779, "step": 31420 }, { "epoch": 2.5901252196836557, "grad_norm": 1.1205741167068481, "learning_rate": 2.784447363861375e-05, "loss": 0.1747, "step": 31440 }, { "epoch": 2.5917728471001755, "grad_norm": 0.9736700654029846, "learning_rate": 2.7815900412781594e-05, "loss": 0.1703, "step": 31460 }, { "epoch": 2.593420474516696, "grad_norm": 0.5332132577896118, "learning_rate": 2.7787323460822146e-05, "loss": 0.1648, "step": 31480 }, { "epoch": 2.595068101933216, "grad_norm": 2.0113890171051025, "learning_rate": 2.7758742820549738e-05, "loss": 0.1647, "step": 31500 }, { "epoch": 2.5967157293497363, "grad_norm": 7.613828659057617, "learning_rate": 2.7730158529783566e-05, "loss": 0.1549, "step": 31520 }, { "epoch": 2.5983633567662565, "grad_norm": 1.237125277519226, "learning_rate": 2.770157062634766e-05, "loss": 0.1739, "step": 31540 }, { "epoch": 2.6000109841827768, "grad_norm": 1.3644907474517822, "learning_rate": 2.767297914807084e-05, "loss": 0.1746, "step": 31560 }, { "epoch": 2.601658611599297, "grad_norm": 1.010188341140747, "learning_rate": 2.7644384132786637e-05, "loss": 0.1577, "step": 31580 }, { "epoch": 2.6033062390158173, "grad_norm": 1.2578411102294922, "learning_rate": 2.761578561833328e-05, "loss": 0.1569, "step": 31600 }, { "epoch": 2.6049538664323375, "grad_norm": 0.8691573143005371, "learning_rate": 2.758718364255361e-05, "loss": 0.17, "step": 31620 }, { "epoch": 2.6066014938488578, "grad_norm": 4.4133405685424805, "learning_rate": 2.7558578243295087e-05, "loss": 0.178, "step": 31640 }, { "epoch": 2.608249121265378, "grad_norm": 0.5621764659881592, "learning_rate": 2.7529969458409644e-05, "loss": 0.1619, "step": 31660 }, { "epoch": 2.609896748681898, "grad_norm": 1.044129729270935, "learning_rate": 2.750135732575374e-05, "loss": 0.1662, "step": 31680 }, { "epoch": 2.6115443760984185, "grad_norm": 4.191971302032471, "learning_rate": 2.7472741883188248e-05, "loss": 0.1698, "step": 31700 }, { "epoch": 2.6131920035149383, "grad_norm": 1.4464364051818848, "learning_rate": 2.7444123168578418e-05, "loss": 0.1691, "step": 31720 }, { "epoch": 2.6148396309314585, "grad_norm": 1.0693591833114624, "learning_rate": 2.7415501219793833e-05, "loss": 0.1636, "step": 31740 }, { "epoch": 2.616487258347979, "grad_norm": 0.7950401306152344, "learning_rate": 2.738687607470835e-05, "loss": 0.1763, "step": 31760 }, { "epoch": 2.618134885764499, "grad_norm": 1.1098366975784302, "learning_rate": 2.7358247771200073e-05, "loss": 0.1771, "step": 31780 }, { "epoch": 2.6197825131810193, "grad_norm": 0.9066492319107056, "learning_rate": 2.732961634715126e-05, "loss": 0.1582, "step": 31800 }, { "epoch": 2.6214301405975395, "grad_norm": 1.0293641090393066, "learning_rate": 2.730098184044832e-05, "loss": 0.1752, "step": 31820 }, { "epoch": 2.62307776801406, "grad_norm": 0.5921834111213684, "learning_rate": 2.7272344288981726e-05, "loss": 0.1781, "step": 31840 }, { "epoch": 2.62472539543058, "grad_norm": 3.703381061553955, "learning_rate": 2.7243703730645992e-05, "loss": 0.1809, "step": 31860 }, { "epoch": 2.6263730228471003, "grad_norm": 1.139620065689087, "learning_rate": 2.721506020333961e-05, "loss": 0.166, "step": 31880 }, { "epoch": 2.6280206502636205, "grad_norm": 0.6249837279319763, "learning_rate": 2.7186413744964982e-05, "loss": 0.1528, "step": 31900 }, { "epoch": 2.6296682776801408, "grad_norm": 0.8693757057189941, "learning_rate": 2.715776439342842e-05, "loss": 0.1691, "step": 31920 }, { "epoch": 2.6313159050966606, "grad_norm": 7.358799934387207, "learning_rate": 2.7129112186640027e-05, "loss": 0.1675, "step": 31940 }, { "epoch": 2.6329635325131813, "grad_norm": 1.3295739889144897, "learning_rate": 2.7100457162513715e-05, "loss": 0.1754, "step": 31960 }, { "epoch": 2.634611159929701, "grad_norm": 0.8648635149002075, "learning_rate": 2.7071799358967116e-05, "loss": 0.1611, "step": 31980 }, { "epoch": 2.6362587873462213, "grad_norm": 11.785006523132324, "learning_rate": 2.7043138813921527e-05, "loss": 0.1681, "step": 32000 }, { "epoch": 2.6362587873462213, "eval_loss": 0.5284088253974915, "eval_runtime": 251.5884, "eval_samples_per_second": 86.606, "eval_steps_per_second": 21.654, "eval_wer": 0.22814880340851512, "step": 32000 }, { "epoch": 2.6379064147627416, "grad_norm": 0.9631918668746948, "learning_rate": 2.7014475565301893e-05, "loss": 0.1791, "step": 32020 }, { "epoch": 2.639554042179262, "grad_norm": 1.5670260190963745, "learning_rate": 2.698580965103672e-05, "loss": 0.1808, "step": 32040 }, { "epoch": 2.641201669595782, "grad_norm": 30.275543212890625, "learning_rate": 2.695714110905805e-05, "loss": 0.1803, "step": 32060 }, { "epoch": 2.6428492970123023, "grad_norm": 0.5388301014900208, "learning_rate": 2.6928469977301397e-05, "loss": 0.1691, "step": 32080 }, { "epoch": 2.6444969244288226, "grad_norm": 1.3333371877670288, "learning_rate": 2.6899796293705705e-05, "loss": 0.1516, "step": 32100 }, { "epoch": 2.646144551845343, "grad_norm": 1.3209874629974365, "learning_rate": 2.6871120096213286e-05, "loss": 0.1551, "step": 32120 }, { "epoch": 2.647792179261863, "grad_norm": 1.3346821069717407, "learning_rate": 2.684244142276981e-05, "loss": 0.1624, "step": 32140 }, { "epoch": 2.649439806678383, "grad_norm": 1.642838478088379, "learning_rate": 2.6813760311324176e-05, "loss": 0.1808, "step": 32160 }, { "epoch": 2.6510874340949035, "grad_norm": 0.6234583258628845, "learning_rate": 2.6785076799828546e-05, "loss": 0.1544, "step": 32180 }, { "epoch": 2.6527350615114234, "grad_norm": 1.2087680101394653, "learning_rate": 2.6756390926238235e-05, "loss": 0.1571, "step": 32200 }, { "epoch": 2.6543826889279436, "grad_norm": 0.8298318386077881, "learning_rate": 2.672770272851171e-05, "loss": 0.1613, "step": 32220 }, { "epoch": 2.656030316344464, "grad_norm": 0.6398922204971313, "learning_rate": 2.6699012244610477e-05, "loss": 0.1685, "step": 32240 }, { "epoch": 2.657677943760984, "grad_norm": 0.7772256731987, "learning_rate": 2.6670319512499104e-05, "loss": 0.1572, "step": 32260 }, { "epoch": 2.6593255711775043, "grad_norm": 0.9730490446090698, "learning_rate": 2.6641624570145102e-05, "loss": 0.1633, "step": 32280 }, { "epoch": 2.6609731985940246, "grad_norm": 2.9239614009857178, "learning_rate": 2.6612927455518928e-05, "loss": 0.1694, "step": 32300 }, { "epoch": 2.662620826010545, "grad_norm": 0.7175331711769104, "learning_rate": 2.6584228206593913e-05, "loss": 0.1618, "step": 32320 }, { "epoch": 2.664268453427065, "grad_norm": 0.7444982528686523, "learning_rate": 2.6555526861346203e-05, "loss": 0.181, "step": 32340 }, { "epoch": 2.6659160808435853, "grad_norm": 3.278308153152466, "learning_rate": 2.652682345775472e-05, "loss": 0.1682, "step": 32360 }, { "epoch": 2.6675637082601056, "grad_norm": 1.4923603534698486, "learning_rate": 2.64981180338011e-05, "loss": 0.1698, "step": 32380 }, { "epoch": 2.669211335676626, "grad_norm": 6.6563897132873535, "learning_rate": 2.6469410627469683e-05, "loss": 0.1513, "step": 32400 }, { "epoch": 2.6708589630931456, "grad_norm": 3.57399845123291, "learning_rate": 2.6440701276747404e-05, "loss": 0.1505, "step": 32420 }, { "epoch": 2.6725065905096663, "grad_norm": 0.5343198776245117, "learning_rate": 2.641199001962377e-05, "loss": 0.181, "step": 32440 }, { "epoch": 2.674154217926186, "grad_norm": 0.8804404735565186, "learning_rate": 2.638327689409083e-05, "loss": 0.1773, "step": 32460 }, { "epoch": 2.6758018453427064, "grad_norm": 1.2259153127670288, "learning_rate": 2.63545619381431e-05, "loss": 0.1703, "step": 32480 }, { "epoch": 2.6774494727592266, "grad_norm": 1.8356521129608154, "learning_rate": 2.6325845189777502e-05, "loss": 0.1566, "step": 32500 }, { "epoch": 2.679097100175747, "grad_norm": 1.4201345443725586, "learning_rate": 2.6297126686993357e-05, "loss": 0.1648, "step": 32520 }, { "epoch": 2.680744727592267, "grad_norm": 0.9832906126976013, "learning_rate": 2.626840646779228e-05, "loss": 0.1673, "step": 32540 }, { "epoch": 2.6823923550087874, "grad_norm": 1.2985649108886719, "learning_rate": 2.623968457017817e-05, "loss": 0.1626, "step": 32560 }, { "epoch": 2.6840399824253076, "grad_norm": 1.3062632083892822, "learning_rate": 2.6210961032157145e-05, "loss": 0.1699, "step": 32580 }, { "epoch": 2.685687609841828, "grad_norm": 2.640608787536621, "learning_rate": 2.6182235891737493e-05, "loss": 0.1629, "step": 32600 }, { "epoch": 2.687335237258348, "grad_norm": 1.0804749727249146, "learning_rate": 2.6153509186929635e-05, "loss": 0.1662, "step": 32620 }, { "epoch": 2.6889828646748684, "grad_norm": 1.186482548713684, "learning_rate": 2.612478095574604e-05, "loss": 0.182, "step": 32640 }, { "epoch": 2.6906304920913886, "grad_norm": 1.7019797563552856, "learning_rate": 2.6096051236201217e-05, "loss": 0.1627, "step": 32660 }, { "epoch": 2.6922781195079084, "grad_norm": 1.0984563827514648, "learning_rate": 2.606732006631163e-05, "loss": 0.1804, "step": 32680 }, { "epoch": 2.693925746924429, "grad_norm": 12.436518669128418, "learning_rate": 2.6038587484095673e-05, "loss": 0.174, "step": 32700 }, { "epoch": 2.695573374340949, "grad_norm": 0.690076470375061, "learning_rate": 2.6009853527573585e-05, "loss": 0.1741, "step": 32720 }, { "epoch": 2.697221001757469, "grad_norm": 7.712978363037109, "learning_rate": 2.5981118234767467e-05, "loss": 0.186, "step": 32740 }, { "epoch": 2.6988686291739894, "grad_norm": 5.45370626449585, "learning_rate": 2.5952381643701147e-05, "loss": 0.1873, "step": 32760 }, { "epoch": 2.7005162565905096, "grad_norm": 0.6914022564888, "learning_rate": 2.5923643792400194e-05, "loss": 0.1692, "step": 32780 }, { "epoch": 2.70216388400703, "grad_norm": 2.832401752471924, "learning_rate": 2.5894904718891834e-05, "loss": 0.1604, "step": 32800 }, { "epoch": 2.70381151142355, "grad_norm": 1.0212492942810059, "learning_rate": 2.5867601501626415e-05, "loss": 0.169, "step": 32820 }, { "epoch": 2.7054591388400704, "grad_norm": 2.72938871383667, "learning_rate": 2.583886015419551e-05, "loss": 0.1496, "step": 32840 }, { "epoch": 2.7071067662565906, "grad_norm": 1.3014295101165771, "learning_rate": 2.581011769674676e-05, "loss": 0.1735, "step": 32860 }, { "epoch": 2.708754393673111, "grad_norm": 0.7176096439361572, "learning_rate": 2.5781374167313492e-05, "loss": 0.1582, "step": 32880 }, { "epoch": 2.7104020210896307, "grad_norm": 2.523735523223877, "learning_rate": 2.5752629603930457e-05, "loss": 0.161, "step": 32900 }, { "epoch": 2.7120496485061514, "grad_norm": 4.890264987945557, "learning_rate": 2.572388404463374e-05, "loss": 0.1665, "step": 32920 }, { "epoch": 2.713697275922671, "grad_norm": 1.3927950859069824, "learning_rate": 2.5695137527460795e-05, "loss": 0.178, "step": 32940 }, { "epoch": 2.7153449033391914, "grad_norm": 1.3219293355941772, "learning_rate": 2.566639009045031e-05, "loss": 0.1692, "step": 32960 }, { "epoch": 2.7169925307557117, "grad_norm": 4.5115790367126465, "learning_rate": 2.5637641771642196e-05, "loss": 0.1666, "step": 32980 }, { "epoch": 2.718640158172232, "grad_norm": 2.417607307434082, "learning_rate": 2.560889260907754e-05, "loss": 0.1567, "step": 33000 }, { "epoch": 2.720287785588752, "grad_norm": 6.914716720581055, "learning_rate": 2.558014264079853e-05, "loss": 0.1693, "step": 33020 }, { "epoch": 2.7219354130052724, "grad_norm": 5.992996692657471, "learning_rate": 2.5551391904848448e-05, "loss": 0.1729, "step": 33040 }, { "epoch": 2.7235830404217927, "grad_norm": 0.9340835213661194, "learning_rate": 2.552264043927155e-05, "loss": 0.1823, "step": 33060 }, { "epoch": 2.725230667838313, "grad_norm": 0.47994711995124817, "learning_rate": 2.5493888282113092e-05, "loss": 0.1607, "step": 33080 }, { "epoch": 2.726878295254833, "grad_norm": 2.1940431594848633, "learning_rate": 2.5465135471419236e-05, "loss": 0.1561, "step": 33100 }, { "epoch": 2.7285259226713534, "grad_norm": 0.7191601991653442, "learning_rate": 2.5436382045237008e-05, "loss": 0.1865, "step": 33120 }, { "epoch": 2.7301735500878737, "grad_norm": 1.24649977684021, "learning_rate": 2.5407628041614234e-05, "loss": 0.1733, "step": 33140 }, { "epoch": 2.7318211775043935, "grad_norm": 1.3951886892318726, "learning_rate": 2.5378873498599535e-05, "loss": 0.1781, "step": 33160 }, { "epoch": 2.733468804920914, "grad_norm": 0.28106406331062317, "learning_rate": 2.5350118454242217e-05, "loss": 0.1859, "step": 33180 }, { "epoch": 2.735116432337434, "grad_norm": 1.036487102508545, "learning_rate": 2.532136294659226e-05, "loss": 0.1637, "step": 33200 }, { "epoch": 2.736764059753954, "grad_norm": 0.9746518135070801, "learning_rate": 2.5292607013700258e-05, "loss": 0.1625, "step": 33220 }, { "epoch": 2.7384116871704745, "grad_norm": 31.79758644104004, "learning_rate": 2.526385069361738e-05, "loss": 0.1615, "step": 33240 }, { "epoch": 2.7400593145869947, "grad_norm": 0.9092070460319519, "learning_rate": 2.5235094024395273e-05, "loss": 0.1646, "step": 33260 }, { "epoch": 2.741706942003515, "grad_norm": 0.34284865856170654, "learning_rate": 2.520633704408608e-05, "loss": 0.1563, "step": 33280 }, { "epoch": 2.743354569420035, "grad_norm": 0.9926866888999939, "learning_rate": 2.517757979074235e-05, "loss": 0.1501, "step": 33300 }, { "epoch": 2.7450021968365554, "grad_norm": 1.1938321590423584, "learning_rate": 2.514882230241697e-05, "loss": 0.1544, "step": 33320 }, { "epoch": 2.7466498242530757, "grad_norm": 0.7486788630485535, "learning_rate": 2.5120064617163162e-05, "loss": 0.1733, "step": 33340 }, { "epoch": 2.748297451669596, "grad_norm": 0.8478560447692871, "learning_rate": 2.5091306773034405e-05, "loss": 0.1635, "step": 33360 }, { "epoch": 2.7499450790861157, "grad_norm": 0.4918525516986847, "learning_rate": 2.5062548808084374e-05, "loss": 0.1574, "step": 33380 }, { "epoch": 2.7515927065026364, "grad_norm": 2.3312413692474365, "learning_rate": 2.5033790760366925e-05, "loss": 0.1493, "step": 33400 }, { "epoch": 2.7532403339191562, "grad_norm": 1.4235730171203613, "learning_rate": 2.5005032667936006e-05, "loss": 0.1566, "step": 33420 }, { "epoch": 2.7548879613356765, "grad_norm": 1.1899724006652832, "learning_rate": 2.4976274568845632e-05, "loss": 0.1996, "step": 33440 }, { "epoch": 2.7565355887521967, "grad_norm": 1.3901513814926147, "learning_rate": 2.494751650114983e-05, "loss": 0.1829, "step": 33460 }, { "epoch": 2.758183216168717, "grad_norm": 3.0261080265045166, "learning_rate": 2.491875850290257e-05, "loss": 0.1772, "step": 33480 }, { "epoch": 2.7598308435852372, "grad_norm": 1.2606502771377563, "learning_rate": 2.489000061215775e-05, "loss": 0.161, "step": 33500 }, { "epoch": 2.7614784710017575, "grad_norm": 0.659880518913269, "learning_rate": 2.486124286696911e-05, "loss": 0.164, "step": 33520 }, { "epoch": 2.7631260984182777, "grad_norm": 0.6361564993858337, "learning_rate": 2.4832485305390213e-05, "loss": 0.1718, "step": 33540 }, { "epoch": 2.764773725834798, "grad_norm": 0.8560569286346436, "learning_rate": 2.4803727965474376e-05, "loss": 0.1758, "step": 33560 }, { "epoch": 2.766421353251318, "grad_norm": 0.9023476839065552, "learning_rate": 2.4774970885274598e-05, "loss": 0.1525, "step": 33580 }, { "epoch": 2.7680689806678385, "grad_norm": 1.1586657762527466, "learning_rate": 2.474621410284356e-05, "loss": 0.1567, "step": 33600 }, { "epoch": 2.7697166080843587, "grad_norm": 2.3108408451080322, "learning_rate": 2.4717457656233545e-05, "loss": 0.1645, "step": 33620 }, { "epoch": 2.7713642355008785, "grad_norm": 0.7011807560920715, "learning_rate": 2.4688701583496392e-05, "loss": 0.1519, "step": 33640 }, { "epoch": 2.773011862917399, "grad_norm": 2.491318702697754, "learning_rate": 2.4659945922683446e-05, "loss": 0.1806, "step": 33660 }, { "epoch": 2.774659490333919, "grad_norm": 1.146855354309082, "learning_rate": 2.4631190711845484e-05, "loss": 0.1637, "step": 33680 }, { "epoch": 2.7763071177504393, "grad_norm": 1.3814749717712402, "learning_rate": 2.4602435989032718e-05, "loss": 0.159, "step": 33700 }, { "epoch": 2.7779547451669595, "grad_norm": 2.1869618892669678, "learning_rate": 2.4573681792294706e-05, "loss": 0.1643, "step": 33720 }, { "epoch": 2.7796023725834798, "grad_norm": 0.6119910478591919, "learning_rate": 2.4544928159680303e-05, "loss": 0.1843, "step": 33740 }, { "epoch": 2.78125, "grad_norm": 2.1698923110961914, "learning_rate": 2.4516175129237627e-05, "loss": 0.1701, "step": 33760 }, { "epoch": 2.7828976274165202, "grad_norm": 1.134415626525879, "learning_rate": 2.448742273901399e-05, "loss": 0.1674, "step": 33780 }, { "epoch": 2.7845452548330405, "grad_norm": 2.005388021469116, "learning_rate": 2.445867102705585e-05, "loss": 0.1645, "step": 33800 }, { "epoch": 2.7861928822495607, "grad_norm": 1.6660879850387573, "learning_rate": 2.442992003140879e-05, "loss": 0.1693, "step": 33820 }, { "epoch": 2.787840509666081, "grad_norm": 1.1064176559448242, "learning_rate": 2.4401169790117427e-05, "loss": 0.175, "step": 33840 }, { "epoch": 2.789488137082601, "grad_norm": 3.4418516159057617, "learning_rate": 2.4372420341225395e-05, "loss": 0.1818, "step": 33860 }, { "epoch": 2.7911357644991215, "grad_norm": 1.3131994009017944, "learning_rate": 2.434367172277526e-05, "loss": 0.1754, "step": 33880 }, { "epoch": 2.7927833919156413, "grad_norm": 2.1762378215789795, "learning_rate": 2.43149239728085e-05, "loss": 0.1644, "step": 33900 }, { "epoch": 2.7944310193321615, "grad_norm": 1.5846805572509766, "learning_rate": 2.4286177129365446e-05, "loss": 0.1762, "step": 33920 }, { "epoch": 2.796078646748682, "grad_norm": 0.6951166987419128, "learning_rate": 2.4257431230485228e-05, "loss": 0.1704, "step": 33940 }, { "epoch": 2.797726274165202, "grad_norm": 0.9165875911712646, "learning_rate": 2.4228686314205736e-05, "loss": 0.1813, "step": 33960 }, { "epoch": 2.7993739015817223, "grad_norm": 1.009450912475586, "learning_rate": 2.419994241856352e-05, "loss": 0.1599, "step": 33980 }, { "epoch": 2.8010215289982425, "grad_norm": 0.898774266242981, "learning_rate": 2.4171199581593825e-05, "loss": 0.1596, "step": 34000 }, { "epoch": 2.8010215289982425, "eval_loss": 0.5283127427101135, "eval_runtime": 250.9148, "eval_samples_per_second": 86.838, "eval_steps_per_second": 21.713, "eval_wer": 0.22828317325592137, "step": 34000 }, { "epoch": 2.8026691564147628, "grad_norm": 23.361583709716797, "learning_rate": 2.4142457841330478e-05, "loss": 0.1676, "step": 34020 }, { "epoch": 2.804316783831283, "grad_norm": 6.748179912567139, "learning_rate": 2.4113717235805854e-05, "loss": 0.1755, "step": 34040 }, { "epoch": 2.8059644112478033, "grad_norm": 1.5261645317077637, "learning_rate": 2.408497780305083e-05, "loss": 0.1895, "step": 34060 }, { "epoch": 2.8076120386643235, "grad_norm": 0.5314382314682007, "learning_rate": 2.405623958109472e-05, "loss": 0.1584, "step": 34080 }, { "epoch": 2.8092596660808438, "grad_norm": 1.132767915725708, "learning_rate": 2.402750260796525e-05, "loss": 0.1689, "step": 34100 }, { "epoch": 2.8109072934973636, "grad_norm": 1.2487684488296509, "learning_rate": 2.3998766921688485e-05, "loss": 0.1749, "step": 34120 }, { "epoch": 2.8125549209138843, "grad_norm": 0.5913691520690918, "learning_rate": 2.3970032560288788e-05, "loss": 0.179, "step": 34140 }, { "epoch": 2.814202548330404, "grad_norm": 1.5123908519744873, "learning_rate": 2.3941299561788783e-05, "loss": 0.1617, "step": 34160 }, { "epoch": 2.8158501757469243, "grad_norm": 0.2831098735332489, "learning_rate": 2.3912567964209264e-05, "loss": 0.1548, "step": 34180 }, { "epoch": 2.8174978031634446, "grad_norm": 4.424227714538574, "learning_rate": 2.3883837805569188e-05, "loss": 0.1512, "step": 34200 }, { "epoch": 2.819145430579965, "grad_norm": 1.4950264692306519, "learning_rate": 2.3855109123885612e-05, "loss": 0.1693, "step": 34220 }, { "epoch": 2.820793057996485, "grad_norm": 1.211940884590149, "learning_rate": 2.3826381957173638e-05, "loss": 0.1671, "step": 34240 }, { "epoch": 2.8224406854130053, "grad_norm": 1.2161052227020264, "learning_rate": 2.3797656343446338e-05, "loss": 0.168, "step": 34260 }, { "epoch": 2.8240883128295255, "grad_norm": 0.32065290212631226, "learning_rate": 2.3768932320714758e-05, "loss": 0.1751, "step": 34280 }, { "epoch": 2.825735940246046, "grad_norm": 2.1875534057617188, "learning_rate": 2.3740209926987828e-05, "loss": 0.1666, "step": 34300 }, { "epoch": 2.827383567662566, "grad_norm": 1.0044718980789185, "learning_rate": 2.3711489200272326e-05, "loss": 0.1624, "step": 34320 }, { "epoch": 2.829031195079086, "grad_norm": 2.3182806968688965, "learning_rate": 2.368277017857283e-05, "loss": 0.1698, "step": 34340 }, { "epoch": 2.8306788224956065, "grad_norm": 1.2233198881149292, "learning_rate": 2.365405289989163e-05, "loss": 0.1624, "step": 34360 }, { "epoch": 2.8323264499121263, "grad_norm": 0.30370575189590454, "learning_rate": 2.3625337402228754e-05, "loss": 0.1532, "step": 34380 }, { "epoch": 2.8339740773286466, "grad_norm": 2.319612979888916, "learning_rate": 2.3596623723581842e-05, "loss": 0.155, "step": 34400 }, { "epoch": 2.835621704745167, "grad_norm": 3.8146913051605225, "learning_rate": 2.3567911901946143e-05, "loss": 0.173, "step": 34420 }, { "epoch": 2.837269332161687, "grad_norm": 3.995103120803833, "learning_rate": 2.3539201975314447e-05, "loss": 0.1643, "step": 34440 }, { "epoch": 2.8389169595782073, "grad_norm": 1.1264597177505493, "learning_rate": 2.351049398167702e-05, "loss": 0.1811, "step": 34460 }, { "epoch": 2.8405645869947276, "grad_norm": 1.4618791341781616, "learning_rate": 2.3481787959021586e-05, "loss": 0.1668, "step": 34480 }, { "epoch": 2.842212214411248, "grad_norm": 1.8342463970184326, "learning_rate": 2.3453083945333266e-05, "loss": 0.1635, "step": 34500 }, { "epoch": 2.843859841827768, "grad_norm": 1.134645700454712, "learning_rate": 2.3424381978594505e-05, "loss": 0.1748, "step": 34520 }, { "epoch": 2.8455074692442883, "grad_norm": 0.6016003489494324, "learning_rate": 2.3395682096785065e-05, "loss": 0.1651, "step": 34540 }, { "epoch": 2.8471550966608086, "grad_norm": 0.5989654064178467, "learning_rate": 2.3366984337881908e-05, "loss": 0.1748, "step": 34560 }, { "epoch": 2.848802724077329, "grad_norm": 0.23500819504261017, "learning_rate": 2.333828873985923e-05, "loss": 0.1656, "step": 34580 }, { "epoch": 2.8504503514938486, "grad_norm": 3.5726726055145264, "learning_rate": 2.3309595340688338e-05, "loss": 0.1519, "step": 34600 }, { "epoch": 2.8520979789103693, "grad_norm": 0.5327156782150269, "learning_rate": 2.3280904178337648e-05, "loss": 0.155, "step": 34620 }, { "epoch": 2.853745606326889, "grad_norm": 0.4773898422718048, "learning_rate": 2.3252215290772616e-05, "loss": 0.182, "step": 34640 }, { "epoch": 2.8553932337434094, "grad_norm": 0.9713151454925537, "learning_rate": 2.3223528715955665e-05, "loss": 0.1806, "step": 34660 }, { "epoch": 2.8570408611599296, "grad_norm": 0.49397534132003784, "learning_rate": 2.3194844491846178e-05, "loss": 0.1587, "step": 34680 }, { "epoch": 2.85868848857645, "grad_norm": 5.4063029289245605, "learning_rate": 2.3166162656400418e-05, "loss": 0.1492, "step": 34700 }, { "epoch": 2.86033611599297, "grad_norm": 1.0393853187561035, "learning_rate": 2.3137483247571505e-05, "loss": 0.1624, "step": 34720 }, { "epoch": 2.8619837434094904, "grad_norm": 1.0829802751541138, "learning_rate": 2.3108806303309337e-05, "loss": 0.173, "step": 34740 }, { "epoch": 2.8636313708260106, "grad_norm": 1.1949620246887207, "learning_rate": 2.3081565523627518e-05, "loss": 0.1661, "step": 34760 }, { "epoch": 2.865278998242531, "grad_norm": 0.9706825613975525, "learning_rate": 2.305289349441147e-05, "loss": 0.153, "step": 34780 }, { "epoch": 2.866926625659051, "grad_norm": 1.9239734411239624, "learning_rate": 2.302422404169516e-05, "loss": 0.1571, "step": 34800 }, { "epoch": 2.868574253075571, "grad_norm": 6.778972148895264, "learning_rate": 2.2995557203415314e-05, "loss": 0.1624, "step": 34820 }, { "epoch": 2.8702218804920916, "grad_norm": 0.6842126250267029, "learning_rate": 2.2966893017505192e-05, "loss": 0.1684, "step": 34840 }, { "epoch": 2.8718695079086114, "grad_norm": 1.1653012037277222, "learning_rate": 2.2938231521894544e-05, "loss": 0.1662, "step": 34860 }, { "epoch": 2.8735171353251316, "grad_norm": 1.9263895750045776, "learning_rate": 2.2909572754509558e-05, "loss": 0.1609, "step": 34880 }, { "epoch": 2.875164762741652, "grad_norm": 3.092329978942871, "learning_rate": 2.2880916753272824e-05, "loss": 0.1626, "step": 34900 }, { "epoch": 2.876812390158172, "grad_norm": 0.6692825555801392, "learning_rate": 2.2852263556103263e-05, "loss": 0.1648, "step": 34920 }, { "epoch": 2.8784600175746924, "grad_norm": 1.1220498085021973, "learning_rate": 2.2823613200916095e-05, "loss": 0.1957, "step": 34940 }, { "epoch": 2.8801076449912126, "grad_norm": 1.7044694423675537, "learning_rate": 2.279496572562275e-05, "loss": 0.1757, "step": 34960 }, { "epoch": 2.881755272407733, "grad_norm": 0.3736783266067505, "learning_rate": 2.2766321168130876e-05, "loss": 0.1637, "step": 34980 }, { "epoch": 2.883402899824253, "grad_norm": 3.305779457092285, "learning_rate": 2.273767956634426e-05, "loss": 0.1617, "step": 35000 }, { "epoch": 2.8850505272407734, "grad_norm": 0.9354811310768127, "learning_rate": 2.2709040958162764e-05, "loss": 0.1711, "step": 35020 }, { "epoch": 2.8866981546572936, "grad_norm": 0.9158414006233215, "learning_rate": 2.2680405381482305e-05, "loss": 0.1756, "step": 35040 }, { "epoch": 2.888345782073814, "grad_norm": 1.0217347145080566, "learning_rate": 2.265177287419476e-05, "loss": 0.1711, "step": 35060 }, { "epoch": 2.8899934094903337, "grad_norm": 0.9840256571769714, "learning_rate": 2.262314347418797e-05, "loss": 0.1628, "step": 35080 }, { "epoch": 2.8916410369068544, "grad_norm": 4.251138210296631, "learning_rate": 2.259451721934566e-05, "loss": 0.1616, "step": 35100 }, { "epoch": 2.893288664323374, "grad_norm": 6.069693565368652, "learning_rate": 2.2565894147547397e-05, "loss": 0.1628, "step": 35120 }, { "epoch": 2.8949362917398944, "grad_norm": 0.44294020533561707, "learning_rate": 2.2537274296668513e-05, "loss": 0.1791, "step": 35140 }, { "epoch": 2.8965839191564147, "grad_norm": 1.3097333908081055, "learning_rate": 2.2508657704580104e-05, "loss": 0.1795, "step": 35160 }, { "epoch": 2.898231546572935, "grad_norm": 0.8062418103218079, "learning_rate": 2.2480044409148947e-05, "loss": 0.1886, "step": 35180 }, { "epoch": 2.899879173989455, "grad_norm": 1.6019326448440552, "learning_rate": 2.2451434448237448e-05, "loss": 0.1498, "step": 35200 }, { "epoch": 2.9015268014059754, "grad_norm": 1.6042932271957397, "learning_rate": 2.242282785970361e-05, "loss": 0.1691, "step": 35220 }, { "epoch": 2.9031744288224957, "grad_norm": 0.9575341939926147, "learning_rate": 2.2394224681400962e-05, "loss": 0.1647, "step": 35240 }, { "epoch": 2.904822056239016, "grad_norm": 0.8834397792816162, "learning_rate": 2.2365624951178535e-05, "loss": 0.1652, "step": 35260 }, { "epoch": 2.906469683655536, "grad_norm": 0.6135185360908508, "learning_rate": 2.2337028706880792e-05, "loss": 0.1763, "step": 35280 }, { "epoch": 2.9081173110720564, "grad_norm": 1.6442698240280151, "learning_rate": 2.2308435986347577e-05, "loss": 0.1683, "step": 35300 }, { "epoch": 2.9097649384885766, "grad_norm": 1.1094133853912354, "learning_rate": 2.2279846827414088e-05, "loss": 0.1597, "step": 35320 }, { "epoch": 2.9114125659050965, "grad_norm": 0.7289668917655945, "learning_rate": 2.2251261267910777e-05, "loss": 0.1767, "step": 35340 }, { "epoch": 2.913060193321617, "grad_norm": 0.8773306012153625, "learning_rate": 2.222267934566337e-05, "loss": 0.1597, "step": 35360 }, { "epoch": 2.914707820738137, "grad_norm": 0.398133248090744, "learning_rate": 2.2194101098492758e-05, "loss": 0.1705, "step": 35380 }, { "epoch": 2.916355448154657, "grad_norm": 1.7705035209655762, "learning_rate": 2.216552656421498e-05, "loss": 0.1636, "step": 35400 }, { "epoch": 2.9180030755711774, "grad_norm": 0.6762025952339172, "learning_rate": 2.2136955780641165e-05, "loss": 0.1661, "step": 35420 }, { "epoch": 2.9196507029876977, "grad_norm": 0.7774632573127747, "learning_rate": 2.2108388785577448e-05, "loss": 0.1737, "step": 35440 }, { "epoch": 2.921298330404218, "grad_norm": 0.9466442465782166, "learning_rate": 2.2079825616824988e-05, "loss": 0.1779, "step": 35460 }, { "epoch": 2.922945957820738, "grad_norm": 0.8889173269271851, "learning_rate": 2.2051266312179865e-05, "loss": 0.1699, "step": 35480 }, { "epoch": 2.9245935852372584, "grad_norm": 1.440914273262024, "learning_rate": 2.202271090943305e-05, "loss": 0.16, "step": 35500 }, { "epoch": 2.9262412126537787, "grad_norm": 0.5880303978919983, "learning_rate": 2.1994159446370356e-05, "loss": 0.1718, "step": 35520 }, { "epoch": 2.927888840070299, "grad_norm": 1.3823106288909912, "learning_rate": 2.1965611960772354e-05, "loss": 0.1811, "step": 35540 }, { "epoch": 2.9295364674868187, "grad_norm": 1.034883975982666, "learning_rate": 2.193706849041439e-05, "loss": 0.1904, "step": 35560 }, { "epoch": 2.9311840949033394, "grad_norm": 1.2182835340499878, "learning_rate": 2.1908529073066476e-05, "loss": 0.1596, "step": 35580 }, { "epoch": 2.9328317223198592, "grad_norm": 1.7504252195358276, "learning_rate": 2.1879993746493262e-05, "loss": 0.1579, "step": 35600 }, { "epoch": 2.9344793497363795, "grad_norm": 0.9394094347953796, "learning_rate": 2.1851462548454e-05, "loss": 0.1782, "step": 35620 }, { "epoch": 2.9361269771528997, "grad_norm": 0.9647118449211121, "learning_rate": 2.182293551670245e-05, "loss": 0.1822, "step": 35640 }, { "epoch": 2.93777460456942, "grad_norm": 1.628814935684204, "learning_rate": 2.179441268898688e-05, "loss": 0.174, "step": 35660 }, { "epoch": 2.93942223198594, "grad_norm": 0.8225361704826355, "learning_rate": 2.1765894103049992e-05, "loss": 0.162, "step": 35680 }, { "epoch": 2.9410698594024605, "grad_norm": 1.2492133378982544, "learning_rate": 2.1737379796628882e-05, "loss": 0.1508, "step": 35700 }, { "epoch": 2.9427174868189807, "grad_norm": 0.734495222568512, "learning_rate": 2.1708869807454974e-05, "loss": 0.1672, "step": 35720 }, { "epoch": 2.944365114235501, "grad_norm": 0.8144159317016602, "learning_rate": 2.1680364173253964e-05, "loss": 0.1717, "step": 35740 }, { "epoch": 2.946012741652021, "grad_norm": 0.5943552255630493, "learning_rate": 2.1651862931745815e-05, "loss": 0.1584, "step": 35760 }, { "epoch": 2.9476603690685415, "grad_norm": 0.5486462116241455, "learning_rate": 2.162336612064466e-05, "loss": 0.1537, "step": 35780 }, { "epoch": 2.9493079964850617, "grad_norm": 1.7498178482055664, "learning_rate": 2.1594873777658772e-05, "loss": 0.1583, "step": 35800 }, { "epoch": 2.9509556239015815, "grad_norm": 0.6559889316558838, "learning_rate": 2.156638594049052e-05, "loss": 0.1789, "step": 35820 }, { "epoch": 2.952603251318102, "grad_norm": 1.0158253908157349, "learning_rate": 2.1537902646836298e-05, "loss": 0.1746, "step": 35840 }, { "epoch": 2.954250878734622, "grad_norm": 2.991706132888794, "learning_rate": 2.150942393438649e-05, "loss": 0.1604, "step": 35860 }, { "epoch": 2.9558985061511422, "grad_norm": 1.1813044548034668, "learning_rate": 2.148094984082543e-05, "loss": 0.1537, "step": 35880 }, { "epoch": 2.9575461335676625, "grad_norm": 1.1172895431518555, "learning_rate": 2.1452480403831323e-05, "loss": 0.1494, "step": 35900 }, { "epoch": 2.9591937609841827, "grad_norm": 5.81532096862793, "learning_rate": 2.142401566107624e-05, "loss": 0.1671, "step": 35920 }, { "epoch": 2.960841388400703, "grad_norm": 9.261301040649414, "learning_rate": 2.1395555650225993e-05, "loss": 0.1748, "step": 35940 }, { "epoch": 2.9624890158172232, "grad_norm": 0.48542654514312744, "learning_rate": 2.1367100408940176e-05, "loss": 0.1812, "step": 35960 }, { "epoch": 2.9641366432337435, "grad_norm": 0.28661516308784485, "learning_rate": 2.133864997487205e-05, "loss": 0.157, "step": 35980 }, { "epoch": 2.9657842706502637, "grad_norm": 0.7356573343276978, "learning_rate": 2.1310204385668527e-05, "loss": 0.148, "step": 36000 }, { "epoch": 2.9657842706502637, "eval_loss": 0.5161551833152771, "eval_runtime": 250.2579, "eval_samples_per_second": 87.066, "eval_steps_per_second": 21.77, "eval_wer": 0.22239329494461443, "step": 36000 }, { "epoch": 2.967486818980668, "grad_norm": 1.018925428390503, "learning_rate": 2.1281763678970113e-05, "loss": 0.1475, "step": 36020 }, { "epoch": 2.969134446397188, "grad_norm": 0.5352170467376709, "learning_rate": 2.1254749564304577e-05, "loss": 0.1641, "step": 36040 }, { "epoch": 2.9707820738137083, "grad_norm": 1.6044977903366089, "learning_rate": 2.122631848673005e-05, "loss": 0.1644, "step": 36060 }, { "epoch": 2.9724297012302285, "grad_norm": 0.3798460066318512, "learning_rate": 2.1197892402662252e-05, "loss": 0.1449, "step": 36080 }, { "epoch": 2.974077328646749, "grad_norm": 1.5500409603118896, "learning_rate": 2.1169471349715874e-05, "loss": 0.14, "step": 36100 }, { "epoch": 2.975724956063269, "grad_norm": 1.157693862915039, "learning_rate": 2.114105536549893e-05, "loss": 0.1439, "step": 36120 }, { "epoch": 2.977372583479789, "grad_norm": 0.46837174892425537, "learning_rate": 2.1112644487612743e-05, "loss": 0.1555, "step": 36140 }, { "epoch": 2.9790202108963095, "grad_norm": 4.228370189666748, "learning_rate": 2.108423875365188e-05, "loss": 0.1484, "step": 36160 }, { "epoch": 2.9806678383128293, "grad_norm": 0.4278930425643921, "learning_rate": 2.10558382012041e-05, "loss": 0.1428, "step": 36180 }, { "epoch": 2.9823154657293496, "grad_norm": 1.5068235397338867, "learning_rate": 2.102744286785031e-05, "loss": 0.1393, "step": 36200 }, { "epoch": 2.98396309314587, "grad_norm": 0.6257343888282776, "learning_rate": 2.0999052791164483e-05, "loss": 0.1385, "step": 36220 }, { "epoch": 2.98561072056239, "grad_norm": 0.6275610327720642, "learning_rate": 2.0970668008713677e-05, "loss": 0.162, "step": 36240 }, { "epoch": 2.9872583479789103, "grad_norm": 2.056185722351074, "learning_rate": 2.0942288558057915e-05, "loss": 0.1513, "step": 36260 }, { "epoch": 2.9889059753954306, "grad_norm": 0.30075156688690186, "learning_rate": 2.091391447675018e-05, "loss": 0.1463, "step": 36280 }, { "epoch": 2.990553602811951, "grad_norm": 3.691248655319214, "learning_rate": 2.0885545802336355e-05, "loss": 0.154, "step": 36300 }, { "epoch": 2.992201230228471, "grad_norm": 1.2786400318145752, "learning_rate": 2.0857182572355133e-05, "loss": 0.1622, "step": 36320 }, { "epoch": 2.9938488576449913, "grad_norm": 1.1107524633407593, "learning_rate": 2.082882482433804e-05, "loss": 0.1603, "step": 36340 }, { "epoch": 2.9954964850615116, "grad_norm": 0.4886188507080078, "learning_rate": 2.080047259580934e-05, "loss": 0.1627, "step": 36360 }, { "epoch": 2.997144112478032, "grad_norm": 0.2950489819049835, "learning_rate": 2.0772125924285983e-05, "loss": 0.1525, "step": 36380 }, { "epoch": 2.9987917398945516, "grad_norm": 5.7747111320495605, "learning_rate": 2.0743784847277586e-05, "loss": 0.1378, "step": 36400 }, { "epoch": 3.000439367311072, "grad_norm": 3.0797595977783203, "learning_rate": 2.0715449402286327e-05, "loss": 0.153, "step": 36420 }, { "epoch": 3.002086994727592, "grad_norm": 3.7219903469085693, "learning_rate": 2.0687119626806965e-05, "loss": 0.1586, "step": 36440 }, { "epoch": 3.0037346221441124, "grad_norm": 0.7922471165657043, "learning_rate": 2.0658795558326743e-05, "loss": 0.1308, "step": 36460 }, { "epoch": 3.0053822495606326, "grad_norm": 0.8190672397613525, "learning_rate": 2.063047723432536e-05, "loss": 0.1516, "step": 36480 }, { "epoch": 3.007029876977153, "grad_norm": 0.46464812755584717, "learning_rate": 2.0602164692274904e-05, "loss": 0.1593, "step": 36500 }, { "epoch": 3.008677504393673, "grad_norm": 1.922914981842041, "learning_rate": 2.0573857969639813e-05, "loss": 0.1499, "step": 36520 }, { "epoch": 3.0103251318101933, "grad_norm": 1.359255313873291, "learning_rate": 2.0545557103876832e-05, "loss": 0.1452, "step": 36540 }, { "epoch": 3.0119727592267136, "grad_norm": 1.6646554470062256, "learning_rate": 2.0517262132434947e-05, "loss": 0.134, "step": 36560 }, { "epoch": 3.013620386643234, "grad_norm": 0.5390244722366333, "learning_rate": 2.0488973092755355e-05, "loss": 0.1459, "step": 36580 }, { "epoch": 3.015268014059754, "grad_norm": 0.6744359731674194, "learning_rate": 2.0460690022271405e-05, "loss": 0.1543, "step": 36600 }, { "epoch": 3.0169156414762743, "grad_norm": 0.5361171960830688, "learning_rate": 2.0432412958408524e-05, "loss": 0.1612, "step": 36620 }, { "epoch": 3.0185632688927946, "grad_norm": 1.7773494720458984, "learning_rate": 2.040414193858421e-05, "loss": 0.1637, "step": 36640 }, { "epoch": 3.0202108963093144, "grad_norm": 0.7252753376960754, "learning_rate": 2.0375877000207963e-05, "loss": 0.1275, "step": 36660 }, { "epoch": 3.0218585237258346, "grad_norm": 0.7316433787345886, "learning_rate": 2.0347618180681232e-05, "loss": 0.1707, "step": 36680 }, { "epoch": 3.023506151142355, "grad_norm": 0.9963041543960571, "learning_rate": 2.0319365517397385e-05, "loss": 0.1606, "step": 36700 }, { "epoch": 3.025153778558875, "grad_norm": 1.5264005661010742, "learning_rate": 2.02911190477416e-05, "loss": 0.1484, "step": 36720 }, { "epoch": 3.0268014059753954, "grad_norm": 4.306487560272217, "learning_rate": 2.026287880909091e-05, "loss": 0.1769, "step": 36740 }, { "epoch": 3.0284490333919156, "grad_norm": 0.7492676377296448, "learning_rate": 2.023464483881407e-05, "loss": 0.1336, "step": 36760 }, { "epoch": 3.030096660808436, "grad_norm": 3.7590513229370117, "learning_rate": 2.0206417174271557e-05, "loss": 0.1541, "step": 36780 }, { "epoch": 3.031744288224956, "grad_norm": 0.990483283996582, "learning_rate": 2.0178195852815508e-05, "loss": 0.1557, "step": 36800 }, { "epoch": 3.0333919156414764, "grad_norm": 2.186178207397461, "learning_rate": 2.014998091178964e-05, "loss": 0.1451, "step": 36820 }, { "epoch": 3.0350395430579966, "grad_norm": 3.0323667526245117, "learning_rate": 2.0121772388529247e-05, "loss": 0.1503, "step": 36840 }, { "epoch": 3.036687170474517, "grad_norm": 0.8073984980583191, "learning_rate": 2.009357032036113e-05, "loss": 0.1241, "step": 36860 }, { "epoch": 3.038334797891037, "grad_norm": 0.6493812799453735, "learning_rate": 2.0065374744603555e-05, "loss": 0.1553, "step": 36880 }, { "epoch": 3.039982425307557, "grad_norm": 0.4456873834133148, "learning_rate": 2.0037185698566175e-05, "loss": 0.1546, "step": 36900 }, { "epoch": 3.041630052724077, "grad_norm": 0.3840373456478119, "learning_rate": 2.0009003219550018e-05, "loss": 0.1624, "step": 36920 }, { "epoch": 3.0432776801405974, "grad_norm": 1.3345998525619507, "learning_rate": 1.9980827344847425e-05, "loss": 0.1505, "step": 36940 }, { "epoch": 3.0449253075571177, "grad_norm": 1.5938456058502197, "learning_rate": 1.9952658111741994e-05, "loss": 0.1283, "step": 36960 }, { "epoch": 3.046572934973638, "grad_norm": 0.8933133482933044, "learning_rate": 1.992449555750854e-05, "loss": 0.1597, "step": 36980 }, { "epoch": 3.048220562390158, "grad_norm": 0.4933435022830963, "learning_rate": 1.989633971941301e-05, "loss": 0.1621, "step": 37000 }, { "epoch": 3.0498681898066784, "grad_norm": 0.9858216643333435, "learning_rate": 1.9868190634712504e-05, "loss": 0.1589, "step": 37020 }, { "epoch": 3.0515158172231986, "grad_norm": 3.218104124069214, "learning_rate": 1.984004834065516e-05, "loss": 0.1659, "step": 37040 }, { "epoch": 3.053163444639719, "grad_norm": 0.7501934766769409, "learning_rate": 1.9811912874480154e-05, "loss": 0.1294, "step": 37060 }, { "epoch": 3.054811072056239, "grad_norm": 0.9431729912757874, "learning_rate": 1.9783784273417612e-05, "loss": 0.1668, "step": 37080 }, { "epoch": 3.0564586994727594, "grad_norm": 1.0041712522506714, "learning_rate": 1.9755662574688555e-05, "loss": 0.1619, "step": 37100 }, { "epoch": 3.0581063268892796, "grad_norm": 1.4895607233047485, "learning_rate": 1.9727547815504903e-05, "loss": 0.1646, "step": 37120 }, { "epoch": 3.0597539543057994, "grad_norm": 2.0483157634735107, "learning_rate": 1.969944003306939e-05, "loss": 0.1601, "step": 37140 }, { "epoch": 3.0614015817223197, "grad_norm": 1.0434845685958862, "learning_rate": 1.96713392645755e-05, "loss": 0.1326, "step": 37160 }, { "epoch": 3.06304920913884, "grad_norm": 0.5273759961128235, "learning_rate": 1.9643245547207453e-05, "loss": 0.1556, "step": 37180 }, { "epoch": 3.06469683655536, "grad_norm": 0.9159584045410156, "learning_rate": 1.9615158918140126e-05, "loss": 0.1678, "step": 37200 }, { "epoch": 3.0663444639718804, "grad_norm": 0.8162606358528137, "learning_rate": 1.9587079414539022e-05, "loss": 0.1401, "step": 37220 }, { "epoch": 3.0679920913884007, "grad_norm": 16.133909225463867, "learning_rate": 1.9559007073560217e-05, "loss": 0.1578, "step": 37240 }, { "epoch": 3.069639718804921, "grad_norm": 0.6620104312896729, "learning_rate": 1.953094193235031e-05, "loss": 0.1345, "step": 37260 }, { "epoch": 3.071287346221441, "grad_norm": 0.8160563707351685, "learning_rate": 1.950288402804637e-05, "loss": 0.1483, "step": 37280 }, { "epoch": 3.0729349736379614, "grad_norm": 0.6311087012290955, "learning_rate": 1.9474833397775887e-05, "loss": 0.1596, "step": 37300 }, { "epoch": 3.0745826010544817, "grad_norm": 1.5575555562973022, "learning_rate": 1.944679007865672e-05, "loss": 0.1478, "step": 37320 }, { "epoch": 3.076230228471002, "grad_norm": 1.3235657215118408, "learning_rate": 1.9420155731246147e-05, "loss": 0.1656, "step": 37340 }, { "epoch": 3.077877855887522, "grad_norm": 2.973172903060913, "learning_rate": 1.9392126775595654e-05, "loss": 0.1311, "step": 37360 }, { "epoch": 3.079525483304042, "grad_norm": 0.46286314725875854, "learning_rate": 1.936410524053765e-05, "loss": 0.1577, "step": 37380 }, { "epoch": 3.081173110720562, "grad_norm": 1.0705723762512207, "learning_rate": 1.933609116315149e-05, "loss": 0.1565, "step": 37400 }, { "epoch": 3.0828207381370825, "grad_norm": 0.6402031183242798, "learning_rate": 1.9308084580506677e-05, "loss": 0.1449, "step": 37420 }, { "epoch": 3.0844683655536027, "grad_norm": 5.519851207733154, "learning_rate": 1.9280085529662798e-05, "loss": 0.1702, "step": 37440 }, { "epoch": 3.086115992970123, "grad_norm": 1.0702592134475708, "learning_rate": 1.9252094047669464e-05, "loss": 0.1253, "step": 37460 }, { "epoch": 3.087763620386643, "grad_norm": 0.9042578935623169, "learning_rate": 1.9224110171566288e-05, "loss": 0.1471, "step": 37480 }, { "epoch": 3.0894112478031635, "grad_norm": 0.34733909368515015, "learning_rate": 1.9196133938382785e-05, "loss": 0.1534, "step": 37500 }, { "epoch": 3.0910588752196837, "grad_norm": 1.277514100074768, "learning_rate": 1.9168165385138385e-05, "loss": 0.149, "step": 37520 }, { "epoch": 3.092706502636204, "grad_norm": 1.3620398044586182, "learning_rate": 1.9140204548842348e-05, "loss": 0.1538, "step": 37540 }, { "epoch": 3.094354130052724, "grad_norm": 0.6622684001922607, "learning_rate": 1.9112251466493725e-05, "loss": 0.1328, "step": 37560 }, { "epoch": 3.0960017574692444, "grad_norm": 7.100107669830322, "learning_rate": 1.9084306175081303e-05, "loss": 0.1455, "step": 37580 }, { "epoch": 3.0976493848857647, "grad_norm": 0.45634210109710693, "learning_rate": 1.9056368711583558e-05, "loss": 0.1499, "step": 37600 }, { "epoch": 3.0992970123022845, "grad_norm": 1.493963360786438, "learning_rate": 1.902843911296861e-05, "loss": 0.1531, "step": 37620 }, { "epoch": 3.1009446397188047, "grad_norm": 1.5713478326797485, "learning_rate": 1.9000517416194174e-05, "loss": 0.1624, "step": 37640 }, { "epoch": 3.102592267135325, "grad_norm": 0.6847428679466248, "learning_rate": 1.8972603658207518e-05, "loss": 0.1328, "step": 37660 }, { "epoch": 3.1042398945518452, "grad_norm": 0.9931829571723938, "learning_rate": 1.8944697875945374e-05, "loss": 0.1476, "step": 37680 }, { "epoch": 3.1058875219683655, "grad_norm": 0.6386281847953796, "learning_rate": 1.8916800106333946e-05, "loss": 0.1618, "step": 37700 }, { "epoch": 3.1075351493848857, "grad_norm": 0.6640763282775879, "learning_rate": 1.8888910386288832e-05, "loss": 0.1491, "step": 37720 }, { "epoch": 3.109182776801406, "grad_norm": 1.5718262195587158, "learning_rate": 1.886102875271498e-05, "loss": 0.157, "step": 37740 }, { "epoch": 3.1108304042179262, "grad_norm": 1.4136067628860474, "learning_rate": 1.8833155242506635e-05, "loss": 0.1376, "step": 37760 }, { "epoch": 3.1124780316344465, "grad_norm": 1.0528970956802368, "learning_rate": 1.880528989254728e-05, "loss": 0.1644, "step": 37780 }, { "epoch": 3.1141256590509667, "grad_norm": 0.6771901845932007, "learning_rate": 1.8777432739709616e-05, "loss": 0.1622, "step": 37800 }, { "epoch": 3.115773286467487, "grad_norm": 0.9342694282531738, "learning_rate": 1.8749583820855493e-05, "loss": 0.1425, "step": 37820 }, { "epoch": 3.117420913884007, "grad_norm": 11.285189628601074, "learning_rate": 1.8721743172835872e-05, "loss": 0.161, "step": 37840 }, { "epoch": 3.1190685413005275, "grad_norm": 1.2589502334594727, "learning_rate": 1.869391083249075e-05, "loss": 0.1379, "step": 37860 }, { "epoch": 3.1207161687170473, "grad_norm": 0.5863010287284851, "learning_rate": 1.8666086836649156e-05, "loss": 0.1573, "step": 37880 }, { "epoch": 3.1223637961335675, "grad_norm": 0.6433321833610535, "learning_rate": 1.8638271222129056e-05, "loss": 0.1447, "step": 37900 }, { "epoch": 3.1240114235500878, "grad_norm": 0.7037969827651978, "learning_rate": 1.8610464025737336e-05, "loss": 0.1445, "step": 37920 }, { "epoch": 3.125659050966608, "grad_norm": 3.2603673934936523, "learning_rate": 1.858266528426974e-05, "loss": 0.1718, "step": 37940 }, { "epoch": 3.1273066783831283, "grad_norm": 0.9228240847587585, "learning_rate": 1.855487503451084e-05, "loss": 0.1432, "step": 37960 }, { "epoch": 3.1289543057996485, "grad_norm": 0.524641752243042, "learning_rate": 1.8527093313233927e-05, "loss": 0.1568, "step": 37980 }, { "epoch": 3.1306019332161688, "grad_norm": 0.6379425525665283, "learning_rate": 1.8499320157201057e-05, "loss": 0.1576, "step": 38000 }, { "epoch": 3.1306019332161688, "eval_loss": 0.5186827778816223, "eval_runtime": 810.9319, "eval_samples_per_second": 26.869, "eval_steps_per_second": 6.718, "eval_wer": 0.2234430593774757, "step": 38000 }, { "epoch": 3.132304481546573, "grad_norm": 0.6488630771636963, "learning_rate": 1.8471555603162927e-05, "loss": 0.1522, "step": 38020 }, { "epoch": 3.133952108963093, "grad_norm": 0.13922233879566193, "learning_rate": 1.844379968785885e-05, "loss": 0.159, "step": 38040 }, { "epoch": 3.1355997363796133, "grad_norm": 1.2710275650024414, "learning_rate": 1.8416052448016736e-05, "loss": 0.1262, "step": 38060 }, { "epoch": 3.1372473637961336, "grad_norm": 1.2879269123077393, "learning_rate": 1.8388313920352956e-05, "loss": 0.1641, "step": 38080 }, { "epoch": 3.138894991212654, "grad_norm": 0.41094842553138733, "learning_rate": 1.8360584141572414e-05, "loss": 0.1609, "step": 38100 }, { "epoch": 3.140542618629174, "grad_norm": 0.3641243875026703, "learning_rate": 1.8332863148368406e-05, "loss": 0.1506, "step": 38120 }, { "epoch": 3.1421902460456943, "grad_norm": 0.9135380983352661, "learning_rate": 1.830515097742261e-05, "loss": 0.1666, "step": 38140 }, { "epoch": 3.1438378734622145, "grad_norm": 2.1218717098236084, "learning_rate": 1.827744766540503e-05, "loss": 0.1353, "step": 38160 }, { "epoch": 3.145485500878735, "grad_norm": 1.940061092376709, "learning_rate": 1.824975324897394e-05, "loss": 0.1658, "step": 38180 }, { "epoch": 3.1471331282952546, "grad_norm": 0.5710387229919434, "learning_rate": 1.8222067764775856e-05, "loss": 0.1621, "step": 38200 }, { "epoch": 3.148780755711775, "grad_norm": 1.434187412261963, "learning_rate": 1.819439124944547e-05, "loss": 0.1555, "step": 38220 }, { "epoch": 3.150428383128295, "grad_norm": 0.399093896150589, "learning_rate": 1.8166723739605598e-05, "loss": 0.1665, "step": 38240 }, { "epoch": 3.1520760105448153, "grad_norm": 1.5391314029693604, "learning_rate": 1.8139065271867152e-05, "loss": 0.136, "step": 38260 }, { "epoch": 3.1537236379613356, "grad_norm": 0.7850829362869263, "learning_rate": 1.811141588282906e-05, "loss": 0.1549, "step": 38280 }, { "epoch": 3.155371265377856, "grad_norm": 0.7228546738624573, "learning_rate": 1.8083775609078254e-05, "loss": 0.141, "step": 38300 }, { "epoch": 3.157018892794376, "grad_norm": 0.5662539601325989, "learning_rate": 1.8056144487189602e-05, "loss": 0.1729, "step": 38320 }, { "epoch": 3.1586665202108963, "grad_norm": 0.20174540579319, "learning_rate": 1.8028522553725863e-05, "loss": 0.1549, "step": 38340 }, { "epoch": 3.1603141476274166, "grad_norm": 0.5369256734848022, "learning_rate": 1.8000909845237638e-05, "loss": 0.1321, "step": 38360 }, { "epoch": 3.161961775043937, "grad_norm": 2.0894880294799805, "learning_rate": 1.7973306398263297e-05, "loss": 0.1592, "step": 38380 }, { "epoch": 3.163609402460457, "grad_norm": 0.35338452458381653, "learning_rate": 1.7945712249328982e-05, "loss": 0.1482, "step": 38400 }, { "epoch": 3.1652570298769773, "grad_norm": 0.660490095615387, "learning_rate": 1.7918127434948533e-05, "loss": 0.147, "step": 38420 }, { "epoch": 3.1669046572934976, "grad_norm": 0.15226125717163086, "learning_rate": 1.789055199162342e-05, "loss": 0.1602, "step": 38440 }, { "epoch": 3.1685522847100174, "grad_norm": 1.184643268585205, "learning_rate": 1.7862985955842736e-05, "loss": 0.1352, "step": 38460 }, { "epoch": 3.1701999121265376, "grad_norm": 6.8397536277771, "learning_rate": 1.783542936408309e-05, "loss": 0.149, "step": 38480 }, { "epoch": 3.171847539543058, "grad_norm": 0.5208004713058472, "learning_rate": 1.780788225280862e-05, "loss": 0.138, "step": 38500 }, { "epoch": 3.173495166959578, "grad_norm": 5.069784164428711, "learning_rate": 1.7780344658470928e-05, "loss": 0.144, "step": 38520 }, { "epoch": 3.1751427943760984, "grad_norm": 0.22637605667114258, "learning_rate": 1.7752816617509e-05, "loss": 0.1561, "step": 38540 }, { "epoch": 3.1767904217926186, "grad_norm": 1.149791955947876, "learning_rate": 1.7725298166349193e-05, "loss": 0.1294, "step": 38560 }, { "epoch": 3.178438049209139, "grad_norm": 0.4961896240711212, "learning_rate": 1.7697789341405163e-05, "loss": 0.1557, "step": 38580 }, { "epoch": 3.180085676625659, "grad_norm": 1.0814521312713623, "learning_rate": 1.7670290179077844e-05, "loss": 0.1595, "step": 38600 }, { "epoch": 3.1817333040421794, "grad_norm": 4.159520626068115, "learning_rate": 1.7642800715755376e-05, "loss": 0.1617, "step": 38620 }, { "epoch": 3.1833809314586996, "grad_norm": 0.23039385676383972, "learning_rate": 1.761532098781306e-05, "loss": 0.1539, "step": 38640 }, { "epoch": 3.18502855887522, "grad_norm": 0.8867893218994141, "learning_rate": 1.7587851031613337e-05, "loss": 0.1391, "step": 38660 }, { "epoch": 3.18667618629174, "grad_norm": 0.7063521146774292, "learning_rate": 1.7560390883505672e-05, "loss": 0.1482, "step": 38680 }, { "epoch": 3.18832381370826, "grad_norm": 0.4100753366947174, "learning_rate": 1.7532940579826598e-05, "loss": 0.1629, "step": 38700 }, { "epoch": 3.18997144112478, "grad_norm": 1.997273325920105, "learning_rate": 1.75055001568996e-05, "loss": 0.1573, "step": 38720 }, { "epoch": 3.1916190685413004, "grad_norm": 0.39883023500442505, "learning_rate": 1.74780696510351e-05, "loss": 0.153, "step": 38740 }, { "epoch": 3.1932666959578206, "grad_norm": 1.3483376502990723, "learning_rate": 1.7450649098530368e-05, "loss": 0.139, "step": 38760 }, { "epoch": 3.194914323374341, "grad_norm": 3.0959737300872803, "learning_rate": 1.7423238535669538e-05, "loss": 0.1582, "step": 38780 }, { "epoch": 3.196561950790861, "grad_norm": 0.8072210550308228, "learning_rate": 1.7395837998723512e-05, "loss": 0.1654, "step": 38800 }, { "epoch": 3.1982095782073814, "grad_norm": 1.0747969150543213, "learning_rate": 1.7368447523949922e-05, "loss": 0.1615, "step": 38820 }, { "epoch": 3.1998572056239016, "grad_norm": 1.8250178098678589, "learning_rate": 1.734106714759309e-05, "loss": 0.1572, "step": 38840 }, { "epoch": 3.201504833040422, "grad_norm": 0.7575979232788086, "learning_rate": 1.731369690588397e-05, "loss": 0.1329, "step": 38860 }, { "epoch": 3.203152460456942, "grad_norm": 1.538978934288025, "learning_rate": 1.72877045964653e-05, "loss": 0.1588, "step": 38880 }, { "epoch": 3.2048000878734624, "grad_norm": 0.6645594239234924, "learning_rate": 1.7260354221477675e-05, "loss": 0.1628, "step": 38900 }, { "epoch": 3.2064477152899826, "grad_norm": 1.7454744577407837, "learning_rate": 1.723301408794078e-05, "loss": 0.1528, "step": 38920 }, { "epoch": 3.2080953427065024, "grad_norm": 0.19629539549350739, "learning_rate": 1.7205684232032316e-05, "loss": 0.1772, "step": 38940 }, { "epoch": 3.2097429701230227, "grad_norm": 13.83620548248291, "learning_rate": 1.717836468991638e-05, "loss": 0.146, "step": 38960 }, { "epoch": 3.211390597539543, "grad_norm": 1.3969836235046387, "learning_rate": 1.7151055497743437e-05, "loss": 0.1644, "step": 38980 }, { "epoch": 3.213038224956063, "grad_norm": 0.7652454972267151, "learning_rate": 1.712375669165025e-05, "loss": 0.1585, "step": 39000 }, { "epoch": 3.2146858523725834, "grad_norm": 1.5630168914794922, "learning_rate": 1.7096468307759844e-05, "loss": 0.1556, "step": 39020 }, { "epoch": 3.2163334797891037, "grad_norm": 0.46949490904808044, "learning_rate": 1.706919038218145e-05, "loss": 0.1495, "step": 39040 }, { "epoch": 3.217981107205624, "grad_norm": 4.1307692527771, "learning_rate": 1.7041922951010436e-05, "loss": 0.1408, "step": 39060 }, { "epoch": 3.219628734622144, "grad_norm": 0.4786194860935211, "learning_rate": 1.7014666050328325e-05, "loss": 0.1751, "step": 39080 }, { "epoch": 3.2212763620386644, "grad_norm": 0.9991576075553894, "learning_rate": 1.6987419716202672e-05, "loss": 0.1508, "step": 39100 }, { "epoch": 3.2229239894551847, "grad_norm": 0.6878542304039001, "learning_rate": 1.696018398468708e-05, "loss": 0.1464, "step": 39120 }, { "epoch": 3.224571616871705, "grad_norm": 0.37046971917152405, "learning_rate": 1.6932958891821106e-05, "loss": 0.1502, "step": 39140 }, { "epoch": 3.226219244288225, "grad_norm": 1.1257407665252686, "learning_rate": 1.690574447363021e-05, "loss": 0.1369, "step": 39160 }, { "epoch": 3.2278668717047454, "grad_norm": 11.745176315307617, "learning_rate": 1.687854076612575e-05, "loss": 0.172, "step": 39180 }, { "epoch": 3.229514499121265, "grad_norm": 0.8210144639015198, "learning_rate": 1.6851347805304917e-05, "loss": 0.1706, "step": 39200 }, { "epoch": 3.2311621265377855, "grad_norm": 1.5889972448349, "learning_rate": 1.682416562715067e-05, "loss": 0.1449, "step": 39220 }, { "epoch": 3.2328097539543057, "grad_norm": 0.25009989738464355, "learning_rate": 1.6796994267631704e-05, "loss": 0.167, "step": 39240 }, { "epoch": 3.234457381370826, "grad_norm": 0.7307600975036621, "learning_rate": 1.676983376270238e-05, "loss": 0.1413, "step": 39260 }, { "epoch": 3.236105008787346, "grad_norm": 1.3502663373947144, "learning_rate": 1.6742684148302727e-05, "loss": 0.1693, "step": 39280 }, { "epoch": 3.2377526362038664, "grad_norm": 0.6214455366134644, "learning_rate": 1.671554546035834e-05, "loss": 0.1497, "step": 39300 }, { "epoch": 3.2394002636203867, "grad_norm": 0.5916234850883484, "learning_rate": 1.6688417734780364e-05, "loss": 0.1701, "step": 39320 }, { "epoch": 3.241047891036907, "grad_norm": 0.2480258345603943, "learning_rate": 1.6661301007465452e-05, "loss": 0.1605, "step": 39340 }, { "epoch": 3.242695518453427, "grad_norm": 1.1683536767959595, "learning_rate": 1.6634195314295665e-05, "loss": 0.1475, "step": 39360 }, { "epoch": 3.2443431458699474, "grad_norm": 6.31130313873291, "learning_rate": 1.6607100691138494e-05, "loss": 0.1799, "step": 39380 }, { "epoch": 3.2459907732864677, "grad_norm": 0.7771806120872498, "learning_rate": 1.6580017173846784e-05, "loss": 0.1494, "step": 39400 }, { "epoch": 3.2476384007029875, "grad_norm": 1.8961833715438843, "learning_rate": 1.6552944798258665e-05, "loss": 0.153, "step": 39420 }, { "epoch": 3.2492860281195077, "grad_norm": 2.086655616760254, "learning_rate": 1.652588360019755e-05, "loss": 0.1708, "step": 39440 }, { "epoch": 3.250933655536028, "grad_norm": 1.0704363584518433, "learning_rate": 1.649883361547202e-05, "loss": 0.1441, "step": 39460 }, { "epoch": 3.2525812829525482, "grad_norm": 0.9307258129119873, "learning_rate": 1.647179487987586e-05, "loss": 0.1579, "step": 39480 }, { "epoch": 3.2542289103690685, "grad_norm": 1.0694314241409302, "learning_rate": 1.6444767429187947e-05, "loss": 0.1651, "step": 39500 }, { "epoch": 3.2558765377855887, "grad_norm": 1.8976612091064453, "learning_rate": 1.6417751299172233e-05, "loss": 0.1552, "step": 39520 }, { "epoch": 3.257524165202109, "grad_norm": 0.33035072684288025, "learning_rate": 1.6390746525577687e-05, "loss": 0.1633, "step": 39540 }, { "epoch": 3.259171792618629, "grad_norm": 0.8933309316635132, "learning_rate": 1.6363753144138254e-05, "loss": 0.14, "step": 39560 }, { "epoch": 3.2608194200351495, "grad_norm": 0.6716617345809937, "learning_rate": 1.6336771190572797e-05, "loss": 0.1644, "step": 39580 }, { "epoch": 3.2624670474516697, "grad_norm": 5.440345764160156, "learning_rate": 1.6309800700585065e-05, "loss": 0.1657, "step": 39600 }, { "epoch": 3.26411467486819, "grad_norm": 3.83003830909729, "learning_rate": 1.6282841709863644e-05, "loss": 0.1784, "step": 39620 }, { "epoch": 3.26576230228471, "grad_norm": 0.3602890968322754, "learning_rate": 1.6255894254081876e-05, "loss": 0.1852, "step": 39640 }, { "epoch": 3.2674099297012305, "grad_norm": 1.2716591358184814, "learning_rate": 1.6228958368897866e-05, "loss": 0.1426, "step": 39660 }, { "epoch": 3.2690575571177503, "grad_norm": 0.9488288760185242, "learning_rate": 1.6202034089954406e-05, "loss": 0.1551, "step": 39680 }, { "epoch": 3.2707051845342705, "grad_norm": 0.6714811325073242, "learning_rate": 1.617512145287892e-05, "loss": 0.1541, "step": 39700 }, { "epoch": 3.2723528119507908, "grad_norm": 0.9304949641227722, "learning_rate": 1.614822049328344e-05, "loss": 0.1701, "step": 39720 }, { "epoch": 3.274000439367311, "grad_norm": 0.3308255076408386, "learning_rate": 1.6121331246764522e-05, "loss": 0.1621, "step": 39740 }, { "epoch": 3.2756480667838312, "grad_norm": 1.1160569190979004, "learning_rate": 1.6094453748903243e-05, "loss": 0.1395, "step": 39760 }, { "epoch": 3.2772956942003515, "grad_norm": 0.5838717222213745, "learning_rate": 1.6067588035265128e-05, "loss": 0.1556, "step": 39780 }, { "epoch": 3.2789433216168717, "grad_norm": 1.3350988626480103, "learning_rate": 1.6040734141400116e-05, "loss": 0.1496, "step": 39800 }, { "epoch": 3.280590949033392, "grad_norm": 0.5808613300323486, "learning_rate": 1.60138921028425e-05, "loss": 0.1516, "step": 39820 }, { "epoch": 3.2822385764499122, "grad_norm": 0.6595000624656677, "learning_rate": 1.5987061955110866e-05, "loss": 0.1521, "step": 39840 }, { "epoch": 3.2838862038664325, "grad_norm": 0.8309194445610046, "learning_rate": 1.5960243733708102e-05, "loss": 0.1376, "step": 39860 }, { "epoch": 3.2855338312829527, "grad_norm": 1.0434929132461548, "learning_rate": 1.59334374741213e-05, "loss": 0.1601, "step": 39880 }, { "epoch": 3.2871814586994725, "grad_norm": 2.53883957862854, "learning_rate": 1.5907982639453698e-05, "loss": 0.1461, "step": 39900 }, { "epoch": 3.288829086115993, "grad_norm": 6.17413330078125, "learning_rate": 1.5881199807417752e-05, "loss": 0.147, "step": 39920 }, { "epoch": 3.290476713532513, "grad_norm": 0.4980841279029846, "learning_rate": 1.5854429041792265e-05, "loss": 0.1493, "step": 39940 }, { "epoch": 3.2921243409490333, "grad_norm": 0.8692514896392822, "learning_rate": 1.5827670378001537e-05, "loss": 0.1343, "step": 39960 }, { "epoch": 3.2937719683655535, "grad_norm": 1.5790104866027832, "learning_rate": 1.5800923851453842e-05, "loss": 0.1609, "step": 39980 }, { "epoch": 3.2954195957820738, "grad_norm": 2.524847984313965, "learning_rate": 1.5774189497541406e-05, "loss": 0.1503, "step": 40000 }, { "epoch": 3.2954195957820738, "eval_loss": 0.5555781722068787, "eval_runtime": 807.4041, "eval_samples_per_second": 26.986, "eval_steps_per_second": 6.748, "eval_wer": 0.22916497537952482, "step": 40000 }, { "epoch": 3.297067223198594, "grad_norm": 1.8827567100524902, "learning_rate": 1.5747467351640345e-05, "loss": 0.1538, "step": 40020 }, { "epoch": 3.2987148506151143, "grad_norm": 0.4699152410030365, "learning_rate": 1.5720757449110597e-05, "loss": 0.1671, "step": 40040 }, { "epoch": 3.3003624780316345, "grad_norm": 0.644223690032959, "learning_rate": 1.5694059825295928e-05, "loss": 0.135, "step": 40060 }, { "epoch": 3.3020101054481548, "grad_norm": 0.40487343072891235, "learning_rate": 1.5667374515523843e-05, "loss": 0.1394, "step": 40080 }, { "epoch": 3.303657732864675, "grad_norm": 0.7618485689163208, "learning_rate": 1.564070155510556e-05, "loss": 0.1663, "step": 40100 }, { "epoch": 3.3053053602811953, "grad_norm": 0.6829048991203308, "learning_rate": 1.561404097933596e-05, "loss": 0.146, "step": 40120 }, { "epoch": 3.3069529876977155, "grad_norm": 1.3032153844833374, "learning_rate": 1.5587392823493506e-05, "loss": 0.1531, "step": 40140 }, { "epoch": 3.3086006151142353, "grad_norm": 1.1882566213607788, "learning_rate": 1.556075712284026e-05, "loss": 0.143, "step": 40160 }, { "epoch": 3.3102482425307556, "grad_norm": 9.589151382446289, "learning_rate": 1.5534133912621786e-05, "loss": 0.1644, "step": 40180 }, { "epoch": 3.311895869947276, "grad_norm": 1.2138134241104126, "learning_rate": 1.550752322806713e-05, "loss": 0.1495, "step": 40200 }, { "epoch": 3.313543497363796, "grad_norm": 0.6604083776473999, "learning_rate": 1.5480925104388762e-05, "loss": 0.1519, "step": 40220 }, { "epoch": 3.3151911247803163, "grad_norm": 0.5243780612945557, "learning_rate": 1.5454339576782517e-05, "loss": 0.1624, "step": 40240 }, { "epoch": 3.3168387521968365, "grad_norm": 0.8697063326835632, "learning_rate": 1.542776668042758e-05, "loss": 0.1324, "step": 40260 }, { "epoch": 3.318486379613357, "grad_norm": 2.33467960357666, "learning_rate": 1.5401206450486406e-05, "loss": 0.1718, "step": 40280 }, { "epoch": 3.320134007029877, "grad_norm": 2.60256028175354, "learning_rate": 1.5374658922104704e-05, "loss": 0.1613, "step": 40300 }, { "epoch": 3.3217816344463973, "grad_norm": 1.0596078634262085, "learning_rate": 1.5348124130411363e-05, "loss": 0.1687, "step": 40320 }, { "epoch": 3.3234292618629175, "grad_norm": 0.6154633164405823, "learning_rate": 1.532160211051843e-05, "loss": 0.1694, "step": 40340 }, { "epoch": 3.325076889279438, "grad_norm": 0.6055323481559753, "learning_rate": 1.529509289752104e-05, "loss": 0.1398, "step": 40360 }, { "epoch": 3.3267245166959576, "grad_norm": 0.6304402947425842, "learning_rate": 1.526859652649739e-05, "loss": 0.1613, "step": 40380 }, { "epoch": 3.3283721441124783, "grad_norm": 0.7361418604850769, "learning_rate": 1.5242113032508697e-05, "loss": 0.1659, "step": 40400 }, { "epoch": 3.330019771528998, "grad_norm": 0.3940108120441437, "learning_rate": 1.5215642450599099e-05, "loss": 0.1483, "step": 40420 }, { "epoch": 3.3316673989455183, "grad_norm": 0.16881057620048523, "learning_rate": 1.5189184815795676e-05, "loss": 0.1516, "step": 40440 }, { "epoch": 3.3333150263620386, "grad_norm": 0.9273294806480408, "learning_rate": 1.516274016310838e-05, "loss": 0.1542, "step": 40460 }, { "epoch": 3.334962653778559, "grad_norm": 0.9351533651351929, "learning_rate": 1.5136308527529974e-05, "loss": 0.1544, "step": 40480 }, { "epoch": 3.336610281195079, "grad_norm": 0.6827693581581116, "learning_rate": 1.5109889944036005e-05, "loss": 0.1545, "step": 40500 }, { "epoch": 3.3382579086115993, "grad_norm": 0.5103474259376526, "learning_rate": 1.5083484447584722e-05, "loss": 0.1535, "step": 40520 }, { "epoch": 3.3399055360281196, "grad_norm": 0.4932575821876526, "learning_rate": 1.5057092073117097e-05, "loss": 0.163, "step": 40540 }, { "epoch": 3.34155316344464, "grad_norm": 0.8227238655090332, "learning_rate": 1.5030712855556705e-05, "loss": 0.1447, "step": 40560 }, { "epoch": 3.34320079086116, "grad_norm": 1.6172962188720703, "learning_rate": 1.5004346829809729e-05, "loss": 0.1559, "step": 40580 }, { "epoch": 3.3448484182776803, "grad_norm": 2.6368677616119385, "learning_rate": 1.49779940307649e-05, "loss": 0.1573, "step": 40600 }, { "epoch": 3.3464960456942006, "grad_norm": 0.5566956996917725, "learning_rate": 1.4951654493293427e-05, "loss": 0.1645, "step": 40620 }, { "epoch": 3.3481436731107204, "grad_norm": 0.9300214052200317, "learning_rate": 1.4925328252248982e-05, "loss": 0.1735, "step": 40640 }, { "epoch": 3.3497913005272406, "grad_norm": 2.0181519985198975, "learning_rate": 1.4899015342467649e-05, "loss": 0.1469, "step": 40660 }, { "epoch": 3.351438927943761, "grad_norm": 1.1045427322387695, "learning_rate": 1.4872715798767861e-05, "loss": 0.1666, "step": 40680 }, { "epoch": 3.353086555360281, "grad_norm": 1.1991610527038574, "learning_rate": 1.4846429655950383e-05, "loss": 0.1648, "step": 40700 }, { "epoch": 3.3547341827768014, "grad_norm": 0.6012402772903442, "learning_rate": 1.4820156948798205e-05, "loss": 0.1479, "step": 40720 }, { "epoch": 3.3563818101933216, "grad_norm": 0.15366359055042267, "learning_rate": 1.4793897712076581e-05, "loss": 0.1608, "step": 40740 }, { "epoch": 3.358029437609842, "grad_norm": 1.1802315711975098, "learning_rate": 1.4767651980532926e-05, "loss": 0.1384, "step": 40760 }, { "epoch": 3.359677065026362, "grad_norm": 0.8219059109687805, "learning_rate": 1.4741419788896784e-05, "loss": 0.1594, "step": 40780 }, { "epoch": 3.3613246924428823, "grad_norm": 1.291498064994812, "learning_rate": 1.4715201171879789e-05, "loss": 0.1646, "step": 40800 }, { "epoch": 3.3629723198594026, "grad_norm": 1.1582722663879395, "learning_rate": 1.4688996164175589e-05, "loss": 0.1539, "step": 40820 }, { "epoch": 3.364619947275923, "grad_norm": 0.22327923774719238, "learning_rate": 1.4662804800459846e-05, "loss": 0.173, "step": 40840 }, { "epoch": 3.3662675746924426, "grad_norm": 0.767255425453186, "learning_rate": 1.4636627115390167e-05, "loss": 0.1339, "step": 40860 }, { "epoch": 3.3679152021089633, "grad_norm": 0.8810155987739563, "learning_rate": 1.4610463143606046e-05, "loss": 0.1579, "step": 40880 }, { "epoch": 3.369562829525483, "grad_norm": 0.5456433296203613, "learning_rate": 1.4584312919728853e-05, "loss": 0.15, "step": 40900 }, { "epoch": 3.3712104569420034, "grad_norm": 1.5852445363998413, "learning_rate": 1.4558176478361735e-05, "loss": 0.1619, "step": 40920 }, { "epoch": 3.3728580843585236, "grad_norm": 0.3770715296268463, "learning_rate": 1.4532053854089625e-05, "loss": 0.1533, "step": 40940 }, { "epoch": 3.374505711775044, "grad_norm": 0.9395153522491455, "learning_rate": 1.4505945081479161e-05, "loss": 0.1415, "step": 40960 }, { "epoch": 3.376153339191564, "grad_norm": 0.5108890533447266, "learning_rate": 1.4479850195078664e-05, "loss": 0.1675, "step": 40980 }, { "epoch": 3.3778009666080844, "grad_norm": 0.7402895092964172, "learning_rate": 1.445376922941808e-05, "loss": 0.1633, "step": 41000 }, { "epoch": 3.3794485940246046, "grad_norm": 2.410801887512207, "learning_rate": 1.4427702219008901e-05, "loss": 0.1661, "step": 41020 }, { "epoch": 3.381096221441125, "grad_norm": 0.3239726722240448, "learning_rate": 1.4401649198344196e-05, "loss": 0.1576, "step": 41040 }, { "epoch": 3.382743848857645, "grad_norm": 0.6572871208190918, "learning_rate": 1.4375610201898505e-05, "loss": 0.1386, "step": 41060 }, { "epoch": 3.3843914762741654, "grad_norm": 0.9796928763389587, "learning_rate": 1.4350886176591104e-05, "loss": 0.1531, "step": 41080 }, { "epoch": 3.3860391036906856, "grad_norm": 1.2608447074890137, "learning_rate": 1.4324874626459562e-05, "loss": 0.1589, "step": 41100 }, { "epoch": 3.3876867311072054, "grad_norm": 1.1734158992767334, "learning_rate": 1.4298877202138631e-05, "loss": 0.155, "step": 41120 }, { "epoch": 3.3893343585237257, "grad_norm": 0.28572332859039307, "learning_rate": 1.4272893938029281e-05, "loss": 0.1594, "step": 41140 }, { "epoch": 3.390981985940246, "grad_norm": 22.722564697265625, "learning_rate": 1.4246924868513762e-05, "loss": 0.1429, "step": 41160 }, { "epoch": 3.392629613356766, "grad_norm": 0.7266652584075928, "learning_rate": 1.4220970027955499e-05, "loss": 0.1656, "step": 41180 }, { "epoch": 3.3942772407732864, "grad_norm": 1.7261922359466553, "learning_rate": 1.4195029450699135e-05, "loss": 0.1553, "step": 41200 }, { "epoch": 3.3959248681898067, "grad_norm": 1.3773916959762573, "learning_rate": 1.4169103171070414e-05, "loss": 0.1568, "step": 41220 }, { "epoch": 3.397572495606327, "grad_norm": 0.42017877101898193, "learning_rate": 1.4143191223376162e-05, "loss": 0.1491, "step": 41240 }, { "epoch": 3.399220123022847, "grad_norm": 1.750963807106018, "learning_rate": 1.4117293641904256e-05, "loss": 0.1386, "step": 41260 }, { "epoch": 3.4008677504393674, "grad_norm": 1.1979619264602661, "learning_rate": 1.4091410460923554e-05, "loss": 0.1728, "step": 41280 }, { "epoch": 3.4025153778558876, "grad_norm": 0.625075101852417, "learning_rate": 1.4065541714683836e-05, "loss": 0.1458, "step": 41300 }, { "epoch": 3.404163005272408, "grad_norm": 1.3867195844650269, "learning_rate": 1.4039687437415813e-05, "loss": 0.1607, "step": 41320 }, { "epoch": 3.4058106326889277, "grad_norm": 0.3580436110496521, "learning_rate": 1.4013847663331042e-05, "loss": 0.176, "step": 41340 }, { "epoch": 3.4074582601054484, "grad_norm": 1.7142635583877563, "learning_rate": 1.398802242662188e-05, "loss": 0.1395, "step": 41360 }, { "epoch": 3.409105887521968, "grad_norm": 0.6943619847297668, "learning_rate": 1.3962211761461466e-05, "loss": 0.168, "step": 41380 }, { "epoch": 3.4107535149384884, "grad_norm": 1.8670823574066162, "learning_rate": 1.3936415702003624e-05, "loss": 0.1596, "step": 41400 }, { "epoch": 3.4124011423550087, "grad_norm": 2.691347360610962, "learning_rate": 1.3910634282382878e-05, "loss": 0.1528, "step": 41420 }, { "epoch": 3.414048769771529, "grad_norm": 0.25475576519966125, "learning_rate": 1.388486753671438e-05, "loss": 0.1516, "step": 41440 }, { "epoch": 3.415696397188049, "grad_norm": 1.1902556419372559, "learning_rate": 1.3859115499093849e-05, "loss": 0.1311, "step": 41460 }, { "epoch": 3.4173440246045694, "grad_norm": 0.6261383891105652, "learning_rate": 1.383337820359757e-05, "loss": 0.1641, "step": 41480 }, { "epoch": 3.4189916520210897, "grad_norm": 0.8379786610603333, "learning_rate": 1.3807655684282272e-05, "loss": 0.1674, "step": 41500 }, { "epoch": 3.42063927943761, "grad_norm": 1.3956279754638672, "learning_rate": 1.3781947975185178e-05, "loss": 0.1628, "step": 41520 }, { "epoch": 3.42228690685413, "grad_norm": 0.22908486425876617, "learning_rate": 1.3756255110323891e-05, "loss": 0.159, "step": 41540 }, { "epoch": 3.4239345342706504, "grad_norm": 1.428760290145874, "learning_rate": 1.3730577123696376e-05, "loss": 0.1397, "step": 41560 }, { "epoch": 3.4255821616871707, "grad_norm": 3.128469467163086, "learning_rate": 1.3704914049280926e-05, "loss": 0.1554, "step": 41580 }, { "epoch": 3.4272297891036905, "grad_norm": 0.8418864011764526, "learning_rate": 1.3679265921036062e-05, "loss": 0.1482, "step": 41600 }, { "epoch": 3.4288774165202107, "grad_norm": 1.0914111137390137, "learning_rate": 1.365363277290056e-05, "loss": 0.148, "step": 41620 }, { "epoch": 3.430525043936731, "grad_norm": 0.5711450576782227, "learning_rate": 1.3628014638793369e-05, "loss": 0.158, "step": 41640 }, { "epoch": 3.432172671353251, "grad_norm": 1.1503074169158936, "learning_rate": 1.360241155261357e-05, "loss": 0.1343, "step": 41660 }, { "epoch": 3.4338202987697715, "grad_norm": 1.1104477643966675, "learning_rate": 1.3576823548240333e-05, "loss": 0.1539, "step": 41680 }, { "epoch": 3.4354679261862917, "grad_norm": 3.4182581901550293, "learning_rate": 1.3551250659532852e-05, "loss": 0.1602, "step": 41700 }, { "epoch": 3.437115553602812, "grad_norm": 0.9579629302024841, "learning_rate": 1.352569292033034e-05, "loss": 0.1449, "step": 41720 }, { "epoch": 3.438763181019332, "grad_norm": 0.18893243372440338, "learning_rate": 1.3500150364451963e-05, "loss": 0.1503, "step": 41740 }, { "epoch": 3.4404108084358525, "grad_norm": 1.7320404052734375, "learning_rate": 1.3474623025696792e-05, "loss": 0.1315, "step": 41760 }, { "epoch": 3.4420584358523727, "grad_norm": 0.9133217930793762, "learning_rate": 1.3449110937843751e-05, "loss": 0.1573, "step": 41780 }, { "epoch": 3.443706063268893, "grad_norm": 0.9956775903701782, "learning_rate": 1.3423614134651602e-05, "loss": 0.1461, "step": 41800 }, { "epoch": 3.445353690685413, "grad_norm": 3.1847634315490723, "learning_rate": 1.3398132649858868e-05, "loss": 0.1528, "step": 41820 }, { "epoch": 3.4470013181019334, "grad_norm": 0.3741768002510071, "learning_rate": 1.3372666517183813e-05, "loss": 0.1561, "step": 41840 }, { "epoch": 3.4486489455184532, "grad_norm": 0.5312712788581848, "learning_rate": 1.3347215770324372e-05, "loss": 0.137, "step": 41860 }, { "epoch": 3.4502965729349735, "grad_norm": 0.594749927520752, "learning_rate": 1.3321780442958143e-05, "loss": 0.1611, "step": 41880 }, { "epoch": 3.4519442003514937, "grad_norm": 0.9457196593284607, "learning_rate": 1.3296360568742282e-05, "loss": 0.1535, "step": 41900 }, { "epoch": 3.453591827768014, "grad_norm": 1.3242007493972778, "learning_rate": 1.327095618131354e-05, "loss": 0.1513, "step": 41920 }, { "epoch": 3.4552394551845342, "grad_norm": 0.14979644119739532, "learning_rate": 1.3245567314288148e-05, "loss": 0.1538, "step": 41940 }, { "epoch": 3.4568870826010545, "grad_norm": 3.511759042739868, "learning_rate": 1.3220194001261813e-05, "loss": 0.1442, "step": 41960 }, { "epoch": 3.4585347100175747, "grad_norm": 2.007502794265747, "learning_rate": 1.3194836275809658e-05, "loss": 0.1631, "step": 41980 }, { "epoch": 3.460182337434095, "grad_norm": 3.720379590988159, "learning_rate": 1.3169494171486164e-05, "loss": 0.1442, "step": 42000 }, { "epoch": 3.460182337434095, "eval_loss": 0.5300702452659607, "eval_runtime": 254.8955, "eval_samples_per_second": 85.482, "eval_steps_per_second": 21.373, "eval_wer": 0.22274321642223485, "step": 42000 }, { "epoch": 3.4618299648506152, "grad_norm": 1.0895417928695679, "learning_rate": 1.3144167721825157e-05, "loss": 0.1459, "step": 42020 }, { "epoch": 3.4634775922671355, "grad_norm": 0.23793169856071472, "learning_rate": 1.311885696033975e-05, "loss": 0.1583, "step": 42040 }, { "epoch": 3.4651252196836557, "grad_norm": 0.7614142298698425, "learning_rate": 1.3093561920522293e-05, "loss": 0.1396, "step": 42060 }, { "epoch": 3.4667728471001755, "grad_norm": 0.9006688594818115, "learning_rate": 1.3068282635844337e-05, "loss": 0.1558, "step": 42080 }, { "epoch": 3.4684204745166958, "grad_norm": 0.690870463848114, "learning_rate": 1.3043019139756563e-05, "loss": 0.1487, "step": 42100 }, { "epoch": 3.470068101933216, "grad_norm": 0.4665576219558716, "learning_rate": 1.3017771465688782e-05, "loss": 0.1546, "step": 42120 }, { "epoch": 3.4717157293497363, "grad_norm": 0.12971307337284088, "learning_rate": 1.2992539647049867e-05, "loss": 0.1659, "step": 42140 }, { "epoch": 3.4733633567662565, "grad_norm": 1.8812086582183838, "learning_rate": 1.2967323717227715e-05, "loss": 0.141, "step": 42160 }, { "epoch": 3.4750109841827768, "grad_norm": 0.7102630734443665, "learning_rate": 1.2942123709589165e-05, "loss": 0.1554, "step": 42180 }, { "epoch": 3.476658611599297, "grad_norm": 0.522306501865387, "learning_rate": 1.2916939657480028e-05, "loss": 0.158, "step": 42200 }, { "epoch": 3.4783062390158173, "grad_norm": 1.0656942129135132, "learning_rate": 1.2891771594224983e-05, "loss": 0.1546, "step": 42220 }, { "epoch": 3.4799538664323375, "grad_norm": 0.21948251128196716, "learning_rate": 1.2866619553127551e-05, "loss": 0.1635, "step": 42240 }, { "epoch": 3.4816014938488578, "grad_norm": 0.8842756748199463, "learning_rate": 1.284148356747007e-05, "loss": 0.1368, "step": 42260 }, { "epoch": 3.483249121265378, "grad_norm": 0.8186292052268982, "learning_rate": 1.2816363670513592e-05, "loss": 0.1429, "step": 42280 }, { "epoch": 3.4848967486818982, "grad_norm": 1.9236111640930176, "learning_rate": 1.279125989549792e-05, "loss": 0.1467, "step": 42300 }, { "epoch": 3.4865443760984185, "grad_norm": 0.48028749227523804, "learning_rate": 1.2766172275641507e-05, "loss": 0.1589, "step": 42320 }, { "epoch": 3.4881920035149383, "grad_norm": 0.4948166608810425, "learning_rate": 1.274110084414143e-05, "loss": 0.1437, "step": 42340 }, { "epoch": 3.4898396309314585, "grad_norm": 1.7658034563064575, "learning_rate": 1.2716045634173345e-05, "loss": 0.1518, "step": 42360 }, { "epoch": 3.491487258347979, "grad_norm": 0.6642500162124634, "learning_rate": 1.2691006678891446e-05, "loss": 0.1595, "step": 42380 }, { "epoch": 3.493134885764499, "grad_norm": 0.4344884157180786, "learning_rate": 1.2665984011428414e-05, "loss": 0.1488, "step": 42400 }, { "epoch": 3.4947825131810193, "grad_norm": 0.3623495399951935, "learning_rate": 1.2640977664895387e-05, "loss": 0.149, "step": 42420 }, { "epoch": 3.4964301405975395, "grad_norm": 0.13554301857948303, "learning_rate": 1.2615987672381884e-05, "loss": 0.1578, "step": 42440 }, { "epoch": 3.49807776801406, "grad_norm": 3.0575642585754395, "learning_rate": 1.2591014066955803e-05, "loss": 0.1246, "step": 42460 }, { "epoch": 3.49972539543058, "grad_norm": 0.6213861107826233, "learning_rate": 1.2566056881663352e-05, "loss": 0.1578, "step": 42480 }, { "epoch": 3.5013730228471003, "grad_norm": 0.3637482523918152, "learning_rate": 1.2541116149529015e-05, "loss": 0.1567, "step": 42500 }, { "epoch": 3.5030206502636205, "grad_norm": 0.5865427851676941, "learning_rate": 1.2516191903555496e-05, "loss": 0.1512, "step": 42520 }, { "epoch": 3.5046682776801408, "grad_norm": 0.20063956081867218, "learning_rate": 1.2491284176723697e-05, "loss": 0.1599, "step": 42540 }, { "epoch": 3.5063159050966606, "grad_norm": 0.615571141242981, "learning_rate": 1.2466393001992658e-05, "loss": 0.1288, "step": 42560 }, { "epoch": 3.5079635325131813, "grad_norm": 0.4955737590789795, "learning_rate": 1.2441518412299491e-05, "loss": 0.1531, "step": 42580 }, { "epoch": 3.509611159929701, "grad_norm": 0.3354365825653076, "learning_rate": 1.2416660440559396e-05, "loss": 0.1545, "step": 42600 }, { "epoch": 3.5112587873462213, "grad_norm": 0.47431784868240356, "learning_rate": 1.2391819119665568e-05, "loss": 0.1463, "step": 42620 }, { "epoch": 3.5129064147627416, "grad_norm": 0.18025411665439606, "learning_rate": 1.236699448248917e-05, "loss": 0.1519, "step": 42640 }, { "epoch": 3.514554042179262, "grad_norm": 0.9464624524116516, "learning_rate": 1.2342186561879296e-05, "loss": 0.1344, "step": 42660 }, { "epoch": 3.516201669595782, "grad_norm": 0.6174651384353638, "learning_rate": 1.23173953906629e-05, "loss": 0.155, "step": 42680 }, { "epoch": 3.5178492970123023, "grad_norm": 0.641559362411499, "learning_rate": 1.2292621001644795e-05, "loss": 0.148, "step": 42700 }, { "epoch": 3.5194969244288226, "grad_norm": 1.0432337522506714, "learning_rate": 1.226786342760757e-05, "loss": 0.1529, "step": 42720 }, { "epoch": 3.521144551845343, "grad_norm": 0.2608916461467743, "learning_rate": 1.2243122701311577e-05, "loss": 0.1533, "step": 42740 }, { "epoch": 3.522792179261863, "grad_norm": 12.690485000610352, "learning_rate": 1.2218398855494879e-05, "loss": 0.1404, "step": 42760 }, { "epoch": 3.524439806678383, "grad_norm": 0.7469035983085632, "learning_rate": 1.2193691922873166e-05, "loss": 0.156, "step": 42780 }, { "epoch": 3.5260874340949035, "grad_norm": 0.37507572770118713, "learning_rate": 1.2169001936139788e-05, "loss": 0.1555, "step": 42800 }, { "epoch": 3.5277350615114234, "grad_norm": 1.2525111436843872, "learning_rate": 1.2144328927965656e-05, "loss": 0.1596, "step": 42820 }, { "epoch": 3.5293826889279436, "grad_norm": 0.3763274550437927, "learning_rate": 1.2119672930999212e-05, "loss": 0.1434, "step": 42840 }, { "epoch": 3.531030316344464, "grad_norm": 0.7608032822608948, "learning_rate": 1.2095033977866404e-05, "loss": 0.1346, "step": 42860 }, { "epoch": 3.532677943760984, "grad_norm": 5.237201690673828, "learning_rate": 1.2070412101170594e-05, "loss": 0.1401, "step": 42880 }, { "epoch": 3.5343255711775043, "grad_norm": 0.3807220160961151, "learning_rate": 1.2045807333492574e-05, "loss": 0.1649, "step": 42900 }, { "epoch": 3.5359731985940246, "grad_norm": 0.5126063227653503, "learning_rate": 1.202121970739049e-05, "loss": 0.1431, "step": 42920 }, { "epoch": 3.537620826010545, "grad_norm": 0.4814915359020233, "learning_rate": 1.1996649255399809e-05, "loss": 0.1523, "step": 42940 }, { "epoch": 3.539268453427065, "grad_norm": 0.8156435489654541, "learning_rate": 1.197209601003327e-05, "loss": 0.1371, "step": 42960 }, { "epoch": 3.5409160808435853, "grad_norm": 0.6996150612831116, "learning_rate": 1.1947560003780836e-05, "loss": 0.1628, "step": 42980 }, { "epoch": 3.5425637082601056, "grad_norm": 0.7120909690856934, "learning_rate": 1.1923041269109663e-05, "loss": 0.1583, "step": 43000 }, { "epoch": 3.544211335676626, "grad_norm": 0.6141802668571472, "learning_rate": 1.1898539838464057e-05, "loss": 0.1408, "step": 43020 }, { "epoch": 3.5458589630931456, "grad_norm": 0.7216193675994873, "learning_rate": 1.1874055744265427e-05, "loss": 0.1559, "step": 43040 }, { "epoch": 3.5475065905096663, "grad_norm": 0.7931749820709229, "learning_rate": 1.1849589018912236e-05, "loss": 0.1387, "step": 43060 }, { "epoch": 3.549154217926186, "grad_norm": 3.7226061820983887, "learning_rate": 1.1825139694779964e-05, "loss": 0.1621, "step": 43080 }, { "epoch": 3.5508018453427064, "grad_norm": 1.089087963104248, "learning_rate": 1.1800707804221067e-05, "loss": 0.1552, "step": 43100 }, { "epoch": 3.5524494727592266, "grad_norm": 0.803062379360199, "learning_rate": 1.1776293379564934e-05, "loss": 0.1549, "step": 43120 }, { "epoch": 3.554097100175747, "grad_norm": 0.412349671125412, "learning_rate": 1.175189645311785e-05, "loss": 0.1546, "step": 43140 }, { "epoch": 3.555744727592267, "grad_norm": 0.5987644195556641, "learning_rate": 1.1727517057162919e-05, "loss": 0.127, "step": 43160 }, { "epoch": 3.5573923550087874, "grad_norm": 0.8935190439224243, "learning_rate": 1.1703155223960067e-05, "loss": 0.1631, "step": 43180 }, { "epoch": 3.5590399824253076, "grad_norm": 0.720613420009613, "learning_rate": 1.1678810985745983e-05, "loss": 0.169, "step": 43200 }, { "epoch": 3.560687609841828, "grad_norm": 1.8782857656478882, "learning_rate": 1.1654484374734067e-05, "loss": 0.1601, "step": 43220 }, { "epoch": 3.562335237258348, "grad_norm": 0.4977351725101471, "learning_rate": 1.1630175423114407e-05, "loss": 0.1579, "step": 43240 }, { "epoch": 3.5639828646748684, "grad_norm": 1.4265191555023193, "learning_rate": 1.1605884163053688e-05, "loss": 0.131, "step": 43260 }, { "epoch": 3.5656304920913886, "grad_norm": 4.71371603012085, "learning_rate": 1.1581610626695222e-05, "loss": 0.1656, "step": 43280 }, { "epoch": 3.5672781195079084, "grad_norm": 0.9297062158584595, "learning_rate": 1.1557354846158855e-05, "loss": 0.1639, "step": 43300 }, { "epoch": 3.568925746924429, "grad_norm": 0.9973880648612976, "learning_rate": 1.1533116853540932e-05, "loss": 0.1631, "step": 43320 }, { "epoch": 3.570573374340949, "grad_norm": 0.2250540405511856, "learning_rate": 1.150889668091428e-05, "loss": 0.1651, "step": 43340 }, { "epoch": 3.572221001757469, "grad_norm": 3.2225499153137207, "learning_rate": 1.1484694360328108e-05, "loss": 0.1396, "step": 43360 }, { "epoch": 3.5738686291739894, "grad_norm": 0.9798160195350647, "learning_rate": 1.1460509923808038e-05, "loss": 0.1687, "step": 43380 }, { "epoch": 3.5755162565905096, "grad_norm": 0.8608407974243164, "learning_rate": 1.1436343403356017e-05, "loss": 0.1574, "step": 43400 }, { "epoch": 3.57716388400703, "grad_norm": 0.802926242351532, "learning_rate": 1.1412194830950276e-05, "loss": 0.1612, "step": 43420 }, { "epoch": 3.57881151142355, "grad_norm": 0.17060858011245728, "learning_rate": 1.1389270340647518e-05, "loss": 0.1472, "step": 43440 }, { "epoch": 3.5804591388400704, "grad_norm": 1.1347203254699707, "learning_rate": 1.1365156858819454e-05, "loss": 0.133, "step": 43460 }, { "epoch": 3.5821067662565906, "grad_norm": 1.255826711654663, "learning_rate": 1.1341061419234941e-05, "loss": 0.1598, "step": 43480 }, { "epoch": 3.583754393673111, "grad_norm": 1.792175054550171, "learning_rate": 1.1316984053778159e-05, "loss": 0.162, "step": 43500 }, { "epoch": 3.5854020210896307, "grad_norm": 2.900879383087158, "learning_rate": 1.1292924794309374e-05, "loss": 0.158, "step": 43520 }, { "epoch": 3.5870496485061514, "grad_norm": 0.2784268856048584, "learning_rate": 1.1268883672664893e-05, "loss": 0.1599, "step": 43540 }, { "epoch": 3.588697275922671, "grad_norm": 0.8298107981681824, "learning_rate": 1.1244860720657006e-05, "loss": 0.1342, "step": 43560 }, { "epoch": 3.5903449033391914, "grad_norm": 0.5499345064163208, "learning_rate": 1.1220855970073981e-05, "loss": 0.16, "step": 43580 }, { "epoch": 3.5919925307557117, "grad_norm": 1.1263962984085083, "learning_rate": 1.1196869452679997e-05, "loss": 0.1605, "step": 43600 }, { "epoch": 3.593640158172232, "grad_norm": 1.825993299484253, "learning_rate": 1.1172901200215103e-05, "loss": 0.1553, "step": 43620 }, { "epoch": 3.595287785588752, "grad_norm": 0.2598525285720825, "learning_rate": 1.115014830715152e-05, "loss": 0.1541, "step": 43640 }, { "epoch": 3.5969354130052724, "grad_norm": 1.0398223400115967, "learning_rate": 1.1126215762499096e-05, "loss": 0.1403, "step": 43660 }, { "epoch": 3.5985830404217927, "grad_norm": 1.0068581104278564, "learning_rate": 1.1102301576267929e-05, "loss": 0.1651, "step": 43680 }, { "epoch": 3.600230667838313, "grad_norm": 1.069786548614502, "learning_rate": 1.1078405780102358e-05, "loss": 0.1624, "step": 43700 }, { "epoch": 3.601878295254833, "grad_norm": 1.3623285293579102, "learning_rate": 1.1054528405622395e-05, "loss": 0.1509, "step": 43720 }, { "epoch": 3.6035259226713534, "grad_norm": 0.17843444645404816, "learning_rate": 1.1030669484423648e-05, "loss": 0.1789, "step": 43740 }, { "epoch": 3.6051735500878737, "grad_norm": 2.411144733428955, "learning_rate": 1.1006829048077333e-05, "loss": 0.1272, "step": 43760 }, { "epoch": 3.6068211775043935, "grad_norm": 1.3706731796264648, "learning_rate": 1.0983007128130198e-05, "loss": 0.1472, "step": 43780 }, { "epoch": 3.608468804920914, "grad_norm": 1.3497474193572998, "learning_rate": 1.095920375610449e-05, "loss": 0.1573, "step": 43800 }, { "epoch": 3.610116432337434, "grad_norm": 0.8513794541358948, "learning_rate": 1.0935418963497915e-05, "loss": 0.1448, "step": 43820 }, { "epoch": 3.611764059753954, "grad_norm": 0.19558091461658478, "learning_rate": 1.0911652781783581e-05, "loss": 0.1562, "step": 43840 }, { "epoch": 3.6134116871704745, "grad_norm": 1.4188400506973267, "learning_rate": 1.088790524240999e-05, "loss": 0.1317, "step": 43860 }, { "epoch": 3.6150593145869947, "grad_norm": 0.6879696249961853, "learning_rate": 1.0864176376800956e-05, "loss": 0.1655, "step": 43880 }, { "epoch": 3.616706942003515, "grad_norm": 1.0637201070785522, "learning_rate": 1.0840466216355594e-05, "loss": 0.1627, "step": 43900 }, { "epoch": 3.618354569420035, "grad_norm": 1.584519386291504, "learning_rate": 1.0816774792448276e-05, "loss": 0.1543, "step": 43920 }, { "epoch": 3.6200021968365554, "grad_norm": 0.3126054108142853, "learning_rate": 1.0793102136428548e-05, "loss": 0.145, "step": 43940 }, { "epoch": 3.6216498242530757, "grad_norm": 1.0722142457962036, "learning_rate": 1.0769448279621158e-05, "loss": 0.1298, "step": 43960 }, { "epoch": 3.623297451669596, "grad_norm": 2.167898416519165, "learning_rate": 1.0745813253325957e-05, "loss": 0.1636, "step": 43980 }, { "epoch": 3.6249450790861157, "grad_norm": 0.8137891292572021, "learning_rate": 1.072219708881789e-05, "loss": 0.1522, "step": 44000 }, { "epoch": 3.6249450790861157, "eval_loss": 0.5213801860809326, "eval_runtime": 254.0846, "eval_samples_per_second": 85.755, "eval_steps_per_second": 21.442, "eval_wer": 0.2228915831287459, "step": 44000 }, { "epoch": 3.6265927065026364, "grad_norm": 1.1316087245941162, "learning_rate": 1.0698599817346943e-05, "loss": 0.1554, "step": 44020 }, { "epoch": 3.6282403339191562, "grad_norm": 0.18573714792728424, "learning_rate": 1.067502147013808e-05, "loss": 0.1405, "step": 44040 }, { "epoch": 3.6298879613356765, "grad_norm": 2.417786121368408, "learning_rate": 1.0651462078391253e-05, "loss": 0.1379, "step": 44060 }, { "epoch": 3.6315355887521967, "grad_norm": 1.5969245433807373, "learning_rate": 1.0627921673281319e-05, "loss": 0.165, "step": 44080 }, { "epoch": 3.633183216168717, "grad_norm": 0.5565603971481323, "learning_rate": 1.0604400285958004e-05, "loss": 0.1557, "step": 44100 }, { "epoch": 3.6348308435852372, "grad_norm": 7.448882102966309, "learning_rate": 1.0580897947545892e-05, "loss": 0.1562, "step": 44120 }, { "epoch": 3.6364784710017575, "grad_norm": 0.19679468870162964, "learning_rate": 1.055741468914432e-05, "loss": 0.1516, "step": 44140 }, { "epoch": 3.6381260984182777, "grad_norm": 1.2326829433441162, "learning_rate": 1.053395054182742e-05, "loss": 0.1348, "step": 44160 }, { "epoch": 3.639773725834798, "grad_norm": 0.6727321147918701, "learning_rate": 1.0510505536644008e-05, "loss": 0.1435, "step": 44180 }, { "epoch": 3.641421353251318, "grad_norm": 0.6418051719665527, "learning_rate": 1.0487079704617584e-05, "loss": 0.1491, "step": 44200 }, { "epoch": 3.6430689806678385, "grad_norm": 1.064529299736023, "learning_rate": 1.046367307674628e-05, "loss": 0.1527, "step": 44220 }, { "epoch": 3.6447166080843587, "grad_norm": 0.5230397582054138, "learning_rate": 1.0440285684002793e-05, "loss": 0.1555, "step": 44240 }, { "epoch": 3.6463642355008785, "grad_norm": 7.855539321899414, "learning_rate": 1.0416917557334388e-05, "loss": 0.1352, "step": 44260 }, { "epoch": 3.648011862917399, "grad_norm": 0.49042534828186035, "learning_rate": 1.0393568727662836e-05, "loss": 0.1505, "step": 44280 }, { "epoch": 3.649659490333919, "grad_norm": 0.5023667216300964, "learning_rate": 1.0370239225884366e-05, "loss": 0.1439, "step": 44300 }, { "epoch": 3.6513071177504393, "grad_norm": 1.5370584726333618, "learning_rate": 1.0346929082869641e-05, "loss": 0.1599, "step": 44320 }, { "epoch": 3.6529547451669595, "grad_norm": 0.6781786680221558, "learning_rate": 1.0323638329463686e-05, "loss": 0.1667, "step": 44340 }, { "epoch": 3.6546023725834798, "grad_norm": 0.6102729439735413, "learning_rate": 1.0300366996485894e-05, "loss": 0.1344, "step": 44360 }, { "epoch": 3.65625, "grad_norm": 1.4229748249053955, "learning_rate": 1.0277115114729943e-05, "loss": 0.1461, "step": 44380 }, { "epoch": 3.6578976274165202, "grad_norm": 0.6897698640823364, "learning_rate": 1.0253882714963786e-05, "loss": 0.1658, "step": 44400 }, { "epoch": 3.6595452548330405, "grad_norm": 0.6321083903312683, "learning_rate": 1.0230669827929585e-05, "loss": 0.1565, "step": 44420 }, { "epoch": 3.6611928822495607, "grad_norm": 0.13009710609912872, "learning_rate": 1.0207476484343695e-05, "loss": 0.1502, "step": 44440 }, { "epoch": 3.662840509666081, "grad_norm": 1.3412202596664429, "learning_rate": 1.0184302714896584e-05, "loss": 0.134, "step": 44460 }, { "epoch": 3.664488137082601, "grad_norm": 12.585155487060547, "learning_rate": 1.0161148550252846e-05, "loss": 0.1629, "step": 44480 }, { "epoch": 3.6661357644991215, "grad_norm": 1.5875834226608276, "learning_rate": 1.0138014021051125e-05, "loss": 0.1618, "step": 44500 }, { "epoch": 3.6677833919156413, "grad_norm": 0.5883969664573669, "learning_rate": 1.0114899157904076e-05, "loss": 0.168, "step": 44520 }, { "epoch": 3.6694310193321615, "grad_norm": 0.20514604449272156, "learning_rate": 1.0091803991398336e-05, "loss": 0.1569, "step": 44540 }, { "epoch": 3.671078646748682, "grad_norm": 0.6673290133476257, "learning_rate": 1.0068728552094482e-05, "loss": 0.1279, "step": 44560 }, { "epoch": 3.672726274165202, "grad_norm": 0.6501079797744751, "learning_rate": 1.004567287052698e-05, "loss": 0.1542, "step": 44580 }, { "epoch": 3.6743739015817223, "grad_norm": 1.013692021369934, "learning_rate": 1.002263697720415e-05, "loss": 0.1459, "step": 44600 }, { "epoch": 3.6760215289982425, "grad_norm": 0.8775647282600403, "learning_rate": 9.99962090260815e-06, "loss": 0.1432, "step": 44620 }, { "epoch": 3.6776691564147628, "grad_norm": 0.5576298236846924, "learning_rate": 9.976624677194873e-06, "loss": 0.1397, "step": 44640 }, { "epoch": 3.679316783831283, "grad_norm": 1.3125576972961426, "learning_rate": 9.953648331393977e-06, "loss": 0.137, "step": 44660 }, { "epoch": 3.6809644112478033, "grad_norm": 0.9429082274436951, "learning_rate": 9.93069189560881e-06, "loss": 0.1539, "step": 44680 }, { "epoch": 3.6826120386643235, "grad_norm": 0.4823976755142212, "learning_rate": 9.907755400216365e-06, "loss": 0.1538, "step": 44700 }, { "epoch": 3.6842596660808438, "grad_norm": 0.866899847984314, "learning_rate": 9.884838875567263e-06, "loss": 0.1461, "step": 44720 }, { "epoch": 3.6859072934973636, "grad_norm": 0.1597004532814026, "learning_rate": 9.861942351985676e-06, "loss": 0.1554, "step": 44740 }, { "epoch": 3.6875549209138843, "grad_norm": 1.0429483652114868, "learning_rate": 9.83906585976933e-06, "loss": 0.1385, "step": 44760 }, { "epoch": 3.689202548330404, "grad_norm": 0.5699923038482666, "learning_rate": 9.81620942918944e-06, "loss": 0.1556, "step": 44780 }, { "epoch": 3.6908501757469243, "grad_norm": 1.0025277137756348, "learning_rate": 9.793373090490679e-06, "loss": 0.1591, "step": 44800 }, { "epoch": 3.6924978031634446, "grad_norm": 2.603668212890625, "learning_rate": 9.770556873891112e-06, "loss": 0.151, "step": 44820 }, { "epoch": 3.694145430579965, "grad_norm": 0.28980565071105957, "learning_rate": 9.747760809582201e-06, "loss": 0.1428, "step": 44840 }, { "epoch": 3.695793057996485, "grad_norm": 0.5941391587257385, "learning_rate": 9.724984927728733e-06, "loss": 0.1347, "step": 44860 }, { "epoch": 3.6974406854130053, "grad_norm": 0.5636907815933228, "learning_rate": 9.70222925846879e-06, "loss": 0.1559, "step": 44880 }, { "epoch": 3.6990883128295255, "grad_norm": 0.5769711136817932, "learning_rate": 9.679493831913716e-06, "loss": 0.1529, "step": 44900 }, { "epoch": 3.700735940246046, "grad_norm": 0.8310657739639282, "learning_rate": 9.656778678148037e-06, "loss": 0.1418, "step": 44920 }, { "epoch": 3.702383567662566, "grad_norm": 0.21607761085033417, "learning_rate": 9.634083827229493e-06, "loss": 0.1527, "step": 44940 }, { "epoch": 3.704031195079086, "grad_norm": 4.590474605560303, "learning_rate": 9.611409309188934e-06, "loss": 0.1307, "step": 44960 }, { "epoch": 3.7056788224956065, "grad_norm": 0.3627116084098816, "learning_rate": 9.588755154030316e-06, "loss": 0.1548, "step": 44980 }, { "epoch": 3.7073264499121263, "grad_norm": 1.2244609594345093, "learning_rate": 9.566121391730651e-06, "loss": 0.1399, "step": 45000 }, { "epoch": 3.7089740773286466, "grad_norm": 1.2097513675689697, "learning_rate": 9.54350805223995e-06, "loss": 0.153, "step": 45020 }, { "epoch": 3.710621704745167, "grad_norm": 0.45634397864341736, "learning_rate": 9.520915165481212e-06, "loss": 0.1529, "step": 45040 }, { "epoch": 3.712269332161687, "grad_norm": 0.761043906211853, "learning_rate": 9.498342761350374e-06, "loss": 0.1343, "step": 45060 }, { "epoch": 3.7139169595782073, "grad_norm": 0.42660772800445557, "learning_rate": 9.475790869716264e-06, "loss": 0.1631, "step": 45080 }, { "epoch": 3.7155645869947276, "grad_norm": 0.555163562297821, "learning_rate": 9.453259520420574e-06, "loss": 0.1619, "step": 45100 }, { "epoch": 3.717212214411248, "grad_norm": 1.0226809978485107, "learning_rate": 9.430748743277804e-06, "loss": 0.1663, "step": 45120 }, { "epoch": 3.718859841827768, "grad_norm": 0.2902556359767914, "learning_rate": 9.408258568075238e-06, "loss": 0.1534, "step": 45140 }, { "epoch": 3.7205074692442883, "grad_norm": 3.5902607440948486, "learning_rate": 9.385789024572897e-06, "loss": 0.1347, "step": 45160 }, { "epoch": 3.7221550966608086, "grad_norm": 0.7113789916038513, "learning_rate": 9.363340142503498e-06, "loss": 0.1459, "step": 45180 }, { "epoch": 3.723802724077329, "grad_norm": 0.3767790198326111, "learning_rate": 9.340911951572435e-06, "loss": 0.1516, "step": 45200 }, { "epoch": 3.7254503514938486, "grad_norm": 0.6034421324729919, "learning_rate": 9.318504481457688e-06, "loss": 0.1563, "step": 45220 }, { "epoch": 3.7270979789103693, "grad_norm": 0.23844176530838013, "learning_rate": 9.29611776180985e-06, "loss": 0.1499, "step": 45240 }, { "epoch": 3.728745606326889, "grad_norm": 0.8575461506843567, "learning_rate": 9.273751822252041e-06, "loss": 0.1332, "step": 45260 }, { "epoch": 3.7303932337434094, "grad_norm": 0.4221842288970947, "learning_rate": 9.251406692379894e-06, "loss": 0.1536, "step": 45280 }, { "epoch": 3.7320408611599296, "grad_norm": 1.116652250289917, "learning_rate": 9.229082401761502e-06, "loss": 0.1624, "step": 45300 }, { "epoch": 3.73368848857645, "grad_norm": 1.2018272876739502, "learning_rate": 9.206778979937365e-06, "loss": 0.145, "step": 45320 }, { "epoch": 3.73533611599297, "grad_norm": 0.7084421515464783, "learning_rate": 9.18449645642039e-06, "loss": 0.1428, "step": 45340 }, { "epoch": 3.7369837434094904, "grad_norm": 1.1555784940719604, "learning_rate": 9.162234860695829e-06, "loss": 0.1435, "step": 45360 }, { "epoch": 3.7386313708260106, "grad_norm": 1.0259857177734375, "learning_rate": 9.139994222221232e-06, "loss": 0.1537, "step": 45380 }, { "epoch": 3.740278998242531, "grad_norm": 0.5255835056304932, "learning_rate": 9.117774570426432e-06, "loss": 0.1462, "step": 45400 }, { "epoch": 3.741926625659051, "grad_norm": 0.7363396286964417, "learning_rate": 9.095575934713466e-06, "loss": 0.1406, "step": 45420 }, { "epoch": 3.743574253075571, "grad_norm": 0.19337935745716095, "learning_rate": 9.07339834445658e-06, "loss": 0.1643, "step": 45440 }, { "epoch": 3.7452218804920916, "grad_norm": 1.9763349294662476, "learning_rate": 9.051241829002172e-06, "loss": 0.1376, "step": 45460 }, { "epoch": 3.7468695079086114, "grad_norm": 1.5776675939559937, "learning_rate": 9.029106417668747e-06, "loss": 0.1468, "step": 45480 }, { "epoch": 3.7485171353251316, "grad_norm": 1.0653555393218994, "learning_rate": 9.006992139746897e-06, "loss": 0.1579, "step": 45500 }, { "epoch": 3.750164762741652, "grad_norm": 0.9452747106552124, "learning_rate": 8.984899024499222e-06, "loss": 0.1423, "step": 45520 }, { "epoch": 3.751812390158172, "grad_norm": 4.119504928588867, "learning_rate": 8.962827101160342e-06, "loss": 0.1436, "step": 45540 }, { "epoch": 3.7534600175746924, "grad_norm": 1.3755271434783936, "learning_rate": 8.940776398936831e-06, "loss": 0.1286, "step": 45560 }, { "epoch": 3.7551076449912126, "grad_norm": 3.2259788513183594, "learning_rate": 8.918746947007176e-06, "loss": 0.1586, "step": 45580 }, { "epoch": 3.756755272407733, "grad_norm": 1.7073843479156494, "learning_rate": 8.89673877452176e-06, "loss": 0.1503, "step": 45600 }, { "epoch": 3.758402899824253, "grad_norm": 0.7050052285194397, "learning_rate": 8.874751910602774e-06, "loss": 0.1443, "step": 45620 }, { "epoch": 3.7600505272407734, "grad_norm": 0.30678680539131165, "learning_rate": 8.852786384344247e-06, "loss": 0.1535, "step": 45640 }, { "epoch": 3.7616981546572936, "grad_norm": 1.3186354637145996, "learning_rate": 8.830842224811962e-06, "loss": 0.1317, "step": 45660 }, { "epoch": 3.763345782073814, "grad_norm": 11.927634239196777, "learning_rate": 8.808919461043425e-06, "loss": 0.1604, "step": 45680 }, { "epoch": 3.7649934094903337, "grad_norm": 2.6330153942108154, "learning_rate": 8.788112679711754e-06, "loss": 0.154, "step": 45700 }, { "epoch": 3.7666410369068544, "grad_norm": 1.1367371082305908, "learning_rate": 8.766231721094405e-06, "loss": 0.163, "step": 45720 }, { "epoch": 3.768288664323374, "grad_norm": 0.2689257562160492, "learning_rate": 8.744372243736357e-06, "loss": 0.1625, "step": 45740 }, { "epoch": 3.7699362917398944, "grad_norm": 6.826746463775635, "learning_rate": 8.722534276563064e-06, "loss": 0.1373, "step": 45760 }, { "epoch": 3.7715839191564147, "grad_norm": 3.139256000518799, "learning_rate": 8.700717848471512e-06, "loss": 0.1589, "step": 45780 }, { "epoch": 3.773231546572935, "grad_norm": 1.6718791723251343, "learning_rate": 8.678922988330194e-06, "loss": 0.1558, "step": 45800 }, { "epoch": 3.774879173989455, "grad_norm": 3.558382987976074, "learning_rate": 8.657149724979055e-06, "loss": 0.1648, "step": 45820 }, { "epoch": 3.7765268014059754, "grad_norm": 0.3443264961242676, "learning_rate": 8.635398087229465e-06, "loss": 0.1605, "step": 45840 }, { "epoch": 3.7781744288224957, "grad_norm": 0.43101775646209717, "learning_rate": 8.613668103864192e-06, "loss": 0.1375, "step": 45860 }, { "epoch": 3.779822056239016, "grad_norm": 1.540165662765503, "learning_rate": 8.591959803637314e-06, "loss": 0.1694, "step": 45880 }, { "epoch": 3.781469683655536, "grad_norm": 1.742284893989563, "learning_rate": 8.570273215274255e-06, "loss": 0.1585, "step": 45900 }, { "epoch": 3.7831173110720564, "grad_norm": 0.5359193086624146, "learning_rate": 8.548608367471687e-06, "loss": 0.1603, "step": 45920 }, { "epoch": 3.7847649384885766, "grad_norm": 1.0469856262207031, "learning_rate": 8.52696528889752e-06, "loss": 0.1551, "step": 45940 }, { "epoch": 3.7864125659050965, "grad_norm": 0.8619704842567444, "learning_rate": 8.505344008190863e-06, "loss": 0.1452, "step": 45960 }, { "epoch": 3.788060193321617, "grad_norm": 0.7364770770072937, "learning_rate": 8.483744553961984e-06, "loss": 0.1527, "step": 45980 }, { "epoch": 3.789707820738137, "grad_norm": 1.1404296159744263, "learning_rate": 8.462166954792242e-06, "loss": 0.1459, "step": 46000 }, { "epoch": 3.789707820738137, "eval_loss": 0.5422182679176331, "eval_runtime": 253.541, "eval_samples_per_second": 85.939, "eval_steps_per_second": 21.488, "eval_wer": 0.22258645160026092, "step": 46000 }, { "epoch": 3.791355448154657, "grad_norm": 1.1411128044128418, "learning_rate": 8.440611239234105e-06, "loss": 0.1597, "step": 46020 }, { "epoch": 3.7930030755711774, "grad_norm": 0.34028419852256775, "learning_rate": 8.419077435811076e-06, "loss": 0.1629, "step": 46040 }, { "epoch": 3.7946507029876977, "grad_norm": 4.206717491149902, "learning_rate": 8.397565573017663e-06, "loss": 0.1458, "step": 46060 }, { "epoch": 3.796298330404218, "grad_norm": 2.615445137023926, "learning_rate": 8.376075679319342e-06, "loss": 0.1495, "step": 46080 }, { "epoch": 3.797945957820738, "grad_norm": 2.354487180709839, "learning_rate": 8.354607783152508e-06, "loss": 0.1503, "step": 46100 }, { "epoch": 3.7995935852372584, "grad_norm": 0.7319648265838623, "learning_rate": 8.333161912924455e-06, "loss": 0.1574, "step": 46120 }, { "epoch": 3.8012412126537787, "grad_norm": 0.21383851766586304, "learning_rate": 8.311738097013339e-06, "loss": 0.1554, "step": 46140 }, { "epoch": 3.802888840070299, "grad_norm": 0.9344923496246338, "learning_rate": 8.290336363768122e-06, "loss": 0.1407, "step": 46160 }, { "epoch": 3.8045364674868187, "grad_norm": 0.8369607925415039, "learning_rate": 8.268956741508562e-06, "loss": 0.1617, "step": 46180 }, { "epoch": 3.8061840949033394, "grad_norm": 0.4113675057888031, "learning_rate": 8.247599258525126e-06, "loss": 0.1667, "step": 46200 }, { "epoch": 3.8078317223198592, "grad_norm": 1.6866681575775146, "learning_rate": 8.226263943079013e-06, "loss": 0.1539, "step": 46220 }, { "epoch": 3.8094793497363795, "grad_norm": 0.16504263877868652, "learning_rate": 8.204950823402083e-06, "loss": 0.1543, "step": 46240 }, { "epoch": 3.8111269771528997, "grad_norm": 13.53427505493164, "learning_rate": 8.183659927696824e-06, "loss": 0.1329, "step": 46260 }, { "epoch": 3.81277460456942, "grad_norm": 0.7844868898391724, "learning_rate": 8.162391284136323e-06, "loss": 0.1587, "step": 46280 }, { "epoch": 3.81442223198594, "grad_norm": 0.6177399754524231, "learning_rate": 8.141144920864197e-06, "loss": 0.1544, "step": 46300 }, { "epoch": 3.8160698594024605, "grad_norm": 0.7318494915962219, "learning_rate": 8.11992086599461e-06, "loss": 0.1508, "step": 46320 }, { "epoch": 3.8177174868189807, "grad_norm": 0.25767505168914795, "learning_rate": 8.09871914761219e-06, "loss": 0.17, "step": 46340 }, { "epoch": 3.819365114235501, "grad_norm": 1.0404783487319946, "learning_rate": 8.077539793772024e-06, "loss": 0.1324, "step": 46360 }, { "epoch": 3.821012741652021, "grad_norm": 0.4000663757324219, "learning_rate": 8.05638283249959e-06, "loss": 0.1396, "step": 46380 }, { "epoch": 3.8226603690685415, "grad_norm": 1.2788134813308716, "learning_rate": 8.035248291790731e-06, "loss": 0.1552, "step": 46400 }, { "epoch": 3.8243079964850617, "grad_norm": 4.33125114440918, "learning_rate": 8.014136199611639e-06, "loss": 0.1538, "step": 46420 }, { "epoch": 3.8259556239015815, "grad_norm": 0.18401068449020386, "learning_rate": 7.993046583898791e-06, "loss": 0.158, "step": 46440 }, { "epoch": 3.827603251318102, "grad_norm": 1.1308575868606567, "learning_rate": 7.971979472558924e-06, "loss": 0.1407, "step": 46460 }, { "epoch": 3.829250878734622, "grad_norm": 1.6696640253067017, "learning_rate": 7.950934893468994e-06, "loss": 0.157, "step": 46480 }, { "epoch": 3.8308985061511422, "grad_norm": 2.529257297515869, "learning_rate": 7.92991287447615e-06, "loss": 0.1771, "step": 46500 }, { "epoch": 3.8325461335676625, "grad_norm": 1.8606712818145752, "learning_rate": 7.908913443397677e-06, "loss": 0.1494, "step": 46520 }, { "epoch": 3.8341937609841827, "grad_norm": 0.16448961198329926, "learning_rate": 7.88793662802098e-06, "loss": 0.1559, "step": 46540 }, { "epoch": 3.835841388400703, "grad_norm": 0.7512365579605103, "learning_rate": 7.866982456103535e-06, "loss": 0.1356, "step": 46560 }, { "epoch": 3.8374890158172232, "grad_norm": 1.3015378713607788, "learning_rate": 7.846050955372845e-06, "loss": 0.1533, "step": 46580 }, { "epoch": 3.8391366432337435, "grad_norm": 2.272094964981079, "learning_rate": 7.825142153526425e-06, "loss": 0.1615, "step": 46600 }, { "epoch": 3.8407842706502637, "grad_norm": 1.6781426668167114, "learning_rate": 7.804256078231753e-06, "loss": 0.1554, "step": 46620 }, { "epoch": 3.842431898066784, "grad_norm": 0.8780381679534912, "learning_rate": 7.783392757126231e-06, "loss": 0.1572, "step": 46640 }, { "epoch": 3.844079525483304, "grad_norm": 4.552302360534668, "learning_rate": 7.762552217817163e-06, "loss": 0.135, "step": 46660 }, { "epoch": 3.8457271528998245, "grad_norm": 0.6195056438446045, "learning_rate": 7.74173448788168e-06, "loss": 0.1541, "step": 46680 }, { "epoch": 3.8473747803163443, "grad_norm": 1.2643500566482544, "learning_rate": 7.720939594866752e-06, "loss": 0.1474, "step": 46700 }, { "epoch": 3.8490224077328645, "grad_norm": 0.6250098943710327, "learning_rate": 7.700167566289133e-06, "loss": 0.1559, "step": 46720 }, { "epoch": 3.8506700351493848, "grad_norm": 0.974387526512146, "learning_rate": 7.679418429635306e-06, "loss": 0.1503, "step": 46740 }, { "epoch": 3.852317662565905, "grad_norm": 0.8023262023925781, "learning_rate": 7.658692212361487e-06, "loss": 0.133, "step": 46760 }, { "epoch": 3.8539652899824253, "grad_norm": 0.8580151200294495, "learning_rate": 7.637988941893527e-06, "loss": 0.1553, "step": 46780 }, { "epoch": 3.8556129173989455, "grad_norm": 2.8851852416992188, "learning_rate": 7.617308645626939e-06, "loss": 0.1426, "step": 46800 }, { "epoch": 3.8572605448154658, "grad_norm": 1.1372631788253784, "learning_rate": 7.5966513509268365e-06, "loss": 0.1744, "step": 46820 }, { "epoch": 3.858908172231986, "grad_norm": 0.20085509121418, "learning_rate": 7.577048251059796e-06, "loss": 0.157, "step": 46840 }, { "epoch": 3.8605557996485063, "grad_norm": 0.785279393196106, "learning_rate": 7.5564358880079315e-06, "loss": 0.1332, "step": 46860 }, { "epoch": 3.8622034270650265, "grad_norm": 0.8351372480392456, "learning_rate": 7.535846607072139e-06, "loss": 0.1703, "step": 46880 }, { "epoch": 3.8638510544815468, "grad_norm": 0.8297408819198608, "learning_rate": 7.515280435497097e-06, "loss": 0.1532, "step": 46900 }, { "epoch": 3.8654986818980666, "grad_norm": 0.7368187308311462, "learning_rate": 7.494737400496898e-06, "loss": 0.1516, "step": 46920 }, { "epoch": 3.8671463093145872, "grad_norm": 0.2698105573654175, "learning_rate": 7.474217529255018e-06, "loss": 0.155, "step": 46940 }, { "epoch": 3.868793936731107, "grad_norm": 21.42542839050293, "learning_rate": 7.45372084892429e-06, "loss": 0.1457, "step": 46960 }, { "epoch": 3.8704415641476273, "grad_norm": 0.3842104375362396, "learning_rate": 7.433247386626835e-06, "loss": 0.1445, "step": 46980 }, { "epoch": 3.8720891915641475, "grad_norm": 1.187163233757019, "learning_rate": 7.412797169454081e-06, "loss": 0.1634, "step": 47000 }, { "epoch": 3.873736818980668, "grad_norm": 0.6695907115936279, "learning_rate": 7.39237022446668e-06, "loss": 0.1566, "step": 47020 }, { "epoch": 3.875384446397188, "grad_norm": 0.23000146448612213, "learning_rate": 7.371966578694492e-06, "loss": 0.149, "step": 47040 }, { "epoch": 3.8770320738137083, "grad_norm": 1.0031481981277466, "learning_rate": 7.351586259136559e-06, "loss": 0.1393, "step": 47060 }, { "epoch": 3.8786797012302285, "grad_norm": 0.600594699382782, "learning_rate": 7.331229292761027e-06, "loss": 0.1559, "step": 47080 }, { "epoch": 3.880327328646749, "grad_norm": 1.6415737867355347, "learning_rate": 7.310895706505166e-06, "loss": 0.1556, "step": 47100 }, { "epoch": 3.881974956063269, "grad_norm": 2.001828908920288, "learning_rate": 7.290585527275304e-06, "loss": 0.148, "step": 47120 }, { "epoch": 3.883622583479789, "grad_norm": 0.23537398874759674, "learning_rate": 7.270298781946788e-06, "loss": 0.1513, "step": 47140 }, { "epoch": 3.8852702108963095, "grad_norm": 0.8537335991859436, "learning_rate": 7.250035497363964e-06, "loss": 0.1347, "step": 47160 }, { "epoch": 3.8869178383128293, "grad_norm": 1.2351799011230469, "learning_rate": 7.229795700340125e-06, "loss": 0.1603, "step": 47180 }, { "epoch": 3.8885654657293496, "grad_norm": 0.3554437756538391, "learning_rate": 7.209579417657497e-06, "loss": 0.1562, "step": 47200 }, { "epoch": 3.89021309314587, "grad_norm": 1.6531754732131958, "learning_rate": 7.189386676067175e-06, "loss": 0.1494, "step": 47220 }, { "epoch": 3.89186072056239, "grad_norm": 0.19578656554222107, "learning_rate": 7.169217502289119e-06, "loss": 0.138, "step": 47240 }, { "epoch": 3.8935083479789103, "grad_norm": 2.553358554840088, "learning_rate": 7.149071923012099e-06, "loss": 0.1436, "step": 47260 }, { "epoch": 3.8951559753954306, "grad_norm": 0.6994835138320923, "learning_rate": 7.128949964893647e-06, "loss": 0.1534, "step": 47280 }, { "epoch": 3.896803602811951, "grad_norm": 0.7397701144218445, "learning_rate": 7.109856008031157e-06, "loss": 0.1558, "step": 47300 }, { "epoch": 3.898451230228471, "grad_norm": 1.2970362901687622, "learning_rate": 7.089780187727285e-06, "loss": 0.1407, "step": 47320 }, { "epoch": 3.9000988576449913, "grad_norm": 0.596501350402832, "learning_rate": 7.069728067039513e-06, "loss": 0.1471, "step": 47340 }, { "epoch": 3.9017464850615116, "grad_norm": 0.8416828513145447, "learning_rate": 7.049699672501703e-06, "loss": 0.1311, "step": 47360 }, { "epoch": 3.903394112478032, "grad_norm": 1.3695924282073975, "learning_rate": 7.029695030616337e-06, "loss": 0.1536, "step": 47380 }, { "epoch": 3.9050417398945516, "grad_norm": 1.4369745254516602, "learning_rate": 7.009714167854473e-06, "loss": 0.153, "step": 47400 }, { "epoch": 3.9066893673110723, "grad_norm": 0.6326245069503784, "learning_rate": 6.9897571106556925e-06, "loss": 0.149, "step": 47420 }, { "epoch": 3.908336994727592, "grad_norm": 0.33852240443229675, "learning_rate": 6.969823885428087e-06, "loss": 0.1509, "step": 47440 }, { "epoch": 3.9099846221441124, "grad_norm": 0.6619710922241211, "learning_rate": 6.949914518548193e-06, "loss": 0.1314, "step": 47460 }, { "epoch": 3.9116322495606326, "grad_norm": 0.5734514594078064, "learning_rate": 6.930029036360991e-06, "loss": 0.1477, "step": 47480 }, { "epoch": 3.913279876977153, "grad_norm": 1.2817206382751465, "learning_rate": 6.910167465179857e-06, "loss": 0.1613, "step": 47500 }, { "epoch": 3.914927504393673, "grad_norm": 0.5360845923423767, "learning_rate": 6.890329831286527e-06, "loss": 0.1468, "step": 47520 }, { "epoch": 3.9165751318101933, "grad_norm": 0.34198638796806335, "learning_rate": 6.870516160931059e-06, "loss": 0.1505, "step": 47540 }, { "epoch": 3.9182227592267136, "grad_norm": 1.448637843132019, "learning_rate": 6.850726480331795e-06, "loss": 0.1317, "step": 47560 }, { "epoch": 3.919870386643234, "grad_norm": 0.48592647910118103, "learning_rate": 6.830960815675339e-06, "loss": 0.152, "step": 47580 }, { "epoch": 3.921518014059754, "grad_norm": 1.0064761638641357, "learning_rate": 6.811219193116519e-06, "loss": 0.1443, "step": 47600 }, { "epoch": 3.923165641476274, "grad_norm": 0.9316272735595703, "learning_rate": 6.79150163877835e-06, "loss": 0.1499, "step": 47620 }, { "epoch": 3.9248132688927946, "grad_norm": 0.2946498692035675, "learning_rate": 6.771808178751996e-06, "loss": 0.1507, "step": 47640 }, { "epoch": 3.9264608963093144, "grad_norm": 0.755538821220398, "learning_rate": 6.752138839096725e-06, "loss": 0.1343, "step": 47660 }, { "epoch": 3.9281085237258346, "grad_norm": 0.7228836417198181, "learning_rate": 6.73249364583991e-06, "loss": 0.1511, "step": 47680 }, { "epoch": 3.929756151142355, "grad_norm": 2.5564022064208984, "learning_rate": 6.71287262497696e-06, "loss": 0.142, "step": 47700 }, { "epoch": 3.931403778558875, "grad_norm": 0.6671843528747559, "learning_rate": 6.693275802471302e-06, "loss": 0.1547, "step": 47720 }, { "epoch": 3.9330514059753954, "grad_norm": 0.2564792037010193, "learning_rate": 6.673703204254347e-06, "loss": 0.1588, "step": 47740 }, { "epoch": 3.9346990333919156, "grad_norm": 1.0663928985595703, "learning_rate": 6.6541548562254364e-06, "loss": 0.1284, "step": 47760 }, { "epoch": 3.936346660808436, "grad_norm": 0.35327231884002686, "learning_rate": 6.634630784251833e-06, "loss": 0.1558, "step": 47780 }, { "epoch": 3.937994288224956, "grad_norm": 0.34408310055732727, "learning_rate": 6.6151310141686795e-06, "loss": 0.1478, "step": 47800 }, { "epoch": 3.9396419156414764, "grad_norm": 0.5912595987319946, "learning_rate": 6.595655571778953e-06, "loss": 0.1531, "step": 47820 }, { "epoch": 3.9412895430579966, "grad_norm": 0.12092000246047974, "learning_rate": 6.576204482853446e-06, "loss": 0.145, "step": 47840 }, { "epoch": 3.942937170474517, "grad_norm": 0.6143555641174316, "learning_rate": 6.556777773130724e-06, "loss": 0.1333, "step": 47860 }, { "epoch": 3.9445847978910367, "grad_norm": 0.5487130880355835, "learning_rate": 6.537375468317094e-06, "loss": 0.1433, "step": 47880 }, { "epoch": 3.9462324253075574, "grad_norm": 0.9685620665550232, "learning_rate": 6.517997594086555e-06, "loss": 0.1417, "step": 47900 }, { "epoch": 3.947880052724077, "grad_norm": 0.40364882349967957, "learning_rate": 6.498644176080801e-06, "loss": 0.1559, "step": 47920 }, { "epoch": 3.9495276801405974, "grad_norm": 0.14558570086956024, "learning_rate": 6.479315239909145e-06, "loss": 0.1398, "step": 47940 }, { "epoch": 3.9511753075571177, "grad_norm": 1.8676153421401978, "learning_rate": 6.46001081114852e-06, "loss": 0.1311, "step": 47960 }, { "epoch": 3.952822934973638, "grad_norm": 0.5240793228149414, "learning_rate": 6.440730915343421e-06, "loss": 0.1536, "step": 47980 }, { "epoch": 3.954470562390158, "grad_norm": 1.2686840295791626, "learning_rate": 6.421475578005881e-06, "loss": 0.1584, "step": 48000 }, { "epoch": 3.954470562390158, "eval_loss": 0.5072916150093079, "eval_runtime": 763.3044, "eval_samples_per_second": 28.546, "eval_steps_per_second": 7.137, "eval_wer": 0.21830901145782886, "step": 48000 }, { "epoch": 3.9561181898066784, "grad_norm": 1.2696417570114136, "learning_rate": 6.402244824615436e-06, "loss": 0.1639, "step": 48020 }, { "epoch": 3.9577658172231986, "grad_norm": 0.24946627020835876, "learning_rate": 6.383038680619099e-06, "loss": 0.1462, "step": 48040 }, { "epoch": 3.959413444639719, "grad_norm": 4.019192218780518, "learning_rate": 6.363857171431301e-06, "loss": 0.1293, "step": 48060 }, { "epoch": 3.961061072056239, "grad_norm": 1.8190034627914429, "learning_rate": 6.344700322433889e-06, "loss": 0.1447, "step": 48080 }, { "epoch": 3.9627086994727594, "grad_norm": 3.572174072265625, "learning_rate": 6.325568158976075e-06, "loss": 0.1551, "step": 48100 }, { "epoch": 3.9643563268892796, "grad_norm": 0.6448047757148743, "learning_rate": 6.3064607063744115e-06, "loss": 0.1518, "step": 48120 }, { "epoch": 3.9660039543057994, "grad_norm": 0.1911247968673706, "learning_rate": 6.287377989912749e-06, "loss": 0.1588, "step": 48140 }, { "epoch": 3.96765158172232, "grad_norm": 0.8728081583976746, "learning_rate": 6.268320034842192e-06, "loss": 0.129, "step": 48160 }, { "epoch": 3.96929920913884, "grad_norm": 0.7072778940200806, "learning_rate": 6.249286866381101e-06, "loss": 0.1564, "step": 48180 }, { "epoch": 3.97094683655536, "grad_norm": 0.5733066201210022, "learning_rate": 6.230278509715029e-06, "loss": 0.1593, "step": 48200 }, { "epoch": 3.9725944639718804, "grad_norm": 3.564107894897461, "learning_rate": 6.211294989996699e-06, "loss": 0.1356, "step": 48220 }, { "epoch": 3.9742420913884007, "grad_norm": 0.1467239409685135, "learning_rate": 6.192336332345958e-06, "loss": 0.1285, "step": 48240 }, { "epoch": 3.975889718804921, "grad_norm": 0.9012780785560608, "learning_rate": 6.173402561849764e-06, "loss": 0.1297, "step": 48260 }, { "epoch": 3.977537346221441, "grad_norm": 0.4792120158672333, "learning_rate": 6.154493703562145e-06, "loss": 0.1415, "step": 48280 }, { "epoch": 3.9791849736379614, "grad_norm": 5.1045756340026855, "learning_rate": 6.135609782504162e-06, "loss": 0.1398, "step": 48300 }, { "epoch": 3.9808326010544817, "grad_norm": 1.0115171670913696, "learning_rate": 6.11675082366388e-06, "loss": 0.1384, "step": 48320 }, { "epoch": 3.982480228471002, "grad_norm": 0.5539193153381348, "learning_rate": 6.0979168519963216e-06, "loss": 0.1315, "step": 48340 }, { "epoch": 3.9841278558875217, "grad_norm": 0.9682093858718872, "learning_rate": 6.079107892423452e-06, "loss": 0.1231, "step": 48360 }, { "epoch": 3.9857754833040424, "grad_norm": 0.7027262449264526, "learning_rate": 6.061262570951409e-06, "loss": 0.1516, "step": 48380 }, { "epoch": 3.987423110720562, "grad_norm": 0.539910078048706, "learning_rate": 6.04250245651963e-06, "loss": 0.1528, "step": 48400 }, { "epoch": 3.9890707381370825, "grad_norm": 0.5832187533378601, "learning_rate": 6.023767427509394e-06, "loss": 0.1423, "step": 48420 }, { "epoch": 3.9907183655536027, "grad_norm": 0.21605855226516724, "learning_rate": 6.005057508711734e-06, "loss": 0.1466, "step": 48440 }, { "epoch": 3.992365992970123, "grad_norm": 0.6518982648849487, "learning_rate": 5.986372724884462e-06, "loss": 0.1445, "step": 48460 }, { "epoch": 3.994013620386643, "grad_norm": 1.1703104972839355, "learning_rate": 5.96771310075214e-06, "loss": 0.1445, "step": 48480 }, { "epoch": 3.9956612478031635, "grad_norm": 0.831611692905426, "learning_rate": 5.949078661006027e-06, "loss": 0.1534, "step": 48500 }, { "epoch": 3.9973088752196837, "grad_norm": 0.8763064742088318, "learning_rate": 5.930469430304067e-06, "loss": 0.1443, "step": 48520 }, { "epoch": 3.998956502636204, "grad_norm": 0.30029746890068054, "learning_rate": 5.911885433270825e-06, "loss": 0.1421, "step": 48540 }, { "epoch": 4.000604130052724, "grad_norm": 1.0976738929748535, "learning_rate": 5.893326694497494e-06, "loss": 0.1337, "step": 48560 }, { "epoch": 4.002251757469244, "grad_norm": 0.20220765471458435, "learning_rate": 5.874793238541846e-06, "loss": 0.1356, "step": 48580 }, { "epoch": 4.003899384885765, "grad_norm": 1.9902305603027344, "learning_rate": 5.856285089928188e-06, "loss": 0.1263, "step": 48600 }, { "epoch": 4.0055470123022845, "grad_norm": 0.6571900844573975, "learning_rate": 5.837802273147342e-06, "loss": 0.1431, "step": 48620 }, { "epoch": 4.007194639718805, "grad_norm": 0.5143736004829407, "learning_rate": 5.8193448126566105e-06, "loss": 0.1443, "step": 48640 }, { "epoch": 4.008842267135325, "grad_norm": 0.6226786971092224, "learning_rate": 5.800912732879743e-06, "loss": 0.1398, "step": 48660 }, { "epoch": 4.010489894551846, "grad_norm": 0.7970151305198669, "learning_rate": 5.782506058206905e-06, "loss": 0.1491, "step": 48680 }, { "epoch": 4.0121375219683655, "grad_norm": 1.225899338722229, "learning_rate": 5.764124812994645e-06, "loss": 0.1341, "step": 48700 }, { "epoch": 4.013785149384886, "grad_norm": 0.6610860228538513, "learning_rate": 5.745769021565869e-06, "loss": 0.1437, "step": 48720 }, { "epoch": 4.015432776801406, "grad_norm": 1.223840355873108, "learning_rate": 5.7274387082097785e-06, "loss": 0.147, "step": 48740 }, { "epoch": 4.017080404217926, "grad_norm": 3.3841757774353027, "learning_rate": 5.709133897181881e-06, "loss": 0.1518, "step": 48760 }, { "epoch": 4.0187280316344465, "grad_norm": 0.2505913972854614, "learning_rate": 5.690854612703939e-06, "loss": 0.1607, "step": 48780 }, { "epoch": 4.020375659050966, "grad_norm": 2.2898383140563965, "learning_rate": 5.672600878963927e-06, "loss": 0.1376, "step": 48800 }, { "epoch": 4.022023286467487, "grad_norm": 3.1586852073669434, "learning_rate": 5.654372720116022e-06, "loss": 0.1512, "step": 48820 }, { "epoch": 4.023670913884007, "grad_norm": 5.6944451332092285, "learning_rate": 5.63617016028054e-06, "loss": 0.1468, "step": 48840 }, { "epoch": 4.0253185413005275, "grad_norm": 1.5076755285263062, "learning_rate": 5.617993223543938e-06, "loss": 0.1533, "step": 48860 }, { "epoch": 4.026966168717047, "grad_norm": 1.9633057117462158, "learning_rate": 5.599841933958769e-06, "loss": 0.1424, "step": 48880 }, { "epoch": 4.028613796133568, "grad_norm": 0.6375758647918701, "learning_rate": 5.5817163155436396e-06, "loss": 0.1312, "step": 48900 }, { "epoch": 4.030261423550088, "grad_norm": 4.582575798034668, "learning_rate": 5.563616392283199e-06, "loss": 0.1514, "step": 48920 }, { "epoch": 4.0319090509666085, "grad_norm": 1.4480077028274536, "learning_rate": 5.545542188128072e-06, "loss": 0.1519, "step": 48940 }, { "epoch": 4.033556678383128, "grad_norm": 0.4868896007537842, "learning_rate": 5.527493726994879e-06, "loss": 0.1484, "step": 48960 }, { "epoch": 4.035204305799649, "grad_norm": 0.7873154878616333, "learning_rate": 5.509471032766156e-06, "loss": 0.1394, "step": 48980 }, { "epoch": 4.036851933216169, "grad_norm": 0.5535939931869507, "learning_rate": 5.491474129290361e-06, "loss": 0.1375, "step": 49000 }, { "epoch": 4.038499560632689, "grad_norm": 0.668782114982605, "learning_rate": 5.4735030403818e-06, "loss": 0.1512, "step": 49020 }, { "epoch": 4.040147188049209, "grad_norm": 0.6487458944320679, "learning_rate": 5.455557789820639e-06, "loss": 0.1431, "step": 49040 }, { "epoch": 4.041794815465729, "grad_norm": 0.8897418975830078, "learning_rate": 5.437638401352851e-06, "loss": 0.1487, "step": 49060 }, { "epoch": 4.04344244288225, "grad_norm": 1.6307114362716675, "learning_rate": 5.419744898690185e-06, "loss": 0.1219, "step": 49080 }, { "epoch": 4.0450900702987695, "grad_norm": 0.7759345769882202, "learning_rate": 5.401877305510139e-06, "loss": 0.1295, "step": 49100 }, { "epoch": 4.04673769771529, "grad_norm": 0.5725277662277222, "learning_rate": 5.384035645455912e-06, "loss": 0.1675, "step": 49120 }, { "epoch": 4.04838532513181, "grad_norm": 0.5994029641151428, "learning_rate": 5.3662199421364054e-06, "loss": 0.141, "step": 49140 }, { "epoch": 4.050032952548331, "grad_norm": 0.8295794725418091, "learning_rate": 5.348430219126169e-06, "loss": 0.158, "step": 49160 }, { "epoch": 4.0516805799648505, "grad_norm": 0.3460080623626709, "learning_rate": 5.330666499965367e-06, "loss": 0.1314, "step": 49180 }, { "epoch": 4.053328207381371, "grad_norm": 1.8230607509613037, "learning_rate": 5.312928808159765e-06, "loss": 0.1375, "step": 49200 }, { "epoch": 4.054975834797891, "grad_norm": 0.6278348565101624, "learning_rate": 5.295217167180677e-06, "loss": 0.1513, "step": 49220 }, { "epoch": 4.056623462214411, "grad_norm": 0.47059327363967896, "learning_rate": 5.277531600464953e-06, "loss": 0.1411, "step": 49240 }, { "epoch": 4.0582710896309315, "grad_norm": 0.8084439635276794, "learning_rate": 5.2598721314149395e-06, "loss": 0.1379, "step": 49260 }, { "epoch": 4.059918717047451, "grad_norm": 0.7220436334609985, "learning_rate": 5.242238783398451e-06, "loss": 0.1294, "step": 49280 }, { "epoch": 4.061566344463972, "grad_norm": 0.4311424791812897, "learning_rate": 5.224631579748726e-06, "loss": 0.1205, "step": 49300 }, { "epoch": 4.063213971880492, "grad_norm": 0.4632396697998047, "learning_rate": 5.207050543764416e-06, "loss": 0.138, "step": 49320 }, { "epoch": 4.0648615992970125, "grad_norm": 0.5787003636360168, "learning_rate": 5.189495698709551e-06, "loss": 0.1458, "step": 49340 }, { "epoch": 4.066509226713532, "grad_norm": 1.0666968822479248, "learning_rate": 5.171967067813496e-06, "loss": 0.1544, "step": 49360 }, { "epoch": 4.068156854130053, "grad_norm": 0.19422683119773865, "learning_rate": 5.154464674270934e-06, "loss": 0.1408, "step": 49380 }, { "epoch": 4.069804481546573, "grad_norm": 4.505268096923828, "learning_rate": 5.136988541241833e-06, "loss": 0.1487, "step": 49400 }, { "epoch": 4.0714521089630935, "grad_norm": 0.5279316902160645, "learning_rate": 5.1195386918513915e-06, "loss": 0.1554, "step": 49420 }, { "epoch": 4.073099736379613, "grad_norm": 0.864841878414154, "learning_rate": 5.102115149190048e-06, "loss": 0.1519, "step": 49440 }, { "epoch": 4.074747363796134, "grad_norm": 0.4843533933162689, "learning_rate": 5.084717936313426e-06, "loss": 0.1567, "step": 49460 }, { "epoch": 4.076394991212654, "grad_norm": 0.6509762406349182, "learning_rate": 5.067347076242312e-06, "loss": 0.1382, "step": 49480 }, { "epoch": 4.078042618629174, "grad_norm": 1.0307797193527222, "learning_rate": 5.050002591962616e-06, "loss": 0.1319, "step": 49500 }, { "epoch": 4.079690246045694, "grad_norm": 0.6851842403411865, "learning_rate": 5.032684506425339e-06, "loss": 0.1526, "step": 49520 }, { "epoch": 4.081337873462214, "grad_norm": 1.0875792503356934, "learning_rate": 5.015392842546565e-06, "loss": 0.1426, "step": 49540 }, { "epoch": 4.082985500878735, "grad_norm": 4.727265357971191, "learning_rate": 4.998127623207405e-06, "loss": 0.1513, "step": 49560 }, { "epoch": 4.084633128295255, "grad_norm": 0.43898648023605347, "learning_rate": 4.980888871253983e-06, "loss": 0.1358, "step": 49580 }, { "epoch": 4.086280755711775, "grad_norm": 4.587413787841797, "learning_rate": 4.9636766094974055e-06, "loss": 0.1361, "step": 49600 }, { "epoch": 4.087928383128295, "grad_norm": 1.679709553718567, "learning_rate": 4.946490860713707e-06, "loss": 0.1584, "step": 49620 }, { "epoch": 4.089576010544816, "grad_norm": 4.208545684814453, "learning_rate": 4.929331647643853e-06, "loss": 0.1561, "step": 49640 }, { "epoch": 4.091223637961336, "grad_norm": 0.668868899345398, "learning_rate": 4.912198992993697e-06, "loss": 0.159, "step": 49660 }, { "epoch": 4.092871265377856, "grad_norm": 0.2711634933948517, "learning_rate": 4.895092919433944e-06, "loss": 0.1659, "step": 49680 }, { "epoch": 4.094518892794376, "grad_norm": 2.033294200897217, "learning_rate": 4.878013449600135e-06, "loss": 0.1413, "step": 49700 }, { "epoch": 4.096166520210896, "grad_norm": 1.0798916816711426, "learning_rate": 4.860960606092588e-06, "loss": 0.1555, "step": 49720 }, { "epoch": 4.097814147627417, "grad_norm": 2.1771676540374756, "learning_rate": 4.8439344114764e-06, "loss": 0.1616, "step": 49740 }, { "epoch": 4.099461775043936, "grad_norm": 1.2357114553451538, "learning_rate": 4.826934888281412e-06, "loss": 0.1591, "step": 49760 }, { "epoch": 4.101109402460457, "grad_norm": 0.2864702641963959, "learning_rate": 4.809962059002163e-06, "loss": 0.1446, "step": 49780 }, { "epoch": 4.102757029876977, "grad_norm": 3.759768486022949, "learning_rate": 4.793015946097876e-06, "loss": 0.1409, "step": 49800 }, { "epoch": 4.104404657293498, "grad_norm": 0.914750337600708, "learning_rate": 4.776096571992405e-06, "loss": 0.1572, "step": 49820 }, { "epoch": 4.106052284710017, "grad_norm": 1.4311671257019043, "learning_rate": 4.759203959074243e-06, "loss": 0.1557, "step": 49840 }, { "epoch": 4.107699912126538, "grad_norm": 1.1592999696731567, "learning_rate": 4.7423381296964585e-06, "loss": 0.1519, "step": 49860 }, { "epoch": 4.109347539543058, "grad_norm": 0.5043455958366394, "learning_rate": 4.725499106176684e-06, "loss": 0.1409, "step": 49880 }, { "epoch": 4.110995166959579, "grad_norm": 1.2519917488098145, "learning_rate": 4.708686910797083e-06, "loss": 0.1394, "step": 49900 }, { "epoch": 4.112642794376098, "grad_norm": 0.7815605401992798, "learning_rate": 4.69190156580431e-06, "loss": 0.1568, "step": 49920 }, { "epoch": 4.114290421792619, "grad_norm": 3.0310311317443848, "learning_rate": 4.6751430934095005e-06, "loss": 0.1514, "step": 49940 }, { "epoch": 4.115938049209139, "grad_norm": 0.5789735317230225, "learning_rate": 4.658411515788225e-06, "loss": 0.1576, "step": 49960 }, { "epoch": 4.117585676625659, "grad_norm": 0.3229387700557709, "learning_rate": 4.641706855080474e-06, "loss": 0.1542, "step": 49980 }, { "epoch": 4.119233304042179, "grad_norm": 1.2507129907608032, "learning_rate": 4.625029133390596e-06, "loss": 0.1349, "step": 50000 }, { "epoch": 4.119233304042179, "eval_loss": 0.5318771600723267, "eval_runtime": 246.2463, "eval_samples_per_second": 88.485, "eval_steps_per_second": 22.124, "eval_wer": 0.22357742922488194, "step": 50000 }, { "epoch": 4.120880931458699, "grad_norm": 1.7700546979904175, "learning_rate": 4.608378372787319e-06, "loss": 0.1478, "step": 50020 }, { "epoch": 4.12252855887522, "grad_norm": 1.2826635837554932, "learning_rate": 4.591754595303688e-06, "loss": 0.1473, "step": 50040 }, { "epoch": 4.12417618629174, "grad_norm": 3.3998169898986816, "learning_rate": 4.575157822937037e-06, "loss": 0.1431, "step": 50060 }, { "epoch": 4.12582381370826, "grad_norm": 0.26330679655075073, "learning_rate": 4.558588077648976e-06, "loss": 0.1607, "step": 50080 }, { "epoch": 4.12747144112478, "grad_norm": 3.1583251953125, "learning_rate": 4.542045381365331e-06, "loss": 0.139, "step": 50100 }, { "epoch": 4.129119068541301, "grad_norm": 1.3226184844970703, "learning_rate": 4.525529755976154e-06, "loss": 0.151, "step": 50120 }, { "epoch": 4.130766695957821, "grad_norm": 1.3383190631866455, "learning_rate": 4.509041223335675e-06, "loss": 0.1567, "step": 50140 }, { "epoch": 4.132414323374341, "grad_norm": 0.4968540072441101, "learning_rate": 4.492579805262265e-06, "loss": 0.1476, "step": 50160 }, { "epoch": 4.134061950790861, "grad_norm": 0.2892935872077942, "learning_rate": 4.476145523538424e-06, "loss": 0.1493, "step": 50180 }, { "epoch": 4.135709578207381, "grad_norm": 0.5198156237602234, "learning_rate": 4.459738399910735e-06, "loss": 0.1401, "step": 50200 }, { "epoch": 4.137357205623902, "grad_norm": 0.7355120182037354, "learning_rate": 4.44335845608985e-06, "loss": 0.1409, "step": 50220 }, { "epoch": 4.139004833040421, "grad_norm": 2.883500576019287, "learning_rate": 4.427005713750454e-06, "loss": 0.1489, "step": 50240 }, { "epoch": 4.140652460456942, "grad_norm": 1.221643328666687, "learning_rate": 4.410680194531239e-06, "loss": 0.1675, "step": 50260 }, { "epoch": 4.142300087873462, "grad_norm": 0.5968945622444153, "learning_rate": 4.394381920034882e-06, "loss": 0.1517, "step": 50280 }, { "epoch": 4.143947715289983, "grad_norm": 0.9936787486076355, "learning_rate": 4.378110911827981e-06, "loss": 0.1433, "step": 50300 }, { "epoch": 4.145595342706502, "grad_norm": 1.2625391483306885, "learning_rate": 4.361867191441088e-06, "loss": 0.1476, "step": 50320 }, { "epoch": 4.147242970123023, "grad_norm": 0.8387503027915955, "learning_rate": 4.3456507803686265e-06, "loss": 0.1455, "step": 50340 }, { "epoch": 4.148890597539543, "grad_norm": 0.6159060001373291, "learning_rate": 4.329461700068885e-06, "loss": 0.1613, "step": 50360 }, { "epoch": 4.150538224956064, "grad_norm": 0.2571849524974823, "learning_rate": 4.313299971964002e-06, "loss": 0.1436, "step": 50380 }, { "epoch": 4.152185852372583, "grad_norm": 1.6137828826904297, "learning_rate": 4.29797168471382e-06, "loss": 0.1345, "step": 50400 }, { "epoch": 4.153833479789104, "grad_norm": 1.0832774639129639, "learning_rate": 4.2818633548671825e-06, "loss": 0.1485, "step": 50420 }, { "epoch": 4.155481107205624, "grad_norm": 1.0701065063476562, "learning_rate": 4.265782440199687e-06, "loss": 0.1575, "step": 50440 }, { "epoch": 4.157128734622144, "grad_norm": 1.129987120628357, "learning_rate": 4.249728961990329e-06, "loss": 0.1461, "step": 50460 }, { "epoch": 4.158776362038664, "grad_norm": 0.3026888966560364, "learning_rate": 4.233702941481804e-06, "loss": 0.144, "step": 50480 }, { "epoch": 4.160423989455184, "grad_norm": 2.898268461227417, "learning_rate": 4.217704399880462e-06, "loss": 0.1242, "step": 50500 }, { "epoch": 4.162071616871705, "grad_norm": 2.9740545749664307, "learning_rate": 4.201733358356303e-06, "loss": 0.1467, "step": 50520 }, { "epoch": 4.163719244288225, "grad_norm": 1.1181706190109253, "learning_rate": 4.185789838042933e-06, "loss": 0.1415, "step": 50540 }, { "epoch": 4.165366871704745, "grad_norm": 0.6788629293441772, "learning_rate": 4.169873860037546e-06, "loss": 0.1534, "step": 50560 }, { "epoch": 4.167014499121265, "grad_norm": 0.20810231566429138, "learning_rate": 4.153985445400882e-06, "loss": 0.1434, "step": 50580 }, { "epoch": 4.168662126537786, "grad_norm": 1.3016338348388672, "learning_rate": 4.138124615157218e-06, "loss": 0.1365, "step": 50600 }, { "epoch": 4.170309753954306, "grad_norm": 0.3593561351299286, "learning_rate": 4.122291390294319e-06, "loss": 0.1416, "step": 50620 }, { "epoch": 4.171957381370826, "grad_norm": 0.8482309579849243, "learning_rate": 4.106485791763431e-06, "loss": 0.1501, "step": 50640 }, { "epoch": 4.173605008787346, "grad_norm": 1.084952712059021, "learning_rate": 4.090707840479241e-06, "loss": 0.138, "step": 50660 }, { "epoch": 4.175252636203867, "grad_norm": 1.4379688501358032, "learning_rate": 4.074957557319853e-06, "loss": 0.1522, "step": 50680 }, { "epoch": 4.176900263620387, "grad_norm": 0.8657311201095581, "learning_rate": 4.059234963126746e-06, "loss": 0.1367, "step": 50700 }, { "epoch": 4.1785478910369065, "grad_norm": 1.0521893501281738, "learning_rate": 4.043540078704777e-06, "loss": 0.1634, "step": 50720 }, { "epoch": 4.180195518453427, "grad_norm": 1.0205254554748535, "learning_rate": 4.027872924822129e-06, "loss": 0.146, "step": 50740 }, { "epoch": 4.181843145869947, "grad_norm": 0.8555058240890503, "learning_rate": 4.01223352221029e-06, "loss": 0.1444, "step": 50760 }, { "epoch": 4.183490773286468, "grad_norm": 0.19485245645046234, "learning_rate": 3.996621891564037e-06, "loss": 0.151, "step": 50780 }, { "epoch": 4.1851384007029875, "grad_norm": 2.3820459842681885, "learning_rate": 3.981038053541375e-06, "loss": 0.1342, "step": 50800 }, { "epoch": 4.186786028119508, "grad_norm": 0.5096474289894104, "learning_rate": 3.965482028763554e-06, "loss": 0.1502, "step": 50820 }, { "epoch": 4.188433655536028, "grad_norm": 0.6316790580749512, "learning_rate": 3.949953837815007e-06, "loss": 0.1468, "step": 50840 }, { "epoch": 4.190081282952549, "grad_norm": 2.2774689197540283, "learning_rate": 3.934453501243352e-06, "loss": 0.1404, "step": 50860 }, { "epoch": 4.1917289103690685, "grad_norm": 0.7296523451805115, "learning_rate": 3.918981039559324e-06, "loss": 0.1317, "step": 50880 }, { "epoch": 4.193376537785589, "grad_norm": 0.804567813873291, "learning_rate": 3.903536473236796e-06, "loss": 0.1383, "step": 50900 }, { "epoch": 4.195024165202109, "grad_norm": 4.935573101043701, "learning_rate": 3.888119822712716e-06, "loss": 0.1461, "step": 50920 }, { "epoch": 4.196671792618629, "grad_norm": 0.6570642590522766, "learning_rate": 3.872731108387098e-06, "loss": 0.1642, "step": 50940 }, { "epoch": 4.1983194200351495, "grad_norm": 1.4187827110290527, "learning_rate": 3.858137724228927e-06, "loss": 0.1543, "step": 50960 }, { "epoch": 4.199967047451669, "grad_norm": 0.236581489443779, "learning_rate": 3.842803544025822e-06, "loss": 0.1342, "step": 50980 }, { "epoch": 4.20161467486819, "grad_norm": 3.1486315727233887, "learning_rate": 3.827497359985738e-06, "loss": 0.1397, "step": 51000 }, { "epoch": 4.20326230228471, "grad_norm": 2.405132293701172, "learning_rate": 3.8122191923625096e-06, "loss": 0.1542, "step": 51020 }, { "epoch": 4.2049099297012305, "grad_norm": 0.9896066784858704, "learning_rate": 3.7969690613729013e-06, "loss": 0.1609, "step": 51040 }, { "epoch": 4.20655755711775, "grad_norm": 1.055495023727417, "learning_rate": 3.7817469871965883e-06, "loss": 0.1614, "step": 51060 }, { "epoch": 4.208205184534271, "grad_norm": 0.44680196046829224, "learning_rate": 3.7665529899760963e-06, "loss": 0.16, "step": 51080 }, { "epoch": 4.209852811950791, "grad_norm": 1.3997803926467896, "learning_rate": 3.7513870898168157e-06, "loss": 0.1413, "step": 51100 }, { "epoch": 4.211500439367311, "grad_norm": 1.6829358339309692, "learning_rate": 3.73624930678696e-06, "loss": 0.1602, "step": 51120 }, { "epoch": 4.213148066783831, "grad_norm": 0.44518765807151794, "learning_rate": 3.7211396609175274e-06, "loss": 0.1568, "step": 51140 }, { "epoch": 4.214795694200351, "grad_norm": 1.3205004930496216, "learning_rate": 3.7060581722022925e-06, "loss": 0.1453, "step": 51160 }, { "epoch": 4.216443321616872, "grad_norm": 0.3841530680656433, "learning_rate": 3.6910048605977577e-06, "loss": 0.1432, "step": 51180 }, { "epoch": 4.2180909490333915, "grad_norm": 2.0167789459228516, "learning_rate": 3.6759797460231503e-06, "loss": 0.1424, "step": 51200 }, { "epoch": 4.219738576449912, "grad_norm": 0.71668541431427, "learning_rate": 3.6609828483603865e-06, "loss": 0.1521, "step": 51220 }, { "epoch": 4.221386203866432, "grad_norm": 2.7252283096313477, "learning_rate": 3.6460141874540417e-06, "loss": 0.1542, "step": 51240 }, { "epoch": 4.223033831282953, "grad_norm": 2.7393345832824707, "learning_rate": 3.6310737831113274e-06, "loss": 0.1557, "step": 51260 }, { "epoch": 4.2246814586994725, "grad_norm": 1.2369873523712158, "learning_rate": 3.6161616551020655e-06, "loss": 0.1363, "step": 51280 }, { "epoch": 4.226329086115993, "grad_norm": 0.6343797445297241, "learning_rate": 3.601277823158661e-06, "loss": 0.1451, "step": 51300 }, { "epoch": 4.227976713532513, "grad_norm": 0.5077883005142212, "learning_rate": 3.586422306976084e-06, "loss": 0.1559, "step": 51320 }, { "epoch": 4.229624340949034, "grad_norm": 0.8625708818435669, "learning_rate": 3.57159512621181e-06, "loss": 0.1438, "step": 51340 }, { "epoch": 4.2312719683655535, "grad_norm": 1.098022222518921, "learning_rate": 3.5567963004858485e-06, "loss": 0.1508, "step": 51360 }, { "epoch": 4.232919595782074, "grad_norm": 0.24744924902915955, "learning_rate": 3.5420258493806784e-06, "loss": 0.1371, "step": 51380 }, { "epoch": 4.234567223198594, "grad_norm": 1.9693866968154907, "learning_rate": 3.52728379244123e-06, "loss": 0.1346, "step": 51400 }, { "epoch": 4.236214850615114, "grad_norm": 0.6847724318504333, "learning_rate": 3.5125701491748624e-06, "loss": 0.1542, "step": 51420 }, { "epoch": 4.2378624780316345, "grad_norm": 1.2375575304031372, "learning_rate": 3.4978849390513354e-06, "loss": 0.1422, "step": 51440 }, { "epoch": 4.239510105448154, "grad_norm": 0.8321471810340881, "learning_rate": 3.48322818150279e-06, "loss": 0.1496, "step": 51460 }, { "epoch": 4.241157732864675, "grad_norm": 0.41930148005485535, "learning_rate": 3.4685998959237075e-06, "loss": 0.1448, "step": 51480 }, { "epoch": 4.242805360281195, "grad_norm": 0.9636502861976624, "learning_rate": 3.454000101670901e-06, "loss": 0.1478, "step": 51500 }, { "epoch": 4.2444529876977155, "grad_norm": 1.1395734548568726, "learning_rate": 3.4394288180634836e-06, "loss": 0.1494, "step": 51520 }, { "epoch": 4.246100615114235, "grad_norm": 1.8777341842651367, "learning_rate": 3.4248860643828346e-06, "loss": 0.149, "step": 51540 }, { "epoch": 4.247748242530756, "grad_norm": 4.042720794677734, "learning_rate": 3.410371859872599e-06, "loss": 0.1487, "step": 51560 }, { "epoch": 4.249395869947276, "grad_norm": 0.32698577642440796, "learning_rate": 3.3958862237386145e-06, "loss": 0.1438, "step": 51580 }, { "epoch": 4.2510434973637965, "grad_norm": 1.1202223300933838, "learning_rate": 3.3814291751489397e-06, "loss": 0.141, "step": 51600 }, { "epoch": 4.252691124780316, "grad_norm": 1.0267754793167114, "learning_rate": 3.3670007332337965e-06, "loss": 0.151, "step": 51620 }, { "epoch": 4.254338752196837, "grad_norm": 1.0378262996673584, "learning_rate": 3.352600917085563e-06, "loss": 0.1572, "step": 51640 }, { "epoch": 4.255986379613357, "grad_norm": 1.1462045907974243, "learning_rate": 3.338229745758717e-06, "loss": 0.1549, "step": 51660 }, { "epoch": 4.257634007029877, "grad_norm": 0.5214184522628784, "learning_rate": 3.3238872382698487e-06, "loss": 0.1482, "step": 51680 }, { "epoch": 4.259281634446397, "grad_norm": 2.4122419357299805, "learning_rate": 3.309573413597619e-06, "loss": 0.1367, "step": 51700 }, { "epoch": 4.260929261862917, "grad_norm": 1.4425621032714844, "learning_rate": 3.295288290682727e-06, "loss": 0.1586, "step": 51720 }, { "epoch": 4.262576889279438, "grad_norm": 1.007507085800171, "learning_rate": 3.2810318884279046e-06, "loss": 0.1438, "step": 51740 }, { "epoch": 4.264224516695958, "grad_norm": 0.92079097032547, "learning_rate": 3.267514925979853e-06, "loss": 0.1671, "step": 51760 }, { "epoch": 4.265872144112478, "grad_norm": 0.586050271987915, "learning_rate": 3.2533145832371547e-06, "loss": 0.1407, "step": 51780 }, { "epoch": 4.267519771528998, "grad_norm": 2.817061424255371, "learning_rate": 3.2391430166960364e-06, "loss": 0.1364, "step": 51800 }, { "epoch": 4.269167398945519, "grad_norm": 0.7773513197898865, "learning_rate": 3.2250002451089555e-06, "loss": 0.1509, "step": 51820 }, { "epoch": 4.270815026362039, "grad_norm": 0.7700615525245667, "learning_rate": 3.210886287190279e-06, "loss": 0.1522, "step": 51840 }, { "epoch": 4.272462653778559, "grad_norm": 0.6879720091819763, "learning_rate": 3.1968011616162198e-06, "loss": 0.1556, "step": 51860 }, { "epoch": 4.274110281195079, "grad_norm": 0.5407189726829529, "learning_rate": 3.1827448870248633e-06, "loss": 0.1443, "step": 51880 }, { "epoch": 4.275757908611599, "grad_norm": 3.257999897003174, "learning_rate": 3.1687174820161147e-06, "loss": 0.1441, "step": 51900 }, { "epoch": 4.27740553602812, "grad_norm": 7.568297386169434, "learning_rate": 3.1547189651516677e-06, "loss": 0.1501, "step": 51920 }, { "epoch": 4.279053163444639, "grad_norm": 1.1375879049301147, "learning_rate": 3.1407493549550025e-06, "loss": 0.1582, "step": 51940 }, { "epoch": 4.28070079086116, "grad_norm": 1.3720674514770508, "learning_rate": 3.1268086699113324e-06, "loss": 0.1704, "step": 51960 }, { "epoch": 4.28234841827768, "grad_norm": 1.5218106508255005, "learning_rate": 3.1128969284676098e-06, "loss": 0.1509, "step": 51980 }, { "epoch": 4.283996045694201, "grad_norm": 1.475452184677124, "learning_rate": 3.099014149032481e-06, "loss": 0.134, "step": 52000 }, { "epoch": 4.283996045694201, "eval_loss": 0.5309126377105713, "eval_runtime": 247.5954, "eval_samples_per_second": 88.002, "eval_steps_per_second": 22.004, "eval_wer": 0.22345145749293857, "step": 52000 }, { "epoch": 4.28564367311072, "grad_norm": 3.977250576019287, "learning_rate": 3.0851603499762683e-06, "loss": 0.1589, "step": 52020 }, { "epoch": 4.287291300527241, "grad_norm": 1.5431817770004272, "learning_rate": 3.0713355496309547e-06, "loss": 0.1562, "step": 52040 }, { "epoch": 4.288938927943761, "grad_norm": 0.9286394119262695, "learning_rate": 3.057539766290138e-06, "loss": 0.1581, "step": 52060 }, { "epoch": 4.2905865553602816, "grad_norm": 0.2867872416973114, "learning_rate": 3.043773018209026e-06, "loss": 0.1435, "step": 52080 }, { "epoch": 4.292234182776801, "grad_norm": 1.3681451082229614, "learning_rate": 3.0300353236044047e-06, "loss": 0.1426, "step": 52100 }, { "epoch": 4.293881810193321, "grad_norm": 0.7551935315132141, "learning_rate": 3.0163267006546177e-06, "loss": 0.1543, "step": 52120 }, { "epoch": 4.295529437609842, "grad_norm": 1.782273769378662, "learning_rate": 3.0026471674995423e-06, "loss": 0.1546, "step": 52140 }, { "epoch": 4.297177065026362, "grad_norm": 1.6591081619262695, "learning_rate": 2.9889967422405433e-06, "loss": 0.141, "step": 52160 }, { "epoch": 4.298824692442882, "grad_norm": 1.1596513986587524, "learning_rate": 2.9753754429404924e-06, "loss": 0.1444, "step": 52180 }, { "epoch": 4.300472319859402, "grad_norm": 0.9086285829544067, "learning_rate": 2.9617832876237095e-06, "loss": 0.1476, "step": 52200 }, { "epoch": 4.302119947275923, "grad_norm": 0.5555962920188904, "learning_rate": 2.9482202942759525e-06, "loss": 0.1503, "step": 52220 }, { "epoch": 4.303767574692443, "grad_norm": 5.21596622467041, "learning_rate": 2.9346864808443954e-06, "loss": 0.1471, "step": 52240 }, { "epoch": 4.305415202108963, "grad_norm": 1.3723896741867065, "learning_rate": 2.921181865237585e-06, "loss": 0.1609, "step": 52260 }, { "epoch": 4.307062829525483, "grad_norm": 0.38962632417678833, "learning_rate": 2.9077064653254468e-06, "loss": 0.15, "step": 52280 }, { "epoch": 4.308710456942004, "grad_norm": 5.113753318786621, "learning_rate": 2.8942602989392386e-06, "loss": 0.1363, "step": 52300 }, { "epoch": 4.310358084358524, "grad_norm": 0.7274277806282043, "learning_rate": 2.8808433838715426e-06, "loss": 0.1552, "step": 52320 }, { "epoch": 4.312005711775044, "grad_norm": 3.2971127033233643, "learning_rate": 2.8674557378762318e-06, "loss": 0.1668, "step": 52340 }, { "epoch": 4.313653339191564, "grad_norm": 0.5251350402832031, "learning_rate": 2.854097378668438e-06, "loss": 0.1654, "step": 52360 }, { "epoch": 4.315300966608085, "grad_norm": 0.7041589021682739, "learning_rate": 2.840768323924556e-06, "loss": 0.1371, "step": 52380 }, { "epoch": 4.316948594024605, "grad_norm": 1.8479079008102417, "learning_rate": 2.827468591282195e-06, "loss": 0.1344, "step": 52400 }, { "epoch": 4.318596221441124, "grad_norm": 1.1348035335540771, "learning_rate": 2.8141981983401707e-06, "loss": 0.1582, "step": 52420 }, { "epoch": 4.320243848857645, "grad_norm": 2.181511878967285, "learning_rate": 2.800957162658455e-06, "loss": 0.1538, "step": 52440 }, { "epoch": 4.321891476274165, "grad_norm": 0.7242217063903809, "learning_rate": 2.787745501758199e-06, "loss": 0.1388, "step": 52460 }, { "epoch": 4.323539103690686, "grad_norm": 0.6617832779884338, "learning_rate": 2.7745632331216687e-06, "loss": 0.1549, "step": 52480 }, { "epoch": 4.325186731107205, "grad_norm": 4.680964469909668, "learning_rate": 2.7614103741922414e-06, "loss": 0.1329, "step": 52500 }, { "epoch": 4.326834358523726, "grad_norm": 1.1766717433929443, "learning_rate": 2.7482869423743812e-06, "loss": 0.154, "step": 52520 }, { "epoch": 4.328481985940246, "grad_norm": 1.1324840784072876, "learning_rate": 2.7351929550335997e-06, "loss": 0.1414, "step": 52540 }, { "epoch": 4.330129613356767, "grad_norm": 0.7402358055114746, "learning_rate": 2.7221284294964624e-06, "loss": 0.1511, "step": 52560 }, { "epoch": 4.331777240773286, "grad_norm": 1.3569769859313965, "learning_rate": 2.7090933830505398e-06, "loss": 0.1528, "step": 52580 }, { "epoch": 4.333424868189807, "grad_norm": 1.0375301837921143, "learning_rate": 2.6960878329443996e-06, "loss": 0.1412, "step": 52600 }, { "epoch": 4.335072495606327, "grad_norm": 19.285921096801758, "learning_rate": 2.6831117963875745e-06, "loss": 0.1465, "step": 52620 }, { "epoch": 4.336720123022847, "grad_norm": 0.6132317781448364, "learning_rate": 2.6701652905505443e-06, "loss": 0.1567, "step": 52640 }, { "epoch": 4.338367750439367, "grad_norm": 1.3831067085266113, "learning_rate": 2.6572483325647157e-06, "loss": 0.1549, "step": 52660 }, { "epoch": 4.340015377855887, "grad_norm": 1.0848127603530884, "learning_rate": 2.6443609395223943e-06, "loss": 0.1412, "step": 52680 }, { "epoch": 4.341663005272408, "grad_norm": 5.991546154022217, "learning_rate": 2.6315031284767614e-06, "loss": 0.1364, "step": 52700 }, { "epoch": 4.343310632688928, "grad_norm": 1.5096750259399414, "learning_rate": 2.618674916441863e-06, "loss": 0.1554, "step": 52720 }, { "epoch": 4.344958260105448, "grad_norm": 0.5254679322242737, "learning_rate": 2.605876320392561e-06, "loss": 0.1581, "step": 52740 }, { "epoch": 4.346605887521968, "grad_norm": 0.719620943069458, "learning_rate": 2.593107357264546e-06, "loss": 0.1817, "step": 52760 }, { "epoch": 4.348253514938489, "grad_norm": 0.5592374205589294, "learning_rate": 2.580368043954287e-06, "loss": 0.1384, "step": 52780 }, { "epoch": 4.349901142355009, "grad_norm": 2.1580827236175537, "learning_rate": 2.567658397319023e-06, "loss": 0.1434, "step": 52800 }, { "epoch": 4.351548769771529, "grad_norm": 0.6046550273895264, "learning_rate": 2.5549784341767385e-06, "loss": 0.1438, "step": 52820 }, { "epoch": 4.353196397188049, "grad_norm": 0.4718208611011505, "learning_rate": 2.5423281713061325e-06, "loss": 0.1581, "step": 52840 }, { "epoch": 4.354844024604569, "grad_norm": 2.0034682750701904, "learning_rate": 2.5297076254466033e-06, "loss": 0.1514, "step": 52860 }, { "epoch": 4.35649165202109, "grad_norm": 0.5157046318054199, "learning_rate": 2.517116813298237e-06, "loss": 0.1357, "step": 52880 }, { "epoch": 4.3581392794376095, "grad_norm": 1.4846811294555664, "learning_rate": 2.504555751521767e-06, "loss": 0.1415, "step": 52900 }, { "epoch": 4.35978690685413, "grad_norm": 0.4785284399986267, "learning_rate": 2.4920244567385647e-06, "loss": 0.1551, "step": 52920 }, { "epoch": 4.36143453427065, "grad_norm": 0.6805145740509033, "learning_rate": 2.479522945530596e-06, "loss": 0.1515, "step": 52940 }, { "epoch": 4.363082161687171, "grad_norm": 0.809760332107544, "learning_rate": 2.467051234440437e-06, "loss": 0.1604, "step": 52960 }, { "epoch": 4.3647297891036905, "grad_norm": 0.30300357937812805, "learning_rate": 2.45460933997122e-06, "loss": 0.1295, "step": 52980 }, { "epoch": 4.366377416520211, "grad_norm": 0.6598816514015198, "learning_rate": 2.4421972785866247e-06, "loss": 0.1277, "step": 53000 }, { "epoch": 4.368025043936731, "grad_norm": 0.372051477432251, "learning_rate": 2.429815066710861e-06, "loss": 0.1494, "step": 53020 }, { "epoch": 4.369672671353252, "grad_norm": 0.7859922051429749, "learning_rate": 2.417462720728622e-06, "loss": 0.156, "step": 53040 }, { "epoch": 4.3713202987697715, "grad_norm": 1.0352360010147095, "learning_rate": 2.405140256985097e-06, "loss": 0.1494, "step": 53060 }, { "epoch": 4.372967926186292, "grad_norm": 1.3417617082595825, "learning_rate": 2.392847691785935e-06, "loss": 0.1431, "step": 53080 }, { "epoch": 4.374615553602812, "grad_norm": 1.5382243394851685, "learning_rate": 2.3805850413972097e-06, "loss": 0.1321, "step": 53100 }, { "epoch": 4.376263181019332, "grad_norm": 0.5792887210845947, "learning_rate": 2.3683523220454252e-06, "loss": 0.1452, "step": 53120 }, { "epoch": 4.3779108084358525, "grad_norm": 0.44876059889793396, "learning_rate": 2.356149549917458e-06, "loss": 0.1563, "step": 53140 }, { "epoch": 4.379558435852372, "grad_norm": 0.4710574150085449, "learning_rate": 2.3439767411605807e-06, "loss": 0.1494, "step": 53160 }, { "epoch": 4.381206063268893, "grad_norm": 0.17351019382476807, "learning_rate": 2.3318339118824026e-06, "loss": 0.1464, "step": 53180 }, { "epoch": 4.382853690685413, "grad_norm": 0.9383738040924072, "learning_rate": 2.319721078150869e-06, "loss": 0.138, "step": 53200 }, { "epoch": 4.384501318101933, "grad_norm": 1.0102771520614624, "learning_rate": 2.307638255994235e-06, "loss": 0.1473, "step": 53220 }, { "epoch": 4.386148945518453, "grad_norm": 0.5714807510375977, "learning_rate": 2.2955854614010288e-06, "loss": 0.1544, "step": 53240 }, { "epoch": 4.387796572934974, "grad_norm": 2.1891729831695557, "learning_rate": 2.283562710320064e-06, "loss": 0.1547, "step": 53260 }, { "epoch": 4.389444200351494, "grad_norm": 0.23909251391887665, "learning_rate": 2.271570018660385e-06, "loss": 0.1516, "step": 53280 }, { "epoch": 4.391091827768014, "grad_norm": 1.3543933629989624, "learning_rate": 2.259607402291272e-06, "loss": 0.133, "step": 53300 }, { "epoch": 4.392739455184534, "grad_norm": 0.5052430033683777, "learning_rate": 2.2476748770421995e-06, "loss": 0.1664, "step": 53320 }, { "epoch": 4.394387082601055, "grad_norm": 0.45945048332214355, "learning_rate": 2.2357724587028257e-06, "loss": 0.1565, "step": 53340 }, { "epoch": 4.396034710017575, "grad_norm": 1.039807915687561, "learning_rate": 2.223900163022971e-06, "loss": 0.1485, "step": 53360 }, { "epoch": 4.3976823374340945, "grad_norm": 0.20642758905887604, "learning_rate": 2.2120580057125974e-06, "loss": 0.1572, "step": 53380 }, { "epoch": 4.399329964850615, "grad_norm": 1.0194061994552612, "learning_rate": 2.2002460024417916e-06, "loss": 0.1298, "step": 53400 }, { "epoch": 4.400977592267135, "grad_norm": 0.8997033834457397, "learning_rate": 2.1884641688407174e-06, "loss": 0.1623, "step": 53420 }, { "epoch": 4.402625219683656, "grad_norm": 0.3872506022453308, "learning_rate": 2.1767125204996435e-06, "loss": 0.1462, "step": 53440 }, { "epoch": 4.4042728471001755, "grad_norm": 0.39947617053985596, "learning_rate": 2.1649910729688806e-06, "loss": 0.1515, "step": 53460 }, { "epoch": 4.405920474516696, "grad_norm": 0.22790731489658356, "learning_rate": 2.1532998417587822e-06, "loss": 0.1515, "step": 53480 }, { "epoch": 4.407568101933216, "grad_norm": 1.5337435007095337, "learning_rate": 2.1416388423397187e-06, "loss": 0.1358, "step": 53500 }, { "epoch": 4.409215729349737, "grad_norm": 0.3033355474472046, "learning_rate": 2.1300080901420464e-06, "loss": 0.1535, "step": 53520 }, { "epoch": 4.4108633567662565, "grad_norm": 0.7599854469299316, "learning_rate": 2.1184076005561105e-06, "loss": 0.1562, "step": 53540 }, { "epoch": 4.412510984182777, "grad_norm": 0.6213507056236267, "learning_rate": 2.106837388932201e-06, "loss": 0.1614, "step": 53560 }, { "epoch": 4.414158611599297, "grad_norm": 0.1423792690038681, "learning_rate": 2.0952974705805513e-06, "loss": 0.1465, "step": 53580 }, { "epoch": 4.415806239015817, "grad_norm": 0.5973201394081116, "learning_rate": 2.0837878607713106e-06, "loss": 0.1422, "step": 53600 }, { "epoch": 4.4174538664323375, "grad_norm": 0.5904609560966492, "learning_rate": 2.072308574734505e-06, "loss": 0.1399, "step": 53620 }, { "epoch": 4.419101493848857, "grad_norm": 0.38316109776496887, "learning_rate": 2.0608596276600528e-06, "loss": 0.1519, "step": 53640 }, { "epoch": 4.420749121265378, "grad_norm": 0.5448755025863647, "learning_rate": 2.0494410346977216e-06, "loss": 0.1322, "step": 53660 }, { "epoch": 4.422396748681898, "grad_norm": 0.5622198581695557, "learning_rate": 2.0380528109571114e-06, "loss": 0.1409, "step": 53680 }, { "epoch": 4.4240443760984185, "grad_norm": 2.2935588359832764, "learning_rate": 2.0266949715076396e-06, "loss": 0.14, "step": 53700 }, { "epoch": 4.425692003514938, "grad_norm": 2.701993465423584, "learning_rate": 2.0153675313785092e-06, "loss": 0.1556, "step": 53720 }, { "epoch": 4.427339630931459, "grad_norm": 1.0471820831298828, "learning_rate": 2.0040705055587085e-06, "loss": 0.1393, "step": 53740 }, { "epoch": 4.428987258347979, "grad_norm": 0.8080337643623352, "learning_rate": 1.992803908996971e-06, "loss": 0.1572, "step": 53760 }, { "epoch": 4.4306348857644995, "grad_norm": 0.21969157457351685, "learning_rate": 1.9815677566017684e-06, "loss": 0.1361, "step": 53780 }, { "epoch": 4.432282513181019, "grad_norm": 1.0187530517578125, "learning_rate": 1.9703620632412963e-06, "loss": 0.1368, "step": 53800 }, { "epoch": 4.433930140597539, "grad_norm": 0.9453950524330139, "learning_rate": 1.959186843743427e-06, "loss": 0.1523, "step": 53820 }, { "epoch": 4.43557776801406, "grad_norm": 0.7072224617004395, "learning_rate": 1.948042112895723e-06, "loss": 0.1377, "step": 53840 }, { "epoch": 4.43722539543058, "grad_norm": 0.8716121912002563, "learning_rate": 1.936927885445397e-06, "loss": 0.1468, "step": 53860 }, { "epoch": 4.4388730228471, "grad_norm": 0.35938969254493713, "learning_rate": 1.925844176099298e-06, "loss": 0.1394, "step": 53880 }, { "epoch": 4.44052065026362, "grad_norm": 1.345229148864746, "learning_rate": 1.9147909995238966e-06, "loss": 0.1276, "step": 53900 }, { "epoch": 4.442168277680141, "grad_norm": 0.5796838402748108, "learning_rate": 1.9037683703452548e-06, "loss": 0.1493, "step": 53920 }, { "epoch": 4.443815905096661, "grad_norm": 0.5931035280227661, "learning_rate": 1.8927763031490215e-06, "loss": 0.1497, "step": 53940 }, { "epoch": 4.445463532513181, "grad_norm": 0.5077899694442749, "learning_rate": 1.8818148124803925e-06, "loss": 0.1431, "step": 53960 }, { "epoch": 4.447111159929701, "grad_norm": 0.23701219260692596, "learning_rate": 1.870883912844118e-06, "loss": 0.1478, "step": 53980 }, { "epoch": 4.448758787346222, "grad_norm": 0.7609965801239014, "learning_rate": 1.85998361870445e-06, "loss": 0.137, "step": 54000 }, { "epoch": 4.448758787346222, "eval_loss": 0.5092979669570923, "eval_runtime": 246.1568, "eval_samples_per_second": 88.517, "eval_steps_per_second": 22.132, "eval_wer": 0.21872891723097337, "step": 54000 }, { "epoch": 4.450406414762742, "grad_norm": 1.7415201663970947, "learning_rate": 1.849113944485159e-06, "loss": 0.1504, "step": 54020 }, { "epoch": 4.452054042179262, "grad_norm": 2.653851270675659, "learning_rate": 1.8382749045694925e-06, "loss": 0.1554, "step": 54040 }, { "epoch": 4.453701669595782, "grad_norm": 0.4178394079208374, "learning_rate": 1.8274665133001578e-06, "loss": 0.1589, "step": 54060 }, { "epoch": 4.455349297012303, "grad_norm": 0.42581671476364136, "learning_rate": 1.816688784979309e-06, "loss": 0.1263, "step": 54080 }, { "epoch": 4.456996924428823, "grad_norm": 1.5703861713409424, "learning_rate": 1.8059417338685298e-06, "loss": 0.1421, "step": 54100 }, { "epoch": 4.458644551845342, "grad_norm": 2.1000301837921143, "learning_rate": 1.7952253741887997e-06, "loss": 0.1635, "step": 54120 }, { "epoch": 4.460292179261863, "grad_norm": 0.5711442828178406, "learning_rate": 1.7845397201204956e-06, "loss": 0.137, "step": 54140 }, { "epoch": 4.461939806678383, "grad_norm": 0.46140486001968384, "learning_rate": 1.7738847858033591e-06, "loss": 0.158, "step": 54160 }, { "epoch": 4.4635874340949035, "grad_norm": 0.41269364953041077, "learning_rate": 1.7632605853364847e-06, "loss": 0.1413, "step": 54180 }, { "epoch": 4.465235061511423, "grad_norm": 0.8353527188301086, "learning_rate": 1.752667132778299e-06, "loss": 0.129, "step": 54200 }, { "epoch": 4.466882688927944, "grad_norm": 1.356337547302246, "learning_rate": 1.7421044421465331e-06, "loss": 0.1462, "step": 54220 }, { "epoch": 4.468530316344464, "grad_norm": 1.3821256160736084, "learning_rate": 1.7315725274182238e-06, "loss": 0.1606, "step": 54240 }, { "epoch": 4.4701779437609845, "grad_norm": 4.19685173034668, "learning_rate": 1.7210714025296753e-06, "loss": 0.1476, "step": 54260 }, { "epoch": 4.471825571177504, "grad_norm": 0.4461887776851654, "learning_rate": 1.7106010813764562e-06, "loss": 0.1377, "step": 54280 }, { "epoch": 4.473473198594025, "grad_norm": 1.226690649986267, "learning_rate": 1.7001615778133722e-06, "loss": 0.1353, "step": 54300 }, { "epoch": 4.475120826010545, "grad_norm": 1.3363343477249146, "learning_rate": 1.6897529056544431e-06, "loss": 0.1404, "step": 54320 }, { "epoch": 4.476768453427065, "grad_norm": 0.9175166487693787, "learning_rate": 1.6793750786729012e-06, "loss": 0.1511, "step": 54340 }, { "epoch": 4.478416080843585, "grad_norm": 0.9659866690635681, "learning_rate": 1.6690281106011534e-06, "loss": 0.1586, "step": 54360 }, { "epoch": 4.480063708260105, "grad_norm": 0.4666774272918701, "learning_rate": 1.6587120151307812e-06, "loss": 0.1476, "step": 54380 }, { "epoch": 4.481711335676626, "grad_norm": 1.403324842453003, "learning_rate": 1.6484268059125146e-06, "loss": 0.1362, "step": 54400 }, { "epoch": 4.483358963093146, "grad_norm": 2.8815035820007324, "learning_rate": 1.6381724965562006e-06, "loss": 0.1462, "step": 54420 }, { "epoch": 4.485006590509666, "grad_norm": 2.308983564376831, "learning_rate": 1.6279491006308129e-06, "loss": 0.153, "step": 54440 }, { "epoch": 4.486654217926186, "grad_norm": 0.7412064075469971, "learning_rate": 1.6177566316644104e-06, "loss": 0.1559, "step": 54460 }, { "epoch": 4.488301845342707, "grad_norm": 0.550797700881958, "learning_rate": 1.6075951031441356e-06, "loss": 0.1386, "step": 54480 }, { "epoch": 4.489949472759227, "grad_norm": 0.7735878229141235, "learning_rate": 1.5974645285161794e-06, "loss": 0.1366, "step": 54500 }, { "epoch": 4.491597100175747, "grad_norm": 1.8373870849609375, "learning_rate": 1.5873649211857771e-06, "loss": 0.156, "step": 54520 }, { "epoch": 4.493244727592267, "grad_norm": 3.1302435398101807, "learning_rate": 1.5772962945171888e-06, "loss": 0.1514, "step": 54540 }, { "epoch": 4.494892355008787, "grad_norm": 0.7400826811790466, "learning_rate": 1.5672586618336842e-06, "loss": 0.1376, "step": 54560 }, { "epoch": 4.496539982425308, "grad_norm": 0.49967142939567566, "learning_rate": 1.557252036417506e-06, "loss": 0.1443, "step": 54580 }, { "epoch": 4.498187609841827, "grad_norm": 5.618014812469482, "learning_rate": 1.5477744748142453e-06, "loss": 0.1364, "step": 54600 }, { "epoch": 4.499835237258348, "grad_norm": 0.8003312349319458, "learning_rate": 1.5378283516170378e-06, "loss": 0.1452, "step": 54620 }, { "epoch": 4.501482864674868, "grad_norm": 1.0814329385757446, "learning_rate": 1.527913274630688e-06, "loss": 0.157, "step": 54640 }, { "epoch": 4.503130492091389, "grad_norm": 0.44943967461586, "learning_rate": 1.5180292569752675e-06, "loss": 0.1492, "step": 54660 }, { "epoch": 4.504778119507908, "grad_norm": 0.3442207872867584, "learning_rate": 1.5081763117297642e-06, "loss": 0.1382, "step": 54680 }, { "epoch": 4.506425746924429, "grad_norm": 6.923589706420898, "learning_rate": 1.498354451932038e-06, "loss": 0.1315, "step": 54700 }, { "epoch": 4.508073374340949, "grad_norm": 1.0040614604949951, "learning_rate": 1.488563690578823e-06, "loss": 0.1516, "step": 54720 }, { "epoch": 4.50972100175747, "grad_norm": 2.2533833980560303, "learning_rate": 1.4788040406257002e-06, "loss": 0.1568, "step": 54740 }, { "epoch": 4.511368629173989, "grad_norm": 1.032484531402588, "learning_rate": 1.4690755149870756e-06, "loss": 0.1546, "step": 54760 }, { "epoch": 4.513016256590509, "grad_norm": 0.3278011381626129, "learning_rate": 1.4593781265361844e-06, "loss": 0.145, "step": 54780 }, { "epoch": 4.51466388400703, "grad_norm": 5.8222432136535645, "learning_rate": 1.4497118881050458e-06, "loss": 0.1335, "step": 54800 }, { "epoch": 4.516311511423551, "grad_norm": 0.5845094919204712, "learning_rate": 1.4400768124844615e-06, "loss": 0.1479, "step": 54820 }, { "epoch": 4.51795913884007, "grad_norm": 2.58100962638855, "learning_rate": 1.4304729124240106e-06, "loss": 0.1478, "step": 54840 }, { "epoch": 4.51960676625659, "grad_norm": 3.2109968662261963, "learning_rate": 1.4209002006320027e-06, "loss": 0.139, "step": 54860 }, { "epoch": 4.521254393673111, "grad_norm": 0.7873286008834839, "learning_rate": 1.411358689775491e-06, "loss": 0.1599, "step": 54880 }, { "epoch": 4.522902021089631, "grad_norm": 2.308744430541992, "learning_rate": 1.4018483924802262e-06, "loss": 0.1486, "step": 54900 }, { "epoch": 4.524549648506151, "grad_norm": 0.5533203482627869, "learning_rate": 1.3923693213306721e-06, "loss": 0.1473, "step": 54920 }, { "epoch": 4.526197275922671, "grad_norm": 1.296086072921753, "learning_rate": 1.3829214888699676e-06, "loss": 0.1547, "step": 54940 }, { "epoch": 4.527844903339192, "grad_norm": 0.7685306668281555, "learning_rate": 1.3735049075999096e-06, "loss": 0.1499, "step": 54960 }, { "epoch": 4.529492530755712, "grad_norm": 0.4017468988895416, "learning_rate": 1.36411958998095e-06, "loss": 0.1526, "step": 54980 }, { "epoch": 4.531140158172232, "grad_norm": 3.8548662662506104, "learning_rate": 1.354765548432163e-06, "loss": 0.1405, "step": 55000 }, { "epoch": 4.532787785588752, "grad_norm": 1.4292021989822388, "learning_rate": 1.3454427953312422e-06, "loss": 0.1591, "step": 55020 }, { "epoch": 4.534435413005273, "grad_norm": 1.4278817176818848, "learning_rate": 1.3361513430144773e-06, "loss": 0.1544, "step": 55040 }, { "epoch": 4.536083040421793, "grad_norm": 1.1543562412261963, "learning_rate": 1.3268912037767423e-06, "loss": 0.1606, "step": 55060 }, { "epoch": 4.5377306678383125, "grad_norm": 0.20305891335010529, "learning_rate": 1.3176623898714768e-06, "loss": 0.1583, "step": 55080 }, { "epoch": 4.539378295254833, "grad_norm": 0.9102775454521179, "learning_rate": 1.3084649135106568e-06, "loss": 0.1374, "step": 55100 }, { "epoch": 4.541025922671353, "grad_norm": 0.8400938510894775, "learning_rate": 1.2992987868648082e-06, "loss": 0.1478, "step": 55120 }, { "epoch": 4.542673550087874, "grad_norm": 0.8100221157073975, "learning_rate": 1.2901640220629647e-06, "loss": 0.1612, "step": 55140 }, { "epoch": 4.5443211775043935, "grad_norm": 0.6619641780853271, "learning_rate": 1.2810606311926571e-06, "loss": 0.1551, "step": 55160 }, { "epoch": 4.545968804920914, "grad_norm": 0.7199742197990417, "learning_rate": 1.2719886262999165e-06, "loss": 0.1589, "step": 55180 }, { "epoch": 4.547616432337434, "grad_norm": 1.7203363180160522, "learning_rate": 1.2629480193892208e-06, "loss": 0.1405, "step": 55200 }, { "epoch": 4.549264059753955, "grad_norm": 0.8535423278808594, "learning_rate": 1.2539388224235143e-06, "loss": 0.1643, "step": 55220 }, { "epoch": 4.5509116871704745, "grad_norm": 0.8002120852470398, "learning_rate": 1.244961047324178e-06, "loss": 0.1685, "step": 55240 }, { "epoch": 4.552559314586995, "grad_norm": 1.135498046875, "learning_rate": 1.2360147059710087e-06, "loss": 0.1457, "step": 55260 }, { "epoch": 4.554206942003515, "grad_norm": 0.5072938203811646, "learning_rate": 1.2270998102022147e-06, "loss": 0.1465, "step": 55280 }, { "epoch": 4.555854569420035, "grad_norm": 1.3565483093261719, "learning_rate": 1.2182163718143875e-06, "loss": 0.1405, "step": 55300 }, { "epoch": 4.557502196836555, "grad_norm": 2.158135175704956, "learning_rate": 1.209364402562499e-06, "loss": 0.1585, "step": 55320 }, { "epoch": 4.559149824253075, "grad_norm": 0.9993425011634827, "learning_rate": 1.2005439141598769e-06, "loss": 0.1668, "step": 55340 }, { "epoch": 4.560797451669596, "grad_norm": 0.4801332950592041, "learning_rate": 1.1917549182781928e-06, "loss": 0.1454, "step": 55360 }, { "epoch": 4.562445079086116, "grad_norm": 0.24088925123214722, "learning_rate": 1.1829974265474386e-06, "loss": 0.1339, "step": 55380 }, { "epoch": 4.564092706502636, "grad_norm": 3.2226004600524902, "learning_rate": 1.1742714505559305e-06, "loss": 0.1346, "step": 55400 }, { "epoch": 4.565740333919156, "grad_norm": 0.8390809893608093, "learning_rate": 1.1655770018502765e-06, "loss": 0.1387, "step": 55420 }, { "epoch": 4.567387961335677, "grad_norm": 1.1488370895385742, "learning_rate": 1.1569140919353627e-06, "loss": 0.1407, "step": 55440 }, { "epoch": 4.569035588752197, "grad_norm": 1.1055350303649902, "learning_rate": 1.14828273227435e-06, "loss": 0.1404, "step": 55460 }, { "epoch": 4.570683216168717, "grad_norm": 0.338011234998703, "learning_rate": 1.139682934288641e-06, "loss": 0.1394, "step": 55480 }, { "epoch": 4.572330843585237, "grad_norm": 0.8999879360198975, "learning_rate": 1.1311147093578777e-06, "loss": 0.132, "step": 55500 }, { "epoch": 4.573978471001757, "grad_norm": 0.5754852294921875, "learning_rate": 1.1225780688199295e-06, "loss": 0.1599, "step": 55520 }, { "epoch": 4.575626098418278, "grad_norm": 1.3341476917266846, "learning_rate": 1.1140730239708602e-06, "loss": 0.1483, "step": 55540 }, { "epoch": 4.5772737258347975, "grad_norm": 0.4963632822036743, "learning_rate": 1.10559958606494e-06, "loss": 0.1562, "step": 55560 }, { "epoch": 4.578921353251318, "grad_norm": 0.5869734287261963, "learning_rate": 1.0971577663145994e-06, "loss": 0.1506, "step": 55580 }, { "epoch": 4.580568980667838, "grad_norm": 3.3073182106018066, "learning_rate": 1.088747575890439e-06, "loss": 0.1409, "step": 55600 }, { "epoch": 4.582216608084359, "grad_norm": 0.8252844214439392, "learning_rate": 1.0803690259212075e-06, "loss": 0.1444, "step": 55620 }, { "epoch": 4.5838642355008785, "grad_norm": 0.9522702693939209, "learning_rate": 1.0724387205203552e-06, "loss": 0.1666, "step": 55640 }, { "epoch": 4.585511862917399, "grad_norm": 0.9360484480857849, "learning_rate": 1.0641219012887172e-06, "loss": 0.1554, "step": 55660 }, { "epoch": 4.587159490333919, "grad_norm": 0.6430097818374634, "learning_rate": 1.0558367550978138e-06, "loss": 0.1639, "step": 55680 }, { "epoch": 4.58880711775044, "grad_norm": 0.9593109488487244, "learning_rate": 1.0475832929109309e-06, "loss": 0.138, "step": 55700 }, { "epoch": 4.5904547451669595, "grad_norm": 1.2917250394821167, "learning_rate": 1.0393615256494238e-06, "loss": 0.1518, "step": 55720 }, { "epoch": 4.592102372583479, "grad_norm": 0.8590771555900574, "learning_rate": 1.0311714641927068e-06, "loss": 0.1612, "step": 55740 }, { "epoch": 4.59375, "grad_norm": 0.8756323456764221, "learning_rate": 1.0230131193782466e-06, "loss": 0.1547, "step": 55760 }, { "epoch": 4.595397627416521, "grad_norm": 0.30478066205978394, "learning_rate": 1.0148865020015274e-06, "loss": 0.1564, "step": 55780 }, { "epoch": 4.5970452548330405, "grad_norm": 0.6073692440986633, "learning_rate": 1.0067916228160611e-06, "loss": 0.1369, "step": 55800 }, { "epoch": 4.59869288224956, "grad_norm": 1.2964072227478027, "learning_rate": 9.9872849253336e-07, "loss": 0.1684, "step": 55820 }, { "epoch": 4.600340509666081, "grad_norm": 1.1033300161361694, "learning_rate": 9.906971218229255e-07, "loss": 0.1581, "step": 55840 }, { "epoch": 4.601988137082601, "grad_norm": 1.938038945198059, "learning_rate": 9.82697521312234e-07, "loss": 0.1541, "step": 55860 }, { "epoch": 4.6036357644991215, "grad_norm": 0.4966880977153778, "learning_rate": 9.747297015867129e-07, "loss": 0.1479, "step": 55880 }, { "epoch": 4.605283391915641, "grad_norm": 3.8020315170288086, "learning_rate": 9.667936731897475e-07, "loss": 0.1463, "step": 55900 }, { "epoch": 4.606931019332162, "grad_norm": 1.588336706161499, "learning_rate": 9.588894466226517e-07, "loss": 0.1541, "step": 55920 }, { "epoch": 4.608578646748682, "grad_norm": 0.9965344667434692, "learning_rate": 9.51017032344656e-07, "loss": 0.1534, "step": 55940 }, { "epoch": 4.6102262741652025, "grad_norm": 1.3559978008270264, "learning_rate": 9.431764407728944e-07, "loss": 0.1537, "step": 55960 }, { "epoch": 4.611873901581722, "grad_norm": 0.4042039215564728, "learning_rate": 9.353676822823981e-07, "loss": 0.1537, "step": 55980 }, { "epoch": 4.613521528998243, "grad_norm": 2.2283642292022705, "learning_rate": 9.275907672060685e-07, "loss": 0.1421, "step": 56000 }, { "epoch": 4.613521528998243, "eval_loss": 0.5239511132240295, "eval_runtime": 254.4993, "eval_samples_per_second": 85.615, "eval_steps_per_second": 21.407, "eval_wer": 0.2226704327548898, "step": 56000 }, { "epoch": 4.615169156414763, "grad_norm": 1.169054388999939, "learning_rate": 9.198457058346737e-07, "loss": 0.1581, "step": 56020 }, { "epoch": 4.616816783831283, "grad_norm": 0.80378258228302, "learning_rate": 9.121325084168269e-07, "loss": 0.1543, "step": 56040 }, { "epoch": 4.618464411247803, "grad_norm": 0.8089606761932373, "learning_rate": 9.044511851589831e-07, "loss": 0.1548, "step": 56060 }, { "epoch": 4.620112038664323, "grad_norm": 0.2052605152130127, "learning_rate": 8.968017462254203e-07, "loss": 0.1576, "step": 56080 }, { "epoch": 4.621759666080844, "grad_norm": 0.8120124936103821, "learning_rate": 8.891842017382218e-07, "loss": 0.1324, "step": 56100 }, { "epoch": 4.623407293497364, "grad_norm": 0.8884233236312866, "learning_rate": 8.815985617772693e-07, "loss": 0.1508, "step": 56120 }, { "epoch": 4.625054920913884, "grad_norm": 1.3301658630371094, "learning_rate": 8.740448363802251e-07, "loss": 0.1579, "step": 56140 }, { "epoch": 4.626702548330404, "grad_norm": 0.5924556255340576, "learning_rate": 8.665230355425213e-07, "loss": 0.1384, "step": 56160 }, { "epoch": 4.628350175746925, "grad_norm": 0.46048134565353394, "learning_rate": 8.590331692173464e-07, "loss": 0.1417, "step": 56180 }, { "epoch": 4.629997803163445, "grad_norm": 2.2289745807647705, "learning_rate": 8.515752473156308e-07, "loss": 0.1276, "step": 56200 }, { "epoch": 4.631645430579965, "grad_norm": 1.227634072303772, "learning_rate": 8.441492797060385e-07, "loss": 0.1523, "step": 56220 }, { "epoch": 4.633293057996485, "grad_norm": 2.3526508808135986, "learning_rate": 8.367552762149456e-07, "loss": 0.1484, "step": 56240 }, { "epoch": 4.634940685413005, "grad_norm": 23.358123779296875, "learning_rate": 8.29393246626431e-07, "loss": 0.1405, "step": 56260 }, { "epoch": 4.6365883128295255, "grad_norm": 0.5459713339805603, "learning_rate": 8.220632006822687e-07, "loss": 0.1539, "step": 56280 }, { "epoch": 4.638235940246045, "grad_norm": 1.6459999084472656, "learning_rate": 8.147651480819058e-07, "loss": 0.1277, "step": 56300 }, { "epoch": 4.639883567662566, "grad_norm": 0.5306848883628845, "learning_rate": 8.074990984824615e-07, "loss": 0.1473, "step": 56320 }, { "epoch": 4.641531195079086, "grad_norm": 0.5553666353225708, "learning_rate": 8.002650614987006e-07, "loss": 0.1587, "step": 56340 }, { "epoch": 4.6431788224956065, "grad_norm": 0.5253654718399048, "learning_rate": 7.93063046703027e-07, "loss": 0.1462, "step": 56360 }, { "epoch": 4.644826449912126, "grad_norm": 0.26898902654647827, "learning_rate": 7.858930636254758e-07, "loss": 0.1454, "step": 56380 }, { "epoch": 4.646474077328647, "grad_norm": 1.6993054151535034, "learning_rate": 7.787551217536909e-07, "loss": 0.1411, "step": 56400 }, { "epoch": 4.648121704745167, "grad_norm": 0.7432767152786255, "learning_rate": 7.716492305329282e-07, "loss": 0.1538, "step": 56420 }, { "epoch": 4.6497693321616875, "grad_norm": 0.6850394606590271, "learning_rate": 7.645753993660215e-07, "loss": 0.15, "step": 56440 }, { "epoch": 4.651416959578207, "grad_norm": 0.49582144618034363, "learning_rate": 7.575336376133835e-07, "loss": 0.1446, "step": 56460 }, { "epoch": 4.653064586994727, "grad_norm": 0.18560680747032166, "learning_rate": 7.505239545929937e-07, "loss": 0.1485, "step": 56480 }, { "epoch": 4.654712214411248, "grad_norm": 1.527174949645996, "learning_rate": 7.435463595803826e-07, "loss": 0.1375, "step": 56500 }, { "epoch": 4.656359841827768, "grad_norm": 0.49086669087409973, "learning_rate": 7.366008618086256e-07, "loss": 0.1578, "step": 56520 }, { "epoch": 4.658007469244288, "grad_norm": 1.9830960035324097, "learning_rate": 7.296874704683182e-07, "loss": 0.166, "step": 56540 }, { "epoch": 4.659655096660808, "grad_norm": 6.122490882873535, "learning_rate": 7.228061947075704e-07, "loss": 0.1495, "step": 56560 }, { "epoch": 4.661302724077329, "grad_norm": 0.8401085138320923, "learning_rate": 7.159570436320012e-07, "loss": 0.1421, "step": 56580 }, { "epoch": 4.662950351493849, "grad_norm": 0.8349100351333618, "learning_rate": 7.091400263047193e-07, "loss": 0.1477, "step": 56600 }, { "epoch": 4.664597978910369, "grad_norm": 1.9410412311553955, "learning_rate": 7.02355151746309e-07, "loss": 0.1436, "step": 56620 }, { "epoch": 4.666245606326889, "grad_norm": 1.6588361263275146, "learning_rate": 6.956024289348273e-07, "loss": 0.1547, "step": 56640 }, { "epoch": 4.66789323374341, "grad_norm": 0.8127797842025757, "learning_rate": 6.888818668057823e-07, "loss": 0.1528, "step": 56660 }, { "epoch": 4.66954086115993, "grad_norm": 0.7297115921974182, "learning_rate": 6.821934742521269e-07, "loss": 0.1384, "step": 56680 }, { "epoch": 4.671188488576449, "grad_norm": 1.4666099548339844, "learning_rate": 6.755372601242454e-07, "loss": 0.134, "step": 56700 }, { "epoch": 4.67283611599297, "grad_norm": 0.7805234789848328, "learning_rate": 6.689132332299424e-07, "loss": 0.1504, "step": 56720 }, { "epoch": 4.674483743409491, "grad_norm": 0.4731627404689789, "learning_rate": 6.623214023344371e-07, "loss": 0.1478, "step": 56740 }, { "epoch": 4.676131370826011, "grad_norm": 0.5945594310760498, "learning_rate": 6.557617761603297e-07, "loss": 0.154, "step": 56760 }, { "epoch": 4.67777899824253, "grad_norm": 0.3850064277648926, "learning_rate": 6.492343633876186e-07, "loss": 0.1424, "step": 56780 }, { "epoch": 4.679426625659051, "grad_norm": 3.253350257873535, "learning_rate": 6.427391726536752e-07, "loss": 0.1455, "step": 56800 }, { "epoch": 4.681074253075571, "grad_norm": 1.4225090742111206, "learning_rate": 6.362762125532301e-07, "loss": 0.1581, "step": 56820 }, { "epoch": 4.682721880492092, "grad_norm": 1.6421289443969727, "learning_rate": 6.298454916383645e-07, "loss": 0.1564, "step": 56840 }, { "epoch": 4.684369507908611, "grad_norm": 1.7642822265625, "learning_rate": 6.234470184185021e-07, "loss": 0.1526, "step": 56860 }, { "epoch": 4.686017135325132, "grad_norm": 0.3099801540374756, "learning_rate": 6.170808013603896e-07, "loss": 0.1297, "step": 56880 }, { "epoch": 4.687664762741652, "grad_norm": 1.1055113077163696, "learning_rate": 6.107468488880968e-07, "loss": 0.1287, "step": 56900 }, { "epoch": 4.689312390158173, "grad_norm": 3.6715750694274902, "learning_rate": 6.04445169383e-07, "loss": 0.1518, "step": 56920 }, { "epoch": 4.690960017574692, "grad_norm": 0.6779770255088806, "learning_rate": 5.981757711837649e-07, "loss": 0.1545, "step": 56940 }, { "epoch": 4.692607644991213, "grad_norm": 1.036171317100525, "learning_rate": 5.919386625863416e-07, "loss": 0.1459, "step": 56960 }, { "epoch": 4.694255272407733, "grad_norm": 0.6482994556427002, "learning_rate": 5.857338518439559e-07, "loss": 0.1441, "step": 56980 }, { "epoch": 4.695902899824253, "grad_norm": 3.9100735187530518, "learning_rate": 5.795613471670985e-07, "loss": 0.1435, "step": 57000 }, { "epoch": 4.697550527240773, "grad_norm": 1.5144389867782593, "learning_rate": 5.734211567235082e-07, "loss": 0.1448, "step": 57020 }, { "epoch": 4.699198154657293, "grad_norm": 0.831039309501648, "learning_rate": 5.673132886381604e-07, "loss": 0.1603, "step": 57040 }, { "epoch": 4.700845782073814, "grad_norm": 0.7148709893226624, "learning_rate": 5.612377509932654e-07, "loss": 0.1447, "step": 57060 }, { "epoch": 4.702493409490334, "grad_norm": 0.6357032656669617, "learning_rate": 5.551945518282503e-07, "loss": 0.1363, "step": 57080 }, { "epoch": 4.704141036906854, "grad_norm": 1.4283089637756348, "learning_rate": 5.494834734223936e-07, "loss": 0.1419, "step": 57100 }, { "epoch": 4.705788664323374, "grad_norm": 1.3370014429092407, "learning_rate": 5.435033572544823e-07, "loss": 0.1599, "step": 57120 }, { "epoch": 4.707436291739895, "grad_norm": 1.0289931297302246, "learning_rate": 5.375556030334045e-07, "loss": 0.1517, "step": 57140 }, { "epoch": 4.709083919156415, "grad_norm": 1.4723304510116577, "learning_rate": 5.316402186295005e-07, "loss": 0.1436, "step": 57160 }, { "epoch": 4.710731546572935, "grad_norm": 0.32518282532691956, "learning_rate": 5.25757211870273e-07, "loss": 0.1579, "step": 57180 }, { "epoch": 4.712379173989455, "grad_norm": 1.5030494928359985, "learning_rate": 5.199065905403838e-07, "loss": 0.1421, "step": 57200 }, { "epoch": 4.714026801405975, "grad_norm": 2.238295793533325, "learning_rate": 5.140883623816484e-07, "loss": 0.1573, "step": 57220 }, { "epoch": 4.715674428822496, "grad_norm": 3.121870517730713, "learning_rate": 5.083025350929998e-07, "loss": 0.1395, "step": 57240 }, { "epoch": 4.7173220562390155, "grad_norm": 1.087999939918518, "learning_rate": 5.025491163305112e-07, "loss": 0.1525, "step": 57260 }, { "epoch": 4.718969683655536, "grad_norm": 0.649878978729248, "learning_rate": 4.968281137073705e-07, "loss": 0.1452, "step": 57280 }, { "epoch": 4.720617311072056, "grad_norm": 1.0645869970321655, "learning_rate": 4.911395347938663e-07, "loss": 0.1407, "step": 57300 }, { "epoch": 4.722264938488577, "grad_norm": 0.8524389863014221, "learning_rate": 4.85483387117383e-07, "loss": 0.15, "step": 57320 }, { "epoch": 4.7239125659050965, "grad_norm": 1.1131137609481812, "learning_rate": 4.798596781623921e-07, "loss": 0.1631, "step": 57340 }, { "epoch": 4.725560193321617, "grad_norm": 1.0309462547302246, "learning_rate": 4.7426841537044355e-07, "loss": 0.1454, "step": 57360 }, { "epoch": 4.727207820738137, "grad_norm": 0.3223254680633545, "learning_rate": 4.6870960614014967e-07, "loss": 0.1425, "step": 57380 }, { "epoch": 4.728855448154658, "grad_norm": 1.2456896305084229, "learning_rate": 4.6318325782717665e-07, "loss": 0.1378, "step": 57400 }, { "epoch": 4.730503075571177, "grad_norm": 0.5149497985839844, "learning_rate": 4.5768937774424146e-07, "loss": 0.1413, "step": 57420 }, { "epoch": 4.732150702987697, "grad_norm": 1.3381003141403198, "learning_rate": 4.522279731610929e-07, "loss": 0.1425, "step": 57440 }, { "epoch": 4.733798330404218, "grad_norm": 1.9310591220855713, "learning_rate": 4.4679905130451396e-07, "loss": 0.1375, "step": 57460 }, { "epoch": 4.735445957820739, "grad_norm": 0.38854601979255676, "learning_rate": 4.414026193582971e-07, "loss": 0.126, "step": 57480 }, { "epoch": 4.737093585237258, "grad_norm": 1.4728537797927856, "learning_rate": 4.360386844632469e-07, "loss": 0.1401, "step": 57500 }, { "epoch": 4.738741212653778, "grad_norm": 3.4486091136932373, "learning_rate": 4.3070725371716915e-07, "loss": 0.1573, "step": 57520 }, { "epoch": 4.740388840070299, "grad_norm": 0.6438698172569275, "learning_rate": 4.254083341748483e-07, "loss": 0.159, "step": 57540 }, { "epoch": 4.742036467486819, "grad_norm": 0.7948741912841797, "learning_rate": 4.201419328480588e-07, "loss": 0.1601, "step": 57560 }, { "epoch": 4.743684094903339, "grad_norm": 2.405937671661377, "learning_rate": 4.14908056705543e-07, "loss": 0.1453, "step": 57580 }, { "epoch": 4.745331722319859, "grad_norm": 1.1048628091812134, "learning_rate": 4.0970671267300253e-07, "loss": 0.1473, "step": 57600 }, { "epoch": 4.74697934973638, "grad_norm": 2.0241329669952393, "learning_rate": 4.0453790763309294e-07, "loss": 0.1508, "step": 57620 }, { "epoch": 4.7486269771529, "grad_norm": 3.0674831867218018, "learning_rate": 3.9940164842540984e-07, "loss": 0.1476, "step": 57640 }, { "epoch": 4.75027460456942, "grad_norm": 1.0839959383010864, "learning_rate": 3.9429794184648595e-07, "loss": 0.1563, "step": 57660 }, { "epoch": 4.75192223198594, "grad_norm": 0.3047626316547394, "learning_rate": 3.892267946497746e-07, "loss": 0.149, "step": 57680 }, { "epoch": 4.753569859402461, "grad_norm": 2.2087759971618652, "learning_rate": 3.841882135456526e-07, "loss": 0.1396, "step": 57700 }, { "epoch": 4.755217486818981, "grad_norm": 2.0891573429107666, "learning_rate": 3.7918220520140044e-07, "loss": 0.1563, "step": 57720 }, { "epoch": 4.7568651142355005, "grad_norm": 0.7756680250167847, "learning_rate": 3.742087762411889e-07, "loss": 0.1493, "step": 57740 }, { "epoch": 4.758512741652021, "grad_norm": 0.7801302075386047, "learning_rate": 3.6926793324608964e-07, "loss": 0.1539, "step": 57760 }, { "epoch": 4.760160369068541, "grad_norm": 0.35034140944480896, "learning_rate": 3.6435968275404797e-07, "loss": 0.1509, "step": 57780 }, { "epoch": 4.761807996485062, "grad_norm": 1.6359241008758545, "learning_rate": 3.5948403125988516e-07, "loss": 0.1371, "step": 57800 }, { "epoch": 4.7634556239015815, "grad_norm": 0.7057865858078003, "learning_rate": 3.5464098521528764e-07, "loss": 0.15, "step": 57820 }, { "epoch": 4.765103251318102, "grad_norm": 1.0694705247879028, "learning_rate": 3.4983055102878746e-07, "loss": 0.1572, "step": 57840 }, { "epoch": 4.766750878734622, "grad_norm": 1.1055688858032227, "learning_rate": 3.450527350657762e-07, "loss": 0.1573, "step": 57860 }, { "epoch": 4.768398506151143, "grad_norm": 1.1190261840820312, "learning_rate": 3.403075436484715e-07, "loss": 0.1512, "step": 57880 }, { "epoch": 4.7700461335676625, "grad_norm": 0.8060190081596375, "learning_rate": 3.355949830559285e-07, "loss": 0.148, "step": 57900 }, { "epoch": 4.771693760984183, "grad_norm": 3.7827963829040527, "learning_rate": 3.309150595240229e-07, "loss": 0.1552, "step": 57920 }, { "epoch": 4.773341388400703, "grad_norm": 0.991356611251831, "learning_rate": 3.262677792454372e-07, "loss": 0.1521, "step": 57940 }, { "epoch": 4.774989015817223, "grad_norm": 0.7682467699050903, "learning_rate": 3.2165314836966885e-07, "loss": 0.1418, "step": 57960 }, { "epoch": 4.7766366432337435, "grad_norm": 0.5051642656326294, "learning_rate": 3.170711730030057e-07, "loss": 0.149, "step": 57980 }, { "epoch": 4.778284270650263, "grad_norm": 0.9407787919044495, "learning_rate": 3.1252185920852275e-07, "loss": 0.1335, "step": 58000 }, { "epoch": 4.778284270650263, "eval_loss": 0.5357042551040649, "eval_runtime": 246.0704, "eval_samples_per_second": 88.548, "eval_steps_per_second": 22.14, "eval_wer": 0.22416249793546328, "step": 58000 }, { "epoch": 4.779931898066784, "grad_norm": 1.1755343675613403, "learning_rate": 3.0800521300607954e-07, "loss": 0.1525, "step": 58020 }, { "epoch": 4.781579525483304, "grad_norm": 0.8533674478530884, "learning_rate": 3.03521240372312e-07, "loss": 0.1654, "step": 58040 }, { "epoch": 4.7832271528998245, "grad_norm": 0.7474669218063354, "learning_rate": 2.9906994724061e-07, "loss": 0.1426, "step": 58060 }, { "epoch": 4.784874780316344, "grad_norm": 0.20984718203544617, "learning_rate": 2.946513395011313e-07, "loss": 0.139, "step": 58080 }, { "epoch": 4.786522407732865, "grad_norm": 1.1169986724853516, "learning_rate": 2.9026542300077677e-07, "loss": 0.1417, "step": 58100 }, { "epoch": 4.788170035149385, "grad_norm": 0.9066877365112305, "learning_rate": 2.8591220354318994e-07, "loss": 0.1586, "step": 58120 }, { "epoch": 4.7898176625659055, "grad_norm": 1.765103816986084, "learning_rate": 2.8159168688874635e-07, "loss": 0.1721, "step": 58140 }, { "epoch": 4.791465289982425, "grad_norm": 0.6120753288269043, "learning_rate": 2.7730387875455343e-07, "loss": 0.1552, "step": 58160 }, { "epoch": 4.793112917398945, "grad_norm": 0.4533499479293823, "learning_rate": 2.7304878481443087e-07, "loss": 0.1386, "step": 58180 }, { "epoch": 4.794760544815466, "grad_norm": 1.1315693855285645, "learning_rate": 2.688264106989163e-07, "loss": 0.1392, "step": 58200 }, { "epoch": 4.796408172231986, "grad_norm": 0.7475069761276245, "learning_rate": 2.6463676199524335e-07, "loss": 0.1411, "step": 58220 }, { "epoch": 4.798055799648506, "grad_norm": 0.5072384476661682, "learning_rate": 2.604798442473438e-07, "loss": 0.1469, "step": 58240 }, { "epoch": 4.799703427065026, "grad_norm": 0.9817243814468384, "learning_rate": 2.5635566295584554e-07, "loss": 0.1479, "step": 58260 }, { "epoch": 4.801351054481547, "grad_norm": 1.1475521326065063, "learning_rate": 2.52264223578047e-07, "loss": 0.1515, "step": 58280 }, { "epoch": 4.802998681898067, "grad_norm": 1.5746099948883057, "learning_rate": 2.4820553152793135e-07, "loss": 0.1468, "step": 58300 }, { "epoch": 4.804646309314587, "grad_norm": 0.7214694023132324, "learning_rate": 2.441795921761386e-07, "loss": 0.1596, "step": 58320 }, { "epoch": 4.806293936731107, "grad_norm": 2.504847288131714, "learning_rate": 2.4018641084997685e-07, "loss": 0.151, "step": 58340 }, { "epoch": 4.807941564147628, "grad_norm": 0.6382145285606384, "learning_rate": 2.362259928334082e-07, "loss": 0.1534, "step": 58360 }, { "epoch": 4.8095891915641475, "grad_norm": 0.22215913236141205, "learning_rate": 2.3229834336703505e-07, "loss": 0.1519, "step": 58380 }, { "epoch": 4.811236818980667, "grad_norm": 0.7345685958862305, "learning_rate": 2.2840346764810004e-07, "loss": 0.1364, "step": 58400 }, { "epoch": 4.812884446397188, "grad_norm": 0.8262800574302673, "learning_rate": 2.2454137083048332e-07, "loss": 0.1482, "step": 58420 }, { "epoch": 4.814532073813709, "grad_norm": 0.6173376441001892, "learning_rate": 2.2071205802468299e-07, "loss": 0.1503, "step": 58440 }, { "epoch": 4.8161797012302285, "grad_norm": 0.4323236644268036, "learning_rate": 2.1691553429782074e-07, "loss": 0.1462, "step": 58460 }, { "epoch": 4.817827328646748, "grad_norm": 0.19968824088573456, "learning_rate": 2.1315180467362805e-07, "loss": 0.1444, "step": 58480 }, { "epoch": 4.819474956063269, "grad_norm": 0.9473910927772522, "learning_rate": 2.0942087413244882e-07, "loss": 0.1369, "step": 58500 }, { "epoch": 4.821122583479789, "grad_norm": 0.7982332706451416, "learning_rate": 2.0572274761121167e-07, "loss": 0.1321, "step": 58520 }, { "epoch": 4.8227702108963095, "grad_norm": 1.7388837337493896, "learning_rate": 2.0205743000344945e-07, "loss": 0.1471, "step": 58540 }, { "epoch": 4.824417838312829, "grad_norm": 0.4136100709438324, "learning_rate": 1.9842492615927687e-07, "loss": 0.1487, "step": 58560 }, { "epoch": 4.82606546572935, "grad_norm": 0.25300848484039307, "learning_rate": 1.948252408853879e-07, "loss": 0.1311, "step": 58580 }, { "epoch": 4.82771309314587, "grad_norm": 2.3845789432525635, "learning_rate": 1.9125837894505006e-07, "loss": 0.1396, "step": 58600 }, { "epoch": 4.8293607205623905, "grad_norm": 0.6275327801704407, "learning_rate": 1.8772434505809344e-07, "loss": 0.158, "step": 58620 }, { "epoch": 4.83100834797891, "grad_norm": 0.7575026154518127, "learning_rate": 1.84223143900919e-07, "loss": 0.1495, "step": 58640 }, { "epoch": 4.832655975395431, "grad_norm": 0.7890337109565735, "learning_rate": 1.8075478010646797e-07, "loss": 0.1499, "step": 58660 }, { "epoch": 4.834303602811951, "grad_norm": 0.2886408865451813, "learning_rate": 1.7731925826424134e-07, "loss": 0.1482, "step": 58680 }, { "epoch": 4.835951230228471, "grad_norm": 0.702225923538208, "learning_rate": 1.739165829202749e-07, "loss": 0.1433, "step": 58700 }, { "epoch": 4.837598857644991, "grad_norm": 0.958633303642273, "learning_rate": 1.7054675857714474e-07, "loss": 0.1406, "step": 58720 }, { "epoch": 4.839246485061511, "grad_norm": 0.7321062684059143, "learning_rate": 1.6720978969395618e-07, "loss": 0.149, "step": 58740 }, { "epoch": 4.840894112478032, "grad_norm": 2.386580467224121, "learning_rate": 1.6390568068633538e-07, "loss": 0.1599, "step": 58760 }, { "epoch": 4.842541739894552, "grad_norm": 0.24375922977924347, "learning_rate": 1.6063443592643222e-07, "loss": 0.146, "step": 58780 }, { "epoch": 4.844189367311072, "grad_norm": 0.955115020275116, "learning_rate": 1.5739605974290638e-07, "loss": 0.1392, "step": 58800 }, { "epoch": 4.845836994727592, "grad_norm": 0.8327272534370422, "learning_rate": 1.5419055642091895e-07, "loss": 0.1458, "step": 58820 }, { "epoch": 4.847484622144113, "grad_norm": 1.001267433166504, "learning_rate": 1.510179302021436e-07, "loss": 0.143, "step": 58840 }, { "epoch": 4.849132249560633, "grad_norm": 1.2017278671264648, "learning_rate": 1.4787818528474162e-07, "loss": 0.1533, "step": 58860 }, { "epoch": 4.850779876977153, "grad_norm": 1.756772518157959, "learning_rate": 1.447713258233646e-07, "loss": 0.1364, "step": 58880 }, { "epoch": 4.852427504393673, "grad_norm": 20.684553146362305, "learning_rate": 1.4169735592915178e-07, "loss": 0.1376, "step": 58900 }, { "epoch": 4.854075131810193, "grad_norm": 1.4841912984848022, "learning_rate": 1.3865627966971883e-07, "loss": 0.1594, "step": 58920 }, { "epoch": 4.855722759226714, "grad_norm": 0.9735491275787354, "learning_rate": 1.3564810106915515e-07, "loss": 0.1589, "step": 58940 }, { "epoch": 4.857370386643233, "grad_norm": 0.8079643845558167, "learning_rate": 1.3267282410802106e-07, "loss": 0.1518, "step": 58960 }, { "epoch": 4.859018014059754, "grad_norm": 0.824241578578949, "learning_rate": 1.297304527233395e-07, "loss": 0.1469, "step": 58980 }, { "epoch": 4.860665641476274, "grad_norm": 1.237937092781067, "learning_rate": 1.268209908085849e-07, "loss": 0.145, "step": 59000 }, { "epoch": 4.862313268892795, "grad_norm": 0.46395012736320496, "learning_rate": 1.2394444221369427e-07, "loss": 0.1427, "step": 59020 }, { "epoch": 4.863960896309314, "grad_norm": 0.8301859498023987, "learning_rate": 1.2124221047846164e-07, "loss": 0.1477, "step": 59040 }, { "epoch": 4.865608523725835, "grad_norm": 0.7848377227783203, "learning_rate": 1.1842985376572802e-07, "loss": 0.1421, "step": 59060 }, { "epoch": 4.867256151142355, "grad_norm": 0.30427277088165283, "learning_rate": 1.1565042147638761e-07, "loss": 0.1482, "step": 59080 }, { "epoch": 4.868903778558876, "grad_norm": 1.6267706155776978, "learning_rate": 1.1290391728831507e-07, "loss": 0.1363, "step": 59100 }, { "epoch": 4.870551405975395, "grad_norm": 5.8373212814331055, "learning_rate": 1.1019034483580881e-07, "loss": 0.149, "step": 59120 }, { "epoch": 4.872199033391915, "grad_norm": 0.656572163105011, "learning_rate": 1.0750970770958823e-07, "loss": 0.1426, "step": 59140 }, { "epoch": 4.873846660808436, "grad_norm": 0.44349759817123413, "learning_rate": 1.0486200945679647e-07, "loss": 0.1496, "step": 59160 }, { "epoch": 4.875494288224957, "grad_norm": 0.27204573154449463, "learning_rate": 1.022472535809893e-07, "loss": 0.1363, "step": 59180 }, { "epoch": 4.877141915641476, "grad_norm": 1.1407734155654907, "learning_rate": 9.966544354212959e-08, "loss": 0.1317, "step": 59200 }, { "epoch": 4.878789543057996, "grad_norm": 1.0067180395126343, "learning_rate": 9.711658275658175e-08, "loss": 0.1386, "step": 59220 }, { "epoch": 4.880437170474517, "grad_norm": 2.183023452758789, "learning_rate": 9.460067459712008e-08, "loss": 0.1451, "step": 59240 }, { "epoch": 4.882084797891037, "grad_norm": 0.6398245096206665, "learning_rate": 9.211772239290373e-08, "loss": 0.1412, "step": 59260 }, { "epoch": 4.883732425307557, "grad_norm": 0.2688296139240265, "learning_rate": 8.966772942949064e-08, "loss": 0.1435, "step": 59280 }, { "epoch": 4.885380052724077, "grad_norm": 1.3446420431137085, "learning_rate": 8.725069894882364e-08, "loss": 0.1382, "step": 59300 }, { "epoch": 4.887027680140598, "grad_norm": 1.0231753587722778, "learning_rate": 8.486663414922214e-08, "loss": 0.1578, "step": 59320 }, { "epoch": 4.888675307557118, "grad_norm": 2.6336162090301514, "learning_rate": 8.251553818539315e-08, "loss": 0.1386, "step": 59340 }, { "epoch": 4.890322934973638, "grad_norm": 0.5091193914413452, "learning_rate": 8.019741416841198e-08, "loss": 0.1538, "step": 59360 }, { "epoch": 4.891970562390158, "grad_norm": 0.6824169754981995, "learning_rate": 7.79122651657277e-08, "loss": 0.15, "step": 59380 }, { "epoch": 4.893618189806679, "grad_norm": 1.1917065382003784, "learning_rate": 7.566009420115205e-08, "loss": 0.1354, "step": 59400 }, { "epoch": 4.895265817223199, "grad_norm": 0.6058480739593506, "learning_rate": 7.344090425485949e-08, "loss": 0.1395, "step": 59420 }, { "epoch": 4.8969134446397184, "grad_norm": 0.45373713970184326, "learning_rate": 7.12546982633816e-08, "loss": 0.1477, "step": 59440 }, { "epoch": 4.898561072056239, "grad_norm": 2.359759569168091, "learning_rate": 6.910147911960707e-08, "loss": 0.1462, "step": 59460 }, { "epoch": 4.900208699472759, "grad_norm": 0.36701512336730957, "learning_rate": 6.69812496727762e-08, "loss": 0.1506, "step": 59480 }, { "epoch": 4.90185632688928, "grad_norm": 0.7711554765701294, "learning_rate": 6.489401272846696e-08, "loss": 0.1362, "step": 59500 }, { "epoch": 4.903503954305799, "grad_norm": 0.5819993615150452, "learning_rate": 6.283977104860895e-08, "loss": 0.1494, "step": 59520 }, { "epoch": 4.90515158172232, "grad_norm": 0.9013060331344604, "learning_rate": 6.081852735146943e-08, "loss": 0.1391, "step": 59540 }, { "epoch": 4.90679920913884, "grad_norm": 1.0158357620239258, "learning_rate": 5.883028431164783e-08, "loss": 0.131, "step": 59560 }, { "epoch": 4.908446836555361, "grad_norm": 0.8705806732177734, "learning_rate": 5.687504456007575e-08, "loss": 0.1436, "step": 59580 }, { "epoch": 4.91009446397188, "grad_norm": 3.586292028427124, "learning_rate": 5.49528106840197e-08, "loss": 0.1417, "step": 59600 }, { "epoch": 4.911742091388401, "grad_norm": 1.4119175672531128, "learning_rate": 5.306358522706445e-08, "loss": 0.1525, "step": 59620 }, { "epoch": 4.913389718804921, "grad_norm": 1.224418044090271, "learning_rate": 5.1207370689118625e-08, "loss": 0.154, "step": 59640 }, { "epoch": 4.915037346221441, "grad_norm": 3.059386968612671, "learning_rate": 4.93841695264119e-08, "loss": 0.1593, "step": 59660 }, { "epoch": 4.916684973637961, "grad_norm": 0.5255879163742065, "learning_rate": 4.759398415148386e-08, "loss": 0.1443, "step": 59680 }, { "epoch": 4.918332601054481, "grad_norm": 0.6989895105361938, "learning_rate": 4.583681693318964e-08, "loss": 0.1409, "step": 59700 }, { "epoch": 4.919980228471002, "grad_norm": 1.9440925121307373, "learning_rate": 4.411267019669429e-08, "loss": 0.1446, "step": 59720 }, { "epoch": 4.921627855887522, "grad_norm": 42.70412826538086, "learning_rate": 4.2421546223464505e-08, "loss": 0.1424, "step": 59740 }, { "epoch": 4.923275483304042, "grad_norm": 1.0813547372817993, "learning_rate": 4.0763447251276896e-08, "loss": 0.1442, "step": 59760 }, { "epoch": 4.924923110720562, "grad_norm": 0.7377712726593018, "learning_rate": 3.913837547419863e-08, "loss": 0.1477, "step": 59780 }, { "epoch": 4.926570738137083, "grad_norm": 1.5248273611068726, "learning_rate": 3.754633304259847e-08, "loss": 0.1332, "step": 59800 }, { "epoch": 4.928218365553603, "grad_norm": 7.687877655029297, "learning_rate": 3.5987322063144036e-08, "loss": 0.1499, "step": 59820 }, { "epoch": 4.929865992970123, "grad_norm": 1.106568694114685, "learning_rate": 3.446134459878514e-08, "loss": 0.1561, "step": 59840 }, { "epoch": 4.931513620386643, "grad_norm": 1.875120759010315, "learning_rate": 3.296840266876489e-08, "loss": 0.1512, "step": 59860 }, { "epoch": 4.933161247803163, "grad_norm": 0.35406166315078735, "learning_rate": 3.150849824861413e-08, "loss": 0.1397, "step": 59880 }, { "epoch": 4.934808875219684, "grad_norm": 1.5011489391326904, "learning_rate": 3.008163327014035e-08, "loss": 0.1392, "step": 59900 }, { "epoch": 4.9364565026362035, "grad_norm": 0.8797003030776978, "learning_rate": 2.8687809621441574e-08, "loss": 0.1472, "step": 59920 }, { "epoch": 4.938104130052724, "grad_norm": 1.0946521759033203, "learning_rate": 2.732702914688412e-08, "loss": 0.1456, "step": 59940 }, { "epoch": 4.939751757469244, "grad_norm": 0.768555223941803, "learning_rate": 2.5999293647116506e-08, "loss": 0.1609, "step": 59960 }, { "epoch": 4.941399384885765, "grad_norm": 0.19902761280536652, "learning_rate": 2.4704604879058348e-08, "loss": 0.1343, "step": 59980 }, { "epoch": 4.9430470123022845, "grad_norm": 2.1632421016693115, "learning_rate": 2.3442964555903113e-08, "loss": 0.1469, "step": 60000 }, { "epoch": 4.9430470123022845, "eval_loss": 0.5249403119087219, "eval_runtime": 243.8886, "eval_samples_per_second": 89.34, "eval_steps_per_second": 22.338, "eval_wer": 0.2207360668266041, "step": 60000 } ], "logging_steps": 20, "max_steps": 60690, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.2335284844344734e+21, "train_batch_size": 24, "trial_name": null, "trial_params": null }