{ "best_metric": 0.2286638878235724, "best_model_checkpoint": "./checkpoints/w2v-multilingual-v1.3/checkpoint-12000", "epoch": 1.4829195957820738, "eval_steps": 2000, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001647627416520211, "grad_norm": 0.2767059803009033, "learning_rate": 4.9431537320810675e-08, "loss": 0.1923, "step": 20 }, { "epoch": 0.003295254833040422, "grad_norm": 2.006911039352417, "learning_rate": 1.977261492832427e-07, "loss": 0.2947, "step": 40 }, { "epoch": 0.004942882249560633, "grad_norm": 0.3714193105697632, "learning_rate": 3.624979403526116e-07, "loss": 0.2955, "step": 60 }, { "epoch": 0.006590509666080844, "grad_norm": 0.3106062412261963, "learning_rate": 5.272697314219806e-07, "loss": 0.2754, "step": 80 }, { "epoch": 0.008238137082601054, "grad_norm": 3.2078254222869873, "learning_rate": 6.838029329378811e-07, "loss": 0.3177, "step": 100 }, { "epoch": 0.009885764499121265, "grad_norm": 0.38503074645996094, "learning_rate": 8.485747240072501e-07, "loss": 0.1905, "step": 120 }, { "epoch": 0.011533391915641476, "grad_norm": 0.15724419057369232, "learning_rate": 1.013346515076619e-06, "loss": 0.2942, "step": 140 }, { "epoch": 0.013181019332161687, "grad_norm": 0.5840986967086792, "learning_rate": 1.1781183061459877e-06, "loss": 0.2907, "step": 160 }, { "epoch": 0.014828646748681899, "grad_norm": 0.23122897744178772, "learning_rate": 1.3346515076618883e-06, "loss": 0.2811, "step": 180 }, { "epoch": 0.016476274165202108, "grad_norm": 2.7248637676239014, "learning_rate": 1.4994232987312573e-06, "loss": 0.3168, "step": 200 }, { "epoch": 0.01812390158172232, "grad_norm": 1.0846350193023682, "learning_rate": 1.6641950898006263e-06, "loss": 0.1979, "step": 220 }, { "epoch": 0.01977152899824253, "grad_norm": 1.7905633449554443, "learning_rate": 1.828966880869995e-06, "loss": 0.2656, "step": 240 }, { "epoch": 0.021419156414762743, "grad_norm": 0.31305554509162903, "learning_rate": 1.993738671939364e-06, "loss": 0.272, "step": 260 }, { "epoch": 0.023066783831282953, "grad_norm": 0.31258106231689453, "learning_rate": 2.158510463008733e-06, "loss": 0.2693, "step": 280 }, { "epoch": 0.024714411247803162, "grad_norm": 1.783349871635437, "learning_rate": 2.323282254078102e-06, "loss": 0.3116, "step": 300 }, { "epoch": 0.026362038664323375, "grad_norm": 0.6936488747596741, "learning_rate": 2.488054045147471e-06, "loss": 0.1931, "step": 320 }, { "epoch": 0.028009666080843584, "grad_norm": 0.6185577511787415, "learning_rate": 2.65282583621684e-06, "loss": 0.258, "step": 340 }, { "epoch": 0.029657293497363797, "grad_norm": 0.3592207729816437, "learning_rate": 2.8175976272862085e-06, "loss": 0.2401, "step": 360 }, { "epoch": 0.03130492091388401, "grad_norm": 1.2324920892715454, "learning_rate": 2.982369418355578e-06, "loss": 0.2371, "step": 380 }, { "epoch": 0.032952548330404216, "grad_norm": 1.5560214519500732, "learning_rate": 3.147141209424947e-06, "loss": 0.2414, "step": 400 }, { "epoch": 0.03460017574692443, "grad_norm": 0.8133947253227234, "learning_rate": 3.3119130004943154e-06, "loss": 0.1744, "step": 420 }, { "epoch": 0.03624780316344464, "grad_norm": 0.8599107265472412, "learning_rate": 3.4766847915636844e-06, "loss": 0.2536, "step": 440 }, { "epoch": 0.03789543057996485, "grad_norm": 0.3699595034122467, "learning_rate": 3.641456582633054e-06, "loss": 0.2552, "step": 460 }, { "epoch": 0.03954305799648506, "grad_norm": 1.2955116033554077, "learning_rate": 3.797989784148954e-06, "loss": 0.2532, "step": 480 }, { "epoch": 0.041190685413005274, "grad_norm": 11.518170356750488, "learning_rate": 3.962761575218322e-06, "loss": 0.2507, "step": 500 }, { "epoch": 0.042838312829525486, "grad_norm": 0.9779248833656311, "learning_rate": 4.127533366287692e-06, "loss": 0.1733, "step": 520 }, { "epoch": 0.04448594024604569, "grad_norm": 0.5386682152748108, "learning_rate": 4.29230515735706e-06, "loss": 0.2415, "step": 540 }, { "epoch": 0.046133567662565905, "grad_norm": 2.7366793155670166, "learning_rate": 4.457076948426429e-06, "loss": 0.2409, "step": 560 }, { "epoch": 0.04778119507908612, "grad_norm": 1.9912066459655762, "learning_rate": 4.621848739495799e-06, "loss": 0.2134, "step": 580 }, { "epoch": 0.049428822495606324, "grad_norm": 4.058573246002197, "learning_rate": 4.786620530565167e-06, "loss": 0.2456, "step": 600 }, { "epoch": 0.05107644991212654, "grad_norm": 0.37456750869750977, "learning_rate": 4.951392321634536e-06, "loss": 0.1589, "step": 620 }, { "epoch": 0.05272407732864675, "grad_norm": 0.7950440645217896, "learning_rate": 5.116164112703905e-06, "loss": 0.2485, "step": 640 }, { "epoch": 0.05437170474516696, "grad_norm": 1.884665846824646, "learning_rate": 5.280935903773274e-06, "loss": 0.2197, "step": 660 }, { "epoch": 0.05601933216168717, "grad_norm": 0.32170844078063965, "learning_rate": 5.445707694842643e-06, "loss": 0.2111, "step": 680 }, { "epoch": 0.05766695957820738, "grad_norm": 1.8230172395706177, "learning_rate": 5.610479485912012e-06, "loss": 0.2379, "step": 700 }, { "epoch": 0.059314586994727594, "grad_norm": 1.2472524642944336, "learning_rate": 5.775251276981381e-06, "loss": 0.1684, "step": 720 }, { "epoch": 0.0609622144112478, "grad_norm": 0.29022061824798584, "learning_rate": 5.94002306805075e-06, "loss": 0.2201, "step": 740 }, { "epoch": 0.06260984182776802, "grad_norm": 0.49721184372901917, "learning_rate": 6.104794859120118e-06, "loss": 0.2297, "step": 760 }, { "epoch": 0.06425746924428823, "grad_norm": 0.6133716106414795, "learning_rate": 6.269566650189487e-06, "loss": 0.3173, "step": 780 }, { "epoch": 0.06590509666080843, "grad_norm": 0.9667792320251465, "learning_rate": 6.434338441258857e-06, "loss": 0.2349, "step": 800 }, { "epoch": 0.06755272407732865, "grad_norm": 0.3177216053009033, "learning_rate": 6.599110232328226e-06, "loss": 0.1635, "step": 820 }, { "epoch": 0.06920035149384886, "grad_norm": 0.8457621335983276, "learning_rate": 6.763882023397594e-06, "loss": 0.2777, "step": 840 }, { "epoch": 0.07084797891036906, "grad_norm": 0.3330087661743164, "learning_rate": 6.928653814466963e-06, "loss": 0.2154, "step": 860 }, { "epoch": 0.07249560632688928, "grad_norm": 0.5845814943313599, "learning_rate": 7.093425605536333e-06, "loss": 0.2164, "step": 880 }, { "epoch": 0.07414323374340949, "grad_norm": 2.325303554534912, "learning_rate": 7.258197396605701e-06, "loss": 0.2068, "step": 900 }, { "epoch": 0.0757908611599297, "grad_norm": 0.21893823146820068, "learning_rate": 7.414730598121602e-06, "loss": 0.1621, "step": 920 }, { "epoch": 0.07743848857644992, "grad_norm": 0.5854327082633972, "learning_rate": 7.5795023891909705e-06, "loss": 0.2281, "step": 940 }, { "epoch": 0.07908611599297012, "grad_norm": 0.2406030148267746, "learning_rate": 7.74427418026034e-06, "loss": 0.2342, "step": 960 }, { "epoch": 0.08073374340949033, "grad_norm": 1.3764126300811768, "learning_rate": 7.90904597132971e-06, "loss": 0.2018, "step": 980 }, { "epoch": 0.08238137082601055, "grad_norm": 0.9587862491607666, "learning_rate": 8.073817762399077e-06, "loss": 0.2489, "step": 1000 }, { "epoch": 0.08402899824253075, "grad_norm": 0.9726558923721313, "learning_rate": 8.238589553468447e-06, "loss": 0.154, "step": 1020 }, { "epoch": 0.08567662565905097, "grad_norm": 1.9828400611877441, "learning_rate": 8.403361344537817e-06, "loss": 0.2336, "step": 1040 }, { "epoch": 0.08732425307557118, "grad_norm": 0.14761961996555328, "learning_rate": 8.568133135607183e-06, "loss": 0.1949, "step": 1060 }, { "epoch": 0.08897188049209138, "grad_norm": 0.39077144861221313, "learning_rate": 8.732904926676553e-06, "loss": 0.2067, "step": 1080 }, { "epoch": 0.0906195079086116, "grad_norm": 2.3257837295532227, "learning_rate": 8.897676717745921e-06, "loss": 0.2184, "step": 1100 }, { "epoch": 0.09226713532513181, "grad_norm": 1.0966060161590576, "learning_rate": 9.062448508815291e-06, "loss": 0.1655, "step": 1120 }, { "epoch": 0.09391476274165202, "grad_norm": 0.5648412704467773, "learning_rate": 9.227220299884661e-06, "loss": 0.2097, "step": 1140 }, { "epoch": 0.09556239015817224, "grad_norm": 0.49816444516181946, "learning_rate": 9.39199209095403e-06, "loss": 0.2052, "step": 1160 }, { "epoch": 0.09721001757469244, "grad_norm": 1.1164054870605469, "learning_rate": 9.556763882023397e-06, "loss": 0.2298, "step": 1180 }, { "epoch": 0.09885764499121265, "grad_norm": 0.9453270435333252, "learning_rate": 9.721535673092767e-06, "loss": 0.2157, "step": 1200 }, { "epoch": 0.10050527240773287, "grad_norm": 0.4588276743888855, "learning_rate": 9.886307464162135e-06, "loss": 0.1433, "step": 1220 }, { "epoch": 0.10215289982425307, "grad_norm": 0.47535696625709534, "learning_rate": 1.0051079255231505e-05, "loss": 0.2115, "step": 1240 }, { "epoch": 0.10380052724077328, "grad_norm": 0.585959792137146, "learning_rate": 1.0215851046300875e-05, "loss": 0.2245, "step": 1260 }, { "epoch": 0.1054481546572935, "grad_norm": 0.24861204624176025, "learning_rate": 1.0380622837370241e-05, "loss": 0.1981, "step": 1280 }, { "epoch": 0.1070957820738137, "grad_norm": 31.602128982543945, "learning_rate": 1.0545394628439611e-05, "loss": 0.2265, "step": 1300 }, { "epoch": 0.10874340949033393, "grad_norm": 0.6234269142150879, "learning_rate": 1.0710166419508981e-05, "loss": 0.154, "step": 1320 }, { "epoch": 0.11039103690685413, "grad_norm": 1.2423540353775024, "learning_rate": 1.087493821057835e-05, "loss": 0.2062, "step": 1340 }, { "epoch": 0.11203866432337434, "grad_norm": 0.2090279757976532, "learning_rate": 1.1039710001647719e-05, "loss": 0.2214, "step": 1360 }, { "epoch": 0.11368629173989456, "grad_norm": 0.6145613193511963, "learning_rate": 1.1204481792717087e-05, "loss": 0.1942, "step": 1380 }, { "epoch": 0.11533391915641476, "grad_norm": 0.9004138708114624, "learning_rate": 1.1369253583786455e-05, "loss": 0.2271, "step": 1400 }, { "epoch": 0.11698154657293497, "grad_norm": 1.609165906906128, "learning_rate": 1.1534025374855825e-05, "loss": 0.1606, "step": 1420 }, { "epoch": 0.11862917398945519, "grad_norm": 0.8725568652153015, "learning_rate": 1.1698797165925195e-05, "loss": 0.204, "step": 1440 }, { "epoch": 0.1202768014059754, "grad_norm": 1.8169455528259277, "learning_rate": 1.1863568956994563e-05, "loss": 0.207, "step": 1460 }, { "epoch": 0.1219244288224956, "grad_norm": 0.37334388494491577, "learning_rate": 1.2028340748063933e-05, "loss": 0.2033, "step": 1480 }, { "epoch": 0.12357205623901582, "grad_norm": 15.727474212646484, "learning_rate": 1.2193112539133301e-05, "loss": 0.3298, "step": 1500 }, { "epoch": 0.12521968365553604, "grad_norm": 4.70903205871582, "learning_rate": 1.2357884330202669e-05, "loss": 0.159, "step": 1520 }, { "epoch": 0.12686731107205623, "grad_norm": 0.4843326807022095, "learning_rate": 1.2522656121272039e-05, "loss": 0.2141, "step": 1540 }, { "epoch": 0.12851493848857645, "grad_norm": 0.572084367275238, "learning_rate": 1.2687427912341407e-05, "loss": 0.2182, "step": 1560 }, { "epoch": 0.13016256590509667, "grad_norm": 0.31078797578811646, "learning_rate": 1.2852199703410777e-05, "loss": 0.1994, "step": 1580 }, { "epoch": 0.13181019332161686, "grad_norm": 9.014205932617188, "learning_rate": 1.3016971494480145e-05, "loss": 0.2089, "step": 1600 }, { "epoch": 0.13345782073813708, "grad_norm": 1.0150245428085327, "learning_rate": 1.3181743285549513e-05, "loss": 0.1521, "step": 1620 }, { "epoch": 0.1351054481546573, "grad_norm": 0.25271451473236084, "learning_rate": 1.3346515076618885e-05, "loss": 0.1996, "step": 1640 }, { "epoch": 0.1367530755711775, "grad_norm": 0.47118502855300903, "learning_rate": 1.3511286867688253e-05, "loss": 0.2059, "step": 1660 }, { "epoch": 0.13840070298769772, "grad_norm": 0.5134350657463074, "learning_rate": 1.3676058658757621e-05, "loss": 0.1935, "step": 1680 }, { "epoch": 0.14004833040421794, "grad_norm": 1.0354816913604736, "learning_rate": 1.384083044982699e-05, "loss": 0.2103, "step": 1700 }, { "epoch": 0.14169595782073813, "grad_norm": 0.5588876605033875, "learning_rate": 1.4005602240896359e-05, "loss": 0.1598, "step": 1720 }, { "epoch": 0.14334358523725835, "grad_norm": 0.7309175133705139, "learning_rate": 1.4170374031965727e-05, "loss": 0.2204, "step": 1740 }, { "epoch": 0.14499121265377857, "grad_norm": 0.6155902743339539, "learning_rate": 1.4335145823035099e-05, "loss": 0.2133, "step": 1760 }, { "epoch": 0.14663884007029876, "grad_norm": 0.7660940885543823, "learning_rate": 1.4499917614104467e-05, "loss": 0.2065, "step": 1780 }, { "epoch": 0.14828646748681898, "grad_norm": 1.1954026222229004, "learning_rate": 1.4664689405173835e-05, "loss": 0.2147, "step": 1800 }, { "epoch": 0.1499340949033392, "grad_norm": 0.4249323606491089, "learning_rate": 1.4829461196243205e-05, "loss": 0.1553, "step": 1820 }, { "epoch": 0.1515817223198594, "grad_norm": 2.9014129638671875, "learning_rate": 1.4994232987312573e-05, "loss": 0.2208, "step": 1840 }, { "epoch": 0.1532293497363796, "grad_norm": 1.6474498510360718, "learning_rate": 1.5159004778381941e-05, "loss": 0.209, "step": 1860 }, { "epoch": 0.15487697715289983, "grad_norm": 0.1585623174905777, "learning_rate": 1.532377656945131e-05, "loss": 0.1873, "step": 1880 }, { "epoch": 0.15652460456942002, "grad_norm": 1.171941876411438, "learning_rate": 1.548854836052068e-05, "loss": 0.2389, "step": 1900 }, { "epoch": 0.15817223198594024, "grad_norm": 0.48890382051467896, "learning_rate": 1.5653320151590047e-05, "loss": 0.1679, "step": 1920 }, { "epoch": 0.15981985940246046, "grad_norm": 0.5568016767501831, "learning_rate": 1.581809194265942e-05, "loss": 0.1968, "step": 1940 }, { "epoch": 0.16146748681898065, "grad_norm": 0.9775394797325134, "learning_rate": 1.5982863733728787e-05, "loss": 0.2208, "step": 1960 }, { "epoch": 0.16311511423550087, "grad_norm": 0.60302734375, "learning_rate": 1.6147635524798155e-05, "loss": 0.1929, "step": 1980 }, { "epoch": 0.1647627416520211, "grad_norm": 1.7513552904129028, "learning_rate": 1.6312407315867526e-05, "loss": 0.2055, "step": 2000 }, { "epoch": 0.1647627416520211, "eval_loss": 0.5699400305747986, "eval_runtime": 686.8117, "eval_samples_per_second": 31.725, "eval_steps_per_second": 7.932, "eval_wer": 0.23469093535410654, "step": 2000 }, { "epoch": 0.16641036906854131, "grad_norm": 1.049734354019165, "learning_rate": 1.6477179106936894e-05, "loss": 0.1608, "step": 2020 }, { "epoch": 0.1680579964850615, "grad_norm": 1.7113618850708008, "learning_rate": 1.6641950898006263e-05, "loss": 0.2008, "step": 2040 }, { "epoch": 0.16970562390158173, "grad_norm": 0.3202134370803833, "learning_rate": 1.6806722689075634e-05, "loss": 0.1973, "step": 2060 }, { "epoch": 0.17135325131810195, "grad_norm": 0.3612610697746277, "learning_rate": 1.6971494480145e-05, "loss": 0.1732, "step": 2080 }, { "epoch": 0.17300087873462214, "grad_norm": 1.8115849494934082, "learning_rate": 1.7136266271214367e-05, "loss": 0.2138, "step": 2100 }, { "epoch": 0.17464850615114236, "grad_norm": 0.7046949863433838, "learning_rate": 1.7301038062283735e-05, "loss": 0.1571, "step": 2120 }, { "epoch": 0.17629613356766258, "grad_norm": 0.5983096957206726, "learning_rate": 1.7465809853353107e-05, "loss": 0.2092, "step": 2140 }, { "epoch": 0.17794376098418277, "grad_norm": 0.17064958810806274, "learning_rate": 1.7630581644422475e-05, "loss": 0.2083, "step": 2160 }, { "epoch": 0.179591388400703, "grad_norm": 1.422013759613037, "learning_rate": 1.7795353435491843e-05, "loss": 0.2087, "step": 2180 }, { "epoch": 0.1812390158172232, "grad_norm": 0.982097864151001, "learning_rate": 1.7960125226561214e-05, "loss": 0.2161, "step": 2200 }, { "epoch": 0.1828866432337434, "grad_norm": 0.2690947949886322, "learning_rate": 1.8124897017630583e-05, "loss": 0.1693, "step": 2220 }, { "epoch": 0.18453427065026362, "grad_norm": 0.24867244064807892, "learning_rate": 1.828966880869995e-05, "loss": 0.2058, "step": 2240 }, { "epoch": 0.18618189806678384, "grad_norm": 0.9435555934906006, "learning_rate": 1.8454440599769322e-05, "loss": 0.1991, "step": 2260 }, { "epoch": 0.18782952548330403, "grad_norm": 0.23964335024356842, "learning_rate": 1.861921239083869e-05, "loss": 0.1932, "step": 2280 }, { "epoch": 0.18947715289982425, "grad_norm": 2.1007418632507324, "learning_rate": 1.878398418190806e-05, "loss": 0.2075, "step": 2300 }, { "epoch": 0.19112478031634447, "grad_norm": 0.31368857622146606, "learning_rate": 1.8948755972977427e-05, "loss": 0.1557, "step": 2320 }, { "epoch": 0.19277240773286466, "grad_norm": 0.4029647409915924, "learning_rate": 1.9113527764046795e-05, "loss": 0.1952, "step": 2340 }, { "epoch": 0.19442003514938488, "grad_norm": 0.38545289635658264, "learning_rate": 1.9278299555116163e-05, "loss": 0.1998, "step": 2360 }, { "epoch": 0.1960676625659051, "grad_norm": 0.44573166966438293, "learning_rate": 1.9443071346185534e-05, "loss": 0.2078, "step": 2380 }, { "epoch": 0.1977152899824253, "grad_norm": 9.132265090942383, "learning_rate": 1.9607843137254903e-05, "loss": 0.2175, "step": 2400 }, { "epoch": 0.19936291739894552, "grad_norm": 0.42929643392562866, "learning_rate": 1.977261492832427e-05, "loss": 0.1762, "step": 2420 }, { "epoch": 0.20101054481546574, "grad_norm": 0.6267173886299133, "learning_rate": 1.9937386719393642e-05, "loss": 0.2101, "step": 2440 }, { "epoch": 0.20265817223198593, "grad_norm": 9.70997142791748, "learning_rate": 2.010215851046301e-05, "loss": 0.2101, "step": 2460 }, { "epoch": 0.20430579964850615, "grad_norm": 0.47748956084251404, "learning_rate": 2.026693030153238e-05, "loss": 0.2039, "step": 2480 }, { "epoch": 0.20595342706502637, "grad_norm": 1.3222582340240479, "learning_rate": 2.043170209260175e-05, "loss": 0.213, "step": 2500 }, { "epoch": 0.20760105448154656, "grad_norm": 0.4152863919734955, "learning_rate": 2.0596473883671115e-05, "loss": 0.148, "step": 2520 }, { "epoch": 0.20924868189806678, "grad_norm": 0.7384160757064819, "learning_rate": 2.0761245674740483e-05, "loss": 0.2138, "step": 2540 }, { "epoch": 0.210896309314587, "grad_norm": 0.27651092410087585, "learning_rate": 2.0926017465809854e-05, "loss": 0.2046, "step": 2560 }, { "epoch": 0.21254393673110722, "grad_norm": 0.226897194981575, "learning_rate": 2.1090789256879222e-05, "loss": 0.1904, "step": 2580 }, { "epoch": 0.2141915641476274, "grad_norm": 1.2391464710235596, "learning_rate": 2.125556104794859e-05, "loss": 0.204, "step": 2600 }, { "epoch": 0.21583919156414763, "grad_norm": 1.6048617362976074, "learning_rate": 2.1420332839017962e-05, "loss": 0.1548, "step": 2620 }, { "epoch": 0.21748681898066785, "grad_norm": 0.28409889340400696, "learning_rate": 2.158510463008733e-05, "loss": 0.201, "step": 2640 }, { "epoch": 0.21913444639718804, "grad_norm": 0.354885995388031, "learning_rate": 2.17498764211567e-05, "loss": 0.2083, "step": 2660 }, { "epoch": 0.22078207381370826, "grad_norm": 0.2778099775314331, "learning_rate": 2.191464821222607e-05, "loss": 0.1891, "step": 2680 }, { "epoch": 0.22242970123022848, "grad_norm": 1.007686734199524, "learning_rate": 2.2079420003295438e-05, "loss": 0.2152, "step": 2700 }, { "epoch": 0.22407732864674867, "grad_norm": 0.9725649952888489, "learning_rate": 2.2244191794364806e-05, "loss": 0.1581, "step": 2720 }, { "epoch": 0.2257249560632689, "grad_norm": 0.2451123297214508, "learning_rate": 2.2408963585434174e-05, "loss": 0.2019, "step": 2740 }, { "epoch": 0.22737258347978911, "grad_norm": 0.3667006194591522, "learning_rate": 2.2573735376503542e-05, "loss": 0.2083, "step": 2760 }, { "epoch": 0.2290202108963093, "grad_norm": 3.1283884048461914, "learning_rate": 2.273850716757291e-05, "loss": 0.194, "step": 2780 }, { "epoch": 0.23066783831282953, "grad_norm": 0.7148507237434387, "learning_rate": 2.2903278958642282e-05, "loss": 0.1931, "step": 2800 }, { "epoch": 0.23231546572934975, "grad_norm": 0.5805519223213196, "learning_rate": 2.306805074971165e-05, "loss": 0.1552, "step": 2820 }, { "epoch": 0.23396309314586994, "grad_norm": 0.8168196082115173, "learning_rate": 2.323282254078102e-05, "loss": 0.2107, "step": 2840 }, { "epoch": 0.23561072056239016, "grad_norm": 0.17171867191791534, "learning_rate": 2.339759433185039e-05, "loss": 0.2195, "step": 2860 }, { "epoch": 0.23725834797891038, "grad_norm": 0.6692082285881042, "learning_rate": 2.3562366122919758e-05, "loss": 0.1975, "step": 2880 }, { "epoch": 0.23890597539543057, "grad_norm": 1.5185160636901855, "learning_rate": 2.3727137913989126e-05, "loss": 0.2086, "step": 2900 }, { "epoch": 0.2405536028119508, "grad_norm": 0.8978987336158752, "learning_rate": 2.3891909705058498e-05, "loss": 0.1537, "step": 2920 }, { "epoch": 0.242201230228471, "grad_norm": 1.1462221145629883, "learning_rate": 2.4056681496127866e-05, "loss": 0.215, "step": 2940 }, { "epoch": 0.2438488576449912, "grad_norm": 0.2455727905035019, "learning_rate": 2.422145328719723e-05, "loss": 0.2137, "step": 2960 }, { "epoch": 0.24549648506151142, "grad_norm": 0.21464231610298157, "learning_rate": 2.4386225078266602e-05, "loss": 0.203, "step": 2980 }, { "epoch": 0.24714411247803164, "grad_norm": 0.8478316068649292, "learning_rate": 2.455099686933597e-05, "loss": 0.2085, "step": 3000 }, { "epoch": 0.24879173989455183, "grad_norm": 0.37225690484046936, "learning_rate": 2.4715768660405338e-05, "loss": 0.1645, "step": 3020 }, { "epoch": 0.2504393673110721, "grad_norm": 1.3999593257904053, "learning_rate": 2.488054045147471e-05, "loss": 0.2129, "step": 3040 }, { "epoch": 0.2520869947275923, "grad_norm": 2.0909199714660645, "learning_rate": 2.5045312242544078e-05, "loss": 0.1926, "step": 3060 }, { "epoch": 0.25373462214411246, "grad_norm": 0.19655053317546844, "learning_rate": 2.5210084033613446e-05, "loss": 0.2007, "step": 3080 }, { "epoch": 0.2553822495606327, "grad_norm": 1.2680870294570923, "learning_rate": 2.5374855824682814e-05, "loss": 0.2107, "step": 3100 }, { "epoch": 0.2570298769771529, "grad_norm": 0.19821316003799438, "learning_rate": 2.5539627615752182e-05, "loss": 0.1511, "step": 3120 }, { "epoch": 0.2586775043936731, "grad_norm": 0.22427937388420105, "learning_rate": 2.5704399406821554e-05, "loss": 0.2139, "step": 3140 }, { "epoch": 0.26032513181019334, "grad_norm": 0.4531656503677368, "learning_rate": 2.5869171197890922e-05, "loss": 0.1995, "step": 3160 }, { "epoch": 0.26197275922671354, "grad_norm": 0.3967747986316681, "learning_rate": 2.603394298896029e-05, "loss": 0.1981, "step": 3180 }, { "epoch": 0.26362038664323373, "grad_norm": 1.0957462787628174, "learning_rate": 2.6198714780029658e-05, "loss": 0.2092, "step": 3200 }, { "epoch": 0.265268014059754, "grad_norm": 0.5567193627357483, "learning_rate": 2.6363486571099026e-05, "loss": 0.1642, "step": 3220 }, { "epoch": 0.26691564147627417, "grad_norm": 0.3523741066455841, "learning_rate": 2.6528258362168395e-05, "loss": 0.2059, "step": 3240 }, { "epoch": 0.26856326889279436, "grad_norm": 0.40257710218429565, "learning_rate": 2.669303015323777e-05, "loss": 0.195, "step": 3260 }, { "epoch": 0.2702108963093146, "grad_norm": 0.3187640309333801, "learning_rate": 2.6849563354753665e-05, "loss": 0.1827, "step": 3280 }, { "epoch": 0.2718585237258348, "grad_norm": 0.7375414967536926, "learning_rate": 2.701433514582304e-05, "loss": 0.2247, "step": 3300 }, { "epoch": 0.273506151142355, "grad_norm": 0.45597076416015625, "learning_rate": 2.7179106936892408e-05, "loss": 0.1654, "step": 3320 }, { "epoch": 0.27515377855887524, "grad_norm": 0.21507132053375244, "learning_rate": 2.7343878727961776e-05, "loss": 0.1858, "step": 3340 }, { "epoch": 0.27680140597539543, "grad_norm": 0.7203060388565063, "learning_rate": 2.7508650519031144e-05, "loss": 0.1908, "step": 3360 }, { "epoch": 0.2784490333919156, "grad_norm": 0.8007901906967163, "learning_rate": 2.7673422310100512e-05, "loss": 0.1793, "step": 3380 }, { "epoch": 0.28009666080843587, "grad_norm": 3.210064649581909, "learning_rate": 2.783819410116988e-05, "loss": 0.206, "step": 3400 }, { "epoch": 0.28174428822495606, "grad_norm": 0.8950255513191223, "learning_rate": 2.8002965892239252e-05, "loss": 0.1531, "step": 3420 }, { "epoch": 0.28339191564147626, "grad_norm": 0.4942973256111145, "learning_rate": 2.816773768330862e-05, "loss": 0.1916, "step": 3440 }, { "epoch": 0.2850395430579965, "grad_norm": 0.31426137685775757, "learning_rate": 2.8332509474377988e-05, "loss": 0.2013, "step": 3460 }, { "epoch": 0.2866871704745167, "grad_norm": 0.47154414653778076, "learning_rate": 2.8497281265447356e-05, "loss": 0.1831, "step": 3480 }, { "epoch": 0.2883347978910369, "grad_norm": 0.8456603288650513, "learning_rate": 2.8662053056516724e-05, "loss": 0.2043, "step": 3500 }, { "epoch": 0.28998242530755713, "grad_norm": 0.29031482338905334, "learning_rate": 2.8826824847586092e-05, "loss": 0.1607, "step": 3520 }, { "epoch": 0.2916300527240773, "grad_norm": 0.3170378804206848, "learning_rate": 2.8991596638655467e-05, "loss": 0.1916, "step": 3540 }, { "epoch": 0.2932776801405975, "grad_norm": 0.3800877332687378, "learning_rate": 2.9156368429724836e-05, "loss": 0.2051, "step": 3560 }, { "epoch": 0.29492530755711777, "grad_norm": 0.5847609639167786, "learning_rate": 2.9321140220794204e-05, "loss": 0.196, "step": 3580 }, { "epoch": 0.29657293497363796, "grad_norm": 1.0933667421340942, "learning_rate": 2.9485912011863572e-05, "loss": 0.2154, "step": 3600 }, { "epoch": 0.29822056239015815, "grad_norm": 4.349573135375977, "learning_rate": 2.9650683802932937e-05, "loss": 0.1606, "step": 3620 }, { "epoch": 0.2998681898066784, "grad_norm": 0.4264489710330963, "learning_rate": 2.9815455594002305e-05, "loss": 0.2117, "step": 3640 }, { "epoch": 0.3015158172231986, "grad_norm": 0.47935691475868225, "learning_rate": 2.998022738507168e-05, "loss": 0.1901, "step": 3660 }, { "epoch": 0.3031634446397188, "grad_norm": 0.7258153557777405, "learning_rate": 3.0144999176141048e-05, "loss": 0.189, "step": 3680 }, { "epoch": 0.30481107205623903, "grad_norm": 2.0093941688537598, "learning_rate": 3.0309770967210416e-05, "loss": 0.2104, "step": 3700 }, { "epoch": 0.3064586994727592, "grad_norm": 1.1718699932098389, "learning_rate": 3.0474542758279784e-05, "loss": 0.1577, "step": 3720 }, { "epoch": 0.3081063268892794, "grad_norm": 0.19388867914676666, "learning_rate": 3.063931454934915e-05, "loss": 0.2011, "step": 3740 }, { "epoch": 0.30975395430579966, "grad_norm": 0.2112320065498352, "learning_rate": 3.080408634041852e-05, "loss": 0.2077, "step": 3760 }, { "epoch": 0.31140158172231985, "grad_norm": 1.9554697275161743, "learning_rate": 3.096885813148789e-05, "loss": 0.1861, "step": 3780 }, { "epoch": 0.31304920913884005, "grad_norm": 0.7065563201904297, "learning_rate": 3.113362992255726e-05, "loss": 0.2071, "step": 3800 }, { "epoch": 0.3146968365553603, "grad_norm": 0.4599238634109497, "learning_rate": 3.129840171362663e-05, "loss": 0.1655, "step": 3820 }, { "epoch": 0.3163444639718805, "grad_norm": 0.5441445708274841, "learning_rate": 3.1463173504695996e-05, "loss": 0.1971, "step": 3840 }, { "epoch": 0.3179920913884007, "grad_norm": 1.1993862390518188, "learning_rate": 3.1627945295765364e-05, "loss": 0.2048, "step": 3860 }, { "epoch": 0.3196397188049209, "grad_norm": 0.3095191717147827, "learning_rate": 3.179271708683473e-05, "loss": 0.2009, "step": 3880 }, { "epoch": 0.3212873462214411, "grad_norm": 1.0743999481201172, "learning_rate": 3.195748887790411e-05, "loss": 0.2371, "step": 3900 }, { "epoch": 0.3229349736379613, "grad_norm": 0.5000220537185669, "learning_rate": 3.2122260668973475e-05, "loss": 0.1722, "step": 3920 }, { "epoch": 0.32458260105448156, "grad_norm": 1.1417018175125122, "learning_rate": 3.2287032460042844e-05, "loss": 0.2079, "step": 3940 }, { "epoch": 0.32623022847100175, "grad_norm": 1.099433422088623, "learning_rate": 3.245180425111221e-05, "loss": 0.2009, "step": 3960 }, { "epoch": 0.32787785588752194, "grad_norm": 0.3827146589756012, "learning_rate": 3.261657604218158e-05, "loss": 0.2171, "step": 3980 }, { "epoch": 0.3295254833040422, "grad_norm": 1.1845418214797974, "learning_rate": 3.278134783325095e-05, "loss": 0.2195, "step": 4000 }, { "epoch": 0.3295254833040422, "eval_loss": 0.5868579149246216, "eval_runtime": 260.92, "eval_samples_per_second": 83.508, "eval_steps_per_second": 20.88, "eval_wer": 0.2350240606008012, "step": 4000 }, { "epoch": 0.3311731107205624, "grad_norm": 1.6073412895202637, "learning_rate": 3.2946119624320316e-05, "loss": 0.1695, "step": 4020 }, { "epoch": 0.33282073813708263, "grad_norm": 2.9597036838531494, "learning_rate": 3.311089141538969e-05, "loss": 0.2484, "step": 4040 }, { "epoch": 0.3344683655536028, "grad_norm": 1.4150543212890625, "learning_rate": 3.327566320645906e-05, "loss": 0.2087, "step": 4060 }, { "epoch": 0.336115992970123, "grad_norm": 0.255397230386734, "learning_rate": 3.344043499752843e-05, "loss": 0.2048, "step": 4080 }, { "epoch": 0.33776362038664326, "grad_norm": 1.9232691526412964, "learning_rate": 3.3605206788597795e-05, "loss": 0.2274, "step": 4100 }, { "epoch": 0.33941124780316345, "grad_norm": 1.0172194242477417, "learning_rate": 3.3769978579667164e-05, "loss": 0.1779, "step": 4120 }, { "epoch": 0.34105887521968364, "grad_norm": 1.3862395286560059, "learning_rate": 3.393475037073653e-05, "loss": 0.2095, "step": 4140 }, { "epoch": 0.3427065026362039, "grad_norm": 0.3353387117385864, "learning_rate": 3.40995221618059e-05, "loss": 0.2021, "step": 4160 }, { "epoch": 0.3443541300527241, "grad_norm": 0.9549083709716797, "learning_rate": 3.426429395287527e-05, "loss": 0.1997, "step": 4180 }, { "epoch": 0.3460017574692443, "grad_norm": 1.6077580451965332, "learning_rate": 3.4429065743944636e-05, "loss": 0.2562, "step": 4200 }, { "epoch": 0.3476493848857645, "grad_norm": 5.387716770172119, "learning_rate": 3.4593837535014004e-05, "loss": 0.1726, "step": 4220 }, { "epoch": 0.3492970123022847, "grad_norm": 0.5455642342567444, "learning_rate": 3.475860932608337e-05, "loss": 0.2342, "step": 4240 }, { "epoch": 0.3509446397188049, "grad_norm": 0.18990729749202728, "learning_rate": 3.492338111715274e-05, "loss": 0.2334, "step": 4260 }, { "epoch": 0.35259226713532515, "grad_norm": 0.4878564476966858, "learning_rate": 3.5088152908222115e-05, "loss": 0.2065, "step": 4280 }, { "epoch": 0.35423989455184535, "grad_norm": 1.3400063514709473, "learning_rate": 3.5252924699291483e-05, "loss": 0.237, "step": 4300 }, { "epoch": 0.35588752196836554, "grad_norm": 0.5822551250457764, "learning_rate": 3.541769649036085e-05, "loss": 0.1787, "step": 4320 }, { "epoch": 0.3575351493848858, "grad_norm": 0.4629223644733429, "learning_rate": 3.558246828143022e-05, "loss": 0.2129, "step": 4340 }, { "epoch": 0.359182776801406, "grad_norm": 1.4195072650909424, "learning_rate": 3.574724007249959e-05, "loss": 0.2215, "step": 4360 }, { "epoch": 0.36083040421792617, "grad_norm": 0.3443647623062134, "learning_rate": 3.5912011863568956e-05, "loss": 0.2002, "step": 4380 }, { "epoch": 0.3624780316344464, "grad_norm": 3.2932443618774414, "learning_rate": 3.607678365463833e-05, "loss": 0.2172, "step": 4400 }, { "epoch": 0.3641256590509666, "grad_norm": 0.4463866055011749, "learning_rate": 3.62415554457077e-05, "loss": 0.1889, "step": 4420 }, { "epoch": 0.3657732864674868, "grad_norm": 0.4873151183128357, "learning_rate": 3.640632723677707e-05, "loss": 0.2139, "step": 4440 }, { "epoch": 0.36742091388400705, "grad_norm": 1.6354761123657227, "learning_rate": 3.6571099027846435e-05, "loss": 0.1986, "step": 4460 }, { "epoch": 0.36906854130052724, "grad_norm": 0.5571808815002441, "learning_rate": 3.6735870818915803e-05, "loss": 0.2047, "step": 4480 }, { "epoch": 0.37071616871704743, "grad_norm": 0.7461993098258972, "learning_rate": 3.690064260998517e-05, "loss": 0.2196, "step": 4500 }, { "epoch": 0.3723637961335677, "grad_norm": 0.2061534970998764, "learning_rate": 3.7065414401054546e-05, "loss": 0.1657, "step": 4520 }, { "epoch": 0.3740114235500879, "grad_norm": 1.0977954864501953, "learning_rate": 3.7230186192123915e-05, "loss": 0.2062, "step": 4540 }, { "epoch": 0.37565905096660807, "grad_norm": 2.38232684135437, "learning_rate": 3.739495798319328e-05, "loss": 0.2319, "step": 4560 }, { "epoch": 0.3773066783831283, "grad_norm": 0.6182531118392944, "learning_rate": 3.755972977426265e-05, "loss": 0.202, "step": 4580 }, { "epoch": 0.3789543057996485, "grad_norm": 28.744009017944336, "learning_rate": 3.772450156533201e-05, "loss": 0.2235, "step": 4600 }, { "epoch": 0.3806019332161687, "grad_norm": 0.5899057984352112, "learning_rate": 3.788927335640138e-05, "loss": 0.1577, "step": 4620 }, { "epoch": 0.38224956063268895, "grad_norm": 0.5423290133476257, "learning_rate": 3.8054045147470755e-05, "loss": 0.2098, "step": 4640 }, { "epoch": 0.38389718804920914, "grad_norm": 0.23139849305152893, "learning_rate": 3.8218816938540123e-05, "loss": 0.1989, "step": 4660 }, { "epoch": 0.38554481546572933, "grad_norm": 0.3539600670337677, "learning_rate": 3.838358872960949e-05, "loss": 0.1968, "step": 4680 }, { "epoch": 0.3871924428822496, "grad_norm": 0.7127693295478821, "learning_rate": 3.854836052067886e-05, "loss": 0.2225, "step": 4700 }, { "epoch": 0.38884007029876977, "grad_norm": 0.3489459753036499, "learning_rate": 3.871313231174823e-05, "loss": 0.1606, "step": 4720 }, { "epoch": 0.39048769771528996, "grad_norm": 0.27798184752464294, "learning_rate": 3.886966551326413e-05, "loss": 0.2096, "step": 4740 }, { "epoch": 0.3921353251318102, "grad_norm": 0.4229481816291809, "learning_rate": 3.90344373043335e-05, "loss": 0.2195, "step": 4760 }, { "epoch": 0.3937829525483304, "grad_norm": 0.25523656606674194, "learning_rate": 3.9199209095402866e-05, "loss": 0.2106, "step": 4780 }, { "epoch": 0.3954305799648506, "grad_norm": 0.9676795601844788, "learning_rate": 3.936398088647224e-05, "loss": 0.2267, "step": 4800 }, { "epoch": 0.39707820738137084, "grad_norm": 0.40875479578971863, "learning_rate": 3.952875267754161e-05, "loss": 0.155, "step": 4820 }, { "epoch": 0.39872583479789103, "grad_norm": 0.335033655166626, "learning_rate": 3.969352446861098e-05, "loss": 0.2081, "step": 4840 }, { "epoch": 0.4003734622144112, "grad_norm": 0.3303823173046112, "learning_rate": 3.9858296259680345e-05, "loss": 0.2282, "step": 4860 }, { "epoch": 0.40202108963093147, "grad_norm": 0.20934663712978363, "learning_rate": 4.0023068050749714e-05, "loss": 0.1965, "step": 4880 }, { "epoch": 0.40366871704745166, "grad_norm": 1.4093564748764038, "learning_rate": 4.018783984181908e-05, "loss": 0.22, "step": 4900 }, { "epoch": 0.40531634446397186, "grad_norm": 0.4276560842990875, "learning_rate": 4.035261163288846e-05, "loss": 0.1705, "step": 4920 }, { "epoch": 0.4069639718804921, "grad_norm": 0.4243983030319214, "learning_rate": 4.0517383423957825e-05, "loss": 0.2285, "step": 4940 }, { "epoch": 0.4086115992970123, "grad_norm": 0.16951896250247955, "learning_rate": 4.068215521502719e-05, "loss": 0.2234, "step": 4960 }, { "epoch": 0.4102592267135325, "grad_norm": 0.30336254835128784, "learning_rate": 4.084692700609656e-05, "loss": 0.2152, "step": 4980 }, { "epoch": 0.41190685413005274, "grad_norm": 1.0761586427688599, "learning_rate": 4.101169879716593e-05, "loss": 0.2093, "step": 5000 }, { "epoch": 0.4135544815465729, "grad_norm": 0.7221740484237671, "learning_rate": 4.11764705882353e-05, "loss": 0.1538, "step": 5020 }, { "epoch": 0.4152021089630931, "grad_norm": 0.4801746904850006, "learning_rate": 4.1341242379304665e-05, "loss": 0.1911, "step": 5040 }, { "epoch": 0.41684973637961337, "grad_norm": 0.311234712600708, "learning_rate": 4.1506014170374034e-05, "loss": 0.2015, "step": 5060 }, { "epoch": 0.41849736379613356, "grad_norm": 0.6403760313987732, "learning_rate": 4.16707859614434e-05, "loss": 0.2014, "step": 5080 }, { "epoch": 0.42014499121265375, "grad_norm": 1.2653217315673828, "learning_rate": 4.183555775251277e-05, "loss": 0.2388, "step": 5100 }, { "epoch": 0.421792618629174, "grad_norm": 0.5536401867866516, "learning_rate": 4.200032954358214e-05, "loss": 0.1553, "step": 5120 }, { "epoch": 0.4234402460456942, "grad_norm": 0.4605076014995575, "learning_rate": 4.2165101334651506e-05, "loss": 0.2105, "step": 5140 }, { "epoch": 0.42508787346221444, "grad_norm": 0.28758004307746887, "learning_rate": 4.232987312572088e-05, "loss": 0.2067, "step": 5160 }, { "epoch": 0.42673550087873463, "grad_norm": 0.36655622720718384, "learning_rate": 4.249464491679025e-05, "loss": 0.1976, "step": 5180 }, { "epoch": 0.4283831282952548, "grad_norm": 0.9053062796592712, "learning_rate": 4.265941670785962e-05, "loss": 0.2102, "step": 5200 }, { "epoch": 0.43003075571177507, "grad_norm": 0.5088081359863281, "learning_rate": 4.2824188498928985e-05, "loss": 0.1639, "step": 5220 }, { "epoch": 0.43167838312829526, "grad_norm": 0.7981218695640564, "learning_rate": 4.2988960289998354e-05, "loss": 0.2024, "step": 5240 }, { "epoch": 0.43332601054481545, "grad_norm": 0.7993011474609375, "learning_rate": 4.315373208106772e-05, "loss": 0.204, "step": 5260 }, { "epoch": 0.4349736379613357, "grad_norm": 0.5183406472206116, "learning_rate": 4.331850387213709e-05, "loss": 0.2041, "step": 5280 }, { "epoch": 0.4366212653778559, "grad_norm": 1.2263312339782715, "learning_rate": 4.3483275663206465e-05, "loss": 0.2182, "step": 5300 }, { "epoch": 0.4382688927943761, "grad_norm": 0.7018775343894958, "learning_rate": 4.364804745427583e-05, "loss": 0.1685, "step": 5320 }, { "epoch": 0.43991652021089633, "grad_norm": 0.7381752133369446, "learning_rate": 4.38128192453452e-05, "loss": 0.2073, "step": 5340 }, { "epoch": 0.4415641476274165, "grad_norm": 0.4658122956752777, "learning_rate": 4.397759103641457e-05, "loss": 0.2031, "step": 5360 }, { "epoch": 0.4432117750439367, "grad_norm": 0.34789761900901794, "learning_rate": 4.414236282748394e-05, "loss": 0.2023, "step": 5380 }, { "epoch": 0.44485940246045697, "grad_norm": 0.9787063598632812, "learning_rate": 4.4307134618553305e-05, "loss": 0.2264, "step": 5400 }, { "epoch": 0.44650702987697716, "grad_norm": 0.3786025047302246, "learning_rate": 4.4471906409622673e-05, "loss": 0.1656, "step": 5420 }, { "epoch": 0.44815465729349735, "grad_norm": 0.4397692084312439, "learning_rate": 4.463667820069204e-05, "loss": 0.2023, "step": 5440 }, { "epoch": 0.4498022847100176, "grad_norm": 0.20323756337165833, "learning_rate": 4.480144999176141e-05, "loss": 0.2146, "step": 5460 }, { "epoch": 0.4514499121265378, "grad_norm": 0.2108180820941925, "learning_rate": 4.496622178283078e-05, "loss": 0.2012, "step": 5480 }, { "epoch": 0.453097539543058, "grad_norm": 1.6603738069534302, "learning_rate": 4.5130993573900146e-05, "loss": 0.2257, "step": 5500 }, { "epoch": 0.45474516695957823, "grad_norm": 1.6646907329559326, "learning_rate": 4.5295765364969514e-05, "loss": 0.1636, "step": 5520 }, { "epoch": 0.4563927943760984, "grad_norm": 0.98222416639328, "learning_rate": 4.546053715603889e-05, "loss": 0.2018, "step": 5540 }, { "epoch": 0.4580404217926186, "grad_norm": 0.6065666079521179, "learning_rate": 4.562530894710826e-05, "loss": 0.2351, "step": 5560 }, { "epoch": 0.45968804920913886, "grad_norm": 0.4321737587451935, "learning_rate": 4.5790080738177625e-05, "loss": 0.2139, "step": 5580 }, { "epoch": 0.46133567662565905, "grad_norm": 2.530203342437744, "learning_rate": 4.5954852529246993e-05, "loss": 0.232, "step": 5600 }, { "epoch": 0.46298330404217924, "grad_norm": 0.8252795934677124, "learning_rate": 4.611962432031636e-05, "loss": 0.1792, "step": 5620 }, { "epoch": 0.4646309314586995, "grad_norm": 4.3282880783081055, "learning_rate": 4.628439611138573e-05, "loss": 0.2079, "step": 5640 }, { "epoch": 0.4662785588752197, "grad_norm": 0.2798108756542206, "learning_rate": 4.6449167902455105e-05, "loss": 0.2114, "step": 5660 }, { "epoch": 0.4679261862917399, "grad_norm": 0.1499057412147522, "learning_rate": 4.661393969352447e-05, "loss": 0.2026, "step": 5680 }, { "epoch": 0.4695738137082601, "grad_norm": 0.8664823770523071, "learning_rate": 4.677871148459384e-05, "loss": 0.2208, "step": 5700 }, { "epoch": 0.4712214411247803, "grad_norm": 0.32247450947761536, "learning_rate": 4.694348327566321e-05, "loss": 0.175, "step": 5720 }, { "epoch": 0.4728690685413005, "grad_norm": 0.4217327833175659, "learning_rate": 4.710825506673258e-05, "loss": 0.2207, "step": 5740 }, { "epoch": 0.47451669595782076, "grad_norm": 0.9544996023178101, "learning_rate": 4.7273026857801945e-05, "loss": 0.2269, "step": 5760 }, { "epoch": 0.47616432337434095, "grad_norm": 0.3182899057865143, "learning_rate": 4.743779864887132e-05, "loss": 0.224, "step": 5780 }, { "epoch": 0.47781195079086114, "grad_norm": 0.669552743434906, "learning_rate": 4.760257043994069e-05, "loss": 0.2332, "step": 5800 }, { "epoch": 0.4794595782073814, "grad_norm": 0.5297145247459412, "learning_rate": 4.7767342231010056e-05, "loss": 0.2233, "step": 5820 }, { "epoch": 0.4811072056239016, "grad_norm": 0.5998116731643677, "learning_rate": 4.7932114022079425e-05, "loss": 0.2282, "step": 5840 }, { "epoch": 0.48275483304042177, "grad_norm": 0.4391906261444092, "learning_rate": 4.809688581314879e-05, "loss": 0.2094, "step": 5860 }, { "epoch": 0.484402460456942, "grad_norm": 0.559304416179657, "learning_rate": 4.826165760421816e-05, "loss": 0.2106, "step": 5880 }, { "epoch": 0.4860500878734622, "grad_norm": 2.914066791534424, "learning_rate": 4.842642939528753e-05, "loss": 0.2484, "step": 5900 }, { "epoch": 0.4876977152899824, "grad_norm": 0.29989850521087646, "learning_rate": 4.85912011863569e-05, "loss": 0.1631, "step": 5920 }, { "epoch": 0.48934534270650265, "grad_norm": 2.1414361000061035, "learning_rate": 4.8755972977426265e-05, "loss": 0.2315, "step": 5940 }, { "epoch": 0.49099297012302284, "grad_norm": 0.5668668746948242, "learning_rate": 4.892074476849563e-05, "loss": 0.218, "step": 5960 }, { "epoch": 0.49264059753954303, "grad_norm": 0.34356266260147095, "learning_rate": 4.9077277970011535e-05, "loss": 0.2121, "step": 5980 }, { "epoch": 0.4942882249560633, "grad_norm": 0.7654836773872375, "learning_rate": 4.9242049761080904e-05, "loss": 0.212, "step": 6000 }, { "epoch": 0.4942882249560633, "eval_loss": 0.5638302564620972, "eval_runtime": 264.9088, "eval_samples_per_second": 82.251, "eval_steps_per_second": 20.566, "eval_wer": 0.24026168527782366, "step": 6000 }, { "epoch": 0.4959358523725835, "grad_norm": 1.2249395847320557, "learning_rate": 4.940682155215027e-05, "loss": 0.1786, "step": 6020 }, { "epoch": 0.49758347978910367, "grad_norm": 0.834241509437561, "learning_rate": 4.957159334321964e-05, "loss": 0.2193, "step": 6040 }, { "epoch": 0.4992311072056239, "grad_norm": 1.9017058610916138, "learning_rate": 4.9736365134289015e-05, "loss": 0.2338, "step": 6060 }, { "epoch": 0.5008787346221442, "grad_norm": 3.1669952869415283, "learning_rate": 4.990113692535838e-05, "loss": 0.2245, "step": 6080 }, { "epoch": 0.5025263620386643, "grad_norm": 2.9794270992279053, "learning_rate": 4.9999997353508944e-05, "loss": 0.239, "step": 6100 }, { "epoch": 0.5041739894551845, "grad_norm": 1.2019259929656982, "learning_rate": 4.999996758049098e-05, "loss": 0.1871, "step": 6120 }, { "epoch": 0.5058216168717048, "grad_norm": 2.175334930419922, "learning_rate": 4.999990472638076e-05, "loss": 0.2265, "step": 6140 }, { "epoch": 0.5074692442882249, "grad_norm": 1.4895635843276978, "learning_rate": 4.999980879126146e-05, "loss": 0.2259, "step": 6160 }, { "epoch": 0.5091168717047452, "grad_norm": 3.355076789855957, "learning_rate": 4.9999679775260015e-05, "loss": 0.2316, "step": 6180 }, { "epoch": 0.5107644991212654, "grad_norm": 3.545771598815918, "learning_rate": 4.999951767854715e-05, "loss": 0.2426, "step": 6200 }, { "epoch": 0.5124121265377856, "grad_norm": 2.085190773010254, "learning_rate": 4.999932250133736e-05, "loss": 0.1884, "step": 6220 }, { "epoch": 0.5140597539543058, "grad_norm": 1.6277663707733154, "learning_rate": 4.999909424388892e-05, "loss": 0.2332, "step": 6240 }, { "epoch": 0.5157073813708261, "grad_norm": 3.9234516620635986, "learning_rate": 4.9998832906503856e-05, "loss": 0.2346, "step": 6260 }, { "epoch": 0.5173550087873462, "grad_norm": 1.7027534246444702, "learning_rate": 4.9998538489527984e-05, "loss": 0.2339, "step": 6280 }, { "epoch": 0.5190026362038664, "grad_norm": 2.7195184230804443, "learning_rate": 4.9998210993350895e-05, "loss": 0.2511, "step": 6300 }, { "epoch": 0.5206502636203867, "grad_norm": 0.7267903089523315, "learning_rate": 4.9997850418405945e-05, "loss": 0.1765, "step": 6320 }, { "epoch": 0.5222978910369068, "grad_norm": 0.751308798789978, "learning_rate": 4.999745676517027e-05, "loss": 0.2393, "step": 6340 }, { "epoch": 0.5239455184534271, "grad_norm": 1.0236356258392334, "learning_rate": 4.999703003416476e-05, "loss": 0.2292, "step": 6360 }, { "epoch": 0.5255931458699473, "grad_norm": 0.7693842649459839, "learning_rate": 4.999657022595409e-05, "loss": 0.2285, "step": 6380 }, { "epoch": 0.5272407732864675, "grad_norm": 3.4103684425354004, "learning_rate": 4.999607734114669e-05, "loss": 0.2635, "step": 6400 }, { "epoch": 0.5288884007029877, "grad_norm": 0.5887142419815063, "learning_rate": 4.999555138039478e-05, "loss": 0.1938, "step": 6420 }, { "epoch": 0.530536028119508, "grad_norm": 1.6326848268508911, "learning_rate": 4.999499234439433e-05, "loss": 0.2112, "step": 6440 }, { "epoch": 0.5321836555360281, "grad_norm": 0.4003719985485077, "learning_rate": 4.9994400233885086e-05, "loss": 0.2011, "step": 6460 }, { "epoch": 0.5338312829525483, "grad_norm": 1.0633249282836914, "learning_rate": 4.999377504965055e-05, "loss": 0.2388, "step": 6480 }, { "epoch": 0.5354789103690686, "grad_norm": 3.87857985496521, "learning_rate": 4.999311679251799e-05, "loss": 0.2222, "step": 6500 }, { "epoch": 0.5371265377855887, "grad_norm": 1.1982243061065674, "learning_rate": 4.999242546335845e-05, "loss": 0.1736, "step": 6520 }, { "epoch": 0.538774165202109, "grad_norm": 1.4149129390716553, "learning_rate": 4.999170106308673e-05, "loss": 0.2044, "step": 6540 }, { "epoch": 0.5404217926186292, "grad_norm": 0.9540938138961792, "learning_rate": 4.999094359266138e-05, "loss": 0.2343, "step": 6560 }, { "epoch": 0.5420694200351494, "grad_norm": 0.8150781989097595, "learning_rate": 4.999015305308472e-05, "loss": 0.2027, "step": 6580 }, { "epoch": 0.5437170474516696, "grad_norm": 2.2410573959350586, "learning_rate": 4.998932944540284e-05, "loss": 0.2191, "step": 6600 }, { "epoch": 0.5453646748681898, "grad_norm": 2.924471616744995, "learning_rate": 4.998847277070556e-05, "loss": 0.1776, "step": 6620 }, { "epoch": 0.54701230228471, "grad_norm": 2.5102221965789795, "learning_rate": 4.9987583030126484e-05, "loss": 0.2172, "step": 6640 }, { "epoch": 0.5486599297012302, "grad_norm": 1.2676066160202026, "learning_rate": 4.998666022484295e-05, "loss": 0.2235, "step": 6660 }, { "epoch": 0.5503075571177505, "grad_norm": 1.7459567785263062, "learning_rate": 4.998570435607605e-05, "loss": 0.2114, "step": 6680 }, { "epoch": 0.5519551845342706, "grad_norm": 2.4994170665740967, "learning_rate": 4.998476565684759e-05, "loss": 0.2321, "step": 6700 }, { "epoch": 0.5536028119507909, "grad_norm": 1.436635136604309, "learning_rate": 4.998374531796601e-05, "loss": 0.1715, "step": 6720 }, { "epoch": 0.5552504393673111, "grad_norm": 5.170281410217285, "learning_rate": 4.9982691919458215e-05, "loss": 0.2227, "step": 6740 }, { "epoch": 0.5568980667838312, "grad_norm": 2.329806327819824, "learning_rate": 4.9981605462718097e-05, "loss": 0.2103, "step": 6760 }, { "epoch": 0.5585456942003515, "grad_norm": 0.5105818510055542, "learning_rate": 4.998048594918331e-05, "loss": 0.2297, "step": 6780 }, { "epoch": 0.5601933216168717, "grad_norm": 2.74815034866333, "learning_rate": 4.997933338033525e-05, "loss": 0.2454, "step": 6800 }, { "epoch": 0.5618409490333919, "grad_norm": 1.0433367490768433, "learning_rate": 4.997814775769904e-05, "loss": 0.175, "step": 6820 }, { "epoch": 0.5634885764499121, "grad_norm": 0.5902445316314697, "learning_rate": 4.997692908284356e-05, "loss": 0.2153, "step": 6840 }, { "epoch": 0.5651362038664324, "grad_norm": 0.9332594275474548, "learning_rate": 4.997567735738141e-05, "loss": 0.2246, "step": 6860 }, { "epoch": 0.5667838312829525, "grad_norm": 1.2199161052703857, "learning_rate": 4.9974392582968934e-05, "loss": 0.2281, "step": 6880 }, { "epoch": 0.5684314586994728, "grad_norm": 2.2492709159851074, "learning_rate": 4.9973074761306186e-05, "loss": 0.238, "step": 6900 }, { "epoch": 0.570079086115993, "grad_norm": 9.928802490234375, "learning_rate": 4.997172389413699e-05, "loss": 0.1731, "step": 6920 }, { "epoch": 0.5717267135325131, "grad_norm": 1.4187493324279785, "learning_rate": 4.997033998324886e-05, "loss": 0.2267, "step": 6940 }, { "epoch": 0.5733743409490334, "grad_norm": 1.0871655941009521, "learning_rate": 4.996892303047306e-05, "loss": 0.2478, "step": 6960 }, { "epoch": 0.5750219683655536, "grad_norm": 0.9939442873001099, "learning_rate": 4.996747303768456e-05, "loss": 0.1992, "step": 6980 }, { "epoch": 0.5766695957820738, "grad_norm": 48.66152572631836, "learning_rate": 4.996599000680206e-05, "loss": 0.2341, "step": 7000 }, { "epoch": 0.578317223198594, "grad_norm": 0.8890938758850098, "learning_rate": 4.996447393978797e-05, "loss": 0.1687, "step": 7020 }, { "epoch": 0.5799648506151143, "grad_norm": 0.8341367840766907, "learning_rate": 4.996292483864843e-05, "loss": 0.2175, "step": 7040 }, { "epoch": 0.5816124780316344, "grad_norm": 1.6562838554382324, "learning_rate": 4.996134270543326e-05, "loss": 0.2136, "step": 7060 }, { "epoch": 0.5832601054481547, "grad_norm": 0.6537133455276489, "learning_rate": 4.9959727542236025e-05, "loss": 0.2139, "step": 7080 }, { "epoch": 0.5849077328646749, "grad_norm": 2.101271390914917, "learning_rate": 4.9958079351193976e-05, "loss": 0.2359, "step": 7100 }, { "epoch": 0.586555360281195, "grad_norm": 0.9616307616233826, "learning_rate": 4.995639813448808e-05, "loss": 0.1774, "step": 7120 }, { "epoch": 0.5882029876977153, "grad_norm": 1.0992920398712158, "learning_rate": 4.9954683894343e-05, "loss": 0.1962, "step": 7140 }, { "epoch": 0.5898506151142355, "grad_norm": 0.6329948902130127, "learning_rate": 4.995293663302709e-05, "loss": 0.2145, "step": 7160 }, { "epoch": 0.5914982425307557, "grad_norm": 1.0156137943267822, "learning_rate": 4.9951156352852415e-05, "loss": 0.2161, "step": 7180 }, { "epoch": 0.5931458699472759, "grad_norm": 2.6131973266601562, "learning_rate": 4.994943450511368e-05, "loss": 0.2234, "step": 7200 }, { "epoch": 0.5947934973637962, "grad_norm": 3.4186134338378906, "learning_rate": 4.99475898449799e-05, "loss": 0.171, "step": 7220 }, { "epoch": 0.5964411247803163, "grad_norm": 4.896924018859863, "learning_rate": 4.9945712173062477e-05, "loss": 0.2073, "step": 7240 }, { "epoch": 0.5980887521968365, "grad_norm": 1.2636560201644897, "learning_rate": 4.994380149184601e-05, "loss": 0.2144, "step": 7260 }, { "epoch": 0.5997363796133568, "grad_norm": 1.4861242771148682, "learning_rate": 4.99418578038588e-05, "loss": 0.2024, "step": 7280 }, { "epoch": 0.6013840070298769, "grad_norm": 6.527903079986572, "learning_rate": 4.993988111167284e-05, "loss": 0.2172, "step": 7300 }, { "epoch": 0.6030316344463972, "grad_norm": 1.9219621419906616, "learning_rate": 4.993787141790375e-05, "loss": 0.1865, "step": 7320 }, { "epoch": 0.6046792618629174, "grad_norm": 0.7625167369842529, "learning_rate": 4.9935828725210874e-05, "loss": 0.2238, "step": 7340 }, { "epoch": 0.6063268892794376, "grad_norm": 1.3375577926635742, "learning_rate": 4.9933753036297196e-05, "loss": 0.1878, "step": 7360 }, { "epoch": 0.6079745166959578, "grad_norm": 1.1067700386047363, "learning_rate": 4.993164435390935e-05, "loss": 0.2111, "step": 7380 }, { "epoch": 0.6096221441124781, "grad_norm": 4.094808578491211, "learning_rate": 4.992950268083764e-05, "loss": 0.2073, "step": 7400 }, { "epoch": 0.6112697715289982, "grad_norm": 2.683678150177002, "learning_rate": 4.992732801991602e-05, "loss": 0.172, "step": 7420 }, { "epoch": 0.6129173989455184, "grad_norm": 0.9752517342567444, "learning_rate": 4.992512037402212e-05, "loss": 0.1924, "step": 7440 }, { "epoch": 0.6145650263620387, "grad_norm": 0.8463476896286011, "learning_rate": 4.9922879746077176e-05, "loss": 0.2021, "step": 7460 }, { "epoch": 0.6162126537785588, "grad_norm": 3.053812026977539, "learning_rate": 4.992060613904611e-05, "loss": 0.2086, "step": 7480 }, { "epoch": 0.6178602811950791, "grad_norm": 2.9483275413513184, "learning_rate": 4.991829955593744e-05, "loss": 0.2154, "step": 7500 }, { "epoch": 0.6195079086115993, "grad_norm": 1.3794467449188232, "learning_rate": 4.9915959999803365e-05, "loss": 0.1599, "step": 7520 }, { "epoch": 0.6211555360281195, "grad_norm": 1.0529364347457886, "learning_rate": 4.9913587473739666e-05, "loss": 0.1891, "step": 7540 }, { "epoch": 0.6228031634446397, "grad_norm": 0.9026776552200317, "learning_rate": 4.991118198088579e-05, "loss": 0.2074, "step": 7560 }, { "epoch": 0.62445079086116, "grad_norm": 0.4504956603050232, "learning_rate": 4.9908743524424806e-05, "loss": 0.1931, "step": 7580 }, { "epoch": 0.6260984182776801, "grad_norm": 4.0763726234436035, "learning_rate": 4.9906272107583366e-05, "loss": 0.2192, "step": 7600 }, { "epoch": 0.6277460456942003, "grad_norm": 1.012601375579834, "learning_rate": 4.990376773363178e-05, "loss": 0.1604, "step": 7620 }, { "epoch": 0.6293936731107206, "grad_norm": 0.6183291673660278, "learning_rate": 4.990123040588395e-05, "loss": 0.2015, "step": 7640 }, { "epoch": 0.6310413005272407, "grad_norm": 0.718015730381012, "learning_rate": 4.989866012769736e-05, "loss": 0.2155, "step": 7660 }, { "epoch": 0.632688927943761, "grad_norm": 1.5611259937286377, "learning_rate": 4.989605690247315e-05, "loss": 0.1975, "step": 7680 }, { "epoch": 0.6343365553602812, "grad_norm": 3.2936527729034424, "learning_rate": 4.9893420733656e-05, "loss": 0.2305, "step": 7700 }, { "epoch": 0.6359841827768014, "grad_norm": 1.3771491050720215, "learning_rate": 4.9890751624734225e-05, "loss": 0.1575, "step": 7720 }, { "epoch": 0.6376318101933216, "grad_norm": 1.0577577352523804, "learning_rate": 4.98880495792397e-05, "loss": 0.2045, "step": 7740 }, { "epoch": 0.6392794376098418, "grad_norm": 3.6020660400390625, "learning_rate": 4.988531460074791e-05, "loss": 0.2034, "step": 7760 }, { "epoch": 0.640927065026362, "grad_norm": 0.7804440855979919, "learning_rate": 4.9882546692877885e-05, "loss": 0.1953, "step": 7780 }, { "epoch": 0.6425746924428822, "grad_norm": 57.011390686035156, "learning_rate": 4.987974585929226e-05, "loss": 0.2207, "step": 7800 }, { "epoch": 0.6442223198594025, "grad_norm": 1.5725603103637695, "learning_rate": 4.987691210369721e-05, "loss": 0.1664, "step": 7820 }, { "epoch": 0.6458699472759226, "grad_norm": 0.6347635984420776, "learning_rate": 4.98740454298425e-05, "loss": 0.2134, "step": 7840 }, { "epoch": 0.6475175746924429, "grad_norm": 1.3581891059875488, "learning_rate": 4.987114584152145e-05, "loss": 0.217, "step": 7860 }, { "epoch": 0.6491652021089631, "grad_norm": 5.659873008728027, "learning_rate": 4.986821334257091e-05, "loss": 0.1977, "step": 7880 }, { "epoch": 0.6508128295254832, "grad_norm": 11.7850980758667, "learning_rate": 4.986524793687131e-05, "loss": 0.218, "step": 7900 }, { "epoch": 0.6524604569420035, "grad_norm": 1.2856826782226562, "learning_rate": 4.986224962834659e-05, "loss": 0.1675, "step": 7920 }, { "epoch": 0.6541080843585237, "grad_norm": 1.055503249168396, "learning_rate": 4.985921842096427e-05, "loss": 0.2113, "step": 7940 }, { "epoch": 0.6557557117750439, "grad_norm": 1.5587449073791504, "learning_rate": 4.9856154318735374e-05, "loss": 0.1939, "step": 7960 }, { "epoch": 0.6574033391915641, "grad_norm": 2.40641450881958, "learning_rate": 4.985305732571446e-05, "loss": 0.206, "step": 7980 }, { "epoch": 0.6590509666080844, "grad_norm": 4.06686544418335, "learning_rate": 4.98499274459996e-05, "loss": 0.2107, "step": 8000 }, { "epoch": 0.6590509666080844, "eval_loss": 0.5655311346054077, "eval_runtime": 835.4807, "eval_samples_per_second": 26.08, "eval_steps_per_second": 6.521, "eval_wer": 0.2457764477651215, "step": 8000 }, { "epoch": 0.6606985940246046, "grad_norm": 0.8585990071296692, "learning_rate": 4.984676468373241e-05, "loss": 0.1667, "step": 8020 }, { "epoch": 0.6623462214411248, "grad_norm": 1.2851459980010986, "learning_rate": 4.984356904309799e-05, "loss": 0.2184, "step": 8040 }, { "epoch": 0.663993848857645, "grad_norm": 0.8445897698402405, "learning_rate": 4.984034052832496e-05, "loss": 0.2169, "step": 8060 }, { "epoch": 0.6656414762741653, "grad_norm": 1.4265161752700806, "learning_rate": 4.983707914368544e-05, "loss": 0.2004, "step": 8080 }, { "epoch": 0.6672891036906854, "grad_norm": 3.0620169639587402, "learning_rate": 4.983378489349504e-05, "loss": 0.2467, "step": 8100 }, { "epoch": 0.6689367311072056, "grad_norm": 1.1079747676849365, "learning_rate": 4.983045778211286e-05, "loss": 0.1587, "step": 8120 }, { "epoch": 0.6705843585237259, "grad_norm": 0.8565286993980408, "learning_rate": 4.982709781394148e-05, "loss": 0.2101, "step": 8140 }, { "epoch": 0.672231985940246, "grad_norm": 2.957345962524414, "learning_rate": 4.982370499342698e-05, "loss": 0.212, "step": 8160 }, { "epoch": 0.6738796133567663, "grad_norm": 1.744805932044983, "learning_rate": 4.982027932505887e-05, "loss": 0.209, "step": 8180 }, { "epoch": 0.6755272407732865, "grad_norm": 3.963977336883545, "learning_rate": 4.9816820813370166e-05, "loss": 0.214, "step": 8200 }, { "epoch": 0.6771748681898067, "grad_norm": 1.3982582092285156, "learning_rate": 4.981332946293733e-05, "loss": 0.1671, "step": 8220 }, { "epoch": 0.6788224956063269, "grad_norm": 0.5959907174110413, "learning_rate": 4.9809805278380264e-05, "loss": 0.2061, "step": 8240 }, { "epoch": 0.6804701230228472, "grad_norm": 1.2237777709960938, "learning_rate": 4.980624826436233e-05, "loss": 0.2188, "step": 8260 }, { "epoch": 0.6821177504393673, "grad_norm": 2.6652753353118896, "learning_rate": 4.9802658425590344e-05, "loss": 0.1964, "step": 8280 }, { "epoch": 0.6837653778558875, "grad_norm": 31.890893936157227, "learning_rate": 4.979903576681453e-05, "loss": 0.2133, "step": 8300 }, { "epoch": 0.6854130052724078, "grad_norm": 1.1191130876541138, "learning_rate": 4.979538029282855e-05, "loss": 0.1623, "step": 8320 }, { "epoch": 0.6870606326889279, "grad_norm": 1.4404329061508179, "learning_rate": 4.9791692008469514e-05, "loss": 0.2087, "step": 8340 }, { "epoch": 0.6887082601054482, "grad_norm": 1.4656809568405151, "learning_rate": 4.9787970918617914e-05, "loss": 0.2134, "step": 8360 }, { "epoch": 0.6903558875219684, "grad_norm": 2.310316801071167, "learning_rate": 4.978421702819767e-05, "loss": 0.215, "step": 8380 }, { "epoch": 0.6920035149384886, "grad_norm": 21.141889572143555, "learning_rate": 4.978043034217609e-05, "loss": 0.2206, "step": 8400 }, { "epoch": 0.6936511423550088, "grad_norm": 1.2799972295761108, "learning_rate": 4.977680261809319e-05, "loss": 0.1553, "step": 8420 }, { "epoch": 0.695298769771529, "grad_norm": 0.927029550075531, "learning_rate": 4.97729519951006e-05, "loss": 0.2205, "step": 8440 }, { "epoch": 0.6969463971880492, "grad_norm": 1.8685028553009033, "learning_rate": 4.976906859141309e-05, "loss": 0.1938, "step": 8460 }, { "epoch": 0.6985940246045694, "grad_norm": 0.38425010442733765, "learning_rate": 4.976515241216936e-05, "loss": 0.1964, "step": 8480 }, { "epoch": 0.7002416520210897, "grad_norm": 2.6303980350494385, "learning_rate": 4.976120346255146e-05, "loss": 0.1984, "step": 8500 }, { "epoch": 0.7018892794376098, "grad_norm": 7.791947841644287, "learning_rate": 4.975722174778482e-05, "loss": 0.1678, "step": 8520 }, { "epoch": 0.7035369068541301, "grad_norm": 1.193328857421875, "learning_rate": 4.9753207273138245e-05, "loss": 0.2182, "step": 8540 }, { "epoch": 0.7051845342706503, "grad_norm": 1.1064016819000244, "learning_rate": 4.974916004392385e-05, "loss": 0.2065, "step": 8560 }, { "epoch": 0.7068321616871704, "grad_norm": 3.304832696914673, "learning_rate": 4.974508006549711e-05, "loss": 0.1872, "step": 8580 }, { "epoch": 0.7084797891036907, "grad_norm": 2.5478882789611816, "learning_rate": 4.974096734325686e-05, "loss": 0.2295, "step": 8600 }, { "epoch": 0.7101274165202109, "grad_norm": 14.596063613891602, "learning_rate": 4.9736821882645226e-05, "loss": 0.1628, "step": 8620 }, { "epoch": 0.7117750439367311, "grad_norm": 0.9999619126319885, "learning_rate": 4.973264368914766e-05, "loss": 0.2107, "step": 8640 }, { "epoch": 0.7134226713532513, "grad_norm": 1.053412675857544, "learning_rate": 4.972843276829296e-05, "loss": 0.2085, "step": 8660 }, { "epoch": 0.7150702987697716, "grad_norm": 2.1676464080810547, "learning_rate": 4.9724189125653195e-05, "loss": 0.2048, "step": 8680 }, { "epoch": 0.7167179261862917, "grad_norm": 10.32970142364502, "learning_rate": 4.9719912766843746e-05, "loss": 0.2224, "step": 8700 }, { "epoch": 0.718365553602812, "grad_norm": 1.9829623699188232, "learning_rate": 4.971560369752328e-05, "loss": 0.1686, "step": 8720 }, { "epoch": 0.7200131810193322, "grad_norm": 2.7383005619049072, "learning_rate": 4.971126192339377e-05, "loss": 0.2088, "step": 8740 }, { "epoch": 0.7216608084358523, "grad_norm": 0.6413734555244446, "learning_rate": 4.970688745020043e-05, "loss": 0.2111, "step": 8760 }, { "epoch": 0.7233084358523726, "grad_norm": 0.8469798564910889, "learning_rate": 4.970248028373178e-05, "loss": 0.2057, "step": 8780 }, { "epoch": 0.7249560632688928, "grad_norm": 8.44117259979248, "learning_rate": 4.969804042981956e-05, "loss": 0.2179, "step": 8800 }, { "epoch": 0.726603690685413, "grad_norm": 1.40958571434021, "learning_rate": 4.969356789433881e-05, "loss": 0.1604, "step": 8820 }, { "epoch": 0.7282513181019332, "grad_norm": 5.328885078430176, "learning_rate": 4.968906268320777e-05, "loss": 0.2075, "step": 8840 }, { "epoch": 0.7298989455184535, "grad_norm": 0.6396345496177673, "learning_rate": 4.9684524802387956e-05, "loss": 0.2057, "step": 8860 }, { "epoch": 0.7315465729349736, "grad_norm": 0.7661327123641968, "learning_rate": 4.967995425788409e-05, "loss": 0.2064, "step": 8880 }, { "epoch": 0.7331942003514939, "grad_norm": 2.8300130367279053, "learning_rate": 4.9675351055744134e-05, "loss": 0.2109, "step": 8900 }, { "epoch": 0.7348418277680141, "grad_norm": 1.3380045890808105, "learning_rate": 4.9670715202059235e-05, "loss": 0.1492, "step": 8920 }, { "epoch": 0.7364894551845342, "grad_norm": 1.478825569152832, "learning_rate": 4.9666046702963784e-05, "loss": 0.2133, "step": 8940 }, { "epoch": 0.7381370826010545, "grad_norm": 1.3596333265304565, "learning_rate": 4.9661345564635356e-05, "loss": 0.1999, "step": 8960 }, { "epoch": 0.7397847100175747, "grad_norm": 0.3950871229171753, "learning_rate": 4.965661179329468e-05, "loss": 0.1925, "step": 8980 }, { "epoch": 0.7414323374340949, "grad_norm": 3.47658634185791, "learning_rate": 4.965184539520572e-05, "loss": 0.2016, "step": 9000 }, { "epoch": 0.7430799648506151, "grad_norm": 1.6241281032562256, "learning_rate": 4.9647046376675586e-05, "loss": 0.1522, "step": 9020 }, { "epoch": 0.7447275922671354, "grad_norm": 0.582032322883606, "learning_rate": 4.964221474405456e-05, "loss": 0.2161, "step": 9040 }, { "epoch": 0.7463752196836555, "grad_norm": 1.1760663986206055, "learning_rate": 4.963735050373608e-05, "loss": 0.1986, "step": 9060 }, { "epoch": 0.7480228471001757, "grad_norm": 1.9206633567810059, "learning_rate": 4.963245366215672e-05, "loss": 0.195, "step": 9080 }, { "epoch": 0.749670474516696, "grad_norm": 2.0300755500793457, "learning_rate": 4.9627524225796206e-05, "loss": 0.2044, "step": 9100 }, { "epoch": 0.7513181019332161, "grad_norm": 0.91159987449646, "learning_rate": 4.962256220117739e-05, "loss": 0.156, "step": 9120 }, { "epoch": 0.7529657293497364, "grad_norm": 0.6553214192390442, "learning_rate": 4.961756759486625e-05, "loss": 0.1918, "step": 9140 }, { "epoch": 0.7546133567662566, "grad_norm": 2.1197195053100586, "learning_rate": 4.961254041347189e-05, "loss": 0.1942, "step": 9160 }, { "epoch": 0.7562609841827768, "grad_norm": 0.5275347232818604, "learning_rate": 4.9607480663646487e-05, "loss": 0.1975, "step": 9180 }, { "epoch": 0.757908611599297, "grad_norm": 2.276308536529541, "learning_rate": 4.9602388352085337e-05, "loss": 0.2035, "step": 9200 }, { "epoch": 0.7595562390158173, "grad_norm": 1.4071913957595825, "learning_rate": 4.9597263485526826e-05, "loss": 0.1636, "step": 9220 }, { "epoch": 0.7612038664323374, "grad_norm": 5.9329023361206055, "learning_rate": 4.959210607075239e-05, "loss": 0.1954, "step": 9240 }, { "epoch": 0.7628514938488576, "grad_norm": 1.0558881759643555, "learning_rate": 4.958691611458657e-05, "loss": 0.2089, "step": 9260 }, { "epoch": 0.7644991212653779, "grad_norm": 1.1962648630142212, "learning_rate": 4.958169362389695e-05, "loss": 0.1966, "step": 9280 }, { "epoch": 0.766146748681898, "grad_norm": 27.050457000732422, "learning_rate": 4.957643860559417e-05, "loss": 0.2254, "step": 9300 }, { "epoch": 0.7677943760984183, "grad_norm": 1.5262819528579712, "learning_rate": 4.95711510666319e-05, "loss": 0.1672, "step": 9320 }, { "epoch": 0.7694420035149385, "grad_norm": 1.5432629585266113, "learning_rate": 4.956583101400685e-05, "loss": 0.2201, "step": 9340 }, { "epoch": 0.7710896309314587, "grad_norm": 1.4080497026443481, "learning_rate": 4.956047845475877e-05, "loss": 0.2034, "step": 9360 }, { "epoch": 0.7727372583479789, "grad_norm": 1.6339974403381348, "learning_rate": 4.9555093395970396e-05, "loss": 0.1918, "step": 9380 }, { "epoch": 0.7743848857644992, "grad_norm": 3.812486410140991, "learning_rate": 4.954967584476748e-05, "loss": 0.2221, "step": 9400 }, { "epoch": 0.7760325131810193, "grad_norm": 2.0563764572143555, "learning_rate": 4.954422580831879e-05, "loss": 0.1611, "step": 9420 }, { "epoch": 0.7776801405975395, "grad_norm": 0.7051644325256348, "learning_rate": 4.9538743293836046e-05, "loss": 0.2053, "step": 9440 }, { "epoch": 0.7793277680140598, "grad_norm": 0.837563157081604, "learning_rate": 4.9533228308573966e-05, "loss": 0.2078, "step": 9460 }, { "epoch": 0.7809753954305799, "grad_norm": 1.1410398483276367, "learning_rate": 4.952768085983023e-05, "loss": 0.2071, "step": 9480 }, { "epoch": 0.7826230228471002, "grad_norm": 2.372843027114868, "learning_rate": 4.952210095494546e-05, "loss": 0.2154, "step": 9500 }, { "epoch": 0.7842706502636204, "grad_norm": 25.939308166503906, "learning_rate": 4.9516488601303255e-05, "loss": 0.1618, "step": 9520 }, { "epoch": 0.7859182776801406, "grad_norm": 1.780004858970642, "learning_rate": 4.951084380633013e-05, "loss": 0.1993, "step": 9540 }, { "epoch": 0.7875659050966608, "grad_norm": 1.1589267253875732, "learning_rate": 4.9505166577495546e-05, "loss": 0.1939, "step": 9560 }, { "epoch": 0.789213532513181, "grad_norm": 1.0380936861038208, "learning_rate": 4.949945692231185e-05, "loss": 0.1863, "step": 9580 }, { "epoch": 0.7908611599297012, "grad_norm": 3.860309600830078, "learning_rate": 4.949371484833433e-05, "loss": 0.2199, "step": 9600 }, { "epoch": 0.7925087873462214, "grad_norm": 2.6430704593658447, "learning_rate": 4.9487940363161155e-05, "loss": 0.161, "step": 9620 }, { "epoch": 0.7941564147627417, "grad_norm": 2.1502444744110107, "learning_rate": 4.948213347443339e-05, "loss": 0.2101, "step": 9640 }, { "epoch": 0.7958040421792618, "grad_norm": 0.9621193408966064, "learning_rate": 4.9476294189834974e-05, "loss": 0.2105, "step": 9660 }, { "epoch": 0.7974516695957821, "grad_norm": 0.8426349759101868, "learning_rate": 4.9470422517092696e-05, "loss": 0.1821, "step": 9680 }, { "epoch": 0.7990992970123023, "grad_norm": 2.1861345767974854, "learning_rate": 4.9464518463976246e-05, "loss": 0.2157, "step": 9700 }, { "epoch": 0.8007469244288224, "grad_norm": 1.074504017829895, "learning_rate": 4.945858203829812e-05, "loss": 0.1593, "step": 9720 }, { "epoch": 0.8023945518453427, "grad_norm": 1.4774161577224731, "learning_rate": 4.945261324791367e-05, "loss": 0.1981, "step": 9740 }, { "epoch": 0.8040421792618629, "grad_norm": 0.9464648962020874, "learning_rate": 4.944661210072107e-05, "loss": 0.2107, "step": 9760 }, { "epoch": 0.8056898066783831, "grad_norm": 0.34157031774520874, "learning_rate": 4.94405786046613e-05, "loss": 0.209, "step": 9780 }, { "epoch": 0.8073374340949033, "grad_norm": 3.6989963054656982, "learning_rate": 4.943451276771818e-05, "loss": 0.221, "step": 9800 }, { "epoch": 0.8089850615114236, "grad_norm": 0.6388671398162842, "learning_rate": 4.942841459791828e-05, "loss": 0.1661, "step": 9820 }, { "epoch": 0.8106326889279437, "grad_norm": 0.6647291779518127, "learning_rate": 4.9422284103330985e-05, "loss": 0.192, "step": 9840 }, { "epoch": 0.812280316344464, "grad_norm": 0.540134847164154, "learning_rate": 4.941612129206844e-05, "loss": 0.2126, "step": 9860 }, { "epoch": 0.8139279437609842, "grad_norm": 0.5511148571968079, "learning_rate": 4.940992617228556e-05, "loss": 0.2018, "step": 9880 }, { "epoch": 0.8155755711775043, "grad_norm": 2.1634738445281982, "learning_rate": 4.9403698752180006e-05, "loss": 0.2087, "step": 9900 }, { "epoch": 0.8172231985940246, "grad_norm": 1.3764668703079224, "learning_rate": 4.939743903999218e-05, "loss": 0.1561, "step": 9920 }, { "epoch": 0.8188708260105448, "grad_norm": 0.8314900398254395, "learning_rate": 4.939114704400523e-05, "loss": 0.2226, "step": 9940 }, { "epoch": 0.820518453427065, "grad_norm": 1.0714060068130493, "learning_rate": 4.9384822772544994e-05, "loss": 0.191, "step": 9960 }, { "epoch": 0.8221660808435852, "grad_norm": 0.5301602482795715, "learning_rate": 4.937846623398003e-05, "loss": 0.2069, "step": 9980 }, { "epoch": 0.8238137082601055, "grad_norm": 2.4229841232299805, "learning_rate": 4.9372077436721634e-05, "loss": 0.2132, "step": 10000 }, { "epoch": 0.8238137082601055, "eval_loss": 0.5383469462394714, "eval_runtime": 248.9799, "eval_samples_per_second": 87.513, "eval_steps_per_second": 21.881, "eval_wer": 0.23184117484036582, "step": 10000 }, { "epoch": 0.8254613356766256, "grad_norm": 1.1694583892822266, "learning_rate": 4.936565638922372e-05, "loss": 0.1624, "step": 10020 }, { "epoch": 0.8271089630931459, "grad_norm": 2.124119281768799, "learning_rate": 4.9359203099982924e-05, "loss": 0.2062, "step": 10040 }, { "epoch": 0.8287565905096661, "grad_norm": 0.7251669764518738, "learning_rate": 4.935271757753852e-05, "loss": 0.1981, "step": 10060 }, { "epoch": 0.8304042179261862, "grad_norm": 1.2785292863845825, "learning_rate": 4.934619983047246e-05, "loss": 0.2094, "step": 10080 }, { "epoch": 0.8320518453427065, "grad_norm": 3.0615155696868896, "learning_rate": 4.933964986740931e-05, "loss": 0.2159, "step": 10100 }, { "epoch": 0.8336994727592267, "grad_norm": 2.058302402496338, "learning_rate": 4.933306769701629e-05, "loss": 0.1585, "step": 10120 }, { "epoch": 0.8353471001757469, "grad_norm": 1.0357364416122437, "learning_rate": 4.9326453328003217e-05, "loss": 0.187, "step": 10140 }, { "epoch": 0.8369947275922671, "grad_norm": 0.6068626642227173, "learning_rate": 4.931980676912252e-05, "loss": 0.1971, "step": 10160 }, { "epoch": 0.8386423550087874, "grad_norm": 3.377023220062256, "learning_rate": 4.931312802916925e-05, "loss": 0.1999, "step": 10180 }, { "epoch": 0.8402899824253075, "grad_norm": 3.0354530811309814, "learning_rate": 4.9306417116980996e-05, "loss": 0.2008, "step": 10200 }, { "epoch": 0.8419376098418277, "grad_norm": 2.531010150909424, "learning_rate": 4.929967404143796e-05, "loss": 0.1688, "step": 10220 }, { "epoch": 0.843585237258348, "grad_norm": 0.526393711566925, "learning_rate": 4.929289881146286e-05, "loss": 0.1844, "step": 10240 }, { "epoch": 0.8452328646748682, "grad_norm": 0.9323037266731262, "learning_rate": 4.9286091436021015e-05, "loss": 0.1968, "step": 10260 }, { "epoch": 0.8468804920913884, "grad_norm": 1.0275589227676392, "learning_rate": 4.927925192412024e-05, "loss": 0.182, "step": 10280 }, { "epoch": 0.8485281195079086, "grad_norm": 4.738681793212891, "learning_rate": 4.927238028481089e-05, "loss": 0.214, "step": 10300 }, { "epoch": 0.8501757469244289, "grad_norm": 1.584230899810791, "learning_rate": 4.926547652718583e-05, "loss": 0.1582, "step": 10320 }, { "epoch": 0.851823374340949, "grad_norm": 0.9227916598320007, "learning_rate": 4.9258540660380434e-05, "loss": 0.1921, "step": 10340 }, { "epoch": 0.8534710017574693, "grad_norm": 7.2498250007629395, "learning_rate": 4.925157269357254e-05, "loss": 0.2197, "step": 10360 }, { "epoch": 0.8551186291739895, "grad_norm": 0.8496150970458984, "learning_rate": 4.924457263598248e-05, "loss": 0.1798, "step": 10380 }, { "epoch": 0.8567662565905096, "grad_norm": 9.715910911560059, "learning_rate": 4.9237540496873064e-05, "loss": 0.2241, "step": 10400 }, { "epoch": 0.8584138840070299, "grad_norm": 2.707458257675171, "learning_rate": 4.923047628554952e-05, "loss": 0.1581, "step": 10420 }, { "epoch": 0.8600615114235501, "grad_norm": 0.9239126443862915, "learning_rate": 4.9223380011359544e-05, "loss": 0.1964, "step": 10440 }, { "epoch": 0.8617091388400703, "grad_norm": 1.0885734558105469, "learning_rate": 4.9216251683693246e-05, "loss": 0.2139, "step": 10460 }, { "epoch": 0.8633567662565905, "grad_norm": 0.8583139777183533, "learning_rate": 4.920909131198315e-05, "loss": 0.2007, "step": 10480 }, { "epoch": 0.8650043936731108, "grad_norm": 2.6485893726348877, "learning_rate": 4.920189890570419e-05, "loss": 0.2065, "step": 10500 }, { "epoch": 0.8666520210896309, "grad_norm": 1.3636205196380615, "learning_rate": 4.919467447437368e-05, "loss": 0.1583, "step": 10520 }, { "epoch": 0.8682996485061512, "grad_norm": 1.2751201391220093, "learning_rate": 4.918741802755132e-05, "loss": 0.2044, "step": 10540 }, { "epoch": 0.8699472759226714, "grad_norm": 0.8484062552452087, "learning_rate": 4.918012957483916e-05, "loss": 0.192, "step": 10560 }, { "epoch": 0.8715949033391915, "grad_norm": 0.5631062984466553, "learning_rate": 4.917280912588163e-05, "loss": 0.1997, "step": 10580 }, { "epoch": 0.8732425307557118, "grad_norm": 2.789642572402954, "learning_rate": 4.916545669036545e-05, "loss": 0.2309, "step": 10600 }, { "epoch": 0.874890158172232, "grad_norm": 7.149659156799316, "learning_rate": 4.915807227801973e-05, "loss": 0.1559, "step": 10620 }, { "epoch": 0.8765377855887522, "grad_norm": 1.080868124961853, "learning_rate": 4.915065589861584e-05, "loss": 0.2, "step": 10640 }, { "epoch": 0.8781854130052724, "grad_norm": 0.49652764201164246, "learning_rate": 4.914320756196748e-05, "loss": 0.2034, "step": 10660 }, { "epoch": 0.8798330404217927, "grad_norm": 0.9888033270835876, "learning_rate": 4.913572727793062e-05, "loss": 0.1952, "step": 10680 }, { "epoch": 0.8814806678383128, "grad_norm": 13.734339714050293, "learning_rate": 4.9128215056403507e-05, "loss": 0.2042, "step": 10700 }, { "epoch": 0.883128295254833, "grad_norm": 0.7586619257926941, "learning_rate": 4.912067090732667e-05, "loss": 0.1449, "step": 10720 }, { "epoch": 0.8847759226713533, "grad_norm": 0.673252284526825, "learning_rate": 4.911309484068285e-05, "loss": 0.2138, "step": 10740 }, { "epoch": 0.8864235500878734, "grad_norm": 1.2692677974700928, "learning_rate": 4.910548686649706e-05, "loss": 0.1994, "step": 10760 }, { "epoch": 0.8880711775043937, "grad_norm": 0.7501096725463867, "learning_rate": 4.9097846994836505e-05, "loss": 0.1949, "step": 10780 }, { "epoch": 0.8897188049209139, "grad_norm": 3.0218920707702637, "learning_rate": 4.909017523581062e-05, "loss": 0.2137, "step": 10800 }, { "epoch": 0.8913664323374341, "grad_norm": 1.1619638204574585, "learning_rate": 4.9082471599571015e-05, "loss": 0.1509, "step": 10820 }, { "epoch": 0.8930140597539543, "grad_norm": 1.3160550594329834, "learning_rate": 4.907512362815835e-05, "loss": 0.1879, "step": 10840 }, { "epoch": 0.8946616871704746, "grad_norm": 2.0613820552825928, "learning_rate": 4.9067357860710327e-05, "loss": 0.2094, "step": 10860 }, { "epoch": 0.8963093145869947, "grad_norm": 0.6547063589096069, "learning_rate": 4.905956024624158e-05, "loss": 0.1879, "step": 10880 }, { "epoch": 0.897956942003515, "grad_norm": 4.540472507476807, "learning_rate": 4.905173079507026e-05, "loss": 0.2022, "step": 10900 }, { "epoch": 0.8996045694200352, "grad_norm": 2.7642438411712646, "learning_rate": 4.904386951755665e-05, "loss": 0.1566, "step": 10920 }, { "epoch": 0.9012521968365553, "grad_norm": 1.2250425815582275, "learning_rate": 4.903597642410316e-05, "loss": 0.2049, "step": 10940 }, { "epoch": 0.9028998242530756, "grad_norm": 0.6964439749717712, "learning_rate": 4.902805152515427e-05, "loss": 0.2003, "step": 10960 }, { "epoch": 0.9045474516695958, "grad_norm": 0.8881003260612488, "learning_rate": 4.902049342086357e-05, "loss": 0.1969, "step": 10980 }, { "epoch": 0.906195079086116, "grad_norm": 3.828152656555176, "learning_rate": 4.901250653139905e-05, "loss": 0.2199, "step": 11000 }, { "epoch": 0.9078427065026362, "grad_norm": 1.2089974880218506, "learning_rate": 4.900448786749557e-05, "loss": 0.157, "step": 11020 }, { "epoch": 0.9094903339191565, "grad_norm": 1.148004412651062, "learning_rate": 4.8996437439763784e-05, "loss": 0.2013, "step": 11040 }, { "epoch": 0.9111379613356766, "grad_norm": 1.6459652185440063, "learning_rate": 4.89883552588564e-05, "loss": 0.1928, "step": 11060 }, { "epoch": 0.9127855887521968, "grad_norm": 0.7444145083427429, "learning_rate": 4.898024133546811e-05, "loss": 0.1965, "step": 11080 }, { "epoch": 0.9144332161687171, "grad_norm": 7.0522260665893555, "learning_rate": 4.897209568033564e-05, "loss": 0.2181, "step": 11100 }, { "epoch": 0.9160808435852372, "grad_norm": 1.5349088907241821, "learning_rate": 4.896391830423768e-05, "loss": 0.1575, "step": 11120 }, { "epoch": 0.9177284710017575, "grad_norm": 0.5103209018707275, "learning_rate": 4.895570921799491e-05, "loss": 0.2057, "step": 11140 }, { "epoch": 0.9193760984182777, "grad_norm": 0.4984442889690399, "learning_rate": 4.8947468432469955e-05, "loss": 0.1897, "step": 11160 }, { "epoch": 0.9210237258347979, "grad_norm": 0.5621454119682312, "learning_rate": 4.893919595856742e-05, "loss": 0.1912, "step": 11180 }, { "epoch": 0.9226713532513181, "grad_norm": 5.476040363311768, "learning_rate": 4.8930891807233794e-05, "loss": 0.2166, "step": 11200 }, { "epoch": 0.9243189806678384, "grad_norm": 1.3259927034378052, "learning_rate": 4.892255598945753e-05, "loss": 0.1561, "step": 11220 }, { "epoch": 0.9259666080843585, "grad_norm": 0.8870605230331421, "learning_rate": 4.891418851626893e-05, "loss": 0.2003, "step": 11240 }, { "epoch": 0.9276142355008787, "grad_norm": 16.381240844726562, "learning_rate": 4.890578939874025e-05, "loss": 0.1916, "step": 11260 }, { "epoch": 0.929261862917399, "grad_norm": 1.6785776615142822, "learning_rate": 4.889735864798556e-05, "loss": 0.1825, "step": 11280 }, { "epoch": 0.9309094903339191, "grad_norm": 5.951013565063477, "learning_rate": 4.8888896275160816e-05, "loss": 0.2045, "step": 11300 }, { "epoch": 0.9325571177504394, "grad_norm": 3.8519906997680664, "learning_rate": 4.888040229146382e-05, "loss": 0.1573, "step": 11320 }, { "epoch": 0.9342047451669596, "grad_norm": 1.0654627084732056, "learning_rate": 4.887187670813419e-05, "loss": 0.223, "step": 11340 }, { "epoch": 0.9358523725834798, "grad_norm": 1.6851961612701416, "learning_rate": 4.8863319536453364e-05, "loss": 0.2058, "step": 11360 }, { "epoch": 0.9375, "grad_norm": 0.6880635023117065, "learning_rate": 4.88547307877446e-05, "loss": 0.1974, "step": 11380 }, { "epoch": 0.9391476274165202, "grad_norm": 5.7095866203308105, "learning_rate": 4.884611047337289e-05, "loss": 0.2169, "step": 11400 }, { "epoch": 0.9407952548330404, "grad_norm": 8.853272438049316, "learning_rate": 4.883745860474505e-05, "loss": 0.1584, "step": 11420 }, { "epoch": 0.9424428822495606, "grad_norm": 1.7952899932861328, "learning_rate": 4.882877519330961e-05, "loss": 0.1937, "step": 11440 }, { "epoch": 0.9440905096660809, "grad_norm": 5.514512538909912, "learning_rate": 4.882006025055685e-05, "loss": 0.1989, "step": 11460 }, { "epoch": 0.945738137082601, "grad_norm": 0.776798665523529, "learning_rate": 4.88113137880188e-05, "loss": 0.1862, "step": 11480 }, { "epoch": 0.9473857644991213, "grad_norm": 2.824843645095825, "learning_rate": 4.880253581726916e-05, "loss": 0.2168, "step": 11500 }, { "epoch": 0.9490333919156415, "grad_norm": 0.9011817574501038, "learning_rate": 4.879372634992335e-05, "loss": 0.1601, "step": 11520 }, { "epoch": 0.9506810193321616, "grad_norm": 5.290555477142334, "learning_rate": 4.8784885397638445e-05, "loss": 0.1892, "step": 11540 }, { "epoch": 0.9523286467486819, "grad_norm": 1.164330005645752, "learning_rate": 4.87760129721132e-05, "loss": 0.2112, "step": 11560 }, { "epoch": 0.9539762741652021, "grad_norm": 0.8354160189628601, "learning_rate": 4.876710908508801e-05, "loss": 0.2176, "step": 11580 }, { "epoch": 0.9556239015817223, "grad_norm": 3.980334758758545, "learning_rate": 4.8758173748344904e-05, "loss": 0.2484, "step": 11600 }, { "epoch": 0.9572715289982425, "grad_norm": 1.7610282897949219, "learning_rate": 4.874920697370753e-05, "loss": 0.1618, "step": 11620 }, { "epoch": 0.9589191564147628, "grad_norm": 1.3336617946624756, "learning_rate": 4.874020877304113e-05, "loss": 0.1895, "step": 11640 }, { "epoch": 0.9605667838312829, "grad_norm": 0.9175180792808533, "learning_rate": 4.873117915825252e-05, "loss": 0.1945, "step": 11660 }, { "epoch": 0.9622144112478032, "grad_norm": 0.49205484986305237, "learning_rate": 4.8722118141290105e-05, "loss": 0.1935, "step": 11680 }, { "epoch": 0.9638620386643234, "grad_norm": 3.554715394973755, "learning_rate": 4.871302573414384e-05, "loss": 0.2135, "step": 11700 }, { "epoch": 0.9655096660808435, "grad_norm": 1.0651150941848755, "learning_rate": 4.8703901948845205e-05, "loss": 0.1564, "step": 11720 }, { "epoch": 0.9671572934973638, "grad_norm": 1.88383150100708, "learning_rate": 4.869474679746721e-05, "loss": 0.1994, "step": 11740 }, { "epoch": 0.968804920913884, "grad_norm": 0.6271897554397583, "learning_rate": 4.868556029212435e-05, "loss": 0.2043, "step": 11760 }, { "epoch": 0.9704525483304042, "grad_norm": 0.616521954536438, "learning_rate": 4.867634244497265e-05, "loss": 0.2059, "step": 11780 }, { "epoch": 0.9721001757469244, "grad_norm": 3.2947463989257812, "learning_rate": 4.8667093268209575e-05, "loss": 0.205, "step": 11800 }, { "epoch": 0.9737478031634447, "grad_norm": 0.7675888538360596, "learning_rate": 4.865781277407405e-05, "loss": 0.1436, "step": 11820 }, { "epoch": 0.9753954305799648, "grad_norm": 1.3581780195236206, "learning_rate": 4.8648500974846445e-05, "loss": 0.1964, "step": 11840 }, { "epoch": 0.977043057996485, "grad_norm": 0.7175058722496033, "learning_rate": 4.8639157882848566e-05, "loss": 0.197, "step": 11860 }, { "epoch": 0.9786906854130053, "grad_norm": 0.7383044362068176, "learning_rate": 4.8629783510443616e-05, "loss": 0.1904, "step": 11880 }, { "epoch": 0.9803383128295254, "grad_norm": 5.0527777671813965, "learning_rate": 4.8620377870036184e-05, "loss": 0.2117, "step": 11900 }, { "epoch": 0.9819859402460457, "grad_norm": 1.5865782499313354, "learning_rate": 4.861094097407224e-05, "loss": 0.1515, "step": 11920 }, { "epoch": 0.9836335676625659, "grad_norm": 0.5650818943977356, "learning_rate": 4.860147283503912e-05, "loss": 0.1926, "step": 11940 }, { "epoch": 0.9852811950790861, "grad_norm": 1.3663684129714966, "learning_rate": 4.8591973465465493e-05, "loss": 0.2066, "step": 11960 }, { "epoch": 0.9869288224956063, "grad_norm": 1.2778149843215942, "learning_rate": 4.8582442877921344e-05, "loss": 0.2022, "step": 11980 }, { "epoch": 0.9885764499121266, "grad_norm": 4.921967029571533, "learning_rate": 4.8572881085017996e-05, "loss": 0.2158, "step": 12000 }, { "epoch": 0.9885764499121266, "eval_loss": 0.5375325679779053, "eval_runtime": 252.7679, "eval_samples_per_second": 86.202, "eval_steps_per_second": 21.553, "eval_wer": 0.2286638878235724, "step": 12000 }, { "epoch": 0.9902240773286467, "grad_norm": 1.044043779373169, "learning_rate": 4.856328809940804e-05, "loss": 0.1584, "step": 12020 }, { "epoch": 0.991871704745167, "grad_norm": 2.4731016159057617, "learning_rate": 4.855366393378535e-05, "loss": 0.2071, "step": 12040 }, { "epoch": 0.9935193321616872, "grad_norm": 0.6640995144844055, "learning_rate": 4.854400860088506e-05, "loss": 0.2119, "step": 12060 }, { "epoch": 0.9951669595782073, "grad_norm": 0.43121984601020813, "learning_rate": 4.853432211348356e-05, "loss": 0.1918, "step": 12080 }, { "epoch": 0.9968145869947276, "grad_norm": 5.402212619781494, "learning_rate": 4.8524604484398414e-05, "loss": 0.2181, "step": 12100 }, { "epoch": 0.9984622144112478, "grad_norm": 1.417004108428955, "learning_rate": 4.851485572648846e-05, "loss": 0.1493, "step": 12120 }, { "epoch": 1.000109841827768, "grad_norm": 0.9582122564315796, "learning_rate": 4.8505075852653684e-05, "loss": 0.207, "step": 12140 }, { "epoch": 1.0017574692442883, "grad_norm": 1.74813973903656, "learning_rate": 4.8495264875835264e-05, "loss": 0.1468, "step": 12160 }, { "epoch": 1.0034050966608083, "grad_norm": 3.2926461696624756, "learning_rate": 4.8485422809015535e-05, "loss": 0.2009, "step": 12180 }, { "epoch": 1.0050527240773286, "grad_norm": 2.141998291015625, "learning_rate": 4.8475549665217956e-05, "loss": 0.1878, "step": 12200 }, { "epoch": 1.0067003514938488, "grad_norm": 1.7473055124282837, "learning_rate": 4.846564545750712e-05, "loss": 0.1916, "step": 12220 }, { "epoch": 1.008347978910369, "grad_norm": 0.20854823291301727, "learning_rate": 4.845571019898874e-05, "loss": 0.19, "step": 12240 }, { "epoch": 1.0099956063268893, "grad_norm": 1.6081265211105347, "learning_rate": 4.844574390280958e-05, "loss": 0.1623, "step": 12260 }, { "epoch": 1.0116432337434096, "grad_norm": 2.013803482055664, "learning_rate": 4.843574658215751e-05, "loss": 0.1916, "step": 12280 }, { "epoch": 1.0132908611599296, "grad_norm": 1.2559956312179565, "learning_rate": 4.842571825026143e-05, "loss": 0.1759, "step": 12300 }, { "epoch": 1.0149384885764499, "grad_norm": 1.103947401046753, "learning_rate": 4.8415658920391296e-05, "loss": 0.202, "step": 12320 }, { "epoch": 1.01658611599297, "grad_norm": 0.5012603998184204, "learning_rate": 4.840556860585805e-05, "loss": 0.198, "step": 12340 }, { "epoch": 1.0182337434094904, "grad_norm": 1.4309896230697632, "learning_rate": 4.839544732001368e-05, "loss": 0.163, "step": 12360 }, { "epoch": 1.0198813708260106, "grad_norm": 0.6756483912467957, "learning_rate": 4.838529507625111e-05, "loss": 0.1831, "step": 12380 }, { "epoch": 1.0215289982425309, "grad_norm": 1.141714096069336, "learning_rate": 4.8375111888004263e-05, "loss": 0.1888, "step": 12400 }, { "epoch": 1.0231766256590509, "grad_norm": 0.471711665391922, "learning_rate": 4.836489776874799e-05, "loss": 0.1892, "step": 12420 }, { "epoch": 1.0248242530755711, "grad_norm": 0.28789934515953064, "learning_rate": 4.835465273199807e-05, "loss": 0.1795, "step": 12440 }, { "epoch": 1.0264718804920914, "grad_norm": 1.1327400207519531, "learning_rate": 4.834437679131121e-05, "loss": 0.169, "step": 12460 }, { "epoch": 1.0281195079086116, "grad_norm": 1.0975223779678345, "learning_rate": 4.8334069960285e-05, "loss": 0.1875, "step": 12480 }, { "epoch": 1.0297671353251319, "grad_norm": 0.5748992562294006, "learning_rate": 4.832373225255791e-05, "loss": 0.1888, "step": 12500 }, { "epoch": 1.0314147627416521, "grad_norm": 0.5393682718276978, "learning_rate": 4.831336368180927e-05, "loss": 0.1808, "step": 12520 }, { "epoch": 1.0330623901581721, "grad_norm": 0.22509616613388062, "learning_rate": 4.830296426175922e-05, "loss": 0.1803, "step": 12540 }, { "epoch": 1.0347100175746924, "grad_norm": 1.0403659343719482, "learning_rate": 4.829253400616876e-05, "loss": 0.1594, "step": 12560 }, { "epoch": 1.0363576449912126, "grad_norm": 1.5973280668258667, "learning_rate": 4.828207292883968e-05, "loss": 0.1907, "step": 12580 }, { "epoch": 1.0380052724077329, "grad_norm": 0.9520076513290405, "learning_rate": 4.8271581043614555e-05, "loss": 0.1852, "step": 12600 }, { "epoch": 1.0396528998242531, "grad_norm": 3.2169032096862793, "learning_rate": 4.826105836437672e-05, "loss": 0.1796, "step": 12620 }, { "epoch": 1.0413005272407734, "grad_norm": 0.3701666295528412, "learning_rate": 4.825050490505025e-05, "loss": 0.1894, "step": 12640 }, { "epoch": 1.0429481546572934, "grad_norm": 1.573367953300476, "learning_rate": 4.823992067959998e-05, "loss": 0.1565, "step": 12660 }, { "epoch": 1.0445957820738137, "grad_norm": 0.6586406230926514, "learning_rate": 4.8229305702031426e-05, "loss": 0.183, "step": 12680 }, { "epoch": 1.046243409490334, "grad_norm": 1.1278094053268433, "learning_rate": 4.821865998639081e-05, "loss": 0.175, "step": 12700 }, { "epoch": 1.0478910369068541, "grad_norm": 0.9823224544525146, "learning_rate": 4.820798354676504e-05, "loss": 0.1939, "step": 12720 }, { "epoch": 1.0495386643233744, "grad_norm": 0.6462484002113342, "learning_rate": 4.819727639728164e-05, "loss": 0.1652, "step": 12740 }, { "epoch": 1.0511862917398946, "grad_norm": 1.3370243310928345, "learning_rate": 4.818653855210882e-05, "loss": 0.1605, "step": 12760 }, { "epoch": 1.0528339191564147, "grad_norm": 0.7861006259918213, "learning_rate": 4.817577002545538e-05, "loss": 0.1858, "step": 12780 }, { "epoch": 1.054481546572935, "grad_norm": 2.107189655303955, "learning_rate": 4.816497083157071e-05, "loss": 0.1933, "step": 12800 }, { "epoch": 1.0561291739894552, "grad_norm": 0.9943327307701111, "learning_rate": 4.81541409847448e-05, "loss": 0.1949, "step": 12820 }, { "epoch": 1.0577768014059754, "grad_norm": 0.5097872018814087, "learning_rate": 4.8143280499308205e-05, "loss": 0.1879, "step": 12840 }, { "epoch": 1.0594244288224957, "grad_norm": 2.121750593185425, "learning_rate": 4.8132389389631995e-05, "loss": 0.1616, "step": 12860 }, { "epoch": 1.061072056239016, "grad_norm": 0.86652010679245, "learning_rate": 4.81214676701278e-05, "loss": 0.2113, "step": 12880 }, { "epoch": 1.062719683655536, "grad_norm": 2.3862969875335693, "learning_rate": 4.811051535524772e-05, "loss": 0.1904, "step": 12900 }, { "epoch": 1.0643673110720562, "grad_norm": 0.5471189618110657, "learning_rate": 4.8099532459484356e-05, "loss": 0.182, "step": 12920 }, { "epoch": 1.0660149384885764, "grad_norm": 1.5992333889007568, "learning_rate": 4.8088518997370794e-05, "loss": 0.1821, "step": 12940 }, { "epoch": 1.0676625659050967, "grad_norm": 0.6893438696861267, "learning_rate": 4.807747498348052e-05, "loss": 0.1648, "step": 12960 }, { "epoch": 1.069310193321617, "grad_norm": 4.395170211791992, "learning_rate": 4.806640043242748e-05, "loss": 0.1759, "step": 12980 }, { "epoch": 1.0709578207381372, "grad_norm": 0.5166418552398682, "learning_rate": 4.805529535886605e-05, "loss": 0.1956, "step": 13000 }, { "epoch": 1.0726054481546572, "grad_norm": 0.8624104261398315, "learning_rate": 4.804415977749094e-05, "loss": 0.1839, "step": 13020 }, { "epoch": 1.0742530755711774, "grad_norm": 0.6681638360023499, "learning_rate": 4.803299370303725e-05, "loss": 0.2012, "step": 13040 }, { "epoch": 1.0759007029876977, "grad_norm": 1.1625574827194214, "learning_rate": 4.802179715028047e-05, "loss": 0.165, "step": 13060 }, { "epoch": 1.077548330404218, "grad_norm": 3.3364834785461426, "learning_rate": 4.801057013403636e-05, "loss": 0.1905, "step": 13080 }, { "epoch": 1.0791959578207382, "grad_norm": 0.9237430691719055, "learning_rate": 4.799931266916103e-05, "loss": 0.1923, "step": 13100 }, { "epoch": 1.0808435852372584, "grad_norm": 2.341661214828491, "learning_rate": 4.798858988805215e-05, "loss": 0.1918, "step": 13120 }, { "epoch": 1.0824912126537785, "grad_norm": 0.2139461189508438, "learning_rate": 4.797727309122835e-05, "loss": 0.2039, "step": 13140 }, { "epoch": 1.0841388400702987, "grad_norm": 1.3084605932235718, "learning_rate": 4.796592588983348e-05, "loss": 0.1808, "step": 13160 }, { "epoch": 1.085786467486819, "grad_norm": 0.5879707336425781, "learning_rate": 4.7954548298882685e-05, "loss": 0.1879, "step": 13180 }, { "epoch": 1.0874340949033392, "grad_norm": 9.557912826538086, "learning_rate": 4.794314033343129e-05, "loss": 0.1667, "step": 13200 }, { "epoch": 1.0890817223198594, "grad_norm": 2.7290220260620117, "learning_rate": 4.793170200857485e-05, "loss": 0.1848, "step": 13220 }, { "epoch": 1.0907293497363797, "grad_norm": 0.4870263338088989, "learning_rate": 4.792023333944907e-05, "loss": 0.183, "step": 13240 }, { "epoch": 1.0923769771528997, "grad_norm": 1.1112992763519287, "learning_rate": 4.790873434122982e-05, "loss": 0.1628, "step": 13260 }, { "epoch": 1.09402460456942, "grad_norm": 1.2795594930648804, "learning_rate": 4.789720502913309e-05, "loss": 0.1687, "step": 13280 }, { "epoch": 1.0956722319859402, "grad_norm": 1.0361709594726562, "learning_rate": 4.788564541841498e-05, "loss": 0.196, "step": 13300 }, { "epoch": 1.0973198594024605, "grad_norm": 1.4173661470413208, "learning_rate": 4.787405552437171e-05, "loss": 0.191, "step": 13320 }, { "epoch": 1.0989674868189807, "grad_norm": 3.3120641708374023, "learning_rate": 4.786243536233954e-05, "loss": 0.1881, "step": 13340 }, { "epoch": 1.100615114235501, "grad_norm": 1.3123334646224976, "learning_rate": 4.785078494769481e-05, "loss": 0.1701, "step": 13360 }, { "epoch": 1.1022627416520212, "grad_norm": 1.0791329145431519, "learning_rate": 4.783910429585386e-05, "loss": 0.1883, "step": 13380 }, { "epoch": 1.1039103690685412, "grad_norm": 4.972422122955322, "learning_rate": 4.782739342227308e-05, "loss": 0.1977, "step": 13400 }, { "epoch": 1.1055579964850615, "grad_norm": 1.7628355026245117, "learning_rate": 4.781565234244881e-05, "loss": 0.1828, "step": 13420 }, { "epoch": 1.1072056239015817, "grad_norm": 0.7487472295761108, "learning_rate": 4.7803881071917386e-05, "loss": 0.1903, "step": 13440 }, { "epoch": 1.108853251318102, "grad_norm": 1.0923112630844116, "learning_rate": 4.77920796262551e-05, "loss": 0.1597, "step": 13460 }, { "epoch": 1.1105008787346222, "grad_norm": 2.634671211242676, "learning_rate": 4.778024802107814e-05, "loss": 0.201, "step": 13480 }, { "epoch": 1.1121485061511422, "grad_norm": 0.6952950358390808, "learning_rate": 4.7768386272042646e-05, "loss": 0.1887, "step": 13500 }, { "epoch": 1.1137961335676625, "grad_norm": 1.2554984092712402, "learning_rate": 4.77564943948446e-05, "loss": 0.1862, "step": 13520 }, { "epoch": 1.1154437609841827, "grad_norm": 0.42830732464790344, "learning_rate": 4.774457240521989e-05, "loss": 0.1892, "step": 13540 }, { "epoch": 1.117091388400703, "grad_norm": 1.1879973411560059, "learning_rate": 4.7732620318944224e-05, "loss": 0.1576, "step": 13560 }, { "epoch": 1.1187390158172232, "grad_norm": 0.9063071012496948, "learning_rate": 4.772063815183315e-05, "loss": 0.1941, "step": 13580 }, { "epoch": 1.1203866432337435, "grad_norm": 1.071677803993225, "learning_rate": 4.770862591974202e-05, "loss": 0.1779, "step": 13600 }, { "epoch": 1.1220342706502637, "grad_norm": 1.2179434299468994, "learning_rate": 4.769658363856595e-05, "loss": 0.1838, "step": 13620 }, { "epoch": 1.1236818980667838, "grad_norm": 0.8977575898170471, "learning_rate": 4.768451132423985e-05, "loss": 0.2017, "step": 13640 }, { "epoch": 1.125329525483304, "grad_norm": 1.6982157230377197, "learning_rate": 4.767240899273835e-05, "loss": 0.1608, "step": 13660 }, { "epoch": 1.1269771528998243, "grad_norm": 0.7634503245353699, "learning_rate": 4.7660276660075804e-05, "loss": 0.1718, "step": 13680 }, { "epoch": 1.1286247803163445, "grad_norm": 0.8203310966491699, "learning_rate": 4.764811434230626e-05, "loss": 0.1831, "step": 13700 }, { "epoch": 1.1302724077328647, "grad_norm": 0.9817870259284973, "learning_rate": 4.763592205552345e-05, "loss": 0.1858, "step": 13720 }, { "epoch": 1.1319200351493848, "grad_norm": 0.333933562040329, "learning_rate": 4.762369981586077e-05, "loss": 0.1785, "step": 13740 }, { "epoch": 1.133567662565905, "grad_norm": 1.1885998249053955, "learning_rate": 4.761144763949124e-05, "loss": 0.1508, "step": 13760 }, { "epoch": 1.1352152899824253, "grad_norm": 4.4533820152282715, "learning_rate": 4.759916554262749e-05, "loss": 0.1855, "step": 13780 }, { "epoch": 1.1368629173989455, "grad_norm": 0.6248905062675476, "learning_rate": 4.7586853541521744e-05, "loss": 0.1938, "step": 13800 }, { "epoch": 1.1385105448154658, "grad_norm": 0.7727264761924744, "learning_rate": 4.757451165246583e-05, "loss": 0.1809, "step": 13820 }, { "epoch": 1.140158172231986, "grad_norm": 0.3115682005882263, "learning_rate": 4.7562139891791046e-05, "loss": 0.188, "step": 13840 }, { "epoch": 1.1418057996485063, "grad_norm": 3.9221365451812744, "learning_rate": 4.754973827586832e-05, "loss": 0.1556, "step": 13860 }, { "epoch": 1.1434534270650263, "grad_norm": 0.8228521347045898, "learning_rate": 4.7537306821108e-05, "loss": 0.185, "step": 13880 }, { "epoch": 1.1451010544815465, "grad_norm": 3.115772247314453, "learning_rate": 4.752484554395995e-05, "loss": 0.1786, "step": 13900 }, { "epoch": 1.1467486818980668, "grad_norm": 0.7074133157730103, "learning_rate": 4.7512354460913524e-05, "loss": 0.175, "step": 13920 }, { "epoch": 1.148396309314587, "grad_norm": 0.4195045828819275, "learning_rate": 4.7499833588497464e-05, "loss": 0.1868, "step": 13940 }, { "epoch": 1.1500439367311073, "grad_norm": 1.2703948020935059, "learning_rate": 4.7487282943279965e-05, "loss": 0.1517, "step": 13960 }, { "epoch": 1.1516915641476273, "grad_norm": 2.4614219665527344, "learning_rate": 4.747470254186862e-05, "loss": 0.1926, "step": 13980 }, { "epoch": 1.1533391915641475, "grad_norm": 2.0017571449279785, "learning_rate": 4.746209240091038e-05, "loss": 0.1852, "step": 14000 }, { "epoch": 1.1533391915641475, "eval_loss": 0.5445581078529358, "eval_runtime": 260.7987, "eval_samples_per_second": 83.547, "eval_steps_per_second": 20.89, "eval_wer": 0.22884024824829308, "step": 14000 }, { "epoch": 1.1550417398945518, "grad_norm": 0.7891082763671875, "learning_rate": 4.744945253709156e-05, "loss": 0.1683, "step": 14020 }, { "epoch": 1.156689367311072, "grad_norm": 1.2621268033981323, "learning_rate": 4.743678296713779e-05, "loss": 0.2037, "step": 14040 }, { "epoch": 1.1583369947275923, "grad_norm": 1.7061976194381714, "learning_rate": 4.7424083707814035e-05, "loss": 0.18, "step": 14060 }, { "epoch": 1.1599846221441124, "grad_norm": 4.809337615966797, "learning_rate": 4.7411354775924523e-05, "loss": 0.1772, "step": 14080 }, { "epoch": 1.1616322495606326, "grad_norm": 4.00537109375, "learning_rate": 4.739859618831276e-05, "loss": 0.2029, "step": 14100 }, { "epoch": 1.1632798769771528, "grad_norm": 3.184988498687744, "learning_rate": 4.7385807961861486e-05, "loss": 0.1857, "step": 14120 }, { "epoch": 1.164927504393673, "grad_norm": 0.24155808985233307, "learning_rate": 4.737299011349265e-05, "loss": 0.1808, "step": 14140 }, { "epoch": 1.1665751318101933, "grad_norm": 1.2904281616210938, "learning_rate": 4.736014266016743e-05, "loss": 0.1728, "step": 14160 }, { "epoch": 1.1682227592267136, "grad_norm": 0.8972460031509399, "learning_rate": 4.734726561888616e-05, "loss": 0.1712, "step": 14180 }, { "epoch": 1.1698703866432338, "grad_norm": 0.48299163579940796, "learning_rate": 4.7334359006688314e-05, "loss": 0.1872, "step": 14200 }, { "epoch": 1.1715180140597539, "grad_norm": 1.942762017250061, "learning_rate": 4.732142284065251e-05, "loss": 0.1742, "step": 14220 }, { "epoch": 1.1731656414762741, "grad_norm": 0.8175634741783142, "learning_rate": 4.730845713789647e-05, "loss": 0.175, "step": 14240 }, { "epoch": 1.1748132688927944, "grad_norm": 3.179103136062622, "learning_rate": 4.7295461915577e-05, "loss": 0.1544, "step": 14260 }, { "epoch": 1.1764608963093146, "grad_norm": 0.7264633178710938, "learning_rate": 4.7282437190889975e-05, "loss": 0.1854, "step": 14280 }, { "epoch": 1.1781085237258349, "grad_norm": 1.372662901878357, "learning_rate": 4.7269382981070296e-05, "loss": 0.193, "step": 14300 }, { "epoch": 1.179756151142355, "grad_norm": 1.8890334367752075, "learning_rate": 4.7256299303391895e-05, "loss": 0.1908, "step": 14320 }, { "epoch": 1.1814037785588751, "grad_norm": 0.4149373769760132, "learning_rate": 4.724318617516768e-05, "loss": 0.1838, "step": 14340 }, { "epoch": 1.1830514059753954, "grad_norm": 2.029543876647949, "learning_rate": 4.723004361374953e-05, "loss": 0.1656, "step": 14360 }, { "epoch": 1.1846990333919156, "grad_norm": 10.552154541015625, "learning_rate": 4.721687163652829e-05, "loss": 0.1828, "step": 14380 }, { "epoch": 1.1863466608084359, "grad_norm": 0.5331339240074158, "learning_rate": 4.7203670260933725e-05, "loss": 0.1807, "step": 14400 }, { "epoch": 1.1879942882249561, "grad_norm": 2.227116584777832, "learning_rate": 4.719043950443448e-05, "loss": 0.1903, "step": 14420 }, { "epoch": 1.1896419156414764, "grad_norm": 0.36228641867637634, "learning_rate": 4.717717938453811e-05, "loss": 0.1775, "step": 14440 }, { "epoch": 1.1912895430579964, "grad_norm": 2.8174803256988525, "learning_rate": 4.7163889918790984e-05, "loss": 0.1672, "step": 14460 }, { "epoch": 1.1929371704745166, "grad_norm": 1.1632130146026611, "learning_rate": 4.7150571124778344e-05, "loss": 0.1798, "step": 14480 }, { "epoch": 1.1945847978910369, "grad_norm": 2.7487854957580566, "learning_rate": 4.713722302012421e-05, "loss": 0.1833, "step": 14500 }, { "epoch": 1.1962324253075571, "grad_norm": 0.7984181642532349, "learning_rate": 4.712384562249141e-05, "loss": 0.1934, "step": 14520 }, { "epoch": 1.1978800527240774, "grad_norm": 0.22223146259784698, "learning_rate": 4.711043894958153e-05, "loss": 0.1883, "step": 14540 }, { "epoch": 1.1995276801405976, "grad_norm": 5.944990158081055, "learning_rate": 4.7097003019134876e-05, "loss": 0.1584, "step": 14560 }, { "epoch": 1.2011753075571177, "grad_norm": 0.5789378881454468, "learning_rate": 4.708353784893049e-05, "loss": 0.1837, "step": 14580 }, { "epoch": 1.202822934973638, "grad_norm": 0.4476211369037628, "learning_rate": 4.707004345678609e-05, "loss": 0.1878, "step": 14600 }, { "epoch": 1.2044705623901582, "grad_norm": 1.0182552337646484, "learning_rate": 4.705651986055807e-05, "loss": 0.1968, "step": 14620 }, { "epoch": 1.2061181898066784, "grad_norm": 0.5336174368858337, "learning_rate": 4.7042967078141466e-05, "loss": 0.1849, "step": 14640 }, { "epoch": 1.2077658172231986, "grad_norm": 0.6163532733917236, "learning_rate": 4.702938512746994e-05, "loss": 0.1571, "step": 14660 }, { "epoch": 1.209413444639719, "grad_norm": 1.7392462491989136, "learning_rate": 4.701577402651574e-05, "loss": 0.205, "step": 14680 }, { "epoch": 1.2110610720562391, "grad_norm": 0.9206298589706421, "learning_rate": 4.70021337932897e-05, "loss": 0.1872, "step": 14700 }, { "epoch": 1.2127086994727592, "grad_norm": 1.0826961994171143, "learning_rate": 4.6988464445841186e-05, "loss": 0.185, "step": 14720 }, { "epoch": 1.2143563268892794, "grad_norm": 0.3322293758392334, "learning_rate": 4.6974766002258105e-05, "loss": 0.1746, "step": 14740 }, { "epoch": 1.2160039543057997, "grad_norm": 1.4564270973205566, "learning_rate": 4.696103848066686e-05, "loss": 0.1581, "step": 14760 }, { "epoch": 1.21765158172232, "grad_norm": 0.7961132526397705, "learning_rate": 4.6947281899232333e-05, "loss": 0.1774, "step": 14780 }, { "epoch": 1.2192992091388402, "grad_norm": 2.5025506019592285, "learning_rate": 4.693349627615784e-05, "loss": 0.2024, "step": 14800 }, { "epoch": 1.2209468365553602, "grad_norm": 0.9771924614906311, "learning_rate": 4.692037305103247e-05, "loss": 0.1857, "step": 14820 }, { "epoch": 1.2225944639718804, "grad_norm": 0.3456748127937317, "learning_rate": 4.690653084926293e-05, "loss": 0.1828, "step": 14840 }, { "epoch": 1.2242420913884007, "grad_norm": 0.9823417067527771, "learning_rate": 4.689265965977708e-05, "loss": 0.1599, "step": 14860 }, { "epoch": 1.225889718804921, "grad_norm": 1.0269907712936401, "learning_rate": 4.68787595009299e-05, "loss": 0.1797, "step": 14880 }, { "epoch": 1.2275373462214412, "grad_norm": 0.7121568918228149, "learning_rate": 4.686483039111472e-05, "loss": 0.1851, "step": 14900 }, { "epoch": 1.2291849736379614, "grad_norm": 0.5829301476478577, "learning_rate": 4.6850872348763166e-05, "loss": 0.1816, "step": 14920 }, { "epoch": 1.2308326010544817, "grad_norm": 0.23825562000274658, "learning_rate": 4.683688539234515e-05, "loss": 0.1687, "step": 14940 }, { "epoch": 1.2324802284710017, "grad_norm": 1.7142807245254517, "learning_rate": 4.682286954036886e-05, "loss": 0.1618, "step": 14960 }, { "epoch": 1.234127855887522, "grad_norm": 2.196317195892334, "learning_rate": 4.6808824811380704e-05, "loss": 0.1868, "step": 14980 }, { "epoch": 1.2357754833040422, "grad_norm": 0.6108691096305847, "learning_rate": 4.67947512239653e-05, "loss": 0.195, "step": 15000 }, { "epoch": 1.2374231107205624, "grad_norm": 0.5913376808166504, "learning_rate": 4.678135460276387e-05, "loss": 0.1855, "step": 15020 }, { "epoch": 1.2390707381370827, "grad_norm": 0.6738501191139221, "learning_rate": 4.676722479501395e-05, "loss": 0.1843, "step": 15040 }, { "epoch": 1.2407183655536027, "grad_norm": 1.7320432662963867, "learning_rate": 4.675306618388381e-05, "loss": 0.1567, "step": 15060 }, { "epoch": 1.242365992970123, "grad_norm": 1.1772425174713135, "learning_rate": 4.6738878788108776e-05, "loss": 0.1927, "step": 15080 }, { "epoch": 1.2440136203866432, "grad_norm": 0.8685306906700134, "learning_rate": 4.672466262646225e-05, "loss": 0.2057, "step": 15100 }, { "epoch": 1.2456612478031635, "grad_norm": 0.5060691833496094, "learning_rate": 4.6710417717755695e-05, "loss": 0.1846, "step": 15120 }, { "epoch": 1.2473088752196837, "grad_norm": 0.2376640886068344, "learning_rate": 4.6696144080838624e-05, "loss": 0.1835, "step": 15140 }, { "epoch": 1.248956502636204, "grad_norm": 1.7704603672027588, "learning_rate": 4.6681841734598576e-05, "loss": 0.1661, "step": 15160 }, { "epoch": 1.2506041300527242, "grad_norm": 0.9057344198226929, "learning_rate": 4.6667510697961045e-05, "loss": 0.1942, "step": 15180 }, { "epoch": 1.2522517574692442, "grad_norm": 0.7479068636894226, "learning_rate": 4.6653150989889525e-05, "loss": 0.1807, "step": 15200 }, { "epoch": 1.2538993848857645, "grad_norm": 0.8971445560455322, "learning_rate": 4.663876262938543e-05, "loss": 0.192, "step": 15220 }, { "epoch": 1.2555470123022847, "grad_norm": 0.6187984347343445, "learning_rate": 4.6624345635488085e-05, "loss": 0.1781, "step": 15240 }, { "epoch": 1.257194639718805, "grad_norm": 1.5628702640533447, "learning_rate": 4.6609900027274714e-05, "loss": 0.1511, "step": 15260 }, { "epoch": 1.2588422671353252, "grad_norm": 1.0656757354736328, "learning_rate": 4.659542582386041e-05, "loss": 0.1888, "step": 15280 }, { "epoch": 1.2604898945518452, "grad_norm": 0.9140082597732544, "learning_rate": 4.658092304439808e-05, "loss": 0.1791, "step": 15300 }, { "epoch": 1.2621375219683655, "grad_norm": 0.7536066770553589, "learning_rate": 4.6566391708078484e-05, "loss": 0.1945, "step": 15320 }, { "epoch": 1.2637851493848857, "grad_norm": 0.6287294030189514, "learning_rate": 4.655183183413012e-05, "loss": 0.1844, "step": 15340 }, { "epoch": 1.265432776801406, "grad_norm": 1.3471739292144775, "learning_rate": 4.653724344181929e-05, "loss": 0.1718, "step": 15360 }, { "epoch": 1.2670804042179262, "grad_norm": 0.8668192625045776, "learning_rate": 4.652262655045001e-05, "loss": 0.1816, "step": 15380 }, { "epoch": 1.2687280316344465, "grad_norm": 0.7043120861053467, "learning_rate": 4.6507981179364015e-05, "loss": 0.1835, "step": 15400 }, { "epoch": 1.2703756590509667, "grad_norm": 2.341987133026123, "learning_rate": 4.6493307347940735e-05, "loss": 0.1779, "step": 15420 }, { "epoch": 1.2720232864674867, "grad_norm": 0.7353787422180176, "learning_rate": 4.6478605075597236e-05, "loss": 0.1995, "step": 15440 }, { "epoch": 1.273670913884007, "grad_norm": 1.131020188331604, "learning_rate": 4.646387438178824e-05, "loss": 0.1626, "step": 15460 }, { "epoch": 1.2753185413005272, "grad_norm": 5.1669206619262695, "learning_rate": 4.644911528600607e-05, "loss": 0.1739, "step": 15480 }, { "epoch": 1.2769661687170475, "grad_norm": 1.9862631559371948, "learning_rate": 4.643432780778064e-05, "loss": 0.1811, "step": 15500 }, { "epoch": 1.2786137961335677, "grad_norm": 4.627335548400879, "learning_rate": 4.641951196667939e-05, "loss": 0.1755, "step": 15520 }, { "epoch": 1.2802614235500878, "grad_norm": 1.6128586530685425, "learning_rate": 4.6404667782307334e-05, "loss": 0.1758, "step": 15540 }, { "epoch": 1.281909050966608, "grad_norm": 0.8337876200675964, "learning_rate": 4.638979527430696e-05, "loss": 0.1543, "step": 15560 }, { "epoch": 1.2835566783831283, "grad_norm": 1.7509500980377197, "learning_rate": 4.6374894462358246e-05, "loss": 0.1866, "step": 15580 }, { "epoch": 1.2852043057996485, "grad_norm": 1.9350312948226929, "learning_rate": 4.635996536617863e-05, "loss": 0.1898, "step": 15600 }, { "epoch": 1.2868519332161688, "grad_norm": 0.9639732241630554, "learning_rate": 4.6345008005522966e-05, "loss": 0.1757, "step": 15620 }, { "epoch": 1.288499560632689, "grad_norm": 0.22245891392230988, "learning_rate": 4.633002240018351e-05, "loss": 0.1725, "step": 15640 }, { "epoch": 1.2901471880492092, "grad_norm": 1.5688083171844482, "learning_rate": 4.6315008569989905e-05, "loss": 0.1682, "step": 15660 }, { "epoch": 1.2917948154657293, "grad_norm": 0.9785603880882263, "learning_rate": 4.629996653480912e-05, "loss": 0.184, "step": 15680 }, { "epoch": 1.2934424428822495, "grad_norm": 1.3254144191741943, "learning_rate": 4.628489631454548e-05, "loss": 0.1862, "step": 15700 }, { "epoch": 1.2950900702987698, "grad_norm": 2.9795165061950684, "learning_rate": 4.6269797929140554e-05, "loss": 0.1836, "step": 15720 }, { "epoch": 1.29673769771529, "grad_norm": 0.510050356388092, "learning_rate": 4.625467139857323e-05, "loss": 0.194, "step": 15740 }, { "epoch": 1.2983853251318103, "grad_norm": 1.456040859222412, "learning_rate": 4.6239516742859625e-05, "loss": 0.1679, "step": 15760 }, { "epoch": 1.3000329525483303, "grad_norm": 0.7667500376701355, "learning_rate": 4.622433398205305e-05, "loss": 0.1955, "step": 15780 }, { "epoch": 1.3016805799648505, "grad_norm": 5.522034168243408, "learning_rate": 4.620912313624404e-05, "loss": 0.1766, "step": 15800 }, { "epoch": 1.3033282073813708, "grad_norm": 1.5902382135391235, "learning_rate": 4.619388422556026e-05, "loss": 0.1843, "step": 15820 }, { "epoch": 1.304975834797891, "grad_norm": 0.3163457214832306, "learning_rate": 4.617861727016655e-05, "loss": 0.1876, "step": 15840 }, { "epoch": 1.3066234622144113, "grad_norm": 1.0746263265609741, "learning_rate": 4.616332229026482e-05, "loss": 0.1727, "step": 15860 }, { "epoch": 1.3082710896309315, "grad_norm": 7.499575614929199, "learning_rate": 4.6147999306094095e-05, "loss": 0.191, "step": 15880 }, { "epoch": 1.3099187170474518, "grad_norm": 1.3614699840545654, "learning_rate": 4.613264833793044e-05, "loss": 0.1979, "step": 15900 }, { "epoch": 1.3115663444639718, "grad_norm": 2.371039390563965, "learning_rate": 4.611726940608694e-05, "loss": 0.1868, "step": 15920 }, { "epoch": 1.313213971880492, "grad_norm": 0.4890645444393158, "learning_rate": 4.6101862530913715e-05, "loss": 0.181, "step": 15940 }, { "epoch": 1.3148615992970123, "grad_norm": 1.2924984693527222, "learning_rate": 4.6086427732797836e-05, "loss": 0.1826, "step": 15960 }, { "epoch": 1.3165092267135325, "grad_norm": 0.8311454653739929, "learning_rate": 4.6070965032163316e-05, "loss": 0.1757, "step": 15980 }, { "epoch": 1.3181568541300528, "grad_norm": 4.735937595367432, "learning_rate": 4.605547444947112e-05, "loss": 0.1934, "step": 16000 }, { "epoch": 1.3181568541300528, "eval_loss": 0.5345502495765686, "eval_runtime": 711.0231, "eval_samples_per_second": 30.645, "eval_steps_per_second": 7.662, "eval_wer": 0.23376714265318863, "step": 16000 }, { "epoch": 1.3198044815465728, "grad_norm": 0.9712895750999451, "learning_rate": 4.603995600521908e-05, "loss": 0.1792, "step": 16020 }, { "epoch": 1.321452108963093, "grad_norm": 0.3473972976207733, "learning_rate": 4.6024409719941905e-05, "loss": 0.1926, "step": 16040 }, { "epoch": 1.3230997363796133, "grad_norm": 1.2832224369049072, "learning_rate": 4.600883561421116e-05, "loss": 0.1696, "step": 16060 }, { "epoch": 1.3247473637961336, "grad_norm": 1.0073579549789429, "learning_rate": 4.599401446384175e-05, "loss": 0.1816, "step": 16080 }, { "epoch": 1.3263949912126538, "grad_norm": 1.0329769849777222, "learning_rate": 4.597838616753484e-05, "loss": 0.1834, "step": 16100 }, { "epoch": 1.328042618629174, "grad_norm": 2.175204038619995, "learning_rate": 4.596273011167479e-05, "loss": 0.1936, "step": 16120 }, { "epoch": 1.3296902460456943, "grad_norm": 0.29955798387527466, "learning_rate": 4.594704631697842e-05, "loss": 0.1735, "step": 16140 }, { "epoch": 1.3313378734622145, "grad_norm": 5.876577377319336, "learning_rate": 4.593133480419924e-05, "loss": 0.1642, "step": 16160 }, { "epoch": 1.3329855008787346, "grad_norm": 1.0501422882080078, "learning_rate": 4.591559559412743e-05, "loss": 0.2176, "step": 16180 }, { "epoch": 1.3346331282952548, "grad_norm": 2.9819207191467285, "learning_rate": 4.5899828707589844e-05, "loss": 0.1841, "step": 16200 }, { "epoch": 1.336280755711775, "grad_norm": 0.8568416237831116, "learning_rate": 4.588403416544992e-05, "loss": 0.1916, "step": 16220 }, { "epoch": 1.3379283831282953, "grad_norm": 1.058608889579773, "learning_rate": 4.586821198860772e-05, "loss": 0.1881, "step": 16240 }, { "epoch": 1.3395760105448153, "grad_norm": 1.0041903257369995, "learning_rate": 4.585236219799988e-05, "loss": 0.1696, "step": 16260 }, { "epoch": 1.3412236379613356, "grad_norm": 1.351030945777893, "learning_rate": 4.5836484814599544e-05, "loss": 0.183, "step": 16280 }, { "epoch": 1.3428712653778558, "grad_norm": 0.7650487422943115, "learning_rate": 4.58205798594164e-05, "loss": 0.1775, "step": 16300 }, { "epoch": 1.344518892794376, "grad_norm": 1.0618997812271118, "learning_rate": 4.5804647353496604e-05, "loss": 0.1846, "step": 16320 }, { "epoch": 1.3461665202108963, "grad_norm": 1.2706764936447144, "learning_rate": 4.578868731792277e-05, "loss": 0.1994, "step": 16340 }, { "epoch": 1.3478141476274166, "grad_norm": 0.9554028511047363, "learning_rate": 4.5772699773813936e-05, "loss": 0.1564, "step": 16360 }, { "epoch": 1.3494617750439368, "grad_norm": 0.8473567366600037, "learning_rate": 4.575668474232556e-05, "loss": 0.1924, "step": 16380 }, { "epoch": 1.351109402460457, "grad_norm": 1.2312932014465332, "learning_rate": 4.574064224464944e-05, "loss": 0.194, "step": 16400 }, { "epoch": 1.352757029876977, "grad_norm": 0.5687373280525208, "learning_rate": 4.5724572302013754e-05, "loss": 0.1828, "step": 16420 }, { "epoch": 1.3544046572934973, "grad_norm": 0.2973005473613739, "learning_rate": 4.570847493568297e-05, "loss": 0.1774, "step": 16440 }, { "epoch": 1.3560522847100176, "grad_norm": 22.61744499206543, "learning_rate": 4.5692350166957864e-05, "loss": 0.16, "step": 16460 }, { "epoch": 1.3576999121265378, "grad_norm": 0.5010253190994263, "learning_rate": 4.567619801717545e-05, "loss": 0.1842, "step": 16480 }, { "epoch": 1.3593475395430579, "grad_norm": 1.405232548713684, "learning_rate": 4.566001850770899e-05, "loss": 0.194, "step": 16500 }, { "epoch": 1.3609951669595781, "grad_norm": 0.8372935652732849, "learning_rate": 4.564381165996796e-05, "loss": 0.1866, "step": 16520 }, { "epoch": 1.3626427943760984, "grad_norm": 0.2158607393503189, "learning_rate": 4.5627577495397974e-05, "loss": 0.1802, "step": 16540 }, { "epoch": 1.3642904217926186, "grad_norm": 7.98114013671875, "learning_rate": 4.561131603548082e-05, "loss": 0.1832, "step": 16560 }, { "epoch": 1.3659380492091389, "grad_norm": 0.6528833508491516, "learning_rate": 4.5595027301734413e-05, "loss": 0.1919, "step": 16580 }, { "epoch": 1.367585676625659, "grad_norm": 0.5290044546127319, "learning_rate": 4.557871131571274e-05, "loss": 0.1693, "step": 16600 }, { "epoch": 1.3692333040421794, "grad_norm": 1.5808736085891724, "learning_rate": 4.5562368099005854e-05, "loss": 0.1813, "step": 16620 }, { "epoch": 1.3708809314586996, "grad_norm": 0.17612561583518982, "learning_rate": 4.554599767323984e-05, "loss": 0.1879, "step": 16640 }, { "epoch": 1.3725285588752196, "grad_norm": 1.369644284248352, "learning_rate": 4.552960006007678e-05, "loss": 0.1602, "step": 16660 }, { "epoch": 1.3741761862917399, "grad_norm": 1.1678745746612549, "learning_rate": 4.551317528121476e-05, "loss": 0.1897, "step": 16680 }, { "epoch": 1.3758238137082601, "grad_norm": 0.5510971546173096, "learning_rate": 4.549672335838777e-05, "loss": 0.196, "step": 16700 }, { "epoch": 1.3774714411247804, "grad_norm": 0.9784049391746521, "learning_rate": 4.548024431336576e-05, "loss": 0.1865, "step": 16720 }, { "epoch": 1.3791190685413004, "grad_norm": 0.2733114957809448, "learning_rate": 4.546373816795455e-05, "loss": 0.1811, "step": 16740 }, { "epoch": 1.3807666959578206, "grad_norm": 1.3034899234771729, "learning_rate": 4.5447204943995816e-05, "loss": 0.1528, "step": 16760 }, { "epoch": 1.382414323374341, "grad_norm": 1.3286523818969727, "learning_rate": 4.543064466336706e-05, "loss": 0.1832, "step": 16780 }, { "epoch": 1.3840619507908611, "grad_norm": 0.747806191444397, "learning_rate": 4.541405734798161e-05, "loss": 0.1672, "step": 16800 }, { "epoch": 1.3857095782073814, "grad_norm": 0.5867149233818054, "learning_rate": 4.539744301978856e-05, "loss": 0.1814, "step": 16820 }, { "epoch": 1.3873572056239016, "grad_norm": 0.43317413330078125, "learning_rate": 4.5380801700772726e-05, "loss": 0.1825, "step": 16840 }, { "epoch": 1.3890048330404219, "grad_norm": 1.011514663696289, "learning_rate": 4.536413341295467e-05, "loss": 0.1553, "step": 16860 }, { "epoch": 1.3906524604569421, "grad_norm": 0.7447202205657959, "learning_rate": 4.5347438178390624e-05, "loss": 0.1832, "step": 16880 }, { "epoch": 1.3923000878734622, "grad_norm": 4.756796360015869, "learning_rate": 4.533071601917248e-05, "loss": 0.1923, "step": 16900 }, { "epoch": 1.3939477152899824, "grad_norm": 1.5507986545562744, "learning_rate": 4.5313966957427755e-05, "loss": 0.1754, "step": 16920 }, { "epoch": 1.3955953427065027, "grad_norm": 0.557498037815094, "learning_rate": 4.529719101531957e-05, "loss": 0.178, "step": 16940 }, { "epoch": 1.397242970123023, "grad_norm": 1.611181378364563, "learning_rate": 4.5280388215046624e-05, "loss": 0.1557, "step": 16960 }, { "epoch": 1.398890597539543, "grad_norm": 0.6798775792121887, "learning_rate": 4.5263558578843136e-05, "loss": 0.1741, "step": 16980 }, { "epoch": 1.4005382249560632, "grad_norm": 10.896245956420898, "learning_rate": 4.524670212897884e-05, "loss": 0.2016, "step": 17000 }, { "epoch": 1.4021858523725834, "grad_norm": 1.8802106380462646, "learning_rate": 4.5229818887758957e-05, "loss": 0.177, "step": 17020 }, { "epoch": 1.4038334797891037, "grad_norm": 0.2604859471321106, "learning_rate": 4.5212908877524165e-05, "loss": 0.1791, "step": 17040 }, { "epoch": 1.405481107205624, "grad_norm": 1.834112524986267, "learning_rate": 4.519597212065056e-05, "loss": 0.1664, "step": 17060 }, { "epoch": 1.4071287346221442, "grad_norm": 0.9913097620010376, "learning_rate": 4.517900863954962e-05, "loss": 0.1955, "step": 17080 }, { "epoch": 1.4087763620386644, "grad_norm": 2.1117942333221436, "learning_rate": 4.51620184566682e-05, "loss": 0.1798, "step": 17100 }, { "epoch": 1.4104239894551847, "grad_norm": 5.098875999450684, "learning_rate": 4.5145001594488466e-05, "loss": 0.1908, "step": 17120 }, { "epoch": 1.4120716168717047, "grad_norm": 0.7090994715690613, "learning_rate": 4.512795807552792e-05, "loss": 0.1727, "step": 17140 }, { "epoch": 1.413719244288225, "grad_norm": 3.752267599105835, "learning_rate": 4.511088792233932e-05, "loss": 0.1542, "step": 17160 }, { "epoch": 1.4153668717047452, "grad_norm": 1.5449800491333008, "learning_rate": 4.5093791157510656e-05, "loss": 0.1651, "step": 17180 }, { "epoch": 1.4170144991212654, "grad_norm": 0.9400858283042908, "learning_rate": 4.507666780366514e-05, "loss": 0.1844, "step": 17200 }, { "epoch": 1.4186621265377855, "grad_norm": 0.37838345766067505, "learning_rate": 4.5059517883461196e-05, "loss": 0.1733, "step": 17220 }, { "epoch": 1.4203097539543057, "grad_norm": 0.239340141415596, "learning_rate": 4.50432008728471e-05, "loss": 0.1855, "step": 17240 }, { "epoch": 1.421957381370826, "grad_norm": 1.4485529661178589, "learning_rate": 4.502599921354852e-05, "loss": 0.1515, "step": 17260 }, { "epoch": 1.4236050087873462, "grad_norm": 0.6260993480682373, "learning_rate": 4.500877105493846e-05, "loss": 0.1781, "step": 17280 }, { "epoch": 1.4252526362038664, "grad_norm": 2.114168882369995, "learning_rate": 4.499151641981402e-05, "loss": 0.1732, "step": 17300 }, { "epoch": 1.4269002636203867, "grad_norm": 0.7402538061141968, "learning_rate": 4.497423533100732e-05, "loss": 0.1769, "step": 17320 }, { "epoch": 1.428547891036907, "grad_norm": 0.7155308723449707, "learning_rate": 4.4956927811385475e-05, "loss": 0.1649, "step": 17340 }, { "epoch": 1.4301955184534272, "grad_norm": 0.8413863182067871, "learning_rate": 4.4939593883850576e-05, "loss": 0.1563, "step": 17360 }, { "epoch": 1.4318431458699472, "grad_norm": 2.0796799659729004, "learning_rate": 4.492223357133967e-05, "loss": 0.1797, "step": 17380 }, { "epoch": 1.4334907732864675, "grad_norm": 1.1681219339370728, "learning_rate": 4.490484689682471e-05, "loss": 0.1819, "step": 17400 }, { "epoch": 1.4351384007029877, "grad_norm": 1.0417784452438354, "learning_rate": 4.488743388331254e-05, "loss": 0.1861, "step": 17420 }, { "epoch": 1.436786028119508, "grad_norm": 0.17414021492004395, "learning_rate": 4.486999455384483e-05, "loss": 0.1734, "step": 17440 }, { "epoch": 1.4384336555360282, "grad_norm": 1.723633885383606, "learning_rate": 4.485252893149812e-05, "loss": 0.1629, "step": 17460 }, { "epoch": 1.4400812829525482, "grad_norm": 0.5598846673965454, "learning_rate": 4.4835037039383686e-05, "loss": 0.1749, "step": 17480 }, { "epoch": 1.4417289103690685, "grad_norm": 0.617661714553833, "learning_rate": 4.481751890064761e-05, "loss": 0.1734, "step": 17500 }, { "epoch": 1.4433765377855887, "grad_norm": 0.4917828440666199, "learning_rate": 4.479997453847069e-05, "loss": 0.1724, "step": 17520 }, { "epoch": 1.445024165202109, "grad_norm": 0.27072271704673767, "learning_rate": 4.4782403976068417e-05, "loss": 0.1664, "step": 17540 }, { "epoch": 1.4466717926186292, "grad_norm": 1.5086781978607178, "learning_rate": 4.476480723669095e-05, "loss": 0.1614, "step": 17560 }, { "epoch": 1.4483194200351495, "grad_norm": 1.092875599861145, "learning_rate": 4.4747184343623113e-05, "loss": 0.1827, "step": 17580 }, { "epoch": 1.4499670474516697, "grad_norm": 0.9140943288803101, "learning_rate": 4.4729535320184304e-05, "loss": 0.1865, "step": 17600 }, { "epoch": 1.4516146748681897, "grad_norm": 0.9031366109848022, "learning_rate": 4.471186018972852e-05, "loss": 0.1767, "step": 17620 }, { "epoch": 1.45326230228471, "grad_norm": 0.4150620102882385, "learning_rate": 4.46941589756443e-05, "loss": 0.1814, "step": 17640 }, { "epoch": 1.4549099297012302, "grad_norm": 2.178504228591919, "learning_rate": 4.467643170135467e-05, "loss": 0.1608, "step": 17660 }, { "epoch": 1.4565575571177505, "grad_norm": 1.05979323387146, "learning_rate": 4.4658678390317186e-05, "loss": 0.175, "step": 17680 }, { "epoch": 1.4582051845342707, "grad_norm": 1.3193062543869019, "learning_rate": 4.464089906602383e-05, "loss": 0.2092, "step": 17700 }, { "epoch": 1.4598528119507908, "grad_norm": 0.6934455037117004, "learning_rate": 4.462309375200101e-05, "loss": 0.1695, "step": 17720 }, { "epoch": 1.461500439367311, "grad_norm": 0.34769323468208313, "learning_rate": 4.460526247180953e-05, "loss": 0.1844, "step": 17740 }, { "epoch": 1.4631480667838312, "grad_norm": 2.567417621612549, "learning_rate": 4.4587405249044546e-05, "loss": 0.1657, "step": 17760 }, { "epoch": 1.4647956942003515, "grad_norm": 0.7568791508674622, "learning_rate": 4.4569522107335535e-05, "loss": 0.1794, "step": 17780 }, { "epoch": 1.4664433216168717, "grad_norm": 0.6661089062690735, "learning_rate": 4.455161307034631e-05, "loss": 0.1812, "step": 17800 }, { "epoch": 1.468090949033392, "grad_norm": 0.6958649754524231, "learning_rate": 4.4533678161774894e-05, "loss": 0.1714, "step": 17820 }, { "epoch": 1.4697385764499122, "grad_norm": 0.3765020966529846, "learning_rate": 4.45157174053536e-05, "loss": 0.176, "step": 17840 }, { "epoch": 1.4713862038664323, "grad_norm": 1.378657579421997, "learning_rate": 4.4497730824848885e-05, "loss": 0.1645, "step": 17860 }, { "epoch": 1.4730338312829525, "grad_norm": 0.8017684817314148, "learning_rate": 4.447971844406143e-05, "loss": 0.1795, "step": 17880 }, { "epoch": 1.4746814586994728, "grad_norm": 1.0388306379318237, "learning_rate": 4.4461680286826024e-05, "loss": 0.1941, "step": 17900 }, { "epoch": 1.476329086115993, "grad_norm": 0.5598440766334534, "learning_rate": 4.4443616377011586e-05, "loss": 0.1879, "step": 17920 }, { "epoch": 1.4779767135325133, "grad_norm": 0.18948721885681152, "learning_rate": 4.4425526738521096e-05, "loss": 0.1837, "step": 17940 }, { "epoch": 1.4796243409490333, "grad_norm": 1.4350675344467163, "learning_rate": 4.440741139529159e-05, "loss": 0.1578, "step": 17960 }, { "epoch": 1.4812719683655535, "grad_norm": 0.7614567279815674, "learning_rate": 4.4389270371294104e-05, "loss": 0.1878, "step": 17980 }, { "epoch": 1.4829195957820738, "grad_norm": 0.5294657945632935, "learning_rate": 4.437110369053368e-05, "loss": 0.1792, "step": 18000 }, { "epoch": 1.4829195957820738, "eval_loss": 0.539612352848053, "eval_runtime": 824.8651, "eval_samples_per_second": 26.415, "eval_steps_per_second": 6.605, "eval_wer": 0.2317739899166627, "step": 18000 } ], "logging_steps": 20, "max_steps": 60690, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.702403313535895e+20, "train_batch_size": 24, "trial_name": null, "trial_params": null }