{ "best_metric": 0.23469093535410654, "best_model_checkpoint": "./checkpoints/w2v-multilingual-v1.3/checkpoint-2000", "epoch": 0.4942882249560633, "eval_steps": 2000, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001647627416520211, "grad_norm": 0.2767059803009033, "learning_rate": 4.9431537320810675e-08, "loss": 0.1923, "step": 20 }, { "epoch": 0.003295254833040422, "grad_norm": 2.006911039352417, "learning_rate": 1.977261492832427e-07, "loss": 0.2947, "step": 40 }, { "epoch": 0.004942882249560633, "grad_norm": 0.3714193105697632, "learning_rate": 3.624979403526116e-07, "loss": 0.2955, "step": 60 }, { "epoch": 0.006590509666080844, "grad_norm": 0.3106062412261963, "learning_rate": 5.272697314219806e-07, "loss": 0.2754, "step": 80 }, { "epoch": 0.008238137082601054, "grad_norm": 3.2078254222869873, "learning_rate": 6.838029329378811e-07, "loss": 0.3177, "step": 100 }, { "epoch": 0.009885764499121265, "grad_norm": 0.38503074645996094, "learning_rate": 8.485747240072501e-07, "loss": 0.1905, "step": 120 }, { "epoch": 0.011533391915641476, "grad_norm": 0.15724419057369232, "learning_rate": 1.013346515076619e-06, "loss": 0.2942, "step": 140 }, { "epoch": 0.013181019332161687, "grad_norm": 0.5840986967086792, "learning_rate": 1.1781183061459877e-06, "loss": 0.2907, "step": 160 }, { "epoch": 0.014828646748681899, "grad_norm": 0.23122897744178772, "learning_rate": 1.3346515076618883e-06, "loss": 0.2811, "step": 180 }, { "epoch": 0.016476274165202108, "grad_norm": 2.7248637676239014, "learning_rate": 1.4994232987312573e-06, "loss": 0.3168, "step": 200 }, { "epoch": 0.01812390158172232, "grad_norm": 1.0846350193023682, "learning_rate": 1.6641950898006263e-06, "loss": 0.1979, "step": 220 }, { "epoch": 0.01977152899824253, "grad_norm": 1.7905633449554443, "learning_rate": 1.828966880869995e-06, "loss": 0.2656, "step": 240 }, { "epoch": 0.021419156414762743, "grad_norm": 0.31305554509162903, "learning_rate": 1.993738671939364e-06, "loss": 0.272, "step": 260 }, { "epoch": 0.023066783831282953, "grad_norm": 0.31258106231689453, "learning_rate": 2.158510463008733e-06, "loss": 0.2693, "step": 280 }, { "epoch": 0.024714411247803162, "grad_norm": 1.783349871635437, "learning_rate": 2.323282254078102e-06, "loss": 0.3116, "step": 300 }, { "epoch": 0.026362038664323375, "grad_norm": 0.6936488747596741, "learning_rate": 2.488054045147471e-06, "loss": 0.1931, "step": 320 }, { "epoch": 0.028009666080843584, "grad_norm": 0.6185577511787415, "learning_rate": 2.65282583621684e-06, "loss": 0.258, "step": 340 }, { "epoch": 0.029657293497363797, "grad_norm": 0.3592207729816437, "learning_rate": 2.8175976272862085e-06, "loss": 0.2401, "step": 360 }, { "epoch": 0.03130492091388401, "grad_norm": 1.2324920892715454, "learning_rate": 2.982369418355578e-06, "loss": 0.2371, "step": 380 }, { "epoch": 0.032952548330404216, "grad_norm": 1.5560214519500732, "learning_rate": 3.147141209424947e-06, "loss": 0.2414, "step": 400 }, { "epoch": 0.03460017574692443, "grad_norm": 0.8133947253227234, "learning_rate": 3.3119130004943154e-06, "loss": 0.1744, "step": 420 }, { "epoch": 0.03624780316344464, "grad_norm": 0.8599107265472412, "learning_rate": 3.4766847915636844e-06, "loss": 0.2536, "step": 440 }, { "epoch": 0.03789543057996485, "grad_norm": 0.3699595034122467, "learning_rate": 3.641456582633054e-06, "loss": 0.2552, "step": 460 }, { "epoch": 0.03954305799648506, "grad_norm": 1.2955116033554077, "learning_rate": 3.797989784148954e-06, "loss": 0.2532, "step": 480 }, { "epoch": 0.041190685413005274, "grad_norm": 11.518170356750488, "learning_rate": 3.962761575218322e-06, "loss": 0.2507, "step": 500 }, { "epoch": 0.042838312829525486, "grad_norm": 0.9779248833656311, "learning_rate": 4.127533366287692e-06, "loss": 0.1733, "step": 520 }, { "epoch": 0.04448594024604569, "grad_norm": 0.5386682152748108, "learning_rate": 4.29230515735706e-06, "loss": 0.2415, "step": 540 }, { "epoch": 0.046133567662565905, "grad_norm": 2.7366793155670166, "learning_rate": 4.457076948426429e-06, "loss": 0.2409, "step": 560 }, { "epoch": 0.04778119507908612, "grad_norm": 1.9912066459655762, "learning_rate": 4.621848739495799e-06, "loss": 0.2134, "step": 580 }, { "epoch": 0.049428822495606324, "grad_norm": 4.058573246002197, "learning_rate": 4.786620530565167e-06, "loss": 0.2456, "step": 600 }, { "epoch": 0.05107644991212654, "grad_norm": 0.37456750869750977, "learning_rate": 4.951392321634536e-06, "loss": 0.1589, "step": 620 }, { "epoch": 0.05272407732864675, "grad_norm": 0.7950440645217896, "learning_rate": 5.116164112703905e-06, "loss": 0.2485, "step": 640 }, { "epoch": 0.05437170474516696, "grad_norm": 1.884665846824646, "learning_rate": 5.280935903773274e-06, "loss": 0.2197, "step": 660 }, { "epoch": 0.05601933216168717, "grad_norm": 0.32170844078063965, "learning_rate": 5.445707694842643e-06, "loss": 0.2111, "step": 680 }, { "epoch": 0.05766695957820738, "grad_norm": 1.8230172395706177, "learning_rate": 5.610479485912012e-06, "loss": 0.2379, "step": 700 }, { "epoch": 0.059314586994727594, "grad_norm": 1.2472524642944336, "learning_rate": 5.775251276981381e-06, "loss": 0.1684, "step": 720 }, { "epoch": 0.0609622144112478, "grad_norm": 0.29022061824798584, "learning_rate": 5.94002306805075e-06, "loss": 0.2201, "step": 740 }, { "epoch": 0.06260984182776802, "grad_norm": 0.49721184372901917, "learning_rate": 6.104794859120118e-06, "loss": 0.2297, "step": 760 }, { "epoch": 0.06425746924428823, "grad_norm": 0.6133716106414795, "learning_rate": 6.269566650189487e-06, "loss": 0.3173, "step": 780 }, { "epoch": 0.06590509666080843, "grad_norm": 0.9667792320251465, "learning_rate": 6.434338441258857e-06, "loss": 0.2349, "step": 800 }, { "epoch": 0.06755272407732865, "grad_norm": 0.3177216053009033, "learning_rate": 6.599110232328226e-06, "loss": 0.1635, "step": 820 }, { "epoch": 0.06920035149384886, "grad_norm": 0.8457621335983276, "learning_rate": 6.763882023397594e-06, "loss": 0.2777, "step": 840 }, { "epoch": 0.07084797891036906, "grad_norm": 0.3330087661743164, "learning_rate": 6.928653814466963e-06, "loss": 0.2154, "step": 860 }, { "epoch": 0.07249560632688928, "grad_norm": 0.5845814943313599, "learning_rate": 7.093425605536333e-06, "loss": 0.2164, "step": 880 }, { "epoch": 0.07414323374340949, "grad_norm": 2.325303554534912, "learning_rate": 7.258197396605701e-06, "loss": 0.2068, "step": 900 }, { "epoch": 0.0757908611599297, "grad_norm": 0.21893823146820068, "learning_rate": 7.414730598121602e-06, "loss": 0.1621, "step": 920 }, { "epoch": 0.07743848857644992, "grad_norm": 0.5854327082633972, "learning_rate": 7.5795023891909705e-06, "loss": 0.2281, "step": 940 }, { "epoch": 0.07908611599297012, "grad_norm": 0.2406030148267746, "learning_rate": 7.74427418026034e-06, "loss": 0.2342, "step": 960 }, { "epoch": 0.08073374340949033, "grad_norm": 1.3764126300811768, "learning_rate": 7.90904597132971e-06, "loss": 0.2018, "step": 980 }, { "epoch": 0.08238137082601055, "grad_norm": 0.9587862491607666, "learning_rate": 8.073817762399077e-06, "loss": 0.2489, "step": 1000 }, { "epoch": 0.08402899824253075, "grad_norm": 0.9726558923721313, "learning_rate": 8.238589553468447e-06, "loss": 0.154, "step": 1020 }, { "epoch": 0.08567662565905097, "grad_norm": 1.9828400611877441, "learning_rate": 8.403361344537817e-06, "loss": 0.2336, "step": 1040 }, { "epoch": 0.08732425307557118, "grad_norm": 0.14761961996555328, "learning_rate": 8.568133135607183e-06, "loss": 0.1949, "step": 1060 }, { "epoch": 0.08897188049209138, "grad_norm": 0.39077144861221313, "learning_rate": 8.732904926676553e-06, "loss": 0.2067, "step": 1080 }, { "epoch": 0.0906195079086116, "grad_norm": 2.3257837295532227, "learning_rate": 8.897676717745921e-06, "loss": 0.2184, "step": 1100 }, { "epoch": 0.09226713532513181, "grad_norm": 1.0966060161590576, "learning_rate": 9.062448508815291e-06, "loss": 0.1655, "step": 1120 }, { "epoch": 0.09391476274165202, "grad_norm": 0.5648412704467773, "learning_rate": 9.227220299884661e-06, "loss": 0.2097, "step": 1140 }, { "epoch": 0.09556239015817224, "grad_norm": 0.49816444516181946, "learning_rate": 9.39199209095403e-06, "loss": 0.2052, "step": 1160 }, { "epoch": 0.09721001757469244, "grad_norm": 1.1164054870605469, "learning_rate": 9.556763882023397e-06, "loss": 0.2298, "step": 1180 }, { "epoch": 0.09885764499121265, "grad_norm": 0.9453270435333252, "learning_rate": 9.721535673092767e-06, "loss": 0.2157, "step": 1200 }, { "epoch": 0.10050527240773287, "grad_norm": 0.4588276743888855, "learning_rate": 9.886307464162135e-06, "loss": 0.1433, "step": 1220 }, { "epoch": 0.10215289982425307, "grad_norm": 0.47535696625709534, "learning_rate": 1.0051079255231505e-05, "loss": 0.2115, "step": 1240 }, { "epoch": 0.10380052724077328, "grad_norm": 0.585959792137146, "learning_rate": 1.0215851046300875e-05, "loss": 0.2245, "step": 1260 }, { "epoch": 0.1054481546572935, "grad_norm": 0.24861204624176025, "learning_rate": 1.0380622837370241e-05, "loss": 0.1981, "step": 1280 }, { "epoch": 0.1070957820738137, "grad_norm": 31.602128982543945, "learning_rate": 1.0545394628439611e-05, "loss": 0.2265, "step": 1300 }, { "epoch": 0.10874340949033393, "grad_norm": 0.6234269142150879, "learning_rate": 1.0710166419508981e-05, "loss": 0.154, "step": 1320 }, { "epoch": 0.11039103690685413, "grad_norm": 1.2423540353775024, "learning_rate": 1.087493821057835e-05, "loss": 0.2062, "step": 1340 }, { "epoch": 0.11203866432337434, "grad_norm": 0.2090279757976532, "learning_rate": 1.1039710001647719e-05, "loss": 0.2214, "step": 1360 }, { "epoch": 0.11368629173989456, "grad_norm": 0.6145613193511963, "learning_rate": 1.1204481792717087e-05, "loss": 0.1942, "step": 1380 }, { "epoch": 0.11533391915641476, "grad_norm": 0.9004138708114624, "learning_rate": 1.1369253583786455e-05, "loss": 0.2271, "step": 1400 }, { "epoch": 0.11698154657293497, "grad_norm": 1.609165906906128, "learning_rate": 1.1534025374855825e-05, "loss": 0.1606, "step": 1420 }, { "epoch": 0.11862917398945519, "grad_norm": 0.8725568652153015, "learning_rate": 1.1698797165925195e-05, "loss": 0.204, "step": 1440 }, { "epoch": 0.1202768014059754, "grad_norm": 1.8169455528259277, "learning_rate": 1.1863568956994563e-05, "loss": 0.207, "step": 1460 }, { "epoch": 0.1219244288224956, "grad_norm": 0.37334388494491577, "learning_rate": 1.2028340748063933e-05, "loss": 0.2033, "step": 1480 }, { "epoch": 0.12357205623901582, "grad_norm": 15.727474212646484, "learning_rate": 1.2193112539133301e-05, "loss": 0.3298, "step": 1500 }, { "epoch": 0.12521968365553604, "grad_norm": 4.70903205871582, "learning_rate": 1.2357884330202669e-05, "loss": 0.159, "step": 1520 }, { "epoch": 0.12686731107205623, "grad_norm": 0.4843326807022095, "learning_rate": 1.2522656121272039e-05, "loss": 0.2141, "step": 1540 }, { "epoch": 0.12851493848857645, "grad_norm": 0.572084367275238, "learning_rate": 1.2687427912341407e-05, "loss": 0.2182, "step": 1560 }, { "epoch": 0.13016256590509667, "grad_norm": 0.31078797578811646, "learning_rate": 1.2852199703410777e-05, "loss": 0.1994, "step": 1580 }, { "epoch": 0.13181019332161686, "grad_norm": 9.014205932617188, "learning_rate": 1.3016971494480145e-05, "loss": 0.2089, "step": 1600 }, { "epoch": 0.13345782073813708, "grad_norm": 1.0150245428085327, "learning_rate": 1.3181743285549513e-05, "loss": 0.1521, "step": 1620 }, { "epoch": 0.1351054481546573, "grad_norm": 0.25271451473236084, "learning_rate": 1.3346515076618885e-05, "loss": 0.1996, "step": 1640 }, { "epoch": 0.1367530755711775, "grad_norm": 0.47118502855300903, "learning_rate": 1.3511286867688253e-05, "loss": 0.2059, "step": 1660 }, { "epoch": 0.13840070298769772, "grad_norm": 0.5134350657463074, "learning_rate": 1.3676058658757621e-05, "loss": 0.1935, "step": 1680 }, { "epoch": 0.14004833040421794, "grad_norm": 1.0354816913604736, "learning_rate": 1.384083044982699e-05, "loss": 0.2103, "step": 1700 }, { "epoch": 0.14169595782073813, "grad_norm": 0.5588876605033875, "learning_rate": 1.4005602240896359e-05, "loss": 0.1598, "step": 1720 }, { "epoch": 0.14334358523725835, "grad_norm": 0.7309175133705139, "learning_rate": 1.4170374031965727e-05, "loss": 0.2204, "step": 1740 }, { "epoch": 0.14499121265377857, "grad_norm": 0.6155902743339539, "learning_rate": 1.4335145823035099e-05, "loss": 0.2133, "step": 1760 }, { "epoch": 0.14663884007029876, "grad_norm": 0.7660940885543823, "learning_rate": 1.4499917614104467e-05, "loss": 0.2065, "step": 1780 }, { "epoch": 0.14828646748681898, "grad_norm": 1.1954026222229004, "learning_rate": 1.4664689405173835e-05, "loss": 0.2147, "step": 1800 }, { "epoch": 0.1499340949033392, "grad_norm": 0.4249323606491089, "learning_rate": 1.4829461196243205e-05, "loss": 0.1553, "step": 1820 }, { "epoch": 0.1515817223198594, "grad_norm": 2.9014129638671875, "learning_rate": 1.4994232987312573e-05, "loss": 0.2208, "step": 1840 }, { "epoch": 0.1532293497363796, "grad_norm": 1.6474498510360718, "learning_rate": 1.5159004778381941e-05, "loss": 0.209, "step": 1860 }, { "epoch": 0.15487697715289983, "grad_norm": 0.1585623174905777, "learning_rate": 1.532377656945131e-05, "loss": 0.1873, "step": 1880 }, { "epoch": 0.15652460456942002, "grad_norm": 1.171941876411438, "learning_rate": 1.548854836052068e-05, "loss": 0.2389, "step": 1900 }, { "epoch": 0.15817223198594024, "grad_norm": 0.48890382051467896, "learning_rate": 1.5653320151590047e-05, "loss": 0.1679, "step": 1920 }, { "epoch": 0.15981985940246046, "grad_norm": 0.5568016767501831, "learning_rate": 1.581809194265942e-05, "loss": 0.1968, "step": 1940 }, { "epoch": 0.16146748681898065, "grad_norm": 0.9775394797325134, "learning_rate": 1.5982863733728787e-05, "loss": 0.2208, "step": 1960 }, { "epoch": 0.16311511423550087, "grad_norm": 0.60302734375, "learning_rate": 1.6147635524798155e-05, "loss": 0.1929, "step": 1980 }, { "epoch": 0.1647627416520211, "grad_norm": 1.7513552904129028, "learning_rate": 1.6312407315867526e-05, "loss": 0.2055, "step": 2000 }, { "epoch": 0.1647627416520211, "eval_loss": 0.5699400305747986, "eval_runtime": 686.8117, "eval_samples_per_second": 31.725, "eval_steps_per_second": 7.932, "eval_wer": 0.23469093535410654, "step": 2000 }, { "epoch": 0.16641036906854131, "grad_norm": 1.049734354019165, "learning_rate": 1.6477179106936894e-05, "loss": 0.1608, "step": 2020 }, { "epoch": 0.1680579964850615, "grad_norm": 1.7113618850708008, "learning_rate": 1.6641950898006263e-05, "loss": 0.2008, "step": 2040 }, { "epoch": 0.16970562390158173, "grad_norm": 0.3202134370803833, "learning_rate": 1.6806722689075634e-05, "loss": 0.1973, "step": 2060 }, { "epoch": 0.17135325131810195, "grad_norm": 0.3612610697746277, "learning_rate": 1.6971494480145e-05, "loss": 0.1732, "step": 2080 }, { "epoch": 0.17300087873462214, "grad_norm": 1.8115849494934082, "learning_rate": 1.7136266271214367e-05, "loss": 0.2138, "step": 2100 }, { "epoch": 0.17464850615114236, "grad_norm": 0.7046949863433838, "learning_rate": 1.7301038062283735e-05, "loss": 0.1571, "step": 2120 }, { "epoch": 0.17629613356766258, "grad_norm": 0.5983096957206726, "learning_rate": 1.7465809853353107e-05, "loss": 0.2092, "step": 2140 }, { "epoch": 0.17794376098418277, "grad_norm": 0.17064958810806274, "learning_rate": 1.7630581644422475e-05, "loss": 0.2083, "step": 2160 }, { "epoch": 0.179591388400703, "grad_norm": 1.422013759613037, "learning_rate": 1.7795353435491843e-05, "loss": 0.2087, "step": 2180 }, { "epoch": 0.1812390158172232, "grad_norm": 0.982097864151001, "learning_rate": 1.7960125226561214e-05, "loss": 0.2161, "step": 2200 }, { "epoch": 0.1828866432337434, "grad_norm": 0.2690947949886322, "learning_rate": 1.8124897017630583e-05, "loss": 0.1693, "step": 2220 }, { "epoch": 0.18453427065026362, "grad_norm": 0.24867244064807892, "learning_rate": 1.828966880869995e-05, "loss": 0.2058, "step": 2240 }, { "epoch": 0.18618189806678384, "grad_norm": 0.9435555934906006, "learning_rate": 1.8454440599769322e-05, "loss": 0.1991, "step": 2260 }, { "epoch": 0.18782952548330403, "grad_norm": 0.23964335024356842, "learning_rate": 1.861921239083869e-05, "loss": 0.1932, "step": 2280 }, { "epoch": 0.18947715289982425, "grad_norm": 2.1007418632507324, "learning_rate": 1.878398418190806e-05, "loss": 0.2075, "step": 2300 }, { "epoch": 0.19112478031634447, "grad_norm": 0.31368857622146606, "learning_rate": 1.8948755972977427e-05, "loss": 0.1557, "step": 2320 }, { "epoch": 0.19277240773286466, "grad_norm": 0.4029647409915924, "learning_rate": 1.9113527764046795e-05, "loss": 0.1952, "step": 2340 }, { "epoch": 0.19442003514938488, "grad_norm": 0.38545289635658264, "learning_rate": 1.9278299555116163e-05, "loss": 0.1998, "step": 2360 }, { "epoch": 0.1960676625659051, "grad_norm": 0.44573166966438293, "learning_rate": 1.9443071346185534e-05, "loss": 0.2078, "step": 2380 }, { "epoch": 0.1977152899824253, "grad_norm": 9.132265090942383, "learning_rate": 1.9607843137254903e-05, "loss": 0.2175, "step": 2400 }, { "epoch": 0.19936291739894552, "grad_norm": 0.42929643392562866, "learning_rate": 1.977261492832427e-05, "loss": 0.1762, "step": 2420 }, { "epoch": 0.20101054481546574, "grad_norm": 0.6267173886299133, "learning_rate": 1.9937386719393642e-05, "loss": 0.2101, "step": 2440 }, { "epoch": 0.20265817223198593, "grad_norm": 9.70997142791748, "learning_rate": 2.010215851046301e-05, "loss": 0.2101, "step": 2460 }, { "epoch": 0.20430579964850615, "grad_norm": 0.47748956084251404, "learning_rate": 2.026693030153238e-05, "loss": 0.2039, "step": 2480 }, { "epoch": 0.20595342706502637, "grad_norm": 1.3222582340240479, "learning_rate": 2.043170209260175e-05, "loss": 0.213, "step": 2500 }, { "epoch": 0.20760105448154656, "grad_norm": 0.4152863919734955, "learning_rate": 2.0596473883671115e-05, "loss": 0.148, "step": 2520 }, { "epoch": 0.20924868189806678, "grad_norm": 0.7384160757064819, "learning_rate": 2.0761245674740483e-05, "loss": 0.2138, "step": 2540 }, { "epoch": 0.210896309314587, "grad_norm": 0.27651092410087585, "learning_rate": 2.0926017465809854e-05, "loss": 0.2046, "step": 2560 }, { "epoch": 0.21254393673110722, "grad_norm": 0.226897194981575, "learning_rate": 2.1090789256879222e-05, "loss": 0.1904, "step": 2580 }, { "epoch": 0.2141915641476274, "grad_norm": 1.2391464710235596, "learning_rate": 2.125556104794859e-05, "loss": 0.204, "step": 2600 }, { "epoch": 0.21583919156414763, "grad_norm": 1.6048617362976074, "learning_rate": 2.1420332839017962e-05, "loss": 0.1548, "step": 2620 }, { "epoch": 0.21748681898066785, "grad_norm": 0.28409889340400696, "learning_rate": 2.158510463008733e-05, "loss": 0.201, "step": 2640 }, { "epoch": 0.21913444639718804, "grad_norm": 0.354885995388031, "learning_rate": 2.17498764211567e-05, "loss": 0.2083, "step": 2660 }, { "epoch": 0.22078207381370826, "grad_norm": 0.2778099775314331, "learning_rate": 2.191464821222607e-05, "loss": 0.1891, "step": 2680 }, { "epoch": 0.22242970123022848, "grad_norm": 1.007686734199524, "learning_rate": 2.2079420003295438e-05, "loss": 0.2152, "step": 2700 }, { "epoch": 0.22407732864674867, "grad_norm": 0.9725649952888489, "learning_rate": 2.2244191794364806e-05, "loss": 0.1581, "step": 2720 }, { "epoch": 0.2257249560632689, "grad_norm": 0.2451123297214508, "learning_rate": 2.2408963585434174e-05, "loss": 0.2019, "step": 2740 }, { "epoch": 0.22737258347978911, "grad_norm": 0.3667006194591522, "learning_rate": 2.2573735376503542e-05, "loss": 0.2083, "step": 2760 }, { "epoch": 0.2290202108963093, "grad_norm": 3.1283884048461914, "learning_rate": 2.273850716757291e-05, "loss": 0.194, "step": 2780 }, { "epoch": 0.23066783831282953, "grad_norm": 0.7148507237434387, "learning_rate": 2.2903278958642282e-05, "loss": 0.1931, "step": 2800 }, { "epoch": 0.23231546572934975, "grad_norm": 0.5805519223213196, "learning_rate": 2.306805074971165e-05, "loss": 0.1552, "step": 2820 }, { "epoch": 0.23396309314586994, "grad_norm": 0.8168196082115173, "learning_rate": 2.323282254078102e-05, "loss": 0.2107, "step": 2840 }, { "epoch": 0.23561072056239016, "grad_norm": 0.17171867191791534, "learning_rate": 2.339759433185039e-05, "loss": 0.2195, "step": 2860 }, { "epoch": 0.23725834797891038, "grad_norm": 0.6692082285881042, "learning_rate": 2.3562366122919758e-05, "loss": 0.1975, "step": 2880 }, { "epoch": 0.23890597539543057, "grad_norm": 1.5185160636901855, "learning_rate": 2.3727137913989126e-05, "loss": 0.2086, "step": 2900 }, { "epoch": 0.2405536028119508, "grad_norm": 0.8978987336158752, "learning_rate": 2.3891909705058498e-05, "loss": 0.1537, "step": 2920 }, { "epoch": 0.242201230228471, "grad_norm": 1.1462221145629883, "learning_rate": 2.4056681496127866e-05, "loss": 0.215, "step": 2940 }, { "epoch": 0.2438488576449912, "grad_norm": 0.2455727905035019, "learning_rate": 2.422145328719723e-05, "loss": 0.2137, "step": 2960 }, { "epoch": 0.24549648506151142, "grad_norm": 0.21464231610298157, "learning_rate": 2.4386225078266602e-05, "loss": 0.203, "step": 2980 }, { "epoch": 0.24714411247803164, "grad_norm": 0.8478316068649292, "learning_rate": 2.455099686933597e-05, "loss": 0.2085, "step": 3000 }, { "epoch": 0.24879173989455183, "grad_norm": 0.37225690484046936, "learning_rate": 2.4715768660405338e-05, "loss": 0.1645, "step": 3020 }, { "epoch": 0.2504393673110721, "grad_norm": 1.3999593257904053, "learning_rate": 2.488054045147471e-05, "loss": 0.2129, "step": 3040 }, { "epoch": 0.2520869947275923, "grad_norm": 2.0909199714660645, "learning_rate": 2.5045312242544078e-05, "loss": 0.1926, "step": 3060 }, { "epoch": 0.25373462214411246, "grad_norm": 0.19655053317546844, "learning_rate": 2.5210084033613446e-05, "loss": 0.2007, "step": 3080 }, { "epoch": 0.2553822495606327, "grad_norm": 1.2680870294570923, "learning_rate": 2.5374855824682814e-05, "loss": 0.2107, "step": 3100 }, { "epoch": 0.2570298769771529, "grad_norm": 0.19821316003799438, "learning_rate": 2.5539627615752182e-05, "loss": 0.1511, "step": 3120 }, { "epoch": 0.2586775043936731, "grad_norm": 0.22427937388420105, "learning_rate": 2.5704399406821554e-05, "loss": 0.2139, "step": 3140 }, { "epoch": 0.26032513181019334, "grad_norm": 0.4531656503677368, "learning_rate": 2.5869171197890922e-05, "loss": 0.1995, "step": 3160 }, { "epoch": 0.26197275922671354, "grad_norm": 0.3967747986316681, "learning_rate": 2.603394298896029e-05, "loss": 0.1981, "step": 3180 }, { "epoch": 0.26362038664323373, "grad_norm": 1.0957462787628174, "learning_rate": 2.6198714780029658e-05, "loss": 0.2092, "step": 3200 }, { "epoch": 0.265268014059754, "grad_norm": 0.5567193627357483, "learning_rate": 2.6363486571099026e-05, "loss": 0.1642, "step": 3220 }, { "epoch": 0.26691564147627417, "grad_norm": 0.3523741066455841, "learning_rate": 2.6528258362168395e-05, "loss": 0.2059, "step": 3240 }, { "epoch": 0.26856326889279436, "grad_norm": 0.40257710218429565, "learning_rate": 2.669303015323777e-05, "loss": 0.195, "step": 3260 }, { "epoch": 0.2702108963093146, "grad_norm": 0.3187640309333801, "learning_rate": 2.6849563354753665e-05, "loss": 0.1827, "step": 3280 }, { "epoch": 0.2718585237258348, "grad_norm": 0.7375414967536926, "learning_rate": 2.701433514582304e-05, "loss": 0.2247, "step": 3300 }, { "epoch": 0.273506151142355, "grad_norm": 0.45597076416015625, "learning_rate": 2.7179106936892408e-05, "loss": 0.1654, "step": 3320 }, { "epoch": 0.27515377855887524, "grad_norm": 0.21507132053375244, "learning_rate": 2.7343878727961776e-05, "loss": 0.1858, "step": 3340 }, { "epoch": 0.27680140597539543, "grad_norm": 0.7203060388565063, "learning_rate": 2.7508650519031144e-05, "loss": 0.1908, "step": 3360 }, { "epoch": 0.2784490333919156, "grad_norm": 0.8007901906967163, "learning_rate": 2.7673422310100512e-05, "loss": 0.1793, "step": 3380 }, { "epoch": 0.28009666080843587, "grad_norm": 3.210064649581909, "learning_rate": 2.783819410116988e-05, "loss": 0.206, "step": 3400 }, { "epoch": 0.28174428822495606, "grad_norm": 0.8950255513191223, "learning_rate": 2.8002965892239252e-05, "loss": 0.1531, "step": 3420 }, { "epoch": 0.28339191564147626, "grad_norm": 0.4942973256111145, "learning_rate": 2.816773768330862e-05, "loss": 0.1916, "step": 3440 }, { "epoch": 0.2850395430579965, "grad_norm": 0.31426137685775757, "learning_rate": 2.8332509474377988e-05, "loss": 0.2013, "step": 3460 }, { "epoch": 0.2866871704745167, "grad_norm": 0.47154414653778076, "learning_rate": 2.8497281265447356e-05, "loss": 0.1831, "step": 3480 }, { "epoch": 0.2883347978910369, "grad_norm": 0.8456603288650513, "learning_rate": 2.8662053056516724e-05, "loss": 0.2043, "step": 3500 }, { "epoch": 0.28998242530755713, "grad_norm": 0.29031482338905334, "learning_rate": 2.8826824847586092e-05, "loss": 0.1607, "step": 3520 }, { "epoch": 0.2916300527240773, "grad_norm": 0.3170378804206848, "learning_rate": 2.8991596638655467e-05, "loss": 0.1916, "step": 3540 }, { "epoch": 0.2932776801405975, "grad_norm": 0.3800877332687378, "learning_rate": 2.9156368429724836e-05, "loss": 0.2051, "step": 3560 }, { "epoch": 0.29492530755711777, "grad_norm": 0.5847609639167786, "learning_rate": 2.9321140220794204e-05, "loss": 0.196, "step": 3580 }, { "epoch": 0.29657293497363796, "grad_norm": 1.0933667421340942, "learning_rate": 2.9485912011863572e-05, "loss": 0.2154, "step": 3600 }, { "epoch": 0.29822056239015815, "grad_norm": 4.349573135375977, "learning_rate": 2.9650683802932937e-05, "loss": 0.1606, "step": 3620 }, { "epoch": 0.2998681898066784, "grad_norm": 0.4264489710330963, "learning_rate": 2.9815455594002305e-05, "loss": 0.2117, "step": 3640 }, { "epoch": 0.3015158172231986, "grad_norm": 0.47935691475868225, "learning_rate": 2.998022738507168e-05, "loss": 0.1901, "step": 3660 }, { "epoch": 0.3031634446397188, "grad_norm": 0.7258153557777405, "learning_rate": 3.0144999176141048e-05, "loss": 0.189, "step": 3680 }, { "epoch": 0.30481107205623903, "grad_norm": 2.0093941688537598, "learning_rate": 3.0309770967210416e-05, "loss": 0.2104, "step": 3700 }, { "epoch": 0.3064586994727592, "grad_norm": 1.1718699932098389, "learning_rate": 3.0474542758279784e-05, "loss": 0.1577, "step": 3720 }, { "epoch": 0.3081063268892794, "grad_norm": 0.19388867914676666, "learning_rate": 3.063931454934915e-05, "loss": 0.2011, "step": 3740 }, { "epoch": 0.30975395430579966, "grad_norm": 0.2112320065498352, "learning_rate": 3.080408634041852e-05, "loss": 0.2077, "step": 3760 }, { "epoch": 0.31140158172231985, "grad_norm": 1.9554697275161743, "learning_rate": 3.096885813148789e-05, "loss": 0.1861, "step": 3780 }, { "epoch": 0.31304920913884005, "grad_norm": 0.7065563201904297, "learning_rate": 3.113362992255726e-05, "loss": 0.2071, "step": 3800 }, { "epoch": 0.3146968365553603, "grad_norm": 0.4599238634109497, "learning_rate": 3.129840171362663e-05, "loss": 0.1655, "step": 3820 }, { "epoch": 0.3163444639718805, "grad_norm": 0.5441445708274841, "learning_rate": 3.1463173504695996e-05, "loss": 0.1971, "step": 3840 }, { "epoch": 0.3179920913884007, "grad_norm": 1.1993862390518188, "learning_rate": 3.1627945295765364e-05, "loss": 0.2048, "step": 3860 }, { "epoch": 0.3196397188049209, "grad_norm": 0.3095191717147827, "learning_rate": 3.179271708683473e-05, "loss": 0.2009, "step": 3880 }, { "epoch": 0.3212873462214411, "grad_norm": 1.0743999481201172, "learning_rate": 3.195748887790411e-05, "loss": 0.2371, "step": 3900 }, { "epoch": 0.3229349736379613, "grad_norm": 0.5000220537185669, "learning_rate": 3.2122260668973475e-05, "loss": 0.1722, "step": 3920 }, { "epoch": 0.32458260105448156, "grad_norm": 1.1417018175125122, "learning_rate": 3.2287032460042844e-05, "loss": 0.2079, "step": 3940 }, { "epoch": 0.32623022847100175, "grad_norm": 1.099433422088623, "learning_rate": 3.245180425111221e-05, "loss": 0.2009, "step": 3960 }, { "epoch": 0.32787785588752194, "grad_norm": 0.3827146589756012, "learning_rate": 3.261657604218158e-05, "loss": 0.2171, "step": 3980 }, { "epoch": 0.3295254833040422, "grad_norm": 1.1845418214797974, "learning_rate": 3.278134783325095e-05, "loss": 0.2195, "step": 4000 }, { "epoch": 0.3295254833040422, "eval_loss": 0.5868579149246216, "eval_runtime": 260.92, "eval_samples_per_second": 83.508, "eval_steps_per_second": 20.88, "eval_wer": 0.2350240606008012, "step": 4000 }, { "epoch": 0.3311731107205624, "grad_norm": 1.6073412895202637, "learning_rate": 3.2946119624320316e-05, "loss": 0.1695, "step": 4020 }, { "epoch": 0.33282073813708263, "grad_norm": 2.9597036838531494, "learning_rate": 3.311089141538969e-05, "loss": 0.2484, "step": 4040 }, { "epoch": 0.3344683655536028, "grad_norm": 1.4150543212890625, "learning_rate": 3.327566320645906e-05, "loss": 0.2087, "step": 4060 }, { "epoch": 0.336115992970123, "grad_norm": 0.255397230386734, "learning_rate": 3.344043499752843e-05, "loss": 0.2048, "step": 4080 }, { "epoch": 0.33776362038664326, "grad_norm": 1.9232691526412964, "learning_rate": 3.3605206788597795e-05, "loss": 0.2274, "step": 4100 }, { "epoch": 0.33941124780316345, "grad_norm": 1.0172194242477417, "learning_rate": 3.3769978579667164e-05, "loss": 0.1779, "step": 4120 }, { "epoch": 0.34105887521968364, "grad_norm": 1.3862395286560059, "learning_rate": 3.393475037073653e-05, "loss": 0.2095, "step": 4140 }, { "epoch": 0.3427065026362039, "grad_norm": 0.3353387117385864, "learning_rate": 3.40995221618059e-05, "loss": 0.2021, "step": 4160 }, { "epoch": 0.3443541300527241, "grad_norm": 0.9549083709716797, "learning_rate": 3.426429395287527e-05, "loss": 0.1997, "step": 4180 }, { "epoch": 0.3460017574692443, "grad_norm": 1.6077580451965332, "learning_rate": 3.4429065743944636e-05, "loss": 0.2562, "step": 4200 }, { "epoch": 0.3476493848857645, "grad_norm": 5.387716770172119, "learning_rate": 3.4593837535014004e-05, "loss": 0.1726, "step": 4220 }, { "epoch": 0.3492970123022847, "grad_norm": 0.5455642342567444, "learning_rate": 3.475860932608337e-05, "loss": 0.2342, "step": 4240 }, { "epoch": 0.3509446397188049, "grad_norm": 0.18990729749202728, "learning_rate": 3.492338111715274e-05, "loss": 0.2334, "step": 4260 }, { "epoch": 0.35259226713532515, "grad_norm": 0.4878564476966858, "learning_rate": 3.5088152908222115e-05, "loss": 0.2065, "step": 4280 }, { "epoch": 0.35423989455184535, "grad_norm": 1.3400063514709473, "learning_rate": 3.5252924699291483e-05, "loss": 0.237, "step": 4300 }, { "epoch": 0.35588752196836554, "grad_norm": 0.5822551250457764, "learning_rate": 3.541769649036085e-05, "loss": 0.1787, "step": 4320 }, { "epoch": 0.3575351493848858, "grad_norm": 0.4629223644733429, "learning_rate": 3.558246828143022e-05, "loss": 0.2129, "step": 4340 }, { "epoch": 0.359182776801406, "grad_norm": 1.4195072650909424, "learning_rate": 3.574724007249959e-05, "loss": 0.2215, "step": 4360 }, { "epoch": 0.36083040421792617, "grad_norm": 0.3443647623062134, "learning_rate": 3.5912011863568956e-05, "loss": 0.2002, "step": 4380 }, { "epoch": 0.3624780316344464, "grad_norm": 3.2932443618774414, "learning_rate": 3.607678365463833e-05, "loss": 0.2172, "step": 4400 }, { "epoch": 0.3641256590509666, "grad_norm": 0.4463866055011749, "learning_rate": 3.62415554457077e-05, "loss": 0.1889, "step": 4420 }, { "epoch": 0.3657732864674868, "grad_norm": 0.4873151183128357, "learning_rate": 3.640632723677707e-05, "loss": 0.2139, "step": 4440 }, { "epoch": 0.36742091388400705, "grad_norm": 1.6354761123657227, "learning_rate": 3.6571099027846435e-05, "loss": 0.1986, "step": 4460 }, { "epoch": 0.36906854130052724, "grad_norm": 0.5571808815002441, "learning_rate": 3.6735870818915803e-05, "loss": 0.2047, "step": 4480 }, { "epoch": 0.37071616871704743, "grad_norm": 0.7461993098258972, "learning_rate": 3.690064260998517e-05, "loss": 0.2196, "step": 4500 }, { "epoch": 0.3723637961335677, "grad_norm": 0.2061534970998764, "learning_rate": 3.7065414401054546e-05, "loss": 0.1657, "step": 4520 }, { "epoch": 0.3740114235500879, "grad_norm": 1.0977954864501953, "learning_rate": 3.7230186192123915e-05, "loss": 0.2062, "step": 4540 }, { "epoch": 0.37565905096660807, "grad_norm": 2.38232684135437, "learning_rate": 3.739495798319328e-05, "loss": 0.2319, "step": 4560 }, { "epoch": 0.3773066783831283, "grad_norm": 0.6182531118392944, "learning_rate": 3.755972977426265e-05, "loss": 0.202, "step": 4580 }, { "epoch": 0.3789543057996485, "grad_norm": 28.744009017944336, "learning_rate": 3.772450156533201e-05, "loss": 0.2235, "step": 4600 }, { "epoch": 0.3806019332161687, "grad_norm": 0.5899057984352112, "learning_rate": 3.788927335640138e-05, "loss": 0.1577, "step": 4620 }, { "epoch": 0.38224956063268895, "grad_norm": 0.5423290133476257, "learning_rate": 3.8054045147470755e-05, "loss": 0.2098, "step": 4640 }, { "epoch": 0.38389718804920914, "grad_norm": 0.23139849305152893, "learning_rate": 3.8218816938540123e-05, "loss": 0.1989, "step": 4660 }, { "epoch": 0.38554481546572933, "grad_norm": 0.3539600670337677, "learning_rate": 3.838358872960949e-05, "loss": 0.1968, "step": 4680 }, { "epoch": 0.3871924428822496, "grad_norm": 0.7127693295478821, "learning_rate": 3.854836052067886e-05, "loss": 0.2225, "step": 4700 }, { "epoch": 0.38884007029876977, "grad_norm": 0.3489459753036499, "learning_rate": 3.871313231174823e-05, "loss": 0.1606, "step": 4720 }, { "epoch": 0.39048769771528996, "grad_norm": 0.27798184752464294, "learning_rate": 3.886966551326413e-05, "loss": 0.2096, "step": 4740 }, { "epoch": 0.3921353251318102, "grad_norm": 0.4229481816291809, "learning_rate": 3.90344373043335e-05, "loss": 0.2195, "step": 4760 }, { "epoch": 0.3937829525483304, "grad_norm": 0.25523656606674194, "learning_rate": 3.9199209095402866e-05, "loss": 0.2106, "step": 4780 }, { "epoch": 0.3954305799648506, "grad_norm": 0.9676795601844788, "learning_rate": 3.936398088647224e-05, "loss": 0.2267, "step": 4800 }, { "epoch": 0.39707820738137084, "grad_norm": 0.40875479578971863, "learning_rate": 3.952875267754161e-05, "loss": 0.155, "step": 4820 }, { "epoch": 0.39872583479789103, "grad_norm": 0.335033655166626, "learning_rate": 3.969352446861098e-05, "loss": 0.2081, "step": 4840 }, { "epoch": 0.4003734622144112, "grad_norm": 0.3303823173046112, "learning_rate": 3.9858296259680345e-05, "loss": 0.2282, "step": 4860 }, { "epoch": 0.40202108963093147, "grad_norm": 0.20934663712978363, "learning_rate": 4.0023068050749714e-05, "loss": 0.1965, "step": 4880 }, { "epoch": 0.40366871704745166, "grad_norm": 1.4093564748764038, "learning_rate": 4.018783984181908e-05, "loss": 0.22, "step": 4900 }, { "epoch": 0.40531634446397186, "grad_norm": 0.4276560842990875, "learning_rate": 4.035261163288846e-05, "loss": 0.1705, "step": 4920 }, { "epoch": 0.4069639718804921, "grad_norm": 0.4243983030319214, "learning_rate": 4.0517383423957825e-05, "loss": 0.2285, "step": 4940 }, { "epoch": 0.4086115992970123, "grad_norm": 0.16951896250247955, "learning_rate": 4.068215521502719e-05, "loss": 0.2234, "step": 4960 }, { "epoch": 0.4102592267135325, "grad_norm": 0.30336254835128784, "learning_rate": 4.084692700609656e-05, "loss": 0.2152, "step": 4980 }, { "epoch": 0.41190685413005274, "grad_norm": 1.0761586427688599, "learning_rate": 4.101169879716593e-05, "loss": 0.2093, "step": 5000 }, { "epoch": 0.4135544815465729, "grad_norm": 0.7221740484237671, "learning_rate": 4.11764705882353e-05, "loss": 0.1538, "step": 5020 }, { "epoch": 0.4152021089630931, "grad_norm": 0.4801746904850006, "learning_rate": 4.1341242379304665e-05, "loss": 0.1911, "step": 5040 }, { "epoch": 0.41684973637961337, "grad_norm": 0.311234712600708, "learning_rate": 4.1506014170374034e-05, "loss": 0.2015, "step": 5060 }, { "epoch": 0.41849736379613356, "grad_norm": 0.6403760313987732, "learning_rate": 4.16707859614434e-05, "loss": 0.2014, "step": 5080 }, { "epoch": 0.42014499121265375, "grad_norm": 1.2653217315673828, "learning_rate": 4.183555775251277e-05, "loss": 0.2388, "step": 5100 }, { "epoch": 0.421792618629174, "grad_norm": 0.5536401867866516, "learning_rate": 4.200032954358214e-05, "loss": 0.1553, "step": 5120 }, { "epoch": 0.4234402460456942, "grad_norm": 0.4605076014995575, "learning_rate": 4.2165101334651506e-05, "loss": 0.2105, "step": 5140 }, { "epoch": 0.42508787346221444, "grad_norm": 0.28758004307746887, "learning_rate": 4.232987312572088e-05, "loss": 0.2067, "step": 5160 }, { "epoch": 0.42673550087873463, "grad_norm": 0.36655622720718384, "learning_rate": 4.249464491679025e-05, "loss": 0.1976, "step": 5180 }, { "epoch": 0.4283831282952548, "grad_norm": 0.9053062796592712, "learning_rate": 4.265941670785962e-05, "loss": 0.2102, "step": 5200 }, { "epoch": 0.43003075571177507, "grad_norm": 0.5088081359863281, "learning_rate": 4.2824188498928985e-05, "loss": 0.1639, "step": 5220 }, { "epoch": 0.43167838312829526, "grad_norm": 0.7981218695640564, "learning_rate": 4.2988960289998354e-05, "loss": 0.2024, "step": 5240 }, { "epoch": 0.43332601054481545, "grad_norm": 0.7993011474609375, "learning_rate": 4.315373208106772e-05, "loss": 0.204, "step": 5260 }, { "epoch": 0.4349736379613357, "grad_norm": 0.5183406472206116, "learning_rate": 4.331850387213709e-05, "loss": 0.2041, "step": 5280 }, { "epoch": 0.4366212653778559, "grad_norm": 1.2263312339782715, "learning_rate": 4.3483275663206465e-05, "loss": 0.2182, "step": 5300 }, { "epoch": 0.4382688927943761, "grad_norm": 0.7018775343894958, "learning_rate": 4.364804745427583e-05, "loss": 0.1685, "step": 5320 }, { "epoch": 0.43991652021089633, "grad_norm": 0.7381752133369446, "learning_rate": 4.38128192453452e-05, "loss": 0.2073, "step": 5340 }, { "epoch": 0.4415641476274165, "grad_norm": 0.4658122956752777, "learning_rate": 4.397759103641457e-05, "loss": 0.2031, "step": 5360 }, { "epoch": 0.4432117750439367, "grad_norm": 0.34789761900901794, "learning_rate": 4.414236282748394e-05, "loss": 0.2023, "step": 5380 }, { "epoch": 0.44485940246045697, "grad_norm": 0.9787063598632812, "learning_rate": 4.4307134618553305e-05, "loss": 0.2264, "step": 5400 }, { "epoch": 0.44650702987697716, "grad_norm": 0.3786025047302246, "learning_rate": 4.4471906409622673e-05, "loss": 0.1656, "step": 5420 }, { "epoch": 0.44815465729349735, "grad_norm": 0.4397692084312439, "learning_rate": 4.463667820069204e-05, "loss": 0.2023, "step": 5440 }, { "epoch": 0.4498022847100176, "grad_norm": 0.20323756337165833, "learning_rate": 4.480144999176141e-05, "loss": 0.2146, "step": 5460 }, { "epoch": 0.4514499121265378, "grad_norm": 0.2108180820941925, "learning_rate": 4.496622178283078e-05, "loss": 0.2012, "step": 5480 }, { "epoch": 0.453097539543058, "grad_norm": 1.6603738069534302, "learning_rate": 4.5130993573900146e-05, "loss": 0.2257, "step": 5500 }, { "epoch": 0.45474516695957823, "grad_norm": 1.6646907329559326, "learning_rate": 4.5295765364969514e-05, "loss": 0.1636, "step": 5520 }, { "epoch": 0.4563927943760984, "grad_norm": 0.98222416639328, "learning_rate": 4.546053715603889e-05, "loss": 0.2018, "step": 5540 }, { "epoch": 0.4580404217926186, "grad_norm": 0.6065666079521179, "learning_rate": 4.562530894710826e-05, "loss": 0.2351, "step": 5560 }, { "epoch": 0.45968804920913886, "grad_norm": 0.4321737587451935, "learning_rate": 4.5790080738177625e-05, "loss": 0.2139, "step": 5580 }, { "epoch": 0.46133567662565905, "grad_norm": 2.530203342437744, "learning_rate": 4.5954852529246993e-05, "loss": 0.232, "step": 5600 }, { "epoch": 0.46298330404217924, "grad_norm": 0.8252795934677124, "learning_rate": 4.611962432031636e-05, "loss": 0.1792, "step": 5620 }, { "epoch": 0.4646309314586995, "grad_norm": 4.3282880783081055, "learning_rate": 4.628439611138573e-05, "loss": 0.2079, "step": 5640 }, { "epoch": 0.4662785588752197, "grad_norm": 0.2798108756542206, "learning_rate": 4.6449167902455105e-05, "loss": 0.2114, "step": 5660 }, { "epoch": 0.4679261862917399, "grad_norm": 0.1499057412147522, "learning_rate": 4.661393969352447e-05, "loss": 0.2026, "step": 5680 }, { "epoch": 0.4695738137082601, "grad_norm": 0.8664823770523071, "learning_rate": 4.677871148459384e-05, "loss": 0.2208, "step": 5700 }, { "epoch": 0.4712214411247803, "grad_norm": 0.32247450947761536, "learning_rate": 4.694348327566321e-05, "loss": 0.175, "step": 5720 }, { "epoch": 0.4728690685413005, "grad_norm": 0.4217327833175659, "learning_rate": 4.710825506673258e-05, "loss": 0.2207, "step": 5740 }, { "epoch": 0.47451669595782076, "grad_norm": 0.9544996023178101, "learning_rate": 4.7273026857801945e-05, "loss": 0.2269, "step": 5760 }, { "epoch": 0.47616432337434095, "grad_norm": 0.3182899057865143, "learning_rate": 4.743779864887132e-05, "loss": 0.224, "step": 5780 }, { "epoch": 0.47781195079086114, "grad_norm": 0.669552743434906, "learning_rate": 4.760257043994069e-05, "loss": 0.2332, "step": 5800 }, { "epoch": 0.4794595782073814, "grad_norm": 0.5297145247459412, "learning_rate": 4.7767342231010056e-05, "loss": 0.2233, "step": 5820 }, { "epoch": 0.4811072056239016, "grad_norm": 0.5998116731643677, "learning_rate": 4.7932114022079425e-05, "loss": 0.2282, "step": 5840 }, { "epoch": 0.48275483304042177, "grad_norm": 0.4391906261444092, "learning_rate": 4.809688581314879e-05, "loss": 0.2094, "step": 5860 }, { "epoch": 0.484402460456942, "grad_norm": 0.559304416179657, "learning_rate": 4.826165760421816e-05, "loss": 0.2106, "step": 5880 }, { "epoch": 0.4860500878734622, "grad_norm": 2.914066791534424, "learning_rate": 4.842642939528753e-05, "loss": 0.2484, "step": 5900 }, { "epoch": 0.4876977152899824, "grad_norm": 0.29989850521087646, "learning_rate": 4.85912011863569e-05, "loss": 0.1631, "step": 5920 }, { "epoch": 0.48934534270650265, "grad_norm": 2.1414361000061035, "learning_rate": 4.8755972977426265e-05, "loss": 0.2315, "step": 5940 }, { "epoch": 0.49099297012302284, "grad_norm": 0.5668668746948242, "learning_rate": 4.892074476849563e-05, "loss": 0.218, "step": 5960 }, { "epoch": 0.49264059753954303, "grad_norm": 0.34356266260147095, "learning_rate": 4.9077277970011535e-05, "loss": 0.2121, "step": 5980 }, { "epoch": 0.4942882249560633, "grad_norm": 0.7654836773872375, "learning_rate": 4.9242049761080904e-05, "loss": 0.212, "step": 6000 }, { "epoch": 0.4942882249560633, "eval_loss": 0.5638302564620972, "eval_runtime": 264.9088, "eval_samples_per_second": 82.251, "eval_steps_per_second": 20.566, "eval_wer": 0.24026168527782366, "step": 6000 } ], "logging_steps": 20, "max_steps": 60690, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.234907314346902e+20, "train_batch_size": 24, "trial_name": null, "trial_params": null }