{ "best_metric": 0.23469093535410654, "best_model_checkpoint": "./checkpoints/w2v-multilingual-v1.3/checkpoint-2000", "epoch": 0.3295254833040422, "eval_steps": 2000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001647627416520211, "grad_norm": 0.2767059803009033, "learning_rate": 4.9431537320810675e-08, "loss": 0.1923, "step": 20 }, { "epoch": 0.003295254833040422, "grad_norm": 2.006911039352417, "learning_rate": 1.977261492832427e-07, "loss": 0.2947, "step": 40 }, { "epoch": 0.004942882249560633, "grad_norm": 0.3714193105697632, "learning_rate": 3.624979403526116e-07, "loss": 0.2955, "step": 60 }, { "epoch": 0.006590509666080844, "grad_norm": 0.3106062412261963, "learning_rate": 5.272697314219806e-07, "loss": 0.2754, "step": 80 }, { "epoch": 0.008238137082601054, "grad_norm": 3.2078254222869873, "learning_rate": 6.838029329378811e-07, "loss": 0.3177, "step": 100 }, { "epoch": 0.009885764499121265, "grad_norm": 0.38503074645996094, "learning_rate": 8.485747240072501e-07, "loss": 0.1905, "step": 120 }, { "epoch": 0.011533391915641476, "grad_norm": 0.15724419057369232, "learning_rate": 1.013346515076619e-06, "loss": 0.2942, "step": 140 }, { "epoch": 0.013181019332161687, "grad_norm": 0.5840986967086792, "learning_rate": 1.1781183061459877e-06, "loss": 0.2907, "step": 160 }, { "epoch": 0.014828646748681899, "grad_norm": 0.23122897744178772, "learning_rate": 1.3346515076618883e-06, "loss": 0.2811, "step": 180 }, { "epoch": 0.016476274165202108, "grad_norm": 2.7248637676239014, "learning_rate": 1.4994232987312573e-06, "loss": 0.3168, "step": 200 }, { "epoch": 0.01812390158172232, "grad_norm": 1.0846350193023682, "learning_rate": 1.6641950898006263e-06, "loss": 0.1979, "step": 220 }, { "epoch": 0.01977152899824253, "grad_norm": 1.7905633449554443, "learning_rate": 1.828966880869995e-06, "loss": 0.2656, "step": 240 }, { "epoch": 0.021419156414762743, "grad_norm": 0.31305554509162903, "learning_rate": 1.993738671939364e-06, "loss": 0.272, "step": 260 }, { "epoch": 0.023066783831282953, "grad_norm": 0.31258106231689453, "learning_rate": 2.158510463008733e-06, "loss": 0.2693, "step": 280 }, { "epoch": 0.024714411247803162, "grad_norm": 1.783349871635437, "learning_rate": 2.323282254078102e-06, "loss": 0.3116, "step": 300 }, { "epoch": 0.026362038664323375, "grad_norm": 0.6936488747596741, "learning_rate": 2.488054045147471e-06, "loss": 0.1931, "step": 320 }, { "epoch": 0.028009666080843584, "grad_norm": 0.6185577511787415, "learning_rate": 2.65282583621684e-06, "loss": 0.258, "step": 340 }, { "epoch": 0.029657293497363797, "grad_norm": 0.3592207729816437, "learning_rate": 2.8175976272862085e-06, "loss": 0.2401, "step": 360 }, { "epoch": 0.03130492091388401, "grad_norm": 1.2324920892715454, "learning_rate": 2.982369418355578e-06, "loss": 0.2371, "step": 380 }, { "epoch": 0.032952548330404216, "grad_norm": 1.5560214519500732, "learning_rate": 3.147141209424947e-06, "loss": 0.2414, "step": 400 }, { "epoch": 0.03460017574692443, "grad_norm": 0.8133947253227234, "learning_rate": 3.3119130004943154e-06, "loss": 0.1744, "step": 420 }, { "epoch": 0.03624780316344464, "grad_norm": 0.8599107265472412, "learning_rate": 3.4766847915636844e-06, "loss": 0.2536, "step": 440 }, { "epoch": 0.03789543057996485, "grad_norm": 0.3699595034122467, "learning_rate": 3.641456582633054e-06, "loss": 0.2552, "step": 460 }, { "epoch": 0.03954305799648506, "grad_norm": 1.2955116033554077, "learning_rate": 3.797989784148954e-06, "loss": 0.2532, "step": 480 }, { "epoch": 0.041190685413005274, "grad_norm": 11.518170356750488, "learning_rate": 3.962761575218322e-06, "loss": 0.2507, "step": 500 }, { "epoch": 0.042838312829525486, "grad_norm": 0.9779248833656311, "learning_rate": 4.127533366287692e-06, "loss": 0.1733, "step": 520 }, { "epoch": 0.04448594024604569, "grad_norm": 0.5386682152748108, "learning_rate": 4.29230515735706e-06, "loss": 0.2415, "step": 540 }, { "epoch": 0.046133567662565905, "grad_norm": 2.7366793155670166, "learning_rate": 4.457076948426429e-06, "loss": 0.2409, "step": 560 }, { "epoch": 0.04778119507908612, "grad_norm": 1.9912066459655762, "learning_rate": 4.621848739495799e-06, "loss": 0.2134, "step": 580 }, { "epoch": 0.049428822495606324, "grad_norm": 4.058573246002197, "learning_rate": 4.786620530565167e-06, "loss": 0.2456, "step": 600 }, { "epoch": 0.05107644991212654, "grad_norm": 0.37456750869750977, "learning_rate": 4.951392321634536e-06, "loss": 0.1589, "step": 620 }, { "epoch": 0.05272407732864675, "grad_norm": 0.7950440645217896, "learning_rate": 5.116164112703905e-06, "loss": 0.2485, "step": 640 }, { "epoch": 0.05437170474516696, "grad_norm": 1.884665846824646, "learning_rate": 5.280935903773274e-06, "loss": 0.2197, "step": 660 }, { "epoch": 0.05601933216168717, "grad_norm": 0.32170844078063965, "learning_rate": 5.445707694842643e-06, "loss": 0.2111, "step": 680 }, { "epoch": 0.05766695957820738, "grad_norm": 1.8230172395706177, "learning_rate": 5.610479485912012e-06, "loss": 0.2379, "step": 700 }, { "epoch": 0.059314586994727594, "grad_norm": 1.2472524642944336, "learning_rate": 5.775251276981381e-06, "loss": 0.1684, "step": 720 }, { "epoch": 0.0609622144112478, "grad_norm": 0.29022061824798584, "learning_rate": 5.94002306805075e-06, "loss": 0.2201, "step": 740 }, { "epoch": 0.06260984182776802, "grad_norm": 0.49721184372901917, "learning_rate": 6.104794859120118e-06, "loss": 0.2297, "step": 760 }, { "epoch": 0.06425746924428823, "grad_norm": 0.6133716106414795, "learning_rate": 6.269566650189487e-06, "loss": 0.3173, "step": 780 }, { "epoch": 0.06590509666080843, "grad_norm": 0.9667792320251465, "learning_rate": 6.434338441258857e-06, "loss": 0.2349, "step": 800 }, { "epoch": 0.06755272407732865, "grad_norm": 0.3177216053009033, "learning_rate": 6.599110232328226e-06, "loss": 0.1635, "step": 820 }, { "epoch": 0.06920035149384886, "grad_norm": 0.8457621335983276, "learning_rate": 6.763882023397594e-06, "loss": 0.2777, "step": 840 }, { "epoch": 0.07084797891036906, "grad_norm": 0.3330087661743164, "learning_rate": 6.928653814466963e-06, "loss": 0.2154, "step": 860 }, { "epoch": 0.07249560632688928, "grad_norm": 0.5845814943313599, "learning_rate": 7.093425605536333e-06, "loss": 0.2164, "step": 880 }, { "epoch": 0.07414323374340949, "grad_norm": 2.325303554534912, "learning_rate": 7.258197396605701e-06, "loss": 0.2068, "step": 900 }, { "epoch": 0.0757908611599297, "grad_norm": 0.21893823146820068, "learning_rate": 7.414730598121602e-06, "loss": 0.1621, "step": 920 }, { "epoch": 0.07743848857644992, "grad_norm": 0.5854327082633972, "learning_rate": 7.5795023891909705e-06, "loss": 0.2281, "step": 940 }, { "epoch": 0.07908611599297012, "grad_norm": 0.2406030148267746, "learning_rate": 7.74427418026034e-06, "loss": 0.2342, "step": 960 }, { "epoch": 0.08073374340949033, "grad_norm": 1.3764126300811768, "learning_rate": 7.90904597132971e-06, "loss": 0.2018, "step": 980 }, { "epoch": 0.08238137082601055, "grad_norm": 0.9587862491607666, "learning_rate": 8.073817762399077e-06, "loss": 0.2489, "step": 1000 }, { "epoch": 0.08402899824253075, "grad_norm": 0.9726558923721313, "learning_rate": 8.238589553468447e-06, "loss": 0.154, "step": 1020 }, { "epoch": 0.08567662565905097, "grad_norm": 1.9828400611877441, "learning_rate": 8.403361344537817e-06, "loss": 0.2336, "step": 1040 }, { "epoch": 0.08732425307557118, "grad_norm": 0.14761961996555328, "learning_rate": 8.568133135607183e-06, "loss": 0.1949, "step": 1060 }, { "epoch": 0.08897188049209138, "grad_norm": 0.39077144861221313, "learning_rate": 8.732904926676553e-06, "loss": 0.2067, "step": 1080 }, { "epoch": 0.0906195079086116, "grad_norm": 2.3257837295532227, "learning_rate": 8.897676717745921e-06, "loss": 0.2184, "step": 1100 }, { "epoch": 0.09226713532513181, "grad_norm": 1.0966060161590576, "learning_rate": 9.062448508815291e-06, "loss": 0.1655, "step": 1120 }, { "epoch": 0.09391476274165202, "grad_norm": 0.5648412704467773, "learning_rate": 9.227220299884661e-06, "loss": 0.2097, "step": 1140 }, { "epoch": 0.09556239015817224, "grad_norm": 0.49816444516181946, "learning_rate": 9.39199209095403e-06, "loss": 0.2052, "step": 1160 }, { "epoch": 0.09721001757469244, "grad_norm": 1.1164054870605469, "learning_rate": 9.556763882023397e-06, "loss": 0.2298, "step": 1180 }, { "epoch": 0.09885764499121265, "grad_norm": 0.9453270435333252, "learning_rate": 9.721535673092767e-06, "loss": 0.2157, "step": 1200 }, { "epoch": 0.10050527240773287, "grad_norm": 0.4588276743888855, "learning_rate": 9.886307464162135e-06, "loss": 0.1433, "step": 1220 }, { "epoch": 0.10215289982425307, "grad_norm": 0.47535696625709534, "learning_rate": 1.0051079255231505e-05, "loss": 0.2115, "step": 1240 }, { "epoch": 0.10380052724077328, "grad_norm": 0.585959792137146, "learning_rate": 1.0215851046300875e-05, "loss": 0.2245, "step": 1260 }, { "epoch": 0.1054481546572935, "grad_norm": 0.24861204624176025, "learning_rate": 1.0380622837370241e-05, "loss": 0.1981, "step": 1280 }, { "epoch": 0.1070957820738137, "grad_norm": 31.602128982543945, "learning_rate": 1.0545394628439611e-05, "loss": 0.2265, "step": 1300 }, { "epoch": 0.10874340949033393, "grad_norm": 0.6234269142150879, "learning_rate": 1.0710166419508981e-05, "loss": 0.154, "step": 1320 }, { "epoch": 0.11039103690685413, "grad_norm": 1.2423540353775024, "learning_rate": 1.087493821057835e-05, "loss": 0.2062, "step": 1340 }, { "epoch": 0.11203866432337434, "grad_norm": 0.2090279757976532, "learning_rate": 1.1039710001647719e-05, "loss": 0.2214, "step": 1360 }, { "epoch": 0.11368629173989456, "grad_norm": 0.6145613193511963, "learning_rate": 1.1204481792717087e-05, "loss": 0.1942, "step": 1380 }, { "epoch": 0.11533391915641476, "grad_norm": 0.9004138708114624, "learning_rate": 1.1369253583786455e-05, "loss": 0.2271, "step": 1400 }, { "epoch": 0.11698154657293497, "grad_norm": 1.609165906906128, "learning_rate": 1.1534025374855825e-05, "loss": 0.1606, "step": 1420 }, { "epoch": 0.11862917398945519, "grad_norm": 0.8725568652153015, "learning_rate": 1.1698797165925195e-05, "loss": 0.204, "step": 1440 }, { "epoch": 0.1202768014059754, "grad_norm": 1.8169455528259277, "learning_rate": 1.1863568956994563e-05, "loss": 0.207, "step": 1460 }, { "epoch": 0.1219244288224956, "grad_norm": 0.37334388494491577, "learning_rate": 1.2028340748063933e-05, "loss": 0.2033, "step": 1480 }, { "epoch": 0.12357205623901582, "grad_norm": 15.727474212646484, "learning_rate": 1.2193112539133301e-05, "loss": 0.3298, "step": 1500 }, { "epoch": 0.12521968365553604, "grad_norm": 4.70903205871582, "learning_rate": 1.2357884330202669e-05, "loss": 0.159, "step": 1520 }, { "epoch": 0.12686731107205623, "grad_norm": 0.4843326807022095, "learning_rate": 1.2522656121272039e-05, "loss": 0.2141, "step": 1540 }, { "epoch": 0.12851493848857645, "grad_norm": 0.572084367275238, "learning_rate": 1.2687427912341407e-05, "loss": 0.2182, "step": 1560 }, { "epoch": 0.13016256590509667, "grad_norm": 0.31078797578811646, "learning_rate": 1.2852199703410777e-05, "loss": 0.1994, "step": 1580 }, { "epoch": 0.13181019332161686, "grad_norm": 9.014205932617188, "learning_rate": 1.3016971494480145e-05, "loss": 0.2089, "step": 1600 }, { "epoch": 0.13345782073813708, "grad_norm": 1.0150245428085327, "learning_rate": 1.3181743285549513e-05, "loss": 0.1521, "step": 1620 }, { "epoch": 0.1351054481546573, "grad_norm": 0.25271451473236084, "learning_rate": 1.3346515076618885e-05, "loss": 0.1996, "step": 1640 }, { "epoch": 0.1367530755711775, "grad_norm": 0.47118502855300903, "learning_rate": 1.3511286867688253e-05, "loss": 0.2059, "step": 1660 }, { "epoch": 0.13840070298769772, "grad_norm": 0.5134350657463074, "learning_rate": 1.3676058658757621e-05, "loss": 0.1935, "step": 1680 }, { "epoch": 0.14004833040421794, "grad_norm": 1.0354816913604736, "learning_rate": 1.384083044982699e-05, "loss": 0.2103, "step": 1700 }, { "epoch": 0.14169595782073813, "grad_norm": 0.5588876605033875, "learning_rate": 1.4005602240896359e-05, "loss": 0.1598, "step": 1720 }, { "epoch": 0.14334358523725835, "grad_norm": 0.7309175133705139, "learning_rate": 1.4170374031965727e-05, "loss": 0.2204, "step": 1740 }, { "epoch": 0.14499121265377857, "grad_norm": 0.6155902743339539, "learning_rate": 1.4335145823035099e-05, "loss": 0.2133, "step": 1760 }, { "epoch": 0.14663884007029876, "grad_norm": 0.7660940885543823, "learning_rate": 1.4499917614104467e-05, "loss": 0.2065, "step": 1780 }, { "epoch": 0.14828646748681898, "grad_norm": 1.1954026222229004, "learning_rate": 1.4664689405173835e-05, "loss": 0.2147, "step": 1800 }, { "epoch": 0.1499340949033392, "grad_norm": 0.4249323606491089, "learning_rate": 1.4829461196243205e-05, "loss": 0.1553, "step": 1820 }, { "epoch": 0.1515817223198594, "grad_norm": 2.9014129638671875, "learning_rate": 1.4994232987312573e-05, "loss": 0.2208, "step": 1840 }, { "epoch": 0.1532293497363796, "grad_norm": 1.6474498510360718, "learning_rate": 1.5159004778381941e-05, "loss": 0.209, "step": 1860 }, { "epoch": 0.15487697715289983, "grad_norm": 0.1585623174905777, "learning_rate": 1.532377656945131e-05, "loss": 0.1873, "step": 1880 }, { "epoch": 0.15652460456942002, "grad_norm": 1.171941876411438, "learning_rate": 1.548854836052068e-05, "loss": 0.2389, "step": 1900 }, { "epoch": 0.15817223198594024, "grad_norm": 0.48890382051467896, "learning_rate": 1.5653320151590047e-05, "loss": 0.1679, "step": 1920 }, { "epoch": 0.15981985940246046, "grad_norm": 0.5568016767501831, "learning_rate": 1.581809194265942e-05, "loss": 0.1968, "step": 1940 }, { "epoch": 0.16146748681898065, "grad_norm": 0.9775394797325134, "learning_rate": 1.5982863733728787e-05, "loss": 0.2208, "step": 1960 }, { "epoch": 0.16311511423550087, "grad_norm": 0.60302734375, "learning_rate": 1.6147635524798155e-05, "loss": 0.1929, "step": 1980 }, { "epoch": 0.1647627416520211, "grad_norm": 1.7513552904129028, "learning_rate": 1.6312407315867526e-05, "loss": 0.2055, "step": 2000 }, { "epoch": 0.1647627416520211, "eval_loss": 0.5699400305747986, "eval_runtime": 686.8117, "eval_samples_per_second": 31.725, "eval_steps_per_second": 7.932, "eval_wer": 0.23469093535410654, "step": 2000 }, { "epoch": 0.16641036906854131, "grad_norm": 1.049734354019165, "learning_rate": 1.6477179106936894e-05, "loss": 0.1608, "step": 2020 }, { "epoch": 0.1680579964850615, "grad_norm": 1.7113618850708008, "learning_rate": 1.6641950898006263e-05, "loss": 0.2008, "step": 2040 }, { "epoch": 0.16970562390158173, "grad_norm": 0.3202134370803833, "learning_rate": 1.6806722689075634e-05, "loss": 0.1973, "step": 2060 }, { "epoch": 0.17135325131810195, "grad_norm": 0.3612610697746277, "learning_rate": 1.6971494480145e-05, "loss": 0.1732, "step": 2080 }, { "epoch": 0.17300087873462214, "grad_norm": 1.8115849494934082, "learning_rate": 1.7136266271214367e-05, "loss": 0.2138, "step": 2100 }, { "epoch": 0.17464850615114236, "grad_norm": 0.7046949863433838, "learning_rate": 1.7301038062283735e-05, "loss": 0.1571, "step": 2120 }, { "epoch": 0.17629613356766258, "grad_norm": 0.5983096957206726, "learning_rate": 1.7465809853353107e-05, "loss": 0.2092, "step": 2140 }, { "epoch": 0.17794376098418277, "grad_norm": 0.17064958810806274, "learning_rate": 1.7630581644422475e-05, "loss": 0.2083, "step": 2160 }, { "epoch": 0.179591388400703, "grad_norm": 1.422013759613037, "learning_rate": 1.7795353435491843e-05, "loss": 0.2087, "step": 2180 }, { "epoch": 0.1812390158172232, "grad_norm": 0.982097864151001, "learning_rate": 1.7960125226561214e-05, "loss": 0.2161, "step": 2200 }, { "epoch": 0.1828866432337434, "grad_norm": 0.2690947949886322, "learning_rate": 1.8124897017630583e-05, "loss": 0.1693, "step": 2220 }, { "epoch": 0.18453427065026362, "grad_norm": 0.24867244064807892, "learning_rate": 1.828966880869995e-05, "loss": 0.2058, "step": 2240 }, { "epoch": 0.18618189806678384, "grad_norm": 0.9435555934906006, "learning_rate": 1.8454440599769322e-05, "loss": 0.1991, "step": 2260 }, { "epoch": 0.18782952548330403, "grad_norm": 0.23964335024356842, "learning_rate": 1.861921239083869e-05, "loss": 0.1932, "step": 2280 }, { "epoch": 0.18947715289982425, "grad_norm": 2.1007418632507324, "learning_rate": 1.878398418190806e-05, "loss": 0.2075, "step": 2300 }, { "epoch": 0.19112478031634447, "grad_norm": 0.31368857622146606, "learning_rate": 1.8948755972977427e-05, "loss": 0.1557, "step": 2320 }, { "epoch": 0.19277240773286466, "grad_norm": 0.4029647409915924, "learning_rate": 1.9113527764046795e-05, "loss": 0.1952, "step": 2340 }, { "epoch": 0.19442003514938488, "grad_norm": 0.38545289635658264, "learning_rate": 1.9278299555116163e-05, "loss": 0.1998, "step": 2360 }, { "epoch": 0.1960676625659051, "grad_norm": 0.44573166966438293, "learning_rate": 1.9443071346185534e-05, "loss": 0.2078, "step": 2380 }, { "epoch": 0.1977152899824253, "grad_norm": 9.132265090942383, "learning_rate": 1.9607843137254903e-05, "loss": 0.2175, "step": 2400 }, { "epoch": 0.19936291739894552, "grad_norm": 0.42929643392562866, "learning_rate": 1.977261492832427e-05, "loss": 0.1762, "step": 2420 }, { "epoch": 0.20101054481546574, "grad_norm": 0.6267173886299133, "learning_rate": 1.9937386719393642e-05, "loss": 0.2101, "step": 2440 }, { "epoch": 0.20265817223198593, "grad_norm": 9.70997142791748, "learning_rate": 2.010215851046301e-05, "loss": 0.2101, "step": 2460 }, { "epoch": 0.20430579964850615, "grad_norm": 0.47748956084251404, "learning_rate": 2.026693030153238e-05, "loss": 0.2039, "step": 2480 }, { "epoch": 0.20595342706502637, "grad_norm": 1.3222582340240479, "learning_rate": 2.043170209260175e-05, "loss": 0.213, "step": 2500 }, { "epoch": 0.20760105448154656, "grad_norm": 0.4152863919734955, "learning_rate": 2.0596473883671115e-05, "loss": 0.148, "step": 2520 }, { "epoch": 0.20924868189806678, "grad_norm": 0.7384160757064819, "learning_rate": 2.0761245674740483e-05, "loss": 0.2138, "step": 2540 }, { "epoch": 0.210896309314587, "grad_norm": 0.27651092410087585, "learning_rate": 2.0926017465809854e-05, "loss": 0.2046, "step": 2560 }, { "epoch": 0.21254393673110722, "grad_norm": 0.226897194981575, "learning_rate": 2.1090789256879222e-05, "loss": 0.1904, "step": 2580 }, { "epoch": 0.2141915641476274, "grad_norm": 1.2391464710235596, "learning_rate": 2.125556104794859e-05, "loss": 0.204, "step": 2600 }, { "epoch": 0.21583919156414763, "grad_norm": 1.6048617362976074, "learning_rate": 2.1420332839017962e-05, "loss": 0.1548, "step": 2620 }, { "epoch": 0.21748681898066785, "grad_norm": 0.28409889340400696, "learning_rate": 2.158510463008733e-05, "loss": 0.201, "step": 2640 }, { "epoch": 0.21913444639718804, "grad_norm": 0.354885995388031, "learning_rate": 2.17498764211567e-05, "loss": 0.2083, "step": 2660 }, { "epoch": 0.22078207381370826, "grad_norm": 0.2778099775314331, "learning_rate": 2.191464821222607e-05, "loss": 0.1891, "step": 2680 }, { "epoch": 0.22242970123022848, "grad_norm": 1.007686734199524, "learning_rate": 2.2079420003295438e-05, "loss": 0.2152, "step": 2700 }, { "epoch": 0.22407732864674867, "grad_norm": 0.9725649952888489, "learning_rate": 2.2244191794364806e-05, "loss": 0.1581, "step": 2720 }, { "epoch": 0.2257249560632689, "grad_norm": 0.2451123297214508, "learning_rate": 2.2408963585434174e-05, "loss": 0.2019, "step": 2740 }, { "epoch": 0.22737258347978911, "grad_norm": 0.3667006194591522, "learning_rate": 2.2573735376503542e-05, "loss": 0.2083, "step": 2760 }, { "epoch": 0.2290202108963093, "grad_norm": 3.1283884048461914, "learning_rate": 2.273850716757291e-05, "loss": 0.194, "step": 2780 }, { "epoch": 0.23066783831282953, "grad_norm": 0.7148507237434387, "learning_rate": 2.2903278958642282e-05, "loss": 0.1931, "step": 2800 }, { "epoch": 0.23231546572934975, "grad_norm": 0.5805519223213196, "learning_rate": 2.306805074971165e-05, "loss": 0.1552, "step": 2820 }, { "epoch": 0.23396309314586994, "grad_norm": 0.8168196082115173, "learning_rate": 2.323282254078102e-05, "loss": 0.2107, "step": 2840 }, { "epoch": 0.23561072056239016, "grad_norm": 0.17171867191791534, "learning_rate": 2.339759433185039e-05, "loss": 0.2195, "step": 2860 }, { "epoch": 0.23725834797891038, "grad_norm": 0.6692082285881042, "learning_rate": 2.3562366122919758e-05, "loss": 0.1975, "step": 2880 }, { "epoch": 0.23890597539543057, "grad_norm": 1.5185160636901855, "learning_rate": 2.3727137913989126e-05, "loss": 0.2086, "step": 2900 }, { "epoch": 0.2405536028119508, "grad_norm": 0.8978987336158752, "learning_rate": 2.3891909705058498e-05, "loss": 0.1537, "step": 2920 }, { "epoch": 0.242201230228471, "grad_norm": 1.1462221145629883, "learning_rate": 2.4056681496127866e-05, "loss": 0.215, "step": 2940 }, { "epoch": 0.2438488576449912, "grad_norm": 0.2455727905035019, "learning_rate": 2.422145328719723e-05, "loss": 0.2137, "step": 2960 }, { "epoch": 0.24549648506151142, "grad_norm": 0.21464231610298157, "learning_rate": 2.4386225078266602e-05, "loss": 0.203, "step": 2980 }, { "epoch": 0.24714411247803164, "grad_norm": 0.8478316068649292, "learning_rate": 2.455099686933597e-05, "loss": 0.2085, "step": 3000 }, { "epoch": 0.24879173989455183, "grad_norm": 0.37225690484046936, "learning_rate": 2.4715768660405338e-05, "loss": 0.1645, "step": 3020 }, { "epoch": 0.2504393673110721, "grad_norm": 1.3999593257904053, "learning_rate": 2.488054045147471e-05, "loss": 0.2129, "step": 3040 }, { "epoch": 0.2520869947275923, "grad_norm": 2.0909199714660645, "learning_rate": 2.5045312242544078e-05, "loss": 0.1926, "step": 3060 }, { "epoch": 0.25373462214411246, "grad_norm": 0.19655053317546844, "learning_rate": 2.5210084033613446e-05, "loss": 0.2007, "step": 3080 }, { "epoch": 0.2553822495606327, "grad_norm": 1.2680870294570923, "learning_rate": 2.5374855824682814e-05, "loss": 0.2107, "step": 3100 }, { "epoch": 0.2570298769771529, "grad_norm": 0.19821316003799438, "learning_rate": 2.5539627615752182e-05, "loss": 0.1511, "step": 3120 }, { "epoch": 0.2586775043936731, "grad_norm": 0.22427937388420105, "learning_rate": 2.5704399406821554e-05, "loss": 0.2139, "step": 3140 }, { "epoch": 0.26032513181019334, "grad_norm": 0.4531656503677368, "learning_rate": 2.5869171197890922e-05, "loss": 0.1995, "step": 3160 }, { "epoch": 0.26197275922671354, "grad_norm": 0.3967747986316681, "learning_rate": 2.603394298896029e-05, "loss": 0.1981, "step": 3180 }, { "epoch": 0.26362038664323373, "grad_norm": 1.0957462787628174, "learning_rate": 2.6198714780029658e-05, "loss": 0.2092, "step": 3200 }, { "epoch": 0.265268014059754, "grad_norm": 0.5567193627357483, "learning_rate": 2.6363486571099026e-05, "loss": 0.1642, "step": 3220 }, { "epoch": 0.26691564147627417, "grad_norm": 0.3523741066455841, "learning_rate": 2.6528258362168395e-05, "loss": 0.2059, "step": 3240 }, { "epoch": 0.26856326889279436, "grad_norm": 0.40257710218429565, "learning_rate": 2.669303015323777e-05, "loss": 0.195, "step": 3260 }, { "epoch": 0.2702108963093146, "grad_norm": 0.3187640309333801, "learning_rate": 2.6849563354753665e-05, "loss": 0.1827, "step": 3280 }, { "epoch": 0.2718585237258348, "grad_norm": 0.7375414967536926, "learning_rate": 2.701433514582304e-05, "loss": 0.2247, "step": 3300 }, { "epoch": 0.273506151142355, "grad_norm": 0.45597076416015625, "learning_rate": 2.7179106936892408e-05, "loss": 0.1654, "step": 3320 }, { "epoch": 0.27515377855887524, "grad_norm": 0.21507132053375244, "learning_rate": 2.7343878727961776e-05, "loss": 0.1858, "step": 3340 }, { "epoch": 0.27680140597539543, "grad_norm": 0.7203060388565063, "learning_rate": 2.7508650519031144e-05, "loss": 0.1908, "step": 3360 }, { "epoch": 0.2784490333919156, "grad_norm": 0.8007901906967163, "learning_rate": 2.7673422310100512e-05, "loss": 0.1793, "step": 3380 }, { "epoch": 0.28009666080843587, "grad_norm": 3.210064649581909, "learning_rate": 2.783819410116988e-05, "loss": 0.206, "step": 3400 }, { "epoch": 0.28174428822495606, "grad_norm": 0.8950255513191223, "learning_rate": 2.8002965892239252e-05, "loss": 0.1531, "step": 3420 }, { "epoch": 0.28339191564147626, "grad_norm": 0.4942973256111145, "learning_rate": 2.816773768330862e-05, "loss": 0.1916, "step": 3440 }, { "epoch": 0.2850395430579965, "grad_norm": 0.31426137685775757, "learning_rate": 2.8332509474377988e-05, "loss": 0.2013, "step": 3460 }, { "epoch": 0.2866871704745167, "grad_norm": 0.47154414653778076, "learning_rate": 2.8497281265447356e-05, "loss": 0.1831, "step": 3480 }, { "epoch": 0.2883347978910369, "grad_norm": 0.8456603288650513, "learning_rate": 2.8662053056516724e-05, "loss": 0.2043, "step": 3500 }, { "epoch": 0.28998242530755713, "grad_norm": 0.29031482338905334, "learning_rate": 2.8826824847586092e-05, "loss": 0.1607, "step": 3520 }, { "epoch": 0.2916300527240773, "grad_norm": 0.3170378804206848, "learning_rate": 2.8991596638655467e-05, "loss": 0.1916, "step": 3540 }, { "epoch": 0.2932776801405975, "grad_norm": 0.3800877332687378, "learning_rate": 2.9156368429724836e-05, "loss": 0.2051, "step": 3560 }, { "epoch": 0.29492530755711777, "grad_norm": 0.5847609639167786, "learning_rate": 2.9321140220794204e-05, "loss": 0.196, "step": 3580 }, { "epoch": 0.29657293497363796, "grad_norm": 1.0933667421340942, "learning_rate": 2.9485912011863572e-05, "loss": 0.2154, "step": 3600 }, { "epoch": 0.29822056239015815, "grad_norm": 4.349573135375977, "learning_rate": 2.9650683802932937e-05, "loss": 0.1606, "step": 3620 }, { "epoch": 0.2998681898066784, "grad_norm": 0.4264489710330963, "learning_rate": 2.9815455594002305e-05, "loss": 0.2117, "step": 3640 }, { "epoch": 0.3015158172231986, "grad_norm": 0.47935691475868225, "learning_rate": 2.998022738507168e-05, "loss": 0.1901, "step": 3660 }, { "epoch": 0.3031634446397188, "grad_norm": 0.7258153557777405, "learning_rate": 3.0144999176141048e-05, "loss": 0.189, "step": 3680 }, { "epoch": 0.30481107205623903, "grad_norm": 2.0093941688537598, "learning_rate": 3.0309770967210416e-05, "loss": 0.2104, "step": 3700 }, { "epoch": 0.3064586994727592, "grad_norm": 1.1718699932098389, "learning_rate": 3.0474542758279784e-05, "loss": 0.1577, "step": 3720 }, { "epoch": 0.3081063268892794, "grad_norm": 0.19388867914676666, "learning_rate": 3.063931454934915e-05, "loss": 0.2011, "step": 3740 }, { "epoch": 0.30975395430579966, "grad_norm": 0.2112320065498352, "learning_rate": 3.080408634041852e-05, "loss": 0.2077, "step": 3760 }, { "epoch": 0.31140158172231985, "grad_norm": 1.9554697275161743, "learning_rate": 3.096885813148789e-05, "loss": 0.1861, "step": 3780 }, { "epoch": 0.31304920913884005, "grad_norm": 0.7065563201904297, "learning_rate": 3.113362992255726e-05, "loss": 0.2071, "step": 3800 }, { "epoch": 0.3146968365553603, "grad_norm": 0.4599238634109497, "learning_rate": 3.129840171362663e-05, "loss": 0.1655, "step": 3820 }, { "epoch": 0.3163444639718805, "grad_norm": 0.5441445708274841, "learning_rate": 3.1463173504695996e-05, "loss": 0.1971, "step": 3840 }, { "epoch": 0.3179920913884007, "grad_norm": 1.1993862390518188, "learning_rate": 3.1627945295765364e-05, "loss": 0.2048, "step": 3860 }, { "epoch": 0.3196397188049209, "grad_norm": 0.3095191717147827, "learning_rate": 3.179271708683473e-05, "loss": 0.2009, "step": 3880 }, { "epoch": 0.3212873462214411, "grad_norm": 1.0743999481201172, "learning_rate": 3.195748887790411e-05, "loss": 0.2371, "step": 3900 }, { "epoch": 0.3229349736379613, "grad_norm": 0.5000220537185669, "learning_rate": 3.2122260668973475e-05, "loss": 0.1722, "step": 3920 }, { "epoch": 0.32458260105448156, "grad_norm": 1.1417018175125122, "learning_rate": 3.2287032460042844e-05, "loss": 0.2079, "step": 3940 }, { "epoch": 0.32623022847100175, "grad_norm": 1.099433422088623, "learning_rate": 3.245180425111221e-05, "loss": 0.2009, "step": 3960 }, { "epoch": 0.32787785588752194, "grad_norm": 0.3827146589756012, "learning_rate": 3.261657604218158e-05, "loss": 0.2171, "step": 3980 }, { "epoch": 0.3295254833040422, "grad_norm": 1.1845418214797974, "learning_rate": 3.278134783325095e-05, "loss": 0.2195, "step": 4000 }, { "epoch": 0.3295254833040422, "eval_loss": 0.5868579149246216, "eval_runtime": 260.92, "eval_samples_per_second": 83.508, "eval_steps_per_second": 20.88, "eval_wer": 0.2350240606008012, "step": 4000 } ], "logging_steps": 20, "max_steps": 60690, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.157571471334336e+20, "train_batch_size": 24, "trial_name": null, "trial_params": null }