{ "best_metric": null, "best_model_checkpoint": null, "epoch": 21.73913043478261, "eval_steps": 400, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.043478260869565216, "grad_norm": 47.171226501464844, "learning_rate": 9.995652173913044e-06, "loss": 13.5424, "step": 4 }, { "epoch": 0.08695652173913043, "grad_norm": 49.8515510559082, "learning_rate": 9.98913043478261e-06, "loss": 9.1597, "step": 8 }, { "epoch": 0.13043478260869565, "grad_norm": 29.11096954345703, "learning_rate": 9.982608695652175e-06, "loss": 6.5185, "step": 12 }, { "epoch": 0.17391304347826086, "grad_norm": 46.89970779418945, "learning_rate": 9.973913043478262e-06, "loss": 5.2362, "step": 16 }, { "epoch": 0.21739130434782608, "grad_norm": 25.512113571166992, "learning_rate": 9.965217391304348e-06, "loss": 3.9746, "step": 20 }, { "epoch": 0.2608695652173913, "grad_norm": 27.412797927856445, "learning_rate": 9.956521739130436e-06, "loss": 3.4012, "step": 24 }, { "epoch": 0.30434782608695654, "grad_norm": 20.463056564331055, "learning_rate": 9.947826086956522e-06, "loss": 2.9379, "step": 28 }, { "epoch": 0.34782608695652173, "grad_norm": 27.398883819580078, "learning_rate": 9.93913043478261e-06, "loss": 2.5659, "step": 32 }, { "epoch": 0.391304347826087, "grad_norm": 15.34399127960205, "learning_rate": 9.930434782608697e-06, "loss": 2.3444, "step": 36 }, { "epoch": 0.43478260869565216, "grad_norm": 12.422914505004883, "learning_rate": 9.921739130434783e-06, "loss": 1.8916, "step": 40 }, { "epoch": 0.4782608695652174, "grad_norm": 13.897340774536133, "learning_rate": 9.913043478260871e-06, "loss": 1.7226, "step": 44 }, { "epoch": 0.5217391304347826, "grad_norm": 13.420745849609375, "learning_rate": 9.904347826086957e-06, "loss": 1.4487, "step": 48 }, { "epoch": 0.5652173913043478, "grad_norm": 11.356691360473633, "learning_rate": 9.895652173913045e-06, "loss": 1.2677, "step": 52 }, { "epoch": 0.6086956521739131, "grad_norm": 13.727324485778809, "learning_rate": 9.886956521739132e-06, "loss": 1.1441, "step": 56 }, { "epoch": 0.6521739130434783, "grad_norm": 91.36569213867188, "learning_rate": 9.878260869565218e-06, "loss": 0.9725, "step": 60 }, { "epoch": 0.6956521739130435, "grad_norm": 10.242810249328613, "learning_rate": 9.869565217391304e-06, "loss": 0.9417, "step": 64 }, { "epoch": 0.7391304347826086, "grad_norm": 7.823087215423584, "learning_rate": 9.860869565217392e-06, "loss": 0.9272, "step": 68 }, { "epoch": 0.782608695652174, "grad_norm": 12.870728492736816, "learning_rate": 9.852173913043478e-06, "loss": 0.8999, "step": 72 }, { "epoch": 0.8260869565217391, "grad_norm": 9.052152633666992, "learning_rate": 9.843478260869566e-06, "loss": 0.7101, "step": 76 }, { "epoch": 0.8695652173913043, "grad_norm": 12.759235382080078, "learning_rate": 9.834782608695654e-06, "loss": 0.8, "step": 80 }, { "epoch": 0.9130434782608695, "grad_norm": 8.912745475769043, "learning_rate": 9.82608695652174e-06, "loss": 0.7605, "step": 84 }, { "epoch": 0.9565217391304348, "grad_norm": 16.077392578125, "learning_rate": 9.817391304347826e-06, "loss": 0.6604, "step": 88 }, { "epoch": 1.0, "grad_norm": 15.102202415466309, "learning_rate": 9.808695652173913e-06, "loss": 0.722, "step": 92 }, { "epoch": 1.0434782608695652, "grad_norm": 7.68995475769043, "learning_rate": 9.800000000000001e-06, "loss": 0.5627, "step": 96 }, { "epoch": 1.0869565217391304, "grad_norm": 9.279730796813965, "learning_rate": 9.791304347826089e-06, "loss": 0.5249, "step": 100 }, { "epoch": 1.1304347826086956, "grad_norm": 7.119338035583496, "learning_rate": 9.782608695652175e-06, "loss": 0.5265, "step": 104 }, { "epoch": 1.1739130434782608, "grad_norm": 6.912904739379883, "learning_rate": 9.77391304347826e-06, "loss": 0.5091, "step": 108 }, { "epoch": 1.2173913043478262, "grad_norm": 7.478869915008545, "learning_rate": 9.765217391304348e-06, "loss": 0.5577, "step": 112 }, { "epoch": 1.2608695652173914, "grad_norm": 9.599380493164062, "learning_rate": 9.756521739130436e-06, "loss": 0.4835, "step": 116 }, { "epoch": 1.3043478260869565, "grad_norm": 9.232748031616211, "learning_rate": 9.747826086956522e-06, "loss": 0.4713, "step": 120 }, { "epoch": 1.3478260869565217, "grad_norm": 5.770024299621582, "learning_rate": 9.73913043478261e-06, "loss": 0.5085, "step": 124 }, { "epoch": 1.391304347826087, "grad_norm": 10.671361923217773, "learning_rate": 9.730434782608696e-06, "loss": 0.4687, "step": 128 }, { "epoch": 1.434782608695652, "grad_norm": 6.39243745803833, "learning_rate": 9.721739130434784e-06, "loss": 0.4141, "step": 132 }, { "epoch": 1.4782608695652173, "grad_norm": 5.833802700042725, "learning_rate": 9.713043478260871e-06, "loss": 0.4197, "step": 136 }, { "epoch": 1.5217391304347827, "grad_norm": 5.65615701675415, "learning_rate": 9.704347826086957e-06, "loss": 0.441, "step": 140 }, { "epoch": 1.5652173913043477, "grad_norm": 6.189269542694092, "learning_rate": 9.695652173913043e-06, "loss": 0.4112, "step": 144 }, { "epoch": 1.608695652173913, "grad_norm": 6.086719512939453, "learning_rate": 9.686956521739131e-06, "loss": 0.4976, "step": 148 }, { "epoch": 1.6521739130434783, "grad_norm": 6.070087432861328, "learning_rate": 9.678260869565219e-06, "loss": 0.3907, "step": 152 }, { "epoch": 1.6956521739130435, "grad_norm": 12.213912010192871, "learning_rate": 9.669565217391305e-06, "loss": 0.4124, "step": 156 }, { "epoch": 1.7391304347826086, "grad_norm": 5.771620750427246, "learning_rate": 9.660869565217392e-06, "loss": 0.4145, "step": 160 }, { "epoch": 1.7826086956521738, "grad_norm": 12.128915786743164, "learning_rate": 9.652173913043478e-06, "loss": 0.3937, "step": 164 }, { "epoch": 1.8260869565217392, "grad_norm": 7.719477653503418, "learning_rate": 9.643478260869566e-06, "loss": 0.4234, "step": 168 }, { "epoch": 1.8695652173913042, "grad_norm": 5.7604498863220215, "learning_rate": 9.634782608695654e-06, "loss": 0.3727, "step": 172 }, { "epoch": 1.9130434782608696, "grad_norm": 7.81648588180542, "learning_rate": 9.62608695652174e-06, "loss": 0.383, "step": 176 }, { "epoch": 1.9565217391304348, "grad_norm": 4.417276859283447, "learning_rate": 9.617391304347828e-06, "loss": 0.2987, "step": 180 }, { "epoch": 2.0, "grad_norm": 6.025758266448975, "learning_rate": 9.608695652173914e-06, "loss": 0.3757, "step": 184 }, { "epoch": 2.0434782608695654, "grad_norm": 4.832042694091797, "learning_rate": 9.600000000000001e-06, "loss": 0.2297, "step": 188 }, { "epoch": 2.0869565217391304, "grad_norm": 5.992522716522217, "learning_rate": 9.591304347826087e-06, "loss": 0.277, "step": 192 }, { "epoch": 2.130434782608696, "grad_norm": 5.093125343322754, "learning_rate": 9.582608695652175e-06, "loss": 0.3055, "step": 196 }, { "epoch": 2.1739130434782608, "grad_norm": 6.269705295562744, "learning_rate": 9.573913043478261e-06, "loss": 0.2858, "step": 200 }, { "epoch": 2.217391304347826, "grad_norm": 11.102181434631348, "learning_rate": 9.565217391304349e-06, "loss": 0.2379, "step": 204 }, { "epoch": 2.260869565217391, "grad_norm": 5.047725677490234, "learning_rate": 9.556521739130435e-06, "loss": 0.3002, "step": 208 }, { "epoch": 2.3043478260869565, "grad_norm": 5.518750190734863, "learning_rate": 9.547826086956522e-06, "loss": 0.2765, "step": 212 }, { "epoch": 2.3478260869565215, "grad_norm": 4.136101245880127, "learning_rate": 9.53913043478261e-06, "loss": 0.2232, "step": 216 }, { "epoch": 2.391304347826087, "grad_norm": 15.251372337341309, "learning_rate": 9.530434782608696e-06, "loss": 0.2887, "step": 220 }, { "epoch": 2.4347826086956523, "grad_norm": 7.168148994445801, "learning_rate": 9.521739130434784e-06, "loss": 0.2139, "step": 224 }, { "epoch": 2.4782608695652173, "grad_norm": 4.087583065032959, "learning_rate": 9.51304347826087e-06, "loss": 0.2195, "step": 228 }, { "epoch": 2.5217391304347827, "grad_norm": 4.113509178161621, "learning_rate": 9.504347826086958e-06, "loss": 0.2115, "step": 232 }, { "epoch": 2.5652173913043477, "grad_norm": 6.073418140411377, "learning_rate": 9.495652173913045e-06, "loss": 0.2119, "step": 236 }, { "epoch": 2.608695652173913, "grad_norm": 4.231682300567627, "learning_rate": 9.486956521739131e-06, "loss": 0.2327, "step": 240 }, { "epoch": 2.6521739130434785, "grad_norm": 5.613582134246826, "learning_rate": 9.478260869565217e-06, "loss": 0.2294, "step": 244 }, { "epoch": 2.6956521739130435, "grad_norm": 6.237614631652832, "learning_rate": 9.469565217391305e-06, "loss": 0.2616, "step": 248 }, { "epoch": 2.7391304347826084, "grad_norm": 5.773486614227295, "learning_rate": 9.460869565217393e-06, "loss": 0.2429, "step": 252 }, { "epoch": 2.782608695652174, "grad_norm": 12.50413990020752, "learning_rate": 9.452173913043479e-06, "loss": 0.259, "step": 256 }, { "epoch": 2.8260869565217392, "grad_norm": 5.150471210479736, "learning_rate": 9.443478260869566e-06, "loss": 0.2345, "step": 260 }, { "epoch": 2.869565217391304, "grad_norm": 6.293915748596191, "learning_rate": 9.434782608695652e-06, "loss": 0.2969, "step": 264 }, { "epoch": 2.9130434782608696, "grad_norm": 8.247614860534668, "learning_rate": 9.42608695652174e-06, "loss": 0.2403, "step": 268 }, { "epoch": 2.9565217391304346, "grad_norm": 5.069606781005859, "learning_rate": 9.417391304347828e-06, "loss": 0.2205, "step": 272 }, { "epoch": 3.0, "grad_norm": 4.0079522132873535, "learning_rate": 9.408695652173914e-06, "loss": 0.1906, "step": 276 }, { "epoch": 3.0434782608695654, "grad_norm": 5.8868303298950195, "learning_rate": 9.4e-06, "loss": 0.1781, "step": 280 }, { "epoch": 3.0869565217391304, "grad_norm": 3.611581802368164, "learning_rate": 9.391304347826087e-06, "loss": 0.1476, "step": 284 }, { "epoch": 3.130434782608696, "grad_norm": 3.231337547302246, "learning_rate": 9.382608695652175e-06, "loss": 0.1695, "step": 288 }, { "epoch": 3.1739130434782608, "grad_norm": 3.9809021949768066, "learning_rate": 9.373913043478263e-06, "loss": 0.17, "step": 292 }, { "epoch": 3.217391304347826, "grad_norm": 4.352972030639648, "learning_rate": 9.365217391304349e-06, "loss": 0.1315, "step": 296 }, { "epoch": 3.260869565217391, "grad_norm": 3.489063024520874, "learning_rate": 9.356521739130435e-06, "loss": 0.1721, "step": 300 }, { "epoch": 3.3043478260869565, "grad_norm": 19.128976821899414, "learning_rate": 9.347826086956523e-06, "loss": 0.1779, "step": 304 }, { "epoch": 3.3478260869565215, "grad_norm": 4.8399152755737305, "learning_rate": 9.33913043478261e-06, "loss": 0.1434, "step": 308 }, { "epoch": 3.391304347826087, "grad_norm": 4.583820343017578, "learning_rate": 9.330434782608696e-06, "loss": 0.1448, "step": 312 }, { "epoch": 3.4347826086956523, "grad_norm": 4.670399188995361, "learning_rate": 9.321739130434784e-06, "loss": 0.1463, "step": 316 }, { "epoch": 3.4782608695652173, "grad_norm": 3.626206636428833, "learning_rate": 9.31304347826087e-06, "loss": 0.1611, "step": 320 }, { "epoch": 3.5217391304347827, "grad_norm": 4.661035537719727, "learning_rate": 9.304347826086956e-06, "loss": 0.1612, "step": 324 }, { "epoch": 3.5652173913043477, "grad_norm": 6.019312381744385, "learning_rate": 9.295652173913044e-06, "loss": 0.1565, "step": 328 }, { "epoch": 3.608695652173913, "grad_norm": 4.620255470275879, "learning_rate": 9.286956521739131e-06, "loss": 0.1711, "step": 332 }, { "epoch": 3.6521739130434785, "grad_norm": 3.4951438903808594, "learning_rate": 9.278260869565217e-06, "loss": 0.1397, "step": 336 }, { "epoch": 3.6956521739130435, "grad_norm": 5.785006523132324, "learning_rate": 9.269565217391305e-06, "loss": 0.1618, "step": 340 }, { "epoch": 3.7391304347826084, "grad_norm": 3.1313061714172363, "learning_rate": 9.260869565217391e-06, "loss": 0.178, "step": 344 }, { "epoch": 3.782608695652174, "grad_norm": 2.9009153842926025, "learning_rate": 9.252173913043479e-06, "loss": 0.1628, "step": 348 }, { "epoch": 3.8260869565217392, "grad_norm": 7.5648016929626465, "learning_rate": 9.243478260869567e-06, "loss": 0.1546, "step": 352 }, { "epoch": 3.869565217391304, "grad_norm": 4.314565658569336, "learning_rate": 9.234782608695653e-06, "loss": 0.1542, "step": 356 }, { "epoch": 3.9130434782608696, "grad_norm": 4.508668899536133, "learning_rate": 9.22608695652174e-06, "loss": 0.1452, "step": 360 }, { "epoch": 3.9565217391304346, "grad_norm": 4.037124156951904, "learning_rate": 9.217391304347826e-06, "loss": 0.1622, "step": 364 }, { "epoch": 4.0, "grad_norm": 3.8812248706817627, "learning_rate": 9.208695652173914e-06, "loss": 0.1655, "step": 368 }, { "epoch": 4.043478260869565, "grad_norm": 4.363816261291504, "learning_rate": 9.200000000000002e-06, "loss": 0.1128, "step": 372 }, { "epoch": 4.086956521739131, "grad_norm": 2.9691643714904785, "learning_rate": 9.191304347826088e-06, "loss": 0.1144, "step": 376 }, { "epoch": 4.130434782608695, "grad_norm": 3.795179843902588, "learning_rate": 9.182608695652174e-06, "loss": 0.1031, "step": 380 }, { "epoch": 4.173913043478261, "grad_norm": 2.585742473602295, "learning_rate": 9.173913043478261e-06, "loss": 0.1083, "step": 384 }, { "epoch": 4.217391304347826, "grad_norm": 3.1143500804901123, "learning_rate": 9.165217391304349e-06, "loss": 0.0937, "step": 388 }, { "epoch": 4.260869565217392, "grad_norm": 3.8131914138793945, "learning_rate": 9.156521739130435e-06, "loss": 0.0871, "step": 392 }, { "epoch": 4.304347826086957, "grad_norm": 3.666533946990967, "learning_rate": 9.147826086956523e-06, "loss": 0.09, "step": 396 }, { "epoch": 4.3478260869565215, "grad_norm": 5.618098735809326, "learning_rate": 9.139130434782609e-06, "loss": 0.088, "step": 400 }, { "epoch": 4.3478260869565215, "eval_cer": 0.07770737052741912, "eval_loss": 0.23064376413822174, "eval_runtime": 204.5976, "eval_samples_per_second": 1.799, "eval_steps_per_second": 0.112, "step": 400 }, { "epoch": 4.391304347826087, "grad_norm": 6.6667160987854, "learning_rate": 9.130434782608697e-06, "loss": 0.1078, "step": 404 }, { "epoch": 4.434782608695652, "grad_norm": 2.159426212310791, "learning_rate": 9.121739130434784e-06, "loss": 0.1256, "step": 408 }, { "epoch": 4.478260869565218, "grad_norm": 3.9748709201812744, "learning_rate": 9.11304347826087e-06, "loss": 0.109, "step": 412 }, { "epoch": 4.521739130434782, "grad_norm": 3.3357207775115967, "learning_rate": 9.104347826086958e-06, "loss": 0.1307, "step": 416 }, { "epoch": 4.565217391304348, "grad_norm": 3.22724986076355, "learning_rate": 9.095652173913044e-06, "loss": 0.1005, "step": 420 }, { "epoch": 4.608695652173913, "grad_norm": 30.268951416015625, "learning_rate": 9.086956521739132e-06, "loss": 0.1534, "step": 424 }, { "epoch": 4.6521739130434785, "grad_norm": 2.8930823802948, "learning_rate": 9.07826086956522e-06, "loss": 0.1079, "step": 428 }, { "epoch": 4.695652173913043, "grad_norm": 3.111816644668579, "learning_rate": 9.069565217391305e-06, "loss": 0.1163, "step": 432 }, { "epoch": 4.739130434782608, "grad_norm": 5.531673908233643, "learning_rate": 9.060869565217391e-06, "loss": 0.0988, "step": 436 }, { "epoch": 4.782608695652174, "grad_norm": 3.657074451446533, "learning_rate": 9.052173913043479e-06, "loss": 0.0888, "step": 440 }, { "epoch": 4.826086956521739, "grad_norm": 4.184857368469238, "learning_rate": 9.043478260869565e-06, "loss": 0.1035, "step": 444 }, { "epoch": 4.869565217391305, "grad_norm": 3.1598477363586426, "learning_rate": 9.034782608695653e-06, "loss": 0.1036, "step": 448 }, { "epoch": 4.913043478260869, "grad_norm": 8.149157524108887, "learning_rate": 9.02608695652174e-06, "loss": 0.11, "step": 452 }, { "epoch": 4.956521739130435, "grad_norm": 4.9232330322265625, "learning_rate": 9.017391304347827e-06, "loss": 0.1143, "step": 456 }, { "epoch": 5.0, "grad_norm": 42.848388671875, "learning_rate": 9.008695652173913e-06, "loss": 0.1007, "step": 460 }, { "epoch": 5.043478260869565, "grad_norm": 4.660690784454346, "learning_rate": 9e-06, "loss": 0.0718, "step": 464 }, { "epoch": 5.086956521739131, "grad_norm": 3.026564359664917, "learning_rate": 8.991304347826088e-06, "loss": 0.0839, "step": 468 }, { "epoch": 5.130434782608695, "grad_norm": 2.490325450897217, "learning_rate": 8.982608695652176e-06, "loss": 0.0856, "step": 472 }, { "epoch": 5.173913043478261, "grad_norm": 1.9447076320648193, "learning_rate": 8.973913043478262e-06, "loss": 0.0763, "step": 476 }, { "epoch": 5.217391304347826, "grad_norm": 2.4002890586853027, "learning_rate": 8.965217391304348e-06, "loss": 0.09, "step": 480 }, { "epoch": 5.260869565217392, "grad_norm": 2.6894094944000244, "learning_rate": 8.956521739130435e-06, "loss": 0.0752, "step": 484 }, { "epoch": 5.304347826086957, "grad_norm": 4.181057453155518, "learning_rate": 8.947826086956523e-06, "loss": 0.0789, "step": 488 }, { "epoch": 5.3478260869565215, "grad_norm": 1.981608510017395, "learning_rate": 8.939130434782609e-06, "loss": 0.0644, "step": 492 }, { "epoch": 5.391304347826087, "grad_norm": 5.828056812286377, "learning_rate": 8.930434782608697e-06, "loss": 0.084, "step": 496 }, { "epoch": 5.434782608695652, "grad_norm": 2.0661747455596924, "learning_rate": 8.921739130434783e-06, "loss": 0.0824, "step": 500 }, { "epoch": 5.478260869565218, "grad_norm": 5.284090995788574, "learning_rate": 8.91304347826087e-06, "loss": 0.0885, "step": 504 }, { "epoch": 5.521739130434782, "grad_norm": 5.806349754333496, "learning_rate": 8.904347826086958e-06, "loss": 0.0922, "step": 508 }, { "epoch": 5.565217391304348, "grad_norm": 2.6671273708343506, "learning_rate": 8.895652173913044e-06, "loss": 0.0627, "step": 512 }, { "epoch": 5.608695652173913, "grad_norm": 5.863679885864258, "learning_rate": 8.88695652173913e-06, "loss": 0.0607, "step": 516 }, { "epoch": 5.6521739130434785, "grad_norm": 3.5998377799987793, "learning_rate": 8.878260869565218e-06, "loss": 0.0711, "step": 520 }, { "epoch": 5.695652173913043, "grad_norm": 2.923870801925659, "learning_rate": 8.869565217391306e-06, "loss": 0.0837, "step": 524 }, { "epoch": 5.739130434782608, "grad_norm": 3.152458429336548, "learning_rate": 8.860869565217392e-06, "loss": 0.0706, "step": 528 }, { "epoch": 5.782608695652174, "grad_norm": 3.7412848472595215, "learning_rate": 8.85217391304348e-06, "loss": 0.1121, "step": 532 }, { "epoch": 5.826086956521739, "grad_norm": 3.7156355381011963, "learning_rate": 8.843478260869565e-06, "loss": 0.0735, "step": 536 }, { "epoch": 5.869565217391305, "grad_norm": 3.2829031944274902, "learning_rate": 8.834782608695653e-06, "loss": 0.0901, "step": 540 }, { "epoch": 5.913043478260869, "grad_norm": 4.220902442932129, "learning_rate": 8.82608695652174e-06, "loss": 0.0605, "step": 544 }, { "epoch": 5.956521739130435, "grad_norm": 3.6237545013427734, "learning_rate": 8.817391304347827e-06, "loss": 0.0811, "step": 548 }, { "epoch": 6.0, "grad_norm": 18.45893669128418, "learning_rate": 8.808695652173914e-06, "loss": 0.0655, "step": 552 }, { "epoch": 6.043478260869565, "grad_norm": 2.0105226039886475, "learning_rate": 8.8e-06, "loss": 0.0463, "step": 556 }, { "epoch": 6.086956521739131, "grad_norm": 3.7101633548736572, "learning_rate": 8.791304347826088e-06, "loss": 0.0438, "step": 560 }, { "epoch": 6.130434782608695, "grad_norm": 1.8274800777435303, "learning_rate": 8.782608695652174e-06, "loss": 0.0608, "step": 564 }, { "epoch": 6.173913043478261, "grad_norm": 2.9458870887756348, "learning_rate": 8.773913043478262e-06, "loss": 0.0547, "step": 568 }, { "epoch": 6.217391304347826, "grad_norm": 4.860917568206787, "learning_rate": 8.765217391304348e-06, "loss": 0.0703, "step": 572 }, { "epoch": 6.260869565217392, "grad_norm": 2.066480875015259, "learning_rate": 8.756521739130436e-06, "loss": 0.0577, "step": 576 }, { "epoch": 6.304347826086957, "grad_norm": 6.434304714202881, "learning_rate": 8.747826086956522e-06, "loss": 0.0571, "step": 580 }, { "epoch": 6.3478260869565215, "grad_norm": 3.6393909454345703, "learning_rate": 8.73913043478261e-06, "loss": 0.067, "step": 584 }, { "epoch": 6.391304347826087, "grad_norm": 3.2604472637176514, "learning_rate": 8.730434782608697e-06, "loss": 0.0735, "step": 588 }, { "epoch": 6.434782608695652, "grad_norm": 2.827587127685547, "learning_rate": 8.721739130434783e-06, "loss": 0.0575, "step": 592 }, { "epoch": 6.478260869565218, "grad_norm": 2.7942161560058594, "learning_rate": 8.71304347826087e-06, "loss": 0.0581, "step": 596 }, { "epoch": 6.521739130434782, "grad_norm": 3.0687575340270996, "learning_rate": 8.704347826086957e-06, "loss": 0.0766, "step": 600 }, { "epoch": 6.565217391304348, "grad_norm": 2.9538204669952393, "learning_rate": 8.695652173913044e-06, "loss": 0.0593, "step": 604 }, { "epoch": 6.608695652173913, "grad_norm": 2.8427815437316895, "learning_rate": 8.686956521739132e-06, "loss": 0.0576, "step": 608 }, { "epoch": 6.6521739130434785, "grad_norm": 3.09675669670105, "learning_rate": 8.678260869565218e-06, "loss": 0.0596, "step": 612 }, { "epoch": 6.695652173913043, "grad_norm": 2.505993604660034, "learning_rate": 8.669565217391304e-06, "loss": 0.0643, "step": 616 }, { "epoch": 6.739130434782608, "grad_norm": 4.215645790100098, "learning_rate": 8.660869565217392e-06, "loss": 0.0501, "step": 620 }, { "epoch": 6.782608695652174, "grad_norm": 2.606529712677002, "learning_rate": 8.65217391304348e-06, "loss": 0.0692, "step": 624 }, { "epoch": 6.826086956521739, "grad_norm": 2.0819032192230225, "learning_rate": 8.643478260869566e-06, "loss": 0.0475, "step": 628 }, { "epoch": 6.869565217391305, "grad_norm": 2.3126375675201416, "learning_rate": 8.634782608695653e-06, "loss": 0.0416, "step": 632 }, { "epoch": 6.913043478260869, "grad_norm": 2.6911380290985107, "learning_rate": 8.62608695652174e-06, "loss": 0.0493, "step": 636 }, { "epoch": 6.956521739130435, "grad_norm": 2.5800588130950928, "learning_rate": 8.617391304347827e-06, "loss": 0.0477, "step": 640 }, { "epoch": 7.0, "grad_norm": 2.65940523147583, "learning_rate": 8.608695652173915e-06, "loss": 0.056, "step": 644 }, { "epoch": 7.043478260869565, "grad_norm": 2.3243050575256348, "learning_rate": 8.6e-06, "loss": 0.0401, "step": 648 }, { "epoch": 7.086956521739131, "grad_norm": 2.9676589965820312, "learning_rate": 8.591304347826087e-06, "loss": 0.0365, "step": 652 }, { "epoch": 7.130434782608695, "grad_norm": 2.3540804386138916, "learning_rate": 8.582608695652174e-06, "loss": 0.0343, "step": 656 }, { "epoch": 7.173913043478261, "grad_norm": 2.7761409282684326, "learning_rate": 8.573913043478262e-06, "loss": 0.0373, "step": 660 }, { "epoch": 7.217391304347826, "grad_norm": 1.6700774431228638, "learning_rate": 8.56521739130435e-06, "loss": 0.0326, "step": 664 }, { "epoch": 7.260869565217392, "grad_norm": 4.183841705322266, "learning_rate": 8.556521739130436e-06, "loss": 0.0494, "step": 668 }, { "epoch": 7.304347826086957, "grad_norm": 3.0469799041748047, "learning_rate": 8.547826086956522e-06, "loss": 0.038, "step": 672 }, { "epoch": 7.3478260869565215, "grad_norm": 2.769195079803467, "learning_rate": 8.53913043478261e-06, "loss": 0.0474, "step": 676 }, { "epoch": 7.391304347826087, "grad_norm": 1.6180307865142822, "learning_rate": 8.530434782608697e-06, "loss": 0.0416, "step": 680 }, { "epoch": 7.434782608695652, "grad_norm": 2.314739465713501, "learning_rate": 8.521739130434783e-06, "loss": 0.0333, "step": 684 }, { "epoch": 7.478260869565218, "grad_norm": 1.7624801397323608, "learning_rate": 8.513043478260871e-06, "loss": 0.0294, "step": 688 }, { "epoch": 7.521739130434782, "grad_norm": 1.459468126296997, "learning_rate": 8.504347826086957e-06, "loss": 0.0345, "step": 692 }, { "epoch": 7.565217391304348, "grad_norm": 3.126145839691162, "learning_rate": 8.495652173913043e-06, "loss": 0.0417, "step": 696 }, { "epoch": 7.608695652173913, "grad_norm": 2.4488730430603027, "learning_rate": 8.48695652173913e-06, "loss": 0.0488, "step": 700 }, { "epoch": 7.6521739130434785, "grad_norm": 3.3886046409606934, "learning_rate": 8.478260869565218e-06, "loss": 0.039, "step": 704 }, { "epoch": 7.695652173913043, "grad_norm": 2.5945188999176025, "learning_rate": 8.469565217391304e-06, "loss": 0.0469, "step": 708 }, { "epoch": 7.739130434782608, "grad_norm": 3.02083420753479, "learning_rate": 8.460869565217392e-06, "loss": 0.0524, "step": 712 }, { "epoch": 7.782608695652174, "grad_norm": 1.3947174549102783, "learning_rate": 8.452173913043478e-06, "loss": 0.0338, "step": 716 }, { "epoch": 7.826086956521739, "grad_norm": 3.899280071258545, "learning_rate": 8.443478260869566e-06, "loss": 0.0399, "step": 720 }, { "epoch": 7.869565217391305, "grad_norm": 2.55698299407959, "learning_rate": 8.434782608695653e-06, "loss": 0.0409, "step": 724 }, { "epoch": 7.913043478260869, "grad_norm": 2.460869789123535, "learning_rate": 8.42608695652174e-06, "loss": 0.0452, "step": 728 }, { "epoch": 7.956521739130435, "grad_norm": 1.9155172109603882, "learning_rate": 8.417391304347827e-06, "loss": 0.0607, "step": 732 }, { "epoch": 8.0, "grad_norm": 2.8160810470581055, "learning_rate": 8.408695652173913e-06, "loss": 0.054, "step": 736 }, { "epoch": 8.043478260869565, "grad_norm": 6.335769176483154, "learning_rate": 8.400000000000001e-06, "loss": 0.0274, "step": 740 }, { "epoch": 8.08695652173913, "grad_norm": 1.9223042726516724, "learning_rate": 8.391304347826089e-06, "loss": 0.025, "step": 744 }, { "epoch": 8.130434782608695, "grad_norm": 1.8186802864074707, "learning_rate": 8.382608695652175e-06, "loss": 0.047, "step": 748 }, { "epoch": 8.173913043478262, "grad_norm": 2.7168970108032227, "learning_rate": 8.37391304347826e-06, "loss": 0.0342, "step": 752 }, { "epoch": 8.217391304347826, "grad_norm": 2.6159074306488037, "learning_rate": 8.365217391304348e-06, "loss": 0.0196, "step": 756 }, { "epoch": 8.26086956521739, "grad_norm": 1.7596994638442993, "learning_rate": 8.356521739130436e-06, "loss": 0.033, "step": 760 }, { "epoch": 8.304347826086957, "grad_norm": 1.9787019491195679, "learning_rate": 8.347826086956522e-06, "loss": 0.0352, "step": 764 }, { "epoch": 8.347826086956522, "grad_norm": 1.57660710811615, "learning_rate": 8.33913043478261e-06, "loss": 0.0275, "step": 768 }, { "epoch": 8.391304347826088, "grad_norm": 0.904009222984314, "learning_rate": 8.330434782608696e-06, "loss": 0.0219, "step": 772 }, { "epoch": 8.434782608695652, "grad_norm": 1.7360124588012695, "learning_rate": 8.321739130434783e-06, "loss": 0.0242, "step": 776 }, { "epoch": 8.478260869565217, "grad_norm": 1.1798505783081055, "learning_rate": 8.313043478260871e-06, "loss": 0.0252, "step": 780 }, { "epoch": 8.521739130434783, "grad_norm": 2.467694044113159, "learning_rate": 8.304347826086957e-06, "loss": 0.0254, "step": 784 }, { "epoch": 8.565217391304348, "grad_norm": 1.7192645072937012, "learning_rate": 8.295652173913045e-06, "loss": 0.0247, "step": 788 }, { "epoch": 8.608695652173914, "grad_norm": 1.6751654148101807, "learning_rate": 8.286956521739131e-06, "loss": 0.0267, "step": 792 }, { "epoch": 8.652173913043478, "grad_norm": 4.166838645935059, "learning_rate": 8.278260869565219e-06, "loss": 0.0671, "step": 796 }, { "epoch": 8.695652173913043, "grad_norm": 4.912561893463135, "learning_rate": 8.269565217391306e-06, "loss": 0.0536, "step": 800 }, { "epoch": 8.695652173913043, "eval_cer": 0.07322883415122207, "eval_loss": 0.2012723833322525, "eval_runtime": 206.2592, "eval_samples_per_second": 1.784, "eval_steps_per_second": 0.112, "step": 800 }, { "epoch": 8.73913043478261, "grad_norm": 12.09276294708252, "learning_rate": 8.260869565217392e-06, "loss": 0.0328, "step": 804 }, { "epoch": 8.782608695652174, "grad_norm": 2.3350353240966797, "learning_rate": 8.252173913043478e-06, "loss": 0.0351, "step": 808 }, { "epoch": 8.826086956521738, "grad_norm": 2.933907985687256, "learning_rate": 8.243478260869566e-06, "loss": 0.0339, "step": 812 }, { "epoch": 8.869565217391305, "grad_norm": 1.9966821670532227, "learning_rate": 8.234782608695652e-06, "loss": 0.0315, "step": 816 }, { "epoch": 8.91304347826087, "grad_norm": 1.8749300241470337, "learning_rate": 8.22608695652174e-06, "loss": 0.0333, "step": 820 }, { "epoch": 8.956521739130435, "grad_norm": 2.9768922328948975, "learning_rate": 8.217391304347827e-06, "loss": 0.0352, "step": 824 }, { "epoch": 9.0, "grad_norm": 1.9757808446884155, "learning_rate": 8.208695652173913e-06, "loss": 0.0487, "step": 828 }, { "epoch": 9.043478260869565, "grad_norm": 2.8149919509887695, "learning_rate": 8.2e-06, "loss": 0.0189, "step": 832 }, { "epoch": 9.08695652173913, "grad_norm": 1.0551875829696655, "learning_rate": 8.191304347826087e-06, "loss": 0.0192, "step": 836 }, { "epoch": 9.130434782608695, "grad_norm": 2.780104637145996, "learning_rate": 8.182608695652175e-06, "loss": 0.0243, "step": 840 }, { "epoch": 9.173913043478262, "grad_norm": 1.5332213640213013, "learning_rate": 8.173913043478263e-06, "loss": 0.0192, "step": 844 }, { "epoch": 9.217391304347826, "grad_norm": 2.287720203399658, "learning_rate": 8.165217391304349e-06, "loss": 0.026, "step": 848 }, { "epoch": 9.26086956521739, "grad_norm": 1.6307133436203003, "learning_rate": 8.156521739130435e-06, "loss": 0.0177, "step": 852 }, { "epoch": 9.304347826086957, "grad_norm": 1.3198946714401245, "learning_rate": 8.147826086956522e-06, "loss": 0.0223, "step": 856 }, { "epoch": 9.347826086956522, "grad_norm": 1.0884102582931519, "learning_rate": 8.13913043478261e-06, "loss": 0.0198, "step": 860 }, { "epoch": 9.391304347826088, "grad_norm": 5.5890984535217285, "learning_rate": 8.130434782608696e-06, "loss": 0.0323, "step": 864 }, { "epoch": 9.434782608695652, "grad_norm": 1.3700402975082397, "learning_rate": 8.121739130434784e-06, "loss": 0.0202, "step": 868 }, { "epoch": 9.478260869565217, "grad_norm": 1.2030320167541504, "learning_rate": 8.11304347826087e-06, "loss": 0.0168, "step": 872 }, { "epoch": 9.521739130434783, "grad_norm": 1.6642029285430908, "learning_rate": 8.104347826086957e-06, "loss": 0.0238, "step": 876 }, { "epoch": 9.565217391304348, "grad_norm": 1.961349606513977, "learning_rate": 8.095652173913045e-06, "loss": 0.0237, "step": 880 }, { "epoch": 9.608695652173914, "grad_norm": 1.679360032081604, "learning_rate": 8.086956521739131e-06, "loss": 0.0158, "step": 884 }, { "epoch": 9.652173913043478, "grad_norm": 2.9290273189544678, "learning_rate": 8.078260869565217e-06, "loss": 0.0228, "step": 888 }, { "epoch": 9.695652173913043, "grad_norm": 2.845383405685425, "learning_rate": 8.069565217391305e-06, "loss": 0.0263, "step": 892 }, { "epoch": 9.73913043478261, "grad_norm": 1.3626561164855957, "learning_rate": 8.060869565217392e-06, "loss": 0.0195, "step": 896 }, { "epoch": 9.782608695652174, "grad_norm": 2.75199556350708, "learning_rate": 8.052173913043479e-06, "loss": 0.0336, "step": 900 }, { "epoch": 9.826086956521738, "grad_norm": 1.116907000541687, "learning_rate": 8.043478260869566e-06, "loss": 0.0193, "step": 904 }, { "epoch": 9.869565217391305, "grad_norm": 2.4771246910095215, "learning_rate": 8.034782608695652e-06, "loss": 0.0217, "step": 908 }, { "epoch": 9.91304347826087, "grad_norm": 1.3269857168197632, "learning_rate": 8.02608695652174e-06, "loss": 0.0209, "step": 912 }, { "epoch": 9.956521739130435, "grad_norm": 2.300865888595581, "learning_rate": 8.017391304347828e-06, "loss": 0.026, "step": 916 }, { "epoch": 10.0, "grad_norm": 6.091469764709473, "learning_rate": 8.008695652173914e-06, "loss": 0.0248, "step": 920 }, { "epoch": 10.043478260869565, "grad_norm": 1.8367950916290283, "learning_rate": 8.000000000000001e-06, "loss": 0.0125, "step": 924 }, { "epoch": 10.08695652173913, "grad_norm": 1.6097432374954224, "learning_rate": 7.991304347826087e-06, "loss": 0.0117, "step": 928 }, { "epoch": 10.130434782608695, "grad_norm": 1.3999067544937134, "learning_rate": 7.982608695652175e-06, "loss": 0.019, "step": 932 }, { "epoch": 10.173913043478262, "grad_norm": 3.8111000061035156, "learning_rate": 7.973913043478261e-06, "loss": 0.0164, "step": 936 }, { "epoch": 10.217391304347826, "grad_norm": 1.2426923513412476, "learning_rate": 7.965217391304349e-06, "loss": 0.0176, "step": 940 }, { "epoch": 10.26086956521739, "grad_norm": 1.5852206945419312, "learning_rate": 7.956521739130435e-06, "loss": 0.012, "step": 944 }, { "epoch": 10.304347826086957, "grad_norm": 1.2044821977615356, "learning_rate": 7.947826086956522e-06, "loss": 0.0158, "step": 948 }, { "epoch": 10.347826086956522, "grad_norm": 1.5005583763122559, "learning_rate": 7.939130434782608e-06, "loss": 0.0228, "step": 952 }, { "epoch": 10.391304347826088, "grad_norm": 1.1949985027313232, "learning_rate": 7.930434782608696e-06, "loss": 0.0122, "step": 956 }, { "epoch": 10.434782608695652, "grad_norm": 2.0156874656677246, "learning_rate": 7.921739130434784e-06, "loss": 0.0148, "step": 960 }, { "epoch": 10.478260869565217, "grad_norm": 1.7487869262695312, "learning_rate": 7.91304347826087e-06, "loss": 0.0198, "step": 964 }, { "epoch": 10.521739130434783, "grad_norm": 1.9267992973327637, "learning_rate": 7.904347826086958e-06, "loss": 0.019, "step": 968 }, { "epoch": 10.565217391304348, "grad_norm": 1.3667218685150146, "learning_rate": 7.895652173913044e-06, "loss": 0.0165, "step": 972 }, { "epoch": 10.608695652173914, "grad_norm": 1.9264909029006958, "learning_rate": 7.886956521739131e-06, "loss": 0.0138, "step": 976 }, { "epoch": 10.652173913043478, "grad_norm": 1.770121693611145, "learning_rate": 7.878260869565219e-06, "loss": 0.0261, "step": 980 }, { "epoch": 10.695652173913043, "grad_norm": 1.254451870918274, "learning_rate": 7.869565217391305e-06, "loss": 0.0114, "step": 984 }, { "epoch": 10.73913043478261, "grad_norm": 1.3121169805526733, "learning_rate": 7.860869565217391e-06, "loss": 0.0197, "step": 988 }, { "epoch": 10.782608695652174, "grad_norm": 0.8275075554847717, "learning_rate": 7.852173913043479e-06, "loss": 0.019, "step": 992 }, { "epoch": 10.826086956521738, "grad_norm": 2.495657205581665, "learning_rate": 7.843478260869566e-06, "loss": 0.0167, "step": 996 }, { "epoch": 10.869565217391305, "grad_norm": 2.2398645877838135, "learning_rate": 7.834782608695652e-06, "loss": 0.018, "step": 1000 }, { "epoch": 10.91304347826087, "grad_norm": 1.1990290880203247, "learning_rate": 7.82608695652174e-06, "loss": 0.0137, "step": 1004 }, { "epoch": 10.956521739130435, "grad_norm": 0.7502943873405457, "learning_rate": 7.817391304347826e-06, "loss": 0.0099, "step": 1008 }, { "epoch": 11.0, "grad_norm": 1.1841095685958862, "learning_rate": 7.808695652173914e-06, "loss": 0.015, "step": 1012 }, { "epoch": 11.043478260869565, "grad_norm": 0.8014634847640991, "learning_rate": 7.800000000000002e-06, "loss": 0.0083, "step": 1016 }, { "epoch": 11.08695652173913, "grad_norm": 1.4484273195266724, "learning_rate": 7.791304347826088e-06, "loss": 0.011, "step": 1020 }, { "epoch": 11.130434782608695, "grad_norm": 1.3159563541412354, "learning_rate": 7.782608695652174e-06, "loss": 0.0132, "step": 1024 }, { "epoch": 11.173913043478262, "grad_norm": 0.6403052806854248, "learning_rate": 7.773913043478261e-06, "loss": 0.0172, "step": 1028 }, { "epoch": 11.217391304347826, "grad_norm": 3.112725019454956, "learning_rate": 7.765217391304349e-06, "loss": 0.018, "step": 1032 }, { "epoch": 11.26086956521739, "grad_norm": 1.857997179031372, "learning_rate": 7.756521739130437e-06, "loss": 0.0123, "step": 1036 }, { "epoch": 11.304347826086957, "grad_norm": 0.9496269822120667, "learning_rate": 7.747826086956523e-06, "loss": 0.0101, "step": 1040 }, { "epoch": 11.347826086956522, "grad_norm": 1.7921116352081299, "learning_rate": 7.739130434782609e-06, "loss": 0.0134, "step": 1044 }, { "epoch": 11.391304347826088, "grad_norm": 3.3530218601226807, "learning_rate": 7.730434782608696e-06, "loss": 0.0093, "step": 1048 }, { "epoch": 11.434782608695652, "grad_norm": 2.403032064437866, "learning_rate": 7.721739130434784e-06, "loss": 0.013, "step": 1052 }, { "epoch": 11.478260869565217, "grad_norm": 1.6711821556091309, "learning_rate": 7.71304347826087e-06, "loss": 0.0132, "step": 1056 }, { "epoch": 11.521739130434783, "grad_norm": 1.2587711811065674, "learning_rate": 7.704347826086958e-06, "loss": 0.0118, "step": 1060 }, { "epoch": 11.565217391304348, "grad_norm": 2.215118885040283, "learning_rate": 7.695652173913044e-06, "loss": 0.0157, "step": 1064 }, { "epoch": 11.608695652173914, "grad_norm": 2.3528332710266113, "learning_rate": 7.68695652173913e-06, "loss": 0.0133, "step": 1068 }, { "epoch": 11.652173913043478, "grad_norm": 1.6357158422470093, "learning_rate": 7.678260869565218e-06, "loss": 0.011, "step": 1072 }, { "epoch": 11.695652173913043, "grad_norm": 8.644936561584473, "learning_rate": 7.669565217391305e-06, "loss": 0.0152, "step": 1076 }, { "epoch": 11.73913043478261, "grad_norm": 1.8608405590057373, "learning_rate": 7.660869565217391e-06, "loss": 0.0112, "step": 1080 }, { "epoch": 11.782608695652174, "grad_norm": 2.8228299617767334, "learning_rate": 7.652173913043479e-06, "loss": 0.0173, "step": 1084 }, { "epoch": 11.826086956521738, "grad_norm": 1.1610649824142456, "learning_rate": 7.643478260869565e-06, "loss": 0.0166, "step": 1088 }, { "epoch": 11.869565217391305, "grad_norm": 2.308224678039551, "learning_rate": 7.634782608695653e-06, "loss": 0.0134, "step": 1092 }, { "epoch": 11.91304347826087, "grad_norm": 2.5532352924346924, "learning_rate": 7.6260869565217395e-06, "loss": 0.0192, "step": 1096 }, { "epoch": 11.956521739130435, "grad_norm": 0.96513432264328, "learning_rate": 7.617391304347826e-06, "loss": 0.0122, "step": 1100 }, { "epoch": 12.0, "grad_norm": 1.3869465589523315, "learning_rate": 7.608695652173914e-06, "loss": 0.0129, "step": 1104 }, { "epoch": 12.043478260869565, "grad_norm": 4.364943027496338, "learning_rate": 7.600000000000001e-06, "loss": 0.0163, "step": 1108 }, { "epoch": 12.08695652173913, "grad_norm": 0.5522220134735107, "learning_rate": 7.591304347826087e-06, "loss": 0.0151, "step": 1112 }, { "epoch": 12.130434782608695, "grad_norm": 0.9613381028175354, "learning_rate": 7.582608695652175e-06, "loss": 0.0107, "step": 1116 }, { "epoch": 12.173913043478262, "grad_norm": 26.306720733642578, "learning_rate": 7.5739130434782615e-06, "loss": 0.017, "step": 1120 }, { "epoch": 12.217391304347826, "grad_norm": 1.4156843423843384, "learning_rate": 7.565217391304348e-06, "loss": 0.0144, "step": 1124 }, { "epoch": 12.26086956521739, "grad_norm": 0.6852642297744751, "learning_rate": 7.556521739130436e-06, "loss": 0.0121, "step": 1128 }, { "epoch": 12.304347826086957, "grad_norm": 1.6730104684829712, "learning_rate": 7.547826086956522e-06, "loss": 0.0154, "step": 1132 }, { "epoch": 12.347826086956522, "grad_norm": 2.2148935794830322, "learning_rate": 7.539130434782609e-06, "loss": 0.0099, "step": 1136 }, { "epoch": 12.391304347826088, "grad_norm": 0.9563978314399719, "learning_rate": 7.530434782608697e-06, "loss": 0.0069, "step": 1140 }, { "epoch": 12.434782608695652, "grad_norm": 0.8385843634605408, "learning_rate": 7.5217391304347835e-06, "loss": 0.0073, "step": 1144 }, { "epoch": 12.478260869565217, "grad_norm": 0.8640780448913574, "learning_rate": 7.5130434782608695e-06, "loss": 0.0075, "step": 1148 }, { "epoch": 12.521739130434783, "grad_norm": 1.487969160079956, "learning_rate": 7.504347826086957e-06, "loss": 0.0085, "step": 1152 }, { "epoch": 12.565217391304348, "grad_norm": 1.0546014308929443, "learning_rate": 7.495652173913044e-06, "loss": 0.0092, "step": 1156 }, { "epoch": 12.608695652173914, "grad_norm": 0.9219134449958801, "learning_rate": 7.486956521739132e-06, "loss": 0.0068, "step": 1160 }, { "epoch": 12.652173913043478, "grad_norm": 1.1197435855865479, "learning_rate": 7.478260869565218e-06, "loss": 0.0095, "step": 1164 }, { "epoch": 12.695652173913043, "grad_norm": 1.1771098375320435, "learning_rate": 7.469565217391305e-06, "loss": 0.0076, "step": 1168 }, { "epoch": 12.73913043478261, "grad_norm": 0.8085527420043945, "learning_rate": 7.460869565217392e-06, "loss": 0.0088, "step": 1172 }, { "epoch": 12.782608695652174, "grad_norm": 0.8408207297325134, "learning_rate": 7.452173913043479e-06, "loss": 0.0107, "step": 1176 }, { "epoch": 12.826086956521738, "grad_norm": 1.1366029977798462, "learning_rate": 7.443478260869565e-06, "loss": 0.0097, "step": 1180 }, { "epoch": 12.869565217391305, "grad_norm": 0.9171955585479736, "learning_rate": 7.434782608695653e-06, "loss": 0.0186, "step": 1184 }, { "epoch": 12.91304347826087, "grad_norm": 1.5258619785308838, "learning_rate": 7.42608695652174e-06, "loss": 0.0092, "step": 1188 }, { "epoch": 12.956521739130435, "grad_norm": 1.7403347492218018, "learning_rate": 7.417391304347827e-06, "loss": 0.0123, "step": 1192 }, { "epoch": 13.0, "grad_norm": 2.7387073040008545, "learning_rate": 7.408695652173914e-06, "loss": 0.0104, "step": 1196 }, { "epoch": 13.043478260869565, "grad_norm": 0.43595385551452637, "learning_rate": 7.4e-06, "loss": 0.0064, "step": 1200 }, { "epoch": 13.043478260869565, "eval_cer": 0.06913144980704178, "eval_loss": 0.21886567771434784, "eval_runtime": 201.8526, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.114, "step": 1200 }, { "epoch": 13.08695652173913, "grad_norm": 1.531267762184143, "learning_rate": 7.391304347826087e-06, "loss": 0.0097, "step": 1204 }, { "epoch": 13.130434782608695, "grad_norm": 2.1816365718841553, "learning_rate": 7.382608695652175e-06, "loss": 0.008, "step": 1208 }, { "epoch": 13.173913043478262, "grad_norm": 1.225962519645691, "learning_rate": 7.373913043478262e-06, "loss": 0.0095, "step": 1212 }, { "epoch": 13.217391304347826, "grad_norm": 2.986109733581543, "learning_rate": 7.365217391304348e-06, "loss": 0.0125, "step": 1216 }, { "epoch": 13.26086956521739, "grad_norm": 1.2151403427124023, "learning_rate": 7.3565217391304354e-06, "loss": 0.0088, "step": 1220 }, { "epoch": 13.304347826086957, "grad_norm": 0.9124925136566162, "learning_rate": 7.347826086956522e-06, "loss": 0.0111, "step": 1224 }, { "epoch": 13.347826086956522, "grad_norm": 0.6479306817054749, "learning_rate": 7.33913043478261e-06, "loss": 0.0102, "step": 1228 }, { "epoch": 13.391304347826088, "grad_norm": 0.8522439002990723, "learning_rate": 7.330434782608696e-06, "loss": 0.0073, "step": 1232 }, { "epoch": 13.434782608695652, "grad_norm": 1.401593804359436, "learning_rate": 7.321739130434783e-06, "loss": 0.008, "step": 1236 }, { "epoch": 13.478260869565217, "grad_norm": 0.7106354236602783, "learning_rate": 7.3130434782608706e-06, "loss": 0.0086, "step": 1240 }, { "epoch": 13.521739130434783, "grad_norm": 0.4422832429409027, "learning_rate": 7.304347826086957e-06, "loss": 0.0092, "step": 1244 }, { "epoch": 13.565217391304348, "grad_norm": 1.795759677886963, "learning_rate": 7.295652173913043e-06, "loss": 0.0145, "step": 1248 }, { "epoch": 13.608695652173914, "grad_norm": 2.278503894805908, "learning_rate": 7.286956521739131e-06, "loss": 0.0113, "step": 1252 }, { "epoch": 13.652173913043478, "grad_norm": 0.5220128297805786, "learning_rate": 7.278260869565218e-06, "loss": 0.0081, "step": 1256 }, { "epoch": 13.695652173913043, "grad_norm": 2.6076862812042236, "learning_rate": 7.269565217391305e-06, "loss": 0.008, "step": 1260 }, { "epoch": 13.73913043478261, "grad_norm": 19.561298370361328, "learning_rate": 7.2608695652173925e-06, "loss": 0.0146, "step": 1264 }, { "epoch": 13.782608695652174, "grad_norm": 1.466150164604187, "learning_rate": 7.2521739130434785e-06, "loss": 0.0109, "step": 1268 }, { "epoch": 13.826086956521738, "grad_norm": 1.6824641227722168, "learning_rate": 7.243478260869565e-06, "loss": 0.0098, "step": 1272 }, { "epoch": 13.869565217391305, "grad_norm": 0.7150336503982544, "learning_rate": 7.234782608695653e-06, "loss": 0.0116, "step": 1276 }, { "epoch": 13.91304347826087, "grad_norm": 1.5328797101974487, "learning_rate": 7.226086956521739e-06, "loss": 0.0107, "step": 1280 }, { "epoch": 13.956521739130435, "grad_norm": 1.238612174987793, "learning_rate": 7.217391304347827e-06, "loss": 0.0088, "step": 1284 }, { "epoch": 14.0, "grad_norm": 0.6885136961936951, "learning_rate": 7.208695652173914e-06, "loss": 0.0077, "step": 1288 }, { "epoch": 14.043478260869565, "grad_norm": 1.337780237197876, "learning_rate": 7.2000000000000005e-06, "loss": 0.0064, "step": 1292 }, { "epoch": 14.08695652173913, "grad_norm": 1.1798834800720215, "learning_rate": 7.191304347826088e-06, "loss": 0.0173, "step": 1296 }, { "epoch": 14.130434782608695, "grad_norm": 0.6264637112617493, "learning_rate": 7.182608695652174e-06, "loss": 0.0053, "step": 1300 }, { "epoch": 14.173913043478262, "grad_norm": 0.9245102405548096, "learning_rate": 7.173913043478261e-06, "loss": 0.0057, "step": 1304 }, { "epoch": 14.217391304347826, "grad_norm": 0.7311404943466187, "learning_rate": 7.165217391304349e-06, "loss": 0.0052, "step": 1308 }, { "epoch": 14.26086956521739, "grad_norm": 2.2179930210113525, "learning_rate": 7.156521739130436e-06, "loss": 0.0068, "step": 1312 }, { "epoch": 14.304347826086957, "grad_norm": 1.4353398084640503, "learning_rate": 7.147826086956522e-06, "loss": 0.0117, "step": 1316 }, { "epoch": 14.347826086956522, "grad_norm": 1.2515575885772705, "learning_rate": 7.139130434782609e-06, "loss": 0.0047, "step": 1320 }, { "epoch": 14.391304347826088, "grad_norm": 0.7005864977836609, "learning_rate": 7.130434782608696e-06, "loss": 0.0118, "step": 1324 }, { "epoch": 14.434782608695652, "grad_norm": 2.7481956481933594, "learning_rate": 7.121739130434783e-06, "loss": 0.0188, "step": 1328 }, { "epoch": 14.478260869565217, "grad_norm": 1.1425955295562744, "learning_rate": 7.113043478260871e-06, "loss": 0.0076, "step": 1332 }, { "epoch": 14.521739130434783, "grad_norm": 13.091636657714844, "learning_rate": 7.104347826086957e-06, "loss": 0.013, "step": 1336 }, { "epoch": 14.565217391304348, "grad_norm": 2.775904893875122, "learning_rate": 7.095652173913044e-06, "loss": 0.0095, "step": 1340 }, { "epoch": 14.608695652173914, "grad_norm": 1.3664196729660034, "learning_rate": 7.086956521739131e-06, "loss": 0.0101, "step": 1344 }, { "epoch": 14.652173913043478, "grad_norm": 0.5242487788200378, "learning_rate": 7.078260869565217e-06, "loss": 0.0051, "step": 1348 }, { "epoch": 14.695652173913043, "grad_norm": 5.7111287117004395, "learning_rate": 7.069565217391305e-06, "loss": 0.0369, "step": 1352 }, { "epoch": 14.73913043478261, "grad_norm": 1.0724788904190063, "learning_rate": 7.060869565217392e-06, "loss": 0.0066, "step": 1356 }, { "epoch": 14.782608695652174, "grad_norm": 1.0189027786254883, "learning_rate": 7.052173913043479e-06, "loss": 0.0108, "step": 1360 }, { "epoch": 14.826086956521738, "grad_norm": 1.367018222808838, "learning_rate": 7.0434782608695665e-06, "loss": 0.0102, "step": 1364 }, { "epoch": 14.869565217391305, "grad_norm": 0.9228054285049438, "learning_rate": 7.0347826086956525e-06, "loss": 0.0064, "step": 1368 }, { "epoch": 14.91304347826087, "grad_norm": 0.41058632731437683, "learning_rate": 7.026086956521739e-06, "loss": 0.007, "step": 1372 }, { "epoch": 14.956521739130435, "grad_norm": 0.9409981966018677, "learning_rate": 7.017391304347827e-06, "loss": 0.009, "step": 1376 }, { "epoch": 15.0, "grad_norm": 0.8965757489204407, "learning_rate": 7.008695652173914e-06, "loss": 0.0069, "step": 1380 }, { "epoch": 15.043478260869565, "grad_norm": 1.0358166694641113, "learning_rate": 7e-06, "loss": 0.0039, "step": 1384 }, { "epoch": 15.08695652173913, "grad_norm": 0.9616153836250305, "learning_rate": 6.991304347826088e-06, "loss": 0.0039, "step": 1388 }, { "epoch": 15.130434782608695, "grad_norm": 1.0469268560409546, "learning_rate": 6.9826086956521745e-06, "loss": 0.0042, "step": 1392 }, { "epoch": 15.173913043478262, "grad_norm": 1.0306861400604248, "learning_rate": 6.973913043478261e-06, "loss": 0.0057, "step": 1396 }, { "epoch": 15.217391304347826, "grad_norm": 0.5975437164306641, "learning_rate": 6.965217391304349e-06, "loss": 0.006, "step": 1400 }, { "epoch": 15.26086956521739, "grad_norm": 0.6831496357917786, "learning_rate": 6.956521739130435e-06, "loss": 0.0186, "step": 1404 }, { "epoch": 15.304347826086957, "grad_norm": 0.2180139571428299, "learning_rate": 6.947826086956523e-06, "loss": 0.0033, "step": 1408 }, { "epoch": 15.347826086956522, "grad_norm": 0.9308338165283203, "learning_rate": 6.93913043478261e-06, "loss": 0.0054, "step": 1412 }, { "epoch": 15.391304347826088, "grad_norm": 0.6423309445381165, "learning_rate": 6.930434782608696e-06, "loss": 0.0048, "step": 1416 }, { "epoch": 15.434782608695652, "grad_norm": 0.5056251883506775, "learning_rate": 6.921739130434783e-06, "loss": 0.0066, "step": 1420 }, { "epoch": 15.478260869565217, "grad_norm": 2.4142568111419678, "learning_rate": 6.91304347826087e-06, "loss": 0.0084, "step": 1424 }, { "epoch": 15.521739130434783, "grad_norm": 0.6071086525917053, "learning_rate": 6.904347826086957e-06, "loss": 0.0031, "step": 1428 }, { "epoch": 15.565217391304348, "grad_norm": 4.050166606903076, "learning_rate": 6.895652173913045e-06, "loss": 0.0051, "step": 1432 }, { "epoch": 15.608695652173914, "grad_norm": 3.473848581314087, "learning_rate": 6.886956521739131e-06, "loss": 0.0053, "step": 1436 }, { "epoch": 15.652173913043478, "grad_norm": 0.5063245892524719, "learning_rate": 6.8782608695652176e-06, "loss": 0.0088, "step": 1440 }, { "epoch": 15.695652173913043, "grad_norm": 0.3653779923915863, "learning_rate": 6.869565217391305e-06, "loss": 0.0034, "step": 1444 }, { "epoch": 15.73913043478261, "grad_norm": 1.3541284799575806, "learning_rate": 6.860869565217392e-06, "loss": 0.0085, "step": 1448 }, { "epoch": 15.782608695652174, "grad_norm": 0.6482077836990356, "learning_rate": 6.852173913043478e-06, "loss": 0.0042, "step": 1452 }, { "epoch": 15.826086956521738, "grad_norm": 14.62485408782959, "learning_rate": 6.843478260869566e-06, "loss": 0.0167, "step": 1456 }, { "epoch": 15.869565217391305, "grad_norm": 1.3629876375198364, "learning_rate": 6.834782608695653e-06, "loss": 0.0092, "step": 1460 }, { "epoch": 15.91304347826087, "grad_norm": 0.7435063123703003, "learning_rate": 6.8260869565217395e-06, "loss": 0.0053, "step": 1464 }, { "epoch": 15.956521739130435, "grad_norm": 0.5410985946655273, "learning_rate": 6.817391304347826e-06, "loss": 0.0048, "step": 1468 }, { "epoch": 16.0, "grad_norm": 0.9544240236282349, "learning_rate": 6.808695652173913e-06, "loss": 0.0097, "step": 1472 }, { "epoch": 16.043478260869566, "grad_norm": 0.6027460694313049, "learning_rate": 6.800000000000001e-06, "loss": 0.0046, "step": 1476 }, { "epoch": 16.08695652173913, "grad_norm": 0.5865103006362915, "learning_rate": 6.791304347826088e-06, "loss": 0.0066, "step": 1480 }, { "epoch": 16.130434782608695, "grad_norm": 3.3589930534362793, "learning_rate": 6.782608695652174e-06, "loss": 0.0046, "step": 1484 }, { "epoch": 16.17391304347826, "grad_norm": 0.2249296009540558, "learning_rate": 6.7739130434782615e-06, "loss": 0.0033, "step": 1488 }, { "epoch": 16.217391304347824, "grad_norm": 1.2928370237350464, "learning_rate": 6.765217391304348e-06, "loss": 0.0074, "step": 1492 }, { "epoch": 16.26086956521739, "grad_norm": 1.2256135940551758, "learning_rate": 6.756521739130435e-06, "loss": 0.0067, "step": 1496 }, { "epoch": 16.304347826086957, "grad_norm": 0.4373249113559723, "learning_rate": 6.747826086956523e-06, "loss": 0.0051, "step": 1500 }, { "epoch": 16.347826086956523, "grad_norm": 1.054301142692566, "learning_rate": 6.739130434782609e-06, "loss": 0.0052, "step": 1504 }, { "epoch": 16.391304347826086, "grad_norm": 0.6513290405273438, "learning_rate": 6.730434782608696e-06, "loss": 0.0055, "step": 1508 }, { "epoch": 16.434782608695652, "grad_norm": 1.176254153251648, "learning_rate": 6.7217391304347835e-06, "loss": 0.0041, "step": 1512 }, { "epoch": 16.47826086956522, "grad_norm": 0.6245970129966736, "learning_rate": 6.71304347826087e-06, "loss": 0.0076, "step": 1516 }, { "epoch": 16.52173913043478, "grad_norm": 0.4100065529346466, "learning_rate": 6.704347826086956e-06, "loss": 0.0034, "step": 1520 }, { "epoch": 16.565217391304348, "grad_norm": 1.162110447883606, "learning_rate": 6.695652173913044e-06, "loss": 0.0041, "step": 1524 }, { "epoch": 16.608695652173914, "grad_norm": 0.9842275977134705, "learning_rate": 6.686956521739131e-06, "loss": 0.0059, "step": 1528 }, { "epoch": 16.652173913043477, "grad_norm": 1.063281536102295, "learning_rate": 6.678260869565219e-06, "loss": 0.0074, "step": 1532 }, { "epoch": 16.695652173913043, "grad_norm": 0.5967265367507935, "learning_rate": 6.669565217391305e-06, "loss": 0.007, "step": 1536 }, { "epoch": 16.73913043478261, "grad_norm": 3.215507745742798, "learning_rate": 6.6608695652173915e-06, "loss": 0.0051, "step": 1540 }, { "epoch": 16.782608695652176, "grad_norm": 0.7696347832679749, "learning_rate": 6.652173913043479e-06, "loss": 0.0048, "step": 1544 }, { "epoch": 16.82608695652174, "grad_norm": 4.061685562133789, "learning_rate": 6.643478260869566e-06, "loss": 0.0113, "step": 1548 }, { "epoch": 16.869565217391305, "grad_norm": 1.3145076036453247, "learning_rate": 6.634782608695652e-06, "loss": 0.0081, "step": 1552 }, { "epoch": 16.91304347826087, "grad_norm": 1.1601479053497314, "learning_rate": 6.62608695652174e-06, "loss": 0.0049, "step": 1556 }, { "epoch": 16.956521739130434, "grad_norm": 2.2673768997192383, "learning_rate": 6.617391304347827e-06, "loss": 0.0064, "step": 1560 }, { "epoch": 17.0, "grad_norm": 0.8262242078781128, "learning_rate": 6.6086956521739135e-06, "loss": 0.003, "step": 1564 }, { "epoch": 17.043478260869566, "grad_norm": 0.47302860021591187, "learning_rate": 6.600000000000001e-06, "loss": 0.0053, "step": 1568 }, { "epoch": 17.08695652173913, "grad_norm": 0.707246720790863, "learning_rate": 6.591304347826087e-06, "loss": 0.0032, "step": 1572 }, { "epoch": 17.130434782608695, "grad_norm": 0.3421729803085327, "learning_rate": 6.582608695652174e-06, "loss": 0.0072, "step": 1576 }, { "epoch": 17.17391304347826, "grad_norm": 0.39960405230522156, "learning_rate": 6.573913043478262e-06, "loss": 0.0026, "step": 1580 }, { "epoch": 17.217391304347824, "grad_norm": 0.585614800453186, "learning_rate": 6.565217391304349e-06, "loss": 0.0029, "step": 1584 }, { "epoch": 17.26086956521739, "grad_norm": 0.33347564935684204, "learning_rate": 6.556521739130435e-06, "loss": 0.0029, "step": 1588 }, { "epoch": 17.304347826086957, "grad_norm": 1.1246775388717651, "learning_rate": 6.547826086956522e-06, "loss": 0.0057, "step": 1592 }, { "epoch": 17.347826086956523, "grad_norm": 1.3347176313400269, "learning_rate": 6.539130434782609e-06, "loss": 0.0043, "step": 1596 }, { "epoch": 17.391304347826086, "grad_norm": 0.38102060556411743, "learning_rate": 6.530434782608697e-06, "loss": 0.004, "step": 1600 }, { "epoch": 17.391304347826086, "eval_cer": 0.06450998141883844, "eval_loss": 0.22015000879764557, "eval_runtime": 202.7935, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.113, "step": 1600 }, { "epoch": 17.434782608695652, "grad_norm": 0.35870620608329773, "learning_rate": 6.521739130434783e-06, "loss": 0.0029, "step": 1604 }, { "epoch": 17.47826086956522, "grad_norm": 0.5182778835296631, "learning_rate": 6.51304347826087e-06, "loss": 0.0031, "step": 1608 }, { "epoch": 17.52173913043478, "grad_norm": 0.2557024359703064, "learning_rate": 6.5043478260869574e-06, "loss": 0.003, "step": 1612 }, { "epoch": 17.565217391304348, "grad_norm": 0.794677197933197, "learning_rate": 6.495652173913044e-06, "loss": 0.0055, "step": 1616 }, { "epoch": 17.608695652173914, "grad_norm": 0.3375849425792694, "learning_rate": 6.48695652173913e-06, "loss": 0.0021, "step": 1620 }, { "epoch": 17.652173913043477, "grad_norm": 1.1278650760650635, "learning_rate": 6.478260869565218e-06, "loss": 0.0033, "step": 1624 }, { "epoch": 17.695652173913043, "grad_norm": 1.9284498691558838, "learning_rate": 6.469565217391305e-06, "loss": 0.0049, "step": 1628 }, { "epoch": 17.73913043478261, "grad_norm": 0.5490275621414185, "learning_rate": 6.460869565217392e-06, "loss": 0.0069, "step": 1632 }, { "epoch": 17.782608695652176, "grad_norm": 1.833752155303955, "learning_rate": 6.4521739130434794e-06, "loss": 0.0061, "step": 1636 }, { "epoch": 17.82608695652174, "grad_norm": 2.1299726963043213, "learning_rate": 6.4434782608695654e-06, "loss": 0.0064, "step": 1640 }, { "epoch": 17.869565217391305, "grad_norm": 0.6617554426193237, "learning_rate": 6.434782608695652e-06, "loss": 0.0032, "step": 1644 }, { "epoch": 17.91304347826087, "grad_norm": 1.0980204343795776, "learning_rate": 6.42608695652174e-06, "loss": 0.0037, "step": 1648 }, { "epoch": 17.956521739130434, "grad_norm": 0.6975145936012268, "learning_rate": 6.417391304347827e-06, "loss": 0.0046, "step": 1652 }, { "epoch": 18.0, "grad_norm": 0.4166085720062256, "learning_rate": 6.408695652173914e-06, "loss": 0.0048, "step": 1656 }, { "epoch": 18.043478260869566, "grad_norm": 1.6994531154632568, "learning_rate": 6.4000000000000006e-06, "loss": 0.0037, "step": 1660 }, { "epoch": 18.08695652173913, "grad_norm": 3.2556941509246826, "learning_rate": 6.391304347826087e-06, "loss": 0.005, "step": 1664 }, { "epoch": 18.130434782608695, "grad_norm": 0.30955272912979126, "learning_rate": 6.382608695652175e-06, "loss": 0.0024, "step": 1668 }, { "epoch": 18.17391304347826, "grad_norm": 0.22232691943645477, "learning_rate": 6.373913043478261e-06, "loss": 0.0042, "step": 1672 }, { "epoch": 18.217391304347824, "grad_norm": 0.1353224366903305, "learning_rate": 6.365217391304348e-06, "loss": 0.0076, "step": 1676 }, { "epoch": 18.26086956521739, "grad_norm": 0.26332777738571167, "learning_rate": 6.356521739130436e-06, "loss": 0.0032, "step": 1680 }, { "epoch": 18.304347826086957, "grad_norm": 0.2566346228122711, "learning_rate": 6.3478260869565225e-06, "loss": 0.0031, "step": 1684 }, { "epoch": 18.347826086956523, "grad_norm": 0.18673892319202423, "learning_rate": 6.3391304347826085e-06, "loss": 0.0023, "step": 1688 }, { "epoch": 18.391304347826086, "grad_norm": 1.5212428569793701, "learning_rate": 6.330434782608696e-06, "loss": 0.004, "step": 1692 }, { "epoch": 18.434782608695652, "grad_norm": 0.4993237555027008, "learning_rate": 6.321739130434783e-06, "loss": 0.0024, "step": 1696 }, { "epoch": 18.47826086956522, "grad_norm": 0.8646178245544434, "learning_rate": 6.31304347826087e-06, "loss": 0.0032, "step": 1700 }, { "epoch": 18.52173913043478, "grad_norm": 0.14716516435146332, "learning_rate": 6.304347826086958e-06, "loss": 0.0084, "step": 1704 }, { "epoch": 18.565217391304348, "grad_norm": 0.23538489639759064, "learning_rate": 6.295652173913044e-06, "loss": 0.0036, "step": 1708 }, { "epoch": 18.608695652173914, "grad_norm": 0.7179269790649414, "learning_rate": 6.2869565217391305e-06, "loss": 0.0036, "step": 1712 }, { "epoch": 18.652173913043477, "grad_norm": 0.3134477734565735, "learning_rate": 6.278260869565218e-06, "loss": 0.0035, "step": 1716 }, { "epoch": 18.695652173913043, "grad_norm": 0.12795044481754303, "learning_rate": 6.269565217391304e-06, "loss": 0.0027, "step": 1720 }, { "epoch": 18.73913043478261, "grad_norm": 0.2717900276184082, "learning_rate": 6.260869565217392e-06, "loss": 0.0025, "step": 1724 }, { "epoch": 18.782608695652176, "grad_norm": 0.7385865449905396, "learning_rate": 6.252173913043479e-06, "loss": 0.0027, "step": 1728 }, { "epoch": 18.82608695652174, "grad_norm": 0.1632547825574875, "learning_rate": 6.243478260869566e-06, "loss": 0.0029, "step": 1732 }, { "epoch": 18.869565217391305, "grad_norm": 0.2515088617801666, "learning_rate": 6.234782608695653e-06, "loss": 0.0066, "step": 1736 }, { "epoch": 18.91304347826087, "grad_norm": 0.5043063163757324, "learning_rate": 6.226086956521739e-06, "loss": 0.0053, "step": 1740 }, { "epoch": 18.956521739130434, "grad_norm": 0.485673189163208, "learning_rate": 6.217391304347826e-06, "loss": 0.0026, "step": 1744 }, { "epoch": 19.0, "grad_norm": 10.630827903747559, "learning_rate": 6.208695652173914e-06, "loss": 0.007, "step": 1748 }, { "epoch": 19.043478260869566, "grad_norm": 0.6521807312965393, "learning_rate": 6.200000000000001e-06, "loss": 0.002, "step": 1752 }, { "epoch": 19.08695652173913, "grad_norm": 0.685613214969635, "learning_rate": 6.191304347826087e-06, "loss": 0.0031, "step": 1756 }, { "epoch": 19.130434782608695, "grad_norm": 0.4042814373970032, "learning_rate": 6.1826086956521745e-06, "loss": 0.0033, "step": 1760 }, { "epoch": 19.17391304347826, "grad_norm": 1.0956984758377075, "learning_rate": 6.173913043478261e-06, "loss": 0.0042, "step": 1764 }, { "epoch": 19.217391304347824, "grad_norm": 0.45772242546081543, "learning_rate": 6.165217391304348e-06, "loss": 0.0043, "step": 1768 }, { "epoch": 19.26086956521739, "grad_norm": 0.42395636439323425, "learning_rate": 6.156521739130436e-06, "loss": 0.0023, "step": 1772 }, { "epoch": 19.304347826086957, "grad_norm": 1.5082902908325195, "learning_rate": 6.147826086956522e-06, "loss": 0.0025, "step": 1776 }, { "epoch": 19.347826086956523, "grad_norm": 0.21798431873321533, "learning_rate": 6.13913043478261e-06, "loss": 0.0036, "step": 1780 }, { "epoch": 19.391304347826086, "grad_norm": 0.3865170180797577, "learning_rate": 6.1304347826086965e-06, "loss": 0.0019, "step": 1784 }, { "epoch": 19.434782608695652, "grad_norm": 3.2888128757476807, "learning_rate": 6.1217391304347825e-06, "loss": 0.0026, "step": 1788 }, { "epoch": 19.47826086956522, "grad_norm": 0.2803182601928711, "learning_rate": 6.11304347826087e-06, "loss": 0.0037, "step": 1792 }, { "epoch": 19.52173913043478, "grad_norm": 0.33410853147506714, "learning_rate": 6.104347826086957e-06, "loss": 0.002, "step": 1796 }, { "epoch": 19.565217391304348, "grad_norm": 0.32171395421028137, "learning_rate": 6.095652173913044e-06, "loss": 0.0026, "step": 1800 }, { "epoch": 19.608695652173914, "grad_norm": 2.363369941711426, "learning_rate": 6.086956521739132e-06, "loss": 0.0055, "step": 1804 }, { "epoch": 19.652173913043477, "grad_norm": 0.21327437460422516, "learning_rate": 6.078260869565218e-06, "loss": 0.0026, "step": 1808 }, { "epoch": 19.695652173913043, "grad_norm": 0.3712483048439026, "learning_rate": 6.0695652173913045e-06, "loss": 0.0032, "step": 1812 }, { "epoch": 19.73913043478261, "grad_norm": 0.8645619750022888, "learning_rate": 6.060869565217392e-06, "loss": 0.005, "step": 1816 }, { "epoch": 19.782608695652176, "grad_norm": 1.5834615230560303, "learning_rate": 6.052173913043479e-06, "loss": 0.0033, "step": 1820 }, { "epoch": 19.82608695652174, "grad_norm": 1.1824058294296265, "learning_rate": 6.043478260869565e-06, "loss": 0.0038, "step": 1824 }, { "epoch": 19.869565217391305, "grad_norm": 0.2253231257200241, "learning_rate": 6.034782608695653e-06, "loss": 0.0016, "step": 1828 }, { "epoch": 19.91304347826087, "grad_norm": 0.46170753240585327, "learning_rate": 6.02608695652174e-06, "loss": 0.0033, "step": 1832 }, { "epoch": 19.956521739130434, "grad_norm": 1.4026618003845215, "learning_rate": 6.0173913043478264e-06, "loss": 0.0038, "step": 1836 }, { "epoch": 20.0, "grad_norm": 0.854394257068634, "learning_rate": 6.008695652173913e-06, "loss": 0.0024, "step": 1840 }, { "epoch": 20.043478260869566, "grad_norm": 15.418863296508789, "learning_rate": 6e-06, "loss": 0.0099, "step": 1844 }, { "epoch": 20.08695652173913, "grad_norm": 1.9084014892578125, "learning_rate": 5.991304347826088e-06, "loss": 0.0076, "step": 1848 }, { "epoch": 20.130434782608695, "grad_norm": 0.22345063090324402, "learning_rate": 5.982608695652175e-06, "loss": 0.0024, "step": 1852 }, { "epoch": 20.17391304347826, "grad_norm": 1.098819613456726, "learning_rate": 5.973913043478261e-06, "loss": 0.0047, "step": 1856 }, { "epoch": 20.217391304347824, "grad_norm": 0.576606273651123, "learning_rate": 5.965217391304348e-06, "loss": 0.0104, "step": 1860 }, { "epoch": 20.26086956521739, "grad_norm": 0.3249319791793823, "learning_rate": 5.956521739130435e-06, "loss": 0.0018, "step": 1864 }, { "epoch": 20.304347826086957, "grad_norm": 0.19233490526676178, "learning_rate": 5.947826086956522e-06, "loss": 0.0028, "step": 1868 }, { "epoch": 20.347826086956523, "grad_norm": 1.0393699407577515, "learning_rate": 5.93913043478261e-06, "loss": 0.0054, "step": 1872 }, { "epoch": 20.391304347826086, "grad_norm": 0.12541915476322174, "learning_rate": 5.930434782608696e-06, "loss": 0.0014, "step": 1876 }, { "epoch": 20.434782608695652, "grad_norm": 0.6915731430053711, "learning_rate": 5.921739130434783e-06, "loss": 0.0022, "step": 1880 }, { "epoch": 20.47826086956522, "grad_norm": 2.4252281188964844, "learning_rate": 5.91304347826087e-06, "loss": 0.0117, "step": 1884 }, { "epoch": 20.52173913043478, "grad_norm": 0.26406538486480713, "learning_rate": 5.904347826086957e-06, "loss": 0.0074, "step": 1888 }, { "epoch": 20.565217391304348, "grad_norm": 0.7169681787490845, "learning_rate": 5.895652173913043e-06, "loss": 0.0102, "step": 1892 }, { "epoch": 20.608695652173914, "grad_norm": 1.7539249658584595, "learning_rate": 5.886956521739131e-06, "loss": 0.0061, "step": 1896 }, { "epoch": 20.652173913043477, "grad_norm": 2.9715065956115723, "learning_rate": 5.878260869565218e-06, "loss": 0.0041, "step": 1900 }, { "epoch": 20.695652173913043, "grad_norm": 0.5111327171325684, "learning_rate": 5.8695652173913055e-06, "loss": 0.005, "step": 1904 }, { "epoch": 20.73913043478261, "grad_norm": 0.5571478009223938, "learning_rate": 5.8608695652173915e-06, "loss": 0.0021, "step": 1908 }, { "epoch": 20.782608695652176, "grad_norm": 5.586385726928711, "learning_rate": 5.852173913043478e-06, "loss": 0.0049, "step": 1912 }, { "epoch": 20.82608695652174, "grad_norm": 0.3944290280342102, "learning_rate": 5.843478260869566e-06, "loss": 0.0045, "step": 1916 }, { "epoch": 20.869565217391305, "grad_norm": 1.2807611227035522, "learning_rate": 5.834782608695653e-06, "loss": 0.0021, "step": 1920 }, { "epoch": 20.91304347826087, "grad_norm": 1.8484444618225098, "learning_rate": 5.826086956521739e-06, "loss": 0.003, "step": 1924 }, { "epoch": 20.956521739130434, "grad_norm": 2.0489065647125244, "learning_rate": 5.817391304347827e-06, "loss": 0.0206, "step": 1928 }, { "epoch": 21.0, "grad_norm": 0.18105614185333252, "learning_rate": 5.8086956521739135e-06, "loss": 0.0017, "step": 1932 }, { "epoch": 21.043478260869566, "grad_norm": 0.8697389960289001, "learning_rate": 5.8e-06, "loss": 0.002, "step": 1936 }, { "epoch": 21.08695652173913, "grad_norm": 0.16119571030139923, "learning_rate": 5.791304347826088e-06, "loss": 0.0025, "step": 1940 }, { "epoch": 21.130434782608695, "grad_norm": 0.5686094760894775, "learning_rate": 5.782608695652174e-06, "loss": 0.0022, "step": 1944 }, { "epoch": 21.17391304347826, "grad_norm": 0.4840877652168274, "learning_rate": 5.773913043478261e-06, "loss": 0.0024, "step": 1948 }, { "epoch": 21.217391304347824, "grad_norm": 16.31727409362793, "learning_rate": 5.765217391304349e-06, "loss": 0.0097, "step": 1952 }, { "epoch": 21.26086956521739, "grad_norm": 2.346874237060547, "learning_rate": 5.7565217391304355e-06, "loss": 0.0069, "step": 1956 }, { "epoch": 21.304347826086957, "grad_norm": 0.3269040882587433, "learning_rate": 5.7478260869565215e-06, "loss": 0.0063, "step": 1960 }, { "epoch": 21.347826086956523, "grad_norm": 1.1412652730941772, "learning_rate": 5.739130434782609e-06, "loss": 0.0032, "step": 1964 }, { "epoch": 21.391304347826086, "grad_norm": 0.2136031985282898, "learning_rate": 5.730434782608696e-06, "loss": 0.0019, "step": 1968 }, { "epoch": 21.434782608695652, "grad_norm": 0.5011817216873169, "learning_rate": 5.721739130434784e-06, "loss": 0.0022, "step": 1972 }, { "epoch": 21.47826086956522, "grad_norm": 2.7149758338928223, "learning_rate": 5.71304347826087e-06, "loss": 0.002, "step": 1976 }, { "epoch": 21.52173913043478, "grad_norm": 0.43043053150177, "learning_rate": 5.704347826086957e-06, "loss": 0.0017, "step": 1980 }, { "epoch": 21.565217391304348, "grad_norm": 0.5882115364074707, "learning_rate": 5.695652173913044e-06, "loss": 0.0016, "step": 1984 }, { "epoch": 21.608695652173914, "grad_norm": 0.7102403044700623, "learning_rate": 5.686956521739131e-06, "loss": 0.0015, "step": 1988 }, { "epoch": 21.652173913043477, "grad_norm": 0.1992396116256714, "learning_rate": 5.678260869565217e-06, "loss": 0.0032, "step": 1992 }, { "epoch": 21.695652173913043, "grad_norm": 0.17559760808944702, "learning_rate": 5.669565217391305e-06, "loss": 0.0026, "step": 1996 }, { "epoch": 21.73913043478261, "grad_norm": 0.5184961557388306, "learning_rate": 5.660869565217392e-06, "loss": 0.0027, "step": 2000 }, { "epoch": 21.73913043478261, "eval_cer": 0.06660631759493067, "eval_loss": 0.22050534188747406, "eval_runtime": 203.0091, "eval_samples_per_second": 1.813, "eval_steps_per_second": 0.113, "step": 2000 } ], "logging_steps": 4, "max_steps": 4600, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3913698133849145e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }