{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 35800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005586592178770949, "grad_norm": null, "learning_rate": 0.0, "loss": 13.4193, "step": 1 }, { "epoch": 0.0011173184357541898, "grad_norm": 33.38533401489258, "learning_rate": 1e-05, "loss": 12.9216, "step": 2 }, { "epoch": 0.0016759776536312849, "grad_norm": null, "learning_rate": 1e-05, "loss": 13.042, "step": 3 }, { "epoch": 0.0022346368715083797, "grad_norm": 34.66608810424805, "learning_rate": 2e-05, "loss": 13.1416, "step": 4 }, { "epoch": 0.002793296089385475, "grad_norm": 37.251033782958984, "learning_rate": 3e-05, "loss": 13.529, "step": 5 }, { "epoch": 0.0033519553072625698, "grad_norm": 32.82163619995117, "learning_rate": 4e-05, "loss": 12.9028, "step": 6 }, { "epoch": 0.003910614525139665, "grad_norm": 34.39775085449219, "learning_rate": 5e-05, "loss": 12.6227, "step": 7 }, { "epoch": 0.004469273743016759, "grad_norm": 35.74417495727539, "learning_rate": 6e-05, "loss": 12.4218, "step": 8 }, { "epoch": 0.005027932960893855, "grad_norm": 37.67799377441406, "learning_rate": 7.000000000000001e-05, "loss": 12.5441, "step": 9 }, { "epoch": 0.00558659217877095, "grad_norm": 34.68803405761719, "learning_rate": 8e-05, "loss": 11.8226, "step": 10 }, { "epoch": 0.006145251396648044, "grad_norm": 40.62199020385742, "learning_rate": 8.999999999999999e-05, "loss": 13.4227, "step": 11 }, { "epoch": 0.0067039106145251395, "grad_norm": 41.13390350341797, "learning_rate": 0.0001, "loss": 12.0762, "step": 12 }, { "epoch": 0.007262569832402235, "grad_norm": 32.25524139404297, "learning_rate": 0.00011, "loss": 11.2081, "step": 13 }, { "epoch": 0.00782122905027933, "grad_norm": null, "learning_rate": 0.00011, "loss": 11.9672, "step": 14 }, { "epoch": 0.008379888268156424, "grad_norm": 37.992488861083984, "learning_rate": 0.00012, 
"loss": 11.5283, "step": 15 }, { "epoch": 0.008938547486033519, "grad_norm": 37.751983642578125, "learning_rate": 0.00013000000000000002, "loss": 10.5765, "step": 16 }, { "epoch": 0.009497206703910615, "grad_norm": 37.5428352355957, "learning_rate": 0.00014000000000000001, "loss": 10.9605, "step": 17 }, { "epoch": 0.01005586592178771, "grad_norm": 39.13656234741211, "learning_rate": 0.00015, "loss": 10.0404, "step": 18 }, { "epoch": 0.010614525139664804, "grad_norm": 39.929405212402344, "learning_rate": 0.00016, "loss": 9.638, "step": 19 }, { "epoch": 0.0111731843575419, "grad_norm": 35.697303771972656, "learning_rate": 0.00017, "loss": 9.4459, "step": 20 }, { "epoch": 0.011731843575418994, "grad_norm": 43.64272689819336, "learning_rate": 0.00017999999999999998, "loss": 9.1672, "step": 21 }, { "epoch": 0.012290502793296089, "grad_norm": 36.118736267089844, "learning_rate": 0.00019, "loss": 8.3166, "step": 22 }, { "epoch": 0.012849162011173185, "grad_norm": 37.35279083251953, "learning_rate": 0.0002, "loss": 8.3808, "step": 23 }, { "epoch": 0.013407821229050279, "grad_norm": 38.960723876953125, "learning_rate": 0.00021, "loss": 7.4123, "step": 24 }, { "epoch": 0.013966480446927373, "grad_norm": 34.48334884643555, "learning_rate": 0.00022, "loss": 7.0422, "step": 25 }, { "epoch": 0.01452513966480447, "grad_norm": 37.087867736816406, "learning_rate": 0.00023, "loss": 6.8311, "step": 26 }, { "epoch": 0.015083798882681564, "grad_norm": 37.18873596191406, "learning_rate": 0.00024, "loss": 5.968, "step": 27 }, { "epoch": 0.01564245810055866, "grad_norm": 33.669677734375, "learning_rate": 0.00025, "loss": 5.592, "step": 28 }, { "epoch": 0.016201117318435754, "grad_norm": 33.15557098388672, "learning_rate": 0.00026000000000000003, "loss": 5.8566, "step": 29 }, { "epoch": 0.01675977653631285, "grad_norm": 25.873477935791016, "learning_rate": 0.00027, "loss": 4.7083, "step": 30 }, { "epoch": 0.017318435754189943, "grad_norm": 20.893062591552734, "learning_rate": 
0.00028000000000000003, "loss": 4.2827, "step": 31 }, { "epoch": 0.017877094972067038, "grad_norm": 19.956817626953125, "learning_rate": 0.00029, "loss": 4.0665, "step": 32 }, { "epoch": 0.018435754189944135, "grad_norm": 11.324917793273926, "learning_rate": 0.0003, "loss": 3.5017, "step": 33 }, { "epoch": 0.01899441340782123, "grad_norm": 9.019549369812012, "learning_rate": 0.00031, "loss": 3.4026, "step": 34 }, { "epoch": 0.019553072625698324, "grad_norm": 4.461162090301514, "learning_rate": 0.00032, "loss": 3.1863, "step": 35 }, { "epoch": 0.02011173184357542, "grad_norm": 2.5391528606414795, "learning_rate": 0.00033, "loss": 3.0947, "step": 36 }, { "epoch": 0.020670391061452513, "grad_norm": 2.935084581375122, "learning_rate": 0.00034, "loss": 3.0354, "step": 37 }, { "epoch": 0.021229050279329607, "grad_norm": 4.849456787109375, "learning_rate": 0.00035, "loss": 2.9455, "step": 38 }, { "epoch": 0.021787709497206705, "grad_norm": 5.720569610595703, "learning_rate": 0.00035999999999999997, "loss": 2.9848, "step": 39 }, { "epoch": 0.0223463687150838, "grad_norm": 6.49008321762085, "learning_rate": 0.00037, "loss": 2.9493, "step": 40 }, { "epoch": 0.022905027932960894, "grad_norm": 5.751615047454834, "learning_rate": 0.00038, "loss": 2.9418, "step": 41 }, { "epoch": 0.02346368715083799, "grad_norm": 4.788735866546631, "learning_rate": 0.00039000000000000005, "loss": 2.8486, "step": 42 }, { "epoch": 0.024022346368715083, "grad_norm": 3.7940988540649414, "learning_rate": 0.0004, "loss": 2.7338, "step": 43 }, { "epoch": 0.024581005586592177, "grad_norm": 2.9017174243927, "learning_rate": 0.00041, "loss": 2.6205, "step": 44 }, { "epoch": 0.025139664804469275, "grad_norm": 3.0014331340789795, "learning_rate": 0.00042, "loss": 2.5226, "step": 45 }, { "epoch": 0.02569832402234637, "grad_norm": 3.001723527908325, "learning_rate": 0.00043, "loss": 2.5499, "step": 46 }, { "epoch": 0.026256983240223464, "grad_norm": 3.0659825801849365, "learning_rate": 0.00044, "loss": 
2.4628, "step": 47 }, { "epoch": 0.026815642458100558, "grad_norm": 3.408596992492676, "learning_rate": 0.00045000000000000004, "loss": 2.31, "step": 48 }, { "epoch": 0.027374301675977653, "grad_norm": 2.9686427116394043, "learning_rate": 0.00046, "loss": 2.2109, "step": 49 }, { "epoch": 0.027932960893854747, "grad_norm": 2.8361899852752686, "learning_rate": 0.00047, "loss": 2.0227, "step": 50 }, { "epoch": 0.028491620111731845, "grad_norm": 2.9370713233947754, "learning_rate": 0.00048, "loss": 1.877, "step": 51 }, { "epoch": 0.02905027932960894, "grad_norm": 2.5855000019073486, "learning_rate": 0.00049, "loss": 1.7842, "step": 52 }, { "epoch": 0.029608938547486034, "grad_norm": 2.46258544921875, "learning_rate": 0.0005, "loss": 1.5363, "step": 53 }, { "epoch": 0.030167597765363128, "grad_norm": 2.1196603775024414, "learning_rate": 0.00051, "loss": 1.5057, "step": 54 }, { "epoch": 0.030726256983240222, "grad_norm": 1.6629084348678589, "learning_rate": 0.0005200000000000001, "loss": 1.5028, "step": 55 }, { "epoch": 0.03128491620111732, "grad_norm": 1.582436203956604, "learning_rate": 0.0005300000000000001, "loss": 1.3508, "step": 56 }, { "epoch": 0.031843575418994415, "grad_norm": 1.4905234575271606, "learning_rate": 0.00054, "loss": 1.2586, "step": 57 }, { "epoch": 0.03240223463687151, "grad_norm": 1.264930248260498, "learning_rate": 0.00055, "loss": 0.9889, "step": 58 }, { "epoch": 0.0329608938547486, "grad_norm": 1.204182744026184, "learning_rate": 0.0005600000000000001, "loss": 0.9618, "step": 59 }, { "epoch": 0.0335195530726257, "grad_norm": 1.1022368669509888, "learning_rate": 0.00057, "loss": 0.9186, "step": 60 }, { "epoch": 0.03407821229050279, "grad_norm": 1.2111046314239502, "learning_rate": 0.00058, "loss": 0.9412, "step": 61 }, { "epoch": 0.034636871508379886, "grad_norm": 1.2781904935836792, "learning_rate": 0.00059, "loss": 0.7073, "step": 62 }, { "epoch": 0.03519553072625698, "grad_norm": 1.3620400428771973, "learning_rate": 0.0006, "loss": 0.7923, 
"step": 63 }, { "epoch": 0.035754189944134075, "grad_norm": 0.6299511790275574, "learning_rate": 0.00061, "loss": 0.8297, "step": 64 }, { "epoch": 0.036312849162011177, "grad_norm": 2.654103994369507, "learning_rate": 0.00062, "loss": 0.6226, "step": 65 }, { "epoch": 0.03687150837988827, "grad_norm": 1.0151530504226685, "learning_rate": 0.00063, "loss": 0.8323, "step": 66 }, { "epoch": 0.037430167597765365, "grad_norm": 0.9279337525367737, "learning_rate": 0.00064, "loss": 0.6149, "step": 67 }, { "epoch": 0.03798882681564246, "grad_norm": 2.061647415161133, "learning_rate": 0.0006500000000000001, "loss": 0.8854, "step": 68 }, { "epoch": 0.038547486033519554, "grad_norm": 1.1083693504333496, "learning_rate": 0.00066, "loss": 0.7137, "step": 69 }, { "epoch": 0.03910614525139665, "grad_norm": 1.25771963596344, "learning_rate": 0.00067, "loss": 0.7517, "step": 70 }, { "epoch": 0.03966480446927374, "grad_norm": 0.7894580960273743, "learning_rate": 0.00068, "loss": 0.7649, "step": 71 }, { "epoch": 0.04022346368715084, "grad_norm": 1.381516456604004, "learning_rate": 0.00069, "loss": 0.8449, "step": 72 }, { "epoch": 0.04078212290502793, "grad_norm": 1.1479374170303345, "learning_rate": 0.0007, "loss": 0.7623, "step": 73 }, { "epoch": 0.041340782122905026, "grad_norm": 1.061487078666687, "learning_rate": 0.00071, "loss": 0.7552, "step": 74 }, { "epoch": 0.04189944134078212, "grad_norm": 0.7779232263565063, "learning_rate": 0.0007199999999999999, "loss": 0.7695, "step": 75 }, { "epoch": 0.042458100558659215, "grad_norm": 1.5185297727584839, "learning_rate": 0.00073, "loss": 0.6829, "step": 76 }, { "epoch": 0.043016759776536316, "grad_norm": 1.2652549743652344, "learning_rate": 0.00074, "loss": 0.7714, "step": 77 }, { "epoch": 0.04357541899441341, "grad_norm": 0.9421565532684326, "learning_rate": 0.00075, "loss": 0.725, "step": 78 }, { "epoch": 0.044134078212290505, "grad_norm": 1.9182367324829102, "learning_rate": 0.00076, "loss": 0.6889, "step": 79 }, { "epoch": 
0.0446927374301676, "grad_norm": 1.2144354581832886, "learning_rate": 0.0007700000000000001, "loss": 0.5684, "step": 80 }, { "epoch": 0.045251396648044694, "grad_norm": 1.2565786838531494, "learning_rate": 0.0007800000000000001, "loss": 0.7785, "step": 81 }, { "epoch": 0.04581005586592179, "grad_norm": 0.9230783581733704, "learning_rate": 0.00079, "loss": 0.69, "step": 82 }, { "epoch": 0.04636871508379888, "grad_norm": 1.5504589080810547, "learning_rate": 0.0008, "loss": 0.5854, "step": 83 }, { "epoch": 0.04692737430167598, "grad_norm": 0.9252780079841614, "learning_rate": 0.0008100000000000001, "loss": 0.6917, "step": 84 }, { "epoch": 0.04748603351955307, "grad_norm": 1.1322064399719238, "learning_rate": 0.00082, "loss": 0.6407, "step": 85 }, { "epoch": 0.048044692737430165, "grad_norm": 2.5337114334106445, "learning_rate": 0.00083, "loss": 0.6398, "step": 86 }, { "epoch": 0.04860335195530726, "grad_norm": 1.1239203214645386, "learning_rate": 0.00084, "loss": 0.7269, "step": 87 }, { "epoch": 0.049162011173184354, "grad_norm": 1.083847999572754, "learning_rate": 0.00085, "loss": 0.6315, "step": 88 }, { "epoch": 0.049720670391061456, "grad_norm": 0.7342848777770996, "learning_rate": 0.00086, "loss": 0.652, "step": 89 }, { "epoch": 0.05027932960893855, "grad_norm": 4.147243976593018, "learning_rate": 0.00087, "loss": 0.7615, "step": 90 }, { "epoch": 0.050837988826815644, "grad_norm": 2.0662992000579834, "learning_rate": 0.00088, "loss": 0.5951, "step": 91 }, { "epoch": 0.05139664804469274, "grad_norm": 1.634759545326233, "learning_rate": 0.0008900000000000001, "loss": 0.6484, "step": 92 }, { "epoch": 0.05195530726256983, "grad_norm": 0.8145692944526672, "learning_rate": 0.0009000000000000001, "loss": 0.6243, "step": 93 }, { "epoch": 0.05251396648044693, "grad_norm": 2.5843019485473633, "learning_rate": 0.00091, "loss": 0.653, "step": 94 }, { "epoch": 0.05307262569832402, "grad_norm": 0.823938250541687, "learning_rate": 0.00092, "loss": 0.6247, "step": 95 }, { 
"epoch": 0.053631284916201116, "grad_norm": 0.8840071558952332, "learning_rate": 0.00093, "loss": 0.4915, "step": 96 }, { "epoch": 0.05418994413407821, "grad_norm": 2.526801109313965, "learning_rate": 0.00094, "loss": 0.66, "step": 97 }, { "epoch": 0.054748603351955305, "grad_norm": 1.1919090747833252, "learning_rate": 0.00095, "loss": 0.5974, "step": 98 }, { "epoch": 0.0553072625698324, "grad_norm": 0.6100426912307739, "learning_rate": 0.00096, "loss": 0.5521, "step": 99 }, { "epoch": 0.055865921787709494, "grad_norm": 1.6367216110229492, "learning_rate": 0.0009699999999999999, "loss": 0.6039, "step": 100 }, { "epoch": 0.056424581005586595, "grad_norm": 0.8854618072509766, "learning_rate": 0.00098, "loss": 0.7298, "step": 101 }, { "epoch": 0.05698324022346369, "grad_norm": 0.5425339341163635, "learning_rate": 0.00099, "loss": 0.6097, "step": 102 }, { "epoch": 0.057541899441340784, "grad_norm": 0.9364147186279297, "learning_rate": 0.001, "loss": 0.5727, "step": 103 }, { "epoch": 0.05810055865921788, "grad_norm": 1.6017168760299683, "learning_rate": 0.0009999719887955182, "loss": 0.7056, "step": 104 }, { "epoch": 0.05865921787709497, "grad_norm": 1.070496678352356, "learning_rate": 0.0009999439775910364, "loss": 0.616, "step": 105 }, { "epoch": 0.05921787709497207, "grad_norm": 2.1718647480010986, "learning_rate": 0.0009999159663865546, "loss": 0.6594, "step": 106 }, { "epoch": 0.05977653631284916, "grad_norm": 10.674858093261719, "learning_rate": 0.0009998879551820728, "loss": 0.5402, "step": 107 }, { "epoch": 0.060335195530726256, "grad_norm": 1.2746437788009644, "learning_rate": 0.000999859943977591, "loss": 0.5654, "step": 108 }, { "epoch": 0.06089385474860335, "grad_norm": 2.5544512271881104, "learning_rate": 0.0009998319327731093, "loss": 0.6085, "step": 109 }, { "epoch": 0.061452513966480445, "grad_norm": 0.6390484571456909, "learning_rate": 0.0009998039215686275, "loss": 0.5188, "step": 110 }, { "epoch": 0.06201117318435754, "grad_norm": 1.117769479751587, 
"learning_rate": 0.0009997759103641457, "loss": 0.6537, "step": 111 }, { "epoch": 0.06256983240223464, "grad_norm": 1.5526758432388306, "learning_rate": 0.0009997478991596639, "loss": 0.5633, "step": 112 }, { "epoch": 0.06312849162011173, "grad_norm": 0.8329930305480957, "learning_rate": 0.000999719887955182, "loss": 0.734, "step": 113 }, { "epoch": 0.06368715083798883, "grad_norm": 1.0422401428222656, "learning_rate": 0.0009996918767507003, "loss": 0.5845, "step": 114 }, { "epoch": 0.06424581005586592, "grad_norm": 0.5255727767944336, "learning_rate": 0.0009996638655462185, "loss": 0.6155, "step": 115 }, { "epoch": 0.06480446927374302, "grad_norm": 0.7957318425178528, "learning_rate": 0.0009996358543417367, "loss": 0.5424, "step": 116 }, { "epoch": 0.06536312849162011, "grad_norm": 0.8654010891914368, "learning_rate": 0.000999607843137255, "loss": 0.5597, "step": 117 }, { "epoch": 0.0659217877094972, "grad_norm": 0.7695738077163696, "learning_rate": 0.0009995798319327731, "loss": 0.649, "step": 118 }, { "epoch": 0.0664804469273743, "grad_norm": 0.6359785795211792, "learning_rate": 0.0009995518207282913, "loss": 0.5015, "step": 119 }, { "epoch": 0.0670391061452514, "grad_norm": 1.1970692873001099, "learning_rate": 0.0009995238095238095, "loss": 0.5476, "step": 120 }, { "epoch": 0.06759776536312849, "grad_norm": 1.4306594133377075, "learning_rate": 0.0009994957983193277, "loss": 0.8179, "step": 121 }, { "epoch": 0.06815642458100558, "grad_norm": 0.8186668157577515, "learning_rate": 0.000999467787114846, "loss": 0.5035, "step": 122 }, { "epoch": 0.06871508379888268, "grad_norm": 0.9915730357170105, "learning_rate": 0.0009994397759103641, "loss": 0.6088, "step": 123 }, { "epoch": 0.06927374301675977, "grad_norm": 1.1441738605499268, "learning_rate": 0.0009994117647058823, "loss": 0.5978, "step": 124 }, { "epoch": 0.06983240223463687, "grad_norm": 3.2184536457061768, "learning_rate": 0.0009993837535014006, "loss": 0.6593, "step": 125 }, { "epoch": 0.07039106145251396, 
"grad_norm": 6.630455017089844, "learning_rate": 0.0009993557422969188, "loss": 0.7603, "step": 126 }, { "epoch": 0.07094972067039106, "grad_norm": 1.8797285556793213, "learning_rate": 0.000999327731092437, "loss": 0.6696, "step": 127 }, { "epoch": 0.07150837988826815, "grad_norm": 1.5480901002883911, "learning_rate": 0.0009992997198879552, "loss": 0.5827, "step": 128 }, { "epoch": 0.07206703910614524, "grad_norm": 0.9013562798500061, "learning_rate": 0.0009992717086834734, "loss": 0.5032, "step": 129 }, { "epoch": 0.07262569832402235, "grad_norm": 1.7039713859558105, "learning_rate": 0.0009992436974789916, "loss": 0.6589, "step": 130 }, { "epoch": 0.07318435754189945, "grad_norm": 1.4759498834609985, "learning_rate": 0.0009992156862745098, "loss": 0.5978, "step": 131 }, { "epoch": 0.07374301675977654, "grad_norm": 1.7050954103469849, "learning_rate": 0.000999187675070028, "loss": 0.7511, "step": 132 }, { "epoch": 0.07430167597765364, "grad_norm": 0.7413017749786377, "learning_rate": 0.0009991596638655462, "loss": 0.6098, "step": 133 }, { "epoch": 0.07486033519553073, "grad_norm": 0.9138259291648865, "learning_rate": 0.0009991316526610644, "loss": 0.6288, "step": 134 }, { "epoch": 0.07541899441340782, "grad_norm": 1.1756887435913086, "learning_rate": 0.0009991036414565826, "loss": 0.5499, "step": 135 }, { "epoch": 0.07597765363128492, "grad_norm": 3.5213701725006104, "learning_rate": 0.0009990756302521008, "loss": 0.5325, "step": 136 }, { "epoch": 0.07653631284916201, "grad_norm": 0.9328041672706604, "learning_rate": 0.000999047619047619, "loss": 0.607, "step": 137 }, { "epoch": 0.07709497206703911, "grad_norm": 0.8740829825401306, "learning_rate": 0.0009990196078431372, "loss": 0.4362, "step": 138 }, { "epoch": 0.0776536312849162, "grad_norm": 0.8908244371414185, "learning_rate": 0.0009989915966386554, "loss": 0.6038, "step": 139 }, { "epoch": 0.0782122905027933, "grad_norm": 3.7454893589019775, "learning_rate": 0.0009989635854341736, "loss": 0.5341, "step": 140 
}, { "epoch": 0.07877094972067039, "grad_norm": 9.606756210327148, "learning_rate": 0.0009989355742296918, "loss": 0.6827, "step": 141 }, { "epoch": 0.07932960893854749, "grad_norm": 0.8355755805969238, "learning_rate": 0.00099890756302521, "loss": 0.518, "step": 142 }, { "epoch": 0.07988826815642458, "grad_norm": 1.1310237646102905, "learning_rate": 0.0009988795518207283, "loss": 0.8823, "step": 143 }, { "epoch": 0.08044692737430167, "grad_norm": 0.8808151483535767, "learning_rate": 0.0009988515406162465, "loss": 0.6951, "step": 144 }, { "epoch": 0.08100558659217877, "grad_norm": 9.77657413482666, "learning_rate": 0.0009988235294117647, "loss": 0.6022, "step": 145 }, { "epoch": 0.08156424581005586, "grad_norm": 1.1427901983261108, "learning_rate": 0.0009987955182072829, "loss": 0.7025, "step": 146 }, { "epoch": 0.08212290502793296, "grad_norm": 0.9580820798873901, "learning_rate": 0.000998767507002801, "loss": 0.6505, "step": 147 }, { "epoch": 0.08268156424581005, "grad_norm": 0.7566211819648743, "learning_rate": 0.0009987394957983193, "loss": 0.6104, "step": 148 }, { "epoch": 0.08324022346368715, "grad_norm": 0.7948142886161804, "learning_rate": 0.0009987114845938375, "loss": 0.4299, "step": 149 }, { "epoch": 0.08379888268156424, "grad_norm": 1.3001176118850708, "learning_rate": 0.0009986834733893557, "loss": 0.7201, "step": 150 }, { "epoch": 0.08435754189944134, "grad_norm": 1.0026098489761353, "learning_rate": 0.0009986554621848741, "loss": 0.5754, "step": 151 }, { "epoch": 0.08491620111731843, "grad_norm": 0.7993031144142151, "learning_rate": 0.0009986274509803921, "loss": 0.5115, "step": 152 }, { "epoch": 0.08547486033519552, "grad_norm": 2.829930305480957, "learning_rate": 0.0009985994397759103, "loss": 0.5267, "step": 153 }, { "epoch": 0.08603351955307263, "grad_norm": 0.6446975469589233, "learning_rate": 0.0009985714285714285, "loss": 0.4949, "step": 154 }, { "epoch": 0.08659217877094973, "grad_norm": 1.545230507850647, "learning_rate": 
0.0009985434173669467, "loss": 0.6897, "step": 155 }, { "epoch": 0.08715083798882682, "grad_norm": 0.6464013457298279, "learning_rate": 0.0009985154061624652, "loss": 0.4356, "step": 156 }, { "epoch": 0.08770949720670392, "grad_norm": 0.5887356400489807, "learning_rate": 0.0009984873949579831, "loss": 0.5884, "step": 157 }, { "epoch": 0.08826815642458101, "grad_norm": 0.7637951374053955, "learning_rate": 0.0009984593837535014, "loss": 0.5663, "step": 158 }, { "epoch": 0.0888268156424581, "grad_norm": 0.8062839508056641, "learning_rate": 0.0009984313725490196, "loss": 0.5993, "step": 159 }, { "epoch": 0.0893854748603352, "grad_norm": 0.8858649134635925, "learning_rate": 0.0009984033613445378, "loss": 0.462, "step": 160 }, { "epoch": 0.08994413407821229, "grad_norm": 0.7305874824523926, "learning_rate": 0.0009983753501400562, "loss": 0.6053, "step": 161 }, { "epoch": 0.09050279329608939, "grad_norm": 4.35394811630249, "learning_rate": 0.0009983473389355742, "loss": 0.5371, "step": 162 }, { "epoch": 0.09106145251396648, "grad_norm": 0.7559210062026978, "learning_rate": 0.0009983193277310924, "loss": 0.5161, "step": 163 }, { "epoch": 0.09162011173184358, "grad_norm": 1.1714906692504883, "learning_rate": 0.0009982913165266106, "loss": 0.6804, "step": 164 }, { "epoch": 0.09217877094972067, "grad_norm": 0.6833853125572205, "learning_rate": 0.0009982633053221288, "loss": 0.544, "step": 165 }, { "epoch": 0.09273743016759776, "grad_norm": 1.8271632194519043, "learning_rate": 0.0009982352941176472, "loss": 0.7229, "step": 166 }, { "epoch": 0.09329608938547486, "grad_norm": 0.6042512059211731, "learning_rate": 0.0009982072829131654, "loss": 0.5214, "step": 167 }, { "epoch": 0.09385474860335195, "grad_norm": 0.7902947068214417, "learning_rate": 0.0009981792717086834, "loss": 0.5494, "step": 168 }, { "epoch": 0.09441340782122905, "grad_norm": 0.9695896506309509, "learning_rate": 0.0009981512605042016, "loss": 0.5285, "step": 169 }, { "epoch": 0.09497206703910614, "grad_norm": 
0.8638774752616882, "learning_rate": 0.0009981232492997198, "loss": 0.5545, "step": 170 }, { "epoch": 0.09553072625698324, "grad_norm": 0.7909321188926697, "learning_rate": 0.0009980952380952382, "loss": 0.5636, "step": 171 }, { "epoch": 0.09608938547486033, "grad_norm": 1.0302354097366333, "learning_rate": 0.0009980672268907565, "loss": 0.7091, "step": 172 }, { "epoch": 0.09664804469273743, "grad_norm": 0.9037856459617615, "learning_rate": 0.0009980392156862744, "loss": 0.729, "step": 173 }, { "epoch": 0.09720670391061452, "grad_norm": 0.796578586101532, "learning_rate": 0.0009980112044817926, "loss": 0.4461, "step": 174 }, { "epoch": 0.09776536312849161, "grad_norm": 1.3324625492095947, "learning_rate": 0.0009979831932773109, "loss": 0.504, "step": 175 }, { "epoch": 0.09832402234636871, "grad_norm": 1.0910922288894653, "learning_rate": 0.0009979551820728293, "loss": 0.5651, "step": 176 }, { "epoch": 0.09888268156424582, "grad_norm": 0.774204671382904, "learning_rate": 0.0009979271708683475, "loss": 0.6074, "step": 177 }, { "epoch": 0.09944134078212291, "grad_norm": 0.5686858296394348, "learning_rate": 0.0009978991596638655, "loss": 0.4672, "step": 178 }, { "epoch": 0.1, "grad_norm": 0.905280351638794, "learning_rate": 0.0009978711484593837, "loss": 0.5032, "step": 179 }, { "epoch": 0.1005586592178771, "grad_norm": 1.158460259437561, "learning_rate": 0.0009978431372549019, "loss": 0.5575, "step": 180 }, { "epoch": 0.1011173184357542, "grad_norm": 1.3891628980636597, "learning_rate": 0.0009978151260504203, "loss": 0.6522, "step": 181 }, { "epoch": 0.10167597765363129, "grad_norm": 0.6089535355567932, "learning_rate": 0.0009977871148459385, "loss": 0.5908, "step": 182 }, { "epoch": 0.10223463687150838, "grad_norm": 1.1667119264602661, "learning_rate": 0.0009977591036414567, "loss": 0.6482, "step": 183 }, { "epoch": 0.10279329608938548, "grad_norm": 4.3863139152526855, "learning_rate": 0.0009977310924369747, "loss": 0.5249, "step": 184 }, { "epoch": 
0.10335195530726257, "grad_norm": 0.6774610877037048, "learning_rate": 0.000997703081232493, "loss": 0.5093, "step": 185 }, { "epoch": 0.10391061452513967, "grad_norm": 1.5877678394317627, "learning_rate": 0.0009976750700280113, "loss": 0.549, "step": 186 }, { "epoch": 0.10446927374301676, "grad_norm": 0.8745883107185364, "learning_rate": 0.0009976470588235295, "loss": 0.4241, "step": 187 }, { "epoch": 0.10502793296089385, "grad_norm": 0.8454338908195496, "learning_rate": 0.0009976190476190477, "loss": 0.6387, "step": 188 }, { "epoch": 0.10558659217877095, "grad_norm": 0.5730166435241699, "learning_rate": 0.0009975910364145657, "loss": 0.5914, "step": 189 }, { "epoch": 0.10614525139664804, "grad_norm": 0.7003739476203918, "learning_rate": 0.000997563025210084, "loss": 0.7188, "step": 190 }, { "epoch": 0.10670391061452514, "grad_norm": 1.1836274862289429, "learning_rate": 0.0009975350140056024, "loss": 0.6011, "step": 191 }, { "epoch": 0.10726256983240223, "grad_norm": 1.303983211517334, "learning_rate": 0.0009975070028011206, "loss": 0.5994, "step": 192 }, { "epoch": 0.10782122905027933, "grad_norm": 3.37532901763916, "learning_rate": 0.0009974789915966388, "loss": 0.6153, "step": 193 }, { "epoch": 0.10837988826815642, "grad_norm": 1.1878474950790405, "learning_rate": 0.0009974509803921568, "loss": 0.5201, "step": 194 }, { "epoch": 0.10893854748603352, "grad_norm": 1.4199057817459106, "learning_rate": 0.000997422969187675, "loss": 0.5518, "step": 195 }, { "epoch": 0.10949720670391061, "grad_norm": 0.9133730530738831, "learning_rate": 0.0009973949579831934, "loss": 0.5121, "step": 196 }, { "epoch": 0.1100558659217877, "grad_norm": 0.983331561088562, "learning_rate": 0.0009973669467787116, "loss": 0.5642, "step": 197 }, { "epoch": 0.1106145251396648, "grad_norm": 1.2697263956069946, "learning_rate": 0.0009973389355742298, "loss": 0.545, "step": 198 }, { "epoch": 0.1111731843575419, "grad_norm": 0.9948388338088989, "learning_rate": 0.000997310924369748, "loss": 
0.6011, "step": 199 }, { "epoch": 0.11173184357541899, "grad_norm": 1.0217320919036865, "learning_rate": 0.000997282913165266, "loss": 0.6751, "step": 200 }, { "epoch": 0.1122905027932961, "grad_norm": 2.5174546241760254, "learning_rate": 0.0009972549019607844, "loss": 0.4967, "step": 201 }, { "epoch": 0.11284916201117319, "grad_norm": 0.7323604822158813, "learning_rate": 0.0009972268907563026, "loss": 0.6117, "step": 202 }, { "epoch": 0.11340782122905028, "grad_norm": 0.8109521269798279, "learning_rate": 0.0009971988795518208, "loss": 0.5708, "step": 203 }, { "epoch": 0.11396648044692738, "grad_norm": 0.7176600098609924, "learning_rate": 0.000997170868347339, "loss": 0.6001, "step": 204 }, { "epoch": 0.11452513966480447, "grad_norm": 0.6863576173782349, "learning_rate": 0.000997142857142857, "loss": 0.5, "step": 205 }, { "epoch": 0.11508379888268157, "grad_norm": 1.0173395872116089, "learning_rate": 0.0009971148459383755, "loss": 0.5232, "step": 206 }, { "epoch": 0.11564245810055866, "grad_norm": 1.3011823892593384, "learning_rate": 0.0009970868347338937, "loss": 0.5667, "step": 207 }, { "epoch": 0.11620111731843576, "grad_norm": 1.8995484113693237, "learning_rate": 0.0009970588235294119, "loss": 0.61, "step": 208 }, { "epoch": 0.11675977653631285, "grad_norm": 3.420168876647949, "learning_rate": 0.00099703081232493, "loss": 0.6301, "step": 209 }, { "epoch": 0.11731843575418995, "grad_norm": 6.120429515838623, "learning_rate": 0.000997002801120448, "loss": 0.5196, "step": 210 }, { "epoch": 0.11787709497206704, "grad_norm": 0.8453794717788696, "learning_rate": 0.0009969747899159665, "loss": 0.64, "step": 211 }, { "epoch": 0.11843575418994413, "grad_norm": 0.569521427154541, "learning_rate": 0.0009969467787114847, "loss": 0.501, "step": 212 }, { "epoch": 0.11899441340782123, "grad_norm": 1.01449453830719, "learning_rate": 0.000996918767507003, "loss": 0.6819, "step": 213 }, { "epoch": 0.11955307262569832, "grad_norm": 1.6356980800628662, "learning_rate": 
0.000996890756302521, "loss": 0.6916, "step": 214 }, { "epoch": 0.12011173184357542, "grad_norm": 2.6632416248321533, "learning_rate": 0.0009968627450980393, "loss": 0.5216, "step": 215 }, { "epoch": 0.12067039106145251, "grad_norm": 0.8983120918273926, "learning_rate": 0.0009968347338935573, "loss": 0.6797, "step": 216 }, { "epoch": 0.1212290502793296, "grad_norm": 0.6871258020401001, "learning_rate": 0.0009968067226890757, "loss": 0.6117, "step": 217 }, { "epoch": 0.1217877094972067, "grad_norm": 0.7885923981666565, "learning_rate": 0.000996778711484594, "loss": 0.4373, "step": 218 }, { "epoch": 0.1223463687150838, "grad_norm": 0.7660871148109436, "learning_rate": 0.0009967507002801121, "loss": 0.4945, "step": 219 }, { "epoch": 0.12290502793296089, "grad_norm": 0.5301454067230225, "learning_rate": 0.0009967226890756303, "loss": 0.527, "step": 220 }, { "epoch": 0.12346368715083798, "grad_norm": 0.6207997798919678, "learning_rate": 0.0009966946778711483, "loss": 0.4888, "step": 221 }, { "epoch": 0.12402234636871508, "grad_norm": 1.0855971574783325, "learning_rate": 0.0009966666666666668, "loss": 0.6201, "step": 222 }, { "epoch": 0.12458100558659217, "grad_norm": 4.373345851898193, "learning_rate": 0.000996638655462185, "loss": 0.6542, "step": 223 }, { "epoch": 0.12513966480446928, "grad_norm": 4.327059268951416, "learning_rate": 0.0009966106442577032, "loss": 0.4733, "step": 224 }, { "epoch": 0.12569832402234637, "grad_norm": 4.315557479858398, "learning_rate": 0.0009965826330532214, "loss": 0.6439, "step": 225 }, { "epoch": 0.12625698324022347, "grad_norm": 2.901041269302368, "learning_rate": 0.0009965546218487394, "loss": 0.6209, "step": 226 }, { "epoch": 0.12681564245810056, "grad_norm": 0.8233610391616821, "learning_rate": 0.0009965266106442578, "loss": 0.5543, "step": 227 }, { "epoch": 0.12737430167597766, "grad_norm": 0.731225848197937, "learning_rate": 0.000996498599439776, "loss": 0.501, "step": 228 }, { "epoch": 0.12793296089385475, "grad_norm": 
2.698946714401245, "learning_rate": 0.0009964705882352942, "loss": 0.5389, "step": 229 }, { "epoch": 0.12849162011173185, "grad_norm": 0.7199321389198303, "learning_rate": 0.0009964425770308124, "loss": 0.475, "step": 230 }, { "epoch": 0.12905027932960894, "grad_norm": 1.217470645904541, "learning_rate": 0.0009964145658263306, "loss": 0.6172, "step": 231 }, { "epoch": 0.12960893854748604, "grad_norm": 0.7861664891242981, "learning_rate": 0.0009963865546218488, "loss": 0.5498, "step": 232 }, { "epoch": 0.13016759776536313, "grad_norm": 1.6193184852600098, "learning_rate": 0.000996358543417367, "loss": 0.6655, "step": 233 }, { "epoch": 0.13072625698324022, "grad_norm": 0.7959342002868652, "learning_rate": 0.0009963305322128852, "loss": 0.4295, "step": 234 }, { "epoch": 0.13128491620111732, "grad_norm": 0.7992600202560425, "learning_rate": 0.0009963025210084034, "loss": 0.5969, "step": 235 }, { "epoch": 0.1318435754189944, "grad_norm": 0.7413977980613708, "learning_rate": 0.0009962745098039216, "loss": 0.6023, "step": 236 }, { "epoch": 0.1324022346368715, "grad_norm": 3.2688894271850586, "learning_rate": 0.0009962464985994398, "loss": 0.5235, "step": 237 }, { "epoch": 0.1329608938547486, "grad_norm": 1.4802277088165283, "learning_rate": 0.000996218487394958, "loss": 0.5943, "step": 238 }, { "epoch": 0.1335195530726257, "grad_norm": 0.9552064538002014, "learning_rate": 0.0009961904761904763, "loss": 0.5415, "step": 239 }, { "epoch": 0.1340782122905028, "grad_norm": 1.092341661453247, "learning_rate": 0.0009961624649859945, "loss": 0.6357, "step": 240 }, { "epoch": 0.13463687150837989, "grad_norm": 1.7820661067962646, "learning_rate": 0.0009961344537815127, "loss": 0.5674, "step": 241 }, { "epoch": 0.13519553072625698, "grad_norm": 0.7442818880081177, "learning_rate": 0.0009961064425770309, "loss": 0.5915, "step": 242 }, { "epoch": 0.13575418994413407, "grad_norm": 4.437726020812988, "learning_rate": 0.000996078431372549, "loss": 0.536, "step": 243 }, { "epoch": 
0.13631284916201117, "grad_norm": 0.7915422916412354, "learning_rate": 0.0009960504201680673, "loss": 0.5699, "step": 244 }, { "epoch": 0.13687150837988826, "grad_norm": 1.7991547584533691, "learning_rate": 0.0009960224089635855, "loss": 0.547, "step": 245 }, { "epoch": 0.13743016759776536, "grad_norm": 1.4279496669769287, "learning_rate": 0.0009959943977591037, "loss": 0.455, "step": 246 }, { "epoch": 0.13798882681564245, "grad_norm": 2.0297365188598633, "learning_rate": 0.000995966386554622, "loss": 0.5672, "step": 247 }, { "epoch": 0.13854748603351955, "grad_norm": 0.9002755880355835, "learning_rate": 0.00099593837535014, "loss": 0.6349, "step": 248 }, { "epoch": 0.13910614525139664, "grad_norm": 0.652621865272522, "learning_rate": 0.0009959103641456583, "loss": 0.5551, "step": 249 }, { "epoch": 0.13966480446927373, "grad_norm": 0.738399088382721, "learning_rate": 0.0009958823529411765, "loss": 0.5952, "step": 250 }, { "epoch": 0.14022346368715083, "grad_norm": 1.0210678577423096, "learning_rate": 0.0009958543417366947, "loss": 0.607, "step": 251 }, { "epoch": 0.14078212290502792, "grad_norm": 1.1312799453735352, "learning_rate": 0.000995826330532213, "loss": 0.8005, "step": 252 }, { "epoch": 0.14134078212290502, "grad_norm": 4.63861083984375, "learning_rate": 0.0009957983193277311, "loss": 0.5475, "step": 253 }, { "epoch": 0.1418994413407821, "grad_norm": 2.7473626136779785, "learning_rate": 0.0009957703081232493, "loss": 0.5327, "step": 254 }, { "epoch": 0.1424581005586592, "grad_norm": 1.1836369037628174, "learning_rate": 0.0009957422969187675, "loss": 0.5657, "step": 255 }, { "epoch": 0.1430167597765363, "grad_norm": 1.3779765367507935, "learning_rate": 0.0009957142857142858, "loss": 0.4871, "step": 256 }, { "epoch": 0.1435754189944134, "grad_norm": 0.649736762046814, "learning_rate": 0.000995686274509804, "loss": 0.4794, "step": 257 }, { "epoch": 0.1441340782122905, "grad_norm": 1.8200886249542236, "learning_rate": 0.0009956582633053222, "loss": 0.6292, 
"step": 258 }, { "epoch": 0.14469273743016758, "grad_norm": 1.9034805297851562, "learning_rate": 0.0009956302521008404, "loss": 0.682, "step": 259 }, { "epoch": 0.1452513966480447, "grad_norm": 1.3746448755264282, "learning_rate": 0.0009956022408963586, "loss": 0.503, "step": 260 }, { "epoch": 0.1458100558659218, "grad_norm": 1.5244262218475342, "learning_rate": 0.0009955742296918768, "loss": 0.6125, "step": 261 }, { "epoch": 0.1463687150837989, "grad_norm": 0.6705476641654968, "learning_rate": 0.000995546218487395, "loss": 0.5163, "step": 262 }, { "epoch": 0.146927374301676, "grad_norm": 1.0974162817001343, "learning_rate": 0.0009955182072829132, "loss": 0.6924, "step": 263 }, { "epoch": 0.14748603351955308, "grad_norm": 1.7833330631256104, "learning_rate": 0.0009954901960784314, "loss": 0.6602, "step": 264 }, { "epoch": 0.14804469273743018, "grad_norm": 0.9520974159240723, "learning_rate": 0.0009954621848739496, "loss": 0.5909, "step": 265 }, { "epoch": 0.14860335195530727, "grad_norm": 0.8226149678230286, "learning_rate": 0.0009954341736694678, "loss": 0.6374, "step": 266 }, { "epoch": 0.14916201117318437, "grad_norm": 0.6469694972038269, "learning_rate": 0.000995406162464986, "loss": 0.5713, "step": 267 }, { "epoch": 0.14972067039106146, "grad_norm": 0.9335330128669739, "learning_rate": 0.0009953781512605042, "loss": 0.6174, "step": 268 }, { "epoch": 0.15027932960893856, "grad_norm": 0.7867704033851624, "learning_rate": 0.0009953501400560224, "loss": 0.5148, "step": 269 }, { "epoch": 0.15083798882681565, "grad_norm": 1.929457187652588, "learning_rate": 0.0009953221288515406, "loss": 0.5345, "step": 270 }, { "epoch": 0.15139664804469274, "grad_norm": 0.9096598029136658, "learning_rate": 0.0009952941176470588, "loss": 0.4868, "step": 271 }, { "epoch": 0.15195530726256984, "grad_norm": 0.7338758111000061, "learning_rate": 0.000995266106442577, "loss": 0.5996, "step": 272 }, { "epoch": 0.15251396648044693, "grad_norm": 1.0851118564605713, "learning_rate": 
0.0009952380952380953, "loss": 0.5014, "step": 273 }, { "epoch": 0.15307262569832403, "grad_norm": 1.3886101245880127, "learning_rate": 0.0009952100840336135, "loss": 0.6848, "step": 274 }, { "epoch": 0.15363128491620112, "grad_norm": 1.0894609689712524, "learning_rate": 0.0009951820728291317, "loss": 0.7159, "step": 275 }, { "epoch": 0.15418994413407822, "grad_norm": 0.5999849438667297, "learning_rate": 0.0009951540616246499, "loss": 0.4273, "step": 276 }, { "epoch": 0.1547486033519553, "grad_norm": 1.2654602527618408, "learning_rate": 0.000995126050420168, "loss": 0.6949, "step": 277 }, { "epoch": 0.1553072625698324, "grad_norm": 1.3784074783325195, "learning_rate": 0.0009950980392156863, "loss": 0.5785, "step": 278 }, { "epoch": 0.1558659217877095, "grad_norm": 3.8308889865875244, "learning_rate": 0.0009950700280112045, "loss": 0.5655, "step": 279 }, { "epoch": 0.1564245810055866, "grad_norm": 1.7307665348052979, "learning_rate": 0.0009950420168067227, "loss": 0.5642, "step": 280 }, { "epoch": 0.1569832402234637, "grad_norm": 3.464566230773926, "learning_rate": 0.000995014005602241, "loss": 0.5521, "step": 281 }, { "epoch": 0.15754189944134078, "grad_norm": 1.9289082288742065, "learning_rate": 0.0009949859943977591, "loss": 0.5533, "step": 282 }, { "epoch": 0.15810055865921788, "grad_norm": 1.3773527145385742, "learning_rate": 0.0009949579831932773, "loss": 0.6159, "step": 283 }, { "epoch": 0.15865921787709497, "grad_norm": 0.7298031449317932, "learning_rate": 0.0009949299719887955, "loss": 0.4764, "step": 284 }, { "epoch": 0.15921787709497207, "grad_norm": 4.222969055175781, "learning_rate": 0.0009949019607843137, "loss": 0.4393, "step": 285 }, { "epoch": 0.15977653631284916, "grad_norm": 0.9089906215667725, "learning_rate": 0.000994873949579832, "loss": 0.5886, "step": 286 }, { "epoch": 0.16033519553072625, "grad_norm": 0.988497257232666, "learning_rate": 0.0009948459383753501, "loss": 0.4182, "step": 287 }, { "epoch": 0.16089385474860335, "grad_norm": 
2.0216290950775146, "learning_rate": 0.0009948179271708683, "loss": 0.5996, "step": 288 }, { "epoch": 0.16145251396648044, "grad_norm": 0.7447571158409119, "learning_rate": 0.0009947899159663866, "loss": 0.635, "step": 289 }, { "epoch": 0.16201117318435754, "grad_norm": 0.9543245434761047, "learning_rate": 0.0009947619047619048, "loss": 0.5056, "step": 290 }, { "epoch": 0.16256983240223463, "grad_norm": 1.108781099319458, "learning_rate": 0.000994733893557423, "loss": 0.5206, "step": 291 }, { "epoch": 0.16312849162011173, "grad_norm": 2.061345338821411, "learning_rate": 0.0009947058823529412, "loss": 0.5572, "step": 292 }, { "epoch": 0.16368715083798882, "grad_norm": 0.9002458453178406, "learning_rate": 0.0009946778711484594, "loss": 0.4982, "step": 293 }, { "epoch": 0.16424581005586592, "grad_norm": 0.6461503505706787, "learning_rate": 0.0009946498599439776, "loss": 0.4248, "step": 294 }, { "epoch": 0.164804469273743, "grad_norm": 0.9050043225288391, "learning_rate": 0.0009946218487394958, "loss": 0.6251, "step": 295 }, { "epoch": 0.1653631284916201, "grad_norm": 1.5620747804641724, "learning_rate": 0.000994593837535014, "loss": 0.3986, "step": 296 }, { "epoch": 0.1659217877094972, "grad_norm": 0.5747299194335938, "learning_rate": 0.0009945658263305322, "loss": 0.5393, "step": 297 }, { "epoch": 0.1664804469273743, "grad_norm": 5.657763481140137, "learning_rate": 0.0009945378151260504, "loss": 0.6081, "step": 298 }, { "epoch": 0.1670391061452514, "grad_norm": 1.2506048679351807, "learning_rate": 0.0009945098039215686, "loss": 0.5267, "step": 299 }, { "epoch": 0.16759776536312848, "grad_norm": 1.3759843111038208, "learning_rate": 0.0009944817927170868, "loss": 0.4981, "step": 300 }, { "epoch": 0.16815642458100558, "grad_norm": 0.6979724168777466, "learning_rate": 0.000994453781512605, "loss": 0.5283, "step": 301 }, { "epoch": 0.16871508379888267, "grad_norm": 0.6828892230987549, "learning_rate": 0.0009944257703081232, "loss": 0.523, "step": 302 }, { "epoch": 
0.16927374301675976, "grad_norm": 2.234743356704712, "learning_rate": 0.0009943977591036414, "loss": 0.6468, "step": 303 }, { "epoch": 0.16983240223463686, "grad_norm": 4.027324676513672, "learning_rate": 0.0009943697478991596, "loss": 0.473, "step": 304 }, { "epoch": 0.17039106145251395, "grad_norm": 1.2224324941635132, "learning_rate": 0.0009943417366946778, "loss": 0.5357, "step": 305 }, { "epoch": 0.17094972067039105, "grad_norm": 0.8478534817695618, "learning_rate": 0.000994313725490196, "loss": 0.5372, "step": 306 }, { "epoch": 0.17150837988826817, "grad_norm": 0.9016631841659546, "learning_rate": 0.0009942857142857143, "loss": 0.5891, "step": 307 }, { "epoch": 0.17206703910614526, "grad_norm": 1.09122896194458, "learning_rate": 0.0009942577030812325, "loss": 0.5555, "step": 308 }, { "epoch": 0.17262569832402236, "grad_norm": 1.4219303131103516, "learning_rate": 0.0009942296918767507, "loss": 0.5375, "step": 309 }, { "epoch": 0.17318435754189945, "grad_norm": 1.436359167098999, "learning_rate": 0.0009942016806722689, "loss": 0.7384, "step": 310 }, { "epoch": 0.17374301675977655, "grad_norm": 0.825744092464447, "learning_rate": 0.000994173669467787, "loss": 0.6145, "step": 311 }, { "epoch": 0.17430167597765364, "grad_norm": null, "learning_rate": 0.000994173669467787, "loss": 0.6434, "step": 312 }, { "epoch": 0.17486033519553074, "grad_norm": 1.1593785285949707, "learning_rate": 0.0009941456582633053, "loss": 0.4869, "step": 313 }, { "epoch": 0.17541899441340783, "grad_norm": 1.1484551429748535, "learning_rate": 0.0009941176470588235, "loss": 0.6688, "step": 314 }, { "epoch": 0.17597765363128492, "grad_norm": 1.0478200912475586, "learning_rate": 0.0009940896358543417, "loss": 0.573, "step": 315 }, { "epoch": 0.17653631284916202, "grad_norm": 3.2461318969726562, "learning_rate": 0.00099406162464986, "loss": 0.6484, "step": 316 }, { "epoch": 0.1770949720670391, "grad_norm": 2.6686880588531494, "learning_rate": 0.0009940336134453781, "loss": 0.6038, "step": 
317 }, { "epoch": 0.1776536312849162, "grad_norm": 1.0154472589492798, "learning_rate": 0.0009940056022408963, "loss": 0.7957, "step": 318 }, { "epoch": 0.1782122905027933, "grad_norm": 0.7798212170600891, "learning_rate": 0.0009939775910364145, "loss": 0.5341, "step": 319 }, { "epoch": 0.1787709497206704, "grad_norm": 1.3281465768814087, "learning_rate": 0.0009939495798319327, "loss": 0.6075, "step": 320 }, { "epoch": 0.1793296089385475, "grad_norm": 0.6935046911239624, "learning_rate": 0.000993921568627451, "loss": 0.5669, "step": 321 }, { "epoch": 0.17988826815642459, "grad_norm": 1.006704568862915, "learning_rate": 0.0009938935574229691, "loss": 0.531, "step": 322 }, { "epoch": 0.18044692737430168, "grad_norm": 1.3580342531204224, "learning_rate": 0.0009938655462184876, "loss": 0.5573, "step": 323 }, { "epoch": 0.18100558659217877, "grad_norm": 3.498706340789795, "learning_rate": 0.0009938375350140056, "loss": 0.6409, "step": 324 }, { "epoch": 0.18156424581005587, "grad_norm": 1.06218683719635, "learning_rate": 0.0009938095238095238, "loss": 0.599, "step": 325 }, { "epoch": 0.18212290502793296, "grad_norm": 1.1846939325332642, "learning_rate": 0.000993781512605042, "loss": 0.5959, "step": 326 }, { "epoch": 0.18268156424581006, "grad_norm": 1.4181071519851685, "learning_rate": 0.0009937535014005602, "loss": 0.4734, "step": 327 }, { "epoch": 0.18324022346368715, "grad_norm": 0.738722562789917, "learning_rate": 0.0009937254901960786, "loss": 0.5656, "step": 328 }, { "epoch": 0.18379888268156425, "grad_norm": 1.360398292541504, "learning_rate": 0.0009936974789915966, "loss": 0.4343, "step": 329 }, { "epoch": 0.18435754189944134, "grad_norm": 1.1944875717163086, "learning_rate": 0.0009936694677871148, "loss": 0.482, "step": 330 }, { "epoch": 0.18491620111731844, "grad_norm": 0.5688614249229431, "learning_rate": 0.000993641456582633, "loss": 0.5339, "step": 331 }, { "epoch": 0.18547486033519553, "grad_norm": 0.9470820426940918, "learning_rate": 0.0009936134453781512, 
"loss": 0.6469, "step": 332 }, { "epoch": 0.18603351955307262, "grad_norm": 1.0332776308059692, "learning_rate": 0.0009935854341736696, "loss": 0.4985, "step": 333 }, { "epoch": 0.18659217877094972, "grad_norm": 1.2427397966384888, "learning_rate": 0.0009935574229691876, "loss": 0.5668, "step": 334 }, { "epoch": 0.1871508379888268, "grad_norm": 5.453192234039307, "learning_rate": 0.0009935294117647058, "loss": 0.4979, "step": 335 }, { "epoch": 0.1877094972067039, "grad_norm": 0.9218781590461731, "learning_rate": 0.000993501400560224, "loss": 0.7005, "step": 336 }, { "epoch": 0.188268156424581, "grad_norm": 0.8410573601722717, "learning_rate": 0.0009934733893557422, "loss": 0.4558, "step": 337 }, { "epoch": 0.1888268156424581, "grad_norm": 0.7678621411323547, "learning_rate": 0.0009934453781512607, "loss": 0.6676, "step": 338 }, { "epoch": 0.1893854748603352, "grad_norm": 0.8786569833755493, "learning_rate": 0.0009934173669467789, "loss": 0.5568, "step": 339 }, { "epoch": 0.18994413407821228, "grad_norm": 4.146731376647949, "learning_rate": 0.0009933893557422969, "loss": 0.5414, "step": 340 }, { "epoch": 0.19050279329608938, "grad_norm": 1.320022463798523, "learning_rate": 0.000993361344537815, "loss": 0.5405, "step": 341 }, { "epoch": 0.19106145251396647, "grad_norm": 0.923991322517395, "learning_rate": 0.0009933333333333333, "loss": 0.5447, "step": 342 }, { "epoch": 0.19162011173184357, "grad_norm": 1.1749308109283447, "learning_rate": 0.0009933053221288517, "loss": 0.5945, "step": 343 }, { "epoch": 0.19217877094972066, "grad_norm": 0.7339878082275391, "learning_rate": 0.00099327731092437, "loss": 0.6541, "step": 344 }, { "epoch": 0.19273743016759776, "grad_norm": 1.042183756828308, "learning_rate": 0.0009932492997198879, "loss": 0.5684, "step": 345 }, { "epoch": 0.19329608938547485, "grad_norm": 0.8728654980659485, "learning_rate": 0.000993221288515406, "loss": 0.5173, "step": 346 }, { "epoch": 0.19385474860335195, "grad_norm": 1.5398110151290894, 
"learning_rate": 0.0009931932773109243, "loss": 0.5283, "step": 347 }, { "epoch": 0.19441340782122904, "grad_norm": 1.3644812107086182, "learning_rate": 0.0009931652661064427, "loss": 0.5595, "step": 348 }, { "epoch": 0.19497206703910613, "grad_norm": 1.0397660732269287, "learning_rate": 0.000993137254901961, "loss": 0.592, "step": 349 }, { "epoch": 0.19553072625698323, "grad_norm": 0.7873978614807129, "learning_rate": 0.000993109243697479, "loss": 0.5031, "step": 350 }, { "epoch": 0.19608938547486032, "grad_norm": 0.8450810313224792, "learning_rate": 0.0009930812324929971, "loss": 0.5131, "step": 351 }, { "epoch": 0.19664804469273742, "grad_norm": 0.907400906085968, "learning_rate": 0.0009930532212885153, "loss": 0.7516, "step": 352 }, { "epoch": 0.1972067039106145, "grad_norm": 0.9214017987251282, "learning_rate": 0.0009930252100840337, "loss": 0.5093, "step": 353 }, { "epoch": 0.19776536312849163, "grad_norm": 0.7404553890228271, "learning_rate": 0.000992997198879552, "loss": 0.4838, "step": 354 }, { "epoch": 0.19832402234636873, "grad_norm": 1.0390986204147339, "learning_rate": 0.0009929691876750702, "loss": 0.5519, "step": 355 }, { "epoch": 0.19888268156424582, "grad_norm": 0.8315941095352173, "learning_rate": 0.0009929411764705881, "loss": 0.5808, "step": 356 }, { "epoch": 0.19944134078212292, "grad_norm": 2.7812280654907227, "learning_rate": 0.0009929131652661064, "loss": 0.4906, "step": 357 }, { "epoch": 0.2, "grad_norm": 0.6816196441650391, "learning_rate": 0.0009928851540616248, "loss": 0.6241, "step": 358 }, { "epoch": 0.2005586592178771, "grad_norm": 0.8561577796936035, "learning_rate": 0.000992857142857143, "loss": 0.7879, "step": 359 }, { "epoch": 0.2011173184357542, "grad_norm": 13.957138061523438, "learning_rate": 0.0009928291316526612, "loss": 0.4757, "step": 360 }, { "epoch": 0.2016759776536313, "grad_norm": 0.6377781629562378, "learning_rate": 0.0009928011204481792, "loss": 0.5755, "step": 361 }, { "epoch": 0.2022346368715084, "grad_norm": 
0.7347707748413086, "learning_rate": 0.0009927731092436974, "loss": 0.5293, "step": 362 }, { "epoch": 0.20279329608938548, "grad_norm": 2.759532928466797, "learning_rate": 0.0009927450980392158, "loss": 0.4397, "step": 363 }, { "epoch": 0.20335195530726258, "grad_norm": 1.5283832550048828, "learning_rate": 0.000992717086834734, "loss": 0.581, "step": 364 }, { "epoch": 0.20391061452513967, "grad_norm": 2.0260815620422363, "learning_rate": 0.0009926890756302522, "loss": 0.6305, "step": 365 }, { "epoch": 0.20446927374301677, "grad_norm": 0.6075266003608704, "learning_rate": 0.0009926610644257702, "loss": 0.4664, "step": 366 }, { "epoch": 0.20502793296089386, "grad_norm": 0.9785339832305908, "learning_rate": 0.0009926330532212884, "loss": 0.6795, "step": 367 }, { "epoch": 0.20558659217877095, "grad_norm": 0.975587010383606, "learning_rate": 0.0009926050420168068, "loss": 0.5089, "step": 368 }, { "epoch": 0.20614525139664805, "grad_norm": 0.9132829308509827, "learning_rate": 0.000992577030812325, "loss": 0.5454, "step": 369 }, { "epoch": 0.20670391061452514, "grad_norm": 1.2932801246643066, "learning_rate": 0.0009925490196078432, "loss": 0.5674, "step": 370 }, { "epoch": 0.20726256983240224, "grad_norm": 1.3855035305023193, "learning_rate": 0.0009925210084033615, "loss": 0.4964, "step": 371 }, { "epoch": 0.20782122905027933, "grad_norm": 1.4948569536209106, "learning_rate": 0.0009924929971988794, "loss": 0.6149, "step": 372 }, { "epoch": 0.20837988826815643, "grad_norm": 0.5700994729995728, "learning_rate": 0.0009924649859943979, "loss": 0.4061, "step": 373 }, { "epoch": 0.20893854748603352, "grad_norm": 0.5803407430648804, "learning_rate": 0.000992436974789916, "loss": 0.5132, "step": 374 }, { "epoch": 0.20949720670391062, "grad_norm": 0.9584372639656067, "learning_rate": 0.0009924089635854343, "loss": 0.5948, "step": 375 }, { "epoch": 0.2100558659217877, "grad_norm": 3.9942266941070557, "learning_rate": 0.0009923809523809525, "loss": 0.4893, "step": 376 }, { "epoch": 
0.2106145251396648, "grad_norm": 1.9359759092330933, "learning_rate": 0.0009923529411764705, "loss": 0.6682, "step": 377 }, { "epoch": 0.2111731843575419, "grad_norm": 0.8714433908462524, "learning_rate": 0.000992324929971989, "loss": 0.4921, "step": 378 }, { "epoch": 0.211731843575419, "grad_norm": 2.355104684829712, "learning_rate": 0.000992296918767507, "loss": 0.4428, "step": 379 }, { "epoch": 0.2122905027932961, "grad_norm": 0.7010190486907959, "learning_rate": 0.0009922689075630253, "loss": 0.5505, "step": 380 }, { "epoch": 0.21284916201117318, "grad_norm": 0.7834289073944092, "learning_rate": 0.0009922408963585435, "loss": 0.5067, "step": 381 }, { "epoch": 0.21340782122905028, "grad_norm": 1.174731731414795, "learning_rate": 0.0009922128851540615, "loss": 0.4715, "step": 382 }, { "epoch": 0.21396648044692737, "grad_norm": 0.8410313129425049, "learning_rate": 0.00099218487394958, "loss": 0.4943, "step": 383 }, { "epoch": 0.21452513966480447, "grad_norm": 2.046583652496338, "learning_rate": 0.0009921568627450981, "loss": 0.5629, "step": 384 }, { "epoch": 0.21508379888268156, "grad_norm": 1.2268325090408325, "learning_rate": 0.0009921288515406163, "loss": 0.4832, "step": 385 }, { "epoch": 0.21564245810055865, "grad_norm": 6.274129390716553, "learning_rate": 0.0009921008403361345, "loss": 0.6357, "step": 386 }, { "epoch": 0.21620111731843575, "grad_norm": 1.5891910791397095, "learning_rate": 0.0009920728291316528, "loss": 0.6177, "step": 387 }, { "epoch": 0.21675977653631284, "grad_norm": 0.9734386801719666, "learning_rate": 0.000992044817927171, "loss": 0.5366, "step": 388 }, { "epoch": 0.21731843575418994, "grad_norm": 0.5598587989807129, "learning_rate": 0.0009920168067226892, "loss": 0.405, "step": 389 }, { "epoch": 0.21787709497206703, "grad_norm": 5.168168067932129, "learning_rate": 0.0009919887955182074, "loss": 0.5266, "step": 390 }, { "epoch": 0.21843575418994413, "grad_norm": 0.7288747429847717, "learning_rate": 0.0009919607843137256, "loss": 0.4978, 
"step": 391 }, { "epoch": 0.21899441340782122, "grad_norm": 0.8025857210159302, "learning_rate": 0.0009919327731092438, "loss": 0.54, "step": 392 }, { "epoch": 0.21955307262569831, "grad_norm": 0.9832891225814819, "learning_rate": 0.000991904761904762, "loss": 0.5582, "step": 393 }, { "epoch": 0.2201117318435754, "grad_norm": 0.5751308798789978, "learning_rate": 0.0009918767507002802, "loss": 0.4927, "step": 394 }, { "epoch": 0.2206703910614525, "grad_norm": 0.8643447160720825, "learning_rate": 0.0009918487394957984, "loss": 0.6098, "step": 395 }, { "epoch": 0.2212290502793296, "grad_norm": 0.8747020363807678, "learning_rate": 0.0009918207282913166, "loss": 0.5769, "step": 396 }, { "epoch": 0.2217877094972067, "grad_norm": 1.0640156269073486, "learning_rate": 0.0009917927170868348, "loss": 0.5298, "step": 397 }, { "epoch": 0.2223463687150838, "grad_norm": 1.4771331548690796, "learning_rate": 0.000991764705882353, "loss": 0.5066, "step": 398 }, { "epoch": 0.22290502793296088, "grad_norm": 0.8105157017707825, "learning_rate": 0.0009917366946778712, "loss": 0.566, "step": 399 }, { "epoch": 0.22346368715083798, "grad_norm": 1.858401894569397, "learning_rate": 0.0009917086834733894, "loss": 0.4828, "step": 400 }, { "epoch": 0.2240223463687151, "grad_norm": 3.5270822048187256, "learning_rate": 0.0009916806722689076, "loss": 0.4755, "step": 401 }, { "epoch": 0.2245810055865922, "grad_norm": 0.7943991422653198, "learning_rate": 0.0009916526610644258, "loss": 0.6251, "step": 402 }, { "epoch": 0.22513966480446929, "grad_norm": 1.6298469305038452, "learning_rate": 0.000991624649859944, "loss": 0.5079, "step": 403 }, { "epoch": 0.22569832402234638, "grad_norm": 1.080518364906311, "learning_rate": 0.0009915966386554623, "loss": 0.5656, "step": 404 }, { "epoch": 0.22625698324022347, "grad_norm": 0.7638111710548401, "learning_rate": 0.0009915686274509805, "loss": 0.5934, "step": 405 }, { "epoch": 0.22681564245810057, "grad_norm": 0.8922688961029053, "learning_rate": 
0.0009915406162464987, "loss": 0.5072, "step": 406 }, { "epoch": 0.22737430167597766, "grad_norm": 0.8489904403686523, "learning_rate": 0.0009915126050420169, "loss": 0.6803, "step": 407 }, { "epoch": 0.22793296089385476, "grad_norm": 0.4775846302509308, "learning_rate": 0.000991484593837535, "loss": 0.4048, "step": 408 }, { "epoch": 0.22849162011173185, "grad_norm": 0.7432544827461243, "learning_rate": 0.0009914565826330533, "loss": 0.6623, "step": 409 }, { "epoch": 0.22905027932960895, "grad_norm": 1.2556594610214233, "learning_rate": 0.0009914285714285715, "loss": 0.5097, "step": 410 }, { "epoch": 0.22960893854748604, "grad_norm": 1.8600044250488281, "learning_rate": 0.0009914005602240897, "loss": 0.7254, "step": 411 }, { "epoch": 0.23016759776536314, "grad_norm": 3.979811906814575, "learning_rate": 0.000991372549019608, "loss": 0.7805, "step": 412 }, { "epoch": 0.23072625698324023, "grad_norm": 0.9164400100708008, "learning_rate": 0.000991344537815126, "loss": 0.5806, "step": 413 }, { "epoch": 0.23128491620111732, "grad_norm": 1.161778211593628, "learning_rate": 0.0009913165266106443, "loss": 0.5192, "step": 414 }, { "epoch": 0.23184357541899442, "grad_norm": 0.6569408178329468, "learning_rate": 0.0009912885154061625, "loss": 0.5496, "step": 415 }, { "epoch": 0.2324022346368715, "grad_norm": 0.6834328770637512, "learning_rate": 0.0009912605042016807, "loss": 0.6046, "step": 416 }, { "epoch": 0.2329608938547486, "grad_norm": 1.3349989652633667, "learning_rate": 0.000991232492997199, "loss": 0.5765, "step": 417 }, { "epoch": 0.2335195530726257, "grad_norm": 0.8419320583343506, "learning_rate": 0.0009912044817927171, "loss": 0.7966, "step": 418 }, { "epoch": 0.2340782122905028, "grad_norm": 1.4713504314422607, "learning_rate": 0.0009911764705882353, "loss": 0.6522, "step": 419 }, { "epoch": 0.2346368715083799, "grad_norm": 1.129390835762024, "learning_rate": 0.0009911484593837535, "loss": 0.4777, "step": 420 }, { "epoch": 0.23519553072625698, "grad_norm": 
0.8232699632644653, "learning_rate": 0.0009911204481792718, "loss": 0.4943, "step": 421 }, { "epoch": 0.23575418994413408, "grad_norm": 0.7964234948158264, "learning_rate": 0.00099109243697479, "loss": 0.4717, "step": 422 }, { "epoch": 0.23631284916201117, "grad_norm": 1.2052483558654785, "learning_rate": 0.0009910644257703082, "loss": 0.5989, "step": 423 }, { "epoch": 0.23687150837988827, "grad_norm": 1.7664382457733154, "learning_rate": 0.0009910364145658264, "loss": 0.6418, "step": 424 }, { "epoch": 0.23743016759776536, "grad_norm": 0.9030166864395142, "learning_rate": 0.0009910084033613446, "loss": 0.5307, "step": 425 }, { "epoch": 0.23798882681564246, "grad_norm": 0.9568511247634888, "learning_rate": 0.0009909803921568628, "loss": 0.6072, "step": 426 }, { "epoch": 0.23854748603351955, "grad_norm": 1.076154112815857, "learning_rate": 0.000990952380952381, "loss": 0.6198, "step": 427 }, { "epoch": 0.23910614525139665, "grad_norm": 0.6711006164550781, "learning_rate": 0.0009909243697478992, "loss": 0.531, "step": 428 }, { "epoch": 0.23966480446927374, "grad_norm": 0.7736319303512573, "learning_rate": 0.0009908963585434174, "loss": 0.6457, "step": 429 }, { "epoch": 0.24022346368715083, "grad_norm": 1.0549379587173462, "learning_rate": 0.0009908683473389356, "loss": 0.6572, "step": 430 }, { "epoch": 0.24078212290502793, "grad_norm": 1.4842422008514404, "learning_rate": 0.0009908403361344538, "loss": 0.5173, "step": 431 }, { "epoch": 0.24134078212290502, "grad_norm": 0.9686875343322754, "learning_rate": 0.000990812324929972, "loss": 0.4831, "step": 432 }, { "epoch": 0.24189944134078212, "grad_norm": 1.2189487218856812, "learning_rate": 0.0009907843137254902, "loss": 0.4976, "step": 433 }, { "epoch": 0.2424581005586592, "grad_norm": 2.1376233100891113, "learning_rate": 0.0009907563025210084, "loss": 0.6494, "step": 434 }, { "epoch": 0.2430167597765363, "grad_norm": 0.8061239719390869, "learning_rate": 0.0009907282913165266, "loss": 0.4959, "step": 435 }, { "epoch": 
0.2435754189944134, "grad_norm": 0.7881906032562256, "learning_rate": 0.0009907002801120448, "loss": 0.5501, "step": 436 }, { "epoch": 0.2441340782122905, "grad_norm": 0.8568841218948364, "learning_rate": 0.000990672268907563, "loss": 0.6192, "step": 437 }, { "epoch": 0.2446927374301676, "grad_norm": 2.3859543800354004, "learning_rate": 0.0009906442577030813, "loss": 0.4891, "step": 438 }, { "epoch": 0.24525139664804468, "grad_norm": 0.701188325881958, "learning_rate": 0.0009906162464985995, "loss": 0.6303, "step": 439 }, { "epoch": 0.24581005586592178, "grad_norm": 1.6035054922103882, "learning_rate": 0.0009905882352941177, "loss": 0.6473, "step": 440 }, { "epoch": 0.24636871508379887, "grad_norm": 0.8858741521835327, "learning_rate": 0.0009905602240896359, "loss": 0.5283, "step": 441 }, { "epoch": 0.24692737430167597, "grad_norm": 0.979948103427887, "learning_rate": 0.000990532212885154, "loss": 0.5517, "step": 442 }, { "epoch": 0.24748603351955306, "grad_norm": 0.8531337380409241, "learning_rate": 0.0009905042016806723, "loss": 0.4889, "step": 443 }, { "epoch": 0.24804469273743016, "grad_norm": 1.120388388633728, "learning_rate": 0.0009904761904761905, "loss": 0.5923, "step": 444 }, { "epoch": 0.24860335195530725, "grad_norm": 1.450002670288086, "learning_rate": 0.0009904481792717087, "loss": 0.5827, "step": 445 }, { "epoch": 0.24916201117318434, "grad_norm": 0.8190898299217224, "learning_rate": 0.000990420168067227, "loss": 0.4909, "step": 446 }, { "epoch": 0.24972067039106144, "grad_norm": 1.3921219110488892, "learning_rate": 0.0009903921568627451, "loss": 0.6231, "step": 447 }, { "epoch": 0.25027932960893856, "grad_norm": 4.642048358917236, "learning_rate": 0.0009903641456582633, "loss": 0.6153, "step": 448 }, { "epoch": 0.2508379888268156, "grad_norm": 1.0766607522964478, "learning_rate": 0.0009903361344537815, "loss": 0.5779, "step": 449 }, { "epoch": 0.25139664804469275, "grad_norm": 1.5149521827697754, "learning_rate": 0.0009903081232492997, "loss": 
0.5621, "step": 450 }, { "epoch": 0.2519553072625698, "grad_norm": 1.6316112279891968, "learning_rate": 0.000990280112044818, "loss": 0.5128, "step": 451 }, { "epoch": 0.25251396648044694, "grad_norm": 0.7833787798881531, "learning_rate": 0.0009902521008403361, "loss": 0.4507, "step": 452 }, { "epoch": 0.253072625698324, "grad_norm": 0.6542819738388062, "learning_rate": 0.0009902240896358543, "loss": 0.4431, "step": 453 }, { "epoch": 0.2536312849162011, "grad_norm": 1.1062020063400269, "learning_rate": 0.0009901960784313726, "loss": 0.4658, "step": 454 }, { "epoch": 0.2541899441340782, "grad_norm": 1.5975990295410156, "learning_rate": 0.0009901680672268908, "loss": 0.683, "step": 455 }, { "epoch": 0.2547486033519553, "grad_norm": 0.9453422427177429, "learning_rate": 0.000990140056022409, "loss": 0.5247, "step": 456 }, { "epoch": 0.2553072625698324, "grad_norm": 0.9046151638031006, "learning_rate": 0.0009901120448179272, "loss": 0.567, "step": 457 }, { "epoch": 0.2558659217877095, "grad_norm": 4.5664167404174805, "learning_rate": 0.0009900840336134454, "loss": 0.5604, "step": 458 }, { "epoch": 0.25642458100558657, "grad_norm": 1.0993216037750244, "learning_rate": 0.0009900560224089636, "loss": 0.5606, "step": 459 }, { "epoch": 0.2569832402234637, "grad_norm": 1.1192004680633545, "learning_rate": 0.0009900280112044818, "loss": 0.449, "step": 460 }, { "epoch": 0.25754189944134076, "grad_norm": 1.8525092601776123, "learning_rate": 0.00099, "loss": 0.5958, "step": 461 }, { "epoch": 0.2581005586592179, "grad_norm": 0.8744431734085083, "learning_rate": 0.0009899719887955182, "loss": 0.6259, "step": 462 }, { "epoch": 0.25865921787709495, "grad_norm": 0.5100352168083191, "learning_rate": 0.0009899439775910364, "loss": 0.5203, "step": 463 }, { "epoch": 0.25921787709497207, "grad_norm": 2.1647567749023438, "learning_rate": 0.0009899159663865546, "loss": 0.4758, "step": 464 }, { "epoch": 0.25977653631284914, "grad_norm": 1.028052806854248, "learning_rate": 
0.0009898879551820728, "loss": 0.5388, "step": 465 }, { "epoch": 0.26033519553072626, "grad_norm": 1.115816354751587, "learning_rate": 0.000989859943977591, "loss": 0.6204, "step": 466 }, { "epoch": 0.2608938547486033, "grad_norm": 0.7185821533203125, "learning_rate": 0.0009898319327731092, "loss": 0.5762, "step": 467 }, { "epoch": 0.26145251396648045, "grad_norm": 0.9607667922973633, "learning_rate": 0.0009898039215686274, "loss": 0.6681, "step": 468 }, { "epoch": 0.2620111731843575, "grad_norm": 2.2605960369110107, "learning_rate": 0.0009897759103641456, "loss": 0.484, "step": 469 }, { "epoch": 0.26256983240223464, "grad_norm": 1.1028248071670532, "learning_rate": 0.0009897478991596638, "loss": 0.511, "step": 470 }, { "epoch": 0.2631284916201117, "grad_norm": 0.9246019124984741, "learning_rate": 0.000989719887955182, "loss": 0.5988, "step": 471 }, { "epoch": 0.2636871508379888, "grad_norm": 1.1163049936294556, "learning_rate": 0.0009896918767507003, "loss": 0.5703, "step": 472 }, { "epoch": 0.26424581005586595, "grad_norm": 0.9648666977882385, "learning_rate": 0.0009896638655462185, "loss": 0.7031, "step": 473 }, { "epoch": 0.264804469273743, "grad_norm": 0.8844133019447327, "learning_rate": 0.0009896358543417367, "loss": 0.5242, "step": 474 }, { "epoch": 0.26536312849162014, "grad_norm": 1.0383061170578003, "learning_rate": 0.0009896078431372549, "loss": 0.5462, "step": 475 }, { "epoch": 0.2659217877094972, "grad_norm": 1.277012586593628, "learning_rate": 0.000989579831932773, "loss": 0.5399, "step": 476 }, { "epoch": 0.2664804469273743, "grad_norm": 0.8860754370689392, "learning_rate": 0.0009895518207282913, "loss": 0.4776, "step": 477 }, { "epoch": 0.2670391061452514, "grad_norm": 0.8229119181632996, "learning_rate": 0.0009895238095238095, "loss": 0.5386, "step": 478 }, { "epoch": 0.2675977653631285, "grad_norm": 0.8690646290779114, "learning_rate": 0.0009894957983193277, "loss": 0.5621, "step": 479 }, { "epoch": 0.2681564245810056, "grad_norm": 
2.185850143432617, "learning_rate": 0.000989467787114846, "loss": 0.5787, "step": 480 }, { "epoch": 0.2687150837988827, "grad_norm": 0.6414355635643005, "learning_rate": 0.0009894397759103641, "loss": 0.4936, "step": 481 }, { "epoch": 0.26927374301675977, "grad_norm": 0.8222938179969788, "learning_rate": 0.0009894117647058823, "loss": 0.4996, "step": 482 }, { "epoch": 0.2698324022346369, "grad_norm": 0.9511945247650146, "learning_rate": 0.0009893837535014005, "loss": 0.5639, "step": 483 }, { "epoch": 0.27039106145251396, "grad_norm": 1.151688575744629, "learning_rate": 0.0009893557422969187, "loss": 0.5654, "step": 484 }, { "epoch": 0.2709497206703911, "grad_norm": 0.7357352375984192, "learning_rate": 0.000989327731092437, "loss": 0.5087, "step": 485 }, { "epoch": 0.27150837988826815, "grad_norm": 0.8794959187507629, "learning_rate": 0.0009892997198879551, "loss": 0.5731, "step": 486 }, { "epoch": 0.27206703910614527, "grad_norm": 0.8126729726791382, "learning_rate": 0.0009892717086834734, "loss": 0.7194, "step": 487 }, { "epoch": 0.27262569832402234, "grad_norm": 0.5910065770149231, "learning_rate": 0.0009892436974789916, "loss": 0.5187, "step": 488 }, { "epoch": 0.27318435754189946, "grad_norm": 0.7675679922103882, "learning_rate": 0.0009892156862745098, "loss": 0.7064, "step": 489 }, { "epoch": 0.2737430167597765, "grad_norm": 1.0286262035369873, "learning_rate": 0.000989187675070028, "loss": 0.547, "step": 490 }, { "epoch": 0.27430167597765365, "grad_norm": 0.561172366142273, "learning_rate": 0.0009891596638655462, "loss": 0.5427, "step": 491 }, { "epoch": 0.2748603351955307, "grad_norm": 0.6805617809295654, "learning_rate": 0.0009891316526610644, "loss": 0.4352, "step": 492 }, { "epoch": 0.27541899441340784, "grad_norm": 1.0946743488311768, "learning_rate": 0.0009891036414565826, "loss": 0.5098, "step": 493 }, { "epoch": 0.2759776536312849, "grad_norm": 0.8197351694107056, "learning_rate": 0.0009890756302521008, "loss": 0.5598, "step": 494 }, { "epoch": 
0.276536312849162, "grad_norm": 0.8153690695762634, "learning_rate": 0.000989047619047619, "loss": 0.5379, "step": 495 }, { "epoch": 0.2770949720670391, "grad_norm": 0.7094146609306335, "learning_rate": 0.0009890196078431372, "loss": 0.6306, "step": 496 }, { "epoch": 0.2776536312849162, "grad_norm": 0.5500633120536804, "learning_rate": 0.0009889915966386554, "loss": 0.5782, "step": 497 }, { "epoch": 0.2782122905027933, "grad_norm": 0.6152987480163574, "learning_rate": 0.0009889635854341736, "loss": 0.4949, "step": 498 }, { "epoch": 0.2787709497206704, "grad_norm": 0.7058537006378174, "learning_rate": 0.000988935574229692, "loss": 0.4576, "step": 499 }, { "epoch": 0.27932960893854747, "grad_norm": 0.9381784200668335, "learning_rate": 0.00098890756302521, "loss": 0.4365, "step": 500 }, { "epoch": 0.27932960893854747, "eval_cer": 0.10674500561901956, "eval_loss": 0.39540618658065796, "eval_runtime": 55.7698, "eval_samples_per_second": 81.37, "eval_steps_per_second": 5.092, "eval_wer": 0.4124745118851429, "step": 500 }, { "epoch": 0.2798882681564246, "grad_norm": 0.640254020690918, "learning_rate": 0.0009888795518207282, "loss": 0.7642, "step": 501 }, { "epoch": 0.28044692737430166, "grad_norm": 0.6222963333129883, "learning_rate": 0.0009888515406162464, "loss": 0.4841, "step": 502 }, { "epoch": 0.2810055865921788, "grad_norm": 0.6733713150024414, "learning_rate": 0.0009888235294117646, "loss": 0.5577, "step": 503 }, { "epoch": 0.28156424581005585, "grad_norm": 0.5247134566307068, "learning_rate": 0.000988795518207283, "loss": 0.5758, "step": 504 }, { "epoch": 0.28212290502793297, "grad_norm": 1.093283772468567, "learning_rate": 0.000988767507002801, "loss": 0.6831, "step": 505 }, { "epoch": 0.28268156424581004, "grad_norm": 0.7806113362312317, "learning_rate": 0.0009887394957983193, "loss": 0.455, "step": 506 }, { "epoch": 0.28324022346368716, "grad_norm": 0.6761318445205688, "learning_rate": 0.0009887114845938375, "loss": 0.4346, "step": 507 }, { "epoch": 
0.2837988826815642, "grad_norm": 0.8825002908706665, "learning_rate": 0.0009886834733893557, "loss": 0.6033, "step": 508 }, { "epoch": 0.28435754189944135, "grad_norm": 1.0674254894256592, "learning_rate": 0.000988655462184874, "loss": 0.6355, "step": 509 }, { "epoch": 0.2849162011173184, "grad_norm": 0.5881056785583496, "learning_rate": 0.000988627450980392, "loss": 0.6378, "step": 510 }, { "epoch": 0.28547486033519553, "grad_norm": 1.8274636268615723, "learning_rate": 0.0009885994397759103, "loss": 0.6773, "step": 511 }, { "epoch": 0.2860335195530726, "grad_norm": 0.7210019826889038, "learning_rate": 0.0009885714285714285, "loss": 0.4746, "step": 512 }, { "epoch": 0.2865921787709497, "grad_norm": 0.5159186124801636, "learning_rate": 0.0009885434173669467, "loss": 0.4151, "step": 513 }, { "epoch": 0.2871508379888268, "grad_norm": 0.47784191370010376, "learning_rate": 0.0009885154061624651, "loss": 0.4714, "step": 514 }, { "epoch": 0.2877094972067039, "grad_norm": 2.3483059406280518, "learning_rate": 0.0009884873949579833, "loss": 0.5185, "step": 515 }, { "epoch": 0.288268156424581, "grad_norm": 0.7674930691719055, "learning_rate": 0.0009884593837535013, "loss": 0.6903, "step": 516 }, { "epoch": 0.2888268156424581, "grad_norm": 0.9830483794212341, "learning_rate": 0.0009884313725490195, "loss": 0.4401, "step": 517 }, { "epoch": 0.28938547486033517, "grad_norm": 0.5300085544586182, "learning_rate": 0.0009884033613445377, "loss": 0.5136, "step": 518 }, { "epoch": 0.2899441340782123, "grad_norm": 0.5695764422416687, "learning_rate": 0.0009883753501400562, "loss": 0.4509, "step": 519 }, { "epoch": 0.2905027932960894, "grad_norm": 2.513749837875366, "learning_rate": 0.0009883473389355744, "loss": 0.5806, "step": 520 }, { "epoch": 0.2910614525139665, "grad_norm": 0.5982545614242554, "learning_rate": 0.0009883193277310924, "loss": 0.5088, "step": 521 }, { "epoch": 0.2916201117318436, "grad_norm": 0.5623366236686707, "learning_rate": 0.0009882913165266106, "loss": 0.548, 
"step": 522 }, { "epoch": 0.29217877094972067, "grad_norm": 1.4201511144638062, "learning_rate": 0.0009882633053221288, "loss": 0.5219, "step": 523 }, { "epoch": 0.2927374301675978, "grad_norm": 4.216770648956299, "learning_rate": 0.0009882352941176472, "loss": 0.4378, "step": 524 }, { "epoch": 0.29329608938547486, "grad_norm": 1.6362286806106567, "learning_rate": 0.0009882072829131654, "loss": 0.4981, "step": 525 }, { "epoch": 0.293854748603352, "grad_norm": 0.6319324374198914, "learning_rate": 0.0009881792717086834, "loss": 0.5748, "step": 526 }, { "epoch": 0.29441340782122905, "grad_norm": 0.6910527348518372, "learning_rate": 0.0009881512605042016, "loss": 0.6463, "step": 527 }, { "epoch": 0.29497206703910617, "grad_norm": 1.051849126815796, "learning_rate": 0.0009881232492997198, "loss": 0.4279, "step": 528 }, { "epoch": 0.29553072625698323, "grad_norm": 1.8838139772415161, "learning_rate": 0.0009880952380952382, "loss": 0.5647, "step": 529 }, { "epoch": 0.29608938547486036, "grad_norm": 1.694541335105896, "learning_rate": 0.0009880672268907564, "loss": 0.5946, "step": 530 }, { "epoch": 0.2966480446927374, "grad_norm": 0.5781271457672119, "learning_rate": 0.0009880392156862746, "loss": 0.4582, "step": 531 }, { "epoch": 0.29720670391061454, "grad_norm": 0.7101206183433533, "learning_rate": 0.0009880112044817926, "loss": 0.5532, "step": 532 }, { "epoch": 0.2977653631284916, "grad_norm": 0.5283511877059937, "learning_rate": 0.0009879831932773108, "loss": 0.5119, "step": 533 }, { "epoch": 0.29832402234636873, "grad_norm": 1.3297278881072998, "learning_rate": 0.0009879551820728292, "loss": 0.6357, "step": 534 }, { "epoch": 0.2988826815642458, "grad_norm": 0.6172975897789001, "learning_rate": 0.0009879271708683475, "loss": 0.5827, "step": 535 }, { "epoch": 0.2994413407821229, "grad_norm": 0.5623006224632263, "learning_rate": 0.0009878991596638657, "loss": 0.4352, "step": 536 }, { "epoch": 0.3, "grad_norm": 0.9791301488876343, "learning_rate": 0.0009878711484593837, 
"loss": 0.4996, "step": 537 }, { "epoch": 0.3005586592178771, "grad_norm": 0.9790188074111938, "learning_rate": 0.0009878431372549019, "loss": 0.5203, "step": 538 }, { "epoch": 0.3011173184357542, "grad_norm": 0.7493875026702881, "learning_rate": 0.0009878151260504203, "loss": 0.5667, "step": 539 }, { "epoch": 0.3016759776536313, "grad_norm": 1.7138361930847168, "learning_rate": 0.0009877871148459385, "loss": 0.5694, "step": 540 }, { "epoch": 0.30223463687150837, "grad_norm": 1.1247279644012451, "learning_rate": 0.0009877591036414567, "loss": 0.5885, "step": 541 }, { "epoch": 0.3027932960893855, "grad_norm": 0.6994509696960449, "learning_rate": 0.0009877310924369747, "loss": 0.5168, "step": 542 }, { "epoch": 0.30335195530726256, "grad_norm": 1.5583826303482056, "learning_rate": 0.0009877030812324929, "loss": 0.5291, "step": 543 }, { "epoch": 0.3039106145251397, "grad_norm": 0.9216806292533875, "learning_rate": 0.0009876750700280113, "loss": 0.4847, "step": 544 }, { "epoch": 0.30446927374301674, "grad_norm": 1.0649412870407104, "learning_rate": 0.0009876470588235295, "loss": 0.6681, "step": 545 }, { "epoch": 0.30502793296089387, "grad_norm": 0.8307594656944275, "learning_rate": 0.0009876190476190477, "loss": 0.539, "step": 546 }, { "epoch": 0.30558659217877093, "grad_norm": 4.53532075881958, "learning_rate": 0.000987591036414566, "loss": 0.542, "step": 547 }, { "epoch": 0.30614525139664805, "grad_norm": 1.1315875053405762, "learning_rate": 0.000987563025210084, "loss": 0.554, "step": 548 }, { "epoch": 0.3067039106145251, "grad_norm": 1.2333356142044067, "learning_rate": 0.0009875350140056023, "loss": 0.4888, "step": 549 }, { "epoch": 0.30726256983240224, "grad_norm": 0.7535827159881592, "learning_rate": 0.0009875070028011205, "loss": 0.4799, "step": 550 }, { "epoch": 0.3078212290502793, "grad_norm": 0.681426465511322, "learning_rate": 0.0009874789915966388, "loss": 0.3963, "step": 551 }, { "epoch": 0.30837988826815643, "grad_norm": 1.0495132207870483, 
"learning_rate": 0.000987450980392157, "loss": 0.4841, "step": 552 }, { "epoch": 0.3089385474860335, "grad_norm": 0.8529074192047119, "learning_rate": 0.000987422969187675, "loss": 0.6456, "step": 553 }, { "epoch": 0.3094972067039106, "grad_norm": 7.31911563873291, "learning_rate": 0.0009873949579831934, "loss": 0.564, "step": 554 }, { "epoch": 0.3100558659217877, "grad_norm": 0.7274956107139587, "learning_rate": 0.0009873669467787116, "loss": 0.4856, "step": 555 }, { "epoch": 0.3106145251396648, "grad_norm": 0.9564986824989319, "learning_rate": 0.0009873389355742298, "loss": 0.7139, "step": 556 }, { "epoch": 0.3111731843575419, "grad_norm": 1.6834135055541992, "learning_rate": 0.000987310924369748, "loss": 0.4995, "step": 557 }, { "epoch": 0.311731843575419, "grad_norm": 3.4599685668945312, "learning_rate": 0.000987282913165266, "loss": 0.5741, "step": 558 }, { "epoch": 0.31229050279329607, "grad_norm": 0.7904874682426453, "learning_rate": 0.0009872549019607844, "loss": 0.464, "step": 559 }, { "epoch": 0.3128491620111732, "grad_norm": 1.5327842235565186, "learning_rate": 0.0009872268907563026, "loss": 0.5896, "step": 560 }, { "epoch": 0.31340782122905025, "grad_norm": 0.861058235168457, "learning_rate": 0.0009871988795518208, "loss": 0.4847, "step": 561 }, { "epoch": 0.3139664804469274, "grad_norm": 0.7388689517974854, "learning_rate": 0.000987170868347339, "loss": 0.4693, "step": 562 }, { "epoch": 0.31452513966480444, "grad_norm": 0.5862420201301575, "learning_rate": 0.0009871428571428572, "loss": 0.6276, "step": 563 }, { "epoch": 0.31508379888268156, "grad_norm": 0.8733795285224915, "learning_rate": 0.0009871148459383754, "loss": 0.5302, "step": 564 }, { "epoch": 0.31564245810055863, "grad_norm": 1.1133688688278198, "learning_rate": 0.0009870868347338936, "loss": 0.4618, "step": 565 }, { "epoch": 0.31620111731843575, "grad_norm": 0.9358701705932617, "learning_rate": 0.0009870588235294118, "loss": 0.4899, "step": 566 }, { "epoch": 0.3167597765363129, "grad_norm": 
0.857302188873291, "learning_rate": 0.00098703081232493, "loss": 0.6417, "step": 567 }, { "epoch": 0.31731843575418994, "grad_norm": 0.8492801189422607, "learning_rate": 0.0009870028011204483, "loss": 0.4556, "step": 568 }, { "epoch": 0.31787709497206706, "grad_norm": 0.9835643172264099, "learning_rate": 0.0009869747899159665, "loss": 0.5778, "step": 569 }, { "epoch": 0.31843575418994413, "grad_norm": 1.7731804847717285, "learning_rate": 0.0009869467787114847, "loss": 0.5371, "step": 570 }, { "epoch": 0.31899441340782125, "grad_norm": 0.8389686942100525, "learning_rate": 0.0009869187675070029, "loss": 0.4409, "step": 571 }, { "epoch": 0.3195530726256983, "grad_norm": 1.047930121421814, "learning_rate": 0.000986890756302521, "loss": 0.5879, "step": 572 }, { "epoch": 0.32011173184357544, "grad_norm": 0.7291346192359924, "learning_rate": 0.0009868627450980393, "loss": 0.555, "step": 573 }, { "epoch": 0.3206703910614525, "grad_norm": 0.9129700064659119, "learning_rate": 0.0009868347338935575, "loss": 0.5871, "step": 574 }, { "epoch": 0.32122905027932963, "grad_norm": 0.8879346251487732, "learning_rate": 0.0009868067226890757, "loss": 0.4973, "step": 575 }, { "epoch": 0.3217877094972067, "grad_norm": 1.487825870513916, "learning_rate": 0.000986778711484594, "loss": 0.5086, "step": 576 }, { "epoch": 0.3223463687150838, "grad_norm": 0.638690710067749, "learning_rate": 0.000986750700280112, "loss": 0.486, "step": 577 }, { "epoch": 0.3229050279329609, "grad_norm": 0.4099404215812683, "learning_rate": 0.0009867226890756303, "loss": 0.4206, "step": 578 }, { "epoch": 0.323463687150838, "grad_norm": 1.078679084777832, "learning_rate": 0.0009866946778711485, "loss": 0.6084, "step": 579 }, { "epoch": 0.3240223463687151, "grad_norm": 0.818130373954773, "learning_rate": 0.0009866666666666667, "loss": 0.5727, "step": 580 }, { "epoch": 0.3245810055865922, "grad_norm": 6.597809314727783, "learning_rate": 0.000986638655462185, "loss": 0.4981, "step": 581 }, { "epoch": 
0.32513966480446926, "grad_norm": 0.6093296408653259, "learning_rate": 0.0009866106442577031, "loss": 0.543, "step": 582 }, { "epoch": 0.3256983240223464, "grad_norm": 0.7406026721000671, "learning_rate": 0.0009865826330532213, "loss": 0.4562, "step": 583 }, { "epoch": 0.32625698324022345, "grad_norm": 0.7541854977607727, "learning_rate": 0.0009865546218487395, "loss": 0.5853, "step": 584 }, { "epoch": 0.3268156424581006, "grad_norm": 0.8364110589027405, "learning_rate": 0.0009865266106442578, "loss": 0.6134, "step": 585 }, { "epoch": 0.32737430167597764, "grad_norm": 0.9898073673248291, "learning_rate": 0.000986498599439776, "loss": 0.5891, "step": 586 }, { "epoch": 0.32793296089385476, "grad_norm": 0.7348021864891052, "learning_rate": 0.0009864705882352942, "loss": 0.5638, "step": 587 }, { "epoch": 0.32849162011173183, "grad_norm": 0.566052258014679, "learning_rate": 0.0009864425770308124, "loss": 0.6911, "step": 588 }, { "epoch": 0.32905027932960895, "grad_norm": 5.006649017333984, "learning_rate": 0.0009864145658263306, "loss": 0.4984, "step": 589 }, { "epoch": 0.329608938547486, "grad_norm": 1.3679720163345337, "learning_rate": 0.0009863865546218488, "loss": 0.451, "step": 590 }, { "epoch": 0.33016759776536314, "grad_norm": 0.7462795972824097, "learning_rate": 0.000986358543417367, "loss": 0.4371, "step": 591 }, { "epoch": 0.3307262569832402, "grad_norm": 0.9732274413108826, "learning_rate": 0.0009863305322128852, "loss": 0.6407, "step": 592 }, { "epoch": 0.33128491620111733, "grad_norm": 0.7388654351234436, "learning_rate": 0.0009863025210084034, "loss": 0.4886, "step": 593 }, { "epoch": 0.3318435754189944, "grad_norm": 0.8793904185295105, "learning_rate": 0.0009862745098039216, "loss": 0.4647, "step": 594 }, { "epoch": 0.3324022346368715, "grad_norm": 0.9062551856040955, "learning_rate": 0.0009862464985994398, "loss": 0.5031, "step": 595 }, { "epoch": 0.3329608938547486, "grad_norm": 3.0678863525390625, "learning_rate": 0.000986218487394958, "loss": 0.6406, 
"step": 596 }, { "epoch": 0.3335195530726257, "grad_norm": 0.9255324006080627, "learning_rate": 0.0009861904761904762, "loss": 0.56, "step": 597 }, { "epoch": 0.3340782122905028, "grad_norm": 0.6190474033355713, "learning_rate": 0.0009861624649859944, "loss": 0.5146, "step": 598 }, { "epoch": 0.3346368715083799, "grad_norm": 0.7674522399902344, "learning_rate": 0.0009861344537815126, "loss": 0.5423, "step": 599 }, { "epoch": 0.33519553072625696, "grad_norm": 1.7365634441375732, "learning_rate": 0.0009861064425770308, "loss": 0.5815, "step": 600 }, { "epoch": 0.3357541899441341, "grad_norm": 0.9577219486236572, "learning_rate": 0.000986078431372549, "loss": 0.5612, "step": 601 }, { "epoch": 0.33631284916201115, "grad_norm": 0.9474151730537415, "learning_rate": 0.0009860504201680673, "loss": 0.512, "step": 602 }, { "epoch": 0.3368715083798883, "grad_norm": 0.5228251814842224, "learning_rate": 0.0009860224089635855, "loss": 0.594, "step": 603 }, { "epoch": 0.33743016759776534, "grad_norm": 0.7312942743301392, "learning_rate": 0.0009859943977591037, "loss": 0.5573, "step": 604 }, { "epoch": 0.33798882681564246, "grad_norm": 0.7236953377723694, "learning_rate": 0.0009859663865546219, "loss": 0.6495, "step": 605 }, { "epoch": 0.33854748603351953, "grad_norm": 0.749528169631958, "learning_rate": 0.00098593837535014, "loss": 0.498, "step": 606 }, { "epoch": 0.33910614525139665, "grad_norm": 1.3720815181732178, "learning_rate": 0.0009859103641456583, "loss": 0.5544, "step": 607 }, { "epoch": 0.3396648044692737, "grad_norm": 1.715617060661316, "learning_rate": 0.0009858823529411765, "loss": 0.4906, "step": 608 }, { "epoch": 0.34022346368715084, "grad_norm": 0.5368160009384155, "learning_rate": 0.0009858543417366947, "loss": 0.4501, "step": 609 }, { "epoch": 0.3407821229050279, "grad_norm": 0.6735332012176514, "learning_rate": 0.000985826330532213, "loss": 0.5236, "step": 610 }, { "epoch": 0.34134078212290503, "grad_norm": 0.6093345880508423, "learning_rate": 
0.0009857983193277311, "loss": 0.4786, "step": 611 }, { "epoch": 0.3418994413407821, "grad_norm": 1.024030327796936, "learning_rate": 0.0009857703081232493, "loss": 0.5203, "step": 612 }, { "epoch": 0.3424581005586592, "grad_norm": 0.609090268611908, "learning_rate": 0.0009857422969187675, "loss": 0.4912, "step": 613 }, { "epoch": 0.34301675977653634, "grad_norm": 0.5807856917381287, "learning_rate": 0.0009857142857142857, "loss": 0.4129, "step": 614 }, { "epoch": 0.3435754189944134, "grad_norm": 0.8883734345436096, "learning_rate": 0.000985686274509804, "loss": 0.5617, "step": 615 }, { "epoch": 0.34413407821229053, "grad_norm": 8.983072280883789, "learning_rate": 0.0009856582633053221, "loss": 0.5198, "step": 616 }, { "epoch": 0.3446927374301676, "grad_norm": 1.6517291069030762, "learning_rate": 0.0009856302521008403, "loss": 0.5649, "step": 617 }, { "epoch": 0.3452513966480447, "grad_norm": 0.8718855977058411, "learning_rate": 0.0009856022408963586, "loss": 0.621, "step": 618 }, { "epoch": 0.3458100558659218, "grad_norm": 0.5957295298576355, "learning_rate": 0.0009855742296918768, "loss": 0.4323, "step": 619 }, { "epoch": 0.3463687150837989, "grad_norm": 4.571535110473633, "learning_rate": 0.000985546218487395, "loss": 0.6198, "step": 620 }, { "epoch": 0.346927374301676, "grad_norm": 0.9922893643379211, "learning_rate": 0.0009855182072829132, "loss": 0.5402, "step": 621 }, { "epoch": 0.3474860335195531, "grad_norm": 0.8614273071289062, "learning_rate": 0.0009854901960784314, "loss": 0.4172, "step": 622 }, { "epoch": 0.34804469273743016, "grad_norm": 1.1592367887496948, "learning_rate": 0.0009854621848739496, "loss": 0.6982, "step": 623 }, { "epoch": 0.3486033519553073, "grad_norm": 0.8100976347923279, "learning_rate": 0.0009854341736694678, "loss": 0.5332, "step": 624 }, { "epoch": 0.34916201117318435, "grad_norm": 1.0766860246658325, "learning_rate": 0.000985406162464986, "loss": 0.6281, "step": 625 }, { "epoch": 0.34972067039106147, "grad_norm": 
0.5531345009803772, "learning_rate": 0.0009853781512605042, "loss": 0.4697, "step": 626 }, { "epoch": 0.35027932960893854, "grad_norm": 0.6653377413749695, "learning_rate": 0.0009853501400560224, "loss": 0.476, "step": 627 }, { "epoch": 0.35083798882681566, "grad_norm": 0.7839998006820679, "learning_rate": 0.0009853221288515406, "loss": 0.4844, "step": 628 }, { "epoch": 0.3513966480446927, "grad_norm": 0.9125815629959106, "learning_rate": 0.0009852941176470588, "loss": 0.55, "step": 629 }, { "epoch": 0.35195530726256985, "grad_norm": 0.7828765511512756, "learning_rate": 0.000985266106442577, "loss": 0.464, "step": 630 }, { "epoch": 0.3525139664804469, "grad_norm": 0.9206963181495667, "learning_rate": 0.0009852380952380952, "loss": 0.6883, "step": 631 }, { "epoch": 0.35307262569832404, "grad_norm": 1.4578670263290405, "learning_rate": 0.0009852100840336134, "loss": 0.5362, "step": 632 }, { "epoch": 0.3536312849162011, "grad_norm": 0.7980212569236755, "learning_rate": 0.0009851820728291316, "loss": 0.5009, "step": 633 }, { "epoch": 0.3541899441340782, "grad_norm": 0.9872021675109863, "learning_rate": 0.0009851540616246498, "loss": 0.3887, "step": 634 }, { "epoch": 0.3547486033519553, "grad_norm": 1.398852825164795, "learning_rate": 0.000985126050420168, "loss": 0.6557, "step": 635 }, { "epoch": 0.3553072625698324, "grad_norm": 0.9444912672042847, "learning_rate": 0.0009850980392156863, "loss": 0.5873, "step": 636 }, { "epoch": 0.3558659217877095, "grad_norm": 1.0968694686889648, "learning_rate": 0.0009850700280112045, "loss": 0.5259, "step": 637 }, { "epoch": 0.3564245810055866, "grad_norm": 0.5139156579971313, "learning_rate": 0.0009850420168067227, "loss": 0.5205, "step": 638 }, { "epoch": 0.35698324022346367, "grad_norm": 1.0068995952606201, "learning_rate": 0.0009850140056022409, "loss": 0.5287, "step": 639 }, { "epoch": 0.3575418994413408, "grad_norm": 2.2291014194488525, "learning_rate": 0.000984985994397759, "loss": 0.5895, "step": 640 }, { "epoch": 
0.35810055865921786, "grad_norm": 1.0994702577590942, "learning_rate": 0.0009849579831932773, "loss": 0.6271, "step": 641 }, { "epoch": 0.358659217877095, "grad_norm": 0.701944887638092, "learning_rate": 0.0009849299719887955, "loss": 0.5157, "step": 642 }, { "epoch": 0.35921787709497205, "grad_norm": 1.1416760683059692, "learning_rate": 0.0009849019607843137, "loss": 0.7159, "step": 643 }, { "epoch": 0.35977653631284917, "grad_norm": 0.7814168930053711, "learning_rate": 0.000984873949579832, "loss": 0.397, "step": 644 }, { "epoch": 0.36033519553072624, "grad_norm": 0.5681543946266174, "learning_rate": 0.0009848459383753501, "loss": 0.5497, "step": 645 }, { "epoch": 0.36089385474860336, "grad_norm": 0.5196084380149841, "learning_rate": 0.0009848179271708683, "loss": 0.4015, "step": 646 }, { "epoch": 0.3614525139664804, "grad_norm": 0.8165639042854309, "learning_rate": 0.0009847899159663865, "loss": 0.5294, "step": 647 }, { "epoch": 0.36201117318435755, "grad_norm": 0.7947068810462952, "learning_rate": 0.0009847619047619047, "loss": 0.6244, "step": 648 }, { "epoch": 0.3625698324022346, "grad_norm": 0.5430769920349121, "learning_rate": 0.000984733893557423, "loss": 0.514, "step": 649 }, { "epoch": 0.36312849162011174, "grad_norm": 5.341940402984619, "learning_rate": 0.0009847058823529411, "loss": 0.4679, "step": 650 }, { "epoch": 0.3636871508379888, "grad_norm": 0.7629307508468628, "learning_rate": 0.0009846778711484594, "loss": 0.7608, "step": 651 }, { "epoch": 0.3642458100558659, "grad_norm": 0.6666110754013062, "learning_rate": 0.0009846498599439776, "loss": 0.5415, "step": 652 }, { "epoch": 0.364804469273743, "grad_norm": 0.8909905552864075, "learning_rate": 0.0009846218487394958, "loss": 0.453, "step": 653 }, { "epoch": 0.3653631284916201, "grad_norm": 0.8186153769493103, "learning_rate": 0.0009845938375350142, "loss": 0.4321, "step": 654 }, { "epoch": 0.3659217877094972, "grad_norm": 0.7347453236579895, "learning_rate": 0.0009845658263305322, "loss": 0.3973, 
"step": 655 }, { "epoch": 0.3664804469273743, "grad_norm": 0.8336395621299744, "learning_rate": 0.0009845378151260504, "loss": 0.5548, "step": 656 }, { "epoch": 0.36703910614525137, "grad_norm": 0.8416475057601929, "learning_rate": 0.0009845098039215686, "loss": 0.4945, "step": 657 }, { "epoch": 0.3675977653631285, "grad_norm": 0.8832162022590637, "learning_rate": 0.0009844817927170868, "loss": 0.5359, "step": 658 }, { "epoch": 0.36815642458100556, "grad_norm": 0.6441562175750732, "learning_rate": 0.0009844537815126052, "loss": 0.4669, "step": 659 }, { "epoch": 0.3687150837988827, "grad_norm": 1.0926862955093384, "learning_rate": 0.0009844257703081232, "loss": 0.5395, "step": 660 }, { "epoch": 0.3692737430167598, "grad_norm": 0.9024230241775513, "learning_rate": 0.0009843977591036414, "loss": 0.543, "step": 661 }, { "epoch": 0.36983240223463687, "grad_norm": 0.9043408036231995, "learning_rate": 0.0009843697478991596, "loss": 0.486, "step": 662 }, { "epoch": 0.370391061452514, "grad_norm": 0.5910623073577881, "learning_rate": 0.0009843417366946778, "loss": 0.4853, "step": 663 }, { "epoch": 0.37094972067039106, "grad_norm": 0.5296788811683655, "learning_rate": 0.000984313725490196, "loss": 0.438, "step": 664 }, { "epoch": 0.3715083798882682, "grad_norm": 0.763145923614502, "learning_rate": 0.0009842857142857142, "loss": 0.5428, "step": 665 }, { "epoch": 0.37206703910614525, "grad_norm": 1.2509864568710327, "learning_rate": 0.0009842577030812324, "loss": 0.6036, "step": 666 }, { "epoch": 0.37262569832402237, "grad_norm": 0.636996328830719, "learning_rate": 0.0009842296918767506, "loss": 0.477, "step": 667 }, { "epoch": 0.37318435754189944, "grad_norm": 0.8803697228431702, "learning_rate": 0.0009842016806722689, "loss": 0.5022, "step": 668 }, { "epoch": 0.37374301675977656, "grad_norm": 0.5829938054084778, "learning_rate": 0.000984173669467787, "loss": 0.4451, "step": 669 }, { "epoch": 0.3743016759776536, "grad_norm": 0.48007455468177795, "learning_rate": 
0.0009841456582633055, "loss": 0.42, "step": 670 }, { "epoch": 0.37486033519553075, "grad_norm": 1.1333321332931519, "learning_rate": 0.0009841176470588235, "loss": 0.4739, "step": 671 }, { "epoch": 0.3754189944134078, "grad_norm": 0.5421041250228882, "learning_rate": 0.0009840896358543417, "loss": 0.5519, "step": 672 }, { "epoch": 0.37597765363128494, "grad_norm": 0.621100902557373, "learning_rate": 0.0009840616246498599, "loss": 0.4365, "step": 673 }, { "epoch": 0.376536312849162, "grad_norm": 1.0602147579193115, "learning_rate": 0.000984033613445378, "loss": 0.5093, "step": 674 }, { "epoch": 0.3770949720670391, "grad_norm": 0.9669912457466125, "learning_rate": 0.0009840056022408965, "loss": 0.6225, "step": 675 }, { "epoch": 0.3776536312849162, "grad_norm": 0.6287827491760254, "learning_rate": 0.0009839775910364145, "loss": 0.3752, "step": 676 }, { "epoch": 0.3782122905027933, "grad_norm": 0.7915810346603394, "learning_rate": 0.0009839495798319327, "loss": 0.5571, "step": 677 }, { "epoch": 0.3787709497206704, "grad_norm": 0.8399115204811096, "learning_rate": 0.000983921568627451, "loss": 0.6311, "step": 678 }, { "epoch": 0.3793296089385475, "grad_norm": 1.0199395418167114, "learning_rate": 0.0009838935574229691, "loss": 0.6158, "step": 679 }, { "epoch": 0.37988826815642457, "grad_norm": 0.7124590277671814, "learning_rate": 0.0009838655462184875, "loss": 0.4925, "step": 680 }, { "epoch": 0.3804469273743017, "grad_norm": 0.5956300497055054, "learning_rate": 0.0009838375350140055, "loss": 0.5394, "step": 681 }, { "epoch": 0.38100558659217876, "grad_norm": 0.5080326199531555, "learning_rate": 0.0009838095238095237, "loss": 0.5528, "step": 682 }, { "epoch": 0.3815642458100559, "grad_norm": 0.6605573892593384, "learning_rate": 0.000983781512605042, "loss": 0.4708, "step": 683 }, { "epoch": 0.38212290502793295, "grad_norm": 0.8146275281906128, "learning_rate": 0.0009837535014005601, "loss": 0.578, "step": 684 }, { "epoch": 0.38268156424581007, "grad_norm": 
1.4359737634658813, "learning_rate": 0.0009837254901960786, "loss": 0.442, "step": 685 }, { "epoch": 0.38324022346368714, "grad_norm": 0.7695728540420532, "learning_rate": 0.0009836974789915968, "loss": 0.5112, "step": 686 }, { "epoch": 0.38379888268156426, "grad_norm": 0.7323272228240967, "learning_rate": 0.0009836694677871148, "loss": 0.5192, "step": 687 }, { "epoch": 0.3843575418994413, "grad_norm": 0.9150975942611694, "learning_rate": 0.000983641456582633, "loss": 0.4296, "step": 688 }, { "epoch": 0.38491620111731845, "grad_norm": 1.4393677711486816, "learning_rate": 0.0009836134453781512, "loss": 0.4992, "step": 689 }, { "epoch": 0.3854748603351955, "grad_norm": 1.056930661201477, "learning_rate": 0.0009835854341736696, "loss": 0.4393, "step": 690 }, { "epoch": 0.38603351955307263, "grad_norm": 0.8621572256088257, "learning_rate": 0.0009835574229691878, "loss": 0.5995, "step": 691 }, { "epoch": 0.3865921787709497, "grad_norm": 0.9120810031890869, "learning_rate": 0.0009835294117647058, "loss": 0.5515, "step": 692 }, { "epoch": 0.3871508379888268, "grad_norm": 0.6020013093948364, "learning_rate": 0.000983501400560224, "loss": 0.4929, "step": 693 }, { "epoch": 0.3877094972067039, "grad_norm": 0.8536820411682129, "learning_rate": 0.0009834733893557422, "loss": 0.4239, "step": 694 }, { "epoch": 0.388268156424581, "grad_norm": 1.2177194356918335, "learning_rate": 0.0009834453781512606, "loss": 0.548, "step": 695 }, { "epoch": 0.3888268156424581, "grad_norm": 0.8397508859634399, "learning_rate": 0.0009834173669467788, "loss": 0.5709, "step": 696 }, { "epoch": 0.3893854748603352, "grad_norm": 0.75736403465271, "learning_rate": 0.0009833893557422968, "loss": 0.5774, "step": 697 }, { "epoch": 0.38994413407821227, "grad_norm": 0.4960309863090515, "learning_rate": 0.000983361344537815, "loss": 0.485, "step": 698 }, { "epoch": 0.3905027932960894, "grad_norm": 0.6970162391662598, "learning_rate": 0.0009833333333333332, "loss": 0.3803, "step": 699 }, { "epoch": 
0.39106145251396646, "grad_norm": 0.5962986946105957, "learning_rate": 0.0009833053221288517, "loss": 0.4879, "step": 700 }, { "epoch": 0.3916201117318436, "grad_norm": 0.5089249610900879, "learning_rate": 0.0009832773109243699, "loss": 0.3975, "step": 701 }, { "epoch": 0.39217877094972065, "grad_norm": 0.9010866284370422, "learning_rate": 0.000983249299719888, "loss": 0.3997, "step": 702 }, { "epoch": 0.39273743016759777, "grad_norm": 0.587181568145752, "learning_rate": 0.000983221288515406, "loss": 0.4275, "step": 703 }, { "epoch": 0.39329608938547483, "grad_norm": 0.6018347144126892, "learning_rate": 0.0009831932773109243, "loss": 0.5464, "step": 704 }, { "epoch": 0.39385474860335196, "grad_norm": 0.748507559299469, "learning_rate": 0.0009831652661064427, "loss": 0.46, "step": 705 }, { "epoch": 0.394413407821229, "grad_norm": 0.5247789025306702, "learning_rate": 0.000983137254901961, "loss": 0.4726, "step": 706 }, { "epoch": 0.39497206703910615, "grad_norm": 0.43686094880104065, "learning_rate": 0.000983109243697479, "loss": 0.4056, "step": 707 }, { "epoch": 0.39553072625698327, "grad_norm": 1.0581320524215698, "learning_rate": 0.000983081232492997, "loss": 0.6545, "step": 708 }, { "epoch": 0.39608938547486033, "grad_norm": 0.581413984298706, "learning_rate": 0.0009830532212885153, "loss": 0.3951, "step": 709 }, { "epoch": 0.39664804469273746, "grad_norm": 0.5816717147827148, "learning_rate": 0.0009830252100840337, "loss": 0.4465, "step": 710 }, { "epoch": 0.3972067039106145, "grad_norm": 0.6564436554908752, "learning_rate": 0.000982997198879552, "loss": 0.4934, "step": 711 }, { "epoch": 0.39776536312849164, "grad_norm": 0.5008072853088379, "learning_rate": 0.0009829691876750701, "loss": 0.4224, "step": 712 }, { "epoch": 0.3983240223463687, "grad_norm": 0.6331523656845093, "learning_rate": 0.0009829411764705881, "loss": 0.5258, "step": 713 }, { "epoch": 0.39888268156424583, "grad_norm": 0.5446946620941162, "learning_rate": 0.0009829131652661063, "loss": 0.5251, 
"step": 714 }, { "epoch": 0.3994413407821229, "grad_norm": 1.016221284866333, "learning_rate": 0.0009828851540616248, "loss": 0.6111, "step": 715 }, { "epoch": 0.4, "grad_norm": 0.5130570530891418, "learning_rate": 0.000982857142857143, "loss": 0.465, "step": 716 }, { "epoch": 0.4005586592178771, "grad_norm": 0.7790815234184265, "learning_rate": 0.0009828291316526612, "loss": 0.5953, "step": 717 }, { "epoch": 0.4011173184357542, "grad_norm": 0.6934060454368591, "learning_rate": 0.0009828011204481794, "loss": 0.5043, "step": 718 }, { "epoch": 0.4016759776536313, "grad_norm": 0.6301409602165222, "learning_rate": 0.0009827731092436974, "loss": 0.5164, "step": 719 }, { "epoch": 0.4022346368715084, "grad_norm": 0.7534012198448181, "learning_rate": 0.0009827450980392158, "loss": 0.5121, "step": 720 }, { "epoch": 0.40279329608938547, "grad_norm": 0.6056898236274719, "learning_rate": 0.000982717086834734, "loss": 0.5822, "step": 721 }, { "epoch": 0.4033519553072626, "grad_norm": 1.156616449356079, "learning_rate": 0.0009826890756302522, "loss": 0.5301, "step": 722 }, { "epoch": 0.40391061452513966, "grad_norm": 1.0898436307907104, "learning_rate": 0.0009826610644257704, "loss": 0.3878, "step": 723 }, { "epoch": 0.4044692737430168, "grad_norm": 0.7641041874885559, "learning_rate": 0.0009826330532212884, "loss": 0.6096, "step": 724 }, { "epoch": 0.40502793296089384, "grad_norm": 0.5866867899894714, "learning_rate": 0.0009826050420168068, "loss": 0.5536, "step": 725 }, { "epoch": 0.40558659217877097, "grad_norm": 5.730029582977295, "learning_rate": 0.000982577030812325, "loss": 0.4713, "step": 726 }, { "epoch": 0.40614525139664803, "grad_norm": 1.1350740194320679, "learning_rate": 0.0009825490196078432, "loss": 0.5403, "step": 727 }, { "epoch": 0.40670391061452515, "grad_norm": 1.1513550281524658, "learning_rate": 0.0009825210084033614, "loss": 0.4084, "step": 728 }, { "epoch": 0.4072625698324022, "grad_norm": 0.6230626702308655, "learning_rate": 0.0009824929971988794, 
"loss": 0.5078, "step": 729 }, { "epoch": 0.40782122905027934, "grad_norm": 0.6454103589057922, "learning_rate": 0.0009824649859943978, "loss": 0.5555, "step": 730 }, { "epoch": 0.4083798882681564, "grad_norm": 0.5713451504707336, "learning_rate": 0.000982436974789916, "loss": 0.4608, "step": 731 }, { "epoch": 0.40893854748603353, "grad_norm": 0.7829291820526123, "learning_rate": 0.0009824089635854343, "loss": 0.447, "step": 732 }, { "epoch": 0.4094972067039106, "grad_norm": 1.2563079595565796, "learning_rate": 0.0009823809523809525, "loss": 0.6568, "step": 733 }, { "epoch": 0.4100558659217877, "grad_norm": 9.718208312988281, "learning_rate": 0.0009823529411764707, "loss": 0.682, "step": 734 }, { "epoch": 0.4106145251396648, "grad_norm": 0.726872444152832, "learning_rate": 0.0009823249299719889, "loss": 0.5385, "step": 735 }, { "epoch": 0.4111731843575419, "grad_norm": 0.5961375832557678, "learning_rate": 0.000982296918767507, "loss": 0.5955, "step": 736 }, { "epoch": 0.411731843575419, "grad_norm": 6.342020034790039, "learning_rate": 0.0009822689075630253, "loss": 0.4763, "step": 737 }, { "epoch": 0.4122905027932961, "grad_norm": 0.8904435038566589, "learning_rate": 0.0009822408963585435, "loss": 0.5814, "step": 738 }, { "epoch": 0.41284916201117317, "grad_norm": 0.5107899308204651, "learning_rate": 0.0009822128851540617, "loss": 0.4904, "step": 739 }, { "epoch": 0.4134078212290503, "grad_norm": 0.8592984676361084, "learning_rate": 0.00098218487394958, "loss": 0.5191, "step": 740 }, { "epoch": 0.41396648044692735, "grad_norm": 0.6866621971130371, "learning_rate": 0.000982156862745098, "loss": 0.6135, "step": 741 }, { "epoch": 0.4145251396648045, "grad_norm": 0.9929611086845398, "learning_rate": 0.0009821288515406163, "loss": 0.5175, "step": 742 }, { "epoch": 0.41508379888268154, "grad_norm": 4.138786315917969, "learning_rate": 0.0009821008403361345, "loss": 0.5835, "step": 743 }, { "epoch": 0.41564245810055866, "grad_norm": 0.5353333950042725, "learning_rate": 
0.0009820728291316527, "loss": 0.4488, "step": 744 }, { "epoch": 0.41620111731843573, "grad_norm": 0.7421092987060547, "learning_rate": 0.000982044817927171, "loss": 0.4024, "step": 745 }, { "epoch": 0.41675977653631285, "grad_norm": 0.5338488817214966, "learning_rate": 0.0009820168067226891, "loss": 0.51, "step": 746 }, { "epoch": 0.4173184357541899, "grad_norm": 1.0539344549179077, "learning_rate": 0.0009819887955182073, "loss": 0.4863, "step": 747 }, { "epoch": 0.41787709497206704, "grad_norm": 1.0292673110961914, "learning_rate": 0.0009819607843137255, "loss": 0.6767, "step": 748 }, { "epoch": 0.4184357541899441, "grad_norm": 0.8841199278831482, "learning_rate": 0.0009819327731092438, "loss": 0.5283, "step": 749 }, { "epoch": 0.41899441340782123, "grad_norm": 0.4805144965648651, "learning_rate": 0.000981904761904762, "loss": 0.5039, "step": 750 }, { "epoch": 0.4195530726256983, "grad_norm": 0.49522754549980164, "learning_rate": 0.0009818767507002802, "loss": 0.4332, "step": 751 }, { "epoch": 0.4201117318435754, "grad_norm": 1.9347999095916748, "learning_rate": 0.0009818487394957984, "loss": 0.6183, "step": 752 }, { "epoch": 0.4206703910614525, "grad_norm": 0.7999465465545654, "learning_rate": 0.0009818207282913166, "loss": 0.552, "step": 753 }, { "epoch": 0.4212290502793296, "grad_norm": 0.5772879123687744, "learning_rate": 0.0009817927170868348, "loss": 0.4982, "step": 754 }, { "epoch": 0.42178770949720673, "grad_norm": 0.4552770256996155, "learning_rate": 0.000981764705882353, "loss": 0.4604, "step": 755 }, { "epoch": 0.4223463687150838, "grad_norm": 0.6333035230636597, "learning_rate": 0.0009817366946778712, "loss": 0.5294, "step": 756 }, { "epoch": 0.4229050279329609, "grad_norm": 0.5289617776870728, "learning_rate": 0.0009817086834733894, "loss": 0.5121, "step": 757 }, { "epoch": 0.423463687150838, "grad_norm": 0.9566251039505005, "learning_rate": 0.0009816806722689076, "loss": 0.5332, "step": 758 }, { "epoch": 0.4240223463687151, "grad_norm": 
0.9983794093132019, "learning_rate": 0.0009816526610644258, "loss": 0.4375, "step": 759 }, { "epoch": 0.4245810055865922, "grad_norm": 0.8549471497535706, "learning_rate": 0.000981624649859944, "loss": 0.4418, "step": 760 }, { "epoch": 0.4251396648044693, "grad_norm": 2.0585970878601074, "learning_rate": 0.0009815966386554622, "loss": 0.6453, "step": 761 }, { "epoch": 0.42569832402234636, "grad_norm": 0.5526566505432129, "learning_rate": 0.0009815686274509804, "loss": 0.4516, "step": 762 }, { "epoch": 0.4262569832402235, "grad_norm": 0.6749558448791504, "learning_rate": 0.0009815406162464986, "loss": 0.4908, "step": 763 }, { "epoch": 0.42681564245810055, "grad_norm": 1.370712399482727, "learning_rate": 0.0009815126050420168, "loss": 0.5092, "step": 764 }, { "epoch": 0.4273743016759777, "grad_norm": 0.7793422341346741, "learning_rate": 0.000981484593837535, "loss": 0.5939, "step": 765 }, { "epoch": 0.42793296089385474, "grad_norm": 0.5176940560340881, "learning_rate": 0.0009814565826330533, "loss": 0.4488, "step": 766 }, { "epoch": 0.42849162011173186, "grad_norm": 1.0649203062057495, "learning_rate": 0.0009814285714285715, "loss": 0.619, "step": 767 }, { "epoch": 0.42905027932960893, "grad_norm": 0.6778590083122253, "learning_rate": 0.0009814005602240897, "loss": 0.4603, "step": 768 }, { "epoch": 0.42960893854748605, "grad_norm": 0.9636827707290649, "learning_rate": 0.0009813725490196079, "loss": 0.5105, "step": 769 }, { "epoch": 0.4301675977653631, "grad_norm": 0.5597183108329773, "learning_rate": 0.000981344537815126, "loss": 0.4559, "step": 770 }, { "epoch": 0.43072625698324024, "grad_norm": 0.5207726955413818, "learning_rate": 0.0009813165266106443, "loss": 0.4505, "step": 771 }, { "epoch": 0.4312849162011173, "grad_norm": 0.6174609065055847, "learning_rate": 0.0009812885154061625, "loss": 0.4889, "step": 772 }, { "epoch": 0.43184357541899443, "grad_norm": 0.5063725113868713, "learning_rate": 0.0009812605042016807, "loss": 0.4667, "step": 773 }, { "epoch": 
0.4324022346368715, "grad_norm": 1.2749954462051392, "learning_rate": 0.000981232492997199, "loss": 0.4954, "step": 774 }, { "epoch": 0.4329608938547486, "grad_norm": 0.6341150403022766, "learning_rate": 0.0009812044817927171, "loss": 0.5442, "step": 775 }, { "epoch": 0.4335195530726257, "grad_norm": 0.7182409167289734, "learning_rate": 0.0009811764705882353, "loss": 0.5457, "step": 776 }, { "epoch": 0.4340782122905028, "grad_norm": 0.7968026399612427, "learning_rate": 0.0009811484593837535, "loss": 0.5734, "step": 777 }, { "epoch": 0.4346368715083799, "grad_norm": 0.4441359341144562, "learning_rate": 0.0009811204481792717, "loss": 0.5043, "step": 778 }, { "epoch": 0.435195530726257, "grad_norm": 0.48550713062286377, "learning_rate": 0.00098109243697479, "loss": 0.4485, "step": 779 }, { "epoch": 0.43575418994413406, "grad_norm": 0.7723861336708069, "learning_rate": 0.0009810644257703081, "loss": 0.4585, "step": 780 }, { "epoch": 0.4363128491620112, "grad_norm": 1.5276738405227661, "learning_rate": 0.0009810364145658263, "loss": 0.453, "step": 781 }, { "epoch": 0.43687150837988825, "grad_norm": 0.5024844408035278, "learning_rate": 0.0009810084033613446, "loss": 0.4935, "step": 782 }, { "epoch": 0.4374301675977654, "grad_norm": 0.5297918915748596, "learning_rate": 0.0009809803921568628, "loss": 0.4632, "step": 783 }, { "epoch": 0.43798882681564244, "grad_norm": 0.6294731497764587, "learning_rate": 0.000980952380952381, "loss": 0.5002, "step": 784 }, { "epoch": 0.43854748603351956, "grad_norm": 3.596766710281372, "learning_rate": 0.0009809243697478992, "loss": 0.6663, "step": 785 }, { "epoch": 0.43910614525139663, "grad_norm": 0.6239421367645264, "learning_rate": 0.0009808963585434174, "loss": 0.5175, "step": 786 }, { "epoch": 0.43966480446927375, "grad_norm": 0.9190245866775513, "learning_rate": 0.0009808683473389356, "loss": 0.4805, "step": 787 }, { "epoch": 0.4402234636871508, "grad_norm": 1.2294410467147827, "learning_rate": 0.0009808403361344538, "loss": 0.5014, 
"step": 788 }, { "epoch": 0.44078212290502794, "grad_norm": 1.0701860189437866, "learning_rate": 0.000980812324929972, "loss": 0.4388, "step": 789 }, { "epoch": 0.441340782122905, "grad_norm": 0.6473264694213867, "learning_rate": 0.0009807843137254902, "loss": 0.5691, "step": 790 }, { "epoch": 0.44189944134078213, "grad_norm": 0.5692540407180786, "learning_rate": 0.0009807563025210084, "loss": 0.5383, "step": 791 }, { "epoch": 0.4424581005586592, "grad_norm": 0.49326780438423157, "learning_rate": 0.0009807282913165266, "loss": 0.5117, "step": 792 }, { "epoch": 0.4430167597765363, "grad_norm": 0.5171276330947876, "learning_rate": 0.0009807002801120448, "loss": 0.4677, "step": 793 }, { "epoch": 0.4435754189944134, "grad_norm": 1.14442777633667, "learning_rate": 0.000980672268907563, "loss": 0.5585, "step": 794 }, { "epoch": 0.4441340782122905, "grad_norm": 0.7754183411598206, "learning_rate": 0.0009806442577030812, "loss": 0.6227, "step": 795 }, { "epoch": 0.4446927374301676, "grad_norm": 0.7203547358512878, "learning_rate": 0.0009806162464985994, "loss": 0.5006, "step": 796 }, { "epoch": 0.4452513966480447, "grad_norm": 1.2330557107925415, "learning_rate": 0.0009805882352941176, "loss": 0.5251, "step": 797 }, { "epoch": 0.44581005586592176, "grad_norm": 0.7639957666397095, "learning_rate": 0.0009805602240896358, "loss": 0.4819, "step": 798 }, { "epoch": 0.4463687150837989, "grad_norm": 0.7057264447212219, "learning_rate": 0.000980532212885154, "loss": 0.3599, "step": 799 }, { "epoch": 0.44692737430167595, "grad_norm": 2.596640110015869, "learning_rate": 0.0009805042016806723, "loss": 0.5319, "step": 800 }, { "epoch": 0.4474860335195531, "grad_norm": 0.5031240582466125, "learning_rate": 0.0009804761904761905, "loss": 0.4199, "step": 801 }, { "epoch": 0.4480446927374302, "grad_norm": 0.6316196322441101, "learning_rate": 0.0009804481792717087, "loss": 0.5212, "step": 802 }, { "epoch": 0.44860335195530726, "grad_norm": 5.0885090827941895, "learning_rate": 
0.0009804201680672269, "loss": 0.4256, "step": 803 }, { "epoch": 0.4491620111731844, "grad_norm": 2.0129776000976562, "learning_rate": 0.000980392156862745, "loss": 0.4249, "step": 804 }, { "epoch": 0.44972067039106145, "grad_norm": 6.43217134475708, "learning_rate": 0.0009803641456582633, "loss": 0.5374, "step": 805 }, { "epoch": 0.45027932960893857, "grad_norm": 0.7426183819770813, "learning_rate": 0.0009803361344537815, "loss": 0.5464, "step": 806 }, { "epoch": 0.45083798882681564, "grad_norm": 1.1538949012756348, "learning_rate": 0.0009803081232492997, "loss": 0.5922, "step": 807 }, { "epoch": 0.45139664804469276, "grad_norm": 0.7717742919921875, "learning_rate": 0.000980280112044818, "loss": 0.4813, "step": 808 }, { "epoch": 0.4519553072625698, "grad_norm": 1.0451397895812988, "learning_rate": 0.0009802521008403361, "loss": 0.5399, "step": 809 }, { "epoch": 0.45251396648044695, "grad_norm": 2.155149221420288, "learning_rate": 0.0009802240896358543, "loss": 0.603, "step": 810 }, { "epoch": 0.453072625698324, "grad_norm": 0.6225429177284241, "learning_rate": 0.0009801960784313725, "loss": 0.4949, "step": 811 }, { "epoch": 0.45363128491620114, "grad_norm": 0.8359684944152832, "learning_rate": 0.0009801680672268907, "loss": 0.4687, "step": 812 }, { "epoch": 0.4541899441340782, "grad_norm": 2.504591703414917, "learning_rate": 0.000980140056022409, "loss": 0.4975, "step": 813 }, { "epoch": 0.4547486033519553, "grad_norm": 1.0969488620758057, "learning_rate": 0.0009801120448179271, "loss": 0.4135, "step": 814 }, { "epoch": 0.4553072625698324, "grad_norm": 0.8878272175788879, "learning_rate": 0.0009800840336134454, "loss": 0.5093, "step": 815 }, { "epoch": 0.4558659217877095, "grad_norm": 1.4094451665878296, "learning_rate": 0.0009800560224089636, "loss": 0.536, "step": 816 }, { "epoch": 0.4564245810055866, "grad_norm": 0.8703171014785767, "learning_rate": 0.0009800280112044818, "loss": 0.4872, "step": 817 }, { "epoch": 0.4569832402234637, "grad_norm": 
0.5140367150306702, "learning_rate": 0.00098, "loss": 0.5125, "step": 818 }, { "epoch": 0.45754189944134077, "grad_norm": 0.6497352123260498, "learning_rate": 0.0009799719887955182, "loss": 0.5546, "step": 819 }, { "epoch": 0.4581005586592179, "grad_norm": 0.5851194262504578, "learning_rate": 0.0009799439775910364, "loss": 0.4927, "step": 820 }, { "epoch": 0.45865921787709496, "grad_norm": 4.143260955810547, "learning_rate": 0.0009799159663865546, "loss": 0.5803, "step": 821 }, { "epoch": 0.4592178770949721, "grad_norm": 0.9179551005363464, "learning_rate": 0.0009798879551820728, "loss": 0.4332, "step": 822 }, { "epoch": 0.45977653631284915, "grad_norm": 0.5912773013114929, "learning_rate": 0.000979859943977591, "loss": 0.5745, "step": 823 }, { "epoch": 0.46033519553072627, "grad_norm": 1.2477174997329712, "learning_rate": 0.0009798319327731092, "loss": 0.4981, "step": 824 }, { "epoch": 0.46089385474860334, "grad_norm": 0.6297051906585693, "learning_rate": 0.0009798039215686276, "loss": 0.452, "step": 825 }, { "epoch": 0.46145251396648046, "grad_norm": 0.6839253306388855, "learning_rate": 0.0009797759103641456, "loss": 0.7986, "step": 826 }, { "epoch": 0.4620111731843575, "grad_norm": 0.6715407371520996, "learning_rate": 0.0009797478991596638, "loss": 0.4612, "step": 827 }, { "epoch": 0.46256983240223465, "grad_norm": 0.9140963554382324, "learning_rate": 0.000979719887955182, "loss": 0.5857, "step": 828 }, { "epoch": 0.4631284916201117, "grad_norm": 0.7106597423553467, "learning_rate": 0.0009796918767507002, "loss": 0.5063, "step": 829 }, { "epoch": 0.46368715083798884, "grad_norm": 0.7576602697372437, "learning_rate": 0.0009796638655462187, "loss": 0.5293, "step": 830 }, { "epoch": 0.4642458100558659, "grad_norm": 1.3902246952056885, "learning_rate": 0.0009796358543417366, "loss": 0.5623, "step": 831 }, { "epoch": 0.464804469273743, "grad_norm": 0.728994607925415, "learning_rate": 0.0009796078431372549, "loss": 0.4056, "step": 832 }, { "epoch": 0.4653631284916201, 
"grad_norm": 1.275497555732727, "learning_rate": 0.000979579831932773, "loss": 0.4133, "step": 833 }, { "epoch": 0.4659217877094972, "grad_norm": 0.6350085735321045, "learning_rate": 0.0009795518207282913, "loss": 0.6348, "step": 834 }, { "epoch": 0.4664804469273743, "grad_norm": 0.937637209892273, "learning_rate": 0.0009795238095238097, "loss": 0.6311, "step": 835 }, { "epoch": 0.4670391061452514, "grad_norm": 0.4883667826652527, "learning_rate": 0.0009794957983193277, "loss": 0.5412, "step": 836 }, { "epoch": 0.46759776536312847, "grad_norm": 3.5121424198150635, "learning_rate": 0.0009794677871148459, "loss": 0.4174, "step": 837 }, { "epoch": 0.4681564245810056, "grad_norm": 0.7006067037582397, "learning_rate": 0.000979439775910364, "loss": 0.5278, "step": 838 }, { "epoch": 0.46871508379888266, "grad_norm": 0.529995322227478, "learning_rate": 0.0009794117647058823, "loss": 0.4906, "step": 839 }, { "epoch": 0.4692737430167598, "grad_norm": 0.5436596274375916, "learning_rate": 0.0009793837535014007, "loss": 0.4689, "step": 840 }, { "epoch": 0.46983240223463685, "grad_norm": 0.8091188073158264, "learning_rate": 0.000979355742296919, "loss": 0.5458, "step": 841 }, { "epoch": 0.47039106145251397, "grad_norm": 1.0712378025054932, "learning_rate": 0.000979327731092437, "loss": 0.4206, "step": 842 }, { "epoch": 0.47094972067039104, "grad_norm": 1.2762094736099243, "learning_rate": 0.0009792997198879551, "loss": 0.4527, "step": 843 }, { "epoch": 0.47150837988826816, "grad_norm": 1.0467904806137085, "learning_rate": 0.0009792717086834733, "loss": 0.8351, "step": 844 }, { "epoch": 0.4720670391061452, "grad_norm": 0.48221683502197266, "learning_rate": 0.0009792436974789917, "loss": 0.4944, "step": 845 }, { "epoch": 0.47262569832402235, "grad_norm": 0.6988289952278137, "learning_rate": 0.00097921568627451, "loss": 0.4712, "step": 846 }, { "epoch": 0.4731843575418994, "grad_norm": 0.9647338390350342, "learning_rate": 0.000979187675070028, "loss": 0.5967, "step": 847 }, { 
"epoch": 0.47374301675977654, "grad_norm": 0.9094915390014648, "learning_rate": 0.0009791596638655461, "loss": 0.5468, "step": 848 }, { "epoch": 0.47430167597765366, "grad_norm": 0.6334877610206604, "learning_rate": 0.0009791316526610644, "loss": 0.4925, "step": 849 }, { "epoch": 0.4748603351955307, "grad_norm": 0.8837565183639526, "learning_rate": 0.0009791036414565828, "loss": 0.5654, "step": 850 }, { "epoch": 0.47541899441340785, "grad_norm": 1.1544402837753296, "learning_rate": 0.000979075630252101, "loss": 0.6857, "step": 851 }, { "epoch": 0.4759776536312849, "grad_norm": 0.6996060609817505, "learning_rate": 0.000979047619047619, "loss": 0.4821, "step": 852 }, { "epoch": 0.47653631284916204, "grad_norm": 1.8354023694992065, "learning_rate": 0.0009790196078431372, "loss": 0.5043, "step": 853 }, { "epoch": 0.4770949720670391, "grad_norm": 0.5960931777954102, "learning_rate": 0.0009789915966386554, "loss": 0.6045, "step": 854 }, { "epoch": 0.4776536312849162, "grad_norm": 1.124712347984314, "learning_rate": 0.0009789635854341738, "loss": 0.5422, "step": 855 }, { "epoch": 0.4782122905027933, "grad_norm": 0.5247573852539062, "learning_rate": 0.000978935574229692, "loss": 0.4228, "step": 856 }, { "epoch": 0.4787709497206704, "grad_norm": 0.5940335988998413, "learning_rate": 0.0009789075630252102, "loss": 0.4598, "step": 857 }, { "epoch": 0.4793296089385475, "grad_norm": 1.1262948513031006, "learning_rate": 0.0009788795518207282, "loss": 0.4873, "step": 858 }, { "epoch": 0.4798882681564246, "grad_norm": 0.8122714757919312, "learning_rate": 0.0009788515406162464, "loss": 0.5261, "step": 859 }, { "epoch": 0.48044692737430167, "grad_norm": 0.8120923638343811, "learning_rate": 0.0009788235294117648, "loss": 0.4576, "step": 860 }, { "epoch": 0.4810055865921788, "grad_norm": 0.6665226817131042, "learning_rate": 0.000978795518207283, "loss": 0.5516, "step": 861 }, { "epoch": 0.48156424581005586, "grad_norm": 1.4202057123184204, "learning_rate": 0.0009787675070028012, 
"loss": 0.6323, "step": 862 }, { "epoch": 0.482122905027933, "grad_norm": 1.0429213047027588, "learning_rate": 0.0009787394957983192, "loss": 0.4993, "step": 863 }, { "epoch": 0.48268156424581005, "grad_norm": 0.563971757888794, "learning_rate": 0.0009787114845938374, "loss": 0.4815, "step": 864 }, { "epoch": 0.48324022346368717, "grad_norm": 1.0299530029296875, "learning_rate": 0.0009786834733893559, "loss": 0.5597, "step": 865 }, { "epoch": 0.48379888268156424, "grad_norm": 1.0133742094039917, "learning_rate": 0.000978655462184874, "loss": 0.4798, "step": 866 }, { "epoch": 0.48435754189944136, "grad_norm": 0.6536348462104797, "learning_rate": 0.0009786274509803923, "loss": 0.5242, "step": 867 }, { "epoch": 0.4849162011173184, "grad_norm": 0.44831448793411255, "learning_rate": 0.0009785994397759103, "loss": 0.4123, "step": 868 }, { "epoch": 0.48547486033519555, "grad_norm": 0.6059315800666809, "learning_rate": 0.0009785714285714285, "loss": 0.5477, "step": 869 }, { "epoch": 0.4860335195530726, "grad_norm": 0.8129459619522095, "learning_rate": 0.000978543417366947, "loss": 0.425, "step": 870 }, { "epoch": 0.48659217877094973, "grad_norm": 0.7330930829048157, "learning_rate": 0.000978515406162465, "loss": 0.5031, "step": 871 }, { "epoch": 0.4871508379888268, "grad_norm": 1.3822016716003418, "learning_rate": 0.0009784873949579833, "loss": 0.6426, "step": 872 }, { "epoch": 0.4877094972067039, "grad_norm": 0.629173755645752, "learning_rate": 0.0009784593837535015, "loss": 0.5425, "step": 873 }, { "epoch": 0.488268156424581, "grad_norm": 0.7746703028678894, "learning_rate": 0.0009784313725490195, "loss": 0.6409, "step": 874 }, { "epoch": 0.4888268156424581, "grad_norm": 0.7017098069190979, "learning_rate": 0.000978403361344538, "loss": 0.5994, "step": 875 }, { "epoch": 0.4893854748603352, "grad_norm": 1.6050899028778076, "learning_rate": 0.0009783753501400561, "loss": 0.5533, "step": 876 }, { "epoch": 0.4899441340782123, "grad_norm": 0.821544349193573, "learning_rate": 
0.0009783473389355743, "loss": 0.5541, "step": 877 }, { "epoch": 0.49050279329608937, "grad_norm": 1.6707091331481934, "learning_rate": 0.0009783193277310925, "loss": 0.5065, "step": 878 }, { "epoch": 0.4910614525139665, "grad_norm": 0.6373299956321716, "learning_rate": 0.0009782913165266105, "loss": 0.464, "step": 879 }, { "epoch": 0.49162011173184356, "grad_norm": 0.7731879353523254, "learning_rate": 0.000978263305322129, "loss": 0.4619, "step": 880 }, { "epoch": 0.4921787709497207, "grad_norm": null, "learning_rate": 0.000978263305322129, "loss": 0.521, "step": 881 }, { "epoch": 0.49273743016759775, "grad_norm": 1.804014801979065, "learning_rate": 0.0009782352941176472, "loss": 0.4391, "step": 882 }, { "epoch": 0.49329608938547487, "grad_norm": 0.5658578872680664, "learning_rate": 0.0009782072829131654, "loss": 0.4609, "step": 883 }, { "epoch": 0.49385474860335193, "grad_norm": 1.8259243965148926, "learning_rate": 0.0009781792717086836, "loss": 0.4189, "step": 884 }, { "epoch": 0.49441340782122906, "grad_norm": 1.3172889947891235, "learning_rate": 0.0009781512605042016, "loss": 0.4807, "step": 885 }, { "epoch": 0.4949720670391061, "grad_norm": 1.0069423913955688, "learning_rate": 0.0009781232492997198, "loss": 0.4293, "step": 886 }, { "epoch": 0.49553072625698324, "grad_norm": 0.9529631733894348, "learning_rate": 0.0009780952380952382, "loss": 0.523, "step": 887 }, { "epoch": 0.4960893854748603, "grad_norm": 21.41440200805664, "learning_rate": 0.0009780672268907564, "loss": 0.5485, "step": 888 }, { "epoch": 0.49664804469273743, "grad_norm": 0.8933480381965637, "learning_rate": 0.0009780392156862746, "loss": 0.5737, "step": 889 }, { "epoch": 0.4972067039106145, "grad_norm": 0.5926549434661865, "learning_rate": 0.0009780112044817928, "loss": 0.4538, "step": 890 }, { "epoch": 0.4977653631284916, "grad_norm": 0.7941659092903137, "learning_rate": 0.0009779831932773108, "loss": 0.5334, "step": 891 }, { "epoch": 0.4983240223463687, "grad_norm": 0.679101824760437, 
"learning_rate": 0.0009779551820728292, "loss": 0.4683, "step": 892 }, { "epoch": 0.4988826815642458, "grad_norm": 0.5870710015296936, "learning_rate": 0.0009779271708683474, "loss": 0.4707, "step": 893 }, { "epoch": 0.4994413407821229, "grad_norm": 0.4131081998348236, "learning_rate": 0.0009778991596638656, "loss": 0.4766, "step": 894 }, { "epoch": 0.5, "grad_norm": 0.7532033324241638, "learning_rate": 0.0009778711484593838, "loss": 0.5604, "step": 895 }, { "epoch": 0.5005586592178771, "grad_norm": 0.5894113779067993, "learning_rate": 0.0009778431372549018, "loss": 0.556, "step": 896 }, { "epoch": 0.5011173184357542, "grad_norm": 0.6159910559654236, "learning_rate": 0.0009778151260504203, "loss": 0.7493, "step": 897 }, { "epoch": 0.5016759776536313, "grad_norm": 0.645681619644165, "learning_rate": 0.0009777871148459385, "loss": 0.4461, "step": 898 }, { "epoch": 0.5022346368715084, "grad_norm": 0.7601158618927002, "learning_rate": 0.0009777591036414567, "loss": 0.5558, "step": 899 }, { "epoch": 0.5027932960893855, "grad_norm": 1.1194024085998535, "learning_rate": 0.0009777310924369749, "loss": 0.5576, "step": 900 }, { "epoch": 0.5033519553072626, "grad_norm": 2.758066177368164, "learning_rate": 0.0009777030812324929, "loss": 0.5647, "step": 901 }, { "epoch": 0.5039106145251396, "grad_norm": 0.9305416345596313, "learning_rate": 0.0009776750700280113, "loss": 0.6004, "step": 902 }, { "epoch": 0.5044692737430168, "grad_norm": 0.782402753829956, "learning_rate": 0.0009776470588235295, "loss": 0.4926, "step": 903 }, { "epoch": 0.5050279329608939, "grad_norm": 3.9130053520202637, "learning_rate": 0.0009776190476190477, "loss": 0.4887, "step": 904 }, { "epoch": 0.505586592178771, "grad_norm": 0.9924647808074951, "learning_rate": 0.000977591036414566, "loss": 0.5809, "step": 905 }, { "epoch": 0.506145251396648, "grad_norm": 0.9683765172958374, "learning_rate": 0.000977563025210084, "loss": 0.6797, "step": 906 }, { "epoch": 0.5067039106145251, "grad_norm": 
0.656795859336853, "learning_rate": 0.0009775350140056023, "loss": 0.5957, "step": 907 }, { "epoch": 0.5072625698324023, "grad_norm": 0.836251974105835, "learning_rate": 0.0009775070028011205, "loss": 0.5127, "step": 908 }, { "epoch": 0.5078212290502794, "grad_norm": 0.6780369877815247, "learning_rate": 0.0009774789915966387, "loss": 0.5403, "step": 909 }, { "epoch": 0.5083798882681564, "grad_norm": 0.9531988501548767, "learning_rate": 0.000977450980392157, "loss": 0.4681, "step": 910 }, { "epoch": 0.5089385474860335, "grad_norm": 0.8000809550285339, "learning_rate": 0.0009774229691876751, "loss": 0.5315, "step": 911 }, { "epoch": 0.5094972067039106, "grad_norm": 0.7712262272834778, "learning_rate": 0.0009773949579831933, "loss": 0.5366, "step": 912 }, { "epoch": 0.5100558659217878, "grad_norm": 1.2008029222488403, "learning_rate": 0.0009773669467787115, "loss": 0.6025, "step": 913 }, { "epoch": 0.5106145251396648, "grad_norm": 0.5094325542449951, "learning_rate": 0.0009773389355742298, "loss": 0.484, "step": 914 }, { "epoch": 0.5111731843575419, "grad_norm": 0.6349051594734192, "learning_rate": 0.000977310924369748, "loss": 0.5264, "step": 915 }, { "epoch": 0.511731843575419, "grad_norm": 1.3723305463790894, "learning_rate": 0.0009772829131652662, "loss": 0.3958, "step": 916 }, { "epoch": 0.5122905027932961, "grad_norm": 0.5602142214775085, "learning_rate": 0.0009772549019607844, "loss": 0.6107, "step": 917 }, { "epoch": 0.5128491620111731, "grad_norm": 0.6605849862098694, "learning_rate": 0.0009772268907563026, "loss": 0.4906, "step": 918 }, { "epoch": 0.5134078212290503, "grad_norm": 0.6099862456321716, "learning_rate": 0.0009771988795518208, "loss": 0.4238, "step": 919 }, { "epoch": 0.5139664804469274, "grad_norm": 0.7553727030754089, "learning_rate": 0.000977170868347339, "loss": 0.4375, "step": 920 }, { "epoch": 0.5145251396648045, "grad_norm": 1.2150839567184448, "learning_rate": 0.0009771428571428572, "loss": 0.5048, "step": 921 }, { "epoch": 
0.5150837988826815, "grad_norm": 0.5379416346549988, "learning_rate": 0.0009771148459383754, "loss": 0.5527, "step": 922 }, { "epoch": 0.5156424581005586, "grad_norm": 0.6172040104866028, "learning_rate": 0.0009770868347338936, "loss": 0.5659, "step": 923 }, { "epoch": 0.5162011173184358, "grad_norm": 1.0052822828292847, "learning_rate": 0.0009770588235294118, "loss": 0.5649, "step": 924 }, { "epoch": 0.5167597765363129, "grad_norm": 0.7726624011993408, "learning_rate": 0.00097703081232493, "loss": 0.4821, "step": 925 }, { "epoch": 0.5173184357541899, "grad_norm": 0.6800291538238525, "learning_rate": 0.0009770028011204482, "loss": 0.6569, "step": 926 }, { "epoch": 0.517877094972067, "grad_norm": 1.327609896659851, "learning_rate": 0.0009769747899159664, "loss": 0.5363, "step": 927 }, { "epoch": 0.5184357541899441, "grad_norm": 1.0891976356506348, "learning_rate": 0.0009769467787114846, "loss": 0.495, "step": 928 }, { "epoch": 0.5189944134078213, "grad_norm": 0.6245486736297607, "learning_rate": 0.0009769187675070028, "loss": 0.5165, "step": 929 }, { "epoch": 0.5195530726256983, "grad_norm": 0.49972960352897644, "learning_rate": 0.000976890756302521, "loss": 0.4633, "step": 930 }, { "epoch": 0.5201117318435754, "grad_norm": 2.4194657802581787, "learning_rate": 0.0009768627450980393, "loss": 0.4955, "step": 931 }, { "epoch": 0.5206703910614525, "grad_norm": 0.6558769941329956, "learning_rate": 0.0009768347338935575, "loss": 0.509, "step": 932 }, { "epoch": 0.5212290502793296, "grad_norm": 0.9265739917755127, "learning_rate": 0.0009768067226890757, "loss": 0.5005, "step": 933 }, { "epoch": 0.5217877094972067, "grad_norm": 0.5933675169944763, "learning_rate": 0.0009767787114845939, "loss": 0.5826, "step": 934 }, { "epoch": 0.5223463687150838, "grad_norm": 0.7285407781600952, "learning_rate": 0.000976750700280112, "loss": 0.4265, "step": 935 }, { "epoch": 0.5229050279329609, "grad_norm": 0.7411402463912964, "learning_rate": 0.0009767226890756303, "loss": 0.6028, "step": 
936 }, { "epoch": 0.523463687150838, "grad_norm": 0.5636767148971558, "learning_rate": 0.0009766946778711485, "loss": 0.4115, "step": 937 }, { "epoch": 0.524022346368715, "grad_norm": 0.7952788472175598, "learning_rate": 0.0009766666666666667, "loss": 0.8305, "step": 938 }, { "epoch": 0.5245810055865922, "grad_norm": 0.5462428331375122, "learning_rate": 0.000976638655462185, "loss": 0.3768, "step": 939 }, { "epoch": 0.5251396648044693, "grad_norm": 0.684895396232605, "learning_rate": 0.0009766106442577031, "loss": 0.607, "step": 940 }, { "epoch": 0.5256983240223464, "grad_norm": 0.6716300845146179, "learning_rate": 0.0009765826330532213, "loss": 0.4181, "step": 941 }, { "epoch": 0.5262569832402234, "grad_norm": 0.7308188080787659, "learning_rate": 0.0009765546218487395, "loss": 0.5381, "step": 942 }, { "epoch": 0.5268156424581005, "grad_norm": 1.2455106973648071, "learning_rate": 0.0009765266106442577, "loss": 0.6004, "step": 943 }, { "epoch": 0.5273743016759777, "grad_norm": 0.7769612669944763, "learning_rate": 0.0009764985994397759, "loss": 0.41, "step": 944 }, { "epoch": 0.5279329608938548, "grad_norm": 1.355358362197876, "learning_rate": 0.0009764705882352941, "loss": 0.5135, "step": 945 }, { "epoch": 0.5284916201117319, "grad_norm": 0.7444446682929993, "learning_rate": 0.0009764425770308123, "loss": 0.5822, "step": 946 }, { "epoch": 0.5290502793296089, "grad_norm": 0.6030605435371399, "learning_rate": 0.0009764145658263306, "loss": 0.4447, "step": 947 }, { "epoch": 0.529608938547486, "grad_norm": 0.6857961416244507, "learning_rate": 0.0009763865546218488, "loss": 0.5858, "step": 948 }, { "epoch": 0.5301675977653632, "grad_norm": 1.5943262577056885, "learning_rate": 0.000976358543417367, "loss": 0.4036, "step": 949 }, { "epoch": 0.5307262569832403, "grad_norm": 0.6407567858695984, "learning_rate": 0.0009763305322128852, "loss": 0.4712, "step": 950 }, { "epoch": 0.5312849162011173, "grad_norm": 0.4918668270111084, "learning_rate": 0.0009763025210084034, "loss": 
0.4965, "step": 951 }, { "epoch": 0.5318435754189944, "grad_norm": 2.872591018676758, "learning_rate": 0.0009762745098039216, "loss": 0.7254, "step": 952 }, { "epoch": 0.5324022346368715, "grad_norm": 0.8626722097396851, "learning_rate": 0.0009762464985994398, "loss": 0.5013, "step": 953 }, { "epoch": 0.5329608938547487, "grad_norm": 0.8280640244483948, "learning_rate": 0.000976218487394958, "loss": 0.4996, "step": 954 }, { "epoch": 0.5335195530726257, "grad_norm": 0.5562185049057007, "learning_rate": 0.0009761904761904762, "loss": 0.4468, "step": 955 }, { "epoch": 0.5340782122905028, "grad_norm": 0.8575215935707092, "learning_rate": 0.0009761624649859944, "loss": 0.5449, "step": 956 }, { "epoch": 0.5346368715083799, "grad_norm": 0.6176499128341675, "learning_rate": 0.0009761344537815126, "loss": 0.5362, "step": 957 }, { "epoch": 0.535195530726257, "grad_norm": 1.6306699514389038, "learning_rate": 0.0009761064425770308, "loss": 0.6004, "step": 958 }, { "epoch": 0.535754189944134, "grad_norm": 0.8023949861526489, "learning_rate": 0.000976078431372549, "loss": 0.5008, "step": 959 }, { "epoch": 0.5363128491620112, "grad_norm": 0.5992342233657837, "learning_rate": 0.0009760504201680672, "loss": 0.4655, "step": 960 }, { "epoch": 0.5368715083798883, "grad_norm": 0.8887758255004883, "learning_rate": 0.0009760224089635854, "loss": 0.5942, "step": 961 }, { "epoch": 0.5374301675977654, "grad_norm": 0.6822399497032166, "learning_rate": 0.0009759943977591038, "loss": 0.7113, "step": 962 }, { "epoch": 0.5379888268156424, "grad_norm": 0.43152928352355957, "learning_rate": 0.0009759663865546218, "loss": 0.5528, "step": 963 }, { "epoch": 0.5385474860335195, "grad_norm": 0.5266768336296082, "learning_rate": 0.0009759383753501401, "loss": 0.5415, "step": 964 }, { "epoch": 0.5391061452513967, "grad_norm": 0.5582186579704285, "learning_rate": 0.0009759103641456583, "loss": 0.4896, "step": 965 }, { "epoch": 0.5396648044692738, "grad_norm": 0.5387858748435974, "learning_rate": 
0.0009758823529411765, "loss": 0.4501, "step": 966 }, { "epoch": 0.5402234636871508, "grad_norm": 0.564531683921814, "learning_rate": 0.0009758543417366948, "loss": 0.453, "step": 967 }, { "epoch": 0.5407821229050279, "grad_norm": 0.6433477401733398, "learning_rate": 0.0009758263305322129, "loss": 0.432, "step": 968 }, { "epoch": 0.541340782122905, "grad_norm": 0.7274858355522156, "learning_rate": 0.0009757983193277311, "loss": 0.7383, "step": 969 }, { "epoch": 0.5418994413407822, "grad_norm": 1.164543628692627, "learning_rate": 0.0009757703081232493, "loss": 0.5345, "step": 970 }, { "epoch": 0.5424581005586592, "grad_norm": 0.8189229369163513, "learning_rate": 0.0009757422969187675, "loss": 0.6865, "step": 971 }, { "epoch": 0.5430167597765363, "grad_norm": 0.8637515306472778, "learning_rate": 0.0009757142857142858, "loss": 0.442, "step": 972 }, { "epoch": 0.5435754189944134, "grad_norm": 1.665154218673706, "learning_rate": 0.0009756862745098039, "loss": 0.5287, "step": 973 }, { "epoch": 0.5441340782122905, "grad_norm": 2.0094218254089355, "learning_rate": 0.0009756582633053221, "loss": 0.5974, "step": 974 }, { "epoch": 0.5446927374301676, "grad_norm": 0.6737040281295776, "learning_rate": 0.0009756302521008403, "loss": 0.4898, "step": 975 }, { "epoch": 0.5452513966480447, "grad_norm": 0.8267460465431213, "learning_rate": 0.0009756022408963585, "loss": 0.6025, "step": 976 }, { "epoch": 0.5458100558659218, "grad_norm": 1.0673226118087769, "learning_rate": 0.0009755742296918768, "loss": 0.4838, "step": 977 }, { "epoch": 0.5463687150837989, "grad_norm": 0.9449355006217957, "learning_rate": 0.000975546218487395, "loss": 0.5025, "step": 978 }, { "epoch": 0.5469273743016759, "grad_norm": 0.8984672427177429, "learning_rate": 0.0009755182072829131, "loss": 0.5473, "step": 979 }, { "epoch": 0.547486033519553, "grad_norm": 1.1542268991470337, "learning_rate": 0.0009754901960784314, "loss": 0.5518, "step": 980 }, { "epoch": 0.5480446927374302, "grad_norm": 0.7877004742622375, 
"learning_rate": 0.0009754621848739496, "loss": 0.574, "step": 981 }, { "epoch": 0.5486033519553073, "grad_norm": 1.4613877534866333, "learning_rate": 0.0009754341736694679, "loss": 0.6451, "step": 982 }, { "epoch": 0.5491620111731843, "grad_norm": 0.9373902082443237, "learning_rate": 0.0009754061624649861, "loss": 0.5687, "step": 983 }, { "epoch": 0.5497206703910614, "grad_norm": 0.8266605734825134, "learning_rate": 0.0009753781512605042, "loss": 0.5074, "step": 984 }, { "epoch": 0.5502793296089385, "grad_norm": 0.8974220752716064, "learning_rate": 0.0009753501400560224, "loss": 0.5967, "step": 985 }, { "epoch": 0.5508379888268157, "grad_norm": 4.773686408996582, "learning_rate": 0.0009753221288515406, "loss": 0.4411, "step": 986 }, { "epoch": 0.5513966480446927, "grad_norm": 0.6652644872665405, "learning_rate": 0.0009752941176470589, "loss": 0.5025, "step": 987 }, { "epoch": 0.5519553072625698, "grad_norm": 1.3818681240081787, "learning_rate": 0.0009752661064425771, "loss": 0.5055, "step": 988 }, { "epoch": 0.5525139664804469, "grad_norm": 0.9579857587814331, "learning_rate": 0.0009752380952380952, "loss": 0.4816, "step": 989 }, { "epoch": 0.553072625698324, "grad_norm": 0.6119722127914429, "learning_rate": 0.0009752100840336134, "loss": 0.5603, "step": 990 }, { "epoch": 0.5536312849162012, "grad_norm": 0.7044486403465271, "learning_rate": 0.0009751820728291316, "loss": 0.4872, "step": 991 }, { "epoch": 0.5541899441340782, "grad_norm": 1.1273928880691528, "learning_rate": 0.0009751540616246499, "loss": 0.5578, "step": 992 }, { "epoch": 0.5547486033519553, "grad_norm": 0.8864875435829163, "learning_rate": 0.0009751260504201681, "loss": 0.4983, "step": 993 }, { "epoch": 0.5553072625698324, "grad_norm": 0.8385987877845764, "learning_rate": 0.0009750980392156863, "loss": 0.6813, "step": 994 }, { "epoch": 0.5558659217877095, "grad_norm": 3.663464307785034, "learning_rate": 0.0009750700280112044, "loss": 0.4018, "step": 995 }, { "epoch": 0.5564245810055866, 
"grad_norm": 0.8634306788444519, "learning_rate": 0.0009750420168067226, "loss": 0.4952, "step": 996 }, { "epoch": 0.5569832402234637, "grad_norm": 2.8100054264068604, "learning_rate": 0.000975014005602241, "loss": 0.3986, "step": 997 }, { "epoch": 0.5575418994413408, "grad_norm": 1.1263238191604614, "learning_rate": 0.0009749859943977592, "loss": 0.4929, "step": 998 }, { "epoch": 0.5581005586592179, "grad_norm": 0.7554044127464294, "learning_rate": 0.0009749579831932774, "loss": 0.7032, "step": 999 }, { "epoch": 0.5586592178770949, "grad_norm": 0.6761658787727356, "learning_rate": 0.0009749299719887955, "loss": 0.5478, "step": 1000 }, { "epoch": 0.5586592178770949, "eval_cer": 0.10311173665892005, "eval_loss": 0.38809409737586975, "eval_runtime": 55.6357, "eval_samples_per_second": 81.566, "eval_steps_per_second": 5.105, "eval_wer": 0.4025027233876149, "step": 1000 }, { "epoch": 0.5592178770949721, "grad_norm": 0.8933016657829285, "learning_rate": 0.0009749019607843137, "loss": 0.5917, "step": 1001 }, { "epoch": 0.5597765363128492, "grad_norm": 0.6337428092956543, "learning_rate": 0.000974873949579832, "loss": 0.4803, "step": 1002 }, { "epoch": 0.5603351955307263, "grad_norm": 0.7888473868370056, "learning_rate": 0.0009748459383753502, "loss": 0.548, "step": 1003 }, { "epoch": 0.5608938547486033, "grad_norm": 0.9701579809188843, "learning_rate": 0.0009748179271708684, "loss": 0.4508, "step": 1004 }, { "epoch": 0.5614525139664804, "grad_norm": 0.886437177658081, "learning_rate": 0.0009747899159663865, "loss": 0.5356, "step": 1005 }, { "epoch": 0.5620111731843576, "grad_norm": 0.6148294806480408, "learning_rate": 0.0009747619047619047, "loss": 0.5619, "step": 1006 }, { "epoch": 0.5625698324022347, "grad_norm": 0.5675657987594604, "learning_rate": 0.000974733893557423, "loss": 0.4057, "step": 1007 }, { "epoch": 0.5631284916201117, "grad_norm": 0.6281993389129639, "learning_rate": 0.0009747058823529412, "loss": 0.6571, "step": 1008 }, { "epoch": 0.5636871508379888, 
"grad_norm": 0.7165383696556091, "learning_rate": 0.0009746778711484594, "loss": 0.5533, "step": 1009 }, { "epoch": 0.5642458100558659, "grad_norm": 0.6096805334091187, "learning_rate": 0.0009746498599439776, "loss": 0.4701, "step": 1010 }, { "epoch": 0.5648044692737431, "grad_norm": 0.7585029602050781, "learning_rate": 0.0009746218487394957, "loss": 0.5767, "step": 1011 }, { "epoch": 0.5653631284916201, "grad_norm": 0.7557053565979004, "learning_rate": 0.000974593837535014, "loss": 0.5908, "step": 1012 }, { "epoch": 0.5659217877094972, "grad_norm": 0.555161714553833, "learning_rate": 0.0009745658263305323, "loss": 0.5017, "step": 1013 }, { "epoch": 0.5664804469273743, "grad_norm": 1.2929558753967285, "learning_rate": 0.0009745378151260505, "loss": 0.5627, "step": 1014 }, { "epoch": 0.5670391061452514, "grad_norm": 0.5683190822601318, "learning_rate": 0.0009745098039215687, "loss": 0.571, "step": 1015 }, { "epoch": 0.5675977653631284, "grad_norm": 0.6255306601524353, "learning_rate": 0.0009744817927170868, "loss": 0.5816, "step": 1016 }, { "epoch": 0.5681564245810056, "grad_norm": 0.9703459143638611, "learning_rate": 0.0009744537815126051, "loss": 0.5338, "step": 1017 }, { "epoch": 0.5687150837988827, "grad_norm": 0.8128557205200195, "learning_rate": 0.0009744257703081233, "loss": 0.4785, "step": 1018 }, { "epoch": 0.5692737430167598, "grad_norm": 0.5669181942939758, "learning_rate": 0.0009743977591036415, "loss": 0.4606, "step": 1019 }, { "epoch": 0.5698324022346368, "grad_norm": 0.5641766786575317, "learning_rate": 0.0009743697478991597, "loss": 0.4018, "step": 1020 }, { "epoch": 0.570391061452514, "grad_norm": 0.7533718347549438, "learning_rate": 0.0009743417366946778, "loss": 0.6241, "step": 1021 }, { "epoch": 0.5709497206703911, "grad_norm": 1.3725582361221313, "learning_rate": 0.0009743137254901961, "loss": 0.4687, "step": 1022 }, { "epoch": 0.5715083798882682, "grad_norm": 0.7779861688613892, "learning_rate": 0.0009742857142857143, "loss": 0.4283, "step": 
1023 }, { "epoch": 0.5720670391061452, "grad_norm": 1.0402158498764038, "learning_rate": 0.0009742577030812325, "loss": 0.6428, "step": 1024 }, { "epoch": 0.5726256983240223, "grad_norm": 1.1067811250686646, "learning_rate": 0.0009742296918767507, "loss": 0.5387, "step": 1025 }, { "epoch": 0.5731843575418994, "grad_norm": 0.5275371074676514, "learning_rate": 0.0009742016806722689, "loss": 0.5494, "step": 1026 }, { "epoch": 0.5737430167597766, "grad_norm": 0.7539685368537903, "learning_rate": 0.0009741736694677871, "loss": 0.4766, "step": 1027 }, { "epoch": 0.5743016759776536, "grad_norm": 0.6288818120956421, "learning_rate": 0.0009741456582633053, "loss": 0.4094, "step": 1028 }, { "epoch": 0.5748603351955307, "grad_norm": 1.1248012781143188, "learning_rate": 0.0009741176470588236, "loss": 0.5811, "step": 1029 }, { "epoch": 0.5754189944134078, "grad_norm": 0.6503866910934448, "learning_rate": 0.0009740896358543418, "loss": 0.4154, "step": 1030 }, { "epoch": 0.575977653631285, "grad_norm": 0.7325912117958069, "learning_rate": 0.00097406162464986, "loss": 0.5496, "step": 1031 }, { "epoch": 0.576536312849162, "grad_norm": 1.188781976699829, "learning_rate": 0.0009740336134453782, "loss": 0.4322, "step": 1032 }, { "epoch": 0.5770949720670391, "grad_norm": 0.8566517233848572, "learning_rate": 0.0009740056022408964, "loss": 0.4415, "step": 1033 }, { "epoch": 0.5776536312849162, "grad_norm": 1.23500394821167, "learning_rate": 0.0009739775910364146, "loss": 0.533, "step": 1034 }, { "epoch": 0.5782122905027933, "grad_norm": 4.765673637390137, "learning_rate": 0.0009739495798319328, "loss": 0.6176, "step": 1035 }, { "epoch": 0.5787709497206703, "grad_norm": 0.9743689298629761, "learning_rate": 0.000973921568627451, "loss": 0.4773, "step": 1036 }, { "epoch": 0.5793296089385475, "grad_norm": 1.006348967552185, "learning_rate": 0.0009738935574229693, "loss": 0.5239, "step": 1037 }, { "epoch": 0.5798882681564246, "grad_norm": 1.0814579725265503, "learning_rate": 
0.0009738655462184874, "loss": 0.516, "step": 1038 }, { "epoch": 0.5804469273743017, "grad_norm": 0.7172871232032776, "learning_rate": 0.0009738375350140056, "loss": 0.6491, "step": 1039 }, { "epoch": 0.5810055865921788, "grad_norm": 0.6884520649909973, "learning_rate": 0.0009738095238095238, "loss": 0.5154, "step": 1040 }, { "epoch": 0.5815642458100558, "grad_norm": 0.5274704098701477, "learning_rate": 0.000973781512605042, "loss": 0.5848, "step": 1041 }, { "epoch": 0.582122905027933, "grad_norm": 3.0449068546295166, "learning_rate": 0.0009737535014005603, "loss": 0.5009, "step": 1042 }, { "epoch": 0.5826815642458101, "grad_norm": 0.6482254266738892, "learning_rate": 0.0009737254901960784, "loss": 0.5343, "step": 1043 }, { "epoch": 0.5832402234636872, "grad_norm": 0.6769239902496338, "learning_rate": 0.0009736974789915966, "loss": 0.573, "step": 1044 }, { "epoch": 0.5837988826815642, "grad_norm": 0.5965105891227722, "learning_rate": 0.0009736694677871148, "loss": 0.5129, "step": 1045 }, { "epoch": 0.5843575418994413, "grad_norm": 0.43163272738456726, "learning_rate": 0.0009736414565826331, "loss": 0.513, "step": 1046 }, { "epoch": 0.5849162011173185, "grad_norm": 0.4251498579978943, "learning_rate": 0.0009736134453781514, "loss": 0.447, "step": 1047 }, { "epoch": 0.5854748603351956, "grad_norm": 0.7185901999473572, "learning_rate": 0.0009735854341736695, "loss": 0.4406, "step": 1048 }, { "epoch": 0.5860335195530726, "grad_norm": 0.6413022875785828, "learning_rate": 0.0009735574229691877, "loss": 0.4161, "step": 1049 }, { "epoch": 0.5865921787709497, "grad_norm": 2.7174506187438965, "learning_rate": 0.0009735294117647059, "loss": 0.5461, "step": 1050 }, { "epoch": 0.5871508379888268, "grad_norm": 0.715381383895874, "learning_rate": 0.0009735014005602241, "loss": 0.5319, "step": 1051 }, { "epoch": 0.587709497206704, "grad_norm": 0.8026681542396545, "learning_rate": 0.0009734733893557424, "loss": 0.613, "step": 1052 }, { "epoch": 0.588268156424581, "grad_norm": 
0.765281081199646, "learning_rate": 0.0009734453781512606, "loss": 0.5044, "step": 1053 }, { "epoch": 0.5888268156424581, "grad_norm": 0.5909101963043213, "learning_rate": 0.0009734173669467787, "loss": 0.4075, "step": 1054 }, { "epoch": 0.5893854748603352, "grad_norm": 0.5381833910942078, "learning_rate": 0.0009733893557422969, "loss": 0.5116, "step": 1055 }, { "epoch": 0.5899441340782123, "grad_norm": 0.6411459445953369, "learning_rate": 0.0009733613445378151, "loss": 0.5999, "step": 1056 }, { "epoch": 0.5905027932960893, "grad_norm": 2.460374116897583, "learning_rate": 0.0009733333333333334, "loss": 0.5764, "step": 1057 }, { "epoch": 0.5910614525139665, "grad_norm": 0.45168182253837585, "learning_rate": 0.0009733053221288516, "loss": 0.4098, "step": 1058 }, { "epoch": 0.5916201117318436, "grad_norm": 0.52949458360672, "learning_rate": 0.0009732773109243697, "loss": 0.4437, "step": 1059 }, { "epoch": 0.5921787709497207, "grad_norm": 0.49910610914230347, "learning_rate": 0.0009732492997198879, "loss": 0.475, "step": 1060 }, { "epoch": 0.5927374301675977, "grad_norm": 0.5932276844978333, "learning_rate": 0.0009732212885154061, "loss": 0.4501, "step": 1061 }, { "epoch": 0.5932960893854748, "grad_norm": 1.1807256937026978, "learning_rate": 0.0009731932773109245, "loss": 0.5197, "step": 1062 }, { "epoch": 0.593854748603352, "grad_norm": 0.5904451608657837, "learning_rate": 0.0009731652661064427, "loss": 0.5183, "step": 1063 }, { "epoch": 0.5944134078212291, "grad_norm": 1.4970983266830444, "learning_rate": 0.0009731372549019608, "loss": 0.4292, "step": 1064 }, { "epoch": 0.5949720670391061, "grad_norm": 1.2578061819076538, "learning_rate": 0.000973109243697479, "loss": 0.4124, "step": 1065 }, { "epoch": 0.5955307262569832, "grad_norm": 0.551940381526947, "learning_rate": 0.0009730812324929972, "loss": 0.4868, "step": 1066 }, { "epoch": 0.5960893854748603, "grad_norm": 0.6726689338684082, "learning_rate": 0.0009730532212885155, "loss": 0.5478, "step": 1067 }, { 
"epoch": 0.5966480446927375, "grad_norm": 0.7053314447402954, "learning_rate": 0.0009730252100840337, "loss": 0.5736, "step": 1068 }, { "epoch": 0.5972067039106145, "grad_norm": 1.3705649375915527, "learning_rate": 0.0009729971988795519, "loss": 0.5976, "step": 1069 }, { "epoch": 0.5977653631284916, "grad_norm": 0.7059028148651123, "learning_rate": 0.00097296918767507, "loss": 0.5561, "step": 1070 }, { "epoch": 0.5983240223463687, "grad_norm": 0.7204971313476562, "learning_rate": 0.0009729411764705882, "loss": 0.4561, "step": 1071 }, { "epoch": 0.5988826815642458, "grad_norm": 0.6502424478530884, "learning_rate": 0.0009729131652661065, "loss": 0.4368, "step": 1072 }, { "epoch": 0.5994413407821229, "grad_norm": 0.7644829750061035, "learning_rate": 0.0009728851540616247, "loss": 0.6742, "step": 1073 }, { "epoch": 0.6, "grad_norm": 0.8019526600837708, "learning_rate": 0.0009728571428571429, "loss": 0.3617, "step": 1074 }, { "epoch": 0.6005586592178771, "grad_norm": 1.1168971061706543, "learning_rate": 0.000972829131652661, "loss": 0.5234, "step": 1075 }, { "epoch": 0.6011173184357542, "grad_norm": 0.6615554690361023, "learning_rate": 0.0009728011204481792, "loss": 0.668, "step": 1076 }, { "epoch": 0.6016759776536312, "grad_norm": 0.544056236743927, "learning_rate": 0.0009727731092436975, "loss": 0.5545, "step": 1077 }, { "epoch": 0.6022346368715084, "grad_norm": 0.6310135722160339, "learning_rate": 0.0009727450980392158, "loss": 0.4709, "step": 1078 }, { "epoch": 0.6027932960893855, "grad_norm": 0.5278035402297974, "learning_rate": 0.000972717086834734, "loss": 0.5032, "step": 1079 }, { "epoch": 0.6033519553072626, "grad_norm": 0.8330789804458618, "learning_rate": 0.0009726890756302521, "loss": 0.5006, "step": 1080 }, { "epoch": 0.6039106145251396, "grad_norm": 0.6948153376579285, "learning_rate": 0.0009726610644257703, "loss": 0.5183, "step": 1081 }, { "epoch": 0.6044692737430167, "grad_norm": 0.6444036364555359, "learning_rate": 0.0009726330532212886, "loss": 
0.4745, "step": 1082 }, { "epoch": 0.6050279329608939, "grad_norm": 0.5123707056045532, "learning_rate": 0.0009726050420168068, "loss": 0.4727, "step": 1083 }, { "epoch": 0.605586592178771, "grad_norm": 0.5131059885025024, "learning_rate": 0.000972577030812325, "loss": 0.4359, "step": 1084 }, { "epoch": 0.6061452513966481, "grad_norm": 0.5386841297149658, "learning_rate": 0.0009725490196078432, "loss": 0.5728, "step": 1085 }, { "epoch": 0.6067039106145251, "grad_norm": 1.0279717445373535, "learning_rate": 0.0009725210084033613, "loss": 0.4456, "step": 1086 }, { "epoch": 0.6072625698324022, "grad_norm": 1.6170985698699951, "learning_rate": 0.0009724929971988796, "loss": 0.4718, "step": 1087 }, { "epoch": 0.6078212290502794, "grad_norm": 0.5936034917831421, "learning_rate": 0.0009724649859943978, "loss": 0.442, "step": 1088 }, { "epoch": 0.6083798882681565, "grad_norm": 0.8925131559371948, "learning_rate": 0.000972436974789916, "loss": 0.4927, "step": 1089 }, { "epoch": 0.6089385474860335, "grad_norm": 1.0748463869094849, "learning_rate": 0.0009724089635854342, "loss": 0.5497, "step": 1090 }, { "epoch": 0.6094972067039106, "grad_norm": 0.8236844539642334, "learning_rate": 0.0009723809523809523, "loss": 0.5295, "step": 1091 }, { "epoch": 0.6100558659217877, "grad_norm": 11.054025650024414, "learning_rate": 0.0009723529411764706, "loss": 0.6345, "step": 1092 }, { "epoch": 0.6106145251396649, "grad_norm": 0.896457850933075, "learning_rate": 0.0009723249299719888, "loss": 0.7274, "step": 1093 }, { "epoch": 0.6111731843575419, "grad_norm": 0.8940154910087585, "learning_rate": 0.000972296918767507, "loss": 0.56, "step": 1094 }, { "epoch": 0.611731843575419, "grad_norm": 0.5155821442604065, "learning_rate": 0.0009722689075630253, "loss": 0.415, "step": 1095 }, { "epoch": 0.6122905027932961, "grad_norm": 0.7292872667312622, "learning_rate": 0.0009722408963585434, "loss": 0.4018, "step": 1096 }, { "epoch": 0.6128491620111732, "grad_norm": 0.5831227898597717, "learning_rate": 
0.0009722128851540617, "loss": 0.5924, "step": 1097 }, { "epoch": 0.6134078212290502, "grad_norm": 0.5251787304878235, "learning_rate": 0.0009721848739495799, "loss": 0.5893, "step": 1098 }, { "epoch": 0.6139664804469274, "grad_norm": 0.9705613851547241, "learning_rate": 0.0009721568627450981, "loss": 0.4265, "step": 1099 }, { "epoch": 0.6145251396648045, "grad_norm": 7.225012302398682, "learning_rate": 0.0009721288515406163, "loss": 0.5099, "step": 1100 }, { "epoch": 0.6150837988826816, "grad_norm": 0.9091275930404663, "learning_rate": 0.0009721008403361345, "loss": 0.435, "step": 1101 }, { "epoch": 0.6156424581005586, "grad_norm": 1.5239793062210083, "learning_rate": 0.0009720728291316527, "loss": 0.6069, "step": 1102 }, { "epoch": 0.6162011173184357, "grad_norm": 0.4193691313266754, "learning_rate": 0.0009720448179271709, "loss": 0.5286, "step": 1103 }, { "epoch": 0.6167597765363129, "grad_norm": 0.6078191995620728, "learning_rate": 0.0009720168067226891, "loss": 0.4901, "step": 1104 }, { "epoch": 0.61731843575419, "grad_norm": 4.932099342346191, "learning_rate": 0.0009719887955182073, "loss": 0.4849, "step": 1105 }, { "epoch": 0.617877094972067, "grad_norm": 0.7159140110015869, "learning_rate": 0.0009719607843137255, "loss": 0.5934, "step": 1106 }, { "epoch": 0.6184357541899441, "grad_norm": 0.5018102526664734, "learning_rate": 0.0009719327731092437, "loss": 0.4016, "step": 1107 }, { "epoch": 0.6189944134078212, "grad_norm": 0.8189084529876709, "learning_rate": 0.0009719047619047619, "loss": 0.4621, "step": 1108 }, { "epoch": 0.6195530726256984, "grad_norm": 0.8758090138435364, "learning_rate": 0.0009718767507002801, "loss": 0.6693, "step": 1109 }, { "epoch": 0.6201117318435754, "grad_norm": 0.9009056091308594, "learning_rate": 0.0009718487394957983, "loss": 0.5139, "step": 1110 }, { "epoch": 0.6206703910614525, "grad_norm": 0.44998493790626526, "learning_rate": 0.0009718207282913166, "loss": 0.4325, "step": 1111 }, { "epoch": 0.6212290502793296, "grad_norm": 
0.5723503828048706, "learning_rate": 0.0009717927170868347, "loss": 0.6231, "step": 1112 }, { "epoch": 0.6217877094972067, "grad_norm": 0.5779495239257812, "learning_rate": 0.000971764705882353, "loss": 0.5373, "step": 1113 }, { "epoch": 0.6223463687150838, "grad_norm": 0.6900322437286377, "learning_rate": 0.0009717366946778712, "loss": 0.5136, "step": 1114 }, { "epoch": 0.6229050279329609, "grad_norm": 0.8103601932525635, "learning_rate": 0.0009717086834733894, "loss": 0.5172, "step": 1115 }, { "epoch": 0.623463687150838, "grad_norm": 0.6067003011703491, "learning_rate": 0.0009716806722689076, "loss": 0.4749, "step": 1116 }, { "epoch": 0.6240223463687151, "grad_norm": 0.9293099641799927, "learning_rate": 0.0009716526610644258, "loss": 0.6614, "step": 1117 }, { "epoch": 0.6245810055865921, "grad_norm": 0.7477920651435852, "learning_rate": 0.000971624649859944, "loss": 0.628, "step": 1118 }, { "epoch": 0.6251396648044693, "grad_norm": 0.705543577671051, "learning_rate": 0.0009715966386554622, "loss": 0.5577, "step": 1119 }, { "epoch": 0.6256983240223464, "grad_norm": 0.9889763593673706, "learning_rate": 0.0009715686274509804, "loss": 0.5096, "step": 1120 }, { "epoch": 0.6262569832402235, "grad_norm": 3.406019926071167, "learning_rate": 0.0009715406162464986, "loss": 0.5001, "step": 1121 }, { "epoch": 0.6268156424581005, "grad_norm": 6.968403339385986, "learning_rate": 0.0009715126050420168, "loss": 0.4409, "step": 1122 }, { "epoch": 0.6273743016759776, "grad_norm": 0.4938574731349945, "learning_rate": 0.000971484593837535, "loss": 0.4828, "step": 1123 }, { "epoch": 0.6279329608938548, "grad_norm": 0.5067856907844543, "learning_rate": 0.0009714565826330532, "loss": 0.4099, "step": 1124 }, { "epoch": 0.6284916201117319, "grad_norm": 0.687279999256134, "learning_rate": 0.0009714285714285714, "loss": 0.431, "step": 1125 }, { "epoch": 0.6290502793296089, "grad_norm": 0.7488174438476562, "learning_rate": 0.0009714005602240896, "loss": 0.5415, "step": 1126 }, { "epoch": 
0.629608938547486, "grad_norm": 0.7109690308570862, "learning_rate": 0.0009713725490196078, "loss": 0.6235, "step": 1127 }, { "epoch": 0.6301675977653631, "grad_norm": 1.0108140707015991, "learning_rate": 0.0009713445378151261, "loss": 0.8259, "step": 1128 }, { "epoch": 0.6307262569832403, "grad_norm": 0.7121959328651428, "learning_rate": 0.0009713165266106443, "loss": 0.654, "step": 1129 }, { "epoch": 0.6312849162011173, "grad_norm": 1.587821364402771, "learning_rate": 0.0009712885154061625, "loss": 0.5472, "step": 1130 }, { "epoch": 0.6318435754189944, "grad_norm": 0.734961986541748, "learning_rate": 0.0009712605042016807, "loss": 0.4481, "step": 1131 }, { "epoch": 0.6324022346368715, "grad_norm": 0.8328256011009216, "learning_rate": 0.0009712324929971989, "loss": 0.6009, "step": 1132 }, { "epoch": 0.6329608938547486, "grad_norm": 0.5134819746017456, "learning_rate": 0.0009712044817927172, "loss": 0.4236, "step": 1133 }, { "epoch": 0.6335195530726258, "grad_norm": 0.6215536594390869, "learning_rate": 0.0009711764705882353, "loss": 0.4078, "step": 1134 }, { "epoch": 0.6340782122905028, "grad_norm": 0.7158108353614807, "learning_rate": 0.0009711484593837535, "loss": 0.4183, "step": 1135 }, { "epoch": 0.6346368715083799, "grad_norm": 0.6285063624382019, "learning_rate": 0.0009711204481792717, "loss": 0.4381, "step": 1136 }, { "epoch": 0.635195530726257, "grad_norm": 0.7607704997062683, "learning_rate": 0.0009710924369747899, "loss": 0.6068, "step": 1137 }, { "epoch": 0.6357541899441341, "grad_norm": 0.7894600629806519, "learning_rate": 0.0009710644257703082, "loss": 0.5156, "step": 1138 }, { "epoch": 0.6363128491620111, "grad_norm": 0.7930091023445129, "learning_rate": 0.0009710364145658263, "loss": 0.4807, "step": 1139 }, { "epoch": 0.6368715083798883, "grad_norm": 1.073494553565979, "learning_rate": 0.0009710084033613445, "loss": 0.4868, "step": 1140 }, { "epoch": 0.6374301675977654, "grad_norm": 0.9982699155807495, "learning_rate": 0.0009709803921568627, "loss": 
0.4942, "step": 1141 }, { "epoch": 0.6379888268156425, "grad_norm": 0.43620118498802185, "learning_rate": 0.0009709523809523809, "loss": 0.3666, "step": 1142 }, { "epoch": 0.6385474860335195, "grad_norm": 0.987544596195221, "learning_rate": 0.0009709243697478993, "loss": 0.4831, "step": 1143 }, { "epoch": 0.6391061452513966, "grad_norm": 3.113736391067505, "learning_rate": 0.0009708963585434174, "loss": 0.5172, "step": 1144 }, { "epoch": 0.6396648044692738, "grad_norm": 0.6777386665344238, "learning_rate": 0.0009708683473389356, "loss": 0.4525, "step": 1145 }, { "epoch": 0.6402234636871509, "grad_norm": 0.7435891628265381, "learning_rate": 0.0009708403361344538, "loss": 0.5556, "step": 1146 }, { "epoch": 0.6407821229050279, "grad_norm": 0.8971303105354309, "learning_rate": 0.000970812324929972, "loss": 0.5126, "step": 1147 }, { "epoch": 0.641340782122905, "grad_norm": 0.665515661239624, "learning_rate": 0.0009707843137254903, "loss": 0.6845, "step": 1148 }, { "epoch": 0.6418994413407821, "grad_norm": 0.6780884265899658, "learning_rate": 0.0009707563025210085, "loss": 0.6369, "step": 1149 }, { "epoch": 0.6424581005586593, "grad_norm": 0.8216647505760193, "learning_rate": 0.0009707282913165266, "loss": 0.4555, "step": 1150 }, { "epoch": 0.6430167597765363, "grad_norm": 0.5883092284202576, "learning_rate": 0.0009707002801120448, "loss": 0.4918, "step": 1151 }, { "epoch": 0.6435754189944134, "grad_norm": 0.8862646818161011, "learning_rate": 0.000970672268907563, "loss": 0.4957, "step": 1152 }, { "epoch": 0.6441340782122905, "grad_norm": 0.6011155247688293, "learning_rate": 0.0009706442577030813, "loss": 0.47, "step": 1153 }, { "epoch": 0.6446927374301676, "grad_norm": 0.6456702947616577, "learning_rate": 0.0009706162464985995, "loss": 0.5884, "step": 1154 }, { "epoch": 0.6452513966480447, "grad_norm": 0.5160109996795654, "learning_rate": 0.0009705882352941176, "loss": 0.5162, "step": 1155 }, { "epoch": 0.6458100558659218, "grad_norm": 0.5241988301277161, 
"learning_rate": 0.0009705602240896358, "loss": 0.5013, "step": 1156 }, { "epoch": 0.6463687150837989, "grad_norm": 1.7749062776565552, "learning_rate": 0.000970532212885154, "loss": 0.6822, "step": 1157 }, { "epoch": 0.646927374301676, "grad_norm": 0.6804704666137695, "learning_rate": 0.0009705042016806723, "loss": 0.5191, "step": 1158 }, { "epoch": 0.647486033519553, "grad_norm": 1.4008277654647827, "learning_rate": 0.0009704761904761905, "loss": 0.5502, "step": 1159 }, { "epoch": 0.6480446927374302, "grad_norm": 0.533053994178772, "learning_rate": 0.0009704481792717086, "loss": 0.4722, "step": 1160 }, { "epoch": 0.6486033519553073, "grad_norm": 0.624987006187439, "learning_rate": 0.0009704201680672269, "loss": 0.5322, "step": 1161 }, { "epoch": 0.6491620111731844, "grad_norm": 1.0220478773117065, "learning_rate": 0.0009703921568627451, "loss": 0.5202, "step": 1162 }, { "epoch": 0.6497206703910614, "grad_norm": 7.964524269104004, "learning_rate": 0.0009703641456582634, "loss": 0.4592, "step": 1163 }, { "epoch": 0.6502793296089385, "grad_norm": 0.7197114825248718, "learning_rate": 0.0009703361344537816, "loss": 0.6007, "step": 1164 }, { "epoch": 0.6508379888268156, "grad_norm": 2.3243930339813232, "learning_rate": 0.0009703081232492998, "loss": 0.5335, "step": 1165 }, { "epoch": 0.6513966480446928, "grad_norm": 0.48415863513946533, "learning_rate": 0.0009702801120448179, "loss": 0.5499, "step": 1166 }, { "epoch": 0.6519553072625698, "grad_norm": 1.8503010272979736, "learning_rate": 0.0009702521008403361, "loss": 0.5757, "step": 1167 }, { "epoch": 0.6525139664804469, "grad_norm": 0.6757665872573853, "learning_rate": 0.0009702240896358544, "loss": 0.684, "step": 1168 }, { "epoch": 0.653072625698324, "grad_norm": 1.3364590406417847, "learning_rate": 0.0009701960784313726, "loss": 0.4595, "step": 1169 }, { "epoch": 0.6536312849162011, "grad_norm": 0.6175302267074585, "learning_rate": 0.0009701680672268908, "loss": 0.4642, "step": 1170 }, { "epoch": 0.6541899441340782, 
"grad_norm": 0.4852699637413025, "learning_rate": 0.0009701400560224089, "loss": 0.4996, "step": 1171 }, { "epoch": 0.6547486033519553, "grad_norm": 0.9816637635231018, "learning_rate": 0.0009701120448179271, "loss": 0.4906, "step": 1172 }, { "epoch": 0.6553072625698324, "grad_norm": 0.6328439712524414, "learning_rate": 0.0009700840336134454, "loss": 0.5194, "step": 1173 }, { "epoch": 0.6558659217877095, "grad_norm": 1.1651662588119507, "learning_rate": 0.0009700560224089636, "loss": 0.4595, "step": 1174 }, { "epoch": 0.6564245810055865, "grad_norm": 0.6353319883346558, "learning_rate": 0.0009700280112044818, "loss": 0.4036, "step": 1175 }, { "epoch": 0.6569832402234637, "grad_norm": 0.6404257416725159, "learning_rate": 0.0009699999999999999, "loss": 0.4365, "step": 1176 }, { "epoch": 0.6575418994413408, "grad_norm": 1.0196770429611206, "learning_rate": 0.0009699719887955181, "loss": 0.4619, "step": 1177 }, { "epoch": 0.6581005586592179, "grad_norm": 1.550592303276062, "learning_rate": 0.0009699439775910365, "loss": 0.4848, "step": 1178 }, { "epoch": 0.658659217877095, "grad_norm": 0.5176575779914856, "learning_rate": 0.0009699159663865547, "loss": 0.4501, "step": 1179 }, { "epoch": 0.659217877094972, "grad_norm": 0.5924175381660461, "learning_rate": 0.0009698879551820729, "loss": 0.5381, "step": 1180 }, { "epoch": 0.6597765363128492, "grad_norm": 0.6795361638069153, "learning_rate": 0.0009698599439775911, "loss": 0.419, "step": 1181 }, { "epoch": 0.6603351955307263, "grad_norm": 1.0456833839416504, "learning_rate": 0.0009698319327731092, "loss": 0.4914, "step": 1182 }, { "epoch": 0.6608938547486034, "grad_norm": 0.6252954602241516, "learning_rate": 0.0009698039215686275, "loss": 0.5764, "step": 1183 }, { "epoch": 0.6614525139664804, "grad_norm": 11.64720344543457, "learning_rate": 0.0009697759103641457, "loss": 0.4989, "step": 1184 }, { "epoch": 0.6620111731843575, "grad_norm": 1.2602818012237549, "learning_rate": 0.0009697478991596639, "loss": 0.432, "step": 1185 
}, { "epoch": 0.6625698324022347, "grad_norm": 1.4845386743545532, "learning_rate": 0.0009697198879551821, "loss": 0.5126, "step": 1186 }, { "epoch": 0.6631284916201118, "grad_norm": 0.8853285908699036, "learning_rate": 0.0009696918767507002, "loss": 0.5589, "step": 1187 }, { "epoch": 0.6636871508379888, "grad_norm": 0.7342753410339355, "learning_rate": 0.0009696638655462185, "loss": 0.5588, "step": 1188 }, { "epoch": 0.6642458100558659, "grad_norm": 3.369445323944092, "learning_rate": 0.0009696358543417367, "loss": 0.4997, "step": 1189 }, { "epoch": 0.664804469273743, "grad_norm": 0.5214580297470093, "learning_rate": 0.0009696078431372549, "loss": 0.476, "step": 1190 }, { "epoch": 0.6653631284916202, "grad_norm": 1.2988337278366089, "learning_rate": 0.0009695798319327731, "loss": 0.4847, "step": 1191 }, { "epoch": 0.6659217877094972, "grad_norm": 0.7430171370506287, "learning_rate": 0.0009695518207282912, "loss": 0.4597, "step": 1192 }, { "epoch": 0.6664804469273743, "grad_norm": 1.2891651391983032, "learning_rate": 0.0009695238095238096, "loss": 0.4538, "step": 1193 }, { "epoch": 0.6670391061452514, "grad_norm": 2.5343263149261475, "learning_rate": 0.0009694957983193278, "loss": 0.464, "step": 1194 }, { "epoch": 0.6675977653631285, "grad_norm": 0.5320484638214111, "learning_rate": 0.000969467787114846, "loss": 0.5846, "step": 1195 }, { "epoch": 0.6681564245810055, "grad_norm": 0.6462830305099487, "learning_rate": 0.0009694397759103642, "loss": 0.5244, "step": 1196 }, { "epoch": 0.6687150837988827, "grad_norm": 0.5505291223526001, "learning_rate": 0.0009694117647058824, "loss": 0.5034, "step": 1197 }, { "epoch": 0.6692737430167598, "grad_norm": 0.5639625191688538, "learning_rate": 0.0009693837535014006, "loss": 0.632, "step": 1198 }, { "epoch": 0.6698324022346369, "grad_norm": 0.7597293853759766, "learning_rate": 0.0009693557422969188, "loss": 0.6121, "step": 1199 }, { "epoch": 0.6703910614525139, "grad_norm": 0.560198962688446, "learning_rate": 
0.000969327731092437, "loss": 0.4575, "step": 1200 }, { "epoch": 0.670949720670391, "grad_norm": 0.5961484909057617, "learning_rate": 0.0009692997198879552, "loss": 0.5024, "step": 1201 }, { "epoch": 0.6715083798882682, "grad_norm": 0.6472962498664856, "learning_rate": 0.0009692717086834734, "loss": 0.4449, "step": 1202 }, { "epoch": 0.6720670391061453, "grad_norm": 0.8945441246032715, "learning_rate": 0.0009692436974789916, "loss": 0.4517, "step": 1203 }, { "epoch": 0.6726256983240223, "grad_norm": 0.6092879772186279, "learning_rate": 0.0009692156862745098, "loss": 0.534, "step": 1204 }, { "epoch": 0.6731843575418994, "grad_norm": 0.8901610374450684, "learning_rate": 0.000969187675070028, "loss": 0.542, "step": 1205 }, { "epoch": 0.6737430167597765, "grad_norm": 0.6504933834075928, "learning_rate": 0.0009691596638655462, "loss": 0.4646, "step": 1206 }, { "epoch": 0.6743016759776537, "grad_norm": 0.6498280763626099, "learning_rate": 0.0009691316526610644, "loss": 0.5283, "step": 1207 }, { "epoch": 0.6748603351955307, "grad_norm": 1.006180763244629, "learning_rate": 0.0009691036414565826, "loss": 0.4876, "step": 1208 }, { "epoch": 0.6754189944134078, "grad_norm": 0.6444622278213501, "learning_rate": 0.0009690756302521008, "loss": 0.3993, "step": 1209 }, { "epoch": 0.6759776536312849, "grad_norm": 8.225159645080566, "learning_rate": 0.0009690476190476191, "loss": 0.5376, "step": 1210 }, { "epoch": 0.676536312849162, "grad_norm": 0.7043599486351013, "learning_rate": 0.0009690196078431373, "loss": 0.6021, "step": 1211 }, { "epoch": 0.6770949720670391, "grad_norm": 0.8514872789382935, "learning_rate": 0.0009689915966386555, "loss": 0.4462, "step": 1212 }, { "epoch": 0.6776536312849162, "grad_norm": 1.7722604274749756, "learning_rate": 0.0009689635854341738, "loss": 0.5955, "step": 1213 }, { "epoch": 0.6782122905027933, "grad_norm": 0.4466038644313812, "learning_rate": 0.0009689355742296919, "loss": 0.4371, "step": 1214 }, { "epoch": 0.6787709497206704, "grad_norm": 
0.4447239339351654, "learning_rate": 0.0009689075630252101, "loss": 0.3808, "step": 1215 }, { "epoch": 0.6793296089385474, "grad_norm": 0.5980348587036133, "learning_rate": 0.0009688795518207283, "loss": 0.5303, "step": 1216 }, { "epoch": 0.6798882681564246, "grad_norm": 1.0799697637557983, "learning_rate": 0.0009688515406162465, "loss": 0.5761, "step": 1217 }, { "epoch": 0.6804469273743017, "grad_norm": 0.9614025354385376, "learning_rate": 0.0009688235294117648, "loss": 0.5431, "step": 1218 }, { "epoch": 0.6810055865921788, "grad_norm": 0.9209677577018738, "learning_rate": 0.0009687955182072829, "loss": 0.4556, "step": 1219 }, { "epoch": 0.6815642458100558, "grad_norm": 0.6238844990730286, "learning_rate": 0.0009687675070028011, "loss": 0.4895, "step": 1220 }, { "epoch": 0.6821229050279329, "grad_norm": 0.855700671672821, "learning_rate": 0.0009687394957983193, "loss": 0.5299, "step": 1221 }, { "epoch": 0.6826815642458101, "grad_norm": 1.0048364400863647, "learning_rate": 0.0009687114845938375, "loss": 0.4399, "step": 1222 }, { "epoch": 0.6832402234636872, "grad_norm": 0.5466775298118591, "learning_rate": 0.0009686834733893558, "loss": 0.4425, "step": 1223 }, { "epoch": 0.6837988826815642, "grad_norm": 0.9739848375320435, "learning_rate": 0.0009686554621848739, "loss": 0.4623, "step": 1224 }, { "epoch": 0.6843575418994413, "grad_norm": 11.794245719909668, "learning_rate": 0.0009686274509803921, "loss": 0.531, "step": 1225 }, { "epoch": 0.6849162011173184, "grad_norm": 0.8747836351394653, "learning_rate": 0.0009685994397759104, "loss": 0.5467, "step": 1226 }, { "epoch": 0.6854748603351956, "grad_norm": 0.7160051465034485, "learning_rate": 0.0009685714285714286, "loss": 0.4519, "step": 1227 }, { "epoch": 0.6860335195530727, "grad_norm": 0.5023255944252014, "learning_rate": 0.0009685434173669469, "loss": 0.419, "step": 1228 }, { "epoch": 0.6865921787709497, "grad_norm": 0.48213818669319153, "learning_rate": 0.0009685154061624651, "loss": 0.4597, "step": 1229 }, { 
"epoch": 0.6871508379888268, "grad_norm": 0.6828752160072327, "learning_rate": 0.0009684873949579832, "loss": 0.4993, "step": 1230 }, { "epoch": 0.6877094972067039, "grad_norm": 0.667091965675354, "learning_rate": 0.0009684593837535014, "loss": 0.4369, "step": 1231 }, { "epoch": 0.6882681564245811, "grad_norm": 0.5374354720115662, "learning_rate": 0.0009684313725490196, "loss": 0.5498, "step": 1232 }, { "epoch": 0.6888268156424581, "grad_norm": 0.595704197883606, "learning_rate": 0.0009684033613445379, "loss": 0.4821, "step": 1233 }, { "epoch": 0.6893854748603352, "grad_norm": 0.6341480016708374, "learning_rate": 0.0009683753501400561, "loss": 0.4737, "step": 1234 }, { "epoch": 0.6899441340782123, "grad_norm": 0.8466470241546631, "learning_rate": 0.0009683473389355742, "loss": 0.5707, "step": 1235 }, { "epoch": 0.6905027932960894, "grad_norm": 0.5244937539100647, "learning_rate": 0.0009683193277310924, "loss": 0.4776, "step": 1236 }, { "epoch": 0.6910614525139664, "grad_norm": 0.6936596632003784, "learning_rate": 0.0009682913165266106, "loss": 0.5769, "step": 1237 }, { "epoch": 0.6916201117318436, "grad_norm": 0.6972819566726685, "learning_rate": 0.0009682633053221289, "loss": 0.5927, "step": 1238 }, { "epoch": 0.6921787709497207, "grad_norm": 0.4999435544013977, "learning_rate": 0.0009682352941176471, "loss": 0.4442, "step": 1239 }, { "epoch": 0.6927374301675978, "grad_norm": 0.6698967218399048, "learning_rate": 0.0009682072829131652, "loss": 0.565, "step": 1240 }, { "epoch": 0.6932960893854748, "grad_norm": 0.6050639152526855, "learning_rate": 0.0009681792717086834, "loss": 0.4654, "step": 1241 }, { "epoch": 0.693854748603352, "grad_norm": 2.451584815979004, "learning_rate": 0.0009681512605042016, "loss": 0.5334, "step": 1242 }, { "epoch": 0.6944134078212291, "grad_norm": 0.6050121188163757, "learning_rate": 0.00096812324929972, "loss": 0.5986, "step": 1243 }, { "epoch": 0.6949720670391062, "grad_norm": 1.0941985845565796, "learning_rate": 0.0009680952380952382, 
"loss": 0.457, "step": 1244 }, { "epoch": 0.6955307262569832, "grad_norm": 0.7693732380867004, "learning_rate": 0.0009680672268907564, "loss": 0.5854, "step": 1245 }, { "epoch": 0.6960893854748603, "grad_norm": 0.6690261960029602, "learning_rate": 0.0009680392156862745, "loss": 0.4599, "step": 1246 }, { "epoch": 0.6966480446927374, "grad_norm": 0.7835928797721863, "learning_rate": 0.0009680112044817927, "loss": 0.5687, "step": 1247 }, { "epoch": 0.6972067039106146, "grad_norm": 0.5262266397476196, "learning_rate": 0.000967983193277311, "loss": 0.4271, "step": 1248 }, { "epoch": 0.6977653631284916, "grad_norm": 0.9227473735809326, "learning_rate": 0.0009679551820728292, "loss": 0.4949, "step": 1249 }, { "epoch": 0.6983240223463687, "grad_norm": 2.512376546859741, "learning_rate": 0.0009679271708683474, "loss": 0.4703, "step": 1250 }, { "epoch": 0.6988826815642458, "grad_norm": 0.6254360675811768, "learning_rate": 0.0009678991596638655, "loss": 0.5113, "step": 1251 }, { "epoch": 0.6994413407821229, "grad_norm": 0.517559289932251, "learning_rate": 0.0009678711484593837, "loss": 0.4107, "step": 1252 }, { "epoch": 0.7, "grad_norm": 0.49326375126838684, "learning_rate": 0.000967843137254902, "loss": 0.4401, "step": 1253 }, { "epoch": 0.7005586592178771, "grad_norm": 0.6251634955406189, "learning_rate": 0.0009678151260504202, "loss": 0.4965, "step": 1254 }, { "epoch": 0.7011173184357542, "grad_norm": 0.515642523765564, "learning_rate": 0.0009677871148459384, "loss": 0.4646, "step": 1255 }, { "epoch": 0.7016759776536313, "grad_norm": 0.934600293636322, "learning_rate": 0.0009677591036414565, "loss": 0.5258, "step": 1256 }, { "epoch": 0.7022346368715083, "grad_norm": 4.897491455078125, "learning_rate": 0.0009677310924369747, "loss": 0.518, "step": 1257 }, { "epoch": 0.7027932960893855, "grad_norm": 0.7678471207618713, "learning_rate": 0.000967703081232493, "loss": 0.4552, "step": 1258 }, { "epoch": 0.7033519553072626, "grad_norm": 0.8946427702903748, "learning_rate": 
0.0009676750700280113, "loss": 0.5568, "step": 1259 }, { "epoch": 0.7039106145251397, "grad_norm": 0.5655092000961304, "learning_rate": 0.0009676470588235295, "loss": 0.501, "step": 1260 }, { "epoch": 0.7044692737430167, "grad_norm": 0.6731577515602112, "learning_rate": 0.0009676190476190477, "loss": 0.5235, "step": 1261 }, { "epoch": 0.7050279329608938, "grad_norm": 0.6445982456207275, "learning_rate": 0.0009675910364145658, "loss": 0.4754, "step": 1262 }, { "epoch": 0.705586592178771, "grad_norm": 0.528878927230835, "learning_rate": 0.0009675630252100841, "loss": 0.5521, "step": 1263 }, { "epoch": 0.7061452513966481, "grad_norm": 1.5173887014389038, "learning_rate": 0.0009675350140056023, "loss": 0.771, "step": 1264 }, { "epoch": 0.7067039106145251, "grad_norm": 0.615871787071228, "learning_rate": 0.0009675070028011205, "loss": 0.4714, "step": 1265 }, { "epoch": 0.7072625698324022, "grad_norm": 0.6621685028076172, "learning_rate": 0.0009674789915966387, "loss": 0.5297, "step": 1266 }, { "epoch": 0.7078212290502793, "grad_norm": 0.9935075044631958, "learning_rate": 0.0009674509803921568, "loss": 0.4746, "step": 1267 }, { "epoch": 0.7083798882681565, "grad_norm": 0.6970776319503784, "learning_rate": 0.0009674229691876751, "loss": 0.5013, "step": 1268 }, { "epoch": 0.7089385474860335, "grad_norm": 0.5533170104026794, "learning_rate": 0.0009673949579831933, "loss": 0.5044, "step": 1269 }, { "epoch": 0.7094972067039106, "grad_norm": 0.9132866859436035, "learning_rate": 0.0009673669467787115, "loss": 0.625, "step": 1270 }, { "epoch": 0.7100558659217877, "grad_norm": 0.604953944683075, "learning_rate": 0.0009673389355742297, "loss": 0.6432, "step": 1271 }, { "epoch": 0.7106145251396648, "grad_norm": 0.5731745362281799, "learning_rate": 0.0009673109243697478, "loss": 0.4679, "step": 1272 }, { "epoch": 0.711173184357542, "grad_norm": 0.8611568808555603, "learning_rate": 0.0009672829131652661, "loss": 0.4981, "step": 1273 }, { "epoch": 0.711731843575419, "grad_norm": 
0.823574423789978, "learning_rate": 0.0009672549019607843, "loss": 0.4382, "step": 1274 }, { "epoch": 0.7122905027932961, "grad_norm": 1.7036203145980835, "learning_rate": 0.0009672268907563026, "loss": 0.5302, "step": 1275 }, { "epoch": 0.7128491620111732, "grad_norm": 0.779194712638855, "learning_rate": 0.0009671988795518208, "loss": 0.483, "step": 1276 }, { "epoch": 0.7134078212290503, "grad_norm": 0.7826222777366638, "learning_rate": 0.000967170868347339, "loss": 0.6213, "step": 1277 }, { "epoch": 0.7139664804469273, "grad_norm": 0.8241113424301147, "learning_rate": 0.0009671428571428572, "loss": 0.5697, "step": 1278 }, { "epoch": 0.7145251396648045, "grad_norm": 0.6261003613471985, "learning_rate": 0.0009671148459383754, "loss": 0.5607, "step": 1279 }, { "epoch": 0.7150837988826816, "grad_norm": 1.5766152143478394, "learning_rate": 0.0009670868347338936, "loss": 0.5122, "step": 1280 }, { "epoch": 0.7156424581005587, "grad_norm": 3.283601999282837, "learning_rate": 0.0009670588235294118, "loss": 0.5834, "step": 1281 }, { "epoch": 0.7162011173184357, "grad_norm": 1.238656997680664, "learning_rate": 0.00096703081232493, "loss": 0.4688, "step": 1282 }, { "epoch": 0.7167597765363128, "grad_norm": 0.6876110434532166, "learning_rate": 0.0009670028011204482, "loss": 0.4625, "step": 1283 }, { "epoch": 0.71731843575419, "grad_norm": 1.1952418088912964, "learning_rate": 0.0009669747899159664, "loss": 0.4241, "step": 1284 }, { "epoch": 0.7178770949720671, "grad_norm": 0.725604236125946, "learning_rate": 0.0009669467787114846, "loss": 0.4743, "step": 1285 }, { "epoch": 0.7184357541899441, "grad_norm": 0.6231208443641663, "learning_rate": 0.0009669187675070028, "loss": 0.5137, "step": 1286 }, { "epoch": 0.7189944134078212, "grad_norm": 2.544731378555298, "learning_rate": 0.000966890756302521, "loss": 0.4742, "step": 1287 }, { "epoch": 0.7195530726256983, "grad_norm": 0.4675057530403137, "learning_rate": 0.0009668627450980393, "loss": 0.4382, "step": 1288 }, { "epoch": 
0.7201117318435755, "grad_norm": 0.8564208745956421, "learning_rate": 0.0009668347338935574, "loss": 0.48, "step": 1289 }, { "epoch": 0.7206703910614525, "grad_norm": 0.5290160775184631, "learning_rate": 0.0009668067226890756, "loss": 0.4523, "step": 1290 }, { "epoch": 0.7212290502793296, "grad_norm": 3.9657459259033203, "learning_rate": 0.0009667787114845938, "loss": 0.5707, "step": 1291 }, { "epoch": 0.7217877094972067, "grad_norm": 0.5429648160934448, "learning_rate": 0.0009667507002801121, "loss": 0.531, "step": 1292 }, { "epoch": 0.7223463687150838, "grad_norm": 0.4244798421859741, "learning_rate": 0.0009667226890756304, "loss": 0.5218, "step": 1293 }, { "epoch": 0.7229050279329609, "grad_norm": 0.5533431172370911, "learning_rate": 0.0009666946778711485, "loss": 0.4547, "step": 1294 }, { "epoch": 0.723463687150838, "grad_norm": 0.5960068702697754, "learning_rate": 0.0009666666666666667, "loss": 0.5347, "step": 1295 }, { "epoch": 0.7240223463687151, "grad_norm": 0.4864499866962433, "learning_rate": 0.0009666386554621849, "loss": 0.4628, "step": 1296 }, { "epoch": 0.7245810055865922, "grad_norm": 0.509096622467041, "learning_rate": 0.0009666106442577031, "loss": 0.5803, "step": 1297 }, { "epoch": 0.7251396648044692, "grad_norm": 0.47281110286712646, "learning_rate": 0.0009665826330532214, "loss": 0.4286, "step": 1298 }, { "epoch": 0.7256983240223464, "grad_norm": 0.655084490776062, "learning_rate": 0.0009665546218487395, "loss": 0.5528, "step": 1299 }, { "epoch": 0.7262569832402235, "grad_norm": 0.5568073391914368, "learning_rate": 0.0009665266106442577, "loss": 0.4891, "step": 1300 }, { "epoch": 0.7268156424581006, "grad_norm": 0.5502691268920898, "learning_rate": 0.0009664985994397759, "loss": 0.5156, "step": 1301 }, { "epoch": 0.7273743016759776, "grad_norm": 0.7386418581008911, "learning_rate": 0.0009664705882352941, "loss": 0.5032, "step": 1302 }, { "epoch": 0.7279329608938547, "grad_norm": 0.5926641821861267, "learning_rate": 0.0009664425770308124, "loss": 
0.571, "step": 1303 }, { "epoch": 0.7284916201117319, "grad_norm": 1.3213979005813599, "learning_rate": 0.0009664145658263306, "loss": 0.4977, "step": 1304 }, { "epoch": 0.729050279329609, "grad_norm": 1.4560959339141846, "learning_rate": 0.0009663865546218487, "loss": 0.6771, "step": 1305 }, { "epoch": 0.729608938547486, "grad_norm": 1.2416977882385254, "learning_rate": 0.0009663585434173669, "loss": 0.4763, "step": 1306 }, { "epoch": 0.7301675977653631, "grad_norm": 0.6432930827140808, "learning_rate": 0.0009663305322128851, "loss": 0.4798, "step": 1307 }, { "epoch": 0.7307262569832402, "grad_norm": 2.4803335666656494, "learning_rate": 0.0009663025210084035, "loss": 0.6173, "step": 1308 }, { "epoch": 0.7312849162011174, "grad_norm": 0.4887591302394867, "learning_rate": 0.0009662745098039217, "loss": 0.4394, "step": 1309 }, { "epoch": 0.7318435754189944, "grad_norm": 0.5033316016197205, "learning_rate": 0.0009662464985994398, "loss": 0.507, "step": 1310 }, { "epoch": 0.7324022346368715, "grad_norm": 1.2471177577972412, "learning_rate": 0.000966218487394958, "loss": 0.4824, "step": 1311 }, { "epoch": 0.7329608938547486, "grad_norm": 0.6987435221672058, "learning_rate": 0.0009661904761904762, "loss": 0.4418, "step": 1312 }, { "epoch": 0.7335195530726257, "grad_norm": 0.6770135760307312, "learning_rate": 0.0009661624649859945, "loss": 0.6289, "step": 1313 }, { "epoch": 0.7340782122905027, "grad_norm": 0.784820020198822, "learning_rate": 0.0009661344537815127, "loss": 0.6944, "step": 1314 }, { "epoch": 0.7346368715083799, "grad_norm": 2.167708158493042, "learning_rate": 0.0009661064425770308, "loss": 0.5111, "step": 1315 }, { "epoch": 0.735195530726257, "grad_norm": 0.5602216124534607, "learning_rate": 0.000966078431372549, "loss": 0.4602, "step": 1316 }, { "epoch": 0.7357541899441341, "grad_norm": 0.9406470656394958, "learning_rate": 0.0009660504201680672, "loss": 0.4209, "step": 1317 }, { "epoch": 0.7363128491620111, "grad_norm": 2.0616462230682373, "learning_rate": 
0.0009660224089635855, "loss": 0.6191, "step": 1318 }, { "epoch": 0.7368715083798882, "grad_norm": 0.5870423316955566, "learning_rate": 0.0009659943977591037, "loss": 0.5135, "step": 1319 }, { "epoch": 0.7374301675977654, "grad_norm": 0.5533879995346069, "learning_rate": 0.0009659663865546219, "loss": 0.5163, "step": 1320 }, { "epoch": 0.7379888268156425, "grad_norm": 10.194575309753418, "learning_rate": 0.00096593837535014, "loss": 0.6292, "step": 1321 }, { "epoch": 0.7385474860335196, "grad_norm": 1.2345503568649292, "learning_rate": 0.0009659103641456582, "loss": 0.5805, "step": 1322 }, { "epoch": 0.7391061452513966, "grad_norm": 0.7731404900550842, "learning_rate": 0.0009658823529411765, "loss": 0.4614, "step": 1323 }, { "epoch": 0.7396648044692737, "grad_norm": 0.6412000060081482, "learning_rate": 0.0009658543417366948, "loss": 0.4785, "step": 1324 }, { "epoch": 0.7402234636871509, "grad_norm": 2.088176965713501, "learning_rate": 0.000965826330532213, "loss": 0.4535, "step": 1325 }, { "epoch": 0.740782122905028, "grad_norm": 0.6518339514732361, "learning_rate": 0.0009657983193277311, "loss": 0.4381, "step": 1326 }, { "epoch": 0.741340782122905, "grad_norm": 0.793572723865509, "learning_rate": 0.0009657703081232493, "loss": 0.3524, "step": 1327 }, { "epoch": 0.7418994413407821, "grad_norm": 0.8990557193756104, "learning_rate": 0.0009657422969187676, "loss": 0.3739, "step": 1328 }, { "epoch": 0.7424581005586592, "grad_norm": 1.6538406610488892, "learning_rate": 0.0009657142857142858, "loss": 0.6079, "step": 1329 }, { "epoch": 0.7430167597765364, "grad_norm": 0.6119401454925537, "learning_rate": 0.000965686274509804, "loss": 0.6339, "step": 1330 }, { "epoch": 0.7435754189944134, "grad_norm": 0.6402374505996704, "learning_rate": 0.0009656582633053221, "loss": 0.5539, "step": 1331 }, { "epoch": 0.7441340782122905, "grad_norm": 0.6331562399864197, "learning_rate": 0.0009656302521008403, "loss": 0.5754, "step": 1332 }, { "epoch": 0.7446927374301676, "grad_norm": 
2.119746446609497, "learning_rate": 0.0009656022408963585, "loss": 0.4693, "step": 1333 }, { "epoch": 0.7452513966480447, "grad_norm": 0.9882779121398926, "learning_rate": 0.0009655742296918768, "loss": 0.5527, "step": 1334 }, { "epoch": 0.7458100558659218, "grad_norm": 0.5713686347007751, "learning_rate": 0.000965546218487395, "loss": 0.5648, "step": 1335 }, { "epoch": 0.7463687150837989, "grad_norm": 0.5363240242004395, "learning_rate": 0.0009655182072829132, "loss": 0.4486, "step": 1336 }, { "epoch": 0.746927374301676, "grad_norm": 0.482914537191391, "learning_rate": 0.0009654901960784313, "loss": 0.4789, "step": 1337 }, { "epoch": 0.7474860335195531, "grad_norm": 0.8363133668899536, "learning_rate": 0.0009654621848739495, "loss": 0.5066, "step": 1338 }, { "epoch": 0.7480446927374301, "grad_norm": 0.7073422074317932, "learning_rate": 0.0009654341736694678, "loss": 0.5763, "step": 1339 }, { "epoch": 0.7486033519553073, "grad_norm": 1.0467145442962646, "learning_rate": 0.000965406162464986, "loss": 0.5788, "step": 1340 }, { "epoch": 0.7491620111731844, "grad_norm": 3.773933172225952, "learning_rate": 0.0009653781512605043, "loss": 0.5293, "step": 1341 }, { "epoch": 0.7497206703910615, "grad_norm": 0.9196529388427734, "learning_rate": 0.0009653501400560224, "loss": 0.4952, "step": 1342 }, { "epoch": 0.7502793296089385, "grad_norm": 12.447409629821777, "learning_rate": 0.0009653221288515406, "loss": 0.4218, "step": 1343 }, { "epoch": 0.7508379888268156, "grad_norm": 1.0569573640823364, "learning_rate": 0.0009652941176470589, "loss": 0.5137, "step": 1344 }, { "epoch": 0.7513966480446927, "grad_norm": 0.6175000071525574, "learning_rate": 0.0009652661064425771, "loss": 0.5575, "step": 1345 }, { "epoch": 0.7519553072625699, "grad_norm": 2.4520671367645264, "learning_rate": 0.0009652380952380953, "loss": 0.4509, "step": 1346 }, { "epoch": 0.7525139664804469, "grad_norm": 0.5845544934272766, "learning_rate": 0.0009652100840336134, "loss": 0.5234, "step": 1347 }, { 
"epoch": 0.753072625698324, "grad_norm": 0.6283985376358032, "learning_rate": 0.0009651820728291316, "loss": 0.4814, "step": 1348 }, { "epoch": 0.7536312849162011, "grad_norm": 0.5294947624206543, "learning_rate": 0.0009651540616246499, "loss": 0.4875, "step": 1349 }, { "epoch": 0.7541899441340782, "grad_norm": 0.8415542840957642, "learning_rate": 0.0009651260504201681, "loss": 0.4588, "step": 1350 }, { "epoch": 0.7547486033519553, "grad_norm": 1.381120204925537, "learning_rate": 0.0009650980392156863, "loss": 0.5656, "step": 1351 }, { "epoch": 0.7553072625698324, "grad_norm": 1.033808708190918, "learning_rate": 0.0009650700280112045, "loss": 0.4942, "step": 1352 }, { "epoch": 0.7558659217877095, "grad_norm": 0.4895726144313812, "learning_rate": 0.0009650420168067226, "loss": 0.5065, "step": 1353 }, { "epoch": 0.7564245810055866, "grad_norm": 0.4510646462440491, "learning_rate": 0.0009650140056022409, "loss": 0.4549, "step": 1354 }, { "epoch": 0.7569832402234636, "grad_norm": 1.4920895099639893, "learning_rate": 0.0009649859943977591, "loss": 0.5378, "step": 1355 }, { "epoch": 0.7575418994413408, "grad_norm": 0.6821523904800415, "learning_rate": 0.0009649579831932773, "loss": 0.4238, "step": 1356 }, { "epoch": 0.7581005586592179, "grad_norm": 0.5303128361701965, "learning_rate": 0.0009649299719887956, "loss": 0.5003, "step": 1357 }, { "epoch": 0.758659217877095, "grad_norm": 0.560666024684906, "learning_rate": 0.0009649019607843136, "loss": 0.4822, "step": 1358 }, { "epoch": 0.759217877094972, "grad_norm": 0.6806285381317139, "learning_rate": 0.000964873949579832, "loss": 0.5673, "step": 1359 }, { "epoch": 0.7597765363128491, "grad_norm": 0.49648961424827576, "learning_rate": 0.0009648459383753502, "loss": 0.4474, "step": 1360 }, { "epoch": 0.7603351955307263, "grad_norm": 0.7954347133636475, "learning_rate": 0.0009648179271708684, "loss": 0.4837, "step": 1361 }, { "epoch": 0.7608938547486034, "grad_norm": 0.6850284934043884, "learning_rate": 0.0009647899159663866, 
"loss": 0.5586, "step": 1362 }, { "epoch": 0.7614525139664804, "grad_norm": 1.3656668663024902, "learning_rate": 0.0009647619047619047, "loss": 0.3773, "step": 1363 }, { "epoch": 0.7620111731843575, "grad_norm": 1.313139796257019, "learning_rate": 0.000964733893557423, "loss": 0.4479, "step": 1364 }, { "epoch": 0.7625698324022346, "grad_norm": 0.47272732853889465, "learning_rate": 0.0009647058823529412, "loss": 0.4587, "step": 1365 }, { "epoch": 0.7631284916201118, "grad_norm": 0.7108208537101746, "learning_rate": 0.0009646778711484594, "loss": 0.5143, "step": 1366 }, { "epoch": 0.7636871508379889, "grad_norm": 0.8293966054916382, "learning_rate": 0.0009646498599439776, "loss": 0.5347, "step": 1367 }, { "epoch": 0.7642458100558659, "grad_norm": 18.415096282958984, "learning_rate": 0.0009646218487394958, "loss": 0.5508, "step": 1368 }, { "epoch": 0.764804469273743, "grad_norm": 4.5069122314453125, "learning_rate": 0.000964593837535014, "loss": 0.4962, "step": 1369 }, { "epoch": 0.7653631284916201, "grad_norm": 4.609315872192383, "learning_rate": 0.0009645658263305322, "loss": 0.4428, "step": 1370 }, { "epoch": 0.7659217877094973, "grad_norm": 0.7481788396835327, "learning_rate": 0.0009645378151260504, "loss": 0.5505, "step": 1371 }, { "epoch": 0.7664804469273743, "grad_norm": 0.6795213222503662, "learning_rate": 0.0009645098039215686, "loss": 0.6345, "step": 1372 }, { "epoch": 0.7670391061452514, "grad_norm": 2.641211986541748, "learning_rate": 0.0009644817927170868, "loss": 0.6124, "step": 1373 }, { "epoch": 0.7675977653631285, "grad_norm": 0.9110129475593567, "learning_rate": 0.0009644537815126051, "loss": 0.5255, "step": 1374 }, { "epoch": 0.7681564245810056, "grad_norm": 0.8977395296096802, "learning_rate": 0.0009644257703081233, "loss": 0.5757, "step": 1375 }, { "epoch": 0.7687150837988826, "grad_norm": 1.586690068244934, "learning_rate": 0.0009643977591036415, "loss": 0.56, "step": 1376 }, { "epoch": 0.7692737430167598, "grad_norm": 0.4725937843322754, 
"learning_rate": 0.0009643697478991597, "loss": 0.461, "step": 1377 }, { "epoch": 0.7698324022346369, "grad_norm": 0.7157468795776367, "learning_rate": 0.0009643417366946779, "loss": 0.627, "step": 1378 }, { "epoch": 0.770391061452514, "grad_norm": 1.6860612630844116, "learning_rate": 0.0009643137254901961, "loss": 0.4543, "step": 1379 }, { "epoch": 0.770949720670391, "grad_norm": 0.9065467119216919, "learning_rate": 0.0009642857142857143, "loss": 0.7157, "step": 1380 }, { "epoch": 0.7715083798882681, "grad_norm": 1.117568850517273, "learning_rate": 0.0009642577030812325, "loss": 0.5059, "step": 1381 }, { "epoch": 0.7720670391061453, "grad_norm": 2.117650270462036, "learning_rate": 0.0009642296918767507, "loss": 0.6479, "step": 1382 }, { "epoch": 0.7726256983240224, "grad_norm": 0.47136661410331726, "learning_rate": 0.0009642016806722689, "loss": 0.4798, "step": 1383 }, { "epoch": 0.7731843575418994, "grad_norm": 0.6892040371894836, "learning_rate": 0.0009641736694677872, "loss": 0.4799, "step": 1384 }, { "epoch": 0.7737430167597765, "grad_norm": 1.518065094947815, "learning_rate": 0.0009641456582633053, "loss": 0.5524, "step": 1385 }, { "epoch": 0.7743016759776536, "grad_norm": 0.9349548816680908, "learning_rate": 0.0009641176470588235, "loss": 0.4479, "step": 1386 }, { "epoch": 0.7748603351955308, "grad_norm": 0.7361342906951904, "learning_rate": 0.0009640896358543417, "loss": 0.6852, "step": 1387 }, { "epoch": 0.7754189944134078, "grad_norm": 0.6144516468048096, "learning_rate": 0.0009640616246498599, "loss": 0.4628, "step": 1388 }, { "epoch": 0.7759776536312849, "grad_norm": 1.3548355102539062, "learning_rate": 0.0009640336134453783, "loss": 0.4708, "step": 1389 }, { "epoch": 0.776536312849162, "grad_norm": 0.6006221175193787, "learning_rate": 0.0009640056022408964, "loss": 0.5141, "step": 1390 }, { "epoch": 0.7770949720670391, "grad_norm": 1.1799458265304565, "learning_rate": 0.0009639775910364146, "loss": 0.5467, "step": 1391 }, { "epoch": 0.7776536312849162, 
"grad_norm": 1.314858078956604, "learning_rate": 0.0009639495798319328, "loss": 0.4201, "step": 1392 }, { "epoch": 0.7782122905027933, "grad_norm": 0.7050217390060425, "learning_rate": 0.000963921568627451, "loss": 0.5912, "step": 1393 }, { "epoch": 0.7787709497206704, "grad_norm": 0.8072057962417603, "learning_rate": 0.0009638935574229693, "loss": 0.7952, "step": 1394 }, { "epoch": 0.7793296089385475, "grad_norm": 0.7330551743507385, "learning_rate": 0.0009638655462184874, "loss": 0.6818, "step": 1395 }, { "epoch": 0.7798882681564245, "grad_norm": 4.893136024475098, "learning_rate": 0.0009638375350140056, "loss": 0.4728, "step": 1396 }, { "epoch": 0.7804469273743017, "grad_norm": 0.7894774079322815, "learning_rate": 0.0009638095238095238, "loss": 0.711, "step": 1397 }, { "epoch": 0.7810055865921788, "grad_norm": 0.9732863903045654, "learning_rate": 0.000963781512605042, "loss": 0.469, "step": 1398 }, { "epoch": 0.7815642458100559, "grad_norm": 0.8147786259651184, "learning_rate": 0.0009637535014005603, "loss": 0.5458, "step": 1399 }, { "epoch": 0.7821229050279329, "grad_norm": 0.816346287727356, "learning_rate": 0.0009637254901960785, "loss": 0.5431, "step": 1400 }, { "epoch": 0.78268156424581, "grad_norm": 0.7073534727096558, "learning_rate": 0.0009636974789915966, "loss": 0.5808, "step": 1401 }, { "epoch": 0.7832402234636872, "grad_norm": 0.7590193152427673, "learning_rate": 0.0009636694677871148, "loss": 0.4888, "step": 1402 }, { "epoch": 0.7837988826815643, "grad_norm": 8.355823516845703, "learning_rate": 0.000963641456582633, "loss": 0.3829, "step": 1403 }, { "epoch": 0.7843575418994413, "grad_norm": 0.7909501194953918, "learning_rate": 0.0009636134453781513, "loss": 0.4857, "step": 1404 }, { "epoch": 0.7849162011173184, "grad_norm": 0.8943551182746887, "learning_rate": 0.0009635854341736695, "loss": 0.6265, "step": 1405 }, { "epoch": 0.7854748603351955, "grad_norm": 0.6333885788917542, "learning_rate": 0.0009635574229691876, "loss": 0.4893, "step": 1406 }, { 
"epoch": 0.7860335195530727, "grad_norm": 0.7539492845535278, "learning_rate": 0.0009635294117647059, "loss": 0.5991, "step": 1407 }, { "epoch": 0.7865921787709497, "grad_norm": 0.5527846217155457, "learning_rate": 0.0009635014005602241, "loss": 0.5105, "step": 1408 }, { "epoch": 0.7871508379888268, "grad_norm": 0.827819287776947, "learning_rate": 0.0009634733893557424, "loss": 0.5239, "step": 1409 }, { "epoch": 0.7877094972067039, "grad_norm": 0.6314797401428223, "learning_rate": 0.0009634453781512606, "loss": 0.3903, "step": 1410 }, { "epoch": 0.788268156424581, "grad_norm": 4.768505096435547, "learning_rate": 0.0009634173669467787, "loss": 0.5414, "step": 1411 }, { "epoch": 0.788826815642458, "grad_norm": 1.0183470249176025, "learning_rate": 0.0009633893557422969, "loss": 0.4332, "step": 1412 }, { "epoch": 0.7893854748603352, "grad_norm": 0.769864559173584, "learning_rate": 0.0009633613445378151, "loss": 0.4566, "step": 1413 }, { "epoch": 0.7899441340782123, "grad_norm": Infinity, "learning_rate": 0.0009633613445378151, "loss": 0.5697, "step": 1414 }, { "epoch": 0.7905027932960894, "grad_norm": 0.5988388657569885, "learning_rate": 0.0009633333333333334, "loss": 0.3823, "step": 1415 }, { "epoch": 0.7910614525139665, "grad_norm": 0.9508142471313477, "learning_rate": 0.0009633053221288516, "loss": 0.5901, "step": 1416 }, { "epoch": 0.7916201117318435, "grad_norm": 0.6289923787117004, "learning_rate": 0.0009632773109243698, "loss": 0.4294, "step": 1417 }, { "epoch": 0.7921787709497207, "grad_norm": 1.3091530799865723, "learning_rate": 0.0009632492997198879, "loss": 0.4808, "step": 1418 }, { "epoch": 0.7927374301675978, "grad_norm": 1.4104604721069336, "learning_rate": 0.0009632212885154061, "loss": 0.5711, "step": 1419 }, { "epoch": 0.7932960893854749, "grad_norm": 0.5860559940338135, "learning_rate": 0.0009631932773109244, "loss": 0.5183, "step": 1420 }, { "epoch": 0.7938547486033519, "grad_norm": 0.6045873761177063, "learning_rate": 0.0009631652661064426, "loss": 
0.4496, "step": 1421 }, { "epoch": 0.794413407821229, "grad_norm": 0.5462859869003296, "learning_rate": 0.0009631372549019608, "loss": 0.4219, "step": 1422 }, { "epoch": 0.7949720670391062, "grad_norm": 0.7901493906974792, "learning_rate": 0.0009631092436974789, "loss": 0.6415, "step": 1423 }, { "epoch": 0.7955307262569833, "grad_norm": 0.5696995854377747, "learning_rate": 0.0009630812324929971, "loss": 0.4359, "step": 1424 }, { "epoch": 0.7960893854748603, "grad_norm": 0.8355426788330078, "learning_rate": 0.0009630532212885155, "loss": 0.5583, "step": 1425 }, { "epoch": 0.7966480446927374, "grad_norm": 0.7962026000022888, "learning_rate": 0.0009630252100840337, "loss": 0.4758, "step": 1426 }, { "epoch": 0.7972067039106145, "grad_norm": 9.704211235046387, "learning_rate": 0.0009629971988795519, "loss": 0.579, "step": 1427 }, { "epoch": 0.7977653631284917, "grad_norm": 1.9955533742904663, "learning_rate": 0.00096296918767507, "loss": 0.5201, "step": 1428 }, { "epoch": 0.7983240223463687, "grad_norm": 0.8000580072402954, "learning_rate": 0.0009629411764705882, "loss": 0.7567, "step": 1429 }, { "epoch": 0.7988826815642458, "grad_norm": 0.9265885949134827, "learning_rate": 0.0009629131652661065, "loss": 0.5148, "step": 1430 }, { "epoch": 0.7994413407821229, "grad_norm": 0.6476082801818848, "learning_rate": 0.0009628851540616247, "loss": 0.5419, "step": 1431 }, { "epoch": 0.8, "grad_norm": 0.5342894792556763, "learning_rate": 0.0009628571428571429, "loss": 0.4456, "step": 1432 }, { "epoch": 0.8005586592178771, "grad_norm": 0.6798768043518066, "learning_rate": 0.0009628291316526611, "loss": 0.569, "step": 1433 }, { "epoch": 0.8011173184357542, "grad_norm": 0.45200982689857483, "learning_rate": 0.0009628011204481792, "loss": 0.4152, "step": 1434 }, { "epoch": 0.8016759776536313, "grad_norm": 0.7450093030929565, "learning_rate": 0.0009627731092436975, "loss": 0.4831, "step": 1435 }, { "epoch": 0.8022346368715084, "grad_norm": 0.3933943212032318, "learning_rate": 
0.0009627450980392157, "loss": 0.3844, "step": 1436 }, { "epoch": 0.8027932960893854, "grad_norm": 0.599287211894989, "learning_rate": 0.0009627170868347339, "loss": 0.5188, "step": 1437 }, { "epoch": 0.8033519553072626, "grad_norm": 0.7025023698806763, "learning_rate": 0.0009626890756302521, "loss": 0.5235, "step": 1438 }, { "epoch": 0.8039106145251397, "grad_norm": 0.6847041249275208, "learning_rate": 0.0009626610644257702, "loss": 0.601, "step": 1439 }, { "epoch": 0.8044692737430168, "grad_norm": 0.6263582110404968, "learning_rate": 0.0009626330532212886, "loss": 0.4547, "step": 1440 }, { "epoch": 0.8050279329608938, "grad_norm": 0.5411430597305298, "learning_rate": 0.0009626050420168068, "loss": 0.4323, "step": 1441 }, { "epoch": 0.8055865921787709, "grad_norm": 0.7263736128807068, "learning_rate": 0.000962577030812325, "loss": 0.6322, "step": 1442 }, { "epoch": 0.806145251396648, "grad_norm": 0.6641649007797241, "learning_rate": 0.0009625490196078432, "loss": 0.4404, "step": 1443 }, { "epoch": 0.8067039106145252, "grad_norm": 0.5103501677513123, "learning_rate": 0.0009625210084033613, "loss": 0.4959, "step": 1444 }, { "epoch": 0.8072625698324022, "grad_norm": 0.6319855451583862, "learning_rate": 0.0009624929971988796, "loss": 0.6303, "step": 1445 }, { "epoch": 0.8078212290502793, "grad_norm": 0.37431323528289795, "learning_rate": 0.0009624649859943978, "loss": 0.3968, "step": 1446 }, { "epoch": 0.8083798882681564, "grad_norm": 1.0800323486328125, "learning_rate": 0.000962436974789916, "loss": 0.6015, "step": 1447 }, { "epoch": 0.8089385474860336, "grad_norm": 0.5986151099205017, "learning_rate": 0.0009624089635854342, "loss": 0.5188, "step": 1448 }, { "epoch": 0.8094972067039106, "grad_norm": 0.6418584585189819, "learning_rate": 0.0009623809523809524, "loss": 0.5892, "step": 1449 }, { "epoch": 0.8100558659217877, "grad_norm": 0.598464846611023, "learning_rate": 0.0009623529411764706, "loss": 0.5116, "step": 1450 }, { "epoch": 0.8106145251396648, "grad_norm": 
0.6212530136108398, "learning_rate": 0.0009623249299719888, "loss": 0.4941, "step": 1451 }, { "epoch": 0.8111731843575419, "grad_norm": 1.3260297775268555, "learning_rate": 0.000962296918767507, "loss": 0.4667, "step": 1452 }, { "epoch": 0.8117318435754189, "grad_norm": 0.5975127816200256, "learning_rate": 0.0009622689075630252, "loss": 0.5124, "step": 1453 }, { "epoch": 0.8122905027932961, "grad_norm": 0.5148320198059082, "learning_rate": 0.0009622408963585434, "loss": 0.4867, "step": 1454 }, { "epoch": 0.8128491620111732, "grad_norm": 0.6139815449714661, "learning_rate": 0.0009622128851540616, "loss": 0.4463, "step": 1455 }, { "epoch": 0.8134078212290503, "grad_norm": 0.8188477158546448, "learning_rate": 0.0009621848739495798, "loss": 0.5896, "step": 1456 }, { "epoch": 0.8139664804469273, "grad_norm": 0.7311028838157654, "learning_rate": 0.0009621568627450981, "loss": 0.5491, "step": 1457 }, { "epoch": 0.8145251396648044, "grad_norm": 0.4400715231895447, "learning_rate": 0.0009621288515406163, "loss": 0.41, "step": 1458 }, { "epoch": 0.8150837988826816, "grad_norm": 1.0677579641342163, "learning_rate": 0.0009621008403361345, "loss": 0.4221, "step": 1459 }, { "epoch": 0.8156424581005587, "grad_norm": 0.7484898567199707, "learning_rate": 0.0009620728291316528, "loss": 0.6855, "step": 1460 }, { "epoch": 0.8162011173184358, "grad_norm": 0.4938328266143799, "learning_rate": 0.0009620448179271709, "loss": 0.4868, "step": 1461 }, { "epoch": 0.8167597765363128, "grad_norm": 0.7591249346733093, "learning_rate": 0.0009620168067226891, "loss": 0.4954, "step": 1462 }, { "epoch": 0.8173184357541899, "grad_norm": 0.6450791358947754, "learning_rate": 0.0009619887955182073, "loss": 0.5415, "step": 1463 }, { "epoch": 0.8178770949720671, "grad_norm": 0.5208788514137268, "learning_rate": 0.0009619607843137255, "loss": 0.4769, "step": 1464 }, { "epoch": 0.8184357541899442, "grad_norm": 1.159798264503479, "learning_rate": 0.0009619327731092438, "loss": 0.7681, "step": 1465 }, { 
"epoch": 0.8189944134078212, "grad_norm": 0.8069193363189697, "learning_rate": 0.0009619047619047619, "loss": 0.5153, "step": 1466 }, { "epoch": 0.8195530726256983, "grad_norm": 0.6228049397468567, "learning_rate": 0.0009618767507002801, "loss": 0.6156, "step": 1467 }, { "epoch": 0.8201117318435754, "grad_norm": 0.8810644745826721, "learning_rate": 0.0009618487394957983, "loss": 0.648, "step": 1468 }, { "epoch": 0.8206703910614526, "grad_norm": 1.2396124601364136, "learning_rate": 0.0009618207282913165, "loss": 0.4615, "step": 1469 }, { "epoch": 0.8212290502793296, "grad_norm": 0.7651035189628601, "learning_rate": 0.0009617927170868348, "loss": 0.5078, "step": 1470 }, { "epoch": 0.8217877094972067, "grad_norm": 0.5683399438858032, "learning_rate": 0.0009617647058823529, "loss": 0.5043, "step": 1471 }, { "epoch": 0.8223463687150838, "grad_norm": 2.3346378803253174, "learning_rate": 0.0009617366946778711, "loss": 0.5614, "step": 1472 }, { "epoch": 0.8229050279329609, "grad_norm": 0.4906153976917267, "learning_rate": 0.0009617086834733894, "loss": 0.4891, "step": 1473 }, { "epoch": 0.823463687150838, "grad_norm": 1.4745047092437744, "learning_rate": 0.0009616806722689076, "loss": 0.3869, "step": 1474 }, { "epoch": 0.8240223463687151, "grad_norm": 0.8142638802528381, "learning_rate": 0.0009616526610644259, "loss": 0.6038, "step": 1475 }, { "epoch": 0.8245810055865922, "grad_norm": 0.8037557005882263, "learning_rate": 0.0009616246498599441, "loss": 0.4948, "step": 1476 }, { "epoch": 0.8251396648044693, "grad_norm": 3.388263463973999, "learning_rate": 0.0009615966386554622, "loss": 0.4969, "step": 1477 }, { "epoch": 0.8256983240223463, "grad_norm": 4.625975131988525, "learning_rate": 0.0009615686274509804, "loss": 0.5537, "step": 1478 }, { "epoch": 0.8262569832402235, "grad_norm": 1.5438491106033325, "learning_rate": 0.0009615406162464986, "loss": 0.499, "step": 1479 }, { "epoch": 0.8268156424581006, "grad_norm": 0.7401479482650757, "learning_rate": 
0.0009615126050420169, "loss": 0.5007, "step": 1480 }, { "epoch": 0.8273743016759777, "grad_norm": 0.6178761124610901, "learning_rate": 0.0009614845938375351, "loss": 0.5297, "step": 1481 }, { "epoch": 0.8279329608938547, "grad_norm": 2.41516375541687, "learning_rate": 0.0009614565826330532, "loss": 0.5436, "step": 1482 }, { "epoch": 0.8284916201117318, "grad_norm": 0.5793017745018005, "learning_rate": 0.0009614285714285714, "loss": 0.5233, "step": 1483 }, { "epoch": 0.829050279329609, "grad_norm": 1.0162672996520996, "learning_rate": 0.0009614005602240896, "loss": 0.5452, "step": 1484 }, { "epoch": 0.8296089385474861, "grad_norm": 0.6787902116775513, "learning_rate": 0.0009613725490196079, "loss": 0.6462, "step": 1485 }, { "epoch": 0.8301675977653631, "grad_norm": 0.5842154622077942, "learning_rate": 0.0009613445378151261, "loss": 0.6168, "step": 1486 }, { "epoch": 0.8307262569832402, "grad_norm": 0.534328818321228, "learning_rate": 0.0009613165266106442, "loss": 0.5113, "step": 1487 }, { "epoch": 0.8312849162011173, "grad_norm": 1.2192753553390503, "learning_rate": 0.0009612885154061624, "loss": 0.4032, "step": 1488 }, { "epoch": 0.8318435754189945, "grad_norm": 0.875845193862915, "learning_rate": 0.0009612605042016806, "loss": 0.5181, "step": 1489 }, { "epoch": 0.8324022346368715, "grad_norm": 0.49047836661338806, "learning_rate": 0.000961232492997199, "loss": 0.4514, "step": 1490 }, { "epoch": 0.8329608938547486, "grad_norm": 0.4537179470062256, "learning_rate": 0.0009612044817927172, "loss": 0.4166, "step": 1491 }, { "epoch": 0.8335195530726257, "grad_norm": 0.5758245587348938, "learning_rate": 0.0009611764705882354, "loss": 0.4441, "step": 1492 }, { "epoch": 0.8340782122905028, "grad_norm": 1.1364498138427734, "learning_rate": 0.0009611484593837535, "loss": 0.5217, "step": 1493 }, { "epoch": 0.8346368715083798, "grad_norm": 0.6305497884750366, "learning_rate": 0.0009611204481792717, "loss": 0.5642, "step": 1494 }, { "epoch": 0.835195530726257, "grad_norm": 
0.4967428147792816, "learning_rate": 0.00096109243697479, "loss": 0.4663, "step": 1495 }, { "epoch": 0.8357541899441341, "grad_norm": 5.082947731018066, "learning_rate": 0.0009610644257703082, "loss": 0.6391, "step": 1496 }, { "epoch": 0.8363128491620112, "grad_norm": 1.2604416608810425, "learning_rate": 0.0009610364145658264, "loss": 0.4479, "step": 1497 }, { "epoch": 0.8368715083798882, "grad_norm": 20.05978775024414, "learning_rate": 0.0009610084033613445, "loss": 0.6632, "step": 1498 }, { "epoch": 0.8374301675977653, "grad_norm": 4.604385852813721, "learning_rate": 0.0009609803921568627, "loss": 0.4911, "step": 1499 }, { "epoch": 0.8379888268156425, "grad_norm": 1.2543137073516846, "learning_rate": 0.000960952380952381, "loss": 0.5289, "step": 1500 }, { "epoch": 0.8379888268156425, "eval_cer": 0.09844740488581934, "eval_loss": 0.37865063548088074, "eval_runtime": 55.249, "eval_samples_per_second": 82.137, "eval_steps_per_second": 5.14, "eval_wer": 0.3857154828077428, "step": 1500 }, { "epoch": 0.8385474860335196, "grad_norm": 0.6591171622276306, "learning_rate": 0.0009609243697478992, "loss": 0.5223, "step": 1501 }, { "epoch": 0.8391061452513966, "grad_norm": 0.8398429155349731, "learning_rate": 0.0009608963585434174, "loss": 0.7085, "step": 1502 }, { "epoch": 0.8396648044692737, "grad_norm": 0.5711323618888855, "learning_rate": 0.0009608683473389355, "loss": 0.4737, "step": 1503 }, { "epoch": 0.8402234636871508, "grad_norm": 0.5332566499710083, "learning_rate": 0.0009608403361344537, "loss": 0.5858, "step": 1504 }, { "epoch": 0.840782122905028, "grad_norm": 0.7061896920204163, "learning_rate": 0.000960812324929972, "loss": 0.4925, "step": 1505 }, { "epoch": 0.841340782122905, "grad_norm": 0.5304086804389954, "learning_rate": 0.0009607843137254903, "loss": 0.4686, "step": 1506 }, { "epoch": 0.8418994413407821, "grad_norm": 2.148744821548462, "learning_rate": 0.0009607563025210085, "loss": 0.4337, "step": 1507 }, { "epoch": 0.8424581005586592, "grad_norm": 
0.8156803846359253, "learning_rate": 0.0009607282913165267, "loss": 0.438, "step": 1508 }, { "epoch": 0.8430167597765363, "grad_norm": 1.305841088294983, "learning_rate": 0.0009607002801120448, "loss": 0.5863, "step": 1509 }, { "epoch": 0.8435754189944135, "grad_norm": 0.47048819065093994, "learning_rate": 0.0009606722689075631, "loss": 0.5107, "step": 1510 }, { "epoch": 0.8441340782122905, "grad_norm": 0.4968303143978119, "learning_rate": 0.0009606442577030813, "loss": 0.4913, "step": 1511 }, { "epoch": 0.8446927374301676, "grad_norm": 0.7322350144386292, "learning_rate": 0.0009606162464985995, "loss": 0.567, "step": 1512 }, { "epoch": 0.8452513966480447, "grad_norm": 0.6126371026039124, "learning_rate": 0.0009605882352941177, "loss": 0.4864, "step": 1513 }, { "epoch": 0.8458100558659218, "grad_norm": 0.732340395450592, "learning_rate": 0.0009605602240896358, "loss": 0.5562, "step": 1514 }, { "epoch": 0.8463687150837989, "grad_norm": 0.4569789171218872, "learning_rate": 0.0009605322128851541, "loss": 0.4622, "step": 1515 }, { "epoch": 0.846927374301676, "grad_norm": 0.9417468905448914, "learning_rate": 0.0009605042016806723, "loss": 0.6526, "step": 1516 }, { "epoch": 0.8474860335195531, "grad_norm": 0.9558619260787964, "learning_rate": 0.0009604761904761905, "loss": 0.551, "step": 1517 }, { "epoch": 0.8480446927374302, "grad_norm": 0.900699257850647, "learning_rate": 0.0009604481792717087, "loss": 0.5343, "step": 1518 }, { "epoch": 0.8486033519553072, "grad_norm": 1.0645067691802979, "learning_rate": 0.0009604201680672268, "loss": 0.4961, "step": 1519 }, { "epoch": 0.8491620111731844, "grad_norm": 0.43453070521354675, "learning_rate": 0.0009603921568627451, "loss": 0.4925, "step": 1520 }, { "epoch": 0.8497206703910615, "grad_norm": 0.6701645851135254, "learning_rate": 0.0009603641456582633, "loss": 0.5334, "step": 1521 }, { "epoch": 0.8502793296089386, "grad_norm": 0.47887933254241943, "learning_rate": 0.0009603361344537816, "loss": 0.4878, "step": 1522 }, { 
"epoch": 0.8508379888268156, "grad_norm": 0.48265787959098816, "learning_rate": 0.0009603081232492998, "loss": 0.4657, "step": 1523 }, { "epoch": 0.8513966480446927, "grad_norm": 0.6489775776863098, "learning_rate": 0.000960280112044818, "loss": 0.4002, "step": 1524 }, { "epoch": 0.8519553072625698, "grad_norm": 0.7259930372238159, "learning_rate": 0.0009602521008403362, "loss": 0.4652, "step": 1525 }, { "epoch": 0.852513966480447, "grad_norm": 1.5105582475662231, "learning_rate": 0.0009602240896358544, "loss": 0.7282, "step": 1526 }, { "epoch": 0.853072625698324, "grad_norm": 0.6706119775772095, "learning_rate": 0.0009601960784313726, "loss": 0.6042, "step": 1527 }, { "epoch": 0.8536312849162011, "grad_norm": 0.4488590359687805, "learning_rate": 0.0009601680672268908, "loss": 0.497, "step": 1528 }, { "epoch": 0.8541899441340782, "grad_norm": 7.585607528686523, "learning_rate": 0.000960140056022409, "loss": 0.4696, "step": 1529 }, { "epoch": 0.8547486033519553, "grad_norm": 0.49235406517982483, "learning_rate": 0.0009601120448179272, "loss": 0.4489, "step": 1530 }, { "epoch": 0.8553072625698324, "grad_norm": 1.6137057542800903, "learning_rate": 0.0009600840336134454, "loss": 0.4593, "step": 1531 }, { "epoch": 0.8558659217877095, "grad_norm": 0.6306363344192505, "learning_rate": 0.0009600560224089636, "loss": 0.4695, "step": 1532 }, { "epoch": 0.8564245810055866, "grad_norm": 1.7941296100616455, "learning_rate": 0.0009600280112044818, "loss": 0.4729, "step": 1533 }, { "epoch": 0.8569832402234637, "grad_norm": 0.74614417552948, "learning_rate": 0.00096, "loss": 0.4275, "step": 1534 }, { "epoch": 0.8575418994413407, "grad_norm": 0.6001713871955872, "learning_rate": 0.0009599719887955182, "loss": 0.469, "step": 1535 }, { "epoch": 0.8581005586592179, "grad_norm": 0.963833212852478, "learning_rate": 0.0009599439775910364, "loss": 0.5154, "step": 1536 }, { "epoch": 0.858659217877095, "grad_norm": 1.13657546043396, "learning_rate": 0.0009599159663865546, "loss": 0.5546, 
"step": 1537 }, { "epoch": 0.8592178770949721, "grad_norm": 0.5945402383804321, "learning_rate": 0.0009598879551820728, "loss": 0.4922, "step": 1538 }, { "epoch": 0.8597765363128491, "grad_norm": 3.436249017715454, "learning_rate": 0.0009598599439775911, "loss": 0.567, "step": 1539 }, { "epoch": 0.8603351955307262, "grad_norm": 0.5806109309196472, "learning_rate": 0.0009598319327731094, "loss": 0.4277, "step": 1540 }, { "epoch": 0.8608938547486034, "grad_norm": 5.576815605163574, "learning_rate": 0.0009598039215686275, "loss": 0.4417, "step": 1541 }, { "epoch": 0.8614525139664805, "grad_norm": 1.2157827615737915, "learning_rate": 0.0009597759103641457, "loss": 0.4929, "step": 1542 }, { "epoch": 0.8620111731843575, "grad_norm": 0.614841103553772, "learning_rate": 0.0009597478991596639, "loss": 0.5091, "step": 1543 }, { "epoch": 0.8625698324022346, "grad_norm": 0.5197077989578247, "learning_rate": 0.0009597198879551821, "loss": 0.3851, "step": 1544 }, { "epoch": 0.8631284916201117, "grad_norm": 0.4492049813270569, "learning_rate": 0.0009596918767507004, "loss": 0.4737, "step": 1545 }, { "epoch": 0.8636871508379889, "grad_norm": 2.4102890491485596, "learning_rate": 0.0009596638655462185, "loss": 0.439, "step": 1546 }, { "epoch": 0.8642458100558659, "grad_norm": 0.7981939315795898, "learning_rate": 0.0009596358543417367, "loss": 0.6839, "step": 1547 }, { "epoch": 0.864804469273743, "grad_norm": 0.5832147598266602, "learning_rate": 0.0009596078431372549, "loss": 0.5052, "step": 1548 }, { "epoch": 0.8653631284916201, "grad_norm": 0.976466178894043, "learning_rate": 0.0009595798319327731, "loss": 0.3913, "step": 1549 }, { "epoch": 0.8659217877094972, "grad_norm": 1.053292155265808, "learning_rate": 0.0009595518207282914, "loss": 0.554, "step": 1550 }, { "epoch": 0.8664804469273742, "grad_norm": 1.20173978805542, "learning_rate": 0.0009595238095238095, "loss": 0.5341, "step": 1551 }, { "epoch": 0.8670391061452514, "grad_norm": 0.8238740563392639, "learning_rate": 
0.0009594957983193277, "loss": 0.5558, "step": 1552 }, { "epoch": 0.8675977653631285, "grad_norm": 0.5024362802505493, "learning_rate": 0.0009594677871148459, "loss": 0.4121, "step": 1553 }, { "epoch": 0.8681564245810056, "grad_norm": 0.7707139253616333, "learning_rate": 0.0009594397759103641, "loss": 0.4614, "step": 1554 }, { "epoch": 0.8687150837988827, "grad_norm": 0.5292174220085144, "learning_rate": 0.0009594117647058825, "loss": 0.4525, "step": 1555 }, { "epoch": 0.8692737430167597, "grad_norm": 0.7752367854118347, "learning_rate": 0.0009593837535014007, "loss": 0.4536, "step": 1556 }, { "epoch": 0.8698324022346369, "grad_norm": 0.5903460383415222, "learning_rate": 0.0009593557422969188, "loss": 0.4733, "step": 1557 }, { "epoch": 0.870391061452514, "grad_norm": 1.443812608718872, "learning_rate": 0.000959327731092437, "loss": 0.7236, "step": 1558 }, { "epoch": 0.8709497206703911, "grad_norm": 0.6632566452026367, "learning_rate": 0.0009592997198879552, "loss": 0.5525, "step": 1559 }, { "epoch": 0.8715083798882681, "grad_norm": 0.5941494107246399, "learning_rate": 0.0009592717086834734, "loss": 0.4692, "step": 1560 }, { "epoch": 0.8720670391061452, "grad_norm": 0.42596128582954407, "learning_rate": 0.0009592436974789917, "loss": 0.3983, "step": 1561 }, { "epoch": 0.8726256983240224, "grad_norm": 0.7327626347541809, "learning_rate": 0.0009592156862745098, "loss": 0.5121, "step": 1562 }, { "epoch": 0.8731843575418995, "grad_norm": 2.932777166366577, "learning_rate": 0.000959187675070028, "loss": 0.4547, "step": 1563 }, { "epoch": 0.8737430167597765, "grad_norm": 0.5944148898124695, "learning_rate": 0.0009591596638655462, "loss": 0.6036, "step": 1564 }, { "epoch": 0.8743016759776536, "grad_norm": 0.6992055177688599, "learning_rate": 0.0009591316526610644, "loss": 0.5199, "step": 1565 }, { "epoch": 0.8748603351955307, "grad_norm": 0.7510950565338135, "learning_rate": 0.0009591036414565827, "loss": 0.6397, "step": 1566 }, { "epoch": 0.8754189944134079, "grad_norm": 
0.6152811646461487, "learning_rate": 0.0009590756302521008, "loss": 0.511, "step": 1567 }, { "epoch": 0.8759776536312849, "grad_norm": 0.5206465721130371, "learning_rate": 0.000959047619047619, "loss": 0.4913, "step": 1568 }, { "epoch": 0.876536312849162, "grad_norm": 1.9374691247940063, "learning_rate": 0.0009590196078431372, "loss": 0.9544, "step": 1569 }, { "epoch": 0.8770949720670391, "grad_norm": 0.9915938377380371, "learning_rate": 0.0009589915966386554, "loss": 0.5381, "step": 1570 }, { "epoch": 0.8776536312849162, "grad_norm": 1.0157009363174438, "learning_rate": 0.0009589635854341738, "loss": 0.4593, "step": 1571 }, { "epoch": 0.8782122905027933, "grad_norm": 0.7500250339508057, "learning_rate": 0.000958935574229692, "loss": 0.5349, "step": 1572 }, { "epoch": 0.8787709497206704, "grad_norm": 0.6948181986808777, "learning_rate": 0.0009589075630252101, "loss": 0.5535, "step": 1573 }, { "epoch": 0.8793296089385475, "grad_norm": 0.7886123657226562, "learning_rate": 0.0009588795518207283, "loss": 0.5553, "step": 1574 }, { "epoch": 0.8798882681564246, "grad_norm": 0.46402987837791443, "learning_rate": 0.0009588515406162465, "loss": 0.4221, "step": 1575 }, { "epoch": 0.8804469273743016, "grad_norm": 0.9824563264846802, "learning_rate": 0.0009588235294117648, "loss": 0.5114, "step": 1576 }, { "epoch": 0.8810055865921788, "grad_norm": 2.5311214923858643, "learning_rate": 0.000958795518207283, "loss": 0.4866, "step": 1577 }, { "epoch": 0.8815642458100559, "grad_norm": 4.942215442657471, "learning_rate": 0.0009587675070028011, "loss": 0.6619, "step": 1578 }, { "epoch": 0.882122905027933, "grad_norm": 1.346177577972412, "learning_rate": 0.0009587394957983193, "loss": 0.5644, "step": 1579 }, { "epoch": 0.88268156424581, "grad_norm": 0.5623987317085266, "learning_rate": 0.0009587114845938375, "loss": 0.5696, "step": 1580 }, { "epoch": 0.8832402234636871, "grad_norm": 0.5278266668319702, "learning_rate": 0.0009586834733893558, "loss": 0.5945, "step": 1581 }, { "epoch": 
0.8837988826815643, "grad_norm": 0.5834859013557434, "learning_rate": 0.000958655462184874, "loss": 0.4662, "step": 1582 }, { "epoch": 0.8843575418994414, "grad_norm": 1.6163126230239868, "learning_rate": 0.0009586274509803921, "loss": 0.4954, "step": 1583 }, { "epoch": 0.8849162011173184, "grad_norm": 0.6733283996582031, "learning_rate": 0.0009585994397759103, "loss": 0.4711, "step": 1584 }, { "epoch": 0.8854748603351955, "grad_norm": 0.5481908917427063, "learning_rate": 0.0009585714285714285, "loss": 0.5297, "step": 1585 }, { "epoch": 0.8860335195530726, "grad_norm": 1.2589175701141357, "learning_rate": 0.0009585434173669468, "loss": 0.5592, "step": 1586 }, { "epoch": 0.8865921787709498, "grad_norm": 0.6359618306159973, "learning_rate": 0.000958515406162465, "loss": 0.5307, "step": 1587 }, { "epoch": 0.8871508379888268, "grad_norm": 3.2101893424987793, "learning_rate": 0.0009584873949579833, "loss": 0.4774, "step": 1588 }, { "epoch": 0.8877094972067039, "grad_norm": 0.7876462936401367, "learning_rate": 0.0009584593837535014, "loss": 0.5563, "step": 1589 }, { "epoch": 0.888268156424581, "grad_norm": 0.5258017778396606, "learning_rate": 0.0009584313725490196, "loss": 0.4406, "step": 1590 }, { "epoch": 0.8888268156424581, "grad_norm": 0.7677879929542542, "learning_rate": 0.0009584033613445379, "loss": 0.5258, "step": 1591 }, { "epoch": 0.8893854748603351, "grad_norm": 3.9905264377593994, "learning_rate": 0.0009583753501400561, "loss": 0.465, "step": 1592 }, { "epoch": 0.8899441340782123, "grad_norm": 1.254267692565918, "learning_rate": 0.0009583473389355743, "loss": 0.4444, "step": 1593 }, { "epoch": 0.8905027932960894, "grad_norm": 0.5092368125915527, "learning_rate": 0.0009583193277310924, "loss": 0.3964, "step": 1594 }, { "epoch": 0.8910614525139665, "grad_norm": 0.6793869137763977, "learning_rate": 0.0009582913165266106, "loss": 0.5527, "step": 1595 }, { "epoch": 0.8916201117318435, "grad_norm": 0.542073130607605, "learning_rate": 0.0009582633053221289, "loss": 
0.5264, "step": 1596 }, { "epoch": 0.8921787709497206, "grad_norm": 0.5154288411140442, "learning_rate": 0.0009582352941176471, "loss": 0.4487, "step": 1597 }, { "epoch": 0.8927374301675978, "grad_norm": 0.7843811511993408, "learning_rate": 0.0009582072829131653, "loss": 0.5066, "step": 1598 }, { "epoch": 0.8932960893854749, "grad_norm": 0.7047399282455444, "learning_rate": 0.0009581792717086834, "loss": 0.5834, "step": 1599 }, { "epoch": 0.8938547486033519, "grad_norm": 0.6373627781867981, "learning_rate": 0.0009581512605042016, "loss": 0.4788, "step": 1600 }, { "epoch": 0.894413407821229, "grad_norm": 0.9333066344261169, "learning_rate": 0.0009581232492997199, "loss": 0.4882, "step": 1601 }, { "epoch": 0.8949720670391061, "grad_norm": 0.8540565967559814, "learning_rate": 0.0009580952380952381, "loss": 0.5884, "step": 1602 }, { "epoch": 0.8955307262569833, "grad_norm": 0.5950315594673157, "learning_rate": 0.0009580672268907563, "loss": 0.6124, "step": 1603 }, { "epoch": 0.8960893854748604, "grad_norm": 1.0109556913375854, "learning_rate": 0.0009580392156862746, "loss": 0.5261, "step": 1604 }, { "epoch": 0.8966480446927374, "grad_norm": 1.743308663368225, "learning_rate": 0.0009580112044817926, "loss": 0.442, "step": 1605 }, { "epoch": 0.8972067039106145, "grad_norm": 0.5196111798286438, "learning_rate": 0.000957983193277311, "loss": 0.4999, "step": 1606 }, { "epoch": 0.8977653631284916, "grad_norm": 1.3952412605285645, "learning_rate": 0.0009579551820728292, "loss": 0.5323, "step": 1607 }, { "epoch": 0.8983240223463688, "grad_norm": 0.7273664474487305, "learning_rate": 0.0009579271708683474, "loss": 0.5089, "step": 1608 }, { "epoch": 0.8988826815642458, "grad_norm": 0.6543103456497192, "learning_rate": 0.0009578991596638656, "loss": 0.4647, "step": 1609 }, { "epoch": 0.8994413407821229, "grad_norm": 1.605989694595337, "learning_rate": 0.0009578711484593837, "loss": 0.5853, "step": 1610 }, { "epoch": 0.9, "grad_norm": 0.6041949987411499, "learning_rate": 
0.000957843137254902, "loss": 0.4679, "step": 1611 }, { "epoch": 0.9005586592178771, "grad_norm": 0.6679384112358093, "learning_rate": 0.0009578151260504202, "loss": 0.5184, "step": 1612 }, { "epoch": 0.9011173184357542, "grad_norm": 1.1944327354431152, "learning_rate": 0.0009577871148459384, "loss": 0.4659, "step": 1613 }, { "epoch": 0.9016759776536313, "grad_norm": 4.223563194274902, "learning_rate": 0.0009577591036414566, "loss": 0.7264, "step": 1614 }, { "epoch": 0.9022346368715084, "grad_norm": 1.6066884994506836, "learning_rate": 0.0009577310924369747, "loss": 0.4625, "step": 1615 }, { "epoch": 0.9027932960893855, "grad_norm": 0.6855906248092651, "learning_rate": 0.000957703081232493, "loss": 0.4257, "step": 1616 }, { "epoch": 0.9033519553072625, "grad_norm": 0.42809733748435974, "learning_rate": 0.0009576750700280112, "loss": 0.4092, "step": 1617 }, { "epoch": 0.9039106145251397, "grad_norm": 0.7035035490989685, "learning_rate": 0.0009576470588235294, "loss": 0.4174, "step": 1618 }, { "epoch": 0.9044692737430168, "grad_norm": 0.6464139223098755, "learning_rate": 0.0009576190476190476, "loss": 0.512, "step": 1619 }, { "epoch": 0.9050279329608939, "grad_norm": 0.48409298062324524, "learning_rate": 0.0009575910364145658, "loss": 0.456, "step": 1620 }, { "epoch": 0.9055865921787709, "grad_norm": 0.4653385281562805, "learning_rate": 0.0009575630252100841, "loss": 0.4392, "step": 1621 }, { "epoch": 0.906145251396648, "grad_norm": 0.5446961522102356, "learning_rate": 0.0009575350140056023, "loss": 0.548, "step": 1622 }, { "epoch": 0.9067039106145252, "grad_norm": 0.5252688527107239, "learning_rate": 0.0009575070028011205, "loss": 0.442, "step": 1623 }, { "epoch": 0.9072625698324023, "grad_norm": 0.6384592652320862, "learning_rate": 0.0009574789915966387, "loss": 0.4584, "step": 1624 }, { "epoch": 0.9078212290502793, "grad_norm": 0.6891541481018066, "learning_rate": 0.0009574509803921569, "loss": 0.5276, "step": 1625 }, { "epoch": 0.9083798882681564, "grad_norm": 
0.5562767386436462, "learning_rate": 0.0009574229691876751, "loss": 0.4626, "step": 1626 }, { "epoch": 0.9089385474860335, "grad_norm": 0.829547107219696, "learning_rate": 0.0009573949579831933, "loss": 0.6602, "step": 1627 }, { "epoch": 0.9094972067039107, "grad_norm": 0.7216253280639648, "learning_rate": 0.0009573669467787115, "loss": 0.4428, "step": 1628 }, { "epoch": 0.9100558659217877, "grad_norm": 0.6616451740264893, "learning_rate": 0.0009573389355742297, "loss": 0.6104, "step": 1629 }, { "epoch": 0.9106145251396648, "grad_norm": 0.8982250690460205, "learning_rate": 0.0009573109243697479, "loss": 0.6243, "step": 1630 }, { "epoch": 0.9111731843575419, "grad_norm": 0.7317488789558411, "learning_rate": 0.0009572829131652661, "loss": 0.5087, "step": 1631 }, { "epoch": 0.911731843575419, "grad_norm": 0.5401996970176697, "learning_rate": 0.0009572549019607843, "loss": 0.5399, "step": 1632 }, { "epoch": 0.912290502793296, "grad_norm": 0.48129868507385254, "learning_rate": 0.0009572268907563025, "loss": 0.45, "step": 1633 }, { "epoch": 0.9128491620111732, "grad_norm": 0.8447461724281311, "learning_rate": 0.0009571988795518207, "loss": 0.5154, "step": 1634 }, { "epoch": 0.9134078212290503, "grad_norm": 0.7238785624504089, "learning_rate": 0.0009571708683473389, "loss": 0.5845, "step": 1635 }, { "epoch": 0.9139664804469274, "grad_norm": 4.996368408203125, "learning_rate": 0.0009571428571428573, "loss": 0.4767, "step": 1636 }, { "epoch": 0.9145251396648044, "grad_norm": 0.6995401382446289, "learning_rate": 0.0009571148459383754, "loss": 0.4001, "step": 1637 }, { "epoch": 0.9150837988826815, "grad_norm": 0.6319295763969421, "learning_rate": 0.0009570868347338936, "loss": 0.5082, "step": 1638 }, { "epoch": 0.9156424581005587, "grad_norm": 6.914177417755127, "learning_rate": 0.0009570588235294118, "loss": 0.4413, "step": 1639 }, { "epoch": 0.9162011173184358, "grad_norm": 5.069177150726318, "learning_rate": 0.00095703081232493, "loss": 0.4618, "step": 1640 }, { "epoch": 
0.9167597765363128, "grad_norm": 0.8674466609954834, "learning_rate": 0.0009570028011204483, "loss": 0.5621, "step": 1641 }, { "epoch": 0.9173184357541899, "grad_norm": 0.7931159734725952, "learning_rate": 0.0009569747899159664, "loss": 0.4304, "step": 1642 }, { "epoch": 0.917877094972067, "grad_norm": 0.6191529035568237, "learning_rate": 0.0009569467787114846, "loss": 0.5212, "step": 1643 }, { "epoch": 0.9184357541899442, "grad_norm": 0.4811217784881592, "learning_rate": 0.0009569187675070028, "loss": 0.5537, "step": 1644 }, { "epoch": 0.9189944134078212, "grad_norm": 0.754035234451294, "learning_rate": 0.000956890756302521, "loss": 0.5198, "step": 1645 }, { "epoch": 0.9195530726256983, "grad_norm": 0.6202226877212524, "learning_rate": 0.0009568627450980393, "loss": 0.5066, "step": 1646 }, { "epoch": 0.9201117318435754, "grad_norm": 0.7443351745605469, "learning_rate": 0.0009568347338935574, "loss": 0.408, "step": 1647 }, { "epoch": 0.9206703910614525, "grad_norm": 0.9001688361167908, "learning_rate": 0.0009568067226890756, "loss": 0.5128, "step": 1648 }, { "epoch": 0.9212290502793297, "grad_norm": 1.2606867551803589, "learning_rate": 0.0009567787114845938, "loss": 0.5465, "step": 1649 }, { "epoch": 0.9217877094972067, "grad_norm": 0.5945485830307007, "learning_rate": 0.000956750700280112, "loss": 0.5672, "step": 1650 }, { "epoch": 0.9223463687150838, "grad_norm": 2.541149616241455, "learning_rate": 0.0009567226890756303, "loss": 0.5646, "step": 1651 }, { "epoch": 0.9229050279329609, "grad_norm": 0.5519164800643921, "learning_rate": 0.0009566946778711485, "loss": 0.5333, "step": 1652 }, { "epoch": 0.923463687150838, "grad_norm": 1.086621642112732, "learning_rate": 0.0009566666666666666, "loss": 0.4063, "step": 1653 }, { "epoch": 0.924022346368715, "grad_norm": 0.8017902374267578, "learning_rate": 0.0009566386554621849, "loss": 0.5293, "step": 1654 }, { "epoch": 0.9245810055865922, "grad_norm": 0.48129141330718994, "learning_rate": 0.0009566106442577031, "loss": 
0.4635, "step": 1655 }, { "epoch": 0.9251396648044693, "grad_norm": 0.7882272601127625, "learning_rate": 0.0009565826330532214, "loss": 0.539, "step": 1656 }, { "epoch": 0.9256983240223464, "grad_norm": 0.5756723284721375, "learning_rate": 0.0009565546218487396, "loss": 0.5251, "step": 1657 }, { "epoch": 0.9262569832402234, "grad_norm": 0.5985224843025208, "learning_rate": 0.0009565266106442577, "loss": 0.5465, "step": 1658 }, { "epoch": 0.9268156424581006, "grad_norm": 0.8080509901046753, "learning_rate": 0.0009564985994397759, "loss": 0.5516, "step": 1659 }, { "epoch": 0.9273743016759777, "grad_norm": 2.229174852371216, "learning_rate": 0.0009564705882352941, "loss": 0.5039, "step": 1660 }, { "epoch": 0.9279329608938548, "grad_norm": 1.6585955619812012, "learning_rate": 0.0009564425770308124, "loss": 0.5118, "step": 1661 }, { "epoch": 0.9284916201117318, "grad_norm": 0.7574945092201233, "learning_rate": 0.0009564145658263306, "loss": 0.5281, "step": 1662 }, { "epoch": 0.9290502793296089, "grad_norm": 0.6235692501068115, "learning_rate": 0.0009563865546218487, "loss": 0.4852, "step": 1663 }, { "epoch": 0.929608938547486, "grad_norm": 0.4636309742927551, "learning_rate": 0.0009563585434173669, "loss": 0.4089, "step": 1664 }, { "epoch": 0.9301675977653632, "grad_norm": 0.5483853220939636, "learning_rate": 0.0009563305322128851, "loss": 0.374, "step": 1665 }, { "epoch": 0.9307262569832402, "grad_norm": 1.0265727043151855, "learning_rate": 0.0009563025210084034, "loss": 0.5324, "step": 1666 }, { "epoch": 0.9312849162011173, "grad_norm": 0.7922943234443665, "learning_rate": 0.0009562745098039216, "loss": 0.4633, "step": 1667 }, { "epoch": 0.9318435754189944, "grad_norm": 0.6494239568710327, "learning_rate": 0.0009562464985994398, "loss": 0.5183, "step": 1668 }, { "epoch": 0.9324022346368716, "grad_norm": 0.6400390267372131, "learning_rate": 0.0009562184873949579, "loss": 0.4662, "step": 1669 }, { "epoch": 0.9329608938547486, "grad_norm": 0.6184152364730835, 
"learning_rate": 0.0009561904761904761, "loss": 0.497, "step": 1670 }, { "epoch": 0.9335195530726257, "grad_norm": 2.4548275470733643, "learning_rate": 0.0009561624649859945, "loss": 0.4708, "step": 1671 }, { "epoch": 0.9340782122905028, "grad_norm": 0.7691540718078613, "learning_rate": 0.0009561344537815127, "loss": 0.6856, "step": 1672 }, { "epoch": 0.9346368715083799, "grad_norm": 0.8598730564117432, "learning_rate": 0.0009561064425770309, "loss": 0.5985, "step": 1673 }, { "epoch": 0.9351955307262569, "grad_norm": 12.871362686157227, "learning_rate": 0.000956078431372549, "loss": 0.4781, "step": 1674 }, { "epoch": 0.9357541899441341, "grad_norm": 0.4955989122390747, "learning_rate": 0.0009560504201680672, "loss": 0.3648, "step": 1675 }, { "epoch": 0.9363128491620112, "grad_norm": 0.7067756652832031, "learning_rate": 0.0009560224089635855, "loss": 0.4822, "step": 1676 }, { "epoch": 0.9368715083798883, "grad_norm": 0.8544894456863403, "learning_rate": 0.0009559943977591037, "loss": 0.4771, "step": 1677 }, { "epoch": 0.9374301675977653, "grad_norm": 1.1533461809158325, "learning_rate": 0.0009559663865546219, "loss": 0.5395, "step": 1678 }, { "epoch": 0.9379888268156424, "grad_norm": 0.9036091566085815, "learning_rate": 0.00095593837535014, "loss": 0.4618, "step": 1679 }, { "epoch": 0.9385474860335196, "grad_norm": 1.4535365104675293, "learning_rate": 0.0009559103641456582, "loss": 0.4719, "step": 1680 }, { "epoch": 0.9391061452513967, "grad_norm": 1.0793883800506592, "learning_rate": 0.0009558823529411765, "loss": 0.5846, "step": 1681 }, { "epoch": 0.9396648044692737, "grad_norm": 0.9983944892883301, "learning_rate": 0.0009558543417366947, "loss": 0.5385, "step": 1682 }, { "epoch": 0.9402234636871508, "grad_norm": 0.5338730812072754, "learning_rate": 0.0009558263305322129, "loss": 0.4756, "step": 1683 }, { "epoch": 0.9407821229050279, "grad_norm": 0.8283085823059082, "learning_rate": 0.0009557983193277311, "loss": 0.494, "step": 1684 }, { "epoch": 
0.9413407821229051, "grad_norm": 1.6396960020065308, "learning_rate": 0.0009557703081232492, "loss": 0.4261, "step": 1685 }, { "epoch": 0.9418994413407821, "grad_norm": 0.6962085962295532, "learning_rate": 0.0009557422969187676, "loss": 0.5559, "step": 1686 }, { "epoch": 0.9424581005586592, "grad_norm": 0.8268210887908936, "learning_rate": 0.0009557142857142858, "loss": 0.4957, "step": 1687 }, { "epoch": 0.9430167597765363, "grad_norm": 0.529246985912323, "learning_rate": 0.000955686274509804, "loss": 0.4346, "step": 1688 }, { "epoch": 0.9435754189944134, "grad_norm": 1.1125683784484863, "learning_rate": 0.0009556582633053222, "loss": 0.4382, "step": 1689 }, { "epoch": 0.9441340782122905, "grad_norm": 0.6227396130561829, "learning_rate": 0.0009556302521008403, "loss": 0.4709, "step": 1690 }, { "epoch": 0.9446927374301676, "grad_norm": 0.5941323041915894, "learning_rate": 0.0009556022408963586, "loss": 0.3957, "step": 1691 }, { "epoch": 0.9452513966480447, "grad_norm": 1.0418164730072021, "learning_rate": 0.0009555742296918768, "loss": 0.4414, "step": 1692 }, { "epoch": 0.9458100558659218, "grad_norm": 0.851777195930481, "learning_rate": 0.000955546218487395, "loss": 0.5859, "step": 1693 }, { "epoch": 0.9463687150837988, "grad_norm": 0.585878849029541, "learning_rate": 0.0009555182072829132, "loss": 0.4641, "step": 1694 }, { "epoch": 0.946927374301676, "grad_norm": 0.8957218527793884, "learning_rate": 0.0009554901960784313, "loss": 0.5636, "step": 1695 }, { "epoch": 0.9474860335195531, "grad_norm": 0.8570482134819031, "learning_rate": 0.0009554621848739496, "loss": 0.5148, "step": 1696 }, { "epoch": 0.9480446927374302, "grad_norm": 0.5498955845832825, "learning_rate": 0.0009554341736694678, "loss": 0.512, "step": 1697 }, { "epoch": 0.9486033519553073, "grad_norm": 0.6170614361763, "learning_rate": 0.000955406162464986, "loss": 0.5516, "step": 1698 }, { "epoch": 0.9491620111731843, "grad_norm": 0.8566698431968689, "learning_rate": 0.0009553781512605042, "loss": 
0.4912, "step": 1699 }, { "epoch": 0.9497206703910615, "grad_norm": 0.5369873642921448, "learning_rate": 0.0009553501400560224, "loss": 0.4751, "step": 1700 }, { "epoch": 0.9502793296089386, "grad_norm": 0.694010317325592, "learning_rate": 0.0009553221288515406, "loss": 0.4858, "step": 1701 }, { "epoch": 0.9508379888268157, "grad_norm": 5.918373107910156, "learning_rate": 0.0009552941176470588, "loss": 0.5437, "step": 1702 }, { "epoch": 0.9513966480446927, "grad_norm": 0.7442026734352112, "learning_rate": 0.0009552661064425771, "loss": 0.4207, "step": 1703 }, { "epoch": 0.9519553072625698, "grad_norm": 0.6700260043144226, "learning_rate": 0.0009552380952380953, "loss": 0.4938, "step": 1704 }, { "epoch": 0.952513966480447, "grad_norm": 0.7718746066093445, "learning_rate": 0.0009552100840336135, "loss": 0.3786, "step": 1705 }, { "epoch": 0.9530726256983241, "grad_norm": 1.0982671976089478, "learning_rate": 0.0009551820728291317, "loss": 0.4839, "step": 1706 }, { "epoch": 0.9536312849162011, "grad_norm": 0.5608755946159363, "learning_rate": 0.0009551540616246499, "loss": 0.5376, "step": 1707 }, { "epoch": 0.9541899441340782, "grad_norm": 2.001518726348877, "learning_rate": 0.0009551260504201681, "loss": 0.6045, "step": 1708 }, { "epoch": 0.9547486033519553, "grad_norm": 1.1883184909820557, "learning_rate": 0.0009550980392156863, "loss": 0.4384, "step": 1709 }, { "epoch": 0.9553072625698324, "grad_norm": 0.7585318088531494, "learning_rate": 0.0009550700280112045, "loss": 0.5194, "step": 1710 }, { "epoch": 0.9558659217877095, "grad_norm": 1.5778087377548218, "learning_rate": 0.0009550420168067228, "loss": 0.4411, "step": 1711 }, { "epoch": 0.9564245810055866, "grad_norm": 0.7450963258743286, "learning_rate": 0.0009550140056022409, "loss": 0.4841, "step": 1712 }, { "epoch": 0.9569832402234637, "grad_norm": 0.5968680381774902, "learning_rate": 0.0009549859943977591, "loss": 0.6217, "step": 1713 }, { "epoch": 0.9575418994413408, "grad_norm": 1.0186375379562378, 
"learning_rate": 0.0009549579831932773, "loss": 0.5569, "step": 1714 }, { "epoch": 0.9581005586592178, "grad_norm": 0.5329403281211853, "learning_rate": 0.0009549299719887955, "loss": 0.4947, "step": 1715 }, { "epoch": 0.958659217877095, "grad_norm": 0.572655200958252, "learning_rate": 0.0009549019607843138, "loss": 0.5223, "step": 1716 }, { "epoch": 0.9592178770949721, "grad_norm": 0.5006478428840637, "learning_rate": 0.0009548739495798319, "loss": 0.4951, "step": 1717 }, { "epoch": 0.9597765363128492, "grad_norm": 0.6001412868499756, "learning_rate": 0.0009548459383753501, "loss": 0.4952, "step": 1718 }, { "epoch": 0.9603351955307262, "grad_norm": 2.2571427822113037, "learning_rate": 0.0009548179271708684, "loss": 0.4362, "step": 1719 }, { "epoch": 0.9608938547486033, "grad_norm": 0.5362712740898132, "learning_rate": 0.0009547899159663866, "loss": 0.4559, "step": 1720 }, { "epoch": 0.9614525139664805, "grad_norm": 0.6957347393035889, "learning_rate": 0.0009547619047619049, "loss": 0.4905, "step": 1721 }, { "epoch": 0.9620111731843576, "grad_norm": 0.6331759095191956, "learning_rate": 0.000954733893557423, "loss": 0.4175, "step": 1722 }, { "epoch": 0.9625698324022346, "grad_norm": 0.5453664064407349, "learning_rate": 0.0009547058823529412, "loss": 0.5286, "step": 1723 }, { "epoch": 0.9631284916201117, "grad_norm": 0.594581127166748, "learning_rate": 0.0009546778711484594, "loss": 0.4282, "step": 1724 }, { "epoch": 0.9636871508379888, "grad_norm": 0.9801068305969238, "learning_rate": 0.0009546498599439776, "loss": 0.5001, "step": 1725 }, { "epoch": 0.964245810055866, "grad_norm": 1.5954480171203613, "learning_rate": 0.0009546218487394959, "loss": 0.4733, "step": 1726 }, { "epoch": 0.964804469273743, "grad_norm": 0.4829848110675812, "learning_rate": 0.0009545938375350141, "loss": 0.3831, "step": 1727 }, { "epoch": 0.9653631284916201, "grad_norm": 0.6249749064445496, "learning_rate": 0.0009545658263305322, "loss": 0.4997, "step": 1728 }, { "epoch": 
0.9659217877094972, "grad_norm": 1.116471529006958, "learning_rate": 0.0009545378151260504, "loss": 0.7677, "step": 1729 }, { "epoch": 0.9664804469273743, "grad_norm": 2.0907201766967773, "learning_rate": 0.0009545098039215686, "loss": 0.3794, "step": 1730 }, { "epoch": 0.9670391061452513, "grad_norm": 0.9863475561141968, "learning_rate": 0.0009544817927170869, "loss": 0.5375, "step": 1731 }, { "epoch": 0.9675977653631285, "grad_norm": 3.3474059104919434, "learning_rate": 0.0009544537815126051, "loss": 0.4954, "step": 1732 }, { "epoch": 0.9681564245810056, "grad_norm": 1.8166532516479492, "learning_rate": 0.0009544257703081232, "loss": 0.418, "step": 1733 }, { "epoch": 0.9687150837988827, "grad_norm": 1.42117440700531, "learning_rate": 0.0009543977591036414, "loss": 0.5219, "step": 1734 }, { "epoch": 0.9692737430167597, "grad_norm": 0.9605877995491028, "learning_rate": 0.0009543697478991596, "loss": 0.4613, "step": 1735 }, { "epoch": 0.9698324022346368, "grad_norm": 4.026614665985107, "learning_rate": 0.000954341736694678, "loss": 0.7381, "step": 1736 }, { "epoch": 0.970391061452514, "grad_norm": 0.5737302303314209, "learning_rate": 0.0009543137254901962, "loss": 0.6071, "step": 1737 }, { "epoch": 0.9709497206703911, "grad_norm": 0.6030662655830383, "learning_rate": 0.0009542857142857143, "loss": 0.5175, "step": 1738 }, { "epoch": 0.9715083798882681, "grad_norm": 0.5691815614700317, "learning_rate": 0.0009542577030812325, "loss": 0.4475, "step": 1739 }, { "epoch": 0.9720670391061452, "grad_norm": 0.4971826374530792, "learning_rate": 0.0009542296918767507, "loss": 0.4734, "step": 1740 }, { "epoch": 0.9726256983240223, "grad_norm": 0.7168786525726318, "learning_rate": 0.000954201680672269, "loss": 0.5472, "step": 1741 }, { "epoch": 0.9731843575418995, "grad_norm": 0.49384424090385437, "learning_rate": 0.0009541736694677872, "loss": 0.5945, "step": 1742 }, { "epoch": 0.9737430167597766, "grad_norm": 1.3744165897369385, "learning_rate": 0.0009541456582633054, "loss": 
0.4235, "step": 1743 }, { "epoch": 0.9743016759776536, "grad_norm": 1.0167593955993652, "learning_rate": 0.0009541176470588235, "loss": 0.5223, "step": 1744 }, { "epoch": 0.9748603351955307, "grad_norm": 0.8459349274635315, "learning_rate": 0.0009540896358543417, "loss": 0.3932, "step": 1745 }, { "epoch": 0.9754189944134078, "grad_norm": 0.5819987654685974, "learning_rate": 0.00095406162464986, "loss": 0.4391, "step": 1746 }, { "epoch": 0.975977653631285, "grad_norm": 0.5702646970748901, "learning_rate": 0.0009540336134453782, "loss": 0.5308, "step": 1747 }, { "epoch": 0.976536312849162, "grad_norm": 0.6302896738052368, "learning_rate": 0.0009540056022408964, "loss": 0.4809, "step": 1748 }, { "epoch": 0.9770949720670391, "grad_norm": 0.7520524859428406, "learning_rate": 0.0009539775910364145, "loss": 0.461, "step": 1749 }, { "epoch": 0.9776536312849162, "grad_norm": 1.06060791015625, "learning_rate": 0.0009539495798319327, "loss": 0.6726, "step": 1750 }, { "epoch": 0.9782122905027933, "grad_norm": 0.8254221677780151, "learning_rate": 0.000953921568627451, "loss": 0.5178, "step": 1751 }, { "epoch": 0.9787709497206704, "grad_norm": 0.5896238684654236, "learning_rate": 0.0009538935574229693, "loss": 0.6828, "step": 1752 }, { "epoch": 0.9793296089385475, "grad_norm": 0.4934066832065582, "learning_rate": 0.0009538655462184875, "loss": 0.4935, "step": 1753 }, { "epoch": 0.9798882681564246, "grad_norm": 0.6261158585548401, "learning_rate": 0.0009538375350140056, "loss": 0.4269, "step": 1754 }, { "epoch": 0.9804469273743017, "grad_norm": 1.0440266132354736, "learning_rate": 0.0009538095238095238, "loss": 0.5511, "step": 1755 }, { "epoch": 0.9810055865921787, "grad_norm": 0.5914115309715271, "learning_rate": 0.0009537815126050421, "loss": 0.5074, "step": 1756 }, { "epoch": 0.9815642458100559, "grad_norm": 0.6855108737945557, "learning_rate": 0.0009537535014005603, "loss": 0.5304, "step": 1757 }, { "epoch": 0.982122905027933, "grad_norm": 0.9795001149177551, "learning_rate": 
0.0009537254901960785, "loss": 0.4309, "step": 1758 }, { "epoch": 0.9826815642458101, "grad_norm": 0.8306459784507751, "learning_rate": 0.0009536974789915967, "loss": 0.516, "step": 1759 }, { "epoch": 0.9832402234636871, "grad_norm": 2.5804319381713867, "learning_rate": 0.0009536694677871148, "loss": 0.5219, "step": 1760 }, { "epoch": 0.9837988826815642, "grad_norm": 0.8659358620643616, "learning_rate": 0.0009536414565826331, "loss": 0.4489, "step": 1761 }, { "epoch": 0.9843575418994414, "grad_norm": 17.09529685974121, "learning_rate": 0.0009536134453781513, "loss": 0.5589, "step": 1762 }, { "epoch": 0.9849162011173185, "grad_norm": 0.4888550937175751, "learning_rate": 0.0009535854341736695, "loss": 0.5087, "step": 1763 }, { "epoch": 0.9854748603351955, "grad_norm": 0.8920523524284363, "learning_rate": 0.0009535574229691877, "loss": 0.389, "step": 1764 }, { "epoch": 0.9860335195530726, "grad_norm": 0.6390886306762695, "learning_rate": 0.0009535294117647058, "loss": 0.4592, "step": 1765 }, { "epoch": 0.9865921787709497, "grad_norm": 0.8547239899635315, "learning_rate": 0.0009535014005602241, "loss": 0.505, "step": 1766 }, { "epoch": 0.9871508379888269, "grad_norm": 1.1240209341049194, "learning_rate": 0.0009534733893557423, "loss": 0.5856, "step": 1767 }, { "epoch": 0.9877094972067039, "grad_norm": 2.0392749309539795, "learning_rate": 0.0009534453781512606, "loss": 0.5459, "step": 1768 }, { "epoch": 0.988268156424581, "grad_norm": 1.228261947631836, "learning_rate": 0.0009534173669467788, "loss": 0.5185, "step": 1769 }, { "epoch": 0.9888268156424581, "grad_norm": 1.0804953575134277, "learning_rate": 0.0009533893557422969, "loss": 0.6126, "step": 1770 }, { "epoch": 0.9893854748603352, "grad_norm": 1.4725781679153442, "learning_rate": 0.0009533613445378152, "loss": 0.5833, "step": 1771 }, { "epoch": 0.9899441340782122, "grad_norm": 0.6424615979194641, "learning_rate": 0.0009533333333333334, "loss": 0.51, "step": 1772 }, { "epoch": 0.9905027932960894, "grad_norm": 
0.6401039361953735, "learning_rate": 0.0009533053221288516, "loss": 0.4098, "step": 1773 }, { "epoch": 0.9910614525139665, "grad_norm": 0.7132853269577026, "learning_rate": 0.0009532773109243698, "loss": 0.5708, "step": 1774 }, { "epoch": 0.9916201117318436, "grad_norm": 0.5756059885025024, "learning_rate": 0.000953249299719888, "loss": 0.5526, "step": 1775 }, { "epoch": 0.9921787709497206, "grad_norm": 0.6990247368812561, "learning_rate": 0.0009532212885154062, "loss": 0.5504, "step": 1776 }, { "epoch": 0.9927374301675977, "grad_norm": 0.7180444598197937, "learning_rate": 0.0009531932773109244, "loss": 0.4779, "step": 1777 }, { "epoch": 0.9932960893854749, "grad_norm": 0.5672318339347839, "learning_rate": 0.0009531652661064426, "loss": 0.486, "step": 1778 }, { "epoch": 0.993854748603352, "grad_norm": 0.7721855640411377, "learning_rate": 0.0009531372549019608, "loss": 0.5043, "step": 1779 }, { "epoch": 0.994413407821229, "grad_norm": 0.7544890642166138, "learning_rate": 0.000953109243697479, "loss": 0.5308, "step": 1780 }, { "epoch": 0.9949720670391061, "grad_norm": 1.666582703590393, "learning_rate": 0.0009530812324929971, "loss": 0.4838, "step": 1781 }, { "epoch": 0.9955307262569832, "grad_norm": 0.5985055565834045, "learning_rate": 0.0009530532212885154, "loss": 0.4106, "step": 1782 }, { "epoch": 0.9960893854748604, "grad_norm": 3.151643991470337, "learning_rate": 0.0009530252100840336, "loss": 0.4749, "step": 1783 }, { "epoch": 0.9966480446927374, "grad_norm": 2.540830135345459, "learning_rate": 0.0009529971988795518, "loss": 0.5104, "step": 1784 }, { "epoch": 0.9972067039106145, "grad_norm": 0.5820245742797852, "learning_rate": 0.0009529691876750701, "loss": 0.4112, "step": 1785 }, { "epoch": 0.9977653631284916, "grad_norm": 0.8016902208328247, "learning_rate": 0.0009529411764705882, "loss": 0.4685, "step": 1786 }, { "epoch": 0.9983240223463687, "grad_norm": 0.6371076107025146, "learning_rate": 0.0009529131652661065, "loss": 0.5928, "step": 1787 }, { "epoch": 
0.9988826815642458, "grad_norm": 0.6752023100852966, "learning_rate": 0.0009528851540616247, "loss": 0.5414, "step": 1788 }, { "epoch": 0.9994413407821229, "grad_norm": 0.6677910089492798, "learning_rate": 0.0009528571428571429, "loss": 0.4194, "step": 1789 }, { "epoch": 1.0, "grad_norm": 0.8213701844215393, "learning_rate": 0.0009528291316526611, "loss": 0.5417, "step": 1790 }, { "epoch": 1.000558659217877, "grad_norm": 0.8009673357009888, "learning_rate": 0.0009528011204481793, "loss": 0.583, "step": 1791 }, { "epoch": 1.0011173184357542, "grad_norm": 0.5417811274528503, "learning_rate": 0.0009527731092436975, "loss": 0.532, "step": 1792 }, { "epoch": 1.0016759776536313, "grad_norm": 1.0531492233276367, "learning_rate": 0.0009527450980392157, "loss": 0.5112, "step": 1793 }, { "epoch": 1.0022346368715085, "grad_norm": 2.9258675575256348, "learning_rate": 0.0009527170868347339, "loss": 0.4589, "step": 1794 }, { "epoch": 1.0027932960893855, "grad_norm": 0.7311922907829285, "learning_rate": 0.0009526890756302521, "loss": 0.4816, "step": 1795 }, { "epoch": 1.0033519553072625, "grad_norm": 0.5544597506523132, "learning_rate": 0.0009526610644257703, "loss": 0.4589, "step": 1796 }, { "epoch": 1.0039106145251397, "grad_norm": 0.6038646697998047, "learning_rate": 0.0009526330532212885, "loss": 0.4494, "step": 1797 }, { "epoch": 1.0044692737430168, "grad_norm": 1.2839605808258057, "learning_rate": 0.0009526050420168067, "loss": 0.5132, "step": 1798 }, { "epoch": 1.0050279329608938, "grad_norm": 0.5195278525352478, "learning_rate": 0.0009525770308123249, "loss": 0.4801, "step": 1799 }, { "epoch": 1.005586592178771, "grad_norm": 0.5706222653388977, "learning_rate": 0.0009525490196078431, "loss": 0.3827, "step": 1800 }, { "epoch": 1.006145251396648, "grad_norm": 2.2582526206970215, "learning_rate": 0.0009525210084033614, "loss": 0.4738, "step": 1801 }, { "epoch": 1.0067039106145252, "grad_norm": 5.209932327270508, "learning_rate": 0.0009524929971988796, "loss": 0.3825, "step": 
1802 }, { "epoch": 1.0072625698324023, "grad_norm": 1.2498819828033447, "learning_rate": 0.0009524649859943978, "loss": 0.5034, "step": 1803 }, { "epoch": 1.0078212290502793, "grad_norm": 0.6335483193397522, "learning_rate": 0.000952436974789916, "loss": 0.4548, "step": 1804 }, { "epoch": 1.0083798882681565, "grad_norm": 1.3591705560684204, "learning_rate": 0.0009524089635854342, "loss": 0.4128, "step": 1805 }, { "epoch": 1.0089385474860335, "grad_norm": 0.4688738286495209, "learning_rate": 0.0009523809523809524, "loss": 0.3854, "step": 1806 }, { "epoch": 1.0094972067039105, "grad_norm": 0.5695164799690247, "learning_rate": 0.0009523529411764707, "loss": 0.4268, "step": 1807 }, { "epoch": 1.0100558659217878, "grad_norm": 0.5155953168869019, "learning_rate": 0.0009523249299719888, "loss": 0.4241, "step": 1808 }, { "epoch": 1.0106145251396648, "grad_norm": 0.6092314720153809, "learning_rate": 0.000952296918767507, "loss": 0.4624, "step": 1809 }, { "epoch": 1.011173184357542, "grad_norm": 0.7365458607673645, "learning_rate": 0.0009522689075630252, "loss": 0.5287, "step": 1810 }, { "epoch": 1.011731843575419, "grad_norm": 0.522511899471283, "learning_rate": 0.0009522408963585434, "loss": 0.4051, "step": 1811 }, { "epoch": 1.012290502793296, "grad_norm": 1.2410948276519775, "learning_rate": 0.0009522128851540617, "loss": 0.6009, "step": 1812 }, { "epoch": 1.0128491620111733, "grad_norm": 1.6744202375411987, "learning_rate": 0.0009521848739495798, "loss": 0.5283, "step": 1813 }, { "epoch": 1.0134078212290503, "grad_norm": 0.6357499957084656, "learning_rate": 0.000952156862745098, "loss": 0.46, "step": 1814 }, { "epoch": 1.0139664804469273, "grad_norm": 0.5928827524185181, "learning_rate": 0.0009521288515406162, "loss": 0.5303, "step": 1815 }, { "epoch": 1.0145251396648045, "grad_norm": 0.43160295486450195, "learning_rate": 0.0009521008403361344, "loss": 0.464, "step": 1816 }, { "epoch": 1.0150837988826815, "grad_norm": 0.7292526960372925, "learning_rate": 
0.0009520728291316528, "loss": 0.6032, "step": 1817 }, { "epoch": 1.0156424581005588, "grad_norm": 1.6284388303756714, "learning_rate": 0.0009520448179271709, "loss": 0.6681, "step": 1818 }, { "epoch": 1.0162011173184358, "grad_norm": 1.0405173301696777, "learning_rate": 0.0009520168067226891, "loss": 0.536, "step": 1819 }, { "epoch": 1.0167597765363128, "grad_norm": 0.5934048295021057, "learning_rate": 0.0009519887955182073, "loss": 0.4234, "step": 1820 }, { "epoch": 1.01731843575419, "grad_norm": 0.8506879210472107, "learning_rate": 0.0009519607843137255, "loss": 0.5327, "step": 1821 }, { "epoch": 1.017877094972067, "grad_norm": 1.1442513465881348, "learning_rate": 0.0009519327731092438, "loss": 0.6624, "step": 1822 }, { "epoch": 1.018435754189944, "grad_norm": 0.9382103681564331, "learning_rate": 0.000951904761904762, "loss": 0.4661, "step": 1823 }, { "epoch": 1.0189944134078213, "grad_norm": 1.3499640226364136, "learning_rate": 0.0009518767507002801, "loss": 0.7089, "step": 1824 }, { "epoch": 1.0195530726256983, "grad_norm": 1.1573398113250732, "learning_rate": 0.0009518487394957983, "loss": 0.4748, "step": 1825 }, { "epoch": 1.0201117318435755, "grad_norm": 0.8220486640930176, "learning_rate": 0.0009518207282913165, "loss": 0.5487, "step": 1826 }, { "epoch": 1.0206703910614525, "grad_norm": 0.7242711186408997, "learning_rate": 0.0009517927170868348, "loss": 0.4882, "step": 1827 }, { "epoch": 1.0212290502793295, "grad_norm": 4.589696407318115, "learning_rate": 0.000951764705882353, "loss": 0.6962, "step": 1828 }, { "epoch": 1.0217877094972068, "grad_norm": 0.7695217132568359, "learning_rate": 0.0009517366946778711, "loss": 0.5039, "step": 1829 }, { "epoch": 1.0223463687150838, "grad_norm": 1.1315627098083496, "learning_rate": 0.0009517086834733893, "loss": 0.5383, "step": 1830 }, { "epoch": 1.0229050279329608, "grad_norm": 0.9076684713363647, "learning_rate": 0.0009516806722689075, "loss": 0.4484, "step": 1831 }, { "epoch": 1.023463687150838, "grad_norm": 
7.834429740905762, "learning_rate": 0.0009516526610644258, "loss": 0.6796, "step": 1832 }, { "epoch": 1.024022346368715, "grad_norm": 0.5402182340621948, "learning_rate": 0.000951624649859944, "loss": 0.4995, "step": 1833 }, { "epoch": 1.0245810055865923, "grad_norm": 0.6438533067703247, "learning_rate": 0.0009515966386554621, "loss": 0.6354, "step": 1834 }, { "epoch": 1.0251396648044693, "grad_norm": 0.8940802812576294, "learning_rate": 0.0009515686274509804, "loss": 0.5287, "step": 1835 }, { "epoch": 1.0256983240223463, "grad_norm": 0.9579599499702454, "learning_rate": 0.0009515406162464986, "loss": 0.4779, "step": 1836 }, { "epoch": 1.0262569832402235, "grad_norm": 0.7511836886405945, "learning_rate": 0.0009515126050420169, "loss": 0.4558, "step": 1837 }, { "epoch": 1.0268156424581005, "grad_norm": 0.46512100100517273, "learning_rate": 0.0009514845938375351, "loss": 0.472, "step": 1838 }, { "epoch": 1.0273743016759775, "grad_norm": 0.5905054807662964, "learning_rate": 0.0009514565826330533, "loss": 0.4722, "step": 1839 }, { "epoch": 1.0279329608938548, "grad_norm": 0.5499210953712463, "learning_rate": 0.0009514285714285714, "loss": 0.4799, "step": 1840 }, { "epoch": 1.0284916201117318, "grad_norm": 0.8468977212905884, "learning_rate": 0.0009514005602240896, "loss": 0.444, "step": 1841 }, { "epoch": 1.029050279329609, "grad_norm": 0.7623924016952515, "learning_rate": 0.0009513725490196079, "loss": 0.4681, "step": 1842 }, { "epoch": 1.029608938547486, "grad_norm": 0.5783141851425171, "learning_rate": 0.0009513445378151261, "loss": 0.4932, "step": 1843 }, { "epoch": 1.030167597765363, "grad_norm": 0.6379013061523438, "learning_rate": 0.0009513165266106443, "loss": 0.5716, "step": 1844 }, { "epoch": 1.0307262569832403, "grad_norm": 0.7331202030181885, "learning_rate": 0.0009512885154061624, "loss": 0.4656, "step": 1845 }, { "epoch": 1.0312849162011173, "grad_norm": 0.8809279799461365, "learning_rate": 0.0009512605042016806, "loss": 0.6752, "step": 1846 }, { "epoch": 
1.0318435754189945, "grad_norm": 0.4546876847743988, "learning_rate": 0.0009512324929971989, "loss": 0.4629, "step": 1847 }, { "epoch": 1.0324022346368715, "grad_norm": 12.710982322692871, "learning_rate": 0.0009512044817927171, "loss": 0.4853, "step": 1848 }, { "epoch": 1.0329608938547485, "grad_norm": 0.8372478485107422, "learning_rate": 0.0009511764705882353, "loss": 0.4681, "step": 1849 }, { "epoch": 1.0335195530726258, "grad_norm": 0.9921114444732666, "learning_rate": 0.0009511484593837534, "loss": 0.5301, "step": 1850 }, { "epoch": 1.0340782122905028, "grad_norm": 1.7067806720733643, "learning_rate": 0.0009511204481792716, "loss": 0.5073, "step": 1851 }, { "epoch": 1.0346368715083798, "grad_norm": 0.6098561882972717, "learning_rate": 0.00095109243697479, "loss": 0.4594, "step": 1852 }, { "epoch": 1.035195530726257, "grad_norm": 0.6715817451477051, "learning_rate": 0.0009510644257703082, "loss": 0.5017, "step": 1853 }, { "epoch": 1.035754189944134, "grad_norm": 0.6328045725822449, "learning_rate": 0.0009510364145658264, "loss": 0.58, "step": 1854 }, { "epoch": 1.0363128491620113, "grad_norm": 0.6053429841995239, "learning_rate": 0.0009510084033613446, "loss": 0.5138, "step": 1855 }, { "epoch": 1.0368715083798883, "grad_norm": 2.4972400665283203, "learning_rate": 0.0009509803921568627, "loss": 0.5526, "step": 1856 }, { "epoch": 1.0374301675977653, "grad_norm": 1.4493807554244995, "learning_rate": 0.000950952380952381, "loss": 0.5394, "step": 1857 }, { "epoch": 1.0379888268156425, "grad_norm": 0.7984095215797424, "learning_rate": 0.0009509243697478992, "loss": 0.3829, "step": 1858 }, { "epoch": 1.0385474860335195, "grad_norm": 0.600412130355835, "learning_rate": 0.0009508963585434174, "loss": 0.517, "step": 1859 }, { "epoch": 1.0391061452513966, "grad_norm": 0.9996770620346069, "learning_rate": 0.0009508683473389356, "loss": 0.4835, "step": 1860 }, { "epoch": 1.0396648044692738, "grad_norm": 0.6391316056251526, "learning_rate": 0.0009508403361344537, "loss": 
0.493, "step": 1861 }, { "epoch": 1.0402234636871508, "grad_norm": 0.7396573424339294, "learning_rate": 0.000950812324929972, "loss": 0.4412, "step": 1862 }, { "epoch": 1.040782122905028, "grad_norm": 0.7529088854789734, "learning_rate": 0.0009507843137254902, "loss": 0.5204, "step": 1863 }, { "epoch": 1.041340782122905, "grad_norm": 1.4906047582626343, "learning_rate": 0.0009507563025210084, "loss": 0.3793, "step": 1864 }, { "epoch": 1.041899441340782, "grad_norm": 0.8225281238555908, "learning_rate": 0.0009507282913165266, "loss": 0.684, "step": 1865 }, { "epoch": 1.0424581005586593, "grad_norm": 1.064224123954773, "learning_rate": 0.0009507002801120447, "loss": 0.4323, "step": 1866 }, { "epoch": 1.0430167597765363, "grad_norm": 0.8468764424324036, "learning_rate": 0.0009506722689075631, "loss": 0.5064, "step": 1867 }, { "epoch": 1.0435754189944133, "grad_norm": 1.3305532932281494, "learning_rate": 0.0009506442577030813, "loss": 0.6069, "step": 1868 }, { "epoch": 1.0441340782122905, "grad_norm": 0.5554779767990112, "learning_rate": 0.0009506162464985995, "loss": 0.5527, "step": 1869 }, { "epoch": 1.0446927374301676, "grad_norm": 0.6082237362861633, "learning_rate": 0.0009505882352941177, "loss": 0.4117, "step": 1870 }, { "epoch": 1.0452513966480448, "grad_norm": 0.5815912485122681, "learning_rate": 0.0009505602240896359, "loss": 0.4673, "step": 1871 }, { "epoch": 1.0458100558659218, "grad_norm": 3.348129987716675, "learning_rate": 0.0009505322128851541, "loss": 0.4915, "step": 1872 }, { "epoch": 1.0463687150837988, "grad_norm": 0.7753360867500305, "learning_rate": 0.0009505042016806723, "loss": 0.4562, "step": 1873 }, { "epoch": 1.046927374301676, "grad_norm": 0.8797411918640137, "learning_rate": 0.0009504761904761905, "loss": 0.5448, "step": 1874 }, { "epoch": 1.047486033519553, "grad_norm": 0.4864180386066437, "learning_rate": 0.0009504481792717087, "loss": 0.4564, "step": 1875 }, { "epoch": 1.04804469273743, "grad_norm": 0.4797047972679138, "learning_rate": 
0.0009504201680672269, "loss": 0.4936, "step": 1876 }, { "epoch": 1.0486033519553073, "grad_norm": 0.6791684031486511, "learning_rate": 0.0009503921568627451, "loss": 0.5268, "step": 1877 }, { "epoch": 1.0491620111731843, "grad_norm": 0.6789284348487854, "learning_rate": 0.0009503641456582633, "loss": 0.5303, "step": 1878 }, { "epoch": 1.0497206703910615, "grad_norm": 2.4155874252319336, "learning_rate": 0.0009503361344537815, "loss": 0.5527, "step": 1879 }, { "epoch": 1.0502793296089385, "grad_norm": 0.5727857351303101, "learning_rate": 0.0009503081232492997, "loss": 0.4154, "step": 1880 }, { "epoch": 1.0508379888268156, "grad_norm": 0.8701090216636658, "learning_rate": 0.0009502801120448179, "loss": 0.5501, "step": 1881 }, { "epoch": 1.0513966480446928, "grad_norm": 0.5002401471138, "learning_rate": 0.0009502521008403361, "loss": 0.4729, "step": 1882 }, { "epoch": 1.0519553072625698, "grad_norm": 0.6317011117935181, "learning_rate": 0.0009502240896358544, "loss": 0.5321, "step": 1883 }, { "epoch": 1.052513966480447, "grad_norm": 0.671137809753418, "learning_rate": 0.0009501960784313726, "loss": 0.5161, "step": 1884 }, { "epoch": 1.053072625698324, "grad_norm": 0.9408466219902039, "learning_rate": 0.0009501680672268908, "loss": 0.4937, "step": 1885 }, { "epoch": 1.053631284916201, "grad_norm": 0.5962716937065125, "learning_rate": 0.000950140056022409, "loss": 0.4943, "step": 1886 }, { "epoch": 1.0541899441340783, "grad_norm": 1.048824667930603, "learning_rate": 0.0009501120448179273, "loss": 0.4504, "step": 1887 }, { "epoch": 1.0547486033519553, "grad_norm": 0.581483006477356, "learning_rate": 0.0009500840336134454, "loss": 0.494, "step": 1888 }, { "epoch": 1.0553072625698323, "grad_norm": 0.7170062065124512, "learning_rate": 0.0009500560224089636, "loss": 0.4955, "step": 1889 }, { "epoch": 1.0558659217877095, "grad_norm": 0.5089759230613708, "learning_rate": 0.0009500280112044818, "loss": 0.4707, "step": 1890 }, { "epoch": 1.0564245810055866, "grad_norm": 
2.935894727706909, "learning_rate": 0.00095, "loss": 0.5117, "step": 1891 }, { "epoch": 1.0569832402234638, "grad_norm": 0.6432769298553467, "learning_rate": 0.0009499719887955183, "loss": 0.5352, "step": 1892 }, { "epoch": 1.0575418994413408, "grad_norm": 0.6752887964248657, "learning_rate": 0.0009499439775910364, "loss": 0.4226, "step": 1893 }, { "epoch": 1.0581005586592178, "grad_norm": 1.1219720840454102, "learning_rate": 0.0009499159663865546, "loss": 0.6474, "step": 1894 }, { "epoch": 1.058659217877095, "grad_norm": 0.7912245988845825, "learning_rate": 0.0009498879551820728, "loss": 0.608, "step": 1895 }, { "epoch": 1.059217877094972, "grad_norm": 1.3063058853149414, "learning_rate": 0.000949859943977591, "loss": 0.4818, "step": 1896 }, { "epoch": 1.059776536312849, "grad_norm": 0.8381514549255371, "learning_rate": 0.0009498319327731093, "loss": 0.4716, "step": 1897 }, { "epoch": 1.0603351955307263, "grad_norm": 1.1324119567871094, "learning_rate": 0.0009498039215686274, "loss": 0.5246, "step": 1898 }, { "epoch": 1.0608938547486033, "grad_norm": 0.8672372698783875, "learning_rate": 0.0009497759103641456, "loss": 0.4882, "step": 1899 }, { "epoch": 1.0614525139664805, "grad_norm": 1.3416739702224731, "learning_rate": 0.0009497478991596639, "loss": 0.5836, "step": 1900 }, { "epoch": 1.0620111731843576, "grad_norm": 0.539715588092804, "learning_rate": 0.0009497198879551821, "loss": 0.5111, "step": 1901 }, { "epoch": 1.0625698324022346, "grad_norm": 0.629913866519928, "learning_rate": 0.0009496918767507004, "loss": 0.468, "step": 1902 }, { "epoch": 1.0631284916201118, "grad_norm": 0.60403972864151, "learning_rate": 0.0009496638655462186, "loss": 0.5335, "step": 1903 }, { "epoch": 1.0636871508379888, "grad_norm": 0.5676341652870178, "learning_rate": 0.0009496358543417367, "loss": 0.6233, "step": 1904 }, { "epoch": 1.0642458100558658, "grad_norm": 0.46623653173446655, "learning_rate": 0.0009496078431372549, "loss": 0.3396, "step": 1905 }, { "epoch": 
1.064804469273743, "grad_norm": 2.7224881649017334, "learning_rate": 0.0009495798319327731, "loss": 0.5402, "step": 1906 }, { "epoch": 1.06536312849162, "grad_norm": 0.7040708065032959, "learning_rate": 0.0009495518207282914, "loss": 0.4954, "step": 1907 }, { "epoch": 1.0659217877094973, "grad_norm": 4.1388044357299805, "learning_rate": 0.0009495238095238096, "loss": 0.4982, "step": 1908 }, { "epoch": 1.0664804469273743, "grad_norm": 0.6924494504928589, "learning_rate": 0.0009494957983193277, "loss": 0.4696, "step": 1909 }, { "epoch": 1.0670391061452513, "grad_norm": 0.7390439510345459, "learning_rate": 0.0009494677871148459, "loss": 0.5629, "step": 1910 }, { "epoch": 1.0675977653631286, "grad_norm": 0.7155113220214844, "learning_rate": 0.0009494397759103641, "loss": 0.4215, "step": 1911 }, { "epoch": 1.0681564245810056, "grad_norm": 1.014770746231079, "learning_rate": 0.0009494117647058824, "loss": 0.5626, "step": 1912 }, { "epoch": 1.0687150837988826, "grad_norm": 0.5904166102409363, "learning_rate": 0.0009493837535014006, "loss": 0.5667, "step": 1913 }, { "epoch": 1.0692737430167598, "grad_norm": 0.9801692366600037, "learning_rate": 0.0009493557422969187, "loss": 0.5677, "step": 1914 }, { "epoch": 1.0698324022346368, "grad_norm": 0.8028150200843811, "learning_rate": 0.0009493277310924369, "loss": 0.427, "step": 1915 }, { "epoch": 1.070391061452514, "grad_norm": 1.2819465398788452, "learning_rate": 0.0009492997198879551, "loss": 0.5448, "step": 1916 }, { "epoch": 1.070949720670391, "grad_norm": 0.5701756477355957, "learning_rate": 0.0009492717086834735, "loss": 0.5001, "step": 1917 }, { "epoch": 1.071508379888268, "grad_norm": 0.5249055624008179, "learning_rate": 0.0009492436974789917, "loss": 0.4983, "step": 1918 }, { "epoch": 1.0720670391061453, "grad_norm": 4.500863552093506, "learning_rate": 0.0009492156862745099, "loss": 0.5184, "step": 1919 }, { "epoch": 1.0726256983240223, "grad_norm": 0.6784878373146057, "learning_rate": 0.000949187675070028, "loss": 
0.5462, "step": 1920 }, { "epoch": 1.0731843575418996, "grad_norm": 0.9951833486557007, "learning_rate": 0.0009491596638655462, "loss": 0.5515, "step": 1921 }, { "epoch": 1.0737430167597766, "grad_norm": 0.5707071423530579, "learning_rate": 0.0009491316526610645, "loss": 0.5275, "step": 1922 }, { "epoch": 1.0743016759776536, "grad_norm": 0.6549288034439087, "learning_rate": 0.0009491036414565827, "loss": 0.5335, "step": 1923 }, { "epoch": 1.0748603351955308, "grad_norm": 0.5716150403022766, "learning_rate": 0.0009490756302521009, "loss": 0.4694, "step": 1924 }, { "epoch": 1.0754189944134078, "grad_norm": 0.6139609813690186, "learning_rate": 0.000949047619047619, "loss": 0.5359, "step": 1925 }, { "epoch": 1.0759776536312848, "grad_norm": 1.3087342977523804, "learning_rate": 0.0009490196078431372, "loss": 0.5169, "step": 1926 }, { "epoch": 1.076536312849162, "grad_norm": 0.472334086894989, "learning_rate": 0.0009489915966386555, "loss": 0.479, "step": 1927 }, { "epoch": 1.077094972067039, "grad_norm": 0.48861464858055115, "learning_rate": 0.0009489635854341737, "loss": 0.5137, "step": 1928 }, { "epoch": 1.077653631284916, "grad_norm": 0.8765335083007812, "learning_rate": 0.0009489355742296919, "loss": 0.5395, "step": 1929 }, { "epoch": 1.0782122905027933, "grad_norm": 0.8806900978088379, "learning_rate": 0.00094890756302521, "loss": 0.4267, "step": 1930 }, { "epoch": 1.0787709497206703, "grad_norm": 0.614098072052002, "learning_rate": 0.0009488795518207282, "loss": 0.6302, "step": 1931 }, { "epoch": 1.0793296089385476, "grad_norm": 0.6210005283355713, "learning_rate": 0.0009488515406162466, "loss": 0.4408, "step": 1932 }, { "epoch": 1.0798882681564246, "grad_norm": 0.3822392523288727, "learning_rate": 0.0009488235294117648, "loss": 0.3646, "step": 1933 }, { "epoch": 1.0804469273743016, "grad_norm": 0.5221074223518372, "learning_rate": 0.000948795518207283, "loss": 0.4972, "step": 1934 }, { "epoch": 1.0810055865921788, "grad_norm": 0.7435020208358765, "learning_rate": 
0.0009487675070028012, "loss": 0.4364, "step": 1935 }, { "epoch": 1.0815642458100558, "grad_norm": 1.6197137832641602, "learning_rate": 0.0009487394957983193, "loss": 0.517, "step": 1936 }, { "epoch": 1.082122905027933, "grad_norm": 0.582164466381073, "learning_rate": 0.0009487114845938376, "loss": 0.4018, "step": 1937 }, { "epoch": 1.08268156424581, "grad_norm": 0.6637523174285889, "learning_rate": 0.0009486834733893558, "loss": 0.539, "step": 1938 }, { "epoch": 1.083240223463687, "grad_norm": 0.5906127095222473, "learning_rate": 0.000948655462184874, "loss": 0.4602, "step": 1939 }, { "epoch": 1.0837988826815643, "grad_norm": 0.7345956563949585, "learning_rate": 0.0009486274509803922, "loss": 0.4681, "step": 1940 }, { "epoch": 1.0843575418994413, "grad_norm": 1.3723267316818237, "learning_rate": 0.0009485994397759103, "loss": 0.698, "step": 1941 }, { "epoch": 1.0849162011173183, "grad_norm": 0.6047074794769287, "learning_rate": 0.0009485714285714286, "loss": 0.4643, "step": 1942 }, { "epoch": 1.0854748603351956, "grad_norm": 3.5328667163848877, "learning_rate": 0.0009485434173669468, "loss": 0.4468, "step": 1943 }, { "epoch": 1.0860335195530726, "grad_norm": 0.9016256928443909, "learning_rate": 0.000948515406162465, "loss": 0.4569, "step": 1944 }, { "epoch": 1.0865921787709498, "grad_norm": 0.4552290141582489, "learning_rate": 0.0009484873949579832, "loss": 0.5389, "step": 1945 }, { "epoch": 1.0871508379888268, "grad_norm": 0.6696059107780457, "learning_rate": 0.0009484593837535013, "loss": 0.4362, "step": 1946 }, { "epoch": 1.0877094972067038, "grad_norm": 0.6108041405677795, "learning_rate": 0.0009484313725490196, "loss": 0.4632, "step": 1947 }, { "epoch": 1.088268156424581, "grad_norm": 0.562382161617279, "learning_rate": 0.0009484033613445378, "loss": 0.5649, "step": 1948 }, { "epoch": 1.088826815642458, "grad_norm": 0.5756879448890686, "learning_rate": 0.000948375350140056, "loss": 0.3809, "step": 1949 }, { "epoch": 1.089385474860335, "grad_norm": 
1.2760363817214966, "learning_rate": 0.0009483473389355743, "loss": 0.5882, "step": 1950 }, { "epoch": 1.0899441340782123, "grad_norm": 0.6584094762802124, "learning_rate": 0.0009483193277310925, "loss": 0.525, "step": 1951 }, { "epoch": 1.0905027932960893, "grad_norm": 0.5820613503456116, "learning_rate": 0.0009482913165266107, "loss": 0.3538, "step": 1952 }, { "epoch": 1.0910614525139666, "grad_norm": 0.7930244207382202, "learning_rate": 0.0009482633053221289, "loss": 0.5407, "step": 1953 }, { "epoch": 1.0916201117318436, "grad_norm": 0.4802961051464081, "learning_rate": 0.0009482352941176471, "loss": 0.4492, "step": 1954 }, { "epoch": 1.0921787709497206, "grad_norm": 0.7456192374229431, "learning_rate": 0.0009482072829131653, "loss": 0.5938, "step": 1955 }, { "epoch": 1.0927374301675978, "grad_norm": 0.6738383173942566, "learning_rate": 0.0009481792717086835, "loss": 0.5535, "step": 1956 }, { "epoch": 1.0932960893854748, "grad_norm": 1.3703727722167969, "learning_rate": 0.0009481512605042017, "loss": 0.5562, "step": 1957 }, { "epoch": 1.0938547486033519, "grad_norm": 1.293674349784851, "learning_rate": 0.0009481232492997199, "loss": 0.5104, "step": 1958 }, { "epoch": 1.094413407821229, "grad_norm": 1.3170990943908691, "learning_rate": 0.0009480952380952381, "loss": 0.5793, "step": 1959 }, { "epoch": 1.094972067039106, "grad_norm": 0.9616127014160156, "learning_rate": 0.0009480672268907563, "loss": 0.6517, "step": 1960 }, { "epoch": 1.0955307262569833, "grad_norm": 1.4081939458847046, "learning_rate": 0.0009480392156862745, "loss": 0.5712, "step": 1961 }, { "epoch": 1.0960893854748603, "grad_norm": 0.7713123559951782, "learning_rate": 0.0009480112044817928, "loss": 0.546, "step": 1962 }, { "epoch": 1.0966480446927374, "grad_norm": 0.6402348279953003, "learning_rate": 0.0009479831932773109, "loss": 0.6007, "step": 1963 }, { "epoch": 1.0972067039106146, "grad_norm": 0.5454915165901184, "learning_rate": 0.0009479551820728291, "loss": 0.4806, "step": 1964 }, { 
"epoch": 1.0977653631284916, "grad_norm": 0.6856116056442261, "learning_rate": 0.0009479271708683474, "loss": 0.4945, "step": 1965 }, { "epoch": 1.0983240223463686, "grad_norm": 0.6550681591033936, "learning_rate": 0.0009478991596638656, "loss": 0.3967, "step": 1966 }, { "epoch": 1.0988826815642458, "grad_norm": 0.7778172492980957, "learning_rate": 0.0009478711484593839, "loss": 0.534, "step": 1967 }, { "epoch": 1.0994413407821229, "grad_norm": 0.8509172201156616, "learning_rate": 0.000947843137254902, "loss": 0.4184, "step": 1968 }, { "epoch": 1.1, "grad_norm": 0.7637555599212646, "learning_rate": 0.0009478151260504202, "loss": 0.5898, "step": 1969 }, { "epoch": 1.100558659217877, "grad_norm": 0.6652496457099915, "learning_rate": 0.0009477871148459384, "loss": 0.5575, "step": 1970 }, { "epoch": 1.1011173184357541, "grad_norm": 1.1267739534378052, "learning_rate": 0.0009477591036414566, "loss": 0.4535, "step": 1971 }, { "epoch": 1.1016759776536313, "grad_norm": 0.7473882436752319, "learning_rate": 0.0009477310924369749, "loss": 0.4894, "step": 1972 }, { "epoch": 1.1022346368715084, "grad_norm": 0.4576612710952759, "learning_rate": 0.000947703081232493, "loss": 0.405, "step": 1973 }, { "epoch": 1.1027932960893856, "grad_norm": 0.5016672611236572, "learning_rate": 0.0009476750700280112, "loss": 0.4387, "step": 1974 }, { "epoch": 1.1033519553072626, "grad_norm": 1.9634641408920288, "learning_rate": 0.0009476470588235294, "loss": 0.4615, "step": 1975 }, { "epoch": 1.1039106145251396, "grad_norm": 0.561021625995636, "learning_rate": 0.0009476190476190476, "loss": 0.4402, "step": 1976 }, { "epoch": 1.1044692737430168, "grad_norm": 0.4194818437099457, "learning_rate": 0.0009475910364145659, "loss": 0.4603, "step": 1977 }, { "epoch": 1.1050279329608939, "grad_norm": 6.717833042144775, "learning_rate": 0.0009475630252100841, "loss": 0.5868, "step": 1978 }, { "epoch": 1.1055865921787709, "grad_norm": 1.257798671722412, "learning_rate": 0.0009475350140056022, "loss": 0.5072, 
"step": 1979 }, { "epoch": 1.106145251396648, "grad_norm": 0.505747377872467, "learning_rate": 0.0009475070028011204, "loss": 0.3985, "step": 1980 }, { "epoch": 1.106703910614525, "grad_norm": 0.8068348169326782, "learning_rate": 0.0009474789915966386, "loss": 0.5688, "step": 1981 }, { "epoch": 1.1072625698324021, "grad_norm": 0.7358660101890564, "learning_rate": 0.000947450980392157, "loss": 0.5501, "step": 1982 }, { "epoch": 1.1078212290502794, "grad_norm": 0.4912152886390686, "learning_rate": 0.0009474229691876752, "loss": 0.4926, "step": 1983 }, { "epoch": 1.1083798882681564, "grad_norm": 0.9780363440513611, "learning_rate": 0.0009473949579831933, "loss": 0.4942, "step": 1984 }, { "epoch": 1.1089385474860336, "grad_norm": 9.432075500488281, "learning_rate": 0.0009473669467787115, "loss": 0.4413, "step": 1985 }, { "epoch": 1.1094972067039106, "grad_norm": 1.2021106481552124, "learning_rate": 0.0009473389355742297, "loss": 0.4839, "step": 1986 }, { "epoch": 1.1100558659217876, "grad_norm": 2.9500606060028076, "learning_rate": 0.000947310924369748, "loss": 0.5105, "step": 1987 }, { "epoch": 1.1106145251396649, "grad_norm": 0.6623552441596985, "learning_rate": 0.0009472829131652662, "loss": 0.5214, "step": 1988 }, { "epoch": 1.1111731843575419, "grad_norm": 0.6067897081375122, "learning_rate": 0.0009472549019607843, "loss": 0.4155, "step": 1989 }, { "epoch": 1.111731843575419, "grad_norm": 0.576686441898346, "learning_rate": 0.0009472268907563025, "loss": 0.474, "step": 1990 }, { "epoch": 1.112290502793296, "grad_norm": 0.4937525987625122, "learning_rate": 0.0009471988795518207, "loss": 0.5354, "step": 1991 }, { "epoch": 1.1128491620111731, "grad_norm": 0.7148085832595825, "learning_rate": 0.000947170868347339, "loss": 0.4498, "step": 1992 }, { "epoch": 1.1134078212290504, "grad_norm": 0.6365212202072144, "learning_rate": 0.0009471428571428572, "loss": 0.4725, "step": 1993 }, { "epoch": 1.1139664804469274, "grad_norm": 12.856399536132812, "learning_rate": 
0.0009471148459383754, "loss": 0.4554, "step": 1994 }, { "epoch": 1.1145251396648044, "grad_norm": 1.4765926599502563, "learning_rate": 0.0009470868347338935, "loss": 0.4718, "step": 1995 }, { "epoch": 1.1150837988826816, "grad_norm": 0.6106082797050476, "learning_rate": 0.0009470588235294117, "loss": 0.4687, "step": 1996 }, { "epoch": 1.1156424581005586, "grad_norm": 0.7510437965393066, "learning_rate": 0.00094703081232493, "loss": 0.5655, "step": 1997 }, { "epoch": 1.1162011173184359, "grad_norm": 2.784778594970703, "learning_rate": 0.0009470028011204483, "loss": 0.5206, "step": 1998 }, { "epoch": 1.1167597765363129, "grad_norm": 0.5566121339797974, "learning_rate": 0.0009469747899159665, "loss": 0.4103, "step": 1999 }, { "epoch": 1.1173184357541899, "grad_norm": 0.49366363883018494, "learning_rate": 0.0009469467787114846, "loss": 0.5134, "step": 2000 }, { "epoch": 1.1173184357541899, "eval_cer": 0.10084230739855761, "eval_loss": 0.376040518283844, "eval_runtime": 55.697, "eval_samples_per_second": 81.477, "eval_steps_per_second": 5.099, "eval_wer": 0.4046535012988464, "step": 2000 }, { "epoch": 1.117877094972067, "grad_norm": 0.6351279020309448, "learning_rate": 0.0009469187675070028, "loss": 0.6502, "step": 2001 }, { "epoch": 1.1184357541899441, "grad_norm": 0.8336634635925293, "learning_rate": 0.0009468907563025211, "loss": 0.5242, "step": 2002 }, { "epoch": 1.1189944134078211, "grad_norm": 0.7816217541694641, "learning_rate": 0.0009468627450980393, "loss": 0.5954, "step": 2003 }, { "epoch": 1.1195530726256984, "grad_norm": 0.7004947066307068, "learning_rate": 0.0009468347338935575, "loss": 0.474, "step": 2004 }, { "epoch": 1.1201117318435754, "grad_norm": 0.611807644367218, "learning_rate": 0.0009468067226890756, "loss": 0.4017, "step": 2005 }, { "epoch": 1.1206703910614526, "grad_norm": 2.6075925827026367, "learning_rate": 0.0009467787114845938, "loss": 0.5336, "step": 2006 }, { "epoch": 1.1212290502793296, "grad_norm": 0.9471938610076904, "learning_rate": 
0.000946750700280112, "loss": 0.422, "step": 2007 }, { "epoch": 1.1217877094972066, "grad_norm": 0.5446215271949768, "learning_rate": 0.0009467226890756303, "loss": 0.4789, "step": 2008 }, { "epoch": 1.1223463687150839, "grad_norm": 4.740174293518066, "learning_rate": 0.0009466946778711485, "loss": 0.5513, "step": 2009 }, { "epoch": 1.1229050279329609, "grad_norm": 0.3919450342655182, "learning_rate": 0.0009466666666666667, "loss": 0.4792, "step": 2010 }, { "epoch": 1.1234636871508379, "grad_norm": 0.6619439125061035, "learning_rate": 0.0009466386554621848, "loss": 0.4726, "step": 2011 }, { "epoch": 1.1240223463687151, "grad_norm": 1.0391995906829834, "learning_rate": 0.000946610644257703, "loss": 0.6268, "step": 2012 }, { "epoch": 1.1245810055865921, "grad_norm": 0.7475591897964478, "learning_rate": 0.0009465826330532213, "loss": 0.5402, "step": 2013 }, { "epoch": 1.1251396648044694, "grad_norm": 0.45967820286750793, "learning_rate": 0.0009465546218487396, "loss": 0.548, "step": 2014 }, { "epoch": 1.1256983240223464, "grad_norm": 0.503616452217102, "learning_rate": 0.0009465266106442578, "loss": 0.5305, "step": 2015 }, { "epoch": 1.1262569832402234, "grad_norm": 0.5683256387710571, "learning_rate": 0.0009464985994397759, "loss": 0.3593, "step": 2016 }, { "epoch": 1.1268156424581006, "grad_norm": 1.0615944862365723, "learning_rate": 0.0009464705882352941, "loss": 0.5239, "step": 2017 }, { "epoch": 1.1273743016759776, "grad_norm": 2.092423439025879, "learning_rate": 0.0009464425770308124, "loss": 0.4618, "step": 2018 }, { "epoch": 1.1279329608938546, "grad_norm": 0.960770845413208, "learning_rate": 0.0009464145658263306, "loss": 0.4726, "step": 2019 }, { "epoch": 1.1284916201117319, "grad_norm": 0.6446082592010498, "learning_rate": 0.0009463865546218488, "loss": 0.488, "step": 2020 }, { "epoch": 1.1290502793296089, "grad_norm": 1.553737998008728, "learning_rate": 0.0009463585434173669, "loss": 0.4262, "step": 2021 }, { "epoch": 1.1296089385474861, "grad_norm": 
1.2342222929000854, "learning_rate": 0.0009463305322128851, "loss": 0.4967, "step": 2022 }, { "epoch": 1.1301675977653631, "grad_norm": 0.9561513066291809, "learning_rate": 0.0009463025210084034, "loss": 0.5597, "step": 2023 }, { "epoch": 1.1307262569832401, "grad_norm": 1.0787378549575806, "learning_rate": 0.0009462745098039216, "loss": 0.5564, "step": 2024 }, { "epoch": 1.1312849162011174, "grad_norm": 0.6298704743385315, "learning_rate": 0.0009462464985994398, "loss": 0.5185, "step": 2025 }, { "epoch": 1.1318435754189944, "grad_norm": 1.0412228107452393, "learning_rate": 0.000946218487394958, "loss": 0.4284, "step": 2026 }, { "epoch": 1.1324022346368716, "grad_norm": 0.9268519878387451, "learning_rate": 0.0009461904761904761, "loss": 0.435, "step": 2027 }, { "epoch": 1.1329608938547486, "grad_norm": 1.1674823760986328, "learning_rate": 0.0009461624649859944, "loss": 0.521, "step": 2028 }, { "epoch": 1.1335195530726256, "grad_norm": 0.8815200328826904, "learning_rate": 0.0009461344537815126, "loss": 0.5258, "step": 2029 }, { "epoch": 1.1340782122905029, "grad_norm": 0.7177609205245972, "learning_rate": 0.0009461064425770308, "loss": 0.5613, "step": 2030 }, { "epoch": 1.1346368715083799, "grad_norm": 0.7643547654151917, "learning_rate": 0.000946078431372549, "loss": 0.5462, "step": 2031 }, { "epoch": 1.135195530726257, "grad_norm": 1.8996915817260742, "learning_rate": 0.0009460504201680672, "loss": 0.5297, "step": 2032 }, { "epoch": 1.1357541899441341, "grad_norm": 0.5828958749771118, "learning_rate": 0.0009460224089635855, "loss": 0.3906, "step": 2033 }, { "epoch": 1.1363128491620111, "grad_norm": 0.6114552617073059, "learning_rate": 0.0009459943977591037, "loss": 0.4089, "step": 2034 }, { "epoch": 1.1368715083798882, "grad_norm": 0.4777858555316925, "learning_rate": 0.0009459663865546219, "loss": 0.48, "step": 2035 }, { "epoch": 1.1374301675977654, "grad_norm": 0.37437257170677185, "learning_rate": 0.0009459383753501401, "loss": 0.3888, "step": 2036 }, { 
"epoch": 1.1379888268156424, "grad_norm": 0.672275722026825, "learning_rate": 0.0009459103641456582, "loss": 0.4543, "step": 2037 }, { "epoch": 1.1385474860335196, "grad_norm": 0.8527001142501831, "learning_rate": 0.0009458823529411765, "loss": 0.4963, "step": 2038 }, { "epoch": 1.1391061452513966, "grad_norm": 0.7076776027679443, "learning_rate": 0.0009458543417366947, "loss": 0.4566, "step": 2039 }, { "epoch": 1.1396648044692737, "grad_norm": 0.9013495445251465, "learning_rate": 0.0009458263305322129, "loss": 0.446, "step": 2040 }, { "epoch": 1.1402234636871509, "grad_norm": 2.5649375915527344, "learning_rate": 0.0009457983193277311, "loss": 0.5934, "step": 2041 }, { "epoch": 1.140782122905028, "grad_norm": 0.8204906582832336, "learning_rate": 0.0009457703081232493, "loss": 0.5531, "step": 2042 }, { "epoch": 1.1413407821229051, "grad_norm": 0.7193981409072876, "learning_rate": 0.0009457422969187675, "loss": 0.5328, "step": 2043 }, { "epoch": 1.1418994413407821, "grad_norm": 0.6250834465026855, "learning_rate": 0.0009457142857142857, "loss": 0.4741, "step": 2044 }, { "epoch": 1.1424581005586592, "grad_norm": 0.7503669261932373, "learning_rate": 0.0009456862745098039, "loss": 0.7432, "step": 2045 }, { "epoch": 1.1430167597765364, "grad_norm": 0.5963375568389893, "learning_rate": 0.0009456582633053221, "loss": 0.4875, "step": 2046 }, { "epoch": 1.1435754189944134, "grad_norm": 0.685600996017456, "learning_rate": 0.0009456302521008404, "loss": 0.3985, "step": 2047 }, { "epoch": 1.1441340782122904, "grad_norm": 0.5940116047859192, "learning_rate": 0.0009456022408963586, "loss": 0.5145, "step": 2048 }, { "epoch": 1.1446927374301676, "grad_norm": 0.593612015247345, "learning_rate": 0.0009455742296918768, "loss": 0.502, "step": 2049 }, { "epoch": 1.1452513966480447, "grad_norm": 0.7128913998603821, "learning_rate": 0.000945546218487395, "loss": 0.4228, "step": 2050 }, { "epoch": 1.1458100558659219, "grad_norm": 0.7061253786087036, "learning_rate": 0.0009455182072829132, 
"loss": 0.5887, "step": 2051 }, { "epoch": 1.146368715083799, "grad_norm": 0.5116814374923706, "learning_rate": 0.0009454901960784314, "loss": 0.4821, "step": 2052 }, { "epoch": 1.146927374301676, "grad_norm": 1.508675456047058, "learning_rate": 0.0009454621848739496, "loss": 0.6087, "step": 2053 }, { "epoch": 1.1474860335195531, "grad_norm": 1.2514795064926147, "learning_rate": 0.0009454341736694678, "loss": 0.4883, "step": 2054 }, { "epoch": 1.1480446927374302, "grad_norm": 0.6793577075004578, "learning_rate": 0.000945406162464986, "loss": 0.6769, "step": 2055 }, { "epoch": 1.1486033519553072, "grad_norm": 0.5529429912567139, "learning_rate": 0.0009453781512605042, "loss": 0.491, "step": 2056 }, { "epoch": 1.1491620111731844, "grad_norm": 0.8128876090049744, "learning_rate": 0.0009453501400560224, "loss": 0.4867, "step": 2057 }, { "epoch": 1.1497206703910614, "grad_norm": 0.504065752029419, "learning_rate": 0.0009453221288515407, "loss": 0.4177, "step": 2058 }, { "epoch": 1.1502793296089386, "grad_norm": 0.9824762940406799, "learning_rate": 0.0009452941176470588, "loss": 0.6173, "step": 2059 }, { "epoch": 1.1508379888268156, "grad_norm": 0.5630059242248535, "learning_rate": 0.000945266106442577, "loss": 0.4012, "step": 2060 }, { "epoch": 1.1513966480446927, "grad_norm": 0.6323129534721375, "learning_rate": 0.0009452380952380952, "loss": 0.5885, "step": 2061 }, { "epoch": 1.15195530726257, "grad_norm": 1.6381089687347412, "learning_rate": 0.0009452100840336134, "loss": 0.5873, "step": 2062 }, { "epoch": 1.152513966480447, "grad_norm": 0.6622616648674011, "learning_rate": 0.0009451820728291318, "loss": 0.5932, "step": 2063 }, { "epoch": 1.1530726256983241, "grad_norm": 0.763674795627594, "learning_rate": 0.0009451540616246499, "loss": 0.6191, "step": 2064 }, { "epoch": 1.1536312849162011, "grad_norm": 0.5640531182289124, "learning_rate": 0.0009451260504201681, "loss": 0.5414, "step": 2065 }, { "epoch": 1.1541899441340782, "grad_norm": 1.0875272750854492, 
"learning_rate": 0.0009450980392156863, "loss": 0.4564, "step": 2066 }, { "epoch": 1.1547486033519554, "grad_norm": 0.4861888289451599, "learning_rate": 0.0009450700280112045, "loss": 0.4297, "step": 2067 }, { "epoch": 1.1553072625698324, "grad_norm": 1.0888410806655884, "learning_rate": 0.0009450420168067228, "loss": 0.4436, "step": 2068 }, { "epoch": 1.1558659217877094, "grad_norm": 1.016395926475525, "learning_rate": 0.0009450140056022409, "loss": 0.4985, "step": 2069 }, { "epoch": 1.1564245810055866, "grad_norm": 0.5680158138275146, "learning_rate": 0.0009449859943977591, "loss": 0.3889, "step": 2070 }, { "epoch": 1.1569832402234637, "grad_norm": 0.6512601971626282, "learning_rate": 0.0009449579831932773, "loss": 0.497, "step": 2071 }, { "epoch": 1.1575418994413407, "grad_norm": 0.7783377170562744, "learning_rate": 0.0009449299719887955, "loss": 0.6509, "step": 2072 }, { "epoch": 1.158100558659218, "grad_norm": 0.7246847152709961, "learning_rate": 0.0009449019607843138, "loss": 0.6507, "step": 2073 }, { "epoch": 1.158659217877095, "grad_norm": 1.2934362888336182, "learning_rate": 0.000944873949579832, "loss": 0.4753, "step": 2074 }, { "epoch": 1.1592178770949721, "grad_norm": 0.5288491249084473, "learning_rate": 0.0009448459383753501, "loss": 0.4657, "step": 2075 }, { "epoch": 1.1597765363128492, "grad_norm": 0.5828776955604553, "learning_rate": 0.0009448179271708683, "loss": 0.4424, "step": 2076 }, { "epoch": 1.1603351955307262, "grad_norm": 0.7760799527168274, "learning_rate": 0.0009447899159663865, "loss": 0.5057, "step": 2077 }, { "epoch": 1.1608938547486034, "grad_norm": 1.175264835357666, "learning_rate": 0.0009447619047619048, "loss": 0.5168, "step": 2078 }, { "epoch": 1.1614525139664804, "grad_norm": 0.5211125016212463, "learning_rate": 0.000944733893557423, "loss": 0.4559, "step": 2079 }, { "epoch": 1.1620111731843576, "grad_norm": 0.4531496465206146, "learning_rate": 0.0009447058823529411, "loss": 0.4065, "step": 2080 }, { "epoch": 1.1625698324022347, 
"grad_norm": 0.5101728439331055, "learning_rate": 0.0009446778711484594, "loss": 0.527, "step": 2081 }, { "epoch": 1.1631284916201117, "grad_norm": 0.6482619047164917, "learning_rate": 0.0009446498599439776, "loss": 0.5388, "step": 2082 }, { "epoch": 1.163687150837989, "grad_norm": 0.490764319896698, "learning_rate": 0.0009446218487394959, "loss": 0.4549, "step": 2083 }, { "epoch": 1.164245810055866, "grad_norm": 0.5618298649787903, "learning_rate": 0.0009445938375350141, "loss": 0.5465, "step": 2084 }, { "epoch": 1.164804469273743, "grad_norm": 0.8647662401199341, "learning_rate": 0.0009445658263305322, "loss": 0.4772, "step": 2085 }, { "epoch": 1.1653631284916202, "grad_norm": 0.6574456095695496, "learning_rate": 0.0009445378151260504, "loss": 0.5492, "step": 2086 }, { "epoch": 1.1659217877094972, "grad_norm": 1.570063591003418, "learning_rate": 0.0009445098039215686, "loss": 0.4776, "step": 2087 }, { "epoch": 1.1664804469273742, "grad_norm": 0.6808940172195435, "learning_rate": 0.0009444817927170869, "loss": 0.5196, "step": 2088 }, { "epoch": 1.1670391061452514, "grad_norm": 0.4464326500892639, "learning_rate": 0.0009444537815126051, "loss": 0.5087, "step": 2089 }, { "epoch": 1.1675977653631284, "grad_norm": 0.4341641068458557, "learning_rate": 0.0009444257703081233, "loss": 0.4685, "step": 2090 }, { "epoch": 1.1681564245810057, "grad_norm": 1.411848783493042, "learning_rate": 0.0009443977591036414, "loss": 0.5911, "step": 2091 }, { "epoch": 1.1687150837988827, "grad_norm": 0.5872650146484375, "learning_rate": 0.0009443697478991596, "loss": 0.511, "step": 2092 }, { "epoch": 1.1692737430167597, "grad_norm": 1.6268452405929565, "learning_rate": 0.0009443417366946779, "loss": 0.3877, "step": 2093 }, { "epoch": 1.169832402234637, "grad_norm": 0.6332060694694519, "learning_rate": 0.0009443137254901961, "loss": 0.5258, "step": 2094 }, { "epoch": 1.170391061452514, "grad_norm": 0.45429572463035583, "learning_rate": 0.0009442857142857143, "loss": 0.3962, "step": 2095 }, 
{ "epoch": 1.1709497206703912, "grad_norm": 2.5373170375823975, "learning_rate": 0.0009442577030812324, "loss": 0.4432, "step": 2096 }, { "epoch": 1.1715083798882682, "grad_norm": 0.6933470368385315, "learning_rate": 0.0009442296918767506, "loss": 0.4427, "step": 2097 }, { "epoch": 1.1720670391061452, "grad_norm": 0.6767030358314514, "learning_rate": 0.000944201680672269, "loss": 0.4712, "step": 2098 }, { "epoch": 1.1726256983240224, "grad_norm": 4.434169769287109, "learning_rate": 0.0009441736694677872, "loss": 0.5754, "step": 2099 }, { "epoch": 1.1731843575418994, "grad_norm": 34.56585693359375, "learning_rate": 0.0009441456582633054, "loss": 0.5498, "step": 2100 }, { "epoch": 1.1737430167597767, "grad_norm": 0.7130588889122009, "learning_rate": 0.0009441176470588235, "loss": 0.5418, "step": 2101 }, { "epoch": 1.1743016759776537, "grad_norm": 1.0188194513320923, "learning_rate": 0.0009440896358543417, "loss": 0.4108, "step": 2102 }, { "epoch": 1.1748603351955307, "grad_norm": 1.7920080423355103, "learning_rate": 0.00094406162464986, "loss": 0.5079, "step": 2103 }, { "epoch": 1.175418994413408, "grad_norm": 0.7652671933174133, "learning_rate": 0.0009440336134453782, "loss": 0.4698, "step": 2104 }, { "epoch": 1.175977653631285, "grad_norm": 2.849742889404297, "learning_rate": 0.0009440056022408964, "loss": 0.444, "step": 2105 }, { "epoch": 1.176536312849162, "grad_norm": 0.685455322265625, "learning_rate": 0.0009439775910364146, "loss": 0.468, "step": 2106 }, { "epoch": 1.1770949720670392, "grad_norm": 0.7482166290283203, "learning_rate": 0.0009439495798319327, "loss": 0.4793, "step": 2107 }, { "epoch": 1.1776536312849162, "grad_norm": 0.8171047568321228, "learning_rate": 0.000943921568627451, "loss": 0.5212, "step": 2108 }, { "epoch": 1.1782122905027932, "grad_norm": 0.5344380140304565, "learning_rate": 0.0009438935574229692, "loss": 0.4642, "step": 2109 }, { "epoch": 1.1787709497206704, "grad_norm": 0.45532694458961487, "learning_rate": 0.0009438655462184874, 
"loss": 0.4947, "step": 2110 }, { "epoch": 1.1793296089385474, "grad_norm": 0.890842616558075, "learning_rate": 0.0009438375350140056, "loss": 0.5628, "step": 2111 }, { "epoch": 1.1798882681564247, "grad_norm": 0.8063690066337585, "learning_rate": 0.0009438095238095237, "loss": 0.495, "step": 2112 }, { "epoch": 1.1804469273743017, "grad_norm": 0.8815364837646484, "learning_rate": 0.000943781512605042, "loss": 0.5065, "step": 2113 }, { "epoch": 1.1810055865921787, "grad_norm": 0.71140456199646, "learning_rate": 0.0009437535014005603, "loss": 0.4184, "step": 2114 }, { "epoch": 1.181564245810056, "grad_norm": 0.833466112613678, "learning_rate": 0.0009437254901960785, "loss": 0.439, "step": 2115 }, { "epoch": 1.182122905027933, "grad_norm": 4.519916534423828, "learning_rate": 0.0009436974789915967, "loss": 0.4693, "step": 2116 }, { "epoch": 1.1826815642458102, "grad_norm": 1.029601812362671, "learning_rate": 0.0009436694677871148, "loss": 0.5891, "step": 2117 }, { "epoch": 1.1832402234636872, "grad_norm": 0.4797174632549286, "learning_rate": 0.0009436414565826331, "loss": 0.4465, "step": 2118 }, { "epoch": 1.1837988826815642, "grad_norm": 1.2280062437057495, "learning_rate": 0.0009436134453781513, "loss": 0.5429, "step": 2119 }, { "epoch": 1.1843575418994414, "grad_norm": 1.3682737350463867, "learning_rate": 0.0009435854341736695, "loss": 0.4934, "step": 2120 }, { "epoch": 1.1849162011173184, "grad_norm": 4.337080955505371, "learning_rate": 0.0009435574229691877, "loss": 0.5855, "step": 2121 }, { "epoch": 1.1854748603351954, "grad_norm": 1.0282882452011108, "learning_rate": 0.0009435294117647059, "loss": 0.4405, "step": 2122 }, { "epoch": 1.1860335195530727, "grad_norm": 1.3295059204101562, "learning_rate": 0.0009435014005602241, "loss": 0.4725, "step": 2123 }, { "epoch": 1.1865921787709497, "grad_norm": 0.720296323299408, "learning_rate": 0.0009434733893557423, "loss": 0.515, "step": 2124 }, { "epoch": 1.1871508379888267, "grad_norm": 0.8459692597389221, 
"learning_rate": 0.0009434453781512605, "loss": 0.7379, "step": 2125 }, { "epoch": 1.187709497206704, "grad_norm": 0.5645942091941833, "learning_rate": 0.0009434173669467787, "loss": 0.4765, "step": 2126 }, { "epoch": 1.188268156424581, "grad_norm": 0.9478999376296997, "learning_rate": 0.0009433893557422969, "loss": 0.4183, "step": 2127 }, { "epoch": 1.1888268156424582, "grad_norm": 0.7287110686302185, "learning_rate": 0.0009433613445378151, "loss": 0.3978, "step": 2128 }, { "epoch": 1.1893854748603352, "grad_norm": 1.2676537036895752, "learning_rate": 0.0009433333333333334, "loss": 0.4012, "step": 2129 }, { "epoch": 1.1899441340782122, "grad_norm": 0.8754597306251526, "learning_rate": 0.0009433053221288516, "loss": 0.5435, "step": 2130 }, { "epoch": 1.1905027932960894, "grad_norm": 0.6942610740661621, "learning_rate": 0.0009432773109243698, "loss": 0.5449, "step": 2131 }, { "epoch": 1.1910614525139664, "grad_norm": 1.1593784093856812, "learning_rate": 0.000943249299719888, "loss": 0.4472, "step": 2132 }, { "epoch": 1.1916201117318437, "grad_norm": 0.8377334475517273, "learning_rate": 0.0009432212885154063, "loss": 0.5899, "step": 2133 }, { "epoch": 1.1921787709497207, "grad_norm": 0.6997050046920776, "learning_rate": 0.0009431932773109244, "loss": 0.5047, "step": 2134 }, { "epoch": 1.1927374301675977, "grad_norm": 1.3585000038146973, "learning_rate": 0.0009431652661064426, "loss": 0.4843, "step": 2135 }, { "epoch": 1.193296089385475, "grad_norm": 0.7649428844451904, "learning_rate": 0.0009431372549019608, "loss": 0.5575, "step": 2136 }, { "epoch": 1.193854748603352, "grad_norm": 10.159912109375, "learning_rate": 0.000943109243697479, "loss": 0.6898, "step": 2137 }, { "epoch": 1.194413407821229, "grad_norm": 1.1560299396514893, "learning_rate": 0.0009430812324929973, "loss": 0.4705, "step": 2138 }, { "epoch": 1.1949720670391062, "grad_norm": 1.281246304512024, "learning_rate": 0.0009430532212885154, "loss": 0.4527, "step": 2139 }, { "epoch": 1.1955307262569832, 
"grad_norm": 0.7689476609230042, "learning_rate": 0.0009430252100840336, "loss": 0.3603, "step": 2140 }, { "epoch": 1.1960893854748602, "grad_norm": 0.6380707621574402, "learning_rate": 0.0009429971988795518, "loss": 0.6703, "step": 2141 }, { "epoch": 1.1966480446927374, "grad_norm": 1.4263190031051636, "learning_rate": 0.00094296918767507, "loss": 0.4352, "step": 2142 }, { "epoch": 1.1972067039106145, "grad_norm": 1.352187156677246, "learning_rate": 0.0009429411764705883, "loss": 0.4378, "step": 2143 }, { "epoch": 1.1977653631284917, "grad_norm": 0.5018616318702698, "learning_rate": 0.0009429131652661064, "loss": 0.5471, "step": 2144 }, { "epoch": 1.1983240223463687, "grad_norm": 0.6830251812934875, "learning_rate": 0.0009428851540616246, "loss": 0.4867, "step": 2145 }, { "epoch": 1.1988826815642457, "grad_norm": 0.826819896697998, "learning_rate": 0.0009428571428571429, "loss": 0.4842, "step": 2146 }, { "epoch": 1.199441340782123, "grad_norm": 0.6691630482673645, "learning_rate": 0.0009428291316526611, "loss": 0.4429, "step": 2147 }, { "epoch": 1.2, "grad_norm": 0.5742301940917969, "learning_rate": 0.0009428011204481794, "loss": 0.5749, "step": 2148 }, { "epoch": 1.2005586592178772, "grad_norm": 0.5265778303146362, "learning_rate": 0.0009427731092436976, "loss": 0.4716, "step": 2149 }, { "epoch": 1.2011173184357542, "grad_norm": 1.4679458141326904, "learning_rate": 0.0009427450980392157, "loss": 0.4507, "step": 2150 }, { "epoch": 1.2016759776536312, "grad_norm": 0.9520155191421509, "learning_rate": 0.0009427170868347339, "loss": 0.5619, "step": 2151 }, { "epoch": 1.2022346368715084, "grad_norm": 2.3973822593688965, "learning_rate": 0.0009426890756302521, "loss": 0.6133, "step": 2152 }, { "epoch": 1.2027932960893855, "grad_norm": 0.5013342499732971, "learning_rate": 0.0009426610644257704, "loss": 0.4829, "step": 2153 }, { "epoch": 1.2033519553072627, "grad_norm": 0.9384273886680603, "learning_rate": 0.0009426330532212886, "loss": 0.4524, "step": 2154 }, { "epoch": 
1.2039106145251397, "grad_norm": 0.46103498339653015, "learning_rate": 0.0009426050420168067, "loss": 0.4417, "step": 2155 }, { "epoch": 1.2044692737430167, "grad_norm": 2.0672597885131836, "learning_rate": 0.0009425770308123249, "loss": 0.5018, "step": 2156 }, { "epoch": 1.205027932960894, "grad_norm": 0.7411819696426392, "learning_rate": 0.0009425490196078431, "loss": 0.4332, "step": 2157 }, { "epoch": 1.205586592178771, "grad_norm": 0.46277377009391785, "learning_rate": 0.0009425210084033614, "loss": 0.5167, "step": 2158 }, { "epoch": 1.206145251396648, "grad_norm": 0.554575502872467, "learning_rate": 0.0009424929971988796, "loss": 0.5481, "step": 2159 }, { "epoch": 1.2067039106145252, "grad_norm": 0.7418820858001709, "learning_rate": 0.0009424649859943977, "loss": 0.6344, "step": 2160 }, { "epoch": 1.2072625698324022, "grad_norm": 0.8901815414428711, "learning_rate": 0.0009424369747899159, "loss": 0.6099, "step": 2161 }, { "epoch": 1.2078212290502792, "grad_norm": 0.4801618456840515, "learning_rate": 0.0009424089635854341, "loss": 0.5136, "step": 2162 }, { "epoch": 1.2083798882681565, "grad_norm": 28.97088050842285, "learning_rate": 0.0009423809523809525, "loss": 0.3598, "step": 2163 }, { "epoch": 1.2089385474860335, "grad_norm": 0.5268792510032654, "learning_rate": 0.0009423529411764707, "loss": 0.5499, "step": 2164 }, { "epoch": 1.2094972067039107, "grad_norm": 0.5892419815063477, "learning_rate": 0.0009423249299719889, "loss": 0.563, "step": 2165 }, { "epoch": 1.2100558659217877, "grad_norm": 1.8505092859268188, "learning_rate": 0.000942296918767507, "loss": 0.692, "step": 2166 }, { "epoch": 1.2106145251396647, "grad_norm": 0.7788013219833374, "learning_rate": 0.0009422689075630252, "loss": 0.511, "step": 2167 }, { "epoch": 1.211173184357542, "grad_norm": 0.7060709595680237, "learning_rate": 0.0009422408963585435, "loss": 0.6062, "step": 2168 }, { "epoch": 1.211731843575419, "grad_norm": 0.5576009750366211, "learning_rate": 0.0009422128851540617, "loss": 
0.5004, "step": 2169 }, { "epoch": 1.2122905027932962, "grad_norm": 0.41443267464637756, "learning_rate": 0.0009421848739495799, "loss": 0.4762, "step": 2170 }, { "epoch": 1.2128491620111732, "grad_norm": 0.662115752696991, "learning_rate": 0.000942156862745098, "loss": 0.4955, "step": 2171 }, { "epoch": 1.2134078212290502, "grad_norm": 0.6038459539413452, "learning_rate": 0.0009421288515406162, "loss": 0.4901, "step": 2172 }, { "epoch": 1.2139664804469275, "grad_norm": 0.7786489129066467, "learning_rate": 0.0009421008403361345, "loss": 0.4629, "step": 2173 }, { "epoch": 1.2145251396648045, "grad_norm": 0.5956007838249207, "learning_rate": 0.0009420728291316527, "loss": 0.5035, "step": 2174 }, { "epoch": 1.2150837988826815, "grad_norm": 3.5090713500976562, "learning_rate": 0.0009420448179271709, "loss": 0.5553, "step": 2175 }, { "epoch": 1.2156424581005587, "grad_norm": 0.5134099125862122, "learning_rate": 0.000942016806722689, "loss": 0.5324, "step": 2176 }, { "epoch": 1.2162011173184357, "grad_norm": 0.6300668716430664, "learning_rate": 0.0009419887955182072, "loss": 0.7685, "step": 2177 }, { "epoch": 1.2167597765363127, "grad_norm": 1.9936174154281616, "learning_rate": 0.0009419607843137256, "loss": 0.4019, "step": 2178 }, { "epoch": 1.21731843575419, "grad_norm": 0.48586663603782654, "learning_rate": 0.0009419327731092438, "loss": 0.6216, "step": 2179 }, { "epoch": 1.217877094972067, "grad_norm": 0.5281707048416138, "learning_rate": 0.000941904761904762, "loss": 0.5676, "step": 2180 }, { "epoch": 1.2184357541899442, "grad_norm": 1.4248437881469727, "learning_rate": 0.0009418767507002802, "loss": 0.4664, "step": 2181 }, { "epoch": 1.2189944134078212, "grad_norm": 1.0297634601593018, "learning_rate": 0.0009418487394957983, "loss": 0.4404, "step": 2182 }, { "epoch": 1.2195530726256982, "grad_norm": 1.143286108970642, "learning_rate": 0.0009418207282913166, "loss": 0.5365, "step": 2183 }, { "epoch": 1.2201117318435755, "grad_norm": 0.5228847861289978, 
"learning_rate": 0.0009417927170868348, "loss": 0.5826, "step": 2184 }, { "epoch": 1.2206703910614525, "grad_norm": 0.5148554444313049, "learning_rate": 0.000941764705882353, "loss": 0.546, "step": 2185 }, { "epoch": 1.2212290502793297, "grad_norm": 0.4707455039024353, "learning_rate": 0.0009417366946778712, "loss": 0.5057, "step": 2186 }, { "epoch": 1.2217877094972067, "grad_norm": 0.8517330288887024, "learning_rate": 0.0009417086834733893, "loss": 0.4911, "step": 2187 }, { "epoch": 1.2223463687150837, "grad_norm": 0.7839780449867249, "learning_rate": 0.0009416806722689076, "loss": 0.4379, "step": 2188 }, { "epoch": 1.222905027932961, "grad_norm": 0.3970915973186493, "learning_rate": 0.0009416526610644258, "loss": 0.3645, "step": 2189 }, { "epoch": 1.223463687150838, "grad_norm": 1.4941742420196533, "learning_rate": 0.000941624649859944, "loss": 0.5191, "step": 2190 }, { "epoch": 1.2240223463687152, "grad_norm": 0.6183914542198181, "learning_rate": 0.0009415966386554622, "loss": 0.4913, "step": 2191 }, { "epoch": 1.2245810055865922, "grad_norm": 0.5494821667671204, "learning_rate": 0.0009415686274509803, "loss": 0.5324, "step": 2192 }, { "epoch": 1.2251396648044692, "grad_norm": 0.8816516995429993, "learning_rate": 0.0009415406162464986, "loss": 0.5819, "step": 2193 }, { "epoch": 1.2256983240223465, "grad_norm": 1.034061312675476, "learning_rate": 0.0009415126050420168, "loss": 0.5174, "step": 2194 }, { "epoch": 1.2262569832402235, "grad_norm": 0.5098188519477844, "learning_rate": 0.000941484593837535, "loss": 0.596, "step": 2195 }, { "epoch": 1.2268156424581005, "grad_norm": 1.2608144283294678, "learning_rate": 0.0009414565826330533, "loss": 0.4305, "step": 2196 }, { "epoch": 1.2273743016759777, "grad_norm": 0.5888888239860535, "learning_rate": 0.0009414285714285715, "loss": 0.3995, "step": 2197 }, { "epoch": 1.2279329608938547, "grad_norm": 0.8148500323295593, "learning_rate": 0.0009414005602240897, "loss": 0.5187, "step": 2198 }, { "epoch": 1.2284916201117317, 
"grad_norm": 0.5967636704444885, "learning_rate": 0.0009413725490196079, "loss": 0.5153, "step": 2199 }, { "epoch": 1.229050279329609, "grad_norm": 0.9023078083992004, "learning_rate": 0.0009413445378151261, "loss": 0.443, "step": 2200 }, { "epoch": 1.229608938547486, "grad_norm": 0.6595953702926636, "learning_rate": 0.0009413165266106443, "loss": 0.6693, "step": 2201 }, { "epoch": 1.2301675977653632, "grad_norm": 0.4447948634624481, "learning_rate": 0.0009412885154061625, "loss": 0.3766, "step": 2202 }, { "epoch": 1.2307262569832402, "grad_norm": 2.511414051055908, "learning_rate": 0.0009412605042016807, "loss": 0.4313, "step": 2203 }, { "epoch": 1.2312849162011172, "grad_norm": 1.5143481492996216, "learning_rate": 0.0009412324929971989, "loss": 0.5101, "step": 2204 }, { "epoch": 1.2318435754189945, "grad_norm": 0.659372091293335, "learning_rate": 0.0009412044817927171, "loss": 0.4999, "step": 2205 }, { "epoch": 1.2324022346368715, "grad_norm": 0.5657572746276855, "learning_rate": 0.0009411764705882353, "loss": 0.4752, "step": 2206 }, { "epoch": 1.2329608938547487, "grad_norm": 0.5374714732170105, "learning_rate": 0.0009411484593837535, "loss": 0.5059, "step": 2207 }, { "epoch": 1.2335195530726257, "grad_norm": 1.0359647274017334, "learning_rate": 0.0009411204481792717, "loss": 0.5077, "step": 2208 }, { "epoch": 1.2340782122905027, "grad_norm": 0.7191410064697266, "learning_rate": 0.0009410924369747899, "loss": 0.4023, "step": 2209 }, { "epoch": 1.23463687150838, "grad_norm": 0.8112833499908447, "learning_rate": 0.0009410644257703081, "loss": 0.5111, "step": 2210 }, { "epoch": 1.235195530726257, "grad_norm": 0.760631799697876, "learning_rate": 0.0009410364145658264, "loss": 0.637, "step": 2211 }, { "epoch": 1.235754189944134, "grad_norm": 14.107086181640625, "learning_rate": 0.0009410084033613446, "loss": 0.5136, "step": 2212 }, { "epoch": 1.2363128491620112, "grad_norm": 0.7132804989814758, "learning_rate": 0.0009409803921568629, "loss": 0.4148, "step": 2213 }, { 
"epoch": 1.2368715083798882, "grad_norm": 0.5110153555870056, "learning_rate": 0.000940952380952381, "loss": 0.4572, "step": 2214 }, { "epoch": 1.2374301675977653, "grad_norm": 0.7452877759933472, "learning_rate": 0.0009409243697478992, "loss": 0.4563, "step": 2215 }, { "epoch": 1.2379888268156425, "grad_norm": 0.6648342609405518, "learning_rate": 0.0009408963585434174, "loss": 0.4606, "step": 2216 }, { "epoch": 1.2385474860335195, "grad_norm": 0.5784304738044739, "learning_rate": 0.0009408683473389356, "loss": 0.5491, "step": 2217 }, { "epoch": 1.2391061452513967, "grad_norm": 0.6082378029823303, "learning_rate": 0.0009408403361344539, "loss": 0.5645, "step": 2218 }, { "epoch": 1.2396648044692737, "grad_norm": 0.8048925995826721, "learning_rate": 0.000940812324929972, "loss": 0.5302, "step": 2219 }, { "epoch": 1.2402234636871508, "grad_norm": 2.321373224258423, "learning_rate": 0.0009407843137254902, "loss": 0.4928, "step": 2220 }, { "epoch": 1.240782122905028, "grad_norm": 0.5194941759109497, "learning_rate": 0.0009407563025210084, "loss": 0.4633, "step": 2221 }, { "epoch": 1.241340782122905, "grad_norm": 8.402451515197754, "learning_rate": 0.0009407282913165266, "loss": 0.4617, "step": 2222 }, { "epoch": 1.2418994413407822, "grad_norm": 0.4527837336063385, "learning_rate": 0.0009407002801120449, "loss": 0.5353, "step": 2223 }, { "epoch": 1.2424581005586592, "grad_norm": 0.7383092045783997, "learning_rate": 0.000940672268907563, "loss": 0.5599, "step": 2224 }, { "epoch": 1.2430167597765363, "grad_norm": 2.717256546020508, "learning_rate": 0.0009406442577030812, "loss": 0.5823, "step": 2225 }, { "epoch": 1.2435754189944135, "grad_norm": 0.6024227142333984, "learning_rate": 0.0009406162464985994, "loss": 0.5049, "step": 2226 }, { "epoch": 1.2441340782122905, "grad_norm": 0.48112189769744873, "learning_rate": 0.0009405882352941176, "loss": 0.4484, "step": 2227 }, { "epoch": 1.2446927374301675, "grad_norm": 1.0186944007873535, "learning_rate": 0.0009405602240896359, 
"loss": 0.5297, "step": 2228 }, { "epoch": 1.2452513966480447, "grad_norm": 0.48272573947906494, "learning_rate": 0.0009405322128851542, "loss": 0.4606, "step": 2229 }, { "epoch": 1.2458100558659218, "grad_norm": 0.6424063444137573, "learning_rate": 0.0009405042016806723, "loss": 0.5134, "step": 2230 }, { "epoch": 1.2463687150837988, "grad_norm": 0.8302348852157593, "learning_rate": 0.0009404761904761905, "loss": 0.4915, "step": 2231 }, { "epoch": 1.246927374301676, "grad_norm": 0.5846266150474548, "learning_rate": 0.0009404481792717087, "loss": 0.444, "step": 2232 }, { "epoch": 1.247486033519553, "grad_norm": 0.5861256718635559, "learning_rate": 0.0009404201680672269, "loss": 0.4968, "step": 2233 }, { "epoch": 1.2480446927374302, "grad_norm": 0.5197890400886536, "learning_rate": 0.0009403921568627452, "loss": 0.4419, "step": 2234 }, { "epoch": 1.2486033519553073, "grad_norm": 3.3656036853790283, "learning_rate": 0.0009403641456582633, "loss": 0.4971, "step": 2235 }, { "epoch": 1.2491620111731843, "grad_norm": 2.8907134532928467, "learning_rate": 0.0009403361344537815, "loss": 0.4644, "step": 2236 }, { "epoch": 1.2497206703910615, "grad_norm": 0.8501009345054626, "learning_rate": 0.0009403081232492997, "loss": 0.4333, "step": 2237 }, { "epoch": 1.2502793296089385, "grad_norm": 0.7901805639266968, "learning_rate": 0.0009402801120448179, "loss": 0.5054, "step": 2238 }, { "epoch": 1.2508379888268157, "grad_norm": 9.953897476196289, "learning_rate": 0.0009402521008403362, "loss": 0.5503, "step": 2239 }, { "epoch": 1.2513966480446927, "grad_norm": 0.4574260413646698, "learning_rate": 0.0009402240896358543, "loss": 0.413, "step": 2240 }, { "epoch": 1.2519553072625698, "grad_norm": 0.880647599697113, "learning_rate": 0.0009401960784313725, "loss": 0.488, "step": 2241 }, { "epoch": 1.252513966480447, "grad_norm": 0.6018409729003906, "learning_rate": 0.0009401680672268907, "loss": 0.4849, "step": 2242 }, { "epoch": 1.253072625698324, "grad_norm": 0.7612437009811401, 
"learning_rate": 0.0009401400560224089, "loss": 0.5401, "step": 2243 }, { "epoch": 1.2536312849162012, "grad_norm": 0.6563434600830078, "learning_rate": 0.0009401120448179273, "loss": 0.512, "step": 2244 }, { "epoch": 1.2541899441340782, "grad_norm": 0.7319206595420837, "learning_rate": 0.0009400840336134455, "loss": 0.6396, "step": 2245 }, { "epoch": 1.2547486033519553, "grad_norm": 0.7326250076293945, "learning_rate": 0.0009400560224089636, "loss": 0.4986, "step": 2246 }, { "epoch": 1.2553072625698323, "grad_norm": 0.666903555393219, "learning_rate": 0.0009400280112044818, "loss": 0.362, "step": 2247 }, { "epoch": 1.2558659217877095, "grad_norm": 0.6648302674293518, "learning_rate": 0.00094, "loss": 0.4379, "step": 2248 }, { "epoch": 1.2564245810055865, "grad_norm": 0.6782199144363403, "learning_rate": 0.0009399719887955183, "loss": 0.3984, "step": 2249 }, { "epoch": 1.2569832402234637, "grad_norm": 0.8376119136810303, "learning_rate": 0.0009399439775910365, "loss": 0.5668, "step": 2250 }, { "epoch": 1.2575418994413408, "grad_norm": 0.7745038270950317, "learning_rate": 0.0009399159663865546, "loss": 0.5542, "step": 2251 }, { "epoch": 1.2581005586592178, "grad_norm": 0.49192437529563904, "learning_rate": 0.0009398879551820728, "loss": 0.4786, "step": 2252 }, { "epoch": 1.258659217877095, "grad_norm": 0.7043928503990173, "learning_rate": 0.000939859943977591, "loss": 0.4761, "step": 2253 }, { "epoch": 1.259217877094972, "grad_norm": 0.47638171911239624, "learning_rate": 0.0009398319327731093, "loss": 0.464, "step": 2254 }, { "epoch": 1.2597765363128492, "grad_norm": 0.6261572241783142, "learning_rate": 0.0009398039215686275, "loss": 0.3789, "step": 2255 }, { "epoch": 1.2603351955307263, "grad_norm": 0.9678022265434265, "learning_rate": 0.0009397759103641456, "loss": 0.4295, "step": 2256 }, { "epoch": 1.2608938547486033, "grad_norm": 0.8020207285881042, "learning_rate": 0.0009397478991596638, "loss": 0.6257, "step": 2257 }, { "epoch": 1.2614525139664805, 
"grad_norm": 0.5573917031288147, "learning_rate": 0.000939719887955182, "loss": 0.4932, "step": 2258 }, { "epoch": 1.2620111731843575, "grad_norm": 1.220342755317688, "learning_rate": 0.0009396918767507003, "loss": 0.5833, "step": 2259 }, { "epoch": 1.2625698324022347, "grad_norm": 0.5947352647781372, "learning_rate": 0.0009396638655462186, "loss": 0.5532, "step": 2260 }, { "epoch": 1.2631284916201118, "grad_norm": 0.6306976675987244, "learning_rate": 0.0009396358543417368, "loss": 0.4108, "step": 2261 }, { "epoch": 1.2636871508379888, "grad_norm": 1.0991276502609253, "learning_rate": 0.0009396078431372549, "loss": 0.5008, "step": 2262 }, { "epoch": 1.264245810055866, "grad_norm": 0.4786874055862427, "learning_rate": 0.0009395798319327731, "loss": 0.4771, "step": 2263 }, { "epoch": 1.264804469273743, "grad_norm": 1.4687696695327759, "learning_rate": 0.0009395518207282914, "loss": 0.6492, "step": 2264 }, { "epoch": 1.2653631284916202, "grad_norm": 0.4954474866390228, "learning_rate": 0.0009395238095238096, "loss": 0.4555, "step": 2265 }, { "epoch": 1.2659217877094973, "grad_norm": 0.5406965613365173, "learning_rate": 0.0009394957983193278, "loss": 0.5395, "step": 2266 }, { "epoch": 1.2664804469273743, "grad_norm": 1.00322687625885, "learning_rate": 0.0009394677871148459, "loss": 0.4583, "step": 2267 }, { "epoch": 1.2670391061452513, "grad_norm": 0.7092496752738953, "learning_rate": 0.0009394397759103641, "loss": 0.5007, "step": 2268 }, { "epoch": 1.2675977653631285, "grad_norm": 0.8788418173789978, "learning_rate": 0.0009394117647058824, "loss": 0.489, "step": 2269 }, { "epoch": 1.2681564245810055, "grad_norm": 0.5346425771713257, "learning_rate": 0.0009393837535014006, "loss": 0.4748, "step": 2270 }, { "epoch": 1.2687150837988828, "grad_norm": 0.531053900718689, "learning_rate": 0.0009393557422969188, "loss": 0.3946, "step": 2271 }, { "epoch": 1.2692737430167598, "grad_norm": 0.5598241686820984, "learning_rate": 0.0009393277310924369, "loss": 0.5454, "step": 2272 
}, { "epoch": 1.2698324022346368, "grad_norm": 0.6497790813446045, "learning_rate": 0.0009392997198879551, "loss": 0.4848, "step": 2273 }, { "epoch": 1.270391061452514, "grad_norm": 0.47104766964912415, "learning_rate": 0.0009392717086834734, "loss": 0.4843, "step": 2274 }, { "epoch": 1.270949720670391, "grad_norm": 0.6373875737190247, "learning_rate": 0.0009392436974789916, "loss": 0.4422, "step": 2275 }, { "epoch": 1.2715083798882683, "grad_norm": 0.6158223152160645, "learning_rate": 0.0009392156862745098, "loss": 0.5435, "step": 2276 }, { "epoch": 1.2720670391061453, "grad_norm": 0.8325719833374023, "learning_rate": 0.000939187675070028, "loss": 0.4364, "step": 2277 }, { "epoch": 1.2726256983240223, "grad_norm": 0.5909124612808228, "learning_rate": 0.0009391596638655462, "loss": 0.5291, "step": 2278 }, { "epoch": 1.2731843575418995, "grad_norm": 0.6929603219032288, "learning_rate": 0.0009391316526610645, "loss": 0.5202, "step": 2279 }, { "epoch": 1.2737430167597765, "grad_norm": 0.5221192240715027, "learning_rate": 0.0009391036414565827, "loss": 0.4662, "step": 2280 }, { "epoch": 1.2743016759776538, "grad_norm": 0.4766484797000885, "learning_rate": 0.0009390756302521009, "loss": 0.3955, "step": 2281 }, { "epoch": 1.2748603351955308, "grad_norm": 0.4216242730617523, "learning_rate": 0.0009390476190476191, "loss": 0.3814, "step": 2282 }, { "epoch": 1.2754189944134078, "grad_norm": 0.45738425850868225, "learning_rate": 0.0009390196078431372, "loss": 0.5629, "step": 2283 }, { "epoch": 1.2759776536312848, "grad_norm": 1.963683009147644, "learning_rate": 0.0009389915966386555, "loss": 0.5602, "step": 2284 }, { "epoch": 1.276536312849162, "grad_norm": 0.45442578196525574, "learning_rate": 0.0009389635854341737, "loss": 0.5522, "step": 2285 }, { "epoch": 1.277094972067039, "grad_norm": 0.5828547477722168, "learning_rate": 0.0009389355742296919, "loss": 0.3992, "step": 2286 }, { "epoch": 1.2776536312849163, "grad_norm": 0.6319419145584106, "learning_rate": 
0.0009389075630252101, "loss": 0.4846, "step": 2287 }, { "epoch": 1.2782122905027933, "grad_norm": 0.5193390846252441, "learning_rate": 0.0009388795518207282, "loss": 0.4614, "step": 2288 }, { "epoch": 1.2787709497206703, "grad_norm": 0.7177364230155945, "learning_rate": 0.0009388515406162465, "loss": 0.4653, "step": 2289 }, { "epoch": 1.2793296089385475, "grad_norm": 0.3906303346157074, "learning_rate": 0.0009388235294117647, "loss": 0.4146, "step": 2290 }, { "epoch": 1.2798882681564245, "grad_norm": 0.47126659750938416, "learning_rate": 0.0009387955182072829, "loss": 0.4305, "step": 2291 }, { "epoch": 1.2804469273743018, "grad_norm": 0.41768181324005127, "learning_rate": 0.0009387675070028011, "loss": 0.4524, "step": 2292 }, { "epoch": 1.2810055865921788, "grad_norm": 0.5221830010414124, "learning_rate": 0.0009387394957983194, "loss": 0.5144, "step": 2293 }, { "epoch": 1.2815642458100558, "grad_norm": 0.8605964779853821, "learning_rate": 0.0009387114845938376, "loss": 0.4509, "step": 2294 }, { "epoch": 1.282122905027933, "grad_norm": 1.1724451780319214, "learning_rate": 0.0009386834733893558, "loss": 0.6453, "step": 2295 }, { "epoch": 1.28268156424581, "grad_norm": 0.6247095465660095, "learning_rate": 0.000938655462184874, "loss": 0.5012, "step": 2296 }, { "epoch": 1.2832402234636873, "grad_norm": 0.44797074794769287, "learning_rate": 0.0009386274509803922, "loss": 0.4417, "step": 2297 }, { "epoch": 1.2837988826815643, "grad_norm": 0.48256805539131165, "learning_rate": 0.0009385994397759104, "loss": 0.4259, "step": 2298 }, { "epoch": 1.2843575418994413, "grad_norm": 0.5220387578010559, "learning_rate": 0.0009385714285714286, "loss": 0.419, "step": 2299 }, { "epoch": 1.2849162011173183, "grad_norm": 1.777891993522644, "learning_rate": 0.0009385434173669468, "loss": 0.3602, "step": 2300 }, { "epoch": 1.2854748603351955, "grad_norm": 0.6454483270645142, "learning_rate": 0.000938515406162465, "loss": 0.5735, "step": 2301 }, { "epoch": 1.2860335195530725, "grad_norm": 
0.6514875888824463, "learning_rate": 0.0009384873949579832, "loss": 0.4505, "step": 2302 }, { "epoch": 1.2865921787709498, "grad_norm": 1.135413408279419, "learning_rate": 0.0009384593837535014, "loss": 0.4506, "step": 2303 }, { "epoch": 1.2871508379888268, "grad_norm": 1.0130354166030884, "learning_rate": 0.0009384313725490196, "loss": 0.3847, "step": 2304 }, { "epoch": 1.2877094972067038, "grad_norm": 0.6721040606498718, "learning_rate": 0.0009384033613445378, "loss": 0.4477, "step": 2305 }, { "epoch": 1.288268156424581, "grad_norm": 0.7059589624404907, "learning_rate": 0.000938375350140056, "loss": 0.7653, "step": 2306 }, { "epoch": 1.288826815642458, "grad_norm": 0.4800010621547699, "learning_rate": 0.0009383473389355742, "loss": 0.4682, "step": 2307 }, { "epoch": 1.2893854748603353, "grad_norm": 0.5354390740394592, "learning_rate": 0.0009383193277310924, "loss": 0.4465, "step": 2308 }, { "epoch": 1.2899441340782123, "grad_norm": 0.5681769847869873, "learning_rate": 0.0009382913165266108, "loss": 0.5501, "step": 2309 }, { "epoch": 1.2905027932960893, "grad_norm": 0.5556226372718811, "learning_rate": 0.0009382633053221289, "loss": 0.6201, "step": 2310 }, { "epoch": 1.2910614525139665, "grad_norm": 0.5704392194747925, "learning_rate": 0.0009382352941176471, "loss": 0.5139, "step": 2311 }, { "epoch": 1.2916201117318435, "grad_norm": 0.5250219106674194, "learning_rate": 0.0009382072829131653, "loss": 0.5067, "step": 2312 }, { "epoch": 1.2921787709497208, "grad_norm": 0.48458990454673767, "learning_rate": 0.0009381792717086835, "loss": 0.3434, "step": 2313 }, { "epoch": 1.2927374301675978, "grad_norm": 0.4792317748069763, "learning_rate": 0.0009381512605042018, "loss": 0.4662, "step": 2314 }, { "epoch": 1.2932960893854748, "grad_norm": 0.44975733757019043, "learning_rate": 0.0009381232492997199, "loss": 0.4624, "step": 2315 }, { "epoch": 1.293854748603352, "grad_norm": 0.8396225571632385, "learning_rate": 0.0009380952380952381, "loss": 0.5841, "step": 2316 }, { 
"epoch": 1.294413407821229, "grad_norm": 2.294858932495117, "learning_rate": 0.0009380672268907563, "loss": 0.5021, "step": 2317 }, { "epoch": 1.2949720670391063, "grad_norm": 1.1718876361846924, "learning_rate": 0.0009380392156862745, "loss": 0.5981, "step": 2318 }, { "epoch": 1.2955307262569833, "grad_norm": 1.5335103273391724, "learning_rate": 0.0009380112044817928, "loss": 0.635, "step": 2319 }, { "epoch": 1.2960893854748603, "grad_norm": 0.9901288151741028, "learning_rate": 0.0009379831932773109, "loss": 0.5752, "step": 2320 }, { "epoch": 1.2966480446927373, "grad_norm": 1.2723642587661743, "learning_rate": 0.0009379551820728291, "loss": 0.584, "step": 2321 }, { "epoch": 1.2972067039106145, "grad_norm": 0.5291574001312256, "learning_rate": 0.0009379271708683473, "loss": 0.354, "step": 2322 }, { "epoch": 1.2977653631284916, "grad_norm": 0.8696710467338562, "learning_rate": 0.0009378991596638655, "loss": 0.4486, "step": 2323 }, { "epoch": 1.2983240223463688, "grad_norm": 0.6118862628936768, "learning_rate": 0.0009378711484593838, "loss": 0.5562, "step": 2324 }, { "epoch": 1.2988826815642458, "grad_norm": 0.6964899301528931, "learning_rate": 0.000937843137254902, "loss": 0.4508, "step": 2325 }, { "epoch": 1.2994413407821228, "grad_norm": 0.6855241656303406, "learning_rate": 0.0009378151260504201, "loss": 0.5312, "step": 2326 }, { "epoch": 1.3, "grad_norm": 1.0400898456573486, "learning_rate": 0.0009377871148459384, "loss": 0.5504, "step": 2327 }, { "epoch": 1.300558659217877, "grad_norm": 0.5161594152450562, "learning_rate": 0.0009377591036414566, "loss": 0.5327, "step": 2328 }, { "epoch": 1.3011173184357543, "grad_norm": 0.5096378326416016, "learning_rate": 0.0009377310924369749, "loss": 0.4295, "step": 2329 }, { "epoch": 1.3016759776536313, "grad_norm": 0.4510524272918701, "learning_rate": 0.0009377030812324931, "loss": 0.4408, "step": 2330 }, { "epoch": 1.3022346368715083, "grad_norm": 0.43716779351234436, "learning_rate": 0.0009376750700280112, "loss": 
0.4222, "step": 2331 }, { "epoch": 1.3027932960893855, "grad_norm": 0.6374119520187378, "learning_rate": 0.0009376470588235294, "loss": 0.3717, "step": 2332 }, { "epoch": 1.3033519553072626, "grad_norm": 0.5533728003501892, "learning_rate": 0.0009376190476190476, "loss": 0.5172, "step": 2333 }, { "epoch": 1.3039106145251398, "grad_norm": 0.7198346257209778, "learning_rate": 0.0009375910364145659, "loss": 0.5654, "step": 2334 }, { "epoch": 1.3044692737430168, "grad_norm": 0.4423556327819824, "learning_rate": 0.0009375630252100841, "loss": 0.4037, "step": 2335 }, { "epoch": 1.3050279329608938, "grad_norm": 0.5753763914108276, "learning_rate": 0.0009375350140056022, "loss": 0.503, "step": 2336 }, { "epoch": 1.3055865921787708, "grad_norm": 1.4048054218292236, "learning_rate": 0.0009375070028011204, "loss": 0.4906, "step": 2337 }, { "epoch": 1.306145251396648, "grad_norm": 0.6295005679130554, "learning_rate": 0.0009374789915966386, "loss": 0.5017, "step": 2338 }, { "epoch": 1.306703910614525, "grad_norm": 0.4695497155189514, "learning_rate": 0.0009374509803921569, "loss": 0.507, "step": 2339 }, { "epoch": 1.3072625698324023, "grad_norm": 0.4623326361179352, "learning_rate": 0.0009374229691876751, "loss": 0.4622, "step": 2340 }, { "epoch": 1.3078212290502793, "grad_norm": 0.498626172542572, "learning_rate": 0.0009373949579831933, "loss": 0.4211, "step": 2341 }, { "epoch": 1.3083798882681563, "grad_norm": 1.420953631401062, "learning_rate": 0.0009373669467787114, "loss": 0.4418, "step": 2342 }, { "epoch": 1.3089385474860336, "grad_norm": 0.6144613027572632, "learning_rate": 0.0009373389355742296, "loss": 0.5697, "step": 2343 }, { "epoch": 1.3094972067039106, "grad_norm": 0.5299192070960999, "learning_rate": 0.000937310924369748, "loss": 0.428, "step": 2344 }, { "epoch": 1.3100558659217878, "grad_norm": 0.5369188189506531, "learning_rate": 0.0009372829131652662, "loss": 0.3956, "step": 2345 }, { "epoch": 1.3106145251396648, "grad_norm": 0.853063702583313, "learning_rate": 
0.0009372549019607844, "loss": 0.5381, "step": 2346 }, { "epoch": 1.3111731843575418, "grad_norm": 6.990716934204102, "learning_rate": 0.0009372268907563025, "loss": 0.5895, "step": 2347 }, { "epoch": 1.311731843575419, "grad_norm": 0.8140488266944885, "learning_rate": 0.0009371988795518207, "loss": 0.5286, "step": 2348 }, { "epoch": 1.312290502793296, "grad_norm": 1.4516748189926147, "learning_rate": 0.000937170868347339, "loss": 0.4647, "step": 2349 }, { "epoch": 1.3128491620111733, "grad_norm": 0.6451416015625, "learning_rate": 0.0009371428571428572, "loss": 0.522, "step": 2350 }, { "epoch": 1.3134078212290503, "grad_norm": 0.7677500247955322, "learning_rate": 0.0009371148459383754, "loss": 0.5252, "step": 2351 }, { "epoch": 1.3139664804469273, "grad_norm": 1.2045583724975586, "learning_rate": 0.0009370868347338935, "loss": 0.5289, "step": 2352 }, { "epoch": 1.3145251396648043, "grad_norm": 0.47654032707214355, "learning_rate": 0.0009370588235294117, "loss": 0.4123, "step": 2353 }, { "epoch": 1.3150837988826816, "grad_norm": 0.4769761860370636, "learning_rate": 0.00093703081232493, "loss": 0.4522, "step": 2354 }, { "epoch": 1.3156424581005586, "grad_norm": 0.8925811648368835, "learning_rate": 0.0009370028011204482, "loss": 0.4944, "step": 2355 }, { "epoch": 1.3162011173184358, "grad_norm": 0.5771417617797852, "learning_rate": 0.0009369747899159664, "loss": 0.4573, "step": 2356 }, { "epoch": 1.3167597765363128, "grad_norm": 0.9821098446846008, "learning_rate": 0.0009369467787114846, "loss": 0.6322, "step": 2357 }, { "epoch": 1.3173184357541898, "grad_norm": 0.5378075838088989, "learning_rate": 0.0009369187675070027, "loss": 0.546, "step": 2358 }, { "epoch": 1.317877094972067, "grad_norm": 7.0606560707092285, "learning_rate": 0.000936890756302521, "loss": 0.6345, "step": 2359 }, { "epoch": 1.318435754189944, "grad_norm": 0.7089935541152954, "learning_rate": 0.0009368627450980393, "loss": 0.5802, "step": 2360 }, { "epoch": 1.3189944134078213, "grad_norm": 
1.0092034339904785, "learning_rate": 0.0009368347338935575, "loss": 0.5194, "step": 2361 }, { "epoch": 1.3195530726256983, "grad_norm": 1.243813395500183, "learning_rate": 0.0009368067226890757, "loss": 0.5829, "step": 2362 }, { "epoch": 1.3201117318435753, "grad_norm": 0.7206133604049683, "learning_rate": 0.0009367787114845938, "loss": 0.4782, "step": 2363 }, { "epoch": 1.3206703910614526, "grad_norm": 0.5967955589294434, "learning_rate": 0.0009367507002801121, "loss": 0.4524, "step": 2364 }, { "epoch": 1.3212290502793296, "grad_norm": 0.6767614483833313, "learning_rate": 0.0009367226890756303, "loss": 0.4303, "step": 2365 }, { "epoch": 1.3217877094972068, "grad_norm": 0.6356673240661621, "learning_rate": 0.0009366946778711485, "loss": 0.4548, "step": 2366 }, { "epoch": 1.3223463687150838, "grad_norm": 0.6191254258155823, "learning_rate": 0.0009366666666666667, "loss": 0.4844, "step": 2367 }, { "epoch": 1.3229050279329608, "grad_norm": 0.7626490592956543, "learning_rate": 0.0009366386554621848, "loss": 0.6647, "step": 2368 }, { "epoch": 1.323463687150838, "grad_norm": 0.7978389263153076, "learning_rate": 0.0009366106442577031, "loss": 0.4788, "step": 2369 }, { "epoch": 1.324022346368715, "grad_norm": 0.5534348487854004, "learning_rate": 0.0009365826330532213, "loss": 0.466, "step": 2370 }, { "epoch": 1.3245810055865923, "grad_norm": 0.6681626439094543, "learning_rate": 0.0009365546218487395, "loss": 0.4191, "step": 2371 }, { "epoch": 1.3251396648044693, "grad_norm": 0.6625450253486633, "learning_rate": 0.0009365266106442577, "loss": 0.4959, "step": 2372 }, { "epoch": 1.3256983240223463, "grad_norm": 1.1665563583374023, "learning_rate": 0.0009364985994397759, "loss": 0.4161, "step": 2373 }, { "epoch": 1.3262569832402233, "grad_norm": 0.9296486377716064, "learning_rate": 0.0009364705882352941, "loss": 0.4142, "step": 2374 }, { "epoch": 1.3268156424581006, "grad_norm": 0.6072700619697571, "learning_rate": 0.0009364425770308124, "loss": 0.4306, "step": 2375 }, { 
"epoch": 1.3273743016759776, "grad_norm": 0.5891187787055969, "learning_rate": 0.0009364145658263306, "loss": 0.5263, "step": 2376 }, { "epoch": 1.3279329608938548, "grad_norm": 0.5013577938079834, "learning_rate": 0.0009363865546218488, "loss": 0.428, "step": 2377 }, { "epoch": 1.3284916201117318, "grad_norm": 0.48129329085350037, "learning_rate": 0.000936358543417367, "loss": 0.4253, "step": 2378 }, { "epoch": 1.3290502793296088, "grad_norm": 3.904083728790283, "learning_rate": 0.0009363305322128852, "loss": 0.4978, "step": 2379 }, { "epoch": 1.329608938547486, "grad_norm": 0.6414837837219238, "learning_rate": 0.0009363025210084034, "loss": 0.468, "step": 2380 }, { "epoch": 1.330167597765363, "grad_norm": 1.2827571630477905, "learning_rate": 0.0009362745098039216, "loss": 0.5169, "step": 2381 }, { "epoch": 1.3307262569832403, "grad_norm": 0.7587721943855286, "learning_rate": 0.0009362464985994398, "loss": 0.385, "step": 2382 }, { "epoch": 1.3312849162011173, "grad_norm": 0.4111006557941437, "learning_rate": 0.000936218487394958, "loss": 0.4295, "step": 2383 }, { "epoch": 1.3318435754189943, "grad_norm": 0.951179563999176, "learning_rate": 0.0009361904761904763, "loss": 0.5763, "step": 2384 }, { "epoch": 1.3324022346368716, "grad_norm": 0.6663637161254883, "learning_rate": 0.0009361624649859944, "loss": 0.4535, "step": 2385 }, { "epoch": 1.3329608938547486, "grad_norm": 0.4615425765514374, "learning_rate": 0.0009361344537815126, "loss": 0.3139, "step": 2386 }, { "epoch": 1.3335195530726258, "grad_norm": 0.9143214821815491, "learning_rate": 0.0009361064425770308, "loss": 0.4999, "step": 2387 }, { "epoch": 1.3340782122905028, "grad_norm": 0.3537091016769409, "learning_rate": 0.000936078431372549, "loss": 0.3886, "step": 2388 }, { "epoch": 1.3346368715083798, "grad_norm": 2.323657512664795, "learning_rate": 0.0009360504201680673, "loss": 0.4529, "step": 2389 }, { "epoch": 1.3351955307262569, "grad_norm": 0.6156639456748962, "learning_rate": 0.0009360224089635854, 
"loss": 0.417, "step": 2390 }, { "epoch": 1.335754189944134, "grad_norm": 1.5982677936553955, "learning_rate": 0.0009359943977591036, "loss": 0.3718, "step": 2391 }, { "epoch": 1.336312849162011, "grad_norm": 0.5884528160095215, "learning_rate": 0.0009359663865546219, "loss": 0.5476, "step": 2392 }, { "epoch": 1.3368715083798883, "grad_norm": 0.4246930181980133, "learning_rate": 0.0009359383753501401, "loss": 0.4264, "step": 2393 }, { "epoch": 1.3374301675977653, "grad_norm": 0.5364925861358643, "learning_rate": 0.0009359103641456584, "loss": 0.4244, "step": 2394 }, { "epoch": 1.3379888268156424, "grad_norm": 0.5065272450447083, "learning_rate": 0.0009358823529411765, "loss": 0.4616, "step": 2395 }, { "epoch": 1.3385474860335196, "grad_norm": 0.6549285650253296, "learning_rate": 0.0009358543417366947, "loss": 0.5211, "step": 2396 }, { "epoch": 1.3391061452513966, "grad_norm": 0.5903199911117554, "learning_rate": 0.0009358263305322129, "loss": 0.5934, "step": 2397 }, { "epoch": 1.3396648044692738, "grad_norm": 2.045487880706787, "learning_rate": 0.0009357983193277311, "loss": 0.5649, "step": 2398 }, { "epoch": 1.3402234636871508, "grad_norm": 0.5242357850074768, "learning_rate": 0.0009357703081232494, "loss": 0.3817, "step": 2399 }, { "epoch": 1.3407821229050279, "grad_norm": 0.6028469800949097, "learning_rate": 0.0009357422969187676, "loss": 0.5627, "step": 2400 }, { "epoch": 1.341340782122905, "grad_norm": 0.5879208445549011, "learning_rate": 0.0009357142857142857, "loss": 0.4442, "step": 2401 }, { "epoch": 1.341899441340782, "grad_norm": 1.3531235456466675, "learning_rate": 0.0009356862745098039, "loss": 0.5208, "step": 2402 }, { "epoch": 1.3424581005586593, "grad_norm": 0.8171552419662476, "learning_rate": 0.0009356582633053221, "loss": 0.4274, "step": 2403 }, { "epoch": 1.3430167597765363, "grad_norm": 0.6056199669837952, "learning_rate": 0.0009356302521008404, "loss": 0.5542, "step": 2404 }, { "epoch": 1.3435754189944134, "grad_norm": 0.8101986646652222, 
"learning_rate": 0.0009356022408963586, "loss": 0.6412, "step": 2405 }, { "epoch": 1.3441340782122906, "grad_norm": 0.5022657513618469, "learning_rate": 0.0009355742296918767, "loss": 0.4424, "step": 2406 }, { "epoch": 1.3446927374301676, "grad_norm": 1.1738780736923218, "learning_rate": 0.0009355462184873949, "loss": 0.642, "step": 2407 }, { "epoch": 1.3452513966480448, "grad_norm": 0.43925851583480835, "learning_rate": 0.0009355182072829131, "loss": 0.5324, "step": 2408 }, { "epoch": 1.3458100558659218, "grad_norm": 0.9970035552978516, "learning_rate": 0.0009354901960784315, "loss": 0.584, "step": 2409 }, { "epoch": 1.3463687150837989, "grad_norm": 1.432970643043518, "learning_rate": 0.0009354621848739497, "loss": 0.4647, "step": 2410 }, { "epoch": 1.3469273743016759, "grad_norm": 0.7642507553100586, "learning_rate": 0.0009354341736694678, "loss": 0.4856, "step": 2411 }, { "epoch": 1.347486033519553, "grad_norm": 0.5948406457901001, "learning_rate": 0.000935406162464986, "loss": 0.4705, "step": 2412 }, { "epoch": 1.34804469273743, "grad_norm": 0.5157648324966431, "learning_rate": 0.0009353781512605042, "loss": 0.4436, "step": 2413 }, { "epoch": 1.3486033519553073, "grad_norm": 0.664602518081665, "learning_rate": 0.0009353501400560225, "loss": 0.4444, "step": 2414 }, { "epoch": 1.3491620111731844, "grad_norm": 0.5324051380157471, "learning_rate": 0.0009353221288515407, "loss": 0.4082, "step": 2415 }, { "epoch": 1.3497206703910614, "grad_norm": 0.7975985407829285, "learning_rate": 0.0009352941176470589, "loss": 0.4899, "step": 2416 }, { "epoch": 1.3502793296089386, "grad_norm": 0.5563843846321106, "learning_rate": 0.000935266106442577, "loss": 0.3955, "step": 2417 }, { "epoch": 1.3508379888268156, "grad_norm": 2.0421030521392822, "learning_rate": 0.0009352380952380952, "loss": 0.4895, "step": 2418 }, { "epoch": 1.3513966480446928, "grad_norm": 0.6686625480651855, "learning_rate": 0.0009352100840336135, "loss": 0.4811, "step": 2419 }, { "epoch": 1.3519553072625698, 
"grad_norm": 0.4972870349884033, "learning_rate": 0.0009351820728291317, "loss": 0.4943, "step": 2420 }, { "epoch": 1.3525139664804469, "grad_norm": 0.680006206035614, "learning_rate": 0.0009351540616246499, "loss": 0.4926, "step": 2421 }, { "epoch": 1.353072625698324, "grad_norm": 0.5970544219017029, "learning_rate": 0.000935126050420168, "loss": 0.437, "step": 2422 }, { "epoch": 1.353631284916201, "grad_norm": 0.48182040452957153, "learning_rate": 0.0009350980392156862, "loss": 0.4512, "step": 2423 }, { "epoch": 1.3541899441340783, "grad_norm": 0.6070398688316345, "learning_rate": 0.0009350700280112046, "loss": 0.6856, "step": 2424 }, { "epoch": 1.3547486033519553, "grad_norm": 0.5730800628662109, "learning_rate": 0.0009350420168067228, "loss": 0.558, "step": 2425 }, { "epoch": 1.3553072625698324, "grad_norm": 2.4738833904266357, "learning_rate": 0.000935014005602241, "loss": 0.4682, "step": 2426 }, { "epoch": 1.3558659217877094, "grad_norm": 0.8916347026824951, "learning_rate": 0.0009349859943977591, "loss": 0.611, "step": 2427 }, { "epoch": 1.3564245810055866, "grad_norm": 0.8283774852752686, "learning_rate": 0.0009349579831932773, "loss": 0.4222, "step": 2428 }, { "epoch": 1.3569832402234636, "grad_norm": 0.5654096007347107, "learning_rate": 0.0009349299719887956, "loss": 0.5877, "step": 2429 }, { "epoch": 1.3575418994413408, "grad_norm": 0.4563155770301819, "learning_rate": 0.0009349019607843138, "loss": 0.4535, "step": 2430 }, { "epoch": 1.3581005586592179, "grad_norm": 0.7458542585372925, "learning_rate": 0.000934873949579832, "loss": 0.39, "step": 2431 }, { "epoch": 1.3586592178770949, "grad_norm": 0.9126211404800415, "learning_rate": 0.0009348459383753502, "loss": 0.4917, "step": 2432 }, { "epoch": 1.359217877094972, "grad_norm": 0.8338176012039185, "learning_rate": 0.0009348179271708683, "loss": 0.476, "step": 2433 }, { "epoch": 1.3597765363128491, "grad_norm": 0.6514798402786255, "learning_rate": 0.0009347899159663866, "loss": 0.6019, "step": 2434 }, { 
"epoch": 1.3603351955307263, "grad_norm": 0.657715380191803, "learning_rate": 0.0009347619047619048, "loss": 0.4768, "step": 2435 }, { "epoch": 1.3608938547486034, "grad_norm": 0.6107572913169861, "learning_rate": 0.000934733893557423, "loss": 0.5799, "step": 2436 }, { "epoch": 1.3614525139664804, "grad_norm": 1.2885218858718872, "learning_rate": 0.0009347058823529412, "loss": 0.5081, "step": 2437 }, { "epoch": 1.3620111731843576, "grad_norm": 1.0166044235229492, "learning_rate": 0.0009346778711484593, "loss": 0.5626, "step": 2438 }, { "epoch": 1.3625698324022346, "grad_norm": 0.8314241766929626, "learning_rate": 0.0009346498599439776, "loss": 0.5662, "step": 2439 }, { "epoch": 1.3631284916201118, "grad_norm": 0.4049365818500519, "learning_rate": 0.0009346218487394958, "loss": 0.4666, "step": 2440 }, { "epoch": 1.3636871508379889, "grad_norm": 1.9691574573516846, "learning_rate": 0.000934593837535014, "loss": 0.3739, "step": 2441 }, { "epoch": 1.3642458100558659, "grad_norm": 1.2149285078048706, "learning_rate": 0.0009345658263305323, "loss": 0.6774, "step": 2442 }, { "epoch": 1.3648044692737429, "grad_norm": 9.740959167480469, "learning_rate": 0.0009345378151260504, "loss": 0.4811, "step": 2443 }, { "epoch": 1.3653631284916201, "grad_norm": 0.7420990467071533, "learning_rate": 0.0009345098039215687, "loss": 0.5085, "step": 2444 }, { "epoch": 1.3659217877094971, "grad_norm": 0.8175650835037231, "learning_rate": 0.0009344817927170869, "loss": 0.4328, "step": 2445 }, { "epoch": 1.3664804469273744, "grad_norm": 0.8845696449279785, "learning_rate": 0.0009344537815126051, "loss": 0.4291, "step": 2446 }, { "epoch": 1.3670391061452514, "grad_norm": 0.6789989471435547, "learning_rate": 0.0009344257703081233, "loss": 0.5748, "step": 2447 }, { "epoch": 1.3675977653631284, "grad_norm": 3.8430233001708984, "learning_rate": 0.0009343977591036415, "loss": 0.3884, "step": 2448 }, { "epoch": 1.3681564245810056, "grad_norm": 12.143961906433105, "learning_rate": 
0.0009343697478991596, "loss": 0.4391, "step": 2449 }, { "epoch": 1.3687150837988826, "grad_norm": 1.053959846496582, "learning_rate": 0.0009343417366946779, "loss": 0.4391, "step": 2450 }, { "epoch": 1.3692737430167599, "grad_norm": 0.7687263488769531, "learning_rate": 0.0009343137254901961, "loss": 0.5036, "step": 2451 }, { "epoch": 1.3698324022346369, "grad_norm": 0.618775486946106, "learning_rate": 0.0009342857142857143, "loss": 0.6145, "step": 2452 }, { "epoch": 1.3703910614525139, "grad_norm": 3.653212785720825, "learning_rate": 0.0009342577030812325, "loss": 0.5035, "step": 2453 }, { "epoch": 1.3709497206703911, "grad_norm": 0.7300713062286377, "learning_rate": 0.0009342296918767506, "loss": 0.4668, "step": 2454 }, { "epoch": 1.3715083798882681, "grad_norm": 0.7744580507278442, "learning_rate": 0.0009342016806722689, "loss": 0.4067, "step": 2455 }, { "epoch": 1.3720670391061454, "grad_norm": 1.296471357345581, "learning_rate": 0.0009341736694677871, "loss": 0.5711, "step": 2456 }, { "epoch": 1.3726256983240224, "grad_norm": 0.9308703541755676, "learning_rate": 0.0009341456582633054, "loss": 0.5285, "step": 2457 }, { "epoch": 1.3731843575418994, "grad_norm": 0.6054986119270325, "learning_rate": 0.0009341176470588236, "loss": 0.5099, "step": 2458 }, { "epoch": 1.3737430167597766, "grad_norm": 16.23585319519043, "learning_rate": 0.0009340896358543417, "loss": 0.6635, "step": 2459 }, { "epoch": 1.3743016759776536, "grad_norm": 1.516026258468628, "learning_rate": 0.00093406162464986, "loss": 0.5686, "step": 2460 }, { "epoch": 1.3748603351955309, "grad_norm": 2.034881353378296, "learning_rate": 0.0009340336134453782, "loss": 0.4215, "step": 2461 }, { "epoch": 1.3754189944134079, "grad_norm": 1.4947832822799683, "learning_rate": 0.0009340056022408964, "loss": 0.6685, "step": 2462 }, { "epoch": 1.3759776536312849, "grad_norm": 1.3847365379333496, "learning_rate": 0.0009339775910364146, "loss": 0.6743, "step": 2463 }, { "epoch": 1.376536312849162, "grad_norm": 
2.259474515914917, "learning_rate": 0.0009339495798319328, "loss": 0.4427, "step": 2464 }, { "epoch": 1.3770949720670391, "grad_norm": 0.9777543544769287, "learning_rate": 0.000933921568627451, "loss": 0.5175, "step": 2465 }, { "epoch": 1.3776536312849161, "grad_norm": 0.8059589862823486, "learning_rate": 0.0009338935574229692, "loss": 0.4721, "step": 2466 }, { "epoch": 1.3782122905027934, "grad_norm": 0.6438359022140503, "learning_rate": 0.0009338655462184874, "loss": 0.4236, "step": 2467 }, { "epoch": 1.3787709497206704, "grad_norm": 1.8244068622589111, "learning_rate": 0.0009338375350140056, "loss": 0.5051, "step": 2468 }, { "epoch": 1.3793296089385474, "grad_norm": 9.361257553100586, "learning_rate": 0.0009338095238095238, "loss": 0.5777, "step": 2469 }, { "epoch": 1.3798882681564246, "grad_norm": 0.803830087184906, "learning_rate": 0.000933781512605042, "loss": 0.5565, "step": 2470 }, { "epoch": 1.3804469273743016, "grad_norm": 0.6548046469688416, "learning_rate": 0.0009337535014005602, "loss": 0.4599, "step": 2471 }, { "epoch": 1.3810055865921789, "grad_norm": 0.7830075621604919, "learning_rate": 0.0009337254901960784, "loss": 0.4607, "step": 2472 }, { "epoch": 1.3815642458100559, "grad_norm": 3.2481491565704346, "learning_rate": 0.0009336974789915966, "loss": 0.6175, "step": 2473 }, { "epoch": 1.382122905027933, "grad_norm": 0.8518757224082947, "learning_rate": 0.0009336694677871149, "loss": 0.5009, "step": 2474 }, { "epoch": 1.3826815642458101, "grad_norm": 0.5780080556869507, "learning_rate": 0.0009336414565826331, "loss": 0.4583, "step": 2475 }, { "epoch": 1.3832402234636871, "grad_norm": 1.9408583641052246, "learning_rate": 0.0009336134453781513, "loss": 0.486, "step": 2476 }, { "epoch": 1.3837988826815644, "grad_norm": 0.7586171627044678, "learning_rate": 0.0009335854341736695, "loss": 0.5087, "step": 2477 }, { "epoch": 1.3843575418994414, "grad_norm": 1.2310386896133423, "learning_rate": 0.0009335574229691877, "loss": 0.5671, "step": 2478 }, { "epoch": 
1.3849162011173184, "grad_norm": 0.8117244839668274, "learning_rate": 0.0009335294117647059, "loss": 0.5861, "step": 2479 }, { "epoch": 1.3854748603351954, "grad_norm": 0.6689615249633789, "learning_rate": 0.0009335014005602242, "loss": 0.5152, "step": 2480 }, { "epoch": 1.3860335195530726, "grad_norm": 0.7848302721977234, "learning_rate": 0.0009334733893557423, "loss": 0.5771, "step": 2481 }, { "epoch": 1.3865921787709496, "grad_norm": 0.8203095197677612, "learning_rate": 0.0009334453781512605, "loss": 0.5806, "step": 2482 }, { "epoch": 1.3871508379888269, "grad_norm": 1.5317904949188232, "learning_rate": 0.0009334173669467787, "loss": 0.5107, "step": 2483 }, { "epoch": 1.387709497206704, "grad_norm": 1.7203454971313477, "learning_rate": 0.0009333893557422969, "loss": 0.4391, "step": 2484 }, { "epoch": 1.388268156424581, "grad_norm": 3.301591634750366, "learning_rate": 0.0009333613445378152, "loss": 0.516, "step": 2485 }, { "epoch": 1.3888268156424581, "grad_norm": 0.7346853613853455, "learning_rate": 0.0009333333333333333, "loss": 0.5327, "step": 2486 }, { "epoch": 1.3893854748603351, "grad_norm": 0.6157276630401611, "learning_rate": 0.0009333053221288515, "loss": 0.5551, "step": 2487 }, { "epoch": 1.3899441340782124, "grad_norm": 0.6655274629592896, "learning_rate": 0.0009332773109243697, "loss": 0.4512, "step": 2488 }, { "epoch": 1.3905027932960894, "grad_norm": 0.6196253895759583, "learning_rate": 0.0009332492997198879, "loss": 0.4572, "step": 2489 }, { "epoch": 1.3910614525139664, "grad_norm": 0.5501835942268372, "learning_rate": 0.0009332212885154063, "loss": 0.5067, "step": 2490 }, { "epoch": 1.3916201117318436, "grad_norm": 1.2724072933197021, "learning_rate": 0.0009331932773109244, "loss": 0.5171, "step": 2491 }, { "epoch": 1.3921787709497206, "grad_norm": 0.9336773157119751, "learning_rate": 0.0009331652661064426, "loss": 0.4704, "step": 2492 }, { "epoch": 1.3927374301675979, "grad_norm": 0.9166853427886963, "learning_rate": 0.0009331372549019608, 
"loss": 0.5213, "step": 2493 }, { "epoch": 1.393296089385475, "grad_norm": 0.6173405051231384, "learning_rate": 0.000933109243697479, "loss": 0.5639, "step": 2494 }, { "epoch": 1.393854748603352, "grad_norm": 0.5503080487251282, "learning_rate": 0.0009330812324929973, "loss": 0.4921, "step": 2495 }, { "epoch": 1.394413407821229, "grad_norm": 19.085041046142578, "learning_rate": 0.0009330532212885155, "loss": 0.4346, "step": 2496 }, { "epoch": 1.3949720670391061, "grad_norm": 0.6812968254089355, "learning_rate": 0.0009330252100840336, "loss": 0.4317, "step": 2497 }, { "epoch": 1.3955307262569834, "grad_norm": 0.8005194067955017, "learning_rate": 0.0009329971988795518, "loss": 0.4454, "step": 2498 }, { "epoch": 1.3960893854748604, "grad_norm": 0.7951617240905762, "learning_rate": 0.00093296918767507, "loss": 0.5179, "step": 2499 }, { "epoch": 1.3966480446927374, "grad_norm": 0.5328164100646973, "learning_rate": 0.0009329411764705883, "loss": 0.429, "step": 2500 }, { "epoch": 1.3966480446927374, "eval_cer": 0.0976836546539666, "eval_loss": 0.36993345618247986, "eval_runtime": 55.65, "eval_samples_per_second": 81.545, "eval_steps_per_second": 5.103, "eval_wer": 0.38507304265243986, "step": 2500 }, { "epoch": 1.3972067039106144, "grad_norm": 1.18572998046875, "learning_rate": 0.0009329131652661065, "loss": 0.5513, "step": 2501 }, { "epoch": 1.3977653631284916, "grad_norm": 1.1080896854400635, "learning_rate": 0.0009328851540616246, "loss": 0.4954, "step": 2502 }, { "epoch": 1.3983240223463687, "grad_norm": 0.789019763469696, "learning_rate": 0.0009328571428571428, "loss": 0.4919, "step": 2503 }, { "epoch": 1.3988826815642459, "grad_norm": 1.1073821783065796, "learning_rate": 0.000932829131652661, "loss": 0.4855, "step": 2504 }, { "epoch": 1.399441340782123, "grad_norm": 0.848599374294281, "learning_rate": 0.0009328011204481793, "loss": 0.3677, "step": 2505 }, { "epoch": 1.4, "grad_norm": 0.8059666752815247, "learning_rate": 0.0009327731092436976, "loss": 0.6229, "step": 
2506 }, { "epoch": 1.4005586592178771, "grad_norm": 1.0171482563018799, "learning_rate": 0.0009327450980392156, "loss": 0.4088, "step": 2507 }, { "epoch": 1.4011173184357542, "grad_norm": 0.8622143864631653, "learning_rate": 0.0009327170868347339, "loss": 0.5827, "step": 2508 }, { "epoch": 1.4016759776536314, "grad_norm": 0.6227065324783325, "learning_rate": 0.0009326890756302521, "loss": 0.4108, "step": 2509 }, { "epoch": 1.4022346368715084, "grad_norm": 0.5788086652755737, "learning_rate": 0.0009326610644257704, "loss": 0.5117, "step": 2510 }, { "epoch": 1.4027932960893854, "grad_norm": 0.46648746728897095, "learning_rate": 0.0009326330532212886, "loss": 0.5672, "step": 2511 }, { "epoch": 1.4033519553072626, "grad_norm": 0.6061733365058899, "learning_rate": 0.0009326050420168068, "loss": 0.5534, "step": 2512 }, { "epoch": 1.4039106145251397, "grad_norm": 0.6212568283081055, "learning_rate": 0.0009325770308123249, "loss": 0.4059, "step": 2513 }, { "epoch": 1.4044692737430169, "grad_norm": 0.6439541578292847, "learning_rate": 0.0009325490196078431, "loss": 0.4233, "step": 2514 }, { "epoch": 1.405027932960894, "grad_norm": 1.7958157062530518, "learning_rate": 0.0009325210084033614, "loss": 0.4694, "step": 2515 }, { "epoch": 1.405586592178771, "grad_norm": 0.43417346477508545, "learning_rate": 0.0009324929971988796, "loss": 0.4863, "step": 2516 }, { "epoch": 1.406145251396648, "grad_norm": 0.9820460677146912, "learning_rate": 0.0009324649859943978, "loss": 0.4064, "step": 2517 }, { "epoch": 1.4067039106145252, "grad_norm": 0.4516315460205078, "learning_rate": 0.0009324369747899159, "loss": 0.5079, "step": 2518 }, { "epoch": 1.4072625698324022, "grad_norm": 0.9147122502326965, "learning_rate": 0.0009324089635854341, "loss": 0.5505, "step": 2519 }, { "epoch": 1.4078212290502794, "grad_norm": 0.4546954035758972, "learning_rate": 0.0009323809523809524, "loss": 0.3978, "step": 2520 }, { "epoch": 1.4083798882681564, "grad_norm": 0.43666183948516846, "learning_rate": 
0.0009323529411764706, "loss": 0.4012, "step": 2521 }, { "epoch": 1.4089385474860334, "grad_norm": 3.7608439922332764, "learning_rate": 0.0009323249299719888, "loss": 0.4185, "step": 2522 }, { "epoch": 1.4094972067039107, "grad_norm": 0.40791329741477966, "learning_rate": 0.0009322969187675069, "loss": 0.4971, "step": 2523 }, { "epoch": 1.4100558659217877, "grad_norm": 1.2691696882247925, "learning_rate": 0.0009322689075630252, "loss": 0.4989, "step": 2524 }, { "epoch": 1.410614525139665, "grad_norm": 0.5668780207633972, "learning_rate": 0.0009322408963585435, "loss": 0.4954, "step": 2525 }, { "epoch": 1.411173184357542, "grad_norm": 1.1780273914337158, "learning_rate": 0.0009322128851540617, "loss": 0.4284, "step": 2526 }, { "epoch": 1.411731843575419, "grad_norm": 0.568503737449646, "learning_rate": 0.0009321848739495799, "loss": 0.4869, "step": 2527 }, { "epoch": 1.4122905027932962, "grad_norm": 0.8034923076629639, "learning_rate": 0.0009321568627450981, "loss": 0.5239, "step": 2528 }, { "epoch": 1.4128491620111732, "grad_norm": 1.2309287786483765, "learning_rate": 0.0009321288515406162, "loss": 0.4618, "step": 2529 }, { "epoch": 1.4134078212290504, "grad_norm": 0.44164738059043884, "learning_rate": 0.0009321008403361345, "loss": 0.5138, "step": 2530 }, { "epoch": 1.4139664804469274, "grad_norm": 1.4449609518051147, "learning_rate": 0.0009320728291316527, "loss": 0.5402, "step": 2531 }, { "epoch": 1.4145251396648044, "grad_norm": 0.8091464638710022, "learning_rate": 0.0009320448179271709, "loss": 0.4908, "step": 2532 }, { "epoch": 1.4150837988826814, "grad_norm": 0.6629249453544617, "learning_rate": 0.0009320168067226891, "loss": 0.5608, "step": 2533 }, { "epoch": 1.4156424581005587, "grad_norm": 0.6336396932601929, "learning_rate": 0.0009319887955182072, "loss": 0.4957, "step": 2534 }, { "epoch": 1.4162011173184357, "grad_norm": 0.8397176861763, "learning_rate": 0.0009319607843137255, "loss": 0.5222, "step": 2535 }, { "epoch": 1.416759776536313, "grad_norm": 
2.699629783630371, "learning_rate": 0.0009319327731092437, "loss": 0.489, "step": 2536 }, { "epoch": 1.41731843575419, "grad_norm": 9.770133018493652, "learning_rate": 0.0009319047619047619, "loss": 0.5091, "step": 2537 }, { "epoch": 1.417877094972067, "grad_norm": 1.0547733306884766, "learning_rate": 0.0009318767507002801, "loss": 0.4625, "step": 2538 }, { "epoch": 1.4184357541899442, "grad_norm": 0.6585387587547302, "learning_rate": 0.0009318487394957982, "loss": 0.4785, "step": 2539 }, { "epoch": 1.4189944134078212, "grad_norm": 0.8304965496063232, "learning_rate": 0.0009318207282913166, "loss": 0.4586, "step": 2540 }, { "epoch": 1.4195530726256984, "grad_norm": 0.5305891633033752, "learning_rate": 0.0009317927170868348, "loss": 0.4909, "step": 2541 }, { "epoch": 1.4201117318435754, "grad_norm": 0.9010879993438721, "learning_rate": 0.000931764705882353, "loss": 0.8379, "step": 2542 }, { "epoch": 1.4206703910614524, "grad_norm": 0.5750892162322998, "learning_rate": 0.0009317366946778712, "loss": 0.4265, "step": 2543 }, { "epoch": 1.4212290502793297, "grad_norm": 0.6748759150505066, "learning_rate": 0.0009317086834733894, "loss": 0.4119, "step": 2544 }, { "epoch": 1.4217877094972067, "grad_norm": 1.5361045598983765, "learning_rate": 0.0009316806722689076, "loss": 0.4722, "step": 2545 }, { "epoch": 1.422346368715084, "grad_norm": 0.6403470039367676, "learning_rate": 0.0009316526610644258, "loss": 0.4722, "step": 2546 }, { "epoch": 1.422905027932961, "grad_norm": 1.2295637130737305, "learning_rate": 0.000931624649859944, "loss": 0.5622, "step": 2547 }, { "epoch": 1.423463687150838, "grad_norm": 0.766720712184906, "learning_rate": 0.0009315966386554622, "loss": 0.4731, "step": 2548 }, { "epoch": 1.4240223463687152, "grad_norm": 0.6608802676200867, "learning_rate": 0.0009315686274509804, "loss": 0.4391, "step": 2549 }, { "epoch": 1.4245810055865922, "grad_norm": 0.4678759276866913, "learning_rate": 0.0009315406162464986, "loss": 0.5577, "step": 2550 }, { "epoch": 
1.4251396648044694, "grad_norm": 0.5720287561416626, "learning_rate": 0.0009315126050420168, "loss": 0.5004, "step": 2551 }, { "epoch": 1.4256983240223464, "grad_norm": 0.7422264218330383, "learning_rate": 0.000931484593837535, "loss": 0.5391, "step": 2552 }, { "epoch": 1.4262569832402234, "grad_norm": 0.8045322299003601, "learning_rate": 0.0009314565826330532, "loss": 0.5479, "step": 2553 }, { "epoch": 1.4268156424581004, "grad_norm": 0.7911209464073181, "learning_rate": 0.0009314285714285714, "loss": 0.4286, "step": 2554 }, { "epoch": 1.4273743016759777, "grad_norm": 0.8115670084953308, "learning_rate": 0.0009314005602240896, "loss": 0.6007, "step": 2555 }, { "epoch": 1.4279329608938547, "grad_norm": 0.7535646557807922, "learning_rate": 0.0009313725490196079, "loss": 0.4321, "step": 2556 }, { "epoch": 1.428491620111732, "grad_norm": 0.6910133957862854, "learning_rate": 0.0009313445378151261, "loss": 0.5538, "step": 2557 }, { "epoch": 1.429050279329609, "grad_norm": 0.6578505635261536, "learning_rate": 0.0009313165266106443, "loss": 0.5565, "step": 2558 }, { "epoch": 1.429608938547486, "grad_norm": 2.156248092651367, "learning_rate": 0.0009312885154061625, "loss": 0.5567, "step": 2559 }, { "epoch": 1.4301675977653632, "grad_norm": 0.5340940356254578, "learning_rate": 0.0009312605042016808, "loss": 0.6375, "step": 2560 }, { "epoch": 1.4307262569832402, "grad_norm": 0.6691848039627075, "learning_rate": 0.0009312324929971989, "loss": 0.6056, "step": 2561 }, { "epoch": 1.4312849162011174, "grad_norm": 0.8040280938148499, "learning_rate": 0.0009312044817927171, "loss": 0.6013, "step": 2562 }, { "epoch": 1.4318435754189944, "grad_norm": 0.5586495995521545, "learning_rate": 0.0009311764705882353, "loss": 0.5543, "step": 2563 }, { "epoch": 1.4324022346368714, "grad_norm": 1.5113362073898315, "learning_rate": 0.0009311484593837535, "loss": 0.3772, "step": 2564 }, { "epoch": 1.4329608938547487, "grad_norm": 0.8635311722755432, "learning_rate": 0.0009311204481792718, "loss": 
0.5313, "step": 2565 }, { "epoch": 1.4335195530726257, "grad_norm": 0.886853039264679, "learning_rate": 0.0009310924369747899, "loss": 0.4537, "step": 2566 }, { "epoch": 1.434078212290503, "grad_norm": 2.031019449234009, "learning_rate": 0.0009310644257703081, "loss": 0.5883, "step": 2567 }, { "epoch": 1.43463687150838, "grad_norm": 1.0636546611785889, "learning_rate": 0.0009310364145658263, "loss": 0.4193, "step": 2568 }, { "epoch": 1.435195530726257, "grad_norm": 0.42769888043403625, "learning_rate": 0.0009310084033613445, "loss": 0.4232, "step": 2569 }, { "epoch": 1.435754189944134, "grad_norm": 0.8384038209915161, "learning_rate": 0.0009309803921568628, "loss": 0.503, "step": 2570 }, { "epoch": 1.4363128491620112, "grad_norm": 0.7572436928749084, "learning_rate": 0.0009309523809523809, "loss": 0.5198, "step": 2571 }, { "epoch": 1.4368715083798882, "grad_norm": 1.6784976720809937, "learning_rate": 0.0009309243697478991, "loss": 0.6243, "step": 2572 }, { "epoch": 1.4374301675977654, "grad_norm": 0.7462752461433411, "learning_rate": 0.0009308963585434174, "loss": 0.6218, "step": 2573 }, { "epoch": 1.4379888268156424, "grad_norm": 1.355023741722107, "learning_rate": 0.0009308683473389356, "loss": 0.4417, "step": 2574 }, { "epoch": 1.4385474860335195, "grad_norm": 0.46579670906066895, "learning_rate": 0.0009308403361344539, "loss": 0.4145, "step": 2575 }, { "epoch": 1.4391061452513967, "grad_norm": 6.230794429779053, "learning_rate": 0.0009308123249299721, "loss": 0.6353, "step": 2576 }, { "epoch": 1.4396648044692737, "grad_norm": 0.6536762714385986, "learning_rate": 0.0009307843137254902, "loss": 0.466, "step": 2577 }, { "epoch": 1.440223463687151, "grad_norm": 0.46875396370887756, "learning_rate": 0.0009307563025210084, "loss": 0.5062, "step": 2578 }, { "epoch": 1.440782122905028, "grad_norm": 0.4661177694797516, "learning_rate": 0.0009307282913165266, "loss": 0.4033, "step": 2579 }, { "epoch": 1.441340782122905, "grad_norm": 0.633985161781311, "learning_rate": 
0.0009307002801120449, "loss": 0.5414, "step": 2580 }, { "epoch": 1.4418994413407822, "grad_norm": 0.8244467973709106, "learning_rate": 0.0009306722689075631, "loss": 0.5299, "step": 2581 }, { "epoch": 1.4424581005586592, "grad_norm": 0.6113318204879761, "learning_rate": 0.0009306442577030812, "loss": 0.4394, "step": 2582 }, { "epoch": 1.4430167597765364, "grad_norm": 1.072367787361145, "learning_rate": 0.0009306162464985994, "loss": 0.4307, "step": 2583 }, { "epoch": 1.4435754189944134, "grad_norm": 0.42234715819358826, "learning_rate": 0.0009305882352941176, "loss": 0.4104, "step": 2584 }, { "epoch": 1.4441340782122905, "grad_norm": 0.931414783000946, "learning_rate": 0.0009305602240896359, "loss": 0.5007, "step": 2585 }, { "epoch": 1.4446927374301675, "grad_norm": 0.9446127414703369, "learning_rate": 0.0009305322128851541, "loss": 0.4738, "step": 2586 }, { "epoch": 1.4452513966480447, "grad_norm": 0.6625229120254517, "learning_rate": 0.0009305042016806722, "loss": 0.5122, "step": 2587 }, { "epoch": 1.4458100558659217, "grad_norm": 0.5373765826225281, "learning_rate": 0.0009304761904761904, "loss": 0.3953, "step": 2588 }, { "epoch": 1.446368715083799, "grad_norm": 0.7654617428779602, "learning_rate": 0.0009304481792717086, "loss": 0.5313, "step": 2589 }, { "epoch": 1.446927374301676, "grad_norm": 0.6200145483016968, "learning_rate": 0.000930420168067227, "loss": 0.5059, "step": 2590 }, { "epoch": 1.447486033519553, "grad_norm": 0.530850887298584, "learning_rate": 0.0009303921568627452, "loss": 0.4399, "step": 2591 }, { "epoch": 1.4480446927374302, "grad_norm": 0.5828093886375427, "learning_rate": 0.0009303641456582634, "loss": 0.5025, "step": 2592 }, { "epoch": 1.4486033519553072, "grad_norm": 0.5664350986480713, "learning_rate": 0.0009303361344537815, "loss": 0.5232, "step": 2593 }, { "epoch": 1.4491620111731844, "grad_norm": 0.5003907084465027, "learning_rate": 0.0009303081232492997, "loss": 0.5158, "step": 2594 }, { "epoch": 1.4497206703910615, "grad_norm": 
0.4369567334651947, "learning_rate": 0.000930280112044818, "loss": 0.3871, "step": 2595 }, { "epoch": 1.4502793296089385, "grad_norm": 0.4398859143257141, "learning_rate": 0.0009302521008403362, "loss": 0.3881, "step": 2596 }, { "epoch": 1.4508379888268157, "grad_norm": 0.46730324625968933, "learning_rate": 0.0009302240896358544, "loss": 0.3999, "step": 2597 }, { "epoch": 1.4513966480446927, "grad_norm": 0.6658786535263062, "learning_rate": 0.0009301960784313725, "loss": 0.4737, "step": 2598 }, { "epoch": 1.45195530726257, "grad_norm": 0.7979653477668762, "learning_rate": 0.0009301680672268907, "loss": 0.6727, "step": 2599 }, { "epoch": 1.452513966480447, "grad_norm": 0.7182505130767822, "learning_rate": 0.000930140056022409, "loss": 0.4866, "step": 2600 }, { "epoch": 1.453072625698324, "grad_norm": 2.0803158283233643, "learning_rate": 0.0009301120448179272, "loss": 0.3919, "step": 2601 }, { "epoch": 1.4536312849162012, "grad_norm": 0.6667923927307129, "learning_rate": 0.0009300840336134454, "loss": 0.5225, "step": 2602 }, { "epoch": 1.4541899441340782, "grad_norm": 0.7217258810997009, "learning_rate": 0.0009300560224089635, "loss": 0.4409, "step": 2603 }, { "epoch": 1.4547486033519554, "grad_norm": 0.5033441781997681, "learning_rate": 0.0009300280112044817, "loss": 0.4594, "step": 2604 }, { "epoch": 1.4553072625698324, "grad_norm": 0.5012925267219543, "learning_rate": 0.00093, "loss": 0.5098, "step": 2605 }, { "epoch": 1.4558659217877095, "grad_norm": 0.81548011302948, "learning_rate": 0.0009299719887955183, "loss": 0.5645, "step": 2606 }, { "epoch": 1.4564245810055865, "grad_norm": 0.6461879014968872, "learning_rate": 0.0009299439775910365, "loss": 0.4361, "step": 2607 }, { "epoch": 1.4569832402234637, "grad_norm": 0.8119282126426697, "learning_rate": 0.0009299159663865547, "loss": 0.5991, "step": 2608 }, { "epoch": 1.4575418994413407, "grad_norm": 1.2009344100952148, "learning_rate": 0.0009298879551820728, "loss": 0.4882, "step": 2609 }, { "epoch": 
1.458100558659218, "grad_norm": 0.7457526922225952, "learning_rate": 0.0009298599439775911, "loss": 0.5036, "step": 2610 }, { "epoch": 1.458659217877095, "grad_norm": 0.6496821045875549, "learning_rate": 0.0009298319327731093, "loss": 0.5617, "step": 2611 }, { "epoch": 1.459217877094972, "grad_norm": 0.4850807189941406, "learning_rate": 0.0009298039215686275, "loss": 0.5347, "step": 2612 }, { "epoch": 1.4597765363128492, "grad_norm": 0.5157176852226257, "learning_rate": 0.0009297759103641457, "loss": 0.3972, "step": 2613 }, { "epoch": 1.4603351955307262, "grad_norm": 0.6404157876968384, "learning_rate": 0.0009297478991596638, "loss": 0.3821, "step": 2614 }, { "epoch": 1.4608938547486034, "grad_norm": 0.6212974786758423, "learning_rate": 0.0009297198879551821, "loss": 0.5472, "step": 2615 }, { "epoch": 1.4614525139664805, "grad_norm": 0.8928610682487488, "learning_rate": 0.0009296918767507003, "loss": 0.3099, "step": 2616 }, { "epoch": 1.4620111731843575, "grad_norm": 4.528392314910889, "learning_rate": 0.0009296638655462185, "loss": 0.4775, "step": 2617 }, { "epoch": 1.4625698324022347, "grad_norm": 0.6402145624160767, "learning_rate": 0.0009296358543417367, "loss": 0.4919, "step": 2618 }, { "epoch": 1.4631284916201117, "grad_norm": 0.5726767778396606, "learning_rate": 0.0009296078431372548, "loss": 0.5082, "step": 2619 }, { "epoch": 1.463687150837989, "grad_norm": 0.46533146500587463, "learning_rate": 0.0009295798319327731, "loss": 0.3863, "step": 2620 }, { "epoch": 1.464245810055866, "grad_norm": 0.49588528275489807, "learning_rate": 0.0009295518207282913, "loss": 0.3835, "step": 2621 }, { "epoch": 1.464804469273743, "grad_norm": 0.45549502968788147, "learning_rate": 0.0009295238095238096, "loss": 0.3506, "step": 2622 }, { "epoch": 1.46536312849162, "grad_norm": 0.5790160894393921, "learning_rate": 0.0009294957983193278, "loss": 0.6728, "step": 2623 }, { "epoch": 1.4659217877094972, "grad_norm": 0.6579418778419495, "learning_rate": 0.000929467787114846, "loss": 
0.5358, "step": 2624 }, { "epoch": 1.4664804469273742, "grad_norm": 0.45469197630882263, "learning_rate": 0.0009294397759103642, "loss": 0.4329, "step": 2625 }, { "epoch": 1.4670391061452515, "grad_norm": 0.5428677797317505, "learning_rate": 0.0009294117647058824, "loss": 0.4543, "step": 2626 }, { "epoch": 1.4675977653631285, "grad_norm": 0.5525240898132324, "learning_rate": 0.0009293837535014006, "loss": 0.5091, "step": 2627 }, { "epoch": 1.4681564245810055, "grad_norm": 0.7379921078681946, "learning_rate": 0.0009293557422969188, "loss": 0.4923, "step": 2628 }, { "epoch": 1.4687150837988827, "grad_norm": 1.9589022397994995, "learning_rate": 0.000929327731092437, "loss": 0.5534, "step": 2629 }, { "epoch": 1.4692737430167597, "grad_norm": 0.4258909821510315, "learning_rate": 0.0009292997198879552, "loss": 0.4178, "step": 2630 }, { "epoch": 1.469832402234637, "grad_norm": 0.4239647388458252, "learning_rate": 0.0009292717086834734, "loss": 0.4064, "step": 2631 }, { "epoch": 1.470391061452514, "grad_norm": 0.5227537155151367, "learning_rate": 0.0009292436974789916, "loss": 0.4668, "step": 2632 }, { "epoch": 1.470949720670391, "grad_norm": 0.6114113926887512, "learning_rate": 0.0009292156862745098, "loss": 0.5525, "step": 2633 }, { "epoch": 1.4715083798882682, "grad_norm": 0.6233765482902527, "learning_rate": 0.000929187675070028, "loss": 0.5509, "step": 2634 }, { "epoch": 1.4720670391061452, "grad_norm": 2.157526731491089, "learning_rate": 0.0009291596638655463, "loss": 0.4408, "step": 2635 }, { "epoch": 1.4726256983240225, "grad_norm": 1.38654625415802, "learning_rate": 0.0009291316526610644, "loss": 0.5012, "step": 2636 }, { "epoch": 1.4731843575418995, "grad_norm": 0.6054531335830688, "learning_rate": 0.0009291036414565826, "loss": 0.4161, "step": 2637 }, { "epoch": 1.4737430167597765, "grad_norm": 0.721886396408081, "learning_rate": 0.0009290756302521009, "loss": 0.5071, "step": 2638 }, { "epoch": 1.4743016759776537, "grad_norm": 1.330005168914795, "learning_rate": 
0.0009290476190476191, "loss": 0.5051, "step": 2639 }, { "epoch": 1.4748603351955307, "grad_norm": 0.630526602268219, "learning_rate": 0.0009290196078431374, "loss": 0.5096, "step": 2640 }, { "epoch": 1.475418994413408, "grad_norm": 1.3126240968704224, "learning_rate": 0.0009289915966386555, "loss": 0.3866, "step": 2641 }, { "epoch": 1.475977653631285, "grad_norm": 0.9824892282485962, "learning_rate": 0.0009289635854341737, "loss": 0.5208, "step": 2642 }, { "epoch": 1.476536312849162, "grad_norm": 0.6409022808074951, "learning_rate": 0.0009289355742296919, "loss": 0.4574, "step": 2643 }, { "epoch": 1.477094972067039, "grad_norm": 0.5286266803741455, "learning_rate": 0.0009289075630252101, "loss": 0.441, "step": 2644 }, { "epoch": 1.4776536312849162, "grad_norm": 0.5803026556968689, "learning_rate": 0.0009288795518207284, "loss": 0.534, "step": 2645 }, { "epoch": 1.4782122905027932, "grad_norm": 0.670640766620636, "learning_rate": 0.0009288515406162465, "loss": 0.4967, "step": 2646 }, { "epoch": 1.4787709497206705, "grad_norm": 0.7008808851242065, "learning_rate": 0.0009288235294117647, "loss": 0.5124, "step": 2647 }, { "epoch": 1.4793296089385475, "grad_norm": 1.3227956295013428, "learning_rate": 0.0009287955182072829, "loss": 0.4875, "step": 2648 }, { "epoch": 1.4798882681564245, "grad_norm": 0.7477467656135559, "learning_rate": 0.0009287675070028011, "loss": 0.7499, "step": 2649 }, { "epoch": 1.4804469273743017, "grad_norm": 0.555888295173645, "learning_rate": 0.0009287394957983194, "loss": 0.6365, "step": 2650 }, { "epoch": 1.4810055865921787, "grad_norm": 0.739152729511261, "learning_rate": 0.0009287114845938376, "loss": 0.6218, "step": 2651 }, { "epoch": 1.481564245810056, "grad_norm": 2.709662675857544, "learning_rate": 0.0009286834733893557, "loss": 0.5598, "step": 2652 }, { "epoch": 1.482122905027933, "grad_norm": 0.6691284775733948, "learning_rate": 0.0009286554621848739, "loss": 0.4887, "step": 2653 }, { "epoch": 1.48268156424581, "grad_norm": 
0.5898959040641785, "learning_rate": 0.0009286274509803921, "loss": 0.5642, "step": 2654 }, { "epoch": 1.4832402234636872, "grad_norm": 0.515849232673645, "learning_rate": 0.0009285994397759105, "loss": 0.4541, "step": 2655 }, { "epoch": 1.4837988826815642, "grad_norm": 0.6814642548561096, "learning_rate": 0.0009285714285714287, "loss": 0.502, "step": 2656 }, { "epoch": 1.4843575418994415, "grad_norm": 0.47332486510276794, "learning_rate": 0.0009285434173669468, "loss": 0.4856, "step": 2657 }, { "epoch": 1.4849162011173185, "grad_norm": 0.522954523563385, "learning_rate": 0.000928515406162465, "loss": 0.5723, "step": 2658 }, { "epoch": 1.4854748603351955, "grad_norm": 0.7725816965103149, "learning_rate": 0.0009284873949579832, "loss": 0.49, "step": 2659 }, { "epoch": 1.4860335195530725, "grad_norm": 1.1196281909942627, "learning_rate": 0.0009284593837535015, "loss": 0.6984, "step": 2660 }, { "epoch": 1.4865921787709497, "grad_norm": 6.089732646942139, "learning_rate": 0.0009284313725490197, "loss": 0.4783, "step": 2661 }, { "epoch": 1.4871508379888267, "grad_norm": 1.3049112558364868, "learning_rate": 0.0009284033613445378, "loss": 0.5496, "step": 2662 }, { "epoch": 1.487709497206704, "grad_norm": 0.7450880408287048, "learning_rate": 0.000928375350140056, "loss": 0.4972, "step": 2663 }, { "epoch": 1.488268156424581, "grad_norm": 0.7437418103218079, "learning_rate": 0.0009283473389355742, "loss": 0.6042, "step": 2664 }, { "epoch": 1.488826815642458, "grad_norm": 0.4941578805446625, "learning_rate": 0.0009283193277310925, "loss": 0.5089, "step": 2665 }, { "epoch": 1.4893854748603352, "grad_norm": 0.5777472853660583, "learning_rate": 0.0009282913165266107, "loss": 0.358, "step": 2666 }, { "epoch": 1.4899441340782122, "grad_norm": 1.475481629371643, "learning_rate": 0.0009282633053221289, "loss": 0.5799, "step": 2667 }, { "epoch": 1.4905027932960895, "grad_norm": 0.7806932330131531, "learning_rate": 0.000928235294117647, "loss": 0.3583, "step": 2668 }, { "epoch": 
1.4910614525139665, "grad_norm": 4.788658142089844, "learning_rate": 0.0009282072829131652, "loss": 0.5905, "step": 2669 }, { "epoch": 1.4916201117318435, "grad_norm": 0.5692121386528015, "learning_rate": 0.0009281792717086836, "loss": 0.5891, "step": 2670 }, { "epoch": 1.4921787709497207, "grad_norm": 1.0268464088439941, "learning_rate": 0.0009281512605042018, "loss": 0.4142, "step": 2671 }, { "epoch": 1.4927374301675977, "grad_norm": 0.8897309303283691, "learning_rate": 0.00092812324929972, "loss": 0.3936, "step": 2672 }, { "epoch": 1.493296089385475, "grad_norm": 0.9935110807418823, "learning_rate": 0.0009280952380952381, "loss": 0.4195, "step": 2673 }, { "epoch": 1.493854748603352, "grad_norm": 0.5382152199745178, "learning_rate": 0.0009280672268907563, "loss": 0.4481, "step": 2674 }, { "epoch": 1.494413407821229, "grad_norm": 1.0676406621932983, "learning_rate": 0.0009280392156862745, "loss": 0.3805, "step": 2675 }, { "epoch": 1.494972067039106, "grad_norm": 0.5046552419662476, "learning_rate": 0.0009280112044817928, "loss": 0.5647, "step": 2676 }, { "epoch": 1.4955307262569832, "grad_norm": 0.48943591117858887, "learning_rate": 0.000927983193277311, "loss": 0.4288, "step": 2677 }, { "epoch": 1.4960893854748603, "grad_norm": 3.701481342315674, "learning_rate": 0.0009279551820728291, "loss": 0.437, "step": 2678 }, { "epoch": 1.4966480446927375, "grad_norm": 1.0245435237884521, "learning_rate": 0.0009279271708683473, "loss": 0.4463, "step": 2679 }, { "epoch": 1.4972067039106145, "grad_norm": 0.6588135957717896, "learning_rate": 0.0009278991596638655, "loss": 0.5567, "step": 2680 }, { "epoch": 1.4977653631284915, "grad_norm": 0.5455290079116821, "learning_rate": 0.0009278711484593838, "loss": 0.4625, "step": 2681 }, { "epoch": 1.4983240223463687, "grad_norm": 0.5909234881401062, "learning_rate": 0.000927843137254902, "loss": 0.4309, "step": 2682 }, { "epoch": 1.4988826815642458, "grad_norm": 0.6696202754974365, "learning_rate": 0.0009278151260504202, "loss": 
0.4379, "step": 2683 }, { "epoch": 1.499441340782123, "grad_norm": 0.8101652264595032, "learning_rate": 0.0009277871148459383, "loss": 0.4769, "step": 2684 }, { "epoch": 1.5, "grad_norm": 0.5355522632598877, "learning_rate": 0.0009277591036414565, "loss": 0.5438, "step": 2685 }, { "epoch": 1.500558659217877, "grad_norm": 0.49236586689949036, "learning_rate": 0.0009277310924369748, "loss": 0.4865, "step": 2686 }, { "epoch": 1.5011173184357542, "grad_norm": 0.7549045085906982, "learning_rate": 0.000927703081232493, "loss": 0.531, "step": 2687 }, { "epoch": 1.5016759776536313, "grad_norm": 0.6821882128715515, "learning_rate": 0.0009276750700280113, "loss": 0.5781, "step": 2688 }, { "epoch": 1.5022346368715085, "grad_norm": 0.6423541307449341, "learning_rate": 0.0009276470588235294, "loss": 0.4837, "step": 2689 }, { "epoch": 1.5027932960893855, "grad_norm": 1.8678420782089233, "learning_rate": 0.0009276190476190476, "loss": 0.4704, "step": 2690 }, { "epoch": 1.5033519553072625, "grad_norm": 0.5778162479400635, "learning_rate": 0.0009275910364145659, "loss": 0.4484, "step": 2691 }, { "epoch": 1.5039106145251395, "grad_norm": 0.4615047574043274, "learning_rate": 0.0009275630252100841, "loss": 0.4259, "step": 2692 }, { "epoch": 1.5044692737430168, "grad_norm": 0.6708247661590576, "learning_rate": 0.0009275350140056023, "loss": 0.6335, "step": 2693 }, { "epoch": 1.505027932960894, "grad_norm": 0.6936284303665161, "learning_rate": 0.0009275070028011204, "loss": 0.4856, "step": 2694 }, { "epoch": 1.505586592178771, "grad_norm": 0.707811176776886, "learning_rate": 0.0009274789915966386, "loss": 0.5278, "step": 2695 }, { "epoch": 1.506145251396648, "grad_norm": 2.1636545658111572, "learning_rate": 0.0009274509803921569, "loss": 0.4717, "step": 2696 }, { "epoch": 1.506703910614525, "grad_norm": 0.48805782198905945, "learning_rate": 0.0009274229691876751, "loss": 0.3648, "step": 2697 }, { "epoch": 1.5072625698324023, "grad_norm": 0.5463467836380005, "learning_rate": 
0.0009273949579831933, "loss": 0.4566, "step": 2698 }, { "epoch": 1.5078212290502795, "grad_norm": 1.0485665798187256, "learning_rate": 0.0009273669467787115, "loss": 0.5163, "step": 2699 }, { "epoch": 1.5083798882681565, "grad_norm": 0.8843816518783569, "learning_rate": 0.0009273389355742296, "loss": 0.6677, "step": 2700 }, { "epoch": 1.5089385474860335, "grad_norm": 0.5736004710197449, "learning_rate": 0.0009273109243697479, "loss": 0.4338, "step": 2701 }, { "epoch": 1.5094972067039105, "grad_norm": 4.1943678855896, "learning_rate": 0.0009272829131652661, "loss": 0.4167, "step": 2702 }, { "epoch": 1.5100558659217878, "grad_norm": 0.8349507451057434, "learning_rate": 0.0009272549019607843, "loss": 0.4526, "step": 2703 }, { "epoch": 1.5106145251396648, "grad_norm": 0.6282299757003784, "learning_rate": 0.0009272268907563026, "loss": 0.5266, "step": 2704 }, { "epoch": 1.511173184357542, "grad_norm": 0.5767343640327454, "learning_rate": 0.0009271988795518207, "loss": 0.4249, "step": 2705 }, { "epoch": 1.511731843575419, "grad_norm": 0.5181618332862854, "learning_rate": 0.000927170868347339, "loss": 0.4287, "step": 2706 }, { "epoch": 1.512290502793296, "grad_norm": 0.5861397981643677, "learning_rate": 0.0009271428571428572, "loss": 0.4934, "step": 2707 }, { "epoch": 1.512849162011173, "grad_norm": 0.7683354616165161, "learning_rate": 0.0009271148459383754, "loss": 0.4592, "step": 2708 }, { "epoch": 1.5134078212290503, "grad_norm": 0.43219852447509766, "learning_rate": 0.0009270868347338936, "loss": 0.3708, "step": 2709 }, { "epoch": 1.5139664804469275, "grad_norm": 1.0593012571334839, "learning_rate": 0.0009270588235294117, "loss": 0.563, "step": 2710 }, { "epoch": 1.5145251396648045, "grad_norm": 0.5885233879089355, "learning_rate": 0.00092703081232493, "loss": 0.41, "step": 2711 }, { "epoch": 1.5150837988826815, "grad_norm": 0.5605961084365845, "learning_rate": 0.0009270028011204482, "loss": 0.5442, "step": 2712 }, { "epoch": 1.5156424581005585, "grad_norm": 
1.160618543624878, "learning_rate": 0.0009269747899159664, "loss": 0.5942, "step": 2713 }, { "epoch": 1.5162011173184358, "grad_norm": 0.9720863103866577, "learning_rate": 0.0009269467787114846, "loss": 0.458, "step": 2714 }, { "epoch": 1.516759776536313, "grad_norm": 1.003719449043274, "learning_rate": 0.0009269187675070028, "loss": 0.5449, "step": 2715 }, { "epoch": 1.51731843575419, "grad_norm": 0.5648466944694519, "learning_rate": 0.000926890756302521, "loss": 0.4219, "step": 2716 }, { "epoch": 1.517877094972067, "grad_norm": 0.5501232743263245, "learning_rate": 0.0009268627450980392, "loss": 0.5251, "step": 2717 }, { "epoch": 1.518435754189944, "grad_norm": 0.5324401259422302, "learning_rate": 0.0009268347338935574, "loss": 0.4915, "step": 2718 }, { "epoch": 1.5189944134078213, "grad_norm": 0.5625090003013611, "learning_rate": 0.0009268067226890756, "loss": 0.5022, "step": 2719 }, { "epoch": 1.5195530726256983, "grad_norm": 0.6464283466339111, "learning_rate": 0.0009267787114845939, "loss": 0.5249, "step": 2720 }, { "epoch": 1.5201117318435755, "grad_norm": 0.7982908487319946, "learning_rate": 0.0009267507002801121, "loss": 0.5135, "step": 2721 }, { "epoch": 1.5206703910614525, "grad_norm": 0.6112183928489685, "learning_rate": 0.0009267226890756303, "loss": 0.3826, "step": 2722 }, { "epoch": 1.5212290502793295, "grad_norm": 0.6859140992164612, "learning_rate": 0.0009266946778711485, "loss": 0.4637, "step": 2723 }, { "epoch": 1.5217877094972065, "grad_norm": 1.0874067544937134, "learning_rate": 0.0009266666666666667, "loss": 0.503, "step": 2724 }, { "epoch": 1.5223463687150838, "grad_norm": 0.5052918195724487, "learning_rate": 0.0009266386554621849, "loss": 0.4556, "step": 2725 }, { "epoch": 1.522905027932961, "grad_norm": 0.8066805005073547, "learning_rate": 0.0009266106442577031, "loss": 0.5166, "step": 2726 }, { "epoch": 1.523463687150838, "grad_norm": 0.6549170613288879, "learning_rate": 0.0009265826330532213, "loss": 0.4763, "step": 2727 }, { "epoch": 
1.524022346368715, "grad_norm": 1.8753679990768433, "learning_rate": 0.0009265546218487395, "loss": 0.4528, "step": 2728 }, { "epoch": 1.524581005586592, "grad_norm": 1.3903990983963013, "learning_rate": 0.0009265266106442577, "loss": 0.5485, "step": 2729 }, { "epoch": 1.5251396648044693, "grad_norm": 1.8706400394439697, "learning_rate": 0.0009264985994397759, "loss": 0.5387, "step": 2730 }, { "epoch": 1.5256983240223465, "grad_norm": 0.7359647154808044, "learning_rate": 0.0009264705882352942, "loss": 0.4494, "step": 2731 }, { "epoch": 1.5262569832402235, "grad_norm": 0.452828049659729, "learning_rate": 0.0009264425770308123, "loss": 0.4034, "step": 2732 }, { "epoch": 1.5268156424581005, "grad_norm": 0.49070021510124207, "learning_rate": 0.0009264145658263305, "loss": 0.4653, "step": 2733 }, { "epoch": 1.5273743016759775, "grad_norm": 1.677897572517395, "learning_rate": 0.0009263865546218487, "loss": 0.3873, "step": 2734 }, { "epoch": 1.5279329608938548, "grad_norm": 0.7672814130783081, "learning_rate": 0.0009263585434173669, "loss": 0.5337, "step": 2735 }, { "epoch": 1.528491620111732, "grad_norm": 0.6242414116859436, "learning_rate": 0.0009263305322128853, "loss": 0.432, "step": 2736 }, { "epoch": 1.529050279329609, "grad_norm": 0.5464147925376892, "learning_rate": 0.0009263025210084034, "loss": 0.598, "step": 2737 }, { "epoch": 1.529608938547486, "grad_norm": 0.6822505593299866, "learning_rate": 0.0009262745098039216, "loss": 0.4842, "step": 2738 }, { "epoch": 1.530167597765363, "grad_norm": 1.3498238325119019, "learning_rate": 0.0009262464985994398, "loss": 0.4758, "step": 2739 }, { "epoch": 1.5307262569832403, "grad_norm": 0.6377862095832825, "learning_rate": 0.000926218487394958, "loss": 0.4904, "step": 2740 }, { "epoch": 1.5312849162011173, "grad_norm": 1.275589942932129, "learning_rate": 0.0009261904761904763, "loss": 0.4656, "step": 2741 }, { "epoch": 1.5318435754189945, "grad_norm": 0.5122226476669312, "learning_rate": 0.0009261624649859944, "loss": 
0.5018, "step": 2742 }, { "epoch": 1.5324022346368715, "grad_norm": 1.044105052947998, "learning_rate": 0.0009261344537815126, "loss": 0.6277, "step": 2743 }, { "epoch": 1.5329608938547485, "grad_norm": 0.4903325140476227, "learning_rate": 0.0009261064425770308, "loss": 0.4832, "step": 2744 }, { "epoch": 1.5335195530726256, "grad_norm": 1.3142452239990234, "learning_rate": 0.000926078431372549, "loss": 0.4616, "step": 2745 }, { "epoch": 1.5340782122905028, "grad_norm": 0.7310460805892944, "learning_rate": 0.0009260504201680673, "loss": 0.5511, "step": 2746 }, { "epoch": 1.53463687150838, "grad_norm": 1.4101277589797974, "learning_rate": 0.0009260224089635855, "loss": 0.5203, "step": 2747 }, { "epoch": 1.535195530726257, "grad_norm": 1.755393147468567, "learning_rate": 0.0009259943977591036, "loss": 0.5651, "step": 2748 }, { "epoch": 1.535754189944134, "grad_norm": 0.6259930729866028, "learning_rate": 0.0009259663865546218, "loss": 0.4588, "step": 2749 }, { "epoch": 1.536312849162011, "grad_norm": 2.5083696842193604, "learning_rate": 0.00092593837535014, "loss": 0.5273, "step": 2750 }, { "epoch": 1.5368715083798883, "grad_norm": 0.600470781326294, "learning_rate": 0.0009259103641456583, "loss": 0.5336, "step": 2751 }, { "epoch": 1.5374301675977655, "grad_norm": 1.3746676445007324, "learning_rate": 0.0009258823529411766, "loss": 0.4209, "step": 2752 }, { "epoch": 1.5379888268156425, "grad_norm": 0.4862399995326996, "learning_rate": 0.0009258543417366946, "loss": 0.4566, "step": 2753 }, { "epoch": 1.5385474860335195, "grad_norm": 0.7803627252578735, "learning_rate": 0.0009258263305322129, "loss": 0.5474, "step": 2754 }, { "epoch": 1.5391061452513966, "grad_norm": 1.5817126035690308, "learning_rate": 0.0009257983193277311, "loss": 0.4603, "step": 2755 }, { "epoch": 1.5396648044692738, "grad_norm": 0.5320682525634766, "learning_rate": 0.0009257703081232494, "loss": 0.4772, "step": 2756 }, { "epoch": 1.5402234636871508, "grad_norm": 1.7863763570785522, "learning_rate": 
0.0009257422969187676, "loss": 0.5236, "step": 2757 }, { "epoch": 1.540782122905028, "grad_norm": 0.7422907948493958, "learning_rate": 0.0009257142857142857, "loss": 0.6242, "step": 2758 }, { "epoch": 1.541340782122905, "grad_norm": 0.6582601070404053, "learning_rate": 0.0009256862745098039, "loss": 0.5305, "step": 2759 }, { "epoch": 1.541899441340782, "grad_norm": 0.5598275065422058, "learning_rate": 0.0009256582633053221, "loss": 0.5369, "step": 2760 }, { "epoch": 1.542458100558659, "grad_norm": 0.5950160026550293, "learning_rate": 0.0009256302521008404, "loss": 0.5478, "step": 2761 }, { "epoch": 1.5430167597765363, "grad_norm": 0.6061046123504639, "learning_rate": 0.0009256022408963586, "loss": 0.4774, "step": 2762 }, { "epoch": 1.5435754189944135, "grad_norm": 4.923741340637207, "learning_rate": 0.0009255742296918768, "loss": 0.4244, "step": 2763 }, { "epoch": 1.5441340782122905, "grad_norm": 0.6583361029624939, "learning_rate": 0.0009255462184873949, "loss": 0.4424, "step": 2764 }, { "epoch": 1.5446927374301676, "grad_norm": 0.5482707023620605, "learning_rate": 0.0009255182072829131, "loss": 0.4597, "step": 2765 }, { "epoch": 1.5452513966480446, "grad_norm": 0.5797296762466431, "learning_rate": 0.0009254901960784314, "loss": 0.5291, "step": 2766 }, { "epoch": 1.5458100558659218, "grad_norm": 0.6409083604812622, "learning_rate": 0.0009254621848739496, "loss": 0.5089, "step": 2767 }, { "epoch": 1.546368715083799, "grad_norm": 0.571533739566803, "learning_rate": 0.0009254341736694678, "loss": 0.4228, "step": 2768 }, { "epoch": 1.546927374301676, "grad_norm": 1.3261182308197021, "learning_rate": 0.0009254061624649859, "loss": 0.6556, "step": 2769 }, { "epoch": 1.547486033519553, "grad_norm": 1.483222246170044, "learning_rate": 0.0009253781512605042, "loss": 0.4688, "step": 2770 }, { "epoch": 1.54804469273743, "grad_norm": 1.9051486253738403, "learning_rate": 0.0009253501400560225, "loss": 0.4937, "step": 2771 }, { "epoch": 1.5486033519553073, "grad_norm": 
0.6750649213790894, "learning_rate": 0.0009253221288515407, "loss": 0.4989, "step": 2772 }, { "epoch": 1.5491620111731843, "grad_norm": 1.6300128698349, "learning_rate": 0.0009252941176470589, "loss": 0.4737, "step": 2773 }, { "epoch": 1.5497206703910615, "grad_norm": 1.0556766986846924, "learning_rate": 0.000925266106442577, "loss": 0.6323, "step": 2774 }, { "epoch": 1.5502793296089385, "grad_norm": 0.4854927957057953, "learning_rate": 0.0009252380952380952, "loss": 0.4267, "step": 2775 }, { "epoch": 1.5508379888268156, "grad_norm": 0.9065334796905518, "learning_rate": 0.0009252100840336135, "loss": 0.5673, "step": 2776 }, { "epoch": 1.5513966480446926, "grad_norm": 0.6424968838691711, "learning_rate": 0.0009251820728291317, "loss": 0.5238, "step": 2777 }, { "epoch": 1.5519553072625698, "grad_norm": 0.8335216045379639, "learning_rate": 0.0009251540616246499, "loss": 0.5646, "step": 2778 }, { "epoch": 1.552513966480447, "grad_norm": 0.49993088841438293, "learning_rate": 0.0009251260504201681, "loss": 0.4298, "step": 2779 }, { "epoch": 1.553072625698324, "grad_norm": 0.9979957938194275, "learning_rate": 0.0009250980392156862, "loss": 0.4548, "step": 2780 }, { "epoch": 1.553631284916201, "grad_norm": 0.6388674974441528, "learning_rate": 0.0009250700280112045, "loss": 0.4553, "step": 2781 }, { "epoch": 1.554189944134078, "grad_norm": 1.2615723609924316, "learning_rate": 0.0009250420168067227, "loss": 0.4694, "step": 2782 }, { "epoch": 1.5547486033519553, "grad_norm": 0.5050574541091919, "learning_rate": 0.0009250140056022409, "loss": 0.4345, "step": 2783 }, { "epoch": 1.5553072625698325, "grad_norm": 1.7929205894470215, "learning_rate": 0.0009249859943977591, "loss": 0.4348, "step": 2784 }, { "epoch": 1.5558659217877095, "grad_norm": 0.5956754088401794, "learning_rate": 0.0009249579831932772, "loss": 0.471, "step": 2785 }, { "epoch": 1.5564245810055866, "grad_norm": 0.7108403444290161, "learning_rate": 0.0009249299719887956, "loss": 0.4583, "step": 2786 }, { "epoch": 
1.5569832402234636, "grad_norm": 0.5493324398994446, "learning_rate": 0.0009249019607843138, "loss": 0.5494, "step": 2787 }, { "epoch": 1.5575418994413408, "grad_norm": 0.6735252141952515, "learning_rate": 0.000924873949579832, "loss": 0.5228, "step": 2788 }, { "epoch": 1.558100558659218, "grad_norm": 0.6224062442779541, "learning_rate": 0.0009248459383753502, "loss": 0.5515, "step": 2789 }, { "epoch": 1.558659217877095, "grad_norm": 0.7120926380157471, "learning_rate": 0.0009248179271708683, "loss": 0.5912, "step": 2790 }, { "epoch": 1.559217877094972, "grad_norm": 0.7107549905776978, "learning_rate": 0.0009247899159663866, "loss": 0.447, "step": 2791 }, { "epoch": 1.559776536312849, "grad_norm": 0.7261028289794922, "learning_rate": 0.0009247619047619048, "loss": 0.7375, "step": 2792 }, { "epoch": 1.5603351955307263, "grad_norm": 0.6139434576034546, "learning_rate": 0.000924733893557423, "loss": 0.4755, "step": 2793 }, { "epoch": 1.5608938547486033, "grad_norm": 18.866910934448242, "learning_rate": 0.0009247058823529412, "loss": 0.6126, "step": 2794 }, { "epoch": 1.5614525139664805, "grad_norm": 0.5344040989875793, "learning_rate": 0.0009246778711484594, "loss": 0.5019, "step": 2795 }, { "epoch": 1.5620111731843576, "grad_norm": 0.5899941325187683, "learning_rate": 0.0009246498599439776, "loss": 0.5483, "step": 2796 }, { "epoch": 1.5625698324022346, "grad_norm": 0.5017511248588562, "learning_rate": 0.0009246218487394958, "loss": 0.5214, "step": 2797 }, { "epoch": 1.5631284916201116, "grad_norm": 0.8440216183662415, "learning_rate": 0.000924593837535014, "loss": 0.4348, "step": 2798 }, { "epoch": 1.5636871508379888, "grad_norm": 0.5344979763031006, "learning_rate": 0.0009245658263305322, "loss": 0.5582, "step": 2799 }, { "epoch": 1.564245810055866, "grad_norm": 0.6181933283805847, "learning_rate": 0.0009245378151260504, "loss": 0.446, "step": 2800 }, { "epoch": 1.564804469273743, "grad_norm": 0.5276536345481873, "learning_rate": 0.0009245098039215686, "loss": 
0.3863, "step": 2801 }, { "epoch": 1.56536312849162, "grad_norm": 0.4638065993785858, "learning_rate": 0.0009244817927170869, "loss": 0.4631, "step": 2802 }, { "epoch": 1.565921787709497, "grad_norm": 0.4662304222583771, "learning_rate": 0.0009244537815126051, "loss": 0.4591, "step": 2803 }, { "epoch": 1.5664804469273743, "grad_norm": 0.7063485980033875, "learning_rate": 0.0009244257703081233, "loss": 0.4275, "step": 2804 }, { "epoch": 1.5670391061452515, "grad_norm": 0.8666127324104309, "learning_rate": 0.0009243977591036415, "loss": 0.6861, "step": 2805 }, { "epoch": 1.5675977653631286, "grad_norm": 4.635715961456299, "learning_rate": 0.0009243697478991597, "loss": 0.5489, "step": 2806 }, { "epoch": 1.5681564245810056, "grad_norm": 0.9933450222015381, "learning_rate": 0.0009243417366946779, "loss": 0.5432, "step": 2807 }, { "epoch": 1.5687150837988826, "grad_norm": 0.6908605694770813, "learning_rate": 0.0009243137254901961, "loss": 0.4025, "step": 2808 }, { "epoch": 1.5692737430167598, "grad_norm": 0.7941877245903015, "learning_rate": 0.0009242857142857143, "loss": 0.5637, "step": 2809 }, { "epoch": 1.5698324022346368, "grad_norm": 0.4864341914653778, "learning_rate": 0.0009242577030812325, "loss": 0.4766, "step": 2810 }, { "epoch": 1.570391061452514, "grad_norm": 0.5854458212852478, "learning_rate": 0.0009242296918767508, "loss": 0.4063, "step": 2811 }, { "epoch": 1.570949720670391, "grad_norm": 0.5189414620399475, "learning_rate": 0.0009242016806722689, "loss": 0.4168, "step": 2812 }, { "epoch": 1.571508379888268, "grad_norm": 0.6213449239730835, "learning_rate": 0.0009241736694677871, "loss": 0.4899, "step": 2813 }, { "epoch": 1.572067039106145, "grad_norm": 2.9355480670928955, "learning_rate": 0.0009241456582633053, "loss": 0.5294, "step": 2814 }, { "epoch": 1.5726256983240223, "grad_norm": 0.7673971056938171, "learning_rate": 0.0009241176470588235, "loss": 0.5934, "step": 2815 }, { "epoch": 1.5731843575418996, "grad_norm": 0.639930248260498, "learning_rate": 
0.0009240896358543418, "loss": 0.6002, "step": 2816 }, { "epoch": 1.5737430167597766, "grad_norm": 0.5959346294403076, "learning_rate": 0.0009240616246498599, "loss": 0.4396, "step": 2817 }, { "epoch": 1.5743016759776536, "grad_norm": 3.7702465057373047, "learning_rate": 0.0009240336134453781, "loss": 0.4159, "step": 2818 }, { "epoch": 1.5748603351955306, "grad_norm": 2.404097318649292, "learning_rate": 0.0009240056022408964, "loss": 0.4869, "step": 2819 }, { "epoch": 1.5754189944134078, "grad_norm": 0.9831011891365051, "learning_rate": 0.0009239775910364146, "loss": 0.6152, "step": 2820 }, { "epoch": 1.575977653631285, "grad_norm": 0.5874549746513367, "learning_rate": 0.0009239495798319329, "loss": 0.5179, "step": 2821 }, { "epoch": 1.576536312849162, "grad_norm": 0.811124324798584, "learning_rate": 0.000923921568627451, "loss": 0.6292, "step": 2822 }, { "epoch": 1.577094972067039, "grad_norm": 36.496070861816406, "learning_rate": 0.0009238935574229692, "loss": 0.4898, "step": 2823 }, { "epoch": 1.577653631284916, "grad_norm": 0.915553867816925, "learning_rate": 0.0009238655462184874, "loss": 0.5476, "step": 2824 }, { "epoch": 1.5782122905027933, "grad_norm": 1.1290582418441772, "learning_rate": 0.0009238375350140056, "loss": 0.5451, "step": 2825 }, { "epoch": 1.5787709497206703, "grad_norm": 2.8792660236358643, "learning_rate": 0.0009238095238095239, "loss": 0.4526, "step": 2826 }, { "epoch": 1.5793296089385476, "grad_norm": 0.5938659906387329, "learning_rate": 0.0009237815126050421, "loss": 0.4981, "step": 2827 }, { "epoch": 1.5798882681564246, "grad_norm": 0.6090506911277771, "learning_rate": 0.0009237535014005602, "loss": 0.4586, "step": 2828 }, { "epoch": 1.5804469273743016, "grad_norm": 2.3405656814575195, "learning_rate": 0.0009237254901960784, "loss": 0.566, "step": 2829 }, { "epoch": 1.5810055865921788, "grad_norm": 0.6403980255126953, "learning_rate": 0.0009236974789915966, "loss": 0.425, "step": 2830 }, { "epoch": 1.5815642458100558, "grad_norm": 
0.9365788698196411, "learning_rate": 0.0009236694677871149, "loss": 0.4745, "step": 2831 }, { "epoch": 1.582122905027933, "grad_norm": 0.6935022473335266, "learning_rate": 0.0009236414565826331, "loss": 0.5897, "step": 2832 }, { "epoch": 1.58268156424581, "grad_norm": 0.483063668012619, "learning_rate": 0.0009236134453781512, "loss": 0.4582, "step": 2833 }, { "epoch": 1.583240223463687, "grad_norm": 0.5326879620552063, "learning_rate": 0.0009235854341736694, "loss": 0.4834, "step": 2834 }, { "epoch": 1.583798882681564, "grad_norm": 1.6950187683105469, "learning_rate": 0.0009235574229691876, "loss": 0.5464, "step": 2835 }, { "epoch": 1.5843575418994413, "grad_norm": 4.1505231857299805, "learning_rate": 0.000923529411764706, "loss": 0.4663, "step": 2836 }, { "epoch": 1.5849162011173186, "grad_norm": 0.7821114659309387, "learning_rate": 0.0009235014005602242, "loss": 0.4632, "step": 2837 }, { "epoch": 1.5854748603351956, "grad_norm": 0.6650540232658386, "learning_rate": 0.0009234733893557423, "loss": 0.558, "step": 2838 }, { "epoch": 1.5860335195530726, "grad_norm": 0.8296647667884827, "learning_rate": 0.0009234453781512605, "loss": 0.5619, "step": 2839 }, { "epoch": 1.5865921787709496, "grad_norm": 1.3736047744750977, "learning_rate": 0.0009234173669467787, "loss": 0.6286, "step": 2840 }, { "epoch": 1.5871508379888268, "grad_norm": 0.9090030193328857, "learning_rate": 0.000923389355742297, "loss": 0.4552, "step": 2841 }, { "epoch": 1.587709497206704, "grad_norm": 2.0754737854003906, "learning_rate": 0.0009233613445378152, "loss": 0.5756, "step": 2842 }, { "epoch": 1.588268156424581, "grad_norm": 2.355257272720337, "learning_rate": 0.0009233333333333334, "loss": 0.5973, "step": 2843 }, { "epoch": 1.588826815642458, "grad_norm": 0.5823377370834351, "learning_rate": 0.0009233053221288515, "loss": 0.4513, "step": 2844 }, { "epoch": 1.589385474860335, "grad_norm": 0.4742606580257416, "learning_rate": 0.0009232773109243697, "loss": 0.5143, "step": 2845 }, { "epoch": 
1.5899441340782123, "grad_norm": 0.868121325969696, "learning_rate": 0.000923249299719888, "loss": 0.4596, "step": 2846 }, { "epoch": 1.5905027932960893, "grad_norm": 0.8557822108268738, "learning_rate": 0.0009232212885154062, "loss": 0.5136, "step": 2847 }, { "epoch": 1.5910614525139666, "grad_norm": 0.8515350818634033, "learning_rate": 0.0009231932773109244, "loss": 0.5733, "step": 2848 }, { "epoch": 1.5916201117318436, "grad_norm": 0.8426374793052673, "learning_rate": 0.0009231652661064425, "loss": 0.6004, "step": 2849 }, { "epoch": 1.5921787709497206, "grad_norm": 0.5116023421287537, "learning_rate": 0.0009231372549019607, "loss": 0.4745, "step": 2850 }, { "epoch": 1.5927374301675976, "grad_norm": 0.7062660455703735, "learning_rate": 0.000923109243697479, "loss": 0.5869, "step": 2851 }, { "epoch": 1.5932960893854748, "grad_norm": 0.5434456467628479, "learning_rate": 0.0009230812324929973, "loss": 0.4573, "step": 2852 }, { "epoch": 1.593854748603352, "grad_norm": 0.7755725383758545, "learning_rate": 0.0009230532212885155, "loss": 0.4905, "step": 2853 }, { "epoch": 1.594413407821229, "grad_norm": 0.7681111693382263, "learning_rate": 0.0009230252100840336, "loss": 0.534, "step": 2854 }, { "epoch": 1.594972067039106, "grad_norm": 0.5175493955612183, "learning_rate": 0.0009229971988795518, "loss": 0.4965, "step": 2855 }, { "epoch": 1.5955307262569831, "grad_norm": 0.5323821902275085, "learning_rate": 0.0009229691876750701, "loss": 0.4737, "step": 2856 }, { "epoch": 1.5960893854748603, "grad_norm": 4.029867172241211, "learning_rate": 0.0009229411764705883, "loss": 0.4348, "step": 2857 }, { "epoch": 1.5966480446927376, "grad_norm": 4.8672590255737305, "learning_rate": 0.0009229131652661065, "loss": 0.6158, "step": 2858 }, { "epoch": 1.5972067039106146, "grad_norm": 1.7575292587280273, "learning_rate": 0.0009228851540616247, "loss": 0.3117, "step": 2859 }, { "epoch": 1.5977653631284916, "grad_norm": 0.5673686265945435, "learning_rate": 0.0009228571428571428, "loss": 
0.5704, "step": 2860 }, { "epoch": 1.5983240223463686, "grad_norm": 0.6373631954193115, "learning_rate": 0.0009228291316526611, "loss": 0.4548, "step": 2861 }, { "epoch": 1.5988826815642458, "grad_norm": 0.8258795142173767, "learning_rate": 0.0009228011204481793, "loss": 0.6333, "step": 2862 }, { "epoch": 1.5994413407821229, "grad_norm": 0.6410709619522095, "learning_rate": 0.0009227731092436975, "loss": 0.4179, "step": 2863 }, { "epoch": 1.6, "grad_norm": 0.688508152961731, "learning_rate": 0.0009227450980392157, "loss": 0.3971, "step": 2864 }, { "epoch": 1.600558659217877, "grad_norm": 4.202364444732666, "learning_rate": 0.0009227170868347338, "loss": 0.4572, "step": 2865 }, { "epoch": 1.6011173184357541, "grad_norm": 0.7221179008483887, "learning_rate": 0.0009226890756302521, "loss": 0.4466, "step": 2866 }, { "epoch": 1.6016759776536311, "grad_norm": 2.5795836448669434, "learning_rate": 0.0009226610644257703, "loss": 0.5522, "step": 2867 }, { "epoch": 1.6022346368715084, "grad_norm": 0.6176924705505371, "learning_rate": 0.0009226330532212886, "loss": 0.526, "step": 2868 }, { "epoch": 1.6027932960893856, "grad_norm": 0.4730075001716614, "learning_rate": 0.0009226050420168068, "loss": 0.4054, "step": 2869 }, { "epoch": 1.6033519553072626, "grad_norm": 0.5490831732749939, "learning_rate": 0.0009225770308123249, "loss": 0.5697, "step": 2870 }, { "epoch": 1.6039106145251396, "grad_norm": 0.5738762021064758, "learning_rate": 0.0009225490196078432, "loss": 0.4648, "step": 2871 }, { "epoch": 1.6044692737430166, "grad_norm": 0.6908128261566162, "learning_rate": 0.0009225210084033614, "loss": 0.3872, "step": 2872 }, { "epoch": 1.6050279329608939, "grad_norm": 0.6184394955635071, "learning_rate": 0.0009224929971988796, "loss": 0.3635, "step": 2873 }, { "epoch": 1.605586592178771, "grad_norm": 0.6684905290603638, "learning_rate": 0.0009224649859943978, "loss": 0.5747, "step": 2874 }, { "epoch": 1.606145251396648, "grad_norm": 0.6855420470237732, "learning_rate": 
0.000922436974789916, "loss": 0.3914, "step": 2875 }, { "epoch": 1.606703910614525, "grad_norm": 0.7845286130905151, "learning_rate": 0.0009224089635854342, "loss": 0.4643, "step": 2876 }, { "epoch": 1.6072625698324021, "grad_norm": 0.7778080701828003, "learning_rate": 0.0009223809523809524, "loss": 0.508, "step": 2877 }, { "epoch": 1.6078212290502794, "grad_norm": 0.5270350575447083, "learning_rate": 0.0009223529411764706, "loss": 0.5029, "step": 2878 }, { "epoch": 1.6083798882681566, "grad_norm": 0.5413338541984558, "learning_rate": 0.0009223249299719888, "loss": 0.5288, "step": 2879 }, { "epoch": 1.6089385474860336, "grad_norm": 0.4262201488018036, "learning_rate": 0.000922296918767507, "loss": 0.3522, "step": 2880 }, { "epoch": 1.6094972067039106, "grad_norm": 1.0702208280563354, "learning_rate": 0.0009222689075630252, "loss": 0.463, "step": 2881 }, { "epoch": 1.6100558659217876, "grad_norm": 0.7859681248664856, "learning_rate": 0.0009222408963585434, "loss": 0.523, "step": 2882 }, { "epoch": 1.6106145251396649, "grad_norm": 0.5103206038475037, "learning_rate": 0.0009222128851540616, "loss": 0.3756, "step": 2883 }, { "epoch": 1.6111731843575419, "grad_norm": 0.5550638437271118, "learning_rate": 0.0009221848739495799, "loss": 0.4632, "step": 2884 }, { "epoch": 1.611731843575419, "grad_norm": 0.6195359826087952, "learning_rate": 0.0009221568627450981, "loss": 0.508, "step": 2885 }, { "epoch": 1.612290502793296, "grad_norm": 0.6476532220840454, "learning_rate": 0.0009221288515406164, "loss": 0.5528, "step": 2886 }, { "epoch": 1.6128491620111731, "grad_norm": 0.8326266407966614, "learning_rate": 0.0009221008403361345, "loss": 0.5712, "step": 2887 }, { "epoch": 1.6134078212290501, "grad_norm": 0.4361787736415863, "learning_rate": 0.0009220728291316527, "loss": 0.4299, "step": 2888 }, { "epoch": 1.6139664804469274, "grad_norm": 0.841769278049469, "learning_rate": 0.0009220448179271709, "loss": 0.8441, "step": 2889 }, { "epoch": 1.6145251396648046, "grad_norm": 
0.6139340996742249, "learning_rate": 0.0009220168067226891, "loss": 0.4644, "step": 2890 }, { "epoch": 1.6150837988826816, "grad_norm": 0.8081483244895935, "learning_rate": 0.0009219887955182074, "loss": 0.6304, "step": 2891 }, { "epoch": 1.6156424581005586, "grad_norm": 1.714690923690796, "learning_rate": 0.0009219607843137255, "loss": 0.557, "step": 2892 }, { "epoch": 1.6162011173184356, "grad_norm": 1.9895784854888916, "learning_rate": 0.0009219327731092437, "loss": 0.4478, "step": 2893 }, { "epoch": 1.6167597765363129, "grad_norm": 0.5913851261138916, "learning_rate": 0.0009219047619047619, "loss": 0.4804, "step": 2894 }, { "epoch": 1.61731843575419, "grad_norm": 0.5215538740158081, "learning_rate": 0.0009218767507002801, "loss": 0.4256, "step": 2895 }, { "epoch": 1.617877094972067, "grad_norm": 0.5980809330940247, "learning_rate": 0.0009218487394957983, "loss": 0.5376, "step": 2896 }, { "epoch": 1.6184357541899441, "grad_norm": 0.5189204216003418, "learning_rate": 0.0009218207282913165, "loss": 0.4814, "step": 2897 }, { "epoch": 1.6189944134078211, "grad_norm": 0.6995025277137756, "learning_rate": 0.0009217927170868347, "loss": 0.469, "step": 2898 }, { "epoch": 1.6195530726256984, "grad_norm": 0.6404252052307129, "learning_rate": 0.0009217647058823529, "loss": 0.5438, "step": 2899 }, { "epoch": 1.6201117318435754, "grad_norm": 0.5329297184944153, "learning_rate": 0.0009217366946778711, "loss": 0.4656, "step": 2900 }, { "epoch": 1.6206703910614526, "grad_norm": 0.752088725566864, "learning_rate": 0.0009217086834733894, "loss": 0.4993, "step": 2901 }, { "epoch": 1.6212290502793296, "grad_norm": 1.3972595930099487, "learning_rate": 0.0009216806722689077, "loss": 0.4056, "step": 2902 }, { "epoch": 1.6217877094972066, "grad_norm": 1.312340497970581, "learning_rate": 0.0009216526610644258, "loss": 0.6291, "step": 2903 }, { "epoch": 1.6223463687150836, "grad_norm": 0.6492317318916321, "learning_rate": 0.000921624649859944, "loss": 0.5123, "step": 2904 }, { "epoch": 
1.6229050279329609, "grad_norm": 0.7971457839012146, "learning_rate": 0.0009215966386554622, "loss": 0.5297, "step": 2905 }, { "epoch": 1.623463687150838, "grad_norm": 1.1285161972045898, "learning_rate": 0.0009215686274509804, "loss": 0.5025, "step": 2906 }, { "epoch": 1.6240223463687151, "grad_norm": 0.759039580821991, "learning_rate": 0.0009215406162464987, "loss": 0.4019, "step": 2907 }, { "epoch": 1.6245810055865921, "grad_norm": 0.7061336636543274, "learning_rate": 0.0009215126050420168, "loss": 0.4109, "step": 2908 }, { "epoch": 1.6251396648044691, "grad_norm": 0.6587031483650208, "learning_rate": 0.000921484593837535, "loss": 0.4681, "step": 2909 }, { "epoch": 1.6256983240223464, "grad_norm": 0.7848811149597168, "learning_rate": 0.0009214565826330532, "loss": 0.5185, "step": 2910 }, { "epoch": 1.6262569832402236, "grad_norm": 2.2897210121154785, "learning_rate": 0.0009214285714285714, "loss": 0.5306, "step": 2911 }, { "epoch": 1.6268156424581006, "grad_norm": 0.8609905242919922, "learning_rate": 0.0009214005602240897, "loss": 0.3421, "step": 2912 }, { "epoch": 1.6273743016759776, "grad_norm": 0.7585361003875732, "learning_rate": 0.0009213725490196078, "loss": 0.3992, "step": 2913 }, { "epoch": 1.6279329608938546, "grad_norm": 0.6667531132698059, "learning_rate": 0.000921344537815126, "loss": 0.4215, "step": 2914 }, { "epoch": 1.6284916201117319, "grad_norm": 0.5932943224906921, "learning_rate": 0.0009213165266106442, "loss": 0.375, "step": 2915 }, { "epoch": 1.6290502793296089, "grad_norm": 0.651016116142273, "learning_rate": 0.0009212885154061624, "loss": 0.5173, "step": 2916 }, { "epoch": 1.6296089385474861, "grad_norm": 0.7457938194274902, "learning_rate": 0.0009212605042016808, "loss": 0.7692, "step": 2917 }, { "epoch": 1.6301675977653631, "grad_norm": 0.6583865284919739, "learning_rate": 0.000921232492997199, "loss": 0.7213, "step": 2918 }, { "epoch": 1.6307262569832401, "grad_norm": 0.902870774269104, "learning_rate": 0.0009212044817927171, "loss": 
0.4226, "step": 2919 }, { "epoch": 1.6312849162011172, "grad_norm": 0.6329991817474365, "learning_rate": 0.0009211764705882353, "loss": 0.379, "step": 2920 }, { "epoch": 1.6318435754189944, "grad_norm": 2.070338487625122, "learning_rate": 0.0009211484593837535, "loss": 0.4733, "step": 2921 }, { "epoch": 1.6324022346368716, "grad_norm": 0.6288262009620667, "learning_rate": 0.0009211204481792718, "loss": 0.4602, "step": 2922 }, { "epoch": 1.6329608938547486, "grad_norm": 0.7328474521636963, "learning_rate": 0.00092109243697479, "loss": 0.492, "step": 2923 }, { "epoch": 1.6335195530726256, "grad_norm": 1.1070021390914917, "learning_rate": 0.0009210644257703081, "loss": 0.4978, "step": 2924 }, { "epoch": 1.6340782122905027, "grad_norm": 0.955201268196106, "learning_rate": 0.0009210364145658263, "loss": 0.4917, "step": 2925 }, { "epoch": 1.6346368715083799, "grad_norm": 0.4593413174152374, "learning_rate": 0.0009210084033613445, "loss": 0.3933, "step": 2926 }, { "epoch": 1.6351955307262571, "grad_norm": 0.5655298829078674, "learning_rate": 0.0009209803921568628, "loss": 0.4398, "step": 2927 }, { "epoch": 1.6357541899441341, "grad_norm": 2.355804920196533, "learning_rate": 0.000920952380952381, "loss": 0.4983, "step": 2928 }, { "epoch": 1.6363128491620111, "grad_norm": 0.8567566275596619, "learning_rate": 0.0009209243697478991, "loss": 0.4985, "step": 2929 }, { "epoch": 1.6368715083798882, "grad_norm": 0.6869891881942749, "learning_rate": 0.0009208963585434173, "loss": 0.4451, "step": 2930 }, { "epoch": 1.6374301675977654, "grad_norm": 0.6745887398719788, "learning_rate": 0.0009208683473389355, "loss": 0.4912, "step": 2931 }, { "epoch": 1.6379888268156426, "grad_norm": 0.982644259929657, "learning_rate": 0.0009208403361344538, "loss": 0.456, "step": 2932 }, { "epoch": 1.6385474860335196, "grad_norm": 0.4889505207538605, "learning_rate": 0.000920812324929972, "loss": 0.4856, "step": 2933 }, { "epoch": 1.6391061452513966, "grad_norm": 0.6137354969978333, "learning_rate": 
0.0009207843137254903, "loss": 0.5827, "step": 2934 }, { "epoch": 1.6396648044692737, "grad_norm": 0.7956241369247437, "learning_rate": 0.0009207563025210084, "loss": 0.4139, "step": 2935 }, { "epoch": 1.6402234636871509, "grad_norm": 0.8453567624092102, "learning_rate": 0.0009207282913165266, "loss": 0.5177, "step": 2936 }, { "epoch": 1.640782122905028, "grad_norm": 0.6705008745193481, "learning_rate": 0.0009207002801120449, "loss": 0.6625, "step": 2937 }, { "epoch": 1.6413407821229051, "grad_norm": 0.6575820446014404, "learning_rate": 0.0009206722689075631, "loss": 0.5216, "step": 2938 }, { "epoch": 1.6418994413407821, "grad_norm": 0.5475789308547974, "learning_rate": 0.0009206442577030813, "loss": 0.431, "step": 2939 }, { "epoch": 1.6424581005586592, "grad_norm": 0.4481825530529022, "learning_rate": 0.0009206162464985994, "loss": 0.3985, "step": 2940 }, { "epoch": 1.6430167597765362, "grad_norm": 0.4677400588989258, "learning_rate": 0.0009205882352941176, "loss": 0.4608, "step": 2941 }, { "epoch": 1.6435754189944134, "grad_norm": 0.5366738438606262, "learning_rate": 0.0009205602240896359, "loss": 0.5212, "step": 2942 }, { "epoch": 1.6441340782122906, "grad_norm": 1.7023563385009766, "learning_rate": 0.0009205322128851541, "loss": 0.4121, "step": 2943 }, { "epoch": 1.6446927374301676, "grad_norm": 0.5010234713554382, "learning_rate": 0.0009205042016806723, "loss": 0.4885, "step": 2944 }, { "epoch": 1.6452513966480447, "grad_norm": 0.538973867893219, "learning_rate": 0.0009204761904761904, "loss": 0.5003, "step": 2945 }, { "epoch": 1.6458100558659217, "grad_norm": 0.5855522751808167, "learning_rate": 0.0009204481792717086, "loss": 0.5985, "step": 2946 }, { "epoch": 1.646368715083799, "grad_norm": 0.7652376890182495, "learning_rate": 0.0009204201680672269, "loss": 0.4511, "step": 2947 }, { "epoch": 1.6469273743016761, "grad_norm": 1.4848134517669678, "learning_rate": 0.0009203921568627451, "loss": 0.4837, "step": 2948 }, { "epoch": 1.6474860335195531, "grad_norm": 
0.8052696585655212, "learning_rate": 0.0009203641456582633, "loss": 0.4961, "step": 2949 }, { "epoch": 1.6480446927374302, "grad_norm": 0.6934226751327515, "learning_rate": 0.0009203361344537816, "loss": 0.4663, "step": 2950 }, { "epoch": 1.6486033519553072, "grad_norm": 1.0082859992980957, "learning_rate": 0.0009203081232492997, "loss": 0.6173, "step": 2951 }, { "epoch": 1.6491620111731844, "grad_norm": 0.584140419960022, "learning_rate": 0.000920280112044818, "loss": 0.5377, "step": 2952 }, { "epoch": 1.6497206703910614, "grad_norm": 3.865908622741699, "learning_rate": 0.0009202521008403362, "loss": 0.4542, "step": 2953 }, { "epoch": 1.6502793296089386, "grad_norm": 0.9439364075660706, "learning_rate": 0.0009202240896358544, "loss": 0.5125, "step": 2954 }, { "epoch": 1.6508379888268156, "grad_norm": 0.5345238447189331, "learning_rate": 0.0009201960784313726, "loss": 0.5079, "step": 2955 }, { "epoch": 1.6513966480446927, "grad_norm": 1.526811957359314, "learning_rate": 0.0009201680672268907, "loss": 0.4813, "step": 2956 }, { "epoch": 1.6519553072625697, "grad_norm": 0.563715398311615, "learning_rate": 0.000920140056022409, "loss": 0.5363, "step": 2957 }, { "epoch": 1.652513966480447, "grad_norm": 0.7194618582725525, "learning_rate": 0.0009201120448179272, "loss": 0.5975, "step": 2958 }, { "epoch": 1.6530726256983241, "grad_norm": 0.6104727983474731, "learning_rate": 0.0009200840336134454, "loss": 0.4158, "step": 2959 }, { "epoch": 1.6536312849162011, "grad_norm": 0.5742731094360352, "learning_rate": 0.0009200560224089636, "loss": 0.5462, "step": 2960 }, { "epoch": 1.6541899441340782, "grad_norm": 0.6003073453903198, "learning_rate": 0.0009200280112044817, "loss": 0.4897, "step": 2961 }, { "epoch": 1.6547486033519552, "grad_norm": 0.7050849795341492, "learning_rate": 0.00092, "loss": 0.48, "step": 2962 }, { "epoch": 1.6553072625698324, "grad_norm": 0.6076145172119141, "learning_rate": 0.0009199719887955182, "loss": 0.3207, "step": 2963 }, { "epoch": 
1.6558659217877096, "grad_norm": 0.9574232697486877, "learning_rate": 0.0009199439775910364, "loss": 0.6019, "step": 2964 }, { "epoch": 1.6564245810055866, "grad_norm": 0.661709189414978, "learning_rate": 0.0009199159663865546, "loss": 0.4523, "step": 2965 }, { "epoch": 1.6569832402234637, "grad_norm": 0.5521332621574402, "learning_rate": 0.0009198879551820729, "loss": 0.5107, "step": 2966 }, { "epoch": 1.6575418994413407, "grad_norm": 2.8413784503936768, "learning_rate": 0.0009198599439775911, "loss": 0.615, "step": 2967 }, { "epoch": 1.658100558659218, "grad_norm": 0.6800103187561035, "learning_rate": 0.0009198319327731093, "loss": 0.5227, "step": 2968 }, { "epoch": 1.6586592178770951, "grad_norm": 0.5890018939971924, "learning_rate": 0.0009198039215686275, "loss": 0.4577, "step": 2969 }, { "epoch": 1.6592178770949721, "grad_norm": 0.6618414521217346, "learning_rate": 0.0009197759103641457, "loss": 0.4643, "step": 2970 }, { "epoch": 1.6597765363128492, "grad_norm": 0.8312742114067078, "learning_rate": 0.0009197478991596639, "loss": 0.3716, "step": 2971 }, { "epoch": 1.6603351955307262, "grad_norm": 0.8219579458236694, "learning_rate": 0.0009197198879551821, "loss": 0.5036, "step": 2972 }, { "epoch": 1.6608938547486034, "grad_norm": 0.5063742399215698, "learning_rate": 0.0009196918767507003, "loss": 0.5249, "step": 2973 }, { "epoch": 1.6614525139664804, "grad_norm": 0.767436146736145, "learning_rate": 0.0009196638655462185, "loss": 0.4814, "step": 2974 }, { "epoch": 1.6620111731843576, "grad_norm": 0.6326186656951904, "learning_rate": 0.0009196358543417367, "loss": 0.5397, "step": 2975 }, { "epoch": 1.6625698324022347, "grad_norm": 0.5269055962562561, "learning_rate": 0.0009196078431372549, "loss": 0.462, "step": 2976 }, { "epoch": 1.6631284916201117, "grad_norm": 1.0455354452133179, "learning_rate": 0.0009195798319327731, "loss": 0.4637, "step": 2977 }, { "epoch": 1.6636871508379887, "grad_norm": 0.49353930354118347, "learning_rate": 0.0009195518207282913, 
"loss": 0.4772, "step": 2978 }, { "epoch": 1.664245810055866, "grad_norm": 3.803060531616211, "learning_rate": 0.0009195238095238095, "loss": 0.4495, "step": 2979 }, { "epoch": 1.6648044692737431, "grad_norm": 0.6462464928627014, "learning_rate": 0.0009194957983193277, "loss": 0.4297, "step": 2980 }, { "epoch": 1.6653631284916202, "grad_norm": 0.6319571137428284, "learning_rate": 0.0009194677871148459, "loss": 0.4821, "step": 2981 }, { "epoch": 1.6659217877094972, "grad_norm": 0.488567978143692, "learning_rate": 0.0009194397759103643, "loss": 0.4366, "step": 2982 }, { "epoch": 1.6664804469273742, "grad_norm": 0.5684471726417542, "learning_rate": 0.0009194117647058824, "loss": 0.4164, "step": 2983 }, { "epoch": 1.6670391061452514, "grad_norm": 0.7592557668685913, "learning_rate": 0.0009193837535014006, "loss": 0.4645, "step": 2984 }, { "epoch": 1.6675977653631286, "grad_norm": 0.6510218381881714, "learning_rate": 0.0009193557422969188, "loss": 0.7281, "step": 2985 }, { "epoch": 1.6681564245810057, "grad_norm": 0.6027083396911621, "learning_rate": 0.000919327731092437, "loss": 0.4817, "step": 2986 }, { "epoch": 1.6687150837988827, "grad_norm": 2.120337963104248, "learning_rate": 0.0009192997198879553, "loss": 0.5205, "step": 2987 }, { "epoch": 1.6692737430167597, "grad_norm": 0.4455220103263855, "learning_rate": 0.0009192717086834734, "loss": 0.4645, "step": 2988 }, { "epoch": 1.669832402234637, "grad_norm": 0.5661830306053162, "learning_rate": 0.0009192436974789916, "loss": 0.468, "step": 2989 }, { "epoch": 1.670391061452514, "grad_norm": 0.59552001953125, "learning_rate": 0.0009192156862745098, "loss": 0.5502, "step": 2990 }, { "epoch": 1.6709497206703912, "grad_norm": 1.0381678342819214, "learning_rate": 0.000919187675070028, "loss": 0.5192, "step": 2991 }, { "epoch": 1.6715083798882682, "grad_norm": 0.7575311660766602, "learning_rate": 0.0009191596638655463, "loss": 0.4759, "step": 2992 }, { "epoch": 1.6720670391061452, "grad_norm": 0.8445690274238586, 
"learning_rate": 0.0009191316526610644, "loss": 0.6034, "step": 2993 }, { "epoch": 1.6726256983240222, "grad_norm": 0.5338285565376282, "learning_rate": 0.0009191036414565826, "loss": 0.4491, "step": 2994 }, { "epoch": 1.6731843575418994, "grad_norm": 0.4428769052028656, "learning_rate": 0.0009190756302521008, "loss": 0.4677, "step": 2995 }, { "epoch": 1.6737430167597767, "grad_norm": 0.5209064483642578, "learning_rate": 0.000919047619047619, "loss": 0.4895, "step": 2996 }, { "epoch": 1.6743016759776537, "grad_norm": 0.43626096844673157, "learning_rate": 0.0009190196078431373, "loss": 0.4419, "step": 2997 }, { "epoch": 1.6748603351955307, "grad_norm": 1.7064591646194458, "learning_rate": 0.0009189915966386556, "loss": 0.7224, "step": 2998 }, { "epoch": 1.6754189944134077, "grad_norm": 1.0908819437026978, "learning_rate": 0.0009189635854341736, "loss": 0.5763, "step": 2999 }, { "epoch": 1.675977653631285, "grad_norm": 0.6949249505996704, "learning_rate": 0.0009189355742296919, "loss": 0.6624, "step": 3000 }, { "epoch": 1.675977653631285, "eval_cer": 0.09953302128681003, "eval_loss": 0.36958059668540955, "eval_runtime": 55.7861, "eval_samples_per_second": 81.346, "eval_steps_per_second": 5.091, "eval_wer": 0.39183263037345323, "step": 3000 }, { "epoch": 1.6765363128491622, "grad_norm": 1.1103029251098633, "learning_rate": 0.0009189075630252101, "loss": 0.4821, "step": 3001 }, { "epoch": 1.6770949720670392, "grad_norm": 1.1582223176956177, "learning_rate": 0.0009188795518207284, "loss": 0.5081, "step": 3002 }, { "epoch": 1.6776536312849162, "grad_norm": 1.3446568250656128, "learning_rate": 0.0009188515406162466, "loss": 0.4201, "step": 3003 }, { "epoch": 1.6782122905027932, "grad_norm": 1.1600929498672485, "learning_rate": 0.0009188235294117647, "loss": 0.4844, "step": 3004 }, { "epoch": 1.6787709497206704, "grad_norm": 0.6011174917221069, "learning_rate": 0.0009187955182072829, "loss": 0.4485, "step": 3005 }, { "epoch": 1.6793296089385474, "grad_norm": 
0.8195842504501343, "learning_rate": 0.0009187675070028011, "loss": 0.6858, "step": 3006 }, { "epoch": 1.6798882681564247, "grad_norm": 0.6972982287406921, "learning_rate": 0.0009187394957983194, "loss": 0.3695, "step": 3007 }, { "epoch": 1.6804469273743017, "grad_norm": 0.7719422578811646, "learning_rate": 0.0009187114845938376, "loss": 0.4631, "step": 3008 }, { "epoch": 1.6810055865921787, "grad_norm": 0.5306065678596497, "learning_rate": 0.0009186834733893557, "loss": 0.3978, "step": 3009 }, { "epoch": 1.6815642458100557, "grad_norm": 1.917561650276184, "learning_rate": 0.0009186554621848739, "loss": 0.6242, "step": 3010 }, { "epoch": 1.682122905027933, "grad_norm": 1.2080435752868652, "learning_rate": 0.0009186274509803921, "loss": 0.5714, "step": 3011 }, { "epoch": 1.6826815642458102, "grad_norm": 1.7968887090682983, "learning_rate": 0.0009185994397759104, "loss": 0.4285, "step": 3012 }, { "epoch": 1.6832402234636872, "grad_norm": 0.4624374806880951, "learning_rate": 0.0009185714285714286, "loss": 0.5075, "step": 3013 }, { "epoch": 1.6837988826815642, "grad_norm": 0.5181758999824524, "learning_rate": 0.0009185434173669468, "loss": 0.3951, "step": 3014 }, { "epoch": 1.6843575418994412, "grad_norm": 0.5095945596694946, "learning_rate": 0.0009185154061624649, "loss": 0.4684, "step": 3015 }, { "epoch": 1.6849162011173184, "grad_norm": 0.5722306966781616, "learning_rate": 0.0009184873949579832, "loss": 0.4164, "step": 3016 }, { "epoch": 1.6854748603351957, "grad_norm": 0.9484612345695496, "learning_rate": 0.0009184593837535015, "loss": 0.5633, "step": 3017 }, { "epoch": 1.6860335195530727, "grad_norm": 0.46368923783302307, "learning_rate": 0.0009184313725490197, "loss": 0.4649, "step": 3018 }, { "epoch": 1.6865921787709497, "grad_norm": 0.7870525121688843, "learning_rate": 0.0009184033613445379, "loss": 0.4788, "step": 3019 }, { "epoch": 1.6871508379888267, "grad_norm": 1.2743964195251465, "learning_rate": 0.000918375350140056, "loss": 0.3967, "step": 3020 }, { 
"epoch": 1.687709497206704, "grad_norm": 1.8453387022018433, "learning_rate": 0.0009183473389355742, "loss": 0.4935, "step": 3021 }, { "epoch": 1.6882681564245812, "grad_norm": 4.191108226776123, "learning_rate": 0.0009183193277310925, "loss": 0.6462, "step": 3022 }, { "epoch": 1.6888268156424582, "grad_norm": 0.9374301433563232, "learning_rate": 0.0009182913165266107, "loss": 0.7021, "step": 3023 }, { "epoch": 1.6893854748603352, "grad_norm": 0.5467361211776733, "learning_rate": 0.0009182633053221289, "loss": 0.5279, "step": 3024 }, { "epoch": 1.6899441340782122, "grad_norm": 0.6957757472991943, "learning_rate": 0.000918235294117647, "loss": 0.6554, "step": 3025 }, { "epoch": 1.6905027932960894, "grad_norm": 0.9166900515556335, "learning_rate": 0.0009182072829131652, "loss": 0.5043, "step": 3026 }, { "epoch": 1.6910614525139664, "grad_norm": 0.8530071377754211, "learning_rate": 0.0009181792717086835, "loss": 0.4453, "step": 3027 }, { "epoch": 1.6916201117318437, "grad_norm": 0.5116307139396667, "learning_rate": 0.0009181512605042017, "loss": 0.58, "step": 3028 }, { "epoch": 1.6921787709497207, "grad_norm": 0.6031655073165894, "learning_rate": 0.0009181232492997199, "loss": 0.4278, "step": 3029 }, { "epoch": 1.6927374301675977, "grad_norm": 3.58233642578125, "learning_rate": 0.0009180952380952381, "loss": 0.4304, "step": 3030 }, { "epoch": 1.6932960893854747, "grad_norm": 0.4411269724369049, "learning_rate": 0.0009180672268907562, "loss": 0.4872, "step": 3031 }, { "epoch": 1.693854748603352, "grad_norm": 0.7215275168418884, "learning_rate": 0.0009180392156862746, "loss": 0.5153, "step": 3032 }, { "epoch": 1.6944134078212292, "grad_norm": 0.5682253837585449, "learning_rate": 0.0009180112044817928, "loss": 0.4766, "step": 3033 }, { "epoch": 1.6949720670391062, "grad_norm": 0.5677854418754578, "learning_rate": 0.000917983193277311, "loss": 0.5427, "step": 3034 }, { "epoch": 1.6955307262569832, "grad_norm": 0.6147822141647339, "learning_rate": 0.0009179551820728292, 
"loss": 0.5426, "step": 3035 }, { "epoch": 1.6960893854748602, "grad_norm": 0.4158145487308502, "learning_rate": 0.0009179271708683473, "loss": 0.3587, "step": 3036 }, { "epoch": 1.6966480446927374, "grad_norm": 0.7393187880516052, "learning_rate": 0.0009178991596638656, "loss": 0.5026, "step": 3037 }, { "epoch": 1.6972067039106147, "grad_norm": 0.8531793355941772, "learning_rate": 0.0009178711484593838, "loss": 0.5205, "step": 3038 }, { "epoch": 1.6977653631284917, "grad_norm": 0.47491392493247986, "learning_rate": 0.000917843137254902, "loss": 0.5226, "step": 3039 }, { "epoch": 1.6983240223463687, "grad_norm": 0.9626147150993347, "learning_rate": 0.0009178151260504202, "loss": 0.7377, "step": 3040 }, { "epoch": 1.6988826815642457, "grad_norm": 0.6460261940956116, "learning_rate": 0.0009177871148459383, "loss": 0.4547, "step": 3041 }, { "epoch": 1.699441340782123, "grad_norm": 1.0530688762664795, "learning_rate": 0.0009177591036414566, "loss": 0.6088, "step": 3042 }, { "epoch": 1.7, "grad_norm": 0.496269166469574, "learning_rate": 0.0009177310924369748, "loss": 0.4702, "step": 3043 }, { "epoch": 1.7005586592178772, "grad_norm": 0.6647741198539734, "learning_rate": 0.000917703081232493, "loss": 0.4477, "step": 3044 }, { "epoch": 1.7011173184357542, "grad_norm": 0.5344812273979187, "learning_rate": 0.0009176750700280112, "loss": 0.4469, "step": 3045 }, { "epoch": 1.7016759776536312, "grad_norm": 0.5009032487869263, "learning_rate": 0.0009176470588235294, "loss": 0.6159, "step": 3046 }, { "epoch": 1.7022346368715082, "grad_norm": 0.6459294557571411, "learning_rate": 0.0009176190476190476, "loss": 0.5017, "step": 3047 }, { "epoch": 1.7027932960893855, "grad_norm": 0.8312904834747314, "learning_rate": 0.0009175910364145659, "loss": 0.7266, "step": 3048 }, { "epoch": 1.7033519553072627, "grad_norm": 1.9589706659317017, "learning_rate": 0.0009175630252100841, "loss": 0.5248, "step": 3049 }, { "epoch": 1.7039106145251397, "grad_norm": 1.463280200958252, "learning_rate": 
0.0009175350140056023, "loss": 0.501, "step": 3050 }, { "epoch": 1.7044692737430167, "grad_norm": 1.3849666118621826, "learning_rate": 0.0009175070028011205, "loss": 0.4167, "step": 3051 }, { "epoch": 1.7050279329608937, "grad_norm": 1.0459672212600708, "learning_rate": 0.0009174789915966387, "loss": 0.4104, "step": 3052 }, { "epoch": 1.705586592178771, "grad_norm": 2.0638487339019775, "learning_rate": 0.0009174509803921569, "loss": 0.4553, "step": 3053 }, { "epoch": 1.7061452513966482, "grad_norm": 0.4697238504886627, "learning_rate": 0.0009174229691876751, "loss": 0.473, "step": 3054 }, { "epoch": 1.7067039106145252, "grad_norm": 0.3627098500728607, "learning_rate": 0.0009173949579831933, "loss": 0.4106, "step": 3055 }, { "epoch": 1.7072625698324022, "grad_norm": 0.8281555771827698, "learning_rate": 0.0009173669467787115, "loss": 0.4971, "step": 3056 }, { "epoch": 1.7078212290502792, "grad_norm": 0.600597620010376, "learning_rate": 0.0009173389355742298, "loss": 0.5494, "step": 3057 }, { "epoch": 1.7083798882681565, "grad_norm": 0.8619397878646851, "learning_rate": 0.0009173109243697479, "loss": 0.4277, "step": 3058 }, { "epoch": 1.7089385474860335, "grad_norm": 0.4999079704284668, "learning_rate": 0.0009172829131652661, "loss": 0.4587, "step": 3059 }, { "epoch": 1.7094972067039107, "grad_norm": 0.6281794309616089, "learning_rate": 0.0009172549019607843, "loss": 0.6208, "step": 3060 }, { "epoch": 1.7100558659217877, "grad_norm": 0.695807158946991, "learning_rate": 0.0009172268907563025, "loss": 0.4673, "step": 3061 }, { "epoch": 1.7106145251396647, "grad_norm": 3.4741170406341553, "learning_rate": 0.0009171988795518208, "loss": 0.4197, "step": 3062 }, { "epoch": 1.711173184357542, "grad_norm": 0.6798413991928101, "learning_rate": 0.0009171708683473389, "loss": 0.4741, "step": 3063 }, { "epoch": 1.711731843575419, "grad_norm": 0.5536423325538635, "learning_rate": 0.0009171428571428571, "loss": 0.4942, "step": 3064 }, { "epoch": 1.7122905027932962, "grad_norm": 
0.8299341201782227, "learning_rate": 0.0009171148459383754, "loss": 0.5756, "step": 3065 }, { "epoch": 1.7128491620111732, "grad_norm": 0.6157573461532593, "learning_rate": 0.0009170868347338936, "loss": 0.4403, "step": 3066 }, { "epoch": 1.7134078212290502, "grad_norm": 0.5600780248641968, "learning_rate": 0.0009170588235294119, "loss": 0.4841, "step": 3067 }, { "epoch": 1.7139664804469272, "grad_norm": 0.5978518128395081, "learning_rate": 0.00091703081232493, "loss": 0.6111, "step": 3068 }, { "epoch": 1.7145251396648045, "grad_norm": 0.7723780274391174, "learning_rate": 0.0009170028011204482, "loss": 0.61, "step": 3069 }, { "epoch": 1.7150837988826817, "grad_norm": 0.49161583185195923, "learning_rate": 0.0009169747899159664, "loss": 0.5156, "step": 3070 }, { "epoch": 1.7156424581005587, "grad_norm": 0.5228260159492493, "learning_rate": 0.0009169467787114846, "loss": 0.4738, "step": 3071 }, { "epoch": 1.7162011173184357, "grad_norm": 0.4669853448867798, "learning_rate": 0.0009169187675070029, "loss": 0.3075, "step": 3072 }, { "epoch": 1.7167597765363127, "grad_norm": 0.6266192197799683, "learning_rate": 0.0009168907563025211, "loss": 0.4909, "step": 3073 }, { "epoch": 1.71731843575419, "grad_norm": 0.7092539668083191, "learning_rate": 0.0009168627450980392, "loss": 0.5391, "step": 3074 }, { "epoch": 1.7178770949720672, "grad_norm": 0.5469465255737305, "learning_rate": 0.0009168347338935574, "loss": 0.5518, "step": 3075 }, { "epoch": 1.7184357541899442, "grad_norm": 0.4958898723125458, "learning_rate": 0.0009168067226890756, "loss": 0.5336, "step": 3076 }, { "epoch": 1.7189944134078212, "grad_norm": 1.5030094385147095, "learning_rate": 0.0009167787114845939, "loss": 0.4746, "step": 3077 }, { "epoch": 1.7195530726256982, "grad_norm": 0.9934349656105042, "learning_rate": 0.0009167507002801121, "loss": 0.4497, "step": 3078 }, { "epoch": 1.7201117318435755, "grad_norm": 1.688143253326416, "learning_rate": 0.0009167226890756302, "loss": 0.5267, "step": 3079 }, { 
"epoch": 1.7206703910614525, "grad_norm": 1.1756359338760376, "learning_rate": 0.0009166946778711484, "loss": 0.3613, "step": 3080 }, { "epoch": 1.7212290502793297, "grad_norm": 0.740430474281311, "learning_rate": 0.0009166666666666666, "loss": 0.5577, "step": 3081 }, { "epoch": 1.7217877094972067, "grad_norm": 0.6306280493736267, "learning_rate": 0.000916638655462185, "loss": 0.4266, "step": 3082 }, { "epoch": 1.7223463687150837, "grad_norm": 0.4091308116912842, "learning_rate": 0.0009166106442577032, "loss": 0.4428, "step": 3083 }, { "epoch": 1.7229050279329607, "grad_norm": 1.7345776557922363, "learning_rate": 0.0009165826330532213, "loss": 0.5564, "step": 3084 }, { "epoch": 1.723463687150838, "grad_norm": 0.6802529692649841, "learning_rate": 0.0009165546218487395, "loss": 0.3493, "step": 3085 }, { "epoch": 1.7240223463687152, "grad_norm": 0.972693681716919, "learning_rate": 0.0009165266106442577, "loss": 0.5288, "step": 3086 }, { "epoch": 1.7245810055865922, "grad_norm": 0.9185463786125183, "learning_rate": 0.000916498599439776, "loss": 0.4222, "step": 3087 }, { "epoch": 1.7251396648044692, "grad_norm": 0.4764520525932312, "learning_rate": 0.0009164705882352942, "loss": 0.4483, "step": 3088 }, { "epoch": 1.7256983240223462, "grad_norm": 1.3819745779037476, "learning_rate": 0.0009164425770308124, "loss": 0.5322, "step": 3089 }, { "epoch": 1.7262569832402235, "grad_norm": 0.6650181412696838, "learning_rate": 0.0009164145658263305, "loss": 0.5584, "step": 3090 }, { "epoch": 1.7268156424581007, "grad_norm": 0.44552111625671387, "learning_rate": 0.0009163865546218487, "loss": 0.413, "step": 3091 }, { "epoch": 1.7273743016759777, "grad_norm": 0.6607682108879089, "learning_rate": 0.000916358543417367, "loss": 0.4661, "step": 3092 }, { "epoch": 1.7279329608938547, "grad_norm": 0.6770963668823242, "learning_rate": 0.0009163305322128852, "loss": 0.6304, "step": 3093 }, { "epoch": 1.7284916201117317, "grad_norm": 0.601143479347229, "learning_rate": 0.0009163025210084034, 
"loss": 0.4934, "step": 3094 }, { "epoch": 1.729050279329609, "grad_norm": 0.5488032698631287, "learning_rate": 0.0009162745098039215, "loss": 0.4922, "step": 3095 }, { "epoch": 1.729608938547486, "grad_norm": 0.8162834644317627, "learning_rate": 0.0009162464985994397, "loss": 0.5126, "step": 3096 }, { "epoch": 1.7301675977653632, "grad_norm": 1.1971279382705688, "learning_rate": 0.000916218487394958, "loss": 0.511, "step": 3097 }, { "epoch": 1.7307262569832402, "grad_norm": 1.797239065170288, "learning_rate": 0.0009161904761904763, "loss": 0.5462, "step": 3098 }, { "epoch": 1.7312849162011172, "grad_norm": 0.5142049193382263, "learning_rate": 0.0009161624649859945, "loss": 0.4754, "step": 3099 }, { "epoch": 1.7318435754189943, "grad_norm": 0.5536891222000122, "learning_rate": 0.0009161344537815126, "loss": 0.4698, "step": 3100 }, { "epoch": 1.7324022346368715, "grad_norm": 1.0244752168655396, "learning_rate": 0.0009161064425770308, "loss": 0.3972, "step": 3101 }, { "epoch": 1.7329608938547487, "grad_norm": 0.6083419919013977, "learning_rate": 0.0009160784313725491, "loss": 0.4494, "step": 3102 }, { "epoch": 1.7335195530726257, "grad_norm": 1.548628807067871, "learning_rate": 0.0009160504201680673, "loss": 0.5115, "step": 3103 }, { "epoch": 1.7340782122905027, "grad_norm": 1.2171717882156372, "learning_rate": 0.0009160224089635855, "loss": 0.5723, "step": 3104 }, { "epoch": 1.7346368715083798, "grad_norm": 0.6739428043365479, "learning_rate": 0.0009159943977591037, "loss": 0.4302, "step": 3105 }, { "epoch": 1.735195530726257, "grad_norm": 0.5084200501441956, "learning_rate": 0.0009159663865546218, "loss": 0.4626, "step": 3106 }, { "epoch": 1.7357541899441342, "grad_norm": 0.6711824536323547, "learning_rate": 0.0009159383753501401, "loss": 0.4492, "step": 3107 }, { "epoch": 1.7363128491620112, "grad_norm": 7.0675458908081055, "learning_rate": 0.0009159103641456583, "loss": 0.4145, "step": 3108 }, { "epoch": 1.7368715083798882, "grad_norm": 0.5331434011459351, 
"learning_rate": 0.0009158823529411765, "loss": 0.5088, "step": 3109 }, { "epoch": 1.7374301675977653, "grad_norm": 0.5827344059944153, "learning_rate": 0.0009158543417366947, "loss": 0.3838, "step": 3110 }, { "epoch": 1.7379888268156425, "grad_norm": 0.9303395748138428, "learning_rate": 0.0009158263305322128, "loss": 0.6908, "step": 3111 }, { "epoch": 1.7385474860335197, "grad_norm": 0.6547474265098572, "learning_rate": 0.0009157983193277311, "loss": 0.562, "step": 3112 }, { "epoch": 1.7391061452513967, "grad_norm": 0.5259217619895935, "learning_rate": 0.0009157703081232493, "loss": 0.4964, "step": 3113 }, { "epoch": 1.7396648044692737, "grad_norm": 0.547927737236023, "learning_rate": 0.0009157422969187676, "loss": 0.529, "step": 3114 }, { "epoch": 1.7402234636871508, "grad_norm": 0.5204383134841919, "learning_rate": 0.0009157142857142858, "loss": 0.4458, "step": 3115 }, { "epoch": 1.740782122905028, "grad_norm": 0.6744920611381531, "learning_rate": 0.0009156862745098039, "loss": 0.4247, "step": 3116 }, { "epoch": 1.741340782122905, "grad_norm": 2.0540030002593994, "learning_rate": 0.0009156582633053222, "loss": 0.5017, "step": 3117 }, { "epoch": 1.7418994413407822, "grad_norm": 1.4944185018539429, "learning_rate": 0.0009156302521008404, "loss": 0.7158, "step": 3118 }, { "epoch": 1.7424581005586592, "grad_norm": 0.5214855074882507, "learning_rate": 0.0009156022408963586, "loss": 0.5148, "step": 3119 }, { "epoch": 1.7430167597765363, "grad_norm": 3.0218491554260254, "learning_rate": 0.0009155742296918768, "loss": 0.5922, "step": 3120 }, { "epoch": 1.7435754189944133, "grad_norm": 1.070124864578247, "learning_rate": 0.000915546218487395, "loss": 0.5451, "step": 3121 }, { "epoch": 1.7441340782122905, "grad_norm": 1.2633320093154907, "learning_rate": 0.0009155182072829131, "loss": 0.5398, "step": 3122 }, { "epoch": 1.7446927374301677, "grad_norm": 0.739230215549469, "learning_rate": 0.0009154901960784314, "loss": 0.4518, "step": 3123 }, { "epoch": 1.7452513966480447, 
"grad_norm": 0.8199949264526367, "learning_rate": 0.0009154621848739496, "loss": 0.5391, "step": 3124 }, { "epoch": 1.7458100558659218, "grad_norm": 1.691592812538147, "learning_rate": 0.0009154341736694678, "loss": 0.4119, "step": 3125 }, { "epoch": 1.7463687150837988, "grad_norm": 0.8518623113632202, "learning_rate": 0.000915406162464986, "loss": 0.4361, "step": 3126 }, { "epoch": 1.746927374301676, "grad_norm": 0.9469772577285767, "learning_rate": 0.0009153781512605041, "loss": 0.469, "step": 3127 }, { "epoch": 1.7474860335195532, "grad_norm": 0.5455501675605774, "learning_rate": 0.0009153501400560224, "loss": 0.4906, "step": 3128 }, { "epoch": 1.7480446927374302, "grad_norm": 0.39891985058784485, "learning_rate": 0.0009153221288515406, "loss": 0.4374, "step": 3129 }, { "epoch": 1.7486033519553073, "grad_norm": 1.063988447189331, "learning_rate": 0.0009152941176470589, "loss": 0.5026, "step": 3130 }, { "epoch": 1.7491620111731843, "grad_norm": 0.5377022624015808, "learning_rate": 0.0009152661064425771, "loss": 0.4527, "step": 3131 }, { "epoch": 1.7497206703910615, "grad_norm": 0.9185256958007812, "learning_rate": 0.0009152380952380952, "loss": 0.6537, "step": 3132 }, { "epoch": 1.7502793296089385, "grad_norm": 1.6892807483673096, "learning_rate": 0.0009152100840336135, "loss": 0.5244, "step": 3133 }, { "epoch": 1.7508379888268157, "grad_norm": 0.6731375455856323, "learning_rate": 0.0009151820728291317, "loss": 0.5268, "step": 3134 }, { "epoch": 1.7513966480446927, "grad_norm": 0.7444874048233032, "learning_rate": 0.0009151540616246499, "loss": 0.5966, "step": 3135 }, { "epoch": 1.7519553072625698, "grad_norm": 0.6658584475517273, "learning_rate": 0.0009151260504201681, "loss": 0.4287, "step": 3136 }, { "epoch": 1.7525139664804468, "grad_norm": 0.5693717002868652, "learning_rate": 0.0009150980392156863, "loss": 0.4879, "step": 3137 }, { "epoch": 1.753072625698324, "grad_norm": 0.857803225517273, "learning_rate": 0.0009150700280112045, "loss": 0.446, "step": 3138 
}, { "epoch": 1.7536312849162012, "grad_norm": 0.8032236695289612, "learning_rate": 0.0009150420168067227, "loss": 0.3945, "step": 3139 }, { "epoch": 1.7541899441340782, "grad_norm": 0.6887109875679016, "learning_rate": 0.0009150140056022409, "loss": 0.6698, "step": 3140 }, { "epoch": 1.7547486033519553, "grad_norm": 0.9623390436172485, "learning_rate": 0.0009149859943977591, "loss": 0.4731, "step": 3141 }, { "epoch": 1.7553072625698323, "grad_norm": 0.572283148765564, "learning_rate": 0.0009149579831932773, "loss": 0.457, "step": 3142 }, { "epoch": 1.7558659217877095, "grad_norm": 0.5509768128395081, "learning_rate": 0.0009149299719887955, "loss": 0.4345, "step": 3143 }, { "epoch": 1.7564245810055867, "grad_norm": 2.6843929290771484, "learning_rate": 0.0009149019607843137, "loss": 0.5544, "step": 3144 }, { "epoch": 1.7569832402234637, "grad_norm": 1.8658711910247803, "learning_rate": 0.0009148739495798319, "loss": 0.5912, "step": 3145 }, { "epoch": 1.7575418994413408, "grad_norm": 1.2871838808059692, "learning_rate": 0.0009148459383753501, "loss": 0.5554, "step": 3146 }, { "epoch": 1.7581005586592178, "grad_norm": 0.5986303091049194, "learning_rate": 0.0009148179271708684, "loss": 0.4511, "step": 3147 }, { "epoch": 1.758659217877095, "grad_norm": 0.5408793687820435, "learning_rate": 0.0009147899159663866, "loss": 0.4302, "step": 3148 }, { "epoch": 1.759217877094972, "grad_norm": 1.0741839408874512, "learning_rate": 0.0009147619047619048, "loss": 0.6019, "step": 3149 }, { "epoch": 1.7597765363128492, "grad_norm": 0.5812860727310181, "learning_rate": 0.000914733893557423, "loss": 0.5739, "step": 3150 }, { "epoch": 1.7603351955307263, "grad_norm": 0.5961041450500488, "learning_rate": 0.0009147058823529412, "loss": 0.4712, "step": 3151 }, { "epoch": 1.7608938547486033, "grad_norm": 0.8099181652069092, "learning_rate": 0.0009146778711484594, "loss": 0.5246, "step": 3152 }, { "epoch": 1.7614525139664803, "grad_norm": 0.5553327202796936, "learning_rate": 
0.0009146498599439777, "loss": 0.5419, "step": 3153 }, { "epoch": 1.7620111731843575, "grad_norm": 0.5164934992790222, "learning_rate": 0.0009146218487394958, "loss": 0.4026, "step": 3154 }, { "epoch": 1.7625698324022347, "grad_norm": 0.5284751653671265, "learning_rate": 0.000914593837535014, "loss": 0.621, "step": 3155 }, { "epoch": 1.7631284916201118, "grad_norm": 0.5386056900024414, "learning_rate": 0.0009145658263305322, "loss": 0.5432, "step": 3156 }, { "epoch": 1.7636871508379888, "grad_norm": 4.473935604095459, "learning_rate": 0.0009145378151260504, "loss": 0.479, "step": 3157 }, { "epoch": 1.7642458100558658, "grad_norm": 0.5091344118118286, "learning_rate": 0.0009145098039215687, "loss": 0.3973, "step": 3158 }, { "epoch": 1.764804469273743, "grad_norm": 0.7269484400749207, "learning_rate": 0.0009144817927170868, "loss": 0.4625, "step": 3159 }, { "epoch": 1.7653631284916202, "grad_norm": 0.6569042205810547, "learning_rate": 0.000914453781512605, "loss": 0.6453, "step": 3160 }, { "epoch": 1.7659217877094973, "grad_norm": 0.5146526098251343, "learning_rate": 0.0009144257703081232, "loss": 0.4935, "step": 3161 }, { "epoch": 1.7664804469273743, "grad_norm": 0.5423449277877808, "learning_rate": 0.0009143977591036414, "loss": 0.3714, "step": 3162 }, { "epoch": 1.7670391061452513, "grad_norm": 0.7551223635673523, "learning_rate": 0.0009143697478991598, "loss": 0.5137, "step": 3163 }, { "epoch": 1.7675977653631285, "grad_norm": 0.49909594655036926, "learning_rate": 0.0009143417366946779, "loss": 0.4782, "step": 3164 }, { "epoch": 1.7681564245810057, "grad_norm": 0.42767465114593506, "learning_rate": 0.0009143137254901961, "loss": 0.391, "step": 3165 }, { "epoch": 1.7687150837988828, "grad_norm": 0.5423476099967957, "learning_rate": 0.0009142857142857143, "loss": 0.4518, "step": 3166 }, { "epoch": 1.7692737430167598, "grad_norm": 0.7874027490615845, "learning_rate": 0.0009142577030812325, "loss": 0.4433, "step": 3167 }, { "epoch": 1.7698324022346368, "grad_norm": 
0.7019028663635254, "learning_rate": 0.0009142296918767508, "loss": 0.5464, "step": 3168 }, { "epoch": 1.770391061452514, "grad_norm": 15.238758087158203, "learning_rate": 0.000914201680672269, "loss": 0.4594, "step": 3169 }, { "epoch": 1.770949720670391, "grad_norm": 0.6703578233718872, "learning_rate": 0.0009141736694677871, "loss": 0.5333, "step": 3170 }, { "epoch": 1.7715083798882683, "grad_norm": 1.0852935314178467, "learning_rate": 0.0009141456582633053, "loss": 0.5146, "step": 3171 }, { "epoch": 1.7720670391061453, "grad_norm": 0.6139355897903442, "learning_rate": 0.0009141176470588235, "loss": 0.5606, "step": 3172 }, { "epoch": 1.7726256983240223, "grad_norm": 0.4671594202518463, "learning_rate": 0.0009140896358543418, "loss": 0.4041, "step": 3173 }, { "epoch": 1.7731843575418993, "grad_norm": 0.8935621380805969, "learning_rate": 0.00091406162464986, "loss": 0.5659, "step": 3174 }, { "epoch": 1.7737430167597765, "grad_norm": 0.633843719959259, "learning_rate": 0.0009140336134453781, "loss": 0.4534, "step": 3175 }, { "epoch": 1.7743016759776538, "grad_norm": 0.6199772953987122, "learning_rate": 0.0009140056022408963, "loss": 0.3949, "step": 3176 }, { "epoch": 1.7748603351955308, "grad_norm": 0.6112352609634399, "learning_rate": 0.0009139775910364145, "loss": 0.3991, "step": 3177 }, { "epoch": 1.7754189944134078, "grad_norm": 14.571758270263672, "learning_rate": 0.0009139495798319328, "loss": 0.525, "step": 3178 }, { "epoch": 1.7759776536312848, "grad_norm": 0.7849811911582947, "learning_rate": 0.000913921568627451, "loss": 0.4102, "step": 3179 }, { "epoch": 1.776536312849162, "grad_norm": 6.994330406188965, "learning_rate": 0.0009138935574229692, "loss": 0.5312, "step": 3180 }, { "epoch": 1.7770949720670393, "grad_norm": 0.6431810259819031, "learning_rate": 0.0009138655462184874, "loss": 0.4155, "step": 3181 }, { "epoch": 1.7776536312849163, "grad_norm": 4.65981912612915, "learning_rate": 0.0009138375350140056, "loss": 0.5503, "step": 3182 }, { "epoch": 
1.7782122905027933, "grad_norm": 0.9029192328453064, "learning_rate": 0.0009138095238095239, "loss": 0.4662, "step": 3183 }, { "epoch": 1.7787709497206703, "grad_norm": 0.5947895050048828, "learning_rate": 0.0009137815126050421, "loss": 0.5221, "step": 3184 }, { "epoch": 1.7793296089385475, "grad_norm": 0.6901898980140686, "learning_rate": 0.0009137535014005603, "loss": 0.5007, "step": 3185 }, { "epoch": 1.7798882681564245, "grad_norm": 0.5442327260971069, "learning_rate": 0.0009137254901960784, "loss": 0.445, "step": 3186 }, { "epoch": 1.7804469273743018, "grad_norm": 0.570530891418457, "learning_rate": 0.0009136974789915966, "loss": 0.485, "step": 3187 }, { "epoch": 1.7810055865921788, "grad_norm": 2.407280445098877, "learning_rate": 0.0009136694677871149, "loss": 0.4441, "step": 3188 }, { "epoch": 1.7815642458100558, "grad_norm": 0.8397179245948792, "learning_rate": 0.0009136414565826331, "loss": 0.4369, "step": 3189 }, { "epoch": 1.7821229050279328, "grad_norm": 0.986168622970581, "learning_rate": 0.0009136134453781513, "loss": 0.4832, "step": 3190 }, { "epoch": 1.78268156424581, "grad_norm": 0.5946182608604431, "learning_rate": 0.0009135854341736694, "loss": 0.5193, "step": 3191 }, { "epoch": 1.7832402234636873, "grad_norm": 1.54610013961792, "learning_rate": 0.0009135574229691876, "loss": 0.4778, "step": 3192 }, { "epoch": 1.7837988826815643, "grad_norm": 0.6242474317550659, "learning_rate": 0.0009135294117647059, "loss": 0.5603, "step": 3193 }, { "epoch": 1.7843575418994413, "grad_norm": 0.6124485731124878, "learning_rate": 0.0009135014005602241, "loss": 0.5536, "step": 3194 }, { "epoch": 1.7849162011173183, "grad_norm": 0.7565421462059021, "learning_rate": 0.0009134733893557423, "loss": 0.4548, "step": 3195 }, { "epoch": 1.7854748603351955, "grad_norm": 0.6444962024688721, "learning_rate": 0.0009134453781512604, "loss": 0.4885, "step": 3196 }, { "epoch": 1.7860335195530728, "grad_norm": 0.5428703427314758, "learning_rate": 0.0009134173669467787, "loss": 
0.5451, "step": 3197 }, { "epoch": 1.7865921787709498, "grad_norm": 1.2530630826950073, "learning_rate": 0.000913389355742297, "loss": 0.8271, "step": 3198 }, { "epoch": 1.7871508379888268, "grad_norm": 0.5181470513343811, "learning_rate": 0.0009133613445378152, "loss": 0.5571, "step": 3199 }, { "epoch": 1.7877094972067038, "grad_norm": 1.4144065380096436, "learning_rate": 0.0009133333333333334, "loss": 0.4939, "step": 3200 }, { "epoch": 1.788268156424581, "grad_norm": 0.9006614089012146, "learning_rate": 0.0009133053221288516, "loss": 0.4826, "step": 3201 }, { "epoch": 1.788826815642458, "grad_norm": 0.7793147563934326, "learning_rate": 0.0009132773109243697, "loss": 0.4808, "step": 3202 }, { "epoch": 1.7893854748603353, "grad_norm": 0.7988924384117126, "learning_rate": 0.000913249299719888, "loss": 0.5851, "step": 3203 }, { "epoch": 1.7899441340782123, "grad_norm": 0.6632587909698486, "learning_rate": 0.0009132212885154062, "loss": 0.3586, "step": 3204 }, { "epoch": 1.7905027932960893, "grad_norm": 0.6377608776092529, "learning_rate": 0.0009131932773109244, "loss": 0.503, "step": 3205 }, { "epoch": 1.7910614525139665, "grad_norm": 0.5571548938751221, "learning_rate": 0.0009131652661064426, "loss": 0.4389, "step": 3206 }, { "epoch": 1.7916201117318435, "grad_norm": 0.8831945061683655, "learning_rate": 0.0009131372549019607, "loss": 0.4241, "step": 3207 }, { "epoch": 1.7921787709497208, "grad_norm": 0.6242866516113281, "learning_rate": 0.000913109243697479, "loss": 0.5814, "step": 3208 }, { "epoch": 1.7927374301675978, "grad_norm": 0.5590229630470276, "learning_rate": 0.0009130812324929972, "loss": 0.5109, "step": 3209 }, { "epoch": 1.7932960893854748, "grad_norm": 0.554449737071991, "learning_rate": 0.0009130532212885154, "loss": 0.5484, "step": 3210 }, { "epoch": 1.7938547486033518, "grad_norm": 0.45062708854675293, "learning_rate": 0.0009130252100840336, "loss": 0.4523, "step": 3211 }, { "epoch": 1.794413407821229, "grad_norm": 0.4103892147541046, 
"learning_rate": 0.0009129971988795517, "loss": 0.4018, "step": 3212 }, { "epoch": 1.7949720670391063, "grad_norm": 0.712181031703949, "learning_rate": 0.0009129691876750701, "loss": 0.5398, "step": 3213 }, { "epoch": 1.7955307262569833, "grad_norm": 0.5673133134841919, "learning_rate": 0.0009129411764705883, "loss": 0.4612, "step": 3214 }, { "epoch": 1.7960893854748603, "grad_norm": 0.8361480832099915, "learning_rate": 0.0009129131652661065, "loss": 0.4171, "step": 3215 }, { "epoch": 1.7966480446927373, "grad_norm": 1.4768726825714111, "learning_rate": 0.0009128851540616247, "loss": 0.4183, "step": 3216 }, { "epoch": 1.7972067039106145, "grad_norm": 2.5639262199401855, "learning_rate": 0.0009128571428571429, "loss": 0.4561, "step": 3217 }, { "epoch": 1.7977653631284918, "grad_norm": 0.7279541492462158, "learning_rate": 0.0009128291316526611, "loss": 0.4435, "step": 3218 }, { "epoch": 1.7983240223463688, "grad_norm": 1.6590209007263184, "learning_rate": 0.0009128011204481793, "loss": 0.5238, "step": 3219 }, { "epoch": 1.7988826815642458, "grad_norm": 0.44741788506507874, "learning_rate": 0.0009127731092436975, "loss": 0.5591, "step": 3220 }, { "epoch": 1.7994413407821228, "grad_norm": 0.5549837946891785, "learning_rate": 0.0009127450980392157, "loss": 0.5138, "step": 3221 }, { "epoch": 1.8, "grad_norm": 0.6384388208389282, "learning_rate": 0.0009127170868347339, "loss": 0.4149, "step": 3222 }, { "epoch": 1.800558659217877, "grad_norm": 0.4796817898750305, "learning_rate": 0.0009126890756302521, "loss": 0.4302, "step": 3223 }, { "epoch": 1.8011173184357543, "grad_norm": 0.6586772203445435, "learning_rate": 0.0009126610644257703, "loss": 0.5628, "step": 3224 }, { "epoch": 1.8016759776536313, "grad_norm": 0.7913119792938232, "learning_rate": 0.0009126330532212885, "loss": 0.5819, "step": 3225 }, { "epoch": 1.8022346368715083, "grad_norm": 0.523750364780426, "learning_rate": 0.0009126050420168067, "loss": 0.502, "step": 3226 }, { "epoch": 1.8027932960893853, 
"grad_norm": 1.5710994005203247, "learning_rate": 0.0009125770308123249, "loss": 0.7304, "step": 3227 }, { "epoch": 1.8033519553072626, "grad_norm": 0.5283070802688599, "learning_rate": 0.0009125490196078431, "loss": 0.4541, "step": 3228 }, { "epoch": 1.8039106145251398, "grad_norm": 0.6923981308937073, "learning_rate": 0.0009125210084033614, "loss": 0.5953, "step": 3229 }, { "epoch": 1.8044692737430168, "grad_norm": 0.3811795711517334, "learning_rate": 0.0009124929971988796, "loss": 0.3949, "step": 3230 }, { "epoch": 1.8050279329608938, "grad_norm": 0.8328307271003723, "learning_rate": 0.0009124649859943978, "loss": 0.5691, "step": 3231 }, { "epoch": 1.8055865921787708, "grad_norm": 1.4995604753494263, "learning_rate": 0.000912436974789916, "loss": 0.5034, "step": 3232 }, { "epoch": 1.806145251396648, "grad_norm": 0.5103375911712646, "learning_rate": 0.0009124089635854343, "loss": 0.4367, "step": 3233 }, { "epoch": 1.8067039106145253, "grad_norm": 0.5794250965118408, "learning_rate": 0.0009123809523809524, "loss": 0.5514, "step": 3234 }, { "epoch": 1.8072625698324023, "grad_norm": 1.3814373016357422, "learning_rate": 0.0009123529411764706, "loss": 0.4518, "step": 3235 }, { "epoch": 1.8078212290502793, "grad_norm": 1.232062816619873, "learning_rate": 0.0009123249299719888, "loss": 0.598, "step": 3236 }, { "epoch": 1.8083798882681563, "grad_norm": 0.550687849521637, "learning_rate": 0.000912296918767507, "loss": 0.5147, "step": 3237 }, { "epoch": 1.8089385474860336, "grad_norm": 0.7990720272064209, "learning_rate": 0.0009122689075630253, "loss": 0.5182, "step": 3238 }, { "epoch": 1.8094972067039106, "grad_norm": 0.6354506611824036, "learning_rate": 0.0009122408963585434, "loss": 0.5284, "step": 3239 }, { "epoch": 1.8100558659217878, "grad_norm": 0.6284214854240417, "learning_rate": 0.0009122128851540616, "loss": 0.4155, "step": 3240 }, { "epoch": 1.8106145251396648, "grad_norm": 7.2653069496154785, "learning_rate": 0.0009121848739495798, "loss": 0.5143, "step": 3241 
}, { "epoch": 1.8111731843575418, "grad_norm": 0.8755322694778442, "learning_rate": 0.000912156862745098, "loss": 0.6028, "step": 3242 }, { "epoch": 1.8117318435754188, "grad_norm": 0.8377264738082886, "learning_rate": 0.0009121288515406163, "loss": 0.5066, "step": 3243 }, { "epoch": 1.812290502793296, "grad_norm": 0.6050659418106079, "learning_rate": 0.0009121008403361344, "loss": 0.6722, "step": 3244 }, { "epoch": 1.8128491620111733, "grad_norm": 0.8741009831428528, "learning_rate": 0.0009120728291316526, "loss": 0.4339, "step": 3245 }, { "epoch": 1.8134078212290503, "grad_norm": 0.5095301270484924, "learning_rate": 0.0009120448179271709, "loss": 0.5186, "step": 3246 }, { "epoch": 1.8139664804469273, "grad_norm": 0.7434853911399841, "learning_rate": 0.0009120168067226891, "loss": 0.4591, "step": 3247 }, { "epoch": 1.8145251396648043, "grad_norm": 0.5455620884895325, "learning_rate": 0.0009119887955182074, "loss": 0.5082, "step": 3248 }, { "epoch": 1.8150837988826816, "grad_norm": 0.6582542657852173, "learning_rate": 0.0009119607843137256, "loss": 0.5654, "step": 3249 }, { "epoch": 1.8156424581005588, "grad_norm": 0.6216452717781067, "learning_rate": 0.0009119327731092437, "loss": 0.4429, "step": 3250 }, { "epoch": 1.8162011173184358, "grad_norm": 0.5055205821990967, "learning_rate": 0.0009119047619047619, "loss": 0.5245, "step": 3251 }, { "epoch": 1.8167597765363128, "grad_norm": 0.690655529499054, "learning_rate": 0.0009118767507002801, "loss": 0.4268, "step": 3252 }, { "epoch": 1.8173184357541898, "grad_norm": 0.9002646803855896, "learning_rate": 0.0009118487394957984, "loss": 0.4873, "step": 3253 }, { "epoch": 1.817877094972067, "grad_norm": 0.3966941237449646, "learning_rate": 0.0009118207282913166, "loss": 0.5805, "step": 3254 }, { "epoch": 1.8184357541899443, "grad_norm": 0.5643709897994995, "learning_rate": 0.0009117927170868347, "loss": 0.4476, "step": 3255 }, { "epoch": 1.8189944134078213, "grad_norm": 0.5275784730911255, "learning_rate": 
0.0009117647058823529, "loss": 0.5981, "step": 3256 }, { "epoch": 1.8195530726256983, "grad_norm": 0.7534781098365784, "learning_rate": 0.0009117366946778711, "loss": 0.4925, "step": 3257 }, { "epoch": 1.8201117318435753, "grad_norm": 0.5781060457229614, "learning_rate": 0.0009117086834733894, "loss": 0.4801, "step": 3258 }, { "epoch": 1.8206703910614526, "grad_norm": 0.4545038044452667, "learning_rate": 0.0009116806722689076, "loss": 0.5501, "step": 3259 }, { "epoch": 1.8212290502793296, "grad_norm": 0.4350161850452423, "learning_rate": 0.0009116526610644257, "loss": 0.4169, "step": 3260 }, { "epoch": 1.8217877094972068, "grad_norm": 0.964316725730896, "learning_rate": 0.0009116246498599439, "loss": 0.5944, "step": 3261 }, { "epoch": 1.8223463687150838, "grad_norm": 0.5807787179946899, "learning_rate": 0.0009115966386554622, "loss": 0.3721, "step": 3262 }, { "epoch": 1.8229050279329608, "grad_norm": 0.5068808197975159, "learning_rate": 0.0009115686274509805, "loss": 0.5274, "step": 3263 }, { "epoch": 1.8234636871508378, "grad_norm": 0.5355903506278992, "learning_rate": 0.0009115406162464987, "loss": 0.49, "step": 3264 }, { "epoch": 1.824022346368715, "grad_norm": 0.5871938467025757, "learning_rate": 0.0009115126050420169, "loss": 0.5894, "step": 3265 }, { "epoch": 1.8245810055865923, "grad_norm": 1.748473882675171, "learning_rate": 0.000911484593837535, "loss": 0.5611, "step": 3266 }, { "epoch": 1.8251396648044693, "grad_norm": 1.1771451234817505, "learning_rate": 0.0009114565826330532, "loss": 0.5533, "step": 3267 }, { "epoch": 1.8256983240223463, "grad_norm": 0.746534526348114, "learning_rate": 0.0009114285714285715, "loss": 0.5372, "step": 3268 }, { "epoch": 1.8262569832402233, "grad_norm": 1.1431599855422974, "learning_rate": 0.0009114005602240897, "loss": 0.4518, "step": 3269 }, { "epoch": 1.8268156424581006, "grad_norm": 0.645499050617218, "learning_rate": 0.0009113725490196079, "loss": 0.4456, "step": 3270 }, { "epoch": 1.8273743016759778, "grad_norm": 
0.4873228371143341, "learning_rate": 0.000911344537815126, "loss": 0.5993, "step": 3271 }, { "epoch": 1.8279329608938548, "grad_norm": 0.6083270907402039, "learning_rate": 0.0009113165266106442, "loss": 0.4335, "step": 3272 }, { "epoch": 1.8284916201117318, "grad_norm": 0.5168682336807251, "learning_rate": 0.0009112885154061625, "loss": 0.5144, "step": 3273 }, { "epoch": 1.8290502793296088, "grad_norm": 0.43728867173194885, "learning_rate": 0.0009112605042016807, "loss": 0.4251, "step": 3274 }, { "epoch": 1.829608938547486, "grad_norm": 0.7454100847244263, "learning_rate": 0.0009112324929971989, "loss": 0.5582, "step": 3275 }, { "epoch": 1.830167597765363, "grad_norm": 0.5789195895195007, "learning_rate": 0.000911204481792717, "loss": 0.4246, "step": 3276 }, { "epoch": 1.8307262569832403, "grad_norm": 0.6177477240562439, "learning_rate": 0.0009111764705882352, "loss": 0.5446, "step": 3277 }, { "epoch": 1.8312849162011173, "grad_norm": 0.5867534279823303, "learning_rate": 0.0009111484593837536, "loss": 0.4863, "step": 3278 }, { "epoch": 1.8318435754189943, "grad_norm": 0.5285478234291077, "learning_rate": 0.0009111204481792718, "loss": 0.3922, "step": 3279 }, { "epoch": 1.8324022346368714, "grad_norm": 0.6765609383583069, "learning_rate": 0.00091109243697479, "loss": 0.6008, "step": 3280 }, { "epoch": 1.8329608938547486, "grad_norm": 0.39646437764167786, "learning_rate": 0.0009110644257703082, "loss": 0.4554, "step": 3281 }, { "epoch": 1.8335195530726258, "grad_norm": 3.053067445755005, "learning_rate": 0.0009110364145658263, "loss": 0.4177, "step": 3282 }, { "epoch": 1.8340782122905028, "grad_norm": 1.0648759603500366, "learning_rate": 0.0009110084033613446, "loss": 0.4587, "step": 3283 }, { "epoch": 1.8346368715083798, "grad_norm": 0.6261885166168213, "learning_rate": 0.0009109803921568628, "loss": 0.5741, "step": 3284 }, { "epoch": 1.8351955307262569, "grad_norm": 0.7103239297866821, "learning_rate": 0.000910952380952381, "loss": 0.4021, "step": 3285 }, { 
"epoch": 1.835754189944134, "grad_norm": 3.89915132522583, "learning_rate": 0.0009109243697478992, "loss": 0.4802, "step": 3286 }, { "epoch": 1.8363128491620113, "grad_norm": 1.1798536777496338, "learning_rate": 0.0009108963585434173, "loss": 0.6326, "step": 3287 }, { "epoch": 1.8368715083798883, "grad_norm": 0.6962305903434753, "learning_rate": 0.0009108683473389356, "loss": 0.4404, "step": 3288 }, { "epoch": 1.8374301675977653, "grad_norm": 0.4706648588180542, "learning_rate": 0.0009108403361344538, "loss": 0.5461, "step": 3289 }, { "epoch": 1.8379888268156424, "grad_norm": 5.161343574523926, "learning_rate": 0.000910812324929972, "loss": 0.4162, "step": 3290 }, { "epoch": 1.8385474860335196, "grad_norm": 1.0783441066741943, "learning_rate": 0.0009107843137254902, "loss": 0.5888, "step": 3291 }, { "epoch": 1.8391061452513966, "grad_norm": 2.5199368000030518, "learning_rate": 0.0009107563025210083, "loss": 0.4219, "step": 3292 }, { "epoch": 1.8396648044692738, "grad_norm": 0.6675357818603516, "learning_rate": 0.0009107282913165266, "loss": 0.4912, "step": 3293 }, { "epoch": 1.8402234636871508, "grad_norm": 0.842688262462616, "learning_rate": 0.0009107002801120449, "loss": 0.5007, "step": 3294 }, { "epoch": 1.8407821229050279, "grad_norm": 0.9746347069740295, "learning_rate": 0.0009106722689075631, "loss": 0.5165, "step": 3295 }, { "epoch": 1.8413407821229049, "grad_norm": 0.7330273389816284, "learning_rate": 0.0009106442577030813, "loss": 0.5351, "step": 3296 }, { "epoch": 1.841899441340782, "grad_norm": 0.4980505406856537, "learning_rate": 0.0009106162464985995, "loss": 0.4395, "step": 3297 }, { "epoch": 1.8424581005586593, "grad_norm": 0.6395367383956909, "learning_rate": 0.0009105882352941177, "loss": 0.4772, "step": 3298 }, { "epoch": 1.8430167597765363, "grad_norm": 0.4734107553958893, "learning_rate": 0.0009105602240896359, "loss": 0.446, "step": 3299 }, { "epoch": 1.8435754189944134, "grad_norm": 0.6829903721809387, "learning_rate": 0.0009105322128851541, 
"loss": 0.4506, "step": 3300 }, { "epoch": 1.8441340782122904, "grad_norm": 4.157322406768799, "learning_rate": 0.0009105042016806723, "loss": 0.5003, "step": 3301 }, { "epoch": 1.8446927374301676, "grad_norm": 1.6932682991027832, "learning_rate": 0.0009104761904761905, "loss": 0.4878, "step": 3302 }, { "epoch": 1.8452513966480448, "grad_norm": 0.6642430424690247, "learning_rate": 0.0009104481792717087, "loss": 0.3785, "step": 3303 }, { "epoch": 1.8458100558659218, "grad_norm": 0.5883099436759949, "learning_rate": 0.0009104201680672269, "loss": 0.4769, "step": 3304 }, { "epoch": 1.8463687150837989, "grad_norm": 0.6421769261360168, "learning_rate": 0.0009103921568627451, "loss": 0.5527, "step": 3305 }, { "epoch": 1.8469273743016759, "grad_norm": 0.6536919474601746, "learning_rate": 0.0009103641456582633, "loss": 0.465, "step": 3306 }, { "epoch": 1.847486033519553, "grad_norm": 1.170295238494873, "learning_rate": 0.0009103361344537815, "loss": 0.5485, "step": 3307 }, { "epoch": 1.8480446927374303, "grad_norm": 0.5307703018188477, "learning_rate": 0.0009103081232492998, "loss": 0.365, "step": 3308 }, { "epoch": 1.8486033519553073, "grad_norm": 0.5870599746704102, "learning_rate": 0.0009102801120448179, "loss": 0.3947, "step": 3309 }, { "epoch": 1.8491620111731844, "grad_norm": 1.0016573667526245, "learning_rate": 0.0009102521008403361, "loss": 0.5521, "step": 3310 }, { "epoch": 1.8497206703910614, "grad_norm": 2.484774112701416, "learning_rate": 0.0009102240896358544, "loss": 0.5415, "step": 3311 }, { "epoch": 1.8502793296089386, "grad_norm": 0.8974372744560242, "learning_rate": 0.0009101960784313726, "loss": 0.4474, "step": 3312 }, { "epoch": 1.8508379888268156, "grad_norm": 24.924850463867188, "learning_rate": 0.0009101680672268909, "loss": 0.5219, "step": 3313 }, { "epoch": 1.8513966480446928, "grad_norm": 0.6826589107513428, "learning_rate": 0.000910140056022409, "loss": 0.4647, "step": 3314 }, { "epoch": 1.8519553072625698, "grad_norm": 0.7049552798271179, 
"learning_rate": 0.0009101120448179272, "loss": 0.6995, "step": 3315 }, { "epoch": 1.8525139664804469, "grad_norm": 0.6224097013473511, "learning_rate": 0.0009100840336134454, "loss": 0.5642, "step": 3316 }, { "epoch": 1.8530726256983239, "grad_norm": 0.4712682068347931, "learning_rate": 0.0009100560224089636, "loss": 0.4338, "step": 3317 }, { "epoch": 1.853631284916201, "grad_norm": 0.5854752063751221, "learning_rate": 0.0009100280112044819, "loss": 0.3791, "step": 3318 }, { "epoch": 1.8541899441340783, "grad_norm": 1.2928085327148438, "learning_rate": 0.00091, "loss": 0.4136, "step": 3319 }, { "epoch": 1.8547486033519553, "grad_norm": 3.0672287940979004, "learning_rate": 0.0009099719887955182, "loss": 0.4559, "step": 3320 }, { "epoch": 1.8553072625698324, "grad_norm": 0.8925442695617676, "learning_rate": 0.0009099439775910364, "loss": 0.5204, "step": 3321 }, { "epoch": 1.8558659217877094, "grad_norm": 0.5389899611473083, "learning_rate": 0.0009099159663865546, "loss": 0.4773, "step": 3322 }, { "epoch": 1.8564245810055866, "grad_norm": 0.4795750379562378, "learning_rate": 0.0009098879551820729, "loss": 0.457, "step": 3323 }, { "epoch": 1.8569832402234638, "grad_norm": 2.4752981662750244, "learning_rate": 0.0009098599439775911, "loss": 0.6095, "step": 3324 }, { "epoch": 1.8575418994413408, "grad_norm": 0.7278405427932739, "learning_rate": 0.0009098319327731092, "loss": 0.4303, "step": 3325 }, { "epoch": 1.8581005586592179, "grad_norm": 2.557514190673828, "learning_rate": 0.0009098039215686274, "loss": 0.4835, "step": 3326 }, { "epoch": 1.8586592178770949, "grad_norm": 0.7001586556434631, "learning_rate": 0.0009097759103641456, "loss": 0.5267, "step": 3327 }, { "epoch": 1.859217877094972, "grad_norm": 0.5900753736495972, "learning_rate": 0.000909747899159664, "loss": 0.5004, "step": 3328 }, { "epoch": 1.8597765363128491, "grad_norm": 10.75402545928955, "learning_rate": 0.0009097198879551822, "loss": 0.4819, "step": 3329 }, { "epoch": 1.8603351955307263, "grad_norm": 
1.0458437204360962, "learning_rate": 0.0009096918767507003, "loss": 0.5626, "step": 3330 }, { "epoch": 1.8608938547486034, "grad_norm": 0.5565247535705566, "learning_rate": 0.0009096638655462185, "loss": 0.4633, "step": 3331 }, { "epoch": 1.8614525139664804, "grad_norm": 0.6101935505867004, "learning_rate": 0.0009096358543417367, "loss": 0.3707, "step": 3332 }, { "epoch": 1.8620111731843574, "grad_norm": 1.3918886184692383, "learning_rate": 0.000909607843137255, "loss": 0.5561, "step": 3333 }, { "epoch": 1.8625698324022346, "grad_norm": 1.9468659162521362, "learning_rate": 0.0009095798319327732, "loss": 0.4529, "step": 3334 }, { "epoch": 1.8631284916201118, "grad_norm": 0.4596400260925293, "learning_rate": 0.0009095518207282913, "loss": 0.4397, "step": 3335 }, { "epoch": 1.8636871508379889, "grad_norm": 0.6582339406013489, "learning_rate": 0.0009095238095238095, "loss": 0.4956, "step": 3336 }, { "epoch": 1.8642458100558659, "grad_norm": 0.6420575380325317, "learning_rate": 0.0009094957983193277, "loss": 0.5357, "step": 3337 }, { "epoch": 1.8648044692737429, "grad_norm": 0.6272958517074585, "learning_rate": 0.000909467787114846, "loss": 0.386, "step": 3338 }, { "epoch": 1.8653631284916201, "grad_norm": 0.606256902217865, "learning_rate": 0.0009094397759103642, "loss": 0.4967, "step": 3339 }, { "epoch": 1.8659217877094973, "grad_norm": 0.652633786201477, "learning_rate": 0.0009094117647058824, "loss": 0.4033, "step": 3340 }, { "epoch": 1.8664804469273744, "grad_norm": 0.33578285574913025, "learning_rate": 0.0009093837535014005, "loss": 0.3376, "step": 3341 }, { "epoch": 1.8670391061452514, "grad_norm": 0.5056957006454468, "learning_rate": 0.0009093557422969187, "loss": 0.5379, "step": 3342 }, { "epoch": 1.8675977653631284, "grad_norm": 0.40216267108917236, "learning_rate": 0.0009093277310924369, "loss": 0.4526, "step": 3343 }, { "epoch": 1.8681564245810056, "grad_norm": 3.4325509071350098, "learning_rate": 0.0009092997198879553, "loss": 0.5134, "step": 3344 }, { 
"epoch": 1.8687150837988828, "grad_norm": 0.6396369338035583, "learning_rate": 0.0009092717086834735, "loss": 0.6644, "step": 3345 }, { "epoch": 1.8692737430167599, "grad_norm": 0.8216027617454529, "learning_rate": 0.0009092436974789916, "loss": 0.4511, "step": 3346 }, { "epoch": 1.8698324022346369, "grad_norm": 0.6777360439300537, "learning_rate": 0.0009092156862745098, "loss": 0.5113, "step": 3347 }, { "epoch": 1.8703910614525139, "grad_norm": 0.6325851678848267, "learning_rate": 0.000909187675070028, "loss": 0.5297, "step": 3348 }, { "epoch": 1.8709497206703911, "grad_norm": 1.1637097597122192, "learning_rate": 0.0009091596638655463, "loss": 0.421, "step": 3349 }, { "epoch": 1.8715083798882681, "grad_norm": 0.5719764232635498, "learning_rate": 0.0009091316526610645, "loss": 0.4587, "step": 3350 }, { "epoch": 1.8720670391061454, "grad_norm": 0.568630576133728, "learning_rate": 0.0009091036414565826, "loss": 0.4217, "step": 3351 }, { "epoch": 1.8726256983240224, "grad_norm": 0.706224799156189, "learning_rate": 0.0009090756302521008, "loss": 0.4824, "step": 3352 }, { "epoch": 1.8731843575418994, "grad_norm": 1.231709361076355, "learning_rate": 0.000909047619047619, "loss": 0.4946, "step": 3353 }, { "epoch": 1.8737430167597764, "grad_norm": 0.5383389592170715, "learning_rate": 0.0009090196078431373, "loss": 0.5246, "step": 3354 }, { "epoch": 1.8743016759776536, "grad_norm": 0.45403286814689636, "learning_rate": 0.0009089915966386555, "loss": 0.4279, "step": 3355 }, { "epoch": 1.8748603351955309, "grad_norm": 0.4688047766685486, "learning_rate": 0.0009089635854341737, "loss": 0.3409, "step": 3356 }, { "epoch": 1.8754189944134079, "grad_norm": 0.7380261421203613, "learning_rate": 0.0009089355742296918, "loss": 0.4302, "step": 3357 }, { "epoch": 1.8759776536312849, "grad_norm": 1.5678224563598633, "learning_rate": 0.00090890756302521, "loss": 0.445, "step": 3358 }, { "epoch": 1.876536312849162, "grad_norm": 0.8303191661834717, "learning_rate": 0.0009088795518207283, 
"loss": 0.4072, "step": 3359 }, { "epoch": 1.8770949720670391, "grad_norm": 8.348840713500977, "learning_rate": 0.0009088515406162466, "loss": 0.5845, "step": 3360 }, { "epoch": 1.8776536312849164, "grad_norm": 1.5485373735427856, "learning_rate": 0.0009088235294117648, "loss": 0.4745, "step": 3361 }, { "epoch": 1.8782122905027934, "grad_norm": 1.3296009302139282, "learning_rate": 0.0009087955182072829, "loss": 0.5202, "step": 3362 }, { "epoch": 1.8787709497206704, "grad_norm": 0.43270382285118103, "learning_rate": 0.0009087675070028011, "loss": 0.4692, "step": 3363 }, { "epoch": 1.8793296089385474, "grad_norm": 0.46897760033607483, "learning_rate": 0.0009087394957983194, "loss": 0.5073, "step": 3364 }, { "epoch": 1.8798882681564246, "grad_norm": 0.38963064551353455, "learning_rate": 0.0009087114845938376, "loss": 0.4243, "step": 3365 }, { "epoch": 1.8804469273743016, "grad_norm": 0.6272234320640564, "learning_rate": 0.0009086834733893558, "loss": 0.4974, "step": 3366 }, { "epoch": 1.8810055865921789, "grad_norm": 0.8900682330131531, "learning_rate": 0.0009086554621848739, "loss": 0.42, "step": 3367 }, { "epoch": 1.8815642458100559, "grad_norm": 0.6141631603240967, "learning_rate": 0.0009086274509803921, "loss": 0.5032, "step": 3368 }, { "epoch": 1.882122905027933, "grad_norm": 0.5407202839851379, "learning_rate": 0.0009085994397759104, "loss": 0.4056, "step": 3369 }, { "epoch": 1.88268156424581, "grad_norm": 0.6230933666229248, "learning_rate": 0.0009085714285714286, "loss": 0.6713, "step": 3370 }, { "epoch": 1.8832402234636871, "grad_norm": 0.6125653386116028, "learning_rate": 0.0009085434173669468, "loss": 0.4475, "step": 3371 }, { "epoch": 1.8837988826815644, "grad_norm": 1.4241896867752075, "learning_rate": 0.000908515406162465, "loss": 0.4833, "step": 3372 }, { "epoch": 1.8843575418994414, "grad_norm": 0.5077396631240845, "learning_rate": 0.0009084873949579831, "loss": 0.462, "step": 3373 }, { "epoch": 1.8849162011173184, "grad_norm": 0.4441854953765869, 
"learning_rate": 0.0009084593837535014, "loss": 0.4636, "step": 3374 }, { "epoch": 1.8854748603351954, "grad_norm": 1.3085445165634155, "learning_rate": 0.0009084313725490196, "loss": 0.4555, "step": 3375 }, { "epoch": 1.8860335195530726, "grad_norm": 0.6074004769325256, "learning_rate": 0.0009084033613445379, "loss": 0.4144, "step": 3376 }, { "epoch": 1.8865921787709499, "grad_norm": 0.5048871636390686, "learning_rate": 0.0009083753501400561, "loss": 0.4632, "step": 3377 }, { "epoch": 1.8871508379888269, "grad_norm": 0.586675763130188, "learning_rate": 0.0009083473389355742, "loss": 0.5268, "step": 3378 }, { "epoch": 1.887709497206704, "grad_norm": 0.48384320735931396, "learning_rate": 0.0009083193277310925, "loss": 0.4257, "step": 3379 }, { "epoch": 1.888268156424581, "grad_norm": 0.5696973204612732, "learning_rate": 0.0009082913165266107, "loss": 0.3715, "step": 3380 }, { "epoch": 1.8888268156424581, "grad_norm": 0.47410765290260315, "learning_rate": 0.0009082633053221289, "loss": 0.392, "step": 3381 }, { "epoch": 1.8893854748603351, "grad_norm": 1.4106309413909912, "learning_rate": 0.0009082352941176471, "loss": 0.5432, "step": 3382 }, { "epoch": 1.8899441340782124, "grad_norm": 3.170473575592041, "learning_rate": 0.0009082072829131652, "loss": 0.3659, "step": 3383 }, { "epoch": 1.8905027932960894, "grad_norm": 0.5562348365783691, "learning_rate": 0.0009081792717086835, "loss": 0.5037, "step": 3384 }, { "epoch": 1.8910614525139664, "grad_norm": 0.6246789693832397, "learning_rate": 0.0009081512605042017, "loss": 0.4931, "step": 3385 }, { "epoch": 1.8916201117318434, "grad_norm": 0.6318274140357971, "learning_rate": 0.0009081232492997199, "loss": 0.4445, "step": 3386 }, { "epoch": 1.8921787709497206, "grad_norm": 0.9648261070251465, "learning_rate": 0.0009080952380952381, "loss": 0.4032, "step": 3387 }, { "epoch": 1.8927374301675979, "grad_norm": 0.51225745677948, "learning_rate": 0.0009080672268907563, "loss": 0.4821, "step": 3388 }, { "epoch": 
1.893296089385475, "grad_norm": 0.4842418134212494, "learning_rate": 0.0009080392156862745, "loss": 0.4827, "step": 3389 }, { "epoch": 1.893854748603352, "grad_norm": 0.5580561757087708, "learning_rate": 0.0009080112044817927, "loss": 0.5639, "step": 3390 }, { "epoch": 1.894413407821229, "grad_norm": 0.42107322812080383, "learning_rate": 0.0009079831932773109, "loss": 0.5225, "step": 3391 }, { "epoch": 1.8949720670391061, "grad_norm": 0.8365760445594788, "learning_rate": 0.0009079551820728291, "loss": 0.386, "step": 3392 }, { "epoch": 1.8955307262569834, "grad_norm": 0.5805233120918274, "learning_rate": 0.0009079271708683474, "loss": 0.4543, "step": 3393 }, { "epoch": 1.8960893854748604, "grad_norm": 0.4055412709712982, "learning_rate": 0.0009078991596638656, "loss": 0.4576, "step": 3394 }, { "epoch": 1.8966480446927374, "grad_norm": 0.714622437953949, "learning_rate": 0.0009078711484593838, "loss": 0.5736, "step": 3395 }, { "epoch": 1.8972067039106144, "grad_norm": 0.9913580417633057, "learning_rate": 0.000907843137254902, "loss": 0.4861, "step": 3396 }, { "epoch": 1.8977653631284916, "grad_norm": 0.5982085466384888, "learning_rate": 0.0009078151260504202, "loss": 0.4136, "step": 3397 }, { "epoch": 1.8983240223463689, "grad_norm": 0.389060914516449, "learning_rate": 0.0009077871148459384, "loss": 0.3704, "step": 3398 }, { "epoch": 1.8988826815642459, "grad_norm": 0.47859811782836914, "learning_rate": 0.0009077591036414566, "loss": 0.4942, "step": 3399 }, { "epoch": 1.899441340782123, "grad_norm": 0.519008994102478, "learning_rate": 0.0009077310924369748, "loss": 0.4471, "step": 3400 }, { "epoch": 1.9, "grad_norm": 0.4630208909511566, "learning_rate": 0.000907703081232493, "loss": 0.4148, "step": 3401 }, { "epoch": 1.9005586592178771, "grad_norm": 1.0838171243667603, "learning_rate": 0.0009076750700280112, "loss": 0.589, "step": 3402 }, { "epoch": 1.9011173184357542, "grad_norm": 0.6444001793861389, "learning_rate": 0.0009076470588235294, "loss": 0.4501, "step": 
3403 }, { "epoch": 1.9016759776536314, "grad_norm": 0.4720785319805145, "learning_rate": 0.0009076190476190477, "loss": 0.515, "step": 3404 }, { "epoch": 1.9022346368715084, "grad_norm": 1.9518955945968628, "learning_rate": 0.0009075910364145658, "loss": 0.4045, "step": 3405 }, { "epoch": 1.9027932960893854, "grad_norm": 0.7964692115783691, "learning_rate": 0.000907563025210084, "loss": 0.4541, "step": 3406 }, { "epoch": 1.9033519553072624, "grad_norm": 0.9589069485664368, "learning_rate": 0.0009075350140056022, "loss": 0.6191, "step": 3407 }, { "epoch": 1.9039106145251397, "grad_norm": 0.45958298444747925, "learning_rate": 0.0009075070028011204, "loss": 0.4787, "step": 3408 }, { "epoch": 1.9044692737430169, "grad_norm": 0.4079684317111969, "learning_rate": 0.0009074789915966388, "loss": 0.4345, "step": 3409 }, { "epoch": 1.905027932960894, "grad_norm": 0.5356015563011169, "learning_rate": 0.0009074509803921569, "loss": 0.3998, "step": 3410 }, { "epoch": 1.905586592178771, "grad_norm": 0.6153750419616699, "learning_rate": 0.0009074229691876751, "loss": 0.4284, "step": 3411 }, { "epoch": 1.906145251396648, "grad_norm": 0.7890805006027222, "learning_rate": 0.0009073949579831933, "loss": 0.402, "step": 3412 }, { "epoch": 1.9067039106145252, "grad_norm": 0.4278022348880768, "learning_rate": 0.0009073669467787115, "loss": 0.4263, "step": 3413 }, { "epoch": 1.9072625698324024, "grad_norm": 0.6270452737808228, "learning_rate": 0.0009073389355742298, "loss": 0.4042, "step": 3414 }, { "epoch": 1.9078212290502794, "grad_norm": 1.2224963903427124, "learning_rate": 0.0009073109243697479, "loss": 0.4413, "step": 3415 }, { "epoch": 1.9083798882681564, "grad_norm": 0.6034187078475952, "learning_rate": 0.0009072829131652661, "loss": 0.4356, "step": 3416 }, { "epoch": 1.9089385474860334, "grad_norm": 1.150604248046875, "learning_rate": 0.0009072549019607843, "loss": 0.6877, "step": 3417 }, { "epoch": 1.9094972067039107, "grad_norm": 0.5894142985343933, "learning_rate": 
0.0009072268907563025, "loss": 0.5169, "step": 3418 }, { "epoch": 1.9100558659217877, "grad_norm": 0.7291961312294006, "learning_rate": 0.0009071988795518208, "loss": 0.6585, "step": 3419 }, { "epoch": 1.910614525139665, "grad_norm": 5.241934776306152, "learning_rate": 0.000907170868347339, "loss": 0.4698, "step": 3420 }, { "epoch": 1.911173184357542, "grad_norm": 0.6813094019889832, "learning_rate": 0.0009071428571428571, "loss": 0.4977, "step": 3421 }, { "epoch": 1.911731843575419, "grad_norm": 0.7271748781204224, "learning_rate": 0.0009071148459383753, "loss": 0.377, "step": 3422 }, { "epoch": 1.912290502793296, "grad_norm": 0.7742030024528503, "learning_rate": 0.0009070868347338935, "loss": 0.48, "step": 3423 }, { "epoch": 1.9128491620111732, "grad_norm": 0.5869528651237488, "learning_rate": 0.0009070588235294118, "loss": 0.4957, "step": 3424 }, { "epoch": 1.9134078212290504, "grad_norm": 0.7639419436454773, "learning_rate": 0.00090703081232493, "loss": 0.4396, "step": 3425 }, { "epoch": 1.9139664804469274, "grad_norm": 0.6555896997451782, "learning_rate": 0.0009070028011204482, "loss": 0.4396, "step": 3426 }, { "epoch": 1.9145251396648044, "grad_norm": 0.691922664642334, "learning_rate": 0.0009069747899159664, "loss": 0.4835, "step": 3427 }, { "epoch": 1.9150837988826814, "grad_norm": 0.4975016415119171, "learning_rate": 0.0009069467787114846, "loss": 0.4799, "step": 3428 }, { "epoch": 1.9156424581005587, "grad_norm": 2.0976202487945557, "learning_rate": 0.0009069187675070029, "loss": 0.4976, "step": 3429 }, { "epoch": 1.916201117318436, "grad_norm": 0.48069363832473755, "learning_rate": 0.0009068907563025211, "loss": 0.5978, "step": 3430 }, { "epoch": 1.916759776536313, "grad_norm": 0.6229406595230103, "learning_rate": 0.0009068627450980392, "loss": 0.5495, "step": 3431 }, { "epoch": 1.91731843575419, "grad_norm": 0.7944614291191101, "learning_rate": 0.0009068347338935574, "loss": 0.471, "step": 3432 }, { "epoch": 1.917877094972067, "grad_norm": 
0.5190448760986328, "learning_rate": 0.0009068067226890756, "loss": 0.5354, "step": 3433 }, { "epoch": 1.9184357541899442, "grad_norm": 1.036592721939087, "learning_rate": 0.0009067787114845939, "loss": 0.4768, "step": 3434 }, { "epoch": 1.9189944134078212, "grad_norm": 0.7237743735313416, "learning_rate": 0.0009067507002801121, "loss": 0.6533, "step": 3435 }, { "epoch": 1.9195530726256984, "grad_norm": 0.7009897828102112, "learning_rate": 0.0009067226890756303, "loss": 0.5783, "step": 3436 }, { "epoch": 1.9201117318435754, "grad_norm": 0.5043143630027771, "learning_rate": 0.0009066946778711484, "loss": 0.4794, "step": 3437 }, { "epoch": 1.9206703910614524, "grad_norm": 0.4626138508319855, "learning_rate": 0.0009066666666666666, "loss": 0.5044, "step": 3438 }, { "epoch": 1.9212290502793297, "grad_norm": 0.7077670693397522, "learning_rate": 0.0009066386554621849, "loss": 0.4474, "step": 3439 }, { "epoch": 1.9217877094972067, "grad_norm": 0.7444259524345398, "learning_rate": 0.0009066106442577031, "loss": 0.519, "step": 3440 }, { "epoch": 1.922346368715084, "grad_norm": 0.3730005919933319, "learning_rate": 0.0009065826330532213, "loss": 0.4742, "step": 3441 }, { "epoch": 1.922905027932961, "grad_norm": 0.39288267493247986, "learning_rate": 0.0009065546218487394, "loss": 0.5099, "step": 3442 }, { "epoch": 1.923463687150838, "grad_norm": 0.37690800428390503, "learning_rate": 0.0009065266106442577, "loss": 0.3998, "step": 3443 }, { "epoch": 1.924022346368715, "grad_norm": 0.6762821674346924, "learning_rate": 0.000906498599439776, "loss": 0.4353, "step": 3444 }, { "epoch": 1.9245810055865922, "grad_norm": 0.6250193119049072, "learning_rate": 0.0009064705882352942, "loss": 0.4794, "step": 3445 }, { "epoch": 1.9251396648044694, "grad_norm": 0.6170416474342346, "learning_rate": 0.0009064425770308124, "loss": 0.5364, "step": 3446 }, { "epoch": 1.9256983240223464, "grad_norm": 2.247410297393799, "learning_rate": 0.0009064145658263305, "loss": 0.5995, "step": 3447 }, { 
"epoch": 1.9262569832402234, "grad_norm": 1.9219515323638916, "learning_rate": 0.0009063865546218487, "loss": 0.5276, "step": 3448 }, { "epoch": 1.9268156424581004, "grad_norm": 1.4085333347320557, "learning_rate": 0.000906358543417367, "loss": 0.4371, "step": 3449 }, { "epoch": 1.9273743016759777, "grad_norm": 0.7772863507270813, "learning_rate": 0.0009063305322128852, "loss": 0.5818, "step": 3450 }, { "epoch": 1.927932960893855, "grad_norm": 0.627034604549408, "learning_rate": 0.0009063025210084034, "loss": 0.442, "step": 3451 }, { "epoch": 1.928491620111732, "grad_norm": 0.6423159241676331, "learning_rate": 0.0009062745098039216, "loss": 0.5428, "step": 3452 }, { "epoch": 1.929050279329609, "grad_norm": 0.6216955780982971, "learning_rate": 0.0009062464985994397, "loss": 0.477, "step": 3453 }, { "epoch": 1.929608938547486, "grad_norm": 0.46773308515548706, "learning_rate": 0.000906218487394958, "loss": 0.4536, "step": 3454 }, { "epoch": 1.9301675977653632, "grad_norm": 0.6190614700317383, "learning_rate": 0.0009061904761904762, "loss": 0.52, "step": 3455 }, { "epoch": 1.9307262569832402, "grad_norm": 0.5048285722732544, "learning_rate": 0.0009061624649859944, "loss": 0.4504, "step": 3456 }, { "epoch": 1.9312849162011174, "grad_norm": 0.639789342880249, "learning_rate": 0.0009061344537815126, "loss": 0.5773, "step": 3457 }, { "epoch": 1.9318435754189944, "grad_norm": 0.7518055438995361, "learning_rate": 0.0009061064425770307, "loss": 0.6785, "step": 3458 }, { "epoch": 1.9324022346368714, "grad_norm": 0.5717806220054626, "learning_rate": 0.0009060784313725491, "loss": 0.5382, "step": 3459 }, { "epoch": 1.9329608938547485, "grad_norm": 0.8811807632446289, "learning_rate": 0.0009060504201680673, "loss": 0.4675, "step": 3460 }, { "epoch": 1.9335195530726257, "grad_norm": 0.39842790365219116, "learning_rate": 0.0009060224089635855, "loss": 0.3666, "step": 3461 }, { "epoch": 1.934078212290503, "grad_norm": 0.8868865966796875, "learning_rate": 0.0009059943977591037, 
"loss": 0.4776, "step": 3462 }, { "epoch": 1.93463687150838, "grad_norm": 0.43413469195365906, "learning_rate": 0.0009059663865546218, "loss": 0.4532, "step": 3463 }, { "epoch": 1.935195530726257, "grad_norm": 1.447930932044983, "learning_rate": 0.0009059383753501401, "loss": 0.391, "step": 3464 }, { "epoch": 1.935754189944134, "grad_norm": 0.8444411754608154, "learning_rate": 0.0009059103641456583, "loss": 0.4676, "step": 3465 }, { "epoch": 1.9363128491620112, "grad_norm": 1.0747498273849487, "learning_rate": 0.0009058823529411765, "loss": 0.457, "step": 3466 }, { "epoch": 1.9368715083798884, "grad_norm": 0.6661196351051331, "learning_rate": 0.0009058543417366947, "loss": 0.5078, "step": 3467 }, { "epoch": 1.9374301675977654, "grad_norm": 0.6356555819511414, "learning_rate": 0.0009058263305322129, "loss": 0.49, "step": 3468 }, { "epoch": 1.9379888268156424, "grad_norm": 0.7038290500640869, "learning_rate": 0.0009057983193277311, "loss": 0.5258, "step": 3469 }, { "epoch": 1.9385474860335195, "grad_norm": 0.5217005014419556, "learning_rate": 0.0009057703081232493, "loss": 0.5581, "step": 3470 }, { "epoch": 1.9391061452513967, "grad_norm": 1.912278652191162, "learning_rate": 0.0009057422969187675, "loss": 0.5453, "step": 3471 }, { "epoch": 1.9396648044692737, "grad_norm": 0.6004762649536133, "learning_rate": 0.0009057142857142857, "loss": 0.4522, "step": 3472 }, { "epoch": 1.940223463687151, "grad_norm": 0.5349053740501404, "learning_rate": 0.0009056862745098039, "loss": 0.5741, "step": 3473 }, { "epoch": 1.940782122905028, "grad_norm": 0.5003183484077454, "learning_rate": 0.0009056582633053221, "loss": 0.4716, "step": 3474 }, { "epoch": 1.941340782122905, "grad_norm": 1.856022596359253, "learning_rate": 0.0009056302521008404, "loss": 0.533, "step": 3475 }, { "epoch": 1.941899441340782, "grad_norm": 0.43124836683273315, "learning_rate": 0.0009056022408963586, "loss": 0.4012, "step": 3476 }, { "epoch": 1.9424581005586592, "grad_norm": 0.8166503310203552, 
"learning_rate": 0.0009055742296918768, "loss": 0.5546, "step": 3477 }, { "epoch": 1.9430167597765364, "grad_norm": 0.5390708446502686, "learning_rate": 0.000905546218487395, "loss": 0.3833, "step": 3478 }, { "epoch": 1.9435754189944134, "grad_norm": 0.5894172787666321, "learning_rate": 0.0009055182072829132, "loss": 0.5986, "step": 3479 }, { "epoch": 1.9441340782122905, "grad_norm": 0.7875383496284485, "learning_rate": 0.0009054901960784314, "loss": 0.4902, "step": 3480 }, { "epoch": 1.9446927374301675, "grad_norm": 0.431085467338562, "learning_rate": 0.0009054621848739496, "loss": 0.3963, "step": 3481 }, { "epoch": 1.9452513966480447, "grad_norm": 0.5462560653686523, "learning_rate": 0.0009054341736694678, "loss": 0.4574, "step": 3482 }, { "epoch": 1.945810055865922, "grad_norm": 0.4532710015773773, "learning_rate": 0.000905406162464986, "loss": 0.4163, "step": 3483 }, { "epoch": 1.946368715083799, "grad_norm": 0.3987860679626465, "learning_rate": 0.0009053781512605043, "loss": 0.327, "step": 3484 }, { "epoch": 1.946927374301676, "grad_norm": 5.585010528564453, "learning_rate": 0.0009053501400560224, "loss": 0.4719, "step": 3485 }, { "epoch": 1.947486033519553, "grad_norm": 2.0112860202789307, "learning_rate": 0.0009053221288515406, "loss": 0.4288, "step": 3486 }, { "epoch": 1.9480446927374302, "grad_norm": 0.5010934472084045, "learning_rate": 0.0009052941176470588, "loss": 0.4974, "step": 3487 }, { "epoch": 1.9486033519553074, "grad_norm": 0.6223816275596619, "learning_rate": 0.000905266106442577, "loss": 0.4894, "step": 3488 }, { "epoch": 1.9491620111731844, "grad_norm": 0.5376791954040527, "learning_rate": 0.0009052380952380953, "loss": 0.4926, "step": 3489 }, { "epoch": 1.9497206703910615, "grad_norm": 0.563200056552887, "learning_rate": 0.0009052100840336134, "loss": 0.3986, "step": 3490 }, { "epoch": 1.9502793296089385, "grad_norm": 0.4292796850204468, "learning_rate": 0.0009051820728291316, "loss": 0.4133, "step": 3491 }, { "epoch": 1.9508379888268157, 
"grad_norm": 0.5368371605873108, "learning_rate": 0.0009051540616246499, "loss": 0.4386, "step": 3492 }, { "epoch": 1.9513966480446927, "grad_norm": 0.5037499666213989, "learning_rate": 0.0009051260504201681, "loss": 0.4515, "step": 3493 }, { "epoch": 1.95195530726257, "grad_norm": 0.7200754284858704, "learning_rate": 0.0009050980392156864, "loss": 0.5068, "step": 3494 }, { "epoch": 1.952513966480447, "grad_norm": 0.6245931386947632, "learning_rate": 0.0009050700280112045, "loss": 0.4626, "step": 3495 }, { "epoch": 1.953072625698324, "grad_norm": 0.4852912425994873, "learning_rate": 0.0009050420168067227, "loss": 0.5257, "step": 3496 }, { "epoch": 1.953631284916201, "grad_norm": 0.4774476885795593, "learning_rate": 0.0009050140056022409, "loss": 0.4487, "step": 3497 }, { "epoch": 1.9541899441340782, "grad_norm": 1.0392900705337524, "learning_rate": 0.0009049859943977591, "loss": 0.4675, "step": 3498 }, { "epoch": 1.9547486033519554, "grad_norm": 0.8098852634429932, "learning_rate": 0.0009049579831932774, "loss": 0.6607, "step": 3499 }, { "epoch": 1.9553072625698324, "grad_norm": 0.5951655507087708, "learning_rate": 0.0009049299719887956, "loss": 0.4997, "step": 3500 }, { "epoch": 1.9553072625698324, "eval_cer": 0.09693081513971175, "eval_loss": 0.3673718273639679, "eval_runtime": 55.528, "eval_samples_per_second": 81.725, "eval_steps_per_second": 5.115, "eval_wer": 0.3797100639646937, "step": 3500 }, { "epoch": 1.9558659217877095, "grad_norm": 0.4045039713382721, "learning_rate": 0.0009049019607843137, "loss": 0.3696, "step": 3501 }, { "epoch": 1.9564245810055865, "grad_norm": 0.48960962891578674, "learning_rate": 0.0009048739495798319, "loss": 0.5343, "step": 3502 }, { "epoch": 1.9569832402234637, "grad_norm": 0.3607482314109802, "learning_rate": 0.0009048459383753501, "loss": 0.4177, "step": 3503 }, { "epoch": 1.957541899441341, "grad_norm": 0.5582266449928284, "learning_rate": 0.0009048179271708684, "loss": 0.5064, "step": 3504 }, { "epoch": 1.958100558659218, 
"grad_norm": 0.4474537670612335, "learning_rate": 0.0009047899159663866, "loss": 0.4405, "step": 3505 }, { "epoch": 1.958659217877095, "grad_norm": 0.8795913457870483, "learning_rate": 0.0009047619047619047, "loss": 0.4723, "step": 3506 }, { "epoch": 1.959217877094972, "grad_norm": 0.42152541875839233, "learning_rate": 0.0009047338935574229, "loss": 0.4382, "step": 3507 }, { "epoch": 1.9597765363128492, "grad_norm": 0.630465567111969, "learning_rate": 0.0009047058823529412, "loss": 0.3956, "step": 3508 }, { "epoch": 1.9603351955307262, "grad_norm": 0.38271746039390564, "learning_rate": 0.0009046778711484595, "loss": 0.4385, "step": 3509 }, { "epoch": 1.9608938547486034, "grad_norm": 0.7205129861831665, "learning_rate": 0.0009046498599439777, "loss": 0.5121, "step": 3510 }, { "epoch": 1.9614525139664805, "grad_norm": 0.6931906342506409, "learning_rate": 0.0009046218487394958, "loss": 0.4694, "step": 3511 }, { "epoch": 1.9620111731843575, "grad_norm": 0.5405535101890564, "learning_rate": 0.000904593837535014, "loss": 0.5335, "step": 3512 }, { "epoch": 1.9625698324022345, "grad_norm": 0.4896290600299835, "learning_rate": 0.0009045658263305322, "loss": 0.5676, "step": 3513 }, { "epoch": 1.9631284916201117, "grad_norm": 0.6114362478256226, "learning_rate": 0.0009045378151260505, "loss": 0.4836, "step": 3514 }, { "epoch": 1.963687150837989, "grad_norm": 0.5427936911582947, "learning_rate": 0.0009045098039215687, "loss": 0.4397, "step": 3515 }, { "epoch": 1.964245810055866, "grad_norm": 0.7454958558082581, "learning_rate": 0.0009044817927170869, "loss": 0.4664, "step": 3516 }, { "epoch": 1.964804469273743, "grad_norm": 0.5674418807029724, "learning_rate": 0.000904453781512605, "loss": 0.417, "step": 3517 }, { "epoch": 1.96536312849162, "grad_norm": 0.6599341034889221, "learning_rate": 0.0009044257703081232, "loss": 0.6134, "step": 3518 }, { "epoch": 1.9659217877094972, "grad_norm": 0.6833198070526123, "learning_rate": 0.0009043977591036415, "loss": 0.539, "step": 3519 }, 
{ "epoch": 1.9664804469273744, "grad_norm": 1.0619444847106934, "learning_rate": 0.0009043697478991597, "loss": 0.596, "step": 3520 }, { "epoch": 1.9670391061452515, "grad_norm": 0.6789833903312683, "learning_rate": 0.0009043417366946779, "loss": 0.497, "step": 3521 }, { "epoch": 1.9675977653631285, "grad_norm": 0.4983786940574646, "learning_rate": 0.000904313725490196, "loss": 0.3507, "step": 3522 }, { "epoch": 1.9681564245810055, "grad_norm": 0.7860569357872009, "learning_rate": 0.0009042857142857142, "loss": 0.4384, "step": 3523 }, { "epoch": 1.9687150837988827, "grad_norm": 0.530733585357666, "learning_rate": 0.0009042577030812326, "loss": 0.4214, "step": 3524 }, { "epoch": 1.9692737430167597, "grad_norm": 0.8622692823410034, "learning_rate": 0.0009042296918767508, "loss": 0.5221, "step": 3525 }, { "epoch": 1.969832402234637, "grad_norm": 0.9509183168411255, "learning_rate": 0.000904201680672269, "loss": 0.5467, "step": 3526 }, { "epoch": 1.970391061452514, "grad_norm": 0.5086579918861389, "learning_rate": 0.0009041736694677871, "loss": 0.5532, "step": 3527 }, { "epoch": 1.970949720670391, "grad_norm": 0.6074703335762024, "learning_rate": 0.0009041456582633053, "loss": 0.5062, "step": 3528 }, { "epoch": 1.971508379888268, "grad_norm": 1.8037029504776, "learning_rate": 0.0009041176470588236, "loss": 0.4707, "step": 3529 }, { "epoch": 1.9720670391061452, "grad_norm": 5.210567474365234, "learning_rate": 0.0009040896358543418, "loss": 0.6929, "step": 3530 }, { "epoch": 1.9726256983240225, "grad_norm": 0.5642329454421997, "learning_rate": 0.00090406162464986, "loss": 0.4816, "step": 3531 }, { "epoch": 1.9731843575418995, "grad_norm": 0.7043510675430298, "learning_rate": 0.0009040336134453782, "loss": 0.458, "step": 3532 }, { "epoch": 1.9737430167597765, "grad_norm": 0.5983909964561462, "learning_rate": 0.0009040056022408963, "loss": 0.3557, "step": 3533 }, { "epoch": 1.9743016759776535, "grad_norm": 0.5334946513175964, "learning_rate": 0.0009039775910364146, "loss": 
0.5308, "step": 3534 }, { "epoch": 1.9748603351955307, "grad_norm": 0.4784345328807831, "learning_rate": 0.0009039495798319328, "loss": 0.4847, "step": 3535 }, { "epoch": 1.975418994413408, "grad_norm": 0.5475999116897583, "learning_rate": 0.000903921568627451, "loss": 0.3819, "step": 3536 }, { "epoch": 1.975977653631285, "grad_norm": 0.5399876236915588, "learning_rate": 0.0009038935574229692, "loss": 0.3594, "step": 3537 }, { "epoch": 1.976536312849162, "grad_norm": 0.6344031691551208, "learning_rate": 0.0009038655462184873, "loss": 0.5733, "step": 3538 }, { "epoch": 1.977094972067039, "grad_norm": 0.7832716703414917, "learning_rate": 0.0009038375350140056, "loss": 0.5387, "step": 3539 }, { "epoch": 1.9776536312849162, "grad_norm": 0.48385801911354065, "learning_rate": 0.0009038095238095239, "loss": 0.4875, "step": 3540 }, { "epoch": 1.9782122905027935, "grad_norm": 7.341414928436279, "learning_rate": 0.0009037815126050421, "loss": 0.403, "step": 3541 }, { "epoch": 1.9787709497206705, "grad_norm": 0.5225608944892883, "learning_rate": 0.0009037535014005603, "loss": 0.4762, "step": 3542 }, { "epoch": 1.9793296089385475, "grad_norm": 0.5041249394416809, "learning_rate": 0.0009037254901960784, "loss": 0.3955, "step": 3543 }, { "epoch": 1.9798882681564245, "grad_norm": 10.300610542297363, "learning_rate": 0.0009036974789915967, "loss": 0.5804, "step": 3544 }, { "epoch": 1.9804469273743017, "grad_norm": 0.6910036206245422, "learning_rate": 0.0009036694677871149, "loss": 0.4921, "step": 3545 }, { "epoch": 1.9810055865921787, "grad_norm": 0.7483671307563782, "learning_rate": 0.0009036414565826331, "loss": 0.5679, "step": 3546 }, { "epoch": 1.981564245810056, "grad_norm": 1.0613023042678833, "learning_rate": 0.0009036134453781513, "loss": 0.5126, "step": 3547 }, { "epoch": 1.982122905027933, "grad_norm": 0.8317072987556458, "learning_rate": 0.0009035854341736695, "loss": 0.533, "step": 3548 }, { "epoch": 1.98268156424581, "grad_norm": 1.4939213991165161, "learning_rate": 
0.0009035574229691877, "loss": 0.4655, "step": 3549 }, { "epoch": 1.983240223463687, "grad_norm": 0.5927630066871643, "learning_rate": 0.0009035294117647059, "loss": 0.5273, "step": 3550 }, { "epoch": 1.9837988826815642, "grad_norm": 0.5648838877677917, "learning_rate": 0.0009035014005602241, "loss": 0.5222, "step": 3551 }, { "epoch": 1.9843575418994415, "grad_norm": 0.6251527667045593, "learning_rate": 0.0009034733893557423, "loss": 0.4953, "step": 3552 }, { "epoch": 1.9849162011173185, "grad_norm": 0.7572916150093079, "learning_rate": 0.0009034453781512605, "loss": 0.4855, "step": 3553 }, { "epoch": 1.9854748603351955, "grad_norm": 0.5038579106330872, "learning_rate": 0.0009034173669467787, "loss": 0.5029, "step": 3554 }, { "epoch": 1.9860335195530725, "grad_norm": 0.5522314310073853, "learning_rate": 0.0009033893557422969, "loss": 0.4267, "step": 3555 }, { "epoch": 1.9865921787709497, "grad_norm": 0.4981827735900879, "learning_rate": 0.0009033613445378151, "loss": 0.4412, "step": 3556 }, { "epoch": 1.987150837988827, "grad_norm": 0.7789214849472046, "learning_rate": 0.0009033333333333334, "loss": 0.4464, "step": 3557 }, { "epoch": 1.987709497206704, "grad_norm": 0.7598844766616821, "learning_rate": 0.0009033053221288516, "loss": 0.3918, "step": 3558 }, { "epoch": 1.988268156424581, "grad_norm": 7.820371627807617, "learning_rate": 0.0009032773109243699, "loss": 0.3769, "step": 3559 }, { "epoch": 1.988826815642458, "grad_norm": 2.5441315174102783, "learning_rate": 0.000903249299719888, "loss": 0.4993, "step": 3560 }, { "epoch": 1.9893854748603352, "grad_norm": 0.6510112881660461, "learning_rate": 0.0009032212885154062, "loss": 0.4605, "step": 3561 }, { "epoch": 1.9899441340782122, "grad_norm": 4.732877731323242, "learning_rate": 0.0009031932773109244, "loss": 0.532, "step": 3562 }, { "epoch": 1.9905027932960895, "grad_norm": 0.5059810876846313, "learning_rate": 0.0009031652661064426, "loss": 0.5014, "step": 3563 }, { "epoch": 1.9910614525139665, "grad_norm": 
0.6189684867858887, "learning_rate": 0.0009031372549019609, "loss": 0.4179, "step": 3564 }, { "epoch": 1.9916201117318435, "grad_norm": 0.5997883081436157, "learning_rate": 0.000903109243697479, "loss": 0.4231, "step": 3565 }, { "epoch": 1.9921787709497205, "grad_norm": 0.5159011483192444, "learning_rate": 0.0009030812324929972, "loss": 0.3627, "step": 3566 }, { "epoch": 1.9927374301675977, "grad_norm": 0.5479264259338379, "learning_rate": 0.0009030532212885154, "loss": 0.4364, "step": 3567 }, { "epoch": 1.993296089385475, "grad_norm": 0.5636224150657654, "learning_rate": 0.0009030252100840336, "loss": 0.5088, "step": 3568 }, { "epoch": 1.993854748603352, "grad_norm": 0.48500046133995056, "learning_rate": 0.0009029971988795518, "loss": 0.4343, "step": 3569 }, { "epoch": 1.994413407821229, "grad_norm": 0.7204303741455078, "learning_rate": 0.00090296918767507, "loss": 0.5234, "step": 3570 }, { "epoch": 1.994972067039106, "grad_norm": 0.6034387350082397, "learning_rate": 0.0009029411764705882, "loss": 0.5912, "step": 3571 }, { "epoch": 1.9955307262569832, "grad_norm": 0.7989311218261719, "learning_rate": 0.0009029131652661064, "loss": 0.5422, "step": 3572 }, { "epoch": 1.9960893854748605, "grad_norm": 0.4728044867515564, "learning_rate": 0.0009028851540616246, "loss": 0.4165, "step": 3573 }, { "epoch": 1.9966480446927375, "grad_norm": 1.8345197439193726, "learning_rate": 0.0009028571428571429, "loss": 0.5437, "step": 3574 }, { "epoch": 1.9972067039106145, "grad_norm": 0.589110255241394, "learning_rate": 0.0009028291316526612, "loss": 0.544, "step": 3575 }, { "epoch": 1.9977653631284915, "grad_norm": 0.617041826248169, "learning_rate": 0.0009028011204481793, "loss": 0.5897, "step": 3576 }, { "epoch": 1.9983240223463687, "grad_norm": 0.594106912612915, "learning_rate": 0.0009027731092436975, "loss": 0.4297, "step": 3577 }, { "epoch": 1.9988826815642458, "grad_norm": 0.9016503691673279, "learning_rate": 0.0009027450980392157, "loss": 0.5086, "step": 3578 }, { "epoch": 
1.999441340782123, "grad_norm": 0.5713033080101013, "learning_rate": 0.0009027170868347339, "loss": 0.5764, "step": 3579 }, { "epoch": 2.0, "grad_norm": 0.7048798203468323, "learning_rate": 0.0009026890756302522, "loss": 0.4876, "step": 3580 }, { "epoch": 2.000558659217877, "grad_norm": 0.5450454354286194, "learning_rate": 0.0009026610644257703, "loss": 0.503, "step": 3581 }, { "epoch": 2.001117318435754, "grad_norm": 0.6440742015838623, "learning_rate": 0.0009026330532212885, "loss": 0.438, "step": 3582 }, { "epoch": 2.0016759776536315, "grad_norm": 0.5473048686981201, "learning_rate": 0.0009026050420168067, "loss": 0.4054, "step": 3583 }, { "epoch": 2.0022346368715085, "grad_norm": 0.3822300136089325, "learning_rate": 0.0009025770308123249, "loss": 0.4562, "step": 3584 }, { "epoch": 2.0027932960893855, "grad_norm": 0.6752724647521973, "learning_rate": 0.0009025490196078432, "loss": 0.4064, "step": 3585 }, { "epoch": 2.0033519553072625, "grad_norm": 0.4130238890647888, "learning_rate": 0.0009025210084033613, "loss": 0.4614, "step": 3586 }, { "epoch": 2.0039106145251395, "grad_norm": 0.5895639061927795, "learning_rate": 0.0009024929971988795, "loss": 0.4011, "step": 3587 }, { "epoch": 2.004469273743017, "grad_norm": 0.5819107294082642, "learning_rate": 0.0009024649859943977, "loss": 0.4264, "step": 3588 }, { "epoch": 2.005027932960894, "grad_norm": 0.6172674298286438, "learning_rate": 0.0009024369747899159, "loss": 0.401, "step": 3589 }, { "epoch": 2.005586592178771, "grad_norm": 1.1746076345443726, "learning_rate": 0.0009024089635854343, "loss": 0.4585, "step": 3590 }, { "epoch": 2.006145251396648, "grad_norm": 0.6145292520523071, "learning_rate": 0.0009023809523809525, "loss": 0.5267, "step": 3591 }, { "epoch": 2.006703910614525, "grad_norm": 0.5318580269813538, "learning_rate": 0.0009023529411764706, "loss": 0.4666, "step": 3592 }, { "epoch": 2.007262569832402, "grad_norm": 0.5863227844238281, "learning_rate": 0.0009023249299719888, "loss": 0.4878, "step": 3593 
}, { "epoch": 2.0078212290502795, "grad_norm": 0.8668081760406494, "learning_rate": 0.000902296918767507, "loss": 0.4759, "step": 3594 }, { "epoch": 2.0083798882681565, "grad_norm": 0.8008526563644409, "learning_rate": 0.0009022689075630253, "loss": 0.507, "step": 3595 }, { "epoch": 2.0089385474860335, "grad_norm": 0.5928978323936462, "learning_rate": 0.0009022408963585435, "loss": 0.4554, "step": 3596 }, { "epoch": 2.0094972067039105, "grad_norm": 0.652567446231842, "learning_rate": 0.0009022128851540616, "loss": 0.4781, "step": 3597 }, { "epoch": 2.0100558659217875, "grad_norm": 1.2569924592971802, "learning_rate": 0.0009021848739495798, "loss": 0.4537, "step": 3598 }, { "epoch": 2.010614525139665, "grad_norm": 0.7195703387260437, "learning_rate": 0.000902156862745098, "loss": 0.4226, "step": 3599 }, { "epoch": 2.011173184357542, "grad_norm": 0.6938717365264893, "learning_rate": 0.0009021288515406163, "loss": 0.5468, "step": 3600 }, { "epoch": 2.011731843575419, "grad_norm": 0.6042268872261047, "learning_rate": 0.0009021008403361345, "loss": 0.5378, "step": 3601 }, { "epoch": 2.012290502793296, "grad_norm": 0.748271644115448, "learning_rate": 0.0009020728291316526, "loss": 0.4694, "step": 3602 }, { "epoch": 2.012849162011173, "grad_norm": 0.509853184223175, "learning_rate": 0.0009020448179271708, "loss": 0.4945, "step": 3603 }, { "epoch": 2.0134078212290505, "grad_norm": 0.5544410943984985, "learning_rate": 0.000902016806722689, "loss": 0.5588, "step": 3604 }, { "epoch": 2.0139664804469275, "grad_norm": 0.7388685941696167, "learning_rate": 0.0009019887955182073, "loss": 0.5227, "step": 3605 }, { "epoch": 2.0145251396648045, "grad_norm": 0.7522904276847839, "learning_rate": 0.0009019607843137256, "loss": 0.5748, "step": 3606 }, { "epoch": 2.0150837988826815, "grad_norm": 0.8101471662521362, "learning_rate": 0.0009019327731092438, "loss": 0.4948, "step": 3607 }, { "epoch": 2.0156424581005585, "grad_norm": 0.4869842231273651, "learning_rate": 0.0009019047619047619, 
"loss": 0.3863, "step": 3608 }, { "epoch": 2.0162011173184355, "grad_norm": 0.7836116552352905, "learning_rate": 0.0009018767507002801, "loss": 0.4181, "step": 3609 }, { "epoch": 2.016759776536313, "grad_norm": 0.5335279703140259, "learning_rate": 0.0009018487394957984, "loss": 0.4375, "step": 3610 }, { "epoch": 2.01731843575419, "grad_norm": 1.068753957748413, "learning_rate": 0.0009018207282913166, "loss": 0.4274, "step": 3611 }, { "epoch": 2.017877094972067, "grad_norm": 0.5417978763580322, "learning_rate": 0.0009017927170868348, "loss": 0.4648, "step": 3612 }, { "epoch": 2.018435754189944, "grad_norm": 0.5654529929161072, "learning_rate": 0.0009017647058823529, "loss": 0.5148, "step": 3613 }, { "epoch": 2.018994413407821, "grad_norm": 0.5350977778434753, "learning_rate": 0.0009017366946778711, "loss": 0.4992, "step": 3614 }, { "epoch": 2.0195530726256985, "grad_norm": 0.7936908602714539, "learning_rate": 0.0009017086834733894, "loss": 0.3505, "step": 3615 }, { "epoch": 2.0201117318435755, "grad_norm": 0.7255499958992004, "learning_rate": 0.0009016806722689076, "loss": 0.6074, "step": 3616 }, { "epoch": 2.0206703910614525, "grad_norm": 0.5595179796218872, "learning_rate": 0.0009016526610644258, "loss": 0.4777, "step": 3617 }, { "epoch": 2.0212290502793295, "grad_norm": 0.6538064479827881, "learning_rate": 0.0009016246498599439, "loss": 0.4465, "step": 3618 }, { "epoch": 2.0217877094972065, "grad_norm": 1.006617546081543, "learning_rate": 0.0009015966386554621, "loss": 0.4811, "step": 3619 }, { "epoch": 2.022346368715084, "grad_norm": 0.5685110688209534, "learning_rate": 0.0009015686274509804, "loss": 0.4459, "step": 3620 }, { "epoch": 2.022905027932961, "grad_norm": 0.5071479082107544, "learning_rate": 0.0009015406162464986, "loss": 0.5977, "step": 3621 }, { "epoch": 2.023463687150838, "grad_norm": 0.4989989995956421, "learning_rate": 0.0009015126050420169, "loss": 0.5899, "step": 3622 }, { "epoch": 2.024022346368715, "grad_norm": 1.2915453910827637, 
"learning_rate": 0.0009014845938375351, "loss": 0.6405, "step": 3623 }, { "epoch": 2.024581005586592, "grad_norm": 0.49206650257110596, "learning_rate": 0.0009014565826330532, "loss": 0.5031, "step": 3624 }, { "epoch": 2.0251396648044695, "grad_norm": 0.8080052733421326, "learning_rate": 0.0009014285714285715, "loss": 0.4978, "step": 3625 }, { "epoch": 2.0256983240223465, "grad_norm": 0.6381545662879944, "learning_rate": 0.0009014005602240897, "loss": 0.4272, "step": 3626 }, { "epoch": 2.0262569832402235, "grad_norm": 1.6535542011260986, "learning_rate": 0.0009013725490196079, "loss": 0.4504, "step": 3627 }, { "epoch": 2.0268156424581005, "grad_norm": 0.4633352756500244, "learning_rate": 0.0009013445378151261, "loss": 0.5212, "step": 3628 }, { "epoch": 2.0273743016759775, "grad_norm": 0.45375335216522217, "learning_rate": 0.0009013165266106442, "loss": 0.4174, "step": 3629 }, { "epoch": 2.0279329608938546, "grad_norm": 0.4371800422668457, "learning_rate": 0.0009012885154061625, "loss": 0.4732, "step": 3630 }, { "epoch": 2.028491620111732, "grad_norm": 0.7216630578041077, "learning_rate": 0.0009012605042016807, "loss": 0.5108, "step": 3631 }, { "epoch": 2.029050279329609, "grad_norm": 0.5368731617927551, "learning_rate": 0.0009012324929971989, "loss": 0.4784, "step": 3632 }, { "epoch": 2.029608938547486, "grad_norm": 0.42912301421165466, "learning_rate": 0.0009012044817927171, "loss": 0.3978, "step": 3633 }, { "epoch": 2.030167597765363, "grad_norm": 0.6719613075256348, "learning_rate": 0.0009011764705882352, "loss": 0.5235, "step": 3634 }, { "epoch": 2.03072625698324, "grad_norm": 0.5906072854995728, "learning_rate": 0.0009011484593837535, "loss": 0.4929, "step": 3635 }, { "epoch": 2.0312849162011175, "grad_norm": 2.1807973384857178, "learning_rate": 0.0009011204481792717, "loss": 0.419, "step": 3636 }, { "epoch": 2.0318435754189945, "grad_norm": 13.073674201965332, "learning_rate": 0.0009010924369747899, "loss": 0.7023, "step": 3637 }, { "epoch": 
2.0324022346368715, "grad_norm": 0.5532638430595398, "learning_rate": 0.0009010644257703081, "loss": 0.4215, "step": 3638 }, { "epoch": 2.0329608938547485, "grad_norm": 0.5153961777687073, "learning_rate": 0.0009010364145658264, "loss": 0.4712, "step": 3639 }, { "epoch": 2.0335195530726256, "grad_norm": 1.0909032821655273, "learning_rate": 0.0009010084033613446, "loss": 0.4436, "step": 3640 }, { "epoch": 2.034078212290503, "grad_norm": 0.5917366147041321, "learning_rate": 0.0009009803921568628, "loss": 0.4256, "step": 3641 }, { "epoch": 2.03463687150838, "grad_norm": 1.351475477218628, "learning_rate": 0.000900952380952381, "loss": 0.4113, "step": 3642 }, { "epoch": 2.035195530726257, "grad_norm": 0.4937914311885834, "learning_rate": 0.0009009243697478992, "loss": 0.5164, "step": 3643 }, { "epoch": 2.035754189944134, "grad_norm": 0.5243120789527893, "learning_rate": 0.0009008963585434174, "loss": 0.451, "step": 3644 }, { "epoch": 2.036312849162011, "grad_norm": 0.6904476284980774, "learning_rate": 0.0009008683473389356, "loss": 0.4354, "step": 3645 }, { "epoch": 2.036871508379888, "grad_norm": 0.6102162599563599, "learning_rate": 0.0009008403361344538, "loss": 0.4485, "step": 3646 }, { "epoch": 2.0374301675977655, "grad_norm": 1.0556654930114746, "learning_rate": 0.000900812324929972, "loss": 0.4758, "step": 3647 }, { "epoch": 2.0379888268156425, "grad_norm": 0.48797985911369324, "learning_rate": 0.0009007843137254902, "loss": 0.4622, "step": 3648 }, { "epoch": 2.0385474860335195, "grad_norm": 0.5164204239845276, "learning_rate": 0.0009007563025210084, "loss": 0.459, "step": 3649 }, { "epoch": 2.0391061452513966, "grad_norm": 2.2609622478485107, "learning_rate": 0.0009007282913165266, "loss": 0.4979, "step": 3650 }, { "epoch": 2.0396648044692736, "grad_norm": 0.8742501139640808, "learning_rate": 0.0009007002801120448, "loss": 0.4722, "step": 3651 }, { "epoch": 2.040223463687151, "grad_norm": 2.1491634845733643, "learning_rate": 0.000900672268907563, "loss": 0.5696, 
"step": 3652 }, { "epoch": 2.040782122905028, "grad_norm": 0.6949541568756104, "learning_rate": 0.0009006442577030812, "loss": 0.4896, "step": 3653 }, { "epoch": 2.041340782122905, "grad_norm": 1.1042120456695557, "learning_rate": 0.0009006162464985994, "loss": 0.4089, "step": 3654 }, { "epoch": 2.041899441340782, "grad_norm": 0.5481662750244141, "learning_rate": 0.0009005882352941178, "loss": 0.479, "step": 3655 }, { "epoch": 2.042458100558659, "grad_norm": 0.4432147741317749, "learning_rate": 0.0009005602240896359, "loss": 0.4474, "step": 3656 }, { "epoch": 2.0430167597765365, "grad_norm": 0.711911141872406, "learning_rate": 0.0009005322128851541, "loss": 0.443, "step": 3657 }, { "epoch": 2.0435754189944135, "grad_norm": 0.6026064157485962, "learning_rate": 0.0009005042016806723, "loss": 0.4773, "step": 3658 }, { "epoch": 2.0441340782122905, "grad_norm": 0.966338038444519, "learning_rate": 0.0009004761904761905, "loss": 0.7091, "step": 3659 }, { "epoch": 2.0446927374301676, "grad_norm": 0.6239568591117859, "learning_rate": 0.0009004481792717088, "loss": 0.5048, "step": 3660 }, { "epoch": 2.0452513966480446, "grad_norm": 0.6432452201843262, "learning_rate": 0.0009004201680672269, "loss": 0.4432, "step": 3661 }, { "epoch": 2.0458100558659216, "grad_norm": 0.912550151348114, "learning_rate": 0.0009003921568627451, "loss": 0.526, "step": 3662 }, { "epoch": 2.046368715083799, "grad_norm": 0.8593716621398926, "learning_rate": 0.0009003641456582633, "loss": 0.4943, "step": 3663 }, { "epoch": 2.046927374301676, "grad_norm": 2.3145017623901367, "learning_rate": 0.0009003361344537815, "loss": 0.5077, "step": 3664 }, { "epoch": 2.047486033519553, "grad_norm": 2.300337553024292, "learning_rate": 0.0009003081232492998, "loss": 0.4057, "step": 3665 }, { "epoch": 2.04804469273743, "grad_norm": 0.6351850032806396, "learning_rate": 0.0009002801120448179, "loss": 0.4694, "step": 3666 }, { "epoch": 2.048603351955307, "grad_norm": 0.565089762210846, "learning_rate": 
0.0009002521008403361, "loss": 0.4212, "step": 3667 }, { "epoch": 2.0491620111731845, "grad_norm": 2.1437771320343018, "learning_rate": 0.0009002240896358543, "loss": 0.4626, "step": 3668 }, { "epoch": 2.0497206703910615, "grad_norm": 6.8366804122924805, "learning_rate": 0.0009001960784313725, "loss": 0.5141, "step": 3669 }, { "epoch": 2.0502793296089385, "grad_norm": 0.561722457408905, "learning_rate": 0.0009001680672268908, "loss": 0.4529, "step": 3670 }, { "epoch": 2.0508379888268156, "grad_norm": 0.43533599376678467, "learning_rate": 0.000900140056022409, "loss": 0.4096, "step": 3671 }, { "epoch": 2.0513966480446926, "grad_norm": 1.6014506816864014, "learning_rate": 0.0009001120448179272, "loss": 0.5019, "step": 3672 }, { "epoch": 2.05195530726257, "grad_norm": 0.5009967088699341, "learning_rate": 0.0009000840336134454, "loss": 0.4794, "step": 3673 }, { "epoch": 2.052513966480447, "grad_norm": 2.5643579959869385, "learning_rate": 0.0009000560224089636, "loss": 0.5311, "step": 3674 }, { "epoch": 2.053072625698324, "grad_norm": 0.6453258991241455, "learning_rate": 0.0009000280112044819, "loss": 0.4993, "step": 3675 }, { "epoch": 2.053631284916201, "grad_norm": 0.3981837332248688, "learning_rate": 0.0009000000000000001, "loss": 0.5037, "step": 3676 }, { "epoch": 2.054189944134078, "grad_norm": 0.6156757473945618, "learning_rate": 0.0008999719887955182, "loss": 0.5961, "step": 3677 }, { "epoch": 2.054748603351955, "grad_norm": 0.44877707958221436, "learning_rate": 0.0008999439775910364, "loss": 0.3651, "step": 3678 }, { "epoch": 2.0553072625698325, "grad_norm": 0.5611476898193359, "learning_rate": 0.0008999159663865546, "loss": 0.448, "step": 3679 }, { "epoch": 2.0558659217877095, "grad_norm": 3.6825473308563232, "learning_rate": 0.0008998879551820729, "loss": 0.5328, "step": 3680 }, { "epoch": 2.0564245810055866, "grad_norm": 0.6583296656608582, "learning_rate": 0.0008998599439775911, "loss": 0.4755, "step": 3681 }, { "epoch": 2.0569832402234636, "grad_norm": 
0.5341112017631531, "learning_rate": 0.0008998319327731092, "loss": 0.4938, "step": 3682 }, { "epoch": 2.0575418994413406, "grad_norm": 0.5836253762245178, "learning_rate": 0.0008998039215686274, "loss": 0.4253, "step": 3683 }, { "epoch": 2.058100558659218, "grad_norm": 0.7295217514038086, "learning_rate": 0.0008997759103641456, "loss": 0.6345, "step": 3684 }, { "epoch": 2.058659217877095, "grad_norm": 0.4508606493473053, "learning_rate": 0.0008997478991596639, "loss": 0.4422, "step": 3685 }, { "epoch": 2.059217877094972, "grad_norm": 0.6020694375038147, "learning_rate": 0.0008997198879551821, "loss": 0.5392, "step": 3686 }, { "epoch": 2.059776536312849, "grad_norm": 0.6957813501358032, "learning_rate": 0.0008996918767507003, "loss": 0.548, "step": 3687 }, { "epoch": 2.060335195530726, "grad_norm": 1.2248607873916626, "learning_rate": 0.0008996638655462184, "loss": 0.4601, "step": 3688 }, { "epoch": 2.0608938547486035, "grad_norm": 0.5103458166122437, "learning_rate": 0.0008996358543417367, "loss": 0.513, "step": 3689 }, { "epoch": 2.0614525139664805, "grad_norm": 0.4685027301311493, "learning_rate": 0.000899607843137255, "loss": 0.4959, "step": 3690 }, { "epoch": 2.0620111731843576, "grad_norm": 0.8616878390312195, "learning_rate": 0.0008995798319327732, "loss": 0.6087, "step": 3691 }, { "epoch": 2.0625698324022346, "grad_norm": 0.749208390712738, "learning_rate": 0.0008995518207282914, "loss": 0.4034, "step": 3692 }, { "epoch": 2.0631284916201116, "grad_norm": 0.4404391050338745, "learning_rate": 0.0008995238095238095, "loss": 0.4779, "step": 3693 }, { "epoch": 2.063687150837989, "grad_norm": 0.515535831451416, "learning_rate": 0.0008994957983193277, "loss": 0.4963, "step": 3694 }, { "epoch": 2.064245810055866, "grad_norm": 0.4829117953777313, "learning_rate": 0.000899467787114846, "loss": 0.3912, "step": 3695 }, { "epoch": 2.064804469273743, "grad_norm": 0.8293478488922119, "learning_rate": 0.0008994397759103642, "loss": 0.3592, "step": 3696 }, { "epoch": 
2.06536312849162, "grad_norm": 3.508714437484741, "learning_rate": 0.0008994117647058824, "loss": 0.5117, "step": 3697 }, { "epoch": 2.065921787709497, "grad_norm": 0.937251627445221, "learning_rate": 0.0008993837535014005, "loss": 0.581, "step": 3698 }, { "epoch": 2.066480446927374, "grad_norm": 0.4724261462688446, "learning_rate": 0.0008993557422969187, "loss": 0.4653, "step": 3699 }, { "epoch": 2.0670391061452515, "grad_norm": 0.8045380115509033, "learning_rate": 0.000899327731092437, "loss": 0.4063, "step": 3700 }, { "epoch": 2.0675977653631286, "grad_norm": 0.44875216484069824, "learning_rate": 0.0008992997198879552, "loss": 0.4156, "step": 3701 }, { "epoch": 2.0681564245810056, "grad_norm": 6.698723793029785, "learning_rate": 0.0008992717086834734, "loss": 0.4371, "step": 3702 }, { "epoch": 2.0687150837988826, "grad_norm": 0.6368177533149719, "learning_rate": 0.0008992436974789916, "loss": 0.3801, "step": 3703 }, { "epoch": 2.0692737430167596, "grad_norm": 0.8219538927078247, "learning_rate": 0.0008992156862745097, "loss": 0.4772, "step": 3704 }, { "epoch": 2.069832402234637, "grad_norm": 0.9575294256210327, "learning_rate": 0.0008991876750700281, "loss": 0.4072, "step": 3705 }, { "epoch": 2.070391061452514, "grad_norm": 1.4106591939926147, "learning_rate": 0.0008991596638655463, "loss": 0.4239, "step": 3706 }, { "epoch": 2.070949720670391, "grad_norm": 0.7687220573425293, "learning_rate": 0.0008991316526610645, "loss": 0.5372, "step": 3707 }, { "epoch": 2.071508379888268, "grad_norm": 0.7370675206184387, "learning_rate": 0.0008991036414565827, "loss": 0.5408, "step": 3708 }, { "epoch": 2.072067039106145, "grad_norm": 0.470316618680954, "learning_rate": 0.0008990756302521008, "loss": 0.4622, "step": 3709 }, { "epoch": 2.0726256983240225, "grad_norm": 0.5752003192901611, "learning_rate": 0.0008990476190476191, "loss": 0.5363, "step": 3710 }, { "epoch": 2.0731843575418996, "grad_norm": 0.5439411401748657, "learning_rate": 0.0008990196078431373, "loss": 0.4332, 
"step": 3711 }, { "epoch": 2.0737430167597766, "grad_norm": 0.5157110095024109, "learning_rate": 0.0008989915966386555, "loss": 0.4944, "step": 3712 }, { "epoch": 2.0743016759776536, "grad_norm": 0.6001066565513611, "learning_rate": 0.0008989635854341737, "loss": 0.5176, "step": 3713 }, { "epoch": 2.0748603351955306, "grad_norm": 0.7983638644218445, "learning_rate": 0.0008989355742296918, "loss": 0.4992, "step": 3714 }, { "epoch": 2.0754189944134076, "grad_norm": 0.94972163438797, "learning_rate": 0.0008989075630252101, "loss": 0.4639, "step": 3715 }, { "epoch": 2.075977653631285, "grad_norm": 0.5934249758720398, "learning_rate": 0.0008988795518207283, "loss": 0.4983, "step": 3716 }, { "epoch": 2.076536312849162, "grad_norm": 0.6522203087806702, "learning_rate": 0.0008988515406162465, "loss": 0.4679, "step": 3717 }, { "epoch": 2.077094972067039, "grad_norm": 0.5073977112770081, "learning_rate": 0.0008988235294117647, "loss": 0.4703, "step": 3718 }, { "epoch": 2.077653631284916, "grad_norm": 0.6643916964530945, "learning_rate": 0.0008987955182072829, "loss": 0.4247, "step": 3719 }, { "epoch": 2.078212290502793, "grad_norm": 0.5237669944763184, "learning_rate": 0.0008987675070028011, "loss": 0.4487, "step": 3720 }, { "epoch": 2.0787709497206706, "grad_norm": 9.248260498046875, "learning_rate": 0.0008987394957983194, "loss": 0.4931, "step": 3721 }, { "epoch": 2.0793296089385476, "grad_norm": 0.7812570333480835, "learning_rate": 0.0008987114845938376, "loss": 0.4135, "step": 3722 }, { "epoch": 2.0798882681564246, "grad_norm": 0.7737398147583008, "learning_rate": 0.0008986834733893558, "loss": 0.3451, "step": 3723 }, { "epoch": 2.0804469273743016, "grad_norm": 0.5317307710647583, "learning_rate": 0.000898655462184874, "loss": 0.6042, "step": 3724 }, { "epoch": 2.0810055865921786, "grad_norm": 0.6720497012138367, "learning_rate": 0.0008986274509803922, "loss": 0.5637, "step": 3725 }, { "epoch": 2.081564245810056, "grad_norm": 0.5801661014556885, "learning_rate": 
0.0008985994397759104, "loss": 0.4899, "step": 3726 }, { "epoch": 2.082122905027933, "grad_norm": 0.5745871067047119, "learning_rate": 0.0008985714285714286, "loss": 0.5712, "step": 3727 }, { "epoch": 2.08268156424581, "grad_norm": 0.5367339253425598, "learning_rate": 0.0008985434173669468, "loss": 0.4759, "step": 3728 }, { "epoch": 2.083240223463687, "grad_norm": 0.4944542646408081, "learning_rate": 0.000898515406162465, "loss": 0.542, "step": 3729 }, { "epoch": 2.083798882681564, "grad_norm": 0.5606075525283813, "learning_rate": 0.0008984873949579833, "loss": 0.4008, "step": 3730 }, { "epoch": 2.0843575418994416, "grad_norm": 5.330539703369141, "learning_rate": 0.0008984593837535014, "loss": 0.5064, "step": 3731 }, { "epoch": 2.0849162011173186, "grad_norm": 0.6617708802223206, "learning_rate": 0.0008984313725490196, "loss": 0.4042, "step": 3732 }, { "epoch": 2.0854748603351956, "grad_norm": 0.4983225166797638, "learning_rate": 0.0008984033613445378, "loss": 0.4525, "step": 3733 }, { "epoch": 2.0860335195530726, "grad_norm": 0.6001350283622742, "learning_rate": 0.000898375350140056, "loss": 0.4917, "step": 3734 }, { "epoch": 2.0865921787709496, "grad_norm": 2.2164862155914307, "learning_rate": 0.0008983473389355743, "loss": 0.5909, "step": 3735 }, { "epoch": 2.0871508379888266, "grad_norm": 0.5075696110725403, "learning_rate": 0.0008983193277310924, "loss": 0.3998, "step": 3736 }, { "epoch": 2.087709497206704, "grad_norm": 0.5509397387504578, "learning_rate": 0.0008982913165266106, "loss": 0.5552, "step": 3737 }, { "epoch": 2.088268156424581, "grad_norm": 0.48098692297935486, "learning_rate": 0.0008982633053221289, "loss": 0.4863, "step": 3738 }, { "epoch": 2.088826815642458, "grad_norm": 2.272890090942383, "learning_rate": 0.0008982352941176471, "loss": 0.5314, "step": 3739 }, { "epoch": 2.089385474860335, "grad_norm": 1.3531925678253174, "learning_rate": 0.0008982072829131654, "loss": 0.5998, "step": 3740 }, { "epoch": 2.089944134078212, "grad_norm": 
0.8127819895744324, "learning_rate": 0.0008981792717086835, "loss": 0.4904, "step": 3741 }, { "epoch": 2.0905027932960896, "grad_norm": 0.9548804759979248, "learning_rate": 0.0008981512605042017, "loss": 0.4814, "step": 3742 }, { "epoch": 2.0910614525139666, "grad_norm": 0.5896094441413879, "learning_rate": 0.0008981232492997199, "loss": 0.4818, "step": 3743 }, { "epoch": 2.0916201117318436, "grad_norm": 0.4566092789173126, "learning_rate": 0.0008980952380952381, "loss": 0.3928, "step": 3744 }, { "epoch": 2.0921787709497206, "grad_norm": 0.528643786907196, "learning_rate": 0.0008980672268907564, "loss": 0.5536, "step": 3745 }, { "epoch": 2.0927374301675976, "grad_norm": 0.7363616228103638, "learning_rate": 0.0008980392156862746, "loss": 0.5224, "step": 3746 }, { "epoch": 2.093296089385475, "grad_norm": 0.6808377504348755, "learning_rate": 0.0008980112044817927, "loss": 0.4684, "step": 3747 }, { "epoch": 2.093854748603352, "grad_norm": 0.5341591238975525, "learning_rate": 0.0008979831932773109, "loss": 0.3539, "step": 3748 }, { "epoch": 2.094413407821229, "grad_norm": 0.40797972679138184, "learning_rate": 0.0008979551820728291, "loss": 0.4366, "step": 3749 }, { "epoch": 2.094972067039106, "grad_norm": 0.4666319489479065, "learning_rate": 0.0008979271708683474, "loss": 0.5171, "step": 3750 }, { "epoch": 2.095530726256983, "grad_norm": 1.6182719469070435, "learning_rate": 0.0008978991596638656, "loss": 0.4744, "step": 3751 }, { "epoch": 2.09608938547486, "grad_norm": 0.5935097932815552, "learning_rate": 0.0008978711484593837, "loss": 0.5552, "step": 3752 }, { "epoch": 2.0966480446927376, "grad_norm": 1.1712733507156372, "learning_rate": 0.0008978431372549019, "loss": 0.4601, "step": 3753 }, { "epoch": 2.0972067039106146, "grad_norm": 0.9645431041717529, "learning_rate": 0.0008978151260504202, "loss": 0.5203, "step": 3754 }, { "epoch": 2.0977653631284916, "grad_norm": 0.7179100513458252, "learning_rate": 0.0008977871148459385, "loss": 0.5173, "step": 3755 }, { "epoch": 
2.0983240223463686, "grad_norm": 0.7007253170013428, "learning_rate": 0.0008977591036414567, "loss": 0.4079, "step": 3756 }, { "epoch": 2.0988826815642456, "grad_norm": 0.6294612288475037, "learning_rate": 0.0008977310924369748, "loss": 0.4309, "step": 3757 }, { "epoch": 2.099441340782123, "grad_norm": 0.4474851191043854, "learning_rate": 0.000897703081232493, "loss": 0.4335, "step": 3758 }, { "epoch": 2.1, "grad_norm": 1.6781375408172607, "learning_rate": 0.0008976750700280112, "loss": 0.59, "step": 3759 }, { "epoch": 2.100558659217877, "grad_norm": 0.5361772775650024, "learning_rate": 0.0008976470588235295, "loss": 0.5142, "step": 3760 }, { "epoch": 2.101117318435754, "grad_norm": 0.60182785987854, "learning_rate": 0.0008976190476190477, "loss": 0.4508, "step": 3761 }, { "epoch": 2.101675977653631, "grad_norm": 0.5234772562980652, "learning_rate": 0.0008975910364145659, "loss": 0.3858, "step": 3762 }, { "epoch": 2.1022346368715086, "grad_norm": 0.5795048475265503, "learning_rate": 0.000897563025210084, "loss": 0.4334, "step": 3763 }, { "epoch": 2.1027932960893856, "grad_norm": 0.5706015229225159, "learning_rate": 0.0008975350140056022, "loss": 0.5285, "step": 3764 }, { "epoch": 2.1033519553072626, "grad_norm": 0.3967292606830597, "learning_rate": 0.0008975070028011205, "loss": 0.4253, "step": 3765 }, { "epoch": 2.1039106145251396, "grad_norm": 0.5121884346008301, "learning_rate": 0.0008974789915966387, "loss": 0.4451, "step": 3766 }, { "epoch": 2.1044692737430166, "grad_norm": 0.6755759716033936, "learning_rate": 0.0008974509803921569, "loss": 0.5626, "step": 3767 }, { "epoch": 2.105027932960894, "grad_norm": 0.5974379181861877, "learning_rate": 0.000897422969187675, "loss": 0.4261, "step": 3768 }, { "epoch": 2.105586592178771, "grad_norm": 0.506280779838562, "learning_rate": 0.0008973949579831932, "loss": 0.664, "step": 3769 }, { "epoch": 2.106145251396648, "grad_norm": 2.147064208984375, "learning_rate": 0.0008973669467787116, "loss": 0.4534, "step": 3770 }, { 
"epoch": 2.106703910614525, "grad_norm": 0.4539654552936554, "learning_rate": 0.0008973389355742298, "loss": 0.4992, "step": 3771 }, { "epoch": 2.107262569832402, "grad_norm": 0.4924386143684387, "learning_rate": 0.000897310924369748, "loss": 0.4914, "step": 3772 }, { "epoch": 2.107821229050279, "grad_norm": 1.0393178462982178, "learning_rate": 0.0008972829131652661, "loss": 0.4184, "step": 3773 }, { "epoch": 2.1083798882681566, "grad_norm": 0.6019902229309082, "learning_rate": 0.0008972549019607843, "loss": 0.4533, "step": 3774 }, { "epoch": 2.1089385474860336, "grad_norm": 0.6919354796409607, "learning_rate": 0.0008972268907563026, "loss": 0.5453, "step": 3775 }, { "epoch": 2.1094972067039106, "grad_norm": 0.4117518365383148, "learning_rate": 0.0008971988795518208, "loss": 0.4395, "step": 3776 }, { "epoch": 2.1100558659217876, "grad_norm": 0.47695526480674744, "learning_rate": 0.000897170868347339, "loss": 0.4572, "step": 3777 }, { "epoch": 2.1106145251396646, "grad_norm": 0.5436705350875854, "learning_rate": 0.0008971428571428572, "loss": 0.4931, "step": 3778 }, { "epoch": 2.111173184357542, "grad_norm": 0.5723715424537659, "learning_rate": 0.0008971148459383753, "loss": 0.5657, "step": 3779 }, { "epoch": 2.111731843575419, "grad_norm": 0.5120642185211182, "learning_rate": 0.0008970868347338936, "loss": 0.3819, "step": 3780 }, { "epoch": 2.112290502793296, "grad_norm": 0.683992862701416, "learning_rate": 0.0008970588235294118, "loss": 0.4697, "step": 3781 }, { "epoch": 2.112849162011173, "grad_norm": 0.5189375281333923, "learning_rate": 0.00089703081232493, "loss": 0.4441, "step": 3782 }, { "epoch": 2.11340782122905, "grad_norm": 0.634335994720459, "learning_rate": 0.0008970028011204482, "loss": 0.522, "step": 3783 }, { "epoch": 2.1139664804469276, "grad_norm": 0.5401212573051453, "learning_rate": 0.0008969747899159663, "loss": 0.4753, "step": 3784 }, { "epoch": 2.1145251396648046, "grad_norm": 0.6072887182235718, "learning_rate": 0.0008969467787114846, "loss": 
0.4758, "step": 3785 }, { "epoch": 2.1150837988826816, "grad_norm": 5.551515579223633, "learning_rate": 0.0008969187675070029, "loss": 0.4116, "step": 3786 }, { "epoch": 2.1156424581005586, "grad_norm": 0.4808572232723236, "learning_rate": 0.0008968907563025211, "loss": 0.4224, "step": 3787 }, { "epoch": 2.1162011173184356, "grad_norm": 0.45117831230163574, "learning_rate": 0.0008968627450980393, "loss": 0.507, "step": 3788 }, { "epoch": 2.1167597765363126, "grad_norm": 0.4961342215538025, "learning_rate": 0.0008968347338935574, "loss": 0.4282, "step": 3789 }, { "epoch": 2.11731843575419, "grad_norm": 1.128710150718689, "learning_rate": 0.0008968067226890756, "loss": 0.4983, "step": 3790 }, { "epoch": 2.117877094972067, "grad_norm": 0.8296599388122559, "learning_rate": 0.0008967787114845939, "loss": 0.5818, "step": 3791 }, { "epoch": 2.118435754189944, "grad_norm": 0.6532965302467346, "learning_rate": 0.0008967507002801121, "loss": 0.4443, "step": 3792 }, { "epoch": 2.118994413407821, "grad_norm": 2.0979795455932617, "learning_rate": 0.0008967226890756303, "loss": 0.4998, "step": 3793 }, { "epoch": 2.119553072625698, "grad_norm": 3.302300214767456, "learning_rate": 0.0008966946778711485, "loss": 0.447, "step": 3794 }, { "epoch": 2.1201117318435756, "grad_norm": 0.5424798727035522, "learning_rate": 0.0008966666666666666, "loss": 0.543, "step": 3795 }, { "epoch": 2.1206703910614526, "grad_norm": 0.6697589755058289, "learning_rate": 0.0008966386554621849, "loss": 0.5218, "step": 3796 }, { "epoch": 2.1212290502793296, "grad_norm": 0.6120874881744385, "learning_rate": 0.0008966106442577031, "loss": 0.4499, "step": 3797 }, { "epoch": 2.1217877094972066, "grad_norm": 0.5173333287239075, "learning_rate": 0.0008965826330532213, "loss": 0.3973, "step": 3798 }, { "epoch": 2.1223463687150836, "grad_norm": 0.4179285764694214, "learning_rate": 0.0008965546218487395, "loss": 0.5076, "step": 3799 }, { "epoch": 2.122905027932961, "grad_norm": 0.5040994882583618, "learning_rate": 
0.0008965266106442576, "loss": 0.527, "step": 3800 }, { "epoch": 2.123463687150838, "grad_norm": 0.38026806712150574, "learning_rate": 0.0008964985994397759, "loss": 0.47, "step": 3801 }, { "epoch": 2.124022346368715, "grad_norm": 0.7907775640487671, "learning_rate": 0.0008964705882352941, "loss": 0.4527, "step": 3802 }, { "epoch": 2.124581005586592, "grad_norm": 0.6088317632675171, "learning_rate": 0.0008964425770308124, "loss": 0.4411, "step": 3803 }, { "epoch": 2.125139664804469, "grad_norm": 0.45487162470817566, "learning_rate": 0.0008964145658263306, "loss": 0.4223, "step": 3804 }, { "epoch": 2.1256983240223466, "grad_norm": 0.5422521829605103, "learning_rate": 0.0008963865546218487, "loss": 0.5266, "step": 3805 }, { "epoch": 2.1262569832402236, "grad_norm": 0.5822058320045471, "learning_rate": 0.000896358543417367, "loss": 0.441, "step": 3806 }, { "epoch": 2.1268156424581006, "grad_norm": 0.47583141922950745, "learning_rate": 0.0008963305322128852, "loss": 0.4818, "step": 3807 }, { "epoch": 2.1273743016759776, "grad_norm": 0.6223709583282471, "learning_rate": 0.0008963025210084034, "loss": 0.4252, "step": 3808 }, { "epoch": 2.1279329608938546, "grad_norm": 0.8416194319725037, "learning_rate": 0.0008962745098039216, "loss": 0.6117, "step": 3809 }, { "epoch": 2.1284916201117317, "grad_norm": 1.0066306591033936, "learning_rate": 0.0008962464985994398, "loss": 0.5445, "step": 3810 }, { "epoch": 2.129050279329609, "grad_norm": 0.6163341999053955, "learning_rate": 0.000896218487394958, "loss": 0.5861, "step": 3811 }, { "epoch": 2.129608938547486, "grad_norm": 0.5084480047225952, "learning_rate": 0.0008961904761904762, "loss": 0.4558, "step": 3812 }, { "epoch": 2.130167597765363, "grad_norm": 0.447477787733078, "learning_rate": 0.0008961624649859944, "loss": 0.4645, "step": 3813 }, { "epoch": 2.13072625698324, "grad_norm": 0.5391470789909363, "learning_rate": 0.0008961344537815126, "loss": 0.4588, "step": 3814 }, { "epoch": 2.131284916201117, "grad_norm": 
0.5313900709152222, "learning_rate": 0.0008961064425770308, "loss": 0.5352, "step": 3815 }, { "epoch": 2.1318435754189946, "grad_norm": 0.5331398844718933, "learning_rate": 0.000896078431372549, "loss": 0.4543, "step": 3816 }, { "epoch": 2.1324022346368716, "grad_norm": 0.5167649984359741, "learning_rate": 0.0008960504201680672, "loss": 0.5248, "step": 3817 }, { "epoch": 2.1329608938547486, "grad_norm": 2.74029541015625, "learning_rate": 0.0008960224089635854, "loss": 0.6056, "step": 3818 }, { "epoch": 2.1335195530726256, "grad_norm": 2.7519335746765137, "learning_rate": 0.0008959943977591036, "loss": 0.6211, "step": 3819 }, { "epoch": 2.1340782122905027, "grad_norm": 0.5218809247016907, "learning_rate": 0.0008959663865546219, "loss": 0.5768, "step": 3820 }, { "epoch": 2.1346368715083797, "grad_norm": 0.5395709872245789, "learning_rate": 0.0008959383753501401, "loss": 0.5176, "step": 3821 }, { "epoch": 2.135195530726257, "grad_norm": 0.49308541417121887, "learning_rate": 0.0008959103641456583, "loss": 0.4127, "step": 3822 }, { "epoch": 2.135754189944134, "grad_norm": 0.6129255890846252, "learning_rate": 0.0008958823529411765, "loss": 0.6359, "step": 3823 }, { "epoch": 2.136312849162011, "grad_norm": 0.5810296535491943, "learning_rate": 0.0008958543417366947, "loss": 0.3593, "step": 3824 }, { "epoch": 2.136871508379888, "grad_norm": 1.3670779466629028, "learning_rate": 0.0008958263305322129, "loss": 0.4384, "step": 3825 }, { "epoch": 2.137430167597765, "grad_norm": 0.4468787908554077, "learning_rate": 0.0008957983193277312, "loss": 0.3445, "step": 3826 }, { "epoch": 2.1379888268156426, "grad_norm": 0.47791972756385803, "learning_rate": 0.0008957703081232493, "loss": 0.4176, "step": 3827 }, { "epoch": 2.1385474860335196, "grad_norm": 0.8680862784385681, "learning_rate": 0.0008957422969187675, "loss": 0.4593, "step": 3828 }, { "epoch": 2.1391061452513966, "grad_norm": 0.47572362422943115, "learning_rate": 0.0008957142857142857, "loss": 0.5265, "step": 3829 }, { 
"epoch": 2.1396648044692737, "grad_norm": 0.6244614124298096, "learning_rate": 0.0008956862745098039, "loss": 0.4254, "step": 3830 }, { "epoch": 2.1402234636871507, "grad_norm": 0.7154577970504761, "learning_rate": 0.0008956582633053222, "loss": 0.4698, "step": 3831 }, { "epoch": 2.140782122905028, "grad_norm": 0.7574757933616638, "learning_rate": 0.0008956302521008403, "loss": 0.4249, "step": 3832 }, { "epoch": 2.141340782122905, "grad_norm": 0.5189238786697388, "learning_rate": 0.0008956022408963585, "loss": 0.4772, "step": 3833 }, { "epoch": 2.141899441340782, "grad_norm": 3.430995225906372, "learning_rate": 0.0008955742296918767, "loss": 0.4976, "step": 3834 }, { "epoch": 2.142458100558659, "grad_norm": 0.6838756799697876, "learning_rate": 0.0008955462184873949, "loss": 0.5701, "step": 3835 }, { "epoch": 2.143016759776536, "grad_norm": 0.5704289078712463, "learning_rate": 0.0008955182072829133, "loss": 0.4228, "step": 3836 }, { "epoch": 2.1435754189944136, "grad_norm": 0.4766763746738434, "learning_rate": 0.0008954901960784314, "loss": 0.5659, "step": 3837 }, { "epoch": 2.1441340782122906, "grad_norm": 0.8810789585113525, "learning_rate": 0.0008954621848739496, "loss": 0.5164, "step": 3838 }, { "epoch": 2.1446927374301676, "grad_norm": 0.4730139970779419, "learning_rate": 0.0008954341736694678, "loss": 0.4843, "step": 3839 }, { "epoch": 2.1452513966480447, "grad_norm": 0.7862569093704224, "learning_rate": 0.000895406162464986, "loss": 0.4956, "step": 3840 }, { "epoch": 2.1458100558659217, "grad_norm": 5.963804244995117, "learning_rate": 0.0008953781512605043, "loss": 0.4541, "step": 3841 }, { "epoch": 2.146368715083799, "grad_norm": 0.562130331993103, "learning_rate": 0.0008953501400560225, "loss": 0.4078, "step": 3842 }, { "epoch": 2.146927374301676, "grad_norm": 0.5721172094345093, "learning_rate": 0.0008953221288515406, "loss": 0.5345, "step": 3843 }, { "epoch": 2.147486033519553, "grad_norm": 0.67780601978302, "learning_rate": 0.0008952941176470588, "loss": 
0.4785, "step": 3844 }, { "epoch": 2.14804469273743, "grad_norm": 0.45617246627807617, "learning_rate": 0.000895266106442577, "loss": 0.4376, "step": 3845 }, { "epoch": 2.148603351955307, "grad_norm": 0.43296343088150024, "learning_rate": 0.0008952380952380953, "loss": 0.4024, "step": 3846 }, { "epoch": 2.149162011173184, "grad_norm": 0.5535196661949158, "learning_rate": 0.0008952100840336135, "loss": 0.43, "step": 3847 }, { "epoch": 2.1497206703910616, "grad_norm": 0.6729576587677002, "learning_rate": 0.0008951820728291316, "loss": 0.475, "step": 3848 }, { "epoch": 2.1502793296089386, "grad_norm": 0.5174770355224609, "learning_rate": 0.0008951540616246498, "loss": 0.459, "step": 3849 }, { "epoch": 2.1508379888268156, "grad_norm": 1.0330302715301514, "learning_rate": 0.000895126050420168, "loss": 0.5393, "step": 3850 }, { "epoch": 2.1513966480446927, "grad_norm": 0.4281381666660309, "learning_rate": 0.0008950980392156863, "loss": 0.3785, "step": 3851 }, { "epoch": 2.1519553072625697, "grad_norm": 0.8313674926757812, "learning_rate": 0.0008950700280112046, "loss": 0.4172, "step": 3852 }, { "epoch": 2.152513966480447, "grad_norm": 1.3019518852233887, "learning_rate": 0.0008950420168067227, "loss": 0.4977, "step": 3853 }, { "epoch": 2.153072625698324, "grad_norm": 2.2441890239715576, "learning_rate": 0.0008950140056022409, "loss": 0.5788, "step": 3854 }, { "epoch": 2.153631284916201, "grad_norm": 0.8006486892700195, "learning_rate": 0.0008949859943977591, "loss": 0.496, "step": 3855 }, { "epoch": 2.154189944134078, "grad_norm": 0.5741393566131592, "learning_rate": 0.0008949579831932774, "loss": 0.5004, "step": 3856 }, { "epoch": 2.154748603351955, "grad_norm": 0.7082931399345398, "learning_rate": 0.0008949299719887956, "loss": 0.4286, "step": 3857 }, { "epoch": 2.155307262569832, "grad_norm": 17.997554779052734, "learning_rate": 0.0008949019607843138, "loss": 0.366, "step": 3858 }, { "epoch": 2.1558659217877096, "grad_norm": 3.6697189807891846, "learning_rate": 
0.0008948739495798319, "loss": 0.5159, "step": 3859 }, { "epoch": 2.1564245810055866, "grad_norm": 1.1577590703964233, "learning_rate": 0.0008948459383753501, "loss": 0.4831, "step": 3860 }, { "epoch": 2.1569832402234637, "grad_norm": 0.8880429267883301, "learning_rate": 0.0008948179271708684, "loss": 0.59, "step": 3861 }, { "epoch": 2.1575418994413407, "grad_norm": 3.190842628479004, "learning_rate": 0.0008947899159663866, "loss": 0.6169, "step": 3862 }, { "epoch": 2.1581005586592177, "grad_norm": 1.0041362047195435, "learning_rate": 0.0008947619047619048, "loss": 0.5001, "step": 3863 }, { "epoch": 2.158659217877095, "grad_norm": 0.629850447177887, "learning_rate": 0.0008947338935574229, "loss": 0.5899, "step": 3864 }, { "epoch": 2.159217877094972, "grad_norm": 1.105079174041748, "learning_rate": 0.0008947058823529411, "loss": 0.4194, "step": 3865 }, { "epoch": 2.159776536312849, "grad_norm": 1.2017194032669067, "learning_rate": 0.0008946778711484594, "loss": 0.5322, "step": 3866 }, { "epoch": 2.160335195530726, "grad_norm": 0.8280863165855408, "learning_rate": 0.0008946498599439776, "loss": 0.5519, "step": 3867 }, { "epoch": 2.160893854748603, "grad_norm": 2.054886817932129, "learning_rate": 0.0008946218487394959, "loss": 0.5436, "step": 3868 }, { "epoch": 2.1614525139664806, "grad_norm": 0.6127734184265137, "learning_rate": 0.000894593837535014, "loss": 0.5537, "step": 3869 }, { "epoch": 2.1620111731843576, "grad_norm": 0.6851430535316467, "learning_rate": 0.0008945658263305322, "loss": 0.4636, "step": 3870 }, { "epoch": 2.1625698324022347, "grad_norm": 0.6148260831832886, "learning_rate": 0.0008945378151260505, "loss": 0.373, "step": 3871 }, { "epoch": 2.1631284916201117, "grad_norm": 0.5657532215118408, "learning_rate": 0.0008945098039215687, "loss": 0.603, "step": 3872 }, { "epoch": 2.1636871508379887, "grad_norm": 0.6626763939857483, "learning_rate": 0.0008944817927170869, "loss": 0.5025, "step": 3873 }, { "epoch": 2.164245810055866, "grad_norm": 
0.41087618470191956, "learning_rate": 0.0008944537815126051, "loss": 0.3988, "step": 3874 }, { "epoch": 2.164804469273743, "grad_norm": 0.6639741659164429, "learning_rate": 0.0008944257703081232, "loss": 0.4227, "step": 3875 }, { "epoch": 2.16536312849162, "grad_norm": 0.4683172106742859, "learning_rate": 0.0008943977591036415, "loss": 0.4732, "step": 3876 }, { "epoch": 2.165921787709497, "grad_norm": 0.47243303060531616, "learning_rate": 0.0008943697478991597, "loss": 0.3917, "step": 3877 }, { "epoch": 2.166480446927374, "grad_norm": 0.7888584136962891, "learning_rate": 0.0008943417366946779, "loss": 0.6877, "step": 3878 }, { "epoch": 2.167039106145251, "grad_norm": 0.5306342244148254, "learning_rate": 0.0008943137254901961, "loss": 0.4708, "step": 3879 }, { "epoch": 2.1675977653631286, "grad_norm": 0.5245224833488464, "learning_rate": 0.0008942857142857142, "loss": 0.4673, "step": 3880 }, { "epoch": 2.1681564245810057, "grad_norm": 0.6621931791305542, "learning_rate": 0.0008942577030812325, "loss": 0.5821, "step": 3881 }, { "epoch": 2.1687150837988827, "grad_norm": 0.5790925025939941, "learning_rate": 0.0008942296918767507, "loss": 0.4347, "step": 3882 }, { "epoch": 2.1692737430167597, "grad_norm": 0.5362594127655029, "learning_rate": 0.0008942016806722689, "loss": 0.531, "step": 3883 }, { "epoch": 2.1698324022346367, "grad_norm": 0.761273980140686, "learning_rate": 0.0008941736694677871, "loss": 0.4826, "step": 3884 }, { "epoch": 2.170391061452514, "grad_norm": 0.6512978672981262, "learning_rate": 0.0008941456582633052, "loss": 0.3859, "step": 3885 }, { "epoch": 2.170949720670391, "grad_norm": 0.546739399433136, "learning_rate": 0.0008941176470588236, "loss": 0.5693, "step": 3886 }, { "epoch": 2.171508379888268, "grad_norm": 0.5866378545761108, "learning_rate": 0.0008940896358543418, "loss": 0.5104, "step": 3887 }, { "epoch": 2.172067039106145, "grad_norm": 0.4839584231376648, "learning_rate": 0.00089406162464986, "loss": 0.4134, "step": 3888 }, { "epoch": 
2.172625698324022, "grad_norm": 1.3358453512191772, "learning_rate": 0.0008940336134453782, "loss": 0.4414, "step": 3889 }, { "epoch": 2.1731843575418996, "grad_norm": 0.529212474822998, "learning_rate": 0.0008940056022408964, "loss": 0.4399, "step": 3890 }, { "epoch": 2.1737430167597767, "grad_norm": 0.6501895189285278, "learning_rate": 0.0008939775910364146, "loss": 0.4201, "step": 3891 }, { "epoch": 2.1743016759776537, "grad_norm": 0.6130054593086243, "learning_rate": 0.0008939495798319328, "loss": 0.4556, "step": 3892 }, { "epoch": 2.1748603351955307, "grad_norm": 0.510717511177063, "learning_rate": 0.000893921568627451, "loss": 0.4676, "step": 3893 }, { "epoch": 2.1754189944134077, "grad_norm": 0.7903940677642822, "learning_rate": 0.0008938935574229692, "loss": 0.459, "step": 3894 }, { "epoch": 2.1759776536312847, "grad_norm": 0.46124550700187683, "learning_rate": 0.0008938655462184874, "loss": 0.4928, "step": 3895 }, { "epoch": 2.176536312849162, "grad_norm": 2.1835978031158447, "learning_rate": 0.0008938375350140056, "loss": 0.669, "step": 3896 }, { "epoch": 2.177094972067039, "grad_norm": 1.0175588130950928, "learning_rate": 0.0008938095238095238, "loss": 0.4085, "step": 3897 }, { "epoch": 2.177653631284916, "grad_norm": 0.813089907169342, "learning_rate": 0.000893781512605042, "loss": 0.6075, "step": 3898 }, { "epoch": 2.178212290502793, "grad_norm": 0.4249681234359741, "learning_rate": 0.0008937535014005602, "loss": 0.4756, "step": 3899 }, { "epoch": 2.17877094972067, "grad_norm": 2.335358142852783, "learning_rate": 0.0008937254901960784, "loss": 0.3813, "step": 3900 }, { "epoch": 2.1793296089385477, "grad_norm": 0.6547650694847107, "learning_rate": 0.0008936974789915966, "loss": 0.5097, "step": 3901 }, { "epoch": 2.1798882681564247, "grad_norm": 0.45946362614631653, "learning_rate": 0.0008936694677871149, "loss": 0.4532, "step": 3902 }, { "epoch": 2.1804469273743017, "grad_norm": 0.6938641667366028, "learning_rate": 0.0008936414565826331, "loss": 0.4465, 
"step": 3903 }, { "epoch": 2.1810055865921787, "grad_norm": 0.4258013665676117, "learning_rate": 0.0008936134453781513, "loss": 0.4379, "step": 3904 }, { "epoch": 2.1815642458100557, "grad_norm": 0.7088890075683594, "learning_rate": 0.0008935854341736695, "loss": 0.4482, "step": 3905 }, { "epoch": 2.182122905027933, "grad_norm": 0.6079127192497253, "learning_rate": 0.0008935574229691878, "loss": 0.45, "step": 3906 }, { "epoch": 2.18268156424581, "grad_norm": 0.8777434825897217, "learning_rate": 0.0008935294117647059, "loss": 0.495, "step": 3907 }, { "epoch": 2.183240223463687, "grad_norm": 3.983078956604004, "learning_rate": 0.0008935014005602241, "loss": 0.6685, "step": 3908 }, { "epoch": 2.183798882681564, "grad_norm": 0.49473270773887634, "learning_rate": 0.0008934733893557423, "loss": 0.4859, "step": 3909 }, { "epoch": 2.184357541899441, "grad_norm": 0.42636168003082275, "learning_rate": 0.0008934453781512605, "loss": 0.3903, "step": 3910 }, { "epoch": 2.1849162011173187, "grad_norm": 0.5656536221504211, "learning_rate": 0.0008934173669467788, "loss": 0.4414, "step": 3911 }, { "epoch": 2.1854748603351957, "grad_norm": 0.6124873757362366, "learning_rate": 0.0008933893557422969, "loss": 0.5598, "step": 3912 }, { "epoch": 2.1860335195530727, "grad_norm": 0.5090885162353516, "learning_rate": 0.0008933613445378151, "loss": 0.4544, "step": 3913 }, { "epoch": 2.1865921787709497, "grad_norm": 0.6983973979949951, "learning_rate": 0.0008933333333333333, "loss": 0.5386, "step": 3914 }, { "epoch": 2.1871508379888267, "grad_norm": 1.056702733039856, "learning_rate": 0.0008933053221288515, "loss": 0.5133, "step": 3915 }, { "epoch": 2.1877094972067037, "grad_norm": 0.4468769133090973, "learning_rate": 0.0008932773109243698, "loss": 0.4828, "step": 3916 }, { "epoch": 2.188268156424581, "grad_norm": 0.665208637714386, "learning_rate": 0.0008932492997198879, "loss": 0.5185, "step": 3917 }, { "epoch": 2.188826815642458, "grad_norm": 1.4855750799179077, "learning_rate": 
0.0008932212885154062, "loss": 0.5367, "step": 3918 }, { "epoch": 2.189385474860335, "grad_norm": 0.5110841393470764, "learning_rate": 0.0008931932773109244, "loss": 0.5942, "step": 3919 }, { "epoch": 2.189944134078212, "grad_norm": 0.48792195320129395, "learning_rate": 0.0008931652661064426, "loss": 0.6568, "step": 3920 }, { "epoch": 2.190502793296089, "grad_norm": 0.9303848743438721, "learning_rate": 0.0008931372549019609, "loss": 0.4585, "step": 3921 }, { "epoch": 2.1910614525139667, "grad_norm": 0.5885030031204224, "learning_rate": 0.0008931092436974791, "loss": 0.3972, "step": 3922 }, { "epoch": 2.1916201117318437, "grad_norm": 0.4627387821674347, "learning_rate": 0.0008930812324929972, "loss": 0.3675, "step": 3923 }, { "epoch": 2.1921787709497207, "grad_norm": 0.6177243590354919, "learning_rate": 0.0008930532212885154, "loss": 0.3952, "step": 3924 }, { "epoch": 2.1927374301675977, "grad_norm": 7.05940580368042, "learning_rate": 0.0008930252100840336, "loss": 0.4125, "step": 3925 }, { "epoch": 2.1932960893854747, "grad_norm": 0.6348626613616943, "learning_rate": 0.0008929971988795519, "loss": 0.4596, "step": 3926 }, { "epoch": 2.1938547486033517, "grad_norm": 0.8002322912216187, "learning_rate": 0.0008929691876750701, "loss": 0.4804, "step": 3927 }, { "epoch": 2.194413407821229, "grad_norm": 1.0392259359359741, "learning_rate": 0.0008929411764705882, "loss": 0.4416, "step": 3928 }, { "epoch": 2.194972067039106, "grad_norm": 0.43021318316459656, "learning_rate": 0.0008929131652661064, "loss": 0.3832, "step": 3929 }, { "epoch": 2.195530726256983, "grad_norm": 0.8332733511924744, "learning_rate": 0.0008928851540616246, "loss": 0.4439, "step": 3930 }, { "epoch": 2.19608938547486, "grad_norm": 0.4537784159183502, "learning_rate": 0.0008928571428571429, "loss": 0.4118, "step": 3931 }, { "epoch": 2.1966480446927372, "grad_norm": 0.6397614479064941, "learning_rate": 0.0008928291316526611, "loss": 0.6567, "step": 3932 }, { "epoch": 2.1972067039106147, "grad_norm": 
0.613127589225769, "learning_rate": 0.0008928011204481792, "loss": 0.4477, "step": 3933 }, { "epoch": 2.1977653631284917, "grad_norm": 0.4283999502658844, "learning_rate": 0.0008927731092436974, "loss": 0.4176, "step": 3934 }, { "epoch": 2.1983240223463687, "grad_norm": 0.9790128469467163, "learning_rate": 0.0008927450980392157, "loss": 0.397, "step": 3935 }, { "epoch": 2.1988826815642457, "grad_norm": 0.5619804263114929, "learning_rate": 0.000892717086834734, "loss": 0.5685, "step": 3936 }, { "epoch": 2.1994413407821227, "grad_norm": 0.6304671764373779, "learning_rate": 0.0008926890756302522, "loss": 0.5531, "step": 3937 }, { "epoch": 2.2, "grad_norm": 0.5057224631309509, "learning_rate": 0.0008926610644257704, "loss": 0.5876, "step": 3938 }, { "epoch": 2.200558659217877, "grad_norm": 0.5985106825828552, "learning_rate": 0.0008926330532212885, "loss": 0.7314, "step": 3939 }, { "epoch": 2.201117318435754, "grad_norm": 0.8729943633079529, "learning_rate": 0.0008926050420168067, "loss": 0.551, "step": 3940 }, { "epoch": 2.201675977653631, "grad_norm": 0.47301408648490906, "learning_rate": 0.000892577030812325, "loss": 0.433, "step": 3941 }, { "epoch": 2.2022346368715082, "grad_norm": 0.5290800333023071, "learning_rate": 0.0008925490196078432, "loss": 0.7571, "step": 3942 }, { "epoch": 2.2027932960893857, "grad_norm": 0.9559956192970276, "learning_rate": 0.0008925210084033614, "loss": 0.4057, "step": 3943 }, { "epoch": 2.2033519553072627, "grad_norm": 0.6105080842971802, "learning_rate": 0.0008924929971988795, "loss": 0.4293, "step": 3944 }, { "epoch": 2.2039106145251397, "grad_norm": 1.232312560081482, "learning_rate": 0.0008924649859943977, "loss": 0.4993, "step": 3945 }, { "epoch": 2.2044692737430167, "grad_norm": 0.8616457581520081, "learning_rate": 0.000892436974789916, "loss": 0.5239, "step": 3946 }, { "epoch": 2.2050279329608937, "grad_norm": 0.7790232300758362, "learning_rate": 0.0008924089635854342, "loss": 0.5504, "step": 3947 }, { "epoch": 
2.205586592178771, "grad_norm": 1.1775554418563843, "learning_rate": 0.0008923809523809524, "loss": 0.4108, "step": 3948 }, { "epoch": 2.206145251396648, "grad_norm": 0.7443337440490723, "learning_rate": 0.0008923529411764705, "loss": 0.6079, "step": 3949 }, { "epoch": 2.206703910614525, "grad_norm": 1.3190522193908691, "learning_rate": 0.0008923249299719887, "loss": 0.527, "step": 3950 }, { "epoch": 2.207262569832402, "grad_norm": 0.5428374409675598, "learning_rate": 0.0008922969187675071, "loss": 0.5548, "step": 3951 }, { "epoch": 2.207821229050279, "grad_norm": 0.6532554030418396, "learning_rate": 0.0008922689075630253, "loss": 0.6963, "step": 3952 }, { "epoch": 2.2083798882681562, "grad_norm": 0.888231635093689, "learning_rate": 0.0008922408963585435, "loss": 0.5792, "step": 3953 }, { "epoch": 2.2089385474860337, "grad_norm": 0.4321534037590027, "learning_rate": 0.0008922128851540617, "loss": 0.4117, "step": 3954 }, { "epoch": 2.2094972067039107, "grad_norm": 0.5281447768211365, "learning_rate": 0.0008921848739495798, "loss": 0.5289, "step": 3955 }, { "epoch": 2.2100558659217877, "grad_norm": 1.008543848991394, "learning_rate": 0.0008921568627450981, "loss": 0.4983, "step": 3956 }, { "epoch": 2.2106145251396647, "grad_norm": 2.2757229804992676, "learning_rate": 0.0008921288515406163, "loss": 0.4311, "step": 3957 }, { "epoch": 2.2111731843575417, "grad_norm": 1.3314635753631592, "learning_rate": 0.0008921008403361345, "loss": 0.5646, "step": 3958 }, { "epoch": 2.211731843575419, "grad_norm": 0.6620938181877136, "learning_rate": 0.0008920728291316527, "loss": 0.4986, "step": 3959 }, { "epoch": 2.212290502793296, "grad_norm": 1.910640835762024, "learning_rate": 0.0008920448179271708, "loss": 0.6316, "step": 3960 }, { "epoch": 2.212849162011173, "grad_norm": 0.7703306078910828, "learning_rate": 0.0008920168067226891, "loss": 0.424, "step": 3961 }, { "epoch": 2.21340782122905, "grad_norm": 0.5946208238601685, "learning_rate": 0.0008919887955182073, "loss": 0.5351, 
"step": 3962 }, { "epoch": 2.2139664804469272, "grad_norm": 0.9763235449790955, "learning_rate": 0.0008919607843137255, "loss": 0.4968, "step": 3963 }, { "epoch": 2.2145251396648042, "grad_norm": 7.759881496429443, "learning_rate": 0.0008919327731092437, "loss": 0.4845, "step": 3964 }, { "epoch": 2.2150837988826817, "grad_norm": 0.6663423776626587, "learning_rate": 0.0008919047619047618, "loss": 0.4836, "step": 3965 }, { "epoch": 2.2156424581005587, "grad_norm": 0.7887842655181885, "learning_rate": 0.0008918767507002801, "loss": 0.4292, "step": 3966 }, { "epoch": 2.2162011173184357, "grad_norm": 0.570859432220459, "learning_rate": 0.0008918487394957984, "loss": 0.539, "step": 3967 }, { "epoch": 2.2167597765363127, "grad_norm": 0.7126721739768982, "learning_rate": 0.0008918207282913166, "loss": 0.4837, "step": 3968 }, { "epoch": 2.2173184357541897, "grad_norm": 0.5668030977249146, "learning_rate": 0.0008917927170868348, "loss": 0.5257, "step": 3969 }, { "epoch": 2.217877094972067, "grad_norm": 0.41417011618614197, "learning_rate": 0.000891764705882353, "loss": 0.3347, "step": 3970 }, { "epoch": 2.218435754189944, "grad_norm": 0.4726235270500183, "learning_rate": 0.0008917366946778712, "loss": 0.5055, "step": 3971 }, { "epoch": 2.218994413407821, "grad_norm": 0.44764265418052673, "learning_rate": 0.0008917086834733894, "loss": 0.4741, "step": 3972 }, { "epoch": 2.2195530726256982, "grad_norm": 0.4934897720813751, "learning_rate": 0.0008916806722689076, "loss": 0.3788, "step": 3973 }, { "epoch": 2.2201117318435752, "grad_norm": 0.6134538054466248, "learning_rate": 0.0008916526610644258, "loss": 0.6054, "step": 3974 }, { "epoch": 2.2206703910614527, "grad_norm": 8.542625427246094, "learning_rate": 0.000891624649859944, "loss": 0.5029, "step": 3975 }, { "epoch": 2.2212290502793297, "grad_norm": 0.4735102653503418, "learning_rate": 0.0008915966386554622, "loss": 0.5312, "step": 3976 }, { "epoch": 2.2217877094972067, "grad_norm": 0.8623790740966797, "learning_rate": 
0.0008915686274509804, "loss": 0.4552, "step": 3977 }, { "epoch": 2.2223463687150837, "grad_norm": 0.8594672679901123, "learning_rate": 0.0008915406162464986, "loss": 0.7722, "step": 3978 }, { "epoch": 2.2229050279329607, "grad_norm": 1.4279512166976929, "learning_rate": 0.0008915126050420168, "loss": 0.5103, "step": 3979 }, { "epoch": 2.223463687150838, "grad_norm": 0.8403601050376892, "learning_rate": 0.000891484593837535, "loss": 0.4125, "step": 3980 }, { "epoch": 2.224022346368715, "grad_norm": 0.5547012090682983, "learning_rate": 0.0008914565826330533, "loss": 0.3444, "step": 3981 }, { "epoch": 2.224581005586592, "grad_norm": 1.2253466844558716, "learning_rate": 0.0008914285714285714, "loss": 0.4463, "step": 3982 }, { "epoch": 2.2251396648044692, "grad_norm": 0.5143917798995972, "learning_rate": 0.0008914005602240896, "loss": 0.5134, "step": 3983 }, { "epoch": 2.2256983240223462, "grad_norm": 1.185542106628418, "learning_rate": 0.0008913725490196079, "loss": 0.5527, "step": 3984 }, { "epoch": 2.2262569832402237, "grad_norm": 0.5383661985397339, "learning_rate": 0.0008913445378151261, "loss": 0.5151, "step": 3985 }, { "epoch": 2.2268156424581007, "grad_norm": 0.44264593720436096, "learning_rate": 0.0008913165266106444, "loss": 0.4296, "step": 3986 }, { "epoch": 2.2273743016759777, "grad_norm": 0.521536648273468, "learning_rate": 0.0008912885154061625, "loss": 0.4164, "step": 3987 }, { "epoch": 2.2279329608938547, "grad_norm": 1.3124431371688843, "learning_rate": 0.0008912605042016807, "loss": 0.4444, "step": 3988 }, { "epoch": 2.2284916201117317, "grad_norm": 0.6384272575378418, "learning_rate": 0.0008912324929971989, "loss": 0.6504, "step": 3989 }, { "epoch": 2.2290502793296088, "grad_norm": 0.8629913330078125, "learning_rate": 0.0008912044817927171, "loss": 0.4836, "step": 3990 }, { "epoch": 2.229608938547486, "grad_norm": 0.5492082834243774, "learning_rate": 0.0008911764705882354, "loss": 0.4701, "step": 3991 }, { "epoch": 2.230167597765363, "grad_norm": 
0.4890977144241333, "learning_rate": 0.0008911484593837535, "loss": 0.4474, "step": 3992 }, { "epoch": 2.2307262569832402, "grad_norm": 0.5650962591171265, "learning_rate": 0.0008911204481792717, "loss": 0.4234, "step": 3993 }, { "epoch": 2.2312849162011172, "grad_norm": 1.309747338294983, "learning_rate": 0.0008910924369747899, "loss": 0.3745, "step": 3994 }, { "epoch": 2.2318435754189943, "grad_norm": 0.6015917062759399, "learning_rate": 0.0008910644257703081, "loss": 0.5381, "step": 3995 }, { "epoch": 2.2324022346368717, "grad_norm": 1.0430498123168945, "learning_rate": 0.0008910364145658264, "loss": 0.5379, "step": 3996 }, { "epoch": 2.2329608938547487, "grad_norm": 0.5327572822570801, "learning_rate": 0.0008910084033613446, "loss": 0.4768, "step": 3997 }, { "epoch": 2.2335195530726257, "grad_norm": 3.224012613296509, "learning_rate": 0.0008909803921568627, "loss": 0.519, "step": 3998 }, { "epoch": 2.2340782122905027, "grad_norm": 1.0534822940826416, "learning_rate": 0.0008909523809523809, "loss": 0.459, "step": 3999 }, { "epoch": 2.2346368715083798, "grad_norm": 0.5711976289749146, "learning_rate": 0.0008909243697478992, "loss": 0.3572, "step": 4000 }, { "epoch": 2.2346368715083798, "eval_cer": 0.09715448485046861, "eval_loss": 0.36586418747901917, "eval_runtime": 55.7043, "eval_samples_per_second": 81.466, "eval_steps_per_second": 5.098, "eval_wer": 0.3814697913466104, "step": 4000 }, { "epoch": 2.2351955307262568, "grad_norm": 0.5830895304679871, "learning_rate": 0.0008908963585434175, "loss": 0.4802, "step": 4001 }, { "epoch": 2.235754189944134, "grad_norm": 0.8545703887939453, "learning_rate": 0.0008908683473389357, "loss": 0.5038, "step": 4002 }, { "epoch": 2.2363128491620112, "grad_norm": 0.6758176684379578, "learning_rate": 0.0008908403361344538, "loss": 0.4568, "step": 4003 }, { "epoch": 2.2368715083798882, "grad_norm": 0.6274861693382263, "learning_rate": 0.000890812324929972, "loss": 0.4008, "step": 4004 }, { "epoch": 2.2374301675977653, "grad_norm": 
0.9927757382392883, "learning_rate": 0.0008907843137254902, "loss": 0.5065, "step": 4005 }, { "epoch": 2.2379888268156423, "grad_norm": 0.6257267594337463, "learning_rate": 0.0008907563025210085, "loss": 0.5046, "step": 4006 }, { "epoch": 2.2385474860335197, "grad_norm": 0.5576756596565247, "learning_rate": 0.0008907282913165267, "loss": 0.4643, "step": 4007 }, { "epoch": 2.2391061452513967, "grad_norm": 0.5263304710388184, "learning_rate": 0.0008907002801120448, "loss": 0.5706, "step": 4008 }, { "epoch": 2.2396648044692737, "grad_norm": 0.4375152885913849, "learning_rate": 0.000890672268907563, "loss": 0.4671, "step": 4009 }, { "epoch": 2.2402234636871508, "grad_norm": 0.6941837668418884, "learning_rate": 0.0008906442577030812, "loss": 0.4392, "step": 4010 }, { "epoch": 2.2407821229050278, "grad_norm": 0.44970181584358215, "learning_rate": 0.0008906162464985994, "loss": 0.4216, "step": 4011 }, { "epoch": 2.241340782122905, "grad_norm": 0.6526822447776794, "learning_rate": 0.0008905882352941177, "loss": 0.4845, "step": 4012 }, { "epoch": 2.2418994413407822, "grad_norm": 0.4581945240497589, "learning_rate": 0.0008905602240896359, "loss": 0.4041, "step": 4013 }, { "epoch": 2.2424581005586592, "grad_norm": 0.7085303068161011, "learning_rate": 0.000890532212885154, "loss": 0.3743, "step": 4014 }, { "epoch": 2.2430167597765363, "grad_norm": 3.2189433574676514, "learning_rate": 0.0008905042016806722, "loss": 0.7906, "step": 4015 }, { "epoch": 2.2435754189944133, "grad_norm": 1.275059461593628, "learning_rate": 0.0008904761904761904, "loss": 0.4727, "step": 4016 }, { "epoch": 2.2441340782122907, "grad_norm": 0.7866321802139282, "learning_rate": 0.0008904481792717088, "loss": 0.431, "step": 4017 }, { "epoch": 2.2446927374301677, "grad_norm": 0.6882569789886475, "learning_rate": 0.000890420168067227, "loss": 0.517, "step": 4018 }, { "epoch": 2.2452513966480447, "grad_norm": 1.0975369215011597, "learning_rate": 0.0008903921568627451, "loss": 0.455, "step": 4019 }, { "epoch": 
2.2458100558659218, "grad_norm": 0.4408380091190338, "learning_rate": 0.0008903641456582633, "loss": 0.4277, "step": 4020 }, { "epoch": 2.2463687150837988, "grad_norm": 0.4277670979499817, "learning_rate": 0.0008903361344537815, "loss": 0.4198, "step": 4021 }, { "epoch": 2.2469273743016758, "grad_norm": 0.6119745969772339, "learning_rate": 0.0008903081232492998, "loss": 0.4625, "step": 4022 }, { "epoch": 2.2474860335195532, "grad_norm": 0.6843535900115967, "learning_rate": 0.000890280112044818, "loss": 0.4642, "step": 4023 }, { "epoch": 2.2480446927374302, "grad_norm": 1.420467734336853, "learning_rate": 0.0008902521008403361, "loss": 0.3801, "step": 4024 }, { "epoch": 2.2486033519553073, "grad_norm": 0.6617404818534851, "learning_rate": 0.0008902240896358543, "loss": 0.4799, "step": 4025 }, { "epoch": 2.2491620111731843, "grad_norm": 0.6966404318809509, "learning_rate": 0.0008901960784313725, "loss": 0.5039, "step": 4026 }, { "epoch": 2.2497206703910613, "grad_norm": 0.6200904846191406, "learning_rate": 0.0008901680672268908, "loss": 0.373, "step": 4027 }, { "epoch": 2.2502793296089387, "grad_norm": 1.8639546632766724, "learning_rate": 0.000890140056022409, "loss": 0.5356, "step": 4028 }, { "epoch": 2.2508379888268157, "grad_norm": 0.7014147639274597, "learning_rate": 0.0008901120448179272, "loss": 0.362, "step": 4029 }, { "epoch": 2.2513966480446927, "grad_norm": 0.5041413307189941, "learning_rate": 0.0008900840336134453, "loss": 0.444, "step": 4030 }, { "epoch": 2.2519553072625698, "grad_norm": 0.5858452320098877, "learning_rate": 0.0008900560224089635, "loss": 0.6114, "step": 4031 }, { "epoch": 2.2525139664804468, "grad_norm": 0.7455693483352661, "learning_rate": 0.0008900280112044819, "loss": 0.4134, "step": 4032 }, { "epoch": 2.253072625698324, "grad_norm": 1.9302022457122803, "learning_rate": 0.0008900000000000001, "loss": 0.6002, "step": 4033 }, { "epoch": 2.2536312849162012, "grad_norm": 0.5668044686317444, "learning_rate": 0.0008899719887955183, "loss": 
0.4839, "step": 4034 }, { "epoch": 2.2541899441340782, "grad_norm": 3.9623420238494873, "learning_rate": 0.0008899439775910364, "loss": 0.4099, "step": 4035 }, { "epoch": 2.2547486033519553, "grad_norm": 0.4752205014228821, "learning_rate": 0.0008899159663865546, "loss": 0.4718, "step": 4036 }, { "epoch": 2.2553072625698323, "grad_norm": 0.9224374294281006, "learning_rate": 0.0008898879551820729, "loss": 0.5898, "step": 4037 }, { "epoch": 2.2558659217877093, "grad_norm": 0.5311445593833923, "learning_rate": 0.0008898599439775911, "loss": 0.4319, "step": 4038 }, { "epoch": 2.2564245810055867, "grad_norm": 0.527908205986023, "learning_rate": 0.0008898319327731093, "loss": 0.4151, "step": 4039 }, { "epoch": 2.2569832402234637, "grad_norm": 0.5853949785232544, "learning_rate": 0.0008898039215686274, "loss": 0.5628, "step": 4040 }, { "epoch": 2.2575418994413408, "grad_norm": 1.008521318435669, "learning_rate": 0.0008897759103641456, "loss": 0.5459, "step": 4041 }, { "epoch": 2.2581005586592178, "grad_norm": 1.5214451551437378, "learning_rate": 0.0008897478991596639, "loss": 0.3984, "step": 4042 }, { "epoch": 2.258659217877095, "grad_norm": 0.6478580236434937, "learning_rate": 0.0008897198879551821, "loss": 0.4768, "step": 4043 }, { "epoch": 2.2592178770949722, "grad_norm": 0.7572171688079834, "learning_rate": 0.0008896918767507003, "loss": 0.5513, "step": 4044 }, { "epoch": 2.2597765363128492, "grad_norm": 0.4587641656398773, "learning_rate": 0.0008896638655462185, "loss": 0.4634, "step": 4045 }, { "epoch": 2.2603351955307263, "grad_norm": 0.5504059195518494, "learning_rate": 0.0008896358543417366, "loss": 0.4738, "step": 4046 }, { "epoch": 2.2608938547486033, "grad_norm": 0.674247682094574, "learning_rate": 0.0008896078431372549, "loss": 0.4244, "step": 4047 }, { "epoch": 2.2614525139664803, "grad_norm": 0.9136465787887573, "learning_rate": 0.0008895798319327731, "loss": 0.607, "step": 4048 }, { "epoch": 2.2620111731843577, "grad_norm": 0.725601851940155, 
"learning_rate": 0.0008895518207282914, "loss": 0.461, "step": 4049 }, { "epoch": 2.2625698324022347, "grad_norm": 0.6201837658882141, "learning_rate": 0.0008895238095238096, "loss": 0.5212, "step": 4050 }, { "epoch": 2.2631284916201118, "grad_norm": 0.5169089436531067, "learning_rate": 0.0008894957983193277, "loss": 0.5246, "step": 4051 }, { "epoch": 2.2636871508379888, "grad_norm": 0.5812177658081055, "learning_rate": 0.000889467787114846, "loss": 0.399, "step": 4052 }, { "epoch": 2.264245810055866, "grad_norm": 0.5390665531158447, "learning_rate": 0.0008894397759103642, "loss": 0.5036, "step": 4053 }, { "epoch": 2.2648044692737432, "grad_norm": 0.6369917988777161, "learning_rate": 0.0008894117647058824, "loss": 0.4432, "step": 4054 }, { "epoch": 2.2653631284916202, "grad_norm": 0.6308425664901733, "learning_rate": 0.0008893837535014006, "loss": 0.5208, "step": 4055 }, { "epoch": 2.2659217877094973, "grad_norm": 0.4943470060825348, "learning_rate": 0.0008893557422969187, "loss": 0.4957, "step": 4056 }, { "epoch": 2.2664804469273743, "grad_norm": 0.6871525645256042, "learning_rate": 0.000889327731092437, "loss": 0.5049, "step": 4057 }, { "epoch": 2.2670391061452513, "grad_norm": 1.1248698234558105, "learning_rate": 0.0008892997198879552, "loss": 0.521, "step": 4058 }, { "epoch": 2.2675977653631287, "grad_norm": 0.6286033391952515, "learning_rate": 0.0008892717086834734, "loss": 0.4176, "step": 4059 }, { "epoch": 2.2681564245810057, "grad_norm": 1.6168240308761597, "learning_rate": 0.0008892436974789916, "loss": 0.564, "step": 4060 }, { "epoch": 2.2687150837988828, "grad_norm": 0.4227534234523773, "learning_rate": 0.0008892156862745098, "loss": 0.3782, "step": 4061 }, { "epoch": 2.2692737430167598, "grad_norm": 0.6461851596832275, "learning_rate": 0.000889187675070028, "loss": 0.4866, "step": 4062 }, { "epoch": 2.269832402234637, "grad_norm": 0.7072750329971313, "learning_rate": 0.0008891596638655462, "loss": 0.8246, "step": 4063 }, { "epoch": 2.270391061452514, 
"grad_norm": 0.545943558216095, "learning_rate": 0.0008891316526610644, "loss": 0.5434, "step": 4064 }, { "epoch": 2.2709497206703912, "grad_norm": 0.5444265007972717, "learning_rate": 0.0008891036414565826, "loss": 0.493, "step": 4065 }, { "epoch": 2.2715083798882683, "grad_norm": 0.6745163798332214, "learning_rate": 0.0008890756302521009, "loss": 0.4715, "step": 4066 }, { "epoch": 2.2720670391061453, "grad_norm": 2.209089756011963, "learning_rate": 0.0008890476190476191, "loss": 0.4144, "step": 4067 }, { "epoch": 2.2726256983240223, "grad_norm": 0.6629486083984375, "learning_rate": 0.0008890196078431373, "loss": 0.4839, "step": 4068 }, { "epoch": 2.2731843575418993, "grad_norm": 0.6990852355957031, "learning_rate": 0.0008889915966386555, "loss": 0.4421, "step": 4069 }, { "epoch": 2.2737430167597763, "grad_norm": 0.46221858263015747, "learning_rate": 0.0008889635854341737, "loss": 0.3601, "step": 4070 }, { "epoch": 2.2743016759776538, "grad_norm": 1.5171685218811035, "learning_rate": 0.0008889355742296919, "loss": 0.6345, "step": 4071 }, { "epoch": 2.2748603351955308, "grad_norm": 0.8347725868225098, "learning_rate": 0.0008889075630252101, "loss": 0.4724, "step": 4072 }, { "epoch": 2.275418994413408, "grad_norm": 0.8772679567337036, "learning_rate": 0.0008888795518207283, "loss": 0.6179, "step": 4073 }, { "epoch": 2.275977653631285, "grad_norm": 0.5657960772514343, "learning_rate": 0.0008888515406162465, "loss": 0.4992, "step": 4074 }, { "epoch": 2.276536312849162, "grad_norm": 0.8635707497596741, "learning_rate": 0.0008888235294117647, "loss": 0.4538, "step": 4075 }, { "epoch": 2.2770949720670393, "grad_norm": 1.282912254333496, "learning_rate": 0.0008887955182072829, "loss": 0.7064, "step": 4076 }, { "epoch": 2.2776536312849163, "grad_norm": 0.5405939817428589, "learning_rate": 0.0008887675070028012, "loss": 0.3681, "step": 4077 }, { "epoch": 2.2782122905027933, "grad_norm": 0.7191981077194214, "learning_rate": 0.0008887394957983193, "loss": 0.4156, "step": 4078 
}, { "epoch": 2.2787709497206703, "grad_norm": 0.4254477620124817, "learning_rate": 0.0008887114845938375, "loss": 0.4195, "step": 4079 }, { "epoch": 2.2793296089385473, "grad_norm": 0.8394465446472168, "learning_rate": 0.0008886834733893557, "loss": 0.5782, "step": 4080 }, { "epoch": 2.2798882681564248, "grad_norm": 0.48060542345046997, "learning_rate": 0.0008886554621848739, "loss": 0.3882, "step": 4081 }, { "epoch": 2.2804469273743018, "grad_norm": 0.714497983455658, "learning_rate": 0.0008886274509803923, "loss": 0.5803, "step": 4082 }, { "epoch": 2.281005586592179, "grad_norm": 0.6810247898101807, "learning_rate": 0.0008885994397759104, "loss": 0.4857, "step": 4083 }, { "epoch": 2.281564245810056, "grad_norm": 0.5977585911750793, "learning_rate": 0.0008885714285714286, "loss": 0.5131, "step": 4084 }, { "epoch": 2.282122905027933, "grad_norm": 0.5889948606491089, "learning_rate": 0.0008885434173669468, "loss": 0.5389, "step": 4085 }, { "epoch": 2.2826815642458103, "grad_norm": 0.8167197108268738, "learning_rate": 0.000888515406162465, "loss": 0.4406, "step": 4086 }, { "epoch": 2.2832402234636873, "grad_norm": 0.4687158465385437, "learning_rate": 0.0008884873949579833, "loss": 0.4221, "step": 4087 }, { "epoch": 2.2837988826815643, "grad_norm": 0.5708418488502502, "learning_rate": 0.0008884593837535014, "loss": 0.3652, "step": 4088 }, { "epoch": 2.2843575418994413, "grad_norm": 0.5562434792518616, "learning_rate": 0.0008884313725490196, "loss": 0.4668, "step": 4089 }, { "epoch": 2.2849162011173183, "grad_norm": 0.6504343152046204, "learning_rate": 0.0008884033613445378, "loss": 0.3874, "step": 4090 }, { "epoch": 2.2854748603351958, "grad_norm": 0.6166595220565796, "learning_rate": 0.000888375350140056, "loss": 0.4679, "step": 4091 }, { "epoch": 2.2860335195530728, "grad_norm": 0.7786232233047485, "learning_rate": 0.0008883473389355743, "loss": 0.4963, "step": 4092 }, { "epoch": 2.28659217877095, "grad_norm": 0.4540756344795227, "learning_rate": 
0.0008883193277310925, "loss": 0.3871, "step": 4093 }, { "epoch": 2.287150837988827, "grad_norm": 0.827295184135437, "learning_rate": 0.0008882913165266106, "loss": 0.7639, "step": 4094 }, { "epoch": 2.287709497206704, "grad_norm": 1.3360422849655151, "learning_rate": 0.0008882633053221288, "loss": 0.5245, "step": 4095 }, { "epoch": 2.288268156424581, "grad_norm": 0.6237481832504272, "learning_rate": 0.000888235294117647, "loss": 0.5449, "step": 4096 }, { "epoch": 2.2888268156424583, "grad_norm": 1.0694364309310913, "learning_rate": 0.0008882072829131653, "loss": 0.4615, "step": 4097 }, { "epoch": 2.2893854748603353, "grad_norm": 0.47446396946907043, "learning_rate": 0.0008881792717086836, "loss": 0.4717, "step": 4098 }, { "epoch": 2.2899441340782123, "grad_norm": 0.642112672328949, "learning_rate": 0.0008881512605042017, "loss": 0.5206, "step": 4099 }, { "epoch": 2.2905027932960893, "grad_norm": 0.4892105162143707, "learning_rate": 0.0008881232492997199, "loss": 0.5134, "step": 4100 }, { "epoch": 2.2910614525139663, "grad_norm": 2.5501768589019775, "learning_rate": 0.0008880952380952381, "loss": 0.4788, "step": 4101 }, { "epoch": 2.2916201117318438, "grad_norm": 1.609928846359253, "learning_rate": 0.0008880672268907564, "loss": 0.3942, "step": 4102 }, { "epoch": 2.292178770949721, "grad_norm": 0.5709579586982727, "learning_rate": 0.0008880392156862746, "loss": 0.4299, "step": 4103 }, { "epoch": 2.292737430167598, "grad_norm": 0.5986993908882141, "learning_rate": 0.0008880112044817927, "loss": 0.4683, "step": 4104 }, { "epoch": 2.293296089385475, "grad_norm": 0.8785414695739746, "learning_rate": 0.0008879831932773109, "loss": 0.4779, "step": 4105 }, { "epoch": 2.293854748603352, "grad_norm": 0.4095707833766937, "learning_rate": 0.0008879551820728291, "loss": 0.4205, "step": 4106 }, { "epoch": 2.294413407821229, "grad_norm": 0.5068609714508057, "learning_rate": 0.0008879271708683474, "loss": 0.5193, "step": 4107 }, { "epoch": 2.2949720670391063, "grad_norm": 
0.5796323418617249, "learning_rate": 0.0008878991596638656, "loss": 0.5022, "step": 4108 }, { "epoch": 2.2955307262569833, "grad_norm": 0.6752324104309082, "learning_rate": 0.0008878711484593838, "loss": 0.4512, "step": 4109 }, { "epoch": 2.2960893854748603, "grad_norm": 0.7865152359008789, "learning_rate": 0.0008878431372549019, "loss": 0.4861, "step": 4110 }, { "epoch": 2.2966480446927373, "grad_norm": 0.5990186929702759, "learning_rate": 0.0008878151260504201, "loss": 0.5114, "step": 4111 }, { "epoch": 2.2972067039106143, "grad_norm": 0.5680040121078491, "learning_rate": 0.0008877871148459384, "loss": 0.5215, "step": 4112 }, { "epoch": 2.2977653631284918, "grad_norm": 0.5190915465354919, "learning_rate": 0.0008877591036414566, "loss": 0.5886, "step": 4113 }, { "epoch": 2.298324022346369, "grad_norm": 0.8567480444908142, "learning_rate": 0.0008877310924369749, "loss": 0.3847, "step": 4114 }, { "epoch": 2.298882681564246, "grad_norm": 0.7038180232048035, "learning_rate": 0.000887703081232493, "loss": 0.5703, "step": 4115 }, { "epoch": 2.299441340782123, "grad_norm": 0.5825362801551819, "learning_rate": 0.0008876750700280112, "loss": 0.5479, "step": 4116 }, { "epoch": 2.3, "grad_norm": 2.6037185192108154, "learning_rate": 0.0008876470588235295, "loss": 0.5473, "step": 4117 }, { "epoch": 2.3005586592178773, "grad_norm": 0.8928390145301819, "learning_rate": 0.0008876190476190477, "loss": 0.5253, "step": 4118 }, { "epoch": 2.3011173184357543, "grad_norm": 0.685375988483429, "learning_rate": 0.0008875910364145659, "loss": 0.5245, "step": 4119 }, { "epoch": 2.3016759776536313, "grad_norm": 0.5561337471008301, "learning_rate": 0.000887563025210084, "loss": 0.4835, "step": 4120 }, { "epoch": 2.3022346368715083, "grad_norm": 1.3214690685272217, "learning_rate": 0.0008875350140056022, "loss": 0.4703, "step": 4121 }, { "epoch": 2.3027932960893853, "grad_norm": 1.1387779712677002, "learning_rate": 0.0008875070028011205, "loss": 0.4082, "step": 4122 }, { "epoch": 
2.3033519553072628, "grad_norm": 7.468307971954346, "learning_rate": 0.0008874789915966387, "loss": 0.398, "step": 4123 }, { "epoch": 2.30391061452514, "grad_norm": 0.8910265564918518, "learning_rate": 0.0008874509803921569, "loss": 0.438, "step": 4124 }, { "epoch": 2.304469273743017, "grad_norm": 0.8785117268562317, "learning_rate": 0.0008874229691876751, "loss": 0.5598, "step": 4125 }, { "epoch": 2.305027932960894, "grad_norm": 0.6925243735313416, "learning_rate": 0.0008873949579831932, "loss": 0.4195, "step": 4126 }, { "epoch": 2.305586592178771, "grad_norm": 1.0697098970413208, "learning_rate": 0.0008873669467787115, "loss": 0.6214, "step": 4127 }, { "epoch": 2.3061452513966483, "grad_norm": 0.6937605738639832, "learning_rate": 0.0008873389355742297, "loss": 0.502, "step": 4128 }, { "epoch": 2.3067039106145253, "grad_norm": 0.6571819186210632, "learning_rate": 0.0008873109243697479, "loss": 0.4924, "step": 4129 }, { "epoch": 2.3072625698324023, "grad_norm": 0.5332193970680237, "learning_rate": 0.0008872829131652661, "loss": 0.4646, "step": 4130 }, { "epoch": 2.3078212290502793, "grad_norm": 0.5302227139472961, "learning_rate": 0.0008872549019607842, "loss": 0.4596, "step": 4131 }, { "epoch": 2.3083798882681563, "grad_norm": 2.9041993618011475, "learning_rate": 0.0008872268907563026, "loss": 0.352, "step": 4132 }, { "epoch": 2.3089385474860333, "grad_norm": 0.6829684376716614, "learning_rate": 0.0008871988795518208, "loss": 0.6677, "step": 4133 }, { "epoch": 2.309497206703911, "grad_norm": 0.7759153246879578, "learning_rate": 0.000887170868347339, "loss": 0.5889, "step": 4134 }, { "epoch": 2.310055865921788, "grad_norm": 0.7212245464324951, "learning_rate": 0.0008871428571428572, "loss": 0.482, "step": 4135 }, { "epoch": 2.310614525139665, "grad_norm": 0.9701433181762695, "learning_rate": 0.0008871148459383753, "loss": 0.3879, "step": 4136 }, { "epoch": 2.311173184357542, "grad_norm": 0.6263357996940613, "learning_rate": 0.0008870868347338936, "loss": 0.4101, 
"step": 4137 }, { "epoch": 2.311731843575419, "grad_norm": 0.5934965014457703, "learning_rate": 0.0008870588235294118, "loss": 0.5446, "step": 4138 }, { "epoch": 2.312290502793296, "grad_norm": 0.5642522573471069, "learning_rate": 0.00088703081232493, "loss": 0.463, "step": 4139 }, { "epoch": 2.3128491620111733, "grad_norm": 0.565039336681366, "learning_rate": 0.0008870028011204482, "loss": 0.5694, "step": 4140 }, { "epoch": 2.3134078212290503, "grad_norm": 0.5565946102142334, "learning_rate": 0.0008869747899159664, "loss": 0.7868, "step": 4141 }, { "epoch": 2.3139664804469273, "grad_norm": 1.1255923509597778, "learning_rate": 0.0008869467787114846, "loss": 0.5169, "step": 4142 }, { "epoch": 2.3145251396648043, "grad_norm": 0.8964384198188782, "learning_rate": 0.0008869187675070028, "loss": 0.7309, "step": 4143 }, { "epoch": 2.3150837988826813, "grad_norm": 0.6354342699050903, "learning_rate": 0.000886890756302521, "loss": 0.5366, "step": 4144 }, { "epoch": 2.315642458100559, "grad_norm": 0.6353447437286377, "learning_rate": 0.0008868627450980392, "loss": 0.5252, "step": 4145 }, { "epoch": 2.316201117318436, "grad_norm": 0.4865851402282715, "learning_rate": 0.0008868347338935574, "loss": 0.3709, "step": 4146 }, { "epoch": 2.316759776536313, "grad_norm": 0.5829808712005615, "learning_rate": 0.0008868067226890756, "loss": 0.4164, "step": 4147 }, { "epoch": 2.31731843575419, "grad_norm": 0.5432511568069458, "learning_rate": 0.0008867787114845939, "loss": 0.4828, "step": 4148 }, { "epoch": 2.317877094972067, "grad_norm": 0.8548393249511719, "learning_rate": 0.0008867507002801121, "loss": 0.5463, "step": 4149 }, { "epoch": 2.3184357541899443, "grad_norm": 0.4730907380580902, "learning_rate": 0.0008867226890756303, "loss": 0.4975, "step": 4150 }, { "epoch": 2.3189944134078213, "grad_norm": 0.49459514021873474, "learning_rate": 0.0008866946778711485, "loss": 0.4229, "step": 4151 }, { "epoch": 2.3195530726256983, "grad_norm": 0.4433779716491699, "learning_rate": 
0.0008866666666666667, "loss": 0.4425, "step": 4152 }, { "epoch": 2.3201117318435753, "grad_norm": 0.5054029822349548, "learning_rate": 0.0008866386554621849, "loss": 0.5313, "step": 4153 }, { "epoch": 2.3206703910614523, "grad_norm": 0.5901767015457153, "learning_rate": 0.0008866106442577031, "loss": 0.5021, "step": 4154 }, { "epoch": 2.32122905027933, "grad_norm": 0.4914945662021637, "learning_rate": 0.0008865826330532213, "loss": 0.5471, "step": 4155 }, { "epoch": 2.321787709497207, "grad_norm": 1.8711297512054443, "learning_rate": 0.0008865546218487395, "loss": 0.4219, "step": 4156 }, { "epoch": 2.322346368715084, "grad_norm": 1.6710267066955566, "learning_rate": 0.0008865266106442578, "loss": 0.4895, "step": 4157 }, { "epoch": 2.322905027932961, "grad_norm": 0.5949874520301819, "learning_rate": 0.0008864985994397759, "loss": 0.4086, "step": 4158 }, { "epoch": 2.323463687150838, "grad_norm": 2.5391275882720947, "learning_rate": 0.0008864705882352941, "loss": 0.5711, "step": 4159 }, { "epoch": 2.3240223463687153, "grad_norm": 0.7851516604423523, "learning_rate": 0.0008864425770308123, "loss": 0.5719, "step": 4160 }, { "epoch": 2.3245810055865923, "grad_norm": 1.2702527046203613, "learning_rate": 0.0008864145658263305, "loss": 0.4646, "step": 4161 }, { "epoch": 2.3251396648044693, "grad_norm": 0.7813109159469604, "learning_rate": 0.0008863865546218488, "loss": 0.5545, "step": 4162 }, { "epoch": 2.3256983240223463, "grad_norm": 0.49233749508857727, "learning_rate": 0.0008863585434173669, "loss": 0.4892, "step": 4163 }, { "epoch": 2.3262569832402233, "grad_norm": 6.627612113952637, "learning_rate": 0.0008863305322128852, "loss": 0.5691, "step": 4164 }, { "epoch": 2.326815642458101, "grad_norm": 0.8298326730728149, "learning_rate": 0.0008863025210084034, "loss": 0.4895, "step": 4165 }, { "epoch": 2.327374301675978, "grad_norm": 0.5120519399642944, "learning_rate": 0.0008862745098039216, "loss": 0.4642, "step": 4166 }, { "epoch": 2.327932960893855, "grad_norm": 
0.48763808608055115, "learning_rate": 0.0008862464985994399, "loss": 0.386, "step": 4167 }, { "epoch": 2.328491620111732, "grad_norm": 0.706680953502655, "learning_rate": 0.000886218487394958, "loss": 0.3697, "step": 4168 }, { "epoch": 2.329050279329609, "grad_norm": 0.6681955456733704, "learning_rate": 0.0008861904761904762, "loss": 0.5167, "step": 4169 }, { "epoch": 2.329608938547486, "grad_norm": 0.5295430421829224, "learning_rate": 0.0008861624649859944, "loss": 0.5733, "step": 4170 }, { "epoch": 2.3301675977653633, "grad_norm": 0.5398170351982117, "learning_rate": 0.0008861344537815126, "loss": 0.5139, "step": 4171 }, { "epoch": 2.3307262569832403, "grad_norm": 0.5111408233642578, "learning_rate": 0.0008861064425770309, "loss": 0.4717, "step": 4172 }, { "epoch": 2.3312849162011173, "grad_norm": 0.5647570490837097, "learning_rate": 0.0008860784313725491, "loss": 0.4832, "step": 4173 }, { "epoch": 2.3318435754189943, "grad_norm": 0.4562356472015381, "learning_rate": 0.0008860504201680672, "loss": 0.4556, "step": 4174 }, { "epoch": 2.3324022346368714, "grad_norm": 0.5109476447105408, "learning_rate": 0.0008860224089635854, "loss": 0.4676, "step": 4175 }, { "epoch": 2.3329608938547484, "grad_norm": 0.45182135701179504, "learning_rate": 0.0008859943977591036, "loss": 0.4014, "step": 4176 }, { "epoch": 2.333519553072626, "grad_norm": 0.6884251832962036, "learning_rate": 0.0008859663865546219, "loss": 0.4297, "step": 4177 }, { "epoch": 2.334078212290503, "grad_norm": 0.4124736189842224, "learning_rate": 0.0008859383753501401, "loss": 0.3958, "step": 4178 }, { "epoch": 2.33463687150838, "grad_norm": 0.45792844891548157, "learning_rate": 0.0008859103641456582, "loss": 0.4592, "step": 4179 }, { "epoch": 2.335195530726257, "grad_norm": 0.4927826523780823, "learning_rate": 0.0008858823529411764, "loss": 0.3621, "step": 4180 }, { "epoch": 2.335754189944134, "grad_norm": 0.4476109445095062, "learning_rate": 0.0008858543417366947, "loss": 0.4068, "step": 4181 }, { "epoch": 
2.3363128491620113, "grad_norm": 0.8651317358016968, "learning_rate": 0.000885826330532213, "loss": 0.5339, "step": 4182 }, { "epoch": 2.3368715083798883, "grad_norm": 0.5524964332580566, "learning_rate": 0.0008857983193277312, "loss": 0.5092, "step": 4183 }, { "epoch": 2.3374301675977653, "grad_norm": 0.4942110478878021, "learning_rate": 0.0008857703081232493, "loss": 0.3719, "step": 4184 }, { "epoch": 2.3379888268156424, "grad_norm": 1.1364790201187134, "learning_rate": 0.0008857422969187675, "loss": 0.4955, "step": 4185 }, { "epoch": 2.3385474860335194, "grad_norm": 0.45928874611854553, "learning_rate": 0.0008857142857142857, "loss": 0.512, "step": 4186 }, { "epoch": 2.339106145251397, "grad_norm": 0.49775776267051697, "learning_rate": 0.000885686274509804, "loss": 0.5159, "step": 4187 }, { "epoch": 2.339664804469274, "grad_norm": 0.598150908946991, "learning_rate": 0.0008856582633053222, "loss": 0.3664, "step": 4188 }, { "epoch": 2.340223463687151, "grad_norm": 1.0361453294754028, "learning_rate": 0.0008856302521008404, "loss": 0.4969, "step": 4189 }, { "epoch": 2.340782122905028, "grad_norm": 0.5566352009773254, "learning_rate": 0.0008856022408963585, "loss": 0.5234, "step": 4190 }, { "epoch": 2.341340782122905, "grad_norm": 0.5165764689445496, "learning_rate": 0.0008855742296918767, "loss": 0.4647, "step": 4191 }, { "epoch": 2.3418994413407823, "grad_norm": 2.389528751373291, "learning_rate": 0.000885546218487395, "loss": 0.4856, "step": 4192 }, { "epoch": 2.3424581005586593, "grad_norm": 0.8305392265319824, "learning_rate": 0.0008855182072829132, "loss": 0.4363, "step": 4193 }, { "epoch": 2.3430167597765363, "grad_norm": 0.5118175745010376, "learning_rate": 0.0008854901960784314, "loss": 0.5227, "step": 4194 }, { "epoch": 2.3435754189944134, "grad_norm": 0.5471662282943726, "learning_rate": 0.0008854621848739495, "loss": 0.4773, "step": 4195 }, { "epoch": 2.3441340782122904, "grad_norm": 0.8572961091995239, "learning_rate": 0.0008854341736694677, "loss": 
0.5489, "step": 4196 }, { "epoch": 2.344692737430168, "grad_norm": 0.6659944653511047, "learning_rate": 0.0008854061624649861, "loss": 0.5261, "step": 4197 }, { "epoch": 2.345251396648045, "grad_norm": 0.5566151738166809, "learning_rate": 0.0008853781512605043, "loss": 0.516, "step": 4198 }, { "epoch": 2.345810055865922, "grad_norm": 0.531288206577301, "learning_rate": 0.0008853501400560225, "loss": 0.4466, "step": 4199 }, { "epoch": 2.346368715083799, "grad_norm": 0.6818004250526428, "learning_rate": 0.0008853221288515406, "loss": 0.4464, "step": 4200 }, { "epoch": 2.346927374301676, "grad_norm": 0.7107765674591064, "learning_rate": 0.0008852941176470588, "loss": 0.5624, "step": 4201 }, { "epoch": 2.3474860335195533, "grad_norm": 0.5839899778366089, "learning_rate": 0.0008852661064425771, "loss": 0.4085, "step": 4202 }, { "epoch": 2.3480446927374303, "grad_norm": 0.6199565529823303, "learning_rate": 0.0008852380952380953, "loss": 0.6021, "step": 4203 }, { "epoch": 2.3486033519553073, "grad_norm": 0.5469422936439514, "learning_rate": 0.0008852100840336135, "loss": 0.4661, "step": 4204 }, { "epoch": 2.3491620111731844, "grad_norm": 0.9261488318443298, "learning_rate": 0.0008851820728291317, "loss": 0.464, "step": 4205 }, { "epoch": 2.3497206703910614, "grad_norm": 0.5453421473503113, "learning_rate": 0.0008851540616246498, "loss": 0.3775, "step": 4206 }, { "epoch": 2.3502793296089384, "grad_norm": 0.6501973271369934, "learning_rate": 0.0008851260504201681, "loss": 0.4996, "step": 4207 }, { "epoch": 2.350837988826816, "grad_norm": 0.6161912679672241, "learning_rate": 0.0008850980392156863, "loss": 0.4738, "step": 4208 }, { "epoch": 2.351396648044693, "grad_norm": 0.5534847974777222, "learning_rate": 0.0008850700280112045, "loss": 0.4977, "step": 4209 }, { "epoch": 2.35195530726257, "grad_norm": 0.9877995252609253, "learning_rate": 0.0008850420168067227, "loss": 0.5588, "step": 4210 }, { "epoch": 2.352513966480447, "grad_norm": 0.6785343289375305, "learning_rate": 
0.0008850140056022408, "loss": 0.5344, "step": 4211 }, { "epoch": 2.353072625698324, "grad_norm": 0.5983065962791443, "learning_rate": 0.0008849859943977591, "loss": 0.5827, "step": 4212 }, { "epoch": 2.353631284916201, "grad_norm": 0.4906616508960724, "learning_rate": 0.0008849579831932774, "loss": 0.5322, "step": 4213 }, { "epoch": 2.3541899441340783, "grad_norm": 0.5360569953918457, "learning_rate": 0.0008849299719887956, "loss": 0.4714, "step": 4214 }, { "epoch": 2.3547486033519553, "grad_norm": 3.384766101837158, "learning_rate": 0.0008849019607843138, "loss": 0.522, "step": 4215 }, { "epoch": 2.3553072625698324, "grad_norm": 0.7306132316589355, "learning_rate": 0.0008848739495798319, "loss": 0.4352, "step": 4216 }, { "epoch": 2.3558659217877094, "grad_norm": 0.46426141262054443, "learning_rate": 0.0008848459383753502, "loss": 0.4176, "step": 4217 }, { "epoch": 2.3564245810055864, "grad_norm": 0.4560985565185547, "learning_rate": 0.0008848179271708684, "loss": 0.4916, "step": 4218 }, { "epoch": 2.356983240223464, "grad_norm": 0.6996239423751831, "learning_rate": 0.0008847899159663866, "loss": 0.5451, "step": 4219 }, { "epoch": 2.357541899441341, "grad_norm": 0.3714365065097809, "learning_rate": 0.0008847619047619048, "loss": 0.4146, "step": 4220 }, { "epoch": 2.358100558659218, "grad_norm": 0.7201758027076721, "learning_rate": 0.000884733893557423, "loss": 0.5827, "step": 4221 }, { "epoch": 2.358659217877095, "grad_norm": 0.7988872528076172, "learning_rate": 0.0008847058823529412, "loss": 0.6139, "step": 4222 }, { "epoch": 2.359217877094972, "grad_norm": 0.433135062456131, "learning_rate": 0.0008846778711484594, "loss": 0.4678, "step": 4223 }, { "epoch": 2.3597765363128493, "grad_norm": 0.5376521348953247, "learning_rate": 0.0008846498599439776, "loss": 0.5302, "step": 4224 }, { "epoch": 2.3603351955307263, "grad_norm": 0.6785982251167297, "learning_rate": 0.0008846218487394958, "loss": 0.5146, "step": 4225 }, { "epoch": 2.3608938547486034, "grad_norm": 
0.6107456088066101, "learning_rate": 0.000884593837535014, "loss": 0.383, "step": 4226 }, { "epoch": 2.3614525139664804, "grad_norm": 0.7669987082481384, "learning_rate": 0.0008845658263305322, "loss": 0.5966, "step": 4227 }, { "epoch": 2.3620111731843574, "grad_norm": 0.5143811702728271, "learning_rate": 0.0008845378151260504, "loss": 0.4405, "step": 4228 }, { "epoch": 2.362569832402235, "grad_norm": 0.6507100462913513, "learning_rate": 0.0008845098039215686, "loss": 0.5834, "step": 4229 }, { "epoch": 2.363128491620112, "grad_norm": 0.7228155136108398, "learning_rate": 0.0008844817927170869, "loss": 0.4828, "step": 4230 }, { "epoch": 2.363687150837989, "grad_norm": 0.7891553640365601, "learning_rate": 0.0008844537815126051, "loss": 0.4341, "step": 4231 }, { "epoch": 2.364245810055866, "grad_norm": 0.8317373991012573, "learning_rate": 0.0008844257703081234, "loss": 0.3927, "step": 4232 }, { "epoch": 2.364804469273743, "grad_norm": 0.5360849499702454, "learning_rate": 0.0008843977591036415, "loss": 0.3607, "step": 4233 }, { "epoch": 2.3653631284916203, "grad_norm": 0.4699600338935852, "learning_rate": 0.0008843697478991597, "loss": 0.4963, "step": 4234 }, { "epoch": 2.3659217877094973, "grad_norm": 0.48334765434265137, "learning_rate": 0.0008843417366946779, "loss": 0.3855, "step": 4235 }, { "epoch": 2.3664804469273744, "grad_norm": 0.40742337703704834, "learning_rate": 0.0008843137254901961, "loss": 0.4161, "step": 4236 }, { "epoch": 2.3670391061452514, "grad_norm": 0.47621408104896545, "learning_rate": 0.0008842857142857143, "loss": 0.4672, "step": 4237 }, { "epoch": 2.3675977653631284, "grad_norm": 0.7323170900344849, "learning_rate": 0.0008842577030812325, "loss": 0.5033, "step": 4238 }, { "epoch": 2.3681564245810054, "grad_norm": 0.4447155296802521, "learning_rate": 0.0008842296918767507, "loss": 0.339, "step": 4239 }, { "epoch": 2.368715083798883, "grad_norm": 0.6605436205863953, "learning_rate": 0.0008842016806722689, "loss": 0.3911, "step": 4240 }, { 
"epoch": 2.36927374301676, "grad_norm": 1.070167064666748, "learning_rate": 0.0008841736694677871, "loss": 0.6765, "step": 4241 }, { "epoch": 2.369832402234637, "grad_norm": 0.5497492551803589, "learning_rate": 0.0008841456582633053, "loss": 0.4693, "step": 4242 }, { "epoch": 2.370391061452514, "grad_norm": 0.47830671072006226, "learning_rate": 0.0008841176470588235, "loss": 0.459, "step": 4243 }, { "epoch": 2.370949720670391, "grad_norm": 0.4435995817184448, "learning_rate": 0.0008840896358543417, "loss": 0.436, "step": 4244 }, { "epoch": 2.3715083798882683, "grad_norm": 0.9786117076873779, "learning_rate": 0.0008840616246498599, "loss": 0.5611, "step": 4245 }, { "epoch": 2.3720670391061454, "grad_norm": 0.6978351473808289, "learning_rate": 0.0008840336134453782, "loss": 0.4574, "step": 4246 }, { "epoch": 2.3726256983240224, "grad_norm": 0.8869118690490723, "learning_rate": 0.0008840056022408964, "loss": 0.4586, "step": 4247 }, { "epoch": 2.3731843575418994, "grad_norm": 1.659661054611206, "learning_rate": 0.0008839775910364147, "loss": 0.4293, "step": 4248 }, { "epoch": 2.3737430167597764, "grad_norm": 1.223410725593567, "learning_rate": 0.0008839495798319328, "loss": 0.5203, "step": 4249 }, { "epoch": 2.3743016759776534, "grad_norm": 0.7573944330215454, "learning_rate": 0.000883921568627451, "loss": 0.4359, "step": 4250 }, { "epoch": 2.374860335195531, "grad_norm": 0.5053035020828247, "learning_rate": 0.0008838935574229692, "loss": 0.4868, "step": 4251 }, { "epoch": 2.375418994413408, "grad_norm": 0.4673672616481781, "learning_rate": 0.0008838655462184874, "loss": 0.4322, "step": 4252 }, { "epoch": 2.375977653631285, "grad_norm": 0.44579166173934937, "learning_rate": 0.0008838375350140057, "loss": 0.3557, "step": 4253 }, { "epoch": 2.376536312849162, "grad_norm": 0.46582189202308655, "learning_rate": 0.0008838095238095238, "loss": 0.4105, "step": 4254 }, { "epoch": 2.377094972067039, "grad_norm": 0.44945183396339417, "learning_rate": 0.000883781512605042, 
"loss": 0.43, "step": 4255 }, { "epoch": 2.3776536312849164, "grad_norm": 10.617576599121094, "learning_rate": 0.0008837535014005602, "loss": 0.4773, "step": 4256 }, { "epoch": 2.3782122905027934, "grad_norm": 0.6170254349708557, "learning_rate": 0.0008837254901960784, "loss": 0.5113, "step": 4257 }, { "epoch": 2.3787709497206704, "grad_norm": 0.5523990392684937, "learning_rate": 0.0008836974789915967, "loss": 0.543, "step": 4258 }, { "epoch": 2.3793296089385474, "grad_norm": 1.5440400838851929, "learning_rate": 0.0008836694677871148, "loss": 0.4009, "step": 4259 }, { "epoch": 2.3798882681564244, "grad_norm": 0.7903233170509338, "learning_rate": 0.000883641456582633, "loss": 0.5596, "step": 4260 }, { "epoch": 2.380446927374302, "grad_norm": 0.9833642840385437, "learning_rate": 0.0008836134453781512, "loss": 0.4112, "step": 4261 }, { "epoch": 2.381005586592179, "grad_norm": 0.7449160814285278, "learning_rate": 0.0008835854341736694, "loss": 0.5871, "step": 4262 }, { "epoch": 2.381564245810056, "grad_norm": 0.7182409167289734, "learning_rate": 0.0008835574229691878, "loss": 0.5365, "step": 4263 }, { "epoch": 2.382122905027933, "grad_norm": 1.0733376741409302, "learning_rate": 0.000883529411764706, "loss": 0.3719, "step": 4264 }, { "epoch": 2.38268156424581, "grad_norm": 0.48038485646247864, "learning_rate": 0.0008835014005602241, "loss": 0.4889, "step": 4265 }, { "epoch": 2.3832402234636874, "grad_norm": 0.5214555263519287, "learning_rate": 0.0008834733893557423, "loss": 0.5317, "step": 4266 }, { "epoch": 2.3837988826815644, "grad_norm": 0.6974785327911377, "learning_rate": 0.0008834453781512605, "loss": 0.5347, "step": 4267 }, { "epoch": 2.3843575418994414, "grad_norm": 0.46163174510002136, "learning_rate": 0.0008834173669467788, "loss": 0.5655, "step": 4268 }, { "epoch": 2.3849162011173184, "grad_norm": 0.5612812638282776, "learning_rate": 0.000883389355742297, "loss": 0.5412, "step": 4269 }, { "epoch": 2.3854748603351954, "grad_norm": 0.5364530682563782, 
"learning_rate": 0.0008833613445378151, "loss": 0.3914, "step": 4270 }, { "epoch": 2.386033519553073, "grad_norm": 0.5268359184265137, "learning_rate": 0.0008833333333333333, "loss": 0.4727, "step": 4271 }, { "epoch": 2.38659217877095, "grad_norm": 0.5146215558052063, "learning_rate": 0.0008833053221288515, "loss": 0.5487, "step": 4272 }, { "epoch": 2.387150837988827, "grad_norm": 0.4384045898914337, "learning_rate": 0.0008832773109243698, "loss": 0.3752, "step": 4273 }, { "epoch": 2.387709497206704, "grad_norm": 0.6137128472328186, "learning_rate": 0.000883249299719888, "loss": 0.4412, "step": 4274 }, { "epoch": 2.388268156424581, "grad_norm": 0.5418224334716797, "learning_rate": 0.0008832212885154061, "loss": 0.4486, "step": 4275 }, { "epoch": 2.388826815642458, "grad_norm": 0.544743001461029, "learning_rate": 0.0008831932773109243, "loss": 0.4664, "step": 4276 }, { "epoch": 2.3893854748603354, "grad_norm": 0.39321839809417725, "learning_rate": 0.0008831652661064425, "loss": 0.3911, "step": 4277 }, { "epoch": 2.3899441340782124, "grad_norm": 0.8188342452049255, "learning_rate": 0.0008831372549019609, "loss": 0.4849, "step": 4278 }, { "epoch": 2.3905027932960894, "grad_norm": 0.7178171277046204, "learning_rate": 0.0008831092436974791, "loss": 0.5732, "step": 4279 }, { "epoch": 2.3910614525139664, "grad_norm": 0.5431494116783142, "learning_rate": 0.0008830812324929973, "loss": 0.5017, "step": 4280 }, { "epoch": 2.3916201117318434, "grad_norm": 0.6249786019325256, "learning_rate": 0.0008830532212885154, "loss": 0.4637, "step": 4281 }, { "epoch": 2.3921787709497204, "grad_norm": 0.5498879551887512, "learning_rate": 0.0008830252100840336, "loss": 0.4105, "step": 4282 }, { "epoch": 2.392737430167598, "grad_norm": 0.4963003695011139, "learning_rate": 0.0008829971988795519, "loss": 0.399, "step": 4283 }, { "epoch": 2.393296089385475, "grad_norm": 1.0586880445480347, "learning_rate": 0.0008829691876750701, "loss": 0.3949, "step": 4284 }, { "epoch": 2.393854748603352, 
"grad_norm": 0.433883398771286, "learning_rate": 0.0008829411764705883, "loss": 0.47, "step": 4285 }, { "epoch": 2.394413407821229, "grad_norm": 0.5674890875816345, "learning_rate": 0.0008829131652661064, "loss": 0.4918, "step": 4286 }, { "epoch": 2.394972067039106, "grad_norm": 0.48947373032569885, "learning_rate": 0.0008828851540616246, "loss": 0.5143, "step": 4287 }, { "epoch": 2.3955307262569834, "grad_norm": 0.513671338558197, "learning_rate": 0.0008828571428571429, "loss": 0.4568, "step": 4288 }, { "epoch": 2.3960893854748604, "grad_norm": 0.7631064057350159, "learning_rate": 0.0008828291316526611, "loss": 0.7101, "step": 4289 }, { "epoch": 2.3966480446927374, "grad_norm": 0.8110824227333069, "learning_rate": 0.0008828011204481793, "loss": 0.6534, "step": 4290 }, { "epoch": 2.3972067039106144, "grad_norm": 1.466528058052063, "learning_rate": 0.0008827731092436974, "loss": 0.4679, "step": 4291 }, { "epoch": 2.3977653631284914, "grad_norm": 2.621582269668579, "learning_rate": 0.0008827450980392156, "loss": 0.4822, "step": 4292 }, { "epoch": 2.398324022346369, "grad_norm": 0.5260323882102966, "learning_rate": 0.0008827170868347339, "loss": 0.6171, "step": 4293 }, { "epoch": 2.398882681564246, "grad_norm": 0.4889635741710663, "learning_rate": 0.0008826890756302521, "loss": 0.621, "step": 4294 }, { "epoch": 2.399441340782123, "grad_norm": 0.8604654669761658, "learning_rate": 0.0008826610644257704, "loss": 0.4936, "step": 4295 }, { "epoch": 2.4, "grad_norm": 25.43321990966797, "learning_rate": 0.0008826330532212886, "loss": 0.4807, "step": 4296 }, { "epoch": 2.400558659217877, "grad_norm": 0.6410494446754456, "learning_rate": 0.0008826050420168067, "loss": 0.5789, "step": 4297 }, { "epoch": 2.4011173184357544, "grad_norm": 1.1895076036453247, "learning_rate": 0.000882577030812325, "loss": 0.4906, "step": 4298 }, { "epoch": 2.4016759776536314, "grad_norm": 0.6382520198822021, "learning_rate": 0.0008825490196078432, "loss": 0.5272, "step": 4299 }, { "epoch": 
2.4022346368715084, "grad_norm": 1.1108702421188354, "learning_rate": 0.0008825210084033614, "loss": 0.3921, "step": 4300 }, { "epoch": 2.4027932960893854, "grad_norm": 0.46631813049316406, "learning_rate": 0.0008824929971988796, "loss": 0.499, "step": 4301 }, { "epoch": 2.4033519553072624, "grad_norm": 0.5861110091209412, "learning_rate": 0.0008824649859943977, "loss": 0.4722, "step": 4302 }, { "epoch": 2.40391061452514, "grad_norm": 0.4181196987628937, "learning_rate": 0.000882436974789916, "loss": 0.4135, "step": 4303 }, { "epoch": 2.404469273743017, "grad_norm": 0.49521398544311523, "learning_rate": 0.0008824089635854342, "loss": 0.4804, "step": 4304 }, { "epoch": 2.405027932960894, "grad_norm": 0.7357967495918274, "learning_rate": 0.0008823809523809524, "loss": 0.5264, "step": 4305 }, { "epoch": 2.405586592178771, "grad_norm": 0.44721147418022156, "learning_rate": 0.0008823529411764706, "loss": 0.481, "step": 4306 }, { "epoch": 2.406145251396648, "grad_norm": 0.49752721190452576, "learning_rate": 0.0008823249299719887, "loss": 0.464, "step": 4307 }, { "epoch": 2.4067039106145254, "grad_norm": 0.4879881739616394, "learning_rate": 0.000882296918767507, "loss": 0.4486, "step": 4308 }, { "epoch": 2.4072625698324024, "grad_norm": 0.4629369378089905, "learning_rate": 0.0008822689075630252, "loss": 0.4524, "step": 4309 }, { "epoch": 2.4078212290502794, "grad_norm": 1.4756484031677246, "learning_rate": 0.0008822408963585434, "loss": 0.5583, "step": 4310 }, { "epoch": 2.4083798882681564, "grad_norm": 0.5566049814224243, "learning_rate": 0.0008822128851540616, "loss": 0.4512, "step": 4311 }, { "epoch": 2.4089385474860334, "grad_norm": 0.6636320352554321, "learning_rate": 0.0008821848739495799, "loss": 0.5222, "step": 4312 }, { "epoch": 2.4094972067039104, "grad_norm": 0.7487527132034302, "learning_rate": 0.0008821568627450981, "loss": 0.521, "step": 4313 }, { "epoch": 2.410055865921788, "grad_norm": 0.7668407559394836, "learning_rate": 0.0008821288515406163, "loss": 
0.5283, "step": 4314 }, { "epoch": 2.410614525139665, "grad_norm": 0.3995644152164459, "learning_rate": 0.0008821008403361345, "loss": 0.4354, "step": 4315 }, { "epoch": 2.411173184357542, "grad_norm": 0.5607393980026245, "learning_rate": 0.0008820728291316527, "loss": 0.5221, "step": 4316 }, { "epoch": 2.411731843575419, "grad_norm": 0.49787914752960205, "learning_rate": 0.0008820448179271709, "loss": 0.5637, "step": 4317 }, { "epoch": 2.412290502793296, "grad_norm": 0.5838570594787598, "learning_rate": 0.0008820168067226891, "loss": 0.4391, "step": 4318 }, { "epoch": 2.412849162011173, "grad_norm": 0.5224358439445496, "learning_rate": 0.0008819887955182073, "loss": 0.4389, "step": 4319 }, { "epoch": 2.4134078212290504, "grad_norm": 0.5662122964859009, "learning_rate": 0.0008819607843137255, "loss": 0.4244, "step": 4320 }, { "epoch": 2.4139664804469274, "grad_norm": 0.6424004435539246, "learning_rate": 0.0008819327731092437, "loss": 0.4881, "step": 4321 }, { "epoch": 2.4145251396648044, "grad_norm": 0.7683541178703308, "learning_rate": 0.0008819047619047619, "loss": 0.3957, "step": 4322 }, { "epoch": 2.4150837988826814, "grad_norm": 0.4299624562263489, "learning_rate": 0.0008818767507002801, "loss": 0.3726, "step": 4323 }, { "epoch": 2.4156424581005584, "grad_norm": 0.91096031665802, "learning_rate": 0.0008818487394957983, "loss": 0.5559, "step": 4324 }, { "epoch": 2.416201117318436, "grad_norm": 0.7092557549476624, "learning_rate": 0.0008818207282913165, "loss": 0.4296, "step": 4325 }, { "epoch": 2.416759776536313, "grad_norm": 4.360379219055176, "learning_rate": 0.0008817927170868347, "loss": 0.4725, "step": 4326 }, { "epoch": 2.41731843575419, "grad_norm": 0.9486434459686279, "learning_rate": 0.0008817647058823529, "loss": 0.5032, "step": 4327 }, { "epoch": 2.417877094972067, "grad_norm": 0.48562589287757874, "learning_rate": 0.0008817366946778713, "loss": 0.4271, "step": 4328 }, { "epoch": 2.418435754189944, "grad_norm": 0.5344672799110413, "learning_rate": 
0.0008817086834733894, "loss": 0.4067, "step": 4329 }, { "epoch": 2.4189944134078214, "grad_norm": 2.026885509490967, "learning_rate": 0.0008816806722689076, "loss": 0.5047, "step": 4330 }, { "epoch": 2.4195530726256984, "grad_norm": 0.49559345841407776, "learning_rate": 0.0008816526610644258, "loss": 0.4767, "step": 4331 }, { "epoch": 2.4201117318435754, "grad_norm": 1.3192914724349976, "learning_rate": 0.000881624649859944, "loss": 0.483, "step": 4332 }, { "epoch": 2.4206703910614524, "grad_norm": 0.5153898596763611, "learning_rate": 0.0008815966386554623, "loss": 0.4174, "step": 4333 }, { "epoch": 2.4212290502793294, "grad_norm": 0.4890030324459076, "learning_rate": 0.0008815686274509804, "loss": 0.4532, "step": 4334 }, { "epoch": 2.421787709497207, "grad_norm": 0.4998883306980133, "learning_rate": 0.0008815406162464986, "loss": 0.5379, "step": 4335 }, { "epoch": 2.422346368715084, "grad_norm": 6.456830024719238, "learning_rate": 0.0008815126050420168, "loss": 0.4178, "step": 4336 }, { "epoch": 2.422905027932961, "grad_norm": 0.7396804094314575, "learning_rate": 0.000881484593837535, "loss": 0.5235, "step": 4337 }, { "epoch": 2.423463687150838, "grad_norm": 0.5532344579696655, "learning_rate": 0.0008814565826330533, "loss": 0.5341, "step": 4338 }, { "epoch": 2.424022346368715, "grad_norm": 0.5099005103111267, "learning_rate": 0.0008814285714285714, "loss": 0.4806, "step": 4339 }, { "epoch": 2.4245810055865924, "grad_norm": 0.6905778646469116, "learning_rate": 0.0008814005602240896, "loss": 0.5037, "step": 4340 }, { "epoch": 2.4251396648044694, "grad_norm": 0.5489441752433777, "learning_rate": 0.0008813725490196078, "loss": 0.4945, "step": 4341 }, { "epoch": 2.4256983240223464, "grad_norm": 0.5421143174171448, "learning_rate": 0.000881344537815126, "loss": 0.6252, "step": 4342 }, { "epoch": 2.4262569832402234, "grad_norm": 0.41108086705207825, "learning_rate": 0.0008813165266106443, "loss": 0.4741, "step": 4343 }, { "epoch": 2.4268156424581004, "grad_norm": 
0.5088945031166077, "learning_rate": 0.0008812885154061626, "loss": 0.4808, "step": 4344 }, { "epoch": 2.427374301675978, "grad_norm": 0.5964069962501526, "learning_rate": 0.0008812605042016807, "loss": 0.4156, "step": 4345 }, { "epoch": 2.427932960893855, "grad_norm": 0.5038523077964783, "learning_rate": 0.0008812324929971989, "loss": 0.4349, "step": 4346 }, { "epoch": 2.428491620111732, "grad_norm": 3.671360969543457, "learning_rate": 0.0008812044817927171, "loss": 0.4267, "step": 4347 }, { "epoch": 2.429050279329609, "grad_norm": 0.5666340589523315, "learning_rate": 0.0008811764705882354, "loss": 0.4614, "step": 4348 }, { "epoch": 2.429608938547486, "grad_norm": 1.0279804468154907, "learning_rate": 0.0008811484593837536, "loss": 0.6051, "step": 4349 }, { "epoch": 2.430167597765363, "grad_norm": 0.6092056632041931, "learning_rate": 0.0008811204481792717, "loss": 0.5395, "step": 4350 }, { "epoch": 2.4307262569832404, "grad_norm": 1.9469459056854248, "learning_rate": 0.0008810924369747899, "loss": 0.5241, "step": 4351 }, { "epoch": 2.4312849162011174, "grad_norm": 0.5555824637413025, "learning_rate": 0.0008810644257703081, "loss": 0.4475, "step": 4352 }, { "epoch": 2.4318435754189944, "grad_norm": 0.6190809607505798, "learning_rate": 0.0008810364145658264, "loss": 0.6339, "step": 4353 }, { "epoch": 2.4324022346368714, "grad_norm": 0.5151634216308594, "learning_rate": 0.0008810084033613446, "loss": 0.5336, "step": 4354 }, { "epoch": 2.4329608938547485, "grad_norm": 0.5393658876419067, "learning_rate": 0.0008809803921568627, "loss": 0.4911, "step": 4355 }, { "epoch": 2.4335195530726255, "grad_norm": 0.41556069254875183, "learning_rate": 0.0008809523809523809, "loss": 0.3815, "step": 4356 }, { "epoch": 2.434078212290503, "grad_norm": 0.4446249008178711, "learning_rate": 0.0008809243697478991, "loss": 0.4646, "step": 4357 }, { "epoch": 2.43463687150838, "grad_norm": 0.4504421055316925, "learning_rate": 0.0008808963585434174, "loss": 0.4485, "step": 4358 }, { "epoch": 
2.435195530726257, "grad_norm": 0.5532277226448059, "learning_rate": 0.0008808683473389356, "loss": 0.4241, "step": 4359 }, { "epoch": 2.435754189944134, "grad_norm": 0.5600268840789795, "learning_rate": 0.0008808403361344539, "loss": 0.4679, "step": 4360 }, { "epoch": 2.436312849162011, "grad_norm": 0.47818443179130554, "learning_rate": 0.000880812324929972, "loss": 0.5175, "step": 4361 }, { "epoch": 2.4368715083798884, "grad_norm": 0.4332275092601776, "learning_rate": 0.0008807843137254902, "loss": 0.3947, "step": 4362 }, { "epoch": 2.4374301675977654, "grad_norm": 1.0805692672729492, "learning_rate": 0.0008807563025210085, "loss": 0.391, "step": 4363 }, { "epoch": 2.4379888268156424, "grad_norm": 0.4999154210090637, "learning_rate": 0.0008807282913165267, "loss": 0.4842, "step": 4364 }, { "epoch": 2.4385474860335195, "grad_norm": 0.391719251871109, "learning_rate": 0.0008807002801120449, "loss": 0.4172, "step": 4365 }, { "epoch": 2.4391061452513965, "grad_norm": 0.9226119518280029, "learning_rate": 0.000880672268907563, "loss": 0.4765, "step": 4366 }, { "epoch": 2.439664804469274, "grad_norm": 0.6326907277107239, "learning_rate": 0.0008806442577030812, "loss": 0.4686, "step": 4367 }, { "epoch": 2.440223463687151, "grad_norm": 0.5321052074432373, "learning_rate": 0.0008806162464985995, "loss": 0.5778, "step": 4368 }, { "epoch": 2.440782122905028, "grad_norm": 6.32752799987793, "learning_rate": 0.0008805882352941177, "loss": 0.3842, "step": 4369 }, { "epoch": 2.441340782122905, "grad_norm": 2.7710537910461426, "learning_rate": 0.0008805602240896359, "loss": 0.6063, "step": 4370 }, { "epoch": 2.441899441340782, "grad_norm": 0.5310767889022827, "learning_rate": 0.000880532212885154, "loss": 0.495, "step": 4371 }, { "epoch": 2.4424581005586594, "grad_norm": 0.8203009963035583, "learning_rate": 0.0008805042016806722, "loss": 0.4765, "step": 4372 }, { "epoch": 2.4430167597765364, "grad_norm": 0.4067409634590149, "learning_rate": 0.0008804761904761905, "loss": 0.4315, 
"step": 4373 }, { "epoch": 2.4435754189944134, "grad_norm": 0.6586512327194214, "learning_rate": 0.0008804481792717087, "loss": 0.4524, "step": 4374 }, { "epoch": 2.4441340782122905, "grad_norm": 1.092674970626831, "learning_rate": 0.0008804201680672269, "loss": 0.4151, "step": 4375 }, { "epoch": 2.4446927374301675, "grad_norm": 0.864611029624939, "learning_rate": 0.0008803921568627451, "loss": 0.4224, "step": 4376 }, { "epoch": 2.445251396648045, "grad_norm": 1.6714537143707275, "learning_rate": 0.0008803641456582632, "loss": 0.4327, "step": 4377 }, { "epoch": 2.445810055865922, "grad_norm": 2.8098738193511963, "learning_rate": 0.0008803361344537816, "loss": 0.4702, "step": 4378 }, { "epoch": 2.446368715083799, "grad_norm": 0.5370066165924072, "learning_rate": 0.0008803081232492998, "loss": 0.462, "step": 4379 }, { "epoch": 2.446927374301676, "grad_norm": 0.7801875472068787, "learning_rate": 0.000880280112044818, "loss": 0.5963, "step": 4380 }, { "epoch": 2.447486033519553, "grad_norm": 0.5990810394287109, "learning_rate": 0.0008802521008403362, "loss": 0.4869, "step": 4381 }, { "epoch": 2.4480446927374304, "grad_norm": 0.6246578693389893, "learning_rate": 0.0008802240896358543, "loss": 0.5052, "step": 4382 }, { "epoch": 2.4486033519553074, "grad_norm": 0.5706420540809631, "learning_rate": 0.0008801960784313726, "loss": 0.4506, "step": 4383 }, { "epoch": 2.4491620111731844, "grad_norm": 0.761964738368988, "learning_rate": 0.0008801680672268908, "loss": 0.4354, "step": 4384 }, { "epoch": 2.4497206703910615, "grad_norm": 0.4557899534702301, "learning_rate": 0.000880140056022409, "loss": 0.4536, "step": 4385 }, { "epoch": 2.4502793296089385, "grad_norm": 0.8080939054489136, "learning_rate": 0.0008801120448179272, "loss": 0.4344, "step": 4386 }, { "epoch": 2.4508379888268155, "grad_norm": 5.231118202209473, "learning_rate": 0.0008800840336134453, "loss": 0.4743, "step": 4387 }, { "epoch": 2.451396648044693, "grad_norm": 0.5391502380371094, "learning_rate": 
0.0008800560224089636, "loss": 0.4194, "step": 4388 }, { "epoch": 2.45195530726257, "grad_norm": 1.0946112871170044, "learning_rate": 0.0008800280112044818, "loss": 0.5505, "step": 4389 }, { "epoch": 2.452513966480447, "grad_norm": 0.7133363485336304, "learning_rate": 0.00088, "loss": 0.5146, "step": 4390 }, { "epoch": 2.453072625698324, "grad_norm": 0.5240291953086853, "learning_rate": 0.0008799719887955182, "loss": 0.454, "step": 4391 }, { "epoch": 2.453631284916201, "grad_norm": 1.0536044836044312, "learning_rate": 0.0008799439775910364, "loss": 0.495, "step": 4392 }, { "epoch": 2.454189944134078, "grad_norm": 0.5113751292228699, "learning_rate": 0.0008799159663865546, "loss": 0.4708, "step": 4393 }, { "epoch": 2.4547486033519554, "grad_norm": 1.1066749095916748, "learning_rate": 0.0008798879551820729, "loss": 0.5135, "step": 4394 }, { "epoch": 2.4553072625698324, "grad_norm": 0.6315325498580933, "learning_rate": 0.0008798599439775911, "loss": 0.5431, "step": 4395 }, { "epoch": 2.4558659217877095, "grad_norm": 2.807061195373535, "learning_rate": 0.0008798319327731093, "loss": 0.4797, "step": 4396 }, { "epoch": 2.4564245810055865, "grad_norm": 3.1679513454437256, "learning_rate": 0.0008798039215686275, "loss": 0.5308, "step": 4397 }, { "epoch": 2.4569832402234635, "grad_norm": 0.4514045715332031, "learning_rate": 0.0008797759103641457, "loss": 0.4231, "step": 4398 }, { "epoch": 2.457541899441341, "grad_norm": 0.49052560329437256, "learning_rate": 0.0008797478991596639, "loss": 0.4859, "step": 4399 }, { "epoch": 2.458100558659218, "grad_norm": 0.45187923312187195, "learning_rate": 0.0008797198879551821, "loss": 0.4388, "step": 4400 }, { "epoch": 2.458659217877095, "grad_norm": 8.988836288452148, "learning_rate": 0.0008796918767507003, "loss": 0.4998, "step": 4401 }, { "epoch": 2.459217877094972, "grad_norm": 0.49433860182762146, "learning_rate": 0.0008796638655462185, "loss": 0.4329, "step": 4402 }, { "epoch": 2.459776536312849, "grad_norm": 0.4444673955440521, 
"learning_rate": 0.0008796358543417367, "loss": 0.3616, "step": 4403 }, { "epoch": 2.4603351955307264, "grad_norm": 1.8196717500686646, "learning_rate": 0.0008796078431372549, "loss": 0.5, "step": 4404 }, { "epoch": 2.4608938547486034, "grad_norm": 0.5254615545272827, "learning_rate": 0.0008795798319327731, "loss": 0.4922, "step": 4405 }, { "epoch": 2.4614525139664805, "grad_norm": 0.5483995079994202, "learning_rate": 0.0008795518207282913, "loss": 0.5123, "step": 4406 }, { "epoch": 2.4620111731843575, "grad_norm": 2.8272669315338135, "learning_rate": 0.0008795238095238095, "loss": 0.4422, "step": 4407 }, { "epoch": 2.4625698324022345, "grad_norm": 0.7430616617202759, "learning_rate": 0.0008794957983193278, "loss": 0.5514, "step": 4408 }, { "epoch": 2.463128491620112, "grad_norm": 3.747798442840576, "learning_rate": 0.0008794677871148459, "loss": 0.3925, "step": 4409 }, { "epoch": 2.463687150837989, "grad_norm": 0.590506911277771, "learning_rate": 0.0008794397759103642, "loss": 0.4449, "step": 4410 }, { "epoch": 2.464245810055866, "grad_norm": 0.42584675550460815, "learning_rate": 0.0008794117647058824, "loss": 0.4518, "step": 4411 }, { "epoch": 2.464804469273743, "grad_norm": 1.0190391540527344, "learning_rate": 0.0008793837535014006, "loss": 0.6653, "step": 4412 }, { "epoch": 2.46536312849162, "grad_norm": 0.45869314670562744, "learning_rate": 0.0008793557422969189, "loss": 0.4285, "step": 4413 }, { "epoch": 2.4659217877094974, "grad_norm": 0.5037466883659363, "learning_rate": 0.000879327731092437, "loss": 0.4296, "step": 4414 }, { "epoch": 2.4664804469273744, "grad_norm": 1.2750922441482544, "learning_rate": 0.0008792997198879552, "loss": 0.4937, "step": 4415 }, { "epoch": 2.4670391061452515, "grad_norm": 1.1161415576934814, "learning_rate": 0.0008792717086834734, "loss": 0.5768, "step": 4416 }, { "epoch": 2.4675977653631285, "grad_norm": 0.8670387864112854, "learning_rate": 0.0008792436974789916, "loss": 0.4481, "step": 4417 }, { "epoch": 2.4681564245810055, 
"grad_norm": 1.4730231761932373, "learning_rate": 0.0008792156862745099, "loss": 0.5884, "step": 4418 }, { "epoch": 2.4687150837988825, "grad_norm": 0.7452554702758789, "learning_rate": 0.000879187675070028, "loss": 0.5316, "step": 4419 }, { "epoch": 2.46927374301676, "grad_norm": 0.4893207550048828, "learning_rate": 0.0008791596638655462, "loss": 0.3357, "step": 4420 }, { "epoch": 2.469832402234637, "grad_norm": 2.1996099948883057, "learning_rate": 0.0008791316526610644, "loss": 0.3918, "step": 4421 }, { "epoch": 2.470391061452514, "grad_norm": 0.6242367029190063, "learning_rate": 0.0008791036414565826, "loss": 0.4399, "step": 4422 }, { "epoch": 2.470949720670391, "grad_norm": 0.5205169916152954, "learning_rate": 0.0008790756302521009, "loss": 0.4872, "step": 4423 }, { "epoch": 2.471508379888268, "grad_norm": 0.5776616334915161, "learning_rate": 0.0008790476190476191, "loss": 0.5271, "step": 4424 }, { "epoch": 2.472067039106145, "grad_norm": 0.6396153569221497, "learning_rate": 0.0008790196078431372, "loss": 0.4457, "step": 4425 }, { "epoch": 2.4726256983240225, "grad_norm": 1.701348066329956, "learning_rate": 0.0008789915966386554, "loss": 0.5394, "step": 4426 }, { "epoch": 2.4731843575418995, "grad_norm": 0.649161696434021, "learning_rate": 0.0008789635854341737, "loss": 0.5024, "step": 4427 }, { "epoch": 2.4737430167597765, "grad_norm": 0.8495590090751648, "learning_rate": 0.000878935574229692, "loss": 0.4276, "step": 4428 }, { "epoch": 2.4743016759776535, "grad_norm": 1.5100113153457642, "learning_rate": 0.0008789075630252102, "loss": 0.5683, "step": 4429 }, { "epoch": 2.4748603351955305, "grad_norm": 0.5396854281425476, "learning_rate": 0.0008788795518207283, "loss": 0.485, "step": 4430 }, { "epoch": 2.475418994413408, "grad_norm": 0.9645025134086609, "learning_rate": 0.0008788515406162465, "loss": 0.5211, "step": 4431 }, { "epoch": 2.475977653631285, "grad_norm": 0.522449254989624, "learning_rate": 0.0008788235294117647, "loss": 0.5439, "step": 4432 }, { 
"epoch": 2.476536312849162, "grad_norm": 0.6460433602333069, "learning_rate": 0.000878795518207283, "loss": 0.4083, "step": 4433 }, { "epoch": 2.477094972067039, "grad_norm": 0.8278968930244446, "learning_rate": 0.0008787675070028012, "loss": 0.3941, "step": 4434 }, { "epoch": 2.477653631284916, "grad_norm": 0.5852335691452026, "learning_rate": 0.0008787394957983193, "loss": 0.6851, "step": 4435 }, { "epoch": 2.4782122905027935, "grad_norm": 0.4538413882255554, "learning_rate": 0.0008787114845938375, "loss": 0.4634, "step": 4436 }, { "epoch": 2.4787709497206705, "grad_norm": 0.5930296182632446, "learning_rate": 0.0008786834733893557, "loss": 0.4946, "step": 4437 }, { "epoch": 2.4793296089385475, "grad_norm": 0.569406270980835, "learning_rate": 0.000878655462184874, "loss": 0.6094, "step": 4438 }, { "epoch": 2.4798882681564245, "grad_norm": 0.5196834206581116, "learning_rate": 0.0008786274509803922, "loss": 0.5329, "step": 4439 }, { "epoch": 2.4804469273743015, "grad_norm": 1.3550713062286377, "learning_rate": 0.0008785994397759104, "loss": 0.4816, "step": 4440 }, { "epoch": 2.481005586592179, "grad_norm": 0.6692785620689392, "learning_rate": 0.0008785714285714285, "loss": 0.3862, "step": 4441 }, { "epoch": 2.481564245810056, "grad_norm": 0.3891158998012543, "learning_rate": 0.0008785434173669467, "loss": 0.4182, "step": 4442 }, { "epoch": 2.482122905027933, "grad_norm": 0.8678598403930664, "learning_rate": 0.0008785154061624651, "loss": 0.5296, "step": 4443 }, { "epoch": 2.48268156424581, "grad_norm": 0.6176441311836243, "learning_rate": 0.0008784873949579833, "loss": 0.4719, "step": 4444 }, { "epoch": 2.483240223463687, "grad_norm": 2.4637835025787354, "learning_rate": 0.0008784593837535015, "loss": 0.4647, "step": 4445 }, { "epoch": 2.4837988826815645, "grad_norm": 0.5736443996429443, "learning_rate": 0.0008784313725490196, "loss": 0.5703, "step": 4446 }, { "epoch": 2.4843575418994415, "grad_norm": 0.6970122456550598, "learning_rate": 0.0008784033613445378, 
"loss": 0.4167, "step": 4447 }, { "epoch": 2.4849162011173185, "grad_norm": 0.4753473997116089, "learning_rate": 0.0008783753501400561, "loss": 0.5326, "step": 4448 }, { "epoch": 2.4854748603351955, "grad_norm": 0.6131035685539246, "learning_rate": 0.0008783473389355743, "loss": 0.4966, "step": 4449 }, { "epoch": 2.4860335195530725, "grad_norm": 0.8042401075363159, "learning_rate": 0.0008783193277310925, "loss": 0.5292, "step": 4450 }, { "epoch": 2.48659217877095, "grad_norm": 0.5250211358070374, "learning_rate": 0.0008782913165266106, "loss": 0.4898, "step": 4451 }, { "epoch": 2.487150837988827, "grad_norm": 0.6477358937263489, "learning_rate": 0.0008782633053221288, "loss": 0.3879, "step": 4452 }, { "epoch": 2.487709497206704, "grad_norm": 0.5329732298851013, "learning_rate": 0.0008782352941176471, "loss": 0.468, "step": 4453 }, { "epoch": 2.488268156424581, "grad_norm": 0.6341084241867065, "learning_rate": 0.0008782072829131653, "loss": 0.4227, "step": 4454 }, { "epoch": 2.488826815642458, "grad_norm": 0.4659540355205536, "learning_rate": 0.0008781792717086835, "loss": 0.5092, "step": 4455 }, { "epoch": 2.489385474860335, "grad_norm": 0.7676764130592346, "learning_rate": 0.0008781512605042017, "loss": 0.4128, "step": 4456 }, { "epoch": 2.4899441340782125, "grad_norm": 0.7669159770011902, "learning_rate": 0.0008781232492997198, "loss": 0.3884, "step": 4457 }, { "epoch": 2.4905027932960895, "grad_norm": 0.5939233899116516, "learning_rate": 0.000878095238095238, "loss": 0.3679, "step": 4458 }, { "epoch": 2.4910614525139665, "grad_norm": 0.640285074710846, "learning_rate": 0.0008780672268907564, "loss": 0.4908, "step": 4459 }, { "epoch": 2.4916201117318435, "grad_norm": 0.40500783920288086, "learning_rate": 0.0008780392156862746, "loss": 0.3761, "step": 4460 }, { "epoch": 2.4921787709497205, "grad_norm": 0.5249844193458557, "learning_rate": 0.0008780112044817928, "loss": 0.4169, "step": 4461 }, { "epoch": 2.4927374301675975, "grad_norm": 0.611035943031311, 
"learning_rate": 0.0008779831932773109, "loss": 0.4091, "step": 4462 }, { "epoch": 2.493296089385475, "grad_norm": 1.7709965705871582, "learning_rate": 0.0008779551820728291, "loss": 0.4715, "step": 4463 }, { "epoch": 2.493854748603352, "grad_norm": 0.667874276638031, "learning_rate": 0.0008779271708683474, "loss": 0.5608, "step": 4464 }, { "epoch": 2.494413407821229, "grad_norm": 0.5665532946586609, "learning_rate": 0.0008778991596638656, "loss": 0.4516, "step": 4465 }, { "epoch": 2.494972067039106, "grad_norm": 0.8860997557640076, "learning_rate": 0.0008778711484593838, "loss": 0.3293, "step": 4466 }, { "epoch": 2.495530726256983, "grad_norm": 0.5210645198822021, "learning_rate": 0.0008778431372549019, "loss": 0.4554, "step": 4467 }, { "epoch": 2.4960893854748605, "grad_norm": 0.7173487544059753, "learning_rate": 0.0008778151260504201, "loss": 0.4601, "step": 4468 }, { "epoch": 2.4966480446927375, "grad_norm": 1.1310702562332153, "learning_rate": 0.0008777871148459384, "loss": 0.5989, "step": 4469 }, { "epoch": 2.4972067039106145, "grad_norm": 1.9089235067367554, "learning_rate": 0.0008777591036414566, "loss": 0.3195, "step": 4470 }, { "epoch": 2.4977653631284915, "grad_norm": 0.8973751068115234, "learning_rate": 0.0008777310924369748, "loss": 0.5801, "step": 4471 }, { "epoch": 2.4983240223463685, "grad_norm": 0.9188690781593323, "learning_rate": 0.000877703081232493, "loss": 0.5607, "step": 4472 }, { "epoch": 2.498882681564246, "grad_norm": 0.5488870143890381, "learning_rate": 0.0008776750700280111, "loss": 0.4537, "step": 4473 }, { "epoch": 2.499441340782123, "grad_norm": 0.5225598216056824, "learning_rate": 0.0008776470588235294, "loss": 0.4182, "step": 4474 }, { "epoch": 2.5, "grad_norm": 1.4376113414764404, "learning_rate": 0.0008776190476190476, "loss": 0.5006, "step": 4475 }, { "epoch": 2.500558659217877, "grad_norm": 0.47329193353652954, "learning_rate": 0.0008775910364145659, "loss": 0.499, "step": 4476 }, { "epoch": 2.501117318435754, "grad_norm": 
0.8424351811408997, "learning_rate": 0.0008775630252100841, "loss": 0.5124, "step": 4477 }, { "epoch": 2.5016759776536315, "grad_norm": 0.9163236618041992, "learning_rate": 0.0008775350140056022, "loss": 0.5137, "step": 4478 }, { "epoch": 2.5022346368715085, "grad_norm": 0.7280294895172119, "learning_rate": 0.0008775070028011205, "loss": 0.4547, "step": 4479 }, { "epoch": 2.5027932960893855, "grad_norm": 0.5652651786804199, "learning_rate": 0.0008774789915966387, "loss": 0.3931, "step": 4480 }, { "epoch": 2.5033519553072625, "grad_norm": 0.874708354473114, "learning_rate": 0.0008774509803921569, "loss": 0.4496, "step": 4481 }, { "epoch": 2.5039106145251395, "grad_norm": 0.5706674456596375, "learning_rate": 0.0008774229691876751, "loss": 0.4237, "step": 4482 }, { "epoch": 2.504469273743017, "grad_norm": 0.4492708146572113, "learning_rate": 0.0008773949579831932, "loss": 0.4023, "step": 4483 }, { "epoch": 2.505027932960894, "grad_norm": 0.8117376565933228, "learning_rate": 0.0008773669467787115, "loss": 0.5415, "step": 4484 }, { "epoch": 2.505586592178771, "grad_norm": 0.5417127013206482, "learning_rate": 0.0008773389355742297, "loss": 0.4804, "step": 4485 }, { "epoch": 2.506145251396648, "grad_norm": 0.5969968438148499, "learning_rate": 0.0008773109243697479, "loss": 0.5356, "step": 4486 }, { "epoch": 2.506703910614525, "grad_norm": 1.2206852436065674, "learning_rate": 0.0008772829131652661, "loss": 0.6047, "step": 4487 }, { "epoch": 2.5072625698324025, "grad_norm": 0.9537792801856995, "learning_rate": 0.0008772549019607843, "loss": 0.475, "step": 4488 }, { "epoch": 2.5078212290502795, "grad_norm": 4.100423336029053, "learning_rate": 0.0008772268907563025, "loss": 0.4823, "step": 4489 }, { "epoch": 2.5083798882681565, "grad_norm": 0.5287656784057617, "learning_rate": 0.0008771988795518207, "loss": 0.5067, "step": 4490 }, { "epoch": 2.5089385474860335, "grad_norm": 0.8828968405723572, "learning_rate": 0.0008771708683473389, "loss": 0.4223, "step": 4491 }, { "epoch": 
2.5094972067039105, "grad_norm": 0.6327812075614929, "learning_rate": 0.0008771428571428572, "loss": 0.4399, "step": 4492 }, { "epoch": 2.510055865921788, "grad_norm": 0.451775461435318, "learning_rate": 0.0008771148459383754, "loss": 0.3741, "step": 4493 }, { "epoch": 2.5106145251396645, "grad_norm": 0.7219546437263489, "learning_rate": 0.0008770868347338936, "loss": 0.5053, "step": 4494 }, { "epoch": 2.511173184357542, "grad_norm": 0.5533103346824646, "learning_rate": 0.0008770588235294118, "loss": 0.4911, "step": 4495 }, { "epoch": 2.511731843575419, "grad_norm": 0.7420123219490051, "learning_rate": 0.00087703081232493, "loss": 0.4492, "step": 4496 }, { "epoch": 2.512290502793296, "grad_norm": 0.43191009759902954, "learning_rate": 0.0008770028011204482, "loss": 0.4421, "step": 4497 }, { "epoch": 2.512849162011173, "grad_norm": 0.49949371814727783, "learning_rate": 0.0008769747899159664, "loss": 0.5096, "step": 4498 }, { "epoch": 2.51340782122905, "grad_norm": 0.6802654266357422, "learning_rate": 0.0008769467787114847, "loss": 0.4505, "step": 4499 }, { "epoch": 2.5139664804469275, "grad_norm": 0.4956294596195221, "learning_rate": 0.0008769187675070028, "loss": 0.4281, "step": 4500 }, { "epoch": 2.5139664804469275, "eval_cer": 0.09615615419026109, "eval_loss": 0.36138299107551575, "eval_runtime": 55.6478, "eval_samples_per_second": 81.549, "eval_steps_per_second": 5.104, "eval_wer": 0.3760509482975336, "step": 4500 }, { "epoch": 2.5145251396648045, "grad_norm": 0.5228537321090698, "learning_rate": 0.000876890756302521, "loss": 0.4907, "step": 4501 }, { "epoch": 2.5150837988826815, "grad_norm": 0.6658404469490051, "learning_rate": 0.0008768627450980392, "loss": 0.39, "step": 4502 }, { "epoch": 2.5156424581005585, "grad_norm": 0.5798348188400269, "learning_rate": 0.0008768347338935574, "loss": 0.4364, "step": 4503 }, { "epoch": 2.5162011173184355, "grad_norm": 1.7013322114944458, "learning_rate": 0.0008768067226890757, "loss": 0.4757, "step": 4504 }, { "epoch": 
2.516759776536313, "grad_norm": 0.6113928556442261, "learning_rate": 0.0008767787114845938, "loss": 0.5182, "step": 4505 }, { "epoch": 2.51731843575419, "grad_norm": 0.8509871959686279, "learning_rate": 0.000876750700280112, "loss": 0.6024, "step": 4506 }, { "epoch": 2.517877094972067, "grad_norm": 0.48457011580467224, "learning_rate": 0.0008767226890756302, "loss": 0.3722, "step": 4507 }, { "epoch": 2.518435754189944, "grad_norm": 0.5147897005081177, "learning_rate": 0.0008766946778711484, "loss": 0.4042, "step": 4508 }, { "epoch": 2.518994413407821, "grad_norm": 0.652128279209137, "learning_rate": 0.0008766666666666668, "loss": 0.5105, "step": 4509 }, { "epoch": 2.5195530726256985, "grad_norm": 0.5479727983474731, "learning_rate": 0.0008766386554621849, "loss": 0.4718, "step": 4510 }, { "epoch": 2.5201117318435755, "grad_norm": 0.6929388642311096, "learning_rate": 0.0008766106442577031, "loss": 0.4866, "step": 4511 }, { "epoch": 2.5206703910614525, "grad_norm": 0.577779233455658, "learning_rate": 0.0008765826330532213, "loss": 0.5949, "step": 4512 }, { "epoch": 2.5212290502793295, "grad_norm": 0.5748147368431091, "learning_rate": 0.0008765546218487395, "loss": 0.5287, "step": 4513 }, { "epoch": 2.5217877094972065, "grad_norm": 0.9498600959777832, "learning_rate": 0.0008765266106442578, "loss": 0.4796, "step": 4514 }, { "epoch": 2.522346368715084, "grad_norm": 0.8140057325363159, "learning_rate": 0.000876498599439776, "loss": 0.4023, "step": 4515 }, { "epoch": 2.522905027932961, "grad_norm": 0.45185020565986633, "learning_rate": 0.0008764705882352941, "loss": 0.4363, "step": 4516 }, { "epoch": 2.523463687150838, "grad_norm": 0.5347515344619751, "learning_rate": 0.0008764425770308123, "loss": 0.4074, "step": 4517 }, { "epoch": 2.524022346368715, "grad_norm": 0.4780946969985962, "learning_rate": 0.0008764145658263305, "loss": 0.467, "step": 4518 }, { "epoch": 2.524581005586592, "grad_norm": 0.7791872024536133, "learning_rate": 0.0008763865546218488, "loss": 0.4619, 
"step": 4519 }, { "epoch": 2.5251396648044695, "grad_norm": 0.6607198119163513, "learning_rate": 0.000876358543417367, "loss": 0.4721, "step": 4520 }, { "epoch": 2.5256983240223465, "grad_norm": 0.6076475977897644, "learning_rate": 0.0008763305322128851, "loss": 0.4908, "step": 4521 }, { "epoch": 2.5262569832402235, "grad_norm": 1.4148967266082764, "learning_rate": 0.0008763025210084033, "loss": 0.4464, "step": 4522 }, { "epoch": 2.5268156424581005, "grad_norm": 0.6420329809188843, "learning_rate": 0.0008762745098039215, "loss": 0.4436, "step": 4523 }, { "epoch": 2.5273743016759775, "grad_norm": 2.79652738571167, "learning_rate": 0.0008762464985994399, "loss": 0.5347, "step": 4524 }, { "epoch": 2.527932960893855, "grad_norm": 0.6680772304534912, "learning_rate": 0.0008762184873949581, "loss": 0.5336, "step": 4525 }, { "epoch": 2.528491620111732, "grad_norm": 1.2352335453033447, "learning_rate": 0.0008761904761904762, "loss": 0.5413, "step": 4526 }, { "epoch": 2.529050279329609, "grad_norm": 0.7459228038787842, "learning_rate": 0.0008761624649859944, "loss": 0.4987, "step": 4527 }, { "epoch": 2.529608938547486, "grad_norm": 0.6470720767974854, "learning_rate": 0.0008761344537815126, "loss": 0.6132, "step": 4528 }, { "epoch": 2.530167597765363, "grad_norm": 0.8248283267021179, "learning_rate": 0.0008761064425770309, "loss": 0.5492, "step": 4529 }, { "epoch": 2.5307262569832405, "grad_norm": 0.6090158820152283, "learning_rate": 0.0008760784313725491, "loss": 0.4907, "step": 4530 }, { "epoch": 2.531284916201117, "grad_norm": 1.093633770942688, "learning_rate": 0.0008760504201680673, "loss": 0.4484, "step": 4531 }, { "epoch": 2.5318435754189945, "grad_norm": 2.0259437561035156, "learning_rate": 0.0008760224089635854, "loss": 0.496, "step": 4532 }, { "epoch": 2.5324022346368715, "grad_norm": 0.5741031169891357, "learning_rate": 0.0008759943977591036, "loss": 0.5206, "step": 4533 }, { "epoch": 2.5329608938547485, "grad_norm": 0.9330551624298096, "learning_rate": 
0.0008759663865546219, "loss": 0.4618, "step": 4534 }, { "epoch": 2.5335195530726256, "grad_norm": 4.283111572265625, "learning_rate": 0.0008759383753501401, "loss": 0.4011, "step": 4535 }, { "epoch": 2.5340782122905026, "grad_norm": 0.6364736557006836, "learning_rate": 0.0008759103641456583, "loss": 0.4744, "step": 4536 }, { "epoch": 2.53463687150838, "grad_norm": 0.7679871916770935, "learning_rate": 0.0008758823529411764, "loss": 0.4555, "step": 4537 }, { "epoch": 2.535195530726257, "grad_norm": 0.7553772926330566, "learning_rate": 0.0008758543417366946, "loss": 0.382, "step": 4538 }, { "epoch": 2.535754189944134, "grad_norm": 0.6703191995620728, "learning_rate": 0.0008758263305322129, "loss": 0.3969, "step": 4539 }, { "epoch": 2.536312849162011, "grad_norm": 0.5475282669067383, "learning_rate": 0.0008757983193277311, "loss": 0.4321, "step": 4540 }, { "epoch": 2.536871508379888, "grad_norm": 0.9024701714515686, "learning_rate": 0.0008757703081232494, "loss": 0.4568, "step": 4541 }, { "epoch": 2.5374301675977655, "grad_norm": 0.8123811483383179, "learning_rate": 0.0008757422969187675, "loss": 0.4794, "step": 4542 }, { "epoch": 2.5379888268156425, "grad_norm": 0.5144625306129456, "learning_rate": 0.0008757142857142857, "loss": 0.5732, "step": 4543 }, { "epoch": 2.5385474860335195, "grad_norm": 0.8084815144538879, "learning_rate": 0.000875686274509804, "loss": 0.4647, "step": 4544 }, { "epoch": 2.5391061452513966, "grad_norm": 1.0256503820419312, "learning_rate": 0.0008756582633053222, "loss": 0.4485, "step": 4545 }, { "epoch": 2.5396648044692736, "grad_norm": 0.6930704116821289, "learning_rate": 0.0008756302521008404, "loss": 0.495, "step": 4546 }, { "epoch": 2.540223463687151, "grad_norm": 0.8160821795463562, "learning_rate": 0.0008756022408963586, "loss": 0.562, "step": 4547 }, { "epoch": 2.540782122905028, "grad_norm": 0.5813196897506714, "learning_rate": 0.0008755742296918767, "loss": 0.4651, "step": 4548 }, { "epoch": 2.541340782122905, "grad_norm": 
0.617099404335022, "learning_rate": 0.000875546218487395, "loss": 0.4141, "step": 4549 }, { "epoch": 2.541899441340782, "grad_norm": 0.4484594166278839, "learning_rate": 0.0008755182072829132, "loss": 0.4557, "step": 4550 }, { "epoch": 2.542458100558659, "grad_norm": 6.695379734039307, "learning_rate": 0.0008754901960784314, "loss": 0.4751, "step": 4551 }, { "epoch": 2.5430167597765365, "grad_norm": 1.1015985012054443, "learning_rate": 0.0008754621848739496, "loss": 0.4442, "step": 4552 }, { "epoch": 2.5435754189944135, "grad_norm": 1.083116888999939, "learning_rate": 0.0008754341736694677, "loss": 0.5141, "step": 4553 }, { "epoch": 2.5441340782122905, "grad_norm": 0.5880929231643677, "learning_rate": 0.000875406162464986, "loss": 0.3553, "step": 4554 }, { "epoch": 2.5446927374301676, "grad_norm": 1.604151964187622, "learning_rate": 0.0008753781512605042, "loss": 0.5281, "step": 4555 }, { "epoch": 2.5452513966480446, "grad_norm": 0.6877735257148743, "learning_rate": 0.0008753501400560224, "loss": 0.4869, "step": 4556 }, { "epoch": 2.545810055865922, "grad_norm": 0.6027437448501587, "learning_rate": 0.0008753221288515406, "loss": 0.5367, "step": 4557 }, { "epoch": 2.546368715083799, "grad_norm": 0.7366870641708374, "learning_rate": 0.0008752941176470587, "loss": 0.548, "step": 4558 }, { "epoch": 2.546927374301676, "grad_norm": 1.889333724975586, "learning_rate": 0.0008752661064425771, "loss": 0.4813, "step": 4559 }, { "epoch": 2.547486033519553, "grad_norm": 0.6310818791389465, "learning_rate": 0.0008752380952380953, "loss": 0.4387, "step": 4560 }, { "epoch": 2.54804469273743, "grad_norm": 1.465470790863037, "learning_rate": 0.0008752100840336135, "loss": 0.5065, "step": 4561 }, { "epoch": 2.5486033519553075, "grad_norm": 0.4020266830921173, "learning_rate": 0.0008751820728291317, "loss": 0.4131, "step": 4562 }, { "epoch": 2.549162011173184, "grad_norm": 0.6013814210891724, "learning_rate": 0.0008751540616246499, "loss": 0.5172, "step": 4563 }, { "epoch": 
2.5497206703910615, "grad_norm": 1.005944013595581, "learning_rate": 0.0008751260504201681, "loss": 0.6483, "step": 4564 }, { "epoch": 2.5502793296089385, "grad_norm": 0.617769718170166, "learning_rate": 0.0008750980392156863, "loss": 0.4005, "step": 4565 }, { "epoch": 2.5508379888268156, "grad_norm": 0.7957714796066284, "learning_rate": 0.0008750700280112045, "loss": 0.5973, "step": 4566 }, { "epoch": 2.5513966480446926, "grad_norm": 0.5730568766593933, "learning_rate": 0.0008750420168067227, "loss": 0.4122, "step": 4567 }, { "epoch": 2.5519553072625696, "grad_norm": 0.410114586353302, "learning_rate": 0.0008750140056022409, "loss": 0.3875, "step": 4568 }, { "epoch": 2.552513966480447, "grad_norm": 0.5299544930458069, "learning_rate": 0.0008749859943977591, "loss": 0.5132, "step": 4569 }, { "epoch": 2.553072625698324, "grad_norm": 0.42914700508117676, "learning_rate": 0.0008749579831932773, "loss": 0.4213, "step": 4570 }, { "epoch": 2.553631284916201, "grad_norm": 0.7517532110214233, "learning_rate": 0.0008749299719887955, "loss": 0.5517, "step": 4571 }, { "epoch": 2.554189944134078, "grad_norm": 0.4640340805053711, "learning_rate": 0.0008749019607843137, "loss": 0.5067, "step": 4572 }, { "epoch": 2.554748603351955, "grad_norm": 3.790728807449341, "learning_rate": 0.0008748739495798319, "loss": 0.5391, "step": 4573 }, { "epoch": 2.5553072625698325, "grad_norm": 0.4719926118850708, "learning_rate": 0.0008748459383753502, "loss": 0.3969, "step": 4574 }, { "epoch": 2.5558659217877095, "grad_norm": 0.5402041077613831, "learning_rate": 0.0008748179271708684, "loss": 0.4546, "step": 4575 }, { "epoch": 2.5564245810055866, "grad_norm": 0.8960727453231812, "learning_rate": 0.0008747899159663866, "loss": 0.6047, "step": 4576 }, { "epoch": 2.5569832402234636, "grad_norm": 0.6102758049964905, "learning_rate": 0.0008747619047619048, "loss": 0.6259, "step": 4577 }, { "epoch": 2.5575418994413406, "grad_norm": 0.5716445446014404, "learning_rate": 0.000874733893557423, "loss": 
0.5124, "step": 4578 }, { "epoch": 2.558100558659218, "grad_norm": 0.5660879015922546, "learning_rate": 0.0008747058823529413, "loss": 0.4046, "step": 4579 }, { "epoch": 2.558659217877095, "grad_norm": 1.2680186033248901, "learning_rate": 0.0008746778711484594, "loss": 0.4978, "step": 4580 }, { "epoch": 2.559217877094972, "grad_norm": 0.884227454662323, "learning_rate": 0.0008746498599439776, "loss": 0.3966, "step": 4581 }, { "epoch": 2.559776536312849, "grad_norm": 0.6963787078857422, "learning_rate": 0.0008746218487394958, "loss": 0.6064, "step": 4582 }, { "epoch": 2.560335195530726, "grad_norm": 0.42113932967185974, "learning_rate": 0.000874593837535014, "loss": 0.4798, "step": 4583 }, { "epoch": 2.5608938547486035, "grad_norm": 4.617695331573486, "learning_rate": 0.0008745658263305323, "loss": 0.4827, "step": 4584 }, { "epoch": 2.5614525139664805, "grad_norm": 0.632781982421875, "learning_rate": 0.0008745378151260504, "loss": 0.452, "step": 4585 }, { "epoch": 2.5620111731843576, "grad_norm": 0.9550939798355103, "learning_rate": 0.0008745098039215686, "loss": 0.6119, "step": 4586 }, { "epoch": 2.5625698324022346, "grad_norm": 0.5491369366645813, "learning_rate": 0.0008744817927170868, "loss": 0.495, "step": 4587 }, { "epoch": 2.5631284916201116, "grad_norm": 0.6712592244148254, "learning_rate": 0.000874453781512605, "loss": 0.4184, "step": 4588 }, { "epoch": 2.563687150837989, "grad_norm": 0.7854418158531189, "learning_rate": 0.0008744257703081233, "loss": 0.47, "step": 4589 }, { "epoch": 2.564245810055866, "grad_norm": 0.9034537076950073, "learning_rate": 0.0008743977591036414, "loss": 0.6141, "step": 4590 }, { "epoch": 2.564804469273743, "grad_norm": 0.8327544927597046, "learning_rate": 0.0008743697478991597, "loss": 0.343, "step": 4591 }, { "epoch": 2.56536312849162, "grad_norm": 0.4472448229789734, "learning_rate": 0.0008743417366946779, "loss": 0.404, "step": 4592 }, { "epoch": 2.565921787709497, "grad_norm": 3.9351420402526855, "learning_rate": 
0.0008743137254901961, "loss": 0.5229, "step": 4593 }, { "epoch": 2.5664804469273745, "grad_norm": 0.6701863408088684, "learning_rate": 0.0008742857142857144, "loss": 0.3762, "step": 4594 }, { "epoch": 2.5670391061452515, "grad_norm": 0.4767731726169586, "learning_rate": 0.0008742577030812326, "loss": 0.4533, "step": 4595 }, { "epoch": 2.5675977653631286, "grad_norm": 0.5488147139549255, "learning_rate": 0.0008742296918767507, "loss": 0.3994, "step": 4596 }, { "epoch": 2.5681564245810056, "grad_norm": 0.4771422743797302, "learning_rate": 0.0008742016806722689, "loss": 0.4987, "step": 4597 }, { "epoch": 2.5687150837988826, "grad_norm": 0.544771134853363, "learning_rate": 0.0008741736694677871, "loss": 0.4167, "step": 4598 }, { "epoch": 2.56927374301676, "grad_norm": 1.0047450065612793, "learning_rate": 0.0008741456582633054, "loss": 0.5022, "step": 4599 }, { "epoch": 2.5698324022346366, "grad_norm": 0.8253680467605591, "learning_rate": 0.0008741176470588236, "loss": 0.5205, "step": 4600 }, { "epoch": 2.570391061452514, "grad_norm": 0.9420467615127563, "learning_rate": 0.0008740896358543417, "loss": 0.524, "step": 4601 }, { "epoch": 2.570949720670391, "grad_norm": 1.6282472610473633, "learning_rate": 0.0008740616246498599, "loss": 0.4728, "step": 4602 }, { "epoch": 2.571508379888268, "grad_norm": 0.7011590003967285, "learning_rate": 0.0008740336134453781, "loss": 0.6335, "step": 4603 }, { "epoch": 2.572067039106145, "grad_norm": 0.6995001435279846, "learning_rate": 0.0008740056022408964, "loss": 0.5563, "step": 4604 }, { "epoch": 2.572625698324022, "grad_norm": 0.6163257360458374, "learning_rate": 0.0008739775910364146, "loss": 0.4066, "step": 4605 }, { "epoch": 2.5731843575418996, "grad_norm": 0.3490305542945862, "learning_rate": 0.0008739495798319327, "loss": 0.3272, "step": 4606 }, { "epoch": 2.5737430167597766, "grad_norm": 0.5445644855499268, "learning_rate": 0.000873921568627451, "loss": 0.383, "step": 4607 }, { "epoch": 2.5743016759776536, "grad_norm": 
0.47447124123573303, "learning_rate": 0.0008738935574229692, "loss": 0.4254, "step": 4608 }, { "epoch": 2.5748603351955306, "grad_norm": 1.102980375289917, "learning_rate": 0.0008738655462184875, "loss": 0.5693, "step": 4609 }, { "epoch": 2.5754189944134076, "grad_norm": 0.5574500560760498, "learning_rate": 0.0008738375350140057, "loss": 0.4323, "step": 4610 }, { "epoch": 2.575977653631285, "grad_norm": 0.7321161031723022, "learning_rate": 0.0008738095238095239, "loss": 0.5591, "step": 4611 }, { "epoch": 2.576536312849162, "grad_norm": 0.6664870381355286, "learning_rate": 0.000873781512605042, "loss": 0.5209, "step": 4612 }, { "epoch": 2.577094972067039, "grad_norm": 1.344436764717102, "learning_rate": 0.0008737535014005602, "loss": 0.4379, "step": 4613 }, { "epoch": 2.577653631284916, "grad_norm": 0.8389381170272827, "learning_rate": 0.0008737254901960785, "loss": 0.4701, "step": 4614 }, { "epoch": 2.578212290502793, "grad_norm": 0.7012710571289062, "learning_rate": 0.0008736974789915967, "loss": 0.4322, "step": 4615 }, { "epoch": 2.5787709497206706, "grad_norm": 0.5328612327575684, "learning_rate": 0.0008736694677871149, "loss": 0.4715, "step": 4616 }, { "epoch": 2.5793296089385476, "grad_norm": 0.5420647859573364, "learning_rate": 0.000873641456582633, "loss": 0.4558, "step": 4617 }, { "epoch": 2.5798882681564246, "grad_norm": 0.5130098462104797, "learning_rate": 0.0008736134453781512, "loss": 0.5482, "step": 4618 }, { "epoch": 2.5804469273743016, "grad_norm": 0.5072817206382751, "learning_rate": 0.0008735854341736695, "loss": 0.3707, "step": 4619 }, { "epoch": 2.5810055865921786, "grad_norm": 1.2881916761398315, "learning_rate": 0.0008735574229691877, "loss": 0.4751, "step": 4620 }, { "epoch": 2.581564245810056, "grad_norm": 0.6830811500549316, "learning_rate": 0.0008735294117647059, "loss": 0.5533, "step": 4621 }, { "epoch": 2.582122905027933, "grad_norm": 0.6909314393997192, "learning_rate": 0.000873501400560224, "loss": 0.4638, "step": 4622 }, { "epoch": 
2.58268156424581, "grad_norm": 0.6132743954658508, "learning_rate": 0.0008734733893557422, "loss": 0.5715, "step": 4623 }, { "epoch": 2.583240223463687, "grad_norm": 0.9117586612701416, "learning_rate": 0.0008734453781512606, "loss": 0.535, "step": 4624 }, { "epoch": 2.583798882681564, "grad_norm": 0.6704279184341431, "learning_rate": 0.0008734173669467788, "loss": 0.4819, "step": 4625 }, { "epoch": 2.5843575418994416, "grad_norm": 0.7600635886192322, "learning_rate": 0.000873389355742297, "loss": 0.5432, "step": 4626 }, { "epoch": 2.5849162011173186, "grad_norm": 0.6423128843307495, "learning_rate": 0.0008733613445378152, "loss": 0.5347, "step": 4627 }, { "epoch": 2.5854748603351956, "grad_norm": 0.9727770686149597, "learning_rate": 0.0008733333333333333, "loss": 0.5764, "step": 4628 }, { "epoch": 2.5860335195530726, "grad_norm": 0.5933406352996826, "learning_rate": 0.0008733053221288516, "loss": 0.4209, "step": 4629 }, { "epoch": 2.5865921787709496, "grad_norm": 0.510667085647583, "learning_rate": 0.0008732773109243698, "loss": 0.5379, "step": 4630 }, { "epoch": 2.587150837988827, "grad_norm": 0.6274928450584412, "learning_rate": 0.000873249299719888, "loss": 0.5807, "step": 4631 }, { "epoch": 2.587709497206704, "grad_norm": 0.5118690133094788, "learning_rate": 0.0008732212885154062, "loss": 0.5204, "step": 4632 }, { "epoch": 2.588268156424581, "grad_norm": 0.5862618088722229, "learning_rate": 0.0008731932773109243, "loss": 0.4826, "step": 4633 }, { "epoch": 2.588826815642458, "grad_norm": 0.6152790188789368, "learning_rate": 0.0008731652661064426, "loss": 0.4975, "step": 4634 }, { "epoch": 2.589385474860335, "grad_norm": 0.5919183492660522, "learning_rate": 0.0008731372549019608, "loss": 0.5434, "step": 4635 }, { "epoch": 2.5899441340782126, "grad_norm": 0.5699243545532227, "learning_rate": 0.000873109243697479, "loss": 0.4746, "step": 4636 }, { "epoch": 2.590502793296089, "grad_norm": 0.6677420139312744, "learning_rate": 0.0008730812324929972, "loss": 0.5273, 
"step": 4637 }, { "epoch": 2.5910614525139666, "grad_norm": 0.6572413444519043, "learning_rate": 0.0008730532212885153, "loss": 0.5044, "step": 4638 }, { "epoch": 2.5916201117318436, "grad_norm": 0.6275217533111572, "learning_rate": 0.0008730252100840336, "loss": 0.4657, "step": 4639 }, { "epoch": 2.5921787709497206, "grad_norm": 0.9364564418792725, "learning_rate": 0.0008729971988795519, "loss": 0.4398, "step": 4640 }, { "epoch": 2.5927374301675976, "grad_norm": 0.7408837080001831, "learning_rate": 0.0008729691876750701, "loss": 0.418, "step": 4641 }, { "epoch": 2.5932960893854746, "grad_norm": 0.6557133793830872, "learning_rate": 0.0008729411764705883, "loss": 0.4434, "step": 4642 }, { "epoch": 2.593854748603352, "grad_norm": 0.5375706553459167, "learning_rate": 0.0008729131652661065, "loss": 0.5618, "step": 4643 }, { "epoch": 2.594413407821229, "grad_norm": 0.4827219247817993, "learning_rate": 0.0008728851540616247, "loss": 0.5042, "step": 4644 }, { "epoch": 2.594972067039106, "grad_norm": 0.5630202293395996, "learning_rate": 0.0008728571428571429, "loss": 0.4861, "step": 4645 }, { "epoch": 2.595530726256983, "grad_norm": 0.8943331837654114, "learning_rate": 0.0008728291316526611, "loss": 0.514, "step": 4646 }, { "epoch": 2.59608938547486, "grad_norm": 1.135999083518982, "learning_rate": 0.0008728011204481793, "loss": 0.5324, "step": 4647 }, { "epoch": 2.5966480446927376, "grad_norm": 0.597041666507721, "learning_rate": 0.0008727731092436975, "loss": 0.4565, "step": 4648 }, { "epoch": 2.5972067039106146, "grad_norm": 0.3813895881175995, "learning_rate": 0.0008727450980392157, "loss": 0.3213, "step": 4649 }, { "epoch": 2.5977653631284916, "grad_norm": 1.0273348093032837, "learning_rate": 0.0008727170868347339, "loss": 0.5125, "step": 4650 }, { "epoch": 2.5983240223463686, "grad_norm": 0.42914119362831116, "learning_rate": 0.0008726890756302521, "loss": 0.4083, "step": 4651 }, { "epoch": 2.5988826815642456, "grad_norm": 1.0949652194976807, "learning_rate": 
0.0008726610644257703, "loss": 0.5836, "step": 4652 }, { "epoch": 2.599441340782123, "grad_norm": 2.259638547897339, "learning_rate": 0.0008726330532212885, "loss": 0.5527, "step": 4653 }, { "epoch": 2.6, "grad_norm": 2.4475438594818115, "learning_rate": 0.0008726050420168068, "loss": 0.5693, "step": 4654 }, { "epoch": 2.600558659217877, "grad_norm": 0.6083667874336243, "learning_rate": 0.0008725770308123249, "loss": 0.4522, "step": 4655 }, { "epoch": 2.601117318435754, "grad_norm": 6.451206684112549, "learning_rate": 0.0008725490196078432, "loss": 0.5011, "step": 4656 }, { "epoch": 2.601675977653631, "grad_norm": 4.511854648590088, "learning_rate": 0.0008725210084033614, "loss": 0.4763, "step": 4657 }, { "epoch": 2.6022346368715086, "grad_norm": 3.012691020965576, "learning_rate": 0.0008724929971988796, "loss": 0.61, "step": 4658 }, { "epoch": 2.6027932960893856, "grad_norm": 0.4272519648075104, "learning_rate": 0.0008724649859943979, "loss": 0.4468, "step": 4659 }, { "epoch": 2.6033519553072626, "grad_norm": 0.5548369884490967, "learning_rate": 0.000872436974789916, "loss": 0.4977, "step": 4660 }, { "epoch": 2.6039106145251396, "grad_norm": 0.4272269606590271, "learning_rate": 0.0008724089635854342, "loss": 0.3786, "step": 4661 }, { "epoch": 2.6044692737430166, "grad_norm": 0.6776177287101746, "learning_rate": 0.0008723809523809524, "loss": 0.6815, "step": 4662 }, { "epoch": 2.605027932960894, "grad_norm": 1.3994733095169067, "learning_rate": 0.0008723529411764706, "loss": 0.4271, "step": 4663 }, { "epoch": 2.605586592178771, "grad_norm": 0.7243050932884216, "learning_rate": 0.0008723249299719889, "loss": 0.4679, "step": 4664 }, { "epoch": 2.606145251396648, "grad_norm": 0.6003062129020691, "learning_rate": 0.000872296918767507, "loss": 0.4951, "step": 4665 }, { "epoch": 2.606703910614525, "grad_norm": 1.933485746383667, "learning_rate": 0.0008722689075630252, "loss": 0.4334, "step": 4666 }, { "epoch": 2.607262569832402, "grad_norm": 0.694419801235199, 
"learning_rate": 0.0008722408963585434, "loss": 0.6094, "step": 4667 }, { "epoch": 2.6078212290502796, "grad_norm": 1.0899416208267212, "learning_rate": 0.0008722128851540616, "loss": 0.6327, "step": 4668 }, { "epoch": 2.6083798882681566, "grad_norm": 0.6413179039955139, "learning_rate": 0.0008721848739495799, "loss": 0.4992, "step": 4669 }, { "epoch": 2.6089385474860336, "grad_norm": 0.5404636263847351, "learning_rate": 0.0008721568627450981, "loss": 0.5117, "step": 4670 }, { "epoch": 2.6094972067039106, "grad_norm": 0.6101294159889221, "learning_rate": 0.0008721288515406162, "loss": 0.4767, "step": 4671 }, { "epoch": 2.6100558659217876, "grad_norm": 1.3087968826293945, "learning_rate": 0.0008721008403361344, "loss": 0.5354, "step": 4672 }, { "epoch": 2.610614525139665, "grad_norm": 0.7199153304100037, "learning_rate": 0.0008720728291316527, "loss": 0.4506, "step": 4673 }, { "epoch": 2.6111731843575416, "grad_norm": 0.6021122336387634, "learning_rate": 0.000872044817927171, "loss": 0.4336, "step": 4674 }, { "epoch": 2.611731843575419, "grad_norm": 0.6390370726585388, "learning_rate": 0.0008720168067226892, "loss": 0.4852, "step": 4675 }, { "epoch": 2.612290502793296, "grad_norm": 0.8552182912826538, "learning_rate": 0.0008719887955182073, "loss": 0.5133, "step": 4676 }, { "epoch": 2.612849162011173, "grad_norm": 0.5973447561264038, "learning_rate": 0.0008719607843137255, "loss": 0.5347, "step": 4677 }, { "epoch": 2.61340782122905, "grad_norm": 0.6187921166419983, "learning_rate": 0.0008719327731092437, "loss": 0.4583, "step": 4678 }, { "epoch": 2.613966480446927, "grad_norm": 0.8108627796173096, "learning_rate": 0.000871904761904762, "loss": 0.5222, "step": 4679 }, { "epoch": 2.6145251396648046, "grad_norm": 0.6222319602966309, "learning_rate": 0.0008718767507002802, "loss": 0.5005, "step": 4680 }, { "epoch": 2.6150837988826816, "grad_norm": 2.097660779953003, "learning_rate": 0.0008718487394957983, "loss": 0.4469, "step": 4681 }, { "epoch": 2.6156424581005586, 
"grad_norm": 0.6373857259750366, "learning_rate": 0.0008718207282913165, "loss": 0.4338, "step": 4682 }, { "epoch": 2.6162011173184356, "grad_norm": 0.5153201818466187, "learning_rate": 0.0008717927170868347, "loss": 0.484, "step": 4683 }, { "epoch": 2.6167597765363126, "grad_norm": 7.995108127593994, "learning_rate": 0.0008717647058823529, "loss": 0.4267, "step": 4684 }, { "epoch": 2.61731843575419, "grad_norm": 0.46427232027053833, "learning_rate": 0.0008717366946778712, "loss": 0.4209, "step": 4685 }, { "epoch": 2.617877094972067, "grad_norm": 0.644861102104187, "learning_rate": 0.0008717086834733894, "loss": 0.5519, "step": 4686 }, { "epoch": 2.618435754189944, "grad_norm": 0.46076497435569763, "learning_rate": 0.0008716806722689075, "loss": 0.4472, "step": 4687 }, { "epoch": 2.618994413407821, "grad_norm": 0.5737087726593018, "learning_rate": 0.0008716526610644257, "loss": 0.5042, "step": 4688 }, { "epoch": 2.619553072625698, "grad_norm": 1.2339030504226685, "learning_rate": 0.000871624649859944, "loss": 0.6456, "step": 4689 }, { "epoch": 2.6201117318435756, "grad_norm": 0.5793154239654541, "learning_rate": 0.0008715966386554623, "loss": 0.5078, "step": 4690 }, { "epoch": 2.6206703910614526, "grad_norm": 0.4105638265609741, "learning_rate": 0.0008715686274509805, "loss": 0.4385, "step": 4691 }, { "epoch": 2.6212290502793296, "grad_norm": 0.5621259808540344, "learning_rate": 0.0008715406162464986, "loss": 0.5668, "step": 4692 }, { "epoch": 2.6217877094972066, "grad_norm": 0.5614182353019714, "learning_rate": 0.0008715126050420168, "loss": 0.4502, "step": 4693 }, { "epoch": 2.6223463687150836, "grad_norm": 0.38099655508995056, "learning_rate": 0.000871484593837535, "loss": 0.4513, "step": 4694 }, { "epoch": 2.622905027932961, "grad_norm": 1.6032222509384155, "learning_rate": 0.0008714565826330533, "loss": 0.4963, "step": 4695 }, { "epoch": 2.623463687150838, "grad_norm": 0.42527201771736145, "learning_rate": 0.0008714285714285715, "loss": 0.4399, "step": 4696 }, 
{ "epoch": 2.624022346368715, "grad_norm": 0.7446414232254028, "learning_rate": 0.0008714005602240896, "loss": 0.8152, "step": 4697 }, { "epoch": 2.624581005586592, "grad_norm": 0.764163613319397, "learning_rate": 0.0008713725490196078, "loss": 0.4505, "step": 4698 }, { "epoch": 2.625139664804469, "grad_norm": 0.4340057075023651, "learning_rate": 0.000871344537815126, "loss": 0.488, "step": 4699 }, { "epoch": 2.6256983240223466, "grad_norm": 1.5946266651153564, "learning_rate": 0.0008713165266106443, "loss": 0.5975, "step": 4700 }, { "epoch": 2.6262569832402236, "grad_norm": 0.6357771754264832, "learning_rate": 0.0008712885154061625, "loss": 0.5358, "step": 4701 }, { "epoch": 2.6268156424581006, "grad_norm": 0.6142751574516296, "learning_rate": 0.0008712605042016807, "loss": 0.5015, "step": 4702 }, { "epoch": 2.6273743016759776, "grad_norm": 0.6175709366798401, "learning_rate": 0.0008712324929971988, "loss": 0.5146, "step": 4703 }, { "epoch": 2.6279329608938546, "grad_norm": 0.6648790836334229, "learning_rate": 0.000871204481792717, "loss": 0.5756, "step": 4704 }, { "epoch": 2.628491620111732, "grad_norm": 0.46663302183151245, "learning_rate": 0.0008711764705882354, "loss": 0.5286, "step": 4705 }, { "epoch": 2.6290502793296087, "grad_norm": 0.7095016837120056, "learning_rate": 0.0008711484593837536, "loss": 0.4608, "step": 4706 }, { "epoch": 2.629608938547486, "grad_norm": 0.7443677186965942, "learning_rate": 0.0008711204481792718, "loss": 0.4947, "step": 4707 }, { "epoch": 2.630167597765363, "grad_norm": 0.8868491053581238, "learning_rate": 0.0008710924369747899, "loss": 0.3831, "step": 4708 }, { "epoch": 2.63072625698324, "grad_norm": 0.4886857271194458, "learning_rate": 0.0008710644257703081, "loss": 0.6954, "step": 4709 }, { "epoch": 2.631284916201117, "grad_norm": 0.5084818601608276, "learning_rate": 0.0008710364145658264, "loss": 0.583, "step": 4710 }, { "epoch": 2.631843575418994, "grad_norm": 0.499833881855011, "learning_rate": 0.0008710084033613446, 
"loss": 0.4395, "step": 4711 }, { "epoch": 2.6324022346368716, "grad_norm": 0.697119414806366, "learning_rate": 0.0008709803921568628, "loss": 0.4771, "step": 4712 }, { "epoch": 2.6329608938547486, "grad_norm": 0.8509107232093811, "learning_rate": 0.0008709523809523809, "loss": 0.546, "step": 4713 }, { "epoch": 2.6335195530726256, "grad_norm": 0.7323441505432129, "learning_rate": 0.0008709243697478991, "loss": 0.4639, "step": 4714 }, { "epoch": 2.6340782122905027, "grad_norm": 0.41734227538108826, "learning_rate": 0.0008708963585434174, "loss": 0.4869, "step": 4715 }, { "epoch": 2.6346368715083797, "grad_norm": 0.5006591081619263, "learning_rate": 0.0008708683473389356, "loss": 0.461, "step": 4716 }, { "epoch": 2.635195530726257, "grad_norm": 1.191251277923584, "learning_rate": 0.0008708403361344538, "loss": 0.5559, "step": 4717 }, { "epoch": 2.635754189944134, "grad_norm": 0.7904143333435059, "learning_rate": 0.000870812324929972, "loss": 0.5127, "step": 4718 }, { "epoch": 2.636312849162011, "grad_norm": 0.9613448977470398, "learning_rate": 0.0008707843137254901, "loss": 0.4754, "step": 4719 }, { "epoch": 2.636871508379888, "grad_norm": 0.43707001209259033, "learning_rate": 0.0008707563025210084, "loss": 0.4735, "step": 4720 }, { "epoch": 2.637430167597765, "grad_norm": 0.7447942495346069, "learning_rate": 0.0008707282913165266, "loss": 0.4612, "step": 4721 }, { "epoch": 2.6379888268156426, "grad_norm": 0.7653697729110718, "learning_rate": 0.0008707002801120449, "loss": 0.4265, "step": 4722 }, { "epoch": 2.6385474860335196, "grad_norm": 0.9355491995811462, "learning_rate": 0.0008706722689075631, "loss": 0.6343, "step": 4723 }, { "epoch": 2.6391061452513966, "grad_norm": 0.6037322878837585, "learning_rate": 0.0008706442577030812, "loss": 0.3857, "step": 4724 }, { "epoch": 2.6396648044692737, "grad_norm": 0.5230147242546082, "learning_rate": 0.0008706162464985995, "loss": 0.526, "step": 4725 }, { "epoch": 2.6402234636871507, "grad_norm": 0.9687732458114624, 
"learning_rate": 0.0008705882352941177, "loss": 0.5247, "step": 4726 }, { "epoch": 2.640782122905028, "grad_norm": 0.6201425194740295, "learning_rate": 0.0008705602240896359, "loss": 0.513, "step": 4727 }, { "epoch": 2.641340782122905, "grad_norm": 0.6437943577766418, "learning_rate": 0.0008705322128851541, "loss": 0.4176, "step": 4728 }, { "epoch": 2.641899441340782, "grad_norm": 0.44939082860946655, "learning_rate": 0.0008705042016806722, "loss": 0.4928, "step": 4729 }, { "epoch": 2.642458100558659, "grad_norm": 0.38824886083602905, "learning_rate": 0.0008704761904761905, "loss": 0.402, "step": 4730 }, { "epoch": 2.643016759776536, "grad_norm": 0.5163977742195129, "learning_rate": 0.0008704481792717087, "loss": 0.3362, "step": 4731 }, { "epoch": 2.6435754189944136, "grad_norm": 0.7596457600593567, "learning_rate": 0.0008704201680672269, "loss": 0.4694, "step": 4732 }, { "epoch": 2.6441340782122906, "grad_norm": 0.39082902669906616, "learning_rate": 0.0008703921568627451, "loss": 0.428, "step": 4733 }, { "epoch": 2.6446927374301676, "grad_norm": 0.7579891681671143, "learning_rate": 0.0008703641456582633, "loss": 0.4739, "step": 4734 }, { "epoch": 2.6452513966480447, "grad_norm": 0.5568997859954834, "learning_rate": 0.0008703361344537815, "loss": 0.496, "step": 4735 }, { "epoch": 2.6458100558659217, "grad_norm": 0.5152088403701782, "learning_rate": 0.0008703081232492997, "loss": 0.4694, "step": 4736 }, { "epoch": 2.646368715083799, "grad_norm": 0.4541816711425781, "learning_rate": 0.0008702801120448179, "loss": 0.5664, "step": 4737 }, { "epoch": 2.646927374301676, "grad_norm": 0.7481280565261841, "learning_rate": 0.0008702521008403362, "loss": 0.4173, "step": 4738 }, { "epoch": 2.647486033519553, "grad_norm": 0.681196391582489, "learning_rate": 0.0008702240896358544, "loss": 0.441, "step": 4739 }, { "epoch": 2.64804469273743, "grad_norm": 0.5350286960601807, "learning_rate": 0.0008701960784313726, "loss": 0.5377, "step": 4740 }, { "epoch": 2.648603351955307, 
"grad_norm": 0.5217966437339783, "learning_rate": 0.0008701680672268908, "loss": 0.4624, "step": 4741 }, { "epoch": 2.6491620111731846, "grad_norm": 1.993146300315857, "learning_rate": 0.000870140056022409, "loss": 0.4675, "step": 4742 }, { "epoch": 2.649720670391061, "grad_norm": 0.554341197013855, "learning_rate": 0.0008701120448179272, "loss": 0.393, "step": 4743 }, { "epoch": 2.6502793296089386, "grad_norm": 0.6307961344718933, "learning_rate": 0.0008700840336134454, "loss": 0.5158, "step": 4744 }, { "epoch": 2.6508379888268156, "grad_norm": 0.4391495883464813, "learning_rate": 0.0008700560224089636, "loss": 0.3746, "step": 4745 }, { "epoch": 2.6513966480446927, "grad_norm": 3.2256343364715576, "learning_rate": 0.0008700280112044818, "loss": 0.4607, "step": 4746 }, { "epoch": 2.6519553072625697, "grad_norm": 0.5182428956031799, "learning_rate": 0.00087, "loss": 0.3851, "step": 4747 }, { "epoch": 2.6525139664804467, "grad_norm": 4.928438186645508, "learning_rate": 0.0008699719887955182, "loss": 0.6138, "step": 4748 }, { "epoch": 2.653072625698324, "grad_norm": 0.9736620187759399, "learning_rate": 0.0008699439775910364, "loss": 0.4929, "step": 4749 }, { "epoch": 2.653631284916201, "grad_norm": 0.4683849811553955, "learning_rate": 0.0008699159663865547, "loss": 0.432, "step": 4750 }, { "epoch": 2.654189944134078, "grad_norm": 0.45677450299263, "learning_rate": 0.0008698879551820728, "loss": 0.398, "step": 4751 }, { "epoch": 2.654748603351955, "grad_norm": 0.5331783294677734, "learning_rate": 0.000869859943977591, "loss": 0.4654, "step": 4752 }, { "epoch": 2.655307262569832, "grad_norm": 5.260037899017334, "learning_rate": 0.0008698319327731092, "loss": 0.5979, "step": 4753 }, { "epoch": 2.6558659217877096, "grad_norm": 0.48784613609313965, "learning_rate": 0.0008698039215686274, "loss": 0.4364, "step": 4754 }, { "epoch": 2.6564245810055866, "grad_norm": 0.47899293899536133, "learning_rate": 0.0008697759103641458, "loss": 0.3693, "step": 4755 }, { "epoch": 
2.6569832402234637, "grad_norm": 1.0056439638137817, "learning_rate": 0.0008697478991596639, "loss": 0.4495, "step": 4756 }, { "epoch": 2.6575418994413407, "grad_norm": 0.5287572145462036, "learning_rate": 0.0008697198879551821, "loss": 0.4169, "step": 4757 }, { "epoch": 2.6581005586592177, "grad_norm": 0.7584150433540344, "learning_rate": 0.0008696918767507003, "loss": 0.4563, "step": 4758 }, { "epoch": 2.658659217877095, "grad_norm": 0.4861416816711426, "learning_rate": 0.0008696638655462185, "loss": 0.3965, "step": 4759 }, { "epoch": 2.659217877094972, "grad_norm": 0.5171219706535339, "learning_rate": 0.0008696358543417368, "loss": 0.3695, "step": 4760 }, { "epoch": 2.659776536312849, "grad_norm": 0.4791187047958374, "learning_rate": 0.0008696078431372549, "loss": 0.401, "step": 4761 }, { "epoch": 2.660335195530726, "grad_norm": 0.41869813203811646, "learning_rate": 0.0008695798319327731, "loss": 0.3108, "step": 4762 }, { "epoch": 2.660893854748603, "grad_norm": 2.8987014293670654, "learning_rate": 0.0008695518207282913, "loss": 0.5095, "step": 4763 }, { "epoch": 2.6614525139664806, "grad_norm": 0.5957987308502197, "learning_rate": 0.0008695238095238095, "loss": 0.501, "step": 4764 }, { "epoch": 2.6620111731843576, "grad_norm": 4.71506929397583, "learning_rate": 0.0008694957983193278, "loss": 0.4998, "step": 4765 }, { "epoch": 2.6625698324022347, "grad_norm": 0.5180720090866089, "learning_rate": 0.000869467787114846, "loss": 0.3085, "step": 4766 }, { "epoch": 2.6631284916201117, "grad_norm": 0.47399231791496277, "learning_rate": 0.0008694397759103641, "loss": 0.4256, "step": 4767 }, { "epoch": 2.6636871508379887, "grad_norm": 0.5193911194801331, "learning_rate": 0.0008694117647058823, "loss": 0.4369, "step": 4768 }, { "epoch": 2.664245810055866, "grad_norm": 0.43193545937538147, "learning_rate": 0.0008693837535014005, "loss": 0.4277, "step": 4769 }, { "epoch": 2.664804469273743, "grad_norm": 0.6616137623786926, "learning_rate": 0.0008693557422969189, "loss": 
0.3911, "step": 4770 }, { "epoch": 2.66536312849162, "grad_norm": 0.5150429010391235, "learning_rate": 0.0008693277310924371, "loss": 0.5552, "step": 4771 }, { "epoch": 2.665921787709497, "grad_norm": 0.4323931932449341, "learning_rate": 0.0008692997198879552, "loss": 0.3487, "step": 4772 }, { "epoch": 2.666480446927374, "grad_norm": 0.6784036159515381, "learning_rate": 0.0008692717086834734, "loss": 0.5034, "step": 4773 }, { "epoch": 2.6670391061452516, "grad_norm": 0.5154772996902466, "learning_rate": 0.0008692436974789916, "loss": 0.4521, "step": 4774 }, { "epoch": 2.6675977653631286, "grad_norm": 0.5224826335906982, "learning_rate": 0.0008692156862745099, "loss": 0.4242, "step": 4775 }, { "epoch": 2.6681564245810057, "grad_norm": 0.631027102470398, "learning_rate": 0.0008691876750700281, "loss": 0.4868, "step": 4776 }, { "epoch": 2.6687150837988827, "grad_norm": 0.5402419567108154, "learning_rate": 0.0008691596638655462, "loss": 0.4344, "step": 4777 }, { "epoch": 2.6692737430167597, "grad_norm": 0.5768053531646729, "learning_rate": 0.0008691316526610644, "loss": 0.401, "step": 4778 }, { "epoch": 2.669832402234637, "grad_norm": 0.7169563174247742, "learning_rate": 0.0008691036414565826, "loss": 0.4646, "step": 4779 }, { "epoch": 2.6703910614525137, "grad_norm": 2.339073657989502, "learning_rate": 0.0008690756302521009, "loss": 0.4197, "step": 4780 }, { "epoch": 2.670949720670391, "grad_norm": 0.488383948802948, "learning_rate": 0.0008690476190476191, "loss": 0.5487, "step": 4781 }, { "epoch": 2.671508379888268, "grad_norm": 0.7402324676513672, "learning_rate": 0.0008690196078431373, "loss": 0.5294, "step": 4782 }, { "epoch": 2.672067039106145, "grad_norm": 0.5183477401733398, "learning_rate": 0.0008689915966386554, "loss": 0.4699, "step": 4783 }, { "epoch": 2.672625698324022, "grad_norm": 0.5226608514785767, "learning_rate": 0.0008689635854341736, "loss": 0.4893, "step": 4784 }, { "epoch": 2.673184357541899, "grad_norm": 0.5028848052024841, "learning_rate": 
0.0008689355742296919, "loss": 0.4526, "step": 4785 }, { "epoch": 2.6737430167597767, "grad_norm": 0.46172961592674255, "learning_rate": 0.0008689075630252101, "loss": 0.503, "step": 4786 }, { "epoch": 2.6743016759776537, "grad_norm": 0.4455204904079437, "learning_rate": 0.0008688795518207284, "loss": 0.4155, "step": 4787 }, { "epoch": 2.6748603351955307, "grad_norm": 1.6291117668151855, "learning_rate": 0.0008688515406162465, "loss": 0.4509, "step": 4788 }, { "epoch": 2.6754189944134077, "grad_norm": 1.36480712890625, "learning_rate": 0.0008688235294117647, "loss": 0.4896, "step": 4789 }, { "epoch": 2.6759776536312847, "grad_norm": 1.3482952117919922, "learning_rate": 0.000868795518207283, "loss": 0.4216, "step": 4790 }, { "epoch": 2.676536312849162, "grad_norm": 0.5353065729141235, "learning_rate": 0.0008687675070028012, "loss": 0.5104, "step": 4791 }, { "epoch": 2.677094972067039, "grad_norm": 0.4740195572376251, "learning_rate": 0.0008687394957983194, "loss": 0.5258, "step": 4792 }, { "epoch": 2.677653631284916, "grad_norm": 0.4119703471660614, "learning_rate": 0.0008687114845938375, "loss": 0.4302, "step": 4793 }, { "epoch": 2.678212290502793, "grad_norm": 0.7389618754386902, "learning_rate": 0.0008686834733893557, "loss": 0.5686, "step": 4794 }, { "epoch": 2.67877094972067, "grad_norm": 0.49716615676879883, "learning_rate": 0.000868655462184874, "loss": 0.5795, "step": 4795 }, { "epoch": 2.6793296089385477, "grad_norm": 0.6634438037872314, "learning_rate": 0.0008686274509803922, "loss": 0.4977, "step": 4796 }, { "epoch": 2.6798882681564247, "grad_norm": 0.6217042803764343, "learning_rate": 0.0008685994397759104, "loss": 0.4127, "step": 4797 }, { "epoch": 2.6804469273743017, "grad_norm": 0.45596328377723694, "learning_rate": 0.0008685714285714286, "loss": 0.3753, "step": 4798 }, { "epoch": 2.6810055865921787, "grad_norm": 0.5892964601516724, "learning_rate": 0.0008685434173669467, "loss": 0.5285, "step": 4799 }, { "epoch": 2.6815642458100557, "grad_norm": 
0.5044070482254028, "learning_rate": 0.000868515406162465, "loss": 0.5043, "step": 4800 }, { "epoch": 2.682122905027933, "grad_norm": 1.1690303087234497, "learning_rate": 0.0008684873949579832, "loss": 0.5025, "step": 4801 }, { "epoch": 2.68268156424581, "grad_norm": 0.5131223201751709, "learning_rate": 0.0008684593837535014, "loss": 0.4858, "step": 4802 }, { "epoch": 2.683240223463687, "grad_norm": 0.4487779140472412, "learning_rate": 0.0008684313725490196, "loss": 0.4132, "step": 4803 }, { "epoch": 2.683798882681564, "grad_norm": 0.5181785821914673, "learning_rate": 0.0008684033613445377, "loss": 0.5784, "step": 4804 }, { "epoch": 2.684357541899441, "grad_norm": 0.6847459077835083, "learning_rate": 0.0008683753501400561, "loss": 0.5493, "step": 4805 }, { "epoch": 2.6849162011173187, "grad_norm": 0.7921627163887024, "learning_rate": 0.0008683473389355743, "loss": 0.4167, "step": 4806 }, { "epoch": 2.6854748603351957, "grad_norm": 0.8112843632698059, "learning_rate": 0.0008683193277310925, "loss": 0.5319, "step": 4807 }, { "epoch": 2.6860335195530727, "grad_norm": 0.7470324635505676, "learning_rate": 0.0008682913165266107, "loss": 0.4181, "step": 4808 }, { "epoch": 2.6865921787709497, "grad_norm": 0.6501536965370178, "learning_rate": 0.0008682633053221288, "loss": 0.5372, "step": 4809 }, { "epoch": 2.6871508379888267, "grad_norm": 0.46358639001846313, "learning_rate": 0.0008682352941176471, "loss": 0.527, "step": 4810 }, { "epoch": 2.687709497206704, "grad_norm": 0.37311455607414246, "learning_rate": 0.0008682072829131653, "loss": 0.4203, "step": 4811 }, { "epoch": 2.688268156424581, "grad_norm": 0.6030970811843872, "learning_rate": 0.0008681792717086835, "loss": 0.4161, "step": 4812 }, { "epoch": 2.688826815642458, "grad_norm": 0.6002731323242188, "learning_rate": 0.0008681512605042017, "loss": 0.5618, "step": 4813 }, { "epoch": 2.689385474860335, "grad_norm": 0.8159639835357666, "learning_rate": 0.0008681232492997199, "loss": 0.5235, "step": 4814 }, { "epoch": 
2.689944134078212, "grad_norm": 2.1633708477020264, "learning_rate": 0.0008680952380952381, "loss": 0.5416, "step": 4815 }, { "epoch": 2.6905027932960897, "grad_norm": 0.7509324550628662, "learning_rate": 0.0008680672268907563, "loss": 0.5318, "step": 4816 }, { "epoch": 2.6910614525139662, "grad_norm": 0.553111732006073, "learning_rate": 0.0008680392156862745, "loss": 0.4199, "step": 4817 }, { "epoch": 2.6916201117318437, "grad_norm": 0.85640949010849, "learning_rate": 0.0008680112044817927, "loss": 0.5516, "step": 4818 }, { "epoch": 2.6921787709497207, "grad_norm": 3.5856409072875977, "learning_rate": 0.0008679831932773109, "loss": 0.4912, "step": 4819 }, { "epoch": 2.6927374301675977, "grad_norm": 0.6759235262870789, "learning_rate": 0.0008679551820728292, "loss": 0.4594, "step": 4820 }, { "epoch": 2.6932960893854747, "grad_norm": 1.2774004936218262, "learning_rate": 0.0008679271708683474, "loss": 0.4193, "step": 4821 }, { "epoch": 2.6938547486033517, "grad_norm": 0.9100181460380554, "learning_rate": 0.0008678991596638656, "loss": 0.5827, "step": 4822 }, { "epoch": 2.694413407821229, "grad_norm": 1.0939244031906128, "learning_rate": 0.0008678711484593838, "loss": 0.5164, "step": 4823 }, { "epoch": 2.694972067039106, "grad_norm": 1.1153905391693115, "learning_rate": 0.000867843137254902, "loss": 0.5033, "step": 4824 }, { "epoch": 2.695530726256983, "grad_norm": 0.7720142602920532, "learning_rate": 0.0008678151260504202, "loss": 0.5977, "step": 4825 }, { "epoch": 2.69608938547486, "grad_norm": 0.4999604821205139, "learning_rate": 0.0008677871148459384, "loss": 0.4439, "step": 4826 }, { "epoch": 2.6966480446927372, "grad_norm": 0.6050472259521484, "learning_rate": 0.0008677591036414566, "loss": 0.523, "step": 4827 }, { "epoch": 2.6972067039106147, "grad_norm": 0.9201844334602356, "learning_rate": 0.0008677310924369748, "loss": 0.424, "step": 4828 }, { "epoch": 2.6977653631284917, "grad_norm": 0.39806562662124634, "learning_rate": 0.000867703081232493, "loss": 
0.3526, "step": 4829 }, { "epoch": 2.6983240223463687, "grad_norm": 0.6608548164367676, "learning_rate": 0.0008676750700280113, "loss": 0.4449, "step": 4830 }, { "epoch": 2.6988826815642457, "grad_norm": 0.6510975956916809, "learning_rate": 0.0008676470588235294, "loss": 0.5388, "step": 4831 }, { "epoch": 2.6994413407821227, "grad_norm": 5.686730861663818, "learning_rate": 0.0008676190476190476, "loss": 0.4578, "step": 4832 }, { "epoch": 2.7, "grad_norm": 0.9750047326087952, "learning_rate": 0.0008675910364145658, "loss": 0.5236, "step": 4833 }, { "epoch": 2.700558659217877, "grad_norm": 0.8430211544036865, "learning_rate": 0.000867563025210084, "loss": 0.6356, "step": 4834 }, { "epoch": 2.701117318435754, "grad_norm": 0.6292615532875061, "learning_rate": 0.0008675350140056023, "loss": 0.5324, "step": 4835 }, { "epoch": 2.701675977653631, "grad_norm": 0.6719739437103271, "learning_rate": 0.0008675070028011204, "loss": 0.4924, "step": 4836 }, { "epoch": 2.7022346368715082, "grad_norm": 0.5120917558670044, "learning_rate": 0.0008674789915966387, "loss": 0.3827, "step": 4837 }, { "epoch": 2.7027932960893857, "grad_norm": 1.0064018964767456, "learning_rate": 0.0008674509803921569, "loss": 0.5709, "step": 4838 }, { "epoch": 2.7033519553072627, "grad_norm": 0.8618167042732239, "learning_rate": 0.0008674229691876751, "loss": 0.4471, "step": 4839 }, { "epoch": 2.7039106145251397, "grad_norm": 1.8580939769744873, "learning_rate": 0.0008673949579831934, "loss": 0.4313, "step": 4840 }, { "epoch": 2.7044692737430167, "grad_norm": 0.8321442008018494, "learning_rate": 0.0008673669467787115, "loss": 0.6412, "step": 4841 }, { "epoch": 2.7050279329608937, "grad_norm": 0.6822600364685059, "learning_rate": 0.0008673389355742297, "loss": 0.5894, "step": 4842 }, { "epoch": 2.705586592178771, "grad_norm": 0.6240845322608948, "learning_rate": 0.0008673109243697479, "loss": 0.5778, "step": 4843 }, { "epoch": 2.706145251396648, "grad_norm": 0.9722593426704407, "learning_rate": 
0.0008672829131652661, "loss": 0.3861, "step": 4844 }, { "epoch": 2.706703910614525, "grad_norm": 0.8506166934967041, "learning_rate": 0.0008672549019607844, "loss": 0.515, "step": 4845 }, { "epoch": 2.707262569832402, "grad_norm": 0.9309301376342773, "learning_rate": 0.0008672268907563026, "loss": 0.4268, "step": 4846 }, { "epoch": 2.707821229050279, "grad_norm": 0.4745652973651886, "learning_rate": 0.0008671988795518207, "loss": 0.5309, "step": 4847 }, { "epoch": 2.7083798882681567, "grad_norm": 0.9603132605552673, "learning_rate": 0.0008671708683473389, "loss": 0.5937, "step": 4848 }, { "epoch": 2.7089385474860332, "grad_norm": 0.5685709714889526, "learning_rate": 0.0008671428571428571, "loss": 0.4115, "step": 4849 }, { "epoch": 2.7094972067039107, "grad_norm": 0.6907859444618225, "learning_rate": 0.0008671148459383754, "loss": 0.3736, "step": 4850 }, { "epoch": 2.7100558659217877, "grad_norm": 0.66511470079422, "learning_rate": 0.0008670868347338936, "loss": 0.4716, "step": 4851 }, { "epoch": 2.7106145251396647, "grad_norm": 7.405402660369873, "learning_rate": 0.0008670588235294117, "loss": 0.4322, "step": 4852 }, { "epoch": 2.711173184357542, "grad_norm": 0.5701492428779602, "learning_rate": 0.00086703081232493, "loss": 0.4989, "step": 4853 }, { "epoch": 2.7117318435754187, "grad_norm": 0.4032461941242218, "learning_rate": 0.0008670028011204482, "loss": 0.4713, "step": 4854 }, { "epoch": 2.712290502793296, "grad_norm": 0.590755820274353, "learning_rate": 0.0008669747899159665, "loss": 0.4635, "step": 4855 }, { "epoch": 2.712849162011173, "grad_norm": 0.7063531279563904, "learning_rate": 0.0008669467787114847, "loss": 0.4278, "step": 4856 }, { "epoch": 2.71340782122905, "grad_norm": 2.3604865074157715, "learning_rate": 0.0008669187675070028, "loss": 0.6278, "step": 4857 }, { "epoch": 2.7139664804469272, "grad_norm": 0.5989410281181335, "learning_rate": 0.000866890756302521, "loss": 0.4891, "step": 4858 }, { "epoch": 2.7145251396648042, "grad_norm": 
1.384153127670288, "learning_rate": 0.0008668627450980392, "loss": 0.4805, "step": 4859 }, { "epoch": 2.7150837988826817, "grad_norm": 9.467945098876953, "learning_rate": 0.0008668347338935575, "loss": 0.4178, "step": 4860 }, { "epoch": 2.7156424581005587, "grad_norm": 0.7025426626205444, "learning_rate": 0.0008668067226890757, "loss": 0.4701, "step": 4861 }, { "epoch": 2.7162011173184357, "grad_norm": 0.6365002989768982, "learning_rate": 0.0008667787114845939, "loss": 0.5429, "step": 4862 }, { "epoch": 2.7167597765363127, "grad_norm": 3.0573928356170654, "learning_rate": 0.000866750700280112, "loss": 0.4711, "step": 4863 }, { "epoch": 2.7173184357541897, "grad_norm": 1.6173280477523804, "learning_rate": 0.0008667226890756302, "loss": 0.5303, "step": 4864 }, { "epoch": 2.717877094972067, "grad_norm": 16.50718879699707, "learning_rate": 0.0008666946778711485, "loss": 0.5031, "step": 4865 }, { "epoch": 2.718435754189944, "grad_norm": 0.6878076195716858, "learning_rate": 0.0008666666666666667, "loss": 0.4979, "step": 4866 }, { "epoch": 2.718994413407821, "grad_norm": 0.42856016755104065, "learning_rate": 0.0008666386554621849, "loss": 0.4511, "step": 4867 }, { "epoch": 2.7195530726256982, "grad_norm": 0.4885893762111664, "learning_rate": 0.000866610644257703, "loss": 0.4235, "step": 4868 }, { "epoch": 2.7201117318435752, "grad_norm": 0.9483295679092407, "learning_rate": 0.0008665826330532212, "loss": 0.4626, "step": 4869 }, { "epoch": 2.7206703910614527, "grad_norm": 0.48496589064598083, "learning_rate": 0.0008665546218487396, "loss": 0.4604, "step": 4870 }, { "epoch": 2.7212290502793297, "grad_norm": 0.4748501181602478, "learning_rate": 0.0008665266106442578, "loss": 0.4825, "step": 4871 }, { "epoch": 2.7217877094972067, "grad_norm": 0.441242516040802, "learning_rate": 0.000866498599439776, "loss": 0.3897, "step": 4872 }, { "epoch": 2.7223463687150837, "grad_norm": 0.6068809628486633, "learning_rate": 0.0008664705882352941, "loss": 0.4513, "step": 4873 }, { "epoch": 
2.7229050279329607, "grad_norm": 0.5913217067718506, "learning_rate": 0.0008664425770308123, "loss": 0.4436, "step": 4874 }, { "epoch": 2.723463687150838, "grad_norm": 0.45163917541503906, "learning_rate": 0.0008664145658263306, "loss": 0.4221, "step": 4875 }, { "epoch": 2.724022346368715, "grad_norm": 2.0871331691741943, "learning_rate": 0.0008663865546218488, "loss": 0.5328, "step": 4876 }, { "epoch": 2.724581005586592, "grad_norm": 0.6465966701507568, "learning_rate": 0.000866358543417367, "loss": 0.5189, "step": 4877 }, { "epoch": 2.7251396648044692, "grad_norm": 0.4940882623195648, "learning_rate": 0.0008663305322128852, "loss": 0.4098, "step": 4878 }, { "epoch": 2.7256983240223462, "grad_norm": 0.6184472441673279, "learning_rate": 0.0008663025210084033, "loss": 0.3774, "step": 4879 }, { "epoch": 2.7262569832402237, "grad_norm": 0.7680293917655945, "learning_rate": 0.0008662745098039216, "loss": 0.5625, "step": 4880 }, { "epoch": 2.7268156424581007, "grad_norm": 2.2876410484313965, "learning_rate": 0.0008662464985994398, "loss": 0.3986, "step": 4881 }, { "epoch": 2.7273743016759777, "grad_norm": 0.501945436000824, "learning_rate": 0.000866218487394958, "loss": 0.3579, "step": 4882 }, { "epoch": 2.7279329608938547, "grad_norm": 0.4877082407474518, "learning_rate": 0.0008661904761904762, "loss": 0.4911, "step": 4883 }, { "epoch": 2.7284916201117317, "grad_norm": 0.6780194640159607, "learning_rate": 0.0008661624649859943, "loss": 0.5406, "step": 4884 }, { "epoch": 2.729050279329609, "grad_norm": 1.2068274021148682, "learning_rate": 0.0008661344537815126, "loss": 0.4233, "step": 4885 }, { "epoch": 2.7296089385474858, "grad_norm": 0.6840962767601013, "learning_rate": 0.0008661064425770309, "loss": 0.586, "step": 4886 }, { "epoch": 2.730167597765363, "grad_norm": 0.8631529211997986, "learning_rate": 0.0008660784313725491, "loss": 0.4502, "step": 4887 }, { "epoch": 2.7307262569832402, "grad_norm": 0.4405871331691742, "learning_rate": 0.0008660504201680673, "loss": 
0.4645, "step": 4888 }, { "epoch": 2.7312849162011172, "grad_norm": 0.45140179991722107, "learning_rate": 0.0008660224089635854, "loss": 0.4729, "step": 4889 }, { "epoch": 2.7318435754189943, "grad_norm": 0.6622568368911743, "learning_rate": 0.0008659943977591037, "loss": 0.4638, "step": 4890 }, { "epoch": 2.7324022346368713, "grad_norm": 0.5176422595977783, "learning_rate": 0.0008659663865546219, "loss": 0.4697, "step": 4891 }, { "epoch": 2.7329608938547487, "grad_norm": 0.45798808336257935, "learning_rate": 0.0008659383753501401, "loss": 0.4678, "step": 4892 }, { "epoch": 2.7335195530726257, "grad_norm": 2.073618173599243, "learning_rate": 0.0008659103641456583, "loss": 0.505, "step": 4893 }, { "epoch": 2.7340782122905027, "grad_norm": 0.5658608675003052, "learning_rate": 0.0008658823529411765, "loss": 0.4332, "step": 4894 }, { "epoch": 2.7346368715083798, "grad_norm": 1.4124257564544678, "learning_rate": 0.0008658543417366947, "loss": 0.4317, "step": 4895 }, { "epoch": 2.7351955307262568, "grad_norm": 0.5639500617980957, "learning_rate": 0.0008658263305322129, "loss": 0.419, "step": 4896 }, { "epoch": 2.735754189944134, "grad_norm": 0.4894201457500458, "learning_rate": 0.0008657983193277311, "loss": 0.4356, "step": 4897 }, { "epoch": 2.7363128491620112, "grad_norm": 2.7163777351379395, "learning_rate": 0.0008657703081232493, "loss": 0.5886, "step": 4898 }, { "epoch": 2.7368715083798882, "grad_norm": 0.5326542854309082, "learning_rate": 0.0008657422969187675, "loss": 0.5377, "step": 4899 }, { "epoch": 2.7374301675977653, "grad_norm": 0.6481336951255798, "learning_rate": 0.0008657142857142857, "loss": 0.4702, "step": 4900 }, { "epoch": 2.7379888268156423, "grad_norm": 0.4442083239555359, "learning_rate": 0.0008656862745098039, "loss": 0.3616, "step": 4901 }, { "epoch": 2.7385474860335197, "grad_norm": 1.6935418844223022, "learning_rate": 0.0008656582633053222, "loss": 0.4699, "step": 4902 }, { "epoch": 2.7391061452513967, "grad_norm": 0.5431769490242004, 
"learning_rate": 0.0008656302521008404, "loss": 0.5993, "step": 4903 }, { "epoch": 2.7396648044692737, "grad_norm": 4.07416296005249, "learning_rate": 0.0008656022408963586, "loss": 0.3618, "step": 4904 }, { "epoch": 2.7402234636871508, "grad_norm": 0.7660875916481018, "learning_rate": 0.0008655742296918767, "loss": 0.4801, "step": 4905 }, { "epoch": 2.7407821229050278, "grad_norm": 0.9906030893325806, "learning_rate": 0.000865546218487395, "loss": 0.4216, "step": 4906 }, { "epoch": 2.741340782122905, "grad_norm": 0.586628794670105, "learning_rate": 0.0008655182072829132, "loss": 0.588, "step": 4907 }, { "epoch": 2.7418994413407822, "grad_norm": 0.4912775456905365, "learning_rate": 0.0008654901960784314, "loss": 0.4886, "step": 4908 }, { "epoch": 2.7424581005586592, "grad_norm": 0.7644976377487183, "learning_rate": 0.0008654621848739496, "loss": 0.5569, "step": 4909 }, { "epoch": 2.7430167597765363, "grad_norm": 0.5250295996665955, "learning_rate": 0.0008654341736694678, "loss": 0.5051, "step": 4910 }, { "epoch": 2.7435754189944133, "grad_norm": 0.49718669056892395, "learning_rate": 0.000865406162464986, "loss": 0.5335, "step": 4911 }, { "epoch": 2.7441340782122907, "grad_norm": 0.6537137031555176, "learning_rate": 0.0008653781512605042, "loss": 0.4848, "step": 4912 }, { "epoch": 2.7446927374301677, "grad_norm": 0.636180579662323, "learning_rate": 0.0008653501400560224, "loss": 0.4697, "step": 4913 }, { "epoch": 2.7452513966480447, "grad_norm": 1.114094853401184, "learning_rate": 0.0008653221288515406, "loss": 0.5721, "step": 4914 }, { "epoch": 2.7458100558659218, "grad_norm": 0.44556185603141785, "learning_rate": 0.0008652941176470588, "loss": 0.3715, "step": 4915 }, { "epoch": 2.7463687150837988, "grad_norm": 0.7542825937271118, "learning_rate": 0.000865266106442577, "loss": 0.5613, "step": 4916 }, { "epoch": 2.746927374301676, "grad_norm": 0.6731939315795898, "learning_rate": 0.0008652380952380952, "loss": 0.4524, "step": 4917 }, { "epoch": 2.7474860335195532, 
"grad_norm": 0.5472222566604614, "learning_rate": 0.0008652100840336134, "loss": 0.3921, "step": 4918 }, { "epoch": 2.7480446927374302, "grad_norm": 0.5595595240592957, "learning_rate": 0.0008651820728291317, "loss": 0.5001, "step": 4919 }, { "epoch": 2.7486033519553073, "grad_norm": 0.5129935145378113, "learning_rate": 0.0008651540616246499, "loss": 0.4708, "step": 4920 }, { "epoch": 2.7491620111731843, "grad_norm": 0.676902174949646, "learning_rate": 0.0008651260504201682, "loss": 0.4269, "step": 4921 }, { "epoch": 2.7497206703910617, "grad_norm": 0.551462709903717, "learning_rate": 0.0008650980392156863, "loss": 0.4705, "step": 4922 }, { "epoch": 2.7502793296089383, "grad_norm": 0.5070472955703735, "learning_rate": 0.0008650700280112045, "loss": 0.5748, "step": 4923 }, { "epoch": 2.7508379888268157, "grad_norm": 1.1104278564453125, "learning_rate": 0.0008650420168067227, "loss": 0.5349, "step": 4924 }, { "epoch": 2.7513966480446927, "grad_norm": 0.5214980840682983, "learning_rate": 0.0008650140056022409, "loss": 0.455, "step": 4925 }, { "epoch": 2.7519553072625698, "grad_norm": 0.6127670407295227, "learning_rate": 0.0008649859943977592, "loss": 0.3987, "step": 4926 }, { "epoch": 2.7525139664804468, "grad_norm": 0.6497572660446167, "learning_rate": 0.0008649579831932773, "loss": 0.5737, "step": 4927 }, { "epoch": 2.753072625698324, "grad_norm": 0.6777095198631287, "learning_rate": 0.0008649299719887955, "loss": 0.4693, "step": 4928 }, { "epoch": 2.7536312849162012, "grad_norm": 0.700614869594574, "learning_rate": 0.0008649019607843137, "loss": 0.904, "step": 4929 }, { "epoch": 2.7541899441340782, "grad_norm": 0.5954992175102234, "learning_rate": 0.0008648739495798319, "loss": 0.5217, "step": 4930 }, { "epoch": 2.7547486033519553, "grad_norm": 0.6739497780799866, "learning_rate": 0.0008648459383753502, "loss": 0.4592, "step": 4931 }, { "epoch": 2.7553072625698323, "grad_norm": 0.5994608402252197, "learning_rate": 0.0008648179271708683, "loss": 0.5233, "step": 4932 
}, { "epoch": 2.7558659217877093, "grad_norm": 4.368466854095459, "learning_rate": 0.0008647899159663865, "loss": 0.4687, "step": 4933 }, { "epoch": 2.7564245810055867, "grad_norm": 1.4562932252883911, "learning_rate": 0.0008647619047619047, "loss": 0.4545, "step": 4934 }, { "epoch": 2.7569832402234637, "grad_norm": 0.650810718536377, "learning_rate": 0.000864733893557423, "loss": 0.4358, "step": 4935 }, { "epoch": 2.7575418994413408, "grad_norm": 0.8807802796363831, "learning_rate": 0.0008647058823529413, "loss": 0.6186, "step": 4936 }, { "epoch": 2.7581005586592178, "grad_norm": 1.7569150924682617, "learning_rate": 0.0008646778711484595, "loss": 0.4967, "step": 4937 }, { "epoch": 2.758659217877095, "grad_norm": 0.48197001218795776, "learning_rate": 0.0008646498599439776, "loss": 0.4785, "step": 4938 }, { "epoch": 2.7592178770949722, "grad_norm": 1.0546276569366455, "learning_rate": 0.0008646218487394958, "loss": 0.4092, "step": 4939 }, { "epoch": 2.7597765363128492, "grad_norm": 0.7439518570899963, "learning_rate": 0.000864593837535014, "loss": 0.5937, "step": 4940 }, { "epoch": 2.7603351955307263, "grad_norm": 0.5570810437202454, "learning_rate": 0.0008645658263305323, "loss": 0.432, "step": 4941 }, { "epoch": 2.7608938547486033, "grad_norm": 0.6318339705467224, "learning_rate": 0.0008645378151260505, "loss": 0.5582, "step": 4942 }, { "epoch": 2.7614525139664803, "grad_norm": 2.140132427215576, "learning_rate": 0.0008645098039215686, "loss": 0.5217, "step": 4943 }, { "epoch": 2.7620111731843577, "grad_norm": 0.5386053323745728, "learning_rate": 0.0008644817927170868, "loss": 0.5861, "step": 4944 }, { "epoch": 2.7625698324022347, "grad_norm": 1.3792846202850342, "learning_rate": 0.000864453781512605, "loss": 0.5431, "step": 4945 }, { "epoch": 2.7631284916201118, "grad_norm": 0.8350787162780762, "learning_rate": 0.0008644257703081233, "loss": 0.4192, "step": 4946 }, { "epoch": 2.7636871508379888, "grad_norm": 0.6028815507888794, "learning_rate": 
0.0008643977591036415, "loss": 0.3765, "step": 4947 }, { "epoch": 2.764245810055866, "grad_norm": 0.7146716713905334, "learning_rate": 0.0008643697478991596, "loss": 0.4428, "step": 4948 }, { "epoch": 2.7648044692737432, "grad_norm": 0.491089791059494, "learning_rate": 0.0008643417366946778, "loss": 0.4343, "step": 4949 }, { "epoch": 2.7653631284916202, "grad_norm": 0.615129828453064, "learning_rate": 0.000864313725490196, "loss": 0.398, "step": 4950 }, { "epoch": 2.7659217877094973, "grad_norm": 0.448180615901947, "learning_rate": 0.0008642857142857144, "loss": 0.3798, "step": 4951 }, { "epoch": 2.7664804469273743, "grad_norm": 0.8530778884887695, "learning_rate": 0.0008642577030812326, "loss": 0.5042, "step": 4952 }, { "epoch": 2.7670391061452513, "grad_norm": 0.7157233953475952, "learning_rate": 0.0008642296918767508, "loss": 0.5035, "step": 4953 }, { "epoch": 2.7675977653631287, "grad_norm": 8.344500541687012, "learning_rate": 0.0008642016806722689, "loss": 0.543, "step": 4954 }, { "epoch": 2.7681564245810057, "grad_norm": 0.6947936415672302, "learning_rate": 0.0008641736694677871, "loss": 0.4683, "step": 4955 }, { "epoch": 2.7687150837988828, "grad_norm": 0.5956802368164062, "learning_rate": 0.0008641456582633054, "loss": 0.5348, "step": 4956 }, { "epoch": 2.7692737430167598, "grad_norm": 0.8770701885223389, "learning_rate": 0.0008641176470588236, "loss": 0.3736, "step": 4957 }, { "epoch": 2.769832402234637, "grad_norm": 0.5714986324310303, "learning_rate": 0.0008640896358543418, "loss": 0.448, "step": 4958 }, { "epoch": 2.7703910614525142, "grad_norm": 0.5729160904884338, "learning_rate": 0.0008640616246498599, "loss": 0.4438, "step": 4959 }, { "epoch": 2.770949720670391, "grad_norm": 0.3909223675727844, "learning_rate": 0.0008640336134453781, "loss": 0.3621, "step": 4960 }, { "epoch": 2.7715083798882683, "grad_norm": 0.4876652657985687, "learning_rate": 0.0008640056022408964, "loss": 0.5069, "step": 4961 }, { "epoch": 2.7720670391061453, "grad_norm": 
0.6069240570068359, "learning_rate": 0.0008639775910364146, "loss": 0.4026, "step": 4962 }, { "epoch": 2.7726256983240223, "grad_norm": 0.710731029510498, "learning_rate": 0.0008639495798319328, "loss": 0.5177, "step": 4963 }, { "epoch": 2.7731843575418993, "grad_norm": 0.5441970229148865, "learning_rate": 0.0008639215686274509, "loss": 0.4089, "step": 4964 }, { "epoch": 2.7737430167597763, "grad_norm": 0.5625534057617188, "learning_rate": 0.0008638935574229691, "loss": 0.3967, "step": 4965 }, { "epoch": 2.7743016759776538, "grad_norm": 3.055351734161377, "learning_rate": 0.0008638655462184874, "loss": 0.4838, "step": 4966 }, { "epoch": 2.7748603351955308, "grad_norm": 0.5649999976158142, "learning_rate": 0.0008638375350140056, "loss": 0.5054, "step": 4967 }, { "epoch": 2.775418994413408, "grad_norm": 1.1882247924804688, "learning_rate": 0.0008638095238095239, "loss": 0.4907, "step": 4968 }, { "epoch": 2.775977653631285, "grad_norm": 0.5228495001792908, "learning_rate": 0.0008637815126050421, "loss": 0.5175, "step": 4969 }, { "epoch": 2.776536312849162, "grad_norm": 0.7560868263244629, "learning_rate": 0.0008637535014005602, "loss": 0.4681, "step": 4970 }, { "epoch": 2.7770949720670393, "grad_norm": 0.7712761759757996, "learning_rate": 0.0008637254901960785, "loss": 0.429, "step": 4971 }, { "epoch": 2.7776536312849163, "grad_norm": 0.5001828670501709, "learning_rate": 0.0008636974789915967, "loss": 0.3925, "step": 4972 }, { "epoch": 2.7782122905027933, "grad_norm": 2.4527015686035156, "learning_rate": 0.0008636694677871149, "loss": 0.3796, "step": 4973 }, { "epoch": 2.7787709497206703, "grad_norm": 2.300713062286377, "learning_rate": 0.0008636414565826331, "loss": 0.3545, "step": 4974 }, { "epoch": 2.7793296089385473, "grad_norm": 0.5880308747291565, "learning_rate": 0.0008636134453781512, "loss": 0.4987, "step": 4975 }, { "epoch": 2.7798882681564248, "grad_norm": 0.4641307294368744, "learning_rate": 0.0008635854341736695, "loss": 0.3935, "step": 4976 }, { "epoch": 
2.7804469273743018, "grad_norm": 0.6852543950080872, "learning_rate": 0.0008635574229691877, "loss": 0.4961, "step": 4977 }, { "epoch": 2.781005586592179, "grad_norm": 0.6853237748146057, "learning_rate": 0.0008635294117647059, "loss": 0.4439, "step": 4978 }, { "epoch": 2.781564245810056, "grad_norm": 0.6090775728225708, "learning_rate": 0.0008635014005602241, "loss": 0.4761, "step": 4979 }, { "epoch": 2.782122905027933, "grad_norm": 0.5112475156784058, "learning_rate": 0.0008634733893557422, "loss": 0.3353, "step": 4980 }, { "epoch": 2.7826815642458103, "grad_norm": 0.49931249022483826, "learning_rate": 0.0008634453781512605, "loss": 0.5073, "step": 4981 }, { "epoch": 2.7832402234636873, "grad_norm": 0.4504586160182953, "learning_rate": 0.0008634173669467787, "loss": 0.6052, "step": 4982 }, { "epoch": 2.7837988826815643, "grad_norm": 0.733995795249939, "learning_rate": 0.0008633893557422969, "loss": 0.5849, "step": 4983 }, { "epoch": 2.7843575418994413, "grad_norm": 0.5288225412368774, "learning_rate": 0.0008633613445378152, "loss": 0.5129, "step": 4984 }, { "epoch": 2.7849162011173183, "grad_norm": 0.3947197198867798, "learning_rate": 0.0008633333333333334, "loss": 0.4521, "step": 4985 }, { "epoch": 2.7854748603351958, "grad_norm": 0.4959245026111603, "learning_rate": 0.0008633053221288516, "loss": 0.4244, "step": 4986 }, { "epoch": 2.7860335195530728, "grad_norm": 0.49115273356437683, "learning_rate": 0.0008632773109243698, "loss": 0.4335, "step": 4987 }, { "epoch": 2.78659217877095, "grad_norm": 0.5411583781242371, "learning_rate": 0.000863249299719888, "loss": 0.4724, "step": 4988 }, { "epoch": 2.787150837988827, "grad_norm": 0.47826892137527466, "learning_rate": 0.0008632212885154062, "loss": 0.4941, "step": 4989 }, { "epoch": 2.787709497206704, "grad_norm": 1.393666386604309, "learning_rate": 0.0008631932773109244, "loss": 0.4298, "step": 4990 }, { "epoch": 2.7882681564245813, "grad_norm": 0.7982737421989441, "learning_rate": 0.0008631652661064426, "loss": 
0.4934, "step": 4991 }, { "epoch": 2.788826815642458, "grad_norm": 0.5343127250671387, "learning_rate": 0.0008631372549019608, "loss": 0.5208, "step": 4992 }, { "epoch": 2.7893854748603353, "grad_norm": 0.9246816039085388, "learning_rate": 0.000863109243697479, "loss": 0.6124, "step": 4993 }, { "epoch": 2.7899441340782123, "grad_norm": 0.6478784084320068, "learning_rate": 0.0008630812324929972, "loss": 0.5112, "step": 4994 }, { "epoch": 2.7905027932960893, "grad_norm": 0.4586789011955261, "learning_rate": 0.0008630532212885154, "loss": 0.397, "step": 4995 }, { "epoch": 2.7910614525139668, "grad_norm": 0.7183420658111572, "learning_rate": 0.0008630252100840336, "loss": 0.4592, "step": 4996 }, { "epoch": 2.7916201117318433, "grad_norm": 0.6217768788337708, "learning_rate": 0.0008629971988795518, "loss": 0.5333, "step": 4997 }, { "epoch": 2.792178770949721, "grad_norm": 0.6634668707847595, "learning_rate": 0.00086296918767507, "loss": 0.5981, "step": 4998 }, { "epoch": 2.792737430167598, "grad_norm": 0.5104075074195862, "learning_rate": 0.0008629411764705882, "loss": 0.5016, "step": 4999 }, { "epoch": 2.793296089385475, "grad_norm": 0.8200002312660217, "learning_rate": 0.0008629131652661064, "loss": 0.4535, "step": 5000 }, { "epoch": 2.793296089385475, "eval_cer": 0.09497779668968828, "eval_loss": 0.359159916639328, "eval_runtime": 55.6906, "eval_samples_per_second": 81.486, "eval_steps_per_second": 5.1, "eval_wer": 0.3739001703863021, "step": 5000 }, { "epoch": 2.793854748603352, "grad_norm": 0.54245924949646, "learning_rate": 0.0008628851540616248, "loss": 0.4048, "step": 5001 }, { "epoch": 2.794413407821229, "grad_norm": 0.6140114068984985, "learning_rate": 0.0008628571428571429, "loss": 0.4892, "step": 5002 }, { "epoch": 2.7949720670391063, "grad_norm": 0.8202154636383057, "learning_rate": 0.0008628291316526611, "loss": 0.4681, "step": 5003 }, { "epoch": 2.7955307262569833, "grad_norm": 0.5861547589302063, "learning_rate": 0.0008628011204481793, "loss": 0.4486, 
"step": 5004 }, { "epoch": 2.7960893854748603, "grad_norm": 1.1219669580459595, "learning_rate": 0.0008627731092436975, "loss": 0.48, "step": 5005 }, { "epoch": 2.7966480446927373, "grad_norm": 0.603406548500061, "learning_rate": 0.0008627450980392158, "loss": 0.4598, "step": 5006 }, { "epoch": 2.7972067039106143, "grad_norm": 1.0853874683380127, "learning_rate": 0.0008627170868347339, "loss": 0.7008, "step": 5007 }, { "epoch": 2.7977653631284918, "grad_norm": 0.48637187480926514, "learning_rate": 0.0008626890756302521, "loss": 0.3929, "step": 5008 }, { "epoch": 2.798324022346369, "grad_norm": 0.5474040508270264, "learning_rate": 0.0008626610644257703, "loss": 0.4781, "step": 5009 }, { "epoch": 2.798882681564246, "grad_norm": 0.6707407832145691, "learning_rate": 0.0008626330532212885, "loss": 0.5671, "step": 5010 }, { "epoch": 2.799441340782123, "grad_norm": 0.5939853191375732, "learning_rate": 0.0008626050420168068, "loss": 0.4217, "step": 5011 }, { "epoch": 2.8, "grad_norm": 0.561815083026886, "learning_rate": 0.0008625770308123249, "loss": 0.4973, "step": 5012 }, { "epoch": 2.8005586592178773, "grad_norm": 0.3941192328929901, "learning_rate": 0.0008625490196078431, "loss": 0.3137, "step": 5013 }, { "epoch": 2.8011173184357543, "grad_norm": 0.8116780519485474, "learning_rate": 0.0008625210084033613, "loss": 0.5507, "step": 5014 }, { "epoch": 2.8016759776536313, "grad_norm": 0.8271045088768005, "learning_rate": 0.0008624929971988795, "loss": 0.4198, "step": 5015 }, { "epoch": 2.8022346368715083, "grad_norm": 0.45983797311782837, "learning_rate": 0.0008624649859943979, "loss": 0.4398, "step": 5016 }, { "epoch": 2.8027932960893853, "grad_norm": 0.7537004947662354, "learning_rate": 0.0008624369747899161, "loss": 0.4461, "step": 5017 }, { "epoch": 2.8033519553072628, "grad_norm": 0.6530876755714417, "learning_rate": 0.0008624089635854342, "loss": 0.4467, "step": 5018 }, { "epoch": 2.80391061452514, "grad_norm": 0.7464740872383118, "learning_rate": 
0.0008623809523809524, "loss": 0.4614, "step": 5019 }, { "epoch": 2.804469273743017, "grad_norm": 0.8537314534187317, "learning_rate": 0.0008623529411764706, "loss": 0.5005, "step": 5020 }, { "epoch": 2.805027932960894, "grad_norm": 0.5359246134757996, "learning_rate": 0.0008623249299719889, "loss": 0.6119, "step": 5021 }, { "epoch": 2.805586592178771, "grad_norm": 0.5170433521270752, "learning_rate": 0.0008622969187675071, "loss": 0.4415, "step": 5022 }, { "epoch": 2.8061452513966483, "grad_norm": 0.5501569509506226, "learning_rate": 0.0008622689075630252, "loss": 0.5106, "step": 5023 }, { "epoch": 2.8067039106145253, "grad_norm": 0.4375327229499817, "learning_rate": 0.0008622408963585434, "loss": 0.4642, "step": 5024 }, { "epoch": 2.8072625698324023, "grad_norm": 0.4825558364391327, "learning_rate": 0.0008622128851540616, "loss": 0.4577, "step": 5025 }, { "epoch": 2.8078212290502793, "grad_norm": 0.4116192162036896, "learning_rate": 0.0008621848739495799, "loss": 0.2807, "step": 5026 }, { "epoch": 2.8083798882681563, "grad_norm": 0.5142484903335571, "learning_rate": 0.0008621568627450981, "loss": 0.4113, "step": 5027 }, { "epoch": 2.8089385474860338, "grad_norm": 0.5388715863227844, "learning_rate": 0.0008621288515406162, "loss": 0.4539, "step": 5028 }, { "epoch": 2.8094972067039103, "grad_norm": 0.649519145488739, "learning_rate": 0.0008621008403361344, "loss": 0.4808, "step": 5029 }, { "epoch": 2.810055865921788, "grad_norm": 0.437104195356369, "learning_rate": 0.0008620728291316526, "loss": 0.4824, "step": 5030 }, { "epoch": 2.810614525139665, "grad_norm": 0.5369560718536377, "learning_rate": 0.0008620448179271709, "loss": 0.5076, "step": 5031 }, { "epoch": 2.811173184357542, "grad_norm": 0.5922414064407349, "learning_rate": 0.0008620168067226891, "loss": 0.4777, "step": 5032 }, { "epoch": 2.811731843575419, "grad_norm": 1.1569852828979492, "learning_rate": 0.0008619887955182074, "loss": 0.4211, "step": 5033 }, { "epoch": 2.812290502793296, "grad_norm": 
0.5336988568305969, "learning_rate": 0.0008619607843137255, "loss": 0.4623, "step": 5034 }, { "epoch": 2.8128491620111733, "grad_norm": 0.47945383191108704, "learning_rate": 0.0008619327731092437, "loss": 0.3797, "step": 5035 }, { "epoch": 2.8134078212290503, "grad_norm": 1.145865797996521, "learning_rate": 0.000861904761904762, "loss": 0.4892, "step": 5036 }, { "epoch": 2.8139664804469273, "grad_norm": 0.5120701789855957, "learning_rate": 0.0008618767507002802, "loss": 0.4875, "step": 5037 }, { "epoch": 2.8145251396648043, "grad_norm": 2.864386796951294, "learning_rate": 0.0008618487394957984, "loss": 0.5305, "step": 5038 }, { "epoch": 2.8150837988826813, "grad_norm": 1.0483943223953247, "learning_rate": 0.0008618207282913165, "loss": 0.3532, "step": 5039 }, { "epoch": 2.815642458100559, "grad_norm": 0.45070531964302063, "learning_rate": 0.0008617927170868347, "loss": 0.366, "step": 5040 }, { "epoch": 2.816201117318436, "grad_norm": 0.395125150680542, "learning_rate": 0.000861764705882353, "loss": 0.451, "step": 5041 }, { "epoch": 2.816759776536313, "grad_norm": 0.435016930103302, "learning_rate": 0.0008617366946778712, "loss": 0.4727, "step": 5042 }, { "epoch": 2.81731843575419, "grad_norm": 0.5069654583930969, "learning_rate": 0.0008617086834733894, "loss": 0.3629, "step": 5043 }, { "epoch": 2.817877094972067, "grad_norm": 0.5725529193878174, "learning_rate": 0.0008616806722689075, "loss": 0.4249, "step": 5044 }, { "epoch": 2.8184357541899443, "grad_norm": 0.5819624066352844, "learning_rate": 0.0008616526610644257, "loss": 0.502, "step": 5045 }, { "epoch": 2.8189944134078213, "grad_norm": 1.1757373809814453, "learning_rate": 0.000861624649859944, "loss": 0.5448, "step": 5046 }, { "epoch": 2.8195530726256983, "grad_norm": 0.3719157576560974, "learning_rate": 0.0008615966386554622, "loss": 0.4092, "step": 5047 }, { "epoch": 2.8201117318435753, "grad_norm": 0.49994513392448425, "learning_rate": 0.0008615686274509804, "loss": 0.4789, "step": 5048 }, { "epoch": 
2.8206703910614523, "grad_norm": 0.8068432807922363, "learning_rate": 0.0008615406162464986, "loss": 0.3884, "step": 5049 }, { "epoch": 2.82122905027933, "grad_norm": 0.5980684161186218, "learning_rate": 0.0008615126050420167, "loss": 0.4437, "step": 5050 }, { "epoch": 2.821787709497207, "grad_norm": 0.6048242449760437, "learning_rate": 0.0008614845938375351, "loss": 0.4221, "step": 5051 }, { "epoch": 2.822346368715084, "grad_norm": 0.9423447847366333, "learning_rate": 0.0008614565826330533, "loss": 0.4867, "step": 5052 }, { "epoch": 2.822905027932961, "grad_norm": 0.8989359736442566, "learning_rate": 0.0008614285714285715, "loss": 0.6963, "step": 5053 }, { "epoch": 2.823463687150838, "grad_norm": 0.6020029187202454, "learning_rate": 0.0008614005602240897, "loss": 0.5347, "step": 5054 }, { "epoch": 2.8240223463687153, "grad_norm": 0.6163732409477234, "learning_rate": 0.0008613725490196078, "loss": 0.5634, "step": 5055 }, { "epoch": 2.8245810055865923, "grad_norm": 0.7855117917060852, "learning_rate": 0.0008613445378151261, "loss": 0.5319, "step": 5056 }, { "epoch": 2.8251396648044693, "grad_norm": 0.4465838670730591, "learning_rate": 0.0008613165266106443, "loss": 0.4587, "step": 5057 }, { "epoch": 2.8256983240223463, "grad_norm": 3.268709897994995, "learning_rate": 0.0008612885154061625, "loss": 0.444, "step": 5058 }, { "epoch": 2.8262569832402233, "grad_norm": 0.5292797088623047, "learning_rate": 0.0008612605042016807, "loss": 0.3942, "step": 5059 }, { "epoch": 2.826815642458101, "grad_norm": 0.4475780725479126, "learning_rate": 0.0008612324929971988, "loss": 0.4248, "step": 5060 }, { "epoch": 2.827374301675978, "grad_norm": 0.4191311299800873, "learning_rate": 0.0008612044817927171, "loss": 0.4121, "step": 5061 }, { "epoch": 2.827932960893855, "grad_norm": 1.642168641090393, "learning_rate": 0.0008611764705882353, "loss": 0.416, "step": 5062 }, { "epoch": 2.828491620111732, "grad_norm": 1.054935097694397, "learning_rate": 0.0008611484593837535, "loss": 0.4342, 
"step": 5063 }, { "epoch": 2.829050279329609, "grad_norm": 0.5002058744430542, "learning_rate": 0.0008611204481792717, "loss": 0.411, "step": 5064 }, { "epoch": 2.8296089385474863, "grad_norm": 0.5005667805671692, "learning_rate": 0.0008610924369747899, "loss": 0.3589, "step": 5065 }, { "epoch": 2.830167597765363, "grad_norm": 1.0537171363830566, "learning_rate": 0.0008610644257703082, "loss": 0.479, "step": 5066 }, { "epoch": 2.8307262569832403, "grad_norm": 0.433057963848114, "learning_rate": 0.0008610364145658264, "loss": 0.4614, "step": 5067 }, { "epoch": 2.8312849162011173, "grad_norm": 0.4622815251350403, "learning_rate": 0.0008610084033613446, "loss": 0.4238, "step": 5068 }, { "epoch": 2.8318435754189943, "grad_norm": 0.6391461491584778, "learning_rate": 0.0008609803921568628, "loss": 0.4696, "step": 5069 }, { "epoch": 2.8324022346368714, "grad_norm": 1.1945433616638184, "learning_rate": 0.000860952380952381, "loss": 0.6053, "step": 5070 }, { "epoch": 2.8329608938547484, "grad_norm": 0.6835968494415283, "learning_rate": 0.0008609243697478992, "loss": 0.4141, "step": 5071 }, { "epoch": 2.833519553072626, "grad_norm": 0.7184941172599792, "learning_rate": 0.0008608963585434174, "loss": 0.6268, "step": 5072 }, { "epoch": 2.834078212290503, "grad_norm": 0.9129653573036194, "learning_rate": 0.0008608683473389356, "loss": 0.5922, "step": 5073 }, { "epoch": 2.83463687150838, "grad_norm": 1.3539282083511353, "learning_rate": 0.0008608403361344538, "loss": 0.4466, "step": 5074 }, { "epoch": 2.835195530726257, "grad_norm": 0.6483432054519653, "learning_rate": 0.000860812324929972, "loss": 0.5087, "step": 5075 }, { "epoch": 2.835754189944134, "grad_norm": 0.6693288683891296, "learning_rate": 0.0008607843137254902, "loss": 0.5042, "step": 5076 }, { "epoch": 2.8363128491620113, "grad_norm": 0.6429671049118042, "learning_rate": 0.0008607563025210084, "loss": 0.3691, "step": 5077 }, { "epoch": 2.8368715083798883, "grad_norm": 0.4690956771373749, "learning_rate": 
0.0008607282913165266, "loss": 0.3897, "step": 5078 }, { "epoch": 2.8374301675977653, "grad_norm": 1.886242151260376, "learning_rate": 0.0008607002801120448, "loss": 0.4195, "step": 5079 }, { "epoch": 2.8379888268156424, "grad_norm": 0.5595972537994385, "learning_rate": 0.000860672268907563, "loss": 0.5348, "step": 5080 }, { "epoch": 2.8385474860335194, "grad_norm": 1.1238492727279663, "learning_rate": 0.0008606442577030813, "loss": 0.4244, "step": 5081 }, { "epoch": 2.839106145251397, "grad_norm": 0.822028398513794, "learning_rate": 0.0008606162464985994, "loss": 0.4276, "step": 5082 }, { "epoch": 2.839664804469274, "grad_norm": 0.5736905932426453, "learning_rate": 0.0008605882352941177, "loss": 0.4062, "step": 5083 }, { "epoch": 2.840223463687151, "grad_norm": 0.9170734286308289, "learning_rate": 0.0008605602240896359, "loss": 0.538, "step": 5084 }, { "epoch": 2.840782122905028, "grad_norm": 0.4064418077468872, "learning_rate": 0.0008605322128851541, "loss": 0.4815, "step": 5085 }, { "epoch": 2.841340782122905, "grad_norm": 0.7241451144218445, "learning_rate": 0.0008605042016806724, "loss": 0.455, "step": 5086 }, { "epoch": 2.8418994413407823, "grad_norm": 0.7200766205787659, "learning_rate": 0.0008604761904761905, "loss": 0.7181, "step": 5087 }, { "epoch": 2.8424581005586593, "grad_norm": 0.5524330735206604, "learning_rate": 0.0008604481792717087, "loss": 0.4517, "step": 5088 }, { "epoch": 2.8430167597765363, "grad_norm": 0.5243330001831055, "learning_rate": 0.0008604201680672269, "loss": 0.5113, "step": 5089 }, { "epoch": 2.8435754189944134, "grad_norm": 0.42244550585746765, "learning_rate": 0.0008603921568627451, "loss": 0.5435, "step": 5090 }, { "epoch": 2.8441340782122904, "grad_norm": 1.268298625946045, "learning_rate": 0.0008603641456582634, "loss": 0.5057, "step": 5091 }, { "epoch": 2.844692737430168, "grad_norm": 0.8056744337081909, "learning_rate": 0.0008603361344537815, "loss": 0.4779, "step": 5092 }, { "epoch": 2.845251396648045, "grad_norm": 
0.5748698115348816, "learning_rate": 0.0008603081232492997, "loss": 0.4207, "step": 5093 }, { "epoch": 2.845810055865922, "grad_norm": 1.1256290674209595, "learning_rate": 0.0008602801120448179, "loss": 0.3795, "step": 5094 }, { "epoch": 2.846368715083799, "grad_norm": 0.8879082798957825, "learning_rate": 0.0008602521008403361, "loss": 0.6347, "step": 5095 }, { "epoch": 2.846927374301676, "grad_norm": 0.7756565809249878, "learning_rate": 0.0008602240896358544, "loss": 0.4921, "step": 5096 }, { "epoch": 2.8474860335195533, "grad_norm": 0.4193631410598755, "learning_rate": 0.0008601960784313726, "loss": 0.4344, "step": 5097 }, { "epoch": 2.8480446927374303, "grad_norm": 0.44814175367355347, "learning_rate": 0.0008601680672268907, "loss": 0.4646, "step": 5098 }, { "epoch": 2.8486033519553073, "grad_norm": 2.894409656524658, "learning_rate": 0.000860140056022409, "loss": 0.3383, "step": 5099 }, { "epoch": 2.8491620111731844, "grad_norm": 1.2867300510406494, "learning_rate": 0.0008601120448179272, "loss": 0.5005, "step": 5100 }, { "epoch": 2.8497206703910614, "grad_norm": 0.4173518121242523, "learning_rate": 0.0008600840336134455, "loss": 0.3868, "step": 5101 }, { "epoch": 2.850279329608939, "grad_norm": 0.4465699791908264, "learning_rate": 0.0008600560224089637, "loss": 0.3371, "step": 5102 }, { "epoch": 2.8508379888268154, "grad_norm": 0.6916064620018005, "learning_rate": 0.0008600280112044818, "loss": 0.4853, "step": 5103 }, { "epoch": 2.851396648044693, "grad_norm": 0.5090463757514954, "learning_rate": 0.00086, "loss": 0.4856, "step": 5104 }, { "epoch": 2.85195530726257, "grad_norm": 0.6066995859146118, "learning_rate": 0.0008599719887955182, "loss": 0.4082, "step": 5105 }, { "epoch": 2.852513966480447, "grad_norm": 0.6781220436096191, "learning_rate": 0.0008599439775910365, "loss": 0.549, "step": 5106 }, { "epoch": 2.853072625698324, "grad_norm": 2.6489884853363037, "learning_rate": 0.0008599159663865547, "loss": 0.378, "step": 5107 }, { "epoch": 2.853631284916201, 
"grad_norm": 0.6756976842880249, "learning_rate": 0.0008598879551820728, "loss": 0.6914, "step": 5108 }, { "epoch": 2.8541899441340783, "grad_norm": 0.5543894171714783, "learning_rate": 0.000859859943977591, "loss": 0.4202, "step": 5109 }, { "epoch": 2.8547486033519553, "grad_norm": 0.5163624882698059, "learning_rate": 0.0008598319327731092, "loss": 0.3955, "step": 5110 }, { "epoch": 2.8553072625698324, "grad_norm": 0.6780015826225281, "learning_rate": 0.0008598039215686275, "loss": 0.5653, "step": 5111 }, { "epoch": 2.8558659217877094, "grad_norm": 0.5120530128479004, "learning_rate": 0.0008597759103641457, "loss": 0.4575, "step": 5112 }, { "epoch": 2.8564245810055864, "grad_norm": 0.6539584994316101, "learning_rate": 0.0008597478991596639, "loss": 0.6027, "step": 5113 }, { "epoch": 2.856983240223464, "grad_norm": 0.587616503238678, "learning_rate": 0.000859719887955182, "loss": 0.4516, "step": 5114 }, { "epoch": 2.857541899441341, "grad_norm": 1.3105885982513428, "learning_rate": 0.0008596918767507002, "loss": 0.4366, "step": 5115 }, { "epoch": 2.858100558659218, "grad_norm": 0.6124499440193176, "learning_rate": 0.0008596638655462186, "loss": 0.4136, "step": 5116 }, { "epoch": 2.858659217877095, "grad_norm": 1.268271565437317, "learning_rate": 0.0008596358543417368, "loss": 0.4719, "step": 5117 }, { "epoch": 2.859217877094972, "grad_norm": 3.596426486968994, "learning_rate": 0.000859607843137255, "loss": 0.629, "step": 5118 }, { "epoch": 2.8597765363128493, "grad_norm": 0.5195632576942444, "learning_rate": 0.0008595798319327731, "loss": 0.6498, "step": 5119 }, { "epoch": 2.8603351955307263, "grad_norm": 0.6706558465957642, "learning_rate": 0.0008595518207282913, "loss": 0.6026, "step": 5120 }, { "epoch": 2.8608938547486034, "grad_norm": 0.6112656593322754, "learning_rate": 0.0008595238095238096, "loss": 0.4883, "step": 5121 }, { "epoch": 2.8614525139664804, "grad_norm": 0.5213063955307007, "learning_rate": 0.0008594957983193278, "loss": 0.4466, "step": 5122 }, { 
"epoch": 2.8620111731843574, "grad_norm": 0.6336454749107361, "learning_rate": 0.000859467787114846, "loss": 0.6278, "step": 5123 }, { "epoch": 2.862569832402235, "grad_norm": 0.7374244332313538, "learning_rate": 0.0008594397759103641, "loss": 0.6401, "step": 5124 }, { "epoch": 2.863128491620112, "grad_norm": 0.6692697405815125, "learning_rate": 0.0008594117647058823, "loss": 0.4764, "step": 5125 }, { "epoch": 2.863687150837989, "grad_norm": 0.7981042265892029, "learning_rate": 0.0008593837535014006, "loss": 0.603, "step": 5126 }, { "epoch": 2.864245810055866, "grad_norm": 0.9843001365661621, "learning_rate": 0.0008593557422969188, "loss": 0.6013, "step": 5127 }, { "epoch": 2.864804469273743, "grad_norm": 0.6881023049354553, "learning_rate": 0.000859327731092437, "loss": 0.4202, "step": 5128 }, { "epoch": 2.8653631284916203, "grad_norm": 0.805983304977417, "learning_rate": 0.0008592997198879552, "loss": 0.4521, "step": 5129 }, { "epoch": 2.8659217877094973, "grad_norm": 0.7637940645217896, "learning_rate": 0.0008592717086834733, "loss": 0.471, "step": 5130 }, { "epoch": 2.8664804469273744, "grad_norm": 0.523261547088623, "learning_rate": 0.0008592436974789915, "loss": 0.4238, "step": 5131 }, { "epoch": 2.8670391061452514, "grad_norm": 3.7323405742645264, "learning_rate": 0.0008592156862745099, "loss": 0.5386, "step": 5132 }, { "epoch": 2.8675977653631284, "grad_norm": 0.5389082431793213, "learning_rate": 0.0008591876750700281, "loss": 0.504, "step": 5133 }, { "epoch": 2.868156424581006, "grad_norm": 0.43062400817871094, "learning_rate": 0.0008591596638655463, "loss": 0.3815, "step": 5134 }, { "epoch": 2.868715083798883, "grad_norm": 0.508239209651947, "learning_rate": 0.0008591316526610644, "loss": 0.5233, "step": 5135 }, { "epoch": 2.86927374301676, "grad_norm": 1.392482042312622, "learning_rate": 0.0008591036414565826, "loss": 0.4106, "step": 5136 }, { "epoch": 2.869832402234637, "grad_norm": 0.6519489884376526, "learning_rate": 0.0008590756302521009, "loss": 
0.4012, "step": 5137 }, { "epoch": 2.870391061452514, "grad_norm": 1.091198205947876, "learning_rate": 0.0008590476190476191, "loss": 0.6055, "step": 5138 }, { "epoch": 2.8709497206703913, "grad_norm": 0.5800414085388184, "learning_rate": 0.0008590196078431373, "loss": 0.4781, "step": 5139 }, { "epoch": 2.871508379888268, "grad_norm": 0.49878600239753723, "learning_rate": 0.0008589915966386554, "loss": 0.4931, "step": 5140 }, { "epoch": 2.8720670391061454, "grad_norm": 0.5097025036811829, "learning_rate": 0.0008589635854341736, "loss": 0.4896, "step": 5141 }, { "epoch": 2.8726256983240224, "grad_norm": 3.178318738937378, "learning_rate": 0.0008589355742296919, "loss": 0.4201, "step": 5142 }, { "epoch": 2.8731843575418994, "grad_norm": 0.6847972273826599, "learning_rate": 0.0008589075630252101, "loss": 0.5028, "step": 5143 }, { "epoch": 2.8737430167597764, "grad_norm": 1.0314918756484985, "learning_rate": 0.0008588795518207283, "loss": 0.4317, "step": 5144 }, { "epoch": 2.8743016759776534, "grad_norm": 0.4729918837547302, "learning_rate": 0.0008588515406162465, "loss": 0.4613, "step": 5145 }, { "epoch": 2.874860335195531, "grad_norm": 0.4969775676727295, "learning_rate": 0.0008588235294117646, "loss": 0.447, "step": 5146 }, { "epoch": 2.875418994413408, "grad_norm": 0.619356632232666, "learning_rate": 0.0008587955182072829, "loss": 0.502, "step": 5147 }, { "epoch": 2.875977653631285, "grad_norm": 0.4823377728462219, "learning_rate": 0.0008587675070028012, "loss": 0.4737, "step": 5148 }, { "epoch": 2.876536312849162, "grad_norm": 0.48929011821746826, "learning_rate": 0.0008587394957983194, "loss": 0.479, "step": 5149 }, { "epoch": 2.877094972067039, "grad_norm": 0.42125391960144043, "learning_rate": 0.0008587114845938376, "loss": 0.4672, "step": 5150 }, { "epoch": 2.8776536312849164, "grad_norm": 0.889860212802887, "learning_rate": 0.0008586834733893557, "loss": 0.4701, "step": 5151 }, { "epoch": 2.8782122905027934, "grad_norm": 6.053067684173584, "learning_rate": 
0.000858655462184874, "loss": 0.4678, "step": 5152 }, { "epoch": 2.8787709497206704, "grad_norm": 3.453068256378174, "learning_rate": 0.0008586274509803922, "loss": 0.6017, "step": 5153 }, { "epoch": 2.8793296089385474, "grad_norm": 0.39680376648902893, "learning_rate": 0.0008585994397759104, "loss": 0.3969, "step": 5154 }, { "epoch": 2.8798882681564244, "grad_norm": 0.6880179047584534, "learning_rate": 0.0008585714285714286, "loss": 0.421, "step": 5155 }, { "epoch": 2.880446927374302, "grad_norm": 0.7897211313247681, "learning_rate": 0.0008585434173669467, "loss": 0.4925, "step": 5156 }, { "epoch": 2.881005586592179, "grad_norm": 0.6023778915405273, "learning_rate": 0.000858515406162465, "loss": 0.518, "step": 5157 }, { "epoch": 2.881564245810056, "grad_norm": 0.4900436997413635, "learning_rate": 0.0008584873949579832, "loss": 0.4901, "step": 5158 }, { "epoch": 2.882122905027933, "grad_norm": 1.5217920541763306, "learning_rate": 0.0008584593837535014, "loss": 0.4754, "step": 5159 }, { "epoch": 2.88268156424581, "grad_norm": 0.4940977990627289, "learning_rate": 0.0008584313725490196, "loss": 0.4261, "step": 5160 }, { "epoch": 2.8832402234636874, "grad_norm": 2.331089735031128, "learning_rate": 0.0008584033613445378, "loss": 0.4189, "step": 5161 }, { "epoch": 2.8837988826815644, "grad_norm": 0.4676640033721924, "learning_rate": 0.000858375350140056, "loss": 0.3533, "step": 5162 }, { "epoch": 2.8843575418994414, "grad_norm": 1.4016258716583252, "learning_rate": 0.0008583473389355742, "loss": 0.4997, "step": 5163 }, { "epoch": 2.8849162011173184, "grad_norm": 0.6847200989723206, "learning_rate": 0.0008583193277310924, "loss": 0.3922, "step": 5164 }, { "epoch": 2.8854748603351954, "grad_norm": 0.6961644291877747, "learning_rate": 0.0008582913165266107, "loss": 0.6224, "step": 5165 }, { "epoch": 2.886033519553073, "grad_norm": 0.6959324479103088, "learning_rate": 0.0008582633053221289, "loss": 0.5146, "step": 5166 }, { "epoch": 2.88659217877095, "grad_norm": 
0.689371645450592, "learning_rate": 0.0008582352941176471, "loss": 0.4311, "step": 5167 }, { "epoch": 2.887150837988827, "grad_norm": 0.8468054533004761, "learning_rate": 0.0008582072829131653, "loss": 0.4509, "step": 5168 }, { "epoch": 2.887709497206704, "grad_norm": 0.4900321364402771, "learning_rate": 0.0008581792717086835, "loss": 0.5418, "step": 5169 }, { "epoch": 2.888268156424581, "grad_norm": 0.6423742771148682, "learning_rate": 0.0008581512605042017, "loss": 0.6463, "step": 5170 }, { "epoch": 2.8888268156424584, "grad_norm": 0.8085363507270813, "learning_rate": 0.0008581232492997199, "loss": 0.4523, "step": 5171 }, { "epoch": 2.889385474860335, "grad_norm": 0.6199180483818054, "learning_rate": 0.0008580952380952382, "loss": 0.4751, "step": 5172 }, { "epoch": 2.8899441340782124, "grad_norm": 0.43933606147766113, "learning_rate": 0.0008580672268907563, "loss": 0.4845, "step": 5173 }, { "epoch": 2.8905027932960894, "grad_norm": 1.1304324865341187, "learning_rate": 0.0008580392156862745, "loss": 0.6311, "step": 5174 }, { "epoch": 2.8910614525139664, "grad_norm": 0.573530912399292, "learning_rate": 0.0008580112044817927, "loss": 0.571, "step": 5175 }, { "epoch": 2.8916201117318434, "grad_norm": 0.5387104749679565, "learning_rate": 0.0008579831932773109, "loss": 0.57, "step": 5176 }, { "epoch": 2.8921787709497204, "grad_norm": 0.5040245652198792, "learning_rate": 0.0008579551820728292, "loss": 0.4616, "step": 5177 }, { "epoch": 2.892737430167598, "grad_norm": 0.506863534450531, "learning_rate": 0.0008579271708683473, "loss": 0.4317, "step": 5178 }, { "epoch": 2.893296089385475, "grad_norm": 0.5977319478988647, "learning_rate": 0.0008578991596638655, "loss": 0.4988, "step": 5179 }, { "epoch": 2.893854748603352, "grad_norm": 0.6969428658485413, "learning_rate": 0.0008578711484593837, "loss": 0.5135, "step": 5180 }, { "epoch": 2.894413407821229, "grad_norm": 0.6479884386062622, "learning_rate": 0.000857843137254902, "loss": 0.498, "step": 5181 }, { "epoch": 
2.894972067039106, "grad_norm": 0.5598331093788147, "learning_rate": 0.0008578151260504203, "loss": 0.438, "step": 5182 }, { "epoch": 2.8955307262569834, "grad_norm": 0.4293232858181, "learning_rate": 0.0008577871148459384, "loss": 0.4494, "step": 5183 }, { "epoch": 2.8960893854748604, "grad_norm": 0.4953780770301819, "learning_rate": 0.0008577591036414566, "loss": 0.3984, "step": 5184 }, { "epoch": 2.8966480446927374, "grad_norm": 0.5538314580917358, "learning_rate": 0.0008577310924369748, "loss": 0.4605, "step": 5185 }, { "epoch": 2.8972067039106144, "grad_norm": 0.610962450504303, "learning_rate": 0.000857703081232493, "loss": 0.5514, "step": 5186 }, { "epoch": 2.8977653631284914, "grad_norm": 0.4831545948982239, "learning_rate": 0.0008576750700280113, "loss": 0.418, "step": 5187 }, { "epoch": 2.898324022346369, "grad_norm": 0.6809319853782654, "learning_rate": 0.0008576470588235295, "loss": 0.5996, "step": 5188 }, { "epoch": 2.898882681564246, "grad_norm": 0.5042480826377869, "learning_rate": 0.0008576190476190476, "loss": 0.5174, "step": 5189 }, { "epoch": 2.899441340782123, "grad_norm": 0.7760380506515503, "learning_rate": 0.0008575910364145658, "loss": 0.4816, "step": 5190 }, { "epoch": 2.9, "grad_norm": 0.8557183742523193, "learning_rate": 0.000857563025210084, "loss": 0.3988, "step": 5191 }, { "epoch": 2.900558659217877, "grad_norm": 0.5912318825721741, "learning_rate": 0.0008575350140056023, "loss": 0.5366, "step": 5192 }, { "epoch": 2.9011173184357544, "grad_norm": 0.5452551245689392, "learning_rate": 0.0008575070028011205, "loss": 0.4688, "step": 5193 }, { "epoch": 2.9016759776536314, "grad_norm": 0.5104190707206726, "learning_rate": 0.0008574789915966386, "loss": 0.51, "step": 5194 }, { "epoch": 2.9022346368715084, "grad_norm": 0.6214990019798279, "learning_rate": 0.0008574509803921568, "loss": 0.5122, "step": 5195 }, { "epoch": 2.9027932960893854, "grad_norm": 0.5402427911758423, "learning_rate": 0.000857422969187675, "loss": 0.489, "step": 5196 }, { 
"epoch": 2.9033519553072624, "grad_norm": 2.7312228679656982, "learning_rate": 0.0008573949579831934, "loss": 0.4598, "step": 5197 }, { "epoch": 2.90391061452514, "grad_norm": 1.3614888191223145, "learning_rate": 0.0008573669467787116, "loss": 0.412, "step": 5198 }, { "epoch": 2.904469273743017, "grad_norm": 0.8111847639083862, "learning_rate": 0.0008573389355742297, "loss": 0.4629, "step": 5199 }, { "epoch": 2.905027932960894, "grad_norm": 0.5172328948974609, "learning_rate": 0.0008573109243697479, "loss": 0.5623, "step": 5200 }, { "epoch": 2.905586592178771, "grad_norm": 0.6620856523513794, "learning_rate": 0.0008572829131652661, "loss": 0.476, "step": 5201 }, { "epoch": 2.906145251396648, "grad_norm": 0.5316200852394104, "learning_rate": 0.0008572549019607844, "loss": 0.5059, "step": 5202 }, { "epoch": 2.9067039106145254, "grad_norm": 0.6617650389671326, "learning_rate": 0.0008572268907563026, "loss": 0.4201, "step": 5203 }, { "epoch": 2.9072625698324024, "grad_norm": 0.5596368908882141, "learning_rate": 0.0008571988795518208, "loss": 0.3933, "step": 5204 }, { "epoch": 2.9078212290502794, "grad_norm": 0.8836463093757629, "learning_rate": 0.0008571708683473389, "loss": 0.524, "step": 5205 }, { "epoch": 2.9083798882681564, "grad_norm": 0.8760644197463989, "learning_rate": 0.0008571428571428571, "loss": 0.5705, "step": 5206 }, { "epoch": 2.9089385474860334, "grad_norm": 0.5425623655319214, "learning_rate": 0.0008571148459383754, "loss": 0.5588, "step": 5207 }, { "epoch": 2.909497206703911, "grad_norm": 0.6250994205474854, "learning_rate": 0.0008570868347338936, "loss": 0.6644, "step": 5208 }, { "epoch": 2.9100558659217874, "grad_norm": 1.6671067476272583, "learning_rate": 0.0008570588235294118, "loss": 0.6824, "step": 5209 }, { "epoch": 2.910614525139665, "grad_norm": 0.7472813725471497, "learning_rate": 0.0008570308123249299, "loss": 0.6222, "step": 5210 }, { "epoch": 2.911173184357542, "grad_norm": 0.9850180149078369, "learning_rate": 0.0008570028011204481, 
"loss": 0.497, "step": 5211 }, { "epoch": 2.911731843575419, "grad_norm": 2.756469964981079, "learning_rate": 0.0008569747899159664, "loss": 0.4527, "step": 5212 }, { "epoch": 2.912290502793296, "grad_norm": 0.7195557951927185, "learning_rate": 0.0008569467787114846, "loss": 0.5287, "step": 5213 }, { "epoch": 2.912849162011173, "grad_norm": 0.47098296880722046, "learning_rate": 0.0008569187675070029, "loss": 0.4728, "step": 5214 }, { "epoch": 2.9134078212290504, "grad_norm": 0.7351492047309875, "learning_rate": 0.000856890756302521, "loss": 0.474, "step": 5215 }, { "epoch": 2.9139664804469274, "grad_norm": 7.006438255310059, "learning_rate": 0.0008568627450980392, "loss": 0.5086, "step": 5216 }, { "epoch": 2.9145251396648044, "grad_norm": 0.4575154483318329, "learning_rate": 0.0008568347338935575, "loss": 0.4303, "step": 5217 }, { "epoch": 2.9150837988826814, "grad_norm": 0.5102495551109314, "learning_rate": 0.0008568067226890757, "loss": 0.504, "step": 5218 }, { "epoch": 2.9156424581005584, "grad_norm": 0.5614028573036194, "learning_rate": 0.0008567787114845939, "loss": 0.4173, "step": 5219 }, { "epoch": 2.916201117318436, "grad_norm": 0.4462522268295288, "learning_rate": 0.0008567507002801121, "loss": 0.4039, "step": 5220 }, { "epoch": 2.916759776536313, "grad_norm": 2.253756046295166, "learning_rate": 0.0008567226890756302, "loss": 0.4502, "step": 5221 }, { "epoch": 2.91731843575419, "grad_norm": 0.6509634256362915, "learning_rate": 0.0008566946778711485, "loss": 0.4553, "step": 5222 }, { "epoch": 2.917877094972067, "grad_norm": 0.5005168318748474, "learning_rate": 0.0008566666666666667, "loss": 0.4467, "step": 5223 }, { "epoch": 2.918435754189944, "grad_norm": 0.9374316334724426, "learning_rate": 0.0008566386554621849, "loss": 0.5944, "step": 5224 }, { "epoch": 2.9189944134078214, "grad_norm": 1.4228215217590332, "learning_rate": 0.0008566106442577031, "loss": 0.5214, "step": 5225 }, { "epoch": 2.9195530726256984, "grad_norm": 0.44831714034080505, 
"learning_rate": 0.0008565826330532212, "loss": 0.3466, "step": 5226 }, { "epoch": 2.9201117318435754, "grad_norm": 3.154331684112549, "learning_rate": 0.0008565546218487395, "loss": 0.6113, "step": 5227 }, { "epoch": 2.9206703910614524, "grad_norm": 1.5173183679580688, "learning_rate": 0.0008565266106442577, "loss": 0.3932, "step": 5228 }, { "epoch": 2.9212290502793294, "grad_norm": 0.4372890591621399, "learning_rate": 0.0008564985994397759, "loss": 0.3439, "step": 5229 }, { "epoch": 2.921787709497207, "grad_norm": 0.44898954033851624, "learning_rate": 0.0008564705882352942, "loss": 0.4945, "step": 5230 }, { "epoch": 2.922346368715084, "grad_norm": 0.4339487552642822, "learning_rate": 0.0008564425770308122, "loss": 0.4724, "step": 5231 }, { "epoch": 2.922905027932961, "grad_norm": 3.6003055572509766, "learning_rate": 0.0008564145658263306, "loss": 0.5035, "step": 5232 }, { "epoch": 2.923463687150838, "grad_norm": 11.896994590759277, "learning_rate": 0.0008563865546218488, "loss": 0.7009, "step": 5233 }, { "epoch": 2.924022346368715, "grad_norm": 0.3272635042667389, "learning_rate": 0.000856358543417367, "loss": 0.3322, "step": 5234 }, { "epoch": 2.9245810055865924, "grad_norm": 0.4550877809524536, "learning_rate": 0.0008563305322128852, "loss": 0.5193, "step": 5235 }, { "epoch": 2.9251396648044694, "grad_norm": 0.4681559205055237, "learning_rate": 0.0008563025210084034, "loss": 0.4387, "step": 5236 }, { "epoch": 2.9256983240223464, "grad_norm": 0.7069253325462341, "learning_rate": 0.0008562745098039216, "loss": 0.4566, "step": 5237 }, { "epoch": 2.9262569832402234, "grad_norm": 0.3951936662197113, "learning_rate": 0.0008562464985994398, "loss": 0.4267, "step": 5238 }, { "epoch": 2.9268156424581004, "grad_norm": 2.9395580291748047, "learning_rate": 0.000856218487394958, "loss": 0.6986, "step": 5239 }, { "epoch": 2.927374301675978, "grad_norm": 0.4268450140953064, "learning_rate": 0.0008561904761904762, "loss": 0.3528, "step": 5240 }, { "epoch": 2.927932960893855, 
"grad_norm": 0.5216578245162964, "learning_rate": 0.0008561624649859944, "loss": 0.4481, "step": 5241 }, { "epoch": 2.928491620111732, "grad_norm": 0.8292457461357117, "learning_rate": 0.0008561344537815126, "loss": 0.6018, "step": 5242 }, { "epoch": 2.929050279329609, "grad_norm": 0.4213391840457916, "learning_rate": 0.0008561064425770308, "loss": 0.3861, "step": 5243 }, { "epoch": 2.929608938547486, "grad_norm": 0.887485921382904, "learning_rate": 0.000856078431372549, "loss": 0.4673, "step": 5244 }, { "epoch": 2.9301675977653634, "grad_norm": 0.4789484739303589, "learning_rate": 0.0008560504201680672, "loss": 0.3961, "step": 5245 }, { "epoch": 2.93072625698324, "grad_norm": 0.6058626770973206, "learning_rate": 0.0008560224089635854, "loss": 0.502, "step": 5246 }, { "epoch": 2.9312849162011174, "grad_norm": 0.5269392132759094, "learning_rate": 0.0008559943977591037, "loss": 0.4484, "step": 5247 }, { "epoch": 2.9318435754189944, "grad_norm": 0.6030774712562561, "learning_rate": 0.0008559663865546219, "loss": 0.3759, "step": 5248 }, { "epoch": 2.9324022346368714, "grad_norm": 0.6322498321533203, "learning_rate": 0.0008559383753501401, "loss": 0.4797, "step": 5249 }, { "epoch": 2.9329608938547485, "grad_norm": 0.7021874189376831, "learning_rate": 0.0008559103641456583, "loss": 0.6254, "step": 5250 }, { "epoch": 2.9335195530726255, "grad_norm": 1.421185851097107, "learning_rate": 0.0008558823529411765, "loss": 0.5824, "step": 5251 }, { "epoch": 2.934078212290503, "grad_norm": 0.560073971748352, "learning_rate": 0.0008558543417366948, "loss": 0.629, "step": 5252 }, { "epoch": 2.93463687150838, "grad_norm": 0.7694107890129089, "learning_rate": 0.0008558263305322129, "loss": 0.4859, "step": 5253 }, { "epoch": 2.935195530726257, "grad_norm": 0.6547850966453552, "learning_rate": 0.0008557983193277311, "loss": 0.4928, "step": 5254 }, { "epoch": 2.935754189944134, "grad_norm": 0.7182886600494385, "learning_rate": 0.0008557703081232493, "loss": 0.6571, "step": 5255 }, { 
"epoch": 2.936312849162011, "grad_norm": 0.5350194573402405, "learning_rate": 0.0008557422969187675, "loss": 0.477, "step": 5256 }, { "epoch": 2.9368715083798884, "grad_norm": 0.6255475282669067, "learning_rate": 0.0008557142857142858, "loss": 0.4823, "step": 5257 }, { "epoch": 2.9374301675977654, "grad_norm": 3.8309788703918457, "learning_rate": 0.0008556862745098039, "loss": 0.467, "step": 5258 }, { "epoch": 2.9379888268156424, "grad_norm": 0.5530065894126892, "learning_rate": 0.0008556582633053221, "loss": 0.414, "step": 5259 }, { "epoch": 2.9385474860335195, "grad_norm": 0.4978122115135193, "learning_rate": 0.0008556302521008403, "loss": 0.4778, "step": 5260 }, { "epoch": 2.9391061452513965, "grad_norm": 1.2582539319992065, "learning_rate": 0.0008556022408963585, "loss": 0.5299, "step": 5261 }, { "epoch": 2.939664804469274, "grad_norm": 0.7021863460540771, "learning_rate": 0.0008555742296918769, "loss": 0.4803, "step": 5262 }, { "epoch": 2.940223463687151, "grad_norm": 0.601661205291748, "learning_rate": 0.000855546218487395, "loss": 0.4241, "step": 5263 }, { "epoch": 2.940782122905028, "grad_norm": 0.8026847243309021, "learning_rate": 0.0008555182072829132, "loss": 0.4989, "step": 5264 }, { "epoch": 2.941340782122905, "grad_norm": 1.6611796617507935, "learning_rate": 0.0008554901960784314, "loss": 0.4394, "step": 5265 }, { "epoch": 2.941899441340782, "grad_norm": 1.6839842796325684, "learning_rate": 0.0008554621848739496, "loss": 0.437, "step": 5266 }, { "epoch": 2.9424581005586594, "grad_norm": 0.40616342425346375, "learning_rate": 0.0008554341736694679, "loss": 0.4184, "step": 5267 }, { "epoch": 2.9430167597765364, "grad_norm": 0.5075446367263794, "learning_rate": 0.0008554061624649861, "loss": 0.4567, "step": 5268 }, { "epoch": 2.9435754189944134, "grad_norm": 0.4084770679473877, "learning_rate": 0.0008553781512605042, "loss": 0.4064, "step": 5269 }, { "epoch": 2.9441340782122905, "grad_norm": 0.5572300553321838, "learning_rate": 0.0008553501400560224, 
"loss": 0.367, "step": 5270 }, { "epoch": 2.9446927374301675, "grad_norm": 0.44329598546028137, "learning_rate": 0.0008553221288515406, "loss": 0.4105, "step": 5271 }, { "epoch": 2.945251396648045, "grad_norm": 1.13699209690094, "learning_rate": 0.0008552941176470589, "loss": 0.4623, "step": 5272 }, { "epoch": 2.945810055865922, "grad_norm": 0.5589644908905029, "learning_rate": 0.0008552661064425771, "loss": 0.4697, "step": 5273 }, { "epoch": 2.946368715083799, "grad_norm": 0.5557832717895508, "learning_rate": 0.0008552380952380952, "loss": 0.5364, "step": 5274 }, { "epoch": 2.946927374301676, "grad_norm": 0.9397745728492737, "learning_rate": 0.0008552100840336134, "loss": 0.4608, "step": 5275 }, { "epoch": 2.947486033519553, "grad_norm": 1.5104057788848877, "learning_rate": 0.0008551820728291316, "loss": 0.5783, "step": 5276 }, { "epoch": 2.9480446927374304, "grad_norm": 0.6010105013847351, "learning_rate": 0.0008551540616246499, "loss": 0.5162, "step": 5277 }, { "epoch": 2.9486033519553074, "grad_norm": 0.43357473611831665, "learning_rate": 0.0008551260504201681, "loss": 0.5003, "step": 5278 }, { "epoch": 2.9491620111731844, "grad_norm": 6.203266620635986, "learning_rate": 0.0008550980392156862, "loss": 0.4421, "step": 5279 }, { "epoch": 2.9497206703910615, "grad_norm": 1.873213291168213, "learning_rate": 0.0008550700280112045, "loss": 0.5304, "step": 5280 }, { "epoch": 2.9502793296089385, "grad_norm": 0.6766135692596436, "learning_rate": 0.0008550420168067227, "loss": 0.409, "step": 5281 }, { "epoch": 2.950837988826816, "grad_norm": 7.697422981262207, "learning_rate": 0.000855014005602241, "loss": 0.4847, "step": 5282 }, { "epoch": 2.9513966480446925, "grad_norm": 0.6321967840194702, "learning_rate": 0.0008549859943977592, "loss": 0.525, "step": 5283 }, { "epoch": 2.95195530726257, "grad_norm": 0.7350785732269287, "learning_rate": 0.0008549579831932774, "loss": 0.6233, "step": 5284 }, { "epoch": 2.952513966480447, "grad_norm": 0.5767834782600403, 
"learning_rate": 0.0008549299719887955, "loss": 0.4759, "step": 5285 }, { "epoch": 2.953072625698324, "grad_norm": 0.5822747349739075, "learning_rate": 0.0008549019607843137, "loss": 0.471, "step": 5286 }, { "epoch": 2.953631284916201, "grad_norm": 0.6781153678894043, "learning_rate": 0.000854873949579832, "loss": 0.4501, "step": 5287 }, { "epoch": 2.954189944134078, "grad_norm": 1.9439740180969238, "learning_rate": 0.0008548459383753502, "loss": 0.4536, "step": 5288 }, { "epoch": 2.9547486033519554, "grad_norm": 0.7673845887184143, "learning_rate": 0.0008548179271708684, "loss": 0.5123, "step": 5289 }, { "epoch": 2.9553072625698324, "grad_norm": 1.6544477939605713, "learning_rate": 0.0008547899159663865, "loss": 0.6024, "step": 5290 }, { "epoch": 2.9558659217877095, "grad_norm": 0.5968123078346252, "learning_rate": 0.0008547619047619047, "loss": 0.4345, "step": 5291 }, { "epoch": 2.9564245810055865, "grad_norm": 0.6262439489364624, "learning_rate": 0.000854733893557423, "loss": 0.5571, "step": 5292 }, { "epoch": 2.9569832402234635, "grad_norm": 0.7656193375587463, "learning_rate": 0.0008547058823529412, "loss": 0.6512, "step": 5293 }, { "epoch": 2.957541899441341, "grad_norm": 0.5560053586959839, "learning_rate": 0.0008546778711484594, "loss": 0.7462, "step": 5294 }, { "epoch": 2.958100558659218, "grad_norm": 0.5246129035949707, "learning_rate": 0.0008546498599439775, "loss": 0.4772, "step": 5295 }, { "epoch": 2.958659217877095, "grad_norm": 0.7463229298591614, "learning_rate": 0.0008546218487394957, "loss": 0.4147, "step": 5296 }, { "epoch": 2.959217877094972, "grad_norm": 0.5940794944763184, "learning_rate": 0.0008545938375350141, "loss": 0.3936, "step": 5297 }, { "epoch": 2.959776536312849, "grad_norm": 0.8366613984107971, "learning_rate": 0.0008545658263305323, "loss": 0.4459, "step": 5298 }, { "epoch": 2.9603351955307264, "grad_norm": 0.6187811493873596, "learning_rate": 0.0008545378151260505, "loss": 0.4231, "step": 5299 }, { "epoch": 2.9608938547486034, 
"grad_norm": 0.7047716379165649, "learning_rate": 0.0008545098039215687, "loss": 0.4942, "step": 5300 }, { "epoch": 2.9614525139664805, "grad_norm": 0.8810130953788757, "learning_rate": 0.0008544817927170868, "loss": 0.487, "step": 5301 }, { "epoch": 2.9620111731843575, "grad_norm": 0.864976704120636, "learning_rate": 0.0008544537815126051, "loss": 0.5124, "step": 5302 }, { "epoch": 2.9625698324022345, "grad_norm": 0.6067531704902649, "learning_rate": 0.0008544257703081233, "loss": 0.5235, "step": 5303 }, { "epoch": 2.963128491620112, "grad_norm": 0.6633782982826233, "learning_rate": 0.0008543977591036415, "loss": 0.5902, "step": 5304 }, { "epoch": 2.963687150837989, "grad_norm": 0.5539004802703857, "learning_rate": 0.0008543697478991597, "loss": 0.4846, "step": 5305 }, { "epoch": 2.964245810055866, "grad_norm": 0.8036938309669495, "learning_rate": 0.0008543417366946778, "loss": 0.5442, "step": 5306 }, { "epoch": 2.964804469273743, "grad_norm": 0.47018709778785706, "learning_rate": 0.0008543137254901961, "loss": 0.541, "step": 5307 }, { "epoch": 2.96536312849162, "grad_norm": 0.6300920844078064, "learning_rate": 0.0008542857142857143, "loss": 0.501, "step": 5308 }, { "epoch": 2.9659217877094974, "grad_norm": 1.424096703529358, "learning_rate": 0.0008542577030812325, "loss": 0.4753, "step": 5309 }, { "epoch": 2.9664804469273744, "grad_norm": 0.5059372782707214, "learning_rate": 0.0008542296918767507, "loss": 0.4065, "step": 5310 }, { "epoch": 2.9670391061452515, "grad_norm": 0.9171116948127747, "learning_rate": 0.0008542016806722688, "loss": 0.4752, "step": 5311 }, { "epoch": 2.9675977653631285, "grad_norm": 1.5030795335769653, "learning_rate": 0.0008541736694677872, "loss": 0.5334, "step": 5312 }, { "epoch": 2.9681564245810055, "grad_norm": 0.91139817237854, "learning_rate": 0.0008541456582633054, "loss": 0.464, "step": 5313 }, { "epoch": 2.968715083798883, "grad_norm": 0.5637885928153992, "learning_rate": 0.0008541176470588236, "loss": 0.4687, "step": 5314 }, { 
"epoch": 2.9692737430167595, "grad_norm": 0.44936779141426086, "learning_rate": 0.0008540896358543418, "loss": 0.5043, "step": 5315 }, { "epoch": 2.969832402234637, "grad_norm": 1.3325906991958618, "learning_rate": 0.00085406162464986, "loss": 0.4937, "step": 5316 }, { "epoch": 2.970391061452514, "grad_norm": 3.632030487060547, "learning_rate": 0.0008540336134453782, "loss": 0.3722, "step": 5317 }, { "epoch": 2.970949720670391, "grad_norm": 0.7179813981056213, "learning_rate": 0.0008540056022408964, "loss": 0.4831, "step": 5318 }, { "epoch": 2.971508379888268, "grad_norm": 0.8740227818489075, "learning_rate": 0.0008539775910364146, "loss": 0.5092, "step": 5319 }, { "epoch": 2.972067039106145, "grad_norm": 1.2245211601257324, "learning_rate": 0.0008539495798319328, "loss": 0.5828, "step": 5320 }, { "epoch": 2.9726256983240225, "grad_norm": 0.5264342427253723, "learning_rate": 0.000853921568627451, "loss": 0.4307, "step": 5321 }, { "epoch": 2.9731843575418995, "grad_norm": 1.093178629875183, "learning_rate": 0.0008538935574229692, "loss": 0.5418, "step": 5322 }, { "epoch": 2.9737430167597765, "grad_norm": 1.6047625541687012, "learning_rate": 0.0008538655462184874, "loss": 0.5896, "step": 5323 }, { "epoch": 2.9743016759776535, "grad_norm": 0.6026519536972046, "learning_rate": 0.0008538375350140056, "loss": 0.4694, "step": 5324 }, { "epoch": 2.9748603351955305, "grad_norm": 0.44366228580474854, "learning_rate": 0.0008538095238095238, "loss": 0.475, "step": 5325 }, { "epoch": 2.975418994413408, "grad_norm": 0.9205455780029297, "learning_rate": 0.000853781512605042, "loss": 0.5779, "step": 5326 }, { "epoch": 2.975977653631285, "grad_norm": 0.8671701550483704, "learning_rate": 0.0008537535014005602, "loss": 0.4666, "step": 5327 }, { "epoch": 2.976536312849162, "grad_norm": 0.6516280174255371, "learning_rate": 0.0008537254901960784, "loss": 0.3994, "step": 5328 }, { "epoch": 2.977094972067039, "grad_norm": 0.47651827335357666, "learning_rate": 0.0008536974789915967, 
"loss": 0.4897, "step": 5329 }, { "epoch": 2.977653631284916, "grad_norm": 1.0731016397476196, "learning_rate": 0.0008536694677871149, "loss": 0.5126, "step": 5330 }, { "epoch": 2.9782122905027935, "grad_norm": 0.5467098951339722, "learning_rate": 0.0008536414565826331, "loss": 0.5058, "step": 5331 }, { "epoch": 2.9787709497206705, "grad_norm": 0.6360552310943604, "learning_rate": 0.0008536134453781514, "loss": 0.6286, "step": 5332 }, { "epoch": 2.9793296089385475, "grad_norm": 0.561384916305542, "learning_rate": 0.0008535854341736695, "loss": 0.4836, "step": 5333 }, { "epoch": 2.9798882681564245, "grad_norm": 0.4336279034614563, "learning_rate": 0.0008535574229691877, "loss": 0.4819, "step": 5334 }, { "epoch": 2.9804469273743015, "grad_norm": 0.4420956075191498, "learning_rate": 0.0008535294117647059, "loss": 0.4715, "step": 5335 }, { "epoch": 2.981005586592179, "grad_norm": 6.295414924621582, "learning_rate": 0.0008535014005602241, "loss": 0.3881, "step": 5336 }, { "epoch": 2.981564245810056, "grad_norm": 0.6499784588813782, "learning_rate": 0.0008534733893557424, "loss": 0.4992, "step": 5337 }, { "epoch": 2.982122905027933, "grad_norm": 0.8571310639381409, "learning_rate": 0.0008534453781512605, "loss": 0.4905, "step": 5338 }, { "epoch": 2.98268156424581, "grad_norm": 0.514336884021759, "learning_rate": 0.0008534173669467787, "loss": 0.4949, "step": 5339 }, { "epoch": 2.983240223463687, "grad_norm": 5.9147443771362305, "learning_rate": 0.0008533893557422969, "loss": 0.4681, "step": 5340 }, { "epoch": 2.9837988826815645, "grad_norm": 1.3518317937850952, "learning_rate": 0.0008533613445378151, "loss": 0.4052, "step": 5341 }, { "epoch": 2.9843575418994415, "grad_norm": 0.9077181220054626, "learning_rate": 0.0008533333333333334, "loss": 0.482, "step": 5342 }, { "epoch": 2.9849162011173185, "grad_norm": 0.7650498747825623, "learning_rate": 0.0008533053221288515, "loss": 0.4926, "step": 5343 }, { "epoch": 2.9854748603351955, "grad_norm": 0.5614609718322754, 
"learning_rate": 0.0008532773109243697, "loss": 0.5085, "step": 5344 }, { "epoch": 2.9860335195530725, "grad_norm": 4.789839267730713, "learning_rate": 0.000853249299719888, "loss": 0.4963, "step": 5345 }, { "epoch": 2.98659217877095, "grad_norm": 0.6768234372138977, "learning_rate": 0.0008532212885154062, "loss": 0.3343, "step": 5346 }, { "epoch": 2.987150837988827, "grad_norm": 0.8178666830062866, "learning_rate": 0.0008531932773109245, "loss": 0.5221, "step": 5347 }, { "epoch": 2.987709497206704, "grad_norm": 0.5692423582077026, "learning_rate": 0.0008531652661064427, "loss": 0.4396, "step": 5348 }, { "epoch": 2.988268156424581, "grad_norm": 0.4675418436527252, "learning_rate": 0.0008531372549019608, "loss": 0.3618, "step": 5349 }, { "epoch": 2.988826815642458, "grad_norm": 0.6296323537826538, "learning_rate": 0.000853109243697479, "loss": 0.4867, "step": 5350 }, { "epoch": 2.9893854748603355, "grad_norm": 0.8342413902282715, "learning_rate": 0.0008530812324929972, "loss": 0.3928, "step": 5351 }, { "epoch": 2.989944134078212, "grad_norm": 0.6714411377906799, "learning_rate": 0.0008530532212885154, "loss": 0.4725, "step": 5352 }, { "epoch": 2.9905027932960895, "grad_norm": 0.4294596016407013, "learning_rate": 0.0008530252100840337, "loss": 0.4436, "step": 5353 }, { "epoch": 2.9910614525139665, "grad_norm": 0.42483019828796387, "learning_rate": 0.0008529971988795518, "loss": 0.3971, "step": 5354 }, { "epoch": 2.9916201117318435, "grad_norm": 0.6476815342903137, "learning_rate": 0.00085296918767507, "loss": 0.6176, "step": 5355 }, { "epoch": 2.9921787709497205, "grad_norm": 0.6838845014572144, "learning_rate": 0.0008529411764705882, "loss": 0.5515, "step": 5356 }, { "epoch": 2.9927374301675975, "grad_norm": 2.527945041656494, "learning_rate": 0.0008529131652661064, "loss": 0.4539, "step": 5357 }, { "epoch": 2.993296089385475, "grad_norm": 0.5473175048828125, "learning_rate": 0.0008528851540616247, "loss": 0.4663, "step": 5358 }, { "epoch": 2.993854748603352, 
"grad_norm": 0.5364380478858948, "learning_rate": 0.0008528571428571428, "loss": 0.5466, "step": 5359 }, { "epoch": 2.994413407821229, "grad_norm": 0.7082886099815369, "learning_rate": 0.000852829131652661, "loss": 0.3553, "step": 5360 }, { "epoch": 2.994972067039106, "grad_norm": 19.685489654541016, "learning_rate": 0.0008528011204481792, "loss": 0.4668, "step": 5361 }, { "epoch": 2.995530726256983, "grad_norm": 0.7132713794708252, "learning_rate": 0.0008527731092436975, "loss": 0.4904, "step": 5362 }, { "epoch": 2.9960893854748605, "grad_norm": 0.6538237929344177, "learning_rate": 0.0008527450980392158, "loss": 0.5379, "step": 5363 }, { "epoch": 2.9966480446927375, "grad_norm": 2.562812089920044, "learning_rate": 0.000852717086834734, "loss": 0.5682, "step": 5364 }, { "epoch": 2.9972067039106145, "grad_norm": 0.47311344742774963, "learning_rate": 0.0008526890756302521, "loss": 0.4406, "step": 5365 }, { "epoch": 2.9977653631284915, "grad_norm": 0.38964036107063293, "learning_rate": 0.0008526610644257703, "loss": 0.3839, "step": 5366 }, { "epoch": 2.9983240223463685, "grad_norm": 0.43479278683662415, "learning_rate": 0.0008526330532212885, "loss": 0.4989, "step": 5367 }, { "epoch": 2.998882681564246, "grad_norm": 0.530277669429779, "learning_rate": 0.0008526050420168068, "loss": 0.5573, "step": 5368 }, { "epoch": 2.999441340782123, "grad_norm": 2.3859963417053223, "learning_rate": 0.000852577030812325, "loss": 0.6129, "step": 5369 }, { "epoch": 3.0, "grad_norm": 0.8000469207763672, "learning_rate": 0.0008525490196078431, "loss": 0.6314, "step": 5370 }, { "epoch": 3.000558659217877, "grad_norm": 0.3947930634021759, "learning_rate": 0.0008525210084033613, "loss": 0.4098, "step": 5371 }, { "epoch": 3.001117318435754, "grad_norm": 0.6759277582168579, "learning_rate": 0.0008524929971988795, "loss": 0.5769, "step": 5372 }, { "epoch": 3.0016759776536315, "grad_norm": 0.8616467118263245, "learning_rate": 0.0008524649859943978, "loss": 0.5528, "step": 5373 }, { "epoch": 
3.0022346368715085, "grad_norm": 0.4359557628631592, "learning_rate": 0.000852436974789916, "loss": 0.5051, "step": 5374 }, { "epoch": 3.0027932960893855, "grad_norm": 0.6852437257766724, "learning_rate": 0.0008524089635854341, "loss": 0.4135, "step": 5375 }, { "epoch": 3.0033519553072625, "grad_norm": 0.6340899467468262, "learning_rate": 0.0008523809523809523, "loss": 0.4783, "step": 5376 }, { "epoch": 3.0039106145251395, "grad_norm": 0.42108026146888733, "learning_rate": 0.0008523529411764705, "loss": 0.4335, "step": 5377 }, { "epoch": 3.004469273743017, "grad_norm": 0.4909720718860626, "learning_rate": 0.0008523249299719889, "loss": 0.4557, "step": 5378 }, { "epoch": 3.005027932960894, "grad_norm": 0.5477637052536011, "learning_rate": 0.0008522969187675071, "loss": 0.4057, "step": 5379 }, { "epoch": 3.005586592178771, "grad_norm": 0.8505667448043823, "learning_rate": 0.0008522689075630253, "loss": 0.5986, "step": 5380 }, { "epoch": 3.006145251396648, "grad_norm": 0.5427995324134827, "learning_rate": 0.0008522408963585434, "loss": 0.5046, "step": 5381 }, { "epoch": 3.006703910614525, "grad_norm": 0.6454780697822571, "learning_rate": 0.0008522128851540616, "loss": 0.4516, "step": 5382 }, { "epoch": 3.007262569832402, "grad_norm": 0.5145334601402283, "learning_rate": 0.0008521848739495799, "loss": 0.4073, "step": 5383 }, { "epoch": 3.0078212290502795, "grad_norm": 0.40927255153656006, "learning_rate": 0.0008521568627450981, "loss": 0.4487, "step": 5384 }, { "epoch": 3.0083798882681565, "grad_norm": 1.5024330615997314, "learning_rate": 0.0008521288515406163, "loss": 0.4863, "step": 5385 }, { "epoch": 3.0089385474860335, "grad_norm": 0.4533179998397827, "learning_rate": 0.0008521008403361344, "loss": 0.3654, "step": 5386 }, { "epoch": 3.0094972067039105, "grad_norm": 0.8234923481941223, "learning_rate": 0.0008520728291316526, "loss": 0.5443, "step": 5387 }, { "epoch": 3.0100558659217875, "grad_norm": 1.0196948051452637, "learning_rate": 0.0008520448179271709, "loss": 
0.3855, "step": 5388 }, { "epoch": 3.010614525139665, "grad_norm": 0.38857924938201904, "learning_rate": 0.0008520168067226891, "loss": 0.3629, "step": 5389 }, { "epoch": 3.011173184357542, "grad_norm": 0.7608642578125, "learning_rate": 0.0008519887955182073, "loss": 0.4688, "step": 5390 }, { "epoch": 3.011731843575419, "grad_norm": 0.7476602792739868, "learning_rate": 0.0008519607843137254, "loss": 0.5045, "step": 5391 }, { "epoch": 3.012290502793296, "grad_norm": 0.4268593192100525, "learning_rate": 0.0008519327731092436, "loss": 0.4032, "step": 5392 }, { "epoch": 3.012849162011173, "grad_norm": 0.847745954990387, "learning_rate": 0.0008519047619047619, "loss": 0.4937, "step": 5393 }, { "epoch": 3.0134078212290505, "grad_norm": 0.5917583703994751, "learning_rate": 0.0008518767507002802, "loss": 0.4772, "step": 5394 }, { "epoch": 3.0139664804469275, "grad_norm": 2.905977487564087, "learning_rate": 0.0008518487394957984, "loss": 0.4522, "step": 5395 }, { "epoch": 3.0145251396648045, "grad_norm": 0.7927609086036682, "learning_rate": 0.0008518207282913166, "loss": 0.5007, "step": 5396 }, { "epoch": 3.0150837988826815, "grad_norm": 0.4961709976196289, "learning_rate": 0.0008517927170868347, "loss": 0.4821, "step": 5397 }, { "epoch": 3.0156424581005585, "grad_norm": 0.4693697988986969, "learning_rate": 0.000851764705882353, "loss": 0.431, "step": 5398 }, { "epoch": 3.0162011173184355, "grad_norm": 0.5396574139595032, "learning_rate": 0.0008517366946778712, "loss": 0.4525, "step": 5399 }, { "epoch": 3.016759776536313, "grad_norm": 0.7243140935897827, "learning_rate": 0.0008517086834733894, "loss": 0.5513, "step": 5400 }, { "epoch": 3.01731843575419, "grad_norm": 0.7669305205345154, "learning_rate": 0.0008516806722689076, "loss": 0.4388, "step": 5401 }, { "epoch": 3.017877094972067, "grad_norm": 2.39949893951416, "learning_rate": 0.0008516526610644257, "loss": 0.4158, "step": 5402 }, { "epoch": 3.018435754189944, "grad_norm": 0.6900848150253296, "learning_rate": 
0.000851624649859944, "loss": 0.4889, "step": 5403 }, { "epoch": 3.018994413407821, "grad_norm": 0.5717877745628357, "learning_rate": 0.0008515966386554622, "loss": 0.4642, "step": 5404 }, { "epoch": 3.0195530726256985, "grad_norm": 1.1549183130264282, "learning_rate": 0.0008515686274509804, "loss": 0.544, "step": 5405 }, { "epoch": 3.0201117318435755, "grad_norm": 0.7069724798202515, "learning_rate": 0.0008515406162464986, "loss": 0.6365, "step": 5406 }, { "epoch": 3.0206703910614525, "grad_norm": 0.7818683385848999, "learning_rate": 0.0008515126050420167, "loss": 0.6091, "step": 5407 }, { "epoch": 3.0212290502793295, "grad_norm": 0.6126403212547302, "learning_rate": 0.000851484593837535, "loss": 0.4354, "step": 5408 }, { "epoch": 3.0217877094972065, "grad_norm": 1.1813786029815674, "learning_rate": 0.0008514565826330532, "loss": 0.4934, "step": 5409 }, { "epoch": 3.022346368715084, "grad_norm": 0.4681362509727478, "learning_rate": 0.0008514285714285714, "loss": 0.5089, "step": 5410 }, { "epoch": 3.022905027932961, "grad_norm": 0.609001636505127, "learning_rate": 0.0008514005602240897, "loss": 0.4533, "step": 5411 }, { "epoch": 3.023463687150838, "grad_norm": 0.6924839019775391, "learning_rate": 0.0008513725490196079, "loss": 0.4698, "step": 5412 }, { "epoch": 3.024022346368715, "grad_norm": 0.5728451013565063, "learning_rate": 0.0008513445378151261, "loss": 0.4306, "step": 5413 }, { "epoch": 3.024581005586592, "grad_norm": 1.0006122589111328, "learning_rate": 0.0008513165266106443, "loss": 0.5292, "step": 5414 }, { "epoch": 3.0251396648044695, "grad_norm": 0.5900217890739441, "learning_rate": 0.0008512885154061625, "loss": 0.417, "step": 5415 }, { "epoch": 3.0256983240223465, "grad_norm": 0.849025309085846, "learning_rate": 0.0008512605042016807, "loss": 0.3924, "step": 5416 }, { "epoch": 3.0262569832402235, "grad_norm": 1.138856053352356, "learning_rate": 0.0008512324929971989, "loss": 0.5554, "step": 5417 }, { "epoch": 3.0268156424581005, "grad_norm": 
1.0687626600265503, "learning_rate": 0.0008512044817927171, "loss": 0.4906, "step": 5418 }, { "epoch": 3.0273743016759775, "grad_norm": 0.3479156494140625, "learning_rate": 0.0008511764705882353, "loss": 0.4567, "step": 5419 }, { "epoch": 3.0279329608938546, "grad_norm": 0.45297959446907043, "learning_rate": 0.0008511484593837535, "loss": 0.3967, "step": 5420 }, { "epoch": 3.028491620111732, "grad_norm": 1.5802494287490845, "learning_rate": 0.0008511204481792717, "loss": 0.4969, "step": 5421 }, { "epoch": 3.029050279329609, "grad_norm": 3.114891290664673, "learning_rate": 0.0008510924369747899, "loss": 0.4723, "step": 5422 }, { "epoch": 3.029608938547486, "grad_norm": 0.4734114110469818, "learning_rate": 0.0008510644257703082, "loss": 0.4032, "step": 5423 }, { "epoch": 3.030167597765363, "grad_norm": 0.5840662121772766, "learning_rate": 0.0008510364145658263, "loss": 0.4633, "step": 5424 }, { "epoch": 3.03072625698324, "grad_norm": 0.6865155100822449, "learning_rate": 0.0008510084033613445, "loss": 0.4043, "step": 5425 }, { "epoch": 3.0312849162011175, "grad_norm": 0.5602957010269165, "learning_rate": 0.0008509803921568627, "loss": 0.4446, "step": 5426 }, { "epoch": 3.0318435754189945, "grad_norm": 0.45751628279685974, "learning_rate": 0.000850952380952381, "loss": 0.431, "step": 5427 }, { "epoch": 3.0324022346368715, "grad_norm": 0.979215681552887, "learning_rate": 0.0008509243697478993, "loss": 0.5194, "step": 5428 }, { "epoch": 3.0329608938547485, "grad_norm": 0.4992451071739197, "learning_rate": 0.0008508963585434174, "loss": 0.4721, "step": 5429 }, { "epoch": 3.0335195530726256, "grad_norm": 0.4723842442035675, "learning_rate": 0.0008508683473389356, "loss": 0.3636, "step": 5430 }, { "epoch": 3.034078212290503, "grad_norm": 1.1194945573806763, "learning_rate": 0.0008508403361344538, "loss": 0.4854, "step": 5431 }, { "epoch": 3.03463687150838, "grad_norm": 0.6002005934715271, "learning_rate": 0.000850812324929972, "loss": 0.5133, "step": 5432 }, { "epoch": 
3.035195530726257, "grad_norm": 0.609556257724762, "learning_rate": 0.0008507843137254903, "loss": 0.3968, "step": 5433 }, { "epoch": 3.035754189944134, "grad_norm": 0.5571757555007935, "learning_rate": 0.0008507563025210084, "loss": 0.6153, "step": 5434 }, { "epoch": 3.036312849162011, "grad_norm": 0.46065160632133484, "learning_rate": 0.0008507282913165266, "loss": 0.545, "step": 5435 }, { "epoch": 3.036871508379888, "grad_norm": 0.4290101230144501, "learning_rate": 0.0008507002801120448, "loss": 0.4854, "step": 5436 }, { "epoch": 3.0374301675977655, "grad_norm": 0.8071284890174866, "learning_rate": 0.000850672268907563, "loss": 0.5238, "step": 5437 }, { "epoch": 3.0379888268156425, "grad_norm": 0.6952858567237854, "learning_rate": 0.0008506442577030813, "loss": 0.489, "step": 5438 }, { "epoch": 3.0385474860335195, "grad_norm": 0.6648319959640503, "learning_rate": 0.0008506162464985995, "loss": 0.597, "step": 5439 }, { "epoch": 3.0391061452513966, "grad_norm": 2.418651580810547, "learning_rate": 0.0008505882352941176, "loss": 0.3934, "step": 5440 }, { "epoch": 3.0396648044692736, "grad_norm": 0.6917094588279724, "learning_rate": 0.0008505602240896358, "loss": 0.4138, "step": 5441 }, { "epoch": 3.040223463687151, "grad_norm": 0.5461602807044983, "learning_rate": 0.000850532212885154, "loss": 0.4808, "step": 5442 }, { "epoch": 3.040782122905028, "grad_norm": 0.45151203870773315, "learning_rate": 0.0008505042016806724, "loss": 0.422, "step": 5443 }, { "epoch": 3.041340782122905, "grad_norm": 0.5675491094589233, "learning_rate": 0.0008504761904761906, "loss": 0.4592, "step": 5444 }, { "epoch": 3.041899441340782, "grad_norm": 0.7291675806045532, "learning_rate": 0.0008504481792717087, "loss": 0.5236, "step": 5445 }, { "epoch": 3.042458100558659, "grad_norm": 0.40713581442832947, "learning_rate": 0.0008504201680672269, "loss": 0.4599, "step": 5446 }, { "epoch": 3.0430167597765365, "grad_norm": 0.5759377479553223, "learning_rate": 0.0008503921568627451, "loss": 0.4811, 
"step": 5447 }, { "epoch": 3.0435754189944135, "grad_norm": 0.870193362236023, "learning_rate": 0.0008503641456582634, "loss": 0.3958, "step": 5448 }, { "epoch": 3.0441340782122905, "grad_norm": 0.4510118067264557, "learning_rate": 0.0008503361344537816, "loss": 0.4206, "step": 5449 }, { "epoch": 3.0446927374301676, "grad_norm": 0.4428526759147644, "learning_rate": 0.0008503081232492997, "loss": 0.4322, "step": 5450 }, { "epoch": 3.0452513966480446, "grad_norm": 0.5920659899711609, "learning_rate": 0.0008502801120448179, "loss": 0.5337, "step": 5451 }, { "epoch": 3.0458100558659216, "grad_norm": 0.765612006187439, "learning_rate": 0.0008502521008403361, "loss": 0.4487, "step": 5452 }, { "epoch": 3.046368715083799, "grad_norm": 0.5659612417221069, "learning_rate": 0.0008502240896358544, "loss": 0.5048, "step": 5453 }, { "epoch": 3.046927374301676, "grad_norm": 0.47614389657974243, "learning_rate": 0.0008501960784313726, "loss": 0.4963, "step": 5454 }, { "epoch": 3.047486033519553, "grad_norm": 2.2290472984313965, "learning_rate": 0.0008501680672268908, "loss": 0.4536, "step": 5455 }, { "epoch": 3.04804469273743, "grad_norm": 0.5330693125724792, "learning_rate": 0.0008501400560224089, "loss": 0.4104, "step": 5456 }, { "epoch": 3.048603351955307, "grad_norm": 0.8237184286117554, "learning_rate": 0.0008501120448179271, "loss": 0.431, "step": 5457 }, { "epoch": 3.0491620111731845, "grad_norm": 0.504925012588501, "learning_rate": 0.0008500840336134454, "loss": 0.515, "step": 5458 }, { "epoch": 3.0497206703910615, "grad_norm": 0.5609250068664551, "learning_rate": 0.0008500560224089636, "loss": 0.4171, "step": 5459 }, { "epoch": 3.0502793296089385, "grad_norm": 0.6541677713394165, "learning_rate": 0.0008500280112044819, "loss": 0.4549, "step": 5460 }, { "epoch": 3.0508379888268156, "grad_norm": 0.9161620736122131, "learning_rate": 0.00085, "loss": 0.5277, "step": 5461 }, { "epoch": 3.0513966480446926, "grad_norm": 1.3137433528900146, "learning_rate": 0.0008499719887955182, 
"loss": 0.4882, "step": 5462 }, { "epoch": 3.05195530726257, "grad_norm": 0.6924602389335632, "learning_rate": 0.0008499439775910365, "loss": 0.4953, "step": 5463 }, { "epoch": 3.052513966480447, "grad_norm": 0.5731591582298279, "learning_rate": 0.0008499159663865547, "loss": 0.4847, "step": 5464 }, { "epoch": 3.053072625698324, "grad_norm": 2.5538318157196045, "learning_rate": 0.0008498879551820729, "loss": 0.5312, "step": 5465 }, { "epoch": 3.053631284916201, "grad_norm": 0.4962441027164459, "learning_rate": 0.000849859943977591, "loss": 0.49, "step": 5466 }, { "epoch": 3.054189944134078, "grad_norm": 0.9920082092285156, "learning_rate": 0.0008498319327731092, "loss": 0.463, "step": 5467 }, { "epoch": 3.054748603351955, "grad_norm": 0.42664119601249695, "learning_rate": 0.0008498039215686275, "loss": 0.45, "step": 5468 }, { "epoch": 3.0553072625698325, "grad_norm": 0.6212796568870544, "learning_rate": 0.0008497759103641457, "loss": 0.4025, "step": 5469 }, { "epoch": 3.0558659217877095, "grad_norm": 0.54094398021698, "learning_rate": 0.0008497478991596639, "loss": 0.5867, "step": 5470 }, { "epoch": 3.0564245810055866, "grad_norm": 0.5930367112159729, "learning_rate": 0.0008497198879551821, "loss": 0.4479, "step": 5471 }, { "epoch": 3.0569832402234636, "grad_norm": 0.38967838883399963, "learning_rate": 0.0008496918767507002, "loss": 0.3649, "step": 5472 }, { "epoch": 3.0575418994413406, "grad_norm": 0.38972780108451843, "learning_rate": 0.0008496638655462185, "loss": 0.4095, "step": 5473 }, { "epoch": 3.058100558659218, "grad_norm": 0.7354070544242859, "learning_rate": 0.0008496358543417367, "loss": 0.5083, "step": 5474 }, { "epoch": 3.058659217877095, "grad_norm": 0.5130355358123779, "learning_rate": 0.0008496078431372549, "loss": 0.4713, "step": 5475 }, { "epoch": 3.059217877094972, "grad_norm": 0.6274758577346802, "learning_rate": 0.0008495798319327732, "loss": 0.4377, "step": 5476 }, { "epoch": 3.059776536312849, "grad_norm": 0.6343010067939758, 
"learning_rate": 0.0008495518207282912, "loss": 0.5356, "step": 5477 }, { "epoch": 3.060335195530726, "grad_norm": 0.7146463394165039, "learning_rate": 0.0008495238095238096, "loss": 0.3958, "step": 5478 }, { "epoch": 3.0608938547486035, "grad_norm": 0.5553720593452454, "learning_rate": 0.0008494957983193278, "loss": 0.4362, "step": 5479 }, { "epoch": 3.0614525139664805, "grad_norm": 0.46876946091651917, "learning_rate": 0.000849467787114846, "loss": 0.3627, "step": 5480 }, { "epoch": 3.0620111731843576, "grad_norm": 0.438692569732666, "learning_rate": 0.0008494397759103642, "loss": 0.4711, "step": 5481 }, { "epoch": 3.0625698324022346, "grad_norm": 0.5679853558540344, "learning_rate": 0.0008494117647058823, "loss": 0.434, "step": 5482 }, { "epoch": 3.0631284916201116, "grad_norm": 0.47009748220443726, "learning_rate": 0.0008493837535014006, "loss": 0.395, "step": 5483 }, { "epoch": 3.063687150837989, "grad_norm": 0.5046628713607788, "learning_rate": 0.0008493557422969188, "loss": 0.3764, "step": 5484 }, { "epoch": 3.064245810055866, "grad_norm": 0.9186261296272278, "learning_rate": 0.000849327731092437, "loss": 0.4576, "step": 5485 }, { "epoch": 3.064804469273743, "grad_norm": 0.5351700186729431, "learning_rate": 0.0008492997198879552, "loss": 0.4692, "step": 5486 }, { "epoch": 3.06536312849162, "grad_norm": 0.5569940805435181, "learning_rate": 0.0008492717086834734, "loss": 0.3793, "step": 5487 }, { "epoch": 3.065921787709497, "grad_norm": 2.4412596225738525, "learning_rate": 0.0008492436974789916, "loss": 0.5074, "step": 5488 }, { "epoch": 3.066480446927374, "grad_norm": 0.4022413194179535, "learning_rate": 0.0008492156862745098, "loss": 0.4553, "step": 5489 }, { "epoch": 3.0670391061452515, "grad_norm": 1.9563251733779907, "learning_rate": 0.000849187675070028, "loss": 0.5153, "step": 5490 }, { "epoch": 3.0675977653631286, "grad_norm": 0.6106370687484741, "learning_rate": 0.0008491596638655462, "loss": 0.5353, "step": 5491 }, { "epoch": 3.0681564245810056, 
"grad_norm": 0.9997402429580688, "learning_rate": 0.0008491316526610644, "loss": 0.4995, "step": 5492 }, { "epoch": 3.0687150837988826, "grad_norm": 0.7541323304176331, "learning_rate": 0.0008491036414565827, "loss": 0.5709, "step": 5493 }, { "epoch": 3.0692737430167596, "grad_norm": 0.38327455520629883, "learning_rate": 0.0008490756302521009, "loss": 0.3823, "step": 5494 }, { "epoch": 3.069832402234637, "grad_norm": 0.410897821187973, "learning_rate": 0.0008490476190476191, "loss": 0.4184, "step": 5495 }, { "epoch": 3.070391061452514, "grad_norm": 0.47653019428253174, "learning_rate": 0.0008490196078431373, "loss": 0.3793, "step": 5496 }, { "epoch": 3.070949720670391, "grad_norm": 1.0713906288146973, "learning_rate": 0.0008489915966386555, "loss": 0.4462, "step": 5497 }, { "epoch": 3.071508379888268, "grad_norm": 0.5321863293647766, "learning_rate": 0.0008489635854341737, "loss": 0.5408, "step": 5498 }, { "epoch": 3.072067039106145, "grad_norm": 0.4431574046611786, "learning_rate": 0.0008489355742296919, "loss": 0.4765, "step": 5499 }, { "epoch": 3.0726256983240225, "grad_norm": 0.8000943660736084, "learning_rate": 0.0008489075630252101, "loss": 0.5143, "step": 5500 }, { "epoch": 3.0726256983240225, "eval_cer": 0.09375579631872388, "eval_loss": 0.35735177993774414, "eval_runtime": 55.6504, "eval_samples_per_second": 81.545, "eval_steps_per_second": 5.103, "eval_wer": 0.3709114270551102, "step": 5500 }, { "epoch": 3.0731843575418996, "grad_norm": 0.5274427533149719, "learning_rate": 0.0008488795518207283, "loss": 0.3762, "step": 5501 }, { "epoch": 3.0737430167597766, "grad_norm": 0.4781281650066376, "learning_rate": 0.0008488515406162465, "loss": 0.5234, "step": 5502 }, { "epoch": 3.0743016759776536, "grad_norm": 0.5493807196617126, "learning_rate": 0.0008488235294117648, "loss": 0.5391, "step": 5503 }, { "epoch": 3.0748603351955306, "grad_norm": 4.780723571777344, "learning_rate": 0.0008487955182072829, "loss": 0.5195, "step": 5504 }, { "epoch": 
3.0754189944134076, "grad_norm": 0.4696957468986511, "learning_rate": 0.0008487675070028011, "loss": 0.4055, "step": 5505 }, { "epoch": 3.075977653631285, "grad_norm": 0.6096591949462891, "learning_rate": 0.0008487394957983193, "loss": 0.5075, "step": 5506 }, { "epoch": 3.076536312849162, "grad_norm": 0.4517671763896942, "learning_rate": 0.0008487114845938375, "loss": 0.4828, "step": 5507 }, { "epoch": 3.077094972067039, "grad_norm": 0.7566508650779724, "learning_rate": 0.0008486834733893559, "loss": 0.5829, "step": 5508 }, { "epoch": 3.077653631284916, "grad_norm": 0.5540488958358765, "learning_rate": 0.000848655462184874, "loss": 0.5444, "step": 5509 }, { "epoch": 3.078212290502793, "grad_norm": 0.5105178952217102, "learning_rate": 0.0008486274509803922, "loss": 0.3697, "step": 5510 }, { "epoch": 3.0787709497206706, "grad_norm": 0.7785966992378235, "learning_rate": 0.0008485994397759104, "loss": 0.498, "step": 5511 }, { "epoch": 3.0793296089385476, "grad_norm": 1.881880521774292, "learning_rate": 0.0008485714285714286, "loss": 0.6548, "step": 5512 }, { "epoch": 3.0798882681564246, "grad_norm": 0.6333538889884949, "learning_rate": 0.0008485434173669469, "loss": 0.4901, "step": 5513 }, { "epoch": 3.0804469273743016, "grad_norm": 0.412016898393631, "learning_rate": 0.000848515406162465, "loss": 0.4515, "step": 5514 }, { "epoch": 3.0810055865921786, "grad_norm": 0.6195076704025269, "learning_rate": 0.0008484873949579832, "loss": 0.608, "step": 5515 }, { "epoch": 3.081564245810056, "grad_norm": 0.45217135548591614, "learning_rate": 0.0008484593837535014, "loss": 0.4707, "step": 5516 }, { "epoch": 3.082122905027933, "grad_norm": 0.7113887071609497, "learning_rate": 0.0008484313725490196, "loss": 0.4882, "step": 5517 }, { "epoch": 3.08268156424581, "grad_norm": 0.8828628063201904, "learning_rate": 0.0008484033613445379, "loss": 0.4991, "step": 5518 }, { "epoch": 3.083240223463687, "grad_norm": 0.5489522814750671, "learning_rate": 0.0008483753501400561, "loss": 0.4257, 
"step": 5519 }, { "epoch": 3.083798882681564, "grad_norm": 0.49655401706695557, "learning_rate": 0.0008483473389355742, "loss": 0.4676, "step": 5520 }, { "epoch": 3.0843575418994416, "grad_norm": 0.6676682233810425, "learning_rate": 0.0008483193277310924, "loss": 0.598, "step": 5521 }, { "epoch": 3.0849162011173186, "grad_norm": 0.536139190196991, "learning_rate": 0.0008482913165266106, "loss": 0.4722, "step": 5522 }, { "epoch": 3.0854748603351956, "grad_norm": 0.5223150253295898, "learning_rate": 0.0008482633053221289, "loss": 0.3845, "step": 5523 }, { "epoch": 3.0860335195530726, "grad_norm": 1.3387963771820068, "learning_rate": 0.0008482352941176471, "loss": 0.5167, "step": 5524 }, { "epoch": 3.0865921787709496, "grad_norm": 1.4403148889541626, "learning_rate": 0.0008482072829131652, "loss": 0.4271, "step": 5525 }, { "epoch": 3.0871508379888266, "grad_norm": 1.0278398990631104, "learning_rate": 0.0008481792717086835, "loss": 0.4866, "step": 5526 }, { "epoch": 3.087709497206704, "grad_norm": 0.41480597853660583, "learning_rate": 0.0008481512605042017, "loss": 0.3641, "step": 5527 }, { "epoch": 3.088268156424581, "grad_norm": 0.8197434544563293, "learning_rate": 0.00084812324929972, "loss": 0.4056, "step": 5528 }, { "epoch": 3.088826815642458, "grad_norm": 0.364523708820343, "learning_rate": 0.0008480952380952382, "loss": 0.3921, "step": 5529 }, { "epoch": 3.089385474860335, "grad_norm": 0.6194524168968201, "learning_rate": 0.0008480672268907563, "loss": 0.5028, "step": 5530 }, { "epoch": 3.089944134078212, "grad_norm": 0.503078818321228, "learning_rate": 0.0008480392156862745, "loss": 0.474, "step": 5531 }, { "epoch": 3.0905027932960896, "grad_norm": 1.7513192892074585, "learning_rate": 0.0008480112044817927, "loss": 0.381, "step": 5532 }, { "epoch": 3.0910614525139666, "grad_norm": 0.5741810202598572, "learning_rate": 0.000847983193277311, "loss": 0.4214, "step": 5533 }, { "epoch": 3.0916201117318436, "grad_norm": 0.6218336820602417, "learning_rate": 
0.0008479551820728292, "loss": 0.4276, "step": 5534 }, { "epoch": 3.0921787709497206, "grad_norm": 0.5685567855834961, "learning_rate": 0.0008479271708683474, "loss": 0.4764, "step": 5535 }, { "epoch": 3.0927374301675976, "grad_norm": 0.6985644698143005, "learning_rate": 0.0008478991596638655, "loss": 0.4882, "step": 5536 }, { "epoch": 3.093296089385475, "grad_norm": 0.5651882886886597, "learning_rate": 0.0008478711484593837, "loss": 0.6333, "step": 5537 }, { "epoch": 3.093854748603352, "grad_norm": 0.3793085515499115, "learning_rate": 0.000847843137254902, "loss": 0.4573, "step": 5538 }, { "epoch": 3.094413407821229, "grad_norm": 0.5325741767883301, "learning_rate": 0.0008478151260504202, "loss": 0.4729, "step": 5539 }, { "epoch": 3.094972067039106, "grad_norm": 0.8570863008499146, "learning_rate": 0.0008477871148459384, "loss": 0.7019, "step": 5540 }, { "epoch": 3.095530726256983, "grad_norm": 0.6007282733917236, "learning_rate": 0.0008477591036414565, "loss": 0.5069, "step": 5541 }, { "epoch": 3.09608938547486, "grad_norm": 0.5458068251609802, "learning_rate": 0.0008477310924369747, "loss": 0.5598, "step": 5542 }, { "epoch": 3.0966480446927376, "grad_norm": 0.47937455773353577, "learning_rate": 0.0008477030812324931, "loss": 0.4483, "step": 5543 }, { "epoch": 3.0972067039106146, "grad_norm": 0.4870924949645996, "learning_rate": 0.0008476750700280113, "loss": 0.4401, "step": 5544 }, { "epoch": 3.0977653631284916, "grad_norm": 1.1074011325836182, "learning_rate": 0.0008476470588235295, "loss": 0.4424, "step": 5545 }, { "epoch": 3.0983240223463686, "grad_norm": 0.5029626488685608, "learning_rate": 0.0008476190476190476, "loss": 0.4748, "step": 5546 }, { "epoch": 3.0988826815642456, "grad_norm": 3.9221127033233643, "learning_rate": 0.0008475910364145658, "loss": 0.542, "step": 5547 }, { "epoch": 3.099441340782123, "grad_norm": 0.6362393498420715, "learning_rate": 0.0008475630252100841, "loss": 0.4191, "step": 5548 }, { "epoch": 3.1, "grad_norm": 0.4929668605327606, 
"learning_rate": 0.0008475350140056023, "loss": 0.3726, "step": 5549 }, { "epoch": 3.100558659217877, "grad_norm": 0.5730764269828796, "learning_rate": 0.0008475070028011205, "loss": 0.4915, "step": 5550 }, { "epoch": 3.101117318435754, "grad_norm": 0.5927708745002747, "learning_rate": 0.0008474789915966387, "loss": 0.4225, "step": 5551 }, { "epoch": 3.101675977653631, "grad_norm": 0.4804950952529907, "learning_rate": 0.0008474509803921568, "loss": 0.3416, "step": 5552 }, { "epoch": 3.1022346368715086, "grad_norm": 0.6153953671455383, "learning_rate": 0.0008474229691876751, "loss": 0.49, "step": 5553 }, { "epoch": 3.1027932960893856, "grad_norm": 0.7899450659751892, "learning_rate": 0.0008473949579831933, "loss": 0.4699, "step": 5554 }, { "epoch": 3.1033519553072626, "grad_norm": 0.5297183990478516, "learning_rate": 0.0008473669467787115, "loss": 0.4664, "step": 5555 }, { "epoch": 3.1039106145251396, "grad_norm": 1.1394513845443726, "learning_rate": 0.0008473389355742297, "loss": 0.403, "step": 5556 }, { "epoch": 3.1044692737430166, "grad_norm": 1.3090755939483643, "learning_rate": 0.0008473109243697478, "loss": 0.4964, "step": 5557 }, { "epoch": 3.105027932960894, "grad_norm": 0.5511622428894043, "learning_rate": 0.0008472829131652662, "loss": 0.4778, "step": 5558 }, { "epoch": 3.105586592178771, "grad_norm": 0.7322571277618408, "learning_rate": 0.0008472549019607844, "loss": 0.4158, "step": 5559 }, { "epoch": 3.106145251396648, "grad_norm": 0.6841732263565063, "learning_rate": 0.0008472268907563026, "loss": 0.4086, "step": 5560 }, { "epoch": 3.106703910614525, "grad_norm": 0.550640881061554, "learning_rate": 0.0008471988795518208, "loss": 0.4952, "step": 5561 }, { "epoch": 3.107262569832402, "grad_norm": 1.0008827447891235, "learning_rate": 0.0008471708683473389, "loss": 0.4601, "step": 5562 }, { "epoch": 3.107821229050279, "grad_norm": 0.6077209711074829, "learning_rate": 0.0008471428571428572, "loss": 0.582, "step": 5563 }, { "epoch": 3.1083798882681566, 
"grad_norm": 0.5013401508331299, "learning_rate": 0.0008471148459383754, "loss": 0.4023, "step": 5564 }, { "epoch": 3.1089385474860336, "grad_norm": 0.44117945432662964, "learning_rate": 0.0008470868347338936, "loss": 0.5147, "step": 5565 }, { "epoch": 3.1094972067039106, "grad_norm": 0.7289434671401978, "learning_rate": 0.0008470588235294118, "loss": 0.4158, "step": 5566 }, { "epoch": 3.1100558659217876, "grad_norm": 1.3812426328659058, "learning_rate": 0.00084703081232493, "loss": 0.542, "step": 5567 }, { "epoch": 3.1106145251396646, "grad_norm": 0.683921754360199, "learning_rate": 0.0008470028011204482, "loss": 0.4842, "step": 5568 }, { "epoch": 3.111173184357542, "grad_norm": 0.49743011593818665, "learning_rate": 0.0008469747899159664, "loss": 0.4649, "step": 5569 }, { "epoch": 3.111731843575419, "grad_norm": 1.115496277809143, "learning_rate": 0.0008469467787114846, "loss": 0.2978, "step": 5570 }, { "epoch": 3.112290502793296, "grad_norm": 0.47693005204200745, "learning_rate": 0.0008469187675070028, "loss": 0.4808, "step": 5571 }, { "epoch": 3.112849162011173, "grad_norm": 0.7259455919265747, "learning_rate": 0.000846890756302521, "loss": 0.4257, "step": 5572 }, { "epoch": 3.11340782122905, "grad_norm": 0.503884494304657, "learning_rate": 0.0008468627450980391, "loss": 0.403, "step": 5573 }, { "epoch": 3.1139664804469276, "grad_norm": 0.4987591505050659, "learning_rate": 0.0008468347338935574, "loss": 0.4607, "step": 5574 }, { "epoch": 3.1145251396648046, "grad_norm": 0.46255776286125183, "learning_rate": 0.0008468067226890757, "loss": 0.4712, "step": 5575 }, { "epoch": 3.1150837988826816, "grad_norm": 0.520065188407898, "learning_rate": 0.0008467787114845939, "loss": 0.4239, "step": 5576 }, { "epoch": 3.1156424581005586, "grad_norm": 0.7515888810157776, "learning_rate": 0.0008467507002801121, "loss": 0.4432, "step": 5577 }, { "epoch": 3.1162011173184356, "grad_norm": 0.8942346572875977, "learning_rate": 0.0008467226890756302, "loss": 0.5083, "step": 5578 }, { 
"epoch": 3.1167597765363126, "grad_norm": 0.38200417160987854, "learning_rate": 0.0008466946778711485, "loss": 0.4241, "step": 5579 }, { "epoch": 3.11731843575419, "grad_norm": 0.4806053340435028, "learning_rate": 0.0008466666666666667, "loss": 0.4384, "step": 5580 }, { "epoch": 3.117877094972067, "grad_norm": 0.6235408186912537, "learning_rate": 0.0008466386554621849, "loss": 0.4569, "step": 5581 }, { "epoch": 3.118435754189944, "grad_norm": 0.9441065788269043, "learning_rate": 0.0008466106442577031, "loss": 0.4306, "step": 5582 }, { "epoch": 3.118994413407821, "grad_norm": 0.46072858572006226, "learning_rate": 0.0008465826330532213, "loss": 0.5688, "step": 5583 }, { "epoch": 3.119553072625698, "grad_norm": 0.6990225315093994, "learning_rate": 0.0008465546218487395, "loss": 0.51, "step": 5584 }, { "epoch": 3.1201117318435756, "grad_norm": 0.6464018821716309, "learning_rate": 0.0008465266106442577, "loss": 0.5027, "step": 5585 }, { "epoch": 3.1206703910614526, "grad_norm": 0.4172152578830719, "learning_rate": 0.0008464985994397759, "loss": 0.3654, "step": 5586 }, { "epoch": 3.1212290502793296, "grad_norm": 1.3186568021774292, "learning_rate": 0.0008464705882352941, "loss": 0.4486, "step": 5587 }, { "epoch": 3.1217877094972066, "grad_norm": 1.02861487865448, "learning_rate": 0.0008464425770308123, "loss": 0.4515, "step": 5588 }, { "epoch": 3.1223463687150836, "grad_norm": 0.585443913936615, "learning_rate": 0.0008464145658263305, "loss": 0.5493, "step": 5589 }, { "epoch": 3.122905027932961, "grad_norm": 0.5775396823883057, "learning_rate": 0.0008463865546218487, "loss": 0.36, "step": 5590 }, { "epoch": 3.123463687150838, "grad_norm": 0.6701005101203918, "learning_rate": 0.000846358543417367, "loss": 0.6122, "step": 5591 }, { "epoch": 3.124022346368715, "grad_norm": 0.7165709137916565, "learning_rate": 0.0008463305322128852, "loss": 0.6009, "step": 5592 }, { "epoch": 3.124581005586592, "grad_norm": 0.5199567675590515, "learning_rate": 0.0008463025210084034, "loss": 
0.4555, "step": 5593 }, { "epoch": 3.125139664804469, "grad_norm": 0.4732874631881714, "learning_rate": 0.0008462745098039217, "loss": 0.5021, "step": 5594 }, { "epoch": 3.1256983240223466, "grad_norm": 0.5118914842605591, "learning_rate": 0.0008462464985994398, "loss": 0.5008, "step": 5595 }, { "epoch": 3.1262569832402236, "grad_norm": 0.6696200966835022, "learning_rate": 0.000846218487394958, "loss": 0.4497, "step": 5596 }, { "epoch": 3.1268156424581006, "grad_norm": 0.6452716588973999, "learning_rate": 0.0008461904761904762, "loss": 0.3698, "step": 5597 }, { "epoch": 3.1273743016759776, "grad_norm": 0.5884919762611389, "learning_rate": 0.0008461624649859944, "loss": 0.4545, "step": 5598 }, { "epoch": 3.1279329608938546, "grad_norm": 1.2152279615402222, "learning_rate": 0.0008461344537815127, "loss": 0.5066, "step": 5599 }, { "epoch": 3.1284916201117317, "grad_norm": 2.404604434967041, "learning_rate": 0.0008461064425770308, "loss": 0.4193, "step": 5600 }, { "epoch": 3.129050279329609, "grad_norm": 0.5319065451622009, "learning_rate": 0.000846078431372549, "loss": 0.5891, "step": 5601 }, { "epoch": 3.129608938547486, "grad_norm": 0.48796480894088745, "learning_rate": 0.0008460504201680672, "loss": 0.4536, "step": 5602 }, { "epoch": 3.130167597765363, "grad_norm": 0.5275574922561646, "learning_rate": 0.0008460224089635854, "loss": 0.4787, "step": 5603 }, { "epoch": 3.13072625698324, "grad_norm": 0.5120730400085449, "learning_rate": 0.0008459943977591037, "loss": 0.498, "step": 5604 }, { "epoch": 3.131284916201117, "grad_norm": 0.593523383140564, "learning_rate": 0.0008459663865546218, "loss": 0.4781, "step": 5605 }, { "epoch": 3.1318435754189946, "grad_norm": 0.7718291282653809, "learning_rate": 0.00084593837535014, "loss": 0.555, "step": 5606 }, { "epoch": 3.1324022346368716, "grad_norm": 0.5640621185302734, "learning_rate": 0.0008459103641456582, "loss": 0.454, "step": 5607 }, { "epoch": 3.1329608938547486, "grad_norm": 0.7334309220314026, "learning_rate": 
0.0008458823529411765, "loss": 0.5365, "step": 5608 }, { "epoch": 3.1335195530726256, "grad_norm": 0.8507549166679382, "learning_rate": 0.0008458543417366948, "loss": 0.3926, "step": 5609 }, { "epoch": 3.1340782122905027, "grad_norm": 0.5288267135620117, "learning_rate": 0.000845826330532213, "loss": 0.4904, "step": 5610 }, { "epoch": 3.1346368715083797, "grad_norm": 0.5941880941390991, "learning_rate": 0.0008457983193277311, "loss": 0.4464, "step": 5611 }, { "epoch": 3.135195530726257, "grad_norm": 0.6736599802970886, "learning_rate": 0.0008457703081232493, "loss": 0.5198, "step": 5612 }, { "epoch": 3.135754189944134, "grad_norm": 0.5984303951263428, "learning_rate": 0.0008457422969187675, "loss": 0.4512, "step": 5613 }, { "epoch": 3.136312849162011, "grad_norm": 0.43068036437034607, "learning_rate": 0.0008457142857142858, "loss": 0.3395, "step": 5614 }, { "epoch": 3.136871508379888, "grad_norm": 1.7956981658935547, "learning_rate": 0.000845686274509804, "loss": 0.4573, "step": 5615 }, { "epoch": 3.137430167597765, "grad_norm": 1.5689762830734253, "learning_rate": 0.0008456582633053221, "loss": 0.463, "step": 5616 }, { "epoch": 3.1379888268156426, "grad_norm": 0.7011322379112244, "learning_rate": 0.0008456302521008403, "loss": 0.3743, "step": 5617 }, { "epoch": 3.1385474860335196, "grad_norm": 0.4777504801750183, "learning_rate": 0.0008456022408963585, "loss": 0.458, "step": 5618 }, { "epoch": 3.1391061452513966, "grad_norm": 0.9461365938186646, "learning_rate": 0.0008455742296918768, "loss": 0.4364, "step": 5619 }, { "epoch": 3.1396648044692737, "grad_norm": 0.49534231424331665, "learning_rate": 0.000845546218487395, "loss": 0.4795, "step": 5620 }, { "epoch": 3.1402234636871507, "grad_norm": 0.61227947473526, "learning_rate": 0.0008455182072829131, "loss": 0.5414, "step": 5621 }, { "epoch": 3.140782122905028, "grad_norm": 0.7459039688110352, "learning_rate": 0.0008454901960784313, "loss": 0.4904, "step": 5622 }, { "epoch": 3.141340782122905, "grad_norm": 
1.202772855758667, "learning_rate": 0.0008454621848739495, "loss": 0.5283, "step": 5623 }, { "epoch": 3.141899441340782, "grad_norm": 0.4477715492248535, "learning_rate": 0.0008454341736694679, "loss": 0.3747, "step": 5624 }, { "epoch": 3.142458100558659, "grad_norm": 0.5851945877075195, "learning_rate": 0.0008454061624649861, "loss": 0.5378, "step": 5625 }, { "epoch": 3.143016759776536, "grad_norm": 0.4029100835323334, "learning_rate": 0.0008453781512605043, "loss": 0.4517, "step": 5626 }, { "epoch": 3.1435754189944136, "grad_norm": 0.42703381180763245, "learning_rate": 0.0008453501400560224, "loss": 0.4815, "step": 5627 }, { "epoch": 3.1441340782122906, "grad_norm": 12.102084159851074, "learning_rate": 0.0008453221288515406, "loss": 0.569, "step": 5628 }, { "epoch": 3.1446927374301676, "grad_norm": 0.6901870369911194, "learning_rate": 0.0008452941176470589, "loss": 0.4042, "step": 5629 }, { "epoch": 3.1452513966480447, "grad_norm": 1.1148213148117065, "learning_rate": 0.0008452661064425771, "loss": 0.3946, "step": 5630 }, { "epoch": 3.1458100558659217, "grad_norm": 0.4705391526222229, "learning_rate": 0.0008452380952380953, "loss": 0.5268, "step": 5631 }, { "epoch": 3.146368715083799, "grad_norm": 0.6951575875282288, "learning_rate": 0.0008452100840336134, "loss": 0.4138, "step": 5632 }, { "epoch": 3.146927374301676, "grad_norm": 0.6584864854812622, "learning_rate": 0.0008451820728291316, "loss": 0.4196, "step": 5633 }, { "epoch": 3.147486033519553, "grad_norm": 0.5888062715530396, "learning_rate": 0.0008451540616246499, "loss": 0.5384, "step": 5634 }, { "epoch": 3.14804469273743, "grad_norm": 0.8504028916358948, "learning_rate": 0.0008451260504201681, "loss": 0.4661, "step": 5635 }, { "epoch": 3.148603351955307, "grad_norm": 0.45534247159957886, "learning_rate": 0.0008450980392156863, "loss": 0.4792, "step": 5636 }, { "epoch": 3.149162011173184, "grad_norm": 0.6078174710273743, "learning_rate": 0.0008450700280112044, "loss": 0.5263, "step": 5637 }, { "epoch": 
3.1497206703910616, "grad_norm": 0.48322969675064087, "learning_rate": 0.0008450420168067226, "loss": 0.4213, "step": 5638 }, { "epoch": 3.1502793296089386, "grad_norm": 0.5949899554252625, "learning_rate": 0.0008450140056022409, "loss": 0.4008, "step": 5639 }, { "epoch": 3.1508379888268156, "grad_norm": 0.4921998679637909, "learning_rate": 0.0008449859943977592, "loss": 0.4125, "step": 5640 }, { "epoch": 3.1513966480446927, "grad_norm": 1.483995795249939, "learning_rate": 0.0008449579831932774, "loss": 0.5603, "step": 5641 }, { "epoch": 3.1519553072625697, "grad_norm": 0.4538545608520508, "learning_rate": 0.0008449299719887956, "loss": 0.4636, "step": 5642 }, { "epoch": 3.152513966480447, "grad_norm": 0.49479126930236816, "learning_rate": 0.0008449019607843137, "loss": 0.4627, "step": 5643 }, { "epoch": 3.153072625698324, "grad_norm": 6.404916763305664, "learning_rate": 0.000844873949579832, "loss": 0.3233, "step": 5644 }, { "epoch": 3.153631284916201, "grad_norm": 0.6194260120391846, "learning_rate": 0.0008448459383753502, "loss": 0.4499, "step": 5645 }, { "epoch": 3.154189944134078, "grad_norm": 0.479398250579834, "learning_rate": 0.0008448179271708684, "loss": 0.5014, "step": 5646 }, { "epoch": 3.154748603351955, "grad_norm": 0.6063601970672607, "learning_rate": 0.0008447899159663866, "loss": 0.533, "step": 5647 }, { "epoch": 3.155307262569832, "grad_norm": 0.5322023630142212, "learning_rate": 0.0008447619047619047, "loss": 0.436, "step": 5648 }, { "epoch": 3.1558659217877096, "grad_norm": 0.424039363861084, "learning_rate": 0.000844733893557423, "loss": 0.4873, "step": 5649 }, { "epoch": 3.1564245810055866, "grad_norm": 0.47259289026260376, "learning_rate": 0.0008447058823529412, "loss": 0.4794, "step": 5650 }, { "epoch": 3.1569832402234637, "grad_norm": 0.456737756729126, "learning_rate": 0.0008446778711484594, "loss": 0.3803, "step": 5651 }, { "epoch": 3.1575418994413407, "grad_norm": 0.48854315280914307, "learning_rate": 0.0008446498599439776, "loss": 
0.4304, "step": 5652 }, { "epoch": 3.1581005586592177, "grad_norm": 0.39837995171546936, "learning_rate": 0.0008446218487394957, "loss": 0.4636, "step": 5653 }, { "epoch": 3.158659217877095, "grad_norm": 0.4552333950996399, "learning_rate": 0.000844593837535014, "loss": 0.4663, "step": 5654 }, { "epoch": 3.159217877094972, "grad_norm": 0.422201007604599, "learning_rate": 0.0008445658263305322, "loss": 0.5065, "step": 5655 }, { "epoch": 3.159776536312849, "grad_norm": 0.422823965549469, "learning_rate": 0.0008445378151260504, "loss": 0.566, "step": 5656 }, { "epoch": 3.160335195530726, "grad_norm": 0.49043142795562744, "learning_rate": 0.0008445098039215687, "loss": 0.4752, "step": 5657 }, { "epoch": 3.160893854748603, "grad_norm": 0.5856580138206482, "learning_rate": 0.0008444817927170869, "loss": 0.4897, "step": 5658 }, { "epoch": 3.1614525139664806, "grad_norm": 1.0482743978500366, "learning_rate": 0.0008444537815126051, "loss": 0.5223, "step": 5659 }, { "epoch": 3.1620111731843576, "grad_norm": 0.6414709091186523, "learning_rate": 0.0008444257703081233, "loss": 0.381, "step": 5660 }, { "epoch": 3.1625698324022347, "grad_norm": 0.49149081110954285, "learning_rate": 0.0008443977591036415, "loss": 0.4525, "step": 5661 }, { "epoch": 3.1631284916201117, "grad_norm": 0.5326992869377136, "learning_rate": 0.0008443697478991597, "loss": 0.4691, "step": 5662 }, { "epoch": 3.1636871508379887, "grad_norm": 0.6219455003738403, "learning_rate": 0.0008443417366946779, "loss": 0.4394, "step": 5663 }, { "epoch": 3.164245810055866, "grad_norm": 0.7326778173446655, "learning_rate": 0.0008443137254901961, "loss": 0.6552, "step": 5664 }, { "epoch": 3.164804469273743, "grad_norm": 0.3856636881828308, "learning_rate": 0.0008442857142857143, "loss": 0.3941, "step": 5665 }, { "epoch": 3.16536312849162, "grad_norm": 0.4134659469127655, "learning_rate": 0.0008442577030812325, "loss": 0.4796, "step": 5666 }, { "epoch": 3.165921787709497, "grad_norm": 0.46562039852142334, "learning_rate": 
0.0008442296918767507, "loss": 0.3128, "step": 5667 }, { "epoch": 3.166480446927374, "grad_norm": 0.572398841381073, "learning_rate": 0.0008442016806722689, "loss": 0.5344, "step": 5668 }, { "epoch": 3.167039106145251, "grad_norm": 0.8000866174697876, "learning_rate": 0.0008441736694677871, "loss": 0.6003, "step": 5669 }, { "epoch": 3.1675977653631286, "grad_norm": 1.1431472301483154, "learning_rate": 0.0008441456582633053, "loss": 0.4302, "step": 5670 }, { "epoch": 3.1681564245810057, "grad_norm": 1.4717786312103271, "learning_rate": 0.0008441176470588235, "loss": 0.3784, "step": 5671 }, { "epoch": 3.1687150837988827, "grad_norm": 0.513985276222229, "learning_rate": 0.0008440896358543417, "loss": 0.4501, "step": 5672 }, { "epoch": 3.1692737430167597, "grad_norm": 0.9906004667282104, "learning_rate": 0.00084406162464986, "loss": 0.5115, "step": 5673 }, { "epoch": 3.1698324022346367, "grad_norm": 0.8090980052947998, "learning_rate": 0.0008440336134453783, "loss": 0.4552, "step": 5674 }, { "epoch": 3.170391061452514, "grad_norm": 0.5078025460243225, "learning_rate": 0.0008440056022408964, "loss": 0.4519, "step": 5675 }, { "epoch": 3.170949720670391, "grad_norm": 0.6318791508674622, "learning_rate": 0.0008439775910364146, "loss": 0.4745, "step": 5676 }, { "epoch": 3.171508379888268, "grad_norm": 0.6941470503807068, "learning_rate": 0.0008439495798319328, "loss": 0.3625, "step": 5677 }, { "epoch": 3.172067039106145, "grad_norm": 0.4782574474811554, "learning_rate": 0.000843921568627451, "loss": 0.4658, "step": 5678 }, { "epoch": 3.172625698324022, "grad_norm": 0.5398354530334473, "learning_rate": 0.0008438935574229693, "loss": 0.4546, "step": 5679 }, { "epoch": 3.1731843575418996, "grad_norm": 0.5194029211997986, "learning_rate": 0.0008438655462184874, "loss": 0.5172, "step": 5680 }, { "epoch": 3.1737430167597767, "grad_norm": 0.5547402501106262, "learning_rate": 0.0008438375350140056, "loss": 0.4501, "step": 5681 }, { "epoch": 3.1743016759776537, "grad_norm": 
0.6482549905776978, "learning_rate": 0.0008438095238095238, "loss": 0.4743, "step": 5682 }, { "epoch": 3.1748603351955307, "grad_norm": 0.8866221308708191, "learning_rate": 0.000843781512605042, "loss": 0.5239, "step": 5683 }, { "epoch": 3.1754189944134077, "grad_norm": 0.7125205397605896, "learning_rate": 0.0008437535014005603, "loss": 0.4964, "step": 5684 }, { "epoch": 3.1759776536312847, "grad_norm": 0.4755825102329254, "learning_rate": 0.0008437254901960784, "loss": 0.4781, "step": 5685 }, { "epoch": 3.176536312849162, "grad_norm": 0.4849969744682312, "learning_rate": 0.0008436974789915966, "loss": 0.4686, "step": 5686 }, { "epoch": 3.177094972067039, "grad_norm": 6.330876350402832, "learning_rate": 0.0008436694677871148, "loss": 0.3751, "step": 5687 }, { "epoch": 3.177653631284916, "grad_norm": 0.6688026189804077, "learning_rate": 0.000843641456582633, "loss": 0.4104, "step": 5688 }, { "epoch": 3.178212290502793, "grad_norm": 0.8242323994636536, "learning_rate": 0.0008436134453781514, "loss": 0.4669, "step": 5689 }, { "epoch": 3.17877094972067, "grad_norm": 0.9547811150550842, "learning_rate": 0.0008435854341736696, "loss": 0.4651, "step": 5690 }, { "epoch": 3.1793296089385477, "grad_norm": 0.870112419128418, "learning_rate": 0.0008435574229691877, "loss": 0.5659, "step": 5691 }, { "epoch": 3.1798882681564247, "grad_norm": 1.4759526252746582, "learning_rate": 0.0008435294117647059, "loss": 0.8533, "step": 5692 }, { "epoch": 3.1804469273743017, "grad_norm": 0.8604695200920105, "learning_rate": 0.0008435014005602241, "loss": 0.5095, "step": 5693 }, { "epoch": 3.1810055865921787, "grad_norm": 0.615852415561676, "learning_rate": 0.0008434733893557424, "loss": 0.4572, "step": 5694 }, { "epoch": 3.1815642458100557, "grad_norm": 0.4653518497943878, "learning_rate": 0.0008434453781512606, "loss": 0.3781, "step": 5695 }, { "epoch": 3.182122905027933, "grad_norm": 0.4282912313938141, "learning_rate": 0.0008434173669467787, "loss": 0.4924, "step": 5696 }, { "epoch": 
3.18268156424581, "grad_norm": 1.489055871963501, "learning_rate": 0.0008433893557422969, "loss": 0.4605, "step": 5697 }, { "epoch": 3.183240223463687, "grad_norm": 0.5174456834793091, "learning_rate": 0.0008433613445378151, "loss": 0.5485, "step": 5698 }, { "epoch": 3.183798882681564, "grad_norm": 0.47346973419189453, "learning_rate": 0.0008433333333333334, "loss": 0.5117, "step": 5699 }, { "epoch": 3.184357541899441, "grad_norm": 3.379406690597534, "learning_rate": 0.0008433053221288516, "loss": 0.5139, "step": 5700 }, { "epoch": 3.1849162011173187, "grad_norm": 0.47423630952835083, "learning_rate": 0.0008432773109243697, "loss": 0.5593, "step": 5701 }, { "epoch": 3.1854748603351957, "grad_norm": 0.5654445886611938, "learning_rate": 0.0008432492997198879, "loss": 0.4591, "step": 5702 }, { "epoch": 3.1860335195530727, "grad_norm": 1.4614496231079102, "learning_rate": 0.0008432212885154061, "loss": 0.524, "step": 5703 }, { "epoch": 3.1865921787709497, "grad_norm": 0.8168285489082336, "learning_rate": 0.0008431932773109244, "loss": 0.4515, "step": 5704 }, { "epoch": 3.1871508379888267, "grad_norm": 1.4792838096618652, "learning_rate": 0.0008431652661064426, "loss": 0.4342, "step": 5705 }, { "epoch": 3.1877094972067037, "grad_norm": 0.3846934139728546, "learning_rate": 0.0008431372549019609, "loss": 0.3252, "step": 5706 }, { "epoch": 3.188268156424581, "grad_norm": 0.7086730599403381, "learning_rate": 0.000843109243697479, "loss": 0.5533, "step": 5707 }, { "epoch": 3.188826815642458, "grad_norm": 0.6092351675033569, "learning_rate": 0.0008430812324929972, "loss": 0.5899, "step": 5708 }, { "epoch": 3.189385474860335, "grad_norm": 0.5143446326255798, "learning_rate": 0.0008430532212885155, "loss": 0.4684, "step": 5709 }, { "epoch": 3.189944134078212, "grad_norm": 0.6106857061386108, "learning_rate": 0.0008430252100840337, "loss": 0.4214, "step": 5710 }, { "epoch": 3.190502793296089, "grad_norm": 0.47395002841949463, "learning_rate": 0.0008429971988795519, "loss": 
0.4858, "step": 5711 }, { "epoch": 3.1910614525139667, "grad_norm": 0.5685007572174072, "learning_rate": 0.00084296918767507, "loss": 0.4735, "step": 5712 }, { "epoch": 3.1916201117318437, "grad_norm": 1.178513765335083, "learning_rate": 0.0008429411764705882, "loss": 0.4756, "step": 5713 }, { "epoch": 3.1921787709497207, "grad_norm": 0.5206089615821838, "learning_rate": 0.0008429131652661065, "loss": 0.588, "step": 5714 }, { "epoch": 3.1927374301675977, "grad_norm": 0.39936062693595886, "learning_rate": 0.0008428851540616247, "loss": 0.4499, "step": 5715 }, { "epoch": 3.1932960893854747, "grad_norm": 0.555731475353241, "learning_rate": 0.0008428571428571429, "loss": 0.4558, "step": 5716 }, { "epoch": 3.1938547486033517, "grad_norm": 1.530231237411499, "learning_rate": 0.000842829131652661, "loss": 0.4141, "step": 5717 }, { "epoch": 3.194413407821229, "grad_norm": 0.7651337385177612, "learning_rate": 0.0008428011204481792, "loss": 0.481, "step": 5718 }, { "epoch": 3.194972067039106, "grad_norm": 0.5995467305183411, "learning_rate": 0.0008427731092436975, "loss": 0.5484, "step": 5719 }, { "epoch": 3.195530726256983, "grad_norm": 0.5191141963005066, "learning_rate": 0.0008427450980392157, "loss": 0.4855, "step": 5720 }, { "epoch": 3.19608938547486, "grad_norm": 0.6673374176025391, "learning_rate": 0.0008427170868347339, "loss": 0.5144, "step": 5721 }, { "epoch": 3.1966480446927372, "grad_norm": 0.4839000999927521, "learning_rate": 0.0008426890756302522, "loss": 0.4386, "step": 5722 }, { "epoch": 3.1972067039106147, "grad_norm": 0.9123650789260864, "learning_rate": 0.0008426610644257702, "loss": 0.6075, "step": 5723 }, { "epoch": 3.1977653631284917, "grad_norm": 0.48614388704299927, "learning_rate": 0.0008426330532212886, "loss": 0.4924, "step": 5724 }, { "epoch": 3.1983240223463687, "grad_norm": 0.4855598509311676, "learning_rate": 0.0008426050420168068, "loss": 0.5797, "step": 5725 }, { "epoch": 3.1988826815642457, "grad_norm": 0.5441485643386841, "learning_rate": 
0.000842577030812325, "loss": 0.4406, "step": 5726 }, { "epoch": 3.1994413407821227, "grad_norm": 0.8743434548377991, "learning_rate": 0.0008425490196078432, "loss": 0.4153, "step": 5727 }, { "epoch": 3.2, "grad_norm": 0.5514986515045166, "learning_rate": 0.0008425210084033613, "loss": 0.3868, "step": 5728 }, { "epoch": 3.200558659217877, "grad_norm": 1.6775099039077759, "learning_rate": 0.0008424929971988796, "loss": 0.5614, "step": 5729 }, { "epoch": 3.201117318435754, "grad_norm": 0.5128558278083801, "learning_rate": 0.0008424649859943978, "loss": 0.5077, "step": 5730 }, { "epoch": 3.201675977653631, "grad_norm": 0.40318864583969116, "learning_rate": 0.000842436974789916, "loss": 0.3648, "step": 5731 }, { "epoch": 3.2022346368715082, "grad_norm": 0.4633824825286865, "learning_rate": 0.0008424089635854342, "loss": 0.4542, "step": 5732 }, { "epoch": 3.2027932960893857, "grad_norm": 1.8085352182388306, "learning_rate": 0.0008423809523809523, "loss": 0.4831, "step": 5733 }, { "epoch": 3.2033519553072627, "grad_norm": 0.5630502104759216, "learning_rate": 0.0008423529411764706, "loss": 0.4112, "step": 5734 }, { "epoch": 3.2039106145251397, "grad_norm": 2.502984046936035, "learning_rate": 0.0008423249299719888, "loss": 0.341, "step": 5735 }, { "epoch": 3.2044692737430167, "grad_norm": 0.5019913911819458, "learning_rate": 0.000842296918767507, "loss": 0.4526, "step": 5736 }, { "epoch": 3.2050279329608937, "grad_norm": 0.9250722527503967, "learning_rate": 0.0008422689075630252, "loss": 0.6133, "step": 5737 }, { "epoch": 3.205586592178771, "grad_norm": 0.7668030261993408, "learning_rate": 0.0008422408963585434, "loss": 0.4595, "step": 5738 }, { "epoch": 3.206145251396648, "grad_norm": 0.8034147024154663, "learning_rate": 0.0008422128851540617, "loss": 0.4791, "step": 5739 }, { "epoch": 3.206703910614525, "grad_norm": 0.5307127833366394, "learning_rate": 0.0008421848739495799, "loss": 0.5161, "step": 5740 }, { "epoch": 3.207262569832402, "grad_norm": 0.5297577977180481, 
"learning_rate": 0.0008421568627450981, "loss": 0.4318, "step": 5741 }, { "epoch": 3.207821229050279, "grad_norm": 1.2233542203903198, "learning_rate": 0.0008421288515406163, "loss": 0.3532, "step": 5742 }, { "epoch": 3.2083798882681562, "grad_norm": 0.7091529369354248, "learning_rate": 0.0008421008403361345, "loss": 0.4733, "step": 5743 }, { "epoch": 3.2089385474860337, "grad_norm": 0.7562621831893921, "learning_rate": 0.0008420728291316527, "loss": 0.4636, "step": 5744 }, { "epoch": 3.2094972067039107, "grad_norm": 3.031297445297241, "learning_rate": 0.0008420448179271709, "loss": 0.5458, "step": 5745 }, { "epoch": 3.2100558659217877, "grad_norm": 0.46593883633613586, "learning_rate": 0.0008420168067226891, "loss": 0.4396, "step": 5746 }, { "epoch": 3.2106145251396647, "grad_norm": 0.5773929357528687, "learning_rate": 0.0008419887955182073, "loss": 0.4911, "step": 5747 }, { "epoch": 3.2111731843575417, "grad_norm": 0.8325348496437073, "learning_rate": 0.0008419607843137255, "loss": 0.5775, "step": 5748 }, { "epoch": 3.211731843575419, "grad_norm": 0.6409006714820862, "learning_rate": 0.0008419327731092437, "loss": 0.4466, "step": 5749 }, { "epoch": 3.212290502793296, "grad_norm": 2.3184618949890137, "learning_rate": 0.0008419047619047619, "loss": 0.5867, "step": 5750 }, { "epoch": 3.212849162011173, "grad_norm": 0.8211964964866638, "learning_rate": 0.0008418767507002801, "loss": 0.5038, "step": 5751 }, { "epoch": 3.21340782122905, "grad_norm": 0.5444333553314209, "learning_rate": 0.0008418487394957983, "loss": 0.4761, "step": 5752 }, { "epoch": 3.2139664804469272, "grad_norm": 0.8410589694976807, "learning_rate": 0.0008418207282913165, "loss": 0.4396, "step": 5753 }, { "epoch": 3.2145251396648042, "grad_norm": 0.5327155590057373, "learning_rate": 0.0008417927170868349, "loss": 0.3801, "step": 5754 }, { "epoch": 3.2150837988826817, "grad_norm": 9.373048782348633, "learning_rate": 0.000841764705882353, "loss": 0.5984, "step": 5755 }, { "epoch": 3.2156424581005587, 
"grad_norm": 0.5760027170181274, "learning_rate": 0.0008417366946778712, "loss": 0.5275, "step": 5756 }, { "epoch": 3.2162011173184357, "grad_norm": 1.1095306873321533, "learning_rate": 0.0008417086834733894, "loss": 0.5204, "step": 5757 }, { "epoch": 3.2167597765363127, "grad_norm": 0.5383431911468506, "learning_rate": 0.0008416806722689076, "loss": 0.538, "step": 5758 }, { "epoch": 3.2173184357541897, "grad_norm": 0.9036241769790649, "learning_rate": 0.0008416526610644259, "loss": 0.5184, "step": 5759 }, { "epoch": 3.217877094972067, "grad_norm": 0.6109788417816162, "learning_rate": 0.000841624649859944, "loss": 0.52, "step": 5760 }, { "epoch": 3.218435754189944, "grad_norm": 0.6571570634841919, "learning_rate": 0.0008415966386554622, "loss": 0.5638, "step": 5761 }, { "epoch": 3.218994413407821, "grad_norm": 2.4715847969055176, "learning_rate": 0.0008415686274509804, "loss": 0.3864, "step": 5762 }, { "epoch": 3.2195530726256982, "grad_norm": 0.7069171667098999, "learning_rate": 0.0008415406162464986, "loss": 0.5143, "step": 5763 }, { "epoch": 3.2201117318435752, "grad_norm": 0.6125462055206299, "learning_rate": 0.0008415126050420169, "loss": 0.5237, "step": 5764 }, { "epoch": 3.2206703910614527, "grad_norm": 0.5367489457130432, "learning_rate": 0.000841484593837535, "loss": 0.466, "step": 5765 }, { "epoch": 3.2212290502793297, "grad_norm": 0.6129879951477051, "learning_rate": 0.0008414565826330532, "loss": 0.4102, "step": 5766 }, { "epoch": 3.2217877094972067, "grad_norm": 0.744072437286377, "learning_rate": 0.0008414285714285714, "loss": 0.4615, "step": 5767 }, { "epoch": 3.2223463687150837, "grad_norm": 1.3382662534713745, "learning_rate": 0.0008414005602240896, "loss": 0.581, "step": 5768 }, { "epoch": 3.2229050279329607, "grad_norm": 0.6672928929328918, "learning_rate": 0.0008413725490196079, "loss": 0.393, "step": 5769 }, { "epoch": 3.223463687150838, "grad_norm": 0.5600702166557312, "learning_rate": 0.0008413445378151261, "loss": 0.5429, "step": 5770 }, { 
"epoch": 3.224022346368715, "grad_norm": 0.5933088660240173, "learning_rate": 0.0008413165266106442, "loss": 0.4516, "step": 5771 }, { "epoch": 3.224581005586592, "grad_norm": 0.5212042927742004, "learning_rate": 0.0008412885154061625, "loss": 0.4062, "step": 5772 }, { "epoch": 3.2251396648044692, "grad_norm": 0.5427224636077881, "learning_rate": 0.0008412605042016807, "loss": 0.4972, "step": 5773 }, { "epoch": 3.2256983240223462, "grad_norm": 0.524664580821991, "learning_rate": 0.000841232492997199, "loss": 0.4396, "step": 5774 }, { "epoch": 3.2262569832402237, "grad_norm": 0.6468037962913513, "learning_rate": 0.0008412044817927172, "loss": 0.4934, "step": 5775 }, { "epoch": 3.2268156424581007, "grad_norm": 0.4983474910259247, "learning_rate": 0.0008411764705882353, "loss": 0.437, "step": 5776 }, { "epoch": 3.2273743016759777, "grad_norm": 1.0326396226882935, "learning_rate": 0.0008411484593837535, "loss": 0.4518, "step": 5777 }, { "epoch": 3.2279329608938547, "grad_norm": 0.8367441296577454, "learning_rate": 0.0008411204481792717, "loss": 0.4202, "step": 5778 }, { "epoch": 3.2284916201117317, "grad_norm": 1.5711349248886108, "learning_rate": 0.00084109243697479, "loss": 0.4431, "step": 5779 }, { "epoch": 3.2290502793296088, "grad_norm": 0.5931336879730225, "learning_rate": 0.0008410644257703082, "loss": 0.3531, "step": 5780 }, { "epoch": 3.229608938547486, "grad_norm": 0.7560968995094299, "learning_rate": 0.0008410364145658263, "loss": 0.5194, "step": 5781 }, { "epoch": 3.230167597765363, "grad_norm": 1.7650152444839478, "learning_rate": 0.0008410084033613445, "loss": 0.522, "step": 5782 }, { "epoch": 3.2307262569832402, "grad_norm": 0.871603786945343, "learning_rate": 0.0008409803921568627, "loss": 0.4031, "step": 5783 }, { "epoch": 3.2312849162011172, "grad_norm": 0.46541014313697815, "learning_rate": 0.000840952380952381, "loss": 0.3239, "step": 5784 }, { "epoch": 3.2318435754189943, "grad_norm": 0.5787938237190247, "learning_rate": 0.0008409243697478992, 
"loss": 0.4855, "step": 5785 }, { "epoch": 3.2324022346368717, "grad_norm": 1.032808780670166, "learning_rate": 0.0008408963585434174, "loss": 0.5314, "step": 5786 }, { "epoch": 3.2329608938547487, "grad_norm": 1.4101992845535278, "learning_rate": 0.0008408683473389355, "loss": 0.5148, "step": 5787 }, { "epoch": 3.2335195530726257, "grad_norm": 0.9198086857795715, "learning_rate": 0.0008408403361344537, "loss": 0.5617, "step": 5788 }, { "epoch": 3.2340782122905027, "grad_norm": 0.5087761878967285, "learning_rate": 0.0008408123249299721, "loss": 0.477, "step": 5789 }, { "epoch": 3.2346368715083798, "grad_norm": 0.8337647318840027, "learning_rate": 0.0008407843137254903, "loss": 0.5226, "step": 5790 }, { "epoch": 3.2351955307262568, "grad_norm": 0.7318830490112305, "learning_rate": 0.0008407563025210085, "loss": 0.4866, "step": 5791 }, { "epoch": 3.235754189944134, "grad_norm": 0.5144968628883362, "learning_rate": 0.0008407282913165266, "loss": 0.4411, "step": 5792 }, { "epoch": 3.2363128491620112, "grad_norm": 0.45033547282218933, "learning_rate": 0.0008407002801120448, "loss": 0.4586, "step": 5793 }, { "epoch": 3.2368715083798882, "grad_norm": 0.5863280296325684, "learning_rate": 0.0008406722689075631, "loss": 0.3774, "step": 5794 }, { "epoch": 3.2374301675977653, "grad_norm": 0.564110279083252, "learning_rate": 0.0008406442577030813, "loss": 0.4198, "step": 5795 }, { "epoch": 3.2379888268156423, "grad_norm": 0.548296332359314, "learning_rate": 0.0008406162464985995, "loss": 0.4525, "step": 5796 }, { "epoch": 3.2385474860335197, "grad_norm": 0.7311416268348694, "learning_rate": 0.0008405882352941176, "loss": 0.4919, "step": 5797 }, { "epoch": 3.2391061452513967, "grad_norm": 0.8921449184417725, "learning_rate": 0.0008405602240896358, "loss": 0.3881, "step": 5798 }, { "epoch": 3.2396648044692737, "grad_norm": 0.5490403175354004, "learning_rate": 0.000840532212885154, "loss": 0.4504, "step": 5799 }, { "epoch": 3.2402234636871508, "grad_norm": 0.44778934121131897, 
"learning_rate": 0.0008405042016806723, "loss": 0.4101, "step": 5800 }, { "epoch": 3.2407821229050278, "grad_norm": 0.504734992980957, "learning_rate": 0.0008404761904761905, "loss": 0.4183, "step": 5801 }, { "epoch": 3.241340782122905, "grad_norm": 0.7137935161590576, "learning_rate": 0.0008404481792717087, "loss": 0.5006, "step": 5802 }, { "epoch": 3.2418994413407822, "grad_norm": 0.6243535876274109, "learning_rate": 0.0008404201680672268, "loss": 0.5688, "step": 5803 }, { "epoch": 3.2424581005586592, "grad_norm": 0.5670920014381409, "learning_rate": 0.000840392156862745, "loss": 0.5508, "step": 5804 }, { "epoch": 3.2430167597765363, "grad_norm": 1.1637734174728394, "learning_rate": 0.0008403641456582634, "loss": 0.5075, "step": 5805 }, { "epoch": 3.2435754189944133, "grad_norm": 2.084158420562744, "learning_rate": 0.0008403361344537816, "loss": 0.4104, "step": 5806 }, { "epoch": 3.2441340782122907, "grad_norm": 0.5076479911804199, "learning_rate": 0.0008403081232492998, "loss": 0.4143, "step": 5807 }, { "epoch": 3.2446927374301677, "grad_norm": 0.5585202574729919, "learning_rate": 0.0008402801120448179, "loss": 0.408, "step": 5808 }, { "epoch": 3.2452513966480447, "grad_norm": 0.5491577386856079, "learning_rate": 0.0008402521008403361, "loss": 0.487, "step": 5809 }, { "epoch": 3.2458100558659218, "grad_norm": 0.6580778956413269, "learning_rate": 0.0008402240896358544, "loss": 0.5002, "step": 5810 }, { "epoch": 3.2463687150837988, "grad_norm": 0.820746123790741, "learning_rate": 0.0008401960784313726, "loss": 0.4189, "step": 5811 }, { "epoch": 3.2469273743016758, "grad_norm": 1.0066092014312744, "learning_rate": 0.0008401680672268908, "loss": 0.518, "step": 5812 }, { "epoch": 3.2474860335195532, "grad_norm": 0.4979892373085022, "learning_rate": 0.0008401400560224089, "loss": 0.4848, "step": 5813 }, { "epoch": 3.2480446927374302, "grad_norm": 0.4993119537830353, "learning_rate": 0.0008401120448179271, "loss": 0.4859, "step": 5814 }, { "epoch": 3.2486033519553073, 
"grad_norm": 0.8079336881637573, "learning_rate": 0.0008400840336134454, "loss": 0.425, "step": 5815 }, { "epoch": 3.2491620111731843, "grad_norm": 0.47287917137145996, "learning_rate": 0.0008400560224089636, "loss": 0.3969, "step": 5816 }, { "epoch": 3.2497206703910613, "grad_norm": 0.6306312084197998, "learning_rate": 0.0008400280112044818, "loss": 0.5888, "step": 5817 }, { "epoch": 3.2502793296089387, "grad_norm": 0.4639133810997009, "learning_rate": 0.00084, "loss": 0.3681, "step": 5818 }, { "epoch": 3.2508379888268157, "grad_norm": 0.3771038055419922, "learning_rate": 0.0008399719887955181, "loss": 0.341, "step": 5819 }, { "epoch": 3.2513966480446927, "grad_norm": 0.6737233996391296, "learning_rate": 0.0008399439775910364, "loss": 0.464, "step": 5820 }, { "epoch": 3.2519553072625698, "grad_norm": 0.6057330369949341, "learning_rate": 0.0008399159663865547, "loss": 0.4295, "step": 5821 }, { "epoch": 3.2525139664804468, "grad_norm": 0.5017884969711304, "learning_rate": 0.0008398879551820729, "loss": 0.5652, "step": 5822 }, { "epoch": 3.253072625698324, "grad_norm": 0.5959794521331787, "learning_rate": 0.0008398599439775911, "loss": 0.5196, "step": 5823 }, { "epoch": 3.2536312849162012, "grad_norm": 0.6592594385147095, "learning_rate": 0.0008398319327731092, "loss": 0.4486, "step": 5824 }, { "epoch": 3.2541899441340782, "grad_norm": 0.5312095880508423, "learning_rate": 0.0008398039215686275, "loss": 0.3249, "step": 5825 }, { "epoch": 3.2547486033519553, "grad_norm": 0.6078124642372131, "learning_rate": 0.0008397759103641457, "loss": 0.4297, "step": 5826 }, { "epoch": 3.2553072625698323, "grad_norm": 0.7393833994865417, "learning_rate": 0.0008397478991596639, "loss": 0.6271, "step": 5827 }, { "epoch": 3.2558659217877093, "grad_norm": 0.7218764424324036, "learning_rate": 0.0008397198879551821, "loss": 0.5153, "step": 5828 }, { "epoch": 3.2564245810055867, "grad_norm": 0.6063776612281799, "learning_rate": 0.0008396918767507002, "loss": 0.5261, "step": 5829 }, { 
"epoch": 3.2569832402234637, "grad_norm": 0.5681562423706055, "learning_rate": 0.0008396638655462185, "loss": 0.4595, "step": 5830 }, { "epoch": 3.2575418994413408, "grad_norm": 0.9323566555976868, "learning_rate": 0.0008396358543417367, "loss": 0.6893, "step": 5831 }, { "epoch": 3.2581005586592178, "grad_norm": 0.4728999733924866, "learning_rate": 0.0008396078431372549, "loss": 0.5198, "step": 5832 }, { "epoch": 3.258659217877095, "grad_norm": 0.5985539555549622, "learning_rate": 0.0008395798319327731, "loss": 0.4188, "step": 5833 }, { "epoch": 3.2592178770949722, "grad_norm": 0.8027599453926086, "learning_rate": 0.0008395518207282913, "loss": 0.4697, "step": 5834 }, { "epoch": 3.2597765363128492, "grad_norm": 0.5126614570617676, "learning_rate": 0.0008395238095238095, "loss": 0.4221, "step": 5835 }, { "epoch": 3.2603351955307263, "grad_norm": 0.6420572400093079, "learning_rate": 0.0008394957983193277, "loss": 0.5148, "step": 5836 }, { "epoch": 3.2608938547486033, "grad_norm": 0.7101128101348877, "learning_rate": 0.000839467787114846, "loss": 0.4463, "step": 5837 }, { "epoch": 3.2614525139664803, "grad_norm": 0.5522117018699646, "learning_rate": 0.0008394397759103642, "loss": 0.4005, "step": 5838 }, { "epoch": 3.2620111731843577, "grad_norm": 0.6390681862831116, "learning_rate": 0.0008394117647058824, "loss": 0.4511, "step": 5839 }, { "epoch": 3.2625698324022347, "grad_norm": 0.4235347509384155, "learning_rate": 0.0008393837535014006, "loss": 0.4698, "step": 5840 }, { "epoch": 3.2631284916201118, "grad_norm": 0.4609551727771759, "learning_rate": 0.0008393557422969188, "loss": 0.3928, "step": 5841 }, { "epoch": 3.2636871508379888, "grad_norm": 0.6046702265739441, "learning_rate": 0.000839327731092437, "loss": 0.4491, "step": 5842 }, { "epoch": 3.264245810055866, "grad_norm": 0.8087189197540283, "learning_rate": 0.0008392997198879552, "loss": 0.6472, "step": 5843 }, { "epoch": 3.2648044692737432, "grad_norm": 0.4118671119213104, "learning_rate": 
0.0008392717086834734, "loss": 0.4232, "step": 5844 }, { "epoch": 3.2653631284916202, "grad_norm": 0.9294708967208862, "learning_rate": 0.0008392436974789917, "loss": 0.4206, "step": 5845 }, { "epoch": 3.2659217877094973, "grad_norm": 0.44083964824676514, "learning_rate": 0.0008392156862745098, "loss": 0.4222, "step": 5846 }, { "epoch": 3.2664804469273743, "grad_norm": 0.39109838008880615, "learning_rate": 0.000839187675070028, "loss": 0.3247, "step": 5847 }, { "epoch": 3.2670391061452513, "grad_norm": 0.7346558570861816, "learning_rate": 0.0008391596638655462, "loss": 0.3942, "step": 5848 }, { "epoch": 3.2675977653631287, "grad_norm": 1.5916489362716675, "learning_rate": 0.0008391316526610644, "loss": 0.4863, "step": 5849 }, { "epoch": 3.2681564245810057, "grad_norm": 0.4552851617336273, "learning_rate": 0.0008391036414565827, "loss": 0.4117, "step": 5850 }, { "epoch": 3.2687150837988828, "grad_norm": 0.6976941823959351, "learning_rate": 0.0008390756302521008, "loss": 0.4061, "step": 5851 }, { "epoch": 3.2692737430167598, "grad_norm": 0.9338297247886658, "learning_rate": 0.000839047619047619, "loss": 0.5003, "step": 5852 }, { "epoch": 3.269832402234637, "grad_norm": 0.7453265190124512, "learning_rate": 0.0008390196078431372, "loss": 0.4772, "step": 5853 }, { "epoch": 3.270391061452514, "grad_norm": 0.512266218662262, "learning_rate": 0.0008389915966386555, "loss": 0.4417, "step": 5854 }, { "epoch": 3.2709497206703912, "grad_norm": 0.45327645540237427, "learning_rate": 0.0008389635854341738, "loss": 0.4654, "step": 5855 }, { "epoch": 3.2715083798882683, "grad_norm": 0.3998764753341675, "learning_rate": 0.0008389355742296919, "loss": 0.4208, "step": 5856 }, { "epoch": 3.2720670391061453, "grad_norm": 0.7484354376792908, "learning_rate": 0.0008389075630252101, "loss": 0.4572, "step": 5857 }, { "epoch": 3.2726256983240223, "grad_norm": 0.6923760771751404, "learning_rate": 0.0008388795518207283, "loss": 0.4934, "step": 5858 }, { "epoch": 3.2731843575418993, 
"grad_norm": 0.8958445191383362, "learning_rate": 0.0008388515406162465, "loss": 0.6625, "step": 5859 }, { "epoch": 3.2737430167597763, "grad_norm": 1.4391804933547974, "learning_rate": 0.0008388235294117648, "loss": 0.5695, "step": 5860 }, { "epoch": 3.2743016759776538, "grad_norm": 0.5883399844169617, "learning_rate": 0.000838795518207283, "loss": 0.4544, "step": 5861 }, { "epoch": 3.2748603351955308, "grad_norm": 0.6043676137924194, "learning_rate": 0.0008387675070028011, "loss": 0.4684, "step": 5862 }, { "epoch": 3.275418994413408, "grad_norm": 0.949695348739624, "learning_rate": 0.0008387394957983193, "loss": 0.4743, "step": 5863 }, { "epoch": 3.275977653631285, "grad_norm": 0.4443172812461853, "learning_rate": 0.0008387114845938375, "loss": 0.3997, "step": 5864 }, { "epoch": 3.276536312849162, "grad_norm": 0.8300796151161194, "learning_rate": 0.0008386834733893558, "loss": 0.5845, "step": 5865 }, { "epoch": 3.2770949720670393, "grad_norm": 0.6482070684432983, "learning_rate": 0.000838655462184874, "loss": 0.4247, "step": 5866 }, { "epoch": 3.2776536312849163, "grad_norm": 2.993833065032959, "learning_rate": 0.0008386274509803921, "loss": 0.5309, "step": 5867 }, { "epoch": 3.2782122905027933, "grad_norm": 0.6275423765182495, "learning_rate": 0.0008385994397759103, "loss": 0.48, "step": 5868 }, { "epoch": 3.2787709497206703, "grad_norm": 0.7693106532096863, "learning_rate": 0.0008385714285714285, "loss": 0.507, "step": 5869 }, { "epoch": 3.2793296089385473, "grad_norm": 0.4466283321380615, "learning_rate": 0.0008385434173669469, "loss": 0.3821, "step": 5870 }, { "epoch": 3.2798882681564248, "grad_norm": 3.289853572845459, "learning_rate": 0.0008385154061624651, "loss": 0.4186, "step": 5871 }, { "epoch": 3.2804469273743018, "grad_norm": 0.6237671375274658, "learning_rate": 0.0008384873949579832, "loss": 0.487, "step": 5872 }, { "epoch": 3.281005586592179, "grad_norm": 0.9812585711479187, "learning_rate": 0.0008384593837535014, "loss": 0.5444, "step": 5873 }, { 
"epoch": 3.281564245810056, "grad_norm": 2.3811252117156982, "learning_rate": 0.0008384313725490196, "loss": 0.4848, "step": 5874 }, { "epoch": 3.282122905027933, "grad_norm": 0.8060725331306458, "learning_rate": 0.0008384033613445379, "loss": 0.4601, "step": 5875 }, { "epoch": 3.2826815642458103, "grad_norm": 0.9154607057571411, "learning_rate": 0.0008383753501400561, "loss": 0.4821, "step": 5876 }, { "epoch": 3.2832402234636873, "grad_norm": 1.448519229888916, "learning_rate": 0.0008383473389355743, "loss": 0.3754, "step": 5877 }, { "epoch": 3.2837988826815643, "grad_norm": 0.47846508026123047, "learning_rate": 0.0008383193277310924, "loss": 0.4222, "step": 5878 }, { "epoch": 3.2843575418994413, "grad_norm": 0.7119501829147339, "learning_rate": 0.0008382913165266106, "loss": 0.5035, "step": 5879 }, { "epoch": 3.2849162011173183, "grad_norm": 0.6041054725646973, "learning_rate": 0.0008382633053221289, "loss": 0.5721, "step": 5880 }, { "epoch": 3.2854748603351958, "grad_norm": 0.8183274865150452, "learning_rate": 0.0008382352941176471, "loss": 0.5763, "step": 5881 }, { "epoch": 3.2860335195530728, "grad_norm": 0.5485641360282898, "learning_rate": 0.0008382072829131653, "loss": 0.5318, "step": 5882 }, { "epoch": 3.28659217877095, "grad_norm": 0.9233376383781433, "learning_rate": 0.0008381792717086834, "loss": 0.4954, "step": 5883 }, { "epoch": 3.287150837988827, "grad_norm": 0.6849166750907898, "learning_rate": 0.0008381512605042016, "loss": 0.4896, "step": 5884 }, { "epoch": 3.287709497206704, "grad_norm": 0.7204636335372925, "learning_rate": 0.0008381232492997199, "loss": 0.4403, "step": 5885 }, { "epoch": 3.288268156424581, "grad_norm": 0.9672218561172485, "learning_rate": 0.0008380952380952382, "loss": 0.4994, "step": 5886 }, { "epoch": 3.2888268156424583, "grad_norm": 0.5536984205245972, "learning_rate": 0.0008380672268907564, "loss": 0.4526, "step": 5887 }, { "epoch": 3.2893854748603353, "grad_norm": 0.6521371006965637, "learning_rate": 0.0008380392156862745, 
"loss": 0.4508, "step": 5888 }, { "epoch": 3.2899441340782123, "grad_norm": 3.3169729709625244, "learning_rate": 0.0008380112044817927, "loss": 0.4629, "step": 5889 }, { "epoch": 3.2905027932960893, "grad_norm": 2.767110586166382, "learning_rate": 0.000837983193277311, "loss": 0.4126, "step": 5890 }, { "epoch": 3.2910614525139663, "grad_norm": 0.7629807591438293, "learning_rate": 0.0008379551820728292, "loss": 0.5102, "step": 5891 }, { "epoch": 3.2916201117318438, "grad_norm": 0.6200615763664246, "learning_rate": 0.0008379271708683474, "loss": 0.6001, "step": 5892 }, { "epoch": 3.292178770949721, "grad_norm": 0.524581789970398, "learning_rate": 0.0008378991596638656, "loss": 0.3683, "step": 5893 }, { "epoch": 3.292737430167598, "grad_norm": 1.2102714776992798, "learning_rate": 0.0008378711484593837, "loss": 0.6946, "step": 5894 }, { "epoch": 3.293296089385475, "grad_norm": 0.4404248297214508, "learning_rate": 0.000837843137254902, "loss": 0.4063, "step": 5895 }, { "epoch": 3.293854748603352, "grad_norm": 0.6545925140380859, "learning_rate": 0.0008378151260504202, "loss": 0.3816, "step": 5896 }, { "epoch": 3.294413407821229, "grad_norm": 1.7304235696792603, "learning_rate": 0.0008377871148459384, "loss": 0.4554, "step": 5897 }, { "epoch": 3.2949720670391063, "grad_norm": 0.5310423374176025, "learning_rate": 0.0008377591036414566, "loss": 0.577, "step": 5898 }, { "epoch": 3.2955307262569833, "grad_norm": 0.4148930311203003, "learning_rate": 0.0008377310924369747, "loss": 0.4106, "step": 5899 }, { "epoch": 3.2960893854748603, "grad_norm": 0.5346720218658447, "learning_rate": 0.000837703081232493, "loss": 0.4965, "step": 5900 }, { "epoch": 3.2966480446927373, "grad_norm": 0.535318911075592, "learning_rate": 0.0008376750700280112, "loss": 0.5348, "step": 5901 }, { "epoch": 3.2972067039106143, "grad_norm": 1.3257360458374023, "learning_rate": 0.0008376470588235294, "loss": 0.4411, "step": 5902 }, { "epoch": 3.2977653631284918, "grad_norm": 0.7085011601448059, 
"learning_rate": 0.0008376190476190477, "loss": 0.5034, "step": 5903 }, { "epoch": 3.298324022346369, "grad_norm": 0.6488482356071472, "learning_rate": 0.0008375910364145658, "loss": 0.4438, "step": 5904 }, { "epoch": 3.298882681564246, "grad_norm": 0.8926328420639038, "learning_rate": 0.0008375630252100841, "loss": 0.3763, "step": 5905 }, { "epoch": 3.299441340782123, "grad_norm": 0.6652448773384094, "learning_rate": 0.0008375350140056023, "loss": 0.5289, "step": 5906 }, { "epoch": 3.3, "grad_norm": 0.5910142660140991, "learning_rate": 0.0008375070028011205, "loss": 0.435, "step": 5907 }, { "epoch": 3.3005586592178773, "grad_norm": 0.621354341506958, "learning_rate": 0.0008374789915966387, "loss": 0.4935, "step": 5908 }, { "epoch": 3.3011173184357543, "grad_norm": 6.71200704574585, "learning_rate": 0.0008374509803921569, "loss": 0.4426, "step": 5909 }, { "epoch": 3.3016759776536313, "grad_norm": 0.4947996139526367, "learning_rate": 0.0008374229691876751, "loss": 0.3556, "step": 5910 }, { "epoch": 3.3022346368715083, "grad_norm": 0.4281250834465027, "learning_rate": 0.0008373949579831933, "loss": 0.4049, "step": 5911 }, { "epoch": 3.3027932960893853, "grad_norm": 0.7397978901863098, "learning_rate": 0.0008373669467787115, "loss": 0.5779, "step": 5912 }, { "epoch": 3.3033519553072628, "grad_norm": 9.742897987365723, "learning_rate": 0.0008373389355742297, "loss": 0.5392, "step": 5913 }, { "epoch": 3.30391061452514, "grad_norm": 0.8706209659576416, "learning_rate": 0.0008373109243697479, "loss": 0.451, "step": 5914 }, { "epoch": 3.304469273743017, "grad_norm": 0.49004945158958435, "learning_rate": 0.0008372829131652661, "loss": 0.5001, "step": 5915 }, { "epoch": 3.305027932960894, "grad_norm": 1.3001060485839844, "learning_rate": 0.0008372549019607843, "loss": 0.5163, "step": 5916 }, { "epoch": 3.305586592178771, "grad_norm": 0.7620853185653687, "learning_rate": 0.0008372268907563025, "loss": 0.486, "step": 5917 }, { "epoch": 3.3061452513966483, "grad_norm": 
0.6356363892555237, "learning_rate": 0.0008371988795518207, "loss": 0.6636, "step": 5918 }, { "epoch": 3.3067039106145253, "grad_norm": 0.45684704184532166, "learning_rate": 0.000837170868347339, "loss": 0.4321, "step": 5919 }, { "epoch": 3.3072625698324023, "grad_norm": 0.5030642151832581, "learning_rate": 0.0008371428571428572, "loss": 0.5827, "step": 5920 }, { "epoch": 3.3078212290502793, "grad_norm": 0.9501070976257324, "learning_rate": 0.0008371148459383754, "loss": 0.9152, "step": 5921 }, { "epoch": 3.3083798882681563, "grad_norm": 1.2457044124603271, "learning_rate": 0.0008370868347338936, "loss": 0.4704, "step": 5922 }, { "epoch": 3.3089385474860333, "grad_norm": 0.5496012568473816, "learning_rate": 0.0008370588235294118, "loss": 0.3784, "step": 5923 }, { "epoch": 3.309497206703911, "grad_norm": 0.775839626789093, "learning_rate": 0.00083703081232493, "loss": 0.5187, "step": 5924 }, { "epoch": 3.310055865921788, "grad_norm": 0.7287942171096802, "learning_rate": 0.0008370028011204483, "loss": 0.4446, "step": 5925 }, { "epoch": 3.310614525139665, "grad_norm": 1.3466728925704956, "learning_rate": 0.0008369747899159664, "loss": 0.501, "step": 5926 }, { "epoch": 3.311173184357542, "grad_norm": 0.7960771322250366, "learning_rate": 0.0008369467787114846, "loss": 0.624, "step": 5927 }, { "epoch": 3.311731843575419, "grad_norm": 0.6292746663093567, "learning_rate": 0.0008369187675070028, "loss": 0.5935, "step": 5928 }, { "epoch": 3.312290502793296, "grad_norm": 0.4995972812175751, "learning_rate": 0.000836890756302521, "loss": 0.4782, "step": 5929 }, { "epoch": 3.3128491620111733, "grad_norm": 0.7585958242416382, "learning_rate": 0.0008368627450980393, "loss": 0.4901, "step": 5930 }, { "epoch": 3.3134078212290503, "grad_norm": 0.7725650072097778, "learning_rate": 0.0008368347338935574, "loss": 0.4647, "step": 5931 }, { "epoch": 3.3139664804469273, "grad_norm": 0.5303656458854675, "learning_rate": 0.0008368067226890756, "loss": 0.4532, "step": 5932 }, { "epoch": 
3.3145251396648043, "grad_norm": 0.487987756729126, "learning_rate": 0.0008367787114845938, "loss": 0.4656, "step": 5933 }, { "epoch": 3.3150837988826813, "grad_norm": 0.6956318020820618, "learning_rate": 0.000836750700280112, "loss": 0.5001, "step": 5934 }, { "epoch": 3.315642458100559, "grad_norm": 0.7788981795310974, "learning_rate": 0.0008367226890756304, "loss": 0.508, "step": 5935 }, { "epoch": 3.316201117318436, "grad_norm": 0.5326288342475891, "learning_rate": 0.0008366946778711485, "loss": 0.4932, "step": 5936 }, { "epoch": 3.316759776536313, "grad_norm": 0.6776241064071655, "learning_rate": 0.0008366666666666667, "loss": 0.632, "step": 5937 }, { "epoch": 3.31731843575419, "grad_norm": 1.221423625946045, "learning_rate": 0.0008366386554621849, "loss": 0.545, "step": 5938 }, { "epoch": 3.317877094972067, "grad_norm": 0.6130251288414001, "learning_rate": 0.0008366106442577031, "loss": 0.5382, "step": 5939 }, { "epoch": 3.3184357541899443, "grad_norm": 0.5487998723983765, "learning_rate": 0.0008365826330532214, "loss": 0.4817, "step": 5940 }, { "epoch": 3.3189944134078213, "grad_norm": 0.3733319640159607, "learning_rate": 0.0008365546218487396, "loss": 0.4541, "step": 5941 }, { "epoch": 3.3195530726256983, "grad_norm": 0.676230251789093, "learning_rate": 0.0008365266106442577, "loss": 0.4223, "step": 5942 }, { "epoch": 3.3201117318435753, "grad_norm": 1.0991166830062866, "learning_rate": 0.0008364985994397759, "loss": 0.7488, "step": 5943 }, { "epoch": 3.3206703910614523, "grad_norm": 0.7113136053085327, "learning_rate": 0.0008364705882352941, "loss": 0.4664, "step": 5944 }, { "epoch": 3.32122905027933, "grad_norm": 0.6897918581962585, "learning_rate": 0.0008364425770308124, "loss": 0.5825, "step": 5945 }, { "epoch": 3.321787709497207, "grad_norm": 0.740591824054718, "learning_rate": 0.0008364145658263306, "loss": 0.5044, "step": 5946 }, { "epoch": 3.322346368715084, "grad_norm": 0.5311262607574463, "learning_rate": 0.0008363865546218487, "loss": 0.4535, 
"step": 5947 }, { "epoch": 3.322905027932961, "grad_norm": 0.495380699634552, "learning_rate": 0.0008363585434173669, "loss": 0.5506, "step": 5948 }, { "epoch": 3.323463687150838, "grad_norm": 0.5739846229553223, "learning_rate": 0.0008363305322128851, "loss": 0.4776, "step": 5949 }, { "epoch": 3.3240223463687153, "grad_norm": 0.4468778669834137, "learning_rate": 0.0008363025210084034, "loss": 0.4511, "step": 5950 }, { "epoch": 3.3245810055865923, "grad_norm": 0.4775971472263336, "learning_rate": 0.0008362745098039216, "loss": 0.4567, "step": 5951 }, { "epoch": 3.3251396648044693, "grad_norm": 0.6873248219490051, "learning_rate": 0.0008362464985994397, "loss": 0.4802, "step": 5952 }, { "epoch": 3.3256983240223463, "grad_norm": 0.539291262626648, "learning_rate": 0.000836218487394958, "loss": 0.5324, "step": 5953 }, { "epoch": 3.3262569832402233, "grad_norm": 0.6163298487663269, "learning_rate": 0.0008361904761904762, "loss": 0.5937, "step": 5954 }, { "epoch": 3.326815642458101, "grad_norm": 0.664472758769989, "learning_rate": 0.0008361624649859945, "loss": 0.536, "step": 5955 }, { "epoch": 3.327374301675978, "grad_norm": 0.7042726874351501, "learning_rate": 0.0008361344537815127, "loss": 0.4491, "step": 5956 }, { "epoch": 3.327932960893855, "grad_norm": 0.4764694273471832, "learning_rate": 0.0008361064425770309, "loss": 0.4625, "step": 5957 }, { "epoch": 3.328491620111732, "grad_norm": 0.5840118527412415, "learning_rate": 0.000836078431372549, "loss": 0.5109, "step": 5958 }, { "epoch": 3.329050279329609, "grad_norm": 0.5546570420265198, "learning_rate": 0.0008360504201680672, "loss": 0.3827, "step": 5959 }, { "epoch": 3.329608938547486, "grad_norm": 0.49044451117515564, "learning_rate": 0.0008360224089635855, "loss": 0.3845, "step": 5960 }, { "epoch": 3.3301675977653633, "grad_norm": 0.7842701077461243, "learning_rate": 0.0008359943977591037, "loss": 0.4405, "step": 5961 }, { "epoch": 3.3307262569832403, "grad_norm": 0.5469485521316528, "learning_rate": 
0.0008359663865546219, "loss": 0.4293, "step": 5962 }, { "epoch": 3.3312849162011173, "grad_norm": 0.5998361110687256, "learning_rate": 0.00083593837535014, "loss": 0.4219, "step": 5963 }, { "epoch": 3.3318435754189943, "grad_norm": 0.42217960953712463, "learning_rate": 0.0008359103641456582, "loss": 0.4884, "step": 5964 }, { "epoch": 3.3324022346368714, "grad_norm": 0.43354055285453796, "learning_rate": 0.0008358823529411765, "loss": 0.3874, "step": 5965 }, { "epoch": 3.3329608938547484, "grad_norm": 0.492789089679718, "learning_rate": 0.0008358543417366947, "loss": 0.445, "step": 5966 }, { "epoch": 3.333519553072626, "grad_norm": 1.0337791442871094, "learning_rate": 0.0008358263305322129, "loss": 0.5195, "step": 5967 }, { "epoch": 3.334078212290503, "grad_norm": 0.5146295428276062, "learning_rate": 0.000835798319327731, "loss": 0.4475, "step": 5968 }, { "epoch": 3.33463687150838, "grad_norm": 2.359459638595581, "learning_rate": 0.0008357703081232492, "loss": 0.4291, "step": 5969 }, { "epoch": 3.335195530726257, "grad_norm": 1.1305863857269287, "learning_rate": 0.0008357422969187676, "loss": 0.478, "step": 5970 }, { "epoch": 3.335754189944134, "grad_norm": 1.1479641199111938, "learning_rate": 0.0008357142857142858, "loss": 0.4636, "step": 5971 }, { "epoch": 3.3363128491620113, "grad_norm": 0.5999601483345032, "learning_rate": 0.000835686274509804, "loss": 0.4708, "step": 5972 }, { "epoch": 3.3368715083798883, "grad_norm": 0.4380541741847992, "learning_rate": 0.0008356582633053222, "loss": 0.4034, "step": 5973 }, { "epoch": 3.3374301675977653, "grad_norm": 0.5763838291168213, "learning_rate": 0.0008356302521008403, "loss": 0.4328, "step": 5974 }, { "epoch": 3.3379888268156424, "grad_norm": 1.615910291671753, "learning_rate": 0.0008356022408963586, "loss": 0.6219, "step": 5975 }, { "epoch": 3.3385474860335194, "grad_norm": 1.3738059997558594, "learning_rate": 0.0008355742296918768, "loss": 0.4257, "step": 5976 }, { "epoch": 3.339106145251397, "grad_norm": 
0.6873733997344971, "learning_rate": 0.000835546218487395, "loss": 0.574, "step": 5977 }, { "epoch": 3.339664804469274, "grad_norm": 0.5822592377662659, "learning_rate": 0.0008355182072829132, "loss": 0.4977, "step": 5978 }, { "epoch": 3.340223463687151, "grad_norm": 0.7007447481155396, "learning_rate": 0.0008354901960784313, "loss": 0.4804, "step": 5979 }, { "epoch": 3.340782122905028, "grad_norm": 0.5462225675582886, "learning_rate": 0.0008354621848739496, "loss": 0.4344, "step": 5980 }, { "epoch": 3.341340782122905, "grad_norm": 0.4206903278827667, "learning_rate": 0.0008354341736694678, "loss": 0.369, "step": 5981 }, { "epoch": 3.3418994413407823, "grad_norm": 0.7577139735221863, "learning_rate": 0.000835406162464986, "loss": 0.5953, "step": 5982 }, { "epoch": 3.3424581005586593, "grad_norm": 0.4644266963005066, "learning_rate": 0.0008353781512605042, "loss": 0.4187, "step": 5983 }, { "epoch": 3.3430167597765363, "grad_norm": 0.8171898126602173, "learning_rate": 0.0008353501400560223, "loss": 0.3725, "step": 5984 }, { "epoch": 3.3435754189944134, "grad_norm": 0.5116392374038696, "learning_rate": 0.0008353221288515407, "loss": 0.434, "step": 5985 }, { "epoch": 3.3441340782122904, "grad_norm": 0.5752791166305542, "learning_rate": 0.0008352941176470589, "loss": 0.4831, "step": 5986 }, { "epoch": 3.344692737430168, "grad_norm": 0.5015922784805298, "learning_rate": 0.0008352661064425771, "loss": 0.416, "step": 5987 }, { "epoch": 3.345251396648045, "grad_norm": 0.41650280356407166, "learning_rate": 0.0008352380952380953, "loss": 0.4374, "step": 5988 }, { "epoch": 3.345810055865922, "grad_norm": 0.5924286246299744, "learning_rate": 0.0008352100840336135, "loss": 0.4617, "step": 5989 }, { "epoch": 3.346368715083799, "grad_norm": 0.51353520154953, "learning_rate": 0.0008351820728291317, "loss": 0.4073, "step": 5990 }, { "epoch": 3.346927374301676, "grad_norm": 0.44078338146209717, "learning_rate": 0.0008351540616246499, "loss": 0.4688, "step": 5991 }, { "epoch": 
3.3474860335195533, "grad_norm": 5.740250587463379, "learning_rate": 0.0008351260504201681, "loss": 0.4629, "step": 5992 }, { "epoch": 3.3480446927374303, "grad_norm": 0.580997884273529, "learning_rate": 0.0008350980392156863, "loss": 0.5854, "step": 5993 }, { "epoch": 3.3486033519553073, "grad_norm": 0.7381086349487305, "learning_rate": 0.0008350700280112045, "loss": 0.5432, "step": 5994 }, { "epoch": 3.3491620111731844, "grad_norm": 1.129784345626831, "learning_rate": 0.0008350420168067227, "loss": 0.5235, "step": 5995 }, { "epoch": 3.3497206703910614, "grad_norm": 0.6231741905212402, "learning_rate": 0.0008350140056022409, "loss": 0.5439, "step": 5996 }, { "epoch": 3.3502793296089384, "grad_norm": 0.8671806454658508, "learning_rate": 0.0008349859943977591, "loss": 0.4991, "step": 5997 }, { "epoch": 3.350837988826816, "grad_norm": 0.4410226345062256, "learning_rate": 0.0008349579831932773, "loss": 0.4581, "step": 5998 }, { "epoch": 3.351396648044693, "grad_norm": 0.445648193359375, "learning_rate": 0.0008349299719887955, "loss": 0.4117, "step": 5999 }, { "epoch": 3.35195530726257, "grad_norm": 0.4277268946170807, "learning_rate": 0.0008349019607843137, "loss": 0.3061, "step": 6000 }, { "epoch": 3.35195530726257, "eval_cer": 0.09412130535825342, "eval_loss": 0.35406970977783203, "eval_runtime": 55.3464, "eval_samples_per_second": 81.993, "eval_steps_per_second": 5.131, "eval_wer": 0.3714142063070864, "step": 6000 }, { "epoch": 3.352513966480447, "grad_norm": 0.6357645988464355, "learning_rate": 0.000834873949579832, "loss": 0.3904, "step": 6001 }, { "epoch": 3.353072625698324, "grad_norm": 0.7088366746902466, "learning_rate": 0.0008348459383753502, "loss": 0.4961, "step": 6002 }, { "epoch": 3.353631284916201, "grad_norm": 0.8757784962654114, "learning_rate": 0.0008348179271708684, "loss": 0.3754, "step": 6003 }, { "epoch": 3.3541899441340783, "grad_norm": 0.9190798401832581, "learning_rate": 0.0008347899159663866, "loss": 0.5937, "step": 6004 }, { "epoch": 
3.3547486033519553, "grad_norm": 1.9690254926681519, "learning_rate": 0.0008347619047619049, "loss": 0.477, "step": 6005 }, { "epoch": 3.3553072625698324, "grad_norm": 0.48200395703315735, "learning_rate": 0.000834733893557423, "loss": 0.5189, "step": 6006 }, { "epoch": 3.3558659217877094, "grad_norm": 0.3942033648490906, "learning_rate": 0.0008347058823529412, "loss": 0.4153, "step": 6007 }, { "epoch": 3.3564245810055864, "grad_norm": 0.6851130127906799, "learning_rate": 0.0008346778711484594, "loss": 0.4621, "step": 6008 }, { "epoch": 3.356983240223464, "grad_norm": 0.7886162996292114, "learning_rate": 0.0008346498599439776, "loss": 0.5397, "step": 6009 }, { "epoch": 3.357541899441341, "grad_norm": 1.5325363874435425, "learning_rate": 0.0008346218487394959, "loss": 0.3924, "step": 6010 }, { "epoch": 3.358100558659218, "grad_norm": 1.0592213869094849, "learning_rate": 0.000834593837535014, "loss": 0.4683, "step": 6011 }, { "epoch": 3.358659217877095, "grad_norm": 0.8366856575012207, "learning_rate": 0.0008345658263305322, "loss": 0.4993, "step": 6012 }, { "epoch": 3.359217877094972, "grad_norm": 0.7793118357658386, "learning_rate": 0.0008345378151260504, "loss": 0.4538, "step": 6013 }, { "epoch": 3.3597765363128493, "grad_norm": 0.5727178454399109, "learning_rate": 0.0008345098039215686, "loss": 0.6036, "step": 6014 }, { "epoch": 3.3603351955307263, "grad_norm": 1.0506523847579956, "learning_rate": 0.0008344817927170869, "loss": 0.4593, "step": 6015 }, { "epoch": 3.3608938547486034, "grad_norm": 1.3926310539245605, "learning_rate": 0.000834453781512605, "loss": 0.5351, "step": 6016 }, { "epoch": 3.3614525139664804, "grad_norm": 0.6975616812705994, "learning_rate": 0.0008344257703081232, "loss": 0.4663, "step": 6017 }, { "epoch": 3.3620111731843574, "grad_norm": 0.5686214566230774, "learning_rate": 0.0008343977591036415, "loss": 0.4689, "step": 6018 }, { "epoch": 3.362569832402235, "grad_norm": 0.4696175456047058, "learning_rate": 0.0008343697478991597, "loss": 
0.4944, "step": 6019 }, { "epoch": 3.363128491620112, "grad_norm": 0.5842403769493103, "learning_rate": 0.0008343417366946779, "loss": 0.544, "step": 6020 }, { "epoch": 3.363687150837989, "grad_norm": 0.5163576006889343, "learning_rate": 0.0008343137254901962, "loss": 0.473, "step": 6021 }, { "epoch": 3.364245810055866, "grad_norm": 0.43924614787101746, "learning_rate": 0.0008342857142857143, "loss": 0.4131, "step": 6022 }, { "epoch": 3.364804469273743, "grad_norm": 0.4216912090778351, "learning_rate": 0.0008342577030812325, "loss": 0.4767, "step": 6023 }, { "epoch": 3.3653631284916203, "grad_norm": 1.3931881189346313, "learning_rate": 0.0008342296918767507, "loss": 0.4433, "step": 6024 }, { "epoch": 3.3659217877094973, "grad_norm": 0.5150628685951233, "learning_rate": 0.0008342016806722689, "loss": 0.5303, "step": 6025 }, { "epoch": 3.3664804469273744, "grad_norm": 0.40338414907455444, "learning_rate": 0.0008341736694677872, "loss": 0.4618, "step": 6026 }, { "epoch": 3.3670391061452514, "grad_norm": 0.6179518699645996, "learning_rate": 0.0008341456582633053, "loss": 0.4445, "step": 6027 }, { "epoch": 3.3675977653631284, "grad_norm": 0.5551663041114807, "learning_rate": 0.0008341176470588235, "loss": 0.4725, "step": 6028 }, { "epoch": 3.3681564245810054, "grad_norm": 0.8116632699966431, "learning_rate": 0.0008340896358543417, "loss": 0.4221, "step": 6029 }, { "epoch": 3.368715083798883, "grad_norm": 1.5497699975967407, "learning_rate": 0.0008340616246498599, "loss": 0.4863, "step": 6030 }, { "epoch": 3.36927374301676, "grad_norm": 0.5297819375991821, "learning_rate": 0.0008340336134453782, "loss": 0.5314, "step": 6031 }, { "epoch": 3.369832402234637, "grad_norm": 1.4641435146331787, "learning_rate": 0.0008340056022408963, "loss": 0.4431, "step": 6032 }, { "epoch": 3.370391061452514, "grad_norm": 0.5053655505180359, "learning_rate": 0.0008339775910364145, "loss": 0.4246, "step": 6033 }, { "epoch": 3.370949720670391, "grad_norm": 0.5946944952011108, "learning_rate": 
0.0008339495798319327, "loss": 0.5037, "step": 6034 }, { "epoch": 3.3715083798882683, "grad_norm": 0.5260332226753235, "learning_rate": 0.000833921568627451, "loss": 0.4625, "step": 6035 }, { "epoch": 3.3720670391061454, "grad_norm": 0.5531105399131775, "learning_rate": 0.0008338935574229693, "loss": 0.4822, "step": 6036 }, { "epoch": 3.3726256983240224, "grad_norm": 0.6000711917877197, "learning_rate": 0.0008338655462184875, "loss": 0.571, "step": 6037 }, { "epoch": 3.3731843575418994, "grad_norm": 0.4021860659122467, "learning_rate": 0.0008338375350140056, "loss": 0.5426, "step": 6038 }, { "epoch": 3.3737430167597764, "grad_norm": 1.9584848880767822, "learning_rate": 0.0008338095238095238, "loss": 0.5339, "step": 6039 }, { "epoch": 3.3743016759776534, "grad_norm": 1.7087459564208984, "learning_rate": 0.000833781512605042, "loss": 0.3887, "step": 6040 }, { "epoch": 3.374860335195531, "grad_norm": 0.6639626622200012, "learning_rate": 0.0008337535014005603, "loss": 0.5081, "step": 6041 }, { "epoch": 3.375418994413408, "grad_norm": 0.48255738615989685, "learning_rate": 0.0008337254901960785, "loss": 0.4183, "step": 6042 }, { "epoch": 3.375977653631285, "grad_norm": 0.6156942248344421, "learning_rate": 0.0008336974789915966, "loss": 0.368, "step": 6043 }, { "epoch": 3.376536312849162, "grad_norm": 0.5803930163383484, "learning_rate": 0.0008336694677871148, "loss": 0.4946, "step": 6044 }, { "epoch": 3.377094972067039, "grad_norm": 0.5024144649505615, "learning_rate": 0.000833641456582633, "loss": 0.4009, "step": 6045 }, { "epoch": 3.3776536312849164, "grad_norm": 1.3605116605758667, "learning_rate": 0.0008336134453781513, "loss": 0.4125, "step": 6046 }, { "epoch": 3.3782122905027934, "grad_norm": 0.5708194971084595, "learning_rate": 0.0008335854341736695, "loss": 0.4672, "step": 6047 }, { "epoch": 3.3787709497206704, "grad_norm": 3.3018643856048584, "learning_rate": 0.0008335574229691876, "loss": 0.5313, "step": 6048 }, { "epoch": 3.3793296089385474, "grad_norm": 
2.0821480751037598, "learning_rate": 0.0008335294117647058, "loss": 0.5199, "step": 6049 }, { "epoch": 3.3798882681564244, "grad_norm": 0.7997511625289917, "learning_rate": 0.000833501400560224, "loss": 0.4057, "step": 6050 }, { "epoch": 3.380446927374302, "grad_norm": 0.5062386989593506, "learning_rate": 0.0008334733893557424, "loss": 0.3939, "step": 6051 }, { "epoch": 3.381005586592179, "grad_norm": 1.694746971130371, "learning_rate": 0.0008334453781512606, "loss": 0.41, "step": 6052 }, { "epoch": 3.381564245810056, "grad_norm": 0.704200804233551, "learning_rate": 0.0008334173669467788, "loss": 0.4948, "step": 6053 }, { "epoch": 3.382122905027933, "grad_norm": 0.5387531518936157, "learning_rate": 0.0008333893557422969, "loss": 0.5413, "step": 6054 }, { "epoch": 3.38268156424581, "grad_norm": 0.6832190752029419, "learning_rate": 0.0008333613445378151, "loss": 0.5196, "step": 6055 }, { "epoch": 3.3832402234636874, "grad_norm": 0.564065158367157, "learning_rate": 0.0008333333333333334, "loss": 0.383, "step": 6056 }, { "epoch": 3.3837988826815644, "grad_norm": 1.477967619895935, "learning_rate": 0.0008333053221288516, "loss": 0.3982, "step": 6057 }, { "epoch": 3.3843575418994414, "grad_norm": 1.6309324502944946, "learning_rate": 0.0008332773109243698, "loss": 0.4391, "step": 6058 }, { "epoch": 3.3849162011173184, "grad_norm": 2.5116946697235107, "learning_rate": 0.0008332492997198879, "loss": 0.4056, "step": 6059 }, { "epoch": 3.3854748603351954, "grad_norm": 1.6525691747665405, "learning_rate": 0.0008332212885154061, "loss": 0.7121, "step": 6060 }, { "epoch": 3.386033519553073, "grad_norm": 0.5799946784973145, "learning_rate": 0.0008331932773109244, "loss": 0.3619, "step": 6061 }, { "epoch": 3.38659217877095, "grad_norm": 0.44262635707855225, "learning_rate": 0.0008331652661064426, "loss": 0.3979, "step": 6062 }, { "epoch": 3.387150837988827, "grad_norm": 0.7026075124740601, "learning_rate": 0.0008331372549019608, "loss": 0.6531, "step": 6063 }, { "epoch": 
3.387709497206704, "grad_norm": 0.5166801810264587, "learning_rate": 0.0008331092436974789, "loss": 0.3797, "step": 6064 }, { "epoch": 3.388268156424581, "grad_norm": 0.37580618262290955, "learning_rate": 0.0008330812324929971, "loss": 0.5012, "step": 6065 }, { "epoch": 3.388826815642458, "grad_norm": 0.5464267134666443, "learning_rate": 0.0008330532212885154, "loss": 0.3618, "step": 6066 }, { "epoch": 3.3893854748603354, "grad_norm": 0.7585722208023071, "learning_rate": 0.0008330252100840337, "loss": 0.4528, "step": 6067 }, { "epoch": 3.3899441340782124, "grad_norm": 1.071548342704773, "learning_rate": 0.0008329971988795519, "loss": 0.4616, "step": 6068 }, { "epoch": 3.3905027932960894, "grad_norm": 0.628174901008606, "learning_rate": 0.0008329691876750701, "loss": 0.4371, "step": 6069 }, { "epoch": 3.3910614525139664, "grad_norm": 0.8733397126197815, "learning_rate": 0.0008329411764705882, "loss": 0.511, "step": 6070 }, { "epoch": 3.3916201117318434, "grad_norm": 0.7845448851585388, "learning_rate": 0.0008329131652661065, "loss": 0.4789, "step": 6071 }, { "epoch": 3.3921787709497204, "grad_norm": 0.4847370982170105, "learning_rate": 0.0008328851540616247, "loss": 0.3967, "step": 6072 }, { "epoch": 3.392737430167598, "grad_norm": 0.5363072752952576, "learning_rate": 0.0008328571428571429, "loss": 0.5241, "step": 6073 }, { "epoch": 3.393296089385475, "grad_norm": 0.8072665333747864, "learning_rate": 0.0008328291316526611, "loss": 0.4497, "step": 6074 }, { "epoch": 3.393854748603352, "grad_norm": 0.7780736684799194, "learning_rate": 0.0008328011204481792, "loss": 0.4764, "step": 6075 }, { "epoch": 3.394413407821229, "grad_norm": 2.6497485637664795, "learning_rate": 0.0008327731092436975, "loss": 0.5924, "step": 6076 }, { "epoch": 3.394972067039106, "grad_norm": 0.677177369594574, "learning_rate": 0.0008327450980392157, "loss": 0.3832, "step": 6077 }, { "epoch": 3.3955307262569834, "grad_norm": 0.6119154691696167, "learning_rate": 0.0008327170868347339, "loss": 
0.4964, "step": 6078 }, { "epoch": 3.3960893854748604, "grad_norm": 0.48202550411224365, "learning_rate": 0.0008326890756302521, "loss": 0.4755, "step": 6079 }, { "epoch": 3.3966480446927374, "grad_norm": 0.5974293947219849, "learning_rate": 0.0008326610644257702, "loss": 0.4616, "step": 6080 }, { "epoch": 3.3972067039106144, "grad_norm": 0.515937089920044, "learning_rate": 0.0008326330532212885, "loss": 0.5021, "step": 6081 }, { "epoch": 3.3977653631284914, "grad_norm": 0.485859751701355, "learning_rate": 0.0008326050420168067, "loss": 0.3718, "step": 6082 }, { "epoch": 3.398324022346369, "grad_norm": 0.47070395946502686, "learning_rate": 0.000832577030812325, "loss": 0.3567, "step": 6083 }, { "epoch": 3.398882681564246, "grad_norm": 0.5709047913551331, "learning_rate": 0.0008325490196078432, "loss": 0.4434, "step": 6084 }, { "epoch": 3.399441340782123, "grad_norm": 0.8484343886375427, "learning_rate": 0.0008325210084033614, "loss": 0.4589, "step": 6085 }, { "epoch": 3.4, "grad_norm": 0.9602091908454895, "learning_rate": 0.0008324929971988796, "loss": 0.4809, "step": 6086 }, { "epoch": 3.400558659217877, "grad_norm": 0.4323050081729889, "learning_rate": 0.0008324649859943978, "loss": 0.4197, "step": 6087 }, { "epoch": 3.4011173184357544, "grad_norm": 1.7077631950378418, "learning_rate": 0.000832436974789916, "loss": 0.4206, "step": 6088 }, { "epoch": 3.4016759776536314, "grad_norm": 0.47025471925735474, "learning_rate": 0.0008324089635854342, "loss": 0.4042, "step": 6089 }, { "epoch": 3.4022346368715084, "grad_norm": 0.9566075801849365, "learning_rate": 0.0008323809523809524, "loss": 0.5372, "step": 6090 }, { "epoch": 3.4027932960893854, "grad_norm": 0.6932033896446228, "learning_rate": 0.0008323529411764706, "loss": 0.5512, "step": 6091 }, { "epoch": 3.4033519553072624, "grad_norm": 0.4438057541847229, "learning_rate": 0.0008323249299719888, "loss": 0.5734, "step": 6092 }, { "epoch": 3.40391061452514, "grad_norm": 0.6034622192382812, "learning_rate": 
0.000832296918767507, "loss": 0.5491, "step": 6093 }, { "epoch": 3.404469273743017, "grad_norm": 2.567652702331543, "learning_rate": 0.0008322689075630252, "loss": 0.4947, "step": 6094 }, { "epoch": 3.405027932960894, "grad_norm": 0.8938478827476501, "learning_rate": 0.0008322408963585434, "loss": 0.4776, "step": 6095 }, { "epoch": 3.405586592178771, "grad_norm": 1.2218962907791138, "learning_rate": 0.0008322128851540617, "loss": 0.485, "step": 6096 }, { "epoch": 3.406145251396648, "grad_norm": 0.5890511274337769, "learning_rate": 0.0008321848739495798, "loss": 0.4577, "step": 6097 }, { "epoch": 3.4067039106145254, "grad_norm": 0.6005613207817078, "learning_rate": 0.000832156862745098, "loss": 0.5547, "step": 6098 }, { "epoch": 3.4072625698324024, "grad_norm": 0.548182487487793, "learning_rate": 0.0008321288515406162, "loss": 0.4736, "step": 6099 }, { "epoch": 3.4078212290502794, "grad_norm": 0.6354692578315735, "learning_rate": 0.0008321008403361345, "loss": 0.5198, "step": 6100 }, { "epoch": 3.4083798882681564, "grad_norm": 0.7965889573097229, "learning_rate": 0.0008320728291316528, "loss": 0.4779, "step": 6101 }, { "epoch": 3.4089385474860334, "grad_norm": 0.573383629322052, "learning_rate": 0.0008320448179271709, "loss": 0.5811, "step": 6102 }, { "epoch": 3.4094972067039104, "grad_norm": 3.57027268409729, "learning_rate": 0.0008320168067226891, "loss": 0.4717, "step": 6103 }, { "epoch": 3.410055865921788, "grad_norm": 0.4645558297634125, "learning_rate": 0.0008319887955182073, "loss": 0.393, "step": 6104 }, { "epoch": 3.410614525139665, "grad_norm": 0.4812800884246826, "learning_rate": 0.0008319607843137255, "loss": 0.495, "step": 6105 }, { "epoch": 3.411173184357542, "grad_norm": 0.9177267551422119, "learning_rate": 0.0008319327731092438, "loss": 0.4404, "step": 6106 }, { "epoch": 3.411731843575419, "grad_norm": 0.4278791546821594, "learning_rate": 0.0008319047619047619, "loss": 0.478, "step": 6107 }, { "epoch": 3.412290502793296, "grad_norm": 
2.0132620334625244, "learning_rate": 0.0008318767507002801, "loss": 0.4125, "step": 6108 }, { "epoch": 3.412849162011173, "grad_norm": 0.5608509182929993, "learning_rate": 0.0008318487394957983, "loss": 0.4765, "step": 6109 }, { "epoch": 3.4134078212290504, "grad_norm": 1.7458564043045044, "learning_rate": 0.0008318207282913165, "loss": 0.4494, "step": 6110 }, { "epoch": 3.4139664804469274, "grad_norm": 0.7779131531715393, "learning_rate": 0.0008317927170868348, "loss": 0.4889, "step": 6111 }, { "epoch": 3.4145251396648044, "grad_norm": 1.0489270687103271, "learning_rate": 0.000831764705882353, "loss": 0.4302, "step": 6112 }, { "epoch": 3.4150837988826814, "grad_norm": 2.2097907066345215, "learning_rate": 0.0008317366946778711, "loss": 0.5399, "step": 6113 }, { "epoch": 3.4156424581005584, "grad_norm": 0.5458060503005981, "learning_rate": 0.0008317086834733893, "loss": 0.4644, "step": 6114 }, { "epoch": 3.416201117318436, "grad_norm": 0.5295409560203552, "learning_rate": 0.0008316806722689075, "loss": 0.479, "step": 6115 }, { "epoch": 3.416759776536313, "grad_norm": 0.9819905757904053, "learning_rate": 0.0008316526610644259, "loss": 0.5432, "step": 6116 }, { "epoch": 3.41731843575419, "grad_norm": 0.4620686173439026, "learning_rate": 0.0008316246498599441, "loss": 0.3981, "step": 6117 }, { "epoch": 3.417877094972067, "grad_norm": 0.5014457702636719, "learning_rate": 0.0008315966386554622, "loss": 0.4821, "step": 6118 }, { "epoch": 3.418435754189944, "grad_norm": 0.49127867817878723, "learning_rate": 0.0008315686274509804, "loss": 0.4395, "step": 6119 }, { "epoch": 3.4189944134078214, "grad_norm": 0.783652126789093, "learning_rate": 0.0008315406162464986, "loss": 0.449, "step": 6120 }, { "epoch": 3.4195530726256984, "grad_norm": 0.48742324113845825, "learning_rate": 0.0008315126050420169, "loss": 0.4947, "step": 6121 }, { "epoch": 3.4201117318435754, "grad_norm": 1.2025938034057617, "learning_rate": 0.0008314845938375351, "loss": 0.4748, "step": 6122 }, { "epoch": 
3.4206703910614524, "grad_norm": 0.46522942185401917, "learning_rate": 0.0008314565826330532, "loss": 0.4796, "step": 6123 }, { "epoch": 3.4212290502793294, "grad_norm": 0.6571593880653381, "learning_rate": 0.0008314285714285714, "loss": 0.5064, "step": 6124 }, { "epoch": 3.421787709497207, "grad_norm": 0.40364181995391846, "learning_rate": 0.0008314005602240896, "loss": 0.2829, "step": 6125 }, { "epoch": 3.422346368715084, "grad_norm": 0.3179486393928528, "learning_rate": 0.0008313725490196079, "loss": 0.3246, "step": 6126 }, { "epoch": 3.422905027932961, "grad_norm": 0.6112605333328247, "learning_rate": 0.0008313445378151261, "loss": 0.5162, "step": 6127 }, { "epoch": 3.423463687150838, "grad_norm": 0.6073411107063293, "learning_rate": 0.0008313165266106443, "loss": 0.4942, "step": 6128 }, { "epoch": 3.424022346368715, "grad_norm": 0.603777289390564, "learning_rate": 0.0008312885154061624, "loss": 0.6173, "step": 6129 }, { "epoch": 3.4245810055865924, "grad_norm": 3.2474570274353027, "learning_rate": 0.0008312605042016806, "loss": 0.3928, "step": 6130 }, { "epoch": 3.4251396648044694, "grad_norm": 0.4615461528301239, "learning_rate": 0.0008312324929971989, "loss": 0.4786, "step": 6131 }, { "epoch": 3.4256983240223464, "grad_norm": 0.5394693613052368, "learning_rate": 0.0008312044817927172, "loss": 0.472, "step": 6132 }, { "epoch": 3.4262569832402234, "grad_norm": 0.6481944918632507, "learning_rate": 0.0008311764705882354, "loss": 0.4809, "step": 6133 }, { "epoch": 3.4268156424581004, "grad_norm": 0.6655814051628113, "learning_rate": 0.0008311484593837535, "loss": 0.4104, "step": 6134 }, { "epoch": 3.427374301675978, "grad_norm": 0.6282750964164734, "learning_rate": 0.0008311204481792717, "loss": 0.4829, "step": 6135 }, { "epoch": 3.427932960893855, "grad_norm": 0.5513433814048767, "learning_rate": 0.00083109243697479, "loss": 0.6495, "step": 6136 }, { "epoch": 3.428491620111732, "grad_norm": 0.5036987066268921, "learning_rate": 0.0008310644257703082, "loss": 
0.3979, "step": 6137 }, { "epoch": 3.429050279329609, "grad_norm": 0.549089252948761, "learning_rate": 0.0008310364145658264, "loss": 0.5067, "step": 6138 }, { "epoch": 3.429608938547486, "grad_norm": 0.48470252752304077, "learning_rate": 0.0008310084033613445, "loss": 0.4748, "step": 6139 }, { "epoch": 3.430167597765363, "grad_norm": 0.4290856122970581, "learning_rate": 0.0008309803921568627, "loss": 0.457, "step": 6140 }, { "epoch": 3.4307262569832404, "grad_norm": 0.6760498285293579, "learning_rate": 0.000830952380952381, "loss": 0.4882, "step": 6141 }, { "epoch": 3.4312849162011174, "grad_norm": 0.6759735345840454, "learning_rate": 0.0008309243697478992, "loss": 0.5385, "step": 6142 }, { "epoch": 3.4318435754189944, "grad_norm": 0.5476924777030945, "learning_rate": 0.0008308963585434174, "loss": 0.5527, "step": 6143 }, { "epoch": 3.4324022346368714, "grad_norm": 0.54535311460495, "learning_rate": 0.0008308683473389356, "loss": 0.5131, "step": 6144 }, { "epoch": 3.4329608938547485, "grad_norm": 0.5027291178703308, "learning_rate": 0.0008308403361344537, "loss": 0.3548, "step": 6145 }, { "epoch": 3.4335195530726255, "grad_norm": 0.4983408749103546, "learning_rate": 0.000830812324929972, "loss": 0.4955, "step": 6146 }, { "epoch": 3.434078212290503, "grad_norm": 0.5102941393852234, "learning_rate": 0.0008307843137254902, "loss": 0.5684, "step": 6147 }, { "epoch": 3.43463687150838, "grad_norm": 0.7571179270744324, "learning_rate": 0.0008307563025210084, "loss": 0.5, "step": 6148 }, { "epoch": 3.435195530726257, "grad_norm": 0.42837122082710266, "learning_rate": 0.0008307282913165267, "loss": 0.3529, "step": 6149 }, { "epoch": 3.435754189944134, "grad_norm": 0.6471491456031799, "learning_rate": 0.0008307002801120448, "loss": 0.4824, "step": 6150 }, { "epoch": 3.436312849162011, "grad_norm": 0.5041567087173462, "learning_rate": 0.0008306722689075631, "loss": 0.5364, "step": 6151 }, { "epoch": 3.4368715083798884, "grad_norm": 0.5908088088035583, "learning_rate": 
0.0008306442577030813, "loss": 0.4052, "step": 6152 }, { "epoch": 3.4374301675977654, "grad_norm": 0.5950685739517212, "learning_rate": 0.0008306162464985995, "loss": 0.3639, "step": 6153 }, { "epoch": 3.4379888268156424, "grad_norm": 0.4470774531364441, "learning_rate": 0.0008305882352941177, "loss": 0.5267, "step": 6154 }, { "epoch": 3.4385474860335195, "grad_norm": 0.529109537601471, "learning_rate": 0.0008305602240896358, "loss": 0.3909, "step": 6155 }, { "epoch": 3.4391061452513965, "grad_norm": 0.5458061099052429, "learning_rate": 0.0008305322128851541, "loss": 0.7119, "step": 6156 }, { "epoch": 3.439664804469274, "grad_norm": 0.5622304081916809, "learning_rate": 0.0008305042016806723, "loss": 0.5789, "step": 6157 }, { "epoch": 3.440223463687151, "grad_norm": 0.3653081953525543, "learning_rate": 0.0008304761904761905, "loss": 0.4522, "step": 6158 }, { "epoch": 3.440782122905028, "grad_norm": 0.4660721719264984, "learning_rate": 0.0008304481792717087, "loss": 0.4748, "step": 6159 }, { "epoch": 3.441340782122905, "grad_norm": 1.2203646898269653, "learning_rate": 0.0008304201680672269, "loss": 0.4994, "step": 6160 }, { "epoch": 3.441899441340782, "grad_norm": 0.7291945219039917, "learning_rate": 0.0008303921568627451, "loss": 0.562, "step": 6161 }, { "epoch": 3.4424581005586594, "grad_norm": 0.9149802327156067, "learning_rate": 0.0008303641456582633, "loss": 0.4369, "step": 6162 }, { "epoch": 3.4430167597765364, "grad_norm": 0.5975952744483948, "learning_rate": 0.0008303361344537815, "loss": 0.5538, "step": 6163 }, { "epoch": 3.4435754189944134, "grad_norm": 3.1010115146636963, "learning_rate": 0.0008303081232492997, "loss": 0.5367, "step": 6164 }, { "epoch": 3.4441340782122905, "grad_norm": 0.722662627696991, "learning_rate": 0.000830280112044818, "loss": 0.4752, "step": 6165 }, { "epoch": 3.4446927374301675, "grad_norm": 2.758788824081421, "learning_rate": 0.0008302521008403362, "loss": 0.5845, "step": 6166 }, { "epoch": 3.445251396648045, "grad_norm": 
2.9750373363494873, "learning_rate": 0.0008302240896358544, "loss": 0.4125, "step": 6167 }, { "epoch": 3.445810055865922, "grad_norm": 0.5902764797210693, "learning_rate": 0.0008301960784313726, "loss": 0.4676, "step": 6168 }, { "epoch": 3.446368715083799, "grad_norm": 0.645778477191925, "learning_rate": 0.0008301680672268908, "loss": 0.5567, "step": 6169 }, { "epoch": 3.446927374301676, "grad_norm": 0.690401017665863, "learning_rate": 0.000830140056022409, "loss": 0.4933, "step": 6170 }, { "epoch": 3.447486033519553, "grad_norm": 0.6196557879447937, "learning_rate": 0.0008301120448179272, "loss": 0.3777, "step": 6171 }, { "epoch": 3.4480446927374304, "grad_norm": 0.46737533807754517, "learning_rate": 0.0008300840336134454, "loss": 0.4538, "step": 6172 }, { "epoch": 3.4486033519553074, "grad_norm": 0.6995070576667786, "learning_rate": 0.0008300560224089636, "loss": 0.5321, "step": 6173 }, { "epoch": 3.4491620111731844, "grad_norm": 0.6664303541183472, "learning_rate": 0.0008300280112044818, "loss": 0.3817, "step": 6174 }, { "epoch": 3.4497206703910615, "grad_norm": 0.6369305849075317, "learning_rate": 0.00083, "loss": 0.4651, "step": 6175 }, { "epoch": 3.4502793296089385, "grad_norm": 0.4330568313598633, "learning_rate": 0.0008299719887955183, "loss": 0.4245, "step": 6176 }, { "epoch": 3.4508379888268155, "grad_norm": 0.6581286191940308, "learning_rate": 0.0008299439775910364, "loss": 0.5335, "step": 6177 }, { "epoch": 3.451396648044693, "grad_norm": 0.5372915267944336, "learning_rate": 0.0008299159663865546, "loss": 0.4247, "step": 6178 }, { "epoch": 3.45195530726257, "grad_norm": 0.5128198266029358, "learning_rate": 0.0008298879551820728, "loss": 0.3812, "step": 6179 }, { "epoch": 3.452513966480447, "grad_norm": 0.5149964690208435, "learning_rate": 0.000829859943977591, "loss": 0.4034, "step": 6180 }, { "epoch": 3.453072625698324, "grad_norm": 0.6453413367271423, "learning_rate": 0.0008298319327731094, "loss": 0.4377, "step": 6181 }, { "epoch": 3.453631284916201, 
"grad_norm": 0.5139704346656799, "learning_rate": 0.0008298039215686275, "loss": 0.3467, "step": 6182 }, { "epoch": 3.454189944134078, "grad_norm": 0.6001042723655701, "learning_rate": 0.0008297759103641457, "loss": 0.4628, "step": 6183 }, { "epoch": 3.4547486033519554, "grad_norm": 0.6400231122970581, "learning_rate": 0.0008297478991596639, "loss": 0.4181, "step": 6184 }, { "epoch": 3.4553072625698324, "grad_norm": 0.4790504276752472, "learning_rate": 0.0008297198879551821, "loss": 0.3962, "step": 6185 }, { "epoch": 3.4558659217877095, "grad_norm": 0.4278772175312042, "learning_rate": 0.0008296918767507004, "loss": 0.4199, "step": 6186 }, { "epoch": 3.4564245810055865, "grad_norm": 0.621315598487854, "learning_rate": 0.0008296638655462185, "loss": 0.5873, "step": 6187 }, { "epoch": 3.4569832402234635, "grad_norm": 2.627028226852417, "learning_rate": 0.0008296358543417367, "loss": 0.6, "step": 6188 }, { "epoch": 3.457541899441341, "grad_norm": 0.6533622145652771, "learning_rate": 0.0008296078431372549, "loss": 0.6004, "step": 6189 }, { "epoch": 3.458100558659218, "grad_norm": 0.4335118532180786, "learning_rate": 0.0008295798319327731, "loss": 0.398, "step": 6190 }, { "epoch": 3.458659217877095, "grad_norm": 3.6930038928985596, "learning_rate": 0.0008295518207282914, "loss": 0.5033, "step": 6191 }, { "epoch": 3.459217877094972, "grad_norm": 0.4581165611743927, "learning_rate": 0.0008295238095238096, "loss": 0.475, "step": 6192 }, { "epoch": 3.459776536312849, "grad_norm": 0.5832881331443787, "learning_rate": 0.0008294957983193277, "loss": 0.4453, "step": 6193 }, { "epoch": 3.4603351955307264, "grad_norm": 0.48735567927360535, "learning_rate": 0.0008294677871148459, "loss": 0.435, "step": 6194 }, { "epoch": 3.4608938547486034, "grad_norm": 3.0008339881896973, "learning_rate": 0.0008294397759103641, "loss": 0.5185, "step": 6195 }, { "epoch": 3.4614525139664805, "grad_norm": 0.6516963243484497, "learning_rate": 0.0008294117647058824, "loss": 0.4898, "step": 6196 }, { 
"epoch": 3.4620111731843575, "grad_norm": 0.5168313384056091, "learning_rate": 0.0008293837535014006, "loss": 0.4192, "step": 6197 }, { "epoch": 3.4625698324022345, "grad_norm": 0.8226152062416077, "learning_rate": 0.0008293557422969187, "loss": 0.5007, "step": 6198 }, { "epoch": 3.463128491620112, "grad_norm": 0.3868049383163452, "learning_rate": 0.000829327731092437, "loss": 0.3972, "step": 6199 }, { "epoch": 3.463687150837989, "grad_norm": 0.5038139224052429, "learning_rate": 0.0008292997198879552, "loss": 0.308, "step": 6200 }, { "epoch": 3.464245810055866, "grad_norm": 0.4974587559700012, "learning_rate": 0.0008292717086834735, "loss": 0.5803, "step": 6201 }, { "epoch": 3.464804469273743, "grad_norm": 0.461847186088562, "learning_rate": 0.0008292436974789917, "loss": 0.4907, "step": 6202 }, { "epoch": 3.46536312849162, "grad_norm": 0.4884839951992035, "learning_rate": 0.0008292156862745098, "loss": 0.4431, "step": 6203 }, { "epoch": 3.4659217877094974, "grad_norm": 0.40575647354125977, "learning_rate": 0.000829187675070028, "loss": 0.3935, "step": 6204 }, { "epoch": 3.4664804469273744, "grad_norm": 0.370565801858902, "learning_rate": 0.0008291596638655462, "loss": 0.426, "step": 6205 }, { "epoch": 3.4670391061452515, "grad_norm": 0.8607147932052612, "learning_rate": 0.0008291316526610645, "loss": 0.3742, "step": 6206 }, { "epoch": 3.4675977653631285, "grad_norm": 0.48300793766975403, "learning_rate": 0.0008291036414565827, "loss": 0.4999, "step": 6207 }, { "epoch": 3.4681564245810055, "grad_norm": 0.3741581439971924, "learning_rate": 0.0008290756302521009, "loss": 0.4202, "step": 6208 }, { "epoch": 3.4687150837988825, "grad_norm": 0.5670000910758972, "learning_rate": 0.000829047619047619, "loss": 0.5501, "step": 6209 }, { "epoch": 3.46927374301676, "grad_norm": 0.6159808039665222, "learning_rate": 0.0008290196078431372, "loss": 0.3801, "step": 6210 }, { "epoch": 3.469832402234637, "grad_norm": 0.7532714605331421, "learning_rate": 0.0008289915966386555, "loss": 
0.4826, "step": 6211 }, { "epoch": 3.470391061452514, "grad_norm": 0.7035219669342041, "learning_rate": 0.0008289635854341737, "loss": 0.4745, "step": 6212 }, { "epoch": 3.470949720670391, "grad_norm": 0.4840216338634491, "learning_rate": 0.0008289355742296919, "loss": 0.4788, "step": 6213 }, { "epoch": 3.471508379888268, "grad_norm": 0.5621411204338074, "learning_rate": 0.00082890756302521, "loss": 0.6408, "step": 6214 }, { "epoch": 3.472067039106145, "grad_norm": 0.7430678009986877, "learning_rate": 0.0008288795518207282, "loss": 0.5221, "step": 6215 }, { "epoch": 3.4726256983240225, "grad_norm": 0.6376789808273315, "learning_rate": 0.0008288515406162466, "loss": 0.3903, "step": 6216 }, { "epoch": 3.4731843575418995, "grad_norm": 0.5655553936958313, "learning_rate": 0.0008288235294117648, "loss": 0.4414, "step": 6217 }, { "epoch": 3.4737430167597765, "grad_norm": 0.4801236391067505, "learning_rate": 0.000828795518207283, "loss": 0.4709, "step": 6218 }, { "epoch": 3.4743016759776535, "grad_norm": 0.591867208480835, "learning_rate": 0.0008287675070028011, "loss": 0.5107, "step": 6219 }, { "epoch": 3.4748603351955305, "grad_norm": 0.5417670011520386, "learning_rate": 0.0008287394957983193, "loss": 0.428, "step": 6220 }, { "epoch": 3.475418994413408, "grad_norm": 0.702387809753418, "learning_rate": 0.0008287114845938376, "loss": 0.5977, "step": 6221 }, { "epoch": 3.475977653631285, "grad_norm": 0.6193615794181824, "learning_rate": 0.0008286834733893558, "loss": 0.622, "step": 6222 }, { "epoch": 3.476536312849162, "grad_norm": 0.46409308910369873, "learning_rate": 0.000828655462184874, "loss": 0.3543, "step": 6223 }, { "epoch": 3.477094972067039, "grad_norm": 0.9161087274551392, "learning_rate": 0.0008286274509803922, "loss": 0.5775, "step": 6224 }, { "epoch": 3.477653631284916, "grad_norm": 0.5752775073051453, "learning_rate": 0.0008285994397759103, "loss": 0.4749, "step": 6225 }, { "epoch": 3.4782122905027935, "grad_norm": 0.6740328669548035, "learning_rate": 
0.0008285714285714286, "loss": 0.4558, "step": 6226 }, { "epoch": 3.4787709497206705, "grad_norm": 0.48524630069732666, "learning_rate": 0.0008285434173669468, "loss": 0.3237, "step": 6227 }, { "epoch": 3.4793296089385475, "grad_norm": 1.2408488988876343, "learning_rate": 0.000828515406162465, "loss": 0.3959, "step": 6228 }, { "epoch": 3.4798882681564245, "grad_norm": 0.5826830863952637, "learning_rate": 0.0008284873949579832, "loss": 0.4294, "step": 6229 }, { "epoch": 3.4804469273743015, "grad_norm": 0.8397267460823059, "learning_rate": 0.0008284593837535013, "loss": 0.3857, "step": 6230 }, { "epoch": 3.481005586592179, "grad_norm": 2.1733570098876953, "learning_rate": 0.0008284313725490197, "loss": 0.3724, "step": 6231 }, { "epoch": 3.481564245810056, "grad_norm": 0.6842045187950134, "learning_rate": 0.0008284033613445379, "loss": 0.5183, "step": 6232 }, { "epoch": 3.482122905027933, "grad_norm": 0.7522661089897156, "learning_rate": 0.0008283753501400561, "loss": 0.3443, "step": 6233 }, { "epoch": 3.48268156424581, "grad_norm": 0.5143001675605774, "learning_rate": 0.0008283473389355743, "loss": 0.4324, "step": 6234 }, { "epoch": 3.483240223463687, "grad_norm": 1.29478120803833, "learning_rate": 0.0008283193277310924, "loss": 0.456, "step": 6235 }, { "epoch": 3.4837988826815645, "grad_norm": 0.6133304238319397, "learning_rate": 0.0008282913165266107, "loss": 0.42, "step": 6236 }, { "epoch": 3.4843575418994415, "grad_norm": 0.6396066546440125, "learning_rate": 0.0008282633053221289, "loss": 0.6273, "step": 6237 }, { "epoch": 3.4849162011173185, "grad_norm": 0.6781561374664307, "learning_rate": 0.0008282352941176471, "loss": 0.4388, "step": 6238 }, { "epoch": 3.4854748603351955, "grad_norm": 1.2636315822601318, "learning_rate": 0.0008282072829131653, "loss": 0.4019, "step": 6239 }, { "epoch": 3.4860335195530725, "grad_norm": 0.6104376912117004, "learning_rate": 0.0008281792717086835, "loss": 0.5062, "step": 6240 }, { "epoch": 3.48659217877095, "grad_norm": 
1.448607087135315, "learning_rate": 0.0008281512605042017, "loss": 0.4492, "step": 6241 }, { "epoch": 3.487150837988827, "grad_norm": 0.6598222255706787, "learning_rate": 0.0008281232492997199, "loss": 0.538, "step": 6242 }, { "epoch": 3.487709497206704, "grad_norm": 0.8888305425643921, "learning_rate": 0.0008280952380952381, "loss": 0.4827, "step": 6243 }, { "epoch": 3.488268156424581, "grad_norm": 0.6721920967102051, "learning_rate": 0.0008280672268907563, "loss": 0.5386, "step": 6244 }, { "epoch": 3.488826815642458, "grad_norm": 1.0853923559188843, "learning_rate": 0.0008280392156862745, "loss": 0.4809, "step": 6245 }, { "epoch": 3.489385474860335, "grad_norm": 0.5683623552322388, "learning_rate": 0.0008280112044817926, "loss": 0.3923, "step": 6246 }, { "epoch": 3.4899441340782125, "grad_norm": 0.5953332185745239, "learning_rate": 0.000827983193277311, "loss": 0.4219, "step": 6247 }, { "epoch": 3.4905027932960895, "grad_norm": 0.6139711737632751, "learning_rate": 0.0008279551820728292, "loss": 0.3811, "step": 6248 }, { "epoch": 3.4910614525139665, "grad_norm": 1.4022561311721802, "learning_rate": 0.0008279271708683474, "loss": 0.5551, "step": 6249 }, { "epoch": 3.4916201117318435, "grad_norm": 0.7792200446128845, "learning_rate": 0.0008278991596638656, "loss": 0.3993, "step": 6250 }, { "epoch": 3.4921787709497205, "grad_norm": 1.1188433170318604, "learning_rate": 0.0008278711484593837, "loss": 0.46, "step": 6251 }, { "epoch": 3.4927374301675975, "grad_norm": 0.5536515116691589, "learning_rate": 0.000827843137254902, "loss": 0.569, "step": 6252 }, { "epoch": 3.493296089385475, "grad_norm": 0.7545760273933411, "learning_rate": 0.0008278151260504202, "loss": 0.5369, "step": 6253 }, { "epoch": 3.493854748603352, "grad_norm": 0.7642180919647217, "learning_rate": 0.0008277871148459384, "loss": 0.6411, "step": 6254 }, { "epoch": 3.494413407821229, "grad_norm": 0.5666323304176331, "learning_rate": 0.0008277591036414566, "loss": 0.4678, "step": 6255 }, { "epoch": 
3.494972067039106, "grad_norm": 0.5523326992988586, "learning_rate": 0.0008277310924369748, "loss": 0.4569, "step": 6256 }, { "epoch": 3.495530726256983, "grad_norm": 0.5576061010360718, "learning_rate": 0.000827703081232493, "loss": 0.4409, "step": 6257 }, { "epoch": 3.4960893854748605, "grad_norm": 0.4355257749557495, "learning_rate": 0.0008276750700280112, "loss": 0.396, "step": 6258 }, { "epoch": 3.4966480446927375, "grad_norm": 2.5634796619415283, "learning_rate": 0.0008276470588235294, "loss": 0.5531, "step": 6259 }, { "epoch": 3.4972067039106145, "grad_norm": 0.542377233505249, "learning_rate": 0.0008276190476190476, "loss": 0.6028, "step": 6260 }, { "epoch": 3.4977653631284915, "grad_norm": 0.7469171285629272, "learning_rate": 0.0008275910364145658, "loss": 0.7268, "step": 6261 }, { "epoch": 3.4983240223463685, "grad_norm": 0.46832942962646484, "learning_rate": 0.000827563025210084, "loss": 0.4185, "step": 6262 }, { "epoch": 3.498882681564246, "grad_norm": 0.5550451278686523, "learning_rate": 0.0008275350140056022, "loss": 0.4034, "step": 6263 }, { "epoch": 3.499441340782123, "grad_norm": 0.7761154770851135, "learning_rate": 0.0008275070028011205, "loss": 0.7377, "step": 6264 }, { "epoch": 3.5, "grad_norm": 0.964921236038208, "learning_rate": 0.0008274789915966387, "loss": 0.56, "step": 6265 }, { "epoch": 3.500558659217877, "grad_norm": 0.7288264632225037, "learning_rate": 0.0008274509803921569, "loss": 0.3962, "step": 6266 }, { "epoch": 3.501117318435754, "grad_norm": 0.5606486201286316, "learning_rate": 0.0008274229691876752, "loss": 0.3888, "step": 6267 }, { "epoch": 3.5016759776536315, "grad_norm": 0.6529912352561951, "learning_rate": 0.0008273949579831933, "loss": 0.6772, "step": 6268 }, { "epoch": 3.5022346368715085, "grad_norm": 0.6502243876457214, "learning_rate": 0.0008273669467787115, "loss": 0.5324, "step": 6269 }, { "epoch": 3.5027932960893855, "grad_norm": 0.7022506594657898, "learning_rate": 0.0008273389355742297, "loss": 0.4499, "step": 6270 
}, { "epoch": 3.5033519553072625, "grad_norm": 0.9374591708183289, "learning_rate": 0.0008273109243697479, "loss": 0.5747, "step": 6271 }, { "epoch": 3.5039106145251395, "grad_norm": 0.4639132618904114, "learning_rate": 0.0008272829131652662, "loss": 0.441, "step": 6272 }, { "epoch": 3.504469273743017, "grad_norm": 0.46832239627838135, "learning_rate": 0.0008272549019607843, "loss": 0.4049, "step": 6273 }, { "epoch": 3.505027932960894, "grad_norm": 0.6344608664512634, "learning_rate": 0.0008272268907563025, "loss": 0.444, "step": 6274 }, { "epoch": 3.505586592178771, "grad_norm": 0.731968879699707, "learning_rate": 0.0008271988795518207, "loss": 0.6096, "step": 6275 }, { "epoch": 3.506145251396648, "grad_norm": 0.758788526058197, "learning_rate": 0.0008271708683473389, "loss": 0.3645, "step": 6276 }, { "epoch": 3.506703910614525, "grad_norm": 0.5580565929412842, "learning_rate": 0.0008271428571428572, "loss": 0.4949, "step": 6277 }, { "epoch": 3.5072625698324025, "grad_norm": 0.507298469543457, "learning_rate": 0.0008271148459383753, "loss": 0.4238, "step": 6278 }, { "epoch": 3.5078212290502795, "grad_norm": 0.9117985367774963, "learning_rate": 0.0008270868347338935, "loss": 0.4057, "step": 6279 }, { "epoch": 3.5083798882681565, "grad_norm": 2.154933214187622, "learning_rate": 0.0008270588235294117, "loss": 0.5608, "step": 6280 }, { "epoch": 3.5089385474860335, "grad_norm": 0.5375345349311829, "learning_rate": 0.00082703081232493, "loss": 0.4625, "step": 6281 }, { "epoch": 3.5094972067039105, "grad_norm": 0.4402356743812561, "learning_rate": 0.0008270028011204483, "loss": 0.3624, "step": 6282 }, { "epoch": 3.510055865921788, "grad_norm": 0.8601878881454468, "learning_rate": 0.0008269747899159665, "loss": 0.585, "step": 6283 }, { "epoch": 3.5106145251396645, "grad_norm": 0.4689737856388092, "learning_rate": 0.0008269467787114846, "loss": 0.5057, "step": 6284 }, { "epoch": 3.511173184357542, "grad_norm": 0.6700764894485474, "learning_rate": 0.0008269187675070028, 
"loss": 0.4682, "step": 6285 }, { "epoch": 3.511731843575419, "grad_norm": 0.7798172235488892, "learning_rate": 0.000826890756302521, "loss": 0.4129, "step": 6286 }, { "epoch": 3.512290502793296, "grad_norm": 0.8098841309547424, "learning_rate": 0.0008268627450980393, "loss": 0.4749, "step": 6287 }, { "epoch": 3.512849162011173, "grad_norm": 0.5506035089492798, "learning_rate": 0.0008268347338935575, "loss": 0.4992, "step": 6288 }, { "epoch": 3.51340782122905, "grad_norm": 0.6817156672477722, "learning_rate": 0.0008268067226890756, "loss": 0.4863, "step": 6289 }, { "epoch": 3.5139664804469275, "grad_norm": 0.544497549533844, "learning_rate": 0.0008267787114845938, "loss": 0.5951, "step": 6290 }, { "epoch": 3.5145251396648045, "grad_norm": 0.7743503451347351, "learning_rate": 0.000826750700280112, "loss": 0.4196, "step": 6291 }, { "epoch": 3.5150837988826815, "grad_norm": 0.9870738387107849, "learning_rate": 0.0008267226890756303, "loss": 0.5177, "step": 6292 }, { "epoch": 3.5156424581005585, "grad_norm": 2.3739304542541504, "learning_rate": 0.0008266946778711485, "loss": 0.3858, "step": 6293 }, { "epoch": 3.5162011173184355, "grad_norm": 0.6579199433326721, "learning_rate": 0.0008266666666666666, "loss": 0.4135, "step": 6294 }, { "epoch": 3.516759776536313, "grad_norm": 0.5895616412162781, "learning_rate": 0.0008266386554621848, "loss": 0.4884, "step": 6295 }, { "epoch": 3.51731843575419, "grad_norm": 0.7617058157920837, "learning_rate": 0.000826610644257703, "loss": 0.431, "step": 6296 }, { "epoch": 3.517877094972067, "grad_norm": 2.0498316287994385, "learning_rate": 0.0008265826330532214, "loss": 0.524, "step": 6297 }, { "epoch": 3.518435754189944, "grad_norm": 0.5397300124168396, "learning_rate": 0.0008265546218487396, "loss": 0.424, "step": 6298 }, { "epoch": 3.518994413407821, "grad_norm": 0.907599925994873, "learning_rate": 0.0008265266106442578, "loss": 0.5571, "step": 6299 }, { "epoch": 3.5195530726256985, "grad_norm": 0.5154718160629272, "learning_rate": 
0.0008264985994397759, "loss": 0.4773, "step": 6300 }, { "epoch": 3.5201117318435755, "grad_norm": 0.501197099685669, "learning_rate": 0.0008264705882352941, "loss": 0.4867, "step": 6301 }, { "epoch": 3.5206703910614525, "grad_norm": 0.5469103455543518, "learning_rate": 0.0008264425770308124, "loss": 0.4265, "step": 6302 }, { "epoch": 3.5212290502793295, "grad_norm": 0.5078546404838562, "learning_rate": 0.0008264145658263306, "loss": 0.5196, "step": 6303 }, { "epoch": 3.5217877094972065, "grad_norm": 0.6976302266120911, "learning_rate": 0.0008263865546218488, "loss": 0.521, "step": 6304 }, { "epoch": 3.522346368715084, "grad_norm": 0.52731853723526, "learning_rate": 0.0008263585434173669, "loss": 0.576, "step": 6305 }, { "epoch": 3.522905027932961, "grad_norm": 0.8768479228019714, "learning_rate": 0.0008263305322128851, "loss": 0.4087, "step": 6306 }, { "epoch": 3.523463687150838, "grad_norm": 0.6016049981117249, "learning_rate": 0.0008263025210084034, "loss": 0.4309, "step": 6307 }, { "epoch": 3.524022346368715, "grad_norm": 1.674957036972046, "learning_rate": 0.0008262745098039216, "loss": 0.3762, "step": 6308 }, { "epoch": 3.524581005586592, "grad_norm": 0.43001589179039, "learning_rate": 0.0008262464985994398, "loss": 0.4364, "step": 6309 }, { "epoch": 3.5251396648044695, "grad_norm": 1.1241363286972046, "learning_rate": 0.0008262184873949579, "loss": 0.4668, "step": 6310 }, { "epoch": 3.5256983240223465, "grad_norm": 0.4156871438026428, "learning_rate": 0.0008261904761904761, "loss": 0.4207, "step": 6311 }, { "epoch": 3.5262569832402235, "grad_norm": 0.6592857837677002, "learning_rate": 0.0008261624649859944, "loss": 0.4434, "step": 6312 }, { "epoch": 3.5268156424581005, "grad_norm": 0.5553011298179626, "learning_rate": 0.0008261344537815127, "loss": 0.4965, "step": 6313 }, { "epoch": 3.5273743016759775, "grad_norm": 0.9002603888511658, "learning_rate": 0.0008261064425770309, "loss": 0.529, "step": 6314 }, { "epoch": 3.527932960893855, "grad_norm": 
0.6343851685523987, "learning_rate": 0.0008260784313725491, "loss": 0.4628, "step": 6315 }, { "epoch": 3.528491620111732, "grad_norm": 0.5288112163543701, "learning_rate": 0.0008260504201680672, "loss": 0.4399, "step": 6316 }, { "epoch": 3.529050279329609, "grad_norm": 0.805587887763977, "learning_rate": 0.0008260224089635855, "loss": 0.5357, "step": 6317 }, { "epoch": 3.529608938547486, "grad_norm": 1.3080799579620361, "learning_rate": 0.0008259943977591037, "loss": 0.4533, "step": 6318 }, { "epoch": 3.530167597765363, "grad_norm": 0.7621505856513977, "learning_rate": 0.0008259663865546219, "loss": 0.4495, "step": 6319 }, { "epoch": 3.5307262569832405, "grad_norm": 0.6917195320129395, "learning_rate": 0.0008259383753501401, "loss": 0.5868, "step": 6320 }, { "epoch": 3.531284916201117, "grad_norm": 0.602952241897583, "learning_rate": 0.0008259103641456582, "loss": 0.3552, "step": 6321 }, { "epoch": 3.5318435754189945, "grad_norm": 0.443672239780426, "learning_rate": 0.0008258823529411765, "loss": 0.4313, "step": 6322 }, { "epoch": 3.5324022346368715, "grad_norm": 0.5087466835975647, "learning_rate": 0.0008258543417366947, "loss": 0.3614, "step": 6323 }, { "epoch": 3.5329608938547485, "grad_norm": 0.5650293827056885, "learning_rate": 0.0008258263305322129, "loss": 0.4144, "step": 6324 }, { "epoch": 3.5335195530726256, "grad_norm": 0.547363817691803, "learning_rate": 0.0008257983193277311, "loss": 0.4474, "step": 6325 }, { "epoch": 3.5340782122905026, "grad_norm": 0.47006767988204956, "learning_rate": 0.0008257703081232492, "loss": 0.4135, "step": 6326 }, { "epoch": 3.53463687150838, "grad_norm": 0.5777665972709656, "learning_rate": 0.0008257422969187675, "loss": 0.4646, "step": 6327 }, { "epoch": 3.535195530726257, "grad_norm": 0.4608403444290161, "learning_rate": 0.0008257142857142857, "loss": 0.3563, "step": 6328 }, { "epoch": 3.535754189944134, "grad_norm": 0.4284411370754242, "learning_rate": 0.000825686274509804, "loss": 0.4066, "step": 6329 }, { "epoch": 
3.536312849162011, "grad_norm": 1.2041430473327637, "learning_rate": 0.0008256582633053222, "loss": 0.5643, "step": 6330 }, { "epoch": 3.536871508379888, "grad_norm": 0.7602660655975342, "learning_rate": 0.0008256302521008404, "loss": 0.4196, "step": 6331 }, { "epoch": 3.5374301675977655, "grad_norm": 2.63192081451416, "learning_rate": 0.0008256022408963586, "loss": 0.4932, "step": 6332 }, { "epoch": 3.5379888268156425, "grad_norm": 0.6775304675102234, "learning_rate": 0.0008255742296918768, "loss": 0.4489, "step": 6333 }, { "epoch": 3.5385474860335195, "grad_norm": 0.45529839396476746, "learning_rate": 0.000825546218487395, "loss": 0.3781, "step": 6334 }, { "epoch": 3.5391061452513966, "grad_norm": 0.6338316202163696, "learning_rate": 0.0008255182072829132, "loss": 0.5974, "step": 6335 }, { "epoch": 3.5396648044692736, "grad_norm": 0.5107612013816833, "learning_rate": 0.0008254901960784314, "loss": 0.4611, "step": 6336 }, { "epoch": 3.540223463687151, "grad_norm": 0.48357951641082764, "learning_rate": 0.0008254621848739496, "loss": 0.4755, "step": 6337 }, { "epoch": 3.540782122905028, "grad_norm": 0.6447745561599731, "learning_rate": 0.0008254341736694678, "loss": 0.5034, "step": 6338 }, { "epoch": 3.541340782122905, "grad_norm": 0.6290780901908875, "learning_rate": 0.000825406162464986, "loss": 0.6193, "step": 6339 }, { "epoch": 3.541899441340782, "grad_norm": 0.5462241768836975, "learning_rate": 0.0008253781512605042, "loss": 0.4217, "step": 6340 }, { "epoch": 3.542458100558659, "grad_norm": 0.41927841305732727, "learning_rate": 0.0008253501400560224, "loss": 0.4314, "step": 6341 }, { "epoch": 3.5430167597765365, "grad_norm": 0.3848975598812103, "learning_rate": 0.0008253221288515406, "loss": 0.4251, "step": 6342 }, { "epoch": 3.5435754189944135, "grad_norm": 0.614855170249939, "learning_rate": 0.0008252941176470588, "loss": 0.3531, "step": 6343 }, { "epoch": 3.5441340782122905, "grad_norm": 0.3214449882507324, "learning_rate": 0.000825266106442577, "loss": 
0.3856, "step": 6344 }, { "epoch": 3.5446927374301676, "grad_norm": 0.5649007558822632, "learning_rate": 0.0008252380952380952, "loss": 0.5055, "step": 6345 }, { "epoch": 3.5452513966480446, "grad_norm": 0.5173424482345581, "learning_rate": 0.0008252100840336135, "loss": 0.4799, "step": 6346 }, { "epoch": 3.545810055865922, "grad_norm": 0.4183085560798645, "learning_rate": 0.0008251820728291318, "loss": 0.378, "step": 6347 }, { "epoch": 3.546368715083799, "grad_norm": 1.2555066347122192, "learning_rate": 0.0008251540616246499, "loss": 0.4145, "step": 6348 }, { "epoch": 3.546927374301676, "grad_norm": 0.8779321908950806, "learning_rate": 0.0008251260504201681, "loss": 0.6206, "step": 6349 }, { "epoch": 3.547486033519553, "grad_norm": 0.5858129262924194, "learning_rate": 0.0008250980392156863, "loss": 0.2681, "step": 6350 }, { "epoch": 3.54804469273743, "grad_norm": 0.41439181566238403, "learning_rate": 0.0008250700280112045, "loss": 0.399, "step": 6351 }, { "epoch": 3.5486033519553075, "grad_norm": 0.5139670372009277, "learning_rate": 0.0008250420168067228, "loss": 0.4459, "step": 6352 }, { "epoch": 3.549162011173184, "grad_norm": 0.5441014766693115, "learning_rate": 0.0008250140056022409, "loss": 0.4204, "step": 6353 }, { "epoch": 3.5497206703910615, "grad_norm": 0.5554166436195374, "learning_rate": 0.0008249859943977591, "loss": 0.4715, "step": 6354 }, { "epoch": 3.5502793296089385, "grad_norm": 6.790686130523682, "learning_rate": 0.0008249579831932773, "loss": 0.4577, "step": 6355 }, { "epoch": 3.5508379888268156, "grad_norm": 0.6405669450759888, "learning_rate": 0.0008249299719887955, "loss": 0.4864, "step": 6356 }, { "epoch": 3.5513966480446926, "grad_norm": 0.49734386801719666, "learning_rate": 0.0008249019607843138, "loss": 0.5342, "step": 6357 }, { "epoch": 3.5519553072625696, "grad_norm": 0.8427916765213013, "learning_rate": 0.0008248739495798319, "loss": 0.5436, "step": 6358 }, { "epoch": 3.552513966480447, "grad_norm": 0.5033753514289856, "learning_rate": 
0.0008248459383753501, "loss": 0.351, "step": 6359 }, { "epoch": 3.553072625698324, "grad_norm": 0.841964066028595, "learning_rate": 0.0008248179271708683, "loss": 0.5495, "step": 6360 }, { "epoch": 3.553631284916201, "grad_norm": 0.5770707726478577, "learning_rate": 0.0008247899159663865, "loss": 0.6003, "step": 6361 }, { "epoch": 3.554189944134078, "grad_norm": 0.48813343048095703, "learning_rate": 0.0008247619047619049, "loss": 0.4487, "step": 6362 }, { "epoch": 3.554748603351955, "grad_norm": 0.38025999069213867, "learning_rate": 0.0008247338935574231, "loss": 0.5018, "step": 6363 }, { "epoch": 3.5553072625698325, "grad_norm": 0.6288140416145325, "learning_rate": 0.0008247058823529412, "loss": 0.3701, "step": 6364 }, { "epoch": 3.5558659217877095, "grad_norm": 0.5462133884429932, "learning_rate": 0.0008246778711484594, "loss": 0.4782, "step": 6365 }, { "epoch": 3.5564245810055866, "grad_norm": 0.41887781023979187, "learning_rate": 0.0008246498599439776, "loss": 0.3922, "step": 6366 }, { "epoch": 3.5569832402234636, "grad_norm": 0.41739621758461, "learning_rate": 0.0008246218487394959, "loss": 0.3886, "step": 6367 }, { "epoch": 3.5575418994413406, "grad_norm": 0.9115992784500122, "learning_rate": 0.0008245938375350141, "loss": 0.4778, "step": 6368 }, { "epoch": 3.558100558659218, "grad_norm": 0.637393057346344, "learning_rate": 0.0008245658263305322, "loss": 0.5296, "step": 6369 }, { "epoch": 3.558659217877095, "grad_norm": 0.8492223024368286, "learning_rate": 0.0008245378151260504, "loss": 0.4015, "step": 6370 }, { "epoch": 3.559217877094972, "grad_norm": 0.6010319590568542, "learning_rate": 0.0008245098039215686, "loss": 0.4767, "step": 6371 }, { "epoch": 3.559776536312849, "grad_norm": 0.5058609843254089, "learning_rate": 0.0008244817927170869, "loss": 0.5415, "step": 6372 }, { "epoch": 3.560335195530726, "grad_norm": 0.767530083656311, "learning_rate": 0.0008244537815126051, "loss": 0.446, "step": 6373 }, { "epoch": 3.5608938547486035, "grad_norm": 
0.9753162860870361, "learning_rate": 0.0008244257703081232, "loss": 0.4386, "step": 6374 }, { "epoch": 3.5614525139664805, "grad_norm": 0.3988550901412964, "learning_rate": 0.0008243977591036414, "loss": 0.3952, "step": 6375 }, { "epoch": 3.5620111731843576, "grad_norm": 0.6361387968063354, "learning_rate": 0.0008243697478991596, "loss": 0.478, "step": 6376 }, { "epoch": 3.5625698324022346, "grad_norm": 0.6381384134292603, "learning_rate": 0.0008243417366946779, "loss": 0.522, "step": 6377 }, { "epoch": 3.5631284916201116, "grad_norm": 1.174181342124939, "learning_rate": 0.0008243137254901962, "loss": 0.5317, "step": 6378 }, { "epoch": 3.563687150837989, "grad_norm": 0.7982651591300964, "learning_rate": 0.0008242857142857144, "loss": 0.4036, "step": 6379 }, { "epoch": 3.564245810055866, "grad_norm": 0.3963726758956909, "learning_rate": 0.0008242577030812325, "loss": 0.3298, "step": 6380 }, { "epoch": 3.564804469273743, "grad_norm": 0.488386869430542, "learning_rate": 0.0008242296918767507, "loss": 0.473, "step": 6381 }, { "epoch": 3.56536312849162, "grad_norm": 0.552078366279602, "learning_rate": 0.000824201680672269, "loss": 0.3987, "step": 6382 }, { "epoch": 3.565921787709497, "grad_norm": 0.6977636218070984, "learning_rate": 0.0008241736694677872, "loss": 0.5865, "step": 6383 }, { "epoch": 3.5664804469273745, "grad_norm": 0.5130059719085693, "learning_rate": 0.0008241456582633054, "loss": 0.4821, "step": 6384 }, { "epoch": 3.5670391061452515, "grad_norm": 0.516468346118927, "learning_rate": 0.0008241176470588235, "loss": 0.5345, "step": 6385 }, { "epoch": 3.5675977653631286, "grad_norm": 3.8563690185546875, "learning_rate": 0.0008240896358543417, "loss": 0.4306, "step": 6386 }, { "epoch": 3.5681564245810056, "grad_norm": 0.6177405118942261, "learning_rate": 0.00082406162464986, "loss": 0.4676, "step": 6387 }, { "epoch": 3.5687150837988826, "grad_norm": 0.9815372228622437, "learning_rate": 0.0008240336134453782, "loss": 0.4165, "step": 6388 }, { "epoch": 
3.56927374301676, "grad_norm": 0.487702339887619, "learning_rate": 0.0008240056022408964, "loss": 0.4252, "step": 6389 }, { "epoch": 3.5698324022346366, "grad_norm": 0.8767315745353699, "learning_rate": 0.0008239775910364145, "loss": 0.3639, "step": 6390 }, { "epoch": 3.570391061452514, "grad_norm": 0.40929293632507324, "learning_rate": 0.0008239495798319327, "loss": 0.3955, "step": 6391 }, { "epoch": 3.570949720670391, "grad_norm": 0.5693562626838684, "learning_rate": 0.000823921568627451, "loss": 0.4986, "step": 6392 }, { "epoch": 3.571508379888268, "grad_norm": 0.4609087109565735, "learning_rate": 0.0008238935574229692, "loss": 0.5037, "step": 6393 }, { "epoch": 3.572067039106145, "grad_norm": 1.7756911516189575, "learning_rate": 0.0008238655462184874, "loss": 0.5288, "step": 6394 }, { "epoch": 3.572625698324022, "grad_norm": 0.6726887226104736, "learning_rate": 0.0008238375350140057, "loss": 0.4616, "step": 6395 }, { "epoch": 3.5731843575418996, "grad_norm": 0.40285179018974304, "learning_rate": 0.0008238095238095238, "loss": 0.3809, "step": 6396 }, { "epoch": 3.5737430167597766, "grad_norm": 1.3979545831680298, "learning_rate": 0.0008237815126050421, "loss": 0.4524, "step": 6397 }, { "epoch": 3.5743016759776536, "grad_norm": 0.618635892868042, "learning_rate": 0.0008237535014005603, "loss": 0.4365, "step": 6398 }, { "epoch": 3.5748603351955306, "grad_norm": 0.5117287635803223, "learning_rate": 0.0008237254901960785, "loss": 0.4755, "step": 6399 }, { "epoch": 3.5754189944134076, "grad_norm": 1.8905123472213745, "learning_rate": 0.0008236974789915967, "loss": 0.4277, "step": 6400 }, { "epoch": 3.575977653631285, "grad_norm": 0.5427520275115967, "learning_rate": 0.0008236694677871148, "loss": 0.5016, "step": 6401 }, { "epoch": 3.576536312849162, "grad_norm": 0.42610442638397217, "learning_rate": 0.0008236414565826331, "loss": 0.3809, "step": 6402 }, { "epoch": 3.577094972067039, "grad_norm": 0.5109557509422302, "learning_rate": 0.0008236134453781513, "loss": 
0.5045, "step": 6403 }, { "epoch": 3.577653631284916, "grad_norm": 0.3809089660644531, "learning_rate": 0.0008235854341736695, "loss": 0.3944, "step": 6404 }, { "epoch": 3.578212290502793, "grad_norm": 0.5951355695724487, "learning_rate": 0.0008235574229691877, "loss": 0.423, "step": 6405 }, { "epoch": 3.5787709497206706, "grad_norm": 0.6134072542190552, "learning_rate": 0.0008235294117647058, "loss": 0.4461, "step": 6406 }, { "epoch": 3.5793296089385476, "grad_norm": 0.47377634048461914, "learning_rate": 0.0008235014005602241, "loss": 0.5002, "step": 6407 }, { "epoch": 3.5798882681564246, "grad_norm": 0.5466529130935669, "learning_rate": 0.0008234733893557423, "loss": 0.4641, "step": 6408 }, { "epoch": 3.5804469273743016, "grad_norm": 0.4330349862575531, "learning_rate": 0.0008234453781512605, "loss": 0.4305, "step": 6409 }, { "epoch": 3.5810055865921786, "grad_norm": 0.8357173204421997, "learning_rate": 0.0008234173669467787, "loss": 0.524, "step": 6410 }, { "epoch": 3.581564245810056, "grad_norm": 0.5404726266860962, "learning_rate": 0.000823389355742297, "loss": 0.5421, "step": 6411 }, { "epoch": 3.582122905027933, "grad_norm": 0.40846315026283264, "learning_rate": 0.0008233613445378152, "loss": 0.4173, "step": 6412 }, { "epoch": 3.58268156424581, "grad_norm": 0.4397639334201813, "learning_rate": 0.0008233333333333334, "loss": 0.3876, "step": 6413 }, { "epoch": 3.583240223463687, "grad_norm": 2.4906694889068604, "learning_rate": 0.0008233053221288516, "loss": 0.426, "step": 6414 }, { "epoch": 3.583798882681564, "grad_norm": 0.9563032388687134, "learning_rate": 0.0008232773109243698, "loss": 0.4847, "step": 6415 }, { "epoch": 3.5843575418994416, "grad_norm": 0.40227943658828735, "learning_rate": 0.000823249299719888, "loss": 0.4023, "step": 6416 }, { "epoch": 3.5849162011173186, "grad_norm": 4.108040809631348, "learning_rate": 0.0008232212885154062, "loss": 0.4843, "step": 6417 }, { "epoch": 3.5854748603351956, "grad_norm": 0.43629199266433716, "learning_rate": 
0.0008231932773109244, "loss": 0.4363, "step": 6418 }, { "epoch": 3.5860335195530726, "grad_norm": 0.5753178000450134, "learning_rate": 0.0008231652661064426, "loss": 0.3836, "step": 6419 }, { "epoch": 3.5865921787709496, "grad_norm": 0.6297770142555237, "learning_rate": 0.0008231372549019608, "loss": 0.4496, "step": 6420 }, { "epoch": 3.587150837988827, "grad_norm": 1.8221763372421265, "learning_rate": 0.000823109243697479, "loss": 0.4854, "step": 6421 }, { "epoch": 3.587709497206704, "grad_norm": 0.6255560517311096, "learning_rate": 0.0008230812324929972, "loss": 0.4266, "step": 6422 }, { "epoch": 3.588268156424581, "grad_norm": 0.5087469816207886, "learning_rate": 0.0008230532212885154, "loss": 0.4282, "step": 6423 }, { "epoch": 3.588826815642458, "grad_norm": 0.5999743342399597, "learning_rate": 0.0008230252100840336, "loss": 0.4311, "step": 6424 }, { "epoch": 3.589385474860335, "grad_norm": 0.39028701186180115, "learning_rate": 0.0008229971988795518, "loss": 0.4005, "step": 6425 }, { "epoch": 3.5899441340782126, "grad_norm": 0.5782503485679626, "learning_rate": 0.00082296918767507, "loss": 0.491, "step": 6426 }, { "epoch": 3.590502793296089, "grad_norm": 0.4766971170902252, "learning_rate": 0.0008229411764705884, "loss": 0.5077, "step": 6427 }, { "epoch": 3.5910614525139666, "grad_norm": 0.5042833089828491, "learning_rate": 0.0008229131652661065, "loss": 0.3267, "step": 6428 }, { "epoch": 3.5916201117318436, "grad_norm": 0.6977332830429077, "learning_rate": 0.0008228851540616247, "loss": 0.6247, "step": 6429 }, { "epoch": 3.5921787709497206, "grad_norm": 0.555036187171936, "learning_rate": 0.0008228571428571429, "loss": 0.5664, "step": 6430 }, { "epoch": 3.5927374301675976, "grad_norm": 0.8549402952194214, "learning_rate": 0.0008228291316526611, "loss": 0.4764, "step": 6431 }, { "epoch": 3.5932960893854746, "grad_norm": 0.40853291749954224, "learning_rate": 0.0008228011204481794, "loss": 0.3969, "step": 6432 }, { "epoch": 3.593854748603352, "grad_norm": 
0.5229538679122925, "learning_rate": 0.0008227731092436975, "loss": 0.5448, "step": 6433 }, { "epoch": 3.594413407821229, "grad_norm": 0.5672826766967773, "learning_rate": 0.0008227450980392157, "loss": 0.3375, "step": 6434 }, { "epoch": 3.594972067039106, "grad_norm": 0.5265349745750427, "learning_rate": 0.0008227170868347339, "loss": 0.4105, "step": 6435 }, { "epoch": 3.595530726256983, "grad_norm": 1.0345433950424194, "learning_rate": 0.0008226890756302521, "loss": 0.4641, "step": 6436 }, { "epoch": 3.59608938547486, "grad_norm": 0.5872271656990051, "learning_rate": 0.0008226610644257704, "loss": 0.6174, "step": 6437 }, { "epoch": 3.5966480446927376, "grad_norm": 0.4988188147544861, "learning_rate": 0.0008226330532212885, "loss": 0.497, "step": 6438 }, { "epoch": 3.5972067039106146, "grad_norm": 1.1633720397949219, "learning_rate": 0.0008226050420168067, "loss": 0.3614, "step": 6439 }, { "epoch": 3.5977653631284916, "grad_norm": 0.5424314141273499, "learning_rate": 0.0008225770308123249, "loss": 0.465, "step": 6440 }, { "epoch": 3.5983240223463686, "grad_norm": 0.7888202667236328, "learning_rate": 0.0008225490196078431, "loss": 0.5604, "step": 6441 }, { "epoch": 3.5988826815642456, "grad_norm": 0.6142560839653015, "learning_rate": 0.0008225210084033614, "loss": 0.3842, "step": 6442 }, { "epoch": 3.599441340782123, "grad_norm": 0.4416868984699249, "learning_rate": 0.0008224929971988796, "loss": 0.3801, "step": 6443 }, { "epoch": 3.6, "grad_norm": 0.38662686944007874, "learning_rate": 0.0008224649859943977, "loss": 0.3578, "step": 6444 }, { "epoch": 3.600558659217877, "grad_norm": 15.736149787902832, "learning_rate": 0.000822436974789916, "loss": 0.3535, "step": 6445 }, { "epoch": 3.601117318435754, "grad_norm": 0.5843623280525208, "learning_rate": 0.0008224089635854342, "loss": 0.4783, "step": 6446 }, { "epoch": 3.601675977653631, "grad_norm": 0.46261361241340637, "learning_rate": 0.0008223809523809525, "loss": 0.4116, "step": 6447 }, { "epoch": 
3.6022346368715086, "grad_norm": 0.623014509677887, "learning_rate": 0.0008223529411764707, "loss": 0.4058, "step": 6448 }, { "epoch": 3.6027932960893856, "grad_norm": 0.5723839998245239, "learning_rate": 0.0008223249299719888, "loss": 0.4354, "step": 6449 }, { "epoch": 3.6033519553072626, "grad_norm": 0.8480479717254639, "learning_rate": 0.000822296918767507, "loss": 0.5407, "step": 6450 }, { "epoch": 3.6039106145251396, "grad_norm": 0.4948956072330475, "learning_rate": 0.0008222689075630252, "loss": 0.5575, "step": 6451 }, { "epoch": 3.6044692737430166, "grad_norm": 1.755097508430481, "learning_rate": 0.0008222408963585435, "loss": 0.4199, "step": 6452 }, { "epoch": 3.605027932960894, "grad_norm": 0.634207546710968, "learning_rate": 0.0008222128851540617, "loss": 0.606, "step": 6453 }, { "epoch": 3.605586592178771, "grad_norm": 0.8725922703742981, "learning_rate": 0.0008221848739495798, "loss": 0.5041, "step": 6454 }, { "epoch": 3.606145251396648, "grad_norm": 0.6817995309829712, "learning_rate": 0.000822156862745098, "loss": 0.5345, "step": 6455 }, { "epoch": 3.606703910614525, "grad_norm": 0.6545759439468384, "learning_rate": 0.0008221288515406162, "loss": 0.4908, "step": 6456 }, { "epoch": 3.607262569832402, "grad_norm": 0.7679895162582397, "learning_rate": 0.0008221008403361345, "loss": 0.4979, "step": 6457 }, { "epoch": 3.6078212290502796, "grad_norm": 0.5004749894142151, "learning_rate": 0.0008220728291316527, "loss": 0.4487, "step": 6458 }, { "epoch": 3.6083798882681566, "grad_norm": 0.7766402959823608, "learning_rate": 0.0008220448179271709, "loss": 0.5176, "step": 6459 }, { "epoch": 3.6089385474860336, "grad_norm": 0.8019154667854309, "learning_rate": 0.000822016806722689, "loss": 0.4483, "step": 6460 }, { "epoch": 3.6094972067039106, "grad_norm": 0.5108367800712585, "learning_rate": 0.0008219887955182072, "loss": 0.5476, "step": 6461 }, { "epoch": 3.6100558659217876, "grad_norm": 0.4819338619709015, "learning_rate": 0.0008219607843137256, "loss": 
0.4242, "step": 6462 }, { "epoch": 3.610614525139665, "grad_norm": 0.5087612867355347, "learning_rate": 0.0008219327731092438, "loss": 0.5091, "step": 6463 }, { "epoch": 3.6111731843575416, "grad_norm": 0.9313182830810547, "learning_rate": 0.000821904761904762, "loss": 0.7428, "step": 6464 }, { "epoch": 3.611731843575419, "grad_norm": 1.1090079545974731, "learning_rate": 0.0008218767507002801, "loss": 0.4983, "step": 6465 }, { "epoch": 3.612290502793296, "grad_norm": 0.4408183991909027, "learning_rate": 0.0008218487394957983, "loss": 0.4667, "step": 6466 }, { "epoch": 3.612849162011173, "grad_norm": 0.7039101123809814, "learning_rate": 0.0008218207282913165, "loss": 0.3746, "step": 6467 }, { "epoch": 3.61340782122905, "grad_norm": 0.6735904812812805, "learning_rate": 0.0008217927170868348, "loss": 0.4699, "step": 6468 }, { "epoch": 3.613966480446927, "grad_norm": 0.6414945721626282, "learning_rate": 0.000821764705882353, "loss": 0.4009, "step": 6469 }, { "epoch": 3.6145251396648046, "grad_norm": 0.6738402247428894, "learning_rate": 0.0008217366946778711, "loss": 0.5266, "step": 6470 }, { "epoch": 3.6150837988826816, "grad_norm": 0.8753194212913513, "learning_rate": 0.0008217086834733893, "loss": 0.4415, "step": 6471 }, { "epoch": 3.6156424581005586, "grad_norm": 0.537079393863678, "learning_rate": 0.0008216806722689075, "loss": 0.5859, "step": 6472 }, { "epoch": 3.6162011173184356, "grad_norm": 0.831777811050415, "learning_rate": 0.0008216526610644258, "loss": 0.4068, "step": 6473 }, { "epoch": 3.6167597765363126, "grad_norm": 0.46099838614463806, "learning_rate": 0.000821624649859944, "loss": 0.499, "step": 6474 }, { "epoch": 3.61731843575419, "grad_norm": 0.6289117932319641, "learning_rate": 0.0008215966386554622, "loss": 0.4864, "step": 6475 }, { "epoch": 3.617877094972067, "grad_norm": 0.7247985601425171, "learning_rate": 0.0008215686274509803, "loss": 0.5355, "step": 6476 }, { "epoch": 3.618435754189944, "grad_norm": 0.4053064286708832, "learning_rate": 
0.0008215406162464985, "loss": 0.4343, "step": 6477 }, { "epoch": 3.618994413407821, "grad_norm": 0.6325204372406006, "learning_rate": 0.0008215126050420169, "loss": 0.3954, "step": 6478 }, { "epoch": 3.619553072625698, "grad_norm": 0.5867834091186523, "learning_rate": 0.0008214845938375351, "loss": 0.3693, "step": 6479 }, { "epoch": 3.6201117318435756, "grad_norm": 0.694374680519104, "learning_rate": 0.0008214565826330533, "loss": 0.5966, "step": 6480 }, { "epoch": 3.6206703910614526, "grad_norm": 0.5166329145431519, "learning_rate": 0.0008214285714285714, "loss": 0.3699, "step": 6481 }, { "epoch": 3.6212290502793296, "grad_norm": 22.80353355407715, "learning_rate": 0.0008214005602240896, "loss": 0.529, "step": 6482 }, { "epoch": 3.6217877094972066, "grad_norm": 0.4705682098865509, "learning_rate": 0.0008213725490196079, "loss": 0.3925, "step": 6483 }, { "epoch": 3.6223463687150836, "grad_norm": 1.3270971775054932, "learning_rate": 0.0008213445378151261, "loss": 0.4902, "step": 6484 }, { "epoch": 3.622905027932961, "grad_norm": 2.1248257160186768, "learning_rate": 0.0008213165266106443, "loss": 0.4774, "step": 6485 }, { "epoch": 3.623463687150838, "grad_norm": 0.5341605544090271, "learning_rate": 0.0008212885154061624, "loss": 0.4741, "step": 6486 }, { "epoch": 3.624022346368715, "grad_norm": 0.4652163088321686, "learning_rate": 0.0008212605042016806, "loss": 0.3937, "step": 6487 }, { "epoch": 3.624581005586592, "grad_norm": 0.5006020665168762, "learning_rate": 0.0008212324929971989, "loss": 0.4843, "step": 6488 }, { "epoch": 3.625139664804469, "grad_norm": 0.752861499786377, "learning_rate": 0.0008212044817927171, "loss": 0.6368, "step": 6489 }, { "epoch": 3.6256983240223466, "grad_norm": 0.7518357038497925, "learning_rate": 0.0008211764705882353, "loss": 0.4761, "step": 6490 }, { "epoch": 3.6262569832402236, "grad_norm": 0.7350558042526245, "learning_rate": 0.0008211484593837535, "loss": 0.3349, "step": 6491 }, { "epoch": 3.6268156424581006, "grad_norm": 
0.4279731214046478, "learning_rate": 0.0008211204481792716, "loss": 0.3346, "step": 6492 }, { "epoch": 3.6273743016759776, "grad_norm": 0.7430214881896973, "learning_rate": 0.00082109243697479, "loss": 0.3664, "step": 6493 }, { "epoch": 3.6279329608938546, "grad_norm": 0.5654424428939819, "learning_rate": 0.0008210644257703082, "loss": 0.3604, "step": 6494 }, { "epoch": 3.628491620111732, "grad_norm": 1.3331470489501953, "learning_rate": 0.0008210364145658264, "loss": 0.4775, "step": 6495 }, { "epoch": 3.6290502793296087, "grad_norm": 0.5819928050041199, "learning_rate": 0.0008210084033613446, "loss": 0.4449, "step": 6496 }, { "epoch": 3.629608938547486, "grad_norm": 0.5940769910812378, "learning_rate": 0.0008209803921568627, "loss": 0.483, "step": 6497 }, { "epoch": 3.630167597765363, "grad_norm": 0.7750584483146667, "learning_rate": 0.000820952380952381, "loss": 0.509, "step": 6498 }, { "epoch": 3.63072625698324, "grad_norm": 0.5893155932426453, "learning_rate": 0.0008209243697478992, "loss": 0.4488, "step": 6499 }, { "epoch": 3.631284916201117, "grad_norm": 0.5505998730659485, "learning_rate": 0.0008208963585434174, "loss": 0.4699, "step": 6500 }, { "epoch": 3.631284916201117, "eval_cer": 0.09406675177026393, "eval_loss": 0.35760098695755005, "eval_runtime": 55.7703, "eval_samples_per_second": 81.369, "eval_steps_per_second": 5.092, "eval_wer": 0.3711069523197676, "step": 6500 }, { "epoch": 3.631843575418994, "grad_norm": 0.5845969319343567, "learning_rate": 0.0008208683473389356, "loss": 0.4983, "step": 6501 }, { "epoch": 3.6324022346368716, "grad_norm": 0.46298035979270935, "learning_rate": 0.0008208403361344537, "loss": 0.4643, "step": 6502 }, { "epoch": 3.6329608938547486, "grad_norm": 0.3908912241458893, "learning_rate": 0.000820812324929972, "loss": 0.4557, "step": 6503 }, { "epoch": 3.6335195530726256, "grad_norm": 0.5883150100708008, "learning_rate": 0.0008207843137254902, "loss": 0.4242, "step": 6504 }, { "epoch": 3.6340782122905027, "grad_norm": 
0.7345861792564392, "learning_rate": 0.0008207563025210084, "loss": 0.6422, "step": 6505 }, { "epoch": 3.6346368715083797, "grad_norm": 0.4540961682796478, "learning_rate": 0.0008207282913165266, "loss": 0.4319, "step": 6506 }, { "epoch": 3.635195530726257, "grad_norm": 0.4743022322654724, "learning_rate": 0.0008207002801120448, "loss": 0.5672, "step": 6507 }, { "epoch": 3.635754189944134, "grad_norm": 0.4285391569137573, "learning_rate": 0.000820672268907563, "loss": 0.4264, "step": 6508 }, { "epoch": 3.636312849162011, "grad_norm": 0.7424596548080444, "learning_rate": 0.0008206442577030812, "loss": 0.406, "step": 6509 }, { "epoch": 3.636871508379888, "grad_norm": 0.5257818102836609, "learning_rate": 0.0008206162464985995, "loss": 0.4395, "step": 6510 }, { "epoch": 3.637430167597765, "grad_norm": 0.6352879405021667, "learning_rate": 0.0008205882352941177, "loss": 0.6214, "step": 6511 }, { "epoch": 3.6379888268156426, "grad_norm": 1.3795509338378906, "learning_rate": 0.0008205602240896359, "loss": 0.5127, "step": 6512 }, { "epoch": 3.6385474860335196, "grad_norm": 0.4487282931804657, "learning_rate": 0.0008205322128851541, "loss": 0.4578, "step": 6513 }, { "epoch": 3.6391061452513966, "grad_norm": 0.48283979296684265, "learning_rate": 0.0008205042016806723, "loss": 0.4808, "step": 6514 }, { "epoch": 3.6396648044692737, "grad_norm": 0.4567292332649231, "learning_rate": 0.0008204761904761905, "loss": 0.5098, "step": 6515 }, { "epoch": 3.6402234636871507, "grad_norm": 0.8431621193885803, "learning_rate": 0.0008204481792717087, "loss": 0.524, "step": 6516 }, { "epoch": 3.640782122905028, "grad_norm": 0.5462682247161865, "learning_rate": 0.0008204201680672269, "loss": 0.3413, "step": 6517 }, { "epoch": 3.641340782122905, "grad_norm": 0.42183825373649597, "learning_rate": 0.0008203921568627452, "loss": 0.4816, "step": 6518 }, { "epoch": 3.641899441340782, "grad_norm": 1.2426623106002808, "learning_rate": 0.0008203641456582633, "loss": 0.4582, "step": 6519 }, { "epoch": 
3.642458100558659, "grad_norm": 0.6737184524536133, "learning_rate": 0.0008203361344537815, "loss": 0.5678, "step": 6520 }, { "epoch": 3.643016759776536, "grad_norm": 0.4400502145290375, "learning_rate": 0.0008203081232492997, "loss": 0.4366, "step": 6521 }, { "epoch": 3.6435754189944136, "grad_norm": 1.0397083759307861, "learning_rate": 0.0008202801120448179, "loss": 0.5584, "step": 6522 }, { "epoch": 3.6441340782122906, "grad_norm": 0.49252504110336304, "learning_rate": 0.0008202521008403362, "loss": 0.4701, "step": 6523 }, { "epoch": 3.6446927374301676, "grad_norm": 0.6515716910362244, "learning_rate": 0.0008202240896358543, "loss": 0.4662, "step": 6524 }, { "epoch": 3.6452513966480447, "grad_norm": 0.532579243183136, "learning_rate": 0.0008201960784313725, "loss": 0.5567, "step": 6525 }, { "epoch": 3.6458100558659217, "grad_norm": 0.5238862633705139, "learning_rate": 0.0008201680672268907, "loss": 0.4799, "step": 6526 }, { "epoch": 3.646368715083799, "grad_norm": 0.5112152695655823, "learning_rate": 0.000820140056022409, "loss": 0.3977, "step": 6527 }, { "epoch": 3.646927374301676, "grad_norm": 1.3041168451309204, "learning_rate": 0.0008201120448179273, "loss": 0.5636, "step": 6528 }, { "epoch": 3.647486033519553, "grad_norm": 2.3652446269989014, "learning_rate": 0.0008200840336134454, "loss": 0.406, "step": 6529 }, { "epoch": 3.64804469273743, "grad_norm": 0.4410611093044281, "learning_rate": 0.0008200560224089636, "loss": 0.4464, "step": 6530 }, { "epoch": 3.648603351955307, "grad_norm": 0.5673394203186035, "learning_rate": 0.0008200280112044818, "loss": 0.4015, "step": 6531 }, { "epoch": 3.6491620111731846, "grad_norm": 0.45798346400260925, "learning_rate": 0.00082, "loss": 0.5285, "step": 6532 }, { "epoch": 3.649720670391061, "grad_norm": 0.417288213968277, "learning_rate": 0.0008199719887955183, "loss": 0.4092, "step": 6533 }, { "epoch": 3.6502793296089386, "grad_norm": 0.5197558403015137, "learning_rate": 0.0008199439775910365, "loss": 0.421, "step": 6534 
}, { "epoch": 3.6508379888268156, "grad_norm": 0.8692957162857056, "learning_rate": 0.0008199159663865546, "loss": 0.5988, "step": 6535 }, { "epoch": 3.6513966480446927, "grad_norm": 0.6718247532844543, "learning_rate": 0.0008198879551820728, "loss": 0.474, "step": 6536 }, { "epoch": 3.6519553072625697, "grad_norm": 0.5074272751808167, "learning_rate": 0.000819859943977591, "loss": 0.4871, "step": 6537 }, { "epoch": 3.6525139664804467, "grad_norm": 0.5672338604927063, "learning_rate": 0.0008198319327731093, "loss": 0.4436, "step": 6538 }, { "epoch": 3.653072625698324, "grad_norm": 0.8451388478279114, "learning_rate": 0.0008198039215686275, "loss": 0.4759, "step": 6539 }, { "epoch": 3.653631284916201, "grad_norm": 10.254681587219238, "learning_rate": 0.0008197759103641456, "loss": 0.4975, "step": 6540 }, { "epoch": 3.654189944134078, "grad_norm": 1.752718210220337, "learning_rate": 0.0008197478991596638, "loss": 0.3744, "step": 6541 }, { "epoch": 3.654748603351955, "grad_norm": 1.2247676849365234, "learning_rate": 0.000819719887955182, "loss": 0.6475, "step": 6542 }, { "epoch": 3.655307262569832, "grad_norm": 0.6271907091140747, "learning_rate": 0.0008196918767507004, "loss": 0.437, "step": 6543 }, { "epoch": 3.6558659217877096, "grad_norm": 0.39722007513046265, "learning_rate": 0.0008196638655462186, "loss": 0.4291, "step": 6544 }, { "epoch": 3.6564245810055866, "grad_norm": 0.4425193667411804, "learning_rate": 0.0008196358543417367, "loss": 0.427, "step": 6545 }, { "epoch": 3.6569832402234637, "grad_norm": 1.2784485816955566, "learning_rate": 0.0008196078431372549, "loss": 0.4491, "step": 6546 }, { "epoch": 3.6575418994413407, "grad_norm": 0.5835462808609009, "learning_rate": 0.0008195798319327731, "loss": 0.4424, "step": 6547 }, { "epoch": 3.6581005586592177, "grad_norm": 0.5545950531959534, "learning_rate": 0.0008195518207282914, "loss": 0.4382, "step": 6548 }, { "epoch": 3.658659217877095, "grad_norm": 0.4416521191596985, "learning_rate": 0.0008195238095238096, 
"loss": 0.4208, "step": 6549 }, { "epoch": 3.659217877094972, "grad_norm": 0.5419308543205261, "learning_rate": 0.0008194957983193278, "loss": 0.5201, "step": 6550 }, { "epoch": 3.659776536312849, "grad_norm": 1.5149476528167725, "learning_rate": 0.0008194677871148459, "loss": 0.4185, "step": 6551 }, { "epoch": 3.660335195530726, "grad_norm": 0.6551757454872131, "learning_rate": 0.0008194397759103641, "loss": 0.4841, "step": 6552 }, { "epoch": 3.660893854748603, "grad_norm": 1.3892247676849365, "learning_rate": 0.0008194117647058824, "loss": 0.4641, "step": 6553 }, { "epoch": 3.6614525139664806, "grad_norm": 0.5354766249656677, "learning_rate": 0.0008193837535014006, "loss": 0.4782, "step": 6554 }, { "epoch": 3.6620111731843576, "grad_norm": 0.42245107889175415, "learning_rate": 0.0008193557422969188, "loss": 0.5282, "step": 6555 }, { "epoch": 3.6625698324022347, "grad_norm": 0.483610600233078, "learning_rate": 0.0008193277310924369, "loss": 0.4826, "step": 6556 }, { "epoch": 3.6631284916201117, "grad_norm": 0.4468355178833008, "learning_rate": 0.0008192997198879551, "loss": 0.4835, "step": 6557 }, { "epoch": 3.6636871508379887, "grad_norm": 0.372497022151947, "learning_rate": 0.0008192717086834734, "loss": 0.408, "step": 6558 }, { "epoch": 3.664245810055866, "grad_norm": 0.5888392329216003, "learning_rate": 0.0008192436974789917, "loss": 0.3686, "step": 6559 }, { "epoch": 3.664804469273743, "grad_norm": 1.0408971309661865, "learning_rate": 0.0008192156862745099, "loss": 0.462, "step": 6560 }, { "epoch": 3.66536312849162, "grad_norm": 0.5975467562675476, "learning_rate": 0.000819187675070028, "loss": 0.4531, "step": 6561 }, { "epoch": 3.665921787709497, "grad_norm": 0.4743780791759491, "learning_rate": 0.0008191596638655462, "loss": 0.3791, "step": 6562 }, { "epoch": 3.666480446927374, "grad_norm": 0.4812266528606415, "learning_rate": 0.0008191316526610645, "loss": 0.4072, "step": 6563 }, { "epoch": 3.6670391061452516, "grad_norm": 0.39554843306541443, 
"learning_rate": 0.0008191036414565827, "loss": 0.3693, "step": 6564 }, { "epoch": 3.6675977653631286, "grad_norm": 0.6460503339767456, "learning_rate": 0.0008190756302521009, "loss": 0.466, "step": 6565 }, { "epoch": 3.6681564245810057, "grad_norm": 0.6518165469169617, "learning_rate": 0.0008190476190476191, "loss": 0.4814, "step": 6566 }, { "epoch": 3.6687150837988827, "grad_norm": 0.6282866597175598, "learning_rate": 0.0008190196078431372, "loss": 0.685, "step": 6567 }, { "epoch": 3.6692737430167597, "grad_norm": 0.5056010484695435, "learning_rate": 0.0008189915966386555, "loss": 0.4358, "step": 6568 }, { "epoch": 3.669832402234637, "grad_norm": 0.5974522829055786, "learning_rate": 0.0008189635854341737, "loss": 0.489, "step": 6569 }, { "epoch": 3.6703910614525137, "grad_norm": 0.3944227993488312, "learning_rate": 0.0008189355742296919, "loss": 0.4885, "step": 6570 }, { "epoch": 3.670949720670391, "grad_norm": 0.9439200162887573, "learning_rate": 0.0008189075630252101, "loss": 0.3953, "step": 6571 }, { "epoch": 3.671508379888268, "grad_norm": 0.6894012689590454, "learning_rate": 0.0008188795518207282, "loss": 0.5021, "step": 6572 }, { "epoch": 3.672067039106145, "grad_norm": 0.5507104992866516, "learning_rate": 0.0008188515406162465, "loss": 0.4369, "step": 6573 }, { "epoch": 3.672625698324022, "grad_norm": 0.3866426944732666, "learning_rate": 0.0008188235294117647, "loss": 0.4085, "step": 6574 }, { "epoch": 3.673184357541899, "grad_norm": 0.5168094038963318, "learning_rate": 0.000818795518207283, "loss": 0.3898, "step": 6575 }, { "epoch": 3.6737430167597767, "grad_norm": 0.44074076414108276, "learning_rate": 0.0008187675070028012, "loss": 0.3856, "step": 6576 }, { "epoch": 3.6743016759776537, "grad_norm": 0.7955960631370544, "learning_rate": 0.0008187394957983193, "loss": 0.5206, "step": 6577 }, { "epoch": 3.6748603351955307, "grad_norm": 0.5694375038146973, "learning_rate": 0.0008187114845938376, "loss": 0.5083, "step": 6578 }, { "epoch": 3.6754189944134077, 
"grad_norm": 0.4771963059902191, "learning_rate": 0.0008186834733893558, "loss": 0.4795, "step": 6579 }, { "epoch": 3.6759776536312847, "grad_norm": 0.7393046617507935, "learning_rate": 0.000818655462184874, "loss": 0.4858, "step": 6580 }, { "epoch": 3.676536312849162, "grad_norm": 0.5868818163871765, "learning_rate": 0.0008186274509803922, "loss": 0.346, "step": 6581 }, { "epoch": 3.677094972067039, "grad_norm": 0.5598104000091553, "learning_rate": 0.0008185994397759104, "loss": 0.4849, "step": 6582 }, { "epoch": 3.677653631284916, "grad_norm": 0.7943028211593628, "learning_rate": 0.0008185714285714286, "loss": 0.4603, "step": 6583 }, { "epoch": 3.678212290502793, "grad_norm": 0.5936654210090637, "learning_rate": 0.0008185434173669468, "loss": 0.3908, "step": 6584 }, { "epoch": 3.67877094972067, "grad_norm": 0.585900068283081, "learning_rate": 0.000818515406162465, "loss": 0.4222, "step": 6585 }, { "epoch": 3.6793296089385477, "grad_norm": 0.5086262822151184, "learning_rate": 0.0008184873949579832, "loss": 0.3499, "step": 6586 }, { "epoch": 3.6798882681564247, "grad_norm": 0.4961419403553009, "learning_rate": 0.0008184593837535014, "loss": 0.4381, "step": 6587 }, { "epoch": 3.6804469273743017, "grad_norm": 0.5073038935661316, "learning_rate": 0.0008184313725490196, "loss": 0.4638, "step": 6588 }, { "epoch": 3.6810055865921787, "grad_norm": 0.5773311853408813, "learning_rate": 0.0008184033613445378, "loss": 0.4878, "step": 6589 }, { "epoch": 3.6815642458100557, "grad_norm": 0.5299826264381409, "learning_rate": 0.000818375350140056, "loss": 0.4, "step": 6590 }, { "epoch": 3.682122905027933, "grad_norm": 0.7297815084457397, "learning_rate": 0.0008183473389355742, "loss": 0.5699, "step": 6591 }, { "epoch": 3.68268156424581, "grad_norm": 0.6074373126029968, "learning_rate": 0.0008183193277310925, "loss": 0.5335, "step": 6592 }, { "epoch": 3.683240223463687, "grad_norm": 1.6804271936416626, "learning_rate": 0.0008182913165266107, "loss": 0.7456, "step": 6593 }, { 
"epoch": 3.683798882681564, "grad_norm": 0.8368260860443115, "learning_rate": 0.0008182633053221289, "loss": 0.5148, "step": 6594 }, { "epoch": 3.684357541899441, "grad_norm": 0.9733879566192627, "learning_rate": 0.0008182352941176471, "loss": 0.4015, "step": 6595 }, { "epoch": 3.6849162011173187, "grad_norm": 1.1178513765335083, "learning_rate": 0.0008182072829131653, "loss": 0.6611, "step": 6596 }, { "epoch": 3.6854748603351957, "grad_norm": 0.4010314643383026, "learning_rate": 0.0008181792717086835, "loss": 0.4945, "step": 6597 }, { "epoch": 3.6860335195530727, "grad_norm": 0.7516027092933655, "learning_rate": 0.0008181512605042018, "loss": 0.4372, "step": 6598 }, { "epoch": 3.6865921787709497, "grad_norm": 0.4921731650829315, "learning_rate": 0.0008181232492997199, "loss": 0.3972, "step": 6599 }, { "epoch": 3.6871508379888267, "grad_norm": 0.6820230484008789, "learning_rate": 0.0008180952380952381, "loss": 0.3899, "step": 6600 }, { "epoch": 3.687709497206704, "grad_norm": 0.5081499814987183, "learning_rate": 0.0008180672268907563, "loss": 0.4212, "step": 6601 }, { "epoch": 3.688268156424581, "grad_norm": 0.7553133368492126, "learning_rate": 0.0008180392156862745, "loss": 0.4516, "step": 6602 }, { "epoch": 3.688826815642458, "grad_norm": 0.7080279588699341, "learning_rate": 0.0008180112044817928, "loss": 0.4477, "step": 6603 }, { "epoch": 3.689385474860335, "grad_norm": 0.6791130304336548, "learning_rate": 0.0008179831932773109, "loss": 0.5012, "step": 6604 }, { "epoch": 3.689944134078212, "grad_norm": 0.42615175247192383, "learning_rate": 0.0008179551820728291, "loss": 0.5078, "step": 6605 }, { "epoch": 3.6905027932960897, "grad_norm": 4.26744270324707, "learning_rate": 0.0008179271708683473, "loss": 0.4548, "step": 6606 }, { "epoch": 3.6910614525139662, "grad_norm": 0.5060055255889893, "learning_rate": 0.0008178991596638655, "loss": 0.4112, "step": 6607 }, { "epoch": 3.6916201117318437, "grad_norm": 0.4261416792869568, "learning_rate": 0.0008178711484593839, 
"loss": 0.4393, "step": 6608 }, { "epoch": 3.6921787709497207, "grad_norm": 1.223616600036621, "learning_rate": 0.000817843137254902, "loss": 0.3641, "step": 6609 }, { "epoch": 3.6927374301675977, "grad_norm": 0.5487021207809448, "learning_rate": 0.0008178151260504202, "loss": 0.5481, "step": 6610 }, { "epoch": 3.6932960893854747, "grad_norm": 2.412358045578003, "learning_rate": 0.0008177871148459384, "loss": 0.5495, "step": 6611 }, { "epoch": 3.6938547486033517, "grad_norm": 0.4571426808834076, "learning_rate": 0.0008177591036414566, "loss": 0.4205, "step": 6612 }, { "epoch": 3.694413407821229, "grad_norm": 0.4575337767601013, "learning_rate": 0.0008177310924369749, "loss": 0.4602, "step": 6613 }, { "epoch": 3.694972067039106, "grad_norm": 0.512572169303894, "learning_rate": 0.0008177030812324931, "loss": 0.5942, "step": 6614 }, { "epoch": 3.695530726256983, "grad_norm": 0.45204320549964905, "learning_rate": 0.0008176750700280112, "loss": 0.423, "step": 6615 }, { "epoch": 3.69608938547486, "grad_norm": 0.7094957828521729, "learning_rate": 0.0008176470588235294, "loss": 0.4425, "step": 6616 }, { "epoch": 3.6966480446927372, "grad_norm": 0.54328852891922, "learning_rate": 0.0008176190476190476, "loss": 0.5917, "step": 6617 }, { "epoch": 3.6972067039106147, "grad_norm": 0.699624240398407, "learning_rate": 0.0008175910364145659, "loss": 0.4372, "step": 6618 }, { "epoch": 3.6977653631284917, "grad_norm": 0.6127368807792664, "learning_rate": 0.0008175630252100841, "loss": 0.5105, "step": 6619 }, { "epoch": 3.6983240223463687, "grad_norm": 0.4927031099796295, "learning_rate": 0.0008175350140056022, "loss": 0.5134, "step": 6620 }, { "epoch": 3.6988826815642457, "grad_norm": 0.7929823398590088, "learning_rate": 0.0008175070028011204, "loss": 0.4772, "step": 6621 }, { "epoch": 3.6994413407821227, "grad_norm": 0.5992799401283264, "learning_rate": 0.0008174789915966386, "loss": 0.4889, "step": 6622 }, { "epoch": 3.7, "grad_norm": 0.6249922513961792, "learning_rate": 
0.0008174509803921569, "loss": 0.518, "step": 6623 }, { "epoch": 3.700558659217877, "grad_norm": 0.6661367416381836, "learning_rate": 0.0008174229691876752, "loss": 0.4634, "step": 6624 }, { "epoch": 3.701117318435754, "grad_norm": 2.2336106300354004, "learning_rate": 0.0008173949579831932, "loss": 0.4755, "step": 6625 }, { "epoch": 3.701675977653631, "grad_norm": 1.5266122817993164, "learning_rate": 0.0008173669467787115, "loss": 0.6458, "step": 6626 }, { "epoch": 3.7022346368715082, "grad_norm": 1.2884702682495117, "learning_rate": 0.0008173389355742297, "loss": 0.4475, "step": 6627 }, { "epoch": 3.7027932960893857, "grad_norm": 0.8609706163406372, "learning_rate": 0.000817310924369748, "loss": 0.4115, "step": 6628 }, { "epoch": 3.7033519553072627, "grad_norm": 0.5485642552375793, "learning_rate": 0.0008172829131652662, "loss": 0.4176, "step": 6629 }, { "epoch": 3.7039106145251397, "grad_norm": 0.41080906987190247, "learning_rate": 0.0008172549019607844, "loss": 0.4145, "step": 6630 }, { "epoch": 3.7044692737430167, "grad_norm": 0.49313127994537354, "learning_rate": 0.0008172268907563025, "loss": 0.4882, "step": 6631 }, { "epoch": 3.7050279329608937, "grad_norm": 0.4483972489833832, "learning_rate": 0.0008171988795518207, "loss": 0.4773, "step": 6632 }, { "epoch": 3.705586592178771, "grad_norm": 0.8375333547592163, "learning_rate": 0.000817170868347339, "loss": 0.6853, "step": 6633 }, { "epoch": 3.706145251396648, "grad_norm": 0.4427073299884796, "learning_rate": 0.0008171428571428572, "loss": 0.4905, "step": 6634 }, { "epoch": 3.706703910614525, "grad_norm": 0.44916847348213196, "learning_rate": 0.0008171148459383754, "loss": 0.4992, "step": 6635 }, { "epoch": 3.707262569832402, "grad_norm": 0.5653901100158691, "learning_rate": 0.0008170868347338935, "loss": 0.5632, "step": 6636 }, { "epoch": 3.707821229050279, "grad_norm": 0.582859456539154, "learning_rate": 0.0008170588235294117, "loss": 0.5395, "step": 6637 }, { "epoch": 3.7083798882681567, "grad_norm": 
1.1364970207214355, "learning_rate": 0.00081703081232493, "loss": 0.4551, "step": 6638 }, { "epoch": 3.7089385474860332, "grad_norm": 0.4864155352115631, "learning_rate": 0.0008170028011204482, "loss": 0.468, "step": 6639 }, { "epoch": 3.7094972067039107, "grad_norm": 0.46229758858680725, "learning_rate": 0.0008169747899159664, "loss": 0.479, "step": 6640 }, { "epoch": 3.7100558659217877, "grad_norm": 0.6852522492408752, "learning_rate": 0.0008169467787114845, "loss": 0.4726, "step": 6641 }, { "epoch": 3.7106145251396647, "grad_norm": 1.1568009853363037, "learning_rate": 0.0008169187675070028, "loss": 0.3804, "step": 6642 }, { "epoch": 3.711173184357542, "grad_norm": 0.3943794369697571, "learning_rate": 0.0008168907563025211, "loss": 0.3449, "step": 6643 }, { "epoch": 3.7117318435754187, "grad_norm": 0.5411002039909363, "learning_rate": 0.0008168627450980393, "loss": 0.4335, "step": 6644 }, { "epoch": 3.712290502793296, "grad_norm": 0.7068082690238953, "learning_rate": 0.0008168347338935575, "loss": 0.4428, "step": 6645 }, { "epoch": 3.712849162011173, "grad_norm": 1.0023226737976074, "learning_rate": 0.0008168067226890757, "loss": 0.667, "step": 6646 }, { "epoch": 3.71340782122905, "grad_norm": 0.4348014295101166, "learning_rate": 0.0008167787114845938, "loss": 0.4338, "step": 6647 }, { "epoch": 3.7139664804469272, "grad_norm": 0.3668944537639618, "learning_rate": 0.0008167507002801121, "loss": 0.4125, "step": 6648 }, { "epoch": 3.7145251396648042, "grad_norm": 0.5998632311820984, "learning_rate": 0.0008167226890756303, "loss": 0.5072, "step": 6649 }, { "epoch": 3.7150837988826817, "grad_norm": 0.44316115975379944, "learning_rate": 0.0008166946778711485, "loss": 0.4378, "step": 6650 }, { "epoch": 3.7156424581005587, "grad_norm": 1.0044258832931519, "learning_rate": 0.0008166666666666667, "loss": 0.5984, "step": 6651 }, { "epoch": 3.7162011173184357, "grad_norm": 0.4026990532875061, "learning_rate": 0.0008166386554621848, "loss": 0.515, "step": 6652 }, { "epoch": 
3.7167597765363127, "grad_norm": 0.5036965012550354, "learning_rate": 0.0008166106442577031, "loss": 0.4244, "step": 6653 }, { "epoch": 3.7173184357541897, "grad_norm": 1.06436288356781, "learning_rate": 0.0008165826330532213, "loss": 0.4537, "step": 6654 }, { "epoch": 3.717877094972067, "grad_norm": 1.7551262378692627, "learning_rate": 0.0008165546218487395, "loss": 0.5695, "step": 6655 }, { "epoch": 3.718435754189944, "grad_norm": 0.41385704278945923, "learning_rate": 0.0008165266106442577, "loss": 0.4038, "step": 6656 }, { "epoch": 3.718994413407821, "grad_norm": 0.5916339159011841, "learning_rate": 0.0008164985994397758, "loss": 0.4134, "step": 6657 }, { "epoch": 3.7195530726256982, "grad_norm": 0.47273582220077515, "learning_rate": 0.0008164705882352942, "loss": 0.4651, "step": 6658 }, { "epoch": 3.7201117318435752, "grad_norm": 0.7123088240623474, "learning_rate": 0.0008164425770308124, "loss": 0.5679, "step": 6659 }, { "epoch": 3.7206703910614527, "grad_norm": 0.5929846167564392, "learning_rate": 0.0008164145658263306, "loss": 0.4358, "step": 6660 }, { "epoch": 3.7212290502793297, "grad_norm": 1.381717324256897, "learning_rate": 0.0008163865546218488, "loss": 0.5122, "step": 6661 }, { "epoch": 3.7217877094972067, "grad_norm": 2.166510820388794, "learning_rate": 0.000816358543417367, "loss": 0.6774, "step": 6662 }, { "epoch": 3.7223463687150837, "grad_norm": 0.5627672672271729, "learning_rate": 0.0008163305322128852, "loss": 0.4467, "step": 6663 }, { "epoch": 3.7229050279329607, "grad_norm": 0.5538860559463501, "learning_rate": 0.0008163025210084034, "loss": 0.5572, "step": 6664 }, { "epoch": 3.723463687150838, "grad_norm": 0.5472835302352905, "learning_rate": 0.0008162745098039216, "loss": 0.4724, "step": 6665 }, { "epoch": 3.724022346368715, "grad_norm": 0.6507546305656433, "learning_rate": 0.0008162464985994398, "loss": 0.3905, "step": 6666 }, { "epoch": 3.724581005586592, "grad_norm": 1.0526360273361206, "learning_rate": 0.000816218487394958, "loss": 
0.461, "step": 6667 }, { "epoch": 3.7251396648044692, "grad_norm": 0.4964771270751953, "learning_rate": 0.0008161904761904762, "loss": 0.5815, "step": 6668 }, { "epoch": 3.7256983240223462, "grad_norm": 0.6165770888328552, "learning_rate": 0.0008161624649859944, "loss": 0.5121, "step": 6669 }, { "epoch": 3.7262569832402237, "grad_norm": 0.5066892504692078, "learning_rate": 0.0008161344537815126, "loss": 0.5568, "step": 6670 }, { "epoch": 3.7268156424581007, "grad_norm": 0.475043922662735, "learning_rate": 0.0008161064425770308, "loss": 0.4911, "step": 6671 }, { "epoch": 3.7273743016759777, "grad_norm": 3.152216911315918, "learning_rate": 0.000816078431372549, "loss": 0.3813, "step": 6672 }, { "epoch": 3.7279329608938547, "grad_norm": 0.6873748302459717, "learning_rate": 0.0008160504201680672, "loss": 0.4272, "step": 6673 }, { "epoch": 3.7284916201117317, "grad_norm": 0.6170068979263306, "learning_rate": 0.0008160224089635855, "loss": 0.5144, "step": 6674 }, { "epoch": 3.729050279329609, "grad_norm": 0.4780423939228058, "learning_rate": 0.0008159943977591037, "loss": 0.4898, "step": 6675 }, { "epoch": 3.7296089385474858, "grad_norm": 0.4953891932964325, "learning_rate": 0.0008159663865546219, "loss": 0.4198, "step": 6676 }, { "epoch": 3.730167597765363, "grad_norm": 0.7887417078018188, "learning_rate": 0.0008159383753501401, "loss": 0.5373, "step": 6677 }, { "epoch": 3.7307262569832402, "grad_norm": 0.36670422554016113, "learning_rate": 0.0008159103641456584, "loss": 0.3325, "step": 6678 }, { "epoch": 3.7312849162011172, "grad_norm": 0.4578920304775238, "learning_rate": 0.0008158823529411765, "loss": 0.4092, "step": 6679 }, { "epoch": 3.7318435754189943, "grad_norm": 0.5179921388626099, "learning_rate": 0.0008158543417366947, "loss": 0.4234, "step": 6680 }, { "epoch": 3.7324022346368713, "grad_norm": 0.7124769687652588, "learning_rate": 0.0008158263305322129, "loss": 0.4455, "step": 6681 }, { "epoch": 3.7329608938547487, "grad_norm": 0.549180269241333, 
"learning_rate": 0.0008157983193277311, "loss": 0.4463, "step": 6682 }, { "epoch": 3.7335195530726257, "grad_norm": 0.6946071982383728, "learning_rate": 0.0008157703081232494, "loss": 0.5588, "step": 6683 }, { "epoch": 3.7340782122905027, "grad_norm": 0.8743019700050354, "learning_rate": 0.0008157422969187675, "loss": 0.3841, "step": 6684 }, { "epoch": 3.7346368715083798, "grad_norm": 0.4073009788990021, "learning_rate": 0.0008157142857142857, "loss": 0.3929, "step": 6685 }, { "epoch": 3.7351955307262568, "grad_norm": 0.9444915056228638, "learning_rate": 0.0008156862745098039, "loss": 0.5225, "step": 6686 }, { "epoch": 3.735754189944134, "grad_norm": 0.5588600635528564, "learning_rate": 0.0008156582633053221, "loss": 0.4839, "step": 6687 }, { "epoch": 3.7363128491620112, "grad_norm": 2.7004554271698, "learning_rate": 0.0008156302521008404, "loss": 0.9044, "step": 6688 }, { "epoch": 3.7368715083798882, "grad_norm": 0.42151328921318054, "learning_rate": 0.0008156022408963585, "loss": 0.3964, "step": 6689 }, { "epoch": 3.7374301675977653, "grad_norm": 0.6013053059577942, "learning_rate": 0.0008155742296918767, "loss": 0.5085, "step": 6690 }, { "epoch": 3.7379888268156423, "grad_norm": 4.067948341369629, "learning_rate": 0.000815546218487395, "loss": 0.4912, "step": 6691 }, { "epoch": 3.7385474860335197, "grad_norm": 1.083478569984436, "learning_rate": 0.0008155182072829132, "loss": 0.4544, "step": 6692 }, { "epoch": 3.7391061452513967, "grad_norm": 1.0198547840118408, "learning_rate": 0.0008154901960784314, "loss": 0.488, "step": 6693 }, { "epoch": 3.7396648044692737, "grad_norm": 0.7450626492500305, "learning_rate": 0.0008154621848739497, "loss": 0.4072, "step": 6694 }, { "epoch": 3.7402234636871508, "grad_norm": 0.5811417698860168, "learning_rate": 0.0008154341736694678, "loss": 0.4857, "step": 6695 }, { "epoch": 3.7407821229050278, "grad_norm": 0.842680811882019, "learning_rate": 0.000815406162464986, "loss": 0.4494, "step": 6696 }, { "epoch": 3.741340782122905, 
"grad_norm": 0.5124238729476929, "learning_rate": 0.0008153781512605042, "loss": 0.4462, "step": 6697 }, { "epoch": 3.7418994413407822, "grad_norm": 2.8346331119537354, "learning_rate": 0.0008153501400560224, "loss": 0.6343, "step": 6698 }, { "epoch": 3.7424581005586592, "grad_norm": 1.0235646963119507, "learning_rate": 0.0008153221288515407, "loss": 0.4705, "step": 6699 }, { "epoch": 3.7430167597765363, "grad_norm": 0.7215641736984253, "learning_rate": 0.0008152941176470588, "loss": 0.6051, "step": 6700 }, { "epoch": 3.7435754189944133, "grad_norm": 1.0025595426559448, "learning_rate": 0.000815266106442577, "loss": 0.435, "step": 6701 }, { "epoch": 3.7441340782122907, "grad_norm": 0.43807944655418396, "learning_rate": 0.0008152380952380952, "loss": 0.4462, "step": 6702 }, { "epoch": 3.7446927374301677, "grad_norm": 0.5602341294288635, "learning_rate": 0.0008152100840336134, "loss": 0.5095, "step": 6703 }, { "epoch": 3.7452513966480447, "grad_norm": 0.6232250332832336, "learning_rate": 0.0008151820728291317, "loss": 0.5443, "step": 6704 }, { "epoch": 3.7458100558659218, "grad_norm": 2.684713125228882, "learning_rate": 0.0008151540616246498, "loss": 0.4548, "step": 6705 }, { "epoch": 3.7463687150837988, "grad_norm": 0.6109663248062134, "learning_rate": 0.000815126050420168, "loss": 0.4983, "step": 6706 }, { "epoch": 3.746927374301676, "grad_norm": 0.5532881617546082, "learning_rate": 0.0008150980392156862, "loss": 0.4783, "step": 6707 }, { "epoch": 3.7474860335195532, "grad_norm": 4.465295314788818, "learning_rate": 0.0008150700280112045, "loss": 0.5155, "step": 6708 }, { "epoch": 3.7480446927374302, "grad_norm": 9.735363006591797, "learning_rate": 0.0008150420168067228, "loss": 0.3735, "step": 6709 }, { "epoch": 3.7486033519553073, "grad_norm": 1.4678250551223755, "learning_rate": 0.000815014005602241, "loss": 0.4557, "step": 6710 }, { "epoch": 3.7491620111731843, "grad_norm": 0.838957667350769, "learning_rate": 0.0008149859943977591, "loss": 0.4303, "step": 6711 
}, { "epoch": 3.7497206703910617, "grad_norm": 0.7221022844314575, "learning_rate": 0.0008149579831932773, "loss": 0.5343, "step": 6712 }, { "epoch": 3.7502793296089383, "grad_norm": 0.6259539723396301, "learning_rate": 0.0008149299719887955, "loss": 0.5059, "step": 6713 }, { "epoch": 3.7508379888268157, "grad_norm": 0.605409562587738, "learning_rate": 0.0008149019607843138, "loss": 0.5434, "step": 6714 }, { "epoch": 3.7513966480446927, "grad_norm": 0.42584770917892456, "learning_rate": 0.000814873949579832, "loss": 0.3993, "step": 6715 }, { "epoch": 3.7519553072625698, "grad_norm": 1.0011265277862549, "learning_rate": 0.0008148459383753501, "loss": 0.4591, "step": 6716 }, { "epoch": 3.7525139664804468, "grad_norm": 0.740470290184021, "learning_rate": 0.0008148179271708683, "loss": 0.4431, "step": 6717 }, { "epoch": 3.753072625698324, "grad_norm": 11.556285858154297, "learning_rate": 0.0008147899159663865, "loss": 0.4651, "step": 6718 }, { "epoch": 3.7536312849162012, "grad_norm": 0.5221262574195862, "learning_rate": 0.0008147619047619048, "loss": 0.4661, "step": 6719 }, { "epoch": 3.7541899441340782, "grad_norm": 0.6103655695915222, "learning_rate": 0.000814733893557423, "loss": 0.5202, "step": 6720 }, { "epoch": 3.7547486033519553, "grad_norm": 0.817658007144928, "learning_rate": 0.0008147058823529411, "loss": 0.5072, "step": 6721 }, { "epoch": 3.7553072625698323, "grad_norm": 0.695067822933197, "learning_rate": 0.0008146778711484593, "loss": 0.521, "step": 6722 }, { "epoch": 3.7558659217877093, "grad_norm": 0.5560063123703003, "learning_rate": 0.0008146498599439775, "loss": 0.5336, "step": 6723 }, { "epoch": 3.7564245810055867, "grad_norm": 3.9974701404571533, "learning_rate": 0.0008146218487394959, "loss": 0.5197, "step": 6724 }, { "epoch": 3.7569832402234637, "grad_norm": 0.41313260793685913, "learning_rate": 0.0008145938375350141, "loss": 0.4011, "step": 6725 }, { "epoch": 3.7575418994413408, "grad_norm": 1.449632167816162, "learning_rate": 
0.0008145658263305323, "loss": 0.3802, "step": 6726 }, { "epoch": 3.7581005586592178, "grad_norm": 0.9330488443374634, "learning_rate": 0.0008145378151260504, "loss": 0.7069, "step": 6727 }, { "epoch": 3.758659217877095, "grad_norm": 1.2418686151504517, "learning_rate": 0.0008145098039215686, "loss": 0.4258, "step": 6728 }, { "epoch": 3.7592178770949722, "grad_norm": 0.5921820998191833, "learning_rate": 0.0008144817927170869, "loss": 0.5241, "step": 6729 }, { "epoch": 3.7597765363128492, "grad_norm": 0.5059346556663513, "learning_rate": 0.0008144537815126051, "loss": 0.4474, "step": 6730 }, { "epoch": 3.7603351955307263, "grad_norm": 0.4856972098350525, "learning_rate": 0.0008144257703081233, "loss": 0.5256, "step": 6731 }, { "epoch": 3.7608938547486033, "grad_norm": 0.9764010906219482, "learning_rate": 0.0008143977591036414, "loss": 0.4904, "step": 6732 }, { "epoch": 3.7614525139664803, "grad_norm": 0.5025168061256409, "learning_rate": 0.0008143697478991596, "loss": 0.3938, "step": 6733 }, { "epoch": 3.7620111731843577, "grad_norm": 0.758261501789093, "learning_rate": 0.0008143417366946779, "loss": 0.5647, "step": 6734 }, { "epoch": 3.7625698324022347, "grad_norm": 0.9293955564498901, "learning_rate": 0.0008143137254901961, "loss": 0.5905, "step": 6735 }, { "epoch": 3.7631284916201118, "grad_norm": 0.5527615547180176, "learning_rate": 0.0008142857142857143, "loss": 0.5036, "step": 6736 }, { "epoch": 3.7636871508379888, "grad_norm": 0.9503520131111145, "learning_rate": 0.0008142577030812324, "loss": 0.5558, "step": 6737 }, { "epoch": 3.764245810055866, "grad_norm": 0.6578249335289001, "learning_rate": 0.0008142296918767506, "loss": 0.4463, "step": 6738 }, { "epoch": 3.7648044692737432, "grad_norm": 0.5259749293327332, "learning_rate": 0.000814201680672269, "loss": 0.4637, "step": 6739 }, { "epoch": 3.7653631284916202, "grad_norm": 0.7044996023178101, "learning_rate": 0.0008141736694677872, "loss": 0.4722, "step": 6740 }, { "epoch": 3.7659217877094973, "grad_norm": 
0.5011742115020752, "learning_rate": 0.0008141456582633054, "loss": 0.482, "step": 6741 }, { "epoch": 3.7664804469273743, "grad_norm": 0.4759159982204437, "learning_rate": 0.0008141176470588236, "loss": 0.3951, "step": 6742 }, { "epoch": 3.7670391061452513, "grad_norm": 0.5874620079994202, "learning_rate": 0.0008140896358543417, "loss": 0.381, "step": 6743 }, { "epoch": 3.7675977653631287, "grad_norm": 0.43097150325775146, "learning_rate": 0.00081406162464986, "loss": 0.4077, "step": 6744 }, { "epoch": 3.7681564245810057, "grad_norm": 0.5463243126869202, "learning_rate": 0.0008140336134453782, "loss": 0.4109, "step": 6745 }, { "epoch": 3.7687150837988828, "grad_norm": 0.48519882559776306, "learning_rate": 0.0008140056022408964, "loss": 0.4407, "step": 6746 }, { "epoch": 3.7692737430167598, "grad_norm": 0.9570866227149963, "learning_rate": 0.0008139775910364146, "loss": 0.4123, "step": 6747 }, { "epoch": 3.769832402234637, "grad_norm": 0.6733881235122681, "learning_rate": 0.0008139495798319327, "loss": 0.4993, "step": 6748 }, { "epoch": 3.7703910614525142, "grad_norm": 0.414154052734375, "learning_rate": 0.000813921568627451, "loss": 0.4893, "step": 6749 }, { "epoch": 3.770949720670391, "grad_norm": 0.5273184776306152, "learning_rate": 0.0008138935574229692, "loss": 0.532, "step": 6750 }, { "epoch": 3.7715083798882683, "grad_norm": 0.5399999022483826, "learning_rate": 0.0008138655462184874, "loss": 0.4066, "step": 6751 }, { "epoch": 3.7720670391061453, "grad_norm": 0.7483530640602112, "learning_rate": 0.0008138375350140056, "loss": 0.4364, "step": 6752 }, { "epoch": 3.7726256983240223, "grad_norm": 1.0032182931900024, "learning_rate": 0.0008138095238095237, "loss": 0.4818, "step": 6753 }, { "epoch": 3.7731843575418993, "grad_norm": 0.6334372758865356, "learning_rate": 0.000813781512605042, "loss": 0.4276, "step": 6754 }, { "epoch": 3.7737430167597763, "grad_norm": 0.5629693269729614, "learning_rate": 0.0008137535014005602, "loss": 0.4405, "step": 6755 }, { "epoch": 
3.7743016759776538, "grad_norm": 0.8067587018013, "learning_rate": 0.0008137254901960785, "loss": 0.6695, "step": 6756 }, { "epoch": 3.7748603351955308, "grad_norm": 0.4765067398548126, "learning_rate": 0.0008136974789915967, "loss": 0.4581, "step": 6757 }, { "epoch": 3.775418994413408, "grad_norm": 0.3992355763912201, "learning_rate": 0.0008136694677871149, "loss": 0.3805, "step": 6758 }, { "epoch": 3.775977653631285, "grad_norm": 0.8096543550491333, "learning_rate": 0.0008136414565826331, "loss": 0.3946, "step": 6759 }, { "epoch": 3.776536312849162, "grad_norm": 0.6641272306442261, "learning_rate": 0.0008136134453781513, "loss": 0.4327, "step": 6760 }, { "epoch": 3.7770949720670393, "grad_norm": 0.7377946972846985, "learning_rate": 0.0008135854341736695, "loss": 0.3635, "step": 6761 }, { "epoch": 3.7776536312849163, "grad_norm": 0.49328842759132385, "learning_rate": 0.0008135574229691877, "loss": 0.4646, "step": 6762 }, { "epoch": 3.7782122905027933, "grad_norm": 0.46599531173706055, "learning_rate": 0.0008135294117647059, "loss": 0.445, "step": 6763 }, { "epoch": 3.7787709497206703, "grad_norm": 0.7711352109909058, "learning_rate": 0.0008135014005602241, "loss": 0.4761, "step": 6764 }, { "epoch": 3.7793296089385473, "grad_norm": 1.0588428974151611, "learning_rate": 0.0008134733893557423, "loss": 0.4925, "step": 6765 }, { "epoch": 3.7798882681564248, "grad_norm": 0.9532281756401062, "learning_rate": 0.0008134453781512605, "loss": 0.5696, "step": 6766 }, { "epoch": 3.7804469273743018, "grad_norm": 0.5671836733818054, "learning_rate": 0.0008134173669467787, "loss": 0.4673, "step": 6767 }, { "epoch": 3.781005586592179, "grad_norm": 1.4399776458740234, "learning_rate": 0.0008133893557422969, "loss": 0.5052, "step": 6768 }, { "epoch": 3.781564245810056, "grad_norm": 0.5638650059700012, "learning_rate": 0.0008133613445378152, "loss": 0.5438, "step": 6769 }, { "epoch": 3.782122905027933, "grad_norm": 0.5349329710006714, "learning_rate": 0.0008133333333333333, "loss": 
0.4396, "step": 6770 }, { "epoch": 3.7826815642458103, "grad_norm": 0.5024678111076355, "learning_rate": 0.0008133053221288515, "loss": 0.4636, "step": 6771 }, { "epoch": 3.7832402234636873, "grad_norm": 0.6195822358131409, "learning_rate": 0.0008132773109243697, "loss": 0.3973, "step": 6772 }, { "epoch": 3.7837988826815643, "grad_norm": 0.6856474280357361, "learning_rate": 0.000813249299719888, "loss": 0.4497, "step": 6773 }, { "epoch": 3.7843575418994413, "grad_norm": 1.4107484817504883, "learning_rate": 0.0008132212885154063, "loss": 0.4807, "step": 6774 }, { "epoch": 3.7849162011173183, "grad_norm": 0.4474133253097534, "learning_rate": 0.0008131932773109244, "loss": 0.4605, "step": 6775 }, { "epoch": 3.7854748603351958, "grad_norm": 0.6936215162277222, "learning_rate": 0.0008131652661064426, "loss": 0.5026, "step": 6776 }, { "epoch": 3.7860335195530728, "grad_norm": 0.611108660697937, "learning_rate": 0.0008131372549019608, "loss": 0.5067, "step": 6777 }, { "epoch": 3.78659217877095, "grad_norm": 0.5875313878059387, "learning_rate": 0.000813109243697479, "loss": 0.4401, "step": 6778 }, { "epoch": 3.787150837988827, "grad_norm": 0.5357989072799683, "learning_rate": 0.0008130812324929973, "loss": 0.4953, "step": 6779 }, { "epoch": 3.787709497206704, "grad_norm": 0.539035975933075, "learning_rate": 0.0008130532212885154, "loss": 0.4428, "step": 6780 }, { "epoch": 3.7882681564245813, "grad_norm": 0.5645351409912109, "learning_rate": 0.0008130252100840336, "loss": 0.5017, "step": 6781 }, { "epoch": 3.788826815642458, "grad_norm": 0.4674677550792694, "learning_rate": 0.0008129971988795518, "loss": 0.4784, "step": 6782 }, { "epoch": 3.7893854748603353, "grad_norm": 0.5537141561508179, "learning_rate": 0.00081296918767507, "loss": 0.4556, "step": 6783 }, { "epoch": 3.7899441340782123, "grad_norm": 0.6152671575546265, "learning_rate": 0.0008129411764705883, "loss": 0.5349, "step": 6784 }, { "epoch": 3.7905027932960893, "grad_norm": 2.1846680641174316, "learning_rate": 
0.0008129131652661065, "loss": 0.5873, "step": 6785 }, { "epoch": 3.7910614525139668, "grad_norm": 1.2025355100631714, "learning_rate": 0.0008128851540616246, "loss": 0.4106, "step": 6786 }, { "epoch": 3.7916201117318433, "grad_norm": 0.44122374057769775, "learning_rate": 0.0008128571428571428, "loss": 0.4116, "step": 6787 }, { "epoch": 3.792178770949721, "grad_norm": 0.578125536441803, "learning_rate": 0.000812829131652661, "loss": 0.4374, "step": 6788 }, { "epoch": 3.792737430167598, "grad_norm": 0.5104628801345825, "learning_rate": 0.0008128011204481794, "loss": 0.4366, "step": 6789 }, { "epoch": 3.793296089385475, "grad_norm": 0.566911518573761, "learning_rate": 0.0008127731092436976, "loss": 0.465, "step": 6790 }, { "epoch": 3.793854748603352, "grad_norm": 0.727301836013794, "learning_rate": 0.0008127450980392157, "loss": 0.4883, "step": 6791 }, { "epoch": 3.794413407821229, "grad_norm": 0.5929538607597351, "learning_rate": 0.0008127170868347339, "loss": 0.4769, "step": 6792 }, { "epoch": 3.7949720670391063, "grad_norm": 0.6852974891662598, "learning_rate": 0.0008126890756302521, "loss": 0.3843, "step": 6793 }, { "epoch": 3.7955307262569833, "grad_norm": 4.200253486633301, "learning_rate": 0.0008126610644257704, "loss": 0.4536, "step": 6794 }, { "epoch": 3.7960893854748603, "grad_norm": 0.9143809080123901, "learning_rate": 0.0008126330532212886, "loss": 0.5735, "step": 6795 }, { "epoch": 3.7966480446927373, "grad_norm": 0.7114330530166626, "learning_rate": 0.0008126050420168067, "loss": 0.4702, "step": 6796 }, { "epoch": 3.7972067039106143, "grad_norm": 1.4647160768508911, "learning_rate": 0.0008125770308123249, "loss": 0.4906, "step": 6797 }, { "epoch": 3.7977653631284918, "grad_norm": 0.730023205280304, "learning_rate": 0.0008125490196078431, "loss": 0.3852, "step": 6798 }, { "epoch": 3.798324022346369, "grad_norm": 0.44529619812965393, "learning_rate": 0.0008125210084033614, "loss": 0.3604, "step": 6799 }, { "epoch": 3.798882681564246, "grad_norm": 
0.5767450332641602, "learning_rate": 0.0008124929971988796, "loss": 0.5622, "step": 6800 }, { "epoch": 3.799441340782123, "grad_norm": 1.0275883674621582, "learning_rate": 0.0008124649859943978, "loss": 0.559, "step": 6801 }, { "epoch": 3.8, "grad_norm": 1.4503183364868164, "learning_rate": 0.0008124369747899159, "loss": 0.5087, "step": 6802 }, { "epoch": 3.8005586592178773, "grad_norm": 0.6960155963897705, "learning_rate": 0.0008124089635854341, "loss": 0.5502, "step": 6803 }, { "epoch": 3.8011173184357543, "grad_norm": 0.5598259568214417, "learning_rate": 0.0008123809523809524, "loss": 0.4339, "step": 6804 }, { "epoch": 3.8016759776536313, "grad_norm": 0.4804055094718933, "learning_rate": 0.0008123529411764707, "loss": 0.4475, "step": 6805 }, { "epoch": 3.8022346368715083, "grad_norm": 0.4204331040382385, "learning_rate": 0.0008123249299719889, "loss": 0.3992, "step": 6806 }, { "epoch": 3.8027932960893853, "grad_norm": 0.6922451853752136, "learning_rate": 0.000812296918767507, "loss": 0.4941, "step": 6807 }, { "epoch": 3.8033519553072628, "grad_norm": 1.050072431564331, "learning_rate": 0.0008122689075630252, "loss": 0.5497, "step": 6808 }, { "epoch": 3.80391061452514, "grad_norm": 0.7848671078681946, "learning_rate": 0.0008122408963585435, "loss": 0.4701, "step": 6809 }, { "epoch": 3.804469273743017, "grad_norm": 0.5557612776756287, "learning_rate": 0.0008122128851540617, "loss": 0.49, "step": 6810 }, { "epoch": 3.805027932960894, "grad_norm": 0.474393367767334, "learning_rate": 0.0008121848739495799, "loss": 0.4935, "step": 6811 }, { "epoch": 3.805586592178771, "grad_norm": 0.5907094478607178, "learning_rate": 0.000812156862745098, "loss": 0.4478, "step": 6812 }, { "epoch": 3.8061452513966483, "grad_norm": 0.4820985794067383, "learning_rate": 0.0008121288515406162, "loss": 0.4665, "step": 6813 }, { "epoch": 3.8067039106145253, "grad_norm": 0.4366481900215149, "learning_rate": 0.0008121008403361345, "loss": 0.4316, "step": 6814 }, { "epoch": 3.8072625698324023, 
"grad_norm": 0.5917373299598694, "learning_rate": 0.0008120728291316527, "loss": 0.4873, "step": 6815 }, { "epoch": 3.8078212290502793, "grad_norm": 4.734868049621582, "learning_rate": 0.0008120448179271709, "loss": 0.4826, "step": 6816 }, { "epoch": 3.8083798882681563, "grad_norm": 1.078062653541565, "learning_rate": 0.0008120168067226891, "loss": 0.4775, "step": 6817 }, { "epoch": 3.8089385474860338, "grad_norm": 0.5761260986328125, "learning_rate": 0.0008119887955182072, "loss": 0.5683, "step": 6818 }, { "epoch": 3.8094972067039103, "grad_norm": 6.666396617889404, "learning_rate": 0.0008119607843137255, "loss": 0.4769, "step": 6819 }, { "epoch": 3.810055865921788, "grad_norm": 0.5542675852775574, "learning_rate": 0.0008119327731092437, "loss": 0.4529, "step": 6820 }, { "epoch": 3.810614525139665, "grad_norm": 0.606116533279419, "learning_rate": 0.000811904761904762, "loss": 0.5309, "step": 6821 }, { "epoch": 3.811173184357542, "grad_norm": 0.7645151019096375, "learning_rate": 0.0008118767507002802, "loss": 0.393, "step": 6822 }, { "epoch": 3.811731843575419, "grad_norm": 0.4758078157901764, "learning_rate": 0.0008118487394957983, "loss": 0.4076, "step": 6823 }, { "epoch": 3.812290502793296, "grad_norm": 0.7427760362625122, "learning_rate": 0.0008118207282913166, "loss": 0.517, "step": 6824 }, { "epoch": 3.8128491620111733, "grad_norm": 0.6136566996574402, "learning_rate": 0.0008117927170868348, "loss": 0.419, "step": 6825 }, { "epoch": 3.8134078212290503, "grad_norm": 1.243286371231079, "learning_rate": 0.000811764705882353, "loss": 0.4208, "step": 6826 }, { "epoch": 3.8139664804469273, "grad_norm": 0.7931225895881653, "learning_rate": 0.0008117366946778712, "loss": 0.4574, "step": 6827 }, { "epoch": 3.8145251396648043, "grad_norm": 0.4445907473564148, "learning_rate": 0.0008117086834733893, "loss": 0.3936, "step": 6828 }, { "epoch": 3.8150837988826813, "grad_norm": 0.4751304090023041, "learning_rate": 0.0008116806722689076, "loss": 0.4431, "step": 6829 }, { 
"epoch": 3.815642458100559, "grad_norm": 0.7380807399749756, "learning_rate": 0.0008116526610644258, "loss": 0.5626, "step": 6830 }, { "epoch": 3.816201117318436, "grad_norm": 0.6071987152099609, "learning_rate": 0.000811624649859944, "loss": 0.5018, "step": 6831 }, { "epoch": 3.816759776536313, "grad_norm": 0.6682330965995789, "learning_rate": 0.0008115966386554622, "loss": 0.559, "step": 6832 }, { "epoch": 3.81731843575419, "grad_norm": 0.5862687230110168, "learning_rate": 0.0008115686274509804, "loss": 0.3786, "step": 6833 }, { "epoch": 3.817877094972067, "grad_norm": 3.3499162197113037, "learning_rate": 0.0008115406162464986, "loss": 0.4302, "step": 6834 }, { "epoch": 3.8184357541899443, "grad_norm": 0.5834749341011047, "learning_rate": 0.0008115126050420168, "loss": 0.4131, "step": 6835 }, { "epoch": 3.8189944134078213, "grad_norm": 0.7662965059280396, "learning_rate": 0.000811484593837535, "loss": 0.4605, "step": 6836 }, { "epoch": 3.8195530726256983, "grad_norm": 1.037217140197754, "learning_rate": 0.0008114565826330532, "loss": 0.4916, "step": 6837 }, { "epoch": 3.8201117318435753, "grad_norm": 0.5889070630073547, "learning_rate": 0.0008114285714285715, "loss": 0.4462, "step": 6838 }, { "epoch": 3.8206703910614523, "grad_norm": 1.1865012645721436, "learning_rate": 0.0008114005602240897, "loss": 0.4037, "step": 6839 }, { "epoch": 3.82122905027933, "grad_norm": 0.7193288803100586, "learning_rate": 0.0008113725490196079, "loss": 0.5748, "step": 6840 }, { "epoch": 3.821787709497207, "grad_norm": 0.8877421617507935, "learning_rate": 0.0008113445378151261, "loss": 0.4212, "step": 6841 }, { "epoch": 3.822346368715084, "grad_norm": 0.7898842692375183, "learning_rate": 0.0008113165266106443, "loss": 0.4114, "step": 6842 }, { "epoch": 3.822905027932961, "grad_norm": 0.6861692070960999, "learning_rate": 0.0008112885154061625, "loss": 0.5395, "step": 6843 }, { "epoch": 3.823463687150838, "grad_norm": 0.44756853580474854, "learning_rate": 0.0008112605042016807, "loss": 
0.2973, "step": 6844 }, { "epoch": 3.8240223463687153, "grad_norm": 0.8027526140213013, "learning_rate": 0.0008112324929971989, "loss": 0.6062, "step": 6845 }, { "epoch": 3.8245810055865923, "grad_norm": 0.5482564568519592, "learning_rate": 0.0008112044817927171, "loss": 0.5726, "step": 6846 }, { "epoch": 3.8251396648044693, "grad_norm": 1.787686824798584, "learning_rate": 0.0008111764705882353, "loss": 0.4873, "step": 6847 }, { "epoch": 3.8256983240223463, "grad_norm": 0.5472537279129028, "learning_rate": 0.0008111484593837535, "loss": 0.4862, "step": 6848 }, { "epoch": 3.8262569832402233, "grad_norm": 20.877382278442383, "learning_rate": 0.0008111204481792718, "loss": 0.4342, "step": 6849 }, { "epoch": 3.826815642458101, "grad_norm": 0.8070502281188965, "learning_rate": 0.0008110924369747899, "loss": 0.5737, "step": 6850 }, { "epoch": 3.827374301675978, "grad_norm": 0.6082700490951538, "learning_rate": 0.0008110644257703081, "loss": 0.3914, "step": 6851 }, { "epoch": 3.827932960893855, "grad_norm": 0.7312552332878113, "learning_rate": 0.0008110364145658263, "loss": 0.4696, "step": 6852 }, { "epoch": 3.828491620111732, "grad_norm": 2.196409225463867, "learning_rate": 0.0008110084033613445, "loss": 0.4363, "step": 6853 }, { "epoch": 3.829050279329609, "grad_norm": 0.8176388740539551, "learning_rate": 0.0008109803921568629, "loss": 0.4573, "step": 6854 }, { "epoch": 3.8296089385474863, "grad_norm": 0.9460632801055908, "learning_rate": 0.000810952380952381, "loss": 0.4638, "step": 6855 }, { "epoch": 3.830167597765363, "grad_norm": 2.6144094467163086, "learning_rate": 0.0008109243697478992, "loss": 0.5105, "step": 6856 }, { "epoch": 3.8307262569832403, "grad_norm": 0.5067965388298035, "learning_rate": 0.0008108963585434174, "loss": 0.4402, "step": 6857 }, { "epoch": 3.8312849162011173, "grad_norm": 0.5917914509773254, "learning_rate": 0.0008108683473389356, "loss": 0.4318, "step": 6858 }, { "epoch": 3.8318435754189943, "grad_norm": 0.7330923080444336, "learning_rate": 
0.0008108403361344539, "loss": 0.4405, "step": 6859 }, { "epoch": 3.8324022346368714, "grad_norm": 0.6495806574821472, "learning_rate": 0.000810812324929972, "loss": 0.47, "step": 6860 }, { "epoch": 3.8329608938547484, "grad_norm": 0.7199528217315674, "learning_rate": 0.0008107843137254902, "loss": 0.497, "step": 6861 }, { "epoch": 3.833519553072626, "grad_norm": 0.4221893846988678, "learning_rate": 0.0008107563025210084, "loss": 0.4337, "step": 6862 }, { "epoch": 3.834078212290503, "grad_norm": 0.5389212369918823, "learning_rate": 0.0008107282913165266, "loss": 0.5485, "step": 6863 }, { "epoch": 3.83463687150838, "grad_norm": 0.9004932045936584, "learning_rate": 0.0008107002801120449, "loss": 0.5197, "step": 6864 }, { "epoch": 3.835195530726257, "grad_norm": 0.41269150376319885, "learning_rate": 0.0008106722689075631, "loss": 0.462, "step": 6865 }, { "epoch": 3.835754189944134, "grad_norm": 0.5771477818489075, "learning_rate": 0.0008106442577030812, "loss": 0.3544, "step": 6866 }, { "epoch": 3.8363128491620113, "grad_norm": 3.602389335632324, "learning_rate": 0.0008106162464985994, "loss": 0.4453, "step": 6867 }, { "epoch": 3.8368715083798883, "grad_norm": 0.49727004766464233, "learning_rate": 0.0008105882352941176, "loss": 0.4801, "step": 6868 }, { "epoch": 3.8374301675977653, "grad_norm": 21.37616729736328, "learning_rate": 0.0008105602240896359, "loss": 0.5378, "step": 6869 }, { "epoch": 3.8379888268156424, "grad_norm": 1.1396398544311523, "learning_rate": 0.0008105322128851542, "loss": 0.5094, "step": 6870 }, { "epoch": 3.8385474860335194, "grad_norm": 1.5971423387527466, "learning_rate": 0.0008105042016806722, "loss": 0.4703, "step": 6871 }, { "epoch": 3.839106145251397, "grad_norm": 0.5216647982597351, "learning_rate": 0.0008104761904761905, "loss": 0.4093, "step": 6872 }, { "epoch": 3.839664804469274, "grad_norm": 0.5850594639778137, "learning_rate": 0.0008104481792717087, "loss": 0.5847, "step": 6873 }, { "epoch": 3.840223463687151, "grad_norm": 
0.5579874515533447, "learning_rate": 0.000810420168067227, "loss": 0.4894, "step": 6874 }, { "epoch": 3.840782122905028, "grad_norm": 1.196791410446167, "learning_rate": 0.0008103921568627452, "loss": 0.5488, "step": 6875 }, { "epoch": 3.841340782122905, "grad_norm": 0.92503422498703, "learning_rate": 0.0008103641456582633, "loss": 0.6328, "step": 6876 }, { "epoch": 3.8418994413407823, "grad_norm": 0.5861455202102661, "learning_rate": 0.0008103361344537815, "loss": 0.4211, "step": 6877 }, { "epoch": 3.8424581005586593, "grad_norm": 0.645728588104248, "learning_rate": 0.0008103081232492997, "loss": 0.5812, "step": 6878 }, { "epoch": 3.8430167597765363, "grad_norm": 0.6964361667633057, "learning_rate": 0.000810280112044818, "loss": 0.4298, "step": 6879 }, { "epoch": 3.8435754189944134, "grad_norm": 3.3224074840545654, "learning_rate": 0.0008102521008403362, "loss": 0.364, "step": 6880 }, { "epoch": 3.8441340782122904, "grad_norm": 0.6650158762931824, "learning_rate": 0.0008102240896358544, "loss": 0.52, "step": 6881 }, { "epoch": 3.844692737430168, "grad_norm": 0.6205301284790039, "learning_rate": 0.0008101960784313725, "loss": 0.4434, "step": 6882 }, { "epoch": 3.845251396648045, "grad_norm": 1.7254462242126465, "learning_rate": 0.0008101680672268907, "loss": 0.5268, "step": 6883 }, { "epoch": 3.845810055865922, "grad_norm": 0.6671683192253113, "learning_rate": 0.000810140056022409, "loss": 0.4726, "step": 6884 }, { "epoch": 3.846368715083799, "grad_norm": 0.4430246353149414, "learning_rate": 0.0008101120448179272, "loss": 0.3557, "step": 6885 }, { "epoch": 3.846927374301676, "grad_norm": 0.43659278750419617, "learning_rate": 0.0008100840336134454, "loss": 0.4207, "step": 6886 }, { "epoch": 3.8474860335195533, "grad_norm": 0.9084954857826233, "learning_rate": 0.0008100560224089635, "loss": 0.6475, "step": 6887 }, { "epoch": 3.8480446927374303, "grad_norm": 0.4397558569908142, "learning_rate": 0.0008100280112044818, "loss": 0.4208, "step": 6888 }, { "epoch": 
3.8486033519553073, "grad_norm": 0.5738127827644348, "learning_rate": 0.0008100000000000001, "loss": 0.5228, "step": 6889 }, { "epoch": 3.8491620111731844, "grad_norm": 1.7955875396728516, "learning_rate": 0.0008099719887955183, "loss": 0.3892, "step": 6890 }, { "epoch": 3.8497206703910614, "grad_norm": 0.47176024317741394, "learning_rate": 0.0008099439775910365, "loss": 0.4342, "step": 6891 }, { "epoch": 3.850279329608939, "grad_norm": 0.6593926548957825, "learning_rate": 0.0008099159663865546, "loss": 0.4822, "step": 6892 }, { "epoch": 3.8508379888268154, "grad_norm": 0.6891830563545227, "learning_rate": 0.0008098879551820728, "loss": 0.4241, "step": 6893 }, { "epoch": 3.851396648044693, "grad_norm": 0.7264090180397034, "learning_rate": 0.0008098599439775911, "loss": 0.395, "step": 6894 }, { "epoch": 3.85195530726257, "grad_norm": 0.6550943851470947, "learning_rate": 0.0008098319327731093, "loss": 0.4539, "step": 6895 }, { "epoch": 3.852513966480447, "grad_norm": 3.825160503387451, "learning_rate": 0.0008098039215686275, "loss": 0.5699, "step": 6896 }, { "epoch": 3.853072625698324, "grad_norm": 0.8306968808174133, "learning_rate": 0.0008097759103641457, "loss": 0.4448, "step": 6897 }, { "epoch": 3.853631284916201, "grad_norm": 1.1376948356628418, "learning_rate": 0.0008097478991596638, "loss": 0.4899, "step": 6898 }, { "epoch": 3.8541899441340783, "grad_norm": 0.5389619469642639, "learning_rate": 0.0008097198879551821, "loss": 0.4325, "step": 6899 }, { "epoch": 3.8547486033519553, "grad_norm": 0.5750625133514404, "learning_rate": 0.0008096918767507003, "loss": 0.4977, "step": 6900 }, { "epoch": 3.8553072625698324, "grad_norm": 0.5885065197944641, "learning_rate": 0.0008096638655462185, "loss": 0.4904, "step": 6901 }, { "epoch": 3.8558659217877094, "grad_norm": 0.4275752902030945, "learning_rate": 0.0008096358543417367, "loss": 0.3351, "step": 6902 }, { "epoch": 3.8564245810055864, "grad_norm": 0.6802307367324829, "learning_rate": 0.0008096078431372548, "loss": 
0.6557, "step": 6903 }, { "epoch": 3.856983240223464, "grad_norm": 1.1220121383666992, "learning_rate": 0.0008095798319327732, "loss": 0.5859, "step": 6904 }, { "epoch": 3.857541899441341, "grad_norm": 0.5158030986785889, "learning_rate": 0.0008095518207282914, "loss": 0.3694, "step": 6905 }, { "epoch": 3.858100558659218, "grad_norm": 3.132863998413086, "learning_rate": 0.0008095238095238096, "loss": 0.4492, "step": 6906 }, { "epoch": 3.858659217877095, "grad_norm": 0.5213937759399414, "learning_rate": 0.0008094957983193278, "loss": 0.5282, "step": 6907 }, { "epoch": 3.859217877094972, "grad_norm": 0.4709155857563019, "learning_rate": 0.0008094677871148459, "loss": 0.456, "step": 6908 }, { "epoch": 3.8597765363128493, "grad_norm": 0.4908982813358307, "learning_rate": 0.0008094397759103642, "loss": 0.4422, "step": 6909 }, { "epoch": 3.8603351955307263, "grad_norm": 6.982736110687256, "learning_rate": 0.0008094117647058824, "loss": 0.5534, "step": 6910 }, { "epoch": 3.8608938547486034, "grad_norm": 0.5150156617164612, "learning_rate": 0.0008093837535014006, "loss": 0.3855, "step": 6911 }, { "epoch": 3.8614525139664804, "grad_norm": 0.685828447341919, "learning_rate": 0.0008093557422969188, "loss": 0.5353, "step": 6912 }, { "epoch": 3.8620111731843574, "grad_norm": 0.529667317867279, "learning_rate": 0.000809327731092437, "loss": 0.4746, "step": 6913 }, { "epoch": 3.862569832402235, "grad_norm": 0.7907227277755737, "learning_rate": 0.0008092997198879551, "loss": 0.5419, "step": 6914 }, { "epoch": 3.863128491620112, "grad_norm": 0.5212898254394531, "learning_rate": 0.0008092717086834734, "loss": 0.4079, "step": 6915 }, { "epoch": 3.863687150837989, "grad_norm": 0.8711379766464233, "learning_rate": 0.0008092436974789916, "loss": 0.6348, "step": 6916 }, { "epoch": 3.864245810055866, "grad_norm": 0.7282832264900208, "learning_rate": 0.0008092156862745098, "loss": 0.5151, "step": 6917 }, { "epoch": 3.864804469273743, "grad_norm": 0.6962943077087402, "learning_rate": 
0.000809187675070028, "loss": 0.5445, "step": 6918 }, { "epoch": 3.8653631284916203, "grad_norm": 0.5087282657623291, "learning_rate": 0.0008091596638655461, "loss": 0.4424, "step": 6919 }, { "epoch": 3.8659217877094973, "grad_norm": 0.6248132586479187, "learning_rate": 0.0008091316526610645, "loss": 0.4197, "step": 6920 }, { "epoch": 3.8664804469273744, "grad_norm": 0.6364431977272034, "learning_rate": 0.0008091036414565827, "loss": 0.5092, "step": 6921 }, { "epoch": 3.8670391061452514, "grad_norm": 0.543777346611023, "learning_rate": 0.0008090756302521009, "loss": 0.4106, "step": 6922 }, { "epoch": 3.8675977653631284, "grad_norm": 0.6014625430107117, "learning_rate": 0.0008090476190476191, "loss": 0.4152, "step": 6923 }, { "epoch": 3.868156424581006, "grad_norm": 0.39569205045700073, "learning_rate": 0.0008090196078431372, "loss": 0.3923, "step": 6924 }, { "epoch": 3.868715083798883, "grad_norm": 0.429889053106308, "learning_rate": 0.0008089915966386555, "loss": 0.4411, "step": 6925 }, { "epoch": 3.86927374301676, "grad_norm": 0.5761198401451111, "learning_rate": 0.0008089635854341737, "loss": 0.4711, "step": 6926 }, { "epoch": 3.869832402234637, "grad_norm": 2.0339722633361816, "learning_rate": 0.0008089355742296919, "loss": 0.4117, "step": 6927 }, { "epoch": 3.870391061452514, "grad_norm": 0.8914132714271545, "learning_rate": 0.0008089075630252101, "loss": 0.4967, "step": 6928 }, { "epoch": 3.8709497206703913, "grad_norm": 0.4805266261100769, "learning_rate": 0.0008088795518207283, "loss": 0.4811, "step": 6929 }, { "epoch": 3.871508379888268, "grad_norm": 0.7968722581863403, "learning_rate": 0.0008088515406162465, "loss": 0.575, "step": 6930 }, { "epoch": 3.8720670391061454, "grad_norm": 0.5343600511550903, "learning_rate": 0.0008088235294117647, "loss": 0.4464, "step": 6931 }, { "epoch": 3.8726256983240224, "grad_norm": 0.7171983122825623, "learning_rate": 0.0008087955182072829, "loss": 0.5011, "step": 6932 }, { "epoch": 3.8731843575418994, "grad_norm": 
2.3407227993011475, "learning_rate": 0.0008087675070028011, "loss": 0.5023, "step": 6933 }, { "epoch": 3.8737430167597764, "grad_norm": 0.7763156294822693, "learning_rate": 0.0008087394957983193, "loss": 0.5205, "step": 6934 }, { "epoch": 3.8743016759776534, "grad_norm": 0.8254567980766296, "learning_rate": 0.0008087114845938375, "loss": 0.4492, "step": 6935 }, { "epoch": 3.874860335195531, "grad_norm": 0.5877164602279663, "learning_rate": 0.0008086834733893557, "loss": 0.4079, "step": 6936 }, { "epoch": 3.875418994413408, "grad_norm": 0.4659554660320282, "learning_rate": 0.000808655462184874, "loss": 0.3999, "step": 6937 }, { "epoch": 3.875977653631285, "grad_norm": 0.6956237554550171, "learning_rate": 0.0008086274509803922, "loss": 0.683, "step": 6938 }, { "epoch": 3.876536312849162, "grad_norm": 0.721974790096283, "learning_rate": 0.0008085994397759104, "loss": 0.4509, "step": 6939 }, { "epoch": 3.877094972067039, "grad_norm": 0.719066858291626, "learning_rate": 0.0008085714285714286, "loss": 0.5144, "step": 6940 }, { "epoch": 3.8776536312849164, "grad_norm": 0.7596331834793091, "learning_rate": 0.0008085434173669468, "loss": 0.556, "step": 6941 }, { "epoch": 3.8782122905027934, "grad_norm": 2.164646625518799, "learning_rate": 0.000808515406162465, "loss": 0.3667, "step": 6942 }, { "epoch": 3.8787709497206704, "grad_norm": 0.6435976624488831, "learning_rate": 0.0008084873949579832, "loss": 0.3977, "step": 6943 }, { "epoch": 3.8793296089385474, "grad_norm": 1.3901053667068481, "learning_rate": 0.0008084593837535014, "loss": 0.6029, "step": 6944 }, { "epoch": 3.8798882681564244, "grad_norm": 0.5092434287071228, "learning_rate": 0.0008084313725490197, "loss": 0.4063, "step": 6945 }, { "epoch": 3.880446927374302, "grad_norm": 0.49767711758613586, "learning_rate": 0.0008084033613445378, "loss": 0.3927, "step": 6946 }, { "epoch": 3.881005586592179, "grad_norm": 0.5102831125259399, "learning_rate": 0.000808375350140056, "loss": 0.3939, "step": 6947 }, { "epoch": 
3.881564245810056, "grad_norm": 0.4946901798248291, "learning_rate": 0.0008083473389355742, "loss": 0.4616, "step": 6948 }, { "epoch": 3.882122905027933, "grad_norm": 1.0997058153152466, "learning_rate": 0.0008083193277310924, "loss": 0.5067, "step": 6949 }, { "epoch": 3.88268156424581, "grad_norm": 0.6095570921897888, "learning_rate": 0.0008082913165266107, "loss": 0.463, "step": 6950 }, { "epoch": 3.8832402234636874, "grad_norm": 0.5534300208091736, "learning_rate": 0.0008082633053221288, "loss": 0.5615, "step": 6951 }, { "epoch": 3.8837988826815644, "grad_norm": 0.48418498039245605, "learning_rate": 0.000808235294117647, "loss": 0.4672, "step": 6952 }, { "epoch": 3.8843575418994414, "grad_norm": 0.4470704197883606, "learning_rate": 0.0008082072829131652, "loss": 0.4198, "step": 6953 }, { "epoch": 3.8849162011173184, "grad_norm": 1.4324913024902344, "learning_rate": 0.0008081792717086835, "loss": 0.505, "step": 6954 }, { "epoch": 3.8854748603351954, "grad_norm": 0.4697835445404053, "learning_rate": 0.0008081512605042018, "loss": 0.349, "step": 6955 }, { "epoch": 3.886033519553073, "grad_norm": 0.625615656375885, "learning_rate": 0.0008081232492997199, "loss": 0.4977, "step": 6956 }, { "epoch": 3.88659217877095, "grad_norm": 0.4555494487285614, "learning_rate": 0.0008080952380952381, "loss": 0.3867, "step": 6957 }, { "epoch": 3.887150837988827, "grad_norm": 0.5971218347549438, "learning_rate": 0.0008080672268907563, "loss": 0.4265, "step": 6958 }, { "epoch": 3.887709497206704, "grad_norm": 0.48569631576538086, "learning_rate": 0.0008080392156862745, "loss": 0.4778, "step": 6959 }, { "epoch": 3.888268156424581, "grad_norm": 0.4957340955734253, "learning_rate": 0.0008080112044817928, "loss": 0.519, "step": 6960 }, { "epoch": 3.8888268156424584, "grad_norm": 0.8952131867408752, "learning_rate": 0.000807983193277311, "loss": 0.4559, "step": 6961 }, { "epoch": 3.889385474860335, "grad_norm": 0.5165050625801086, "learning_rate": 0.0008079551820728291, "loss": 0.5231, 
"step": 6962 }, { "epoch": 3.8899441340782124, "grad_norm": 0.5500022172927856, "learning_rate": 0.0008079271708683473, "loss": 0.451, "step": 6963 }, { "epoch": 3.8905027932960894, "grad_norm": 0.6725042462348938, "learning_rate": 0.0008078991596638655, "loss": 0.4561, "step": 6964 }, { "epoch": 3.8910614525139664, "grad_norm": 0.9114112854003906, "learning_rate": 0.0008078711484593838, "loss": 0.7846, "step": 6965 }, { "epoch": 3.8916201117318434, "grad_norm": 0.7531450390815735, "learning_rate": 0.000807843137254902, "loss": 0.3715, "step": 6966 }, { "epoch": 3.8921787709497204, "grad_norm": 0.41826075315475464, "learning_rate": 0.0008078151260504201, "loss": 0.3883, "step": 6967 }, { "epoch": 3.892737430167598, "grad_norm": 0.6685256958007812, "learning_rate": 0.0008077871148459383, "loss": 0.6353, "step": 6968 }, { "epoch": 3.893296089385475, "grad_norm": 0.7648836374282837, "learning_rate": 0.0008077591036414565, "loss": 0.5177, "step": 6969 }, { "epoch": 3.893854748603352, "grad_norm": 0.6054646372795105, "learning_rate": 0.0008077310924369749, "loss": 0.4793, "step": 6970 }, { "epoch": 3.894413407821229, "grad_norm": 0.7664828300476074, "learning_rate": 0.0008077030812324931, "loss": 0.4502, "step": 6971 }, { "epoch": 3.894972067039106, "grad_norm": 0.7895660996437073, "learning_rate": 0.0008076750700280112, "loss": 0.4989, "step": 6972 }, { "epoch": 3.8955307262569834, "grad_norm": 0.4433261454105377, "learning_rate": 0.0008076470588235294, "loss": 0.4142, "step": 6973 }, { "epoch": 3.8960893854748604, "grad_norm": 2.4702680110931396, "learning_rate": 0.0008076190476190476, "loss": 0.3818, "step": 6974 }, { "epoch": 3.8966480446927374, "grad_norm": 5.3434343338012695, "learning_rate": 0.0008075910364145659, "loss": 0.4703, "step": 6975 }, { "epoch": 3.8972067039106144, "grad_norm": 3.6028733253479004, "learning_rate": 0.0008075630252100841, "loss": 0.4731, "step": 6976 }, { "epoch": 3.8977653631284914, "grad_norm": 0.8043778538703918, "learning_rate": 
0.0008075350140056023, "loss": 0.3976, "step": 6977 }, { "epoch": 3.898324022346369, "grad_norm": 0.6024851202964783, "learning_rate": 0.0008075070028011204, "loss": 0.4111, "step": 6978 }, { "epoch": 3.898882681564246, "grad_norm": 3.9494760036468506, "learning_rate": 0.0008074789915966386, "loss": 0.3613, "step": 6979 }, { "epoch": 3.899441340782123, "grad_norm": 0.4640497863292694, "learning_rate": 0.0008074509803921569, "loss": 0.5308, "step": 6980 }, { "epoch": 3.9, "grad_norm": 0.9282743334770203, "learning_rate": 0.0008074229691876751, "loss": 0.4543, "step": 6981 }, { "epoch": 3.900558659217877, "grad_norm": 0.5232148170471191, "learning_rate": 0.0008073949579831933, "loss": 0.3963, "step": 6982 }, { "epoch": 3.9011173184357544, "grad_norm": 0.5028994679450989, "learning_rate": 0.0008073669467787114, "loss": 0.393, "step": 6983 }, { "epoch": 3.9016759776536314, "grad_norm": 0.569754421710968, "learning_rate": 0.0008073389355742296, "loss": 0.4253, "step": 6984 }, { "epoch": 3.9022346368715084, "grad_norm": 0.3445882499217987, "learning_rate": 0.000807310924369748, "loss": 0.3335, "step": 6985 }, { "epoch": 3.9027932960893854, "grad_norm": 0.7102305293083191, "learning_rate": 0.0008072829131652662, "loss": 0.6117, "step": 6986 }, { "epoch": 3.9033519553072624, "grad_norm": 0.4957873523235321, "learning_rate": 0.0008072549019607844, "loss": 0.4687, "step": 6987 }, { "epoch": 3.90391061452514, "grad_norm": 0.8255720734596252, "learning_rate": 0.0008072268907563025, "loss": 0.4999, "step": 6988 }, { "epoch": 3.904469273743017, "grad_norm": 0.517471194267273, "learning_rate": 0.0008071988795518207, "loss": 0.4892, "step": 6989 }, { "epoch": 3.905027932960894, "grad_norm": 0.4388323128223419, "learning_rate": 0.000807170868347339, "loss": 0.3793, "step": 6990 }, { "epoch": 3.905586592178771, "grad_norm": 0.5175237655639648, "learning_rate": 0.0008071428571428572, "loss": 0.4927, "step": 6991 }, { "epoch": 3.906145251396648, "grad_norm": 0.4710371792316437, 
"learning_rate": 0.0008071148459383754, "loss": 0.45, "step": 6992 }, { "epoch": 3.9067039106145254, "grad_norm": 1.0904172658920288, "learning_rate": 0.0008070868347338936, "loss": 0.4551, "step": 6993 }, { "epoch": 3.9072625698324024, "grad_norm": 0.4209286868572235, "learning_rate": 0.0008070588235294117, "loss": 0.3991, "step": 6994 }, { "epoch": 3.9078212290502794, "grad_norm": 0.5297243595123291, "learning_rate": 0.00080703081232493, "loss": 0.426, "step": 6995 }, { "epoch": 3.9083798882681564, "grad_norm": 0.7202159762382507, "learning_rate": 0.0008070028011204482, "loss": 0.5661, "step": 6996 }, { "epoch": 3.9089385474860334, "grad_norm": 1.644371747970581, "learning_rate": 0.0008069747899159664, "loss": 0.5367, "step": 6997 }, { "epoch": 3.909497206703911, "grad_norm": 0.5092253684997559, "learning_rate": 0.0008069467787114846, "loss": 0.3324, "step": 6998 }, { "epoch": 3.9100558659217874, "grad_norm": 0.6591435670852661, "learning_rate": 0.0008069187675070027, "loss": 0.4779, "step": 6999 }, { "epoch": 3.910614525139665, "grad_norm": 0.8748725652694702, "learning_rate": 0.000806890756302521, "loss": 0.4345, "step": 7000 }, { "epoch": 3.910614525139665, "eval_cer": 0.09470502874974088, "eval_loss": 0.35776227712631226, "eval_runtime": 55.4362, "eval_samples_per_second": 81.86, "eval_steps_per_second": 5.123, "eval_wer": 0.3764978631881791, "step": 7000 }, { "epoch": 3.911173184357542, "grad_norm": 0.6731039881706238, "learning_rate": 0.0008068627450980392, "loss": 0.4923, "step": 7001 }, { "epoch": 3.911731843575419, "grad_norm": 0.45611515641212463, "learning_rate": 0.0008068347338935575, "loss": 0.442, "step": 7002 }, { "epoch": 3.912290502793296, "grad_norm": 0.7433273196220398, "learning_rate": 0.0008068067226890757, "loss": 0.4177, "step": 7003 }, { "epoch": 3.912849162011173, "grad_norm": 0.9995244741439819, "learning_rate": 0.0008067787114845938, "loss": 0.4453, "step": 7004 }, { "epoch": 3.9134078212290504, "grad_norm": 0.5766714215278625, 
"learning_rate": 0.0008067507002801121, "loss": 0.3903, "step": 7005 }, { "epoch": 3.9139664804469274, "grad_norm": 0.6587193608283997, "learning_rate": 0.0008067226890756303, "loss": 0.6398, "step": 7006 }, { "epoch": 3.9145251396648044, "grad_norm": 0.5817322134971619, "learning_rate": 0.0008066946778711485, "loss": 0.5049, "step": 7007 }, { "epoch": 3.9150837988826814, "grad_norm": 0.5882250070571899, "learning_rate": 0.0008066666666666667, "loss": 0.5014, "step": 7008 }, { "epoch": 3.9156424581005584, "grad_norm": 0.4699248671531677, "learning_rate": 0.0008066386554621849, "loss": 0.4198, "step": 7009 }, { "epoch": 3.916201117318436, "grad_norm": 0.5861327648162842, "learning_rate": 0.0008066106442577031, "loss": 0.5021, "step": 7010 }, { "epoch": 3.916759776536313, "grad_norm": 0.6326070427894592, "learning_rate": 0.0008065826330532213, "loss": 0.3746, "step": 7011 }, { "epoch": 3.91731843575419, "grad_norm": 0.4733527898788452, "learning_rate": 0.0008065546218487395, "loss": 0.3857, "step": 7012 }, { "epoch": 3.917877094972067, "grad_norm": 0.8005883097648621, "learning_rate": 0.0008065266106442577, "loss": 0.4941, "step": 7013 }, { "epoch": 3.918435754189944, "grad_norm": 0.5382456183433533, "learning_rate": 0.0008064985994397759, "loss": 0.4692, "step": 7014 }, { "epoch": 3.9189944134078214, "grad_norm": 7.389460563659668, "learning_rate": 0.0008064705882352941, "loss": 0.4415, "step": 7015 }, { "epoch": 3.9195530726256984, "grad_norm": 0.47508105635643005, "learning_rate": 0.0008064425770308123, "loss": 0.4821, "step": 7016 }, { "epoch": 3.9201117318435754, "grad_norm": 0.557486355304718, "learning_rate": 0.0008064145658263305, "loss": 0.4912, "step": 7017 }, { "epoch": 3.9206703910614524, "grad_norm": 0.5815549492835999, "learning_rate": 0.0008063865546218487, "loss": 0.4798, "step": 7018 }, { "epoch": 3.9212290502793294, "grad_norm": 0.5075328946113586, "learning_rate": 0.000806358543417367, "loss": 0.3497, "step": 7019 }, { "epoch": 3.921787709497207, 
"grad_norm": 0.5094476938247681, "learning_rate": 0.0008063305322128853, "loss": 0.4987, "step": 7020 }, { "epoch": 3.922346368715084, "grad_norm": 0.5268604755401611, "learning_rate": 0.0008063025210084034, "loss": 0.5418, "step": 7021 }, { "epoch": 3.922905027932961, "grad_norm": 0.46845272183418274, "learning_rate": 0.0008062745098039216, "loss": 0.4105, "step": 7022 }, { "epoch": 3.923463687150838, "grad_norm": 4.790427207946777, "learning_rate": 0.0008062464985994398, "loss": 0.5401, "step": 7023 }, { "epoch": 3.924022346368715, "grad_norm": 0.6535236239433289, "learning_rate": 0.000806218487394958, "loss": 0.565, "step": 7024 }, { "epoch": 3.9245810055865924, "grad_norm": 0.5313844680786133, "learning_rate": 0.0008061904761904763, "loss": 0.4814, "step": 7025 }, { "epoch": 3.9251396648044694, "grad_norm": 0.5265361666679382, "learning_rate": 0.0008061624649859944, "loss": 0.4422, "step": 7026 }, { "epoch": 3.9256983240223464, "grad_norm": 0.46917101740837097, "learning_rate": 0.0008061344537815126, "loss": 0.4574, "step": 7027 }, { "epoch": 3.9262569832402234, "grad_norm": 1.8454087972640991, "learning_rate": 0.0008061064425770308, "loss": 0.4999, "step": 7028 }, { "epoch": 3.9268156424581004, "grad_norm": 0.6060447096824646, "learning_rate": 0.000806078431372549, "loss": 0.5635, "step": 7029 }, { "epoch": 3.927374301675978, "grad_norm": 0.512328028678894, "learning_rate": 0.0008060504201680673, "loss": 0.4391, "step": 7030 }, { "epoch": 3.927932960893855, "grad_norm": 0.5457378625869751, "learning_rate": 0.0008060224089635854, "loss": 0.4303, "step": 7031 }, { "epoch": 3.928491620111732, "grad_norm": 0.4917803406715393, "learning_rate": 0.0008059943977591036, "loss": 0.351, "step": 7032 }, { "epoch": 3.929050279329609, "grad_norm": 0.4893527925014496, "learning_rate": 0.0008059663865546218, "loss": 0.4786, "step": 7033 }, { "epoch": 3.929608938547486, "grad_norm": 0.5211455821990967, "learning_rate": 0.00080593837535014, "loss": 0.5069, "step": 7034 }, { 
"epoch": 3.9301675977653634, "grad_norm": 0.5341497659683228, "learning_rate": 0.0008059103641456584, "loss": 0.5896, "step": 7035 }, { "epoch": 3.93072625698324, "grad_norm": 0.6408417224884033, "learning_rate": 0.0008058823529411766, "loss": 0.4398, "step": 7036 }, { "epoch": 3.9312849162011174, "grad_norm": 0.7231680750846863, "learning_rate": 0.0008058543417366947, "loss": 0.5005, "step": 7037 }, { "epoch": 3.9318435754189944, "grad_norm": 0.890194833278656, "learning_rate": 0.0008058263305322129, "loss": 0.5027, "step": 7038 }, { "epoch": 3.9324022346368714, "grad_norm": 0.40565377473831177, "learning_rate": 0.0008057983193277311, "loss": 0.3459, "step": 7039 }, { "epoch": 3.9329608938547485, "grad_norm": 0.5612426996231079, "learning_rate": 0.0008057703081232494, "loss": 0.4142, "step": 7040 }, { "epoch": 3.9335195530726255, "grad_norm": 0.6588249206542969, "learning_rate": 0.0008057422969187676, "loss": 0.5066, "step": 7041 }, { "epoch": 3.934078212290503, "grad_norm": 8.109230041503906, "learning_rate": 0.0008057142857142857, "loss": 0.3917, "step": 7042 }, { "epoch": 3.93463687150838, "grad_norm": 6.23183536529541, "learning_rate": 0.0008056862745098039, "loss": 0.4999, "step": 7043 }, { "epoch": 3.935195530726257, "grad_norm": 0.6060693264007568, "learning_rate": 0.0008056582633053221, "loss": 0.4874, "step": 7044 }, { "epoch": 3.935754189944134, "grad_norm": 0.4651438891887665, "learning_rate": 0.0008056302521008404, "loss": 0.3899, "step": 7045 }, { "epoch": 3.936312849162011, "grad_norm": 0.39840027689933777, "learning_rate": 0.0008056022408963586, "loss": 0.3739, "step": 7046 }, { "epoch": 3.9368715083798884, "grad_norm": 0.4247986674308777, "learning_rate": 0.0008055742296918767, "loss": 0.4093, "step": 7047 }, { "epoch": 3.9374301675977654, "grad_norm": 0.6381890177726746, "learning_rate": 0.0008055462184873949, "loss": 0.4181, "step": 7048 }, { "epoch": 3.9379888268156424, "grad_norm": 0.6865086555480957, "learning_rate": 0.0008055182072829131, 
"loss": 0.5476, "step": 7049 }, { "epoch": 3.9385474860335195, "grad_norm": 0.8059674501419067, "learning_rate": 0.0008054901960784314, "loss": 0.6844, "step": 7050 }, { "epoch": 3.9391061452513965, "grad_norm": 0.41092970967292786, "learning_rate": 0.0008054621848739497, "loss": 0.3663, "step": 7051 }, { "epoch": 3.939664804469274, "grad_norm": 0.5282922387123108, "learning_rate": 0.0008054341736694679, "loss": 0.5227, "step": 7052 }, { "epoch": 3.940223463687151, "grad_norm": 0.7463955879211426, "learning_rate": 0.000805406162464986, "loss": 0.7564, "step": 7053 }, { "epoch": 3.940782122905028, "grad_norm": 0.8797734975814819, "learning_rate": 0.0008053781512605042, "loss": 0.4228, "step": 7054 }, { "epoch": 3.941340782122905, "grad_norm": 0.7570948004722595, "learning_rate": 0.0008053501400560225, "loss": 0.6247, "step": 7055 }, { "epoch": 3.941899441340782, "grad_norm": 0.4585045576095581, "learning_rate": 0.0008053221288515407, "loss": 0.4591, "step": 7056 }, { "epoch": 3.9424581005586594, "grad_norm": 0.6605092287063599, "learning_rate": 0.0008052941176470589, "loss": 0.443, "step": 7057 }, { "epoch": 3.9430167597765364, "grad_norm": 0.5274098515510559, "learning_rate": 0.000805266106442577, "loss": 0.5547, "step": 7058 }, { "epoch": 3.9435754189944134, "grad_norm": 0.5492943525314331, "learning_rate": 0.0008052380952380952, "loss": 0.4418, "step": 7059 }, { "epoch": 3.9441340782122905, "grad_norm": 0.5419865250587463, "learning_rate": 0.0008052100840336135, "loss": 0.6448, "step": 7060 }, { "epoch": 3.9446927374301675, "grad_norm": 0.7609917521476746, "learning_rate": 0.0008051820728291317, "loss": 0.6492, "step": 7061 }, { "epoch": 3.945251396648045, "grad_norm": 0.5737199187278748, "learning_rate": 0.0008051540616246499, "loss": 0.4242, "step": 7062 }, { "epoch": 3.945810055865922, "grad_norm": 0.4582638442516327, "learning_rate": 0.000805126050420168, "loss": 0.3942, "step": 7063 }, { "epoch": 3.946368715083799, "grad_norm": 0.5929429531097412, 
"learning_rate": 0.0008050980392156862, "loss": 0.4288, "step": 7064 }, { "epoch": 3.946927374301676, "grad_norm": 4.677650451660156, "learning_rate": 0.0008050700280112045, "loss": 0.4182, "step": 7065 }, { "epoch": 3.947486033519553, "grad_norm": 0.4796430170536041, "learning_rate": 0.0008050420168067227, "loss": 0.3749, "step": 7066 }, { "epoch": 3.9480446927374304, "grad_norm": 0.45303863286972046, "learning_rate": 0.000805014005602241, "loss": 0.4044, "step": 7067 }, { "epoch": 3.9486033519553074, "grad_norm": 0.5329473614692688, "learning_rate": 0.0008049859943977592, "loss": 0.4969, "step": 7068 }, { "epoch": 3.9491620111731844, "grad_norm": 0.7041875720024109, "learning_rate": 0.0008049579831932773, "loss": 0.5208, "step": 7069 }, { "epoch": 3.9497206703910615, "grad_norm": 0.381235808134079, "learning_rate": 0.0008049299719887956, "loss": 0.3891, "step": 7070 }, { "epoch": 3.9502793296089385, "grad_norm": 0.686011791229248, "learning_rate": 0.0008049019607843138, "loss": 0.4518, "step": 7071 }, { "epoch": 3.950837988826816, "grad_norm": 0.5750831961631775, "learning_rate": 0.000804873949579832, "loss": 0.4822, "step": 7072 }, { "epoch": 3.9513966480446925, "grad_norm": 0.4979250133037567, "learning_rate": 0.0008048459383753502, "loss": 0.4977, "step": 7073 }, { "epoch": 3.95195530726257, "grad_norm": 0.4827015697956085, "learning_rate": 0.0008048179271708683, "loss": 0.4273, "step": 7074 }, { "epoch": 3.952513966480447, "grad_norm": 0.4956649839878082, "learning_rate": 0.0008047899159663866, "loss": 0.4951, "step": 7075 }, { "epoch": 3.953072625698324, "grad_norm": 0.5707218050956726, "learning_rate": 0.0008047619047619048, "loss": 0.5498, "step": 7076 }, { "epoch": 3.953631284916201, "grad_norm": 0.6664177179336548, "learning_rate": 0.000804733893557423, "loss": 0.4399, "step": 7077 }, { "epoch": 3.954189944134078, "grad_norm": 0.6316648125648499, "learning_rate": 0.0008047058823529412, "loss": 0.3213, "step": 7078 }, { "epoch": 3.9547486033519554, 
"grad_norm": 0.8092297911643982, "learning_rate": 0.0008046778711484593, "loss": 0.4735, "step": 7079 }, { "epoch": 3.9553072625698324, "grad_norm": 0.5198471546173096, "learning_rate": 0.0008046498599439776, "loss": 0.4956, "step": 7080 }, { "epoch": 3.9558659217877095, "grad_norm": 0.4854726791381836, "learning_rate": 0.0008046218487394958, "loss": 0.3551, "step": 7081 }, { "epoch": 3.9564245810055865, "grad_norm": 2.2213189601898193, "learning_rate": 0.000804593837535014, "loss": 0.4437, "step": 7082 }, { "epoch": 3.9569832402234635, "grad_norm": 0.6036872267723083, "learning_rate": 0.0008045658263305322, "loss": 0.5081, "step": 7083 }, { "epoch": 3.957541899441341, "grad_norm": 0.747298538684845, "learning_rate": 0.0008045378151260505, "loss": 0.7727, "step": 7084 }, { "epoch": 3.958100558659218, "grad_norm": 0.5486094951629639, "learning_rate": 0.0008045098039215687, "loss": 0.6463, "step": 7085 }, { "epoch": 3.958659217877095, "grad_norm": 1.0138646364212036, "learning_rate": 0.0008044817927170869, "loss": 0.758, "step": 7086 }, { "epoch": 3.959217877094972, "grad_norm": 0.891136646270752, "learning_rate": 0.0008044537815126051, "loss": 0.6432, "step": 7087 }, { "epoch": 3.959776536312849, "grad_norm": 0.6499348282814026, "learning_rate": 0.0008044257703081233, "loss": 0.3983, "step": 7088 }, { "epoch": 3.9603351955307264, "grad_norm": 0.527060329914093, "learning_rate": 0.0008043977591036415, "loss": 0.4382, "step": 7089 }, { "epoch": 3.9608938547486034, "grad_norm": 0.6966915130615234, "learning_rate": 0.0008043697478991597, "loss": 0.4623, "step": 7090 }, { "epoch": 3.9614525139664805, "grad_norm": 0.4720066785812378, "learning_rate": 0.0008043417366946779, "loss": 0.47, "step": 7091 }, { "epoch": 3.9620111731843575, "grad_norm": 0.6764178276062012, "learning_rate": 0.0008043137254901961, "loss": 0.4105, "step": 7092 }, { "epoch": 3.9625698324022345, "grad_norm": 0.41640740633010864, "learning_rate": 0.0008042857142857143, "loss": 0.3539, "step": 7093 }, { 
"epoch": 3.963128491620112, "grad_norm": 0.36168813705444336, "learning_rate": 0.0008042577030812325, "loss": 0.383, "step": 7094 }, { "epoch": 3.963687150837989, "grad_norm": 0.7122403979301453, "learning_rate": 0.0008042296918767507, "loss": 0.7117, "step": 7095 }, { "epoch": 3.964245810055866, "grad_norm": 0.5665606260299683, "learning_rate": 0.0008042016806722689, "loss": 0.5116, "step": 7096 }, { "epoch": 3.964804469273743, "grad_norm": 0.6856590509414673, "learning_rate": 0.0008041736694677871, "loss": 0.4778, "step": 7097 }, { "epoch": 3.96536312849162, "grad_norm": 0.5604067444801331, "learning_rate": 0.0008041456582633053, "loss": 0.4897, "step": 7098 }, { "epoch": 3.9659217877094974, "grad_norm": 0.4804839789867401, "learning_rate": 0.0008041176470588235, "loss": 0.4412, "step": 7099 }, { "epoch": 3.9664804469273744, "grad_norm": 0.4849615693092346, "learning_rate": 0.0008040896358543419, "loss": 0.4329, "step": 7100 }, { "epoch": 3.9670391061452515, "grad_norm": 0.627479076385498, "learning_rate": 0.00080406162464986, "loss": 0.5344, "step": 7101 }, { "epoch": 3.9675977653631285, "grad_norm": 0.8258705139160156, "learning_rate": 0.0008040336134453782, "loss": 0.5079, "step": 7102 }, { "epoch": 3.9681564245810055, "grad_norm": 0.5508294701576233, "learning_rate": 0.0008040056022408964, "loss": 0.5612, "step": 7103 }, { "epoch": 3.968715083798883, "grad_norm": 0.5187811851501465, "learning_rate": 0.0008039775910364146, "loss": 0.3862, "step": 7104 }, { "epoch": 3.9692737430167595, "grad_norm": 0.7678642868995667, "learning_rate": 0.0008039495798319329, "loss": 0.4166, "step": 7105 }, { "epoch": 3.969832402234637, "grad_norm": 0.4972739815711975, "learning_rate": 0.000803921568627451, "loss": 0.3716, "step": 7106 }, { "epoch": 3.970391061452514, "grad_norm": 0.41464969515800476, "learning_rate": 0.0008038935574229692, "loss": 0.4396, "step": 7107 }, { "epoch": 3.970949720670391, "grad_norm": 0.5230623483657837, "learning_rate": 0.0008038655462184874, 
"loss": 0.4529, "step": 7108 }, { "epoch": 3.971508379888268, "grad_norm": 0.6593648195266724, "learning_rate": 0.0008038375350140056, "loss": 0.4248, "step": 7109 }, { "epoch": 3.972067039106145, "grad_norm": 0.41191044449806213, "learning_rate": 0.0008038095238095239, "loss": 0.3882, "step": 7110 }, { "epoch": 3.9726256983240225, "grad_norm": 0.6715481877326965, "learning_rate": 0.000803781512605042, "loss": 0.382, "step": 7111 }, { "epoch": 3.9731843575418995, "grad_norm": 0.47481071949005127, "learning_rate": 0.0008037535014005602, "loss": 0.4108, "step": 7112 }, { "epoch": 3.9737430167597765, "grad_norm": 1.0387659072875977, "learning_rate": 0.0008037254901960784, "loss": 0.5368, "step": 7113 }, { "epoch": 3.9743016759776535, "grad_norm": 0.7313138842582703, "learning_rate": 0.0008036974789915966, "loss": 0.5214, "step": 7114 }, { "epoch": 3.9748603351955305, "grad_norm": 0.6585182547569275, "learning_rate": 0.0008036694677871149, "loss": 0.3771, "step": 7115 }, { "epoch": 3.975418994413408, "grad_norm": 0.5558366179466248, "learning_rate": 0.0008036414565826332, "loss": 0.4826, "step": 7116 }, { "epoch": 3.975977653631285, "grad_norm": 0.7371494770050049, "learning_rate": 0.0008036134453781512, "loss": 0.7938, "step": 7117 }, { "epoch": 3.976536312849162, "grad_norm": 0.4658590257167816, "learning_rate": 0.0008035854341736695, "loss": 0.4104, "step": 7118 }, { "epoch": 3.977094972067039, "grad_norm": 0.695318877696991, "learning_rate": 0.0008035574229691877, "loss": 0.4875, "step": 7119 }, { "epoch": 3.977653631284916, "grad_norm": 0.7919595837593079, "learning_rate": 0.000803529411764706, "loss": 0.5135, "step": 7120 }, { "epoch": 3.9782122905027935, "grad_norm": 1.74186110496521, "learning_rate": 0.0008035014005602242, "loss": 0.545, "step": 7121 }, { "epoch": 3.9787709497206705, "grad_norm": 0.54842209815979, "learning_rate": 0.0008034733893557423, "loss": 0.4151, "step": 7122 }, { "epoch": 3.9793296089385475, "grad_norm": 0.8092365264892578, 
"learning_rate": 0.0008034453781512605, "loss": 0.4718, "step": 7123 }, { "epoch": 3.9798882681564245, "grad_norm": 0.8299870491027832, "learning_rate": 0.0008034173669467787, "loss": 0.4668, "step": 7124 }, { "epoch": 3.9804469273743015, "grad_norm": 2.1802937984466553, "learning_rate": 0.000803389355742297, "loss": 0.6053, "step": 7125 }, { "epoch": 3.981005586592179, "grad_norm": 0.43331030011177063, "learning_rate": 0.0008033613445378152, "loss": 0.4514, "step": 7126 }, { "epoch": 3.981564245810056, "grad_norm": 0.45537856221199036, "learning_rate": 0.0008033333333333333, "loss": 0.4293, "step": 7127 }, { "epoch": 3.982122905027933, "grad_norm": 0.5720996260643005, "learning_rate": 0.0008033053221288515, "loss": 0.4547, "step": 7128 }, { "epoch": 3.98268156424581, "grad_norm": 0.42259424924850464, "learning_rate": 0.0008032773109243697, "loss": 0.4906, "step": 7129 }, { "epoch": 3.983240223463687, "grad_norm": 0.4236142933368683, "learning_rate": 0.000803249299719888, "loss": 0.4155, "step": 7130 }, { "epoch": 3.9837988826815645, "grad_norm": 0.45177119970321655, "learning_rate": 0.0008032212885154062, "loss": 0.4964, "step": 7131 }, { "epoch": 3.9843575418994415, "grad_norm": 0.6593208909034729, "learning_rate": 0.0008031932773109244, "loss": 0.6244, "step": 7132 }, { "epoch": 3.9849162011173185, "grad_norm": 0.53520268201828, "learning_rate": 0.0008031652661064425, "loss": 0.5647, "step": 7133 }, { "epoch": 3.9854748603351955, "grad_norm": 0.9825749397277832, "learning_rate": 0.0008031372549019608, "loss": 0.4624, "step": 7134 }, { "epoch": 3.9860335195530725, "grad_norm": 0.6735973358154297, "learning_rate": 0.000803109243697479, "loss": 0.4715, "step": 7135 }, { "epoch": 3.98659217877095, "grad_norm": 0.5279591083526611, "learning_rate": 0.0008030812324929973, "loss": 0.4966, "step": 7136 }, { "epoch": 3.987150837988827, "grad_norm": 0.9416318535804749, "learning_rate": 0.0008030532212885155, "loss": 0.3927, "step": 7137 }, { "epoch": 3.987709497206704, 
"grad_norm": 0.4318915605545044, "learning_rate": 0.0008030252100840336, "loss": 0.4582, "step": 7138 }, { "epoch": 3.988268156424581, "grad_norm": 0.5767625570297241, "learning_rate": 0.0008029971988795518, "loss": 0.4547, "step": 7139 }, { "epoch": 3.988826815642458, "grad_norm": 0.5418605208396912, "learning_rate": 0.00080296918767507, "loss": 0.3949, "step": 7140 }, { "epoch": 3.9893854748603355, "grad_norm": 0.3813818097114563, "learning_rate": 0.0008029411764705883, "loss": 0.4219, "step": 7141 }, { "epoch": 3.989944134078212, "grad_norm": 0.7461854219436646, "learning_rate": 0.0008029131652661065, "loss": 0.4156, "step": 7142 }, { "epoch": 3.9905027932960895, "grad_norm": 0.40891969203948975, "learning_rate": 0.0008028851540616246, "loss": 0.4124, "step": 7143 }, { "epoch": 3.9910614525139665, "grad_norm": 3.145090341567993, "learning_rate": 0.0008028571428571428, "loss": 0.4219, "step": 7144 }, { "epoch": 3.9916201117318435, "grad_norm": 0.4977027475833893, "learning_rate": 0.000802829131652661, "loss": 0.43, "step": 7145 }, { "epoch": 3.9921787709497205, "grad_norm": 0.6141577959060669, "learning_rate": 0.0008028011204481793, "loss": 0.4784, "step": 7146 }, { "epoch": 3.9927374301675975, "grad_norm": 0.4877220094203949, "learning_rate": 0.0008027731092436975, "loss": 0.4408, "step": 7147 }, { "epoch": 3.993296089385475, "grad_norm": 6.336646556854248, "learning_rate": 0.0008027450980392157, "loss": 0.6765, "step": 7148 }, { "epoch": 3.993854748603352, "grad_norm": 0.48627859354019165, "learning_rate": 0.0008027170868347338, "loss": 0.411, "step": 7149 }, { "epoch": 3.994413407821229, "grad_norm": 0.6415491700172424, "learning_rate": 0.000802689075630252, "loss": 0.4663, "step": 7150 }, { "epoch": 3.994972067039106, "grad_norm": 0.628501296043396, "learning_rate": 0.0008026610644257704, "loss": 0.5153, "step": 7151 }, { "epoch": 3.995530726256983, "grad_norm": 0.3527093529701233, "learning_rate": 0.0008026330532212886, "loss": 0.3798, "step": 7152 }, { 
"epoch": 3.9960893854748605, "grad_norm": 0.388369619846344, "learning_rate": 0.0008026050420168068, "loss": 0.333, "step": 7153 }, { "epoch": 3.9966480446927375, "grad_norm": 0.6742631793022156, "learning_rate": 0.0008025770308123249, "loss": 0.4374, "step": 7154 }, { "epoch": 3.9972067039106145, "grad_norm": 1.166015863418579, "learning_rate": 0.0008025490196078431, "loss": 0.5468, "step": 7155 }, { "epoch": 3.9977653631284915, "grad_norm": 1.0816982984542847, "learning_rate": 0.0008025210084033614, "loss": 0.4357, "step": 7156 }, { "epoch": 3.9983240223463685, "grad_norm": 0.39572039246559143, "learning_rate": 0.0008024929971988796, "loss": 0.3401, "step": 7157 }, { "epoch": 3.998882681564246, "grad_norm": 0.5916157364845276, "learning_rate": 0.0008024649859943978, "loss": 0.4312, "step": 7158 }, { "epoch": 3.999441340782123, "grad_norm": 5.927062511444092, "learning_rate": 0.0008024369747899159, "loss": 0.5623, "step": 7159 }, { "epoch": 4.0, "grad_norm": 1.4475934505462646, "learning_rate": 0.0008024089635854341, "loss": 0.5796, "step": 7160 }, { "epoch": 4.0005586592178775, "grad_norm": 0.6633867025375366, "learning_rate": 0.0008023809523809524, "loss": 0.3719, "step": 7161 }, { "epoch": 4.001117318435754, "grad_norm": 0.7885862588882446, "learning_rate": 0.0008023529411764706, "loss": 0.629, "step": 7162 }, { "epoch": 4.0016759776536315, "grad_norm": 2.0921740531921387, "learning_rate": 0.0008023249299719888, "loss": 0.4163, "step": 7163 }, { "epoch": 4.002234636871508, "grad_norm": 3.035512924194336, "learning_rate": 0.000802296918767507, "loss": 0.4565, "step": 7164 }, { "epoch": 4.0027932960893855, "grad_norm": 0.6488288044929504, "learning_rate": 0.0008022689075630251, "loss": 0.4962, "step": 7165 }, { "epoch": 4.003351955307263, "grad_norm": 0.852614164352417, "learning_rate": 0.0008022408963585435, "loss": 0.4056, "step": 7166 }, { "epoch": 4.0039106145251395, "grad_norm": 0.5377743244171143, "learning_rate": 0.0008022128851540617, "loss": 0.4342, 
"step": 7167 }, { "epoch": 4.004469273743017, "grad_norm": 2.5451347827911377, "learning_rate": 0.0008021848739495799, "loss": 0.5396, "step": 7168 }, { "epoch": 4.0050279329608935, "grad_norm": 0.4638262987136841, "learning_rate": 0.0008021568627450981, "loss": 0.5815, "step": 7169 }, { "epoch": 4.005586592178771, "grad_norm": 0.7978700995445251, "learning_rate": 0.0008021288515406162, "loss": 0.532, "step": 7170 }, { "epoch": 4.0061452513966485, "grad_norm": 0.6896904706954956, "learning_rate": 0.0008021008403361345, "loss": 0.4431, "step": 7171 }, { "epoch": 4.006703910614525, "grad_norm": 0.5434969067573547, "learning_rate": 0.0008020728291316527, "loss": 0.4866, "step": 7172 }, { "epoch": 4.0072625698324025, "grad_norm": 1.9140547513961792, "learning_rate": 0.0008020448179271709, "loss": 0.4938, "step": 7173 }, { "epoch": 4.007821229050279, "grad_norm": 0.5029609203338623, "learning_rate": 0.0008020168067226891, "loss": 0.4285, "step": 7174 }, { "epoch": 4.0083798882681565, "grad_norm": 4.06582498550415, "learning_rate": 0.0008019887955182072, "loss": 0.5401, "step": 7175 }, { "epoch": 4.008938547486034, "grad_norm": 0.7806519865989685, "learning_rate": 0.0008019607843137255, "loss": 0.5359, "step": 7176 }, { "epoch": 4.0094972067039105, "grad_norm": 0.5114393830299377, "learning_rate": 0.0008019327731092437, "loss": 0.4199, "step": 7177 }, { "epoch": 4.010055865921788, "grad_norm": 0.4742944538593292, "learning_rate": 0.0008019047619047619, "loss": 0.4514, "step": 7178 }, { "epoch": 4.0106145251396645, "grad_norm": 0.9224108457565308, "learning_rate": 0.0008018767507002801, "loss": 0.4562, "step": 7179 }, { "epoch": 4.011173184357542, "grad_norm": 0.3695765435695648, "learning_rate": 0.0008018487394957983, "loss": 0.4639, "step": 7180 }, { "epoch": 4.011731843575419, "grad_norm": 0.5876614451408386, "learning_rate": 0.0008018207282913165, "loss": 0.3645, "step": 7181 }, { "epoch": 4.012290502793296, "grad_norm": 1.222946286201477, "learning_rate": 
0.0008017927170868347, "loss": 0.5316, "step": 7182 }, { "epoch": 4.0128491620111735, "grad_norm": 0.7444854974746704, "learning_rate": 0.000801764705882353, "loss": 0.4371, "step": 7183 }, { "epoch": 4.01340782122905, "grad_norm": 0.7403766512870789, "learning_rate": 0.0008017366946778712, "loss": 0.423, "step": 7184 }, { "epoch": 4.0139664804469275, "grad_norm": 0.5357937216758728, "learning_rate": 0.0008017086834733894, "loss": 0.4277, "step": 7185 }, { "epoch": 4.014525139664804, "grad_norm": 1.3541953563690186, "learning_rate": 0.0008016806722689076, "loss": 0.4835, "step": 7186 }, { "epoch": 4.0150837988826815, "grad_norm": 0.7186394333839417, "learning_rate": 0.0008016526610644258, "loss": 0.4908, "step": 7187 }, { "epoch": 4.015642458100559, "grad_norm": 0.5224939584732056, "learning_rate": 0.000801624649859944, "loss": 0.6235, "step": 7188 }, { "epoch": 4.0162011173184355, "grad_norm": 0.5467060804367065, "learning_rate": 0.0008015966386554622, "loss": 0.4844, "step": 7189 }, { "epoch": 4.016759776536313, "grad_norm": 0.8589743375778198, "learning_rate": 0.0008015686274509804, "loss": 0.5053, "step": 7190 }, { "epoch": 4.01731843575419, "grad_norm": 2.804008722305298, "learning_rate": 0.0008015406162464987, "loss": 0.6737, "step": 7191 }, { "epoch": 4.017877094972067, "grad_norm": 0.38436779379844666, "learning_rate": 0.0008015126050420168, "loss": 0.3745, "step": 7192 }, { "epoch": 4.0184357541899445, "grad_norm": 0.5819133520126343, "learning_rate": 0.000801484593837535, "loss": 0.4057, "step": 7193 }, { "epoch": 4.018994413407821, "grad_norm": 0.7214066386222839, "learning_rate": 0.0008014565826330532, "loss": 0.5086, "step": 7194 }, { "epoch": 4.0195530726256985, "grad_norm": 0.4668353199958801, "learning_rate": 0.0008014285714285714, "loss": 0.4019, "step": 7195 }, { "epoch": 4.020111731843575, "grad_norm": 0.6879488229751587, "learning_rate": 0.0008014005602240897, "loss": 0.379, "step": 7196 }, { "epoch": 4.0206703910614525, "grad_norm": 
0.7188233733177185, "learning_rate": 0.0008013725490196078, "loss": 0.3883, "step": 7197 }, { "epoch": 4.02122905027933, "grad_norm": 0.4577838182449341, "learning_rate": 0.000801344537815126, "loss": 0.469, "step": 7198 }, { "epoch": 4.0217877094972065, "grad_norm": 0.4861854910850525, "learning_rate": 0.0008013165266106442, "loss": 0.3752, "step": 7199 }, { "epoch": 4.022346368715084, "grad_norm": 0.6420953869819641, "learning_rate": 0.0008012885154061625, "loss": 0.4756, "step": 7200 }, { "epoch": 4.022905027932961, "grad_norm": 0.41528305411338806, "learning_rate": 0.0008012605042016808, "loss": 0.3527, "step": 7201 }, { "epoch": 4.023463687150838, "grad_norm": 0.7263068556785583, "learning_rate": 0.0008012324929971989, "loss": 0.5823, "step": 7202 }, { "epoch": 4.0240223463687155, "grad_norm": 0.5570254325866699, "learning_rate": 0.0008012044817927171, "loss": 0.4963, "step": 7203 }, { "epoch": 4.024581005586592, "grad_norm": 0.5761282444000244, "learning_rate": 0.0008011764705882353, "loss": 0.3855, "step": 7204 }, { "epoch": 4.0251396648044695, "grad_norm": 1.0314967632293701, "learning_rate": 0.0008011484593837535, "loss": 0.4605, "step": 7205 }, { "epoch": 4.025698324022346, "grad_norm": 0.5157830119132996, "learning_rate": 0.0008011204481792718, "loss": 0.6801, "step": 7206 }, { "epoch": 4.0262569832402235, "grad_norm": 0.5801483988761902, "learning_rate": 0.00080109243697479, "loss": 0.5254, "step": 7207 }, { "epoch": 4.026815642458101, "grad_norm": 1.4835443496704102, "learning_rate": 0.0008010644257703081, "loss": 0.5329, "step": 7208 }, { "epoch": 4.0273743016759775, "grad_norm": 0.48205339908599854, "learning_rate": 0.0008010364145658263, "loss": 0.4851, "step": 7209 }, { "epoch": 4.027932960893855, "grad_norm": 0.5513060092926025, "learning_rate": 0.0008010084033613445, "loss": 0.4656, "step": 7210 }, { "epoch": 4.028491620111732, "grad_norm": 0.703374981880188, "learning_rate": 0.0008009803921568628, "loss": 0.4033, "step": 7211 }, { "epoch": 
4.029050279329609, "grad_norm": 1.6137562990188599, "learning_rate": 0.000800952380952381, "loss": 0.5413, "step": 7212 }, { "epoch": 4.0296089385474865, "grad_norm": 0.39633744955062866, "learning_rate": 0.0008009243697478991, "loss": 0.4198, "step": 7213 }, { "epoch": 4.030167597765363, "grad_norm": 1.209067702293396, "learning_rate": 0.0008008963585434173, "loss": 0.5209, "step": 7214 }, { "epoch": 4.0307262569832405, "grad_norm": 0.7100725769996643, "learning_rate": 0.0008008683473389355, "loss": 0.5627, "step": 7215 }, { "epoch": 4.031284916201117, "grad_norm": 0.5386775732040405, "learning_rate": 0.0008008403361344539, "loss": 0.5655, "step": 7216 }, { "epoch": 4.0318435754189945, "grad_norm": 0.5487700700759888, "learning_rate": 0.0008008123249299721, "loss": 0.4431, "step": 7217 }, { "epoch": 4.032402234636871, "grad_norm": 0.4018005430698395, "learning_rate": 0.0008007843137254902, "loss": 0.3433, "step": 7218 }, { "epoch": 4.0329608938547485, "grad_norm": 0.49806350469589233, "learning_rate": 0.0008007563025210084, "loss": 0.5024, "step": 7219 }, { "epoch": 4.033519553072626, "grad_norm": 0.6888643503189087, "learning_rate": 0.0008007282913165266, "loss": 0.4066, "step": 7220 }, { "epoch": 4.034078212290503, "grad_norm": 5.580629825592041, "learning_rate": 0.0008007002801120449, "loss": 0.5298, "step": 7221 }, { "epoch": 4.03463687150838, "grad_norm": 0.8422892093658447, "learning_rate": 0.0008006722689075631, "loss": 0.4032, "step": 7222 }, { "epoch": 4.035195530726257, "grad_norm": 0.5582205653190613, "learning_rate": 0.0008006442577030813, "loss": 0.3891, "step": 7223 }, { "epoch": 4.035754189944134, "grad_norm": 2.858771562576294, "learning_rate": 0.0008006162464985994, "loss": 0.4365, "step": 7224 }, { "epoch": 4.0363128491620115, "grad_norm": 0.8811796307563782, "learning_rate": 0.0008005882352941176, "loss": 0.5947, "step": 7225 }, { "epoch": 4.036871508379888, "grad_norm": 0.5713050365447998, "learning_rate": 0.0008005602240896359, "loss": 0.4606, 
"step": 7226 }, { "epoch": 4.0374301675977655, "grad_norm": 0.4544447064399719, "learning_rate": 0.0008005322128851541, "loss": 0.4567, "step": 7227 }, { "epoch": 4.037988826815642, "grad_norm": 0.4898993670940399, "learning_rate": 0.0008005042016806723, "loss": 0.4513, "step": 7228 }, { "epoch": 4.0385474860335195, "grad_norm": 0.4296659529209137, "learning_rate": 0.0008004761904761904, "loss": 0.3623, "step": 7229 }, { "epoch": 4.039106145251397, "grad_norm": 0.5693150758743286, "learning_rate": 0.0008004481792717086, "loss": 0.5068, "step": 7230 }, { "epoch": 4.039664804469274, "grad_norm": 0.5388212203979492, "learning_rate": 0.000800420168067227, "loss": 0.4271, "step": 7231 }, { "epoch": 4.040223463687151, "grad_norm": 0.5692974328994751, "learning_rate": 0.0008003921568627452, "loss": 0.4069, "step": 7232 }, { "epoch": 4.040782122905028, "grad_norm": 0.4252888262271881, "learning_rate": 0.0008003641456582634, "loss": 0.3949, "step": 7233 }, { "epoch": 4.041340782122905, "grad_norm": 0.41795429587364197, "learning_rate": 0.0008003361344537815, "loss": 0.4216, "step": 7234 }, { "epoch": 4.0418994413407825, "grad_norm": 0.5262228846549988, "learning_rate": 0.0008003081232492997, "loss": 0.4599, "step": 7235 }, { "epoch": 4.042458100558659, "grad_norm": 0.7310873866081238, "learning_rate": 0.000800280112044818, "loss": 0.413, "step": 7236 }, { "epoch": 4.0430167597765365, "grad_norm": 0.734113872051239, "learning_rate": 0.0008002521008403362, "loss": 0.4544, "step": 7237 }, { "epoch": 4.043575418994413, "grad_norm": 0.5125024914741516, "learning_rate": 0.0008002240896358544, "loss": 0.3456, "step": 7238 }, { "epoch": 4.0441340782122905, "grad_norm": 0.7371616363525391, "learning_rate": 0.0008001960784313726, "loss": 0.4838, "step": 7239 }, { "epoch": 4.044692737430168, "grad_norm": 0.43709471821784973, "learning_rate": 0.0008001680672268907, "loss": 0.4565, "step": 7240 }, { "epoch": 4.045251396648045, "grad_norm": 1.1052905321121216, "learning_rate": 
0.000800140056022409, "loss": 0.3689, "step": 7241 }, { "epoch": 4.045810055865922, "grad_norm": 0.48502761125564575, "learning_rate": 0.0008001120448179272, "loss": 0.4328, "step": 7242 }, { "epoch": 4.046368715083799, "grad_norm": 0.5574578046798706, "learning_rate": 0.0008000840336134454, "loss": 0.4388, "step": 7243 }, { "epoch": 4.046927374301676, "grad_norm": 2.0033042430877686, "learning_rate": 0.0008000560224089636, "loss": 0.3816, "step": 7244 }, { "epoch": 4.0474860335195535, "grad_norm": 0.6378182172775269, "learning_rate": 0.0008000280112044817, "loss": 0.4358, "step": 7245 }, { "epoch": 4.04804469273743, "grad_norm": 0.497710257768631, "learning_rate": 0.0008, "loss": 0.454, "step": 7246 }, { "epoch": 4.0486033519553075, "grad_norm": 0.7362669110298157, "learning_rate": 0.0007999719887955182, "loss": 0.3975, "step": 7247 }, { "epoch": 4.049162011173184, "grad_norm": 0.5754220485687256, "learning_rate": 0.0007999439775910365, "loss": 0.3739, "step": 7248 }, { "epoch": 4.0497206703910615, "grad_norm": 0.9323798418045044, "learning_rate": 0.0007999159663865547, "loss": 0.4541, "step": 7249 }, { "epoch": 4.050279329608939, "grad_norm": 0.45945262908935547, "learning_rate": 0.0007998879551820728, "loss": 0.3667, "step": 7250 }, { "epoch": 4.050837988826816, "grad_norm": 0.4202263355255127, "learning_rate": 0.0007998599439775911, "loss": 0.4153, "step": 7251 }, { "epoch": 4.051396648044693, "grad_norm": 0.4688238799571991, "learning_rate": 0.0007998319327731093, "loss": 0.3714, "step": 7252 }, { "epoch": 4.05195530726257, "grad_norm": 0.5398940443992615, "learning_rate": 0.0007998039215686275, "loss": 0.2927, "step": 7253 }, { "epoch": 4.052513966480447, "grad_norm": 0.9062156677246094, "learning_rate": 0.0007997759103641457, "loss": 0.4665, "step": 7254 }, { "epoch": 4.053072625698324, "grad_norm": 0.4088192582130432, "learning_rate": 0.0007997478991596639, "loss": 0.4099, "step": 7255 }, { "epoch": 4.053631284916201, "grad_norm": 0.44694456458091736, 
"learning_rate": 0.0007997198879551821, "loss": 0.4874, "step": 7256 }, { "epoch": 4.0541899441340785, "grad_norm": 0.7342076897621155, "learning_rate": 0.0007996918767507003, "loss": 0.614, "step": 7257 }, { "epoch": 4.054748603351955, "grad_norm": 0.5657563805580139, "learning_rate": 0.0007996638655462185, "loss": 0.4475, "step": 7258 }, { "epoch": 4.0553072625698325, "grad_norm": 1.0953336954116821, "learning_rate": 0.0007996358543417367, "loss": 0.3817, "step": 7259 }, { "epoch": 4.055865921787709, "grad_norm": 0.6628221273422241, "learning_rate": 0.0007996078431372549, "loss": 0.5107, "step": 7260 }, { "epoch": 4.056424581005587, "grad_norm": 0.711455762386322, "learning_rate": 0.0007995798319327731, "loss": 0.4442, "step": 7261 }, { "epoch": 4.056983240223464, "grad_norm": 0.589642345905304, "learning_rate": 0.0007995518207282913, "loss": 0.4535, "step": 7262 }, { "epoch": 4.057541899441341, "grad_norm": 0.42798563838005066, "learning_rate": 0.0007995238095238095, "loss": 0.3706, "step": 7263 }, { "epoch": 4.058100558659218, "grad_norm": 0.49190956354141235, "learning_rate": 0.0007994957983193277, "loss": 0.5598, "step": 7264 }, { "epoch": 4.058659217877095, "grad_norm": 0.6466832756996155, "learning_rate": 0.000799467787114846, "loss": 0.4035, "step": 7265 }, { "epoch": 4.059217877094972, "grad_norm": 0.7363291382789612, "learning_rate": 0.0007994397759103642, "loss": 0.3721, "step": 7266 }, { "epoch": 4.0597765363128495, "grad_norm": 0.4434452950954437, "learning_rate": 0.0007994117647058824, "loss": 0.4365, "step": 7267 }, { "epoch": 4.060335195530726, "grad_norm": 1.9777475595474243, "learning_rate": 0.0007993837535014006, "loss": 0.5794, "step": 7268 }, { "epoch": 4.0608938547486035, "grad_norm": 0.4545868933200836, "learning_rate": 0.0007993557422969188, "loss": 0.373, "step": 7269 }, { "epoch": 4.06145251396648, "grad_norm": 0.5304999351501465, "learning_rate": 0.000799327731092437, "loss": 0.5186, "step": 7270 }, { "epoch": 4.062011173184358, 
"grad_norm": 0.4710586369037628, "learning_rate": 0.0007992997198879553, "loss": 0.4441, "step": 7271 }, { "epoch": 4.062569832402235, "grad_norm": 0.5025726556777954, "learning_rate": 0.0007992717086834734, "loss": 0.5378, "step": 7272 }, { "epoch": 4.063128491620112, "grad_norm": 0.7614200115203857, "learning_rate": 0.0007992436974789916, "loss": 0.4685, "step": 7273 }, { "epoch": 4.063687150837989, "grad_norm": 0.6292237043380737, "learning_rate": 0.0007992156862745098, "loss": 0.5527, "step": 7274 }, { "epoch": 4.064245810055866, "grad_norm": 0.5824453234672546, "learning_rate": 0.000799187675070028, "loss": 0.4934, "step": 7275 }, { "epoch": 4.064804469273743, "grad_norm": 0.43286553025245667, "learning_rate": 0.0007991596638655463, "loss": 0.357, "step": 7276 }, { "epoch": 4.0653631284916205, "grad_norm": 0.6792674660682678, "learning_rate": 0.0007991316526610644, "loss": 0.4923, "step": 7277 }, { "epoch": 4.065921787709497, "grad_norm": 0.4438602924346924, "learning_rate": 0.0007991036414565826, "loss": 0.4477, "step": 7278 }, { "epoch": 4.0664804469273745, "grad_norm": 0.47576025128364563, "learning_rate": 0.0007990756302521008, "loss": 0.5118, "step": 7279 }, { "epoch": 4.067039106145251, "grad_norm": 0.6798005104064941, "learning_rate": 0.000799047619047619, "loss": 0.448, "step": 7280 }, { "epoch": 4.067597765363129, "grad_norm": 0.5426568984985352, "learning_rate": 0.0007990196078431374, "loss": 0.3574, "step": 7281 }, { "epoch": 4.068156424581006, "grad_norm": 0.5556155443191528, "learning_rate": 0.0007989915966386555, "loss": 0.4929, "step": 7282 }, { "epoch": 4.068715083798883, "grad_norm": 0.5068587064743042, "learning_rate": 0.0007989635854341737, "loss": 0.4859, "step": 7283 }, { "epoch": 4.06927374301676, "grad_norm": 0.6078105568885803, "learning_rate": 0.0007989355742296919, "loss": 0.4158, "step": 7284 }, { "epoch": 4.069832402234637, "grad_norm": 2.5254971981048584, "learning_rate": 0.0007989075630252101, "loss": 0.3766, "step": 7285 }, { 
"epoch": 4.070391061452514, "grad_norm": 0.4751959443092346, "learning_rate": 0.0007988795518207284, "loss": 0.4775, "step": 7286 }, { "epoch": 4.070949720670391, "grad_norm": 0.8404338955879211, "learning_rate": 0.0007988515406162466, "loss": 0.4173, "step": 7287 }, { "epoch": 4.071508379888268, "grad_norm": 0.4527067244052887, "learning_rate": 0.0007988235294117647, "loss": 0.3846, "step": 7288 }, { "epoch": 4.0720670391061455, "grad_norm": 0.5585878491401672, "learning_rate": 0.0007987955182072829, "loss": 0.4817, "step": 7289 }, { "epoch": 4.072625698324022, "grad_norm": 0.543225884437561, "learning_rate": 0.0007987675070028011, "loss": 0.4698, "step": 7290 }, { "epoch": 4.0731843575419, "grad_norm": 0.6706470847129822, "learning_rate": 0.0007987394957983194, "loss": 0.5246, "step": 7291 }, { "epoch": 4.073743016759776, "grad_norm": 0.5255739092826843, "learning_rate": 0.0007987114845938376, "loss": 0.5613, "step": 7292 }, { "epoch": 4.074301675977654, "grad_norm": 0.8166330456733704, "learning_rate": 0.0007986834733893557, "loss": 0.5181, "step": 7293 }, { "epoch": 4.074860335195531, "grad_norm": 0.4089374542236328, "learning_rate": 0.0007986554621848739, "loss": 0.4353, "step": 7294 }, { "epoch": 4.075418994413408, "grad_norm": 0.43580949306488037, "learning_rate": 0.0007986274509803921, "loss": 0.4298, "step": 7295 }, { "epoch": 4.075977653631285, "grad_norm": 0.5619518756866455, "learning_rate": 0.0007985994397759104, "loss": 0.4126, "step": 7296 }, { "epoch": 4.076536312849162, "grad_norm": 0.6587222814559937, "learning_rate": 0.0007985714285714287, "loss": 0.5342, "step": 7297 }, { "epoch": 4.077094972067039, "grad_norm": 0.7862265706062317, "learning_rate": 0.0007985434173669468, "loss": 0.4177, "step": 7298 }, { "epoch": 4.0776536312849165, "grad_norm": 0.4547424018383026, "learning_rate": 0.000798515406162465, "loss": 0.4669, "step": 7299 }, { "epoch": 4.078212290502793, "grad_norm": 0.5724118947982788, "learning_rate": 0.0007984873949579832, "loss": 
0.5051, "step": 7300 }, { "epoch": 4.078770949720671, "grad_norm": 0.41616714000701904, "learning_rate": 0.0007984593837535015, "loss": 0.4388, "step": 7301 }, { "epoch": 4.079329608938547, "grad_norm": 0.5706039071083069, "learning_rate": 0.0007984313725490197, "loss": 0.4561, "step": 7302 }, { "epoch": 4.079888268156425, "grad_norm": 0.8279286623001099, "learning_rate": 0.0007984033613445379, "loss": 0.4807, "step": 7303 }, { "epoch": 4.080446927374302, "grad_norm": 0.5855101943016052, "learning_rate": 0.000798375350140056, "loss": 0.4687, "step": 7304 }, { "epoch": 4.081005586592179, "grad_norm": 0.4317261576652527, "learning_rate": 0.0007983473389355742, "loss": 0.5011, "step": 7305 }, { "epoch": 4.081564245810056, "grad_norm": 0.535336971282959, "learning_rate": 0.0007983193277310925, "loss": 0.5249, "step": 7306 }, { "epoch": 4.082122905027933, "grad_norm": 0.6459070444107056, "learning_rate": 0.0007982913165266107, "loss": 0.4623, "step": 7307 }, { "epoch": 4.08268156424581, "grad_norm": 0.37297797203063965, "learning_rate": 0.0007982633053221289, "loss": 0.4072, "step": 7308 }, { "epoch": 4.0832402234636875, "grad_norm": 0.48094600439071655, "learning_rate": 0.000798235294117647, "loss": 0.4192, "step": 7309 }, { "epoch": 4.083798882681564, "grad_norm": 0.5292118191719055, "learning_rate": 0.0007982072829131652, "loss": 0.4138, "step": 7310 }, { "epoch": 4.084357541899442, "grad_norm": 0.4719665050506592, "learning_rate": 0.0007981792717086835, "loss": 0.3865, "step": 7311 }, { "epoch": 4.084916201117318, "grad_norm": 0.4022238850593567, "learning_rate": 0.0007981512605042017, "loss": 0.3987, "step": 7312 }, { "epoch": 4.085474860335196, "grad_norm": 0.38969457149505615, "learning_rate": 0.00079812324929972, "loss": 0.5041, "step": 7313 }, { "epoch": 4.086033519553073, "grad_norm": 0.6393736600875854, "learning_rate": 0.000798095238095238, "loss": 0.4858, "step": 7314 }, { "epoch": 4.08659217877095, "grad_norm": 0.4996229410171509, "learning_rate": 
0.0007980672268907563, "loss": 0.4383, "step": 7315 }, { "epoch": 4.087150837988827, "grad_norm": 0.9065155386924744, "learning_rate": 0.0007980392156862746, "loss": 0.6391, "step": 7316 }, { "epoch": 4.087709497206704, "grad_norm": 0.49954017996788025, "learning_rate": 0.0007980112044817928, "loss": 0.5345, "step": 7317 }, { "epoch": 4.088268156424581, "grad_norm": 0.4392645061016083, "learning_rate": 0.000797983193277311, "loss": 0.4522, "step": 7318 }, { "epoch": 4.0888268156424585, "grad_norm": 0.8859760165214539, "learning_rate": 0.0007979551820728292, "loss": 0.3613, "step": 7319 }, { "epoch": 4.089385474860335, "grad_norm": 0.6901087760925293, "learning_rate": 0.0007979271708683473, "loss": 0.428, "step": 7320 }, { "epoch": 4.089944134078213, "grad_norm": 0.44879892468452454, "learning_rate": 0.0007978991596638656, "loss": 0.3359, "step": 7321 }, { "epoch": 4.090502793296089, "grad_norm": 0.9341223239898682, "learning_rate": 0.0007978711484593838, "loss": 0.4614, "step": 7322 }, { "epoch": 4.091061452513967, "grad_norm": 1.3537898063659668, "learning_rate": 0.000797843137254902, "loss": 0.423, "step": 7323 }, { "epoch": 4.091620111731843, "grad_norm": 0.5254029035568237, "learning_rate": 0.0007978151260504202, "loss": 0.4986, "step": 7324 }, { "epoch": 4.092178770949721, "grad_norm": 0.5618382692337036, "learning_rate": 0.0007977871148459383, "loss": 0.6393, "step": 7325 }, { "epoch": 4.092737430167598, "grad_norm": 0.6128014922142029, "learning_rate": 0.0007977591036414566, "loss": 0.471, "step": 7326 }, { "epoch": 4.093296089385475, "grad_norm": 0.46432483196258545, "learning_rate": 0.0007977310924369748, "loss": 0.3646, "step": 7327 }, { "epoch": 4.093854748603352, "grad_norm": 0.5539751648902893, "learning_rate": 0.000797703081232493, "loss": 0.5108, "step": 7328 }, { "epoch": 4.094413407821229, "grad_norm": 1.7766703367233276, "learning_rate": 0.0007976750700280112, "loss": 0.7217, "step": 7329 }, { "epoch": 4.094972067039106, "grad_norm": 
1.1616451740264893, "learning_rate": 0.0007976470588235293, "loss": 0.554, "step": 7330 }, { "epoch": 4.0955307262569836, "grad_norm": 0.5152605772018433, "learning_rate": 0.0007976190476190477, "loss": 0.3991, "step": 7331 }, { "epoch": 4.09608938547486, "grad_norm": 2.1343116760253906, "learning_rate": 0.0007975910364145659, "loss": 0.3697, "step": 7332 }, { "epoch": 4.096648044692738, "grad_norm": 1.0786422491073608, "learning_rate": 0.0007975630252100841, "loss": 0.4623, "step": 7333 }, { "epoch": 4.097206703910614, "grad_norm": 0.6544681787490845, "learning_rate": 0.0007975350140056023, "loss": 0.4705, "step": 7334 }, { "epoch": 4.097765363128492, "grad_norm": 0.5635467767715454, "learning_rate": 0.0007975070028011205, "loss": 0.438, "step": 7335 }, { "epoch": 4.098324022346369, "grad_norm": 1.2926359176635742, "learning_rate": 0.0007974789915966387, "loss": 0.4715, "step": 7336 }, { "epoch": 4.098882681564246, "grad_norm": 0.9510411620140076, "learning_rate": 0.0007974509803921569, "loss": 0.5287, "step": 7337 }, { "epoch": 4.099441340782123, "grad_norm": 0.5677906274795532, "learning_rate": 0.0007974229691876751, "loss": 0.5935, "step": 7338 }, { "epoch": 4.1, "grad_norm": 1.177069902420044, "learning_rate": 0.0007973949579831933, "loss": 0.5339, "step": 7339 }, { "epoch": 4.100558659217877, "grad_norm": 0.5134732723236084, "learning_rate": 0.0007973669467787115, "loss": 0.4403, "step": 7340 }, { "epoch": 4.1011173184357546, "grad_norm": 1.2201228141784668, "learning_rate": 0.0007973389355742297, "loss": 0.446, "step": 7341 }, { "epoch": 4.101675977653631, "grad_norm": 0.4552845060825348, "learning_rate": 0.0007973109243697479, "loss": 0.4449, "step": 7342 }, { "epoch": 4.102234636871509, "grad_norm": 0.5437498688697815, "learning_rate": 0.0007972829131652661, "loss": 0.3812, "step": 7343 }, { "epoch": 4.102793296089385, "grad_norm": 0.5181490778923035, "learning_rate": 0.0007972549019607843, "loss": 0.3754, "step": 7344 }, { "epoch": 4.103351955307263, 
"grad_norm": 0.6079907417297363, "learning_rate": 0.0007972268907563025, "loss": 0.4544, "step": 7345 }, { "epoch": 4.10391061452514, "grad_norm": 3.3960683345794678, "learning_rate": 0.0007971988795518207, "loss": 0.5105, "step": 7346 }, { "epoch": 4.104469273743017, "grad_norm": 0.6494735479354858, "learning_rate": 0.000797170868347339, "loss": 0.4883, "step": 7347 }, { "epoch": 4.105027932960894, "grad_norm": 0.4463353157043457, "learning_rate": 0.0007971428571428572, "loss": 0.4701, "step": 7348 }, { "epoch": 4.105586592178771, "grad_norm": 0.7758894562721252, "learning_rate": 0.0007971148459383754, "loss": 0.4728, "step": 7349 }, { "epoch": 4.106145251396648, "grad_norm": 0.394568532705307, "learning_rate": 0.0007970868347338936, "loss": 0.3183, "step": 7350 }, { "epoch": 4.1067039106145256, "grad_norm": 0.441532701253891, "learning_rate": 0.0007970588235294119, "loss": 0.5152, "step": 7351 }, { "epoch": 4.107262569832402, "grad_norm": 0.4544018507003784, "learning_rate": 0.00079703081232493, "loss": 0.5499, "step": 7352 }, { "epoch": 4.10782122905028, "grad_norm": 0.5740039944648743, "learning_rate": 0.0007970028011204482, "loss": 0.4844, "step": 7353 }, { "epoch": 4.108379888268156, "grad_norm": 0.7321734428405762, "learning_rate": 0.0007969747899159664, "loss": 0.5316, "step": 7354 }, { "epoch": 4.108938547486034, "grad_norm": 0.5580518245697021, "learning_rate": 0.0007969467787114846, "loss": 0.4511, "step": 7355 }, { "epoch": 4.10949720670391, "grad_norm": 0.5594097971916199, "learning_rate": 0.0007969187675070029, "loss": 0.6818, "step": 7356 }, { "epoch": 4.110055865921788, "grad_norm": 0.6603025794029236, "learning_rate": 0.000796890756302521, "loss": 0.4692, "step": 7357 }, { "epoch": 4.110614525139665, "grad_norm": 0.5391709804534912, "learning_rate": 0.0007968627450980392, "loss": 0.3589, "step": 7358 }, { "epoch": 4.111173184357542, "grad_norm": 1.7947614192962646, "learning_rate": 0.0007968347338935574, "loss": 0.5526, "step": 7359 }, { "epoch": 
4.111731843575419, "grad_norm": 0.5025770664215088, "learning_rate": 0.0007968067226890756, "loss": 0.5131, "step": 7360 }, { "epoch": 4.112290502793296, "grad_norm": 0.7856631278991699, "learning_rate": 0.0007967787114845938, "loss": 0.5122, "step": 7361 }, { "epoch": 4.112849162011173, "grad_norm": 0.5644674897193909, "learning_rate": 0.000796750700280112, "loss": 0.4929, "step": 7362 }, { "epoch": 4.113407821229051, "grad_norm": 0.6875373125076294, "learning_rate": 0.0007967226890756302, "loss": 0.3594, "step": 7363 }, { "epoch": 4.113966480446927, "grad_norm": 0.5161254405975342, "learning_rate": 0.0007966946778711485, "loss": 0.5745, "step": 7364 }, { "epoch": 4.114525139664805, "grad_norm": 0.9543882012367249, "learning_rate": 0.0007966666666666667, "loss": 0.4865, "step": 7365 }, { "epoch": 4.115083798882681, "grad_norm": 0.9078003764152527, "learning_rate": 0.0007966386554621849, "loss": 0.4281, "step": 7366 }, { "epoch": 4.115642458100559, "grad_norm": 0.7967604994773865, "learning_rate": 0.0007966106442577032, "loss": 0.5012, "step": 7367 }, { "epoch": 4.116201117318436, "grad_norm": 0.8606827855110168, "learning_rate": 0.0007965826330532213, "loss": 0.4875, "step": 7368 }, { "epoch": 4.116759776536313, "grad_norm": 0.47089481353759766, "learning_rate": 0.0007965546218487395, "loss": 0.4383, "step": 7369 }, { "epoch": 4.11731843575419, "grad_norm": 0.548164427280426, "learning_rate": 0.0007965266106442577, "loss": 0.4271, "step": 7370 }, { "epoch": 4.117877094972067, "grad_norm": 1.3291031122207642, "learning_rate": 0.0007964985994397759, "loss": 0.5259, "step": 7371 }, { "epoch": 4.118435754189944, "grad_norm": 0.7199836373329163, "learning_rate": 0.0007964705882352942, "loss": 0.6184, "step": 7372 }, { "epoch": 4.118994413407822, "grad_norm": 0.5368474125862122, "learning_rate": 0.0007964425770308123, "loss": 0.4822, "step": 7373 }, { "epoch": 4.119553072625698, "grad_norm": 0.6486384868621826, "learning_rate": 0.0007964145658263305, "loss": 0.4145, 
"step": 7374 }, { "epoch": 4.120111731843576, "grad_norm": 0.5339905619621277, "learning_rate": 0.0007963865546218487, "loss": 0.4962, "step": 7375 }, { "epoch": 4.120670391061452, "grad_norm": 0.514297366142273, "learning_rate": 0.0007963585434173669, "loss": 0.4312, "step": 7376 }, { "epoch": 4.12122905027933, "grad_norm": 0.96578049659729, "learning_rate": 0.0007963305322128852, "loss": 0.4341, "step": 7377 }, { "epoch": 4.121787709497207, "grad_norm": 0.557553768157959, "learning_rate": 0.0007963025210084033, "loss": 0.4634, "step": 7378 }, { "epoch": 4.122346368715084, "grad_norm": 0.4616701304912567, "learning_rate": 0.0007962745098039215, "loss": 0.4096, "step": 7379 }, { "epoch": 4.122905027932961, "grad_norm": 0.5880672335624695, "learning_rate": 0.0007962464985994398, "loss": 0.4552, "step": 7380 }, { "epoch": 4.123463687150838, "grad_norm": 0.976950466632843, "learning_rate": 0.000796218487394958, "loss": 0.7099, "step": 7381 }, { "epoch": 4.124022346368715, "grad_norm": 0.8104521632194519, "learning_rate": 0.0007961904761904763, "loss": 0.5674, "step": 7382 }, { "epoch": 4.124581005586593, "grad_norm": 0.49486520886421204, "learning_rate": 0.0007961624649859945, "loss": 0.42, "step": 7383 }, { "epoch": 4.125139664804469, "grad_norm": 2.083012104034424, "learning_rate": 0.0007961344537815126, "loss": 0.4395, "step": 7384 }, { "epoch": 4.125698324022347, "grad_norm": 0.9928786754608154, "learning_rate": 0.0007961064425770308, "loss": 0.3935, "step": 7385 }, { "epoch": 4.126256983240223, "grad_norm": 0.7612307667732239, "learning_rate": 0.000796078431372549, "loss": 0.3452, "step": 7386 }, { "epoch": 4.126815642458101, "grad_norm": 0.45569178462028503, "learning_rate": 0.0007960504201680673, "loss": 0.4506, "step": 7387 }, { "epoch": 4.127374301675978, "grad_norm": 1.1247708797454834, "learning_rate": 0.0007960224089635855, "loss": 0.4767, "step": 7388 }, { "epoch": 4.127932960893855, "grad_norm": 0.6095284819602966, "learning_rate": 0.0007959943977591036, 
"loss": 0.4221, "step": 7389 }, { "epoch": 4.128491620111732, "grad_norm": 0.7278775572776794, "learning_rate": 0.0007959663865546218, "loss": 0.4723, "step": 7390 }, { "epoch": 4.129050279329609, "grad_norm": 0.4910142123699188, "learning_rate": 0.00079593837535014, "loss": 0.4914, "step": 7391 }, { "epoch": 4.129608938547486, "grad_norm": 0.538343071937561, "learning_rate": 0.0007959103641456583, "loss": 0.4726, "step": 7392 }, { "epoch": 4.130167597765363, "grad_norm": 1.9881123304367065, "learning_rate": 0.0007958823529411765, "loss": 0.4276, "step": 7393 }, { "epoch": 4.13072625698324, "grad_norm": 0.47892946004867554, "learning_rate": 0.0007958543417366946, "loss": 0.4023, "step": 7394 }, { "epoch": 4.131284916201118, "grad_norm": 0.468246728181839, "learning_rate": 0.0007958263305322128, "loss": 0.4374, "step": 7395 }, { "epoch": 4.131843575418994, "grad_norm": 0.4916151762008667, "learning_rate": 0.000795798319327731, "loss": 0.4503, "step": 7396 }, { "epoch": 4.132402234636872, "grad_norm": 0.7064822316169739, "learning_rate": 0.0007957703081232494, "loss": 0.5386, "step": 7397 }, { "epoch": 4.132960893854748, "grad_norm": 3.0665736198425293, "learning_rate": 0.0007957422969187676, "loss": 0.5213, "step": 7398 }, { "epoch": 4.133519553072626, "grad_norm": 0.5969092845916748, "learning_rate": 0.0007957142857142858, "loss": 0.6016, "step": 7399 }, { "epoch": 4.134078212290503, "grad_norm": 0.8750990629196167, "learning_rate": 0.0007956862745098039, "loss": 0.3964, "step": 7400 }, { "epoch": 4.13463687150838, "grad_norm": 1.998945951461792, "learning_rate": 0.0007956582633053221, "loss": 0.4764, "step": 7401 }, { "epoch": 4.135195530726257, "grad_norm": 1.0746474266052246, "learning_rate": 0.0007956302521008404, "loss": 0.4393, "step": 7402 }, { "epoch": 4.135754189944134, "grad_norm": 0.5045647621154785, "learning_rate": 0.0007956022408963586, "loss": 0.4711, "step": 7403 }, { "epoch": 4.136312849162011, "grad_norm": 0.4553874731063843, "learning_rate": 
0.0007955742296918768, "loss": 0.4447, "step": 7404 }, { "epoch": 4.136871508379889, "grad_norm": 0.4946921467781067, "learning_rate": 0.0007955462184873949, "loss": 0.3933, "step": 7405 }, { "epoch": 4.137430167597765, "grad_norm": 0.6165663003921509, "learning_rate": 0.0007955182072829131, "loss": 0.4334, "step": 7406 }, { "epoch": 4.137988826815643, "grad_norm": 0.46721312403678894, "learning_rate": 0.0007954901960784314, "loss": 0.4439, "step": 7407 }, { "epoch": 4.138547486033519, "grad_norm": 0.686400294303894, "learning_rate": 0.0007954621848739496, "loss": 0.4862, "step": 7408 }, { "epoch": 4.139106145251397, "grad_norm": 1.278274416923523, "learning_rate": 0.0007954341736694678, "loss": 0.5088, "step": 7409 }, { "epoch": 4.139664804469274, "grad_norm": 0.7448779940605164, "learning_rate": 0.0007954061624649859, "loss": 0.5026, "step": 7410 }, { "epoch": 4.140223463687151, "grad_norm": 2.245076894760132, "learning_rate": 0.0007953781512605041, "loss": 0.4031, "step": 7411 }, { "epoch": 4.140782122905028, "grad_norm": 0.8871789574623108, "learning_rate": 0.0007953501400560225, "loss": 0.5067, "step": 7412 }, { "epoch": 4.141340782122905, "grad_norm": 1.5357602834701538, "learning_rate": 0.0007953221288515407, "loss": 0.4572, "step": 7413 }, { "epoch": 4.141899441340782, "grad_norm": 1.0998822450637817, "learning_rate": 0.0007952941176470589, "loss": 0.4849, "step": 7414 }, { "epoch": 4.14245810055866, "grad_norm": 0.7770353555679321, "learning_rate": 0.0007952661064425771, "loss": 0.5473, "step": 7415 }, { "epoch": 4.143016759776536, "grad_norm": 0.3788093030452728, "learning_rate": 0.0007952380952380952, "loss": 0.3726, "step": 7416 }, { "epoch": 4.143575418994414, "grad_norm": 0.7680646777153015, "learning_rate": 0.0007952100840336135, "loss": 0.4861, "step": 7417 }, { "epoch": 4.14413407821229, "grad_norm": 0.8892258405685425, "learning_rate": 0.0007951820728291317, "loss": 0.5427, "step": 7418 }, { "epoch": 4.144692737430168, "grad_norm": 
3.658684015274048, "learning_rate": 0.0007951540616246499, "loss": 0.4733, "step": 7419 }, { "epoch": 4.145251396648045, "grad_norm": 0.9445491433143616, "learning_rate": 0.0007951260504201681, "loss": 0.3908, "step": 7420 }, { "epoch": 4.145810055865922, "grad_norm": 0.44519010186195374, "learning_rate": 0.0007950980392156862, "loss": 0.5001, "step": 7421 }, { "epoch": 4.146368715083799, "grad_norm": 1.0370937585830688, "learning_rate": 0.0007950700280112045, "loss": 0.5082, "step": 7422 }, { "epoch": 4.146927374301676, "grad_norm": 0.6180882453918457, "learning_rate": 0.0007950420168067227, "loss": 0.4369, "step": 7423 }, { "epoch": 4.147486033519553, "grad_norm": 1.2745493650436401, "learning_rate": 0.0007950140056022409, "loss": 0.4133, "step": 7424 }, { "epoch": 4.148044692737431, "grad_norm": 0.6371618509292603, "learning_rate": 0.0007949859943977591, "loss": 0.6168, "step": 7425 }, { "epoch": 4.148603351955307, "grad_norm": 0.5083094239234924, "learning_rate": 0.0007949579831932772, "loss": 0.5119, "step": 7426 }, { "epoch": 4.149162011173185, "grad_norm": 1.522736668586731, "learning_rate": 0.0007949299719887955, "loss": 0.8669, "step": 7427 }, { "epoch": 4.149720670391061, "grad_norm": 0.6432474851608276, "learning_rate": 0.0007949019607843137, "loss": 0.4847, "step": 7428 }, { "epoch": 4.150279329608939, "grad_norm": 0.6489638090133667, "learning_rate": 0.000794873949579832, "loss": 0.4549, "step": 7429 }, { "epoch": 4.150837988826815, "grad_norm": 0.603775143623352, "learning_rate": 0.0007948459383753502, "loss": 0.7055, "step": 7430 }, { "epoch": 4.151396648044693, "grad_norm": 0.5119211077690125, "learning_rate": 0.0007948179271708684, "loss": 0.4463, "step": 7431 }, { "epoch": 4.15195530726257, "grad_norm": 0.3933148980140686, "learning_rate": 0.0007947899159663866, "loss": 0.4412, "step": 7432 }, { "epoch": 4.152513966480447, "grad_norm": 0.7747071385383606, "learning_rate": 0.0007947619047619048, "loss": 0.4774, "step": 7433 }, { "epoch": 
4.153072625698324, "grad_norm": 0.5461366772651672, "learning_rate": 0.000794733893557423, "loss": 0.542, "step": 7434 }, { "epoch": 4.153631284916201, "grad_norm": 0.7179622650146484, "learning_rate": 0.0007947058823529412, "loss": 0.6526, "step": 7435 }, { "epoch": 4.154189944134078, "grad_norm": 0.5717280507087708, "learning_rate": 0.0007946778711484594, "loss": 0.4339, "step": 7436 }, { "epoch": 4.154748603351956, "grad_norm": 1.7125846147537231, "learning_rate": 0.0007946498599439776, "loss": 0.5721, "step": 7437 }, { "epoch": 4.155307262569832, "grad_norm": 1.3465338945388794, "learning_rate": 0.0007946218487394958, "loss": 0.4633, "step": 7438 }, { "epoch": 4.15586592178771, "grad_norm": 0.47714072465896606, "learning_rate": 0.000794593837535014, "loss": 0.4016, "step": 7439 }, { "epoch": 4.156424581005586, "grad_norm": 0.6180019974708557, "learning_rate": 0.0007945658263305322, "loss": 0.4955, "step": 7440 }, { "epoch": 4.156983240223464, "grad_norm": 1.0105268955230713, "learning_rate": 0.0007945378151260504, "loss": 0.4447, "step": 7441 }, { "epoch": 4.157541899441341, "grad_norm": 1.1082491874694824, "learning_rate": 0.0007945098039215687, "loss": 0.5251, "step": 7442 }, { "epoch": 4.158100558659218, "grad_norm": 1.6552913188934326, "learning_rate": 0.0007944817927170868, "loss": 0.4858, "step": 7443 }, { "epoch": 4.158659217877095, "grad_norm": 2.7462158203125, "learning_rate": 0.000794453781512605, "loss": 0.4481, "step": 7444 }, { "epoch": 4.159217877094972, "grad_norm": 0.5992055535316467, "learning_rate": 0.0007944257703081232, "loss": 0.4226, "step": 7445 }, { "epoch": 4.159776536312849, "grad_norm": 0.5990080833435059, "learning_rate": 0.0007943977591036415, "loss": 0.436, "step": 7446 }, { "epoch": 4.160335195530727, "grad_norm": 0.5530053377151489, "learning_rate": 0.0007943697478991598, "loss": 0.4323, "step": 7447 }, { "epoch": 4.160893854748603, "grad_norm": 0.4704790711402893, "learning_rate": 0.0007943417366946779, "loss": 0.4573, "step": 
7448 }, { "epoch": 4.161452513966481, "grad_norm": 0.5873286724090576, "learning_rate": 0.0007943137254901961, "loss": 0.4024, "step": 7449 }, { "epoch": 4.162011173184357, "grad_norm": 5.973817825317383, "learning_rate": 0.0007942857142857143, "loss": 0.4604, "step": 7450 }, { "epoch": 4.162569832402235, "grad_norm": 0.5752110481262207, "learning_rate": 0.0007942577030812325, "loss": 0.3965, "step": 7451 }, { "epoch": 4.163128491620112, "grad_norm": 0.4331759810447693, "learning_rate": 0.0007942296918767508, "loss": 0.4327, "step": 7452 }, { "epoch": 4.163687150837989, "grad_norm": 0.749707043170929, "learning_rate": 0.0007942016806722689, "loss": 0.5496, "step": 7453 }, { "epoch": 4.164245810055866, "grad_norm": 0.5506083965301514, "learning_rate": 0.0007941736694677871, "loss": 0.701, "step": 7454 }, { "epoch": 4.164804469273743, "grad_norm": 0.7257410287857056, "learning_rate": 0.0007941456582633053, "loss": 0.5343, "step": 7455 }, { "epoch": 4.16536312849162, "grad_norm": 11.58082103729248, "learning_rate": 0.0007941176470588235, "loss": 0.6011, "step": 7456 }, { "epoch": 4.165921787709498, "grad_norm": 0.7507224082946777, "learning_rate": 0.0007940896358543418, "loss": 0.3415, "step": 7457 }, { "epoch": 4.166480446927374, "grad_norm": 0.7059439420700073, "learning_rate": 0.00079406162464986, "loss": 0.4563, "step": 7458 }, { "epoch": 4.167039106145252, "grad_norm": 0.4393032193183899, "learning_rate": 0.0007940336134453781, "loss": 0.5158, "step": 7459 }, { "epoch": 4.167597765363128, "grad_norm": 0.7159056663513184, "learning_rate": 0.0007940056022408963, "loss": 0.4362, "step": 7460 }, { "epoch": 4.168156424581006, "grad_norm": 0.481918603181839, "learning_rate": 0.0007939775910364145, "loss": 0.3555, "step": 7461 }, { "epoch": 4.168715083798883, "grad_norm": 0.5716583728790283, "learning_rate": 0.0007939495798319329, "loss": 0.4048, "step": 7462 }, { "epoch": 4.16927374301676, "grad_norm": 0.5609585046768188, "learning_rate": 0.0007939215686274511, "loss": 
0.5613, "step": 7463 }, { "epoch": 4.169832402234637, "grad_norm": 0.528323769569397, "learning_rate": 0.0007938935574229692, "loss": 0.3869, "step": 7464 }, { "epoch": 4.170391061452514, "grad_norm": 0.45341894030570984, "learning_rate": 0.0007938655462184874, "loss": 0.4711, "step": 7465 }, { "epoch": 4.170949720670391, "grad_norm": 0.5705549716949463, "learning_rate": 0.0007938375350140056, "loss": 0.4762, "step": 7466 }, { "epoch": 4.171508379888268, "grad_norm": 0.5936793088912964, "learning_rate": 0.0007938095238095239, "loss": 0.3704, "step": 7467 }, { "epoch": 4.172067039106145, "grad_norm": 0.4085972309112549, "learning_rate": 0.0007937815126050421, "loss": 0.4892, "step": 7468 }, { "epoch": 4.172625698324023, "grad_norm": 0.5726138949394226, "learning_rate": 0.0007937535014005602, "loss": 0.4411, "step": 7469 }, { "epoch": 4.173184357541899, "grad_norm": 0.38425758481025696, "learning_rate": 0.0007937254901960784, "loss": 0.4204, "step": 7470 }, { "epoch": 4.173743016759777, "grad_norm": 0.4055267572402954, "learning_rate": 0.0007936974789915966, "loss": 0.4325, "step": 7471 }, { "epoch": 4.174301675977653, "grad_norm": 2.616147756576538, "learning_rate": 0.0007936694677871149, "loss": 0.5505, "step": 7472 }, { "epoch": 4.174860335195531, "grad_norm": 0.8516884446144104, "learning_rate": 0.0007936414565826331, "loss": 0.3946, "step": 7473 }, { "epoch": 4.175418994413408, "grad_norm": 2.598735809326172, "learning_rate": 0.0007936134453781513, "loss": 0.347, "step": 7474 }, { "epoch": 4.175977653631285, "grad_norm": 0.6081153154373169, "learning_rate": 0.0007935854341736694, "loss": 0.5275, "step": 7475 }, { "epoch": 4.176536312849162, "grad_norm": 0.6708801984786987, "learning_rate": 0.0007935574229691876, "loss": 0.5353, "step": 7476 }, { "epoch": 4.177094972067039, "grad_norm": 0.4806496202945709, "learning_rate": 0.000793529411764706, "loss": 0.4697, "step": 7477 }, { "epoch": 4.177653631284916, "grad_norm": 1.1978561878204346, "learning_rate": 
0.0007935014005602242, "loss": 0.5459, "step": 7478 }, { "epoch": 4.178212290502794, "grad_norm": 0.4890676438808441, "learning_rate": 0.0007934733893557424, "loss": 0.4394, "step": 7479 }, { "epoch": 4.17877094972067, "grad_norm": 0.6475544571876526, "learning_rate": 0.0007934453781512605, "loss": 0.4209, "step": 7480 }, { "epoch": 4.179329608938548, "grad_norm": 0.4206632673740387, "learning_rate": 0.0007934173669467787, "loss": 0.3716, "step": 7481 }, { "epoch": 4.179888268156424, "grad_norm": 0.4854796230792999, "learning_rate": 0.000793389355742297, "loss": 0.3513, "step": 7482 }, { "epoch": 4.180446927374302, "grad_norm": 0.5116716027259827, "learning_rate": 0.0007933613445378152, "loss": 0.4828, "step": 7483 }, { "epoch": 4.181005586592179, "grad_norm": 0.44999393820762634, "learning_rate": 0.0007933333333333334, "loss": 0.4766, "step": 7484 }, { "epoch": 4.181564245810056, "grad_norm": 0.48781394958496094, "learning_rate": 0.0007933053221288515, "loss": 0.4577, "step": 7485 }, { "epoch": 4.182122905027933, "grad_norm": 0.5214735269546509, "learning_rate": 0.0007932773109243697, "loss": 0.4235, "step": 7486 }, { "epoch": 4.18268156424581, "grad_norm": 0.5006760358810425, "learning_rate": 0.000793249299719888, "loss": 0.5185, "step": 7487 }, { "epoch": 4.183240223463687, "grad_norm": 0.9785987734794617, "learning_rate": 0.0007932212885154062, "loss": 0.4112, "step": 7488 }, { "epoch": 4.183798882681565, "grad_norm": 0.6362124681472778, "learning_rate": 0.0007931932773109244, "loss": 0.5654, "step": 7489 }, { "epoch": 4.184357541899441, "grad_norm": 0.4772784113883972, "learning_rate": 0.0007931652661064426, "loss": 0.4175, "step": 7490 }, { "epoch": 4.184916201117319, "grad_norm": 0.4467301368713379, "learning_rate": 0.0007931372549019607, "loss": 0.4848, "step": 7491 }, { "epoch": 4.185474860335195, "grad_norm": 0.6502392292022705, "learning_rate": 0.000793109243697479, "loss": 0.5171, "step": 7492 }, { "epoch": 4.186033519553073, "grad_norm": 
0.48810911178588867, "learning_rate": 0.0007930812324929972, "loss": 0.4794, "step": 7493 }, { "epoch": 4.18659217877095, "grad_norm": 0.573898434638977, "learning_rate": 0.0007930532212885155, "loss": 0.3721, "step": 7494 }, { "epoch": 4.187150837988827, "grad_norm": 0.47238484025001526, "learning_rate": 0.0007930252100840337, "loss": 0.5291, "step": 7495 }, { "epoch": 4.187709497206704, "grad_norm": 0.5235174894332886, "learning_rate": 0.0007929971988795518, "loss": 0.384, "step": 7496 }, { "epoch": 4.188268156424581, "grad_norm": 0.44877365231513977, "learning_rate": 0.0007929691876750701, "loss": 0.4032, "step": 7497 }, { "epoch": 4.188826815642458, "grad_norm": 0.4492464065551758, "learning_rate": 0.0007929411764705883, "loss": 0.3234, "step": 7498 }, { "epoch": 4.189385474860336, "grad_norm": 0.5851590037345886, "learning_rate": 0.0007929131652661065, "loss": 0.4365, "step": 7499 }, { "epoch": 4.189944134078212, "grad_norm": 0.6477049589157104, "learning_rate": 0.0007928851540616247, "loss": 0.5514, "step": 7500 }, { "epoch": 4.189944134078212, "eval_cer": 0.09442680545099451, "eval_loss": 0.354936420917511, "eval_runtime": 57.7153, "eval_samples_per_second": 78.627, "eval_steps_per_second": 4.921, "eval_wer": 0.37376050948297534, "step": 7500 }, { "epoch": 4.19050279329609, "grad_norm": 0.5460861921310425, "learning_rate": 0.0007928571428571428, "loss": 0.5029, "step": 7501 }, { "epoch": 4.191061452513966, "grad_norm": 1.2369848489761353, "learning_rate": 0.0007928291316526611, "loss": 0.4525, "step": 7502 }, { "epoch": 4.191620111731844, "grad_norm": 0.6093453764915466, "learning_rate": 0.0007928011204481793, "loss": 0.4699, "step": 7503 }, { "epoch": 4.19217877094972, "grad_norm": 1.9455437660217285, "learning_rate": 0.0007927731092436975, "loss": 0.4312, "step": 7504 }, { "epoch": 4.192737430167598, "grad_norm": 0.47835955023765564, "learning_rate": 0.0007927450980392157, "loss": 0.4457, "step": 7505 }, { "epoch": 4.193296089385475, "grad_norm": 
0.7246494293212891, "learning_rate": 0.0007927170868347339, "loss": 0.3625, "step": 7506 }, { "epoch": 4.193854748603352, "grad_norm": 0.7044689655303955, "learning_rate": 0.0007926890756302521, "loss": 0.5968, "step": 7507 }, { "epoch": 4.194413407821229, "grad_norm": 0.4945489764213562, "learning_rate": 0.0007926610644257703, "loss": 0.3848, "step": 7508 }, { "epoch": 4.194972067039106, "grad_norm": 0.45708316564559937, "learning_rate": 0.0007926330532212885, "loss": 0.4053, "step": 7509 }, { "epoch": 4.195530726256983, "grad_norm": 0.8317373991012573, "learning_rate": 0.0007926050420168067, "loss": 0.4273, "step": 7510 }, { "epoch": 4.196089385474861, "grad_norm": 0.46699386835098267, "learning_rate": 0.000792577030812325, "loss": 0.4763, "step": 7511 }, { "epoch": 4.196648044692737, "grad_norm": 1.889947533607483, "learning_rate": 0.0007925490196078432, "loss": 0.4931, "step": 7512 }, { "epoch": 4.197206703910615, "grad_norm": 0.5027772188186646, "learning_rate": 0.0007925210084033614, "loss": 0.4235, "step": 7513 }, { "epoch": 4.197765363128491, "grad_norm": 0.5661888122558594, "learning_rate": 0.0007924929971988796, "loss": 0.7281, "step": 7514 }, { "epoch": 4.198324022346369, "grad_norm": 0.5596293807029724, "learning_rate": 0.0007924649859943978, "loss": 0.5043, "step": 7515 }, { "epoch": 4.198882681564246, "grad_norm": 0.4898509085178375, "learning_rate": 0.000792436974789916, "loss": 0.3877, "step": 7516 }, { "epoch": 4.199441340782123, "grad_norm": 1.6684132814407349, "learning_rate": 0.0007924089635854342, "loss": 0.4171, "step": 7517 }, { "epoch": 4.2, "grad_norm": 0.99784255027771, "learning_rate": 0.0007923809523809524, "loss": 0.5618, "step": 7518 }, { "epoch": 4.200558659217877, "grad_norm": 0.6530553102493286, "learning_rate": 0.0007923529411764706, "loss": 0.4725, "step": 7519 }, { "epoch": 4.201117318435754, "grad_norm": 0.4599669575691223, "learning_rate": 0.0007923249299719888, "loss": 0.4406, "step": 7520 }, { "epoch": 4.201675977653632, 
"grad_norm": 0.6810505986213684, "learning_rate": 0.000792296918767507, "loss": 0.5319, "step": 7521 }, { "epoch": 4.202234636871508, "grad_norm": 0.7919774651527405, "learning_rate": 0.0007922689075630253, "loss": 0.5626, "step": 7522 }, { "epoch": 4.202793296089386, "grad_norm": 1.0941392183303833, "learning_rate": 0.0007922408963585434, "loss": 0.5579, "step": 7523 }, { "epoch": 4.203351955307262, "grad_norm": 0.6305790543556213, "learning_rate": 0.0007922128851540616, "loss": 0.5523, "step": 7524 }, { "epoch": 4.20391061452514, "grad_norm": 0.6114245653152466, "learning_rate": 0.0007921848739495798, "loss": 0.4681, "step": 7525 }, { "epoch": 4.204469273743017, "grad_norm": 0.8891258835792542, "learning_rate": 0.000792156862745098, "loss": 0.4285, "step": 7526 }, { "epoch": 4.205027932960894, "grad_norm": 0.6705508828163147, "learning_rate": 0.0007921288515406164, "loss": 0.7093, "step": 7527 }, { "epoch": 4.205586592178771, "grad_norm": 0.4631941318511963, "learning_rate": 0.0007921008403361345, "loss": 0.4092, "step": 7528 }, { "epoch": 4.206145251396648, "grad_norm": 0.49045324325561523, "learning_rate": 0.0007920728291316527, "loss": 0.5919, "step": 7529 }, { "epoch": 4.206703910614525, "grad_norm": 0.6780363321304321, "learning_rate": 0.0007920448179271709, "loss": 0.5094, "step": 7530 }, { "epoch": 4.207262569832403, "grad_norm": 0.5109512209892273, "learning_rate": 0.0007920168067226891, "loss": 0.4773, "step": 7531 }, { "epoch": 4.207821229050279, "grad_norm": 0.5376586318016052, "learning_rate": 0.0007919887955182074, "loss": 0.5607, "step": 7532 }, { "epoch": 4.208379888268157, "grad_norm": 0.5449132323265076, "learning_rate": 0.0007919607843137255, "loss": 0.4484, "step": 7533 }, { "epoch": 4.208938547486033, "grad_norm": 0.5536893010139465, "learning_rate": 0.0007919327731092437, "loss": 0.4407, "step": 7534 }, { "epoch": 4.209497206703911, "grad_norm": 0.6017754673957825, "learning_rate": 0.0007919047619047619, "loss": 0.416, "step": 7535 }, { 
"epoch": 4.210055865921788, "grad_norm": 0.4171697497367859, "learning_rate": 0.0007918767507002801, "loss": 0.461, "step": 7536 }, { "epoch": 4.210614525139665, "grad_norm": 0.5887663960456848, "learning_rate": 0.0007918487394957984, "loss": 0.4654, "step": 7537 }, { "epoch": 4.211173184357542, "grad_norm": 3.2149410247802734, "learning_rate": 0.0007918207282913166, "loss": 0.6762, "step": 7538 }, { "epoch": 4.211731843575419, "grad_norm": 0.37442272901535034, "learning_rate": 0.0007917927170868347, "loss": 0.4011, "step": 7539 }, { "epoch": 4.212290502793296, "grad_norm": 0.5815616846084595, "learning_rate": 0.0007917647058823529, "loss": 0.4366, "step": 7540 }, { "epoch": 4.212849162011173, "grad_norm": 0.8288305997848511, "learning_rate": 0.0007917366946778711, "loss": 0.4455, "step": 7541 }, { "epoch": 4.21340782122905, "grad_norm": 0.6441209316253662, "learning_rate": 0.0007917086834733894, "loss": 0.4496, "step": 7542 }, { "epoch": 4.213966480446928, "grad_norm": 0.6896035671234131, "learning_rate": 0.0007916806722689077, "loss": 0.5166, "step": 7543 }, { "epoch": 4.214525139664804, "grad_norm": 0.5576814413070679, "learning_rate": 0.0007916526610644258, "loss": 0.5518, "step": 7544 }, { "epoch": 4.215083798882682, "grad_norm": 0.4375351667404175, "learning_rate": 0.000791624649859944, "loss": 0.4281, "step": 7545 }, { "epoch": 4.215642458100558, "grad_norm": 0.8181184530258179, "learning_rate": 0.0007915966386554622, "loss": 0.6171, "step": 7546 }, { "epoch": 4.216201117318436, "grad_norm": 0.5615286827087402, "learning_rate": 0.0007915686274509805, "loss": 0.5376, "step": 7547 }, { "epoch": 4.216759776536313, "grad_norm": 0.5858601927757263, "learning_rate": 0.0007915406162464987, "loss": 0.5005, "step": 7548 }, { "epoch": 4.21731843575419, "grad_norm": 0.8927748203277588, "learning_rate": 0.0007915126050420168, "loss": 0.5056, "step": 7549 }, { "epoch": 4.217877094972067, "grad_norm": 0.4644601345062256, "learning_rate": 0.000791484593837535, "loss": 
0.467, "step": 7550 }, { "epoch": 4.218435754189944, "grad_norm": 0.5699589848518372, "learning_rate": 0.0007914565826330532, "loss": 0.4393, "step": 7551 }, { "epoch": 4.218994413407821, "grad_norm": 0.6297087669372559, "learning_rate": 0.0007914285714285715, "loss": 0.3804, "step": 7552 }, { "epoch": 4.219553072625699, "grad_norm": 0.4785193204879761, "learning_rate": 0.0007914005602240897, "loss": 0.4091, "step": 7553 }, { "epoch": 4.220111731843575, "grad_norm": 0.40393710136413574, "learning_rate": 0.0007913725490196079, "loss": 0.4572, "step": 7554 }, { "epoch": 4.220670391061453, "grad_norm": 0.4363790452480316, "learning_rate": 0.000791344537815126, "loss": 0.4269, "step": 7555 }, { "epoch": 4.221229050279329, "grad_norm": 0.7848618030548096, "learning_rate": 0.0007913165266106442, "loss": 0.5952, "step": 7556 }, { "epoch": 4.221787709497207, "grad_norm": 2.397650718688965, "learning_rate": 0.0007912885154061625, "loss": 0.3947, "step": 7557 }, { "epoch": 4.222346368715084, "grad_norm": 0.982249915599823, "learning_rate": 0.0007912605042016807, "loss": 0.6712, "step": 7558 }, { "epoch": 4.222905027932961, "grad_norm": 0.3479222059249878, "learning_rate": 0.000791232492997199, "loss": 0.36, "step": 7559 }, { "epoch": 4.223463687150838, "grad_norm": 0.3779882788658142, "learning_rate": 0.000791204481792717, "loss": 0.3762, "step": 7560 }, { "epoch": 4.224022346368715, "grad_norm": null, "learning_rate": 0.000791204481792717, "loss": 0.5007, "step": 7561 }, { "epoch": 4.224581005586592, "grad_norm": 0.5627475380897522, "learning_rate": 0.0007911764705882353, "loss": 0.451, "step": 7562 }, { "epoch": 4.22513966480447, "grad_norm": 2.1478681564331055, "learning_rate": 0.0007911484593837536, "loss": 0.4829, "step": 7563 }, { "epoch": 4.225698324022346, "grad_norm": 0.4308639168739319, "learning_rate": 0.0007911204481792718, "loss": 0.3444, "step": 7564 }, { "epoch": 4.226256983240224, "grad_norm": 0.551754891872406, "learning_rate": 0.00079109243697479, 
"loss": 0.4436, "step": 7565 }, { "epoch": 4.2268156424581, "grad_norm": 0.5118157267570496, "learning_rate": 0.0007910644257703081, "loss": 0.468, "step": 7566 }, { "epoch": 4.227374301675978, "grad_norm": 0.47258880734443665, "learning_rate": 0.0007910364145658263, "loss": 0.419, "step": 7567 }, { "epoch": 4.227932960893855, "grad_norm": 0.7057540416717529, "learning_rate": 0.0007910084033613446, "loss": 0.4491, "step": 7568 }, { "epoch": 4.228491620111732, "grad_norm": 0.6563587188720703, "learning_rate": 0.0007909803921568628, "loss": 0.5068, "step": 7569 }, { "epoch": 4.229050279329609, "grad_norm": 0.5046238899230957, "learning_rate": 0.000790952380952381, "loss": 0.4865, "step": 7570 }, { "epoch": 4.229608938547486, "grad_norm": 0.5022178888320923, "learning_rate": 0.0007909243697478992, "loss": 0.4151, "step": 7571 }, { "epoch": 4.230167597765363, "grad_norm": 0.5882951617240906, "learning_rate": 0.0007908963585434173, "loss": 0.6282, "step": 7572 }, { "epoch": 4.230726256983241, "grad_norm": 0.4815919101238251, "learning_rate": 0.0007908683473389356, "loss": 0.4525, "step": 7573 }, { "epoch": 4.231284916201117, "grad_norm": 0.5523713231086731, "learning_rate": 0.0007908403361344538, "loss": 0.4286, "step": 7574 }, { "epoch": 4.231843575418995, "grad_norm": 0.6272464990615845, "learning_rate": 0.000790812324929972, "loss": 0.4915, "step": 7575 }, { "epoch": 4.232402234636871, "grad_norm": 0.5121742486953735, "learning_rate": 0.0007907843137254902, "loss": 0.4691, "step": 7576 }, { "epoch": 4.232960893854749, "grad_norm": 0.40071049332618713, "learning_rate": 0.0007907563025210083, "loss": 0.4459, "step": 7577 }, { "epoch": 4.233519553072625, "grad_norm": 0.565533459186554, "learning_rate": 0.0007907282913165267, "loss": 0.6078, "step": 7578 }, { "epoch": 4.234078212290503, "grad_norm": 0.7031722664833069, "learning_rate": 0.0007907002801120449, "loss": 0.3592, "step": 7579 }, { "epoch": 4.23463687150838, "grad_norm": 0.5376628041267395, "learning_rate": 
0.0007906722689075631, "loss": 0.4001, "step": 7580 }, { "epoch": 4.235195530726257, "grad_norm": 1.1053074598312378, "learning_rate": 0.0007906442577030813, "loss": 0.354, "step": 7581 }, { "epoch": 4.235754189944134, "grad_norm": 0.47745054960250854, "learning_rate": 0.0007906162464985994, "loss": 0.4687, "step": 7582 }, { "epoch": 4.236312849162011, "grad_norm": 0.630379319190979, "learning_rate": 0.0007905882352941176, "loss": 0.6115, "step": 7583 }, { "epoch": 4.236871508379888, "grad_norm": 0.5156353712081909, "learning_rate": 0.0007905602240896359, "loss": 0.3984, "step": 7584 }, { "epoch": 4.237430167597766, "grad_norm": 0.7615495920181274, "learning_rate": 0.0007905322128851541, "loss": 0.5273, "step": 7585 }, { "epoch": 4.237988826815642, "grad_norm": 0.5601665377616882, "learning_rate": 0.0007905042016806723, "loss": 0.4536, "step": 7586 }, { "epoch": 4.23854748603352, "grad_norm": 0.7228991389274597, "learning_rate": 0.0007904761904761905, "loss": 0.3972, "step": 7587 }, { "epoch": 4.239106145251396, "grad_norm": 0.9597086906433105, "learning_rate": 0.0007904481792717086, "loss": 0.5157, "step": 7588 }, { "epoch": 4.239664804469274, "grad_norm": 0.46680310368537903, "learning_rate": 0.0007904201680672269, "loss": 0.4702, "step": 7589 }, { "epoch": 4.240223463687151, "grad_norm": 0.5271770358085632, "learning_rate": 0.0007903921568627451, "loss": 0.4419, "step": 7590 }, { "epoch": 4.240782122905028, "grad_norm": 0.611297070980072, "learning_rate": 0.0007903641456582633, "loss": 0.4898, "step": 7591 }, { "epoch": 4.241340782122905, "grad_norm": 0.503616988658905, "learning_rate": 0.0007903361344537815, "loss": 0.455, "step": 7592 }, { "epoch": 4.241899441340782, "grad_norm": 0.42161980271339417, "learning_rate": 0.0007903081232492996, "loss": 0.4363, "step": 7593 }, { "epoch": 4.242458100558659, "grad_norm": 0.4580270051956177, "learning_rate": 0.000790280112044818, "loss": 0.4319, "step": 7594 }, { "epoch": 4.243016759776537, "grad_norm": 
0.6039807200431824, "learning_rate": 0.0007902521008403362, "loss": 0.4201, "step": 7595 }, { "epoch": 4.243575418994413, "grad_norm": 0.7419303059577942, "learning_rate": 0.0007902240896358544, "loss": 0.3913, "step": 7596 }, { "epoch": 4.244134078212291, "grad_norm": 0.4437410831451416, "learning_rate": 0.0007901960784313726, "loss": 0.4705, "step": 7597 }, { "epoch": 4.244692737430167, "grad_norm": 0.5128861665725708, "learning_rate": 0.0007901680672268907, "loss": 0.3766, "step": 7598 }, { "epoch": 4.245251396648045, "grad_norm": 0.5940971374511719, "learning_rate": 0.000790140056022409, "loss": 0.3963, "step": 7599 }, { "epoch": 4.245810055865922, "grad_norm": 0.6905661225318909, "learning_rate": 0.0007901120448179272, "loss": 0.5304, "step": 7600 }, { "epoch": 4.246368715083799, "grad_norm": 0.6886366605758667, "learning_rate": 0.0007900840336134454, "loss": 0.4096, "step": 7601 }, { "epoch": 4.246927374301676, "grad_norm": 0.600100040435791, "learning_rate": 0.0007900560224089636, "loss": 0.4241, "step": 7602 }, { "epoch": 4.247486033519553, "grad_norm": 0.5681229829788208, "learning_rate": 0.0007900280112044818, "loss": 0.4724, "step": 7603 }, { "epoch": 4.24804469273743, "grad_norm": 0.6665762662887573, "learning_rate": 0.00079, "loss": 0.5504, "step": 7604 }, { "epoch": 4.248603351955307, "grad_norm": 0.6190313100814819, "learning_rate": 0.0007899719887955182, "loss": 0.4141, "step": 7605 }, { "epoch": 4.249162011173184, "grad_norm": 0.6787214279174805, "learning_rate": 0.0007899439775910364, "loss": 0.3803, "step": 7606 }, { "epoch": 4.249720670391062, "grad_norm": 0.9613161087036133, "learning_rate": 0.0007899159663865546, "loss": 0.5026, "step": 7607 }, { "epoch": 4.250279329608938, "grad_norm": 0.6363176703453064, "learning_rate": 0.0007898879551820728, "loss": 0.3536, "step": 7608 }, { "epoch": 4.250837988826816, "grad_norm": 0.5358803868293762, "learning_rate": 0.000789859943977591, "loss": 0.4507, "step": 7609 }, { "epoch": 4.251396648044693, 
"grad_norm": 0.6599241495132446, "learning_rate": 0.0007898319327731092, "loss": 0.5843, "step": 7610 }, { "epoch": 4.25195530726257, "grad_norm": 0.9755486845970154, "learning_rate": 0.0007898039215686275, "loss": 0.4964, "step": 7611 }, { "epoch": 4.252513966480447, "grad_norm": 1.0052839517593384, "learning_rate": 0.0007897759103641457, "loss": 0.4895, "step": 7612 }, { "epoch": 4.253072625698324, "grad_norm": 0.41139498353004456, "learning_rate": 0.0007897478991596639, "loss": 0.4121, "step": 7613 }, { "epoch": 4.253631284916201, "grad_norm": 0.6142210364341736, "learning_rate": 0.0007897198879551821, "loss": 0.5226, "step": 7614 }, { "epoch": 4.254189944134078, "grad_norm": 0.5242356061935425, "learning_rate": 0.0007896918767507003, "loss": 0.3528, "step": 7615 }, { "epoch": 4.254748603351955, "grad_norm": 0.6971606016159058, "learning_rate": 0.0007896638655462185, "loss": 0.3932, "step": 7616 }, { "epoch": 4.255307262569833, "grad_norm": 0.998999297618866, "learning_rate": 0.0007896358543417367, "loss": 0.4605, "step": 7617 }, { "epoch": 4.255865921787709, "grad_norm": 0.5562071204185486, "learning_rate": 0.0007896078431372549, "loss": 0.5531, "step": 7618 }, { "epoch": 4.256424581005587, "grad_norm": 0.42885369062423706, "learning_rate": 0.0007895798319327732, "loss": 0.4067, "step": 7619 }, { "epoch": 4.256983240223463, "grad_norm": 0.7504162192344666, "learning_rate": 0.0007895518207282913, "loss": 0.7144, "step": 7620 }, { "epoch": 4.257541899441341, "grad_norm": 0.48764362931251526, "learning_rate": 0.0007895238095238095, "loss": 0.4938, "step": 7621 }, { "epoch": 4.258100558659218, "grad_norm": 0.681430459022522, "learning_rate": 0.0007894957983193277, "loss": 0.4711, "step": 7622 }, { "epoch": 4.258659217877095, "grad_norm": 0.5723384618759155, "learning_rate": 0.0007894677871148459, "loss": 0.449, "step": 7623 }, { "epoch": 4.259217877094972, "grad_norm": 0.5640926957130432, "learning_rate": 0.0007894397759103642, "loss": 0.7216, "step": 7624 }, { 
"epoch": 4.259776536312849, "grad_norm": 0.4291515350341797, "learning_rate": 0.0007894117647058823, "loss": 0.4026, "step": 7625 }, { "epoch": 4.260335195530726, "grad_norm": 0.4841230809688568, "learning_rate": 0.0007893837535014005, "loss": 0.5593, "step": 7626 }, { "epoch": 4.260893854748604, "grad_norm": 0.6583724617958069, "learning_rate": 0.0007893557422969187, "loss": 0.4614, "step": 7627 }, { "epoch": 4.26145251396648, "grad_norm": 0.48537394404411316, "learning_rate": 0.000789327731092437, "loss": 0.454, "step": 7628 }, { "epoch": 4.262011173184358, "grad_norm": 0.6702374815940857, "learning_rate": 0.0007892997198879553, "loss": 0.5409, "step": 7629 }, { "epoch": 4.262569832402234, "grad_norm": 0.47325006127357483, "learning_rate": 0.0007892717086834734, "loss": 0.4857, "step": 7630 }, { "epoch": 4.263128491620112, "grad_norm": 1.185408353805542, "learning_rate": 0.0007892436974789916, "loss": 0.4703, "step": 7631 }, { "epoch": 4.263687150837989, "grad_norm": 0.6024292707443237, "learning_rate": 0.0007892156862745098, "loss": 0.4243, "step": 7632 }, { "epoch": 4.264245810055866, "grad_norm": 0.5890686511993408, "learning_rate": 0.000789187675070028, "loss": 0.3634, "step": 7633 }, { "epoch": 4.264804469273743, "grad_norm": 0.6138582825660706, "learning_rate": 0.0007891596638655463, "loss": 0.5212, "step": 7634 }, { "epoch": 4.26536312849162, "grad_norm": 0.3765842318534851, "learning_rate": 0.0007891316526610645, "loss": 0.4334, "step": 7635 }, { "epoch": 4.265921787709497, "grad_norm": 0.49022841453552246, "learning_rate": 0.0007891036414565826, "loss": 0.4759, "step": 7636 }, { "epoch": 4.266480446927375, "grad_norm": 0.4493389427661896, "learning_rate": 0.0007890756302521008, "loss": 0.4684, "step": 7637 }, { "epoch": 4.267039106145251, "grad_norm": 0.7142762541770935, "learning_rate": 0.000789047619047619, "loss": 0.6615, "step": 7638 }, { "epoch": 4.267597765363129, "grad_norm": 3.7400758266448975, "learning_rate": 0.0007890196078431373, "loss": 
0.5036, "step": 7639 }, { "epoch": 4.268156424581005, "grad_norm": 5.28965950012207, "learning_rate": 0.0007889915966386555, "loss": 0.4729, "step": 7640 }, { "epoch": 4.268715083798883, "grad_norm": 0.5655019879341125, "learning_rate": 0.0007889635854341736, "loss": 0.5352, "step": 7641 }, { "epoch": 4.269273743016759, "grad_norm": 0.6827526092529297, "learning_rate": 0.0007889355742296918, "loss": 0.4772, "step": 7642 }, { "epoch": 4.269832402234637, "grad_norm": 0.5001131892204285, "learning_rate": 0.00078890756302521, "loss": 0.4788, "step": 7643 }, { "epoch": 4.270391061452514, "grad_norm": 1.6869261264801025, "learning_rate": 0.0007888795518207284, "loss": 0.5038, "step": 7644 }, { "epoch": 4.270949720670391, "grad_norm": 0.5407483577728271, "learning_rate": 0.0007888515406162466, "loss": 0.4887, "step": 7645 }, { "epoch": 4.271508379888268, "grad_norm": 0.5994249582290649, "learning_rate": 0.0007888235294117647, "loss": 0.4188, "step": 7646 }, { "epoch": 4.272067039106146, "grad_norm": 0.505515456199646, "learning_rate": 0.0007887955182072829, "loss": 0.4225, "step": 7647 }, { "epoch": 4.272625698324022, "grad_norm": 0.8638238906860352, "learning_rate": 0.0007887675070028011, "loss": 0.4624, "step": 7648 }, { "epoch": 4.2731843575419, "grad_norm": 0.4794963598251343, "learning_rate": 0.0007887394957983194, "loss": 0.4043, "step": 7649 }, { "epoch": 4.273743016759776, "grad_norm": 0.6042397022247314, "learning_rate": 0.0007887114845938376, "loss": 0.4738, "step": 7650 }, { "epoch": 4.274301675977654, "grad_norm": 0.5241755247116089, "learning_rate": 0.0007886834733893558, "loss": 0.4644, "step": 7651 }, { "epoch": 4.27486033519553, "grad_norm": 0.5943676829338074, "learning_rate": 0.0007886554621848739, "loss": 0.4022, "step": 7652 }, { "epoch": 4.275418994413408, "grad_norm": 0.42731931805610657, "learning_rate": 0.0007886274509803921, "loss": 0.435, "step": 7653 }, { "epoch": 4.275977653631285, "grad_norm": 0.4742490351200104, "learning_rate": 
0.0007885994397759104, "loss": 0.4469, "step": 7654 }, { "epoch": 4.276536312849162, "grad_norm": 6.935084342956543, "learning_rate": 0.0007885714285714286, "loss": 0.4071, "step": 7655 }, { "epoch": 4.277094972067039, "grad_norm": 0.40504932403564453, "learning_rate": 0.0007885434173669468, "loss": 0.3801, "step": 7656 }, { "epoch": 4.277653631284916, "grad_norm": 0.44817519187927246, "learning_rate": 0.0007885154061624649, "loss": 0.3845, "step": 7657 }, { "epoch": 4.278212290502793, "grad_norm": 0.5161679983139038, "learning_rate": 0.0007884873949579831, "loss": 0.4996, "step": 7658 }, { "epoch": 4.278770949720671, "grad_norm": 0.5023806095123291, "learning_rate": 0.0007884593837535015, "loss": 0.598, "step": 7659 }, { "epoch": 4.279329608938547, "grad_norm": 0.6727524995803833, "learning_rate": 0.0007884313725490197, "loss": 0.4819, "step": 7660 }, { "epoch": 4.279888268156425, "grad_norm": 0.7039174437522888, "learning_rate": 0.0007884033613445379, "loss": 0.4021, "step": 7661 }, { "epoch": 4.280446927374301, "grad_norm": 0.688735842704773, "learning_rate": 0.000788375350140056, "loss": 0.464, "step": 7662 }, { "epoch": 4.281005586592179, "grad_norm": 0.5400928854942322, "learning_rate": 0.0007883473389355742, "loss": 0.5049, "step": 7663 }, { "epoch": 4.281564245810056, "grad_norm": 0.6422010064125061, "learning_rate": 0.0007883193277310925, "loss": 0.4616, "step": 7664 }, { "epoch": 4.282122905027933, "grad_norm": 0.4987924098968506, "learning_rate": 0.0007882913165266107, "loss": 0.5504, "step": 7665 }, { "epoch": 4.28268156424581, "grad_norm": 0.9322398900985718, "learning_rate": 0.0007882633053221289, "loss": 0.6021, "step": 7666 }, { "epoch": 4.283240223463687, "grad_norm": 0.5510703325271606, "learning_rate": 0.0007882352941176471, "loss": 0.4432, "step": 7667 }, { "epoch": 4.283798882681564, "grad_norm": 0.7457737922668457, "learning_rate": 0.0007882072829131652, "loss": 0.4212, "step": 7668 }, { "epoch": 4.284357541899442, "grad_norm": 
8.379426956176758, "learning_rate": 0.0007881792717086835, "loss": 0.4409, "step": 7669 }, { "epoch": 4.284916201117318, "grad_norm": 0.7301014065742493, "learning_rate": 0.0007881512605042017, "loss": 0.4837, "step": 7670 }, { "epoch": 4.285474860335196, "grad_norm": 0.8636860847473145, "learning_rate": 0.0007881232492997199, "loss": 0.4491, "step": 7671 }, { "epoch": 4.286033519553072, "grad_norm": 0.5195205211639404, "learning_rate": 0.0007880952380952381, "loss": 0.5588, "step": 7672 }, { "epoch": 4.28659217877095, "grad_norm": 0.5753498077392578, "learning_rate": 0.0007880672268907562, "loss": 0.5495, "step": 7673 }, { "epoch": 4.287150837988827, "grad_norm": 0.5269135236740112, "learning_rate": 0.0007880392156862745, "loss": 0.3289, "step": 7674 }, { "epoch": 4.287709497206704, "grad_norm": 0.7850725650787354, "learning_rate": 0.0007880112044817927, "loss": 0.3754, "step": 7675 }, { "epoch": 4.288268156424581, "grad_norm": 0.5600786805152893, "learning_rate": 0.000787983193277311, "loss": 0.4287, "step": 7676 }, { "epoch": 4.288826815642458, "grad_norm": 0.5487927198410034, "learning_rate": 0.0007879551820728292, "loss": 0.6042, "step": 7677 }, { "epoch": 4.289385474860335, "grad_norm": 0.5627371668815613, "learning_rate": 0.0007879271708683473, "loss": 0.5317, "step": 7678 }, { "epoch": 4.289944134078212, "grad_norm": 0.6596568822860718, "learning_rate": 0.0007878991596638656, "loss": 0.7527, "step": 7679 }, { "epoch": 4.290502793296089, "grad_norm": 0.47150665521621704, "learning_rate": 0.0007878711484593838, "loss": 0.395, "step": 7680 }, { "epoch": 4.291061452513967, "grad_norm": 0.6270442605018616, "learning_rate": 0.000787843137254902, "loss": 0.417, "step": 7681 }, { "epoch": 4.291620111731843, "grad_norm": 0.5527673363685608, "learning_rate": 0.0007878151260504202, "loss": 0.4463, "step": 7682 }, { "epoch": 4.292178770949721, "grad_norm": 0.8703657388687134, "learning_rate": 0.0007877871148459384, "loss": 0.4557, "step": 7683 }, { "epoch": 
4.292737430167598, "grad_norm": 0.594548761844635, "learning_rate": 0.0007877591036414566, "loss": 0.4068, "step": 7684 }, { "epoch": 4.293296089385475, "grad_norm": 0.7797442078590393, "learning_rate": 0.0007877310924369748, "loss": 0.4055, "step": 7685 }, { "epoch": 4.293854748603352, "grad_norm": 1.4545782804489136, "learning_rate": 0.000787703081232493, "loss": 0.5126, "step": 7686 }, { "epoch": 4.294413407821229, "grad_norm": 0.6887294054031372, "learning_rate": 0.0007876750700280112, "loss": 0.5332, "step": 7687 }, { "epoch": 4.294972067039106, "grad_norm": 0.6880782842636108, "learning_rate": 0.0007876470588235294, "loss": 0.5723, "step": 7688 }, { "epoch": 4.295530726256983, "grad_norm": 1.3066493272781372, "learning_rate": 0.0007876190476190476, "loss": 0.5452, "step": 7689 }, { "epoch": 4.29608938547486, "grad_norm": 0.5060390830039978, "learning_rate": 0.0007875910364145658, "loss": 0.4856, "step": 7690 }, { "epoch": 4.296648044692738, "grad_norm": 0.7090829014778137, "learning_rate": 0.000787563025210084, "loss": 0.4813, "step": 7691 }, { "epoch": 4.297206703910614, "grad_norm": 0.7248459458351135, "learning_rate": 0.0007875350140056022, "loss": 0.4917, "step": 7692 }, { "epoch": 4.297765363128492, "grad_norm": 0.4943256080150604, "learning_rate": 0.0007875070028011205, "loss": 0.3881, "step": 7693 }, { "epoch": 4.298324022346368, "grad_norm": 0.6828133463859558, "learning_rate": 0.0007874789915966388, "loss": 0.51, "step": 7694 }, { "epoch": 4.298882681564246, "grad_norm": 0.6464107632637024, "learning_rate": 0.0007874509803921569, "loss": 0.4633, "step": 7695 }, { "epoch": 4.299441340782123, "grad_norm": 0.49390271306037903, "learning_rate": 0.0007874229691876751, "loss": 0.3729, "step": 7696 }, { "epoch": 4.3, "grad_norm": 0.8593959212303162, "learning_rate": 0.0007873949579831933, "loss": 0.4379, "step": 7697 }, { "epoch": 4.300558659217877, "grad_norm": 0.4330633580684662, "learning_rate": 0.0007873669467787115, "loss": 0.3912, "step": 7698 }, { 
"epoch": 4.301117318435754, "grad_norm": 0.6093931198120117, "learning_rate": 0.0007873389355742298, "loss": 0.3898, "step": 7699 }, { "epoch": 4.301675977653631, "grad_norm": 0.6548773646354675, "learning_rate": 0.0007873109243697479, "loss": 0.3353, "step": 7700 }, { "epoch": 4.302234636871509, "grad_norm": 0.5223304629325867, "learning_rate": 0.0007872829131652661, "loss": 0.4734, "step": 7701 }, { "epoch": 4.302793296089385, "grad_norm": 1.0821515321731567, "learning_rate": 0.0007872549019607843, "loss": 0.4416, "step": 7702 }, { "epoch": 4.303351955307263, "grad_norm": 2.705974817276001, "learning_rate": 0.0007872268907563025, "loss": 0.443, "step": 7703 }, { "epoch": 4.303910614525139, "grad_norm": 0.5604917407035828, "learning_rate": 0.0007871988795518208, "loss": 0.4503, "step": 7704 }, { "epoch": 4.304469273743017, "grad_norm": 0.6788303852081299, "learning_rate": 0.0007871708683473389, "loss": 0.441, "step": 7705 }, { "epoch": 4.305027932960894, "grad_norm": 0.9525696635246277, "learning_rate": 0.0007871428571428571, "loss": 0.3924, "step": 7706 }, { "epoch": 4.305586592178771, "grad_norm": 1.9131962060928345, "learning_rate": 0.0007871148459383753, "loss": 0.5338, "step": 7707 }, { "epoch": 4.306145251396648, "grad_norm": 0.405402809381485, "learning_rate": 0.0007870868347338935, "loss": 0.428, "step": 7708 }, { "epoch": 4.306703910614525, "grad_norm": 0.40964922308921814, "learning_rate": 0.0007870588235294119, "loss": 0.3859, "step": 7709 }, { "epoch": 4.307262569832402, "grad_norm": 1.9622915983200073, "learning_rate": 0.0007870308123249301, "loss": 0.2718, "step": 7710 }, { "epoch": 4.30782122905028, "grad_norm": 1.5938780307769775, "learning_rate": 0.0007870028011204482, "loss": 0.4962, "step": 7711 }, { "epoch": 4.308379888268156, "grad_norm": 0.3893345892429352, "learning_rate": 0.0007869747899159664, "loss": 0.3454, "step": 7712 }, { "epoch": 4.308938547486034, "grad_norm": 0.6588876247406006, "learning_rate": 0.0007869467787114846, "loss": 
0.4171, "step": 7713 }, { "epoch": 4.30949720670391, "grad_norm": 0.6357071399688721, "learning_rate": 0.0007869187675070029, "loss": 0.4736, "step": 7714 }, { "epoch": 4.310055865921788, "grad_norm": 2.4534640312194824, "learning_rate": 0.0007868907563025211, "loss": 0.4182, "step": 7715 }, { "epoch": 4.310614525139664, "grad_norm": 0.3842434883117676, "learning_rate": 0.0007868627450980392, "loss": 0.4628, "step": 7716 }, { "epoch": 4.311173184357542, "grad_norm": 0.8772587776184082, "learning_rate": 0.0007868347338935574, "loss": 0.5188, "step": 7717 }, { "epoch": 4.311731843575419, "grad_norm": 0.8579975366592407, "learning_rate": 0.0007868067226890756, "loss": 0.4531, "step": 7718 }, { "epoch": 4.312290502793296, "grad_norm": 0.9199638962745667, "learning_rate": 0.0007867787114845939, "loss": 0.4908, "step": 7719 }, { "epoch": 4.312849162011173, "grad_norm": 1.6186927556991577, "learning_rate": 0.0007867507002801121, "loss": 0.3756, "step": 7720 }, { "epoch": 4.31340782122905, "grad_norm": 0.6694222688674927, "learning_rate": 0.0007867226890756302, "loss": 0.4425, "step": 7721 }, { "epoch": 4.313966480446927, "grad_norm": 1.8523377180099487, "learning_rate": 0.0007866946778711484, "loss": 0.3456, "step": 7722 }, { "epoch": 4.314525139664805, "grad_norm": 0.6399108171463013, "learning_rate": 0.0007866666666666666, "loss": 0.6505, "step": 7723 }, { "epoch": 4.315083798882681, "grad_norm": 0.48726019263267517, "learning_rate": 0.000786638655462185, "loss": 0.4961, "step": 7724 }, { "epoch": 4.315642458100559, "grad_norm": 0.5752406716346741, "learning_rate": 0.0007866106442577032, "loss": 0.5008, "step": 7725 }, { "epoch": 4.316201117318435, "grad_norm": 0.5257292985916138, "learning_rate": 0.0007865826330532214, "loss": 0.5904, "step": 7726 }, { "epoch": 4.316759776536313, "grad_norm": 0.8009903430938721, "learning_rate": 0.0007865546218487395, "loss": 0.4734, "step": 7727 }, { "epoch": 4.31731843575419, "grad_norm": 2.6037352085113525, "learning_rate": 
0.0007865266106442577, "loss": 0.3808, "step": 7728 }, { "epoch": 4.317877094972067, "grad_norm": 0.42437905073165894, "learning_rate": 0.000786498599439776, "loss": 0.5026, "step": 7729 }, { "epoch": 4.318435754189944, "grad_norm": 0.6118500828742981, "learning_rate": 0.0007864705882352942, "loss": 0.4495, "step": 7730 }, { "epoch": 4.318994413407821, "grad_norm": 0.49491187930107117, "learning_rate": 0.0007864425770308124, "loss": 0.5, "step": 7731 }, { "epoch": 4.319553072625698, "grad_norm": 0.5296121835708618, "learning_rate": 0.0007864145658263305, "loss": 0.4045, "step": 7732 }, { "epoch": 4.320111731843576, "grad_norm": 0.45565447211265564, "learning_rate": 0.0007863865546218487, "loss": 0.4107, "step": 7733 }, { "epoch": 4.320670391061452, "grad_norm": 0.556579053401947, "learning_rate": 0.000786358543417367, "loss": 0.4661, "step": 7734 }, { "epoch": 4.32122905027933, "grad_norm": 0.434957891702652, "learning_rate": 0.0007863305322128852, "loss": 0.3443, "step": 7735 }, { "epoch": 4.321787709497206, "grad_norm": 0.6243918538093567, "learning_rate": 0.0007863025210084034, "loss": 0.5631, "step": 7736 }, { "epoch": 4.322346368715084, "grad_norm": 0.546448826789856, "learning_rate": 0.0007862745098039215, "loss": 0.4049, "step": 7737 }, { "epoch": 4.322905027932961, "grad_norm": 0.6531659960746765, "learning_rate": 0.0007862464985994397, "loss": 0.4982, "step": 7738 }, { "epoch": 4.323463687150838, "grad_norm": 1.112093448638916, "learning_rate": 0.000786218487394958, "loss": 0.4305, "step": 7739 }, { "epoch": 4.324022346368715, "grad_norm": 0.5378101468086243, "learning_rate": 0.0007861904761904762, "loss": 0.5778, "step": 7740 }, { "epoch": 4.324581005586592, "grad_norm": 1.5151773691177368, "learning_rate": 0.0007861624649859945, "loss": 0.5471, "step": 7741 }, { "epoch": 4.325139664804469, "grad_norm": 1.3583226203918457, "learning_rate": 0.0007861344537815127, "loss": 0.4314, "step": 7742 }, { "epoch": 4.325698324022347, "grad_norm": 0.5055039525032043, 
"learning_rate": 0.0007861064425770308, "loss": 0.4683, "step": 7743 }, { "epoch": 4.326256983240223, "grad_norm": 1.527499794960022, "learning_rate": 0.0007860784313725491, "loss": 0.4038, "step": 7744 }, { "epoch": 4.326815642458101, "grad_norm": 0.4101034998893738, "learning_rate": 0.0007860504201680673, "loss": 0.4146, "step": 7745 }, { "epoch": 4.327374301675977, "grad_norm": 0.6584548950195312, "learning_rate": 0.0007860224089635855, "loss": 0.4659, "step": 7746 }, { "epoch": 4.327932960893855, "grad_norm": 0.5194028615951538, "learning_rate": 0.0007859943977591037, "loss": 0.5274, "step": 7747 }, { "epoch": 4.328491620111732, "grad_norm": 0.9011557698249817, "learning_rate": 0.0007859663865546218, "loss": 0.5186, "step": 7748 }, { "epoch": 4.329050279329609, "grad_norm": 0.5350490808486938, "learning_rate": 0.0007859383753501401, "loss": 0.5378, "step": 7749 }, { "epoch": 4.329608938547486, "grad_norm": 0.5108752250671387, "learning_rate": 0.0007859103641456583, "loss": 0.4121, "step": 7750 }, { "epoch": 4.330167597765363, "grad_norm": 0.3764178454875946, "learning_rate": 0.0007858823529411765, "loss": 0.3697, "step": 7751 }, { "epoch": 4.33072625698324, "grad_norm": 0.7153195142745972, "learning_rate": 0.0007858543417366947, "loss": 0.4836, "step": 7752 }, { "epoch": 4.331284916201117, "grad_norm": 0.4839271008968353, "learning_rate": 0.0007858263305322128, "loss": 0.4223, "step": 7753 }, { "epoch": 4.331843575418994, "grad_norm": 0.5079770684242249, "learning_rate": 0.0007857983193277311, "loss": 0.3767, "step": 7754 }, { "epoch": 4.332402234636872, "grad_norm": 0.7013403177261353, "learning_rate": 0.0007857703081232493, "loss": 0.4769, "step": 7755 }, { "epoch": 4.332960893854748, "grad_norm": 0.5539823770523071, "learning_rate": 0.0007857422969187675, "loss": 0.5086, "step": 7756 }, { "epoch": 4.333519553072626, "grad_norm": 0.49007055163383484, "learning_rate": 0.0007857142857142857, "loss": 0.5165, "step": 7757 }, { "epoch": 4.334078212290502, 
"grad_norm": 0.7004421949386597, "learning_rate": 0.000785686274509804, "loss": 0.4767, "step": 7758 }, { "epoch": 4.33463687150838, "grad_norm": 2.1830976009368896, "learning_rate": 0.0007856582633053222, "loss": 0.4153, "step": 7759 }, { "epoch": 4.335195530726257, "grad_norm": 0.42629343271255493, "learning_rate": 0.0007856302521008404, "loss": 0.4139, "step": 7760 }, { "epoch": 4.335754189944134, "grad_norm": 0.6583541631698608, "learning_rate": 0.0007856022408963586, "loss": 0.536, "step": 7761 }, { "epoch": 4.336312849162011, "grad_norm": 3.1524322032928467, "learning_rate": 0.0007855742296918768, "loss": 0.4799, "step": 7762 }, { "epoch": 4.336871508379888, "grad_norm": 0.6064858436584473, "learning_rate": 0.000785546218487395, "loss": 0.4707, "step": 7763 }, { "epoch": 4.337430167597765, "grad_norm": 0.4527079164981842, "learning_rate": 0.0007855182072829132, "loss": 0.4685, "step": 7764 }, { "epoch": 4.337988826815643, "grad_norm": 0.4843803644180298, "learning_rate": 0.0007854901960784314, "loss": 0.4966, "step": 7765 }, { "epoch": 4.338547486033519, "grad_norm": 0.5902796387672424, "learning_rate": 0.0007854621848739496, "loss": 0.385, "step": 7766 }, { "epoch": 4.339106145251397, "grad_norm": 0.5157030820846558, "learning_rate": 0.0007854341736694678, "loss": 0.4824, "step": 7767 }, { "epoch": 4.339664804469273, "grad_norm": 0.38306954503059387, "learning_rate": 0.000785406162464986, "loss": 0.4456, "step": 7768 }, { "epoch": 4.340223463687151, "grad_norm": 0.3922554850578308, "learning_rate": 0.0007853781512605042, "loss": 0.3762, "step": 7769 }, { "epoch": 4.340782122905028, "grad_norm": 0.3859819173812866, "learning_rate": 0.0007853501400560224, "loss": 0.4752, "step": 7770 }, { "epoch": 4.341340782122905, "grad_norm": 0.7145147919654846, "learning_rate": 0.0007853221288515406, "loss": 0.4156, "step": 7771 }, { "epoch": 4.341899441340782, "grad_norm": 0.5755166411399841, "learning_rate": 0.0007852941176470588, "loss": 0.5298, "step": 7772 }, { 
"epoch": 4.342458100558659, "grad_norm": 0.8832987546920776, "learning_rate": 0.000785266106442577, "loss": 0.5056, "step": 7773 }, { "epoch": 4.343016759776536, "grad_norm": 0.4791451096534729, "learning_rate": 0.0007852380952380954, "loss": 0.4247, "step": 7774 }, { "epoch": 4.343575418994414, "grad_norm": 0.48688995838165283, "learning_rate": 0.0007852100840336135, "loss": 0.3427, "step": 7775 }, { "epoch": 4.34413407821229, "grad_norm": 0.6144006252288818, "learning_rate": 0.0007851820728291317, "loss": 0.5207, "step": 7776 }, { "epoch": 4.344692737430168, "grad_norm": 0.7153996229171753, "learning_rate": 0.0007851540616246499, "loss": 0.4333, "step": 7777 }, { "epoch": 4.345251396648044, "grad_norm": 0.5082197189331055, "learning_rate": 0.0007851260504201681, "loss": 0.5625, "step": 7778 }, { "epoch": 4.345810055865922, "grad_norm": 0.7209962606430054, "learning_rate": 0.0007850980392156864, "loss": 0.3082, "step": 7779 }, { "epoch": 4.346368715083799, "grad_norm": 1.3629626035690308, "learning_rate": 0.0007850700280112045, "loss": 0.7621, "step": 7780 }, { "epoch": 4.346927374301676, "grad_norm": 0.805191159248352, "learning_rate": 0.0007850420168067227, "loss": 0.3873, "step": 7781 }, { "epoch": 4.347486033519553, "grad_norm": 0.5278725624084473, "learning_rate": 0.0007850140056022409, "loss": 0.5137, "step": 7782 }, { "epoch": 4.34804469273743, "grad_norm": 1.0014837980270386, "learning_rate": 0.0007849859943977591, "loss": 0.4251, "step": 7783 }, { "epoch": 4.348603351955307, "grad_norm": 1.8876439332962036, "learning_rate": 0.0007849579831932774, "loss": 0.4037, "step": 7784 }, { "epoch": 4.349162011173185, "grad_norm": 1.614197015762329, "learning_rate": 0.0007849299719887955, "loss": 0.6387, "step": 7785 }, { "epoch": 4.349720670391061, "grad_norm": 0.41699346899986267, "learning_rate": 0.0007849019607843137, "loss": 0.4062, "step": 7786 }, { "epoch": 4.350279329608939, "grad_norm": 3.9328601360321045, "learning_rate": 0.0007848739495798319, "loss": 
0.4809, "step": 7787 }, { "epoch": 4.350837988826815, "grad_norm": 0.7399049997329712, "learning_rate": 0.0007848459383753501, "loss": 0.5158, "step": 7788 }, { "epoch": 4.351396648044693, "grad_norm": 0.4915332496166229, "learning_rate": 0.0007848179271708684, "loss": 0.4852, "step": 7789 }, { "epoch": 4.351955307262569, "grad_norm": 0.622516930103302, "learning_rate": 0.0007847899159663867, "loss": 0.5343, "step": 7790 }, { "epoch": 4.352513966480447, "grad_norm": 0.5885820984840393, "learning_rate": 0.0007847619047619047, "loss": 0.3692, "step": 7791 }, { "epoch": 4.353072625698324, "grad_norm": 0.8940126895904541, "learning_rate": 0.000784733893557423, "loss": 0.5232, "step": 7792 }, { "epoch": 4.353631284916201, "grad_norm": 0.5329613089561462, "learning_rate": 0.0007847058823529412, "loss": 0.4829, "step": 7793 }, { "epoch": 4.354189944134078, "grad_norm": 0.5722092986106873, "learning_rate": 0.0007846778711484595, "loss": 0.3944, "step": 7794 }, { "epoch": 4.354748603351955, "grad_norm": 1.0231282711029053, "learning_rate": 0.0007846498599439777, "loss": 0.6275, "step": 7795 }, { "epoch": 4.355307262569832, "grad_norm": 0.5589284896850586, "learning_rate": 0.0007846218487394958, "loss": 0.463, "step": 7796 }, { "epoch": 4.35586592178771, "grad_norm": 0.7263539433479309, "learning_rate": 0.000784593837535014, "loss": 0.5055, "step": 7797 }, { "epoch": 4.356424581005586, "grad_norm": 0.7210206389427185, "learning_rate": 0.0007845658263305322, "loss": 0.523, "step": 7798 }, { "epoch": 4.356983240223464, "grad_norm": 0.7375660538673401, "learning_rate": 0.0007845378151260505, "loss": 0.5027, "step": 7799 }, { "epoch": 4.35754189944134, "grad_norm": 0.7630391120910645, "learning_rate": 0.0007845098039215687, "loss": 0.5205, "step": 7800 }, { "epoch": 4.358100558659218, "grad_norm": 1.0278470516204834, "learning_rate": 0.0007844817927170868, "loss": 0.4544, "step": 7801 }, { "epoch": 4.358659217877095, "grad_norm": 0.5347258448600769, "learning_rate": 
0.000784453781512605, "loss": 0.4345, "step": 7802 }, { "epoch": 4.359217877094972, "grad_norm": 1.4582527875900269, "learning_rate": 0.0007844257703081232, "loss": 0.4329, "step": 7803 }, { "epoch": 4.359776536312849, "grad_norm": 0.9960485100746155, "learning_rate": 0.0007843977591036415, "loss": 0.4255, "step": 7804 }, { "epoch": 4.360335195530726, "grad_norm": 0.6236252784729004, "learning_rate": 0.0007843697478991597, "loss": 0.393, "step": 7805 }, { "epoch": 4.360893854748603, "grad_norm": 0.4125659167766571, "learning_rate": 0.000784341736694678, "loss": 0.4279, "step": 7806 }, { "epoch": 4.361452513966481, "grad_norm": 0.39700162410736084, "learning_rate": 0.000784313725490196, "loss": 0.3637, "step": 7807 }, { "epoch": 4.362011173184357, "grad_norm": 0.9261670112609863, "learning_rate": 0.0007842857142857143, "loss": 0.3885, "step": 7808 }, { "epoch": 4.362569832402235, "grad_norm": 0.5615954995155334, "learning_rate": 0.0007842577030812325, "loss": 0.4794, "step": 7809 }, { "epoch": 4.363128491620111, "grad_norm": 0.527035653591156, "learning_rate": 0.0007842296918767508, "loss": 0.3945, "step": 7810 }, { "epoch": 4.363687150837989, "grad_norm": 0.559574544429779, "learning_rate": 0.000784201680672269, "loss": 0.4401, "step": 7811 }, { "epoch": 4.364245810055866, "grad_norm": 0.8743611574172974, "learning_rate": 0.0007841736694677871, "loss": 0.4705, "step": 7812 }, { "epoch": 4.364804469273743, "grad_norm": 0.5005408525466919, "learning_rate": 0.0007841456582633053, "loss": 0.4856, "step": 7813 }, { "epoch": 4.36536312849162, "grad_norm": 0.6589733958244324, "learning_rate": 0.0007841176470588235, "loss": 0.4012, "step": 7814 }, { "epoch": 4.365921787709497, "grad_norm": 0.5443500280380249, "learning_rate": 0.0007840896358543418, "loss": 0.4373, "step": 7815 }, { "epoch": 4.366480446927374, "grad_norm": 1.3163281679153442, "learning_rate": 0.00078406162464986, "loss": 0.4332, "step": 7816 }, { "epoch": 4.367039106145251, "grad_norm": 0.557763934135437, 
"learning_rate": 0.0007840336134453781, "loss": 0.3935, "step": 7817 }, { "epoch": 4.367597765363128, "grad_norm": 1.3015865087509155, "learning_rate": 0.0007840056022408963, "loss": 0.5916, "step": 7818 }, { "epoch": 4.368156424581006, "grad_norm": 0.6346587538719177, "learning_rate": 0.0007839775910364145, "loss": 0.4878, "step": 7819 }, { "epoch": 4.368715083798882, "grad_norm": 0.5230052471160889, "learning_rate": 0.0007839495798319328, "loss": 0.5098, "step": 7820 }, { "epoch": 4.36927374301676, "grad_norm": 1.444474458694458, "learning_rate": 0.000783921568627451, "loss": 0.4916, "step": 7821 }, { "epoch": 4.369832402234637, "grad_norm": 0.438775897026062, "learning_rate": 0.0007838935574229692, "loss": 0.4075, "step": 7822 }, { "epoch": 4.370391061452514, "grad_norm": 0.8922704458236694, "learning_rate": 0.0007838655462184873, "loss": 0.4208, "step": 7823 }, { "epoch": 4.370949720670391, "grad_norm": 0.5115568041801453, "learning_rate": 0.0007838375350140055, "loss": 0.4542, "step": 7824 }, { "epoch": 4.371508379888268, "grad_norm": 0.5422394871711731, "learning_rate": 0.0007838095238095239, "loss": 0.6153, "step": 7825 }, { "epoch": 4.372067039106145, "grad_norm": 0.5241320133209229, "learning_rate": 0.0007837815126050421, "loss": 0.5124, "step": 7826 }, { "epoch": 4.372625698324022, "grad_norm": 1.0834288597106934, "learning_rate": 0.0007837535014005603, "loss": 0.4078, "step": 7827 }, { "epoch": 4.373184357541899, "grad_norm": 0.45994749665260315, "learning_rate": 0.0007837254901960784, "loss": 0.4821, "step": 7828 }, { "epoch": 4.373743016759777, "grad_norm": 0.39013850688934326, "learning_rate": 0.0007836974789915966, "loss": 0.4277, "step": 7829 }, { "epoch": 4.374301675977653, "grad_norm": 0.5368283987045288, "learning_rate": 0.0007836694677871149, "loss": 0.4957, "step": 7830 }, { "epoch": 4.374860335195531, "grad_norm": 0.389849454164505, "learning_rate": 0.0007836414565826331, "loss": 0.3773, "step": 7831 }, { "epoch": 4.375418994413407, 
"grad_norm": 0.41745665669441223, "learning_rate": 0.0007836134453781513, "loss": 0.5205, "step": 7832 }, { "epoch": 4.375977653631285, "grad_norm": 0.49127280712127686, "learning_rate": 0.0007835854341736694, "loss": 0.4195, "step": 7833 }, { "epoch": 4.376536312849162, "grad_norm": 2.224849224090576, "learning_rate": 0.0007835574229691876, "loss": 0.3811, "step": 7834 }, { "epoch": 4.377094972067039, "grad_norm": 0.5694179534912109, "learning_rate": 0.0007835294117647059, "loss": 0.4436, "step": 7835 }, { "epoch": 4.377653631284916, "grad_norm": 0.39705926179885864, "learning_rate": 0.0007835014005602241, "loss": 0.3924, "step": 7836 }, { "epoch": 4.378212290502793, "grad_norm": 0.5426232218742371, "learning_rate": 0.0007834733893557423, "loss": 0.5849, "step": 7837 }, { "epoch": 4.37877094972067, "grad_norm": 0.4692694842815399, "learning_rate": 0.0007834453781512605, "loss": 0.4039, "step": 7838 }, { "epoch": 4.379329608938548, "grad_norm": 0.9278736710548401, "learning_rate": 0.0007834173669467786, "loss": 0.5031, "step": 7839 }, { "epoch": 4.379888268156424, "grad_norm": 1.2467507123947144, "learning_rate": 0.000783389355742297, "loss": 0.3864, "step": 7840 }, { "epoch": 4.380446927374302, "grad_norm": 0.4491453170776367, "learning_rate": 0.0007833613445378152, "loss": 0.425, "step": 7841 }, { "epoch": 4.381005586592178, "grad_norm": 0.44495099782943726, "learning_rate": 0.0007833333333333334, "loss": 0.5741, "step": 7842 }, { "epoch": 4.381564245810056, "grad_norm": 0.5510337948799133, "learning_rate": 0.0007833053221288516, "loss": 0.4354, "step": 7843 }, { "epoch": 4.382122905027933, "grad_norm": 0.6221334338188171, "learning_rate": 0.0007832773109243697, "loss": 0.472, "step": 7844 }, { "epoch": 4.38268156424581, "grad_norm": 0.729478120803833, "learning_rate": 0.000783249299719888, "loss": 0.4852, "step": 7845 }, { "epoch": 4.383240223463687, "grad_norm": 0.4749382734298706, "learning_rate": 0.0007832212885154062, "loss": 0.5305, "step": 7846 }, { 
"epoch": 4.383798882681564, "grad_norm": 0.5327832698822021, "learning_rate": 0.0007831932773109244, "loss": 0.5709, "step": 7847 }, { "epoch": 4.384357541899441, "grad_norm": 0.5454557538032532, "learning_rate": 0.0007831652661064426, "loss": 0.4244, "step": 7848 }, { "epoch": 4.384916201117319, "grad_norm": 0.4900103807449341, "learning_rate": 0.0007831372549019607, "loss": 0.5125, "step": 7849 }, { "epoch": 4.385474860335195, "grad_norm": 3.827024221420288, "learning_rate": 0.000783109243697479, "loss": 0.4195, "step": 7850 }, { "epoch": 4.386033519553073, "grad_norm": 0.8269819021224976, "learning_rate": 0.0007830812324929972, "loss": 0.5196, "step": 7851 }, { "epoch": 4.386592178770949, "grad_norm": 0.4805721640586853, "learning_rate": 0.0007830532212885154, "loss": 0.4915, "step": 7852 }, { "epoch": 4.387150837988827, "grad_norm": 0.4305911362171173, "learning_rate": 0.0007830252100840336, "loss": 0.4626, "step": 7853 }, { "epoch": 4.3877094972067034, "grad_norm": 0.5364338755607605, "learning_rate": 0.0007829971988795518, "loss": 0.4035, "step": 7854 }, { "epoch": 4.388268156424581, "grad_norm": 0.6063306927680969, "learning_rate": 0.00078296918767507, "loss": 0.5625, "step": 7855 }, { "epoch": 4.388826815642458, "grad_norm": 0.8494971990585327, "learning_rate": 0.0007829411764705882, "loss": 0.4519, "step": 7856 }, { "epoch": 4.389385474860335, "grad_norm": 0.5195983052253723, "learning_rate": 0.0007829131652661065, "loss": 0.4807, "step": 7857 }, { "epoch": 4.389944134078212, "grad_norm": 0.7848777174949646, "learning_rate": 0.0007828851540616247, "loss": 0.6383, "step": 7858 }, { "epoch": 4.39050279329609, "grad_norm": 0.5168319344520569, "learning_rate": 0.0007828571428571429, "loss": 0.5109, "step": 7859 }, { "epoch": 4.391061452513966, "grad_norm": 0.4914497435092926, "learning_rate": 0.0007828291316526611, "loss": 0.3733, "step": 7860 }, { "epoch": 4.391620111731844, "grad_norm": 0.7273851633071899, "learning_rate": 0.0007828011204481793, "loss": 
0.3579, "step": 7861 }, { "epoch": 4.39217877094972, "grad_norm": 0.5737335681915283, "learning_rate": 0.0007827731092436975, "loss": 0.3731, "step": 7862 }, { "epoch": 4.392737430167598, "grad_norm": 2.8053438663482666, "learning_rate": 0.0007827450980392157, "loss": 0.5674, "step": 7863 }, { "epoch": 4.3932960893854744, "grad_norm": 0.4080544412136078, "learning_rate": 0.0007827170868347339, "loss": 0.3676, "step": 7864 }, { "epoch": 4.393854748603352, "grad_norm": 0.45019564032554626, "learning_rate": 0.0007826890756302522, "loss": 0.3994, "step": 7865 }, { "epoch": 4.394413407821229, "grad_norm": 0.5106619596481323, "learning_rate": 0.0007826610644257703, "loss": 0.4992, "step": 7866 }, { "epoch": 4.394972067039106, "grad_norm": 0.4138012230396271, "learning_rate": 0.0007826330532212885, "loss": 0.4457, "step": 7867 }, { "epoch": 4.395530726256983, "grad_norm": 0.5644697546958923, "learning_rate": 0.0007826050420168067, "loss": 0.6089, "step": 7868 }, { "epoch": 4.39608938547486, "grad_norm": 0.5922969579696655, "learning_rate": 0.0007825770308123249, "loss": 0.5281, "step": 7869 }, { "epoch": 4.396648044692737, "grad_norm": 0.4072439968585968, "learning_rate": 0.0007825490196078432, "loss": 0.4275, "step": 7870 }, { "epoch": 4.397206703910615, "grad_norm": 0.7072669863700867, "learning_rate": 0.0007825210084033613, "loss": 0.4589, "step": 7871 }, { "epoch": 4.397765363128491, "grad_norm": 0.6356338858604431, "learning_rate": 0.0007824929971988795, "loss": 0.4508, "step": 7872 }, { "epoch": 4.398324022346369, "grad_norm": 0.8144755363464355, "learning_rate": 0.0007824649859943977, "loss": 0.5293, "step": 7873 }, { "epoch": 4.3988826815642454, "grad_norm": 1.7882473468780518, "learning_rate": 0.000782436974789916, "loss": 0.4197, "step": 7874 }, { "epoch": 4.399441340782123, "grad_norm": 0.9120373725891113, "learning_rate": 0.0007824089635854343, "loss": 0.5276, "step": 7875 }, { "epoch": 4.4, "grad_norm": 0.6548590660095215, "learning_rate": 
0.0007823809523809524, "loss": 0.5009, "step": 7876 }, { "epoch": 4.400558659217877, "grad_norm": 0.4574669301509857, "learning_rate": 0.0007823529411764706, "loss": 0.4263, "step": 7877 }, { "epoch": 4.401117318435754, "grad_norm": 1.0311496257781982, "learning_rate": 0.0007823249299719888, "loss": 0.4399, "step": 7878 }, { "epoch": 4.401675977653631, "grad_norm": 0.5758455991744995, "learning_rate": 0.000782296918767507, "loss": 0.5233, "step": 7879 }, { "epoch": 4.402234636871508, "grad_norm": 0.5126032829284668, "learning_rate": 0.0007822689075630253, "loss": 0.3857, "step": 7880 }, { "epoch": 4.402793296089386, "grad_norm": 0.5499773621559143, "learning_rate": 0.0007822408963585435, "loss": 0.4389, "step": 7881 }, { "epoch": 4.403351955307262, "grad_norm": 0.477298766374588, "learning_rate": 0.0007822128851540616, "loss": 0.4872, "step": 7882 }, { "epoch": 4.40391061452514, "grad_norm": 0.58132004737854, "learning_rate": 0.0007821848739495798, "loss": 0.5054, "step": 7883 }, { "epoch": 4.4044692737430164, "grad_norm": 0.6866540312767029, "learning_rate": 0.000782156862745098, "loss": 0.5742, "step": 7884 }, { "epoch": 4.405027932960894, "grad_norm": 0.6847009062767029, "learning_rate": 0.0007821288515406163, "loss": 0.5601, "step": 7885 }, { "epoch": 4.405586592178771, "grad_norm": 0.7857428789138794, "learning_rate": 0.0007821008403361345, "loss": 0.4504, "step": 7886 }, { "epoch": 4.406145251396648, "grad_norm": 0.420052170753479, "learning_rate": 0.0007820728291316526, "loss": 0.4357, "step": 7887 }, { "epoch": 4.406703910614525, "grad_norm": 0.7023537158966064, "learning_rate": 0.0007820448179271708, "loss": 0.5265, "step": 7888 }, { "epoch": 4.407262569832402, "grad_norm": 0.5363319516181946, "learning_rate": 0.000782016806722689, "loss": 0.4931, "step": 7889 }, { "epoch": 4.407821229050279, "grad_norm": 0.5596529245376587, "learning_rate": 0.0007819887955182074, "loss": 0.4552, "step": 7890 }, { "epoch": 4.408379888268156, "grad_norm": 
0.40796831250190735, "learning_rate": 0.0007819607843137256, "loss": 0.3805, "step": 7891 }, { "epoch": 4.408938547486033, "grad_norm": 0.522153377532959, "learning_rate": 0.0007819327731092437, "loss": 0.4754, "step": 7892 }, { "epoch": 4.409497206703911, "grad_norm": 1.0707128047943115, "learning_rate": 0.0007819047619047619, "loss": 0.4913, "step": 7893 }, { "epoch": 4.410055865921787, "grad_norm": 18.974531173706055, "learning_rate": 0.0007818767507002801, "loss": 0.3199, "step": 7894 }, { "epoch": 4.410614525139665, "grad_norm": 0.6861658096313477, "learning_rate": 0.0007818487394957984, "loss": 0.4648, "step": 7895 }, { "epoch": 4.411173184357542, "grad_norm": 0.6258499622344971, "learning_rate": 0.0007818207282913166, "loss": 0.4205, "step": 7896 }, { "epoch": 4.411731843575419, "grad_norm": 0.6685450673103333, "learning_rate": 0.0007817927170868348, "loss": 0.4357, "step": 7897 }, { "epoch": 4.412290502793296, "grad_norm": 0.8548688292503357, "learning_rate": 0.0007817647058823529, "loss": 0.4457, "step": 7898 }, { "epoch": 4.412849162011173, "grad_norm": 0.678438663482666, "learning_rate": 0.0007817366946778711, "loss": 0.7488, "step": 7899 }, { "epoch": 4.41340782122905, "grad_norm": 0.5791630744934082, "learning_rate": 0.0007817086834733894, "loss": 0.533, "step": 7900 }, { "epoch": 4.413966480446927, "grad_norm": 0.46744436025619507, "learning_rate": 0.0007816806722689076, "loss": 0.4145, "step": 7901 }, { "epoch": 4.414525139664804, "grad_norm": 0.47127410769462585, "learning_rate": 0.0007816526610644258, "loss": 0.403, "step": 7902 }, { "epoch": 4.415083798882682, "grad_norm": 0.5942014455795288, "learning_rate": 0.0007816246498599439, "loss": 0.4657, "step": 7903 }, { "epoch": 4.415642458100558, "grad_norm": 0.5808228254318237, "learning_rate": 0.0007815966386554621, "loss": 0.7274, "step": 7904 }, { "epoch": 4.416201117318436, "grad_norm": 0.634122908115387, "learning_rate": 0.0007815686274509805, "loss": 0.4007, "step": 7905 }, { "epoch": 
4.4167597765363125, "grad_norm": 0.6995765566825867, "learning_rate": 0.0007815406162464987, "loss": 0.476, "step": 7906 }, { "epoch": 4.41731843575419, "grad_norm": 0.5117874145507812, "learning_rate": 0.0007815126050420169, "loss": 0.5667, "step": 7907 }, { "epoch": 4.417877094972067, "grad_norm": 0.925533652305603, "learning_rate": 0.000781484593837535, "loss": 0.6343, "step": 7908 }, { "epoch": 4.418435754189944, "grad_norm": 0.6137980818748474, "learning_rate": 0.0007814565826330532, "loss": 0.498, "step": 7909 }, { "epoch": 4.418994413407821, "grad_norm": 0.6770252585411072, "learning_rate": 0.0007814285714285715, "loss": 0.5254, "step": 7910 }, { "epoch": 4.419553072625698, "grad_norm": 0.6024884581565857, "learning_rate": 0.0007814005602240897, "loss": 0.3379, "step": 7911 }, { "epoch": 4.420111731843575, "grad_norm": 2.3432741165161133, "learning_rate": 0.0007813725490196079, "loss": 0.4817, "step": 7912 }, { "epoch": 4.420670391061453, "grad_norm": 0.4075697660446167, "learning_rate": 0.0007813445378151261, "loss": 0.465, "step": 7913 }, { "epoch": 4.421229050279329, "grad_norm": 0.46285125613212585, "learning_rate": 0.0007813165266106442, "loss": 0.5354, "step": 7914 }, { "epoch": 4.421787709497207, "grad_norm": 0.5492990016937256, "learning_rate": 0.0007812885154061625, "loss": 0.4388, "step": 7915 }, { "epoch": 4.4223463687150835, "grad_norm": 0.481118768453598, "learning_rate": 0.0007812605042016807, "loss": 0.446, "step": 7916 }, { "epoch": 4.422905027932961, "grad_norm": 0.629823625087738, "learning_rate": 0.0007812324929971989, "loss": 0.5816, "step": 7917 }, { "epoch": 4.423463687150838, "grad_norm": 0.5381523370742798, "learning_rate": 0.0007812044817927171, "loss": 0.3537, "step": 7918 }, { "epoch": 4.424022346368715, "grad_norm": 0.4390683174133301, "learning_rate": 0.0007811764705882352, "loss": 0.3276, "step": 7919 }, { "epoch": 4.424581005586592, "grad_norm": 0.6150350570678711, "learning_rate": 0.0007811484593837535, "loss": 0.6111, "step": 
7920 }, { "epoch": 4.425139664804469, "grad_norm": 5.276768684387207, "learning_rate": 0.0007811204481792717, "loss": 0.5148, "step": 7921 }, { "epoch": 4.425698324022346, "grad_norm": 0.46458321809768677, "learning_rate": 0.00078109243697479, "loss": 0.3958, "step": 7922 }, { "epoch": 4.426256983240224, "grad_norm": 0.49893718957901, "learning_rate": 0.0007810644257703082, "loss": 0.3603, "step": 7923 }, { "epoch": 4.4268156424581, "grad_norm": 0.4741962254047394, "learning_rate": 0.0007810364145658263, "loss": 0.5726, "step": 7924 }, { "epoch": 4.427374301675978, "grad_norm": 0.5961857438087463, "learning_rate": 0.0007810084033613446, "loss": 0.3475, "step": 7925 }, { "epoch": 4.4279329608938545, "grad_norm": 0.5745024681091309, "learning_rate": 0.0007809803921568628, "loss": 0.3747, "step": 7926 }, { "epoch": 4.428491620111732, "grad_norm": 0.44040071964263916, "learning_rate": 0.000780952380952381, "loss": 0.528, "step": 7927 }, { "epoch": 4.4290502793296085, "grad_norm": 0.9399561882019043, "learning_rate": 0.0007809243697478992, "loss": 0.3616, "step": 7928 }, { "epoch": 4.429608938547486, "grad_norm": 0.6722110509872437, "learning_rate": 0.0007808963585434174, "loss": 0.5126, "step": 7929 }, { "epoch": 4.430167597765363, "grad_norm": 0.5289178490638733, "learning_rate": 0.0007808683473389356, "loss": 0.3877, "step": 7930 }, { "epoch": 4.43072625698324, "grad_norm": 0.403576135635376, "learning_rate": 0.0007808403361344538, "loss": 0.476, "step": 7931 }, { "epoch": 4.431284916201117, "grad_norm": 5.940262317657471, "learning_rate": 0.000780812324929972, "loss": 0.3683, "step": 7932 }, { "epoch": 4.431843575418995, "grad_norm": 0.41928836703300476, "learning_rate": 0.0007807843137254902, "loss": 0.4171, "step": 7933 }, { "epoch": 4.432402234636871, "grad_norm": 0.5359971523284912, "learning_rate": 0.0007807563025210084, "loss": 0.4765, "step": 7934 }, { "epoch": 4.432960893854749, "grad_norm": 0.44485288858413696, "learning_rate": 0.0007807282913165266, 
"loss": 0.4109, "step": 7935 }, { "epoch": 4.4335195530726255, "grad_norm": 1.1097968816757202, "learning_rate": 0.0007807002801120448, "loss": 0.4427, "step": 7936 }, { "epoch": 4.434078212290503, "grad_norm": 0.44607189297676086, "learning_rate": 0.000780672268907563, "loss": 0.4237, "step": 7937 }, { "epoch": 4.4346368715083795, "grad_norm": 0.8777729272842407, "learning_rate": 0.0007806442577030812, "loss": 0.4944, "step": 7938 }, { "epoch": 4.435195530726257, "grad_norm": 1.0470962524414062, "learning_rate": 0.0007806162464985995, "loss": 0.4583, "step": 7939 }, { "epoch": 4.435754189944134, "grad_norm": 0.6383278965950012, "learning_rate": 0.0007805882352941177, "loss": 0.4549, "step": 7940 }, { "epoch": 4.436312849162011, "grad_norm": 0.8835601806640625, "learning_rate": 0.0007805602240896359, "loss": 0.4746, "step": 7941 }, { "epoch": 4.436871508379888, "grad_norm": 0.49641773104667664, "learning_rate": 0.0007805322128851541, "loss": 0.451, "step": 7942 }, { "epoch": 4.437430167597765, "grad_norm": 0.529076337814331, "learning_rate": 0.0007805042016806723, "loss": 0.4666, "step": 7943 }, { "epoch": 4.437988826815642, "grad_norm": 0.3934054672718048, "learning_rate": 0.0007804761904761905, "loss": 0.3835, "step": 7944 }, { "epoch": 4.43854748603352, "grad_norm": 2.3144543170928955, "learning_rate": 0.0007804481792717088, "loss": 0.5169, "step": 7945 }, { "epoch": 4.4391061452513965, "grad_norm": 0.6031854152679443, "learning_rate": 0.0007804201680672269, "loss": 0.4107, "step": 7946 }, { "epoch": 4.439664804469274, "grad_norm": 0.43561676144599915, "learning_rate": 0.0007803921568627451, "loss": 0.4557, "step": 7947 }, { "epoch": 4.4402234636871505, "grad_norm": 0.45999065041542053, "learning_rate": 0.0007803641456582633, "loss": 0.4173, "step": 7948 }, { "epoch": 4.440782122905028, "grad_norm": 0.4490691125392914, "learning_rate": 0.0007803361344537815, "loss": 0.4402, "step": 7949 }, { "epoch": 4.441340782122905, "grad_norm": 0.5090757608413696, 
"learning_rate": 0.0007803081232492998, "loss": 0.3995, "step": 7950 }, { "epoch": 4.441899441340782, "grad_norm": 4.465834617614746, "learning_rate": 0.0007802801120448179, "loss": 0.3529, "step": 7951 }, { "epoch": 4.442458100558659, "grad_norm": 0.6415351629257202, "learning_rate": 0.0007802521008403361, "loss": 0.4844, "step": 7952 }, { "epoch": 4.443016759776536, "grad_norm": 0.479928582906723, "learning_rate": 0.0007802240896358543, "loss": 0.4223, "step": 7953 }, { "epoch": 4.443575418994413, "grad_norm": 0.5131279230117798, "learning_rate": 0.0007801960784313725, "loss": 0.5309, "step": 7954 }, { "epoch": 4.444134078212291, "grad_norm": 0.8900812864303589, "learning_rate": 0.0007801680672268909, "loss": 0.6127, "step": 7955 }, { "epoch": 4.4446927374301675, "grad_norm": 1.5215139389038086, "learning_rate": 0.000780140056022409, "loss": 0.4101, "step": 7956 }, { "epoch": 4.445251396648045, "grad_norm": 0.4844955503940582, "learning_rate": 0.0007801120448179272, "loss": 0.4045, "step": 7957 }, { "epoch": 4.4458100558659215, "grad_norm": 0.5265480875968933, "learning_rate": 0.0007800840336134454, "loss": 0.4807, "step": 7958 }, { "epoch": 4.446368715083799, "grad_norm": 0.8821099400520325, "learning_rate": 0.0007800560224089636, "loss": 0.4003, "step": 7959 }, { "epoch": 4.446927374301676, "grad_norm": 0.5669199228286743, "learning_rate": 0.0007800280112044819, "loss": 0.4863, "step": 7960 }, { "epoch": 4.447486033519553, "grad_norm": 0.5125001072883606, "learning_rate": 0.0007800000000000001, "loss": 0.3645, "step": 7961 }, { "epoch": 4.44804469273743, "grad_norm": 0.48797422647476196, "learning_rate": 0.0007799719887955182, "loss": 0.462, "step": 7962 }, { "epoch": 4.448603351955307, "grad_norm": 0.6037314534187317, "learning_rate": 0.0007799439775910364, "loss": 0.4518, "step": 7963 }, { "epoch": 4.449162011173184, "grad_norm": 0.609241247177124, "learning_rate": 0.0007799159663865546, "loss": 0.355, "step": 7964 }, { "epoch": 4.449720670391061, 
"grad_norm": 0.717847466468811, "learning_rate": 0.0007798879551820729, "loss": 0.5084, "step": 7965 }, { "epoch": 4.4502793296089385, "grad_norm": 0.4940887987613678, "learning_rate": 0.0007798599439775911, "loss": 0.4814, "step": 7966 }, { "epoch": 4.450837988826816, "grad_norm": 0.7615885138511658, "learning_rate": 0.0007798319327731092, "loss": 0.442, "step": 7967 }, { "epoch": 4.4513966480446925, "grad_norm": 0.5444573760032654, "learning_rate": 0.0007798039215686274, "loss": 0.5411, "step": 7968 }, { "epoch": 4.45195530726257, "grad_norm": 1.0647783279418945, "learning_rate": 0.0007797759103641456, "loss": 0.4967, "step": 7969 }, { "epoch": 4.452513966480447, "grad_norm": 0.7675191164016724, "learning_rate": 0.000779747899159664, "loss": 0.3854, "step": 7970 }, { "epoch": 4.453072625698324, "grad_norm": 0.4078288972377777, "learning_rate": 0.0007797198879551822, "loss": 0.4345, "step": 7971 }, { "epoch": 4.453631284916201, "grad_norm": 0.5696030259132385, "learning_rate": 0.0007796918767507003, "loss": 0.41, "step": 7972 }, { "epoch": 4.454189944134078, "grad_norm": 2.22721791267395, "learning_rate": 0.0007796638655462185, "loss": 0.4101, "step": 7973 }, { "epoch": 4.454748603351955, "grad_norm": 0.5910645127296448, "learning_rate": 0.0007796358543417367, "loss": 0.4775, "step": 7974 }, { "epoch": 4.455307262569832, "grad_norm": 0.8175109624862671, "learning_rate": 0.000779607843137255, "loss": 0.4807, "step": 7975 }, { "epoch": 4.4558659217877095, "grad_norm": 0.5076733827590942, "learning_rate": 0.0007795798319327732, "loss": 0.5027, "step": 7976 }, { "epoch": 4.456424581005587, "grad_norm": 1.0723190307617188, "learning_rate": 0.0007795518207282914, "loss": 0.6256, "step": 7977 }, { "epoch": 4.4569832402234635, "grad_norm": 0.5672963857650757, "learning_rate": 0.0007795238095238095, "loss": 0.4953, "step": 7978 }, { "epoch": 4.457541899441341, "grad_norm": 1.0351548194885254, "learning_rate": 0.0007794957983193277, "loss": 0.504, "step": 7979 }, { "epoch": 
4.4581005586592175, "grad_norm": 0.5324487090110779, "learning_rate": 0.000779467787114846, "loss": 0.4875, "step": 7980 }, { "epoch": 4.458659217877095, "grad_norm": 0.5606355667114258, "learning_rate": 0.0007794397759103642, "loss": 0.5029, "step": 7981 }, { "epoch": 4.459217877094972, "grad_norm": 0.5287914276123047, "learning_rate": 0.0007794117647058824, "loss": 0.4953, "step": 7982 }, { "epoch": 4.459776536312849, "grad_norm": 0.7274687886238098, "learning_rate": 0.0007793837535014005, "loss": 0.5171, "step": 7983 }, { "epoch": 4.460335195530726, "grad_norm": 1.0861328840255737, "learning_rate": 0.0007793557422969187, "loss": 0.4176, "step": 7984 }, { "epoch": 4.460893854748603, "grad_norm": 0.4057898223400116, "learning_rate": 0.000779327731092437, "loss": 0.3301, "step": 7985 }, { "epoch": 4.4614525139664805, "grad_norm": 0.43559736013412476, "learning_rate": 0.0007792997198879552, "loss": 0.4216, "step": 7986 }, { "epoch": 4.462011173184358, "grad_norm": 0.7434982657432556, "learning_rate": 0.0007792717086834735, "loss": 0.5302, "step": 7987 }, { "epoch": 4.4625698324022345, "grad_norm": 0.7655741572380066, "learning_rate": 0.0007792436974789915, "loss": 0.4237, "step": 7988 }, { "epoch": 4.463128491620112, "grad_norm": 0.5505525469779968, "learning_rate": 0.0007792156862745098, "loss": 0.4644, "step": 7989 }, { "epoch": 4.4636871508379885, "grad_norm": 0.45169299840927124, "learning_rate": 0.0007791876750700281, "loss": 0.3699, "step": 7990 }, { "epoch": 4.464245810055866, "grad_norm": 2.1300570964813232, "learning_rate": 0.0007791596638655463, "loss": 0.431, "step": 7991 }, { "epoch": 4.464804469273743, "grad_norm": 0.49451008439064026, "learning_rate": 0.0007791316526610645, "loss": 0.5059, "step": 7992 }, { "epoch": 4.46536312849162, "grad_norm": 0.6582427620887756, "learning_rate": 0.0007791036414565827, "loss": 0.5685, "step": 7993 }, { "epoch": 4.465921787709497, "grad_norm": 0.6332744359970093, "learning_rate": 0.0007790756302521008, "loss": 
0.4735, "step": 7994 }, { "epoch": 4.466480446927374, "grad_norm": 0.5850993394851685, "learning_rate": 0.0007790476190476191, "loss": 0.424, "step": 7995 }, { "epoch": 4.4670391061452515, "grad_norm": 0.38231340050697327, "learning_rate": 0.0007790196078431373, "loss": 0.3643, "step": 7996 }, { "epoch": 4.467597765363129, "grad_norm": 0.5427983403205872, "learning_rate": 0.0007789915966386555, "loss": 0.522, "step": 7997 }, { "epoch": 4.4681564245810055, "grad_norm": 1.218829870223999, "learning_rate": 0.0007789635854341737, "loss": 0.4389, "step": 7998 }, { "epoch": 4.468715083798883, "grad_norm": 0.4756756126880646, "learning_rate": 0.0007789355742296918, "loss": 0.3267, "step": 7999 }, { "epoch": 4.4692737430167595, "grad_norm": 1.136558175086975, "learning_rate": 0.0007789075630252101, "loss": 0.4208, "step": 8000 }, { "epoch": 4.4692737430167595, "eval_cer": 0.09291021570488692, "eval_loss": 0.3547140657901764, "eval_runtime": 55.8888, "eval_samples_per_second": 81.197, "eval_steps_per_second": 5.082, "eval_wer": 0.36649814250998575, "step": 8000 }, { "epoch": 4.469832402234637, "grad_norm": 0.3521578907966614, "learning_rate": 0.0007788795518207283, "loss": 0.4491, "step": 8001 }, { "epoch": 4.4703910614525135, "grad_norm": 1.8238716125488281, "learning_rate": 0.0007788515406162465, "loss": 0.4901, "step": 8002 }, { "epoch": 4.470949720670391, "grad_norm": 0.5393496751785278, "learning_rate": 0.0007788235294117647, "loss": 0.5206, "step": 8003 }, { "epoch": 4.471508379888268, "grad_norm": 1.8020495176315308, "learning_rate": 0.0007787955182072828, "loss": 0.4162, "step": 8004 }, { "epoch": 4.472067039106145, "grad_norm": 0.6742940545082092, "learning_rate": 0.0007787675070028012, "loss": 0.3896, "step": 8005 }, { "epoch": 4.4726256983240225, "grad_norm": 0.39385151863098145, "learning_rate": 0.0007787394957983194, "loss": 0.3512, "step": 8006 }, { "epoch": 4.473184357541899, "grad_norm": 0.5696566700935364, "learning_rate": 0.0007787114845938376, "loss": 
0.4743, "step": 8007 }, { "epoch": 4.4737430167597765, "grad_norm": 0.5739765167236328, "learning_rate": 0.0007786834733893558, "loss": 0.3647, "step": 8008 }, { "epoch": 4.474301675977654, "grad_norm": 0.6203718781471252, "learning_rate": 0.000778655462184874, "loss": 0.4498, "step": 8009 }, { "epoch": 4.4748603351955305, "grad_norm": 0.6317752599716187, "learning_rate": 0.0007786274509803922, "loss": 0.4509, "step": 8010 }, { "epoch": 4.475418994413408, "grad_norm": 0.660818874835968, "learning_rate": 0.0007785994397759104, "loss": 0.4748, "step": 8011 }, { "epoch": 4.4759776536312845, "grad_norm": 0.9094657301902771, "learning_rate": 0.0007785714285714286, "loss": 0.458, "step": 8012 }, { "epoch": 4.476536312849162, "grad_norm": 0.5310734510421753, "learning_rate": 0.0007785434173669468, "loss": 0.4594, "step": 8013 }, { "epoch": 4.477094972067039, "grad_norm": 0.6198703646659851, "learning_rate": 0.000778515406162465, "loss": 0.3527, "step": 8014 }, { "epoch": 4.477653631284916, "grad_norm": 0.43491727113723755, "learning_rate": 0.0007784873949579832, "loss": 0.4105, "step": 8015 }, { "epoch": 4.4782122905027935, "grad_norm": 0.5587567687034607, "learning_rate": 0.0007784593837535014, "loss": 0.451, "step": 8016 }, { "epoch": 4.47877094972067, "grad_norm": 0.5126293301582336, "learning_rate": 0.0007784313725490196, "loss": 0.4141, "step": 8017 }, { "epoch": 4.4793296089385475, "grad_norm": 0.43808814883232117, "learning_rate": 0.0007784033613445378, "loss": 0.4036, "step": 8018 }, { "epoch": 4.479888268156425, "grad_norm": 0.5458375811576843, "learning_rate": 0.000778375350140056, "loss": 0.3898, "step": 8019 }, { "epoch": 4.4804469273743015, "grad_norm": 0.3859800696372986, "learning_rate": 0.0007783473389355742, "loss": 0.3886, "step": 8020 }, { "epoch": 4.481005586592179, "grad_norm": 0.5757061839103699, "learning_rate": 0.0007783193277310925, "loss": 0.4473, "step": 8021 }, { "epoch": 4.4815642458100555, "grad_norm": 0.6346460580825806, "learning_rate": 
0.0007782913165266107, "loss": 0.4926, "step": 8022 }, { "epoch": 4.482122905027933, "grad_norm": 0.40187501907348633, "learning_rate": 0.0007782633053221289, "loss": 0.3927, "step": 8023 }, { "epoch": 4.48268156424581, "grad_norm": 0.3430120348930359, "learning_rate": 0.0007782352941176471, "loss": 0.3219, "step": 8024 }, { "epoch": 4.483240223463687, "grad_norm": 1.0303021669387817, "learning_rate": 0.0007782072829131654, "loss": 0.5212, "step": 8025 }, { "epoch": 4.4837988826815645, "grad_norm": 0.706115186214447, "learning_rate": 0.0007781792717086835, "loss": 0.5569, "step": 8026 }, { "epoch": 4.484357541899441, "grad_norm": 0.8488256335258484, "learning_rate": 0.0007781512605042017, "loss": 0.3556, "step": 8027 }, { "epoch": 4.4849162011173185, "grad_norm": 0.3991093039512634, "learning_rate": 0.0007781232492997199, "loss": 0.5189, "step": 8028 }, { "epoch": 4.485474860335196, "grad_norm": 0.838813841342926, "learning_rate": 0.0007780952380952381, "loss": 0.5975, "step": 8029 }, { "epoch": 4.4860335195530725, "grad_norm": 0.697236955165863, "learning_rate": 0.0007780672268907563, "loss": 0.4672, "step": 8030 }, { "epoch": 4.48659217877095, "grad_norm": 0.4864196181297302, "learning_rate": 0.0007780392156862745, "loss": 0.3837, "step": 8031 }, { "epoch": 4.4871508379888265, "grad_norm": 0.3497955799102783, "learning_rate": 0.0007780112044817927, "loss": 0.4202, "step": 8032 }, { "epoch": 4.487709497206704, "grad_norm": 0.516545295715332, "learning_rate": 0.0007779831932773109, "loss": 0.4012, "step": 8033 }, { "epoch": 4.488268156424581, "grad_norm": 0.5043094158172607, "learning_rate": 0.0007779551820728291, "loss": 0.4695, "step": 8034 }, { "epoch": 4.488826815642458, "grad_norm": 1.0219391584396362, "learning_rate": 0.0007779271708683473, "loss": 0.4908, "step": 8035 }, { "epoch": 4.4893854748603355, "grad_norm": 0.6130095720291138, "learning_rate": 0.0007778991596638655, "loss": 0.4951, "step": 8036 }, { "epoch": 4.489944134078212, "grad_norm": 
0.5657668709754944, "learning_rate": 0.0007778711484593837, "loss": 0.4722, "step": 8037 }, { "epoch": 4.4905027932960895, "grad_norm": 0.6289819478988647, "learning_rate": 0.000777843137254902, "loss": 0.5806, "step": 8038 }, { "epoch": 4.491061452513966, "grad_norm": 0.5132222175598145, "learning_rate": 0.0007778151260504202, "loss": 0.4775, "step": 8039 }, { "epoch": 4.4916201117318435, "grad_norm": 1.0361062288284302, "learning_rate": 0.0007777871148459384, "loss": 0.436, "step": 8040 }, { "epoch": 4.492178770949721, "grad_norm": 0.6603046655654907, "learning_rate": 0.0007777591036414567, "loss": 0.4416, "step": 8041 }, { "epoch": 4.4927374301675975, "grad_norm": 0.8079709410667419, "learning_rate": 0.0007777310924369748, "loss": 0.4466, "step": 8042 }, { "epoch": 4.493296089385475, "grad_norm": 0.7630321979522705, "learning_rate": 0.000777703081232493, "loss": 0.4332, "step": 8043 }, { "epoch": 4.4938547486033515, "grad_norm": 0.5348184704780579, "learning_rate": 0.0007776750700280112, "loss": 0.5214, "step": 8044 }, { "epoch": 4.494413407821229, "grad_norm": 0.6643372774124146, "learning_rate": 0.0007776470588235294, "loss": 0.5079, "step": 8045 }, { "epoch": 4.4949720670391065, "grad_norm": 0.44299614429473877, "learning_rate": 0.0007776190476190477, "loss": 0.5242, "step": 8046 }, { "epoch": 4.495530726256983, "grad_norm": 1.164122462272644, "learning_rate": 0.0007775910364145658, "loss": 0.5211, "step": 8047 }, { "epoch": 4.4960893854748605, "grad_norm": 0.5251310467720032, "learning_rate": 0.000777563025210084, "loss": 0.4982, "step": 8048 }, { "epoch": 4.496648044692737, "grad_norm": 0.6160804629325867, "learning_rate": 0.0007775350140056022, "loss": 0.5599, "step": 8049 }, { "epoch": 4.4972067039106145, "grad_norm": 0.49206408858299255, "learning_rate": 0.0007775070028011204, "loss": 0.5309, "step": 8050 }, { "epoch": 4.497765363128492, "grad_norm": 0.4938124418258667, "learning_rate": 0.0007774789915966387, "loss": 0.4639, "step": 8051 }, { "epoch": 
4.4983240223463685, "grad_norm": 2.6455161571502686, "learning_rate": 0.0007774509803921568, "loss": 0.4613, "step": 8052 }, { "epoch": 4.498882681564246, "grad_norm": 0.5216096639633179, "learning_rate": 0.000777422969187675, "loss": 0.4416, "step": 8053 }, { "epoch": 4.4994413407821225, "grad_norm": 0.6239911913871765, "learning_rate": 0.0007773949579831933, "loss": 0.4668, "step": 8054 }, { "epoch": 4.5, "grad_norm": 0.900478184223175, "learning_rate": 0.0007773669467787115, "loss": 0.4795, "step": 8055 }, { "epoch": 4.5005586592178775, "grad_norm": 0.5670730471611023, "learning_rate": 0.0007773389355742298, "loss": 0.433, "step": 8056 }, { "epoch": 4.501117318435754, "grad_norm": 0.3907197415828705, "learning_rate": 0.000777310924369748, "loss": 0.4655, "step": 8057 }, { "epoch": 4.5016759776536315, "grad_norm": 0.9997380375862122, "learning_rate": 0.0007772829131652661, "loss": 0.447, "step": 8058 }, { "epoch": 4.502234636871508, "grad_norm": 1.165135383605957, "learning_rate": 0.0007772549019607843, "loss": 0.5339, "step": 8059 }, { "epoch": 4.5027932960893855, "grad_norm": 0.5992472767829895, "learning_rate": 0.0007772268907563025, "loss": 0.4652, "step": 8060 }, { "epoch": 4.503351955307263, "grad_norm": 0.6337040662765503, "learning_rate": 0.0007771988795518208, "loss": 0.509, "step": 8061 }, { "epoch": 4.5039106145251395, "grad_norm": 1.6205978393554688, "learning_rate": 0.000777170868347339, "loss": 0.3834, "step": 8062 }, { "epoch": 4.504469273743017, "grad_norm": 0.5395073294639587, "learning_rate": 0.0007771428571428571, "loss": 0.3847, "step": 8063 }, { "epoch": 4.5050279329608935, "grad_norm": 1.7489677667617798, "learning_rate": 0.0007771148459383753, "loss": 0.4477, "step": 8064 }, { "epoch": 4.505586592178771, "grad_norm": 0.648113489151001, "learning_rate": 0.0007770868347338935, "loss": 0.5817, "step": 8065 }, { "epoch": 4.506145251396648, "grad_norm": 0.6713204383850098, "learning_rate": 0.0007770588235294118, "loss": 0.4423, "step": 8066 }, { 
"epoch": 4.506703910614525, "grad_norm": 0.5897493362426758, "learning_rate": 0.00077703081232493, "loss": 0.553, "step": 8067 }, { "epoch": 4.5072625698324025, "grad_norm": 0.6195979714393616, "learning_rate": 0.0007770028011204481, "loss": 0.5004, "step": 8068 }, { "epoch": 4.507821229050279, "grad_norm": 0.613839864730835, "learning_rate": 0.0007769747899159663, "loss": 0.4714, "step": 8069 }, { "epoch": 4.5083798882681565, "grad_norm": 0.6737150549888611, "learning_rate": 0.0007769467787114845, "loss": 0.3804, "step": 8070 }, { "epoch": 4.508938547486034, "grad_norm": 0.5012085437774658, "learning_rate": 0.0007769187675070029, "loss": 0.5254, "step": 8071 }, { "epoch": 4.5094972067039105, "grad_norm": 0.6065489649772644, "learning_rate": 0.0007768907563025211, "loss": 0.5669, "step": 8072 }, { "epoch": 4.510055865921788, "grad_norm": 1.0205323696136475, "learning_rate": 0.0007768627450980393, "loss": 0.4508, "step": 8073 }, { "epoch": 4.5106145251396645, "grad_norm": 0.5331271886825562, "learning_rate": 0.0007768347338935574, "loss": 0.5316, "step": 8074 }, { "epoch": 4.511173184357542, "grad_norm": 0.6974080801010132, "learning_rate": 0.0007768067226890756, "loss": 0.4413, "step": 8075 }, { "epoch": 4.511731843575419, "grad_norm": 0.7939553260803223, "learning_rate": 0.0007767787114845939, "loss": 0.6073, "step": 8076 }, { "epoch": 4.512290502793296, "grad_norm": 0.5240582227706909, "learning_rate": 0.0007767507002801121, "loss": 0.4328, "step": 8077 }, { "epoch": 4.5128491620111735, "grad_norm": 0.8325620293617249, "learning_rate": 0.0007767226890756303, "loss": 0.5055, "step": 8078 }, { "epoch": 4.51340782122905, "grad_norm": 0.9341689944267273, "learning_rate": 0.0007766946778711484, "loss": 0.3695, "step": 8079 }, { "epoch": 4.5139664804469275, "grad_norm": 1.3748936653137207, "learning_rate": 0.0007766666666666666, "loss": 0.4082, "step": 8080 }, { "epoch": 4.514525139664805, "grad_norm": 0.618179440498352, "learning_rate": 0.0007766386554621849, "loss": 
0.424, "step": 8081 }, { "epoch": 4.5150837988826815, "grad_norm": 0.9769489169120789, "learning_rate": 0.0007766106442577031, "loss": 0.554, "step": 8082 }, { "epoch": 4.515642458100559, "grad_norm": 0.5236263275146484, "learning_rate": 0.0007765826330532213, "loss": 0.5879, "step": 8083 }, { "epoch": 4.5162011173184355, "grad_norm": 0.588412344455719, "learning_rate": 0.0007765546218487394, "loss": 0.5008, "step": 8084 }, { "epoch": 4.516759776536313, "grad_norm": 0.6860974431037903, "learning_rate": 0.0007765266106442576, "loss": 0.5397, "step": 8085 }, { "epoch": 4.51731843575419, "grad_norm": 0.8217350244522095, "learning_rate": 0.000776498599439776, "loss": 0.5159, "step": 8086 }, { "epoch": 4.517877094972067, "grad_norm": 0.7609260678291321, "learning_rate": 0.0007764705882352942, "loss": 0.464, "step": 8087 }, { "epoch": 4.5184357541899445, "grad_norm": 0.8852810263633728, "learning_rate": 0.0007764425770308124, "loss": 0.4189, "step": 8088 }, { "epoch": 4.518994413407821, "grad_norm": 0.6375750303268433, "learning_rate": 0.0007764145658263306, "loss": 0.5251, "step": 8089 }, { "epoch": 4.5195530726256985, "grad_norm": 1.601333498954773, "learning_rate": 0.0007763865546218487, "loss": 0.4357, "step": 8090 }, { "epoch": 4.520111731843575, "grad_norm": 1.655070424079895, "learning_rate": 0.000776358543417367, "loss": 0.3949, "step": 8091 }, { "epoch": 4.5206703910614525, "grad_norm": 1.0199527740478516, "learning_rate": 0.0007763305322128852, "loss": 0.4465, "step": 8092 }, { "epoch": 4.52122905027933, "grad_norm": 2.242251396179199, "learning_rate": 0.0007763025210084034, "loss": 0.4464, "step": 8093 }, { "epoch": 4.5217877094972065, "grad_norm": 0.7333651781082153, "learning_rate": 0.0007762745098039216, "loss": 0.5593, "step": 8094 }, { "epoch": 4.522346368715084, "grad_norm": 2.2402536869049072, "learning_rate": 0.0007762464985994397, "loss": 0.4828, "step": 8095 }, { "epoch": 4.522905027932961, "grad_norm": 0.6529591679573059, "learning_rate": 
0.000776218487394958, "loss": 0.4659, "step": 8096 }, { "epoch": 4.523463687150838, "grad_norm": 1.3216164112091064, "learning_rate": 0.0007761904761904762, "loss": 0.3864, "step": 8097 }, { "epoch": 4.5240223463687155, "grad_norm": 0.8586426973342896, "learning_rate": 0.0007761624649859944, "loss": 0.4498, "step": 8098 }, { "epoch": 4.524581005586592, "grad_norm": 0.5126912593841553, "learning_rate": 0.0007761344537815126, "loss": 0.3494, "step": 8099 }, { "epoch": 4.5251396648044695, "grad_norm": 0.6441032290458679, "learning_rate": 0.0007761064425770307, "loss": 0.434, "step": 8100 }, { "epoch": 4.525698324022346, "grad_norm": 0.4300846457481384, "learning_rate": 0.000776078431372549, "loss": 0.414, "step": 8101 }, { "epoch": 4.5262569832402235, "grad_norm": 2.3352677822113037, "learning_rate": 0.0007760504201680672, "loss": 0.4082, "step": 8102 }, { "epoch": 4.5268156424581, "grad_norm": 0.7502132058143616, "learning_rate": 0.0007760224089635855, "loss": 0.557, "step": 8103 }, { "epoch": 4.5273743016759775, "grad_norm": 0.8537489175796509, "learning_rate": 0.0007759943977591037, "loss": 0.4884, "step": 8104 }, { "epoch": 4.527932960893855, "grad_norm": 0.59389328956604, "learning_rate": 0.0007759663865546219, "loss": 0.5073, "step": 8105 }, { "epoch": 4.528491620111732, "grad_norm": 0.8228345513343811, "learning_rate": 0.0007759383753501401, "loss": 0.4008, "step": 8106 }, { "epoch": 4.529050279329609, "grad_norm": 0.6935243606567383, "learning_rate": 0.0007759103641456583, "loss": 0.6024, "step": 8107 }, { "epoch": 4.5296089385474865, "grad_norm": 0.5760891437530518, "learning_rate": 0.0007758823529411765, "loss": 0.5419, "step": 8108 }, { "epoch": 4.530167597765363, "grad_norm": 1.1517388820648193, "learning_rate": 0.0007758543417366947, "loss": 0.4219, "step": 8109 }, { "epoch": 4.5307262569832405, "grad_norm": 1.5431220531463623, "learning_rate": 0.0007758263305322129, "loss": 0.4962, "step": 8110 }, { "epoch": 4.531284916201117, "grad_norm": 
0.6908110976219177, "learning_rate": 0.0007757983193277311, "loss": 0.4801, "step": 8111 }, { "epoch": 4.5318435754189945, "grad_norm": 0.872285008430481, "learning_rate": 0.0007757703081232493, "loss": 0.4446, "step": 8112 }, { "epoch": 4.532402234636871, "grad_norm": 0.66185462474823, "learning_rate": 0.0007757422969187675, "loss": 0.4027, "step": 8113 }, { "epoch": 4.5329608938547485, "grad_norm": 0.5269913673400879, "learning_rate": 0.0007757142857142857, "loss": 0.4448, "step": 8114 }, { "epoch": 4.533519553072626, "grad_norm": 0.3996785879135132, "learning_rate": 0.0007756862745098039, "loss": 0.411, "step": 8115 }, { "epoch": 4.534078212290503, "grad_norm": 0.9295637011528015, "learning_rate": 0.0007756582633053222, "loss": 0.5494, "step": 8116 }, { "epoch": 4.53463687150838, "grad_norm": 0.7121503353118896, "learning_rate": 0.0007756302521008403, "loss": 0.6011, "step": 8117 }, { "epoch": 4.5351955307262575, "grad_norm": 0.4291536808013916, "learning_rate": 0.0007756022408963585, "loss": 0.4044, "step": 8118 }, { "epoch": 4.535754189944134, "grad_norm": 0.7450488209724426, "learning_rate": 0.0007755742296918767, "loss": 0.4788, "step": 8119 }, { "epoch": 4.5363128491620115, "grad_norm": 0.8254243731498718, "learning_rate": 0.000775546218487395, "loss": 0.5847, "step": 8120 }, { "epoch": 4.536871508379888, "grad_norm": 0.6319572925567627, "learning_rate": 0.0007755182072829133, "loss": 0.5136, "step": 8121 }, { "epoch": 4.5374301675977655, "grad_norm": 0.5139955282211304, "learning_rate": 0.0007754901960784314, "loss": 0.4766, "step": 8122 }, { "epoch": 4.537988826815642, "grad_norm": 0.66304612159729, "learning_rate": 0.0007754621848739496, "loss": 0.3934, "step": 8123 }, { "epoch": 4.5385474860335195, "grad_norm": 0.9044004678726196, "learning_rate": 0.0007754341736694678, "loss": 0.4549, "step": 8124 }, { "epoch": 4.539106145251397, "grad_norm": 0.5326232314109802, "learning_rate": 0.000775406162464986, "loss": 0.4392, "step": 8125 }, { "epoch": 
4.539664804469274, "grad_norm": 1.0321842432022095, "learning_rate": 0.0007753781512605043, "loss": 0.4118, "step": 8126 }, { "epoch": 4.540223463687151, "grad_norm": 0.6323887705802917, "learning_rate": 0.0007753501400560224, "loss": 0.394, "step": 8127 }, { "epoch": 4.540782122905028, "grad_norm": 0.7320694923400879, "learning_rate": 0.0007753221288515406, "loss": 0.4132, "step": 8128 }, { "epoch": 4.541340782122905, "grad_norm": 0.5122719407081604, "learning_rate": 0.0007752941176470588, "loss": 0.4651, "step": 8129 }, { "epoch": 4.5418994413407825, "grad_norm": 0.5361186265945435, "learning_rate": 0.000775266106442577, "loss": 0.3921, "step": 8130 }, { "epoch": 4.542458100558659, "grad_norm": 0.7645115256309509, "learning_rate": 0.0007752380952380953, "loss": 0.6572, "step": 8131 }, { "epoch": 4.5430167597765365, "grad_norm": 0.5335005521774292, "learning_rate": 0.0007752100840336135, "loss": 0.4962, "step": 8132 }, { "epoch": 4.543575418994413, "grad_norm": 0.6003976464271545, "learning_rate": 0.0007751820728291316, "loss": 0.5033, "step": 8133 }, { "epoch": 4.5441340782122905, "grad_norm": 0.5694627165794373, "learning_rate": 0.0007751540616246498, "loss": 0.439, "step": 8134 }, { "epoch": 4.544692737430168, "grad_norm": 0.5356988310813904, "learning_rate": 0.000775126050420168, "loss": 0.3436, "step": 8135 }, { "epoch": 4.545251396648045, "grad_norm": 0.5364171862602234, "learning_rate": 0.0007750980392156864, "loss": 0.5807, "step": 8136 }, { "epoch": 4.545810055865922, "grad_norm": 0.528247594833374, "learning_rate": 0.0007750700280112046, "loss": 0.5342, "step": 8137 }, { "epoch": 4.546368715083799, "grad_norm": 0.5301976799964905, "learning_rate": 0.0007750420168067227, "loss": 0.5402, "step": 8138 }, { "epoch": 4.546927374301676, "grad_norm": 0.6062915921211243, "learning_rate": 0.0007750140056022409, "loss": 0.6306, "step": 8139 }, { "epoch": 4.547486033519553, "grad_norm": 0.6419862508773804, "learning_rate": 0.0007749859943977591, "loss": 0.438, 
"step": 8140 }, { "epoch": 4.54804469273743, "grad_norm": 0.4715009033679962, "learning_rate": 0.0007749579831932774, "loss": 0.4482, "step": 8141 }, { "epoch": 4.5486033519553075, "grad_norm": 0.8993266224861145, "learning_rate": 0.0007749299719887956, "loss": 0.4836, "step": 8142 }, { "epoch": 4.549162011173184, "grad_norm": 0.4348084330558777, "learning_rate": 0.0007749019607843137, "loss": 0.4708, "step": 8143 }, { "epoch": 4.5497206703910615, "grad_norm": 0.4123710095882416, "learning_rate": 0.0007748739495798319, "loss": 0.3794, "step": 8144 }, { "epoch": 4.550279329608939, "grad_norm": 0.5706660151481628, "learning_rate": 0.0007748459383753501, "loss": 0.5639, "step": 8145 }, { "epoch": 4.550837988826816, "grad_norm": 0.7596791386604309, "learning_rate": 0.0007748179271708684, "loss": 0.471, "step": 8146 }, { "epoch": 4.551396648044693, "grad_norm": 0.4690873324871063, "learning_rate": 0.0007747899159663866, "loss": 0.5288, "step": 8147 }, { "epoch": 4.55195530726257, "grad_norm": 1.1551265716552734, "learning_rate": 0.0007747619047619048, "loss": 0.5019, "step": 8148 }, { "epoch": 4.552513966480447, "grad_norm": 0.41823726892471313, "learning_rate": 0.0007747338935574229, "loss": 0.3941, "step": 8149 }, { "epoch": 4.553072625698324, "grad_norm": 0.6825693249702454, "learning_rate": 0.0007747058823529411, "loss": 0.5645, "step": 8150 }, { "epoch": 4.553631284916201, "grad_norm": 0.46231648325920105, "learning_rate": 0.0007746778711484595, "loss": 0.3865, "step": 8151 }, { "epoch": 4.5541899441340785, "grad_norm": 0.5623779296875, "learning_rate": 0.0007746498599439777, "loss": 0.5154, "step": 8152 }, { "epoch": 4.554748603351955, "grad_norm": 0.4641118347644806, "learning_rate": 0.0007746218487394959, "loss": 0.4932, "step": 8153 }, { "epoch": 4.5553072625698325, "grad_norm": 0.699112057685852, "learning_rate": 0.000774593837535014, "loss": 0.5723, "step": 8154 }, { "epoch": 4.55586592178771, "grad_norm": 0.7715858221054077, "learning_rate": 
0.0007745658263305322, "loss": 0.4913, "step": 8155 }, { "epoch": 4.556424581005587, "grad_norm": 0.8410941362380981, "learning_rate": 0.0007745378151260505, "loss": 0.3757, "step": 8156 }, { "epoch": 4.556983240223464, "grad_norm": 0.4754253625869751, "learning_rate": 0.0007745098039215687, "loss": 0.3946, "step": 8157 }, { "epoch": 4.557541899441341, "grad_norm": 2.006784439086914, "learning_rate": 0.0007744817927170869, "loss": 0.5234, "step": 8158 }, { "epoch": 4.558100558659218, "grad_norm": 0.6154943704605103, "learning_rate": 0.000774453781512605, "loss": 0.4225, "step": 8159 }, { "epoch": 4.558659217877095, "grad_norm": 0.6050153374671936, "learning_rate": 0.0007744257703081232, "loss": 0.5755, "step": 8160 }, { "epoch": 4.559217877094972, "grad_norm": 8.078617095947266, "learning_rate": 0.0007743977591036415, "loss": 0.5154, "step": 8161 }, { "epoch": 4.5597765363128495, "grad_norm": 1.2790827751159668, "learning_rate": 0.0007743697478991597, "loss": 0.4278, "step": 8162 }, { "epoch": 4.560335195530726, "grad_norm": 0.8261299729347229, "learning_rate": 0.0007743417366946779, "loss": 0.3154, "step": 8163 }, { "epoch": 4.5608938547486035, "grad_norm": 1.0802264213562012, "learning_rate": 0.0007743137254901961, "loss": 0.5002, "step": 8164 }, { "epoch": 4.56145251396648, "grad_norm": 0.6667917370796204, "learning_rate": 0.0007742857142857142, "loss": 0.5824, "step": 8165 }, { "epoch": 4.562011173184358, "grad_norm": 0.6015957593917847, "learning_rate": 0.0007742577030812325, "loss": 0.6011, "step": 8166 }, { "epoch": 4.562569832402235, "grad_norm": 0.770133912563324, "learning_rate": 0.0007742296918767507, "loss": 0.5135, "step": 8167 }, { "epoch": 4.563128491620112, "grad_norm": 0.7474347352981567, "learning_rate": 0.000774201680672269, "loss": 0.4336, "step": 8168 }, { "epoch": 4.563687150837989, "grad_norm": 0.7899701595306396, "learning_rate": 0.0007741736694677872, "loss": 0.4852, "step": 8169 }, { "epoch": 4.564245810055866, "grad_norm": 
0.678300142288208, "learning_rate": 0.0007741456582633053, "loss": 0.4494, "step": 8170 }, { "epoch": 4.564804469273743, "grad_norm": 0.6663585901260376, "learning_rate": 0.0007741176470588236, "loss": 0.4997, "step": 8171 }, { "epoch": 4.5653631284916205, "grad_norm": 2.21020770072937, "learning_rate": 0.0007740896358543418, "loss": 0.4465, "step": 8172 }, { "epoch": 4.565921787709497, "grad_norm": 0.6665765047073364, "learning_rate": 0.00077406162464986, "loss": 0.4638, "step": 8173 }, { "epoch": 4.5664804469273745, "grad_norm": 0.5810917615890503, "learning_rate": 0.0007740336134453782, "loss": 0.423, "step": 8174 }, { "epoch": 4.567039106145251, "grad_norm": 5.412701606750488, "learning_rate": 0.0007740056022408963, "loss": 0.4295, "step": 8175 }, { "epoch": 4.567597765363129, "grad_norm": 0.7749119997024536, "learning_rate": 0.0007739775910364146, "loss": 0.4758, "step": 8176 }, { "epoch": 4.568156424581005, "grad_norm": 0.5137836337089539, "learning_rate": 0.0007739495798319328, "loss": 0.3993, "step": 8177 }, { "epoch": 4.568715083798883, "grad_norm": 0.7287721037864685, "learning_rate": 0.000773921568627451, "loss": 0.4921, "step": 8178 }, { "epoch": 4.56927374301676, "grad_norm": 0.4986781179904938, "learning_rate": 0.0007738935574229692, "loss": 0.4413, "step": 8179 }, { "epoch": 4.569832402234637, "grad_norm": 0.6719263195991516, "learning_rate": 0.0007738655462184874, "loss": 0.3405, "step": 8180 }, { "epoch": 4.570391061452514, "grad_norm": 0.7535628080368042, "learning_rate": 0.0007738375350140056, "loss": 0.5292, "step": 8181 }, { "epoch": 4.5709497206703915, "grad_norm": 0.5775241851806641, "learning_rate": 0.0007738095238095238, "loss": 0.5891, "step": 8182 }, { "epoch": 4.571508379888268, "grad_norm": 0.48770424723625183, "learning_rate": 0.000773781512605042, "loss": 0.3605, "step": 8183 }, { "epoch": 4.5720670391061455, "grad_norm": 0.6410732269287109, "learning_rate": 0.0007737535014005602, "loss": 0.5055, "step": 8184 }, { "epoch": 
4.572625698324022, "grad_norm": 0.6988441944122314, "learning_rate": 0.0007737254901960785, "loss": 0.5601, "step": 8185 }, { "epoch": 4.5731843575419, "grad_norm": 13.589212417602539, "learning_rate": 0.0007736974789915967, "loss": 0.4929, "step": 8186 }, { "epoch": 4.573743016759776, "grad_norm": 0.521863579750061, "learning_rate": 0.0007736694677871149, "loss": 0.3959, "step": 8187 }, { "epoch": 4.574301675977654, "grad_norm": 0.5403456687927246, "learning_rate": 0.0007736414565826331, "loss": 0.4072, "step": 8188 }, { "epoch": 4.574860335195531, "grad_norm": 0.4856480658054352, "learning_rate": 0.0007736134453781513, "loss": 0.3716, "step": 8189 }, { "epoch": 4.575418994413408, "grad_norm": 0.6969072222709656, "learning_rate": 0.0007735854341736695, "loss": 0.5043, "step": 8190 }, { "epoch": 4.575977653631285, "grad_norm": 0.40378105640411377, "learning_rate": 0.0007735574229691877, "loss": 0.432, "step": 8191 }, { "epoch": 4.576536312849162, "grad_norm": 0.4986751973628998, "learning_rate": 0.0007735294117647059, "loss": 0.4641, "step": 8192 }, { "epoch": 4.577094972067039, "grad_norm": 0.6272121071815491, "learning_rate": 0.0007735014005602241, "loss": 0.4402, "step": 8193 }, { "epoch": 4.5776536312849165, "grad_norm": 0.34092170000076294, "learning_rate": 0.0007734733893557423, "loss": 0.3474, "step": 8194 }, { "epoch": 4.578212290502793, "grad_norm": 1.0378425121307373, "learning_rate": 0.0007734453781512605, "loss": 0.7377, "step": 8195 }, { "epoch": 4.578770949720671, "grad_norm": 0.49337831139564514, "learning_rate": 0.0007734173669467788, "loss": 0.4425, "step": 8196 }, { "epoch": 4.579329608938547, "grad_norm": 0.47799861431121826, "learning_rate": 0.0007733893557422969, "loss": 0.5455, "step": 8197 }, { "epoch": 4.579888268156425, "grad_norm": 0.6665588617324829, "learning_rate": 0.0007733613445378151, "loss": 0.4893, "step": 8198 }, { "epoch": 4.580446927374302, "grad_norm": 0.4508746862411499, "learning_rate": 0.0007733333333333333, "loss": 0.3744, 
"step": 8199 }, { "epoch": 4.581005586592179, "grad_norm": 0.5219548940658569, "learning_rate": 0.0007733053221288515, "loss": 0.435, "step": 8200 }, { "epoch": 4.581564245810056, "grad_norm": 2.0675745010375977, "learning_rate": 0.0007732773109243699, "loss": 0.5323, "step": 8201 }, { "epoch": 4.582122905027933, "grad_norm": 0.6970293521881104, "learning_rate": 0.000773249299719888, "loss": 0.4049, "step": 8202 }, { "epoch": 4.58268156424581, "grad_norm": 0.5580114126205444, "learning_rate": 0.0007732212885154062, "loss": 0.4942, "step": 8203 }, { "epoch": 4.5832402234636875, "grad_norm": 0.5304311513900757, "learning_rate": 0.0007731932773109244, "loss": 0.4727, "step": 8204 }, { "epoch": 4.583798882681564, "grad_norm": 0.7618182897567749, "learning_rate": 0.0007731652661064426, "loss": 0.4719, "step": 8205 }, { "epoch": 4.584357541899442, "grad_norm": 0.4298645555973053, "learning_rate": 0.0007731372549019609, "loss": 0.501, "step": 8206 }, { "epoch": 4.584916201117318, "grad_norm": 0.5802479386329651, "learning_rate": 0.000773109243697479, "loss": 0.4233, "step": 8207 }, { "epoch": 4.585474860335196, "grad_norm": 0.5593145489692688, "learning_rate": 0.0007730812324929972, "loss": 0.4219, "step": 8208 }, { "epoch": 4.586033519553073, "grad_norm": 0.4342997968196869, "learning_rate": 0.0007730532212885154, "loss": 0.4518, "step": 8209 }, { "epoch": 4.58659217877095, "grad_norm": 0.5315520763397217, "learning_rate": 0.0007730252100840336, "loss": 0.4455, "step": 8210 }, { "epoch": 4.587150837988827, "grad_norm": 0.6056346297264099, "learning_rate": 0.0007729971988795519, "loss": 0.3567, "step": 8211 }, { "epoch": 4.587709497206704, "grad_norm": 0.4693072438240051, "learning_rate": 0.0007729691876750701, "loss": 0.4232, "step": 8212 }, { "epoch": 4.588268156424581, "grad_norm": 2.016322612762451, "learning_rate": 0.0007729411764705882, "loss": 0.4331, "step": 8213 }, { "epoch": 4.588826815642458, "grad_norm": 0.9382308125495911, "learning_rate": 
0.0007729131652661064, "loss": 0.5103, "step": 8214 }, { "epoch": 4.589385474860335, "grad_norm": 0.7534018754959106, "learning_rate": 0.0007728851540616246, "loss": 0.4191, "step": 8215 }, { "epoch": 4.589944134078213, "grad_norm": 0.6166900992393494, "learning_rate": 0.000772857142857143, "loss": 0.6275, "step": 8216 }, { "epoch": 4.590502793296089, "grad_norm": 1.0928400754928589, "learning_rate": 0.0007728291316526612, "loss": 0.4504, "step": 8217 }, { "epoch": 4.591061452513967, "grad_norm": 0.5267983078956604, "learning_rate": 0.0007728011204481793, "loss": 0.4287, "step": 8218 }, { "epoch": 4.591620111731844, "grad_norm": 1.4918850660324097, "learning_rate": 0.0007727731092436975, "loss": 0.4322, "step": 8219 }, { "epoch": 4.592178770949721, "grad_norm": 0.5473289489746094, "learning_rate": 0.0007727450980392157, "loss": 0.4785, "step": 8220 }, { "epoch": 4.592737430167598, "grad_norm": 0.4721951186656952, "learning_rate": 0.000772717086834734, "loss": 0.4394, "step": 8221 }, { "epoch": 4.593296089385475, "grad_norm": 0.521327793598175, "learning_rate": 0.0007726890756302522, "loss": 0.6071, "step": 8222 }, { "epoch": 4.593854748603352, "grad_norm": 0.6525221467018127, "learning_rate": 0.0007726610644257703, "loss": 0.4779, "step": 8223 }, { "epoch": 4.594413407821229, "grad_norm": 0.5691216588020325, "learning_rate": 0.0007726330532212885, "loss": 0.5468, "step": 8224 }, { "epoch": 4.594972067039106, "grad_norm": 0.428978830575943, "learning_rate": 0.0007726050420168067, "loss": 0.4188, "step": 8225 }, { "epoch": 4.5955307262569836, "grad_norm": 0.4750875234603882, "learning_rate": 0.000772577030812325, "loss": 0.4126, "step": 8226 }, { "epoch": 4.59608938547486, "grad_norm": 0.47138598561286926, "learning_rate": 0.0007725490196078432, "loss": 0.4608, "step": 8227 }, { "epoch": 4.596648044692738, "grad_norm": 2.1292762756347656, "learning_rate": 0.0007725210084033614, "loss": 0.4649, "step": 8228 }, { "epoch": 4.597206703910614, "grad_norm": 
0.5666723251342773, "learning_rate": 0.0007724929971988795, "loss": 0.5721, "step": 8229 }, { "epoch": 4.597765363128492, "grad_norm": 0.6902828812599182, "learning_rate": 0.0007724649859943977, "loss": 0.3883, "step": 8230 }, { "epoch": 4.598324022346369, "grad_norm": 0.5086633563041687, "learning_rate": 0.000772436974789916, "loss": 0.4883, "step": 8231 }, { "epoch": 4.598882681564246, "grad_norm": 0.4486280381679535, "learning_rate": 0.0007724089635854342, "loss": 0.3923, "step": 8232 }, { "epoch": 4.599441340782123, "grad_norm": 0.5192384719848633, "learning_rate": 0.0007723809523809525, "loss": 0.5095, "step": 8233 }, { "epoch": 4.6, "grad_norm": 0.6304391026496887, "learning_rate": 0.0007723529411764705, "loss": 0.3598, "step": 8234 }, { "epoch": 4.600558659217877, "grad_norm": 0.45914217829704285, "learning_rate": 0.0007723249299719888, "loss": 0.5354, "step": 8235 }, { "epoch": 4.6011173184357546, "grad_norm": 0.5513746738433838, "learning_rate": 0.0007722969187675071, "loss": 0.4755, "step": 8236 }, { "epoch": 4.601675977653631, "grad_norm": 0.4337019622325897, "learning_rate": 0.0007722689075630253, "loss": 0.4217, "step": 8237 }, { "epoch": 4.602234636871509, "grad_norm": 0.4756260812282562, "learning_rate": 0.0007722408963585435, "loss": 0.4939, "step": 8238 }, { "epoch": 4.602793296089385, "grad_norm": 6.45401668548584, "learning_rate": 0.0007722128851540616, "loss": 0.438, "step": 8239 }, { "epoch": 4.603351955307263, "grad_norm": 0.9214911460876465, "learning_rate": 0.0007721848739495798, "loss": 0.5228, "step": 8240 }, { "epoch": 4.603910614525139, "grad_norm": 0.5213215351104736, "learning_rate": 0.0007721568627450981, "loss": 0.4902, "step": 8241 }, { "epoch": 4.604469273743017, "grad_norm": 1.2350841760635376, "learning_rate": 0.0007721288515406163, "loss": 0.4247, "step": 8242 }, { "epoch": 4.605027932960894, "grad_norm": 0.5343257784843445, "learning_rate": 0.0007721008403361345, "loss": 0.5808, "step": 8243 }, { "epoch": 4.605586592178771, 
"grad_norm": 1.4936286211013794, "learning_rate": 0.0007720728291316527, "loss": 0.6011, "step": 8244 }, { "epoch": 4.606145251396648, "grad_norm": 0.5499721765518188, "learning_rate": 0.0007720448179271708, "loss": 0.4669, "step": 8245 }, { "epoch": 4.6067039106145256, "grad_norm": 0.7632619142532349, "learning_rate": 0.0007720168067226891, "loss": 0.4634, "step": 8246 }, { "epoch": 4.607262569832402, "grad_norm": 0.7666616439819336, "learning_rate": 0.0007719887955182073, "loss": 0.5135, "step": 8247 }, { "epoch": 4.60782122905028, "grad_norm": 0.5069367289543152, "learning_rate": 0.0007719607843137255, "loss": 0.4172, "step": 8248 }, { "epoch": 4.608379888268156, "grad_norm": 0.41482672095298767, "learning_rate": 0.0007719327731092437, "loss": 0.5592, "step": 8249 }, { "epoch": 4.608938547486034, "grad_norm": 0.47095632553100586, "learning_rate": 0.0007719047619047618, "loss": 0.3942, "step": 8250 }, { "epoch": 4.60949720670391, "grad_norm": 0.6012982726097107, "learning_rate": 0.0007718767507002802, "loss": 0.3931, "step": 8251 }, { "epoch": 4.610055865921788, "grad_norm": 0.8348149657249451, "learning_rate": 0.0007718487394957984, "loss": 0.4903, "step": 8252 }, { "epoch": 4.610614525139665, "grad_norm": 1.2411580085754395, "learning_rate": 0.0007718207282913166, "loss": 0.4595, "step": 8253 }, { "epoch": 4.611173184357542, "grad_norm": 0.6678096055984497, "learning_rate": 0.0007717927170868348, "loss": 0.5467, "step": 8254 }, { "epoch": 4.611731843575419, "grad_norm": 0.6354730129241943, "learning_rate": 0.0007717647058823529, "loss": 0.4105, "step": 8255 }, { "epoch": 4.6122905027932966, "grad_norm": 1.0603047609329224, "learning_rate": 0.0007717366946778711, "loss": 0.6296, "step": 8256 }, { "epoch": 4.612849162011173, "grad_norm": 0.6615439057350159, "learning_rate": 0.0007717086834733894, "loss": 0.5102, "step": 8257 }, { "epoch": 4.613407821229051, "grad_norm": 0.5365902185440063, "learning_rate": 0.0007716806722689076, "loss": 0.44, "step": 8258 }, { 
"epoch": 4.613966480446927, "grad_norm": 1.118316888809204, "learning_rate": 0.0007716526610644258, "loss": 0.5129, "step": 8259 }, { "epoch": 4.614525139664805, "grad_norm": 0.3882056772708893, "learning_rate": 0.000771624649859944, "loss": 0.3273, "step": 8260 }, { "epoch": 4.615083798882681, "grad_norm": 1.1657724380493164, "learning_rate": 0.0007715966386554621, "loss": 0.3995, "step": 8261 }, { "epoch": 4.615642458100559, "grad_norm": 0.5781629681587219, "learning_rate": 0.0007715686274509804, "loss": 0.4789, "step": 8262 }, { "epoch": 4.616201117318436, "grad_norm": 4.094382286071777, "learning_rate": 0.0007715406162464986, "loss": 0.4214, "step": 8263 }, { "epoch": 4.616759776536313, "grad_norm": 0.7160081267356873, "learning_rate": 0.0007715126050420168, "loss": 0.428, "step": 8264 }, { "epoch": 4.61731843575419, "grad_norm": 0.920839786529541, "learning_rate": 0.000771484593837535, "loss": 0.4489, "step": 8265 }, { "epoch": 4.617877094972067, "grad_norm": 0.5811208486557007, "learning_rate": 0.0007714565826330531, "loss": 0.5054, "step": 8266 }, { "epoch": 4.618435754189944, "grad_norm": 0.4951714277267456, "learning_rate": 0.0007714285714285715, "loss": 0.556, "step": 8267 }, { "epoch": 4.618994413407822, "grad_norm": 0.7615565061569214, "learning_rate": 0.0007714005602240897, "loss": 0.5598, "step": 8268 }, { "epoch": 4.619553072625698, "grad_norm": 0.5579128265380859, "learning_rate": 0.0007713725490196079, "loss": 0.3974, "step": 8269 }, { "epoch": 4.620111731843576, "grad_norm": 0.607566237449646, "learning_rate": 0.0007713445378151261, "loss": 0.47, "step": 8270 }, { "epoch": 4.620670391061452, "grad_norm": 0.4827634394168854, "learning_rate": 0.0007713165266106442, "loss": 0.4752, "step": 8271 }, { "epoch": 4.62122905027933, "grad_norm": 0.5341469645500183, "learning_rate": 0.0007712885154061625, "loss": 0.3862, "step": 8272 }, { "epoch": 4.621787709497207, "grad_norm": 0.463590532541275, "learning_rate": 0.0007712605042016807, "loss": 0.4355, 
"step": 8273 }, { "epoch": 4.622346368715084, "grad_norm": 1.0764509439468384, "learning_rate": 0.0007712324929971989, "loss": 0.4899, "step": 8274 }, { "epoch": 4.622905027932961, "grad_norm": 0.790708601474762, "learning_rate": 0.0007712044817927171, "loss": 0.4543, "step": 8275 }, { "epoch": 4.623463687150838, "grad_norm": 0.6141035556793213, "learning_rate": 0.0007711764705882353, "loss": 0.3959, "step": 8276 }, { "epoch": 4.624022346368715, "grad_norm": 0.8165810108184814, "learning_rate": 0.0007711484593837535, "loss": 0.5135, "step": 8277 }, { "epoch": 4.624581005586592, "grad_norm": 0.947012186050415, "learning_rate": 0.0007711204481792717, "loss": 0.5232, "step": 8278 }, { "epoch": 4.625139664804469, "grad_norm": 2.2460289001464844, "learning_rate": 0.0007710924369747899, "loss": 0.4597, "step": 8279 }, { "epoch": 4.625698324022347, "grad_norm": 0.7627413868904114, "learning_rate": 0.0007710644257703081, "loss": 0.424, "step": 8280 }, { "epoch": 4.626256983240223, "grad_norm": 0.7325783371925354, "learning_rate": 0.0007710364145658263, "loss": 0.3734, "step": 8281 }, { "epoch": 4.626815642458101, "grad_norm": 0.8129794597625732, "learning_rate": 0.0007710084033613445, "loss": 0.5286, "step": 8282 }, { "epoch": 4.627374301675978, "grad_norm": 2.142270088195801, "learning_rate": 0.0007709803921568627, "loss": 0.535, "step": 8283 }, { "epoch": 4.627932960893855, "grad_norm": 0.9217079877853394, "learning_rate": 0.000770952380952381, "loss": 0.51, "step": 8284 }, { "epoch": 4.628491620111732, "grad_norm": 0.6075193881988525, "learning_rate": 0.0007709243697478992, "loss": 0.4099, "step": 8285 }, { "epoch": 4.629050279329609, "grad_norm": 0.5230545401573181, "learning_rate": 0.0007708963585434174, "loss": 0.3557, "step": 8286 }, { "epoch": 4.629608938547486, "grad_norm": 0.9704474806785583, "learning_rate": 0.0007708683473389356, "loss": 0.3946, "step": 8287 }, { "epoch": 4.630167597765363, "grad_norm": 0.46194612979888916, "learning_rate": 
0.0007708403361344538, "loss": 0.4338, "step": 8288 }, { "epoch": 4.63072625698324, "grad_norm": 0.7506082057952881, "learning_rate": 0.000770812324929972, "loss": 0.4781, "step": 8289 }, { "epoch": 4.631284916201118, "grad_norm": 0.7269895672798157, "learning_rate": 0.0007707843137254902, "loss": 0.3245, "step": 8290 }, { "epoch": 4.631843575418994, "grad_norm": 0.6341338753700256, "learning_rate": 0.0007707563025210084, "loss": 0.4871, "step": 8291 }, { "epoch": 4.632402234636872, "grad_norm": 0.7796180248260498, "learning_rate": 0.0007707282913165267, "loss": 0.5501, "step": 8292 }, { "epoch": 4.632960893854749, "grad_norm": 1.3124040365219116, "learning_rate": 0.0007707002801120448, "loss": 0.6444, "step": 8293 }, { "epoch": 4.633519553072626, "grad_norm": 0.5190688371658325, "learning_rate": 0.000770672268907563, "loss": 0.4619, "step": 8294 }, { "epoch": 4.634078212290503, "grad_norm": 0.45809775590896606, "learning_rate": 0.0007706442577030812, "loss": 0.3827, "step": 8295 }, { "epoch": 4.63463687150838, "grad_norm": 1.3893835544586182, "learning_rate": 0.0007706162464985994, "loss": 0.4837, "step": 8296 }, { "epoch": 4.635195530726257, "grad_norm": 0.5533860325813293, "learning_rate": 0.0007705882352941177, "loss": 0.3982, "step": 8297 }, { "epoch": 4.635754189944134, "grad_norm": 0.49091672897338867, "learning_rate": 0.0007705602240896358, "loss": 0.52, "step": 8298 }, { "epoch": 4.636312849162011, "grad_norm": 0.5421136617660522, "learning_rate": 0.000770532212885154, "loss": 0.5373, "step": 8299 }, { "epoch": 4.636871508379889, "grad_norm": 0.5311143398284912, "learning_rate": 0.0007705042016806723, "loss": 0.3757, "step": 8300 }, { "epoch": 4.637430167597765, "grad_norm": 0.544424295425415, "learning_rate": 0.0007704761904761905, "loss": 0.4635, "step": 8301 }, { "epoch": 4.637988826815643, "grad_norm": 1.6883760690689087, "learning_rate": 0.0007704481792717088, "loss": 0.4245, "step": 8302 }, { "epoch": 4.638547486033519, "grad_norm": 
0.8369178175926208, "learning_rate": 0.0007704201680672269, "loss": 0.7341, "step": 8303 }, { "epoch": 4.639106145251397, "grad_norm": 0.9898208975791931, "learning_rate": 0.0007703921568627451, "loss": 0.5294, "step": 8304 }, { "epoch": 4.639664804469274, "grad_norm": 0.45998162031173706, "learning_rate": 0.0007703641456582633, "loss": 0.4047, "step": 8305 }, { "epoch": 4.640223463687151, "grad_norm": 0.6343604326248169, "learning_rate": 0.0007703361344537815, "loss": 0.5054, "step": 8306 }, { "epoch": 4.640782122905028, "grad_norm": 0.5256215929985046, "learning_rate": 0.0007703081232492998, "loss": 0.4557, "step": 8307 }, { "epoch": 4.641340782122905, "grad_norm": 0.613641619682312, "learning_rate": 0.000770280112044818, "loss": 0.407, "step": 8308 }, { "epoch": 4.641899441340782, "grad_norm": 0.4358263313770294, "learning_rate": 0.0007702521008403361, "loss": 0.4997, "step": 8309 }, { "epoch": 4.64245810055866, "grad_norm": 0.8095864653587341, "learning_rate": 0.0007702240896358543, "loss": 0.4851, "step": 8310 }, { "epoch": 4.643016759776536, "grad_norm": 1.0731861591339111, "learning_rate": 0.0007701960784313725, "loss": 0.5774, "step": 8311 }, { "epoch": 4.643575418994414, "grad_norm": 0.68409264087677, "learning_rate": 0.0007701680672268908, "loss": 0.4754, "step": 8312 }, { "epoch": 4.64413407821229, "grad_norm": 0.7335063815116882, "learning_rate": 0.000770140056022409, "loss": 0.4201, "step": 8313 }, { "epoch": 4.644692737430168, "grad_norm": 0.5331463813781738, "learning_rate": 0.0007701120448179271, "loss": 0.4149, "step": 8314 }, { "epoch": 4.645251396648044, "grad_norm": 0.8423575758934021, "learning_rate": 0.0007700840336134453, "loss": 0.4208, "step": 8315 }, { "epoch": 4.645810055865922, "grad_norm": 0.9110333919525146, "learning_rate": 0.0007700560224089635, "loss": 0.5889, "step": 8316 }, { "epoch": 4.646368715083799, "grad_norm": 0.7645127177238464, "learning_rate": 0.0007700280112044819, "loss": 0.5387, "step": 8317 }, { "epoch": 
4.646927374301676, "grad_norm": 0.527195394039154, "learning_rate": 0.0007700000000000001, "loss": 0.4044, "step": 8318 }, { "epoch": 4.647486033519553, "grad_norm": 1.3709142208099365, "learning_rate": 0.0007699719887955182, "loss": 0.3442, "step": 8319 }, { "epoch": 4.648044692737431, "grad_norm": 1.069693922996521, "learning_rate": 0.0007699439775910364, "loss": 0.6279, "step": 8320 }, { "epoch": 4.648603351955307, "grad_norm": 1.5301125049591064, "learning_rate": 0.0007699159663865546, "loss": 0.4696, "step": 8321 }, { "epoch": 4.649162011173185, "grad_norm": 0.7884263396263123, "learning_rate": 0.0007698879551820729, "loss": 0.4188, "step": 8322 }, { "epoch": 4.649720670391061, "grad_norm": 0.5019213557243347, "learning_rate": 0.0007698599439775911, "loss": 0.5026, "step": 8323 }, { "epoch": 4.650279329608939, "grad_norm": 0.7438209056854248, "learning_rate": 0.0007698319327731093, "loss": 0.5183, "step": 8324 }, { "epoch": 4.650837988826815, "grad_norm": 0.5842709541320801, "learning_rate": 0.0007698039215686274, "loss": 0.5326, "step": 8325 }, { "epoch": 4.651396648044693, "grad_norm": 5.246720314025879, "learning_rate": 0.0007697759103641456, "loss": 0.4647, "step": 8326 }, { "epoch": 4.65195530726257, "grad_norm": 0.5407655835151672, "learning_rate": 0.0007697478991596639, "loss": 0.3925, "step": 8327 }, { "epoch": 4.652513966480447, "grad_norm": 0.5838391780853271, "learning_rate": 0.0007697198879551821, "loss": 0.4706, "step": 8328 }, { "epoch": 4.653072625698324, "grad_norm": 0.46035677194595337, "learning_rate": 0.0007696918767507003, "loss": 0.4331, "step": 8329 }, { "epoch": 4.653631284916202, "grad_norm": 0.598491907119751, "learning_rate": 0.0007696638655462184, "loss": 0.4963, "step": 8330 }, { "epoch": 4.654189944134078, "grad_norm": 1.1157419681549072, "learning_rate": 0.0007696358543417366, "loss": 0.4076, "step": 8331 }, { "epoch": 4.654748603351956, "grad_norm": 0.46766576170921326, "learning_rate": 0.000769607843137255, "loss": 0.4299, 
"step": 8332 }, { "epoch": 4.655307262569832, "grad_norm": 3.0822103023529053, "learning_rate": 0.0007695798319327732, "loss": 0.4938, "step": 8333 }, { "epoch": 4.65586592178771, "grad_norm": 3.285327196121216, "learning_rate": 0.0007695518207282914, "loss": 0.4061, "step": 8334 }, { "epoch": 4.656424581005586, "grad_norm": 3.0389466285705566, "learning_rate": 0.0007695238095238095, "loss": 0.5347, "step": 8335 }, { "epoch": 4.656983240223464, "grad_norm": 0.5797761082649231, "learning_rate": 0.0007694957983193277, "loss": 0.5723, "step": 8336 }, { "epoch": 4.657541899441341, "grad_norm": 0.6282501816749573, "learning_rate": 0.000769467787114846, "loss": 0.4822, "step": 8337 }, { "epoch": 4.658100558659218, "grad_norm": 0.4578629434108734, "learning_rate": 0.0007694397759103642, "loss": 0.4148, "step": 8338 }, { "epoch": 4.658659217877095, "grad_norm": 5.898891925811768, "learning_rate": 0.0007694117647058824, "loss": 0.4121, "step": 8339 }, { "epoch": 4.659217877094972, "grad_norm": 0.5945466160774231, "learning_rate": 0.0007693837535014006, "loss": 0.6493, "step": 8340 }, { "epoch": 4.659776536312849, "grad_norm": 0.6316630840301514, "learning_rate": 0.0007693557422969187, "loss": 0.4453, "step": 8341 }, { "epoch": 4.660335195530727, "grad_norm": 1.0428556203842163, "learning_rate": 0.000769327731092437, "loss": 0.4298, "step": 8342 }, { "epoch": 4.660893854748603, "grad_norm": 0.5396453142166138, "learning_rate": 0.0007692997198879552, "loss": 0.4931, "step": 8343 }, { "epoch": 4.661452513966481, "grad_norm": 0.656947672367096, "learning_rate": 0.0007692717086834734, "loss": 0.4767, "step": 8344 }, { "epoch": 4.662011173184357, "grad_norm": 0.38328754901885986, "learning_rate": 0.0007692436974789916, "loss": 0.4942, "step": 8345 }, { "epoch": 4.662569832402235, "grad_norm": 0.5110989809036255, "learning_rate": 0.0007692156862745097, "loss": 0.5929, "step": 8346 }, { "epoch": 4.663128491620112, "grad_norm": 0.5763763189315796, "learning_rate": 
0.000769187675070028, "loss": 0.4603, "step": 8347 }, { "epoch": 4.663687150837989, "grad_norm": 0.6368765830993652, "learning_rate": 0.0007691596638655462, "loss": 0.5005, "step": 8348 }, { "epoch": 4.664245810055866, "grad_norm": 0.5837327241897583, "learning_rate": 0.0007691316526610645, "loss": 0.4204, "step": 8349 }, { "epoch": 4.664804469273743, "grad_norm": 0.4411894679069519, "learning_rate": 0.0007691036414565827, "loss": 0.403, "step": 8350 }, { "epoch": 4.66536312849162, "grad_norm": 0.5981355905532837, "learning_rate": 0.0007690756302521008, "loss": 0.4338, "step": 8351 }, { "epoch": 4.665921787709497, "grad_norm": 0.6112882494926453, "learning_rate": 0.0007690476190476191, "loss": 0.5062, "step": 8352 }, { "epoch": 4.666480446927374, "grad_norm": 0.694929301738739, "learning_rate": 0.0007690196078431373, "loss": 0.3944, "step": 8353 }, { "epoch": 4.667039106145252, "grad_norm": 0.6692671179771423, "learning_rate": 0.0007689915966386555, "loss": 0.6464, "step": 8354 }, { "epoch": 4.667597765363128, "grad_norm": 0.7128127813339233, "learning_rate": 0.0007689635854341737, "loss": 0.4137, "step": 8355 }, { "epoch": 4.668156424581006, "grad_norm": 0.6459019184112549, "learning_rate": 0.0007689355742296919, "loss": 0.4213, "step": 8356 }, { "epoch": 4.668715083798883, "grad_norm": 0.6452242136001587, "learning_rate": 0.0007689075630252101, "loss": 0.49, "step": 8357 }, { "epoch": 4.66927374301676, "grad_norm": 0.44031664729118347, "learning_rate": 0.0007688795518207283, "loss": 0.5177, "step": 8358 }, { "epoch": 4.669832402234637, "grad_norm": 0.5887936353683472, "learning_rate": 0.0007688515406162465, "loss": 0.4505, "step": 8359 }, { "epoch": 4.670391061452514, "grad_norm": 2.7244529724121094, "learning_rate": 0.0007688235294117647, "loss": 0.4451, "step": 8360 }, { "epoch": 4.670949720670391, "grad_norm": 7.343851566314697, "learning_rate": 0.0007687955182072829, "loss": 0.4978, "step": 8361 }, { "epoch": 4.671508379888268, "grad_norm": 
0.6784741878509521, "learning_rate": 0.0007687675070028011, "loss": 0.5199, "step": 8362 }, { "epoch": 4.672067039106145, "grad_norm": 2.5949554443359375, "learning_rate": 0.0007687394957983193, "loss": 0.5394, "step": 8363 }, { "epoch": 4.672625698324023, "grad_norm": 0.5449838042259216, "learning_rate": 0.0007687114845938375, "loss": 0.5266, "step": 8364 }, { "epoch": 4.673184357541899, "grad_norm": 0.6077706217765808, "learning_rate": 0.0007686834733893557, "loss": 0.5288, "step": 8365 }, { "epoch": 4.673743016759777, "grad_norm": 0.5879938006401062, "learning_rate": 0.000768655462184874, "loss": 0.4754, "step": 8366 }, { "epoch": 4.674301675977654, "grad_norm": 2.168011426925659, "learning_rate": 0.0007686274509803923, "loss": 0.49, "step": 8367 }, { "epoch": 4.674860335195531, "grad_norm": 0.3998042345046997, "learning_rate": 0.0007685994397759104, "loss": 0.4161, "step": 8368 }, { "epoch": 4.675418994413408, "grad_norm": 0.7290630340576172, "learning_rate": 0.0007685714285714286, "loss": 0.5756, "step": 8369 }, { "epoch": 4.675977653631285, "grad_norm": 0.9285721778869629, "learning_rate": 0.0007685434173669468, "loss": 0.4602, "step": 8370 }, { "epoch": 4.676536312849162, "grad_norm": 0.5415141582489014, "learning_rate": 0.000768515406162465, "loss": 0.4735, "step": 8371 }, { "epoch": 4.677094972067039, "grad_norm": 0.47024989128112793, "learning_rate": 0.0007684873949579833, "loss": 0.4542, "step": 8372 }, { "epoch": 4.677653631284916, "grad_norm": 0.9392772316932678, "learning_rate": 0.0007684593837535014, "loss": 0.4814, "step": 8373 }, { "epoch": 4.678212290502794, "grad_norm": 0.5973453521728516, "learning_rate": 0.0007684313725490196, "loss": 0.5103, "step": 8374 }, { "epoch": 4.67877094972067, "grad_norm": 0.6517735123634338, "learning_rate": 0.0007684033613445378, "loss": 0.626, "step": 8375 }, { "epoch": 4.679329608938548, "grad_norm": 0.5248894691467285, "learning_rate": 0.000768375350140056, "loss": 0.4042, "step": 8376 }, { "epoch": 
4.679888268156424, "grad_norm": 1.4121628999710083, "learning_rate": 0.0007683473389355743, "loss": 0.56, "step": 8377 }, { "epoch": 4.680446927374302, "grad_norm": 0.696548342704773, "learning_rate": 0.0007683193277310924, "loss": 0.6, "step": 8378 }, { "epoch": 4.681005586592179, "grad_norm": 4.406064987182617, "learning_rate": 0.0007682913165266106, "loss": 0.4036, "step": 8379 }, { "epoch": 4.681564245810056, "grad_norm": 5.332188129425049, "learning_rate": 0.0007682633053221288, "loss": 0.477, "step": 8380 }, { "epoch": 4.682122905027933, "grad_norm": 5.1298298835754395, "learning_rate": 0.000768235294117647, "loss": 0.4473, "step": 8381 }, { "epoch": 4.68268156424581, "grad_norm": 0.9707376956939697, "learning_rate": 0.0007682072829131654, "loss": 0.4221, "step": 8382 }, { "epoch": 4.683240223463687, "grad_norm": 0.49457040429115295, "learning_rate": 0.0007681792717086836, "loss": 0.4591, "step": 8383 }, { "epoch": 4.683798882681565, "grad_norm": 0.5346271395683289, "learning_rate": 0.0007681512605042017, "loss": 0.5003, "step": 8384 }, { "epoch": 4.684357541899441, "grad_norm": 0.5562250018119812, "learning_rate": 0.0007681232492997199, "loss": 0.5317, "step": 8385 }, { "epoch": 4.684916201117319, "grad_norm": 0.5596450567245483, "learning_rate": 0.0007680952380952381, "loss": 0.4964, "step": 8386 }, { "epoch": 4.685474860335195, "grad_norm": 0.578858494758606, "learning_rate": 0.0007680672268907564, "loss": 0.4327, "step": 8387 }, { "epoch": 4.686033519553073, "grad_norm": 0.47599729895591736, "learning_rate": 0.0007680392156862746, "loss": 0.3794, "step": 8388 }, { "epoch": 4.686592178770949, "grad_norm": 0.6692066788673401, "learning_rate": 0.0007680112044817927, "loss": 0.4484, "step": 8389 }, { "epoch": 4.687150837988827, "grad_norm": 1.602552056312561, "learning_rate": 0.0007679831932773109, "loss": 0.4162, "step": 8390 }, { "epoch": 4.687709497206704, "grad_norm": 0.5399988293647766, "learning_rate": 0.0007679551820728291, "loss": 0.4497, "step": 8391 
}, { "epoch": 4.688268156424581, "grad_norm": 0.5623546838760376, "learning_rate": 0.0007679271708683474, "loss": 0.3778, "step": 8392 }, { "epoch": 4.688826815642458, "grad_norm": 0.5338775515556335, "learning_rate": 0.0007678991596638656, "loss": 0.4721, "step": 8393 }, { "epoch": 4.689385474860336, "grad_norm": 0.8115939497947693, "learning_rate": 0.0007678711484593837, "loss": 0.5732, "step": 8394 }, { "epoch": 4.689944134078212, "grad_norm": 0.5184304118156433, "learning_rate": 0.0007678431372549019, "loss": 0.4505, "step": 8395 }, { "epoch": 4.69050279329609, "grad_norm": 0.7624722719192505, "learning_rate": 0.0007678151260504201, "loss": 0.5618, "step": 8396 }, { "epoch": 4.691061452513966, "grad_norm": 0.7532416582107544, "learning_rate": 0.0007677871148459385, "loss": 0.4829, "step": 8397 }, { "epoch": 4.691620111731844, "grad_norm": 0.8131194114685059, "learning_rate": 0.0007677591036414567, "loss": 0.4276, "step": 8398 }, { "epoch": 4.69217877094972, "grad_norm": 1.1290749311447144, "learning_rate": 0.0007677310924369749, "loss": 0.4436, "step": 8399 }, { "epoch": 4.692737430167598, "grad_norm": 0.8095458745956421, "learning_rate": 0.000767703081232493, "loss": 0.4921, "step": 8400 }, { "epoch": 4.693296089385475, "grad_norm": 2.7540712356567383, "learning_rate": 0.0007676750700280112, "loss": 0.4685, "step": 8401 }, { "epoch": 4.693854748603352, "grad_norm": 1.0384215116500854, "learning_rate": 0.0007676470588235295, "loss": 0.7322, "step": 8402 }, { "epoch": 4.694413407821229, "grad_norm": 0.9079729318618774, "learning_rate": 0.0007676190476190477, "loss": 0.5864, "step": 8403 }, { "epoch": 4.694972067039107, "grad_norm": 0.5142230987548828, "learning_rate": 0.0007675910364145659, "loss": 0.6077, "step": 8404 }, { "epoch": 4.695530726256983, "grad_norm": 0.8453822135925293, "learning_rate": 0.000767563025210084, "loss": 0.5175, "step": 8405 }, { "epoch": 4.696089385474861, "grad_norm": 0.6371819376945496, "learning_rate": 0.0007675350140056022, "loss": 
0.5104, "step": 8406 }, { "epoch": 4.696648044692737, "grad_norm": 1.5283763408660889, "learning_rate": 0.0007675070028011205, "loss": 0.4872, "step": 8407 }, { "epoch": 4.697206703910615, "grad_norm": 0.5396294593811035, "learning_rate": 0.0007674789915966387, "loss": 0.4043, "step": 8408 }, { "epoch": 4.697765363128491, "grad_norm": 0.7387148141860962, "learning_rate": 0.0007674509803921569, "loss": 0.433, "step": 8409 }, { "epoch": 4.698324022346369, "grad_norm": 0.5455625057220459, "learning_rate": 0.000767422969187675, "loss": 0.3997, "step": 8410 }, { "epoch": 4.698882681564246, "grad_norm": 0.43977129459381104, "learning_rate": 0.0007673949579831932, "loss": 0.4116, "step": 8411 }, { "epoch": 4.699441340782123, "grad_norm": 0.5600402355194092, "learning_rate": 0.0007673669467787115, "loss": 0.5067, "step": 8412 }, { "epoch": 4.7, "grad_norm": 0.5582857131958008, "learning_rate": 0.0007673389355742297, "loss": 0.4591, "step": 8413 }, { "epoch": 4.700558659217877, "grad_norm": 3.107438087463379, "learning_rate": 0.000767310924369748, "loss": 0.4407, "step": 8414 }, { "epoch": 4.701117318435754, "grad_norm": 0.6607743501663208, "learning_rate": 0.0007672829131652662, "loss": 0.5214, "step": 8415 }, { "epoch": 4.701675977653632, "grad_norm": 9.292428970336914, "learning_rate": 0.0007672549019607843, "loss": 0.5008, "step": 8416 }, { "epoch": 4.702234636871508, "grad_norm": 0.5419268608093262, "learning_rate": 0.0007672268907563026, "loss": 0.5598, "step": 8417 }, { "epoch": 4.702793296089386, "grad_norm": 0.6430529356002808, "learning_rate": 0.0007671988795518208, "loss": 0.443, "step": 8418 }, { "epoch": 4.703351955307262, "grad_norm": 0.8362265229225159, "learning_rate": 0.000767170868347339, "loss": 0.5445, "step": 8419 }, { "epoch": 4.70391061452514, "grad_norm": 0.8648584485054016, "learning_rate": 0.0007671428571428572, "loss": 0.4465, "step": 8420 }, { "epoch": 4.704469273743017, "grad_norm": 0.7796539664268494, "learning_rate": 0.0007671148459383753, 
"loss": 0.6103, "step": 8421 }, { "epoch": 4.705027932960894, "grad_norm": 4.9278154373168945, "learning_rate": 0.0007670868347338936, "loss": 0.4605, "step": 8422 }, { "epoch": 4.705586592178771, "grad_norm": 0.9810303449630737, "learning_rate": 0.0007670588235294118, "loss": 0.3918, "step": 8423 }, { "epoch": 4.706145251396648, "grad_norm": 0.5862292051315308, "learning_rate": 0.00076703081232493, "loss": 0.5266, "step": 8424 }, { "epoch": 4.706703910614525, "grad_norm": 0.8005500435829163, "learning_rate": 0.0007670028011204482, "loss": 0.4616, "step": 8425 }, { "epoch": 4.707262569832402, "grad_norm": 0.5437454581260681, "learning_rate": 0.0007669747899159663, "loss": 0.5492, "step": 8426 }, { "epoch": 4.707821229050279, "grad_norm": 0.5736657381057739, "learning_rate": 0.0007669467787114846, "loss": 0.5325, "step": 8427 }, { "epoch": 4.708379888268157, "grad_norm": 0.6227053999900818, "learning_rate": 0.0007669187675070028, "loss": 0.4251, "step": 8428 }, { "epoch": 4.708938547486033, "grad_norm": 0.8950062394142151, "learning_rate": 0.000766890756302521, "loss": 0.4034, "step": 8429 }, { "epoch": 4.709497206703911, "grad_norm": 0.7234181761741638, "learning_rate": 0.0007668627450980392, "loss": 0.3824, "step": 8430 }, { "epoch": 4.710055865921788, "grad_norm": 2.1865220069885254, "learning_rate": 0.0007668347338935575, "loss": 0.5149, "step": 8431 }, { "epoch": 4.710614525139665, "grad_norm": 0.5704142451286316, "learning_rate": 0.0007668067226890757, "loss": 0.5251, "step": 8432 }, { "epoch": 4.711173184357542, "grad_norm": 0.5133355855941772, "learning_rate": 0.0007667787114845939, "loss": 0.4133, "step": 8433 }, { "epoch": 4.711731843575419, "grad_norm": 0.5629573464393616, "learning_rate": 0.0007667507002801121, "loss": 0.3161, "step": 8434 }, { "epoch": 4.712290502793296, "grad_norm": 6.425321578979492, "learning_rate": 0.0007667226890756303, "loss": 0.5432, "step": 8435 }, { "epoch": 4.712849162011173, "grad_norm": 0.940984308719635, "learning_rate": 
0.0007666946778711485, "loss": 0.3184, "step": 8436 }, { "epoch": 4.71340782122905, "grad_norm": 0.9444315433502197, "learning_rate": 0.0007666666666666667, "loss": 0.4171, "step": 8437 }, { "epoch": 4.713966480446928, "grad_norm": 0.5563790202140808, "learning_rate": 0.0007666386554621849, "loss": 0.4324, "step": 8438 }, { "epoch": 4.714525139664804, "grad_norm": 0.7993582487106323, "learning_rate": 0.0007666106442577031, "loss": 0.3976, "step": 8439 }, { "epoch": 4.715083798882682, "grad_norm": 0.6162702441215515, "learning_rate": 0.0007665826330532213, "loss": 0.6114, "step": 8440 }, { "epoch": 4.715642458100559, "grad_norm": 0.9097176194190979, "learning_rate": 0.0007665546218487395, "loss": 0.4878, "step": 8441 }, { "epoch": 4.716201117318436, "grad_norm": 0.6418911218643188, "learning_rate": 0.0007665266106442577, "loss": 0.555, "step": 8442 }, { "epoch": 4.716759776536313, "grad_norm": 0.8568861484527588, "learning_rate": 0.0007664985994397759, "loss": 0.4174, "step": 8443 }, { "epoch": 4.71731843575419, "grad_norm": 0.6429045796394348, "learning_rate": 0.0007664705882352941, "loss": 0.4053, "step": 8444 }, { "epoch": 4.717877094972067, "grad_norm": 0.9385924339294434, "learning_rate": 0.0007664425770308123, "loss": 0.5763, "step": 8445 }, { "epoch": 4.718435754189944, "grad_norm": 1.9731087684631348, "learning_rate": 0.0007664145658263305, "loss": 0.4474, "step": 8446 }, { "epoch": 4.718994413407821, "grad_norm": 0.5894946455955505, "learning_rate": 0.0007663865546218489, "loss": 0.3376, "step": 8447 }, { "epoch": 4.719553072625699, "grad_norm": 0.6896722912788391, "learning_rate": 0.000766358543417367, "loss": 0.4381, "step": 8448 }, { "epoch": 4.720111731843575, "grad_norm": 0.8945481181144714, "learning_rate": 0.0007663305322128852, "loss": 0.4788, "step": 8449 }, { "epoch": 4.720670391061453, "grad_norm": 0.5758554339408875, "learning_rate": 0.0007663025210084034, "loss": 0.4761, "step": 8450 }, { "epoch": 4.721229050279329, "grad_norm": 
0.6602922081947327, "learning_rate": 0.0007662745098039216, "loss": 0.4452, "step": 8451 }, { "epoch": 4.721787709497207, "grad_norm": 0.5906196236610413, "learning_rate": 0.0007662464985994399, "loss": 0.4372, "step": 8452 }, { "epoch": 4.722346368715084, "grad_norm": 0.5834033489227295, "learning_rate": 0.000766218487394958, "loss": 0.4272, "step": 8453 }, { "epoch": 4.722905027932961, "grad_norm": 1.266449213027954, "learning_rate": 0.0007661904761904762, "loss": 0.4435, "step": 8454 }, { "epoch": 4.723463687150838, "grad_norm": 0.4318232834339142, "learning_rate": 0.0007661624649859944, "loss": 0.5326, "step": 8455 }, { "epoch": 4.724022346368715, "grad_norm": 1.2043719291687012, "learning_rate": 0.0007661344537815126, "loss": 0.4817, "step": 8456 }, { "epoch": 4.724581005586592, "grad_norm": 0.5707458257675171, "learning_rate": 0.0007661064425770309, "loss": 0.4559, "step": 8457 }, { "epoch": 4.72513966480447, "grad_norm": 0.46222472190856934, "learning_rate": 0.000766078431372549, "loss": 0.5457, "step": 8458 }, { "epoch": 4.725698324022346, "grad_norm": 1.29996657371521, "learning_rate": 0.0007660504201680672, "loss": 0.3586, "step": 8459 }, { "epoch": 4.726256983240224, "grad_norm": 0.8138699531555176, "learning_rate": 0.0007660224089635854, "loss": 0.4255, "step": 8460 }, { "epoch": 4.7268156424581, "grad_norm": 0.6481475830078125, "learning_rate": 0.0007659943977591036, "loss": 0.4189, "step": 8461 }, { "epoch": 4.727374301675978, "grad_norm": 0.4448300302028656, "learning_rate": 0.000765966386554622, "loss": 0.4507, "step": 8462 }, { "epoch": 4.727932960893854, "grad_norm": 1.2780014276504517, "learning_rate": 0.0007659383753501402, "loss": 0.5453, "step": 8463 }, { "epoch": 4.728491620111732, "grad_norm": 0.7414190769195557, "learning_rate": 0.0007659103641456583, "loss": 0.5589, "step": 8464 }, { "epoch": 4.729050279329609, "grad_norm": 0.458852082490921, "learning_rate": 0.0007658823529411765, "loss": 0.4221, "step": 8465 }, { "epoch": 
4.729608938547486, "grad_norm": 0.6836581826210022, "learning_rate": 0.0007658543417366947, "loss": 0.4914, "step": 8466 }, { "epoch": 4.730167597765363, "grad_norm": 0.4650897681713104, "learning_rate": 0.000765826330532213, "loss": 0.5294, "step": 8467 }, { "epoch": 4.730726256983241, "grad_norm": 0.3533768057823181, "learning_rate": 0.0007657983193277312, "loss": 0.3611, "step": 8468 }, { "epoch": 4.731284916201117, "grad_norm": 0.7186769247055054, "learning_rate": 0.0007657703081232493, "loss": 0.4371, "step": 8469 }, { "epoch": 4.731843575418995, "grad_norm": 0.4629736542701721, "learning_rate": 0.0007657422969187675, "loss": 0.3897, "step": 8470 }, { "epoch": 4.732402234636871, "grad_norm": 0.6437535881996155, "learning_rate": 0.0007657142857142857, "loss": 0.5201, "step": 8471 }, { "epoch": 4.732960893854749, "grad_norm": 1.0654048919677734, "learning_rate": 0.000765686274509804, "loss": 0.4863, "step": 8472 }, { "epoch": 4.733519553072625, "grad_norm": 0.7273711562156677, "learning_rate": 0.0007656582633053222, "loss": 0.5456, "step": 8473 }, { "epoch": 4.734078212290503, "grad_norm": 0.4468400776386261, "learning_rate": 0.0007656302521008403, "loss": 0.4874, "step": 8474 }, { "epoch": 4.73463687150838, "grad_norm": 0.5781263709068298, "learning_rate": 0.0007656022408963585, "loss": 0.5598, "step": 8475 }, { "epoch": 4.735195530726257, "grad_norm": 0.7847022414207458, "learning_rate": 0.0007655742296918767, "loss": 0.3644, "step": 8476 }, { "epoch": 4.735754189944134, "grad_norm": 0.6021060347557068, "learning_rate": 0.0007655462184873949, "loss": 0.4145, "step": 8477 }, { "epoch": 4.736312849162011, "grad_norm": 0.5077820420265198, "learning_rate": 0.0007655182072829132, "loss": 0.4714, "step": 8478 }, { "epoch": 4.736871508379888, "grad_norm": 0.7651529908180237, "learning_rate": 0.0007654901960784315, "loss": 0.5498, "step": 8479 }, { "epoch": 4.737430167597766, "grad_norm": 5.596915245056152, "learning_rate": 0.0007654621848739495, "loss": 0.4456, 
"step": 8480 }, { "epoch": 4.737988826815642, "grad_norm": 0.688568115234375, "learning_rate": 0.0007654341736694678, "loss": 0.5245, "step": 8481 }, { "epoch": 4.73854748603352, "grad_norm": 0.5024352073669434, "learning_rate": 0.000765406162464986, "loss": 0.4767, "step": 8482 }, { "epoch": 4.739106145251396, "grad_norm": 1.536091923713684, "learning_rate": 0.0007653781512605043, "loss": 0.4464, "step": 8483 }, { "epoch": 4.739664804469274, "grad_norm": 1.293010950088501, "learning_rate": 0.0007653501400560225, "loss": 0.4876, "step": 8484 }, { "epoch": 4.740223463687151, "grad_norm": 0.4086540639400482, "learning_rate": 0.0007653221288515406, "loss": 0.4024, "step": 8485 }, { "epoch": 4.740782122905028, "grad_norm": 0.5928152799606323, "learning_rate": 0.0007652941176470588, "loss": 0.4414, "step": 8486 }, { "epoch": 4.741340782122905, "grad_norm": 0.431692510843277, "learning_rate": 0.000765266106442577, "loss": 0.4093, "step": 8487 }, { "epoch": 4.741899441340782, "grad_norm": 0.42686524987220764, "learning_rate": 0.0007652380952380953, "loss": 0.4037, "step": 8488 }, { "epoch": 4.742458100558659, "grad_norm": 0.47489142417907715, "learning_rate": 0.0007652100840336135, "loss": 0.3869, "step": 8489 }, { "epoch": 4.743016759776537, "grad_norm": 0.7389423847198486, "learning_rate": 0.0007651820728291316, "loss": 0.5071, "step": 8490 }, { "epoch": 4.743575418994413, "grad_norm": 2.945859432220459, "learning_rate": 0.0007651540616246498, "loss": 0.448, "step": 8491 }, { "epoch": 4.744134078212291, "grad_norm": 0.7166175842285156, "learning_rate": 0.000765126050420168, "loss": 0.4907, "step": 8492 }, { "epoch": 4.744692737430167, "grad_norm": 0.544670045375824, "learning_rate": 0.0007650980392156863, "loss": 0.4525, "step": 8493 }, { "epoch": 4.745251396648045, "grad_norm": 0.6563423871994019, "learning_rate": 0.0007650700280112045, "loss": 0.4313, "step": 8494 }, { "epoch": 4.745810055865922, "grad_norm": 0.40189117193222046, "learning_rate": 
0.0007650420168067227, "loss": 0.3294, "step": 8495 }, { "epoch": 4.746368715083799, "grad_norm": 0.48015493154525757, "learning_rate": 0.0007650140056022408, "loss": 0.3317, "step": 8496 }, { "epoch": 4.746927374301676, "grad_norm": 0.5164525508880615, "learning_rate": 0.000764985994397759, "loss": 0.4001, "step": 8497 }, { "epoch": 4.747486033519553, "grad_norm": 0.46735015511512756, "learning_rate": 0.0007649579831932774, "loss": 0.4144, "step": 8498 }, { "epoch": 4.74804469273743, "grad_norm": 0.4527231752872467, "learning_rate": 0.0007649299719887956, "loss": 0.4198, "step": 8499 }, { "epoch": 4.748603351955307, "grad_norm": 1.2354981899261475, "learning_rate": 0.0007649019607843138, "loss": 0.4893, "step": 8500 }, { "epoch": 4.748603351955307, "eval_cer": 0.09194461719747307, "eval_loss": 0.35319676995277405, "eval_runtime": 57.1481, "eval_samples_per_second": 79.408, "eval_steps_per_second": 4.97, "eval_wer": 0.3625597050361722, "step": 8500 }, { "epoch": 4.749162011173184, "grad_norm": 0.7207176089286804, "learning_rate": 0.0007648739495798319, "loss": 0.5563, "step": 8501 }, { "epoch": 4.749720670391062, "grad_norm": 0.38375696539878845, "learning_rate": 0.0007648459383753501, "loss": 0.3644, "step": 8502 }, { "epoch": 4.750279329608938, "grad_norm": 1.0161912441253662, "learning_rate": 0.0007648179271708684, "loss": 0.3834, "step": 8503 }, { "epoch": 4.750837988826816, "grad_norm": 0.72743821144104, "learning_rate": 0.0007647899159663866, "loss": 0.5341, "step": 8504 }, { "epoch": 4.751396648044693, "grad_norm": 0.514685869216919, "learning_rate": 0.0007647619047619048, "loss": 0.5476, "step": 8505 }, { "epoch": 4.75195530726257, "grad_norm": 0.5789101123809814, "learning_rate": 0.0007647338935574229, "loss": 0.5369, "step": 8506 }, { "epoch": 4.752513966480447, "grad_norm": 0.5056542158126831, "learning_rate": 0.0007647058823529411, "loss": 0.4842, "step": 8507 }, { "epoch": 4.753072625698324, "grad_norm": 0.39382168650627136, "learning_rate": 
0.0007646778711484594, "loss": 0.4077, "step": 8508 }, { "epoch": 4.753631284916201, "grad_norm": 0.5213464498519897, "learning_rate": 0.0007646498599439776, "loss": 0.4121, "step": 8509 }, { "epoch": 4.754189944134078, "grad_norm": 0.4672740399837494, "learning_rate": 0.0007646218487394958, "loss": 0.3647, "step": 8510 }, { "epoch": 4.754748603351955, "grad_norm": 0.48578375577926636, "learning_rate": 0.000764593837535014, "loss": 0.4473, "step": 8511 }, { "epoch": 4.755307262569833, "grad_norm": 1.0094746351242065, "learning_rate": 0.0007645658263305321, "loss": 0.5339, "step": 8512 }, { "epoch": 4.755865921787709, "grad_norm": 0.6768170595169067, "learning_rate": 0.0007645378151260505, "loss": 0.4931, "step": 8513 }, { "epoch": 4.756424581005587, "grad_norm": 0.4883013963699341, "learning_rate": 0.0007645098039215687, "loss": 0.3961, "step": 8514 }, { "epoch": 4.756983240223463, "grad_norm": 0.6311526894569397, "learning_rate": 0.0007644817927170869, "loss": 0.5226, "step": 8515 }, { "epoch": 4.757541899441341, "grad_norm": 0.5176435708999634, "learning_rate": 0.0007644537815126051, "loss": 0.4457, "step": 8516 }, { "epoch": 4.758100558659218, "grad_norm": 1.0449047088623047, "learning_rate": 0.0007644257703081232, "loss": 0.5141, "step": 8517 }, { "epoch": 4.758659217877095, "grad_norm": 0.3304331600666046, "learning_rate": 0.0007643977591036415, "loss": 0.3415, "step": 8518 }, { "epoch": 4.759217877094972, "grad_norm": 0.5925968885421753, "learning_rate": 0.0007643697478991597, "loss": 0.3381, "step": 8519 }, { "epoch": 4.759776536312849, "grad_norm": 0.9773862957954407, "learning_rate": 0.0007643417366946779, "loss": 0.5377, "step": 8520 }, { "epoch": 4.760335195530726, "grad_norm": 0.7833096385002136, "learning_rate": 0.0007643137254901961, "loss": 0.4993, "step": 8521 }, { "epoch": 4.760893854748604, "grad_norm": 0.6476019620895386, "learning_rate": 0.0007642857142857142, "loss": 0.4434, "step": 8522 }, { "epoch": 4.76145251396648, "grad_norm": 
0.4440038800239563, "learning_rate": 0.0007642577030812325, "loss": 0.4266, "step": 8523 }, { "epoch": 4.762011173184358, "grad_norm": 0.8631799221038818, "learning_rate": 0.0007642296918767507, "loss": 0.4521, "step": 8524 }, { "epoch": 4.762569832402234, "grad_norm": 0.5670862793922424, "learning_rate": 0.0007642016806722689, "loss": 0.4143, "step": 8525 }, { "epoch": 4.763128491620112, "grad_norm": 0.6003089547157288, "learning_rate": 0.0007641736694677871, "loss": 0.5468, "step": 8526 }, { "epoch": 4.763687150837989, "grad_norm": 0.5930803418159485, "learning_rate": 0.0007641456582633053, "loss": 0.4601, "step": 8527 }, { "epoch": 4.764245810055866, "grad_norm": 0.3905738294124603, "learning_rate": 0.0007641176470588235, "loss": 0.4098, "step": 8528 }, { "epoch": 4.764804469273743, "grad_norm": 0.562506914138794, "learning_rate": 0.0007640896358543417, "loss": 0.391, "step": 8529 }, { "epoch": 4.76536312849162, "grad_norm": 0.6021487712860107, "learning_rate": 0.00076406162464986, "loss": 0.5974, "step": 8530 }, { "epoch": 4.765921787709497, "grad_norm": 0.7099018096923828, "learning_rate": 0.0007640336134453782, "loss": 0.5282, "step": 8531 }, { "epoch": 4.766480446927375, "grad_norm": 0.7900936603546143, "learning_rate": 0.0007640056022408964, "loss": 0.4747, "step": 8532 }, { "epoch": 4.767039106145251, "grad_norm": 0.523512065410614, "learning_rate": 0.0007639775910364146, "loss": 0.4116, "step": 8533 }, { "epoch": 4.767597765363129, "grad_norm": 0.6078117489814758, "learning_rate": 0.0007639495798319328, "loss": 0.5375, "step": 8534 }, { "epoch": 4.768156424581005, "grad_norm": 0.6556194424629211, "learning_rate": 0.000763921568627451, "loss": 0.435, "step": 8535 }, { "epoch": 4.768715083798883, "grad_norm": 2.1966516971588135, "learning_rate": 0.0007638935574229692, "loss": 0.4182, "step": 8536 }, { "epoch": 4.769273743016759, "grad_norm": 0.8509520888328552, "learning_rate": 0.0007638655462184874, "loss": 0.527, "step": 8537 }, { "epoch": 
4.769832402234637, "grad_norm": 1.2802207469940186, "learning_rate": 0.0007638375350140056, "loss": 0.4945, "step": 8538 }, { "epoch": 4.770391061452514, "grad_norm": 0.7501946687698364, "learning_rate": 0.0007638095238095238, "loss": 0.3921, "step": 8539 }, { "epoch": 4.770949720670391, "grad_norm": 0.6180909276008606, "learning_rate": 0.000763781512605042, "loss": 0.4274, "step": 8540 }, { "epoch": 4.771508379888268, "grad_norm": 0.5417438745498657, "learning_rate": 0.0007637535014005602, "loss": 0.5829, "step": 8541 }, { "epoch": 4.772067039106146, "grad_norm": 0.44538432359695435, "learning_rate": 0.0007637254901960784, "loss": 0.4351, "step": 8542 }, { "epoch": 4.772625698324022, "grad_norm": 0.8329975008964539, "learning_rate": 0.0007636974789915967, "loss": 0.4046, "step": 8543 }, { "epoch": 4.7731843575419, "grad_norm": 0.814070463180542, "learning_rate": 0.0007636694677871148, "loss": 0.4987, "step": 8544 }, { "epoch": 4.773743016759776, "grad_norm": 0.5354944467544556, "learning_rate": 0.000763641456582633, "loss": 0.4365, "step": 8545 }, { "epoch": 4.774301675977654, "grad_norm": 0.44521036744117737, "learning_rate": 0.0007636134453781513, "loss": 0.4372, "step": 8546 }, { "epoch": 4.77486033519553, "grad_norm": 1.0154215097427368, "learning_rate": 0.0007635854341736695, "loss": 0.3724, "step": 8547 }, { "epoch": 4.775418994413408, "grad_norm": 0.6184551119804382, "learning_rate": 0.0007635574229691878, "loss": 0.4612, "step": 8548 }, { "epoch": 4.775977653631285, "grad_norm": 0.48441067337989807, "learning_rate": 0.0007635294117647059, "loss": 0.445, "step": 8549 }, { "epoch": 4.776536312849162, "grad_norm": 0.4702484905719757, "learning_rate": 0.0007635014005602241, "loss": 0.5007, "step": 8550 }, { "epoch": 4.777094972067039, "grad_norm": 0.31311506032943726, "learning_rate": 0.0007634733893557423, "loss": 0.2831, "step": 8551 }, { "epoch": 4.777653631284916, "grad_norm": 0.5636079907417297, "learning_rate": 0.0007634453781512605, "loss": 0.5599, 
"step": 8552 }, { "epoch": 4.778212290502793, "grad_norm": 0.6074462532997131, "learning_rate": 0.0007634173669467788, "loss": 0.4183, "step": 8553 }, { "epoch": 4.778770949720671, "grad_norm": 0.946617066860199, "learning_rate": 0.0007633893557422969, "loss": 0.3671, "step": 8554 }, { "epoch": 4.779329608938547, "grad_norm": 0.46405544877052307, "learning_rate": 0.0007633613445378151, "loss": 0.4828, "step": 8555 }, { "epoch": 4.779888268156425, "grad_norm": 0.49630939960479736, "learning_rate": 0.0007633333333333333, "loss": 0.4057, "step": 8556 }, { "epoch": 4.780446927374301, "grad_norm": 1.0906662940979004, "learning_rate": 0.0007633053221288515, "loss": 0.494, "step": 8557 }, { "epoch": 4.781005586592179, "grad_norm": 0.6826028227806091, "learning_rate": 0.0007632773109243698, "loss": 0.5314, "step": 8558 }, { "epoch": 4.781564245810056, "grad_norm": 0.55367511510849, "learning_rate": 0.000763249299719888, "loss": 0.4674, "step": 8559 }, { "epoch": 4.782122905027933, "grad_norm": 0.44338259100914, "learning_rate": 0.0007632212885154061, "loss": 0.4788, "step": 8560 }, { "epoch": 4.78268156424581, "grad_norm": 0.3534598648548126, "learning_rate": 0.0007631932773109243, "loss": 0.3624, "step": 8561 }, { "epoch": 4.783240223463687, "grad_norm": 0.4212881624698639, "learning_rate": 0.0007631652661064425, "loss": 0.4386, "step": 8562 }, { "epoch": 4.783798882681564, "grad_norm": 0.4065394401550293, "learning_rate": 0.0007631372549019609, "loss": 0.4035, "step": 8563 }, { "epoch": 4.784357541899441, "grad_norm": 0.4878144860267639, "learning_rate": 0.0007631092436974791, "loss": 0.4384, "step": 8564 }, { "epoch": 4.784916201117318, "grad_norm": 0.5604859590530396, "learning_rate": 0.0007630812324929972, "loss": 0.4836, "step": 8565 }, { "epoch": 4.785474860335196, "grad_norm": 0.5255528092384338, "learning_rate": 0.0007630532212885154, "loss": 0.4055, "step": 8566 }, { "epoch": 4.786033519553072, "grad_norm": 0.5735591053962708, "learning_rate": 
0.0007630252100840336, "loss": 0.4753, "step": 8567 }, { "epoch": 4.78659217877095, "grad_norm": 1.0948740243911743, "learning_rate": 0.0007629971988795519, "loss": 0.518, "step": 8568 }, { "epoch": 4.787150837988827, "grad_norm": 0.6727017164230347, "learning_rate": 0.0007629691876750701, "loss": 0.428, "step": 8569 }, { "epoch": 4.787709497206704, "grad_norm": 0.577598512172699, "learning_rate": 0.0007629411764705882, "loss": 0.4873, "step": 8570 }, { "epoch": 4.788268156424581, "grad_norm": 0.7192434668540955, "learning_rate": 0.0007629131652661064, "loss": 0.5153, "step": 8571 }, { "epoch": 4.788826815642458, "grad_norm": 0.3713063597679138, "learning_rate": 0.0007628851540616246, "loss": 0.3928, "step": 8572 }, { "epoch": 4.789385474860335, "grad_norm": 0.5450425744056702, "learning_rate": 0.0007628571428571429, "loss": 0.6114, "step": 8573 }, { "epoch": 4.789944134078212, "grad_norm": 0.8751230835914612, "learning_rate": 0.0007628291316526611, "loss": 0.4851, "step": 8574 }, { "epoch": 4.790502793296089, "grad_norm": 0.4314839243888855, "learning_rate": 0.0007628011204481793, "loss": 0.4735, "step": 8575 }, { "epoch": 4.791061452513967, "grad_norm": 0.9281938076019287, "learning_rate": 0.0007627731092436974, "loss": 0.4769, "step": 8576 }, { "epoch": 4.791620111731843, "grad_norm": 0.4785159230232239, "learning_rate": 0.0007627450980392156, "loss": 0.495, "step": 8577 }, { "epoch": 4.792178770949721, "grad_norm": 0.46127575635910034, "learning_rate": 0.000762717086834734, "loss": 0.4093, "step": 8578 }, { "epoch": 4.792737430167598, "grad_norm": 2.6541554927825928, "learning_rate": 0.0007626890756302522, "loss": 0.4028, "step": 8579 }, { "epoch": 4.793296089385475, "grad_norm": 0.6880247592926025, "learning_rate": 0.0007626610644257704, "loss": 0.4358, "step": 8580 }, { "epoch": 4.793854748603352, "grad_norm": 0.70887690782547, "learning_rate": 0.0007626330532212885, "loss": 0.4377, "step": 8581 }, { "epoch": 4.794413407821229, "grad_norm": 
5.6827921867370605, "learning_rate": 0.0007626050420168067, "loss": 0.4145, "step": 8582 }, { "epoch": 4.794972067039106, "grad_norm": 0.9522745609283447, "learning_rate": 0.000762577030812325, "loss": 0.6287, "step": 8583 }, { "epoch": 4.795530726256983, "grad_norm": 0.36743617057800293, "learning_rate": 0.0007625490196078432, "loss": 0.4324, "step": 8584 }, { "epoch": 4.79608938547486, "grad_norm": 4.876690864562988, "learning_rate": 0.0007625210084033614, "loss": 0.4611, "step": 8585 }, { "epoch": 4.796648044692738, "grad_norm": 0.720872163772583, "learning_rate": 0.0007624929971988795, "loss": 0.5852, "step": 8586 }, { "epoch": 4.797206703910614, "grad_norm": 0.9262465238571167, "learning_rate": 0.0007624649859943977, "loss": 0.6957, "step": 8587 }, { "epoch": 4.797765363128492, "grad_norm": 2.984988212585449, "learning_rate": 0.000762436974789916, "loss": 0.5382, "step": 8588 }, { "epoch": 4.798324022346368, "grad_norm": 0.7714965343475342, "learning_rate": 0.0007624089635854342, "loss": 0.4909, "step": 8589 }, { "epoch": 4.798882681564246, "grad_norm": 0.5527476668357849, "learning_rate": 0.0007623809523809524, "loss": 0.4358, "step": 8590 }, { "epoch": 4.799441340782123, "grad_norm": 0.4277464747428894, "learning_rate": 0.0007623529411764706, "loss": 0.4174, "step": 8591 }, { "epoch": 4.8, "grad_norm": 0.5458946824073792, "learning_rate": 0.0007623249299719887, "loss": 0.5026, "step": 8592 }, { "epoch": 4.800558659217877, "grad_norm": 0.45590174198150635, "learning_rate": 0.000762296918767507, "loss": 0.3421, "step": 8593 }, { "epoch": 4.801117318435754, "grad_norm": 0.8198550343513489, "learning_rate": 0.0007622689075630252, "loss": 0.3975, "step": 8594 }, { "epoch": 4.801675977653631, "grad_norm": 0.7512623071670532, "learning_rate": 0.0007622408963585435, "loss": 0.4302, "step": 8595 }, { "epoch": 4.802234636871509, "grad_norm": 1.2393786907196045, "learning_rate": 0.0007622128851540617, "loss": 0.4833, "step": 8596 }, { "epoch": 4.802793296089385, 
"grad_norm": 0.7234466075897217, "learning_rate": 0.0007621848739495798, "loss": 0.4188, "step": 8597 }, { "epoch": 4.803351955307263, "grad_norm": 0.42658236622810364, "learning_rate": 0.0007621568627450981, "loss": 0.3814, "step": 8598 }, { "epoch": 4.803910614525139, "grad_norm": 0.6034008860588074, "learning_rate": 0.0007621288515406163, "loss": 0.5023, "step": 8599 }, { "epoch": 4.804469273743017, "grad_norm": 0.7559418678283691, "learning_rate": 0.0007621008403361345, "loss": 0.4843, "step": 8600 }, { "epoch": 4.805027932960893, "grad_norm": 6.497520446777344, "learning_rate": 0.0007620728291316527, "loss": 0.5978, "step": 8601 }, { "epoch": 4.805586592178771, "grad_norm": 0.5292633771896362, "learning_rate": 0.0007620448179271708, "loss": 0.5179, "step": 8602 }, { "epoch": 4.806145251396648, "grad_norm": 10.337291717529297, "learning_rate": 0.0007620168067226891, "loss": 0.4854, "step": 8603 }, { "epoch": 4.806703910614525, "grad_norm": 0.5464903116226196, "learning_rate": 0.0007619887955182073, "loss": 0.4532, "step": 8604 }, { "epoch": 4.807262569832402, "grad_norm": 0.4960624575614929, "learning_rate": 0.0007619607843137255, "loss": 0.4477, "step": 8605 }, { "epoch": 4.80782122905028, "grad_norm": 0.9065825343132019, "learning_rate": 0.0007619327731092437, "loss": 0.5187, "step": 8606 }, { "epoch": 4.808379888268156, "grad_norm": 0.8069644570350647, "learning_rate": 0.0007619047619047619, "loss": 0.4554, "step": 8607 }, { "epoch": 4.808938547486034, "grad_norm": 0.5447313785552979, "learning_rate": 0.0007618767507002801, "loss": 0.4121, "step": 8608 }, { "epoch": 4.80949720670391, "grad_norm": 0.4326414465904236, "learning_rate": 0.0007618487394957983, "loss": 0.4484, "step": 8609 }, { "epoch": 4.810055865921788, "grad_norm": 0.8181438446044922, "learning_rate": 0.0007618207282913165, "loss": 0.4593, "step": 8610 }, { "epoch": 4.810614525139664, "grad_norm": 0.5639909505844116, "learning_rate": 0.0007617927170868347, "loss": 0.5026, "step": 8611 }, { 
"epoch": 4.811173184357542, "grad_norm": 0.6129013895988464, "learning_rate": 0.000761764705882353, "loss": 0.3885, "step": 8612 }, { "epoch": 4.811731843575419, "grad_norm": 0.5478720664978027, "learning_rate": 0.0007617366946778712, "loss": 0.4847, "step": 8613 }, { "epoch": 4.812290502793296, "grad_norm": 0.537415087223053, "learning_rate": 0.0007617086834733894, "loss": 0.5038, "step": 8614 }, { "epoch": 4.812849162011173, "grad_norm": 0.5390510559082031, "learning_rate": 0.0007616806722689076, "loss": 0.4432, "step": 8615 }, { "epoch": 4.813407821229051, "grad_norm": 0.5609541535377502, "learning_rate": 0.0007616526610644258, "loss": 0.5991, "step": 8616 }, { "epoch": 4.813966480446927, "grad_norm": 0.5197296738624573, "learning_rate": 0.000761624649859944, "loss": 0.5161, "step": 8617 }, { "epoch": 4.814525139664805, "grad_norm": 0.5966982841491699, "learning_rate": 0.0007615966386554623, "loss": 0.4598, "step": 8618 }, { "epoch": 4.815083798882681, "grad_norm": 0.42973336577415466, "learning_rate": 0.0007615686274509804, "loss": 0.4254, "step": 8619 }, { "epoch": 4.815642458100559, "grad_norm": 0.4096120595932007, "learning_rate": 0.0007615406162464986, "loss": 0.4645, "step": 8620 }, { "epoch": 4.816201117318435, "grad_norm": 0.8303319811820984, "learning_rate": 0.0007615126050420168, "loss": 0.4628, "step": 8621 }, { "epoch": 4.816759776536313, "grad_norm": 0.762954831123352, "learning_rate": 0.000761484593837535, "loss": 0.4831, "step": 8622 }, { "epoch": 4.81731843575419, "grad_norm": 2.0583696365356445, "learning_rate": 0.0007614565826330533, "loss": 0.5275, "step": 8623 }, { "epoch": 4.817877094972067, "grad_norm": 0.7234183549880981, "learning_rate": 0.0007614285714285714, "loss": 0.4301, "step": 8624 }, { "epoch": 4.818435754189944, "grad_norm": 0.5252977609634399, "learning_rate": 0.0007614005602240896, "loss": 0.4538, "step": 8625 }, { "epoch": 4.818994413407821, "grad_norm": 0.39629223942756653, "learning_rate": 0.0007613725490196078, "loss": 
0.3955, "step": 8626 }, { "epoch": 4.819553072625698, "grad_norm": 0.42328080534935, "learning_rate": 0.000761344537815126, "loss": 0.4527, "step": 8627 }, { "epoch": 4.820111731843576, "grad_norm": 0.9976792335510254, "learning_rate": 0.0007613165266106444, "loss": 0.397, "step": 8628 }, { "epoch": 4.820670391061452, "grad_norm": 0.8522345423698425, "learning_rate": 0.0007612885154061625, "loss": 0.6468, "step": 8629 }, { "epoch": 4.82122905027933, "grad_norm": 0.5112760066986084, "learning_rate": 0.0007612605042016807, "loss": 0.4858, "step": 8630 }, { "epoch": 4.821787709497206, "grad_norm": 0.5359641313552856, "learning_rate": 0.0007612324929971989, "loss": 0.4196, "step": 8631 }, { "epoch": 4.822346368715084, "grad_norm": 0.4807271361351013, "learning_rate": 0.0007612044817927171, "loss": 0.4695, "step": 8632 }, { "epoch": 4.822905027932961, "grad_norm": 0.4003235101699829, "learning_rate": 0.0007611764705882354, "loss": 0.41, "step": 8633 }, { "epoch": 4.823463687150838, "grad_norm": 0.7319088578224182, "learning_rate": 0.0007611484593837536, "loss": 0.4372, "step": 8634 }, { "epoch": 4.824022346368715, "grad_norm": 0.49317845702171326, "learning_rate": 0.0007611204481792717, "loss": 0.4685, "step": 8635 }, { "epoch": 4.824581005586592, "grad_norm": 0.5065277814865112, "learning_rate": 0.0007610924369747899, "loss": 0.3763, "step": 8636 }, { "epoch": 4.825139664804469, "grad_norm": 0.4806182384490967, "learning_rate": 0.0007610644257703081, "loss": 0.373, "step": 8637 }, { "epoch": 4.825698324022346, "grad_norm": 0.5946223139762878, "learning_rate": 0.0007610364145658264, "loss": 0.5096, "step": 8638 }, { "epoch": 4.826256983240223, "grad_norm": 0.47863414883613586, "learning_rate": 0.0007610084033613446, "loss": 0.5492, "step": 8639 }, { "epoch": 4.826815642458101, "grad_norm": 1.5016567707061768, "learning_rate": 0.0007609803921568627, "loss": 0.4383, "step": 8640 }, { "epoch": 4.827374301675977, "grad_norm": 0.4068509340286255, "learning_rate": 
0.0007609523809523809, "loss": 0.4084, "step": 8641 }, { "epoch": 4.827932960893855, "grad_norm": 0.796147346496582, "learning_rate": 0.0007609243697478991, "loss": 0.4047, "step": 8642 }, { "epoch": 4.828491620111732, "grad_norm": 0.5410636067390442, "learning_rate": 0.0007608963585434175, "loss": 0.5005, "step": 8643 }, { "epoch": 4.829050279329609, "grad_norm": 0.5957869291305542, "learning_rate": 0.0007608683473389357, "loss": 0.7698, "step": 8644 }, { "epoch": 4.829608938547486, "grad_norm": 0.461546391248703, "learning_rate": 0.0007608403361344538, "loss": 0.4889, "step": 8645 }, { "epoch": 4.830167597765363, "grad_norm": 0.7469927072525024, "learning_rate": 0.000760812324929972, "loss": 0.4555, "step": 8646 }, { "epoch": 4.83072625698324, "grad_norm": 0.478488951921463, "learning_rate": 0.0007607843137254902, "loss": 0.4145, "step": 8647 }, { "epoch": 4.831284916201117, "grad_norm": 1.2435779571533203, "learning_rate": 0.0007607563025210085, "loss": 0.5261, "step": 8648 }, { "epoch": 4.831843575418994, "grad_norm": 0.6280058026313782, "learning_rate": 0.0007607282913165267, "loss": 0.6567, "step": 8649 }, { "epoch": 4.832402234636872, "grad_norm": 0.7095413208007812, "learning_rate": 0.0007607002801120449, "loss": 0.5206, "step": 8650 }, { "epoch": 4.832960893854748, "grad_norm": 0.560554027557373, "learning_rate": 0.000760672268907563, "loss": 0.6407, "step": 8651 }, { "epoch": 4.833519553072626, "grad_norm": 0.5252218842506409, "learning_rate": 0.0007606442577030812, "loss": 0.4313, "step": 8652 }, { "epoch": 4.834078212290503, "grad_norm": 0.45497995615005493, "learning_rate": 0.0007606162464985995, "loss": 0.3813, "step": 8653 }, { "epoch": 4.83463687150838, "grad_norm": 0.5191269516944885, "learning_rate": 0.0007605882352941177, "loss": 0.4141, "step": 8654 }, { "epoch": 4.835195530726257, "grad_norm": 0.5692581534385681, "learning_rate": 0.0007605602240896359, "loss": 0.4955, "step": 8655 }, { "epoch": 4.835754189944134, "grad_norm": 5.363700866699219, 
"learning_rate": 0.000760532212885154, "loss": 0.414, "step": 8656 }, { "epoch": 4.836312849162011, "grad_norm": 0.5020806789398193, "learning_rate": 0.0007605042016806722, "loss": 0.451, "step": 8657 }, { "epoch": 4.836871508379888, "grad_norm": 0.49971261620521545, "learning_rate": 0.0007604761904761905, "loss": 0.5365, "step": 8658 }, { "epoch": 4.837430167597765, "grad_norm": 0.5629549622535706, "learning_rate": 0.0007604481792717087, "loss": 0.4659, "step": 8659 }, { "epoch": 4.837988826815643, "grad_norm": 0.4420061409473419, "learning_rate": 0.000760420168067227, "loss": 0.4987, "step": 8660 }, { "epoch": 4.838547486033519, "grad_norm": 1.127530574798584, "learning_rate": 0.000760392156862745, "loss": 0.3674, "step": 8661 }, { "epoch": 4.839106145251397, "grad_norm": 0.49039822816848755, "learning_rate": 0.0007603641456582633, "loss": 0.3769, "step": 8662 }, { "epoch": 4.839664804469273, "grad_norm": 0.6439700722694397, "learning_rate": 0.0007603361344537816, "loss": 0.4574, "step": 8663 }, { "epoch": 4.840223463687151, "grad_norm": 0.5725346803665161, "learning_rate": 0.0007603081232492998, "loss": 0.4913, "step": 8664 }, { "epoch": 4.840782122905028, "grad_norm": 0.7629607319831848, "learning_rate": 0.000760280112044818, "loss": 0.6072, "step": 8665 }, { "epoch": 4.841340782122905, "grad_norm": 0.4991709589958191, "learning_rate": 0.0007602521008403362, "loss": 0.5249, "step": 8666 }, { "epoch": 4.841899441340782, "grad_norm": 0.5673263669013977, "learning_rate": 0.0007602240896358543, "loss": 0.4764, "step": 8667 }, { "epoch": 4.842458100558659, "grad_norm": 0.4850786328315735, "learning_rate": 0.0007601960784313726, "loss": 0.398, "step": 8668 }, { "epoch": 4.843016759776536, "grad_norm": 0.5174915194511414, "learning_rate": 0.0007601680672268908, "loss": 0.529, "step": 8669 }, { "epoch": 4.843575418994414, "grad_norm": 0.6681555509567261, "learning_rate": 0.000760140056022409, "loss": 0.5516, "step": 8670 }, { "epoch": 4.84413407821229, "grad_norm": 
1.2518675327301025, "learning_rate": 0.0007601120448179272, "loss": 0.5558, "step": 8671 }, { "epoch": 4.844692737430168, "grad_norm": 0.43766605854034424, "learning_rate": 0.0007600840336134453, "loss": 0.3608, "step": 8672 }, { "epoch": 4.845251396648044, "grad_norm": 0.4313262701034546, "learning_rate": 0.0007600560224089636, "loss": 0.3828, "step": 8673 }, { "epoch": 4.845810055865922, "grad_norm": 5.528144359588623, "learning_rate": 0.0007600280112044818, "loss": 0.498, "step": 8674 }, { "epoch": 4.846368715083798, "grad_norm": 0.6264899969100952, "learning_rate": 0.00076, "loss": 0.6633, "step": 8675 }, { "epoch": 4.846927374301676, "grad_norm": 0.5527233481407166, "learning_rate": 0.0007599719887955182, "loss": 0.446, "step": 8676 }, { "epoch": 4.847486033519553, "grad_norm": 0.8556757569313049, "learning_rate": 0.0007599439775910363, "loss": 0.3835, "step": 8677 }, { "epoch": 4.84804469273743, "grad_norm": 0.5169434547424316, "learning_rate": 0.0007599159663865547, "loss": 0.4091, "step": 8678 }, { "epoch": 4.848603351955307, "grad_norm": 0.7467514872550964, "learning_rate": 0.0007598879551820729, "loss": 0.427, "step": 8679 }, { "epoch": 4.849162011173185, "grad_norm": 0.7596680521965027, "learning_rate": 0.0007598599439775911, "loss": 0.405, "step": 8680 }, { "epoch": 4.849720670391061, "grad_norm": 0.6410501003265381, "learning_rate": 0.0007598319327731093, "loss": 0.3795, "step": 8681 }, { "epoch": 4.850279329608939, "grad_norm": 0.3620051145553589, "learning_rate": 0.0007598039215686275, "loss": 0.4281, "step": 8682 }, { "epoch": 4.850837988826815, "grad_norm": 0.4518038034439087, "learning_rate": 0.0007597759103641457, "loss": 0.4675, "step": 8683 }, { "epoch": 4.851396648044693, "grad_norm": 0.47344234585762024, "learning_rate": 0.0007597478991596639, "loss": 0.4614, "step": 8684 }, { "epoch": 4.851955307262569, "grad_norm": 1.901712417602539, "learning_rate": 0.0007597198879551821, "loss": 0.4486, "step": 8685 }, { "epoch": 4.852513966480447, 
"grad_norm": 0.6681280136108398, "learning_rate": 0.0007596918767507003, "loss": 0.5473, "step": 8686 }, { "epoch": 4.853072625698324, "grad_norm": 0.8004860877990723, "learning_rate": 0.0007596638655462185, "loss": 0.6469, "step": 8687 }, { "epoch": 4.853631284916201, "grad_norm": 0.694496214389801, "learning_rate": 0.0007596358543417367, "loss": 0.4262, "step": 8688 }, { "epoch": 4.854189944134078, "grad_norm": 1.9153003692626953, "learning_rate": 0.0007596078431372549, "loss": 0.3836, "step": 8689 }, { "epoch": 4.854748603351956, "grad_norm": 2.520158052444458, "learning_rate": 0.0007595798319327731, "loss": 0.4639, "step": 8690 }, { "epoch": 4.855307262569832, "grad_norm": 1.5247082710266113, "learning_rate": 0.0007595518207282913, "loss": 0.5336, "step": 8691 }, { "epoch": 4.85586592178771, "grad_norm": 0.7307385802268982, "learning_rate": 0.0007595238095238095, "loss": 0.4568, "step": 8692 }, { "epoch": 4.856424581005586, "grad_norm": 0.60298091173172, "learning_rate": 0.0007594957983193277, "loss": 0.53, "step": 8693 }, { "epoch": 4.856983240223464, "grad_norm": 1.5359807014465332, "learning_rate": 0.000759467787114846, "loss": 0.4008, "step": 8694 }, { "epoch": 4.85754189944134, "grad_norm": 0.45238712430000305, "learning_rate": 0.0007594397759103642, "loss": 0.5278, "step": 8695 }, { "epoch": 4.858100558659218, "grad_norm": 0.5202834010124207, "learning_rate": 0.0007594117647058824, "loss": 0.4521, "step": 8696 }, { "epoch": 4.858659217877095, "grad_norm": 0.49711424112319946, "learning_rate": 0.0007593837535014006, "loss": 0.4195, "step": 8697 }, { "epoch": 4.859217877094972, "grad_norm": 0.554820716381073, "learning_rate": 0.0007593557422969188, "loss": 0.4874, "step": 8698 }, { "epoch": 4.859776536312849, "grad_norm": 0.48045921325683594, "learning_rate": 0.000759327731092437, "loss": 0.4587, "step": 8699 }, { "epoch": 4.860335195530726, "grad_norm": 0.4410632252693176, "learning_rate": 0.0007592997198879552, "loss": 0.4341, "step": 8700 }, { "epoch": 
4.860893854748603, "grad_norm": 0.47672775387763977, "learning_rate": 0.0007592717086834734, "loss": 0.4631, "step": 8701 }, { "epoch": 4.861452513966481, "grad_norm": 1.4025802612304688, "learning_rate": 0.0007592436974789916, "loss": 0.4088, "step": 8702 }, { "epoch": 4.862011173184357, "grad_norm": 1.1027954816818237, "learning_rate": 0.0007592156862745098, "loss": 0.4637, "step": 8703 }, { "epoch": 4.862569832402235, "grad_norm": 0.83026123046875, "learning_rate": 0.000759187675070028, "loss": 0.4811, "step": 8704 }, { "epoch": 4.863128491620111, "grad_norm": 0.5367934703826904, "learning_rate": 0.0007591596638655462, "loss": 0.4482, "step": 8705 }, { "epoch": 4.863687150837989, "grad_norm": 1.2069411277770996, "learning_rate": 0.0007591316526610644, "loss": 0.458, "step": 8706 }, { "epoch": 4.864245810055866, "grad_norm": 0.5686973929405212, "learning_rate": 0.0007591036414565826, "loss": 0.4673, "step": 8707 }, { "epoch": 4.864804469273743, "grad_norm": 0.7726436257362366, "learning_rate": 0.0007590756302521008, "loss": 0.558, "step": 8708 }, { "epoch": 4.86536312849162, "grad_norm": 0.676937997341156, "learning_rate": 0.000759047619047619, "loss": 0.6981, "step": 8709 }, { "epoch": 4.865921787709497, "grad_norm": 0.6577152013778687, "learning_rate": 0.0007590196078431373, "loss": 0.4472, "step": 8710 }, { "epoch": 4.866480446927374, "grad_norm": 0.47892898321151733, "learning_rate": 0.0007589915966386555, "loss": 0.4808, "step": 8711 }, { "epoch": 4.867039106145251, "grad_norm": 0.5467607378959656, "learning_rate": 0.0007589635854341737, "loss": 0.4267, "step": 8712 }, { "epoch": 4.867597765363128, "grad_norm": 0.4089581072330475, "learning_rate": 0.0007589355742296919, "loss": 0.4266, "step": 8713 }, { "epoch": 4.868156424581006, "grad_norm": 0.5872803926467896, "learning_rate": 0.0007589075630252102, "loss": 0.5169, "step": 8714 }, { "epoch": 4.868715083798882, "grad_norm": 0.422395795583725, "learning_rate": 0.0007588795518207283, "loss": 0.4513, "step": 
8715 }, { "epoch": 4.86927374301676, "grad_norm": 0.5978456735610962, "learning_rate": 0.0007588515406162465, "loss": 0.535, "step": 8716 }, { "epoch": 4.869832402234637, "grad_norm": 0.42006736993789673, "learning_rate": 0.0007588235294117647, "loss": 0.4155, "step": 8717 }, { "epoch": 4.870391061452514, "grad_norm": 0.5250824093818665, "learning_rate": 0.0007587955182072829, "loss": 0.4221, "step": 8718 }, { "epoch": 4.870949720670391, "grad_norm": 0.5473908185958862, "learning_rate": 0.0007587675070028012, "loss": 0.4665, "step": 8719 }, { "epoch": 4.871508379888268, "grad_norm": 1.4276212453842163, "learning_rate": 0.0007587394957983193, "loss": 0.451, "step": 8720 }, { "epoch": 4.872067039106145, "grad_norm": 0.8969179391860962, "learning_rate": 0.0007587114845938375, "loss": 0.4372, "step": 8721 }, { "epoch": 4.872625698324022, "grad_norm": 0.49443671107292175, "learning_rate": 0.0007586834733893557, "loss": 0.6035, "step": 8722 }, { "epoch": 4.873184357541899, "grad_norm": 0.49140894412994385, "learning_rate": 0.0007586554621848739, "loss": 0.5041, "step": 8723 }, { "epoch": 4.873743016759777, "grad_norm": 0.6224446892738342, "learning_rate": 0.0007586274509803922, "loss": 0.4787, "step": 8724 }, { "epoch": 4.874301675977653, "grad_norm": 4.660036563873291, "learning_rate": 0.0007585994397759103, "loss": 0.4448, "step": 8725 }, { "epoch": 4.874860335195531, "grad_norm": 0.6111028790473938, "learning_rate": 0.0007585714285714285, "loss": 0.4609, "step": 8726 }, { "epoch": 4.875418994413408, "grad_norm": 0.5526478886604309, "learning_rate": 0.0007585434173669468, "loss": 0.4369, "step": 8727 }, { "epoch": 4.875977653631285, "grad_norm": 0.49239787459373474, "learning_rate": 0.000758515406162465, "loss": 0.4605, "step": 8728 }, { "epoch": 4.876536312849162, "grad_norm": 1.1657596826553345, "learning_rate": 0.0007584873949579833, "loss": 0.4463, "step": 8729 }, { "epoch": 4.877094972067039, "grad_norm": 1.0578683614730835, "learning_rate": 0.0007584593837535015, 
"loss": 0.4995, "step": 8730 }, { "epoch": 4.877653631284916, "grad_norm": 5.073737621307373, "learning_rate": 0.0007584313725490196, "loss": 0.4353, "step": 8731 }, { "epoch": 4.878212290502793, "grad_norm": 0.5168876647949219, "learning_rate": 0.0007584033613445378, "loss": 0.3778, "step": 8732 }, { "epoch": 4.87877094972067, "grad_norm": 0.6424051523208618, "learning_rate": 0.000758375350140056, "loss": 0.4002, "step": 8733 }, { "epoch": 4.879329608938548, "grad_norm": 0.5979295372962952, "learning_rate": 0.0007583473389355743, "loss": 0.4553, "step": 8734 }, { "epoch": 4.879888268156424, "grad_norm": 0.4738885462284088, "learning_rate": 0.0007583193277310925, "loss": 0.4754, "step": 8735 }, { "epoch": 4.880446927374302, "grad_norm": 0.7919620275497437, "learning_rate": 0.0007582913165266106, "loss": 0.5094, "step": 8736 }, { "epoch": 4.881005586592178, "grad_norm": 0.4680382013320923, "learning_rate": 0.0007582633053221288, "loss": 0.4582, "step": 8737 }, { "epoch": 4.881564245810056, "grad_norm": 0.47519004344940186, "learning_rate": 0.000758235294117647, "loss": 0.4853, "step": 8738 }, { "epoch": 4.882122905027933, "grad_norm": 0.4977691173553467, "learning_rate": 0.0007582072829131653, "loss": 0.5458, "step": 8739 }, { "epoch": 4.88268156424581, "grad_norm": 0.5639273524284363, "learning_rate": 0.0007581792717086835, "loss": 0.4325, "step": 8740 }, { "epoch": 4.883240223463687, "grad_norm": 1.9027615785598755, "learning_rate": 0.0007581512605042016, "loss": 0.4996, "step": 8741 }, { "epoch": 4.883798882681564, "grad_norm": 1.072900652885437, "learning_rate": 0.0007581232492997198, "loss": 0.3651, "step": 8742 }, { "epoch": 4.884357541899441, "grad_norm": 0.9303123354911804, "learning_rate": 0.000758095238095238, "loss": 0.4311, "step": 8743 }, { "epoch": 4.884916201117319, "grad_norm": 0.4010227620601654, "learning_rate": 0.0007580672268907564, "loss": 0.3677, "step": 8744 }, { "epoch": 4.885474860335195, "grad_norm": 0.7847115397453308, "learning_rate": 
0.0007580392156862746, "loss": 0.4087, "step": 8745 }, { "epoch": 4.886033519553073, "grad_norm": Infinity, "learning_rate": 0.0007580392156862746, "loss": 0.549, "step": 8746 }, { "epoch": 4.886592178770949, "grad_norm": 0.39211156964302063, "learning_rate": 0.0007580112044817928, "loss": 0.4083, "step": 8747 }, { "epoch": 4.887150837988827, "grad_norm": 0.5703455209732056, "learning_rate": 0.0007579831932773109, "loss": 0.5028, "step": 8748 }, { "epoch": 4.8877094972067034, "grad_norm": 0.478864848613739, "learning_rate": 0.0007579551820728291, "loss": 0.4676, "step": 8749 }, { "epoch": 4.888268156424581, "grad_norm": 0.48383629322052, "learning_rate": 0.0007579271708683474, "loss": 0.4342, "step": 8750 }, { "epoch": 4.888826815642458, "grad_norm": 0.7051289677619934, "learning_rate": 0.0007578991596638656, "loss": 0.3469, "step": 8751 }, { "epoch": 4.889385474860335, "grad_norm": 0.9486963748931885, "learning_rate": 0.0007578711484593838, "loss": 0.4363, "step": 8752 }, { "epoch": 4.889944134078212, "grad_norm": 0.5282118916511536, "learning_rate": 0.0007578431372549019, "loss": 0.4348, "step": 8753 }, { "epoch": 4.89050279329609, "grad_norm": 1.3129225969314575, "learning_rate": 0.0007578151260504201, "loss": 0.4452, "step": 8754 }, { "epoch": 4.891061452513966, "grad_norm": 0.48157554864883423, "learning_rate": 0.0007577871148459384, "loss": 0.4788, "step": 8755 }, { "epoch": 4.891620111731844, "grad_norm": 0.5641001462936401, "learning_rate": 0.0007577591036414566, "loss": 0.3712, "step": 8756 }, { "epoch": 4.89217877094972, "grad_norm": 0.407925009727478, "learning_rate": 0.0007577310924369748, "loss": 0.4158, "step": 8757 }, { "epoch": 4.892737430167598, "grad_norm": 0.474031537771225, "learning_rate": 0.0007577030812324929, "loss": 0.5754, "step": 8758 }, { "epoch": 4.8932960893854744, "grad_norm": 0.5503466725349426, "learning_rate": 0.0007576750700280111, "loss": 0.4634, "step": 8759 }, { "epoch": 4.893854748603352, "grad_norm": 0.471722275018692, 
"learning_rate": 0.0007576470588235295, "loss": 0.3822, "step": 8760 }, { "epoch": 4.894413407821229, "grad_norm": 0.7298048734664917, "learning_rate": 0.0007576190476190477, "loss": 0.4524, "step": 8761 }, { "epoch": 4.894972067039106, "grad_norm": 0.6199996471405029, "learning_rate": 0.0007575910364145659, "loss": 0.4481, "step": 8762 }, { "epoch": 4.895530726256983, "grad_norm": 0.6268185377120972, "learning_rate": 0.0007575630252100841, "loss": 0.5344, "step": 8763 }, { "epoch": 4.896089385474861, "grad_norm": 11.937359809875488, "learning_rate": 0.0007575350140056022, "loss": 0.3685, "step": 8764 }, { "epoch": 4.896648044692737, "grad_norm": 0.7418108582496643, "learning_rate": 0.0007575070028011205, "loss": 0.4452, "step": 8765 }, { "epoch": 4.897206703910615, "grad_norm": 0.41409215331077576, "learning_rate": 0.0007574789915966387, "loss": 0.4274, "step": 8766 }, { "epoch": 4.897765363128491, "grad_norm": 2.3767995834350586, "learning_rate": 0.0007574509803921569, "loss": 0.4353, "step": 8767 }, { "epoch": 4.898324022346369, "grad_norm": 1.2191287279129028, "learning_rate": 0.0007574229691876751, "loss": 0.5076, "step": 8768 }, { "epoch": 4.8988826815642454, "grad_norm": 0.5433291792869568, "learning_rate": 0.0007573949579831932, "loss": 0.4504, "step": 8769 }, { "epoch": 4.899441340782123, "grad_norm": 0.4664328098297119, "learning_rate": 0.0007573669467787115, "loss": 0.4013, "step": 8770 }, { "epoch": 4.9, "grad_norm": 0.4200805425643921, "learning_rate": 0.0007573389355742297, "loss": 0.4857, "step": 8771 }, { "epoch": 4.900558659217877, "grad_norm": 0.5408905744552612, "learning_rate": 0.0007573109243697479, "loss": 0.5885, "step": 8772 }, { "epoch": 4.901117318435754, "grad_norm": 0.6539720892906189, "learning_rate": 0.0007572829131652661, "loss": 0.5112, "step": 8773 }, { "epoch": 4.901675977653631, "grad_norm": 0.6490855813026428, "learning_rate": 0.0007572549019607842, "loss": 0.4567, "step": 8774 }, { "epoch": 4.902234636871508, "grad_norm": 
0.3859747350215912, "learning_rate": 0.0007572268907563025, "loss": 0.3942, "step": 8775 }, { "epoch": 4.902793296089386, "grad_norm": 0.544288158416748, "learning_rate": 0.0007571988795518207, "loss": 0.475, "step": 8776 }, { "epoch": 4.903351955307262, "grad_norm": 0.4559132754802704, "learning_rate": 0.000757170868347339, "loss": 0.4431, "step": 8777 }, { "epoch": 4.90391061452514, "grad_norm": 0.5335123538970947, "learning_rate": 0.0007571428571428572, "loss": 0.528, "step": 8778 }, { "epoch": 4.9044692737430164, "grad_norm": 4.882332801818848, "learning_rate": 0.0007571148459383754, "loss": 0.4795, "step": 8779 }, { "epoch": 4.905027932960894, "grad_norm": 0.4744202494621277, "learning_rate": 0.0007570868347338936, "loss": 0.3669, "step": 8780 }, { "epoch": 4.905586592178771, "grad_norm": 0.39103662967681885, "learning_rate": 0.0007570588235294118, "loss": 0.4065, "step": 8781 }, { "epoch": 4.906145251396648, "grad_norm": 0.8635833859443665, "learning_rate": 0.00075703081232493, "loss": 0.4335, "step": 8782 }, { "epoch": 4.906703910614525, "grad_norm": 0.5420430898666382, "learning_rate": 0.0007570028011204482, "loss": 0.4903, "step": 8783 }, { "epoch": 4.907262569832402, "grad_norm": 0.4244260787963867, "learning_rate": 0.0007569747899159664, "loss": 0.4615, "step": 8784 }, { "epoch": 4.907821229050279, "grad_norm": 1.7777653932571411, "learning_rate": 0.0007569467787114846, "loss": 0.4363, "step": 8785 }, { "epoch": 4.908379888268156, "grad_norm": 1.8669118881225586, "learning_rate": 0.0007569187675070028, "loss": 0.4838, "step": 8786 }, { "epoch": 4.908938547486033, "grad_norm": 0.5046985745429993, "learning_rate": 0.000756890756302521, "loss": 0.6408, "step": 8787 }, { "epoch": 4.909497206703911, "grad_norm": 0.7819457054138184, "learning_rate": 0.0007568627450980392, "loss": 0.4383, "step": 8788 }, { "epoch": 4.910055865921787, "grad_norm": 0.4063820242881775, "learning_rate": 0.0007568347338935574, "loss": 0.4339, "step": 8789 }, { "epoch": 
4.910614525139665, "grad_norm": 0.4208689033985138, "learning_rate": 0.0007568067226890757, "loss": 0.4471, "step": 8790 }, { "epoch": 4.911173184357542, "grad_norm": 0.3869853615760803, "learning_rate": 0.0007567787114845938, "loss": 0.4663, "step": 8791 }, { "epoch": 4.911731843575419, "grad_norm": 0.4823906123638153, "learning_rate": 0.000756750700280112, "loss": 0.4342, "step": 8792 }, { "epoch": 4.912290502793296, "grad_norm": 0.5998381972312927, "learning_rate": 0.0007567226890756303, "loss": 0.5075, "step": 8793 }, { "epoch": 4.912849162011173, "grad_norm": 0.6274259686470032, "learning_rate": 0.0007566946778711485, "loss": 0.4661, "step": 8794 }, { "epoch": 4.91340782122905, "grad_norm": 0.5794113874435425, "learning_rate": 0.0007566666666666668, "loss": 0.6888, "step": 8795 }, { "epoch": 4.913966480446927, "grad_norm": 0.4683435559272766, "learning_rate": 0.0007566386554621849, "loss": 0.4427, "step": 8796 }, { "epoch": 4.914525139664804, "grad_norm": 0.4963485896587372, "learning_rate": 0.0007566106442577031, "loss": 0.3689, "step": 8797 }, { "epoch": 4.915083798882682, "grad_norm": 0.7152925133705139, "learning_rate": 0.0007565826330532213, "loss": 0.4986, "step": 8798 }, { "epoch": 4.915642458100558, "grad_norm": 0.45349279046058655, "learning_rate": 0.0007565546218487395, "loss": 0.457, "step": 8799 }, { "epoch": 4.916201117318436, "grad_norm": 1.5383104085922241, "learning_rate": 0.0007565266106442578, "loss": 0.5562, "step": 8800 }, { "epoch": 4.9167597765363125, "grad_norm": 0.6608338952064514, "learning_rate": 0.0007564985994397759, "loss": 0.5309, "step": 8801 }, { "epoch": 4.91731843575419, "grad_norm": 0.6817737221717834, "learning_rate": 0.0007564705882352941, "loss": 0.5386, "step": 8802 }, { "epoch": 4.917877094972067, "grad_norm": 0.5446931719779968, "learning_rate": 0.0007564425770308123, "loss": 0.473, "step": 8803 }, { "epoch": 4.918435754189944, "grad_norm": 0.392940878868103, "learning_rate": 0.0007564145658263305, "loss": 0.4018, 
"step": 8804 }, { "epoch": 4.918994413407821, "grad_norm": 0.8185021281242371, "learning_rate": 0.0007563865546218488, "loss": 0.499, "step": 8805 }, { "epoch": 4.919553072625698, "grad_norm": 0.5131251215934753, "learning_rate": 0.000756358543417367, "loss": 0.4505, "step": 8806 }, { "epoch": 4.920111731843575, "grad_norm": 1.0140050649642944, "learning_rate": 0.0007563305322128851, "loss": 0.4862, "step": 8807 }, { "epoch": 4.920670391061453, "grad_norm": 4.409199237823486, "learning_rate": 0.0007563025210084033, "loss": 0.4453, "step": 8808 }, { "epoch": 4.921229050279329, "grad_norm": 0.4739084541797638, "learning_rate": 0.0007562745098039215, "loss": 0.47, "step": 8809 }, { "epoch": 4.921787709497207, "grad_norm": 0.7353098392486572, "learning_rate": 0.0007562464985994399, "loss": 0.391, "step": 8810 }, { "epoch": 4.9223463687150835, "grad_norm": 0.42085981369018555, "learning_rate": 0.0007562184873949581, "loss": 0.4492, "step": 8811 }, { "epoch": 4.922905027932961, "grad_norm": 0.5738317370414734, "learning_rate": 0.0007561904761904762, "loss": 0.444, "step": 8812 }, { "epoch": 4.923463687150838, "grad_norm": 0.4134341776371002, "learning_rate": 0.0007561624649859944, "loss": 0.4263, "step": 8813 }, { "epoch": 4.924022346368715, "grad_norm": 0.7517583966255188, "learning_rate": 0.0007561344537815126, "loss": 0.6281, "step": 8814 }, { "epoch": 4.924581005586592, "grad_norm": 0.43969208002090454, "learning_rate": 0.0007561064425770309, "loss": 0.3849, "step": 8815 }, { "epoch": 4.925139664804469, "grad_norm": 0.568121075630188, "learning_rate": 0.0007560784313725491, "loss": 0.3956, "step": 8816 }, { "epoch": 4.925698324022346, "grad_norm": 0.5273006558418274, "learning_rate": 0.0007560504201680672, "loss": 0.3976, "step": 8817 }, { "epoch": 4.926256983240224, "grad_norm": 0.7773993611335754, "learning_rate": 0.0007560224089635854, "loss": 0.4725, "step": 8818 }, { "epoch": 4.9268156424581, "grad_norm": 0.6869404911994934, "learning_rate": 
0.0007559943977591036, "loss": 0.5455, "step": 8819 }, { "epoch": 4.927374301675978, "grad_norm": 0.5815285444259644, "learning_rate": 0.0007559663865546219, "loss": 0.4704, "step": 8820 }, { "epoch": 4.9279329608938545, "grad_norm": 0.8208044171333313, "learning_rate": 0.0007559383753501401, "loss": 0.425, "step": 8821 }, { "epoch": 4.928491620111732, "grad_norm": 0.5147770643234253, "learning_rate": 0.0007559103641456583, "loss": 0.5813, "step": 8822 }, { "epoch": 4.9290502793296085, "grad_norm": 0.4230298697948456, "learning_rate": 0.0007558823529411764, "loss": 0.4552, "step": 8823 }, { "epoch": 4.929608938547486, "grad_norm": 0.5045498013496399, "learning_rate": 0.0007558543417366946, "loss": 0.4139, "step": 8824 }, { "epoch": 4.930167597765363, "grad_norm": 0.5355737209320068, "learning_rate": 0.000755826330532213, "loss": 0.4922, "step": 8825 }, { "epoch": 4.93072625698324, "grad_norm": 0.5541037321090698, "learning_rate": 0.0007557983193277312, "loss": 0.4409, "step": 8826 }, { "epoch": 4.931284916201117, "grad_norm": 0.4639340341091156, "learning_rate": 0.0007557703081232494, "loss": 0.4725, "step": 8827 }, { "epoch": 4.931843575418995, "grad_norm": 0.42134127020835876, "learning_rate": 0.0007557422969187675, "loss": 0.4122, "step": 8828 }, { "epoch": 4.932402234636871, "grad_norm": 0.6108148694038391, "learning_rate": 0.0007557142857142857, "loss": 0.3604, "step": 8829 }, { "epoch": 4.932960893854749, "grad_norm": 0.6303445100784302, "learning_rate": 0.000755686274509804, "loss": 0.5771, "step": 8830 }, { "epoch": 4.9335195530726255, "grad_norm": 0.6305598616600037, "learning_rate": 0.0007556582633053222, "loss": 0.3262, "step": 8831 }, { "epoch": 4.934078212290503, "grad_norm": 0.492229700088501, "learning_rate": 0.0007556302521008404, "loss": 0.4008, "step": 8832 }, { "epoch": 4.9346368715083795, "grad_norm": 0.5563464164733887, "learning_rate": 0.0007556022408963585, "loss": 0.4474, "step": 8833 }, { "epoch": 4.935195530726257, "grad_norm": 
1.8338207006454468, "learning_rate": 0.0007555742296918767, "loss": 0.4931, "step": 8834 }, { "epoch": 4.935754189944134, "grad_norm": 0.5386955142021179, "learning_rate": 0.000755546218487395, "loss": 0.4754, "step": 8835 }, { "epoch": 4.936312849162011, "grad_norm": 1.487391710281372, "learning_rate": 0.0007555182072829132, "loss": 0.4392, "step": 8836 }, { "epoch": 4.936871508379888, "grad_norm": 1.9271928071975708, "learning_rate": 0.0007554901960784314, "loss": 0.3616, "step": 8837 }, { "epoch": 4.937430167597765, "grad_norm": 0.5119422674179077, "learning_rate": 0.0007554621848739496, "loss": 0.3675, "step": 8838 }, { "epoch": 4.937988826815642, "grad_norm": 2.6700937747955322, "learning_rate": 0.0007554341736694677, "loss": 0.4194, "step": 8839 }, { "epoch": 4.93854748603352, "grad_norm": 2.884505033493042, "learning_rate": 0.000755406162464986, "loss": 0.4576, "step": 8840 }, { "epoch": 4.9391061452513965, "grad_norm": 0.4905424416065216, "learning_rate": 0.0007553781512605042, "loss": 0.4137, "step": 8841 }, { "epoch": 4.939664804469274, "grad_norm": 0.5485576391220093, "learning_rate": 0.0007553501400560225, "loss": 0.4069, "step": 8842 }, { "epoch": 4.9402234636871505, "grad_norm": 0.5925578474998474, "learning_rate": 0.0007553221288515407, "loss": 0.4242, "step": 8843 }, { "epoch": 4.940782122905028, "grad_norm": 0.6026496887207031, "learning_rate": 0.0007552941176470588, "loss": 0.4257, "step": 8844 }, { "epoch": 4.941340782122905, "grad_norm": 0.6376490592956543, "learning_rate": 0.0007552661064425771, "loss": 0.4238, "step": 8845 }, { "epoch": 4.941899441340782, "grad_norm": 0.5547580718994141, "learning_rate": 0.0007552380952380953, "loss": 0.5217, "step": 8846 }, { "epoch": 4.942458100558659, "grad_norm": 2.5092527866363525, "learning_rate": 0.0007552100840336135, "loss": 0.5043, "step": 8847 }, { "epoch": 4.943016759776536, "grad_norm": 0.5121408104896545, "learning_rate": 0.0007551820728291317, "loss": 0.5123, "step": 8848 }, { "epoch": 
4.943575418994413, "grad_norm": 0.46961572766304016, "learning_rate": 0.0007551540616246498, "loss": 0.4247, "step": 8849 }, { "epoch": 4.94413407821229, "grad_norm": 3.7292540073394775, "learning_rate": 0.0007551260504201681, "loss": 0.4625, "step": 8850 }, { "epoch": 4.9446927374301675, "grad_norm": 0.5970547199249268, "learning_rate": 0.0007550980392156863, "loss": 0.4108, "step": 8851 }, { "epoch": 4.945251396648045, "grad_norm": 0.5754162073135376, "learning_rate": 0.0007550700280112045, "loss": 0.3074, "step": 8852 }, { "epoch": 4.9458100558659215, "grad_norm": 1.8235074281692505, "learning_rate": 0.0007550420168067227, "loss": 0.4488, "step": 8853 }, { "epoch": 4.946368715083799, "grad_norm": 0.402045875787735, "learning_rate": 0.0007550140056022409, "loss": 0.3172, "step": 8854 }, { "epoch": 4.946927374301676, "grad_norm": 0.3946745693683624, "learning_rate": 0.0007549859943977591, "loss": 0.4759, "step": 8855 }, { "epoch": 4.947486033519553, "grad_norm": 0.5963053703308105, "learning_rate": 0.0007549579831932773, "loss": 0.3645, "step": 8856 }, { "epoch": 4.94804469273743, "grad_norm": 0.7214495539665222, "learning_rate": 0.0007549299719887955, "loss": 0.4495, "step": 8857 }, { "epoch": 4.948603351955307, "grad_norm": 0.7899619340896606, "learning_rate": 0.0007549019607843137, "loss": 0.4516, "step": 8858 }, { "epoch": 4.949162011173184, "grad_norm": 0.3767538368701935, "learning_rate": 0.000754873949579832, "loss": 0.3875, "step": 8859 }, { "epoch": 4.949720670391061, "grad_norm": 0.7419856786727905, "learning_rate": 0.0007548459383753502, "loss": 0.3864, "step": 8860 }, { "epoch": 4.9502793296089385, "grad_norm": 0.39955633878707886, "learning_rate": 0.0007548179271708684, "loss": 0.4206, "step": 8861 }, { "epoch": 4.950837988826816, "grad_norm": 0.46973446011543274, "learning_rate": 0.0007547899159663866, "loss": 0.4204, "step": 8862 }, { "epoch": 4.9513966480446925, "grad_norm": 0.5184533596038818, "learning_rate": 0.0007547619047619048, "loss": 
0.4635, "step": 8863 }, { "epoch": 4.95195530726257, "grad_norm": 0.7716824412345886, "learning_rate": 0.000754733893557423, "loss": 0.5901, "step": 8864 }, { "epoch": 4.952513966480447, "grad_norm": 0.49903708696365356, "learning_rate": 0.0007547058823529412, "loss": 0.4156, "step": 8865 }, { "epoch": 4.953072625698324, "grad_norm": 0.4803718328475952, "learning_rate": 0.0007546778711484594, "loss": 0.4387, "step": 8866 }, { "epoch": 4.953631284916201, "grad_norm": 0.6003878712654114, "learning_rate": 0.0007546498599439776, "loss": 0.4588, "step": 8867 }, { "epoch": 4.954189944134078, "grad_norm": 0.6134268045425415, "learning_rate": 0.0007546218487394958, "loss": 0.4461, "step": 8868 }, { "epoch": 4.954748603351955, "grad_norm": 0.657756507396698, "learning_rate": 0.000754593837535014, "loss": 0.4863, "step": 8869 }, { "epoch": 4.955307262569832, "grad_norm": 0.6040956974029541, "learning_rate": 0.0007545658263305323, "loss": 0.3518, "step": 8870 }, { "epoch": 4.9558659217877095, "grad_norm": 0.7183671593666077, "learning_rate": 0.0007545378151260504, "loss": 0.5694, "step": 8871 }, { "epoch": 4.956424581005587, "grad_norm": 0.46575382351875305, "learning_rate": 0.0007545098039215686, "loss": 0.3515, "step": 8872 }, { "epoch": 4.9569832402234635, "grad_norm": 0.4562683701515198, "learning_rate": 0.0007544817927170868, "loss": 0.4903, "step": 8873 }, { "epoch": 4.957541899441341, "grad_norm": 0.5428005456924438, "learning_rate": 0.000754453781512605, "loss": 0.3775, "step": 8874 }, { "epoch": 4.9581005586592175, "grad_norm": 1.8966871500015259, "learning_rate": 0.0007544257703081234, "loss": 0.3946, "step": 8875 }, { "epoch": 4.958659217877095, "grad_norm": 0.48232144117355347, "learning_rate": 0.0007543977591036415, "loss": 0.4313, "step": 8876 }, { "epoch": 4.959217877094972, "grad_norm": 0.44306883215904236, "learning_rate": 0.0007543697478991597, "loss": 0.438, "step": 8877 }, { "epoch": 4.959776536312849, "grad_norm": 0.4111923277378082, "learning_rate": 
0.0007543417366946779, "loss": 0.4041, "step": 8878 }, { "epoch": 4.960335195530726, "grad_norm": 0.7629689574241638, "learning_rate": 0.0007543137254901961, "loss": 0.4726, "step": 8879 }, { "epoch": 4.960893854748603, "grad_norm": 0.4854073226451874, "learning_rate": 0.0007542857142857144, "loss": 0.4355, "step": 8880 }, { "epoch": 4.9614525139664805, "grad_norm": 0.5218124389648438, "learning_rate": 0.0007542577030812325, "loss": 0.4827, "step": 8881 }, { "epoch": 4.962011173184358, "grad_norm": 1.7910889387130737, "learning_rate": 0.0007542296918767507, "loss": 0.3861, "step": 8882 }, { "epoch": 4.9625698324022345, "grad_norm": 1.2245216369628906, "learning_rate": 0.0007542016806722689, "loss": 0.5188, "step": 8883 }, { "epoch": 4.963128491620112, "grad_norm": 0.6214673519134521, "learning_rate": 0.0007541736694677871, "loss": 0.3982, "step": 8884 }, { "epoch": 4.9636871508379885, "grad_norm": 0.5548007488250732, "learning_rate": 0.0007541456582633054, "loss": 0.3222, "step": 8885 }, { "epoch": 4.964245810055866, "grad_norm": 0.5983314514160156, "learning_rate": 0.0007541176470588236, "loss": 0.5297, "step": 8886 }, { "epoch": 4.9648044692737425, "grad_norm": 0.4969734251499176, "learning_rate": 0.0007540896358543417, "loss": 0.3995, "step": 8887 }, { "epoch": 4.96536312849162, "grad_norm": 0.3779778778553009, "learning_rate": 0.0007540616246498599, "loss": 0.4144, "step": 8888 }, { "epoch": 4.965921787709497, "grad_norm": 0.6147105097770691, "learning_rate": 0.0007540336134453781, "loss": 0.5256, "step": 8889 }, { "epoch": 4.966480446927374, "grad_norm": 0.4686896800994873, "learning_rate": 0.0007540056022408964, "loss": 0.3076, "step": 8890 }, { "epoch": 4.9670391061452515, "grad_norm": 0.4621661603450775, "learning_rate": 0.0007539775910364147, "loss": 0.374, "step": 8891 }, { "epoch": 4.967597765363129, "grad_norm": 0.7105911374092102, "learning_rate": 0.0007539495798319328, "loss": 0.3769, "step": 8892 }, { "epoch": 4.9681564245810055, "grad_norm": 
0.4076268970966339, "learning_rate": 0.000753921568627451, "loss": 0.4254, "step": 8893 }, { "epoch": 4.968715083798883, "grad_norm": 0.7149912714958191, "learning_rate": 0.0007538935574229692, "loss": 0.459, "step": 8894 }, { "epoch": 4.9692737430167595, "grad_norm": 0.7878284454345703, "learning_rate": 0.0007538655462184875, "loss": 0.4826, "step": 8895 }, { "epoch": 4.969832402234637, "grad_norm": 0.5910987854003906, "learning_rate": 0.0007538375350140057, "loss": 0.4134, "step": 8896 }, { "epoch": 4.9703910614525135, "grad_norm": 0.6179719567298889, "learning_rate": 0.0007538095238095238, "loss": 0.4654, "step": 8897 }, { "epoch": 4.970949720670391, "grad_norm": 0.7106649875640869, "learning_rate": 0.000753781512605042, "loss": 0.6835, "step": 8898 }, { "epoch": 4.971508379888268, "grad_norm": 0.43262267112731934, "learning_rate": 0.0007537535014005602, "loss": 0.456, "step": 8899 }, { "epoch": 4.972067039106145, "grad_norm": 1.1712673902511597, "learning_rate": 0.0007537254901960785, "loss": 0.5101, "step": 8900 }, { "epoch": 4.9726256983240225, "grad_norm": 0.6719967722892761, "learning_rate": 0.0007536974789915967, "loss": 0.4613, "step": 8901 }, { "epoch": 4.9731843575419, "grad_norm": 0.5877483487129211, "learning_rate": 0.0007536694677871149, "loss": 0.3872, "step": 8902 }, { "epoch": 4.9737430167597765, "grad_norm": 0.6149214506149292, "learning_rate": 0.000753641456582633, "loss": 0.4977, "step": 8903 }, { "epoch": 4.974301675977654, "grad_norm": 0.5694226026535034, "learning_rate": 0.0007536134453781512, "loss": 0.5598, "step": 8904 }, { "epoch": 4.9748603351955305, "grad_norm": 1.9931093454360962, "learning_rate": 0.0007535854341736695, "loss": 0.3765, "step": 8905 }, { "epoch": 4.975418994413408, "grad_norm": 0.7991714477539062, "learning_rate": 0.0007535574229691877, "loss": 0.4882, "step": 8906 }, { "epoch": 4.9759776536312845, "grad_norm": 0.5801398754119873, "learning_rate": 0.000753529411764706, "loss": 0.413, "step": 8907 }, { "epoch": 
4.976536312849162, "grad_norm": 1.0248736143112183, "learning_rate": 0.000753501400560224, "loss": 0.5372, "step": 8908 }, { "epoch": 4.977094972067039, "grad_norm": 0.6892338395118713, "learning_rate": 0.0007534733893557423, "loss": 0.4978, "step": 8909 }, { "epoch": 4.977653631284916, "grad_norm": 0.420864075422287, "learning_rate": 0.0007534453781512606, "loss": 0.4207, "step": 8910 }, { "epoch": 4.9782122905027935, "grad_norm": 0.8040164113044739, "learning_rate": 0.0007534173669467788, "loss": 0.6046, "step": 8911 }, { "epoch": 4.97877094972067, "grad_norm": 0.7165022492408752, "learning_rate": 0.000753389355742297, "loss": 0.5481, "step": 8912 }, { "epoch": 4.9793296089385475, "grad_norm": 0.5217366218566895, "learning_rate": 0.0007533613445378151, "loss": 0.5051, "step": 8913 }, { "epoch": 4.979888268156425, "grad_norm": 0.47434869408607483, "learning_rate": 0.0007533333333333333, "loss": 0.4835, "step": 8914 }, { "epoch": 4.9804469273743015, "grad_norm": 0.7647278904914856, "learning_rate": 0.0007533053221288516, "loss": 0.3348, "step": 8915 }, { "epoch": 4.981005586592179, "grad_norm": 0.8804173469543457, "learning_rate": 0.0007532773109243698, "loss": 0.499, "step": 8916 }, { "epoch": 4.9815642458100555, "grad_norm": 0.590628981590271, "learning_rate": 0.000753249299719888, "loss": 0.5345, "step": 8917 }, { "epoch": 4.982122905027933, "grad_norm": 0.5201048254966736, "learning_rate": 0.0007532212885154062, "loss": 0.4925, "step": 8918 }, { "epoch": 4.98268156424581, "grad_norm": 0.5053492188453674, "learning_rate": 0.0007531932773109243, "loss": 0.4573, "step": 8919 }, { "epoch": 4.983240223463687, "grad_norm": 0.7040517330169678, "learning_rate": 0.0007531652661064426, "loss": 0.4012, "step": 8920 }, { "epoch": 4.9837988826815645, "grad_norm": 1.1395535469055176, "learning_rate": 0.0007531372549019608, "loss": 0.5303, "step": 8921 }, { "epoch": 4.984357541899441, "grad_norm": 0.5153475403785706, "learning_rate": 0.000753109243697479, "loss": 0.4744, 
"step": 8922 }, { "epoch": 4.9849162011173185, "grad_norm": 0.4273338317871094, "learning_rate": 0.0007530812324929972, "loss": 0.4674, "step": 8923 }, { "epoch": 4.985474860335195, "grad_norm": 1.0757174491882324, "learning_rate": 0.0007530532212885153, "loss": 0.463, "step": 8924 }, { "epoch": 4.9860335195530725, "grad_norm": 0.6814197897911072, "learning_rate": 0.0007530252100840336, "loss": 0.4083, "step": 8925 }, { "epoch": 4.98659217877095, "grad_norm": 0.5240745544433594, "learning_rate": 0.0007529971988795519, "loss": 0.4688, "step": 8926 }, { "epoch": 4.9871508379888265, "grad_norm": 0.6920828223228455, "learning_rate": 0.0007529691876750701, "loss": 0.6228, "step": 8927 }, { "epoch": 4.987709497206704, "grad_norm": 4.5850982666015625, "learning_rate": 0.0007529411764705883, "loss": 0.5397, "step": 8928 }, { "epoch": 4.988268156424581, "grad_norm": 2.1096279621124268, "learning_rate": 0.0007529131652661064, "loss": 0.4948, "step": 8929 }, { "epoch": 4.988826815642458, "grad_norm": 0.4749988317489624, "learning_rate": 0.0007528851540616246, "loss": 0.4483, "step": 8930 }, { "epoch": 4.9893854748603355, "grad_norm": 10.724010467529297, "learning_rate": 0.0007528571428571429, "loss": 0.3862, "step": 8931 }, { "epoch": 4.989944134078212, "grad_norm": 0.8744003176689148, "learning_rate": 0.0007528291316526611, "loss": 0.5138, "step": 8932 }, { "epoch": 4.9905027932960895, "grad_norm": 0.5407172441482544, "learning_rate": 0.0007528011204481793, "loss": 0.7005, "step": 8933 }, { "epoch": 4.991061452513966, "grad_norm": 0.4657617509365082, "learning_rate": 0.0007527731092436975, "loss": 0.4848, "step": 8934 }, { "epoch": 4.9916201117318435, "grad_norm": 0.771497905254364, "learning_rate": 0.0007527450980392156, "loss": 0.6096, "step": 8935 }, { "epoch": 4.992178770949721, "grad_norm": 0.44296932220458984, "learning_rate": 0.0007527170868347339, "loss": 0.4915, "step": 8936 }, { "epoch": 4.9927374301675975, "grad_norm": 0.3926461935043335, "learning_rate": 
0.0007526890756302521, "loss": 0.4053, "step": 8937 }, { "epoch": 4.993296089385475, "grad_norm": 0.562260091304779, "learning_rate": 0.0007526610644257703, "loss": 0.5297, "step": 8938 }, { "epoch": 4.993854748603352, "grad_norm": 0.41102197766304016, "learning_rate": 0.0007526330532212885, "loss": 0.3997, "step": 8939 }, { "epoch": 4.994413407821229, "grad_norm": 0.4598945379257202, "learning_rate": 0.0007526050420168066, "loss": 0.4386, "step": 8940 }, { "epoch": 4.9949720670391065, "grad_norm": 0.4748890697956085, "learning_rate": 0.000752577030812325, "loss": 0.4497, "step": 8941 }, { "epoch": 4.995530726256983, "grad_norm": 0.9812225699424744, "learning_rate": 0.0007525490196078432, "loss": 0.4071, "step": 8942 }, { "epoch": 4.9960893854748605, "grad_norm": 0.6202051639556885, "learning_rate": 0.0007525210084033614, "loss": 0.4608, "step": 8943 }, { "epoch": 4.996648044692737, "grad_norm": 0.45992499589920044, "learning_rate": 0.0007524929971988796, "loss": 0.4516, "step": 8944 }, { "epoch": 4.9972067039106145, "grad_norm": 0.9308560490608215, "learning_rate": 0.0007524649859943977, "loss": 0.4501, "step": 8945 }, { "epoch": 4.997765363128492, "grad_norm": 0.736585259437561, "learning_rate": 0.000752436974789916, "loss": 0.6691, "step": 8946 }, { "epoch": 4.9983240223463685, "grad_norm": 0.4983275830745697, "learning_rate": 0.0007524089635854342, "loss": 0.3534, "step": 8947 }, { "epoch": 4.998882681564246, "grad_norm": 0.8690245151519775, "learning_rate": 0.0007523809523809524, "loss": 0.482, "step": 8948 }, { "epoch": 4.9994413407821225, "grad_norm": 0.7713837623596191, "learning_rate": 0.0007523529411764706, "loss": 0.4586, "step": 8949 }, { "epoch": 5.0, "grad_norm": 0.4516015946865082, "learning_rate": 0.0007523249299719888, "loss": 0.3913, "step": 8950 }, { "epoch": 5.0005586592178775, "grad_norm": 0.4232989251613617, "learning_rate": 0.000752296918767507, "loss": 0.5067, "step": 8951 }, { "epoch": 5.001117318435754, "grad_norm": 0.5575375556945801, 
"learning_rate": 0.0007522689075630252, "loss": 0.392, "step": 8952 }, { "epoch": 5.0016759776536315, "grad_norm": 0.49774113297462463, "learning_rate": 0.0007522408963585434, "loss": 0.4076, "step": 8953 }, { "epoch": 5.002234636871508, "grad_norm": 0.5956636071205139, "learning_rate": 0.0007522128851540616, "loss": 0.4615, "step": 8954 }, { "epoch": 5.0027932960893855, "grad_norm": 0.6072801947593689, "learning_rate": 0.0007521848739495798, "loss": 0.4391, "step": 8955 }, { "epoch": 5.003351955307263, "grad_norm": 0.4491267204284668, "learning_rate": 0.000752156862745098, "loss": 0.4243, "step": 8956 }, { "epoch": 5.0039106145251395, "grad_norm": 0.4794199764728546, "learning_rate": 0.0007521288515406163, "loss": 0.4824, "step": 8957 }, { "epoch": 5.004469273743017, "grad_norm": 0.5326511263847351, "learning_rate": 0.0007521008403361345, "loss": 0.4098, "step": 8958 }, { "epoch": 5.0050279329608935, "grad_norm": 0.5080529451370239, "learning_rate": 0.0007520728291316527, "loss": 0.4363, "step": 8959 }, { "epoch": 5.005586592178771, "grad_norm": 0.6544544100761414, "learning_rate": 0.0007520448179271709, "loss": 0.4628, "step": 8960 }, { "epoch": 5.0061452513966485, "grad_norm": 3.7938168048858643, "learning_rate": 0.0007520168067226891, "loss": 0.4157, "step": 8961 }, { "epoch": 5.006703910614525, "grad_norm": 0.696471631526947, "learning_rate": 0.0007519887955182073, "loss": 0.6147, "step": 8962 }, { "epoch": 5.0072625698324025, "grad_norm": 0.5954288244247437, "learning_rate": 0.0007519607843137255, "loss": 0.5235, "step": 8963 }, { "epoch": 5.007821229050279, "grad_norm": 0.7126972675323486, "learning_rate": 0.0007519327731092437, "loss": 0.3917, "step": 8964 }, { "epoch": 5.0083798882681565, "grad_norm": 0.5534854531288147, "learning_rate": 0.0007519047619047619, "loss": 0.4274, "step": 8965 }, { "epoch": 5.008938547486034, "grad_norm": 0.5404927134513855, "learning_rate": 0.0007518767507002802, "loss": 0.4745, "step": 8966 }, { "epoch": 5.0094972067039105, 
"grad_norm": 0.5117726922035217, "learning_rate": 0.0007518487394957983, "loss": 0.4486, "step": 8967 }, { "epoch": 5.010055865921788, "grad_norm": 0.8328001499176025, "learning_rate": 0.0007518207282913165, "loss": 0.4853, "step": 8968 }, { "epoch": 5.0106145251396645, "grad_norm": 0.5918221473693848, "learning_rate": 0.0007517927170868347, "loss": 0.3931, "step": 8969 }, { "epoch": 5.011173184357542, "grad_norm": 1.0455037355422974, "learning_rate": 0.0007517647058823529, "loss": 0.3951, "step": 8970 }, { "epoch": 5.011731843575419, "grad_norm": 0.40491145849227905, "learning_rate": 0.0007517366946778712, "loss": 0.3488, "step": 8971 }, { "epoch": 5.012290502793296, "grad_norm": 0.4737810492515564, "learning_rate": 0.0007517086834733893, "loss": 0.3953, "step": 8972 }, { "epoch": 5.0128491620111735, "grad_norm": 0.5814889669418335, "learning_rate": 0.0007516806722689075, "loss": 0.5089, "step": 8973 }, { "epoch": 5.01340782122905, "grad_norm": 0.445928692817688, "learning_rate": 0.0007516526610644258, "loss": 0.4894, "step": 8974 }, { "epoch": 5.0139664804469275, "grad_norm": 0.5846951007843018, "learning_rate": 0.000751624649859944, "loss": 0.4451, "step": 8975 }, { "epoch": 5.014525139664804, "grad_norm": 0.5077082514762878, "learning_rate": 0.0007515966386554623, "loss": 0.4082, "step": 8976 }, { "epoch": 5.0150837988826815, "grad_norm": 0.5170125365257263, "learning_rate": 0.0007515686274509804, "loss": 0.5307, "step": 8977 }, { "epoch": 5.015642458100559, "grad_norm": 0.5874345302581787, "learning_rate": 0.0007515406162464986, "loss": 0.3947, "step": 8978 }, { "epoch": 5.0162011173184355, "grad_norm": 0.4098903238773346, "learning_rate": 0.0007515126050420168, "loss": 0.4447, "step": 8979 }, { "epoch": 5.016759776536313, "grad_norm": 0.680138349533081, "learning_rate": 0.000751484593837535, "loss": 0.4923, "step": 8980 }, { "epoch": 5.01731843575419, "grad_norm": 0.5135407447814941, "learning_rate": 0.0007514565826330533, "loss": 0.5303, "step": 8981 }, { 
"epoch": 5.017877094972067, "grad_norm": 0.43305161595344543, "learning_rate": 0.0007514285714285715, "loss": 0.407, "step": 8982 }, { "epoch": 5.0184357541899445, "grad_norm": 0.8420162796974182, "learning_rate": 0.0007514005602240896, "loss": 0.4851, "step": 8983 }, { "epoch": 5.018994413407821, "grad_norm": 0.457429438829422, "learning_rate": 0.0007513725490196078, "loss": 0.3937, "step": 8984 }, { "epoch": 5.0195530726256985, "grad_norm": 0.46801939606666565, "learning_rate": 0.000751344537815126, "loss": 0.4118, "step": 8985 }, { "epoch": 5.020111731843575, "grad_norm": 0.5727358460426331, "learning_rate": 0.0007513165266106443, "loss": 0.3909, "step": 8986 }, { "epoch": 5.0206703910614525, "grad_norm": 9.009857177734375, "learning_rate": 0.0007512885154061625, "loss": 0.487, "step": 8987 }, { "epoch": 5.02122905027933, "grad_norm": 2.25361704826355, "learning_rate": 0.0007512605042016806, "loss": 0.3629, "step": 8988 }, { "epoch": 5.0217877094972065, "grad_norm": 0.4565248191356659, "learning_rate": 0.0007512324929971988, "loss": 0.3981, "step": 8989 }, { "epoch": 5.022346368715084, "grad_norm": 2.0229146480560303, "learning_rate": 0.000751204481792717, "loss": 0.7014, "step": 8990 }, { "epoch": 5.022905027932961, "grad_norm": 0.5379871129989624, "learning_rate": 0.0007511764705882354, "loss": 0.388, "step": 8991 }, { "epoch": 5.023463687150838, "grad_norm": 1.0842766761779785, "learning_rate": 0.0007511484593837536, "loss": 0.352, "step": 8992 }, { "epoch": 5.0240223463687155, "grad_norm": 0.5023524165153503, "learning_rate": 0.0007511204481792717, "loss": 0.5143, "step": 8993 }, { "epoch": 5.024581005586592, "grad_norm": 0.5101609826087952, "learning_rate": 0.0007510924369747899, "loss": 0.4125, "step": 8994 }, { "epoch": 5.0251396648044695, "grad_norm": 0.5665290951728821, "learning_rate": 0.0007510644257703081, "loss": 0.4995, "step": 8995 }, { "epoch": 5.025698324022346, "grad_norm": 0.550108015537262, "learning_rate": 0.0007510364145658264, "loss": 
0.5133, "step": 8996 }, { "epoch": 5.0262569832402235, "grad_norm": 0.6484015583992004, "learning_rate": 0.0007510084033613446, "loss": 0.3865, "step": 8997 }, { "epoch": 5.026815642458101, "grad_norm": 1.9246560335159302, "learning_rate": 0.0007509803921568628, "loss": 0.4518, "step": 8998 }, { "epoch": 5.0273743016759775, "grad_norm": 0.6043315529823303, "learning_rate": 0.0007509523809523809, "loss": 0.5676, "step": 8999 }, { "epoch": 5.027932960893855, "grad_norm": 0.4110215902328491, "learning_rate": 0.0007509243697478991, "loss": 0.3894, "step": 9000 }, { "epoch": 5.027932960893855, "eval_cer": 0.09487960023130722, "eval_loss": 0.35608309507369995, "eval_runtime": 55.9186, "eval_samples_per_second": 81.154, "eval_steps_per_second": 5.079, "eval_wer": 0.37409569565095946, "step": 9000 }, { "epoch": 5.028491620111732, "grad_norm": 0.46151965856552124, "learning_rate": 0.0007508963585434174, "loss": 0.4472, "step": 9001 }, { "epoch": 5.029050279329609, "grad_norm": 0.4746703505516052, "learning_rate": 0.0007508683473389356, "loss": 0.3553, "step": 9002 }, { "epoch": 5.0296089385474865, "grad_norm": 0.7386363744735718, "learning_rate": 0.0007508403361344538, "loss": 0.5406, "step": 9003 }, { "epoch": 5.030167597765363, "grad_norm": 0.4979327619075775, "learning_rate": 0.0007508123249299719, "loss": 0.4625, "step": 9004 }, { "epoch": 5.0307262569832405, "grad_norm": 0.4723169505596161, "learning_rate": 0.0007507843137254901, "loss": 0.4235, "step": 9005 }, { "epoch": 5.031284916201117, "grad_norm": 0.48328202962875366, "learning_rate": 0.0007507563025210085, "loss": 0.4307, "step": 9006 }, { "epoch": 5.0318435754189945, "grad_norm": 0.6487880945205688, "learning_rate": 0.0007507282913165267, "loss": 0.4204, "step": 9007 }, { "epoch": 5.032402234636871, "grad_norm": 5.918490409851074, "learning_rate": 0.0007507002801120449, "loss": 0.6761, "step": 9008 }, { "epoch": 5.0329608938547485, "grad_norm": 0.5372149348258972, "learning_rate": 0.000750672268907563, "loss": 
0.4374, "step": 9009 }, { "epoch": 5.033519553072626, "grad_norm": 0.6469781994819641, "learning_rate": 0.0007506442577030812, "loss": 0.4518, "step": 9010 }, { "epoch": 5.034078212290503, "grad_norm": 0.5016604661941528, "learning_rate": 0.0007506162464985995, "loss": 0.4616, "step": 9011 }, { "epoch": 5.03463687150838, "grad_norm": 0.4969886243343353, "learning_rate": 0.0007505882352941177, "loss": 0.4594, "step": 9012 }, { "epoch": 5.035195530726257, "grad_norm": 0.5083377361297607, "learning_rate": 0.0007505602240896359, "loss": 0.4521, "step": 9013 }, { "epoch": 5.035754189944134, "grad_norm": 1.9022886753082275, "learning_rate": 0.0007505322128851541, "loss": 0.3606, "step": 9014 }, { "epoch": 5.0363128491620115, "grad_norm": 0.6921699047088623, "learning_rate": 0.0007505042016806722, "loss": 0.6119, "step": 9015 }, { "epoch": 5.036871508379888, "grad_norm": 1.0125654935836792, "learning_rate": 0.0007504761904761905, "loss": 0.4553, "step": 9016 }, { "epoch": 5.0374301675977655, "grad_norm": 0.7234715223312378, "learning_rate": 0.0007504481792717087, "loss": 0.5036, "step": 9017 }, { "epoch": 5.037988826815642, "grad_norm": 0.5878175497055054, "learning_rate": 0.0007504201680672269, "loss": 0.4681, "step": 9018 }, { "epoch": 5.0385474860335195, "grad_norm": 0.5051795244216919, "learning_rate": 0.0007503921568627451, "loss": 0.5717, "step": 9019 }, { "epoch": 5.039106145251397, "grad_norm": 0.4283452332019806, "learning_rate": 0.0007503641456582632, "loss": 0.4911, "step": 9020 }, { "epoch": 5.039664804469274, "grad_norm": 0.5273513793945312, "learning_rate": 0.0007503361344537815, "loss": 0.3506, "step": 9021 }, { "epoch": 5.040223463687151, "grad_norm": 0.8313674330711365, "learning_rate": 0.0007503081232492997, "loss": 0.372, "step": 9022 }, { "epoch": 5.040782122905028, "grad_norm": 0.8957833647727966, "learning_rate": 0.000750280112044818, "loss": 0.5206, "step": 9023 }, { "epoch": 5.041340782122905, "grad_norm": 0.87749844789505, "learning_rate": 
0.0007502521008403362, "loss": 0.4415, "step": 9024 }, { "epoch": 5.0418994413407825, "grad_norm": 0.3926472067832947, "learning_rate": 0.0007502240896358543, "loss": 0.3942, "step": 9025 }, { "epoch": 5.042458100558659, "grad_norm": 0.49140673875808716, "learning_rate": 0.0007501960784313726, "loss": 0.4766, "step": 9026 }, { "epoch": 5.0430167597765365, "grad_norm": 0.48330798745155334, "learning_rate": 0.0007501680672268908, "loss": 0.4223, "step": 9027 }, { "epoch": 5.043575418994413, "grad_norm": 0.5474338531494141, "learning_rate": 0.000750140056022409, "loss": 0.4604, "step": 9028 }, { "epoch": 5.0441340782122905, "grad_norm": 0.6906026005744934, "learning_rate": 0.0007501120448179272, "loss": 0.4861, "step": 9029 }, { "epoch": 5.044692737430168, "grad_norm": 1.0677733421325684, "learning_rate": 0.0007500840336134454, "loss": 0.4486, "step": 9030 }, { "epoch": 5.045251396648045, "grad_norm": 0.7197783589363098, "learning_rate": 0.0007500560224089636, "loss": 0.4067, "step": 9031 }, { "epoch": 5.045810055865922, "grad_norm": 0.416558176279068, "learning_rate": 0.0007500280112044818, "loss": 0.4526, "step": 9032 }, { "epoch": 5.046368715083799, "grad_norm": 0.37266403436660767, "learning_rate": 0.00075, "loss": 0.3532, "step": 9033 }, { "epoch": 5.046927374301676, "grad_norm": 0.5296469926834106, "learning_rate": 0.0007499719887955182, "loss": 0.3976, "step": 9034 }, { "epoch": 5.0474860335195535, "grad_norm": 0.5366743803024292, "learning_rate": 0.0007499439775910364, "loss": 0.544, "step": 9035 }, { "epoch": 5.04804469273743, "grad_norm": 0.709815263748169, "learning_rate": 0.0007499159663865546, "loss": 0.366, "step": 9036 }, { "epoch": 5.0486033519553075, "grad_norm": 0.6635186076164246, "learning_rate": 0.0007498879551820728, "loss": 0.4646, "step": 9037 }, { "epoch": 5.049162011173184, "grad_norm": 0.5140885710716248, "learning_rate": 0.000749859943977591, "loss": 0.4292, "step": 9038 }, { "epoch": 5.0497206703910615, "grad_norm": 0.8237327337265015, 
"learning_rate": 0.0007498319327731093, "loss": 0.5936, "step": 9039 }, { "epoch": 5.050279329608939, "grad_norm": 0.7342623472213745, "learning_rate": 0.0007498039215686275, "loss": 0.6089, "step": 9040 }, { "epoch": 5.050837988826816, "grad_norm": 0.6316502690315247, "learning_rate": 0.0007497759103641458, "loss": 0.3639, "step": 9041 }, { "epoch": 5.051396648044693, "grad_norm": 0.5191710591316223, "learning_rate": 0.0007497478991596639, "loss": 0.4286, "step": 9042 }, { "epoch": 5.05195530726257, "grad_norm": 0.6342172622680664, "learning_rate": 0.0007497198879551821, "loss": 0.5062, "step": 9043 }, { "epoch": 5.052513966480447, "grad_norm": 0.6966168880462646, "learning_rate": 0.0007496918767507003, "loss": 0.4927, "step": 9044 }, { "epoch": 5.053072625698324, "grad_norm": 0.7143132090568542, "learning_rate": 0.0007496638655462185, "loss": 0.6288, "step": 9045 }, { "epoch": 5.053631284916201, "grad_norm": 0.8026169538497925, "learning_rate": 0.0007496358543417368, "loss": 0.3856, "step": 9046 }, { "epoch": 5.0541899441340785, "grad_norm": 1.144308090209961, "learning_rate": 0.0007496078431372549, "loss": 0.4914, "step": 9047 }, { "epoch": 5.054748603351955, "grad_norm": 0.410597562789917, "learning_rate": 0.0007495798319327731, "loss": 0.4735, "step": 9048 }, { "epoch": 5.0553072625698325, "grad_norm": 0.5788492560386658, "learning_rate": 0.0007495518207282913, "loss": 0.4474, "step": 9049 }, { "epoch": 5.055865921787709, "grad_norm": 0.937309741973877, "learning_rate": 0.0007495238095238095, "loss": 0.5295, "step": 9050 }, { "epoch": 5.056424581005587, "grad_norm": 0.7546955943107605, "learning_rate": 0.0007494957983193278, "loss": 0.3908, "step": 9051 }, { "epoch": 5.056983240223464, "grad_norm": 0.4760865569114685, "learning_rate": 0.0007494677871148459, "loss": 0.472, "step": 9052 }, { "epoch": 5.057541899441341, "grad_norm": 0.48237109184265137, "learning_rate": 0.0007494397759103641, "loss": 0.4583, "step": 9053 }, { "epoch": 5.058100558659218, 
"grad_norm": 0.5296656489372253, "learning_rate": 0.0007494117647058823, "loss": 0.4464, "step": 9054 }, { "epoch": 5.058659217877095, "grad_norm": 0.5461738109588623, "learning_rate": 0.0007493837535014005, "loss": 0.4793, "step": 9055 }, { "epoch": 5.059217877094972, "grad_norm": 0.7374666929244995, "learning_rate": 0.0007493557422969189, "loss": 0.4267, "step": 9056 }, { "epoch": 5.0597765363128495, "grad_norm": 0.46226826310157776, "learning_rate": 0.0007493277310924371, "loss": 0.4209, "step": 9057 }, { "epoch": 5.060335195530726, "grad_norm": 0.49997398257255554, "learning_rate": 0.0007492997198879552, "loss": 0.504, "step": 9058 }, { "epoch": 5.0608938547486035, "grad_norm": 0.45420151948928833, "learning_rate": 0.0007492717086834734, "loss": 0.4307, "step": 9059 }, { "epoch": 5.06145251396648, "grad_norm": 5.43703031539917, "learning_rate": 0.0007492436974789916, "loss": 0.4714, "step": 9060 }, { "epoch": 5.062011173184358, "grad_norm": 0.5206681489944458, "learning_rate": 0.0007492156862745099, "loss": 0.4188, "step": 9061 }, { "epoch": 5.062569832402235, "grad_norm": 0.9799054265022278, "learning_rate": 0.0007491876750700281, "loss": 0.5357, "step": 9062 }, { "epoch": 5.063128491620112, "grad_norm": 0.42604079842567444, "learning_rate": 0.0007491596638655462, "loss": 0.3856, "step": 9063 }, { "epoch": 5.063687150837989, "grad_norm": 0.4713807702064514, "learning_rate": 0.0007491316526610644, "loss": 0.5249, "step": 9064 }, { "epoch": 5.064245810055866, "grad_norm": 0.686998188495636, "learning_rate": 0.0007491036414565826, "loss": 0.4541, "step": 9065 }, { "epoch": 5.064804469273743, "grad_norm": 0.4297754466533661, "learning_rate": 0.0007490756302521009, "loss": 0.3749, "step": 9066 }, { "epoch": 5.0653631284916205, "grad_norm": 0.6463320255279541, "learning_rate": 0.0007490476190476191, "loss": 0.464, "step": 9067 }, { "epoch": 5.065921787709497, "grad_norm": 0.525516152381897, "learning_rate": 0.0007490196078431372, "loss": 0.4105, "step": 9068 }, { 
"epoch": 5.0664804469273745, "grad_norm": 0.8416241407394409, "learning_rate": 0.0007489915966386554, "loss": 0.5063, "step": 9069 }, { "epoch": 5.067039106145251, "grad_norm": 0.42118191719055176, "learning_rate": 0.0007489635854341736, "loss": 0.4002, "step": 9070 }, { "epoch": 5.067597765363129, "grad_norm": 0.8004279732704163, "learning_rate": 0.000748935574229692, "loss": 0.4241, "step": 9071 }, { "epoch": 5.068156424581006, "grad_norm": 0.7429008483886719, "learning_rate": 0.0007489075630252102, "loss": 0.5162, "step": 9072 }, { "epoch": 5.068715083798883, "grad_norm": 0.7366108298301697, "learning_rate": 0.0007488795518207284, "loss": 0.4642, "step": 9073 }, { "epoch": 5.06927374301676, "grad_norm": 0.8561540842056274, "learning_rate": 0.0007488515406162465, "loss": 0.664, "step": 9074 }, { "epoch": 5.069832402234637, "grad_norm": 0.5409652590751648, "learning_rate": 0.0007488235294117647, "loss": 0.5168, "step": 9075 }, { "epoch": 5.070391061452514, "grad_norm": 0.5133274793624878, "learning_rate": 0.000748795518207283, "loss": 0.4855, "step": 9076 }, { "epoch": 5.070949720670391, "grad_norm": 0.8196202516555786, "learning_rate": 0.0007487675070028012, "loss": 0.4366, "step": 9077 }, { "epoch": 5.071508379888268, "grad_norm": 0.5594240427017212, "learning_rate": 0.0007487394957983194, "loss": 0.4384, "step": 9078 }, { "epoch": 5.0720670391061455, "grad_norm": 0.6306514143943787, "learning_rate": 0.0007487114845938375, "loss": 0.4009, "step": 9079 }, { "epoch": 5.072625698324022, "grad_norm": 0.6437781453132629, "learning_rate": 0.0007486834733893557, "loss": 0.3712, "step": 9080 }, { "epoch": 5.0731843575419, "grad_norm": 0.819551944732666, "learning_rate": 0.000748655462184874, "loss": 0.3228, "step": 9081 }, { "epoch": 5.073743016759776, "grad_norm": 0.5984007120132446, "learning_rate": 0.0007486274509803922, "loss": 0.4926, "step": 9082 }, { "epoch": 5.074301675977654, "grad_norm": 0.7875058650970459, "learning_rate": 0.0007485994397759104, "loss": 
0.4012, "step": 9083 }, { "epoch": 5.074860335195531, "grad_norm": 0.641601026058197, "learning_rate": 0.0007485714285714285, "loss": 0.4481, "step": 9084 }, { "epoch": 5.075418994413408, "grad_norm": 0.5293326377868652, "learning_rate": 0.0007485434173669467, "loss": 0.4653, "step": 9085 }, { "epoch": 5.075977653631285, "grad_norm": 0.6156439185142517, "learning_rate": 0.000748515406162465, "loss": 0.4449, "step": 9086 }, { "epoch": 5.076536312849162, "grad_norm": 0.41242364048957825, "learning_rate": 0.0007484873949579832, "loss": 0.4783, "step": 9087 }, { "epoch": 5.077094972067039, "grad_norm": 0.6608604788780212, "learning_rate": 0.0007484593837535015, "loss": 0.4942, "step": 9088 }, { "epoch": 5.0776536312849165, "grad_norm": 1.0168522596359253, "learning_rate": 0.0007484313725490197, "loss": 0.3869, "step": 9089 }, { "epoch": 5.078212290502793, "grad_norm": 0.8041546940803528, "learning_rate": 0.0007484033613445378, "loss": 0.5234, "step": 9090 }, { "epoch": 5.078770949720671, "grad_norm": 0.6877681612968445, "learning_rate": 0.0007483753501400561, "loss": 0.4753, "step": 9091 }, { "epoch": 5.079329608938547, "grad_norm": 0.6746410131454468, "learning_rate": 0.0007483473389355743, "loss": 0.5417, "step": 9092 }, { "epoch": 5.079888268156425, "grad_norm": 1.3586100339889526, "learning_rate": 0.0007483193277310925, "loss": 0.537, "step": 9093 }, { "epoch": 5.080446927374302, "grad_norm": 0.81231689453125, "learning_rate": 0.0007482913165266107, "loss": 0.4411, "step": 9094 }, { "epoch": 5.081005586592179, "grad_norm": 1.9372198581695557, "learning_rate": 0.0007482633053221288, "loss": 0.551, "step": 9095 }, { "epoch": 5.081564245810056, "grad_norm": 0.6222761273384094, "learning_rate": 0.0007482352941176471, "loss": 0.4393, "step": 9096 }, { "epoch": 5.082122905027933, "grad_norm": 1.0141596794128418, "learning_rate": 0.0007482072829131653, "loss": 0.521, "step": 9097 }, { "epoch": 5.08268156424581, "grad_norm": 0.7729804515838623, "learning_rate": 
0.0007481792717086835, "loss": 0.5078, "step": 9098 }, { "epoch": 5.0832402234636875, "grad_norm": 0.6606077551841736, "learning_rate": 0.0007481512605042017, "loss": 0.5209, "step": 9099 }, { "epoch": 5.083798882681564, "grad_norm": 0.5069326758384705, "learning_rate": 0.0007481232492997198, "loss": 0.4772, "step": 9100 }, { "epoch": 5.084357541899442, "grad_norm": 0.47148627042770386, "learning_rate": 0.0007480952380952381, "loss": 0.4758, "step": 9101 }, { "epoch": 5.084916201117318, "grad_norm": 0.9009197950363159, "learning_rate": 0.0007480672268907563, "loss": 0.5149, "step": 9102 }, { "epoch": 5.085474860335196, "grad_norm": 0.5535609722137451, "learning_rate": 0.0007480392156862745, "loss": 0.3996, "step": 9103 }, { "epoch": 5.086033519553073, "grad_norm": 0.6379386782646179, "learning_rate": 0.0007480112044817927, "loss": 0.3911, "step": 9104 }, { "epoch": 5.08659217877095, "grad_norm": 0.7699502110481262, "learning_rate": 0.000747983193277311, "loss": 0.517, "step": 9105 }, { "epoch": 5.087150837988827, "grad_norm": 0.5233026742935181, "learning_rate": 0.0007479551820728292, "loss": 0.4707, "step": 9106 }, { "epoch": 5.087709497206704, "grad_norm": 0.5945190191268921, "learning_rate": 0.0007479271708683474, "loss": 0.4372, "step": 9107 }, { "epoch": 5.088268156424581, "grad_norm": 0.5146439075469971, "learning_rate": 0.0007478991596638656, "loss": 0.4505, "step": 9108 }, { "epoch": 5.0888268156424585, "grad_norm": 0.4786728322505951, "learning_rate": 0.0007478711484593838, "loss": 0.4103, "step": 9109 }, { "epoch": 5.089385474860335, "grad_norm": 0.4101194739341736, "learning_rate": 0.000747843137254902, "loss": 0.3519, "step": 9110 }, { "epoch": 5.089944134078213, "grad_norm": 0.7371591925621033, "learning_rate": 0.0007478151260504202, "loss": 0.3725, "step": 9111 }, { "epoch": 5.090502793296089, "grad_norm": 0.5282604694366455, "learning_rate": 0.0007477871148459384, "loss": 0.4296, "step": 9112 }, { "epoch": 5.091061452513967, "grad_norm": 
0.5157302618026733, "learning_rate": 0.0007477591036414566, "loss": 0.4006, "step": 9113 }, { "epoch": 5.091620111731843, "grad_norm": 0.7631750106811523, "learning_rate": 0.0007477310924369748, "loss": 0.4982, "step": 9114 }, { "epoch": 5.092178770949721, "grad_norm": 5.686546325683594, "learning_rate": 0.000747703081232493, "loss": 0.6268, "step": 9115 }, { "epoch": 5.092737430167598, "grad_norm": 0.48100242018699646, "learning_rate": 0.0007476750700280112, "loss": 0.5106, "step": 9116 }, { "epoch": 5.093296089385475, "grad_norm": 0.6259045600891113, "learning_rate": 0.0007476470588235294, "loss": 0.4049, "step": 9117 }, { "epoch": 5.093854748603352, "grad_norm": 0.5095353722572327, "learning_rate": 0.0007476190476190476, "loss": 0.4413, "step": 9118 }, { "epoch": 5.094413407821229, "grad_norm": 0.6002395749092102, "learning_rate": 0.0007475910364145658, "loss": 0.4494, "step": 9119 }, { "epoch": 5.094972067039106, "grad_norm": 0.5260825157165527, "learning_rate": 0.000747563025210084, "loss": 0.4345, "step": 9120 }, { "epoch": 5.0955307262569836, "grad_norm": 0.46637779474258423, "learning_rate": 0.0007475350140056024, "loss": 0.472, "step": 9121 }, { "epoch": 5.09608938547486, "grad_norm": 0.5473801493644714, "learning_rate": 0.0007475070028011205, "loss": 0.5044, "step": 9122 }, { "epoch": 5.096648044692738, "grad_norm": 0.9179593920707703, "learning_rate": 0.0007474789915966387, "loss": 0.3786, "step": 9123 }, { "epoch": 5.097206703910614, "grad_norm": 1.1870479583740234, "learning_rate": 0.0007474509803921569, "loss": 0.4681, "step": 9124 }, { "epoch": 5.097765363128492, "grad_norm": 0.8743658065795898, "learning_rate": 0.0007474229691876751, "loss": 0.4436, "step": 9125 }, { "epoch": 5.098324022346369, "grad_norm": 0.6783514618873596, "learning_rate": 0.0007473949579831934, "loss": 0.4095, "step": 9126 }, { "epoch": 5.098882681564246, "grad_norm": 0.38759341835975647, "learning_rate": 0.0007473669467787115, "loss": 0.3211, "step": 9127 }, { "epoch": 
5.099441340782123, "grad_norm": 0.5709186792373657, "learning_rate": 0.0007473389355742297, "loss": 0.5117, "step": 9128 }, { "epoch": 5.1, "grad_norm": 2.3181209564208984, "learning_rate": 0.0007473109243697479, "loss": 0.4957, "step": 9129 }, { "epoch": 5.100558659217877, "grad_norm": 0.5004379749298096, "learning_rate": 0.0007472829131652661, "loss": 0.4353, "step": 9130 }, { "epoch": 5.1011173184357546, "grad_norm": 0.4079465866088867, "learning_rate": 0.0007472549019607844, "loss": 0.3752, "step": 9131 }, { "epoch": 5.101675977653631, "grad_norm": 0.6083446145057678, "learning_rate": 0.0007472268907563025, "loss": 0.5024, "step": 9132 }, { "epoch": 5.102234636871509, "grad_norm": 0.4655718207359314, "learning_rate": 0.0007471988795518207, "loss": 0.4567, "step": 9133 }, { "epoch": 5.102793296089385, "grad_norm": 0.45232367515563965, "learning_rate": 0.0007471708683473389, "loss": 0.345, "step": 9134 }, { "epoch": 5.103351955307263, "grad_norm": 0.9004889130592346, "learning_rate": 0.0007471428571428571, "loss": 0.4456, "step": 9135 }, { "epoch": 5.10391061452514, "grad_norm": 0.7123329639434814, "learning_rate": 0.0007471148459383754, "loss": 0.4709, "step": 9136 }, { "epoch": 5.104469273743017, "grad_norm": 0.6250120997428894, "learning_rate": 0.0007470868347338937, "loss": 0.4538, "step": 9137 }, { "epoch": 5.105027932960894, "grad_norm": 0.38795205950737, "learning_rate": 0.0007470588235294118, "loss": 0.3503, "step": 9138 }, { "epoch": 5.105586592178771, "grad_norm": 0.7008611559867859, "learning_rate": 0.00074703081232493, "loss": 0.3955, "step": 9139 }, { "epoch": 5.106145251396648, "grad_norm": 0.5417254567146301, "learning_rate": 0.0007470028011204482, "loss": 0.4044, "step": 9140 }, { "epoch": 5.1067039106145256, "grad_norm": 0.506192147731781, "learning_rate": 0.0007469747899159665, "loss": 0.484, "step": 9141 }, { "epoch": 5.107262569832402, "grad_norm": 0.5177536010742188, "learning_rate": 0.0007469467787114847, "loss": 0.3632, "step": 9142 }, { 
"epoch": 5.10782122905028, "grad_norm": 1.4445257186889648, "learning_rate": 0.0007469187675070028, "loss": 0.4051, "step": 9143 }, { "epoch": 5.108379888268156, "grad_norm": 0.35983484983444214, "learning_rate": 0.000746890756302521, "loss": 0.3717, "step": 9144 }, { "epoch": 5.108938547486034, "grad_norm": 0.5104691982269287, "learning_rate": 0.0007468627450980392, "loss": 0.54, "step": 9145 }, { "epoch": 5.10949720670391, "grad_norm": 0.6074373126029968, "learning_rate": 0.0007468347338935574, "loss": 0.423, "step": 9146 }, { "epoch": 5.110055865921788, "grad_norm": 0.5419580936431885, "learning_rate": 0.0007468067226890757, "loss": 0.6334, "step": 9147 }, { "epoch": 5.110614525139665, "grad_norm": 0.6220030188560486, "learning_rate": 0.0007467787114845938, "loss": 0.4293, "step": 9148 }, { "epoch": 5.111173184357542, "grad_norm": 0.8056634664535522, "learning_rate": 0.000746750700280112, "loss": 0.5729, "step": 9149 }, { "epoch": 5.111731843575419, "grad_norm": 0.42025524377822876, "learning_rate": 0.0007467226890756302, "loss": 0.3801, "step": 9150 }, { "epoch": 5.112290502793296, "grad_norm": 0.593021810054779, "learning_rate": 0.0007466946778711484, "loss": 0.4028, "step": 9151 }, { "epoch": 5.112849162011173, "grad_norm": 0.48286619782447815, "learning_rate": 0.0007466666666666667, "loss": 0.4314, "step": 9152 }, { "epoch": 5.113407821229051, "grad_norm": 0.47215911746025085, "learning_rate": 0.000746638655462185, "loss": 0.4138, "step": 9153 }, { "epoch": 5.113966480446927, "grad_norm": 1.1260055303573608, "learning_rate": 0.000746610644257703, "loss": 0.4264, "step": 9154 }, { "epoch": 5.114525139664805, "grad_norm": 0.5313096046447754, "learning_rate": 0.0007465826330532213, "loss": 0.5381, "step": 9155 }, { "epoch": 5.115083798882681, "grad_norm": 0.4856265187263489, "learning_rate": 0.0007465546218487395, "loss": 0.5178, "step": 9156 }, { "epoch": 5.115642458100559, "grad_norm": 0.39768341183662415, "learning_rate": 0.0007465266106442578, "loss": 
0.4264, "step": 9157 }, { "epoch": 5.116201117318436, "grad_norm": 0.3987128734588623, "learning_rate": 0.000746498599439776, "loss": 0.4354, "step": 9158 }, { "epoch": 5.116759776536313, "grad_norm": 6.93054723739624, "learning_rate": 0.0007464705882352941, "loss": 0.5876, "step": 9159 }, { "epoch": 5.11731843575419, "grad_norm": 0.6016881465911865, "learning_rate": 0.0007464425770308123, "loss": 0.517, "step": 9160 }, { "epoch": 5.117877094972067, "grad_norm": 1.1399112939834595, "learning_rate": 0.0007464145658263305, "loss": 0.5397, "step": 9161 }, { "epoch": 5.118435754189944, "grad_norm": 0.717146098613739, "learning_rate": 0.0007463865546218488, "loss": 0.497, "step": 9162 }, { "epoch": 5.118994413407822, "grad_norm": 0.46363526582717896, "learning_rate": 0.000746358543417367, "loss": 0.5519, "step": 9163 }, { "epoch": 5.119553072625698, "grad_norm": 0.5593603253364563, "learning_rate": 0.0007463305322128851, "loss": 0.3576, "step": 9164 }, { "epoch": 5.120111731843576, "grad_norm": 0.4428239166736603, "learning_rate": 0.0007463025210084033, "loss": 0.5007, "step": 9165 }, { "epoch": 5.120670391061452, "grad_norm": 0.5615933537483215, "learning_rate": 0.0007462745098039215, "loss": 0.4523, "step": 9166 }, { "epoch": 5.12122905027933, "grad_norm": 0.5217528939247131, "learning_rate": 0.0007462464985994398, "loss": 0.4572, "step": 9167 }, { "epoch": 5.121787709497207, "grad_norm": 0.4537884294986725, "learning_rate": 0.000746218487394958, "loss": 0.499, "step": 9168 }, { "epoch": 5.122346368715084, "grad_norm": 0.5292918086051941, "learning_rate": 0.0007461904761904762, "loss": 0.5083, "step": 9169 }, { "epoch": 5.122905027932961, "grad_norm": 1.3333250284194946, "learning_rate": 0.0007461624649859943, "loss": 0.5122, "step": 9170 }, { "epoch": 5.123463687150838, "grad_norm": 0.532231867313385, "learning_rate": 0.0007461344537815126, "loss": 0.4038, "step": 9171 }, { "epoch": 5.124022346368715, "grad_norm": 0.5123986005783081, "learning_rate": 
0.0007461064425770309, "loss": 0.4967, "step": 9172 }, { "epoch": 5.124581005586593, "grad_norm": 0.740041196346283, "learning_rate": 0.0007460784313725491, "loss": 0.5546, "step": 9173 }, { "epoch": 5.125139664804469, "grad_norm": 0.5402716994285583, "learning_rate": 0.0007460504201680673, "loss": 0.4548, "step": 9174 }, { "epoch": 5.125698324022347, "grad_norm": 0.5447051525115967, "learning_rate": 0.0007460224089635854, "loss": 0.5365, "step": 9175 }, { "epoch": 5.126256983240223, "grad_norm": 0.4971652626991272, "learning_rate": 0.0007459943977591036, "loss": 0.3755, "step": 9176 }, { "epoch": 5.126815642458101, "grad_norm": 0.5869438052177429, "learning_rate": 0.0007459663865546219, "loss": 0.4202, "step": 9177 }, { "epoch": 5.127374301675978, "grad_norm": 12.306452751159668, "learning_rate": 0.0007459383753501401, "loss": 0.5314, "step": 9178 }, { "epoch": 5.127932960893855, "grad_norm": 0.6971128582954407, "learning_rate": 0.0007459103641456583, "loss": 0.7208, "step": 9179 }, { "epoch": 5.128491620111732, "grad_norm": 1.1146947145462036, "learning_rate": 0.0007458823529411764, "loss": 0.4351, "step": 9180 }, { "epoch": 5.129050279329609, "grad_norm": 0.7231548428535461, "learning_rate": 0.0007458543417366946, "loss": 0.5181, "step": 9181 }, { "epoch": 5.129608938547486, "grad_norm": 0.46546879410743713, "learning_rate": 0.0007458263305322129, "loss": 0.4337, "step": 9182 }, { "epoch": 5.130167597765363, "grad_norm": 0.529313325881958, "learning_rate": 0.0007457983193277311, "loss": 0.3499, "step": 9183 }, { "epoch": 5.13072625698324, "grad_norm": 1.1186801195144653, "learning_rate": 0.0007457703081232493, "loss": 0.4444, "step": 9184 }, { "epoch": 5.131284916201118, "grad_norm": 0.5682976841926575, "learning_rate": 0.0007457422969187675, "loss": 0.3459, "step": 9185 }, { "epoch": 5.131843575418994, "grad_norm": 0.5462518930435181, "learning_rate": 0.0007457142857142856, "loss": 0.4414, "step": 9186 }, { "epoch": 5.132402234636872, "grad_norm": 
1.2040019035339355, "learning_rate": 0.000745686274509804, "loss": 0.5522, "step": 9187 }, { "epoch": 5.132960893854748, "grad_norm": 0.5593796372413635, "learning_rate": 0.0007456582633053222, "loss": 0.3806, "step": 9188 }, { "epoch": 5.133519553072626, "grad_norm": 0.44086018204689026, "learning_rate": 0.0007456302521008404, "loss": 0.3796, "step": 9189 }, { "epoch": 5.134078212290503, "grad_norm": 0.6522893905639648, "learning_rate": 0.0007456022408963586, "loss": 0.4245, "step": 9190 }, { "epoch": 5.13463687150838, "grad_norm": 0.6190874576568604, "learning_rate": 0.0007455742296918767, "loss": 0.4861, "step": 9191 }, { "epoch": 5.135195530726257, "grad_norm": 0.7108556628227234, "learning_rate": 0.000745546218487395, "loss": 0.3839, "step": 9192 }, { "epoch": 5.135754189944134, "grad_norm": 0.5619034767150879, "learning_rate": 0.0007455182072829132, "loss": 0.5357, "step": 9193 }, { "epoch": 5.136312849162011, "grad_norm": 1.3583414554595947, "learning_rate": 0.0007454901960784314, "loss": 0.4291, "step": 9194 }, { "epoch": 5.136871508379889, "grad_norm": 0.5836233496665955, "learning_rate": 0.0007454621848739496, "loss": 0.5288, "step": 9195 }, { "epoch": 5.137430167597765, "grad_norm": 0.390178382396698, "learning_rate": 0.0007454341736694677, "loss": 0.3994, "step": 9196 }, { "epoch": 5.137988826815643, "grad_norm": 0.5170450806617737, "learning_rate": 0.000745406162464986, "loss": 0.4195, "step": 9197 }, { "epoch": 5.138547486033519, "grad_norm": 0.8974315524101257, "learning_rate": 0.0007453781512605042, "loss": 0.5007, "step": 9198 }, { "epoch": 5.139106145251397, "grad_norm": 0.5151286125183105, "learning_rate": 0.0007453501400560224, "loss": 0.5086, "step": 9199 }, { "epoch": 5.139664804469274, "grad_norm": 0.393502801656723, "learning_rate": 0.0007453221288515406, "loss": 0.3409, "step": 9200 }, { "epoch": 5.140223463687151, "grad_norm": 0.45447811484336853, "learning_rate": 0.0007452941176470588, "loss": 0.3762, "step": 9201 }, { "epoch": 
5.140782122905028, "grad_norm": 0.40816089510917664, "learning_rate": 0.000745266106442577, "loss": 0.5043, "step": 9202 }, { "epoch": 5.141340782122905, "grad_norm": 0.5953418016433716, "learning_rate": 0.0007452380952380953, "loss": 0.5736, "step": 9203 }, { "epoch": 5.141899441340782, "grad_norm": 4.842398643493652, "learning_rate": 0.0007452100840336135, "loss": 0.521, "step": 9204 }, { "epoch": 5.14245810055866, "grad_norm": 0.7641180753707886, "learning_rate": 0.0007451820728291317, "loss": 0.5186, "step": 9205 }, { "epoch": 5.143016759776536, "grad_norm": 0.6450425982475281, "learning_rate": 0.0007451540616246499, "loss": 0.4421, "step": 9206 }, { "epoch": 5.143575418994414, "grad_norm": 0.4553753733634949, "learning_rate": 0.0007451260504201681, "loss": 0.4423, "step": 9207 }, { "epoch": 5.14413407821229, "grad_norm": 1.2479188442230225, "learning_rate": 0.0007450980392156863, "loss": 0.4086, "step": 9208 }, { "epoch": 5.144692737430168, "grad_norm": 0.5659211277961731, "learning_rate": 0.0007450700280112045, "loss": 0.5707, "step": 9209 }, { "epoch": 5.145251396648045, "grad_norm": 0.5723447799682617, "learning_rate": 0.0007450420168067227, "loss": 0.3486, "step": 9210 }, { "epoch": 5.145810055865922, "grad_norm": 0.875298023223877, "learning_rate": 0.0007450140056022409, "loss": 0.5293, "step": 9211 }, { "epoch": 5.146368715083799, "grad_norm": 1.6071817874908447, "learning_rate": 0.0007449859943977591, "loss": 0.5443, "step": 9212 }, { "epoch": 5.146927374301676, "grad_norm": 1.6044070720672607, "learning_rate": 0.0007449579831932773, "loss": 0.4786, "step": 9213 }, { "epoch": 5.147486033519553, "grad_norm": 0.5789911150932312, "learning_rate": 0.0007449299719887955, "loss": 0.5145, "step": 9214 }, { "epoch": 5.148044692737431, "grad_norm": 0.7926276922225952, "learning_rate": 0.0007449019607843137, "loss": 0.4674, "step": 9215 }, { "epoch": 5.148603351955307, "grad_norm": 1.0170307159423828, "learning_rate": 0.0007448739495798319, "loss": 0.7411, 
"step": 9216 }, { "epoch": 5.149162011173185, "grad_norm": 0.9485819935798645, "learning_rate": 0.0007448459383753502, "loss": 0.5215, "step": 9217 }, { "epoch": 5.149720670391061, "grad_norm": 0.47148680686950684, "learning_rate": 0.0007448179271708683, "loss": 0.4163, "step": 9218 }, { "epoch": 5.150279329608939, "grad_norm": 0.6326032280921936, "learning_rate": 0.0007447899159663865, "loss": 0.4939, "step": 9219 }, { "epoch": 5.150837988826815, "grad_norm": 0.5185278654098511, "learning_rate": 0.0007447619047619048, "loss": 0.3868, "step": 9220 }, { "epoch": 5.151396648044693, "grad_norm": 0.6758152842521667, "learning_rate": 0.000744733893557423, "loss": 0.4409, "step": 9221 }, { "epoch": 5.15195530726257, "grad_norm": 0.9032146334648132, "learning_rate": 0.0007447058823529413, "loss": 0.5934, "step": 9222 }, { "epoch": 5.152513966480447, "grad_norm": 0.615212082862854, "learning_rate": 0.0007446778711484594, "loss": 0.389, "step": 9223 }, { "epoch": 5.153072625698324, "grad_norm": 0.47535285353660583, "learning_rate": 0.0007446498599439776, "loss": 0.4048, "step": 9224 }, { "epoch": 5.153631284916201, "grad_norm": 0.5904978513717651, "learning_rate": 0.0007446218487394958, "loss": 0.5017, "step": 9225 }, { "epoch": 5.154189944134078, "grad_norm": 0.5763953328132629, "learning_rate": 0.000744593837535014, "loss": 0.4708, "step": 9226 }, { "epoch": 5.154748603351956, "grad_norm": 0.8433234691619873, "learning_rate": 0.0007445658263305323, "loss": 0.453, "step": 9227 }, { "epoch": 5.155307262569832, "grad_norm": 2.287301778793335, "learning_rate": 0.0007445378151260504, "loss": 0.4227, "step": 9228 }, { "epoch": 5.15586592178771, "grad_norm": 0.948485791683197, "learning_rate": 0.0007445098039215686, "loss": 0.4887, "step": 9229 }, { "epoch": 5.156424581005586, "grad_norm": 0.544860303401947, "learning_rate": 0.0007444817927170868, "loss": 0.5745, "step": 9230 }, { "epoch": 5.156983240223464, "grad_norm": 0.6630926728248596, "learning_rate": 0.000744453781512605, 
"loss": 0.6264, "step": 9231 }, { "epoch": 5.157541899441341, "grad_norm": 0.6315627694129944, "learning_rate": 0.0007444257703081233, "loss": 0.3774, "step": 9232 }, { "epoch": 5.158100558659218, "grad_norm": 0.703891932964325, "learning_rate": 0.0007443977591036415, "loss": 0.4564, "step": 9233 }, { "epoch": 5.158659217877095, "grad_norm": 1.1930180788040161, "learning_rate": 0.0007443697478991596, "loss": 0.4812, "step": 9234 }, { "epoch": 5.159217877094972, "grad_norm": 0.5696471929550171, "learning_rate": 0.0007443417366946778, "loss": 0.482, "step": 9235 }, { "epoch": 5.159776536312849, "grad_norm": 1.9199336767196655, "learning_rate": 0.000744313725490196, "loss": 0.4004, "step": 9236 }, { "epoch": 5.160335195530727, "grad_norm": 1.5826082229614258, "learning_rate": 0.0007442857142857144, "loss": 0.4288, "step": 9237 }, { "epoch": 5.160893854748603, "grad_norm": 0.9928046464920044, "learning_rate": 0.0007442577030812326, "loss": 0.7165, "step": 9238 }, { "epoch": 5.161452513966481, "grad_norm": 0.5731468200683594, "learning_rate": 0.0007442296918767507, "loss": 0.4374, "step": 9239 }, { "epoch": 5.162011173184357, "grad_norm": 0.7084184288978577, "learning_rate": 0.0007442016806722689, "loss": 0.4329, "step": 9240 }, { "epoch": 5.162569832402235, "grad_norm": 0.5002947449684143, "learning_rate": 0.0007441736694677871, "loss": 0.3726, "step": 9241 }, { "epoch": 5.163128491620112, "grad_norm": 0.4340097904205322, "learning_rate": 0.0007441456582633054, "loss": 0.485, "step": 9242 }, { "epoch": 5.163687150837989, "grad_norm": 0.8195523023605347, "learning_rate": 0.0007441176470588236, "loss": 0.501, "step": 9243 }, { "epoch": 5.164245810055866, "grad_norm": 0.5582486987113953, "learning_rate": 0.0007440896358543417, "loss": 0.5458, "step": 9244 }, { "epoch": 5.164804469273743, "grad_norm": 0.8311848640441895, "learning_rate": 0.0007440616246498599, "loss": 0.4329, "step": 9245 }, { "epoch": 5.16536312849162, "grad_norm": 0.5118499398231506, "learning_rate": 
0.0007440336134453781, "loss": 0.4444, "step": 9246 }, { "epoch": 5.165921787709498, "grad_norm": 0.39821428060531616, "learning_rate": 0.0007440056022408964, "loss": 0.4042, "step": 9247 }, { "epoch": 5.166480446927374, "grad_norm": 0.590999960899353, "learning_rate": 0.0007439775910364146, "loss": 0.3764, "step": 9248 }, { "epoch": 5.167039106145252, "grad_norm": 0.6888015866279602, "learning_rate": 0.0007439495798319328, "loss": 0.4367, "step": 9249 }, { "epoch": 5.167597765363128, "grad_norm": 0.43305695056915283, "learning_rate": 0.0007439215686274509, "loss": 0.4577, "step": 9250 }, { "epoch": 5.168156424581006, "grad_norm": 0.47872641682624817, "learning_rate": 0.0007438935574229691, "loss": 0.4112, "step": 9251 }, { "epoch": 5.168715083798883, "grad_norm": 0.5339536666870117, "learning_rate": 0.0007438655462184875, "loss": 0.4605, "step": 9252 }, { "epoch": 5.16927374301676, "grad_norm": 0.8600516319274902, "learning_rate": 0.0007438375350140057, "loss": 0.4863, "step": 9253 }, { "epoch": 5.169832402234637, "grad_norm": 1.5130808353424072, "learning_rate": 0.0007438095238095239, "loss": 0.4264, "step": 9254 }, { "epoch": 5.170391061452514, "grad_norm": 1.0064526796340942, "learning_rate": 0.000743781512605042, "loss": 0.4165, "step": 9255 }, { "epoch": 5.170949720670391, "grad_norm": 0.9091895818710327, "learning_rate": 0.0007437535014005602, "loss": 0.6808, "step": 9256 }, { "epoch": 5.171508379888268, "grad_norm": 0.7874834537506104, "learning_rate": 0.0007437254901960785, "loss": 0.4314, "step": 9257 }, { "epoch": 5.172067039106145, "grad_norm": 0.49575164914131165, "learning_rate": 0.0007436974789915967, "loss": 0.4311, "step": 9258 }, { "epoch": 5.172625698324023, "grad_norm": 0.41619476675987244, "learning_rate": 0.0007436694677871149, "loss": 0.3942, "step": 9259 }, { "epoch": 5.173184357541899, "grad_norm": 0.3923790752887726, "learning_rate": 0.000743641456582633, "loss": 0.3437, "step": 9260 }, { "epoch": 5.173743016759777, "grad_norm": 
0.4505344331264496, "learning_rate": 0.0007436134453781512, "loss": 0.3975, "step": 9261 }, { "epoch": 5.174301675977653, "grad_norm": 0.41459575295448303, "learning_rate": 0.0007435854341736695, "loss": 0.4426, "step": 9262 }, { "epoch": 5.174860335195531, "grad_norm": 0.41789692640304565, "learning_rate": 0.0007435574229691877, "loss": 0.4668, "step": 9263 }, { "epoch": 5.175418994413408, "grad_norm": 0.6984832286834717, "learning_rate": 0.0007435294117647059, "loss": 0.3303, "step": 9264 }, { "epoch": 5.175977653631285, "grad_norm": 0.5196592211723328, "learning_rate": 0.0007435014005602241, "loss": 0.4269, "step": 9265 }, { "epoch": 5.176536312849162, "grad_norm": 0.5434472560882568, "learning_rate": 0.0007434733893557422, "loss": 0.3435, "step": 9266 }, { "epoch": 5.177094972067039, "grad_norm": 0.41812241077423096, "learning_rate": 0.0007434453781512605, "loss": 0.3234, "step": 9267 }, { "epoch": 5.177653631284916, "grad_norm": 0.40646231174468994, "learning_rate": 0.0007434173669467787, "loss": 0.4378, "step": 9268 }, { "epoch": 5.178212290502794, "grad_norm": 0.4325316250324249, "learning_rate": 0.000743389355742297, "loss": 0.4799, "step": 9269 }, { "epoch": 5.17877094972067, "grad_norm": 0.36815252900123596, "learning_rate": 0.0007433613445378152, "loss": 0.4526, "step": 9270 }, { "epoch": 5.179329608938548, "grad_norm": 0.41733407974243164, "learning_rate": 0.0007433333333333333, "loss": 0.4701, "step": 9271 }, { "epoch": 5.179888268156424, "grad_norm": 0.5900431871414185, "learning_rate": 0.0007433053221288516, "loss": 0.4061, "step": 9272 }, { "epoch": 5.180446927374302, "grad_norm": 1.3708845376968384, "learning_rate": 0.0007432773109243698, "loss": 0.4489, "step": 9273 }, { "epoch": 5.181005586592179, "grad_norm": 0.4360641837120056, "learning_rate": 0.000743249299719888, "loss": 0.3557, "step": 9274 }, { "epoch": 5.181564245810056, "grad_norm": 0.6780818104743958, "learning_rate": 0.0007432212885154062, "loss": 0.4028, "step": 9275 }, { "epoch": 
5.182122905027933, "grad_norm": 0.9363614320755005, "learning_rate": 0.0007431932773109243, "loss": 0.5742, "step": 9276 }, { "epoch": 5.18268156424581, "grad_norm": 3.5284266471862793, "learning_rate": 0.0007431652661064426, "loss": 0.3808, "step": 9277 }, { "epoch": 5.183240223463687, "grad_norm": 0.574120283126831, "learning_rate": 0.0007431372549019608, "loss": 0.4559, "step": 9278 }, { "epoch": 5.183798882681565, "grad_norm": 0.7071759104728699, "learning_rate": 0.000743109243697479, "loss": 0.5033, "step": 9279 }, { "epoch": 5.184357541899441, "grad_norm": 0.49550479650497437, "learning_rate": 0.0007430812324929972, "loss": 0.5458, "step": 9280 }, { "epoch": 5.184916201117319, "grad_norm": 0.5007463693618774, "learning_rate": 0.0007430532212885154, "loss": 0.5074, "step": 9281 }, { "epoch": 5.185474860335195, "grad_norm": 1.6928448677062988, "learning_rate": 0.0007430252100840336, "loss": 0.3536, "step": 9282 }, { "epoch": 5.186033519553073, "grad_norm": 1.4974571466445923, "learning_rate": 0.0007429971988795518, "loss": 0.4392, "step": 9283 }, { "epoch": 5.18659217877095, "grad_norm": 8.268061637878418, "learning_rate": 0.00074296918767507, "loss": 0.388, "step": 9284 }, { "epoch": 5.187150837988827, "grad_norm": 2.442584991455078, "learning_rate": 0.0007429411764705883, "loss": 0.4953, "step": 9285 }, { "epoch": 5.187709497206704, "grad_norm": 2.018022298812866, "learning_rate": 0.0007429131652661065, "loss": 0.3898, "step": 9286 }, { "epoch": 5.188268156424581, "grad_norm": 0.5392963290214539, "learning_rate": 0.0007428851540616247, "loss": 0.5185, "step": 9287 }, { "epoch": 5.188826815642458, "grad_norm": 0.5354064702987671, "learning_rate": 0.0007428571428571429, "loss": 0.447, "step": 9288 }, { "epoch": 5.189385474860336, "grad_norm": 0.6769338250160217, "learning_rate": 0.0007428291316526611, "loss": 0.5134, "step": 9289 }, { "epoch": 5.189944134078212, "grad_norm": 0.5994898676872253, "learning_rate": 0.0007428011204481793, "loss": 0.5327, "step": 
9290 }, { "epoch": 5.19050279329609, "grad_norm": 1.0860724449157715, "learning_rate": 0.0007427731092436975, "loss": 0.5241, "step": 9291 }, { "epoch": 5.191061452513966, "grad_norm": 0.4479406177997589, "learning_rate": 0.0007427450980392158, "loss": 0.4274, "step": 9292 }, { "epoch": 5.191620111731844, "grad_norm": 0.6470661759376526, "learning_rate": 0.0007427170868347339, "loss": 0.3776, "step": 9293 }, { "epoch": 5.19217877094972, "grad_norm": 0.5875535011291504, "learning_rate": 0.0007426890756302521, "loss": 0.5605, "step": 9294 }, { "epoch": 5.192737430167598, "grad_norm": 0.6037549376487732, "learning_rate": 0.0007426610644257703, "loss": 0.3953, "step": 9295 }, { "epoch": 5.193296089385475, "grad_norm": 0.5385110378265381, "learning_rate": 0.0007426330532212885, "loss": 0.3951, "step": 9296 }, { "epoch": 5.193854748603352, "grad_norm": 0.518102765083313, "learning_rate": 0.0007426050420168068, "loss": 0.4269, "step": 9297 }, { "epoch": 5.194413407821229, "grad_norm": 0.7049671411514282, "learning_rate": 0.0007425770308123249, "loss": 0.553, "step": 9298 }, { "epoch": 5.194972067039106, "grad_norm": 0.3856646418571472, "learning_rate": 0.0007425490196078431, "loss": 0.4273, "step": 9299 }, { "epoch": 5.195530726256983, "grad_norm": 0.8590890169143677, "learning_rate": 0.0007425210084033613, "loss": 0.6444, "step": 9300 }, { "epoch": 5.196089385474861, "grad_norm": 0.5261777639389038, "learning_rate": 0.0007424929971988795, "loss": 0.3433, "step": 9301 }, { "epoch": 5.196648044692737, "grad_norm": 0.4589046239852905, "learning_rate": 0.0007424649859943979, "loss": 0.4006, "step": 9302 }, { "epoch": 5.197206703910615, "grad_norm": 0.5377180576324463, "learning_rate": 0.000742436974789916, "loss": 0.4386, "step": 9303 }, { "epoch": 5.197765363128491, "grad_norm": 0.5259334444999695, "learning_rate": 0.0007424089635854342, "loss": 0.544, "step": 9304 }, { "epoch": 5.198324022346369, "grad_norm": 0.46482986211776733, "learning_rate": 0.0007423809523809524, 
"loss": 0.5363, "step": 9305 }, { "epoch": 5.198882681564246, "grad_norm": 0.8501517176628113, "learning_rate": 0.0007423529411764706, "loss": 0.4826, "step": 9306 }, { "epoch": 5.199441340782123, "grad_norm": 0.6368624567985535, "learning_rate": 0.0007423249299719889, "loss": 0.46, "step": 9307 }, { "epoch": 5.2, "grad_norm": 0.4156450033187866, "learning_rate": 0.0007422969187675071, "loss": 0.3231, "step": 9308 }, { "epoch": 5.200558659217877, "grad_norm": 0.5898316502571106, "learning_rate": 0.0007422689075630252, "loss": 0.4461, "step": 9309 }, { "epoch": 5.201117318435754, "grad_norm": 1.5258007049560547, "learning_rate": 0.0007422408963585434, "loss": 0.5537, "step": 9310 }, { "epoch": 5.201675977653632, "grad_norm": 0.4372830092906952, "learning_rate": 0.0007422128851540616, "loss": 0.3891, "step": 9311 }, { "epoch": 5.202234636871508, "grad_norm": 0.5006040930747986, "learning_rate": 0.0007421848739495799, "loss": 0.4505, "step": 9312 }, { "epoch": 5.202793296089386, "grad_norm": 0.43406760692596436, "learning_rate": 0.0007421568627450981, "loss": 0.4142, "step": 9313 }, { "epoch": 5.203351955307262, "grad_norm": 0.4532420337200165, "learning_rate": 0.0007421288515406162, "loss": 0.367, "step": 9314 }, { "epoch": 5.20391061452514, "grad_norm": 0.7268055081367493, "learning_rate": 0.0007421008403361344, "loss": 0.485, "step": 9315 }, { "epoch": 5.204469273743017, "grad_norm": 0.6865625977516174, "learning_rate": 0.0007420728291316526, "loss": 0.4381, "step": 9316 }, { "epoch": 5.205027932960894, "grad_norm": 0.3839685320854187, "learning_rate": 0.000742044817927171, "loss": 0.4183, "step": 9317 }, { "epoch": 5.205586592178771, "grad_norm": 0.6105719208717346, "learning_rate": 0.0007420168067226892, "loss": 0.5089, "step": 9318 }, { "epoch": 5.206145251396648, "grad_norm": 0.4529527723789215, "learning_rate": 0.0007419887955182073, "loss": 0.4367, "step": 9319 }, { "epoch": 5.206703910614525, "grad_norm": 0.4352875351905823, "learning_rate": 
0.0007419607843137255, "loss": 0.4493, "step": 9320 }, { "epoch": 5.207262569832403, "grad_norm": 0.5288578867912292, "learning_rate": 0.0007419327731092437, "loss": 0.5192, "step": 9321 }, { "epoch": 5.207821229050279, "grad_norm": 0.6562674045562744, "learning_rate": 0.000741904761904762, "loss": 0.4628, "step": 9322 }, { "epoch": 5.208379888268157, "grad_norm": 0.7749854922294617, "learning_rate": 0.0007418767507002802, "loss": 0.4132, "step": 9323 }, { "epoch": 5.208938547486033, "grad_norm": 0.665708065032959, "learning_rate": 0.0007418487394957984, "loss": 0.4753, "step": 9324 }, { "epoch": 5.209497206703911, "grad_norm": 0.717275083065033, "learning_rate": 0.0007418207282913165, "loss": 0.5044, "step": 9325 }, { "epoch": 5.210055865921788, "grad_norm": 0.9967623353004456, "learning_rate": 0.0007417927170868347, "loss": 0.3689, "step": 9326 }, { "epoch": 5.210614525139665, "grad_norm": 0.544273853302002, "learning_rate": 0.000741764705882353, "loss": 0.4247, "step": 9327 }, { "epoch": 5.211173184357542, "grad_norm": 0.4892696440219879, "learning_rate": 0.0007417366946778712, "loss": 0.5132, "step": 9328 }, { "epoch": 5.211731843575419, "grad_norm": 0.6195788979530334, "learning_rate": 0.0007417086834733894, "loss": 0.492, "step": 9329 }, { "epoch": 5.212290502793296, "grad_norm": 0.5596709251403809, "learning_rate": 0.0007416806722689075, "loss": 0.4446, "step": 9330 }, { "epoch": 5.212849162011173, "grad_norm": 1.2067221403121948, "learning_rate": 0.0007416526610644257, "loss": 0.5659, "step": 9331 }, { "epoch": 5.21340782122905, "grad_norm": 0.5361933708190918, "learning_rate": 0.000741624649859944, "loss": 0.4487, "step": 9332 }, { "epoch": 5.213966480446928, "grad_norm": 0.46188193559646606, "learning_rate": 0.0007415966386554622, "loss": 0.4042, "step": 9333 }, { "epoch": 5.214525139664804, "grad_norm": 0.9497390985488892, "learning_rate": 0.0007415686274509805, "loss": 0.4782, "step": 9334 }, { "epoch": 5.215083798882682, "grad_norm": 
0.5016930103302002, "learning_rate": 0.0007415406162464986, "loss": 0.4773, "step": 9335 }, { "epoch": 5.215642458100558, "grad_norm": 1.062241792678833, "learning_rate": 0.0007415126050420168, "loss": 0.3903, "step": 9336 }, { "epoch": 5.216201117318436, "grad_norm": 0.5524981617927551, "learning_rate": 0.0007414845938375351, "loss": 0.4973, "step": 9337 }, { "epoch": 5.216759776536313, "grad_norm": 0.6969456672668457, "learning_rate": 0.0007414565826330533, "loss": 0.4872, "step": 9338 }, { "epoch": 5.21731843575419, "grad_norm": 1.7280601263046265, "learning_rate": 0.0007414285714285715, "loss": 0.416, "step": 9339 }, { "epoch": 5.217877094972067, "grad_norm": 0.5588036775588989, "learning_rate": 0.0007414005602240897, "loss": 0.4818, "step": 9340 }, { "epoch": 5.218435754189944, "grad_norm": 1.1933562755584717, "learning_rate": 0.0007413725490196078, "loss": 0.3757, "step": 9341 }, { "epoch": 5.218994413407821, "grad_norm": 0.6037487983703613, "learning_rate": 0.0007413445378151261, "loss": 0.6115, "step": 9342 }, { "epoch": 5.219553072625699, "grad_norm": 0.4822850525379181, "learning_rate": 0.0007413165266106443, "loss": 0.3554, "step": 9343 }, { "epoch": 5.220111731843575, "grad_norm": 0.5273537635803223, "learning_rate": 0.0007412885154061625, "loss": 0.5025, "step": 9344 }, { "epoch": 5.220670391061453, "grad_norm": 0.5153448581695557, "learning_rate": 0.0007412605042016807, "loss": 0.4439, "step": 9345 }, { "epoch": 5.221229050279329, "grad_norm": 0.3843933939933777, "learning_rate": 0.0007412324929971988, "loss": 0.3665, "step": 9346 }, { "epoch": 5.221787709497207, "grad_norm": 0.525409460067749, "learning_rate": 0.0007412044817927171, "loss": 0.4527, "step": 9347 }, { "epoch": 5.222346368715084, "grad_norm": 0.34481489658355713, "learning_rate": 0.0007411764705882353, "loss": 0.521, "step": 9348 }, { "epoch": 5.222905027932961, "grad_norm": 0.37644752860069275, "learning_rate": 0.0007411484593837535, "loss": 0.3723, "step": 9349 }, { "epoch": 
5.223463687150838, "grad_norm": 0.4853006601333618, "learning_rate": 0.0007411204481792717, "loss": 0.5253, "step": 9350 }, { "epoch": 5.224022346368715, "grad_norm": 0.6095213294029236, "learning_rate": 0.0007410924369747898, "loss": 0.4466, "step": 9351 }, { "epoch": 5.224581005586592, "grad_norm": 0.41500964760780334, "learning_rate": 0.0007410644257703082, "loss": 0.3549, "step": 9352 }, { "epoch": 5.22513966480447, "grad_norm": 1.1184957027435303, "learning_rate": 0.0007410364145658264, "loss": 0.3494, "step": 9353 }, { "epoch": 5.225698324022346, "grad_norm": 0.7289383411407471, "learning_rate": 0.0007410084033613446, "loss": 0.5212, "step": 9354 }, { "epoch": 5.226256983240224, "grad_norm": 0.44477397203445435, "learning_rate": 0.0007409803921568628, "loss": 0.448, "step": 9355 }, { "epoch": 5.2268156424581, "grad_norm": 0.7348518967628479, "learning_rate": 0.000740952380952381, "loss": 0.4261, "step": 9356 }, { "epoch": 5.227374301675978, "grad_norm": 0.8562750220298767, "learning_rate": 0.0007409243697478992, "loss": 0.6878, "step": 9357 }, { "epoch": 5.227932960893855, "grad_norm": 0.46679219603538513, "learning_rate": 0.0007408963585434174, "loss": 0.3871, "step": 9358 }, { "epoch": 5.228491620111732, "grad_norm": 0.5426879525184631, "learning_rate": 0.0007408683473389356, "loss": 0.4177, "step": 9359 }, { "epoch": 5.229050279329609, "grad_norm": 1.2118076086044312, "learning_rate": 0.0007408403361344538, "loss": 0.5355, "step": 9360 }, { "epoch": 5.229608938547486, "grad_norm": 0.8733117580413818, "learning_rate": 0.000740812324929972, "loss": 0.3816, "step": 9361 }, { "epoch": 5.230167597765363, "grad_norm": 0.596260130405426, "learning_rate": 0.0007407843137254902, "loss": 0.4672, "step": 9362 }, { "epoch": 5.230726256983241, "grad_norm": 0.5930527448654175, "learning_rate": 0.0007407563025210084, "loss": 0.4285, "step": 9363 }, { "epoch": 5.231284916201117, "grad_norm": 0.6616517305374146, "learning_rate": 0.0007407282913165266, "loss": 0.6199, 
"step": 9364 }, { "epoch": 5.231843575418995, "grad_norm": 0.6076006889343262, "learning_rate": 0.0007407002801120448, "loss": 0.4827, "step": 9365 }, { "epoch": 5.232402234636871, "grad_norm": 0.6483851671218872, "learning_rate": 0.000740672268907563, "loss": 0.4048, "step": 9366 }, { "epoch": 5.232960893854749, "grad_norm": 0.4512960612773895, "learning_rate": 0.0007406442577030813, "loss": 0.5279, "step": 9367 }, { "epoch": 5.233519553072625, "grad_norm": 0.5346662402153015, "learning_rate": 0.0007406162464985995, "loss": 0.3669, "step": 9368 }, { "epoch": 5.234078212290503, "grad_norm": 0.47256213426589966, "learning_rate": 0.0007405882352941177, "loss": 0.3534, "step": 9369 }, { "epoch": 5.23463687150838, "grad_norm": 2.0141563415527344, "learning_rate": 0.0007405602240896359, "loss": 0.5079, "step": 9370 }, { "epoch": 5.235195530726257, "grad_norm": 0.5680559873580933, "learning_rate": 0.0007405322128851541, "loss": 0.4236, "step": 9371 }, { "epoch": 5.235754189944134, "grad_norm": 0.4786226451396942, "learning_rate": 0.0007405042016806723, "loss": 0.4883, "step": 9372 }, { "epoch": 5.236312849162011, "grad_norm": 0.6113656759262085, "learning_rate": 0.0007404761904761905, "loss": 0.5046, "step": 9373 }, { "epoch": 5.236871508379888, "grad_norm": 0.6588099002838135, "learning_rate": 0.0007404481792717087, "loss": 0.5455, "step": 9374 }, { "epoch": 5.237430167597766, "grad_norm": 0.5834494233131409, "learning_rate": 0.0007404201680672269, "loss": 0.3592, "step": 9375 }, { "epoch": 5.237988826815642, "grad_norm": 0.6198643445968628, "learning_rate": 0.0007403921568627451, "loss": 0.4408, "step": 9376 }, { "epoch": 5.23854748603352, "grad_norm": 1.7232438325881958, "learning_rate": 0.0007403641456582633, "loss": 0.4576, "step": 9377 }, { "epoch": 5.239106145251396, "grad_norm": 2.003746271133423, "learning_rate": 0.0007403361344537815, "loss": 0.4295, "step": 9378 }, { "epoch": 5.239664804469274, "grad_norm": 0.5383461713790894, "learning_rate": 
0.0007403081232492997, "loss": 0.4801, "step": 9379 }, { "epoch": 5.240223463687151, "grad_norm": 0.6325685381889343, "learning_rate": 0.0007402801120448179, "loss": 0.4634, "step": 9380 }, { "epoch": 5.240782122905028, "grad_norm": 1.5545400381088257, "learning_rate": 0.0007402521008403361, "loss": 0.4192, "step": 9381 }, { "epoch": 5.241340782122905, "grad_norm": 0.48496976494789124, "learning_rate": 0.0007402240896358543, "loss": 0.5299, "step": 9382 }, { "epoch": 5.241899441340782, "grad_norm": 0.6085032224655151, "learning_rate": 0.0007401960784313725, "loss": 0.3802, "step": 9383 }, { "epoch": 5.242458100558659, "grad_norm": 0.4096871316432953, "learning_rate": 0.0007401680672268908, "loss": 0.3941, "step": 9384 }, { "epoch": 5.243016759776537, "grad_norm": 0.8473930954933167, "learning_rate": 0.000740140056022409, "loss": 0.4374, "step": 9385 }, { "epoch": 5.243575418994413, "grad_norm": 2.512249231338501, "learning_rate": 0.0007401120448179272, "loss": 0.5393, "step": 9386 }, { "epoch": 5.244134078212291, "grad_norm": 0.4696202576160431, "learning_rate": 0.0007400840336134454, "loss": 0.4097, "step": 9387 }, { "epoch": 5.244692737430167, "grad_norm": 0.890044629573822, "learning_rate": 0.0007400560224089637, "loss": 0.5202, "step": 9388 }, { "epoch": 5.245251396648045, "grad_norm": 0.968052864074707, "learning_rate": 0.0007400280112044818, "loss": 0.4386, "step": 9389 }, { "epoch": 5.245810055865922, "grad_norm": 0.6479350924491882, "learning_rate": 0.00074, "loss": 0.4354, "step": 9390 }, { "epoch": 5.246368715083799, "grad_norm": 0.5397375226020813, "learning_rate": 0.0007399719887955182, "loss": 0.4201, "step": 9391 }, { "epoch": 5.246927374301676, "grad_norm": 0.93658846616745, "learning_rate": 0.0007399439775910364, "loss": 0.644, "step": 9392 }, { "epoch": 5.247486033519553, "grad_norm": 0.757621169090271, "learning_rate": 0.0007399159663865547, "loss": 0.508, "step": 9393 }, { "epoch": 5.24804469273743, "grad_norm": 1.8535314798355103, 
"learning_rate": 0.0007398879551820728, "loss": 0.5549, "step": 9394 }, { "epoch": 5.248603351955307, "grad_norm": 0.749240517616272, "learning_rate": 0.000739859943977591, "loss": 0.4933, "step": 9395 }, { "epoch": 5.249162011173184, "grad_norm": 0.5966920256614685, "learning_rate": 0.0007398319327731092, "loss": 0.6242, "step": 9396 }, { "epoch": 5.249720670391062, "grad_norm": 0.42783230543136597, "learning_rate": 0.0007398039215686274, "loss": 0.435, "step": 9397 }, { "epoch": 5.250279329608938, "grad_norm": 0.62357097864151, "learning_rate": 0.0007397759103641457, "loss": 0.4688, "step": 9398 }, { "epoch": 5.250837988826816, "grad_norm": 0.5383201837539673, "learning_rate": 0.0007397478991596638, "loss": 0.4474, "step": 9399 }, { "epoch": 5.251396648044693, "grad_norm": 0.44935116171836853, "learning_rate": 0.000739719887955182, "loss": 0.406, "step": 9400 }, { "epoch": 5.25195530726257, "grad_norm": 0.5153525471687317, "learning_rate": 0.0007396918767507003, "loss": 0.4749, "step": 9401 }, { "epoch": 5.252513966480447, "grad_norm": 0.51866614818573, "learning_rate": 0.0007396638655462185, "loss": 0.4952, "step": 9402 }, { "epoch": 5.253072625698324, "grad_norm": 0.4570440351963043, "learning_rate": 0.0007396358543417368, "loss": 0.4558, "step": 9403 }, { "epoch": 5.253631284916201, "grad_norm": 0.5158559679985046, "learning_rate": 0.000739607843137255, "loss": 0.5161, "step": 9404 }, { "epoch": 5.254189944134078, "grad_norm": 10.15989875793457, "learning_rate": 0.0007395798319327731, "loss": 0.4096, "step": 9405 }, { "epoch": 5.254748603351955, "grad_norm": 0.4160911440849304, "learning_rate": 0.0007395518207282913, "loss": 0.4035, "step": 9406 }, { "epoch": 5.255307262569833, "grad_norm": 0.6594462990760803, "learning_rate": 0.0007395238095238095, "loss": 0.4395, "step": 9407 }, { "epoch": 5.255865921787709, "grad_norm": 0.45077621936798096, "learning_rate": 0.0007394957983193278, "loss": 0.4227, "step": 9408 }, { "epoch": 5.256424581005587, "grad_norm": 
0.8619872331619263, "learning_rate": 0.000739467787114846, "loss": 0.6381, "step": 9409 }, { "epoch": 5.256983240223463, "grad_norm": 0.5996870398521423, "learning_rate": 0.0007394397759103641, "loss": 0.393, "step": 9410 }, { "epoch": 5.257541899441341, "grad_norm": 0.5486067533493042, "learning_rate": 0.0007394117647058823, "loss": 0.4793, "step": 9411 }, { "epoch": 5.258100558659218, "grad_norm": 0.5369665026664734, "learning_rate": 0.0007393837535014005, "loss": 0.6459, "step": 9412 }, { "epoch": 5.258659217877095, "grad_norm": 0.41904544830322266, "learning_rate": 0.0007393557422969188, "loss": 0.4429, "step": 9413 }, { "epoch": 5.259217877094972, "grad_norm": 0.79512619972229, "learning_rate": 0.000739327731092437, "loss": 0.4083, "step": 9414 }, { "epoch": 5.259776536312849, "grad_norm": 0.8285619616508484, "learning_rate": 0.0007392997198879551, "loss": 0.5299, "step": 9415 }, { "epoch": 5.260335195530726, "grad_norm": 0.5362997651100159, "learning_rate": 0.0007392717086834733, "loss": 0.3856, "step": 9416 }, { "epoch": 5.260893854748604, "grad_norm": 0.772506833076477, "learning_rate": 0.0007392436974789916, "loss": 0.6419, "step": 9417 }, { "epoch": 5.26145251396648, "grad_norm": 0.8862659335136414, "learning_rate": 0.0007392156862745099, "loss": 0.4034, "step": 9418 }, { "epoch": 5.262011173184358, "grad_norm": 0.44163113832473755, "learning_rate": 0.0007391876750700281, "loss": 0.479, "step": 9419 }, { "epoch": 5.262569832402234, "grad_norm": 0.7946412563323975, "learning_rate": 0.0007391596638655463, "loss": 0.547, "step": 9420 }, { "epoch": 5.263128491620112, "grad_norm": 0.8583797812461853, "learning_rate": 0.0007391316526610644, "loss": 0.4984, "step": 9421 }, { "epoch": 5.263687150837989, "grad_norm": 0.8412925601005554, "learning_rate": 0.0007391036414565826, "loss": 0.4153, "step": 9422 }, { "epoch": 5.264245810055866, "grad_norm": 1.3151321411132812, "learning_rate": 0.0007390756302521009, "loss": 0.5074, "step": 9423 }, { "epoch": 
5.264804469273743, "grad_norm": 0.48241835832595825, "learning_rate": 0.0007390476190476191, "loss": 0.4557, "step": 9424 }, { "epoch": 5.26536312849162, "grad_norm": 0.4340708255767822, "learning_rate": 0.0007390196078431373, "loss": 0.457, "step": 9425 }, { "epoch": 5.265921787709497, "grad_norm": 0.7778294682502747, "learning_rate": 0.0007389915966386554, "loss": 0.5335, "step": 9426 }, { "epoch": 5.266480446927375, "grad_norm": 0.7259948253631592, "learning_rate": 0.0007389635854341736, "loss": 0.4542, "step": 9427 }, { "epoch": 5.267039106145251, "grad_norm": 0.9651069641113281, "learning_rate": 0.0007389355742296919, "loss": 0.5057, "step": 9428 }, { "epoch": 5.267597765363129, "grad_norm": 0.7418129444122314, "learning_rate": 0.0007389075630252101, "loss": 0.4645, "step": 9429 }, { "epoch": 5.268156424581005, "grad_norm": 3.919517993927002, "learning_rate": 0.0007388795518207283, "loss": 0.411, "step": 9430 }, { "epoch": 5.268715083798883, "grad_norm": 0.611871063709259, "learning_rate": 0.0007388515406162464, "loss": 0.4784, "step": 9431 }, { "epoch": 5.269273743016759, "grad_norm": 1.5884042978286743, "learning_rate": 0.0007388235294117646, "loss": 0.4451, "step": 9432 }, { "epoch": 5.269832402234637, "grad_norm": 0.8458524346351624, "learning_rate": 0.000738795518207283, "loss": 0.4626, "step": 9433 }, { "epoch": 5.270391061452514, "grad_norm": 1.2369534969329834, "learning_rate": 0.0007387675070028012, "loss": 0.3762, "step": 9434 }, { "epoch": 5.270949720670391, "grad_norm": 0.9712255597114563, "learning_rate": 0.0007387394957983194, "loss": 0.4892, "step": 9435 }, { "epoch": 5.271508379888268, "grad_norm": 0.4695819318294525, "learning_rate": 0.0007387114845938376, "loss": 0.4326, "step": 9436 }, { "epoch": 5.272067039106146, "grad_norm": 0.5627531409263611, "learning_rate": 0.0007386834733893557, "loss": 0.4219, "step": 9437 }, { "epoch": 5.272625698324022, "grad_norm": 0.6104740500450134, "learning_rate": 0.000738655462184874, "loss": 0.4783, "step": 
9438 }, { "epoch": 5.2731843575419, "grad_norm": 16.905256271362305, "learning_rate": 0.0007386274509803922, "loss": 0.4663, "step": 9439 }, { "epoch": 5.273743016759776, "grad_norm": 0.901716411113739, "learning_rate": 0.0007385994397759104, "loss": 0.4681, "step": 9440 }, { "epoch": 5.274301675977654, "grad_norm": 1.5548311471939087, "learning_rate": 0.0007385714285714286, "loss": 0.4684, "step": 9441 }, { "epoch": 5.27486033519553, "grad_norm": 2.0308778285980225, "learning_rate": 0.0007385434173669467, "loss": 0.4874, "step": 9442 }, { "epoch": 5.275418994413408, "grad_norm": 0.8268308639526367, "learning_rate": 0.000738515406162465, "loss": 0.386, "step": 9443 }, { "epoch": 5.275977653631285, "grad_norm": 0.7548191547393799, "learning_rate": 0.0007384873949579832, "loss": 0.5778, "step": 9444 }, { "epoch": 5.276536312849162, "grad_norm": 0.7626224160194397, "learning_rate": 0.0007384593837535014, "loss": 0.4707, "step": 9445 }, { "epoch": 5.277094972067039, "grad_norm": 0.45574378967285156, "learning_rate": 0.0007384313725490196, "loss": 0.5238, "step": 9446 }, { "epoch": 5.277653631284916, "grad_norm": 0.8960602879524231, "learning_rate": 0.0007384033613445377, "loss": 0.3711, "step": 9447 }, { "epoch": 5.278212290502793, "grad_norm": 0.5864726305007935, "learning_rate": 0.000738375350140056, "loss": 0.5101, "step": 9448 }, { "epoch": 5.278770949720671, "grad_norm": 0.9299264550209045, "learning_rate": 0.0007383473389355743, "loss": 0.447, "step": 9449 }, { "epoch": 5.279329608938547, "grad_norm": 0.46527713537216187, "learning_rate": 0.0007383193277310925, "loss": 0.4552, "step": 9450 }, { "epoch": 5.279888268156425, "grad_norm": 0.4526190757751465, "learning_rate": 0.0007382913165266107, "loss": 0.4653, "step": 9451 }, { "epoch": 5.280446927374301, "grad_norm": 0.515207052230835, "learning_rate": 0.0007382633053221289, "loss": 0.4275, "step": 9452 }, { "epoch": 5.281005586592179, "grad_norm": 0.6904286742210388, "learning_rate": 0.0007382352941176471, 
"loss": 0.4785, "step": 9453 }, { "epoch": 5.281564245810056, "grad_norm": 2.0360584259033203, "learning_rate": 0.0007382072829131653, "loss": 0.4571, "step": 9454 }, { "epoch": 5.282122905027933, "grad_norm": 0.5173314809799194, "learning_rate": 0.0007381792717086835, "loss": 0.419, "step": 9455 }, { "epoch": 5.28268156424581, "grad_norm": 1.329249620437622, "learning_rate": 0.0007381512605042017, "loss": 0.5878, "step": 9456 }, { "epoch": 5.283240223463687, "grad_norm": 1.6766265630722046, "learning_rate": 0.0007381232492997199, "loss": 0.4501, "step": 9457 }, { "epoch": 5.283798882681564, "grad_norm": 0.47712254524230957, "learning_rate": 0.0007380952380952381, "loss": 0.4657, "step": 9458 }, { "epoch": 5.284357541899442, "grad_norm": 2.790231466293335, "learning_rate": 0.0007380672268907563, "loss": 0.4721, "step": 9459 }, { "epoch": 5.284916201117318, "grad_norm": 0.7931320071220398, "learning_rate": 0.0007380392156862745, "loss": 0.4282, "step": 9460 }, { "epoch": 5.285474860335196, "grad_norm": 0.6824520230293274, "learning_rate": 0.0007380112044817927, "loss": 0.4684, "step": 9461 }, { "epoch": 5.286033519553072, "grad_norm": 0.6180979013442993, "learning_rate": 0.0007379831932773109, "loss": 0.4594, "step": 9462 }, { "epoch": 5.28659217877095, "grad_norm": 0.7881502509117126, "learning_rate": 0.0007379551820728292, "loss": 0.5103, "step": 9463 }, { "epoch": 5.287150837988827, "grad_norm": 0.4643149971961975, "learning_rate": 0.0007379271708683473, "loss": 0.5473, "step": 9464 }, { "epoch": 5.287709497206704, "grad_norm": 0.5156552195549011, "learning_rate": 0.0007378991596638655, "loss": 0.4879, "step": 9465 }, { "epoch": 5.288268156424581, "grad_norm": 0.865510106086731, "learning_rate": 0.0007378711484593838, "loss": 0.5488, "step": 9466 }, { "epoch": 5.288826815642458, "grad_norm": 0.6039503812789917, "learning_rate": 0.000737843137254902, "loss": 0.5231, "step": 9467 }, { "epoch": 5.289385474860335, "grad_norm": 0.5857338309288025, "learning_rate": 
0.0007378151260504203, "loss": 0.5269, "step": 9468 }, { "epoch": 5.289944134078212, "grad_norm": 2.5231263637542725, "learning_rate": 0.0007377871148459384, "loss": 0.4478, "step": 9469 }, { "epoch": 5.290502793296089, "grad_norm": 0.5198138952255249, "learning_rate": 0.0007377591036414566, "loss": 0.4359, "step": 9470 }, { "epoch": 5.291061452513967, "grad_norm": 0.543129563331604, "learning_rate": 0.0007377310924369748, "loss": 0.4212, "step": 9471 }, { "epoch": 5.291620111731843, "grad_norm": 0.8600278496742249, "learning_rate": 0.000737703081232493, "loss": 0.4645, "step": 9472 }, { "epoch": 5.292178770949721, "grad_norm": 0.6033267378807068, "learning_rate": 0.0007376750700280113, "loss": 0.4627, "step": 9473 }, { "epoch": 5.292737430167598, "grad_norm": 0.6613966226577759, "learning_rate": 0.0007376470588235294, "loss": 0.4022, "step": 9474 }, { "epoch": 5.293296089385475, "grad_norm": 0.5299205183982849, "learning_rate": 0.0007376190476190476, "loss": 0.4745, "step": 9475 }, { "epoch": 5.293854748603352, "grad_norm": 0.5181791186332703, "learning_rate": 0.0007375910364145658, "loss": 0.4996, "step": 9476 }, { "epoch": 5.294413407821229, "grad_norm": 0.46199846267700195, "learning_rate": 0.000737563025210084, "loss": 0.4442, "step": 9477 }, { "epoch": 5.294972067039106, "grad_norm": 0.37143176794052124, "learning_rate": 0.0007375350140056023, "loss": 0.4191, "step": 9478 }, { "epoch": 5.295530726256983, "grad_norm": 0.4858918786048889, "learning_rate": 0.0007375070028011205, "loss": 0.5081, "step": 9479 }, { "epoch": 5.29608938547486, "grad_norm": 0.5810004472732544, "learning_rate": 0.0007374789915966386, "loss": 0.463, "step": 9480 }, { "epoch": 5.296648044692738, "grad_norm": 0.5940622091293335, "learning_rate": 0.0007374509803921568, "loss": 0.5178, "step": 9481 }, { "epoch": 5.297206703910614, "grad_norm": 0.3989871144294739, "learning_rate": 0.000737422969187675, "loss": 0.3872, "step": 9482 }, { "epoch": 5.297765363128492, "grad_norm": 
1.742755651473999, "learning_rate": 0.0007373949579831934, "loss": 0.5275, "step": 9483 }, { "epoch": 5.298324022346368, "grad_norm": 0.5456664562225342, "learning_rate": 0.0007373669467787116, "loss": 0.3836, "step": 9484 }, { "epoch": 5.298882681564246, "grad_norm": 0.5809146165847778, "learning_rate": 0.0007373389355742297, "loss": 0.4341, "step": 9485 }, { "epoch": 5.299441340782123, "grad_norm": 1.146213412284851, "learning_rate": 0.0007373109243697479, "loss": 0.4086, "step": 9486 }, { "epoch": 5.3, "grad_norm": 0.6028604507446289, "learning_rate": 0.0007372829131652661, "loss": 0.581, "step": 9487 }, { "epoch": 5.300558659217877, "grad_norm": 1.5353025197982788, "learning_rate": 0.0007372549019607844, "loss": 0.6055, "step": 9488 }, { "epoch": 5.301117318435754, "grad_norm": 1.2211227416992188, "learning_rate": 0.0007372268907563026, "loss": 0.4879, "step": 9489 }, { "epoch": 5.301675977653631, "grad_norm": 0.5455441474914551, "learning_rate": 0.0007371988795518207, "loss": 0.3609, "step": 9490 }, { "epoch": 5.302234636871509, "grad_norm": 1.0635169744491577, "learning_rate": 0.0007371708683473389, "loss": 0.5163, "step": 9491 }, { "epoch": 5.302793296089385, "grad_norm": 0.5655789971351624, "learning_rate": 0.0007371428571428571, "loss": 0.4182, "step": 9492 }, { "epoch": 5.303351955307263, "grad_norm": 0.9413765668869019, "learning_rate": 0.0007371148459383754, "loss": 0.508, "step": 9493 }, { "epoch": 5.303910614525139, "grad_norm": 0.4511139690876007, "learning_rate": 0.0007370868347338936, "loss": 0.3578, "step": 9494 }, { "epoch": 5.304469273743017, "grad_norm": 0.5222950577735901, "learning_rate": 0.0007370588235294118, "loss": 0.4763, "step": 9495 }, { "epoch": 5.305027932960894, "grad_norm": 1.1022090911865234, "learning_rate": 0.0007370308123249299, "loss": 0.5076, "step": 9496 }, { "epoch": 5.305586592178771, "grad_norm": 0.492253839969635, "learning_rate": 0.0007370028011204481, "loss": 0.4645, "step": 9497 }, { "epoch": 5.306145251396648, 
"grad_norm": 3.3863306045532227, "learning_rate": 0.0007369747899159665, "loss": 0.4724, "step": 9498 }, { "epoch": 5.306703910614525, "grad_norm": 0.8537158966064453, "learning_rate": 0.0007369467787114847, "loss": 0.5155, "step": 9499 }, { "epoch": 5.307262569832402, "grad_norm": 1.990062952041626, "learning_rate": 0.0007369187675070029, "loss": 0.4344, "step": 9500 }, { "epoch": 5.307262569832402, "eval_cer": 0.09389763564749654, "eval_loss": 0.35308894515037537, "eval_runtime": 55.6031, "eval_samples_per_second": 81.614, "eval_steps_per_second": 5.108, "eval_wer": 0.3660512276193402, "step": 9500 }, { "epoch": 5.30782122905028, "grad_norm": 0.5177279114723206, "learning_rate": 0.000736890756302521, "loss": 0.4007, "step": 9501 }, { "epoch": 5.308379888268156, "grad_norm": 0.5062219500541687, "learning_rate": 0.0007368627450980392, "loss": 0.4037, "step": 9502 }, { "epoch": 5.308938547486034, "grad_norm": 0.43697401881217957, "learning_rate": 0.0007368347338935575, "loss": 0.4633, "step": 9503 }, { "epoch": 5.30949720670391, "grad_norm": 0.6515137553215027, "learning_rate": 0.0007368067226890757, "loss": 0.4096, "step": 9504 }, { "epoch": 5.310055865921788, "grad_norm": 0.5856940150260925, "learning_rate": 0.0007367787114845939, "loss": 0.7466, "step": 9505 }, { "epoch": 5.310614525139664, "grad_norm": 0.5021346807479858, "learning_rate": 0.000736750700280112, "loss": 0.3782, "step": 9506 }, { "epoch": 5.311173184357542, "grad_norm": 0.5606846213340759, "learning_rate": 0.0007367226890756302, "loss": 0.6457, "step": 9507 }, { "epoch": 5.311731843575419, "grad_norm": 0.6789116263389587, "learning_rate": 0.0007366946778711485, "loss": 0.5971, "step": 9508 }, { "epoch": 5.312290502793296, "grad_norm": 0.5939801931381226, "learning_rate": 0.0007366666666666667, "loss": 0.5072, "step": 9509 }, { "epoch": 5.312849162011173, "grad_norm": 1.4139007329940796, "learning_rate": 0.0007366386554621849, "loss": 0.5588, "step": 9510 }, { "epoch": 5.31340782122905, "grad_norm": 
0.5975939631462097, "learning_rate": 0.0007366106442577031, "loss": 0.4135, "step": 9511 }, { "epoch": 5.313966480446927, "grad_norm": 0.7397623062133789, "learning_rate": 0.0007365826330532212, "loss": 0.4587, "step": 9512 }, { "epoch": 5.314525139664805, "grad_norm": 0.6376792788505554, "learning_rate": 0.0007365546218487395, "loss": 0.4865, "step": 9513 }, { "epoch": 5.315083798882681, "grad_norm": 1.171474814414978, "learning_rate": 0.0007365266106442577, "loss": 0.4868, "step": 9514 }, { "epoch": 5.315642458100559, "grad_norm": 1.2171443700790405, "learning_rate": 0.000736498599439776, "loss": 0.4821, "step": 9515 }, { "epoch": 5.316201117318435, "grad_norm": 0.5943564176559448, "learning_rate": 0.0007364705882352942, "loss": 0.5972, "step": 9516 }, { "epoch": 5.316759776536313, "grad_norm": 0.5895178318023682, "learning_rate": 0.0007364425770308123, "loss": 0.4112, "step": 9517 }, { "epoch": 5.31731843575419, "grad_norm": 0.9535491466522217, "learning_rate": 0.0007364145658263306, "loss": 0.5116, "step": 9518 }, { "epoch": 5.317877094972067, "grad_norm": 0.47943904995918274, "learning_rate": 0.0007363865546218488, "loss": 0.5044, "step": 9519 }, { "epoch": 5.318435754189944, "grad_norm": 0.9426802396774292, "learning_rate": 0.000736358543417367, "loss": 0.5899, "step": 9520 }, { "epoch": 5.318994413407821, "grad_norm": 0.6176594495773315, "learning_rate": 0.0007363305322128852, "loss": 0.4837, "step": 9521 }, { "epoch": 5.319553072625698, "grad_norm": 0.4418538212776184, "learning_rate": 0.0007363025210084033, "loss": 0.4643, "step": 9522 }, { "epoch": 5.320111731843576, "grad_norm": 0.5352808237075806, "learning_rate": 0.0007362745098039216, "loss": 0.4562, "step": 9523 }, { "epoch": 5.320670391061452, "grad_norm": 0.6674498319625854, "learning_rate": 0.0007362464985994398, "loss": 0.4663, "step": 9524 }, { "epoch": 5.32122905027933, "grad_norm": 0.9467893838882446, "learning_rate": 0.000736218487394958, "loss": 0.5103, "step": 9525 }, { "epoch": 
5.321787709497206, "grad_norm": 0.9457280039787292, "learning_rate": 0.0007361904761904762, "loss": 0.5996, "step": 9526 }, { "epoch": 5.322346368715084, "grad_norm": 0.7973175048828125, "learning_rate": 0.0007361624649859944, "loss": 0.4759, "step": 9527 }, { "epoch": 5.322905027932961, "grad_norm": 0.8320454955101013, "learning_rate": 0.0007361344537815126, "loss": 0.5847, "step": 9528 }, { "epoch": 5.323463687150838, "grad_norm": 0.8061323165893555, "learning_rate": 0.0007361064425770308, "loss": 0.3615, "step": 9529 }, { "epoch": 5.324022346368715, "grad_norm": 0.5948437452316284, "learning_rate": 0.000736078431372549, "loss": 0.4482, "step": 9530 }, { "epoch": 5.324581005586592, "grad_norm": 0.741431713104248, "learning_rate": 0.0007360504201680673, "loss": 0.458, "step": 9531 }, { "epoch": 5.325139664804469, "grad_norm": 0.6204893589019775, "learning_rate": 0.0007360224089635855, "loss": 0.4559, "step": 9532 }, { "epoch": 5.325698324022347, "grad_norm": 0.5414924621582031, "learning_rate": 0.0007359943977591037, "loss": 0.4561, "step": 9533 }, { "epoch": 5.326256983240223, "grad_norm": 0.5504729747772217, "learning_rate": 0.0007359663865546219, "loss": 0.4376, "step": 9534 }, { "epoch": 5.326815642458101, "grad_norm": 0.9887414574623108, "learning_rate": 0.0007359383753501401, "loss": 0.4397, "step": 9535 }, { "epoch": 5.327374301675977, "grad_norm": 0.5801450610160828, "learning_rate": 0.0007359103641456583, "loss": 0.5307, "step": 9536 }, { "epoch": 5.327932960893855, "grad_norm": 2.0622060298919678, "learning_rate": 0.0007358823529411765, "loss": 0.4666, "step": 9537 }, { "epoch": 5.328491620111732, "grad_norm": 0.42980602383613586, "learning_rate": 0.0007358543417366947, "loss": 0.5286, "step": 9538 }, { "epoch": 5.329050279329609, "grad_norm": 0.6252503395080566, "learning_rate": 0.0007358263305322129, "loss": 0.5033, "step": 9539 }, { "epoch": 5.329608938547486, "grad_norm": 7.814577102661133, "learning_rate": 0.0007357983193277311, "loss": 0.4454, 
"step": 9540 }, { "epoch": 5.330167597765363, "grad_norm": 0.5140522122383118, "learning_rate": 0.0007357703081232493, "loss": 0.4402, "step": 9541 }, { "epoch": 5.33072625698324, "grad_norm": 0.5562108755111694, "learning_rate": 0.0007357422969187675, "loss": 0.4818, "step": 9542 }, { "epoch": 5.331284916201117, "grad_norm": 0.7808133363723755, "learning_rate": 0.0007357142857142858, "loss": 0.3713, "step": 9543 }, { "epoch": 5.331843575418994, "grad_norm": 0.5227891802787781, "learning_rate": 0.0007356862745098039, "loss": 0.5561, "step": 9544 }, { "epoch": 5.332402234636872, "grad_norm": 0.4147838354110718, "learning_rate": 0.0007356582633053221, "loss": 0.3992, "step": 9545 }, { "epoch": 5.332960893854748, "grad_norm": 0.8092637062072754, "learning_rate": 0.0007356302521008403, "loss": 0.4472, "step": 9546 }, { "epoch": 5.333519553072626, "grad_norm": 0.617157518863678, "learning_rate": 0.0007356022408963585, "loss": 0.3695, "step": 9547 }, { "epoch": 5.334078212290502, "grad_norm": 0.7355561256408691, "learning_rate": 0.0007355742296918769, "loss": 0.5167, "step": 9548 }, { "epoch": 5.33463687150838, "grad_norm": 0.613189697265625, "learning_rate": 0.000735546218487395, "loss": 0.4588, "step": 9549 }, { "epoch": 5.335195530726257, "grad_norm": 0.5412580370903015, "learning_rate": 0.0007355182072829132, "loss": 0.4713, "step": 9550 }, { "epoch": 5.335754189944134, "grad_norm": 0.5945103168487549, "learning_rate": 0.0007354901960784314, "loss": 0.68, "step": 9551 }, { "epoch": 5.336312849162011, "grad_norm": 0.934156596660614, "learning_rate": 0.0007354621848739496, "loss": 0.5731, "step": 9552 }, { "epoch": 5.336871508379888, "grad_norm": 6.088711261749268, "learning_rate": 0.0007354341736694679, "loss": 0.3591, "step": 9553 }, { "epoch": 5.337430167597765, "grad_norm": 0.594549298286438, "learning_rate": 0.000735406162464986, "loss": 0.4115, "step": 9554 }, { "epoch": 5.337988826815643, "grad_norm": 0.5507545471191406, "learning_rate": 0.0007353781512605042, 
"loss": 0.5486, "step": 9555 }, { "epoch": 5.338547486033519, "grad_norm": 0.568227231502533, "learning_rate": 0.0007353501400560224, "loss": 0.5236, "step": 9556 }, { "epoch": 5.339106145251397, "grad_norm": 0.5101413726806641, "learning_rate": 0.0007353221288515406, "loss": 0.3114, "step": 9557 }, { "epoch": 5.339664804469273, "grad_norm": 1.19283127784729, "learning_rate": 0.0007352941176470589, "loss": 0.3956, "step": 9558 }, { "epoch": 5.340223463687151, "grad_norm": 1.1779811382293701, "learning_rate": 0.0007352661064425771, "loss": 0.4663, "step": 9559 }, { "epoch": 5.340782122905028, "grad_norm": 0.46355003118515015, "learning_rate": 0.0007352380952380952, "loss": 0.4456, "step": 9560 }, { "epoch": 5.341340782122905, "grad_norm": 0.5419101119041443, "learning_rate": 0.0007352100840336134, "loss": 0.4775, "step": 9561 }, { "epoch": 5.341899441340782, "grad_norm": 0.4649917781352997, "learning_rate": 0.0007351820728291316, "loss": 0.2917, "step": 9562 }, { "epoch": 5.342458100558659, "grad_norm": 0.8416962027549744, "learning_rate": 0.00073515406162465, "loss": 0.5765, "step": 9563 }, { "epoch": 5.343016759776536, "grad_norm": 2.6413912773132324, "learning_rate": 0.0007351260504201682, "loss": 0.4417, "step": 9564 }, { "epoch": 5.343575418994414, "grad_norm": 0.5505444407463074, "learning_rate": 0.0007350980392156863, "loss": 0.3918, "step": 9565 }, { "epoch": 5.34413407821229, "grad_norm": 0.4794973134994507, "learning_rate": 0.0007350700280112045, "loss": 0.4912, "step": 9566 }, { "epoch": 5.344692737430168, "grad_norm": 0.5175204277038574, "learning_rate": 0.0007350420168067227, "loss": 0.5054, "step": 9567 }, { "epoch": 5.345251396648044, "grad_norm": 0.4456702172756195, "learning_rate": 0.000735014005602241, "loss": 0.4262, "step": 9568 }, { "epoch": 5.345810055865922, "grad_norm": 0.6180078387260437, "learning_rate": 0.0007349859943977592, "loss": 0.3775, "step": 9569 }, { "epoch": 5.346368715083799, "grad_norm": 0.972252607345581, "learning_rate": 
0.0007349579831932773, "loss": 0.4475, "step": 9570 }, { "epoch": 5.346927374301676, "grad_norm": 0.5381904244422913, "learning_rate": 0.0007349299719887955, "loss": 0.4487, "step": 9571 }, { "epoch": 5.347486033519553, "grad_norm": 0.7485121488571167, "learning_rate": 0.0007349019607843137, "loss": 0.6094, "step": 9572 }, { "epoch": 5.34804469273743, "grad_norm": 0.4415353536605835, "learning_rate": 0.000734873949579832, "loss": 0.5194, "step": 9573 }, { "epoch": 5.348603351955307, "grad_norm": 0.40677085518836975, "learning_rate": 0.0007348459383753502, "loss": 0.3824, "step": 9574 }, { "epoch": 5.349162011173185, "grad_norm": 0.48893308639526367, "learning_rate": 0.0007348179271708684, "loss": 0.4422, "step": 9575 }, { "epoch": 5.349720670391061, "grad_norm": 0.9173416495323181, "learning_rate": 0.0007347899159663865, "loss": 0.4142, "step": 9576 }, { "epoch": 5.350279329608939, "grad_norm": 0.5607465505599976, "learning_rate": 0.0007347619047619047, "loss": 0.5624, "step": 9577 }, { "epoch": 5.350837988826815, "grad_norm": 3.155935764312744, "learning_rate": 0.000734733893557423, "loss": 0.48, "step": 9578 }, { "epoch": 5.351396648044693, "grad_norm": 0.4716293513774872, "learning_rate": 0.0007347058823529412, "loss": 0.4586, "step": 9579 }, { "epoch": 5.351955307262569, "grad_norm": 0.49036017060279846, "learning_rate": 0.0007346778711484595, "loss": 0.514, "step": 9580 }, { "epoch": 5.352513966480447, "grad_norm": 0.5479791760444641, "learning_rate": 0.0007346498599439776, "loss": 0.3693, "step": 9581 }, { "epoch": 5.353072625698324, "grad_norm": 1.1325300931930542, "learning_rate": 0.0007346218487394958, "loss": 0.4994, "step": 9582 }, { "epoch": 5.353631284916201, "grad_norm": 0.40058624744415283, "learning_rate": 0.0007345938375350141, "loss": 0.3965, "step": 9583 }, { "epoch": 5.354189944134078, "grad_norm": 0.5006220936775208, "learning_rate": 0.0007345658263305323, "loss": 0.3884, "step": 9584 }, { "epoch": 5.354748603351955, "grad_norm": 
0.4322543144226074, "learning_rate": 0.0007345378151260505, "loss": 0.436, "step": 9585 }, { "epoch": 5.355307262569832, "grad_norm": 0.6249381899833679, "learning_rate": 0.0007345098039215686, "loss": 0.4477, "step": 9586 }, { "epoch": 5.35586592178771, "grad_norm": 2.471202850341797, "learning_rate": 0.0007344817927170868, "loss": 0.5496, "step": 9587 }, { "epoch": 5.356424581005586, "grad_norm": 0.5098752975463867, "learning_rate": 0.0007344537815126051, "loss": 0.4727, "step": 9588 }, { "epoch": 5.356983240223464, "grad_norm": 0.43282684683799744, "learning_rate": 0.0007344257703081233, "loss": 0.4393, "step": 9589 }, { "epoch": 5.35754189944134, "grad_norm": 0.699896514415741, "learning_rate": 0.0007343977591036415, "loss": 0.4678, "step": 9590 }, { "epoch": 5.358100558659218, "grad_norm": 3.43294620513916, "learning_rate": 0.0007343697478991597, "loss": 0.4484, "step": 9591 }, { "epoch": 5.358659217877095, "grad_norm": 0.6883873343467712, "learning_rate": 0.0007343417366946778, "loss": 0.5667, "step": 9592 }, { "epoch": 5.359217877094972, "grad_norm": 0.6168370246887207, "learning_rate": 0.000734313725490196, "loss": 0.5427, "step": 9593 }, { "epoch": 5.359776536312849, "grad_norm": 0.5809407830238342, "learning_rate": 0.0007342857142857143, "loss": 0.4899, "step": 9594 }, { "epoch": 5.360335195530726, "grad_norm": 4.26202917098999, "learning_rate": 0.0007342577030812325, "loss": 0.5517, "step": 9595 }, { "epoch": 5.360893854748603, "grad_norm": 0.45270437002182007, "learning_rate": 0.0007342296918767507, "loss": 0.4688, "step": 9596 }, { "epoch": 5.361452513966481, "grad_norm": 0.46052780747413635, "learning_rate": 0.0007342016806722688, "loss": 0.3735, "step": 9597 }, { "epoch": 5.362011173184357, "grad_norm": 0.8623282313346863, "learning_rate": 0.000734173669467787, "loss": 0.5642, "step": 9598 }, { "epoch": 5.362569832402235, "grad_norm": 0.6653597354888916, "learning_rate": 0.0007341456582633054, "loss": 0.5399, "step": 9599 }, { "epoch": 
5.363128491620111, "grad_norm": 0.7668310403823853, "learning_rate": 0.0007341176470588236, "loss": 0.401, "step": 9600 }, { "epoch": 5.363687150837989, "grad_norm": 0.5306023955345154, "learning_rate": 0.0007340896358543418, "loss": 0.3953, "step": 9601 }, { "epoch": 5.364245810055866, "grad_norm": 0.5377447605133057, "learning_rate": 0.0007340616246498599, "loss": 0.5033, "step": 9602 }, { "epoch": 5.364804469273743, "grad_norm": 0.5565674901008606, "learning_rate": 0.0007340336134453781, "loss": 0.4676, "step": 9603 }, { "epoch": 5.36536312849162, "grad_norm": 0.6691960096359253, "learning_rate": 0.0007340056022408964, "loss": 0.3914, "step": 9604 }, { "epoch": 5.365921787709497, "grad_norm": 0.660874605178833, "learning_rate": 0.0007339775910364146, "loss": 0.5166, "step": 9605 }, { "epoch": 5.366480446927374, "grad_norm": 0.6722997426986694, "learning_rate": 0.0007339495798319328, "loss": 0.5726, "step": 9606 }, { "epoch": 5.367039106145251, "grad_norm": 0.5346750617027283, "learning_rate": 0.000733921568627451, "loss": 0.3429, "step": 9607 }, { "epoch": 5.367597765363128, "grad_norm": 0.808154821395874, "learning_rate": 0.0007338935574229691, "loss": 0.4468, "step": 9608 }, { "epoch": 5.368156424581006, "grad_norm": 0.7308852076530457, "learning_rate": 0.0007338655462184874, "loss": 0.4426, "step": 9609 }, { "epoch": 5.368715083798882, "grad_norm": 0.6538249254226685, "learning_rate": 0.0007338375350140056, "loss": 0.4008, "step": 9610 }, { "epoch": 5.36927374301676, "grad_norm": 0.5729008913040161, "learning_rate": 0.0007338095238095238, "loss": 0.4915, "step": 9611 }, { "epoch": 5.369832402234637, "grad_norm": 0.6548587083816528, "learning_rate": 0.000733781512605042, "loss": 0.469, "step": 9612 }, { "epoch": 5.370391061452514, "grad_norm": 0.9366130232810974, "learning_rate": 0.0007337535014005601, "loss": 0.5295, "step": 9613 }, { "epoch": 5.370949720670391, "grad_norm": 0.602325975894928, "learning_rate": 0.0007337254901960785, "loss": 0.3406, "step": 
9614 }, { "epoch": 5.371508379888268, "grad_norm": 0.652629017829895, "learning_rate": 0.0007336974789915967, "loss": 0.469, "step": 9615 }, { "epoch": 5.372067039106145, "grad_norm": 0.442274272441864, "learning_rate": 0.0007336694677871149, "loss": 0.4669, "step": 9616 }, { "epoch": 5.372625698324022, "grad_norm": 0.5751197934150696, "learning_rate": 0.0007336414565826331, "loss": 0.4388, "step": 9617 }, { "epoch": 5.373184357541899, "grad_norm": 0.4405896067619324, "learning_rate": 0.0007336134453781512, "loss": 0.3996, "step": 9618 }, { "epoch": 5.373743016759777, "grad_norm": 0.457009494304657, "learning_rate": 0.0007335854341736695, "loss": 0.5106, "step": 9619 }, { "epoch": 5.374301675977653, "grad_norm": 0.4784098267555237, "learning_rate": 0.0007335574229691877, "loss": 0.3808, "step": 9620 }, { "epoch": 5.374860335195531, "grad_norm": 0.6183488368988037, "learning_rate": 0.0007335294117647059, "loss": 0.4381, "step": 9621 }, { "epoch": 5.375418994413407, "grad_norm": 0.5040037035942078, "learning_rate": 0.0007335014005602241, "loss": 0.4397, "step": 9622 }, { "epoch": 5.375977653631285, "grad_norm": 0.6120518445968628, "learning_rate": 0.0007334733893557423, "loss": 0.514, "step": 9623 }, { "epoch": 5.376536312849162, "grad_norm": 1.4700332880020142, "learning_rate": 0.0007334453781512605, "loss": 0.463, "step": 9624 }, { "epoch": 5.377094972067039, "grad_norm": 3.4195826053619385, "learning_rate": 0.0007334173669467787, "loss": 0.461, "step": 9625 }, { "epoch": 5.377653631284916, "grad_norm": 0.5422326326370239, "learning_rate": 0.0007333893557422969, "loss": 0.4584, "step": 9626 }, { "epoch": 5.378212290502793, "grad_norm": 0.48698753118515015, "learning_rate": 0.0007333613445378151, "loss": 0.3529, "step": 9627 }, { "epoch": 5.37877094972067, "grad_norm": 0.7807590365409851, "learning_rate": 0.0007333333333333333, "loss": 0.4786, "step": 9628 }, { "epoch": 5.379329608938548, "grad_norm": 0.4735409915447235, "learning_rate": 0.0007333053221288515, 
"loss": 0.4722, "step": 9629 }, { "epoch": 5.379888268156424, "grad_norm": 0.5114326477050781, "learning_rate": 0.0007332773109243698, "loss": 0.582, "step": 9630 }, { "epoch": 5.380446927374302, "grad_norm": 1.298922061920166, "learning_rate": 0.000733249299719888, "loss": 0.5337, "step": 9631 }, { "epoch": 5.381005586592178, "grad_norm": 0.39356788992881775, "learning_rate": 0.0007332212885154062, "loss": 0.3435, "step": 9632 }, { "epoch": 5.381564245810056, "grad_norm": 0.60985267162323, "learning_rate": 0.0007331932773109244, "loss": 0.3877, "step": 9633 }, { "epoch": 5.382122905027933, "grad_norm": 0.4025561809539795, "learning_rate": 0.0007331652661064426, "loss": 0.4089, "step": 9634 }, { "epoch": 5.38268156424581, "grad_norm": 0.8815410733222961, "learning_rate": 0.0007331372549019608, "loss": 0.4336, "step": 9635 }, { "epoch": 5.383240223463687, "grad_norm": 0.8768227100372314, "learning_rate": 0.000733109243697479, "loss": 0.4922, "step": 9636 }, { "epoch": 5.383798882681564, "grad_norm": 0.4536188244819641, "learning_rate": 0.0007330812324929972, "loss": 0.4011, "step": 9637 }, { "epoch": 5.384357541899441, "grad_norm": 0.4479326903820038, "learning_rate": 0.0007330532212885154, "loss": 0.5036, "step": 9638 }, { "epoch": 5.384916201117319, "grad_norm": 0.5947161912918091, "learning_rate": 0.0007330252100840337, "loss": 0.4754, "step": 9639 }, { "epoch": 5.385474860335195, "grad_norm": 0.5461640954017639, "learning_rate": 0.0007329971988795518, "loss": 0.5277, "step": 9640 }, { "epoch": 5.386033519553073, "grad_norm": 0.5795295238494873, "learning_rate": 0.00073296918767507, "loss": 0.5474, "step": 9641 }, { "epoch": 5.386592178770949, "grad_norm": 0.6543920636177063, "learning_rate": 0.0007329411764705882, "loss": 0.5214, "step": 9642 }, { "epoch": 5.387150837988827, "grad_norm": 0.4117977023124695, "learning_rate": 0.0007329131652661064, "loss": 0.4334, "step": 9643 }, { "epoch": 5.3877094972067034, "grad_norm": 0.8344374299049377, "learning_rate": 
0.0007328851540616247, "loss": 0.5254, "step": 9644 }, { "epoch": 5.388268156424581, "grad_norm": 0.8225786089897156, "learning_rate": 0.0007328571428571428, "loss": 0.3996, "step": 9645 }, { "epoch": 5.388826815642458, "grad_norm": 0.5805999636650085, "learning_rate": 0.000732829131652661, "loss": 0.4765, "step": 9646 }, { "epoch": 5.389385474860335, "grad_norm": 0.9628885984420776, "learning_rate": 0.0007328011204481793, "loss": 0.4203, "step": 9647 }, { "epoch": 5.389944134078212, "grad_norm": 0.3919629752635956, "learning_rate": 0.0007327731092436975, "loss": 0.4132, "step": 9648 }, { "epoch": 5.39050279329609, "grad_norm": 0.4957062005996704, "learning_rate": 0.0007327450980392158, "loss": 0.5428, "step": 9649 }, { "epoch": 5.391061452513966, "grad_norm": 0.5382682681083679, "learning_rate": 0.0007327170868347339, "loss": 0.4927, "step": 9650 }, { "epoch": 5.391620111731844, "grad_norm": 1.3976613283157349, "learning_rate": 0.0007326890756302521, "loss": 0.4873, "step": 9651 }, { "epoch": 5.39217877094972, "grad_norm": 0.39772841334342957, "learning_rate": 0.0007326610644257703, "loss": 0.4222, "step": 9652 }, { "epoch": 5.392737430167598, "grad_norm": 0.4938870370388031, "learning_rate": 0.0007326330532212885, "loss": 0.4397, "step": 9653 }, { "epoch": 5.3932960893854744, "grad_norm": 1.7677993774414062, "learning_rate": 0.0007326050420168068, "loss": 0.4443, "step": 9654 }, { "epoch": 5.393854748603352, "grad_norm": 0.8407166004180908, "learning_rate": 0.000732577030812325, "loss": 0.4433, "step": 9655 }, { "epoch": 5.394413407821229, "grad_norm": 0.4558146595954895, "learning_rate": 0.0007325490196078431, "loss": 0.434, "step": 9656 }, { "epoch": 5.394972067039106, "grad_norm": 0.46504995226860046, "learning_rate": 0.0007325210084033613, "loss": 0.5059, "step": 9657 }, { "epoch": 5.395530726256983, "grad_norm": 0.6100008487701416, "learning_rate": 0.0007324929971988795, "loss": 0.4628, "step": 9658 }, { "epoch": 5.39608938547486, "grad_norm": 
6.689543724060059, "learning_rate": 0.0007324649859943978, "loss": 0.5696, "step": 9659 }, { "epoch": 5.396648044692737, "grad_norm": 0.4569566547870636, "learning_rate": 0.000732436974789916, "loss": 0.4606, "step": 9660 }, { "epoch": 5.397206703910615, "grad_norm": 0.46072351932525635, "learning_rate": 0.0007324089635854341, "loss": 0.516, "step": 9661 }, { "epoch": 5.397765363128491, "grad_norm": 1.1802020072937012, "learning_rate": 0.0007323809523809523, "loss": 0.8239, "step": 9662 }, { "epoch": 5.398324022346369, "grad_norm": 1.0785757303237915, "learning_rate": 0.0007323529411764706, "loss": 0.4371, "step": 9663 }, { "epoch": 5.3988826815642454, "grad_norm": 0.5387957692146301, "learning_rate": 0.0007323249299719889, "loss": 0.4216, "step": 9664 }, { "epoch": 5.399441340782123, "grad_norm": 0.9764959216117859, "learning_rate": 0.0007322969187675071, "loss": 0.5584, "step": 9665 }, { "epoch": 5.4, "grad_norm": 0.49934449791908264, "learning_rate": 0.0007322689075630252, "loss": 0.3339, "step": 9666 }, { "epoch": 5.400558659217877, "grad_norm": 0.46985989809036255, "learning_rate": 0.0007322408963585434, "loss": 0.4857, "step": 9667 }, { "epoch": 5.401117318435754, "grad_norm": 0.4268762469291687, "learning_rate": 0.0007322128851540616, "loss": 0.4363, "step": 9668 }, { "epoch": 5.401675977653631, "grad_norm": 0.6550259590148926, "learning_rate": 0.0007321848739495799, "loss": 0.4064, "step": 9669 }, { "epoch": 5.402234636871508, "grad_norm": 0.5340927243232727, "learning_rate": 0.0007321568627450981, "loss": 0.4919, "step": 9670 }, { "epoch": 5.402793296089386, "grad_norm": 0.48531490564346313, "learning_rate": 0.0007321288515406163, "loss": 0.4698, "step": 9671 }, { "epoch": 5.403351955307262, "grad_norm": 0.6543476581573486, "learning_rate": 0.0007321008403361344, "loss": 0.6542, "step": 9672 }, { "epoch": 5.40391061452514, "grad_norm": 0.7351266741752625, "learning_rate": 0.0007320728291316526, "loss": 0.4447, "step": 9673 }, { "epoch": 5.4044692737430164, 
"grad_norm": 0.3648409843444824, "learning_rate": 0.0007320448179271709, "loss": 0.3731, "step": 9674 }, { "epoch": 5.405027932960894, "grad_norm": 0.6719562411308289, "learning_rate": 0.0007320168067226891, "loss": 0.7884, "step": 9675 }, { "epoch": 5.405586592178771, "grad_norm": 1.019452452659607, "learning_rate": 0.0007319887955182073, "loss": 0.53, "step": 9676 }, { "epoch": 5.406145251396648, "grad_norm": 0.6058816313743591, "learning_rate": 0.0007319607843137254, "loss": 0.6994, "step": 9677 }, { "epoch": 5.406703910614525, "grad_norm": 0.6727103590965271, "learning_rate": 0.0007319327731092436, "loss": 0.5242, "step": 9678 }, { "epoch": 5.407262569832402, "grad_norm": 0.5533923506736755, "learning_rate": 0.000731904761904762, "loss": 0.4036, "step": 9679 }, { "epoch": 5.407821229050279, "grad_norm": 1.0296107530593872, "learning_rate": 0.0007318767507002802, "loss": 0.4419, "step": 9680 }, { "epoch": 5.408379888268156, "grad_norm": 0.42621302604675293, "learning_rate": 0.0007318487394957984, "loss": 0.3606, "step": 9681 }, { "epoch": 5.408938547486033, "grad_norm": 0.4223034381866455, "learning_rate": 0.0007318207282913165, "loss": 0.449, "step": 9682 }, { "epoch": 5.409497206703911, "grad_norm": 0.7462381720542908, "learning_rate": 0.0007317927170868347, "loss": 0.5363, "step": 9683 }, { "epoch": 5.410055865921787, "grad_norm": 0.58668452501297, "learning_rate": 0.000731764705882353, "loss": 0.4428, "step": 9684 }, { "epoch": 5.410614525139665, "grad_norm": 0.5859023332595825, "learning_rate": 0.0007317366946778712, "loss": 0.4257, "step": 9685 }, { "epoch": 5.411173184357542, "grad_norm": 0.5037703514099121, "learning_rate": 0.0007317086834733894, "loss": 0.4004, "step": 9686 }, { "epoch": 5.411731843575419, "grad_norm": 0.3915973901748657, "learning_rate": 0.0007316806722689076, "loss": 0.4615, "step": 9687 }, { "epoch": 5.412290502793296, "grad_norm": 1.3105286359786987, "learning_rate": 0.0007316526610644257, "loss": 0.5061, "step": 9688 }, { "epoch": 
5.412849162011173, "grad_norm": 0.759484589099884, "learning_rate": 0.000731624649859944, "loss": 0.4783, "step": 9689 }, { "epoch": 5.41340782122905, "grad_norm": 0.4818784296512604, "learning_rate": 0.0007315966386554622, "loss": 0.4281, "step": 9690 }, { "epoch": 5.413966480446927, "grad_norm": 0.5857297778129578, "learning_rate": 0.0007315686274509804, "loss": 0.3879, "step": 9691 }, { "epoch": 5.414525139664804, "grad_norm": 0.4561799168586731, "learning_rate": 0.0007315406162464986, "loss": 0.4391, "step": 9692 }, { "epoch": 5.415083798882682, "grad_norm": 0.5454745888710022, "learning_rate": 0.0007315126050420167, "loss": 0.369, "step": 9693 }, { "epoch": 5.415642458100558, "grad_norm": 0.4950173497200012, "learning_rate": 0.000731484593837535, "loss": 0.4246, "step": 9694 }, { "epoch": 5.416201117318436, "grad_norm": 0.6390392184257507, "learning_rate": 0.0007314565826330533, "loss": 0.5974, "step": 9695 }, { "epoch": 5.4167597765363125, "grad_norm": 3.024045944213867, "learning_rate": 0.0007314285714285715, "loss": 0.4589, "step": 9696 }, { "epoch": 5.41731843575419, "grad_norm": 0.5367445945739746, "learning_rate": 0.0007314005602240897, "loss": 0.3821, "step": 9697 }, { "epoch": 5.417877094972067, "grad_norm": 0.599183976650238, "learning_rate": 0.0007313725490196078, "loss": 0.3664, "step": 9698 }, { "epoch": 5.418435754189944, "grad_norm": 0.6512631773948669, "learning_rate": 0.0007313445378151261, "loss": 0.4725, "step": 9699 }, { "epoch": 5.418994413407821, "grad_norm": 3.2039411067962646, "learning_rate": 0.0007313165266106443, "loss": 0.4358, "step": 9700 }, { "epoch": 5.419553072625698, "grad_norm": 0.4627593755722046, "learning_rate": 0.0007312885154061625, "loss": 0.4605, "step": 9701 }, { "epoch": 5.420111731843575, "grad_norm": 0.5062171220779419, "learning_rate": 0.0007312605042016807, "loss": 0.5212, "step": 9702 }, { "epoch": 5.420670391061453, "grad_norm": 0.43370017409324646, "learning_rate": 0.0007312324929971989, "loss": 0.3959, "step": 
9703 }, { "epoch": 5.421229050279329, "grad_norm": 2.050886392593384, "learning_rate": 0.0007312044817927171, "loss": 0.476, "step": 9704 }, { "epoch": 5.421787709497207, "grad_norm": 0.8578056693077087, "learning_rate": 0.0007311764705882353, "loss": 0.4784, "step": 9705 }, { "epoch": 5.4223463687150835, "grad_norm": 29.487979888916016, "learning_rate": 0.0007311484593837535, "loss": 0.4053, "step": 9706 }, { "epoch": 5.422905027932961, "grad_norm": 0.6425113677978516, "learning_rate": 0.0007311204481792717, "loss": 0.5146, "step": 9707 }, { "epoch": 5.423463687150838, "grad_norm": 0.5732616782188416, "learning_rate": 0.0007310924369747899, "loss": 0.5239, "step": 9708 }, { "epoch": 5.424022346368715, "grad_norm": 0.46527257561683655, "learning_rate": 0.0007310644257703081, "loss": 0.4067, "step": 9709 }, { "epoch": 5.424581005586592, "grad_norm": 0.8030074238777161, "learning_rate": 0.0007310364145658263, "loss": 0.4974, "step": 9710 }, { "epoch": 5.425139664804469, "grad_norm": 0.48365288972854614, "learning_rate": 0.0007310084033613445, "loss": 0.4305, "step": 9711 }, { "epoch": 5.425698324022346, "grad_norm": 0.468726247549057, "learning_rate": 0.0007309803921568628, "loss": 0.4297, "step": 9712 }, { "epoch": 5.426256983240224, "grad_norm": 0.6161282062530518, "learning_rate": 0.000730952380952381, "loss": 0.4664, "step": 9713 }, { "epoch": 5.4268156424581, "grad_norm": 0.6594555377960205, "learning_rate": 0.0007309243697478993, "loss": 0.5754, "step": 9714 }, { "epoch": 5.427374301675978, "grad_norm": 0.4305954575538635, "learning_rate": 0.0007308963585434174, "loss": 0.3506, "step": 9715 }, { "epoch": 5.4279329608938545, "grad_norm": 0.4502977728843689, "learning_rate": 0.0007308683473389356, "loss": 0.4304, "step": 9716 }, { "epoch": 5.428491620111732, "grad_norm": 0.502530038356781, "learning_rate": 0.0007308403361344538, "loss": 0.48, "step": 9717 }, { "epoch": 5.4290502793296085, "grad_norm": 0.5808756947517395, "learning_rate": 0.000730812324929972, 
"loss": 0.5729, "step": 9718 }, { "epoch": 5.429608938547486, "grad_norm": 3.9587249755859375, "learning_rate": 0.0007307843137254903, "loss": 0.508, "step": 9719 }, { "epoch": 5.430167597765363, "grad_norm": 0.4714196026325226, "learning_rate": 0.0007307563025210084, "loss": 0.434, "step": 9720 }, { "epoch": 5.43072625698324, "grad_norm": 0.6579060554504395, "learning_rate": 0.0007307282913165266, "loss": 0.3937, "step": 9721 }, { "epoch": 5.431284916201117, "grad_norm": 0.5075948238372803, "learning_rate": 0.0007307002801120448, "loss": 0.4437, "step": 9722 }, { "epoch": 5.431843575418995, "grad_norm": 0.5919167995452881, "learning_rate": 0.000730672268907563, "loss": 0.4974, "step": 9723 }, { "epoch": 5.432402234636871, "grad_norm": 0.6856719851493835, "learning_rate": 0.0007306442577030813, "loss": 0.4381, "step": 9724 }, { "epoch": 5.432960893854749, "grad_norm": 0.4521169066429138, "learning_rate": 0.0007306162464985994, "loss": 0.3568, "step": 9725 }, { "epoch": 5.4335195530726255, "grad_norm": 0.6040863394737244, "learning_rate": 0.0007305882352941176, "loss": 0.6364, "step": 9726 }, { "epoch": 5.434078212290503, "grad_norm": 0.47824618220329285, "learning_rate": 0.0007305602240896358, "loss": 0.3731, "step": 9727 }, { "epoch": 5.4346368715083795, "grad_norm": 0.6427138447761536, "learning_rate": 0.000730532212885154, "loss": 0.4161, "step": 9728 }, { "epoch": 5.435195530726257, "grad_norm": 0.42427805066108704, "learning_rate": 0.0007305042016806724, "loss": 0.3788, "step": 9729 }, { "epoch": 5.435754189944134, "grad_norm": 0.890143871307373, "learning_rate": 0.0007304761904761906, "loss": 0.3498, "step": 9730 }, { "epoch": 5.436312849162011, "grad_norm": 0.7862064838409424, "learning_rate": 0.0007304481792717087, "loss": 0.6898, "step": 9731 }, { "epoch": 5.436871508379888, "grad_norm": 0.5681707262992859, "learning_rate": 0.0007304201680672269, "loss": 0.4026, "step": 9732 }, { "epoch": 5.437430167597765, "grad_norm": 0.7881913781166077, "learning_rate": 
0.0007303921568627451, "loss": 0.5982, "step": 9733 }, { "epoch": 5.437988826815642, "grad_norm": 0.4226723611354828, "learning_rate": 0.0007303641456582634, "loss": 0.5315, "step": 9734 }, { "epoch": 5.43854748603352, "grad_norm": 0.5856690406799316, "learning_rate": 0.0007303361344537816, "loss": 0.5702, "step": 9735 }, { "epoch": 5.4391061452513965, "grad_norm": 0.962823212146759, "learning_rate": 0.0007303081232492997, "loss": 0.4158, "step": 9736 }, { "epoch": 5.439664804469274, "grad_norm": 0.4471222162246704, "learning_rate": 0.0007302801120448179, "loss": 0.437, "step": 9737 }, { "epoch": 5.4402234636871505, "grad_norm": 0.592616856098175, "learning_rate": 0.0007302521008403361, "loss": 0.3787, "step": 9738 }, { "epoch": 5.440782122905028, "grad_norm": 0.9380432963371277, "learning_rate": 0.0007302240896358544, "loss": 0.4581, "step": 9739 }, { "epoch": 5.441340782122905, "grad_norm": 0.5861191153526306, "learning_rate": 0.0007301960784313726, "loss": 0.5485, "step": 9740 }, { "epoch": 5.441899441340782, "grad_norm": 0.46296703815460205, "learning_rate": 0.0007301680672268907, "loss": 0.3949, "step": 9741 }, { "epoch": 5.442458100558659, "grad_norm": 0.5828236937522888, "learning_rate": 0.0007301400560224089, "loss": 0.5779, "step": 9742 }, { "epoch": 5.443016759776536, "grad_norm": 0.43828508257865906, "learning_rate": 0.0007301120448179271, "loss": 0.4102, "step": 9743 }, { "epoch": 5.443575418994413, "grad_norm": 0.7192258238792419, "learning_rate": 0.0007300840336134455, "loss": 0.4313, "step": 9744 }, { "epoch": 5.444134078212291, "grad_norm": 0.39279791712760925, "learning_rate": 0.0007300560224089637, "loss": 0.3728, "step": 9745 }, { "epoch": 5.4446927374301675, "grad_norm": 0.5375004410743713, "learning_rate": 0.0007300280112044819, "loss": 0.4513, "step": 9746 }, { "epoch": 5.445251396648045, "grad_norm": 0.5508938431739807, "learning_rate": 0.00073, "loss": 0.4021, "step": 9747 }, { "epoch": 5.4458100558659215, "grad_norm": 0.9927380084991455, 
"learning_rate": 0.0007299719887955182, "loss": 0.4315, "step": 9748 }, { "epoch": 5.446368715083799, "grad_norm": 0.5101918578147888, "learning_rate": 0.0007299439775910365, "loss": 0.3883, "step": 9749 }, { "epoch": 5.446927374301676, "grad_norm": 0.5668933987617493, "learning_rate": 0.0007299159663865547, "loss": 0.5092, "step": 9750 }, { "epoch": 5.447486033519553, "grad_norm": 1.411636471748352, "learning_rate": 0.0007298879551820729, "loss": 0.4665, "step": 9751 }, { "epoch": 5.44804469273743, "grad_norm": 0.6135072708129883, "learning_rate": 0.000729859943977591, "loss": 0.4248, "step": 9752 }, { "epoch": 5.448603351955307, "grad_norm": 0.5498002767562866, "learning_rate": 0.0007298319327731092, "loss": 0.4838, "step": 9753 }, { "epoch": 5.449162011173184, "grad_norm": 1.2738165855407715, "learning_rate": 0.0007298039215686275, "loss": 0.516, "step": 9754 }, { "epoch": 5.449720670391061, "grad_norm": 0.48241597414016724, "learning_rate": 0.0007297759103641457, "loss": 0.3739, "step": 9755 }, { "epoch": 5.4502793296089385, "grad_norm": 1.0763256549835205, "learning_rate": 0.0007297478991596639, "loss": 0.5358, "step": 9756 }, { "epoch": 5.450837988826816, "grad_norm": 0.4620930552482605, "learning_rate": 0.000729719887955182, "loss": 0.4688, "step": 9757 }, { "epoch": 5.4513966480446925, "grad_norm": 0.7245018482208252, "learning_rate": 0.0007296918767507002, "loss": 0.5372, "step": 9758 }, { "epoch": 5.45195530726257, "grad_norm": 0.7951422333717346, "learning_rate": 0.0007296638655462185, "loss": 0.4482, "step": 9759 }, { "epoch": 5.452513966480447, "grad_norm": 0.67264324426651, "learning_rate": 0.0007296358543417367, "loss": 0.5326, "step": 9760 }, { "epoch": 5.453072625698324, "grad_norm": 0.4882766604423523, "learning_rate": 0.000729607843137255, "loss": 0.37, "step": 9761 }, { "epoch": 5.453631284916201, "grad_norm": 0.5028199553489685, "learning_rate": 0.0007295798319327732, "loss": 0.4846, "step": 9762 }, { "epoch": 5.454189944134078, "grad_norm": 
0.5273077487945557, "learning_rate": 0.0007295518207282913, "loss": 0.4144, "step": 9763 }, { "epoch": 5.454748603351955, "grad_norm": 0.5949251651763916, "learning_rate": 0.0007295238095238096, "loss": 0.5072, "step": 9764 }, { "epoch": 5.455307262569832, "grad_norm": 0.9257952570915222, "learning_rate": 0.0007294957983193278, "loss": 0.4774, "step": 9765 }, { "epoch": 5.4558659217877095, "grad_norm": 0.876560628414154, "learning_rate": 0.000729467787114846, "loss": 0.5045, "step": 9766 }, { "epoch": 5.456424581005587, "grad_norm": 0.47527071833610535, "learning_rate": 0.0007294397759103642, "loss": 0.5013, "step": 9767 }, { "epoch": 5.4569832402234635, "grad_norm": 0.5068494081497192, "learning_rate": 0.0007294117647058823, "loss": 0.4553, "step": 9768 }, { "epoch": 5.457541899441341, "grad_norm": 1.2847996950149536, "learning_rate": 0.0007293837535014006, "loss": 0.4806, "step": 9769 }, { "epoch": 5.4581005586592175, "grad_norm": 0.5115010142326355, "learning_rate": 0.0007293557422969188, "loss": 0.4574, "step": 9770 }, { "epoch": 5.458659217877095, "grad_norm": 0.7836250066757202, "learning_rate": 0.000729327731092437, "loss": 0.4988, "step": 9771 }, { "epoch": 5.459217877094972, "grad_norm": 0.4542462229728699, "learning_rate": 0.0007292997198879552, "loss": 0.481, "step": 9772 }, { "epoch": 5.459776536312849, "grad_norm": 0.5937549471855164, "learning_rate": 0.0007292717086834733, "loss": 0.4334, "step": 9773 }, { "epoch": 5.460335195530726, "grad_norm": 1.0559362173080444, "learning_rate": 0.0007292436974789916, "loss": 0.5298, "step": 9774 }, { "epoch": 5.460893854748603, "grad_norm": 0.48945626616477966, "learning_rate": 0.0007292156862745098, "loss": 0.4505, "step": 9775 }, { "epoch": 5.4614525139664805, "grad_norm": 0.6664998531341553, "learning_rate": 0.000729187675070028, "loss": 0.4166, "step": 9776 }, { "epoch": 5.462011173184358, "grad_norm": 0.7264735698699951, "learning_rate": 0.0007291596638655463, "loss": 0.4789, "step": 9777 }, { "epoch": 
5.4625698324022345, "grad_norm": 0.596433162689209, "learning_rate": 0.0007291316526610645, "loss": 0.4553, "step": 9778 }, { "epoch": 5.463128491620112, "grad_norm": 0.4596468210220337, "learning_rate": 0.0007291036414565827, "loss": 0.4302, "step": 9779 }, { "epoch": 5.4636871508379885, "grad_norm": 0.5624420642852783, "learning_rate": 0.0007290756302521009, "loss": 0.5445, "step": 9780 }, { "epoch": 5.464245810055866, "grad_norm": 1.8031001091003418, "learning_rate": 0.0007290476190476191, "loss": 0.4658, "step": 9781 }, { "epoch": 5.464804469273743, "grad_norm": 0.5349305272102356, "learning_rate": 0.0007290196078431373, "loss": 0.5365, "step": 9782 }, { "epoch": 5.46536312849162, "grad_norm": 0.6741350293159485, "learning_rate": 0.0007289915966386555, "loss": 0.5111, "step": 9783 }, { "epoch": 5.465921787709497, "grad_norm": 0.5121055841445923, "learning_rate": 0.0007289635854341737, "loss": 0.4619, "step": 9784 }, { "epoch": 5.466480446927374, "grad_norm": 0.4975737929344177, "learning_rate": 0.0007289355742296919, "loss": 0.4467, "step": 9785 }, { "epoch": 5.4670391061452515, "grad_norm": 0.5289967656135559, "learning_rate": 0.0007289075630252101, "loss": 0.5125, "step": 9786 }, { "epoch": 5.467597765363129, "grad_norm": 0.4235764145851135, "learning_rate": 0.0007288795518207283, "loss": 0.4657, "step": 9787 }, { "epoch": 5.4681564245810055, "grad_norm": 0.697348415851593, "learning_rate": 0.0007288515406162465, "loss": 0.4799, "step": 9788 }, { "epoch": 5.468715083798883, "grad_norm": 0.5696758031845093, "learning_rate": 0.0007288235294117647, "loss": 0.3823, "step": 9789 }, { "epoch": 5.4692737430167595, "grad_norm": 0.5714661478996277, "learning_rate": 0.0007287955182072829, "loss": 0.3269, "step": 9790 }, { "epoch": 5.469832402234637, "grad_norm": 0.8170825242996216, "learning_rate": 0.0007287675070028011, "loss": 0.4135, "step": 9791 }, { "epoch": 5.4703910614525135, "grad_norm": 0.49487197399139404, "learning_rate": 0.0007287394957983193, "loss": 0.53, 
"step": 9792 }, { "epoch": 5.470949720670391, "grad_norm": 0.7495507001876831, "learning_rate": 0.0007287114845938375, "loss": 0.4666, "step": 9793 }, { "epoch": 5.471508379888268, "grad_norm": 0.5392534136772156, "learning_rate": 0.0007286834733893559, "loss": 0.3898, "step": 9794 }, { "epoch": 5.472067039106145, "grad_norm": 1.5419838428497314, "learning_rate": 0.000728655462184874, "loss": 0.4037, "step": 9795 }, { "epoch": 5.4726256983240225, "grad_norm": 0.36275818943977356, "learning_rate": 0.0007286274509803922, "loss": 0.3731, "step": 9796 }, { "epoch": 5.473184357541899, "grad_norm": 0.7083741426467896, "learning_rate": 0.0007285994397759104, "loss": 0.6887, "step": 9797 }, { "epoch": 5.4737430167597765, "grad_norm": 1.5125281810760498, "learning_rate": 0.0007285714285714286, "loss": 0.6228, "step": 9798 }, { "epoch": 5.474301675977654, "grad_norm": 2.1880712509155273, "learning_rate": 0.0007285434173669469, "loss": 0.4433, "step": 9799 }, { "epoch": 5.4748603351955305, "grad_norm": 0.7548524141311646, "learning_rate": 0.000728515406162465, "loss": 0.5692, "step": 9800 }, { "epoch": 5.475418994413408, "grad_norm": 0.6337263584136963, "learning_rate": 0.0007284873949579832, "loss": 0.4067, "step": 9801 }, { "epoch": 5.4759776536312845, "grad_norm": 0.7854358553886414, "learning_rate": 0.0007284593837535014, "loss": 0.5491, "step": 9802 }, { "epoch": 5.476536312849162, "grad_norm": 0.5236221551895142, "learning_rate": 0.0007284313725490196, "loss": 0.4444, "step": 9803 }, { "epoch": 5.477094972067039, "grad_norm": 3.851414680480957, "learning_rate": 0.0007284033613445379, "loss": 0.705, "step": 9804 }, { "epoch": 5.477653631284916, "grad_norm": 0.47031188011169434, "learning_rate": 0.000728375350140056, "loss": 0.5239, "step": 9805 }, { "epoch": 5.4782122905027935, "grad_norm": 0.4021458923816681, "learning_rate": 0.0007283473389355742, "loss": 0.4387, "step": 9806 }, { "epoch": 5.47877094972067, "grad_norm": 0.5655771493911743, "learning_rate": 
0.0007283193277310924, "loss": 0.3806, "step": 9807 }, { "epoch": 5.4793296089385475, "grad_norm": 0.5272669196128845, "learning_rate": 0.0007282913165266106, "loss": 0.5229, "step": 9808 }, { "epoch": 5.479888268156425, "grad_norm": 0.5111368298530579, "learning_rate": 0.000728263305322129, "loss": 0.367, "step": 9809 }, { "epoch": 5.4804469273743015, "grad_norm": 0.5307624936103821, "learning_rate": 0.0007282352941176472, "loss": 0.425, "step": 9810 }, { "epoch": 5.481005586592179, "grad_norm": 0.48857417702674866, "learning_rate": 0.0007282072829131653, "loss": 0.3748, "step": 9811 }, { "epoch": 5.4815642458100555, "grad_norm": 0.4147144854068756, "learning_rate": 0.0007281792717086835, "loss": 0.4621, "step": 9812 }, { "epoch": 5.482122905027933, "grad_norm": 0.5259796380996704, "learning_rate": 0.0007281512605042017, "loss": 0.3953, "step": 9813 }, { "epoch": 5.48268156424581, "grad_norm": 0.829331636428833, "learning_rate": 0.0007281232492997199, "loss": 0.3615, "step": 9814 }, { "epoch": 5.483240223463687, "grad_norm": 0.6177496910095215, "learning_rate": 0.0007280952380952382, "loss": 0.4471, "step": 9815 }, { "epoch": 5.4837988826815645, "grad_norm": 0.5386406183242798, "learning_rate": 0.0007280672268907563, "loss": 0.3801, "step": 9816 }, { "epoch": 5.484357541899441, "grad_norm": 0.48177188634872437, "learning_rate": 0.0007280392156862745, "loss": 0.4954, "step": 9817 }, { "epoch": 5.4849162011173185, "grad_norm": 0.6160786747932434, "learning_rate": 0.0007280112044817927, "loss": 0.4273, "step": 9818 }, { "epoch": 5.485474860335196, "grad_norm": 0.42306891083717346, "learning_rate": 0.0007279831932773109, "loss": 0.3996, "step": 9819 }, { "epoch": 5.4860335195530725, "grad_norm": 0.6724539399147034, "learning_rate": 0.0007279551820728292, "loss": 0.4493, "step": 9820 }, { "epoch": 5.48659217877095, "grad_norm": 2.4731643199920654, "learning_rate": 0.0007279271708683473, "loss": 0.431, "step": 9821 }, { "epoch": 5.4871508379888265, "grad_norm": 
0.6919944286346436, "learning_rate": 0.0007278991596638655, "loss": 0.5869, "step": 9822 }, { "epoch": 5.487709497206704, "grad_norm": 0.5119506120681763, "learning_rate": 0.0007278711484593837, "loss": 0.5099, "step": 9823 }, { "epoch": 5.488268156424581, "grad_norm": 0.5532276034355164, "learning_rate": 0.0007278431372549019, "loss": 0.4434, "step": 9824 }, { "epoch": 5.488826815642458, "grad_norm": 1.0022008419036865, "learning_rate": 0.0007278151260504202, "loss": 0.3452, "step": 9825 }, { "epoch": 5.4893854748603355, "grad_norm": 0.8269253373146057, "learning_rate": 0.0007277871148459385, "loss": 0.4684, "step": 9826 }, { "epoch": 5.489944134078212, "grad_norm": 0.4292662441730499, "learning_rate": 0.0007277591036414566, "loss": 0.4355, "step": 9827 }, { "epoch": 5.4905027932960895, "grad_norm": 1.08241605758667, "learning_rate": 0.0007277310924369748, "loss": 0.3902, "step": 9828 }, { "epoch": 5.491061452513966, "grad_norm": 1.653778076171875, "learning_rate": 0.000727703081232493, "loss": 0.4446, "step": 9829 }, { "epoch": 5.4916201117318435, "grad_norm": 2.0861117839813232, "learning_rate": 0.0007276750700280113, "loss": 0.4145, "step": 9830 }, { "epoch": 5.492178770949721, "grad_norm": 0.6282828450202942, "learning_rate": 0.0007276470588235295, "loss": 0.4928, "step": 9831 }, { "epoch": 5.4927374301675975, "grad_norm": 0.4635153114795685, "learning_rate": 0.0007276190476190476, "loss": 0.4133, "step": 9832 }, { "epoch": 5.493296089385475, "grad_norm": 0.698635995388031, "learning_rate": 0.0007275910364145658, "loss": 0.461, "step": 9833 }, { "epoch": 5.4938547486033515, "grad_norm": 0.3883945047855377, "learning_rate": 0.000727563025210084, "loss": 0.3465, "step": 9834 }, { "epoch": 5.494413407821229, "grad_norm": 0.5274906158447266, "learning_rate": 0.0007275350140056023, "loss": 0.5029, "step": 9835 }, { "epoch": 5.4949720670391065, "grad_norm": 0.5786053538322449, "learning_rate": 0.0007275070028011205, "loss": 0.5087, "step": 9836 }, { "epoch": 
5.495530726256983, "grad_norm": 0.49566227197647095, "learning_rate": 0.0007274789915966386, "loss": 0.3724, "step": 9837 }, { "epoch": 5.4960893854748605, "grad_norm": 4.963715076446533, "learning_rate": 0.0007274509803921568, "loss": 0.481, "step": 9838 }, { "epoch": 5.496648044692737, "grad_norm": 0.6923301815986633, "learning_rate": 0.000727422969187675, "loss": 0.4789, "step": 9839 }, { "epoch": 5.4972067039106145, "grad_norm": 0.5754944086074829, "learning_rate": 0.0007273949579831933, "loss": 0.4784, "step": 9840 }, { "epoch": 5.497765363128492, "grad_norm": 0.6470469832420349, "learning_rate": 0.0007273669467787115, "loss": 0.3967, "step": 9841 }, { "epoch": 5.4983240223463685, "grad_norm": 0.5435767769813538, "learning_rate": 0.0007273389355742297, "loss": 0.4058, "step": 9842 }, { "epoch": 5.498882681564246, "grad_norm": 0.646068274974823, "learning_rate": 0.0007273109243697478, "loss": 0.4498, "step": 9843 }, { "epoch": 5.4994413407821225, "grad_norm": 0.5059846639633179, "learning_rate": 0.000727282913165266, "loss": 0.3569, "step": 9844 }, { "epoch": 5.5, "grad_norm": 0.5060825943946838, "learning_rate": 0.0007272549019607844, "loss": 0.3963, "step": 9845 }, { "epoch": 5.5005586592178775, "grad_norm": 0.5156931281089783, "learning_rate": 0.0007272268907563026, "loss": 0.4709, "step": 9846 }, { "epoch": 5.501117318435754, "grad_norm": 0.4591083228588104, "learning_rate": 0.0007271988795518208, "loss": 0.3489, "step": 9847 }, { "epoch": 5.5016759776536315, "grad_norm": 0.6754613518714905, "learning_rate": 0.0007271708683473389, "loss": 0.5642, "step": 9848 }, { "epoch": 5.502234636871508, "grad_norm": 0.6588128209114075, "learning_rate": 0.0007271428571428571, "loss": 0.4926, "step": 9849 }, { "epoch": 5.5027932960893855, "grad_norm": 0.5223484039306641, "learning_rate": 0.0007271148459383754, "loss": 0.4975, "step": 9850 }, { "epoch": 5.503351955307263, "grad_norm": 0.9048058986663818, "learning_rate": 0.0007270868347338936, "loss": 0.4119, "step": 9851 
}, { "epoch": 5.5039106145251395, "grad_norm": 0.5210031867027283, "learning_rate": 0.0007270588235294118, "loss": 0.4993, "step": 9852 }, { "epoch": 5.504469273743017, "grad_norm": 0.5703561305999756, "learning_rate": 0.0007270308123249299, "loss": 0.4657, "step": 9853 }, { "epoch": 5.5050279329608935, "grad_norm": 0.3826907277107239, "learning_rate": 0.0007270028011204481, "loss": 0.3661, "step": 9854 }, { "epoch": 5.505586592178771, "grad_norm": 0.5331421494483948, "learning_rate": 0.0007269747899159664, "loss": 0.528, "step": 9855 }, { "epoch": 5.506145251396648, "grad_norm": 0.9331299066543579, "learning_rate": 0.0007269467787114846, "loss": 0.4596, "step": 9856 }, { "epoch": 5.506703910614525, "grad_norm": 0.7615708708763123, "learning_rate": 0.0007269187675070028, "loss": 0.4064, "step": 9857 }, { "epoch": 5.5072625698324025, "grad_norm": 1.6598408222198486, "learning_rate": 0.000726890756302521, "loss": 0.3933, "step": 9858 }, { "epoch": 5.507821229050279, "grad_norm": 0.45569321513175964, "learning_rate": 0.0007268627450980391, "loss": 0.4479, "step": 9859 }, { "epoch": 5.5083798882681565, "grad_norm": 1.6721045970916748, "learning_rate": 0.0007268347338935575, "loss": 0.5206, "step": 9860 }, { "epoch": 5.508938547486034, "grad_norm": 0.4890991747379303, "learning_rate": 0.0007268067226890757, "loss": 0.3752, "step": 9861 }, { "epoch": 5.5094972067039105, "grad_norm": 0.7073611617088318, "learning_rate": 0.0007267787114845939, "loss": 0.5763, "step": 9862 }, { "epoch": 5.510055865921788, "grad_norm": 0.4285040497779846, "learning_rate": 0.0007267507002801121, "loss": 0.3729, "step": 9863 }, { "epoch": 5.5106145251396645, "grad_norm": 0.6984803080558777, "learning_rate": 0.0007267226890756302, "loss": 0.4455, "step": 9864 }, { "epoch": 5.511173184357542, "grad_norm": 0.6127260327339172, "learning_rate": 0.0007266946778711485, "loss": 0.3976, "step": 9865 }, { "epoch": 5.511731843575419, "grad_norm": 0.5203281044960022, "learning_rate": 
0.0007266666666666667, "loss": 0.4531, "step": 9866 }, { "epoch": 5.512290502793296, "grad_norm": 0.5179316401481628, "learning_rate": 0.0007266386554621849, "loss": 0.4745, "step": 9867 }, { "epoch": 5.5128491620111735, "grad_norm": 0.5677104592323303, "learning_rate": 0.0007266106442577031, "loss": 0.4281, "step": 9868 }, { "epoch": 5.51340782122905, "grad_norm": 11.250227928161621, "learning_rate": 0.0007265826330532212, "loss": 0.4614, "step": 9869 }, { "epoch": 5.5139664804469275, "grad_norm": 0.6089310050010681, "learning_rate": 0.0007265546218487395, "loss": 0.4924, "step": 9870 }, { "epoch": 5.514525139664805, "grad_norm": 0.38104772567749023, "learning_rate": 0.0007265266106442577, "loss": 0.3731, "step": 9871 }, { "epoch": 5.5150837988826815, "grad_norm": 0.4221259653568268, "learning_rate": 0.0007264985994397759, "loss": 0.3629, "step": 9872 }, { "epoch": 5.515642458100559, "grad_norm": 0.6919800043106079, "learning_rate": 0.0007264705882352941, "loss": 0.4592, "step": 9873 }, { "epoch": 5.5162011173184355, "grad_norm": 5.412047863006592, "learning_rate": 0.0007264425770308123, "loss": 0.4079, "step": 9874 }, { "epoch": 5.516759776536313, "grad_norm": 18.767086029052734, "learning_rate": 0.0007264145658263305, "loss": 0.4061, "step": 9875 }, { "epoch": 5.51731843575419, "grad_norm": 0.4553179442882538, "learning_rate": 0.0007263865546218488, "loss": 0.4817, "step": 9876 }, { "epoch": 5.517877094972067, "grad_norm": 0.5219476819038391, "learning_rate": 0.000726358543417367, "loss": 0.5107, "step": 9877 }, { "epoch": 5.5184357541899445, "grad_norm": 0.5047419667243958, "learning_rate": 0.0007263305322128852, "loss": 0.4855, "step": 9878 }, { "epoch": 5.518994413407821, "grad_norm": 0.4721706211566925, "learning_rate": 0.0007263025210084034, "loss": 0.4413, "step": 9879 }, { "epoch": 5.5195530726256985, "grad_norm": 0.6387419700622559, "learning_rate": 0.0007262745098039216, "loss": 0.463, "step": 9880 }, { "epoch": 5.520111731843575, "grad_norm": 
1.0378801822662354, "learning_rate": 0.0007262464985994398, "loss": 0.3899, "step": 9881 }, { "epoch": 5.5206703910614525, "grad_norm": 0.587094783782959, "learning_rate": 0.000726218487394958, "loss": 0.5024, "step": 9882 }, { "epoch": 5.52122905027933, "grad_norm": 0.4050324857234955, "learning_rate": 0.0007261904761904762, "loss": 0.408, "step": 9883 }, { "epoch": 5.5217877094972065, "grad_norm": 0.8569478392601013, "learning_rate": 0.0007261624649859944, "loss": 0.4879, "step": 9884 }, { "epoch": 5.522346368715084, "grad_norm": 0.401920884847641, "learning_rate": 0.0007261344537815126, "loss": 0.4856, "step": 9885 }, { "epoch": 5.522905027932961, "grad_norm": 0.39946064352989197, "learning_rate": 0.0007261064425770308, "loss": 0.3503, "step": 9886 }, { "epoch": 5.523463687150838, "grad_norm": 0.5640581846237183, "learning_rate": 0.000726078431372549, "loss": 0.4927, "step": 9887 }, { "epoch": 5.5240223463687155, "grad_norm": 0.9073072075843811, "learning_rate": 0.0007260504201680672, "loss": 0.628, "step": 9888 }, { "epoch": 5.524581005586592, "grad_norm": 0.4360564053058624, "learning_rate": 0.0007260224089635854, "loss": 0.3375, "step": 9889 }, { "epoch": 5.5251396648044695, "grad_norm": 0.5369274616241455, "learning_rate": 0.0007259943977591037, "loss": 0.4587, "step": 9890 }, { "epoch": 5.525698324022346, "grad_norm": 0.49115100502967834, "learning_rate": 0.0007259663865546218, "loss": 0.3569, "step": 9891 }, { "epoch": 5.5262569832402235, "grad_norm": 0.44144749641418457, "learning_rate": 0.00072593837535014, "loss": 0.3566, "step": 9892 }, { "epoch": 5.5268156424581, "grad_norm": 0.41151079535484314, "learning_rate": 0.0007259103641456583, "loss": 0.4256, "step": 9893 }, { "epoch": 5.5273743016759775, "grad_norm": 0.5457106828689575, "learning_rate": 0.0007258823529411765, "loss": 0.4617, "step": 9894 }, { "epoch": 5.527932960893855, "grad_norm": 0.5878649353981018, "learning_rate": 0.0007258543417366948, "loss": 0.5457, "step": 9895 }, { "epoch": 
5.528491620111732, "grad_norm": 0.5504418611526489, "learning_rate": 0.0007258263305322129, "loss": 0.4108, "step": 9896 }, { "epoch": 5.529050279329609, "grad_norm": 2.616567373275757, "learning_rate": 0.0007257983193277311, "loss": 0.5422, "step": 9897 }, { "epoch": 5.5296089385474865, "grad_norm": 0.3624570667743683, "learning_rate": 0.0007257703081232493, "loss": 0.3618, "step": 9898 }, { "epoch": 5.530167597765363, "grad_norm": 0.6448395252227783, "learning_rate": 0.0007257422969187675, "loss": 0.4461, "step": 9899 }, { "epoch": 5.5307262569832405, "grad_norm": 0.5649136304855347, "learning_rate": 0.0007257142857142858, "loss": 0.4244, "step": 9900 }, { "epoch": 5.531284916201117, "grad_norm": 0.9016652703285217, "learning_rate": 0.0007256862745098039, "loss": 0.6765, "step": 9901 }, { "epoch": 5.5318435754189945, "grad_norm": 0.8346880674362183, "learning_rate": 0.0007256582633053221, "loss": 0.6174, "step": 9902 }, { "epoch": 5.532402234636871, "grad_norm": 0.6906309127807617, "learning_rate": 0.0007256302521008403, "loss": 0.679, "step": 9903 }, { "epoch": 5.5329608938547485, "grad_norm": 0.8676113486289978, "learning_rate": 0.0007256022408963585, "loss": 0.412, "step": 9904 }, { "epoch": 5.533519553072626, "grad_norm": 0.545275092124939, "learning_rate": 0.0007255742296918768, "loss": 0.5197, "step": 9905 }, { "epoch": 5.534078212290503, "grad_norm": 1.7213584184646606, "learning_rate": 0.000725546218487395, "loss": 0.5242, "step": 9906 }, { "epoch": 5.53463687150838, "grad_norm": 0.709518551826477, "learning_rate": 0.0007255182072829131, "loss": 0.4413, "step": 9907 }, { "epoch": 5.5351955307262575, "grad_norm": 0.5518812537193298, "learning_rate": 0.0007254901960784313, "loss": 0.4004, "step": 9908 }, { "epoch": 5.535754189944134, "grad_norm": 0.5170642137527466, "learning_rate": 0.0007254621848739496, "loss": 0.3437, "step": 9909 }, { "epoch": 5.5363128491620115, "grad_norm": 0.6805608868598938, "learning_rate": 0.0007254341736694679, "loss": 0.5541, 
"step": 9910 }, { "epoch": 5.536871508379888, "grad_norm": 0.5276179313659668, "learning_rate": 0.0007254061624649861, "loss": 0.435, "step": 9911 }, { "epoch": 5.5374301675977655, "grad_norm": 0.438228964805603, "learning_rate": 0.0007253781512605042, "loss": 0.4204, "step": 9912 }, { "epoch": 5.537988826815642, "grad_norm": 0.5030065178871155, "learning_rate": 0.0007253501400560224, "loss": 0.5013, "step": 9913 }, { "epoch": 5.5385474860335195, "grad_norm": 0.5147072076797485, "learning_rate": 0.0007253221288515406, "loss": 0.4694, "step": 9914 }, { "epoch": 5.539106145251397, "grad_norm": 0.479183554649353, "learning_rate": 0.0007252941176470589, "loss": 0.5558, "step": 9915 }, { "epoch": 5.539664804469274, "grad_norm": 0.592819094657898, "learning_rate": 0.0007252661064425771, "loss": 0.3598, "step": 9916 }, { "epoch": 5.540223463687151, "grad_norm": 0.41784223914146423, "learning_rate": 0.0007252380952380952, "loss": 0.4706, "step": 9917 }, { "epoch": 5.540782122905028, "grad_norm": 0.4165116548538208, "learning_rate": 0.0007252100840336134, "loss": 0.3884, "step": 9918 }, { "epoch": 5.541340782122905, "grad_norm": 0.6245352029800415, "learning_rate": 0.0007251820728291316, "loss": 0.4169, "step": 9919 }, { "epoch": 5.5418994413407825, "grad_norm": 2.388648271560669, "learning_rate": 0.0007251540616246499, "loss": 0.4267, "step": 9920 }, { "epoch": 5.542458100558659, "grad_norm": 1.1768643856048584, "learning_rate": 0.0007251260504201681, "loss": 0.4908, "step": 9921 }, { "epoch": 5.5430167597765365, "grad_norm": 0.4066906273365021, "learning_rate": 0.0007250980392156863, "loss": 0.5364, "step": 9922 }, { "epoch": 5.543575418994413, "grad_norm": 0.6321830153465271, "learning_rate": 0.0007250700280112044, "loss": 0.3889, "step": 9923 }, { "epoch": 5.5441340782122905, "grad_norm": 0.3898478150367737, "learning_rate": 0.0007250420168067226, "loss": 0.3647, "step": 9924 }, { "epoch": 5.544692737430168, "grad_norm": 0.7242388129234314, "learning_rate": 
0.000725014005602241, "loss": 0.4181, "step": 9925 }, { "epoch": 5.545251396648045, "grad_norm": 0.4089735150337219, "learning_rate": 0.0007249859943977592, "loss": 0.4361, "step": 9926 }, { "epoch": 5.545810055865922, "grad_norm": 0.5072340965270996, "learning_rate": 0.0007249579831932774, "loss": 0.5734, "step": 9927 }, { "epoch": 5.546368715083799, "grad_norm": 0.3940463066101074, "learning_rate": 0.0007249299719887955, "loss": 0.4593, "step": 9928 }, { "epoch": 5.546927374301676, "grad_norm": 0.48787176609039307, "learning_rate": 0.0007249019607843137, "loss": 0.5869, "step": 9929 }, { "epoch": 5.547486033519553, "grad_norm": 0.4486599564552307, "learning_rate": 0.000724873949579832, "loss": 0.3893, "step": 9930 }, { "epoch": 5.54804469273743, "grad_norm": 0.5582526922225952, "learning_rate": 0.0007248459383753502, "loss": 0.5283, "step": 9931 }, { "epoch": 5.5486033519553075, "grad_norm": 3.792804479598999, "learning_rate": 0.0007248179271708684, "loss": 0.4343, "step": 9932 }, { "epoch": 5.549162011173184, "grad_norm": 0.7790818810462952, "learning_rate": 0.0007247899159663865, "loss": 0.4612, "step": 9933 }, { "epoch": 5.5497206703910615, "grad_norm": 1.0483152866363525, "learning_rate": 0.0007247619047619047, "loss": 0.6877, "step": 9934 }, { "epoch": 5.550279329608939, "grad_norm": 0.4311125576496124, "learning_rate": 0.000724733893557423, "loss": 0.4446, "step": 9935 }, { "epoch": 5.550837988826816, "grad_norm": 0.8082439303398132, "learning_rate": 0.0007247058823529412, "loss": 0.5547, "step": 9936 }, { "epoch": 5.551396648044693, "grad_norm": 0.807525634765625, "learning_rate": 0.0007246778711484594, "loss": 0.4132, "step": 9937 }, { "epoch": 5.55195530726257, "grad_norm": 0.6115683913230896, "learning_rate": 0.0007246498599439776, "loss": 0.4223, "step": 9938 }, { "epoch": 5.552513966480447, "grad_norm": 0.6792250275611877, "learning_rate": 0.0007246218487394957, "loss": 0.4893, "step": 9939 }, { "epoch": 5.553072625698324, "grad_norm": 
0.5155826807022095, "learning_rate": 0.000724593837535014, "loss": 0.4944, "step": 9940 }, { "epoch": 5.553631284916201, "grad_norm": 0.7451338171958923, "learning_rate": 0.0007245658263305323, "loss": 0.4332, "step": 9941 }, { "epoch": 5.5541899441340785, "grad_norm": 1.2530287504196167, "learning_rate": 0.0007245378151260505, "loss": 0.4383, "step": 9942 }, { "epoch": 5.554748603351955, "grad_norm": 0.6665396690368652, "learning_rate": 0.0007245098039215687, "loss": 0.4881, "step": 9943 }, { "epoch": 5.5553072625698325, "grad_norm": 0.6986837387084961, "learning_rate": 0.0007244817927170868, "loss": 0.3823, "step": 9944 }, { "epoch": 5.55586592178771, "grad_norm": 0.6157588362693787, "learning_rate": 0.0007244537815126051, "loss": 0.4765, "step": 9945 }, { "epoch": 5.556424581005587, "grad_norm": 0.5928972363471985, "learning_rate": 0.0007244257703081233, "loss": 0.4312, "step": 9946 }, { "epoch": 5.556983240223464, "grad_norm": 0.6123839616775513, "learning_rate": 0.0007243977591036415, "loss": 0.3446, "step": 9947 }, { "epoch": 5.557541899441341, "grad_norm": 0.7116967439651489, "learning_rate": 0.0007243697478991597, "loss": 0.3571, "step": 9948 }, { "epoch": 5.558100558659218, "grad_norm": 0.4645528197288513, "learning_rate": 0.0007243417366946778, "loss": 0.4266, "step": 9949 }, { "epoch": 5.558659217877095, "grad_norm": 0.44229093194007874, "learning_rate": 0.0007243137254901961, "loss": 0.3995, "step": 9950 }, { "epoch": 5.559217877094972, "grad_norm": 0.5539903044700623, "learning_rate": 0.0007242857142857143, "loss": 0.3754, "step": 9951 }, { "epoch": 5.5597765363128495, "grad_norm": 0.5988803505897522, "learning_rate": 0.0007242577030812325, "loss": 0.414, "step": 9952 }, { "epoch": 5.560335195530726, "grad_norm": 2.0920209884643555, "learning_rate": 0.0007242296918767507, "loss": 0.4041, "step": 9953 }, { "epoch": 5.5608938547486035, "grad_norm": 0.7040426731109619, "learning_rate": 0.0007242016806722689, "loss": 0.3424, "step": 9954 }, { "epoch": 
5.56145251396648, "grad_norm": 0.6377585530281067, "learning_rate": 0.0007241736694677871, "loss": 0.3787, "step": 9955 }, { "epoch": 5.562011173184358, "grad_norm": 0.6620262861251831, "learning_rate": 0.0007241456582633053, "loss": 0.5126, "step": 9956 }, { "epoch": 5.562569832402235, "grad_norm": 0.627249002456665, "learning_rate": 0.0007241176470588235, "loss": 0.5437, "step": 9957 }, { "epoch": 5.563128491620112, "grad_norm": 0.6866033673286438, "learning_rate": 0.0007240896358543418, "loss": 0.5025, "step": 9958 }, { "epoch": 5.563687150837989, "grad_norm": 0.5920504927635193, "learning_rate": 0.00072406162464986, "loss": 0.4295, "step": 9959 }, { "epoch": 5.564245810055866, "grad_norm": 0.4929662048816681, "learning_rate": 0.0007240336134453782, "loss": 0.5608, "step": 9960 }, { "epoch": 5.564804469273743, "grad_norm": 0.4381335377693176, "learning_rate": 0.0007240056022408964, "loss": 0.4269, "step": 9961 }, { "epoch": 5.5653631284916205, "grad_norm": 0.6184817552566528, "learning_rate": 0.0007239775910364146, "loss": 0.5056, "step": 9962 }, { "epoch": 5.565921787709497, "grad_norm": 1.3165264129638672, "learning_rate": 0.0007239495798319328, "loss": 0.5942, "step": 9963 }, { "epoch": 5.5664804469273745, "grad_norm": 0.6004919409751892, "learning_rate": 0.000723921568627451, "loss": 0.599, "step": 9964 }, { "epoch": 5.567039106145251, "grad_norm": 2.6897833347320557, "learning_rate": 0.0007238935574229693, "loss": 0.3412, "step": 9965 }, { "epoch": 5.567597765363129, "grad_norm": 5.2298665046691895, "learning_rate": 0.0007238655462184874, "loss": 0.4602, "step": 9966 }, { "epoch": 5.568156424581005, "grad_norm": 0.398495078086853, "learning_rate": 0.0007238375350140056, "loss": 0.3792, "step": 9967 }, { "epoch": 5.568715083798883, "grad_norm": 0.4069007933139801, "learning_rate": 0.0007238095238095238, "loss": 0.358, "step": 9968 }, { "epoch": 5.56927374301676, "grad_norm": 0.39702823758125305, "learning_rate": 0.000723781512605042, "loss": 0.3858, "step": 
9969 }, { "epoch": 5.569832402234637, "grad_norm": 0.6081941723823547, "learning_rate": 0.0007237535014005603, "loss": 0.4399, "step": 9970 }, { "epoch": 5.570391061452514, "grad_norm": 0.9728443622589111, "learning_rate": 0.0007237254901960784, "loss": 0.3563, "step": 9971 }, { "epoch": 5.5709497206703915, "grad_norm": 0.8170782923698425, "learning_rate": 0.0007236974789915966, "loss": 0.4035, "step": 9972 }, { "epoch": 5.571508379888268, "grad_norm": 0.48218995332717896, "learning_rate": 0.0007236694677871148, "loss": 0.3905, "step": 9973 }, { "epoch": 5.5720670391061455, "grad_norm": 0.5158736705780029, "learning_rate": 0.000723641456582633, "loss": 0.4134, "step": 9974 }, { "epoch": 5.572625698324022, "grad_norm": 0.5635305643081665, "learning_rate": 0.0007236134453781514, "loss": 0.4232, "step": 9975 }, { "epoch": 5.5731843575419, "grad_norm": 3.2909793853759766, "learning_rate": 0.0007235854341736695, "loss": 0.3994, "step": 9976 }, { "epoch": 5.573743016759776, "grad_norm": 0.9748474955558777, "learning_rate": 0.0007235574229691877, "loss": 0.4116, "step": 9977 }, { "epoch": 5.574301675977654, "grad_norm": 0.603339672088623, "learning_rate": 0.0007235294117647059, "loss": 0.4266, "step": 9978 }, { "epoch": 5.574860335195531, "grad_norm": 0.5575515627861023, "learning_rate": 0.0007235014005602241, "loss": 0.5807, "step": 9979 }, { "epoch": 5.575418994413408, "grad_norm": 0.5503337383270264, "learning_rate": 0.0007234733893557424, "loss": 0.4602, "step": 9980 }, { "epoch": 5.575977653631285, "grad_norm": 0.6396551132202148, "learning_rate": 0.0007234453781512606, "loss": 0.5343, "step": 9981 }, { "epoch": 5.576536312849162, "grad_norm": 0.5318945050239563, "learning_rate": 0.0007234173669467787, "loss": 0.4157, "step": 9982 }, { "epoch": 5.577094972067039, "grad_norm": 0.4633936583995819, "learning_rate": 0.0007233893557422969, "loss": 0.5588, "step": 9983 }, { "epoch": 5.5776536312849165, "grad_norm": 0.7308537364006042, "learning_rate": 
0.0007233613445378151, "loss": 0.4908, "step": 9984 }, { "epoch": 5.578212290502793, "grad_norm": 0.5050171613693237, "learning_rate": 0.0007233333333333334, "loss": 0.4761, "step": 9985 }, { "epoch": 5.578770949720671, "grad_norm": 0.8002045154571533, "learning_rate": 0.0007233053221288516, "loss": 0.7016, "step": 9986 }, { "epoch": 5.579329608938547, "grad_norm": 0.5756903886795044, "learning_rate": 0.0007232773109243697, "loss": 0.4641, "step": 9987 }, { "epoch": 5.579888268156425, "grad_norm": 0.6262851357460022, "learning_rate": 0.0007232492997198879, "loss": 0.4732, "step": 9988 }, { "epoch": 5.580446927374302, "grad_norm": 4.5401811599731445, "learning_rate": 0.0007232212885154061, "loss": 0.4239, "step": 9989 }, { "epoch": 5.581005586592179, "grad_norm": 2.0694973468780518, "learning_rate": 0.0007231932773109245, "loss": 0.4129, "step": 9990 }, { "epoch": 5.581564245810056, "grad_norm": 1.45489501953125, "learning_rate": 0.0007231652661064427, "loss": 0.533, "step": 9991 }, { "epoch": 5.582122905027933, "grad_norm": 0.6292015314102173, "learning_rate": 0.0007231372549019608, "loss": 0.5515, "step": 9992 }, { "epoch": 5.58268156424581, "grad_norm": 2.903557777404785, "learning_rate": 0.000723109243697479, "loss": 0.4424, "step": 9993 }, { "epoch": 5.5832402234636875, "grad_norm": 0.6302845478057861, "learning_rate": 0.0007230812324929972, "loss": 0.4439, "step": 9994 }, { "epoch": 5.583798882681564, "grad_norm": 0.7181702256202698, "learning_rate": 0.0007230532212885155, "loss": 0.5026, "step": 9995 }, { "epoch": 5.584357541899442, "grad_norm": 0.4197083115577698, "learning_rate": 0.0007230252100840337, "loss": 0.4236, "step": 9996 }, { "epoch": 5.584916201117318, "grad_norm": 0.43952012062072754, "learning_rate": 0.0007229971988795519, "loss": 0.422, "step": 9997 }, { "epoch": 5.585474860335196, "grad_norm": 0.44125857949256897, "learning_rate": 0.00072296918767507, "loss": 0.52, "step": 9998 }, { "epoch": 5.586033519553073, "grad_norm": 1.0360386371612549, 
"learning_rate": 0.0007229411764705882, "loss": 0.4764, "step": 9999 }, { "epoch": 5.58659217877095, "grad_norm": 0.4642835855484009, "learning_rate": 0.0007229131652661065, "loss": 0.5008, "step": 10000 }, { "epoch": 5.58659217877095, "eval_cer": 0.09317752828603537, "eval_loss": 0.348407119512558, "eval_runtime": 55.8205, "eval_samples_per_second": 81.296, "eval_steps_per_second": 5.088, "eval_wer": 0.3695427502025083, "step": 10000 }, { "epoch": 5.587150837988827, "grad_norm": 0.46861758828163147, "learning_rate": 0.0007228851540616247, "loss": 0.3906, "step": 10001 }, { "epoch": 5.587709497206704, "grad_norm": 0.40227460861206055, "learning_rate": 0.0007228571428571429, "loss": 0.43, "step": 10002 }, { "epoch": 5.588268156424581, "grad_norm": 0.6384165287017822, "learning_rate": 0.000722829131652661, "loss": 0.4269, "step": 10003 }, { "epoch": 5.588826815642458, "grad_norm": 0.7281396389007568, "learning_rate": 0.0007228011204481792, "loss": 0.4503, "step": 10004 }, { "epoch": 5.589385474860335, "grad_norm": 0.421785444021225, "learning_rate": 0.0007227731092436975, "loss": 0.3037, "step": 10005 }, { "epoch": 5.589944134078213, "grad_norm": 1.030700445175171, "learning_rate": 0.0007227450980392157, "loss": 0.4839, "step": 10006 }, { "epoch": 5.590502793296089, "grad_norm": 0.5000261664390564, "learning_rate": 0.000722717086834734, "loss": 0.4447, "step": 10007 }, { "epoch": 5.591061452513967, "grad_norm": 0.8885688781738281, "learning_rate": 0.000722689075630252, "loss": 0.5159, "step": 10008 }, { "epoch": 5.591620111731844, "grad_norm": 0.5367618203163147, "learning_rate": 0.0007226610644257703, "loss": 0.5675, "step": 10009 }, { "epoch": 5.592178770949721, "grad_norm": 0.5591814517974854, "learning_rate": 0.0007226330532212886, "loss": 0.447, "step": 10010 }, { "epoch": 5.592737430167598, "grad_norm": 0.4015854299068451, "learning_rate": 0.0007226050420168068, "loss": 0.4855, "step": 10011 }, { "epoch": 5.593296089385475, "grad_norm": 0.4199011027812958, 
"learning_rate": 0.000722577030812325, "loss": 0.4855, "step": 10012 }, { "epoch": 5.593854748603352, "grad_norm": 0.5143280625343323, "learning_rate": 0.0007225490196078432, "loss": 0.4491, "step": 10013 }, { "epoch": 5.594413407821229, "grad_norm": 0.3952634334564209, "learning_rate": 0.0007225210084033613, "loss": 0.3515, "step": 10014 }, { "epoch": 5.594972067039106, "grad_norm": 0.4140816330909729, "learning_rate": 0.0007224929971988796, "loss": 0.4413, "step": 10015 }, { "epoch": 5.5955307262569836, "grad_norm": 0.6067275404930115, "learning_rate": 0.0007224649859943978, "loss": 0.703, "step": 10016 }, { "epoch": 5.59608938547486, "grad_norm": 0.8579608201980591, "learning_rate": 0.000722436974789916, "loss": 0.4522, "step": 10017 }, { "epoch": 5.596648044692738, "grad_norm": 0.46375545859336853, "learning_rate": 0.0007224089635854342, "loss": 0.3829, "step": 10018 }, { "epoch": 5.597206703910614, "grad_norm": 0.43228957056999207, "learning_rate": 0.0007223809523809523, "loss": 0.4277, "step": 10019 }, { "epoch": 5.597765363128492, "grad_norm": 2.5335819721221924, "learning_rate": 0.0007223529411764706, "loss": 0.3521, "step": 10020 }, { "epoch": 5.598324022346369, "grad_norm": 0.6500746607780457, "learning_rate": 0.0007223249299719888, "loss": 0.5025, "step": 10021 }, { "epoch": 5.598882681564246, "grad_norm": 0.6348538398742676, "learning_rate": 0.000722296918767507, "loss": 0.3942, "step": 10022 }, { "epoch": 5.599441340782123, "grad_norm": 0.5580580830574036, "learning_rate": 0.0007222689075630253, "loss": 0.5733, "step": 10023 }, { "epoch": 5.6, "grad_norm": 0.817731499671936, "learning_rate": 0.0007222408963585433, "loss": 0.5185, "step": 10024 }, { "epoch": 5.600558659217877, "grad_norm": 0.4958416521549225, "learning_rate": 0.0007222128851540617, "loss": 0.4505, "step": 10025 }, { "epoch": 5.6011173184357546, "grad_norm": 0.42211151123046875, "learning_rate": 0.0007221848739495799, "loss": 0.3859, "step": 10026 }, { "epoch": 5.601675977653631, 
"grad_norm": 0.4639894366264343, "learning_rate": 0.0007221568627450981, "loss": 0.4849, "step": 10027 }, { "epoch": 5.602234636871509, "grad_norm": 0.601848840713501, "learning_rate": 0.0007221288515406163, "loss": 0.4942, "step": 10028 }, { "epoch": 5.602793296089385, "grad_norm": 1.1188138723373413, "learning_rate": 0.0007221008403361345, "loss": 0.4027, "step": 10029 }, { "epoch": 5.603351955307263, "grad_norm": 0.6030825972557068, "learning_rate": 0.0007220728291316527, "loss": 0.4657, "step": 10030 }, { "epoch": 5.603910614525139, "grad_norm": 0.6522092819213867, "learning_rate": 0.0007220448179271709, "loss": 0.4364, "step": 10031 }, { "epoch": 5.604469273743017, "grad_norm": 0.5696516633033752, "learning_rate": 0.0007220168067226891, "loss": 0.4526, "step": 10032 }, { "epoch": 5.605027932960894, "grad_norm": 0.5284076929092407, "learning_rate": 0.0007219887955182073, "loss": 0.5557, "step": 10033 }, { "epoch": 5.605586592178771, "grad_norm": 0.5809754133224487, "learning_rate": 0.0007219607843137255, "loss": 0.4597, "step": 10034 }, { "epoch": 5.606145251396648, "grad_norm": 0.442489355802536, "learning_rate": 0.0007219327731092437, "loss": 0.4967, "step": 10035 }, { "epoch": 5.6067039106145256, "grad_norm": 0.5501169562339783, "learning_rate": 0.0007219047619047619, "loss": 0.3924, "step": 10036 }, { "epoch": 5.607262569832402, "grad_norm": 0.3731783628463745, "learning_rate": 0.0007218767507002801, "loss": 0.3764, "step": 10037 }, { "epoch": 5.60782122905028, "grad_norm": 0.4078274071216583, "learning_rate": 0.0007218487394957983, "loss": 0.5023, "step": 10038 }, { "epoch": 5.608379888268156, "grad_norm": 0.44391918182373047, "learning_rate": 0.0007218207282913165, "loss": 0.435, "step": 10039 }, { "epoch": 5.608938547486034, "grad_norm": 0.3873365819454193, "learning_rate": 0.0007217927170868346, "loss": 0.4155, "step": 10040 }, { "epoch": 5.60949720670391, "grad_norm": 0.5123922228813171, "learning_rate": 0.000721764705882353, "loss": 0.5375, "step": 
10041 }, { "epoch": 5.610055865921788, "grad_norm": 0.4906674921512604, "learning_rate": 0.0007217366946778712, "loss": 0.4599, "step": 10042 }, { "epoch": 5.610614525139665, "grad_norm": 0.48765140771865845, "learning_rate": 0.0007217086834733894, "loss": 0.4794, "step": 10043 }, { "epoch": 5.611173184357542, "grad_norm": 0.6538378000259399, "learning_rate": 0.0007216806722689076, "loss": 0.4788, "step": 10044 }, { "epoch": 5.611731843575419, "grad_norm": 0.5697200894355774, "learning_rate": 0.0007216526610644258, "loss": 0.4451, "step": 10045 }, { "epoch": 5.6122905027932966, "grad_norm": 1.6420912742614746, "learning_rate": 0.000721624649859944, "loss": 0.4216, "step": 10046 }, { "epoch": 5.612849162011173, "grad_norm": 0.502399742603302, "learning_rate": 0.0007215966386554622, "loss": 0.3592, "step": 10047 }, { "epoch": 5.613407821229051, "grad_norm": 4.2799530029296875, "learning_rate": 0.0007215686274509804, "loss": 0.5068, "step": 10048 }, { "epoch": 5.613966480446927, "grad_norm": 0.9166780710220337, "learning_rate": 0.0007215406162464986, "loss": 0.5354, "step": 10049 }, { "epoch": 5.614525139664805, "grad_norm": 0.6505798697471619, "learning_rate": 0.0007215126050420168, "loss": 0.446, "step": 10050 }, { "epoch": 5.615083798882681, "grad_norm": 0.3875667154788971, "learning_rate": 0.000721484593837535, "loss": 0.3177, "step": 10051 }, { "epoch": 5.615642458100559, "grad_norm": 0.5634588599205017, "learning_rate": 0.0007214565826330532, "loss": 0.4833, "step": 10052 }, { "epoch": 5.616201117318436, "grad_norm": 0.43827882409095764, "learning_rate": 0.0007214285714285714, "loss": 0.4703, "step": 10053 }, { "epoch": 5.616759776536313, "grad_norm": 0.6062840223312378, "learning_rate": 0.0007214005602240896, "loss": 0.4048, "step": 10054 }, { "epoch": 5.61731843575419, "grad_norm": 4.657212734222412, "learning_rate": 0.0007213725490196078, "loss": 0.4246, "step": 10055 }, { "epoch": 5.617877094972067, "grad_norm": 0.48280224204063416, "learning_rate": 
0.000721344537815126, "loss": 0.4399, "step": 10056 }, { "epoch": 5.618435754189944, "grad_norm": 0.6430901288986206, "learning_rate": 0.0007213165266106443, "loss": 0.4206, "step": 10057 }, { "epoch": 5.618994413407822, "grad_norm": 0.8701556324958801, "learning_rate": 0.0007212885154061625, "loss": 0.4454, "step": 10058 }, { "epoch": 5.619553072625698, "grad_norm": 0.8723263740539551, "learning_rate": 0.0007212605042016807, "loss": 0.5346, "step": 10059 }, { "epoch": 5.620111731843576, "grad_norm": 0.5074014663696289, "learning_rate": 0.0007212324929971989, "loss": 0.4705, "step": 10060 }, { "epoch": 5.620670391061452, "grad_norm": 0.4879077970981598, "learning_rate": 0.0007212044817927172, "loss": 0.5322, "step": 10061 }, { "epoch": 5.62122905027933, "grad_norm": 0.8208219408988953, "learning_rate": 0.0007211764705882353, "loss": 0.454, "step": 10062 }, { "epoch": 5.621787709497207, "grad_norm": 0.4299928843975067, "learning_rate": 0.0007211484593837535, "loss": 0.4776, "step": 10063 }, { "epoch": 5.622346368715084, "grad_norm": 0.7997061014175415, "learning_rate": 0.0007211204481792717, "loss": 0.5368, "step": 10064 }, { "epoch": 5.622905027932961, "grad_norm": 0.6432591676712036, "learning_rate": 0.0007210924369747899, "loss": 0.4587, "step": 10065 }, { "epoch": 5.623463687150838, "grad_norm": 0.49981507658958435, "learning_rate": 0.0007210644257703082, "loss": 0.3957, "step": 10066 }, { "epoch": 5.624022346368715, "grad_norm": 0.48898595571517944, "learning_rate": 0.0007210364145658263, "loss": 0.5023, "step": 10067 }, { "epoch": 5.624581005586592, "grad_norm": 0.4667215347290039, "learning_rate": 0.0007210084033613445, "loss": 0.6151, "step": 10068 }, { "epoch": 5.625139664804469, "grad_norm": 0.5658891797065735, "learning_rate": 0.0007209803921568627, "loss": 0.5951, "step": 10069 }, { "epoch": 5.625698324022347, "grad_norm": 1.6414737701416016, "learning_rate": 0.0007209523809523809, "loss": 0.4226, "step": 10070 }, { "epoch": 5.626256983240223, 
"grad_norm": 1.6741398572921753, "learning_rate": 0.0007209243697478992, "loss": 0.7108, "step": 10071 }, { "epoch": 5.626815642458101, "grad_norm": 0.40429213643074036, "learning_rate": 0.0007208963585434173, "loss": 0.4004, "step": 10072 }, { "epoch": 5.627374301675978, "grad_norm": 0.5193113088607788, "learning_rate": 0.0007208683473389356, "loss": 0.4949, "step": 10073 }, { "epoch": 5.627932960893855, "grad_norm": 0.634591281414032, "learning_rate": 0.0007208403361344538, "loss": 0.4283, "step": 10074 }, { "epoch": 5.628491620111732, "grad_norm": 0.5754101872444153, "learning_rate": 0.000720812324929972, "loss": 0.507, "step": 10075 }, { "epoch": 5.629050279329609, "grad_norm": 0.635532796382904, "learning_rate": 0.0007207843137254903, "loss": 0.5256, "step": 10076 }, { "epoch": 5.629608938547486, "grad_norm": 0.6660797595977783, "learning_rate": 0.0007207563025210085, "loss": 0.6456, "step": 10077 }, { "epoch": 5.630167597765363, "grad_norm": 0.5921980738639832, "learning_rate": 0.0007207282913165266, "loss": 0.4674, "step": 10078 }, { "epoch": 5.63072625698324, "grad_norm": 0.4608224332332611, "learning_rate": 0.0007207002801120448, "loss": 0.5155, "step": 10079 }, { "epoch": 5.631284916201118, "grad_norm": 0.8263514637947083, "learning_rate": 0.000720672268907563, "loss": 0.5754, "step": 10080 }, { "epoch": 5.631843575418994, "grad_norm": 0.7397025227546692, "learning_rate": 0.0007206442577030813, "loss": 0.4833, "step": 10081 }, { "epoch": 5.632402234636872, "grad_norm": 0.5124477744102478, "learning_rate": 0.0007206162464985995, "loss": 0.4982, "step": 10082 }, { "epoch": 5.632960893854749, "grad_norm": 0.4849470853805542, "learning_rate": 0.0007205882352941176, "loss": 0.5175, "step": 10083 }, { "epoch": 5.633519553072626, "grad_norm": 0.3957095146179199, "learning_rate": 0.0007205602240896358, "loss": 0.3494, "step": 10084 }, { "epoch": 5.634078212290503, "grad_norm": 1.271714448928833, "learning_rate": 0.000720532212885154, "loss": 0.4368, "step": 10085 
}, { "epoch": 5.63463687150838, "grad_norm": 0.5071250796318054, "learning_rate": 0.0007205042016806723, "loss": 0.428, "step": 10086 }, { "epoch": 5.635195530726257, "grad_norm": 0.5866625905036926, "learning_rate": 0.0007204761904761905, "loss": 0.4318, "step": 10087 }, { "epoch": 5.635754189944134, "grad_norm": 0.8688901662826538, "learning_rate": 0.0007204481792717086, "loss": 0.5897, "step": 10088 }, { "epoch": 5.636312849162011, "grad_norm": 0.5585296154022217, "learning_rate": 0.0007204201680672268, "loss": 0.4684, "step": 10089 }, { "epoch": 5.636871508379889, "grad_norm": 0.4540899693965912, "learning_rate": 0.000720392156862745, "loss": 0.4765, "step": 10090 }, { "epoch": 5.637430167597765, "grad_norm": 3.2529242038726807, "learning_rate": 0.0007203641456582634, "loss": 0.426, "step": 10091 }, { "epoch": 5.637988826815643, "grad_norm": 1.139200210571289, "learning_rate": 0.0007203361344537816, "loss": 0.6208, "step": 10092 }, { "epoch": 5.638547486033519, "grad_norm": 0.6055642366409302, "learning_rate": 0.0007203081232492998, "loss": 0.4254, "step": 10093 }, { "epoch": 5.639106145251397, "grad_norm": 1.2333184480667114, "learning_rate": 0.0007202801120448179, "loss": 0.4284, "step": 10094 }, { "epoch": 5.639664804469274, "grad_norm": 1.5095566511154175, "learning_rate": 0.0007202521008403361, "loss": 0.4449, "step": 10095 }, { "epoch": 5.640223463687151, "grad_norm": 4.3493475914001465, "learning_rate": 0.0007202240896358544, "loss": 0.3722, "step": 10096 }, { "epoch": 5.640782122905028, "grad_norm": 0.5825946927070618, "learning_rate": 0.0007201960784313726, "loss": 0.3941, "step": 10097 }, { "epoch": 5.641340782122905, "grad_norm": 0.5449908375740051, "learning_rate": 0.0007201680672268908, "loss": 0.5186, "step": 10098 }, { "epoch": 5.641899441340782, "grad_norm": 0.5583450794219971, "learning_rate": 0.0007201400560224089, "loss": 0.5047, "step": 10099 }, { "epoch": 5.64245810055866, "grad_norm": 0.7477580308914185, "learning_rate": 
0.0007201120448179271, "loss": 0.3964, "step": 10100 }, { "epoch": 5.643016759776536, "grad_norm": 0.7733327150344849, "learning_rate": 0.0007200840336134454, "loss": 0.4127, "step": 10101 }, { "epoch": 5.643575418994414, "grad_norm": 0.5495614409446716, "learning_rate": 0.0007200560224089636, "loss": 0.4351, "step": 10102 }, { "epoch": 5.64413407821229, "grad_norm": 0.5987143516540527, "learning_rate": 0.0007200280112044818, "loss": 0.404, "step": 10103 }, { "epoch": 5.644692737430168, "grad_norm": 0.5004493594169617, "learning_rate": 0.0007199999999999999, "loss": 0.407, "step": 10104 }, { "epoch": 5.645251396648044, "grad_norm": 0.6078566908836365, "learning_rate": 0.0007199719887955181, "loss": 0.4627, "step": 10105 }, { "epoch": 5.645810055865922, "grad_norm": 0.7295481562614441, "learning_rate": 0.0007199439775910365, "loss": 0.423, "step": 10106 }, { "epoch": 5.646368715083799, "grad_norm": 0.5811827778816223, "learning_rate": 0.0007199159663865547, "loss": 0.4175, "step": 10107 }, { "epoch": 5.646927374301676, "grad_norm": 0.7878211736679077, "learning_rate": 0.0007198879551820729, "loss": 0.4309, "step": 10108 }, { "epoch": 5.647486033519553, "grad_norm": 0.7037503719329834, "learning_rate": 0.0007198599439775911, "loss": 0.4791, "step": 10109 }, { "epoch": 5.648044692737431, "grad_norm": 0.2669017016887665, "learning_rate": 0.0007198319327731092, "loss": 0.3246, "step": 10110 }, { "epoch": 5.648603351955307, "grad_norm": 0.5799449682235718, "learning_rate": 0.0007198039215686275, "loss": 0.4184, "step": 10111 }, { "epoch": 5.649162011173185, "grad_norm": 1.132827639579773, "learning_rate": 0.0007197759103641457, "loss": 0.42, "step": 10112 }, { "epoch": 5.649720670391061, "grad_norm": 0.6218265295028687, "learning_rate": 0.0007197478991596639, "loss": 0.4334, "step": 10113 }, { "epoch": 5.650279329608939, "grad_norm": 0.5614800453186035, "learning_rate": 0.0007197198879551821, "loss": 0.3936, "step": 10114 }, { "epoch": 5.650837988826815, "grad_norm": 
2.8094356060028076, "learning_rate": 0.0007196918767507002, "loss": 0.5851, "step": 10115 }, { "epoch": 5.651396648044693, "grad_norm": 1.7567263841629028, "learning_rate": 0.0007196638655462185, "loss": 0.4774, "step": 10116 }, { "epoch": 5.65195530726257, "grad_norm": 0.6033588647842407, "learning_rate": 0.0007196358543417367, "loss": 0.5989, "step": 10117 }, { "epoch": 5.652513966480447, "grad_norm": 1.7935905456542969, "learning_rate": 0.0007196078431372549, "loss": 0.4067, "step": 10118 }, { "epoch": 5.653072625698324, "grad_norm": 0.4951060712337494, "learning_rate": 0.0007195798319327731, "loss": 0.4529, "step": 10119 }, { "epoch": 5.653631284916202, "grad_norm": 0.5029152631759644, "learning_rate": 0.0007195518207282912, "loss": 0.5307, "step": 10120 }, { "epoch": 5.654189944134078, "grad_norm": 0.458808034658432, "learning_rate": 0.0007195238095238095, "loss": 0.4112, "step": 10121 }, { "epoch": 5.654748603351956, "grad_norm": 0.7599425315856934, "learning_rate": 0.0007194957983193278, "loss": 0.4658, "step": 10122 }, { "epoch": 5.655307262569832, "grad_norm": 0.5707579851150513, "learning_rate": 0.000719467787114846, "loss": 0.3847, "step": 10123 }, { "epoch": 5.65586592178771, "grad_norm": 0.4810388684272766, "learning_rate": 0.0007194397759103642, "loss": 0.4089, "step": 10124 }, { "epoch": 5.656424581005586, "grad_norm": 2.0035946369171143, "learning_rate": 0.0007194117647058824, "loss": 0.4846, "step": 10125 }, { "epoch": 5.656983240223464, "grad_norm": 0.46774283051490784, "learning_rate": 0.0007193837535014006, "loss": 0.4436, "step": 10126 }, { "epoch": 5.657541899441341, "grad_norm": 0.7552431225776672, "learning_rate": 0.0007193557422969188, "loss": 0.6333, "step": 10127 }, { "epoch": 5.658100558659218, "grad_norm": 0.6337360143661499, "learning_rate": 0.000719327731092437, "loss": 0.4392, "step": 10128 }, { "epoch": 5.658659217877095, "grad_norm": 0.37299537658691406, "learning_rate": 0.0007192997198879552, "loss": 0.3343, "step": 10129 }, { 
"epoch": 5.659217877094972, "grad_norm": 1.113067626953125, "learning_rate": 0.0007192717086834734, "loss": 0.4078, "step": 10130 }, { "epoch": 5.659776536312849, "grad_norm": 0.9046213030815125, "learning_rate": 0.0007192436974789916, "loss": 0.4936, "step": 10131 }, { "epoch": 5.660335195530727, "grad_norm": 0.7275684475898743, "learning_rate": 0.0007192156862745098, "loss": 0.4217, "step": 10132 }, { "epoch": 5.660893854748603, "grad_norm": 0.43270766735076904, "learning_rate": 0.000719187675070028, "loss": 0.4088, "step": 10133 }, { "epoch": 5.661452513966481, "grad_norm": 0.6650369763374329, "learning_rate": 0.0007191596638655462, "loss": 0.4309, "step": 10134 }, { "epoch": 5.662011173184357, "grad_norm": 0.6442579627037048, "learning_rate": 0.0007191316526610644, "loss": 0.6323, "step": 10135 }, { "epoch": 5.662569832402235, "grad_norm": 0.5259701609611511, "learning_rate": 0.0007191036414565826, "loss": 0.3865, "step": 10136 }, { "epoch": 5.663128491620112, "grad_norm": 0.4101817309856415, "learning_rate": 0.0007190756302521008, "loss": 0.343, "step": 10137 }, { "epoch": 5.663687150837989, "grad_norm": 0.7517940998077393, "learning_rate": 0.000719047619047619, "loss": 0.567, "step": 10138 }, { "epoch": 5.664245810055866, "grad_norm": 0.4668457508087158, "learning_rate": 0.0007190196078431373, "loss": 0.442, "step": 10139 }, { "epoch": 5.664804469273743, "grad_norm": 0.8796895146369934, "learning_rate": 0.0007189915966386555, "loss": 0.5088, "step": 10140 }, { "epoch": 5.66536312849162, "grad_norm": 0.9703786969184875, "learning_rate": 0.0007189635854341738, "loss": 0.5957, "step": 10141 }, { "epoch": 5.665921787709497, "grad_norm": 0.5584157109260559, "learning_rate": 0.0007189355742296919, "loss": 0.6407, "step": 10142 }, { "epoch": 5.666480446927374, "grad_norm": 0.5805408358573914, "learning_rate": 0.0007189075630252101, "loss": 0.4389, "step": 10143 }, { "epoch": 5.667039106145252, "grad_norm": 0.4985531270503998, "learning_rate": 0.0007188795518207283, 
"loss": 0.4821, "step": 10144 }, { "epoch": 5.667597765363128, "grad_norm": 0.4365057349205017, "learning_rate": 0.0007188515406162465, "loss": 0.4635, "step": 10145 }, { "epoch": 5.668156424581006, "grad_norm": 0.517618715763092, "learning_rate": 0.0007188235294117648, "loss": 0.407, "step": 10146 }, { "epoch": 5.668715083798883, "grad_norm": 0.9290892481803894, "learning_rate": 0.0007187955182072829, "loss": 0.7564, "step": 10147 }, { "epoch": 5.66927374301676, "grad_norm": 0.5010323524475098, "learning_rate": 0.0007187675070028011, "loss": 0.4413, "step": 10148 }, { "epoch": 5.669832402234637, "grad_norm": 0.4111177623271942, "learning_rate": 0.0007187394957983193, "loss": 0.3555, "step": 10149 }, { "epoch": 5.670391061452514, "grad_norm": 0.5751590728759766, "learning_rate": 0.0007187114845938375, "loss": 0.4882, "step": 10150 }, { "epoch": 5.670949720670391, "grad_norm": 0.6412973999977112, "learning_rate": 0.0007186834733893558, "loss": 0.4287, "step": 10151 }, { "epoch": 5.671508379888268, "grad_norm": 0.7515106201171875, "learning_rate": 0.0007186554621848739, "loss": 0.4534, "step": 10152 }, { "epoch": 5.672067039106145, "grad_norm": 0.514158308506012, "learning_rate": 0.0007186274509803921, "loss": 0.4138, "step": 10153 }, { "epoch": 5.672625698324023, "grad_norm": 0.4929622411727905, "learning_rate": 0.0007185994397759103, "loss": 0.406, "step": 10154 }, { "epoch": 5.673184357541899, "grad_norm": 0.7281866669654846, "learning_rate": 0.0007185714285714286, "loss": 0.596, "step": 10155 }, { "epoch": 5.673743016759777, "grad_norm": 0.4281335771083832, "learning_rate": 0.0007185434173669469, "loss": 0.4713, "step": 10156 }, { "epoch": 5.674301675977654, "grad_norm": 0.6203215718269348, "learning_rate": 0.0007185154061624651, "loss": 0.4433, "step": 10157 }, { "epoch": 5.674860335195531, "grad_norm": 0.5056416392326355, "learning_rate": 0.0007184873949579832, "loss": 0.4264, "step": 10158 }, { "epoch": 5.675418994413408, "grad_norm": 1.670349359512329, 
"learning_rate": 0.0007184593837535014, "loss": 0.4141, "step": 10159 }, { "epoch": 5.675977653631285, "grad_norm": 0.6079891324043274, "learning_rate": 0.0007184313725490196, "loss": 0.4828, "step": 10160 }, { "epoch": 5.676536312849162, "grad_norm": 1.1369798183441162, "learning_rate": 0.0007184033613445379, "loss": 0.4626, "step": 10161 }, { "epoch": 5.677094972067039, "grad_norm": 0.42787474393844604, "learning_rate": 0.0007183753501400561, "loss": 0.6025, "step": 10162 }, { "epoch": 5.677653631284916, "grad_norm": 0.39584946632385254, "learning_rate": 0.0007183473389355742, "loss": 0.4049, "step": 10163 }, { "epoch": 5.678212290502794, "grad_norm": 0.7364213466644287, "learning_rate": 0.0007183193277310924, "loss": 0.5055, "step": 10164 }, { "epoch": 5.67877094972067, "grad_norm": 0.4109117090702057, "learning_rate": 0.0007182913165266106, "loss": 0.3837, "step": 10165 }, { "epoch": 5.679329608938548, "grad_norm": 0.42509889602661133, "learning_rate": 0.0007182633053221289, "loss": 0.4439, "step": 10166 }, { "epoch": 5.679888268156424, "grad_norm": 0.6144108772277832, "learning_rate": 0.0007182352941176471, "loss": 0.429, "step": 10167 }, { "epoch": 5.680446927374302, "grad_norm": 0.5343363881111145, "learning_rate": 0.0007182072829131652, "loss": 0.3778, "step": 10168 }, { "epoch": 5.681005586592179, "grad_norm": 0.4703051745891571, "learning_rate": 0.0007181792717086834, "loss": 0.4238, "step": 10169 }, { "epoch": 5.681564245810056, "grad_norm": 0.5371783971786499, "learning_rate": 0.0007181512605042016, "loss": 0.4451, "step": 10170 }, { "epoch": 5.682122905027933, "grad_norm": 0.5565941333770752, "learning_rate": 0.00071812324929972, "loss": 0.4208, "step": 10171 }, { "epoch": 5.68268156424581, "grad_norm": 2.1324219703674316, "learning_rate": 0.0007180952380952382, "loss": 0.4747, "step": 10172 }, { "epoch": 5.683240223463687, "grad_norm": 0.3692035675048828, "learning_rate": 0.0007180672268907564, "loss": 0.456, "step": 10173 }, { "epoch": 
5.683798882681565, "grad_norm": 0.5666664838790894, "learning_rate": 0.0007180392156862745, "loss": 0.5193, "step": 10174 }, { "epoch": 5.684357541899441, "grad_norm": 0.47725966572761536, "learning_rate": 0.0007180112044817927, "loss": 0.5569, "step": 10175 }, { "epoch": 5.684916201117319, "grad_norm": 0.9071924686431885, "learning_rate": 0.000717983193277311, "loss": 0.4178, "step": 10176 }, { "epoch": 5.685474860335195, "grad_norm": 0.4128570556640625, "learning_rate": 0.0007179551820728292, "loss": 0.4413, "step": 10177 }, { "epoch": 5.686033519553073, "grad_norm": 0.6640491485595703, "learning_rate": 0.0007179271708683474, "loss": 0.4865, "step": 10178 }, { "epoch": 5.686592178770949, "grad_norm": 0.5974729657173157, "learning_rate": 0.0007178991596638655, "loss": 0.5042, "step": 10179 }, { "epoch": 5.687150837988827, "grad_norm": 0.5193836688995361, "learning_rate": 0.0007178711484593837, "loss": 0.5873, "step": 10180 }, { "epoch": 5.687709497206704, "grad_norm": 0.43944063782691956, "learning_rate": 0.000717843137254902, "loss": 0.4738, "step": 10181 }, { "epoch": 5.688268156424581, "grad_norm": 0.4762427508831024, "learning_rate": 0.0007178151260504202, "loss": 0.3782, "step": 10182 }, { "epoch": 5.688826815642458, "grad_norm": 0.6396538019180298, "learning_rate": 0.0007177871148459384, "loss": 0.5364, "step": 10183 }, { "epoch": 5.689385474860336, "grad_norm": 1.5707911252975464, "learning_rate": 0.0007177591036414565, "loss": 0.466, "step": 10184 }, { "epoch": 5.689944134078212, "grad_norm": 0.521094799041748, "learning_rate": 0.0007177310924369747, "loss": 0.4169, "step": 10185 }, { "epoch": 5.69050279329609, "grad_norm": 0.7004362344741821, "learning_rate": 0.000717703081232493, "loss": 0.4224, "step": 10186 }, { "epoch": 5.691061452513966, "grad_norm": 0.5133131146430969, "learning_rate": 0.0007176750700280113, "loss": 0.4152, "step": 10187 }, { "epoch": 5.691620111731844, "grad_norm": 1.3676213026046753, "learning_rate": 0.0007176470588235295, "loss": 
0.4561, "step": 10188 }, { "epoch": 5.69217877094972, "grad_norm": 0.6307364106178284, "learning_rate": 0.0007176190476190477, "loss": 0.3587, "step": 10189 }, { "epoch": 5.692737430167598, "grad_norm": 0.4155840277671814, "learning_rate": 0.0007175910364145658, "loss": 0.3527, "step": 10190 }, { "epoch": 5.693296089385475, "grad_norm": 0.3923536241054535, "learning_rate": 0.0007175630252100841, "loss": 0.3642, "step": 10191 }, { "epoch": 5.693854748603352, "grad_norm": 0.47840192914009094, "learning_rate": 0.0007175350140056023, "loss": 0.3695, "step": 10192 }, { "epoch": 5.694413407821229, "grad_norm": 0.44021400809288025, "learning_rate": 0.0007175070028011205, "loss": 0.4623, "step": 10193 }, { "epoch": 5.694972067039107, "grad_norm": 0.620509922504425, "learning_rate": 0.0007174789915966387, "loss": 0.4494, "step": 10194 }, { "epoch": 5.695530726256983, "grad_norm": 0.6490848064422607, "learning_rate": 0.0007174509803921568, "loss": 0.4601, "step": 10195 }, { "epoch": 5.696089385474861, "grad_norm": 0.4066997170448303, "learning_rate": 0.0007174229691876751, "loss": 0.4243, "step": 10196 }, { "epoch": 5.696648044692737, "grad_norm": 0.4810340404510498, "learning_rate": 0.0007173949579831933, "loss": 0.5408, "step": 10197 }, { "epoch": 5.697206703910615, "grad_norm": 2.20889949798584, "learning_rate": 0.0007173669467787115, "loss": 0.4733, "step": 10198 }, { "epoch": 5.697765363128491, "grad_norm": 0.3742657005786896, "learning_rate": 0.0007173389355742297, "loss": 0.3776, "step": 10199 }, { "epoch": 5.698324022346369, "grad_norm": 3.9947080612182617, "learning_rate": 0.0007173109243697478, "loss": 0.5621, "step": 10200 }, { "epoch": 5.698882681564246, "grad_norm": 0.5836235880851746, "learning_rate": 0.0007172829131652661, "loss": 0.4484, "step": 10201 }, { "epoch": 5.699441340782123, "grad_norm": 0.49547767639160156, "learning_rate": 0.0007172549019607843, "loss": 0.4442, "step": 10202 }, { "epoch": 5.7, "grad_norm": 0.4873258173465729, "learning_rate": 
0.0007172268907563025, "loss": 0.5619, "step": 10203 }, { "epoch": 5.700558659217877, "grad_norm": 0.465565949678421, "learning_rate": 0.0007171988795518208, "loss": 0.4779, "step": 10204 }, { "epoch": 5.701117318435754, "grad_norm": 0.5477548837661743, "learning_rate": 0.000717170868347339, "loss": 0.5451, "step": 10205 }, { "epoch": 5.701675977653632, "grad_norm": 0.48681530356407166, "learning_rate": 0.0007171428571428572, "loss": 0.5464, "step": 10206 }, { "epoch": 5.702234636871508, "grad_norm": 0.4436895251274109, "learning_rate": 0.0007171148459383754, "loss": 0.3968, "step": 10207 }, { "epoch": 5.702793296089386, "grad_norm": 4.854309558868408, "learning_rate": 0.0007170868347338936, "loss": 0.4396, "step": 10208 }, { "epoch": 5.703351955307262, "grad_norm": 1.396460771560669, "learning_rate": 0.0007170588235294118, "loss": 0.4414, "step": 10209 }, { "epoch": 5.70391061452514, "grad_norm": 0.5090497136116028, "learning_rate": 0.00071703081232493, "loss": 0.3962, "step": 10210 }, { "epoch": 5.704469273743017, "grad_norm": 0.4225035011768341, "learning_rate": 0.0007170028011204482, "loss": 0.527, "step": 10211 }, { "epoch": 5.705027932960894, "grad_norm": 0.40933382511138916, "learning_rate": 0.0007169747899159664, "loss": 0.3944, "step": 10212 }, { "epoch": 5.705586592178771, "grad_norm": 0.38999760150909424, "learning_rate": 0.0007169467787114846, "loss": 0.4669, "step": 10213 }, { "epoch": 5.706145251396648, "grad_norm": 0.6663619875907898, "learning_rate": 0.0007169187675070028, "loss": 0.5686, "step": 10214 }, { "epoch": 5.706703910614525, "grad_norm": 0.38841113448143005, "learning_rate": 0.000716890756302521, "loss": 0.4206, "step": 10215 }, { "epoch": 5.707262569832402, "grad_norm": 0.5985347628593445, "learning_rate": 0.0007168627450980393, "loss": 0.4112, "step": 10216 }, { "epoch": 5.707821229050279, "grad_norm": 0.3735370934009552, "learning_rate": 0.0007168347338935574, "loss": 0.452, "step": 10217 }, { "epoch": 5.708379888268157, "grad_norm": 
0.5071160197257996, "learning_rate": 0.0007168067226890756, "loss": 0.3923, "step": 10218 }, { "epoch": 5.708938547486033, "grad_norm": 0.45847848057746887, "learning_rate": 0.0007167787114845938, "loss": 0.4339, "step": 10219 }, { "epoch": 5.709497206703911, "grad_norm": 0.9417409896850586, "learning_rate": 0.000716750700280112, "loss": 0.4745, "step": 10220 }, { "epoch": 5.710055865921788, "grad_norm": 1.1794190406799316, "learning_rate": 0.0007167226890756304, "loss": 0.4921, "step": 10221 }, { "epoch": 5.710614525139665, "grad_norm": 0.4215124845504761, "learning_rate": 0.0007166946778711485, "loss": 0.401, "step": 10222 }, { "epoch": 5.711173184357542, "grad_norm": 0.5434643626213074, "learning_rate": 0.0007166666666666667, "loss": 0.6894, "step": 10223 }, { "epoch": 5.711731843575419, "grad_norm": 0.49288007616996765, "learning_rate": 0.0007166386554621849, "loss": 0.4719, "step": 10224 }, { "epoch": 5.712290502793296, "grad_norm": 13.83658218383789, "learning_rate": 0.0007166106442577031, "loss": 0.5467, "step": 10225 }, { "epoch": 5.712849162011173, "grad_norm": 0.5512245297431946, "learning_rate": 0.0007165826330532214, "loss": 0.3874, "step": 10226 }, { "epoch": 5.71340782122905, "grad_norm": 0.640646755695343, "learning_rate": 0.0007165546218487395, "loss": 0.3851, "step": 10227 }, { "epoch": 5.713966480446928, "grad_norm": 1.979858160018921, "learning_rate": 0.0007165266106442577, "loss": 0.4239, "step": 10228 }, { "epoch": 5.714525139664804, "grad_norm": 0.49379274249076843, "learning_rate": 0.0007164985994397759, "loss": 0.4977, "step": 10229 }, { "epoch": 5.715083798882682, "grad_norm": 0.5241057276725769, "learning_rate": 0.0007164705882352941, "loss": 0.4931, "step": 10230 }, { "epoch": 5.715642458100559, "grad_norm": 0.41967758536338806, "learning_rate": 0.0007164425770308124, "loss": 0.33, "step": 10231 }, { "epoch": 5.716201117318436, "grad_norm": 0.4155007600784302, "learning_rate": 0.0007164145658263306, "loss": 0.4143, "step": 10232 }, { 
"epoch": 5.716759776536313, "grad_norm": 0.48228001594543457, "learning_rate": 0.0007163865546218487, "loss": 0.4352, "step": 10233 }, { "epoch": 5.71731843575419, "grad_norm": 0.6980542540550232, "learning_rate": 0.0007163585434173669, "loss": 0.4942, "step": 10234 }, { "epoch": 5.717877094972067, "grad_norm": 1.4606268405914307, "learning_rate": 0.0007163305322128851, "loss": 0.6699, "step": 10235 }, { "epoch": 5.718435754189944, "grad_norm": 0.5530686378479004, "learning_rate": 0.0007163025210084035, "loss": 0.4618, "step": 10236 }, { "epoch": 5.718994413407821, "grad_norm": 0.5177992582321167, "learning_rate": 0.0007162745098039217, "loss": 0.4234, "step": 10237 }, { "epoch": 5.719553072625699, "grad_norm": 0.7660933136940002, "learning_rate": 0.0007162464985994398, "loss": 0.4053, "step": 10238 }, { "epoch": 5.720111731843575, "grad_norm": 1.2872079610824585, "learning_rate": 0.000716218487394958, "loss": 0.5579, "step": 10239 }, { "epoch": 5.720670391061453, "grad_norm": 0.3990005552768707, "learning_rate": 0.0007161904761904762, "loss": 0.4447, "step": 10240 }, { "epoch": 5.721229050279329, "grad_norm": 0.5466039180755615, "learning_rate": 0.0007161624649859945, "loss": 0.4488, "step": 10241 }, { "epoch": 5.721787709497207, "grad_norm": 0.533645749092102, "learning_rate": 0.0007161344537815127, "loss": 0.4425, "step": 10242 }, { "epoch": 5.722346368715084, "grad_norm": 1.3025493621826172, "learning_rate": 0.0007161064425770308, "loss": 0.4362, "step": 10243 }, { "epoch": 5.722905027932961, "grad_norm": 0.45794904232025146, "learning_rate": 0.000716078431372549, "loss": 0.5088, "step": 10244 }, { "epoch": 5.723463687150838, "grad_norm": 0.4998818635940552, "learning_rate": 0.0007160504201680672, "loss": 0.3459, "step": 10245 }, { "epoch": 5.724022346368715, "grad_norm": 0.6522479057312012, "learning_rate": 0.0007160224089635855, "loss": 0.3285, "step": 10246 }, { "epoch": 5.724581005586592, "grad_norm": 0.45060428977012634, "learning_rate": 
0.0007159943977591037, "loss": 0.4389, "step": 10247 }, { "epoch": 5.72513966480447, "grad_norm": 0.5087399482727051, "learning_rate": 0.0007159663865546219, "loss": 0.4732, "step": 10248 }, { "epoch": 5.725698324022346, "grad_norm": 0.617437481880188, "learning_rate": 0.00071593837535014, "loss": 0.5119, "step": 10249 }, { "epoch": 5.726256983240224, "grad_norm": 0.572313666343689, "learning_rate": 0.0007159103641456582, "loss": 0.4244, "step": 10250 }, { "epoch": 5.7268156424581, "grad_norm": 0.44844773411750793, "learning_rate": 0.0007158823529411765, "loss": 0.4789, "step": 10251 }, { "epoch": 5.727374301675978, "grad_norm": 0.4281400442123413, "learning_rate": 0.0007158543417366947, "loss": 0.4069, "step": 10252 }, { "epoch": 5.727932960893854, "grad_norm": 0.6595506072044373, "learning_rate": 0.000715826330532213, "loss": 0.5112, "step": 10253 }, { "epoch": 5.728491620111732, "grad_norm": 0.48393046855926514, "learning_rate": 0.000715798319327731, "loss": 0.4836, "step": 10254 }, { "epoch": 5.729050279329609, "grad_norm": 0.7495980262756348, "learning_rate": 0.0007157703081232493, "loss": 0.4125, "step": 10255 }, { "epoch": 5.729608938547486, "grad_norm": 0.4771103262901306, "learning_rate": 0.0007157422969187676, "loss": 0.3413, "step": 10256 }, { "epoch": 5.730167597765363, "grad_norm": 0.7307485938072205, "learning_rate": 0.0007157142857142858, "loss": 0.402, "step": 10257 }, { "epoch": 5.730726256983241, "grad_norm": 0.440533846616745, "learning_rate": 0.000715686274509804, "loss": 0.4158, "step": 10258 }, { "epoch": 5.731284916201117, "grad_norm": 0.633655309677124, "learning_rate": 0.0007156582633053221, "loss": 0.5131, "step": 10259 }, { "epoch": 5.731843575418995, "grad_norm": 0.6138968467712402, "learning_rate": 0.0007156302521008403, "loss": 0.4086, "step": 10260 }, { "epoch": 5.732402234636871, "grad_norm": 0.5836642980575562, "learning_rate": 0.0007156022408963585, "loss": 0.5043, "step": 10261 }, { "epoch": 5.732960893854749, "grad_norm": 
1.9546562433242798, "learning_rate": 0.0007155742296918768, "loss": 0.4488, "step": 10262 }, { "epoch": 5.733519553072625, "grad_norm": 0.876761794090271, "learning_rate": 0.000715546218487395, "loss": 0.6389, "step": 10263 }, { "epoch": 5.734078212290503, "grad_norm": 0.7043126821517944, "learning_rate": 0.0007155182072829132, "loss": 0.5307, "step": 10264 }, { "epoch": 5.73463687150838, "grad_norm": 0.7099644541740417, "learning_rate": 0.0007154901960784313, "loss": 0.449, "step": 10265 }, { "epoch": 5.735195530726257, "grad_norm": 1.3028637170791626, "learning_rate": 0.0007154621848739495, "loss": 0.546, "step": 10266 }, { "epoch": 5.735754189944134, "grad_norm": 0.4699321389198303, "learning_rate": 0.0007154341736694678, "loss": 0.4753, "step": 10267 }, { "epoch": 5.736312849162011, "grad_norm": 0.6209031939506531, "learning_rate": 0.000715406162464986, "loss": 0.6278, "step": 10268 }, { "epoch": 5.736871508379888, "grad_norm": 0.3387393057346344, "learning_rate": 0.0007153781512605043, "loss": 0.3813, "step": 10269 }, { "epoch": 5.737430167597766, "grad_norm": 1.0591232776641846, "learning_rate": 0.0007153501400560223, "loss": 0.406, "step": 10270 }, { "epoch": 5.737988826815642, "grad_norm": 0.8121812343597412, "learning_rate": 0.0007153221288515406, "loss": 0.5272, "step": 10271 }, { "epoch": 5.73854748603352, "grad_norm": 0.6363793015480042, "learning_rate": 0.0007152941176470589, "loss": 0.4774, "step": 10272 }, { "epoch": 5.739106145251396, "grad_norm": 0.4615616500377655, "learning_rate": 0.0007152661064425771, "loss": 0.3486, "step": 10273 }, { "epoch": 5.739664804469274, "grad_norm": 0.5085633397102356, "learning_rate": 0.0007152380952380953, "loss": 0.5245, "step": 10274 }, { "epoch": 5.740223463687151, "grad_norm": 0.4302261769771576, "learning_rate": 0.0007152100840336134, "loss": 0.4086, "step": 10275 }, { "epoch": 5.740782122905028, "grad_norm": 0.5649064779281616, "learning_rate": 0.0007151820728291316, "loss": 0.5173, "step": 10276 }, { "epoch": 
5.741340782122905, "grad_norm": 0.800110936164856, "learning_rate": 0.0007151540616246499, "loss": 0.3889, "step": 10277 }, { "epoch": 5.741899441340782, "grad_norm": 0.5093740820884705, "learning_rate": 0.0007151260504201681, "loss": 0.4682, "step": 10278 }, { "epoch": 5.742458100558659, "grad_norm": 0.66042560338974, "learning_rate": 0.0007150980392156863, "loss": 0.5723, "step": 10279 }, { "epoch": 5.743016759776537, "grad_norm": 0.46901175379753113, "learning_rate": 0.0007150700280112045, "loss": 0.4157, "step": 10280 }, { "epoch": 5.743575418994413, "grad_norm": 0.7348647713661194, "learning_rate": 0.0007150420168067226, "loss": 0.4622, "step": 10281 }, { "epoch": 5.744134078212291, "grad_norm": 0.6721197366714478, "learning_rate": 0.0007150140056022409, "loss": 0.3587, "step": 10282 }, { "epoch": 5.744692737430167, "grad_norm": 0.502974271774292, "learning_rate": 0.0007149859943977591, "loss": 0.3856, "step": 10283 }, { "epoch": 5.745251396648045, "grad_norm": 0.5577751398086548, "learning_rate": 0.0007149579831932773, "loss": 0.5753, "step": 10284 }, { "epoch": 5.745810055865922, "grad_norm": 0.6245629191398621, "learning_rate": 0.0007149299719887955, "loss": 0.4758, "step": 10285 }, { "epoch": 5.746368715083799, "grad_norm": 10.034965515136719, "learning_rate": 0.0007149019607843136, "loss": 0.4458, "step": 10286 }, { "epoch": 5.746927374301676, "grad_norm": 0.38283371925354004, "learning_rate": 0.000714873949579832, "loss": 0.3838, "step": 10287 }, { "epoch": 5.747486033519553, "grad_norm": 0.8203089237213135, "learning_rate": 0.0007148459383753502, "loss": 0.5509, "step": 10288 }, { "epoch": 5.74804469273743, "grad_norm": 0.5299038290977478, "learning_rate": 0.0007148179271708684, "loss": 0.4389, "step": 10289 }, { "epoch": 5.748603351955307, "grad_norm": 0.5275065302848816, "learning_rate": 0.0007147899159663866, "loss": 0.4486, "step": 10290 }, { "epoch": 5.749162011173184, "grad_norm": 0.5049828290939331, "learning_rate": 0.0007147619047619047, "loss": 
0.4602, "step": 10291 }, { "epoch": 5.749720670391062, "grad_norm": 0.5503336787223816, "learning_rate": 0.000714733893557423, "loss": 0.4351, "step": 10292 }, { "epoch": 5.750279329608938, "grad_norm": 0.772299587726593, "learning_rate": 0.0007147058823529412, "loss": 0.6089, "step": 10293 }, { "epoch": 5.750837988826816, "grad_norm": 0.4710327684879303, "learning_rate": 0.0007146778711484594, "loss": 0.4321, "step": 10294 }, { "epoch": 5.751396648044693, "grad_norm": 0.648944616317749, "learning_rate": 0.0007146498599439776, "loss": 0.5418, "step": 10295 }, { "epoch": 5.75195530726257, "grad_norm": 0.7221190333366394, "learning_rate": 0.0007146218487394958, "loss": 0.4424, "step": 10296 }, { "epoch": 5.752513966480447, "grad_norm": 1.8884402513504028, "learning_rate": 0.000714593837535014, "loss": 0.5027, "step": 10297 }, { "epoch": 5.753072625698324, "grad_norm": 0.525856614112854, "learning_rate": 0.0007145658263305322, "loss": 0.5015, "step": 10298 }, { "epoch": 5.753631284916201, "grad_norm": 0.48546022176742554, "learning_rate": 0.0007145378151260504, "loss": 0.4952, "step": 10299 }, { "epoch": 5.754189944134078, "grad_norm": 0.5645453333854675, "learning_rate": 0.0007145098039215686, "loss": 0.5335, "step": 10300 }, { "epoch": 5.754748603351955, "grad_norm": 0.5032395124435425, "learning_rate": 0.0007144817927170868, "loss": 0.4258, "step": 10301 }, { "epoch": 5.755307262569833, "grad_norm": 0.47148433327674866, "learning_rate": 0.000714453781512605, "loss": 0.3793, "step": 10302 }, { "epoch": 5.755865921787709, "grad_norm": 0.5534130930900574, "learning_rate": 0.0007144257703081233, "loss": 0.3904, "step": 10303 }, { "epoch": 5.756424581005587, "grad_norm": 0.5768478512763977, "learning_rate": 0.0007143977591036415, "loss": 0.3728, "step": 10304 }, { "epoch": 5.756983240223463, "grad_norm": 0.6742956042289734, "learning_rate": 0.0007143697478991597, "loss": 0.4512, "step": 10305 }, { "epoch": 5.757541899441341, "grad_norm": 0.4233131408691406, 
"learning_rate": 0.0007143417366946779, "loss": 0.4657, "step": 10306 }, { "epoch": 5.758100558659218, "grad_norm": 0.8073203563690186, "learning_rate": 0.0007143137254901961, "loss": 0.5316, "step": 10307 }, { "epoch": 5.758659217877095, "grad_norm": 0.6402434706687927, "learning_rate": 0.0007142857142857143, "loss": 0.4549, "step": 10308 }, { "epoch": 5.759217877094972, "grad_norm": 1.0459699630737305, "learning_rate": 0.0007142577030812325, "loss": 0.4441, "step": 10309 }, { "epoch": 5.759776536312849, "grad_norm": 0.7883827090263367, "learning_rate": 0.0007142296918767507, "loss": 0.4036, "step": 10310 }, { "epoch": 5.760335195530726, "grad_norm": 0.739895761013031, "learning_rate": 0.0007142016806722689, "loss": 0.4846, "step": 10311 }, { "epoch": 5.760893854748604, "grad_norm": 0.4047786295413971, "learning_rate": 0.0007141736694677872, "loss": 0.4197, "step": 10312 }, { "epoch": 5.76145251396648, "grad_norm": 0.7069640755653381, "learning_rate": 0.0007141456582633053, "loss": 0.4007, "step": 10313 }, { "epoch": 5.762011173184358, "grad_norm": 0.49129244685173035, "learning_rate": 0.0007141176470588235, "loss": 0.5691, "step": 10314 }, { "epoch": 5.762569832402234, "grad_norm": 0.5147718191146851, "learning_rate": 0.0007140896358543417, "loss": 0.4255, "step": 10315 }, { "epoch": 5.763128491620112, "grad_norm": 0.45998597145080566, "learning_rate": 0.0007140616246498599, "loss": 0.3919, "step": 10316 }, { "epoch": 5.763687150837989, "grad_norm": 0.4655545651912689, "learning_rate": 0.0007140336134453782, "loss": 0.3918, "step": 10317 }, { "epoch": 5.764245810055866, "grad_norm": 0.7525028586387634, "learning_rate": 0.0007140056022408963, "loss": 0.6541, "step": 10318 }, { "epoch": 5.764804469273743, "grad_norm": 0.6135449409484863, "learning_rate": 0.0007139775910364146, "loss": 0.4143, "step": 10319 }, { "epoch": 5.76536312849162, "grad_norm": 0.3862467110157013, "learning_rate": 0.0007139495798319328, "loss": 0.4043, "step": 10320 }, { "epoch": 
5.765921787709497, "grad_norm": 0.4504019021987915, "learning_rate": 0.000713921568627451, "loss": 0.4101, "step": 10321 }, { "epoch": 5.766480446927375, "grad_norm": 0.5463133454322815, "learning_rate": 0.0007138935574229693, "loss": 0.4725, "step": 10322 }, { "epoch": 5.767039106145251, "grad_norm": 0.8920431137084961, "learning_rate": 0.0007138655462184874, "loss": 0.37, "step": 10323 }, { "epoch": 5.767597765363129, "grad_norm": 0.5116588473320007, "learning_rate": 0.0007138375350140056, "loss": 0.3676, "step": 10324 }, { "epoch": 5.768156424581005, "grad_norm": 0.5672568082809448, "learning_rate": 0.0007138095238095238, "loss": 0.46, "step": 10325 }, { "epoch": 5.768715083798883, "grad_norm": 0.6564370393753052, "learning_rate": 0.000713781512605042, "loss": 0.4016, "step": 10326 }, { "epoch": 5.769273743016759, "grad_norm": 0.5924671292304993, "learning_rate": 0.0007137535014005603, "loss": 0.444, "step": 10327 }, { "epoch": 5.769832402234637, "grad_norm": 0.46051570773124695, "learning_rate": 0.0007137254901960785, "loss": 0.4421, "step": 10328 }, { "epoch": 5.770391061452514, "grad_norm": 0.4963991343975067, "learning_rate": 0.0007136974789915966, "loss": 0.4837, "step": 10329 }, { "epoch": 5.770949720670391, "grad_norm": 1.0286465883255005, "learning_rate": 0.0007136694677871148, "loss": 0.5905, "step": 10330 }, { "epoch": 5.771508379888268, "grad_norm": 1.0372523069381714, "learning_rate": 0.000713641456582633, "loss": 0.566, "step": 10331 }, { "epoch": 5.772067039106146, "grad_norm": 0.7292980551719666, "learning_rate": 0.0007136134453781513, "loss": 0.5482, "step": 10332 }, { "epoch": 5.772625698324022, "grad_norm": 2.2000749111175537, "learning_rate": 0.0007135854341736695, "loss": 0.4456, "step": 10333 }, { "epoch": 5.7731843575419, "grad_norm": 0.9369186162948608, "learning_rate": 0.0007135574229691876, "loss": 0.6232, "step": 10334 }, { "epoch": 5.773743016759776, "grad_norm": 0.5925668478012085, "learning_rate": 0.0007135294117647058, "loss": 
0.541, "step": 10335 }, { "epoch": 5.774301675977654, "grad_norm": 0.6764436960220337, "learning_rate": 0.000713501400560224, "loss": 0.4441, "step": 10336 }, { "epoch": 5.77486033519553, "grad_norm": 0.516507625579834, "learning_rate": 0.0007134733893557424, "loss": 0.4232, "step": 10337 }, { "epoch": 5.775418994413408, "grad_norm": 1.0051759481430054, "learning_rate": 0.0007134453781512606, "loss": 0.3122, "step": 10338 }, { "epoch": 5.775977653631285, "grad_norm": 0.986807107925415, "learning_rate": 0.0007134173669467787, "loss": 0.5684, "step": 10339 }, { "epoch": 5.776536312849162, "grad_norm": 0.59564208984375, "learning_rate": 0.0007133893557422969, "loss": 0.4224, "step": 10340 }, { "epoch": 5.777094972067039, "grad_norm": 0.7339884638786316, "learning_rate": 0.0007133613445378151, "loss": 0.5999, "step": 10341 }, { "epoch": 5.777653631284916, "grad_norm": 0.6786849498748779, "learning_rate": 0.0007133333333333334, "loss": 0.4191, "step": 10342 }, { "epoch": 5.778212290502793, "grad_norm": 0.47214388847351074, "learning_rate": 0.0007133053221288516, "loss": 0.4719, "step": 10343 }, { "epoch": 5.778770949720671, "grad_norm": 0.5573878288269043, "learning_rate": 0.0007132773109243698, "loss": 0.3942, "step": 10344 }, { "epoch": 5.779329608938547, "grad_norm": 0.5015531778335571, "learning_rate": 0.0007132492997198879, "loss": 0.3887, "step": 10345 }, { "epoch": 5.779888268156425, "grad_norm": 0.656991183757782, "learning_rate": 0.0007132212885154061, "loss": 0.432, "step": 10346 }, { "epoch": 5.780446927374301, "grad_norm": 0.4074009358882904, "learning_rate": 0.0007131932773109244, "loss": 0.4126, "step": 10347 }, { "epoch": 5.781005586592179, "grad_norm": 0.40070173144340515, "learning_rate": 0.0007131652661064426, "loss": 0.3531, "step": 10348 }, { "epoch": 5.781564245810056, "grad_norm": 1.1118261814117432, "learning_rate": 0.0007131372549019608, "loss": 0.3628, "step": 10349 }, { "epoch": 5.782122905027933, "grad_norm": 0.4356982111930847, 
"learning_rate": 0.0007131092436974789, "loss": 0.4028, "step": 10350 }, { "epoch": 5.78268156424581, "grad_norm": 0.5115057826042175, "learning_rate": 0.0007130812324929971, "loss": 0.4273, "step": 10351 }, { "epoch": 5.783240223463687, "grad_norm": 0.7075468301773071, "learning_rate": 0.0007130532212885155, "loss": 0.4247, "step": 10352 }, { "epoch": 5.783798882681564, "grad_norm": 0.48956945538520813, "learning_rate": 0.0007130252100840337, "loss": 0.5386, "step": 10353 }, { "epoch": 5.784357541899441, "grad_norm": 0.6290279030799866, "learning_rate": 0.0007129971988795519, "loss": 0.4949, "step": 10354 }, { "epoch": 5.784916201117318, "grad_norm": 0.3998968005180359, "learning_rate": 0.00071296918767507, "loss": 0.4515, "step": 10355 }, { "epoch": 5.785474860335196, "grad_norm": 1.2403266429901123, "learning_rate": 0.0007129411764705882, "loss": 0.4137, "step": 10356 }, { "epoch": 5.786033519553072, "grad_norm": 0.6960744857788086, "learning_rate": 0.0007129131652661065, "loss": 0.4211, "step": 10357 }, { "epoch": 5.78659217877095, "grad_norm": 1.0042225122451782, "learning_rate": 0.0007128851540616247, "loss": 0.4041, "step": 10358 }, { "epoch": 5.787150837988827, "grad_norm": 0.5344101190567017, "learning_rate": 0.0007128571428571429, "loss": 0.3921, "step": 10359 }, { "epoch": 5.787709497206704, "grad_norm": 0.8131825923919678, "learning_rate": 0.0007128291316526611, "loss": 0.3912, "step": 10360 }, { "epoch": 5.788268156424581, "grad_norm": 0.4721587300300598, "learning_rate": 0.0007128011204481792, "loss": 0.5389, "step": 10361 }, { "epoch": 5.788826815642458, "grad_norm": 0.5934752821922302, "learning_rate": 0.0007127731092436975, "loss": 0.4513, "step": 10362 }, { "epoch": 5.789385474860335, "grad_norm": 1.3433643579483032, "learning_rate": 0.0007127450980392157, "loss": 0.4972, "step": 10363 }, { "epoch": 5.789944134078212, "grad_norm": 3.16454815864563, "learning_rate": 0.0007127170868347339, "loss": 0.3907, "step": 10364 }, { "epoch": 
5.790502793296089, "grad_norm": 0.5132924318313599, "learning_rate": 0.0007126890756302521, "loss": 0.6453, "step": 10365 }, { "epoch": 5.791061452513967, "grad_norm": 0.6273893117904663, "learning_rate": 0.0007126610644257702, "loss": 0.4369, "step": 10366 }, { "epoch": 5.791620111731843, "grad_norm": 0.5797558426856995, "learning_rate": 0.0007126330532212885, "loss": 0.5125, "step": 10367 }, { "epoch": 5.792178770949721, "grad_norm": 0.5737980604171753, "learning_rate": 0.0007126050420168068, "loss": 0.3799, "step": 10368 }, { "epoch": 5.792737430167598, "grad_norm": 0.613592803478241, "learning_rate": 0.000712577030812325, "loss": 0.51, "step": 10369 }, { "epoch": 5.793296089385475, "grad_norm": 1.2404719591140747, "learning_rate": 0.0007125490196078432, "loss": 0.3916, "step": 10370 }, { "epoch": 5.793854748603352, "grad_norm": 0.8153291940689087, "learning_rate": 0.0007125210084033613, "loss": 0.5167, "step": 10371 }, { "epoch": 5.794413407821229, "grad_norm": 0.45598992705345154, "learning_rate": 0.0007124929971988796, "loss": 0.5011, "step": 10372 }, { "epoch": 5.794972067039106, "grad_norm": 0.844488799571991, "learning_rate": 0.0007124649859943978, "loss": 0.4639, "step": 10373 }, { "epoch": 5.795530726256983, "grad_norm": 0.5530940294265747, "learning_rate": 0.000712436974789916, "loss": 0.5155, "step": 10374 }, { "epoch": 5.79608938547486, "grad_norm": 0.5928524732589722, "learning_rate": 0.0007124089635854342, "loss": 0.386, "step": 10375 }, { "epoch": 5.796648044692738, "grad_norm": 0.4422083795070648, "learning_rate": 0.0007123809523809524, "loss": 0.4759, "step": 10376 }, { "epoch": 5.797206703910614, "grad_norm": 0.41824138164520264, "learning_rate": 0.0007123529411764706, "loss": 0.386, "step": 10377 }, { "epoch": 5.797765363128492, "grad_norm": 1.7007778882980347, "learning_rate": 0.0007123249299719888, "loss": 0.4932, "step": 10378 }, { "epoch": 5.798324022346368, "grad_norm": 0.7457218170166016, "learning_rate": 0.000712296918767507, "loss": 
0.4775, "step": 10379 }, { "epoch": 5.798882681564246, "grad_norm": 0.7043110728263855, "learning_rate": 0.0007122689075630252, "loss": 0.4898, "step": 10380 }, { "epoch": 5.799441340782123, "grad_norm": 0.5834572315216064, "learning_rate": 0.0007122408963585434, "loss": 0.5153, "step": 10381 }, { "epoch": 5.8, "grad_norm": 0.771232545375824, "learning_rate": 0.0007122128851540616, "loss": 0.4092, "step": 10382 }, { "epoch": 5.800558659217877, "grad_norm": 0.5131063461303711, "learning_rate": 0.0007121848739495798, "loss": 0.3896, "step": 10383 }, { "epoch": 5.801117318435754, "grad_norm": 0.4704035222530365, "learning_rate": 0.000712156862745098, "loss": 0.3581, "step": 10384 }, { "epoch": 5.801675977653631, "grad_norm": 3.205909013748169, "learning_rate": 0.0007121288515406163, "loss": 0.5036, "step": 10385 }, { "epoch": 5.802234636871509, "grad_norm": 3.2709145545959473, "learning_rate": 0.0007121008403361345, "loss": 0.4595, "step": 10386 }, { "epoch": 5.802793296089385, "grad_norm": 0.7111645936965942, "learning_rate": 0.0007120728291316528, "loss": 0.4382, "step": 10387 }, { "epoch": 5.803351955307263, "grad_norm": 0.6028965711593628, "learning_rate": 0.0007120448179271709, "loss": 0.45, "step": 10388 }, { "epoch": 5.803910614525139, "grad_norm": 0.5565794706344604, "learning_rate": 0.0007120168067226891, "loss": 0.336, "step": 10389 }, { "epoch": 5.804469273743017, "grad_norm": 0.539331316947937, "learning_rate": 0.0007119887955182073, "loss": 0.4887, "step": 10390 }, { "epoch": 5.805027932960893, "grad_norm": 0.5169692039489746, "learning_rate": 0.0007119607843137255, "loss": 0.5495, "step": 10391 }, { "epoch": 5.805586592178771, "grad_norm": 0.4366172254085541, "learning_rate": 0.0007119327731092438, "loss": 0.4383, "step": 10392 }, { "epoch": 5.806145251396648, "grad_norm": 0.9151692986488342, "learning_rate": 0.0007119047619047619, "loss": 0.6503, "step": 10393 }, { "epoch": 5.806703910614525, "grad_norm": 0.6308380365371704, "learning_rate": 
0.0007118767507002801, "loss": 0.3807, "step": 10394 }, { "epoch": 5.807262569832402, "grad_norm": 0.6575770974159241, "learning_rate": 0.0007118487394957983, "loss": 0.4787, "step": 10395 }, { "epoch": 5.80782122905028, "grad_norm": 0.6000781059265137, "learning_rate": 0.0007118207282913165, "loss": 0.4419, "step": 10396 }, { "epoch": 5.808379888268156, "grad_norm": 0.6404375433921814, "learning_rate": 0.0007117927170868348, "loss": 0.4397, "step": 10397 }, { "epoch": 5.808938547486034, "grad_norm": 0.4470246732234955, "learning_rate": 0.0007117647058823529, "loss": 0.5114, "step": 10398 }, { "epoch": 5.80949720670391, "grad_norm": 0.6480503082275391, "learning_rate": 0.0007117366946778711, "loss": 0.3989, "step": 10399 }, { "epoch": 5.810055865921788, "grad_norm": 0.5222627520561218, "learning_rate": 0.0007117086834733893, "loss": 0.4436, "step": 10400 }, { "epoch": 5.810614525139664, "grad_norm": 0.4827338755130768, "learning_rate": 0.0007116806722689076, "loss": 0.3718, "step": 10401 }, { "epoch": 5.811173184357542, "grad_norm": 0.5411868095397949, "learning_rate": 0.0007116526610644259, "loss": 0.5172, "step": 10402 }, { "epoch": 5.811731843575419, "grad_norm": 0.4699190855026245, "learning_rate": 0.0007116246498599441, "loss": 0.3949, "step": 10403 }, { "epoch": 5.812290502793296, "grad_norm": 2.8595128059387207, "learning_rate": 0.0007115966386554622, "loss": 0.457, "step": 10404 }, { "epoch": 5.812849162011173, "grad_norm": 0.7540532946586609, "learning_rate": 0.0007115686274509804, "loss": 0.5276, "step": 10405 }, { "epoch": 5.813407821229051, "grad_norm": 0.43818703293800354, "learning_rate": 0.0007115406162464986, "loss": 0.3988, "step": 10406 }, { "epoch": 5.813966480446927, "grad_norm": 0.542113721370697, "learning_rate": 0.0007115126050420169, "loss": 0.4029, "step": 10407 }, { "epoch": 5.814525139664805, "grad_norm": 2.103590488433838, "learning_rate": 0.0007114845938375351, "loss": 0.5161, "step": 10408 }, { "epoch": 5.815083798882681, "grad_norm": 
0.6302580237388611, "learning_rate": 0.0007114565826330532, "loss": 0.4646, "step": 10409 }, { "epoch": 5.815642458100559, "grad_norm": 0.5420263409614563, "learning_rate": 0.0007114285714285714, "loss": 0.5032, "step": 10410 }, { "epoch": 5.816201117318435, "grad_norm": 0.42350655794143677, "learning_rate": 0.0007114005602240896, "loss": 0.4667, "step": 10411 }, { "epoch": 5.816759776536313, "grad_norm": 1.8075344562530518, "learning_rate": 0.0007113725490196079, "loss": 0.4886, "step": 10412 }, { "epoch": 5.81731843575419, "grad_norm": 0.5061565637588501, "learning_rate": 0.0007113445378151261, "loss": 0.3747, "step": 10413 }, { "epoch": 5.817877094972067, "grad_norm": 0.7426716089248657, "learning_rate": 0.0007113165266106442, "loss": 0.5273, "step": 10414 }, { "epoch": 5.818435754189944, "grad_norm": 0.5342215299606323, "learning_rate": 0.0007112885154061624, "loss": 0.4341, "step": 10415 }, { "epoch": 5.818994413407821, "grad_norm": 0.5137396454811096, "learning_rate": 0.0007112605042016806, "loss": 0.3784, "step": 10416 }, { "epoch": 5.819553072625698, "grad_norm": 0.5618327260017395, "learning_rate": 0.000711232492997199, "loss": 0.3531, "step": 10417 }, { "epoch": 5.820111731843576, "grad_norm": 0.4717462360858917, "learning_rate": 0.0007112044817927172, "loss": 0.6047, "step": 10418 }, { "epoch": 5.820670391061452, "grad_norm": 0.48858439922332764, "learning_rate": 0.0007111764705882354, "loss": 0.4873, "step": 10419 }, { "epoch": 5.82122905027933, "grad_norm": 0.6286826729774475, "learning_rate": 0.0007111484593837535, "loss": 0.4355, "step": 10420 }, { "epoch": 5.821787709497206, "grad_norm": 0.5934100151062012, "learning_rate": 0.0007111204481792717, "loss": 0.3404, "step": 10421 }, { "epoch": 5.822346368715084, "grad_norm": 0.6859205961227417, "learning_rate": 0.00071109243697479, "loss": 0.5466, "step": 10422 }, { "epoch": 5.822905027932961, "grad_norm": 0.6118091344833374, "learning_rate": 0.0007110644257703082, "loss": 0.4698, "step": 10423 }, { 
"epoch": 5.823463687150838, "grad_norm": 0.6592184901237488, "learning_rate": 0.0007110364145658264, "loss": 0.4413, "step": 10424 }, { "epoch": 5.824022346368715, "grad_norm": 0.37606433033943176, "learning_rate": 0.0007110084033613445, "loss": 0.4124, "step": 10425 }, { "epoch": 5.824581005586592, "grad_norm": 0.3604859709739685, "learning_rate": 0.0007109803921568627, "loss": 0.4038, "step": 10426 }, { "epoch": 5.825139664804469, "grad_norm": 0.38896092772483826, "learning_rate": 0.000710952380952381, "loss": 0.3804, "step": 10427 }, { "epoch": 5.825698324022346, "grad_norm": 0.6724205017089844, "learning_rate": 0.0007109243697478992, "loss": 0.4783, "step": 10428 }, { "epoch": 5.826256983240223, "grad_norm": 4.228426456451416, "learning_rate": 0.0007108963585434174, "loss": 0.41, "step": 10429 }, { "epoch": 5.826815642458101, "grad_norm": 0.6450451612472534, "learning_rate": 0.0007108683473389355, "loss": 0.5602, "step": 10430 }, { "epoch": 5.827374301675977, "grad_norm": 0.7637776136398315, "learning_rate": 0.0007108403361344537, "loss": 0.5773, "step": 10431 }, { "epoch": 5.827932960893855, "grad_norm": 0.5785620212554932, "learning_rate": 0.000710812324929972, "loss": 0.394, "step": 10432 }, { "epoch": 5.828491620111732, "grad_norm": 0.38797181844711304, "learning_rate": 0.0007107843137254903, "loss": 0.356, "step": 10433 }, { "epoch": 5.829050279329609, "grad_norm": 0.5849924683570862, "learning_rate": 0.0007107563025210085, "loss": 0.4449, "step": 10434 }, { "epoch": 5.829608938547486, "grad_norm": 0.49999329447746277, "learning_rate": 0.0007107282913165267, "loss": 0.5016, "step": 10435 }, { "epoch": 5.830167597765363, "grad_norm": 0.4862354099750519, "learning_rate": 0.0007107002801120448, "loss": 0.4636, "step": 10436 }, { "epoch": 5.83072625698324, "grad_norm": 0.6120949983596802, "learning_rate": 0.0007106722689075631, "loss": 0.479, "step": 10437 }, { "epoch": 5.831284916201117, "grad_norm": 0.4680287837982178, "learning_rate": 0.0007106442577030813, 
"loss": 0.3783, "step": 10438 }, { "epoch": 5.831843575418994, "grad_norm": 0.4549527168273926, "learning_rate": 0.0007106162464985995, "loss": 0.3926, "step": 10439 }, { "epoch": 5.832402234636872, "grad_norm": 0.5019168257713318, "learning_rate": 0.0007105882352941177, "loss": 0.434, "step": 10440 }, { "epoch": 5.832960893854748, "grad_norm": 0.5315141677856445, "learning_rate": 0.0007105602240896358, "loss": 0.4836, "step": 10441 }, { "epoch": 5.833519553072626, "grad_norm": 0.5233185887336731, "learning_rate": 0.0007105322128851541, "loss": 0.477, "step": 10442 }, { "epoch": 5.834078212290503, "grad_norm": 0.4724504053592682, "learning_rate": 0.0007105042016806723, "loss": 0.3778, "step": 10443 }, { "epoch": 5.83463687150838, "grad_norm": 0.6455507278442383, "learning_rate": 0.0007104761904761905, "loss": 0.554, "step": 10444 }, { "epoch": 5.835195530726257, "grad_norm": 0.41713330149650574, "learning_rate": 0.0007104481792717087, "loss": 0.4065, "step": 10445 }, { "epoch": 5.835754189944134, "grad_norm": 0.6680293083190918, "learning_rate": 0.0007104201680672268, "loss": 0.3554, "step": 10446 }, { "epoch": 5.836312849162011, "grad_norm": 0.4157554507255554, "learning_rate": 0.0007103921568627451, "loss": 0.4236, "step": 10447 }, { "epoch": 5.836871508379888, "grad_norm": 0.7821092009544373, "learning_rate": 0.0007103641456582633, "loss": 0.3547, "step": 10448 }, { "epoch": 5.837430167597765, "grad_norm": 0.4302677512168884, "learning_rate": 0.0007103361344537815, "loss": 0.3936, "step": 10449 }, { "epoch": 5.837988826815643, "grad_norm": 0.6865050792694092, "learning_rate": 0.0007103081232492998, "loss": 0.4549, "step": 10450 }, { "epoch": 5.838547486033519, "grad_norm": 0.5388014912605286, "learning_rate": 0.000710280112044818, "loss": 0.4881, "step": 10451 }, { "epoch": 5.839106145251397, "grad_norm": 0.49156346917152405, "learning_rate": 0.0007102521008403362, "loss": 0.4371, "step": 10452 }, { "epoch": 5.839664804469273, "grad_norm": 0.49882760643959045, 
"learning_rate": 0.0007102240896358544, "loss": 0.3914, "step": 10453 }, { "epoch": 5.840223463687151, "grad_norm": 0.501832127571106, "learning_rate": 0.0007101960784313726, "loss": 0.5015, "step": 10454 }, { "epoch": 5.840782122905028, "grad_norm": 0.5040813088417053, "learning_rate": 0.0007101680672268908, "loss": 0.4468, "step": 10455 }, { "epoch": 5.841340782122905, "grad_norm": 0.7020991444587708, "learning_rate": 0.000710140056022409, "loss": 0.4642, "step": 10456 }, { "epoch": 5.841899441340782, "grad_norm": 0.7725340127944946, "learning_rate": 0.0007101120448179272, "loss": 0.3988, "step": 10457 }, { "epoch": 5.842458100558659, "grad_norm": 0.5417423844337463, "learning_rate": 0.0007100840336134454, "loss": 0.4571, "step": 10458 }, { "epoch": 5.843016759776536, "grad_norm": 0.7756684422492981, "learning_rate": 0.0007100560224089636, "loss": 0.6887, "step": 10459 }, { "epoch": 5.843575418994414, "grad_norm": 0.873654305934906, "learning_rate": 0.0007100280112044818, "loss": 0.4264, "step": 10460 }, { "epoch": 5.84413407821229, "grad_norm": 1.2273424863815308, "learning_rate": 0.00071, "loss": 0.4112, "step": 10461 }, { "epoch": 5.844692737430168, "grad_norm": 0.868361234664917, "learning_rate": 0.0007099719887955182, "loss": 0.379, "step": 10462 }, { "epoch": 5.845251396648044, "grad_norm": 1.1167317628860474, "learning_rate": 0.0007099439775910364, "loss": 0.4187, "step": 10463 }, { "epoch": 5.845810055865922, "grad_norm": 0.42700713872909546, "learning_rate": 0.0007099159663865546, "loss": 0.3559, "step": 10464 }, { "epoch": 5.846368715083798, "grad_norm": 0.6540542840957642, "learning_rate": 0.0007098879551820728, "loss": 0.4894, "step": 10465 }, { "epoch": 5.846927374301676, "grad_norm": 17.374895095825195, "learning_rate": 0.000709859943977591, "loss": 0.498, "step": 10466 }, { "epoch": 5.847486033519553, "grad_norm": 0.5259714722633362, "learning_rate": 0.0007098319327731094, "loss": 0.5107, "step": 10467 }, { "epoch": 5.84804469273743, "grad_norm": 
0.671551525592804, "learning_rate": 0.0007098039215686275, "loss": 0.4188, "step": 10468 }, { "epoch": 5.848603351955307, "grad_norm": 0.7632198929786682, "learning_rate": 0.0007097759103641457, "loss": 0.4265, "step": 10469 }, { "epoch": 5.849162011173185, "grad_norm": 0.6515686511993408, "learning_rate": 0.0007097478991596639, "loss": 0.4556, "step": 10470 }, { "epoch": 5.849720670391061, "grad_norm": 0.4687560498714447, "learning_rate": 0.0007097198879551821, "loss": 0.414, "step": 10471 }, { "epoch": 5.850279329608939, "grad_norm": 0.4172324538230896, "learning_rate": 0.0007096918767507004, "loss": 0.4027, "step": 10472 }, { "epoch": 5.850837988826815, "grad_norm": 0.5436916947364807, "learning_rate": 0.0007096638655462185, "loss": 0.5866, "step": 10473 }, { "epoch": 5.851396648044693, "grad_norm": 0.5017995834350586, "learning_rate": 0.0007096358543417367, "loss": 0.3561, "step": 10474 }, { "epoch": 5.851955307262569, "grad_norm": 1.3895572423934937, "learning_rate": 0.0007096078431372549, "loss": 0.4267, "step": 10475 }, { "epoch": 5.852513966480447, "grad_norm": 0.5709172487258911, "learning_rate": 0.0007095798319327731, "loss": 0.6533, "step": 10476 }, { "epoch": 5.853072625698324, "grad_norm": 3.526177167892456, "learning_rate": 0.0007095518207282914, "loss": 0.4235, "step": 10477 }, { "epoch": 5.853631284916201, "grad_norm": 0.6979643106460571, "learning_rate": 0.0007095238095238095, "loss": 0.5456, "step": 10478 }, { "epoch": 5.854189944134078, "grad_norm": 0.4838812053203583, "learning_rate": 0.0007094957983193277, "loss": 0.4353, "step": 10479 }, { "epoch": 5.854748603351956, "grad_norm": 0.48904502391815186, "learning_rate": 0.0007094677871148459, "loss": 0.4469, "step": 10480 }, { "epoch": 5.855307262569832, "grad_norm": 0.5256216526031494, "learning_rate": 0.0007094397759103641, "loss": 0.4255, "step": 10481 }, { "epoch": 5.85586592178771, "grad_norm": 1.4294583797454834, "learning_rate": 0.0007094117647058825, "loss": 0.6436, "step": 10482 }, { 
"epoch": 5.856424581005586, "grad_norm": 0.6884363293647766, "learning_rate": 0.0007093837535014007, "loss": 0.5158, "step": 10483 }, { "epoch": 5.856983240223464, "grad_norm": 0.3635435402393341, "learning_rate": 0.0007093557422969188, "loss": 0.3553, "step": 10484 }, { "epoch": 5.85754189944134, "grad_norm": 0.49657881259918213, "learning_rate": 0.000709327731092437, "loss": 0.4269, "step": 10485 }, { "epoch": 5.858100558659218, "grad_norm": 0.5020352005958557, "learning_rate": 0.0007092997198879552, "loss": 0.4535, "step": 10486 }, { "epoch": 5.858659217877095, "grad_norm": 0.8311812877655029, "learning_rate": 0.0007092717086834734, "loss": 0.6703, "step": 10487 }, { "epoch": 5.859217877094972, "grad_norm": 1.1023448705673218, "learning_rate": 0.0007092436974789917, "loss": 0.4366, "step": 10488 }, { "epoch": 5.859776536312849, "grad_norm": 0.5711884498596191, "learning_rate": 0.0007092156862745098, "loss": 0.6715, "step": 10489 }, { "epoch": 5.860335195530726, "grad_norm": 0.5218866467475891, "learning_rate": 0.000709187675070028, "loss": 0.4071, "step": 10490 }, { "epoch": 5.860893854748603, "grad_norm": 0.5009090304374695, "learning_rate": 0.0007091596638655462, "loss": 0.3999, "step": 10491 }, { "epoch": 5.861452513966481, "grad_norm": 0.5133971571922302, "learning_rate": 0.0007091316526610644, "loss": 0.4329, "step": 10492 }, { "epoch": 5.862011173184357, "grad_norm": 0.48217669129371643, "learning_rate": 0.0007091036414565827, "loss": 0.5211, "step": 10493 }, { "epoch": 5.862569832402235, "grad_norm": 0.7686821222305298, "learning_rate": 0.0007090756302521008, "loss": 0.4309, "step": 10494 }, { "epoch": 5.863128491620111, "grad_norm": 1.671038031578064, "learning_rate": 0.000709047619047619, "loss": 0.5433, "step": 10495 }, { "epoch": 5.863687150837989, "grad_norm": 0.4058610200881958, "learning_rate": 0.0007090196078431372, "loss": 0.3102, "step": 10496 }, { "epoch": 5.864245810055866, "grad_norm": 0.5447606444358826, "learning_rate": 
0.0007089915966386554, "loss": 0.4792, "step": 10497 }, { "epoch": 5.864804469273743, "grad_norm": 0.4325959086418152, "learning_rate": 0.0007089635854341737, "loss": 0.4216, "step": 10498 }, { "epoch": 5.86536312849162, "grad_norm": 0.34113359451293945, "learning_rate": 0.000708935574229692, "loss": 0.404, "step": 10499 }, { "epoch": 5.865921787709497, "grad_norm": 0.44729116559028625, "learning_rate": 0.00070890756302521, "loss": 0.4309, "step": 10500 }, { "epoch": 5.865921787709497, "eval_cer": 0.09162820638713408, "eval_loss": 0.34694910049438477, "eval_runtime": 55.6054, "eval_samples_per_second": 81.611, "eval_steps_per_second": 5.107, "eval_wer": 0.36197312924219993, "step": 10500 }, { "epoch": 5.866480446927374, "grad_norm": 0.9637112021446228, "learning_rate": 0.0007088795518207283, "loss": 0.4681, "step": 10501 }, { "epoch": 5.867039106145251, "grad_norm": 0.5004937648773193, "learning_rate": 0.0007088515406162465, "loss": 0.4719, "step": 10502 }, { "epoch": 5.867597765363128, "grad_norm": 0.4892960786819458, "learning_rate": 0.0007088235294117648, "loss": 0.435, "step": 10503 }, { "epoch": 5.868156424581006, "grad_norm": 0.587005078792572, "learning_rate": 0.000708795518207283, "loss": 0.4385, "step": 10504 }, { "epoch": 5.868715083798882, "grad_norm": 1.0113680362701416, "learning_rate": 0.0007087675070028011, "loss": 0.4313, "step": 10505 }, { "epoch": 5.86927374301676, "grad_norm": 0.43034034967422485, "learning_rate": 0.0007087394957983193, "loss": 0.3942, "step": 10506 }, { "epoch": 5.869832402234637, "grad_norm": 0.45791736245155334, "learning_rate": 0.0007087114845938375, "loss": 0.4069, "step": 10507 }, { "epoch": 5.870391061452514, "grad_norm": 0.363300085067749, "learning_rate": 0.0007086834733893558, "loss": 0.4497, "step": 10508 }, { "epoch": 5.870949720670391, "grad_norm": 0.5509997606277466, "learning_rate": 0.000708655462184874, "loss": 0.5023, "step": 10509 }, { "epoch": 5.871508379888268, "grad_norm": 0.6267831921577454, "learning_rate": 
0.0007086274509803921, "loss": 0.4743, "step": 10510 }, { "epoch": 5.872067039106145, "grad_norm": 1.38927161693573, "learning_rate": 0.0007085994397759103, "loss": 0.4181, "step": 10511 }, { "epoch": 5.872625698324022, "grad_norm": 0.6998752951622009, "learning_rate": 0.0007085714285714285, "loss": 0.5421, "step": 10512 }, { "epoch": 5.873184357541899, "grad_norm": 0.4201306700706482, "learning_rate": 0.0007085434173669468, "loss": 0.4872, "step": 10513 }, { "epoch": 5.873743016759777, "grad_norm": 0.6429771184921265, "learning_rate": 0.000708515406162465, "loss": 0.4769, "step": 10514 }, { "epoch": 5.874301675977653, "grad_norm": 0.5104084014892578, "learning_rate": 0.0007084873949579833, "loss": 0.4284, "step": 10515 }, { "epoch": 5.874860335195531, "grad_norm": 0.5287335515022278, "learning_rate": 0.0007084593837535013, "loss": 0.4826, "step": 10516 }, { "epoch": 5.875418994413408, "grad_norm": 0.4161457121372223, "learning_rate": 0.0007084313725490196, "loss": 0.3787, "step": 10517 }, { "epoch": 5.875977653631285, "grad_norm": 0.4690793454647064, "learning_rate": 0.0007084033613445379, "loss": 0.4553, "step": 10518 }, { "epoch": 5.876536312849162, "grad_norm": 0.6330850720405579, "learning_rate": 0.0007083753501400561, "loss": 0.4217, "step": 10519 }, { "epoch": 5.877094972067039, "grad_norm": 0.72749263048172, "learning_rate": 0.0007083473389355743, "loss": 0.3944, "step": 10520 }, { "epoch": 5.877653631284916, "grad_norm": 0.5926832556724548, "learning_rate": 0.0007083193277310924, "loss": 0.4276, "step": 10521 }, { "epoch": 5.878212290502793, "grad_norm": 0.9759839773178101, "learning_rate": 0.0007082913165266106, "loss": 0.5273, "step": 10522 }, { "epoch": 5.87877094972067, "grad_norm": 0.5710582733154297, "learning_rate": 0.0007082633053221289, "loss": 0.4441, "step": 10523 }, { "epoch": 5.879329608938548, "grad_norm": 0.5046635270118713, "learning_rate": 0.0007082352941176471, "loss": 0.3817, "step": 10524 }, { "epoch": 5.879888268156424, "grad_norm": 
0.6082225441932678, "learning_rate": 0.0007082072829131653, "loss": 0.4131, "step": 10525 }, { "epoch": 5.880446927374302, "grad_norm": 0.6650238037109375, "learning_rate": 0.0007081792717086834, "loss": 0.4509, "step": 10526 }, { "epoch": 5.881005586592178, "grad_norm": 0.7564213275909424, "learning_rate": 0.0007081512605042016, "loss": 0.4422, "step": 10527 }, { "epoch": 5.881564245810056, "grad_norm": 0.376693993806839, "learning_rate": 0.0007081232492997199, "loss": 0.4314, "step": 10528 }, { "epoch": 5.882122905027933, "grad_norm": 0.40023693442344666, "learning_rate": 0.0007080952380952381, "loss": 0.4076, "step": 10529 }, { "epoch": 5.88268156424581, "grad_norm": 0.9997773766517639, "learning_rate": 0.0007080672268907563, "loss": 0.4007, "step": 10530 }, { "epoch": 5.883240223463687, "grad_norm": 0.4893626570701599, "learning_rate": 0.0007080392156862745, "loss": 0.3829, "step": 10531 }, { "epoch": 5.883798882681564, "grad_norm": 8.082022666931152, "learning_rate": 0.0007080112044817926, "loss": 0.3434, "step": 10532 }, { "epoch": 5.884357541899441, "grad_norm": 0.5004198551177979, "learning_rate": 0.000707983193277311, "loss": 0.4379, "step": 10533 }, { "epoch": 5.884916201117319, "grad_norm": 0.3681134283542633, "learning_rate": 0.0007079551820728292, "loss": 0.3594, "step": 10534 }, { "epoch": 5.885474860335195, "grad_norm": 0.45535004138946533, "learning_rate": 0.0007079271708683474, "loss": 0.4661, "step": 10535 }, { "epoch": 5.886033519553073, "grad_norm": 0.6571336388587952, "learning_rate": 0.0007078991596638656, "loss": 0.4049, "step": 10536 }, { "epoch": 5.886592178770949, "grad_norm": 0.638870894908905, "learning_rate": 0.0007078711484593837, "loss": 0.4013, "step": 10537 }, { "epoch": 5.887150837988827, "grad_norm": 0.6915528774261475, "learning_rate": 0.000707843137254902, "loss": 0.3957, "step": 10538 }, { "epoch": 5.8877094972067034, "grad_norm": 0.6207171678543091, "learning_rate": 0.0007078151260504202, "loss": 0.4515, "step": 10539 }, { 
"epoch": 5.888268156424581, "grad_norm": 0.5034327507019043, "learning_rate": 0.0007077871148459384, "loss": 0.398, "step": 10540 }, { "epoch": 5.888826815642458, "grad_norm": 0.39464282989501953, "learning_rate": 0.0007077591036414566, "loss": 0.3175, "step": 10541 }, { "epoch": 5.889385474860335, "grad_norm": 0.6110559105873108, "learning_rate": 0.0007077310924369747, "loss": 0.487, "step": 10542 }, { "epoch": 5.889944134078212, "grad_norm": 0.48830339312553406, "learning_rate": 0.000707703081232493, "loss": 0.3654, "step": 10543 }, { "epoch": 5.89050279329609, "grad_norm": 0.444409042596817, "learning_rate": 0.0007076750700280112, "loss": 0.4212, "step": 10544 }, { "epoch": 5.891061452513966, "grad_norm": 0.5622066259384155, "learning_rate": 0.0007076470588235294, "loss": 0.3945, "step": 10545 }, { "epoch": 5.891620111731844, "grad_norm": 0.4948543608188629, "learning_rate": 0.0007076190476190476, "loss": 0.5903, "step": 10546 }, { "epoch": 5.89217877094972, "grad_norm": 1.0931496620178223, "learning_rate": 0.0007075910364145658, "loss": 0.4833, "step": 10547 }, { "epoch": 5.892737430167598, "grad_norm": 0.5254516005516052, "learning_rate": 0.000707563025210084, "loss": 0.4919, "step": 10548 }, { "epoch": 5.8932960893854744, "grad_norm": 0.6026060581207275, "learning_rate": 0.0007075350140056023, "loss": 0.5276, "step": 10549 }, { "epoch": 5.893854748603352, "grad_norm": 0.47187286615371704, "learning_rate": 0.0007075070028011205, "loss": 0.4777, "step": 10550 }, { "epoch": 5.894413407821229, "grad_norm": 0.5213013291358948, "learning_rate": 0.0007074789915966387, "loss": 0.5804, "step": 10551 }, { "epoch": 5.894972067039106, "grad_norm": 1.7274495363235474, "learning_rate": 0.0007074509803921569, "loss": 0.466, "step": 10552 }, { "epoch": 5.895530726256983, "grad_norm": 0.48574063181877136, "learning_rate": 0.0007074229691876751, "loss": 0.4384, "step": 10553 }, { "epoch": 5.896089385474861, "grad_norm": 0.6204369068145752, "learning_rate": 
0.0007073949579831933, "loss": 0.5534, "step": 10554 }, { "epoch": 5.896648044692737, "grad_norm": 0.5332340598106384, "learning_rate": 0.0007073669467787115, "loss": 0.4587, "step": 10555 }, { "epoch": 5.897206703910615, "grad_norm": 1.092002272605896, "learning_rate": 0.0007073389355742297, "loss": 0.4286, "step": 10556 }, { "epoch": 5.897765363128491, "grad_norm": 0.4838496744632721, "learning_rate": 0.0007073109243697479, "loss": 0.4651, "step": 10557 }, { "epoch": 5.898324022346369, "grad_norm": 0.49133217334747314, "learning_rate": 0.0007072829131652661, "loss": 0.4712, "step": 10558 }, { "epoch": 5.8988826815642454, "grad_norm": 0.8116902709007263, "learning_rate": 0.0007072549019607843, "loss": 0.5069, "step": 10559 }, { "epoch": 5.899441340782123, "grad_norm": 0.37905728816986084, "learning_rate": 0.0007072268907563025, "loss": 0.359, "step": 10560 }, { "epoch": 5.9, "grad_norm": 0.8048175573348999, "learning_rate": 0.0007071988795518207, "loss": 0.5839, "step": 10561 }, { "epoch": 5.900558659217877, "grad_norm": 0.6506947875022888, "learning_rate": 0.0007071708683473389, "loss": 0.4196, "step": 10562 }, { "epoch": 5.901117318435754, "grad_norm": 0.6302304267883301, "learning_rate": 0.0007071428571428572, "loss": 0.5325, "step": 10563 }, { "epoch": 5.901675977653631, "grad_norm": 0.44592854380607605, "learning_rate": 0.0007071148459383753, "loss": 0.3849, "step": 10564 }, { "epoch": 5.902234636871508, "grad_norm": 1.670932650566101, "learning_rate": 0.0007070868347338936, "loss": 0.3905, "step": 10565 }, { "epoch": 5.902793296089386, "grad_norm": 0.6100943088531494, "learning_rate": 0.0007070588235294118, "loss": 0.4002, "step": 10566 }, { "epoch": 5.903351955307262, "grad_norm": 0.5980740189552307, "learning_rate": 0.00070703081232493, "loss": 0.4991, "step": 10567 }, { "epoch": 5.90391061452514, "grad_norm": 1.6025962829589844, "learning_rate": 0.0007070028011204483, "loss": 0.4177, "step": 10568 }, { "epoch": 5.9044692737430164, "grad_norm": 
0.9667633771896362, "learning_rate": 0.0007069747899159664, "loss": 0.5212, "step": 10569 }, { "epoch": 5.905027932960894, "grad_norm": 0.6661363244056702, "learning_rate": 0.0007069467787114846, "loss": 0.3631, "step": 10570 }, { "epoch": 5.905586592178771, "grad_norm": 0.4226272404193878, "learning_rate": 0.0007069187675070028, "loss": 0.4953, "step": 10571 }, { "epoch": 5.906145251396648, "grad_norm": 0.6326567530632019, "learning_rate": 0.000706890756302521, "loss": 0.4929, "step": 10572 }, { "epoch": 5.906703910614525, "grad_norm": 0.6285231709480286, "learning_rate": 0.0007068627450980393, "loss": 0.4136, "step": 10573 }, { "epoch": 5.907262569832402, "grad_norm": 0.5302049517631531, "learning_rate": 0.0007068347338935574, "loss": 0.4279, "step": 10574 }, { "epoch": 5.907821229050279, "grad_norm": 1.0716586112976074, "learning_rate": 0.0007068067226890756, "loss": 0.6027, "step": 10575 }, { "epoch": 5.908379888268156, "grad_norm": 0.6912524700164795, "learning_rate": 0.0007067787114845938, "loss": 0.6106, "step": 10576 }, { "epoch": 5.908938547486033, "grad_norm": 0.7536414861679077, "learning_rate": 0.000706750700280112, "loss": 0.361, "step": 10577 }, { "epoch": 5.909497206703911, "grad_norm": 0.4771345853805542, "learning_rate": 0.0007067226890756303, "loss": 0.4497, "step": 10578 }, { "epoch": 5.910055865921787, "grad_norm": 0.4545881450176239, "learning_rate": 0.0007066946778711485, "loss": 0.3132, "step": 10579 }, { "epoch": 5.910614525139665, "grad_norm": 0.6520172953605652, "learning_rate": 0.0007066666666666666, "loss": 0.7032, "step": 10580 }, { "epoch": 5.911173184357542, "grad_norm": 1.0696566104888916, "learning_rate": 0.0007066386554621848, "loss": 0.4989, "step": 10581 }, { "epoch": 5.911731843575419, "grad_norm": 0.4830806851387024, "learning_rate": 0.000706610644257703, "loss": 0.4242, "step": 10582 }, { "epoch": 5.912290502793296, "grad_norm": 0.6830497980117798, "learning_rate": 0.0007065826330532214, "loss": 0.5273, "step": 10583 }, { 
"epoch": 5.912849162011173, "grad_norm": 0.517987072467804, "learning_rate": 0.0007065546218487396, "loss": 0.362, "step": 10584 }, { "epoch": 5.91340782122905, "grad_norm": 0.7807379364967346, "learning_rate": 0.0007065266106442577, "loss": 0.4447, "step": 10585 }, { "epoch": 5.913966480446927, "grad_norm": 0.6242043375968933, "learning_rate": 0.0007064985994397759, "loss": 0.3775, "step": 10586 }, { "epoch": 5.914525139664804, "grad_norm": 2.115849256515503, "learning_rate": 0.0007064705882352941, "loss": 0.4227, "step": 10587 }, { "epoch": 5.915083798882682, "grad_norm": 0.45200830698013306, "learning_rate": 0.0007064425770308124, "loss": 0.404, "step": 10588 }, { "epoch": 5.915642458100558, "grad_norm": 0.6316574215888977, "learning_rate": 0.0007064145658263306, "loss": 0.5078, "step": 10589 }, { "epoch": 5.916201117318436, "grad_norm": 0.7201128602027893, "learning_rate": 0.0007063865546218487, "loss": 0.4984, "step": 10590 }, { "epoch": 5.9167597765363125, "grad_norm": 0.49354565143585205, "learning_rate": 0.0007063585434173669, "loss": 0.3863, "step": 10591 }, { "epoch": 5.91731843575419, "grad_norm": 0.7998040318489075, "learning_rate": 0.0007063305322128851, "loss": 0.5084, "step": 10592 }, { "epoch": 5.917877094972067, "grad_norm": 0.5736560225486755, "learning_rate": 0.0007063025210084034, "loss": 0.4712, "step": 10593 }, { "epoch": 5.918435754189944, "grad_norm": 0.630725622177124, "learning_rate": 0.0007062745098039216, "loss": 0.4383, "step": 10594 }, { "epoch": 5.918994413407821, "grad_norm": 0.42156875133514404, "learning_rate": 0.0007062464985994398, "loss": 0.5056, "step": 10595 }, { "epoch": 5.919553072625698, "grad_norm": 0.5405137538909912, "learning_rate": 0.0007062184873949579, "loss": 0.4783, "step": 10596 }, { "epoch": 5.920111731843575, "grad_norm": 0.6225018501281738, "learning_rate": 0.0007061904761904761, "loss": 0.4109, "step": 10597 }, { "epoch": 5.920670391061453, "grad_norm": 1.165069818496704, "learning_rate": 
0.0007061624649859945, "loss": 0.5023, "step": 10598 }, { "epoch": 5.921229050279329, "grad_norm": 0.5271239876747131, "learning_rate": 0.0007061344537815127, "loss": 0.4741, "step": 10599 }, { "epoch": 5.921787709497207, "grad_norm": 0.4791272282600403, "learning_rate": 0.0007061064425770309, "loss": 0.4346, "step": 10600 }, { "epoch": 5.9223463687150835, "grad_norm": 0.6669300198554993, "learning_rate": 0.000706078431372549, "loss": 0.3778, "step": 10601 }, { "epoch": 5.922905027932961, "grad_norm": 0.601402997970581, "learning_rate": 0.0007060504201680672, "loss": 0.4113, "step": 10602 }, { "epoch": 5.923463687150838, "grad_norm": 0.8014582395553589, "learning_rate": 0.0007060224089635855, "loss": 0.4926, "step": 10603 }, { "epoch": 5.924022346368715, "grad_norm": 0.628642737865448, "learning_rate": 0.0007059943977591037, "loss": 0.467, "step": 10604 }, { "epoch": 5.924581005586592, "grad_norm": 0.7804936766624451, "learning_rate": 0.0007059663865546219, "loss": 0.4549, "step": 10605 }, { "epoch": 5.925139664804469, "grad_norm": 1.0483605861663818, "learning_rate": 0.00070593837535014, "loss": 0.445, "step": 10606 }, { "epoch": 5.925698324022346, "grad_norm": 0.5775977373123169, "learning_rate": 0.0007059103641456582, "loss": 0.6146, "step": 10607 }, { "epoch": 5.926256983240224, "grad_norm": 0.8711464405059814, "learning_rate": 0.0007058823529411765, "loss": 0.5122, "step": 10608 }, { "epoch": 5.9268156424581, "grad_norm": 0.4983471632003784, "learning_rate": 0.0007058543417366947, "loss": 0.4494, "step": 10609 }, { "epoch": 5.927374301675978, "grad_norm": 0.47444915771484375, "learning_rate": 0.0007058263305322129, "loss": 0.4098, "step": 10610 }, { "epoch": 5.9279329608938545, "grad_norm": 0.5697492361068726, "learning_rate": 0.0007057983193277311, "loss": 0.4544, "step": 10611 }, { "epoch": 5.928491620111732, "grad_norm": 0.6584872603416443, "learning_rate": 0.0007057703081232492, "loss": 0.5686, "step": 10612 }, { "epoch": 5.9290502793296085, "grad_norm": 
0.7022781372070312, "learning_rate": 0.0007057422969187675, "loss": 0.4933, "step": 10613 }, { "epoch": 5.929608938547486, "grad_norm": 0.5768987536430359, "learning_rate": 0.0007057142857142858, "loss": 0.4003, "step": 10614 }, { "epoch": 5.930167597765363, "grad_norm": 0.5249398946762085, "learning_rate": 0.000705686274509804, "loss": 0.4013, "step": 10615 }, { "epoch": 5.93072625698324, "grad_norm": 0.5577698349952698, "learning_rate": 0.0007056582633053222, "loss": 0.3886, "step": 10616 }, { "epoch": 5.931284916201117, "grad_norm": 0.5374637246131897, "learning_rate": 0.0007056302521008403, "loss": 0.4343, "step": 10617 }, { "epoch": 5.931843575418995, "grad_norm": 0.8629102110862732, "learning_rate": 0.0007056022408963586, "loss": 0.3971, "step": 10618 }, { "epoch": 5.932402234636871, "grad_norm": 0.5537786483764648, "learning_rate": 0.0007055742296918768, "loss": 0.407, "step": 10619 }, { "epoch": 5.932960893854749, "grad_norm": 0.6504737734794617, "learning_rate": 0.000705546218487395, "loss": 0.3978, "step": 10620 }, { "epoch": 5.9335195530726255, "grad_norm": 0.4973798990249634, "learning_rate": 0.0007055182072829132, "loss": 0.3837, "step": 10621 }, { "epoch": 5.934078212290503, "grad_norm": 0.3515792787075043, "learning_rate": 0.0007054901960784313, "loss": 0.4248, "step": 10622 }, { "epoch": 5.9346368715083795, "grad_norm": 0.5916847586631775, "learning_rate": 0.0007054621848739496, "loss": 0.5761, "step": 10623 }, { "epoch": 5.935195530726257, "grad_norm": 0.3694547414779663, "learning_rate": 0.0007054341736694678, "loss": 0.4321, "step": 10624 }, { "epoch": 5.935754189944134, "grad_norm": 0.6989243626594543, "learning_rate": 0.000705406162464986, "loss": 0.5094, "step": 10625 }, { "epoch": 5.936312849162011, "grad_norm": 1.0088986158370972, "learning_rate": 0.0007053781512605042, "loss": 0.508, "step": 10626 }, { "epoch": 5.936871508379888, "grad_norm": 0.8861255645751953, "learning_rate": 0.0007053501400560224, "loss": 0.5528, "step": 10627 }, { 
"epoch": 5.937430167597765, "grad_norm": 0.5517739057540894, "learning_rate": 0.0007053221288515406, "loss": 0.5115, "step": 10628 }, { "epoch": 5.937988826815642, "grad_norm": 1.1692125797271729, "learning_rate": 0.0007052941176470588, "loss": 0.583, "step": 10629 }, { "epoch": 5.93854748603352, "grad_norm": 0.6481218338012695, "learning_rate": 0.000705266106442577, "loss": 0.3649, "step": 10630 }, { "epoch": 5.9391061452513965, "grad_norm": 0.5833912491798401, "learning_rate": 0.0007052380952380953, "loss": 0.385, "step": 10631 }, { "epoch": 5.939664804469274, "grad_norm": 0.689988374710083, "learning_rate": 0.0007052100840336135, "loss": 0.4179, "step": 10632 }, { "epoch": 5.9402234636871505, "grad_norm": 0.7670750617980957, "learning_rate": 0.0007051820728291317, "loss": 0.4557, "step": 10633 }, { "epoch": 5.940782122905028, "grad_norm": 2.537328004837036, "learning_rate": 0.0007051540616246499, "loss": 0.4419, "step": 10634 }, { "epoch": 5.941340782122905, "grad_norm": 0.44420579075813293, "learning_rate": 0.0007051260504201681, "loss": 0.4102, "step": 10635 }, { "epoch": 5.941899441340782, "grad_norm": 0.7864313125610352, "learning_rate": 0.0007050980392156863, "loss": 0.404, "step": 10636 }, { "epoch": 5.942458100558659, "grad_norm": 1.1022429466247559, "learning_rate": 0.0007050700280112045, "loss": 0.6072, "step": 10637 }, { "epoch": 5.943016759776536, "grad_norm": 0.7302351593971252, "learning_rate": 0.0007050420168067228, "loss": 0.5018, "step": 10638 }, { "epoch": 5.943575418994413, "grad_norm": 0.415154367685318, "learning_rate": 0.0007050140056022409, "loss": 0.4589, "step": 10639 }, { "epoch": 5.94413407821229, "grad_norm": 0.8116351962089539, "learning_rate": 0.0007049859943977591, "loss": 0.4651, "step": 10640 }, { "epoch": 5.9446927374301675, "grad_norm": 1.8977586030960083, "learning_rate": 0.0007049579831932773, "loss": 0.4032, "step": 10641 }, { "epoch": 5.945251396648045, "grad_norm": 0.49679845571517944, "learning_rate": 
0.0007049299719887955, "loss": 0.3642, "step": 10642 }, { "epoch": 5.9458100558659215, "grad_norm": 0.7046087384223938, "learning_rate": 0.0007049019607843138, "loss": 0.4383, "step": 10643 }, { "epoch": 5.946368715083799, "grad_norm": 0.4634682834148407, "learning_rate": 0.0007048739495798319, "loss": 0.4783, "step": 10644 }, { "epoch": 5.946927374301676, "grad_norm": 0.5060982704162598, "learning_rate": 0.0007048459383753501, "loss": 0.4279, "step": 10645 }, { "epoch": 5.947486033519553, "grad_norm": 1.9865999221801758, "learning_rate": 0.0007048179271708683, "loss": 0.4345, "step": 10646 }, { "epoch": 5.94804469273743, "grad_norm": 0.7619455456733704, "learning_rate": 0.0007047899159663866, "loss": 0.4927, "step": 10647 }, { "epoch": 5.948603351955307, "grad_norm": 0.5044295191764832, "learning_rate": 0.0007047619047619049, "loss": 0.4822, "step": 10648 }, { "epoch": 5.949162011173184, "grad_norm": 1.1890429258346558, "learning_rate": 0.000704733893557423, "loss": 0.4739, "step": 10649 }, { "epoch": 5.949720670391061, "grad_norm": 0.47186458110809326, "learning_rate": 0.0007047058823529412, "loss": 0.4479, "step": 10650 }, { "epoch": 5.9502793296089385, "grad_norm": 0.4976537525653839, "learning_rate": 0.0007046778711484594, "loss": 0.3963, "step": 10651 }, { "epoch": 5.950837988826816, "grad_norm": 0.3979592025279999, "learning_rate": 0.0007046498599439776, "loss": 0.4367, "step": 10652 }, { "epoch": 5.9513966480446925, "grad_norm": 1.8060015439987183, "learning_rate": 0.0007046218487394959, "loss": 0.4222, "step": 10653 }, { "epoch": 5.95195530726257, "grad_norm": 0.5210453867912292, "learning_rate": 0.0007045938375350141, "loss": 0.5088, "step": 10654 }, { "epoch": 5.952513966480447, "grad_norm": 0.6186861395835876, "learning_rate": 0.0007045658263305322, "loss": 0.5537, "step": 10655 }, { "epoch": 5.953072625698324, "grad_norm": 0.5123553276062012, "learning_rate": 0.0007045378151260504, "loss": 0.45, "step": 10656 }, { "epoch": 5.953631284916201, 
"grad_norm": 0.5243229269981384, "learning_rate": 0.0007045098039215686, "loss": 0.4015, "step": 10657 }, { "epoch": 5.954189944134078, "grad_norm": 0.47566747665405273, "learning_rate": 0.0007044817927170869, "loss": 0.4194, "step": 10658 }, { "epoch": 5.954748603351955, "grad_norm": 1.1357111930847168, "learning_rate": 0.0007044537815126051, "loss": 0.4013, "step": 10659 }, { "epoch": 5.955307262569832, "grad_norm": 0.4731360971927643, "learning_rate": 0.0007044257703081232, "loss": 0.492, "step": 10660 }, { "epoch": 5.9558659217877095, "grad_norm": 0.6241510510444641, "learning_rate": 0.0007043977591036414, "loss": 0.3763, "step": 10661 }, { "epoch": 5.956424581005587, "grad_norm": 0.4301688075065613, "learning_rate": 0.0007043697478991596, "loss": 0.2866, "step": 10662 }, { "epoch": 5.9569832402234635, "grad_norm": 0.5356236100196838, "learning_rate": 0.000704341736694678, "loss": 0.4482, "step": 10663 }, { "epoch": 5.957541899441341, "grad_norm": 0.5268016457557678, "learning_rate": 0.0007043137254901962, "loss": 0.4882, "step": 10664 }, { "epoch": 5.9581005586592175, "grad_norm": 0.5201060771942139, "learning_rate": 0.0007042857142857143, "loss": 0.578, "step": 10665 }, { "epoch": 5.958659217877095, "grad_norm": 0.6463531851768494, "learning_rate": 0.0007042577030812325, "loss": 0.4167, "step": 10666 }, { "epoch": 5.959217877094972, "grad_norm": 0.45140498876571655, "learning_rate": 0.0007042296918767507, "loss": 0.3724, "step": 10667 }, { "epoch": 5.959776536312849, "grad_norm": 0.5019129514694214, "learning_rate": 0.000704201680672269, "loss": 0.4632, "step": 10668 }, { "epoch": 5.960335195530726, "grad_norm": 0.5050307512283325, "learning_rate": 0.0007041736694677872, "loss": 0.4926, "step": 10669 }, { "epoch": 5.960893854748603, "grad_norm": 0.47213369607925415, "learning_rate": 0.0007041456582633054, "loss": 0.4789, "step": 10670 }, { "epoch": 5.9614525139664805, "grad_norm": 0.7370476722717285, "learning_rate": 0.0007041176470588235, "loss": 0.679, 
"step": 10671 }, { "epoch": 5.962011173184358, "grad_norm": 0.5161084532737732, "learning_rate": 0.0007040896358543417, "loss": 0.5845, "step": 10672 }, { "epoch": 5.9625698324022345, "grad_norm": 0.43537983298301697, "learning_rate": 0.00070406162464986, "loss": 0.4057, "step": 10673 }, { "epoch": 5.963128491620112, "grad_norm": 0.5349956750869751, "learning_rate": 0.0007040336134453782, "loss": 0.4299, "step": 10674 }, { "epoch": 5.9636871508379885, "grad_norm": 0.4862646758556366, "learning_rate": 0.0007040056022408964, "loss": 0.4585, "step": 10675 }, { "epoch": 5.964245810055866, "grad_norm": 0.40399888157844543, "learning_rate": 0.0007039775910364145, "loss": 0.4013, "step": 10676 }, { "epoch": 5.9648044692737425, "grad_norm": 0.9501025676727295, "learning_rate": 0.0007039495798319327, "loss": 0.5028, "step": 10677 }, { "epoch": 5.96536312849162, "grad_norm": 0.5965065956115723, "learning_rate": 0.000703921568627451, "loss": 0.6575, "step": 10678 }, { "epoch": 5.965921787709497, "grad_norm": 0.542474627494812, "learning_rate": 0.0007038935574229693, "loss": 0.4879, "step": 10679 }, { "epoch": 5.966480446927374, "grad_norm": 0.7965006828308105, "learning_rate": 0.0007038655462184875, "loss": 0.5095, "step": 10680 }, { "epoch": 5.9670391061452515, "grad_norm": 0.49771595001220703, "learning_rate": 0.0007038375350140056, "loss": 0.4371, "step": 10681 }, { "epoch": 5.967597765363129, "grad_norm": 0.38680100440979004, "learning_rate": 0.0007038095238095238, "loss": 0.3783, "step": 10682 }, { "epoch": 5.9681564245810055, "grad_norm": 0.483853280544281, "learning_rate": 0.0007037815126050421, "loss": 0.4089, "step": 10683 }, { "epoch": 5.968715083798883, "grad_norm": 0.557817816734314, "learning_rate": 0.0007037535014005603, "loss": 0.5088, "step": 10684 }, { "epoch": 5.9692737430167595, "grad_norm": 0.42193880677223206, "learning_rate": 0.0007037254901960785, "loss": 0.3933, "step": 10685 }, { "epoch": 5.969832402234637, "grad_norm": 0.5682546496391296, 
"learning_rate": 0.0007036974789915967, "loss": 0.4603, "step": 10686 }, { "epoch": 5.9703910614525135, "grad_norm": 0.8125534057617188, "learning_rate": 0.0007036694677871148, "loss": 0.426, "step": 10687 }, { "epoch": 5.970949720670391, "grad_norm": 0.5613397359848022, "learning_rate": 0.0007036414565826331, "loss": 0.4178, "step": 10688 }, { "epoch": 5.971508379888268, "grad_norm": 0.7704717516899109, "learning_rate": 0.0007036134453781513, "loss": 0.3796, "step": 10689 }, { "epoch": 5.972067039106145, "grad_norm": 0.7329124212265015, "learning_rate": 0.0007035854341736695, "loss": 0.5127, "step": 10690 }, { "epoch": 5.9726256983240225, "grad_norm": 1.2618038654327393, "learning_rate": 0.0007035574229691877, "loss": 0.5323, "step": 10691 }, { "epoch": 5.9731843575419, "grad_norm": 0.48770755529403687, "learning_rate": 0.0007035294117647058, "loss": 0.3667, "step": 10692 }, { "epoch": 5.9737430167597765, "grad_norm": 0.8446019887924194, "learning_rate": 0.0007035014005602241, "loss": 0.5731, "step": 10693 }, { "epoch": 5.974301675977654, "grad_norm": 0.46063557267189026, "learning_rate": 0.0007034733893557423, "loss": 0.4143, "step": 10694 }, { "epoch": 5.9748603351955305, "grad_norm": 0.48112040758132935, "learning_rate": 0.0007034453781512605, "loss": 0.3198, "step": 10695 }, { "epoch": 5.975418994413408, "grad_norm": 0.5702455639839172, "learning_rate": 0.0007034173669467788, "loss": 0.3204, "step": 10696 }, { "epoch": 5.9759776536312845, "grad_norm": 0.4615860879421234, "learning_rate": 0.0007033893557422969, "loss": 0.433, "step": 10697 }, { "epoch": 5.976536312849162, "grad_norm": 0.44421282410621643, "learning_rate": 0.0007033613445378152, "loss": 0.438, "step": 10698 }, { "epoch": 5.977094972067039, "grad_norm": 0.6527910232543945, "learning_rate": 0.0007033333333333334, "loss": 0.4898, "step": 10699 }, { "epoch": 5.977653631284916, "grad_norm": 0.4210759401321411, "learning_rate": 0.0007033053221288516, "loss": 0.4204, "step": 10700 }, { "epoch": 
5.9782122905027935, "grad_norm": 0.8129958510398865, "learning_rate": 0.0007032773109243698, "loss": 0.3917, "step": 10701 }, { "epoch": 5.97877094972067, "grad_norm": 0.6258031129837036, "learning_rate": 0.000703249299719888, "loss": 0.4117, "step": 10702 }, { "epoch": 5.9793296089385475, "grad_norm": 0.5234305262565613, "learning_rate": 0.0007032212885154062, "loss": 0.5158, "step": 10703 }, { "epoch": 5.979888268156425, "grad_norm": 1.7617279291152954, "learning_rate": 0.0007031932773109244, "loss": 0.4604, "step": 10704 }, { "epoch": 5.9804469273743015, "grad_norm": 0.35483261942863464, "learning_rate": 0.0007031652661064426, "loss": 0.3561, "step": 10705 }, { "epoch": 5.981005586592179, "grad_norm": 0.8235047459602356, "learning_rate": 0.0007031372549019608, "loss": 0.3557, "step": 10706 }, { "epoch": 5.9815642458100555, "grad_norm": 1.5603936910629272, "learning_rate": 0.000703109243697479, "loss": 0.3923, "step": 10707 }, { "epoch": 5.982122905027933, "grad_norm": 0.5309181213378906, "learning_rate": 0.0007030812324929971, "loss": 0.3921, "step": 10708 }, { "epoch": 5.98268156424581, "grad_norm": 0.5465649962425232, "learning_rate": 0.0007030532212885154, "loss": 0.4679, "step": 10709 }, { "epoch": 5.983240223463687, "grad_norm": 0.686987042427063, "learning_rate": 0.0007030252100840336, "loss": 0.4157, "step": 10710 }, { "epoch": 5.9837988826815645, "grad_norm": 0.49509668350219727, "learning_rate": 0.0007029971988795518, "loss": 0.4808, "step": 10711 }, { "epoch": 5.984357541899441, "grad_norm": 0.5534369349479675, "learning_rate": 0.00070296918767507, "loss": 0.419, "step": 10712 }, { "epoch": 5.9849162011173185, "grad_norm": 0.5116916298866272, "learning_rate": 0.0007029411764705881, "loss": 0.5181, "step": 10713 }, { "epoch": 5.985474860335195, "grad_norm": 0.4299747347831726, "learning_rate": 0.0007029131652661065, "loss": 0.4109, "step": 10714 }, { "epoch": 5.9860335195530725, "grad_norm": 0.5178081393241882, "learning_rate": 0.0007028851540616247, 
"loss": 0.5312, "step": 10715 }, { "epoch": 5.98659217877095, "grad_norm": 0.7596054673194885, "learning_rate": 0.0007028571428571429, "loss": 0.3994, "step": 10716 }, { "epoch": 5.9871508379888265, "grad_norm": 0.618139922618866, "learning_rate": 0.0007028291316526611, "loss": 0.553, "step": 10717 }, { "epoch": 5.987709497206704, "grad_norm": 0.9482153654098511, "learning_rate": 0.0007028011204481793, "loss": 0.4184, "step": 10718 }, { "epoch": 5.988268156424581, "grad_norm": 0.5059841871261597, "learning_rate": 0.0007027731092436975, "loss": 0.3966, "step": 10719 }, { "epoch": 5.988826815642458, "grad_norm": 0.5375123023986816, "learning_rate": 0.0007027450980392157, "loss": 0.4351, "step": 10720 }, { "epoch": 5.9893854748603355, "grad_norm": 1.5509437322616577, "learning_rate": 0.0007027170868347339, "loss": 0.577, "step": 10721 }, { "epoch": 5.989944134078212, "grad_norm": 0.3913935422897339, "learning_rate": 0.0007026890756302521, "loss": 0.4034, "step": 10722 }, { "epoch": 5.9905027932960895, "grad_norm": 0.5882735848426819, "learning_rate": 0.0007026610644257703, "loss": 0.553, "step": 10723 }, { "epoch": 5.991061452513966, "grad_norm": 0.49759504199028015, "learning_rate": 0.0007026330532212885, "loss": 0.4367, "step": 10724 }, { "epoch": 5.9916201117318435, "grad_norm": 0.5933146476745605, "learning_rate": 0.0007026050420168067, "loss": 0.501, "step": 10725 }, { "epoch": 5.992178770949721, "grad_norm": 0.8480409979820251, "learning_rate": 0.0007025770308123249, "loss": 0.5142, "step": 10726 }, { "epoch": 5.9927374301675975, "grad_norm": 0.5606933236122131, "learning_rate": 0.0007025490196078431, "loss": 0.4981, "step": 10727 }, { "epoch": 5.993296089385475, "grad_norm": 0.3889835476875305, "learning_rate": 0.0007025210084033613, "loss": 0.5629, "step": 10728 }, { "epoch": 5.993854748603352, "grad_norm": 0.4942566156387329, "learning_rate": 0.0007024929971988796, "loss": 0.3911, "step": 10729 }, { "epoch": 5.994413407821229, "grad_norm": 1.4093338251113892, 
"learning_rate": 0.0007024649859943978, "loss": 0.4624, "step": 10730 }, { "epoch": 5.9949720670391065, "grad_norm": 1.139153242111206, "learning_rate": 0.000702436974789916, "loss": 0.4055, "step": 10731 }, { "epoch": 5.995530726256983, "grad_norm": 0.45949316024780273, "learning_rate": 0.0007024089635854342, "loss": 0.435, "step": 10732 }, { "epoch": 5.9960893854748605, "grad_norm": 0.3905138373374939, "learning_rate": 0.0007023809523809524, "loss": 0.4219, "step": 10733 }, { "epoch": 5.996648044692737, "grad_norm": 1.0105937719345093, "learning_rate": 0.0007023529411764707, "loss": 0.5904, "step": 10734 }, { "epoch": 5.9972067039106145, "grad_norm": 0.5596426725387573, "learning_rate": 0.0007023249299719888, "loss": 0.3982, "step": 10735 }, { "epoch": 5.997765363128492, "grad_norm": 0.38583436608314514, "learning_rate": 0.000702296918767507, "loss": 0.4682, "step": 10736 }, { "epoch": 5.9983240223463685, "grad_norm": 0.5878229737281799, "learning_rate": 0.0007022689075630252, "loss": 0.5023, "step": 10737 }, { "epoch": 5.998882681564246, "grad_norm": 0.7048560976982117, "learning_rate": 0.0007022408963585434, "loss": 0.3977, "step": 10738 }, { "epoch": 5.9994413407821225, "grad_norm": 0.632676362991333, "learning_rate": 0.0007022128851540617, "loss": 0.38, "step": 10739 }, { "epoch": 6.0, "grad_norm": 1.2492765188217163, "learning_rate": 0.0007021848739495798, "loss": 0.4723, "step": 10740 }, { "epoch": 6.0005586592178775, "grad_norm": 0.6041464805603027, "learning_rate": 0.000702156862745098, "loss": 0.4666, "step": 10741 }, { "epoch": 6.001117318435754, "grad_norm": 0.7570438981056213, "learning_rate": 0.0007021288515406162, "loss": 0.4116, "step": 10742 }, { "epoch": 6.0016759776536315, "grad_norm": 0.5255514979362488, "learning_rate": 0.0007021008403361344, "loss": 0.3727, "step": 10743 }, { "epoch": 6.002234636871508, "grad_norm": 0.6134583353996277, "learning_rate": 0.0007020728291316527, "loss": 0.4397, "step": 10744 }, { "epoch": 6.0027932960893855, 
"grad_norm": 2.010389804840088, "learning_rate": 0.0007020448179271708, "loss": 0.4007, "step": 10745 }, { "epoch": 6.003351955307263, "grad_norm": 0.5612832307815552, "learning_rate": 0.000702016806722689, "loss": 0.4107, "step": 10746 }, { "epoch": 6.0039106145251395, "grad_norm": 0.46385201811790466, "learning_rate": 0.0007019887955182073, "loss": 0.4381, "step": 10747 }, { "epoch": 6.004469273743017, "grad_norm": 0.5772256255149841, "learning_rate": 0.0007019607843137255, "loss": 0.4458, "step": 10748 }, { "epoch": 6.0050279329608935, "grad_norm": 0.3665994107723236, "learning_rate": 0.0007019327731092438, "loss": 0.3647, "step": 10749 }, { "epoch": 6.005586592178771, "grad_norm": 0.7996562719345093, "learning_rate": 0.000701904761904762, "loss": 0.4073, "step": 10750 }, { "epoch": 6.0061452513966485, "grad_norm": 0.46867483854293823, "learning_rate": 0.0007018767507002801, "loss": 0.3554, "step": 10751 }, { "epoch": 6.006703910614525, "grad_norm": 0.4355800747871399, "learning_rate": 0.0007018487394957983, "loss": 0.481, "step": 10752 }, { "epoch": 6.0072625698324025, "grad_norm": 0.5530795454978943, "learning_rate": 0.0007018207282913165, "loss": 0.4939, "step": 10753 }, { "epoch": 6.007821229050279, "grad_norm": 0.4148414134979248, "learning_rate": 0.0007017927170868348, "loss": 0.4056, "step": 10754 }, { "epoch": 6.0083798882681565, "grad_norm": 0.5292692184448242, "learning_rate": 0.000701764705882353, "loss": 0.4066, "step": 10755 }, { "epoch": 6.008938547486034, "grad_norm": 0.6804555058479309, "learning_rate": 0.0007017366946778711, "loss": 0.4545, "step": 10756 }, { "epoch": 6.0094972067039105, "grad_norm": 0.6187072396278381, "learning_rate": 0.0007017086834733893, "loss": 0.5057, "step": 10757 }, { "epoch": 6.010055865921788, "grad_norm": 0.45158496499061584, "learning_rate": 0.0007016806722689075, "loss": 0.5135, "step": 10758 }, { "epoch": 6.0106145251396645, "grad_norm": 0.5854185223579407, "learning_rate": 0.0007016526610644258, "loss": 0.3839, 
"step": 10759 }, { "epoch": 6.011173184357542, "grad_norm": 0.4598930776119232, "learning_rate": 0.000701624649859944, "loss": 0.4112, "step": 10760 }, { "epoch": 6.011731843575419, "grad_norm": 0.3839957118034363, "learning_rate": 0.0007015966386554621, "loss": 0.3706, "step": 10761 }, { "epoch": 6.012290502793296, "grad_norm": 0.4976672828197479, "learning_rate": 0.0007015686274509803, "loss": 0.5029, "step": 10762 }, { "epoch": 6.0128491620111735, "grad_norm": 0.4182429015636444, "learning_rate": 0.0007015406162464986, "loss": 0.3773, "step": 10763 }, { "epoch": 6.01340782122905, "grad_norm": 0.9692990183830261, "learning_rate": 0.0007015126050420169, "loss": 0.4158, "step": 10764 }, { "epoch": 6.0139664804469275, "grad_norm": 0.4656253159046173, "learning_rate": 0.0007014845938375351, "loss": 0.4477, "step": 10765 }, { "epoch": 6.014525139664804, "grad_norm": 0.6307067275047302, "learning_rate": 0.0007014565826330533, "loss": 0.4127, "step": 10766 }, { "epoch": 6.0150837988826815, "grad_norm": 1.356160044670105, "learning_rate": 0.0007014285714285714, "loss": 0.5075, "step": 10767 }, { "epoch": 6.015642458100559, "grad_norm": 0.4145680367946625, "learning_rate": 0.0007014005602240896, "loss": 0.362, "step": 10768 }, { "epoch": 6.0162011173184355, "grad_norm": 0.747823178768158, "learning_rate": 0.0007013725490196079, "loss": 0.3907, "step": 10769 }, { "epoch": 6.016759776536313, "grad_norm": 0.43720245361328125, "learning_rate": 0.0007013445378151261, "loss": 0.3736, "step": 10770 }, { "epoch": 6.01731843575419, "grad_norm": 1.323954701423645, "learning_rate": 0.0007013165266106443, "loss": 0.4856, "step": 10771 }, { "epoch": 6.017877094972067, "grad_norm": 0.528076708316803, "learning_rate": 0.0007012885154061624, "loss": 0.5145, "step": 10772 }, { "epoch": 6.0184357541899445, "grad_norm": 0.5131105780601501, "learning_rate": 0.0007012605042016806, "loss": 0.4855, "step": 10773 }, { "epoch": 6.018994413407821, "grad_norm": 0.4567725658416748, "learning_rate": 
0.0007012324929971989, "loss": 0.4204, "step": 10774 }, { "epoch": 6.0195530726256985, "grad_norm": 0.9652224183082581, "learning_rate": 0.0007012044817927171, "loss": 0.4426, "step": 10775 }, { "epoch": 6.020111731843575, "grad_norm": 0.5839270949363708, "learning_rate": 0.0007011764705882353, "loss": 0.3688, "step": 10776 }, { "epoch": 6.0206703910614525, "grad_norm": 0.8496503233909607, "learning_rate": 0.0007011484593837534, "loss": 0.6914, "step": 10777 }, { "epoch": 6.02122905027933, "grad_norm": 0.548754096031189, "learning_rate": 0.0007011204481792716, "loss": 0.5181, "step": 10778 }, { "epoch": 6.0217877094972065, "grad_norm": 0.6896282434463501, "learning_rate": 0.00070109243697479, "loss": 0.5319, "step": 10779 }, { "epoch": 6.022346368715084, "grad_norm": 0.540732741355896, "learning_rate": 0.0007010644257703082, "loss": 0.4831, "step": 10780 }, { "epoch": 6.022905027932961, "grad_norm": 0.5916521549224854, "learning_rate": 0.0007010364145658264, "loss": 0.433, "step": 10781 }, { "epoch": 6.023463687150838, "grad_norm": 0.8262721300125122, "learning_rate": 0.0007010084033613446, "loss": 0.4857, "step": 10782 }, { "epoch": 6.0240223463687155, "grad_norm": 0.6633471250534058, "learning_rate": 0.0007009803921568627, "loss": 0.4667, "step": 10783 }, { "epoch": 6.024581005586592, "grad_norm": 0.9709869027137756, "learning_rate": 0.000700952380952381, "loss": 0.4724, "step": 10784 }, { "epoch": 6.0251396648044695, "grad_norm": 3.0102927684783936, "learning_rate": 0.0007009243697478992, "loss": 0.6534, "step": 10785 }, { "epoch": 6.025698324022346, "grad_norm": 7.023932456970215, "learning_rate": 0.0007008963585434174, "loss": 0.3773, "step": 10786 }, { "epoch": 6.0262569832402235, "grad_norm": 0.6988338232040405, "learning_rate": 0.0007008683473389356, "loss": 0.3827, "step": 10787 }, { "epoch": 6.026815642458101, "grad_norm": 0.8771420121192932, "learning_rate": 0.0007008403361344537, "loss": 0.5022, "step": 10788 }, { "epoch": 6.0273743016759775, 
"grad_norm": 0.6200162768363953, "learning_rate": 0.000700812324929972, "loss": 0.4222, "step": 10789 }, { "epoch": 6.027932960893855, "grad_norm": 1.523466944694519, "learning_rate": 0.0007007843137254902, "loss": 0.4948, "step": 10790 }, { "epoch": 6.028491620111732, "grad_norm": 0.6968160271644592, "learning_rate": 0.0007007563025210084, "loss": 0.433, "step": 10791 }, { "epoch": 6.029050279329609, "grad_norm": 2.646639585494995, "learning_rate": 0.0007007282913165266, "loss": 0.467, "step": 10792 }, { "epoch": 6.0296089385474865, "grad_norm": 0.5532181262969971, "learning_rate": 0.0007007002801120447, "loss": 0.4283, "step": 10793 }, { "epoch": 6.030167597765363, "grad_norm": 0.5752729177474976, "learning_rate": 0.000700672268907563, "loss": 0.3218, "step": 10794 }, { "epoch": 6.0307262569832405, "grad_norm": 0.7599282264709473, "learning_rate": 0.0007006442577030813, "loss": 0.5184, "step": 10795 }, { "epoch": 6.031284916201117, "grad_norm": 1.3925663232803345, "learning_rate": 0.0007006162464985995, "loss": 0.4733, "step": 10796 }, { "epoch": 6.0318435754189945, "grad_norm": 0.5994170904159546, "learning_rate": 0.0007005882352941177, "loss": 0.4751, "step": 10797 }, { "epoch": 6.032402234636871, "grad_norm": 0.5804548263549805, "learning_rate": 0.0007005602240896359, "loss": 0.4027, "step": 10798 }, { "epoch": 6.0329608938547485, "grad_norm": 0.6561393737792969, "learning_rate": 0.0007005322128851541, "loss": 0.4743, "step": 10799 }, { "epoch": 6.033519553072626, "grad_norm": 0.5753200650215149, "learning_rate": 0.0007005042016806723, "loss": 0.5643, "step": 10800 }, { "epoch": 6.034078212290503, "grad_norm": 0.6040725111961365, "learning_rate": 0.0007004761904761905, "loss": 0.4895, "step": 10801 }, { "epoch": 6.03463687150838, "grad_norm": 0.7829612493515015, "learning_rate": 0.0007004481792717087, "loss": 0.3975, "step": 10802 }, { "epoch": 6.035195530726257, "grad_norm": 0.8545989990234375, "learning_rate": 0.0007004201680672269, "loss": 0.4989, "step": 
10803 }, { "epoch": 6.035754189944134, "grad_norm": 0.4869842529296875, "learning_rate": 0.0007003921568627451, "loss": 0.4032, "step": 10804 }, { "epoch": 6.0363128491620115, "grad_norm": 0.8219355344772339, "learning_rate": 0.0007003641456582633, "loss": 0.5121, "step": 10805 }, { "epoch": 6.036871508379888, "grad_norm": 1.1885137557983398, "learning_rate": 0.0007003361344537815, "loss": 0.6034, "step": 10806 }, { "epoch": 6.0374301675977655, "grad_norm": 1.8642171621322632, "learning_rate": 0.0007003081232492997, "loss": 0.416, "step": 10807 }, { "epoch": 6.037988826815642, "grad_norm": 0.5085421204566956, "learning_rate": 0.0007002801120448179, "loss": 0.4397, "step": 10808 }, { "epoch": 6.0385474860335195, "grad_norm": 0.5510767102241516, "learning_rate": 0.0007002521008403361, "loss": 0.4573, "step": 10809 }, { "epoch": 6.039106145251397, "grad_norm": 0.45946282148361206, "learning_rate": 0.0007002240896358543, "loss": 0.3685, "step": 10810 }, { "epoch": 6.039664804469274, "grad_norm": 0.6027918457984924, "learning_rate": 0.0007001960784313726, "loss": 0.3502, "step": 10811 }, { "epoch": 6.040223463687151, "grad_norm": 0.5526122450828552, "learning_rate": 0.0007001680672268908, "loss": 0.4661, "step": 10812 }, { "epoch": 6.040782122905028, "grad_norm": 0.5164427757263184, "learning_rate": 0.000700140056022409, "loss": 0.5412, "step": 10813 }, { "epoch": 6.041340782122905, "grad_norm": 0.46552905440330505, "learning_rate": 0.0007001120448179273, "loss": 0.5202, "step": 10814 }, { "epoch": 6.0418994413407825, "grad_norm": 0.4000746011734009, "learning_rate": 0.0007000840336134454, "loss": 0.4629, "step": 10815 }, { "epoch": 6.042458100558659, "grad_norm": 0.42634910345077515, "learning_rate": 0.0007000560224089636, "loss": 0.3893, "step": 10816 }, { "epoch": 6.0430167597765365, "grad_norm": 0.5009442567825317, "learning_rate": 0.0007000280112044818, "loss": 0.4373, "step": 10817 }, { "epoch": 6.043575418994413, "grad_norm": 0.660260021686554, "learning_rate": 
0.0007, "loss": 0.3632, "step": 10818 }, { "epoch": 6.0441340782122905, "grad_norm": 1.0034247636795044, "learning_rate": 0.0006999719887955183, "loss": 0.4488, "step": 10819 }, { "epoch": 6.044692737430168, "grad_norm": 0.7092230916023254, "learning_rate": 0.0006999439775910364, "loss": 0.4103, "step": 10820 }, { "epoch": 6.045251396648045, "grad_norm": 0.6398437023162842, "learning_rate": 0.0006999159663865546, "loss": 0.5053, "step": 10821 }, { "epoch": 6.045810055865922, "grad_norm": 0.6886416077613831, "learning_rate": 0.0006998879551820728, "loss": 0.5198, "step": 10822 }, { "epoch": 6.046368715083799, "grad_norm": 0.4392039179801941, "learning_rate": 0.000699859943977591, "loss": 0.3337, "step": 10823 }, { "epoch": 6.046927374301676, "grad_norm": 0.6431219577789307, "learning_rate": 0.0006998319327731093, "loss": 0.459, "step": 10824 }, { "epoch": 6.0474860335195535, "grad_norm": 0.8359147310256958, "learning_rate": 0.0006998039215686274, "loss": 0.4441, "step": 10825 }, { "epoch": 6.04804469273743, "grad_norm": 1.4082962274551392, "learning_rate": 0.0006997759103641456, "loss": 0.3914, "step": 10826 }, { "epoch": 6.0486033519553075, "grad_norm": 0.7931913137435913, "learning_rate": 0.0006997478991596638, "loss": 0.389, "step": 10827 }, { "epoch": 6.049162011173184, "grad_norm": 1.2825291156768799, "learning_rate": 0.000699719887955182, "loss": 0.4261, "step": 10828 }, { "epoch": 6.0497206703910615, "grad_norm": 0.7035611867904663, "learning_rate": 0.0006996918767507004, "loss": 0.616, "step": 10829 }, { "epoch": 6.050279329608939, "grad_norm": 2.3387644290924072, "learning_rate": 0.0006996638655462186, "loss": 0.4332, "step": 10830 }, { "epoch": 6.050837988826816, "grad_norm": 0.46920862793922424, "learning_rate": 0.0006996358543417367, "loss": 0.3945, "step": 10831 }, { "epoch": 6.051396648044693, "grad_norm": 0.7833524942398071, "learning_rate": 0.0006996078431372549, "loss": 0.5773, "step": 10832 }, { "epoch": 6.05195530726257, "grad_norm": 
0.6396458745002747, "learning_rate": 0.0006995798319327731, "loss": 0.4959, "step": 10833 }, { "epoch": 6.052513966480447, "grad_norm": 0.836787760257721, "learning_rate": 0.0006995518207282914, "loss": 0.3687, "step": 10834 }, { "epoch": 6.053072625698324, "grad_norm": 0.6487652659416199, "learning_rate": 0.0006995238095238096, "loss": 0.6013, "step": 10835 }, { "epoch": 6.053631284916201, "grad_norm": 0.7880006432533264, "learning_rate": 0.0006994957983193277, "loss": 0.455, "step": 10836 }, { "epoch": 6.0541899441340785, "grad_norm": 0.7589018940925598, "learning_rate": 0.0006994677871148459, "loss": 0.5345, "step": 10837 }, { "epoch": 6.054748603351955, "grad_norm": 0.7541858553886414, "learning_rate": 0.0006994397759103641, "loss": 0.3533, "step": 10838 }, { "epoch": 6.0553072625698325, "grad_norm": 0.49204450845718384, "learning_rate": 0.0006994117647058824, "loss": 0.4926, "step": 10839 }, { "epoch": 6.055865921787709, "grad_norm": 0.4427184760570526, "learning_rate": 0.0006993837535014006, "loss": 0.3897, "step": 10840 }, { "epoch": 6.056424581005587, "grad_norm": 0.47213247418403625, "learning_rate": 0.0006993557422969187, "loss": 0.339, "step": 10841 }, { "epoch": 6.056983240223464, "grad_norm": 0.5522042512893677, "learning_rate": 0.0006993277310924369, "loss": 0.4789, "step": 10842 }, { "epoch": 6.057541899441341, "grad_norm": 0.7679473757743835, "learning_rate": 0.0006992997198879551, "loss": 0.5913, "step": 10843 }, { "epoch": 6.058100558659218, "grad_norm": 1.0393184423446655, "learning_rate": 0.0006992717086834735, "loss": 0.5372, "step": 10844 }, { "epoch": 6.058659217877095, "grad_norm": 0.8953571319580078, "learning_rate": 0.0006992436974789917, "loss": 0.5416, "step": 10845 }, { "epoch": 6.059217877094972, "grad_norm": 1.2806627750396729, "learning_rate": 0.0006992156862745099, "loss": 0.4492, "step": 10846 }, { "epoch": 6.0597765363128495, "grad_norm": 2.110543727874756, "learning_rate": 0.000699187675070028, "loss": 0.3661, "step": 10847 }, { 
"epoch": 6.060335195530726, "grad_norm": 1.072935700416565, "learning_rate": 0.0006991596638655462, "loss": 0.4846, "step": 10848 }, { "epoch": 6.0608938547486035, "grad_norm": 0.4998075067996979, "learning_rate": 0.0006991316526610645, "loss": 0.3933, "step": 10849 }, { "epoch": 6.06145251396648, "grad_norm": 0.5481994152069092, "learning_rate": 0.0006991036414565827, "loss": 0.5352, "step": 10850 }, { "epoch": 6.062011173184358, "grad_norm": 0.5999717712402344, "learning_rate": 0.0006990756302521009, "loss": 0.5079, "step": 10851 }, { "epoch": 6.062569832402235, "grad_norm": 0.44448959827423096, "learning_rate": 0.000699047619047619, "loss": 0.5184, "step": 10852 }, { "epoch": 6.063128491620112, "grad_norm": 0.7252323031425476, "learning_rate": 0.0006990196078431372, "loss": 0.3998, "step": 10853 }, { "epoch": 6.063687150837989, "grad_norm": 0.6027101278305054, "learning_rate": 0.0006989915966386555, "loss": 0.4991, "step": 10854 }, { "epoch": 6.064245810055866, "grad_norm": 0.9127732515335083, "learning_rate": 0.0006989635854341737, "loss": 0.4681, "step": 10855 }, { "epoch": 6.064804469273743, "grad_norm": 0.40761685371398926, "learning_rate": 0.0006989355742296919, "loss": 0.4431, "step": 10856 }, { "epoch": 6.0653631284916205, "grad_norm": 0.41250237822532654, "learning_rate": 0.00069890756302521, "loss": 0.4757, "step": 10857 }, { "epoch": 6.065921787709497, "grad_norm": 0.573297381401062, "learning_rate": 0.0006988795518207282, "loss": 0.5019, "step": 10858 }, { "epoch": 6.0664804469273745, "grad_norm": 0.6602842211723328, "learning_rate": 0.0006988515406162465, "loss": 0.4551, "step": 10859 }, { "epoch": 6.067039106145251, "grad_norm": 0.6826991438865662, "learning_rate": 0.0006988235294117648, "loss": 0.427, "step": 10860 }, { "epoch": 6.067597765363129, "grad_norm": 0.482316255569458, "learning_rate": 0.000698795518207283, "loss": 0.3526, "step": 10861 }, { "epoch": 6.068156424581006, "grad_norm": 0.9022002220153809, "learning_rate": 
0.0006987675070028012, "loss": 0.45, "step": 10862 }, { "epoch": 6.068715083798883, "grad_norm": 0.5509048700332642, "learning_rate": 0.0006987394957983193, "loss": 0.4807, "step": 10863 }, { "epoch": 6.06927374301676, "grad_norm": 2.927882671356201, "learning_rate": 0.0006987114845938376, "loss": 0.443, "step": 10864 }, { "epoch": 6.069832402234637, "grad_norm": 0.712352991104126, "learning_rate": 0.0006986834733893558, "loss": 0.5154, "step": 10865 }, { "epoch": 6.070391061452514, "grad_norm": 0.7284734845161438, "learning_rate": 0.000698655462184874, "loss": 0.3712, "step": 10866 }, { "epoch": 6.070949720670391, "grad_norm": 0.4085279405117035, "learning_rate": 0.0006986274509803922, "loss": 0.4312, "step": 10867 }, { "epoch": 6.071508379888268, "grad_norm": 0.6925187706947327, "learning_rate": 0.0006985994397759103, "loss": 0.4419, "step": 10868 }, { "epoch": 6.0720670391061455, "grad_norm": 2.800260066986084, "learning_rate": 0.0006985714285714286, "loss": 0.3975, "step": 10869 }, { "epoch": 6.072625698324022, "grad_norm": 0.6637143492698669, "learning_rate": 0.0006985434173669468, "loss": 0.5204, "step": 10870 }, { "epoch": 6.0731843575419, "grad_norm": 0.5931059718132019, "learning_rate": 0.000698515406162465, "loss": 0.4851, "step": 10871 }, { "epoch": 6.073743016759776, "grad_norm": 2.2102060317993164, "learning_rate": 0.0006984873949579832, "loss": 0.5179, "step": 10872 }, { "epoch": 6.074301675977654, "grad_norm": 0.5598397254943848, "learning_rate": 0.0006984593837535013, "loss": 0.4871, "step": 10873 }, { "epoch": 6.074860335195531, "grad_norm": 0.483901709318161, "learning_rate": 0.0006984313725490196, "loss": 0.3633, "step": 10874 }, { "epoch": 6.075418994413408, "grad_norm": 1.239761233329773, "learning_rate": 0.0006984033613445378, "loss": 0.4288, "step": 10875 }, { "epoch": 6.075977653631285, "grad_norm": 0.5510937571525574, "learning_rate": 0.000698375350140056, "loss": 0.4639, "step": 10876 }, { "epoch": 6.076536312849162, "grad_norm": 
0.5686545372009277, "learning_rate": 0.0006983473389355743, "loss": 0.4021, "step": 10877 }, { "epoch": 6.077094972067039, "grad_norm": 2.322188377380371, "learning_rate": 0.0006983193277310925, "loss": 0.3936, "step": 10878 }, { "epoch": 6.0776536312849165, "grad_norm": 0.7091884016990662, "learning_rate": 0.0006982913165266107, "loss": 0.371, "step": 10879 }, { "epoch": 6.078212290502793, "grad_norm": 0.6452813148498535, "learning_rate": 0.0006982633053221289, "loss": 0.4153, "step": 10880 }, { "epoch": 6.078770949720671, "grad_norm": 0.4575195908546448, "learning_rate": 0.0006982352941176471, "loss": 0.4103, "step": 10881 }, { "epoch": 6.079329608938547, "grad_norm": 0.5927631258964539, "learning_rate": 0.0006982072829131653, "loss": 0.6678, "step": 10882 }, { "epoch": 6.079888268156425, "grad_norm": 0.521142840385437, "learning_rate": 0.0006981792717086835, "loss": 0.454, "step": 10883 }, { "epoch": 6.080446927374302, "grad_norm": 0.6282399892807007, "learning_rate": 0.0006981512605042017, "loss": 0.471, "step": 10884 }, { "epoch": 6.081005586592179, "grad_norm": 0.6412461996078491, "learning_rate": 0.0006981232492997199, "loss": 0.4296, "step": 10885 }, { "epoch": 6.081564245810056, "grad_norm": 0.41716331243515015, "learning_rate": 0.0006980952380952381, "loss": 0.3336, "step": 10886 }, { "epoch": 6.082122905027933, "grad_norm": 0.43193814158439636, "learning_rate": 0.0006980672268907563, "loss": 0.4132, "step": 10887 }, { "epoch": 6.08268156424581, "grad_norm": 0.36316362023353577, "learning_rate": 0.0006980392156862745, "loss": 0.3262, "step": 10888 }, { "epoch": 6.0832402234636875, "grad_norm": 0.5462936162948608, "learning_rate": 0.0006980112044817928, "loss": 0.523, "step": 10889 }, { "epoch": 6.083798882681564, "grad_norm": 0.5788365602493286, "learning_rate": 0.0006979831932773109, "loss": 0.572, "step": 10890 }, { "epoch": 6.084357541899442, "grad_norm": 0.4404538869857788, "learning_rate": 0.0006979551820728291, "loss": 0.3874, "step": 10891 }, { 
"epoch": 6.084916201117318, "grad_norm": 0.670192539691925, "learning_rate": 0.0006979271708683473, "loss": 0.4835, "step": 10892 }, { "epoch": 6.085474860335196, "grad_norm": 0.44022247195243835, "learning_rate": 0.0006978991596638656, "loss": 0.4005, "step": 10893 }, { "epoch": 6.086033519553073, "grad_norm": 0.45533043146133423, "learning_rate": 0.0006978711484593839, "loss": 0.3686, "step": 10894 }, { "epoch": 6.08659217877095, "grad_norm": 1.3965787887573242, "learning_rate": 0.000697843137254902, "loss": 0.4165, "step": 10895 }, { "epoch": 6.087150837988827, "grad_norm": 0.8195257782936096, "learning_rate": 0.0006978151260504202, "loss": 0.4008, "step": 10896 }, { "epoch": 6.087709497206704, "grad_norm": 0.8205869197845459, "learning_rate": 0.0006977871148459384, "loss": 0.3417, "step": 10897 }, { "epoch": 6.088268156424581, "grad_norm": 1.1164945363998413, "learning_rate": 0.0006977591036414566, "loss": 0.3749, "step": 10898 }, { "epoch": 6.0888268156424585, "grad_norm": 0.40060552954673767, "learning_rate": 0.0006977310924369749, "loss": 0.4623, "step": 10899 }, { "epoch": 6.089385474860335, "grad_norm": 0.5135126113891602, "learning_rate": 0.000697703081232493, "loss": 0.4877, "step": 10900 }, { "epoch": 6.089944134078213, "grad_norm": 1.592182993888855, "learning_rate": 0.0006976750700280112, "loss": 0.5295, "step": 10901 }, { "epoch": 6.090502793296089, "grad_norm": 0.84066241979599, "learning_rate": 0.0006976470588235294, "loss": 0.4461, "step": 10902 }, { "epoch": 6.091061452513967, "grad_norm": 0.6580346822738647, "learning_rate": 0.0006976190476190476, "loss": 0.4283, "step": 10903 }, { "epoch": 6.091620111731843, "grad_norm": 0.591770350933075, "learning_rate": 0.0006975910364145659, "loss": 0.4478, "step": 10904 }, { "epoch": 6.092178770949721, "grad_norm": 2.3529205322265625, "learning_rate": 0.0006975630252100841, "loss": 0.4048, "step": 10905 }, { "epoch": 6.092737430167598, "grad_norm": 0.8928508162498474, "learning_rate": 
0.0006975350140056022, "loss": 0.4431, "step": 10906 }, { "epoch": 6.093296089385475, "grad_norm": 0.5485340356826782, "learning_rate": 0.0006975070028011204, "loss": 0.3834, "step": 10907 }, { "epoch": 6.093854748603352, "grad_norm": 0.40008461475372314, "learning_rate": 0.0006974789915966386, "loss": 0.3491, "step": 10908 }, { "epoch": 6.094413407821229, "grad_norm": 0.44092485308647156, "learning_rate": 0.000697450980392157, "loss": 0.4522, "step": 10909 }, { "epoch": 6.094972067039106, "grad_norm": 0.41834747791290283, "learning_rate": 0.0006974229691876752, "loss": 0.4308, "step": 10910 }, { "epoch": 6.0955307262569836, "grad_norm": 1.0991649627685547, "learning_rate": 0.0006973949579831933, "loss": 0.5114, "step": 10911 }, { "epoch": 6.09608938547486, "grad_norm": 0.9742670655250549, "learning_rate": 0.0006973669467787115, "loss": 0.5979, "step": 10912 }, { "epoch": 6.096648044692738, "grad_norm": 2.3714523315429688, "learning_rate": 0.0006973389355742297, "loss": 0.4323, "step": 10913 }, { "epoch": 6.097206703910614, "grad_norm": 0.5217916369438171, "learning_rate": 0.000697310924369748, "loss": 0.5236, "step": 10914 }, { "epoch": 6.097765363128492, "grad_norm": 0.6856021285057068, "learning_rate": 0.0006972829131652662, "loss": 0.4675, "step": 10915 }, { "epoch": 6.098324022346369, "grad_norm": 0.7215303182601929, "learning_rate": 0.0006972549019607843, "loss": 0.4032, "step": 10916 }, { "epoch": 6.098882681564246, "grad_norm": 0.5496611595153809, "learning_rate": 0.0006972268907563025, "loss": 0.4939, "step": 10917 }, { "epoch": 6.099441340782123, "grad_norm": 0.6390330195426941, "learning_rate": 0.0006971988795518207, "loss": 0.5086, "step": 10918 }, { "epoch": 6.1, "grad_norm": 0.4926900565624237, "learning_rate": 0.000697170868347339, "loss": 0.3372, "step": 10919 }, { "epoch": 6.100558659217877, "grad_norm": 0.547737181186676, "learning_rate": 0.0006971428571428572, "loss": 0.4523, "step": 10920 }, { "epoch": 6.1011173184357546, "grad_norm": 
0.5713638663291931, "learning_rate": 0.0006971148459383754, "loss": 0.5189, "step": 10921 }, { "epoch": 6.101675977653631, "grad_norm": 0.6128251552581787, "learning_rate": 0.0006970868347338935, "loss": 0.2731, "step": 10922 }, { "epoch": 6.102234636871509, "grad_norm": 0.7094868421554565, "learning_rate": 0.0006970588235294117, "loss": 0.4075, "step": 10923 }, { "epoch": 6.102793296089385, "grad_norm": 0.482438862323761, "learning_rate": 0.00069703081232493, "loss": 0.4253, "step": 10924 }, { "epoch": 6.103351955307263, "grad_norm": 0.7579505443572998, "learning_rate": 0.0006970028011204483, "loss": 0.5492, "step": 10925 }, { "epoch": 6.10391061452514, "grad_norm": 1.9759689569473267, "learning_rate": 0.0006969747899159665, "loss": 0.5222, "step": 10926 }, { "epoch": 6.104469273743017, "grad_norm": 0.4022391438484192, "learning_rate": 0.0006969467787114846, "loss": 0.3137, "step": 10927 }, { "epoch": 6.105027932960894, "grad_norm": 0.8322412967681885, "learning_rate": 0.0006969187675070028, "loss": 0.53, "step": 10928 }, { "epoch": 6.105586592178771, "grad_norm": 0.501099169254303, "learning_rate": 0.0006968907563025211, "loss": 0.3716, "step": 10929 }, { "epoch": 6.106145251396648, "grad_norm": 0.5984638929367065, "learning_rate": 0.0006968627450980393, "loss": 0.4958, "step": 10930 }, { "epoch": 6.1067039106145256, "grad_norm": 0.578277587890625, "learning_rate": 0.0006968347338935575, "loss": 0.4391, "step": 10931 }, { "epoch": 6.107262569832402, "grad_norm": 1.397057056427002, "learning_rate": 0.0006968067226890756, "loss": 0.49, "step": 10932 }, { "epoch": 6.10782122905028, "grad_norm": 0.376043438911438, "learning_rate": 0.0006967787114845938, "loss": 0.4112, "step": 10933 }, { "epoch": 6.108379888268156, "grad_norm": 0.4655259847640991, "learning_rate": 0.000696750700280112, "loss": 0.4504, "step": 10934 }, { "epoch": 6.108938547486034, "grad_norm": 0.7578765749931335, "learning_rate": 0.0006967226890756303, "loss": 0.5295, "step": 10935 }, { "epoch": 
6.10949720670391, "grad_norm": 0.43772193789482117, "learning_rate": 0.0006966946778711485, "loss": 0.437, "step": 10936 }, { "epoch": 6.110055865921788, "grad_norm": 0.9742315411567688, "learning_rate": 0.0006966666666666667, "loss": 0.3953, "step": 10937 }, { "epoch": 6.110614525139665, "grad_norm": 0.4262291193008423, "learning_rate": 0.0006966386554621848, "loss": 0.4598, "step": 10938 }, { "epoch": 6.111173184357542, "grad_norm": 0.4859914481639862, "learning_rate": 0.000696610644257703, "loss": 0.3415, "step": 10939 }, { "epoch": 6.111731843575419, "grad_norm": 0.7761494517326355, "learning_rate": 0.0006965826330532213, "loss": 0.4251, "step": 10940 }, { "epoch": 6.112290502793296, "grad_norm": 0.5885061621665955, "learning_rate": 0.0006965546218487395, "loss": 0.6029, "step": 10941 }, { "epoch": 6.112849162011173, "grad_norm": 0.5668952465057373, "learning_rate": 0.0006965266106442578, "loss": 0.5304, "step": 10942 }, { "epoch": 6.113407821229051, "grad_norm": 0.6129389405250549, "learning_rate": 0.0006964985994397759, "loss": 0.4598, "step": 10943 }, { "epoch": 6.113966480446927, "grad_norm": 0.35725122690200806, "learning_rate": 0.0006964705882352941, "loss": 0.3318, "step": 10944 }, { "epoch": 6.114525139664805, "grad_norm": 1.4380866289138794, "learning_rate": 0.0006964425770308124, "loss": 0.4372, "step": 10945 }, { "epoch": 6.115083798882681, "grad_norm": 0.6878176331520081, "learning_rate": 0.0006964145658263306, "loss": 0.5267, "step": 10946 }, { "epoch": 6.115642458100559, "grad_norm": 0.7436250448226929, "learning_rate": 0.0006963865546218488, "loss": 0.4181, "step": 10947 }, { "epoch": 6.116201117318436, "grad_norm": 0.8786506056785583, "learning_rate": 0.0006963585434173669, "loss": 0.5409, "step": 10948 }, { "epoch": 6.116759776536313, "grad_norm": 0.4254874587059021, "learning_rate": 0.0006963305322128851, "loss": 0.4699, "step": 10949 }, { "epoch": 6.11731843575419, "grad_norm": 0.7329756021499634, "learning_rate": 0.0006963025210084034, 
"loss": 0.5014, "step": 10950 }, { "epoch": 6.117877094972067, "grad_norm": 0.7794732451438904, "learning_rate": 0.0006962745098039216, "loss": 0.3411, "step": 10951 }, { "epoch": 6.118435754189944, "grad_norm": 0.6167165637016296, "learning_rate": 0.0006962464985994398, "loss": 0.5812, "step": 10952 }, { "epoch": 6.118994413407822, "grad_norm": 0.49819305539131165, "learning_rate": 0.000696218487394958, "loss": 0.4392, "step": 10953 }, { "epoch": 6.119553072625698, "grad_norm": 0.4116550385951996, "learning_rate": 0.0006961904761904761, "loss": 0.3648, "step": 10954 }, { "epoch": 6.120111731843576, "grad_norm": 0.5190322995185852, "learning_rate": 0.0006961624649859944, "loss": 0.4, "step": 10955 }, { "epoch": 6.120670391061452, "grad_norm": 0.4842533469200134, "learning_rate": 0.0006961344537815126, "loss": 0.4824, "step": 10956 }, { "epoch": 6.12122905027933, "grad_norm": 0.8224414587020874, "learning_rate": 0.0006961064425770308, "loss": 0.5171, "step": 10957 }, { "epoch": 6.121787709497207, "grad_norm": 0.5653104186058044, "learning_rate": 0.000696078431372549, "loss": 0.3723, "step": 10958 }, { "epoch": 6.122346368715084, "grad_norm": 0.9112353920936584, "learning_rate": 0.0006960504201680671, "loss": 0.5437, "step": 10959 }, { "epoch": 6.122905027932961, "grad_norm": 0.504174530506134, "learning_rate": 0.0006960224089635855, "loss": 0.4373, "step": 10960 }, { "epoch": 6.123463687150838, "grad_norm": 0.9851964116096497, "learning_rate": 0.0006959943977591037, "loss": 0.4757, "step": 10961 }, { "epoch": 6.124022346368715, "grad_norm": 2.3386800289154053, "learning_rate": 0.0006959663865546219, "loss": 0.3673, "step": 10962 }, { "epoch": 6.124581005586593, "grad_norm": 0.6139798760414124, "learning_rate": 0.0006959383753501401, "loss": 0.4811, "step": 10963 }, { "epoch": 6.125139664804469, "grad_norm": 0.5551549792289734, "learning_rate": 0.0006959103641456582, "loss": 0.4608, "step": 10964 }, { "epoch": 6.125698324022347, "grad_norm": 0.4621923863887787, 
"learning_rate": 0.0006958823529411765, "loss": 0.5717, "step": 10965 }, { "epoch": 6.126256983240223, "grad_norm": 3.5591931343078613, "learning_rate": 0.0006958543417366947, "loss": 0.4244, "step": 10966 }, { "epoch": 6.126815642458101, "grad_norm": 0.3890925943851471, "learning_rate": 0.0006958263305322129, "loss": 0.3799, "step": 10967 }, { "epoch": 6.127374301675978, "grad_norm": 0.42973747849464417, "learning_rate": 0.0006957983193277311, "loss": 0.3414, "step": 10968 }, { "epoch": 6.127932960893855, "grad_norm": 0.4976039230823517, "learning_rate": 0.0006957703081232493, "loss": 0.4378, "step": 10969 }, { "epoch": 6.128491620111732, "grad_norm": 0.8133490085601807, "learning_rate": 0.0006957422969187675, "loss": 0.609, "step": 10970 }, { "epoch": 6.129050279329609, "grad_norm": 0.575567364692688, "learning_rate": 0.0006957142857142857, "loss": 0.4739, "step": 10971 }, { "epoch": 6.129608938547486, "grad_norm": 0.4603343605995178, "learning_rate": 0.0006956862745098039, "loss": 0.5469, "step": 10972 }, { "epoch": 6.130167597765363, "grad_norm": 0.5232548713684082, "learning_rate": 0.0006956582633053221, "loss": 0.3963, "step": 10973 }, { "epoch": 6.13072625698324, "grad_norm": 0.9490572810173035, "learning_rate": 0.0006956302521008403, "loss": 0.4825, "step": 10974 }, { "epoch": 6.131284916201118, "grad_norm": 0.495437353849411, "learning_rate": 0.0006956022408963586, "loss": 0.4201, "step": 10975 }, { "epoch": 6.131843575418994, "grad_norm": 0.4871094226837158, "learning_rate": 0.0006955742296918768, "loss": 0.5183, "step": 10976 }, { "epoch": 6.132402234636872, "grad_norm": 0.46342968940734863, "learning_rate": 0.000695546218487395, "loss": 0.4424, "step": 10977 }, { "epoch": 6.132960893854748, "grad_norm": 0.6024301648139954, "learning_rate": 0.0006955182072829132, "loss": 0.5927, "step": 10978 }, { "epoch": 6.133519553072626, "grad_norm": 0.6028921008110046, "learning_rate": 0.0006954901960784314, "loss": 0.5112, "step": 10979 }, { "epoch": 
6.134078212290503, "grad_norm": 0.5339969992637634, "learning_rate": 0.0006954621848739496, "loss": 0.4154, "step": 10980 }, { "epoch": 6.13463687150838, "grad_norm": 0.49067366123199463, "learning_rate": 0.0006954341736694678, "loss": 0.3258, "step": 10981 }, { "epoch": 6.135195530726257, "grad_norm": 0.4345940947532654, "learning_rate": 0.000695406162464986, "loss": 0.4244, "step": 10982 }, { "epoch": 6.135754189944134, "grad_norm": 0.4652898907661438, "learning_rate": 0.0006953781512605042, "loss": 0.4991, "step": 10983 }, { "epoch": 6.136312849162011, "grad_norm": 0.5036503076553345, "learning_rate": 0.0006953501400560224, "loss": 0.438, "step": 10984 }, { "epoch": 6.136871508379889, "grad_norm": 0.519578218460083, "learning_rate": 0.0006953221288515407, "loss": 0.4883, "step": 10985 }, { "epoch": 6.137430167597765, "grad_norm": 0.635047435760498, "learning_rate": 0.0006952941176470588, "loss": 0.4135, "step": 10986 }, { "epoch": 6.137988826815643, "grad_norm": 0.5979741215705872, "learning_rate": 0.000695266106442577, "loss": 0.3571, "step": 10987 }, { "epoch": 6.138547486033519, "grad_norm": 0.4660211503505707, "learning_rate": 0.0006952380952380952, "loss": 0.5042, "step": 10988 }, { "epoch": 6.139106145251397, "grad_norm": 0.5490683317184448, "learning_rate": 0.0006952100840336134, "loss": 0.4652, "step": 10989 }, { "epoch": 6.139664804469274, "grad_norm": 0.7679424285888672, "learning_rate": 0.0006951820728291317, "loss": 0.5416, "step": 10990 }, { "epoch": 6.140223463687151, "grad_norm": 0.688495934009552, "learning_rate": 0.0006951540616246498, "loss": 0.4643, "step": 10991 }, { "epoch": 6.140782122905028, "grad_norm": 0.335827112197876, "learning_rate": 0.000695126050420168, "loss": 0.3742, "step": 10992 }, { "epoch": 6.141340782122905, "grad_norm": 0.5576304197311401, "learning_rate": 0.0006950980392156863, "loss": 0.4624, "step": 10993 }, { "epoch": 6.141899441340782, "grad_norm": 1.2468128204345703, "learning_rate": 0.0006950700280112045, "loss": 
0.5582, "step": 10994 }, { "epoch": 6.14245810055866, "grad_norm": 0.8157767653465271, "learning_rate": 0.0006950420168067228, "loss": 0.4305, "step": 10995 }, { "epoch": 6.143016759776536, "grad_norm": 0.5396414995193481, "learning_rate": 0.0006950140056022409, "loss": 0.4267, "step": 10996 }, { "epoch": 6.143575418994414, "grad_norm": 0.7560551762580872, "learning_rate": 0.0006949859943977591, "loss": 0.4171, "step": 10997 }, { "epoch": 6.14413407821229, "grad_norm": 0.5291085839271545, "learning_rate": 0.0006949579831932773, "loss": 0.5123, "step": 10998 }, { "epoch": 6.144692737430168, "grad_norm": 0.5279495716094971, "learning_rate": 0.0006949299719887955, "loss": 0.5024, "step": 10999 }, { "epoch": 6.145251396648045, "grad_norm": 0.6578406095504761, "learning_rate": 0.0006949019607843138, "loss": 0.5366, "step": 11000 }, { "epoch": 6.145251396648045, "eval_cer": 0.09167730461632462, "eval_loss": 0.34710562229156494, "eval_runtime": 55.6759, "eval_samples_per_second": 81.508, "eval_steps_per_second": 5.101, "eval_wer": 0.36133068908689703, "step": 11000 }, { "epoch": 6.145810055865922, "grad_norm": 0.6645103096961975, "learning_rate": 0.000694873949579832, "loss": 0.456, "step": 11001 }, { "epoch": 6.146368715083799, "grad_norm": 0.485580176115036, "learning_rate": 0.0006948459383753501, "loss": 0.4979, "step": 11002 }, { "epoch": 6.146927374301676, "grad_norm": 0.603653073310852, "learning_rate": 0.0006948179271708683, "loss": 0.5114, "step": 11003 }, { "epoch": 6.147486033519553, "grad_norm": 0.6199735403060913, "learning_rate": 0.0006947899159663865, "loss": 0.4725, "step": 11004 }, { "epoch": 6.148044692737431, "grad_norm": 0.4745272994041443, "learning_rate": 0.0006947619047619048, "loss": 0.3131, "step": 11005 }, { "epoch": 6.148603351955307, "grad_norm": 0.444668710231781, "learning_rate": 0.000694733893557423, "loss": 0.3948, "step": 11006 }, { "epoch": 6.149162011173185, "grad_norm": 0.395626425743103, "learning_rate": 0.0006947058823529411, "loss": 
0.4473, "step": 11007 }, { "epoch": 6.149720670391061, "grad_norm": 0.644459068775177, "learning_rate": 0.0006946778711484593, "loss": 0.4119, "step": 11008 }, { "epoch": 6.150279329608939, "grad_norm": 0.8359875679016113, "learning_rate": 0.0006946498599439776, "loss": 0.3634, "step": 11009 }, { "epoch": 6.150837988826815, "grad_norm": 0.5190132856369019, "learning_rate": 0.0006946218487394959, "loss": 0.4067, "step": 11010 }, { "epoch": 6.151396648044693, "grad_norm": 9.551300048828125, "learning_rate": 0.0006945938375350141, "loss": 0.427, "step": 11011 }, { "epoch": 6.15195530726257, "grad_norm": 0.3548396825790405, "learning_rate": 0.0006945658263305322, "loss": 0.3743, "step": 11012 }, { "epoch": 6.152513966480447, "grad_norm": 0.6031273603439331, "learning_rate": 0.0006945378151260504, "loss": 0.571, "step": 11013 }, { "epoch": 6.153072625698324, "grad_norm": 0.4338412880897522, "learning_rate": 0.0006945098039215686, "loss": 0.3511, "step": 11014 }, { "epoch": 6.153631284916201, "grad_norm": 0.40045738220214844, "learning_rate": 0.0006944817927170869, "loss": 0.3809, "step": 11015 }, { "epoch": 6.154189944134078, "grad_norm": 0.5165607929229736, "learning_rate": 0.0006944537815126051, "loss": 0.4093, "step": 11016 }, { "epoch": 6.154748603351956, "grad_norm": 0.782764196395874, "learning_rate": 0.0006944257703081233, "loss": 0.497, "step": 11017 }, { "epoch": 6.155307262569832, "grad_norm": 0.5764148235321045, "learning_rate": 0.0006943977591036414, "loss": 0.4085, "step": 11018 }, { "epoch": 6.15586592178771, "grad_norm": 0.5112626552581787, "learning_rate": 0.0006943697478991596, "loss": 0.4169, "step": 11019 }, { "epoch": 6.156424581005586, "grad_norm": 0.5369787812232971, "learning_rate": 0.0006943417366946779, "loss": 0.3273, "step": 11020 }, { "epoch": 6.156983240223464, "grad_norm": 0.44596096873283386, "learning_rate": 0.0006943137254901961, "loss": 0.4841, "step": 11021 }, { "epoch": 6.157541899441341, "grad_norm": 0.46426451206207275, 
"learning_rate": 0.0006942857142857143, "loss": 0.4868, "step": 11022 }, { "epoch": 6.158100558659218, "grad_norm": 0.5903044939041138, "learning_rate": 0.0006942577030812324, "loss": 0.4962, "step": 11023 }, { "epoch": 6.158659217877095, "grad_norm": 0.4951884150505066, "learning_rate": 0.0006942296918767506, "loss": 0.5406, "step": 11024 }, { "epoch": 6.159217877094972, "grad_norm": 0.5276403427124023, "learning_rate": 0.000694201680672269, "loss": 0.421, "step": 11025 }, { "epoch": 6.159776536312849, "grad_norm": 7.846343040466309, "learning_rate": 0.0006941736694677872, "loss": 0.3978, "step": 11026 }, { "epoch": 6.160335195530727, "grad_norm": 0.9427791833877563, "learning_rate": 0.0006941456582633054, "loss": 0.6597, "step": 11027 }, { "epoch": 6.160893854748603, "grad_norm": 0.44784265756607056, "learning_rate": 0.0006941176470588235, "loss": 0.3436, "step": 11028 }, { "epoch": 6.161452513966481, "grad_norm": 1.2415848970413208, "learning_rate": 0.0006940896358543417, "loss": 0.5221, "step": 11029 }, { "epoch": 6.162011173184357, "grad_norm": 0.5714986324310303, "learning_rate": 0.00069406162464986, "loss": 0.424, "step": 11030 }, { "epoch": 6.162569832402235, "grad_norm": 0.7360231280326843, "learning_rate": 0.0006940336134453782, "loss": 0.544, "step": 11031 }, { "epoch": 6.163128491620112, "grad_norm": 0.88707035779953, "learning_rate": 0.0006940056022408964, "loss": 0.4633, "step": 11032 }, { "epoch": 6.163687150837989, "grad_norm": 1.9829038381576538, "learning_rate": 0.0006939775910364146, "loss": 0.5355, "step": 11033 }, { "epoch": 6.164245810055866, "grad_norm": 0.4707237184047699, "learning_rate": 0.0006939495798319327, "loss": 0.4324, "step": 11034 }, { "epoch": 6.164804469273743, "grad_norm": 2.2109055519104004, "learning_rate": 0.000693921568627451, "loss": 0.4969, "step": 11035 }, { "epoch": 6.16536312849162, "grad_norm": 0.6761896014213562, "learning_rate": 0.0006938935574229692, "loss": 0.5069, "step": 11036 }, { "epoch": 6.165921787709498, 
"grad_norm": 0.6342883110046387, "learning_rate": 0.0006938655462184874, "loss": 0.3927, "step": 11037 }, { "epoch": 6.166480446927374, "grad_norm": 1.644527792930603, "learning_rate": 0.0006938375350140056, "loss": 0.3895, "step": 11038 }, { "epoch": 6.167039106145252, "grad_norm": 0.46073096990585327, "learning_rate": 0.0006938095238095237, "loss": 0.523, "step": 11039 }, { "epoch": 6.167597765363128, "grad_norm": 2.2446236610412598, "learning_rate": 0.000693781512605042, "loss": 0.5024, "step": 11040 }, { "epoch": 6.168156424581006, "grad_norm": 0.4837959110736847, "learning_rate": 0.0006937535014005603, "loss": 0.5021, "step": 11041 }, { "epoch": 6.168715083798883, "grad_norm": 0.4140658974647522, "learning_rate": 0.0006937254901960785, "loss": 0.4148, "step": 11042 }, { "epoch": 6.16927374301676, "grad_norm": 0.9625561833381653, "learning_rate": 0.0006936974789915967, "loss": 0.4664, "step": 11043 }, { "epoch": 6.169832402234637, "grad_norm": 1.376027226448059, "learning_rate": 0.0006936694677871148, "loss": 0.7111, "step": 11044 }, { "epoch": 6.170391061452514, "grad_norm": 0.5922132730484009, "learning_rate": 0.0006936414565826331, "loss": 0.474, "step": 11045 }, { "epoch": 6.170949720670391, "grad_norm": 0.46543070673942566, "learning_rate": 0.0006936134453781513, "loss": 0.5159, "step": 11046 }, { "epoch": 6.171508379888268, "grad_norm": 0.6008877754211426, "learning_rate": 0.0006935854341736695, "loss": 0.4303, "step": 11047 }, { "epoch": 6.172067039106145, "grad_norm": 0.37619924545288086, "learning_rate": 0.0006935574229691877, "loss": 0.4361, "step": 11048 }, { "epoch": 6.172625698324023, "grad_norm": 1.0105559825897217, "learning_rate": 0.0006935294117647059, "loss": 0.5195, "step": 11049 }, { "epoch": 6.173184357541899, "grad_norm": 0.5267997980117798, "learning_rate": 0.0006935014005602241, "loss": 0.4048, "step": 11050 }, { "epoch": 6.173743016759777, "grad_norm": 0.40528345108032227, "learning_rate": 0.0006934733893557423, "loss": 0.3128, "step": 
11051 }, { "epoch": 6.174301675977653, "grad_norm": 0.5212454199790955, "learning_rate": 0.0006934453781512605, "loss": 0.476, "step": 11052 }, { "epoch": 6.174860335195531, "grad_norm": 2.836933135986328, "learning_rate": 0.0006934173669467787, "loss": 0.7061, "step": 11053 }, { "epoch": 6.175418994413408, "grad_norm": 0.562639057636261, "learning_rate": 0.0006933893557422969, "loss": 0.4495, "step": 11054 }, { "epoch": 6.175977653631285, "grad_norm": 0.7593250870704651, "learning_rate": 0.0006933613445378151, "loss": 0.4694, "step": 11055 }, { "epoch": 6.176536312849162, "grad_norm": 0.6319076418876648, "learning_rate": 0.0006933333333333333, "loss": 0.4637, "step": 11056 }, { "epoch": 6.177094972067039, "grad_norm": 0.4834778308868408, "learning_rate": 0.0006933053221288516, "loss": 0.5669, "step": 11057 }, { "epoch": 6.177653631284916, "grad_norm": 0.8001792430877686, "learning_rate": 0.0006932773109243698, "loss": 0.4873, "step": 11058 }, { "epoch": 6.178212290502794, "grad_norm": 2.105329990386963, "learning_rate": 0.000693249299719888, "loss": 0.3873, "step": 11059 }, { "epoch": 6.17877094972067, "grad_norm": 0.9396324753761292, "learning_rate": 0.0006932212885154063, "loss": 0.476, "step": 11060 }, { "epoch": 6.179329608938548, "grad_norm": 1.8959730863571167, "learning_rate": 0.0006931932773109244, "loss": 0.4138, "step": 11061 }, { "epoch": 6.179888268156424, "grad_norm": 0.5052026510238647, "learning_rate": 0.0006931652661064426, "loss": 0.3591, "step": 11062 }, { "epoch": 6.180446927374302, "grad_norm": 0.6946987509727478, "learning_rate": 0.0006931372549019608, "loss": 0.474, "step": 11063 }, { "epoch": 6.181005586592179, "grad_norm": 0.42790892720222473, "learning_rate": 0.000693109243697479, "loss": 0.4547, "step": 11064 }, { "epoch": 6.181564245810056, "grad_norm": 0.5898541212081909, "learning_rate": 0.0006930812324929973, "loss": 0.5052, "step": 11065 }, { "epoch": 6.182122905027933, "grad_norm": 0.5448857545852661, "learning_rate": 
0.0006930532212885154, "loss": 0.4416, "step": 11066 }, { "epoch": 6.18268156424581, "grad_norm": 0.443786084651947, "learning_rate": 0.0006930252100840336, "loss": 0.3703, "step": 11067 }, { "epoch": 6.183240223463687, "grad_norm": 0.7968162894248962, "learning_rate": 0.0006929971988795518, "loss": 0.466, "step": 11068 }, { "epoch": 6.183798882681565, "grad_norm": 4.451937675476074, "learning_rate": 0.00069296918767507, "loss": 0.435, "step": 11069 }, { "epoch": 6.184357541899441, "grad_norm": 0.829754114151001, "learning_rate": 0.0006929411764705883, "loss": 0.475, "step": 11070 }, { "epoch": 6.184916201117319, "grad_norm": 0.7205123901367188, "learning_rate": 0.0006929131652661064, "loss": 0.3662, "step": 11071 }, { "epoch": 6.185474860335195, "grad_norm": 1.06096351146698, "learning_rate": 0.0006928851540616246, "loss": 0.4171, "step": 11072 }, { "epoch": 6.186033519553073, "grad_norm": 0.4879666268825531, "learning_rate": 0.0006928571428571428, "loss": 0.4564, "step": 11073 }, { "epoch": 6.18659217877095, "grad_norm": 5.856074810028076, "learning_rate": 0.000692829131652661, "loss": 0.4013, "step": 11074 }, { "epoch": 6.187150837988827, "grad_norm": 0.5490097999572754, "learning_rate": 0.0006928011204481794, "loss": 0.4586, "step": 11075 }, { "epoch": 6.187709497206704, "grad_norm": 0.41726183891296387, "learning_rate": 0.0006927731092436976, "loss": 0.4309, "step": 11076 }, { "epoch": 6.188268156424581, "grad_norm": 0.4752315580844879, "learning_rate": 0.0006927450980392157, "loss": 0.4691, "step": 11077 }, { "epoch": 6.188826815642458, "grad_norm": 0.7366651892662048, "learning_rate": 0.0006927170868347339, "loss": 0.3906, "step": 11078 }, { "epoch": 6.189385474860336, "grad_norm": 1.2753126621246338, "learning_rate": 0.0006926890756302521, "loss": 0.4413, "step": 11079 }, { "epoch": 6.189944134078212, "grad_norm": 0.747553288936615, "learning_rate": 0.0006926610644257704, "loss": 0.4707, "step": 11080 }, { "epoch": 6.19050279329609, "grad_norm": 
0.6181213855743408, "learning_rate": 0.0006926330532212886, "loss": 0.4345, "step": 11081 }, { "epoch": 6.191061452513966, "grad_norm": 5.963598728179932, "learning_rate": 0.0006926050420168067, "loss": 0.4175, "step": 11082 }, { "epoch": 6.191620111731844, "grad_norm": 0.6342845559120178, "learning_rate": 0.0006925770308123249, "loss": 0.4417, "step": 11083 }, { "epoch": 6.19217877094972, "grad_norm": 1.0767654180526733, "learning_rate": 0.0006925490196078431, "loss": 0.4798, "step": 11084 }, { "epoch": 6.192737430167598, "grad_norm": 0.8455361723899841, "learning_rate": 0.0006925210084033614, "loss": 0.5267, "step": 11085 }, { "epoch": 6.193296089385475, "grad_norm": 0.7232323288917542, "learning_rate": 0.0006924929971988796, "loss": 0.4536, "step": 11086 }, { "epoch": 6.193854748603352, "grad_norm": 0.47066929936408997, "learning_rate": 0.0006924649859943977, "loss": 0.3842, "step": 11087 }, { "epoch": 6.194413407821229, "grad_norm": 0.5924574732780457, "learning_rate": 0.0006924369747899159, "loss": 0.4954, "step": 11088 }, { "epoch": 6.194972067039106, "grad_norm": 0.48131537437438965, "learning_rate": 0.0006924089635854341, "loss": 0.3583, "step": 11089 }, { "epoch": 6.195530726256983, "grad_norm": 0.7969999313354492, "learning_rate": 0.0006923809523809525, "loss": 0.3847, "step": 11090 }, { "epoch": 6.196089385474861, "grad_norm": 0.5471673607826233, "learning_rate": 0.0006923529411764707, "loss": 0.4483, "step": 11091 }, { "epoch": 6.196648044692737, "grad_norm": 0.5499786138534546, "learning_rate": 0.0006923249299719889, "loss": 0.5417, "step": 11092 }, { "epoch": 6.197206703910615, "grad_norm": 0.5248214602470398, "learning_rate": 0.000692296918767507, "loss": 0.4075, "step": 11093 }, { "epoch": 6.197765363128491, "grad_norm": 0.7355324029922485, "learning_rate": 0.0006922689075630252, "loss": 0.4283, "step": 11094 }, { "epoch": 6.198324022346369, "grad_norm": 0.8176501989364624, "learning_rate": 0.0006922408963585435, "loss": 0.4692, "step": 11095 }, { 
"epoch": 6.198882681564246, "grad_norm": 0.5303985476493835, "learning_rate": 0.0006922128851540617, "loss": 0.4899, "step": 11096 }, { "epoch": 6.199441340782123, "grad_norm": 0.59996497631073, "learning_rate": 0.0006921848739495799, "loss": 0.51, "step": 11097 }, { "epoch": 6.2, "grad_norm": 0.6563166379928589, "learning_rate": 0.000692156862745098, "loss": 0.3627, "step": 11098 }, { "epoch": 6.200558659217877, "grad_norm": 2.303741693496704, "learning_rate": 0.0006921288515406162, "loss": 0.5127, "step": 11099 }, { "epoch": 6.201117318435754, "grad_norm": 0.5335306525230408, "learning_rate": 0.0006921008403361345, "loss": 0.4675, "step": 11100 }, { "epoch": 6.201675977653632, "grad_norm": 0.6454495787620544, "learning_rate": 0.0006920728291316527, "loss": 0.481, "step": 11101 }, { "epoch": 6.202234636871508, "grad_norm": 0.4385012090206146, "learning_rate": 0.0006920448179271709, "loss": 0.386, "step": 11102 }, { "epoch": 6.202793296089386, "grad_norm": 1.1113243103027344, "learning_rate": 0.000692016806722689, "loss": 0.4558, "step": 11103 }, { "epoch": 6.203351955307262, "grad_norm": 0.7041577696800232, "learning_rate": 0.0006919887955182072, "loss": 0.4173, "step": 11104 }, { "epoch": 6.20391061452514, "grad_norm": 0.577457070350647, "learning_rate": 0.0006919607843137255, "loss": 0.4972, "step": 11105 }, { "epoch": 6.204469273743017, "grad_norm": 0.4837631583213806, "learning_rate": 0.0006919327731092438, "loss": 0.4035, "step": 11106 }, { "epoch": 6.205027932960894, "grad_norm": 0.4688953161239624, "learning_rate": 0.000691904761904762, "loss": 0.4554, "step": 11107 }, { "epoch": 6.205586592178771, "grad_norm": 2.351287841796875, "learning_rate": 0.0006918767507002802, "loss": 0.5017, "step": 11108 }, { "epoch": 6.206145251396648, "grad_norm": 0.6057361960411072, "learning_rate": 0.0006918487394957983, "loss": 0.4949, "step": 11109 }, { "epoch": 6.206703910614525, "grad_norm": 0.7558247447013855, "learning_rate": 0.0006918207282913166, "loss": 0.5533, 
"step": 11110 }, { "epoch": 6.207262569832403, "grad_norm": 0.9976764917373657, "learning_rate": 0.0006917927170868348, "loss": 0.411, "step": 11111 }, { "epoch": 6.207821229050279, "grad_norm": 0.6596027612686157, "learning_rate": 0.000691764705882353, "loss": 0.5329, "step": 11112 }, { "epoch": 6.208379888268157, "grad_norm": 0.3358546495437622, "learning_rate": 0.0006917366946778712, "loss": 0.2956, "step": 11113 }, { "epoch": 6.208938547486033, "grad_norm": 0.5593414902687073, "learning_rate": 0.0006917086834733893, "loss": 0.4266, "step": 11114 }, { "epoch": 6.209497206703911, "grad_norm": 0.5333533883094788, "learning_rate": 0.0006916806722689076, "loss": 0.5247, "step": 11115 }, { "epoch": 6.210055865921788, "grad_norm": 0.4437052309513092, "learning_rate": 0.0006916526610644258, "loss": 0.4344, "step": 11116 }, { "epoch": 6.210614525139665, "grad_norm": 2.343329429626465, "learning_rate": 0.000691624649859944, "loss": 0.4622, "step": 11117 }, { "epoch": 6.211173184357542, "grad_norm": 0.5243825912475586, "learning_rate": 0.0006915966386554622, "loss": 0.444, "step": 11118 }, { "epoch": 6.211731843575419, "grad_norm": 0.8121657967567444, "learning_rate": 0.0006915686274509803, "loss": 0.4872, "step": 11119 }, { "epoch": 6.212290502793296, "grad_norm": 0.43471094965934753, "learning_rate": 0.0006915406162464986, "loss": 0.3266, "step": 11120 }, { "epoch": 6.212849162011173, "grad_norm": 0.47909021377563477, "learning_rate": 0.0006915126050420168, "loss": 0.5425, "step": 11121 }, { "epoch": 6.21340782122905, "grad_norm": 0.6361687779426575, "learning_rate": 0.000691484593837535, "loss": 0.6024, "step": 11122 }, { "epoch": 6.213966480446928, "grad_norm": 0.40451547503471375, "learning_rate": 0.0006914565826330533, "loss": 0.4516, "step": 11123 }, { "epoch": 6.214525139664804, "grad_norm": 0.5453373789787292, "learning_rate": 0.0006914285714285715, "loss": 0.4877, "step": 11124 }, { "epoch": 6.215083798882682, "grad_norm": 0.5723085999488831, "learning_rate": 
0.0006914005602240897, "loss": 0.466, "step": 11125 }, { "epoch": 6.215642458100558, "grad_norm": 1.1354963779449463, "learning_rate": 0.0006913725490196079, "loss": 0.4337, "step": 11126 }, { "epoch": 6.216201117318436, "grad_norm": 0.5381463170051575, "learning_rate": 0.0006913445378151261, "loss": 0.5454, "step": 11127 }, { "epoch": 6.216759776536313, "grad_norm": 0.6525371074676514, "learning_rate": 0.0006913165266106443, "loss": 0.4847, "step": 11128 }, { "epoch": 6.21731843575419, "grad_norm": 1.4016340970993042, "learning_rate": 0.0006912885154061625, "loss": 0.4733, "step": 11129 }, { "epoch": 6.217877094972067, "grad_norm": 0.7441390752792358, "learning_rate": 0.0006912605042016807, "loss": 0.3809, "step": 11130 }, { "epoch": 6.218435754189944, "grad_norm": 0.3427259922027588, "learning_rate": 0.0006912324929971989, "loss": 0.3909, "step": 11131 }, { "epoch": 6.218994413407821, "grad_norm": 0.6850032806396484, "learning_rate": 0.0006912044817927171, "loss": 0.4014, "step": 11132 }, { "epoch": 6.219553072625699, "grad_norm": 0.5331707000732422, "learning_rate": 0.0006911764705882353, "loss": 0.4339, "step": 11133 }, { "epoch": 6.220111731843575, "grad_norm": 0.5954989194869995, "learning_rate": 0.0006911484593837535, "loss": 0.4132, "step": 11134 }, { "epoch": 6.220670391061453, "grad_norm": 0.6553730964660645, "learning_rate": 0.0006911204481792717, "loss": 0.4344, "step": 11135 }, { "epoch": 6.221229050279329, "grad_norm": 0.8323370218276978, "learning_rate": 0.0006910924369747899, "loss": 0.4824, "step": 11136 }, { "epoch": 6.221787709497207, "grad_norm": 0.5582082271575928, "learning_rate": 0.0006910644257703081, "loss": 0.4433, "step": 11137 }, { "epoch": 6.222346368715084, "grad_norm": 0.5115128755569458, "learning_rate": 0.0006910364145658263, "loss": 0.4503, "step": 11138 }, { "epoch": 6.222905027932961, "grad_norm": 0.3388581871986389, "learning_rate": 0.0006910084033613446, "loss": 0.3357, "step": 11139 }, { "epoch": 6.223463687150838, 
"grad_norm": 0.48183414340019226, "learning_rate": 0.0006909803921568629, "loss": 0.4473, "step": 11140 }, { "epoch": 6.224022346368715, "grad_norm": 0.5802580714225769, "learning_rate": 0.000690952380952381, "loss": 0.4185, "step": 11141 }, { "epoch": 6.224581005586592, "grad_norm": 0.5617557764053345, "learning_rate": 0.0006909243697478992, "loss": 0.4135, "step": 11142 }, { "epoch": 6.22513966480447, "grad_norm": 0.5625030994415283, "learning_rate": 0.0006908963585434174, "loss": 0.4139, "step": 11143 }, { "epoch": 6.225698324022346, "grad_norm": 0.37672555446624756, "learning_rate": 0.0006908683473389356, "loss": 0.3918, "step": 11144 }, { "epoch": 6.226256983240224, "grad_norm": 3.426548480987549, "learning_rate": 0.0006908403361344539, "loss": 0.5271, "step": 11145 }, { "epoch": 6.2268156424581, "grad_norm": 1.4194186925888062, "learning_rate": 0.000690812324929972, "loss": 0.5401, "step": 11146 }, { "epoch": 6.227374301675978, "grad_norm": 0.6973713040351868, "learning_rate": 0.0006907843137254902, "loss": 0.4769, "step": 11147 }, { "epoch": 6.227932960893855, "grad_norm": 0.5698657631874084, "learning_rate": 0.0006907563025210084, "loss": 0.4104, "step": 11148 }, { "epoch": 6.228491620111732, "grad_norm": 0.582277238368988, "learning_rate": 0.0006907282913165266, "loss": 0.3403, "step": 11149 }, { "epoch": 6.229050279329609, "grad_norm": 0.5936879515647888, "learning_rate": 0.0006907002801120449, "loss": 0.5796, "step": 11150 }, { "epoch": 6.229608938547486, "grad_norm": 0.5503798127174377, "learning_rate": 0.000690672268907563, "loss": 0.4832, "step": 11151 }, { "epoch": 6.230167597765363, "grad_norm": 0.40784943103790283, "learning_rate": 0.0006906442577030812, "loss": 0.4235, "step": 11152 }, { "epoch": 6.230726256983241, "grad_norm": 0.6808150410652161, "learning_rate": 0.0006906162464985994, "loss": 0.5843, "step": 11153 }, { "epoch": 6.231284916201117, "grad_norm": 1.1461037397384644, "learning_rate": 0.0006905882352941176, "loss": 0.4227, "step": 
11154 }, { "epoch": 6.231843575418995, "grad_norm": 0.5101745128631592, "learning_rate": 0.0006905602240896358, "loss": 0.4225, "step": 11155 }, { "epoch": 6.232402234636871, "grad_norm": 0.6600402593612671, "learning_rate": 0.0006905322128851542, "loss": 0.4565, "step": 11156 }, { "epoch": 6.232960893854749, "grad_norm": 0.6657623052597046, "learning_rate": 0.0006905042016806723, "loss": 0.5936, "step": 11157 }, { "epoch": 6.233519553072625, "grad_norm": 0.9028830528259277, "learning_rate": 0.0006904761904761905, "loss": 0.364, "step": 11158 }, { "epoch": 6.234078212290503, "grad_norm": 0.7732554078102112, "learning_rate": 0.0006904481792717087, "loss": 0.4662, "step": 11159 }, { "epoch": 6.23463687150838, "grad_norm": 0.6529476046562195, "learning_rate": 0.0006904201680672269, "loss": 0.3948, "step": 11160 }, { "epoch": 6.235195530726257, "grad_norm": 0.3281601071357727, "learning_rate": 0.0006903921568627452, "loss": 0.3461, "step": 11161 }, { "epoch": 6.235754189944134, "grad_norm": 13.75284194946289, "learning_rate": 0.0006903641456582633, "loss": 0.6447, "step": 11162 }, { "epoch": 6.236312849162011, "grad_norm": 0.4098324775695801, "learning_rate": 0.0006903361344537815, "loss": 0.4928, "step": 11163 }, { "epoch": 6.236871508379888, "grad_norm": 0.5982289910316467, "learning_rate": 0.0006903081232492997, "loss": 0.4533, "step": 11164 }, { "epoch": 6.237430167597766, "grad_norm": 0.49558225274086, "learning_rate": 0.0006902801120448179, "loss": 0.4527, "step": 11165 }, { "epoch": 6.237988826815642, "grad_norm": 0.7247928977012634, "learning_rate": 0.0006902521008403362, "loss": 0.4862, "step": 11166 }, { "epoch": 6.23854748603352, "grad_norm": 0.5528486371040344, "learning_rate": 0.0006902240896358543, "loss": 0.4562, "step": 11167 }, { "epoch": 6.239106145251396, "grad_norm": 0.5573023557662964, "learning_rate": 0.0006901960784313725, "loss": 0.3641, "step": 11168 }, { "epoch": 6.239664804469274, "grad_norm": 0.8229979872703552, "learning_rate": 
0.0006901680672268907, "loss": 0.3698, "step": 11169 }, { "epoch": 6.240223463687151, "grad_norm": 0.6634820103645325, "learning_rate": 0.0006901400560224089, "loss": 0.6129, "step": 11170 }, { "epoch": 6.240782122905028, "grad_norm": 0.4847666025161743, "learning_rate": 0.0006901120448179273, "loss": 0.4213, "step": 11171 }, { "epoch": 6.241340782122905, "grad_norm": 0.7485854029655457, "learning_rate": 0.0006900840336134455, "loss": 0.4874, "step": 11172 }, { "epoch": 6.241899441340782, "grad_norm": 1.7963004112243652, "learning_rate": 0.0006900560224089636, "loss": 0.4252, "step": 11173 }, { "epoch": 6.242458100558659, "grad_norm": 0.613847553730011, "learning_rate": 0.0006900280112044818, "loss": 0.4939, "step": 11174 }, { "epoch": 6.243016759776537, "grad_norm": 0.5112985372543335, "learning_rate": 0.00069, "loss": 0.4271, "step": 11175 }, { "epoch": 6.243575418994413, "grad_norm": 0.47414660453796387, "learning_rate": 0.0006899719887955183, "loss": 0.3679, "step": 11176 }, { "epoch": 6.244134078212291, "grad_norm": 1.2529995441436768, "learning_rate": 0.0006899439775910365, "loss": 0.3995, "step": 11177 }, { "epoch": 6.244692737430167, "grad_norm": 0.6523088216781616, "learning_rate": 0.0006899159663865546, "loss": 0.4173, "step": 11178 }, { "epoch": 6.245251396648045, "grad_norm": 0.5217496752738953, "learning_rate": 0.0006898879551820728, "loss": 0.4311, "step": 11179 }, { "epoch": 6.245810055865922, "grad_norm": 0.830846905708313, "learning_rate": 0.000689859943977591, "loss": 0.4375, "step": 11180 }, { "epoch": 6.246368715083799, "grad_norm": 0.5552548170089722, "learning_rate": 0.0006898319327731093, "loss": 0.4065, "step": 11181 }, { "epoch": 6.246927374301676, "grad_norm": 0.4875868856906891, "learning_rate": 0.0006898039215686275, "loss": 0.3814, "step": 11182 }, { "epoch": 6.247486033519553, "grad_norm": 0.4327409863471985, "learning_rate": 0.0006897759103641456, "loss": 0.3711, "step": 11183 }, { "epoch": 6.24804469273743, "grad_norm": 
0.6267342567443848, "learning_rate": 0.0006897478991596638, "loss": 0.5649, "step": 11184 }, { "epoch": 6.248603351955307, "grad_norm": 0.4159642159938812, "learning_rate": 0.000689719887955182, "loss": 0.3937, "step": 11185 }, { "epoch": 6.249162011173184, "grad_norm": 0.4459898769855499, "learning_rate": 0.0006896918767507003, "loss": 0.3673, "step": 11186 }, { "epoch": 6.249720670391062, "grad_norm": 0.6854798793792725, "learning_rate": 0.0006896638655462185, "loss": 0.4599, "step": 11187 }, { "epoch": 6.250279329608938, "grad_norm": 0.5844081044197083, "learning_rate": 0.0006896358543417368, "loss": 0.5304, "step": 11188 }, { "epoch": 6.250837988826816, "grad_norm": 0.5282191634178162, "learning_rate": 0.0006896078431372549, "loss": 0.3807, "step": 11189 }, { "epoch": 6.251396648044693, "grad_norm": 0.7448979020118713, "learning_rate": 0.0006895798319327731, "loss": 0.5425, "step": 11190 }, { "epoch": 6.25195530726257, "grad_norm": 0.5471347570419312, "learning_rate": 0.0006895518207282914, "loss": 0.5398, "step": 11191 }, { "epoch": 6.252513966480447, "grad_norm": 0.461185097694397, "learning_rate": 0.0006895238095238096, "loss": 0.4359, "step": 11192 }, { "epoch": 6.253072625698324, "grad_norm": 0.5877832770347595, "learning_rate": 0.0006894957983193278, "loss": 0.554, "step": 11193 }, { "epoch": 6.253631284916201, "grad_norm": 1.967076301574707, "learning_rate": 0.0006894677871148459, "loss": 0.4656, "step": 11194 }, { "epoch": 6.254189944134078, "grad_norm": 0.5672575235366821, "learning_rate": 0.0006894397759103641, "loss": 0.4791, "step": 11195 }, { "epoch": 6.254748603351955, "grad_norm": 0.4789290726184845, "learning_rate": 0.0006894117647058824, "loss": 0.3457, "step": 11196 }, { "epoch": 6.255307262569833, "grad_norm": 0.5638444423675537, "learning_rate": 0.0006893837535014006, "loss": 0.4037, "step": 11197 }, { "epoch": 6.255865921787709, "grad_norm": 0.7057085037231445, "learning_rate": 0.0006893557422969188, "loss": 0.4674, "step": 11198 }, { 
"epoch": 6.256424581005587, "grad_norm": 2.4486758708953857, "learning_rate": 0.0006893277310924369, "loss": 0.391, "step": 11199 }, { "epoch": 6.256983240223463, "grad_norm": 0.5865665674209595, "learning_rate": 0.0006892997198879551, "loss": 0.5648, "step": 11200 }, { "epoch": 6.257541899441341, "grad_norm": 0.6624398827552795, "learning_rate": 0.0006892717086834734, "loss": 0.6568, "step": 11201 }, { "epoch": 6.258100558659218, "grad_norm": 0.581609308719635, "learning_rate": 0.0006892436974789916, "loss": 0.4473, "step": 11202 }, { "epoch": 6.258659217877095, "grad_norm": 0.49125662446022034, "learning_rate": 0.0006892156862745098, "loss": 0.497, "step": 11203 }, { "epoch": 6.259217877094972, "grad_norm": 0.4363628029823303, "learning_rate": 0.000689187675070028, "loss": 0.4023, "step": 11204 }, { "epoch": 6.259776536312849, "grad_norm": 1.1108808517456055, "learning_rate": 0.0006891596638655461, "loss": 0.5842, "step": 11205 }, { "epoch": 6.260335195530726, "grad_norm": 0.3604320287704468, "learning_rate": 0.0006891316526610645, "loss": 0.3915, "step": 11206 }, { "epoch": 6.260893854748604, "grad_norm": 0.54599928855896, "learning_rate": 0.0006891036414565827, "loss": 0.4666, "step": 11207 }, { "epoch": 6.26145251396648, "grad_norm": 0.43186667561531067, "learning_rate": 0.0006890756302521009, "loss": 0.4123, "step": 11208 }, { "epoch": 6.262011173184358, "grad_norm": 0.4337267577648163, "learning_rate": 0.0006890476190476191, "loss": 0.372, "step": 11209 }, { "epoch": 6.262569832402234, "grad_norm": 0.7012616395950317, "learning_rate": 0.0006890196078431372, "loss": 0.4099, "step": 11210 }, { "epoch": 6.263128491620112, "grad_norm": 0.5912482142448425, "learning_rate": 0.0006889915966386555, "loss": 0.4252, "step": 11211 }, { "epoch": 6.263687150837989, "grad_norm": 0.5213072299957275, "learning_rate": 0.0006889635854341737, "loss": 0.4222, "step": 11212 }, { "epoch": 6.264245810055866, "grad_norm": 0.547344446182251, "learning_rate": 0.0006889355742296919, 
"loss": 0.4906, "step": 11213 }, { "epoch": 6.264804469273743, "grad_norm": 0.40549513697624207, "learning_rate": 0.0006889075630252101, "loss": 0.4714, "step": 11214 }, { "epoch": 6.26536312849162, "grad_norm": 0.6650232076644897, "learning_rate": 0.0006888795518207282, "loss": 0.479, "step": 11215 }, { "epoch": 6.265921787709497, "grad_norm": 0.717628538608551, "learning_rate": 0.0006888515406162465, "loss": 0.5645, "step": 11216 }, { "epoch": 6.266480446927375, "grad_norm": 0.825869083404541, "learning_rate": 0.0006888235294117647, "loss": 0.393, "step": 11217 }, { "epoch": 6.267039106145251, "grad_norm": 0.5630092620849609, "learning_rate": 0.0006887955182072829, "loss": 0.3434, "step": 11218 }, { "epoch": 6.267597765363129, "grad_norm": 0.7241304516792297, "learning_rate": 0.0006887675070028011, "loss": 0.4394, "step": 11219 }, { "epoch": 6.268156424581005, "grad_norm": 0.4889439046382904, "learning_rate": 0.0006887394957983193, "loss": 0.4697, "step": 11220 }, { "epoch": 6.268715083798883, "grad_norm": 0.8159801959991455, "learning_rate": 0.0006887114845938376, "loss": 0.5028, "step": 11221 }, { "epoch": 6.269273743016759, "grad_norm": 0.4378489553928375, "learning_rate": 0.0006886834733893558, "loss": 0.4606, "step": 11222 }, { "epoch": 6.269832402234637, "grad_norm": 0.36192065477371216, "learning_rate": 0.000688655462184874, "loss": 0.2936, "step": 11223 }, { "epoch": 6.270391061452514, "grad_norm": 0.36072278022766113, "learning_rate": 0.0006886274509803922, "loss": 0.4113, "step": 11224 }, { "epoch": 6.270949720670391, "grad_norm": 3.2129781246185303, "learning_rate": 0.0006885994397759104, "loss": 0.4601, "step": 11225 }, { "epoch": 6.271508379888268, "grad_norm": 0.5723569393157959, "learning_rate": 0.0006885714285714286, "loss": 0.416, "step": 11226 }, { "epoch": 6.272067039106146, "grad_norm": 1.205926775932312, "learning_rate": 0.0006885434173669468, "loss": 0.3953, "step": 11227 }, { "epoch": 6.272625698324022, "grad_norm": 0.5477660298347473, 
"learning_rate": 0.000688515406162465, "loss": 0.4305, "step": 11228 }, { "epoch": 6.2731843575419, "grad_norm": 0.4841614365577698, "learning_rate": 0.0006884873949579832, "loss": 0.4569, "step": 11229 }, { "epoch": 6.273743016759776, "grad_norm": 1.21365487575531, "learning_rate": 0.0006884593837535014, "loss": 0.5721, "step": 11230 }, { "epoch": 6.274301675977654, "grad_norm": 0.8916204571723938, "learning_rate": 0.0006884313725490196, "loss": 0.4278, "step": 11231 }, { "epoch": 6.27486033519553, "grad_norm": 0.566136360168457, "learning_rate": 0.0006884033613445378, "loss": 0.3799, "step": 11232 }, { "epoch": 6.275418994413408, "grad_norm": 0.7047808766365051, "learning_rate": 0.000688375350140056, "loss": 0.4102, "step": 11233 }, { "epoch": 6.275977653631285, "grad_norm": 1.0304712057113647, "learning_rate": 0.0006883473389355742, "loss": 0.3965, "step": 11234 }, { "epoch": 6.276536312849162, "grad_norm": 0.7288039922714233, "learning_rate": 0.0006883193277310924, "loss": 0.6641, "step": 11235 }, { "epoch": 6.277094972067039, "grad_norm": 0.9225782752037048, "learning_rate": 0.0006882913165266107, "loss": 0.4155, "step": 11236 }, { "epoch": 6.277653631284916, "grad_norm": 0.4543639123439789, "learning_rate": 0.0006882633053221288, "loss": 0.469, "step": 11237 }, { "epoch": 6.278212290502793, "grad_norm": 0.4584183990955353, "learning_rate": 0.000688235294117647, "loss": 0.4513, "step": 11238 }, { "epoch": 6.278770949720671, "grad_norm": 0.5508493185043335, "learning_rate": 0.0006882072829131653, "loss": 0.5104, "step": 11239 }, { "epoch": 6.279329608938547, "grad_norm": 0.5691764950752258, "learning_rate": 0.0006881792717086835, "loss": 0.4397, "step": 11240 }, { "epoch": 6.279888268156425, "grad_norm": 0.39158087968826294, "learning_rate": 0.0006881512605042018, "loss": 0.4525, "step": 11241 }, { "epoch": 6.280446927374301, "grad_norm": 0.8279297351837158, "learning_rate": 0.0006881232492997199, "loss": 0.5181, "step": 11242 }, { "epoch": 6.281005586592179, 
"grad_norm": 0.48144567012786865, "learning_rate": 0.0006880952380952381, "loss": 0.4334, "step": 11243 }, { "epoch": 6.281564245810056, "grad_norm": 0.5417659282684326, "learning_rate": 0.0006880672268907563, "loss": 0.4659, "step": 11244 }, { "epoch": 6.282122905027933, "grad_norm": 0.4606843888759613, "learning_rate": 0.0006880392156862745, "loss": 0.4423, "step": 11245 }, { "epoch": 6.28268156424581, "grad_norm": 0.5715280175209045, "learning_rate": 0.0006880112044817928, "loss": 0.5241, "step": 11246 }, { "epoch": 6.283240223463687, "grad_norm": 0.7559725046157837, "learning_rate": 0.0006879831932773109, "loss": 0.445, "step": 11247 }, { "epoch": 6.283798882681564, "grad_norm": 0.45731693506240845, "learning_rate": 0.0006879551820728291, "loss": 0.4304, "step": 11248 }, { "epoch": 6.284357541899442, "grad_norm": 0.42387619614601135, "learning_rate": 0.0006879271708683473, "loss": 0.378, "step": 11249 }, { "epoch": 6.284916201117318, "grad_norm": 1.0018961429595947, "learning_rate": 0.0006878991596638655, "loss": 0.3522, "step": 11250 }, { "epoch": 6.285474860335196, "grad_norm": 0.38723766803741455, "learning_rate": 0.0006878711484593838, "loss": 0.3889, "step": 11251 }, { "epoch": 6.286033519553072, "grad_norm": 0.5693039894104004, "learning_rate": 0.000687843137254902, "loss": 0.4528, "step": 11252 }, { "epoch": 6.28659217877095, "grad_norm": 0.5103390216827393, "learning_rate": 0.0006878151260504201, "loss": 0.5247, "step": 11253 }, { "epoch": 6.287150837988827, "grad_norm": 1.2775022983551025, "learning_rate": 0.0006877871148459383, "loss": 0.5028, "step": 11254 }, { "epoch": 6.287709497206704, "grad_norm": 1.2915436029434204, "learning_rate": 0.0006877591036414566, "loss": 0.4331, "step": 11255 }, { "epoch": 6.288268156424581, "grad_norm": 0.49905383586883545, "learning_rate": 0.0006877310924369749, "loss": 0.4093, "step": 11256 }, { "epoch": 6.288826815642458, "grad_norm": 0.36620283126831055, "learning_rate": 0.0006877030812324931, "loss": 0.3804, 
"step": 11257 }, { "epoch": 6.289385474860335, "grad_norm": 0.5044558048248291, "learning_rate": 0.0006876750700280112, "loss": 0.4475, "step": 11258 }, { "epoch": 6.289944134078212, "grad_norm": 1.3240736722946167, "learning_rate": 0.0006876470588235294, "loss": 0.3913, "step": 11259 }, { "epoch": 6.290502793296089, "grad_norm": 0.4556600749492645, "learning_rate": 0.0006876190476190476, "loss": 0.3909, "step": 11260 }, { "epoch": 6.291061452513967, "grad_norm": 0.35912489891052246, "learning_rate": 0.0006875910364145659, "loss": 0.374, "step": 11261 }, { "epoch": 6.291620111731843, "grad_norm": 0.8574111461639404, "learning_rate": 0.0006875630252100841, "loss": 0.6357, "step": 11262 }, { "epoch": 6.292178770949721, "grad_norm": 0.448162317276001, "learning_rate": 0.0006875350140056022, "loss": 0.3778, "step": 11263 }, { "epoch": 6.292737430167598, "grad_norm": 0.5595035552978516, "learning_rate": 0.0006875070028011204, "loss": 0.416, "step": 11264 }, { "epoch": 6.293296089385475, "grad_norm": 0.5972183346748352, "learning_rate": 0.0006874789915966386, "loss": 0.5932, "step": 11265 }, { "epoch": 6.293854748603352, "grad_norm": 0.6435426473617554, "learning_rate": 0.0006874509803921569, "loss": 0.695, "step": 11266 }, { "epoch": 6.294413407821229, "grad_norm": 0.5722102522850037, "learning_rate": 0.0006874229691876751, "loss": 0.483, "step": 11267 }, { "epoch": 6.294972067039106, "grad_norm": 0.4654307961463928, "learning_rate": 0.0006873949579831933, "loss": 0.3998, "step": 11268 }, { "epoch": 6.295530726256983, "grad_norm": 0.5992904901504517, "learning_rate": 0.0006873669467787114, "loss": 0.51, "step": 11269 }, { "epoch": 6.29608938547486, "grad_norm": 0.458285927772522, "learning_rate": 0.0006873389355742296, "loss": 0.4778, "step": 11270 }, { "epoch": 6.296648044692738, "grad_norm": 0.8597704768180847, "learning_rate": 0.000687310924369748, "loss": 0.4958, "step": 11271 }, { "epoch": 6.297206703910614, "grad_norm": 0.6881229877471924, "learning_rate": 
0.0006872829131652662, "loss": 0.5557, "step": 11272 }, { "epoch": 6.297765363128492, "grad_norm": 0.5812937021255493, "learning_rate": 0.0006872549019607844, "loss": 0.4134, "step": 11273 }, { "epoch": 6.298324022346368, "grad_norm": 0.5038365125656128, "learning_rate": 0.0006872268907563025, "loss": 0.5143, "step": 11274 }, { "epoch": 6.298882681564246, "grad_norm": 0.441057026386261, "learning_rate": 0.0006871988795518207, "loss": 0.4384, "step": 11275 }, { "epoch": 6.299441340782123, "grad_norm": 0.6233197450637817, "learning_rate": 0.000687170868347339, "loss": 0.4051, "step": 11276 }, { "epoch": 6.3, "grad_norm": 9.194594383239746, "learning_rate": 0.0006871428571428572, "loss": 0.5031, "step": 11277 }, { "epoch": 6.300558659217877, "grad_norm": 0.588850200176239, "learning_rate": 0.0006871148459383754, "loss": 0.4499, "step": 11278 }, { "epoch": 6.301117318435754, "grad_norm": 1.5040974617004395, "learning_rate": 0.0006870868347338935, "loss": 0.5344, "step": 11279 }, { "epoch": 6.301675977653631, "grad_norm": 0.6354177594184875, "learning_rate": 0.0006870588235294117, "loss": 0.3549, "step": 11280 }, { "epoch": 6.302234636871509, "grad_norm": 2.3752048015594482, "learning_rate": 0.00068703081232493, "loss": 0.5296, "step": 11281 }, { "epoch": 6.302793296089385, "grad_norm": 0.7926993370056152, "learning_rate": 0.0006870028011204482, "loss": 0.4856, "step": 11282 }, { "epoch": 6.303351955307263, "grad_norm": 0.9727863669395447, "learning_rate": 0.0006869747899159664, "loss": 0.4839, "step": 11283 }, { "epoch": 6.303910614525139, "grad_norm": 0.8497305512428284, "learning_rate": 0.0006869467787114846, "loss": 0.5174, "step": 11284 }, { "epoch": 6.304469273743017, "grad_norm": 0.39355963468551636, "learning_rate": 0.0006869187675070027, "loss": 0.3971, "step": 11285 }, { "epoch": 6.305027932960894, "grad_norm": 0.48200666904449463, "learning_rate": 0.000686890756302521, "loss": 0.4982, "step": 11286 }, { "epoch": 6.305586592178771, "grad_norm": 
4.587625980377197, "learning_rate": 0.0006868627450980393, "loss": 0.5231, "step": 11287 }, { "epoch": 6.306145251396648, "grad_norm": 1.2941746711730957, "learning_rate": 0.0006868347338935575, "loss": 0.3786, "step": 11288 }, { "epoch": 6.306703910614525, "grad_norm": 0.41783273220062256, "learning_rate": 0.0006868067226890757, "loss": 0.36, "step": 11289 }, { "epoch": 6.307262569832402, "grad_norm": 0.46241331100463867, "learning_rate": 0.0006867787114845938, "loss": 0.4543, "step": 11290 }, { "epoch": 6.30782122905028, "grad_norm": 0.5026640892028809, "learning_rate": 0.0006867507002801121, "loss": 0.4374, "step": 11291 }, { "epoch": 6.308379888268156, "grad_norm": 0.7011479139328003, "learning_rate": 0.0006867226890756303, "loss": 0.4345, "step": 11292 }, { "epoch": 6.308938547486034, "grad_norm": 0.5282343029975891, "learning_rate": 0.0006866946778711485, "loss": 0.4435, "step": 11293 }, { "epoch": 6.30949720670391, "grad_norm": 0.6447503566741943, "learning_rate": 0.0006866666666666667, "loss": 0.4792, "step": 11294 }, { "epoch": 6.310055865921788, "grad_norm": 0.5949207544326782, "learning_rate": 0.0006866386554621848, "loss": 0.4869, "step": 11295 }, { "epoch": 6.310614525139664, "grad_norm": 0.5433316826820374, "learning_rate": 0.0006866106442577031, "loss": 0.4122, "step": 11296 }, { "epoch": 6.311173184357542, "grad_norm": 0.5279645919799805, "learning_rate": 0.0006865826330532213, "loss": 0.4121, "step": 11297 }, { "epoch": 6.311731843575419, "grad_norm": 0.4653691053390503, "learning_rate": 0.0006865546218487395, "loss": 0.3951, "step": 11298 }, { "epoch": 6.312290502793296, "grad_norm": 0.6034505367279053, "learning_rate": 0.0006865266106442577, "loss": 0.6397, "step": 11299 }, { "epoch": 6.312849162011173, "grad_norm": 0.547141969203949, "learning_rate": 0.0006864985994397759, "loss": 0.423, "step": 11300 }, { "epoch": 6.31340782122905, "grad_norm": 1.405989646911621, "learning_rate": 0.0006864705882352941, "loss": 0.3807, "step": 11301 }, { 
"epoch": 6.313966480446927, "grad_norm": 0.5511883497238159, "learning_rate": 0.0006864425770308123, "loss": 0.4357, "step": 11302 }, { "epoch": 6.314525139664805, "grad_norm": 0.48211267590522766, "learning_rate": 0.0006864145658263306, "loss": 0.4102, "step": 11303 }, { "epoch": 6.315083798882681, "grad_norm": 1.332704782485962, "learning_rate": 0.0006863865546218488, "loss": 0.3709, "step": 11304 }, { "epoch": 6.315642458100559, "grad_norm": 0.42972102761268616, "learning_rate": 0.000686358543417367, "loss": 0.374, "step": 11305 }, { "epoch": 6.316201117318435, "grad_norm": 0.5429015159606934, "learning_rate": 0.0006863305322128852, "loss": 0.4569, "step": 11306 }, { "epoch": 6.316759776536313, "grad_norm": 0.40285253524780273, "learning_rate": 0.0006863025210084034, "loss": 0.4019, "step": 11307 }, { "epoch": 6.31731843575419, "grad_norm": 0.6154240965843201, "learning_rate": 0.0006862745098039216, "loss": 0.4195, "step": 11308 }, { "epoch": 6.317877094972067, "grad_norm": 0.36808153986930847, "learning_rate": 0.0006862464985994398, "loss": 0.3715, "step": 11309 }, { "epoch": 6.318435754189944, "grad_norm": 1.278550148010254, "learning_rate": 0.000686218487394958, "loss": 0.4924, "step": 11310 }, { "epoch": 6.318994413407821, "grad_norm": 0.7281808853149414, "learning_rate": 0.0006861904761904763, "loss": 0.652, "step": 11311 }, { "epoch": 6.319553072625698, "grad_norm": 0.8655731678009033, "learning_rate": 0.0006861624649859944, "loss": 0.6149, "step": 11312 }, { "epoch": 6.320111731843576, "grad_norm": 0.6195973753929138, "learning_rate": 0.0006861344537815126, "loss": 0.4744, "step": 11313 }, { "epoch": 6.320670391061452, "grad_norm": 0.7145146727561951, "learning_rate": 0.0006861064425770308, "loss": 0.4884, "step": 11314 }, { "epoch": 6.32122905027933, "grad_norm": 0.8602355718612671, "learning_rate": 0.000686078431372549, "loss": 0.508, "step": 11315 }, { "epoch": 6.321787709497206, "grad_norm": 1.4732013940811157, "learning_rate": 0.0006860504201680673, 
"loss": 0.5067, "step": 11316 }, { "epoch": 6.322346368715084, "grad_norm": 0.8393772840499878, "learning_rate": 0.0006860224089635854, "loss": 0.562, "step": 11317 }, { "epoch": 6.322905027932961, "grad_norm": 0.5369130373001099, "learning_rate": 0.0006859943977591036, "loss": 0.491, "step": 11318 }, { "epoch": 6.323463687150838, "grad_norm": 0.45826050639152527, "learning_rate": 0.0006859663865546218, "loss": 0.412, "step": 11319 }, { "epoch": 6.324022346368715, "grad_norm": 0.4535401463508606, "learning_rate": 0.00068593837535014, "loss": 0.3771, "step": 11320 }, { "epoch": 6.324581005586592, "grad_norm": 0.6311883926391602, "learning_rate": 0.0006859103641456584, "loss": 0.4375, "step": 11321 }, { "epoch": 6.325139664804469, "grad_norm": 0.6865175366401672, "learning_rate": 0.0006858823529411765, "loss": 0.3538, "step": 11322 }, { "epoch": 6.325698324022347, "grad_norm": 0.6206411123275757, "learning_rate": 0.0006858543417366947, "loss": 0.4271, "step": 11323 }, { "epoch": 6.326256983240223, "grad_norm": 0.5162012577056885, "learning_rate": 0.0006858263305322129, "loss": 0.3732, "step": 11324 }, { "epoch": 6.326815642458101, "grad_norm": 0.633078932762146, "learning_rate": 0.0006857983193277311, "loss": 0.4613, "step": 11325 }, { "epoch": 6.327374301675977, "grad_norm": 0.4444442093372345, "learning_rate": 0.0006857703081232494, "loss": 0.3598, "step": 11326 }, { "epoch": 6.327932960893855, "grad_norm": 0.4896429777145386, "learning_rate": 0.0006857422969187676, "loss": 0.414, "step": 11327 }, { "epoch": 6.328491620111732, "grad_norm": 0.4538799524307251, "learning_rate": 0.0006857142857142857, "loss": 0.3602, "step": 11328 }, { "epoch": 6.329050279329609, "grad_norm": 0.6309373378753662, "learning_rate": 0.0006856862745098039, "loss": 0.4364, "step": 11329 }, { "epoch": 6.329608938547486, "grad_norm": 0.5095107555389404, "learning_rate": 0.0006856582633053221, "loss": 0.4064, "step": 11330 }, { "epoch": 6.330167597765363, "grad_norm": 0.7527093887329102, 
"learning_rate": 0.0006856302521008404, "loss": 0.482, "step": 11331 }, { "epoch": 6.33072625698324, "grad_norm": 0.5486929416656494, "learning_rate": 0.0006856022408963586, "loss": 0.5146, "step": 11332 }, { "epoch": 6.331284916201117, "grad_norm": 0.8615903854370117, "learning_rate": 0.0006855742296918767, "loss": 0.5306, "step": 11333 }, { "epoch": 6.331843575418994, "grad_norm": 0.5210286974906921, "learning_rate": 0.0006855462184873949, "loss": 0.5113, "step": 11334 }, { "epoch": 6.332402234636872, "grad_norm": 0.542382001876831, "learning_rate": 0.0006855182072829131, "loss": 0.4101, "step": 11335 }, { "epoch": 6.332960893854748, "grad_norm": 3.9956793785095215, "learning_rate": 0.0006854901960784315, "loss": 0.4807, "step": 11336 }, { "epoch": 6.333519553072626, "grad_norm": 0.5636622309684753, "learning_rate": 0.0006854621848739497, "loss": 0.607, "step": 11337 }, { "epoch": 6.334078212290502, "grad_norm": 0.5061502456665039, "learning_rate": 0.0006854341736694678, "loss": 0.4997, "step": 11338 }, { "epoch": 6.33463687150838, "grad_norm": 0.5561506748199463, "learning_rate": 0.000685406162464986, "loss": 0.4133, "step": 11339 }, { "epoch": 6.335195530726257, "grad_norm": 0.5363943576812744, "learning_rate": 0.0006853781512605042, "loss": 0.4951, "step": 11340 }, { "epoch": 6.335754189944134, "grad_norm": 0.5179654359817505, "learning_rate": 0.0006853501400560225, "loss": 0.51, "step": 11341 }, { "epoch": 6.336312849162011, "grad_norm": 0.5023557543754578, "learning_rate": 0.0006853221288515407, "loss": 0.3788, "step": 11342 }, { "epoch": 6.336871508379888, "grad_norm": 0.8036561012268066, "learning_rate": 0.0006852941176470589, "loss": 0.5158, "step": 11343 }, { "epoch": 6.337430167597765, "grad_norm": 0.4334007799625397, "learning_rate": 0.000685266106442577, "loss": 0.446, "step": 11344 }, { "epoch": 6.337988826815643, "grad_norm": 0.7328843474388123, "learning_rate": 0.0006852380952380952, "loss": 0.4617, "step": 11345 }, { "epoch": 6.338547486033519, 
"grad_norm": 0.4876031279563904, "learning_rate": 0.0006852100840336135, "loss": 0.3867, "step": 11346 }, { "epoch": 6.339106145251397, "grad_norm": 0.5504794120788574, "learning_rate": 0.0006851820728291317, "loss": 0.5307, "step": 11347 }, { "epoch": 6.339664804469273, "grad_norm": 0.7562088966369629, "learning_rate": 0.0006851540616246499, "loss": 0.455, "step": 11348 }, { "epoch": 6.340223463687151, "grad_norm": 0.5796620845794678, "learning_rate": 0.000685126050420168, "loss": 0.4663, "step": 11349 }, { "epoch": 6.340782122905028, "grad_norm": 0.7910043001174927, "learning_rate": 0.0006850980392156862, "loss": 0.434, "step": 11350 }, { "epoch": 6.341340782122905, "grad_norm": 1.6399033069610596, "learning_rate": 0.0006850700280112045, "loss": 0.4973, "step": 11351 }, { "epoch": 6.341899441340782, "grad_norm": 0.580203115940094, "learning_rate": 0.0006850420168067228, "loss": 0.4167, "step": 11352 }, { "epoch": 6.342458100558659, "grad_norm": 0.7104974389076233, "learning_rate": 0.000685014005602241, "loss": 0.3859, "step": 11353 }, { "epoch": 6.343016759776536, "grad_norm": 0.5037657618522644, "learning_rate": 0.0006849859943977591, "loss": 0.4201, "step": 11354 }, { "epoch": 6.343575418994414, "grad_norm": 1.741327166557312, "learning_rate": 0.0006849579831932773, "loss": 0.4617, "step": 11355 }, { "epoch": 6.34413407821229, "grad_norm": 0.6208281517028809, "learning_rate": 0.0006849299719887956, "loss": 0.5056, "step": 11356 }, { "epoch": 6.344692737430168, "grad_norm": 0.8182736039161682, "learning_rate": 0.0006849019607843138, "loss": 0.5144, "step": 11357 }, { "epoch": 6.345251396648044, "grad_norm": 1.3300970792770386, "learning_rate": 0.000684873949579832, "loss": 0.4582, "step": 11358 }, { "epoch": 6.345810055865922, "grad_norm": 0.5753659605979919, "learning_rate": 0.0006848459383753502, "loss": 0.3599, "step": 11359 }, { "epoch": 6.346368715083799, "grad_norm": 0.6026843786239624, "learning_rate": 0.0006848179271708683, "loss": 0.3852, "step": 11360 
}, { "epoch": 6.346927374301676, "grad_norm": 0.4887150824069977, "learning_rate": 0.0006847899159663866, "loss": 0.4188, "step": 11361 }, { "epoch": 6.347486033519553, "grad_norm": 0.6755757927894592, "learning_rate": 0.0006847619047619048, "loss": 0.4709, "step": 11362 }, { "epoch": 6.34804469273743, "grad_norm": 1.9353394508361816, "learning_rate": 0.000684733893557423, "loss": 0.5553, "step": 11363 }, { "epoch": 6.348603351955307, "grad_norm": 0.527420699596405, "learning_rate": 0.0006847058823529412, "loss": 0.4796, "step": 11364 }, { "epoch": 6.349162011173185, "grad_norm": 0.646976113319397, "learning_rate": 0.0006846778711484593, "loss": 0.6217, "step": 11365 }, { "epoch": 6.349720670391061, "grad_norm": 0.5248512625694275, "learning_rate": 0.0006846498599439776, "loss": 0.437, "step": 11366 }, { "epoch": 6.350279329608939, "grad_norm": 0.5128051042556763, "learning_rate": 0.0006846218487394958, "loss": 0.5445, "step": 11367 }, { "epoch": 6.350837988826815, "grad_norm": 0.4348798394203186, "learning_rate": 0.000684593837535014, "loss": 0.4063, "step": 11368 }, { "epoch": 6.351396648044693, "grad_norm": 0.9783489108085632, "learning_rate": 0.0006845658263305323, "loss": 0.5715, "step": 11369 }, { "epoch": 6.351955307262569, "grad_norm": 0.6436389684677124, "learning_rate": 0.0006845378151260504, "loss": 0.4803, "step": 11370 }, { "epoch": 6.352513966480447, "grad_norm": 0.5941984057426453, "learning_rate": 0.0006845098039215687, "loss": 0.485, "step": 11371 }, { "epoch": 6.353072625698324, "grad_norm": 0.6140072345733643, "learning_rate": 0.0006844817927170869, "loss": 0.3766, "step": 11372 }, { "epoch": 6.353631284916201, "grad_norm": 0.955089807510376, "learning_rate": 0.0006844537815126051, "loss": 0.5038, "step": 11373 }, { "epoch": 6.354189944134078, "grad_norm": 0.5532419085502625, "learning_rate": 0.0006844257703081233, "loss": 0.4049, "step": 11374 }, { "epoch": 6.354748603351955, "grad_norm": 0.5676504969596863, "learning_rate": 
0.0006843977591036415, "loss": 0.491, "step": 11375 }, { "epoch": 6.355307262569832, "grad_norm": 0.5290305614471436, "learning_rate": 0.0006843697478991596, "loss": 0.4818, "step": 11376 }, { "epoch": 6.35586592178771, "grad_norm": 0.4742472171783447, "learning_rate": 0.0006843417366946779, "loss": 0.4658, "step": 11377 }, { "epoch": 6.356424581005586, "grad_norm": 0.508601188659668, "learning_rate": 0.0006843137254901961, "loss": 0.4436, "step": 11378 }, { "epoch": 6.356983240223464, "grad_norm": 0.5605050325393677, "learning_rate": 0.0006842857142857143, "loss": 0.4842, "step": 11379 }, { "epoch": 6.35754189944134, "grad_norm": 1.2105941772460938, "learning_rate": 0.0006842577030812325, "loss": 0.5238, "step": 11380 }, { "epoch": 6.358100558659218, "grad_norm": 1.2805817127227783, "learning_rate": 0.0006842296918767506, "loss": 0.5011, "step": 11381 }, { "epoch": 6.358659217877095, "grad_norm": 0.5717642903327942, "learning_rate": 0.0006842016806722689, "loss": 0.4405, "step": 11382 }, { "epoch": 6.359217877094972, "grad_norm": 0.5865155458450317, "learning_rate": 0.0006841736694677871, "loss": 0.4009, "step": 11383 }, { "epoch": 6.359776536312849, "grad_norm": 0.5778446793556213, "learning_rate": 0.0006841456582633053, "loss": 0.4466, "step": 11384 }, { "epoch": 6.360335195530726, "grad_norm": 0.9259564280509949, "learning_rate": 0.0006841176470588236, "loss": 0.4787, "step": 11385 }, { "epoch": 6.360893854748603, "grad_norm": 3.199401378631592, "learning_rate": 0.0006840896358543416, "loss": 0.362, "step": 11386 }, { "epoch": 6.361452513966481, "grad_norm": 0.5453430414199829, "learning_rate": 0.00068406162464986, "loss": 0.4299, "step": 11387 }, { "epoch": 6.362011173184357, "grad_norm": 0.4666514992713928, "learning_rate": 0.0006840336134453782, "loss": 0.4708, "step": 11388 }, { "epoch": 6.362569832402235, "grad_norm": 0.5683820247650146, "learning_rate": 0.0006840056022408964, "loss": 0.3801, "step": 11389 }, { "epoch": 6.363128491620111, "grad_norm": 
0.5500078797340393, "learning_rate": 0.0006839775910364146, "loss": 0.4868, "step": 11390 }, { "epoch": 6.363687150837989, "grad_norm": 2.013174295425415, "learning_rate": 0.0006839495798319328, "loss": 0.5194, "step": 11391 }, { "epoch": 6.364245810055866, "grad_norm": 0.9869382977485657, "learning_rate": 0.000683921568627451, "loss": 0.4943, "step": 11392 }, { "epoch": 6.364804469273743, "grad_norm": 0.5604583024978638, "learning_rate": 0.0006838935574229692, "loss": 0.3823, "step": 11393 }, { "epoch": 6.36536312849162, "grad_norm": 0.8456790447235107, "learning_rate": 0.0006838655462184874, "loss": 0.5184, "step": 11394 }, { "epoch": 6.365921787709497, "grad_norm": 0.3989706337451935, "learning_rate": 0.0006838375350140056, "loss": 0.3675, "step": 11395 }, { "epoch": 6.366480446927374, "grad_norm": 0.6773026585578918, "learning_rate": 0.0006838095238095238, "loss": 0.4875, "step": 11396 }, { "epoch": 6.367039106145251, "grad_norm": 1.5115110874176025, "learning_rate": 0.000683781512605042, "loss": 0.4182, "step": 11397 }, { "epoch": 6.367597765363128, "grad_norm": 0.6385343074798584, "learning_rate": 0.0006837535014005602, "loss": 0.4116, "step": 11398 }, { "epoch": 6.368156424581006, "grad_norm": 1.1313433647155762, "learning_rate": 0.0006837254901960784, "loss": 0.4713, "step": 11399 }, { "epoch": 6.368715083798882, "grad_norm": 0.9814034104347229, "learning_rate": 0.0006836974789915966, "loss": 0.384, "step": 11400 }, { "epoch": 6.36927374301676, "grad_norm": 0.5504094958305359, "learning_rate": 0.0006836694677871148, "loss": 0.4777, "step": 11401 }, { "epoch": 6.369832402234637, "grad_norm": 0.6145797371864319, "learning_rate": 0.000683641456582633, "loss": 0.4953, "step": 11402 }, { "epoch": 6.370391061452514, "grad_norm": 1.0758200883865356, "learning_rate": 0.0006836134453781513, "loss": 0.6673, "step": 11403 }, { "epoch": 6.370949720670391, "grad_norm": 0.5151605010032654, "learning_rate": 0.0006835854341736695, "loss": 0.5061, "step": 11404 }, { 
"epoch": 6.371508379888268, "grad_norm": 0.9018517732620239, "learning_rate": 0.0006835574229691877, "loss": 0.4564, "step": 11405 }, { "epoch": 6.372067039106145, "grad_norm": 0.47681277990341187, "learning_rate": 0.0006835294117647059, "loss": 0.4757, "step": 11406 }, { "epoch": 6.372625698324022, "grad_norm": 0.4705057740211487, "learning_rate": 0.0006835014005602242, "loss": 0.4192, "step": 11407 }, { "epoch": 6.373184357541899, "grad_norm": 0.5153605341911316, "learning_rate": 0.0006834733893557423, "loss": 0.4288, "step": 11408 }, { "epoch": 6.373743016759777, "grad_norm": 0.955181896686554, "learning_rate": 0.0006834453781512605, "loss": 0.5228, "step": 11409 }, { "epoch": 6.374301675977653, "grad_norm": 0.491047203540802, "learning_rate": 0.0006834173669467787, "loss": 0.4676, "step": 11410 }, { "epoch": 6.374860335195531, "grad_norm": 0.5429548621177673, "learning_rate": 0.0006833893557422969, "loss": 0.4835, "step": 11411 }, { "epoch": 6.375418994413407, "grad_norm": 0.6758902072906494, "learning_rate": 0.0006833613445378152, "loss": 0.5477, "step": 11412 }, { "epoch": 6.375977653631285, "grad_norm": 0.5432047247886658, "learning_rate": 0.0006833333333333333, "loss": 0.4523, "step": 11413 }, { "epoch": 6.376536312849162, "grad_norm": 0.5245417356491089, "learning_rate": 0.0006833053221288515, "loss": 0.4615, "step": 11414 }, { "epoch": 6.377094972067039, "grad_norm": 0.5416039228439331, "learning_rate": 0.0006832773109243697, "loss": 0.3934, "step": 11415 }, { "epoch": 6.377653631284916, "grad_norm": 0.7096738219261169, "learning_rate": 0.0006832492997198879, "loss": 0.4815, "step": 11416 }, { "epoch": 6.378212290502793, "grad_norm": 0.481178879737854, "learning_rate": 0.0006832212885154063, "loss": 0.4362, "step": 11417 }, { "epoch": 6.37877094972067, "grad_norm": 0.46963879466056824, "learning_rate": 0.0006831932773109243, "loss": 0.4127, "step": 11418 }, { "epoch": 6.379329608938548, "grad_norm": 0.6784202456474304, "learning_rate": 
0.0006831652661064426, "loss": 0.3865, "step": 11419 }, { "epoch": 6.379888268156424, "grad_norm": 0.42951303720474243, "learning_rate": 0.0006831372549019608, "loss": 0.4301, "step": 11420 }, { "epoch": 6.380446927374302, "grad_norm": 0.6414159536361694, "learning_rate": 0.000683109243697479, "loss": 0.5283, "step": 11421 }, { "epoch": 6.381005586592178, "grad_norm": 0.674823522567749, "learning_rate": 0.0006830812324929973, "loss": 0.4984, "step": 11422 }, { "epoch": 6.381564245810056, "grad_norm": 0.44607362151145935, "learning_rate": 0.0006830532212885155, "loss": 0.4302, "step": 11423 }, { "epoch": 6.382122905027933, "grad_norm": 0.5833104848861694, "learning_rate": 0.0006830252100840336, "loss": 0.4465, "step": 11424 }, { "epoch": 6.38268156424581, "grad_norm": 0.7467641830444336, "learning_rate": 0.0006829971988795518, "loss": 0.4321, "step": 11425 }, { "epoch": 6.383240223463687, "grad_norm": 0.5185431838035583, "learning_rate": 0.00068296918767507, "loss": 0.3528, "step": 11426 }, { "epoch": 6.383798882681564, "grad_norm": 0.8562811613082886, "learning_rate": 0.0006829411764705883, "loss": 0.4064, "step": 11427 }, { "epoch": 6.384357541899441, "grad_norm": 0.44138962030410767, "learning_rate": 0.0006829131652661065, "loss": 0.4917, "step": 11428 }, { "epoch": 6.384916201117319, "grad_norm": 0.4690287411212921, "learning_rate": 0.0006828851540616246, "loss": 0.4146, "step": 11429 }, { "epoch": 6.385474860335195, "grad_norm": 0.5973803400993347, "learning_rate": 0.0006828571428571428, "loss": 0.3892, "step": 11430 }, { "epoch": 6.386033519553073, "grad_norm": 1.1588422060012817, "learning_rate": 0.000682829131652661, "loss": 0.5026, "step": 11431 }, { "epoch": 6.386592178770949, "grad_norm": 0.7707207798957825, "learning_rate": 0.0006828011204481793, "loss": 0.644, "step": 11432 }, { "epoch": 6.387150837988827, "grad_norm": 0.5774877667427063, "learning_rate": 0.0006827731092436975, "loss": 0.4519, "step": 11433 }, { "epoch": 6.3877094972067034, "grad_norm": 
0.4946560561656952, "learning_rate": 0.0006827450980392156, "loss": 0.3831, "step": 11434 }, { "epoch": 6.388268156424581, "grad_norm": 0.4862198829650879, "learning_rate": 0.0006827170868347339, "loss": 0.4656, "step": 11435 }, { "epoch": 6.388826815642458, "grad_norm": 0.4830875098705292, "learning_rate": 0.0006826890756302521, "loss": 0.4426, "step": 11436 }, { "epoch": 6.389385474860335, "grad_norm": 0.7705630660057068, "learning_rate": 0.0006826610644257704, "loss": 0.5365, "step": 11437 }, { "epoch": 6.389944134078212, "grad_norm": 3.553464651107788, "learning_rate": 0.0006826330532212886, "loss": 0.4289, "step": 11438 }, { "epoch": 6.39050279329609, "grad_norm": 5.979016304016113, "learning_rate": 0.0006826050420168068, "loss": 0.4035, "step": 11439 }, { "epoch": 6.391061452513966, "grad_norm": 0.5048288106918335, "learning_rate": 0.0006825770308123249, "loss": 0.4505, "step": 11440 }, { "epoch": 6.391620111731844, "grad_norm": 0.6292498707771301, "learning_rate": 0.0006825490196078431, "loss": 0.3552, "step": 11441 }, { "epoch": 6.39217877094972, "grad_norm": 0.5961422920227051, "learning_rate": 0.0006825210084033614, "loss": 0.4166, "step": 11442 }, { "epoch": 6.392737430167598, "grad_norm": 0.6224048137664795, "learning_rate": 0.0006824929971988796, "loss": 0.4495, "step": 11443 }, { "epoch": 6.3932960893854744, "grad_norm": 0.8895552158355713, "learning_rate": 0.0006824649859943978, "loss": 0.5218, "step": 11444 }, { "epoch": 6.393854748603352, "grad_norm": 0.7797205448150635, "learning_rate": 0.0006824369747899159, "loss": 0.5335, "step": 11445 }, { "epoch": 6.394413407821229, "grad_norm": 1.263240098953247, "learning_rate": 0.0006824089635854341, "loss": 0.3963, "step": 11446 }, { "epoch": 6.394972067039106, "grad_norm": 0.9457670450210571, "learning_rate": 0.0006823809523809524, "loss": 0.5925, "step": 11447 }, { "epoch": 6.395530726256983, "grad_norm": 0.91689532995224, "learning_rate": 0.0006823529411764706, "loss": 0.3814, "step": 11448 }, { 
"epoch": 6.39608938547486, "grad_norm": 0.5170482993125916, "learning_rate": 0.0006823249299719888, "loss": 0.3701, "step": 11449 }, { "epoch": 6.396648044692737, "grad_norm": 0.8940397500991821, "learning_rate": 0.0006822969187675069, "loss": 0.4448, "step": 11450 }, { "epoch": 6.397206703910615, "grad_norm": 0.6540806293487549, "learning_rate": 0.0006822689075630251, "loss": 0.4433, "step": 11451 }, { "epoch": 6.397765363128491, "grad_norm": 3.5318446159362793, "learning_rate": 0.0006822408963585435, "loss": 0.4573, "step": 11452 }, { "epoch": 6.398324022346369, "grad_norm": 4.933814525604248, "learning_rate": 0.0006822128851540617, "loss": 0.3694, "step": 11453 }, { "epoch": 6.3988826815642454, "grad_norm": 0.7283658981323242, "learning_rate": 0.0006821848739495799, "loss": 0.3455, "step": 11454 }, { "epoch": 6.399441340782123, "grad_norm": 1.1376770734786987, "learning_rate": 0.0006821568627450981, "loss": 0.451, "step": 11455 }, { "epoch": 6.4, "grad_norm": 0.7912377119064331, "learning_rate": 0.0006821288515406162, "loss": 0.481, "step": 11456 }, { "epoch": 6.400558659217877, "grad_norm": 1.2436774969100952, "learning_rate": 0.0006821008403361345, "loss": 0.4199, "step": 11457 }, { "epoch": 6.401117318435754, "grad_norm": 1.6595700979232788, "learning_rate": 0.0006820728291316527, "loss": 0.3624, "step": 11458 }, { "epoch": 6.401675977653631, "grad_norm": 0.8691661953926086, "learning_rate": 0.0006820448179271709, "loss": 0.5335, "step": 11459 }, { "epoch": 6.402234636871508, "grad_norm": 1.5098490715026855, "learning_rate": 0.0006820168067226891, "loss": 0.3824, "step": 11460 }, { "epoch": 6.402793296089386, "grad_norm": 0.693132221698761, "learning_rate": 0.0006819887955182072, "loss": 0.5411, "step": 11461 }, { "epoch": 6.403351955307262, "grad_norm": 0.7161781191825867, "learning_rate": 0.0006819607843137255, "loss": 0.4855, "step": 11462 }, { "epoch": 6.40391061452514, "grad_norm": 0.5640398263931274, "learning_rate": 0.0006819327731092437, "loss": 
0.6136, "step": 11463 }, { "epoch": 6.4044692737430164, "grad_norm": 0.5421932935714722, "learning_rate": 0.0006819047619047619, "loss": 0.5153, "step": 11464 }, { "epoch": 6.405027932960894, "grad_norm": 0.4436020255088806, "learning_rate": 0.0006818767507002801, "loss": 0.3355, "step": 11465 }, { "epoch": 6.405586592178771, "grad_norm": 0.6029152870178223, "learning_rate": 0.0006818487394957982, "loss": 0.4013, "step": 11466 }, { "epoch": 6.406145251396648, "grad_norm": 0.5074669122695923, "learning_rate": 0.0006818207282913166, "loss": 0.4693, "step": 11467 }, { "epoch": 6.406703910614525, "grad_norm": 0.7190271615982056, "learning_rate": 0.0006817927170868348, "loss": 0.5282, "step": 11468 }, { "epoch": 6.407262569832402, "grad_norm": 0.6207207441329956, "learning_rate": 0.000681764705882353, "loss": 0.5311, "step": 11469 }, { "epoch": 6.407821229050279, "grad_norm": 0.48141947388648987, "learning_rate": 0.0006817366946778712, "loss": 0.4243, "step": 11470 }, { "epoch": 6.408379888268156, "grad_norm": 1.4258897304534912, "learning_rate": 0.0006817086834733894, "loss": 0.5017, "step": 11471 }, { "epoch": 6.408938547486033, "grad_norm": 0.542827844619751, "learning_rate": 0.0006816806722689076, "loss": 0.5222, "step": 11472 }, { "epoch": 6.409497206703911, "grad_norm": 3.085486650466919, "learning_rate": 0.0006816526610644258, "loss": 0.4184, "step": 11473 }, { "epoch": 6.410055865921787, "grad_norm": 0.9297780394554138, "learning_rate": 0.000681624649859944, "loss": 0.4804, "step": 11474 }, { "epoch": 6.410614525139665, "grad_norm": 0.8626901507377625, "learning_rate": 0.0006815966386554622, "loss": 0.5163, "step": 11475 }, { "epoch": 6.411173184357542, "grad_norm": 0.6746317744255066, "learning_rate": 0.0006815686274509804, "loss": 0.5117, "step": 11476 }, { "epoch": 6.411731843575419, "grad_norm": 0.3959582448005676, "learning_rate": 0.0006815406162464986, "loss": 0.4302, "step": 11477 }, { "epoch": 6.412290502793296, "grad_norm": 0.6799911856651306, 
"learning_rate": 0.0006815126050420168, "loss": 0.4736, "step": 11478 }, { "epoch": 6.412849162011173, "grad_norm": 0.5117669105529785, "learning_rate": 0.000681484593837535, "loss": 0.4298, "step": 11479 }, { "epoch": 6.41340782122905, "grad_norm": 0.6756731271743774, "learning_rate": 0.0006814565826330532, "loss": 0.3862, "step": 11480 }, { "epoch": 6.413966480446927, "grad_norm": 0.7800499796867371, "learning_rate": 0.0006814285714285714, "loss": 0.4372, "step": 11481 }, { "epoch": 6.414525139664804, "grad_norm": 0.5478665232658386, "learning_rate": 0.0006814005602240896, "loss": 0.388, "step": 11482 }, { "epoch": 6.415083798882682, "grad_norm": 0.7572131156921387, "learning_rate": 0.0006813725490196078, "loss": 0.3239, "step": 11483 }, { "epoch": 6.415642458100558, "grad_norm": 0.4937751591205597, "learning_rate": 0.000681344537815126, "loss": 0.4346, "step": 11484 }, { "epoch": 6.416201117318436, "grad_norm": 0.4888262450695038, "learning_rate": 0.0006813165266106443, "loss": 0.4668, "step": 11485 }, { "epoch": 6.4167597765363125, "grad_norm": 0.6490456461906433, "learning_rate": 0.0006812885154061625, "loss": 0.4866, "step": 11486 }, { "epoch": 6.41731843575419, "grad_norm": 0.9592542052268982, "learning_rate": 0.0006812605042016808, "loss": 0.4989, "step": 11487 }, { "epoch": 6.417877094972067, "grad_norm": 1.562477707862854, "learning_rate": 0.0006812324929971989, "loss": 0.3994, "step": 11488 }, { "epoch": 6.418435754189944, "grad_norm": 0.5914081931114197, "learning_rate": 0.0006812044817927171, "loss": 0.4347, "step": 11489 }, { "epoch": 6.418994413407821, "grad_norm": 0.421370267868042, "learning_rate": 0.0006811764705882353, "loss": 0.4007, "step": 11490 }, { "epoch": 6.419553072625698, "grad_norm": 0.5330716967582703, "learning_rate": 0.0006811484593837535, "loss": 0.4284, "step": 11491 }, { "epoch": 6.420111731843575, "grad_norm": 0.6734991073608398, "learning_rate": 0.0006811204481792718, "loss": 0.469, "step": 11492 }, { "epoch": 6.420670391061453, 
"grad_norm": 0.5247898697853088, "learning_rate": 0.0006810924369747899, "loss": 0.377, "step": 11493 }, { "epoch": 6.421229050279329, "grad_norm": 0.4436488151550293, "learning_rate": 0.0006810644257703081, "loss": 0.3854, "step": 11494 }, { "epoch": 6.421787709497207, "grad_norm": 0.4950900375843048, "learning_rate": 0.0006810364145658263, "loss": 0.4331, "step": 11495 }, { "epoch": 6.4223463687150835, "grad_norm": 0.605148196220398, "learning_rate": 0.0006810084033613445, "loss": 0.4171, "step": 11496 }, { "epoch": 6.422905027932961, "grad_norm": 0.6597362756729126, "learning_rate": 0.0006809803921568628, "loss": 0.4259, "step": 11497 }, { "epoch": 6.423463687150838, "grad_norm": 1.375606894493103, "learning_rate": 0.0006809523809523809, "loss": 0.4399, "step": 11498 }, { "epoch": 6.424022346368715, "grad_norm": 0.6441894769668579, "learning_rate": 0.0006809243697478991, "loss": 0.4406, "step": 11499 }, { "epoch": 6.424581005586592, "grad_norm": 0.9051856994628906, "learning_rate": 0.0006808963585434173, "loss": 0.5179, "step": 11500 }, { "epoch": 6.424581005586592, "eval_cer": 0.0949450645368946, "eval_loss": 0.35293301939964294, "eval_runtime": 57.2426, "eval_samples_per_second": 79.277, "eval_steps_per_second": 4.961, "eval_wer": 0.380380436300662, "step": 11500 }, { "epoch": 6.425139664804469, "grad_norm": 1.4579472541809082, "learning_rate": 0.0006808683473389356, "loss": 0.4749, "step": 11501 }, { "epoch": 6.425698324022346, "grad_norm": 1.4717273712158203, "learning_rate": 0.0006808403361344539, "loss": 0.6759, "step": 11502 }, { "epoch": 6.426256983240224, "grad_norm": 0.49385038018226624, "learning_rate": 0.0006808123249299721, "loss": 0.4624, "step": 11503 }, { "epoch": 6.4268156424581, "grad_norm": 0.48125413060188293, "learning_rate": 0.0006807843137254902, "loss": 0.3982, "step": 11504 }, { "epoch": 6.427374301675978, "grad_norm": 0.7675278186798096, "learning_rate": 0.0006807563025210084, "loss": 0.3999, "step": 11505 }, { "epoch": 
6.4279329608938545, "grad_norm": 0.8417807221412659, "learning_rate": 0.0006807282913165266, "loss": 0.4844, "step": 11506 }, { "epoch": 6.428491620111732, "grad_norm": 0.7927025556564331, "learning_rate": 0.0006807002801120449, "loss": 0.5138, "step": 11507 }, { "epoch": 6.4290502793296085, "grad_norm": 0.42114967107772827, "learning_rate": 0.0006806722689075631, "loss": 0.4367, "step": 11508 }, { "epoch": 6.429608938547486, "grad_norm": 0.32600682973861694, "learning_rate": 0.0006806442577030812, "loss": 0.2956, "step": 11509 }, { "epoch": 6.430167597765363, "grad_norm": 0.7442623972892761, "learning_rate": 0.0006806162464985994, "loss": 0.6598, "step": 11510 }, { "epoch": 6.43072625698324, "grad_norm": 0.6744649410247803, "learning_rate": 0.0006805882352941176, "loss": 0.5523, "step": 11511 }, { "epoch": 6.431284916201117, "grad_norm": 2.1014201641082764, "learning_rate": 0.0006805602240896359, "loss": 0.5378, "step": 11512 }, { "epoch": 6.431843575418995, "grad_norm": 0.4607792794704437, "learning_rate": 0.0006805322128851541, "loss": 0.5108, "step": 11513 }, { "epoch": 6.432402234636871, "grad_norm": 0.4744274616241455, "learning_rate": 0.0006805042016806722, "loss": 0.4303, "step": 11514 }, { "epoch": 6.432960893854749, "grad_norm": 0.6840748190879822, "learning_rate": 0.0006804761904761904, "loss": 0.4678, "step": 11515 }, { "epoch": 6.4335195530726255, "grad_norm": 12.445887565612793, "learning_rate": 0.0006804481792717086, "loss": 0.5343, "step": 11516 }, { "epoch": 6.434078212290503, "grad_norm": 0.5468632578849792, "learning_rate": 0.000680420168067227, "loss": 0.4468, "step": 11517 }, { "epoch": 6.4346368715083795, "grad_norm": 0.6844728589057922, "learning_rate": 0.0006803921568627452, "loss": 0.4387, "step": 11518 }, { "epoch": 6.435195530726257, "grad_norm": 0.4937349259853363, "learning_rate": 0.0006803641456582634, "loss": 0.4165, "step": 11519 }, { "epoch": 6.435754189944134, "grad_norm": 0.5430785417556763, "learning_rate": 0.0006803361344537815, 
"loss": 0.4366, "step": 11520 }, { "epoch": 6.436312849162011, "grad_norm": 0.7449158430099487, "learning_rate": 0.0006803081232492997, "loss": 0.369, "step": 11521 }, { "epoch": 6.436871508379888, "grad_norm": 0.5434979796409607, "learning_rate": 0.000680280112044818, "loss": 0.4346, "step": 11522 }, { "epoch": 6.437430167597765, "grad_norm": 0.6099750995635986, "learning_rate": 0.0006802521008403362, "loss": 0.4457, "step": 11523 }, { "epoch": 6.437988826815642, "grad_norm": 5.076451778411865, "learning_rate": 0.0006802240896358544, "loss": 0.3957, "step": 11524 }, { "epoch": 6.43854748603352, "grad_norm": 0.7869237065315247, "learning_rate": 0.0006801960784313725, "loss": 0.4016, "step": 11525 }, { "epoch": 6.4391061452513965, "grad_norm": 1.243279218673706, "learning_rate": 0.0006801680672268907, "loss": 0.4972, "step": 11526 }, { "epoch": 6.439664804469274, "grad_norm": 0.7846372127532959, "learning_rate": 0.000680140056022409, "loss": 0.445, "step": 11527 }, { "epoch": 6.4402234636871505, "grad_norm": 0.6177942156791687, "learning_rate": 0.0006801120448179272, "loss": 0.5236, "step": 11528 }, { "epoch": 6.440782122905028, "grad_norm": 0.7969048023223877, "learning_rate": 0.0006800840336134454, "loss": 0.4799, "step": 11529 }, { "epoch": 6.441340782122905, "grad_norm": 0.5414762496948242, "learning_rate": 0.0006800560224089635, "loss": 0.3888, "step": 11530 }, { "epoch": 6.441899441340782, "grad_norm": 1.089887261390686, "learning_rate": 0.0006800280112044817, "loss": 0.3662, "step": 11531 }, { "epoch": 6.442458100558659, "grad_norm": 0.7972704768180847, "learning_rate": 0.00068, "loss": 0.5145, "step": 11532 }, { "epoch": 6.443016759776536, "grad_norm": 0.6176258325576782, "learning_rate": 0.0006799719887955183, "loss": 0.496, "step": 11533 }, { "epoch": 6.443575418994413, "grad_norm": 0.6138110160827637, "learning_rate": 0.0006799439775910365, "loss": 0.5631, "step": 11534 }, { "epoch": 6.444134078212291, "grad_norm": 1.5930955410003662, "learning_rate": 
0.0006799159663865547, "loss": 0.4199, "step": 11535 }, { "epoch": 6.4446927374301675, "grad_norm": 0.443279504776001, "learning_rate": 0.0006798879551820728, "loss": 0.524, "step": 11536 }, { "epoch": 6.445251396648045, "grad_norm": 0.8224071860313416, "learning_rate": 0.0006798599439775911, "loss": 0.4894, "step": 11537 }, { "epoch": 6.4458100558659215, "grad_norm": 0.5819796919822693, "learning_rate": 0.0006798319327731093, "loss": 0.4802, "step": 11538 }, { "epoch": 6.446368715083799, "grad_norm": 0.5683058500289917, "learning_rate": 0.0006798039215686275, "loss": 0.4199, "step": 11539 }, { "epoch": 6.446927374301676, "grad_norm": 0.4222470223903656, "learning_rate": 0.0006797759103641457, "loss": 0.3875, "step": 11540 }, { "epoch": 6.447486033519553, "grad_norm": 0.8234904408454895, "learning_rate": 0.0006797478991596638, "loss": 0.5202, "step": 11541 }, { "epoch": 6.44804469273743, "grad_norm": 0.7311606407165527, "learning_rate": 0.0006797198879551821, "loss": 0.5252, "step": 11542 }, { "epoch": 6.448603351955307, "grad_norm": 0.6099271178245544, "learning_rate": 0.0006796918767507003, "loss": 0.4048, "step": 11543 }, { "epoch": 6.449162011173184, "grad_norm": 0.5512201189994812, "learning_rate": 0.0006796638655462185, "loss": 0.4183, "step": 11544 }, { "epoch": 6.449720670391061, "grad_norm": 0.4703015387058258, "learning_rate": 0.0006796358543417367, "loss": 0.432, "step": 11545 }, { "epoch": 6.4502793296089385, "grad_norm": 0.4602089524269104, "learning_rate": 0.0006796078431372548, "loss": 0.4712, "step": 11546 }, { "epoch": 6.450837988826816, "grad_norm": 0.6360549330711365, "learning_rate": 0.0006795798319327731, "loss": 0.539, "step": 11547 }, { "epoch": 6.4513966480446925, "grad_norm": 0.46027907729148865, "learning_rate": 0.0006795518207282913, "loss": 0.3673, "step": 11548 }, { "epoch": 6.45195530726257, "grad_norm": 0.5553647875785828, "learning_rate": 0.0006795238095238096, "loss": 0.4222, "step": 11549 }, { "epoch": 6.452513966480447, 
"grad_norm": 0.8476529717445374, "learning_rate": 0.0006794957983193278, "loss": 0.5464, "step": 11550 }, { "epoch": 6.453072625698324, "grad_norm": 0.6125985980033875, "learning_rate": 0.000679467787114846, "loss": 0.5105, "step": 11551 }, { "epoch": 6.453631284916201, "grad_norm": 0.45297735929489136, "learning_rate": 0.0006794397759103642, "loss": 0.485, "step": 11552 }, { "epoch": 6.454189944134078, "grad_norm": 0.4742082357406616, "learning_rate": 0.0006794117647058824, "loss": 0.4205, "step": 11553 }, { "epoch": 6.454748603351955, "grad_norm": 1.5596706867218018, "learning_rate": 0.0006793837535014006, "loss": 0.495, "step": 11554 }, { "epoch": 6.455307262569832, "grad_norm": 0.7248988151550293, "learning_rate": 0.0006793557422969188, "loss": 0.4575, "step": 11555 }, { "epoch": 6.4558659217877095, "grad_norm": 0.3988633453845978, "learning_rate": 0.000679327731092437, "loss": 0.4069, "step": 11556 }, { "epoch": 6.456424581005587, "grad_norm": 0.5369634628295898, "learning_rate": 0.0006792997198879552, "loss": 0.6724, "step": 11557 }, { "epoch": 6.4569832402234635, "grad_norm": 0.5384875535964966, "learning_rate": 0.0006792717086834734, "loss": 0.4388, "step": 11558 }, { "epoch": 6.457541899441341, "grad_norm": 1.1868678331375122, "learning_rate": 0.0006792436974789916, "loss": 0.4208, "step": 11559 }, { "epoch": 6.4581005586592175, "grad_norm": 0.4786730408668518, "learning_rate": 0.0006792156862745098, "loss": 0.367, "step": 11560 }, { "epoch": 6.458659217877095, "grad_norm": 0.5368600487709045, "learning_rate": 0.000679187675070028, "loss": 0.5334, "step": 11561 }, { "epoch": 6.459217877094972, "grad_norm": 0.38555872440338135, "learning_rate": 0.0006791596638655463, "loss": 0.5225, "step": 11562 }, { "epoch": 6.459776536312849, "grad_norm": 0.6935184597969055, "learning_rate": 0.0006791316526610644, "loss": 0.6592, "step": 11563 }, { "epoch": 6.460335195530726, "grad_norm": 0.6343544125556946, "learning_rate": 0.0006791036414565826, "loss": 0.5693, "step": 
11564 }, { "epoch": 6.460893854748603, "grad_norm": 0.7088921666145325, "learning_rate": 0.0006790756302521008, "loss": 0.6159, "step": 11565 }, { "epoch": 6.4614525139664805, "grad_norm": 0.4299537241458893, "learning_rate": 0.000679047619047619, "loss": 0.4482, "step": 11566 }, { "epoch": 6.462011173184358, "grad_norm": 0.6019023060798645, "learning_rate": 0.0006790196078431374, "loss": 0.5049, "step": 11567 }, { "epoch": 6.4625698324022345, "grad_norm": 0.5985503792762756, "learning_rate": 0.0006789915966386555, "loss": 0.4236, "step": 11568 }, { "epoch": 6.463128491620112, "grad_norm": 0.7729337811470032, "learning_rate": 0.0006789635854341737, "loss": 0.4296, "step": 11569 }, { "epoch": 6.4636871508379885, "grad_norm": 0.4970942735671997, "learning_rate": 0.0006789355742296919, "loss": 0.4497, "step": 11570 }, { "epoch": 6.464245810055866, "grad_norm": 1.7168594598770142, "learning_rate": 0.0006789075630252101, "loss": 0.574, "step": 11571 }, { "epoch": 6.464804469273743, "grad_norm": 0.4154176414012909, "learning_rate": 0.0006788795518207284, "loss": 0.3927, "step": 11572 }, { "epoch": 6.46536312849162, "grad_norm": 0.5008817315101624, "learning_rate": 0.0006788515406162465, "loss": 0.5798, "step": 11573 }, { "epoch": 6.465921787709497, "grad_norm": 0.6479870080947876, "learning_rate": 0.0006788235294117647, "loss": 0.5343, "step": 11574 }, { "epoch": 6.466480446927374, "grad_norm": 0.5261443257331848, "learning_rate": 0.0006787955182072829, "loss": 0.4389, "step": 11575 }, { "epoch": 6.4670391061452515, "grad_norm": Infinity, "learning_rate": 0.0006787955182072829, "loss": 0.4804, "step": 11576 }, { "epoch": 6.467597765363129, "grad_norm": 0.4131874442100525, "learning_rate": 0.0006787675070028011, "loss": 0.4144, "step": 11577 }, { "epoch": 6.4681564245810055, "grad_norm": 0.5997766256332397, "learning_rate": 0.0006787394957983194, "loss": 0.467, "step": 11578 }, { "epoch": 6.468715083798883, "grad_norm": 2.000835418701172, "learning_rate": 
0.0006787114845938376, "loss": 0.3998, "step": 11579 }, { "epoch": 6.4692737430167595, "grad_norm": 0.4934137165546417, "learning_rate": 0.0006786834733893557, "loss": 0.4218, "step": 11580 }, { "epoch": 6.469832402234637, "grad_norm": 0.5290830731391907, "learning_rate": 0.0006786554621848739, "loss": 0.3911, "step": 11581 }, { "epoch": 6.4703910614525135, "grad_norm": 0.6661310791969299, "learning_rate": 0.0006786274509803921, "loss": 0.3786, "step": 11582 }, { "epoch": 6.470949720670391, "grad_norm": 0.5292418599128723, "learning_rate": 0.0006785994397759105, "loss": 0.4864, "step": 11583 }, { "epoch": 6.471508379888268, "grad_norm": 1.2342309951782227, "learning_rate": 0.0006785714285714287, "loss": 0.5016, "step": 11584 }, { "epoch": 6.472067039106145, "grad_norm": 0.3915422260761261, "learning_rate": 0.0006785434173669468, "loss": 0.4954, "step": 11585 }, { "epoch": 6.4726256983240225, "grad_norm": 0.48492929339408875, "learning_rate": 0.000678515406162465, "loss": 0.4447, "step": 11586 }, { "epoch": 6.473184357541899, "grad_norm": 0.33723577857017517, "learning_rate": 0.0006784873949579832, "loss": 0.3359, "step": 11587 }, { "epoch": 6.4737430167597765, "grad_norm": 0.4832063615322113, "learning_rate": 0.0006784593837535015, "loss": 0.3196, "step": 11588 }, { "epoch": 6.474301675977654, "grad_norm": 2.238978147506714, "learning_rate": 0.0006784313725490197, "loss": 0.4031, "step": 11589 }, { "epoch": 6.4748603351955305, "grad_norm": 0.7958325147628784, "learning_rate": 0.0006784033613445378, "loss": 0.4567, "step": 11590 }, { "epoch": 6.475418994413408, "grad_norm": 4.274355411529541, "learning_rate": 0.000678375350140056, "loss": 0.3562, "step": 11591 }, { "epoch": 6.4759776536312845, "grad_norm": 0.40913107991218567, "learning_rate": 0.0006783473389355742, "loss": 0.4224, "step": 11592 }, { "epoch": 6.476536312849162, "grad_norm": 0.905695378780365, "learning_rate": 0.0006783193277310925, "loss": 0.5078, "step": 11593 }, { "epoch": 6.477094972067039, 
"grad_norm": 0.5614436864852905, "learning_rate": 0.0006782913165266107, "loss": 0.4966, "step": 11594 }, { "epoch": 6.477653631284916, "grad_norm": 0.9083330035209656, "learning_rate": 0.0006782633053221289, "loss": 0.4057, "step": 11595 }, { "epoch": 6.4782122905027935, "grad_norm": 0.9854283332824707, "learning_rate": 0.000678235294117647, "loss": 0.4475, "step": 11596 }, { "epoch": 6.47877094972067, "grad_norm": 0.8095300197601318, "learning_rate": 0.0006782072829131652, "loss": 0.4906, "step": 11597 }, { "epoch": 6.4793296089385475, "grad_norm": 0.4725625514984131, "learning_rate": 0.0006781792717086835, "loss": 0.4797, "step": 11598 }, { "epoch": 6.479888268156425, "grad_norm": 1.2080659866333008, "learning_rate": 0.0006781512605042018, "loss": 0.5809, "step": 11599 }, { "epoch": 6.4804469273743015, "grad_norm": 0.49512118101119995, "learning_rate": 0.00067812324929972, "loss": 0.5115, "step": 11600 }, { "epoch": 6.481005586592179, "grad_norm": 0.9185149669647217, "learning_rate": 0.0006780952380952381, "loss": 0.3803, "step": 11601 }, { "epoch": 6.4815642458100555, "grad_norm": 0.536938488483429, "learning_rate": 0.0006780672268907563, "loss": 0.3408, "step": 11602 }, { "epoch": 6.482122905027933, "grad_norm": 0.6550922989845276, "learning_rate": 0.0006780392156862745, "loss": 0.4763, "step": 11603 }, { "epoch": 6.48268156424581, "grad_norm": 0.451121985912323, "learning_rate": 0.0006780112044817928, "loss": 0.2965, "step": 11604 }, { "epoch": 6.483240223463687, "grad_norm": 0.48272010684013367, "learning_rate": 0.000677983193277311, "loss": 0.4601, "step": 11605 }, { "epoch": 6.4837988826815645, "grad_norm": 0.4383988380432129, "learning_rate": 0.0006779551820728291, "loss": 0.4034, "step": 11606 }, { "epoch": 6.484357541899441, "grad_norm": 0.4807858467102051, "learning_rate": 0.0006779271708683473, "loss": 0.4006, "step": 11607 }, { "epoch": 6.4849162011173185, "grad_norm": 0.8803003430366516, "learning_rate": 0.0006778991596638655, "loss": 0.5064, 
"step": 11608 }, { "epoch": 6.485474860335196, "grad_norm": 0.45994246006011963, "learning_rate": 0.0006778711484593838, "loss": 0.4319, "step": 11609 }, { "epoch": 6.4860335195530725, "grad_norm": 0.8877516388893127, "learning_rate": 0.000677843137254902, "loss": 0.3802, "step": 11610 }, { "epoch": 6.48659217877095, "grad_norm": 0.8846052289009094, "learning_rate": 0.0006778151260504202, "loss": 0.5155, "step": 11611 }, { "epoch": 6.4871508379888265, "grad_norm": 0.5974485874176025, "learning_rate": 0.0006777871148459383, "loss": 0.4132, "step": 11612 }, { "epoch": 6.487709497206704, "grad_norm": 0.5771796703338623, "learning_rate": 0.0006777591036414565, "loss": 0.5279, "step": 11613 }, { "epoch": 6.488268156424581, "grad_norm": 0.42405661940574646, "learning_rate": 0.0006777310924369748, "loss": 0.4475, "step": 11614 }, { "epoch": 6.488826815642458, "grad_norm": 0.7989774942398071, "learning_rate": 0.000677703081232493, "loss": 0.5182, "step": 11615 }, { "epoch": 6.4893854748603355, "grad_norm": 1.0028172731399536, "learning_rate": 0.0006776750700280113, "loss": 0.2954, "step": 11616 }, { "epoch": 6.489944134078212, "grad_norm": 4.393218994140625, "learning_rate": 0.0006776470588235294, "loss": 0.4148, "step": 11617 }, { "epoch": 6.4905027932960895, "grad_norm": 0.7309901118278503, "learning_rate": 0.0006776190476190476, "loss": 0.4974, "step": 11618 }, { "epoch": 6.491061452513966, "grad_norm": 0.426919162273407, "learning_rate": 0.0006775910364145659, "loss": 0.4249, "step": 11619 }, { "epoch": 6.4916201117318435, "grad_norm": 0.574055552482605, "learning_rate": 0.0006775630252100841, "loss": 0.5229, "step": 11620 }, { "epoch": 6.492178770949721, "grad_norm": 0.5560129284858704, "learning_rate": 0.0006775350140056023, "loss": 0.5134, "step": 11621 }, { "epoch": 6.4927374301675975, "grad_norm": 0.6170814037322998, "learning_rate": 0.0006775070028011204, "loss": 0.43, "step": 11622 }, { "epoch": 6.493296089385475, "grad_norm": 0.5968080163002014, 
"learning_rate": 0.0006774789915966386, "loss": 0.3324, "step": 11623 }, { "epoch": 6.4938547486033515, "grad_norm": 0.5597171783447266, "learning_rate": 0.0006774509803921569, "loss": 0.3745, "step": 11624 }, { "epoch": 6.494413407821229, "grad_norm": 0.4412710666656494, "learning_rate": 0.0006774229691876751, "loss": 0.4077, "step": 11625 }, { "epoch": 6.4949720670391065, "grad_norm": 0.499252587556839, "learning_rate": 0.0006773949579831933, "loss": 0.3957, "step": 11626 }, { "epoch": 6.495530726256983, "grad_norm": 0.4536375403404236, "learning_rate": 0.0006773669467787115, "loss": 0.4084, "step": 11627 }, { "epoch": 6.4960893854748605, "grad_norm": 0.6035411357879639, "learning_rate": 0.0006773389355742296, "loss": 0.3971, "step": 11628 }, { "epoch": 6.496648044692737, "grad_norm": 0.528416097164154, "learning_rate": 0.0006773109243697479, "loss": 0.4148, "step": 11629 }, { "epoch": 6.4972067039106145, "grad_norm": 4.354679107666016, "learning_rate": 0.0006772829131652661, "loss": 0.4047, "step": 11630 }, { "epoch": 6.497765363128492, "grad_norm": 0.732670783996582, "learning_rate": 0.0006772549019607843, "loss": 0.3754, "step": 11631 }, { "epoch": 6.4983240223463685, "grad_norm": 0.5008953809738159, "learning_rate": 0.0006772268907563026, "loss": 0.4057, "step": 11632 }, { "epoch": 6.498882681564246, "grad_norm": 2.3818132877349854, "learning_rate": 0.0006771988795518206, "loss": 0.4109, "step": 11633 }, { "epoch": 6.4994413407821225, "grad_norm": 0.4366646409034729, "learning_rate": 0.000677170868347339, "loss": 0.4189, "step": 11634 }, { "epoch": 6.5, "grad_norm": 3.6260876655578613, "learning_rate": 0.0006771428571428572, "loss": 0.4343, "step": 11635 }, { "epoch": 6.5005586592178775, "grad_norm": 0.4655911922454834, "learning_rate": 0.0006771148459383754, "loss": 0.3994, "step": 11636 }, { "epoch": 6.501117318435754, "grad_norm": 0.44507983326911926, "learning_rate": 0.0006770868347338936, "loss": 0.271, "step": 11637 }, { "epoch": 6.5016759776536315, 
"grad_norm": 0.6009631156921387, "learning_rate": 0.0006770588235294117, "loss": 0.4648, "step": 11638 }, { "epoch": 6.502234636871508, "grad_norm": 0.5486988425254822, "learning_rate": 0.00067703081232493, "loss": 0.4593, "step": 11639 }, { "epoch": 6.5027932960893855, "grad_norm": 0.46845799684524536, "learning_rate": 0.0006770028011204482, "loss": 0.4711, "step": 11640 }, { "epoch": 6.503351955307263, "grad_norm": 0.4946540594100952, "learning_rate": 0.0006769747899159664, "loss": 0.4122, "step": 11641 }, { "epoch": 6.5039106145251395, "grad_norm": 0.6086100339889526, "learning_rate": 0.0006769467787114846, "loss": 0.4373, "step": 11642 }, { "epoch": 6.504469273743017, "grad_norm": 0.5401648879051208, "learning_rate": 0.0006769187675070028, "loss": 0.4221, "step": 11643 }, { "epoch": 6.5050279329608935, "grad_norm": 0.44656991958618164, "learning_rate": 0.000676890756302521, "loss": 0.426, "step": 11644 }, { "epoch": 6.505586592178771, "grad_norm": 1.386893391609192, "learning_rate": 0.0006768627450980392, "loss": 0.5002, "step": 11645 }, { "epoch": 6.506145251396648, "grad_norm": 0.3629169464111328, "learning_rate": 0.0006768347338935574, "loss": 0.5152, "step": 11646 }, { "epoch": 6.506703910614525, "grad_norm": 0.5242171287536621, "learning_rate": 0.0006768067226890756, "loss": 0.468, "step": 11647 }, { "epoch": 6.5072625698324025, "grad_norm": 0.7179381847381592, "learning_rate": 0.0006767787114845938, "loss": 0.4532, "step": 11648 }, { "epoch": 6.507821229050279, "grad_norm": 0.7212526202201843, "learning_rate": 0.000676750700280112, "loss": 0.4488, "step": 11649 }, { "epoch": 6.5083798882681565, "grad_norm": 1.554672360420227, "learning_rate": 0.0006767226890756303, "loss": 0.4312, "step": 11650 }, { "epoch": 6.508938547486034, "grad_norm": 0.45662686228752136, "learning_rate": 0.0006766946778711485, "loss": 0.5353, "step": 11651 }, { "epoch": 6.5094972067039105, "grad_norm": 0.5225405693054199, "learning_rate": 0.0006766666666666667, "loss": 0.5689, 
"step": 11652 }, { "epoch": 6.510055865921788, "grad_norm": 0.6175033450126648, "learning_rate": 0.0006766386554621849, "loss": 0.3839, "step": 11653 }, { "epoch": 6.5106145251396645, "grad_norm": 2.2123074531555176, "learning_rate": 0.0006766106442577031, "loss": 0.4167, "step": 11654 }, { "epoch": 6.511173184357542, "grad_norm": 0.5899970531463623, "learning_rate": 0.0006765826330532213, "loss": 0.3908, "step": 11655 }, { "epoch": 6.511731843575419, "grad_norm": 0.5495463013648987, "learning_rate": 0.0006765546218487395, "loss": 0.491, "step": 11656 }, { "epoch": 6.512290502793296, "grad_norm": 0.7404274940490723, "learning_rate": 0.0006765266106442577, "loss": 0.6168, "step": 11657 }, { "epoch": 6.5128491620111735, "grad_norm": 0.5841642618179321, "learning_rate": 0.0006764985994397759, "loss": 0.4724, "step": 11658 }, { "epoch": 6.51340782122905, "grad_norm": 0.47262054681777954, "learning_rate": 0.0006764705882352942, "loss": 0.4655, "step": 11659 }, { "epoch": 6.5139664804469275, "grad_norm": 1.0039445161819458, "learning_rate": 0.0006764425770308123, "loss": 0.5438, "step": 11660 }, { "epoch": 6.514525139664805, "grad_norm": 9.346894264221191, "learning_rate": 0.0006764145658263305, "loss": 0.4436, "step": 11661 }, { "epoch": 6.5150837988826815, "grad_norm": 0.5571748614311218, "learning_rate": 0.0006763865546218487, "loss": 0.4725, "step": 11662 }, { "epoch": 6.515642458100559, "grad_norm": 0.5540754795074463, "learning_rate": 0.0006763585434173669, "loss": 0.5995, "step": 11663 }, { "epoch": 6.5162011173184355, "grad_norm": 0.6166994571685791, "learning_rate": 0.0006763305322128853, "loss": 0.4852, "step": 11664 }, { "epoch": 6.516759776536313, "grad_norm": 0.4597347676753998, "learning_rate": 0.0006763025210084033, "loss": 0.3297, "step": 11665 }, { "epoch": 6.51731843575419, "grad_norm": 0.7996395826339722, "learning_rate": 0.0006762745098039216, "loss": 0.6306, "step": 11666 }, { "epoch": 6.517877094972067, "grad_norm": 1.1374506950378418, 
"learning_rate": 0.0006762464985994398, "loss": 0.4721, "step": 11667 }, { "epoch": 6.5184357541899445, "grad_norm": 0.6028649210929871, "learning_rate": 0.000676218487394958, "loss": 0.7393, "step": 11668 }, { "epoch": 6.518994413407821, "grad_norm": 0.5631989240646362, "learning_rate": 0.0006761904761904763, "loss": 0.3864, "step": 11669 }, { "epoch": 6.5195530726256985, "grad_norm": 1.0149052143096924, "learning_rate": 0.0006761624649859944, "loss": 0.4789, "step": 11670 }, { "epoch": 6.520111731843575, "grad_norm": 0.5269141793251038, "learning_rate": 0.0006761344537815126, "loss": 0.4225, "step": 11671 }, { "epoch": 6.5206703910614525, "grad_norm": 0.6035739779472351, "learning_rate": 0.0006761064425770308, "loss": 0.4988, "step": 11672 }, { "epoch": 6.52122905027933, "grad_norm": 0.5897813439369202, "learning_rate": 0.000676078431372549, "loss": 0.4789, "step": 11673 }, { "epoch": 6.5217877094972065, "grad_norm": 1.0650050640106201, "learning_rate": 0.0006760504201680673, "loss": 0.4742, "step": 11674 }, { "epoch": 6.522346368715084, "grad_norm": 1.0785117149353027, "learning_rate": 0.0006760224089635855, "loss": 0.4457, "step": 11675 }, { "epoch": 6.522905027932961, "grad_norm": 0.5334888100624084, "learning_rate": 0.0006759943977591036, "loss": 0.3945, "step": 11676 }, { "epoch": 6.523463687150838, "grad_norm": 0.6811595559120178, "learning_rate": 0.0006759663865546218, "loss": 0.4326, "step": 11677 }, { "epoch": 6.5240223463687155, "grad_norm": 0.4486103355884552, "learning_rate": 0.00067593837535014, "loss": 0.4587, "step": 11678 }, { "epoch": 6.524581005586592, "grad_norm": 0.594249963760376, "learning_rate": 0.0006759103641456583, "loss": 0.3933, "step": 11679 }, { "epoch": 6.5251396648044695, "grad_norm": 0.7737732529640198, "learning_rate": 0.0006758823529411765, "loss": 0.3668, "step": 11680 }, { "epoch": 6.525698324022346, "grad_norm": 0.4339161813259125, "learning_rate": 0.0006758543417366946, "loss": 0.3771, "step": 11681 }, { "epoch": 
6.5262569832402235, "grad_norm": 0.3916019797325134, "learning_rate": 0.0006758263305322129, "loss": 0.344, "step": 11682 }, { "epoch": 6.5268156424581, "grad_norm": 0.6638060808181763, "learning_rate": 0.0006757983193277311, "loss": 0.6867, "step": 11683 }, { "epoch": 6.5273743016759775, "grad_norm": 0.7278107404708862, "learning_rate": 0.0006757703081232494, "loss": 0.4547, "step": 11684 }, { "epoch": 6.527932960893855, "grad_norm": 0.5387098789215088, "learning_rate": 0.0006757422969187676, "loss": 0.4331, "step": 11685 }, { "epoch": 6.528491620111732, "grad_norm": 0.46186915040016174, "learning_rate": 0.0006757142857142857, "loss": 0.4794, "step": 11686 }, { "epoch": 6.529050279329609, "grad_norm": 3.535621166229248, "learning_rate": 0.0006756862745098039, "loss": 0.4159, "step": 11687 }, { "epoch": 6.5296089385474865, "grad_norm": 1.0551750659942627, "learning_rate": 0.0006756582633053221, "loss": 0.4948, "step": 11688 }, { "epoch": 6.530167597765363, "grad_norm": 10.206016540527344, "learning_rate": 0.0006756302521008404, "loss": 0.3857, "step": 11689 }, { "epoch": 6.5307262569832405, "grad_norm": 5.077488422393799, "learning_rate": 0.0006756022408963586, "loss": 0.455, "step": 11690 }, { "epoch": 6.531284916201117, "grad_norm": 0.6153773665428162, "learning_rate": 0.0006755742296918768, "loss": 0.4023, "step": 11691 }, { "epoch": 6.5318435754189945, "grad_norm": 0.7012595534324646, "learning_rate": 0.0006755462184873949, "loss": 0.6857, "step": 11692 }, { "epoch": 6.532402234636871, "grad_norm": 0.5747553110122681, "learning_rate": 0.0006755182072829131, "loss": 0.4673, "step": 11693 }, { "epoch": 6.5329608938547485, "grad_norm": 0.7796198725700378, "learning_rate": 0.0006754901960784314, "loss": 0.5701, "step": 11694 }, { "epoch": 6.533519553072626, "grad_norm": 0.389577716588974, "learning_rate": 0.0006754621848739496, "loss": 0.3423, "step": 11695 }, { "epoch": 6.534078212290503, "grad_norm": 0.46891096234321594, "learning_rate": 0.0006754341736694678, 
"loss": 0.3994, "step": 11696 }, { "epoch": 6.53463687150838, "grad_norm": 0.4067317843437195, "learning_rate": 0.0006754061624649859, "loss": 0.4095, "step": 11697 }, { "epoch": 6.5351955307262575, "grad_norm": 0.5995504260063171, "learning_rate": 0.0006753781512605041, "loss": 0.4651, "step": 11698 }, { "epoch": 6.535754189944134, "grad_norm": 0.5230239033699036, "learning_rate": 0.0006753501400560225, "loss": 0.3816, "step": 11699 }, { "epoch": 6.5363128491620115, "grad_norm": 0.8123779296875, "learning_rate": 0.0006753221288515407, "loss": 0.4034, "step": 11700 }, { "epoch": 6.536871508379888, "grad_norm": 0.6425394415855408, "learning_rate": 0.0006752941176470589, "loss": 0.489, "step": 11701 }, { "epoch": 6.5374301675977655, "grad_norm": 0.37751275300979614, "learning_rate": 0.000675266106442577, "loss": 0.4423, "step": 11702 }, { "epoch": 6.537988826815642, "grad_norm": 0.5902776718139648, "learning_rate": 0.0006752380952380952, "loss": 0.4689, "step": 11703 }, { "epoch": 6.5385474860335195, "grad_norm": 0.5996629595756531, "learning_rate": 0.0006752100840336135, "loss": 0.6342, "step": 11704 }, { "epoch": 6.539106145251397, "grad_norm": 0.9844701290130615, "learning_rate": 0.0006751820728291317, "loss": 0.3726, "step": 11705 }, { "epoch": 6.539664804469274, "grad_norm": 0.4572041928768158, "learning_rate": 0.0006751540616246499, "loss": 0.4269, "step": 11706 }, { "epoch": 6.540223463687151, "grad_norm": 0.36848145723342896, "learning_rate": 0.0006751260504201681, "loss": 0.3772, "step": 11707 }, { "epoch": 6.540782122905028, "grad_norm": 3.8569045066833496, "learning_rate": 0.0006750980392156862, "loss": 0.6048, "step": 11708 }, { "epoch": 6.541340782122905, "grad_norm": 0.4871978461742401, "learning_rate": 0.0006750700280112045, "loss": 0.388, "step": 11709 }, { "epoch": 6.5418994413407825, "grad_norm": 0.49558138847351074, "learning_rate": 0.0006750420168067227, "loss": 0.4798, "step": 11710 }, { "epoch": 6.542458100558659, "grad_norm": 1.104504108428955, 
"learning_rate": 0.0006750140056022409, "loss": 0.3837, "step": 11711 }, { "epoch": 6.5430167597765365, "grad_norm": 0.44812867045402527, "learning_rate": 0.0006749859943977591, "loss": 0.4445, "step": 11712 }, { "epoch": 6.543575418994413, "grad_norm": 1.1642338037490845, "learning_rate": 0.0006749579831932772, "loss": 0.4395, "step": 11713 }, { "epoch": 6.5441340782122905, "grad_norm": 0.5086650252342224, "learning_rate": 0.0006749299719887956, "loss": 0.4913, "step": 11714 }, { "epoch": 6.544692737430168, "grad_norm": 0.5418620705604553, "learning_rate": 0.0006749019607843138, "loss": 0.4173, "step": 11715 }, { "epoch": 6.545251396648045, "grad_norm": 0.4850539267063141, "learning_rate": 0.000674873949579832, "loss": 0.4777, "step": 11716 }, { "epoch": 6.545810055865922, "grad_norm": 0.5534653663635254, "learning_rate": 0.0006748459383753502, "loss": 0.5654, "step": 11717 }, { "epoch": 6.546368715083799, "grad_norm": 1.098381757736206, "learning_rate": 0.0006748179271708683, "loss": 0.3952, "step": 11718 }, { "epoch": 6.546927374301676, "grad_norm": 0.7109861373901367, "learning_rate": 0.0006747899159663866, "loss": 0.467, "step": 11719 }, { "epoch": 6.547486033519553, "grad_norm": 0.4619545042514801, "learning_rate": 0.0006747619047619048, "loss": 0.4914, "step": 11720 }, { "epoch": 6.54804469273743, "grad_norm": 0.6146419644355774, "learning_rate": 0.000674733893557423, "loss": 0.5054, "step": 11721 }, { "epoch": 6.5486033519553075, "grad_norm": 2.3284287452697754, "learning_rate": 0.0006747058823529412, "loss": 0.4157, "step": 11722 }, { "epoch": 6.549162011173184, "grad_norm": 0.6265036463737488, "learning_rate": 0.0006746778711484594, "loss": 0.4746, "step": 11723 }, { "epoch": 6.5497206703910615, "grad_norm": 1.0419626235961914, "learning_rate": 0.0006746498599439776, "loss": 0.3497, "step": 11724 }, { "epoch": 6.550279329608939, "grad_norm": 0.6008273363113403, "learning_rate": 0.0006746218487394958, "loss": 0.4081, "step": 11725 }, { "epoch": 
6.550837988826816, "grad_norm": 0.448623925447464, "learning_rate": 0.000674593837535014, "loss": 0.3503, "step": 11726 }, { "epoch": 6.551396648044693, "grad_norm": 0.4313521087169647, "learning_rate": 0.0006745658263305322, "loss": 0.3847, "step": 11727 }, { "epoch": 6.55195530726257, "grad_norm": 0.44949379563331604, "learning_rate": 0.0006745378151260504, "loss": 0.4124, "step": 11728 }, { "epoch": 6.552513966480447, "grad_norm": 0.5030506253242493, "learning_rate": 0.0006745098039215686, "loss": 0.415, "step": 11729 }, { "epoch": 6.553072625698324, "grad_norm": 0.48275724053382874, "learning_rate": 0.0006744817927170868, "loss": 0.5779, "step": 11730 }, { "epoch": 6.553631284916201, "grad_norm": 0.45968884229660034, "learning_rate": 0.000674453781512605, "loss": 0.4339, "step": 11731 }, { "epoch": 6.5541899441340785, "grad_norm": 0.4956890046596527, "learning_rate": 0.0006744257703081233, "loss": 0.4526, "step": 11732 }, { "epoch": 6.554748603351955, "grad_norm": 0.3742595911026001, "learning_rate": 0.0006743977591036415, "loss": 0.4289, "step": 11733 }, { "epoch": 6.5553072625698325, "grad_norm": 0.39774876832962036, "learning_rate": 0.0006743697478991597, "loss": 0.4237, "step": 11734 }, { "epoch": 6.55586592178771, "grad_norm": 0.3887314200401306, "learning_rate": 0.0006743417366946779, "loss": 0.4092, "step": 11735 }, { "epoch": 6.556424581005587, "grad_norm": 0.5866477489471436, "learning_rate": 0.0006743137254901961, "loss": 0.4723, "step": 11736 }, { "epoch": 6.556983240223464, "grad_norm": 0.38468068838119507, "learning_rate": 0.0006742857142857143, "loss": 0.3693, "step": 11737 }, { "epoch": 6.557541899441341, "grad_norm": 0.37022414803504944, "learning_rate": 0.0006742577030812325, "loss": 0.4326, "step": 11738 }, { "epoch": 6.558100558659218, "grad_norm": 1.8264210224151611, "learning_rate": 0.0006742296918767508, "loss": 0.5193, "step": 11739 }, { "epoch": 6.558659217877095, "grad_norm": 0.5144490003585815, "learning_rate": 0.0006742016806722689, 
"loss": 0.4028, "step": 11740 }, { "epoch": 6.559217877094972, "grad_norm": 0.42541375756263733, "learning_rate": 0.0006741736694677871, "loss": 0.4463, "step": 11741 }, { "epoch": 6.5597765363128495, "grad_norm": 0.5716822147369385, "learning_rate": 0.0006741456582633053, "loss": 0.5107, "step": 11742 }, { "epoch": 6.560335195530726, "grad_norm": 0.5076964497566223, "learning_rate": 0.0006741176470588235, "loss": 0.3544, "step": 11743 }, { "epoch": 6.5608938547486035, "grad_norm": 1.107096791267395, "learning_rate": 0.0006740896358543418, "loss": 0.5096, "step": 11744 }, { "epoch": 6.56145251396648, "grad_norm": 0.4176803231239319, "learning_rate": 0.0006740616246498599, "loss": 0.3936, "step": 11745 }, { "epoch": 6.562011173184358, "grad_norm": 0.44088423252105713, "learning_rate": 0.0006740336134453781, "loss": 0.3798, "step": 11746 }, { "epoch": 6.562569832402235, "grad_norm": 1.4666974544525146, "learning_rate": 0.0006740056022408963, "loss": 0.5046, "step": 11747 }, { "epoch": 6.563128491620112, "grad_norm": 5.996772766113281, "learning_rate": 0.0006739775910364146, "loss": 0.4642, "step": 11748 }, { "epoch": 6.563687150837989, "grad_norm": 0.3904889225959778, "learning_rate": 0.0006739495798319329, "loss": 0.3806, "step": 11749 }, { "epoch": 6.564245810055866, "grad_norm": 3.2268543243408203, "learning_rate": 0.000673921568627451, "loss": 0.3809, "step": 11750 }, { "epoch": 6.564804469273743, "grad_norm": 0.6965512037277222, "learning_rate": 0.0006738935574229692, "loss": 0.3569, "step": 11751 }, { "epoch": 6.5653631284916205, "grad_norm": 0.8824526071548462, "learning_rate": 0.0006738655462184874, "loss": 0.5219, "step": 11752 }, { "epoch": 6.565921787709497, "grad_norm": 0.6158630847930908, "learning_rate": 0.0006738375350140056, "loss": 0.4949, "step": 11753 }, { "epoch": 6.5664804469273745, "grad_norm": 0.392378568649292, "learning_rate": 0.0006738095238095239, "loss": 0.3244, "step": 11754 }, { "epoch": 6.567039106145251, "grad_norm": 
0.6249035000801086, "learning_rate": 0.0006737815126050421, "loss": 0.5117, "step": 11755 }, { "epoch": 6.567597765363129, "grad_norm": 0.47027185559272766, "learning_rate": 0.0006737535014005602, "loss": 0.4036, "step": 11756 }, { "epoch": 6.568156424581005, "grad_norm": 0.5993382334709167, "learning_rate": 0.0006737254901960784, "loss": 0.4145, "step": 11757 }, { "epoch": 6.568715083798883, "grad_norm": 0.46324798464775085, "learning_rate": 0.0006736974789915966, "loss": 0.4604, "step": 11758 }, { "epoch": 6.56927374301676, "grad_norm": 0.6123028993606567, "learning_rate": 0.0006736694677871149, "loss": 0.4716, "step": 11759 }, { "epoch": 6.569832402234637, "grad_norm": 0.5876016020774841, "learning_rate": 0.0006736414565826331, "loss": 0.4727, "step": 11760 }, { "epoch": 6.570391061452514, "grad_norm": 0.590881884098053, "learning_rate": 0.0006736134453781512, "loss": 0.4719, "step": 11761 }, { "epoch": 6.5709497206703915, "grad_norm": 0.6244599223136902, "learning_rate": 0.0006735854341736694, "loss": 0.4553, "step": 11762 }, { "epoch": 6.571508379888268, "grad_norm": 0.7276081442832947, "learning_rate": 0.0006735574229691876, "loss": 0.5725, "step": 11763 }, { "epoch": 6.5720670391061455, "grad_norm": 0.5244298577308655, "learning_rate": 0.000673529411764706, "loss": 0.3908, "step": 11764 }, { "epoch": 6.572625698324022, "grad_norm": 0.40962591767311096, "learning_rate": 0.0006735014005602242, "loss": 0.4009, "step": 11765 }, { "epoch": 6.5731843575419, "grad_norm": 0.47400426864624023, "learning_rate": 0.0006734733893557423, "loss": 0.4106, "step": 11766 }, { "epoch": 6.573743016759776, "grad_norm": 0.6502686738967896, "learning_rate": 0.0006734453781512605, "loss": 0.4072, "step": 11767 }, { "epoch": 6.574301675977654, "grad_norm": 0.6568413972854614, "learning_rate": 0.0006734173669467787, "loss": 0.4041, "step": 11768 }, { "epoch": 6.574860335195531, "grad_norm": 0.38906148076057434, "learning_rate": 0.000673389355742297, "loss": 0.3412, "step": 11769 }, { 
"epoch": 6.575418994413408, "grad_norm": 0.755928635597229, "learning_rate": 0.0006733613445378152, "loss": 0.4001, "step": 11770 }, { "epoch": 6.575977653631285, "grad_norm": 0.608433723449707, "learning_rate": 0.0006733333333333334, "loss": 0.442, "step": 11771 }, { "epoch": 6.576536312849162, "grad_norm": 0.5073427557945251, "learning_rate": 0.0006733053221288515, "loss": 0.3972, "step": 11772 }, { "epoch": 6.577094972067039, "grad_norm": 0.8906851410865784, "learning_rate": 0.0006732773109243697, "loss": 0.5405, "step": 11773 }, { "epoch": 6.5776536312849165, "grad_norm": 0.8808069229125977, "learning_rate": 0.000673249299719888, "loss": 0.389, "step": 11774 }, { "epoch": 6.578212290502793, "grad_norm": 0.37472110986709595, "learning_rate": 0.0006732212885154062, "loss": 0.331, "step": 11775 }, { "epoch": 6.578770949720671, "grad_norm": 0.949889600276947, "learning_rate": 0.0006731932773109244, "loss": 0.475, "step": 11776 }, { "epoch": 6.579329608938547, "grad_norm": 0.5708540081977844, "learning_rate": 0.0006731652661064425, "loss": 0.4685, "step": 11777 }, { "epoch": 6.579888268156425, "grad_norm": 1.2496596574783325, "learning_rate": 0.0006731372549019607, "loss": 0.5369, "step": 11778 }, { "epoch": 6.580446927374302, "grad_norm": 0.41932421922683716, "learning_rate": 0.000673109243697479, "loss": 0.4791, "step": 11779 }, { "epoch": 6.581005586592179, "grad_norm": 1.2046666145324707, "learning_rate": 0.0006730812324929973, "loss": 0.6789, "step": 11780 }, { "epoch": 6.581564245810056, "grad_norm": 0.5528629422187805, "learning_rate": 0.0006730532212885155, "loss": 0.4585, "step": 11781 }, { "epoch": 6.582122905027933, "grad_norm": 0.635690450668335, "learning_rate": 0.0006730252100840336, "loss": 0.4981, "step": 11782 }, { "epoch": 6.58268156424581, "grad_norm": 0.36201101541519165, "learning_rate": 0.0006729971988795518, "loss": 0.3224, "step": 11783 }, { "epoch": 6.5832402234636875, "grad_norm": 0.6385451555252075, "learning_rate": 0.0006729691876750701, 
"loss": 0.3413, "step": 11784 }, { "epoch": 6.583798882681564, "grad_norm": 2.327892541885376, "learning_rate": 0.0006729411764705883, "loss": 0.4261, "step": 11785 }, { "epoch": 6.584357541899442, "grad_norm": 0.8808413147926331, "learning_rate": 0.0006729131652661065, "loss": 0.4425, "step": 11786 }, { "epoch": 6.584916201117318, "grad_norm": 0.5280790328979492, "learning_rate": 0.0006728851540616247, "loss": 0.4275, "step": 11787 }, { "epoch": 6.585474860335196, "grad_norm": 0.9772935509681702, "learning_rate": 0.0006728571428571428, "loss": 0.475, "step": 11788 }, { "epoch": 6.586033519553073, "grad_norm": 0.6907405853271484, "learning_rate": 0.0006728291316526611, "loss": 0.5107, "step": 11789 }, { "epoch": 6.58659217877095, "grad_norm": 0.40243226289749146, "learning_rate": 0.0006728011204481793, "loss": 0.45, "step": 11790 }, { "epoch": 6.587150837988827, "grad_norm": 0.40675678849220276, "learning_rate": 0.0006727731092436975, "loss": 0.3203, "step": 11791 }, { "epoch": 6.587709497206704, "grad_norm": 0.7469590306282043, "learning_rate": 0.0006727450980392157, "loss": 0.4546, "step": 11792 }, { "epoch": 6.588268156424581, "grad_norm": 0.48736336827278137, "learning_rate": 0.0006727170868347338, "loss": 0.5293, "step": 11793 }, { "epoch": 6.588826815642458, "grad_norm": 0.5058596730232239, "learning_rate": 0.0006726890756302521, "loss": 0.3562, "step": 11794 }, { "epoch": 6.589385474860335, "grad_norm": 0.8745835423469543, "learning_rate": 0.0006726610644257703, "loss": 0.526, "step": 11795 }, { "epoch": 6.589944134078213, "grad_norm": 0.43587151169776917, "learning_rate": 0.0006726330532212886, "loss": 0.4278, "step": 11796 }, { "epoch": 6.590502793296089, "grad_norm": 0.4505431354045868, "learning_rate": 0.0006726050420168068, "loss": 0.4032, "step": 11797 }, { "epoch": 6.591061452513967, "grad_norm": 0.7565644979476929, "learning_rate": 0.0006725770308123249, "loss": 0.4498, "step": 11798 }, { "epoch": 6.591620111731844, "grad_norm": 0.45958027243614197, 
"learning_rate": 0.0006725490196078432, "loss": 0.4441, "step": 11799 }, { "epoch": 6.592178770949721, "grad_norm": 0.4430047869682312, "learning_rate": 0.0006725210084033614, "loss": 0.4691, "step": 11800 }, { "epoch": 6.592737430167598, "grad_norm": 0.6521354913711548, "learning_rate": 0.0006724929971988796, "loss": 0.4246, "step": 11801 }, { "epoch": 6.593296089385475, "grad_norm": 0.828670084476471, "learning_rate": 0.0006724649859943978, "loss": 0.4228, "step": 11802 }, { "epoch": 6.593854748603352, "grad_norm": 0.737628698348999, "learning_rate": 0.000672436974789916, "loss": 0.3494, "step": 11803 }, { "epoch": 6.594413407821229, "grad_norm": 1.446258783340454, "learning_rate": 0.0006724089635854342, "loss": 0.4789, "step": 11804 }, { "epoch": 6.594972067039106, "grad_norm": 0.5946828126907349, "learning_rate": 0.0006723809523809524, "loss": 0.4658, "step": 11805 }, { "epoch": 6.5955307262569836, "grad_norm": 0.5721814632415771, "learning_rate": 0.0006723529411764706, "loss": 0.4715, "step": 11806 }, { "epoch": 6.59608938547486, "grad_norm": 0.5953073501586914, "learning_rate": 0.0006723249299719888, "loss": 0.4247, "step": 11807 }, { "epoch": 6.596648044692738, "grad_norm": 3.5989556312561035, "learning_rate": 0.000672296918767507, "loss": 0.4803, "step": 11808 }, { "epoch": 6.597206703910614, "grad_norm": 0.4964918792247772, "learning_rate": 0.0006722689075630252, "loss": 0.3813, "step": 11809 }, { "epoch": 6.597765363128492, "grad_norm": 0.4691878855228424, "learning_rate": 0.0006722408963585434, "loss": 0.4632, "step": 11810 }, { "epoch": 6.598324022346369, "grad_norm": 0.9500154256820679, "learning_rate": 0.0006722128851540616, "loss": 0.4424, "step": 11811 }, { "epoch": 6.598882681564246, "grad_norm": 0.501692533493042, "learning_rate": 0.0006721848739495798, "loss": 0.3665, "step": 11812 }, { "epoch": 6.599441340782123, "grad_norm": 1.8537033796310425, "learning_rate": 0.000672156862745098, "loss": 0.4284, "step": 11813 }, { "epoch": 6.6, "grad_norm": 
0.5142460465431213, "learning_rate": 0.0006721288515406164, "loss": 0.6066, "step": 11814 }, { "epoch": 6.600558659217877, "grad_norm": 0.4929690957069397, "learning_rate": 0.0006721008403361345, "loss": 0.4803, "step": 11815 }, { "epoch": 6.6011173184357546, "grad_norm": 1.3422304391860962, "learning_rate": 0.0006720728291316527, "loss": 0.4165, "step": 11816 }, { "epoch": 6.601675977653631, "grad_norm": 0.6303967833518982, "learning_rate": 0.0006720448179271709, "loss": 0.4945, "step": 11817 }, { "epoch": 6.602234636871509, "grad_norm": 0.586501955986023, "learning_rate": 0.0006720168067226891, "loss": 0.3817, "step": 11818 }, { "epoch": 6.602793296089385, "grad_norm": 0.49638524651527405, "learning_rate": 0.0006719887955182074, "loss": 0.4508, "step": 11819 }, { "epoch": 6.603351955307263, "grad_norm": 0.5122184753417969, "learning_rate": 0.0006719607843137255, "loss": 0.4056, "step": 11820 }, { "epoch": 6.603910614525139, "grad_norm": 1.6933468580245972, "learning_rate": 0.0006719327731092437, "loss": 0.375, "step": 11821 }, { "epoch": 6.604469273743017, "grad_norm": 0.598682701587677, "learning_rate": 0.0006719047619047619, "loss": 0.4776, "step": 11822 }, { "epoch": 6.605027932960894, "grad_norm": 0.5985621809959412, "learning_rate": 0.0006718767507002801, "loss": 0.5135, "step": 11823 }, { "epoch": 6.605586592178771, "grad_norm": 0.5397905707359314, "learning_rate": 0.0006718487394957983, "loss": 0.4153, "step": 11824 }, { "epoch": 6.606145251396648, "grad_norm": 0.43009278178215027, "learning_rate": 0.0006718207282913165, "loss": 0.3868, "step": 11825 }, { "epoch": 6.6067039106145256, "grad_norm": 0.6789054870605469, "learning_rate": 0.0006717927170868347, "loss": 0.3637, "step": 11826 }, { "epoch": 6.607262569832402, "grad_norm": 0.7343611121177673, "learning_rate": 0.0006717647058823529, "loss": 0.4905, "step": 11827 }, { "epoch": 6.60782122905028, "grad_norm": 0.9991815090179443, "learning_rate": 0.0006717366946778711, "loss": 0.453, "step": 11828 }, { 
"epoch": 6.608379888268156, "grad_norm": 0.45997855067253113, "learning_rate": 0.0006717086834733893, "loss": 0.396, "step": 11829 }, { "epoch": 6.608938547486034, "grad_norm": 0.5412790179252625, "learning_rate": 0.0006716806722689077, "loss": 0.3623, "step": 11830 }, { "epoch": 6.60949720670391, "grad_norm": 0.6508433818817139, "learning_rate": 0.0006716526610644258, "loss": 0.4101, "step": 11831 }, { "epoch": 6.610055865921788, "grad_norm": 0.646654486656189, "learning_rate": 0.000671624649859944, "loss": 0.5739, "step": 11832 }, { "epoch": 6.610614525139665, "grad_norm": 0.47801244258880615, "learning_rate": 0.0006715966386554622, "loss": 0.4186, "step": 11833 }, { "epoch": 6.611173184357542, "grad_norm": 0.4593677818775177, "learning_rate": 0.0006715686274509804, "loss": 0.3877, "step": 11834 }, { "epoch": 6.611731843575419, "grad_norm": 0.7329741716384888, "learning_rate": 0.0006715406162464987, "loss": 0.502, "step": 11835 }, { "epoch": 6.6122905027932966, "grad_norm": 0.46324464678764343, "learning_rate": 0.0006715126050420168, "loss": 0.3572, "step": 11836 }, { "epoch": 6.612849162011173, "grad_norm": 1.8622710704803467, "learning_rate": 0.000671484593837535, "loss": 0.4531, "step": 11837 }, { "epoch": 6.613407821229051, "grad_norm": 0.6932263374328613, "learning_rate": 0.0006714565826330532, "loss": 0.3735, "step": 11838 }, { "epoch": 6.613966480446927, "grad_norm": 0.5971950888633728, "learning_rate": 0.0006714285714285714, "loss": 0.4406, "step": 11839 }, { "epoch": 6.614525139664805, "grad_norm": 0.48642411828041077, "learning_rate": 0.0006714005602240897, "loss": 0.4243, "step": 11840 }, { "epoch": 6.615083798882681, "grad_norm": 0.4740616977214813, "learning_rate": 0.0006713725490196078, "loss": 0.7003, "step": 11841 }, { "epoch": 6.615642458100559, "grad_norm": 1.5656554698944092, "learning_rate": 0.000671344537815126, "loss": 0.523, "step": 11842 }, { "epoch": 6.616201117318436, "grad_norm": 0.52299565076828, "learning_rate": 0.0006713165266106442, 
"loss": 0.4681, "step": 11843 }, { "epoch": 6.616759776536313, "grad_norm": 0.4576612710952759, "learning_rate": 0.0006712885154061624, "loss": 0.3388, "step": 11844 }, { "epoch": 6.61731843575419, "grad_norm": 1.0242376327514648, "learning_rate": 0.0006712605042016808, "loss": 0.545, "step": 11845 }, { "epoch": 6.617877094972067, "grad_norm": 0.533794641494751, "learning_rate": 0.000671232492997199, "loss": 0.5442, "step": 11846 }, { "epoch": 6.618435754189944, "grad_norm": 0.6163023114204407, "learning_rate": 0.0006712044817927171, "loss": 0.5745, "step": 11847 }, { "epoch": 6.618994413407822, "grad_norm": 0.5679111480712891, "learning_rate": 0.0006711764705882353, "loss": 0.3956, "step": 11848 }, { "epoch": 6.619553072625698, "grad_norm": 0.49660468101501465, "learning_rate": 0.0006711484593837535, "loss": 0.3644, "step": 11849 }, { "epoch": 6.620111731843576, "grad_norm": 0.5545671582221985, "learning_rate": 0.0006711204481792718, "loss": 0.438, "step": 11850 }, { "epoch": 6.620670391061452, "grad_norm": 0.4909864664077759, "learning_rate": 0.00067109243697479, "loss": 0.4799, "step": 11851 }, { "epoch": 6.62122905027933, "grad_norm": 0.7022113800048828, "learning_rate": 0.0006710644257703081, "loss": 0.4846, "step": 11852 }, { "epoch": 6.621787709497207, "grad_norm": 0.5411577820777893, "learning_rate": 0.0006710364145658263, "loss": 0.446, "step": 11853 }, { "epoch": 6.622346368715084, "grad_norm": 0.5130810737609863, "learning_rate": 0.0006710084033613445, "loss": 0.4285, "step": 11854 }, { "epoch": 6.622905027932961, "grad_norm": 0.5893478989601135, "learning_rate": 0.0006709803921568628, "loss": 0.4307, "step": 11855 }, { "epoch": 6.623463687150838, "grad_norm": 4.347335338592529, "learning_rate": 0.000670952380952381, "loss": 0.477, "step": 11856 }, { "epoch": 6.624022346368715, "grad_norm": 0.8039206266403198, "learning_rate": 0.0006709243697478991, "loss": 0.4949, "step": 11857 }, { "epoch": 6.624581005586592, "grad_norm": 0.5761498212814331, 
"learning_rate": 0.0006708963585434173, "loss": 0.4806, "step": 11858 }, { "epoch": 6.625139664804469, "grad_norm": 0.3392082750797272, "learning_rate": 0.0006708683473389355, "loss": 0.3649, "step": 11859 }, { "epoch": 6.625698324022347, "grad_norm": 6.200045585632324, "learning_rate": 0.0006708403361344538, "loss": 0.4398, "step": 11860 }, { "epoch": 6.626256983240223, "grad_norm": 0.4584237337112427, "learning_rate": 0.000670812324929972, "loss": 0.4103, "step": 11861 }, { "epoch": 6.626815642458101, "grad_norm": 0.5027204751968384, "learning_rate": 0.0006707843137254903, "loss": 0.4293, "step": 11862 }, { "epoch": 6.627374301675978, "grad_norm": 0.5450441241264343, "learning_rate": 0.0006707563025210084, "loss": 0.4954, "step": 11863 }, { "epoch": 6.627932960893855, "grad_norm": 0.42596668004989624, "learning_rate": 0.0006707282913165266, "loss": 0.4258, "step": 11864 }, { "epoch": 6.628491620111732, "grad_norm": 0.5425561666488647, "learning_rate": 0.0006707002801120449, "loss": 0.4617, "step": 11865 }, { "epoch": 6.629050279329609, "grad_norm": 0.5719782114028931, "learning_rate": 0.0006706722689075631, "loss": 0.5718, "step": 11866 }, { "epoch": 6.629608938547486, "grad_norm": 1.2569321393966675, "learning_rate": 0.0006706442577030813, "loss": 0.4671, "step": 11867 }, { "epoch": 6.630167597765363, "grad_norm": 0.4194042384624481, "learning_rate": 0.0006706162464985994, "loss": 0.4061, "step": 11868 }, { "epoch": 6.63072625698324, "grad_norm": 0.7099469900131226, "learning_rate": 0.0006705882352941176, "loss": 0.4435, "step": 11869 }, { "epoch": 6.631284916201118, "grad_norm": 0.39683932065963745, "learning_rate": 0.0006705602240896359, "loss": 0.3773, "step": 11870 }, { "epoch": 6.631843575418994, "grad_norm": 1.4019768238067627, "learning_rate": 0.0006705322128851541, "loss": 0.5027, "step": 11871 }, { "epoch": 6.632402234636872, "grad_norm": 2.508816957473755, "learning_rate": 0.0006705042016806723, "loss": 0.4704, "step": 11872 }, { "epoch": 
6.632960893854749, "grad_norm": 0.7168423533439636, "learning_rate": 0.0006704761904761904, "loss": 0.4874, "step": 11873 }, { "epoch": 6.633519553072626, "grad_norm": 0.44258370995521545, "learning_rate": 0.0006704481792717086, "loss": 0.3925, "step": 11874 }, { "epoch": 6.634078212290503, "grad_norm": 0.4182576835155487, "learning_rate": 0.0006704201680672269, "loss": 0.4099, "step": 11875 }, { "epoch": 6.63463687150838, "grad_norm": 0.41694721579551697, "learning_rate": 0.0006703921568627451, "loss": 0.4324, "step": 11876 }, { "epoch": 6.635195530726257, "grad_norm": 0.41879621148109436, "learning_rate": 0.0006703641456582633, "loss": 0.4035, "step": 11877 }, { "epoch": 6.635754189944134, "grad_norm": 0.6156706213951111, "learning_rate": 0.0006703361344537816, "loss": 0.3844, "step": 11878 }, { "epoch": 6.636312849162011, "grad_norm": 0.6671702861785889, "learning_rate": 0.0006703081232492996, "loss": 0.5469, "step": 11879 }, { "epoch": 6.636871508379889, "grad_norm": 0.4193579852581024, "learning_rate": 0.000670280112044818, "loss": 0.4663, "step": 11880 }, { "epoch": 6.637430167597765, "grad_norm": 1.7180861234664917, "learning_rate": 0.0006702521008403362, "loss": 0.4079, "step": 11881 }, { "epoch": 6.637988826815643, "grad_norm": 0.41882604360580444, "learning_rate": 0.0006702240896358544, "loss": 0.3873, "step": 11882 }, { "epoch": 6.638547486033519, "grad_norm": 1.1675763130187988, "learning_rate": 0.0006701960784313726, "loss": 0.4308, "step": 11883 }, { "epoch": 6.639106145251397, "grad_norm": 0.5950065851211548, "learning_rate": 0.0006701680672268907, "loss": 0.4061, "step": 11884 }, { "epoch": 6.639664804469274, "grad_norm": 0.7166126370429993, "learning_rate": 0.000670140056022409, "loss": 0.4293, "step": 11885 }, { "epoch": 6.640223463687151, "grad_norm": 0.5278535485267639, "learning_rate": 0.0006701120448179272, "loss": 0.479, "step": 11886 }, { "epoch": 6.640782122905028, "grad_norm": 0.473520964384079, "learning_rate": 0.0006700840336134454, 
"loss": 0.4338, "step": 11887 }, { "epoch": 6.641340782122905, "grad_norm": 0.43621179461479187, "learning_rate": 0.0006700560224089636, "loss": 0.4466, "step": 11888 }, { "epoch": 6.641899441340782, "grad_norm": 0.5028188228607178, "learning_rate": 0.0006700280112044817, "loss": 0.4559, "step": 11889 }, { "epoch": 6.64245810055866, "grad_norm": 1.0624299049377441, "learning_rate": 0.00067, "loss": 0.4077, "step": 11890 }, { "epoch": 6.643016759776536, "grad_norm": 0.4720621407032013, "learning_rate": 0.0006699719887955182, "loss": 0.4667, "step": 11891 }, { "epoch": 6.643575418994414, "grad_norm": 0.38738808035850525, "learning_rate": 0.0006699439775910364, "loss": 0.3834, "step": 11892 }, { "epoch": 6.64413407821229, "grad_norm": 0.6606648564338684, "learning_rate": 0.0006699159663865546, "loss": 0.6001, "step": 11893 }, { "epoch": 6.644692737430168, "grad_norm": 1.6325286626815796, "learning_rate": 0.0006698879551820728, "loss": 0.3995, "step": 11894 }, { "epoch": 6.645251396648044, "grad_norm": 0.597726047039032, "learning_rate": 0.000669859943977591, "loss": 0.4367, "step": 11895 }, { "epoch": 6.645810055865922, "grad_norm": 0.4438652992248535, "learning_rate": 0.0006698319327731093, "loss": 0.3943, "step": 11896 }, { "epoch": 6.646368715083799, "grad_norm": 0.6187936067581177, "learning_rate": 0.0006698039215686275, "loss": 0.4991, "step": 11897 }, { "epoch": 6.646927374301676, "grad_norm": 1.6471140384674072, "learning_rate": 0.0006697759103641457, "loss": 0.4304, "step": 11898 }, { "epoch": 6.647486033519553, "grad_norm": 0.4638585150241852, "learning_rate": 0.0006697478991596639, "loss": 0.3907, "step": 11899 }, { "epoch": 6.648044692737431, "grad_norm": 0.6080419421195984, "learning_rate": 0.0006697198879551821, "loss": 0.5716, "step": 11900 }, { "epoch": 6.648603351955307, "grad_norm": 0.7600609660148621, "learning_rate": 0.0006696918767507003, "loss": 0.5238, "step": 11901 }, { "epoch": 6.649162011173185, "grad_norm": 0.40342411398887634, 
"learning_rate": 0.0006696638655462185, "loss": 0.3784, "step": 11902 }, { "epoch": 6.649720670391061, "grad_norm": 0.4914536774158478, "learning_rate": 0.0006696358543417367, "loss": 0.3133, "step": 11903 }, { "epoch": 6.650279329608939, "grad_norm": 0.36723074316978455, "learning_rate": 0.0006696078431372549, "loss": 0.3745, "step": 11904 }, { "epoch": 6.650837988826815, "grad_norm": 0.8028084635734558, "learning_rate": 0.0006695798319327731, "loss": 0.4228, "step": 11905 }, { "epoch": 6.651396648044693, "grad_norm": 0.39742809534072876, "learning_rate": 0.0006695518207282913, "loss": 0.3796, "step": 11906 }, { "epoch": 6.65195530726257, "grad_norm": 0.7723726034164429, "learning_rate": 0.0006695238095238095, "loss": 0.4192, "step": 11907 }, { "epoch": 6.652513966480447, "grad_norm": 0.42612823843955994, "learning_rate": 0.0006694957983193277, "loss": 0.5041, "step": 11908 }, { "epoch": 6.653072625698324, "grad_norm": 0.5529517531394958, "learning_rate": 0.0006694677871148459, "loss": 0.6456, "step": 11909 }, { "epoch": 6.653631284916202, "grad_norm": 0.432962030172348, "learning_rate": 0.0006694397759103643, "loss": 0.4052, "step": 11910 }, { "epoch": 6.654189944134078, "grad_norm": 0.7557727694511414, "learning_rate": 0.0006694117647058823, "loss": 0.5632, "step": 11911 }, { "epoch": 6.654748603351956, "grad_norm": 0.44415441155433655, "learning_rate": 0.0006693837535014006, "loss": 0.556, "step": 11912 }, { "epoch": 6.655307262569832, "grad_norm": 1.462938666343689, "learning_rate": 0.0006693557422969188, "loss": 0.5532, "step": 11913 }, { "epoch": 6.65586592178771, "grad_norm": 0.7390327453613281, "learning_rate": 0.000669327731092437, "loss": 0.4529, "step": 11914 }, { "epoch": 6.656424581005586, "grad_norm": 2.6326262950897217, "learning_rate": 0.0006692997198879553, "loss": 0.3835, "step": 11915 }, { "epoch": 6.656983240223464, "grad_norm": 0.4834195673465729, "learning_rate": 0.0006692717086834734, "loss": 0.3513, "step": 11916 }, { "epoch": 
6.657541899441341, "grad_norm": 0.48843714594841003, "learning_rate": 0.0006692436974789916, "loss": 0.4866, "step": 11917 }, { "epoch": 6.658100558659218, "grad_norm": 0.9501739144325256, "learning_rate": 0.0006692156862745098, "loss": 0.4514, "step": 11918 }, { "epoch": 6.658659217877095, "grad_norm": 0.5491495728492737, "learning_rate": 0.000669187675070028, "loss": 0.4346, "step": 11919 }, { "epoch": 6.659217877094972, "grad_norm": 0.5092610716819763, "learning_rate": 0.0006691596638655463, "loss": 0.5333, "step": 11920 }, { "epoch": 6.659776536312849, "grad_norm": 0.5051995515823364, "learning_rate": 0.0006691316526610644, "loss": 0.4432, "step": 11921 }, { "epoch": 6.660335195530727, "grad_norm": 0.6132280230522156, "learning_rate": 0.0006691036414565826, "loss": 0.3799, "step": 11922 }, { "epoch": 6.660893854748603, "grad_norm": 0.7479427456855774, "learning_rate": 0.0006690756302521008, "loss": 0.5133, "step": 11923 }, { "epoch": 6.661452513966481, "grad_norm": 0.6331705451011658, "learning_rate": 0.000669047619047619, "loss": 0.4406, "step": 11924 }, { "epoch": 6.662011173184357, "grad_norm": 0.6844003796577454, "learning_rate": 0.0006690196078431373, "loss": 0.449, "step": 11925 }, { "epoch": 6.662569832402235, "grad_norm": 0.4110753834247589, "learning_rate": 0.0006689915966386555, "loss": 0.4646, "step": 11926 }, { "epoch": 6.663128491620112, "grad_norm": 0.7246172428131104, "learning_rate": 0.0006689635854341736, "loss": 0.4685, "step": 11927 }, { "epoch": 6.663687150837989, "grad_norm": 0.7584655284881592, "learning_rate": 0.0006689355742296919, "loss": 0.7446, "step": 11928 }, { "epoch": 6.664245810055866, "grad_norm": 0.6096429228782654, "learning_rate": 0.0006689075630252101, "loss": 0.5194, "step": 11929 }, { "epoch": 6.664804469273743, "grad_norm": 0.9509395956993103, "learning_rate": 0.0006688795518207284, "loss": 0.5149, "step": 11930 }, { "epoch": 6.66536312849162, "grad_norm": 0.41306325793266296, "learning_rate": 0.0006688515406162466, 
"loss": 0.4323, "step": 11931 }, { "epoch": 6.665921787709497, "grad_norm": 0.618394136428833, "learning_rate": 0.0006688235294117647, "loss": 0.4677, "step": 11932 }, { "epoch": 6.666480446927374, "grad_norm": 0.8841884732246399, "learning_rate": 0.0006687955182072829, "loss": 0.4686, "step": 11933 }, { "epoch": 6.667039106145252, "grad_norm": 0.7433735132217407, "learning_rate": 0.0006687675070028011, "loss": 0.4896, "step": 11934 }, { "epoch": 6.667597765363128, "grad_norm": 0.9621394276618958, "learning_rate": 0.0006687394957983194, "loss": 0.4834, "step": 11935 }, { "epoch": 6.668156424581006, "grad_norm": 1.4438282251358032, "learning_rate": 0.0006687114845938376, "loss": 0.4916, "step": 11936 }, { "epoch": 6.668715083798883, "grad_norm": 0.3853825032711029, "learning_rate": 0.0006686834733893557, "loss": 0.4277, "step": 11937 }, { "epoch": 6.66927374301676, "grad_norm": 2.2963244915008545, "learning_rate": 0.0006686554621848739, "loss": 0.5279, "step": 11938 }, { "epoch": 6.669832402234637, "grad_norm": 0.48953819274902344, "learning_rate": 0.0006686274509803921, "loss": 0.5855, "step": 11939 }, { "epoch": 6.670391061452514, "grad_norm": 0.7464914917945862, "learning_rate": 0.0006685994397759104, "loss": 0.549, "step": 11940 }, { "epoch": 6.670949720670391, "grad_norm": 0.5952526926994324, "learning_rate": 0.0006685714285714286, "loss": 0.4605, "step": 11941 }, { "epoch": 6.671508379888268, "grad_norm": 2.3519411087036133, "learning_rate": 0.0006685434173669468, "loss": 0.4862, "step": 11942 }, { "epoch": 6.672067039106145, "grad_norm": 0.5067525506019592, "learning_rate": 0.0006685154061624649, "loss": 0.5444, "step": 11943 }, { "epoch": 6.672625698324023, "grad_norm": 1.0751349925994873, "learning_rate": 0.0006684873949579831, "loss": 0.5272, "step": 11944 }, { "epoch": 6.673184357541899, "grad_norm": 0.5294737219810486, "learning_rate": 0.0006684593837535015, "loss": 0.3459, "step": 11945 }, { "epoch": 6.673743016759777, "grad_norm": 0.4766441583633423, 
"learning_rate": 0.0006684313725490197, "loss": 0.373, "step": 11946 }, { "epoch": 6.674301675977654, "grad_norm": 0.5001503825187683, "learning_rate": 0.0006684033613445379, "loss": 0.3901, "step": 11947 }, { "epoch": 6.674860335195531, "grad_norm": 0.5113135576248169, "learning_rate": 0.000668375350140056, "loss": 0.4565, "step": 11948 }, { "epoch": 6.675418994413408, "grad_norm": 0.6157906651496887, "learning_rate": 0.0006683473389355742, "loss": 0.398, "step": 11949 }, { "epoch": 6.675977653631285, "grad_norm": 0.5761928558349609, "learning_rate": 0.0006683193277310925, "loss": 0.4914, "step": 11950 }, { "epoch": 6.676536312849162, "grad_norm": 0.4351007640361786, "learning_rate": 0.0006682913165266107, "loss": 0.4139, "step": 11951 }, { "epoch": 6.677094972067039, "grad_norm": 0.4437105655670166, "learning_rate": 0.0006682633053221289, "loss": 0.4052, "step": 11952 }, { "epoch": 6.677653631284916, "grad_norm": 0.5127741694450378, "learning_rate": 0.000668235294117647, "loss": 0.4141, "step": 11953 }, { "epoch": 6.678212290502794, "grad_norm": 0.7881187796592712, "learning_rate": 0.0006682072829131652, "loss": 0.4227, "step": 11954 }, { "epoch": 6.67877094972067, "grad_norm": 0.5743787288665771, "learning_rate": 0.0006681792717086835, "loss": 0.4614, "step": 11955 }, { "epoch": 6.679329608938548, "grad_norm": 0.5567173957824707, "learning_rate": 0.0006681512605042017, "loss": 0.4296, "step": 11956 }, { "epoch": 6.679888268156424, "grad_norm": 0.6149084568023682, "learning_rate": 0.0006681232492997199, "loss": 0.527, "step": 11957 }, { "epoch": 6.680446927374302, "grad_norm": 0.664783775806427, "learning_rate": 0.0006680952380952381, "loss": 0.4199, "step": 11958 }, { "epoch": 6.681005586592179, "grad_norm": 0.5344581604003906, "learning_rate": 0.0006680672268907562, "loss": 0.5034, "step": 11959 }, { "epoch": 6.681564245810056, "grad_norm": 0.5123084783554077, "learning_rate": 0.0006680392156862746, "loss": 0.4463, "step": 11960 }, { "epoch": 6.682122905027933, 
"grad_norm": 0.4994768798351288, "learning_rate": 0.0006680112044817928, "loss": 0.4244, "step": 11961 }, { "epoch": 6.68268156424581, "grad_norm": 0.4394984245300293, "learning_rate": 0.000667983193277311, "loss": 0.5317, "step": 11962 }, { "epoch": 6.683240223463687, "grad_norm": 0.44369298219680786, "learning_rate": 0.0006679551820728292, "loss": 0.5142, "step": 11963 }, { "epoch": 6.683798882681565, "grad_norm": 0.4762931168079376, "learning_rate": 0.0006679271708683473, "loss": 0.411, "step": 11964 }, { "epoch": 6.684357541899441, "grad_norm": 0.9273788332939148, "learning_rate": 0.0006678991596638656, "loss": 0.4241, "step": 11965 }, { "epoch": 6.684916201117319, "grad_norm": 0.42331764101982117, "learning_rate": 0.0006678711484593838, "loss": 0.4133, "step": 11966 }, { "epoch": 6.685474860335195, "grad_norm": 0.6511138677597046, "learning_rate": 0.000667843137254902, "loss": 0.5919, "step": 11967 }, { "epoch": 6.686033519553073, "grad_norm": 0.49632054567337036, "learning_rate": 0.0006678151260504202, "loss": 0.4345, "step": 11968 }, { "epoch": 6.686592178770949, "grad_norm": 0.543677806854248, "learning_rate": 0.0006677871148459383, "loss": 0.4024, "step": 11969 }, { "epoch": 6.687150837988827, "grad_norm": 0.5130450129508972, "learning_rate": 0.0006677591036414566, "loss": 0.4684, "step": 11970 }, { "epoch": 6.687709497206704, "grad_norm": 0.5427372455596924, "learning_rate": 0.0006677310924369748, "loss": 0.4437, "step": 11971 }, { "epoch": 6.688268156424581, "grad_norm": 0.775134265422821, "learning_rate": 0.000667703081232493, "loss": 0.4596, "step": 11972 }, { "epoch": 6.688826815642458, "grad_norm": 1.0085527896881104, "learning_rate": 0.0006676750700280112, "loss": 0.6023, "step": 11973 }, { "epoch": 6.689385474860336, "grad_norm": 0.3617066740989685, "learning_rate": 0.0006676470588235294, "loss": 0.3793, "step": 11974 }, { "epoch": 6.689944134078212, "grad_norm": 1.0881379842758179, "learning_rate": 0.0006676190476190476, "loss": 0.7233, "step": 
11975 }, { "epoch": 6.69050279329609, "grad_norm": 0.3984529972076416, "learning_rate": 0.0006675910364145658, "loss": 0.477, "step": 11976 }, { "epoch": 6.691061452513966, "grad_norm": 0.8676496148109436, "learning_rate": 0.000667563025210084, "loss": 0.4396, "step": 11977 }, { "epoch": 6.691620111731844, "grad_norm": 0.43005532026290894, "learning_rate": 0.0006675350140056023, "loss": 0.4039, "step": 11978 }, { "epoch": 6.69217877094972, "grad_norm": 0.5955531001091003, "learning_rate": 0.0006675070028011205, "loss": 0.3239, "step": 11979 }, { "epoch": 6.692737430167598, "grad_norm": 0.4432985782623291, "learning_rate": 0.0006674789915966387, "loss": 0.4006, "step": 11980 }, { "epoch": 6.693296089385475, "grad_norm": 0.44831740856170654, "learning_rate": 0.0006674509803921569, "loss": 0.3296, "step": 11981 }, { "epoch": 6.693854748603352, "grad_norm": 0.5185133814811707, "learning_rate": 0.0006674229691876751, "loss": 0.4796, "step": 11982 }, { "epoch": 6.694413407821229, "grad_norm": 1.4772552251815796, "learning_rate": 0.0006673949579831933, "loss": 0.4602, "step": 11983 }, { "epoch": 6.694972067039107, "grad_norm": 0.6739949584007263, "learning_rate": 0.0006673669467787115, "loss": 0.4528, "step": 11984 }, { "epoch": 6.695530726256983, "grad_norm": 1.486723780632019, "learning_rate": 0.0006673389355742298, "loss": 0.5464, "step": 11985 }, { "epoch": 6.696089385474861, "grad_norm": 0.4152049720287323, "learning_rate": 0.0006673109243697479, "loss": 0.401, "step": 11986 }, { "epoch": 6.696648044692737, "grad_norm": 1.7777329683303833, "learning_rate": 0.0006672829131652661, "loss": 0.433, "step": 11987 }, { "epoch": 6.697206703910615, "grad_norm": 0.5812040567398071, "learning_rate": 0.0006672549019607843, "loss": 0.4011, "step": 11988 }, { "epoch": 6.697765363128491, "grad_norm": 0.4774755537509918, "learning_rate": 0.0006672268907563025, "loss": 0.4119, "step": 11989 }, { "epoch": 6.698324022346369, "grad_norm": 0.4158855080604553, "learning_rate": 
0.0006671988795518208, "loss": 0.4326, "step": 11990 }, { "epoch": 6.698882681564246, "grad_norm": 1.1127920150756836, "learning_rate": 0.0006671708683473389, "loss": 0.4633, "step": 11991 }, { "epoch": 6.699441340782123, "grad_norm": 0.383543461561203, "learning_rate": 0.0006671428571428571, "loss": 0.3522, "step": 11992 }, { "epoch": 6.7, "grad_norm": 0.5842460989952087, "learning_rate": 0.0006671148459383753, "loss": 0.4311, "step": 11993 }, { "epoch": 6.700558659217877, "grad_norm": 0.53638756275177, "learning_rate": 0.0006670868347338936, "loss": 0.407, "step": 11994 }, { "epoch": 6.701117318435754, "grad_norm": 0.8463553786277771, "learning_rate": 0.0006670588235294119, "loss": 0.3638, "step": 11995 }, { "epoch": 6.701675977653632, "grad_norm": 0.4048050045967102, "learning_rate": 0.00066703081232493, "loss": 0.3987, "step": 11996 }, { "epoch": 6.702234636871508, "grad_norm": 0.9497975707054138, "learning_rate": 0.0006670028011204482, "loss": 0.3657, "step": 11997 }, { "epoch": 6.702793296089386, "grad_norm": 0.6640595197677612, "learning_rate": 0.0006669747899159664, "loss": 0.431, "step": 11998 }, { "epoch": 6.703351955307262, "grad_norm": 0.9014831781387329, "learning_rate": 0.0006669467787114846, "loss": 0.5538, "step": 11999 }, { "epoch": 6.70391061452514, "grad_norm": 3.5594048500061035, "learning_rate": 0.0006669187675070029, "loss": 0.7649, "step": 12000 }, { "epoch": 6.70391061452514, "eval_cer": 0.09162820638713408, "eval_loss": 0.34500652551651, "eval_runtime": 60.9046, "eval_samples_per_second": 74.51, "eval_steps_per_second": 4.663, "eval_wer": 0.364654618586073, "step": 12000 }, { "epoch": 6.704469273743017, "grad_norm": 0.5483406186103821, "learning_rate": 0.0006668907563025211, "loss": 0.5314, "step": 12001 }, { "epoch": 6.705027932960894, "grad_norm": 0.9583224058151245, "learning_rate": 0.0006668627450980392, "loss": 0.3826, "step": 12002 }, { "epoch": 6.705586592178771, "grad_norm": 0.6977572441101074, "learning_rate": 
0.0006668347338935574, "loss": 0.4966, "step": 12003 }, { "epoch": 6.706145251396648, "grad_norm": 2.1843104362487793, "learning_rate": 0.0006668067226890756, "loss": 0.3792, "step": 12004 }, { "epoch": 6.706703910614525, "grad_norm": 0.5551536083221436, "learning_rate": 0.0006667787114845939, "loss": 0.4793, "step": 12005 }, { "epoch": 6.707262569832402, "grad_norm": 0.665658175945282, "learning_rate": 0.0006667507002801121, "loss": 0.5097, "step": 12006 }, { "epoch": 6.707821229050279, "grad_norm": 0.7750752568244934, "learning_rate": 0.0006667226890756302, "loss": 0.4892, "step": 12007 }, { "epoch": 6.708379888268157, "grad_norm": 0.5526975393295288, "learning_rate": 0.0006666946778711484, "loss": 0.6484, "step": 12008 }, { "epoch": 6.708938547486033, "grad_norm": 0.6482200026512146, "learning_rate": 0.0006666666666666666, "loss": 0.4489, "step": 12009 }, { "epoch": 6.709497206703911, "grad_norm": 0.4073962867259979, "learning_rate": 0.000666638655462185, "loss": 0.3483, "step": 12010 }, { "epoch": 6.710055865921788, "grad_norm": 0.4463841915130615, "learning_rate": 0.0006666106442577032, "loss": 0.4073, "step": 12011 }, { "epoch": 6.710614525139665, "grad_norm": 0.4955034852027893, "learning_rate": 0.0006665826330532213, "loss": 0.4875, "step": 12012 }, { "epoch": 6.711173184357542, "grad_norm": 0.4222066104412079, "learning_rate": 0.0006665546218487395, "loss": 0.3956, "step": 12013 }, { "epoch": 6.711731843575419, "grad_norm": 4.740274429321289, "learning_rate": 0.0006665266106442577, "loss": 0.4488, "step": 12014 }, { "epoch": 6.712290502793296, "grad_norm": 0.8777411580085754, "learning_rate": 0.000666498599439776, "loss": 0.7606, "step": 12015 }, { "epoch": 6.712849162011173, "grad_norm": 0.4971480071544647, "learning_rate": 0.0006664705882352942, "loss": 0.4609, "step": 12016 }, { "epoch": 6.71340782122905, "grad_norm": 0.5550456643104553, "learning_rate": 0.0006664425770308124, "loss": 0.3525, "step": 12017 }, { "epoch": 6.713966480446928, "grad_norm": 
0.7842357158660889, "learning_rate": 0.0006664145658263305, "loss": 0.3546, "step": 12018 }, { "epoch": 6.714525139664804, "grad_norm": 0.41769638657569885, "learning_rate": 0.0006663865546218487, "loss": 0.426, "step": 12019 }, { "epoch": 6.715083798882682, "grad_norm": 0.454759418964386, "learning_rate": 0.000666358543417367, "loss": 0.4387, "step": 12020 }, { "epoch": 6.715642458100559, "grad_norm": 0.6732493042945862, "learning_rate": 0.0006663305322128852, "loss": 0.4907, "step": 12021 }, { "epoch": 6.716201117318436, "grad_norm": 0.4838760197162628, "learning_rate": 0.0006663025210084034, "loss": 0.4969, "step": 12022 }, { "epoch": 6.716759776536313, "grad_norm": 0.7262096405029297, "learning_rate": 0.0006662745098039215, "loss": 0.5392, "step": 12023 }, { "epoch": 6.71731843575419, "grad_norm": 0.6183413863182068, "learning_rate": 0.0006662464985994397, "loss": 0.5025, "step": 12024 }, { "epoch": 6.717877094972067, "grad_norm": 0.506846010684967, "learning_rate": 0.000666218487394958, "loss": 0.4099, "step": 12025 }, { "epoch": 6.718435754189944, "grad_norm": 0.7997880578041077, "learning_rate": 0.0006661904761904763, "loss": 0.4685, "step": 12026 }, { "epoch": 6.718994413407821, "grad_norm": 0.3239750862121582, "learning_rate": 0.0006661624649859945, "loss": 0.3795, "step": 12027 }, { "epoch": 6.719553072625699, "grad_norm": 1.2035984992980957, "learning_rate": 0.0006661344537815126, "loss": 0.6141, "step": 12028 }, { "epoch": 6.720111731843575, "grad_norm": 0.40416109561920166, "learning_rate": 0.0006661064425770308, "loss": 0.3345, "step": 12029 }, { "epoch": 6.720670391061453, "grad_norm": 0.9208695292472839, "learning_rate": 0.0006660784313725491, "loss": 0.5067, "step": 12030 }, { "epoch": 6.721229050279329, "grad_norm": 0.41875791549682617, "learning_rate": 0.0006660504201680673, "loss": 0.4151, "step": 12031 }, { "epoch": 6.721787709497207, "grad_norm": 0.521105945110321, "learning_rate": 0.0006660224089635855, "loss": 0.3748, "step": 12032 }, { 
"epoch": 6.722346368715084, "grad_norm": 0.4621300995349884, "learning_rate": 0.0006659943977591037, "loss": 0.3065, "step": 12033 }, { "epoch": 6.722905027932961, "grad_norm": 1.4029648303985596, "learning_rate": 0.0006659663865546218, "loss": 0.4561, "step": 12034 }, { "epoch": 6.723463687150838, "grad_norm": 0.46591830253601074, "learning_rate": 0.0006659383753501401, "loss": 0.4577, "step": 12035 }, { "epoch": 6.724022346368715, "grad_norm": 1.3738367557525635, "learning_rate": 0.0006659103641456583, "loss": 0.4709, "step": 12036 }, { "epoch": 6.724581005586592, "grad_norm": 0.6199467182159424, "learning_rate": 0.0006658823529411765, "loss": 0.43, "step": 12037 }, { "epoch": 6.72513966480447, "grad_norm": 0.47354018688201904, "learning_rate": 0.0006658543417366947, "loss": 0.4562, "step": 12038 }, { "epoch": 6.725698324022346, "grad_norm": 0.46467769145965576, "learning_rate": 0.0006658263305322128, "loss": 0.3213, "step": 12039 }, { "epoch": 6.726256983240224, "grad_norm": 0.4763118028640747, "learning_rate": 0.0006657983193277311, "loss": 0.4166, "step": 12040 }, { "epoch": 6.7268156424581, "grad_norm": 0.4471207857131958, "learning_rate": 0.0006657703081232493, "loss": 0.398, "step": 12041 }, { "epoch": 6.727374301675978, "grad_norm": 0.7418835163116455, "learning_rate": 0.0006657422969187676, "loss": 0.4962, "step": 12042 }, { "epoch": 6.727932960893854, "grad_norm": 0.671700656414032, "learning_rate": 0.0006657142857142858, "loss": 0.412, "step": 12043 }, { "epoch": 6.728491620111732, "grad_norm": 1.4614101648330688, "learning_rate": 0.0006656862745098039, "loss": 0.3884, "step": 12044 }, { "epoch": 6.729050279329609, "grad_norm": 0.4766533374786377, "learning_rate": 0.0006656582633053222, "loss": 0.3857, "step": 12045 }, { "epoch": 6.729608938547486, "grad_norm": 0.4485678970813751, "learning_rate": 0.0006656302521008404, "loss": 0.4191, "step": 12046 }, { "epoch": 6.730167597765363, "grad_norm": 0.5006360411643982, "learning_rate": 0.0006656022408963586, 
"loss": 0.3175, "step": 12047 }, { "epoch": 6.730726256983241, "grad_norm": 0.707591712474823, "learning_rate": 0.0006655742296918768, "loss": 0.4525, "step": 12048 }, { "epoch": 6.731284916201117, "grad_norm": 0.6048533916473389, "learning_rate": 0.000665546218487395, "loss": 0.4362, "step": 12049 }, { "epoch": 6.731843575418995, "grad_norm": 0.5978702306747437, "learning_rate": 0.0006655182072829131, "loss": 0.5811, "step": 12050 }, { "epoch": 6.732402234636871, "grad_norm": 0.5664288997650146, "learning_rate": 0.0006654901960784314, "loss": 0.4725, "step": 12051 }, { "epoch": 6.732960893854749, "grad_norm": 0.40197309851646423, "learning_rate": 0.0006654621848739496, "loss": 0.3741, "step": 12052 }, { "epoch": 6.733519553072625, "grad_norm": 0.8479698896408081, "learning_rate": 0.0006654341736694678, "loss": 0.5441, "step": 12053 }, { "epoch": 6.734078212290503, "grad_norm": 1.4909826517105103, "learning_rate": 0.000665406162464986, "loss": 0.6403, "step": 12054 }, { "epoch": 6.73463687150838, "grad_norm": 0.5784264206886292, "learning_rate": 0.0006653781512605041, "loss": 0.4866, "step": 12055 }, { "epoch": 6.735195530726257, "grad_norm": 1.651665210723877, "learning_rate": 0.0006653501400560224, "loss": 0.4543, "step": 12056 }, { "epoch": 6.735754189944134, "grad_norm": 2.030379295349121, "learning_rate": 0.0006653221288515406, "loss": 0.4316, "step": 12057 }, { "epoch": 6.736312849162011, "grad_norm": 0.4977875351905823, "learning_rate": 0.0006652941176470588, "loss": 0.4297, "step": 12058 }, { "epoch": 6.736871508379888, "grad_norm": 0.7647444009780884, "learning_rate": 0.000665266106442577, "loss": 0.5852, "step": 12059 }, { "epoch": 6.737430167597766, "grad_norm": 0.46734631061553955, "learning_rate": 0.0006652380952380952, "loss": 0.4704, "step": 12060 }, { "epoch": 6.737988826815642, "grad_norm": 2.0098142623901367, "learning_rate": 0.0006652100840336135, "loss": 0.5337, "step": 12061 }, { "epoch": 6.73854748603352, "grad_norm": 0.5219240784645081, 
"learning_rate": 0.0006651820728291317, "loss": 0.5184, "step": 12062 }, { "epoch": 6.739106145251396, "grad_norm": 0.8293431401252747, "learning_rate": 0.0006651540616246499, "loss": 0.3782, "step": 12063 }, { "epoch": 6.739664804469274, "grad_norm": 0.7018773555755615, "learning_rate": 0.0006651260504201681, "loss": 0.5091, "step": 12064 }, { "epoch": 6.740223463687151, "grad_norm": 0.5579187273979187, "learning_rate": 0.0006650980392156863, "loss": 0.5382, "step": 12065 }, { "epoch": 6.740782122905028, "grad_norm": 0.43822458386421204, "learning_rate": 0.0006650700280112045, "loss": 0.4922, "step": 12066 }, { "epoch": 6.741340782122905, "grad_norm": 1.3476670980453491, "learning_rate": 0.0006650420168067227, "loss": 0.5205, "step": 12067 }, { "epoch": 6.741899441340782, "grad_norm": 0.5670324563980103, "learning_rate": 0.0006650140056022409, "loss": 0.4844, "step": 12068 }, { "epoch": 6.742458100558659, "grad_norm": 0.4712945818901062, "learning_rate": 0.0006649859943977591, "loss": 0.4032, "step": 12069 }, { "epoch": 6.743016759776537, "grad_norm": 0.4902419447898865, "learning_rate": 0.0006649579831932773, "loss": 0.3289, "step": 12070 }, { "epoch": 6.743575418994413, "grad_norm": 0.7132523059844971, "learning_rate": 0.0006649299719887955, "loss": 0.4696, "step": 12071 }, { "epoch": 6.744134078212291, "grad_norm": 0.48290959000587463, "learning_rate": 0.0006649019607843137, "loss": 0.464, "step": 12072 }, { "epoch": 6.744692737430167, "grad_norm": 0.4635353684425354, "learning_rate": 0.0006648739495798319, "loss": 0.4492, "step": 12073 }, { "epoch": 6.745251396648045, "grad_norm": 0.6393581032752991, "learning_rate": 0.0006648459383753501, "loss": 0.4542, "step": 12074 }, { "epoch": 6.745810055865922, "grad_norm": 0.47961464524269104, "learning_rate": 0.0006648179271708683, "loss": 0.5034, "step": 12075 }, { "epoch": 6.746368715083799, "grad_norm": 1.6072580814361572, "learning_rate": 0.0006647899159663866, "loss": 0.3757, "step": 12076 }, { "epoch": 
6.746927374301676, "grad_norm": 0.53092360496521, "learning_rate": 0.0006647619047619048, "loss": 0.3748, "step": 12077 }, { "epoch": 6.747486033519553, "grad_norm": 0.5965705513954163, "learning_rate": 0.000664733893557423, "loss": 0.4304, "step": 12078 }, { "epoch": 6.74804469273743, "grad_norm": 1.8006287813186646, "learning_rate": 0.0006647058823529412, "loss": 0.4139, "step": 12079 }, { "epoch": 6.748603351955307, "grad_norm": 0.6920422911643982, "learning_rate": 0.0006646778711484594, "loss": 0.4542, "step": 12080 }, { "epoch": 6.749162011173184, "grad_norm": 0.4300786256790161, "learning_rate": 0.0006646498599439777, "loss": 0.4624, "step": 12081 }, { "epoch": 6.749720670391062, "grad_norm": 0.39852672815322876, "learning_rate": 0.0006646218487394958, "loss": 0.3792, "step": 12082 }, { "epoch": 6.750279329608938, "grad_norm": 1.3067153692245483, "learning_rate": 0.000664593837535014, "loss": 0.4524, "step": 12083 }, { "epoch": 6.750837988826816, "grad_norm": 0.5748780369758606, "learning_rate": 0.0006645658263305322, "loss": 0.4365, "step": 12084 }, { "epoch": 6.751396648044693, "grad_norm": 0.6959697604179382, "learning_rate": 0.0006645378151260504, "loss": 0.5574, "step": 12085 }, { "epoch": 6.75195530726257, "grad_norm": 0.5242602229118347, "learning_rate": 0.0006645098039215687, "loss": 0.4369, "step": 12086 }, { "epoch": 6.752513966480447, "grad_norm": 1.3811161518096924, "learning_rate": 0.0006644817927170868, "loss": 0.4515, "step": 12087 }, { "epoch": 6.753072625698324, "grad_norm": 0.5280629396438599, "learning_rate": 0.000664453781512605, "loss": 0.4773, "step": 12088 }, { "epoch": 6.753631284916201, "grad_norm": 0.41292819380760193, "learning_rate": 0.0006644257703081232, "loss": 0.4881, "step": 12089 }, { "epoch": 6.754189944134078, "grad_norm": 0.4884382486343384, "learning_rate": 0.0006643977591036414, "loss": 0.3855, "step": 12090 }, { "epoch": 6.754748603351955, "grad_norm": 0.5977643132209778, "learning_rate": 0.0006643697478991598, "loss": 
0.4134, "step": 12091 }, { "epoch": 6.755307262569833, "grad_norm": 0.5409367084503174, "learning_rate": 0.0006643417366946779, "loss": 0.4497, "step": 12092 }, { "epoch": 6.755865921787709, "grad_norm": 0.5569071173667908, "learning_rate": 0.0006643137254901961, "loss": 0.4372, "step": 12093 }, { "epoch": 6.756424581005587, "grad_norm": 1.4106059074401855, "learning_rate": 0.0006642857142857143, "loss": 0.3422, "step": 12094 }, { "epoch": 6.756983240223463, "grad_norm": 0.46886613965034485, "learning_rate": 0.0006642577030812325, "loss": 0.4842, "step": 12095 }, { "epoch": 6.757541899441341, "grad_norm": 0.601372480392456, "learning_rate": 0.0006642296918767508, "loss": 0.5204, "step": 12096 }, { "epoch": 6.758100558659218, "grad_norm": 0.5059488415718079, "learning_rate": 0.000664201680672269, "loss": 0.3942, "step": 12097 }, { "epoch": 6.758659217877095, "grad_norm": 0.7449119091033936, "learning_rate": 0.0006641736694677871, "loss": 0.4941, "step": 12098 }, { "epoch": 6.759217877094972, "grad_norm": 0.46882739663124084, "learning_rate": 0.0006641456582633053, "loss": 0.431, "step": 12099 }, { "epoch": 6.759776536312849, "grad_norm": 0.7931537628173828, "learning_rate": 0.0006641176470588235, "loss": 0.472, "step": 12100 }, { "epoch": 6.760335195530726, "grad_norm": 0.45187756419181824, "learning_rate": 0.0006640896358543418, "loss": 0.575, "step": 12101 }, { "epoch": 6.760893854748604, "grad_norm": 0.5821505188941956, "learning_rate": 0.00066406162464986, "loss": 0.4749, "step": 12102 }, { "epoch": 6.76145251396648, "grad_norm": 1.4876537322998047, "learning_rate": 0.0006640336134453781, "loss": 0.4754, "step": 12103 }, { "epoch": 6.762011173184358, "grad_norm": 0.866144061088562, "learning_rate": 0.0006640056022408963, "loss": 0.4289, "step": 12104 }, { "epoch": 6.762569832402234, "grad_norm": 0.4579249620437622, "learning_rate": 0.0006639775910364145, "loss": 0.5009, "step": 12105 }, { "epoch": 6.763128491620112, "grad_norm": 0.6374160051345825, 
"learning_rate": 0.0006639495798319328, "loss": 0.3892, "step": 12106 }, { "epoch": 6.763687150837989, "grad_norm": 0.650264322757721, "learning_rate": 0.000663921568627451, "loss": 0.4387, "step": 12107 }, { "epoch": 6.764245810055866, "grad_norm": 0.6742350459098816, "learning_rate": 0.0006638935574229691, "loss": 0.5609, "step": 12108 }, { "epoch": 6.764804469273743, "grad_norm": 0.4441060423851013, "learning_rate": 0.0006638655462184874, "loss": 0.4389, "step": 12109 }, { "epoch": 6.76536312849162, "grad_norm": 0.4060734510421753, "learning_rate": 0.0006638375350140056, "loss": 0.4307, "step": 12110 }, { "epoch": 6.765921787709497, "grad_norm": 0.4801897406578064, "learning_rate": 0.0006638095238095239, "loss": 0.4521, "step": 12111 }, { "epoch": 6.766480446927375, "grad_norm": 2.5180137157440186, "learning_rate": 0.0006637815126050421, "loss": 0.5442, "step": 12112 }, { "epoch": 6.767039106145251, "grad_norm": 0.43988385796546936, "learning_rate": 0.0006637535014005603, "loss": 0.5378, "step": 12113 }, { "epoch": 6.767597765363129, "grad_norm": 0.5194052457809448, "learning_rate": 0.0006637254901960784, "loss": 0.4361, "step": 12114 }, { "epoch": 6.768156424581005, "grad_norm": 0.6256169080734253, "learning_rate": 0.0006636974789915966, "loss": 0.454, "step": 12115 }, { "epoch": 6.768715083798883, "grad_norm": 0.4615011215209961, "learning_rate": 0.0006636694677871149, "loss": 0.4668, "step": 12116 }, { "epoch": 6.769273743016759, "grad_norm": 0.4563002586364746, "learning_rate": 0.0006636414565826331, "loss": 0.3617, "step": 12117 }, { "epoch": 6.769832402234637, "grad_norm": 1.2184298038482666, "learning_rate": 0.0006636134453781513, "loss": 0.3967, "step": 12118 }, { "epoch": 6.770391061452514, "grad_norm": 0.7010212540626526, "learning_rate": 0.0006635854341736694, "loss": 0.4723, "step": 12119 }, { "epoch": 6.770949720670391, "grad_norm": 0.4014045298099518, "learning_rate": 0.0006635574229691876, "loss": 0.3291, "step": 12120 }, { "epoch": 
6.771508379888268, "grad_norm": 0.43545007705688477, "learning_rate": 0.0006635294117647059, "loss": 0.4718, "step": 12121 }, { "epoch": 6.772067039106146, "grad_norm": 0.48875871300697327, "learning_rate": 0.0006635014005602241, "loss": 0.4272, "step": 12122 }, { "epoch": 6.772625698324022, "grad_norm": 0.6607202291488647, "learning_rate": 0.0006634733893557423, "loss": 0.4756, "step": 12123 }, { "epoch": 6.7731843575419, "grad_norm": 1.0386394262313843, "learning_rate": 0.0006634453781512604, "loss": 0.4069, "step": 12124 }, { "epoch": 6.773743016759776, "grad_norm": 1.3962101936340332, "learning_rate": 0.0006634173669467786, "loss": 0.3741, "step": 12125 }, { "epoch": 6.774301675977654, "grad_norm": 0.5029513835906982, "learning_rate": 0.000663389355742297, "loss": 0.5083, "step": 12126 }, { "epoch": 6.77486033519553, "grad_norm": 0.5117112398147583, "learning_rate": 0.0006633613445378152, "loss": 0.4086, "step": 12127 }, { "epoch": 6.775418994413408, "grad_norm": 1.061491847038269, "learning_rate": 0.0006633333333333334, "loss": 0.3721, "step": 12128 }, { "epoch": 6.775977653631285, "grad_norm": 1.292675256729126, "learning_rate": 0.0006633053221288516, "loss": 0.446, "step": 12129 }, { "epoch": 6.776536312849162, "grad_norm": 0.5468816757202148, "learning_rate": 0.0006632773109243697, "loss": 0.4314, "step": 12130 }, { "epoch": 6.777094972067039, "grad_norm": 0.594006359577179, "learning_rate": 0.000663249299719888, "loss": 0.4042, "step": 12131 }, { "epoch": 6.777653631284916, "grad_norm": 0.34831440448760986, "learning_rate": 0.0006632212885154062, "loss": 0.3854, "step": 12132 }, { "epoch": 6.778212290502793, "grad_norm": 0.6102625727653503, "learning_rate": 0.0006631932773109244, "loss": 0.5423, "step": 12133 }, { "epoch": 6.778770949720671, "grad_norm": 0.4647621214389801, "learning_rate": 0.0006631652661064426, "loss": 0.4341, "step": 12134 }, { "epoch": 6.779329608938547, "grad_norm": 0.4577479958534241, "learning_rate": 0.0006631372549019607, "loss": 
0.358, "step": 12135 }, { "epoch": 6.779888268156425, "grad_norm": 0.504641592502594, "learning_rate": 0.000663109243697479, "loss": 0.3962, "step": 12136 }, { "epoch": 6.780446927374301, "grad_norm": 3.415985584259033, "learning_rate": 0.0006630812324929972, "loss": 0.4264, "step": 12137 }, { "epoch": 6.781005586592179, "grad_norm": 6.4437713623046875, "learning_rate": 0.0006630532212885154, "loss": 0.434, "step": 12138 }, { "epoch": 6.781564245810056, "grad_norm": 3.226740837097168, "learning_rate": 0.0006630252100840336, "loss": 0.4523, "step": 12139 }, { "epoch": 6.782122905027933, "grad_norm": 0.8114117980003357, "learning_rate": 0.0006629971988795517, "loss": 0.4896, "step": 12140 }, { "epoch": 6.78268156424581, "grad_norm": 0.7347950339317322, "learning_rate": 0.00066296918767507, "loss": 0.4357, "step": 12141 }, { "epoch": 6.783240223463687, "grad_norm": 1.5617716312408447, "learning_rate": 0.0006629411764705883, "loss": 0.4382, "step": 12142 }, { "epoch": 6.783798882681564, "grad_norm": 2.4106078147888184, "learning_rate": 0.0006629131652661065, "loss": 0.3704, "step": 12143 }, { "epoch": 6.784357541899441, "grad_norm": 0.8199379444122314, "learning_rate": 0.0006628851540616247, "loss": 0.4761, "step": 12144 }, { "epoch": 6.784916201117318, "grad_norm": 0.46194446086883545, "learning_rate": 0.0006628571428571429, "loss": 0.4813, "step": 12145 }, { "epoch": 6.785474860335196, "grad_norm": 0.7091647982597351, "learning_rate": 0.0006628291316526611, "loss": 0.4157, "step": 12146 }, { "epoch": 6.786033519553072, "grad_norm": 0.5812698006629944, "learning_rate": 0.0006628011204481793, "loss": 0.4228, "step": 12147 }, { "epoch": 6.78659217877095, "grad_norm": 0.39596813917160034, "learning_rate": 0.0006627731092436975, "loss": 0.4331, "step": 12148 }, { "epoch": 6.787150837988827, "grad_norm": 0.5265296697616577, "learning_rate": 0.0006627450980392157, "loss": 0.5176, "step": 12149 }, { "epoch": 6.787709497206704, "grad_norm": 0.38072067499160767, 
"learning_rate": 0.0006627170868347339, "loss": 0.2953, "step": 12150 }, { "epoch": 6.788268156424581, "grad_norm": 0.7729352712631226, "learning_rate": 0.0006626890756302521, "loss": 0.6336, "step": 12151 }, { "epoch": 6.788826815642458, "grad_norm": 0.4469284415245056, "learning_rate": 0.0006626610644257703, "loss": 0.3789, "step": 12152 }, { "epoch": 6.789385474860335, "grad_norm": 0.3830004632472992, "learning_rate": 0.0006626330532212885, "loss": 0.4208, "step": 12153 }, { "epoch": 6.789944134078212, "grad_norm": 0.4384060502052307, "learning_rate": 0.0006626050420168067, "loss": 0.4982, "step": 12154 }, { "epoch": 6.790502793296089, "grad_norm": 0.4818457365036011, "learning_rate": 0.0006625770308123249, "loss": 0.6442, "step": 12155 }, { "epoch": 6.791061452513967, "grad_norm": 0.780181348323822, "learning_rate": 0.0006625490196078431, "loss": 0.3893, "step": 12156 }, { "epoch": 6.791620111731843, "grad_norm": 0.5324886441230774, "learning_rate": 0.0006625210084033613, "loss": 0.5591, "step": 12157 }, { "epoch": 6.792178770949721, "grad_norm": 0.6413914561271667, "learning_rate": 0.0006624929971988796, "loss": 0.4302, "step": 12158 }, { "epoch": 6.792737430167598, "grad_norm": 0.5562315583229065, "learning_rate": 0.0006624649859943978, "loss": 0.3592, "step": 12159 }, { "epoch": 6.793296089385475, "grad_norm": 0.4010394811630249, "learning_rate": 0.000662436974789916, "loss": 0.402, "step": 12160 }, { "epoch": 6.793854748603352, "grad_norm": 0.6908612251281738, "learning_rate": 0.0006624089635854343, "loss": 0.5004, "step": 12161 }, { "epoch": 6.794413407821229, "grad_norm": 0.48671984672546387, "learning_rate": 0.0006623809523809524, "loss": 0.4387, "step": 12162 }, { "epoch": 6.794972067039106, "grad_norm": 10.109496116638184, "learning_rate": 0.0006623529411764706, "loss": 0.4356, "step": 12163 }, { "epoch": 6.795530726256983, "grad_norm": 0.5088633894920349, "learning_rate": 0.0006623249299719888, "loss": 0.3064, "step": 12164 }, { "epoch": 
6.79608938547486, "grad_norm": 0.4546014368534088, "learning_rate": 0.000662296918767507, "loss": 0.5701, "step": 12165 }, { "epoch": 6.796648044692738, "grad_norm": 1.4646694660186768, "learning_rate": 0.0006622689075630253, "loss": 0.5189, "step": 12166 }, { "epoch": 6.797206703910614, "grad_norm": 0.4038674533367157, "learning_rate": 0.0006622408963585434, "loss": 0.3389, "step": 12167 }, { "epoch": 6.797765363128492, "grad_norm": 1.1339504718780518, "learning_rate": 0.0006622128851540616, "loss": 0.6742, "step": 12168 }, { "epoch": 6.798324022346368, "grad_norm": 0.6354513168334961, "learning_rate": 0.0006621848739495798, "loss": 0.4754, "step": 12169 }, { "epoch": 6.798882681564246, "grad_norm": 0.4913661777973175, "learning_rate": 0.000662156862745098, "loss": 0.4298, "step": 12170 }, { "epoch": 6.799441340782123, "grad_norm": 0.469773530960083, "learning_rate": 0.0006621288515406163, "loss": 0.5205, "step": 12171 }, { "epoch": 6.8, "grad_norm": 0.5128031969070435, "learning_rate": 0.0006621008403361344, "loss": 0.3146, "step": 12172 }, { "epoch": 6.800558659217877, "grad_norm": 4.143674850463867, "learning_rate": 0.0006620728291316526, "loss": 0.383, "step": 12173 }, { "epoch": 6.801117318435754, "grad_norm": 0.6529032588005066, "learning_rate": 0.0006620448179271709, "loss": 0.5017, "step": 12174 }, { "epoch": 6.801675977653631, "grad_norm": 0.5830149054527283, "learning_rate": 0.0006620168067226891, "loss": 0.474, "step": 12175 }, { "epoch": 6.802234636871509, "grad_norm": 0.6475193500518799, "learning_rate": 0.0006619887955182074, "loss": 0.411, "step": 12176 }, { "epoch": 6.802793296089385, "grad_norm": 0.5908905267715454, "learning_rate": 0.0006619607843137256, "loss": 0.419, "step": 12177 }, { "epoch": 6.803351955307263, "grad_norm": 0.47705554962158203, "learning_rate": 0.0006619327731092437, "loss": 0.494, "step": 12178 }, { "epoch": 6.803910614525139, "grad_norm": 0.5022670030593872, "learning_rate": 0.0006619047619047619, "loss": 0.5003, "step": 
12179 }, { "epoch": 6.804469273743017, "grad_norm": 0.47296231985092163, "learning_rate": 0.0006618767507002801, "loss": 0.5382, "step": 12180 }, { "epoch": 6.805027932960893, "grad_norm": 0.5288333296775818, "learning_rate": 0.0006618487394957984, "loss": 0.4106, "step": 12181 }, { "epoch": 6.805586592178771, "grad_norm": 0.36206546425819397, "learning_rate": 0.0006618207282913166, "loss": 0.4195, "step": 12182 }, { "epoch": 6.806145251396648, "grad_norm": 1.007214069366455, "learning_rate": 0.0006617927170868347, "loss": 0.4235, "step": 12183 }, { "epoch": 6.806703910614525, "grad_norm": 0.46468690037727356, "learning_rate": 0.0006617647058823529, "loss": 0.3398, "step": 12184 }, { "epoch": 6.807262569832402, "grad_norm": 0.5715383887290955, "learning_rate": 0.0006617366946778711, "loss": 0.5331, "step": 12185 }, { "epoch": 6.80782122905028, "grad_norm": 0.48019129037857056, "learning_rate": 0.0006617086834733894, "loss": 0.4826, "step": 12186 }, { "epoch": 6.808379888268156, "grad_norm": 1.925338864326477, "learning_rate": 0.0006616806722689076, "loss": 0.4267, "step": 12187 }, { "epoch": 6.808938547486034, "grad_norm": 0.42642077803611755, "learning_rate": 0.0006616526610644257, "loss": 0.4655, "step": 12188 }, { "epoch": 6.80949720670391, "grad_norm": 0.5507749915122986, "learning_rate": 0.0006616246498599439, "loss": 0.3932, "step": 12189 }, { "epoch": 6.810055865921788, "grad_norm": 0.7477067112922668, "learning_rate": 0.0006615966386554621, "loss": 0.4779, "step": 12190 }, { "epoch": 6.810614525139664, "grad_norm": 0.4381631314754486, "learning_rate": 0.0006615686274509805, "loss": 0.3774, "step": 12191 }, { "epoch": 6.811173184357542, "grad_norm": 0.561799168586731, "learning_rate": 0.0006615406162464987, "loss": 0.5242, "step": 12192 }, { "epoch": 6.811731843575419, "grad_norm": 0.4435920715332031, "learning_rate": 0.0006615126050420169, "loss": 0.4929, "step": 12193 }, { "epoch": 6.812290502793296, "grad_norm": 0.4417363703250885, "learning_rate": 
0.000661484593837535, "loss": 0.4342, "step": 12194 }, { "epoch": 6.812849162011173, "grad_norm": 0.5318664908409119, "learning_rate": 0.0006614565826330532, "loss": 0.4526, "step": 12195 }, { "epoch": 6.813407821229051, "grad_norm": 0.4912884831428528, "learning_rate": 0.0006614285714285715, "loss": 0.6299, "step": 12196 }, { "epoch": 6.813966480446927, "grad_norm": 0.44741302728652954, "learning_rate": 0.0006614005602240897, "loss": 0.4084, "step": 12197 }, { "epoch": 6.814525139664805, "grad_norm": 0.4176372289657593, "learning_rate": 0.0006613725490196079, "loss": 0.4655, "step": 12198 }, { "epoch": 6.815083798882681, "grad_norm": 0.48217812180519104, "learning_rate": 0.000661344537815126, "loss": 0.4032, "step": 12199 }, { "epoch": 6.815642458100559, "grad_norm": 0.4372022747993469, "learning_rate": 0.0006613165266106442, "loss": 0.507, "step": 12200 }, { "epoch": 6.816201117318435, "grad_norm": 0.7491111159324646, "learning_rate": 0.0006612885154061625, "loss": 0.5372, "step": 12201 }, { "epoch": 6.816759776536313, "grad_norm": 1.053197979927063, "learning_rate": 0.0006612605042016807, "loss": 0.5283, "step": 12202 }, { "epoch": 6.81731843575419, "grad_norm": 1.8735992908477783, "learning_rate": 0.0006612324929971989, "loss": 0.3841, "step": 12203 }, { "epoch": 6.817877094972067, "grad_norm": 2.15081524848938, "learning_rate": 0.000661204481792717, "loss": 0.5999, "step": 12204 }, { "epoch": 6.818435754189944, "grad_norm": 0.4968929886817932, "learning_rate": 0.0006611764705882352, "loss": 0.4864, "step": 12205 }, { "epoch": 6.818994413407821, "grad_norm": 0.548363983631134, "learning_rate": 0.0006611484593837536, "loss": 0.4815, "step": 12206 }, { "epoch": 6.819553072625698, "grad_norm": 0.5376548171043396, "learning_rate": 0.0006611204481792718, "loss": 0.462, "step": 12207 }, { "epoch": 6.820111731843576, "grad_norm": 1.8746628761291504, "learning_rate": 0.00066109243697479, "loss": 0.3926, "step": 12208 }, { "epoch": 6.820670391061452, "grad_norm": 
0.4753383994102478, "learning_rate": 0.0006610644257703082, "loss": 0.4537, "step": 12209 }, { "epoch": 6.82122905027933, "grad_norm": 0.5809012651443481, "learning_rate": 0.0006610364145658263, "loss": 0.4553, "step": 12210 }, { "epoch": 6.821787709497206, "grad_norm": 0.6956520676612854, "learning_rate": 0.0006610084033613446, "loss": 0.4233, "step": 12211 }, { "epoch": 6.822346368715084, "grad_norm": 0.5689812302589417, "learning_rate": 0.0006609803921568628, "loss": 0.5841, "step": 12212 }, { "epoch": 6.822905027932961, "grad_norm": 0.7909679412841797, "learning_rate": 0.000660952380952381, "loss": 0.5484, "step": 12213 }, { "epoch": 6.823463687150838, "grad_norm": 0.5619382858276367, "learning_rate": 0.0006609243697478992, "loss": 0.341, "step": 12214 }, { "epoch": 6.824022346368715, "grad_norm": 0.41307708621025085, "learning_rate": 0.0006608963585434173, "loss": 0.431, "step": 12215 }, { "epoch": 6.824581005586592, "grad_norm": 0.4418524205684662, "learning_rate": 0.0006608683473389356, "loss": 0.3833, "step": 12216 }, { "epoch": 6.825139664804469, "grad_norm": 0.9368011355400085, "learning_rate": 0.0006608403361344538, "loss": 0.4328, "step": 12217 }, { "epoch": 6.825698324022346, "grad_norm": 0.8204415440559387, "learning_rate": 0.000660812324929972, "loss": 0.5172, "step": 12218 }, { "epoch": 6.826256983240223, "grad_norm": 0.90938800573349, "learning_rate": 0.0006607843137254902, "loss": 0.431, "step": 12219 }, { "epoch": 6.826815642458101, "grad_norm": 0.48122236132621765, "learning_rate": 0.0006607563025210083, "loss": 0.3777, "step": 12220 }, { "epoch": 6.827374301675977, "grad_norm": 0.4968211054801941, "learning_rate": 0.0006607282913165266, "loss": 0.5716, "step": 12221 }, { "epoch": 6.827932960893855, "grad_norm": 0.4477780759334564, "learning_rate": 0.0006607002801120448, "loss": 0.4006, "step": 12222 }, { "epoch": 6.828491620111732, "grad_norm": 0.5600734949111938, "learning_rate": 0.000660672268907563, "loss": 0.5963, "step": 12223 }, { 
"epoch": 6.829050279329609, "grad_norm": 1.032914400100708, "learning_rate": 0.0006606442577030813, "loss": 0.4769, "step": 12224 }, { "epoch": 6.829608938547486, "grad_norm": 0.7054173946380615, "learning_rate": 0.0006606162464985995, "loss": 0.3962, "step": 12225 }, { "epoch": 6.830167597765363, "grad_norm": 2.3067338466644287, "learning_rate": 0.0006605882352941177, "loss": 0.589, "step": 12226 }, { "epoch": 6.83072625698324, "grad_norm": 0.525468647480011, "learning_rate": 0.0006605602240896359, "loss": 0.4507, "step": 12227 }, { "epoch": 6.831284916201117, "grad_norm": 0.4376436471939087, "learning_rate": 0.0006605322128851541, "loss": 0.4141, "step": 12228 }, { "epoch": 6.831843575418994, "grad_norm": 2.277660846710205, "learning_rate": 0.0006605042016806723, "loss": 0.4964, "step": 12229 }, { "epoch": 6.832402234636872, "grad_norm": 0.5358434319496155, "learning_rate": 0.0006604761904761905, "loss": 0.4127, "step": 12230 }, { "epoch": 6.832960893854748, "grad_norm": 0.5851360559463501, "learning_rate": 0.0006604481792717087, "loss": 0.4064, "step": 12231 }, { "epoch": 6.833519553072626, "grad_norm": 0.3869569003582001, "learning_rate": 0.0006604201680672269, "loss": 0.4763, "step": 12232 }, { "epoch": 6.834078212290503, "grad_norm": 1.0456703901290894, "learning_rate": 0.0006603921568627451, "loss": 0.6841, "step": 12233 }, { "epoch": 6.83463687150838, "grad_norm": 0.45951998233795166, "learning_rate": 0.0006603641456582633, "loss": 0.4759, "step": 12234 }, { "epoch": 6.835195530726257, "grad_norm": 0.4701312780380249, "learning_rate": 0.0006603361344537815, "loss": 0.5475, "step": 12235 }, { "epoch": 6.835754189944134, "grad_norm": 0.513654887676239, "learning_rate": 0.0006603081232492998, "loss": 0.4243, "step": 12236 }, { "epoch": 6.836312849162011, "grad_norm": 0.6751559972763062, "learning_rate": 0.0006602801120448179, "loss": 0.4939, "step": 12237 }, { "epoch": 6.836871508379888, "grad_norm": 0.5372594594955444, "learning_rate": 0.0006602521008403361, 
"loss": 0.486, "step": 12238 }, { "epoch": 6.837430167597765, "grad_norm": 2.6426103115081787, "learning_rate": 0.0006602240896358543, "loss": 0.5077, "step": 12239 }, { "epoch": 6.837988826815643, "grad_norm": 0.5769447684288025, "learning_rate": 0.0006601960784313726, "loss": 0.5089, "step": 12240 }, { "epoch": 6.838547486033519, "grad_norm": 0.6189960241317749, "learning_rate": 0.0006601680672268909, "loss": 0.4863, "step": 12241 }, { "epoch": 6.839106145251397, "grad_norm": 0.8525975346565247, "learning_rate": 0.000660140056022409, "loss": 0.3988, "step": 12242 }, { "epoch": 6.839664804469273, "grad_norm": 0.4903383255004883, "learning_rate": 0.0006601120448179272, "loss": 0.479, "step": 12243 }, { "epoch": 6.840223463687151, "grad_norm": 1.2140885591506958, "learning_rate": 0.0006600840336134454, "loss": 0.5013, "step": 12244 }, { "epoch": 6.840782122905028, "grad_norm": 1.1946229934692383, "learning_rate": 0.0006600560224089636, "loss": 0.4261, "step": 12245 }, { "epoch": 6.841340782122905, "grad_norm": 0.6053236722946167, "learning_rate": 0.0006600280112044819, "loss": 0.6284, "step": 12246 }, { "epoch": 6.841899441340782, "grad_norm": 0.5141407251358032, "learning_rate": 0.00066, "loss": 0.3859, "step": 12247 }, { "epoch": 6.842458100558659, "grad_norm": 0.5617372393608093, "learning_rate": 0.0006599719887955182, "loss": 0.4183, "step": 12248 }, { "epoch": 6.843016759776536, "grad_norm": 0.7290052771568298, "learning_rate": 0.0006599439775910364, "loss": 0.4398, "step": 12249 }, { "epoch": 6.843575418994414, "grad_norm": 0.4437260031700134, "learning_rate": 0.0006599159663865546, "loss": 0.3766, "step": 12250 }, { "epoch": 6.84413407821229, "grad_norm": 0.43845051527023315, "learning_rate": 0.0006598879551820729, "loss": 0.3611, "step": 12251 }, { "epoch": 6.844692737430168, "grad_norm": 0.6631003022193909, "learning_rate": 0.0006598599439775911, "loss": 0.5254, "step": 12252 }, { "epoch": 6.845251396648044, "grad_norm": 0.39586126804351807, 
"learning_rate": 0.0006598319327731092, "loss": 0.4706, "step": 12253 }, { "epoch": 6.845810055865922, "grad_norm": 0.3851293921470642, "learning_rate": 0.0006598039215686274, "loss": 0.3492, "step": 12254 }, { "epoch": 6.846368715083798, "grad_norm": 0.8514540195465088, "learning_rate": 0.0006597759103641456, "loss": 0.4679, "step": 12255 }, { "epoch": 6.846927374301676, "grad_norm": 0.8617717027664185, "learning_rate": 0.000659747899159664, "loss": 0.3896, "step": 12256 }, { "epoch": 6.847486033519553, "grad_norm": 0.47195181250572205, "learning_rate": 0.0006597198879551822, "loss": 0.5756, "step": 12257 }, { "epoch": 6.84804469273743, "grad_norm": 0.6824280023574829, "learning_rate": 0.0006596918767507003, "loss": 0.4798, "step": 12258 }, { "epoch": 6.848603351955307, "grad_norm": 1.5259748697280884, "learning_rate": 0.0006596638655462185, "loss": 0.4248, "step": 12259 }, { "epoch": 6.849162011173185, "grad_norm": 1.1958094835281372, "learning_rate": 0.0006596358543417367, "loss": 0.5645, "step": 12260 }, { "epoch": 6.849720670391061, "grad_norm": 0.5710233449935913, "learning_rate": 0.000659607843137255, "loss": 0.4509, "step": 12261 }, { "epoch": 6.850279329608939, "grad_norm": 0.6793966889381409, "learning_rate": 0.0006595798319327732, "loss": 0.3582, "step": 12262 }, { "epoch": 6.850837988826815, "grad_norm": 0.4943723678588867, "learning_rate": 0.0006595518207282913, "loss": 0.4839, "step": 12263 }, { "epoch": 6.851396648044693, "grad_norm": 0.6937837600708008, "learning_rate": 0.0006595238095238095, "loss": 0.4814, "step": 12264 }, { "epoch": 6.851955307262569, "grad_norm": 0.5042548179626465, "learning_rate": 0.0006594957983193277, "loss": 0.4474, "step": 12265 }, { "epoch": 6.852513966480447, "grad_norm": 0.6350553035736084, "learning_rate": 0.000659467787114846, "loss": 0.4229, "step": 12266 }, { "epoch": 6.853072625698324, "grad_norm": 0.44231465458869934, "learning_rate": 0.0006594397759103642, "loss": 0.3864, "step": 12267 }, { "epoch": 
6.853631284916201, "grad_norm": 0.4294978976249695, "learning_rate": 0.0006594117647058824, "loss": 0.3916, "step": 12268 }, { "epoch": 6.854189944134078, "grad_norm": 1.0935415029525757, "learning_rate": 0.0006593837535014005, "loss": 0.4628, "step": 12269 }, { "epoch": 6.854748603351956, "grad_norm": 0.6725505590438843, "learning_rate": 0.0006593557422969187, "loss": 0.5135, "step": 12270 }, { "epoch": 6.855307262569832, "grad_norm": 0.8837329149246216, "learning_rate": 0.0006593277310924369, "loss": 0.6269, "step": 12271 }, { "epoch": 6.85586592178771, "grad_norm": 0.5046632885932922, "learning_rate": 0.0006592997198879553, "loss": 0.4485, "step": 12272 }, { "epoch": 6.856424581005586, "grad_norm": 0.4662707448005676, "learning_rate": 0.0006592717086834735, "loss": 0.4243, "step": 12273 }, { "epoch": 6.856983240223464, "grad_norm": 0.39036089181900024, "learning_rate": 0.0006592436974789916, "loss": 0.3864, "step": 12274 }, { "epoch": 6.85754189944134, "grad_norm": 0.4357408285140991, "learning_rate": 0.0006592156862745098, "loss": 0.4034, "step": 12275 }, { "epoch": 6.858100558659218, "grad_norm": 0.4454643726348877, "learning_rate": 0.000659187675070028, "loss": 0.3906, "step": 12276 }, { "epoch": 6.858659217877095, "grad_norm": 1.276620864868164, "learning_rate": 0.0006591596638655463, "loss": 0.404, "step": 12277 }, { "epoch": 6.859217877094972, "grad_norm": 0.4004911780357361, "learning_rate": 0.0006591316526610645, "loss": 0.4148, "step": 12278 }, { "epoch": 6.859776536312849, "grad_norm": 0.5384070873260498, "learning_rate": 0.0006591036414565826, "loss": 0.5143, "step": 12279 }, { "epoch": 6.860335195530726, "grad_norm": 0.8736839294433594, "learning_rate": 0.0006590756302521008, "loss": 0.3602, "step": 12280 }, { "epoch": 6.860893854748603, "grad_norm": 0.8059192895889282, "learning_rate": 0.000659047619047619, "loss": 0.4569, "step": 12281 }, { "epoch": 6.861452513966481, "grad_norm": 0.8635017275810242, "learning_rate": 0.0006590196078431373, "loss": 
0.5441, "step": 12282 }, { "epoch": 6.862011173184357, "grad_norm": 0.5241972804069519, "learning_rate": 0.0006589915966386555, "loss": 0.4499, "step": 12283 }, { "epoch": 6.862569832402235, "grad_norm": 0.5192648768424988, "learning_rate": 0.0006589635854341737, "loss": 0.4598, "step": 12284 }, { "epoch": 6.863128491620111, "grad_norm": 0.6250534653663635, "learning_rate": 0.0006589355742296918, "loss": 0.3846, "step": 12285 }, { "epoch": 6.863687150837989, "grad_norm": 0.38513192534446716, "learning_rate": 0.00065890756302521, "loss": 0.3629, "step": 12286 }, { "epoch": 6.864245810055866, "grad_norm": 0.5308732986450195, "learning_rate": 0.0006588795518207283, "loss": 0.5477, "step": 12287 }, { "epoch": 6.864804469273743, "grad_norm": 0.4723777770996094, "learning_rate": 0.0006588515406162466, "loss": 0.372, "step": 12288 }, { "epoch": 6.86536312849162, "grad_norm": 0.544681191444397, "learning_rate": 0.0006588235294117648, "loss": 0.5191, "step": 12289 }, { "epoch": 6.865921787709497, "grad_norm": 0.4156869947910309, "learning_rate": 0.0006587955182072829, "loss": 0.3577, "step": 12290 }, { "epoch": 6.866480446927374, "grad_norm": 0.9283410310745239, "learning_rate": 0.0006587675070028011, "loss": 0.4218, "step": 12291 }, { "epoch": 6.867039106145251, "grad_norm": 0.45266541838645935, "learning_rate": 0.0006587394957983194, "loss": 0.3658, "step": 12292 }, { "epoch": 6.867597765363128, "grad_norm": 0.4927558898925781, "learning_rate": 0.0006587114845938376, "loss": 0.3867, "step": 12293 }, { "epoch": 6.868156424581006, "grad_norm": 0.6897336840629578, "learning_rate": 0.0006586834733893558, "loss": 0.4684, "step": 12294 }, { "epoch": 6.868715083798882, "grad_norm": 0.9791338443756104, "learning_rate": 0.0006586554621848739, "loss": 0.4581, "step": 12295 }, { "epoch": 6.86927374301676, "grad_norm": 0.5750959515571594, "learning_rate": 0.0006586274509803921, "loss": 0.3691, "step": 12296 }, { "epoch": 6.869832402234637, "grad_norm": 0.4801139533519745, 
"learning_rate": 0.0006585994397759104, "loss": 0.4913, "step": 12297 }, { "epoch": 6.870391061452514, "grad_norm": 0.4182223379611969, "learning_rate": 0.0006585714285714286, "loss": 0.4591, "step": 12298 }, { "epoch": 6.870949720670391, "grad_norm": 0.5962969660758972, "learning_rate": 0.0006585434173669468, "loss": 0.4151, "step": 12299 }, { "epoch": 6.871508379888268, "grad_norm": 1.4177026748657227, "learning_rate": 0.000658515406162465, "loss": 0.5151, "step": 12300 }, { "epoch": 6.872067039106145, "grad_norm": 0.6685916185379028, "learning_rate": 0.0006584873949579831, "loss": 0.5699, "step": 12301 }, { "epoch": 6.872625698324022, "grad_norm": 0.5574515461921692, "learning_rate": 0.0006584593837535014, "loss": 0.3825, "step": 12302 }, { "epoch": 6.873184357541899, "grad_norm": 1.0649938583374023, "learning_rate": 0.0006584313725490196, "loss": 0.4321, "step": 12303 }, { "epoch": 6.873743016759777, "grad_norm": 0.517022967338562, "learning_rate": 0.0006584033613445378, "loss": 0.5233, "step": 12304 }, { "epoch": 6.874301675977653, "grad_norm": 0.6731800436973572, "learning_rate": 0.000658375350140056, "loss": 0.603, "step": 12305 }, { "epoch": 6.874860335195531, "grad_norm": 0.4743667542934418, "learning_rate": 0.0006583473389355742, "loss": 0.4186, "step": 12306 }, { "epoch": 6.875418994413408, "grad_norm": 0.5006465315818787, "learning_rate": 0.0006583193277310925, "loss": 0.3138, "step": 12307 }, { "epoch": 6.875977653631285, "grad_norm": 0.5262283086776733, "learning_rate": 0.0006582913165266107, "loss": 0.4515, "step": 12308 }, { "epoch": 6.876536312849162, "grad_norm": 0.6108571887016296, "learning_rate": 0.0006582633053221289, "loss": 0.4945, "step": 12309 }, { "epoch": 6.877094972067039, "grad_norm": 0.795237123966217, "learning_rate": 0.0006582352941176471, "loss": 0.4214, "step": 12310 }, { "epoch": 6.877653631284916, "grad_norm": 0.6529590487480164, "learning_rate": 0.0006582072829131652, "loss": 0.4101, "step": 12311 }, { "epoch": 
6.878212290502793, "grad_norm": 0.4235641658306122, "learning_rate": 0.0006581792717086835, "loss": 0.4968, "step": 12312 }, { "epoch": 6.87877094972067, "grad_norm": 0.4552220106124878, "learning_rate": 0.0006581512605042017, "loss": 0.4687, "step": 12313 }, { "epoch": 6.879329608938548, "grad_norm": 0.41594234108924866, "learning_rate": 0.0006581232492997199, "loss": 0.372, "step": 12314 }, { "epoch": 6.879888268156424, "grad_norm": 0.4671579599380493, "learning_rate": 0.0006580952380952381, "loss": 0.4097, "step": 12315 }, { "epoch": 6.880446927374302, "grad_norm": 0.6716263890266418, "learning_rate": 0.0006580672268907563, "loss": 0.6013, "step": 12316 }, { "epoch": 6.881005586592178, "grad_norm": 0.4165213406085968, "learning_rate": 0.0006580392156862745, "loss": 0.4166, "step": 12317 }, { "epoch": 6.881564245810056, "grad_norm": 0.539641261100769, "learning_rate": 0.0006580112044817927, "loss": 0.3606, "step": 12318 }, { "epoch": 6.882122905027933, "grad_norm": 0.7333235740661621, "learning_rate": 0.0006579831932773109, "loss": 0.4156, "step": 12319 }, { "epoch": 6.88268156424581, "grad_norm": 0.39429736137390137, "learning_rate": 0.0006579551820728291, "loss": 0.444, "step": 12320 }, { "epoch": 6.883240223463687, "grad_norm": 0.6604355573654175, "learning_rate": 0.0006579271708683473, "loss": 0.5062, "step": 12321 }, { "epoch": 6.883798882681564, "grad_norm": 0.43606069684028625, "learning_rate": 0.0006578991596638656, "loss": 0.4795, "step": 12322 }, { "epoch": 6.884357541899441, "grad_norm": 0.7540426254272461, "learning_rate": 0.0006578711484593838, "loss": 0.5598, "step": 12323 }, { "epoch": 6.884916201117319, "grad_norm": 0.4999866485595703, "learning_rate": 0.000657843137254902, "loss": 0.4493, "step": 12324 }, { "epoch": 6.885474860335195, "grad_norm": 1.0222454071044922, "learning_rate": 0.0006578151260504202, "loss": 0.4033, "step": 12325 }, { "epoch": 6.886033519553073, "grad_norm": 0.40791088342666626, "learning_rate": 0.0006577871148459384, 
"loss": 0.3806, "step": 12326 }, { "epoch": 6.886592178770949, "grad_norm": 0.45409032702445984, "learning_rate": 0.0006577591036414566, "loss": 0.4798, "step": 12327 }, { "epoch": 6.887150837988827, "grad_norm": 0.6834008693695068, "learning_rate": 0.0006577310924369748, "loss": 0.3432, "step": 12328 }, { "epoch": 6.8877094972067034, "grad_norm": 3.514155149459839, "learning_rate": 0.000657703081232493, "loss": 0.5613, "step": 12329 }, { "epoch": 6.888268156424581, "grad_norm": 0.44188401103019714, "learning_rate": 0.0006576750700280112, "loss": 0.4209, "step": 12330 }, { "epoch": 6.888826815642458, "grad_norm": 1.5433259010314941, "learning_rate": 0.0006576470588235294, "loss": 0.4409, "step": 12331 }, { "epoch": 6.889385474860335, "grad_norm": 0.7367574572563171, "learning_rate": 0.0006576190476190477, "loss": 0.4655, "step": 12332 }, { "epoch": 6.889944134078212, "grad_norm": 2.3072218894958496, "learning_rate": 0.0006575910364145658, "loss": 0.4912, "step": 12333 }, { "epoch": 6.89050279329609, "grad_norm": 0.5888306498527527, "learning_rate": 0.000657563025210084, "loss": 0.407, "step": 12334 }, { "epoch": 6.891061452513966, "grad_norm": 0.5177647471427917, "learning_rate": 0.0006575350140056022, "loss": 0.4127, "step": 12335 }, { "epoch": 6.891620111731844, "grad_norm": 0.5400302410125732, "learning_rate": 0.0006575070028011204, "loss": 0.4269, "step": 12336 }, { "epoch": 6.89217877094972, "grad_norm": 0.6724732518196106, "learning_rate": 0.0006574789915966388, "loss": 0.4289, "step": 12337 }, { "epoch": 6.892737430167598, "grad_norm": 0.9561570286750793, "learning_rate": 0.0006574509803921569, "loss": 0.5423, "step": 12338 }, { "epoch": 6.8932960893854744, "grad_norm": 0.48202231526374817, "learning_rate": 0.0006574229691876751, "loss": 0.4312, "step": 12339 }, { "epoch": 6.893854748603352, "grad_norm": 1.6430753469467163, "learning_rate": 0.0006573949579831933, "loss": 0.498, "step": 12340 }, { "epoch": 6.894413407821229, "grad_norm": 0.5640503764152527, 
"learning_rate": 0.0006573669467787115, "loss": 0.3957, "step": 12341 }, { "epoch": 6.894972067039106, "grad_norm": 0.3838455080986023, "learning_rate": 0.0006573389355742298, "loss": 0.4335, "step": 12342 }, { "epoch": 6.895530726256983, "grad_norm": 0.5746883749961853, "learning_rate": 0.0006573109243697479, "loss": 0.3845, "step": 12343 }, { "epoch": 6.896089385474861, "grad_norm": 0.4784086346626282, "learning_rate": 0.0006572829131652661, "loss": 0.4257, "step": 12344 }, { "epoch": 6.896648044692737, "grad_norm": 0.752626359462738, "learning_rate": 0.0006572549019607843, "loss": 0.4444, "step": 12345 }, { "epoch": 6.897206703910615, "grad_norm": 1.0025484561920166, "learning_rate": 0.0006572268907563025, "loss": 0.5299, "step": 12346 }, { "epoch": 6.897765363128491, "grad_norm": 0.7981438636779785, "learning_rate": 0.0006571988795518208, "loss": 0.3938, "step": 12347 }, { "epoch": 6.898324022346369, "grad_norm": 0.5628315210342407, "learning_rate": 0.000657170868347339, "loss": 0.4264, "step": 12348 }, { "epoch": 6.8988826815642454, "grad_norm": 0.5373659729957581, "learning_rate": 0.0006571428571428571, "loss": 0.5799, "step": 12349 }, { "epoch": 6.899441340782123, "grad_norm": 0.6609413623809814, "learning_rate": 0.0006571148459383753, "loss": 0.3921, "step": 12350 }, { "epoch": 6.9, "grad_norm": 0.457053542137146, "learning_rate": 0.0006570868347338935, "loss": 0.3676, "step": 12351 }, { "epoch": 6.900558659217877, "grad_norm": 0.7940835356712341, "learning_rate": 0.0006570588235294118, "loss": 0.4878, "step": 12352 }, { "epoch": 6.901117318435754, "grad_norm": 0.4627826511859894, "learning_rate": 0.00065703081232493, "loss": 0.4674, "step": 12353 }, { "epoch": 6.901675977653631, "grad_norm": 0.6538100242614746, "learning_rate": 0.0006570028011204481, "loss": 0.4804, "step": 12354 }, { "epoch": 6.902234636871508, "grad_norm": 0.943400502204895, "learning_rate": 0.0006569747899159664, "loss": 0.3998, "step": 12355 }, { "epoch": 6.902793296089386, 
"grad_norm": 0.6363654732704163, "learning_rate": 0.0006569467787114846, "loss": 0.5836, "step": 12356 }, { "epoch": 6.903351955307262, "grad_norm": 0.6421704888343811, "learning_rate": 0.0006569187675070029, "loss": 0.483, "step": 12357 }, { "epoch": 6.90391061452514, "grad_norm": 0.6273989677429199, "learning_rate": 0.0006568907563025211, "loss": 0.6133, "step": 12358 }, { "epoch": 6.9044692737430164, "grad_norm": 0.4673924148082733, "learning_rate": 0.0006568627450980392, "loss": 0.394, "step": 12359 }, { "epoch": 6.905027932960894, "grad_norm": 0.36412474513053894, "learning_rate": 0.0006568347338935574, "loss": 0.4247, "step": 12360 }, { "epoch": 6.905586592178771, "grad_norm": 1.1147143840789795, "learning_rate": 0.0006568067226890756, "loss": 0.5324, "step": 12361 }, { "epoch": 6.906145251396648, "grad_norm": 0.5823172926902771, "learning_rate": 0.0006567787114845939, "loss": 0.4566, "step": 12362 }, { "epoch": 6.906703910614525, "grad_norm": 0.6668529510498047, "learning_rate": 0.0006567507002801121, "loss": 0.36, "step": 12363 }, { "epoch": 6.907262569832402, "grad_norm": 0.5669211149215698, "learning_rate": 0.0006567226890756303, "loss": 0.3919, "step": 12364 }, { "epoch": 6.907821229050279, "grad_norm": 0.5733174085617065, "learning_rate": 0.0006566946778711484, "loss": 0.5691, "step": 12365 }, { "epoch": 6.908379888268156, "grad_norm": 0.5229942798614502, "learning_rate": 0.0006566666666666666, "loss": 0.416, "step": 12366 }, { "epoch": 6.908938547486033, "grad_norm": 0.8055040240287781, "learning_rate": 0.0006566386554621849, "loss": 0.5443, "step": 12367 }, { "epoch": 6.909497206703911, "grad_norm": 0.4818640649318695, "learning_rate": 0.0006566106442577031, "loss": 0.3809, "step": 12368 }, { "epoch": 6.910055865921787, "grad_norm": 0.384899765253067, "learning_rate": 0.0006565826330532213, "loss": 0.4262, "step": 12369 }, { "epoch": 6.910614525139665, "grad_norm": 0.4965118169784546, "learning_rate": 0.0006565546218487394, "loss": 0.4224, "step": 
12370 }, { "epoch": 6.911173184357542, "grad_norm": 0.4606805741786957, "learning_rate": 0.0006565266106442576, "loss": 0.5537, "step": 12371 }, { "epoch": 6.911731843575419, "grad_norm": 1.11643385887146, "learning_rate": 0.000656498599439776, "loss": 0.4484, "step": 12372 }, { "epoch": 6.912290502793296, "grad_norm": 0.47402969002723694, "learning_rate": 0.0006564705882352942, "loss": 0.4947, "step": 12373 }, { "epoch": 6.912849162011173, "grad_norm": 0.5438334941864014, "learning_rate": 0.0006564425770308124, "loss": 0.5606, "step": 12374 }, { "epoch": 6.91340782122905, "grad_norm": 0.8218211531639099, "learning_rate": 0.0006564145658263305, "loss": 0.4115, "step": 12375 }, { "epoch": 6.913966480446927, "grad_norm": 0.9032309651374817, "learning_rate": 0.0006563865546218487, "loss": 0.5259, "step": 12376 }, { "epoch": 6.914525139664804, "grad_norm": 0.42247480154037476, "learning_rate": 0.000656358543417367, "loss": 0.4629, "step": 12377 }, { "epoch": 6.915083798882682, "grad_norm": 0.9950020909309387, "learning_rate": 0.0006563305322128852, "loss": 0.4232, "step": 12378 }, { "epoch": 6.915642458100558, "grad_norm": 1.1210905313491821, "learning_rate": 0.0006563025210084034, "loss": 0.5409, "step": 12379 }, { "epoch": 6.916201117318436, "grad_norm": 0.49637311697006226, "learning_rate": 0.0006562745098039216, "loss": 0.4644, "step": 12380 }, { "epoch": 6.9167597765363125, "grad_norm": 0.5367304682731628, "learning_rate": 0.0006562464985994397, "loss": 0.4843, "step": 12381 }, { "epoch": 6.91731843575419, "grad_norm": 0.4905094802379608, "learning_rate": 0.000656218487394958, "loss": 0.5144, "step": 12382 }, { "epoch": 6.917877094972067, "grad_norm": 0.5550079941749573, "learning_rate": 0.0006561904761904762, "loss": 0.5285, "step": 12383 }, { "epoch": 6.918435754189944, "grad_norm": 0.5665770173072815, "learning_rate": 0.0006561624649859944, "loss": 0.5761, "step": 12384 }, { "epoch": 6.918994413407821, "grad_norm": 0.397034615278244, "learning_rate": 
0.0006561344537815126, "loss": 0.463, "step": 12385 }, { "epoch": 6.919553072625698, "grad_norm": 0.391889750957489, "learning_rate": 0.0006561064425770307, "loss": 0.3847, "step": 12386 }, { "epoch": 6.920111731843575, "grad_norm": 0.4215538799762726, "learning_rate": 0.000656078431372549, "loss": 0.4007, "step": 12387 }, { "epoch": 6.920670391061453, "grad_norm": 0.49836936593055725, "learning_rate": 0.0006560504201680673, "loss": 0.5631, "step": 12388 }, { "epoch": 6.921229050279329, "grad_norm": 0.3793417811393738, "learning_rate": 0.0006560224089635855, "loss": 0.4234, "step": 12389 }, { "epoch": 6.921787709497207, "grad_norm": 0.3756851851940155, "learning_rate": 0.0006559943977591037, "loss": 0.3831, "step": 12390 }, { "epoch": 6.9223463687150835, "grad_norm": 0.5286829471588135, "learning_rate": 0.0006559663865546218, "loss": 0.4984, "step": 12391 }, { "epoch": 6.922905027932961, "grad_norm": 0.4152444899082184, "learning_rate": 0.0006559383753501401, "loss": 0.4575, "step": 12392 }, { "epoch": 6.923463687150838, "grad_norm": 2.9444100856781006, "learning_rate": 0.0006559103641456583, "loss": 0.5293, "step": 12393 }, { "epoch": 6.924022346368715, "grad_norm": 0.5320962071418762, "learning_rate": 0.0006558823529411765, "loss": 0.4551, "step": 12394 }, { "epoch": 6.924581005586592, "grad_norm": 3.482010841369629, "learning_rate": 0.0006558543417366947, "loss": 0.3821, "step": 12395 }, { "epoch": 6.925139664804469, "grad_norm": 0.48805537819862366, "learning_rate": 0.0006558263305322129, "loss": 0.4393, "step": 12396 }, { "epoch": 6.925698324022346, "grad_norm": 1.2725995779037476, "learning_rate": 0.0006557983193277311, "loss": 0.3222, "step": 12397 }, { "epoch": 6.926256983240224, "grad_norm": 0.49517494440078735, "learning_rate": 0.0006557703081232493, "loss": 0.3314, "step": 12398 }, { "epoch": 6.9268156424581, "grad_norm": 0.9687004685401917, "learning_rate": 0.0006557422969187675, "loss": 0.3499, "step": 12399 }, { "epoch": 6.927374301675978, 
"grad_norm": 0.40634989738464355, "learning_rate": 0.0006557142857142857, "loss": 0.4285, "step": 12400 }, { "epoch": 6.9279329608938545, "grad_norm": 0.7133250832557678, "learning_rate": 0.0006556862745098039, "loss": 0.5531, "step": 12401 }, { "epoch": 6.928491620111732, "grad_norm": 0.5127828121185303, "learning_rate": 0.0006556582633053221, "loss": 0.3871, "step": 12402 }, { "epoch": 6.9290502793296085, "grad_norm": 0.5962216258049011, "learning_rate": 0.0006556302521008403, "loss": 0.4671, "step": 12403 }, { "epoch": 6.929608938547486, "grad_norm": 1.6099315881729126, "learning_rate": 0.0006556022408963586, "loss": 0.3547, "step": 12404 }, { "epoch": 6.930167597765363, "grad_norm": 0.5895669460296631, "learning_rate": 0.0006555742296918768, "loss": 0.3576, "step": 12405 }, { "epoch": 6.93072625698324, "grad_norm": 0.5484691262245178, "learning_rate": 0.000655546218487395, "loss": 0.488, "step": 12406 }, { "epoch": 6.931284916201117, "grad_norm": 2.1635115146636963, "learning_rate": 0.0006555182072829132, "loss": 0.406, "step": 12407 }, { "epoch": 6.931843575418995, "grad_norm": 0.40444448590278625, "learning_rate": 0.0006554901960784314, "loss": 0.4016, "step": 12408 }, { "epoch": 6.932402234636871, "grad_norm": 0.5239859819412231, "learning_rate": 0.0006554621848739496, "loss": 0.5337, "step": 12409 }, { "epoch": 6.932960893854749, "grad_norm": 0.9825257062911987, "learning_rate": 0.0006554341736694678, "loss": 0.5708, "step": 12410 }, { "epoch": 6.9335195530726255, "grad_norm": 0.6810752153396606, "learning_rate": 0.000655406162464986, "loss": 0.6135, "step": 12411 }, { "epoch": 6.934078212290503, "grad_norm": 0.42385968565940857, "learning_rate": 0.0006553781512605043, "loss": 0.4227, "step": 12412 }, { "epoch": 6.9346368715083795, "grad_norm": 0.5549126267433167, "learning_rate": 0.0006553501400560224, "loss": 0.3593, "step": 12413 }, { "epoch": 6.935195530726257, "grad_norm": 0.732172966003418, "learning_rate": 0.0006553221288515406, "loss": 0.4366, 
"step": 12414 }, { "epoch": 6.935754189944134, "grad_norm": 0.5036166310310364, "learning_rate": 0.0006552941176470588, "loss": 0.5113, "step": 12415 }, { "epoch": 6.936312849162011, "grad_norm": 0.47933119535446167, "learning_rate": 0.000655266106442577, "loss": 0.3737, "step": 12416 }, { "epoch": 6.936871508379888, "grad_norm": 0.45102137327194214, "learning_rate": 0.0006552380952380953, "loss": 0.5188, "step": 12417 }, { "epoch": 6.937430167597765, "grad_norm": 0.4979225695133209, "learning_rate": 0.0006552100840336134, "loss": 0.3399, "step": 12418 }, { "epoch": 6.937988826815642, "grad_norm": 0.41004273295402527, "learning_rate": 0.0006551820728291316, "loss": 0.4299, "step": 12419 }, { "epoch": 6.93854748603352, "grad_norm": 0.6230870485305786, "learning_rate": 0.0006551540616246499, "loss": 0.4275, "step": 12420 }, { "epoch": 6.9391061452513965, "grad_norm": 0.5949428081512451, "learning_rate": 0.0006551260504201681, "loss": 0.4264, "step": 12421 }, { "epoch": 6.939664804469274, "grad_norm": 0.500310480594635, "learning_rate": 0.0006550980392156864, "loss": 0.483, "step": 12422 }, { "epoch": 6.9402234636871505, "grad_norm": 0.35211479663848877, "learning_rate": 0.0006550700280112045, "loss": 0.3301, "step": 12423 }, { "epoch": 6.940782122905028, "grad_norm": 1.598307490348816, "learning_rate": 0.0006550420168067227, "loss": 0.4115, "step": 12424 }, { "epoch": 6.941340782122905, "grad_norm": 0.5416072010993958, "learning_rate": 0.0006550140056022409, "loss": 0.445, "step": 12425 }, { "epoch": 6.941899441340782, "grad_norm": 0.8757627606391907, "learning_rate": 0.0006549859943977591, "loss": 0.5127, "step": 12426 }, { "epoch": 6.942458100558659, "grad_norm": 0.41450515389442444, "learning_rate": 0.0006549579831932774, "loss": 0.3773, "step": 12427 }, { "epoch": 6.943016759776536, "grad_norm": 0.40004053711891174, "learning_rate": 0.0006549299719887956, "loss": 0.4692, "step": 12428 }, { "epoch": 6.943575418994413, "grad_norm": 0.5712906122207642, 
"learning_rate": 0.0006549019607843137, "loss": 0.447, "step": 12429 }, { "epoch": 6.94413407821229, "grad_norm": 0.6028456091880798, "learning_rate": 0.0006548739495798319, "loss": 0.5044, "step": 12430 }, { "epoch": 6.9446927374301675, "grad_norm": 0.5334956049919128, "learning_rate": 0.0006548459383753501, "loss": 0.469, "step": 12431 }, { "epoch": 6.945251396648045, "grad_norm": 0.49053484201431274, "learning_rate": 0.0006548179271708684, "loss": 0.3471, "step": 12432 }, { "epoch": 6.9458100558659215, "grad_norm": 0.5994573831558228, "learning_rate": 0.0006547899159663866, "loss": 0.4562, "step": 12433 }, { "epoch": 6.946368715083799, "grad_norm": 0.601992666721344, "learning_rate": 0.0006547619047619047, "loss": 0.4583, "step": 12434 }, { "epoch": 6.946927374301676, "grad_norm": 0.48436716198921204, "learning_rate": 0.0006547338935574229, "loss": 0.4055, "step": 12435 }, { "epoch": 6.947486033519553, "grad_norm": 0.40805190801620483, "learning_rate": 0.0006547058823529411, "loss": 0.3068, "step": 12436 }, { "epoch": 6.94804469273743, "grad_norm": 0.4589974880218506, "learning_rate": 0.0006546778711484595, "loss": 0.3387, "step": 12437 }, { "epoch": 6.948603351955307, "grad_norm": 0.5143588185310364, "learning_rate": 0.0006546498599439777, "loss": 0.5418, "step": 12438 }, { "epoch": 6.949162011173184, "grad_norm": 0.4102528393268585, "learning_rate": 0.0006546218487394958, "loss": 0.3706, "step": 12439 }, { "epoch": 6.949720670391061, "grad_norm": 0.6891082525253296, "learning_rate": 0.000654593837535014, "loss": 0.4248, "step": 12440 }, { "epoch": 6.9502793296089385, "grad_norm": 0.4390251934528351, "learning_rate": 0.0006545658263305322, "loss": 0.4379, "step": 12441 }, { "epoch": 6.950837988826816, "grad_norm": 0.844826877117157, "learning_rate": 0.0006545378151260505, "loss": 0.392, "step": 12442 }, { "epoch": 6.9513966480446925, "grad_norm": 0.5709537267684937, "learning_rate": 0.0006545098039215687, "loss": 0.5046, "step": 12443 }, { "epoch": 
6.95195530726257, "grad_norm": 0.46316081285476685, "learning_rate": 0.0006544817927170869, "loss": 0.3944, "step": 12444 }, { "epoch": 6.952513966480447, "grad_norm": 0.6412991285324097, "learning_rate": 0.000654453781512605, "loss": 0.3976, "step": 12445 }, { "epoch": 6.953072625698324, "grad_norm": 1.5535904169082642, "learning_rate": 0.0006544257703081232, "loss": 0.4837, "step": 12446 }, { "epoch": 6.953631284916201, "grad_norm": 0.4667535722255707, "learning_rate": 0.0006543977591036415, "loss": 0.3707, "step": 12447 }, { "epoch": 6.954189944134078, "grad_norm": 0.7522304654121399, "learning_rate": 0.0006543697478991597, "loss": 0.4734, "step": 12448 }, { "epoch": 6.954748603351955, "grad_norm": 0.4077203869819641, "learning_rate": 0.0006543417366946779, "loss": 0.4009, "step": 12449 }, { "epoch": 6.955307262569832, "grad_norm": 0.6013436317443848, "learning_rate": 0.000654313725490196, "loss": 0.4216, "step": 12450 }, { "epoch": 6.9558659217877095, "grad_norm": 0.5561041831970215, "learning_rate": 0.0006542857142857142, "loss": 0.5843, "step": 12451 }, { "epoch": 6.956424581005587, "grad_norm": 1.5629853010177612, "learning_rate": 0.0006542577030812326, "loss": 0.5155, "step": 12452 }, { "epoch": 6.9569832402234635, "grad_norm": 0.6675905585289001, "learning_rate": 0.0006542296918767508, "loss": 0.4216, "step": 12453 }, { "epoch": 6.957541899441341, "grad_norm": 0.40994587540626526, "learning_rate": 0.000654201680672269, "loss": 0.4117, "step": 12454 }, { "epoch": 6.9581005586592175, "grad_norm": 0.5865683555603027, "learning_rate": 0.0006541736694677871, "loss": 0.496, "step": 12455 }, { "epoch": 6.958659217877095, "grad_norm": 0.5727628469467163, "learning_rate": 0.0006541456582633053, "loss": 0.4381, "step": 12456 }, { "epoch": 6.959217877094972, "grad_norm": 0.39902451634407043, "learning_rate": 0.0006541176470588236, "loss": 0.4047, "step": 12457 }, { "epoch": 6.959776536312849, "grad_norm": 7.920273303985596, "learning_rate": 0.0006540896358543418, 
"loss": 0.4464, "step": 12458 }, { "epoch": 6.960335195530726, "grad_norm": 0.4360896348953247, "learning_rate": 0.00065406162464986, "loss": 0.4946, "step": 12459 }, { "epoch": 6.960893854748603, "grad_norm": 0.4141186475753784, "learning_rate": 0.0006540336134453782, "loss": 0.3844, "step": 12460 }, { "epoch": 6.9614525139664805, "grad_norm": 0.6742033362388611, "learning_rate": 0.0006540056022408963, "loss": 0.441, "step": 12461 }, { "epoch": 6.962011173184358, "grad_norm": 2.065803050994873, "learning_rate": 0.0006539775910364146, "loss": 0.4161, "step": 12462 }, { "epoch": 6.9625698324022345, "grad_norm": 0.5230733156204224, "learning_rate": 0.0006539495798319328, "loss": 0.4631, "step": 12463 }, { "epoch": 6.963128491620112, "grad_norm": 0.4006344676017761, "learning_rate": 0.000653921568627451, "loss": 0.486, "step": 12464 }, { "epoch": 6.9636871508379885, "grad_norm": 0.5748187303543091, "learning_rate": 0.0006538935574229692, "loss": 0.432, "step": 12465 }, { "epoch": 6.964245810055866, "grad_norm": 0.4753177762031555, "learning_rate": 0.0006538655462184873, "loss": 0.4776, "step": 12466 }, { "epoch": 6.9648044692737425, "grad_norm": 0.8131725788116455, "learning_rate": 0.0006538375350140056, "loss": 0.4465, "step": 12467 }, { "epoch": 6.96536312849162, "grad_norm": 1.4685733318328857, "learning_rate": 0.0006538095238095238, "loss": 0.5757, "step": 12468 }, { "epoch": 6.965921787709497, "grad_norm": 0.4905511736869812, "learning_rate": 0.000653781512605042, "loss": 0.5628, "step": 12469 }, { "epoch": 6.966480446927374, "grad_norm": 0.7473582625389099, "learning_rate": 0.0006537535014005603, "loss": 0.4403, "step": 12470 }, { "epoch": 6.9670391061452515, "grad_norm": 1.3870606422424316, "learning_rate": 0.0006537254901960784, "loss": 0.4142, "step": 12471 }, { "epoch": 6.967597765363129, "grad_norm": 0.5380938053131104, "learning_rate": 0.0006536974789915967, "loss": 0.3878, "step": 12472 }, { "epoch": 6.9681564245810055, "grad_norm": 0.34117642045021057, 
"learning_rate": 0.0006536694677871149, "loss": 0.5236, "step": 12473 }, { "epoch": 6.968715083798883, "grad_norm": 0.42269209027290344, "learning_rate": 0.0006536414565826331, "loss": 0.4183, "step": 12474 }, { "epoch": 6.9692737430167595, "grad_norm": 0.3919951319694519, "learning_rate": 0.0006536134453781513, "loss": 0.4123, "step": 12475 }, { "epoch": 6.969832402234637, "grad_norm": 0.784248411655426, "learning_rate": 0.0006535854341736695, "loss": 0.4593, "step": 12476 }, { "epoch": 6.9703910614525135, "grad_norm": 0.4799293577671051, "learning_rate": 0.0006535574229691877, "loss": 0.4164, "step": 12477 }, { "epoch": 6.970949720670391, "grad_norm": 0.5970208644866943, "learning_rate": 0.0006535294117647059, "loss": 0.518, "step": 12478 }, { "epoch": 6.971508379888268, "grad_norm": 0.3954332768917084, "learning_rate": 0.0006535014005602241, "loss": 0.4724, "step": 12479 }, { "epoch": 6.972067039106145, "grad_norm": 0.4743720293045044, "learning_rate": 0.0006534733893557423, "loss": 0.4906, "step": 12480 }, { "epoch": 6.9726256983240225, "grad_norm": 1.1048377752304077, "learning_rate": 0.0006534453781512605, "loss": 0.6001, "step": 12481 }, { "epoch": 6.9731843575419, "grad_norm": 0.4624098837375641, "learning_rate": 0.0006534173669467787, "loss": 0.3546, "step": 12482 }, { "epoch": 6.9737430167597765, "grad_norm": 0.6275320649147034, "learning_rate": 0.0006533893557422969, "loss": 0.5894, "step": 12483 }, { "epoch": 6.974301675977654, "grad_norm": 0.5248343348503113, "learning_rate": 0.0006533613445378151, "loss": 0.3975, "step": 12484 }, { "epoch": 6.9748603351955305, "grad_norm": 0.36769047379493713, "learning_rate": 0.0006533333333333333, "loss": 0.3626, "step": 12485 }, { "epoch": 6.975418994413408, "grad_norm": 0.9091798067092896, "learning_rate": 0.0006533053221288516, "loss": 0.4925, "step": 12486 }, { "epoch": 6.9759776536312845, "grad_norm": 0.5735195875167847, "learning_rate": 0.0006532773109243699, "loss": 0.4249, "step": 12487 }, { "epoch": 
6.976536312849162, "grad_norm": 0.7241012454032898, "learning_rate": 0.000653249299719888, "loss": 0.4004, "step": 12488 }, { "epoch": 6.977094972067039, "grad_norm": 0.4537748396396637, "learning_rate": 0.0006532212885154062, "loss": 0.3994, "step": 12489 }, { "epoch": 6.977653631284916, "grad_norm": 0.4960888624191284, "learning_rate": 0.0006531932773109244, "loss": 0.3374, "step": 12490 }, { "epoch": 6.9782122905027935, "grad_norm": 0.4250923991203308, "learning_rate": 0.0006531652661064426, "loss": 0.3804, "step": 12491 }, { "epoch": 6.97877094972067, "grad_norm": 2.0398457050323486, "learning_rate": 0.0006531372549019609, "loss": 0.4517, "step": 12492 }, { "epoch": 6.9793296089385475, "grad_norm": 0.6344838738441467, "learning_rate": 0.000653109243697479, "loss": 0.3714, "step": 12493 }, { "epoch": 6.979888268156425, "grad_norm": 0.44564327597618103, "learning_rate": 0.0006530812324929972, "loss": 0.4715, "step": 12494 }, { "epoch": 6.9804469273743015, "grad_norm": 0.4302724599838257, "learning_rate": 0.0006530532212885154, "loss": 0.4688, "step": 12495 }, { "epoch": 6.981005586592179, "grad_norm": 0.6383557319641113, "learning_rate": 0.0006530252100840336, "loss": 0.512, "step": 12496 }, { "epoch": 6.9815642458100555, "grad_norm": 0.5707694888114929, "learning_rate": 0.0006529971988795518, "loss": 0.5227, "step": 12497 }, { "epoch": 6.982122905027933, "grad_norm": 0.6816879510879517, "learning_rate": 0.00065296918767507, "loss": 0.5732, "step": 12498 }, { "epoch": 6.98268156424581, "grad_norm": 2.52976131439209, "learning_rate": 0.0006529411764705882, "loss": 0.5392, "step": 12499 }, { "epoch": 6.983240223463687, "grad_norm": 0.4378868639469147, "learning_rate": 0.0006529131652661064, "loss": 0.3632, "step": 12500 }, { "epoch": 6.983240223463687, "eval_cer": 0.09071716146770974, "eval_loss": 0.3444172739982605, "eval_runtime": 61.6052, "eval_samples_per_second": 73.663, "eval_steps_per_second": 4.61, "eval_wer": 0.3576995056004022, "step": 12500 }, { "epoch": 
6.9837988826815645, "grad_norm": 1.5691546201705933, "learning_rate": 0.0006528851540616246, "loss": 0.4101, "step": 12501 }, { "epoch": 6.984357541899441, "grad_norm": 0.7826452255249023, "learning_rate": 0.0006528571428571429, "loss": 0.5157, "step": 12502 }, { "epoch": 6.9849162011173185, "grad_norm": 1.035007357597351, "learning_rate": 0.0006528291316526612, "loss": 0.5782, "step": 12503 }, { "epoch": 6.985474860335195, "grad_norm": 1.1592096090316772, "learning_rate": 0.0006528011204481793, "loss": 0.5618, "step": 12504 }, { "epoch": 6.9860335195530725, "grad_norm": 0.7418396472930908, "learning_rate": 0.0006527731092436975, "loss": 0.4641, "step": 12505 }, { "epoch": 6.98659217877095, "grad_norm": 1.3196353912353516, "learning_rate": 0.0006527450980392157, "loss": 0.419, "step": 12506 }, { "epoch": 6.9871508379888265, "grad_norm": 0.6099117398262024, "learning_rate": 0.0006527170868347339, "loss": 0.3834, "step": 12507 }, { "epoch": 6.987709497206704, "grad_norm": 0.7795719504356384, "learning_rate": 0.0006526890756302522, "loss": 0.4327, "step": 12508 }, { "epoch": 6.988268156424581, "grad_norm": 0.45224884152412415, "learning_rate": 0.0006526610644257703, "loss": 0.353, "step": 12509 }, { "epoch": 6.988826815642458, "grad_norm": 0.8954925537109375, "learning_rate": 0.0006526330532212885, "loss": 0.5832, "step": 12510 }, { "epoch": 6.9893854748603355, "grad_norm": 3.428053855895996, "learning_rate": 0.0006526050420168067, "loss": 0.5943, "step": 12511 }, { "epoch": 6.989944134078212, "grad_norm": 0.5529339909553528, "learning_rate": 0.0006525770308123249, "loss": 0.3559, "step": 12512 }, { "epoch": 6.9905027932960895, "grad_norm": 0.8013816475868225, "learning_rate": 0.0006525490196078432, "loss": 0.383, "step": 12513 }, { "epoch": 6.991061452513966, "grad_norm": 0.5604310631752014, "learning_rate": 0.0006525210084033613, "loss": 0.5928, "step": 12514 }, { "epoch": 6.9916201117318435, "grad_norm": 0.779806911945343, "learning_rate": 0.0006524929971988795, 
"loss": 0.4769, "step": 12515 }, { "epoch": 6.992178770949721, "grad_norm": 0.5772127509117126, "learning_rate": 0.0006524649859943977, "loss": 0.4831, "step": 12516 }, { "epoch": 6.9927374301675975, "grad_norm": 0.47065261006355286, "learning_rate": 0.0006524369747899159, "loss": 0.4899, "step": 12517 }, { "epoch": 6.993296089385475, "grad_norm": 1.526015281677246, "learning_rate": 0.0006524089635854343, "loss": 0.402, "step": 12518 }, { "epoch": 6.993854748603352, "grad_norm": 0.3837558329105377, "learning_rate": 0.0006523809523809525, "loss": 0.4607, "step": 12519 }, { "epoch": 6.994413407821229, "grad_norm": 0.4979607164859772, "learning_rate": 0.0006523529411764706, "loss": 0.4267, "step": 12520 }, { "epoch": 6.9949720670391065, "grad_norm": 0.4629913866519928, "learning_rate": 0.0006523249299719888, "loss": 0.4747, "step": 12521 }, { "epoch": 6.995530726256983, "grad_norm": 0.9315935969352722, "learning_rate": 0.000652296918767507, "loss": 0.4815, "step": 12522 }, { "epoch": 6.9960893854748605, "grad_norm": 0.7310153841972351, "learning_rate": 0.0006522689075630253, "loss": 0.448, "step": 12523 }, { "epoch": 6.996648044692737, "grad_norm": 0.3153943121433258, "learning_rate": 0.0006522408963585435, "loss": 0.3609, "step": 12524 }, { "epoch": 6.9972067039106145, "grad_norm": 0.5415436625480652, "learning_rate": 0.0006522128851540616, "loss": 0.4289, "step": 12525 }, { "epoch": 6.997765363128492, "grad_norm": 0.40753111243247986, "learning_rate": 0.0006521848739495798, "loss": 0.4147, "step": 12526 }, { "epoch": 6.9983240223463685, "grad_norm": 0.5162279605865479, "learning_rate": 0.000652156862745098, "loss": 0.5075, "step": 12527 }, { "epoch": 6.998882681564246, "grad_norm": 0.4145958423614502, "learning_rate": 0.0006521288515406163, "loss": 0.4486, "step": 12528 }, { "epoch": 6.9994413407821225, "grad_norm": 0.6179265975952148, "learning_rate": 0.0006521008403361345, "loss": 0.3807, "step": 12529 }, { "epoch": 7.0, "grad_norm": 0.4103699028491974, 
"learning_rate": 0.0006520728291316526, "loss": 0.416, "step": 12530 }, { "epoch": 7.0005586592178775, "grad_norm": 0.659807562828064, "learning_rate": 0.0006520448179271708, "loss": 0.4172, "step": 12531 }, { "epoch": 7.001117318435754, "grad_norm": 0.7853411436080933, "learning_rate": 0.000652016806722689, "loss": 0.5219, "step": 12532 }, { "epoch": 7.0016759776536315, "grad_norm": 0.4956013262271881, "learning_rate": 0.0006519887955182073, "loss": 0.4955, "step": 12533 }, { "epoch": 7.002234636871508, "grad_norm": 0.4066002070903778, "learning_rate": 0.0006519607843137256, "loss": 0.4167, "step": 12534 }, { "epoch": 7.0027932960893855, "grad_norm": 0.5741144418716431, "learning_rate": 0.0006519327731092438, "loss": 0.5755, "step": 12535 }, { "epoch": 7.003351955307263, "grad_norm": 0.6808263659477234, "learning_rate": 0.0006519047619047619, "loss": 0.5082, "step": 12536 }, { "epoch": 7.0039106145251395, "grad_norm": 0.3875439465045929, "learning_rate": 0.0006518767507002801, "loss": 0.4286, "step": 12537 }, { "epoch": 7.004469273743017, "grad_norm": 1.193933367729187, "learning_rate": 0.0006518487394957984, "loss": 0.4419, "step": 12538 }, { "epoch": 7.0050279329608935, "grad_norm": 0.398002028465271, "learning_rate": 0.0006518207282913166, "loss": 0.4691, "step": 12539 }, { "epoch": 7.005586592178771, "grad_norm": 0.839568018913269, "learning_rate": 0.0006517927170868348, "loss": 0.4841, "step": 12540 }, { "epoch": 7.0061452513966485, "grad_norm": 0.5931982398033142, "learning_rate": 0.0006517647058823529, "loss": 0.5055, "step": 12541 }, { "epoch": 7.006703910614525, "grad_norm": 0.4617202877998352, "learning_rate": 0.0006517366946778711, "loss": 0.4025, "step": 12542 }, { "epoch": 7.0072625698324025, "grad_norm": 1.279715657234192, "learning_rate": 0.0006517086834733894, "loss": 0.4088, "step": 12543 }, { "epoch": 7.007821229050279, "grad_norm": 1.403926968574524, "learning_rate": 0.0006516806722689076, "loss": 0.3234, "step": 12544 }, { "epoch": 
7.0083798882681565, "grad_norm": 0.48188167810440063, "learning_rate": 0.0006516526610644258, "loss": 0.445, "step": 12545 }, { "epoch": 7.008938547486034, "grad_norm": 0.49491217732429504, "learning_rate": 0.0006516246498599439, "loss": 0.3788, "step": 12546 }, { "epoch": 7.0094972067039105, "grad_norm": 0.6949318647384644, "learning_rate": 0.0006515966386554621, "loss": 0.4554, "step": 12547 }, { "epoch": 7.010055865921788, "grad_norm": 0.4032461643218994, "learning_rate": 0.0006515686274509804, "loss": 0.3735, "step": 12548 }, { "epoch": 7.0106145251396645, "grad_norm": 0.6850608587265015, "learning_rate": 0.0006515406162464986, "loss": 0.683, "step": 12549 }, { "epoch": 7.011173184357542, "grad_norm": 0.4039214849472046, "learning_rate": 0.0006515126050420168, "loss": 0.3644, "step": 12550 }, { "epoch": 7.011731843575419, "grad_norm": 0.4704190194606781, "learning_rate": 0.000651484593837535, "loss": 0.4775, "step": 12551 }, { "epoch": 7.012290502793296, "grad_norm": 0.5351200699806213, "learning_rate": 0.0006514565826330532, "loss": 0.4556, "step": 12552 }, { "epoch": 7.0128491620111735, "grad_norm": 0.7129198312759399, "learning_rate": 0.0006514285714285715, "loss": 0.5092, "step": 12553 }, { "epoch": 7.01340782122905, "grad_norm": 0.8035323619842529, "learning_rate": 0.0006514005602240897, "loss": 0.8438, "step": 12554 }, { "epoch": 7.0139664804469275, "grad_norm": 0.6217595338821411, "learning_rate": 0.0006513725490196079, "loss": 0.4633, "step": 12555 }, { "epoch": 7.014525139664804, "grad_norm": 0.9007251262664795, "learning_rate": 0.0006513445378151261, "loss": 0.4926, "step": 12556 }, { "epoch": 7.0150837988826815, "grad_norm": 0.3942495584487915, "learning_rate": 0.0006513165266106442, "loss": 0.3861, "step": 12557 }, { "epoch": 7.015642458100559, "grad_norm": 0.4143909811973572, "learning_rate": 0.0006512885154061625, "loss": 0.342, "step": 12558 }, { "epoch": 7.0162011173184355, "grad_norm": 0.5664569735527039, "learning_rate": 0.0006512605042016807, 
"loss": 0.5453, "step": 12559 }, { "epoch": 7.016759776536313, "grad_norm": 0.5399006009101868, "learning_rate": 0.0006512324929971989, "loss": 0.4397, "step": 12560 }, { "epoch": 7.01731843575419, "grad_norm": 0.9018974304199219, "learning_rate": 0.0006512044817927171, "loss": 0.4963, "step": 12561 }, { "epoch": 7.017877094972067, "grad_norm": 10.350446701049805, "learning_rate": 0.0006511764705882352, "loss": 0.452, "step": 12562 }, { "epoch": 7.0184357541899445, "grad_norm": 0.7601476907730103, "learning_rate": 0.0006511484593837535, "loss": 0.4971, "step": 12563 }, { "epoch": 7.018994413407821, "grad_norm": 1.6554700136184692, "learning_rate": 0.0006511204481792717, "loss": 0.3939, "step": 12564 }, { "epoch": 7.0195530726256985, "grad_norm": 0.6143400073051453, "learning_rate": 0.0006510924369747899, "loss": 0.6002, "step": 12565 }, { "epoch": 7.020111731843575, "grad_norm": 0.6016078591346741, "learning_rate": 0.0006510644257703081, "loss": 0.4271, "step": 12566 }, { "epoch": 7.0206703910614525, "grad_norm": 0.6250494122505188, "learning_rate": 0.0006510364145658263, "loss": 0.4922, "step": 12567 }, { "epoch": 7.02122905027933, "grad_norm": 0.5893234014511108, "learning_rate": 0.0006510084033613446, "loss": 0.4632, "step": 12568 }, { "epoch": 7.0217877094972065, "grad_norm": 0.4258977472782135, "learning_rate": 0.0006509803921568628, "loss": 0.3828, "step": 12569 }, { "epoch": 7.022346368715084, "grad_norm": 0.43992477655410767, "learning_rate": 0.000650952380952381, "loss": 0.5041, "step": 12570 }, { "epoch": 7.022905027932961, "grad_norm": 0.6892359852790833, "learning_rate": 0.0006509243697478992, "loss": 0.5246, "step": 12571 }, { "epoch": 7.023463687150838, "grad_norm": 0.43133360147476196, "learning_rate": 0.0006508963585434174, "loss": 0.4757, "step": 12572 }, { "epoch": 7.0240223463687155, "grad_norm": 0.7305607795715332, "learning_rate": 0.0006508683473389356, "loss": 0.379, "step": 12573 }, { "epoch": 7.024581005586592, "grad_norm": 
0.634161114692688, "learning_rate": 0.0006508403361344538, "loss": 0.4357, "step": 12574 }, { "epoch": 7.0251396648044695, "grad_norm": 0.9562336206436157, "learning_rate": 0.000650812324929972, "loss": 0.45, "step": 12575 }, { "epoch": 7.025698324022346, "grad_norm": 0.5201601386070251, "learning_rate": 0.0006507843137254902, "loss": 0.5613, "step": 12576 }, { "epoch": 7.0262569832402235, "grad_norm": 1.1862400770187378, "learning_rate": 0.0006507563025210084, "loss": 0.426, "step": 12577 }, { "epoch": 7.026815642458101, "grad_norm": 0.4462496340274811, "learning_rate": 0.0006507282913165266, "loss": 0.3742, "step": 12578 }, { "epoch": 7.0273743016759775, "grad_norm": 0.49201565980911255, "learning_rate": 0.0006507002801120448, "loss": 0.5107, "step": 12579 }, { "epoch": 7.027932960893855, "grad_norm": 0.5636613965034485, "learning_rate": 0.000650672268907563, "loss": 0.4795, "step": 12580 }, { "epoch": 7.028491620111732, "grad_norm": 2.4695205688476562, "learning_rate": 0.0006506442577030812, "loss": 0.3979, "step": 12581 }, { "epoch": 7.029050279329609, "grad_norm": 1.5455950498580933, "learning_rate": 0.0006506162464985994, "loss": 0.3458, "step": 12582 }, { "epoch": 7.0296089385474865, "grad_norm": 0.49119916558265686, "learning_rate": 0.0006505882352941178, "loss": 0.4275, "step": 12583 }, { "epoch": 7.030167597765363, "grad_norm": 0.44068941473960876, "learning_rate": 0.0006505602240896359, "loss": 0.3821, "step": 12584 }, { "epoch": 7.0307262569832405, "grad_norm": 0.7384152412414551, "learning_rate": 0.0006505322128851541, "loss": 0.3813, "step": 12585 }, { "epoch": 7.031284916201117, "grad_norm": 0.6572919487953186, "learning_rate": 0.0006505042016806723, "loss": 0.3948, "step": 12586 }, { "epoch": 7.0318435754189945, "grad_norm": 0.6202011108398438, "learning_rate": 0.0006504761904761905, "loss": 0.3574, "step": 12587 }, { "epoch": 7.032402234636871, "grad_norm": 0.9921704530715942, "learning_rate": 0.0006504481792717088, "loss": 0.4431, "step": 12588 }, 
{ "epoch": 7.0329608938547485, "grad_norm": 1.0408921241760254, "learning_rate": 0.0006504201680672269, "loss": 0.4842, "step": 12589 }, { "epoch": 7.033519553072626, "grad_norm": 0.41934728622436523, "learning_rate": 0.0006503921568627451, "loss": 0.3641, "step": 12590 }, { "epoch": 7.034078212290503, "grad_norm": 0.7358263731002808, "learning_rate": 0.0006503641456582633, "loss": 0.3463, "step": 12591 }, { "epoch": 7.03463687150838, "grad_norm": 0.8981183767318726, "learning_rate": 0.0006503361344537815, "loss": 0.5734, "step": 12592 }, { "epoch": 7.035195530726257, "grad_norm": 0.46990492939949036, "learning_rate": 0.0006503081232492998, "loss": 0.4154, "step": 12593 }, { "epoch": 7.035754189944134, "grad_norm": 0.4415307939052582, "learning_rate": 0.0006502801120448179, "loss": 0.3851, "step": 12594 }, { "epoch": 7.0363128491620115, "grad_norm": 3.0129590034484863, "learning_rate": 0.0006502521008403361, "loss": 0.3889, "step": 12595 }, { "epoch": 7.036871508379888, "grad_norm": 0.6053531169891357, "learning_rate": 0.0006502240896358543, "loss": 0.777, "step": 12596 }, { "epoch": 7.0374301675977655, "grad_norm": 2.343404769897461, "learning_rate": 0.0006501960784313725, "loss": 0.489, "step": 12597 }, { "epoch": 7.037988826815642, "grad_norm": 0.7072979807853699, "learning_rate": 0.0006501680672268908, "loss": 0.3918, "step": 12598 }, { "epoch": 7.0385474860335195, "grad_norm": 0.5817676186561584, "learning_rate": 0.000650140056022409, "loss": 0.4072, "step": 12599 }, { "epoch": 7.039106145251397, "grad_norm": 0.546341598033905, "learning_rate": 0.0006501120448179271, "loss": 0.3653, "step": 12600 }, { "epoch": 7.039664804469274, "grad_norm": 0.5390695929527283, "learning_rate": 0.0006500840336134454, "loss": 0.4747, "step": 12601 }, { "epoch": 7.040223463687151, "grad_norm": 0.8624338507652283, "learning_rate": 0.0006500560224089636, "loss": 0.6596, "step": 12602 }, { "epoch": 7.040782122905028, "grad_norm": 0.6916890144348145, "learning_rate": 
0.0006500280112044819, "loss": 0.3261, "step": 12603 }, { "epoch": 7.041340782122905, "grad_norm": 1.2541073560714722, "learning_rate": 0.0006500000000000001, "loss": 0.4168, "step": 12604 }, { "epoch": 7.0418994413407825, "grad_norm": 0.47612178325653076, "learning_rate": 0.0006499719887955182, "loss": 0.4731, "step": 12605 }, { "epoch": 7.042458100558659, "grad_norm": 0.5722107887268066, "learning_rate": 0.0006499439775910364, "loss": 0.3883, "step": 12606 }, { "epoch": 7.0430167597765365, "grad_norm": 0.8015046119689941, "learning_rate": 0.0006499159663865546, "loss": 0.4488, "step": 12607 }, { "epoch": 7.043575418994413, "grad_norm": 1.1525018215179443, "learning_rate": 0.0006498879551820729, "loss": 0.3645, "step": 12608 }, { "epoch": 7.0441340782122905, "grad_norm": 1.1446833610534668, "learning_rate": 0.0006498599439775911, "loss": 0.4705, "step": 12609 }, { "epoch": 7.044692737430168, "grad_norm": 0.4137093722820282, "learning_rate": 0.0006498319327731092, "loss": 0.3488, "step": 12610 }, { "epoch": 7.045251396648045, "grad_norm": 0.4494577944278717, "learning_rate": 0.0006498039215686274, "loss": 0.4752, "step": 12611 }, { "epoch": 7.045810055865922, "grad_norm": 0.5632845163345337, "learning_rate": 0.0006497759103641456, "loss": 0.469, "step": 12612 }, { "epoch": 7.046368715083799, "grad_norm": 0.47470736503601074, "learning_rate": 0.0006497478991596639, "loss": 0.3659, "step": 12613 }, { "epoch": 7.046927374301676, "grad_norm": 0.7343278527259827, "learning_rate": 0.0006497198879551821, "loss": 0.4551, "step": 12614 }, { "epoch": 7.0474860335195535, "grad_norm": 0.5307226777076721, "learning_rate": 0.0006496918767507003, "loss": 0.3913, "step": 12615 }, { "epoch": 7.04804469273743, "grad_norm": 0.5707471370697021, "learning_rate": 0.0006496638655462184, "loss": 0.489, "step": 12616 }, { "epoch": 7.0486033519553075, "grad_norm": 0.4415913224220276, "learning_rate": 0.0006496358543417366, "loss": 0.4385, "step": 12617 }, { "epoch": 7.049162011173184, 
"grad_norm": 0.6077116131782532, "learning_rate": 0.000649607843137255, "loss": 0.3921, "step": 12618 }, { "epoch": 7.0497206703910615, "grad_norm": 1.2557839155197144, "learning_rate": 0.0006495798319327732, "loss": 0.6063, "step": 12619 }, { "epoch": 7.050279329608939, "grad_norm": 0.4803507328033447, "learning_rate": 0.0006495518207282914, "loss": 0.368, "step": 12620 }, { "epoch": 7.050837988826816, "grad_norm": 0.42184460163116455, "learning_rate": 0.0006495238095238095, "loss": 0.3273, "step": 12621 }, { "epoch": 7.051396648044693, "grad_norm": 0.636117160320282, "learning_rate": 0.0006494957983193277, "loss": 0.4355, "step": 12622 }, { "epoch": 7.05195530726257, "grad_norm": 0.5575524568557739, "learning_rate": 0.000649467787114846, "loss": 0.4663, "step": 12623 }, { "epoch": 7.052513966480447, "grad_norm": 0.49263331294059753, "learning_rate": 0.0006494397759103642, "loss": 0.5282, "step": 12624 }, { "epoch": 7.053072625698324, "grad_norm": 0.4541114866733551, "learning_rate": 0.0006494117647058824, "loss": 0.4486, "step": 12625 }, { "epoch": 7.053631284916201, "grad_norm": 1.2242417335510254, "learning_rate": 0.0006493837535014005, "loss": 0.5726, "step": 12626 }, { "epoch": 7.0541899441340785, "grad_norm": 1.0743666887283325, "learning_rate": 0.0006493557422969187, "loss": 0.5283, "step": 12627 }, { "epoch": 7.054748603351955, "grad_norm": 0.979205310344696, "learning_rate": 0.000649327731092437, "loss": 0.3351, "step": 12628 }, { "epoch": 7.0553072625698325, "grad_norm": 0.5161188840866089, "learning_rate": 0.0006492997198879552, "loss": 0.4595, "step": 12629 }, { "epoch": 7.055865921787709, "grad_norm": 0.6742265820503235, "learning_rate": 0.0006492717086834734, "loss": 0.437, "step": 12630 }, { "epoch": 7.056424581005587, "grad_norm": 0.5923387408256531, "learning_rate": 0.0006492436974789916, "loss": 0.3738, "step": 12631 }, { "epoch": 7.056983240223464, "grad_norm": 0.552234411239624, "learning_rate": 0.0006492156862745097, "loss": 0.4338, "step": 
12632 }, { "epoch": 7.057541899441341, "grad_norm": 1.2429804801940918, "learning_rate": 0.000649187675070028, "loss": 0.4514, "step": 12633 }, { "epoch": 7.058100558659218, "grad_norm": 0.45925721526145935, "learning_rate": 0.0006491596638655463, "loss": 0.3422, "step": 12634 }, { "epoch": 7.058659217877095, "grad_norm": 0.5782500505447388, "learning_rate": 0.0006491316526610645, "loss": 0.4749, "step": 12635 }, { "epoch": 7.059217877094972, "grad_norm": 0.42826059460639954, "learning_rate": 0.0006491036414565827, "loss": 0.466, "step": 12636 }, { "epoch": 7.0597765363128495, "grad_norm": 0.5627808570861816, "learning_rate": 0.0006490756302521008, "loss": 0.4615, "step": 12637 }, { "epoch": 7.060335195530726, "grad_norm": 0.45309755206108093, "learning_rate": 0.0006490476190476191, "loss": 0.3708, "step": 12638 }, { "epoch": 7.0608938547486035, "grad_norm": 0.46642163395881653, "learning_rate": 0.0006490196078431373, "loss": 0.5278, "step": 12639 }, { "epoch": 7.06145251396648, "grad_norm": 0.4951694905757904, "learning_rate": 0.0006489915966386555, "loss": 0.5147, "step": 12640 }, { "epoch": 7.062011173184358, "grad_norm": 0.4209784269332886, "learning_rate": 0.0006489635854341737, "loss": 0.5168, "step": 12641 }, { "epoch": 7.062569832402235, "grad_norm": 0.7425562143325806, "learning_rate": 0.0006489355742296918, "loss": 0.5149, "step": 12642 }, { "epoch": 7.063128491620112, "grad_norm": 0.6006401777267456, "learning_rate": 0.0006489075630252101, "loss": 0.5103, "step": 12643 }, { "epoch": 7.063687150837989, "grad_norm": 0.43235620856285095, "learning_rate": 0.0006488795518207283, "loss": 0.317, "step": 12644 }, { "epoch": 7.064245810055866, "grad_norm": 0.597262978553772, "learning_rate": 0.0006488515406162465, "loss": 0.5351, "step": 12645 }, { "epoch": 7.064804469273743, "grad_norm": 0.36786171793937683, "learning_rate": 0.0006488235294117647, "loss": 0.3967, "step": 12646 }, { "epoch": 7.0653631284916205, "grad_norm": 0.47619083523750305, "learning_rate": 
0.0006487955182072829, "loss": 0.5102, "step": 12647 }, { "epoch": 7.065921787709497, "grad_norm": 1.6786117553710938, "learning_rate": 0.0006487675070028011, "loss": 0.4214, "step": 12648 }, { "epoch": 7.0664804469273745, "grad_norm": 0.4987828731536865, "learning_rate": 0.0006487394957983193, "loss": 0.3541, "step": 12649 }, { "epoch": 7.067039106145251, "grad_norm": 0.4355384409427643, "learning_rate": 0.0006487114845938376, "loss": 0.4752, "step": 12650 }, { "epoch": 7.067597765363129, "grad_norm": 0.4388313889503479, "learning_rate": 0.0006486834733893558, "loss": 0.4073, "step": 12651 }, { "epoch": 7.068156424581006, "grad_norm": 0.450723260641098, "learning_rate": 0.000648655462184874, "loss": 0.4633, "step": 12652 }, { "epoch": 7.068715083798883, "grad_norm": 0.5171381235122681, "learning_rate": 0.0006486274509803922, "loss": 0.4776, "step": 12653 }, { "epoch": 7.06927374301676, "grad_norm": 0.4311113953590393, "learning_rate": 0.0006485994397759104, "loss": 0.4724, "step": 12654 }, { "epoch": 7.069832402234637, "grad_norm": 0.4372659921646118, "learning_rate": 0.0006485714285714286, "loss": 0.4037, "step": 12655 }, { "epoch": 7.070391061452514, "grad_norm": 0.5358920693397522, "learning_rate": 0.0006485434173669468, "loss": 0.3544, "step": 12656 }, { "epoch": 7.070949720670391, "grad_norm": 0.3797658681869507, "learning_rate": 0.000648515406162465, "loss": 0.3823, "step": 12657 }, { "epoch": 7.071508379888268, "grad_norm": 0.5499960780143738, "learning_rate": 0.0006484873949579833, "loss": 0.4706, "step": 12658 }, { "epoch": 7.0720670391061455, "grad_norm": 0.6061813831329346, "learning_rate": 0.0006484593837535014, "loss": 0.4559, "step": 12659 }, { "epoch": 7.072625698324022, "grad_norm": 6.584712982177734, "learning_rate": 0.0006484313725490196, "loss": 0.5266, "step": 12660 }, { "epoch": 7.0731843575419, "grad_norm": 0.5736716389656067, "learning_rate": 0.0006484033613445378, "loss": 0.5633, "step": 12661 }, { "epoch": 7.073743016759776, "grad_norm": 
0.38336530327796936, "learning_rate": 0.000648375350140056, "loss": 0.3396, "step": 12662 }, { "epoch": 7.074301675977654, "grad_norm": 0.6666037440299988, "learning_rate": 0.0006483473389355743, "loss": 0.4675, "step": 12663 }, { "epoch": 7.074860335195531, "grad_norm": 0.5090362429618835, "learning_rate": 0.0006483193277310924, "loss": 0.4982, "step": 12664 }, { "epoch": 7.075418994413408, "grad_norm": 0.5086391568183899, "learning_rate": 0.0006482913165266106, "loss": 0.4555, "step": 12665 }, { "epoch": 7.075977653631285, "grad_norm": 0.46071144938468933, "learning_rate": 0.0006482633053221289, "loss": 0.3868, "step": 12666 }, { "epoch": 7.076536312849162, "grad_norm": 0.47882330417633057, "learning_rate": 0.0006482352941176471, "loss": 0.421, "step": 12667 }, { "epoch": 7.077094972067039, "grad_norm": 0.6279311776161194, "learning_rate": 0.0006482072829131654, "loss": 0.4989, "step": 12668 }, { "epoch": 7.0776536312849165, "grad_norm": 0.6202947497367859, "learning_rate": 0.0006481792717086835, "loss": 0.5398, "step": 12669 }, { "epoch": 7.078212290502793, "grad_norm": 0.5088551044464111, "learning_rate": 0.0006481512605042017, "loss": 0.3737, "step": 12670 }, { "epoch": 7.078770949720671, "grad_norm": 0.7917990684509277, "learning_rate": 0.0006481232492997199, "loss": 0.4523, "step": 12671 }, { "epoch": 7.079329608938547, "grad_norm": 1.0711537599563599, "learning_rate": 0.0006480952380952381, "loss": 0.4611, "step": 12672 }, { "epoch": 7.079888268156425, "grad_norm": 0.6013180017471313, "learning_rate": 0.0006480672268907564, "loss": 0.5376, "step": 12673 }, { "epoch": 7.080446927374302, "grad_norm": 0.7891436219215393, "learning_rate": 0.0006480392156862746, "loss": 0.4557, "step": 12674 }, { "epoch": 7.081005586592179, "grad_norm": 0.5295562148094177, "learning_rate": 0.0006480112044817927, "loss": 0.5685, "step": 12675 }, { "epoch": 7.081564245810056, "grad_norm": 0.5940903425216675, "learning_rate": 0.0006479831932773109, "loss": 0.4283, "step": 12676 }, 
{ "epoch": 7.082122905027933, "grad_norm": 0.4647383689880371, "learning_rate": 0.0006479551820728291, "loss": 0.5954, "step": 12677 }, { "epoch": 7.08268156424581, "grad_norm": 0.7529733777046204, "learning_rate": 0.0006479271708683474, "loss": 0.3366, "step": 12678 }, { "epoch": 7.0832402234636875, "grad_norm": 0.4236205816268921, "learning_rate": 0.0006478991596638656, "loss": 0.4257, "step": 12679 }, { "epoch": 7.083798882681564, "grad_norm": 0.44986915588378906, "learning_rate": 0.0006478711484593837, "loss": 0.4193, "step": 12680 }, { "epoch": 7.084357541899442, "grad_norm": 0.5581731200218201, "learning_rate": 0.0006478431372549019, "loss": 0.4689, "step": 12681 }, { "epoch": 7.084916201117318, "grad_norm": 0.4808078706264496, "learning_rate": 0.0006478151260504201, "loss": 0.4337, "step": 12682 }, { "epoch": 7.085474860335196, "grad_norm": 0.4529827833175659, "learning_rate": 0.0006477871148459385, "loss": 0.3852, "step": 12683 }, { "epoch": 7.086033519553073, "grad_norm": 0.43915534019470215, "learning_rate": 0.0006477591036414567, "loss": 0.4549, "step": 12684 }, { "epoch": 7.08659217877095, "grad_norm": 0.5764197111129761, "learning_rate": 0.0006477310924369748, "loss": 0.4775, "step": 12685 }, { "epoch": 7.087150837988827, "grad_norm": 0.7181698679924011, "learning_rate": 0.000647703081232493, "loss": 0.5242, "step": 12686 }, { "epoch": 7.087709497206704, "grad_norm": 0.47215521335601807, "learning_rate": 0.0006476750700280112, "loss": 0.3651, "step": 12687 }, { "epoch": 7.088268156424581, "grad_norm": 0.6749891638755798, "learning_rate": 0.0006476470588235295, "loss": 0.4695, "step": 12688 }, { "epoch": 7.0888268156424585, "grad_norm": 0.8477067947387695, "learning_rate": 0.0006476190476190477, "loss": 0.3801, "step": 12689 }, { "epoch": 7.089385474860335, "grad_norm": 0.43932828307151794, "learning_rate": 0.0006475910364145659, "loss": 0.4026, "step": 12690 }, { "epoch": 7.089944134078213, "grad_norm": 0.9299423694610596, "learning_rate": 
0.000647563025210084, "loss": 0.4854, "step": 12691 }, { "epoch": 7.090502793296089, "grad_norm": 0.4638229012489319, "learning_rate": 0.0006475350140056022, "loss": 0.4092, "step": 12692 }, { "epoch": 7.091061452513967, "grad_norm": 0.48176753520965576, "learning_rate": 0.0006475070028011205, "loss": 0.38, "step": 12693 }, { "epoch": 7.091620111731843, "grad_norm": 0.5932209491729736, "learning_rate": 0.0006474789915966387, "loss": 0.4243, "step": 12694 }, { "epoch": 7.092178770949721, "grad_norm": 3.0513081550598145, "learning_rate": 0.0006474509803921569, "loss": 0.5551, "step": 12695 }, { "epoch": 7.092737430167598, "grad_norm": 0.3734993636608124, "learning_rate": 0.000647422969187675, "loss": 0.411, "step": 12696 }, { "epoch": 7.093296089385475, "grad_norm": 0.4572320580482483, "learning_rate": 0.0006473949579831932, "loss": 0.612, "step": 12697 }, { "epoch": 7.093854748603352, "grad_norm": 0.42675843834877014, "learning_rate": 0.0006473669467787116, "loss": 0.4259, "step": 12698 }, { "epoch": 7.094413407821229, "grad_norm": 0.4693995714187622, "learning_rate": 0.0006473389355742298, "loss": 0.4438, "step": 12699 }, { "epoch": 7.094972067039106, "grad_norm": 0.5776245594024658, "learning_rate": 0.000647310924369748, "loss": 0.6619, "step": 12700 }, { "epoch": 7.0955307262569836, "grad_norm": 0.5128140449523926, "learning_rate": 0.0006472829131652661, "loss": 0.5135, "step": 12701 }, { "epoch": 7.09608938547486, "grad_norm": 0.5361731052398682, "learning_rate": 0.0006472549019607843, "loss": 0.452, "step": 12702 }, { "epoch": 7.096648044692738, "grad_norm": 0.5004317164421082, "learning_rate": 0.0006472268907563026, "loss": 0.4145, "step": 12703 }, { "epoch": 7.097206703910614, "grad_norm": 0.4886071979999542, "learning_rate": 0.0006471988795518208, "loss": 0.4396, "step": 12704 }, { "epoch": 7.097765363128492, "grad_norm": 1.0755926370620728, "learning_rate": 0.000647170868347339, "loss": 0.536, "step": 12705 }, { "epoch": 7.098324022346369, "grad_norm": 
0.4762767255306244, "learning_rate": 0.0006471428571428572, "loss": 0.4837, "step": 12706 }, { "epoch": 7.098882681564246, "grad_norm": 0.6282446980476379, "learning_rate": 0.0006471148459383753, "loss": 0.5586, "step": 12707 }, { "epoch": 7.099441340782123, "grad_norm": 0.6570466160774231, "learning_rate": 0.0006470868347338936, "loss": 0.3973, "step": 12708 }, { "epoch": 7.1, "grad_norm": 0.39744311571121216, "learning_rate": 0.0006470588235294118, "loss": 0.4994, "step": 12709 }, { "epoch": 7.100558659217877, "grad_norm": 0.48977991938591003, "learning_rate": 0.00064703081232493, "loss": 0.4068, "step": 12710 }, { "epoch": 7.1011173184357546, "grad_norm": 0.49988874793052673, "learning_rate": 0.0006470028011204482, "loss": 0.3902, "step": 12711 }, { "epoch": 7.101675977653631, "grad_norm": 0.5221667885780334, "learning_rate": 0.0006469747899159663, "loss": 0.5238, "step": 12712 }, { "epoch": 7.102234636871509, "grad_norm": 0.6152661442756653, "learning_rate": 0.0006469467787114846, "loss": 0.4372, "step": 12713 }, { "epoch": 7.102793296089385, "grad_norm": 0.5400908589363098, "learning_rate": 0.0006469187675070028, "loss": 0.4648, "step": 12714 }, { "epoch": 7.103351955307263, "grad_norm": 0.48163044452667236, "learning_rate": 0.000646890756302521, "loss": 0.4323, "step": 12715 }, { "epoch": 7.10391061452514, "grad_norm": 1.400691270828247, "learning_rate": 0.0006468627450980393, "loss": 0.4309, "step": 12716 }, { "epoch": 7.104469273743017, "grad_norm": 0.7338702082633972, "learning_rate": 0.0006468347338935574, "loss": 0.6295, "step": 12717 }, { "epoch": 7.105027932960894, "grad_norm": 0.6296538710594177, "learning_rate": 0.0006468067226890756, "loss": 0.4425, "step": 12718 }, { "epoch": 7.105586592178771, "grad_norm": 0.5258376002311707, "learning_rate": 0.0006467787114845939, "loss": 0.5288, "step": 12719 }, { "epoch": 7.106145251396648, "grad_norm": 0.6234728693962097, "learning_rate": 0.0006467507002801121, "loss": 0.3663, "step": 12720 }, { "epoch": 
7.1067039106145256, "grad_norm": 1.0232789516448975, "learning_rate": 0.0006467226890756303, "loss": 0.4714, "step": 12721 }, { "epoch": 7.107262569832402, "grad_norm": 0.4057672917842865, "learning_rate": 0.0006466946778711485, "loss": 0.4586, "step": 12722 }, { "epoch": 7.10782122905028, "grad_norm": 0.6065461039543152, "learning_rate": 0.0006466666666666666, "loss": 0.457, "step": 12723 }, { "epoch": 7.108379888268156, "grad_norm": 0.6504439115524292, "learning_rate": 0.0006466386554621849, "loss": 0.411, "step": 12724 }, { "epoch": 7.108938547486034, "grad_norm": 0.537788987159729, "learning_rate": 0.0006466106442577031, "loss": 0.3954, "step": 12725 }, { "epoch": 7.10949720670391, "grad_norm": 1.1965739727020264, "learning_rate": 0.0006465826330532213, "loss": 0.4266, "step": 12726 }, { "epoch": 7.110055865921788, "grad_norm": 0.39094939827919006, "learning_rate": 0.0006465546218487395, "loss": 0.3475, "step": 12727 }, { "epoch": 7.110614525139665, "grad_norm": 0.7719897627830505, "learning_rate": 0.0006465266106442576, "loss": 0.4283, "step": 12728 }, { "epoch": 7.111173184357542, "grad_norm": 5.570230484008789, "learning_rate": 0.0006464985994397759, "loss": 0.4914, "step": 12729 }, { "epoch": 7.111731843575419, "grad_norm": 1.7925388813018799, "learning_rate": 0.0006464705882352941, "loss": 0.4616, "step": 12730 }, { "epoch": 7.112290502793296, "grad_norm": 0.33797651529312134, "learning_rate": 0.0006464425770308123, "loss": 0.3696, "step": 12731 }, { "epoch": 7.112849162011173, "grad_norm": 0.5232540369033813, "learning_rate": 0.0006464145658263306, "loss": 0.406, "step": 12732 }, { "epoch": 7.113407821229051, "grad_norm": 0.4373687505722046, "learning_rate": 0.0006463865546218487, "loss": 0.4399, "step": 12733 }, { "epoch": 7.113966480446927, "grad_norm": 0.37885135412216187, "learning_rate": 0.000646358543417367, "loss": 0.4033, "step": 12734 }, { "epoch": 7.114525139664805, "grad_norm": 0.40603572130203247, "learning_rate": 0.0006463305322128852, 
"loss": 0.4128, "step": 12735 }, { "epoch": 7.115083798882681, "grad_norm": 0.8822109699249268, "learning_rate": 0.0006463025210084034, "loss": 0.4754, "step": 12736 }, { "epoch": 7.115642458100559, "grad_norm": 0.5899255275726318, "learning_rate": 0.0006462745098039216, "loss": 0.4115, "step": 12737 }, { "epoch": 7.116201117318436, "grad_norm": 0.4771755039691925, "learning_rate": 0.0006462464985994398, "loss": 0.428, "step": 12738 }, { "epoch": 7.116759776536313, "grad_norm": 0.4788636267185211, "learning_rate": 0.000646218487394958, "loss": 0.4184, "step": 12739 }, { "epoch": 7.11731843575419, "grad_norm": 0.5708326101303101, "learning_rate": 0.0006461904761904762, "loss": 0.4483, "step": 12740 }, { "epoch": 7.117877094972067, "grad_norm": 0.3364965319633484, "learning_rate": 0.0006461624649859944, "loss": 0.5065, "step": 12741 }, { "epoch": 7.118435754189944, "grad_norm": 0.4152913987636566, "learning_rate": 0.0006461344537815126, "loss": 0.4076, "step": 12742 }, { "epoch": 7.118994413407822, "grad_norm": 0.4488314986228943, "learning_rate": 0.0006461064425770308, "loss": 0.4691, "step": 12743 }, { "epoch": 7.119553072625698, "grad_norm": 0.6819281578063965, "learning_rate": 0.000646078431372549, "loss": 0.5166, "step": 12744 }, { "epoch": 7.120111731843576, "grad_norm": 0.5671976208686829, "learning_rate": 0.0006460504201680672, "loss": 0.4185, "step": 12745 }, { "epoch": 7.120670391061452, "grad_norm": 0.44680994749069214, "learning_rate": 0.0006460224089635854, "loss": 0.4176, "step": 12746 }, { "epoch": 7.12122905027933, "grad_norm": 1.061310887336731, "learning_rate": 0.0006459943977591036, "loss": 0.4174, "step": 12747 }, { "epoch": 7.121787709497207, "grad_norm": 3.3708105087280273, "learning_rate": 0.0006459663865546219, "loss": 0.4232, "step": 12748 }, { "epoch": 7.122346368715084, "grad_norm": 0.43583595752716064, "learning_rate": 0.0006459383753501401, "loss": 0.5121, "step": 12749 }, { "epoch": 7.122905027932961, "grad_norm": 1.007943034172058, 
"learning_rate": 0.0006459103641456583, "loss": 0.5472, "step": 12750 }, { "epoch": 7.123463687150838, "grad_norm": 0.44202613830566406, "learning_rate": 0.0006458823529411765, "loss": 0.5354, "step": 12751 }, { "epoch": 7.124022346368715, "grad_norm": 0.9356262683868408, "learning_rate": 0.0006458543417366947, "loss": 0.5202, "step": 12752 }, { "epoch": 7.124581005586593, "grad_norm": 0.5274031162261963, "learning_rate": 0.0006458263305322129, "loss": 0.4766, "step": 12753 }, { "epoch": 7.125139664804469, "grad_norm": 0.6325783729553223, "learning_rate": 0.0006457983193277312, "loss": 0.4718, "step": 12754 }, { "epoch": 7.125698324022347, "grad_norm": 0.46212828159332275, "learning_rate": 0.0006457703081232493, "loss": 0.4406, "step": 12755 }, { "epoch": 7.126256983240223, "grad_norm": 0.8070359826087952, "learning_rate": 0.0006457422969187675, "loss": 0.3565, "step": 12756 }, { "epoch": 7.126815642458101, "grad_norm": 0.4731125831604004, "learning_rate": 0.0006457142857142857, "loss": 0.4638, "step": 12757 }, { "epoch": 7.127374301675978, "grad_norm": 0.5495564937591553, "learning_rate": 0.0006456862745098039, "loss": 0.4266, "step": 12758 }, { "epoch": 7.127932960893855, "grad_norm": 1.6205756664276123, "learning_rate": 0.0006456582633053222, "loss": 0.5318, "step": 12759 }, { "epoch": 7.128491620111732, "grad_norm": 0.4181329607963562, "learning_rate": 0.0006456302521008403, "loss": 0.3918, "step": 12760 }, { "epoch": 7.129050279329609, "grad_norm": 0.9298834800720215, "learning_rate": 0.0006456022408963585, "loss": 0.7562, "step": 12761 }, { "epoch": 7.129608938547486, "grad_norm": 0.4737931489944458, "learning_rate": 0.0006455742296918767, "loss": 0.3728, "step": 12762 }, { "epoch": 7.130167597765363, "grad_norm": 0.7189401984214783, "learning_rate": 0.0006455462184873949, "loss": 0.4413, "step": 12763 }, { "epoch": 7.13072625698324, "grad_norm": 0.7077210545539856, "learning_rate": 0.0006455182072829133, "loss": 0.3631, "step": 12764 }, { "epoch": 
7.131284916201118, "grad_norm": 0.641346275806427, "learning_rate": 0.0006454901960784314, "loss": 0.543, "step": 12765 }, { "epoch": 7.131843575418994, "grad_norm": 0.4871518611907959, "learning_rate": 0.0006454621848739496, "loss": 0.5246, "step": 12766 }, { "epoch": 7.132402234636872, "grad_norm": 1.3887220621109009, "learning_rate": 0.0006454341736694678, "loss": 0.5868, "step": 12767 }, { "epoch": 7.132960893854748, "grad_norm": 0.6222584843635559, "learning_rate": 0.000645406162464986, "loss": 0.3746, "step": 12768 }, { "epoch": 7.133519553072626, "grad_norm": 0.5651334524154663, "learning_rate": 0.0006453781512605043, "loss": 0.4544, "step": 12769 }, { "epoch": 7.134078212290503, "grad_norm": 0.5305673480033875, "learning_rate": 0.0006453501400560225, "loss": 0.4357, "step": 12770 }, { "epoch": 7.13463687150838, "grad_norm": 0.4170058071613312, "learning_rate": 0.0006453221288515406, "loss": 0.4594, "step": 12771 }, { "epoch": 7.135195530726257, "grad_norm": 0.4718420207500458, "learning_rate": 0.0006452941176470588, "loss": 0.3915, "step": 12772 }, { "epoch": 7.135754189944134, "grad_norm": 0.33161619305610657, "learning_rate": 0.000645266106442577, "loss": 0.3861, "step": 12773 }, { "epoch": 7.136312849162011, "grad_norm": 0.7735040187835693, "learning_rate": 0.0006452380952380953, "loss": 0.5066, "step": 12774 }, { "epoch": 7.136871508379889, "grad_norm": 0.5578786134719849, "learning_rate": 0.0006452100840336135, "loss": 0.5385, "step": 12775 }, { "epoch": 7.137430167597765, "grad_norm": 0.6634144186973572, "learning_rate": 0.0006451820728291316, "loss": 0.6731, "step": 12776 }, { "epoch": 7.137988826815643, "grad_norm": 1.489732265472412, "learning_rate": 0.0006451540616246498, "loss": 0.4059, "step": 12777 }, { "epoch": 7.138547486033519, "grad_norm": 0.45307981967926025, "learning_rate": 0.000645126050420168, "loss": 0.4548, "step": 12778 }, { "epoch": 7.139106145251397, "grad_norm": 0.5672163963317871, "learning_rate": 0.0006450980392156863, "loss": 
0.5295, "step": 12779 }, { "epoch": 7.139664804469274, "grad_norm": 1.4033528566360474, "learning_rate": 0.0006450700280112046, "loss": 0.4371, "step": 12780 }, { "epoch": 7.140223463687151, "grad_norm": 0.6696240901947021, "learning_rate": 0.0006450420168067226, "loss": 0.4999, "step": 12781 }, { "epoch": 7.140782122905028, "grad_norm": 0.46200865507125854, "learning_rate": 0.0006450140056022409, "loss": 0.4236, "step": 12782 }, { "epoch": 7.141340782122905, "grad_norm": 0.4872981309890747, "learning_rate": 0.0006449859943977591, "loss": 0.4379, "step": 12783 }, { "epoch": 7.141899441340782, "grad_norm": 0.39078593254089355, "learning_rate": 0.0006449579831932774, "loss": 0.3524, "step": 12784 }, { "epoch": 7.14245810055866, "grad_norm": 2.3813068866729736, "learning_rate": 0.0006449299719887956, "loss": 0.3891, "step": 12785 }, { "epoch": 7.143016759776536, "grad_norm": 0.5509335398674011, "learning_rate": 0.0006449019607843138, "loss": 0.4508, "step": 12786 }, { "epoch": 7.143575418994414, "grad_norm": 0.764968991279602, "learning_rate": 0.0006448739495798319, "loss": 0.4261, "step": 12787 }, { "epoch": 7.14413407821229, "grad_norm": 0.48758915066719055, "learning_rate": 0.0006448459383753501, "loss": 0.438, "step": 12788 }, { "epoch": 7.144692737430168, "grad_norm": 0.5093138217926025, "learning_rate": 0.0006448179271708684, "loss": 0.3542, "step": 12789 }, { "epoch": 7.145251396648045, "grad_norm": 6.327672004699707, "learning_rate": 0.0006447899159663866, "loss": 0.4273, "step": 12790 }, { "epoch": 7.145810055865922, "grad_norm": 0.8771942853927612, "learning_rate": 0.0006447619047619048, "loss": 0.3891, "step": 12791 }, { "epoch": 7.146368715083799, "grad_norm": 3.5664823055267334, "learning_rate": 0.0006447338935574229, "loss": 0.4918, "step": 12792 }, { "epoch": 7.146927374301676, "grad_norm": 0.4794231355190277, "learning_rate": 0.0006447058823529411, "loss": 0.4275, "step": 12793 }, { "epoch": 7.147486033519553, "grad_norm": 0.5505436062812805, 
"learning_rate": 0.0006446778711484594, "loss": 0.6091, "step": 12794 }, { "epoch": 7.148044692737431, "grad_norm": 0.41391441226005554, "learning_rate": 0.0006446498599439776, "loss": 0.4286, "step": 12795 }, { "epoch": 7.148603351955307, "grad_norm": 0.6714885234832764, "learning_rate": 0.0006446218487394958, "loss": 0.449, "step": 12796 }, { "epoch": 7.149162011173185, "grad_norm": 0.37468045949935913, "learning_rate": 0.0006445938375350139, "loss": 0.3685, "step": 12797 }, { "epoch": 7.149720670391061, "grad_norm": 0.5984861850738525, "learning_rate": 0.0006445658263305321, "loss": 0.4558, "step": 12798 }, { "epoch": 7.150279329608939, "grad_norm": 0.5259456634521484, "learning_rate": 0.0006445378151260505, "loss": 0.3493, "step": 12799 }, { "epoch": 7.150837988826815, "grad_norm": 0.5435981154441833, "learning_rate": 0.0006445098039215687, "loss": 0.4379, "step": 12800 }, { "epoch": 7.151396648044693, "grad_norm": 0.43052938580513, "learning_rate": 0.0006444817927170869, "loss": 0.4249, "step": 12801 }, { "epoch": 7.15195530726257, "grad_norm": 1.0683350563049316, "learning_rate": 0.0006444537815126051, "loss": 0.5386, "step": 12802 }, { "epoch": 7.152513966480447, "grad_norm": 1.0287319421768188, "learning_rate": 0.0006444257703081232, "loss": 0.4937, "step": 12803 }, { "epoch": 7.153072625698324, "grad_norm": 0.428891122341156, "learning_rate": 0.0006443977591036415, "loss": 0.4899, "step": 12804 }, { "epoch": 7.153631284916201, "grad_norm": 0.5163244009017944, "learning_rate": 0.0006443697478991597, "loss": 0.445, "step": 12805 }, { "epoch": 7.154189944134078, "grad_norm": 0.5456315875053406, "learning_rate": 0.0006443417366946779, "loss": 0.6807, "step": 12806 }, { "epoch": 7.154748603351956, "grad_norm": 0.6007381677627563, "learning_rate": 0.0006443137254901961, "loss": 0.4452, "step": 12807 }, { "epoch": 7.155307262569832, "grad_norm": 0.5373033881187439, "learning_rate": 0.0006442857142857142, "loss": 0.6039, "step": 12808 }, { "epoch": 
7.15586592178771, "grad_norm": 0.5036280155181885, "learning_rate": 0.0006442577030812325, "loss": 0.5336, "step": 12809 }, { "epoch": 7.156424581005586, "grad_norm": 0.5868043303489685, "learning_rate": 0.0006442296918767507, "loss": 0.6274, "step": 12810 }, { "epoch": 7.156983240223464, "grad_norm": 0.5793883204460144, "learning_rate": 0.0006442016806722689, "loss": 0.4187, "step": 12811 }, { "epoch": 7.157541899441341, "grad_norm": 0.571361243724823, "learning_rate": 0.0006441736694677871, "loss": 0.471, "step": 12812 }, { "epoch": 7.158100558659218, "grad_norm": 0.5304030179977417, "learning_rate": 0.0006441456582633052, "loss": 0.5783, "step": 12813 }, { "epoch": 7.158659217877095, "grad_norm": 1.6864985227584839, "learning_rate": 0.0006441176470588236, "loss": 0.4137, "step": 12814 }, { "epoch": 7.159217877094972, "grad_norm": 0.5903187394142151, "learning_rate": 0.0006440896358543418, "loss": 0.4528, "step": 12815 }, { "epoch": 7.159776536312849, "grad_norm": 1.0610902309417725, "learning_rate": 0.00064406162464986, "loss": 0.4505, "step": 12816 }, { "epoch": 7.160335195530727, "grad_norm": 0.8419244289398193, "learning_rate": 0.0006440336134453782, "loss": 0.4472, "step": 12817 }, { "epoch": 7.160893854748603, "grad_norm": 0.44933366775512695, "learning_rate": 0.0006440056022408964, "loss": 0.4482, "step": 12818 }, { "epoch": 7.161452513966481, "grad_norm": 0.6411414742469788, "learning_rate": 0.0006439775910364146, "loss": 0.3909, "step": 12819 }, { "epoch": 7.162011173184357, "grad_norm": 0.7263995409011841, "learning_rate": 0.0006439495798319328, "loss": 0.5136, "step": 12820 }, { "epoch": 7.162569832402235, "grad_norm": 0.5708878636360168, "learning_rate": 0.000643921568627451, "loss": 0.3808, "step": 12821 }, { "epoch": 7.163128491620112, "grad_norm": 0.3861788213253021, "learning_rate": 0.0006438935574229692, "loss": 0.4345, "step": 12822 }, { "epoch": 7.163687150837989, "grad_norm": 0.45457369089126587, "learning_rate": 0.0006438655462184874, "loss": 
0.2889, "step": 12823 }, { "epoch": 7.164245810055866, "grad_norm": 0.5887379050254822, "learning_rate": 0.0006438375350140056, "loss": 0.7333, "step": 12824 }, { "epoch": 7.164804469273743, "grad_norm": 1.1495230197906494, "learning_rate": 0.0006438095238095238, "loss": 0.4128, "step": 12825 }, { "epoch": 7.16536312849162, "grad_norm": 0.3804246187210083, "learning_rate": 0.000643781512605042, "loss": 0.4147, "step": 12826 }, { "epoch": 7.165921787709498, "grad_norm": 0.4984986186027527, "learning_rate": 0.0006437535014005602, "loss": 0.4712, "step": 12827 }, { "epoch": 7.166480446927374, "grad_norm": 0.44855397939682007, "learning_rate": 0.0006437254901960784, "loss": 0.3908, "step": 12828 }, { "epoch": 7.167039106145252, "grad_norm": 0.6057441234588623, "learning_rate": 0.0006436974789915966, "loss": 0.3828, "step": 12829 }, { "epoch": 7.167597765363128, "grad_norm": 4.7428107261657715, "learning_rate": 0.0006436694677871149, "loss": 0.3937, "step": 12830 }, { "epoch": 7.168156424581006, "grad_norm": 1.20963454246521, "learning_rate": 0.0006436414565826331, "loss": 0.5028, "step": 12831 }, { "epoch": 7.168715083798883, "grad_norm": 0.7283357381820679, "learning_rate": 0.0006436134453781513, "loss": 0.5822, "step": 12832 }, { "epoch": 7.16927374301676, "grad_norm": 0.6972780227661133, "learning_rate": 0.0006435854341736695, "loss": 0.4718, "step": 12833 }, { "epoch": 7.169832402234637, "grad_norm": 0.5511775016784668, "learning_rate": 0.0006435574229691878, "loss": 0.3353, "step": 12834 }, { "epoch": 7.170391061452514, "grad_norm": 0.4870832860469818, "learning_rate": 0.0006435294117647059, "loss": 0.5524, "step": 12835 }, { "epoch": 7.170949720670391, "grad_norm": 0.4437590539455414, "learning_rate": 0.0006435014005602241, "loss": 0.3883, "step": 12836 }, { "epoch": 7.171508379888268, "grad_norm": 0.5412284135818481, "learning_rate": 0.0006434733893557423, "loss": 0.4194, "step": 12837 }, { "epoch": 7.172067039106145, "grad_norm": 1.660475492477417, 
"learning_rate": 0.0006434453781512605, "loss": 0.4, "step": 12838 }, { "epoch": 7.172625698324023, "grad_norm": 0.43075165152549744, "learning_rate": 0.0006434173669467788, "loss": 0.3833, "step": 12839 }, { "epoch": 7.173184357541899, "grad_norm": 0.39243221282958984, "learning_rate": 0.0006433893557422969, "loss": 0.4517, "step": 12840 }, { "epoch": 7.173743016759777, "grad_norm": 0.5009211897850037, "learning_rate": 0.0006433613445378151, "loss": 0.4029, "step": 12841 }, { "epoch": 7.174301675977653, "grad_norm": 0.5243275761604309, "learning_rate": 0.0006433333333333333, "loss": 0.5026, "step": 12842 }, { "epoch": 7.174860335195531, "grad_norm": 0.4553791880607605, "learning_rate": 0.0006433053221288515, "loss": 0.4587, "step": 12843 }, { "epoch": 7.175418994413408, "grad_norm": 0.464993417263031, "learning_rate": 0.0006432773109243698, "loss": 0.383, "step": 12844 }, { "epoch": 7.175977653631285, "grad_norm": 0.6442744135856628, "learning_rate": 0.0006432492997198879, "loss": 0.4961, "step": 12845 }, { "epoch": 7.176536312849162, "grad_norm": 0.45668184757232666, "learning_rate": 0.0006432212885154061, "loss": 0.3952, "step": 12846 }, { "epoch": 7.177094972067039, "grad_norm": 0.6154555082321167, "learning_rate": 0.0006431932773109244, "loss": 0.3509, "step": 12847 }, { "epoch": 7.177653631284916, "grad_norm": 0.5299314856529236, "learning_rate": 0.0006431652661064426, "loss": 0.4692, "step": 12848 }, { "epoch": 7.178212290502794, "grad_norm": 0.41880351305007935, "learning_rate": 0.0006431372549019609, "loss": 0.4731, "step": 12849 }, { "epoch": 7.17877094972067, "grad_norm": 0.7551229000091553, "learning_rate": 0.0006431092436974791, "loss": 0.6476, "step": 12850 }, { "epoch": 7.179329608938548, "grad_norm": 0.5173777341842651, "learning_rate": 0.0006430812324929972, "loss": 0.4406, "step": 12851 }, { "epoch": 7.179888268156424, "grad_norm": 0.45377928018569946, "learning_rate": 0.0006430532212885154, "loss": 0.5003, "step": 12852 }, { "epoch": 
7.180446927374302, "grad_norm": 1.4140952825546265, "learning_rate": 0.0006430252100840336, "loss": 0.4726, "step": 12853 }, { "epoch": 7.181005586592179, "grad_norm": 0.6744934320449829, "learning_rate": 0.0006429971988795519, "loss": 0.611, "step": 12854 }, { "epoch": 7.181564245810056, "grad_norm": 0.7543616890907288, "learning_rate": 0.0006429691876750701, "loss": 0.4849, "step": 12855 }, { "epoch": 7.182122905027933, "grad_norm": 0.382301390171051, "learning_rate": 0.0006429411764705882, "loss": 0.3688, "step": 12856 }, { "epoch": 7.18268156424581, "grad_norm": 0.652662456035614, "learning_rate": 0.0006429131652661064, "loss": 0.4044, "step": 12857 }, { "epoch": 7.183240223463687, "grad_norm": 0.4762151539325714, "learning_rate": 0.0006428851540616246, "loss": 0.4239, "step": 12858 }, { "epoch": 7.183798882681565, "grad_norm": 0.6116922497749329, "learning_rate": 0.0006428571428571429, "loss": 0.3874, "step": 12859 }, { "epoch": 7.184357541899441, "grad_norm": 0.5512605309486389, "learning_rate": 0.0006428291316526611, "loss": 0.4232, "step": 12860 }, { "epoch": 7.184916201117319, "grad_norm": 1.3778985738754272, "learning_rate": 0.0006428011204481792, "loss": 0.4114, "step": 12861 }, { "epoch": 7.185474860335195, "grad_norm": 0.4655029773712158, "learning_rate": 0.0006427731092436974, "loss": 0.3501, "step": 12862 }, { "epoch": 7.186033519553073, "grad_norm": 0.43497544527053833, "learning_rate": 0.0006427450980392156, "loss": 0.4285, "step": 12863 }, { "epoch": 7.18659217877095, "grad_norm": 0.6692568063735962, "learning_rate": 0.000642717086834734, "loss": 0.4947, "step": 12864 }, { "epoch": 7.187150837988827, "grad_norm": 0.42030754685401917, "learning_rate": 0.0006426890756302522, "loss": 0.3645, "step": 12865 }, { "epoch": 7.187709497206704, "grad_norm": 0.5105885863304138, "learning_rate": 0.0006426610644257704, "loss": 0.4794, "step": 12866 }, { "epoch": 7.188268156424581, "grad_norm": 0.43217065930366516, "learning_rate": 0.0006426330532212885, 
"loss": 0.3949, "step": 12867 }, { "epoch": 7.188826815642458, "grad_norm": 0.5292461514472961, "learning_rate": 0.0006426050420168067, "loss": 0.4397, "step": 12868 }, { "epoch": 7.189385474860336, "grad_norm": 0.635705828666687, "learning_rate": 0.000642577030812325, "loss": 0.4063, "step": 12869 }, { "epoch": 7.189944134078212, "grad_norm": 0.49821507930755615, "learning_rate": 0.0006425490196078432, "loss": 0.4543, "step": 12870 }, { "epoch": 7.19050279329609, "grad_norm": 1.5830234289169312, "learning_rate": 0.0006425210084033614, "loss": 0.5662, "step": 12871 }, { "epoch": 7.191061452513966, "grad_norm": 0.7206829190254211, "learning_rate": 0.0006424929971988795, "loss": 0.3783, "step": 12872 }, { "epoch": 7.191620111731844, "grad_norm": 0.4381575584411621, "learning_rate": 0.0006424649859943977, "loss": 0.5115, "step": 12873 }, { "epoch": 7.19217877094972, "grad_norm": 0.5439664125442505, "learning_rate": 0.000642436974789916, "loss": 0.4643, "step": 12874 }, { "epoch": 7.192737430167598, "grad_norm": 0.6580325961112976, "learning_rate": 0.0006424089635854342, "loss": 0.5256, "step": 12875 }, { "epoch": 7.193296089385475, "grad_norm": 0.7051538825035095, "learning_rate": 0.0006423809523809524, "loss": 0.4853, "step": 12876 }, { "epoch": 7.193854748603352, "grad_norm": 0.4369136393070221, "learning_rate": 0.0006423529411764705, "loss": 0.5359, "step": 12877 }, { "epoch": 7.194413407821229, "grad_norm": 0.7481156587600708, "learning_rate": 0.0006423249299719887, "loss": 0.4422, "step": 12878 }, { "epoch": 7.194972067039106, "grad_norm": 0.41737329959869385, "learning_rate": 0.000642296918767507, "loss": 0.503, "step": 12879 }, { "epoch": 7.195530726256983, "grad_norm": 0.5005262494087219, "learning_rate": 0.0006422689075630253, "loss": 0.4643, "step": 12880 }, { "epoch": 7.196089385474861, "grad_norm": 0.4186493456363678, "learning_rate": 0.0006422408963585435, "loss": 0.3752, "step": 12881 }, { "epoch": 7.196648044692737, "grad_norm": 0.7866208553314209, 
"learning_rate": 0.0006422128851540617, "loss": 0.3106, "step": 12882 }, { "epoch": 7.197206703910615, "grad_norm": 4.869137287139893, "learning_rate": 0.0006421848739495798, "loss": 0.4292, "step": 12883 }, { "epoch": 7.197765363128491, "grad_norm": 0.4108200967311859, "learning_rate": 0.0006421568627450981, "loss": 0.4045, "step": 12884 }, { "epoch": 7.198324022346369, "grad_norm": 0.6972508430480957, "learning_rate": 0.0006421288515406163, "loss": 0.3792, "step": 12885 }, { "epoch": 7.198882681564246, "grad_norm": 0.4358613193035126, "learning_rate": 0.0006421008403361345, "loss": 0.4711, "step": 12886 }, { "epoch": 7.199441340782123, "grad_norm": 0.5593037009239197, "learning_rate": 0.0006420728291316527, "loss": 0.4848, "step": 12887 }, { "epoch": 7.2, "grad_norm": 0.5414353013038635, "learning_rate": 0.0006420448179271708, "loss": 0.3897, "step": 12888 }, { "epoch": 7.200558659217877, "grad_norm": 0.6617635488510132, "learning_rate": 0.0006420168067226891, "loss": 0.5381, "step": 12889 }, { "epoch": 7.201117318435754, "grad_norm": 0.6181525588035583, "learning_rate": 0.0006419887955182073, "loss": 0.4574, "step": 12890 }, { "epoch": 7.201675977653632, "grad_norm": 0.44847801327705383, "learning_rate": 0.0006419607843137255, "loss": 0.3639, "step": 12891 }, { "epoch": 7.202234636871508, "grad_norm": 0.5697240829467773, "learning_rate": 0.0006419327731092437, "loss": 0.3302, "step": 12892 }, { "epoch": 7.202793296089386, "grad_norm": 0.45044589042663574, "learning_rate": 0.0006419047619047618, "loss": 0.3428, "step": 12893 }, { "epoch": 7.203351955307262, "grad_norm": 0.4011487662792206, "learning_rate": 0.0006418767507002801, "loss": 0.4987, "step": 12894 }, { "epoch": 7.20391061452514, "grad_norm": 4.3708086013793945, "learning_rate": 0.0006418487394957983, "loss": 0.5639, "step": 12895 }, { "epoch": 7.204469273743017, "grad_norm": 0.4025447964668274, "learning_rate": 0.0006418207282913166, "loss": 0.358, "step": 12896 }, { "epoch": 7.205027932960894, 
"grad_norm": 1.2090263366699219, "learning_rate": 0.0006417927170868348, "loss": 0.3927, "step": 12897 }, { "epoch": 7.205586592178771, "grad_norm": 0.47650420665740967, "learning_rate": 0.000641764705882353, "loss": 0.4537, "step": 12898 }, { "epoch": 7.206145251396648, "grad_norm": 0.48592033982276917, "learning_rate": 0.0006417366946778712, "loss": 0.4952, "step": 12899 }, { "epoch": 7.206703910614525, "grad_norm": 0.41837751865386963, "learning_rate": 0.0006417086834733894, "loss": 0.361, "step": 12900 }, { "epoch": 7.207262569832403, "grad_norm": 0.5799628496170044, "learning_rate": 0.0006416806722689076, "loss": 0.4705, "step": 12901 }, { "epoch": 7.207821229050279, "grad_norm": 0.5551765561103821, "learning_rate": 0.0006416526610644258, "loss": 0.3652, "step": 12902 }, { "epoch": 7.208379888268157, "grad_norm": 0.6752025485038757, "learning_rate": 0.000641624649859944, "loss": 0.5995, "step": 12903 }, { "epoch": 7.208938547486033, "grad_norm": 0.70978182554245, "learning_rate": 0.0006415966386554622, "loss": 0.4445, "step": 12904 }, { "epoch": 7.209497206703911, "grad_norm": 0.6277596950531006, "learning_rate": 0.0006415686274509804, "loss": 0.4324, "step": 12905 }, { "epoch": 7.210055865921788, "grad_norm": 0.3992590308189392, "learning_rate": 0.0006415406162464986, "loss": 0.395, "step": 12906 }, { "epoch": 7.210614525139665, "grad_norm": 0.6173178553581238, "learning_rate": 0.0006415126050420168, "loss": 0.4062, "step": 12907 }, { "epoch": 7.211173184357542, "grad_norm": 0.3988576829433441, "learning_rate": 0.000641484593837535, "loss": 0.4263, "step": 12908 }, { "epoch": 7.211731843575419, "grad_norm": 0.5829341411590576, "learning_rate": 0.0006414565826330533, "loss": 0.4291, "step": 12909 }, { "epoch": 7.212290502793296, "grad_norm": 0.5598450899124146, "learning_rate": 0.0006414285714285714, "loss": 0.394, "step": 12910 }, { "epoch": 7.212849162011173, "grad_norm": 0.5120077133178711, "learning_rate": 0.0006414005602240896, "loss": 0.3612, "step": 
12911 }, { "epoch": 7.21340782122905, "grad_norm": 0.9902352690696716, "learning_rate": 0.0006413725490196079, "loss": 0.4317, "step": 12912 }, { "epoch": 7.213966480446928, "grad_norm": 0.5374852418899536, "learning_rate": 0.0006413445378151261, "loss": 0.4455, "step": 12913 }, { "epoch": 7.214525139664804, "grad_norm": 0.5447959899902344, "learning_rate": 0.0006413165266106444, "loss": 0.3945, "step": 12914 }, { "epoch": 7.215083798882682, "grad_norm": 0.6012040376663208, "learning_rate": 0.0006412885154061625, "loss": 0.3984, "step": 12915 }, { "epoch": 7.215642458100558, "grad_norm": 0.4044865369796753, "learning_rate": 0.0006412605042016807, "loss": 0.3964, "step": 12916 }, { "epoch": 7.216201117318436, "grad_norm": 0.9325155019760132, "learning_rate": 0.0006412324929971989, "loss": 0.3636, "step": 12917 }, { "epoch": 7.216759776536313, "grad_norm": 0.5127488970756531, "learning_rate": 0.0006412044817927171, "loss": 0.4919, "step": 12918 }, { "epoch": 7.21731843575419, "grad_norm": 1.1355258226394653, "learning_rate": 0.0006411764705882354, "loss": 0.3927, "step": 12919 }, { "epoch": 7.217877094972067, "grad_norm": 0.4683167636394501, "learning_rate": 0.0006411484593837535, "loss": 0.4414, "step": 12920 }, { "epoch": 7.218435754189944, "grad_norm": 1.3356767892837524, "learning_rate": 0.0006411204481792717, "loss": 0.5558, "step": 12921 }, { "epoch": 7.218994413407821, "grad_norm": 0.392017126083374, "learning_rate": 0.0006410924369747899, "loss": 0.3589, "step": 12922 }, { "epoch": 7.219553072625699, "grad_norm": 0.49749869108200073, "learning_rate": 0.0006410644257703081, "loss": 0.3816, "step": 12923 }, { "epoch": 7.220111731843575, "grad_norm": 0.7638598680496216, "learning_rate": 0.0006410364145658264, "loss": 0.447, "step": 12924 }, { "epoch": 7.220670391061453, "grad_norm": 0.4961168169975281, "learning_rate": 0.0006410084033613446, "loss": 0.3948, "step": 12925 }, { "epoch": 7.221229050279329, "grad_norm": 0.6985642910003662, "learning_rate": 
0.0006409803921568627, "loss": 0.3327, "step": 12926 }, { "epoch": 7.221787709497207, "grad_norm": 0.8201794028282166, "learning_rate": 0.0006409523809523809, "loss": 0.5289, "step": 12927 }, { "epoch": 7.222346368715084, "grad_norm": 0.45156192779541016, "learning_rate": 0.0006409243697478991, "loss": 0.4555, "step": 12928 }, { "epoch": 7.222905027932961, "grad_norm": 0.5104547739028931, "learning_rate": 0.0006408963585434175, "loss": 0.5608, "step": 12929 }, { "epoch": 7.223463687150838, "grad_norm": 0.9331592321395874, "learning_rate": 0.0006408683473389357, "loss": 0.4529, "step": 12930 }, { "epoch": 7.224022346368715, "grad_norm": 0.44113239645957947, "learning_rate": 0.0006408403361344538, "loss": 0.4393, "step": 12931 }, { "epoch": 7.224581005586592, "grad_norm": 1.6009677648544312, "learning_rate": 0.000640812324929972, "loss": 0.3751, "step": 12932 }, { "epoch": 7.22513966480447, "grad_norm": 0.6767282485961914, "learning_rate": 0.0006407843137254902, "loss": 0.4119, "step": 12933 }, { "epoch": 7.225698324022346, "grad_norm": 0.517426073551178, "learning_rate": 0.0006407563025210085, "loss": 0.4043, "step": 12934 }, { "epoch": 7.226256983240224, "grad_norm": 0.4232938587665558, "learning_rate": 0.0006407282913165267, "loss": 0.4271, "step": 12935 }, { "epoch": 7.2268156424581, "grad_norm": 0.48331397771835327, "learning_rate": 0.0006407002801120448, "loss": 0.4649, "step": 12936 }, { "epoch": 7.227374301675978, "grad_norm": 0.41168758273124695, "learning_rate": 0.000640672268907563, "loss": 0.4445, "step": 12937 }, { "epoch": 7.227932960893855, "grad_norm": 3.627685785293579, "learning_rate": 0.0006406442577030812, "loss": 0.4475, "step": 12938 }, { "epoch": 7.228491620111732, "grad_norm": 0.4270423650741577, "learning_rate": 0.0006406162464985994, "loss": 0.4504, "step": 12939 }, { "epoch": 7.229050279329609, "grad_norm": 0.416037917137146, "learning_rate": 0.0006405882352941177, "loss": 0.5165, "step": 12940 }, { "epoch": 7.229608938547486, "grad_norm": 
0.5177565217018127, "learning_rate": 0.0006405602240896359, "loss": 0.4384, "step": 12941 }, { "epoch": 7.230167597765363, "grad_norm": 0.6901390552520752, "learning_rate": 0.000640532212885154, "loss": 0.4848, "step": 12942 }, { "epoch": 7.230726256983241, "grad_norm": 0.45817461609840393, "learning_rate": 0.0006405042016806722, "loss": 0.3719, "step": 12943 }, { "epoch": 7.231284916201117, "grad_norm": 0.4559125602245331, "learning_rate": 0.0006404761904761904, "loss": 0.3874, "step": 12944 }, { "epoch": 7.231843575418995, "grad_norm": 0.7202390432357788, "learning_rate": 0.0006404481792717088, "loss": 0.4825, "step": 12945 }, { "epoch": 7.232402234636871, "grad_norm": 0.42777398228645325, "learning_rate": 0.000640420168067227, "loss": 0.3962, "step": 12946 }, { "epoch": 7.232960893854749, "grad_norm": 0.43763643503189087, "learning_rate": 0.0006403921568627451, "loss": 0.3618, "step": 12947 }, { "epoch": 7.233519553072625, "grad_norm": 0.5025946497917175, "learning_rate": 0.0006403641456582633, "loss": 0.5123, "step": 12948 }, { "epoch": 7.234078212290503, "grad_norm": 0.6518982648849487, "learning_rate": 0.0006403361344537815, "loss": 0.5455, "step": 12949 }, { "epoch": 7.23463687150838, "grad_norm": 0.6964249610900879, "learning_rate": 0.0006403081232492998, "loss": 0.4437, "step": 12950 }, { "epoch": 7.235195530726257, "grad_norm": 0.5587266087532043, "learning_rate": 0.000640280112044818, "loss": 0.4159, "step": 12951 }, { "epoch": 7.235754189944134, "grad_norm": 0.6424814462661743, "learning_rate": 0.0006402521008403361, "loss": 0.4672, "step": 12952 }, { "epoch": 7.236312849162011, "grad_norm": 0.5570558905601501, "learning_rate": 0.0006402240896358543, "loss": 0.4099, "step": 12953 }, { "epoch": 7.236871508379888, "grad_norm": 0.49858906865119934, "learning_rate": 0.0006401960784313725, "loss": 0.4288, "step": 12954 }, { "epoch": 7.237430167597766, "grad_norm": 0.7407189607620239, "learning_rate": 0.0006401680672268908, "loss": 0.3844, "step": 12955 }, { 
"epoch": 7.237988826815642, "grad_norm": 0.4942444860935211, "learning_rate": 0.000640140056022409, "loss": 0.414, "step": 12956 }, { "epoch": 7.23854748603352, "grad_norm": 0.5886725187301636, "learning_rate": 0.0006401120448179272, "loss": 0.396, "step": 12957 }, { "epoch": 7.239106145251396, "grad_norm": 0.6650775671005249, "learning_rate": 0.0006400840336134453, "loss": 0.3157, "step": 12958 }, { "epoch": 7.239664804469274, "grad_norm": 0.7435926795005798, "learning_rate": 0.0006400560224089635, "loss": 0.5349, "step": 12959 }, { "epoch": 7.240223463687151, "grad_norm": 0.4443039000034332, "learning_rate": 0.0006400280112044818, "loss": 0.4573, "step": 12960 }, { "epoch": 7.240782122905028, "grad_norm": 0.5859401822090149, "learning_rate": 0.00064, "loss": 0.4784, "step": 12961 }, { "epoch": 7.241340782122905, "grad_norm": 1.9100741147994995, "learning_rate": 0.0006399719887955183, "loss": 0.6601, "step": 12962 }, { "epoch": 7.241899441340782, "grad_norm": 0.8647416830062866, "learning_rate": 0.0006399439775910364, "loss": 0.4608, "step": 12963 }, { "epoch": 7.242458100558659, "grad_norm": 0.4086284637451172, "learning_rate": 0.0006399159663865546, "loss": 0.4334, "step": 12964 }, { "epoch": 7.243016759776537, "grad_norm": 0.4809357225894928, "learning_rate": 0.0006398879551820729, "loss": 0.3844, "step": 12965 }, { "epoch": 7.243575418994413, "grad_norm": 0.4752311110496521, "learning_rate": 0.0006398599439775911, "loss": 0.3657, "step": 12966 }, { "epoch": 7.244134078212291, "grad_norm": 0.6342294812202454, "learning_rate": 0.0006398319327731093, "loss": 0.4687, "step": 12967 }, { "epoch": 7.244692737430167, "grad_norm": 0.5234696865081787, "learning_rate": 0.0006398039215686274, "loss": 0.3994, "step": 12968 }, { "epoch": 7.245251396648045, "grad_norm": 0.7517098784446716, "learning_rate": 0.0006397759103641456, "loss": 0.5198, "step": 12969 }, { "epoch": 7.245810055865922, "grad_norm": 0.44918322563171387, "learning_rate": 0.0006397478991596639, "loss": 
0.4208, "step": 12970 }, { "epoch": 7.246368715083799, "grad_norm": 0.46741700172424316, "learning_rate": 0.0006397198879551821, "loss": 0.4292, "step": 12971 }, { "epoch": 7.246927374301676, "grad_norm": 0.48479223251342773, "learning_rate": 0.0006396918767507003, "loss": 0.3691, "step": 12972 }, { "epoch": 7.247486033519553, "grad_norm": 0.8295164704322815, "learning_rate": 0.0006396638655462185, "loss": 0.5168, "step": 12973 }, { "epoch": 7.24804469273743, "grad_norm": 0.5968831181526184, "learning_rate": 0.0006396358543417366, "loss": 0.5099, "step": 12974 }, { "epoch": 7.248603351955307, "grad_norm": 0.5067930817604065, "learning_rate": 0.0006396078431372549, "loss": 0.5719, "step": 12975 }, { "epoch": 7.249162011173184, "grad_norm": 0.9654607176780701, "learning_rate": 0.0006395798319327731, "loss": 0.4232, "step": 12976 }, { "epoch": 7.249720670391062, "grad_norm": 0.3738797605037689, "learning_rate": 0.0006395518207282913, "loss": 0.3812, "step": 12977 }, { "epoch": 7.250279329608938, "grad_norm": 0.4697604775428772, "learning_rate": 0.0006395238095238096, "loss": 0.4742, "step": 12978 }, { "epoch": 7.250837988826816, "grad_norm": 0.693396270275116, "learning_rate": 0.0006394957983193277, "loss": 0.5476, "step": 12979 }, { "epoch": 7.251396648044693, "grad_norm": 0.6955320239067078, "learning_rate": 0.000639467787114846, "loss": 0.3677, "step": 12980 }, { "epoch": 7.25195530726257, "grad_norm": 0.36686623096466064, "learning_rate": 0.0006394397759103642, "loss": 0.4218, "step": 12981 }, { "epoch": 7.252513966480447, "grad_norm": 0.45032379031181335, "learning_rate": 0.0006394117647058824, "loss": 0.4203, "step": 12982 }, { "epoch": 7.253072625698324, "grad_norm": 0.5395963788032532, "learning_rate": 0.0006393837535014006, "loss": 0.4838, "step": 12983 }, { "epoch": 7.253631284916201, "grad_norm": 0.4786473214626312, "learning_rate": 0.0006393557422969187, "loss": 0.5148, "step": 12984 }, { "epoch": 7.254189944134078, "grad_norm": 0.4348624050617218, 
"learning_rate": 0.000639327731092437, "loss": 0.4096, "step": 12985 }, { "epoch": 7.254748603351955, "grad_norm": 0.562849760055542, "learning_rate": 0.0006392997198879552, "loss": 0.4654, "step": 12986 }, { "epoch": 7.255307262569833, "grad_norm": 0.502211332321167, "learning_rate": 0.0006392717086834734, "loss": 0.477, "step": 12987 }, { "epoch": 7.255865921787709, "grad_norm": 0.6770154237747192, "learning_rate": 0.0006392436974789916, "loss": 0.6185, "step": 12988 }, { "epoch": 7.256424581005587, "grad_norm": 0.43307065963745117, "learning_rate": 0.0006392156862745098, "loss": 0.5102, "step": 12989 }, { "epoch": 7.256983240223463, "grad_norm": 1.998072862625122, "learning_rate": 0.000639187675070028, "loss": 0.4388, "step": 12990 }, { "epoch": 7.257541899441341, "grad_norm": 0.5020814538002014, "learning_rate": 0.0006391596638655462, "loss": 0.4729, "step": 12991 }, { "epoch": 7.258100558659218, "grad_norm": 0.5628366470336914, "learning_rate": 0.0006391316526610644, "loss": 0.4677, "step": 12992 }, { "epoch": 7.258659217877095, "grad_norm": 1.2835266590118408, "learning_rate": 0.0006391036414565826, "loss": 0.4214, "step": 12993 }, { "epoch": 7.259217877094972, "grad_norm": 0.6612874269485474, "learning_rate": 0.0006390756302521009, "loss": 0.4315, "step": 12994 }, { "epoch": 7.259776536312849, "grad_norm": 0.4183623790740967, "learning_rate": 0.0006390476190476191, "loss": 0.4478, "step": 12995 }, { "epoch": 7.260335195530726, "grad_norm": 0.44214096665382385, "learning_rate": 0.0006390196078431373, "loss": 0.3958, "step": 12996 }, { "epoch": 7.260893854748604, "grad_norm": 0.44483283162117004, "learning_rate": 0.0006389915966386555, "loss": 0.5137, "step": 12997 }, { "epoch": 7.26145251396648, "grad_norm": 1.0209646224975586, "learning_rate": 0.0006389635854341737, "loss": 0.5574, "step": 12998 }, { "epoch": 7.262011173184358, "grad_norm": 0.5998567938804626, "learning_rate": 0.0006389355742296919, "loss": 0.445, "step": 12999 }, { "epoch": 
7.262569832402234, "grad_norm": 0.7421895265579224, "learning_rate": 0.0006389075630252101, "loss": 0.4515, "step": 13000 }, { "epoch": 7.262569832402234, "eval_cer": 0.09136089380598562, "eval_loss": 0.34045925736427307, "eval_runtime": 55.5748, "eval_samples_per_second": 81.656, "eval_steps_per_second": 5.11, "eval_wer": 0.36032513058294463, "step": 13000 }, { "epoch": 7.263128491620112, "grad_norm": 0.39072561264038086, "learning_rate": 0.0006388795518207283, "loss": 0.4199, "step": 13001 }, { "epoch": 7.263687150837989, "grad_norm": 0.6472405791282654, "learning_rate": 0.0006388515406162465, "loss": 0.4129, "step": 13002 }, { "epoch": 7.264245810055866, "grad_norm": 0.3537490963935852, "learning_rate": 0.0006388235294117647, "loss": 0.3476, "step": 13003 }, { "epoch": 7.264804469273743, "grad_norm": 0.38289088010787964, "learning_rate": 0.0006387955182072829, "loss": 0.3555, "step": 13004 }, { "epoch": 7.26536312849162, "grad_norm": 0.6368013620376587, "learning_rate": 0.0006387675070028012, "loss": 0.4559, "step": 13005 }, { "epoch": 7.265921787709497, "grad_norm": 0.4954204857349396, "learning_rate": 0.0006387394957983193, "loss": 0.4473, "step": 13006 }, { "epoch": 7.266480446927375, "grad_norm": 0.6269132494926453, "learning_rate": 0.0006387114845938375, "loss": 0.4599, "step": 13007 }, { "epoch": 7.267039106145251, "grad_norm": 0.5162798762321472, "learning_rate": 0.0006386834733893557, "loss": 0.5593, "step": 13008 }, { "epoch": 7.267597765363129, "grad_norm": 0.4024692177772522, "learning_rate": 0.0006386554621848739, "loss": 0.3817, "step": 13009 }, { "epoch": 7.268156424581005, "grad_norm": 0.3753225803375244, "learning_rate": 0.0006386274509803923, "loss": 0.3267, "step": 13010 }, { "epoch": 7.268715083798883, "grad_norm": 0.4820145070552826, "learning_rate": 0.0006385994397759104, "loss": 0.4598, "step": 13011 }, { "epoch": 7.269273743016759, "grad_norm": 0.4811007082462311, "learning_rate": 0.0006385714285714286, "loss": 0.3824, "step": 13012 }, { 
"epoch": 7.269832402234637, "grad_norm": 0.6950400471687317, "learning_rate": 0.0006385434173669468, "loss": 0.374, "step": 13013 }, { "epoch": 7.270391061452514, "grad_norm": 0.5128576159477234, "learning_rate": 0.000638515406162465, "loss": 0.4784, "step": 13014 }, { "epoch": 7.270949720670391, "grad_norm": 0.3894340395927429, "learning_rate": 0.0006384873949579833, "loss": 0.4427, "step": 13015 }, { "epoch": 7.271508379888268, "grad_norm": 0.5755175948143005, "learning_rate": 0.0006384593837535014, "loss": 0.471, "step": 13016 }, { "epoch": 7.272067039106146, "grad_norm": 0.37587255239486694, "learning_rate": 0.0006384313725490196, "loss": 0.4461, "step": 13017 }, { "epoch": 7.272625698324022, "grad_norm": 0.5242875814437866, "learning_rate": 0.0006384033613445378, "loss": 0.3799, "step": 13018 }, { "epoch": 7.2731843575419, "grad_norm": 0.47452613711357117, "learning_rate": 0.000638375350140056, "loss": 0.5535, "step": 13019 }, { "epoch": 7.273743016759776, "grad_norm": 0.7593187689781189, "learning_rate": 0.0006383473389355743, "loss": 0.4142, "step": 13020 }, { "epoch": 7.274301675977654, "grad_norm": 0.4709136486053467, "learning_rate": 0.0006383193277310925, "loss": 0.4364, "step": 13021 }, { "epoch": 7.27486033519553, "grad_norm": 0.4354836940765381, "learning_rate": 0.0006382913165266106, "loss": 0.5133, "step": 13022 }, { "epoch": 7.275418994413408, "grad_norm": 0.42964091897010803, "learning_rate": 0.0006382633053221288, "loss": 0.3765, "step": 13023 }, { "epoch": 7.275977653631285, "grad_norm": 0.3984837830066681, "learning_rate": 0.000638235294117647, "loss": 0.4564, "step": 13024 }, { "epoch": 7.276536312849162, "grad_norm": 0.8874265551567078, "learning_rate": 0.0006382072829131653, "loss": 0.5248, "step": 13025 }, { "epoch": 7.277094972067039, "grad_norm": 0.7759783864021301, "learning_rate": 0.0006381792717086836, "loss": 0.4771, "step": 13026 }, { "epoch": 7.277653631284916, "grad_norm": 0.47065407037734985, "learning_rate": 
0.0006381512605042016, "loss": 0.3769, "step": 13027 }, { "epoch": 7.278212290502793, "grad_norm": 0.35122138261795044, "learning_rate": 0.0006381232492997199, "loss": 0.3498, "step": 13028 }, { "epoch": 7.278770949720671, "grad_norm": 0.43810176849365234, "learning_rate": 0.0006380952380952381, "loss": 0.379, "step": 13029 }, { "epoch": 7.279329608938547, "grad_norm": 0.46714839339256287, "learning_rate": 0.0006380672268907564, "loss": 0.4172, "step": 13030 }, { "epoch": 7.279888268156425, "grad_norm": 0.45940467715263367, "learning_rate": 0.0006380392156862746, "loss": 0.4781, "step": 13031 }, { "epoch": 7.280446927374301, "grad_norm": 1.0352586507797241, "learning_rate": 0.0006380112044817927, "loss": 0.3935, "step": 13032 }, { "epoch": 7.281005586592179, "grad_norm": 0.5442858338356018, "learning_rate": 0.0006379831932773109, "loss": 0.4074, "step": 13033 }, { "epoch": 7.281564245810056, "grad_norm": 0.3928253650665283, "learning_rate": 0.0006379551820728291, "loss": 0.3953, "step": 13034 }, { "epoch": 7.282122905027933, "grad_norm": 0.6782439947128296, "learning_rate": 0.0006379271708683474, "loss": 0.5075, "step": 13035 }, { "epoch": 7.28268156424581, "grad_norm": 0.528557538986206, "learning_rate": 0.0006378991596638656, "loss": 0.4141, "step": 13036 }, { "epoch": 7.283240223463687, "grad_norm": 1.3695623874664307, "learning_rate": 0.0006378711484593838, "loss": 0.5236, "step": 13037 }, { "epoch": 7.283798882681564, "grad_norm": 0.49207592010498047, "learning_rate": 0.0006378431372549019, "loss": 0.5545, "step": 13038 }, { "epoch": 7.284357541899442, "grad_norm": 0.4578695297241211, "learning_rate": 0.0006378151260504201, "loss": 0.4887, "step": 13039 }, { "epoch": 7.284916201117318, "grad_norm": 0.5954352617263794, "learning_rate": 0.0006377871148459384, "loss": 0.3935, "step": 13040 }, { "epoch": 7.285474860335196, "grad_norm": 0.47506049275398254, "learning_rate": 0.0006377591036414566, "loss": 0.4123, "step": 13041 }, { "epoch": 7.286033519553072, 
"grad_norm": 0.6954988837242126, "learning_rate": 0.0006377310924369748, "loss": 0.4929, "step": 13042 }, { "epoch": 7.28659217877095, "grad_norm": 0.38984450697898865, "learning_rate": 0.0006377030812324929, "loss": 0.3274, "step": 13043 }, { "epoch": 7.287150837988827, "grad_norm": 0.6316025257110596, "learning_rate": 0.0006376750700280111, "loss": 0.4704, "step": 13044 }, { "epoch": 7.287709497206704, "grad_norm": 0.6168654561042786, "learning_rate": 0.0006376470588235295, "loss": 0.4664, "step": 13045 }, { "epoch": 7.288268156424581, "grad_norm": 0.6625655889511108, "learning_rate": 0.0006376190476190477, "loss": 0.4639, "step": 13046 }, { "epoch": 7.288826815642458, "grad_norm": 0.6546689867973328, "learning_rate": 0.0006375910364145659, "loss": 0.7042, "step": 13047 }, { "epoch": 7.289385474860335, "grad_norm": 0.4011949300765991, "learning_rate": 0.000637563025210084, "loss": 0.4428, "step": 13048 }, { "epoch": 7.289944134078212, "grad_norm": 0.7093271613121033, "learning_rate": 0.0006375350140056022, "loss": 0.6982, "step": 13049 }, { "epoch": 7.290502793296089, "grad_norm": 0.44860026240348816, "learning_rate": 0.0006375070028011205, "loss": 0.4716, "step": 13050 }, { "epoch": 7.291061452513967, "grad_norm": 0.5320126414299011, "learning_rate": 0.0006374789915966387, "loss": 0.4712, "step": 13051 }, { "epoch": 7.291620111731843, "grad_norm": 0.3747129738330841, "learning_rate": 0.0006374509803921569, "loss": 0.3464, "step": 13052 }, { "epoch": 7.292178770949721, "grad_norm": 0.5009068250656128, "learning_rate": 0.0006374229691876751, "loss": 0.4855, "step": 13053 }, { "epoch": 7.292737430167598, "grad_norm": 0.5898135900497437, "learning_rate": 0.0006373949579831932, "loss": 0.356, "step": 13054 }, { "epoch": 7.293296089385475, "grad_norm": 2.223297357559204, "learning_rate": 0.0006373669467787115, "loss": 0.5516, "step": 13055 }, { "epoch": 7.293854748603352, "grad_norm": 0.5072278380393982, "learning_rate": 0.0006373389355742297, "loss": 0.474, "step": 
13056 }, { "epoch": 7.294413407821229, "grad_norm": 0.6756377220153809, "learning_rate": 0.0006373109243697479, "loss": 0.4579, "step": 13057 }, { "epoch": 7.294972067039106, "grad_norm": 1.3354785442352295, "learning_rate": 0.0006372829131652661, "loss": 0.4413, "step": 13058 }, { "epoch": 7.295530726256983, "grad_norm": 0.5601457357406616, "learning_rate": 0.0006372549019607842, "loss": 0.5884, "step": 13059 }, { "epoch": 7.29608938547486, "grad_norm": 1.3154665231704712, "learning_rate": 0.0006372268907563026, "loss": 0.4807, "step": 13060 }, { "epoch": 7.296648044692738, "grad_norm": 0.5090473294258118, "learning_rate": 0.0006371988795518208, "loss": 0.4381, "step": 13061 }, { "epoch": 7.297206703910614, "grad_norm": 0.6379081606864929, "learning_rate": 0.000637170868347339, "loss": 0.4265, "step": 13062 }, { "epoch": 7.297765363128492, "grad_norm": 0.46991169452667236, "learning_rate": 0.0006371428571428572, "loss": 0.415, "step": 13063 }, { "epoch": 7.298324022346368, "grad_norm": 0.3955645263195038, "learning_rate": 0.0006371148459383753, "loss": 0.4217, "step": 13064 }, { "epoch": 7.298882681564246, "grad_norm": 0.5086484551429749, "learning_rate": 0.0006370868347338936, "loss": 0.537, "step": 13065 }, { "epoch": 7.299441340782123, "grad_norm": 0.5588968992233276, "learning_rate": 0.0006370588235294118, "loss": 0.3502, "step": 13066 }, { "epoch": 7.3, "grad_norm": 0.5842592716217041, "learning_rate": 0.00063703081232493, "loss": 0.4281, "step": 13067 }, { "epoch": 7.300558659217877, "grad_norm": 0.5039697885513306, "learning_rate": 0.0006370028011204482, "loss": 0.4264, "step": 13068 }, { "epoch": 7.301117318435754, "grad_norm": 0.5301884412765503, "learning_rate": 0.0006369747899159664, "loss": 0.5225, "step": 13069 }, { "epoch": 7.301675977653631, "grad_norm": 0.43623143434524536, "learning_rate": 0.0006369467787114846, "loss": 0.4709, "step": 13070 }, { "epoch": 7.302234636871509, "grad_norm": 0.46189144253730774, "learning_rate": 0.0006369187675070028, 
"loss": 0.4573, "step": 13071 }, { "epoch": 7.302793296089385, "grad_norm": 0.38868826627731323, "learning_rate": 0.000636890756302521, "loss": 0.3842, "step": 13072 }, { "epoch": 7.303351955307263, "grad_norm": 0.46762970089912415, "learning_rate": 0.0006368627450980392, "loss": 0.4529, "step": 13073 }, { "epoch": 7.303910614525139, "grad_norm": 0.4306127429008484, "learning_rate": 0.0006368347338935574, "loss": 0.41, "step": 13074 }, { "epoch": 7.304469273743017, "grad_norm": 0.44215765595436096, "learning_rate": 0.0006368067226890756, "loss": 0.4002, "step": 13075 }, { "epoch": 7.305027932960894, "grad_norm": 0.5541375875473022, "learning_rate": 0.0006367787114845939, "loss": 0.4775, "step": 13076 }, { "epoch": 7.305586592178771, "grad_norm": 1.2680015563964844, "learning_rate": 0.0006367507002801121, "loss": 0.4617, "step": 13077 }, { "epoch": 7.306145251396648, "grad_norm": 0.707722008228302, "learning_rate": 0.0006367226890756303, "loss": 0.5097, "step": 13078 }, { "epoch": 7.306703910614525, "grad_norm": 0.5952586531639099, "learning_rate": 0.0006366946778711485, "loss": 0.4845, "step": 13079 }, { "epoch": 7.307262569832402, "grad_norm": 0.7303816080093384, "learning_rate": 0.0006366666666666667, "loss": 0.4522, "step": 13080 }, { "epoch": 7.30782122905028, "grad_norm": 0.4877987504005432, "learning_rate": 0.0006366386554621849, "loss": 0.3812, "step": 13081 }, { "epoch": 7.308379888268156, "grad_norm": 10.018289566040039, "learning_rate": 0.0006366106442577031, "loss": 0.4897, "step": 13082 }, { "epoch": 7.308938547486034, "grad_norm": 2.4549496173858643, "learning_rate": 0.0006365826330532213, "loss": 0.3864, "step": 13083 }, { "epoch": 7.30949720670391, "grad_norm": 0.6826034784317017, "learning_rate": 0.0006365546218487395, "loss": 0.5868, "step": 13084 }, { "epoch": 7.310055865921788, "grad_norm": 0.6241563558578491, "learning_rate": 0.0006365266106442578, "loss": 0.4755, "step": 13085 }, { "epoch": 7.310614525139664, "grad_norm": 1.0280530452728271, 
"learning_rate": 0.0006364985994397759, "loss": 0.501, "step": 13086 }, { "epoch": 7.311173184357542, "grad_norm": 0.5856412053108215, "learning_rate": 0.0006364705882352941, "loss": 0.3779, "step": 13087 }, { "epoch": 7.311731843575419, "grad_norm": 0.4948166012763977, "learning_rate": 0.0006364425770308123, "loss": 0.3793, "step": 13088 }, { "epoch": 7.312290502793296, "grad_norm": 0.4947815537452698, "learning_rate": 0.0006364145658263305, "loss": 0.4661, "step": 13089 }, { "epoch": 7.312849162011173, "grad_norm": 0.7149274349212646, "learning_rate": 0.0006363865546218488, "loss": 0.4161, "step": 13090 }, { "epoch": 7.31340782122905, "grad_norm": 0.5715914964675903, "learning_rate": 0.0006363585434173669, "loss": 0.4912, "step": 13091 }, { "epoch": 7.313966480446927, "grad_norm": 1.0212067365646362, "learning_rate": 0.0006363305322128851, "loss": 0.4814, "step": 13092 }, { "epoch": 7.314525139664805, "grad_norm": 0.6858073472976685, "learning_rate": 0.0006363025210084034, "loss": 0.4723, "step": 13093 }, { "epoch": 7.315083798882681, "grad_norm": 0.4616902470588684, "learning_rate": 0.0006362745098039216, "loss": 0.2839, "step": 13094 }, { "epoch": 7.315642458100559, "grad_norm": 3.5520362854003906, "learning_rate": 0.0006362464985994399, "loss": 0.4204, "step": 13095 }, { "epoch": 7.316201117318435, "grad_norm": 0.584577202796936, "learning_rate": 0.000636218487394958, "loss": 0.5285, "step": 13096 }, { "epoch": 7.316759776536313, "grad_norm": 0.478901743888855, "learning_rate": 0.0006361904761904762, "loss": 0.4025, "step": 13097 }, { "epoch": 7.31731843575419, "grad_norm": 0.7869493365287781, "learning_rate": 0.0006361624649859944, "loss": 0.4436, "step": 13098 }, { "epoch": 7.317877094972067, "grad_norm": 0.4529644846916199, "learning_rate": 0.0006361344537815126, "loss": 0.4191, "step": 13099 }, { "epoch": 7.318435754189944, "grad_norm": 0.6310835480690002, "learning_rate": 0.0006361064425770309, "loss": 0.5404, "step": 13100 }, { "epoch": 
7.318994413407821, "grad_norm": 0.5917539000511169, "learning_rate": 0.0006360784313725491, "loss": 0.4313, "step": 13101 }, { "epoch": 7.319553072625698, "grad_norm": 0.4380839765071869, "learning_rate": 0.0006360504201680672, "loss": 0.4137, "step": 13102 }, { "epoch": 7.320111731843576, "grad_norm": 0.842938244342804, "learning_rate": 0.0006360224089635854, "loss": 0.3857, "step": 13103 }, { "epoch": 7.320670391061452, "grad_norm": 0.8247633576393127, "learning_rate": 0.0006359943977591036, "loss": 0.4361, "step": 13104 }, { "epoch": 7.32122905027933, "grad_norm": 0.48302939534187317, "learning_rate": 0.0006359663865546219, "loss": 0.4689, "step": 13105 }, { "epoch": 7.321787709497206, "grad_norm": 0.7881895303726196, "learning_rate": 0.0006359383753501401, "loss": 0.4015, "step": 13106 }, { "epoch": 7.322346368715084, "grad_norm": 0.6172569394111633, "learning_rate": 0.0006359103641456582, "loss": 0.5689, "step": 13107 }, { "epoch": 7.322905027932961, "grad_norm": 0.9039911031723022, "learning_rate": 0.0006358823529411764, "loss": 0.4664, "step": 13108 }, { "epoch": 7.323463687150838, "grad_norm": 3.5886175632476807, "learning_rate": 0.0006358543417366946, "loss": 0.3362, "step": 13109 }, { "epoch": 7.324022346368715, "grad_norm": 0.4437963664531708, "learning_rate": 0.000635826330532213, "loss": 0.5628, "step": 13110 }, { "epoch": 7.324581005586592, "grad_norm": 0.6224605441093445, "learning_rate": 0.0006357983193277312, "loss": 0.4705, "step": 13111 }, { "epoch": 7.325139664804469, "grad_norm": 0.5060938000679016, "learning_rate": 0.0006357703081232493, "loss": 0.5119, "step": 13112 }, { "epoch": 7.325698324022347, "grad_norm": 0.42316100001335144, "learning_rate": 0.0006357422969187675, "loss": 0.3545, "step": 13113 }, { "epoch": 7.326256983240223, "grad_norm": 0.48364296555519104, "learning_rate": 0.0006357142857142857, "loss": 0.5177, "step": 13114 }, { "epoch": 7.326815642458101, "grad_norm": 0.5766605734825134, "learning_rate": 0.000635686274509804, 
"loss": 0.4974, "step": 13115 }, { "epoch": 7.327374301675977, "grad_norm": 0.5737341046333313, "learning_rate": 0.0006356582633053222, "loss": 0.4544, "step": 13116 }, { "epoch": 7.327932960893855, "grad_norm": 0.7070626616477966, "learning_rate": 0.0006356302521008404, "loss": 0.5661, "step": 13117 }, { "epoch": 7.328491620111732, "grad_norm": 0.8192995190620422, "learning_rate": 0.0006356022408963585, "loss": 0.3683, "step": 13118 }, { "epoch": 7.329050279329609, "grad_norm": 0.6192365288734436, "learning_rate": 0.0006355742296918767, "loss": 0.4181, "step": 13119 }, { "epoch": 7.329608938547486, "grad_norm": 0.46250036358833313, "learning_rate": 0.000635546218487395, "loss": 0.524, "step": 13120 }, { "epoch": 7.330167597765363, "grad_norm": 0.6766829490661621, "learning_rate": 0.0006355182072829132, "loss": 0.4381, "step": 13121 }, { "epoch": 7.33072625698324, "grad_norm": 0.5478894710540771, "learning_rate": 0.0006354901960784314, "loss": 0.6328, "step": 13122 }, { "epoch": 7.331284916201117, "grad_norm": 1.5401118993759155, "learning_rate": 0.0006354621848739495, "loss": 0.3689, "step": 13123 }, { "epoch": 7.331843575418994, "grad_norm": 0.7330833077430725, "learning_rate": 0.0006354341736694677, "loss": 0.4797, "step": 13124 }, { "epoch": 7.332402234636872, "grad_norm": 0.532580554485321, "learning_rate": 0.000635406162464986, "loss": 0.3267, "step": 13125 }, { "epoch": 7.332960893854748, "grad_norm": 0.4393022358417511, "learning_rate": 0.0006353781512605043, "loss": 0.4645, "step": 13126 }, { "epoch": 7.333519553072626, "grad_norm": 0.7605563402175903, "learning_rate": 0.0006353501400560225, "loss": 0.5058, "step": 13127 }, { "epoch": 7.334078212290502, "grad_norm": 1.0321952104568481, "learning_rate": 0.0006353221288515406, "loss": 0.4405, "step": 13128 }, { "epoch": 7.33463687150838, "grad_norm": 0.3676065504550934, "learning_rate": 0.0006352941176470588, "loss": 0.3764, "step": 13129 }, { "epoch": 7.335195530726257, "grad_norm": 0.6089480519294739, 
"learning_rate": 0.0006352661064425771, "loss": 0.3649, "step": 13130 }, { "epoch": 7.335754189944134, "grad_norm": 0.483052134513855, "learning_rate": 0.0006352380952380953, "loss": 0.4819, "step": 13131 }, { "epoch": 7.336312849162011, "grad_norm": 0.47349387407302856, "learning_rate": 0.0006352100840336135, "loss": 0.4485, "step": 13132 }, { "epoch": 7.336871508379888, "grad_norm": 0.43881145119667053, "learning_rate": 0.0006351820728291317, "loss": 0.4042, "step": 13133 }, { "epoch": 7.337430167597765, "grad_norm": 0.4796612858772278, "learning_rate": 0.0006351540616246498, "loss": 0.4279, "step": 13134 }, { "epoch": 7.337988826815643, "grad_norm": 0.324850857257843, "learning_rate": 0.0006351260504201681, "loss": 0.3721, "step": 13135 }, { "epoch": 7.338547486033519, "grad_norm": 0.4559110105037689, "learning_rate": 0.0006350980392156863, "loss": 0.5117, "step": 13136 }, { "epoch": 7.339106145251397, "grad_norm": 0.3338627517223358, "learning_rate": 0.0006350700280112045, "loss": 0.3957, "step": 13137 }, { "epoch": 7.339664804469273, "grad_norm": 0.5494134426116943, "learning_rate": 0.0006350420168067227, "loss": 0.5581, "step": 13138 }, { "epoch": 7.340223463687151, "grad_norm": 0.36717334389686584, "learning_rate": 0.0006350140056022408, "loss": 0.3289, "step": 13139 }, { "epoch": 7.340782122905028, "grad_norm": 0.46892473101615906, "learning_rate": 0.0006349859943977591, "loss": 0.4301, "step": 13140 }, { "epoch": 7.341340782122905, "grad_norm": 0.48795053362846375, "learning_rate": 0.0006349579831932773, "loss": 0.4147, "step": 13141 }, { "epoch": 7.341899441340782, "grad_norm": 0.481522798538208, "learning_rate": 0.0006349299719887956, "loss": 0.4936, "step": 13142 }, { "epoch": 7.342458100558659, "grad_norm": 3.654675245285034, "learning_rate": 0.0006349019607843138, "loss": 0.5425, "step": 13143 }, { "epoch": 7.343016759776536, "grad_norm": 0.4006863236427307, "learning_rate": 0.0006348739495798319, "loss": 0.5416, "step": 13144 }, { "epoch": 
7.343575418994414, "grad_norm": 0.5376516580581665, "learning_rate": 0.0006348459383753502, "loss": 0.4267, "step": 13145 }, { "epoch": 7.34413407821229, "grad_norm": 0.44530048966407776, "learning_rate": 0.0006348179271708684, "loss": 0.4671, "step": 13146 }, { "epoch": 7.344692737430168, "grad_norm": 1.1315100193023682, "learning_rate": 0.0006347899159663866, "loss": 0.4961, "step": 13147 }, { "epoch": 7.345251396648044, "grad_norm": 0.39795854687690735, "learning_rate": 0.0006347619047619048, "loss": 0.3401, "step": 13148 }, { "epoch": 7.345810055865922, "grad_norm": 2.752030372619629, "learning_rate": 0.000634733893557423, "loss": 0.4108, "step": 13149 }, { "epoch": 7.346368715083799, "grad_norm": 0.5136620998382568, "learning_rate": 0.0006347058823529412, "loss": 0.5094, "step": 13150 }, { "epoch": 7.346927374301676, "grad_norm": 0.3893483579158783, "learning_rate": 0.0006346778711484594, "loss": 0.4612, "step": 13151 }, { "epoch": 7.347486033519553, "grad_norm": 0.47714313864707947, "learning_rate": 0.0006346498599439776, "loss": 0.4142, "step": 13152 }, { "epoch": 7.34804469273743, "grad_norm": 3.0546116828918457, "learning_rate": 0.0006346218487394958, "loss": 0.3998, "step": 13153 }, { "epoch": 7.348603351955307, "grad_norm": 0.5748021602630615, "learning_rate": 0.000634593837535014, "loss": 0.508, "step": 13154 }, { "epoch": 7.349162011173185, "grad_norm": 0.41996580362319946, "learning_rate": 0.0006345658263305322, "loss": 0.4229, "step": 13155 }, { "epoch": 7.349720670391061, "grad_norm": 0.5384958386421204, "learning_rate": 0.0006345378151260504, "loss": 0.4358, "step": 13156 }, { "epoch": 7.350279329608939, "grad_norm": 0.606629490852356, "learning_rate": 0.0006345098039215686, "loss": 0.3531, "step": 13157 }, { "epoch": 7.350837988826815, "grad_norm": 0.415831059217453, "learning_rate": 0.0006344817927170869, "loss": 0.328, "step": 13158 }, { "epoch": 7.351396648044693, "grad_norm": 0.4611166715621948, "learning_rate": 0.0006344537815126051, "loss": 
0.4616, "step": 13159 }, { "epoch": 7.351955307262569, "grad_norm": 0.6233753561973572, "learning_rate": 0.0006344257703081234, "loss": 0.4287, "step": 13160 }, { "epoch": 7.352513966480447, "grad_norm": 0.6902372241020203, "learning_rate": 0.0006343977591036415, "loss": 0.4051, "step": 13161 }, { "epoch": 7.353072625698324, "grad_norm": 0.38717037439346313, "learning_rate": 0.0006343697478991597, "loss": 0.3819, "step": 13162 }, { "epoch": 7.353631284916201, "grad_norm": 0.6942891478538513, "learning_rate": 0.0006343417366946779, "loss": 0.3681, "step": 13163 }, { "epoch": 7.354189944134078, "grad_norm": 1.0617579221725464, "learning_rate": 0.0006343137254901961, "loss": 0.5983, "step": 13164 }, { "epoch": 7.354748603351955, "grad_norm": 1.2739969491958618, "learning_rate": 0.0006342857142857143, "loss": 0.4021, "step": 13165 }, { "epoch": 7.355307262569832, "grad_norm": 0.6465200185775757, "learning_rate": 0.0006342577030812325, "loss": 0.3021, "step": 13166 }, { "epoch": 7.35586592178771, "grad_norm": 0.6682726144790649, "learning_rate": 0.0006342296918767507, "loss": 0.4849, "step": 13167 }, { "epoch": 7.356424581005586, "grad_norm": 0.4172304570674896, "learning_rate": 0.0006342016806722689, "loss": 0.4584, "step": 13168 }, { "epoch": 7.356983240223464, "grad_norm": 0.6555324196815491, "learning_rate": 0.0006341736694677871, "loss": 0.4939, "step": 13169 }, { "epoch": 7.35754189944134, "grad_norm": 0.5502414703369141, "learning_rate": 0.0006341456582633053, "loss": 0.5627, "step": 13170 }, { "epoch": 7.358100558659218, "grad_norm": 1.2976518869400024, "learning_rate": 0.0006341176470588235, "loss": 0.4506, "step": 13171 }, { "epoch": 7.358659217877095, "grad_norm": 0.42921504378318787, "learning_rate": 0.0006340896358543417, "loss": 0.4531, "step": 13172 }, { "epoch": 7.359217877094972, "grad_norm": 0.4945555031299591, "learning_rate": 0.0006340616246498599, "loss": 0.4605, "step": 13173 }, { "epoch": 7.359776536312849, "grad_norm": 0.5439903140068054, 
"learning_rate": 0.0006340336134453781, "loss": 0.3161, "step": 13174 }, { "epoch": 7.360335195530726, "grad_norm": 0.6298562288284302, "learning_rate": 0.0006340056022408964, "loss": 0.4117, "step": 13175 }, { "epoch": 7.360893854748603, "grad_norm": 0.8301893472671509, "learning_rate": 0.0006339775910364147, "loss": 0.5166, "step": 13176 }, { "epoch": 7.361452513966481, "grad_norm": 0.46162816882133484, "learning_rate": 0.0006339495798319328, "loss": 0.4338, "step": 13177 }, { "epoch": 7.362011173184357, "grad_norm": 0.6729776859283447, "learning_rate": 0.000633921568627451, "loss": 0.3576, "step": 13178 }, { "epoch": 7.362569832402235, "grad_norm": 7.691051483154297, "learning_rate": 0.0006338935574229692, "loss": 0.4666, "step": 13179 }, { "epoch": 7.363128491620111, "grad_norm": 0.8211254477500916, "learning_rate": 0.0006338655462184874, "loss": 0.4905, "step": 13180 }, { "epoch": 7.363687150837989, "grad_norm": 0.7436676621437073, "learning_rate": 0.0006338375350140057, "loss": 0.5449, "step": 13181 }, { "epoch": 7.364245810055866, "grad_norm": 0.4844485819339752, "learning_rate": 0.0006338095238095238, "loss": 0.4974, "step": 13182 }, { "epoch": 7.364804469273743, "grad_norm": 0.4114803075790405, "learning_rate": 0.000633781512605042, "loss": 0.4529, "step": 13183 }, { "epoch": 7.36536312849162, "grad_norm": 0.8345039486885071, "learning_rate": 0.0006337535014005602, "loss": 0.4747, "step": 13184 }, { "epoch": 7.365921787709497, "grad_norm": 0.5096237659454346, "learning_rate": 0.0006337254901960784, "loss": 0.4078, "step": 13185 }, { "epoch": 7.366480446927374, "grad_norm": 0.36407962441444397, "learning_rate": 0.0006336974789915967, "loss": 0.4519, "step": 13186 }, { "epoch": 7.367039106145251, "grad_norm": 0.4901229739189148, "learning_rate": 0.0006336694677871148, "loss": 0.4024, "step": 13187 }, { "epoch": 7.367597765363128, "grad_norm": 0.45065784454345703, "learning_rate": 0.000633641456582633, "loss": 0.4746, "step": 13188 }, { "epoch": 
7.368156424581006, "grad_norm": 0.38963842391967773, "learning_rate": 0.0006336134453781512, "loss": 0.3924, "step": 13189 }, { "epoch": 7.368715083798882, "grad_norm": 4.01322603225708, "learning_rate": 0.0006335854341736694, "loss": 0.5011, "step": 13190 }, { "epoch": 7.36927374301676, "grad_norm": 1.151841402053833, "learning_rate": 0.0006335574229691878, "loss": 0.4343, "step": 13191 }, { "epoch": 7.369832402234637, "grad_norm": 0.5479128956794739, "learning_rate": 0.000633529411764706, "loss": 0.5524, "step": 13192 }, { "epoch": 7.370391061452514, "grad_norm": 0.3792780041694641, "learning_rate": 0.0006335014005602241, "loss": 0.3224, "step": 13193 }, { "epoch": 7.370949720670391, "grad_norm": 0.45167112350463867, "learning_rate": 0.0006334733893557423, "loss": 0.4711, "step": 13194 }, { "epoch": 7.371508379888268, "grad_norm": 0.6191294193267822, "learning_rate": 0.0006334453781512605, "loss": 0.4157, "step": 13195 }, { "epoch": 7.372067039106145, "grad_norm": 0.5590991377830505, "learning_rate": 0.0006334173669467788, "loss": 0.4232, "step": 13196 }, { "epoch": 7.372625698324022, "grad_norm": 0.4230961203575134, "learning_rate": 0.000633389355742297, "loss": 0.4401, "step": 13197 }, { "epoch": 7.373184357541899, "grad_norm": 0.7729098796844482, "learning_rate": 0.0006333613445378151, "loss": 0.7506, "step": 13198 }, { "epoch": 7.373743016759777, "grad_norm": 0.4981231987476349, "learning_rate": 0.0006333333333333333, "loss": 0.4668, "step": 13199 }, { "epoch": 7.374301675977653, "grad_norm": 16.94438934326172, "learning_rate": 0.0006333053221288515, "loss": 0.4515, "step": 13200 }, { "epoch": 7.374860335195531, "grad_norm": 0.40868082642555237, "learning_rate": 0.0006332773109243698, "loss": 0.4, "step": 13201 }, { "epoch": 7.375418994413407, "grad_norm": 0.48749709129333496, "learning_rate": 0.000633249299719888, "loss": 0.3826, "step": 13202 }, { "epoch": 7.375977653631285, "grad_norm": 0.7036557793617249, "learning_rate": 0.0006332212885154061, "loss": 
0.3823, "step": 13203 }, { "epoch": 7.376536312849162, "grad_norm": 0.4205828905105591, "learning_rate": 0.0006331932773109243, "loss": 0.353, "step": 13204 }, { "epoch": 7.377094972067039, "grad_norm": 0.3899551033973694, "learning_rate": 0.0006331652661064425, "loss": 0.4245, "step": 13205 }, { "epoch": 7.377653631284916, "grad_norm": 0.4924805164337158, "learning_rate": 0.0006331372549019608, "loss": 0.4464, "step": 13206 }, { "epoch": 7.378212290502793, "grad_norm": 1.1695677042007446, "learning_rate": 0.000633109243697479, "loss": 0.4038, "step": 13207 }, { "epoch": 7.37877094972067, "grad_norm": 0.529436469078064, "learning_rate": 0.0006330812324929973, "loss": 0.5105, "step": 13208 }, { "epoch": 7.379329608938548, "grad_norm": 0.4653843939304352, "learning_rate": 0.0006330532212885154, "loss": 0.4918, "step": 13209 }, { "epoch": 7.379888268156424, "grad_norm": 0.3980516493320465, "learning_rate": 0.0006330252100840336, "loss": 0.408, "step": 13210 }, { "epoch": 7.380446927374302, "grad_norm": 0.4431236684322357, "learning_rate": 0.0006329971988795519, "loss": 0.4575, "step": 13211 }, { "epoch": 7.381005586592178, "grad_norm": 0.704119086265564, "learning_rate": 0.0006329691876750701, "loss": 0.4815, "step": 13212 }, { "epoch": 7.381564245810056, "grad_norm": 0.6382777094841003, "learning_rate": 0.0006329411764705883, "loss": 0.4759, "step": 13213 }, { "epoch": 7.382122905027933, "grad_norm": 1.5455689430236816, "learning_rate": 0.0006329131652661064, "loss": 0.487, "step": 13214 }, { "epoch": 7.38268156424581, "grad_norm": 3.0327768325805664, "learning_rate": 0.0006328851540616246, "loss": 0.5227, "step": 13215 }, { "epoch": 7.383240223463687, "grad_norm": 0.5812059044837952, "learning_rate": 0.0006328571428571429, "loss": 0.4597, "step": 13216 }, { "epoch": 7.383798882681564, "grad_norm": 0.4017048180103302, "learning_rate": 0.0006328291316526611, "loss": 0.4463, "step": 13217 }, { "epoch": 7.384357541899441, "grad_norm": 0.5154291391372681, 
"learning_rate": 0.0006328011204481793, "loss": 0.5196, "step": 13218 }, { "epoch": 7.384916201117319, "grad_norm": 1.431203842163086, "learning_rate": 0.0006327731092436974, "loss": 0.4399, "step": 13219 }, { "epoch": 7.385474860335195, "grad_norm": 0.4689692556858063, "learning_rate": 0.0006327450980392156, "loss": 0.4999, "step": 13220 }, { "epoch": 7.386033519553073, "grad_norm": 0.4327826201915741, "learning_rate": 0.0006327170868347339, "loss": 0.4883, "step": 13221 }, { "epoch": 7.386592178770949, "grad_norm": 0.3834301829338074, "learning_rate": 0.0006326890756302521, "loss": 0.3313, "step": 13222 }, { "epoch": 7.387150837988827, "grad_norm": 0.6593403220176697, "learning_rate": 0.0006326610644257703, "loss": 0.6204, "step": 13223 }, { "epoch": 7.3877094972067034, "grad_norm": 0.7226658463478088, "learning_rate": 0.0006326330532212886, "loss": 0.4978, "step": 13224 }, { "epoch": 7.388268156424581, "grad_norm": 1.397457242012024, "learning_rate": 0.0006326050420168067, "loss": 0.5043, "step": 13225 }, { "epoch": 7.388826815642458, "grad_norm": 0.5905756950378418, "learning_rate": 0.000632577030812325, "loss": 0.5169, "step": 13226 }, { "epoch": 7.389385474860335, "grad_norm": 0.692711591720581, "learning_rate": 0.0006325490196078432, "loss": 0.4879, "step": 13227 }, { "epoch": 7.389944134078212, "grad_norm": 14.020736694335938, "learning_rate": 0.0006325210084033614, "loss": 0.5024, "step": 13228 }, { "epoch": 7.39050279329609, "grad_norm": 2.3759541511535645, "learning_rate": 0.0006324929971988796, "loss": 0.4677, "step": 13229 }, { "epoch": 7.391061452513966, "grad_norm": 0.6416304707527161, "learning_rate": 0.0006324649859943977, "loss": 0.3308, "step": 13230 }, { "epoch": 7.391620111731844, "grad_norm": 0.7347482442855835, "learning_rate": 0.000632436974789916, "loss": 0.6094, "step": 13231 }, { "epoch": 7.39217877094972, "grad_norm": 1.13887357711792, "learning_rate": 0.0006324089635854342, "loss": 0.3676, "step": 13232 }, { "epoch": 7.392737430167598, 
"grad_norm": 0.48946502804756165, "learning_rate": 0.0006323809523809524, "loss": 0.4465, "step": 13233 }, { "epoch": 7.3932960893854744, "grad_norm": 0.5838249921798706, "learning_rate": 0.0006323529411764706, "loss": 0.4723, "step": 13234 }, { "epoch": 7.393854748603352, "grad_norm": 0.5177309513092041, "learning_rate": 0.0006323249299719887, "loss": 0.5624, "step": 13235 }, { "epoch": 7.394413407821229, "grad_norm": 1.3859809637069702, "learning_rate": 0.000632296918767507, "loss": 0.4088, "step": 13236 }, { "epoch": 7.394972067039106, "grad_norm": 0.4547765552997589, "learning_rate": 0.0006322689075630252, "loss": 0.5584, "step": 13237 }, { "epoch": 7.395530726256983, "grad_norm": 0.6429945230484009, "learning_rate": 0.0006322408963585434, "loss": 0.5376, "step": 13238 }, { "epoch": 7.39608938547486, "grad_norm": 0.5619122385978699, "learning_rate": 0.0006322128851540616, "loss": 0.4954, "step": 13239 }, { "epoch": 7.396648044692737, "grad_norm": 0.5644948482513428, "learning_rate": 0.0006321848739495799, "loss": 0.4621, "step": 13240 }, { "epoch": 7.397206703910615, "grad_norm": 0.8649450540542603, "learning_rate": 0.0006321568627450981, "loss": 0.4631, "step": 13241 }, { "epoch": 7.397765363128491, "grad_norm": 0.4329600930213928, "learning_rate": 0.0006321288515406163, "loss": 0.4635, "step": 13242 }, { "epoch": 7.398324022346369, "grad_norm": 4.139972686767578, "learning_rate": 0.0006321008403361345, "loss": 0.487, "step": 13243 }, { "epoch": 7.3988826815642454, "grad_norm": 0.4320136606693268, "learning_rate": 0.0006320728291316527, "loss": 0.449, "step": 13244 }, { "epoch": 7.399441340782123, "grad_norm": 2.186617851257324, "learning_rate": 0.0006320448179271709, "loss": 0.4427, "step": 13245 }, { "epoch": 7.4, "grad_norm": 0.6491425633430481, "learning_rate": 0.0006320168067226891, "loss": 0.5894, "step": 13246 }, { "epoch": 7.400558659217877, "grad_norm": 0.6695373058319092, "learning_rate": 0.0006319887955182073, "loss": 0.4565, "step": 13247 }, { 
"epoch": 7.401117318435754, "grad_norm": 0.7496480941772461, "learning_rate": 0.0006319607843137255, "loss": 0.4661, "step": 13248 }, { "epoch": 7.401675977653631, "grad_norm": 0.38105449080467224, "learning_rate": 0.0006319327731092437, "loss": 0.3993, "step": 13249 }, { "epoch": 7.402234636871508, "grad_norm": 0.5346215963363647, "learning_rate": 0.0006319047619047619, "loss": 0.3761, "step": 13250 }, { "epoch": 7.402793296089386, "grad_norm": 0.683036208152771, "learning_rate": 0.0006318767507002801, "loss": 0.4968, "step": 13251 }, { "epoch": 7.403351955307262, "grad_norm": 0.40833696722984314, "learning_rate": 0.0006318487394957983, "loss": 0.499, "step": 13252 }, { "epoch": 7.40391061452514, "grad_norm": 1.670440435409546, "learning_rate": 0.0006318207282913165, "loss": 0.381, "step": 13253 }, { "epoch": 7.4044692737430164, "grad_norm": 0.4978441298007965, "learning_rate": 0.0006317927170868347, "loss": 0.4784, "step": 13254 }, { "epoch": 7.405027932960894, "grad_norm": 0.5723251104354858, "learning_rate": 0.0006317647058823529, "loss": 0.5265, "step": 13255 }, { "epoch": 7.405586592178771, "grad_norm": 0.7274388670921326, "learning_rate": 0.0006317366946778713, "loss": 0.3593, "step": 13256 }, { "epoch": 7.406145251396648, "grad_norm": 0.3940960168838501, "learning_rate": 0.0006317086834733894, "loss": 0.4622, "step": 13257 }, { "epoch": 7.406703910614525, "grad_norm": 0.5888367891311646, "learning_rate": 0.0006316806722689076, "loss": 0.4838, "step": 13258 }, { "epoch": 7.407262569832402, "grad_norm": 0.4865497946739197, "learning_rate": 0.0006316526610644258, "loss": 0.4549, "step": 13259 }, { "epoch": 7.407821229050279, "grad_norm": 0.5355167388916016, "learning_rate": 0.000631624649859944, "loss": 0.4255, "step": 13260 }, { "epoch": 7.408379888268156, "grad_norm": 0.5155198574066162, "learning_rate": 0.0006315966386554623, "loss": 0.4661, "step": 13261 }, { "epoch": 7.408938547486033, "grad_norm": 0.4844139516353607, "learning_rate": 
0.0006315686274509804, "loss": 0.4747, "step": 13262 }, { "epoch": 7.409497206703911, "grad_norm": 0.5467159152030945, "learning_rate": 0.0006315406162464986, "loss": 0.6514, "step": 13263 }, { "epoch": 7.410055865921787, "grad_norm": 0.916296124458313, "learning_rate": 0.0006315126050420168, "loss": 0.3834, "step": 13264 }, { "epoch": 7.410614525139665, "grad_norm": 0.5033332109451294, "learning_rate": 0.000631484593837535, "loss": 0.4656, "step": 13265 }, { "epoch": 7.411173184357542, "grad_norm": 0.6166537404060364, "learning_rate": 0.0006314565826330533, "loss": 0.4434, "step": 13266 }, { "epoch": 7.411731843575419, "grad_norm": 0.5477985739707947, "learning_rate": 0.0006314285714285714, "loss": 0.483, "step": 13267 }, { "epoch": 7.412290502793296, "grad_norm": 0.5080099701881409, "learning_rate": 0.0006314005602240896, "loss": 0.4495, "step": 13268 }, { "epoch": 7.412849162011173, "grad_norm": 0.46978458762168884, "learning_rate": 0.0006313725490196078, "loss": 0.5162, "step": 13269 }, { "epoch": 7.41340782122905, "grad_norm": 0.9543061256408691, "learning_rate": 0.000631344537815126, "loss": 0.5561, "step": 13270 }, { "epoch": 7.413966480446927, "grad_norm": 0.43025583028793335, "learning_rate": 0.0006313165266106443, "loss": 0.3908, "step": 13271 }, { "epoch": 7.414525139664804, "grad_norm": 2.7704813480377197, "learning_rate": 0.0006312885154061626, "loss": 0.4813, "step": 13272 }, { "epoch": 7.415083798882682, "grad_norm": 0.6134544610977173, "learning_rate": 0.0006312605042016806, "loss": 0.4849, "step": 13273 }, { "epoch": 7.415642458100558, "grad_norm": 0.7115921974182129, "learning_rate": 0.0006312324929971989, "loss": 0.3744, "step": 13274 }, { "epoch": 7.416201117318436, "grad_norm": 0.44600415229797363, "learning_rate": 0.0006312044817927171, "loss": 0.58, "step": 13275 }, { "epoch": 7.4167597765363125, "grad_norm": 3.882844924926758, "learning_rate": 0.0006311764705882354, "loss": 0.4281, "step": 13276 }, { "epoch": 7.41731843575419, "grad_norm": 
0.5672256350517273, "learning_rate": 0.0006311484593837536, "loss": 0.4533, "step": 13277 }, { "epoch": 7.417877094972067, "grad_norm": 0.5520586967468262, "learning_rate": 0.0006311204481792717, "loss": 0.3926, "step": 13278 }, { "epoch": 7.418435754189944, "grad_norm": 0.5408652424812317, "learning_rate": 0.0006310924369747899, "loss": 0.6045, "step": 13279 }, { "epoch": 7.418994413407821, "grad_norm": 0.41654324531555176, "learning_rate": 0.0006310644257703081, "loss": 0.3347, "step": 13280 }, { "epoch": 7.419553072625698, "grad_norm": 0.3420940041542053, "learning_rate": 0.0006310364145658264, "loss": 0.3489, "step": 13281 }, { "epoch": 7.420111731843575, "grad_norm": 1.138730764389038, "learning_rate": 0.0006310084033613446, "loss": 0.4176, "step": 13282 }, { "epoch": 7.420670391061453, "grad_norm": 1.5793524980545044, "learning_rate": 0.0006309803921568627, "loss": 0.4954, "step": 13283 }, { "epoch": 7.421229050279329, "grad_norm": 0.41933029890060425, "learning_rate": 0.0006309523809523809, "loss": 0.4331, "step": 13284 }, { "epoch": 7.421787709497207, "grad_norm": 2.55085825920105, "learning_rate": 0.0006309243697478991, "loss": 0.5244, "step": 13285 }, { "epoch": 7.4223463687150835, "grad_norm": 0.6810856461524963, "learning_rate": 0.0006308963585434174, "loss": 0.4382, "step": 13286 }, { "epoch": 7.422905027932961, "grad_norm": 0.81991046667099, "learning_rate": 0.0006308683473389356, "loss": 0.4149, "step": 13287 }, { "epoch": 7.423463687150838, "grad_norm": 0.5027201771736145, "learning_rate": 0.0006308403361344538, "loss": 0.5835, "step": 13288 }, { "epoch": 7.424022346368715, "grad_norm": 0.4079318642616272, "learning_rate": 0.0006308123249299719, "loss": 0.48, "step": 13289 }, { "epoch": 7.424581005586592, "grad_norm": 0.4573747515678406, "learning_rate": 0.0006307843137254901, "loss": 0.4164, "step": 13290 }, { "epoch": 7.425139664804469, "grad_norm": 0.5541324615478516, "learning_rate": 0.0006307563025210085, "loss": 0.3837, "step": 13291 }, { 
"epoch": 7.425698324022346, "grad_norm": 0.3572680950164795, "learning_rate": 0.0006307282913165267, "loss": 0.4226, "step": 13292 }, { "epoch": 7.426256983240224, "grad_norm": 0.745029091835022, "learning_rate": 0.0006307002801120449, "loss": 0.4143, "step": 13293 }, { "epoch": 7.4268156424581, "grad_norm": 0.4126095473766327, "learning_rate": 0.000630672268907563, "loss": 0.4211, "step": 13294 }, { "epoch": 7.427374301675978, "grad_norm": 0.3626716732978821, "learning_rate": 0.0006306442577030812, "loss": 0.4179, "step": 13295 }, { "epoch": 7.4279329608938545, "grad_norm": 0.47655558586120605, "learning_rate": 0.0006306162464985995, "loss": 0.3942, "step": 13296 }, { "epoch": 7.428491620111732, "grad_norm": 0.5622798204421997, "learning_rate": 0.0006305882352941177, "loss": 0.4579, "step": 13297 }, { "epoch": 7.4290502793296085, "grad_norm": 0.41237446665763855, "learning_rate": 0.0006305602240896359, "loss": 0.3956, "step": 13298 }, { "epoch": 7.429608938547486, "grad_norm": 0.4969257414340973, "learning_rate": 0.000630532212885154, "loss": 0.397, "step": 13299 }, { "epoch": 7.430167597765363, "grad_norm": 0.36382973194122314, "learning_rate": 0.0006305042016806722, "loss": 0.4329, "step": 13300 }, { "epoch": 7.43072625698324, "grad_norm": 0.3624275326728821, "learning_rate": 0.0006304761904761905, "loss": 0.406, "step": 13301 }, { "epoch": 7.431284916201117, "grad_norm": 0.5830177068710327, "learning_rate": 0.0006304481792717087, "loss": 0.4038, "step": 13302 }, { "epoch": 7.431843575418995, "grad_norm": 0.5396106839179993, "learning_rate": 0.0006304201680672269, "loss": 0.4249, "step": 13303 }, { "epoch": 7.432402234636871, "grad_norm": 0.4341740012168884, "learning_rate": 0.0006303921568627451, "loss": 0.3713, "step": 13304 }, { "epoch": 7.432960893854749, "grad_norm": 0.48321545124053955, "learning_rate": 0.0006303641456582632, "loss": 0.3979, "step": 13305 }, { "epoch": 7.4335195530726255, "grad_norm": 0.6073716282844543, "learning_rate": 
0.0006303361344537816, "loss": 0.4398, "step": 13306 }, { "epoch": 7.434078212290503, "grad_norm": 2.9777920246124268, "learning_rate": 0.0006303081232492998, "loss": 0.4581, "step": 13307 }, { "epoch": 7.4346368715083795, "grad_norm": 0.3766200840473175, "learning_rate": 0.000630280112044818, "loss": 0.3866, "step": 13308 }, { "epoch": 7.435195530726257, "grad_norm": 0.4557076096534729, "learning_rate": 0.0006302521008403362, "loss": 0.381, "step": 13309 }, { "epoch": 7.435754189944134, "grad_norm": 0.3866672217845917, "learning_rate": 0.0006302240896358543, "loss": 0.4298, "step": 13310 }, { "epoch": 7.436312849162011, "grad_norm": 0.4263617694377899, "learning_rate": 0.0006301960784313726, "loss": 0.3948, "step": 13311 }, { "epoch": 7.436871508379888, "grad_norm": 0.3464999198913574, "learning_rate": 0.0006301680672268908, "loss": 0.3821, "step": 13312 }, { "epoch": 7.437430167597765, "grad_norm": 0.706479012966156, "learning_rate": 0.000630140056022409, "loss": 0.4068, "step": 13313 }, { "epoch": 7.437988826815642, "grad_norm": 0.40135225653648376, "learning_rate": 0.0006301120448179272, "loss": 0.4466, "step": 13314 }, { "epoch": 7.43854748603352, "grad_norm": 0.5254541635513306, "learning_rate": 0.0006300840336134453, "loss": 0.5856, "step": 13315 }, { "epoch": 7.4391061452513965, "grad_norm": 0.6211190223693848, "learning_rate": 0.0006300560224089636, "loss": 0.3508, "step": 13316 }, { "epoch": 7.439664804469274, "grad_norm": 0.7710056900978088, "learning_rate": 0.0006300280112044818, "loss": 0.3955, "step": 13317 }, { "epoch": 7.4402234636871505, "grad_norm": 1.1574424505233765, "learning_rate": 0.00063, "loss": 0.3814, "step": 13318 }, { "epoch": 7.440782122905028, "grad_norm": 0.4815942049026489, "learning_rate": 0.0006299719887955182, "loss": 0.3752, "step": 13319 }, { "epoch": 7.441340782122905, "grad_norm": 0.41815364360809326, "learning_rate": 0.0006299439775910364, "loss": 0.5149, "step": 13320 }, { "epoch": 7.441899441340782, "grad_norm": 
0.49710094928741455, "learning_rate": 0.0006299159663865546, "loss": 0.4977, "step": 13321 }, { "epoch": 7.442458100558659, "grad_norm": 0.4088105261325836, "learning_rate": 0.0006298879551820729, "loss": 0.3766, "step": 13322 }, { "epoch": 7.443016759776536, "grad_norm": 0.4568706154823303, "learning_rate": 0.0006298599439775911, "loss": 0.4925, "step": 13323 }, { "epoch": 7.443575418994413, "grad_norm": 0.593758761882782, "learning_rate": 0.0006298319327731093, "loss": 0.5544, "step": 13324 }, { "epoch": 7.444134078212291, "grad_norm": 1.2638896703720093, "learning_rate": 0.0006298039215686275, "loss": 0.3589, "step": 13325 }, { "epoch": 7.4446927374301675, "grad_norm": 0.4110514521598816, "learning_rate": 0.0006297759103641457, "loss": 0.382, "step": 13326 }, { "epoch": 7.445251396648045, "grad_norm": 0.35596656799316406, "learning_rate": 0.0006297478991596639, "loss": 0.418, "step": 13327 }, { "epoch": 7.4458100558659215, "grad_norm": 0.4594074785709381, "learning_rate": 0.0006297198879551821, "loss": 0.4052, "step": 13328 }, { "epoch": 7.446368715083799, "grad_norm": 0.7637801766395569, "learning_rate": 0.0006296918767507003, "loss": 0.4079, "step": 13329 }, { "epoch": 7.446927374301676, "grad_norm": 1.1117703914642334, "learning_rate": 0.0006296638655462185, "loss": 0.4423, "step": 13330 }, { "epoch": 7.447486033519553, "grad_norm": 0.46981385350227356, "learning_rate": 0.0006296358543417367, "loss": 0.3929, "step": 13331 }, { "epoch": 7.44804469273743, "grad_norm": 0.36793094873428345, "learning_rate": 0.0006296078431372549, "loss": 0.4437, "step": 13332 }, { "epoch": 7.448603351955307, "grad_norm": 1.425216555595398, "learning_rate": 0.0006295798319327731, "loss": 0.5872, "step": 13333 }, { "epoch": 7.449162011173184, "grad_norm": 3.6802706718444824, "learning_rate": 0.0006295518207282913, "loss": 0.3871, "step": 13334 }, { "epoch": 7.449720670391061, "grad_norm": 0.42191699147224426, "learning_rate": 0.0006295238095238095, "loss": 0.3316, "step": 13335 }, 
{ "epoch": 7.4502793296089385, "grad_norm": 0.5018813610076904, "learning_rate": 0.0006294957983193278, "loss": 0.5356, "step": 13336 }, { "epoch": 7.450837988826816, "grad_norm": 0.454581618309021, "learning_rate": 0.0006294677871148459, "loss": 0.3017, "step": 13337 }, { "epoch": 7.4513966480446925, "grad_norm": 0.7520351409912109, "learning_rate": 0.0006294397759103641, "loss": 0.5615, "step": 13338 }, { "epoch": 7.45195530726257, "grad_norm": 0.5690171122550964, "learning_rate": 0.0006294117647058824, "loss": 0.5761, "step": 13339 }, { "epoch": 7.452513966480447, "grad_norm": 0.5402337908744812, "learning_rate": 0.0006293837535014006, "loss": 0.4329, "step": 13340 }, { "epoch": 7.453072625698324, "grad_norm": 0.48431089520454407, "learning_rate": 0.0006293557422969189, "loss": 0.3782, "step": 13341 }, { "epoch": 7.453631284916201, "grad_norm": 0.3996334671974182, "learning_rate": 0.000629327731092437, "loss": 0.3892, "step": 13342 }, { "epoch": 7.454189944134078, "grad_norm": 0.6260446906089783, "learning_rate": 0.0006292997198879552, "loss": 0.6518, "step": 13343 }, { "epoch": 7.454748603351955, "grad_norm": 0.4749130308628082, "learning_rate": 0.0006292717086834734, "loss": 0.4031, "step": 13344 }, { "epoch": 7.455307262569832, "grad_norm": 0.36266636848449707, "learning_rate": 0.0006292436974789916, "loss": 0.4639, "step": 13345 }, { "epoch": 7.4558659217877095, "grad_norm": 0.7757393717765808, "learning_rate": 0.0006292156862745099, "loss": 0.5068, "step": 13346 }, { "epoch": 7.456424581005587, "grad_norm": 0.4906960427761078, "learning_rate": 0.000629187675070028, "loss": 0.4805, "step": 13347 }, { "epoch": 7.4569832402234635, "grad_norm": 0.4969324767589569, "learning_rate": 0.0006291596638655462, "loss": 0.5474, "step": 13348 }, { "epoch": 7.457541899441341, "grad_norm": 0.4112037420272827, "learning_rate": 0.0006291316526610644, "loss": 0.4957, "step": 13349 }, { "epoch": 7.4581005586592175, "grad_norm": 0.8729506134986877, "learning_rate": 
0.0006291036414565826, "loss": 0.4357, "step": 13350 }, { "epoch": 7.458659217877095, "grad_norm": 0.5021253228187561, "learning_rate": 0.0006290756302521009, "loss": 0.4743, "step": 13351 }, { "epoch": 7.459217877094972, "grad_norm": 0.418542742729187, "learning_rate": 0.0006290476190476191, "loss": 0.4678, "step": 13352 }, { "epoch": 7.459776536312849, "grad_norm": 0.48293745517730713, "learning_rate": 0.0006290196078431372, "loss": 0.369, "step": 13353 }, { "epoch": 7.460335195530726, "grad_norm": 0.578088641166687, "learning_rate": 0.0006289915966386554, "loss": 0.4541, "step": 13354 }, { "epoch": 7.460893854748603, "grad_norm": 0.48990461230278015, "learning_rate": 0.0006289635854341736, "loss": 0.4408, "step": 13355 }, { "epoch": 7.4614525139664805, "grad_norm": 0.5749280452728271, "learning_rate": 0.000628935574229692, "loss": 0.3713, "step": 13356 }, { "epoch": 7.462011173184358, "grad_norm": 0.5096279382705688, "learning_rate": 0.0006289075630252102, "loss": 0.4402, "step": 13357 }, { "epoch": 7.4625698324022345, "grad_norm": 0.40594661235809326, "learning_rate": 0.0006288795518207283, "loss": 0.3948, "step": 13358 }, { "epoch": 7.463128491620112, "grad_norm": 0.44064679741859436, "learning_rate": 0.0006288515406162465, "loss": 0.5458, "step": 13359 }, { "epoch": 7.4636871508379885, "grad_norm": 0.3884088695049286, "learning_rate": 0.0006288235294117647, "loss": 0.4223, "step": 13360 }, { "epoch": 7.464245810055866, "grad_norm": 0.47736164927482605, "learning_rate": 0.000628795518207283, "loss": 0.4198, "step": 13361 }, { "epoch": 7.464804469273743, "grad_norm": 0.5243260860443115, "learning_rate": 0.0006287675070028012, "loss": 0.5427, "step": 13362 }, { "epoch": 7.46536312849162, "grad_norm": 0.6184183359146118, "learning_rate": 0.0006287394957983193, "loss": 0.3739, "step": 13363 }, { "epoch": 7.465921787709497, "grad_norm": 1.2865631580352783, "learning_rate": 0.0006287114845938375, "loss": 0.4546, "step": 13364 }, { "epoch": 7.466480446927374, 
"grad_norm": 0.8537730574607849, "learning_rate": 0.0006286834733893557, "loss": 0.3757, "step": 13365 }, { "epoch": 7.4670391061452515, "grad_norm": 0.5568868517875671, "learning_rate": 0.000628655462184874, "loss": 0.6031, "step": 13366 }, { "epoch": 7.467597765363129, "grad_norm": 0.991691529750824, "learning_rate": 0.0006286274509803922, "loss": 0.3505, "step": 13367 }, { "epoch": 7.4681564245810055, "grad_norm": 0.7091882824897766, "learning_rate": 0.0006285994397759104, "loss": 0.4866, "step": 13368 }, { "epoch": 7.468715083798883, "grad_norm": 0.5065934062004089, "learning_rate": 0.0006285714285714285, "loss": 0.396, "step": 13369 }, { "epoch": 7.4692737430167595, "grad_norm": 0.5529679656028748, "learning_rate": 0.0006285434173669467, "loss": 0.5114, "step": 13370 }, { "epoch": 7.469832402234637, "grad_norm": 0.7804819941520691, "learning_rate": 0.000628515406162465, "loss": 0.5331, "step": 13371 }, { "epoch": 7.4703910614525135, "grad_norm": 0.5834393501281738, "learning_rate": 0.0006284873949579833, "loss": 0.4095, "step": 13372 }, { "epoch": 7.470949720670391, "grad_norm": 0.48990458250045776, "learning_rate": 0.0006284593837535015, "loss": 0.4611, "step": 13373 }, { "epoch": 7.471508379888268, "grad_norm": 0.623346209526062, "learning_rate": 0.0006284313725490196, "loss": 0.5069, "step": 13374 }, { "epoch": 7.472067039106145, "grad_norm": 1.558680534362793, "learning_rate": 0.0006284033613445378, "loss": 0.3005, "step": 13375 }, { "epoch": 7.4726256983240225, "grad_norm": 0.6571272611618042, "learning_rate": 0.0006283753501400561, "loss": 0.4002, "step": 13376 }, { "epoch": 7.473184357541899, "grad_norm": 0.49199920892715454, "learning_rate": 0.0006283473389355743, "loss": 0.3986, "step": 13377 }, { "epoch": 7.4737430167597765, "grad_norm": 0.44239863753318787, "learning_rate": 0.0006283193277310925, "loss": 0.4948, "step": 13378 }, { "epoch": 7.474301675977654, "grad_norm": 0.5550490617752075, "learning_rate": 0.0006282913165266106, "loss": 0.5536, 
"step": 13379 }, { "epoch": 7.4748603351955305, "grad_norm": 0.4111871123313904, "learning_rate": 0.0006282633053221288, "loss": 0.3545, "step": 13380 }, { "epoch": 7.475418994413408, "grad_norm": 0.531445324420929, "learning_rate": 0.0006282352941176471, "loss": 0.4724, "step": 13381 }, { "epoch": 7.4759776536312845, "grad_norm": 0.4715085029602051, "learning_rate": 0.0006282072829131653, "loss": 0.3724, "step": 13382 }, { "epoch": 7.476536312849162, "grad_norm": 0.7833395004272461, "learning_rate": 0.0006281792717086835, "loss": 0.3996, "step": 13383 }, { "epoch": 7.477094972067039, "grad_norm": 0.726565957069397, "learning_rate": 0.0006281512605042017, "loss": 0.5204, "step": 13384 }, { "epoch": 7.477653631284916, "grad_norm": 0.4775674343109131, "learning_rate": 0.0006281232492997198, "loss": 0.5317, "step": 13385 }, { "epoch": 7.4782122905027935, "grad_norm": 0.5930054783821106, "learning_rate": 0.000628095238095238, "loss": 0.5465, "step": 13386 }, { "epoch": 7.47877094972067, "grad_norm": 0.8893391489982605, "learning_rate": 0.0006280672268907563, "loss": 0.5135, "step": 13387 }, { "epoch": 7.4793296089385475, "grad_norm": 0.38850027322769165, "learning_rate": 0.0006280392156862746, "loss": 0.337, "step": 13388 }, { "epoch": 7.479888268156425, "grad_norm": 0.39688146114349365, "learning_rate": 0.0006280112044817928, "loss": 0.3272, "step": 13389 }, { "epoch": 7.4804469273743015, "grad_norm": 0.6377097964286804, "learning_rate": 0.0006279831932773109, "loss": 0.3738, "step": 13390 }, { "epoch": 7.481005586592179, "grad_norm": 0.47277647256851196, "learning_rate": 0.0006279551820728291, "loss": 0.3544, "step": 13391 }, { "epoch": 7.4815642458100555, "grad_norm": 0.44037455320358276, "learning_rate": 0.0006279271708683474, "loss": 0.3983, "step": 13392 }, { "epoch": 7.482122905027933, "grad_norm": 0.5357094407081604, "learning_rate": 0.0006278991596638656, "loss": 0.3683, "step": 13393 }, { "epoch": 7.48268156424581, "grad_norm": 0.4651302099227905, 
"learning_rate": 0.0006278711484593838, "loss": 0.4055, "step": 13394 }, { "epoch": 7.483240223463687, "grad_norm": 0.495184063911438, "learning_rate": 0.0006278431372549019, "loss": 0.43, "step": 13395 }, { "epoch": 7.4837988826815645, "grad_norm": 0.5564959049224854, "learning_rate": 0.0006278151260504201, "loss": 0.6228, "step": 13396 }, { "epoch": 7.484357541899441, "grad_norm": 0.4047142565250397, "learning_rate": 0.0006277871148459384, "loss": 0.386, "step": 13397 }, { "epoch": 7.4849162011173185, "grad_norm": 0.41901081800460815, "learning_rate": 0.0006277591036414566, "loss": 0.4516, "step": 13398 }, { "epoch": 7.485474860335196, "grad_norm": 0.450962632894516, "learning_rate": 0.0006277310924369748, "loss": 0.4742, "step": 13399 }, { "epoch": 7.4860335195530725, "grad_norm": 1.0303248167037964, "learning_rate": 0.000627703081232493, "loss": 0.5202, "step": 13400 }, { "epoch": 7.48659217877095, "grad_norm": 1.1069631576538086, "learning_rate": 0.0006276750700280111, "loss": 0.3502, "step": 13401 }, { "epoch": 7.4871508379888265, "grad_norm": 0.3482000529766083, "learning_rate": 0.0006276470588235294, "loss": 0.4233, "step": 13402 }, { "epoch": 7.487709497206704, "grad_norm": 1.8103731870651245, "learning_rate": 0.0006276190476190476, "loss": 0.4433, "step": 13403 }, { "epoch": 7.488268156424581, "grad_norm": 0.9675346612930298, "learning_rate": 0.0006275910364145659, "loss": 0.404, "step": 13404 }, { "epoch": 7.488826815642458, "grad_norm": 0.4502348303794861, "learning_rate": 0.0006275630252100841, "loss": 0.4194, "step": 13405 }, { "epoch": 7.4893854748603355, "grad_norm": 0.3939608931541443, "learning_rate": 0.0006275350140056022, "loss": 0.4586, "step": 13406 }, { "epoch": 7.489944134078212, "grad_norm": 0.4570101797580719, "learning_rate": 0.0006275070028011205, "loss": 0.4836, "step": 13407 }, { "epoch": 7.4905027932960895, "grad_norm": 0.4995557367801666, "learning_rate": 0.0006274789915966387, "loss": 0.4504, "step": 13408 }, { "epoch": 
7.491061452513966, "grad_norm": 0.46654728055000305, "learning_rate": 0.0006274509803921569, "loss": 0.372, "step": 13409 }, { "epoch": 7.4916201117318435, "grad_norm": 1.703250765800476, "learning_rate": 0.0006274229691876751, "loss": 0.5072, "step": 13410 }, { "epoch": 7.492178770949721, "grad_norm": 0.6606800556182861, "learning_rate": 0.0006273949579831932, "loss": 0.455, "step": 13411 }, { "epoch": 7.4927374301675975, "grad_norm": 2.4163243770599365, "learning_rate": 0.0006273669467787115, "loss": 0.4573, "step": 13412 }, { "epoch": 7.493296089385475, "grad_norm": 1.5161129236221313, "learning_rate": 0.0006273389355742297, "loss": 0.4091, "step": 13413 }, { "epoch": 7.4938547486033515, "grad_norm": 0.51549232006073, "learning_rate": 0.0006273109243697479, "loss": 0.4583, "step": 13414 }, { "epoch": 7.494413407821229, "grad_norm": 0.416126549243927, "learning_rate": 0.0006272829131652661, "loss": 0.445, "step": 13415 }, { "epoch": 7.4949720670391065, "grad_norm": 0.5740472674369812, "learning_rate": 0.0006272549019607843, "loss": 0.4026, "step": 13416 }, { "epoch": 7.495530726256983, "grad_norm": 0.4391186833381653, "learning_rate": 0.0006272268907563025, "loss": 0.3458, "step": 13417 }, { "epoch": 7.4960893854748605, "grad_norm": 0.7656887173652649, "learning_rate": 0.0006271988795518207, "loss": 0.4026, "step": 13418 }, { "epoch": 7.496648044692737, "grad_norm": 0.45423150062561035, "learning_rate": 0.0006271708683473389, "loss": 0.3774, "step": 13419 }, { "epoch": 7.4972067039106145, "grad_norm": 0.5736512541770935, "learning_rate": 0.0006271428571428571, "loss": 0.5254, "step": 13420 }, { "epoch": 7.497765363128492, "grad_norm": 0.40836790204048157, "learning_rate": 0.0006271148459383754, "loss": 0.3873, "step": 13421 }, { "epoch": 7.4983240223463685, "grad_norm": 0.4353005886077881, "learning_rate": 0.0006270868347338936, "loss": 0.4329, "step": 13422 }, { "epoch": 7.498882681564246, "grad_norm": 0.7054917216300964, "learning_rate": 0.0006270588235294118, 
"loss": 0.5089, "step": 13423 }, { "epoch": 7.4994413407821225, "grad_norm": 0.4563477337360382, "learning_rate": 0.00062703081232493, "loss": 0.3622, "step": 13424 }, { "epoch": 7.5, "grad_norm": 0.4830285906791687, "learning_rate": 0.0006270028011204482, "loss": 0.4478, "step": 13425 }, { "epoch": 7.5005586592178775, "grad_norm": 0.4337867796421051, "learning_rate": 0.0006269747899159664, "loss": 0.4299, "step": 13426 }, { "epoch": 7.501117318435754, "grad_norm": 1.2124769687652588, "learning_rate": 0.0006269467787114847, "loss": 0.4545, "step": 13427 }, { "epoch": 7.5016759776536315, "grad_norm": 0.5080555081367493, "learning_rate": 0.0006269187675070028, "loss": 0.4935, "step": 13428 }, { "epoch": 7.502234636871508, "grad_norm": 8.636521339416504, "learning_rate": 0.000626890756302521, "loss": 0.5313, "step": 13429 }, { "epoch": 7.5027932960893855, "grad_norm": 0.9094671607017517, "learning_rate": 0.0006268627450980392, "loss": 0.7791, "step": 13430 }, { "epoch": 7.503351955307263, "grad_norm": 0.7183095812797546, "learning_rate": 0.0006268347338935574, "loss": 0.3839, "step": 13431 }, { "epoch": 7.5039106145251395, "grad_norm": 0.4022354185581207, "learning_rate": 0.0006268067226890757, "loss": 0.2891, "step": 13432 }, { "epoch": 7.504469273743017, "grad_norm": 0.5049694180488586, "learning_rate": 0.0006267787114845938, "loss": 0.4637, "step": 13433 }, { "epoch": 7.5050279329608935, "grad_norm": 0.5416582226753235, "learning_rate": 0.000626750700280112, "loss": 0.4444, "step": 13434 }, { "epoch": 7.505586592178771, "grad_norm": 0.5205233097076416, "learning_rate": 0.0006267226890756302, "loss": 0.4978, "step": 13435 }, { "epoch": 7.506145251396648, "grad_norm": 1.540658712387085, "learning_rate": 0.0006266946778711484, "loss": 0.4372, "step": 13436 }, { "epoch": 7.506703910614525, "grad_norm": 0.5724209547042847, "learning_rate": 0.0006266666666666668, "loss": 0.4544, "step": 13437 }, { "epoch": 7.5072625698324025, "grad_norm": 0.6350071430206299, 
"learning_rate": 0.0006266386554621849, "loss": 0.3814, "step": 13438 }, { "epoch": 7.507821229050279, "grad_norm": 2.5332157611846924, "learning_rate": 0.0006266106442577031, "loss": 0.8797, "step": 13439 }, { "epoch": 7.5083798882681565, "grad_norm": 0.6972220540046692, "learning_rate": 0.0006265826330532213, "loss": 0.4871, "step": 13440 }, { "epoch": 7.508938547486034, "grad_norm": 0.3587109446525574, "learning_rate": 0.0006265546218487395, "loss": 0.377, "step": 13441 }, { "epoch": 7.5094972067039105, "grad_norm": 0.9471145868301392, "learning_rate": 0.0006265266106442578, "loss": 0.3801, "step": 13442 }, { "epoch": 7.510055865921788, "grad_norm": 0.5360437035560608, "learning_rate": 0.000626498599439776, "loss": 0.3852, "step": 13443 }, { "epoch": 7.5106145251396645, "grad_norm": 0.3196997046470642, "learning_rate": 0.0006264705882352941, "loss": 0.4081, "step": 13444 }, { "epoch": 7.511173184357542, "grad_norm": 0.46493256092071533, "learning_rate": 0.0006264425770308123, "loss": 0.3728, "step": 13445 }, { "epoch": 7.511731843575419, "grad_norm": 0.5148099660873413, "learning_rate": 0.0006264145658263305, "loss": 0.4404, "step": 13446 }, { "epoch": 7.512290502793296, "grad_norm": 0.41929781436920166, "learning_rate": 0.0006263865546218488, "loss": 0.3995, "step": 13447 }, { "epoch": 7.5128491620111735, "grad_norm": 4.587903022766113, "learning_rate": 0.000626358543417367, "loss": 0.4701, "step": 13448 }, { "epoch": 7.51340782122905, "grad_norm": 0.6348535418510437, "learning_rate": 0.0006263305322128851, "loss": 0.3999, "step": 13449 }, { "epoch": 7.5139664804469275, "grad_norm": 0.47435715794563293, "learning_rate": 0.0006263025210084033, "loss": 0.4129, "step": 13450 }, { "epoch": 7.514525139664805, "grad_norm": 0.5418163537979126, "learning_rate": 0.0006262745098039215, "loss": 0.4317, "step": 13451 }, { "epoch": 7.5150837988826815, "grad_norm": 0.41280433535575867, "learning_rate": 0.0006262464985994398, "loss": 0.4728, "step": 13452 }, { "epoch": 
7.515642458100559, "grad_norm": 0.5352231860160828, "learning_rate": 0.000626218487394958, "loss": 0.4941, "step": 13453 }, { "epoch": 7.5162011173184355, "grad_norm": 0.5814662575721741, "learning_rate": 0.0006261904761904761, "loss": 0.4841, "step": 13454 }, { "epoch": 7.516759776536313, "grad_norm": 0.4405836760997772, "learning_rate": 0.0006261624649859944, "loss": 0.3607, "step": 13455 }, { "epoch": 7.51731843575419, "grad_norm": 2.6674387454986572, "learning_rate": 0.0006261344537815126, "loss": 0.4573, "step": 13456 }, { "epoch": 7.517877094972067, "grad_norm": 0.702226996421814, "learning_rate": 0.0006261064425770309, "loss": 0.5032, "step": 13457 }, { "epoch": 7.5184357541899445, "grad_norm": 0.6695470809936523, "learning_rate": 0.0006260784313725491, "loss": 0.3487, "step": 13458 }, { "epoch": 7.518994413407821, "grad_norm": 0.44072970747947693, "learning_rate": 0.0006260504201680673, "loss": 0.4149, "step": 13459 }, { "epoch": 7.5195530726256985, "grad_norm": 0.4761718213558197, "learning_rate": 0.0006260224089635854, "loss": 0.4124, "step": 13460 }, { "epoch": 7.520111731843575, "grad_norm": 0.4066667854785919, "learning_rate": 0.0006259943977591036, "loss": 0.388, "step": 13461 }, { "epoch": 7.5206703910614525, "grad_norm": 0.49014201760292053, "learning_rate": 0.0006259663865546219, "loss": 0.4879, "step": 13462 }, { "epoch": 7.52122905027933, "grad_norm": 0.5860188603401184, "learning_rate": 0.0006259383753501401, "loss": 0.439, "step": 13463 }, { "epoch": 7.5217877094972065, "grad_norm": 0.7908342480659485, "learning_rate": 0.0006259103641456583, "loss": 0.3859, "step": 13464 }, { "epoch": 7.522346368715084, "grad_norm": 0.4601080119609833, "learning_rate": 0.0006258823529411764, "loss": 0.4344, "step": 13465 }, { "epoch": 7.522905027932961, "grad_norm": 1.624560832977295, "learning_rate": 0.0006258543417366946, "loss": 0.4217, "step": 13466 }, { "epoch": 7.523463687150838, "grad_norm": 0.4940057098865509, "learning_rate": 0.0006258263305322129, 
"loss": 0.4054, "step": 13467 }, { "epoch": 7.5240223463687155, "grad_norm": 0.4555158317089081, "learning_rate": 0.0006257983193277311, "loss": 0.4714, "step": 13468 }, { "epoch": 7.524581005586592, "grad_norm": 0.7547162175178528, "learning_rate": 0.0006257703081232493, "loss": 0.4268, "step": 13469 }, { "epoch": 7.5251396648044695, "grad_norm": 1.0132737159729004, "learning_rate": 0.0006257422969187674, "loss": 0.4763, "step": 13470 }, { "epoch": 7.525698324022346, "grad_norm": 1.5819170475006104, "learning_rate": 0.0006257142857142857, "loss": 0.4103, "step": 13471 }, { "epoch": 7.5262569832402235, "grad_norm": 0.4714759886264801, "learning_rate": 0.000625686274509804, "loss": 0.5127, "step": 13472 }, { "epoch": 7.5268156424581, "grad_norm": 0.6439411640167236, "learning_rate": 0.0006256582633053222, "loss": 0.5208, "step": 13473 }, { "epoch": 7.5273743016759775, "grad_norm": 0.4376458525657654, "learning_rate": 0.0006256302521008404, "loss": 0.4689, "step": 13474 }, { "epoch": 7.527932960893855, "grad_norm": 1.8966127634048462, "learning_rate": 0.0006256022408963586, "loss": 0.4653, "step": 13475 }, { "epoch": 7.528491620111732, "grad_norm": 0.509615957736969, "learning_rate": 0.0006255742296918767, "loss": 0.4716, "step": 13476 }, { "epoch": 7.529050279329609, "grad_norm": 0.5375753045082092, "learning_rate": 0.000625546218487395, "loss": 0.5063, "step": 13477 }, { "epoch": 7.5296089385474865, "grad_norm": 0.48832905292510986, "learning_rate": 0.0006255182072829132, "loss": 0.4141, "step": 13478 }, { "epoch": 7.530167597765363, "grad_norm": 0.5039777755737305, "learning_rate": 0.0006254901960784314, "loss": 0.4793, "step": 13479 }, { "epoch": 7.5307262569832405, "grad_norm": 0.7332884669303894, "learning_rate": 0.0006254621848739496, "loss": 0.4505, "step": 13480 }, { "epoch": 7.531284916201117, "grad_norm": 0.4855954945087433, "learning_rate": 0.0006254341736694677, "loss": 0.4759, "step": 13481 }, { "epoch": 7.5318435754189945, "grad_norm": 
0.4676671624183655, "learning_rate": 0.000625406162464986, "loss": 0.4221, "step": 13482 }, { "epoch": 7.532402234636871, "grad_norm": 0.44321921467781067, "learning_rate": 0.0006253781512605042, "loss": 0.4429, "step": 13483 }, { "epoch": 7.5329608938547485, "grad_norm": 3.397167921066284, "learning_rate": 0.0006253501400560224, "loss": 0.4447, "step": 13484 }, { "epoch": 7.533519553072626, "grad_norm": 0.6599857211112976, "learning_rate": 0.0006253221288515406, "loss": 0.393, "step": 13485 }, { "epoch": 7.534078212290503, "grad_norm": 0.6302818059921265, "learning_rate": 0.0006252941176470587, "loss": 0.4711, "step": 13486 }, { "epoch": 7.53463687150838, "grad_norm": 0.5066717267036438, "learning_rate": 0.0006252661064425771, "loss": 0.4334, "step": 13487 }, { "epoch": 7.5351955307262575, "grad_norm": 0.6920668482780457, "learning_rate": 0.0006252380952380953, "loss": 0.4643, "step": 13488 }, { "epoch": 7.535754189944134, "grad_norm": 4.822314262390137, "learning_rate": 0.0006252100840336135, "loss": 0.3331, "step": 13489 }, { "epoch": 7.5363128491620115, "grad_norm": 0.3996482789516449, "learning_rate": 0.0006251820728291317, "loss": 0.4245, "step": 13490 }, { "epoch": 7.536871508379888, "grad_norm": 0.6152485013008118, "learning_rate": 0.0006251540616246499, "loss": 0.4439, "step": 13491 }, { "epoch": 7.5374301675977655, "grad_norm": 0.5713964700698853, "learning_rate": 0.0006251260504201681, "loss": 0.3872, "step": 13492 }, { "epoch": 7.537988826815642, "grad_norm": 1.7460379600524902, "learning_rate": 0.0006250980392156863, "loss": 0.4409, "step": 13493 }, { "epoch": 7.5385474860335195, "grad_norm": 0.44153743982315063, "learning_rate": 0.0006250700280112045, "loss": 0.4277, "step": 13494 }, { "epoch": 7.539106145251397, "grad_norm": 0.5277459025382996, "learning_rate": 0.0006250420168067227, "loss": 0.512, "step": 13495 }, { "epoch": 7.539664804469274, "grad_norm": 1.0423752069473267, "learning_rate": 0.0006250140056022409, "loss": 0.4216, "step": 13496 }, { 
"epoch": 7.540223463687151, "grad_norm": 0.47321251034736633, "learning_rate": 0.0006249859943977591, "loss": 0.3874, "step": 13497 }, { "epoch": 7.540782122905028, "grad_norm": 0.6798033714294434, "learning_rate": 0.0006249579831932773, "loss": 0.4549, "step": 13498 }, { "epoch": 7.541340782122905, "grad_norm": 0.4562455415725708, "learning_rate": 0.0006249299719887955, "loss": 0.369, "step": 13499 }, { "epoch": 7.5418994413407825, "grad_norm": 0.6898072957992554, "learning_rate": 0.0006249019607843137, "loss": 0.5776, "step": 13500 }, { "epoch": 7.5418994413407825, "eval_cer": 0.0892878574623853, "eval_loss": 0.3408820331096649, "eval_runtime": 55.542, "eval_samples_per_second": 81.704, "eval_steps_per_second": 5.113, "eval_wer": 0.3519175442026759, "step": 13500 }, { "epoch": 7.542458100558659, "grad_norm": 2.380089521408081, "learning_rate": 0.0006248739495798319, "loss": 0.5645, "step": 13501 }, { "epoch": 7.5430167597765365, "grad_norm": 0.7679510712623596, "learning_rate": 0.0006248459383753501, "loss": 0.6456, "step": 13502 }, { "epoch": 7.543575418994413, "grad_norm": 0.54976886510849, "learning_rate": 0.0006248179271708684, "loss": 0.4245, "step": 13503 }, { "epoch": 7.5441340782122905, "grad_norm": 0.7989188432693481, "learning_rate": 0.0006247899159663866, "loss": 0.3701, "step": 13504 }, { "epoch": 7.544692737430168, "grad_norm": 0.8670735955238342, "learning_rate": 0.0006247619047619048, "loss": 0.5506, "step": 13505 }, { "epoch": 7.545251396648045, "grad_norm": 0.46816739439964294, "learning_rate": 0.000624733893557423, "loss": 0.3936, "step": 13506 }, { "epoch": 7.545810055865922, "grad_norm": 0.6991981863975525, "learning_rate": 0.0006247058823529413, "loss": 0.486, "step": 13507 }, { "epoch": 7.546368715083799, "grad_norm": 0.5753023624420166, "learning_rate": 0.0006246778711484594, "loss": 0.538, "step": 13508 }, { "epoch": 7.546927374301676, "grad_norm": 0.4900223910808563, "learning_rate": 0.0006246498599439776, "loss": 0.4206, "step": 13509 }, 
{ "epoch": 7.547486033519553, "grad_norm": 0.5980709791183472, "learning_rate": 0.0006246218487394958, "loss": 0.4568, "step": 13510 }, { "epoch": 7.54804469273743, "grad_norm": 0.4128499925136566, "learning_rate": 0.000624593837535014, "loss": 0.414, "step": 13511 }, { "epoch": 7.5486033519553075, "grad_norm": 0.5706645846366882, "learning_rate": 0.0006245658263305323, "loss": 0.3477, "step": 13512 }, { "epoch": 7.549162011173184, "grad_norm": 0.550557017326355, "learning_rate": 0.0006245378151260504, "loss": 0.4958, "step": 13513 }, { "epoch": 7.5497206703910615, "grad_norm": 0.32888537645339966, "learning_rate": 0.0006245098039215686, "loss": 0.3844, "step": 13514 }, { "epoch": 7.550279329608939, "grad_norm": 0.9568273425102234, "learning_rate": 0.0006244817927170868, "loss": 0.5141, "step": 13515 }, { "epoch": 7.550837988826816, "grad_norm": 0.3877546191215515, "learning_rate": 0.000624453781512605, "loss": 0.3862, "step": 13516 }, { "epoch": 7.551396648044693, "grad_norm": 0.6546022891998291, "learning_rate": 0.0006244257703081233, "loss": 0.3884, "step": 13517 }, { "epoch": 7.55195530726257, "grad_norm": 0.5250709652900696, "learning_rate": 0.0006243977591036414, "loss": 0.4772, "step": 13518 }, { "epoch": 7.552513966480447, "grad_norm": 0.40196648240089417, "learning_rate": 0.0006243697478991596, "loss": 0.4018, "step": 13519 }, { "epoch": 7.553072625698324, "grad_norm": 0.7349145412445068, "learning_rate": 0.0006243417366946779, "loss": 0.4227, "step": 13520 }, { "epoch": 7.553631284916201, "grad_norm": 0.4224432408809662, "learning_rate": 0.0006243137254901961, "loss": 0.3324, "step": 13521 }, { "epoch": 7.5541899441340785, "grad_norm": 0.5843234658241272, "learning_rate": 0.0006242857142857144, "loss": 0.4288, "step": 13522 }, { "epoch": 7.554748603351955, "grad_norm": 0.5495042204856873, "learning_rate": 0.0006242577030812326, "loss": 0.4855, "step": 13523 }, { "epoch": 7.5553072625698325, "grad_norm": 0.6440644264221191, "learning_rate": 
0.0006242296918767507, "loss": 0.4254, "step": 13524 }, { "epoch": 7.55586592178771, "grad_norm": 0.571147620677948, "learning_rate": 0.0006242016806722689, "loss": 0.4285, "step": 13525 }, { "epoch": 7.556424581005587, "grad_norm": 0.6583604216575623, "learning_rate": 0.0006241736694677871, "loss": 0.4267, "step": 13526 }, { "epoch": 7.556983240223464, "grad_norm": 0.5905967354774475, "learning_rate": 0.0006241456582633054, "loss": 0.3538, "step": 13527 }, { "epoch": 7.557541899441341, "grad_norm": 0.5711806416511536, "learning_rate": 0.0006241176470588236, "loss": 0.4205, "step": 13528 }, { "epoch": 7.558100558659218, "grad_norm": 5.9775848388671875, "learning_rate": 0.0006240896358543417, "loss": 0.3912, "step": 13529 }, { "epoch": 7.558659217877095, "grad_norm": 0.4094686210155487, "learning_rate": 0.0006240616246498599, "loss": 0.4045, "step": 13530 }, { "epoch": 7.559217877094972, "grad_norm": 0.4282020926475525, "learning_rate": 0.0006240336134453781, "loss": 0.3374, "step": 13531 }, { "epoch": 7.5597765363128495, "grad_norm": 0.5447085499763489, "learning_rate": 0.0006240056022408964, "loss": 0.335, "step": 13532 }, { "epoch": 7.560335195530726, "grad_norm": 0.48971641063690186, "learning_rate": 0.0006239775910364146, "loss": 0.4137, "step": 13533 }, { "epoch": 7.5608938547486035, "grad_norm": 0.9005027413368225, "learning_rate": 0.0006239495798319327, "loss": 0.5634, "step": 13534 }, { "epoch": 7.56145251396648, "grad_norm": 0.45324206352233887, "learning_rate": 0.0006239215686274509, "loss": 0.4517, "step": 13535 }, { "epoch": 7.562011173184358, "grad_norm": 1.0080307722091675, "learning_rate": 0.0006238935574229691, "loss": 0.453, "step": 13536 }, { "epoch": 7.562569832402235, "grad_norm": 0.8796266913414001, "learning_rate": 0.0006238655462184875, "loss": 0.4719, "step": 13537 }, { "epoch": 7.563128491620112, "grad_norm": 1.5173414945602417, "learning_rate": 0.0006238375350140057, "loss": 0.4004, "step": 13538 }, { "epoch": 7.563687150837989, 
"grad_norm": 0.6940330266952515, "learning_rate": 0.0006238095238095239, "loss": 0.4645, "step": 13539 }, { "epoch": 7.564245810055866, "grad_norm": 0.8782386183738708, "learning_rate": 0.000623781512605042, "loss": 0.4195, "step": 13540 }, { "epoch": 7.564804469273743, "grad_norm": 0.4251776337623596, "learning_rate": 0.0006237535014005602, "loss": 0.3882, "step": 13541 }, { "epoch": 7.5653631284916205, "grad_norm": 0.6051243543624878, "learning_rate": 0.0006237254901960785, "loss": 0.3694, "step": 13542 }, { "epoch": 7.565921787709497, "grad_norm": 0.4507146179676056, "learning_rate": 0.0006236974789915967, "loss": 0.4456, "step": 13543 }, { "epoch": 7.5664804469273745, "grad_norm": 0.5082181692123413, "learning_rate": 0.0006236694677871149, "loss": 0.5975, "step": 13544 }, { "epoch": 7.567039106145251, "grad_norm": 1.060773253440857, "learning_rate": 0.000623641456582633, "loss": 0.472, "step": 13545 }, { "epoch": 7.567597765363129, "grad_norm": 0.7521158456802368, "learning_rate": 0.0006236134453781512, "loss": 0.3807, "step": 13546 }, { "epoch": 7.568156424581005, "grad_norm": 0.4635113477706909, "learning_rate": 0.0006235854341736695, "loss": 0.388, "step": 13547 }, { "epoch": 7.568715083798883, "grad_norm": 0.8785490989685059, "learning_rate": 0.0006235574229691877, "loss": 0.4587, "step": 13548 }, { "epoch": 7.56927374301676, "grad_norm": 0.512169361114502, "learning_rate": 0.0006235294117647059, "loss": 0.3665, "step": 13549 }, { "epoch": 7.569832402234637, "grad_norm": 0.3617511987686157, "learning_rate": 0.000623501400560224, "loss": 0.3619, "step": 13550 }, { "epoch": 7.570391061452514, "grad_norm": 0.4750588536262512, "learning_rate": 0.0006234733893557422, "loss": 0.3763, "step": 13551 }, { "epoch": 7.5709497206703915, "grad_norm": 1.6142890453338623, "learning_rate": 0.0006234453781512606, "loss": 0.4686, "step": 13552 }, { "epoch": 7.571508379888268, "grad_norm": 0.4497030973434448, "learning_rate": 0.0006234173669467788, "loss": 0.4023, "step": 
13553 }, { "epoch": 7.5720670391061455, "grad_norm": 1.4944583177566528, "learning_rate": 0.000623389355742297, "loss": 0.4646, "step": 13554 }, { "epoch": 7.572625698324022, "grad_norm": 2.2621238231658936, "learning_rate": 0.0006233613445378152, "loss": 0.3752, "step": 13555 }, { "epoch": 7.5731843575419, "grad_norm": 0.45141011476516724, "learning_rate": 0.0006233333333333333, "loss": 0.3661, "step": 13556 }, { "epoch": 7.573743016759776, "grad_norm": 1.2667438983917236, "learning_rate": 0.0006233053221288516, "loss": 0.4109, "step": 13557 }, { "epoch": 7.574301675977654, "grad_norm": 0.44011032581329346, "learning_rate": 0.0006232773109243698, "loss": 0.4115, "step": 13558 }, { "epoch": 7.574860335195531, "grad_norm": 0.9447159767150879, "learning_rate": 0.000623249299719888, "loss": 0.4522, "step": 13559 }, { "epoch": 7.575418994413408, "grad_norm": 0.43916839361190796, "learning_rate": 0.0006232212885154062, "loss": 0.428, "step": 13560 }, { "epoch": 7.575977653631285, "grad_norm": 0.40302279591560364, "learning_rate": 0.0006231932773109243, "loss": 0.4065, "step": 13561 }, { "epoch": 7.576536312849162, "grad_norm": 0.480938196182251, "learning_rate": 0.0006231652661064426, "loss": 0.3824, "step": 13562 }, { "epoch": 7.577094972067039, "grad_norm": 1.4348293542861938, "learning_rate": 0.0006231372549019608, "loss": 0.5577, "step": 13563 }, { "epoch": 7.5776536312849165, "grad_norm": 0.37254858016967773, "learning_rate": 0.000623109243697479, "loss": 0.3684, "step": 13564 }, { "epoch": 7.578212290502793, "grad_norm": 1.5769639015197754, "learning_rate": 0.0006230812324929972, "loss": 0.4637, "step": 13565 }, { "epoch": 7.578770949720671, "grad_norm": 0.38803431391716003, "learning_rate": 0.0006230532212885153, "loss": 0.3209, "step": 13566 }, { "epoch": 7.579329608938547, "grad_norm": 0.9016589522361755, "learning_rate": 0.0006230252100840336, "loss": 0.5289, "step": 13567 }, { "epoch": 7.579888268156425, "grad_norm": 0.393205463886261, "learning_rate": 
0.0006229971988795519, "loss": 0.3557, "step": 13568 }, { "epoch": 7.580446927374302, "grad_norm": 1.1996532678604126, "learning_rate": 0.0006229691876750701, "loss": 0.5249, "step": 13569 }, { "epoch": 7.581005586592179, "grad_norm": 0.5125927329063416, "learning_rate": 0.0006229411764705883, "loss": 0.4817, "step": 13570 }, { "epoch": 7.581564245810056, "grad_norm": 0.7786597013473511, "learning_rate": 0.0006229131652661065, "loss": 0.4115, "step": 13571 }, { "epoch": 7.582122905027933, "grad_norm": 0.6712331175804138, "learning_rate": 0.0006228851540616247, "loss": 0.5255, "step": 13572 }, { "epoch": 7.58268156424581, "grad_norm": 1.7728350162506104, "learning_rate": 0.0006228571428571429, "loss": 0.3863, "step": 13573 }, { "epoch": 7.5832402234636875, "grad_norm": 0.4781673550605774, "learning_rate": 0.0006228291316526611, "loss": 0.4006, "step": 13574 }, { "epoch": 7.583798882681564, "grad_norm": 3.055995225906372, "learning_rate": 0.0006228011204481793, "loss": 0.4622, "step": 13575 }, { "epoch": 7.584357541899442, "grad_norm": 2.026175022125244, "learning_rate": 0.0006227731092436975, "loss": 0.4294, "step": 13576 }, { "epoch": 7.584916201117318, "grad_norm": 0.476855605840683, "learning_rate": 0.0006227450980392157, "loss": 0.4465, "step": 13577 }, { "epoch": 7.585474860335196, "grad_norm": 1.9430747032165527, "learning_rate": 0.0006227170868347339, "loss": 0.4249, "step": 13578 }, { "epoch": 7.586033519553073, "grad_norm": 0.47005364298820496, "learning_rate": 0.0006226890756302521, "loss": 0.433, "step": 13579 }, { "epoch": 7.58659217877095, "grad_norm": 0.2990330755710602, "learning_rate": 0.0006226610644257703, "loss": 0.3467, "step": 13580 }, { "epoch": 7.587150837988827, "grad_norm": 0.47463586926460266, "learning_rate": 0.0006226330532212885, "loss": 0.4612, "step": 13581 }, { "epoch": 7.587709497206704, "grad_norm": 0.4742593765258789, "learning_rate": 0.0006226050420168068, "loss": 0.4915, "step": 13582 }, { "epoch": 7.588268156424581, "grad_norm": 
0.48226580023765564, "learning_rate": 0.0006225770308123249, "loss": 0.7002, "step": 13583 }, { "epoch": 7.588826815642458, "grad_norm": 0.4428882598876953, "learning_rate": 0.0006225490196078431, "loss": 0.4133, "step": 13584 }, { "epoch": 7.589385474860335, "grad_norm": 1.0538718700408936, "learning_rate": 0.0006225210084033614, "loss": 0.408, "step": 13585 }, { "epoch": 7.589944134078213, "grad_norm": 1.024491310119629, "learning_rate": 0.0006224929971988796, "loss": 0.3832, "step": 13586 }, { "epoch": 7.590502793296089, "grad_norm": 0.4697954058647156, "learning_rate": 0.0006224649859943979, "loss": 0.4656, "step": 13587 }, { "epoch": 7.591061452513967, "grad_norm": 0.49036210775375366, "learning_rate": 0.000622436974789916, "loss": 0.4792, "step": 13588 }, { "epoch": 7.591620111731844, "grad_norm": 1.243543267250061, "learning_rate": 0.0006224089635854342, "loss": 0.5612, "step": 13589 }, { "epoch": 7.592178770949721, "grad_norm": 0.4247657060623169, "learning_rate": 0.0006223809523809524, "loss": 0.4548, "step": 13590 }, { "epoch": 7.592737430167598, "grad_norm": 0.4095500409603119, "learning_rate": 0.0006223529411764706, "loss": 0.3511, "step": 13591 }, { "epoch": 7.593296089385475, "grad_norm": 0.5707187652587891, "learning_rate": 0.0006223249299719889, "loss": 0.4413, "step": 13592 }, { "epoch": 7.593854748603352, "grad_norm": 1.092761754989624, "learning_rate": 0.000622296918767507, "loss": 0.4691, "step": 13593 }, { "epoch": 7.594413407821229, "grad_norm": 1.0158687829971313, "learning_rate": 0.0006222689075630252, "loss": 0.4498, "step": 13594 }, { "epoch": 7.594972067039106, "grad_norm": 0.531940758228302, "learning_rate": 0.0006222408963585434, "loss": 0.5006, "step": 13595 }, { "epoch": 7.5955307262569836, "grad_norm": 0.4816807508468628, "learning_rate": 0.0006222128851540616, "loss": 0.5206, "step": 13596 }, { "epoch": 7.59608938547486, "grad_norm": 0.40765947103500366, "learning_rate": 0.0006221848739495799, "loss": 0.4152, "step": 13597 }, { 
"epoch": 7.596648044692738, "grad_norm": 0.6307384371757507, "learning_rate": 0.0006221568627450981, "loss": 0.464, "step": 13598 }, { "epoch": 7.597206703910614, "grad_norm": 0.5930641889572144, "learning_rate": 0.0006221288515406162, "loss": 0.4582, "step": 13599 }, { "epoch": 7.597765363128492, "grad_norm": 0.6988797187805176, "learning_rate": 0.0006221008403361344, "loss": 0.3736, "step": 13600 }, { "epoch": 7.598324022346369, "grad_norm": 0.4459543526172638, "learning_rate": 0.0006220728291316526, "loss": 0.4106, "step": 13601 }, { "epoch": 7.598882681564246, "grad_norm": 0.5185832977294922, "learning_rate": 0.000622044817927171, "loss": 0.3804, "step": 13602 }, { "epoch": 7.599441340782123, "grad_norm": 0.4887022376060486, "learning_rate": 0.0006220168067226892, "loss": 0.5291, "step": 13603 }, { "epoch": 7.6, "grad_norm": 0.5255801677703857, "learning_rate": 0.0006219887955182073, "loss": 0.4708, "step": 13604 }, { "epoch": 7.600558659217877, "grad_norm": 1.3447906970977783, "learning_rate": 0.0006219607843137255, "loss": 0.4201, "step": 13605 }, { "epoch": 7.6011173184357546, "grad_norm": 0.4351365864276886, "learning_rate": 0.0006219327731092437, "loss": 0.3431, "step": 13606 }, { "epoch": 7.601675977653631, "grad_norm": 0.5405589938163757, "learning_rate": 0.000621904761904762, "loss": 0.4339, "step": 13607 }, { "epoch": 7.602234636871509, "grad_norm": 0.5554489493370056, "learning_rate": 0.0006218767507002802, "loss": 0.4816, "step": 13608 }, { "epoch": 7.602793296089385, "grad_norm": 0.5024551153182983, "learning_rate": 0.0006218487394957983, "loss": 0.3799, "step": 13609 }, { "epoch": 7.603351955307263, "grad_norm": 0.4021596908569336, "learning_rate": 0.0006218207282913165, "loss": 0.4172, "step": 13610 }, { "epoch": 7.603910614525139, "grad_norm": 0.36767902970314026, "learning_rate": 0.0006217927170868347, "loss": 0.3499, "step": 13611 }, { "epoch": 7.604469273743017, "grad_norm": 0.5077435374259949, "learning_rate": 0.0006217647058823529, "loss": 
0.5479, "step": 13612 }, { "epoch": 7.605027932960894, "grad_norm": 0.7681729793548584, "learning_rate": 0.0006217366946778712, "loss": 0.3696, "step": 13613 }, { "epoch": 7.605586592178771, "grad_norm": 1.5435250997543335, "learning_rate": 0.0006217086834733894, "loss": 0.3915, "step": 13614 }, { "epoch": 7.606145251396648, "grad_norm": 0.5356452465057373, "learning_rate": 0.0006216806722689075, "loss": 0.4438, "step": 13615 }, { "epoch": 7.6067039106145256, "grad_norm": 0.41589537262916565, "learning_rate": 0.0006216526610644257, "loss": 0.4388, "step": 13616 }, { "epoch": 7.607262569832402, "grad_norm": 0.4666784405708313, "learning_rate": 0.0006216246498599439, "loss": 0.5367, "step": 13617 }, { "epoch": 7.60782122905028, "grad_norm": 1.6144362688064575, "learning_rate": 0.0006215966386554623, "loss": 0.3293, "step": 13618 }, { "epoch": 7.608379888268156, "grad_norm": 0.39060524106025696, "learning_rate": 0.0006215686274509805, "loss": 0.3585, "step": 13619 }, { "epoch": 7.608938547486034, "grad_norm": 0.898628294467926, "learning_rate": 0.0006215406162464986, "loss": 0.3869, "step": 13620 }, { "epoch": 7.60949720670391, "grad_norm": 0.6355387568473816, "learning_rate": 0.0006215126050420168, "loss": 0.5001, "step": 13621 }, { "epoch": 7.610055865921788, "grad_norm": 0.5170354843139648, "learning_rate": 0.000621484593837535, "loss": 0.4455, "step": 13622 }, { "epoch": 7.610614525139665, "grad_norm": 0.31859737634658813, "learning_rate": 0.0006214565826330533, "loss": 0.3214, "step": 13623 }, { "epoch": 7.611173184357542, "grad_norm": 0.8199390172958374, "learning_rate": 0.0006214285714285715, "loss": 0.6691, "step": 13624 }, { "epoch": 7.611731843575419, "grad_norm": 3.0901899337768555, "learning_rate": 0.0006214005602240896, "loss": 0.4273, "step": 13625 }, { "epoch": 7.6122905027932966, "grad_norm": 0.7332158088684082, "learning_rate": 0.0006213725490196078, "loss": 0.5764, "step": 13626 }, { "epoch": 7.612849162011173, "grad_norm": 0.3383885622024536, 
"learning_rate": 0.000621344537815126, "loss": 0.3479, "step": 13627 }, { "epoch": 7.613407821229051, "grad_norm": 0.6298010945320129, "learning_rate": 0.0006213165266106443, "loss": 0.3987, "step": 13628 }, { "epoch": 7.613966480446927, "grad_norm": 0.6476728916168213, "learning_rate": 0.0006212885154061625, "loss": 0.53, "step": 13629 }, { "epoch": 7.614525139664805, "grad_norm": 4.1088128089904785, "learning_rate": 0.0006212605042016807, "loss": 0.4242, "step": 13630 }, { "epoch": 7.615083798882681, "grad_norm": 1.6146824359893799, "learning_rate": 0.0006212324929971988, "loss": 0.4315, "step": 13631 }, { "epoch": 7.615642458100559, "grad_norm": 0.6278559565544128, "learning_rate": 0.000621204481792717, "loss": 0.457, "step": 13632 }, { "epoch": 7.616201117318436, "grad_norm": 0.5750777125358582, "learning_rate": 0.0006211764705882353, "loss": 0.4914, "step": 13633 }, { "epoch": 7.616759776536313, "grad_norm": 0.4993010461330414, "learning_rate": 0.0006211484593837536, "loss": 0.4251, "step": 13634 }, { "epoch": 7.61731843575419, "grad_norm": 0.5032191276550293, "learning_rate": 0.0006211204481792718, "loss": 0.6733, "step": 13635 }, { "epoch": 7.617877094972067, "grad_norm": 0.5283039808273315, "learning_rate": 0.0006210924369747899, "loss": 0.4457, "step": 13636 }, { "epoch": 7.618435754189944, "grad_norm": 0.37390825152397156, "learning_rate": 0.0006210644257703081, "loss": 0.5707, "step": 13637 }, { "epoch": 7.618994413407822, "grad_norm": 2.6217222213745117, "learning_rate": 0.0006210364145658264, "loss": 0.4876, "step": 13638 }, { "epoch": 7.619553072625698, "grad_norm": 0.8103950023651123, "learning_rate": 0.0006210084033613446, "loss": 0.4258, "step": 13639 }, { "epoch": 7.620111731843576, "grad_norm": 0.549705982208252, "learning_rate": 0.0006209803921568628, "loss": 0.2865, "step": 13640 }, { "epoch": 7.620670391061452, "grad_norm": 0.5610224008560181, "learning_rate": 0.0006209523809523809, "loss": 0.4444, "step": 13641 }, { "epoch": 7.62122905027933, 
"grad_norm": 0.3924856185913086, "learning_rate": 0.0006209243697478991, "loss": 0.3473, "step": 13642 }, { "epoch": 7.621787709497207, "grad_norm": 0.5121474862098694, "learning_rate": 0.0006208963585434174, "loss": 0.344, "step": 13643 }, { "epoch": 7.622346368715084, "grad_norm": 0.7494958639144897, "learning_rate": 0.0006208683473389356, "loss": 0.5047, "step": 13644 }, { "epoch": 7.622905027932961, "grad_norm": 0.7541096210479736, "learning_rate": 0.0006208403361344538, "loss": 0.5364, "step": 13645 }, { "epoch": 7.623463687150838, "grad_norm": 1.1734641790390015, "learning_rate": 0.000620812324929972, "loss": 0.4056, "step": 13646 }, { "epoch": 7.624022346368715, "grad_norm": 0.47596099972724915, "learning_rate": 0.0006207843137254901, "loss": 0.347, "step": 13647 }, { "epoch": 7.624581005586592, "grad_norm": 0.5886275768280029, "learning_rate": 0.0006207563025210084, "loss": 0.4595, "step": 13648 }, { "epoch": 7.625139664804469, "grad_norm": 0.6051918268203735, "learning_rate": 0.0006207282913165266, "loss": 0.4868, "step": 13649 }, { "epoch": 7.625698324022347, "grad_norm": 0.5815566778182983, "learning_rate": 0.0006207002801120449, "loss": 0.4056, "step": 13650 }, { "epoch": 7.626256983240223, "grad_norm": 0.4260379374027252, "learning_rate": 0.0006206722689075631, "loss": 0.3437, "step": 13651 }, { "epoch": 7.626815642458101, "grad_norm": 0.5471779704093933, "learning_rate": 0.0006206442577030812, "loss": 0.3978, "step": 13652 }, { "epoch": 7.627374301675978, "grad_norm": 0.6251311898231506, "learning_rate": 0.0006206162464985995, "loss": 0.6238, "step": 13653 }, { "epoch": 7.627932960893855, "grad_norm": 0.5866640210151672, "learning_rate": 0.0006205882352941177, "loss": 0.4578, "step": 13654 }, { "epoch": 7.628491620111732, "grad_norm": 0.6820197105407715, "learning_rate": 0.0006205602240896359, "loss": 0.4633, "step": 13655 }, { "epoch": 7.629050279329609, "grad_norm": 0.4338420033454895, "learning_rate": 0.0006205322128851541, "loss": 0.4273, "step": 
13656 }, { "epoch": 7.629608938547486, "grad_norm": 0.364718496799469, "learning_rate": 0.0006205042016806722, "loss": 0.3253, "step": 13657 }, { "epoch": 7.630167597765363, "grad_norm": 0.505489706993103, "learning_rate": 0.0006204761904761905, "loss": 0.4642, "step": 13658 }, { "epoch": 7.63072625698324, "grad_norm": 0.4781780540943146, "learning_rate": 0.0006204481792717087, "loss": 0.4871, "step": 13659 }, { "epoch": 7.631284916201118, "grad_norm": 3.7457022666931152, "learning_rate": 0.0006204201680672269, "loss": 0.3851, "step": 13660 }, { "epoch": 7.631843575418994, "grad_norm": 0.47109392285346985, "learning_rate": 0.0006203921568627451, "loss": 0.3423, "step": 13661 }, { "epoch": 7.632402234636872, "grad_norm": 1.9330334663391113, "learning_rate": 0.0006203641456582633, "loss": 0.4179, "step": 13662 }, { "epoch": 7.632960893854749, "grad_norm": 1.0891554355621338, "learning_rate": 0.0006203361344537815, "loss": 0.4047, "step": 13663 }, { "epoch": 7.633519553072626, "grad_norm": 0.5759936571121216, "learning_rate": 0.0006203081232492997, "loss": 0.5828, "step": 13664 }, { "epoch": 7.634078212290503, "grad_norm": 0.5856107473373413, "learning_rate": 0.0006202801120448179, "loss": 0.4522, "step": 13665 }, { "epoch": 7.63463687150838, "grad_norm": 0.47771868109703064, "learning_rate": 0.0006202521008403361, "loss": 0.418, "step": 13666 }, { "epoch": 7.635195530726257, "grad_norm": 0.5531140565872192, "learning_rate": 0.0006202240896358544, "loss": 0.3931, "step": 13667 }, { "epoch": 7.635754189944134, "grad_norm": 0.43420663475990295, "learning_rate": 0.0006201960784313726, "loss": 0.3911, "step": 13668 }, { "epoch": 7.636312849162011, "grad_norm": 0.6260654926300049, "learning_rate": 0.0006201680672268908, "loss": 0.3894, "step": 13669 }, { "epoch": 7.636871508379889, "grad_norm": 1.5277457237243652, "learning_rate": 0.000620140056022409, "loss": 0.5073, "step": 13670 }, { "epoch": 7.637430167597765, "grad_norm": 0.424627423286438, "learning_rate": 
0.0006201120448179272, "loss": 0.4319, "step": 13671 }, { "epoch": 7.637988826815643, "grad_norm": 1.800803542137146, "learning_rate": 0.0006200840336134454, "loss": 0.4499, "step": 13672 }, { "epoch": 7.638547486033519, "grad_norm": 0.4708835184574127, "learning_rate": 0.0006200560224089636, "loss": 0.4628, "step": 13673 }, { "epoch": 7.639106145251397, "grad_norm": 1.399085283279419, "learning_rate": 0.0006200280112044818, "loss": 0.529, "step": 13674 }, { "epoch": 7.639664804469274, "grad_norm": 0.512787938117981, "learning_rate": 0.00062, "loss": 0.4991, "step": 13675 }, { "epoch": 7.640223463687151, "grad_norm": 0.46335405111312866, "learning_rate": 0.0006199719887955182, "loss": 0.5069, "step": 13676 }, { "epoch": 7.640782122905028, "grad_norm": 0.4293338656425476, "learning_rate": 0.0006199439775910364, "loss": 0.3096, "step": 13677 }, { "epoch": 7.641340782122905, "grad_norm": 0.46071186661720276, "learning_rate": 0.0006199159663865547, "loss": 0.3379, "step": 13678 }, { "epoch": 7.641899441340782, "grad_norm": 0.5742217898368835, "learning_rate": 0.0006198879551820728, "loss": 0.5081, "step": 13679 }, { "epoch": 7.64245810055866, "grad_norm": 0.4921104609966278, "learning_rate": 0.000619859943977591, "loss": 0.4397, "step": 13680 }, { "epoch": 7.643016759776536, "grad_norm": 0.5453481674194336, "learning_rate": 0.0006198319327731092, "loss": 0.4033, "step": 13681 }, { "epoch": 7.643575418994414, "grad_norm": 2.258979320526123, "learning_rate": 0.0006198039215686274, "loss": 0.3496, "step": 13682 }, { "epoch": 7.64413407821229, "grad_norm": 0.483030766248703, "learning_rate": 0.0006197759103641458, "loss": 0.4889, "step": 13683 }, { "epoch": 7.644692737430168, "grad_norm": 0.5434699058532715, "learning_rate": 0.0006197478991596639, "loss": 0.5406, "step": 13684 }, { "epoch": 7.645251396648044, "grad_norm": 0.5193633437156677, "learning_rate": 0.0006197198879551821, "loss": 0.5057, "step": 13685 }, { "epoch": 7.645810055865922, "grad_norm": 
0.4553646147251129, "learning_rate": 0.0006196918767507003, "loss": 0.4554, "step": 13686 }, { "epoch": 7.646368715083799, "grad_norm": 0.9845026135444641, "learning_rate": 0.0006196638655462185, "loss": 0.4455, "step": 13687 }, { "epoch": 7.646927374301676, "grad_norm": 0.3351835310459137, "learning_rate": 0.0006196358543417368, "loss": 0.4613, "step": 13688 }, { "epoch": 7.647486033519553, "grad_norm": 0.3721778690814972, "learning_rate": 0.0006196078431372549, "loss": 0.4132, "step": 13689 }, { "epoch": 7.648044692737431, "grad_norm": 0.5659770369529724, "learning_rate": 0.0006195798319327731, "loss": 0.396, "step": 13690 }, { "epoch": 7.648603351955307, "grad_norm": 0.4577784538269043, "learning_rate": 0.0006195518207282913, "loss": 0.4694, "step": 13691 }, { "epoch": 7.649162011173185, "grad_norm": 0.7744741439819336, "learning_rate": 0.0006195238095238095, "loss": 0.5063, "step": 13692 }, { "epoch": 7.649720670391061, "grad_norm": 0.6406466364860535, "learning_rate": 0.0006194957983193278, "loss": 0.4249, "step": 13693 }, { "epoch": 7.650279329608939, "grad_norm": 0.5525261759757996, "learning_rate": 0.000619467787114846, "loss": 0.4521, "step": 13694 }, { "epoch": 7.650837988826815, "grad_norm": 0.5090089440345764, "learning_rate": 0.0006194397759103641, "loss": 0.3405, "step": 13695 }, { "epoch": 7.651396648044693, "grad_norm": 0.6989052295684814, "learning_rate": 0.0006194117647058823, "loss": 0.4281, "step": 13696 }, { "epoch": 7.65195530726257, "grad_norm": 0.6486166715621948, "learning_rate": 0.0006193837535014005, "loss": 0.5128, "step": 13697 }, { "epoch": 7.652513966480447, "grad_norm": 0.6600151658058167, "learning_rate": 0.0006193557422969188, "loss": 0.4793, "step": 13698 }, { "epoch": 7.653072625698324, "grad_norm": 0.601128876209259, "learning_rate": 0.000619327731092437, "loss": 0.5005, "step": 13699 }, { "epoch": 7.653631284916202, "grad_norm": 0.4556083679199219, "learning_rate": 0.0006192997198879551, "loss": 0.4707, "step": 13700 }, { 
"epoch": 7.654189944134078, "grad_norm": 0.47777992486953735, "learning_rate": 0.0006192717086834734, "loss": 0.4096, "step": 13701 }, { "epoch": 7.654748603351956, "grad_norm": 0.5485050678253174, "learning_rate": 0.0006192436974789916, "loss": 0.4695, "step": 13702 }, { "epoch": 7.655307262569832, "grad_norm": 2.2587099075317383, "learning_rate": 0.0006192156862745099, "loss": 0.4956, "step": 13703 }, { "epoch": 7.65586592178771, "grad_norm": 0.3950224220752716, "learning_rate": 0.0006191876750700281, "loss": 0.4163, "step": 13704 }, { "epoch": 7.656424581005586, "grad_norm": 0.43634507060050964, "learning_rate": 0.0006191596638655462, "loss": 0.4292, "step": 13705 }, { "epoch": 7.656983240223464, "grad_norm": 1.0446161031723022, "learning_rate": 0.0006191316526610644, "loss": 0.4639, "step": 13706 }, { "epoch": 7.657541899441341, "grad_norm": 0.5352032780647278, "learning_rate": 0.0006191036414565826, "loss": 0.488, "step": 13707 }, { "epoch": 7.658100558659218, "grad_norm": 0.5349762439727783, "learning_rate": 0.0006190756302521009, "loss": 0.4923, "step": 13708 }, { "epoch": 7.658659217877095, "grad_norm": 0.46903496980667114, "learning_rate": 0.0006190476190476191, "loss": 0.4169, "step": 13709 }, { "epoch": 7.659217877094972, "grad_norm": 0.42552435398101807, "learning_rate": 0.0006190196078431373, "loss": 0.4271, "step": 13710 }, { "epoch": 7.659776536312849, "grad_norm": 0.4879568815231323, "learning_rate": 0.0006189915966386554, "loss": 0.4185, "step": 13711 }, { "epoch": 7.660335195530727, "grad_norm": 0.830924928188324, "learning_rate": 0.0006189635854341736, "loss": 0.4635, "step": 13712 }, { "epoch": 7.660893854748603, "grad_norm": 0.43467411398887634, "learning_rate": 0.0006189355742296919, "loss": 0.3426, "step": 13713 }, { "epoch": 7.661452513966481, "grad_norm": 0.41393110156059265, "learning_rate": 0.0006189075630252101, "loss": 0.4423, "step": 13714 }, { "epoch": 7.662011173184357, "grad_norm": 0.7466068863868713, "learning_rate": 
0.0006188795518207283, "loss": 0.3492, "step": 13715 }, { "epoch": 7.662569832402235, "grad_norm": 2.6812796592712402, "learning_rate": 0.0006188515406162464, "loss": 0.5415, "step": 13716 }, { "epoch": 7.663128491620112, "grad_norm": 0.42886239290237427, "learning_rate": 0.0006188235294117647, "loss": 0.5442, "step": 13717 }, { "epoch": 7.663687150837989, "grad_norm": 0.9191313982009888, "learning_rate": 0.000618795518207283, "loss": 0.5346, "step": 13718 }, { "epoch": 7.664245810055866, "grad_norm": 0.5040920972824097, "learning_rate": 0.0006187675070028012, "loss": 0.4269, "step": 13719 }, { "epoch": 7.664804469273743, "grad_norm": 0.43607982993125916, "learning_rate": 0.0006187394957983194, "loss": 0.4148, "step": 13720 }, { "epoch": 7.66536312849162, "grad_norm": 0.411907434463501, "learning_rate": 0.0006187114845938375, "loss": 0.4211, "step": 13721 }, { "epoch": 7.665921787709497, "grad_norm": 0.8341143131256104, "learning_rate": 0.0006186834733893557, "loss": 0.5373, "step": 13722 }, { "epoch": 7.666480446927374, "grad_norm": 0.8826488256454468, "learning_rate": 0.000618655462184874, "loss": 0.4897, "step": 13723 }, { "epoch": 7.667039106145252, "grad_norm": 0.8334322571754456, "learning_rate": 0.0006186274509803922, "loss": 0.4648, "step": 13724 }, { "epoch": 7.667597765363128, "grad_norm": 0.3868120014667511, "learning_rate": 0.0006185994397759104, "loss": 0.3887, "step": 13725 }, { "epoch": 7.668156424581006, "grad_norm": 0.3800918757915497, "learning_rate": 0.0006185714285714286, "loss": 0.4395, "step": 13726 }, { "epoch": 7.668715083798883, "grad_norm": 0.7623074054718018, "learning_rate": 0.0006185434173669467, "loss": 0.4298, "step": 13727 }, { "epoch": 7.66927374301676, "grad_norm": 0.49751996994018555, "learning_rate": 0.000618515406162465, "loss": 0.6008, "step": 13728 }, { "epoch": 7.669832402234637, "grad_norm": 0.34923678636550903, "learning_rate": 0.0006184873949579832, "loss": 0.4395, "step": 13729 }, { "epoch": 7.670391061452514, 
"grad_norm": 0.6198163032531738, "learning_rate": 0.0006184593837535014, "loss": 0.3518, "step": 13730 }, { "epoch": 7.670949720670391, "grad_norm": 0.5441950559616089, "learning_rate": 0.0006184313725490196, "loss": 0.5727, "step": 13731 }, { "epoch": 7.671508379888268, "grad_norm": 1.0818986892700195, "learning_rate": 0.0006184033613445377, "loss": 0.3853, "step": 13732 }, { "epoch": 7.672067039106145, "grad_norm": 0.7946770787239075, "learning_rate": 0.0006183753501400561, "loss": 0.445, "step": 13733 }, { "epoch": 7.672625698324023, "grad_norm": 1.122252345085144, "learning_rate": 0.0006183473389355743, "loss": 0.5267, "step": 13734 }, { "epoch": 7.673184357541899, "grad_norm": 0.7753942608833313, "learning_rate": 0.0006183193277310925, "loss": 0.4658, "step": 13735 }, { "epoch": 7.673743016759777, "grad_norm": 0.46905720233917236, "learning_rate": 0.0006182913165266107, "loss": 0.4746, "step": 13736 }, { "epoch": 7.674301675977654, "grad_norm": 1.0591672658920288, "learning_rate": 0.0006182633053221288, "loss": 0.5493, "step": 13737 }, { "epoch": 7.674860335195531, "grad_norm": 0.3919164538383484, "learning_rate": 0.0006182352941176471, "loss": 0.3957, "step": 13738 }, { "epoch": 7.675418994413408, "grad_norm": 0.5964024662971497, "learning_rate": 0.0006182072829131653, "loss": 0.4231, "step": 13739 }, { "epoch": 7.675977653631285, "grad_norm": 0.37612593173980713, "learning_rate": 0.0006181792717086835, "loss": 0.4415, "step": 13740 }, { "epoch": 7.676536312849162, "grad_norm": 1.1040171384811401, "learning_rate": 0.0006181512605042017, "loss": 0.4617, "step": 13741 }, { "epoch": 7.677094972067039, "grad_norm": 0.6453474760055542, "learning_rate": 0.0006181232492997199, "loss": 0.3868, "step": 13742 }, { "epoch": 7.677653631284916, "grad_norm": 0.42111751437187195, "learning_rate": 0.0006180952380952381, "loss": 0.4798, "step": 13743 }, { "epoch": 7.678212290502794, "grad_norm": 0.36102917790412903, "learning_rate": 0.0006180672268907563, "loss": 0.4075, 
"step": 13744 }, { "epoch": 7.67877094972067, "grad_norm": 0.6224989295005798, "learning_rate": 0.0006180392156862745, "loss": 0.4577, "step": 13745 }, { "epoch": 7.679329608938548, "grad_norm": 0.6655389070510864, "learning_rate": 0.0006180112044817927, "loss": 0.4789, "step": 13746 }, { "epoch": 7.679888268156424, "grad_norm": 2.1099164485931396, "learning_rate": 0.0006179831932773109, "loss": 0.5262, "step": 13747 }, { "epoch": 7.680446927374302, "grad_norm": 0.5552861094474792, "learning_rate": 0.0006179551820728291, "loss": 0.4693, "step": 13748 }, { "epoch": 7.681005586592179, "grad_norm": 0.7028999328613281, "learning_rate": 0.0006179271708683474, "loss": 0.5608, "step": 13749 }, { "epoch": 7.681564245810056, "grad_norm": 6.781050682067871, "learning_rate": 0.0006178991596638656, "loss": 0.463, "step": 13750 }, { "epoch": 7.682122905027933, "grad_norm": 0.5714400410652161, "learning_rate": 0.0006178711484593838, "loss": 0.5884, "step": 13751 }, { "epoch": 7.68268156424581, "grad_norm": 1.9843143224716187, "learning_rate": 0.000617843137254902, "loss": 0.4781, "step": 13752 }, { "epoch": 7.683240223463687, "grad_norm": 0.6092252135276794, "learning_rate": 0.0006178151260504202, "loss": 0.5105, "step": 13753 }, { "epoch": 7.683798882681565, "grad_norm": 0.7522796392440796, "learning_rate": 0.0006177871148459384, "loss": 0.5926, "step": 13754 }, { "epoch": 7.684357541899441, "grad_norm": 0.3750437796115875, "learning_rate": 0.0006177591036414566, "loss": 0.4542, "step": 13755 }, { "epoch": 7.684916201117319, "grad_norm": 0.38676556944847107, "learning_rate": 0.0006177310924369748, "loss": 0.3416, "step": 13756 }, { "epoch": 7.685474860335195, "grad_norm": 0.4673371911048889, "learning_rate": 0.000617703081232493, "loss": 0.3595, "step": 13757 }, { "epoch": 7.686033519553073, "grad_norm": 0.9639487862586975, "learning_rate": 0.0006176750700280113, "loss": 0.4054, "step": 13758 }, { "epoch": 7.686592178770949, "grad_norm": 2.0492560863494873, "learning_rate": 
0.0006176470588235294, "loss": 0.4234, "step": 13759 }, { "epoch": 7.687150837988827, "grad_norm": 0.47220635414123535, "learning_rate": 0.0006176190476190476, "loss": 0.4411, "step": 13760 }, { "epoch": 7.687709497206704, "grad_norm": 0.9797062873840332, "learning_rate": 0.0006175910364145658, "loss": 0.4645, "step": 13761 }, { "epoch": 7.688268156424581, "grad_norm": 0.522896945476532, "learning_rate": 0.000617563025210084, "loss": 0.3668, "step": 13762 }, { "epoch": 7.688826815642458, "grad_norm": 0.6306886672973633, "learning_rate": 0.0006175350140056023, "loss": 0.5104, "step": 13763 }, { "epoch": 7.689385474860336, "grad_norm": 0.8065402507781982, "learning_rate": 0.0006175070028011204, "loss": 0.4484, "step": 13764 }, { "epoch": 7.689944134078212, "grad_norm": 0.6034227013587952, "learning_rate": 0.0006174789915966386, "loss": 0.396, "step": 13765 }, { "epoch": 7.69050279329609, "grad_norm": 0.6344764232635498, "learning_rate": 0.0006174509803921569, "loss": 0.3911, "step": 13766 }, { "epoch": 7.691061452513966, "grad_norm": 0.7747047543525696, "learning_rate": 0.0006174229691876751, "loss": 0.4212, "step": 13767 }, { "epoch": 7.691620111731844, "grad_norm": 0.5411653518676758, "learning_rate": 0.0006173949579831934, "loss": 0.4052, "step": 13768 }, { "epoch": 7.69217877094972, "grad_norm": 0.4202643930912018, "learning_rate": 0.0006173669467787115, "loss": 0.3904, "step": 13769 }, { "epoch": 7.692737430167598, "grad_norm": 0.376617968082428, "learning_rate": 0.0006173389355742297, "loss": 0.3124, "step": 13770 }, { "epoch": 7.693296089385475, "grad_norm": 0.5287662148475647, "learning_rate": 0.0006173109243697479, "loss": 0.4483, "step": 13771 }, { "epoch": 7.693854748603352, "grad_norm": 0.4734002649784088, "learning_rate": 0.0006172829131652661, "loss": 0.44, "step": 13772 }, { "epoch": 7.694413407821229, "grad_norm": 0.37186411023139954, "learning_rate": 0.0006172549019607844, "loss": 0.3672, "step": 13773 }, { "epoch": 7.694972067039107, "grad_norm": 
0.408000648021698, "learning_rate": 0.0006172268907563026, "loss": 0.3329, "step": 13774 }, { "epoch": 7.695530726256983, "grad_norm": 0.7121781706809998, "learning_rate": 0.0006171988795518207, "loss": 0.5811, "step": 13775 }, { "epoch": 7.696089385474861, "grad_norm": 0.38978955149650574, "learning_rate": 0.0006171708683473389, "loss": 0.4693, "step": 13776 }, { "epoch": 7.696648044692737, "grad_norm": 0.5049538016319275, "learning_rate": 0.0006171428571428571, "loss": 0.3904, "step": 13777 }, { "epoch": 7.697206703910615, "grad_norm": 0.8071524500846863, "learning_rate": 0.0006171148459383754, "loss": 0.4922, "step": 13778 }, { "epoch": 7.697765363128491, "grad_norm": 0.5353125333786011, "learning_rate": 0.0006170868347338936, "loss": 0.5046, "step": 13779 }, { "epoch": 7.698324022346369, "grad_norm": 0.583459734916687, "learning_rate": 0.0006170588235294117, "loss": 0.4739, "step": 13780 }, { "epoch": 7.698882681564246, "grad_norm": 0.49595117568969727, "learning_rate": 0.0006170308123249299, "loss": 0.3648, "step": 13781 }, { "epoch": 7.699441340782123, "grad_norm": 0.5471254587173462, "learning_rate": 0.0006170028011204481, "loss": 0.51, "step": 13782 }, { "epoch": 7.7, "grad_norm": 0.5223873257637024, "learning_rate": 0.0006169747899159665, "loss": 0.4504, "step": 13783 }, { "epoch": 7.700558659217877, "grad_norm": 0.4435765743255615, "learning_rate": 0.0006169467787114847, "loss": 0.4574, "step": 13784 }, { "epoch": 7.701117318435754, "grad_norm": 0.8653535842895508, "learning_rate": 0.0006169187675070028, "loss": 0.4024, "step": 13785 }, { "epoch": 7.701675977653632, "grad_norm": 0.3829124867916107, "learning_rate": 0.000616890756302521, "loss": 0.4495, "step": 13786 }, { "epoch": 7.702234636871508, "grad_norm": 0.4708978831768036, "learning_rate": 0.0006168627450980392, "loss": 0.425, "step": 13787 }, { "epoch": 7.702793296089386, "grad_norm": 0.5074251890182495, "learning_rate": 0.0006168347338935575, "loss": 0.4854, "step": 13788 }, { "epoch": 
7.703351955307262, "grad_norm": 0.46025800704956055, "learning_rate": 0.0006168067226890757, "loss": 0.3775, "step": 13789 }, { "epoch": 7.70391061452514, "grad_norm": 0.8103023171424866, "learning_rate": 0.0006167787114845939, "loss": 0.451, "step": 13790 }, { "epoch": 7.704469273743017, "grad_norm": 0.582000732421875, "learning_rate": 0.000616750700280112, "loss": 0.5085, "step": 13791 }, { "epoch": 7.705027932960894, "grad_norm": 0.371208131313324, "learning_rate": 0.0006167226890756302, "loss": 0.3684, "step": 13792 }, { "epoch": 7.705586592178771, "grad_norm": 0.3921825885772705, "learning_rate": 0.0006166946778711485, "loss": 0.4204, "step": 13793 }, { "epoch": 7.706145251396648, "grad_norm": 1.094571590423584, "learning_rate": 0.0006166666666666667, "loss": 0.4288, "step": 13794 }, { "epoch": 7.706703910614525, "grad_norm": 0.530928909778595, "learning_rate": 0.0006166386554621849, "loss": 0.3052, "step": 13795 }, { "epoch": 7.707262569832402, "grad_norm": 0.5719661116600037, "learning_rate": 0.000616610644257703, "loss": 0.4633, "step": 13796 }, { "epoch": 7.707821229050279, "grad_norm": 0.5013129711151123, "learning_rate": 0.0006165826330532212, "loss": 0.432, "step": 13797 }, { "epoch": 7.708379888268157, "grad_norm": 0.7338741421699524, "learning_rate": 0.0006165546218487396, "loss": 0.5131, "step": 13798 }, { "epoch": 7.708938547486033, "grad_norm": 2.6967313289642334, "learning_rate": 0.0006165266106442578, "loss": 0.4484, "step": 13799 }, { "epoch": 7.709497206703911, "grad_norm": 0.46453040838241577, "learning_rate": 0.000616498599439776, "loss": 0.4021, "step": 13800 }, { "epoch": 7.710055865921788, "grad_norm": 0.6692829728126526, "learning_rate": 0.0006164705882352941, "loss": 0.5783, "step": 13801 }, { "epoch": 7.710614525139665, "grad_norm": 0.5053221583366394, "learning_rate": 0.0006164425770308123, "loss": 0.4154, "step": 13802 }, { "epoch": 7.711173184357542, "grad_norm": 0.5044556856155396, "learning_rate": 0.0006164145658263306, "loss": 
0.3968, "step": 13803 }, { "epoch": 7.711731843575419, "grad_norm": 0.5830820798873901, "learning_rate": 0.0006163865546218488, "loss": 0.4953, "step": 13804 }, { "epoch": 7.712290502793296, "grad_norm": 1.0309406518936157, "learning_rate": 0.000616358543417367, "loss": 0.3941, "step": 13805 }, { "epoch": 7.712849162011173, "grad_norm": 2.83329439163208, "learning_rate": 0.0006163305322128852, "loss": 0.4314, "step": 13806 }, { "epoch": 7.71340782122905, "grad_norm": 0.4441019594669342, "learning_rate": 0.0006163025210084033, "loss": 0.4172, "step": 13807 }, { "epoch": 7.713966480446928, "grad_norm": 0.6268557906150818, "learning_rate": 0.0006162745098039216, "loss": 0.4257, "step": 13808 }, { "epoch": 7.714525139664804, "grad_norm": 2.7386374473571777, "learning_rate": 0.0006162464985994398, "loss": 0.466, "step": 13809 }, { "epoch": 7.715083798882682, "grad_norm": 0.45009568333625793, "learning_rate": 0.000616218487394958, "loss": 0.4323, "step": 13810 }, { "epoch": 7.715642458100559, "grad_norm": 4.924854755401611, "learning_rate": 0.0006161904761904762, "loss": 0.6347, "step": 13811 }, { "epoch": 7.716201117318436, "grad_norm": 0.4844169616699219, "learning_rate": 0.0006161624649859943, "loss": 0.4234, "step": 13812 }, { "epoch": 7.716759776536313, "grad_norm": 0.5485273599624634, "learning_rate": 0.0006161344537815126, "loss": 0.4263, "step": 13813 }, { "epoch": 7.71731843575419, "grad_norm": 0.5497127771377563, "learning_rate": 0.0006161064425770308, "loss": 0.3986, "step": 13814 }, { "epoch": 7.717877094972067, "grad_norm": 0.5263583660125732, "learning_rate": 0.0006160784313725491, "loss": 0.5311, "step": 13815 }, { "epoch": 7.718435754189944, "grad_norm": 0.36241477727890015, "learning_rate": 0.0006160504201680673, "loss": 0.3968, "step": 13816 }, { "epoch": 7.718994413407821, "grad_norm": 0.4783209562301636, "learning_rate": 0.0006160224089635854, "loss": 0.4473, "step": 13817 }, { "epoch": 7.719553072625699, "grad_norm": 1.1497730016708374, 
"learning_rate": 0.0006159943977591037, "loss": 0.4315, "step": 13818 }, { "epoch": 7.720111731843575, "grad_norm": 0.391686350107193, "learning_rate": 0.0006159663865546219, "loss": 0.3978, "step": 13819 }, { "epoch": 7.720670391061453, "grad_norm": 0.47612759470939636, "learning_rate": 0.0006159383753501401, "loss": 0.4918, "step": 13820 }, { "epoch": 7.721229050279329, "grad_norm": 0.7015030384063721, "learning_rate": 0.0006159103641456583, "loss": 0.4627, "step": 13821 }, { "epoch": 7.721787709497207, "grad_norm": 0.5020100474357605, "learning_rate": 0.0006158823529411765, "loss": 0.4659, "step": 13822 }, { "epoch": 7.722346368715084, "grad_norm": 0.5628128051757812, "learning_rate": 0.0006158543417366947, "loss": 0.4695, "step": 13823 }, { "epoch": 7.722905027932961, "grad_norm": 0.4184693992137909, "learning_rate": 0.0006158263305322129, "loss": 0.3769, "step": 13824 }, { "epoch": 7.723463687150838, "grad_norm": 0.5309139490127563, "learning_rate": 0.0006157983193277311, "loss": 0.5206, "step": 13825 }, { "epoch": 7.724022346368715, "grad_norm": 0.7914676070213318, "learning_rate": 0.0006157703081232493, "loss": 0.439, "step": 13826 }, { "epoch": 7.724581005586592, "grad_norm": 0.555533230304718, "learning_rate": 0.0006157422969187675, "loss": 0.4721, "step": 13827 }, { "epoch": 7.72513966480447, "grad_norm": 0.6769536137580872, "learning_rate": 0.0006157142857142857, "loss": 0.4511, "step": 13828 }, { "epoch": 7.725698324022346, "grad_norm": 0.4408147931098938, "learning_rate": 0.0006156862745098039, "loss": 0.4181, "step": 13829 }, { "epoch": 7.726256983240224, "grad_norm": 0.4702502489089966, "learning_rate": 0.0006156582633053221, "loss": 0.4911, "step": 13830 }, { "epoch": 7.7268156424581, "grad_norm": 0.3909347951412201, "learning_rate": 0.0006156302521008404, "loss": 0.3978, "step": 13831 }, { "epoch": 7.727374301675978, "grad_norm": 0.42259547114372253, "learning_rate": 0.0006156022408963586, "loss": 0.4038, "step": 13832 }, { "epoch": 
7.727932960893854, "grad_norm": 0.730812668800354, "learning_rate": 0.0006155742296918767, "loss": 0.4515, "step": 13833 }, { "epoch": 7.728491620111732, "grad_norm": 0.6177743077278137, "learning_rate": 0.000615546218487395, "loss": 0.5445, "step": 13834 }, { "epoch": 7.729050279329609, "grad_norm": 0.956550657749176, "learning_rate": 0.0006155182072829132, "loss": 0.4675, "step": 13835 }, { "epoch": 7.729608938547486, "grad_norm": 1.4229481220245361, "learning_rate": 0.0006154901960784314, "loss": 0.4468, "step": 13836 }, { "epoch": 7.730167597765363, "grad_norm": 0.45281124114990234, "learning_rate": 0.0006154621848739496, "loss": 0.3918, "step": 13837 }, { "epoch": 7.730726256983241, "grad_norm": 1.5274964570999146, "learning_rate": 0.0006154341736694678, "loss": 0.3822, "step": 13838 }, { "epoch": 7.731284916201117, "grad_norm": 1.0664663314819336, "learning_rate": 0.000615406162464986, "loss": 0.3988, "step": 13839 }, { "epoch": 7.731843575418995, "grad_norm": 0.568108856678009, "learning_rate": 0.0006153781512605042, "loss": 0.5821, "step": 13840 }, { "epoch": 7.732402234636871, "grad_norm": 0.426876038312912, "learning_rate": 0.0006153501400560224, "loss": 0.365, "step": 13841 }, { "epoch": 7.732960893854749, "grad_norm": 0.7191917300224304, "learning_rate": 0.0006153221288515406, "loss": 0.5099, "step": 13842 }, { "epoch": 7.733519553072625, "grad_norm": 1.9407932758331299, "learning_rate": 0.0006152941176470588, "loss": 0.6146, "step": 13843 }, { "epoch": 7.734078212290503, "grad_norm": 0.8126131296157837, "learning_rate": 0.000615266106442577, "loss": 0.3946, "step": 13844 }, { "epoch": 7.73463687150838, "grad_norm": 0.7253028154373169, "learning_rate": 0.0006152380952380952, "loss": 0.4626, "step": 13845 }, { "epoch": 7.735195530726257, "grad_norm": 0.4987674057483673, "learning_rate": 0.0006152100840336134, "loss": 0.4327, "step": 13846 }, { "epoch": 7.735754189944134, "grad_norm": 0.7138853073120117, "learning_rate": 0.0006151820728291316, "loss": 
0.4799, "step": 13847 }, { "epoch": 7.736312849162011, "grad_norm": 0.6187353134155273, "learning_rate": 0.0006151540616246499, "loss": 0.4471, "step": 13848 }, { "epoch": 7.736871508379888, "grad_norm": 0.544151246547699, "learning_rate": 0.0006151260504201682, "loss": 0.4107, "step": 13849 }, { "epoch": 7.737430167597766, "grad_norm": 0.531272292137146, "learning_rate": 0.0006150980392156863, "loss": 0.3343, "step": 13850 }, { "epoch": 7.737988826815642, "grad_norm": 0.42586618661880493, "learning_rate": 0.0006150700280112045, "loss": 0.4436, "step": 13851 }, { "epoch": 7.73854748603352, "grad_norm": 0.7693768739700317, "learning_rate": 0.0006150420168067227, "loss": 0.6109, "step": 13852 }, { "epoch": 7.739106145251396, "grad_norm": 0.47911494970321655, "learning_rate": 0.0006150140056022409, "loss": 0.3975, "step": 13853 }, { "epoch": 7.739664804469274, "grad_norm": 0.6103163361549377, "learning_rate": 0.0006149859943977592, "loss": 0.4093, "step": 13854 }, { "epoch": 7.740223463687151, "grad_norm": 0.4705524742603302, "learning_rate": 0.0006149579831932773, "loss": 0.4595, "step": 13855 }, { "epoch": 7.740782122905028, "grad_norm": 0.443238765001297, "learning_rate": 0.0006149299719887955, "loss": 0.5113, "step": 13856 }, { "epoch": 7.741340782122905, "grad_norm": 0.6643243432044983, "learning_rate": 0.0006149019607843137, "loss": 0.5926, "step": 13857 }, { "epoch": 7.741899441340782, "grad_norm": 0.3780375123023987, "learning_rate": 0.0006148739495798319, "loss": 0.3841, "step": 13858 }, { "epoch": 7.742458100558659, "grad_norm": 0.4166991412639618, "learning_rate": 0.0006148459383753502, "loss": 0.4298, "step": 13859 }, { "epoch": 7.743016759776537, "grad_norm": 0.49918797612190247, "learning_rate": 0.0006148179271708683, "loss": 0.3641, "step": 13860 }, { "epoch": 7.743575418994413, "grad_norm": 1.1586129665374756, "learning_rate": 0.0006147899159663865, "loss": 0.5314, "step": 13861 }, { "epoch": 7.744134078212291, "grad_norm": 0.3844227194786072, 
"learning_rate": 0.0006147619047619047, "loss": 0.4187, "step": 13862 }, { "epoch": 7.744692737430167, "grad_norm": 0.8257869482040405, "learning_rate": 0.0006147338935574229, "loss": 0.4557, "step": 13863 }, { "epoch": 7.745251396648045, "grad_norm": 0.4101067781448364, "learning_rate": 0.0006147058823529413, "loss": 0.4573, "step": 13864 }, { "epoch": 7.745810055865922, "grad_norm": 0.6604530811309814, "learning_rate": 0.0006146778711484595, "loss": 0.4762, "step": 13865 }, { "epoch": 7.746368715083799, "grad_norm": 0.4204275906085968, "learning_rate": 0.0006146498599439776, "loss": 0.5202, "step": 13866 }, { "epoch": 7.746927374301676, "grad_norm": 0.6597739458084106, "learning_rate": 0.0006146218487394958, "loss": 0.4185, "step": 13867 }, { "epoch": 7.747486033519553, "grad_norm": 0.43951407074928284, "learning_rate": 0.000614593837535014, "loss": 0.4793, "step": 13868 }, { "epoch": 7.74804469273743, "grad_norm": 0.4143410325050354, "learning_rate": 0.0006145658263305323, "loss": 0.4153, "step": 13869 }, { "epoch": 7.748603351955307, "grad_norm": 1.717625617980957, "learning_rate": 0.0006145378151260505, "loss": 0.444, "step": 13870 }, { "epoch": 7.749162011173184, "grad_norm": 0.4756516218185425, "learning_rate": 0.0006145098039215686, "loss": 0.5483, "step": 13871 }, { "epoch": 7.749720670391062, "grad_norm": 0.6400270462036133, "learning_rate": 0.0006144817927170868, "loss": 0.4511, "step": 13872 }, { "epoch": 7.750279329608938, "grad_norm": 1.033082365989685, "learning_rate": 0.000614453781512605, "loss": 0.4038, "step": 13873 }, { "epoch": 7.750837988826816, "grad_norm": 0.48727673292160034, "learning_rate": 0.0006144257703081233, "loss": 0.4063, "step": 13874 }, { "epoch": 7.751396648044693, "grad_norm": 0.47570937871932983, "learning_rate": 0.0006143977591036415, "loss": 0.4392, "step": 13875 }, { "epoch": 7.75195530726257, "grad_norm": 0.47843945026397705, "learning_rate": 0.0006143697478991596, "loss": 0.4342, "step": 13876 }, { "epoch": 
7.752513966480447, "grad_norm": 1.9364606142044067, "learning_rate": 0.0006143417366946778, "loss": 0.4192, "step": 13877 }, { "epoch": 7.753072625698324, "grad_norm": 0.6362441182136536, "learning_rate": 0.000614313725490196, "loss": 0.6112, "step": 13878 }, { "epoch": 7.753631284916201, "grad_norm": 0.4092274308204651, "learning_rate": 0.0006142857142857143, "loss": 0.3732, "step": 13879 }, { "epoch": 7.754189944134078, "grad_norm": 0.5682749152183533, "learning_rate": 0.0006142577030812326, "loss": 0.5345, "step": 13880 }, { "epoch": 7.754748603351955, "grad_norm": 0.8510589599609375, "learning_rate": 0.0006142296918767508, "loss": 0.3834, "step": 13881 }, { "epoch": 7.755307262569833, "grad_norm": 0.5240651369094849, "learning_rate": 0.0006142016806722689, "loss": 0.4374, "step": 13882 }, { "epoch": 7.755865921787709, "grad_norm": 0.6527811884880066, "learning_rate": 0.0006141736694677871, "loss": 0.4909, "step": 13883 }, { "epoch": 7.756424581005587, "grad_norm": 0.790418803691864, "learning_rate": 0.0006141456582633054, "loss": 0.4613, "step": 13884 }, { "epoch": 7.756983240223463, "grad_norm": 0.6083050966262817, "learning_rate": 0.0006141176470588236, "loss": 0.5396, "step": 13885 }, { "epoch": 7.757541899441341, "grad_norm": 0.44148558378219604, "learning_rate": 0.0006140896358543418, "loss": 0.4566, "step": 13886 }, { "epoch": 7.758100558659218, "grad_norm": 0.4408504366874695, "learning_rate": 0.0006140616246498599, "loss": 0.5184, "step": 13887 }, { "epoch": 7.758659217877095, "grad_norm": 0.6519942879676819, "learning_rate": 0.0006140336134453781, "loss": 0.5159, "step": 13888 }, { "epoch": 7.759217877094972, "grad_norm": 0.6187353730201721, "learning_rate": 0.0006140056022408964, "loss": 0.4056, "step": 13889 }, { "epoch": 7.759776536312849, "grad_norm": 0.810020387172699, "learning_rate": 0.0006139775910364146, "loss": 0.624, "step": 13890 }, { "epoch": 7.760335195530726, "grad_norm": 0.6215172410011292, "learning_rate": 0.0006139495798319328, 
"loss": 0.5284, "step": 13891 }, { "epoch": 7.760893854748604, "grad_norm": 0.7776293158531189, "learning_rate": 0.0006139215686274509, "loss": 0.5887, "step": 13892 }, { "epoch": 7.76145251396648, "grad_norm": 0.3514454662799835, "learning_rate": 0.0006138935574229691, "loss": 0.365, "step": 13893 }, { "epoch": 7.762011173184358, "grad_norm": 0.6031270623207092, "learning_rate": 0.0006138655462184874, "loss": 0.5181, "step": 13894 }, { "epoch": 7.762569832402234, "grad_norm": 0.8488714694976807, "learning_rate": 0.0006138375350140056, "loss": 0.3842, "step": 13895 }, { "epoch": 7.763128491620112, "grad_norm": 1.634118676185608, "learning_rate": 0.0006138095238095238, "loss": 0.4515, "step": 13896 }, { "epoch": 7.763687150837989, "grad_norm": 0.4528811573982239, "learning_rate": 0.0006137815126050421, "loss": 0.4765, "step": 13897 }, { "epoch": 7.764245810055866, "grad_norm": 0.7632697820663452, "learning_rate": 0.0006137535014005602, "loss": 0.5145, "step": 13898 }, { "epoch": 7.764804469273743, "grad_norm": 0.9000115394592285, "learning_rate": 0.0006137254901960785, "loss": 0.4056, "step": 13899 }, { "epoch": 7.76536312849162, "grad_norm": 0.4654691815376282, "learning_rate": 0.0006136974789915967, "loss": 0.3723, "step": 13900 }, { "epoch": 7.765921787709497, "grad_norm": 0.8328071236610413, "learning_rate": 0.0006136694677871149, "loss": 0.4179, "step": 13901 }, { "epoch": 7.766480446927375, "grad_norm": 0.8510044813156128, "learning_rate": 0.0006136414565826331, "loss": 0.4124, "step": 13902 }, { "epoch": 7.767039106145251, "grad_norm": 0.5416488647460938, "learning_rate": 0.0006136134453781512, "loss": 0.3962, "step": 13903 }, { "epoch": 7.767597765363129, "grad_norm": 0.6769051551818848, "learning_rate": 0.0006135854341736695, "loss": 0.427, "step": 13904 }, { "epoch": 7.768156424581005, "grad_norm": 0.5259209275245667, "learning_rate": 0.0006135574229691877, "loss": 0.4533, "step": 13905 }, { "epoch": 7.768715083798883, "grad_norm": 0.7382765412330627, 
"learning_rate": 0.0006135294117647059, "loss": 0.3871, "step": 13906 }, { "epoch": 7.769273743016759, "grad_norm": 0.7412647604942322, "learning_rate": 0.0006135014005602241, "loss": 0.5075, "step": 13907 }, { "epoch": 7.769832402234637, "grad_norm": 4.510376453399658, "learning_rate": 0.0006134733893557422, "loss": 0.4838, "step": 13908 }, { "epoch": 7.770391061452514, "grad_norm": 2.4300150871276855, "learning_rate": 0.0006134453781512605, "loss": 0.5708, "step": 13909 }, { "epoch": 7.770949720670391, "grad_norm": 0.5524688959121704, "learning_rate": 0.0006134173669467787, "loss": 0.4641, "step": 13910 }, { "epoch": 7.771508379888268, "grad_norm": 0.3958134651184082, "learning_rate": 0.0006133893557422969, "loss": 0.3993, "step": 13911 }, { "epoch": 7.772067039106146, "grad_norm": 0.39522585272789, "learning_rate": 0.0006133613445378151, "loss": 0.4739, "step": 13912 }, { "epoch": 7.772625698324022, "grad_norm": 0.5362353324890137, "learning_rate": 0.0006133333333333334, "loss": 0.4541, "step": 13913 }, { "epoch": 7.7731843575419, "grad_norm": 0.866977870464325, "learning_rate": 0.0006133053221288516, "loss": 0.3998, "step": 13914 }, { "epoch": 7.773743016759776, "grad_norm": 0.840677797794342, "learning_rate": 0.0006132773109243698, "loss": 0.4495, "step": 13915 }, { "epoch": 7.774301675977654, "grad_norm": 1.0331155061721802, "learning_rate": 0.000613249299719888, "loss": 0.4686, "step": 13916 }, { "epoch": 7.77486033519553, "grad_norm": 0.44592276215553284, "learning_rate": 0.0006132212885154062, "loss": 0.3728, "step": 13917 }, { "epoch": 7.775418994413408, "grad_norm": 0.534802258014679, "learning_rate": 0.0006131932773109244, "loss": 0.3878, "step": 13918 }, { "epoch": 7.775977653631285, "grad_norm": 0.6893162131309509, "learning_rate": 0.0006131652661064426, "loss": 0.3981, "step": 13919 }, { "epoch": 7.776536312849162, "grad_norm": 0.46987706422805786, "learning_rate": 0.0006131372549019608, "loss": 0.4733, "step": 13920 }, { "epoch": 7.777094972067039, 
"grad_norm": 0.39327356219291687, "learning_rate": 0.000613109243697479, "loss": 0.3758, "step": 13921 }, { "epoch": 7.777653631284916, "grad_norm": 0.7337353229522705, "learning_rate": 0.0006130812324929972, "loss": 0.3359, "step": 13922 }, { "epoch": 7.778212290502793, "grad_norm": 2.117324113845825, "learning_rate": 0.0006130532212885154, "loss": 0.3588, "step": 13923 }, { "epoch": 7.778770949720671, "grad_norm": 0.4609636068344116, "learning_rate": 0.0006130252100840336, "loss": 0.4937, "step": 13924 }, { "epoch": 7.779329608938547, "grad_norm": 0.4368499517440796, "learning_rate": 0.0006129971988795518, "loss": 0.4018, "step": 13925 }, { "epoch": 7.779888268156425, "grad_norm": 0.34663155674934387, "learning_rate": 0.00061296918767507, "loss": 0.3147, "step": 13926 }, { "epoch": 7.780446927374301, "grad_norm": 0.4378778636455536, "learning_rate": 0.0006129411764705882, "loss": 0.3358, "step": 13927 }, { "epoch": 7.781005586592179, "grad_norm": 1.629021406173706, "learning_rate": 0.0006129131652661064, "loss": 0.5392, "step": 13928 }, { "epoch": 7.781564245810056, "grad_norm": 1.9251132011413574, "learning_rate": 0.0006128851540616248, "loss": 0.4689, "step": 13929 }, { "epoch": 7.782122905027933, "grad_norm": 0.49251148104667664, "learning_rate": 0.0006128571428571429, "loss": 0.4831, "step": 13930 }, { "epoch": 7.78268156424581, "grad_norm": 1.156540870666504, "learning_rate": 0.0006128291316526611, "loss": 0.5427, "step": 13931 }, { "epoch": 7.783240223463687, "grad_norm": 3.9049224853515625, "learning_rate": 0.0006128011204481793, "loss": 0.2848, "step": 13932 }, { "epoch": 7.783798882681564, "grad_norm": 0.6031736135482788, "learning_rate": 0.0006127731092436975, "loss": 0.3101, "step": 13933 }, { "epoch": 7.784357541899441, "grad_norm": 0.39596477150917053, "learning_rate": 0.0006127450980392158, "loss": 0.4133, "step": 13934 }, { "epoch": 7.784916201117318, "grad_norm": 0.5235522985458374, "learning_rate": 0.0006127170868347339, "loss": 0.5971, "step": 
13935 }, { "epoch": 7.785474860335196, "grad_norm": 0.4688574969768524, "learning_rate": 0.0006126890756302521, "loss": 0.3608, "step": 13936 }, { "epoch": 7.786033519553072, "grad_norm": 0.48614370822906494, "learning_rate": 0.0006126610644257703, "loss": 0.5324, "step": 13937 }, { "epoch": 7.78659217877095, "grad_norm": 0.6135228872299194, "learning_rate": 0.0006126330532212885, "loss": 0.4906, "step": 13938 }, { "epoch": 7.787150837988827, "grad_norm": 0.6341299414634705, "learning_rate": 0.0006126050420168068, "loss": 0.5944, "step": 13939 }, { "epoch": 7.787709497206704, "grad_norm": 0.4454633593559265, "learning_rate": 0.0006125770308123249, "loss": 0.4093, "step": 13940 }, { "epoch": 7.788268156424581, "grad_norm": 0.7565609812736511, "learning_rate": 0.0006125490196078431, "loss": 0.4687, "step": 13941 }, { "epoch": 7.788826815642458, "grad_norm": 0.4242513179779053, "learning_rate": 0.0006125210084033613, "loss": 0.4787, "step": 13942 }, { "epoch": 7.789385474860335, "grad_norm": 0.47021517157554626, "learning_rate": 0.0006124929971988795, "loss": 0.4099, "step": 13943 }, { "epoch": 7.789944134078212, "grad_norm": 0.7617304921150208, "learning_rate": 0.0006124649859943978, "loss": 0.5548, "step": 13944 }, { "epoch": 7.790502793296089, "grad_norm": 0.4995030164718628, "learning_rate": 0.000612436974789916, "loss": 0.3532, "step": 13945 }, { "epoch": 7.791061452513967, "grad_norm": 0.38720646500587463, "learning_rate": 0.0006124089635854341, "loss": 0.3722, "step": 13946 }, { "epoch": 7.791620111731843, "grad_norm": 0.5832614898681641, "learning_rate": 0.0006123809523809524, "loss": 0.5378, "step": 13947 }, { "epoch": 7.792178770949721, "grad_norm": 1.8605190515518188, "learning_rate": 0.0006123529411764706, "loss": 0.4192, "step": 13948 }, { "epoch": 7.792737430167598, "grad_norm": 0.48895007371902466, "learning_rate": 0.0006123249299719889, "loss": 0.4937, "step": 13949 }, { "epoch": 7.793296089385475, "grad_norm": 0.38163071870803833, "learning_rate": 
0.0006122969187675071, "loss": 0.351, "step": 13950 }, { "epoch": 7.793854748603352, "grad_norm": 1.0164268016815186, "learning_rate": 0.0006122689075630252, "loss": 0.3331, "step": 13951 }, { "epoch": 7.794413407821229, "grad_norm": 0.8862943649291992, "learning_rate": 0.0006122408963585434, "loss": 0.4055, "step": 13952 }, { "epoch": 7.794972067039106, "grad_norm": 0.3968307077884674, "learning_rate": 0.0006122128851540616, "loss": 0.482, "step": 13953 }, { "epoch": 7.795530726256983, "grad_norm": 0.5490445494651794, "learning_rate": 0.0006121848739495799, "loss": 0.4625, "step": 13954 }, { "epoch": 7.79608938547486, "grad_norm": 1.4571541547775269, "learning_rate": 0.0006121568627450981, "loss": 0.3694, "step": 13955 }, { "epoch": 7.796648044692738, "grad_norm": 0.5338897705078125, "learning_rate": 0.0006121288515406162, "loss": 0.4824, "step": 13956 }, { "epoch": 7.797206703910614, "grad_norm": 5.459682464599609, "learning_rate": 0.0006121008403361344, "loss": 0.6282, "step": 13957 }, { "epoch": 7.797765363128492, "grad_norm": 1.1113295555114746, "learning_rate": 0.0006120728291316526, "loss": 0.5635, "step": 13958 }, { "epoch": 7.798324022346368, "grad_norm": 0.5434353947639465, "learning_rate": 0.0006120448179271709, "loss": 0.3491, "step": 13959 }, { "epoch": 7.798882681564246, "grad_norm": 0.7481069564819336, "learning_rate": 0.0006120168067226891, "loss": 0.3484, "step": 13960 }, { "epoch": 7.799441340782123, "grad_norm": 0.6251439452171326, "learning_rate": 0.0006119887955182073, "loss": 0.4828, "step": 13961 }, { "epoch": 7.8, "grad_norm": 0.48824432492256165, "learning_rate": 0.0006119607843137254, "loss": 0.48, "step": 13962 }, { "epoch": 7.800558659217877, "grad_norm": 0.5305771827697754, "learning_rate": 0.0006119327731092437, "loss": 0.3644, "step": 13963 }, { "epoch": 7.801117318435754, "grad_norm": 0.5494493246078491, "learning_rate": 0.000611904761904762, "loss": 0.4209, "step": 13964 }, { "epoch": 7.801675977653631, "grad_norm": 
0.5097732543945312, "learning_rate": 0.0006118767507002802, "loss": 0.4117, "step": 13965 }, { "epoch": 7.802234636871509, "grad_norm": 1.5923417806625366, "learning_rate": 0.0006118487394957984, "loss": 0.49, "step": 13966 }, { "epoch": 7.802793296089385, "grad_norm": 0.523015022277832, "learning_rate": 0.0006118207282913165, "loss": 0.408, "step": 13967 }, { "epoch": 7.803351955307263, "grad_norm": null, "learning_rate": 0.0006118207282913165, "loss": 0.4448, "step": 13968 }, { "epoch": 7.803910614525139, "grad_norm": 0.5710523724555969, "learning_rate": 0.0006117927170868347, "loss": 0.4922, "step": 13969 }, { "epoch": 7.804469273743017, "grad_norm": 0.9690161943435669, "learning_rate": 0.000611764705882353, "loss": 0.3205, "step": 13970 }, { "epoch": 7.805027932960893, "grad_norm": 0.4549117684364319, "learning_rate": 0.0006117366946778712, "loss": 0.4894, "step": 13971 }, { "epoch": 7.805586592178771, "grad_norm": 0.4980126619338989, "learning_rate": 0.0006117086834733894, "loss": 0.4853, "step": 13972 }, { "epoch": 7.806145251396648, "grad_norm": 0.5128645896911621, "learning_rate": 0.0006116806722689075, "loss": 0.4183, "step": 13973 }, { "epoch": 7.806703910614525, "grad_norm": 3.2597928047180176, "learning_rate": 0.0006116526610644257, "loss": 0.3818, "step": 13974 }, { "epoch": 7.807262569832402, "grad_norm": 0.4002695083618164, "learning_rate": 0.000611624649859944, "loss": 0.3737, "step": 13975 }, { "epoch": 7.80782122905028, "grad_norm": 0.5315811634063721, "learning_rate": 0.0006115966386554622, "loss": 0.4243, "step": 13976 }, { "epoch": 7.808379888268156, "grad_norm": 0.5031778812408447, "learning_rate": 0.0006115686274509804, "loss": 0.3221, "step": 13977 }, { "epoch": 7.808938547486034, "grad_norm": 0.791736900806427, "learning_rate": 0.0006115406162464986, "loss": 0.5553, "step": 13978 }, { "epoch": 7.80949720670391, "grad_norm": 0.5402506589889526, "learning_rate": 0.0006115126050420167, "loss": 0.4367, "step": 13979 }, { "epoch": 
7.810055865921788, "grad_norm": 0.8056809306144714, "learning_rate": 0.0006114845938375351, "loss": 0.6272, "step": 13980 }, { "epoch": 7.810614525139664, "grad_norm": 1.1596993207931519, "learning_rate": 0.0006114565826330533, "loss": 0.3993, "step": 13981 }, { "epoch": 7.811173184357542, "grad_norm": 0.7039431929588318, "learning_rate": 0.0006114285714285715, "loss": 0.3529, "step": 13982 }, { "epoch": 7.811731843575419, "grad_norm": 0.4426352381706238, "learning_rate": 0.0006114005602240897, "loss": 0.4302, "step": 13983 }, { "epoch": 7.812290502793296, "grad_norm": 0.49005192518234253, "learning_rate": 0.0006113725490196078, "loss": 0.4461, "step": 13984 }, { "epoch": 7.812849162011173, "grad_norm": 0.37636321783065796, "learning_rate": 0.0006113445378151261, "loss": 0.4068, "step": 13985 }, { "epoch": 7.813407821229051, "grad_norm": 0.5424584746360779, "learning_rate": 0.0006113165266106443, "loss": 0.4707, "step": 13986 }, { "epoch": 7.813966480446927, "grad_norm": 0.4625190198421478, "learning_rate": 0.0006112885154061625, "loss": 0.3768, "step": 13987 }, { "epoch": 7.814525139664805, "grad_norm": 1.3029929399490356, "learning_rate": 0.0006112605042016807, "loss": 0.4689, "step": 13988 }, { "epoch": 7.815083798882681, "grad_norm": 0.627018392086029, "learning_rate": 0.0006112324929971988, "loss": 0.5588, "step": 13989 }, { "epoch": 7.815642458100559, "grad_norm": 0.42316123843193054, "learning_rate": 0.0006112044817927171, "loss": 0.4039, "step": 13990 }, { "epoch": 7.816201117318435, "grad_norm": 0.32366514205932617, "learning_rate": 0.0006111764705882353, "loss": 0.4052, "step": 13991 }, { "epoch": 7.816759776536313, "grad_norm": 0.41852328181266785, "learning_rate": 0.0006111484593837535, "loss": 0.4181, "step": 13992 }, { "epoch": 7.81731843575419, "grad_norm": 0.6539618372917175, "learning_rate": 0.0006111204481792717, "loss": 0.5097, "step": 13993 }, { "epoch": 7.817877094972067, "grad_norm": 0.48087573051452637, "learning_rate": 0.0006110924369747899, 
"loss": 0.3821, "step": 13994 }, { "epoch": 7.818435754189944, "grad_norm": 0.708215594291687, "learning_rate": 0.0006110644257703081, "loss": 0.3635, "step": 13995 }, { "epoch": 7.818994413407821, "grad_norm": 0.38754674792289734, "learning_rate": 0.0006110364145658264, "loss": 0.3689, "step": 13996 }, { "epoch": 7.819553072625698, "grad_norm": 0.39824309945106506, "learning_rate": 0.0006110084033613446, "loss": 0.4058, "step": 13997 }, { "epoch": 7.820111731843576, "grad_norm": 0.6965510249137878, "learning_rate": 0.0006109803921568628, "loss": 0.4776, "step": 13998 }, { "epoch": 7.820670391061452, "grad_norm": 0.501475989818573, "learning_rate": 0.000610952380952381, "loss": 0.4461, "step": 13999 }, { "epoch": 7.82122905027933, "grad_norm": 0.6898168921470642, "learning_rate": 0.0006109243697478992, "loss": 0.4488, "step": 14000 }, { "epoch": 7.82122905027933, "eval_cer": 0.09159547423434039, "eval_loss": 0.3414621353149414, "eval_runtime": 57.6769, "eval_samples_per_second": 78.68, "eval_steps_per_second": 4.924, "eval_wer": 0.36518533001871456, "step": 14000 }, { "epoch": 7.821787709497206, "grad_norm": 0.47615861892700195, "learning_rate": 0.0006108963585434174, "loss": 0.4425, "step": 14001 }, { "epoch": 7.822346368715084, "grad_norm": 0.514773964881897, "learning_rate": 0.0006108683473389356, "loss": 0.46, "step": 14002 }, { "epoch": 7.822905027932961, "grad_norm": 0.511339008808136, "learning_rate": 0.0006108403361344538, "loss": 0.4479, "step": 14003 }, { "epoch": 7.823463687150838, "grad_norm": 0.39632734656333923, "learning_rate": 0.000610812324929972, "loss": 0.412, "step": 14004 }, { "epoch": 7.824022346368715, "grad_norm": 0.37343478202819824, "learning_rate": 0.0006107843137254902, "loss": 0.3882, "step": 14005 }, { "epoch": 7.824581005586592, "grad_norm": 0.609805703163147, "learning_rate": 0.0006107563025210084, "loss": 0.4255, "step": 14006 }, { "epoch": 7.825139664804469, "grad_norm": 0.6717429757118225, "learning_rate": 0.0006107282913165266, 
"loss": 0.4501, "step": 14007 }, { "epoch": 7.825698324022346, "grad_norm": 0.6591873168945312, "learning_rate": 0.0006107002801120448, "loss": 0.505, "step": 14008 }, { "epoch": 7.826256983240223, "grad_norm": 0.4814958870410919, "learning_rate": 0.000610672268907563, "loss": 0.6377, "step": 14009 }, { "epoch": 7.826815642458101, "grad_norm": 0.34744584560394287, "learning_rate": 0.0006106442577030813, "loss": 0.3312, "step": 14010 }, { "epoch": 7.827374301675977, "grad_norm": 0.5938692688941956, "learning_rate": 0.0006106162464985994, "loss": 0.4223, "step": 14011 }, { "epoch": 7.827932960893855, "grad_norm": 0.40614497661590576, "learning_rate": 0.0006105882352941176, "loss": 0.4203, "step": 14012 }, { "epoch": 7.828491620111732, "grad_norm": 2.525165557861328, "learning_rate": 0.0006105602240896359, "loss": 0.3115, "step": 14013 }, { "epoch": 7.829050279329609, "grad_norm": 0.38710471987724304, "learning_rate": 0.0006105322128851541, "loss": 0.4365, "step": 14014 }, { "epoch": 7.829608938547486, "grad_norm": 0.6059528589248657, "learning_rate": 0.0006105042016806724, "loss": 0.4143, "step": 14015 }, { "epoch": 7.830167597765363, "grad_norm": 0.5290810465812683, "learning_rate": 0.0006104761904761905, "loss": 0.44, "step": 14016 }, { "epoch": 7.83072625698324, "grad_norm": 0.593165397644043, "learning_rate": 0.0006104481792717087, "loss": 0.4513, "step": 14017 }, { "epoch": 7.831284916201117, "grad_norm": 0.39692336320877075, "learning_rate": 0.0006104201680672269, "loss": 0.4607, "step": 14018 }, { "epoch": 7.831843575418994, "grad_norm": 0.40724900364875793, "learning_rate": 0.0006103921568627451, "loss": 0.3811, "step": 14019 }, { "epoch": 7.832402234636872, "grad_norm": 0.5121590495109558, "learning_rate": 0.0006103641456582634, "loss": 0.4841, "step": 14020 }, { "epoch": 7.832960893854748, "grad_norm": 0.6458014249801636, "learning_rate": 0.0006103361344537815, "loss": 0.5812, "step": 14021 }, { "epoch": 7.833519553072626, "grad_norm": 0.5924685001373291, 
"learning_rate": 0.0006103081232492997, "loss": 0.4381, "step": 14022 }, { "epoch": 7.834078212290503, "grad_norm": 0.46820512413978577, "learning_rate": 0.0006102801120448179, "loss": 0.3811, "step": 14023 }, { "epoch": 7.83463687150838, "grad_norm": 0.4230148196220398, "learning_rate": 0.0006102521008403361, "loss": 0.4229, "step": 14024 }, { "epoch": 7.835195530726257, "grad_norm": 0.4968454837799072, "learning_rate": 0.0006102240896358544, "loss": 0.3683, "step": 14025 }, { "epoch": 7.835754189944134, "grad_norm": 0.6096512079238892, "learning_rate": 0.0006101960784313726, "loss": 0.4128, "step": 14026 }, { "epoch": 7.836312849162011, "grad_norm": 0.4862516224384308, "learning_rate": 0.0006101680672268907, "loss": 0.4409, "step": 14027 }, { "epoch": 7.836871508379888, "grad_norm": 2.0232625007629395, "learning_rate": 0.0006101400560224089, "loss": 0.4563, "step": 14028 }, { "epoch": 7.837430167597765, "grad_norm": 0.43342125415802, "learning_rate": 0.0006101120448179271, "loss": 0.3823, "step": 14029 }, { "epoch": 7.837988826815643, "grad_norm": 0.573444128036499, "learning_rate": 0.0006100840336134455, "loss": 0.3678, "step": 14030 }, { "epoch": 7.838547486033519, "grad_norm": 0.4459739923477173, "learning_rate": 0.0006100560224089637, "loss": 0.4485, "step": 14031 }, { "epoch": 7.839106145251397, "grad_norm": 0.8919585943222046, "learning_rate": 0.0006100280112044818, "loss": 0.4919, "step": 14032 }, { "epoch": 7.839664804469273, "grad_norm": 0.42220550775527954, "learning_rate": 0.00061, "loss": 0.4151, "step": 14033 }, { "epoch": 7.840223463687151, "grad_norm": 0.44479402899742126, "learning_rate": 0.0006099719887955182, "loss": 0.3601, "step": 14034 }, { "epoch": 7.840782122905028, "grad_norm": 0.44729384779930115, "learning_rate": 0.0006099439775910365, "loss": 0.5094, "step": 14035 }, { "epoch": 7.841340782122905, "grad_norm": 0.6608854532241821, "learning_rate": 0.0006099159663865547, "loss": 0.5859, "step": 14036 }, { "epoch": 7.841899441340782, 
"grad_norm": 0.41493016481399536, "learning_rate": 0.0006098879551820728, "loss": 0.4069, "step": 14037 }, { "epoch": 7.842458100558659, "grad_norm": 0.38937169313430786, "learning_rate": 0.000609859943977591, "loss": 0.3618, "step": 14038 }, { "epoch": 7.843016759776536, "grad_norm": 0.5008031129837036, "learning_rate": 0.0006098319327731092, "loss": 0.3578, "step": 14039 }, { "epoch": 7.843575418994414, "grad_norm": 0.4291411340236664, "learning_rate": 0.0006098039215686275, "loss": 0.3225, "step": 14040 }, { "epoch": 7.84413407821229, "grad_norm": 0.5514256358146667, "learning_rate": 0.0006097759103641457, "loss": 0.4761, "step": 14041 }, { "epoch": 7.844692737430168, "grad_norm": 0.37141722440719604, "learning_rate": 0.0006097478991596639, "loss": 0.3846, "step": 14042 }, { "epoch": 7.845251396648044, "grad_norm": 0.5600492358207703, "learning_rate": 0.000609719887955182, "loss": 0.3408, "step": 14043 }, { "epoch": 7.845810055865922, "grad_norm": 3.491687536239624, "learning_rate": 0.0006096918767507002, "loss": 0.4988, "step": 14044 }, { "epoch": 7.846368715083798, "grad_norm": 0.4222472906112671, "learning_rate": 0.0006096638655462186, "loss": 0.3946, "step": 14045 }, { "epoch": 7.846927374301676, "grad_norm": 0.3901357650756836, "learning_rate": 0.0006096358543417368, "loss": 0.4548, "step": 14046 }, { "epoch": 7.847486033519553, "grad_norm": 0.33597564697265625, "learning_rate": 0.000609607843137255, "loss": 0.37, "step": 14047 }, { "epoch": 7.84804469273743, "grad_norm": 0.4221252501010895, "learning_rate": 0.0006095798319327731, "loss": 0.3616, "step": 14048 }, { "epoch": 7.848603351955307, "grad_norm": 2.5202057361602783, "learning_rate": 0.0006095518207282913, "loss": 0.4123, "step": 14049 }, { "epoch": 7.849162011173185, "grad_norm": 0.458539217710495, "learning_rate": 0.0006095238095238096, "loss": 0.3724, "step": 14050 }, { "epoch": 7.849720670391061, "grad_norm": 0.4702228307723999, "learning_rate": 0.0006094957983193278, "loss": 0.3862, "step": 
14051 }, { "epoch": 7.850279329608939, "grad_norm": 0.4447256624698639, "learning_rate": 0.000609467787114846, "loss": 0.464, "step": 14052 }, { "epoch": 7.850837988826815, "grad_norm": 0.46216046810150146, "learning_rate": 0.0006094397759103641, "loss": 0.5121, "step": 14053 }, { "epoch": 7.851396648044693, "grad_norm": 2.6394336223602295, "learning_rate": 0.0006094117647058823, "loss": 0.368, "step": 14054 }, { "epoch": 7.851955307262569, "grad_norm": 1.201012134552002, "learning_rate": 0.0006093837535014006, "loss": 0.3659, "step": 14055 }, { "epoch": 7.852513966480447, "grad_norm": 0.4681328237056732, "learning_rate": 0.0006093557422969188, "loss": 0.4, "step": 14056 }, { "epoch": 7.853072625698324, "grad_norm": 2.520373582839966, "learning_rate": 0.000609327731092437, "loss": 0.6831, "step": 14057 }, { "epoch": 7.853631284916201, "grad_norm": 0.8120802640914917, "learning_rate": 0.0006092997198879552, "loss": 0.4608, "step": 14058 }, { "epoch": 7.854189944134078, "grad_norm": 0.6128634810447693, "learning_rate": 0.0006092717086834733, "loss": 0.3924, "step": 14059 }, { "epoch": 7.854748603351956, "grad_norm": 0.5131211876869202, "learning_rate": 0.0006092436974789915, "loss": 0.4283, "step": 14060 }, { "epoch": 7.855307262569832, "grad_norm": 0.5298057794570923, "learning_rate": 0.0006092156862745098, "loss": 0.5154, "step": 14061 }, { "epoch": 7.85586592178771, "grad_norm": 0.5277855396270752, "learning_rate": 0.0006091876750700281, "loss": 0.4556, "step": 14062 }, { "epoch": 7.856424581005586, "grad_norm": 1.4483544826507568, "learning_rate": 0.0006091596638655463, "loss": 0.5639, "step": 14063 }, { "epoch": 7.856983240223464, "grad_norm": 0.6038522720336914, "learning_rate": 0.0006091316526610644, "loss": 0.4247, "step": 14064 }, { "epoch": 7.85754189944134, "grad_norm": 0.4921419024467468, "learning_rate": 0.0006091036414565826, "loss": 0.4428, "step": 14065 }, { "epoch": 7.858100558659218, "grad_norm": 0.6876301169395447, "learning_rate": 
0.0006090756302521009, "loss": 0.4493, "step": 14066 }, { "epoch": 7.858659217877095, "grad_norm": 0.5611335039138794, "learning_rate": 0.0006090476190476191, "loss": 0.4226, "step": 14067 }, { "epoch": 7.859217877094972, "grad_norm": 0.6593359708786011, "learning_rate": 0.0006090196078431373, "loss": 0.6283, "step": 14068 }, { "epoch": 7.859776536312849, "grad_norm": 0.45986607670783997, "learning_rate": 0.0006089915966386554, "loss": 0.3943, "step": 14069 }, { "epoch": 7.860335195530726, "grad_norm": 0.5802178382873535, "learning_rate": 0.0006089635854341736, "loss": 0.4123, "step": 14070 }, { "epoch": 7.860893854748603, "grad_norm": 1.6869314908981323, "learning_rate": 0.0006089355742296919, "loss": 0.4492, "step": 14071 }, { "epoch": 7.861452513966481, "grad_norm": 1.0273398160934448, "learning_rate": 0.0006089075630252101, "loss": 0.4047, "step": 14072 }, { "epoch": 7.862011173184357, "grad_norm": 0.6036380529403687, "learning_rate": 0.0006088795518207283, "loss": 0.5452, "step": 14073 }, { "epoch": 7.862569832402235, "grad_norm": 0.590654730796814, "learning_rate": 0.0006088515406162465, "loss": 0.4417, "step": 14074 }, { "epoch": 7.863128491620111, "grad_norm": 0.5519625544548035, "learning_rate": 0.0006088235294117646, "loss": 0.4764, "step": 14075 }, { "epoch": 7.863687150837989, "grad_norm": 1.196094036102295, "learning_rate": 0.0006087955182072829, "loss": 0.4474, "step": 14076 }, { "epoch": 7.864245810055866, "grad_norm": 0.5589098930358887, "learning_rate": 0.0006087675070028011, "loss": 0.4415, "step": 14077 }, { "epoch": 7.864804469273743, "grad_norm": 0.4926721155643463, "learning_rate": 0.0006087394957983194, "loss": 0.4017, "step": 14078 }, { "epoch": 7.86536312849162, "grad_norm": 0.42424827814102173, "learning_rate": 0.0006087114845938376, "loss": 0.4248, "step": 14079 }, { "epoch": 7.865921787709497, "grad_norm": 0.4860125780105591, "learning_rate": 0.0006086834733893557, "loss": 0.4152, "step": 14080 }, { "epoch": 7.866480446927374, 
"grad_norm": 0.6805820465087891, "learning_rate": 0.000608655462184874, "loss": 0.5482, "step": 14081 }, { "epoch": 7.867039106145251, "grad_norm": 0.4036625921726227, "learning_rate": 0.0006086274509803922, "loss": 0.3668, "step": 14082 }, { "epoch": 7.867597765363128, "grad_norm": 0.45382872223854065, "learning_rate": 0.0006085994397759104, "loss": 0.4688, "step": 14083 }, { "epoch": 7.868156424581006, "grad_norm": 0.5808756351470947, "learning_rate": 0.0006085714285714286, "loss": 0.5037, "step": 14084 }, { "epoch": 7.868715083798882, "grad_norm": 1.0217854976654053, "learning_rate": 0.0006085434173669467, "loss": 0.474, "step": 14085 }, { "epoch": 7.86927374301676, "grad_norm": 0.4961625933647156, "learning_rate": 0.000608515406162465, "loss": 0.5245, "step": 14086 }, { "epoch": 7.869832402234637, "grad_norm": 0.3712250590324402, "learning_rate": 0.0006084873949579832, "loss": 0.4348, "step": 14087 }, { "epoch": 7.870391061452514, "grad_norm": 0.39424946904182434, "learning_rate": 0.0006084593837535014, "loss": 0.3369, "step": 14088 }, { "epoch": 7.870949720670391, "grad_norm": 0.496334433555603, "learning_rate": 0.0006084313725490196, "loss": 0.4656, "step": 14089 }, { "epoch": 7.871508379888268, "grad_norm": 0.6619940996170044, "learning_rate": 0.0006084033613445378, "loss": 0.469, "step": 14090 }, { "epoch": 7.872067039106145, "grad_norm": 0.563731849193573, "learning_rate": 0.000608375350140056, "loss": 0.5851, "step": 14091 }, { "epoch": 7.872625698324022, "grad_norm": 0.36841338872909546, "learning_rate": 0.0006083473389355742, "loss": 0.3926, "step": 14092 }, { "epoch": 7.873184357541899, "grad_norm": 0.7273062467575073, "learning_rate": 0.0006083193277310924, "loss": 0.4798, "step": 14093 }, { "epoch": 7.873743016759777, "grad_norm": 1.2027250528335571, "learning_rate": 0.0006082913165266106, "loss": 0.3554, "step": 14094 }, { "epoch": 7.874301675977653, "grad_norm": 0.4926111102104187, "learning_rate": 0.0006082633053221289, "loss": 0.423, "step": 
14095 }, { "epoch": 7.874860335195531, "grad_norm": 0.4224153459072113, "learning_rate": 0.0006082352941176471, "loss": 0.3501, "step": 14096 }, { "epoch": 7.875418994413408, "grad_norm": 0.9446578621864319, "learning_rate": 0.0006082072829131653, "loss": 0.4769, "step": 14097 }, { "epoch": 7.875977653631285, "grad_norm": 0.9632366299629211, "learning_rate": 0.0006081792717086835, "loss": 0.4178, "step": 14098 }, { "epoch": 7.876536312849162, "grad_norm": 0.6089878082275391, "learning_rate": 0.0006081512605042017, "loss": 0.383, "step": 14099 }, { "epoch": 7.877094972067039, "grad_norm": 0.4659743905067444, "learning_rate": 0.0006081232492997199, "loss": 0.4408, "step": 14100 }, { "epoch": 7.877653631284916, "grad_norm": 0.6155918836593628, "learning_rate": 0.0006080952380952382, "loss": 0.5053, "step": 14101 }, { "epoch": 7.878212290502793, "grad_norm": 0.595474898815155, "learning_rate": 0.0006080672268907563, "loss": 0.3606, "step": 14102 }, { "epoch": 7.87877094972067, "grad_norm": 0.5520682334899902, "learning_rate": 0.0006080392156862745, "loss": 0.5041, "step": 14103 }, { "epoch": 7.879329608938548, "grad_norm": 0.4283052980899811, "learning_rate": 0.0006080112044817927, "loss": 0.306, "step": 14104 }, { "epoch": 7.879888268156424, "grad_norm": 0.5950391292572021, "learning_rate": 0.0006079831932773109, "loss": 0.4656, "step": 14105 }, { "epoch": 7.880446927374302, "grad_norm": 0.37386465072631836, "learning_rate": 0.0006079551820728292, "loss": 0.34, "step": 14106 }, { "epoch": 7.881005586592178, "grad_norm": 0.45117971301078796, "learning_rate": 0.0006079271708683473, "loss": 0.4369, "step": 14107 }, { "epoch": 7.881564245810056, "grad_norm": 0.45627790689468384, "learning_rate": 0.0006078991596638655, "loss": 0.5299, "step": 14108 }, { "epoch": 7.882122905027933, "grad_norm": 0.5724135041236877, "learning_rate": 0.0006078711484593837, "loss": 0.3796, "step": 14109 }, { "epoch": 7.88268156424581, "grad_norm": 0.6409525275230408, "learning_rate": 
0.0006078431372549019, "loss": 0.3968, "step": 14110 }, { "epoch": 7.883240223463687, "grad_norm": 0.4467275142669678, "learning_rate": 0.0006078151260504203, "loss": 0.4365, "step": 14111 }, { "epoch": 7.883798882681564, "grad_norm": 0.6085941791534424, "learning_rate": 0.0006077871148459384, "loss": 0.6568, "step": 14112 }, { "epoch": 7.884357541899441, "grad_norm": 0.6792378425598145, "learning_rate": 0.0006077591036414566, "loss": 0.4271, "step": 14113 }, { "epoch": 7.884916201117319, "grad_norm": 0.6143374443054199, "learning_rate": 0.0006077310924369748, "loss": 0.4595, "step": 14114 }, { "epoch": 7.885474860335195, "grad_norm": 0.5388185977935791, "learning_rate": 0.000607703081232493, "loss": 0.5783, "step": 14115 }, { "epoch": 7.886033519553073, "grad_norm": 0.6613753437995911, "learning_rate": 0.0006076750700280113, "loss": 0.5576, "step": 14116 }, { "epoch": 7.886592178770949, "grad_norm": 0.77463299036026, "learning_rate": 0.0006076470588235295, "loss": 0.4237, "step": 14117 }, { "epoch": 7.887150837988827, "grad_norm": 0.5430334210395813, "learning_rate": 0.0006076190476190476, "loss": 0.3825, "step": 14118 }, { "epoch": 7.8877094972067034, "grad_norm": 0.46947580575942993, "learning_rate": 0.0006075910364145658, "loss": 0.4381, "step": 14119 }, { "epoch": 7.888268156424581, "grad_norm": 0.44095292687416077, "learning_rate": 0.000607563025210084, "loss": 0.394, "step": 14120 }, { "epoch": 7.888826815642458, "grad_norm": 3.042280673980713, "learning_rate": 0.0006075350140056023, "loss": 0.4902, "step": 14121 }, { "epoch": 7.889385474860335, "grad_norm": 0.6561064124107361, "learning_rate": 0.0006075070028011205, "loss": 0.4371, "step": 14122 }, { "epoch": 7.889944134078212, "grad_norm": 0.41461801528930664, "learning_rate": 0.0006074789915966386, "loss": 0.4457, "step": 14123 }, { "epoch": 7.89050279329609, "grad_norm": 0.45347532629966736, "learning_rate": 0.0006074509803921568, "loss": 0.4344, "step": 14124 }, { "epoch": 7.891061452513966, 
"grad_norm": 1.6163285970687866, "learning_rate": 0.000607422969187675, "loss": 0.4911, "step": 14125 }, { "epoch": 7.891620111731844, "grad_norm": 0.5072400569915771, "learning_rate": 0.0006073949579831933, "loss": 0.4782, "step": 14126 }, { "epoch": 7.89217877094972, "grad_norm": 0.6784650683403015, "learning_rate": 0.0006073669467787116, "loss": 0.5995, "step": 14127 }, { "epoch": 7.892737430167598, "grad_norm": 0.5433607697486877, "learning_rate": 0.0006073389355742297, "loss": 0.4732, "step": 14128 }, { "epoch": 7.8932960893854744, "grad_norm": 1.1077600717544556, "learning_rate": 0.0006073109243697479, "loss": 0.4915, "step": 14129 }, { "epoch": 7.893854748603352, "grad_norm": 0.4496769905090332, "learning_rate": 0.0006072829131652661, "loss": 0.4551, "step": 14130 }, { "epoch": 7.894413407821229, "grad_norm": 1.2492722272872925, "learning_rate": 0.0006072549019607844, "loss": 0.4537, "step": 14131 }, { "epoch": 7.894972067039106, "grad_norm": 0.4690402150154114, "learning_rate": 0.0006072268907563026, "loss": 0.5178, "step": 14132 }, { "epoch": 7.895530726256983, "grad_norm": 0.6281135082244873, "learning_rate": 0.0006071988795518208, "loss": 0.4628, "step": 14133 }, { "epoch": 7.896089385474861, "grad_norm": 1.967771053314209, "learning_rate": 0.0006071708683473389, "loss": 0.4882, "step": 14134 }, { "epoch": 7.896648044692737, "grad_norm": 0.5978216528892517, "learning_rate": 0.0006071428571428571, "loss": 0.5313, "step": 14135 }, { "epoch": 7.897206703910615, "grad_norm": 0.6420818567276001, "learning_rate": 0.0006071148459383754, "loss": 0.4625, "step": 14136 }, { "epoch": 7.897765363128491, "grad_norm": 0.8040462732315063, "learning_rate": 0.0006070868347338936, "loss": 0.5419, "step": 14137 }, { "epoch": 7.898324022346369, "grad_norm": 0.670534074306488, "learning_rate": 0.0006070588235294118, "loss": 0.4328, "step": 14138 }, { "epoch": 7.8988826815642454, "grad_norm": 0.5469644665718079, "learning_rate": 0.0006070308123249299, "loss": 0.3776, "step": 
14139 }, { "epoch": 7.899441340782123, "grad_norm": 0.35390475392341614, "learning_rate": 0.0006070028011204481, "loss": 0.3148, "step": 14140 }, { "epoch": 7.9, "grad_norm": 0.35059481859207153, "learning_rate": 0.0006069747899159664, "loss": 0.3497, "step": 14141 }, { "epoch": 7.900558659217877, "grad_norm": 0.5717032551765442, "learning_rate": 0.0006069467787114846, "loss": 0.3873, "step": 14142 }, { "epoch": 7.901117318435754, "grad_norm": 0.9199079275131226, "learning_rate": 0.0006069187675070028, "loss": 0.4667, "step": 14143 }, { "epoch": 7.901675977653631, "grad_norm": 0.6967477202415466, "learning_rate": 0.000606890756302521, "loss": 0.4552, "step": 14144 }, { "epoch": 7.902234636871508, "grad_norm": 0.5415611267089844, "learning_rate": 0.0006068627450980392, "loss": 0.4036, "step": 14145 }, { "epoch": 7.902793296089386, "grad_norm": 0.5598707795143127, "learning_rate": 0.0006068347338935575, "loss": 0.4102, "step": 14146 }, { "epoch": 7.903351955307262, "grad_norm": 0.4622749090194702, "learning_rate": 0.0006068067226890757, "loss": 0.4958, "step": 14147 }, { "epoch": 7.90391061452514, "grad_norm": 1.8067772388458252, "learning_rate": 0.0006067787114845939, "loss": 0.6638, "step": 14148 }, { "epoch": 7.9044692737430164, "grad_norm": 0.44978487491607666, "learning_rate": 0.0006067507002801121, "loss": 0.3963, "step": 14149 }, { "epoch": 7.905027932960894, "grad_norm": 0.4657762944698334, "learning_rate": 0.0006067226890756302, "loss": 0.3393, "step": 14150 }, { "epoch": 7.905586592178771, "grad_norm": 0.7096941471099854, "learning_rate": 0.0006066946778711485, "loss": 0.5014, "step": 14151 }, { "epoch": 7.906145251396648, "grad_norm": 1.3319052457809448, "learning_rate": 0.0006066666666666667, "loss": 0.3297, "step": 14152 }, { "epoch": 7.906703910614525, "grad_norm": 0.5790479183197021, "learning_rate": 0.0006066386554621849, "loss": 0.4536, "step": 14153 }, { "epoch": 7.907262569832402, "grad_norm": 0.4217730760574341, "learning_rate": 
0.0006066106442577031, "loss": 0.3818, "step": 14154 }, { "epoch": 7.907821229050279, "grad_norm": 0.3936649560928345, "learning_rate": 0.0006065826330532212, "loss": 0.3547, "step": 14155 }, { "epoch": 7.908379888268156, "grad_norm": 0.6060563325881958, "learning_rate": 0.0006065546218487395, "loss": 0.4621, "step": 14156 }, { "epoch": 7.908938547486033, "grad_norm": 0.5636858940124512, "learning_rate": 0.0006065266106442577, "loss": 0.5161, "step": 14157 }, { "epoch": 7.909497206703911, "grad_norm": 0.6794993877410889, "learning_rate": 0.0006064985994397759, "loss": 0.4597, "step": 14158 }, { "epoch": 7.910055865921787, "grad_norm": 0.7797375321388245, "learning_rate": 0.0006064705882352941, "loss": 0.3909, "step": 14159 }, { "epoch": 7.910614525139665, "grad_norm": 0.535366415977478, "learning_rate": 0.0006064425770308122, "loss": 0.4556, "step": 14160 }, { "epoch": 7.911173184357542, "grad_norm": 0.45064032077789307, "learning_rate": 0.0006064145658263306, "loss": 0.4461, "step": 14161 }, { "epoch": 7.911731843575419, "grad_norm": 0.3856082260608673, "learning_rate": 0.0006063865546218488, "loss": 0.4205, "step": 14162 }, { "epoch": 7.912290502793296, "grad_norm": 0.879021942615509, "learning_rate": 0.000606358543417367, "loss": 0.3695, "step": 14163 }, { "epoch": 7.912849162011173, "grad_norm": 1.0510387420654297, "learning_rate": 0.0006063305322128852, "loss": 0.489, "step": 14164 }, { "epoch": 7.91340782122905, "grad_norm": 1.6861861944198608, "learning_rate": 0.0006063025210084034, "loss": 0.459, "step": 14165 }, { "epoch": 7.913966480446927, "grad_norm": 0.4760240316390991, "learning_rate": 0.0006062745098039216, "loss": 0.4776, "step": 14166 }, { "epoch": 7.914525139664804, "grad_norm": 0.5942274928092957, "learning_rate": 0.0006062464985994398, "loss": 0.4878, "step": 14167 }, { "epoch": 7.915083798882682, "grad_norm": 0.5481272339820862, "learning_rate": 0.000606218487394958, "loss": 0.5073, "step": 14168 }, { "epoch": 7.915642458100558, "grad_norm": 
0.39387619495391846, "learning_rate": 0.0006061904761904762, "loss": 0.4309, "step": 14169 }, { "epoch": 7.916201117318436, "grad_norm": 0.6636371612548828, "learning_rate": 0.0006061624649859944, "loss": 0.4852, "step": 14170 }, { "epoch": 7.9167597765363125, "grad_norm": 0.4540569484233856, "learning_rate": 0.0006061344537815126, "loss": 0.4822, "step": 14171 }, { "epoch": 7.91731843575419, "grad_norm": 4.315849781036377, "learning_rate": 0.0006061064425770308, "loss": 0.4356, "step": 14172 }, { "epoch": 7.917877094972067, "grad_norm": 2.6544439792633057, "learning_rate": 0.000606078431372549, "loss": 0.4241, "step": 14173 }, { "epoch": 7.918435754189944, "grad_norm": 0.6716331243515015, "learning_rate": 0.0006060504201680672, "loss": 0.4079, "step": 14174 }, { "epoch": 7.918994413407821, "grad_norm": 0.6275873780250549, "learning_rate": 0.0006060224089635854, "loss": 0.4237, "step": 14175 }, { "epoch": 7.919553072625698, "grad_norm": 0.668163537979126, "learning_rate": 0.0006059943977591036, "loss": 0.5494, "step": 14176 }, { "epoch": 7.920111731843575, "grad_norm": 0.4181583821773529, "learning_rate": 0.0006059663865546219, "loss": 0.452, "step": 14177 }, { "epoch": 7.920670391061453, "grad_norm": 0.6004208922386169, "learning_rate": 0.0006059383753501401, "loss": 0.4012, "step": 14178 }, { "epoch": 7.921229050279329, "grad_norm": 0.5395012497901917, "learning_rate": 0.0006059103641456583, "loss": 0.3843, "step": 14179 }, { "epoch": 7.921787709497207, "grad_norm": 0.5836853384971619, "learning_rate": 0.0006058823529411765, "loss": 0.4605, "step": 14180 }, { "epoch": 7.9223463687150835, "grad_norm": 0.8164491653442383, "learning_rate": 0.0006058543417366948, "loss": 0.491, "step": 14181 }, { "epoch": 7.922905027932961, "grad_norm": 0.6076754331588745, "learning_rate": 0.0006058263305322129, "loss": 0.4937, "step": 14182 }, { "epoch": 7.923463687150838, "grad_norm": 0.8746344447135925, "learning_rate": 0.0006057983193277311, "loss": 0.4295, "step": 14183 }, { 
"epoch": 7.924022346368715, "grad_norm": 0.43245530128479004, "learning_rate": 0.0006057703081232493, "loss": 0.3798, "step": 14184 }, { "epoch": 7.924581005586592, "grad_norm": 0.4194217026233673, "learning_rate": 0.0006057422969187675, "loss": 0.2922, "step": 14185 }, { "epoch": 7.925139664804469, "grad_norm": 0.45826587080955505, "learning_rate": 0.0006057142857142858, "loss": 0.3696, "step": 14186 }, { "epoch": 7.925698324022346, "grad_norm": 0.579105019569397, "learning_rate": 0.0006056862745098039, "loss": 0.3938, "step": 14187 }, { "epoch": 7.926256983240224, "grad_norm": 0.44036081433296204, "learning_rate": 0.0006056582633053221, "loss": 0.3296, "step": 14188 }, { "epoch": 7.9268156424581, "grad_norm": 0.7250059247016907, "learning_rate": 0.0006056302521008403, "loss": 0.5079, "step": 14189 }, { "epoch": 7.927374301675978, "grad_norm": 1.0376495122909546, "learning_rate": 0.0006056022408963585, "loss": 0.5475, "step": 14190 }, { "epoch": 7.9279329608938545, "grad_norm": 0.47896936535835266, "learning_rate": 0.0006055742296918768, "loss": 0.5369, "step": 14191 }, { "epoch": 7.928491620111732, "grad_norm": 1.2012934684753418, "learning_rate": 0.0006055462184873949, "loss": 0.421, "step": 14192 }, { "epoch": 7.9290502793296085, "grad_norm": 0.976495087146759, "learning_rate": 0.0006055182072829131, "loss": 0.5422, "step": 14193 }, { "epoch": 7.929608938547486, "grad_norm": 0.8057586550712585, "learning_rate": 0.0006054901960784314, "loss": 0.4739, "step": 14194 }, { "epoch": 7.930167597765363, "grad_norm": 0.6573531031608582, "learning_rate": 0.0006054621848739496, "loss": 0.4128, "step": 14195 }, { "epoch": 7.93072625698324, "grad_norm": 0.6205860376358032, "learning_rate": 0.0006054341736694679, "loss": 0.4637, "step": 14196 }, { "epoch": 7.931284916201117, "grad_norm": 0.5603929162025452, "learning_rate": 0.0006054061624649861, "loss": 0.5366, "step": 14197 }, { "epoch": 7.931843575418995, "grad_norm": 0.6897447109222412, "learning_rate": 
0.0006053781512605042, "loss": 0.5378, "step": 14198 }, { "epoch": 7.932402234636871, "grad_norm": 0.45130735635757446, "learning_rate": 0.0006053501400560224, "loss": 0.5279, "step": 14199 }, { "epoch": 7.932960893854749, "grad_norm": 1.1561640501022339, "learning_rate": 0.0006053221288515406, "loss": 0.5776, "step": 14200 }, { "epoch": 7.9335195530726255, "grad_norm": 0.5011961460113525, "learning_rate": 0.0006052941176470589, "loss": 0.4582, "step": 14201 }, { "epoch": 7.934078212290503, "grad_norm": 0.4498681128025055, "learning_rate": 0.0006052661064425771, "loss": 0.4753, "step": 14202 }, { "epoch": 7.9346368715083795, "grad_norm": 0.7079980373382568, "learning_rate": 0.0006052380952380952, "loss": 0.4491, "step": 14203 }, { "epoch": 7.935195530726257, "grad_norm": 0.5874936580657959, "learning_rate": 0.0006052100840336134, "loss": 0.5496, "step": 14204 }, { "epoch": 7.935754189944134, "grad_norm": 0.794951856136322, "learning_rate": 0.0006051820728291316, "loss": 0.5788, "step": 14205 }, { "epoch": 7.936312849162011, "grad_norm": 0.5293338894844055, "learning_rate": 0.0006051540616246499, "loss": 0.4832, "step": 14206 }, { "epoch": 7.936871508379888, "grad_norm": 0.5555122494697571, "learning_rate": 0.0006051260504201681, "loss": 0.4358, "step": 14207 }, { "epoch": 7.937430167597765, "grad_norm": 0.4400259256362915, "learning_rate": 0.0006050980392156862, "loss": 0.5594, "step": 14208 }, { "epoch": 7.937988826815642, "grad_norm": 0.5891273021697998, "learning_rate": 0.0006050700280112044, "loss": 0.3796, "step": 14209 }, { "epoch": 7.93854748603352, "grad_norm": 0.5500006079673767, "learning_rate": 0.0006050420168067227, "loss": 0.4296, "step": 14210 }, { "epoch": 7.9391061452513965, "grad_norm": 0.4392663538455963, "learning_rate": 0.000605014005602241, "loss": 0.4465, "step": 14211 }, { "epoch": 7.939664804469274, "grad_norm": 0.5625128746032715, "learning_rate": 0.0006049859943977592, "loss": 0.5645, "step": 14212 }, { "epoch": 7.9402234636871505, 
"grad_norm": 0.715496301651001, "learning_rate": 0.0006049579831932774, "loss": 0.5525, "step": 14213 }, { "epoch": 7.940782122905028, "grad_norm": 1.6878738403320312, "learning_rate": 0.0006049299719887955, "loss": 0.4062, "step": 14214 }, { "epoch": 7.941340782122905, "grad_norm": 0.39091014862060547, "learning_rate": 0.0006049019607843137, "loss": 0.4523, "step": 14215 }, { "epoch": 7.941899441340782, "grad_norm": 0.45580625534057617, "learning_rate": 0.000604873949579832, "loss": 0.3868, "step": 14216 }, { "epoch": 7.942458100558659, "grad_norm": 0.5214827656745911, "learning_rate": 0.0006048459383753502, "loss": 0.4109, "step": 14217 }, { "epoch": 7.943016759776536, "grad_norm": 0.73016357421875, "learning_rate": 0.0006048179271708684, "loss": 0.5033, "step": 14218 }, { "epoch": 7.943575418994413, "grad_norm": 0.36870265007019043, "learning_rate": 0.0006047899159663865, "loss": 0.4019, "step": 14219 }, { "epoch": 7.94413407821229, "grad_norm": 0.4197850525379181, "learning_rate": 0.0006047619047619047, "loss": 0.448, "step": 14220 }, { "epoch": 7.9446927374301675, "grad_norm": 0.4646594226360321, "learning_rate": 0.000604733893557423, "loss": 0.4483, "step": 14221 }, { "epoch": 7.945251396648045, "grad_norm": 0.40932217240333557, "learning_rate": 0.0006047058823529412, "loss": 0.4873, "step": 14222 }, { "epoch": 7.9458100558659215, "grad_norm": 0.4788350760936737, "learning_rate": 0.0006046778711484594, "loss": 0.4524, "step": 14223 }, { "epoch": 7.946368715083799, "grad_norm": 0.620957612991333, "learning_rate": 0.0006046498599439775, "loss": 0.6384, "step": 14224 }, { "epoch": 7.946927374301676, "grad_norm": 0.5223733186721802, "learning_rate": 0.0006046218487394957, "loss": 0.5105, "step": 14225 }, { "epoch": 7.947486033519553, "grad_norm": 0.5959054827690125, "learning_rate": 0.0006045938375350141, "loss": 0.45, "step": 14226 }, { "epoch": 7.94804469273743, "grad_norm": 0.40770187973976135, "learning_rate": 0.0006045658263305323, "loss": 0.4236, "step": 
14227 }, { "epoch": 7.948603351955307, "grad_norm": 0.7343426942825317, "learning_rate": 0.0006045378151260505, "loss": 0.4794, "step": 14228 }, { "epoch": 7.949162011173184, "grad_norm": 0.9903770685195923, "learning_rate": 0.0006045098039215687, "loss": 0.4373, "step": 14229 }, { "epoch": 7.949720670391061, "grad_norm": 0.4142523407936096, "learning_rate": 0.0006044817927170868, "loss": 0.3739, "step": 14230 }, { "epoch": 7.9502793296089385, "grad_norm": 4.297020435333252, "learning_rate": 0.0006044537815126051, "loss": 0.4263, "step": 14231 }, { "epoch": 7.950837988826816, "grad_norm": 0.7485659718513489, "learning_rate": 0.0006044257703081233, "loss": 0.4112, "step": 14232 }, { "epoch": 7.9513966480446925, "grad_norm": 0.9614757299423218, "learning_rate": 0.0006043977591036415, "loss": 0.4796, "step": 14233 }, { "epoch": 7.95195530726257, "grad_norm": 1.32154381275177, "learning_rate": 0.0006043697478991597, "loss": 0.4991, "step": 14234 }, { "epoch": 7.952513966480447, "grad_norm": 0.4387473165988922, "learning_rate": 0.0006043417366946778, "loss": 0.4062, "step": 14235 }, { "epoch": 7.953072625698324, "grad_norm": 0.6479970812797546, "learning_rate": 0.0006043137254901961, "loss": 0.3529, "step": 14236 }, { "epoch": 7.953631284916201, "grad_norm": 0.3849358856678009, "learning_rate": 0.0006042857142857143, "loss": 0.4237, "step": 14237 }, { "epoch": 7.954189944134078, "grad_norm": 0.5455474853515625, "learning_rate": 0.0006042577030812325, "loss": 0.3911, "step": 14238 }, { "epoch": 7.954748603351955, "grad_norm": 0.43311941623687744, "learning_rate": 0.0006042296918767507, "loss": 0.3977, "step": 14239 }, { "epoch": 7.955307262569832, "grad_norm": 1.1351542472839355, "learning_rate": 0.0006042016806722688, "loss": 0.3528, "step": 14240 }, { "epoch": 7.9558659217877095, "grad_norm": 2.5995676517486572, "learning_rate": 0.0006041736694677871, "loss": 0.4128, "step": 14241 }, { "epoch": 7.956424581005587, "grad_norm": 1.4863731861114502, "learning_rate": 
0.0006041456582633054, "loss": 0.4887, "step": 14242 }, { "epoch": 7.9569832402234635, "grad_norm": 0.34584298729896545, "learning_rate": 0.0006041176470588236, "loss": 0.401, "step": 14243 }, { "epoch": 7.957541899441341, "grad_norm": 1.1386308670043945, "learning_rate": 0.0006040896358543418, "loss": 0.5095, "step": 14244 }, { "epoch": 7.9581005586592175, "grad_norm": 0.5187742114067078, "learning_rate": 0.00060406162464986, "loss": 0.5606, "step": 14245 }, { "epoch": 7.958659217877095, "grad_norm": 0.646282970905304, "learning_rate": 0.0006040336134453782, "loss": 0.4379, "step": 14246 }, { "epoch": 7.959217877094972, "grad_norm": 0.39817845821380615, "learning_rate": 0.0006040056022408964, "loss": 0.4476, "step": 14247 }, { "epoch": 7.959776536312849, "grad_norm": 1.0522841215133667, "learning_rate": 0.0006039775910364146, "loss": 0.6138, "step": 14248 }, { "epoch": 7.960335195530726, "grad_norm": 0.3197142779827118, "learning_rate": 0.0006039495798319328, "loss": 0.3769, "step": 14249 }, { "epoch": 7.960893854748603, "grad_norm": 0.913466215133667, "learning_rate": 0.000603921568627451, "loss": 0.4181, "step": 14250 }, { "epoch": 7.9614525139664805, "grad_norm": 0.5283289551734924, "learning_rate": 0.0006038935574229692, "loss": 0.4581, "step": 14251 }, { "epoch": 7.962011173184358, "grad_norm": 4.923588752746582, "learning_rate": 0.0006038655462184874, "loss": 0.6446, "step": 14252 }, { "epoch": 7.9625698324022345, "grad_norm": 0.9310001134872437, "learning_rate": 0.0006038375350140056, "loss": 0.4058, "step": 14253 }, { "epoch": 7.963128491620112, "grad_norm": 0.7428258657455444, "learning_rate": 0.0006038095238095238, "loss": 0.4027, "step": 14254 }, { "epoch": 7.9636871508379885, "grad_norm": 0.82972252368927, "learning_rate": 0.000603781512605042, "loss": 0.5034, "step": 14255 }, { "epoch": 7.964245810055866, "grad_norm": 1.3849762678146362, "learning_rate": 0.0006037535014005602, "loss": 0.5053, "step": 14256 }, { "epoch": 7.9648044692737425, 
"grad_norm": 0.43868687748908997, "learning_rate": 0.0006037254901960784, "loss": 0.4129, "step": 14257 }, { "epoch": 7.96536312849162, "grad_norm": 0.5154237151145935, "learning_rate": 0.0006036974789915966, "loss": 0.4137, "step": 14258 }, { "epoch": 7.965921787709497, "grad_norm": 0.43162018060684204, "learning_rate": 0.0006036694677871149, "loss": 0.3895, "step": 14259 }, { "epoch": 7.966480446927374, "grad_norm": 0.6027476787567139, "learning_rate": 0.0006036414565826331, "loss": 0.3962, "step": 14260 }, { "epoch": 7.9670391061452515, "grad_norm": 0.6836234927177429, "learning_rate": 0.0006036134453781514, "loss": 0.5137, "step": 14261 }, { "epoch": 7.967597765363129, "grad_norm": 0.4649446904659271, "learning_rate": 0.0006035854341736695, "loss": 0.3996, "step": 14262 }, { "epoch": 7.9681564245810055, "grad_norm": 0.5667934417724609, "learning_rate": 0.0006035574229691877, "loss": 0.5143, "step": 14263 }, { "epoch": 7.968715083798883, "grad_norm": 0.4855731427669525, "learning_rate": 0.0006035294117647059, "loss": 0.4689, "step": 14264 }, { "epoch": 7.9692737430167595, "grad_norm": 0.5278502106666565, "learning_rate": 0.0006035014005602241, "loss": 0.4851, "step": 14265 }, { "epoch": 7.969832402234637, "grad_norm": 10.22043514251709, "learning_rate": 0.0006034733893557424, "loss": 0.5294, "step": 14266 }, { "epoch": 7.9703910614525135, "grad_norm": 0.5031912922859192, "learning_rate": 0.0006034453781512605, "loss": 0.4817, "step": 14267 }, { "epoch": 7.970949720670391, "grad_norm": 0.985588788986206, "learning_rate": 0.0006034173669467787, "loss": 0.4337, "step": 14268 }, { "epoch": 7.971508379888268, "grad_norm": 0.5934202075004578, "learning_rate": 0.0006033893557422969, "loss": 0.394, "step": 14269 }, { "epoch": 7.972067039106145, "grad_norm": 0.39451858401298523, "learning_rate": 0.0006033613445378151, "loss": 0.4091, "step": 14270 }, { "epoch": 7.9726256983240225, "grad_norm": 0.45145025849342346, "learning_rate": 0.0006033333333333334, "loss": 0.4962, 
"step": 14271 }, { "epoch": 7.9731843575419, "grad_norm": 0.5986431837081909, "learning_rate": 0.0006033053221288515, "loss": 0.3873, "step": 14272 }, { "epoch": 7.9737430167597765, "grad_norm": 0.6096537113189697, "learning_rate": 0.0006032773109243697, "loss": 0.4633, "step": 14273 }, { "epoch": 7.974301675977654, "grad_norm": 0.8488360047340393, "learning_rate": 0.0006032492997198879, "loss": 0.5554, "step": 14274 }, { "epoch": 7.9748603351955305, "grad_norm": 0.44510534405708313, "learning_rate": 0.0006032212885154061, "loss": 0.4214, "step": 14275 }, { "epoch": 7.975418994413408, "grad_norm": 0.35789167881011963, "learning_rate": 0.0006031932773109245, "loss": 0.3912, "step": 14276 }, { "epoch": 7.9759776536312845, "grad_norm": 0.8667805790901184, "learning_rate": 0.0006031652661064427, "loss": 0.4503, "step": 14277 }, { "epoch": 7.976536312849162, "grad_norm": 0.4212509095668793, "learning_rate": 0.0006031372549019608, "loss": 0.4124, "step": 14278 }, { "epoch": 7.977094972067039, "grad_norm": 0.495271772146225, "learning_rate": 0.000603109243697479, "loss": 0.4409, "step": 14279 }, { "epoch": 7.977653631284916, "grad_norm": 0.5545368194580078, "learning_rate": 0.0006030812324929972, "loss": 0.4684, "step": 14280 }, { "epoch": 7.9782122905027935, "grad_norm": 0.45595961809158325, "learning_rate": 0.0006030532212885154, "loss": 0.5211, "step": 14281 }, { "epoch": 7.97877094972067, "grad_norm": 0.46896255016326904, "learning_rate": 0.0006030252100840337, "loss": 0.4067, "step": 14282 }, { "epoch": 7.9793296089385475, "grad_norm": 4.56844425201416, "learning_rate": 0.0006029971988795518, "loss": 0.4198, "step": 14283 }, { "epoch": 7.979888268156425, "grad_norm": 0.5166363716125488, "learning_rate": 0.00060296918767507, "loss": 0.3801, "step": 14284 }, { "epoch": 7.9804469273743015, "grad_norm": 0.4709360897541046, "learning_rate": 0.0006029411764705882, "loss": 0.4743, "step": 14285 }, { "epoch": 7.981005586592179, "grad_norm": 0.4801344573497772, 
"learning_rate": 0.0006029131652661064, "loss": 0.3977, "step": 14286 }, { "epoch": 7.9815642458100555, "grad_norm": 0.5393338799476624, "learning_rate": 0.0006028851540616247, "loss": 0.4951, "step": 14287 }, { "epoch": 7.982122905027933, "grad_norm": 2.87831974029541, "learning_rate": 0.0006028571428571428, "loss": 0.3686, "step": 14288 }, { "epoch": 7.98268156424581, "grad_norm": 1.395812749862671, "learning_rate": 0.000602829131652661, "loss": 0.4826, "step": 14289 }, { "epoch": 7.983240223463687, "grad_norm": 0.5166707038879395, "learning_rate": 0.0006028011204481792, "loss": 0.4475, "step": 14290 }, { "epoch": 7.9837988826815645, "grad_norm": 0.4548737704753876, "learning_rate": 0.0006027731092436974, "loss": 0.3641, "step": 14291 }, { "epoch": 7.984357541899441, "grad_norm": 0.4100731313228607, "learning_rate": 0.0006027450980392158, "loss": 0.4128, "step": 14292 }, { "epoch": 7.9849162011173185, "grad_norm": 2.9874231815338135, "learning_rate": 0.000602717086834734, "loss": 0.4089, "step": 14293 }, { "epoch": 7.985474860335195, "grad_norm": 0.525781512260437, "learning_rate": 0.0006026890756302521, "loss": 0.406, "step": 14294 }, { "epoch": 7.9860335195530725, "grad_norm": 1.8277138471603394, "learning_rate": 0.0006026610644257703, "loss": 0.8354, "step": 14295 }, { "epoch": 7.98659217877095, "grad_norm": 0.5151097774505615, "learning_rate": 0.0006026330532212885, "loss": 0.4598, "step": 14296 }, { "epoch": 7.9871508379888265, "grad_norm": 0.6688753366470337, "learning_rate": 0.0006026050420168068, "loss": 0.5116, "step": 14297 }, { "epoch": 7.987709497206704, "grad_norm": 0.40915465354919434, "learning_rate": 0.000602577030812325, "loss": 0.2826, "step": 14298 }, { "epoch": 7.988268156424581, "grad_norm": 0.6947688460350037, "learning_rate": 0.0006025490196078431, "loss": 0.3685, "step": 14299 }, { "epoch": 7.988826815642458, "grad_norm": 1.5197031497955322, "learning_rate": 0.0006025210084033613, "loss": 0.3787, "step": 14300 }, { "epoch": 
7.9893854748603355, "grad_norm": 0.38484910130500793, "learning_rate": 0.0006024929971988795, "loss": 0.3972, "step": 14301 }, { "epoch": 7.989944134078212, "grad_norm": 0.5499008893966675, "learning_rate": 0.0006024649859943978, "loss": 0.4426, "step": 14302 }, { "epoch": 7.9905027932960895, "grad_norm": 0.48515400290489197, "learning_rate": 0.000602436974789916, "loss": 0.4094, "step": 14303 }, { "epoch": 7.991061452513966, "grad_norm": 0.5160617828369141, "learning_rate": 0.0006024089635854341, "loss": 0.4687, "step": 14304 }, { "epoch": 7.9916201117318435, "grad_norm": 0.4319307506084442, "learning_rate": 0.0006023809523809523, "loss": 0.4441, "step": 14305 }, { "epoch": 7.992178770949721, "grad_norm": 0.6816244721412659, "learning_rate": 0.0006023529411764705, "loss": 0.4317, "step": 14306 }, { "epoch": 7.9927374301675975, "grad_norm": 1.7799586057662964, "learning_rate": 0.0006023249299719888, "loss": 0.4308, "step": 14307 }, { "epoch": 7.993296089385475, "grad_norm": 1.1707338094711304, "learning_rate": 0.0006022969187675071, "loss": 0.5587, "step": 14308 }, { "epoch": 7.993854748603352, "grad_norm": 0.988422691822052, "learning_rate": 0.0006022689075630253, "loss": 0.4772, "step": 14309 }, { "epoch": 7.994413407821229, "grad_norm": 0.5512896180152893, "learning_rate": 0.0006022408963585434, "loss": 0.3473, "step": 14310 }, { "epoch": 7.9949720670391065, "grad_norm": 0.7550446391105652, "learning_rate": 0.0006022128851540616, "loss": 0.4443, "step": 14311 }, { "epoch": 7.995530726256983, "grad_norm": 0.6585484147071838, "learning_rate": 0.0006021848739495799, "loss": 0.4696, "step": 14312 }, { "epoch": 7.9960893854748605, "grad_norm": 1.5440630912780762, "learning_rate": 0.0006021568627450981, "loss": 0.4739, "step": 14313 }, { "epoch": 7.996648044692737, "grad_norm": 0.5748957395553589, "learning_rate": 0.0006021288515406163, "loss": 0.332, "step": 14314 }, { "epoch": 7.9972067039106145, "grad_norm": 0.5756217837333679, "learning_rate": 
0.0006021008403361344, "loss": 0.5738, "step": 14315 }, { "epoch": 7.997765363128492, "grad_norm": 0.38247138261795044, "learning_rate": 0.0006020728291316526, "loss": 0.3749, "step": 14316 }, { "epoch": 7.9983240223463685, "grad_norm": 0.42976677417755127, "learning_rate": 0.0006020448179271709, "loss": 0.4572, "step": 14317 }, { "epoch": 7.998882681564246, "grad_norm": 0.5669113397598267, "learning_rate": 0.0006020168067226891, "loss": 0.4158, "step": 14318 }, { "epoch": 7.9994413407821225, "grad_norm": 0.4263019561767578, "learning_rate": 0.0006019887955182073, "loss": 0.4493, "step": 14319 }, { "epoch": 8.0, "grad_norm": 0.42987707257270813, "learning_rate": 0.0006019607843137254, "loss": 0.3959, "step": 14320 }, { "epoch": 8.000558659217877, "grad_norm": 0.4581637680530548, "learning_rate": 0.0006019327731092436, "loss": 0.4532, "step": 14321 }, { "epoch": 8.001117318435755, "grad_norm": 1.0903503894805908, "learning_rate": 0.0006019047619047619, "loss": 0.514, "step": 14322 }, { "epoch": 8.001675977653631, "grad_norm": 0.393097460269928, "learning_rate": 0.0006018767507002801, "loss": 0.3976, "step": 14323 }, { "epoch": 8.002234636871508, "grad_norm": 2.589627981185913, "learning_rate": 0.0006018487394957984, "loss": 0.5084, "step": 14324 }, { "epoch": 8.002793296089385, "grad_norm": 0.4391290545463562, "learning_rate": 0.0006018207282913166, "loss": 0.4712, "step": 14325 }, { "epoch": 8.003351955307263, "grad_norm": 0.5736142992973328, "learning_rate": 0.0006017927170868347, "loss": 0.3762, "step": 14326 }, { "epoch": 8.00391061452514, "grad_norm": 0.49017664790153503, "learning_rate": 0.000601764705882353, "loss": 0.5359, "step": 14327 }, { "epoch": 8.004469273743016, "grad_norm": 1.2210017442703247, "learning_rate": 0.0006017366946778712, "loss": 0.4317, "step": 14328 }, { "epoch": 8.005027932960894, "grad_norm": 0.46017375588417053, "learning_rate": 0.0006017086834733894, "loss": 0.4174, "step": 14329 }, { "epoch": 8.005586592178771, "grad_norm": 
0.48255613446235657, "learning_rate": 0.0006016806722689076, "loss": 0.4545, "step": 14330 }, { "epoch": 8.006145251396648, "grad_norm": 0.4368647336959839, "learning_rate": 0.0006016526610644257, "loss": 0.429, "step": 14331 }, { "epoch": 8.006703910614526, "grad_norm": 0.5759446024894714, "learning_rate": 0.000601624649859944, "loss": 0.4202, "step": 14332 }, { "epoch": 8.007262569832402, "grad_norm": 0.4744316637516022, "learning_rate": 0.0006015966386554622, "loss": 0.5632, "step": 14333 }, { "epoch": 8.007821229050279, "grad_norm": 0.5011651515960693, "learning_rate": 0.0006015686274509804, "loss": 0.418, "step": 14334 }, { "epoch": 8.008379888268156, "grad_norm": 0.3920082747936249, "learning_rate": 0.0006015406162464986, "loss": 0.4656, "step": 14335 }, { "epoch": 8.008938547486034, "grad_norm": 0.9925408959388733, "learning_rate": 0.0006015126050420167, "loss": 0.4318, "step": 14336 }, { "epoch": 8.00949720670391, "grad_norm": 0.3603690266609192, "learning_rate": 0.000601484593837535, "loss": 0.368, "step": 14337 }, { "epoch": 8.010055865921787, "grad_norm": 2.0020430088043213, "learning_rate": 0.0006014565826330532, "loss": 0.4571, "step": 14338 }, { "epoch": 8.010614525139665, "grad_norm": 0.6948301792144775, "learning_rate": 0.0006014285714285714, "loss": 0.4483, "step": 14339 }, { "epoch": 8.011173184357542, "grad_norm": 0.5292946100234985, "learning_rate": 0.0006014005602240896, "loss": 0.46, "step": 14340 }, { "epoch": 8.011731843575419, "grad_norm": 0.4924343526363373, "learning_rate": 0.0006013725490196079, "loss": 0.3763, "step": 14341 }, { "epoch": 8.012290502793297, "grad_norm": 0.5706194639205933, "learning_rate": 0.0006013445378151261, "loss": 0.2917, "step": 14342 }, { "epoch": 8.012849162011173, "grad_norm": 1.1087849140167236, "learning_rate": 0.0006013165266106443, "loss": 0.3821, "step": 14343 }, { "epoch": 8.01340782122905, "grad_norm": 0.4120514690876007, "learning_rate": 0.0006012885154061625, "loss": 0.4046, "step": 14344 }, { "epoch": 
8.013966480446927, "grad_norm": 0.3571608066558838, "learning_rate": 0.0006012605042016807, "loss": 0.3373, "step": 14345 }, { "epoch": 8.014525139664805, "grad_norm": 0.5736892223358154, "learning_rate": 0.0006012324929971989, "loss": 0.4238, "step": 14346 }, { "epoch": 8.015083798882682, "grad_norm": 0.6381986737251282, "learning_rate": 0.0006012044817927171, "loss": 0.3924, "step": 14347 }, { "epoch": 8.015642458100558, "grad_norm": 0.41436097025871277, "learning_rate": 0.0006011764705882353, "loss": 0.3639, "step": 14348 }, { "epoch": 8.016201117318436, "grad_norm": 0.712432861328125, "learning_rate": 0.0006011484593837535, "loss": 0.4726, "step": 14349 }, { "epoch": 8.016759776536313, "grad_norm": 0.6390038728713989, "learning_rate": 0.0006011204481792717, "loss": 0.4026, "step": 14350 }, { "epoch": 8.01731843575419, "grad_norm": 0.8491302132606506, "learning_rate": 0.0006010924369747899, "loss": 0.4274, "step": 14351 }, { "epoch": 8.017877094972068, "grad_norm": 0.717250645160675, "learning_rate": 0.0006010644257703082, "loss": 0.4014, "step": 14352 }, { "epoch": 8.018435754189944, "grad_norm": 0.5589627027511597, "learning_rate": 0.0006010364145658263, "loss": 0.5026, "step": 14353 }, { "epoch": 8.018994413407821, "grad_norm": 0.5699906349182129, "learning_rate": 0.0006010084033613445, "loss": 0.441, "step": 14354 }, { "epoch": 8.019553072625698, "grad_norm": 0.5970957279205322, "learning_rate": 0.0006009803921568627, "loss": 0.3878, "step": 14355 }, { "epoch": 8.020111731843576, "grad_norm": 0.5575460195541382, "learning_rate": 0.0006009523809523809, "loss": 0.4442, "step": 14356 }, { "epoch": 8.020670391061453, "grad_norm": 0.6154443025588989, "learning_rate": 0.0006009243697478993, "loss": 0.3454, "step": 14357 }, { "epoch": 8.021229050279329, "grad_norm": 0.32149264216423035, "learning_rate": 0.0006008963585434174, "loss": 0.3733, "step": 14358 }, { "epoch": 8.021787709497207, "grad_norm": 0.748559296131134, "learning_rate": 0.0006008683473389356, 
"loss": 0.3792, "step": 14359 }, { "epoch": 8.022346368715084, "grad_norm": 0.4470595121383667, "learning_rate": 0.0006008403361344538, "loss": 0.3553, "step": 14360 }, { "epoch": 8.02290502793296, "grad_norm": 0.48731231689453125, "learning_rate": 0.000600812324929972, "loss": 0.4171, "step": 14361 }, { "epoch": 8.023463687150837, "grad_norm": 0.8704900741577148, "learning_rate": 0.0006007843137254903, "loss": 0.4265, "step": 14362 }, { "epoch": 8.024022346368715, "grad_norm": 0.39106935262680054, "learning_rate": 0.0006007563025210084, "loss": 0.3407, "step": 14363 }, { "epoch": 8.024581005586592, "grad_norm": 0.5778666734695435, "learning_rate": 0.0006007282913165266, "loss": 0.5475, "step": 14364 }, { "epoch": 8.025139664804469, "grad_norm": 0.5334743857383728, "learning_rate": 0.0006007002801120448, "loss": 0.4085, "step": 14365 }, { "epoch": 8.025698324022347, "grad_norm": 1.2521016597747803, "learning_rate": 0.000600672268907563, "loss": 0.4243, "step": 14366 }, { "epoch": 8.026256983240224, "grad_norm": 0.4407244920730591, "learning_rate": 0.0006006442577030813, "loss": 0.5232, "step": 14367 }, { "epoch": 8.0268156424581, "grad_norm": 0.5303959846496582, "learning_rate": 0.0006006162464985995, "loss": 0.4063, "step": 14368 }, { "epoch": 8.027374301675978, "grad_norm": 0.5556484460830688, "learning_rate": 0.0006005882352941176, "loss": 0.3528, "step": 14369 }, { "epoch": 8.027932960893855, "grad_norm": 0.7826901078224182, "learning_rate": 0.0006005602240896358, "loss": 0.6155, "step": 14370 }, { "epoch": 8.028491620111732, "grad_norm": 0.47236984968185425, "learning_rate": 0.000600532212885154, "loss": 0.4217, "step": 14371 }, { "epoch": 8.029050279329608, "grad_norm": 0.5443273186683655, "learning_rate": 0.0006005042016806723, "loss": 0.4279, "step": 14372 }, { "epoch": 8.029608938547486, "grad_norm": 0.5189141035079956, "learning_rate": 0.0006004761904761906, "loss": 0.4762, "step": 14373 }, { "epoch": 8.030167597765363, "grad_norm": 0.5528595447540283, 
"learning_rate": 0.0006004481792717087, "loss": 0.3505, "step": 14374 }, { "epoch": 8.03072625698324, "grad_norm": 0.5922738313674927, "learning_rate": 0.0006004201680672269, "loss": 0.4317, "step": 14375 }, { "epoch": 8.031284916201118, "grad_norm": 0.5522679686546326, "learning_rate": 0.0006003921568627451, "loss": 0.4224, "step": 14376 }, { "epoch": 8.031843575418995, "grad_norm": 0.5774714350700378, "learning_rate": 0.0006003641456582634, "loss": 0.6338, "step": 14377 }, { "epoch": 8.032402234636871, "grad_norm": 1.2023193836212158, "learning_rate": 0.0006003361344537816, "loss": 0.5284, "step": 14378 }, { "epoch": 8.03296089385475, "grad_norm": 0.5452879071235657, "learning_rate": 0.0006003081232492997, "loss": 0.5738, "step": 14379 }, { "epoch": 8.033519553072626, "grad_norm": 0.4827750027179718, "learning_rate": 0.0006002801120448179, "loss": 0.5209, "step": 14380 }, { "epoch": 8.034078212290503, "grad_norm": 0.3710368871688843, "learning_rate": 0.0006002521008403361, "loss": 0.4059, "step": 14381 }, { "epoch": 8.03463687150838, "grad_norm": 0.4284456968307495, "learning_rate": 0.0006002240896358544, "loss": 0.4133, "step": 14382 }, { "epoch": 8.035195530726257, "grad_norm": 0.40405189990997314, "learning_rate": 0.0006001960784313726, "loss": 0.3613, "step": 14383 }, { "epoch": 8.035754189944134, "grad_norm": 0.71453857421875, "learning_rate": 0.0006001680672268908, "loss": 0.3838, "step": 14384 }, { "epoch": 8.03631284916201, "grad_norm": 0.9455159306526184, "learning_rate": 0.0006001400560224089, "loss": 0.4732, "step": 14385 }, { "epoch": 8.036871508379889, "grad_norm": 1.5728727579116821, "learning_rate": 0.0006001120448179271, "loss": 0.5204, "step": 14386 }, { "epoch": 8.037430167597766, "grad_norm": 0.4212017357349396, "learning_rate": 0.0006000840336134454, "loss": 0.4458, "step": 14387 }, { "epoch": 8.037988826815642, "grad_norm": 0.6370043158531189, "learning_rate": 0.0006000560224089636, "loss": 0.4711, "step": 14388 }, { "epoch": 
8.03854748603352, "grad_norm": 7.670841217041016, "learning_rate": 0.0006000280112044818, "loss": 0.3343, "step": 14389 }, { "epoch": 8.039106145251397, "grad_norm": 0.5167133808135986, "learning_rate": 0.0006, "loss": 0.4198, "step": 14390 }, { "epoch": 8.039664804469274, "grad_norm": 0.501754641532898, "learning_rate": 0.0005999719887955182, "loss": 0.4012, "step": 14391 }, { "epoch": 8.04022346368715, "grad_norm": 0.8308843374252319, "learning_rate": 0.0005999439775910365, "loss": 0.4356, "step": 14392 }, { "epoch": 8.040782122905028, "grad_norm": 0.5902323126792908, "learning_rate": 0.0005999159663865547, "loss": 0.4823, "step": 14393 }, { "epoch": 8.041340782122905, "grad_norm": 0.8933522701263428, "learning_rate": 0.0005998879551820729, "loss": 0.5271, "step": 14394 }, { "epoch": 8.041899441340782, "grad_norm": 0.5437177419662476, "learning_rate": 0.000599859943977591, "loss": 0.6006, "step": 14395 }, { "epoch": 8.04245810055866, "grad_norm": 1.7194788455963135, "learning_rate": 0.0005998319327731092, "loss": 0.5133, "step": 14396 }, { "epoch": 8.043016759776537, "grad_norm": 2.9548416137695312, "learning_rate": 0.0005998039215686275, "loss": 0.4619, "step": 14397 }, { "epoch": 8.043575418994413, "grad_norm": 1.250157356262207, "learning_rate": 0.0005997759103641457, "loss": 0.4708, "step": 14398 }, { "epoch": 8.04413407821229, "grad_norm": 0.5268141627311707, "learning_rate": 0.0005997478991596639, "loss": 0.4167, "step": 14399 }, { "epoch": 8.044692737430168, "grad_norm": 0.40548741817474365, "learning_rate": 0.0005997198879551821, "loss": 0.4076, "step": 14400 }, { "epoch": 8.045251396648045, "grad_norm": 2.5386669635772705, "learning_rate": 0.0005996918767507002, "loss": 0.3719, "step": 14401 }, { "epoch": 8.045810055865921, "grad_norm": 0.5602366924285889, "learning_rate": 0.0005996638655462185, "loss": 0.4048, "step": 14402 }, { "epoch": 8.0463687150838, "grad_norm": 0.3502439558506012, "learning_rate": 0.0005996358543417367, "loss": 0.3355, "step": 
14403 }, { "epoch": 8.046927374301676, "grad_norm": 0.5376352667808533, "learning_rate": 0.0005996078431372549, "loss": 0.4692, "step": 14404 }, { "epoch": 8.047486033519553, "grad_norm": 1.0940701961517334, "learning_rate": 0.0005995798319327731, "loss": 0.3624, "step": 14405 }, { "epoch": 8.048044692737431, "grad_norm": 0.4787084758281708, "learning_rate": 0.0005995518207282912, "loss": 0.4247, "step": 14406 }, { "epoch": 8.048603351955308, "grad_norm": 1.0625576972961426, "learning_rate": 0.0005995238095238096, "loss": 0.4153, "step": 14407 }, { "epoch": 8.049162011173184, "grad_norm": 1.6665457487106323, "learning_rate": 0.0005994957983193278, "loss": 0.3258, "step": 14408 }, { "epoch": 8.04972067039106, "grad_norm": 0.48011183738708496, "learning_rate": 0.000599467787114846, "loss": 0.459, "step": 14409 }, { "epoch": 8.050279329608939, "grad_norm": 0.5652352571487427, "learning_rate": 0.0005994397759103642, "loss": 0.3983, "step": 14410 }, { "epoch": 8.050837988826816, "grad_norm": 0.6023281812667847, "learning_rate": 0.0005994117647058823, "loss": 0.424, "step": 14411 }, { "epoch": 8.051396648044692, "grad_norm": 0.5585233569145203, "learning_rate": 0.0005993837535014006, "loss": 0.4479, "step": 14412 }, { "epoch": 8.05195530726257, "grad_norm": 0.45445168018341064, "learning_rate": 0.0005993557422969188, "loss": 0.4882, "step": 14413 }, { "epoch": 8.052513966480447, "grad_norm": 3.381598711013794, "learning_rate": 0.000599327731092437, "loss": 0.4316, "step": 14414 }, { "epoch": 8.053072625698324, "grad_norm": 0.4645046591758728, "learning_rate": 0.0005992997198879552, "loss": 0.3061, "step": 14415 }, { "epoch": 8.053631284916202, "grad_norm": 1.2431318759918213, "learning_rate": 0.0005992717086834734, "loss": 0.3166, "step": 14416 }, { "epoch": 8.054189944134079, "grad_norm": 0.653498113155365, "learning_rate": 0.0005992436974789916, "loss": 0.5003, "step": 14417 }, { "epoch": 8.054748603351955, "grad_norm": 0.644396185874939, "learning_rate": 
0.0005992156862745098, "loss": 0.4213, "step": 14418 }, { "epoch": 8.055307262569832, "grad_norm": 0.40860694646835327, "learning_rate": 0.000599187675070028, "loss": 0.4328, "step": 14419 }, { "epoch": 8.05586592178771, "grad_norm": 0.5199441313743591, "learning_rate": 0.0005991596638655462, "loss": 0.4717, "step": 14420 }, { "epoch": 8.056424581005587, "grad_norm": 0.5356389284133911, "learning_rate": 0.0005991316526610644, "loss": 0.446, "step": 14421 }, { "epoch": 8.056983240223463, "grad_norm": 2.948608875274658, "learning_rate": 0.0005991036414565826, "loss": 0.4904, "step": 14422 }, { "epoch": 8.057541899441341, "grad_norm": 0.40729671716690063, "learning_rate": 0.0005990756302521009, "loss": 0.4429, "step": 14423 }, { "epoch": 8.058100558659218, "grad_norm": 0.49135828018188477, "learning_rate": 0.0005990476190476191, "loss": 0.4937, "step": 14424 }, { "epoch": 8.058659217877095, "grad_norm": 0.6867174506187439, "learning_rate": 0.0005990196078431373, "loss": 0.4428, "step": 14425 }, { "epoch": 8.059217877094973, "grad_norm": 1.6701709032058716, "learning_rate": 0.0005989915966386555, "loss": 0.6129, "step": 14426 }, { "epoch": 8.05977653631285, "grad_norm": 0.5143259763717651, "learning_rate": 0.0005989635854341737, "loss": 0.4244, "step": 14427 }, { "epoch": 8.060335195530726, "grad_norm": 0.8569937944412231, "learning_rate": 0.0005989355742296919, "loss": 0.5474, "step": 14428 }, { "epoch": 8.060893854748603, "grad_norm": 0.7850049138069153, "learning_rate": 0.0005989075630252101, "loss": 0.5752, "step": 14429 }, { "epoch": 8.061452513966481, "grad_norm": 1.0247721672058105, "learning_rate": 0.0005988795518207283, "loss": 0.42, "step": 14430 }, { "epoch": 8.062011173184358, "grad_norm": 0.75715172290802, "learning_rate": 0.0005988515406162465, "loss": 0.4341, "step": 14431 }, { "epoch": 8.062569832402234, "grad_norm": 0.5107081532478333, "learning_rate": 0.0005988235294117648, "loss": 0.4448, "step": 14432 }, { "epoch": 8.063128491620112, "grad_norm": 
0.5078255534172058, "learning_rate": 0.0005987955182072829, "loss": 0.5197, "step": 14433 }, { "epoch": 8.063687150837989, "grad_norm": 0.668635904788971, "learning_rate": 0.0005987675070028011, "loss": 0.4056, "step": 14434 }, { "epoch": 8.064245810055866, "grad_norm": 0.6517415046691895, "learning_rate": 0.0005987394957983193, "loss": 0.3851, "step": 14435 }, { "epoch": 8.064804469273742, "grad_norm": 0.9981436133384705, "learning_rate": 0.0005987114845938375, "loss": 0.4224, "step": 14436 }, { "epoch": 8.06536312849162, "grad_norm": 0.4603477716445923, "learning_rate": 0.0005986834733893558, "loss": 0.4494, "step": 14437 }, { "epoch": 8.065921787709497, "grad_norm": 0.5559819936752319, "learning_rate": 0.0005986554621848739, "loss": 0.5596, "step": 14438 }, { "epoch": 8.066480446927374, "grad_norm": 0.4180082678794861, "learning_rate": 0.0005986274509803921, "loss": 0.4175, "step": 14439 }, { "epoch": 8.067039106145252, "grad_norm": 0.8038545250892639, "learning_rate": 0.0005985994397759104, "loss": 0.63, "step": 14440 }, { "epoch": 8.067597765363129, "grad_norm": 0.6108267903327942, "learning_rate": 0.0005985714285714286, "loss": 0.5032, "step": 14441 }, { "epoch": 8.068156424581005, "grad_norm": 0.49462592601776123, "learning_rate": 0.0005985434173669469, "loss": 0.4102, "step": 14442 }, { "epoch": 8.068715083798883, "grad_norm": 2.032073497772217, "learning_rate": 0.000598515406162465, "loss": 0.4789, "step": 14443 }, { "epoch": 8.06927374301676, "grad_norm": 0.48974502086639404, "learning_rate": 0.0005984873949579832, "loss": 0.5186, "step": 14444 }, { "epoch": 8.069832402234637, "grad_norm": 0.8576788902282715, "learning_rate": 0.0005984593837535014, "loss": 0.5996, "step": 14445 }, { "epoch": 8.070391061452513, "grad_norm": 12.471939086914062, "learning_rate": 0.0005984313725490196, "loss": 0.3605, "step": 14446 }, { "epoch": 8.070949720670392, "grad_norm": 0.5204570293426514, "learning_rate": 0.0005984033613445379, "loss": 0.3861, "step": 14447 }, { 
"epoch": 8.071508379888268, "grad_norm": 0.3632824122905731, "learning_rate": 0.0005983753501400561, "loss": 0.4516, "step": 14448 }, { "epoch": 8.072067039106145, "grad_norm": 0.6021848917007446, "learning_rate": 0.0005983473389355742, "loss": 0.383, "step": 14449 }, { "epoch": 8.072625698324023, "grad_norm": 0.9489167928695679, "learning_rate": 0.0005983193277310924, "loss": 0.4295, "step": 14450 }, { "epoch": 8.0731843575419, "grad_norm": 0.5070911645889282, "learning_rate": 0.0005982913165266106, "loss": 0.444, "step": 14451 }, { "epoch": 8.073743016759776, "grad_norm": 2.872343063354492, "learning_rate": 0.0005982633053221289, "loss": 0.3669, "step": 14452 }, { "epoch": 8.074301675977654, "grad_norm": 0.9399489164352417, "learning_rate": 0.0005982352941176471, "loss": 0.3951, "step": 14453 }, { "epoch": 8.074860335195531, "grad_norm": 1.8839850425720215, "learning_rate": 0.0005982072829131652, "loss": 0.4762, "step": 14454 }, { "epoch": 8.075418994413408, "grad_norm": 0.6631466746330261, "learning_rate": 0.0005981792717086834, "loss": 0.4784, "step": 14455 }, { "epoch": 8.075977653631284, "grad_norm": 0.7401856184005737, "learning_rate": 0.0005981512605042017, "loss": 0.4392, "step": 14456 }, { "epoch": 8.076536312849163, "grad_norm": 0.65260249376297, "learning_rate": 0.00059812324929972, "loss": 0.3996, "step": 14457 }, { "epoch": 8.077094972067039, "grad_norm": 0.6222133636474609, "learning_rate": 0.0005980952380952382, "loss": 0.3404, "step": 14458 }, { "epoch": 8.077653631284916, "grad_norm": 0.9325791597366333, "learning_rate": 0.0005980672268907563, "loss": 0.3248, "step": 14459 }, { "epoch": 8.078212290502794, "grad_norm": 3.357792854309082, "learning_rate": 0.0005980392156862745, "loss": 0.3724, "step": 14460 }, { "epoch": 8.07877094972067, "grad_norm": 0.4824487268924713, "learning_rate": 0.0005980112044817927, "loss": 0.4733, "step": 14461 }, { "epoch": 8.079329608938547, "grad_norm": 0.7129288911819458, "learning_rate": 0.000597983193277311, 
"loss": 0.5004, "step": 14462 }, { "epoch": 8.079888268156424, "grad_norm": 0.4432324767112732, "learning_rate": 0.0005979551820728292, "loss": 0.4279, "step": 14463 }, { "epoch": 8.080446927374302, "grad_norm": 0.4276464879512787, "learning_rate": 0.0005979271708683474, "loss": 0.352, "step": 14464 }, { "epoch": 8.081005586592179, "grad_norm": 0.5981405973434448, "learning_rate": 0.0005978991596638655, "loss": 0.4644, "step": 14465 }, { "epoch": 8.081564245810055, "grad_norm": 0.4256029427051544, "learning_rate": 0.0005978711484593837, "loss": 0.3899, "step": 14466 }, { "epoch": 8.082122905027934, "grad_norm": 0.6515499353408813, "learning_rate": 0.000597843137254902, "loss": 0.4576, "step": 14467 }, { "epoch": 8.08268156424581, "grad_norm": 0.500608503818512, "learning_rate": 0.0005978151260504202, "loss": 0.6457, "step": 14468 }, { "epoch": 8.083240223463687, "grad_norm": 0.535923182964325, "learning_rate": 0.0005977871148459384, "loss": 0.4132, "step": 14469 }, { "epoch": 8.083798882681565, "grad_norm": 0.5433781147003174, "learning_rate": 0.0005977591036414565, "loss": 0.5366, "step": 14470 }, { "epoch": 8.084357541899442, "grad_norm": 0.8429211974143982, "learning_rate": 0.0005977310924369747, "loss": 0.542, "step": 14471 }, { "epoch": 8.084916201117318, "grad_norm": 0.45777586102485657, "learning_rate": 0.0005977030812324931, "loss": 0.4971, "step": 14472 }, { "epoch": 8.085474860335195, "grad_norm": 0.5182831287384033, "learning_rate": 0.0005976750700280113, "loss": 0.4184, "step": 14473 }, { "epoch": 8.086033519553073, "grad_norm": 1.22562837600708, "learning_rate": 0.0005976470588235295, "loss": 0.5364, "step": 14474 }, { "epoch": 8.08659217877095, "grad_norm": 0.9164565205574036, "learning_rate": 0.0005976190476190476, "loss": 0.3132, "step": 14475 }, { "epoch": 8.087150837988826, "grad_norm": 0.4706647992134094, "learning_rate": 0.0005975910364145658, "loss": 0.3871, "step": 14476 }, { "epoch": 8.087709497206705, "grad_norm": 0.45112869143486023, 
"learning_rate": 0.0005975630252100841, "loss": 0.3858, "step": 14477 }, { "epoch": 8.088268156424581, "grad_norm": 0.5883400440216064, "learning_rate": 0.0005975350140056023, "loss": 0.3692, "step": 14478 }, { "epoch": 8.088826815642458, "grad_norm": 0.43818268179893494, "learning_rate": 0.0005975070028011205, "loss": 0.3544, "step": 14479 }, { "epoch": 8.089385474860336, "grad_norm": 0.43844330310821533, "learning_rate": 0.0005974789915966387, "loss": 0.4451, "step": 14480 }, { "epoch": 8.089944134078213, "grad_norm": 0.379326194524765, "learning_rate": 0.0005974509803921568, "loss": 0.3861, "step": 14481 }, { "epoch": 8.09050279329609, "grad_norm": 1.012355089187622, "learning_rate": 0.0005974229691876751, "loss": 0.6071, "step": 14482 }, { "epoch": 8.091061452513966, "grad_norm": 1.403200387954712, "learning_rate": 0.0005973949579831933, "loss": 0.4221, "step": 14483 }, { "epoch": 8.091620111731844, "grad_norm": 0.5278259515762329, "learning_rate": 0.0005973669467787115, "loss": 0.3276, "step": 14484 }, { "epoch": 8.09217877094972, "grad_norm": 0.47505414485931396, "learning_rate": 0.0005973389355742297, "loss": 0.452, "step": 14485 }, { "epoch": 8.092737430167597, "grad_norm": 0.5616818070411682, "learning_rate": 0.0005973109243697478, "loss": 0.3871, "step": 14486 }, { "epoch": 8.093296089385476, "grad_norm": 0.5413137078285217, "learning_rate": 0.0005972829131652661, "loss": 0.4361, "step": 14487 }, { "epoch": 8.093854748603352, "grad_norm": 0.5107696652412415, "learning_rate": 0.0005972549019607844, "loss": 0.4843, "step": 14488 }, { "epoch": 8.094413407821229, "grad_norm": 0.7350447773933411, "learning_rate": 0.0005972268907563026, "loss": 0.5338, "step": 14489 }, { "epoch": 8.094972067039107, "grad_norm": 0.7885605692863464, "learning_rate": 0.0005971988795518208, "loss": 0.4659, "step": 14490 }, { "epoch": 8.095530726256984, "grad_norm": 0.49230703711509705, "learning_rate": 0.0005971708683473389, "loss": 0.3995, "step": 14491 }, { "epoch": 
8.09608938547486, "grad_norm": 13.724759101867676, "learning_rate": 0.0005971428571428572, "loss": 0.3574, "step": 14492 }, { "epoch": 8.096648044692737, "grad_norm": 0.6826164126396179, "learning_rate": 0.0005971148459383754, "loss": 0.4435, "step": 14493 }, { "epoch": 8.097206703910615, "grad_norm": 0.8319756388664246, "learning_rate": 0.0005970868347338936, "loss": 0.3703, "step": 14494 }, { "epoch": 8.097765363128492, "grad_norm": 0.5146135091781616, "learning_rate": 0.0005970588235294118, "loss": 0.2916, "step": 14495 }, { "epoch": 8.098324022346368, "grad_norm": 0.5947471261024475, "learning_rate": 0.00059703081232493, "loss": 0.5095, "step": 14496 }, { "epoch": 8.098882681564247, "grad_norm": 0.5056770443916321, "learning_rate": 0.0005970028011204482, "loss": 0.3667, "step": 14497 }, { "epoch": 8.099441340782123, "grad_norm": 0.39849406480789185, "learning_rate": 0.0005969747899159664, "loss": 0.393, "step": 14498 }, { "epoch": 8.1, "grad_norm": 0.4292544424533844, "learning_rate": 0.0005969467787114846, "loss": 0.4284, "step": 14499 }, { "epoch": 8.100558659217878, "grad_norm": 0.4043031334877014, "learning_rate": 0.0005969187675070028, "loss": 0.4377, "step": 14500 }, { "epoch": 8.100558659217878, "eval_cer": 0.09223375121381733, "eval_loss": 0.34655728936195374, "eval_runtime": 55.558, "eval_samples_per_second": 81.68, "eval_steps_per_second": 5.112, "eval_wer": 0.37174939247507055, "step": 14500 }, { "epoch": 8.101117318435755, "grad_norm": 0.9145787358283997, "learning_rate": 0.000596890756302521, "loss": 0.5343, "step": 14501 }, { "epoch": 8.101675977653631, "grad_norm": 1.1518689393997192, "learning_rate": 0.0005968627450980391, "loss": 0.4324, "step": 14502 }, { "epoch": 8.102234636871508, "grad_norm": 0.5576058626174927, "learning_rate": 0.0005968347338935574, "loss": 0.4712, "step": 14503 }, { "epoch": 8.102793296089386, "grad_norm": 1.422631025314331, "learning_rate": 0.0005968067226890756, "loss": 0.3519, "step": 14504 }, { "epoch": 
8.103351955307263, "grad_norm": 0.7505115270614624, "learning_rate": 0.0005967787114845939, "loss": 0.5005, "step": 14505 }, { "epoch": 8.10391061452514, "grad_norm": 1.0042904615402222, "learning_rate": 0.0005967507002801121, "loss": 0.3961, "step": 14506 }, { "epoch": 8.104469273743018, "grad_norm": 0.595643937587738, "learning_rate": 0.0005967226890756302, "loss": 0.6221, "step": 14507 }, { "epoch": 8.105027932960894, "grad_norm": 1.31422758102417, "learning_rate": 0.0005966946778711485, "loss": 0.4962, "step": 14508 }, { "epoch": 8.10558659217877, "grad_norm": 0.8656612634658813, "learning_rate": 0.0005966666666666667, "loss": 0.4151, "step": 14509 }, { "epoch": 8.106145251396647, "grad_norm": 0.857059895992279, "learning_rate": 0.0005966386554621849, "loss": 0.5488, "step": 14510 }, { "epoch": 8.106703910614526, "grad_norm": 0.5608837008476257, "learning_rate": 0.0005966106442577031, "loss": 0.4081, "step": 14511 }, { "epoch": 8.107262569832402, "grad_norm": 0.5355949401855469, "learning_rate": 0.0005965826330532213, "loss": 0.4165, "step": 14512 }, { "epoch": 8.107821229050279, "grad_norm": 0.7131775617599487, "learning_rate": 0.0005965546218487395, "loss": 0.3979, "step": 14513 }, { "epoch": 8.108379888268157, "grad_norm": 0.6019068956375122, "learning_rate": 0.0005965266106442577, "loss": 0.4851, "step": 14514 }, { "epoch": 8.108938547486034, "grad_norm": 0.9674901962280273, "learning_rate": 0.0005964985994397759, "loss": 0.4301, "step": 14515 }, { "epoch": 8.10949720670391, "grad_norm": 0.5199589133262634, "learning_rate": 0.0005964705882352941, "loss": 0.2968, "step": 14516 }, { "epoch": 8.110055865921789, "grad_norm": 0.44233837723731995, "learning_rate": 0.0005964425770308123, "loss": 0.4034, "step": 14517 }, { "epoch": 8.110614525139665, "grad_norm": 0.5038004517555237, "learning_rate": 0.0005964145658263305, "loss": 0.535, "step": 14518 }, { "epoch": 8.111173184357542, "grad_norm": 0.4609309136867523, "learning_rate": 0.0005963865546218487, "loss": 
0.4152, "step": 14519 }, { "epoch": 8.111731843575418, "grad_norm": 0.4320252239704132, "learning_rate": 0.0005963585434173669, "loss": 0.3917, "step": 14520 }, { "epoch": 8.112290502793297, "grad_norm": 0.4233516454696655, "learning_rate": 0.0005963305322128851, "loss": 0.3793, "step": 14521 }, { "epoch": 8.112849162011173, "grad_norm": 0.5657986402511597, "learning_rate": 0.0005963025210084034, "loss": 0.5482, "step": 14522 }, { "epoch": 8.11340782122905, "grad_norm": 0.7307112812995911, "learning_rate": 0.0005962745098039217, "loss": 0.4447, "step": 14523 }, { "epoch": 8.113966480446928, "grad_norm": 0.5015503764152527, "learning_rate": 0.0005962464985994398, "loss": 0.4389, "step": 14524 }, { "epoch": 8.114525139664805, "grad_norm": 0.5661396384239197, "learning_rate": 0.000596218487394958, "loss": 0.4389, "step": 14525 }, { "epoch": 8.115083798882681, "grad_norm": 0.5356577634811401, "learning_rate": 0.0005961904761904762, "loss": 0.4356, "step": 14526 }, { "epoch": 8.11564245810056, "grad_norm": 0.5272166132926941, "learning_rate": 0.0005961624649859944, "loss": 0.3509, "step": 14527 }, { "epoch": 8.116201117318436, "grad_norm": 0.6902635097503662, "learning_rate": 0.0005961344537815127, "loss": 0.3811, "step": 14528 }, { "epoch": 8.116759776536313, "grad_norm": 0.4835710823535919, "learning_rate": 0.0005961064425770308, "loss": 0.5222, "step": 14529 }, { "epoch": 8.11731843575419, "grad_norm": 0.6754847168922424, "learning_rate": 0.000596078431372549, "loss": 0.4314, "step": 14530 }, { "epoch": 8.117877094972068, "grad_norm": 0.7182206511497498, "learning_rate": 0.0005960504201680672, "loss": 0.6087, "step": 14531 }, { "epoch": 8.118435754189944, "grad_norm": 1.2911028861999512, "learning_rate": 0.0005960224089635854, "loss": 0.505, "step": 14532 }, { "epoch": 8.11899441340782, "grad_norm": 0.4551449716091156, "learning_rate": 0.0005959943977591037, "loss": 0.3461, "step": 14533 }, { "epoch": 8.119553072625699, "grad_norm": 0.3183300495147705, 
"learning_rate": 0.0005959663865546218, "loss": 0.376, "step": 14534 }, { "epoch": 8.120111731843576, "grad_norm": 0.6173131465911865, "learning_rate": 0.00059593837535014, "loss": 0.5936, "step": 14535 }, { "epoch": 8.120670391061452, "grad_norm": 1.7582385540008545, "learning_rate": 0.0005959103641456582, "loss": 0.3967, "step": 14536 }, { "epoch": 8.121229050279329, "grad_norm": 0.6299956440925598, "learning_rate": 0.0005958823529411764, "loss": 0.4445, "step": 14537 }, { "epoch": 8.121787709497207, "grad_norm": 0.4965883195400238, "learning_rate": 0.0005958543417366948, "loss": 0.6776, "step": 14538 }, { "epoch": 8.122346368715084, "grad_norm": 0.46675586700439453, "learning_rate": 0.000595826330532213, "loss": 0.341, "step": 14539 }, { "epoch": 8.12290502793296, "grad_norm": 2.13820743560791, "learning_rate": 0.0005957983193277311, "loss": 0.4011, "step": 14540 }, { "epoch": 8.123463687150839, "grad_norm": 0.6492177844047546, "learning_rate": 0.0005957703081232493, "loss": 0.4528, "step": 14541 }, { "epoch": 8.124022346368715, "grad_norm": 0.4511052966117859, "learning_rate": 0.0005957422969187675, "loss": 0.3951, "step": 14542 }, { "epoch": 8.124581005586592, "grad_norm": 0.5533331036567688, "learning_rate": 0.0005957142857142858, "loss": 0.3492, "step": 14543 }, { "epoch": 8.12513966480447, "grad_norm": 0.5376847982406616, "learning_rate": 0.000595686274509804, "loss": 0.3695, "step": 14544 }, { "epoch": 8.125698324022347, "grad_norm": 0.4759940505027771, "learning_rate": 0.0005956582633053221, "loss": 0.3654, "step": 14545 }, { "epoch": 8.126256983240223, "grad_norm": 0.8489264249801636, "learning_rate": 0.0005956302521008403, "loss": 0.4596, "step": 14546 }, { "epoch": 8.1268156424581, "grad_norm": 0.4568347632884979, "learning_rate": 0.0005956022408963585, "loss": 0.397, "step": 14547 }, { "epoch": 8.127374301675978, "grad_norm": 0.37949851155281067, "learning_rate": 0.0005955742296918768, "loss": 0.4906, "step": 14548 }, { "epoch": 8.127932960893855, 
"grad_norm": 0.478251576423645, "learning_rate": 0.000595546218487395, "loss": 0.512, "step": 14549 }, { "epoch": 8.128491620111731, "grad_norm": 11.636027336120605, "learning_rate": 0.0005955182072829131, "loss": 0.3584, "step": 14550 }, { "epoch": 8.12905027932961, "grad_norm": 0.5634673833847046, "learning_rate": 0.0005954901960784313, "loss": 0.5212, "step": 14551 }, { "epoch": 8.129608938547486, "grad_norm": 0.42000940442085266, "learning_rate": 0.0005954621848739495, "loss": 0.3316, "step": 14552 }, { "epoch": 8.130167597765363, "grad_norm": 0.5818812847137451, "learning_rate": 0.0005954341736694678, "loss": 0.4819, "step": 14553 }, { "epoch": 8.130726256983241, "grad_norm": 2.9924280643463135, "learning_rate": 0.0005954061624649861, "loss": 0.4644, "step": 14554 }, { "epoch": 8.131284916201118, "grad_norm": 0.39869850873947144, "learning_rate": 0.0005953781512605043, "loss": 0.3366, "step": 14555 }, { "epoch": 8.131843575418994, "grad_norm": 0.42670831084251404, "learning_rate": 0.0005953501400560224, "loss": 0.4306, "step": 14556 }, { "epoch": 8.13240223463687, "grad_norm": 5.134698867797852, "learning_rate": 0.0005953221288515406, "loss": 0.3672, "step": 14557 }, { "epoch": 8.132960893854749, "grad_norm": 0.529649019241333, "learning_rate": 0.0005952941176470589, "loss": 0.5841, "step": 14558 }, { "epoch": 8.133519553072626, "grad_norm": 0.4746466279029846, "learning_rate": 0.0005952661064425771, "loss": 0.4608, "step": 14559 }, { "epoch": 8.134078212290502, "grad_norm": 0.6194973587989807, "learning_rate": 0.0005952380952380953, "loss": 0.5004, "step": 14560 }, { "epoch": 8.13463687150838, "grad_norm": 0.5827535390853882, "learning_rate": 0.0005952100840336134, "loss": 0.4828, "step": 14561 }, { "epoch": 8.135195530726257, "grad_norm": 0.8640741109848022, "learning_rate": 0.0005951820728291316, "loss": 0.3779, "step": 14562 }, { "epoch": 8.135754189944134, "grad_norm": 0.3662491738796234, "learning_rate": 0.0005951540616246499, "loss": 0.437, "step": 
14563 }, { "epoch": 8.136312849162012, "grad_norm": 0.5627434253692627, "learning_rate": 0.0005951260504201681, "loss": 0.4627, "step": 14564 }, { "epoch": 8.136871508379889, "grad_norm": 4.679205894470215, "learning_rate": 0.0005950980392156863, "loss": 0.4268, "step": 14565 }, { "epoch": 8.137430167597765, "grad_norm": 0.437671035528183, "learning_rate": 0.0005950700280112044, "loss": 0.3921, "step": 14566 }, { "epoch": 8.137988826815642, "grad_norm": 0.4792025685310364, "learning_rate": 0.0005950420168067226, "loss": 0.3736, "step": 14567 }, { "epoch": 8.13854748603352, "grad_norm": 0.7871202826499939, "learning_rate": 0.0005950140056022409, "loss": 0.4548, "step": 14568 }, { "epoch": 8.139106145251397, "grad_norm": 0.5289686918258667, "learning_rate": 0.0005949859943977591, "loss": 0.4754, "step": 14569 }, { "epoch": 8.139664804469273, "grad_norm": 0.7209805250167847, "learning_rate": 0.0005949579831932774, "loss": 0.4984, "step": 14570 }, { "epoch": 8.140223463687152, "grad_norm": 0.5015156269073486, "learning_rate": 0.0005949299719887956, "loss": 0.385, "step": 14571 }, { "epoch": 8.140782122905028, "grad_norm": 0.5621908903121948, "learning_rate": 0.0005949019607843137, "loss": 0.4219, "step": 14572 }, { "epoch": 8.141340782122905, "grad_norm": 0.5540359020233154, "learning_rate": 0.000594873949579832, "loss": 0.5039, "step": 14573 }, { "epoch": 8.141899441340781, "grad_norm": 0.9592664241790771, "learning_rate": 0.0005948459383753502, "loss": 0.4666, "step": 14574 }, { "epoch": 8.14245810055866, "grad_norm": 0.4834812581539154, "learning_rate": 0.0005948179271708684, "loss": 0.3982, "step": 14575 }, { "epoch": 8.143016759776536, "grad_norm": 0.596591055393219, "learning_rate": 0.0005947899159663866, "loss": 0.3617, "step": 14576 }, { "epoch": 8.143575418994413, "grad_norm": 0.5861586332321167, "learning_rate": 0.0005947619047619047, "loss": 0.5164, "step": 14577 }, { "epoch": 8.144134078212291, "grad_norm": 1.3980904817581177, "learning_rate": 
0.000594733893557423, "loss": 0.4516, "step": 14578 }, { "epoch": 8.144692737430168, "grad_norm": 0.5210471749305725, "learning_rate": 0.0005947058823529412, "loss": 0.4487, "step": 14579 }, { "epoch": 8.145251396648044, "grad_norm": 0.5994924306869507, "learning_rate": 0.0005946778711484594, "loss": 0.4097, "step": 14580 }, { "epoch": 8.145810055865923, "grad_norm": 0.6276729702949524, "learning_rate": 0.0005946498599439776, "loss": 0.5411, "step": 14581 }, { "epoch": 8.1463687150838, "grad_norm": 0.8257356882095337, "learning_rate": 0.0005946218487394957, "loss": 0.4328, "step": 14582 }, { "epoch": 8.146927374301676, "grad_norm": 0.470299631357193, "learning_rate": 0.000594593837535014, "loss": 0.4495, "step": 14583 }, { "epoch": 8.147486033519552, "grad_norm": 0.47502925992012024, "learning_rate": 0.0005945658263305322, "loss": 0.4185, "step": 14584 }, { "epoch": 8.14804469273743, "grad_norm": 0.3554491698741913, "learning_rate": 0.0005945378151260504, "loss": 0.3505, "step": 14585 }, { "epoch": 8.148603351955307, "grad_norm": 0.5056933760643005, "learning_rate": 0.0005945098039215686, "loss": 0.4115, "step": 14586 }, { "epoch": 8.149162011173184, "grad_norm": 2.021817922592163, "learning_rate": 0.0005944817927170869, "loss": 0.4242, "step": 14587 }, { "epoch": 8.149720670391062, "grad_norm": 0.76506507396698, "learning_rate": 0.0005944537815126051, "loss": 0.4783, "step": 14588 }, { "epoch": 8.150279329608939, "grad_norm": 0.3604593575000763, "learning_rate": 0.0005944257703081233, "loss": 0.3483, "step": 14589 }, { "epoch": 8.150837988826815, "grad_norm": 0.5649810433387756, "learning_rate": 0.0005943977591036415, "loss": 0.4487, "step": 14590 }, { "epoch": 8.151396648044694, "grad_norm": 0.6508882641792297, "learning_rate": 0.0005943697478991597, "loss": 0.4834, "step": 14591 }, { "epoch": 8.15195530726257, "grad_norm": 0.46237534284591675, "learning_rate": 0.0005943417366946779, "loss": 0.4237, "step": 14592 }, { "epoch": 8.152513966480447, "grad_norm": 
0.7530639171600342, "learning_rate": 0.0005943137254901961, "loss": 0.3495, "step": 14593 }, { "epoch": 8.153072625698323, "grad_norm": 0.5456220507621765, "learning_rate": 0.0005942857142857143, "loss": 0.4674, "step": 14594 }, { "epoch": 8.153631284916202, "grad_norm": 0.57440584897995, "learning_rate": 0.0005942577030812325, "loss": 0.4725, "step": 14595 }, { "epoch": 8.154189944134078, "grad_norm": 0.4562806487083435, "learning_rate": 0.0005942296918767507, "loss": 0.3635, "step": 14596 }, { "epoch": 8.154748603351955, "grad_norm": 0.41335538029670715, "learning_rate": 0.0005942016806722689, "loss": 0.3356, "step": 14597 }, { "epoch": 8.155307262569833, "grad_norm": 0.6414315104484558, "learning_rate": 0.0005941736694677871, "loss": 0.503, "step": 14598 }, { "epoch": 8.15586592178771, "grad_norm": 0.917523205280304, "learning_rate": 0.0005941456582633053, "loss": 0.4008, "step": 14599 }, { "epoch": 8.156424581005586, "grad_norm": 0.6649678349494934, "learning_rate": 0.0005941176470588235, "loss": 0.5333, "step": 14600 }, { "epoch": 8.156983240223465, "grad_norm": 0.8768752813339233, "learning_rate": 0.0005940896358543417, "loss": 0.4267, "step": 14601 }, { "epoch": 8.157541899441341, "grad_norm": 0.40516921877861023, "learning_rate": 0.0005940616246498599, "loss": 0.4035, "step": 14602 }, { "epoch": 8.158100558659218, "grad_norm": 0.4275057017803192, "learning_rate": 0.0005940336134453783, "loss": 0.4139, "step": 14603 }, { "epoch": 8.158659217877094, "grad_norm": 0.6677563786506653, "learning_rate": 0.0005940056022408964, "loss": 0.5749, "step": 14604 }, { "epoch": 8.159217877094973, "grad_norm": 0.48846563696861267, "learning_rate": 0.0005939775910364146, "loss": 0.4933, "step": 14605 }, { "epoch": 8.15977653631285, "grad_norm": 0.47140753269195557, "learning_rate": 0.0005939495798319328, "loss": 0.416, "step": 14606 }, { "epoch": 8.160335195530726, "grad_norm": 0.3789716958999634, "learning_rate": 0.000593921568627451, "loss": 0.4539, "step": 14607 }, { 
"epoch": 8.160893854748604, "grad_norm": 0.35282275080680847, "learning_rate": 0.0005938935574229693, "loss": 0.4277, "step": 14608 }, { "epoch": 8.16145251396648, "grad_norm": 0.4037696123123169, "learning_rate": 0.0005938655462184874, "loss": 0.4295, "step": 14609 }, { "epoch": 8.162011173184357, "grad_norm": 2.7429211139678955, "learning_rate": 0.0005938375350140056, "loss": 0.3659, "step": 14610 }, { "epoch": 8.162569832402234, "grad_norm": 0.46585479378700256, "learning_rate": 0.0005938095238095238, "loss": 0.4222, "step": 14611 }, { "epoch": 8.163128491620112, "grad_norm": 0.4178381562232971, "learning_rate": 0.000593781512605042, "loss": 0.4079, "step": 14612 }, { "epoch": 8.163687150837989, "grad_norm": 0.6297781467437744, "learning_rate": 0.0005937535014005603, "loss": 0.3685, "step": 14613 }, { "epoch": 8.164245810055865, "grad_norm": 0.45283186435699463, "learning_rate": 0.0005937254901960784, "loss": 0.3259, "step": 14614 }, { "epoch": 8.164804469273744, "grad_norm": 1.1479166746139526, "learning_rate": 0.0005936974789915966, "loss": 0.3486, "step": 14615 }, { "epoch": 8.16536312849162, "grad_norm": 1.191484808921814, "learning_rate": 0.0005936694677871148, "loss": 0.3828, "step": 14616 }, { "epoch": 8.165921787709497, "grad_norm": 0.763431191444397, "learning_rate": 0.000593641456582633, "loss": 0.5039, "step": 14617 }, { "epoch": 8.166480446927375, "grad_norm": 0.6036748290061951, "learning_rate": 0.0005936134453781513, "loss": 0.4643, "step": 14618 }, { "epoch": 8.167039106145252, "grad_norm": 1.3252840042114258, "learning_rate": 0.0005935854341736696, "loss": 0.6149, "step": 14619 }, { "epoch": 8.167597765363128, "grad_norm": 0.4684814214706421, "learning_rate": 0.0005935574229691877, "loss": 0.3629, "step": 14620 }, { "epoch": 8.168156424581005, "grad_norm": 0.593262255191803, "learning_rate": 0.0005935294117647059, "loss": 0.3708, "step": 14621 }, { "epoch": 8.168715083798883, "grad_norm": 1.5334376096725464, "learning_rate": 
0.0005935014005602241, "loss": 0.4969, "step": 14622 }, { "epoch": 8.16927374301676, "grad_norm": 0.44006046652793884, "learning_rate": 0.0005934733893557424, "loss": 0.4523, "step": 14623 }, { "epoch": 8.169832402234636, "grad_norm": 0.604774534702301, "learning_rate": 0.0005934453781512606, "loss": 0.4958, "step": 14624 }, { "epoch": 8.170391061452515, "grad_norm": 1.083076000213623, "learning_rate": 0.0005934173669467787, "loss": 0.3813, "step": 14625 }, { "epoch": 8.170949720670391, "grad_norm": 0.9524022936820984, "learning_rate": 0.0005933893557422969, "loss": 0.4315, "step": 14626 }, { "epoch": 8.171508379888268, "grad_norm": 1.3805259466171265, "learning_rate": 0.0005933613445378151, "loss": 0.5444, "step": 14627 }, { "epoch": 8.172067039106146, "grad_norm": 0.612502932548523, "learning_rate": 0.0005933333333333334, "loss": 0.4183, "step": 14628 }, { "epoch": 8.172625698324023, "grad_norm": 0.7701265215873718, "learning_rate": 0.0005933053221288516, "loss": 0.5105, "step": 14629 }, { "epoch": 8.1731843575419, "grad_norm": 0.47687798738479614, "learning_rate": 0.0005932773109243697, "loss": 0.4961, "step": 14630 }, { "epoch": 8.173743016759776, "grad_norm": 0.3955836594104767, "learning_rate": 0.0005932492997198879, "loss": 0.3551, "step": 14631 }, { "epoch": 8.174301675977654, "grad_norm": 0.40008121728897095, "learning_rate": 0.0005932212885154061, "loss": 0.4104, "step": 14632 }, { "epoch": 8.17486033519553, "grad_norm": 2.1390469074249268, "learning_rate": 0.0005931932773109244, "loss": 0.4783, "step": 14633 }, { "epoch": 8.175418994413407, "grad_norm": 0.8249197006225586, "learning_rate": 0.0005931652661064426, "loss": 0.5323, "step": 14634 }, { "epoch": 8.175977653631286, "grad_norm": 0.8177069425582886, "learning_rate": 0.0005931372549019608, "loss": 0.4727, "step": 14635 }, { "epoch": 8.176536312849162, "grad_norm": 0.6845154762268066, "learning_rate": 0.000593109243697479, "loss": 0.4864, "step": 14636 }, { "epoch": 8.177094972067039, "grad_norm": 
0.6187852025032043, "learning_rate": 0.0005930812324929972, "loss": 0.4349, "step": 14637 }, { "epoch": 8.177653631284917, "grad_norm": 0.4934499263763428, "learning_rate": 0.0005930532212885155, "loss": 0.5327, "step": 14638 }, { "epoch": 8.178212290502794, "grad_norm": 0.8398798108100891, "learning_rate": 0.0005930252100840337, "loss": 0.5663, "step": 14639 }, { "epoch": 8.17877094972067, "grad_norm": 0.7031766772270203, "learning_rate": 0.0005929971988795519, "loss": 0.418, "step": 14640 }, { "epoch": 8.179329608938547, "grad_norm": 0.46452808380126953, "learning_rate": 0.00059296918767507, "loss": 0.3706, "step": 14641 }, { "epoch": 8.179888268156425, "grad_norm": 0.5066023468971252, "learning_rate": 0.0005929411764705882, "loss": 0.4157, "step": 14642 }, { "epoch": 8.180446927374302, "grad_norm": 1.4259673357009888, "learning_rate": 0.0005929131652661065, "loss": 0.5676, "step": 14643 }, { "epoch": 8.181005586592178, "grad_norm": 0.5272063612937927, "learning_rate": 0.0005928851540616247, "loss": 0.4865, "step": 14644 }, { "epoch": 8.181564245810057, "grad_norm": 0.7903665900230408, "learning_rate": 0.0005928571428571429, "loss": 0.5629, "step": 14645 }, { "epoch": 8.182122905027933, "grad_norm": 0.9882017374038696, "learning_rate": 0.000592829131652661, "loss": 0.4886, "step": 14646 }, { "epoch": 8.18268156424581, "grad_norm": 0.49136465787887573, "learning_rate": 0.0005928011204481792, "loss": 0.4844, "step": 14647 }, { "epoch": 8.183240223463686, "grad_norm": 0.8579321503639221, "learning_rate": 0.0005927731092436975, "loss": 0.4557, "step": 14648 }, { "epoch": 8.183798882681565, "grad_norm": 0.7831169366836548, "learning_rate": 0.0005927450980392157, "loss": 0.4687, "step": 14649 }, { "epoch": 8.184357541899441, "grad_norm": 0.3897453248500824, "learning_rate": 0.0005927170868347339, "loss": 0.3899, "step": 14650 }, { "epoch": 8.184916201117318, "grad_norm": 0.3552367687225342, "learning_rate": 0.0005926890756302521, "loss": 0.3602, "step": 14651 }, { 
"epoch": 8.185474860335196, "grad_norm": 0.9231951236724854, "learning_rate": 0.0005926610644257702, "loss": 0.4286, "step": 14652 }, { "epoch": 8.186033519553073, "grad_norm": 0.3408631980419159, "learning_rate": 0.0005926330532212886, "loss": 0.394, "step": 14653 }, { "epoch": 8.18659217877095, "grad_norm": 0.5777474641799927, "learning_rate": 0.0005926050420168068, "loss": 0.4021, "step": 14654 }, { "epoch": 8.187150837988828, "grad_norm": 1.9134604930877686, "learning_rate": 0.000592577030812325, "loss": 0.5811, "step": 14655 }, { "epoch": 8.187709497206704, "grad_norm": 0.519878089427948, "learning_rate": 0.0005925490196078432, "loss": 0.3929, "step": 14656 }, { "epoch": 8.18826815642458, "grad_norm": 0.46713653206825256, "learning_rate": 0.0005925210084033613, "loss": 0.482, "step": 14657 }, { "epoch": 8.188826815642457, "grad_norm": 0.5428266525268555, "learning_rate": 0.0005924929971988796, "loss": 0.4266, "step": 14658 }, { "epoch": 8.189385474860336, "grad_norm": 0.6040323376655579, "learning_rate": 0.0005924649859943978, "loss": 0.4549, "step": 14659 }, { "epoch": 8.189944134078212, "grad_norm": 0.4560234546661377, "learning_rate": 0.000592436974789916, "loss": 0.4911, "step": 14660 }, { "epoch": 8.190502793296089, "grad_norm": 0.6009203195571899, "learning_rate": 0.0005924089635854342, "loss": 0.4201, "step": 14661 }, { "epoch": 8.191061452513967, "grad_norm": 0.42681869864463806, "learning_rate": 0.0005923809523809523, "loss": 0.4044, "step": 14662 }, { "epoch": 8.191620111731844, "grad_norm": 0.6925278306007385, "learning_rate": 0.0005923529411764706, "loss": 0.5338, "step": 14663 }, { "epoch": 8.19217877094972, "grad_norm": 0.5270254015922546, "learning_rate": 0.0005923249299719888, "loss": 0.6332, "step": 14664 }, { "epoch": 8.192737430167599, "grad_norm": 0.4446677565574646, "learning_rate": 0.000592296918767507, "loss": 0.615, "step": 14665 }, { "epoch": 8.193296089385475, "grad_norm": 1.4417424201965332, "learning_rate": 0.0005922689075630252, 
"loss": 0.4108, "step": 14666 }, { "epoch": 8.193854748603352, "grad_norm": 0.6009916067123413, "learning_rate": 0.0005922408963585434, "loss": 0.406, "step": 14667 }, { "epoch": 8.194413407821228, "grad_norm": 0.5024697184562683, "learning_rate": 0.0005922128851540616, "loss": 0.4359, "step": 14668 }, { "epoch": 8.194972067039107, "grad_norm": 0.5052469372749329, "learning_rate": 0.0005921848739495799, "loss": 0.4521, "step": 14669 }, { "epoch": 8.195530726256983, "grad_norm": 1.6570814847946167, "learning_rate": 0.0005921568627450981, "loss": 0.7235, "step": 14670 }, { "epoch": 8.19608938547486, "grad_norm": 0.8787750005722046, "learning_rate": 0.0005921288515406163, "loss": 0.5639, "step": 14671 }, { "epoch": 8.196648044692738, "grad_norm": 0.46569761633872986, "learning_rate": 0.0005921008403361345, "loss": 0.5512, "step": 14672 }, { "epoch": 8.197206703910615, "grad_norm": 0.41483837366104126, "learning_rate": 0.0005920728291316527, "loss": 0.4842, "step": 14673 }, { "epoch": 8.197765363128491, "grad_norm": 0.5686022043228149, "learning_rate": 0.0005920448179271709, "loss": 0.5086, "step": 14674 }, { "epoch": 8.19832402234637, "grad_norm": 0.6898638010025024, "learning_rate": 0.0005920168067226891, "loss": 0.4715, "step": 14675 }, { "epoch": 8.198882681564246, "grad_norm": 0.6545287370681763, "learning_rate": 0.0005919887955182073, "loss": 0.656, "step": 14676 }, { "epoch": 8.199441340782123, "grad_norm": 0.5481773614883423, "learning_rate": 0.0005919607843137255, "loss": 0.3534, "step": 14677 }, { "epoch": 8.2, "grad_norm": 0.7593606114387512, "learning_rate": 0.0005919327731092437, "loss": 0.343, "step": 14678 }, { "epoch": 8.200558659217878, "grad_norm": 2.396329879760742, "learning_rate": 0.0005919047619047619, "loss": 0.4138, "step": 14679 }, { "epoch": 8.201117318435754, "grad_norm": 0.4819258153438568, "learning_rate": 0.0005918767507002801, "loss": 0.3967, "step": 14680 }, { "epoch": 8.20167597765363, "grad_norm": 0.4910144507884979, "learning_rate": 
0.0005918487394957983, "loss": 0.4803, "step": 14681 }, { "epoch": 8.202234636871509, "grad_norm": 0.713206946849823, "learning_rate": 0.0005918207282913165, "loss": 0.3215, "step": 14682 }, { "epoch": 8.202793296089386, "grad_norm": 0.430527001619339, "learning_rate": 0.0005917927170868348, "loss": 0.3517, "step": 14683 }, { "epoch": 8.203351955307262, "grad_norm": 5.664494514465332, "learning_rate": 0.0005917647058823529, "loss": 0.5239, "step": 14684 }, { "epoch": 8.203910614525139, "grad_norm": 0.5775011777877808, "learning_rate": 0.0005917366946778711, "loss": 0.3743, "step": 14685 }, { "epoch": 8.204469273743017, "grad_norm": 0.6236304044723511, "learning_rate": 0.0005917086834733894, "loss": 0.4016, "step": 14686 }, { "epoch": 8.205027932960894, "grad_norm": 0.4228741526603699, "learning_rate": 0.0005916806722689076, "loss": 0.4528, "step": 14687 }, { "epoch": 8.20558659217877, "grad_norm": 4.4908447265625, "learning_rate": 0.0005916526610644259, "loss": 0.4665, "step": 14688 }, { "epoch": 8.206145251396649, "grad_norm": 0.7010394930839539, "learning_rate": 0.000591624649859944, "loss": 0.4896, "step": 14689 }, { "epoch": 8.206703910614525, "grad_norm": 0.6479232311248779, "learning_rate": 0.0005915966386554622, "loss": 0.4491, "step": 14690 }, { "epoch": 8.207262569832402, "grad_norm": 0.46199384331703186, "learning_rate": 0.0005915686274509804, "loss": 0.4074, "step": 14691 }, { "epoch": 8.20782122905028, "grad_norm": 0.37624022364616394, "learning_rate": 0.0005915406162464986, "loss": 0.4328, "step": 14692 }, { "epoch": 8.208379888268157, "grad_norm": 0.5073803663253784, "learning_rate": 0.0005915126050420169, "loss": 0.528, "step": 14693 }, { "epoch": 8.208938547486033, "grad_norm": 0.5571429133415222, "learning_rate": 0.000591484593837535, "loss": 0.4453, "step": 14694 }, { "epoch": 8.20949720670391, "grad_norm": 0.5724432468414307, "learning_rate": 0.0005914565826330532, "loss": 0.4459, "step": 14695 }, { "epoch": 8.210055865921788, "grad_norm": 
6.897602558135986, "learning_rate": 0.0005914285714285714, "loss": 0.5211, "step": 14696 }, { "epoch": 8.210614525139665, "grad_norm": 0.4925957918167114, "learning_rate": 0.0005914005602240896, "loss": 0.4479, "step": 14697 }, { "epoch": 8.211173184357541, "grad_norm": 0.5022900700569153, "learning_rate": 0.0005913725490196079, "loss": 0.4388, "step": 14698 }, { "epoch": 8.21173184357542, "grad_norm": 0.42731478810310364, "learning_rate": 0.0005913445378151261, "loss": 0.4333, "step": 14699 }, { "epoch": 8.212290502793296, "grad_norm": 0.9523333311080933, "learning_rate": 0.0005913165266106442, "loss": 0.6219, "step": 14700 }, { "epoch": 8.212849162011173, "grad_norm": 0.6128084063529968, "learning_rate": 0.0005912885154061624, "loss": 0.3607, "step": 14701 }, { "epoch": 8.213407821229051, "grad_norm": 0.7442939281463623, "learning_rate": 0.0005912605042016807, "loss": 0.4726, "step": 14702 }, { "epoch": 8.213966480446928, "grad_norm": 1.232234239578247, "learning_rate": 0.000591232492997199, "loss": 0.4757, "step": 14703 }, { "epoch": 8.214525139664804, "grad_norm": 0.7523201704025269, "learning_rate": 0.0005912044817927172, "loss": 0.5099, "step": 14704 }, { "epoch": 8.21508379888268, "grad_norm": 0.9683617353439331, "learning_rate": 0.0005911764705882353, "loss": 0.5804, "step": 14705 }, { "epoch": 8.21564245810056, "grad_norm": 0.4094729721546173, "learning_rate": 0.0005911484593837535, "loss": 0.4704, "step": 14706 }, { "epoch": 8.216201117318436, "grad_norm": 2.586884021759033, "learning_rate": 0.0005911204481792717, "loss": 0.3714, "step": 14707 }, { "epoch": 8.216759776536312, "grad_norm": 0.5156849026679993, "learning_rate": 0.00059109243697479, "loss": 0.4664, "step": 14708 }, { "epoch": 8.21731843575419, "grad_norm": 0.4346437156200409, "learning_rate": 0.0005910644257703082, "loss": 0.4427, "step": 14709 }, { "epoch": 8.217877094972067, "grad_norm": 0.5476559996604919, "learning_rate": 0.0005910364145658263, "loss": 0.503, "step": 14710 }, { "epoch": 
8.218435754189944, "grad_norm": 0.7687256932258606, "learning_rate": 0.0005910084033613445, "loss": 0.4863, "step": 14711 }, { "epoch": 8.21899441340782, "grad_norm": 1.872087001800537, "learning_rate": 0.0005909803921568627, "loss": 0.4466, "step": 14712 }, { "epoch": 8.219553072625699, "grad_norm": 0.40305033326148987, "learning_rate": 0.000590952380952381, "loss": 0.4211, "step": 14713 }, { "epoch": 8.220111731843575, "grad_norm": 1.1250699758529663, "learning_rate": 0.0005909243697478992, "loss": 0.4989, "step": 14714 }, { "epoch": 8.220670391061452, "grad_norm": 0.4963032007217407, "learning_rate": 0.0005908963585434174, "loss": 0.3835, "step": 14715 }, { "epoch": 8.22122905027933, "grad_norm": 0.5300571918487549, "learning_rate": 0.0005908683473389355, "loss": 0.4368, "step": 14716 }, { "epoch": 8.221787709497207, "grad_norm": 1.3161786794662476, "learning_rate": 0.0005908403361344537, "loss": 0.4646, "step": 14717 }, { "epoch": 8.222346368715083, "grad_norm": 0.5102779269218445, "learning_rate": 0.0005908123249299721, "loss": 0.5128, "step": 14718 }, { "epoch": 8.222905027932962, "grad_norm": 0.744270384311676, "learning_rate": 0.0005907843137254903, "loss": 0.5052, "step": 14719 }, { "epoch": 8.223463687150838, "grad_norm": 0.4997437596321106, "learning_rate": 0.0005907563025210085, "loss": 0.4973, "step": 14720 }, { "epoch": 8.224022346368715, "grad_norm": 2.8112409114837646, "learning_rate": 0.0005907282913165266, "loss": 0.3742, "step": 14721 }, { "epoch": 8.224581005586591, "grad_norm": 0.4091721177101135, "learning_rate": 0.0005907002801120448, "loss": 0.4351, "step": 14722 }, { "epoch": 8.22513966480447, "grad_norm": 0.5062128305435181, "learning_rate": 0.0005906722689075631, "loss": 0.5213, "step": 14723 }, { "epoch": 8.225698324022346, "grad_norm": 0.7321590781211853, "learning_rate": 0.0005906442577030813, "loss": 0.4003, "step": 14724 }, { "epoch": 8.226256983240223, "grad_norm": 0.5163283348083496, "learning_rate": 0.0005906162464985995, "loss": 
0.4479, "step": 14725 }, { "epoch": 8.226815642458101, "grad_norm": 0.44984593987464905, "learning_rate": 0.0005905882352941176, "loss": 0.3771, "step": 14726 }, { "epoch": 8.227374301675978, "grad_norm": 0.48010358214378357, "learning_rate": 0.0005905602240896358, "loss": 0.438, "step": 14727 }, { "epoch": 8.227932960893854, "grad_norm": 0.503609836101532, "learning_rate": 0.000590532212885154, "loss": 0.4221, "step": 14728 }, { "epoch": 8.228491620111733, "grad_norm": 0.40139448642730713, "learning_rate": 0.0005905042016806723, "loss": 0.3252, "step": 14729 }, { "epoch": 8.22905027932961, "grad_norm": 0.4938288629055023, "learning_rate": 0.0005904761904761905, "loss": 0.4279, "step": 14730 }, { "epoch": 8.229608938547486, "grad_norm": 1.9487265348434448, "learning_rate": 0.0005904481792717087, "loss": 0.4552, "step": 14731 }, { "epoch": 8.230167597765362, "grad_norm": 0.471813440322876, "learning_rate": 0.0005904201680672268, "loss": 0.426, "step": 14732 }, { "epoch": 8.23072625698324, "grad_norm": 0.5210540294647217, "learning_rate": 0.000590392156862745, "loss": 0.428, "step": 14733 }, { "epoch": 8.231284916201117, "grad_norm": 0.7190614342689514, "learning_rate": 0.0005903641456582634, "loss": 0.3844, "step": 14734 }, { "epoch": 8.231843575418994, "grad_norm": 0.5396774411201477, "learning_rate": 0.0005903361344537816, "loss": 0.4167, "step": 14735 }, { "epoch": 8.232402234636872, "grad_norm": 0.602973461151123, "learning_rate": 0.0005903081232492998, "loss": 0.3907, "step": 14736 }, { "epoch": 8.232960893854749, "grad_norm": 0.39327582716941833, "learning_rate": 0.0005902801120448179, "loss": 0.4841, "step": 14737 }, { "epoch": 8.233519553072625, "grad_norm": 0.4999508261680603, "learning_rate": 0.0005902521008403361, "loss": 0.5663, "step": 14738 }, { "epoch": 8.234078212290504, "grad_norm": 0.6242808699607849, "learning_rate": 0.0005902240896358544, "loss": 0.3476, "step": 14739 }, { "epoch": 8.23463687150838, "grad_norm": 0.8377295136451721, 
"learning_rate": 0.0005901960784313726, "loss": 0.4243, "step": 14740 }, { "epoch": 8.235195530726257, "grad_norm": 0.4045666754245758, "learning_rate": 0.0005901680672268908, "loss": 0.4446, "step": 14741 }, { "epoch": 8.235754189944133, "grad_norm": 0.7267147898674011, "learning_rate": 0.0005901400560224089, "loss": 0.6043, "step": 14742 }, { "epoch": 8.236312849162012, "grad_norm": 0.53114253282547, "learning_rate": 0.0005901120448179271, "loss": 0.6431, "step": 14743 }, { "epoch": 8.236871508379888, "grad_norm": 0.43891292810440063, "learning_rate": 0.0005900840336134454, "loss": 0.4504, "step": 14744 }, { "epoch": 8.237430167597765, "grad_norm": 0.47547683119773865, "learning_rate": 0.0005900560224089636, "loss": 0.4518, "step": 14745 }, { "epoch": 8.237988826815643, "grad_norm": 0.441685289144516, "learning_rate": 0.0005900280112044818, "loss": 0.4763, "step": 14746 }, { "epoch": 8.23854748603352, "grad_norm": 0.7056431174278259, "learning_rate": 0.00059, "loss": 0.411, "step": 14747 }, { "epoch": 8.239106145251396, "grad_norm": 0.7852954268455505, "learning_rate": 0.0005899719887955181, "loss": 0.4232, "step": 14748 }, { "epoch": 8.239664804469275, "grad_norm": 0.6250525116920471, "learning_rate": 0.0005899439775910364, "loss": 0.4849, "step": 14749 }, { "epoch": 8.240223463687151, "grad_norm": 0.42558979988098145, "learning_rate": 0.0005899159663865546, "loss": 0.4631, "step": 14750 }, { "epoch": 8.240782122905028, "grad_norm": 2.365145683288574, "learning_rate": 0.0005898879551820729, "loss": 0.4091, "step": 14751 }, { "epoch": 8.241340782122904, "grad_norm": 0.5177446603775024, "learning_rate": 0.0005898599439775911, "loss": 0.3935, "step": 14752 }, { "epoch": 8.241899441340783, "grad_norm": 0.3864086866378784, "learning_rate": 0.0005898319327731092, "loss": 0.4146, "step": 14753 }, { "epoch": 8.24245810055866, "grad_norm": 0.6299644708633423, "learning_rate": 0.0005898039215686275, "loss": 0.4555, "step": 14754 }, { "epoch": 8.243016759776536, 
"grad_norm": 0.3918527662754059, "learning_rate": 0.0005897759103641457, "loss": 0.3623, "step": 14755 }, { "epoch": 8.243575418994414, "grad_norm": 0.4721129834651947, "learning_rate": 0.0005897478991596639, "loss": 0.4337, "step": 14756 }, { "epoch": 8.24413407821229, "grad_norm": 0.7677056789398193, "learning_rate": 0.0005897198879551821, "loss": 0.4758, "step": 14757 }, { "epoch": 8.244692737430167, "grad_norm": 0.5721067786216736, "learning_rate": 0.0005896918767507002, "loss": 0.589, "step": 14758 }, { "epoch": 8.245251396648044, "grad_norm": 1.1299498081207275, "learning_rate": 0.0005896638655462185, "loss": 0.5644, "step": 14759 }, { "epoch": 8.245810055865922, "grad_norm": 0.5949721336364746, "learning_rate": 0.0005896358543417367, "loss": 0.4949, "step": 14760 }, { "epoch": 8.246368715083799, "grad_norm": 0.5529798865318298, "learning_rate": 0.0005896078431372549, "loss": 0.5172, "step": 14761 }, { "epoch": 8.246927374301675, "grad_norm": 0.615172266960144, "learning_rate": 0.0005895798319327731, "loss": 0.5837, "step": 14762 }, { "epoch": 8.247486033519554, "grad_norm": 0.4608076810836792, "learning_rate": 0.0005895518207282913, "loss": 0.3914, "step": 14763 }, { "epoch": 8.24804469273743, "grad_norm": 0.38453319668769836, "learning_rate": 0.0005895238095238095, "loss": 0.3675, "step": 14764 }, { "epoch": 8.248603351955307, "grad_norm": 0.752778172492981, "learning_rate": 0.0005894957983193277, "loss": 0.4572, "step": 14765 }, { "epoch": 8.249162011173185, "grad_norm": 0.5437651872634888, "learning_rate": 0.0005894677871148459, "loss": 0.6394, "step": 14766 }, { "epoch": 8.249720670391062, "grad_norm": 0.6189334392547607, "learning_rate": 0.0005894397759103641, "loss": 0.3928, "step": 14767 }, { "epoch": 8.250279329608938, "grad_norm": 0.9584677219390869, "learning_rate": 0.0005894117647058824, "loss": 0.4543, "step": 14768 }, { "epoch": 8.250837988826815, "grad_norm": 0.5113201141357422, "learning_rate": 0.0005893837535014006, "loss": 0.4851, "step": 
14769 }, { "epoch": 8.251396648044693, "grad_norm": 0.41447630524635315, "learning_rate": 0.0005893557422969188, "loss": 0.4632, "step": 14770 }, { "epoch": 8.25195530726257, "grad_norm": 0.5667605400085449, "learning_rate": 0.000589327731092437, "loss": 0.3922, "step": 14771 }, { "epoch": 8.252513966480446, "grad_norm": 3.096233367919922, "learning_rate": 0.0005892997198879552, "loss": 0.5371, "step": 14772 }, { "epoch": 8.253072625698325, "grad_norm": 0.4266369044780731, "learning_rate": 0.0005892717086834734, "loss": 0.368, "step": 14773 }, { "epoch": 8.253631284916201, "grad_norm": 2.1888341903686523, "learning_rate": 0.0005892436974789917, "loss": 0.592, "step": 14774 }, { "epoch": 8.254189944134078, "grad_norm": 0.5836003422737122, "learning_rate": 0.0005892156862745098, "loss": 0.4407, "step": 14775 }, { "epoch": 8.254748603351956, "grad_norm": 0.6670942306518555, "learning_rate": 0.000589187675070028, "loss": 0.5716, "step": 14776 }, { "epoch": 8.255307262569833, "grad_norm": 0.5108802914619446, "learning_rate": 0.0005891596638655462, "loss": 0.442, "step": 14777 }, { "epoch": 8.25586592178771, "grad_norm": 0.8770310282707214, "learning_rate": 0.0005891316526610644, "loss": 0.5376, "step": 14778 }, { "epoch": 8.256424581005586, "grad_norm": 0.49834105372428894, "learning_rate": 0.0005891036414565827, "loss": 0.4849, "step": 14779 }, { "epoch": 8.256983240223464, "grad_norm": 0.538061261177063, "learning_rate": 0.0005890756302521008, "loss": 0.4243, "step": 14780 }, { "epoch": 8.25754189944134, "grad_norm": 0.43380460143089294, "learning_rate": 0.000589047619047619, "loss": 0.4664, "step": 14781 }, { "epoch": 8.258100558659217, "grad_norm": 0.5464326739311218, "learning_rate": 0.0005890196078431372, "loss": 0.4397, "step": 14782 }, { "epoch": 8.258659217877096, "grad_norm": 2.4383933544158936, "learning_rate": 0.0005889915966386554, "loss": 0.2838, "step": 14783 }, { "epoch": 8.259217877094972, "grad_norm": 0.6485145688056946, "learning_rate": 
0.0005889635854341738, "loss": 0.5056, "step": 14784 }, { "epoch": 8.259776536312849, "grad_norm": 0.4227007329463959, "learning_rate": 0.0005889355742296919, "loss": 0.3799, "step": 14785 }, { "epoch": 8.260335195530725, "grad_norm": 0.6206688284873962, "learning_rate": 0.0005889075630252101, "loss": 0.3493, "step": 14786 }, { "epoch": 8.260893854748604, "grad_norm": 0.7826471924781799, "learning_rate": 0.0005888795518207283, "loss": 0.4162, "step": 14787 }, { "epoch": 8.26145251396648, "grad_norm": 2.4297897815704346, "learning_rate": 0.0005888515406162465, "loss": 0.506, "step": 14788 }, { "epoch": 8.262011173184357, "grad_norm": 0.5957176089286804, "learning_rate": 0.0005888235294117648, "loss": 0.5489, "step": 14789 }, { "epoch": 8.262569832402235, "grad_norm": 0.7456678748130798, "learning_rate": 0.000588795518207283, "loss": 0.5109, "step": 14790 }, { "epoch": 8.263128491620112, "grad_norm": 0.3750740587711334, "learning_rate": 0.0005887675070028011, "loss": 0.3543, "step": 14791 }, { "epoch": 8.263687150837988, "grad_norm": 1.6128426790237427, "learning_rate": 0.0005887394957983193, "loss": 0.4306, "step": 14792 }, { "epoch": 8.264245810055867, "grad_norm": 0.40688377618789673, "learning_rate": 0.0005887114845938375, "loss": 0.415, "step": 14793 }, { "epoch": 8.264804469273743, "grad_norm": 0.45110994577407837, "learning_rate": 0.0005886834733893558, "loss": 0.3979, "step": 14794 }, { "epoch": 8.26536312849162, "grad_norm": 0.46282047033309937, "learning_rate": 0.000588655462184874, "loss": 0.3669, "step": 14795 }, { "epoch": 8.265921787709496, "grad_norm": 0.7603232264518738, "learning_rate": 0.0005886274509803921, "loss": 0.5639, "step": 14796 }, { "epoch": 8.266480446927375, "grad_norm": 0.641492486000061, "learning_rate": 0.0005885994397759103, "loss": 0.3854, "step": 14797 }, { "epoch": 8.267039106145251, "grad_norm": 0.7381540536880493, "learning_rate": 0.0005885714285714285, "loss": 0.5072, "step": 14798 }, { "epoch": 8.267597765363128, "grad_norm": 
0.616783857345581, "learning_rate": 0.0005885434173669468, "loss": 0.4772, "step": 14799 }, { "epoch": 8.268156424581006, "grad_norm": 0.6735701560974121, "learning_rate": 0.0005885154061624651, "loss": 0.3688, "step": 14800 }, { "epoch": 8.268715083798883, "grad_norm": 1.3496677875518799, "learning_rate": 0.0005884873949579832, "loss": 0.4789, "step": 14801 }, { "epoch": 8.26927374301676, "grad_norm": 0.4321037232875824, "learning_rate": 0.0005884593837535014, "loss": 0.4514, "step": 14802 }, { "epoch": 8.269832402234638, "grad_norm": 0.38056668639183044, "learning_rate": 0.0005884313725490196, "loss": 0.3793, "step": 14803 }, { "epoch": 8.270391061452514, "grad_norm": 0.44099777936935425, "learning_rate": 0.0005884033613445379, "loss": 0.4295, "step": 14804 }, { "epoch": 8.27094972067039, "grad_norm": 0.3462850749492645, "learning_rate": 0.0005883753501400561, "loss": 0.4106, "step": 14805 }, { "epoch": 8.271508379888267, "grad_norm": 0.5856468081474304, "learning_rate": 0.0005883473389355743, "loss": 0.4962, "step": 14806 }, { "epoch": 8.272067039106146, "grad_norm": 0.4173562824726105, "learning_rate": 0.0005883193277310924, "loss": 0.4585, "step": 14807 }, { "epoch": 8.272625698324022, "grad_norm": 0.4310797154903412, "learning_rate": 0.0005882913165266106, "loss": 0.3862, "step": 14808 }, { "epoch": 8.273184357541899, "grad_norm": 0.461738258600235, "learning_rate": 0.0005882633053221289, "loss": 0.5116, "step": 14809 }, { "epoch": 8.273743016759777, "grad_norm": 0.41237929463386536, "learning_rate": 0.0005882352941176471, "loss": 0.392, "step": 14810 }, { "epoch": 8.274301675977654, "grad_norm": 0.46872952580451965, "learning_rate": 0.0005882072829131653, "loss": 0.4341, "step": 14811 }, { "epoch": 8.27486033519553, "grad_norm": 0.49128663539886475, "learning_rate": 0.0005881792717086834, "loss": 0.5157, "step": 14812 }, { "epoch": 8.275418994413409, "grad_norm": 0.5641964077949524, "learning_rate": 0.0005881512605042016, "loss": 0.5872, "step": 14813 }, { 
"epoch": 8.275977653631285, "grad_norm": 0.379846453666687, "learning_rate": 0.0005881232492997199, "loss": 0.3761, "step": 14814 }, { "epoch": 8.276536312849162, "grad_norm": 0.5453080534934998, "learning_rate": 0.0005880952380952381, "loss": 0.3727, "step": 14815 }, { "epoch": 8.277094972067038, "grad_norm": 0.6268227696418762, "learning_rate": 0.0005880672268907564, "loss": 0.4792, "step": 14816 }, { "epoch": 8.277653631284917, "grad_norm": 0.8663527965545654, "learning_rate": 0.0005880392156862744, "loss": 0.6346, "step": 14817 }, { "epoch": 8.278212290502793, "grad_norm": 0.5720897912979126, "learning_rate": 0.0005880112044817927, "loss": 0.4205, "step": 14818 }, { "epoch": 8.27877094972067, "grad_norm": 0.6963171362876892, "learning_rate": 0.000587983193277311, "loss": 0.4822, "step": 14819 }, { "epoch": 8.279329608938548, "grad_norm": 0.766904354095459, "learning_rate": 0.0005879551820728292, "loss": 0.4264, "step": 14820 }, { "epoch": 8.279888268156425, "grad_norm": 0.3742465674877167, "learning_rate": 0.0005879271708683474, "loss": 0.4017, "step": 14821 }, { "epoch": 8.280446927374301, "grad_norm": 0.7821504473686218, "learning_rate": 0.0005878991596638656, "loss": 0.4799, "step": 14822 }, { "epoch": 8.28100558659218, "grad_norm": 0.6238220930099487, "learning_rate": 0.0005878711484593837, "loss": 0.4299, "step": 14823 }, { "epoch": 8.281564245810056, "grad_norm": 0.8606948852539062, "learning_rate": 0.000587843137254902, "loss": 0.4149, "step": 14824 }, { "epoch": 8.282122905027933, "grad_norm": 1.495185136795044, "learning_rate": 0.0005878151260504202, "loss": 0.3506, "step": 14825 }, { "epoch": 8.28268156424581, "grad_norm": 2.243377208709717, "learning_rate": 0.0005877871148459384, "loss": 0.5415, "step": 14826 }, { "epoch": 8.283240223463688, "grad_norm": 0.3555644750595093, "learning_rate": 0.0005877591036414566, "loss": 0.4041, "step": 14827 }, { "epoch": 8.283798882681564, "grad_norm": 0.5263892412185669, "learning_rate": 0.0005877310924369747, 
"loss": 0.4354, "step": 14828 }, { "epoch": 8.28435754189944, "grad_norm": 0.5312854647636414, "learning_rate": 0.000587703081232493, "loss": 0.5255, "step": 14829 }, { "epoch": 8.28491620111732, "grad_norm": 1.4136950969696045, "learning_rate": 0.0005876750700280112, "loss": 0.3816, "step": 14830 }, { "epoch": 8.285474860335196, "grad_norm": 0.642829179763794, "learning_rate": 0.0005876470588235294, "loss": 0.4379, "step": 14831 }, { "epoch": 8.286033519553072, "grad_norm": 0.49640750885009766, "learning_rate": 0.0005876190476190476, "loss": 0.5183, "step": 14832 }, { "epoch": 8.286592178770949, "grad_norm": 1.241936206817627, "learning_rate": 0.0005875910364145657, "loss": 0.3919, "step": 14833 }, { "epoch": 8.287150837988827, "grad_norm": 0.5289754867553711, "learning_rate": 0.0005875630252100841, "loss": 0.3893, "step": 14834 }, { "epoch": 8.287709497206704, "grad_norm": 0.41866251826286316, "learning_rate": 0.0005875350140056023, "loss": 0.4189, "step": 14835 }, { "epoch": 8.28826815642458, "grad_norm": 0.6389116644859314, "learning_rate": 0.0005875070028011205, "loss": 0.5844, "step": 14836 }, { "epoch": 8.288826815642459, "grad_norm": 3.5144050121307373, "learning_rate": 0.0005874789915966387, "loss": 0.4533, "step": 14837 }, { "epoch": 8.289385474860335, "grad_norm": 0.6351314187049866, "learning_rate": 0.0005874509803921569, "loss": 0.4714, "step": 14838 }, { "epoch": 8.289944134078212, "grad_norm": 2.8534011840820312, "learning_rate": 0.0005874229691876751, "loss": 0.4773, "step": 14839 }, { "epoch": 8.29050279329609, "grad_norm": 0.5138262510299683, "learning_rate": 0.0005873949579831933, "loss": 0.416, "step": 14840 }, { "epoch": 8.291061452513967, "grad_norm": 3.2907907962799072, "learning_rate": 0.0005873669467787115, "loss": 0.4474, "step": 14841 }, { "epoch": 8.291620111731843, "grad_norm": 0.3572208285331726, "learning_rate": 0.0005873389355742297, "loss": 0.3088, "step": 14842 }, { "epoch": 8.29217877094972, "grad_norm": 0.5566766858100891, 
"learning_rate": 0.0005873109243697479, "loss": 0.3802, "step": 14843 }, { "epoch": 8.292737430167598, "grad_norm": 0.753349244594574, "learning_rate": 0.0005872829131652661, "loss": 0.4487, "step": 14844 }, { "epoch": 8.293296089385475, "grad_norm": 0.4712984561920166, "learning_rate": 0.0005872549019607843, "loss": 0.4232, "step": 14845 }, { "epoch": 8.293854748603351, "grad_norm": 0.8797717094421387, "learning_rate": 0.0005872268907563025, "loss": 0.3483, "step": 14846 }, { "epoch": 8.29441340782123, "grad_norm": 1.030199408531189, "learning_rate": 0.0005871988795518207, "loss": 0.5208, "step": 14847 }, { "epoch": 8.294972067039106, "grad_norm": 0.41478410363197327, "learning_rate": 0.0005871708683473389, "loss": 0.4291, "step": 14848 }, { "epoch": 8.295530726256983, "grad_norm": 0.40925636887550354, "learning_rate": 0.0005871428571428571, "loss": 0.4043, "step": 14849 }, { "epoch": 8.296089385474861, "grad_norm": 0.9737657904624939, "learning_rate": 0.0005871148459383754, "loss": 0.4006, "step": 14850 }, { "epoch": 8.296648044692738, "grad_norm": 0.5900642275810242, "learning_rate": 0.0005870868347338936, "loss": 0.4699, "step": 14851 }, { "epoch": 8.297206703910614, "grad_norm": 0.6438400149345398, "learning_rate": 0.0005870588235294118, "loss": 0.4955, "step": 14852 }, { "epoch": 8.297765363128491, "grad_norm": 0.652421236038208, "learning_rate": 0.00058703081232493, "loss": 0.4258, "step": 14853 }, { "epoch": 8.29832402234637, "grad_norm": 0.7232846021652222, "learning_rate": 0.0005870028011204483, "loss": 0.4107, "step": 14854 }, { "epoch": 8.298882681564246, "grad_norm": 0.534042477607727, "learning_rate": 0.0005869747899159664, "loss": 0.3472, "step": 14855 }, { "epoch": 8.299441340782122, "grad_norm": 0.5714625716209412, "learning_rate": 0.0005869467787114846, "loss": 0.4111, "step": 14856 }, { "epoch": 8.3, "grad_norm": 3.703117847442627, "learning_rate": 0.0005869187675070028, "loss": 0.4347, "step": 14857 }, { "epoch": 8.300558659217877, "grad_norm": 
0.47215965390205383, "learning_rate": 0.000586890756302521, "loss": 0.4054, "step": 14858 }, { "epoch": 8.301117318435754, "grad_norm": 0.5239530801773071, "learning_rate": 0.0005868627450980393, "loss": 0.4525, "step": 14859 }, { "epoch": 8.30167597765363, "grad_norm": 0.36858704686164856, "learning_rate": 0.0005868347338935574, "loss": 0.3215, "step": 14860 }, { "epoch": 8.302234636871509, "grad_norm": 0.6091682314872742, "learning_rate": 0.0005868067226890756, "loss": 0.348, "step": 14861 }, { "epoch": 8.302793296089385, "grad_norm": 0.39357200264930725, "learning_rate": 0.0005867787114845938, "loss": 0.3912, "step": 14862 }, { "epoch": 8.303351955307262, "grad_norm": 0.45767244696617126, "learning_rate": 0.000586750700280112, "loss": 0.4689, "step": 14863 }, { "epoch": 8.30391061452514, "grad_norm": 0.5937530994415283, "learning_rate": 0.0005867226890756303, "loss": 0.3904, "step": 14864 }, { "epoch": 8.304469273743017, "grad_norm": 0.6735968589782715, "learning_rate": 0.0005866946778711484, "loss": 0.4025, "step": 14865 }, { "epoch": 8.305027932960893, "grad_norm": 0.5939727425575256, "learning_rate": 0.0005866666666666667, "loss": 0.4157, "step": 14866 }, { "epoch": 8.305586592178772, "grad_norm": 0.4244139492511749, "learning_rate": 0.0005866386554621849, "loss": 0.3759, "step": 14867 }, { "epoch": 8.306145251396648, "grad_norm": 0.3585618734359741, "learning_rate": 0.0005866106442577031, "loss": 0.3625, "step": 14868 }, { "epoch": 8.306703910614525, "grad_norm": 0.9539227485656738, "learning_rate": 0.0005865826330532214, "loss": 0.4714, "step": 14869 }, { "epoch": 8.307262569832401, "grad_norm": 0.475507527589798, "learning_rate": 0.0005865546218487396, "loss": 0.3395, "step": 14870 }, { "epoch": 8.30782122905028, "grad_norm": 0.6727422475814819, "learning_rate": 0.0005865266106442577, "loss": 0.5992, "step": 14871 }, { "epoch": 8.308379888268156, "grad_norm": 0.47513654828071594, "learning_rate": 0.0005864985994397759, "loss": 0.356, "step": 14872 }, { 
"epoch": 8.308938547486033, "grad_norm": null, "learning_rate": 0.0005864985994397759, "loss": 0.5356, "step": 14873 }, { "epoch": 8.309497206703911, "grad_norm": 0.4587685763835907, "learning_rate": 0.0005864705882352941, "loss": 0.3335, "step": 14874 }, { "epoch": 8.310055865921788, "grad_norm": 0.6229572892189026, "learning_rate": 0.0005864425770308124, "loss": 0.4461, "step": 14875 }, { "epoch": 8.310614525139664, "grad_norm": 0.4542607069015503, "learning_rate": 0.0005864145658263306, "loss": 0.4247, "step": 14876 }, { "epoch": 8.311173184357543, "grad_norm": 1.3235516548156738, "learning_rate": 0.0005863865546218487, "loss": 0.3939, "step": 14877 }, { "epoch": 8.31173184357542, "grad_norm": 1.194519281387329, "learning_rate": 0.0005863585434173669, "loss": 0.3328, "step": 14878 }, { "epoch": 8.312290502793296, "grad_norm": 0.644889235496521, "learning_rate": 0.0005863305322128851, "loss": 0.4413, "step": 14879 }, { "epoch": 8.312849162011172, "grad_norm": 0.4533769190311432, "learning_rate": 0.0005863025210084034, "loss": 0.5828, "step": 14880 }, { "epoch": 8.31340782122905, "grad_norm": 0.47157952189445496, "learning_rate": 0.0005862745098039216, "loss": 0.4517, "step": 14881 }, { "epoch": 8.313966480446927, "grad_norm": 0.47866547107696533, "learning_rate": 0.0005862464985994397, "loss": 0.5156, "step": 14882 }, { "epoch": 8.314525139664804, "grad_norm": 0.5529776811599731, "learning_rate": 0.000586218487394958, "loss": 0.5069, "step": 14883 }, { "epoch": 8.315083798882682, "grad_norm": 0.5036071538925171, "learning_rate": 0.0005861904761904762, "loss": 0.3871, "step": 14884 }, { "epoch": 8.315642458100559, "grad_norm": 2.540558338165283, "learning_rate": 0.0005861624649859945, "loss": 0.4002, "step": 14885 }, { "epoch": 8.316201117318435, "grad_norm": 0.5781753063201904, "learning_rate": 0.0005861344537815127, "loss": 0.4731, "step": 14886 }, { "epoch": 8.316759776536314, "grad_norm": 2.816850423812866, "learning_rate": 0.0005861064425770309, "loss": 
0.3895, "step": 14887 }, { "epoch": 8.31731843575419, "grad_norm": 0.7096949219703674, "learning_rate": 0.000586078431372549, "loss": 0.4181, "step": 14888 }, { "epoch": 8.317877094972067, "grad_norm": 1.2901709079742432, "learning_rate": 0.0005860504201680672, "loss": 0.4351, "step": 14889 }, { "epoch": 8.318435754189943, "grad_norm": 0.412121057510376, "learning_rate": 0.0005860224089635855, "loss": 0.4472, "step": 14890 }, { "epoch": 8.318994413407822, "grad_norm": 0.35975179076194763, "learning_rate": 0.0005859943977591037, "loss": 0.3562, "step": 14891 }, { "epoch": 8.319553072625698, "grad_norm": 0.3908674120903015, "learning_rate": 0.0005859663865546219, "loss": 0.4021, "step": 14892 }, { "epoch": 8.320111731843575, "grad_norm": 1.7003639936447144, "learning_rate": 0.00058593837535014, "loss": 0.4258, "step": 14893 }, { "epoch": 8.320670391061453, "grad_norm": 1.2953040599822998, "learning_rate": 0.0005859103641456582, "loss": 0.4537, "step": 14894 }, { "epoch": 8.32122905027933, "grad_norm": 0.3916996419429779, "learning_rate": 0.0005858823529411765, "loss": 0.458, "step": 14895 }, { "epoch": 8.321787709497206, "grad_norm": 0.6036856174468994, "learning_rate": 0.0005858543417366947, "loss": 0.2812, "step": 14896 }, { "epoch": 8.322346368715085, "grad_norm": 1.1489933729171753, "learning_rate": 0.0005858263305322129, "loss": 0.5304, "step": 14897 }, { "epoch": 8.322905027932961, "grad_norm": 0.5643019676208496, "learning_rate": 0.000585798319327731, "loss": 0.4582, "step": 14898 }, { "epoch": 8.323463687150838, "grad_norm": 0.5098575353622437, "learning_rate": 0.0005857703081232492, "loss": 0.4368, "step": 14899 }, { "epoch": 8.324022346368714, "grad_norm": 0.8201267123222351, "learning_rate": 0.0005857422969187676, "loss": 0.3759, "step": 14900 }, { "epoch": 8.324581005586593, "grad_norm": 0.7555643916130066, "learning_rate": 0.0005857142857142858, "loss": 0.5012, "step": 14901 }, { "epoch": 8.32513966480447, "grad_norm": 0.8516841530799866, 
"learning_rate": 0.000585686274509804, "loss": 0.6133, "step": 14902 }, { "epoch": 8.325698324022346, "grad_norm": 0.5094266533851624, "learning_rate": 0.0005856582633053222, "loss": 0.5723, "step": 14903 }, { "epoch": 8.326256983240224, "grad_norm": 0.632649302482605, "learning_rate": 0.0005856302521008403, "loss": 0.4469, "step": 14904 }, { "epoch": 8.3268156424581, "grad_norm": 0.5945995450019836, "learning_rate": 0.0005856022408963586, "loss": 0.3827, "step": 14905 }, { "epoch": 8.327374301675977, "grad_norm": 0.4750930368900299, "learning_rate": 0.0005855742296918768, "loss": 0.3888, "step": 14906 }, { "epoch": 8.327932960893854, "grad_norm": 0.46484503149986267, "learning_rate": 0.000585546218487395, "loss": 0.4446, "step": 14907 }, { "epoch": 8.328491620111732, "grad_norm": 0.6619868278503418, "learning_rate": 0.0005855182072829132, "loss": 0.423, "step": 14908 }, { "epoch": 8.329050279329609, "grad_norm": 0.7141128182411194, "learning_rate": 0.0005854901960784313, "loss": 0.4426, "step": 14909 }, { "epoch": 8.329608938547485, "grad_norm": 0.48962703347206116, "learning_rate": 0.0005854621848739496, "loss": 0.5299, "step": 14910 }, { "epoch": 8.330167597765364, "grad_norm": 0.7770978212356567, "learning_rate": 0.0005854341736694678, "loss": 0.4347, "step": 14911 }, { "epoch": 8.33072625698324, "grad_norm": 0.47875073552131653, "learning_rate": 0.000585406162464986, "loss": 0.43, "step": 14912 }, { "epoch": 8.331284916201117, "grad_norm": 0.4565463066101074, "learning_rate": 0.0005853781512605042, "loss": 0.5082, "step": 14913 }, { "epoch": 8.331843575418995, "grad_norm": 0.39837974309921265, "learning_rate": 0.0005853501400560223, "loss": 0.4339, "step": 14914 }, { "epoch": 8.332402234636872, "grad_norm": 0.7079082727432251, "learning_rate": 0.0005853221288515406, "loss": 0.6329, "step": 14915 }, { "epoch": 8.332960893854748, "grad_norm": 0.5177701115608215, "learning_rate": 0.0005852941176470589, "loss": 0.4776, "step": 14916 }, { "epoch": 
8.333519553072625, "grad_norm": 0.5062240958213806, "learning_rate": 0.0005852661064425771, "loss": 0.475, "step": 14917 }, { "epoch": 8.334078212290503, "grad_norm": 0.4818429946899414, "learning_rate": 0.0005852380952380953, "loss": 0.362, "step": 14918 }, { "epoch": 8.33463687150838, "grad_norm": 0.6559048295021057, "learning_rate": 0.0005852100840336135, "loss": 0.5981, "step": 14919 }, { "epoch": 8.335195530726256, "grad_norm": 0.4394361674785614, "learning_rate": 0.0005851820728291317, "loss": 0.4462, "step": 14920 }, { "epoch": 8.335754189944135, "grad_norm": 0.5834078192710876, "learning_rate": 0.0005851540616246499, "loss": 0.466, "step": 14921 }, { "epoch": 8.336312849162011, "grad_norm": 1.0570108890533447, "learning_rate": 0.0005851260504201681, "loss": 0.4706, "step": 14922 }, { "epoch": 8.336871508379888, "grad_norm": 0.38948193192481995, "learning_rate": 0.0005850980392156863, "loss": 0.3212, "step": 14923 }, { "epoch": 8.337430167597766, "grad_norm": 0.44940298795700073, "learning_rate": 0.0005850700280112045, "loss": 0.4889, "step": 14924 }, { "epoch": 8.337988826815643, "grad_norm": 0.4493415057659149, "learning_rate": 0.0005850420168067227, "loss": 0.567, "step": 14925 }, { "epoch": 8.33854748603352, "grad_norm": 0.39843258261680603, "learning_rate": 0.0005850140056022409, "loss": 0.3646, "step": 14926 }, { "epoch": 8.339106145251396, "grad_norm": 1.4064139127731323, "learning_rate": 0.0005849859943977591, "loss": 0.4846, "step": 14927 }, { "epoch": 8.339664804469274, "grad_norm": 0.6744639873504639, "learning_rate": 0.0005849579831932773, "loss": 0.5923, "step": 14928 }, { "epoch": 8.34022346368715, "grad_norm": 0.4554484188556671, "learning_rate": 0.0005849299719887955, "loss": 0.3656, "step": 14929 }, { "epoch": 8.340782122905027, "grad_norm": 0.4262297749519348, "learning_rate": 0.0005849019607843137, "loss": 0.419, "step": 14930 }, { "epoch": 8.341340782122906, "grad_norm": 0.6004900932312012, "learning_rate": 0.0005848739495798319, "loss": 
0.511, "step": 14931 }, { "epoch": 8.341899441340782, "grad_norm": 0.46860986948013306, "learning_rate": 0.0005848459383753501, "loss": 0.4257, "step": 14932 }, { "epoch": 8.342458100558659, "grad_norm": 0.4969500005245209, "learning_rate": 0.0005848179271708684, "loss": 0.3263, "step": 14933 }, { "epoch": 8.343016759776535, "grad_norm": 0.6126229763031006, "learning_rate": 0.0005847899159663866, "loss": 0.5477, "step": 14934 }, { "epoch": 8.343575418994414, "grad_norm": 0.5851977467536926, "learning_rate": 0.0005847619047619049, "loss": 0.507, "step": 14935 }, { "epoch": 8.34413407821229, "grad_norm": 0.6072599291801453, "learning_rate": 0.000584733893557423, "loss": 0.3371, "step": 14936 }, { "epoch": 8.344692737430167, "grad_norm": 0.4667683243751526, "learning_rate": 0.0005847058823529412, "loss": 0.4495, "step": 14937 }, { "epoch": 8.345251396648045, "grad_norm": 0.7304673790931702, "learning_rate": 0.0005846778711484594, "loss": 0.5209, "step": 14938 }, { "epoch": 8.345810055865922, "grad_norm": 1.4601686000823975, "learning_rate": 0.0005846498599439776, "loss": 0.4581, "step": 14939 }, { "epoch": 8.346368715083798, "grad_norm": 0.5530440807342529, "learning_rate": 0.0005846218487394959, "loss": 0.4586, "step": 14940 }, { "epoch": 8.346927374301677, "grad_norm": 0.4376341998577118, "learning_rate": 0.000584593837535014, "loss": 0.4564, "step": 14941 }, { "epoch": 8.347486033519553, "grad_norm": 0.5981418490409851, "learning_rate": 0.0005845658263305322, "loss": 0.3566, "step": 14942 }, { "epoch": 8.34804469273743, "grad_norm": 0.578819751739502, "learning_rate": 0.0005845378151260504, "loss": 0.4327, "step": 14943 }, { "epoch": 8.348603351955306, "grad_norm": 0.6129190921783447, "learning_rate": 0.0005845098039215686, "loss": 0.5493, "step": 14944 }, { "epoch": 8.349162011173185, "grad_norm": 0.4983977675437927, "learning_rate": 0.0005844817927170869, "loss": 0.5175, "step": 14945 }, { "epoch": 8.349720670391061, "grad_norm": 0.5523300766944885, 
"learning_rate": 0.000584453781512605, "loss": 0.4596, "step": 14946 }, { "epoch": 8.350279329608938, "grad_norm": 0.5708497762680054, "learning_rate": 0.0005844257703081232, "loss": 0.5217, "step": 14947 }, { "epoch": 8.350837988826816, "grad_norm": 12.819463729858398, "learning_rate": 0.0005843977591036414, "loss": 0.5491, "step": 14948 }, { "epoch": 8.351396648044693, "grad_norm": 0.46666884422302246, "learning_rate": 0.0005843697478991597, "loss": 0.454, "step": 14949 }, { "epoch": 8.35195530726257, "grad_norm": 0.6145971417427063, "learning_rate": 0.0005843417366946779, "loss": 0.3775, "step": 14950 }, { "epoch": 8.352513966480448, "grad_norm": 0.384982168674469, "learning_rate": 0.0005843137254901962, "loss": 0.3397, "step": 14951 }, { "epoch": 8.353072625698324, "grad_norm": 1.0820281505584717, "learning_rate": 0.0005842857142857143, "loss": 0.4324, "step": 14952 }, { "epoch": 8.3536312849162, "grad_norm": 0.5820189714431763, "learning_rate": 0.0005842577030812325, "loss": 0.4352, "step": 14953 }, { "epoch": 8.354189944134077, "grad_norm": 1.0354069471359253, "learning_rate": 0.0005842296918767507, "loss": 0.6378, "step": 14954 }, { "epoch": 8.354748603351956, "grad_norm": 0.48619651794433594, "learning_rate": 0.0005842016806722689, "loss": 0.4484, "step": 14955 }, { "epoch": 8.355307262569832, "grad_norm": 0.6371716856956482, "learning_rate": 0.0005841736694677872, "loss": 0.6083, "step": 14956 }, { "epoch": 8.355865921787709, "grad_norm": 0.9120499491691589, "learning_rate": 0.0005841456582633053, "loss": 0.4088, "step": 14957 }, { "epoch": 8.356424581005587, "grad_norm": 0.4198913276195526, "learning_rate": 0.0005841176470588235, "loss": 0.5411, "step": 14958 }, { "epoch": 8.356983240223464, "grad_norm": 0.40404555201530457, "learning_rate": 0.0005840896358543417, "loss": 0.3832, "step": 14959 }, { "epoch": 8.35754189944134, "grad_norm": 0.8810367584228516, "learning_rate": 0.0005840616246498599, "loss": 0.6768, "step": 14960 }, { "epoch": 
8.358100558659217, "grad_norm": 0.4589516222476959, "learning_rate": 0.0005840336134453782, "loss": 0.4203, "step": 14961 }, { "epoch": 8.358659217877095, "grad_norm": 0.6582610607147217, "learning_rate": 0.0005840056022408963, "loss": 0.4572, "step": 14962 }, { "epoch": 8.359217877094972, "grad_norm": 0.5612242221832275, "learning_rate": 0.0005839775910364145, "loss": 0.5905, "step": 14963 }, { "epoch": 8.359776536312848, "grad_norm": 0.36198267340660095, "learning_rate": 0.0005839495798319327, "loss": 0.3823, "step": 14964 }, { "epoch": 8.360335195530727, "grad_norm": 0.4489588141441345, "learning_rate": 0.000583921568627451, "loss": 0.4179, "step": 14965 }, { "epoch": 8.360893854748603, "grad_norm": 0.4734388291835785, "learning_rate": 0.0005838935574229693, "loss": 0.438, "step": 14966 }, { "epoch": 8.36145251396648, "grad_norm": 0.5238151550292969, "learning_rate": 0.0005838655462184875, "loss": 0.53, "step": 14967 }, { "epoch": 8.362011173184358, "grad_norm": 0.7804664373397827, "learning_rate": 0.0005838375350140056, "loss": 0.4179, "step": 14968 }, { "epoch": 8.362569832402235, "grad_norm": 0.4341186285018921, "learning_rate": 0.0005838095238095238, "loss": 0.4099, "step": 14969 }, { "epoch": 8.363128491620111, "grad_norm": 0.7319085001945496, "learning_rate": 0.000583781512605042, "loss": 0.4674, "step": 14970 }, { "epoch": 8.363687150837988, "grad_norm": 0.8095124959945679, "learning_rate": 0.0005837535014005603, "loss": 0.6271, "step": 14971 }, { "epoch": 8.364245810055866, "grad_norm": 2.2014153003692627, "learning_rate": 0.0005837254901960785, "loss": 0.3597, "step": 14972 }, { "epoch": 8.364804469273743, "grad_norm": 0.5027381777763367, "learning_rate": 0.0005836974789915966, "loss": 0.3901, "step": 14973 }, { "epoch": 8.36536312849162, "grad_norm": 0.4144800305366516, "learning_rate": 0.0005836694677871148, "loss": 0.4262, "step": 14974 }, { "epoch": 8.365921787709498, "grad_norm": 0.8771882057189941, "learning_rate": 0.000583641456582633, "loss": 
0.3776, "step": 14975 }, { "epoch": 8.366480446927374, "grad_norm": 0.37773919105529785, "learning_rate": 0.0005836134453781513, "loss": 0.4941, "step": 14976 }, { "epoch": 8.367039106145251, "grad_norm": 0.6087919473648071, "learning_rate": 0.0005835854341736695, "loss": 0.4216, "step": 14977 }, { "epoch": 8.36759776536313, "grad_norm": 0.591992199420929, "learning_rate": 0.0005835574229691876, "loss": 0.4708, "step": 14978 }, { "epoch": 8.368156424581006, "grad_norm": 0.5549662113189697, "learning_rate": 0.0005835294117647058, "loss": 0.4493, "step": 14979 }, { "epoch": 8.368715083798882, "grad_norm": 0.43173640966415405, "learning_rate": 0.000583501400560224, "loss": 0.4514, "step": 14980 }, { "epoch": 8.369273743016759, "grad_norm": 1.4267487525939941, "learning_rate": 0.0005834733893557424, "loss": 0.3851, "step": 14981 }, { "epoch": 8.369832402234637, "grad_norm": 0.4697130024433136, "learning_rate": 0.0005834453781512606, "loss": 0.4601, "step": 14982 }, { "epoch": 8.370391061452514, "grad_norm": 0.5327762961387634, "learning_rate": 0.0005834173669467788, "loss": 0.4097, "step": 14983 }, { "epoch": 8.37094972067039, "grad_norm": 0.656789243221283, "learning_rate": 0.0005833893557422969, "loss": 0.4823, "step": 14984 }, { "epoch": 8.371508379888269, "grad_norm": 4.2425994873046875, "learning_rate": 0.0005833613445378151, "loss": 0.4776, "step": 14985 }, { "epoch": 8.372067039106145, "grad_norm": 0.42395198345184326, "learning_rate": 0.0005833333333333334, "loss": 0.3747, "step": 14986 }, { "epoch": 8.372625698324022, "grad_norm": 0.4683852791786194, "learning_rate": 0.0005833053221288516, "loss": 0.5181, "step": 14987 }, { "epoch": 8.3731843575419, "grad_norm": 0.5276800990104675, "learning_rate": 0.0005832773109243698, "loss": 0.4857, "step": 14988 }, { "epoch": 8.373743016759777, "grad_norm": 0.6185788512229919, "learning_rate": 0.0005832492997198879, "loss": 0.4546, "step": 14989 }, { "epoch": 8.374301675977653, "grad_norm": 0.6540019512176514, 
"learning_rate": 0.0005832212885154061, "loss": 0.5035, "step": 14990 }, { "epoch": 8.37486033519553, "grad_norm": 0.4345349371433258, "learning_rate": 0.0005831932773109244, "loss": 0.4356, "step": 14991 }, { "epoch": 8.375418994413408, "grad_norm": 0.7053734660148621, "learning_rate": 0.0005831652661064426, "loss": 0.5728, "step": 14992 }, { "epoch": 8.375977653631285, "grad_norm": 0.4583536684513092, "learning_rate": 0.0005831372549019608, "loss": 0.4444, "step": 14993 }, { "epoch": 8.376536312849161, "grad_norm": 0.5855910181999207, "learning_rate": 0.0005831092436974789, "loss": 0.2691, "step": 14994 }, { "epoch": 8.37709497206704, "grad_norm": 0.5928325057029724, "learning_rate": 0.0005830812324929971, "loss": 0.4249, "step": 14995 }, { "epoch": 8.377653631284916, "grad_norm": 0.5972592830657959, "learning_rate": 0.0005830532212885154, "loss": 0.3577, "step": 14996 }, { "epoch": 8.378212290502793, "grad_norm": 0.5805910229682922, "learning_rate": 0.0005830252100840336, "loss": 0.4642, "step": 14997 }, { "epoch": 8.378770949720671, "grad_norm": 0.5550611615180969, "learning_rate": 0.0005829971988795519, "loss": 0.4218, "step": 14998 }, { "epoch": 8.379329608938548, "grad_norm": 0.6600235104560852, "learning_rate": 0.0005829691876750701, "loss": 0.4222, "step": 14999 }, { "epoch": 8.379888268156424, "grad_norm": 1.6635565757751465, "learning_rate": 0.0005829411764705882, "loss": 0.4987, "step": 15000 }, { "epoch": 8.379888268156424, "eval_cer": 0.09018253630541281, "eval_loss": 0.33840733766555786, "eval_runtime": 55.6651, "eval_samples_per_second": 81.523, "eval_steps_per_second": 5.102, "eval_wer": 0.35859333538169325, "step": 15000 }, { "epoch": 8.380446927374301, "grad_norm": 0.4069099426269531, "learning_rate": 0.0005829131652661065, "loss": 0.3955, "step": 15001 }, { "epoch": 8.38100558659218, "grad_norm": 2.484233856201172, "learning_rate": 0.0005828851540616247, "loss": 0.455, "step": 15002 }, { "epoch": 8.381564245810056, "grad_norm": 
0.8413163423538208, "learning_rate": 0.0005828571428571429, "loss": 0.5025, "step": 15003 }, { "epoch": 8.382122905027932, "grad_norm": 0.6786373257637024, "learning_rate": 0.0005828291316526611, "loss": 0.4657, "step": 15004 }, { "epoch": 8.38268156424581, "grad_norm": 7.700829982757568, "learning_rate": 0.0005828011204481792, "loss": 0.4499, "step": 15005 }, { "epoch": 8.383240223463687, "grad_norm": 0.4800693988800049, "learning_rate": 0.0005827731092436975, "loss": 0.4174, "step": 15006 }, { "epoch": 8.383798882681564, "grad_norm": 0.4424120783805847, "learning_rate": 0.0005827450980392157, "loss": 0.5419, "step": 15007 }, { "epoch": 8.38435754189944, "grad_norm": 0.6140261292457581, "learning_rate": 0.0005827170868347339, "loss": 0.6571, "step": 15008 }, { "epoch": 8.384916201117319, "grad_norm": 0.6278320550918579, "learning_rate": 0.0005826890756302521, "loss": 0.4594, "step": 15009 }, { "epoch": 8.385474860335195, "grad_norm": 1.014625906944275, "learning_rate": 0.0005826610644257702, "loss": 0.4305, "step": 15010 }, { "epoch": 8.386033519553072, "grad_norm": 0.6238791346549988, "learning_rate": 0.0005826330532212885, "loss": 0.5134, "step": 15011 }, { "epoch": 8.38659217877095, "grad_norm": 0.9392037391662598, "learning_rate": 0.0005826050420168067, "loss": 0.5098, "step": 15012 }, { "epoch": 8.387150837988827, "grad_norm": 0.734483003616333, "learning_rate": 0.0005825770308123249, "loss": 0.4961, "step": 15013 }, { "epoch": 8.387709497206703, "grad_norm": 0.3761874735355377, "learning_rate": 0.0005825490196078431, "loss": 0.3452, "step": 15014 }, { "epoch": 8.388268156424582, "grad_norm": 0.5336946249008179, "learning_rate": 0.0005825210084033614, "loss": 0.4099, "step": 15015 }, { "epoch": 8.388826815642458, "grad_norm": 0.9680999517440796, "learning_rate": 0.0005824929971988796, "loss": 0.5749, "step": 15016 }, { "epoch": 8.389385474860335, "grad_norm": 0.6268932819366455, "learning_rate": 0.0005824649859943978, "loss": 0.4869, "step": 15017 }, { 
"epoch": 8.389944134078211, "grad_norm": 0.48394903540611267, "learning_rate": 0.000582436974789916, "loss": 0.4142, "step": 15018 }, { "epoch": 8.39050279329609, "grad_norm": 0.3707742691040039, "learning_rate": 0.0005824089635854342, "loss": 0.427, "step": 15019 }, { "epoch": 8.391061452513966, "grad_norm": 0.6092075109481812, "learning_rate": 0.0005823809523809524, "loss": 0.3906, "step": 15020 }, { "epoch": 8.391620111731843, "grad_norm": 0.4559633433818817, "learning_rate": 0.0005823529411764706, "loss": 0.5168, "step": 15021 }, { "epoch": 8.392178770949721, "grad_norm": 0.36660459637641907, "learning_rate": 0.0005823249299719888, "loss": 0.399, "step": 15022 }, { "epoch": 8.392737430167598, "grad_norm": 1.7564077377319336, "learning_rate": 0.000582296918767507, "loss": 0.3607, "step": 15023 }, { "epoch": 8.393296089385474, "grad_norm": 0.6242188811302185, "learning_rate": 0.0005822689075630252, "loss": 0.3524, "step": 15024 }, { "epoch": 8.393854748603353, "grad_norm": 2.5956006050109863, "learning_rate": 0.0005822408963585434, "loss": 0.4181, "step": 15025 }, { "epoch": 8.39441340782123, "grad_norm": 0.3992879390716553, "learning_rate": 0.0005822128851540617, "loss": 0.4334, "step": 15026 }, { "epoch": 8.394972067039106, "grad_norm": 0.5396314263343811, "learning_rate": 0.0005821848739495798, "loss": 0.4547, "step": 15027 }, { "epoch": 8.395530726256982, "grad_norm": 0.49497008323669434, "learning_rate": 0.000582156862745098, "loss": 0.4041, "step": 15028 }, { "epoch": 8.39608938547486, "grad_norm": 0.45197969675064087, "learning_rate": 0.0005821288515406162, "loss": 0.4061, "step": 15029 }, { "epoch": 8.396648044692737, "grad_norm": 2.5198616981506348, "learning_rate": 0.0005821008403361344, "loss": 0.4119, "step": 15030 }, { "epoch": 8.397206703910614, "grad_norm": 0.5039351582527161, "learning_rate": 0.0005820728291316528, "loss": 0.3602, "step": 15031 }, { "epoch": 8.397765363128492, "grad_norm": 7.539263725280762, "learning_rate": 0.0005820448179271709, 
"loss": 0.4095, "step": 15032 }, { "epoch": 8.398324022346369, "grad_norm": 0.7699964642524719, "learning_rate": 0.0005820168067226891, "loss": 0.5351, "step": 15033 }, { "epoch": 8.398882681564245, "grad_norm": 1.0932117700576782, "learning_rate": 0.0005819887955182073, "loss": 0.5278, "step": 15034 }, { "epoch": 8.399441340782122, "grad_norm": 0.6231299638748169, "learning_rate": 0.0005819607843137255, "loss": 0.413, "step": 15035 }, { "epoch": 8.4, "grad_norm": 0.5333104729652405, "learning_rate": 0.0005819327731092438, "loss": 0.4203, "step": 15036 }, { "epoch": 8.400558659217877, "grad_norm": 0.9620609283447266, "learning_rate": 0.0005819047619047619, "loss": 0.4379, "step": 15037 }, { "epoch": 8.401117318435753, "grad_norm": 0.5910012125968933, "learning_rate": 0.0005818767507002801, "loss": 0.3737, "step": 15038 }, { "epoch": 8.401675977653632, "grad_norm": 0.8014978766441345, "learning_rate": 0.0005818487394957983, "loss": 0.3765, "step": 15039 }, { "epoch": 8.402234636871508, "grad_norm": 0.5707065463066101, "learning_rate": 0.0005818207282913165, "loss": 0.4146, "step": 15040 }, { "epoch": 8.402793296089385, "grad_norm": 0.6289256811141968, "learning_rate": 0.0005817927170868348, "loss": 0.4353, "step": 15041 }, { "epoch": 8.403351955307263, "grad_norm": 0.8036611080169678, "learning_rate": 0.000581764705882353, "loss": 0.577, "step": 15042 }, { "epoch": 8.40391061452514, "grad_norm": 0.4298895001411438, "learning_rate": 0.0005817366946778711, "loss": 0.3891, "step": 15043 }, { "epoch": 8.404469273743016, "grad_norm": 0.4730525314807892, "learning_rate": 0.0005817086834733893, "loss": 0.4311, "step": 15044 }, { "epoch": 8.405027932960893, "grad_norm": 0.5966941714286804, "learning_rate": 0.0005816806722689075, "loss": 0.3262, "step": 15045 }, { "epoch": 8.405586592178771, "grad_norm": 0.5134749412536621, "learning_rate": 0.0005816526610644258, "loss": 0.4683, "step": 15046 }, { "epoch": 8.406145251396648, "grad_norm": 0.4108908176422119, "learning_rate": 
0.0005816246498599441, "loss": 0.3255, "step": 15047 }, { "epoch": 8.406703910614524, "grad_norm": 0.4103609621524811, "learning_rate": 0.0005815966386554622, "loss": 0.4047, "step": 15048 }, { "epoch": 8.407262569832403, "grad_norm": 0.5080446600914001, "learning_rate": 0.0005815686274509804, "loss": 0.4178, "step": 15049 }, { "epoch": 8.40782122905028, "grad_norm": 0.6712818145751953, "learning_rate": 0.0005815406162464986, "loss": 0.4931, "step": 15050 }, { "epoch": 8.408379888268156, "grad_norm": 0.6717536449432373, "learning_rate": 0.0005815126050420169, "loss": 0.5419, "step": 15051 }, { "epoch": 8.408938547486034, "grad_norm": 0.7146703600883484, "learning_rate": 0.0005814845938375351, "loss": 0.4064, "step": 15052 }, { "epoch": 8.40949720670391, "grad_norm": 7.56820821762085, "learning_rate": 0.0005814565826330532, "loss": 0.5398, "step": 15053 }, { "epoch": 8.410055865921787, "grad_norm": 0.44317319989204407, "learning_rate": 0.0005814285714285714, "loss": 0.3379, "step": 15054 }, { "epoch": 8.410614525139664, "grad_norm": 0.5120975375175476, "learning_rate": 0.0005814005602240896, "loss": 0.4168, "step": 15055 }, { "epoch": 8.411173184357542, "grad_norm": 1.1907446384429932, "learning_rate": 0.0005813725490196079, "loss": 0.3392, "step": 15056 }, { "epoch": 8.411731843575419, "grad_norm": 0.5381262898445129, "learning_rate": 0.0005813445378151261, "loss": 0.3176, "step": 15057 }, { "epoch": 8.412290502793295, "grad_norm": 0.43007978796958923, "learning_rate": 0.0005813165266106443, "loss": 0.4151, "step": 15058 }, { "epoch": 8.412849162011174, "grad_norm": 0.505859911441803, "learning_rate": 0.0005812885154061624, "loss": 0.5643, "step": 15059 }, { "epoch": 8.41340782122905, "grad_norm": 0.6560646891593933, "learning_rate": 0.0005812605042016806, "loss": 0.4442, "step": 15060 }, { "epoch": 8.413966480446927, "grad_norm": 0.6167864203453064, "learning_rate": 0.0005812324929971989, "loss": 0.3798, "step": 15061 }, { "epoch": 8.414525139664805, "grad_norm": 
0.689209520816803, "learning_rate": 0.0005812044817927171, "loss": 0.4153, "step": 15062 }, { "epoch": 8.415083798882682, "grad_norm": 0.8740217685699463, "learning_rate": 0.0005811764705882354, "loss": 0.4053, "step": 15063 }, { "epoch": 8.415642458100558, "grad_norm": 0.5117918848991394, "learning_rate": 0.0005811484593837534, "loss": 0.4156, "step": 15064 }, { "epoch": 8.416201117318435, "grad_norm": 0.4569648504257202, "learning_rate": 0.0005811204481792717, "loss": 0.4219, "step": 15065 }, { "epoch": 8.416759776536313, "grad_norm": 2.984717607498169, "learning_rate": 0.00058109243697479, "loss": 0.3834, "step": 15066 }, { "epoch": 8.41731843575419, "grad_norm": 0.4624974727630615, "learning_rate": 0.0005810644257703082, "loss": 0.3727, "step": 15067 }, { "epoch": 8.417877094972066, "grad_norm": 2.9909536838531494, "learning_rate": 0.0005810364145658264, "loss": 0.5461, "step": 15068 }, { "epoch": 8.418435754189945, "grad_norm": 0.44195353984832764, "learning_rate": 0.0005810084033613445, "loss": 0.363, "step": 15069 }, { "epoch": 8.418994413407821, "grad_norm": 0.5280654430389404, "learning_rate": 0.0005809803921568627, "loss": 0.4627, "step": 15070 }, { "epoch": 8.419553072625698, "grad_norm": 0.7593404650688171, "learning_rate": 0.000580952380952381, "loss": 0.4568, "step": 15071 }, { "epoch": 8.420111731843576, "grad_norm": 0.4967724680900574, "learning_rate": 0.0005809243697478992, "loss": 0.5067, "step": 15072 }, { "epoch": 8.420670391061453, "grad_norm": 0.41193273663520813, "learning_rate": 0.0005808963585434174, "loss": 0.4349, "step": 15073 }, { "epoch": 8.42122905027933, "grad_norm": 0.5806664228439331, "learning_rate": 0.0005808683473389356, "loss": 0.4153, "step": 15074 }, { "epoch": 8.421787709497206, "grad_norm": 0.43386971950531006, "learning_rate": 0.0005808403361344537, "loss": 0.4706, "step": 15075 }, { "epoch": 8.422346368715084, "grad_norm": 0.6706230044364929, "learning_rate": 0.000580812324929972, "loss": 0.5027, "step": 15076 }, { 
"epoch": 8.422905027932961, "grad_norm": 0.5473315715789795, "learning_rate": 0.0005807843137254902, "loss": 0.6938, "step": 15077 }, { "epoch": 8.423463687150837, "grad_norm": 2.0396227836608887, "learning_rate": 0.0005807563025210084, "loss": 0.4742, "step": 15078 }, { "epoch": 8.424022346368716, "grad_norm": 0.41608479619026184, "learning_rate": 0.0005807282913165266, "loss": 0.3652, "step": 15079 }, { "epoch": 8.424581005586592, "grad_norm": 0.5501396059989929, "learning_rate": 0.0005807002801120447, "loss": 0.4825, "step": 15080 }, { "epoch": 8.425139664804469, "grad_norm": 0.4052152931690216, "learning_rate": 0.0005806722689075631, "loss": 0.4553, "step": 15081 }, { "epoch": 8.425698324022346, "grad_norm": 0.360068678855896, "learning_rate": 0.0005806442577030813, "loss": 0.4187, "step": 15082 }, { "epoch": 8.426256983240224, "grad_norm": 0.3935549855232239, "learning_rate": 0.0005806162464985995, "loss": 0.4486, "step": 15083 }, { "epoch": 8.4268156424581, "grad_norm": 0.6176988482475281, "learning_rate": 0.0005805882352941177, "loss": 0.4482, "step": 15084 }, { "epoch": 8.427374301675977, "grad_norm": 4.438694953918457, "learning_rate": 0.0005805602240896358, "loss": 0.3855, "step": 15085 }, { "epoch": 8.427932960893855, "grad_norm": 0.591834545135498, "learning_rate": 0.0005805322128851541, "loss": 0.4643, "step": 15086 }, { "epoch": 8.428491620111732, "grad_norm": 0.48653504252433777, "learning_rate": 0.0005805042016806723, "loss": 0.3746, "step": 15087 }, { "epoch": 8.429050279329608, "grad_norm": 0.43574297428131104, "learning_rate": 0.0005804761904761905, "loss": 0.4731, "step": 15088 }, { "epoch": 8.429608938547487, "grad_norm": 0.9128918051719666, "learning_rate": 0.0005804481792717087, "loss": 0.4218, "step": 15089 }, { "epoch": 8.430167597765363, "grad_norm": 0.4452519118785858, "learning_rate": 0.0005804201680672269, "loss": 0.4395, "step": 15090 }, { "epoch": 8.43072625698324, "grad_norm": 0.5181838870048523, "learning_rate": 
0.0005803921568627451, "loss": 0.3677, "step": 15091 }, { "epoch": 8.431284916201117, "grad_norm": 0.4824831783771515, "learning_rate": 0.0005803641456582633, "loss": 0.4234, "step": 15092 }, { "epoch": 8.431843575418995, "grad_norm": 0.5785755515098572, "learning_rate": 0.0005803361344537815, "loss": 0.6059, "step": 15093 }, { "epoch": 8.432402234636871, "grad_norm": 0.42026451230049133, "learning_rate": 0.0005803081232492997, "loss": 0.4067, "step": 15094 }, { "epoch": 8.432960893854748, "grad_norm": 0.5909212827682495, "learning_rate": 0.0005802801120448179, "loss": 0.4481, "step": 15095 }, { "epoch": 8.433519553072626, "grad_norm": 0.44709333777427673, "learning_rate": 0.0005802521008403361, "loss": 0.5091, "step": 15096 }, { "epoch": 8.434078212290503, "grad_norm": 2.2895781993865967, "learning_rate": 0.0005802240896358544, "loss": 0.4165, "step": 15097 }, { "epoch": 8.43463687150838, "grad_norm": 1.7314233779907227, "learning_rate": 0.0005801960784313726, "loss": 0.3606, "step": 15098 }, { "epoch": 8.435195530726258, "grad_norm": 0.47517508268356323, "learning_rate": 0.0005801680672268908, "loss": 0.4047, "step": 15099 }, { "epoch": 8.435754189944134, "grad_norm": 0.511033296585083, "learning_rate": 0.000580140056022409, "loss": 0.4535, "step": 15100 }, { "epoch": 8.436312849162011, "grad_norm": 0.5666338205337524, "learning_rate": 0.0005801120448179272, "loss": 0.4598, "step": 15101 }, { "epoch": 8.436871508379888, "grad_norm": 1.1614675521850586, "learning_rate": 0.0005800840336134454, "loss": 0.4548, "step": 15102 }, { "epoch": 8.437430167597766, "grad_norm": 0.4814290404319763, "learning_rate": 0.0005800560224089636, "loss": 0.4376, "step": 15103 }, { "epoch": 8.437988826815642, "grad_norm": 0.8443406224250793, "learning_rate": 0.0005800280112044818, "loss": 0.3793, "step": 15104 }, { "epoch": 8.438547486033519, "grad_norm": 0.709213376045227, "learning_rate": 0.00058, "loss": 0.3492, "step": 15105 }, { "epoch": 8.439106145251397, "grad_norm": 
0.6441038846969604, "learning_rate": 0.0005799719887955183, "loss": 0.4306, "step": 15106 }, { "epoch": 8.439664804469274, "grad_norm": 0.6008891463279724, "learning_rate": 0.0005799439775910364, "loss": 0.4937, "step": 15107 }, { "epoch": 8.44022346368715, "grad_norm": 0.5976288914680481, "learning_rate": 0.0005799159663865546, "loss": 0.4254, "step": 15108 }, { "epoch": 8.440782122905027, "grad_norm": 1.7645634412765503, "learning_rate": 0.0005798879551820728, "loss": 0.4629, "step": 15109 }, { "epoch": 8.441340782122905, "grad_norm": 0.43416234850883484, "learning_rate": 0.000579859943977591, "loss": 0.4221, "step": 15110 }, { "epoch": 8.441899441340782, "grad_norm": 0.46453604102134705, "learning_rate": 0.0005798319327731093, "loss": 0.3954, "step": 15111 }, { "epoch": 8.442458100558659, "grad_norm": 0.5179375410079956, "learning_rate": 0.0005798039215686274, "loss": 0.4671, "step": 15112 }, { "epoch": 8.443016759776537, "grad_norm": 0.6036710739135742, "learning_rate": 0.0005797759103641457, "loss": 0.5123, "step": 15113 }, { "epoch": 8.443575418994413, "grad_norm": 0.3649946451187134, "learning_rate": 0.0005797478991596639, "loss": 0.4524, "step": 15114 }, { "epoch": 8.44413407821229, "grad_norm": 0.4086681008338928, "learning_rate": 0.0005797198879551821, "loss": 0.403, "step": 15115 }, { "epoch": 8.444692737430168, "grad_norm": 0.4461531341075897, "learning_rate": 0.0005796918767507004, "loss": 0.5342, "step": 15116 }, { "epoch": 8.445251396648045, "grad_norm": 0.4921421706676483, "learning_rate": 0.0005796638655462185, "loss": 0.4663, "step": 15117 }, { "epoch": 8.445810055865921, "grad_norm": 1.8387770652770996, "learning_rate": 0.0005796358543417367, "loss": 0.5494, "step": 15118 }, { "epoch": 8.446368715083798, "grad_norm": 0.4927188456058502, "learning_rate": 0.0005796078431372549, "loss": 0.5956, "step": 15119 }, { "epoch": 8.446927374301676, "grad_norm": 0.5424768328666687, "learning_rate": 0.0005795798319327731, "loss": 0.4196, "step": 15120 }, { 
"epoch": 8.447486033519553, "grad_norm": 0.9578824639320374, "learning_rate": 0.0005795518207282914, "loss": 0.4401, "step": 15121 }, { "epoch": 8.44804469273743, "grad_norm": 0.6570304036140442, "learning_rate": 0.0005795238095238096, "loss": 0.4994, "step": 15122 }, { "epoch": 8.448603351955308, "grad_norm": 0.5701783895492554, "learning_rate": 0.0005794957983193277, "loss": 0.3914, "step": 15123 }, { "epoch": 8.449162011173184, "grad_norm": 0.4354120194911957, "learning_rate": 0.0005794677871148459, "loss": 0.4802, "step": 15124 }, { "epoch": 8.449720670391061, "grad_norm": 0.9803155660629272, "learning_rate": 0.0005794397759103641, "loss": 0.3394, "step": 15125 }, { "epoch": 8.45027932960894, "grad_norm": 0.715829074382782, "learning_rate": 0.0005794117647058824, "loss": 0.4679, "step": 15126 }, { "epoch": 8.450837988826816, "grad_norm": 0.5965988636016846, "learning_rate": 0.0005793837535014006, "loss": 0.4591, "step": 15127 }, { "epoch": 8.451396648044692, "grad_norm": 1.1053520441055298, "learning_rate": 0.0005793557422969187, "loss": 0.4367, "step": 15128 }, { "epoch": 8.451955307262569, "grad_norm": 0.882635772228241, "learning_rate": 0.000579327731092437, "loss": 0.4614, "step": 15129 }, { "epoch": 8.452513966480447, "grad_norm": 0.5637270212173462, "learning_rate": 0.0005792997198879552, "loss": 0.5258, "step": 15130 }, { "epoch": 8.453072625698324, "grad_norm": 0.4686194360256195, "learning_rate": 0.0005792717086834735, "loss": 0.4381, "step": 15131 }, { "epoch": 8.4536312849162, "grad_norm": 0.5487255454063416, "learning_rate": 0.0005792436974789917, "loss": 0.5203, "step": 15132 }, { "epoch": 8.454189944134079, "grad_norm": 0.4387367367744446, "learning_rate": 0.0005792156862745098, "loss": 0.5721, "step": 15133 }, { "epoch": 8.454748603351955, "grad_norm": 0.4858744442462921, "learning_rate": 0.000579187675070028, "loss": 0.5477, "step": 15134 }, { "epoch": 8.455307262569832, "grad_norm": 0.6573995351791382, "learning_rate": 0.0005791596638655462, 
"loss": 0.4172, "step": 15135 }, { "epoch": 8.45586592178771, "grad_norm": 0.7264791131019592, "learning_rate": 0.0005791316526610645, "loss": 0.3538, "step": 15136 }, { "epoch": 8.456424581005587, "grad_norm": 0.5942917466163635, "learning_rate": 0.0005791036414565827, "loss": 0.4882, "step": 15137 }, { "epoch": 8.456983240223463, "grad_norm": 0.6617839932441711, "learning_rate": 0.0005790756302521009, "loss": 0.4583, "step": 15138 }, { "epoch": 8.45754189944134, "grad_norm": 0.8159322738647461, "learning_rate": 0.000579047619047619, "loss": 0.4415, "step": 15139 }, { "epoch": 8.458100558659218, "grad_norm": 0.42285773158073425, "learning_rate": 0.0005790196078431372, "loss": 0.3736, "step": 15140 }, { "epoch": 8.458659217877095, "grad_norm": 0.46349599957466125, "learning_rate": 0.0005789915966386555, "loss": 0.4708, "step": 15141 }, { "epoch": 8.459217877094972, "grad_norm": 0.5299896597862244, "learning_rate": 0.0005789635854341737, "loss": 0.3909, "step": 15142 }, { "epoch": 8.45977653631285, "grad_norm": 0.5893663167953491, "learning_rate": 0.0005789355742296919, "loss": 0.4049, "step": 15143 }, { "epoch": 8.460335195530726, "grad_norm": 0.4479421079158783, "learning_rate": 0.00057890756302521, "loss": 0.4355, "step": 15144 }, { "epoch": 8.460893854748603, "grad_norm": 0.4521818459033966, "learning_rate": 0.0005788795518207282, "loss": 0.4395, "step": 15145 }, { "epoch": 8.461452513966481, "grad_norm": 0.8326586484909058, "learning_rate": 0.0005788515406162466, "loss": 0.5878, "step": 15146 }, { "epoch": 8.462011173184358, "grad_norm": 2.5874366760253906, "learning_rate": 0.0005788235294117648, "loss": 0.3954, "step": 15147 }, { "epoch": 8.462569832402234, "grad_norm": 0.4896738529205322, "learning_rate": 0.000578795518207283, "loss": 0.4108, "step": 15148 }, { "epoch": 8.463128491620111, "grad_norm": 0.4276901185512543, "learning_rate": 0.0005787675070028011, "loss": 0.4182, "step": 15149 }, { "epoch": 8.46368715083799, "grad_norm": 0.9241102337837219, 
"learning_rate": 0.0005787394957983193, "loss": 0.4939, "step": 15150 }, { "epoch": 8.464245810055866, "grad_norm": 1.7278085947036743, "learning_rate": 0.0005787114845938376, "loss": 0.3167, "step": 15151 }, { "epoch": 8.464804469273743, "grad_norm": 0.516864538192749, "learning_rate": 0.0005786834733893558, "loss": 0.369, "step": 15152 }, { "epoch": 8.46536312849162, "grad_norm": 0.47903817892074585, "learning_rate": 0.000578655462184874, "loss": 0.4597, "step": 15153 }, { "epoch": 8.465921787709497, "grad_norm": 0.6159040927886963, "learning_rate": 0.0005786274509803922, "loss": 0.3888, "step": 15154 }, { "epoch": 8.466480446927374, "grad_norm": 0.35229989886283875, "learning_rate": 0.0005785994397759103, "loss": 0.323, "step": 15155 }, { "epoch": 8.46703910614525, "grad_norm": 0.4384755492210388, "learning_rate": 0.0005785714285714286, "loss": 0.3895, "step": 15156 }, { "epoch": 8.467597765363129, "grad_norm": 0.6398372054100037, "learning_rate": 0.0005785434173669468, "loss": 0.4142, "step": 15157 }, { "epoch": 8.468156424581005, "grad_norm": 0.47204533219337463, "learning_rate": 0.000578515406162465, "loss": 0.3795, "step": 15158 }, { "epoch": 8.468715083798882, "grad_norm": 0.6622194051742554, "learning_rate": 0.0005784873949579832, "loss": 0.4378, "step": 15159 }, { "epoch": 8.46927374301676, "grad_norm": 1.2734688520431519, "learning_rate": 0.0005784593837535013, "loss": 0.4494, "step": 15160 }, { "epoch": 8.469832402234637, "grad_norm": 0.9295127987861633, "learning_rate": 0.0005784313725490196, "loss": 0.6095, "step": 15161 }, { "epoch": 8.470391061452514, "grad_norm": 0.6259679794311523, "learning_rate": 0.0005784033613445379, "loss": 0.4424, "step": 15162 }, { "epoch": 8.470949720670392, "grad_norm": 1.059126853942871, "learning_rate": 0.0005783753501400561, "loss": 0.3123, "step": 15163 }, { "epoch": 8.471508379888268, "grad_norm": 0.5010936260223389, "learning_rate": 0.0005783473389355743, "loss": 0.4049, "step": 15164 }, { "epoch": 
8.472067039106145, "grad_norm": 0.5045092701911926, "learning_rate": 0.0005783193277310924, "loss": 0.4235, "step": 15165 }, { "epoch": 8.472625698324022, "grad_norm": 0.3551951050758362, "learning_rate": 0.0005782913165266107, "loss": 0.344, "step": 15166 }, { "epoch": 8.4731843575419, "grad_norm": 0.6202253103256226, "learning_rate": 0.0005782633053221289, "loss": 0.4448, "step": 15167 }, { "epoch": 8.473743016759776, "grad_norm": 0.5454356074333191, "learning_rate": 0.0005782352941176471, "loss": 0.5295, "step": 15168 }, { "epoch": 8.474301675977653, "grad_norm": 0.41051018238067627, "learning_rate": 0.0005782072829131653, "loss": 0.3878, "step": 15169 }, { "epoch": 8.474860335195531, "grad_norm": 0.4879007637500763, "learning_rate": 0.0005781792717086835, "loss": 0.4787, "step": 15170 }, { "epoch": 8.475418994413408, "grad_norm": 0.7446616888046265, "learning_rate": 0.0005781512605042017, "loss": 0.4479, "step": 15171 }, { "epoch": 8.475977653631285, "grad_norm": 1.3310225009918213, "learning_rate": 0.0005781232492997199, "loss": 0.4161, "step": 15172 }, { "epoch": 8.476536312849163, "grad_norm": 0.4325145184993744, "learning_rate": 0.0005780952380952381, "loss": 0.3515, "step": 15173 }, { "epoch": 8.47709497206704, "grad_norm": 0.5783650875091553, "learning_rate": 0.0005780672268907563, "loss": 0.5555, "step": 15174 }, { "epoch": 8.477653631284916, "grad_norm": 0.6773547530174255, "learning_rate": 0.0005780392156862745, "loss": 0.5505, "step": 15175 }, { "epoch": 8.478212290502793, "grad_norm": 0.47681915760040283, "learning_rate": 0.0005780112044817926, "loss": 0.3965, "step": 15176 }, { "epoch": 8.478770949720671, "grad_norm": 0.8694610595703125, "learning_rate": 0.0005779831932773109, "loss": 0.4967, "step": 15177 }, { "epoch": 8.479329608938547, "grad_norm": 0.7414939403533936, "learning_rate": 0.0005779551820728291, "loss": 0.3652, "step": 15178 }, { "epoch": 8.479888268156424, "grad_norm": 0.7186941504478455, "learning_rate": 0.0005779271708683474, 
"loss": 0.5713, "step": 15179 }, { "epoch": 8.480446927374302, "grad_norm": 0.6920742392539978, "learning_rate": 0.0005778991596638656, "loss": 0.5204, "step": 15180 }, { "epoch": 8.481005586592179, "grad_norm": 0.4140740633010864, "learning_rate": 0.0005778711484593837, "loss": 0.4269, "step": 15181 }, { "epoch": 8.481564245810056, "grad_norm": 0.7242079973220825, "learning_rate": 0.000577843137254902, "loss": 0.5297, "step": 15182 }, { "epoch": 8.482122905027932, "grad_norm": 6.258434295654297, "learning_rate": 0.0005778151260504202, "loss": 0.4646, "step": 15183 }, { "epoch": 8.48268156424581, "grad_norm": 0.4407448470592499, "learning_rate": 0.0005777871148459384, "loss": 0.2855, "step": 15184 }, { "epoch": 8.483240223463687, "grad_norm": 0.5108163356781006, "learning_rate": 0.0005777591036414566, "loss": 0.4594, "step": 15185 }, { "epoch": 8.483798882681564, "grad_norm": 0.4854128360748291, "learning_rate": 0.0005777310924369748, "loss": 0.4463, "step": 15186 }, { "epoch": 8.484357541899442, "grad_norm": 0.5266446471214294, "learning_rate": 0.000577703081232493, "loss": 0.3882, "step": 15187 }, { "epoch": 8.484916201117318, "grad_norm": 2.322105884552002, "learning_rate": 0.0005776750700280112, "loss": 0.3792, "step": 15188 }, { "epoch": 8.485474860335195, "grad_norm": 0.515484631061554, "learning_rate": 0.0005776470588235294, "loss": 0.4347, "step": 15189 }, { "epoch": 8.486033519553073, "grad_norm": 0.5316684246063232, "learning_rate": 0.0005776190476190476, "loss": 0.3438, "step": 15190 }, { "epoch": 8.48659217877095, "grad_norm": 0.4150233268737793, "learning_rate": 0.0005775910364145658, "loss": 0.3617, "step": 15191 }, { "epoch": 8.487150837988827, "grad_norm": 1.9137550592422485, "learning_rate": 0.000577563025210084, "loss": 0.6027, "step": 15192 }, { "epoch": 8.487709497206703, "grad_norm": 0.5668200254440308, "learning_rate": 0.0005775350140056022, "loss": 0.4251, "step": 15193 }, { "epoch": 8.488268156424581, "grad_norm": 1.2568035125732422, 
"learning_rate": 0.0005775070028011204, "loss": 0.36, "step": 15194 }, { "epoch": 8.488826815642458, "grad_norm": 4.105048656463623, "learning_rate": 0.0005774789915966387, "loss": 0.4748, "step": 15195 }, { "epoch": 8.489385474860335, "grad_norm": 0.5091862082481384, "learning_rate": 0.0005774509803921569, "loss": 0.3959, "step": 15196 }, { "epoch": 8.489944134078213, "grad_norm": 0.6350382566452026, "learning_rate": 0.0005774229691876752, "loss": 0.7157, "step": 15197 }, { "epoch": 8.49050279329609, "grad_norm": 0.47594332695007324, "learning_rate": 0.0005773949579831933, "loss": 0.4638, "step": 15198 }, { "epoch": 8.491061452513966, "grad_norm": 0.5144283771514893, "learning_rate": 0.0005773669467787115, "loss": 0.4744, "step": 15199 }, { "epoch": 8.491620111731844, "grad_norm": 0.9460113048553467, "learning_rate": 0.0005773389355742297, "loss": 0.3787, "step": 15200 }, { "epoch": 8.492178770949721, "grad_norm": 0.5416004061698914, "learning_rate": 0.0005773109243697479, "loss": 0.4765, "step": 15201 }, { "epoch": 8.492737430167598, "grad_norm": 0.3924878239631653, "learning_rate": 0.0005772829131652662, "loss": 0.4494, "step": 15202 }, { "epoch": 8.493296089385474, "grad_norm": 0.4454866349697113, "learning_rate": 0.0005772549019607843, "loss": 0.3877, "step": 15203 }, { "epoch": 8.493854748603352, "grad_norm": 0.9776567220687866, "learning_rate": 0.0005772268907563025, "loss": 0.3407, "step": 15204 }, { "epoch": 8.494413407821229, "grad_norm": 0.49780526757240295, "learning_rate": 0.0005771988795518207, "loss": 0.5092, "step": 15205 }, { "epoch": 8.494972067039106, "grad_norm": 0.4817385673522949, "learning_rate": 0.0005771708683473389, "loss": 0.4408, "step": 15206 }, { "epoch": 8.495530726256984, "grad_norm": 1.1811668872833252, "learning_rate": 0.0005771428571428572, "loss": 0.4696, "step": 15207 }, { "epoch": 8.49608938547486, "grad_norm": 0.5603366494178772, "learning_rate": 0.0005771148459383753, "loss": 0.4709, "step": 15208 }, { "epoch": 
8.496648044692737, "grad_norm": 0.4623156189918518, "learning_rate": 0.0005770868347338935, "loss": 0.3893, "step": 15209 }, { "epoch": 8.497206703910614, "grad_norm": 0.47783979773521423, "learning_rate": 0.0005770588235294117, "loss": 0.4488, "step": 15210 }, { "epoch": 8.497765363128492, "grad_norm": 0.365688294172287, "learning_rate": 0.00057703081232493, "loss": 0.3015, "step": 15211 }, { "epoch": 8.498324022346369, "grad_norm": 0.3822142779827118, "learning_rate": 0.0005770028011204483, "loss": 0.422, "step": 15212 }, { "epoch": 8.498882681564245, "grad_norm": 0.6450942754745483, "learning_rate": 0.0005769747899159665, "loss": 0.4827, "step": 15213 }, { "epoch": 8.499441340782123, "grad_norm": 0.4435880780220032, "learning_rate": 0.0005769467787114846, "loss": 0.388, "step": 15214 }, { "epoch": 8.5, "grad_norm": 0.49928146600723267, "learning_rate": 0.0005769187675070028, "loss": 0.3566, "step": 15215 }, { "epoch": 8.500558659217877, "grad_norm": 0.7004232406616211, "learning_rate": 0.000576890756302521, "loss": 0.3701, "step": 15216 }, { "epoch": 8.501117318435755, "grad_norm": 0.48921772837638855, "learning_rate": 0.0005768627450980393, "loss": 0.4805, "step": 15217 }, { "epoch": 8.501675977653631, "grad_norm": 0.35773390531539917, "learning_rate": 0.0005768347338935575, "loss": 0.4482, "step": 15218 }, { "epoch": 8.502234636871508, "grad_norm": 0.7257769107818604, "learning_rate": 0.0005768067226890756, "loss": 0.5911, "step": 15219 }, { "epoch": 8.502793296089386, "grad_norm": 0.4031614363193512, "learning_rate": 0.0005767787114845938, "loss": 0.3966, "step": 15220 }, { "epoch": 8.503351955307263, "grad_norm": 0.334354966878891, "learning_rate": 0.000576750700280112, "loss": 0.4081, "step": 15221 }, { "epoch": 8.50391061452514, "grad_norm": 0.40169692039489746, "learning_rate": 0.0005767226890756303, "loss": 0.3978, "step": 15222 }, { "epoch": 8.504469273743016, "grad_norm": 0.8172411322593689, "learning_rate": 0.0005766946778711485, "loss": 0.3922, 
"step": 15223 }, { "epoch": 8.505027932960894, "grad_norm": 1.7994247674942017, "learning_rate": 0.0005766666666666666, "loss": 0.4577, "step": 15224 }, { "epoch": 8.505586592178771, "grad_norm": 0.5939977169036865, "learning_rate": 0.0005766386554621848, "loss": 0.4589, "step": 15225 }, { "epoch": 8.506145251396648, "grad_norm": 0.4617646038532257, "learning_rate": 0.000576610644257703, "loss": 0.5809, "step": 15226 }, { "epoch": 8.506703910614526, "grad_norm": 0.7643673419952393, "learning_rate": 0.0005765826330532214, "loss": 0.4111, "step": 15227 }, { "epoch": 8.507262569832402, "grad_norm": 0.6527523398399353, "learning_rate": 0.0005765546218487396, "loss": 0.5062, "step": 15228 }, { "epoch": 8.507821229050279, "grad_norm": 0.7315698862075806, "learning_rate": 0.0005765266106442578, "loss": 0.4736, "step": 15229 }, { "epoch": 8.508379888268156, "grad_norm": 0.92759108543396, "learning_rate": 0.0005764985994397759, "loss": 0.3637, "step": 15230 }, { "epoch": 8.508938547486034, "grad_norm": 0.3782186210155487, "learning_rate": 0.0005764705882352941, "loss": 0.4743, "step": 15231 }, { "epoch": 8.50949720670391, "grad_norm": 4.267871379852295, "learning_rate": 0.0005764425770308124, "loss": 0.4108, "step": 15232 }, { "epoch": 8.510055865921787, "grad_norm": 0.7674300074577332, "learning_rate": 0.0005764145658263306, "loss": 0.3554, "step": 15233 }, { "epoch": 8.510614525139665, "grad_norm": 0.5992588400840759, "learning_rate": 0.0005763865546218488, "loss": 0.5023, "step": 15234 }, { "epoch": 8.511173184357542, "grad_norm": 0.7348669767379761, "learning_rate": 0.0005763585434173669, "loss": 0.4499, "step": 15235 }, { "epoch": 8.511731843575419, "grad_norm": 0.8790232539176941, "learning_rate": 0.0005763305322128851, "loss": 0.3699, "step": 15236 }, { "epoch": 8.512290502793297, "grad_norm": 0.4855366349220276, "learning_rate": 0.0005763025210084034, "loss": 0.393, "step": 15237 }, { "epoch": 8.512849162011173, "grad_norm": 2.1212544441223145, "learning_rate": 
0.0005762745098039216, "loss": 0.3952, "step": 15238 }, { "epoch": 8.51340782122905, "grad_norm": 0.5433536171913147, "learning_rate": 0.0005762464985994398, "loss": 0.37, "step": 15239 }, { "epoch": 8.513966480446927, "grad_norm": 0.7903736233711243, "learning_rate": 0.0005762184873949579, "loss": 0.4866, "step": 15240 }, { "epoch": 8.514525139664805, "grad_norm": 1.2631672620773315, "learning_rate": 0.0005761904761904761, "loss": 0.6079, "step": 15241 }, { "epoch": 8.515083798882682, "grad_norm": 3.1984617710113525, "learning_rate": 0.0005761624649859944, "loss": 0.386, "step": 15242 }, { "epoch": 8.515642458100558, "grad_norm": 0.6026468873023987, "learning_rate": 0.0005761344537815126, "loss": 0.4322, "step": 15243 }, { "epoch": 8.516201117318436, "grad_norm": 0.49546369910240173, "learning_rate": 0.0005761064425770309, "loss": 0.4695, "step": 15244 }, { "epoch": 8.516759776536313, "grad_norm": 0.6571424007415771, "learning_rate": 0.0005760784313725491, "loss": 0.4463, "step": 15245 }, { "epoch": 8.51731843575419, "grad_norm": 0.5202311277389526, "learning_rate": 0.0005760504201680672, "loss": 0.4609, "step": 15246 }, { "epoch": 8.517877094972068, "grad_norm": 0.5377156138420105, "learning_rate": 0.0005760224089635855, "loss": 0.5627, "step": 15247 }, { "epoch": 8.518435754189944, "grad_norm": 0.9329795837402344, "learning_rate": 0.0005759943977591037, "loss": 0.5134, "step": 15248 }, { "epoch": 8.518994413407821, "grad_norm": 1.0959504842758179, "learning_rate": 0.0005759663865546219, "loss": 0.3847, "step": 15249 }, { "epoch": 8.519553072625698, "grad_norm": 0.6080384254455566, "learning_rate": 0.0005759383753501401, "loss": 0.5454, "step": 15250 }, { "epoch": 8.520111731843576, "grad_norm": 0.5090042352676392, "learning_rate": 0.0005759103641456582, "loss": 0.4289, "step": 15251 }, { "epoch": 8.520670391061453, "grad_norm": 0.32931584119796753, "learning_rate": 0.0005758823529411765, "loss": 0.3426, "step": 15252 }, { "epoch": 8.521229050279329, "grad_norm": 
0.721106767654419, "learning_rate": 0.0005758543417366947, "loss": 0.4052, "step": 15253 }, { "epoch": 8.521787709497207, "grad_norm": 0.5239723920822144, "learning_rate": 0.0005758263305322129, "loss": 0.4452, "step": 15254 }, { "epoch": 8.522346368715084, "grad_norm": 0.5963440537452698, "learning_rate": 0.0005757983193277311, "loss": 0.3637, "step": 15255 }, { "epoch": 8.52290502793296, "grad_norm": 0.709584653377533, "learning_rate": 0.0005757703081232492, "loss": 0.4127, "step": 15256 }, { "epoch": 8.523463687150837, "grad_norm": 0.6826097369194031, "learning_rate": 0.0005757422969187675, "loss": 0.4371, "step": 15257 }, { "epoch": 8.524022346368715, "grad_norm": 0.4796157479286194, "learning_rate": 0.0005757142857142857, "loss": 0.4362, "step": 15258 }, { "epoch": 8.524581005586592, "grad_norm": 0.530832052230835, "learning_rate": 0.0005756862745098039, "loss": 0.447, "step": 15259 }, { "epoch": 8.525139664804469, "grad_norm": 0.5507177114486694, "learning_rate": 0.0005756582633053221, "loss": 0.5301, "step": 15260 }, { "epoch": 8.525698324022347, "grad_norm": 0.45665866136550903, "learning_rate": 0.0005756302521008404, "loss": 0.4246, "step": 15261 }, { "epoch": 8.526256983240224, "grad_norm": 1.238662838935852, "learning_rate": 0.0005756022408963586, "loss": 0.3877, "step": 15262 }, { "epoch": 8.5268156424581, "grad_norm": 0.5967044234275818, "learning_rate": 0.0005755742296918768, "loss": 0.4119, "step": 15263 }, { "epoch": 8.527374301675978, "grad_norm": 1.104540467262268, "learning_rate": 0.000575546218487395, "loss": 0.3732, "step": 15264 }, { "epoch": 8.527932960893855, "grad_norm": 0.39002472162246704, "learning_rate": 0.0005755182072829132, "loss": 0.4222, "step": 15265 }, { "epoch": 8.528491620111732, "grad_norm": 1.3714148998260498, "learning_rate": 0.0005754901960784314, "loss": 0.3174, "step": 15266 }, { "epoch": 8.529050279329608, "grad_norm": 0.40640711784362793, "learning_rate": 0.0005754621848739496, "loss": 0.4269, "step": 15267 }, { 
"epoch": 8.529608938547486, "grad_norm": 0.4001014530658722, "learning_rate": 0.0005754341736694678, "loss": 0.4606, "step": 15268 }, { "epoch": 8.530167597765363, "grad_norm": 0.6572843194007874, "learning_rate": 0.000575406162464986, "loss": 0.4876, "step": 15269 }, { "epoch": 8.53072625698324, "grad_norm": 0.5424188375473022, "learning_rate": 0.0005753781512605042, "loss": 0.4822, "step": 15270 }, { "epoch": 8.531284916201118, "grad_norm": 0.3950853943824768, "learning_rate": 0.0005753501400560224, "loss": 0.4627, "step": 15271 }, { "epoch": 8.531843575418995, "grad_norm": 0.580479085445404, "learning_rate": 0.0005753221288515406, "loss": 0.5232, "step": 15272 }, { "epoch": 8.532402234636871, "grad_norm": 0.5094438791275024, "learning_rate": 0.0005752941176470588, "loss": 0.5471, "step": 15273 }, { "epoch": 8.53296089385475, "grad_norm": 2.7602362632751465, "learning_rate": 0.000575266106442577, "loss": 0.4116, "step": 15274 }, { "epoch": 8.533519553072626, "grad_norm": 0.4748554527759552, "learning_rate": 0.0005752380952380952, "loss": 0.4031, "step": 15275 }, { "epoch": 8.534078212290503, "grad_norm": 0.48180916905403137, "learning_rate": 0.0005752100840336134, "loss": 0.4343, "step": 15276 }, { "epoch": 8.53463687150838, "grad_norm": 2.5633301734924316, "learning_rate": 0.0005751820728291318, "loss": 0.4275, "step": 15277 }, { "epoch": 8.535195530726257, "grad_norm": 1.068892478942871, "learning_rate": 0.0005751540616246499, "loss": 0.418, "step": 15278 }, { "epoch": 8.535754189944134, "grad_norm": 0.4912353456020355, "learning_rate": 0.0005751260504201681, "loss": 0.3105, "step": 15279 }, { "epoch": 8.53631284916201, "grad_norm": 1.028026819229126, "learning_rate": 0.0005750980392156863, "loss": 0.4586, "step": 15280 }, { "epoch": 8.536871508379889, "grad_norm": 0.45351430773735046, "learning_rate": 0.0005750700280112045, "loss": 0.5092, "step": 15281 }, { "epoch": 8.537430167597766, "grad_norm": 0.5421093106269836, "learning_rate": 0.0005750420168067228, 
"loss": 0.5087, "step": 15282 }, { "epoch": 8.537988826815642, "grad_norm": 0.706861674785614, "learning_rate": 0.0005750140056022409, "loss": 0.3875, "step": 15283 }, { "epoch": 8.538547486033519, "grad_norm": 0.4213111698627472, "learning_rate": 0.0005749859943977591, "loss": 0.4406, "step": 15284 }, { "epoch": 8.539106145251397, "grad_norm": 0.7430433630943298, "learning_rate": 0.0005749579831932773, "loss": 0.5058, "step": 15285 }, { "epoch": 8.539664804469274, "grad_norm": 1.3065729141235352, "learning_rate": 0.0005749299719887955, "loss": 0.4532, "step": 15286 }, { "epoch": 8.54022346368715, "grad_norm": 0.45534664392471313, "learning_rate": 0.0005749019607843138, "loss": 0.399, "step": 15287 }, { "epoch": 8.540782122905028, "grad_norm": 0.6419287919998169, "learning_rate": 0.0005748739495798319, "loss": 0.4442, "step": 15288 }, { "epoch": 8.541340782122905, "grad_norm": 0.8328287601470947, "learning_rate": 0.0005748459383753501, "loss": 0.3241, "step": 15289 }, { "epoch": 8.541899441340782, "grad_norm": 0.6678518652915955, "learning_rate": 0.0005748179271708683, "loss": 0.4441, "step": 15290 }, { "epoch": 8.54245810055866, "grad_norm": 0.4900563359260559, "learning_rate": 0.0005747899159663865, "loss": 0.5411, "step": 15291 }, { "epoch": 8.543016759776537, "grad_norm": 0.415881484746933, "learning_rate": 0.0005747619047619048, "loss": 0.3548, "step": 15292 }, { "epoch": 8.543575418994413, "grad_norm": 0.48712825775146484, "learning_rate": 0.0005747338935574231, "loss": 0.417, "step": 15293 }, { "epoch": 8.544134078212291, "grad_norm": 0.5948696136474609, "learning_rate": 0.0005747058823529412, "loss": 0.5534, "step": 15294 }, { "epoch": 8.544692737430168, "grad_norm": 0.6844908595085144, "learning_rate": 0.0005746778711484594, "loss": 0.3397, "step": 15295 }, { "epoch": 8.545251396648045, "grad_norm": 0.516048014163971, "learning_rate": 0.0005746498599439776, "loss": 0.4157, "step": 15296 }, { "epoch": 8.545810055865921, "grad_norm": 0.37359654903411865, 
"learning_rate": 0.0005746218487394959, "loss": 0.3686, "step": 15297 }, { "epoch": 8.5463687150838, "grad_norm": 0.4231356680393219, "learning_rate": 0.0005745938375350141, "loss": 0.3044, "step": 15298 }, { "epoch": 8.546927374301676, "grad_norm": 0.5538113117218018, "learning_rate": 0.0005745658263305322, "loss": 0.394, "step": 15299 }, { "epoch": 8.547486033519553, "grad_norm": 0.44817057251930237, "learning_rate": 0.0005745378151260504, "loss": 0.4676, "step": 15300 }, { "epoch": 8.548044692737431, "grad_norm": 0.5843313932418823, "learning_rate": 0.0005745098039215686, "loss": 0.4392, "step": 15301 }, { "epoch": 8.548603351955308, "grad_norm": 0.5026608109474182, "learning_rate": 0.0005744817927170869, "loss": 0.4901, "step": 15302 }, { "epoch": 8.549162011173184, "grad_norm": 0.5650805234909058, "learning_rate": 0.0005744537815126051, "loss": 0.4373, "step": 15303 }, { "epoch": 8.54972067039106, "grad_norm": 0.4040001332759857, "learning_rate": 0.0005744257703081232, "loss": 0.323, "step": 15304 }, { "epoch": 8.550279329608939, "grad_norm": 1.3389577865600586, "learning_rate": 0.0005743977591036414, "loss": 0.3031, "step": 15305 }, { "epoch": 8.550837988826816, "grad_norm": 1.3753235340118408, "learning_rate": 0.0005743697478991596, "loss": 0.4942, "step": 15306 }, { "epoch": 8.551396648044692, "grad_norm": 0.437633752822876, "learning_rate": 0.0005743417366946779, "loss": 0.4945, "step": 15307 }, { "epoch": 8.55195530726257, "grad_norm": 1.9556528329849243, "learning_rate": 0.0005743137254901961, "loss": 0.4551, "step": 15308 }, { "epoch": 8.552513966480447, "grad_norm": 0.35048234462738037, "learning_rate": 0.0005742857142857144, "loss": 0.4185, "step": 15309 }, { "epoch": 8.553072625698324, "grad_norm": 0.44669902324676514, "learning_rate": 0.0005742577030812324, "loss": 0.4259, "step": 15310 }, { "epoch": 8.553631284916202, "grad_norm": 3.903268814086914, "learning_rate": 0.0005742296918767507, "loss": 0.5387, "step": 15311 }, { "epoch": 
8.554189944134079, "grad_norm": 0.6867513656616211, "learning_rate": 0.000574201680672269, "loss": 0.4175, "step": 15312 }, { "epoch": 8.554748603351955, "grad_norm": 0.5278183817863464, "learning_rate": 0.0005741736694677872, "loss": 0.5153, "step": 15313 }, { "epoch": 8.555307262569832, "grad_norm": 2.99004864692688, "learning_rate": 0.0005741456582633054, "loss": 0.4439, "step": 15314 }, { "epoch": 8.55586592178771, "grad_norm": 0.5834138989448547, "learning_rate": 0.0005741176470588235, "loss": 0.4667, "step": 15315 }, { "epoch": 8.556424581005587, "grad_norm": 0.4948843717575073, "learning_rate": 0.0005740896358543417, "loss": 0.4084, "step": 15316 }, { "epoch": 8.556983240223463, "grad_norm": 1.5831941366195679, "learning_rate": 0.00057406162464986, "loss": 0.5034, "step": 15317 }, { "epoch": 8.557541899441341, "grad_norm": 0.47506552934646606, "learning_rate": 0.0005740336134453782, "loss": 0.4968, "step": 15318 }, { "epoch": 8.558100558659218, "grad_norm": 0.40010133385658264, "learning_rate": 0.0005740056022408964, "loss": 0.4323, "step": 15319 }, { "epoch": 8.558659217877095, "grad_norm": 0.46140024065971375, "learning_rate": 0.0005739775910364145, "loss": 0.522, "step": 15320 }, { "epoch": 8.559217877094973, "grad_norm": 0.8153825402259827, "learning_rate": 0.0005739495798319327, "loss": 0.5142, "step": 15321 }, { "epoch": 8.55977653631285, "grad_norm": 0.6610863208770752, "learning_rate": 0.000573921568627451, "loss": 0.4634, "step": 15322 }, { "epoch": 8.560335195530726, "grad_norm": 0.4047147035598755, "learning_rate": 0.0005738935574229692, "loss": 0.4321, "step": 15323 }, { "epoch": 8.560893854748603, "grad_norm": 0.5521747469902039, "learning_rate": 0.0005738655462184874, "loss": 0.3799, "step": 15324 }, { "epoch": 8.561452513966481, "grad_norm": 0.47687071561813354, "learning_rate": 0.0005738375350140056, "loss": 0.4006, "step": 15325 }, { "epoch": 8.562011173184358, "grad_norm": 0.38448652625083923, "learning_rate": 0.0005738095238095237, "loss": 
0.3422, "step": 15326 }, { "epoch": 8.562569832402234, "grad_norm": 0.4361882209777832, "learning_rate": 0.0005737815126050421, "loss": 0.4377, "step": 15327 }, { "epoch": 8.563128491620112, "grad_norm": 0.544578492641449, "learning_rate": 0.0005737535014005603, "loss": 0.4607, "step": 15328 }, { "epoch": 8.563687150837989, "grad_norm": 0.37169408798217773, "learning_rate": 0.0005737254901960785, "loss": 0.3756, "step": 15329 }, { "epoch": 8.564245810055866, "grad_norm": 0.5007482767105103, "learning_rate": 0.0005736974789915967, "loss": 0.4422, "step": 15330 }, { "epoch": 8.564804469273742, "grad_norm": 0.4904552400112152, "learning_rate": 0.0005736694677871148, "loss": 0.3253, "step": 15331 }, { "epoch": 8.56536312849162, "grad_norm": 1.5046217441558838, "learning_rate": 0.0005736414565826331, "loss": 0.4161, "step": 15332 }, { "epoch": 8.565921787709497, "grad_norm": 0.6295437216758728, "learning_rate": 0.0005736134453781513, "loss": 0.4532, "step": 15333 }, { "epoch": 8.566480446927374, "grad_norm": 0.5816341042518616, "learning_rate": 0.0005735854341736695, "loss": 0.3213, "step": 15334 }, { "epoch": 8.567039106145252, "grad_norm": 0.4504592716693878, "learning_rate": 0.0005735574229691877, "loss": 0.4142, "step": 15335 }, { "epoch": 8.567597765363129, "grad_norm": 0.4077862799167633, "learning_rate": 0.0005735294117647058, "loss": 0.363, "step": 15336 }, { "epoch": 8.568156424581005, "grad_norm": 0.47129207849502563, "learning_rate": 0.0005735014005602241, "loss": 0.372, "step": 15337 }, { "epoch": 8.568715083798883, "grad_norm": 1.7956185340881348, "learning_rate": 0.0005734733893557423, "loss": 0.5204, "step": 15338 }, { "epoch": 8.56927374301676, "grad_norm": 0.42570939660072327, "learning_rate": 0.0005734453781512605, "loss": 0.5525, "step": 15339 }, { "epoch": 8.569832402234637, "grad_norm": 0.38253700733184814, "learning_rate": 0.0005734173669467787, "loss": 0.3981, "step": 15340 }, { "epoch": 8.570391061452513, "grad_norm": 0.5737777948379517, 
"learning_rate": 0.0005733893557422969, "loss": 0.6334, "step": 15341 }, { "epoch": 8.570949720670392, "grad_norm": 0.552773654460907, "learning_rate": 0.0005733613445378151, "loss": 0.4469, "step": 15342 }, { "epoch": 8.571508379888268, "grad_norm": 0.5476149320602417, "learning_rate": 0.0005733333333333334, "loss": 0.3532, "step": 15343 }, { "epoch": 8.572067039106145, "grad_norm": 0.7920732498168945, "learning_rate": 0.0005733053221288516, "loss": 0.5412, "step": 15344 }, { "epoch": 8.572625698324023, "grad_norm": 0.4449453055858612, "learning_rate": 0.0005732773109243698, "loss": 0.4464, "step": 15345 }, { "epoch": 8.5731843575419, "grad_norm": 0.5351168513298035, "learning_rate": 0.000573249299719888, "loss": 0.4083, "step": 15346 }, { "epoch": 8.573743016759776, "grad_norm": 0.5519024729728699, "learning_rate": 0.0005732212885154062, "loss": 0.4925, "step": 15347 }, { "epoch": 8.574301675977654, "grad_norm": 1.1811132431030273, "learning_rate": 0.0005731932773109244, "loss": 0.4455, "step": 15348 }, { "epoch": 8.574860335195531, "grad_norm": 3.202483654022217, "learning_rate": 0.0005731652661064426, "loss": 0.5439, "step": 15349 }, { "epoch": 8.575418994413408, "grad_norm": 0.559126079082489, "learning_rate": 0.0005731372549019608, "loss": 0.4076, "step": 15350 }, { "epoch": 8.575977653631284, "grad_norm": 0.5949681997299194, "learning_rate": 0.000573109243697479, "loss": 0.5334, "step": 15351 }, { "epoch": 8.576536312849163, "grad_norm": 1.6870633363723755, "learning_rate": 0.0005730812324929972, "loss": 0.43, "step": 15352 }, { "epoch": 8.577094972067039, "grad_norm": 1.1216844320297241, "learning_rate": 0.0005730532212885154, "loss": 0.4933, "step": 15353 }, { "epoch": 8.577653631284916, "grad_norm": 0.5340660810470581, "learning_rate": 0.0005730252100840336, "loss": 0.4221, "step": 15354 }, { "epoch": 8.578212290502794, "grad_norm": 1.3156418800354004, "learning_rate": 0.0005729971988795518, "loss": 0.6627, "step": 15355 }, { "epoch": 8.57877094972067, 
"grad_norm": 0.49273672699928284, "learning_rate": 0.00057296918767507, "loss": 0.4833, "step": 15356 }, { "epoch": 8.579329608938547, "grad_norm": 6.776882648468018, "learning_rate": 0.0005729411764705883, "loss": 0.4101, "step": 15357 }, { "epoch": 8.579888268156424, "grad_norm": 0.3707313537597656, "learning_rate": 0.0005729131652661064, "loss": 0.3967, "step": 15358 }, { "epoch": 8.580446927374302, "grad_norm": 0.7379883527755737, "learning_rate": 0.0005728851540616247, "loss": 0.4327, "step": 15359 }, { "epoch": 8.581005586592179, "grad_norm": 0.7171437740325928, "learning_rate": 0.0005728571428571429, "loss": 0.4237, "step": 15360 }, { "epoch": 8.581564245810055, "grad_norm": 0.5564234256744385, "learning_rate": 0.0005728291316526611, "loss": 0.6196, "step": 15361 }, { "epoch": 8.582122905027934, "grad_norm": 0.6342710852622986, "learning_rate": 0.0005728011204481794, "loss": 0.5136, "step": 15362 }, { "epoch": 8.58268156424581, "grad_norm": 0.4498765170574188, "learning_rate": 0.0005727731092436975, "loss": 0.4119, "step": 15363 }, { "epoch": 8.583240223463687, "grad_norm": 0.5579925179481506, "learning_rate": 0.0005727450980392157, "loss": 0.4959, "step": 15364 }, { "epoch": 8.583798882681565, "grad_norm": 0.4603900611400604, "learning_rate": 0.0005727170868347339, "loss": 0.4239, "step": 15365 }, { "epoch": 8.584357541899442, "grad_norm": 0.616898238658905, "learning_rate": 0.0005726890756302521, "loss": 0.3897, "step": 15366 }, { "epoch": 8.584916201117318, "grad_norm": 0.8591034412384033, "learning_rate": 0.0005726610644257704, "loss": 0.4368, "step": 15367 }, { "epoch": 8.585474860335196, "grad_norm": 1.5668877363204956, "learning_rate": 0.0005726330532212885, "loss": 0.4951, "step": 15368 }, { "epoch": 8.586033519553073, "grad_norm": 0.7134303450584412, "learning_rate": 0.0005726050420168067, "loss": 0.3598, "step": 15369 }, { "epoch": 8.58659217877095, "grad_norm": 0.6393819451332092, "learning_rate": 0.0005725770308123249, "loss": 0.4035, "step": 
15370 }, { "epoch": 8.587150837988826, "grad_norm": 0.3917793333530426, "learning_rate": 0.0005725490196078431, "loss": 0.4271, "step": 15371 }, { "epoch": 8.587709497206705, "grad_norm": 0.3668496310710907, "learning_rate": 0.0005725210084033614, "loss": 0.4282, "step": 15372 }, { "epoch": 8.588268156424581, "grad_norm": 0.48604726791381836, "learning_rate": 0.0005724929971988796, "loss": 0.3779, "step": 15373 }, { "epoch": 8.588826815642458, "grad_norm": 0.469411700963974, "learning_rate": 0.0005724649859943977, "loss": 0.506, "step": 15374 }, { "epoch": 8.589385474860336, "grad_norm": 1.0414034128189087, "learning_rate": 0.000572436974789916, "loss": 0.58, "step": 15375 }, { "epoch": 8.589944134078213, "grad_norm": 0.650741696357727, "learning_rate": 0.0005724089635854342, "loss": 0.4933, "step": 15376 }, { "epoch": 8.59050279329609, "grad_norm": 1.2917307615280151, "learning_rate": 0.0005723809523809525, "loss": 0.486, "step": 15377 }, { "epoch": 8.591061452513966, "grad_norm": 0.5404037833213806, "learning_rate": 0.0005723529411764707, "loss": 0.5444, "step": 15378 }, { "epoch": 8.591620111731844, "grad_norm": 0.5045173764228821, "learning_rate": 0.0005723249299719888, "loss": 0.4291, "step": 15379 }, { "epoch": 8.59217877094972, "grad_norm": 0.4748923182487488, "learning_rate": 0.000572296918767507, "loss": 0.5593, "step": 15380 }, { "epoch": 8.592737430167597, "grad_norm": 0.4588840901851654, "learning_rate": 0.0005722689075630252, "loss": 0.4488, "step": 15381 }, { "epoch": 8.593296089385476, "grad_norm": 0.4534672200679779, "learning_rate": 0.0005722408963585435, "loss": 0.5363, "step": 15382 }, { "epoch": 8.593854748603352, "grad_norm": 0.5587313771247864, "learning_rate": 0.0005722128851540617, "loss": 0.3772, "step": 15383 }, { "epoch": 8.594413407821229, "grad_norm": 0.5406129956245422, "learning_rate": 0.0005721848739495798, "loss": 0.4719, "step": 15384 }, { "epoch": 8.594972067039105, "grad_norm": 0.46785107254981995, "learning_rate": 
0.000572156862745098, "loss": 0.5067, "step": 15385 }, { "epoch": 8.595530726256984, "grad_norm": 0.3882419466972351, "learning_rate": 0.0005721288515406162, "loss": 0.3249, "step": 15386 }, { "epoch": 8.59608938547486, "grad_norm": 0.696916401386261, "learning_rate": 0.0005721008403361345, "loss": 0.5048, "step": 15387 }, { "epoch": 8.596648044692737, "grad_norm": 0.7198442220687866, "learning_rate": 0.0005720728291316527, "loss": 0.3699, "step": 15388 }, { "epoch": 8.597206703910615, "grad_norm": 0.5968286395072937, "learning_rate": 0.0005720448179271709, "loss": 0.4135, "step": 15389 }, { "epoch": 8.597765363128492, "grad_norm": 0.4334351122379303, "learning_rate": 0.000572016806722689, "loss": 0.3991, "step": 15390 }, { "epoch": 8.598324022346368, "grad_norm": 4.028077602386475, "learning_rate": 0.0005719887955182072, "loss": 0.4556, "step": 15391 }, { "epoch": 8.598882681564247, "grad_norm": 0.38883304595947266, "learning_rate": 0.0005719607843137256, "loss": 0.3998, "step": 15392 }, { "epoch": 8.599441340782123, "grad_norm": 0.47652119398117065, "learning_rate": 0.0005719327731092438, "loss": 0.5709, "step": 15393 }, { "epoch": 8.6, "grad_norm": 0.48822495341300964, "learning_rate": 0.000571904761904762, "loss": 0.3295, "step": 15394 }, { "epoch": 8.600558659217878, "grad_norm": 0.5199114680290222, "learning_rate": 0.0005718767507002801, "loss": 0.4382, "step": 15395 }, { "epoch": 8.601117318435755, "grad_norm": 0.4140242338180542, "learning_rate": 0.0005718487394957983, "loss": 0.3759, "step": 15396 }, { "epoch": 8.601675977653631, "grad_norm": 9.742976188659668, "learning_rate": 0.0005718207282913165, "loss": 0.455, "step": 15397 }, { "epoch": 8.602234636871508, "grad_norm": 0.5877695679664612, "learning_rate": 0.0005717927170868348, "loss": 0.3994, "step": 15398 }, { "epoch": 8.602793296089386, "grad_norm": 0.5117267370223999, "learning_rate": 0.000571764705882353, "loss": 0.4464, "step": 15399 }, { "epoch": 8.603351955307263, "grad_norm": 
0.3696885108947754, "learning_rate": 0.0005717366946778711, "loss": 0.3576, "step": 15400 }, { "epoch": 8.60391061452514, "grad_norm": 3.027294397354126, "learning_rate": 0.0005717086834733893, "loss": 0.4177, "step": 15401 }, { "epoch": 8.604469273743018, "grad_norm": 0.7337448000907898, "learning_rate": 0.0005716806722689075, "loss": 0.2937, "step": 15402 }, { "epoch": 8.605027932960894, "grad_norm": 1.5567195415496826, "learning_rate": 0.0005716526610644258, "loss": 0.3165, "step": 15403 }, { "epoch": 8.60558659217877, "grad_norm": 0.7158293724060059, "learning_rate": 0.000571624649859944, "loss": 0.5073, "step": 15404 }, { "epoch": 8.606145251396647, "grad_norm": 3.5723652839660645, "learning_rate": 0.0005715966386554622, "loss": 0.4221, "step": 15405 }, { "epoch": 8.606703910614526, "grad_norm": 0.4587322175502777, "learning_rate": 0.0005715686274509803, "loss": 0.4709, "step": 15406 }, { "epoch": 8.607262569832402, "grad_norm": 0.5292487740516663, "learning_rate": 0.0005715406162464985, "loss": 0.4456, "step": 15407 }, { "epoch": 8.607821229050279, "grad_norm": 0.5739074945449829, "learning_rate": 0.0005715126050420169, "loss": 0.4395, "step": 15408 }, { "epoch": 8.608379888268157, "grad_norm": 0.46167340874671936, "learning_rate": 0.0005714845938375351, "loss": 0.4064, "step": 15409 }, { "epoch": 8.608938547486034, "grad_norm": 0.46245571970939636, "learning_rate": 0.0005714565826330533, "loss": 0.4121, "step": 15410 }, { "epoch": 8.60949720670391, "grad_norm": 0.4056048095226288, "learning_rate": 0.0005714285714285714, "loss": 0.4483, "step": 15411 }, { "epoch": 8.610055865921789, "grad_norm": 0.6184943914413452, "learning_rate": 0.0005714005602240896, "loss": 0.6366, "step": 15412 }, { "epoch": 8.610614525139665, "grad_norm": 0.33188676834106445, "learning_rate": 0.0005713725490196079, "loss": 0.4143, "step": 15413 }, { "epoch": 8.611173184357542, "grad_norm": 0.3528975546360016, "learning_rate": 0.0005713445378151261, "loss": 0.3827, "step": 15414 }, { 
"epoch": 8.611731843575418, "grad_norm": 0.5689566135406494, "learning_rate": 0.0005713165266106443, "loss": 0.6413, "step": 15415 }, { "epoch": 8.612290502793297, "grad_norm": 0.4984848201274872, "learning_rate": 0.0005712885154061624, "loss": 0.4425, "step": 15416 }, { "epoch": 8.612849162011173, "grad_norm": 0.5701281428337097, "learning_rate": 0.0005712605042016806, "loss": 0.4005, "step": 15417 }, { "epoch": 8.61340782122905, "grad_norm": 0.8601603507995605, "learning_rate": 0.0005712324929971989, "loss": 0.5124, "step": 15418 }, { "epoch": 8.613966480446928, "grad_norm": 0.3771197199821472, "learning_rate": 0.0005712044817927171, "loss": 0.4134, "step": 15419 }, { "epoch": 8.614525139664805, "grad_norm": 0.5102972984313965, "learning_rate": 0.0005711764705882353, "loss": 0.3771, "step": 15420 }, { "epoch": 8.615083798882681, "grad_norm": 0.3841908872127533, "learning_rate": 0.0005711484593837535, "loss": 0.3402, "step": 15421 }, { "epoch": 8.61564245810056, "grad_norm": 17.929323196411133, "learning_rate": 0.0005711204481792716, "loss": 0.3302, "step": 15422 }, { "epoch": 8.616201117318436, "grad_norm": 0.5760158896446228, "learning_rate": 0.0005710924369747899, "loss": 0.498, "step": 15423 }, { "epoch": 8.616759776536313, "grad_norm": 0.5124297738075256, "learning_rate": 0.0005710644257703081, "loss": 0.5373, "step": 15424 }, { "epoch": 8.61731843575419, "grad_norm": 0.42361506819725037, "learning_rate": 0.0005710364145658264, "loss": 0.3442, "step": 15425 }, { "epoch": 8.617877094972068, "grad_norm": 0.9741013646125793, "learning_rate": 0.0005710084033613446, "loss": 0.364, "step": 15426 }, { "epoch": 8.618435754189944, "grad_norm": 0.663214385509491, "learning_rate": 0.0005709803921568627, "loss": 0.4551, "step": 15427 }, { "epoch": 8.61899441340782, "grad_norm": 0.5509231686592102, "learning_rate": 0.000570952380952381, "loss": 0.3593, "step": 15428 }, { "epoch": 8.619553072625699, "grad_norm": 0.5926401019096375, "learning_rate": 0.0005709243697478992, 
"loss": 0.5347, "step": 15429 }, { "epoch": 8.620111731843576, "grad_norm": 0.42184609174728394, "learning_rate": 0.0005708963585434174, "loss": 0.4203, "step": 15430 }, { "epoch": 8.620670391061452, "grad_norm": 0.5329546332359314, "learning_rate": 0.0005708683473389356, "loss": 0.4386, "step": 15431 }, { "epoch": 8.621229050279329, "grad_norm": 0.77520751953125, "learning_rate": 0.0005708403361344537, "loss": 0.3368, "step": 15432 }, { "epoch": 8.621787709497207, "grad_norm": 0.5103943943977356, "learning_rate": 0.000570812324929972, "loss": 0.5568, "step": 15433 }, { "epoch": 8.622346368715084, "grad_norm": 0.48108652234077454, "learning_rate": 0.0005707843137254902, "loss": 0.345, "step": 15434 }, { "epoch": 8.62290502793296, "grad_norm": 0.42470112442970276, "learning_rate": 0.0005707563025210084, "loss": 0.4091, "step": 15435 }, { "epoch": 8.623463687150839, "grad_norm": 0.4808240234851837, "learning_rate": 0.0005707282913165266, "loss": 0.4693, "step": 15436 }, { "epoch": 8.624022346368715, "grad_norm": 0.6123438477516174, "learning_rate": 0.0005707002801120448, "loss": 0.4776, "step": 15437 }, { "epoch": 8.624581005586592, "grad_norm": 0.43505755066871643, "learning_rate": 0.000570672268907563, "loss": 0.4637, "step": 15438 }, { "epoch": 8.62513966480447, "grad_norm": 0.5505017042160034, "learning_rate": 0.0005706442577030812, "loss": 0.4632, "step": 15439 }, { "epoch": 8.625698324022347, "grad_norm": 0.5906779170036316, "learning_rate": 0.0005706162464985994, "loss": 0.4499, "step": 15440 }, { "epoch": 8.626256983240223, "grad_norm": 0.45223239064216614, "learning_rate": 0.0005705882352941177, "loss": 0.3646, "step": 15441 }, { "epoch": 8.6268156424581, "grad_norm": 0.7143236398696899, "learning_rate": 0.0005705602240896359, "loss": 0.3868, "step": 15442 }, { "epoch": 8.627374301675978, "grad_norm": 0.39840784668922424, "learning_rate": 0.0005705322128851541, "loss": 0.4613, "step": 15443 }, { "epoch": 8.627932960893855, "grad_norm": 1.2329320907592773, 
"learning_rate": 0.0005705042016806723, "loss": 0.5497, "step": 15444 }, { "epoch": 8.628491620111731, "grad_norm": 0.6231057047843933, "learning_rate": 0.0005704761904761905, "loss": 0.4027, "step": 15445 }, { "epoch": 8.62905027932961, "grad_norm": 0.7226108908653259, "learning_rate": 0.0005704481792717087, "loss": 0.4338, "step": 15446 }, { "epoch": 8.629608938547486, "grad_norm": 0.5482200980186462, "learning_rate": 0.0005704201680672269, "loss": 0.3992, "step": 15447 }, { "epoch": 8.630167597765363, "grad_norm": 0.8195366263389587, "learning_rate": 0.0005703921568627452, "loss": 0.4491, "step": 15448 }, { "epoch": 8.630726256983241, "grad_norm": 0.5906659364700317, "learning_rate": 0.0005703641456582633, "loss": 0.4374, "step": 15449 }, { "epoch": 8.631284916201118, "grad_norm": 0.44893544912338257, "learning_rate": 0.0005703361344537815, "loss": 0.3673, "step": 15450 }, { "epoch": 8.631843575418994, "grad_norm": 1.057785153388977, "learning_rate": 0.0005703081232492997, "loss": 0.4845, "step": 15451 }, { "epoch": 8.63240223463687, "grad_norm": 0.422775536775589, "learning_rate": 0.0005702801120448179, "loss": 0.4269, "step": 15452 }, { "epoch": 8.632960893854749, "grad_norm": 0.4011528193950653, "learning_rate": 0.0005702521008403362, "loss": 0.4145, "step": 15453 }, { "epoch": 8.633519553072626, "grad_norm": 1.4747127294540405, "learning_rate": 0.0005702240896358543, "loss": 0.5081, "step": 15454 }, { "epoch": 8.634078212290502, "grad_norm": 0.5384272933006287, "learning_rate": 0.0005701960784313725, "loss": 0.4532, "step": 15455 }, { "epoch": 8.63463687150838, "grad_norm": 0.4927268624305725, "learning_rate": 0.0005701680672268907, "loss": 0.6223, "step": 15456 }, { "epoch": 8.635195530726257, "grad_norm": 0.8540382981300354, "learning_rate": 0.000570140056022409, "loss": 0.3853, "step": 15457 }, { "epoch": 8.635754189944134, "grad_norm": 0.5726216435432434, "learning_rate": 0.0005701120448179273, "loss": 0.4688, "step": 15458 }, { "epoch": 
8.63631284916201, "grad_norm": 0.5496420860290527, "learning_rate": 0.0005700840336134454, "loss": 0.4294, "step": 15459 }, { "epoch": 8.636871508379889, "grad_norm": 0.5806383490562439, "learning_rate": 0.0005700560224089636, "loss": 0.4136, "step": 15460 }, { "epoch": 8.637430167597765, "grad_norm": 0.6385259628295898, "learning_rate": 0.0005700280112044818, "loss": 0.4054, "step": 15461 }, { "epoch": 8.637988826815642, "grad_norm": 0.7559404969215393, "learning_rate": 0.00057, "loss": 0.5113, "step": 15462 }, { "epoch": 8.63854748603352, "grad_norm": 0.4123435318470001, "learning_rate": 0.0005699719887955183, "loss": 0.3998, "step": 15463 }, { "epoch": 8.639106145251397, "grad_norm": 0.47525525093078613, "learning_rate": 0.0005699439775910365, "loss": 0.477, "step": 15464 }, { "epoch": 8.639664804469273, "grad_norm": 0.5204905867576599, "learning_rate": 0.0005699159663865546, "loss": 0.4073, "step": 15465 }, { "epoch": 8.640223463687152, "grad_norm": 0.4411095678806305, "learning_rate": 0.0005698879551820728, "loss": 0.3632, "step": 15466 }, { "epoch": 8.640782122905028, "grad_norm": 0.5747869610786438, "learning_rate": 0.000569859943977591, "loss": 0.3954, "step": 15467 }, { "epoch": 8.641340782122905, "grad_norm": 0.5323585867881775, "learning_rate": 0.0005698319327731093, "loss": 0.3872, "step": 15468 }, { "epoch": 8.641899441340783, "grad_norm": 0.46407270431518555, "learning_rate": 0.0005698039215686275, "loss": 0.3738, "step": 15469 }, { "epoch": 8.64245810055866, "grad_norm": 0.45193350315093994, "learning_rate": 0.0005697759103641456, "loss": 0.4271, "step": 15470 }, { "epoch": 8.643016759776536, "grad_norm": 0.526462197303772, "learning_rate": 0.0005697478991596638, "loss": 0.4023, "step": 15471 }, { "epoch": 8.643575418994413, "grad_norm": 0.5079617500305176, "learning_rate": 0.000569719887955182, "loss": 0.346, "step": 15472 }, { "epoch": 8.644134078212291, "grad_norm": 1.8635884523391724, "learning_rate": 0.0005696918767507004, "loss": 0.4141, 
"step": 15473 }, { "epoch": 8.644692737430168, "grad_norm": 0.4533693790435791, "learning_rate": 0.0005696638655462186, "loss": 0.3465, "step": 15474 }, { "epoch": 8.645251396648044, "grad_norm": 1.205929160118103, "learning_rate": 0.0005696358543417367, "loss": 0.5821, "step": 15475 }, { "epoch": 8.645810055865923, "grad_norm": 0.6712439656257629, "learning_rate": 0.0005696078431372549, "loss": 0.4141, "step": 15476 }, { "epoch": 8.6463687150838, "grad_norm": 0.4632731080055237, "learning_rate": 0.0005695798319327731, "loss": 0.4221, "step": 15477 }, { "epoch": 8.646927374301676, "grad_norm": 0.6880497336387634, "learning_rate": 0.0005695518207282914, "loss": 0.4579, "step": 15478 }, { "epoch": 8.647486033519552, "grad_norm": 1.4594767093658447, "learning_rate": 0.0005695238095238096, "loss": 0.4154, "step": 15479 }, { "epoch": 8.64804469273743, "grad_norm": 0.5992676615715027, "learning_rate": 0.0005694957983193278, "loss": 0.4456, "step": 15480 }, { "epoch": 8.648603351955307, "grad_norm": 0.331184446811676, "learning_rate": 0.0005694677871148459, "loss": 0.3202, "step": 15481 }, { "epoch": 8.649162011173184, "grad_norm": 0.5219162702560425, "learning_rate": 0.0005694397759103641, "loss": 0.4742, "step": 15482 }, { "epoch": 8.649720670391062, "grad_norm": 0.45459991693496704, "learning_rate": 0.0005694117647058824, "loss": 0.4612, "step": 15483 }, { "epoch": 8.650279329608939, "grad_norm": 0.40991586446762085, "learning_rate": 0.0005693837535014006, "loss": 0.3984, "step": 15484 }, { "epoch": 8.650837988826815, "grad_norm": 0.973129153251648, "learning_rate": 0.0005693557422969188, "loss": 0.7084, "step": 15485 }, { "epoch": 8.651396648044694, "grad_norm": 0.4905858039855957, "learning_rate": 0.0005693277310924369, "loss": 0.4442, "step": 15486 }, { "epoch": 8.65195530726257, "grad_norm": 0.5168609619140625, "learning_rate": 0.0005692997198879551, "loss": 0.4178, "step": 15487 }, { "epoch": 8.652513966480447, "grad_norm": 0.45132094621658325, "learning_rate": 
0.0005692717086834734, "loss": 0.3536, "step": 15488 }, { "epoch": 8.653072625698323, "grad_norm": 0.48733070492744446, "learning_rate": 0.0005692436974789916, "loss": 0.3673, "step": 15489 }, { "epoch": 8.653631284916202, "grad_norm": 0.5058335661888123, "learning_rate": 0.0005692156862745099, "loss": 0.5012, "step": 15490 }, { "epoch": 8.654189944134078, "grad_norm": 0.6719403862953186, "learning_rate": 0.000569187675070028, "loss": 0.5382, "step": 15491 }, { "epoch": 8.654748603351955, "grad_norm": 0.7544112801551819, "learning_rate": 0.0005691596638655462, "loss": 0.6047, "step": 15492 }, { "epoch": 8.655307262569833, "grad_norm": 0.7219251990318298, "learning_rate": 0.0005691316526610645, "loss": 0.494, "step": 15493 }, { "epoch": 8.65586592178771, "grad_norm": 0.8679959177970886, "learning_rate": 0.0005691036414565827, "loss": 0.5226, "step": 15494 }, { "epoch": 8.656424581005586, "grad_norm": 0.5559894442558289, "learning_rate": 0.0005690756302521009, "loss": 0.4454, "step": 15495 }, { "epoch": 8.656983240223465, "grad_norm": 0.4497896432876587, "learning_rate": 0.0005690476190476191, "loss": 0.4466, "step": 15496 }, { "epoch": 8.657541899441341, "grad_norm": 0.46997177600860596, "learning_rate": 0.0005690196078431372, "loss": 0.4347, "step": 15497 }, { "epoch": 8.658100558659218, "grad_norm": 0.6016250252723694, "learning_rate": 0.0005689915966386555, "loss": 0.4085, "step": 15498 }, { "epoch": 8.658659217877094, "grad_norm": 0.8662602305412292, "learning_rate": 0.0005689635854341737, "loss": 0.3554, "step": 15499 }, { "epoch": 8.659217877094973, "grad_norm": 0.5054382085800171, "learning_rate": 0.0005689355742296919, "loss": 0.4934, "step": 15500 }, { "epoch": 8.659217877094973, "eval_cer": 0.09258834953574896, "eval_loss": 0.33935561776161194, "eval_runtime": 61.3442, "eval_samples_per_second": 73.976, "eval_steps_per_second": 4.63, "eval_wer": 0.37049244434513, "step": 15500 }, { "epoch": 8.65977653631285, "grad_norm": 0.5360488891601562, 
"learning_rate": 0.0005689075630252101, "loss": 0.4385, "step": 15501 }, { "epoch": 8.660335195530726, "grad_norm": 0.8469676971435547, "learning_rate": 0.0005688795518207282, "loss": 0.6167, "step": 15502 }, { "epoch": 8.660893854748604, "grad_norm": 0.471036434173584, "learning_rate": 0.0005688515406162465, "loss": 0.4706, "step": 15503 }, { "epoch": 8.66145251396648, "grad_norm": 0.5919326543807983, "learning_rate": 0.0005688235294117647, "loss": 0.5583, "step": 15504 }, { "epoch": 8.662011173184357, "grad_norm": 1.2199716567993164, "learning_rate": 0.0005687955182072829, "loss": 0.4125, "step": 15505 }, { "epoch": 8.662569832402234, "grad_norm": 0.3077165484428406, "learning_rate": 0.0005687675070028011, "loss": 0.3382, "step": 15506 }, { "epoch": 8.663128491620112, "grad_norm": 2.023994207382202, "learning_rate": 0.0005687394957983192, "loss": 0.3842, "step": 15507 }, { "epoch": 8.663687150837989, "grad_norm": 0.7194250226020813, "learning_rate": 0.0005687114845938376, "loss": 0.4205, "step": 15508 }, { "epoch": 8.664245810055865, "grad_norm": 0.5387637615203857, "learning_rate": 0.0005686834733893558, "loss": 0.3307, "step": 15509 }, { "epoch": 8.664804469273744, "grad_norm": 0.398230642080307, "learning_rate": 0.000568655462184874, "loss": 0.4441, "step": 15510 }, { "epoch": 8.66536312849162, "grad_norm": 1.3800987005233765, "learning_rate": 0.0005686274509803922, "loss": 0.492, "step": 15511 }, { "epoch": 8.665921787709497, "grad_norm": 0.43140509724617004, "learning_rate": 0.0005685994397759104, "loss": 0.4437, "step": 15512 }, { "epoch": 8.666480446927375, "grad_norm": 0.6863518357276917, "learning_rate": 0.0005685714285714286, "loss": 0.4814, "step": 15513 }, { "epoch": 8.667039106145252, "grad_norm": 0.5488103628158569, "learning_rate": 0.0005685434173669468, "loss": 0.3479, "step": 15514 }, { "epoch": 8.667597765363128, "grad_norm": 0.3921966850757599, "learning_rate": 0.000568515406162465, "loss": 0.4351, "step": 15515 }, { "epoch": 8.668156424581005, 
"grad_norm": 0.5004563927650452, "learning_rate": 0.0005684873949579832, "loss": 0.411, "step": 15516 }, { "epoch": 8.668715083798883, "grad_norm": 0.43397316336631775, "learning_rate": 0.0005684593837535014, "loss": 0.4898, "step": 15517 }, { "epoch": 8.66927374301676, "grad_norm": 0.4364555776119232, "learning_rate": 0.0005684313725490196, "loss": 0.5329, "step": 15518 }, { "epoch": 8.669832402234636, "grad_norm": 0.4831409454345703, "learning_rate": 0.0005684033613445378, "loss": 0.3991, "step": 15519 }, { "epoch": 8.670391061452515, "grad_norm": 0.7217563986778259, "learning_rate": 0.000568375350140056, "loss": 0.4335, "step": 15520 }, { "epoch": 8.670949720670391, "grad_norm": 0.412957102060318, "learning_rate": 0.0005683473389355742, "loss": 0.3641, "step": 15521 }, { "epoch": 8.671508379888268, "grad_norm": 0.7792677879333496, "learning_rate": 0.0005683193277310924, "loss": 0.4475, "step": 15522 }, { "epoch": 8.672067039106146, "grad_norm": 1.1043118238449097, "learning_rate": 0.0005682913165266107, "loss": 0.4727, "step": 15523 }, { "epoch": 8.672625698324023, "grad_norm": 0.5187425017356873, "learning_rate": 0.0005682633053221289, "loss": 0.3608, "step": 15524 }, { "epoch": 8.6731843575419, "grad_norm": 0.4371188282966614, "learning_rate": 0.0005682352941176471, "loss": 0.356, "step": 15525 }, { "epoch": 8.673743016759776, "grad_norm": 0.9478253126144409, "learning_rate": 0.0005682072829131653, "loss": 0.4465, "step": 15526 }, { "epoch": 8.674301675977654, "grad_norm": 0.9206333160400391, "learning_rate": 0.0005681792717086835, "loss": 0.4917, "step": 15527 }, { "epoch": 8.67486033519553, "grad_norm": 1.079020380973816, "learning_rate": 0.0005681512605042018, "loss": 0.3557, "step": 15528 }, { "epoch": 8.675418994413407, "grad_norm": 0.4506421983242035, "learning_rate": 0.0005681232492997199, "loss": 0.4713, "step": 15529 }, { "epoch": 8.675977653631286, "grad_norm": 0.44211676716804504, "learning_rate": 0.0005680952380952381, "loss": 0.3573, "step": 15530 
}, { "epoch": 8.676536312849162, "grad_norm": 0.41927507519721985, "learning_rate": 0.0005680672268907563, "loss": 0.3704, "step": 15531 }, { "epoch": 8.677094972067039, "grad_norm": 1.226069688796997, "learning_rate": 0.0005680392156862745, "loss": 0.7053, "step": 15532 }, { "epoch": 8.677653631284915, "grad_norm": 1.1424435377120972, "learning_rate": 0.0005680112044817928, "loss": 0.335, "step": 15533 }, { "epoch": 8.678212290502794, "grad_norm": 0.8386105298995972, "learning_rate": 0.0005679831932773109, "loss": 0.4867, "step": 15534 }, { "epoch": 8.67877094972067, "grad_norm": 0.5779457092285156, "learning_rate": 0.0005679551820728291, "loss": 0.5181, "step": 15535 }, { "epoch": 8.679329608938547, "grad_norm": 0.761139452457428, "learning_rate": 0.0005679271708683473, "loss": 0.4619, "step": 15536 }, { "epoch": 8.679888268156425, "grad_norm": 0.5999415516853333, "learning_rate": 0.0005678991596638655, "loss": 0.505, "step": 15537 }, { "epoch": 8.680446927374302, "grad_norm": 0.5625087022781372, "learning_rate": 0.0005678711484593838, "loss": 0.4821, "step": 15538 }, { "epoch": 8.681005586592178, "grad_norm": 0.7208386659622192, "learning_rate": 0.000567843137254902, "loss": 0.5699, "step": 15539 }, { "epoch": 8.681564245810057, "grad_norm": 0.5278449058532715, "learning_rate": 0.0005678151260504202, "loss": 0.4437, "step": 15540 }, { "epoch": 8.682122905027933, "grad_norm": 0.5902572870254517, "learning_rate": 0.0005677871148459384, "loss": 0.4088, "step": 15541 }, { "epoch": 8.68268156424581, "grad_norm": 0.7975791692733765, "learning_rate": 0.0005677591036414566, "loss": 0.5233, "step": 15542 }, { "epoch": 8.683240223463688, "grad_norm": 0.44139623641967773, "learning_rate": 0.0005677310924369749, "loss": 0.4734, "step": 15543 }, { "epoch": 8.683798882681565, "grad_norm": 2.1698899269104004, "learning_rate": 0.0005677030812324931, "loss": 0.4868, "step": 15544 }, { "epoch": 8.684357541899441, "grad_norm": 0.4290623068809509, "learning_rate": 
0.0005676750700280112, "loss": 0.3866, "step": 15545 }, { "epoch": 8.684916201117318, "grad_norm": 0.5705886483192444, "learning_rate": 0.0005676470588235294, "loss": 0.5036, "step": 15546 }, { "epoch": 8.685474860335196, "grad_norm": 0.4813299775123596, "learning_rate": 0.0005676190476190476, "loss": 0.3888, "step": 15547 }, { "epoch": 8.686033519553073, "grad_norm": 1.1574420928955078, "learning_rate": 0.0005675910364145659, "loss": 0.4428, "step": 15548 }, { "epoch": 8.68659217877095, "grad_norm": 0.3449275493621826, "learning_rate": 0.0005675630252100841, "loss": 0.3787, "step": 15549 }, { "epoch": 8.687150837988828, "grad_norm": 0.6266234517097473, "learning_rate": 0.0005675350140056022, "loss": 0.7406, "step": 15550 }, { "epoch": 8.687709497206704, "grad_norm": 0.7907277941703796, "learning_rate": 0.0005675070028011204, "loss": 0.4311, "step": 15551 }, { "epoch": 8.68826815642458, "grad_norm": 2.236419677734375, "learning_rate": 0.0005674789915966386, "loss": 0.4123, "step": 15552 }, { "epoch": 8.688826815642457, "grad_norm": 0.800818920135498, "learning_rate": 0.0005674509803921569, "loss": 0.6012, "step": 15553 }, { "epoch": 8.689385474860336, "grad_norm": 0.6959667801856995, "learning_rate": 0.0005674229691876751, "loss": 0.4439, "step": 15554 }, { "epoch": 8.689944134078212, "grad_norm": 0.6517641544342041, "learning_rate": 0.0005673949579831932, "loss": 0.4705, "step": 15555 }, { "epoch": 8.690502793296089, "grad_norm": 0.4902878999710083, "learning_rate": 0.0005673669467787114, "loss": 0.45, "step": 15556 }, { "epoch": 8.691061452513967, "grad_norm": 0.41238394379615784, "learning_rate": 0.0005673389355742297, "loss": 0.4023, "step": 15557 }, { "epoch": 8.691620111731844, "grad_norm": 0.9365857839584351, "learning_rate": 0.000567310924369748, "loss": 0.46, "step": 15558 }, { "epoch": 8.69217877094972, "grad_norm": 0.45045730471611023, "learning_rate": 0.0005672829131652662, "loss": 0.4269, "step": 15559 }, { "epoch": 8.692737430167599, "grad_norm": 
0.5339806079864502, "learning_rate": 0.0005672549019607844, "loss": 0.4217, "step": 15560 }, { "epoch": 8.693296089385475, "grad_norm": 0.396564245223999, "learning_rate": 0.0005672268907563025, "loss": 0.3856, "step": 15561 }, { "epoch": 8.693854748603352, "grad_norm": 0.6059952974319458, "learning_rate": 0.0005671988795518207, "loss": 0.6047, "step": 15562 }, { "epoch": 8.694413407821228, "grad_norm": 2.8737268447875977, "learning_rate": 0.000567170868347339, "loss": 0.4055, "step": 15563 }, { "epoch": 8.694972067039107, "grad_norm": 0.4673866033554077, "learning_rate": 0.0005671428571428572, "loss": 0.4483, "step": 15564 }, { "epoch": 8.695530726256983, "grad_norm": 1.530030369758606, "learning_rate": 0.0005671148459383754, "loss": 0.4653, "step": 15565 }, { "epoch": 8.69608938547486, "grad_norm": 0.5205464959144592, "learning_rate": 0.0005670868347338935, "loss": 0.4363, "step": 15566 }, { "epoch": 8.696648044692738, "grad_norm": 0.5952099561691284, "learning_rate": 0.0005670588235294117, "loss": 0.4541, "step": 15567 }, { "epoch": 8.697206703910615, "grad_norm": 0.3770199716091156, "learning_rate": 0.00056703081232493, "loss": 0.4282, "step": 15568 }, { "epoch": 8.697765363128491, "grad_norm": 0.3640885055065155, "learning_rate": 0.0005670028011204482, "loss": 0.3999, "step": 15569 }, { "epoch": 8.69832402234637, "grad_norm": 0.47385725378990173, "learning_rate": 0.0005669747899159664, "loss": 0.3928, "step": 15570 }, { "epoch": 8.698882681564246, "grad_norm": 1.3914686441421509, "learning_rate": 0.0005669467787114845, "loss": 0.3759, "step": 15571 }, { "epoch": 8.699441340782123, "grad_norm": 0.4102199077606201, "learning_rate": 0.0005669187675070027, "loss": 0.4028, "step": 15572 }, { "epoch": 8.7, "grad_norm": 0.5234174728393555, "learning_rate": 0.0005668907563025211, "loss": 0.3744, "step": 15573 }, { "epoch": 8.700558659217878, "grad_norm": 0.46223580837249756, "learning_rate": 0.0005668627450980393, "loss": 0.3884, "step": 15574 }, { "epoch": 
8.701117318435754, "grad_norm": 0.5250284075737, "learning_rate": 0.0005668347338935575, "loss": 0.4637, "step": 15575 }, { "epoch": 8.70167597765363, "grad_norm": 0.8773692846298218, "learning_rate": 0.0005668067226890757, "loss": 0.4147, "step": 15576 }, { "epoch": 8.702234636871509, "grad_norm": 0.5146695375442505, "learning_rate": 0.0005667787114845938, "loss": 0.3863, "step": 15577 }, { "epoch": 8.702793296089386, "grad_norm": 0.48896458745002747, "learning_rate": 0.0005667507002801121, "loss": 0.4693, "step": 15578 }, { "epoch": 8.703351955307262, "grad_norm": 0.49369266629219055, "learning_rate": 0.0005667226890756303, "loss": 0.5659, "step": 15579 }, { "epoch": 8.703910614525139, "grad_norm": 0.43496614694595337, "learning_rate": 0.0005666946778711485, "loss": 0.4846, "step": 15580 }, { "epoch": 8.704469273743017, "grad_norm": 3.0558488368988037, "learning_rate": 0.0005666666666666667, "loss": 0.4418, "step": 15581 }, { "epoch": 8.705027932960894, "grad_norm": 0.5131064057350159, "learning_rate": 0.0005666386554621848, "loss": 0.4421, "step": 15582 }, { "epoch": 8.70558659217877, "grad_norm": 0.4947444498538971, "learning_rate": 0.0005666106442577031, "loss": 0.5146, "step": 15583 }, { "epoch": 8.706145251396649, "grad_norm": 0.7857218980789185, "learning_rate": 0.0005665826330532213, "loss": 0.4214, "step": 15584 }, { "epoch": 8.706703910614525, "grad_norm": 0.4524097740650177, "learning_rate": 0.0005665546218487395, "loss": 0.3314, "step": 15585 }, { "epoch": 8.707262569832402, "grad_norm": 0.5022789835929871, "learning_rate": 0.0005665266106442577, "loss": 0.3777, "step": 15586 }, { "epoch": 8.70782122905028, "grad_norm": 0.7888333201408386, "learning_rate": 0.0005664985994397758, "loss": 0.486, "step": 15587 }, { "epoch": 8.708379888268157, "grad_norm": 0.5010085105895996, "learning_rate": 0.0005664705882352941, "loss": 0.5468, "step": 15588 }, { "epoch": 8.708938547486033, "grad_norm": 0.5213218927383423, "learning_rate": 0.0005664425770308124, "loss": 
0.4705, "step": 15589 }, { "epoch": 8.70949720670391, "grad_norm": 1.07518470287323, "learning_rate": 0.0005664145658263306, "loss": 0.6021, "step": 15590 }, { "epoch": 8.710055865921788, "grad_norm": 0.5122137665748596, "learning_rate": 0.0005663865546218488, "loss": 0.351, "step": 15591 }, { "epoch": 8.710614525139665, "grad_norm": 0.4663032591342926, "learning_rate": 0.000566358543417367, "loss": 0.4456, "step": 15592 }, { "epoch": 8.711173184357541, "grad_norm": 0.7306776642799377, "learning_rate": 0.0005663305322128852, "loss": 0.4339, "step": 15593 }, { "epoch": 8.71173184357542, "grad_norm": 0.7633882164955139, "learning_rate": 0.0005663025210084034, "loss": 0.6889, "step": 15594 }, { "epoch": 8.712290502793296, "grad_norm": 1.062889575958252, "learning_rate": 0.0005662745098039216, "loss": 0.5849, "step": 15595 }, { "epoch": 8.712849162011173, "grad_norm": 0.5082654356956482, "learning_rate": 0.0005662464985994398, "loss": 0.328, "step": 15596 }, { "epoch": 8.713407821229051, "grad_norm": 0.4522295892238617, "learning_rate": 0.000566218487394958, "loss": 0.3886, "step": 15597 }, { "epoch": 8.713966480446928, "grad_norm": 0.6530178785324097, "learning_rate": 0.0005661904761904762, "loss": 0.388, "step": 15598 }, { "epoch": 8.714525139664804, "grad_norm": 0.5231906771659851, "learning_rate": 0.0005661624649859944, "loss": 0.4088, "step": 15599 }, { "epoch": 8.71508379888268, "grad_norm": 0.7256081104278564, "learning_rate": 0.0005661344537815126, "loss": 0.4477, "step": 15600 }, { "epoch": 8.71564245810056, "grad_norm": 0.5418559908866882, "learning_rate": 0.0005661064425770308, "loss": 0.4031, "step": 15601 }, { "epoch": 8.716201117318436, "grad_norm": 9.977767944335938, "learning_rate": 0.000566078431372549, "loss": 0.4733, "step": 15602 }, { "epoch": 8.716759776536312, "grad_norm": 0.5321292877197266, "learning_rate": 0.0005660504201680672, "loss": 0.4721, "step": 15603 }, { "epoch": 8.71731843575419, "grad_norm": 1.3056150674819946, "learning_rate": 
0.0005660224089635854, "loss": 0.5374, "step": 15604 }, { "epoch": 8.717877094972067, "grad_norm": 0.4756324589252472, "learning_rate": 0.0005659943977591037, "loss": 0.4491, "step": 15605 }, { "epoch": 8.718435754189944, "grad_norm": 0.5027273893356323, "learning_rate": 0.0005659663865546219, "loss": 0.4188, "step": 15606 }, { "epoch": 8.71899441340782, "grad_norm": 0.7712967395782471, "learning_rate": 0.0005659383753501401, "loss": 0.4498, "step": 15607 }, { "epoch": 8.719553072625699, "grad_norm": 0.8776791095733643, "learning_rate": 0.0005659103641456584, "loss": 0.6183, "step": 15608 }, { "epoch": 8.720111731843575, "grad_norm": 0.38432690501213074, "learning_rate": 0.0005658823529411765, "loss": 0.3343, "step": 15609 }, { "epoch": 8.720670391061452, "grad_norm": 0.531984806060791, "learning_rate": 0.0005658543417366947, "loss": 0.5629, "step": 15610 }, { "epoch": 8.72122905027933, "grad_norm": 0.40593528747558594, "learning_rate": 0.0005658263305322129, "loss": 0.4715, "step": 15611 }, { "epoch": 8.721787709497207, "grad_norm": 0.41519421339035034, "learning_rate": 0.0005657983193277311, "loss": 0.4198, "step": 15612 }, { "epoch": 8.722346368715083, "grad_norm": 0.8204532265663147, "learning_rate": 0.0005657703081232494, "loss": 0.3999, "step": 15613 }, { "epoch": 8.722905027932962, "grad_norm": 0.49945172667503357, "learning_rate": 0.0005657422969187675, "loss": 0.3515, "step": 15614 }, { "epoch": 8.723463687150838, "grad_norm": 0.6600934863090515, "learning_rate": 0.0005657142857142857, "loss": 0.4702, "step": 15615 }, { "epoch": 8.724022346368715, "grad_norm": 0.47808966040611267, "learning_rate": 0.0005656862745098039, "loss": 0.479, "step": 15616 }, { "epoch": 8.724581005586593, "grad_norm": 0.6460592746734619, "learning_rate": 0.0005656582633053221, "loss": 0.4807, "step": 15617 }, { "epoch": 8.72513966480447, "grad_norm": 0.48469334840774536, "learning_rate": 0.0005656302521008404, "loss": 0.4792, "step": 15618 }, { "epoch": 8.725698324022346, 
"grad_norm": 0.5133102536201477, "learning_rate": 0.0005656022408963585, "loss": 0.3524, "step": 15619 }, { "epoch": 8.726256983240223, "grad_norm": 2.818528890609741, "learning_rate": 0.0005655742296918767, "loss": 0.5097, "step": 15620 }, { "epoch": 8.726815642458101, "grad_norm": 0.5690695643424988, "learning_rate": 0.000565546218487395, "loss": 0.5503, "step": 15621 }, { "epoch": 8.727374301675978, "grad_norm": 0.5174113512039185, "learning_rate": 0.0005655182072829132, "loss": 0.5199, "step": 15622 }, { "epoch": 8.727932960893854, "grad_norm": 0.5243664979934692, "learning_rate": 0.0005654901960784314, "loss": 0.4061, "step": 15623 }, { "epoch": 8.728491620111733, "grad_norm": 0.5984207987785339, "learning_rate": 0.0005654621848739497, "loss": 0.4391, "step": 15624 }, { "epoch": 8.72905027932961, "grad_norm": 0.485016405582428, "learning_rate": 0.0005654341736694678, "loss": 0.4019, "step": 15625 }, { "epoch": 8.729608938547486, "grad_norm": 0.4126144051551819, "learning_rate": 0.000565406162464986, "loss": 0.4607, "step": 15626 }, { "epoch": 8.730167597765362, "grad_norm": 0.45564934611320496, "learning_rate": 0.0005653781512605042, "loss": 0.3749, "step": 15627 }, { "epoch": 8.73072625698324, "grad_norm": 0.3843534290790558, "learning_rate": 0.0005653501400560224, "loss": 0.4009, "step": 15628 }, { "epoch": 8.731284916201117, "grad_norm": 0.6612676382064819, "learning_rate": 0.0005653221288515407, "loss": 0.3881, "step": 15629 }, { "epoch": 8.731843575418994, "grad_norm": 0.7044295072555542, "learning_rate": 0.0005652941176470588, "loss": 0.3608, "step": 15630 }, { "epoch": 8.732402234636872, "grad_norm": 0.4451577961444855, "learning_rate": 0.000565266106442577, "loss": 0.3588, "step": 15631 }, { "epoch": 8.732960893854749, "grad_norm": 0.49735331535339355, "learning_rate": 0.0005652380952380952, "loss": 0.367, "step": 15632 }, { "epoch": 8.733519553072625, "grad_norm": 2.1209583282470703, "learning_rate": 0.0005652100840336134, "loss": 0.3505, "step": 
15633 }, { "epoch": 8.734078212290502, "grad_norm": 0.9366016983985901, "learning_rate": 0.0005651820728291317, "loss": 0.4919, "step": 15634 }, { "epoch": 8.73463687150838, "grad_norm": 0.49355000257492065, "learning_rate": 0.0005651540616246498, "loss": 0.4626, "step": 15635 }, { "epoch": 8.735195530726257, "grad_norm": 0.3209354877471924, "learning_rate": 0.000565126050420168, "loss": 0.3048, "step": 15636 }, { "epoch": 8.735754189944133, "grad_norm": 0.5550408363342285, "learning_rate": 0.0005650980392156862, "loss": 0.453, "step": 15637 }, { "epoch": 8.736312849162012, "grad_norm": 0.5163437724113464, "learning_rate": 0.0005650700280112044, "loss": 0.4577, "step": 15638 }, { "epoch": 8.736871508379888, "grad_norm": 0.43063703179359436, "learning_rate": 0.0005650420168067228, "loss": 0.364, "step": 15639 }, { "epoch": 8.737430167597765, "grad_norm": 0.40728363394737244, "learning_rate": 0.000565014005602241, "loss": 0.362, "step": 15640 }, { "epoch": 8.737988826815643, "grad_norm": 0.6699554324150085, "learning_rate": 0.0005649859943977591, "loss": 0.4406, "step": 15641 }, { "epoch": 8.73854748603352, "grad_norm": 0.4548247158527374, "learning_rate": 0.0005649579831932773, "loss": 0.3391, "step": 15642 }, { "epoch": 8.739106145251396, "grad_norm": 0.7346000075340271, "learning_rate": 0.0005649299719887955, "loss": 0.4069, "step": 15643 }, { "epoch": 8.739664804469275, "grad_norm": 0.42775166034698486, "learning_rate": 0.0005649019607843138, "loss": 0.3878, "step": 15644 }, { "epoch": 8.740223463687151, "grad_norm": 0.3822396993637085, "learning_rate": 0.000564873949579832, "loss": 0.3349, "step": 15645 }, { "epoch": 8.740782122905028, "grad_norm": 0.5837403535842896, "learning_rate": 0.0005648459383753501, "loss": 0.4553, "step": 15646 }, { "epoch": 8.741340782122904, "grad_norm": 0.36033493280410767, "learning_rate": 0.0005648179271708683, "loss": 0.3393, "step": 15647 }, { "epoch": 8.741899441340783, "grad_norm": 0.4193015694618225, "learning_rate": 
0.0005647899159663865, "loss": 0.4682, "step": 15648 }, { "epoch": 8.74245810055866, "grad_norm": 0.500306248664856, "learning_rate": 0.0005647619047619048, "loss": 0.4939, "step": 15649 }, { "epoch": 8.743016759776536, "grad_norm": 0.4655872583389282, "learning_rate": 0.000564733893557423, "loss": 0.3617, "step": 15650 }, { "epoch": 8.743575418994414, "grad_norm": 8.034947395324707, "learning_rate": 0.0005647058823529411, "loss": 0.4282, "step": 15651 }, { "epoch": 8.74413407821229, "grad_norm": 0.4258803427219391, "learning_rate": 0.0005646778711484593, "loss": 0.4751, "step": 15652 }, { "epoch": 8.744692737430167, "grad_norm": 0.4732818603515625, "learning_rate": 0.0005646498599439775, "loss": 0.4379, "step": 15653 }, { "epoch": 8.745251396648044, "grad_norm": 0.4753307104110718, "learning_rate": 0.0005646218487394959, "loss": 0.4345, "step": 15654 }, { "epoch": 8.745810055865922, "grad_norm": 1.1873834133148193, "learning_rate": 0.0005645938375350141, "loss": 0.3901, "step": 15655 }, { "epoch": 8.746368715083799, "grad_norm": 0.5162121653556824, "learning_rate": 0.0005645658263305323, "loss": 0.4958, "step": 15656 }, { "epoch": 8.746927374301675, "grad_norm": 0.4946577250957489, "learning_rate": 0.0005645378151260504, "loss": 0.4076, "step": 15657 }, { "epoch": 8.747486033519554, "grad_norm": 1.1167137622833252, "learning_rate": 0.0005645098039215686, "loss": 0.27, "step": 15658 }, { "epoch": 8.74804469273743, "grad_norm": 1.0856842994689941, "learning_rate": 0.0005644817927170869, "loss": 0.5634, "step": 15659 }, { "epoch": 8.748603351955307, "grad_norm": 1.3085237741470337, "learning_rate": 0.0005644537815126051, "loss": 0.5078, "step": 15660 }, { "epoch": 8.749162011173185, "grad_norm": 0.3998778760433197, "learning_rate": 0.0005644257703081233, "loss": 0.3229, "step": 15661 }, { "epoch": 8.749720670391062, "grad_norm": 0.9483353495597839, "learning_rate": 0.0005643977591036414, "loss": 0.3889, "step": 15662 }, { "epoch": 8.750279329608938, "grad_norm": 
0.38763555884361267, "learning_rate": 0.0005643697478991596, "loss": 0.4316, "step": 15663 }, { "epoch": 8.750837988826815, "grad_norm": 0.5255081653594971, "learning_rate": 0.0005643417366946779, "loss": 0.4302, "step": 15664 }, { "epoch": 8.751396648044693, "grad_norm": 0.6534271836280823, "learning_rate": 0.0005643137254901961, "loss": 0.398, "step": 15665 }, { "epoch": 8.75195530726257, "grad_norm": 2.007716178894043, "learning_rate": 0.0005642857142857143, "loss": 0.4102, "step": 15666 }, { "epoch": 8.752513966480446, "grad_norm": 0.42920124530792236, "learning_rate": 0.0005642577030812324, "loss": 0.5197, "step": 15667 }, { "epoch": 8.753072625698325, "grad_norm": 0.7165144085884094, "learning_rate": 0.0005642296918767506, "loss": 0.4966, "step": 15668 }, { "epoch": 8.753631284916201, "grad_norm": 0.5029805302619934, "learning_rate": 0.0005642016806722689, "loss": 0.4287, "step": 15669 }, { "epoch": 8.754189944134078, "grad_norm": 0.5944901704788208, "learning_rate": 0.0005641736694677871, "loss": 0.5286, "step": 15670 }, { "epoch": 8.754748603351956, "grad_norm": 0.5133156180381775, "learning_rate": 0.0005641456582633054, "loss": 0.2861, "step": 15671 }, { "epoch": 8.755307262569833, "grad_norm": 0.3726537823677063, "learning_rate": 0.0005641176470588236, "loss": 0.4109, "step": 15672 }, { "epoch": 8.75586592178771, "grad_norm": 0.5534281134605408, "learning_rate": 0.0005640896358543417, "loss": 0.4931, "step": 15673 }, { "epoch": 8.756424581005586, "grad_norm": 0.5244345664978027, "learning_rate": 0.00056406162464986, "loss": 0.4225, "step": 15674 }, { "epoch": 8.756983240223464, "grad_norm": 0.6160147190093994, "learning_rate": 0.0005640336134453782, "loss": 0.4799, "step": 15675 }, { "epoch": 8.75754189944134, "grad_norm": 0.7252782583236694, "learning_rate": 0.0005640056022408964, "loss": 0.4367, "step": 15676 }, { "epoch": 8.758100558659217, "grad_norm": 1.1900861263275146, "learning_rate": 0.0005639775910364146, "loss": 0.3935, "step": 15677 }, { 
"epoch": 8.758659217877096, "grad_norm": 0.6065216064453125, "learning_rate": 0.0005639495798319327, "loss": 0.4505, "step": 15678 }, { "epoch": 8.759217877094972, "grad_norm": 0.5070031881332397, "learning_rate": 0.000563921568627451, "loss": 0.479, "step": 15679 }, { "epoch": 8.759776536312849, "grad_norm": 0.3886505365371704, "learning_rate": 0.0005638935574229692, "loss": 0.3378, "step": 15680 }, { "epoch": 8.760335195530725, "grad_norm": 0.4132860600948334, "learning_rate": 0.0005638655462184874, "loss": 0.3514, "step": 15681 }, { "epoch": 8.760893854748604, "grad_norm": 0.47031691670417786, "learning_rate": 0.0005638375350140056, "loss": 0.5206, "step": 15682 }, { "epoch": 8.76145251396648, "grad_norm": 0.49106690287590027, "learning_rate": 0.0005638095238095237, "loss": 0.4429, "step": 15683 }, { "epoch": 8.762011173184357, "grad_norm": 0.45995762944221497, "learning_rate": 0.000563781512605042, "loss": 0.3899, "step": 15684 }, { "epoch": 8.762569832402235, "grad_norm": 0.35264644026756287, "learning_rate": 0.0005637535014005602, "loss": 0.3835, "step": 15685 }, { "epoch": 8.763128491620112, "grad_norm": 0.42212116718292236, "learning_rate": 0.0005637254901960784, "loss": 0.4782, "step": 15686 }, { "epoch": 8.763687150837988, "grad_norm": 0.7341591715812683, "learning_rate": 0.0005636974789915967, "loss": 0.4344, "step": 15687 }, { "epoch": 8.764245810055867, "grad_norm": 0.578353762626648, "learning_rate": 0.0005636694677871149, "loss": 0.375, "step": 15688 }, { "epoch": 8.764804469273743, "grad_norm": 11.390461921691895, "learning_rate": 0.0005636414565826331, "loss": 0.3373, "step": 15689 }, { "epoch": 8.76536312849162, "grad_norm": 0.43578940629959106, "learning_rate": 0.0005636134453781513, "loss": 0.363, "step": 15690 }, { "epoch": 8.765921787709498, "grad_norm": 0.6495600342750549, "learning_rate": 0.0005635854341736695, "loss": 0.3482, "step": 15691 }, { "epoch": 8.766480446927375, "grad_norm": 0.4802863597869873, "learning_rate": 
0.0005635574229691877, "loss": 0.5019, "step": 15692 }, { "epoch": 8.767039106145251, "grad_norm": 0.49207985401153564, "learning_rate": 0.0005635294117647059, "loss": 0.4726, "step": 15693 }, { "epoch": 8.767597765363128, "grad_norm": 0.8635436296463013, "learning_rate": 0.0005635014005602241, "loss": 0.5205, "step": 15694 }, { "epoch": 8.768156424581006, "grad_norm": 0.3986515998840332, "learning_rate": 0.0005634733893557423, "loss": 0.3936, "step": 15695 }, { "epoch": 8.768715083798883, "grad_norm": 0.46163687109947205, "learning_rate": 0.0005634453781512605, "loss": 0.4748, "step": 15696 }, { "epoch": 8.76927374301676, "grad_norm": 1.6048675775527954, "learning_rate": 0.0005634173669467787, "loss": 0.5088, "step": 15697 }, { "epoch": 8.769832402234638, "grad_norm": 2.0931811332702637, "learning_rate": 0.0005633893557422969, "loss": 0.3983, "step": 15698 }, { "epoch": 8.770391061452514, "grad_norm": 0.5724558234214783, "learning_rate": 0.0005633613445378152, "loss": 0.3966, "step": 15699 }, { "epoch": 8.77094972067039, "grad_norm": 0.5514177680015564, "learning_rate": 0.0005633333333333333, "loss": 0.4012, "step": 15700 }, { "epoch": 8.771508379888267, "grad_norm": 0.438016802072525, "learning_rate": 0.0005633053221288515, "loss": 0.3459, "step": 15701 }, { "epoch": 8.772067039106146, "grad_norm": 0.4400658905506134, "learning_rate": 0.0005632773109243697, "loss": 0.4276, "step": 15702 }, { "epoch": 8.772625698324022, "grad_norm": 1.2535483837127686, "learning_rate": 0.000563249299719888, "loss": 0.5748, "step": 15703 }, { "epoch": 8.773184357541899, "grad_norm": 1.3620244264602661, "learning_rate": 0.0005632212885154063, "loss": 0.4012, "step": 15704 }, { "epoch": 8.773743016759777, "grad_norm": 0.4720567762851715, "learning_rate": 0.0005631932773109244, "loss": 0.3815, "step": 15705 }, { "epoch": 8.774301675977654, "grad_norm": 0.36377981305122375, "learning_rate": 0.0005631652661064426, "loss": 0.4601, "step": 15706 }, { "epoch": 8.77486033519553, 
"grad_norm": 1.499476671218872, "learning_rate": 0.0005631372549019608, "loss": 0.5913, "step": 15707 }, { "epoch": 8.775418994413407, "grad_norm": 0.5073271989822388, "learning_rate": 0.000563109243697479, "loss": 0.4163, "step": 15708 }, { "epoch": 8.775977653631285, "grad_norm": 0.5782145261764526, "learning_rate": 0.0005630812324929973, "loss": 0.4112, "step": 15709 }, { "epoch": 8.776536312849162, "grad_norm": 0.566599428653717, "learning_rate": 0.0005630532212885154, "loss": 0.4603, "step": 15710 }, { "epoch": 8.777094972067038, "grad_norm": 0.5575801134109497, "learning_rate": 0.0005630252100840336, "loss": 0.5307, "step": 15711 }, { "epoch": 8.777653631284917, "grad_norm": 0.5349463224411011, "learning_rate": 0.0005629971988795518, "loss": 0.4837, "step": 15712 }, { "epoch": 8.778212290502793, "grad_norm": 0.4377054274082184, "learning_rate": 0.00056296918767507, "loss": 0.3102, "step": 15713 }, { "epoch": 8.77877094972067, "grad_norm": 0.345467209815979, "learning_rate": 0.0005629411764705883, "loss": 0.377, "step": 15714 }, { "epoch": 8.779329608938548, "grad_norm": 0.9769173860549927, "learning_rate": 0.0005629131652661065, "loss": 0.4129, "step": 15715 }, { "epoch": 8.779888268156425, "grad_norm": 0.761565625667572, "learning_rate": 0.0005628851540616246, "loss": 0.4586, "step": 15716 }, { "epoch": 8.780446927374301, "grad_norm": 1.0414402484893799, "learning_rate": 0.0005628571428571428, "loss": 0.4703, "step": 15717 }, { "epoch": 8.78100558659218, "grad_norm": 0.7781525254249573, "learning_rate": 0.000562829131652661, "loss": 0.4029, "step": 15718 }, { "epoch": 8.781564245810056, "grad_norm": 0.46379581093788147, "learning_rate": 0.0005628011204481794, "loss": 0.4168, "step": 15719 }, { "epoch": 8.782122905027933, "grad_norm": 0.30571869015693665, "learning_rate": 0.0005627731092436976, "loss": 0.2938, "step": 15720 }, { "epoch": 8.78268156424581, "grad_norm": 0.5634928941726685, "learning_rate": 0.0005627450980392157, "loss": 0.4369, "step": 15721 }, 
{ "epoch": 8.783240223463688, "grad_norm": 0.71206134557724, "learning_rate": 0.0005627170868347339, "loss": 0.4624, "step": 15722 }, { "epoch": 8.783798882681564, "grad_norm": 1.0042665004730225, "learning_rate": 0.0005626890756302521, "loss": 0.4571, "step": 15723 }, { "epoch": 8.78435754189944, "grad_norm": 0.669148862361908, "learning_rate": 0.0005626610644257704, "loss": 0.5413, "step": 15724 }, { "epoch": 8.78491620111732, "grad_norm": 0.4178120791912079, "learning_rate": 0.0005626330532212886, "loss": 0.4269, "step": 15725 }, { "epoch": 8.785474860335196, "grad_norm": 0.4230409264564514, "learning_rate": 0.0005626050420168067, "loss": 0.3929, "step": 15726 }, { "epoch": 8.786033519553072, "grad_norm": 0.5099184513092041, "learning_rate": 0.0005625770308123249, "loss": 0.504, "step": 15727 }, { "epoch": 8.786592178770949, "grad_norm": 1.1595661640167236, "learning_rate": 0.0005625490196078431, "loss": 0.3423, "step": 15728 }, { "epoch": 8.787150837988827, "grad_norm": 0.39395782351493835, "learning_rate": 0.0005625210084033614, "loss": 0.4581, "step": 15729 }, { "epoch": 8.787709497206704, "grad_norm": 0.5747889280319214, "learning_rate": 0.0005624929971988796, "loss": 0.412, "step": 15730 }, { "epoch": 8.78826815642458, "grad_norm": 0.42089641094207764, "learning_rate": 0.0005624649859943978, "loss": 0.4908, "step": 15731 }, { "epoch": 8.788826815642459, "grad_norm": 0.4230941832065582, "learning_rate": 0.0005624369747899159, "loss": 0.4879, "step": 15732 }, { "epoch": 8.789385474860335, "grad_norm": 0.6767685413360596, "learning_rate": 0.0005624089635854341, "loss": 0.4485, "step": 15733 }, { "epoch": 8.789944134078212, "grad_norm": 3.443275213241577, "learning_rate": 0.0005623809523809524, "loss": 0.6271, "step": 15734 }, { "epoch": 8.79050279329609, "grad_norm": 0.4694518744945526, "learning_rate": 0.0005623529411764706, "loss": 0.4284, "step": 15735 }, { "epoch": 8.791061452513967, "grad_norm": 0.5341271162033081, "learning_rate": 0.0005623249299719889, 
"loss": 0.4376, "step": 15736 }, { "epoch": 8.791620111731843, "grad_norm": 0.5891209244728088, "learning_rate": 0.000562296918767507, "loss": 0.3885, "step": 15737 }, { "epoch": 8.79217877094972, "grad_norm": 0.4981018602848053, "learning_rate": 0.0005622689075630252, "loss": 0.5263, "step": 15738 }, { "epoch": 8.792737430167598, "grad_norm": 0.5746358633041382, "learning_rate": 0.0005622408963585435, "loss": 0.4476, "step": 15739 }, { "epoch": 8.793296089385475, "grad_norm": 1.7005538940429688, "learning_rate": 0.0005622128851540617, "loss": 0.6176, "step": 15740 }, { "epoch": 8.793854748603351, "grad_norm": 0.5340922474861145, "learning_rate": 0.0005621848739495799, "loss": 0.3977, "step": 15741 }, { "epoch": 8.79441340782123, "grad_norm": 0.5265353918075562, "learning_rate": 0.000562156862745098, "loss": 0.4059, "step": 15742 }, { "epoch": 8.794972067039106, "grad_norm": 0.396685928106308, "learning_rate": 0.0005621288515406162, "loss": 0.2848, "step": 15743 }, { "epoch": 8.795530726256983, "grad_norm": 2.166918992996216, "learning_rate": 0.0005621008403361345, "loss": 0.3943, "step": 15744 }, { "epoch": 8.796089385474861, "grad_norm": 1.0985817909240723, "learning_rate": 0.0005620728291316527, "loss": 0.5232, "step": 15745 }, { "epoch": 8.796648044692738, "grad_norm": 1.067770004272461, "learning_rate": 0.0005620448179271709, "loss": 0.5097, "step": 15746 }, { "epoch": 8.797206703910614, "grad_norm": 0.7104836106300354, "learning_rate": 0.0005620168067226891, "loss": 0.4293, "step": 15747 }, { "epoch": 8.797765363128491, "grad_norm": 0.34466931223869324, "learning_rate": 0.0005619887955182072, "loss": 0.32, "step": 15748 }, { "epoch": 8.79832402234637, "grad_norm": 0.5265138149261475, "learning_rate": 0.0005619607843137255, "loss": 0.593, "step": 15749 }, { "epoch": 8.798882681564246, "grad_norm": 1.250335931777954, "learning_rate": 0.0005619327731092437, "loss": 0.4092, "step": 15750 }, { "epoch": 8.799441340782122, "grad_norm": 0.3780997097492218, 
"learning_rate": 0.0005619047619047619, "loss": 0.3767, "step": 15751 }, { "epoch": 8.8, "grad_norm": 0.4873446226119995, "learning_rate": 0.0005618767507002801, "loss": 0.5189, "step": 15752 }, { "epoch": 8.800558659217877, "grad_norm": 0.4541783332824707, "learning_rate": 0.0005618487394957982, "loss": 0.396, "step": 15753 }, { "epoch": 8.801117318435754, "grad_norm": 0.7754843235015869, "learning_rate": 0.0005618207282913166, "loss": 0.4513, "step": 15754 }, { "epoch": 8.80167597765363, "grad_norm": 2.0724496841430664, "learning_rate": 0.0005617927170868348, "loss": 0.4561, "step": 15755 }, { "epoch": 8.802234636871509, "grad_norm": 0.7389378547668457, "learning_rate": 0.000561764705882353, "loss": 0.3614, "step": 15756 }, { "epoch": 8.802793296089385, "grad_norm": 0.42319151759147644, "learning_rate": 0.0005617366946778712, "loss": 0.3707, "step": 15757 }, { "epoch": 8.803351955307262, "grad_norm": 0.43397441506385803, "learning_rate": 0.0005617086834733893, "loss": 0.4021, "step": 15758 }, { "epoch": 8.80391061452514, "grad_norm": 5.564876556396484, "learning_rate": 0.0005616806722689076, "loss": 0.3989, "step": 15759 }, { "epoch": 8.804469273743017, "grad_norm": 0.6502397060394287, "learning_rate": 0.0005616526610644258, "loss": 0.4283, "step": 15760 }, { "epoch": 8.805027932960893, "grad_norm": 0.8007463216781616, "learning_rate": 0.000561624649859944, "loss": 0.4429, "step": 15761 }, { "epoch": 8.805586592178772, "grad_norm": 0.8617485761642456, "learning_rate": 0.0005615966386554622, "loss": 0.5963, "step": 15762 }, { "epoch": 8.806145251396648, "grad_norm": 0.5161985158920288, "learning_rate": 0.0005615686274509804, "loss": 0.401, "step": 15763 }, { "epoch": 8.806703910614525, "grad_norm": 0.5018290281295776, "learning_rate": 0.0005615406162464986, "loss": 0.4164, "step": 15764 }, { "epoch": 8.807262569832401, "grad_norm": 0.5203245878219604, "learning_rate": 0.0005615126050420168, "loss": 0.4833, "step": 15765 }, { "epoch": 8.80782122905028, "grad_norm": 
0.7248677015304565, "learning_rate": 0.000561484593837535, "loss": 0.4828, "step": 15766 }, { "epoch": 8.808379888268156, "grad_norm": 0.5944013595581055, "learning_rate": 0.0005614565826330532, "loss": 0.3959, "step": 15767 }, { "epoch": 8.808938547486033, "grad_norm": 0.5159067511558533, "learning_rate": 0.0005614285714285714, "loss": 0.3531, "step": 15768 }, { "epoch": 8.809497206703911, "grad_norm": 0.4834436774253845, "learning_rate": 0.0005614005602240897, "loss": 0.506, "step": 15769 }, { "epoch": 8.810055865921788, "grad_norm": 0.5468249917030334, "learning_rate": 0.0005613725490196079, "loss": 0.4982, "step": 15770 }, { "epoch": 8.810614525139664, "grad_norm": 1.7091220617294312, "learning_rate": 0.0005613445378151261, "loss": 0.4169, "step": 15771 }, { "epoch": 8.811173184357543, "grad_norm": 0.613426923751831, "learning_rate": 0.0005613165266106443, "loss": 0.773, "step": 15772 }, { "epoch": 8.81173184357542, "grad_norm": 0.6755386590957642, "learning_rate": 0.0005612885154061625, "loss": 0.3302, "step": 15773 }, { "epoch": 8.812290502793296, "grad_norm": 0.6490817070007324, "learning_rate": 0.0005612605042016807, "loss": 0.4201, "step": 15774 }, { "epoch": 8.812849162011172, "grad_norm": 0.8108155131340027, "learning_rate": 0.0005612324929971989, "loss": 0.4452, "step": 15775 }, { "epoch": 8.81340782122905, "grad_norm": 0.4282110929489136, "learning_rate": 0.0005612044817927171, "loss": 0.3913, "step": 15776 }, { "epoch": 8.813966480446927, "grad_norm": 0.38874006271362305, "learning_rate": 0.0005611764705882353, "loss": 0.3941, "step": 15777 }, { "epoch": 8.814525139664804, "grad_norm": 0.5276618003845215, "learning_rate": 0.0005611484593837535, "loss": 0.5142, "step": 15778 }, { "epoch": 8.815083798882682, "grad_norm": 0.504787802696228, "learning_rate": 0.0005611204481792718, "loss": 0.4565, "step": 15779 }, { "epoch": 8.815642458100559, "grad_norm": 0.494769424200058, "learning_rate": 0.0005610924369747899, "loss": 0.4245, "step": 15780 }, { 
"epoch": 8.816201117318435, "grad_norm": 0.3847169280052185, "learning_rate": 0.0005610644257703081, "loss": 0.4498, "step": 15781 }, { "epoch": 8.816759776536312, "grad_norm": 0.6524850130081177, "learning_rate": 0.0005610364145658263, "loss": 0.4731, "step": 15782 }, { "epoch": 8.81731843575419, "grad_norm": 0.6199373602867126, "learning_rate": 0.0005610084033613445, "loss": 0.4352, "step": 15783 }, { "epoch": 8.817877094972067, "grad_norm": 0.8549118638038635, "learning_rate": 0.0005609803921568628, "loss": 0.4191, "step": 15784 }, { "epoch": 8.818435754189943, "grad_norm": 0.969581127166748, "learning_rate": 0.000560952380952381, "loss": 0.5507, "step": 15785 }, { "epoch": 8.818994413407822, "grad_norm": 0.43300533294677734, "learning_rate": 0.0005609243697478992, "loss": 0.5037, "step": 15786 }, { "epoch": 8.819553072625698, "grad_norm": 0.648641049861908, "learning_rate": 0.0005608963585434174, "loss": 0.3187, "step": 15787 }, { "epoch": 8.820111731843575, "grad_norm": 0.5054218769073486, "learning_rate": 0.0005608683473389356, "loss": 0.4771, "step": 15788 }, { "epoch": 8.820670391061453, "grad_norm": 0.4416903555393219, "learning_rate": 0.0005608403361344539, "loss": 0.4213, "step": 15789 }, { "epoch": 8.82122905027933, "grad_norm": 0.6103943586349487, "learning_rate": 0.000560812324929972, "loss": 0.4273, "step": 15790 }, { "epoch": 8.821787709497206, "grad_norm": 0.5159399509429932, "learning_rate": 0.0005607843137254902, "loss": 0.4427, "step": 15791 }, { "epoch": 8.822346368715085, "grad_norm": 0.5488128662109375, "learning_rate": 0.0005607563025210084, "loss": 0.4177, "step": 15792 }, { "epoch": 8.822905027932961, "grad_norm": 0.6271675229072571, "learning_rate": 0.0005607282913165266, "loss": 0.4656, "step": 15793 }, { "epoch": 8.823463687150838, "grad_norm": 1.1658530235290527, "learning_rate": 0.0005607002801120449, "loss": 0.405, "step": 15794 }, { "epoch": 8.824022346368714, "grad_norm": 0.7646031975746155, "learning_rate": 0.0005606722689075631, 
"loss": 0.4228, "step": 15795 }, { "epoch": 8.824581005586593, "grad_norm": 0.5794070363044739, "learning_rate": 0.0005606442577030812, "loss": 0.4064, "step": 15796 }, { "epoch": 8.82513966480447, "grad_norm": 0.4996819794178009, "learning_rate": 0.0005606162464985994, "loss": 0.4272, "step": 15797 }, { "epoch": 8.825698324022346, "grad_norm": 0.37560370564460754, "learning_rate": 0.0005605882352941176, "loss": 0.3945, "step": 15798 }, { "epoch": 8.826256983240224, "grad_norm": 0.4004479646682739, "learning_rate": 0.0005605602240896359, "loss": 0.457, "step": 15799 }, { "epoch": 8.8268156424581, "grad_norm": 0.5167516469955444, "learning_rate": 0.0005605322128851541, "loss": 0.4259, "step": 15800 }, { "epoch": 8.827374301675977, "grad_norm": 0.47834140062332153, "learning_rate": 0.0005605042016806722, "loss": 0.3343, "step": 15801 }, { "epoch": 8.827932960893854, "grad_norm": 0.5612804293632507, "learning_rate": 0.0005604761904761904, "loss": 0.344, "step": 15802 }, { "epoch": 8.828491620111732, "grad_norm": 0.5487278699874878, "learning_rate": 0.0005604481792717087, "loss": 0.3641, "step": 15803 }, { "epoch": 8.829050279329609, "grad_norm": 2.5715460777282715, "learning_rate": 0.000560420168067227, "loss": 0.3943, "step": 15804 }, { "epoch": 8.829608938547485, "grad_norm": 0.8560015559196472, "learning_rate": 0.0005603921568627452, "loss": 0.5879, "step": 15805 }, { "epoch": 8.830167597765364, "grad_norm": 0.5358844995498657, "learning_rate": 0.0005603641456582633, "loss": 0.4238, "step": 15806 }, { "epoch": 8.83072625698324, "grad_norm": 0.6962670683860779, "learning_rate": 0.0005603361344537815, "loss": 0.5367, "step": 15807 }, { "epoch": 8.831284916201117, "grad_norm": 1.5312117338180542, "learning_rate": 0.0005603081232492997, "loss": 0.5755, "step": 15808 }, { "epoch": 8.831843575418995, "grad_norm": 2.177212715148926, "learning_rate": 0.000560280112044818, "loss": 0.3926, "step": 15809 }, { "epoch": 8.832402234636872, "grad_norm": 2.0129446983337402, 
"learning_rate": 0.0005602521008403362, "loss": 0.5054, "step": 15810 }, { "epoch": 8.832960893854748, "grad_norm": 0.6918999552726746, "learning_rate": 0.0005602240896358544, "loss": 0.4311, "step": 15811 }, { "epoch": 8.833519553072625, "grad_norm": 0.4057055413722992, "learning_rate": 0.0005601960784313725, "loss": 0.3703, "step": 15812 }, { "epoch": 8.834078212290503, "grad_norm": 0.5047594904899597, "learning_rate": 0.0005601680672268907, "loss": 0.4717, "step": 15813 }, { "epoch": 8.83463687150838, "grad_norm": 1.7702102661132812, "learning_rate": 0.000560140056022409, "loss": 0.4751, "step": 15814 }, { "epoch": 8.835195530726256, "grad_norm": 0.8252912163734436, "learning_rate": 0.0005601120448179272, "loss": 0.5057, "step": 15815 }, { "epoch": 8.835754189944135, "grad_norm": 0.5314022302627563, "learning_rate": 0.0005600840336134454, "loss": 0.4176, "step": 15816 }, { "epoch": 8.836312849162011, "grad_norm": 0.44567450881004333, "learning_rate": 0.0005600560224089635, "loss": 0.4846, "step": 15817 }, { "epoch": 8.836871508379888, "grad_norm": 0.5232447981834412, "learning_rate": 0.0005600280112044817, "loss": 0.4326, "step": 15818 }, { "epoch": 8.837430167597766, "grad_norm": 0.46586373448371887, "learning_rate": 0.0005600000000000001, "loss": 0.4706, "step": 15819 }, { "epoch": 8.837988826815643, "grad_norm": 1.3477208614349365, "learning_rate": 0.0005599719887955183, "loss": 0.4725, "step": 15820 }, { "epoch": 8.83854748603352, "grad_norm": 0.49078822135925293, "learning_rate": 0.0005599439775910365, "loss": 0.4124, "step": 15821 }, { "epoch": 8.839106145251396, "grad_norm": 0.8453941345214844, "learning_rate": 0.0005599159663865546, "loss": 0.5441, "step": 15822 }, { "epoch": 8.839664804469274, "grad_norm": 0.5877315998077393, "learning_rate": 0.0005598879551820728, "loss": 0.4284, "step": 15823 }, { "epoch": 8.84022346368715, "grad_norm": 1.5403003692626953, "learning_rate": 0.0005598599439775911, "loss": 0.36, "step": 15824 }, { "epoch": 
8.840782122905027, "grad_norm": 0.3417825996875763, "learning_rate": 0.0005598319327731093, "loss": 0.3587, "step": 15825 }, { "epoch": 8.841340782122906, "grad_norm": 1.1989957094192505, "learning_rate": 0.0005598039215686275, "loss": 0.4314, "step": 15826 }, { "epoch": 8.841899441340782, "grad_norm": 0.8693377375602722, "learning_rate": 0.0005597759103641457, "loss": 0.5083, "step": 15827 }, { "epoch": 8.842458100558659, "grad_norm": 0.7427593469619751, "learning_rate": 0.0005597478991596638, "loss": 0.4641, "step": 15828 }, { "epoch": 8.843016759776535, "grad_norm": 0.5602688789367676, "learning_rate": 0.0005597198879551821, "loss": 0.4509, "step": 15829 }, { "epoch": 8.843575418994414, "grad_norm": 0.9375371336936951, "learning_rate": 0.0005596918767507003, "loss": 0.4398, "step": 15830 }, { "epoch": 8.84413407821229, "grad_norm": 0.3733755052089691, "learning_rate": 0.0005596638655462185, "loss": 0.4562, "step": 15831 }, { "epoch": 8.844692737430167, "grad_norm": 0.8695487380027771, "learning_rate": 0.0005596358543417367, "loss": 0.4735, "step": 15832 }, { "epoch": 8.845251396648045, "grad_norm": 6.599333763122559, "learning_rate": 0.0005596078431372548, "loss": 0.4304, "step": 15833 }, { "epoch": 8.845810055865922, "grad_norm": 0.41975608468055725, "learning_rate": 0.0005595798319327731, "loss": 0.4463, "step": 15834 }, { "epoch": 8.846368715083798, "grad_norm": 11.498644828796387, "learning_rate": 0.0005595518207282914, "loss": 0.4016, "step": 15835 }, { "epoch": 8.846927374301677, "grad_norm": 0.5879505276679993, "learning_rate": 0.0005595238095238096, "loss": 0.372, "step": 15836 }, { "epoch": 8.847486033519553, "grad_norm": 0.5154557824134827, "learning_rate": 0.0005594957983193278, "loss": 0.5299, "step": 15837 }, { "epoch": 8.84804469273743, "grad_norm": 0.48858073353767395, "learning_rate": 0.0005594677871148459, "loss": 0.4914, "step": 15838 }, { "epoch": 8.848603351955306, "grad_norm": 0.39561858773231506, "learning_rate": 0.0005594397759103642, 
"loss": 0.3889, "step": 15839 }, { "epoch": 8.849162011173185, "grad_norm": 0.8169729113578796, "learning_rate": 0.0005594117647058824, "loss": 0.3588, "step": 15840 }, { "epoch": 8.849720670391061, "grad_norm": 0.6235904097557068, "learning_rate": 0.0005593837535014006, "loss": 0.4483, "step": 15841 }, { "epoch": 8.850279329608938, "grad_norm": 1.2435609102249146, "learning_rate": 0.0005593557422969188, "loss": 0.4193, "step": 15842 }, { "epoch": 8.850837988826816, "grad_norm": 0.6560278534889221, "learning_rate": 0.000559327731092437, "loss": 0.3958, "step": 15843 }, { "epoch": 8.851396648044693, "grad_norm": 0.5255511403083801, "learning_rate": 0.0005592997198879551, "loss": 0.3439, "step": 15844 }, { "epoch": 8.85195530726257, "grad_norm": 0.4293096363544464, "learning_rate": 0.0005592717086834734, "loss": 0.4303, "step": 15845 }, { "epoch": 8.852513966480448, "grad_norm": 0.5821239352226257, "learning_rate": 0.0005592436974789916, "loss": 0.5027, "step": 15846 }, { "epoch": 8.853072625698324, "grad_norm": 0.5697384476661682, "learning_rate": 0.0005592156862745098, "loss": 0.5261, "step": 15847 }, { "epoch": 8.8536312849162, "grad_norm": 0.44587865471839905, "learning_rate": 0.000559187675070028, "loss": 0.325, "step": 15848 }, { "epoch": 8.854189944134077, "grad_norm": 3.178269624710083, "learning_rate": 0.0005591596638655461, "loss": 0.4744, "step": 15849 }, { "epoch": 8.854748603351956, "grad_norm": 0.4200572669506073, "learning_rate": 0.0005591316526610644, "loss": 0.4273, "step": 15850 }, { "epoch": 8.855307262569832, "grad_norm": 0.4692087769508362, "learning_rate": 0.0005591036414565827, "loss": 0.3946, "step": 15851 }, { "epoch": 8.855865921787709, "grad_norm": 1.0718377828598022, "learning_rate": 0.0005590756302521009, "loss": 0.4253, "step": 15852 }, { "epoch": 8.856424581005587, "grad_norm": 0.47882017493247986, "learning_rate": 0.0005590476190476191, "loss": 0.4089, "step": 15853 }, { "epoch": 8.856983240223464, "grad_norm": 0.4071679711341858, 
"learning_rate": 0.0005590196078431372, "loss": 0.4351, "step": 15854 }, { "epoch": 8.85754189944134, "grad_norm": 0.37174373865127563, "learning_rate": 0.0005589915966386555, "loss": 0.395, "step": 15855 }, { "epoch": 8.858100558659217, "grad_norm": 0.5214393138885498, "learning_rate": 0.0005589635854341737, "loss": 0.491, "step": 15856 }, { "epoch": 8.858659217877095, "grad_norm": 1.4114837646484375, "learning_rate": 0.0005589355742296919, "loss": 0.5245, "step": 15857 }, { "epoch": 8.859217877094972, "grad_norm": 0.5743383169174194, "learning_rate": 0.0005589075630252101, "loss": 0.4338, "step": 15858 }, { "epoch": 8.859776536312848, "grad_norm": 0.5833740830421448, "learning_rate": 0.0005588795518207283, "loss": 0.4578, "step": 15859 }, { "epoch": 8.860335195530727, "grad_norm": 0.47941040992736816, "learning_rate": 0.0005588515406162465, "loss": 0.4404, "step": 15860 }, { "epoch": 8.860893854748603, "grad_norm": 0.46471959352493286, "learning_rate": 0.0005588235294117647, "loss": 0.3843, "step": 15861 }, { "epoch": 8.86145251396648, "grad_norm": 0.7174744009971619, "learning_rate": 0.0005587955182072829, "loss": 0.4565, "step": 15862 }, { "epoch": 8.862011173184358, "grad_norm": 0.5444117784500122, "learning_rate": 0.0005587675070028011, "loss": 0.4772, "step": 15863 }, { "epoch": 8.862569832402235, "grad_norm": 1.0654371976852417, "learning_rate": 0.0005587394957983193, "loss": 0.4805, "step": 15864 }, { "epoch": 8.863128491620111, "grad_norm": 1.2941851615905762, "learning_rate": 0.0005587114845938375, "loss": 0.4403, "step": 15865 }, { "epoch": 8.86368715083799, "grad_norm": 0.5275970697402954, "learning_rate": 0.0005586834733893557, "loss": 0.4753, "step": 15866 }, { "epoch": 8.864245810055866, "grad_norm": 0.4790375530719757, "learning_rate": 0.000558655462184874, "loss": 0.3831, "step": 15867 }, { "epoch": 8.864804469273743, "grad_norm": 0.368158221244812, "learning_rate": 0.0005586274509803922, "loss": 0.3218, "step": 15868 }, { "epoch": 
8.86536312849162, "grad_norm": 0.4192827343940735, "learning_rate": 0.0005585994397759104, "loss": 0.5125, "step": 15869 }, { "epoch": 8.865921787709498, "grad_norm": 0.7649282217025757, "learning_rate": 0.0005585714285714286, "loss": 0.4307, "step": 15870 }, { "epoch": 8.866480446927374, "grad_norm": 0.6270117163658142, "learning_rate": 0.0005585434173669468, "loss": 0.4363, "step": 15871 }, { "epoch": 8.867039106145251, "grad_norm": 0.6097122430801392, "learning_rate": 0.000558515406162465, "loss": 0.418, "step": 15872 }, { "epoch": 8.86759776536313, "grad_norm": 0.4520244896411896, "learning_rate": 0.0005584873949579832, "loss": 0.4579, "step": 15873 }, { "epoch": 8.868156424581006, "grad_norm": 3.2079672813415527, "learning_rate": 0.0005584593837535014, "loss": 0.6236, "step": 15874 }, { "epoch": 8.868715083798882, "grad_norm": 0.6120218634605408, "learning_rate": 0.0005584313725490197, "loss": 0.4458, "step": 15875 }, { "epoch": 8.869273743016759, "grad_norm": 1.532673954963684, "learning_rate": 0.0005584033613445378, "loss": 0.3609, "step": 15876 }, { "epoch": 8.869832402234637, "grad_norm": 0.43884918093681335, "learning_rate": 0.000558375350140056, "loss": 0.4103, "step": 15877 }, { "epoch": 8.870391061452514, "grad_norm": 0.4125737249851227, "learning_rate": 0.0005583473389355742, "loss": 0.374, "step": 15878 }, { "epoch": 8.87094972067039, "grad_norm": 0.4179338216781616, "learning_rate": 0.0005583193277310924, "loss": 0.3871, "step": 15879 }, { "epoch": 8.871508379888269, "grad_norm": 0.4581180512905121, "learning_rate": 0.0005582913165266107, "loss": 0.4421, "step": 15880 }, { "epoch": 8.872067039106145, "grad_norm": 0.5099257230758667, "learning_rate": 0.0005582633053221288, "loss": 0.3815, "step": 15881 }, { "epoch": 8.872625698324022, "grad_norm": 0.5308847427368164, "learning_rate": 0.000558235294117647, "loss": 0.3722, "step": 15882 }, { "epoch": 8.8731843575419, "grad_norm": 0.48744913935661316, "learning_rate": 0.0005582072829131652, "loss": 
0.3683, "step": 15883 }, { "epoch": 8.873743016759777, "grad_norm": 1.6459118127822876, "learning_rate": 0.0005581792717086834, "loss": 0.4929, "step": 15884 }, { "epoch": 8.874301675977653, "grad_norm": 1.429837942123413, "learning_rate": 0.0005581512605042018, "loss": 0.4272, "step": 15885 }, { "epoch": 8.87486033519553, "grad_norm": 0.599993109703064, "learning_rate": 0.0005581232492997199, "loss": 0.5076, "step": 15886 }, { "epoch": 8.875418994413408, "grad_norm": 0.9193503856658936, "learning_rate": 0.0005580952380952381, "loss": 0.6935, "step": 15887 }, { "epoch": 8.875977653631285, "grad_norm": 0.4025314748287201, "learning_rate": 0.0005580672268907563, "loss": 0.3597, "step": 15888 }, { "epoch": 8.876536312849161, "grad_norm": 0.8811167478561401, "learning_rate": 0.0005580392156862745, "loss": 0.4429, "step": 15889 }, { "epoch": 8.87709497206704, "grad_norm": 0.6658065319061279, "learning_rate": 0.0005580112044817928, "loss": 0.4109, "step": 15890 }, { "epoch": 8.877653631284916, "grad_norm": 1.5478134155273438, "learning_rate": 0.000557983193277311, "loss": 0.505, "step": 15891 }, { "epoch": 8.878212290502793, "grad_norm": 0.36211755871772766, "learning_rate": 0.0005579551820728291, "loss": 0.4024, "step": 15892 }, { "epoch": 8.878770949720671, "grad_norm": 0.9082402586936951, "learning_rate": 0.0005579271708683473, "loss": 0.3936, "step": 15893 }, { "epoch": 8.879329608938548, "grad_norm": 0.8617127537727356, "learning_rate": 0.0005578991596638655, "loss": 0.4812, "step": 15894 }, { "epoch": 8.879888268156424, "grad_norm": 1.3491594791412354, "learning_rate": 0.0005578711484593838, "loss": 0.4301, "step": 15895 }, { "epoch": 8.880446927374301, "grad_norm": 0.7563647031784058, "learning_rate": 0.000557843137254902, "loss": 0.458, "step": 15896 }, { "epoch": 8.88100558659218, "grad_norm": 18.068227767944336, "learning_rate": 0.0005578151260504201, "loss": 0.5537, "step": 15897 }, { "epoch": 8.881564245810056, "grad_norm": 1.4140278100967407, 
"learning_rate": 0.0005577871148459383, "loss": 0.5839, "step": 15898 }, { "epoch": 8.882122905027932, "grad_norm": 1.5591644048690796, "learning_rate": 0.0005577591036414565, "loss": 0.457, "step": 15899 }, { "epoch": 8.88268156424581, "grad_norm": 0.5397035479545593, "learning_rate": 0.0005577310924369749, "loss": 0.3799, "step": 15900 }, { "epoch": 8.883240223463687, "grad_norm": 4.0931243896484375, "learning_rate": 0.0005577030812324931, "loss": 0.7184, "step": 15901 }, { "epoch": 8.883798882681564, "grad_norm": 0.6044573783874512, "learning_rate": 0.0005576750700280112, "loss": 0.4119, "step": 15902 }, { "epoch": 8.88435754189944, "grad_norm": 1.6572458744049072, "learning_rate": 0.0005576470588235294, "loss": 0.3577, "step": 15903 }, { "epoch": 8.884916201117319, "grad_norm": 0.4851267337799072, "learning_rate": 0.0005576190476190476, "loss": 0.3608, "step": 15904 }, { "epoch": 8.885474860335195, "grad_norm": 0.48618993163108826, "learning_rate": 0.0005575910364145659, "loss": 0.4901, "step": 15905 }, { "epoch": 8.886033519553072, "grad_norm": 0.5448348522186279, "learning_rate": 0.0005575630252100841, "loss": 0.4887, "step": 15906 }, { "epoch": 8.88659217877095, "grad_norm": 3.0821704864501953, "learning_rate": 0.0005575350140056023, "loss": 0.4261, "step": 15907 }, { "epoch": 8.887150837988827, "grad_norm": 1.0967351198196411, "learning_rate": 0.0005575070028011204, "loss": 0.3791, "step": 15908 }, { "epoch": 8.887709497206703, "grad_norm": 0.4589420557022095, "learning_rate": 0.0005574789915966386, "loss": 0.5087, "step": 15909 }, { "epoch": 8.888268156424582, "grad_norm": 0.559450089931488, "learning_rate": 0.0005574509803921569, "loss": 0.3874, "step": 15910 }, { "epoch": 8.888826815642458, "grad_norm": 0.5953260660171509, "learning_rate": 0.0005574229691876751, "loss": 0.4488, "step": 15911 }, { "epoch": 8.889385474860335, "grad_norm": 0.7041288614273071, "learning_rate": 0.0005573949579831933, "loss": 0.4065, "step": 15912 }, { "epoch": 
8.889944134078211, "grad_norm": 0.45519065856933594, "learning_rate": 0.0005573669467787114, "loss": 0.4226, "step": 15913 }, { "epoch": 8.89050279329609, "grad_norm": 0.41252848505973816, "learning_rate": 0.0005573389355742296, "loss": 0.3932, "step": 15914 }, { "epoch": 8.891061452513966, "grad_norm": 0.6071236729621887, "learning_rate": 0.0005573109243697479, "loss": 0.4147, "step": 15915 }, { "epoch": 8.891620111731843, "grad_norm": 0.4969694912433624, "learning_rate": 0.0005572829131652661, "loss": 0.399, "step": 15916 }, { "epoch": 8.892178770949721, "grad_norm": 0.8268481492996216, "learning_rate": 0.0005572549019607844, "loss": 0.5349, "step": 15917 }, { "epoch": 8.892737430167598, "grad_norm": 0.5528133511543274, "learning_rate": 0.0005572268907563025, "loss": 0.3975, "step": 15918 }, { "epoch": 8.893296089385474, "grad_norm": 0.5288232564926147, "learning_rate": 0.0005571988795518207, "loss": 0.4877, "step": 15919 }, { "epoch": 8.893854748603353, "grad_norm": 0.6598970293998718, "learning_rate": 0.000557170868347339, "loss": 0.3626, "step": 15920 }, { "epoch": 8.89441340782123, "grad_norm": 0.36831241846084595, "learning_rate": 0.0005571428571428572, "loss": 0.3136, "step": 15921 }, { "epoch": 8.894972067039106, "grad_norm": 1.1029791831970215, "learning_rate": 0.0005571148459383754, "loss": 0.5374, "step": 15922 }, { "epoch": 8.895530726256982, "grad_norm": 0.5720617771148682, "learning_rate": 0.0005570868347338936, "loss": 0.4872, "step": 15923 }, { "epoch": 8.89608938547486, "grad_norm": 0.397910475730896, "learning_rate": 0.0005570588235294117, "loss": 0.3855, "step": 15924 }, { "epoch": 8.896648044692737, "grad_norm": 0.981838583946228, "learning_rate": 0.00055703081232493, "loss": 0.4459, "step": 15925 }, { "epoch": 8.897206703910614, "grad_norm": 0.5285311937332153, "learning_rate": 0.0005570028011204482, "loss": 0.3435, "step": 15926 }, { "epoch": 8.897765363128492, "grad_norm": 0.6074533462524414, "learning_rate": 0.0005569747899159664, "loss": 
0.4337, "step": 15927 }, { "epoch": 8.898324022346369, "grad_norm": 0.7320986390113831, "learning_rate": 0.0005569467787114846, "loss": 0.5181, "step": 15928 }, { "epoch": 8.898882681564245, "grad_norm": 0.6360154151916504, "learning_rate": 0.0005569187675070027, "loss": 0.6402, "step": 15929 }, { "epoch": 8.899441340782122, "grad_norm": 0.6049297451972961, "learning_rate": 0.000556890756302521, "loss": 0.4989, "step": 15930 }, { "epoch": 8.9, "grad_norm": 1.0205636024475098, "learning_rate": 0.0005568627450980392, "loss": 0.5307, "step": 15931 }, { "epoch": 8.900558659217877, "grad_norm": 0.656028151512146, "learning_rate": 0.0005568347338935574, "loss": 0.4034, "step": 15932 }, { "epoch": 8.901117318435753, "grad_norm": 0.5328769683837891, "learning_rate": 0.0005568067226890757, "loss": 0.5032, "step": 15933 }, { "epoch": 8.901675977653632, "grad_norm": 0.5702680349349976, "learning_rate": 0.0005567787114845937, "loss": 0.4325, "step": 15934 }, { "epoch": 8.902234636871508, "grad_norm": 0.48292964696884155, "learning_rate": 0.0005567507002801121, "loss": 0.4008, "step": 15935 }, { "epoch": 8.902793296089385, "grad_norm": 0.6621236205101013, "learning_rate": 0.0005567226890756303, "loss": 0.502, "step": 15936 }, { "epoch": 8.903351955307263, "grad_norm": 0.43324798345565796, "learning_rate": 0.0005566946778711485, "loss": 0.4301, "step": 15937 }, { "epoch": 8.90391061452514, "grad_norm": 7.006535530090332, "learning_rate": 0.0005566666666666667, "loss": 0.4692, "step": 15938 }, { "epoch": 8.904469273743016, "grad_norm": 0.6407557129859924, "learning_rate": 0.0005566386554621849, "loss": 0.6233, "step": 15939 }, { "epoch": 8.905027932960895, "grad_norm": 0.6427462100982666, "learning_rate": 0.0005566106442577031, "loss": 0.3878, "step": 15940 }, { "epoch": 8.905586592178771, "grad_norm": 1.8678460121154785, "learning_rate": 0.0005565826330532213, "loss": 0.4084, "step": 15941 }, { "epoch": 8.906145251396648, "grad_norm": 0.40178200602531433, "learning_rate": 
0.0005565546218487395, "loss": 0.3604, "step": 15942 }, { "epoch": 8.906703910614524, "grad_norm": 0.4935017228126526, "learning_rate": 0.0005565266106442577, "loss": 0.4652, "step": 15943 }, { "epoch": 8.907262569832403, "grad_norm": 1.5065382719039917, "learning_rate": 0.0005564985994397759, "loss": 0.5144, "step": 15944 }, { "epoch": 8.90782122905028, "grad_norm": 0.5022974610328674, "learning_rate": 0.0005564705882352941, "loss": 0.5253, "step": 15945 }, { "epoch": 8.908379888268156, "grad_norm": 0.5157644748687744, "learning_rate": 0.0005564425770308123, "loss": 0.5352, "step": 15946 }, { "epoch": 8.908938547486034, "grad_norm": 1.0089850425720215, "learning_rate": 0.0005564145658263305, "loss": 0.5403, "step": 15947 }, { "epoch": 8.90949720670391, "grad_norm": 0.6091569662094116, "learning_rate": 0.0005563865546218487, "loss": 0.4576, "step": 15948 }, { "epoch": 8.910055865921787, "grad_norm": 0.5464770793914795, "learning_rate": 0.000556358543417367, "loss": 0.346, "step": 15949 }, { "epoch": 8.910614525139664, "grad_norm": 1.5108433961868286, "learning_rate": 0.0005563305322128853, "loss": 0.4025, "step": 15950 }, { "epoch": 8.911173184357542, "grad_norm": 0.8324965238571167, "learning_rate": 0.0005563025210084034, "loss": 0.5877, "step": 15951 }, { "epoch": 8.911731843575419, "grad_norm": 0.7443004846572876, "learning_rate": 0.0005562745098039216, "loss": 0.4558, "step": 15952 }, { "epoch": 8.912290502793295, "grad_norm": 0.6209191679954529, "learning_rate": 0.0005562464985994398, "loss": 0.4783, "step": 15953 }, { "epoch": 8.912849162011174, "grad_norm": 0.5537817478179932, "learning_rate": 0.000556218487394958, "loss": 0.4153, "step": 15954 }, { "epoch": 8.91340782122905, "grad_norm": 0.6437365412712097, "learning_rate": 0.0005561904761904763, "loss": 0.4323, "step": 15955 }, { "epoch": 8.913966480446927, "grad_norm": 0.5271468758583069, "learning_rate": 0.0005561624649859944, "loss": 0.426, "step": 15956 }, { "epoch": 8.914525139664804, "grad_norm": 
1.0343917608261108, "learning_rate": 0.0005561344537815126, "loss": 0.5515, "step": 15957 }, { "epoch": 8.915083798882682, "grad_norm": 1.8202875852584839, "learning_rate": 0.0005561064425770308, "loss": 0.4184, "step": 15958 }, { "epoch": 8.915642458100558, "grad_norm": 0.5937560200691223, "learning_rate": 0.000556078431372549, "loss": 0.4586, "step": 15959 }, { "epoch": 8.916201117318435, "grad_norm": 1.5224997997283936, "learning_rate": 0.0005560504201680673, "loss": 0.4259, "step": 15960 }, { "epoch": 8.916759776536313, "grad_norm": 0.7288526296615601, "learning_rate": 0.0005560224089635854, "loss": 0.4676, "step": 15961 }, { "epoch": 8.91731843575419, "grad_norm": 0.4420980215072632, "learning_rate": 0.0005559943977591036, "loss": 0.4393, "step": 15962 }, { "epoch": 8.917877094972066, "grad_norm": 0.5820836424827576, "learning_rate": 0.0005559663865546218, "loss": 0.3767, "step": 15963 }, { "epoch": 8.918435754189945, "grad_norm": 0.5217689871788025, "learning_rate": 0.00055593837535014, "loss": 0.379, "step": 15964 }, { "epoch": 8.918994413407821, "grad_norm": 0.3979574143886566, "learning_rate": 0.0005559103641456584, "loss": 0.4204, "step": 15965 }, { "epoch": 8.919553072625698, "grad_norm": 0.5714628100395203, "learning_rate": 0.0005558823529411766, "loss": 0.5348, "step": 15966 }, { "epoch": 8.920111731843576, "grad_norm": 1.7164430618286133, "learning_rate": 0.0005558543417366947, "loss": 0.5723, "step": 15967 }, { "epoch": 8.920670391061453, "grad_norm": 0.4394230544567108, "learning_rate": 0.0005558263305322129, "loss": 0.3848, "step": 15968 }, { "epoch": 8.92122905027933, "grad_norm": 0.6123559474945068, "learning_rate": 0.0005557983193277311, "loss": 0.4742, "step": 15969 }, { "epoch": 8.921787709497206, "grad_norm": 1.2475509643554688, "learning_rate": 0.0005557703081232494, "loss": 0.4408, "step": 15970 }, { "epoch": 8.922346368715084, "grad_norm": 0.471023291349411, "learning_rate": 0.0005557422969187676, "loss": 0.4743, "step": 15971 }, { 
"epoch": 8.922905027932961, "grad_norm": 0.6953405141830444, "learning_rate": 0.0005557142857142857, "loss": 0.4367, "step": 15972 }, { "epoch": 8.923463687150837, "grad_norm": 0.681628406047821, "learning_rate": 0.0005556862745098039, "loss": 0.4739, "step": 15973 }, { "epoch": 8.924022346368716, "grad_norm": 0.6660521030426025, "learning_rate": 0.0005556582633053221, "loss": 0.3252, "step": 15974 }, { "epoch": 8.924581005586592, "grad_norm": 0.42581498622894287, "learning_rate": 0.0005556302521008404, "loss": 0.4024, "step": 15975 }, { "epoch": 8.925139664804469, "grad_norm": 0.4710841476917267, "learning_rate": 0.0005556022408963586, "loss": 0.3389, "step": 15976 }, { "epoch": 8.925698324022346, "grad_norm": 0.423613578081131, "learning_rate": 0.0005555742296918767, "loss": 0.4655, "step": 15977 }, { "epoch": 8.926256983240224, "grad_norm": 1.524860143661499, "learning_rate": 0.0005555462184873949, "loss": 0.4266, "step": 15978 }, { "epoch": 8.9268156424581, "grad_norm": 0.8713619709014893, "learning_rate": 0.0005555182072829131, "loss": 0.4722, "step": 15979 }, { "epoch": 8.927374301675977, "grad_norm": 0.7668302059173584, "learning_rate": 0.0005554901960784314, "loss": 0.5078, "step": 15980 }, { "epoch": 8.927932960893855, "grad_norm": 0.6430107951164246, "learning_rate": 0.0005554621848739496, "loss": 0.4857, "step": 15981 }, { "epoch": 8.928491620111732, "grad_norm": 0.4193095862865448, "learning_rate": 0.0005554341736694679, "loss": 0.4686, "step": 15982 }, { "epoch": 8.929050279329608, "grad_norm": 1.3140244483947754, "learning_rate": 0.000555406162464986, "loss": 0.3749, "step": 15983 }, { "epoch": 8.929608938547487, "grad_norm": 0.3882800340652466, "learning_rate": 0.0005553781512605042, "loss": 0.4179, "step": 15984 }, { "epoch": 8.930167597765363, "grad_norm": 0.4938243329524994, "learning_rate": 0.0005553501400560225, "loss": 0.3634, "step": 15985 }, { "epoch": 8.93072625698324, "grad_norm": 0.5283041596412659, "learning_rate": 0.0005553221288515407, 
"loss": 0.3488, "step": 15986 }, { "epoch": 8.931284916201117, "grad_norm": 0.862450122833252, "learning_rate": 0.0005552941176470589, "loss": 0.54, "step": 15987 }, { "epoch": 8.931843575418995, "grad_norm": 1.287535309791565, "learning_rate": 0.000555266106442577, "loss": 0.4541, "step": 15988 }, { "epoch": 8.932402234636871, "grad_norm": 0.6303169131278992, "learning_rate": 0.0005552380952380952, "loss": 0.5074, "step": 15989 }, { "epoch": 8.932960893854748, "grad_norm": 0.38338854908943176, "learning_rate": 0.0005552100840336135, "loss": 0.4274, "step": 15990 }, { "epoch": 8.933519553072626, "grad_norm": 0.7298899292945862, "learning_rate": 0.0005551820728291317, "loss": 0.5025, "step": 15991 }, { "epoch": 8.934078212290503, "grad_norm": 0.4986940920352936, "learning_rate": 0.0005551540616246499, "loss": 0.4693, "step": 15992 }, { "epoch": 8.93463687150838, "grad_norm": 0.4189278483390808, "learning_rate": 0.000555126050420168, "loss": 0.4832, "step": 15993 }, { "epoch": 8.935195530726258, "grad_norm": 0.601718544960022, "learning_rate": 0.0005550980392156862, "loss": 0.5076, "step": 15994 }, { "epoch": 8.935754189944134, "grad_norm": 0.5314221978187561, "learning_rate": 0.0005550700280112045, "loss": 0.4861, "step": 15995 }, { "epoch": 8.936312849162011, "grad_norm": 0.3965783417224884, "learning_rate": 0.0005550420168067227, "loss": 0.3447, "step": 15996 }, { "epoch": 8.936871508379888, "grad_norm": 0.48908886313438416, "learning_rate": 0.0005550140056022409, "loss": 0.4221, "step": 15997 }, { "epoch": 8.937430167597766, "grad_norm": 0.4414675235748291, "learning_rate": 0.0005549859943977591, "loss": 0.5481, "step": 15998 }, { "epoch": 8.937988826815642, "grad_norm": 0.449522465467453, "learning_rate": 0.0005549579831932772, "loss": 0.4437, "step": 15999 }, { "epoch": 8.938547486033519, "grad_norm": 0.687625527381897, "learning_rate": 0.0005549299719887956, "loss": 0.3812, "step": 16000 }, { "epoch": 8.938547486033519, "eval_cer": 0.08986067013627486, 
"eval_loss": 0.338677316904068, "eval_runtime": 59.3212, "eval_samples_per_second": 76.499, "eval_steps_per_second": 4.787, "eval_wer": 0.3547945588112064, "step": 16000 }, { "epoch": 8.939106145251397, "grad_norm": 0.4118690490722656, "learning_rate": 0.0005549019607843138, "loss": 0.4363, "step": 16001 }, { "epoch": 8.939664804469274, "grad_norm": 0.7898194789886475, "learning_rate": 0.000554873949579832, "loss": 0.4652, "step": 16002 }, { "epoch": 8.94022346368715, "grad_norm": 0.6410890817642212, "learning_rate": 0.0005548459383753502, "loss": 0.5963, "step": 16003 }, { "epoch": 8.940782122905027, "grad_norm": 0.6669525504112244, "learning_rate": 0.0005548179271708683, "loss": 0.4695, "step": 16004 }, { "epoch": 8.941340782122905, "grad_norm": 0.4108714759349823, "learning_rate": 0.0005547899159663866, "loss": 0.414, "step": 16005 }, { "epoch": 8.941899441340782, "grad_norm": 2.204586982727051, "learning_rate": 0.0005547619047619048, "loss": 0.4554, "step": 16006 }, { "epoch": 8.942458100558659, "grad_norm": 0.6554802060127258, "learning_rate": 0.000554733893557423, "loss": 0.5453, "step": 16007 }, { "epoch": 8.943016759776537, "grad_norm": 1.2897108793258667, "learning_rate": 0.0005547058823529412, "loss": 0.5655, "step": 16008 }, { "epoch": 8.943575418994413, "grad_norm": 0.6379144787788391, "learning_rate": 0.0005546778711484593, "loss": 0.4617, "step": 16009 }, { "epoch": 8.94413407821229, "grad_norm": 0.41212019324302673, "learning_rate": 0.0005546498599439776, "loss": 0.3335, "step": 16010 }, { "epoch": 8.944692737430168, "grad_norm": 7.185947418212891, "learning_rate": 0.0005546218487394958, "loss": 0.4866, "step": 16011 }, { "epoch": 8.945251396648045, "grad_norm": 0.4587954580783844, "learning_rate": 0.000554593837535014, "loss": 0.4626, "step": 16012 }, { "epoch": 8.945810055865921, "grad_norm": 2.7650115489959717, "learning_rate": 0.0005545658263305322, "loss": 0.4682, "step": 16013 }, { "epoch": 8.946368715083798, "grad_norm": 1.127885341644287, 
"learning_rate": 0.0005545378151260504, "loss": 0.3877, "step": 16014 }, { "epoch": 8.946927374301676, "grad_norm": 0.8281756043434143, "learning_rate": 0.0005545098039215687, "loss": 0.4863, "step": 16015 }, { "epoch": 8.947486033519553, "grad_norm": 0.5242045521736145, "learning_rate": 0.0005544817927170869, "loss": 0.4594, "step": 16016 }, { "epoch": 8.94804469273743, "grad_norm": 0.6643726229667664, "learning_rate": 0.0005544537815126051, "loss": 0.5377, "step": 16017 }, { "epoch": 8.948603351955308, "grad_norm": 0.4242427349090576, "learning_rate": 0.0005544257703081233, "loss": 0.4764, "step": 16018 }, { "epoch": 8.949162011173184, "grad_norm": 0.6682928800582886, "learning_rate": 0.0005543977591036415, "loss": 0.481, "step": 16019 }, { "epoch": 8.949720670391061, "grad_norm": 0.3470328748226166, "learning_rate": 0.0005543697478991597, "loss": 0.4326, "step": 16020 }, { "epoch": 8.95027932960894, "grad_norm": 0.46993130445480347, "learning_rate": 0.0005543417366946779, "loss": 0.4656, "step": 16021 }, { "epoch": 8.950837988826816, "grad_norm": 0.49074608087539673, "learning_rate": 0.0005543137254901961, "loss": 0.4227, "step": 16022 }, { "epoch": 8.951396648044692, "grad_norm": 0.39384642243385315, "learning_rate": 0.0005542857142857143, "loss": 0.385, "step": 16023 }, { "epoch": 8.951955307262569, "grad_norm": 0.42947933077812195, "learning_rate": 0.0005542577030812325, "loss": 0.3922, "step": 16024 }, { "epoch": 8.952513966480447, "grad_norm": 0.5113601684570312, "learning_rate": 0.0005542296918767507, "loss": 0.4506, "step": 16025 }, { "epoch": 8.953072625698324, "grad_norm": 0.8232506513595581, "learning_rate": 0.0005542016806722689, "loss": 0.5329, "step": 16026 }, { "epoch": 8.9536312849162, "grad_norm": 0.8880576491355896, "learning_rate": 0.0005541736694677871, "loss": 0.4885, "step": 16027 }, { "epoch": 8.954189944134079, "grad_norm": 0.8181654810905457, "learning_rate": 0.0005541456582633053, "loss": 0.5341, "step": 16028 }, { "epoch": 
8.954748603351955, "grad_norm": 0.6600484848022461, "learning_rate": 0.0005541176470588235, "loss": 0.4963, "step": 16029 }, { "epoch": 8.955307262569832, "grad_norm": 0.5024636387825012, "learning_rate": 0.0005540896358543418, "loss": 0.4829, "step": 16030 }, { "epoch": 8.955865921787709, "grad_norm": 0.7213287353515625, "learning_rate": 0.00055406162464986, "loss": 0.3964, "step": 16031 }, { "epoch": 8.956424581005587, "grad_norm": 0.6209570169448853, "learning_rate": 0.0005540336134453782, "loss": 0.4451, "step": 16032 }, { "epoch": 8.956983240223463, "grad_norm": 0.5490360856056213, "learning_rate": 0.0005540056022408964, "loss": 0.3946, "step": 16033 }, { "epoch": 8.95754189944134, "grad_norm": 0.43646201491355896, "learning_rate": 0.0005539775910364146, "loss": 0.451, "step": 16034 }, { "epoch": 8.958100558659218, "grad_norm": 3.0522241592407227, "learning_rate": 0.0005539495798319329, "loss": 0.6898, "step": 16035 }, { "epoch": 8.958659217877095, "grad_norm": 1.25969398021698, "learning_rate": 0.000553921568627451, "loss": 0.3744, "step": 16036 }, { "epoch": 8.959217877094972, "grad_norm": 0.4619274139404297, "learning_rate": 0.0005538935574229692, "loss": 0.4291, "step": 16037 }, { "epoch": 8.95977653631285, "grad_norm": 0.9642127752304077, "learning_rate": 0.0005538655462184874, "loss": 0.3878, "step": 16038 }, { "epoch": 8.960335195530726, "grad_norm": 0.614017903804779, "learning_rate": 0.0005538375350140056, "loss": 0.4823, "step": 16039 }, { "epoch": 8.960893854748603, "grad_norm": 0.9552125930786133, "learning_rate": 0.0005538095238095239, "loss": 0.4313, "step": 16040 }, { "epoch": 8.961452513966481, "grad_norm": 0.49200764298439026, "learning_rate": 0.000553781512605042, "loss": 0.4594, "step": 16041 }, { "epoch": 8.962011173184358, "grad_norm": 0.751595139503479, "learning_rate": 0.0005537535014005602, "loss": 0.4385, "step": 16042 }, { "epoch": 8.962569832402234, "grad_norm": 0.6982694268226624, "learning_rate": 0.0005537254901960784, "loss": 
0.3801, "step": 16043 }, { "epoch": 8.963128491620111, "grad_norm": 1.366797924041748, "learning_rate": 0.0005536974789915966, "loss": 0.4394, "step": 16044 }, { "epoch": 8.96368715083799, "grad_norm": 1.549389362335205, "learning_rate": 0.0005536694677871149, "loss": 0.4521, "step": 16045 }, { "epoch": 8.964245810055866, "grad_norm": 0.3726401925086975, "learning_rate": 0.0005536414565826331, "loss": 0.3531, "step": 16046 }, { "epoch": 8.964804469273743, "grad_norm": 0.4503166675567627, "learning_rate": 0.0005536134453781512, "loss": 0.4028, "step": 16047 }, { "epoch": 8.96536312849162, "grad_norm": 0.6732364892959595, "learning_rate": 0.0005535854341736694, "loss": 0.4026, "step": 16048 }, { "epoch": 8.965921787709497, "grad_norm": 4.926407337188721, "learning_rate": 0.0005535574229691877, "loss": 0.3675, "step": 16049 }, { "epoch": 8.966480446927374, "grad_norm": 0.6232845187187195, "learning_rate": 0.000553529411764706, "loss": 0.6547, "step": 16050 }, { "epoch": 8.96703910614525, "grad_norm": 0.5606221556663513, "learning_rate": 0.0005535014005602242, "loss": 0.5447, "step": 16051 }, { "epoch": 8.967597765363129, "grad_norm": 1.1458344459533691, "learning_rate": 0.0005534733893557423, "loss": 0.5058, "step": 16052 }, { "epoch": 8.968156424581005, "grad_norm": 0.7482420206069946, "learning_rate": 0.0005534453781512605, "loss": 0.4652, "step": 16053 }, { "epoch": 8.968715083798882, "grad_norm": 0.4654027223587036, "learning_rate": 0.0005534173669467787, "loss": 0.5068, "step": 16054 }, { "epoch": 8.96927374301676, "grad_norm": 0.5109484791755676, "learning_rate": 0.000553389355742297, "loss": 0.4004, "step": 16055 }, { "epoch": 8.969832402234637, "grad_norm": 0.4242281913757324, "learning_rate": 0.0005533613445378152, "loss": 0.367, "step": 16056 }, { "epoch": 8.970391061452514, "grad_norm": 0.46628156304359436, "learning_rate": 0.0005533333333333333, "loss": 0.5677, "step": 16057 }, { "epoch": 8.970949720670392, "grad_norm": 0.6533046364784241, "learning_rate": 
0.0005533053221288515, "loss": 0.6275, "step": 16058 }, { "epoch": 8.971508379888268, "grad_norm": 0.3921302258968353, "learning_rate": 0.0005532773109243697, "loss": 0.2887, "step": 16059 }, { "epoch": 8.972067039106145, "grad_norm": 0.5315089225769043, "learning_rate": 0.000553249299719888, "loss": 0.4757, "step": 16060 }, { "epoch": 8.972625698324022, "grad_norm": 0.6399401426315308, "learning_rate": 0.0005532212885154062, "loss": 0.3979, "step": 16061 }, { "epoch": 8.9731843575419, "grad_norm": 0.5833736658096313, "learning_rate": 0.0005531932773109244, "loss": 0.4549, "step": 16062 }, { "epoch": 8.973743016759776, "grad_norm": 0.5431896448135376, "learning_rate": 0.0005531652661064425, "loss": 0.4373, "step": 16063 }, { "epoch": 8.974301675977653, "grad_norm": 0.4164978265762329, "learning_rate": 0.0005531372549019607, "loss": 0.3365, "step": 16064 }, { "epoch": 8.974860335195531, "grad_norm": 0.27505579590797424, "learning_rate": 0.000553109243697479, "loss": 0.2261, "step": 16065 }, { "epoch": 8.975418994413408, "grad_norm": 0.4429325461387634, "learning_rate": 0.0005530812324929973, "loss": 0.3987, "step": 16066 }, { "epoch": 8.975977653631285, "grad_norm": 0.5422666072845459, "learning_rate": 0.0005530532212885155, "loss": 0.5242, "step": 16067 }, { "epoch": 8.976536312849163, "grad_norm": 0.4366532266139984, "learning_rate": 0.0005530252100840336, "loss": 0.3941, "step": 16068 }, { "epoch": 8.97709497206704, "grad_norm": 3.298611640930176, "learning_rate": 0.0005529971988795518, "loss": 0.3795, "step": 16069 }, { "epoch": 8.977653631284916, "grad_norm": 0.433038592338562, "learning_rate": 0.00055296918767507, "loss": 0.4503, "step": 16070 }, { "epoch": 8.978212290502793, "grad_norm": 0.38121265172958374, "learning_rate": 0.0005529411764705883, "loss": 0.4205, "step": 16071 }, { "epoch": 8.978770949720671, "grad_norm": 0.4107948839664459, "learning_rate": 0.0005529131652661065, "loss": 0.4777, "step": 16072 }, { "epoch": 8.979329608938547, "grad_norm": 
19.80625343322754, "learning_rate": 0.0005528851540616246, "loss": 0.4442, "step": 16073 }, { "epoch": 8.979888268156424, "grad_norm": 0.3977179527282715, "learning_rate": 0.0005528571428571428, "loss": 0.3453, "step": 16074 }, { "epoch": 8.980446927374302, "grad_norm": 0.5534055829048157, "learning_rate": 0.000552829131652661, "loss": 0.3843, "step": 16075 }, { "epoch": 8.981005586592179, "grad_norm": 0.8614566922187805, "learning_rate": 0.0005528011204481793, "loss": 0.4216, "step": 16076 }, { "epoch": 8.981564245810056, "grad_norm": 4.549533367156982, "learning_rate": 0.0005527731092436975, "loss": 0.37, "step": 16077 }, { "epoch": 8.982122905027932, "grad_norm": 0.49782413244247437, "learning_rate": 0.0005527450980392157, "loss": 0.5538, "step": 16078 }, { "epoch": 8.98268156424581, "grad_norm": 0.4126283824443817, "learning_rate": 0.0005527170868347338, "loss": 0.4421, "step": 16079 }, { "epoch": 8.983240223463687, "grad_norm": 1.023959994316101, "learning_rate": 0.000552689075630252, "loss": 0.4062, "step": 16080 }, { "epoch": 8.983798882681564, "grad_norm": 0.8352178931236267, "learning_rate": 0.0005526610644257704, "loss": 0.5029, "step": 16081 }, { "epoch": 8.984357541899442, "grad_norm": 2.0720584392547607, "learning_rate": 0.0005526330532212886, "loss": 0.4871, "step": 16082 }, { "epoch": 8.984916201117318, "grad_norm": 0.6406990885734558, "learning_rate": 0.0005526050420168068, "loss": 0.3551, "step": 16083 }, { "epoch": 8.985474860335195, "grad_norm": 0.478938490152359, "learning_rate": 0.0005525770308123249, "loss": 0.497, "step": 16084 }, { "epoch": 8.986033519553073, "grad_norm": 0.4386378824710846, "learning_rate": 0.0005525490196078431, "loss": 0.3771, "step": 16085 }, { "epoch": 8.98659217877095, "grad_norm": 0.6012040972709656, "learning_rate": 0.0005525210084033614, "loss": 0.4388, "step": 16086 }, { "epoch": 8.987150837988827, "grad_norm": 0.4123627245426178, "learning_rate": 0.0005524929971988796, "loss": 0.4408, "step": 16087 }, { "epoch": 
8.987709497206703, "grad_norm": 0.43089112639427185, "learning_rate": 0.0005524649859943978, "loss": 0.3995, "step": 16088 }, { "epoch": 8.988268156424581, "grad_norm": 0.42207980155944824, "learning_rate": 0.0005524369747899159, "loss": 0.4455, "step": 16089 }, { "epoch": 8.988826815642458, "grad_norm": 3.493142604827881, "learning_rate": 0.0005524089635854341, "loss": 0.3508, "step": 16090 }, { "epoch": 8.989385474860335, "grad_norm": 0.535279393196106, "learning_rate": 0.0005523809523809524, "loss": 0.4705, "step": 16091 }, { "epoch": 8.989944134078213, "grad_norm": 0.453834593296051, "learning_rate": 0.0005523529411764706, "loss": 0.3667, "step": 16092 }, { "epoch": 8.99050279329609, "grad_norm": 1.633609652519226, "learning_rate": 0.0005523249299719888, "loss": 0.4161, "step": 16093 }, { "epoch": 8.991061452513966, "grad_norm": 0.48708850145339966, "learning_rate": 0.000552296918767507, "loss": 0.4177, "step": 16094 }, { "epoch": 8.991620111731844, "grad_norm": 0.6837671995162964, "learning_rate": 0.0005522689075630251, "loss": 0.545, "step": 16095 }, { "epoch": 8.992178770949721, "grad_norm": 0.40899500250816345, "learning_rate": 0.0005522408963585434, "loss": 0.3441, "step": 16096 }, { "epoch": 8.992737430167598, "grad_norm": 0.4136612117290497, "learning_rate": 0.0005522128851540617, "loss": 0.2907, "step": 16097 }, { "epoch": 8.993296089385474, "grad_norm": 0.4239876866340637, "learning_rate": 0.0005521848739495799, "loss": 0.4237, "step": 16098 }, { "epoch": 8.993854748603352, "grad_norm": 0.6510956883430481, "learning_rate": 0.0005521568627450981, "loss": 0.4515, "step": 16099 }, { "epoch": 8.994413407821229, "grad_norm": 0.4541299641132355, "learning_rate": 0.0005521288515406162, "loss": 0.4396, "step": 16100 }, { "epoch": 8.994972067039106, "grad_norm": 0.4164002537727356, "learning_rate": 0.0005521008403361345, "loss": 0.4, "step": 16101 }, { "epoch": 8.995530726256984, "grad_norm": 0.35673004388809204, "learning_rate": 0.0005520728291316527, "loss": 
0.4109, "step": 16102 }, { "epoch": 8.99608938547486, "grad_norm": 0.8713533282279968, "learning_rate": 0.0005520448179271709, "loss": 0.4883, "step": 16103 }, { "epoch": 8.996648044692737, "grad_norm": 0.5718778967857361, "learning_rate": 0.0005520168067226891, "loss": 0.4175, "step": 16104 }, { "epoch": 8.997206703910614, "grad_norm": 1.1776643991470337, "learning_rate": 0.0005519887955182072, "loss": 0.3605, "step": 16105 }, { "epoch": 8.997765363128492, "grad_norm": 0.8573605418205261, "learning_rate": 0.0005519607843137255, "loss": 0.4634, "step": 16106 }, { "epoch": 8.998324022346369, "grad_norm": 0.37743502855300903, "learning_rate": 0.0005519327731092437, "loss": 0.4115, "step": 16107 }, { "epoch": 8.998882681564245, "grad_norm": 0.4381525218486786, "learning_rate": 0.0005519047619047619, "loss": 0.4647, "step": 16108 }, { "epoch": 8.999441340782123, "grad_norm": 0.5276052951812744, "learning_rate": 0.0005518767507002801, "loss": 0.3447, "step": 16109 }, { "epoch": 9.0, "grad_norm": 0.45398926734924316, "learning_rate": 0.0005518487394957983, "loss": 0.4692, "step": 16110 }, { "epoch": 9.000558659217877, "grad_norm": 0.471686989068985, "learning_rate": 0.0005518207282913165, "loss": 0.372, "step": 16111 }, { "epoch": 9.001117318435755, "grad_norm": 0.48771005868911743, "learning_rate": 0.0005517927170868347, "loss": 0.5023, "step": 16112 }, { "epoch": 9.001675977653631, "grad_norm": 0.49092793464660645, "learning_rate": 0.000551764705882353, "loss": 0.4116, "step": 16113 }, { "epoch": 9.002234636871508, "grad_norm": 0.435975044965744, "learning_rate": 0.0005517366946778712, "loss": 0.3816, "step": 16114 }, { "epoch": 9.002793296089385, "grad_norm": 0.45199039578437805, "learning_rate": 0.0005517086834733894, "loss": 0.3544, "step": 16115 }, { "epoch": 9.003351955307263, "grad_norm": 7.319671630859375, "learning_rate": 0.0005516806722689076, "loss": 0.5074, "step": 16116 }, { "epoch": 9.00391061452514, "grad_norm": 2.8106987476348877, "learning_rate": 
0.0005516526610644258, "loss": 0.426, "step": 16117 }, { "epoch": 9.004469273743016, "grad_norm": 0.6041256785392761, "learning_rate": 0.000551624649859944, "loss": 0.5083, "step": 16118 }, { "epoch": 9.005027932960894, "grad_norm": 0.4483242332935333, "learning_rate": 0.0005515966386554622, "loss": 0.4408, "step": 16119 }, { "epoch": 9.005586592178771, "grad_norm": 0.7909282445907593, "learning_rate": 0.0005515686274509804, "loss": 0.5326, "step": 16120 }, { "epoch": 9.006145251396648, "grad_norm": 0.6927090883255005, "learning_rate": 0.0005515406162464987, "loss": 0.5765, "step": 16121 }, { "epoch": 9.006703910614526, "grad_norm": 0.6927720904350281, "learning_rate": 0.0005515126050420168, "loss": 0.6415, "step": 16122 }, { "epoch": 9.007262569832402, "grad_norm": 0.4872031509876251, "learning_rate": 0.000551484593837535, "loss": 0.3654, "step": 16123 }, { "epoch": 9.007821229050279, "grad_norm": 0.7285237312316895, "learning_rate": 0.0005514565826330532, "loss": 0.4196, "step": 16124 }, { "epoch": 9.008379888268156, "grad_norm": 0.3630661368370056, "learning_rate": 0.0005514285714285714, "loss": 0.3919, "step": 16125 }, { "epoch": 9.008938547486034, "grad_norm": 0.44762906432151794, "learning_rate": 0.0005514005602240897, "loss": 0.3732, "step": 16126 }, { "epoch": 9.00949720670391, "grad_norm": 0.6799202561378479, "learning_rate": 0.0005513725490196078, "loss": 0.436, "step": 16127 }, { "epoch": 9.010055865921787, "grad_norm": 0.4219238758087158, "learning_rate": 0.000551344537815126, "loss": 0.4476, "step": 16128 }, { "epoch": 9.010614525139665, "grad_norm": 0.7865345478057861, "learning_rate": 0.0005513165266106442, "loss": 0.3476, "step": 16129 }, { "epoch": 9.011173184357542, "grad_norm": 2.6672730445861816, "learning_rate": 0.0005512885154061624, "loss": 0.4237, "step": 16130 }, { "epoch": 9.011731843575419, "grad_norm": 0.6888644099235535, "learning_rate": 0.0005512605042016808, "loss": 0.429, "step": 16131 }, { "epoch": 9.012290502793297, "grad_norm": 
0.6142561435699463, "learning_rate": 0.0005512324929971989, "loss": 0.4408, "step": 16132 }, { "epoch": 9.012849162011173, "grad_norm": 0.4682404398918152, "learning_rate": 0.0005512044817927171, "loss": 0.4748, "step": 16133 }, { "epoch": 9.01340782122905, "grad_norm": 0.6848462224006653, "learning_rate": 0.0005511764705882353, "loss": 0.3949, "step": 16134 }, { "epoch": 9.013966480446927, "grad_norm": 0.7022143006324768, "learning_rate": 0.0005511484593837535, "loss": 0.4867, "step": 16135 }, { "epoch": 9.014525139664805, "grad_norm": 1.2872141599655151, "learning_rate": 0.0005511204481792718, "loss": 0.4714, "step": 16136 }, { "epoch": 9.015083798882682, "grad_norm": 0.4741916060447693, "learning_rate": 0.00055109243697479, "loss": 0.3864, "step": 16137 }, { "epoch": 9.015642458100558, "grad_norm": 0.7256016731262207, "learning_rate": 0.0005510644257703081, "loss": 0.4008, "step": 16138 }, { "epoch": 9.016201117318436, "grad_norm": 0.46023839712142944, "learning_rate": 0.0005510364145658263, "loss": 0.4585, "step": 16139 }, { "epoch": 9.016759776536313, "grad_norm": 0.6426053047180176, "learning_rate": 0.0005510084033613445, "loss": 0.4557, "step": 16140 }, { "epoch": 9.01731843575419, "grad_norm": 0.40671733021736145, "learning_rate": 0.0005509803921568628, "loss": 0.335, "step": 16141 }, { "epoch": 9.017877094972068, "grad_norm": 0.3892160654067993, "learning_rate": 0.000550952380952381, "loss": 0.3745, "step": 16142 }, { "epoch": 9.018435754189944, "grad_norm": 1.191564679145813, "learning_rate": 0.0005509243697478991, "loss": 0.4222, "step": 16143 }, { "epoch": 9.018994413407821, "grad_norm": 0.654286801815033, "learning_rate": 0.0005508963585434173, "loss": 0.3245, "step": 16144 }, { "epoch": 9.019553072625698, "grad_norm": 0.7706868648529053, "learning_rate": 0.0005508683473389355, "loss": 0.5063, "step": 16145 }, { "epoch": 9.020111731843576, "grad_norm": 0.5676131844520569, "learning_rate": 0.0005508403361344539, "loss": 0.5046, "step": 16146 }, { 
"epoch": 9.020670391061453, "grad_norm": 0.6341442465782166, "learning_rate": 0.0005508123249299721, "loss": 0.4806, "step": 16147 }, { "epoch": 9.021229050279329, "grad_norm": 0.35954737663269043, "learning_rate": 0.0005507843137254902, "loss": 0.4144, "step": 16148 }, { "epoch": 9.021787709497207, "grad_norm": 0.6082434058189392, "learning_rate": 0.0005507563025210084, "loss": 0.544, "step": 16149 }, { "epoch": 9.022346368715084, "grad_norm": 1.1365302801132202, "learning_rate": 0.0005507282913165266, "loss": 0.4561, "step": 16150 }, { "epoch": 9.02290502793296, "grad_norm": 0.5052218437194824, "learning_rate": 0.0005507002801120449, "loss": 0.5296, "step": 16151 }, { "epoch": 9.023463687150837, "grad_norm": 0.8568074703216553, "learning_rate": 0.0005506722689075631, "loss": 0.5617, "step": 16152 }, { "epoch": 9.024022346368715, "grad_norm": 0.5012927055358887, "learning_rate": 0.0005506442577030813, "loss": 0.4135, "step": 16153 }, { "epoch": 9.024581005586592, "grad_norm": 0.9031180739402771, "learning_rate": 0.0005506162464985994, "loss": 0.4345, "step": 16154 }, { "epoch": 9.025139664804469, "grad_norm": 0.32558107376098633, "learning_rate": 0.0005505882352941176, "loss": 0.4025, "step": 16155 }, { "epoch": 9.025698324022347, "grad_norm": 0.8359012603759766, "learning_rate": 0.0005505602240896359, "loss": 0.4153, "step": 16156 }, { "epoch": 9.026256983240224, "grad_norm": 0.5309123396873474, "learning_rate": 0.0005505322128851541, "loss": 0.3908, "step": 16157 }, { "epoch": 9.0268156424581, "grad_norm": 0.47509562969207764, "learning_rate": 0.0005505042016806723, "loss": 0.5155, "step": 16158 }, { "epoch": 9.027374301675978, "grad_norm": 0.3786229193210602, "learning_rate": 0.0005504761904761904, "loss": 0.3358, "step": 16159 }, { "epoch": 9.027932960893855, "grad_norm": 0.4620549976825714, "learning_rate": 0.0005504481792717086, "loss": 0.3888, "step": 16160 }, { "epoch": 9.028491620111732, "grad_norm": 0.570759117603302, "learning_rate": 
0.0005504201680672269, "loss": 0.4881, "step": 16161 }, { "epoch": 9.029050279329608, "grad_norm": 0.4059472382068634, "learning_rate": 0.0005503921568627451, "loss": 0.4683, "step": 16162 }, { "epoch": 9.029608938547486, "grad_norm": 0.6964138746261597, "learning_rate": 0.0005503641456582634, "loss": 0.4716, "step": 16163 }, { "epoch": 9.030167597765363, "grad_norm": 0.4319922924041748, "learning_rate": 0.0005503361344537815, "loss": 0.3864, "step": 16164 }, { "epoch": 9.03072625698324, "grad_norm": 0.4732145667076111, "learning_rate": 0.0005503081232492997, "loss": 0.4117, "step": 16165 }, { "epoch": 9.031284916201118, "grad_norm": 0.7796878218650818, "learning_rate": 0.000550280112044818, "loss": 0.4702, "step": 16166 }, { "epoch": 9.031843575418995, "grad_norm": 0.3250197470188141, "learning_rate": 0.0005502521008403362, "loss": 0.3937, "step": 16167 }, { "epoch": 9.032402234636871, "grad_norm": 0.5737577080726624, "learning_rate": 0.0005502240896358544, "loss": 0.437, "step": 16168 }, { "epoch": 9.03296089385475, "grad_norm": 0.4221270978450775, "learning_rate": 0.0005501960784313726, "loss": 0.4572, "step": 16169 }, { "epoch": 9.033519553072626, "grad_norm": 0.36608004570007324, "learning_rate": 0.0005501680672268907, "loss": 0.3638, "step": 16170 }, { "epoch": 9.034078212290503, "grad_norm": 1.0141520500183105, "learning_rate": 0.000550140056022409, "loss": 0.3616, "step": 16171 }, { "epoch": 9.03463687150838, "grad_norm": 0.6397863030433655, "learning_rate": 0.0005501120448179272, "loss": 0.4809, "step": 16172 }, { "epoch": 9.035195530726257, "grad_norm": 1.5604029893875122, "learning_rate": 0.0005500840336134454, "loss": 0.4924, "step": 16173 }, { "epoch": 9.035754189944134, "grad_norm": 0.495320588350296, "learning_rate": 0.0005500560224089636, "loss": 0.4222, "step": 16174 }, { "epoch": 9.03631284916201, "grad_norm": 0.5173612833023071, "learning_rate": 0.0005500280112044817, "loss": 0.4937, "step": 16175 }, { "epoch": 9.036871508379889, "grad_norm": 
0.6018291115760803, "learning_rate": 0.00055, "loss": 0.4473, "step": 16176 }, { "epoch": 9.037430167597766, "grad_norm": 0.3838387429714203, "learning_rate": 0.0005499719887955182, "loss": 0.405, "step": 16177 }, { "epoch": 9.037988826815642, "grad_norm": 0.4314991235733032, "learning_rate": 0.0005499439775910364, "loss": 0.4874, "step": 16178 }, { "epoch": 9.03854748603352, "grad_norm": 1.260207176208496, "learning_rate": 0.0005499159663865547, "loss": 0.5416, "step": 16179 }, { "epoch": 9.039106145251397, "grad_norm": 0.6589161157608032, "learning_rate": 0.0005498879551820727, "loss": 0.433, "step": 16180 }, { "epoch": 9.039664804469274, "grad_norm": 0.6899911165237427, "learning_rate": 0.0005498599439775911, "loss": 0.4592, "step": 16181 }, { "epoch": 9.04022346368715, "grad_norm": 0.5168753862380981, "learning_rate": 0.0005498319327731093, "loss": 0.4809, "step": 16182 }, { "epoch": 9.040782122905028, "grad_norm": 0.7457990050315857, "learning_rate": 0.0005498039215686275, "loss": 0.4037, "step": 16183 }, { "epoch": 9.041340782122905, "grad_norm": 0.4349231719970703, "learning_rate": 0.0005497759103641457, "loss": 0.3702, "step": 16184 }, { "epoch": 9.041899441340782, "grad_norm": 0.7760615944862366, "learning_rate": 0.0005497478991596639, "loss": 0.3719, "step": 16185 }, { "epoch": 9.04245810055866, "grad_norm": 0.5469179749488831, "learning_rate": 0.0005497198879551821, "loss": 0.458, "step": 16186 }, { "epoch": 9.043016759776537, "grad_norm": 1.2574890851974487, "learning_rate": 0.0005496918767507003, "loss": 0.4447, "step": 16187 }, { "epoch": 9.043575418994413, "grad_norm": 0.63707435131073, "learning_rate": 0.0005496638655462185, "loss": 0.4889, "step": 16188 }, { "epoch": 9.04413407821229, "grad_norm": 0.43079155683517456, "learning_rate": 0.0005496358543417367, "loss": 0.3619, "step": 16189 }, { "epoch": 9.044692737430168, "grad_norm": 0.6787812113761902, "learning_rate": 0.0005496078431372549, "loss": 0.4939, "step": 16190 }, { "epoch": 
9.045251396648045, "grad_norm": 0.4049622714519501, "learning_rate": 0.0005495798319327731, "loss": 0.349, "step": 16191 }, { "epoch": 9.045810055865921, "grad_norm": 1.0897622108459473, "learning_rate": 0.0005495518207282913, "loss": 0.4392, "step": 16192 }, { "epoch": 9.0463687150838, "grad_norm": 11.24601936340332, "learning_rate": 0.0005495238095238095, "loss": 0.5245, "step": 16193 }, { "epoch": 9.046927374301676, "grad_norm": 0.8324848413467407, "learning_rate": 0.0005494957983193277, "loss": 0.4513, "step": 16194 }, { "epoch": 9.047486033519553, "grad_norm": 0.5224907994270325, "learning_rate": 0.000549467787114846, "loss": 0.5327, "step": 16195 }, { "epoch": 9.048044692737431, "grad_norm": 0.5830835700035095, "learning_rate": 0.0005494397759103642, "loss": 0.4373, "step": 16196 }, { "epoch": 9.048603351955308, "grad_norm": 1.0531364679336548, "learning_rate": 0.0005494117647058824, "loss": 0.4228, "step": 16197 }, { "epoch": 9.049162011173184, "grad_norm": 0.4486483037471771, "learning_rate": 0.0005493837535014006, "loss": 0.4906, "step": 16198 }, { "epoch": 9.04972067039106, "grad_norm": 0.9480206966400146, "learning_rate": 0.0005493557422969188, "loss": 0.4164, "step": 16199 }, { "epoch": 9.050279329608939, "grad_norm": 3.4200010299682617, "learning_rate": 0.000549327731092437, "loss": 0.3918, "step": 16200 }, { "epoch": 9.050837988826816, "grad_norm": 0.5610361099243164, "learning_rate": 0.0005492997198879553, "loss": 0.5111, "step": 16201 }, { "epoch": 9.051396648044692, "grad_norm": 0.5053939819335938, "learning_rate": 0.0005492717086834734, "loss": 0.5089, "step": 16202 }, { "epoch": 9.05195530726257, "grad_norm": 0.40537121891975403, "learning_rate": 0.0005492436974789916, "loss": 0.3967, "step": 16203 }, { "epoch": 9.052513966480447, "grad_norm": 0.4986492693424225, "learning_rate": 0.0005492156862745098, "loss": 0.3496, "step": 16204 }, { "epoch": 9.053072625698324, "grad_norm": 0.39778822660446167, "learning_rate": 0.000549187675070028, "loss": 
0.4901, "step": 16205 }, { "epoch": 9.053631284916202, "grad_norm": 0.6032705307006836, "learning_rate": 0.0005491596638655463, "loss": 0.4693, "step": 16206 }, { "epoch": 9.054189944134079, "grad_norm": 0.7003213167190552, "learning_rate": 0.0005491316526610644, "loss": 0.4886, "step": 16207 }, { "epoch": 9.054748603351955, "grad_norm": 1.5134395360946655, "learning_rate": 0.0005491036414565826, "loss": 0.6274, "step": 16208 }, { "epoch": 9.055307262569832, "grad_norm": 0.5603281259536743, "learning_rate": 0.0005490756302521008, "loss": 0.4845, "step": 16209 }, { "epoch": 9.05586592178771, "grad_norm": 0.6256247758865356, "learning_rate": 0.000549047619047619, "loss": 0.4094, "step": 16210 }, { "epoch": 9.056424581005587, "grad_norm": 0.582689106464386, "learning_rate": 0.0005490196078431374, "loss": 0.5259, "step": 16211 }, { "epoch": 9.056983240223463, "grad_norm": 0.40499866008758545, "learning_rate": 0.0005489915966386554, "loss": 0.2875, "step": 16212 }, { "epoch": 9.057541899441341, "grad_norm": 0.6198334693908691, "learning_rate": 0.0005489635854341737, "loss": 0.4245, "step": 16213 }, { "epoch": 9.058100558659218, "grad_norm": 0.8410502076148987, "learning_rate": 0.0005489355742296919, "loss": 0.456, "step": 16214 }, { "epoch": 9.058659217877095, "grad_norm": 0.6758708953857422, "learning_rate": 0.0005489075630252101, "loss": 0.4311, "step": 16215 }, { "epoch": 9.059217877094973, "grad_norm": 0.4737738370895386, "learning_rate": 0.0005488795518207284, "loss": 0.468, "step": 16216 }, { "epoch": 9.05977653631285, "grad_norm": 0.4956648647785187, "learning_rate": 0.0005488515406162466, "loss": 0.3793, "step": 16217 }, { "epoch": 9.060335195530726, "grad_norm": 0.477300226688385, "learning_rate": 0.0005488235294117647, "loss": 0.3688, "step": 16218 }, { "epoch": 9.060893854748603, "grad_norm": 0.49101436138153076, "learning_rate": 0.0005487955182072829, "loss": 0.3954, "step": 16219 }, { "epoch": 9.061452513966481, "grad_norm": 0.6452205777168274, 
"learning_rate": 0.0005487675070028011, "loss": 0.3309, "step": 16220 }, { "epoch": 9.062011173184358, "grad_norm": 5.432021617889404, "learning_rate": 0.0005487394957983194, "loss": 0.5069, "step": 16221 }, { "epoch": 9.062569832402234, "grad_norm": 0.6202031970024109, "learning_rate": 0.0005487114845938376, "loss": 0.3848, "step": 16222 }, { "epoch": 9.063128491620112, "grad_norm": 4.939457416534424, "learning_rate": 0.0005486834733893557, "loss": 0.4985, "step": 16223 }, { "epoch": 9.063687150837989, "grad_norm": 0.7293437123298645, "learning_rate": 0.0005486554621848739, "loss": 0.449, "step": 16224 }, { "epoch": 9.064245810055866, "grad_norm": 0.39150670170783997, "learning_rate": 0.0005486274509803921, "loss": 0.3321, "step": 16225 }, { "epoch": 9.064804469273742, "grad_norm": 0.5872224569320679, "learning_rate": 0.0005485994397759104, "loss": 0.4727, "step": 16226 }, { "epoch": 9.06536312849162, "grad_norm": 0.5569866299629211, "learning_rate": 0.0005485714285714286, "loss": 0.4433, "step": 16227 }, { "epoch": 9.065921787709497, "grad_norm": 0.41548797488212585, "learning_rate": 0.0005485434173669467, "loss": 0.366, "step": 16228 }, { "epoch": 9.066480446927374, "grad_norm": 0.5427729487419128, "learning_rate": 0.000548515406162465, "loss": 0.3932, "step": 16229 }, { "epoch": 9.067039106145252, "grad_norm": 0.5051064491271973, "learning_rate": 0.0005484873949579832, "loss": 0.5481, "step": 16230 }, { "epoch": 9.067597765363129, "grad_norm": 0.5244288444519043, "learning_rate": 0.0005484593837535015, "loss": 0.5337, "step": 16231 }, { "epoch": 9.068156424581005, "grad_norm": 0.3912734389305115, "learning_rate": 0.0005484313725490197, "loss": 0.3239, "step": 16232 }, { "epoch": 9.068715083798883, "grad_norm": 0.5021672248840332, "learning_rate": 0.0005484033613445379, "loss": 0.5159, "step": 16233 }, { "epoch": 9.06927374301676, "grad_norm": 0.7317255139350891, "learning_rate": 0.000548375350140056, "loss": 0.4602, "step": 16234 }, { "epoch": 
9.069832402234637, "grad_norm": 0.3760683834552765, "learning_rate": 0.0005483473389355742, "loss": 0.3894, "step": 16235 }, { "epoch": 9.070391061452513, "grad_norm": 0.4998409152030945, "learning_rate": 0.0005483193277310925, "loss": 0.4143, "step": 16236 }, { "epoch": 9.070949720670392, "grad_norm": 0.5495596528053284, "learning_rate": 0.0005482913165266107, "loss": 0.4539, "step": 16237 }, { "epoch": 9.071508379888268, "grad_norm": 0.5514495372772217, "learning_rate": 0.0005482633053221289, "loss": 0.4303, "step": 16238 }, { "epoch": 9.072067039106145, "grad_norm": 0.582670271396637, "learning_rate": 0.000548235294117647, "loss": 0.4043, "step": 16239 }, { "epoch": 9.072625698324023, "grad_norm": 0.35094568133354187, "learning_rate": 0.0005482072829131652, "loss": 0.4455, "step": 16240 }, { "epoch": 9.0731843575419, "grad_norm": 0.6852053999900818, "learning_rate": 0.0005481792717086835, "loss": 0.5387, "step": 16241 }, { "epoch": 9.073743016759776, "grad_norm": 0.3727850317955017, "learning_rate": 0.0005481512605042017, "loss": 0.3634, "step": 16242 }, { "epoch": 9.074301675977654, "grad_norm": 0.45923852920532227, "learning_rate": 0.0005481232492997199, "loss": 0.5085, "step": 16243 }, { "epoch": 9.074860335195531, "grad_norm": 0.6615126729011536, "learning_rate": 0.000548095238095238, "loss": 0.4763, "step": 16244 }, { "epoch": 9.075418994413408, "grad_norm": 0.37014415860176086, "learning_rate": 0.0005480672268907562, "loss": 0.3241, "step": 16245 }, { "epoch": 9.075977653631284, "grad_norm": 1.961401104927063, "learning_rate": 0.0005480392156862746, "loss": 0.3432, "step": 16246 }, { "epoch": 9.076536312849163, "grad_norm": 0.5294041037559509, "learning_rate": 0.0005480112044817928, "loss": 0.487, "step": 16247 }, { "epoch": 9.077094972067039, "grad_norm": 1.2390879392623901, "learning_rate": 0.000547983193277311, "loss": 0.4214, "step": 16248 }, { "epoch": 9.077653631284916, "grad_norm": 0.5112480521202087, "learning_rate": 0.0005479551820728292, "loss": 
0.5072, "step": 16249 }, { "epoch": 9.078212290502794, "grad_norm": 0.5959158539772034, "learning_rate": 0.0005479271708683473, "loss": 0.4592, "step": 16250 }, { "epoch": 9.07877094972067, "grad_norm": 0.44943875074386597, "learning_rate": 0.0005478991596638656, "loss": 0.352, "step": 16251 }, { "epoch": 9.079329608938547, "grad_norm": 0.543141782283783, "learning_rate": 0.0005478711484593838, "loss": 0.4888, "step": 16252 }, { "epoch": 9.079888268156424, "grad_norm": 0.5933079123497009, "learning_rate": 0.000547843137254902, "loss": 0.5776, "step": 16253 }, { "epoch": 9.080446927374302, "grad_norm": 0.6944230198860168, "learning_rate": 0.0005478151260504202, "loss": 0.4523, "step": 16254 }, { "epoch": 9.081005586592179, "grad_norm": 0.5095569491386414, "learning_rate": 0.0005477871148459383, "loss": 0.5002, "step": 16255 }, { "epoch": 9.081564245810055, "grad_norm": 2.954151153564453, "learning_rate": 0.0005477591036414566, "loss": 0.5539, "step": 16256 }, { "epoch": 9.082122905027934, "grad_norm": 0.5832377672195435, "learning_rate": 0.0005477310924369748, "loss": 0.4825, "step": 16257 }, { "epoch": 9.08268156424581, "grad_norm": 0.7789512872695923, "learning_rate": 0.000547703081232493, "loss": 0.5376, "step": 16258 }, { "epoch": 9.083240223463687, "grad_norm": 0.4797183573246002, "learning_rate": 0.0005476750700280112, "loss": 0.4311, "step": 16259 }, { "epoch": 9.083798882681565, "grad_norm": 0.48299819231033325, "learning_rate": 0.0005476470588235293, "loss": 0.4117, "step": 16260 }, { "epoch": 9.084357541899442, "grad_norm": 0.44618839025497437, "learning_rate": 0.0005476190476190477, "loss": 0.4477, "step": 16261 }, { "epoch": 9.084916201117318, "grad_norm": 0.7945277690887451, "learning_rate": 0.0005475910364145659, "loss": 0.4485, "step": 16262 }, { "epoch": 9.085474860335195, "grad_norm": 0.5267883539199829, "learning_rate": 0.0005475630252100841, "loss": 0.3896, "step": 16263 }, { "epoch": 9.086033519553073, "grad_norm": 0.3976774215698242, 
"learning_rate": 0.0005475350140056023, "loss": 0.3821, "step": 16264 }, { "epoch": 9.08659217877095, "grad_norm": 0.5658947825431824, "learning_rate": 0.0005475070028011205, "loss": 0.4726, "step": 16265 }, { "epoch": 9.087150837988826, "grad_norm": 0.46620821952819824, "learning_rate": 0.0005474789915966387, "loss": 0.3273, "step": 16266 }, { "epoch": 9.087709497206705, "grad_norm": 0.4152989983558655, "learning_rate": 0.0005474509803921569, "loss": 0.3535, "step": 16267 }, { "epoch": 9.088268156424581, "grad_norm": 0.4430373013019562, "learning_rate": 0.0005474229691876751, "loss": 0.4914, "step": 16268 }, { "epoch": 9.088826815642458, "grad_norm": 0.6288483142852783, "learning_rate": 0.0005473949579831933, "loss": 0.4947, "step": 16269 }, { "epoch": 9.089385474860336, "grad_norm": 0.5687662959098816, "learning_rate": 0.0005473669467787115, "loss": 0.4216, "step": 16270 }, { "epoch": 9.089944134078213, "grad_norm": 0.5555469393730164, "learning_rate": 0.0005473389355742297, "loss": 0.4108, "step": 16271 }, { "epoch": 9.09050279329609, "grad_norm": 0.6849669814109802, "learning_rate": 0.0005473109243697479, "loss": 0.5279, "step": 16272 }, { "epoch": 9.091061452513966, "grad_norm": 0.3727986514568329, "learning_rate": 0.0005472829131652661, "loss": 0.316, "step": 16273 }, { "epoch": 9.091620111731844, "grad_norm": 0.6479452252388, "learning_rate": 0.0005472549019607843, "loss": 0.427, "step": 16274 }, { "epoch": 9.09217877094972, "grad_norm": 1.1027398109436035, "learning_rate": 0.0005472268907563025, "loss": 0.4555, "step": 16275 }, { "epoch": 9.092737430167597, "grad_norm": 0.7032692432403564, "learning_rate": 0.0005471988795518207, "loss": 0.4967, "step": 16276 }, { "epoch": 9.093296089385476, "grad_norm": 1.9388405084609985, "learning_rate": 0.000547170868347339, "loss": 0.5312, "step": 16277 }, { "epoch": 9.093854748603352, "grad_norm": 0.7738831639289856, "learning_rate": 0.0005471428571428572, "loss": 0.4959, "step": 16278 }, { "epoch": 9.094413407821229, 
"grad_norm": 0.6083651185035706, "learning_rate": 0.0005471148459383754, "loss": 0.4799, "step": 16279 }, { "epoch": 9.094972067039107, "grad_norm": 0.5386562943458557, "learning_rate": 0.0005470868347338936, "loss": 0.4181, "step": 16280 }, { "epoch": 9.095530726256984, "grad_norm": 0.5231610536575317, "learning_rate": 0.0005470588235294119, "loss": 0.451, "step": 16281 }, { "epoch": 9.09608938547486, "grad_norm": 0.4870569407939911, "learning_rate": 0.00054703081232493, "loss": 0.4537, "step": 16282 }, { "epoch": 9.096648044692737, "grad_norm": 0.6581200361251831, "learning_rate": 0.0005470028011204482, "loss": 0.571, "step": 16283 }, { "epoch": 9.097206703910615, "grad_norm": 0.41583675146102905, "learning_rate": 0.0005469747899159664, "loss": 0.4759, "step": 16284 }, { "epoch": 9.097765363128492, "grad_norm": 0.5102593898773193, "learning_rate": 0.0005469467787114846, "loss": 0.4556, "step": 16285 }, { "epoch": 9.098324022346368, "grad_norm": 0.3559733033180237, "learning_rate": 0.0005469187675070029, "loss": 0.341, "step": 16286 }, { "epoch": 9.098882681564247, "grad_norm": 0.7939794063568115, "learning_rate": 0.000546890756302521, "loss": 0.4808, "step": 16287 }, { "epoch": 9.099441340782123, "grad_norm": 0.4100939929485321, "learning_rate": 0.0005468627450980392, "loss": 0.3925, "step": 16288 }, { "epoch": 9.1, "grad_norm": 0.45097044110298157, "learning_rate": 0.0005468347338935574, "loss": 0.4335, "step": 16289 }, { "epoch": 9.100558659217878, "grad_norm": 0.4148938059806824, "learning_rate": 0.0005468067226890756, "loss": 0.4388, "step": 16290 }, { "epoch": 9.101117318435755, "grad_norm": 0.5107712745666504, "learning_rate": 0.0005467787114845938, "loss": 0.39, "step": 16291 }, { "epoch": 9.101675977653631, "grad_norm": 0.7259695529937744, "learning_rate": 0.000546750700280112, "loss": 0.5608, "step": 16292 }, { "epoch": 9.102234636871508, "grad_norm": 0.8637893199920654, "learning_rate": 0.0005467226890756302, "loss": 0.4, "step": 16293 }, { "epoch": 
9.102793296089386, "grad_norm": 0.738400936126709, "learning_rate": 0.0005466946778711484, "loss": 0.4209, "step": 16294 }, { "epoch": 9.103351955307263, "grad_norm": 0.3919229507446289, "learning_rate": 0.0005466666666666667, "loss": 0.3675, "step": 16295 }, { "epoch": 9.10391061452514, "grad_norm": 1.9260445833206177, "learning_rate": 0.0005466386554621849, "loss": 0.4413, "step": 16296 }, { "epoch": 9.104469273743018, "grad_norm": 0.8303425312042236, "learning_rate": 0.0005466106442577032, "loss": 0.398, "step": 16297 }, { "epoch": 9.105027932960894, "grad_norm": 2.134166955947876, "learning_rate": 0.0005465826330532213, "loss": 0.561, "step": 16298 }, { "epoch": 9.10558659217877, "grad_norm": 0.895537793636322, "learning_rate": 0.0005465546218487395, "loss": 0.5047, "step": 16299 }, { "epoch": 9.106145251396647, "grad_norm": 0.4418204426765442, "learning_rate": 0.0005465266106442577, "loss": 0.4127, "step": 16300 }, { "epoch": 9.106703910614526, "grad_norm": 0.43009352684020996, "learning_rate": 0.0005464985994397759, "loss": 0.3654, "step": 16301 }, { "epoch": 9.107262569832402, "grad_norm": 0.4846309423446655, "learning_rate": 0.0005464705882352942, "loss": 0.4188, "step": 16302 }, { "epoch": 9.107821229050279, "grad_norm": 0.6969563961029053, "learning_rate": 0.0005464425770308123, "loss": 0.5873, "step": 16303 }, { "epoch": 9.108379888268157, "grad_norm": 0.5111419558525085, "learning_rate": 0.0005464145658263305, "loss": 0.3658, "step": 16304 }, { "epoch": 9.108938547486034, "grad_norm": 0.5028784275054932, "learning_rate": 0.0005463865546218487, "loss": 0.5035, "step": 16305 }, { "epoch": 9.10949720670391, "grad_norm": 0.4106832444667816, "learning_rate": 0.0005463585434173669, "loss": 0.3582, "step": 16306 }, { "epoch": 9.110055865921789, "grad_norm": 0.45648127794265747, "learning_rate": 0.0005463305322128852, "loss": 0.3728, "step": 16307 }, { "epoch": 9.110614525139665, "grad_norm": 1.281018853187561, "learning_rate": 0.0005463025210084033, "loss": 
0.5196, "step": 16308 }, { "epoch": 9.111173184357542, "grad_norm": 0.6316792964935303, "learning_rate": 0.0005462745098039215, "loss": 0.4413, "step": 16309 }, { "epoch": 9.111731843575418, "grad_norm": 0.5648047924041748, "learning_rate": 0.0005462464985994397, "loss": 0.4137, "step": 16310 }, { "epoch": 9.112290502793297, "grad_norm": 0.524459719657898, "learning_rate": 0.000546218487394958, "loss": 0.5085, "step": 16311 }, { "epoch": 9.112849162011173, "grad_norm": 0.5050011277198792, "learning_rate": 0.0005461904761904763, "loss": 0.3521, "step": 16312 }, { "epoch": 9.11340782122905, "grad_norm": 0.6483074426651001, "learning_rate": 0.0005461624649859945, "loss": 0.4539, "step": 16313 }, { "epoch": 9.113966480446928, "grad_norm": 0.542499840259552, "learning_rate": 0.0005461344537815126, "loss": 0.4493, "step": 16314 }, { "epoch": 9.114525139664805, "grad_norm": 0.3561041057109833, "learning_rate": 0.0005461064425770308, "loss": 0.4473, "step": 16315 }, { "epoch": 9.115083798882681, "grad_norm": 2.08398699760437, "learning_rate": 0.000546078431372549, "loss": 0.5668, "step": 16316 }, { "epoch": 9.11564245810056, "grad_norm": 0.523814857006073, "learning_rate": 0.0005460504201680673, "loss": 0.4261, "step": 16317 }, { "epoch": 9.116201117318436, "grad_norm": 0.5716768503189087, "learning_rate": 0.0005460224089635855, "loss": 0.5053, "step": 16318 }, { "epoch": 9.116759776536313, "grad_norm": 0.5685926675796509, "learning_rate": 0.0005459943977591036, "loss": 0.4171, "step": 16319 }, { "epoch": 9.11731843575419, "grad_norm": 0.5755152106285095, "learning_rate": 0.0005459663865546218, "loss": 0.5121, "step": 16320 }, { "epoch": 9.117877094972068, "grad_norm": 0.4981805086135864, "learning_rate": 0.00054593837535014, "loss": 0.4212, "step": 16321 }, { "epoch": 9.118435754189944, "grad_norm": 0.4252035319805145, "learning_rate": 0.0005459103641456583, "loss": 0.4311, "step": 16322 }, { "epoch": 9.11899441340782, "grad_norm": 0.42670243978500366, "learning_rate": 
0.0005458823529411765, "loss": 0.4606, "step": 16323 }, { "epoch": 9.119553072625699, "grad_norm": 0.6980792284011841, "learning_rate": 0.0005458543417366946, "loss": 0.5582, "step": 16324 }, { "epoch": 9.120111731843576, "grad_norm": 2.64755916595459, "learning_rate": 0.0005458263305322128, "loss": 0.4501, "step": 16325 }, { "epoch": 9.120670391061452, "grad_norm": 1.0233116149902344, "learning_rate": 0.000545798319327731, "loss": 0.393, "step": 16326 }, { "epoch": 9.121229050279329, "grad_norm": 0.396380752325058, "learning_rate": 0.0005457703081232494, "loss": 0.3441, "step": 16327 }, { "epoch": 9.121787709497207, "grad_norm": 0.512162983417511, "learning_rate": 0.0005457422969187676, "loss": 0.4543, "step": 16328 }, { "epoch": 9.122346368715084, "grad_norm": 0.5223424434661865, "learning_rate": 0.0005457142857142858, "loss": 0.4577, "step": 16329 }, { "epoch": 9.12290502793296, "grad_norm": 7.071179389953613, "learning_rate": 0.0005456862745098039, "loss": 0.468, "step": 16330 }, { "epoch": 9.123463687150839, "grad_norm": 0.42750778794288635, "learning_rate": 0.0005456582633053221, "loss": 0.4144, "step": 16331 }, { "epoch": 9.124022346368715, "grad_norm": 0.946935772895813, "learning_rate": 0.0005456302521008404, "loss": 0.5107, "step": 16332 }, { "epoch": 9.124581005586592, "grad_norm": 0.4396444261074066, "learning_rate": 0.0005456022408963586, "loss": 0.3985, "step": 16333 }, { "epoch": 9.12513966480447, "grad_norm": 0.7168882489204407, "learning_rate": 0.0005455742296918768, "loss": 0.4534, "step": 16334 }, { "epoch": 9.125698324022347, "grad_norm": 0.48841142654418945, "learning_rate": 0.0005455462184873949, "loss": 0.5296, "step": 16335 }, { "epoch": 9.126256983240223, "grad_norm": 0.6995057463645935, "learning_rate": 0.0005455182072829131, "loss": 0.4467, "step": 16336 }, { "epoch": 9.1268156424581, "grad_norm": 4.894993305206299, "learning_rate": 0.0005454901960784314, "loss": 0.3613, "step": 16337 }, { "epoch": 9.127374301675978, "grad_norm": 
0.6327822804450989, "learning_rate": 0.0005454621848739496, "loss": 0.3516, "step": 16338 }, { "epoch": 9.127932960893855, "grad_norm": 0.4114134907722473, "learning_rate": 0.0005454341736694678, "loss": 0.4014, "step": 16339 }, { "epoch": 9.128491620111731, "grad_norm": 0.41836825013160706, "learning_rate": 0.0005454061624649859, "loss": 0.4139, "step": 16340 }, { "epoch": 9.12905027932961, "grad_norm": 0.42986246943473816, "learning_rate": 0.0005453781512605041, "loss": 0.4561, "step": 16341 }, { "epoch": 9.129608938547486, "grad_norm": 0.5191091895103455, "learning_rate": 0.0005453501400560224, "loss": 0.3996, "step": 16342 }, { "epoch": 9.130167597765363, "grad_norm": 0.52755206823349, "learning_rate": 0.0005453221288515407, "loss": 0.3932, "step": 16343 }, { "epoch": 9.130726256983241, "grad_norm": 0.34361231327056885, "learning_rate": 0.0005452941176470589, "loss": 0.3027, "step": 16344 }, { "epoch": 9.131284916201118, "grad_norm": 0.3853558301925659, "learning_rate": 0.0005452661064425771, "loss": 0.4704, "step": 16345 }, { "epoch": 9.131843575418994, "grad_norm": 0.5141075253486633, "learning_rate": 0.0005452380952380952, "loss": 0.3316, "step": 16346 }, { "epoch": 9.13240223463687, "grad_norm": 1.0674580335617065, "learning_rate": 0.0005452100840336135, "loss": 0.5051, "step": 16347 }, { "epoch": 9.132960893854749, "grad_norm": 2.7568652629852295, "learning_rate": 0.0005451820728291317, "loss": 0.7126, "step": 16348 }, { "epoch": 9.133519553072626, "grad_norm": 0.5429947376251221, "learning_rate": 0.0005451540616246499, "loss": 0.5427, "step": 16349 }, { "epoch": 9.134078212290502, "grad_norm": 0.45803746581077576, "learning_rate": 0.0005451260504201681, "loss": 0.465, "step": 16350 }, { "epoch": 9.13463687150838, "grad_norm": 1.558245062828064, "learning_rate": 0.0005450980392156862, "loss": 0.5021, "step": 16351 }, { "epoch": 9.135195530726257, "grad_norm": 0.4956776797771454, "learning_rate": 0.0005450700280112045, "loss": 0.4833, "step": 16352 }, { 
"epoch": 9.135754189944134, "grad_norm": 0.40609315037727356, "learning_rate": 0.0005450420168067227, "loss": 0.3424, "step": 16353 }, { "epoch": 9.136312849162012, "grad_norm": 0.5548574328422546, "learning_rate": 0.0005450140056022409, "loss": 0.3084, "step": 16354 }, { "epoch": 9.136871508379889, "grad_norm": 0.5057917833328247, "learning_rate": 0.0005449859943977591, "loss": 0.5042, "step": 16355 }, { "epoch": 9.137430167597765, "grad_norm": 0.5761809349060059, "learning_rate": 0.0005449579831932772, "loss": 0.4063, "step": 16356 }, { "epoch": 9.137988826815642, "grad_norm": 0.4334220588207245, "learning_rate": 0.0005449299719887955, "loss": 0.5039, "step": 16357 }, { "epoch": 9.13854748603352, "grad_norm": 0.8143592476844788, "learning_rate": 0.0005449019607843137, "loss": 0.4099, "step": 16358 }, { "epoch": 9.139106145251397, "grad_norm": 0.7351080179214478, "learning_rate": 0.000544873949579832, "loss": 0.5194, "step": 16359 }, { "epoch": 9.139664804469273, "grad_norm": 0.37233632802963257, "learning_rate": 0.0005448459383753502, "loss": 0.3753, "step": 16360 }, { "epoch": 9.140223463687152, "grad_norm": 0.36705389618873596, "learning_rate": 0.0005448179271708684, "loss": 0.2991, "step": 16361 }, { "epoch": 9.140782122905028, "grad_norm": 1.0420055389404297, "learning_rate": 0.0005447899159663866, "loss": 0.4184, "step": 16362 }, { "epoch": 9.141340782122905, "grad_norm": 0.6832417249679565, "learning_rate": 0.0005447619047619048, "loss": 0.6216, "step": 16363 }, { "epoch": 9.141899441340781, "grad_norm": 0.7023866772651672, "learning_rate": 0.000544733893557423, "loss": 0.4298, "step": 16364 }, { "epoch": 9.14245810055866, "grad_norm": 0.4837002456188202, "learning_rate": 0.0005447058823529412, "loss": 0.5234, "step": 16365 }, { "epoch": 9.143016759776536, "grad_norm": 0.45409059524536133, "learning_rate": 0.0005446778711484594, "loss": 0.3929, "step": 16366 }, { "epoch": 9.143575418994413, "grad_norm": 6.828324317932129, "learning_rate": 
0.0005446498599439776, "loss": 0.4482, "step": 16367 }, { "epoch": 9.144134078212291, "grad_norm": 0.5492183566093445, "learning_rate": 0.0005446218487394958, "loss": 0.5474, "step": 16368 }, { "epoch": 9.144692737430168, "grad_norm": 0.5949881672859192, "learning_rate": 0.000544593837535014, "loss": 0.5293, "step": 16369 }, { "epoch": 9.145251396648044, "grad_norm": 0.5678888559341431, "learning_rate": 0.0005445658263305322, "loss": 0.4042, "step": 16370 }, { "epoch": 9.145810055865923, "grad_norm": 0.3871781527996063, "learning_rate": 0.0005445378151260504, "loss": 0.3747, "step": 16371 }, { "epoch": 9.1463687150838, "grad_norm": 0.5532249212265015, "learning_rate": 0.0005445098039215687, "loss": 0.422, "step": 16372 }, { "epoch": 9.146927374301676, "grad_norm": 0.47664597630500793, "learning_rate": 0.0005444817927170868, "loss": 0.4691, "step": 16373 }, { "epoch": 9.147486033519552, "grad_norm": 0.46502628922462463, "learning_rate": 0.000544453781512605, "loss": 0.4153, "step": 16374 }, { "epoch": 9.14804469273743, "grad_norm": 0.535905659198761, "learning_rate": 0.0005444257703081232, "loss": 0.4587, "step": 16375 }, { "epoch": 9.148603351955307, "grad_norm": 0.4472079575061798, "learning_rate": 0.0005443977591036414, "loss": 0.3934, "step": 16376 }, { "epoch": 9.149162011173184, "grad_norm": 0.4134097397327423, "learning_rate": 0.0005443697478991598, "loss": 0.432, "step": 16377 }, { "epoch": 9.149720670391062, "grad_norm": 0.5542639493942261, "learning_rate": 0.0005443417366946779, "loss": 0.3759, "step": 16378 }, { "epoch": 9.150279329608939, "grad_norm": 0.8177208304405212, "learning_rate": 0.0005443137254901961, "loss": 0.4236, "step": 16379 }, { "epoch": 9.150837988826815, "grad_norm": 0.514380156993866, "learning_rate": 0.0005442857142857143, "loss": 0.4575, "step": 16380 }, { "epoch": 9.151396648044694, "grad_norm": 0.4157094359397888, "learning_rate": 0.0005442577030812325, "loss": 0.358, "step": 16381 }, { "epoch": 9.15195530726257, "grad_norm": 
0.4507540166378021, "learning_rate": 0.0005442296918767508, "loss": 0.3936, "step": 16382 }, { "epoch": 9.152513966480447, "grad_norm": 0.7144804000854492, "learning_rate": 0.0005442016806722689, "loss": 0.4704, "step": 16383 }, { "epoch": 9.153072625698323, "grad_norm": 0.33832982182502747, "learning_rate": 0.0005441736694677871, "loss": 0.4255, "step": 16384 }, { "epoch": 9.153631284916202, "grad_norm": 0.6611616015434265, "learning_rate": 0.0005441456582633053, "loss": 0.413, "step": 16385 }, { "epoch": 9.154189944134078, "grad_norm": 0.43906164169311523, "learning_rate": 0.0005441176470588235, "loss": 0.4903, "step": 16386 }, { "epoch": 9.154748603351955, "grad_norm": 0.5179803967475891, "learning_rate": 0.0005440896358543418, "loss": 0.5272, "step": 16387 }, { "epoch": 9.155307262569833, "grad_norm": 0.5361232757568359, "learning_rate": 0.00054406162464986, "loss": 0.3685, "step": 16388 }, { "epoch": 9.15586592178771, "grad_norm": 0.6487807035446167, "learning_rate": 0.0005440336134453781, "loss": 0.4931, "step": 16389 }, { "epoch": 9.156424581005586, "grad_norm": 0.3948875665664673, "learning_rate": 0.0005440056022408963, "loss": 0.3591, "step": 16390 }, { "epoch": 9.156983240223465, "grad_norm": 0.4617760479450226, "learning_rate": 0.0005439775910364145, "loss": 0.4945, "step": 16391 }, { "epoch": 9.157541899441341, "grad_norm": 0.5252341032028198, "learning_rate": 0.0005439495798319329, "loss": 0.4994, "step": 16392 }, { "epoch": 9.158100558659218, "grad_norm": 0.3993321359157562, "learning_rate": 0.0005439215686274511, "loss": 0.535, "step": 16393 }, { "epoch": 9.158659217877094, "grad_norm": 0.529315173625946, "learning_rate": 0.0005438935574229692, "loss": 0.3762, "step": 16394 }, { "epoch": 9.159217877094973, "grad_norm": 0.7219976186752319, "learning_rate": 0.0005438655462184874, "loss": 0.5184, "step": 16395 }, { "epoch": 9.15977653631285, "grad_norm": 0.7702376246452332, "learning_rate": 0.0005438375350140056, "loss": 0.4463, "step": 16396 }, { 
"epoch": 9.160335195530726, "grad_norm": 0.4217546880245209, "learning_rate": 0.0005438095238095239, "loss": 0.4454, "step": 16397 }, { "epoch": 9.160893854748604, "grad_norm": 0.5808950662612915, "learning_rate": 0.0005437815126050421, "loss": 0.3412, "step": 16398 }, { "epoch": 9.16145251396648, "grad_norm": 0.3684050142765045, "learning_rate": 0.0005437535014005602, "loss": 0.3591, "step": 16399 }, { "epoch": 9.162011173184357, "grad_norm": 0.5749147534370422, "learning_rate": 0.0005437254901960784, "loss": 0.4092, "step": 16400 }, { "epoch": 9.162569832402234, "grad_norm": 0.43909958004951477, "learning_rate": 0.0005436974789915966, "loss": 0.4478, "step": 16401 }, { "epoch": 9.163128491620112, "grad_norm": 0.5004404187202454, "learning_rate": 0.0005436694677871149, "loss": 0.4707, "step": 16402 }, { "epoch": 9.163687150837989, "grad_norm": 0.3971354067325592, "learning_rate": 0.0005436414565826331, "loss": 0.336, "step": 16403 }, { "epoch": 9.164245810055865, "grad_norm": 0.48322901129722595, "learning_rate": 0.0005436134453781513, "loss": 0.4363, "step": 16404 }, { "epoch": 9.164804469273744, "grad_norm": 0.36465978622436523, "learning_rate": 0.0005435854341736694, "loss": 0.295, "step": 16405 }, { "epoch": 9.16536312849162, "grad_norm": 13.25984001159668, "learning_rate": 0.0005435574229691876, "loss": 0.4929, "step": 16406 }, { "epoch": 9.165921787709497, "grad_norm": 0.5462678074836731, "learning_rate": 0.0005435294117647059, "loss": 0.3636, "step": 16407 }, { "epoch": 9.166480446927375, "grad_norm": 0.6351718902587891, "learning_rate": 0.0005435014005602241, "loss": 0.3828, "step": 16408 }, { "epoch": 9.167039106145252, "grad_norm": 0.31751272082328796, "learning_rate": 0.0005434733893557424, "loss": 0.3198, "step": 16409 }, { "epoch": 9.167597765363128, "grad_norm": 0.2950994670391083, "learning_rate": 0.0005434453781512605, "loss": 0.3686, "step": 16410 }, { "epoch": 9.168156424581005, "grad_norm": 0.5025097727775574, "learning_rate": 
0.0005434173669467787, "loss": 0.4293, "step": 16411 }, { "epoch": 9.168715083798883, "grad_norm": 0.632214367389679, "learning_rate": 0.000543389355742297, "loss": 0.4881, "step": 16412 }, { "epoch": 9.16927374301676, "grad_norm": 0.6147112846374512, "learning_rate": 0.0005433613445378152, "loss": 0.3965, "step": 16413 }, { "epoch": 9.169832402234636, "grad_norm": 0.6025201678276062, "learning_rate": 0.0005433333333333334, "loss": 0.4268, "step": 16414 }, { "epoch": 9.170391061452515, "grad_norm": 0.3863448202610016, "learning_rate": 0.0005433053221288515, "loss": 0.464, "step": 16415 }, { "epoch": 9.170949720670391, "grad_norm": 0.41197606921195984, "learning_rate": 0.0005432773109243697, "loss": 0.4562, "step": 16416 }, { "epoch": 9.171508379888268, "grad_norm": 0.3912128210067749, "learning_rate": 0.000543249299719888, "loss": 0.4079, "step": 16417 }, { "epoch": 9.172067039106146, "grad_norm": 0.3998691439628601, "learning_rate": 0.0005432212885154062, "loss": 0.5347, "step": 16418 }, { "epoch": 9.172625698324023, "grad_norm": 0.35939306020736694, "learning_rate": 0.0005431932773109244, "loss": 0.3342, "step": 16419 }, { "epoch": 9.1731843575419, "grad_norm": 0.3489071726799011, "learning_rate": 0.0005431652661064426, "loss": 0.3803, "step": 16420 }, { "epoch": 9.173743016759776, "grad_norm": 0.6111111640930176, "learning_rate": 0.0005431372549019607, "loss": 0.5551, "step": 16421 }, { "epoch": 9.174301675977654, "grad_norm": 0.6878377199172974, "learning_rate": 0.000543109243697479, "loss": 0.3191, "step": 16422 }, { "epoch": 9.17486033519553, "grad_norm": 0.908305287361145, "learning_rate": 0.0005430812324929972, "loss": 0.5914, "step": 16423 }, { "epoch": 9.175418994413407, "grad_norm": 1.1644045114517212, "learning_rate": 0.0005430532212885154, "loss": 0.3874, "step": 16424 }, { "epoch": 9.175977653631286, "grad_norm": 2.926616668701172, "learning_rate": 0.0005430252100840337, "loss": 0.3941, "step": 16425 }, { "epoch": 9.176536312849162, "grad_norm": 
1.587501049041748, "learning_rate": 0.0005429971988795517, "loss": 0.448, "step": 16426 }, { "epoch": 9.177094972067039, "grad_norm": 0.8188040256500244, "learning_rate": 0.0005429691876750701, "loss": 0.3955, "step": 16427 }, { "epoch": 9.177653631284917, "grad_norm": 0.5032440423965454, "learning_rate": 0.0005429411764705883, "loss": 0.4582, "step": 16428 }, { "epoch": 9.178212290502794, "grad_norm": 0.5606140494346619, "learning_rate": 0.0005429131652661065, "loss": 0.5445, "step": 16429 }, { "epoch": 9.17877094972067, "grad_norm": 0.5652497410774231, "learning_rate": 0.0005428851540616247, "loss": 0.4381, "step": 16430 }, { "epoch": 9.179329608938547, "grad_norm": 0.40602239966392517, "learning_rate": 0.0005428571428571428, "loss": 0.4297, "step": 16431 }, { "epoch": 9.179888268156425, "grad_norm": 0.519846498966217, "learning_rate": 0.0005428291316526611, "loss": 0.4639, "step": 16432 }, { "epoch": 9.180446927374302, "grad_norm": 1.7221403121948242, "learning_rate": 0.0005428011204481793, "loss": 0.3747, "step": 16433 }, { "epoch": 9.181005586592178, "grad_norm": 0.6046132445335388, "learning_rate": 0.0005427731092436975, "loss": 0.4141, "step": 16434 }, { "epoch": 9.181564245810057, "grad_norm": 0.37778180837631226, "learning_rate": 0.0005427450980392157, "loss": 0.4471, "step": 16435 }, { "epoch": 9.182122905027933, "grad_norm": 0.6673175692558289, "learning_rate": 0.0005427170868347339, "loss": 0.3409, "step": 16436 }, { "epoch": 9.18268156424581, "grad_norm": 0.4877397418022156, "learning_rate": 0.0005426890756302521, "loss": 0.5284, "step": 16437 }, { "epoch": 9.183240223463686, "grad_norm": 1.0286041498184204, "learning_rate": 0.0005426610644257703, "loss": 0.3731, "step": 16438 }, { "epoch": 9.183798882681565, "grad_norm": 0.5005021095275879, "learning_rate": 0.0005426330532212885, "loss": 0.375, "step": 16439 }, { "epoch": 9.184357541899441, "grad_norm": 0.9342933893203735, "learning_rate": 0.0005426050420168067, "loss": 0.5854, "step": 16440 }, { 
"epoch": 9.184916201117318, "grad_norm": 0.5026494264602661, "learning_rate": 0.000542577030812325, "loss": 0.405, "step": 16441 }, { "epoch": 9.185474860335196, "grad_norm": 0.4556749165058136, "learning_rate": 0.0005425490196078432, "loss": 0.427, "step": 16442 }, { "epoch": 9.186033519553073, "grad_norm": 0.5704842805862427, "learning_rate": 0.0005425210084033614, "loss": 0.4343, "step": 16443 }, { "epoch": 9.18659217877095, "grad_norm": 0.5358864665031433, "learning_rate": 0.0005424929971988796, "loss": 0.4051, "step": 16444 }, { "epoch": 9.187150837988828, "grad_norm": 0.6559305191040039, "learning_rate": 0.0005424649859943978, "loss": 0.4203, "step": 16445 }, { "epoch": 9.187709497206704, "grad_norm": 0.6133913993835449, "learning_rate": 0.000542436974789916, "loss": 0.4764, "step": 16446 }, { "epoch": 9.18826815642458, "grad_norm": 0.39269503951072693, "learning_rate": 0.0005424089635854342, "loss": 0.3491, "step": 16447 }, { "epoch": 9.188826815642457, "grad_norm": 0.4439775347709656, "learning_rate": 0.0005423809523809524, "loss": 0.4143, "step": 16448 }, { "epoch": 9.189385474860336, "grad_norm": 0.49662068486213684, "learning_rate": 0.0005423529411764706, "loss": 0.3804, "step": 16449 }, { "epoch": 9.189944134078212, "grad_norm": 0.5026037693023682, "learning_rate": 0.0005423249299719888, "loss": 0.3871, "step": 16450 }, { "epoch": 9.190502793296089, "grad_norm": 0.3771837055683136, "learning_rate": 0.000542296918767507, "loss": 0.3394, "step": 16451 }, { "epoch": 9.191061452513967, "grad_norm": 0.34782183170318604, "learning_rate": 0.0005422689075630253, "loss": 0.3255, "step": 16452 }, { "epoch": 9.191620111731844, "grad_norm": 0.3992161452770233, "learning_rate": 0.0005422408963585434, "loss": 0.4664, "step": 16453 }, { "epoch": 9.19217877094972, "grad_norm": 0.6021296977996826, "learning_rate": 0.0005422128851540616, "loss": 0.4732, "step": 16454 }, { "epoch": 9.192737430167599, "grad_norm": 0.4611443877220154, "learning_rate": 0.0005421848739495798, 
"loss": 0.4681, "step": 16455 }, { "epoch": 9.193296089385475, "grad_norm": 0.3801502585411072, "learning_rate": 0.000542156862745098, "loss": 0.3517, "step": 16456 }, { "epoch": 9.193854748603352, "grad_norm": 0.43669968843460083, "learning_rate": 0.0005421288515406164, "loss": 0.4708, "step": 16457 }, { "epoch": 9.194413407821228, "grad_norm": 0.44060125946998596, "learning_rate": 0.0005421008403361344, "loss": 0.4519, "step": 16458 }, { "epoch": 9.194972067039107, "grad_norm": 4.516201019287109, "learning_rate": 0.0005420728291316527, "loss": 0.523, "step": 16459 }, { "epoch": 9.195530726256983, "grad_norm": 0.7689078450202942, "learning_rate": 0.0005420448179271709, "loss": 0.3896, "step": 16460 }, { "epoch": 9.19608938547486, "grad_norm": 2.603292465209961, "learning_rate": 0.0005420168067226891, "loss": 0.3577, "step": 16461 }, { "epoch": 9.196648044692738, "grad_norm": 0.4897790551185608, "learning_rate": 0.0005419887955182074, "loss": 0.3882, "step": 16462 }, { "epoch": 9.197206703910615, "grad_norm": 0.4763418436050415, "learning_rate": 0.0005419607843137255, "loss": 0.5867, "step": 16463 }, { "epoch": 9.197765363128491, "grad_norm": 0.5391951203346252, "learning_rate": 0.0005419327731092437, "loss": 0.4056, "step": 16464 }, { "epoch": 9.19832402234637, "grad_norm": 1.0460638999938965, "learning_rate": 0.0005419047619047619, "loss": 0.4438, "step": 16465 }, { "epoch": 9.198882681564246, "grad_norm": 0.5877397656440735, "learning_rate": 0.0005418767507002801, "loss": 0.466, "step": 16466 }, { "epoch": 9.199441340782123, "grad_norm": 0.4966689646244049, "learning_rate": 0.0005418487394957984, "loss": 0.4797, "step": 16467 }, { "epoch": 9.2, "grad_norm": 0.5362020134925842, "learning_rate": 0.0005418207282913166, "loss": 0.4477, "step": 16468 }, { "epoch": 9.200558659217878, "grad_norm": 0.4578613042831421, "learning_rate": 0.0005417927170868347, "loss": 0.4791, "step": 16469 }, { "epoch": 9.201117318435754, "grad_norm": 1.344063639640808, "learning_rate": 
0.0005417647058823529, "loss": 0.4641, "step": 16470 }, { "epoch": 9.20167597765363, "grad_norm": 0.6875057220458984, "learning_rate": 0.0005417366946778711, "loss": 0.4328, "step": 16471 }, { "epoch": 9.202234636871509, "grad_norm": 1.160433292388916, "learning_rate": 0.0005417086834733894, "loss": 0.6508, "step": 16472 }, { "epoch": 9.202793296089386, "grad_norm": 1.4570869207382202, "learning_rate": 0.0005416806722689076, "loss": 0.3674, "step": 16473 }, { "epoch": 9.203351955307262, "grad_norm": 2.2910892963409424, "learning_rate": 0.0005416526610644257, "loss": 0.4061, "step": 16474 }, { "epoch": 9.203910614525139, "grad_norm": 0.5284371376037598, "learning_rate": 0.000541624649859944, "loss": 0.5464, "step": 16475 }, { "epoch": 9.204469273743017, "grad_norm": 0.5682401061058044, "learning_rate": 0.0005415966386554622, "loss": 0.4537, "step": 16476 }, { "epoch": 9.205027932960894, "grad_norm": 0.5928774476051331, "learning_rate": 0.0005415686274509805, "loss": 0.4038, "step": 16477 }, { "epoch": 9.20558659217877, "grad_norm": 0.39407432079315186, "learning_rate": 0.0005415406162464987, "loss": 0.3553, "step": 16478 }, { "epoch": 9.206145251396649, "grad_norm": 0.526796817779541, "learning_rate": 0.0005415126050420168, "loss": 0.4353, "step": 16479 }, { "epoch": 9.206703910614525, "grad_norm": 0.47668054699897766, "learning_rate": 0.000541484593837535, "loss": 0.5527, "step": 16480 }, { "epoch": 9.207262569832402, "grad_norm": 0.5429408550262451, "learning_rate": 0.0005414565826330532, "loss": 0.4561, "step": 16481 }, { "epoch": 9.20782122905028, "grad_norm": 0.4693089425563812, "learning_rate": 0.0005414285714285715, "loss": 0.3852, "step": 16482 }, { "epoch": 9.208379888268157, "grad_norm": 0.48932701349258423, "learning_rate": 0.0005414005602240897, "loss": 0.4439, "step": 16483 }, { "epoch": 9.208938547486033, "grad_norm": 0.4219457507133484, "learning_rate": 0.0005413725490196079, "loss": 0.3291, "step": 16484 }, { "epoch": 9.20949720670391, "grad_norm": 
0.38331130146980286, "learning_rate": 0.000541344537815126, "loss": 0.3829, "step": 16485 }, { "epoch": 9.210055865921788, "grad_norm": 1.3285800218582153, "learning_rate": 0.0005413165266106442, "loss": 0.4194, "step": 16486 }, { "epoch": 9.210614525139665, "grad_norm": 0.6707630753517151, "learning_rate": 0.0005412885154061625, "loss": 0.4472, "step": 16487 }, { "epoch": 9.211173184357541, "grad_norm": 0.9894908666610718, "learning_rate": 0.0005412605042016807, "loss": 0.3662, "step": 16488 }, { "epoch": 9.21173184357542, "grad_norm": 0.3866855204105377, "learning_rate": 0.0005412324929971989, "loss": 0.3997, "step": 16489 }, { "epoch": 9.212290502793296, "grad_norm": 0.6083716154098511, "learning_rate": 0.000541204481792717, "loss": 0.4733, "step": 16490 }, { "epoch": 9.212849162011173, "grad_norm": 0.5836795568466187, "learning_rate": 0.0005411764705882352, "loss": 0.5028, "step": 16491 }, { "epoch": 9.213407821229051, "grad_norm": 0.6724944114685059, "learning_rate": 0.0005411484593837536, "loss": 0.4246, "step": 16492 }, { "epoch": 9.213966480446928, "grad_norm": 0.5771934390068054, "learning_rate": 0.0005411204481792718, "loss": 0.4776, "step": 16493 }, { "epoch": 9.214525139664804, "grad_norm": 1.0224010944366455, "learning_rate": 0.00054109243697479, "loss": 0.4138, "step": 16494 }, { "epoch": 9.21508379888268, "grad_norm": 0.4859912395477295, "learning_rate": 0.0005410644257703081, "loss": 0.345, "step": 16495 }, { "epoch": 9.21564245810056, "grad_norm": 0.4043622314929962, "learning_rate": 0.0005410364145658263, "loss": 0.446, "step": 16496 }, { "epoch": 9.216201117318436, "grad_norm": 0.5242271423339844, "learning_rate": 0.0005410084033613446, "loss": 0.496, "step": 16497 }, { "epoch": 9.216759776536312, "grad_norm": 0.47943079471588135, "learning_rate": 0.0005409803921568628, "loss": 0.4433, "step": 16498 }, { "epoch": 9.21731843575419, "grad_norm": 0.4939671754837036, "learning_rate": 0.000540952380952381, "loss": 0.5138, "step": 16499 }, { "epoch": 
9.217877094972067, "grad_norm": 0.604418158531189, "learning_rate": 0.0005409243697478992, "loss": 0.5597, "step": 16500 }, { "epoch": 9.217877094972067, "eval_cer": 0.08833316967256936, "eval_loss": 0.33648619055747986, "eval_runtime": 55.6829, "eval_samples_per_second": 81.497, "eval_steps_per_second": 5.1, "eval_wer": 0.3492360548588028, "step": 16500 }, { "epoch": 9.218435754189944, "grad_norm": 0.4354100525379181, "learning_rate": 0.0005408963585434173, "loss": 0.3938, "step": 16501 }, { "epoch": 9.21899441340782, "grad_norm": 1.2374842166900635, "learning_rate": 0.0005408683473389356, "loss": 0.4506, "step": 16502 }, { "epoch": 9.219553072625699, "grad_norm": 0.39970171451568604, "learning_rate": 0.0005408403361344538, "loss": 0.4018, "step": 16503 }, { "epoch": 9.220111731843575, "grad_norm": 0.45534956455230713, "learning_rate": 0.000540812324929972, "loss": 0.4006, "step": 16504 }, { "epoch": 9.220670391061452, "grad_norm": 0.8092905282974243, "learning_rate": 0.0005407843137254902, "loss": 0.4661, "step": 16505 }, { "epoch": 9.22122905027933, "grad_norm": 0.604893684387207, "learning_rate": 0.0005407563025210083, "loss": 0.4511, "step": 16506 }, { "epoch": 9.221787709497207, "grad_norm": 0.4756384491920471, "learning_rate": 0.0005407282913165267, "loss": 0.4438, "step": 16507 }, { "epoch": 9.222346368715083, "grad_norm": 0.572142481803894, "learning_rate": 0.0005407002801120449, "loss": 0.5343, "step": 16508 }, { "epoch": 9.222905027932962, "grad_norm": 0.43380674719810486, "learning_rate": 0.0005406722689075631, "loss": 0.3714, "step": 16509 }, { "epoch": 9.223463687150838, "grad_norm": 0.644777774810791, "learning_rate": 0.0005406442577030813, "loss": 0.4151, "step": 16510 }, { "epoch": 9.224022346368715, "grad_norm": 0.357843279838562, "learning_rate": 0.0005406162464985994, "loss": 0.3504, "step": 16511 }, { "epoch": 9.224581005586591, "grad_norm": 0.4684431254863739, "learning_rate": 0.0005405882352941176, "loss": 0.5099, "step": 16512 }, { "epoch": 
9.22513966480447, "grad_norm": 0.7435186505317688, "learning_rate": 0.0005405602240896359, "loss": 0.4077, "step": 16513 }, { "epoch": 9.225698324022346, "grad_norm": 0.7387958765029907, "learning_rate": 0.0005405322128851541, "loss": 0.5861, "step": 16514 }, { "epoch": 9.226256983240223, "grad_norm": 0.3787241578102112, "learning_rate": 0.0005405042016806723, "loss": 0.2862, "step": 16515 }, { "epoch": 9.226815642458101, "grad_norm": 0.38561609387397766, "learning_rate": 0.0005404761904761905, "loss": 0.3696, "step": 16516 }, { "epoch": 9.227374301675978, "grad_norm": 0.37068963050842285, "learning_rate": 0.0005404481792717086, "loss": 0.3752, "step": 16517 }, { "epoch": 9.227932960893854, "grad_norm": 1.7013413906097412, "learning_rate": 0.0005404201680672269, "loss": 0.4507, "step": 16518 }, { "epoch": 9.228491620111733, "grad_norm": 0.5270462036132812, "learning_rate": 0.0005403921568627451, "loss": 0.3809, "step": 16519 }, { "epoch": 9.22905027932961, "grad_norm": 0.7093483805656433, "learning_rate": 0.0005403641456582633, "loss": 0.4598, "step": 16520 }, { "epoch": 9.229608938547486, "grad_norm": 0.45013388991355896, "learning_rate": 0.0005403361344537815, "loss": 0.4179, "step": 16521 }, { "epoch": 9.230167597765362, "grad_norm": 0.4635022282600403, "learning_rate": 0.0005403081232492996, "loss": 0.4092, "step": 16522 }, { "epoch": 9.23072625698324, "grad_norm": 1.5607857704162598, "learning_rate": 0.000540280112044818, "loss": 0.3359, "step": 16523 }, { "epoch": 9.231284916201117, "grad_norm": 0.6553241610527039, "learning_rate": 0.0005402521008403362, "loss": 0.3788, "step": 16524 }, { "epoch": 9.231843575418994, "grad_norm": 0.3479006886482239, "learning_rate": 0.0005402240896358544, "loss": 0.3871, "step": 16525 }, { "epoch": 9.232402234636872, "grad_norm": 0.6250573992729187, "learning_rate": 0.0005401960784313726, "loss": 0.5014, "step": 16526 }, { "epoch": 9.232960893854749, "grad_norm": 0.7511268854141235, "learning_rate": 0.0005401680672268907, 
"loss": 0.4398, "step": 16527 }, { "epoch": 9.233519553072625, "grad_norm": 0.8214605450630188, "learning_rate": 0.000540140056022409, "loss": 0.5629, "step": 16528 }, { "epoch": 9.234078212290504, "grad_norm": 0.3612486720085144, "learning_rate": 0.0005401120448179272, "loss": 0.4069, "step": 16529 }, { "epoch": 9.23463687150838, "grad_norm": 0.693700909614563, "learning_rate": 0.0005400840336134454, "loss": 0.4707, "step": 16530 }, { "epoch": 9.235195530726257, "grad_norm": 0.935588538646698, "learning_rate": 0.0005400560224089636, "loss": 0.5702, "step": 16531 }, { "epoch": 9.235754189944133, "grad_norm": 0.5711575150489807, "learning_rate": 0.0005400280112044818, "loss": 0.4543, "step": 16532 }, { "epoch": 9.236312849162012, "grad_norm": 0.7414027452468872, "learning_rate": 0.00054, "loss": 0.5058, "step": 16533 }, { "epoch": 9.236871508379888, "grad_norm": 0.549874484539032, "learning_rate": 0.0005399719887955182, "loss": 0.4346, "step": 16534 }, { "epoch": 9.237430167597765, "grad_norm": 0.52960205078125, "learning_rate": 0.0005399439775910364, "loss": 0.5008, "step": 16535 }, { "epoch": 9.237988826815643, "grad_norm": 0.4601619243621826, "learning_rate": 0.0005399159663865546, "loss": 0.472, "step": 16536 }, { "epoch": 9.23854748603352, "grad_norm": 0.8490204811096191, "learning_rate": 0.0005398879551820728, "loss": 0.4669, "step": 16537 }, { "epoch": 9.239106145251396, "grad_norm": 0.6143187284469604, "learning_rate": 0.000539859943977591, "loss": 0.4888, "step": 16538 }, { "epoch": 9.239664804469275, "grad_norm": 1.2110612392425537, "learning_rate": 0.0005398319327731092, "loss": 0.3526, "step": 16539 }, { "epoch": 9.240223463687151, "grad_norm": 0.5195451974868774, "learning_rate": 0.0005398039215686274, "loss": 0.3983, "step": 16540 }, { "epoch": 9.240782122905028, "grad_norm": 0.6040216088294983, "learning_rate": 0.0005397759103641457, "loss": 0.5021, "step": 16541 }, { "epoch": 9.241340782122904, "grad_norm": 0.4761215150356293, "learning_rate": 
0.0005397478991596639, "loss": 0.3394, "step": 16542 }, { "epoch": 9.241899441340783, "grad_norm": 0.3308653235435486, "learning_rate": 0.0005397198879551821, "loss": 0.3188, "step": 16543 }, { "epoch": 9.24245810055866, "grad_norm": 0.7960337400436401, "learning_rate": 0.0005396918767507003, "loss": 0.4609, "step": 16544 }, { "epoch": 9.243016759776536, "grad_norm": 0.7407571077346802, "learning_rate": 0.0005396638655462185, "loss": 0.477, "step": 16545 }, { "epoch": 9.243575418994414, "grad_norm": 0.6363726258277893, "learning_rate": 0.0005396358543417367, "loss": 0.4224, "step": 16546 }, { "epoch": 9.24413407821229, "grad_norm": 0.47694161534309387, "learning_rate": 0.0005396078431372549, "loss": 0.4466, "step": 16547 }, { "epoch": 9.244692737430167, "grad_norm": 0.5374645590782166, "learning_rate": 0.0005395798319327732, "loss": 0.4513, "step": 16548 }, { "epoch": 9.245251396648044, "grad_norm": 1.3158732652664185, "learning_rate": 0.0005395518207282913, "loss": 0.5985, "step": 16549 }, { "epoch": 9.245810055865922, "grad_norm": 1.9134275913238525, "learning_rate": 0.0005395238095238095, "loss": 0.4046, "step": 16550 }, { "epoch": 9.246368715083799, "grad_norm": 0.4959862232208252, "learning_rate": 0.0005394957983193277, "loss": 0.4526, "step": 16551 }, { "epoch": 9.246927374301675, "grad_norm": 3.6215617656707764, "learning_rate": 0.0005394677871148459, "loss": 0.531, "step": 16552 }, { "epoch": 9.247486033519554, "grad_norm": 0.6431394219398499, "learning_rate": 0.0005394397759103642, "loss": 0.4339, "step": 16553 }, { "epoch": 9.24804469273743, "grad_norm": 0.5379325747489929, "learning_rate": 0.0005394117647058823, "loss": 0.4173, "step": 16554 }, { "epoch": 9.248603351955307, "grad_norm": 1.1802860498428345, "learning_rate": 0.0005393837535014005, "loss": 0.4145, "step": 16555 }, { "epoch": 9.249162011173185, "grad_norm": 0.5084881782531738, "learning_rate": 0.0005393557422969187, "loss": 0.4539, "step": 16556 }, { "epoch": 9.249720670391062, "grad_norm": 
0.7915683388710022, "learning_rate": 0.000539327731092437, "loss": 0.4085, "step": 16557 }, { "epoch": 9.250279329608938, "grad_norm": 0.41347792744636536, "learning_rate": 0.0005392997198879553, "loss": 0.4164, "step": 16558 }, { "epoch": 9.250837988826815, "grad_norm": 0.6693623065948486, "learning_rate": 0.0005392717086834734, "loss": 0.6032, "step": 16559 }, { "epoch": 9.251396648044693, "grad_norm": 0.5454085469245911, "learning_rate": 0.0005392436974789916, "loss": 0.406, "step": 16560 }, { "epoch": 9.25195530726257, "grad_norm": 0.9983789324760437, "learning_rate": 0.0005392156862745098, "loss": 0.4268, "step": 16561 }, { "epoch": 9.252513966480446, "grad_norm": 0.6356601715087891, "learning_rate": 0.000539187675070028, "loss": 0.412, "step": 16562 }, { "epoch": 9.253072625698325, "grad_norm": 0.6159977316856384, "learning_rate": 0.0005391596638655463, "loss": 0.4515, "step": 16563 }, { "epoch": 9.253631284916201, "grad_norm": 0.6717731356620789, "learning_rate": 0.0005391316526610645, "loss": 0.4526, "step": 16564 }, { "epoch": 9.254189944134078, "grad_norm": 0.4269741475582123, "learning_rate": 0.0005391036414565826, "loss": 0.3681, "step": 16565 }, { "epoch": 9.254748603351956, "grad_norm": 0.5695484280586243, "learning_rate": 0.0005390756302521008, "loss": 0.4207, "step": 16566 }, { "epoch": 9.255307262569833, "grad_norm": 0.4539811611175537, "learning_rate": 0.000539047619047619, "loss": 0.3735, "step": 16567 }, { "epoch": 9.25586592178771, "grad_norm": 0.5337157845497131, "learning_rate": 0.0005390196078431373, "loss": 0.4526, "step": 16568 }, { "epoch": 9.256424581005586, "grad_norm": 0.575912356376648, "learning_rate": 0.0005389915966386555, "loss": 0.4824, "step": 16569 }, { "epoch": 9.256983240223464, "grad_norm": 0.48774194717407227, "learning_rate": 0.0005389635854341736, "loss": 0.4498, "step": 16570 }, { "epoch": 9.25754189944134, "grad_norm": 1.373374104499817, "learning_rate": 0.0005389355742296918, "loss": 0.3093, "step": 16571 }, { "epoch": 
9.258100558659217, "grad_norm": 0.5435925126075745, "learning_rate": 0.00053890756302521, "loss": 0.4147, "step": 16572 }, { "epoch": 9.258659217877096, "grad_norm": 0.5785366892814636, "learning_rate": 0.0005388795518207284, "loss": 0.3713, "step": 16573 }, { "epoch": 9.259217877094972, "grad_norm": 0.43125826120376587, "learning_rate": 0.0005388515406162466, "loss": 0.4413, "step": 16574 }, { "epoch": 9.259776536312849, "grad_norm": 0.5177059769630432, "learning_rate": 0.0005388235294117647, "loss": 0.4123, "step": 16575 }, { "epoch": 9.260335195530725, "grad_norm": 0.9824718832969666, "learning_rate": 0.0005387955182072829, "loss": 0.3841, "step": 16576 }, { "epoch": 9.260893854748604, "grad_norm": 0.7645853161811829, "learning_rate": 0.0005387675070028011, "loss": 0.3987, "step": 16577 }, { "epoch": 9.26145251396648, "grad_norm": 0.44827255606651306, "learning_rate": 0.0005387394957983194, "loss": 0.4107, "step": 16578 }, { "epoch": 9.262011173184357, "grad_norm": 1.3404701948165894, "learning_rate": 0.0005387114845938376, "loss": 0.402, "step": 16579 }, { "epoch": 9.262569832402235, "grad_norm": 0.4581243395805359, "learning_rate": 0.0005386834733893558, "loss": 0.5066, "step": 16580 }, { "epoch": 9.263128491620112, "grad_norm": 1.6659650802612305, "learning_rate": 0.0005386554621848739, "loss": 0.3811, "step": 16581 }, { "epoch": 9.263687150837988, "grad_norm": 1.3342540264129639, "learning_rate": 0.0005386274509803921, "loss": 0.3909, "step": 16582 }, { "epoch": 9.264245810055867, "grad_norm": 0.5435566902160645, "learning_rate": 0.0005385994397759104, "loss": 0.4321, "step": 16583 }, { "epoch": 9.264804469273743, "grad_norm": 0.6568428874015808, "learning_rate": 0.0005385714285714286, "loss": 0.4963, "step": 16584 }, { "epoch": 9.26536312849162, "grad_norm": 1.972517490386963, "learning_rate": 0.0005385434173669468, "loss": 0.4758, "step": 16585 }, { "epoch": 9.265921787709496, "grad_norm": 0.3659202456474304, "learning_rate": 0.0005385154061624649, "loss": 
0.4621, "step": 16586 }, { "epoch": 9.266480446927375, "grad_norm": 0.46245095133781433, "learning_rate": 0.0005384873949579831, "loss": 0.4123, "step": 16587 }, { "epoch": 9.267039106145251, "grad_norm": 0.892911970615387, "learning_rate": 0.0005384593837535014, "loss": 0.4285, "step": 16588 }, { "epoch": 9.267597765363128, "grad_norm": 1.022056221961975, "learning_rate": 0.0005384313725490197, "loss": 0.556, "step": 16589 }, { "epoch": 9.268156424581006, "grad_norm": 0.3725251853466034, "learning_rate": 0.0005384033613445379, "loss": 0.4063, "step": 16590 }, { "epoch": 9.268715083798883, "grad_norm": 2.3333992958068848, "learning_rate": 0.000538375350140056, "loss": 0.3644, "step": 16591 }, { "epoch": 9.26927374301676, "grad_norm": 0.5404777526855469, "learning_rate": 0.0005383473389355742, "loss": 0.5207, "step": 16592 }, { "epoch": 9.269832402234638, "grad_norm": 0.646561861038208, "learning_rate": 0.0005383193277310925, "loss": 0.3858, "step": 16593 }, { "epoch": 9.270391061452514, "grad_norm": 0.3435373306274414, "learning_rate": 0.0005382913165266107, "loss": 0.3408, "step": 16594 }, { "epoch": 9.27094972067039, "grad_norm": 1.0020469427108765, "learning_rate": 0.0005382633053221289, "loss": 0.4863, "step": 16595 }, { "epoch": 9.271508379888267, "grad_norm": 0.7925450801849365, "learning_rate": 0.0005382352941176471, "loss": 0.5796, "step": 16596 }, { "epoch": 9.272067039106146, "grad_norm": 0.5109665393829346, "learning_rate": 0.0005382072829131652, "loss": 0.4686, "step": 16597 }, { "epoch": 9.272625698324022, "grad_norm": 0.36900794506073, "learning_rate": 0.0005381792717086835, "loss": 0.3643, "step": 16598 }, { "epoch": 9.273184357541899, "grad_norm": 0.6303600072860718, "learning_rate": 0.0005381512605042017, "loss": 0.478, "step": 16599 }, { "epoch": 9.273743016759777, "grad_norm": 0.45135247707366943, "learning_rate": 0.0005381232492997199, "loss": 0.4186, "step": 16600 }, { "epoch": 9.274301675977654, "grad_norm": 0.7109841108322144, 
"learning_rate": 0.0005380952380952381, "loss": 0.4153, "step": 16601 }, { "epoch": 9.27486033519553, "grad_norm": 0.6614009737968445, "learning_rate": 0.0005380672268907562, "loss": 0.5569, "step": 16602 }, { "epoch": 9.275418994413409, "grad_norm": 0.6850992441177368, "learning_rate": 0.0005380392156862745, "loss": 0.4905, "step": 16603 }, { "epoch": 9.275977653631285, "grad_norm": 0.4450646638870239, "learning_rate": 0.0005380112044817927, "loss": 0.4828, "step": 16604 }, { "epoch": 9.276536312849162, "grad_norm": 0.5977611541748047, "learning_rate": 0.000537983193277311, "loss": 0.4771, "step": 16605 }, { "epoch": 9.277094972067038, "grad_norm": 1.1330811977386475, "learning_rate": 0.0005379551820728292, "loss": 0.3757, "step": 16606 }, { "epoch": 9.277653631284917, "grad_norm": 0.6245313286781311, "learning_rate": 0.0005379271708683473, "loss": 0.4266, "step": 16607 }, { "epoch": 9.278212290502793, "grad_norm": 0.5469295978546143, "learning_rate": 0.0005378991596638656, "loss": 0.4935, "step": 16608 }, { "epoch": 9.27877094972067, "grad_norm": 0.6180469393730164, "learning_rate": 0.0005378711484593838, "loss": 0.5896, "step": 16609 }, { "epoch": 9.279329608938548, "grad_norm": 0.4080008268356323, "learning_rate": 0.000537843137254902, "loss": 0.4612, "step": 16610 }, { "epoch": 9.279888268156425, "grad_norm": 0.8717343211174011, "learning_rate": 0.0005378151260504202, "loss": 0.4475, "step": 16611 }, { "epoch": 9.280446927374301, "grad_norm": 0.448574423789978, "learning_rate": 0.0005377871148459384, "loss": 0.3906, "step": 16612 }, { "epoch": 9.28100558659218, "grad_norm": 0.5210317969322205, "learning_rate": 0.0005377591036414566, "loss": 0.4591, "step": 16613 }, { "epoch": 9.281564245810056, "grad_norm": 4.129405498504639, "learning_rate": 0.0005377310924369748, "loss": 0.4641, "step": 16614 }, { "epoch": 9.282122905027933, "grad_norm": 0.38154488801956177, "learning_rate": 0.000537703081232493, "loss": 0.3892, "step": 16615 }, { "epoch": 9.28268156424581, 
"grad_norm": 0.49611175060272217, "learning_rate": 0.0005376750700280112, "loss": 0.5124, "step": 16616 }, { "epoch": 9.283240223463688, "grad_norm": 0.8293722867965698, "learning_rate": 0.0005376470588235294, "loss": 0.4572, "step": 16617 }, { "epoch": 9.283798882681564, "grad_norm": 0.5510809421539307, "learning_rate": 0.0005376190476190476, "loss": 0.4863, "step": 16618 }, { "epoch": 9.28435754189944, "grad_norm": 0.6533995866775513, "learning_rate": 0.0005375910364145658, "loss": 0.3275, "step": 16619 }, { "epoch": 9.28491620111732, "grad_norm": 0.9118890762329102, "learning_rate": 0.000537563025210084, "loss": 0.4116, "step": 16620 }, { "epoch": 9.285474860335196, "grad_norm": 1.0961288213729858, "learning_rate": 0.0005375350140056022, "loss": 0.3776, "step": 16621 }, { "epoch": 9.286033519553072, "grad_norm": 0.4982014298439026, "learning_rate": 0.0005375070028011204, "loss": 0.4194, "step": 16622 }, { "epoch": 9.286592178770949, "grad_norm": 0.48637276887893677, "learning_rate": 0.0005374789915966388, "loss": 0.4334, "step": 16623 }, { "epoch": 9.287150837988827, "grad_norm": 0.37924978137016296, "learning_rate": 0.0005374509803921569, "loss": 0.3464, "step": 16624 }, { "epoch": 9.287709497206704, "grad_norm": 0.7492765188217163, "learning_rate": 0.0005374229691876751, "loss": 0.3901, "step": 16625 }, { "epoch": 9.28826815642458, "grad_norm": 0.767127275466919, "learning_rate": 0.0005373949579831933, "loss": 0.3437, "step": 16626 }, { "epoch": 9.288826815642459, "grad_norm": 0.6950533390045166, "learning_rate": 0.0005373669467787115, "loss": 0.3816, "step": 16627 }, { "epoch": 9.289385474860335, "grad_norm": 0.4253644645214081, "learning_rate": 0.0005373389355742298, "loss": 0.3878, "step": 16628 }, { "epoch": 9.289944134078212, "grad_norm": 0.39599257707595825, "learning_rate": 0.0005373109243697479, "loss": 0.4126, "step": 16629 }, { "epoch": 9.29050279329609, "grad_norm": 0.38296350836753845, "learning_rate": 0.0005372829131652661, "loss": 0.3328, "step": 
16630 }, { "epoch": 9.291061452513967, "grad_norm": 0.5903850197792053, "learning_rate": 0.0005372549019607843, "loss": 0.4336, "step": 16631 }, { "epoch": 9.291620111731843, "grad_norm": 0.52068692445755, "learning_rate": 0.0005372268907563025, "loss": 0.5449, "step": 16632 }, { "epoch": 9.29217877094972, "grad_norm": 0.8957650065422058, "learning_rate": 0.0005371988795518208, "loss": 0.4016, "step": 16633 }, { "epoch": 9.292737430167598, "grad_norm": 1.8552970886230469, "learning_rate": 0.0005371708683473389, "loss": 0.3548, "step": 16634 }, { "epoch": 9.293296089385475, "grad_norm": 0.6319858431816101, "learning_rate": 0.0005371428571428571, "loss": 0.573, "step": 16635 }, { "epoch": 9.293854748603351, "grad_norm": 0.6675060987472534, "learning_rate": 0.0005371148459383753, "loss": 0.4497, "step": 16636 }, { "epoch": 9.29441340782123, "grad_norm": 0.7903112173080444, "learning_rate": 0.0005370868347338935, "loss": 0.5128, "step": 16637 }, { "epoch": 9.294972067039106, "grad_norm": 0.824683666229248, "learning_rate": 0.0005370588235294119, "loss": 0.3936, "step": 16638 }, { "epoch": 9.295530726256983, "grad_norm": 8.301406860351562, "learning_rate": 0.0005370308123249301, "loss": 0.3125, "step": 16639 }, { "epoch": 9.296089385474861, "grad_norm": 0.3907196819782257, "learning_rate": 0.0005370028011204482, "loss": 0.3362, "step": 16640 }, { "epoch": 9.296648044692738, "grad_norm": 0.6023927927017212, "learning_rate": 0.0005369747899159664, "loss": 0.6357, "step": 16641 }, { "epoch": 9.297206703910614, "grad_norm": 0.366914302110672, "learning_rate": 0.0005369467787114846, "loss": 0.389, "step": 16642 }, { "epoch": 9.297765363128491, "grad_norm": 0.5850542187690735, "learning_rate": 0.0005369187675070029, "loss": 0.344, "step": 16643 }, { "epoch": 9.29832402234637, "grad_norm": 0.5472568869590759, "learning_rate": 0.0005368907563025211, "loss": 0.501, "step": 16644 }, { "epoch": 9.298882681564246, "grad_norm": 0.6412962079048157, "learning_rate": 
0.0005368627450980392, "loss": 0.3934, "step": 16645 }, { "epoch": 9.299441340782122, "grad_norm": 1.08909010887146, "learning_rate": 0.0005368347338935574, "loss": 0.4727, "step": 16646 }, { "epoch": 9.3, "grad_norm": 0.5555177330970764, "learning_rate": 0.0005368067226890756, "loss": 0.4778, "step": 16647 }, { "epoch": 9.300558659217877, "grad_norm": 0.4491305649280548, "learning_rate": 0.0005367787114845939, "loss": 0.3367, "step": 16648 }, { "epoch": 9.301117318435754, "grad_norm": 0.6214771866798401, "learning_rate": 0.0005367507002801121, "loss": 0.742, "step": 16649 }, { "epoch": 9.30167597765363, "grad_norm": 0.69321608543396, "learning_rate": 0.0005367226890756302, "loss": 0.4564, "step": 16650 }, { "epoch": 9.302234636871509, "grad_norm": 0.7769429683685303, "learning_rate": 0.0005366946778711484, "loss": 0.3556, "step": 16651 }, { "epoch": 9.302793296089385, "grad_norm": 0.5233587622642517, "learning_rate": 0.0005366666666666666, "loss": 0.4942, "step": 16652 }, { "epoch": 9.303351955307262, "grad_norm": 0.9653082489967346, "learning_rate": 0.0005366386554621849, "loss": 0.4555, "step": 16653 }, { "epoch": 9.30391061452514, "grad_norm": 0.44297635555267334, "learning_rate": 0.0005366106442577031, "loss": 0.4801, "step": 16654 }, { "epoch": 9.304469273743017, "grad_norm": 0.4555809199810028, "learning_rate": 0.0005365826330532214, "loss": 0.4131, "step": 16655 }, { "epoch": 9.305027932960893, "grad_norm": 0.7620997428894043, "learning_rate": 0.0005365546218487395, "loss": 0.3868, "step": 16656 }, { "epoch": 9.305586592178772, "grad_norm": 0.38635891675949097, "learning_rate": 0.0005365266106442577, "loss": 0.3834, "step": 16657 }, { "epoch": 9.306145251396648, "grad_norm": 0.6984046697616577, "learning_rate": 0.000536498599439776, "loss": 0.3649, "step": 16658 }, { "epoch": 9.306703910614525, "grad_norm": 0.5802876949310303, "learning_rate": 0.0005364705882352942, "loss": 0.4963, "step": 16659 }, { "epoch": 9.307262569832401, "grad_norm": 
1.2500238418579102, "learning_rate": 0.0005364425770308124, "loss": 0.3584, "step": 16660 }, { "epoch": 9.30782122905028, "grad_norm": 0.49604320526123047, "learning_rate": 0.0005364145658263305, "loss": 0.3811, "step": 16661 }, { "epoch": 9.308379888268156, "grad_norm": 0.5880340933799744, "learning_rate": 0.0005363865546218487, "loss": 0.5039, "step": 16662 }, { "epoch": 9.308938547486033, "grad_norm": 0.3959980010986328, "learning_rate": 0.000536358543417367, "loss": 0.4416, "step": 16663 }, { "epoch": 9.309497206703911, "grad_norm": 0.6716723442077637, "learning_rate": 0.0005363305322128852, "loss": 0.5665, "step": 16664 }, { "epoch": 9.310055865921788, "grad_norm": 0.4211269021034241, "learning_rate": 0.0005363025210084034, "loss": 0.433, "step": 16665 }, { "epoch": 9.310614525139664, "grad_norm": 0.8555530309677124, "learning_rate": 0.0005362745098039215, "loss": 0.3842, "step": 16666 }, { "epoch": 9.311173184357543, "grad_norm": 0.8323864936828613, "learning_rate": 0.0005362464985994397, "loss": 0.4497, "step": 16667 }, { "epoch": 9.31173184357542, "grad_norm": 0.5423151254653931, "learning_rate": 0.000536218487394958, "loss": 0.472, "step": 16668 }, { "epoch": 9.312290502793296, "grad_norm": 0.5062610507011414, "learning_rate": 0.0005361904761904762, "loss": 0.5441, "step": 16669 }, { "epoch": 9.312849162011172, "grad_norm": 0.4727415442466736, "learning_rate": 0.0005361624649859944, "loss": 0.4517, "step": 16670 }, { "epoch": 9.31340782122905, "grad_norm": 0.6677928566932678, "learning_rate": 0.0005361344537815127, "loss": 0.4403, "step": 16671 }, { "epoch": 9.313966480446927, "grad_norm": 0.5396201014518738, "learning_rate": 0.0005361064425770307, "loss": 0.37, "step": 16672 }, { "epoch": 9.314525139664804, "grad_norm": 1.5581986904144287, "learning_rate": 0.0005360784313725491, "loss": 0.5874, "step": 16673 }, { "epoch": 9.315083798882682, "grad_norm": 0.42642301321029663, "learning_rate": 0.0005360504201680673, "loss": 0.4968, "step": 16674 }, { 
"epoch": 9.315642458100559, "grad_norm": 0.41232115030288696, "learning_rate": 0.0005360224089635855, "loss": 0.3941, "step": 16675 }, { "epoch": 9.316201117318435, "grad_norm": 0.7357610464096069, "learning_rate": 0.0005359943977591037, "loss": 0.3497, "step": 16676 }, { "epoch": 9.316759776536314, "grad_norm": 0.6841923594474792, "learning_rate": 0.0005359663865546218, "loss": 0.4366, "step": 16677 }, { "epoch": 9.31731843575419, "grad_norm": 0.6401249170303345, "learning_rate": 0.0005359383753501401, "loss": 0.6711, "step": 16678 }, { "epoch": 9.317877094972067, "grad_norm": 0.5170163512229919, "learning_rate": 0.0005359103641456583, "loss": 0.4026, "step": 16679 }, { "epoch": 9.318435754189943, "grad_norm": 0.46179407835006714, "learning_rate": 0.0005358823529411765, "loss": 0.4657, "step": 16680 }, { "epoch": 9.318994413407822, "grad_norm": 0.35712406039237976, "learning_rate": 0.0005358543417366947, "loss": 0.3633, "step": 16681 }, { "epoch": 9.319553072625698, "grad_norm": 0.8107830882072449, "learning_rate": 0.0005358263305322128, "loss": 0.5316, "step": 16682 }, { "epoch": 9.320111731843575, "grad_norm": 0.5190209746360779, "learning_rate": 0.0005357983193277311, "loss": 0.4528, "step": 16683 }, { "epoch": 9.320670391061453, "grad_norm": 0.45393043756484985, "learning_rate": 0.0005357703081232493, "loss": 0.3212, "step": 16684 }, { "epoch": 9.32122905027933, "grad_norm": 0.5104181170463562, "learning_rate": 0.0005357422969187675, "loss": 0.3212, "step": 16685 }, { "epoch": 9.321787709497206, "grad_norm": 0.7569250464439392, "learning_rate": 0.0005357142857142857, "loss": 0.4558, "step": 16686 }, { "epoch": 9.322346368715085, "grad_norm": 0.42169100046157837, "learning_rate": 0.000535686274509804, "loss": 0.354, "step": 16687 }, { "epoch": 9.322905027932961, "grad_norm": 0.39165931940078735, "learning_rate": 0.0005356582633053222, "loss": 0.3489, "step": 16688 }, { "epoch": 9.323463687150838, "grad_norm": 0.4005206823348999, "learning_rate": 
0.0005356302521008404, "loss": 0.4199, "step": 16689 }, { "epoch": 9.324022346368714, "grad_norm": 4.686884880065918, "learning_rate": 0.0005356022408963586, "loss": 0.4251, "step": 16690 }, { "epoch": 9.324581005586593, "grad_norm": 0.8771541118621826, "learning_rate": 0.0005355742296918768, "loss": 0.5771, "step": 16691 }, { "epoch": 9.32513966480447, "grad_norm": 0.8012189269065857, "learning_rate": 0.000535546218487395, "loss": 0.4466, "step": 16692 }, { "epoch": 9.325698324022346, "grad_norm": 0.4651605784893036, "learning_rate": 0.0005355182072829132, "loss": 0.4002, "step": 16693 }, { "epoch": 9.326256983240224, "grad_norm": 0.5822586417198181, "learning_rate": 0.0005354901960784314, "loss": 0.4188, "step": 16694 }, { "epoch": 9.3268156424581, "grad_norm": 0.3681603968143463, "learning_rate": 0.0005354621848739496, "loss": 0.3724, "step": 16695 }, { "epoch": 9.327374301675977, "grad_norm": 0.565109133720398, "learning_rate": 0.0005354341736694678, "loss": 0.4867, "step": 16696 }, { "epoch": 9.327932960893854, "grad_norm": 0.5700971484184265, "learning_rate": 0.000535406162464986, "loss": 0.4823, "step": 16697 }, { "epoch": 9.328491620111732, "grad_norm": 0.940841555595398, "learning_rate": 0.0005353781512605042, "loss": 0.5465, "step": 16698 }, { "epoch": 9.329050279329609, "grad_norm": 0.577726423740387, "learning_rate": 0.0005353501400560224, "loss": 0.4409, "step": 16699 }, { "epoch": 9.329608938547485, "grad_norm": 0.4471920430660248, "learning_rate": 0.0005353221288515406, "loss": 0.5679, "step": 16700 }, { "epoch": 9.330167597765364, "grad_norm": 0.9432402849197388, "learning_rate": 0.0005352941176470588, "loss": 0.4979, "step": 16701 }, { "epoch": 9.33072625698324, "grad_norm": 0.36435049772262573, "learning_rate": 0.000535266106442577, "loss": 0.3961, "step": 16702 }, { "epoch": 9.331284916201117, "grad_norm": 0.6592210531234741, "learning_rate": 0.0005352380952380954, "loss": 0.4546, "step": 16703 }, { "epoch": 9.331843575418995, "grad_norm": 
0.5919036269187927, "learning_rate": 0.0005352100840336134, "loss": 0.3894, "step": 16704 }, { "epoch": 9.332402234636872, "grad_norm": 0.5600491762161255, "learning_rate": 0.0005351820728291317, "loss": 0.3748, "step": 16705 }, { "epoch": 9.332960893854748, "grad_norm": 0.7861679792404175, "learning_rate": 0.0005351540616246499, "loss": 0.3913, "step": 16706 }, { "epoch": 9.333519553072625, "grad_norm": 0.3903673589229584, "learning_rate": 0.0005351260504201681, "loss": 0.5132, "step": 16707 }, { "epoch": 9.334078212290503, "grad_norm": 0.6793580651283264, "learning_rate": 0.0005350980392156864, "loss": 0.4321, "step": 16708 }, { "epoch": 9.33463687150838, "grad_norm": 0.5604198575019836, "learning_rate": 0.0005350700280112045, "loss": 0.4345, "step": 16709 }, { "epoch": 9.335195530726256, "grad_norm": 1.4051264524459839, "learning_rate": 0.0005350420168067227, "loss": 0.3798, "step": 16710 }, { "epoch": 9.335754189944135, "grad_norm": 0.49308064579963684, "learning_rate": 0.0005350140056022409, "loss": 0.405, "step": 16711 }, { "epoch": 9.336312849162011, "grad_norm": 0.8990593552589417, "learning_rate": 0.0005349859943977591, "loss": 0.4412, "step": 16712 }, { "epoch": 9.336871508379888, "grad_norm": 0.6738452911376953, "learning_rate": 0.0005349579831932774, "loss": 0.4535, "step": 16713 }, { "epoch": 9.337430167597766, "grad_norm": 0.3531082570552826, "learning_rate": 0.0005349299719887955, "loss": 0.3906, "step": 16714 }, { "epoch": 9.337988826815643, "grad_norm": 2.3532865047454834, "learning_rate": 0.0005349019607843137, "loss": 0.3796, "step": 16715 }, { "epoch": 9.33854748603352, "grad_norm": 0.5781932473182678, "learning_rate": 0.0005348739495798319, "loss": 0.4727, "step": 16716 }, { "epoch": 9.339106145251396, "grad_norm": 0.4092429280281067, "learning_rate": 0.0005348459383753501, "loss": 0.3558, "step": 16717 }, { "epoch": 9.339664804469274, "grad_norm": 1.010851263999939, "learning_rate": 0.0005348179271708684, "loss": 0.4183, "step": 16718 }, { 
"epoch": 9.34022346368715, "grad_norm": 0.5052831768989563, "learning_rate": 0.0005347899159663866, "loss": 0.4556, "step": 16719 }, { "epoch": 9.340782122905027, "grad_norm": 0.3975076377391815, "learning_rate": 0.0005347619047619047, "loss": 0.3884, "step": 16720 }, { "epoch": 9.341340782122906, "grad_norm": 0.38268956542015076, "learning_rate": 0.000534733893557423, "loss": 0.3694, "step": 16721 }, { "epoch": 9.341899441340782, "grad_norm": 0.721857488155365, "learning_rate": 0.0005347058823529412, "loss": 0.5405, "step": 16722 }, { "epoch": 9.342458100558659, "grad_norm": 0.6094126105308533, "learning_rate": 0.0005346778711484595, "loss": 0.4913, "step": 16723 }, { "epoch": 9.343016759776535, "grad_norm": 0.5790989398956299, "learning_rate": 0.0005346498599439777, "loss": 0.535, "step": 16724 }, { "epoch": 9.343575418994414, "grad_norm": 0.4490100145339966, "learning_rate": 0.0005346218487394958, "loss": 0.4619, "step": 16725 }, { "epoch": 9.34413407821229, "grad_norm": 0.5182918310165405, "learning_rate": 0.000534593837535014, "loss": 0.5676, "step": 16726 }, { "epoch": 9.344692737430167, "grad_norm": 0.9785728454589844, "learning_rate": 0.0005345658263305322, "loss": 0.5234, "step": 16727 }, { "epoch": 9.345251396648045, "grad_norm": 0.4276009202003479, "learning_rate": 0.0005345378151260505, "loss": 0.4458, "step": 16728 }, { "epoch": 9.345810055865922, "grad_norm": 0.5573778748512268, "learning_rate": 0.0005345098039215687, "loss": 0.3893, "step": 16729 }, { "epoch": 9.346368715083798, "grad_norm": 0.5121758580207825, "learning_rate": 0.0005344817927170868, "loss": 0.4946, "step": 16730 }, { "epoch": 9.346927374301677, "grad_norm": 0.3346363306045532, "learning_rate": 0.000534453781512605, "loss": 0.3682, "step": 16731 }, { "epoch": 9.347486033519553, "grad_norm": 0.47509777545928955, "learning_rate": 0.0005344257703081232, "loss": 0.435, "step": 16732 }, { "epoch": 9.34804469273743, "grad_norm": 0.5008212924003601, "learning_rate": 0.0005343977591036415, 
"loss": 0.4415, "step": 16733 }, { "epoch": 9.348603351955306, "grad_norm": 0.6001195907592773, "learning_rate": 0.0005343697478991597, "loss": 0.3558, "step": 16734 }, { "epoch": 9.349162011173185, "grad_norm": 0.3894767761230469, "learning_rate": 0.0005343417366946779, "loss": 0.3785, "step": 16735 }, { "epoch": 9.349720670391061, "grad_norm": 0.5864819884300232, "learning_rate": 0.000534313725490196, "loss": 0.4, "step": 16736 }, { "epoch": 9.350279329608938, "grad_norm": 3.946179151535034, "learning_rate": 0.0005342857142857142, "loss": 0.4666, "step": 16737 }, { "epoch": 9.350837988826816, "grad_norm": 0.5504373908042908, "learning_rate": 0.0005342577030812325, "loss": 0.5113, "step": 16738 }, { "epoch": 9.351396648044693, "grad_norm": 0.540897786617279, "learning_rate": 0.0005342296918767508, "loss": 0.4218, "step": 16739 }, { "epoch": 9.35195530726257, "grad_norm": 1.406132459640503, "learning_rate": 0.000534201680672269, "loss": 0.3711, "step": 16740 }, { "epoch": 9.352513966480448, "grad_norm": 0.4389044940471649, "learning_rate": 0.0005341736694677871, "loss": 0.4069, "step": 16741 }, { "epoch": 9.353072625698324, "grad_norm": 0.39769864082336426, "learning_rate": 0.0005341456582633053, "loss": 0.3836, "step": 16742 }, { "epoch": 9.3536312849162, "grad_norm": 0.8196011781692505, "learning_rate": 0.0005341176470588235, "loss": 0.4535, "step": 16743 }, { "epoch": 9.354189944134077, "grad_norm": 0.4231727123260498, "learning_rate": 0.0005340896358543418, "loss": 0.4003, "step": 16744 }, { "epoch": 9.354748603351956, "grad_norm": 0.35200005769729614, "learning_rate": 0.00053406162464986, "loss": 0.3886, "step": 16745 }, { "epoch": 9.355307262569832, "grad_norm": 0.6663103699684143, "learning_rate": 0.0005340336134453781, "loss": 0.46, "step": 16746 }, { "epoch": 9.355865921787709, "grad_norm": 0.44087740778923035, "learning_rate": 0.0005340056022408963, "loss": 0.4076, "step": 16747 }, { "epoch": 9.356424581005587, "grad_norm": 0.45731955766677856, 
"learning_rate": 0.0005339775910364145, "loss": 0.3895, "step": 16748 }, { "epoch": 9.356983240223464, "grad_norm": 0.5980762243270874, "learning_rate": 0.0005339495798319328, "loss": 0.3999, "step": 16749 }, { "epoch": 9.35754189944134, "grad_norm": 0.6291030049324036, "learning_rate": 0.000533921568627451, "loss": 0.394, "step": 16750 }, { "epoch": 9.358100558659217, "grad_norm": 0.5832840800285339, "learning_rate": 0.0005338935574229692, "loss": 0.3709, "step": 16751 }, { "epoch": 9.358659217877095, "grad_norm": 0.5454499125480652, "learning_rate": 0.0005338655462184873, "loss": 0.5831, "step": 16752 }, { "epoch": 9.359217877094972, "grad_norm": 3.0721566677093506, "learning_rate": 0.0005338375350140055, "loss": 0.6549, "step": 16753 }, { "epoch": 9.359776536312848, "grad_norm": 1.4152270555496216, "learning_rate": 0.0005338095238095239, "loss": 0.3353, "step": 16754 }, { "epoch": 9.360335195530727, "grad_norm": 0.5737860798835754, "learning_rate": 0.0005337815126050421, "loss": 0.3669, "step": 16755 }, { "epoch": 9.360893854748603, "grad_norm": 0.5673283934593201, "learning_rate": 0.0005337535014005603, "loss": 0.3832, "step": 16756 }, { "epoch": 9.36145251396648, "grad_norm": 0.5884286165237427, "learning_rate": 0.0005337254901960784, "loss": 0.3579, "step": 16757 }, { "epoch": 9.362011173184358, "grad_norm": 0.493373841047287, "learning_rate": 0.0005336974789915966, "loss": 0.3934, "step": 16758 }, { "epoch": 9.362569832402235, "grad_norm": 0.5414632558822632, "learning_rate": 0.0005336694677871149, "loss": 0.5144, "step": 16759 }, { "epoch": 9.363128491620111, "grad_norm": 0.6368604302406311, "learning_rate": 0.0005336414565826331, "loss": 0.3807, "step": 16760 }, { "epoch": 9.363687150837988, "grad_norm": 0.4669869542121887, "learning_rate": 0.0005336134453781513, "loss": 0.4277, "step": 16761 }, { "epoch": 9.364245810055866, "grad_norm": 0.3774166703224182, "learning_rate": 0.0005335854341736694, "loss": 0.3903, "step": 16762 }, { "epoch": 
9.364804469273743, "grad_norm": 0.774614155292511, "learning_rate": 0.0005335574229691876, "loss": 0.4535, "step": 16763 }, { "epoch": 9.36536312849162, "grad_norm": 0.42842450737953186, "learning_rate": 0.0005335294117647059, "loss": 0.3665, "step": 16764 }, { "epoch": 9.365921787709498, "grad_norm": 0.40375590324401855, "learning_rate": 0.0005335014005602241, "loss": 0.383, "step": 16765 }, { "epoch": 9.366480446927374, "grad_norm": 0.7816968560218811, "learning_rate": 0.0005334733893557423, "loss": 0.5081, "step": 16766 }, { "epoch": 9.367039106145251, "grad_norm": 0.619130551815033, "learning_rate": 0.0005334453781512605, "loss": 0.356, "step": 16767 }, { "epoch": 9.36759776536313, "grad_norm": 0.44995802640914917, "learning_rate": 0.0005334173669467786, "loss": 0.4221, "step": 16768 }, { "epoch": 9.368156424581006, "grad_norm": 0.5371401309967041, "learning_rate": 0.000533389355742297, "loss": 0.5107, "step": 16769 }, { "epoch": 9.368715083798882, "grad_norm": 1.8955756425857544, "learning_rate": 0.0005333613445378152, "loss": 0.3834, "step": 16770 }, { "epoch": 9.369273743016759, "grad_norm": 0.3807872235774994, "learning_rate": 0.0005333333333333334, "loss": 0.3927, "step": 16771 }, { "epoch": 9.369832402234637, "grad_norm": 0.6134088635444641, "learning_rate": 0.0005333053221288516, "loss": 0.4867, "step": 16772 }, { "epoch": 9.370391061452514, "grad_norm": 0.6308655142784119, "learning_rate": 0.0005332773109243697, "loss": 0.4624, "step": 16773 }, { "epoch": 9.37094972067039, "grad_norm": 3.3655457496643066, "learning_rate": 0.000533249299719888, "loss": 0.3889, "step": 16774 }, { "epoch": 9.371508379888269, "grad_norm": 0.8009724617004395, "learning_rate": 0.0005332212885154062, "loss": 0.3911, "step": 16775 }, { "epoch": 9.372067039106145, "grad_norm": 0.41596177220344543, "learning_rate": 0.0005331932773109244, "loss": 0.456, "step": 16776 }, { "epoch": 9.372625698324022, "grad_norm": 0.4668046534061432, "learning_rate": 0.0005331652661064426, "loss": 
0.4022, "step": 16777 }, { "epoch": 9.3731843575419, "grad_norm": 0.7238803505897522, "learning_rate": 0.0005331372549019607, "loss": 0.4643, "step": 16778 }, { "epoch": 9.373743016759777, "grad_norm": 2.1968822479248047, "learning_rate": 0.000533109243697479, "loss": 0.4543, "step": 16779 }, { "epoch": 9.374301675977653, "grad_norm": 0.4328564405441284, "learning_rate": 0.0005330812324929972, "loss": 0.4487, "step": 16780 }, { "epoch": 9.37486033519553, "grad_norm": 0.46303772926330566, "learning_rate": 0.0005330532212885154, "loss": 0.4249, "step": 16781 }, { "epoch": 9.375418994413408, "grad_norm": 0.40519464015960693, "learning_rate": 0.0005330252100840336, "loss": 0.412, "step": 16782 }, { "epoch": 9.375977653631285, "grad_norm": 0.555400550365448, "learning_rate": 0.0005329971988795518, "loss": 0.4956, "step": 16783 }, { "epoch": 9.376536312849161, "grad_norm": 0.6737897992134094, "learning_rate": 0.00053296918767507, "loss": 0.4742, "step": 16784 }, { "epoch": 9.37709497206704, "grad_norm": 0.5997216105461121, "learning_rate": 0.0005329411764705882, "loss": 0.4937, "step": 16785 }, { "epoch": 9.377653631284916, "grad_norm": 0.6114674806594849, "learning_rate": 0.0005329131652661064, "loss": 0.4871, "step": 16786 }, { "epoch": 9.378212290502793, "grad_norm": 0.4657524526119232, "learning_rate": 0.0005328851540616247, "loss": 0.4668, "step": 16787 }, { "epoch": 9.378770949720671, "grad_norm": 0.4247683882713318, "learning_rate": 0.0005328571428571429, "loss": 0.4095, "step": 16788 }, { "epoch": 9.379329608938548, "grad_norm": 0.3457503616809845, "learning_rate": 0.0005328291316526611, "loss": 0.3695, "step": 16789 }, { "epoch": 9.379888268156424, "grad_norm": 0.4852539300918579, "learning_rate": 0.0005328011204481793, "loss": 0.3954, "step": 16790 }, { "epoch": 9.380446927374301, "grad_norm": 0.4772719144821167, "learning_rate": 0.0005327731092436975, "loss": 0.4913, "step": 16791 }, { "epoch": 9.38100558659218, "grad_norm": 0.4078742265701294, 
"learning_rate": 0.0005327450980392157, "loss": 0.3472, "step": 16792 }, { "epoch": 9.381564245810056, "grad_norm": 0.7070629000663757, "learning_rate": 0.0005327170868347339, "loss": 0.6128, "step": 16793 }, { "epoch": 9.382122905027932, "grad_norm": 0.652103841304779, "learning_rate": 0.0005326890756302522, "loss": 0.3886, "step": 16794 }, { "epoch": 9.38268156424581, "grad_norm": 0.6479032635688782, "learning_rate": 0.0005326610644257703, "loss": 0.4171, "step": 16795 }, { "epoch": 9.383240223463687, "grad_norm": 0.7084524035453796, "learning_rate": 0.0005326330532212885, "loss": 0.4825, "step": 16796 }, { "epoch": 9.383798882681564, "grad_norm": 0.9993151426315308, "learning_rate": 0.0005326050420168067, "loss": 0.3809, "step": 16797 }, { "epoch": 9.38435754189944, "grad_norm": 1.4311127662658691, "learning_rate": 0.0005325770308123249, "loss": 0.3121, "step": 16798 }, { "epoch": 9.384916201117319, "grad_norm": 0.5914056897163391, "learning_rate": 0.0005325490196078432, "loss": 0.4658, "step": 16799 }, { "epoch": 9.385474860335195, "grad_norm": 3.6157281398773193, "learning_rate": 0.0005325210084033613, "loss": 0.622, "step": 16800 }, { "epoch": 9.386033519553072, "grad_norm": 1.3350077867507935, "learning_rate": 0.0005324929971988795, "loss": 0.5424, "step": 16801 }, { "epoch": 9.38659217877095, "grad_norm": 0.6850478649139404, "learning_rate": 0.0005324649859943977, "loss": 0.4787, "step": 16802 }, { "epoch": 9.387150837988827, "grad_norm": 0.7428293228149414, "learning_rate": 0.000532436974789916, "loss": 0.4541, "step": 16803 }, { "epoch": 9.387709497206703, "grad_norm": 2.18629789352417, "learning_rate": 0.0005324089635854343, "loss": 0.3548, "step": 16804 }, { "epoch": 9.388268156424582, "grad_norm": 0.7521222233772278, "learning_rate": 0.0005323809523809524, "loss": 0.512, "step": 16805 }, { "epoch": 9.388826815642458, "grad_norm": 0.6294946074485779, "learning_rate": 0.0005323529411764706, "loss": 0.5323, "step": 16806 }, { "epoch": 9.389385474860335, 
"grad_norm": 0.5802832245826721, "learning_rate": 0.0005323249299719888, "loss": 0.2995, "step": 16807 }, { "epoch": 9.389944134078211, "grad_norm": 2.8059754371643066, "learning_rate": 0.000532296918767507, "loss": 0.4557, "step": 16808 }, { "epoch": 9.39050279329609, "grad_norm": 0.657271146774292, "learning_rate": 0.0005322689075630253, "loss": 0.5192, "step": 16809 }, { "epoch": 9.391061452513966, "grad_norm": 0.7676663994789124, "learning_rate": 0.0005322408963585435, "loss": 0.4827, "step": 16810 }, { "epoch": 9.391620111731843, "grad_norm": 0.6108667850494385, "learning_rate": 0.0005322128851540616, "loss": 0.4051, "step": 16811 }, { "epoch": 9.392178770949721, "grad_norm": 0.3898201882839203, "learning_rate": 0.0005321848739495798, "loss": 0.4939, "step": 16812 }, { "epoch": 9.392737430167598, "grad_norm": 1.2757179737091064, "learning_rate": 0.000532156862745098, "loss": 0.6895, "step": 16813 }, { "epoch": 9.393296089385474, "grad_norm": 0.5117576718330383, "learning_rate": 0.0005321288515406163, "loss": 0.4027, "step": 16814 }, { "epoch": 9.393854748603353, "grad_norm": 0.44022637605667114, "learning_rate": 0.0005321008403361345, "loss": 0.3671, "step": 16815 }, { "epoch": 9.39441340782123, "grad_norm": 0.3825591802597046, "learning_rate": 0.0005320728291316526, "loss": 0.3846, "step": 16816 }, { "epoch": 9.394972067039106, "grad_norm": 0.5309603810310364, "learning_rate": 0.0005320448179271708, "loss": 0.513, "step": 16817 }, { "epoch": 9.395530726256982, "grad_norm": 0.43423765897750854, "learning_rate": 0.000532016806722689, "loss": 0.4442, "step": 16818 }, { "epoch": 9.39608938547486, "grad_norm": 0.5367094278335571, "learning_rate": 0.0005319887955182074, "loss": 0.4505, "step": 16819 }, { "epoch": 9.396648044692737, "grad_norm": 0.5798840522766113, "learning_rate": 0.0005319607843137256, "loss": 0.4677, "step": 16820 }, { "epoch": 9.397206703910614, "grad_norm": 0.4283909201622009, "learning_rate": 0.0005319327731092437, "loss": 0.4188, "step": 
16821 }, { "epoch": 9.397765363128492, "grad_norm": 0.47862565517425537, "learning_rate": 0.0005319047619047619, "loss": 0.341, "step": 16822 }, { "epoch": 9.398324022346369, "grad_norm": 0.5590355396270752, "learning_rate": 0.0005318767507002801, "loss": 0.3156, "step": 16823 }, { "epoch": 9.398882681564245, "grad_norm": 0.4354957342147827, "learning_rate": 0.0005318487394957984, "loss": 0.2996, "step": 16824 }, { "epoch": 9.399441340782122, "grad_norm": 0.5914400815963745, "learning_rate": 0.0005318207282913166, "loss": 0.448, "step": 16825 }, { "epoch": 9.4, "grad_norm": 0.7777588963508606, "learning_rate": 0.0005317927170868348, "loss": 0.4169, "step": 16826 }, { "epoch": 9.400558659217877, "grad_norm": 0.5501382350921631, "learning_rate": 0.0005317647058823529, "loss": 0.475, "step": 16827 }, { "epoch": 9.401117318435753, "grad_norm": 0.39645954966545105, "learning_rate": 0.0005317366946778711, "loss": 0.5077, "step": 16828 }, { "epoch": 9.401675977653632, "grad_norm": 0.9203370809555054, "learning_rate": 0.0005317086834733894, "loss": 0.4567, "step": 16829 }, { "epoch": 9.402234636871508, "grad_norm": 0.6831592917442322, "learning_rate": 0.0005316806722689076, "loss": 0.4235, "step": 16830 }, { "epoch": 9.402793296089385, "grad_norm": 1.0460988283157349, "learning_rate": 0.0005316526610644258, "loss": 0.5288, "step": 16831 }, { "epoch": 9.403351955307263, "grad_norm": 0.6364946365356445, "learning_rate": 0.0005316246498599439, "loss": 0.4513, "step": 16832 }, { "epoch": 9.40391061452514, "grad_norm": 0.515491783618927, "learning_rate": 0.0005315966386554621, "loss": 0.471, "step": 16833 }, { "epoch": 9.404469273743016, "grad_norm": 1.3763272762298584, "learning_rate": 0.0005315686274509804, "loss": 0.3567, "step": 16834 }, { "epoch": 9.405027932960893, "grad_norm": 1.1162863969802856, "learning_rate": 0.0005315406162464987, "loss": 0.5599, "step": 16835 }, { "epoch": 9.405586592178771, "grad_norm": 0.44892141222953796, "learning_rate": 0.0005315126050420169, 
"loss": 0.4461, "step": 16836 }, { "epoch": 9.406145251396648, "grad_norm": 0.42992711067199707, "learning_rate": 0.000531484593837535, "loss": 0.4084, "step": 16837 }, { "epoch": 9.406703910614524, "grad_norm": 0.6918512582778931, "learning_rate": 0.0005314565826330532, "loss": 0.4327, "step": 16838 }, { "epoch": 9.407262569832403, "grad_norm": 0.40214061737060547, "learning_rate": 0.0005314285714285715, "loss": 0.2999, "step": 16839 }, { "epoch": 9.40782122905028, "grad_norm": 0.48275989294052124, "learning_rate": 0.0005314005602240897, "loss": 0.4804, "step": 16840 }, { "epoch": 9.408379888268156, "grad_norm": 0.42350292205810547, "learning_rate": 0.0005313725490196079, "loss": 0.375, "step": 16841 }, { "epoch": 9.408938547486034, "grad_norm": 0.7236517071723938, "learning_rate": 0.0005313445378151261, "loss": 0.5642, "step": 16842 }, { "epoch": 9.40949720670391, "grad_norm": 0.4190064072608948, "learning_rate": 0.0005313165266106442, "loss": 0.3392, "step": 16843 }, { "epoch": 9.410055865921787, "grad_norm": 0.42308881878852844, "learning_rate": 0.0005312885154061625, "loss": 0.3061, "step": 16844 }, { "epoch": 9.410614525139664, "grad_norm": 0.3551797568798065, "learning_rate": 0.0005312605042016807, "loss": 0.3581, "step": 16845 }, { "epoch": 9.411173184357542, "grad_norm": 0.5283050537109375, "learning_rate": 0.0005312324929971989, "loss": 0.4388, "step": 16846 }, { "epoch": 9.411731843575419, "grad_norm": 0.6589013338088989, "learning_rate": 0.0005312044817927171, "loss": 0.3644, "step": 16847 }, { "epoch": 9.412290502793295, "grad_norm": 0.556256890296936, "learning_rate": 0.0005311764705882352, "loss": 0.4125, "step": 16848 }, { "epoch": 9.412849162011174, "grad_norm": 0.6540932059288025, "learning_rate": 0.0005311484593837535, "loss": 0.4097, "step": 16849 }, { "epoch": 9.41340782122905, "grad_norm": 0.5505632162094116, "learning_rate": 0.0005311204481792717, "loss": 0.4131, "step": 16850 }, { "epoch": 9.413966480446927, "grad_norm": 0.3756787180900574, 
"learning_rate": 0.00053109243697479, "loss": 0.3572, "step": 16851 }, { "epoch": 9.414525139664805, "grad_norm": 0.7743303179740906, "learning_rate": 0.0005310644257703082, "loss": 0.5434, "step": 16852 }, { "epoch": 9.415083798882682, "grad_norm": 0.5008177161216736, "learning_rate": 0.0005310364145658263, "loss": 0.4358, "step": 16853 }, { "epoch": 9.415642458100558, "grad_norm": 0.5340622663497925, "learning_rate": 0.0005310084033613446, "loss": 0.4683, "step": 16854 }, { "epoch": 9.416201117318435, "grad_norm": 0.4869961440563202, "learning_rate": 0.0005309803921568628, "loss": 0.494, "step": 16855 }, { "epoch": 9.416759776536313, "grad_norm": 0.47588032484054565, "learning_rate": 0.000530952380952381, "loss": 0.47, "step": 16856 }, { "epoch": 9.41731843575419, "grad_norm": 0.48139962553977966, "learning_rate": 0.0005309243697478992, "loss": 0.4264, "step": 16857 }, { "epoch": 9.417877094972066, "grad_norm": 1.1310714483261108, "learning_rate": 0.0005308963585434174, "loss": 0.3937, "step": 16858 }, { "epoch": 9.418435754189945, "grad_norm": 2.103879690170288, "learning_rate": 0.0005308683473389356, "loss": 0.4049, "step": 16859 }, { "epoch": 9.418994413407821, "grad_norm": 0.450005441904068, "learning_rate": 0.0005308403361344538, "loss": 0.4111, "step": 16860 }, { "epoch": 9.419553072625698, "grad_norm": 0.41125884652137756, "learning_rate": 0.000530812324929972, "loss": 0.3421, "step": 16861 }, { "epoch": 9.420111731843576, "grad_norm": 0.4717095196247101, "learning_rate": 0.0005307843137254902, "loss": 0.3288, "step": 16862 }, { "epoch": 9.420670391061453, "grad_norm": 0.5915235877037048, "learning_rate": 0.0005307563025210084, "loss": 0.6243, "step": 16863 }, { "epoch": 9.42122905027933, "grad_norm": 0.638978123664856, "learning_rate": 0.0005307282913165266, "loss": 0.5166, "step": 16864 }, { "epoch": 9.421787709497206, "grad_norm": 0.7382144927978516, "learning_rate": 0.0005307002801120448, "loss": 0.4204, "step": 16865 }, { "epoch": 9.422346368715084, 
"grad_norm": 1.3231853246688843, "learning_rate": 0.000530672268907563, "loss": 0.3653, "step": 16866 }, { "epoch": 9.422905027932961, "grad_norm": 0.513291597366333, "learning_rate": 0.0005306442577030812, "loss": 0.5272, "step": 16867 }, { "epoch": 9.423463687150837, "grad_norm": 0.4303114116191864, "learning_rate": 0.0005306162464985994, "loss": 0.3788, "step": 16868 }, { "epoch": 9.424022346368716, "grad_norm": 0.5129906535148621, "learning_rate": 0.0005305882352941177, "loss": 0.4965, "step": 16869 }, { "epoch": 9.424581005586592, "grad_norm": 0.7556983828544617, "learning_rate": 0.0005305602240896359, "loss": 0.468, "step": 16870 }, { "epoch": 9.425139664804469, "grad_norm": 0.4534633755683899, "learning_rate": 0.0005305322128851541, "loss": 0.422, "step": 16871 }, { "epoch": 9.425698324022346, "grad_norm": 0.7855282425880432, "learning_rate": 0.0005305042016806723, "loss": 0.4841, "step": 16872 }, { "epoch": 9.426256983240224, "grad_norm": 0.6528869867324829, "learning_rate": 0.0005304761904761905, "loss": 0.4319, "step": 16873 }, { "epoch": 9.4268156424581, "grad_norm": 1.577876091003418, "learning_rate": 0.0005304481792717088, "loss": 0.5527, "step": 16874 }, { "epoch": 9.427374301675977, "grad_norm": 0.40677517652511597, "learning_rate": 0.0005304201680672269, "loss": 0.4119, "step": 16875 }, { "epoch": 9.427932960893855, "grad_norm": 0.4042321741580963, "learning_rate": 0.0005303921568627451, "loss": 0.3946, "step": 16876 }, { "epoch": 9.428491620111732, "grad_norm": 3.2340877056121826, "learning_rate": 0.0005303641456582633, "loss": 0.392, "step": 16877 }, { "epoch": 9.429050279329608, "grad_norm": 0.6412645578384399, "learning_rate": 0.0005303361344537815, "loss": 0.4745, "step": 16878 }, { "epoch": 9.429608938547487, "grad_norm": 0.38093122839927673, "learning_rate": 0.0005303081232492998, "loss": 0.3173, "step": 16879 }, { "epoch": 9.430167597765363, "grad_norm": 0.5791457295417786, "learning_rate": 0.0005302801120448179, "loss": 0.4417, "step": 
16880 }, { "epoch": 9.43072625698324, "grad_norm": 0.6296137571334839, "learning_rate": 0.0005302521008403361, "loss": 0.3854, "step": 16881 }, { "epoch": 9.431284916201117, "grad_norm": 0.8443319201469421, "learning_rate": 0.0005302240896358543, "loss": 0.4034, "step": 16882 }, { "epoch": 9.431843575418995, "grad_norm": 1.5449734926223755, "learning_rate": 0.0005301960784313725, "loss": 0.4717, "step": 16883 }, { "epoch": 9.432402234636871, "grad_norm": 0.7071899175643921, "learning_rate": 0.0005301680672268909, "loss": 0.3662, "step": 16884 }, { "epoch": 9.432960893854748, "grad_norm": 0.5175532102584839, "learning_rate": 0.000530140056022409, "loss": 0.4511, "step": 16885 }, { "epoch": 9.433519553072626, "grad_norm": 0.435304194688797, "learning_rate": 0.0005301120448179272, "loss": 0.4855, "step": 16886 }, { "epoch": 9.434078212290503, "grad_norm": 1.1449611186981201, "learning_rate": 0.0005300840336134454, "loss": 0.4142, "step": 16887 }, { "epoch": 9.43463687150838, "grad_norm": 0.3325803875923157, "learning_rate": 0.0005300560224089636, "loss": 0.3357, "step": 16888 }, { "epoch": 9.435195530726258, "grad_norm": 0.7689087986946106, "learning_rate": 0.0005300280112044819, "loss": 0.5358, "step": 16889 }, { "epoch": 9.435754189944134, "grad_norm": 0.5600887537002563, "learning_rate": 0.0005300000000000001, "loss": 0.4531, "step": 16890 }, { "epoch": 9.436312849162011, "grad_norm": 2.0872435569763184, "learning_rate": 0.0005299719887955182, "loss": 0.4624, "step": 16891 }, { "epoch": 9.436871508379888, "grad_norm": 0.8863978385925293, "learning_rate": 0.0005299439775910364, "loss": 0.438, "step": 16892 }, { "epoch": 9.437430167597766, "grad_norm": 0.569903552532196, "learning_rate": 0.0005299159663865546, "loss": 0.5174, "step": 16893 }, { "epoch": 9.437988826815642, "grad_norm": 0.7479849457740784, "learning_rate": 0.0005298879551820729, "loss": 0.4118, "step": 16894 }, { "epoch": 9.438547486033519, "grad_norm": 0.5508785843849182, "learning_rate": 
0.0005298599439775911, "loss": 0.3754, "step": 16895 }, { "epoch": 9.439106145251397, "grad_norm": 5.470061302185059, "learning_rate": 0.0005298319327731092, "loss": 0.3891, "step": 16896 }, { "epoch": 9.439664804469274, "grad_norm": 0.4785255789756775, "learning_rate": 0.0005298039215686274, "loss": 0.4148, "step": 16897 }, { "epoch": 9.44022346368715, "grad_norm": 0.7331400513648987, "learning_rate": 0.0005297759103641456, "loss": 0.4232, "step": 16898 }, { "epoch": 9.440782122905027, "grad_norm": 2.038973808288574, "learning_rate": 0.0005297478991596639, "loss": 0.5461, "step": 16899 }, { "epoch": 9.441340782122905, "grad_norm": 0.40712592005729675, "learning_rate": 0.0005297198879551821, "loss": 0.428, "step": 16900 }, { "epoch": 9.441899441340782, "grad_norm": 0.7654996514320374, "learning_rate": 0.0005296918767507002, "loss": 0.3192, "step": 16901 }, { "epoch": 9.442458100558659, "grad_norm": 0.5104309916496277, "learning_rate": 0.0005296638655462185, "loss": 0.4549, "step": 16902 }, { "epoch": 9.443016759776537, "grad_norm": 0.6030893921852112, "learning_rate": 0.0005296358543417367, "loss": 0.3589, "step": 16903 }, { "epoch": 9.443575418994413, "grad_norm": 0.5829979777336121, "learning_rate": 0.000529607843137255, "loss": 0.4501, "step": 16904 }, { "epoch": 9.44413407821229, "grad_norm": 0.7452841997146606, "learning_rate": 0.0005295798319327732, "loss": 0.5193, "step": 16905 }, { "epoch": 9.444692737430168, "grad_norm": 0.5121172070503235, "learning_rate": 0.0005295518207282914, "loss": 0.5095, "step": 16906 }, { "epoch": 9.445251396648045, "grad_norm": 0.8370568752288818, "learning_rate": 0.0005295238095238095, "loss": 0.4063, "step": 16907 }, { "epoch": 9.445810055865921, "grad_norm": 0.58674556016922, "learning_rate": 0.0005294957983193277, "loss": 0.4101, "step": 16908 }, { "epoch": 9.446368715083798, "grad_norm": 0.44542819261550903, "learning_rate": 0.000529467787114846, "loss": 0.3733, "step": 16909 }, { "epoch": 9.446927374301676, "grad_norm": 
0.6354761719703674, "learning_rate": 0.0005294397759103642, "loss": 0.4817, "step": 16910 }, { "epoch": 9.447486033519553, "grad_norm": 0.797371506690979, "learning_rate": 0.0005294117647058824, "loss": 0.6478, "step": 16911 }, { "epoch": 9.44804469273743, "grad_norm": 1.0392273664474487, "learning_rate": 0.0005293837535014005, "loss": 0.5313, "step": 16912 }, { "epoch": 9.448603351955308, "grad_norm": 0.454172819852829, "learning_rate": 0.0005293557422969187, "loss": 0.4089, "step": 16913 }, { "epoch": 9.449162011173184, "grad_norm": 0.40602946281433105, "learning_rate": 0.000529327731092437, "loss": 0.3903, "step": 16914 }, { "epoch": 9.449720670391061, "grad_norm": 0.4775692820549011, "learning_rate": 0.0005292997198879552, "loss": 0.3848, "step": 16915 }, { "epoch": 9.45027932960894, "grad_norm": 1.2314726114273071, "learning_rate": 0.0005292717086834734, "loss": 0.3862, "step": 16916 }, { "epoch": 9.450837988826816, "grad_norm": 0.5477046966552734, "learning_rate": 0.0005292436974789915, "loss": 0.3504, "step": 16917 }, { "epoch": 9.451396648044692, "grad_norm": 0.9424893260002136, "learning_rate": 0.0005292156862745097, "loss": 0.4473, "step": 16918 }, { "epoch": 9.451955307262569, "grad_norm": 0.6837775111198425, "learning_rate": 0.0005291876750700281, "loss": 0.5215, "step": 16919 }, { "epoch": 9.452513966480447, "grad_norm": 0.43599507212638855, "learning_rate": 0.0005291596638655463, "loss": 0.405, "step": 16920 }, { "epoch": 9.453072625698324, "grad_norm": 0.609921932220459, "learning_rate": 0.0005291316526610645, "loss": 0.494, "step": 16921 }, { "epoch": 9.4536312849162, "grad_norm": 0.7851957082748413, "learning_rate": 0.0005291036414565827, "loss": 0.4908, "step": 16922 }, { "epoch": 9.454189944134079, "grad_norm": 1.0331196784973145, "learning_rate": 0.0005290756302521008, "loss": 0.4542, "step": 16923 }, { "epoch": 9.454748603351955, "grad_norm": 0.45207321643829346, "learning_rate": 0.0005290476190476191, "loss": 0.5175, "step": 16924 }, { 
"epoch": 9.455307262569832, "grad_norm": 0.6439725756645203, "learning_rate": 0.0005290196078431373, "loss": 0.3814, "step": 16925 }, { "epoch": 9.45586592178771, "grad_norm": 0.7960869073867798, "learning_rate": 0.0005289915966386555, "loss": 0.3217, "step": 16926 }, { "epoch": 9.456424581005587, "grad_norm": 0.49538466334342957, "learning_rate": 0.0005289635854341737, "loss": 0.604, "step": 16927 }, { "epoch": 9.456983240223463, "grad_norm": 0.40449148416519165, "learning_rate": 0.0005289355742296918, "loss": 0.3721, "step": 16928 }, { "epoch": 9.45754189944134, "grad_norm": 0.48132526874542236, "learning_rate": 0.0005289075630252101, "loss": 0.4298, "step": 16929 }, { "epoch": 9.458100558659218, "grad_norm": 0.49563801288604736, "learning_rate": 0.0005288795518207283, "loss": 0.3348, "step": 16930 }, { "epoch": 9.458659217877095, "grad_norm": 0.67368483543396, "learning_rate": 0.0005288515406162465, "loss": 0.4975, "step": 16931 }, { "epoch": 9.459217877094972, "grad_norm": 0.7780280113220215, "learning_rate": 0.0005288235294117647, "loss": 0.4339, "step": 16932 }, { "epoch": 9.45977653631285, "grad_norm": 1.3457205295562744, "learning_rate": 0.0005287955182072828, "loss": 0.4638, "step": 16933 }, { "epoch": 9.460335195530726, "grad_norm": 0.7658423185348511, "learning_rate": 0.0005287675070028012, "loss": 0.3708, "step": 16934 }, { "epoch": 9.460893854748603, "grad_norm": 0.7547198534011841, "learning_rate": 0.0005287394957983194, "loss": 0.2732, "step": 16935 }, { "epoch": 9.461452513966481, "grad_norm": 0.4372788667678833, "learning_rate": 0.0005287114845938376, "loss": 0.3738, "step": 16936 }, { "epoch": 9.462011173184358, "grad_norm": 0.4659584164619446, "learning_rate": 0.0005286834733893558, "loss": 0.3969, "step": 16937 }, { "epoch": 9.462569832402234, "grad_norm": 2.6904969215393066, "learning_rate": 0.000528655462184874, "loss": 0.4689, "step": 16938 }, { "epoch": 9.463128491620111, "grad_norm": 0.6364842057228088, "learning_rate": 
0.0005286274509803922, "loss": 0.5047, "step": 16939 }, { "epoch": 9.46368715083799, "grad_norm": 6.599497318267822, "learning_rate": 0.0005285994397759104, "loss": 0.4468, "step": 16940 }, { "epoch": 9.464245810055866, "grad_norm": 0.45465609431266785, "learning_rate": 0.0005285714285714286, "loss": 0.4386, "step": 16941 }, { "epoch": 9.464804469273743, "grad_norm": 0.5392580032348633, "learning_rate": 0.0005285434173669468, "loss": 0.419, "step": 16942 }, { "epoch": 9.46536312849162, "grad_norm": 0.5984982252120972, "learning_rate": 0.000528515406162465, "loss": 0.4554, "step": 16943 }, { "epoch": 9.465921787709497, "grad_norm": 1.6093237400054932, "learning_rate": 0.0005284873949579832, "loss": 0.4048, "step": 16944 }, { "epoch": 9.466480446927374, "grad_norm": 0.7827469706535339, "learning_rate": 0.0005284593837535014, "loss": 0.5575, "step": 16945 }, { "epoch": 9.46703910614525, "grad_norm": 0.6151099801063538, "learning_rate": 0.0005284313725490196, "loss": 0.3772, "step": 16946 }, { "epoch": 9.467597765363129, "grad_norm": 0.4412667453289032, "learning_rate": 0.0005284033613445378, "loss": 0.3938, "step": 16947 }, { "epoch": 9.468156424581005, "grad_norm": 0.4706019163131714, "learning_rate": 0.000528375350140056, "loss": 0.3906, "step": 16948 }, { "epoch": 9.468715083798882, "grad_norm": 0.5834082365036011, "learning_rate": 0.0005283473389355742, "loss": 0.5105, "step": 16949 }, { "epoch": 9.46927374301676, "grad_norm": 0.5690367221832275, "learning_rate": 0.0005283193277310924, "loss": 0.4442, "step": 16950 }, { "epoch": 9.469832402234637, "grad_norm": 0.49667978286743164, "learning_rate": 0.0005282913165266107, "loss": 0.4298, "step": 16951 }, { "epoch": 9.470391061452514, "grad_norm": 0.42091116309165955, "learning_rate": 0.0005282633053221289, "loss": 0.3867, "step": 16952 }, { "epoch": 9.470949720670392, "grad_norm": 1.3130115270614624, "learning_rate": 0.0005282352941176471, "loss": 0.5803, "step": 16953 }, { "epoch": 9.471508379888268, "grad_norm": 
0.6040555834770203, "learning_rate": 0.0005282072829131654, "loss": 0.3901, "step": 16954 }, { "epoch": 9.472067039106145, "grad_norm": 0.4838673770427704, "learning_rate": 0.0005281792717086835, "loss": 0.4193, "step": 16955 }, { "epoch": 9.472625698324022, "grad_norm": 0.5954147577285767, "learning_rate": 0.0005281512605042017, "loss": 0.349, "step": 16956 }, { "epoch": 9.4731843575419, "grad_norm": 0.597293496131897, "learning_rate": 0.0005281232492997199, "loss": 0.4784, "step": 16957 }, { "epoch": 9.473743016759776, "grad_norm": 0.39264634251594543, "learning_rate": 0.0005280952380952381, "loss": 0.3667, "step": 16958 }, { "epoch": 9.474301675977653, "grad_norm": 0.47603321075439453, "learning_rate": 0.0005280672268907563, "loss": 0.4177, "step": 16959 }, { "epoch": 9.474860335195531, "grad_norm": 4.853092193603516, "learning_rate": 0.0005280392156862745, "loss": 0.508, "step": 16960 }, { "epoch": 9.475418994413408, "grad_norm": 0.4145950973033905, "learning_rate": 0.0005280112044817927, "loss": 0.424, "step": 16961 }, { "epoch": 9.475977653631285, "grad_norm": 7.030914306640625, "learning_rate": 0.0005279831932773109, "loss": 0.4347, "step": 16962 }, { "epoch": 9.476536312849163, "grad_norm": 0.7101901769638062, "learning_rate": 0.0005279551820728291, "loss": 0.4203, "step": 16963 }, { "epoch": 9.47709497206704, "grad_norm": 0.40349534153938293, "learning_rate": 0.0005279271708683473, "loss": 0.481, "step": 16964 }, { "epoch": 9.477653631284916, "grad_norm": 0.4451386332511902, "learning_rate": 0.0005278991596638655, "loss": 0.4566, "step": 16965 }, { "epoch": 9.478212290502793, "grad_norm": 0.986737847328186, "learning_rate": 0.0005278711484593837, "loss": 0.4478, "step": 16966 }, { "epoch": 9.478770949720671, "grad_norm": 0.5118899941444397, "learning_rate": 0.000527843137254902, "loss": 0.5561, "step": 16967 }, { "epoch": 9.479329608938547, "grad_norm": 0.5588580369949341, "learning_rate": 0.0005278151260504202, "loss": 0.4867, "step": 16968 }, { "epoch": 
9.479888268156424, "grad_norm": 0.4059693515300751, "learning_rate": 0.0005277871148459384, "loss": 0.3931, "step": 16969 }, { "epoch": 9.480446927374302, "grad_norm": 0.464345782995224, "learning_rate": 0.0005277591036414567, "loss": 0.3336, "step": 16970 }, { "epoch": 9.481005586592179, "grad_norm": 0.4442157447338104, "learning_rate": 0.0005277310924369748, "loss": 0.478, "step": 16971 }, { "epoch": 9.481564245810056, "grad_norm": 0.5415831804275513, "learning_rate": 0.000527703081232493, "loss": 0.3304, "step": 16972 }, { "epoch": 9.482122905027932, "grad_norm": 0.48225876688957214, "learning_rate": 0.0005276750700280112, "loss": 0.3886, "step": 16973 }, { "epoch": 9.48268156424581, "grad_norm": 0.5581726431846619, "learning_rate": 0.0005276470588235294, "loss": 0.4473, "step": 16974 }, { "epoch": 9.483240223463687, "grad_norm": 0.6811268329620361, "learning_rate": 0.0005276190476190477, "loss": 0.4646, "step": 16975 }, { "epoch": 9.483798882681564, "grad_norm": 0.6459947824478149, "learning_rate": 0.0005275910364145658, "loss": 0.4539, "step": 16976 }, { "epoch": 9.484357541899442, "grad_norm": 0.5975168347358704, "learning_rate": 0.000527563025210084, "loss": 0.4654, "step": 16977 }, { "epoch": 9.484916201117318, "grad_norm": 0.38554954528808594, "learning_rate": 0.0005275350140056022, "loss": 0.3777, "step": 16978 }, { "epoch": 9.485474860335195, "grad_norm": 0.6111862063407898, "learning_rate": 0.0005275070028011204, "loss": 0.4024, "step": 16979 }, { "epoch": 9.486033519553073, "grad_norm": 0.8880608081817627, "learning_rate": 0.0005274789915966387, "loss": 0.5205, "step": 16980 }, { "epoch": 9.48659217877095, "grad_norm": 0.6676656603813171, "learning_rate": 0.0005274509803921568, "loss": 0.439, "step": 16981 }, { "epoch": 9.487150837988827, "grad_norm": 0.5042214393615723, "learning_rate": 0.000527422969187675, "loss": 0.5277, "step": 16982 }, { "epoch": 9.487709497206703, "grad_norm": 0.5696197152137756, "learning_rate": 0.0005273949579831932, "loss": 
0.4675, "step": 16983 }, { "epoch": 9.488268156424581, "grad_norm": 0.3811723291873932, "learning_rate": 0.0005273669467787115, "loss": 0.381, "step": 16984 }, { "epoch": 9.488826815642458, "grad_norm": 2.059382438659668, "learning_rate": 0.0005273389355742298, "loss": 0.3652, "step": 16985 }, { "epoch": 9.489385474860335, "grad_norm": 0.6013665795326233, "learning_rate": 0.000527310924369748, "loss": 0.2809, "step": 16986 }, { "epoch": 9.489944134078213, "grad_norm": 1.5045030117034912, "learning_rate": 0.0005272829131652661, "loss": 0.5084, "step": 16987 }, { "epoch": 9.49050279329609, "grad_norm": 0.9651634097099304, "learning_rate": 0.0005272549019607843, "loss": 0.5106, "step": 16988 }, { "epoch": 9.491061452513966, "grad_norm": 0.580617368221283, "learning_rate": 0.0005272268907563025, "loss": 0.4587, "step": 16989 }, { "epoch": 9.491620111731844, "grad_norm": 0.5041201114654541, "learning_rate": 0.0005271988795518208, "loss": 0.3886, "step": 16990 }, { "epoch": 9.492178770949721, "grad_norm": 0.5230264663696289, "learning_rate": 0.000527170868347339, "loss": 0.4946, "step": 16991 }, { "epoch": 9.492737430167598, "grad_norm": 0.358958899974823, "learning_rate": 0.0005271428571428571, "loss": 0.4651, "step": 16992 }, { "epoch": 9.493296089385474, "grad_norm": 0.5267083048820496, "learning_rate": 0.0005271148459383753, "loss": 0.6409, "step": 16993 }, { "epoch": 9.493854748603352, "grad_norm": 0.46035170555114746, "learning_rate": 0.0005270868347338935, "loss": 0.4324, "step": 16994 }, { "epoch": 9.494413407821229, "grad_norm": 0.466959148645401, "learning_rate": 0.0005270588235294118, "loss": 0.4284, "step": 16995 }, { "epoch": 9.494972067039106, "grad_norm": 0.6545068025588989, "learning_rate": 0.00052703081232493, "loss": 0.5019, "step": 16996 }, { "epoch": 9.495530726256984, "grad_norm": 0.6382648348808289, "learning_rate": 0.0005270028011204481, "loss": 0.5205, "step": 16997 }, { "epoch": 9.49608938547486, "grad_norm": 2.8890585899353027, "learning_rate": 
0.0005269747899159663, "loss": 0.4159, "step": 16998 }, { "epoch": 9.496648044692737, "grad_norm": 2.189188241958618, "learning_rate": 0.0005269467787114845, "loss": 0.4453, "step": 16999 }, { "epoch": 9.497206703910614, "grad_norm": 0.39940744638442993, "learning_rate": 0.0005269187675070029, "loss": 0.3932, "step": 17000 }, { "epoch": 9.497206703910614, "eval_cer": 0.0885622947421252, "eval_loss": 0.3346686065196991, "eval_runtime": 55.465, "eval_samples_per_second": 81.817, "eval_steps_per_second": 5.12, "eval_wer": 0.34962710538811764, "step": 17000 }, { "epoch": 9.497765363128492, "grad_norm": 0.8588578701019287, "learning_rate": 0.0005268907563025211, "loss": 0.4771, "step": 17001 }, { "epoch": 9.498324022346369, "grad_norm": 2.3360366821289062, "learning_rate": 0.0005268627450980393, "loss": 0.4208, "step": 17002 }, { "epoch": 9.498882681564245, "grad_norm": 0.4767686426639557, "learning_rate": 0.0005268347338935574, "loss": 0.4254, "step": 17003 }, { "epoch": 9.499441340782123, "grad_norm": 0.5392374396324158, "learning_rate": 0.0005268067226890756, "loss": 0.3953, "step": 17004 }, { "epoch": 9.5, "grad_norm": 1.5540601015090942, "learning_rate": 0.0005267787114845939, "loss": 0.4818, "step": 17005 }, { "epoch": 9.500558659217877, "grad_norm": 0.3668883442878723, "learning_rate": 0.0005267507002801121, "loss": 0.415, "step": 17006 }, { "epoch": 9.501117318435755, "grad_norm": 0.4730549454689026, "learning_rate": 0.0005267226890756303, "loss": 0.4503, "step": 17007 }, { "epoch": 9.501675977653631, "grad_norm": 0.4590531587600708, "learning_rate": 0.0005266946778711484, "loss": 0.4928, "step": 17008 }, { "epoch": 9.502234636871508, "grad_norm": 0.5151906609535217, "learning_rate": 0.0005266666666666666, "loss": 0.4737, "step": 17009 }, { "epoch": 9.502793296089386, "grad_norm": 0.5052680969238281, "learning_rate": 0.0005266386554621849, "loss": 0.4757, "step": 17010 }, { "epoch": 9.503351955307263, "grad_norm": 1.080153226852417, "learning_rate": 
0.0005266106442577031, "loss": 0.4144, "step": 17011 }, { "epoch": 9.50391061452514, "grad_norm": 0.3803623616695404, "learning_rate": 0.0005265826330532213, "loss": 0.355, "step": 17012 }, { "epoch": 9.504469273743016, "grad_norm": 1.0341953039169312, "learning_rate": 0.0005265546218487394, "loss": 0.335, "step": 17013 }, { "epoch": 9.505027932960894, "grad_norm": 0.5581839680671692, "learning_rate": 0.0005265266106442576, "loss": 0.3988, "step": 17014 }, { "epoch": 9.505586592178771, "grad_norm": 0.5371859073638916, "learning_rate": 0.000526498599439776, "loss": 0.5534, "step": 17015 }, { "epoch": 9.506145251396648, "grad_norm": 0.7883785963058472, "learning_rate": 0.0005264705882352942, "loss": 0.3991, "step": 17016 }, { "epoch": 9.506703910614526, "grad_norm": 1.2164338827133179, "learning_rate": 0.0005264425770308124, "loss": 0.3963, "step": 17017 }, { "epoch": 9.507262569832402, "grad_norm": 0.7702980637550354, "learning_rate": 0.0005264145658263306, "loss": 0.4526, "step": 17018 }, { "epoch": 9.507821229050279, "grad_norm": 1.0856456756591797, "learning_rate": 0.0005263865546218487, "loss": 0.4469, "step": 17019 }, { "epoch": 9.508379888268156, "grad_norm": 0.5283039212226868, "learning_rate": 0.000526358543417367, "loss": 0.4258, "step": 17020 }, { "epoch": 9.508938547486034, "grad_norm": 0.706710696220398, "learning_rate": 0.0005263305322128852, "loss": 0.4735, "step": 17021 }, { "epoch": 9.50949720670391, "grad_norm": 0.49506470561027527, "learning_rate": 0.0005263025210084034, "loss": 0.4785, "step": 17022 }, { "epoch": 9.510055865921787, "grad_norm": 0.48370540142059326, "learning_rate": 0.0005262745098039216, "loss": 0.4778, "step": 17023 }, { "epoch": 9.510614525139665, "grad_norm": 0.5205458402633667, "learning_rate": 0.0005262464985994397, "loss": 0.38, "step": 17024 }, { "epoch": 9.511173184357542, "grad_norm": 5.673548698425293, "learning_rate": 0.000526218487394958, "loss": 0.4341, "step": 17025 }, { "epoch": 9.511731843575419, "grad_norm": 
0.855028510093689, "learning_rate": 0.0005261904761904762, "loss": 0.4253, "step": 17026 }, { "epoch": 9.512290502793297, "grad_norm": 0.4802238345146179, "learning_rate": 0.0005261624649859944, "loss": 0.606, "step": 17027 }, { "epoch": 9.512849162011173, "grad_norm": 0.623631477355957, "learning_rate": 0.0005261344537815126, "loss": 0.4405, "step": 17028 }, { "epoch": 9.51340782122905, "grad_norm": 0.8969951272010803, "learning_rate": 0.0005261064425770307, "loss": 0.4085, "step": 17029 }, { "epoch": 9.513966480446927, "grad_norm": 1.3782315254211426, "learning_rate": 0.000526078431372549, "loss": 0.5027, "step": 17030 }, { "epoch": 9.514525139664805, "grad_norm": 0.5698773860931396, "learning_rate": 0.0005260504201680672, "loss": 0.4724, "step": 17031 }, { "epoch": 9.515083798882682, "grad_norm": 0.5380564332008362, "learning_rate": 0.0005260224089635854, "loss": 0.4539, "step": 17032 }, { "epoch": 9.515642458100558, "grad_norm": 0.5562434792518616, "learning_rate": 0.0005259943977591037, "loss": 0.4309, "step": 17033 }, { "epoch": 9.516201117318436, "grad_norm": 0.5673577189445496, "learning_rate": 0.0005259663865546219, "loss": 0.4182, "step": 17034 }, { "epoch": 9.516759776536313, "grad_norm": 0.38618579506874084, "learning_rate": 0.0005259383753501401, "loss": 0.3485, "step": 17035 }, { "epoch": 9.51731843575419, "grad_norm": 0.4398757219314575, "learning_rate": 0.0005259103641456583, "loss": 0.5243, "step": 17036 }, { "epoch": 9.517877094972068, "grad_norm": 1.4248276948928833, "learning_rate": 0.0005258823529411765, "loss": 0.4168, "step": 17037 }, { "epoch": 9.518435754189944, "grad_norm": 0.502399742603302, "learning_rate": 0.0005258543417366947, "loss": 0.3848, "step": 17038 }, { "epoch": 9.518994413407821, "grad_norm": 0.4897719621658325, "learning_rate": 0.0005258263305322129, "loss": 0.5654, "step": 17039 }, { "epoch": 9.519553072625698, "grad_norm": 1.2701818943023682, "learning_rate": 0.0005257983193277311, "loss": 0.4242, "step": 17040 }, { 
"epoch": 9.520111731843576, "grad_norm": 0.4697544574737549, "learning_rate": 0.0005257703081232493, "loss": 0.4251, "step": 17041 }, { "epoch": 9.520670391061453, "grad_norm": 0.7305477261543274, "learning_rate": 0.0005257422969187675, "loss": 0.5089, "step": 17042 }, { "epoch": 9.521229050279329, "grad_norm": 0.7140673995018005, "learning_rate": 0.0005257142857142857, "loss": 0.4771, "step": 17043 }, { "epoch": 9.521787709497207, "grad_norm": 0.4751395881175995, "learning_rate": 0.0005256862745098039, "loss": 0.4024, "step": 17044 }, { "epoch": 9.522346368715084, "grad_norm": 0.8670386672019958, "learning_rate": 0.0005256582633053222, "loss": 0.5812, "step": 17045 }, { "epoch": 9.52290502793296, "grad_norm": 0.5611286759376526, "learning_rate": 0.0005256302521008403, "loss": 0.4133, "step": 17046 }, { "epoch": 9.523463687150837, "grad_norm": 0.6129143238067627, "learning_rate": 0.0005256022408963585, "loss": 0.3555, "step": 17047 }, { "epoch": 9.524022346368715, "grad_norm": 0.5914970636367798, "learning_rate": 0.0005255742296918767, "loss": 0.3675, "step": 17048 }, { "epoch": 9.524581005586592, "grad_norm": 0.3674723207950592, "learning_rate": 0.000525546218487395, "loss": 0.3392, "step": 17049 }, { "epoch": 9.525139664804469, "grad_norm": 0.45090529322624207, "learning_rate": 0.0005255182072829133, "loss": 0.3598, "step": 17050 }, { "epoch": 9.525698324022347, "grad_norm": 1.039182424545288, "learning_rate": 0.0005254901960784314, "loss": 0.5497, "step": 17051 }, { "epoch": 9.526256983240224, "grad_norm": 0.5035821199417114, "learning_rate": 0.0005254621848739496, "loss": 0.3655, "step": 17052 }, { "epoch": 9.5268156424581, "grad_norm": 0.7699378728866577, "learning_rate": 0.0005254341736694678, "loss": 0.4663, "step": 17053 }, { "epoch": 9.527374301675978, "grad_norm": 1.0760384798049927, "learning_rate": 0.000525406162464986, "loss": 0.4905, "step": 17054 }, { "epoch": 9.527932960893855, "grad_norm": 0.41430097818374634, "learning_rate": 
0.0005253781512605043, "loss": 0.4142, "step": 17055 }, { "epoch": 9.528491620111732, "grad_norm": 0.4487805664539337, "learning_rate": 0.0005253501400560224, "loss": 0.3556, "step": 17056 }, { "epoch": 9.529050279329608, "grad_norm": 1.30894136428833, "learning_rate": 0.0005253221288515406, "loss": 0.4198, "step": 17057 }, { "epoch": 9.529608938547486, "grad_norm": 0.39597150683403015, "learning_rate": 0.0005252941176470588, "loss": 0.382, "step": 17058 }, { "epoch": 9.530167597765363, "grad_norm": 0.49407562613487244, "learning_rate": 0.000525266106442577, "loss": 0.472, "step": 17059 }, { "epoch": 9.53072625698324, "grad_norm": 1.321121335029602, "learning_rate": 0.0005252380952380953, "loss": 0.5272, "step": 17060 }, { "epoch": 9.531284916201118, "grad_norm": 0.5465450286865234, "learning_rate": 0.0005252100840336135, "loss": 0.4492, "step": 17061 }, { "epoch": 9.531843575418995, "grad_norm": 0.4928973913192749, "learning_rate": 0.0005251820728291316, "loss": 0.3634, "step": 17062 }, { "epoch": 9.532402234636871, "grad_norm": 0.5599426627159119, "learning_rate": 0.0005251540616246498, "loss": 0.5228, "step": 17063 }, { "epoch": 9.53296089385475, "grad_norm": 0.40625089406967163, "learning_rate": 0.000525126050420168, "loss": 0.3379, "step": 17064 }, { "epoch": 9.533519553072626, "grad_norm": 0.6190981268882751, "learning_rate": 0.0005250980392156864, "loss": 0.3809, "step": 17065 }, { "epoch": 9.534078212290503, "grad_norm": 0.8735527396202087, "learning_rate": 0.0005250700280112046, "loss": 0.3654, "step": 17066 }, { "epoch": 9.53463687150838, "grad_norm": 0.6221041679382324, "learning_rate": 0.0005250420168067227, "loss": 0.5481, "step": 17067 }, { "epoch": 9.535195530726257, "grad_norm": 0.8592880964279175, "learning_rate": 0.0005250140056022409, "loss": 0.4099, "step": 17068 }, { "epoch": 9.535754189944134, "grad_norm": 0.4455025792121887, "learning_rate": 0.0005249859943977591, "loss": 0.3824, "step": 17069 }, { "epoch": 9.53631284916201, "grad_norm": 
0.5507128238677979, "learning_rate": 0.0005249579831932774, "loss": 0.4256, "step": 17070 }, { "epoch": 9.536871508379889, "grad_norm": 0.3528655171394348, "learning_rate": 0.0005249299719887956, "loss": 0.38, "step": 17071 }, { "epoch": 9.537430167597766, "grad_norm": 0.5165811777114868, "learning_rate": 0.0005249019607843137, "loss": 0.4086, "step": 17072 }, { "epoch": 9.537988826815642, "grad_norm": 0.43222153186798096, "learning_rate": 0.0005248739495798319, "loss": 0.4373, "step": 17073 }, { "epoch": 9.538547486033519, "grad_norm": 0.5648389458656311, "learning_rate": 0.0005248459383753501, "loss": 0.5987, "step": 17074 }, { "epoch": 9.539106145251397, "grad_norm": 1.7469907999038696, "learning_rate": 0.0005248179271708684, "loss": 0.4046, "step": 17075 }, { "epoch": 9.539664804469274, "grad_norm": 0.5755770206451416, "learning_rate": 0.0005247899159663866, "loss": 0.3985, "step": 17076 }, { "epoch": 9.54022346368715, "grad_norm": 1.8374018669128418, "learning_rate": 0.0005247619047619048, "loss": 0.6532, "step": 17077 }, { "epoch": 9.540782122905028, "grad_norm": 0.4430503249168396, "learning_rate": 0.0005247338935574229, "loss": 0.4077, "step": 17078 }, { "epoch": 9.541340782122905, "grad_norm": 0.5844117403030396, "learning_rate": 0.0005247058823529411, "loss": 0.4748, "step": 17079 }, { "epoch": 9.541899441340782, "grad_norm": 0.4782406687736511, "learning_rate": 0.0005246778711484594, "loss": 0.4011, "step": 17080 }, { "epoch": 9.54245810055866, "grad_norm": 0.9458624720573425, "learning_rate": 0.0005246498599439777, "loss": 0.578, "step": 17081 }, { "epoch": 9.543016759776537, "grad_norm": 0.7406259775161743, "learning_rate": 0.0005246218487394959, "loss": 0.4464, "step": 17082 }, { "epoch": 9.543575418994413, "grad_norm": 0.7134833335876465, "learning_rate": 0.000524593837535014, "loss": 0.5599, "step": 17083 }, { "epoch": 9.544134078212291, "grad_norm": 0.4752214848995209, "learning_rate": 0.0005245658263305322, "loss": 0.4252, "step": 17084 }, { 
"epoch": 9.544692737430168, "grad_norm": 0.48502466082572937, "learning_rate": 0.0005245378151260505, "loss": 0.4379, "step": 17085 }, { "epoch": 9.545251396648045, "grad_norm": 0.7046025395393372, "learning_rate": 0.0005245098039215687, "loss": 0.4014, "step": 17086 }, { "epoch": 9.545810055865921, "grad_norm": 1.863295555114746, "learning_rate": 0.0005244817927170869, "loss": 0.5312, "step": 17087 }, { "epoch": 9.5463687150838, "grad_norm": 0.7555891871452332, "learning_rate": 0.000524453781512605, "loss": 0.5204, "step": 17088 }, { "epoch": 9.546927374301676, "grad_norm": 0.774724006652832, "learning_rate": 0.0005244257703081232, "loss": 0.4239, "step": 17089 }, { "epoch": 9.547486033519553, "grad_norm": 1.4700146913528442, "learning_rate": 0.0005243977591036415, "loss": 0.3992, "step": 17090 }, { "epoch": 9.548044692737431, "grad_norm": 1.0987331867218018, "learning_rate": 0.0005243697478991597, "loss": 0.5973, "step": 17091 }, { "epoch": 9.548603351955308, "grad_norm": 0.526216983795166, "learning_rate": 0.0005243417366946779, "loss": 0.4423, "step": 17092 }, { "epoch": 9.549162011173184, "grad_norm": 4.453642845153809, "learning_rate": 0.0005243137254901961, "loss": 0.5893, "step": 17093 }, { "epoch": 9.54972067039106, "grad_norm": 0.529809832572937, "learning_rate": 0.0005242857142857142, "loss": 0.525, "step": 17094 }, { "epoch": 9.550279329608939, "grad_norm": 0.45865899324417114, "learning_rate": 0.0005242577030812325, "loss": 0.2893, "step": 17095 }, { "epoch": 9.550837988826816, "grad_norm": 0.36625370383262634, "learning_rate": 0.0005242296918767507, "loss": 0.3691, "step": 17096 }, { "epoch": 9.551396648044692, "grad_norm": 2.300689220428467, "learning_rate": 0.000524201680672269, "loss": 0.5251, "step": 17097 }, { "epoch": 9.55195530726257, "grad_norm": 0.7351963520050049, "learning_rate": 0.0005241736694677872, "loss": 0.4333, "step": 17098 }, { "epoch": 9.552513966480447, "grad_norm": 0.47708648443222046, "learning_rate": 0.0005241456582633053, 
"loss": 0.5072, "step": 17099 }, { "epoch": 9.553072625698324, "grad_norm": 0.6173315048217773, "learning_rate": 0.0005241176470588236, "loss": 0.5266, "step": 17100 }, { "epoch": 9.553631284916202, "grad_norm": 0.5904353260993958, "learning_rate": 0.0005240896358543418, "loss": 0.4647, "step": 17101 }, { "epoch": 9.554189944134079, "grad_norm": 0.5267318487167358, "learning_rate": 0.00052406162464986, "loss": 0.5438, "step": 17102 }, { "epoch": 9.554748603351955, "grad_norm": 0.7505891919136047, "learning_rate": 0.0005240336134453782, "loss": 0.4739, "step": 17103 }, { "epoch": 9.555307262569832, "grad_norm": 2.939276695251465, "learning_rate": 0.0005240056022408963, "loss": 0.3609, "step": 17104 }, { "epoch": 9.55586592178771, "grad_norm": 0.6280815601348877, "learning_rate": 0.0005239775910364146, "loss": 0.4767, "step": 17105 }, { "epoch": 9.556424581005587, "grad_norm": 0.7200734615325928, "learning_rate": 0.0005239495798319328, "loss": 0.3892, "step": 17106 }, { "epoch": 9.556983240223463, "grad_norm": 0.49386996030807495, "learning_rate": 0.000523921568627451, "loss": 0.3605, "step": 17107 }, { "epoch": 9.557541899441341, "grad_norm": 0.5270695686340332, "learning_rate": 0.0005238935574229692, "loss": 0.4552, "step": 17108 }, { "epoch": 9.558100558659218, "grad_norm": 0.3849281370639801, "learning_rate": 0.0005238655462184874, "loss": 0.3747, "step": 17109 }, { "epoch": 9.558659217877095, "grad_norm": 0.9354639649391174, "learning_rate": 0.0005238375350140056, "loss": 0.5575, "step": 17110 }, { "epoch": 9.559217877094973, "grad_norm": 1.1783463954925537, "learning_rate": 0.0005238095238095238, "loss": 0.4527, "step": 17111 }, { "epoch": 9.55977653631285, "grad_norm": 0.47555312514305115, "learning_rate": 0.000523781512605042, "loss": 0.3406, "step": 17112 }, { "epoch": 9.560335195530726, "grad_norm": 1.0341235399246216, "learning_rate": 0.0005237535014005602, "loss": 0.5136, "step": 17113 }, { "epoch": 9.560893854748603, "grad_norm": 0.45691102743148804, 
"learning_rate": 0.0005237254901960784, "loss": 0.3725, "step": 17114 }, { "epoch": 9.561452513966481, "grad_norm": 0.5268739461898804, "learning_rate": 0.0005236974789915967, "loss": 0.416, "step": 17115 }, { "epoch": 9.562011173184358, "grad_norm": 0.7065299153327942, "learning_rate": 0.0005236694677871149, "loss": 0.5814, "step": 17116 }, { "epoch": 9.562569832402234, "grad_norm": 0.5363345742225647, "learning_rate": 0.0005236414565826331, "loss": 0.46, "step": 17117 }, { "epoch": 9.563128491620112, "grad_norm": 0.5881487131118774, "learning_rate": 0.0005236134453781513, "loss": 0.4344, "step": 17118 }, { "epoch": 9.563687150837989, "grad_norm": 0.5991069674491882, "learning_rate": 0.0005235854341736695, "loss": 0.4994, "step": 17119 }, { "epoch": 9.564245810055866, "grad_norm": 0.540647566318512, "learning_rate": 0.0005235574229691877, "loss": 0.4531, "step": 17120 }, { "epoch": 9.564804469273742, "grad_norm": 0.6199446320533752, "learning_rate": 0.0005235294117647059, "loss": 0.4049, "step": 17121 }, { "epoch": 9.56536312849162, "grad_norm": 0.9489259123802185, "learning_rate": 0.0005235014005602241, "loss": 0.4576, "step": 17122 }, { "epoch": 9.565921787709497, "grad_norm": 1.1844803094863892, "learning_rate": 0.0005234733893557423, "loss": 0.4348, "step": 17123 }, { "epoch": 9.566480446927374, "grad_norm": 0.6842337846755981, "learning_rate": 0.0005234453781512605, "loss": 0.5164, "step": 17124 }, { "epoch": 9.567039106145252, "grad_norm": 0.9702425599098206, "learning_rate": 0.0005234173669467788, "loss": 0.5053, "step": 17125 }, { "epoch": 9.567597765363129, "grad_norm": 0.4444902539253235, "learning_rate": 0.0005233893557422969, "loss": 0.3998, "step": 17126 }, { "epoch": 9.568156424581005, "grad_norm": 0.39580947160720825, "learning_rate": 0.0005233613445378151, "loss": 0.4246, "step": 17127 }, { "epoch": 9.568715083798883, "grad_norm": 0.38838502764701843, "learning_rate": 0.0005233333333333333, "loss": 0.3448, "step": 17128 }, { "epoch": 
9.56927374301676, "grad_norm": 0.529678463935852, "learning_rate": 0.0005233053221288515, "loss": 0.5969, "step": 17129 }, { "epoch": 9.569832402234637, "grad_norm": 0.5953384041786194, "learning_rate": 0.0005232773109243699, "loss": 0.3602, "step": 17130 }, { "epoch": 9.570391061452513, "grad_norm": 0.4435705542564392, "learning_rate": 0.000523249299719888, "loss": 0.3981, "step": 17131 }, { "epoch": 9.570949720670392, "grad_norm": 0.5549498200416565, "learning_rate": 0.0005232212885154062, "loss": 0.413, "step": 17132 }, { "epoch": 9.571508379888268, "grad_norm": 0.462931364774704, "learning_rate": 0.0005231932773109244, "loss": 0.4177, "step": 17133 }, { "epoch": 9.572067039106145, "grad_norm": 0.3711908161640167, "learning_rate": 0.0005231652661064426, "loss": 0.4648, "step": 17134 }, { "epoch": 9.572625698324023, "grad_norm": 0.4939176142215729, "learning_rate": 0.0005231372549019609, "loss": 0.4048, "step": 17135 }, { "epoch": 9.5731843575419, "grad_norm": 1.4673243761062622, "learning_rate": 0.000523109243697479, "loss": 0.5333, "step": 17136 }, { "epoch": 9.573743016759776, "grad_norm": 0.443583220243454, "learning_rate": 0.0005230812324929972, "loss": 0.305, "step": 17137 }, { "epoch": 9.574301675977654, "grad_norm": 0.5470555424690247, "learning_rate": 0.0005230532212885154, "loss": 0.4449, "step": 17138 }, { "epoch": 9.574860335195531, "grad_norm": 0.40456125140190125, "learning_rate": 0.0005230252100840336, "loss": 0.4328, "step": 17139 }, { "epoch": 9.575418994413408, "grad_norm": 0.7064840197563171, "learning_rate": 0.0005229971988795519, "loss": 0.5424, "step": 17140 }, { "epoch": 9.575977653631284, "grad_norm": 0.4466869831085205, "learning_rate": 0.0005229691876750701, "loss": 0.4153, "step": 17141 }, { "epoch": 9.576536312849163, "grad_norm": 0.5580495595932007, "learning_rate": 0.0005229411764705882, "loss": 0.5362, "step": 17142 }, { "epoch": 9.577094972067039, "grad_norm": 0.3946962356567383, "learning_rate": 0.0005229131652661064, "loss": 
0.3823, "step": 17143 }, { "epoch": 9.577653631284916, "grad_norm": 0.44697222113609314, "learning_rate": 0.0005228851540616246, "loss": 0.4547, "step": 17144 }, { "epoch": 9.578212290502794, "grad_norm": 0.6752206087112427, "learning_rate": 0.0005228571428571429, "loss": 0.435, "step": 17145 }, { "epoch": 9.57877094972067, "grad_norm": 0.4149279296398163, "learning_rate": 0.0005228291316526611, "loss": 0.3296, "step": 17146 }, { "epoch": 9.579329608938547, "grad_norm": 0.48432645201683044, "learning_rate": 0.0005228011204481792, "loss": 0.4652, "step": 17147 }, { "epoch": 9.579888268156424, "grad_norm": 0.49763989448547363, "learning_rate": 0.0005227731092436975, "loss": 0.4825, "step": 17148 }, { "epoch": 9.580446927374302, "grad_norm": 0.5180466771125793, "learning_rate": 0.0005227450980392157, "loss": 0.4342, "step": 17149 }, { "epoch": 9.581005586592179, "grad_norm": 1.0930254459381104, "learning_rate": 0.000522717086834734, "loss": 0.4871, "step": 17150 }, { "epoch": 9.581564245810055, "grad_norm": 0.6297570466995239, "learning_rate": 0.0005226890756302522, "loss": 0.3721, "step": 17151 }, { "epoch": 9.582122905027934, "grad_norm": 0.49666717648506165, "learning_rate": 0.0005226610644257703, "loss": 0.4522, "step": 17152 }, { "epoch": 9.58268156424581, "grad_norm": 0.4924290180206299, "learning_rate": 0.0005226330532212885, "loss": 0.4572, "step": 17153 }, { "epoch": 9.583240223463687, "grad_norm": 5.374730587005615, "learning_rate": 0.0005226050420168067, "loss": 0.4623, "step": 17154 }, { "epoch": 9.583798882681565, "grad_norm": 0.49165159463882446, "learning_rate": 0.000522577030812325, "loss": 0.4094, "step": 17155 }, { "epoch": 9.584357541899442, "grad_norm": 0.6179744601249695, "learning_rate": 0.0005225490196078432, "loss": 0.4683, "step": 17156 }, { "epoch": 9.584916201117318, "grad_norm": 0.5643314123153687, "learning_rate": 0.0005225210084033614, "loss": 0.3381, "step": 17157 }, { "epoch": 9.585474860335196, "grad_norm": 0.4656287431716919, 
"learning_rate": 0.0005224929971988795, "loss": 0.4767, "step": 17158 }, { "epoch": 9.586033519553073, "grad_norm": 0.4668651223182678, "learning_rate": 0.0005224649859943977, "loss": 0.3958, "step": 17159 }, { "epoch": 9.58659217877095, "grad_norm": 0.6359231472015381, "learning_rate": 0.000522436974789916, "loss": 0.4359, "step": 17160 }, { "epoch": 9.587150837988826, "grad_norm": 0.9883036017417908, "learning_rate": 0.0005224089635854342, "loss": 0.5551, "step": 17161 }, { "epoch": 9.587709497206705, "grad_norm": 0.4519002139568329, "learning_rate": 0.0005223809523809524, "loss": 0.3913, "step": 17162 }, { "epoch": 9.588268156424581, "grad_norm": 1.19994056224823, "learning_rate": 0.0005223529411764705, "loss": 0.3671, "step": 17163 }, { "epoch": 9.588826815642458, "grad_norm": 0.43731117248535156, "learning_rate": 0.0005223249299719887, "loss": 0.4964, "step": 17164 }, { "epoch": 9.589385474860336, "grad_norm": 1.8858529329299927, "learning_rate": 0.0005222969187675071, "loss": 0.3982, "step": 17165 }, { "epoch": 9.589944134078213, "grad_norm": 0.5303972363471985, "learning_rate": 0.0005222689075630253, "loss": 0.4732, "step": 17166 }, { "epoch": 9.59050279329609, "grad_norm": 0.4779343903064728, "learning_rate": 0.0005222408963585435, "loss": 0.447, "step": 17167 }, { "epoch": 9.591061452513966, "grad_norm": 42.68880844116211, "learning_rate": 0.0005222128851540616, "loss": 0.4795, "step": 17168 }, { "epoch": 9.591620111731844, "grad_norm": 0.3906610906124115, "learning_rate": 0.0005221848739495798, "loss": 0.3446, "step": 17169 }, { "epoch": 9.59217877094972, "grad_norm": 0.7117505073547363, "learning_rate": 0.0005221568627450981, "loss": 0.4275, "step": 17170 }, { "epoch": 9.592737430167597, "grad_norm": 0.7052455544471741, "learning_rate": 0.0005221288515406163, "loss": 0.4334, "step": 17171 }, { "epoch": 9.593296089385476, "grad_norm": 1.207653522491455, "learning_rate": 0.0005221008403361345, "loss": 0.3687, "step": 17172 }, { "epoch": 9.593854748603352, 
"grad_norm": 0.5037881135940552, "learning_rate": 0.0005220728291316527, "loss": 0.448, "step": 17173 }, { "epoch": 9.594413407821229, "grad_norm": 0.4416349530220032, "learning_rate": 0.0005220448179271708, "loss": 0.4845, "step": 17174 }, { "epoch": 9.594972067039105, "grad_norm": 0.5492764115333557, "learning_rate": 0.0005220168067226891, "loss": 0.4785, "step": 17175 }, { "epoch": 9.595530726256984, "grad_norm": 0.4624919891357422, "learning_rate": 0.0005219887955182073, "loss": 0.4793, "step": 17176 }, { "epoch": 9.59608938547486, "grad_norm": 1.1057995557785034, "learning_rate": 0.0005219607843137255, "loss": 0.4647, "step": 17177 }, { "epoch": 9.596648044692737, "grad_norm": 0.5127424001693726, "learning_rate": 0.0005219327731092437, "loss": 0.4357, "step": 17178 }, { "epoch": 9.597206703910615, "grad_norm": 0.39835458993911743, "learning_rate": 0.0005219047619047618, "loss": 0.357, "step": 17179 }, { "epoch": 9.597765363128492, "grad_norm": 0.816056489944458, "learning_rate": 0.0005218767507002802, "loss": 0.3523, "step": 17180 }, { "epoch": 9.598324022346368, "grad_norm": 0.8206701874732971, "learning_rate": 0.0005218487394957984, "loss": 0.2964, "step": 17181 }, { "epoch": 9.598882681564247, "grad_norm": 0.6436958909034729, "learning_rate": 0.0005218207282913166, "loss": 0.455, "step": 17182 }, { "epoch": 9.599441340782123, "grad_norm": 0.5124699473381042, "learning_rate": 0.0005217927170868348, "loss": 0.4362, "step": 17183 }, { "epoch": 9.6, "grad_norm": 0.5424758195877075, "learning_rate": 0.0005217647058823529, "loss": 0.3933, "step": 17184 }, { "epoch": 9.600558659217878, "grad_norm": 0.413993239402771, "learning_rate": 0.0005217366946778711, "loss": 0.3832, "step": 17185 }, { "epoch": 9.601117318435755, "grad_norm": 0.526695966720581, "learning_rate": 0.0005217086834733894, "loss": 0.4115, "step": 17186 }, { "epoch": 9.601675977653631, "grad_norm": 0.43858984112739563, "learning_rate": 0.0005216806722689076, "loss": 0.472, "step": 17187 }, { 
"epoch": 9.602234636871508, "grad_norm": 0.5752077698707581, "learning_rate": 0.0005216526610644258, "loss": 0.5045, "step": 17188 }, { "epoch": 9.602793296089386, "grad_norm": 0.6708674430847168, "learning_rate": 0.000521624649859944, "loss": 0.4964, "step": 17189 }, { "epoch": 9.603351955307263, "grad_norm": 1.5958267450332642, "learning_rate": 0.0005215966386554621, "loss": 0.4032, "step": 17190 }, { "epoch": 9.60391061452514, "grad_norm": 1.4779036045074463, "learning_rate": 0.0005215686274509804, "loss": 0.4673, "step": 17191 }, { "epoch": 9.604469273743018, "grad_norm": 0.4911649525165558, "learning_rate": 0.0005215406162464986, "loss": 0.4242, "step": 17192 }, { "epoch": 9.605027932960894, "grad_norm": 0.37525609135627747, "learning_rate": 0.0005215126050420168, "loss": 0.4277, "step": 17193 }, { "epoch": 9.60558659217877, "grad_norm": 0.4698725938796997, "learning_rate": 0.000521484593837535, "loss": 0.4921, "step": 17194 }, { "epoch": 9.606145251396647, "grad_norm": 0.6171064376831055, "learning_rate": 0.0005214565826330531, "loss": 0.5934, "step": 17195 }, { "epoch": 9.606703910614526, "grad_norm": 0.9733958840370178, "learning_rate": 0.0005214285714285714, "loss": 0.5253, "step": 17196 }, { "epoch": 9.607262569832402, "grad_norm": 0.3835925757884979, "learning_rate": 0.0005214005602240897, "loss": 0.396, "step": 17197 }, { "epoch": 9.607821229050279, "grad_norm": 0.6190245151519775, "learning_rate": 0.0005213725490196079, "loss": 0.4192, "step": 17198 }, { "epoch": 9.608379888268157, "grad_norm": 0.8092804551124573, "learning_rate": 0.0005213445378151261, "loss": 0.398, "step": 17199 }, { "epoch": 9.608938547486034, "grad_norm": 0.3974584937095642, "learning_rate": 0.0005213165266106442, "loss": 0.4114, "step": 17200 }, { "epoch": 9.60949720670391, "grad_norm": 0.5593611598014832, "learning_rate": 0.0005212885154061625, "loss": 0.3764, "step": 17201 }, { "epoch": 9.610055865921789, "grad_norm": 0.6625487804412842, "learning_rate": 0.0005212605042016807, 
"loss": 0.522, "step": 17202 }, { "epoch": 9.610614525139665, "grad_norm": 0.46181565523147583, "learning_rate": 0.0005212324929971989, "loss": 0.386, "step": 17203 }, { "epoch": 9.611173184357542, "grad_norm": 0.5924334526062012, "learning_rate": 0.0005212044817927171, "loss": 0.5211, "step": 17204 }, { "epoch": 9.611731843575418, "grad_norm": 0.38895201683044434, "learning_rate": 0.0005211764705882353, "loss": 0.4185, "step": 17205 }, { "epoch": 9.612290502793297, "grad_norm": 0.5148903131484985, "learning_rate": 0.0005211484593837535, "loss": 0.3631, "step": 17206 }, { "epoch": 9.612849162011173, "grad_norm": 0.6946407556533813, "learning_rate": 0.0005211204481792717, "loss": 0.4007, "step": 17207 }, { "epoch": 9.61340782122905, "grad_norm": 1.7788351774215698, "learning_rate": 0.0005210924369747899, "loss": 0.3872, "step": 17208 }, { "epoch": 9.613966480446928, "grad_norm": 0.3721999228000641, "learning_rate": 0.0005210644257703081, "loss": 0.4727, "step": 17209 }, { "epoch": 9.614525139664805, "grad_norm": 0.7394548654556274, "learning_rate": 0.0005210364145658263, "loss": 0.4723, "step": 17210 }, { "epoch": 9.615083798882681, "grad_norm": 3.182971239089966, "learning_rate": 0.0005210084033613445, "loss": 0.4694, "step": 17211 }, { "epoch": 9.61564245810056, "grad_norm": 3.233119010925293, "learning_rate": 0.0005209803921568627, "loss": 0.4268, "step": 17212 }, { "epoch": 9.616201117318436, "grad_norm": 0.8366853594779968, "learning_rate": 0.000520952380952381, "loss": 0.7175, "step": 17213 }, { "epoch": 9.616759776536313, "grad_norm": 0.37128975987434387, "learning_rate": 0.0005209243697478992, "loss": 0.3626, "step": 17214 }, { "epoch": 9.61731843575419, "grad_norm": 0.625723659992218, "learning_rate": 0.0005208963585434174, "loss": 0.3871, "step": 17215 }, { "epoch": 9.617877094972068, "grad_norm": 0.6446060538291931, "learning_rate": 0.0005208683473389356, "loss": 0.3854, "step": 17216 }, { "epoch": 9.618435754189944, "grad_norm": 0.935449481010437, 
"learning_rate": 0.0005208403361344538, "loss": 0.3154, "step": 17217 }, { "epoch": 9.61899441340782, "grad_norm": 1.6552786827087402, "learning_rate": 0.000520812324929972, "loss": 0.3992, "step": 17218 }, { "epoch": 9.619553072625699, "grad_norm": 0.48644429445266724, "learning_rate": 0.0005207843137254902, "loss": 0.4763, "step": 17219 }, { "epoch": 9.620111731843576, "grad_norm": 0.6879404783248901, "learning_rate": 0.0005207563025210084, "loss": 0.3049, "step": 17220 }, { "epoch": 9.620670391061452, "grad_norm": 0.5403700470924377, "learning_rate": 0.0005207282913165267, "loss": 0.4361, "step": 17221 }, { "epoch": 9.621229050279329, "grad_norm": 0.7189821004867554, "learning_rate": 0.0005207002801120448, "loss": 0.4991, "step": 17222 }, { "epoch": 9.621787709497207, "grad_norm": 0.4064416289329529, "learning_rate": 0.000520672268907563, "loss": 0.4204, "step": 17223 }, { "epoch": 9.622346368715084, "grad_norm": 0.5660318732261658, "learning_rate": 0.0005206442577030812, "loss": 0.4846, "step": 17224 }, { "epoch": 9.62290502793296, "grad_norm": 0.44163334369659424, "learning_rate": 0.0005206162464985994, "loss": 0.3166, "step": 17225 }, { "epoch": 9.623463687150839, "grad_norm": 0.4681432843208313, "learning_rate": 0.0005205882352941177, "loss": 0.4286, "step": 17226 }, { "epoch": 9.624022346368715, "grad_norm": 0.48604559898376465, "learning_rate": 0.0005205602240896358, "loss": 0.3907, "step": 17227 }, { "epoch": 9.624581005586592, "grad_norm": 12.079716682434082, "learning_rate": 0.000520532212885154, "loss": 0.5273, "step": 17228 }, { "epoch": 9.62513966480447, "grad_norm": 0.7280187010765076, "learning_rate": 0.0005205042016806722, "loss": 0.3899, "step": 17229 }, { "epoch": 9.625698324022347, "grad_norm": 0.5819796919822693, "learning_rate": 0.0005204761904761905, "loss": 0.4096, "step": 17230 }, { "epoch": 9.626256983240223, "grad_norm": 0.588688313961029, "learning_rate": 0.0005204481792717088, "loss": 0.4111, "step": 17231 }, { "epoch": 
9.6268156424581, "grad_norm": 1.4041037559509277, "learning_rate": 0.0005204201680672269, "loss": 0.346, "step": 17232 }, { "epoch": 9.627374301675978, "grad_norm": 0.6162841320037842, "learning_rate": 0.0005203921568627451, "loss": 0.4711, "step": 17233 }, { "epoch": 9.627932960893855, "grad_norm": 0.4820128083229065, "learning_rate": 0.0005203641456582633, "loss": 0.4651, "step": 17234 }, { "epoch": 9.628491620111731, "grad_norm": 0.6277658939361572, "learning_rate": 0.0005203361344537815, "loss": 0.4503, "step": 17235 }, { "epoch": 9.62905027932961, "grad_norm": 0.44564491510391235, "learning_rate": 0.0005203081232492998, "loss": 0.4209, "step": 17236 }, { "epoch": 9.629608938547486, "grad_norm": 0.4967097342014313, "learning_rate": 0.000520280112044818, "loss": 0.4725, "step": 17237 }, { "epoch": 9.630167597765363, "grad_norm": 0.9343072175979614, "learning_rate": 0.0005202521008403361, "loss": 0.5744, "step": 17238 }, { "epoch": 9.630726256983241, "grad_norm": 0.5217739343643188, "learning_rate": 0.0005202240896358543, "loss": 0.3833, "step": 17239 }, { "epoch": 9.631284916201118, "grad_norm": 0.458037793636322, "learning_rate": 0.0005201960784313725, "loss": 0.3939, "step": 17240 }, { "epoch": 9.631843575418994, "grad_norm": 1.8534164428710938, "learning_rate": 0.0005201680672268908, "loss": 0.57, "step": 17241 }, { "epoch": 9.63240223463687, "grad_norm": 0.7761991024017334, "learning_rate": 0.000520140056022409, "loss": 0.5147, "step": 17242 }, { "epoch": 9.632960893854749, "grad_norm": 0.6547293663024902, "learning_rate": 0.0005201120448179271, "loss": 0.5333, "step": 17243 }, { "epoch": 9.633519553072626, "grad_norm": 2.3486833572387695, "learning_rate": 0.0005200840336134453, "loss": 0.3496, "step": 17244 }, { "epoch": 9.634078212290502, "grad_norm": 0.8718780875205994, "learning_rate": 0.0005200560224089635, "loss": 0.4624, "step": 17245 }, { "epoch": 9.63463687150838, "grad_norm": 0.4057908058166504, "learning_rate": 0.0005200280112044819, "loss": 
0.3739, "step": 17246 }, { "epoch": 9.635195530726257, "grad_norm": 0.525743305683136, "learning_rate": 0.0005200000000000001, "loss": 0.4772, "step": 17247 }, { "epoch": 9.635754189944134, "grad_norm": 0.3417477309703827, "learning_rate": 0.0005199719887955182, "loss": 0.3156, "step": 17248 }, { "epoch": 9.63631284916201, "grad_norm": 0.7415743470191956, "learning_rate": 0.0005199439775910364, "loss": 0.3991, "step": 17249 }, { "epoch": 9.636871508379889, "grad_norm": 0.6087435483932495, "learning_rate": 0.0005199159663865546, "loss": 0.6012, "step": 17250 }, { "epoch": 9.637430167597765, "grad_norm": 0.5264108180999756, "learning_rate": 0.0005198879551820729, "loss": 0.4477, "step": 17251 }, { "epoch": 9.637988826815642, "grad_norm": 0.5705776810646057, "learning_rate": 0.0005198599439775911, "loss": 0.375, "step": 17252 }, { "epoch": 9.63854748603352, "grad_norm": 3.4019694328308105, "learning_rate": 0.0005198319327731093, "loss": 0.5642, "step": 17253 }, { "epoch": 9.639106145251397, "grad_norm": 0.4593721330165863, "learning_rate": 0.0005198039215686274, "loss": 0.3923, "step": 17254 }, { "epoch": 9.639664804469273, "grad_norm": 0.4560600519180298, "learning_rate": 0.0005197759103641456, "loss": 0.4236, "step": 17255 }, { "epoch": 9.640223463687152, "grad_norm": 0.5203753113746643, "learning_rate": 0.0005197478991596639, "loss": 0.3902, "step": 17256 }, { "epoch": 9.640782122905028, "grad_norm": 0.9243482351303101, "learning_rate": 0.0005197198879551821, "loss": 0.4105, "step": 17257 }, { "epoch": 9.641340782122905, "grad_norm": 0.3731384575366974, "learning_rate": 0.0005196918767507003, "loss": 0.4162, "step": 17258 }, { "epoch": 9.641899441340783, "grad_norm": 0.593356192111969, "learning_rate": 0.0005196638655462184, "loss": 0.3857, "step": 17259 }, { "epoch": 9.64245810055866, "grad_norm": 0.41375938057899475, "learning_rate": 0.0005196358543417366, "loss": 0.4508, "step": 17260 }, { "epoch": 9.643016759776536, "grad_norm": 0.42015644907951355, 
"learning_rate": 0.000519607843137255, "loss": 0.3475, "step": 17261 }, { "epoch": 9.643575418994413, "grad_norm": 0.6232340931892395, "learning_rate": 0.0005195798319327732, "loss": 0.453, "step": 17262 }, { "epoch": 9.644134078212291, "grad_norm": 0.8085982203483582, "learning_rate": 0.0005195518207282914, "loss": 0.6541, "step": 17263 }, { "epoch": 9.644692737430168, "grad_norm": 0.6777638792991638, "learning_rate": 0.0005195238095238095, "loss": 0.4783, "step": 17264 }, { "epoch": 9.645251396648044, "grad_norm": 0.4959794282913208, "learning_rate": 0.0005194957983193277, "loss": 0.5392, "step": 17265 }, { "epoch": 9.645810055865923, "grad_norm": 0.49847304821014404, "learning_rate": 0.000519467787114846, "loss": 0.4279, "step": 17266 }, { "epoch": 9.6463687150838, "grad_norm": 0.38611137866973877, "learning_rate": 0.0005194397759103642, "loss": 0.4752, "step": 17267 }, { "epoch": 9.646927374301676, "grad_norm": 0.8109422326087952, "learning_rate": 0.0005194117647058824, "loss": 0.443, "step": 17268 }, { "epoch": 9.647486033519552, "grad_norm": 0.45339465141296387, "learning_rate": 0.0005193837535014006, "loss": 0.3606, "step": 17269 }, { "epoch": 9.64804469273743, "grad_norm": 0.8560296297073364, "learning_rate": 0.0005193557422969187, "loss": 0.4049, "step": 17270 }, { "epoch": 9.648603351955307, "grad_norm": 0.48132944107055664, "learning_rate": 0.000519327731092437, "loss": 0.526, "step": 17271 }, { "epoch": 9.649162011173184, "grad_norm": 1.4822685718536377, "learning_rate": 0.0005192997198879552, "loss": 0.6057, "step": 17272 }, { "epoch": 9.649720670391062, "grad_norm": 0.43631210923194885, "learning_rate": 0.0005192717086834734, "loss": 0.4402, "step": 17273 }, { "epoch": 9.650279329608939, "grad_norm": 0.4125272333621979, "learning_rate": 0.0005192436974789916, "loss": 0.4166, "step": 17274 }, { "epoch": 9.650837988826815, "grad_norm": 0.38469111919403076, "learning_rate": 0.0005192156862745097, "loss": 0.4541, "step": 17275 }, { "epoch": 
9.651396648044694, "grad_norm": 0.4103352725505829, "learning_rate": 0.000519187675070028, "loss": 0.2842, "step": 17276 }, { "epoch": 9.65195530726257, "grad_norm": 1.4568179845809937, "learning_rate": 0.0005191596638655462, "loss": 0.3357, "step": 17277 }, { "epoch": 9.652513966480447, "grad_norm": 0.5212461352348328, "learning_rate": 0.0005191316526610644, "loss": 0.3724, "step": 17278 }, { "epoch": 9.653072625698323, "grad_norm": 0.577918529510498, "learning_rate": 0.0005191036414565827, "loss": 0.3594, "step": 17279 }, { "epoch": 9.653631284916202, "grad_norm": 0.39440223574638367, "learning_rate": 0.0005190756302521008, "loss": 0.4137, "step": 17280 }, { "epoch": 9.654189944134078, "grad_norm": 0.4893341362476349, "learning_rate": 0.0005190476190476191, "loss": 0.4233, "step": 17281 }, { "epoch": 9.654748603351955, "grad_norm": 2.2492904663085938, "learning_rate": 0.0005190196078431373, "loss": 0.3848, "step": 17282 }, { "epoch": 9.655307262569833, "grad_norm": 0.588296115398407, "learning_rate": 0.0005189915966386555, "loss": 0.4553, "step": 17283 }, { "epoch": 9.65586592178771, "grad_norm": 0.8972691297531128, "learning_rate": 0.0005189635854341737, "loss": 0.457, "step": 17284 }, { "epoch": 9.656424581005586, "grad_norm": 0.5906715393066406, "learning_rate": 0.0005189355742296919, "loss": 0.4258, "step": 17285 }, { "epoch": 9.656983240223465, "grad_norm": 0.42371079325675964, "learning_rate": 0.0005189075630252101, "loss": 0.3763, "step": 17286 }, { "epoch": 9.657541899441341, "grad_norm": 0.4780693054199219, "learning_rate": 0.0005188795518207283, "loss": 0.345, "step": 17287 }, { "epoch": 9.658100558659218, "grad_norm": 0.8028094172477722, "learning_rate": 0.0005188515406162465, "loss": 0.4291, "step": 17288 }, { "epoch": 9.658659217877094, "grad_norm": 0.44257014989852905, "learning_rate": 0.0005188235294117647, "loss": 0.4354, "step": 17289 }, { "epoch": 9.659217877094973, "grad_norm": 0.7343131303787231, "learning_rate": 0.0005187955182072829, "loss": 
0.3915, "step": 17290 }, { "epoch": 9.65977653631285, "grad_norm": 0.4098905622959137, "learning_rate": 0.0005187675070028011, "loss": 0.3703, "step": 17291 }, { "epoch": 9.660335195530726, "grad_norm": 0.5763243436813354, "learning_rate": 0.0005187394957983193, "loss": 0.3786, "step": 17292 }, { "epoch": 9.660893854748604, "grad_norm": 0.4634205996990204, "learning_rate": 0.0005187114845938375, "loss": 0.4564, "step": 17293 }, { "epoch": 9.66145251396648, "grad_norm": 0.7018575668334961, "learning_rate": 0.0005186834733893557, "loss": 0.3865, "step": 17294 }, { "epoch": 9.662011173184357, "grad_norm": 0.8497735261917114, "learning_rate": 0.000518655462184874, "loss": 0.4725, "step": 17295 }, { "epoch": 9.662569832402234, "grad_norm": 1.325561285018921, "learning_rate": 0.0005186274509803923, "loss": 0.4575, "step": 17296 }, { "epoch": 9.663128491620112, "grad_norm": 0.39585602283477783, "learning_rate": 0.0005185994397759104, "loss": 0.4186, "step": 17297 }, { "epoch": 9.663687150837989, "grad_norm": 0.9237062931060791, "learning_rate": 0.0005185714285714286, "loss": 0.5237, "step": 17298 }, { "epoch": 9.664245810055865, "grad_norm": 0.3961396813392639, "learning_rate": 0.0005185434173669468, "loss": 0.3538, "step": 17299 }, { "epoch": 9.664804469273744, "grad_norm": 0.5346406698226929, "learning_rate": 0.000518515406162465, "loss": 0.5811, "step": 17300 }, { "epoch": 9.66536312849162, "grad_norm": 0.42963486909866333, "learning_rate": 0.0005184873949579833, "loss": 0.3283, "step": 17301 }, { "epoch": 9.665921787709497, "grad_norm": 0.6642529368400574, "learning_rate": 0.0005184593837535014, "loss": 0.409, "step": 17302 }, { "epoch": 9.666480446927375, "grad_norm": 0.5658999085426331, "learning_rate": 0.0005184313725490196, "loss": 0.5139, "step": 17303 }, { "epoch": 9.667039106145252, "grad_norm": 0.5177510380744934, "learning_rate": 0.0005184033613445378, "loss": 0.4558, "step": 17304 }, { "epoch": 9.667597765363128, "grad_norm": 0.3965812623500824, 
"learning_rate": 0.000518375350140056, "loss": 0.4795, "step": 17305 }, { "epoch": 9.668156424581005, "grad_norm": 0.5368238687515259, "learning_rate": 0.0005183473389355743, "loss": 0.5326, "step": 17306 }, { "epoch": 9.668715083798883, "grad_norm": 0.4565581679344177, "learning_rate": 0.0005183193277310924, "loss": 0.3878, "step": 17307 }, { "epoch": 9.66927374301676, "grad_norm": 0.6998317837715149, "learning_rate": 0.0005182913165266106, "loss": 0.4608, "step": 17308 }, { "epoch": 9.669832402234636, "grad_norm": 1.0472875833511353, "learning_rate": 0.0005182633053221288, "loss": 0.4721, "step": 17309 }, { "epoch": 9.670391061452515, "grad_norm": 0.7842323184013367, "learning_rate": 0.000518235294117647, "loss": 0.5487, "step": 17310 }, { "epoch": 9.670949720670391, "grad_norm": 0.49156635999679565, "learning_rate": 0.0005182072829131654, "loss": 0.4039, "step": 17311 }, { "epoch": 9.671508379888268, "grad_norm": 1.769970417022705, "learning_rate": 0.0005181792717086836, "loss": 0.4403, "step": 17312 }, { "epoch": 9.672067039106146, "grad_norm": 0.557755172252655, "learning_rate": 0.0005181512605042017, "loss": 0.5044, "step": 17313 }, { "epoch": 9.672625698324023, "grad_norm": 0.5054857730865479, "learning_rate": 0.0005181232492997199, "loss": 0.4111, "step": 17314 }, { "epoch": 9.6731843575419, "grad_norm": 0.708062469959259, "learning_rate": 0.0005180952380952381, "loss": 0.3802, "step": 17315 }, { "epoch": 9.673743016759776, "grad_norm": 0.42805802822113037, "learning_rate": 0.0005180672268907564, "loss": 0.36, "step": 17316 }, { "epoch": 9.674301675977654, "grad_norm": 0.4069773554801941, "learning_rate": 0.0005180392156862746, "loss": 0.4357, "step": 17317 }, { "epoch": 9.67486033519553, "grad_norm": 0.676276683807373, "learning_rate": 0.0005180112044817927, "loss": 0.4035, "step": 17318 }, { "epoch": 9.675418994413407, "grad_norm": 0.4525884985923767, "learning_rate": 0.0005179831932773109, "loss": 0.3963, "step": 17319 }, { "epoch": 9.675977653631286, 
"grad_norm": 0.6365451812744141, "learning_rate": 0.0005179551820728291, "loss": 0.4527, "step": 17320 }, { "epoch": 9.676536312849162, "grad_norm": 0.8492831587791443, "learning_rate": 0.0005179271708683474, "loss": 0.5113, "step": 17321 }, { "epoch": 9.677094972067039, "grad_norm": 1.2222706079483032, "learning_rate": 0.0005178991596638656, "loss": 0.3895, "step": 17322 }, { "epoch": 9.677653631284915, "grad_norm": 0.6792795658111572, "learning_rate": 0.0005178711484593837, "loss": 0.4578, "step": 17323 }, { "epoch": 9.678212290502794, "grad_norm": 0.45879504084587097, "learning_rate": 0.0005178431372549019, "loss": 0.5174, "step": 17324 }, { "epoch": 9.67877094972067, "grad_norm": 0.6171234846115112, "learning_rate": 0.0005178151260504201, "loss": 0.4221, "step": 17325 }, { "epoch": 9.679329608938547, "grad_norm": 0.6004194617271423, "learning_rate": 0.0005177871148459384, "loss": 0.4285, "step": 17326 }, { "epoch": 9.679888268156425, "grad_norm": 0.44632893800735474, "learning_rate": 0.0005177591036414567, "loss": 0.3802, "step": 17327 }, { "epoch": 9.680446927374302, "grad_norm": 2.143056869506836, "learning_rate": 0.0005177310924369749, "loss": 0.4039, "step": 17328 }, { "epoch": 9.681005586592178, "grad_norm": 2.0904133319854736, "learning_rate": 0.000517703081232493, "loss": 0.4136, "step": 17329 }, { "epoch": 9.681564245810057, "grad_norm": 1.096933364868164, "learning_rate": 0.0005176750700280112, "loss": 0.3925, "step": 17330 }, { "epoch": 9.682122905027933, "grad_norm": 0.3774789869785309, "learning_rate": 0.0005176470588235295, "loss": 0.4076, "step": 17331 }, { "epoch": 9.68268156424581, "grad_norm": 1.2568482160568237, "learning_rate": 0.0005176190476190477, "loss": 0.4481, "step": 17332 }, { "epoch": 9.683240223463688, "grad_norm": 0.6713975667953491, "learning_rate": 0.0005175910364145659, "loss": 0.4465, "step": 17333 }, { "epoch": 9.683798882681565, "grad_norm": 0.47389307618141174, "learning_rate": 0.000517563025210084, "loss": 0.4224, "step": 
17334 }, { "epoch": 9.684357541899441, "grad_norm": 0.4750920832157135, "learning_rate": 0.0005175350140056022, "loss": 0.4169, "step": 17335 }, { "epoch": 9.684916201117318, "grad_norm": 0.5598821043968201, "learning_rate": 0.0005175070028011205, "loss": 0.4247, "step": 17336 }, { "epoch": 9.685474860335196, "grad_norm": 0.5696367621421814, "learning_rate": 0.0005174789915966387, "loss": 0.4178, "step": 17337 }, { "epoch": 9.686033519553073, "grad_norm": 0.6177706718444824, "learning_rate": 0.0005174509803921569, "loss": 0.547, "step": 17338 }, { "epoch": 9.68659217877095, "grad_norm": 0.40286052227020264, "learning_rate": 0.000517422969187675, "loss": 0.3373, "step": 17339 }, { "epoch": 9.687150837988828, "grad_norm": 1.664947271347046, "learning_rate": 0.0005173949579831932, "loss": 0.4093, "step": 17340 }, { "epoch": 9.687709497206704, "grad_norm": 0.4349423348903656, "learning_rate": 0.0005173669467787115, "loss": 0.4371, "step": 17341 }, { "epoch": 9.68826815642458, "grad_norm": 0.4237612783908844, "learning_rate": 0.0005173389355742297, "loss": 0.3985, "step": 17342 }, { "epoch": 9.688826815642457, "grad_norm": 0.45765864849090576, "learning_rate": 0.000517310924369748, "loss": 0.473, "step": 17343 }, { "epoch": 9.689385474860336, "grad_norm": 3.4241554737091064, "learning_rate": 0.0005172829131652662, "loss": 0.5189, "step": 17344 }, { "epoch": 9.689944134078212, "grad_norm": 0.6101832985877991, "learning_rate": 0.0005172549019607843, "loss": 0.3334, "step": 17345 }, { "epoch": 9.690502793296089, "grad_norm": 0.43298497796058655, "learning_rate": 0.0005172268907563026, "loss": 0.3926, "step": 17346 }, { "epoch": 9.691061452513967, "grad_norm": 0.544924795627594, "learning_rate": 0.0005171988795518208, "loss": 0.3795, "step": 17347 }, { "epoch": 9.691620111731844, "grad_norm": 2.2309019565582275, "learning_rate": 0.000517170868347339, "loss": 0.363, "step": 17348 }, { "epoch": 9.69217877094972, "grad_norm": 0.43780797719955444, "learning_rate": 
0.0005171428571428572, "loss": 0.3341, "step": 17349 }, { "epoch": 9.692737430167599, "grad_norm": 0.5309510231018066, "learning_rate": 0.0005171148459383753, "loss": 0.5498, "step": 17350 }, { "epoch": 9.693296089385475, "grad_norm": 0.4866096079349518, "learning_rate": 0.0005170868347338936, "loss": 0.4216, "step": 17351 }, { "epoch": 9.693854748603352, "grad_norm": 0.4058377742767334, "learning_rate": 0.0005170588235294118, "loss": 0.4583, "step": 17352 }, { "epoch": 9.694413407821228, "grad_norm": 0.3556528389453888, "learning_rate": 0.00051703081232493, "loss": 0.3125, "step": 17353 }, { "epoch": 9.694972067039107, "grad_norm": 0.598050057888031, "learning_rate": 0.0005170028011204482, "loss": 0.3957, "step": 17354 }, { "epoch": 9.695530726256983, "grad_norm": 7.379345893859863, "learning_rate": 0.0005169747899159663, "loss": 0.4797, "step": 17355 }, { "epoch": 9.69608938547486, "grad_norm": 0.8071146607398987, "learning_rate": 0.0005169467787114846, "loss": 0.6773, "step": 17356 }, { "epoch": 9.696648044692738, "grad_norm": 0.4843011200428009, "learning_rate": 0.0005169187675070028, "loss": 0.4337, "step": 17357 }, { "epoch": 9.697206703910615, "grad_norm": 0.4166673421859741, "learning_rate": 0.000516890756302521, "loss": 0.4847, "step": 17358 }, { "epoch": 9.697765363128491, "grad_norm": 0.6579785346984863, "learning_rate": 0.0005168627450980392, "loss": 0.4721, "step": 17359 }, { "epoch": 9.69832402234637, "grad_norm": 0.6555850505828857, "learning_rate": 0.0005168347338935574, "loss": 0.511, "step": 17360 }, { "epoch": 9.698882681564246, "grad_norm": 0.40956735610961914, "learning_rate": 0.0005168067226890757, "loss": 0.3491, "step": 17361 }, { "epoch": 9.699441340782123, "grad_norm": 0.42788368463516235, "learning_rate": 0.0005167787114845939, "loss": 0.3996, "step": 17362 }, { "epoch": 9.7, "grad_norm": 0.5142825841903687, "learning_rate": 0.0005167507002801121, "loss": 0.4358, "step": 17363 }, { "epoch": 9.700558659217878, "grad_norm": 
0.4930938184261322, "learning_rate": 0.0005167226890756303, "loss": 0.3458, "step": 17364 }, { "epoch": 9.701117318435754, "grad_norm": 2.621616840362549, "learning_rate": 0.0005166946778711485, "loss": 0.5364, "step": 17365 }, { "epoch": 9.70167597765363, "grad_norm": 1.3645325899124146, "learning_rate": 0.0005166666666666667, "loss": 0.3791, "step": 17366 }, { "epoch": 9.702234636871509, "grad_norm": 0.51255863904953, "learning_rate": 0.0005166386554621849, "loss": 0.4757, "step": 17367 }, { "epoch": 9.702793296089386, "grad_norm": 0.45767876505851746, "learning_rate": 0.0005166106442577031, "loss": 0.5137, "step": 17368 }, { "epoch": 9.703351955307262, "grad_norm": 1.3031829595565796, "learning_rate": 0.0005165826330532213, "loss": 0.3678, "step": 17369 }, { "epoch": 9.703910614525139, "grad_norm": 0.502005934715271, "learning_rate": 0.0005165546218487395, "loss": 0.3858, "step": 17370 }, { "epoch": 9.704469273743017, "grad_norm": 0.3450477421283722, "learning_rate": 0.0005165266106442577, "loss": 0.4116, "step": 17371 }, { "epoch": 9.705027932960894, "grad_norm": 0.9023796916007996, "learning_rate": 0.0005164985994397759, "loss": 0.3769, "step": 17372 }, { "epoch": 9.70558659217877, "grad_norm": 0.36074599623680115, "learning_rate": 0.0005164705882352941, "loss": 0.3729, "step": 17373 }, { "epoch": 9.706145251396649, "grad_norm": 0.5085729360580444, "learning_rate": 0.0005164425770308123, "loss": 0.5042, "step": 17374 }, { "epoch": 9.706703910614525, "grad_norm": 0.5425418615341187, "learning_rate": 0.0005164145658263305, "loss": 0.4355, "step": 17375 }, { "epoch": 9.707262569832402, "grad_norm": 0.4391874074935913, "learning_rate": 0.0005163865546218489, "loss": 0.5507, "step": 17376 }, { "epoch": 9.70782122905028, "grad_norm": 0.5718971490859985, "learning_rate": 0.000516358543417367, "loss": 0.5251, "step": 17377 }, { "epoch": 9.708379888268157, "grad_norm": 0.38186365365982056, "learning_rate": 0.0005163305322128852, "loss": 0.3621, "step": 17378 }, { 
"epoch": 9.708938547486033, "grad_norm": 0.5581526756286621, "learning_rate": 0.0005163025210084034, "loss": 0.4716, "step": 17379 }, { "epoch": 9.70949720670391, "grad_norm": 0.6507248282432556, "learning_rate": 0.0005162745098039216, "loss": 0.4515, "step": 17380 }, { "epoch": 9.710055865921788, "grad_norm": 0.515035092830658, "learning_rate": 0.0005162464985994399, "loss": 0.5512, "step": 17381 }, { "epoch": 9.710614525139665, "grad_norm": 0.7244021892547607, "learning_rate": 0.000516218487394958, "loss": 0.55, "step": 17382 }, { "epoch": 9.711173184357541, "grad_norm": 0.5577474236488342, "learning_rate": 0.0005161904761904762, "loss": 0.4144, "step": 17383 }, { "epoch": 9.71173184357542, "grad_norm": 0.3789259195327759, "learning_rate": 0.0005161624649859944, "loss": 0.3352, "step": 17384 }, { "epoch": 9.712290502793296, "grad_norm": 0.5367281436920166, "learning_rate": 0.0005161344537815126, "loss": 0.4809, "step": 17385 }, { "epoch": 9.712849162011173, "grad_norm": 0.37130919098854065, "learning_rate": 0.0005161064425770309, "loss": 0.3973, "step": 17386 }, { "epoch": 9.713407821229051, "grad_norm": 0.5869512557983398, "learning_rate": 0.000516078431372549, "loss": 0.5635, "step": 17387 }, { "epoch": 9.713966480446928, "grad_norm": 0.4152495861053467, "learning_rate": 0.0005160504201680672, "loss": 0.4131, "step": 17388 }, { "epoch": 9.714525139664804, "grad_norm": 0.4453161954879761, "learning_rate": 0.0005160224089635854, "loss": 0.4033, "step": 17389 }, { "epoch": 9.71508379888268, "grad_norm": 0.4040379226207733, "learning_rate": 0.0005159943977591036, "loss": 0.5094, "step": 17390 }, { "epoch": 9.71564245810056, "grad_norm": 0.5727993845939636, "learning_rate": 0.0005159663865546219, "loss": 0.3878, "step": 17391 }, { "epoch": 9.716201117318436, "grad_norm": 0.5577274560928345, "learning_rate": 0.0005159383753501401, "loss": 0.4524, "step": 17392 }, { "epoch": 9.716759776536312, "grad_norm": 0.6149861812591553, "learning_rate": 0.0005159103641456582, 
"loss": 0.5505, "step": 17393 }, { "epoch": 9.71731843575419, "grad_norm": 0.4324537515640259, "learning_rate": 0.0005158823529411765, "loss": 0.3578, "step": 17394 }, { "epoch": 9.717877094972067, "grad_norm": 0.5104597806930542, "learning_rate": 0.0005158543417366947, "loss": 0.3794, "step": 17395 }, { "epoch": 9.718435754189944, "grad_norm": 0.7242637872695923, "learning_rate": 0.000515826330532213, "loss": 0.5203, "step": 17396 }, { "epoch": 9.71899441340782, "grad_norm": 0.49718573689460754, "learning_rate": 0.0005157983193277312, "loss": 0.3944, "step": 17397 }, { "epoch": 9.719553072625699, "grad_norm": 0.536837100982666, "learning_rate": 0.0005157703081232493, "loss": 0.3955, "step": 17398 }, { "epoch": 9.720111731843575, "grad_norm": 0.6168854832649231, "learning_rate": 0.0005157422969187675, "loss": 0.4174, "step": 17399 }, { "epoch": 9.720670391061452, "grad_norm": 0.5200393795967102, "learning_rate": 0.0005157142857142857, "loss": 0.4442, "step": 17400 }, { "epoch": 9.72122905027933, "grad_norm": 0.3781569302082062, "learning_rate": 0.000515686274509804, "loss": 0.3348, "step": 17401 }, { "epoch": 9.721787709497207, "grad_norm": 0.3797781765460968, "learning_rate": 0.0005156582633053222, "loss": 0.3355, "step": 17402 }, { "epoch": 9.722346368715083, "grad_norm": 1.1978373527526855, "learning_rate": 0.0005156302521008403, "loss": 0.4886, "step": 17403 }, { "epoch": 9.722905027932962, "grad_norm": 3.591677665710449, "learning_rate": 0.0005156022408963585, "loss": 0.3189, "step": 17404 }, { "epoch": 9.723463687150838, "grad_norm": 0.8176594376564026, "learning_rate": 0.0005155742296918767, "loss": 0.4319, "step": 17405 }, { "epoch": 9.724022346368715, "grad_norm": 0.7716394662857056, "learning_rate": 0.0005155462184873949, "loss": 0.4223, "step": 17406 }, { "epoch": 9.724581005586593, "grad_norm": 0.7487101554870605, "learning_rate": 0.0005155182072829132, "loss": 0.4126, "step": 17407 }, { "epoch": 9.72513966480447, "grad_norm": 0.5277567505836487, 
"learning_rate": 0.0005154901960784314, "loss": 0.373, "step": 17408 }, { "epoch": 9.725698324022346, "grad_norm": 0.7035177946090698, "learning_rate": 0.0005154621848739495, "loss": 0.4986, "step": 17409 }, { "epoch": 9.726256983240223, "grad_norm": 0.4386945366859436, "learning_rate": 0.0005154341736694677, "loss": 0.4114, "step": 17410 }, { "epoch": 9.726815642458101, "grad_norm": 0.46786728501319885, "learning_rate": 0.000515406162464986, "loss": 0.386, "step": 17411 }, { "epoch": 9.727374301675978, "grad_norm": 0.76334547996521, "learning_rate": 0.0005153781512605043, "loss": 0.4074, "step": 17412 }, { "epoch": 9.727932960893854, "grad_norm": 0.8161738514900208, "learning_rate": 0.0005153501400560225, "loss": 0.4366, "step": 17413 }, { "epoch": 9.728491620111733, "grad_norm": 2.1180057525634766, "learning_rate": 0.0005153221288515406, "loss": 0.4547, "step": 17414 }, { "epoch": 9.72905027932961, "grad_norm": 1.3848165273666382, "learning_rate": 0.0005152941176470588, "loss": 0.4559, "step": 17415 }, { "epoch": 9.729608938547486, "grad_norm": 0.6294699311256409, "learning_rate": 0.000515266106442577, "loss": 0.6064, "step": 17416 }, { "epoch": 9.730167597765362, "grad_norm": 0.6414377093315125, "learning_rate": 0.0005152380952380953, "loss": 0.376, "step": 17417 }, { "epoch": 9.73072625698324, "grad_norm": 0.5367869138717651, "learning_rate": 0.0005152100840336135, "loss": 0.4111, "step": 17418 }, { "epoch": 9.731284916201117, "grad_norm": 0.863456130027771, "learning_rate": 0.0005151820728291316, "loss": 0.4863, "step": 17419 }, { "epoch": 9.731843575418994, "grad_norm": 0.7863894701004028, "learning_rate": 0.0005151540616246498, "loss": 0.5944, "step": 17420 }, { "epoch": 9.732402234636872, "grad_norm": 0.6741968989372253, "learning_rate": 0.000515126050420168, "loss": 0.4391, "step": 17421 }, { "epoch": 9.732960893854749, "grad_norm": 1.422831416130066, "learning_rate": 0.0005150980392156863, "loss": 0.4722, "step": 17422 }, { "epoch": 9.733519553072625, 
"grad_norm": 0.4514162540435791, "learning_rate": 0.0005150700280112045, "loss": 0.4898, "step": 17423 }, { "epoch": 9.734078212290502, "grad_norm": 0.6525101661682129, "learning_rate": 0.0005150420168067227, "loss": 0.4869, "step": 17424 }, { "epoch": 9.73463687150838, "grad_norm": 2.357609272003174, "learning_rate": 0.0005150140056022408, "loss": 0.4184, "step": 17425 }, { "epoch": 9.735195530726257, "grad_norm": 0.46039775013923645, "learning_rate": 0.000514985994397759, "loss": 0.3712, "step": 17426 }, { "epoch": 9.735754189944133, "grad_norm": 0.4177815616130829, "learning_rate": 0.0005149579831932774, "loss": 0.3431, "step": 17427 }, { "epoch": 9.736312849162012, "grad_norm": 0.6000614166259766, "learning_rate": 0.0005149299719887956, "loss": 0.4305, "step": 17428 }, { "epoch": 9.736871508379888, "grad_norm": 1.6972154378890991, "learning_rate": 0.0005149019607843138, "loss": 0.4688, "step": 17429 }, { "epoch": 9.737430167597765, "grad_norm": 0.48101353645324707, "learning_rate": 0.0005148739495798319, "loss": 0.6671, "step": 17430 }, { "epoch": 9.737988826815643, "grad_norm": 0.5486119985580444, "learning_rate": 0.0005148459383753501, "loss": 0.4086, "step": 17431 }, { "epoch": 9.73854748603352, "grad_norm": 0.8113036751747131, "learning_rate": 0.0005148179271708684, "loss": 0.3624, "step": 17432 }, { "epoch": 9.739106145251396, "grad_norm": 0.4654565155506134, "learning_rate": 0.0005147899159663866, "loss": 0.4304, "step": 17433 }, { "epoch": 9.739664804469275, "grad_norm": 0.4594738185405731, "learning_rate": 0.0005147619047619048, "loss": 0.4001, "step": 17434 }, { "epoch": 9.740223463687151, "grad_norm": 0.40408098697662354, "learning_rate": 0.0005147338935574229, "loss": 0.3278, "step": 17435 }, { "epoch": 9.740782122905028, "grad_norm": 1.3653596639633179, "learning_rate": 0.0005147058823529411, "loss": 0.4319, "step": 17436 }, { "epoch": 9.741340782122904, "grad_norm": 0.5705268383026123, "learning_rate": 0.0005146778711484594, "loss": 0.4371, "step": 
17437 }, { "epoch": 9.741899441340783, "grad_norm": 0.5972915887832642, "learning_rate": 0.0005146498599439776, "loss": 0.5505, "step": 17438 }, { "epoch": 9.74245810055866, "grad_norm": 0.46979445219039917, "learning_rate": 0.0005146218487394958, "loss": 0.3926, "step": 17439 }, { "epoch": 9.743016759776536, "grad_norm": 0.6100826263427734, "learning_rate": 0.000514593837535014, "loss": 0.4634, "step": 17440 }, { "epoch": 9.743575418994414, "grad_norm": 0.3186141550540924, "learning_rate": 0.0005145658263305321, "loss": 0.3184, "step": 17441 }, { "epoch": 9.74413407821229, "grad_norm": 0.3863731622695923, "learning_rate": 0.0005145378151260504, "loss": 0.4622, "step": 17442 }, { "epoch": 9.744692737430167, "grad_norm": 0.6097526550292969, "learning_rate": 0.0005145098039215687, "loss": 0.3044, "step": 17443 }, { "epoch": 9.745251396648044, "grad_norm": 1.0645852088928223, "learning_rate": 0.0005144817927170869, "loss": 0.3744, "step": 17444 }, { "epoch": 9.745810055865922, "grad_norm": 0.6177852153778076, "learning_rate": 0.0005144537815126051, "loss": 0.4153, "step": 17445 }, { "epoch": 9.746368715083799, "grad_norm": 3.6419007778167725, "learning_rate": 0.0005144257703081232, "loss": 0.4129, "step": 17446 }, { "epoch": 9.746927374301675, "grad_norm": 0.5785045027732849, "learning_rate": 0.0005143977591036415, "loss": 0.3975, "step": 17447 }, { "epoch": 9.747486033519554, "grad_norm": 0.6446021795272827, "learning_rate": 0.0005143697478991597, "loss": 0.4617, "step": 17448 }, { "epoch": 9.74804469273743, "grad_norm": 0.6637579202651978, "learning_rate": 0.0005143417366946779, "loss": 0.4362, "step": 17449 }, { "epoch": 9.748603351955307, "grad_norm": 0.7729748487472534, "learning_rate": 0.0005143137254901961, "loss": 0.5518, "step": 17450 }, { "epoch": 9.749162011173185, "grad_norm": 0.3719259798526764, "learning_rate": 0.0005142857142857142, "loss": 0.3733, "step": 17451 }, { "epoch": 9.749720670391062, "grad_norm": 0.6170015931129456, "learning_rate": 
0.0005142577030812325, "loss": 0.5065, "step": 17452 }, { "epoch": 9.750279329608938, "grad_norm": 6.93314790725708, "learning_rate": 0.0005142296918767507, "loss": 0.4768, "step": 17453 }, { "epoch": 9.750837988826815, "grad_norm": 1.0931603908538818, "learning_rate": 0.0005142016806722689, "loss": 0.504, "step": 17454 }, { "epoch": 9.751396648044693, "grad_norm": 1.4975506067276, "learning_rate": 0.0005141736694677871, "loss": 0.5809, "step": 17455 }, { "epoch": 9.75195530726257, "grad_norm": 0.6435559988021851, "learning_rate": 0.0005141456582633053, "loss": 0.4173, "step": 17456 }, { "epoch": 9.752513966480446, "grad_norm": 0.7482951283454895, "learning_rate": 0.0005141176470588235, "loss": 0.486, "step": 17457 }, { "epoch": 9.753072625698325, "grad_norm": 0.4143964946269989, "learning_rate": 0.0005140896358543417, "loss": 0.3543, "step": 17458 }, { "epoch": 9.753631284916201, "grad_norm": 0.6078479290008545, "learning_rate": 0.00051406162464986, "loss": 0.4561, "step": 17459 }, { "epoch": 9.754189944134078, "grad_norm": 0.5232478976249695, "learning_rate": 0.0005140336134453782, "loss": 0.5092, "step": 17460 }, { "epoch": 9.754748603351956, "grad_norm": 0.4944794774055481, "learning_rate": 0.0005140056022408964, "loss": 0.3744, "step": 17461 }, { "epoch": 9.755307262569833, "grad_norm": 1.5352438688278198, "learning_rate": 0.0005139775910364146, "loss": 0.5147, "step": 17462 }, { "epoch": 9.75586592178771, "grad_norm": 0.44495660066604614, "learning_rate": 0.0005139495798319328, "loss": 0.4241, "step": 17463 }, { "epoch": 9.756424581005586, "grad_norm": 0.42115840315818787, "learning_rate": 0.000513921568627451, "loss": 0.4605, "step": 17464 }, { "epoch": 9.756983240223464, "grad_norm": 0.43991491198539734, "learning_rate": 0.0005138935574229692, "loss": 0.3346, "step": 17465 }, { "epoch": 9.75754189944134, "grad_norm": 1.1453438997268677, "learning_rate": 0.0005138655462184874, "loss": 0.494, "step": 17466 }, { "epoch": 9.758100558659217, "grad_norm": 
0.45045948028564453, "learning_rate": 0.0005138375350140056, "loss": 0.4267, "step": 17467 }, { "epoch": 9.758659217877096, "grad_norm": 0.5257158875465393, "learning_rate": 0.0005138095238095238, "loss": 0.3169, "step": 17468 }, { "epoch": 9.759217877094972, "grad_norm": 0.5546936392784119, "learning_rate": 0.000513781512605042, "loss": 0.3687, "step": 17469 }, { "epoch": 9.759776536312849, "grad_norm": 0.9328604936599731, "learning_rate": 0.0005137535014005602, "loss": 0.3868, "step": 17470 }, { "epoch": 9.760335195530725, "grad_norm": 0.4966212511062622, "learning_rate": 0.0005137254901960784, "loss": 0.3948, "step": 17471 }, { "epoch": 9.760893854748604, "grad_norm": 0.4861254394054413, "learning_rate": 0.0005136974789915967, "loss": 0.3569, "step": 17472 }, { "epoch": 9.76145251396648, "grad_norm": 1.0003819465637207, "learning_rate": 0.0005136694677871148, "loss": 0.3406, "step": 17473 }, { "epoch": 9.762011173184357, "grad_norm": 0.7772659659385681, "learning_rate": 0.000513641456582633, "loss": 0.4225, "step": 17474 }, { "epoch": 9.762569832402235, "grad_norm": 0.4207322597503662, "learning_rate": 0.0005136134453781512, "loss": 0.3888, "step": 17475 }, { "epoch": 9.763128491620112, "grad_norm": 0.42850035429000854, "learning_rate": 0.0005135854341736695, "loss": 0.4259, "step": 17476 }, { "epoch": 9.763687150837988, "grad_norm": 0.745561420917511, "learning_rate": 0.0005135574229691878, "loss": 0.4161, "step": 17477 }, { "epoch": 9.764245810055867, "grad_norm": 0.4545363783836365, "learning_rate": 0.0005135294117647059, "loss": 0.48, "step": 17478 }, { "epoch": 9.764804469273743, "grad_norm": 0.6444083452224731, "learning_rate": 0.0005135014005602241, "loss": 0.3608, "step": 17479 }, { "epoch": 9.76536312849162, "grad_norm": 0.5528808236122131, "learning_rate": 0.0005134733893557423, "loss": 0.409, "step": 17480 }, { "epoch": 9.765921787709498, "grad_norm": 0.4951499402523041, "learning_rate": 0.0005134453781512605, "loss": 0.4347, "step": 17481 }, { 
"epoch": 9.766480446927375, "grad_norm": 0.43165549635887146, "learning_rate": 0.0005134173669467788, "loss": 0.4015, "step": 17482 }, { "epoch": 9.767039106145251, "grad_norm": 0.6858075857162476, "learning_rate": 0.0005133893557422969, "loss": 0.3906, "step": 17483 }, { "epoch": 9.767597765363128, "grad_norm": 0.4866388440132141, "learning_rate": 0.0005133613445378151, "loss": 0.4811, "step": 17484 }, { "epoch": 9.768156424581006, "grad_norm": 0.8450616598129272, "learning_rate": 0.0005133333333333333, "loss": 0.4078, "step": 17485 }, { "epoch": 9.768715083798883, "grad_norm": 0.47369492053985596, "learning_rate": 0.0005133053221288515, "loss": 0.46, "step": 17486 }, { "epoch": 9.76927374301676, "grad_norm": 0.4902918338775635, "learning_rate": 0.0005132773109243698, "loss": 0.3865, "step": 17487 }, { "epoch": 9.769832402234638, "grad_norm": 2.542403221130371, "learning_rate": 0.000513249299719888, "loss": 0.4601, "step": 17488 }, { "epoch": 9.770391061452514, "grad_norm": 0.458379328250885, "learning_rate": 0.0005132212885154061, "loss": 0.6388, "step": 17489 }, { "epoch": 9.77094972067039, "grad_norm": 0.5277748107910156, "learning_rate": 0.0005131932773109243, "loss": 0.4498, "step": 17490 }, { "epoch": 9.771508379888267, "grad_norm": 0.46168458461761475, "learning_rate": 0.0005131652661064425, "loss": 0.4498, "step": 17491 }, { "epoch": 9.772067039106146, "grad_norm": 0.4343564510345459, "learning_rate": 0.0005131372549019609, "loss": 0.4392, "step": 17492 }, { "epoch": 9.772625698324022, "grad_norm": 0.5236911177635193, "learning_rate": 0.0005131092436974791, "loss": 0.4222, "step": 17493 }, { "epoch": 9.773184357541899, "grad_norm": 0.5493744015693665, "learning_rate": 0.0005130812324929972, "loss": 0.4464, "step": 17494 }, { "epoch": 9.773743016759777, "grad_norm": 0.517719566822052, "learning_rate": 0.0005130532212885154, "loss": 0.4947, "step": 17495 }, { "epoch": 9.774301675977654, "grad_norm": 0.5794305205345154, "learning_rate": 0.0005130252100840336, 
"loss": 0.4684, "step": 17496 }, { "epoch": 9.77486033519553, "grad_norm": 0.4486599266529083, "learning_rate": 0.0005129971988795519, "loss": 0.3477, "step": 17497 }, { "epoch": 9.775418994413407, "grad_norm": 0.7341869473457336, "learning_rate": 0.0005129691876750701, "loss": 0.4012, "step": 17498 }, { "epoch": 9.775977653631285, "grad_norm": 0.6696038842201233, "learning_rate": 0.0005129411764705882, "loss": 0.422, "step": 17499 }, { "epoch": 9.776536312849162, "grad_norm": 0.5272493362426758, "learning_rate": 0.0005129131652661064, "loss": 0.4425, "step": 17500 }, { "epoch": 9.776536312849162, "eval_cer": 0.08913510741601476, "eval_loss": 0.33675557374954224, "eval_runtime": 55.6443, "eval_samples_per_second": 81.554, "eval_steps_per_second": 5.104, "eval_wer": 0.35376106812658864, "step": 17500 }, { "epoch": 9.777094972067038, "grad_norm": 5.756768226623535, "learning_rate": 0.0005128851540616246, "loss": 0.4709, "step": 17501 }, { "epoch": 9.777653631284917, "grad_norm": 0.5121567249298096, "learning_rate": 0.0005128571428571429, "loss": 0.4846, "step": 17502 }, { "epoch": 9.778212290502793, "grad_norm": 0.8362381458282471, "learning_rate": 0.0005128291316526611, "loss": 0.4251, "step": 17503 }, { "epoch": 9.77877094972067, "grad_norm": 0.5548500418663025, "learning_rate": 0.0005128011204481793, "loss": 0.4297, "step": 17504 }, { "epoch": 9.779329608938548, "grad_norm": 0.30815228819847107, "learning_rate": 0.0005127731092436974, "loss": 0.2953, "step": 17505 }, { "epoch": 9.779888268156425, "grad_norm": 0.5616171360015869, "learning_rate": 0.0005127450980392156, "loss": 0.3667, "step": 17506 }, { "epoch": 9.780446927374301, "grad_norm": 0.4028511047363281, "learning_rate": 0.000512717086834734, "loss": 0.4273, "step": 17507 }, { "epoch": 9.78100558659218, "grad_norm": 2.9142491817474365, "learning_rate": 0.0005126890756302522, "loss": 0.4801, "step": 17508 }, { "epoch": 9.781564245810056, "grad_norm": 0.4930936098098755, "learning_rate": 
0.0005126610644257704, "loss": 0.4564, "step": 17509 }, { "epoch": 9.782122905027933, "grad_norm": 0.47975635528564453, "learning_rate": 0.0005126330532212885, "loss": 0.467, "step": 17510 }, { "epoch": 9.78268156424581, "grad_norm": 0.42697492241859436, "learning_rate": 0.0005126050420168067, "loss": 0.381, "step": 17511 }, { "epoch": 9.783240223463688, "grad_norm": 1.0264018774032593, "learning_rate": 0.000512577030812325, "loss": 0.326, "step": 17512 }, { "epoch": 9.783798882681564, "grad_norm": 0.7078700065612793, "learning_rate": 0.0005125490196078432, "loss": 0.3513, "step": 17513 }, { "epoch": 9.78435754189944, "grad_norm": 0.4582546055316925, "learning_rate": 0.0005125210084033614, "loss": 0.3684, "step": 17514 }, { "epoch": 9.78491620111732, "grad_norm": 0.4572550654411316, "learning_rate": 0.0005124929971988795, "loss": 0.3964, "step": 17515 }, { "epoch": 9.785474860335196, "grad_norm": 0.7325780391693115, "learning_rate": 0.0005124649859943977, "loss": 0.5065, "step": 17516 }, { "epoch": 9.786033519553072, "grad_norm": 0.7576181888580322, "learning_rate": 0.000512436974789916, "loss": 0.4888, "step": 17517 }, { "epoch": 9.786592178770949, "grad_norm": 0.44088447093963623, "learning_rate": 0.0005124089635854342, "loss": 0.4157, "step": 17518 }, { "epoch": 9.787150837988827, "grad_norm": 0.4319549798965454, "learning_rate": 0.0005123809523809524, "loss": 0.4053, "step": 17519 }, { "epoch": 9.787709497206704, "grad_norm": 2.964113473892212, "learning_rate": 0.0005123529411764706, "loss": 0.3993, "step": 17520 }, { "epoch": 9.78826815642458, "grad_norm": 0.5118347406387329, "learning_rate": 0.0005123249299719887, "loss": 0.4678, "step": 17521 }, { "epoch": 9.788826815642459, "grad_norm": 0.8929596543312073, "learning_rate": 0.000512296918767507, "loss": 0.7208, "step": 17522 }, { "epoch": 9.789385474860335, "grad_norm": 0.6069864630699158, "learning_rate": 0.0005122689075630252, "loss": 0.4929, "step": 17523 }, { "epoch": 9.789944134078212, "grad_norm": 
0.7654553651809692, "learning_rate": 0.0005122408963585434, "loss": 0.4634, "step": 17524 }, { "epoch": 9.79050279329609, "grad_norm": 0.5669147968292236, "learning_rate": 0.0005122128851540617, "loss": 0.4851, "step": 17525 }, { "epoch": 9.791061452513967, "grad_norm": 0.650214672088623, "learning_rate": 0.0005121848739495798, "loss": 0.5101, "step": 17526 }, { "epoch": 9.791620111731843, "grad_norm": 0.6564046144485474, "learning_rate": 0.0005121568627450981, "loss": 0.4136, "step": 17527 }, { "epoch": 9.79217877094972, "grad_norm": 0.5590186715126038, "learning_rate": 0.0005121288515406163, "loss": 0.5181, "step": 17528 }, { "epoch": 9.792737430167598, "grad_norm": 0.6540253758430481, "learning_rate": 0.0005121008403361345, "loss": 0.5555, "step": 17529 }, { "epoch": 9.793296089385475, "grad_norm": 1.2710096836090088, "learning_rate": 0.0005120728291316527, "loss": 0.5116, "step": 17530 }, { "epoch": 9.793854748603351, "grad_norm": 0.5380557775497437, "learning_rate": 0.0005120448179271708, "loss": 0.4605, "step": 17531 }, { "epoch": 9.79441340782123, "grad_norm": 0.8932152986526489, "learning_rate": 0.0005120168067226891, "loss": 0.4825, "step": 17532 }, { "epoch": 9.794972067039106, "grad_norm": 0.41872331500053406, "learning_rate": 0.0005119887955182073, "loss": 0.4765, "step": 17533 }, { "epoch": 9.795530726256983, "grad_norm": 2.593411445617676, "learning_rate": 0.0005119607843137255, "loss": 0.4638, "step": 17534 }, { "epoch": 9.796089385474861, "grad_norm": 1.1170240640640259, "learning_rate": 0.0005119327731092437, "loss": 0.4862, "step": 17535 }, { "epoch": 9.796648044692738, "grad_norm": 0.7942990064620972, "learning_rate": 0.0005119047619047619, "loss": 0.4025, "step": 17536 }, { "epoch": 9.797206703910614, "grad_norm": 0.38618552684783936, "learning_rate": 0.0005118767507002801, "loss": 0.3998, "step": 17537 }, { "epoch": 9.797765363128491, "grad_norm": 0.5663967132568359, "learning_rate": 0.0005118487394957983, "loss": 0.3549, "step": 17538 }, { 
"epoch": 9.79832402234637, "grad_norm": 0.4338151812553406, "learning_rate": 0.0005118207282913165, "loss": 0.5271, "step": 17539 }, { "epoch": 9.798882681564246, "grad_norm": 0.946584165096283, "learning_rate": 0.0005117927170868347, "loss": 0.5013, "step": 17540 }, { "epoch": 9.799441340782122, "grad_norm": 0.5233994722366333, "learning_rate": 0.000511764705882353, "loss": 0.4657, "step": 17541 }, { "epoch": 9.8, "grad_norm": 0.5305754542350769, "learning_rate": 0.0005117366946778712, "loss": 0.4774, "step": 17542 }, { "epoch": 9.800558659217877, "grad_norm": 0.564389169216156, "learning_rate": 0.0005117086834733894, "loss": 0.4033, "step": 17543 }, { "epoch": 9.801117318435754, "grad_norm": 0.37814152240753174, "learning_rate": 0.0005116806722689076, "loss": 0.3245, "step": 17544 }, { "epoch": 9.80167597765363, "grad_norm": 0.4387277364730835, "learning_rate": 0.0005116526610644258, "loss": 0.4064, "step": 17545 }, { "epoch": 9.802234636871509, "grad_norm": 0.5256638526916504, "learning_rate": 0.000511624649859944, "loss": 0.3621, "step": 17546 }, { "epoch": 9.802793296089385, "grad_norm": 0.40469667315483093, "learning_rate": 0.0005115966386554623, "loss": 0.4084, "step": 17547 }, { "epoch": 9.803351955307262, "grad_norm": 0.6191628575325012, "learning_rate": 0.0005115686274509804, "loss": 0.3368, "step": 17548 }, { "epoch": 9.80391061452514, "grad_norm": 0.3349907100200653, "learning_rate": 0.0005115406162464986, "loss": 0.3558, "step": 17549 }, { "epoch": 9.804469273743017, "grad_norm": 0.7227099537849426, "learning_rate": 0.0005115126050420168, "loss": 0.4658, "step": 17550 }, { "epoch": 9.805027932960893, "grad_norm": 0.33272337913513184, "learning_rate": 0.000511484593837535, "loss": 0.3493, "step": 17551 }, { "epoch": 9.805586592178772, "grad_norm": 0.6169582009315491, "learning_rate": 0.0005114565826330533, "loss": 0.4483, "step": 17552 }, { "epoch": 9.806145251396648, "grad_norm": 0.842114269733429, "learning_rate": 0.0005114285714285714, "loss": 
0.4186, "step": 17553 }, { "epoch": 9.806703910614525, "grad_norm": 1.0288182497024536, "learning_rate": 0.0005114005602240896, "loss": 0.4989, "step": 17554 }, { "epoch": 9.807262569832401, "grad_norm": 0.45129239559173584, "learning_rate": 0.0005113725490196078, "loss": 0.4613, "step": 17555 }, { "epoch": 9.80782122905028, "grad_norm": 0.5221298336982727, "learning_rate": 0.000511344537815126, "loss": 0.428, "step": 17556 }, { "epoch": 9.808379888268156, "grad_norm": 0.5256417989730835, "learning_rate": 0.0005113165266106444, "loss": 0.5703, "step": 17557 }, { "epoch": 9.808938547486033, "grad_norm": 0.398550808429718, "learning_rate": 0.0005112885154061625, "loss": 0.4125, "step": 17558 }, { "epoch": 9.809497206703911, "grad_norm": 1.0827676057815552, "learning_rate": 0.0005112605042016807, "loss": 0.3728, "step": 17559 }, { "epoch": 9.810055865921788, "grad_norm": 0.6408460736274719, "learning_rate": 0.0005112324929971989, "loss": 0.3541, "step": 17560 }, { "epoch": 9.810614525139664, "grad_norm": 0.4745464026927948, "learning_rate": 0.0005112044817927171, "loss": 0.481, "step": 17561 }, { "epoch": 9.811173184357543, "grad_norm": 0.8619210720062256, "learning_rate": 0.0005111764705882354, "loss": 0.4815, "step": 17562 }, { "epoch": 9.81173184357542, "grad_norm": 0.5621064305305481, "learning_rate": 0.0005111484593837536, "loss": 0.5229, "step": 17563 }, { "epoch": 9.812290502793296, "grad_norm": 0.6314519643783569, "learning_rate": 0.0005111204481792717, "loss": 0.525, "step": 17564 }, { "epoch": 9.812849162011172, "grad_norm": 0.5272702574729919, "learning_rate": 0.0005110924369747899, "loss": 0.4177, "step": 17565 }, { "epoch": 9.81340782122905, "grad_norm": 1.2172542810440063, "learning_rate": 0.0005110644257703081, "loss": 0.5238, "step": 17566 }, { "epoch": 9.813966480446927, "grad_norm": 1.2464280128479004, "learning_rate": 0.0005110364145658264, "loss": 0.522, "step": 17567 }, { "epoch": 9.814525139664804, "grad_norm": 0.6002777218818665, 
"learning_rate": 0.0005110084033613446, "loss": 0.4432, "step": 17568 }, { "epoch": 9.815083798882682, "grad_norm": 1.5450071096420288, "learning_rate": 0.0005109803921568627, "loss": 0.4427, "step": 17569 }, { "epoch": 9.815642458100559, "grad_norm": 0.8230819702148438, "learning_rate": 0.0005109523809523809, "loss": 0.539, "step": 17570 }, { "epoch": 9.816201117318435, "grad_norm": 0.7993802428245544, "learning_rate": 0.0005109243697478991, "loss": 0.4118, "step": 17571 }, { "epoch": 9.816759776536312, "grad_norm": 0.5523188710212708, "learning_rate": 0.0005108963585434174, "loss": 0.5507, "step": 17572 }, { "epoch": 9.81731843575419, "grad_norm": 0.6646541357040405, "learning_rate": 0.0005108683473389357, "loss": 0.5483, "step": 17573 }, { "epoch": 9.817877094972067, "grad_norm": 0.49421900510787964, "learning_rate": 0.0005108403361344537, "loss": 0.4018, "step": 17574 }, { "epoch": 9.818435754189943, "grad_norm": 0.4183419644832611, "learning_rate": 0.000510812324929972, "loss": 0.3837, "step": 17575 }, { "epoch": 9.818994413407822, "grad_norm": 0.6737042665481567, "learning_rate": 0.0005107843137254902, "loss": 0.5087, "step": 17576 }, { "epoch": 9.819553072625698, "grad_norm": 1.118836522102356, "learning_rate": 0.0005107563025210085, "loss": 0.503, "step": 17577 }, { "epoch": 9.820111731843575, "grad_norm": 0.5079019069671631, "learning_rate": 0.0005107282913165267, "loss": 0.4871, "step": 17578 }, { "epoch": 9.820670391061453, "grad_norm": 0.6374537944793701, "learning_rate": 0.0005107002801120449, "loss": 0.4988, "step": 17579 }, { "epoch": 9.82122905027933, "grad_norm": 0.443655401468277, "learning_rate": 0.000510672268907563, "loss": 0.4187, "step": 17580 }, { "epoch": 9.821787709497206, "grad_norm": 0.44134342670440674, "learning_rate": 0.0005106442577030812, "loss": 0.4351, "step": 17581 }, { "epoch": 9.822346368715085, "grad_norm": 0.5324344635009766, "learning_rate": 0.0005106162464985995, "loss": 0.4203, "step": 17582 }, { "epoch": 
9.822905027932961, "grad_norm": 0.5665680766105652, "learning_rate": 0.0005105882352941177, "loss": 0.5292, "step": 17583 }, { "epoch": 9.823463687150838, "grad_norm": 1.1033124923706055, "learning_rate": 0.0005105602240896359, "loss": 0.4381, "step": 17584 }, { "epoch": 9.824022346368714, "grad_norm": 0.8590704202651978, "learning_rate": 0.000510532212885154, "loss": 0.5774, "step": 17585 }, { "epoch": 9.824581005586593, "grad_norm": 0.9189087152481079, "learning_rate": 0.0005105042016806722, "loss": 0.4903, "step": 17586 }, { "epoch": 9.82513966480447, "grad_norm": 0.6272187232971191, "learning_rate": 0.0005104761904761905, "loss": 0.4192, "step": 17587 }, { "epoch": 9.825698324022346, "grad_norm": 1.2049518823623657, "learning_rate": 0.0005104481792717087, "loss": 0.4588, "step": 17588 }, { "epoch": 9.826256983240224, "grad_norm": 0.714314877986908, "learning_rate": 0.000510420168067227, "loss": 0.4723, "step": 17589 }, { "epoch": 9.8268156424581, "grad_norm": 2.792663335800171, "learning_rate": 0.000510392156862745, "loss": 0.4281, "step": 17590 }, { "epoch": 9.827374301675977, "grad_norm": 0.49839502573013306, "learning_rate": 0.0005103641456582633, "loss": 0.4482, "step": 17591 }, { "epoch": 9.827932960893854, "grad_norm": 0.45128345489501953, "learning_rate": 0.0005103361344537816, "loss": 0.4066, "step": 17592 }, { "epoch": 9.828491620111732, "grad_norm": 2.272136926651001, "learning_rate": 0.0005103081232492998, "loss": 0.6903, "step": 17593 }, { "epoch": 9.829050279329609, "grad_norm": 0.6582977175712585, "learning_rate": 0.000510280112044818, "loss": 0.5057, "step": 17594 }, { "epoch": 9.829608938547485, "grad_norm": 0.5356632471084595, "learning_rate": 0.0005102521008403362, "loss": 0.3518, "step": 17595 }, { "epoch": 9.830167597765364, "grad_norm": 0.5593933463096619, "learning_rate": 0.0005102240896358543, "loss": 0.4968, "step": 17596 }, { "epoch": 9.83072625698324, "grad_norm": 0.382621705532074, "learning_rate": 0.0005101960784313726, "loss": 
0.3852, "step": 17597 }, { "epoch": 9.831284916201117, "grad_norm": 0.6879888772964478, "learning_rate": 0.0005101680672268908, "loss": 0.4928, "step": 17598 }, { "epoch": 9.831843575418995, "grad_norm": 1.9961494207382202, "learning_rate": 0.000510140056022409, "loss": 0.442, "step": 17599 }, { "epoch": 9.832402234636872, "grad_norm": 0.42393195629119873, "learning_rate": 0.0005101120448179272, "loss": 0.3717, "step": 17600 }, { "epoch": 9.832960893854748, "grad_norm": 0.7105714678764343, "learning_rate": 0.0005100840336134453, "loss": 0.6116, "step": 17601 }, { "epoch": 9.833519553072625, "grad_norm": 0.38168278336524963, "learning_rate": 0.0005100560224089636, "loss": 0.4296, "step": 17602 }, { "epoch": 9.834078212290503, "grad_norm": 3.837827444076538, "learning_rate": 0.0005100280112044818, "loss": 0.5037, "step": 17603 }, { "epoch": 9.83463687150838, "grad_norm": 0.43454086780548096, "learning_rate": 0.00051, "loss": 0.4355, "step": 17604 }, { "epoch": 9.835195530726256, "grad_norm": 0.48860910534858704, "learning_rate": 0.0005099719887955182, "loss": 0.4094, "step": 17605 }, { "epoch": 9.835754189944135, "grad_norm": 1.1021904945373535, "learning_rate": 0.0005099439775910363, "loss": 0.4156, "step": 17606 }, { "epoch": 9.836312849162011, "grad_norm": 0.42669644951820374, "learning_rate": 0.0005099159663865547, "loss": 0.4312, "step": 17607 }, { "epoch": 9.836871508379888, "grad_norm": 1.3057827949523926, "learning_rate": 0.0005098879551820729, "loss": 0.3712, "step": 17608 }, { "epoch": 9.837430167597766, "grad_norm": 0.5693615674972534, "learning_rate": 0.0005098599439775911, "loss": 0.4627, "step": 17609 }, { "epoch": 9.837988826815643, "grad_norm": 0.47908318042755127, "learning_rate": 0.0005098319327731093, "loss": 0.4758, "step": 17610 }, { "epoch": 9.83854748603352, "grad_norm": 1.0456914901733398, "learning_rate": 0.0005098039215686275, "loss": 0.4804, "step": 17611 }, { "epoch": 9.839106145251396, "grad_norm": 0.5318778157234192, "learning_rate": 
0.0005097759103641457, "loss": 0.4948, "step": 17612 }, { "epoch": 9.839664804469274, "grad_norm": 0.7408348321914673, "learning_rate": 0.0005097478991596639, "loss": 0.5706, "step": 17613 }, { "epoch": 9.84022346368715, "grad_norm": 0.38129833340644836, "learning_rate": 0.0005097198879551821, "loss": 0.4359, "step": 17614 }, { "epoch": 9.840782122905027, "grad_norm": 0.4278886020183563, "learning_rate": 0.0005096918767507003, "loss": 0.4706, "step": 17615 }, { "epoch": 9.841340782122906, "grad_norm": 0.5908858776092529, "learning_rate": 0.0005096638655462185, "loss": 0.4086, "step": 17616 }, { "epoch": 9.841899441340782, "grad_norm": 0.463882714509964, "learning_rate": 0.0005096358543417367, "loss": 0.4633, "step": 17617 }, { "epoch": 9.842458100558659, "grad_norm": 0.5576109290122986, "learning_rate": 0.0005096078431372549, "loss": 0.4767, "step": 17618 }, { "epoch": 9.843016759776535, "grad_norm": 2.875582695007324, "learning_rate": 0.0005095798319327731, "loss": 0.4883, "step": 17619 }, { "epoch": 9.843575418994414, "grad_norm": 1.7783489227294922, "learning_rate": 0.0005095518207282913, "loss": 0.5467, "step": 17620 }, { "epoch": 9.84413407821229, "grad_norm": 0.44130298495292664, "learning_rate": 0.0005095238095238095, "loss": 0.3833, "step": 17621 }, { "epoch": 9.844692737430167, "grad_norm": 0.5307114124298096, "learning_rate": 0.0005094957983193277, "loss": 0.4824, "step": 17622 }, { "epoch": 9.845251396648045, "grad_norm": 0.5682210326194763, "learning_rate": 0.000509467787114846, "loss": 0.393, "step": 17623 }, { "epoch": 9.845810055865922, "grad_norm": 0.5218472480773926, "learning_rate": 0.0005094397759103642, "loss": 0.3546, "step": 17624 }, { "epoch": 9.846368715083798, "grad_norm": 0.38048240542411804, "learning_rate": 0.0005094117647058824, "loss": 0.3798, "step": 17625 }, { "epoch": 9.846927374301677, "grad_norm": 0.3587186932563782, "learning_rate": 0.0005093837535014006, "loss": 0.4206, "step": 17626 }, { "epoch": 9.847486033519553, "grad_norm": 
0.9400306940078735, "learning_rate": 0.0005093557422969188, "loss": 0.5953, "step": 17627 }, { "epoch": 9.84804469273743, "grad_norm": 0.46474337577819824, "learning_rate": 0.000509327731092437, "loss": 0.3993, "step": 17628 }, { "epoch": 9.848603351955306, "grad_norm": 0.6591094732284546, "learning_rate": 0.0005092997198879552, "loss": 0.4134, "step": 17629 }, { "epoch": 9.849162011173185, "grad_norm": 2.574821710586548, "learning_rate": 0.0005092717086834734, "loss": 0.4574, "step": 17630 }, { "epoch": 9.849720670391061, "grad_norm": 0.8347105979919434, "learning_rate": 0.0005092436974789916, "loss": 0.5108, "step": 17631 }, { "epoch": 9.850279329608938, "grad_norm": 0.49660438299179077, "learning_rate": 0.0005092156862745098, "loss": 0.4522, "step": 17632 }, { "epoch": 9.850837988826816, "grad_norm": 0.4733448028564453, "learning_rate": 0.000509187675070028, "loss": 0.5828, "step": 17633 }, { "epoch": 9.851396648044693, "grad_norm": 0.5776231288909912, "learning_rate": 0.0005091596638655462, "loss": 0.382, "step": 17634 }, { "epoch": 9.85195530726257, "grad_norm": 0.44750189781188965, "learning_rate": 0.0005091316526610644, "loss": 0.4602, "step": 17635 }, { "epoch": 9.852513966480448, "grad_norm": 0.5970128178596497, "learning_rate": 0.0005091036414565826, "loss": 0.4939, "step": 17636 }, { "epoch": 9.853072625698324, "grad_norm": 0.9238325953483582, "learning_rate": 0.0005090756302521008, "loss": 0.4096, "step": 17637 }, { "epoch": 9.8536312849162, "grad_norm": 1.023821234703064, "learning_rate": 0.000509047619047619, "loss": 0.4228, "step": 17638 }, { "epoch": 9.854189944134077, "grad_norm": 0.6652180552482605, "learning_rate": 0.0005090196078431372, "loss": 0.5411, "step": 17639 }, { "epoch": 9.854748603351956, "grad_norm": 0.41319939494132996, "learning_rate": 0.0005089915966386555, "loss": 0.416, "step": 17640 }, { "epoch": 9.855307262569832, "grad_norm": 1.6668133735656738, "learning_rate": 0.0005089635854341737, "loss": 0.5576, "step": 17641 }, { 
"epoch": 9.855865921787709, "grad_norm": 0.5754125714302063, "learning_rate": 0.0005089355742296919, "loss": 0.4303, "step": 17642 }, { "epoch": 9.856424581005587, "grad_norm": 0.8854314684867859, "learning_rate": 0.0005089075630252102, "loss": 0.4667, "step": 17643 }, { "epoch": 9.856983240223464, "grad_norm": 1.496046781539917, "learning_rate": 0.0005088795518207283, "loss": 0.4558, "step": 17644 }, { "epoch": 9.85754189944134, "grad_norm": 0.5817050933837891, "learning_rate": 0.0005088515406162465, "loss": 0.3912, "step": 17645 }, { "epoch": 9.858100558659217, "grad_norm": 0.7718605995178223, "learning_rate": 0.0005088235294117647, "loss": 0.4674, "step": 17646 }, { "epoch": 9.858659217877095, "grad_norm": 0.791430652141571, "learning_rate": 0.0005087955182072829, "loss": 0.3752, "step": 17647 }, { "epoch": 9.859217877094972, "grad_norm": 0.718021035194397, "learning_rate": 0.0005087675070028012, "loss": 0.4314, "step": 17648 }, { "epoch": 9.859776536312848, "grad_norm": 0.7523576021194458, "learning_rate": 0.0005087394957983193, "loss": 0.3345, "step": 17649 }, { "epoch": 9.860335195530727, "grad_norm": 0.6502276062965393, "learning_rate": 0.0005087114845938375, "loss": 0.4004, "step": 17650 }, { "epoch": 9.860893854748603, "grad_norm": 0.5218349099159241, "learning_rate": 0.0005086834733893557, "loss": 0.4006, "step": 17651 }, { "epoch": 9.86145251396648, "grad_norm": 14.275579452514648, "learning_rate": 0.0005086554621848739, "loss": 0.417, "step": 17652 }, { "epoch": 9.862011173184358, "grad_norm": 0.4533391296863556, "learning_rate": 0.0005086274509803922, "loss": 0.3713, "step": 17653 }, { "epoch": 9.862569832402235, "grad_norm": 0.5260671973228455, "learning_rate": 0.0005085994397759103, "loss": 0.3648, "step": 17654 }, { "epoch": 9.863128491620111, "grad_norm": 0.684863269329071, "learning_rate": 0.0005085714285714285, "loss": 0.3956, "step": 17655 }, { "epoch": 9.86368715083799, "grad_norm": 0.43199965357780457, "learning_rate": 0.0005085434173669467, 
"loss": 0.4028, "step": 17656 }, { "epoch": 9.864245810055866, "grad_norm": 0.48945263028144836, "learning_rate": 0.000508515406162465, "loss": 0.4304, "step": 17657 }, { "epoch": 9.864804469273743, "grad_norm": 0.4334677755832672, "learning_rate": 0.0005084873949579833, "loss": 0.4946, "step": 17658 }, { "epoch": 9.86536312849162, "grad_norm": 0.6097220778465271, "learning_rate": 0.0005084593837535015, "loss": 0.4646, "step": 17659 }, { "epoch": 9.865921787709498, "grad_norm": 0.5355980396270752, "learning_rate": 0.0005084313725490196, "loss": 0.5117, "step": 17660 }, { "epoch": 9.866480446927374, "grad_norm": 0.7096639275550842, "learning_rate": 0.0005084033613445378, "loss": 0.3179, "step": 17661 }, { "epoch": 9.867039106145251, "grad_norm": 1.6744120121002197, "learning_rate": 0.000508375350140056, "loss": 0.5419, "step": 17662 }, { "epoch": 9.86759776536313, "grad_norm": 0.6097776889801025, "learning_rate": 0.0005083473389355743, "loss": 0.4162, "step": 17663 }, { "epoch": 9.868156424581006, "grad_norm": 3.0255117416381836, "learning_rate": 0.0005083193277310925, "loss": 0.4343, "step": 17664 }, { "epoch": 9.868715083798882, "grad_norm": 0.4438476860523224, "learning_rate": 0.0005082913165266106, "loss": 0.4919, "step": 17665 }, { "epoch": 9.869273743016759, "grad_norm": 0.6422849893569946, "learning_rate": 0.0005082633053221288, "loss": 0.5293, "step": 17666 }, { "epoch": 9.869832402234637, "grad_norm": 1.6900489330291748, "learning_rate": 0.000508235294117647, "loss": 0.397, "step": 17667 }, { "epoch": 9.870391061452514, "grad_norm": 0.5288065075874329, "learning_rate": 0.0005082072829131653, "loss": 0.413, "step": 17668 }, { "epoch": 9.87094972067039, "grad_norm": 0.9630688428878784, "learning_rate": 0.0005081792717086835, "loss": 0.3634, "step": 17669 }, { "epoch": 9.871508379888269, "grad_norm": 0.4045056700706482, "learning_rate": 0.0005081512605042016, "loss": 0.3873, "step": 17670 }, { "epoch": 9.872067039106145, "grad_norm": 0.6144238114356995, 
"learning_rate": 0.0005081232492997198, "loss": 0.4214, "step": 17671 }, { "epoch": 9.872625698324022, "grad_norm": 0.7358296513557434, "learning_rate": 0.000508095238095238, "loss": 0.5928, "step": 17672 }, { "epoch": 9.8731843575419, "grad_norm": 0.37702804803848267, "learning_rate": 0.0005080672268907564, "loss": 0.3447, "step": 17673 }, { "epoch": 9.873743016759777, "grad_norm": 0.3421069085597992, "learning_rate": 0.0005080392156862746, "loss": 0.4447, "step": 17674 }, { "epoch": 9.874301675977653, "grad_norm": 1.2603915929794312, "learning_rate": 0.0005080112044817928, "loss": 0.3705, "step": 17675 }, { "epoch": 9.87486033519553, "grad_norm": 0.4427424967288971, "learning_rate": 0.0005079831932773109, "loss": 0.3877, "step": 17676 }, { "epoch": 9.875418994413408, "grad_norm": 0.4370991885662079, "learning_rate": 0.0005079551820728291, "loss": 0.3291, "step": 17677 }, { "epoch": 9.875977653631285, "grad_norm": 4.742631435394287, "learning_rate": 0.0005079271708683474, "loss": 0.3813, "step": 17678 }, { "epoch": 9.876536312849161, "grad_norm": 0.4913809597492218, "learning_rate": 0.0005078991596638656, "loss": 0.4501, "step": 17679 }, { "epoch": 9.87709497206704, "grad_norm": 0.40640562772750854, "learning_rate": 0.0005078711484593838, "loss": 0.48, "step": 17680 }, { "epoch": 9.877653631284916, "grad_norm": 0.5571386814117432, "learning_rate": 0.0005078431372549019, "loss": 0.5487, "step": 17681 }, { "epoch": 9.878212290502793, "grad_norm": 0.6430655121803284, "learning_rate": 0.0005078151260504201, "loss": 0.435, "step": 17682 }, { "epoch": 9.878770949720671, "grad_norm": 0.8957878351211548, "learning_rate": 0.0005077871148459384, "loss": 0.432, "step": 17683 }, { "epoch": 9.879329608938548, "grad_norm": 0.3793388605117798, "learning_rate": 0.0005077591036414566, "loss": 0.4078, "step": 17684 }, { "epoch": 9.879888268156424, "grad_norm": 0.4135552644729614, "learning_rate": 0.0005077310924369748, "loss": 0.3916, "step": 17685 }, { "epoch": 9.880446927374301, 
"grad_norm": 0.6391292810440063, "learning_rate": 0.0005077030812324929, "loss": 0.4032, "step": 17686 }, { "epoch": 9.88100558659218, "grad_norm": 3.5902090072631836, "learning_rate": 0.0005076750700280111, "loss": 0.5199, "step": 17687 }, { "epoch": 9.881564245810056, "grad_norm": 0.6722155809402466, "learning_rate": 0.0005076470588235294, "loss": 0.4596, "step": 17688 }, { "epoch": 9.882122905027932, "grad_norm": 0.5453365445137024, "learning_rate": 0.0005076190476190477, "loss": 0.3791, "step": 17689 }, { "epoch": 9.88268156424581, "grad_norm": 0.49773773550987244, "learning_rate": 0.0005075910364145659, "loss": 0.3436, "step": 17690 }, { "epoch": 9.883240223463687, "grad_norm": 1.6311397552490234, "learning_rate": 0.0005075630252100841, "loss": 0.4237, "step": 17691 }, { "epoch": 9.883798882681564, "grad_norm": 0.4438299238681793, "learning_rate": 0.0005075350140056022, "loss": 0.3747, "step": 17692 }, { "epoch": 9.88435754189944, "grad_norm": 0.45684701204299927, "learning_rate": 0.0005075070028011205, "loss": 0.4337, "step": 17693 }, { "epoch": 9.884916201117319, "grad_norm": 1.1657993793487549, "learning_rate": 0.0005074789915966387, "loss": 0.5087, "step": 17694 }, { "epoch": 9.885474860335195, "grad_norm": 0.5270035266876221, "learning_rate": 0.0005074509803921569, "loss": 0.499, "step": 17695 }, { "epoch": 9.886033519553072, "grad_norm": 0.397850900888443, "learning_rate": 0.0005074229691876751, "loss": 0.4688, "step": 17696 }, { "epoch": 9.88659217877095, "grad_norm": 0.4842422902584076, "learning_rate": 0.0005073949579831932, "loss": 0.456, "step": 17697 }, { "epoch": 9.887150837988827, "grad_norm": 0.6004319190979004, "learning_rate": 0.0005073669467787115, "loss": 0.3446, "step": 17698 }, { "epoch": 9.887709497206703, "grad_norm": 0.41606444120407104, "learning_rate": 0.0005073389355742297, "loss": 0.3644, "step": 17699 }, { "epoch": 9.888268156424582, "grad_norm": 0.5245066285133362, "learning_rate": 0.0005073109243697479, "loss": 0.4748, "step": 
17700 }, { "epoch": 9.888826815642458, "grad_norm": 0.5773292183876038, "learning_rate": 0.0005072829131652661, "loss": 0.4035, "step": 17701 }, { "epoch": 9.889385474860335, "grad_norm": 0.5196065902709961, "learning_rate": 0.0005072549019607842, "loss": 0.5584, "step": 17702 }, { "epoch": 9.889944134078211, "grad_norm": 0.6403607130050659, "learning_rate": 0.0005072268907563025, "loss": 0.4928, "step": 17703 }, { "epoch": 9.89050279329609, "grad_norm": 1.6563434600830078, "learning_rate": 0.0005071988795518207, "loss": 0.3383, "step": 17704 }, { "epoch": 9.891061452513966, "grad_norm": 0.5171833038330078, "learning_rate": 0.000507170868347339, "loss": 0.3978, "step": 17705 }, { "epoch": 9.891620111731843, "grad_norm": 0.5451369881629944, "learning_rate": 0.0005071428571428572, "loss": 0.4419, "step": 17706 }, { "epoch": 9.892178770949721, "grad_norm": 0.7001411318778992, "learning_rate": 0.0005071148459383754, "loss": 0.4389, "step": 17707 }, { "epoch": 9.892737430167598, "grad_norm": 0.5099281072616577, "learning_rate": 0.0005070868347338936, "loss": 0.4806, "step": 17708 }, { "epoch": 9.893296089385474, "grad_norm": 0.9550559520721436, "learning_rate": 0.0005070588235294118, "loss": 0.3437, "step": 17709 }, { "epoch": 9.893854748603353, "grad_norm": 1.1679702997207642, "learning_rate": 0.00050703081232493, "loss": 0.5546, "step": 17710 }, { "epoch": 9.89441340782123, "grad_norm": 0.42507076263427734, "learning_rate": 0.0005070028011204482, "loss": 0.4687, "step": 17711 }, { "epoch": 9.894972067039106, "grad_norm": 0.6230679750442505, "learning_rate": 0.0005069747899159664, "loss": 0.4307, "step": 17712 }, { "epoch": 9.895530726256982, "grad_norm": 0.46311771869659424, "learning_rate": 0.0005069467787114846, "loss": 0.4823, "step": 17713 }, { "epoch": 9.89608938547486, "grad_norm": 0.2997567057609558, "learning_rate": 0.0005069187675070028, "loss": 0.3435, "step": 17714 }, { "epoch": 9.896648044692737, "grad_norm": 0.5212675333023071, "learning_rate": 
0.000506890756302521, "loss": 0.3867, "step": 17715 }, { "epoch": 9.897206703910614, "grad_norm": 1.2878317832946777, "learning_rate": 0.0005068627450980392, "loss": 0.4853, "step": 17716 }, { "epoch": 9.897765363128492, "grad_norm": 1.5933674573898315, "learning_rate": 0.0005068347338935574, "loss": 0.4322, "step": 17717 }, { "epoch": 9.898324022346369, "grad_norm": 0.6506137847900391, "learning_rate": 0.0005068067226890757, "loss": 0.5069, "step": 17718 }, { "epoch": 9.898882681564245, "grad_norm": 0.5195789933204651, "learning_rate": 0.0005067787114845938, "loss": 0.3979, "step": 17719 }, { "epoch": 9.899441340782122, "grad_norm": 0.635689914226532, "learning_rate": 0.000506750700280112, "loss": 0.3611, "step": 17720 }, { "epoch": 9.9, "grad_norm": 0.41705521941185, "learning_rate": 0.0005067226890756302, "loss": 0.4093, "step": 17721 }, { "epoch": 9.900558659217877, "grad_norm": 0.5046812891960144, "learning_rate": 0.0005066946778711485, "loss": 0.4071, "step": 17722 }, { "epoch": 9.901117318435753, "grad_norm": 0.5913689136505127, "learning_rate": 0.0005066666666666668, "loss": 0.5093, "step": 17723 }, { "epoch": 9.901675977653632, "grad_norm": 0.5863388180732727, "learning_rate": 0.0005066386554621849, "loss": 0.495, "step": 17724 }, { "epoch": 9.902234636871508, "grad_norm": 0.4121083617210388, "learning_rate": 0.0005066106442577031, "loss": 0.5517, "step": 17725 }, { "epoch": 9.902793296089385, "grad_norm": 0.4791109263896942, "learning_rate": 0.0005065826330532213, "loss": 0.4433, "step": 17726 }, { "epoch": 9.903351955307263, "grad_norm": 0.557059109210968, "learning_rate": 0.0005065546218487395, "loss": 0.3982, "step": 17727 }, { "epoch": 9.90391061452514, "grad_norm": 0.4848213195800781, "learning_rate": 0.0005065266106442578, "loss": 0.5228, "step": 17728 }, { "epoch": 9.904469273743016, "grad_norm": 0.439617782831192, "learning_rate": 0.0005064985994397759, "loss": 0.3029, "step": 17729 }, { "epoch": 9.905027932960895, "grad_norm": 0.6478733420372009, 
"learning_rate": 0.0005064705882352941, "loss": 0.5194, "step": 17730 }, { "epoch": 9.905586592178771, "grad_norm": 0.3450714945793152, "learning_rate": 0.0005064425770308123, "loss": 0.419, "step": 17731 }, { "epoch": 9.906145251396648, "grad_norm": 0.6491840481758118, "learning_rate": 0.0005064145658263305, "loss": 0.6409, "step": 17732 }, { "epoch": 9.906703910614524, "grad_norm": 0.6130540370941162, "learning_rate": 0.0005063865546218488, "loss": 0.3175, "step": 17733 }, { "epoch": 9.907262569832403, "grad_norm": 0.4240414798259735, "learning_rate": 0.000506358543417367, "loss": 0.3694, "step": 17734 }, { "epoch": 9.90782122905028, "grad_norm": 0.44559845328330994, "learning_rate": 0.0005063305322128851, "loss": 0.5907, "step": 17735 }, { "epoch": 9.908379888268156, "grad_norm": 0.8267097473144531, "learning_rate": 0.0005063025210084033, "loss": 0.4164, "step": 17736 }, { "epoch": 9.908938547486034, "grad_norm": 0.5043236017227173, "learning_rate": 0.0005062745098039215, "loss": 0.4432, "step": 17737 }, { "epoch": 9.90949720670391, "grad_norm": 0.6198955178260803, "learning_rate": 0.0005062464985994399, "loss": 0.3964, "step": 17738 }, { "epoch": 9.910055865921787, "grad_norm": 0.41644683480262756, "learning_rate": 0.0005062184873949581, "loss": 0.4098, "step": 17739 }, { "epoch": 9.910614525139664, "grad_norm": 0.6375578045845032, "learning_rate": 0.0005061904761904762, "loss": 0.3837, "step": 17740 }, { "epoch": 9.911173184357542, "grad_norm": 0.6016095876693726, "learning_rate": 0.0005061624649859944, "loss": 0.4679, "step": 17741 }, { "epoch": 9.911731843575419, "grad_norm": 0.9189779758453369, "learning_rate": 0.0005061344537815126, "loss": 0.4224, "step": 17742 }, { "epoch": 9.912290502793295, "grad_norm": 0.5817536115646362, "learning_rate": 0.0005061064425770309, "loss": 0.4614, "step": 17743 }, { "epoch": 9.912849162011174, "grad_norm": 0.528851330280304, "learning_rate": 0.0005060784313725491, "loss": 0.479, "step": 17744 }, { "epoch": 
9.91340782122905, "grad_norm": 0.576828122138977, "learning_rate": 0.0005060504201680672, "loss": 0.4654, "step": 17745 }, { "epoch": 9.913966480446927, "grad_norm": 0.39457982778549194, "learning_rate": 0.0005060224089635854, "loss": 0.4541, "step": 17746 }, { "epoch": 9.914525139664804, "grad_norm": 1.1841816902160645, "learning_rate": 0.0005059943977591036, "loss": 0.3473, "step": 17747 }, { "epoch": 9.915083798882682, "grad_norm": 4.332278728485107, "learning_rate": 0.0005059663865546219, "loss": 0.5184, "step": 17748 }, { "epoch": 9.915642458100558, "grad_norm": 0.4184720814228058, "learning_rate": 0.0005059383753501401, "loss": 0.4291, "step": 17749 }, { "epoch": 9.916201117318435, "grad_norm": 0.4247817099094391, "learning_rate": 0.0005059103641456583, "loss": 0.5183, "step": 17750 }, { "epoch": 9.916759776536313, "grad_norm": 0.6020652651786804, "learning_rate": 0.0005058823529411764, "loss": 0.3901, "step": 17751 }, { "epoch": 9.91731843575419, "grad_norm": 0.3913191258907318, "learning_rate": 0.0005058543417366946, "loss": 0.3606, "step": 17752 }, { "epoch": 9.917877094972066, "grad_norm": 0.7320433855056763, "learning_rate": 0.000505826330532213, "loss": 0.4539, "step": 17753 }, { "epoch": 9.918435754189945, "grad_norm": 0.4420003890991211, "learning_rate": 0.0005057983193277312, "loss": 0.4493, "step": 17754 }, { "epoch": 9.918994413407821, "grad_norm": 0.504277765750885, "learning_rate": 0.0005057703081232494, "loss": 0.3642, "step": 17755 }, { "epoch": 9.919553072625698, "grad_norm": 0.5234881043434143, "learning_rate": 0.0005057422969187675, "loss": 0.3795, "step": 17756 }, { "epoch": 9.920111731843576, "grad_norm": 0.9350456595420837, "learning_rate": 0.0005057142857142857, "loss": 0.378, "step": 17757 }, { "epoch": 9.920670391061453, "grad_norm": 0.3998241722583771, "learning_rate": 0.000505686274509804, "loss": 0.3876, "step": 17758 }, { "epoch": 9.92122905027933, "grad_norm": 0.5045170783996582, "learning_rate": 0.0005056582633053222, "loss": 
0.4123, "step": 17759 }, { "epoch": 9.921787709497206, "grad_norm": 2.2669641971588135, "learning_rate": 0.0005056302521008404, "loss": 0.3603, "step": 17760 }, { "epoch": 9.922346368715084, "grad_norm": 0.5163483619689941, "learning_rate": 0.0005056022408963585, "loss": 0.4299, "step": 17761 }, { "epoch": 9.922905027932961, "grad_norm": 1.0533607006072998, "learning_rate": 0.0005055742296918767, "loss": 0.4692, "step": 17762 }, { "epoch": 9.923463687150837, "grad_norm": 0.44947054982185364, "learning_rate": 0.000505546218487395, "loss": 0.4444, "step": 17763 }, { "epoch": 9.924022346368716, "grad_norm": 0.4417736530303955, "learning_rate": 0.0005055182072829132, "loss": 0.3622, "step": 17764 }, { "epoch": 9.924581005586592, "grad_norm": 0.6334666609764099, "learning_rate": 0.0005054901960784314, "loss": 0.5589, "step": 17765 }, { "epoch": 9.925139664804469, "grad_norm": 0.33501386642456055, "learning_rate": 0.0005054621848739496, "loss": 0.2728, "step": 17766 }, { "epoch": 9.925698324022346, "grad_norm": 0.445417582988739, "learning_rate": 0.0005054341736694677, "loss": 0.532, "step": 17767 }, { "epoch": 9.926256983240224, "grad_norm": 0.4315336346626282, "learning_rate": 0.000505406162464986, "loss": 0.477, "step": 17768 }, { "epoch": 9.9268156424581, "grad_norm": 0.543921947479248, "learning_rate": 0.0005053781512605042, "loss": 0.3535, "step": 17769 }, { "epoch": 9.927374301675977, "grad_norm": 0.3253154158592224, "learning_rate": 0.0005053501400560224, "loss": 0.3135, "step": 17770 }, { "epoch": 9.927932960893855, "grad_norm": 0.5165511965751648, "learning_rate": 0.0005053221288515407, "loss": 0.3982, "step": 17771 }, { "epoch": 9.928491620111732, "grad_norm": 0.4735526442527771, "learning_rate": 0.0005052941176470588, "loss": 0.4189, "step": 17772 }, { "epoch": 9.929050279329608, "grad_norm": 0.3920416235923767, "learning_rate": 0.0005052661064425771, "loss": 0.3485, "step": 17773 }, { "epoch": 9.929608938547487, "grad_norm": 0.41889169812202454, 
"learning_rate": 0.0005052380952380953, "loss": 0.47, "step": 17774 }, { "epoch": 9.930167597765363, "grad_norm": 0.5417645573616028, "learning_rate": 0.0005052100840336135, "loss": 0.3252, "step": 17775 }, { "epoch": 9.93072625698324, "grad_norm": 1.3262546062469482, "learning_rate": 0.0005051820728291317, "loss": 0.3659, "step": 17776 }, { "epoch": 9.931284916201117, "grad_norm": 0.5170767307281494, "learning_rate": 0.0005051540616246498, "loss": 0.3634, "step": 17777 }, { "epoch": 9.931843575418995, "grad_norm": 0.41425251960754395, "learning_rate": 0.0005051260504201681, "loss": 0.3723, "step": 17778 }, { "epoch": 9.932402234636871, "grad_norm": 0.4712209701538086, "learning_rate": 0.0005050980392156863, "loss": 0.4487, "step": 17779 }, { "epoch": 9.932960893854748, "grad_norm": 0.43091684579849243, "learning_rate": 0.0005050700280112045, "loss": 0.397, "step": 17780 }, { "epoch": 9.933519553072626, "grad_norm": 0.6392014026641846, "learning_rate": 0.0005050420168067227, "loss": 0.4536, "step": 17781 }, { "epoch": 9.934078212290503, "grad_norm": 5.059877872467041, "learning_rate": 0.0005050140056022409, "loss": 0.5352, "step": 17782 }, { "epoch": 9.93463687150838, "grad_norm": 0.5430557131767273, "learning_rate": 0.0005049859943977591, "loss": 0.4014, "step": 17783 }, { "epoch": 9.935195530726258, "grad_norm": 0.5238717794418335, "learning_rate": 0.0005049579831932773, "loss": 0.4468, "step": 17784 }, { "epoch": 9.935754189944134, "grad_norm": 0.6257060170173645, "learning_rate": 0.0005049299719887955, "loss": 0.5419, "step": 17785 }, { "epoch": 9.936312849162011, "grad_norm": 0.6622463464736938, "learning_rate": 0.0005049019607843137, "loss": 0.4419, "step": 17786 }, { "epoch": 9.936871508379888, "grad_norm": 0.6698209643363953, "learning_rate": 0.000504873949579832, "loss": 0.4104, "step": 17787 }, { "epoch": 9.937430167597766, "grad_norm": 0.6647917032241821, "learning_rate": 0.0005048459383753502, "loss": 0.4414, "step": 17788 }, { "epoch": 
9.937988826815642, "grad_norm": 1.059608817100525, "learning_rate": 0.0005048179271708684, "loss": 0.3558, "step": 17789 }, { "epoch": 9.938547486033519, "grad_norm": 1.3215471506118774, "learning_rate": 0.0005047899159663866, "loss": 0.4392, "step": 17790 }, { "epoch": 9.939106145251397, "grad_norm": 0.42416998744010925, "learning_rate": 0.0005047619047619048, "loss": 0.4676, "step": 17791 }, { "epoch": 9.939664804469274, "grad_norm": 0.5323529839515686, "learning_rate": 0.000504733893557423, "loss": 0.4977, "step": 17792 }, { "epoch": 9.94022346368715, "grad_norm": 0.4561229348182678, "learning_rate": 0.0005047058823529412, "loss": 0.3788, "step": 17793 }, { "epoch": 9.940782122905027, "grad_norm": 0.550279974937439, "learning_rate": 0.0005046778711484594, "loss": 0.4523, "step": 17794 }, { "epoch": 9.941340782122905, "grad_norm": 0.47205206751823425, "learning_rate": 0.0005046498599439776, "loss": 0.4351, "step": 17795 }, { "epoch": 9.941899441340782, "grad_norm": 0.3215457499027252, "learning_rate": 0.0005046218487394958, "loss": 0.3374, "step": 17796 }, { "epoch": 9.942458100558659, "grad_norm": 0.7078421711921692, "learning_rate": 0.000504593837535014, "loss": 0.368, "step": 17797 }, { "epoch": 9.943016759776537, "grad_norm": 0.777804434299469, "learning_rate": 0.0005045658263305323, "loss": 0.4869, "step": 17798 }, { "epoch": 9.943575418994413, "grad_norm": 0.4482170343399048, "learning_rate": 0.0005045378151260504, "loss": 0.3924, "step": 17799 }, { "epoch": 9.94413407821229, "grad_norm": 0.5563918352127075, "learning_rate": 0.0005045098039215686, "loss": 0.4068, "step": 17800 }, { "epoch": 9.944692737430168, "grad_norm": 1.5845192670822144, "learning_rate": 0.0005044817927170868, "loss": 0.5286, "step": 17801 }, { "epoch": 9.945251396648045, "grad_norm": 0.4471222460269928, "learning_rate": 0.000504453781512605, "loss": 0.3576, "step": 17802 }, { "epoch": 9.945810055865921, "grad_norm": 1.390992283821106, "learning_rate": 0.0005044257703081234, "loss": 
0.6128, "step": 17803 }, { "epoch": 9.946368715083798, "grad_norm": 0.4919310510158539, "learning_rate": 0.0005043977591036415, "loss": 0.4478, "step": 17804 }, { "epoch": 9.946927374301676, "grad_norm": 0.6390125155448914, "learning_rate": 0.0005043697478991597, "loss": 0.5085, "step": 17805 }, { "epoch": 9.947486033519553, "grad_norm": 0.7367087006568909, "learning_rate": 0.0005043417366946779, "loss": 0.3303, "step": 17806 }, { "epoch": 9.94804469273743, "grad_norm": 0.4817686676979065, "learning_rate": 0.0005043137254901961, "loss": 0.5774, "step": 17807 }, { "epoch": 9.948603351955308, "grad_norm": 0.4301929771900177, "learning_rate": 0.0005042857142857144, "loss": 0.4102, "step": 17808 }, { "epoch": 9.949162011173184, "grad_norm": 0.44621533155441284, "learning_rate": 0.0005042577030812325, "loss": 0.3929, "step": 17809 }, { "epoch": 9.949720670391061, "grad_norm": 0.5734425783157349, "learning_rate": 0.0005042296918767507, "loss": 0.4805, "step": 17810 }, { "epoch": 9.95027932960894, "grad_norm": 0.4563545882701874, "learning_rate": 0.0005042016806722689, "loss": 0.4574, "step": 17811 }, { "epoch": 9.950837988826816, "grad_norm": 0.6577954888343811, "learning_rate": 0.0005041736694677871, "loss": 0.3443, "step": 17812 }, { "epoch": 9.951396648044692, "grad_norm": 1.6626604795455933, "learning_rate": 0.0005041456582633054, "loss": 0.4839, "step": 17813 }, { "epoch": 9.951955307262569, "grad_norm": 0.44711384177207947, "learning_rate": 0.0005041176470588236, "loss": 0.4409, "step": 17814 }, { "epoch": 9.952513966480447, "grad_norm": 0.5573927760124207, "learning_rate": 0.0005040896358543417, "loss": 0.5416, "step": 17815 }, { "epoch": 9.953072625698324, "grad_norm": 0.5896942019462585, "learning_rate": 0.0005040616246498599, "loss": 0.4672, "step": 17816 }, { "epoch": 9.9536312849162, "grad_norm": 0.5028023719787598, "learning_rate": 0.0005040336134453781, "loss": 0.3652, "step": 17817 }, { "epoch": 9.954189944134079, "grad_norm": 0.5095908045768738, 
"learning_rate": 0.0005040056022408964, "loss": 0.3766, "step": 17818 }, { "epoch": 9.954748603351955, "grad_norm": 0.6158854365348816, "learning_rate": 0.0005039775910364147, "loss": 0.4199, "step": 17819 }, { "epoch": 9.955307262569832, "grad_norm": 0.6902062296867371, "learning_rate": 0.0005039495798319327, "loss": 0.4139, "step": 17820 }, { "epoch": 9.955865921787709, "grad_norm": 0.5172967910766602, "learning_rate": 0.000503921568627451, "loss": 0.5466, "step": 17821 }, { "epoch": 9.956424581005587, "grad_norm": 0.618057906627655, "learning_rate": 0.0005038935574229692, "loss": 0.502, "step": 17822 }, { "epoch": 9.956983240223463, "grad_norm": 5.883916854858398, "learning_rate": 0.0005038655462184875, "loss": 0.4512, "step": 17823 }, { "epoch": 9.95754189944134, "grad_norm": 0.43965721130371094, "learning_rate": 0.0005038375350140057, "loss": 0.3345, "step": 17824 }, { "epoch": 9.958100558659218, "grad_norm": 1.9393320083618164, "learning_rate": 0.0005038095238095238, "loss": 0.5221, "step": 17825 }, { "epoch": 9.958659217877095, "grad_norm": 0.5314610600471497, "learning_rate": 0.000503781512605042, "loss": 0.3104, "step": 17826 }, { "epoch": 9.959217877094972, "grad_norm": 0.6425861716270447, "learning_rate": 0.0005037535014005602, "loss": 0.4868, "step": 17827 }, { "epoch": 9.95977653631285, "grad_norm": 0.35783296823501587, "learning_rate": 0.0005037254901960785, "loss": 0.3919, "step": 17828 }, { "epoch": 9.960335195530726, "grad_norm": 1.5035312175750732, "learning_rate": 0.0005036974789915967, "loss": 0.4951, "step": 17829 }, { "epoch": 9.960893854748603, "grad_norm": 9.320940017700195, "learning_rate": 0.0005036694677871149, "loss": 0.3845, "step": 17830 }, { "epoch": 9.961452513966481, "grad_norm": 0.3904717266559601, "learning_rate": 0.000503641456582633, "loss": 0.4452, "step": 17831 }, { "epoch": 9.962011173184358, "grad_norm": 0.5966362357139587, "learning_rate": 0.0005036134453781512, "loss": 0.4985, "step": 17832 }, { "epoch": 9.962569832402234, 
"grad_norm": 0.7172439098358154, "learning_rate": 0.0005035854341736695, "loss": 0.4255, "step": 17833 }, { "epoch": 9.963128491620111, "grad_norm": 0.5624261498451233, "learning_rate": 0.0005035574229691877, "loss": 0.4191, "step": 17834 }, { "epoch": 9.96368715083799, "grad_norm": 1.0373311042785645, "learning_rate": 0.000503529411764706, "loss": 0.3993, "step": 17835 }, { "epoch": 9.964245810055866, "grad_norm": 0.47587740421295166, "learning_rate": 0.000503501400560224, "loss": 0.5601, "step": 17836 }, { "epoch": 9.964804469273743, "grad_norm": 0.8644841313362122, "learning_rate": 0.0005034733893557423, "loss": 0.4302, "step": 17837 }, { "epoch": 9.96536312849162, "grad_norm": 0.6737934947013855, "learning_rate": 0.0005034453781512606, "loss": 0.4372, "step": 17838 }, { "epoch": 9.965921787709497, "grad_norm": 0.8161765933036804, "learning_rate": 0.0005034173669467788, "loss": 0.3883, "step": 17839 }, { "epoch": 9.966480446927374, "grad_norm": 0.9269985556602478, "learning_rate": 0.000503389355742297, "loss": 0.497, "step": 17840 }, { "epoch": 9.96703910614525, "grad_norm": 0.40938255190849304, "learning_rate": 0.0005033613445378151, "loss": 0.4569, "step": 17841 }, { "epoch": 9.967597765363129, "grad_norm": 1.0076881647109985, "learning_rate": 0.0005033333333333333, "loss": 0.3871, "step": 17842 }, { "epoch": 9.968156424581005, "grad_norm": 0.7609528303146362, "learning_rate": 0.0005033053221288516, "loss": 0.4331, "step": 17843 }, { "epoch": 9.968715083798882, "grad_norm": 0.44980940222740173, "learning_rate": 0.0005032773109243698, "loss": 0.3597, "step": 17844 }, { "epoch": 9.96927374301676, "grad_norm": 0.3942873477935791, "learning_rate": 0.000503249299719888, "loss": 0.4042, "step": 17845 }, { "epoch": 9.969832402234637, "grad_norm": 0.5557286143302917, "learning_rate": 0.0005032212885154062, "loss": 0.5173, "step": 17846 }, { "epoch": 9.970391061452514, "grad_norm": 0.5568078756332397, "learning_rate": 0.0005031932773109243, "loss": 0.3698, "step": 
17847 }, { "epoch": 9.970949720670392, "grad_norm": 0.3488023281097412, "learning_rate": 0.0005031652661064426, "loss": 0.3522, "step": 17848 }, { "epoch": 9.971508379888268, "grad_norm": 0.5862781405448914, "learning_rate": 0.0005031372549019608, "loss": 0.4666, "step": 17849 }, { "epoch": 9.972067039106145, "grad_norm": 0.4708919823169708, "learning_rate": 0.000503109243697479, "loss": 0.4491, "step": 17850 }, { "epoch": 9.972625698324022, "grad_norm": 0.46804797649383545, "learning_rate": 0.0005030812324929972, "loss": 0.3982, "step": 17851 }, { "epoch": 9.9731843575419, "grad_norm": 0.48031699657440186, "learning_rate": 0.0005030532212885153, "loss": 0.3883, "step": 17852 }, { "epoch": 9.973743016759776, "grad_norm": 0.429153174161911, "learning_rate": 0.0005030252100840335, "loss": 0.3737, "step": 17853 }, { "epoch": 9.974301675977653, "grad_norm": 0.539635956287384, "learning_rate": 0.0005029971988795519, "loss": 0.4289, "step": 17854 }, { "epoch": 9.974860335195531, "grad_norm": 0.797146201133728, "learning_rate": 0.0005029691876750701, "loss": 0.293, "step": 17855 }, { "epoch": 9.975418994413408, "grad_norm": 1.0138105154037476, "learning_rate": 0.0005029411764705883, "loss": 0.4333, "step": 17856 }, { "epoch": 9.975977653631285, "grad_norm": 2.880821704864502, "learning_rate": 0.0005029131652661064, "loss": 0.5098, "step": 17857 }, { "epoch": 9.976536312849163, "grad_norm": 0.42172062397003174, "learning_rate": 0.0005028851540616246, "loss": 0.4094, "step": 17858 }, { "epoch": 9.97709497206704, "grad_norm": 0.4181463420391083, "learning_rate": 0.0005028571428571429, "loss": 0.3671, "step": 17859 }, { "epoch": 9.977653631284916, "grad_norm": 0.4769952893257141, "learning_rate": 0.0005028291316526611, "loss": 0.448, "step": 17860 }, { "epoch": 9.978212290502793, "grad_norm": 0.47520503401756287, "learning_rate": 0.0005028011204481793, "loss": 0.4918, "step": 17861 }, { "epoch": 9.978770949720671, "grad_norm": 0.5936439633369446, "learning_rate": 
0.0005027731092436975, "loss": 0.432, "step": 17862 }, { "epoch": 9.979329608938547, "grad_norm": 2.3392996788024902, "learning_rate": 0.0005027450980392156, "loss": 0.4427, "step": 17863 }, { "epoch": 9.979888268156424, "grad_norm": 0.4882737100124359, "learning_rate": 0.0005027170868347339, "loss": 0.4121, "step": 17864 }, { "epoch": 9.980446927374302, "grad_norm": 0.35804474353790283, "learning_rate": 0.0005026890756302521, "loss": 0.3434, "step": 17865 }, { "epoch": 9.981005586592179, "grad_norm": 0.4074746072292328, "learning_rate": 0.0005026610644257703, "loss": 0.3969, "step": 17866 }, { "epoch": 9.981564245810056, "grad_norm": 0.6635401844978333, "learning_rate": 0.0005026330532212885, "loss": 0.482, "step": 17867 }, { "epoch": 9.982122905027932, "grad_norm": 0.5572589039802551, "learning_rate": 0.0005026050420168066, "loss": 0.5156, "step": 17868 }, { "epoch": 9.98268156424581, "grad_norm": 0.8025408983230591, "learning_rate": 0.000502577030812325, "loss": 0.4049, "step": 17869 }, { "epoch": 9.983240223463687, "grad_norm": 0.8880243897438049, "learning_rate": 0.0005025490196078432, "loss": 0.4613, "step": 17870 }, { "epoch": 9.983798882681564, "grad_norm": 0.34688395261764526, "learning_rate": 0.0005025210084033614, "loss": 0.306, "step": 17871 }, { "epoch": 9.984357541899442, "grad_norm": 1.3696792125701904, "learning_rate": 0.0005024929971988796, "loss": 0.3963, "step": 17872 }, { "epoch": 9.984916201117318, "grad_norm": 0.5034725069999695, "learning_rate": 0.0005024649859943977, "loss": 0.3283, "step": 17873 }, { "epoch": 9.985474860335195, "grad_norm": 0.44516313076019287, "learning_rate": 0.000502436974789916, "loss": 0.4339, "step": 17874 }, { "epoch": 9.986033519553073, "grad_norm": 0.5296297669410706, "learning_rate": 0.0005024089635854342, "loss": 0.4944, "step": 17875 }, { "epoch": 9.98659217877095, "grad_norm": 0.5013049840927124, "learning_rate": 0.0005023809523809524, "loss": 0.3323, "step": 17876 }, { "epoch": 9.987150837988827, "grad_norm": 
0.5601192712783813, "learning_rate": 0.0005023529411764706, "loss": 0.3613, "step": 17877 }, { "epoch": 9.987709497206703, "grad_norm": 0.5455748438835144, "learning_rate": 0.0005023249299719888, "loss": 0.4636, "step": 17878 }, { "epoch": 9.988268156424581, "grad_norm": 0.42147040367126465, "learning_rate": 0.000502296918767507, "loss": 0.4245, "step": 17879 }, { "epoch": 9.988826815642458, "grad_norm": 3.4742698669433594, "learning_rate": 0.0005022689075630252, "loss": 0.4802, "step": 17880 }, { "epoch": 9.989385474860335, "grad_norm": 2.0615384578704834, "learning_rate": 0.0005022408963585434, "loss": 0.4587, "step": 17881 }, { "epoch": 9.989944134078213, "grad_norm": 0.7011036276817322, "learning_rate": 0.0005022128851540616, "loss": 0.5258, "step": 17882 }, { "epoch": 9.99050279329609, "grad_norm": 0.6774871349334717, "learning_rate": 0.0005021848739495798, "loss": 0.4676, "step": 17883 }, { "epoch": 9.991061452513966, "grad_norm": 0.48109954595565796, "learning_rate": 0.000502156862745098, "loss": 0.4101, "step": 17884 }, { "epoch": 9.991620111731844, "grad_norm": 0.5987567901611328, "learning_rate": 0.0005021288515406162, "loss": 0.3873, "step": 17885 }, { "epoch": 9.992178770949721, "grad_norm": 0.5523086786270142, "learning_rate": 0.0005021008403361345, "loss": 0.4494, "step": 17886 }, { "epoch": 9.992737430167598, "grad_norm": 0.44415542483329773, "learning_rate": 0.0005020728291316527, "loss": 0.5245, "step": 17887 }, { "epoch": 9.993296089385474, "grad_norm": 0.5693686008453369, "learning_rate": 0.0005020448179271709, "loss": 0.4991, "step": 17888 }, { "epoch": 9.993854748603352, "grad_norm": 1.1321344375610352, "learning_rate": 0.0005020168067226891, "loss": 0.3953, "step": 17889 }, { "epoch": 9.994413407821229, "grad_norm": 0.4679591953754425, "learning_rate": 0.0005019887955182073, "loss": 0.4459, "step": 17890 }, { "epoch": 9.994972067039106, "grad_norm": 1.1107648611068726, "learning_rate": 0.0005019607843137255, "loss": 0.4412, "step": 17891 }, { 
"epoch": 9.995530726256984, "grad_norm": 0.7567646503448486, "learning_rate": 0.0005019327731092437, "loss": 0.4893, "step": 17892 }, { "epoch": 9.99608938547486, "grad_norm": 0.42347589135169983, "learning_rate": 0.0005019047619047619, "loss": 0.4143, "step": 17893 }, { "epoch": 9.996648044692737, "grad_norm": 0.5419673323631287, "learning_rate": 0.0005018767507002802, "loss": 0.4728, "step": 17894 }, { "epoch": 9.997206703910614, "grad_norm": 0.40730077028274536, "learning_rate": 0.0005018487394957983, "loss": 0.4279, "step": 17895 }, { "epoch": 9.997765363128492, "grad_norm": 0.6806232333183289, "learning_rate": 0.0005018207282913165, "loss": 0.4555, "step": 17896 }, { "epoch": 9.998324022346369, "grad_norm": 0.440498948097229, "learning_rate": 0.0005017927170868347, "loss": 0.4593, "step": 17897 }, { "epoch": 9.998882681564245, "grad_norm": 0.4612373411655426, "learning_rate": 0.0005017647058823529, "loss": 0.3831, "step": 17898 }, { "epoch": 9.999441340782123, "grad_norm": 0.5210971236228943, "learning_rate": 0.0005017366946778712, "loss": 0.3804, "step": 17899 }, { "epoch": 10.0, "grad_norm": 0.4399031400680542, "learning_rate": 0.0005017086834733893, "loss": 0.4062, "step": 17900 }, { "epoch": 10.000558659217877, "grad_norm": 0.345892071723938, "learning_rate": 0.0005016806722689075, "loss": 0.3255, "step": 17901 }, { "epoch": 10.001117318435755, "grad_norm": 0.49039262533187866, "learning_rate": 0.0005016526610644257, "loss": 0.3856, "step": 17902 }, { "epoch": 10.001675977653631, "grad_norm": 0.4026343822479248, "learning_rate": 0.000501624649859944, "loss": 0.3857, "step": 17903 }, { "epoch": 10.002234636871508, "grad_norm": 1.6864407062530518, "learning_rate": 0.0005015966386554623, "loss": 0.463, "step": 17904 }, { "epoch": 10.002793296089385, "grad_norm": 0.46433213353157043, "learning_rate": 0.0005015686274509804, "loss": 0.4358, "step": 17905 }, { "epoch": 10.003351955307263, "grad_norm": 0.4463176131248474, "learning_rate": 0.0005015406162464986, 
"loss": 0.4161, "step": 17906 }, { "epoch": 10.00391061452514, "grad_norm": 0.8651337623596191, "learning_rate": 0.0005015126050420168, "loss": 0.5041, "step": 17907 }, { "epoch": 10.004469273743016, "grad_norm": 0.5406264066696167, "learning_rate": 0.000501484593837535, "loss": 0.4205, "step": 17908 }, { "epoch": 10.005027932960894, "grad_norm": 0.593603253364563, "learning_rate": 0.0005014565826330533, "loss": 0.4034, "step": 17909 }, { "epoch": 10.005586592178771, "grad_norm": 0.5999714136123657, "learning_rate": 0.0005014285714285715, "loss": 0.4496, "step": 17910 }, { "epoch": 10.006145251396648, "grad_norm": 0.6581023335456848, "learning_rate": 0.0005014005602240896, "loss": 0.4811, "step": 17911 }, { "epoch": 10.006703910614526, "grad_norm": 0.7229752540588379, "learning_rate": 0.0005013725490196078, "loss": 0.5404, "step": 17912 }, { "epoch": 10.007262569832402, "grad_norm": 0.414296954870224, "learning_rate": 0.000501344537815126, "loss": 0.4058, "step": 17913 }, { "epoch": 10.007821229050279, "grad_norm": 0.5042400360107422, "learning_rate": 0.0005013165266106443, "loss": 0.4155, "step": 17914 }, { "epoch": 10.008379888268156, "grad_norm": 1.791998028755188, "learning_rate": 0.0005012885154061625, "loss": 0.3966, "step": 17915 }, { "epoch": 10.008938547486034, "grad_norm": 0.42034295201301575, "learning_rate": 0.0005012605042016806, "loss": 0.4131, "step": 17916 }, { "epoch": 10.00949720670391, "grad_norm": 0.6980493068695068, "learning_rate": 0.0005012324929971988, "loss": 0.4552, "step": 17917 }, { "epoch": 10.010055865921787, "grad_norm": 0.4247693419456482, "learning_rate": 0.000501204481792717, "loss": 0.3665, "step": 17918 }, { "epoch": 10.010614525139665, "grad_norm": 0.8249256014823914, "learning_rate": 0.0005011764705882354, "loss": 0.426, "step": 17919 }, { "epoch": 10.011173184357542, "grad_norm": 0.44600003957748413, "learning_rate": 0.0005011484593837536, "loss": 0.4388, "step": 17920 }, { "epoch": 10.011731843575419, "grad_norm": 
0.5670394897460938, "learning_rate": 0.0005011204481792717, "loss": 0.4706, "step": 17921 }, { "epoch": 10.012290502793297, "grad_norm": 0.47556865215301514, "learning_rate": 0.0005010924369747899, "loss": 0.4874, "step": 17922 }, { "epoch": 10.012849162011173, "grad_norm": 1.188564419746399, "learning_rate": 0.0005010644257703081, "loss": 0.4724, "step": 17923 }, { "epoch": 10.01340782122905, "grad_norm": 0.4579189121723175, "learning_rate": 0.0005010364145658264, "loss": 0.3863, "step": 17924 }, { "epoch": 10.013966480446927, "grad_norm": 0.500243604183197, "learning_rate": 0.0005010084033613446, "loss": 0.3734, "step": 17925 }, { "epoch": 10.014525139664805, "grad_norm": 0.5416630506515503, "learning_rate": 0.0005009803921568628, "loss": 0.4144, "step": 17926 }, { "epoch": 10.015083798882682, "grad_norm": 0.4887044429779053, "learning_rate": 0.0005009523809523809, "loss": 0.484, "step": 17927 }, { "epoch": 10.015642458100558, "grad_norm": 16.893583297729492, "learning_rate": 0.0005009243697478991, "loss": 0.3519, "step": 17928 }, { "epoch": 10.016201117318436, "grad_norm": 0.46560680866241455, "learning_rate": 0.0005008963585434174, "loss": 0.4451, "step": 17929 }, { "epoch": 10.016759776536313, "grad_norm": 1.105960726737976, "learning_rate": 0.0005008683473389356, "loss": 0.4774, "step": 17930 }, { "epoch": 10.01731843575419, "grad_norm": 1.2146358489990234, "learning_rate": 0.0005008403361344538, "loss": 0.3292, "step": 17931 }, { "epoch": 10.017877094972068, "grad_norm": 0.44622984528541565, "learning_rate": 0.0005008123249299719, "loss": 0.4929, "step": 17932 }, { "epoch": 10.018435754189944, "grad_norm": 0.5632505416870117, "learning_rate": 0.0005007843137254901, "loss": 0.3551, "step": 17933 }, { "epoch": 10.018994413407821, "grad_norm": 0.43393099308013916, "learning_rate": 0.0005007563025210084, "loss": 0.5032, "step": 17934 }, { "epoch": 10.019553072625698, "grad_norm": 1.2526994943618774, "learning_rate": 0.0005007282913165267, "loss": 0.4211, "step": 
17935 }, { "epoch": 10.020111731843576, "grad_norm": 0.4087871313095093, "learning_rate": 0.0005007002801120449, "loss": 0.3924, "step": 17936 }, { "epoch": 10.020670391061453, "grad_norm": 0.5228903889656067, "learning_rate": 0.000500672268907563, "loss": 0.409, "step": 17937 }, { "epoch": 10.021229050279329, "grad_norm": 1.160319209098816, "learning_rate": 0.0005006442577030812, "loss": 0.3508, "step": 17938 }, { "epoch": 10.021787709497207, "grad_norm": 0.39838096499443054, "learning_rate": 0.0005006162464985995, "loss": 0.4804, "step": 17939 }, { "epoch": 10.022346368715084, "grad_norm": 0.40692901611328125, "learning_rate": 0.0005005882352941177, "loss": 0.346, "step": 17940 }, { "epoch": 10.02290502793296, "grad_norm": 0.7101580500602722, "learning_rate": 0.0005005602240896359, "loss": 0.5936, "step": 17941 }, { "epoch": 10.023463687150837, "grad_norm": 0.4281863272190094, "learning_rate": 0.0005005322128851541, "loss": 0.3857, "step": 17942 }, { "epoch": 10.024022346368715, "grad_norm": 0.3620590269565582, "learning_rate": 0.0005005042016806722, "loss": 0.3799, "step": 17943 }, { "epoch": 10.024581005586592, "grad_norm": 0.3812369704246521, "learning_rate": 0.0005004761904761905, "loss": 0.3059, "step": 17944 }, { "epoch": 10.025139664804469, "grad_norm": 0.4740908741950989, "learning_rate": 0.0005004481792717087, "loss": 0.3452, "step": 17945 }, { "epoch": 10.025698324022347, "grad_norm": 0.6070629954338074, "learning_rate": 0.0005004201680672269, "loss": 0.4658, "step": 17946 }, { "epoch": 10.026256983240224, "grad_norm": 0.4376888871192932, "learning_rate": 0.0005003921568627451, "loss": 0.4862, "step": 17947 }, { "epoch": 10.0268156424581, "grad_norm": 0.5635068416595459, "learning_rate": 0.0005003641456582632, "loss": 0.4731, "step": 17948 }, { "epoch": 10.027374301675978, "grad_norm": 0.6688268184661865, "learning_rate": 0.0005003361344537815, "loss": 0.4297, "step": 17949 }, { "epoch": 10.027932960893855, "grad_norm": 0.3915763199329376, 
"learning_rate": 0.0005003081232492997, "loss": 0.2974, "step": 17950 }, { "epoch": 10.028491620111732, "grad_norm": 0.731935441493988, "learning_rate": 0.000500280112044818, "loss": 0.6455, "step": 17951 }, { "epoch": 10.029050279329608, "grad_norm": 0.5909689664840698, "learning_rate": 0.0005002521008403362, "loss": 0.4373, "step": 17952 }, { "epoch": 10.029608938547486, "grad_norm": 0.6215769052505493, "learning_rate": 0.0005002240896358543, "loss": 0.3597, "step": 17953 }, { "epoch": 10.030167597765363, "grad_norm": 0.739921510219574, "learning_rate": 0.0005001960784313726, "loss": 0.4298, "step": 17954 }, { "epoch": 10.03072625698324, "grad_norm": 2.083953619003296, "learning_rate": 0.0005001680672268908, "loss": 0.3057, "step": 17955 }, { "epoch": 10.031284916201118, "grad_norm": 0.5171045660972595, "learning_rate": 0.000500140056022409, "loss": 0.4428, "step": 17956 }, { "epoch": 10.031843575418995, "grad_norm": 1.0370972156524658, "learning_rate": 0.0005001120448179272, "loss": 0.5112, "step": 17957 }, { "epoch": 10.032402234636871, "grad_norm": 3.2398455142974854, "learning_rate": 0.0005000840336134454, "loss": 0.3883, "step": 17958 }, { "epoch": 10.03296089385475, "grad_norm": 0.5504884719848633, "learning_rate": 0.0005000560224089636, "loss": 0.4238, "step": 17959 }, { "epoch": 10.033519553072626, "grad_norm": 0.6857721209526062, "learning_rate": 0.0005000280112044818, "loss": 0.4023, "step": 17960 }, { "epoch": 10.034078212290503, "grad_norm": 0.4622032940387726, "learning_rate": 0.0005, "loss": 0.4926, "step": 17961 }, { "epoch": 10.03463687150838, "grad_norm": 0.6019774675369263, "learning_rate": 0.0004999719887955182, "loss": 0.4645, "step": 17962 }, { "epoch": 10.035195530726257, "grad_norm": 0.4611119031906128, "learning_rate": 0.0004999439775910364, "loss": 0.5331, "step": 17963 }, { "epoch": 10.035754189944134, "grad_norm": 0.5036234855651855, "learning_rate": 0.0004999159663865546, "loss": 0.3573, "step": 17964 }, { "epoch": 10.03631284916201, 
"grad_norm": 0.5459470748901367, "learning_rate": 0.0004998879551820728, "loss": 0.4795, "step": 17965 }, { "epoch": 10.036871508379889, "grad_norm": 1.5565532445907593, "learning_rate": 0.000499859943977591, "loss": 0.5216, "step": 17966 }, { "epoch": 10.037430167597766, "grad_norm": 0.38636332750320435, "learning_rate": 0.0004998319327731092, "loss": 0.3909, "step": 17967 }, { "epoch": 10.037988826815642, "grad_norm": 0.3940342962741852, "learning_rate": 0.0004998039215686275, "loss": 0.3195, "step": 17968 }, { "epoch": 10.03854748603352, "grad_norm": 0.7279254794120789, "learning_rate": 0.0004997759103641457, "loss": 0.4641, "step": 17969 }, { "epoch": 10.039106145251397, "grad_norm": 0.40888601541519165, "learning_rate": 0.0004997478991596639, "loss": 0.5189, "step": 17970 }, { "epoch": 10.039664804469274, "grad_norm": 0.47585687041282654, "learning_rate": 0.0004997198879551821, "loss": 0.4612, "step": 17971 }, { "epoch": 10.04022346368715, "grad_norm": 0.9656063914299011, "learning_rate": 0.0004996918767507003, "loss": 0.4296, "step": 17972 }, { "epoch": 10.040782122905028, "grad_norm": 0.397722989320755, "learning_rate": 0.0004996638655462185, "loss": 0.3917, "step": 17973 }, { "epoch": 10.041340782122905, "grad_norm": 0.8667500019073486, "learning_rate": 0.0004996358543417367, "loss": 0.4959, "step": 17974 }, { "epoch": 10.041899441340782, "grad_norm": 0.4051498472690582, "learning_rate": 0.0004996078431372549, "loss": 0.4592, "step": 17975 }, { "epoch": 10.04245810055866, "grad_norm": 1.0580335855484009, "learning_rate": 0.0004995798319327731, "loss": 0.3187, "step": 17976 }, { "epoch": 10.043016759776537, "grad_norm": 0.48696503043174744, "learning_rate": 0.0004995518207282913, "loss": 0.4534, "step": 17977 }, { "epoch": 10.043575418994413, "grad_norm": 8.047385215759277, "learning_rate": 0.0004995238095238095, "loss": 0.3802, "step": 17978 }, { "epoch": 10.04413407821229, "grad_norm": 1.4092398881912231, "learning_rate": 0.0004994957983193277, "loss": 
0.4651, "step": 17979 }, { "epoch": 10.044692737430168, "grad_norm": 0.4118942320346832, "learning_rate": 0.0004994677871148459, "loss": 0.4148, "step": 17980 }, { "epoch": 10.045251396648045, "grad_norm": 0.46698030829429626, "learning_rate": 0.0004994397759103641, "loss": 0.4624, "step": 17981 }, { "epoch": 10.045810055865921, "grad_norm": 0.6111294627189636, "learning_rate": 0.0004994117647058823, "loss": 0.3908, "step": 17982 }, { "epoch": 10.0463687150838, "grad_norm": 0.5152270197868347, "learning_rate": 0.0004993837535014005, "loss": 0.3396, "step": 17983 }, { "epoch": 10.046927374301676, "grad_norm": 4.6959452629089355, "learning_rate": 0.0004993557422969187, "loss": 0.4675, "step": 17984 }, { "epoch": 10.047486033519553, "grad_norm": 0.8851702213287354, "learning_rate": 0.0004993277310924371, "loss": 0.4131, "step": 17985 }, { "epoch": 10.048044692737431, "grad_norm": 0.4822857081890106, "learning_rate": 0.0004992997198879552, "loss": 0.3688, "step": 17986 }, { "epoch": 10.048603351955308, "grad_norm": 1.4531407356262207, "learning_rate": 0.0004992717086834734, "loss": 0.4738, "step": 17987 }, { "epoch": 10.049162011173184, "grad_norm": 0.46136125922203064, "learning_rate": 0.0004992436974789916, "loss": 0.5313, "step": 17988 }, { "epoch": 10.04972067039106, "grad_norm": 1.3123393058776855, "learning_rate": 0.0004992156862745098, "loss": 0.5028, "step": 17989 }, { "epoch": 10.050279329608939, "grad_norm": 0.5183739066123962, "learning_rate": 0.0004991876750700281, "loss": 0.3755, "step": 17990 }, { "epoch": 10.050837988826816, "grad_norm": 0.8639532327651978, "learning_rate": 0.0004991596638655462, "loss": 0.3771, "step": 17991 }, { "epoch": 10.051396648044692, "grad_norm": 0.4637674391269684, "learning_rate": 0.0004991316526610644, "loss": 0.3649, "step": 17992 }, { "epoch": 10.05195530726257, "grad_norm": 0.7903505563735962, "learning_rate": 0.0004991036414565827, "loss": 0.4078, "step": 17993 }, { "epoch": 10.052513966480447, "grad_norm": 
0.49363213777542114, "learning_rate": 0.0004990756302521008, "loss": 0.5178, "step": 17994 }, { "epoch": 10.053072625698324, "grad_norm": 5.578216075897217, "learning_rate": 0.0004990476190476191, "loss": 0.4068, "step": 17995 }, { "epoch": 10.053631284916202, "grad_norm": 0.7987362146377563, "learning_rate": 0.0004990196078431372, "loss": 0.6, "step": 17996 }, { "epoch": 10.054189944134079, "grad_norm": 0.4944694936275482, "learning_rate": 0.0004989915966386554, "loss": 0.5439, "step": 17997 }, { "epoch": 10.054748603351955, "grad_norm": 0.5824446678161621, "learning_rate": 0.0004989635854341737, "loss": 0.5314, "step": 17998 }, { "epoch": 10.055307262569832, "grad_norm": 0.6227288842201233, "learning_rate": 0.0004989355742296918, "loss": 0.4775, "step": 17999 }, { "epoch": 10.05586592178771, "grad_norm": 0.5089334845542908, "learning_rate": 0.0004989075630252102, "loss": 0.3765, "step": 18000 }, { "epoch": 10.05586592178771, "eval_cer": 0.08866049120050626, "eval_loss": 0.3333585262298584, "eval_runtime": 55.6479, "eval_samples_per_second": 81.548, "eval_steps_per_second": 5.104, "eval_wer": 0.3523644590933214, "step": 18000 }, { "epoch": 10.056424581005587, "grad_norm": 0.3654351830482483, "learning_rate": 0.0004988795518207284, "loss": 0.2883, "step": 18001 }, { "epoch": 10.056983240223463, "grad_norm": 0.4010741114616394, "learning_rate": 0.0004988515406162465, "loss": 0.367, "step": 18002 }, { "epoch": 10.057541899441341, "grad_norm": 0.7492659687995911, "learning_rate": 0.0004988235294117648, "loss": 0.5268, "step": 18003 }, { "epoch": 10.058100558659218, "grad_norm": 3.2686333656311035, "learning_rate": 0.0004987955182072829, "loss": 0.4709, "step": 18004 }, { "epoch": 10.058659217877095, "grad_norm": 0.394586980342865, "learning_rate": 0.0004987675070028012, "loss": 0.4133, "step": 18005 }, { "epoch": 10.059217877094973, "grad_norm": 0.6349456310272217, "learning_rate": 0.0004987394957983194, "loss": 0.4667, "step": 18006 }, { "epoch": 10.05977653631285, 
"grad_norm": 0.3994496464729309, "learning_rate": 0.0004987114845938375, "loss": 0.4202, "step": 18007 }, { "epoch": 10.060335195530726, "grad_norm": 0.43270522356033325, "learning_rate": 0.0004986834733893558, "loss": 0.3797, "step": 18008 }, { "epoch": 10.060893854748603, "grad_norm": 0.7027539014816284, "learning_rate": 0.000498655462184874, "loss": 0.3529, "step": 18009 }, { "epoch": 10.061452513966481, "grad_norm": 0.5405945777893066, "learning_rate": 0.0004986274509803922, "loss": 0.4125, "step": 18010 }, { "epoch": 10.062011173184358, "grad_norm": 0.4811776578426361, "learning_rate": 0.0004985994397759104, "loss": 0.4808, "step": 18011 }, { "epoch": 10.062569832402234, "grad_norm": 0.407446026802063, "learning_rate": 0.0004985714285714285, "loss": 0.3681, "step": 18012 }, { "epoch": 10.063128491620112, "grad_norm": 0.3555853068828583, "learning_rate": 0.0004985434173669468, "loss": 0.3904, "step": 18013 }, { "epoch": 10.063687150837989, "grad_norm": 0.66319739818573, "learning_rate": 0.000498515406162465, "loss": 0.65, "step": 18014 }, { "epoch": 10.064245810055866, "grad_norm": 0.46124690771102905, "learning_rate": 0.0004984873949579832, "loss": 0.3195, "step": 18015 }, { "epoch": 10.064804469273742, "grad_norm": 0.4796826243400574, "learning_rate": 0.0004984593837535014, "loss": 0.4868, "step": 18016 }, { "epoch": 10.06536312849162, "grad_norm": 0.34285181760787964, "learning_rate": 0.0004984313725490197, "loss": 0.3275, "step": 18017 }, { "epoch": 10.065921787709497, "grad_norm": 0.45430615544319153, "learning_rate": 0.0004984033613445379, "loss": 0.3633, "step": 18018 }, { "epoch": 10.066480446927374, "grad_norm": 0.46112847328186035, "learning_rate": 0.0004983753501400561, "loss": 0.6426, "step": 18019 }, { "epoch": 10.067039106145252, "grad_norm": 0.6184660196304321, "learning_rate": 0.0004983473389355742, "loss": 0.5031, "step": 18020 }, { "epoch": 10.067597765363129, "grad_norm": 0.7644140720367432, "learning_rate": 0.0004983193277310925, "loss": 
0.4417, "step": 18021 }, { "epoch": 10.068156424581005, "grad_norm": 0.6032038331031799, "learning_rate": 0.0004982913165266107, "loss": 0.4618, "step": 18022 }, { "epoch": 10.068715083798883, "grad_norm": 0.6615059971809387, "learning_rate": 0.0004982633053221289, "loss": 0.3599, "step": 18023 }, { "epoch": 10.06927374301676, "grad_norm": 0.6413350701332092, "learning_rate": 0.0004982352941176471, "loss": 0.3698, "step": 18024 }, { "epoch": 10.069832402234637, "grad_norm": 1.8722656965255737, "learning_rate": 0.0004982072829131653, "loss": 0.4012, "step": 18025 }, { "epoch": 10.070391061452513, "grad_norm": 1.1173092126846313, "learning_rate": 0.0004981792717086835, "loss": 0.4373, "step": 18026 }, { "epoch": 10.070949720670392, "grad_norm": 0.45847341418266296, "learning_rate": 0.0004981512605042017, "loss": 0.3597, "step": 18027 }, { "epoch": 10.071508379888268, "grad_norm": 0.4837469160556793, "learning_rate": 0.0004981232492997199, "loss": 0.3547, "step": 18028 }, { "epoch": 10.072067039106145, "grad_norm": 0.4144695997238159, "learning_rate": 0.0004980952380952381, "loss": 0.3277, "step": 18029 }, { "epoch": 10.072625698324023, "grad_norm": 0.5408447980880737, "learning_rate": 0.0004980672268907563, "loss": 0.4625, "step": 18030 }, { "epoch": 10.0731843575419, "grad_norm": 0.5198730230331421, "learning_rate": 0.0004980392156862745, "loss": 0.4248, "step": 18031 }, { "epoch": 10.073743016759776, "grad_norm": 0.993948221206665, "learning_rate": 0.0004980112044817927, "loss": 0.5049, "step": 18032 }, { "epoch": 10.074301675977654, "grad_norm": 0.35296565294265747, "learning_rate": 0.000497983193277311, "loss": 0.3798, "step": 18033 }, { "epoch": 10.074860335195531, "grad_norm": 1.2401721477508545, "learning_rate": 0.0004979551820728292, "loss": 0.376, "step": 18034 }, { "epoch": 10.075418994413408, "grad_norm": 0.432939738035202, "learning_rate": 0.0004979271708683474, "loss": 0.5478, "step": 18035 }, { "epoch": 10.075977653631284, "grad_norm": 
0.5400649309158325, "learning_rate": 0.0004978991596638656, "loss": 0.3319, "step": 18036 }, { "epoch": 10.076536312849163, "grad_norm": 1.9363116025924683, "learning_rate": 0.0004978711484593838, "loss": 0.5767, "step": 18037 }, { "epoch": 10.077094972067039, "grad_norm": 0.5125581622123718, "learning_rate": 0.000497843137254902, "loss": 0.3716, "step": 18038 }, { "epoch": 10.077653631284916, "grad_norm": 16.39263153076172, "learning_rate": 0.0004978151260504202, "loss": 0.4797, "step": 18039 }, { "epoch": 10.078212290502794, "grad_norm": 0.44551587104797363, "learning_rate": 0.0004977871148459384, "loss": 0.4268, "step": 18040 }, { "epoch": 10.07877094972067, "grad_norm": 0.6325133442878723, "learning_rate": 0.0004977591036414566, "loss": 0.4095, "step": 18041 }, { "epoch": 10.079329608938547, "grad_norm": 0.44222506880760193, "learning_rate": 0.0004977310924369748, "loss": 0.4128, "step": 18042 }, { "epoch": 10.079888268156424, "grad_norm": 0.41075995564460754, "learning_rate": 0.000497703081232493, "loss": 0.4313, "step": 18043 }, { "epoch": 10.080446927374302, "grad_norm": 0.43985557556152344, "learning_rate": 0.0004976750700280112, "loss": 0.395, "step": 18044 }, { "epoch": 10.081005586592179, "grad_norm": 0.48438480496406555, "learning_rate": 0.0004976470588235294, "loss": 0.4277, "step": 18045 }, { "epoch": 10.081564245810055, "grad_norm": 0.3173558712005615, "learning_rate": 0.0004976190476190476, "loss": 0.4226, "step": 18046 }, { "epoch": 10.082122905027934, "grad_norm": 0.9954471588134766, "learning_rate": 0.0004975910364145658, "loss": 0.3688, "step": 18047 }, { "epoch": 10.08268156424581, "grad_norm": 0.5256673097610474, "learning_rate": 0.000497563025210084, "loss": 0.4816, "step": 18048 }, { "epoch": 10.083240223463687, "grad_norm": 0.34848225116729736, "learning_rate": 0.0004975350140056022, "loss": 0.3846, "step": 18049 }, { "epoch": 10.083798882681565, "grad_norm": 0.46043679118156433, "learning_rate": 0.0004975070028011205, "loss": 0.5424, 
"step": 18050 }, { "epoch": 10.084357541899442, "grad_norm": 0.5182067155838013, "learning_rate": 0.0004974789915966387, "loss": 0.4828, "step": 18051 }, { "epoch": 10.084916201117318, "grad_norm": 0.6389055848121643, "learning_rate": 0.0004974509803921569, "loss": 0.3969, "step": 18052 }, { "epoch": 10.085474860335195, "grad_norm": 0.6957405805587769, "learning_rate": 0.0004974229691876751, "loss": 0.4516, "step": 18053 }, { "epoch": 10.086033519553073, "grad_norm": 0.761493444442749, "learning_rate": 0.0004973949579831933, "loss": 0.7, "step": 18054 }, { "epoch": 10.08659217877095, "grad_norm": 0.34772759675979614, "learning_rate": 0.0004973669467787115, "loss": 0.3788, "step": 18055 }, { "epoch": 10.087150837988826, "grad_norm": 0.6579962968826294, "learning_rate": 0.0004973389355742297, "loss": 0.4843, "step": 18056 }, { "epoch": 10.087709497206705, "grad_norm": 1.1136418581008911, "learning_rate": 0.0004973109243697479, "loss": 0.4985, "step": 18057 }, { "epoch": 10.088268156424581, "grad_norm": 0.5118082761764526, "learning_rate": 0.0004972829131652661, "loss": 0.4457, "step": 18058 }, { "epoch": 10.088826815642458, "grad_norm": 0.40080875158309937, "learning_rate": 0.0004972549019607843, "loss": 0.5929, "step": 18059 }, { "epoch": 10.089385474860336, "grad_norm": 0.5157171487808228, "learning_rate": 0.0004972268907563025, "loss": 0.4551, "step": 18060 }, { "epoch": 10.089944134078213, "grad_norm": 0.9593400955200195, "learning_rate": 0.0004971988795518207, "loss": 0.5289, "step": 18061 }, { "epoch": 10.09050279329609, "grad_norm": 2.8089518547058105, "learning_rate": 0.0004971708683473389, "loss": 0.378, "step": 18062 }, { "epoch": 10.091061452513966, "grad_norm": 3.404726266860962, "learning_rate": 0.0004971428571428571, "loss": 0.3314, "step": 18063 }, { "epoch": 10.091620111731844, "grad_norm": 0.4110618829727173, "learning_rate": 0.0004971148459383753, "loss": 0.4634, "step": 18064 }, { "epoch": 10.09217877094972, "grad_norm": 0.5282087326049805, 
"learning_rate": 0.0004970868347338935, "loss": 0.3968, "step": 18065 }, { "epoch": 10.092737430167597, "grad_norm": 0.5879765152931213, "learning_rate": 0.0004970588235294117, "loss": 0.594, "step": 18066 }, { "epoch": 10.093296089385476, "grad_norm": 0.5131786465644836, "learning_rate": 0.00049703081232493, "loss": 0.4439, "step": 18067 }, { "epoch": 10.093854748603352, "grad_norm": 0.5974563360214233, "learning_rate": 0.0004970028011204482, "loss": 0.3734, "step": 18068 }, { "epoch": 10.094413407821229, "grad_norm": 0.4999324083328247, "learning_rate": 0.0004969747899159664, "loss": 0.4895, "step": 18069 }, { "epoch": 10.094972067039107, "grad_norm": 0.5282110571861267, "learning_rate": 0.0004969467787114846, "loss": 0.4574, "step": 18070 }, { "epoch": 10.095530726256984, "grad_norm": 0.37837883830070496, "learning_rate": 0.0004969187675070028, "loss": 0.4464, "step": 18071 }, { "epoch": 10.09608938547486, "grad_norm": 0.5894453525543213, "learning_rate": 0.000496890756302521, "loss": 0.4037, "step": 18072 }, { "epoch": 10.096648044692737, "grad_norm": 0.49189305305480957, "learning_rate": 0.0004968627450980393, "loss": 0.4571, "step": 18073 }, { "epoch": 10.097206703910615, "grad_norm": 0.5240940451622009, "learning_rate": 0.0004968347338935574, "loss": 0.3701, "step": 18074 }, { "epoch": 10.097765363128492, "grad_norm": 0.5711789727210999, "learning_rate": 0.0004968067226890756, "loss": 0.4576, "step": 18075 }, { "epoch": 10.098324022346368, "grad_norm": 0.5005568265914917, "learning_rate": 0.0004967787114845938, "loss": 0.5472, "step": 18076 }, { "epoch": 10.098882681564247, "grad_norm": 0.43374624848365784, "learning_rate": 0.000496750700280112, "loss": 0.3925, "step": 18077 }, { "epoch": 10.099441340782123, "grad_norm": 0.6369708180427551, "learning_rate": 0.0004967226890756303, "loss": 0.4185, "step": 18078 }, { "epoch": 10.1, "grad_norm": 0.49052974581718445, "learning_rate": 0.0004966946778711484, "loss": 0.4878, "step": 18079 }, { "epoch": 
10.100558659217878, "grad_norm": 0.6738065481185913, "learning_rate": 0.0004966666666666666, "loss": 0.4841, "step": 18080 }, { "epoch": 10.101117318435755, "grad_norm": 1.1961359977722168, "learning_rate": 0.000496638655462185, "loss": 0.4584, "step": 18081 }, { "epoch": 10.101675977653631, "grad_norm": 0.5539975166320801, "learning_rate": 0.000496610644257703, "loss": 0.4395, "step": 18082 }, { "epoch": 10.102234636871508, "grad_norm": 0.9628750085830688, "learning_rate": 0.0004965826330532214, "loss": 0.4173, "step": 18083 }, { "epoch": 10.102793296089386, "grad_norm": 0.4032144844532013, "learning_rate": 0.0004965546218487395, "loss": 0.3792, "step": 18084 }, { "epoch": 10.103351955307263, "grad_norm": 1.255530834197998, "learning_rate": 0.0004965266106442577, "loss": 0.3715, "step": 18085 }, { "epoch": 10.10391061452514, "grad_norm": 0.3632354438304901, "learning_rate": 0.000496498599439776, "loss": 0.4655, "step": 18086 }, { "epoch": 10.104469273743018, "grad_norm": 0.7581839561462402, "learning_rate": 0.0004964705882352941, "loss": 0.3759, "step": 18087 }, { "epoch": 10.105027932960894, "grad_norm": 0.8161591291427612, "learning_rate": 0.0004964425770308124, "loss": 0.455, "step": 18088 }, { "epoch": 10.10558659217877, "grad_norm": 0.6192954778671265, "learning_rate": 0.0004964145658263306, "loss": 0.3898, "step": 18089 }, { "epoch": 10.106145251396647, "grad_norm": 0.4983682334423065, "learning_rate": 0.0004963865546218487, "loss": 0.4853, "step": 18090 }, { "epoch": 10.106703910614526, "grad_norm": 0.5258122682571411, "learning_rate": 0.000496358543417367, "loss": 0.4513, "step": 18091 }, { "epoch": 10.107262569832402, "grad_norm": 0.6247334480285645, "learning_rate": 0.0004963305322128851, "loss": 0.421, "step": 18092 }, { "epoch": 10.107821229050279, "grad_norm": 0.5308733582496643, "learning_rate": 0.0004963025210084034, "loss": 0.4366, "step": 18093 }, { "epoch": 10.108379888268157, "grad_norm": 0.3999197483062744, "learning_rate": 
0.0004962745098039216, "loss": 0.3886, "step": 18094 }, { "epoch": 10.108938547486034, "grad_norm": 0.47458192706108093, "learning_rate": 0.0004962464985994397, "loss": 0.3928, "step": 18095 }, { "epoch": 10.10949720670391, "grad_norm": 0.5964575409889221, "learning_rate": 0.000496218487394958, "loss": 0.479, "step": 18096 }, { "epoch": 10.110055865921789, "grad_norm": 0.5020251870155334, "learning_rate": 0.0004961904761904762, "loss": 0.4364, "step": 18097 }, { "epoch": 10.110614525139665, "grad_norm": 1.2431504726409912, "learning_rate": 0.0004961624649859944, "loss": 0.5645, "step": 18098 }, { "epoch": 10.111173184357542, "grad_norm": 0.5170184373855591, "learning_rate": 0.0004961344537815127, "loss": 0.4006, "step": 18099 }, { "epoch": 10.111731843575418, "grad_norm": 0.4978105425834656, "learning_rate": 0.0004961064425770308, "loss": 0.3498, "step": 18100 }, { "epoch": 10.112290502793297, "grad_norm": 0.5204949378967285, "learning_rate": 0.0004960784313725491, "loss": 0.5475, "step": 18101 }, { "epoch": 10.112849162011173, "grad_norm": 0.5680293440818787, "learning_rate": 0.0004960504201680673, "loss": 0.4506, "step": 18102 }, { "epoch": 10.11340782122905, "grad_norm": 0.5670507550239563, "learning_rate": 0.0004960224089635855, "loss": 0.5376, "step": 18103 }, { "epoch": 10.113966480446928, "grad_norm": 0.41433772444725037, "learning_rate": 0.0004959943977591037, "loss": 0.5161, "step": 18104 }, { "epoch": 10.114525139664805, "grad_norm": 0.44655802845954895, "learning_rate": 0.0004959663865546219, "loss": 0.4676, "step": 18105 }, { "epoch": 10.115083798882681, "grad_norm": 0.6903935670852661, "learning_rate": 0.0004959383753501401, "loss": 0.5333, "step": 18106 }, { "epoch": 10.11564245810056, "grad_norm": 0.4442330300807953, "learning_rate": 0.0004959103641456583, "loss": 0.434, "step": 18107 }, { "epoch": 10.116201117318436, "grad_norm": 0.8517215251922607, "learning_rate": 0.0004958823529411765, "loss": 0.4592, "step": 18108 }, { "epoch": 
10.116759776536313, "grad_norm": 0.5763704776763916, "learning_rate": 0.0004958543417366947, "loss": 0.4281, "step": 18109 }, { "epoch": 10.11731843575419, "grad_norm": 0.7409971952438354, "learning_rate": 0.0004958263305322129, "loss": 0.5307, "step": 18110 }, { "epoch": 10.117877094972068, "grad_norm": 0.3384193480014801, "learning_rate": 0.0004957983193277311, "loss": 0.3736, "step": 18111 }, { "epoch": 10.118435754189944, "grad_norm": 0.5298641920089722, "learning_rate": 0.0004957703081232493, "loss": 0.4543, "step": 18112 }, { "epoch": 10.11899441340782, "grad_norm": 1.018093228340149, "learning_rate": 0.0004957422969187675, "loss": 0.397, "step": 18113 }, { "epoch": 10.119553072625699, "grad_norm": 0.7656188011169434, "learning_rate": 0.0004957142857142857, "loss": 0.4782, "step": 18114 }, { "epoch": 10.120111731843576, "grad_norm": 0.4250064790248871, "learning_rate": 0.000495686274509804, "loss": 0.4174, "step": 18115 }, { "epoch": 10.120670391061452, "grad_norm": 3.176320791244507, "learning_rate": 0.0004956582633053222, "loss": 0.4966, "step": 18116 }, { "epoch": 10.121229050279329, "grad_norm": 0.9759548306465149, "learning_rate": 0.0004956302521008404, "loss": 0.4804, "step": 18117 }, { "epoch": 10.121787709497207, "grad_norm": 1.0502034425735474, "learning_rate": 0.0004956022408963586, "loss": 0.4608, "step": 18118 }, { "epoch": 10.122346368715084, "grad_norm": 0.6199468374252319, "learning_rate": 0.0004955742296918768, "loss": 0.6233, "step": 18119 }, { "epoch": 10.12290502793296, "grad_norm": 0.7264186143875122, "learning_rate": 0.000495546218487395, "loss": 0.4783, "step": 18120 }, { "epoch": 10.123463687150839, "grad_norm": 0.5781332850456238, "learning_rate": 0.0004955182072829132, "loss": 0.5594, "step": 18121 }, { "epoch": 10.124022346368715, "grad_norm": 0.8736269474029541, "learning_rate": 0.0004954901960784314, "loss": 0.4358, "step": 18122 }, { "epoch": 10.124581005586592, "grad_norm": 0.6616122722625732, "learning_rate": 
0.0004954621848739496, "loss": 0.4337, "step": 18123 }, { "epoch": 10.12513966480447, "grad_norm": 0.4661155343055725, "learning_rate": 0.0004954341736694678, "loss": 0.4205, "step": 18124 }, { "epoch": 10.125698324022347, "grad_norm": 0.3954992890357971, "learning_rate": 0.000495406162464986, "loss": 0.3641, "step": 18125 }, { "epoch": 10.126256983240223, "grad_norm": 0.42161041498184204, "learning_rate": 0.0004953781512605042, "loss": 0.3659, "step": 18126 }, { "epoch": 10.1268156424581, "grad_norm": 0.6067443490028381, "learning_rate": 0.0004953501400560224, "loss": 0.3581, "step": 18127 }, { "epoch": 10.127374301675978, "grad_norm": 0.5355249047279358, "learning_rate": 0.0004953221288515406, "loss": 0.3928, "step": 18128 }, { "epoch": 10.127932960893855, "grad_norm": 0.619052529335022, "learning_rate": 0.0004952941176470588, "loss": 0.402, "step": 18129 }, { "epoch": 10.128491620111731, "grad_norm": 2.500716209411621, "learning_rate": 0.000495266106442577, "loss": 0.44, "step": 18130 }, { "epoch": 10.12905027932961, "grad_norm": 0.6444413065910339, "learning_rate": 0.0004952380952380952, "loss": 0.5018, "step": 18131 }, { "epoch": 10.129608938547486, "grad_norm": 0.4753556251525879, "learning_rate": 0.0004952100840336135, "loss": 0.394, "step": 18132 }, { "epoch": 10.130167597765363, "grad_norm": 0.44528928399086, "learning_rate": 0.0004951820728291317, "loss": 0.3956, "step": 18133 }, { "epoch": 10.130726256983241, "grad_norm": 0.471708744764328, "learning_rate": 0.0004951540616246499, "loss": 0.4736, "step": 18134 }, { "epoch": 10.131284916201118, "grad_norm": 2.1290929317474365, "learning_rate": 0.0004951260504201681, "loss": 0.427, "step": 18135 }, { "epoch": 10.131843575418994, "grad_norm": 1.0335098505020142, "learning_rate": 0.0004950980392156863, "loss": 0.4109, "step": 18136 }, { "epoch": 10.13240223463687, "grad_norm": 0.49380993843078613, "learning_rate": 0.0004950700280112045, "loss": 0.4965, "step": 18137 }, { "epoch": 10.132960893854749, 
"grad_norm": 1.5063607692718506, "learning_rate": 0.0004950420168067227, "loss": 0.3847, "step": 18138 }, { "epoch": 10.133519553072626, "grad_norm": 1.4352372884750366, "learning_rate": 0.0004950140056022409, "loss": 0.4098, "step": 18139 }, { "epoch": 10.134078212290502, "grad_norm": 0.48557960987091064, "learning_rate": 0.0004949859943977591, "loss": 0.3645, "step": 18140 }, { "epoch": 10.13463687150838, "grad_norm": 1.1111335754394531, "learning_rate": 0.0004949579831932773, "loss": 0.4738, "step": 18141 }, { "epoch": 10.135195530726257, "grad_norm": 0.3614799678325653, "learning_rate": 0.0004949299719887955, "loss": 0.4305, "step": 18142 }, { "epoch": 10.135754189944134, "grad_norm": 0.6348438858985901, "learning_rate": 0.0004949019607843137, "loss": 0.4955, "step": 18143 }, { "epoch": 10.136312849162012, "grad_norm": 2.1298398971557617, "learning_rate": 0.0004948739495798319, "loss": 0.4701, "step": 18144 }, { "epoch": 10.136871508379889, "grad_norm": 0.5491769313812256, "learning_rate": 0.0004948459383753501, "loss": 0.389, "step": 18145 }, { "epoch": 10.137430167597765, "grad_norm": 0.5336526036262512, "learning_rate": 0.0004948179271708683, "loss": 0.5628, "step": 18146 }, { "epoch": 10.137988826815642, "grad_norm": 0.47665512561798096, "learning_rate": 0.0004947899159663865, "loss": 0.41, "step": 18147 }, { "epoch": 10.13854748603352, "grad_norm": 7.293799877166748, "learning_rate": 0.0004947619047619047, "loss": 0.472, "step": 18148 }, { "epoch": 10.139106145251397, "grad_norm": 12.360549926757812, "learning_rate": 0.000494733893557423, "loss": 0.3851, "step": 18149 }, { "epoch": 10.139664804469273, "grad_norm": 0.47572776675224304, "learning_rate": 0.0004947058823529412, "loss": 0.4082, "step": 18150 }, { "epoch": 10.140223463687152, "grad_norm": 0.5174591541290283, "learning_rate": 0.0004946778711484594, "loss": 0.364, "step": 18151 }, { "epoch": 10.140782122905028, "grad_norm": 0.5342432260513306, "learning_rate": 0.0004946498599439776, "loss": 
0.4121, "step": 18152 }, { "epoch": 10.141340782122905, "grad_norm": 0.6754600405693054, "learning_rate": 0.0004946218487394958, "loss": 0.6592, "step": 18153 }, { "epoch": 10.141899441340781, "grad_norm": 0.37145087122917175, "learning_rate": 0.000494593837535014, "loss": 0.4249, "step": 18154 }, { "epoch": 10.14245810055866, "grad_norm": 0.541826069355011, "learning_rate": 0.0004945658263305322, "loss": 0.3715, "step": 18155 }, { "epoch": 10.143016759776536, "grad_norm": 2.7477400302886963, "learning_rate": 0.0004945378151260504, "loss": 0.3865, "step": 18156 }, { "epoch": 10.143575418994413, "grad_norm": 0.45038044452667236, "learning_rate": 0.0004945098039215686, "loss": 0.3855, "step": 18157 }, { "epoch": 10.144134078212291, "grad_norm": 0.7185829877853394, "learning_rate": 0.0004944817927170868, "loss": 0.3504, "step": 18158 }, { "epoch": 10.144692737430168, "grad_norm": 0.5916851758956909, "learning_rate": 0.000494453781512605, "loss": 0.4265, "step": 18159 }, { "epoch": 10.145251396648044, "grad_norm": 3.196746826171875, "learning_rate": 0.0004944257703081232, "loss": 0.3565, "step": 18160 }, { "epoch": 10.145810055865923, "grad_norm": 0.41470780968666077, "learning_rate": 0.0004943977591036415, "loss": 0.4461, "step": 18161 }, { "epoch": 10.1463687150838, "grad_norm": 0.6514317989349365, "learning_rate": 0.0004943697478991596, "loss": 0.3879, "step": 18162 }, { "epoch": 10.146927374301676, "grad_norm": 0.4813372492790222, "learning_rate": 0.0004943417366946778, "loss": 0.5368, "step": 18163 }, { "epoch": 10.147486033519552, "grad_norm": 0.8580808043479919, "learning_rate": 0.000494313725490196, "loss": 0.4005, "step": 18164 }, { "epoch": 10.14804469273743, "grad_norm": 0.5529158711433411, "learning_rate": 0.0004942857142857143, "loss": 0.4396, "step": 18165 }, { "epoch": 10.148603351955307, "grad_norm": 0.4545688033103943, "learning_rate": 0.0004942577030812326, "loss": 0.4924, "step": 18166 }, { "epoch": 10.149162011173184, "grad_norm": 0.526366651058197, 
"learning_rate": 0.0004942296918767507, "loss": 0.3854, "step": 18167 }, { "epoch": 10.149720670391062, "grad_norm": 0.353569895029068, "learning_rate": 0.0004942016806722689, "loss": 0.4371, "step": 18168 }, { "epoch": 10.150279329608939, "grad_norm": 0.8198625445365906, "learning_rate": 0.0004941736694677872, "loss": 0.4525, "step": 18169 }, { "epoch": 10.150837988826815, "grad_norm": 3.6741857528686523, "learning_rate": 0.0004941456582633053, "loss": 0.4214, "step": 18170 }, { "epoch": 10.151396648044694, "grad_norm": 0.4922366142272949, "learning_rate": 0.0004941176470588236, "loss": 0.4973, "step": 18171 }, { "epoch": 10.15195530726257, "grad_norm": 0.4238635301589966, "learning_rate": 0.0004940896358543417, "loss": 0.5431, "step": 18172 }, { "epoch": 10.152513966480447, "grad_norm": 2.6393191814422607, "learning_rate": 0.0004940616246498599, "loss": 0.3844, "step": 18173 }, { "epoch": 10.153072625698323, "grad_norm": 0.5375121831893921, "learning_rate": 0.0004940336134453782, "loss": 0.4498, "step": 18174 }, { "epoch": 10.153631284916202, "grad_norm": 0.47014671564102173, "learning_rate": 0.0004940056022408963, "loss": 0.3736, "step": 18175 }, { "epoch": 10.154189944134078, "grad_norm": 3.2533578872680664, "learning_rate": 0.0004939775910364146, "loss": 0.4734, "step": 18176 }, { "epoch": 10.154748603351955, "grad_norm": 0.6932738423347473, "learning_rate": 0.0004939495798319328, "loss": 0.4479, "step": 18177 }, { "epoch": 10.155307262569833, "grad_norm": 0.6455178260803223, "learning_rate": 0.0004939215686274509, "loss": 0.5127, "step": 18178 }, { "epoch": 10.15586592178771, "grad_norm": 0.7665501236915588, "learning_rate": 0.0004938935574229692, "loss": 0.3283, "step": 18179 }, { "epoch": 10.156424581005586, "grad_norm": 0.49217841029167175, "learning_rate": 0.0004938655462184873, "loss": 0.3913, "step": 18180 }, { "epoch": 10.156983240223465, "grad_norm": 0.5359972715377808, "learning_rate": 0.0004938375350140057, "loss": 0.507, "step": 18181 }, { "epoch": 
10.157541899441341, "grad_norm": 0.5304698944091797, "learning_rate": 0.0004938095238095239, "loss": 0.523, "step": 18182 }, { "epoch": 10.158100558659218, "grad_norm": 0.4691931903362274, "learning_rate": 0.000493781512605042, "loss": 0.4527, "step": 18183 }, { "epoch": 10.158659217877094, "grad_norm": 0.6343221664428711, "learning_rate": 0.0004937535014005603, "loss": 0.4865, "step": 18184 }, { "epoch": 10.159217877094973, "grad_norm": 0.47323688864707947, "learning_rate": 0.0004937254901960785, "loss": 0.4604, "step": 18185 }, { "epoch": 10.15977653631285, "grad_norm": 0.4352110028266907, "learning_rate": 0.0004936974789915967, "loss": 0.4303, "step": 18186 }, { "epoch": 10.160335195530726, "grad_norm": 0.6599476933479309, "learning_rate": 0.0004936694677871149, "loss": 0.4235, "step": 18187 }, { "epoch": 10.160893854748604, "grad_norm": 0.42270833253860474, "learning_rate": 0.000493641456582633, "loss": 0.4375, "step": 18188 }, { "epoch": 10.16145251396648, "grad_norm": 0.5312519073486328, "learning_rate": 0.0004936134453781513, "loss": 0.3648, "step": 18189 }, { "epoch": 10.162011173184357, "grad_norm": 0.9655329585075378, "learning_rate": 0.0004935854341736695, "loss": 0.4165, "step": 18190 }, { "epoch": 10.162569832402234, "grad_norm": 0.4983498156070709, "learning_rate": 0.0004935574229691877, "loss": 0.4158, "step": 18191 }, { "epoch": 10.163128491620112, "grad_norm": 0.561797022819519, "learning_rate": 0.0004935294117647059, "loss": 0.4084, "step": 18192 }, { "epoch": 10.163687150837989, "grad_norm": 0.41133561730384827, "learning_rate": 0.0004935014005602241, "loss": 0.3845, "step": 18193 }, { "epoch": 10.164245810055865, "grad_norm": 0.489717036485672, "learning_rate": 0.0004934733893557423, "loss": 0.4187, "step": 18194 }, { "epoch": 10.164804469273744, "grad_norm": 2.4858293533325195, "learning_rate": 0.0004934453781512605, "loss": 0.4201, "step": 18195 }, { "epoch": 10.16536312849162, "grad_norm": 0.39093923568725586, "learning_rate": 
0.0004934173669467787, "loss": 0.4491, "step": 18196 }, { "epoch": 10.165921787709497, "grad_norm": 0.605914294719696, "learning_rate": 0.000493389355742297, "loss": 0.4077, "step": 18197 }, { "epoch": 10.166480446927375, "grad_norm": 0.6784054040908813, "learning_rate": 0.0004933613445378152, "loss": 0.4893, "step": 18198 }, { "epoch": 10.167039106145252, "grad_norm": 1.2724392414093018, "learning_rate": 0.0004933333333333334, "loss": 0.3767, "step": 18199 }, { "epoch": 10.167597765363128, "grad_norm": 0.4819677174091339, "learning_rate": 0.0004933053221288516, "loss": 0.4753, "step": 18200 }, { "epoch": 10.168156424581005, "grad_norm": 0.5700948238372803, "learning_rate": 0.0004932773109243698, "loss": 0.6043, "step": 18201 }, { "epoch": 10.168715083798883, "grad_norm": 1.1814908981323242, "learning_rate": 0.000493249299719888, "loss": 0.4522, "step": 18202 }, { "epoch": 10.16927374301676, "grad_norm": 0.47052881121635437, "learning_rate": 0.0004932212885154062, "loss": 0.4532, "step": 18203 }, { "epoch": 10.169832402234636, "grad_norm": 1.132826566696167, "learning_rate": 0.0004931932773109244, "loss": 0.5304, "step": 18204 }, { "epoch": 10.170391061452515, "grad_norm": 0.5704160928726196, "learning_rate": 0.0004931652661064426, "loss": 0.4389, "step": 18205 }, { "epoch": 10.170949720670391, "grad_norm": 0.5240380764007568, "learning_rate": 0.0004931372549019608, "loss": 0.4061, "step": 18206 }, { "epoch": 10.171508379888268, "grad_norm": 0.42711037397384644, "learning_rate": 0.000493109243697479, "loss": 0.3504, "step": 18207 }, { "epoch": 10.172067039106146, "grad_norm": 0.5761241316795349, "learning_rate": 0.0004930812324929972, "loss": 0.4013, "step": 18208 }, { "epoch": 10.172625698324023, "grad_norm": 0.369295597076416, "learning_rate": 0.0004930532212885154, "loss": 0.3438, "step": 18209 }, { "epoch": 10.1731843575419, "grad_norm": 0.42897453904151917, "learning_rate": 0.0004930252100840336, "loss": 0.366, "step": 18210 }, { "epoch": 10.173743016759776, 
"grad_norm": 0.7355853915214539, "learning_rate": 0.0004929971988795518, "loss": 0.4373, "step": 18211 }, { "epoch": 10.174301675977654, "grad_norm": 1.1857928037643433, "learning_rate": 0.00049296918767507, "loss": 0.4844, "step": 18212 }, { "epoch": 10.17486033519553, "grad_norm": 1.0118834972381592, "learning_rate": 0.0004929411764705882, "loss": 0.4156, "step": 18213 }, { "epoch": 10.175418994413407, "grad_norm": 0.5024769306182861, "learning_rate": 0.0004929131652661065, "loss": 0.4816, "step": 18214 }, { "epoch": 10.175977653631286, "grad_norm": 0.45484137535095215, "learning_rate": 0.0004928851540616247, "loss": 0.3951, "step": 18215 }, { "epoch": 10.176536312849162, "grad_norm": 0.5874055624008179, "learning_rate": 0.0004928571428571429, "loss": 0.4298, "step": 18216 }, { "epoch": 10.177094972067039, "grad_norm": 0.6035540103912354, "learning_rate": 0.0004928291316526611, "loss": 0.5326, "step": 18217 }, { "epoch": 10.177653631284917, "grad_norm": 1.7438260316848755, "learning_rate": 0.0004928011204481793, "loss": 0.4025, "step": 18218 }, { "epoch": 10.178212290502794, "grad_norm": 0.37224647402763367, "learning_rate": 0.0004927731092436975, "loss": 0.3906, "step": 18219 }, { "epoch": 10.17877094972067, "grad_norm": 0.6038146018981934, "learning_rate": 0.0004927450980392157, "loss": 0.4161, "step": 18220 }, { "epoch": 10.179329608938547, "grad_norm": 0.4241250157356262, "learning_rate": 0.0004927170868347339, "loss": 0.4258, "step": 18221 }, { "epoch": 10.179888268156425, "grad_norm": 0.45891451835632324, "learning_rate": 0.0004926890756302521, "loss": 0.4263, "step": 18222 }, { "epoch": 10.180446927374302, "grad_norm": 0.47805875539779663, "learning_rate": 0.0004926610644257703, "loss": 0.4186, "step": 18223 }, { "epoch": 10.181005586592178, "grad_norm": 0.4869680404663086, "learning_rate": 0.0004926330532212885, "loss": 0.3178, "step": 18224 }, { "epoch": 10.181564245810057, "grad_norm": 0.49226653575897217, "learning_rate": 0.0004926050420168067, "loss": 
0.4549, "step": 18225 }, { "epoch": 10.182122905027933, "grad_norm": 1.3692060708999634, "learning_rate": 0.0004925770308123249, "loss": 0.3805, "step": 18226 }, { "epoch": 10.18268156424581, "grad_norm": 0.5086750388145447, "learning_rate": 0.0004925490196078431, "loss": 0.4788, "step": 18227 }, { "epoch": 10.183240223463686, "grad_norm": 0.4098435938358307, "learning_rate": 0.0004925210084033613, "loss": 0.4599, "step": 18228 }, { "epoch": 10.183798882681565, "grad_norm": 0.38413023948669434, "learning_rate": 0.0004924929971988795, "loss": 0.3367, "step": 18229 }, { "epoch": 10.184357541899441, "grad_norm": 0.5165103673934937, "learning_rate": 0.0004924649859943977, "loss": 0.4078, "step": 18230 }, { "epoch": 10.184916201117318, "grad_norm": 0.6591992378234863, "learning_rate": 0.000492436974789916, "loss": 0.5216, "step": 18231 }, { "epoch": 10.185474860335196, "grad_norm": 0.6246803402900696, "learning_rate": 0.0004924089635854342, "loss": 0.4171, "step": 18232 }, { "epoch": 10.186033519553073, "grad_norm": 0.41472601890563965, "learning_rate": 0.0004923809523809524, "loss": 0.3166, "step": 18233 }, { "epoch": 10.18659217877095, "grad_norm": 0.5151275992393494, "learning_rate": 0.0004923529411764706, "loss": 0.3946, "step": 18234 }, { "epoch": 10.187150837988828, "grad_norm": 0.6874570250511169, "learning_rate": 0.0004923249299719888, "loss": 0.4903, "step": 18235 }, { "epoch": 10.187709497206704, "grad_norm": 1.259940266609192, "learning_rate": 0.0004922969187675071, "loss": 0.4883, "step": 18236 }, { "epoch": 10.18826815642458, "grad_norm": 0.5133796334266663, "learning_rate": 0.0004922689075630252, "loss": 0.4804, "step": 18237 }, { "epoch": 10.188826815642457, "grad_norm": 0.4293467402458191, "learning_rate": 0.0004922408963585434, "loss": 0.3459, "step": 18238 }, { "epoch": 10.189385474860336, "grad_norm": 0.5919665098190308, "learning_rate": 0.0004922128851540616, "loss": 0.3801, "step": 18239 }, { "epoch": 10.189944134078212, "grad_norm": 
1.5704176425933838, "learning_rate": 0.0004921848739495798, "loss": 0.3732, "step": 18240 }, { "epoch": 10.190502793296089, "grad_norm": 0.5676593780517578, "learning_rate": 0.000492156862745098, "loss": 0.4393, "step": 18241 }, { "epoch": 10.191061452513967, "grad_norm": 0.45903074741363525, "learning_rate": 0.0004921288515406162, "loss": 0.3448, "step": 18242 }, { "epoch": 10.191620111731844, "grad_norm": 0.8279778361320496, "learning_rate": 0.0004921008403361344, "loss": 0.5311, "step": 18243 }, { "epoch": 10.19217877094972, "grad_norm": 0.39877593517303467, "learning_rate": 0.0004920728291316527, "loss": 0.4688, "step": 18244 }, { "epoch": 10.192737430167599, "grad_norm": 0.393664687871933, "learning_rate": 0.0004920448179271708, "loss": 0.3654, "step": 18245 }, { "epoch": 10.193296089385475, "grad_norm": 0.5351702570915222, "learning_rate": 0.000492016806722689, "loss": 0.5736, "step": 18246 }, { "epoch": 10.193854748603352, "grad_norm": 0.535545825958252, "learning_rate": 0.0004919887955182073, "loss": 0.4901, "step": 18247 }, { "epoch": 10.194413407821228, "grad_norm": 0.6228219866752625, "learning_rate": 0.0004919607843137255, "loss": 0.8606, "step": 18248 }, { "epoch": 10.194972067039107, "grad_norm": 0.823955237865448, "learning_rate": 0.0004919327731092438, "loss": 0.5484, "step": 18249 }, { "epoch": 10.195530726256983, "grad_norm": 0.5338960289955139, "learning_rate": 0.0004919047619047619, "loss": 0.3941, "step": 18250 }, { "epoch": 10.19608938547486, "grad_norm": 0.9280003905296326, "learning_rate": 0.0004918767507002801, "loss": 0.4034, "step": 18251 }, { "epoch": 10.196648044692738, "grad_norm": 0.5905791521072388, "learning_rate": 0.0004918487394957984, "loss": 0.4744, "step": 18252 }, { "epoch": 10.197206703910615, "grad_norm": 1.1923887729644775, "learning_rate": 0.0004918207282913165, "loss": 0.4744, "step": 18253 }, { "epoch": 10.197765363128491, "grad_norm": 0.5870530605316162, "learning_rate": 0.0004917927170868348, "loss": 0.4276, "step": 
18254 }, { "epoch": 10.19832402234637, "grad_norm": 1.8262437582015991, "learning_rate": 0.0004917647058823529, "loss": 0.6013, "step": 18255 }, { "epoch": 10.198882681564246, "grad_norm": 0.5767238140106201, "learning_rate": 0.0004917366946778711, "loss": 0.4003, "step": 18256 }, { "epoch": 10.199441340782123, "grad_norm": 0.6672751903533936, "learning_rate": 0.0004917086834733894, "loss": 0.5047, "step": 18257 }, { "epoch": 10.2, "grad_norm": 1.7866566181182861, "learning_rate": 0.0004916806722689075, "loss": 0.3972, "step": 18258 }, { "epoch": 10.200558659217878, "grad_norm": 0.651545524597168, "learning_rate": 0.0004916526610644258, "loss": 0.3761, "step": 18259 }, { "epoch": 10.201117318435754, "grad_norm": 0.5305073261260986, "learning_rate": 0.000491624649859944, "loss": 0.4314, "step": 18260 }, { "epoch": 10.20167597765363, "grad_norm": 0.6352785229682922, "learning_rate": 0.0004915966386554621, "loss": 0.3676, "step": 18261 }, { "epoch": 10.202234636871509, "grad_norm": 0.5113483667373657, "learning_rate": 0.0004915686274509804, "loss": 0.4431, "step": 18262 }, { "epoch": 10.202793296089386, "grad_norm": 8.376091957092285, "learning_rate": 0.0004915406162464985, "loss": 0.5128, "step": 18263 }, { "epoch": 10.203351955307262, "grad_norm": 0.7416399121284485, "learning_rate": 0.0004915126050420169, "loss": 0.6542, "step": 18264 }, { "epoch": 10.203910614525139, "grad_norm": 0.8436415791511536, "learning_rate": 0.0004914845938375351, "loss": 0.5969, "step": 18265 }, { "epoch": 10.204469273743017, "grad_norm": 1.1047639846801758, "learning_rate": 0.0004914565826330532, "loss": 0.4549, "step": 18266 }, { "epoch": 10.205027932960894, "grad_norm": 0.5435006618499756, "learning_rate": 0.0004914285714285715, "loss": 0.4351, "step": 18267 }, { "epoch": 10.20558659217877, "grad_norm": 0.6689305901527405, "learning_rate": 0.0004914005602240897, "loss": 0.4636, "step": 18268 }, { "epoch": 10.206145251396649, "grad_norm": 0.5237941145896912, "learning_rate": 
0.0004913725490196079, "loss": 0.3409, "step": 18269 }, { "epoch": 10.206703910614525, "grad_norm": 0.4465031921863556, "learning_rate": 0.0004913445378151261, "loss": 0.4917, "step": 18270 }, { "epoch": 10.207262569832402, "grad_norm": 0.5103542804718018, "learning_rate": 0.0004913165266106442, "loss": 0.4172, "step": 18271 }, { "epoch": 10.20782122905028, "grad_norm": 0.7021303176879883, "learning_rate": 0.0004912885154061625, "loss": 0.5499, "step": 18272 }, { "epoch": 10.208379888268157, "grad_norm": 0.6792715787887573, "learning_rate": 0.0004912605042016807, "loss": 0.5376, "step": 18273 }, { "epoch": 10.208938547486033, "grad_norm": 0.455497682094574, "learning_rate": 0.0004912324929971989, "loss": 0.6352, "step": 18274 }, { "epoch": 10.20949720670391, "grad_norm": 0.4163501262664795, "learning_rate": 0.0004912044817927171, "loss": 0.365, "step": 18275 }, { "epoch": 10.210055865921788, "grad_norm": 0.5540227293968201, "learning_rate": 0.0004911764705882353, "loss": 0.4643, "step": 18276 }, { "epoch": 10.210614525139665, "grad_norm": 1.2980217933654785, "learning_rate": 0.0004911484593837535, "loss": 0.5005, "step": 18277 }, { "epoch": 10.211173184357541, "grad_norm": 0.4069842994213104, "learning_rate": 0.0004911204481792717, "loss": 0.4948, "step": 18278 }, { "epoch": 10.21173184357542, "grad_norm": 0.4946157932281494, "learning_rate": 0.00049109243697479, "loss": 0.4181, "step": 18279 }, { "epoch": 10.212290502793296, "grad_norm": 1.5342533588409424, "learning_rate": 0.0004910644257703082, "loss": 0.4167, "step": 18280 }, { "epoch": 10.212849162011173, "grad_norm": 1.0761616230010986, "learning_rate": 0.0004910364145658264, "loss": 0.3983, "step": 18281 }, { "epoch": 10.213407821229051, "grad_norm": 0.5338460803031921, "learning_rate": 0.0004910084033613446, "loss": 0.4254, "step": 18282 }, { "epoch": 10.213966480446928, "grad_norm": 0.46749648451805115, "learning_rate": 0.0004909803921568628, "loss": 0.3791, "step": 18283 }, { "epoch": 10.214525139664804, 
"grad_norm": 0.43255990743637085, "learning_rate": 0.000490952380952381, "loss": 0.5062, "step": 18284 }, { "epoch": 10.21508379888268, "grad_norm": 1.0770831108093262, "learning_rate": 0.0004909243697478992, "loss": 0.475, "step": 18285 }, { "epoch": 10.21564245810056, "grad_norm": 0.4636324942111969, "learning_rate": 0.0004908963585434174, "loss": 0.4113, "step": 18286 }, { "epoch": 10.216201117318436, "grad_norm": 0.6983132362365723, "learning_rate": 0.0004908683473389356, "loss": 0.4709, "step": 18287 }, { "epoch": 10.216759776536312, "grad_norm": 0.6466900706291199, "learning_rate": 0.0004908403361344538, "loss": 0.4279, "step": 18288 }, { "epoch": 10.21731843575419, "grad_norm": 0.5919768810272217, "learning_rate": 0.000490812324929972, "loss": 0.3915, "step": 18289 }, { "epoch": 10.217877094972067, "grad_norm": 0.48180440068244934, "learning_rate": 0.0004907843137254902, "loss": 0.3838, "step": 18290 }, { "epoch": 10.218435754189944, "grad_norm": 0.5758348107337952, "learning_rate": 0.0004907563025210084, "loss": 0.425, "step": 18291 }, { "epoch": 10.21899441340782, "grad_norm": 2.158036708831787, "learning_rate": 0.0004907282913165266, "loss": 0.6896, "step": 18292 }, { "epoch": 10.219553072625699, "grad_norm": 0.3848896324634552, "learning_rate": 0.0004907002801120448, "loss": 0.357, "step": 18293 }, { "epoch": 10.220111731843575, "grad_norm": 0.6241127848625183, "learning_rate": 0.000490672268907563, "loss": 0.4611, "step": 18294 }, { "epoch": 10.220670391061452, "grad_norm": 0.588411271572113, "learning_rate": 0.0004906442577030812, "loss": 0.4577, "step": 18295 }, { "epoch": 10.22122905027933, "grad_norm": 0.4453670382499695, "learning_rate": 0.0004906162464985995, "loss": 0.5436, "step": 18296 }, { "epoch": 10.221787709497207, "grad_norm": 0.7782946825027466, "learning_rate": 0.0004905882352941177, "loss": 0.4541, "step": 18297 }, { "epoch": 10.222346368715083, "grad_norm": 0.4636748433113098, "learning_rate": 0.0004905602240896359, "loss": 0.4391, 
"step": 18298 }, { "epoch": 10.222905027932962, "grad_norm": 1.5460546016693115, "learning_rate": 0.0004905322128851541, "loss": 0.4155, "step": 18299 }, { "epoch": 10.223463687150838, "grad_norm": 1.421724796295166, "learning_rate": 0.0004905042016806723, "loss": 0.3777, "step": 18300 }, { "epoch": 10.224022346368715, "grad_norm": 0.4560874402523041, "learning_rate": 0.0004904761904761905, "loss": 0.4149, "step": 18301 }, { "epoch": 10.224581005586591, "grad_norm": 0.40369710326194763, "learning_rate": 0.0004904481792717087, "loss": 0.4196, "step": 18302 }, { "epoch": 10.22513966480447, "grad_norm": 1.113968014717102, "learning_rate": 0.0004904201680672269, "loss": 0.413, "step": 18303 }, { "epoch": 10.225698324022346, "grad_norm": 1.7583075761795044, "learning_rate": 0.0004903921568627451, "loss": 0.503, "step": 18304 }, { "epoch": 10.226256983240223, "grad_norm": 0.33197975158691406, "learning_rate": 0.0004903641456582633, "loss": 0.3293, "step": 18305 }, { "epoch": 10.226815642458101, "grad_norm": 0.4173920154571533, "learning_rate": 0.0004903361344537815, "loss": 0.3964, "step": 18306 }, { "epoch": 10.227374301675978, "grad_norm": 3.0750677585601807, "learning_rate": 0.0004903081232492997, "loss": 0.3213, "step": 18307 }, { "epoch": 10.227932960893854, "grad_norm": 1.424278974533081, "learning_rate": 0.0004902801120448179, "loss": 0.437, "step": 18308 }, { "epoch": 10.228491620111733, "grad_norm": 0.42078617215156555, "learning_rate": 0.0004902521008403361, "loss": 0.3803, "step": 18309 }, { "epoch": 10.22905027932961, "grad_norm": 0.6340539455413818, "learning_rate": 0.0004902240896358543, "loss": 0.4872, "step": 18310 }, { "epoch": 10.229608938547486, "grad_norm": 0.629845380783081, "learning_rate": 0.0004901960784313725, "loss": 0.4366, "step": 18311 }, { "epoch": 10.230167597765362, "grad_norm": 0.8952680826187134, "learning_rate": 0.0004901680672268907, "loss": 0.4015, "step": 18312 }, { "epoch": 10.23072625698324, "grad_norm": 0.5406718850135803, 
"learning_rate": 0.000490140056022409, "loss": 0.466, "step": 18313 }, { "epoch": 10.231284916201117, "grad_norm": 2.296178102493286, "learning_rate": 0.0004901120448179272, "loss": 0.4153, "step": 18314 }, { "epoch": 10.231843575418994, "grad_norm": 1.01360285282135, "learning_rate": 0.0004900840336134454, "loss": 0.5564, "step": 18315 }, { "epoch": 10.232402234636872, "grad_norm": 0.4351801574230194, "learning_rate": 0.0004900560224089636, "loss": 0.4249, "step": 18316 }, { "epoch": 10.232960893854749, "grad_norm": 0.6425051689147949, "learning_rate": 0.0004900280112044818, "loss": 0.387, "step": 18317 }, { "epoch": 10.233519553072625, "grad_norm": 0.5039723515510559, "learning_rate": 0.00049, "loss": 0.4677, "step": 18318 }, { "epoch": 10.234078212290504, "grad_norm": 0.41770872473716736, "learning_rate": 0.0004899719887955182, "loss": 0.3289, "step": 18319 }, { "epoch": 10.23463687150838, "grad_norm": 0.497715026140213, "learning_rate": 0.0004899439775910364, "loss": 0.4723, "step": 18320 }, { "epoch": 10.235195530726257, "grad_norm": 0.5172687768936157, "learning_rate": 0.0004899159663865546, "loss": 0.4855, "step": 18321 }, { "epoch": 10.235754189944133, "grad_norm": 0.37893110513687134, "learning_rate": 0.0004898879551820728, "loss": 0.4661, "step": 18322 }, { "epoch": 10.236312849162012, "grad_norm": 0.4221186935901642, "learning_rate": 0.000489859943977591, "loss": 0.3704, "step": 18323 }, { "epoch": 10.236871508379888, "grad_norm": 0.5625306963920593, "learning_rate": 0.0004898319327731093, "loss": 0.4644, "step": 18324 }, { "epoch": 10.237430167597765, "grad_norm": 0.4371757209300995, "learning_rate": 0.0004898039215686274, "loss": 0.4999, "step": 18325 }, { "epoch": 10.237988826815643, "grad_norm": 0.46273258328437805, "learning_rate": 0.0004897759103641456, "loss": 0.5286, "step": 18326 }, { "epoch": 10.23854748603352, "grad_norm": 0.4261535406112671, "learning_rate": 0.0004897478991596638, "loss": 0.3816, "step": 18327 }, { "epoch": 
10.239106145251396, "grad_norm": 0.4743141829967499, "learning_rate": 0.000489719887955182, "loss": 0.4516, "step": 18328 }, { "epoch": 10.239664804469275, "grad_norm": 1.27565598487854, "learning_rate": 0.0004896918767507004, "loss": 0.4046, "step": 18329 }, { "epoch": 10.240223463687151, "grad_norm": 0.3932933509349823, "learning_rate": 0.0004896638655462185, "loss": 0.4082, "step": 18330 }, { "epoch": 10.240782122905028, "grad_norm": 0.5737097263336182, "learning_rate": 0.0004896358543417367, "loss": 0.5647, "step": 18331 }, { "epoch": 10.241340782122904, "grad_norm": 0.47776395082473755, "learning_rate": 0.000489607843137255, "loss": 0.3503, "step": 18332 }, { "epoch": 10.241899441340783, "grad_norm": 0.3598838746547699, "learning_rate": 0.0004895798319327731, "loss": 0.4211, "step": 18333 }, { "epoch": 10.24245810055866, "grad_norm": 0.5205954313278198, "learning_rate": 0.0004895518207282914, "loss": 0.3933, "step": 18334 }, { "epoch": 10.243016759776536, "grad_norm": 0.45160582661628723, "learning_rate": 0.0004895238095238095, "loss": 0.3835, "step": 18335 }, { "epoch": 10.243575418994414, "grad_norm": 0.8252968788146973, "learning_rate": 0.0004894957983193277, "loss": 0.3551, "step": 18336 }, { "epoch": 10.24413407821229, "grad_norm": 0.5469398498535156, "learning_rate": 0.000489467787114846, "loss": 0.4355, "step": 18337 }, { "epoch": 10.244692737430167, "grad_norm": 1.1218631267547607, "learning_rate": 0.0004894397759103641, "loss": 0.3217, "step": 18338 }, { "epoch": 10.245251396648044, "grad_norm": 0.48319295048713684, "learning_rate": 0.0004894117647058824, "loss": 0.4432, "step": 18339 }, { "epoch": 10.245810055865922, "grad_norm": 0.3665015399456024, "learning_rate": 0.0004893837535014006, "loss": 0.3908, "step": 18340 }, { "epoch": 10.246368715083799, "grad_norm": 0.892652690410614, "learning_rate": 0.0004893557422969187, "loss": 0.4797, "step": 18341 }, { "epoch": 10.246927374301675, "grad_norm": 0.6330655813217163, "learning_rate": 
0.000489327731092437, "loss": 0.3845, "step": 18342 }, { "epoch": 10.247486033519554, "grad_norm": 0.40359973907470703, "learning_rate": 0.0004892997198879551, "loss": 0.4087, "step": 18343 }, { "epoch": 10.24804469273743, "grad_norm": 0.6576468348503113, "learning_rate": 0.0004892717086834734, "loss": 0.4535, "step": 18344 }, { "epoch": 10.248603351955307, "grad_norm": 0.5093878507614136, "learning_rate": 0.0004892436974789917, "loss": 0.3408, "step": 18345 }, { "epoch": 10.249162011173185, "grad_norm": 0.4071648120880127, "learning_rate": 0.0004892156862745098, "loss": 0.3672, "step": 18346 }, { "epoch": 10.249720670391062, "grad_norm": 0.4684867858886719, "learning_rate": 0.0004891876750700281, "loss": 0.4411, "step": 18347 }, { "epoch": 10.250279329608938, "grad_norm": 1.053542971611023, "learning_rate": 0.0004891596638655463, "loss": 0.39, "step": 18348 }, { "epoch": 10.250837988826815, "grad_norm": 1.0055053234100342, "learning_rate": 0.0004891316526610645, "loss": 0.7021, "step": 18349 }, { "epoch": 10.251396648044693, "grad_norm": 3.780545711517334, "learning_rate": 0.0004891036414565827, "loss": 0.3463, "step": 18350 }, { "epoch": 10.25195530726257, "grad_norm": 0.3567473590373993, "learning_rate": 0.0004890756302521008, "loss": 0.3226, "step": 18351 }, { "epoch": 10.252513966480446, "grad_norm": 0.38406747579574585, "learning_rate": 0.0004890476190476191, "loss": 0.4224, "step": 18352 }, { "epoch": 10.253072625698325, "grad_norm": 0.6070484519004822, "learning_rate": 0.0004890196078431373, "loss": 0.4863, "step": 18353 }, { "epoch": 10.253631284916201, "grad_norm": 0.5813414454460144, "learning_rate": 0.0004889915966386554, "loss": 0.4126, "step": 18354 }, { "epoch": 10.254189944134078, "grad_norm": 0.6065589189529419, "learning_rate": 0.0004889635854341737, "loss": 0.4598, "step": 18355 }, { "epoch": 10.254748603351956, "grad_norm": 0.5930861830711365, "learning_rate": 0.0004889355742296919, "loss": 0.4485, "step": 18356 }, { "epoch": 10.255307262569833, 
"grad_norm": 0.6899927854537964, "learning_rate": 0.0004889075630252101, "loss": 0.6009, "step": 18357 }, { "epoch": 10.25586592178771, "grad_norm": 0.5042360424995422, "learning_rate": 0.0004888795518207283, "loss": 0.4671, "step": 18358 }, { "epoch": 10.256424581005586, "grad_norm": 0.7181597352027893, "learning_rate": 0.0004888515406162464, "loss": 0.4897, "step": 18359 }, { "epoch": 10.256983240223464, "grad_norm": 1.2735143899917603, "learning_rate": 0.0004888235294117647, "loss": 0.5099, "step": 18360 }, { "epoch": 10.25754189944134, "grad_norm": 0.6729240417480469, "learning_rate": 0.000488795518207283, "loss": 0.3862, "step": 18361 }, { "epoch": 10.258100558659217, "grad_norm": 0.6995847821235657, "learning_rate": 0.0004887675070028012, "loss": 0.484, "step": 18362 }, { "epoch": 10.258659217877096, "grad_norm": 0.42959994077682495, "learning_rate": 0.0004887394957983194, "loss": 0.4264, "step": 18363 }, { "epoch": 10.259217877094972, "grad_norm": 1.6488803625106812, "learning_rate": 0.0004887114845938376, "loss": 0.3879, "step": 18364 }, { "epoch": 10.259776536312849, "grad_norm": 0.4537966549396515, "learning_rate": 0.0004886834733893558, "loss": 0.4116, "step": 18365 }, { "epoch": 10.260335195530725, "grad_norm": 0.5976909399032593, "learning_rate": 0.000488655462184874, "loss": 0.3789, "step": 18366 }, { "epoch": 10.260893854748604, "grad_norm": 0.6692116856575012, "learning_rate": 0.0004886274509803922, "loss": 0.4396, "step": 18367 }, { "epoch": 10.26145251396648, "grad_norm": 2.3390302658081055, "learning_rate": 0.0004885994397759104, "loss": 0.4978, "step": 18368 }, { "epoch": 10.262011173184357, "grad_norm": 1.1843814849853516, "learning_rate": 0.0004885714285714286, "loss": 0.5256, "step": 18369 }, { "epoch": 10.262569832402235, "grad_norm": 1.6812130212783813, "learning_rate": 0.0004885434173669468, "loss": 0.4063, "step": 18370 }, { "epoch": 10.263128491620112, "grad_norm": 0.6371800899505615, "learning_rate": 0.000488515406162465, "loss": 
0.4366, "step": 18371 }, { "epoch": 10.263687150837988, "grad_norm": 0.45672523975372314, "learning_rate": 0.0004884873949579832, "loss": 0.5356, "step": 18372 }, { "epoch": 10.264245810055867, "grad_norm": 0.44220802187919617, "learning_rate": 0.0004884593837535014, "loss": 0.6256, "step": 18373 }, { "epoch": 10.264804469273743, "grad_norm": 0.472271591424942, "learning_rate": 0.0004884313725490196, "loss": 0.4116, "step": 18374 }, { "epoch": 10.26536312849162, "grad_norm": 0.6216697096824646, "learning_rate": 0.0004884033613445378, "loss": 0.3601, "step": 18375 }, { "epoch": 10.265921787709496, "grad_norm": 0.5687991380691528, "learning_rate": 0.000488375350140056, "loss": 0.3816, "step": 18376 }, { "epoch": 10.266480446927375, "grad_norm": 0.49071210622787476, "learning_rate": 0.0004883473389355742, "loss": 0.3691, "step": 18377 }, { "epoch": 10.267039106145251, "grad_norm": 0.5811916589736938, "learning_rate": 0.0004883193277310925, "loss": 0.4555, "step": 18378 }, { "epoch": 10.267597765363128, "grad_norm": 0.4637194275856018, "learning_rate": 0.0004882913165266107, "loss": 0.4222, "step": 18379 }, { "epoch": 10.268156424581006, "grad_norm": 0.9916163086891174, "learning_rate": 0.00048826330532212886, "loss": 0.3469, "step": 18380 }, { "epoch": 10.268715083798883, "grad_norm": 0.4936084747314453, "learning_rate": 0.00048823529411764707, "loss": 0.3552, "step": 18381 }, { "epoch": 10.26927374301676, "grad_norm": 0.4554215371608734, "learning_rate": 0.0004882072829131653, "loss": 0.4944, "step": 18382 }, { "epoch": 10.269832402234638, "grad_norm": 0.5257346034049988, "learning_rate": 0.0004881792717086835, "loss": 0.4905, "step": 18383 }, { "epoch": 10.270391061452514, "grad_norm": 0.661540150642395, "learning_rate": 0.0004881512605042017, "loss": 0.4703, "step": 18384 }, { "epoch": 10.27094972067039, "grad_norm": 1.3188999891281128, "learning_rate": 0.0004881232492997199, "loss": 0.4724, "step": 18385 }, { "epoch": 10.271508379888267, "grad_norm": 
2.559964418411255, "learning_rate": 0.0004880952380952381, "loss": 0.4642, "step": 18386 }, { "epoch": 10.272067039106146, "grad_norm": 5.632225036621094, "learning_rate": 0.0004880672268907563, "loss": 0.3893, "step": 18387 }, { "epoch": 10.272625698324022, "grad_norm": 0.5761430859565735, "learning_rate": 0.0004880392156862745, "loss": 0.4852, "step": 18388 }, { "epoch": 10.273184357541899, "grad_norm": 0.4436241090297699, "learning_rate": 0.0004880112044817927, "loss": 0.456, "step": 18389 }, { "epoch": 10.273743016759777, "grad_norm": 0.41078269481658936, "learning_rate": 0.0004879831932773109, "loss": 0.4756, "step": 18390 }, { "epoch": 10.274301675977654, "grad_norm": 0.8542408347129822, "learning_rate": 0.00048795518207282913, "loss": 0.5041, "step": 18391 }, { "epoch": 10.27486033519553, "grad_norm": 0.7590973377227783, "learning_rate": 0.0004879271708683474, "loss": 0.6663, "step": 18392 }, { "epoch": 10.275418994413409, "grad_norm": 0.45177537202835083, "learning_rate": 0.00048789915966386554, "loss": 0.4165, "step": 18393 }, { "epoch": 10.275977653631285, "grad_norm": 4.279130458831787, "learning_rate": 0.00048787114845938375, "loss": 0.3249, "step": 18394 }, { "epoch": 10.276536312849162, "grad_norm": 0.5268144607543945, "learning_rate": 0.00048784313725490195, "loss": 0.4678, "step": 18395 }, { "epoch": 10.277094972067038, "grad_norm": 0.38767245411872864, "learning_rate": 0.00048781512605042016, "loss": 0.3663, "step": 18396 }, { "epoch": 10.277653631284917, "grad_norm": 0.642672061920166, "learning_rate": 0.0004877871148459384, "loss": 0.49, "step": 18397 }, { "epoch": 10.278212290502793, "grad_norm": 0.39219579100608826, "learning_rate": 0.00048775910364145657, "loss": 0.4031, "step": 18398 }, { "epoch": 10.27877094972067, "grad_norm": 0.4188181757926941, "learning_rate": 0.0004877310924369748, "loss": 0.4243, "step": 18399 }, { "epoch": 10.279329608938548, "grad_norm": 0.4049554467201233, "learning_rate": 0.00048770308123249304, "loss": 0.4487, 
"step": 18400 }, { "epoch": 10.279888268156425, "grad_norm": 2.1493453979492188, "learning_rate": 0.0004876750700280112, "loss": 0.411, "step": 18401 }, { "epoch": 10.280446927374301, "grad_norm": 5.6451897621154785, "learning_rate": 0.00048764705882352945, "loss": 0.4399, "step": 18402 }, { "epoch": 10.28100558659218, "grad_norm": 0.40940535068511963, "learning_rate": 0.0004876190476190476, "loss": 0.4728, "step": 18403 }, { "epoch": 10.281564245810056, "grad_norm": 0.3710485100746155, "learning_rate": 0.0004875910364145658, "loss": 0.3848, "step": 18404 }, { "epoch": 10.282122905027933, "grad_norm": 1.3781269788742065, "learning_rate": 0.00048756302521008407, "loss": 0.3992, "step": 18405 }, { "epoch": 10.28268156424581, "grad_norm": 0.5431393980979919, "learning_rate": 0.0004875350140056022, "loss": 0.392, "step": 18406 }, { "epoch": 10.283240223463688, "grad_norm": 0.5656924247741699, "learning_rate": 0.0004875070028011205, "loss": 0.4264, "step": 18407 }, { "epoch": 10.283798882681564, "grad_norm": 0.6770188808441162, "learning_rate": 0.0004874789915966387, "loss": 0.4653, "step": 18408 }, { "epoch": 10.28435754189944, "grad_norm": 0.6118614077568054, "learning_rate": 0.00048745098039215684, "loss": 0.3346, "step": 18409 }, { "epoch": 10.28491620111732, "grad_norm": 0.6598475575447083, "learning_rate": 0.0004874229691876751, "loss": 0.3794, "step": 18410 }, { "epoch": 10.285474860335196, "grad_norm": 0.5067694783210754, "learning_rate": 0.00048739495798319325, "loss": 0.3673, "step": 18411 }, { "epoch": 10.286033519553072, "grad_norm": 0.4660192131996155, "learning_rate": 0.0004873669467787115, "loss": 0.4083, "step": 18412 }, { "epoch": 10.286592178770949, "grad_norm": 0.38352593779563904, "learning_rate": 0.0004873389355742297, "loss": 0.3487, "step": 18413 }, { "epoch": 10.287150837988827, "grad_norm": 0.45538339018821716, "learning_rate": 0.00048731092436974787, "loss": 0.3654, "step": 18414 }, { "epoch": 10.287709497206704, "grad_norm": 
1.3965487480163574, "learning_rate": 0.00048728291316526613, "loss": 0.4017, "step": 18415 }, { "epoch": 10.28826815642458, "grad_norm": 0.620423436164856, "learning_rate": 0.00048725490196078433, "loss": 0.4182, "step": 18416 }, { "epoch": 10.288826815642459, "grad_norm": 0.575762927532196, "learning_rate": 0.00048722689075630254, "loss": 0.5285, "step": 18417 }, { "epoch": 10.289385474860335, "grad_norm": 0.6503027081489563, "learning_rate": 0.00048719887955182075, "loss": 0.4045, "step": 18418 }, { "epoch": 10.289944134078212, "grad_norm": 0.463217556476593, "learning_rate": 0.0004871708683473389, "loss": 0.4092, "step": 18419 }, { "epoch": 10.29050279329609, "grad_norm": 0.5537541508674622, "learning_rate": 0.00048714285714285716, "loss": 0.3835, "step": 18420 }, { "epoch": 10.291061452513967, "grad_norm": 0.41254428029060364, "learning_rate": 0.00048711484593837536, "loss": 0.4602, "step": 18421 }, { "epoch": 10.291620111731843, "grad_norm": 1.661500334739685, "learning_rate": 0.00048708683473389357, "loss": 0.4145, "step": 18422 }, { "epoch": 10.29217877094972, "grad_norm": 2.934598684310913, "learning_rate": 0.0004870588235294118, "loss": 0.4143, "step": 18423 }, { "epoch": 10.292737430167598, "grad_norm": 0.48016679286956787, "learning_rate": 0.00048703081232493, "loss": 0.4883, "step": 18424 }, { "epoch": 10.293296089385475, "grad_norm": 0.5752936601638794, "learning_rate": 0.0004870028011204482, "loss": 0.4257, "step": 18425 }, { "epoch": 10.293854748603351, "grad_norm": 0.3991956412792206, "learning_rate": 0.0004869747899159664, "loss": 0.4346, "step": 18426 }, { "epoch": 10.29441340782123, "grad_norm": 0.3808521330356598, "learning_rate": 0.00048694677871148465, "loss": 0.3322, "step": 18427 }, { "epoch": 10.294972067039106, "grad_norm": 6.706089973449707, "learning_rate": 0.0004869187675070028, "loss": 0.3817, "step": 18428 }, { "epoch": 10.295530726256983, "grad_norm": 0.5070836544036865, "learning_rate": 0.000486890756302521, "loss": 0.4568, "step": 
18429 }, { "epoch": 10.296089385474861, "grad_norm": 2.9301469326019287, "learning_rate": 0.0004868627450980392, "loss": 0.4551, "step": 18430 }, { "epoch": 10.296648044692738, "grad_norm": 0.682773768901825, "learning_rate": 0.0004868347338935574, "loss": 0.5501, "step": 18431 }, { "epoch": 10.297206703910614, "grad_norm": 0.9472801685333252, "learning_rate": 0.0004868067226890757, "loss": 0.5857, "step": 18432 }, { "epoch": 10.297765363128491, "grad_norm": 0.5117703676223755, "learning_rate": 0.00048677871148459384, "loss": 0.4218, "step": 18433 }, { "epoch": 10.29832402234637, "grad_norm": 0.6095899939537048, "learning_rate": 0.00048675070028011204, "loss": 0.3983, "step": 18434 }, { "epoch": 10.298882681564246, "grad_norm": 0.5017976760864258, "learning_rate": 0.0004867226890756303, "loss": 0.4705, "step": 18435 }, { "epoch": 10.299441340782122, "grad_norm": 0.3897111415863037, "learning_rate": 0.00048669467787114845, "loss": 0.3911, "step": 18436 }, { "epoch": 10.3, "grad_norm": 0.43075549602508545, "learning_rate": 0.0004866666666666667, "loss": 0.3405, "step": 18437 }, { "epoch": 10.300558659217877, "grad_norm": 0.5940316915512085, "learning_rate": 0.00048663865546218487, "loss": 0.4006, "step": 18438 }, { "epoch": 10.301117318435754, "grad_norm": 0.49035781621932983, "learning_rate": 0.00048661064425770307, "loss": 0.6055, "step": 18439 }, { "epoch": 10.30167597765363, "grad_norm": 0.6272029280662537, "learning_rate": 0.00048658263305322133, "loss": 0.4145, "step": 18440 }, { "epoch": 10.302234636871509, "grad_norm": 0.3923509120941162, "learning_rate": 0.0004865546218487395, "loss": 0.4174, "step": 18441 }, { "epoch": 10.302793296089385, "grad_norm": 0.5060518980026245, "learning_rate": 0.00048652661064425774, "loss": 0.4013, "step": 18442 }, { "epoch": 10.303351955307262, "grad_norm": 1.4095302820205688, "learning_rate": 0.00048649859943977595, "loss": 0.4449, "step": 18443 }, { "epoch": 10.30391061452514, "grad_norm": 0.36465132236480713, 
"learning_rate": 0.0004864705882352941, "loss": 0.4214, "step": 18444 }, { "epoch": 10.304469273743017, "grad_norm": 0.5210620760917664, "learning_rate": 0.00048644257703081236, "loss": 0.3518, "step": 18445 }, { "epoch": 10.305027932960893, "grad_norm": 1.245948314666748, "learning_rate": 0.0004864145658263305, "loss": 0.4379, "step": 18446 }, { "epoch": 10.305586592178772, "grad_norm": 0.48197925090789795, "learning_rate": 0.0004863865546218488, "loss": 0.4393, "step": 18447 }, { "epoch": 10.306145251396648, "grad_norm": 0.5753238201141357, "learning_rate": 0.000486358543417367, "loss": 0.6433, "step": 18448 }, { "epoch": 10.306703910614525, "grad_norm": 0.45251309871673584, "learning_rate": 0.00048633053221288513, "loss": 0.4302, "step": 18449 }, { "epoch": 10.307262569832401, "grad_norm": 0.5044090151786804, "learning_rate": 0.0004863025210084034, "loss": 0.4201, "step": 18450 }, { "epoch": 10.30782122905028, "grad_norm": 0.45618823170661926, "learning_rate": 0.0004862745098039216, "loss": 0.4309, "step": 18451 }, { "epoch": 10.308379888268156, "grad_norm": 0.466362863779068, "learning_rate": 0.0004862464985994398, "loss": 0.3217, "step": 18452 }, { "epoch": 10.308938547486033, "grad_norm": 6.978814601898193, "learning_rate": 0.000486218487394958, "loss": 0.5725, "step": 18453 }, { "epoch": 10.309497206703911, "grad_norm": 0.7230717539787292, "learning_rate": 0.00048619047619047616, "loss": 0.5104, "step": 18454 }, { "epoch": 10.310055865921788, "grad_norm": 0.44195520877838135, "learning_rate": 0.0004861624649859944, "loss": 0.4972, "step": 18455 }, { "epoch": 10.310614525139664, "grad_norm": 0.47468531131744385, "learning_rate": 0.00048613445378151263, "loss": 0.4006, "step": 18456 }, { "epoch": 10.311173184357543, "grad_norm": 1.8223645687103271, "learning_rate": 0.00048610644257703083, "loss": 0.4071, "step": 18457 }, { "epoch": 10.31173184357542, "grad_norm": 0.46808022260665894, "learning_rate": 0.00048607843137254904, "loss": 0.5316, "step": 18458 }, { 
"epoch": 10.312290502793296, "grad_norm": 0.5461525917053223, "learning_rate": 0.00048605042016806725, "loss": 0.3873, "step": 18459 }, { "epoch": 10.312849162011172, "grad_norm": 0.4563542306423187, "learning_rate": 0.00048602240896358545, "loss": 0.437, "step": 18460 }, { "epoch": 10.31340782122905, "grad_norm": 1.3422718048095703, "learning_rate": 0.00048599439775910366, "loss": 0.3769, "step": 18461 }, { "epoch": 10.313966480446927, "grad_norm": 0.399704247713089, "learning_rate": 0.00048596638655462186, "loss": 0.4078, "step": 18462 }, { "epoch": 10.314525139664804, "grad_norm": 0.5637335777282715, "learning_rate": 0.00048593837535014007, "loss": 0.4454, "step": 18463 }, { "epoch": 10.315083798882682, "grad_norm": 0.5965197086334229, "learning_rate": 0.0004859103641456583, "loss": 0.3185, "step": 18464 }, { "epoch": 10.315642458100559, "grad_norm": 0.4701148569583893, "learning_rate": 0.0004858823529411765, "loss": 0.3932, "step": 18465 }, { "epoch": 10.316201117318435, "grad_norm": 1.511795163154602, "learning_rate": 0.0004858543417366947, "loss": 0.4163, "step": 18466 }, { "epoch": 10.316759776536314, "grad_norm": 0.4629707336425781, "learning_rate": 0.0004858263305322129, "loss": 0.4453, "step": 18467 }, { "epoch": 10.31731843575419, "grad_norm": 2.6410837173461914, "learning_rate": 0.0004857983193277311, "loss": 0.4609, "step": 18468 }, { "epoch": 10.317877094972067, "grad_norm": 1.1350764036178589, "learning_rate": 0.0004857703081232493, "loss": 0.3947, "step": 18469 }, { "epoch": 10.318435754189943, "grad_norm": 0.9224898219108582, "learning_rate": 0.0004857422969187675, "loss": 0.355, "step": 18470 }, { "epoch": 10.318994413407822, "grad_norm": 0.710609495639801, "learning_rate": 0.0004857142857142857, "loss": 0.4634, "step": 18471 }, { "epoch": 10.319553072625698, "grad_norm": 1.5205860137939453, "learning_rate": 0.0004856862745098039, "loss": 0.5903, "step": 18472 }, { "epoch": 10.320111731843575, "grad_norm": 0.6428022384643555, "learning_rate": 
0.00048565826330532213, "loss": 0.4682, "step": 18473 }, { "epoch": 10.320670391061453, "grad_norm": 0.4200070798397064, "learning_rate": 0.00048563025210084034, "loss": 0.35, "step": 18474 }, { "epoch": 10.32122905027933, "grad_norm": 0.4596361219882965, "learning_rate": 0.0004856022408963586, "loss": 0.4756, "step": 18475 }, { "epoch": 10.321787709497206, "grad_norm": 0.37173566222190857, "learning_rate": 0.00048557422969187675, "loss": 0.435, "step": 18476 }, { "epoch": 10.322346368715085, "grad_norm": 0.6648851037025452, "learning_rate": 0.00048554621848739495, "loss": 0.3565, "step": 18477 }, { "epoch": 10.322905027932961, "grad_norm": 1.3423856496810913, "learning_rate": 0.00048551820728291316, "loss": 0.4056, "step": 18478 }, { "epoch": 10.323463687150838, "grad_norm": 0.48765912652015686, "learning_rate": 0.00048549019607843137, "loss": 0.5069, "step": 18479 }, { "epoch": 10.324022346368714, "grad_norm": 0.9991682767868042, "learning_rate": 0.0004854621848739496, "loss": 0.4077, "step": 18480 }, { "epoch": 10.324581005586593, "grad_norm": 3.380387544631958, "learning_rate": 0.0004854341736694678, "loss": 0.5988, "step": 18481 }, { "epoch": 10.32513966480447, "grad_norm": 0.43948987126350403, "learning_rate": 0.000485406162464986, "loss": 0.4611, "step": 18482 }, { "epoch": 10.325698324022346, "grad_norm": 1.0265700817108154, "learning_rate": 0.00048537815126050424, "loss": 0.4076, "step": 18483 }, { "epoch": 10.326256983240224, "grad_norm": 0.6248056292533875, "learning_rate": 0.0004853501400560224, "loss": 0.5667, "step": 18484 }, { "epoch": 10.3268156424581, "grad_norm": 0.428129106760025, "learning_rate": 0.00048532212885154066, "loss": 0.399, "step": 18485 }, { "epoch": 10.327374301675977, "grad_norm": 1.3516876697540283, "learning_rate": 0.0004852941176470588, "loss": 0.3419, "step": 18486 }, { "epoch": 10.327932960893854, "grad_norm": 0.41521528363227844, "learning_rate": 0.000485266106442577, "loss": 0.2776, "step": 18487 }, { "epoch": 
10.328491620111732, "grad_norm": 0.5115822553634644, "learning_rate": 0.0004852380952380953, "loss": 0.4015, "step": 18488 }, { "epoch": 10.329050279329609, "grad_norm": 3.43815279006958, "learning_rate": 0.0004852100840336134, "loss": 0.3941, "step": 18489 }, { "epoch": 10.329608938547485, "grad_norm": 0.5160900354385376, "learning_rate": 0.0004851820728291317, "loss": 0.3538, "step": 18490 }, { "epoch": 10.330167597765364, "grad_norm": 0.5784170627593994, "learning_rate": 0.0004851540616246499, "loss": 0.3808, "step": 18491 }, { "epoch": 10.33072625698324, "grad_norm": 0.5866448283195496, "learning_rate": 0.00048512605042016804, "loss": 0.4005, "step": 18492 }, { "epoch": 10.331284916201117, "grad_norm": 0.6335739493370056, "learning_rate": 0.0004850980392156863, "loss": 0.4965, "step": 18493 }, { "epoch": 10.331843575418995, "grad_norm": 0.9090087413787842, "learning_rate": 0.00048507002801120446, "loss": 0.4821, "step": 18494 }, { "epoch": 10.332402234636872, "grad_norm": 0.6544280648231506, "learning_rate": 0.0004850420168067227, "loss": 0.5083, "step": 18495 }, { "epoch": 10.332960893854748, "grad_norm": 0.5462419390678406, "learning_rate": 0.0004850140056022409, "loss": 0.4255, "step": 18496 }, { "epoch": 10.333519553072625, "grad_norm": 0.4685802757740021, "learning_rate": 0.0004849859943977591, "loss": 0.4342, "step": 18497 }, { "epoch": 10.334078212290503, "grad_norm": 0.46307775378227234, "learning_rate": 0.00048495798319327733, "loss": 0.4785, "step": 18498 }, { "epoch": 10.33463687150838, "grad_norm": 0.6195305585861206, "learning_rate": 0.00048492997198879554, "loss": 0.4477, "step": 18499 }, { "epoch": 10.335195530726256, "grad_norm": 0.4749375581741333, "learning_rate": 0.00048490196078431375, "loss": 0.4459, "step": 18500 }, { "epoch": 10.335195530726256, "eval_cer": 0.08912419669841685, "eval_loss": 0.33553820848464966, "eval_runtime": 55.512, "eval_samples_per_second": 81.748, "eval_steps_per_second": 5.116, "eval_wer": 0.3543755761012262, 
"step": 18500 }, { "epoch": 10.335754189944135, "grad_norm": 1.1466726064682007, "learning_rate": 0.00048487394957983195, "loss": 0.2767, "step": 18501 }, { "epoch": 10.336312849162011, "grad_norm": 0.44482168555259705, "learning_rate": 0.0004848459383753501, "loss": 0.3529, "step": 18502 }, { "epoch": 10.336871508379888, "grad_norm": 0.5936769843101501, "learning_rate": 0.00048481792717086836, "loss": 0.4692, "step": 18503 }, { "epoch": 10.337430167597766, "grad_norm": 0.42303466796875, "learning_rate": 0.00048478991596638657, "loss": 0.3935, "step": 18504 }, { "epoch": 10.337988826815643, "grad_norm": 0.43544089794158936, "learning_rate": 0.0004847619047619048, "loss": 0.4149, "step": 18505 }, { "epoch": 10.33854748603352, "grad_norm": 0.4810880124568939, "learning_rate": 0.000484733893557423, "loss": 0.4738, "step": 18506 }, { "epoch": 10.339106145251396, "grad_norm": 0.6476143598556519, "learning_rate": 0.0004847058823529412, "loss": 0.3405, "step": 18507 }, { "epoch": 10.339664804469274, "grad_norm": 0.5902200937271118, "learning_rate": 0.0004846778711484594, "loss": 0.5164, "step": 18508 }, { "epoch": 10.34022346368715, "grad_norm": 0.8516894578933716, "learning_rate": 0.0004846498599439776, "loss": 0.4093, "step": 18509 }, { "epoch": 10.340782122905027, "grad_norm": 2.0323398113250732, "learning_rate": 0.0004846218487394958, "loss": 0.4778, "step": 18510 }, { "epoch": 10.341340782122906, "grad_norm": 0.7313889265060425, "learning_rate": 0.000484593837535014, "loss": 0.4381, "step": 18511 }, { "epoch": 10.341899441340782, "grad_norm": 0.482281357049942, "learning_rate": 0.0004845658263305322, "loss": 0.3419, "step": 18512 }, { "epoch": 10.342458100558659, "grad_norm": 0.5999189615249634, "learning_rate": 0.0004845378151260504, "loss": 0.4298, "step": 18513 }, { "epoch": 10.343016759776535, "grad_norm": 0.43831872940063477, "learning_rate": 0.00048450980392156863, "loss": 0.4432, "step": 18514 }, { "epoch": 10.343575418994414, "grad_norm": 0.4799043536186218, 
"learning_rate": 0.0004844817927170869, "loss": 0.414, "step": 18515 }, { "epoch": 10.34413407821229, "grad_norm": 0.569431483745575, "learning_rate": 0.00048445378151260504, "loss": 0.3974, "step": 18516 }, { "epoch": 10.344692737430167, "grad_norm": 0.47372135519981384, "learning_rate": 0.00048442577030812325, "loss": 0.3766, "step": 18517 }, { "epoch": 10.345251396648045, "grad_norm": 0.685141384601593, "learning_rate": 0.00048439775910364145, "loss": 0.4488, "step": 18518 }, { "epoch": 10.345810055865922, "grad_norm": 0.7046328186988831, "learning_rate": 0.00048436974789915966, "loss": 0.5682, "step": 18519 }, { "epoch": 10.346368715083798, "grad_norm": 0.4307580590248108, "learning_rate": 0.0004843417366946779, "loss": 0.4466, "step": 18520 }, { "epoch": 10.346927374301677, "grad_norm": 0.41561874747276306, "learning_rate": 0.00048431372549019607, "loss": 0.4341, "step": 18521 }, { "epoch": 10.347486033519553, "grad_norm": 0.5324827432632446, "learning_rate": 0.0004842857142857143, "loss": 0.531, "step": 18522 }, { "epoch": 10.34804469273743, "grad_norm": 0.49715253710746765, "learning_rate": 0.00048425770308123254, "loss": 0.3066, "step": 18523 }, { "epoch": 10.348603351955306, "grad_norm": 0.6423181891441345, "learning_rate": 0.0004842296918767507, "loss": 0.7424, "step": 18524 }, { "epoch": 10.349162011173185, "grad_norm": 0.7908527851104736, "learning_rate": 0.00048420168067226895, "loss": 0.4363, "step": 18525 }, { "epoch": 10.349720670391061, "grad_norm": 0.6770151853561401, "learning_rate": 0.0004841736694677871, "loss": 0.4476, "step": 18526 }, { "epoch": 10.350279329608938, "grad_norm": 0.4739961624145508, "learning_rate": 0.0004841456582633053, "loss": 0.3642, "step": 18527 }, { "epoch": 10.350837988826816, "grad_norm": 1.1014668941497803, "learning_rate": 0.00048411764705882357, "loss": 0.4062, "step": 18528 }, { "epoch": 10.351396648044693, "grad_norm": 2.1694178581237793, "learning_rate": 0.0004840896358543417, "loss": 0.4481, "step": 18529 }, { 
"epoch": 10.35195530726257, "grad_norm": 0.5644750595092773, "learning_rate": 0.00048406162464986, "loss": 0.4766, "step": 18530 }, { "epoch": 10.352513966480448, "grad_norm": 0.4566260576248169, "learning_rate": 0.0004840336134453782, "loss": 0.5553, "step": 18531 }, { "epoch": 10.353072625698324, "grad_norm": 0.4291819930076599, "learning_rate": 0.00048400560224089634, "loss": 0.3907, "step": 18532 }, { "epoch": 10.3536312849162, "grad_norm": 0.6987180709838867, "learning_rate": 0.0004839775910364146, "loss": 0.3594, "step": 18533 }, { "epoch": 10.354189944134077, "grad_norm": 0.44914641976356506, "learning_rate": 0.00048394957983193275, "loss": 0.3452, "step": 18534 }, { "epoch": 10.354748603351956, "grad_norm": 0.760997474193573, "learning_rate": 0.000483921568627451, "loss": 0.4537, "step": 18535 }, { "epoch": 10.355307262569832, "grad_norm": 0.6706992387771606, "learning_rate": 0.0004838935574229692, "loss": 0.5267, "step": 18536 }, { "epoch": 10.355865921787709, "grad_norm": 0.6490182280540466, "learning_rate": 0.00048386554621848737, "loss": 0.4754, "step": 18537 }, { "epoch": 10.356424581005587, "grad_norm": 0.5547512769699097, "learning_rate": 0.00048383753501400563, "loss": 0.4393, "step": 18538 }, { "epoch": 10.356983240223464, "grad_norm": 0.47428181767463684, "learning_rate": 0.00048380952380952383, "loss": 0.4836, "step": 18539 }, { "epoch": 10.35754189944134, "grad_norm": 0.48504653573036194, "learning_rate": 0.00048378151260504204, "loss": 0.4846, "step": 18540 }, { "epoch": 10.358100558659217, "grad_norm": 0.5197486281394958, "learning_rate": 0.00048375350140056025, "loss": 0.5641, "step": 18541 }, { "epoch": 10.358659217877095, "grad_norm": 0.5134580135345459, "learning_rate": 0.0004837254901960784, "loss": 0.3856, "step": 18542 }, { "epoch": 10.359217877094972, "grad_norm": 0.9766239523887634, "learning_rate": 0.00048369747899159666, "loss": 0.3663, "step": 18543 }, { "epoch": 10.359776536312848, "grad_norm": 0.423331081867218, "learning_rate": 
0.00048366946778711486, "loss": 0.4179, "step": 18544 }, { "epoch": 10.360335195530727, "grad_norm": 0.5242456793785095, "learning_rate": 0.00048364145658263307, "loss": 0.4284, "step": 18545 }, { "epoch": 10.360893854748603, "grad_norm": 0.4409019947052002, "learning_rate": 0.0004836134453781513, "loss": 0.4368, "step": 18546 }, { "epoch": 10.36145251396648, "grad_norm": 0.4317437708377838, "learning_rate": 0.0004835854341736695, "loss": 0.3708, "step": 18547 }, { "epoch": 10.362011173184358, "grad_norm": 0.7175754308700562, "learning_rate": 0.0004835574229691877, "loss": 0.4632, "step": 18548 }, { "epoch": 10.362569832402235, "grad_norm": 0.5007825493812561, "learning_rate": 0.0004835294117647059, "loss": 0.4097, "step": 18549 }, { "epoch": 10.363128491620111, "grad_norm": 0.6789500117301941, "learning_rate": 0.0004835014005602241, "loss": 0.5448, "step": 18550 }, { "epoch": 10.363687150837988, "grad_norm": 0.434480220079422, "learning_rate": 0.0004834733893557423, "loss": 0.5772, "step": 18551 }, { "epoch": 10.364245810055866, "grad_norm": 0.38877207040786743, "learning_rate": 0.0004834453781512605, "loss": 0.3669, "step": 18552 }, { "epoch": 10.364804469273743, "grad_norm": 0.6788049936294556, "learning_rate": 0.0004834173669467787, "loss": 0.6052, "step": 18553 }, { "epoch": 10.36536312849162, "grad_norm": 8.106139183044434, "learning_rate": 0.0004833893557422969, "loss": 0.4574, "step": 18554 }, { "epoch": 10.365921787709498, "grad_norm": 0.6326924562454224, "learning_rate": 0.0004833613445378152, "loss": 0.5245, "step": 18555 }, { "epoch": 10.366480446927374, "grad_norm": 0.4364500939846039, "learning_rate": 0.00048333333333333334, "loss": 0.2962, "step": 18556 }, { "epoch": 10.367039106145251, "grad_norm": 0.3921828269958496, "learning_rate": 0.00048330532212885154, "loss": 0.3731, "step": 18557 }, { "epoch": 10.36759776536313, "grad_norm": 0.33790871500968933, "learning_rate": 0.00048327731092436975, "loss": 0.3375, "step": 18558 }, { "epoch": 
10.368156424581006, "grad_norm": 0.4161319434642792, "learning_rate": 0.00048324929971988795, "loss": 0.3653, "step": 18559 }, { "epoch": 10.368715083798882, "grad_norm": 0.3491482436656952, "learning_rate": 0.0004832212885154062, "loss": 0.3125, "step": 18560 }, { "epoch": 10.369273743016759, "grad_norm": 0.4830874502658844, "learning_rate": 0.00048319327731092437, "loss": 0.4309, "step": 18561 }, { "epoch": 10.369832402234637, "grad_norm": 0.5594037771224976, "learning_rate": 0.00048316526610644257, "loss": 0.4668, "step": 18562 }, { "epoch": 10.370391061452514, "grad_norm": 0.45328056812286377, "learning_rate": 0.00048313725490196083, "loss": 0.3663, "step": 18563 }, { "epoch": 10.37094972067039, "grad_norm": 0.550655722618103, "learning_rate": 0.000483109243697479, "loss": 0.3791, "step": 18564 }, { "epoch": 10.371508379888269, "grad_norm": 0.6805617213249207, "learning_rate": 0.00048308123249299724, "loss": 0.3942, "step": 18565 }, { "epoch": 10.372067039106145, "grad_norm": 0.5872673988342285, "learning_rate": 0.0004830532212885154, "loss": 0.3954, "step": 18566 }, { "epoch": 10.372625698324022, "grad_norm": 0.5242465734481812, "learning_rate": 0.0004830252100840336, "loss": 0.5089, "step": 18567 }, { "epoch": 10.3731843575419, "grad_norm": 0.5024154186248779, "learning_rate": 0.00048299719887955186, "loss": 0.495, "step": 18568 }, { "epoch": 10.373743016759777, "grad_norm": 0.5108323097229004, "learning_rate": 0.00048296918767507, "loss": 0.3692, "step": 18569 }, { "epoch": 10.374301675977653, "grad_norm": 0.6018432378768921, "learning_rate": 0.0004829411764705883, "loss": 0.4887, "step": 18570 }, { "epoch": 10.37486033519553, "grad_norm": 0.9265491366386414, "learning_rate": 0.0004829131652661065, "loss": 0.4313, "step": 18571 }, { "epoch": 10.375418994413408, "grad_norm": 1.1394156217575073, "learning_rate": 0.00048288515406162463, "loss": 0.3922, "step": 18572 }, { "epoch": 10.375977653631285, "grad_norm": 0.6537106037139893, "learning_rate": 
0.0004828571428571429, "loss": 0.6844, "step": 18573 }, { "epoch": 10.376536312849161, "grad_norm": 0.6739285588264465, "learning_rate": 0.00048282913165266104, "loss": 0.3817, "step": 18574 }, { "epoch": 10.37709497206704, "grad_norm": 0.49427366256713867, "learning_rate": 0.00048280112044817925, "loss": 0.4028, "step": 18575 }, { "epoch": 10.377653631284916, "grad_norm": 0.856814444065094, "learning_rate": 0.0004827731092436975, "loss": 0.393, "step": 18576 }, { "epoch": 10.378212290502793, "grad_norm": 0.9378678798675537, "learning_rate": 0.00048274509803921566, "loss": 0.5244, "step": 18577 }, { "epoch": 10.378770949720671, "grad_norm": 0.4787735044956207, "learning_rate": 0.0004827170868347339, "loss": 0.3893, "step": 18578 }, { "epoch": 10.379329608938548, "grad_norm": 1.0707627534866333, "learning_rate": 0.00048268907563025213, "loss": 0.475, "step": 18579 }, { "epoch": 10.379888268156424, "grad_norm": 0.9720556139945984, "learning_rate": 0.0004826610644257703, "loss": 0.3682, "step": 18580 }, { "epoch": 10.380446927374301, "grad_norm": 0.3987704813480377, "learning_rate": 0.00048263305322128854, "loss": 0.3484, "step": 18581 }, { "epoch": 10.38100558659218, "grad_norm": 1.6980855464935303, "learning_rate": 0.0004826050420168067, "loss": 0.3772, "step": 18582 }, { "epoch": 10.381564245810056, "grad_norm": 0.5145918726921082, "learning_rate": 0.00048257703081232495, "loss": 0.4045, "step": 18583 }, { "epoch": 10.382122905027932, "grad_norm": 0.4570493698120117, "learning_rate": 0.00048254901960784316, "loss": 0.3807, "step": 18584 }, { "epoch": 10.38268156424581, "grad_norm": 0.5919678807258606, "learning_rate": 0.0004825210084033613, "loss": 0.488, "step": 18585 }, { "epoch": 10.383240223463687, "grad_norm": 0.9404540061950684, "learning_rate": 0.00048249299719887957, "loss": 0.4843, "step": 18586 }, { "epoch": 10.383798882681564, "grad_norm": 0.5159085988998413, "learning_rate": 0.0004824649859943978, "loss": 0.4021, "step": 18587 }, { "epoch": 
10.38435754189944, "grad_norm": 0.47732096910476685, "learning_rate": 0.000482436974789916, "loss": 0.5265, "step": 18588 }, { "epoch": 10.384916201117319, "grad_norm": 0.3719688057899475, "learning_rate": 0.0004824089635854342, "loss": 0.3489, "step": 18589 }, { "epoch": 10.385474860335195, "grad_norm": 1.1730204820632935, "learning_rate": 0.00048238095238095234, "loss": 0.5116, "step": 18590 }, { "epoch": 10.386033519553072, "grad_norm": 0.3958281874656677, "learning_rate": 0.0004823529411764706, "loss": 0.3393, "step": 18591 }, { "epoch": 10.38659217877095, "grad_norm": 0.48720407485961914, "learning_rate": 0.0004823249299719888, "loss": 0.6112, "step": 18592 }, { "epoch": 10.387150837988827, "grad_norm": 0.8982740640640259, "learning_rate": 0.000482296918767507, "loss": 0.5717, "step": 18593 }, { "epoch": 10.387709497206703, "grad_norm": 0.46810615062713623, "learning_rate": 0.0004822689075630252, "loss": 0.5008, "step": 18594 }, { "epoch": 10.388268156424582, "grad_norm": 0.5596848130226135, "learning_rate": 0.0004822408963585434, "loss": 0.403, "step": 18595 }, { "epoch": 10.388826815642458, "grad_norm": 1.020903468132019, "learning_rate": 0.00048221288515406163, "loss": 0.398, "step": 18596 }, { "epoch": 10.389385474860335, "grad_norm": 0.5042295455932617, "learning_rate": 0.00048218487394957984, "loss": 0.4673, "step": 18597 }, { "epoch": 10.389944134078211, "grad_norm": 0.7460339665412903, "learning_rate": 0.00048215686274509804, "loss": 0.5537, "step": 18598 }, { "epoch": 10.39050279329609, "grad_norm": 0.40921106934547424, "learning_rate": 0.00048212885154061625, "loss": 0.3676, "step": 18599 }, { "epoch": 10.391061452513966, "grad_norm": 0.5893543362617493, "learning_rate": 0.00048210084033613445, "loss": 0.5622, "step": 18600 }, { "epoch": 10.391620111731843, "grad_norm": 0.4187788665294647, "learning_rate": 0.00048207282913165266, "loss": 0.3712, "step": 18601 }, { "epoch": 10.392178770949721, "grad_norm": 0.4277508556842804, "learning_rate": 
0.00048204481792717087, "loss": 0.5834, "step": 18602 }, { "epoch": 10.392737430167598, "grad_norm": 0.6084835529327393, "learning_rate": 0.0004820168067226891, "loss": 0.4529, "step": 18603 }, { "epoch": 10.393296089385474, "grad_norm": 1.1256463527679443, "learning_rate": 0.0004819887955182073, "loss": 0.3722, "step": 18604 }, { "epoch": 10.393854748603353, "grad_norm": 0.6930166482925415, "learning_rate": 0.0004819607843137255, "loss": 0.4092, "step": 18605 }, { "epoch": 10.39441340782123, "grad_norm": 0.5441730618476868, "learning_rate": 0.0004819327731092437, "loss": 0.3744, "step": 18606 }, { "epoch": 10.394972067039106, "grad_norm": 0.44403278827667236, "learning_rate": 0.0004819047619047619, "loss": 0.3954, "step": 18607 }, { "epoch": 10.395530726256982, "grad_norm": 0.528443455696106, "learning_rate": 0.00048187675070028016, "loss": 0.4874, "step": 18608 }, { "epoch": 10.39608938547486, "grad_norm": 0.47436845302581787, "learning_rate": 0.0004818487394957983, "loss": 0.4757, "step": 18609 }, { "epoch": 10.396648044692737, "grad_norm": 0.640788733959198, "learning_rate": 0.0004818207282913165, "loss": 0.4745, "step": 18610 }, { "epoch": 10.397206703910614, "grad_norm": 0.6771900653839111, "learning_rate": 0.0004817927170868348, "loss": 0.5779, "step": 18611 }, { "epoch": 10.397765363128492, "grad_norm": 0.5182864665985107, "learning_rate": 0.0004817647058823529, "loss": 0.4725, "step": 18612 }, { "epoch": 10.398324022346369, "grad_norm": 0.5506117343902588, "learning_rate": 0.0004817366946778712, "loss": 0.4565, "step": 18613 }, { "epoch": 10.398882681564245, "grad_norm": 0.6980795860290527, "learning_rate": 0.00048170868347338934, "loss": 0.5009, "step": 18614 }, { "epoch": 10.399441340782122, "grad_norm": 0.5445689558982849, "learning_rate": 0.00048168067226890754, "loss": 0.4513, "step": 18615 }, { "epoch": 10.4, "grad_norm": 0.5470642447471619, "learning_rate": 0.0004816526610644258, "loss": 0.4577, "step": 18616 }, { "epoch": 10.400558659217877, 
"grad_norm": 0.49638262391090393, "learning_rate": 0.00048162464985994396, "loss": 0.5327, "step": 18617 }, { "epoch": 10.401117318435753, "grad_norm": 0.4458141028881073, "learning_rate": 0.0004815966386554622, "loss": 0.5097, "step": 18618 }, { "epoch": 10.401675977653632, "grad_norm": 8.243219375610352, "learning_rate": 0.0004815686274509804, "loss": 0.5264, "step": 18619 }, { "epoch": 10.402234636871508, "grad_norm": 0.3683255612850189, "learning_rate": 0.0004815406162464986, "loss": 0.3233, "step": 18620 }, { "epoch": 10.402793296089385, "grad_norm": 0.5535828471183777, "learning_rate": 0.00048151260504201683, "loss": 0.4949, "step": 18621 }, { "epoch": 10.403351955307263, "grad_norm": 0.44889092445373535, "learning_rate": 0.000481484593837535, "loss": 0.3159, "step": 18622 }, { "epoch": 10.40391061452514, "grad_norm": 0.5467238426208496, "learning_rate": 0.00048145658263305325, "loss": 0.4271, "step": 18623 }, { "epoch": 10.404469273743016, "grad_norm": 0.5221225619316101, "learning_rate": 0.00048142857142857145, "loss": 0.4192, "step": 18624 }, { "epoch": 10.405027932960893, "grad_norm": 0.4691750407218933, "learning_rate": 0.0004814005602240896, "loss": 0.385, "step": 18625 }, { "epoch": 10.405586592178771, "grad_norm": 0.7881584167480469, "learning_rate": 0.00048137254901960786, "loss": 0.3605, "step": 18626 }, { "epoch": 10.406145251396648, "grad_norm": 0.4600408971309662, "learning_rate": 0.00048134453781512607, "loss": 0.4444, "step": 18627 }, { "epoch": 10.406703910614524, "grad_norm": 0.5457239747047424, "learning_rate": 0.0004813165266106443, "loss": 0.4614, "step": 18628 }, { "epoch": 10.407262569832403, "grad_norm": 0.4991118013858795, "learning_rate": 0.0004812885154061625, "loss": 0.4409, "step": 18629 }, { "epoch": 10.40782122905028, "grad_norm": 3.9297842979431152, "learning_rate": 0.00048126050420168063, "loss": 0.4899, "step": 18630 }, { "epoch": 10.408379888268156, "grad_norm": 1.6745059490203857, "learning_rate": 0.0004812324929971989, 
"loss": 0.5218, "step": 18631 }, { "epoch": 10.408938547486034, "grad_norm": 0.3986119329929352, "learning_rate": 0.0004812044817927171, "loss": 0.4616, "step": 18632 }, { "epoch": 10.40949720670391, "grad_norm": 0.3511456549167633, "learning_rate": 0.0004811764705882353, "loss": 0.3976, "step": 18633 }, { "epoch": 10.410055865921787, "grad_norm": 0.4709932804107666, "learning_rate": 0.0004811484593837535, "loss": 0.4354, "step": 18634 }, { "epoch": 10.410614525139664, "grad_norm": 0.7571269869804382, "learning_rate": 0.0004811204481792717, "loss": 0.4053, "step": 18635 }, { "epoch": 10.411173184357542, "grad_norm": 0.3287263512611389, "learning_rate": 0.0004810924369747899, "loss": 0.3821, "step": 18636 }, { "epoch": 10.411731843575419, "grad_norm": 0.6286267638206482, "learning_rate": 0.00048106442577030813, "loss": 0.5083, "step": 18637 }, { "epoch": 10.412290502793295, "grad_norm": 0.5040934085845947, "learning_rate": 0.0004810364145658264, "loss": 0.5267, "step": 18638 }, { "epoch": 10.412849162011174, "grad_norm": 0.9080308079719543, "learning_rate": 0.00048100840336134454, "loss": 0.49, "step": 18639 }, { "epoch": 10.41340782122905, "grad_norm": 0.49590161442756653, "learning_rate": 0.00048098039215686275, "loss": 0.4509, "step": 18640 }, { "epoch": 10.413966480446927, "grad_norm": 0.42925822734832764, "learning_rate": 0.00048095238095238095, "loss": 0.3837, "step": 18641 }, { "epoch": 10.414525139664805, "grad_norm": 0.4432879388332367, "learning_rate": 0.00048092436974789916, "loss": 0.5905, "step": 18642 }, { "epoch": 10.415083798882682, "grad_norm": 0.4242720305919647, "learning_rate": 0.0004808963585434174, "loss": 0.432, "step": 18643 }, { "epoch": 10.415642458100558, "grad_norm": 0.4358902871608734, "learning_rate": 0.00048086834733893557, "loss": 0.3908, "step": 18644 }, { "epoch": 10.416201117318435, "grad_norm": 0.45682021975517273, "learning_rate": 0.0004808403361344538, "loss": 0.4088, "step": 18645 }, { "epoch": 10.416759776536313, "grad_norm": 
0.41442611813545227, "learning_rate": 0.00048081232492997204, "loss": 0.3955, "step": 18646 }, { "epoch": 10.41731843575419, "grad_norm": 1.1255244016647339, "learning_rate": 0.0004807843137254902, "loss": 0.4282, "step": 18647 }, { "epoch": 10.417877094972066, "grad_norm": 0.8667262196540833, "learning_rate": 0.00048075630252100845, "loss": 0.4312, "step": 18648 }, { "epoch": 10.418435754189945, "grad_norm": 0.41436898708343506, "learning_rate": 0.0004807282913165266, "loss": 0.4442, "step": 18649 }, { "epoch": 10.418994413407821, "grad_norm": 0.638525128364563, "learning_rate": 0.0004807002801120448, "loss": 0.4361, "step": 18650 }, { "epoch": 10.419553072625698, "grad_norm": 1.5867396593093872, "learning_rate": 0.00048067226890756307, "loss": 0.4266, "step": 18651 }, { "epoch": 10.420111731843576, "grad_norm": 0.5097740292549133, "learning_rate": 0.0004806442577030812, "loss": 0.3818, "step": 18652 }, { "epoch": 10.420670391061453, "grad_norm": 0.49930477142333984, "learning_rate": 0.0004806162464985995, "loss": 0.3885, "step": 18653 }, { "epoch": 10.42122905027933, "grad_norm": 0.49806830286979675, "learning_rate": 0.0004805882352941177, "loss": 0.3605, "step": 18654 }, { "epoch": 10.421787709497206, "grad_norm": 0.37628641724586487, "learning_rate": 0.00048056022408963584, "loss": 0.3592, "step": 18655 }, { "epoch": 10.422346368715084, "grad_norm": 0.3944506347179413, "learning_rate": 0.0004805322128851541, "loss": 0.4147, "step": 18656 }, { "epoch": 10.422905027932961, "grad_norm": 1.5315886735916138, "learning_rate": 0.00048050420168067225, "loss": 0.4642, "step": 18657 }, { "epoch": 10.423463687150837, "grad_norm": 0.3762303590774536, "learning_rate": 0.0004804761904761905, "loss": 0.4504, "step": 18658 }, { "epoch": 10.424022346368716, "grad_norm": 0.3893774747848511, "learning_rate": 0.0004804481792717087, "loss": 0.3136, "step": 18659 }, { "epoch": 10.424581005586592, "grad_norm": 0.48417970538139343, "learning_rate": 0.00048042016806722687, "loss": 
0.4474, "step": 18660 }, { "epoch": 10.425139664804469, "grad_norm": 0.6687957644462585, "learning_rate": 0.00048039215686274513, "loss": 0.5093, "step": 18661 }, { "epoch": 10.425698324022346, "grad_norm": 0.550114631652832, "learning_rate": 0.00048036414565826333, "loss": 0.6183, "step": 18662 }, { "epoch": 10.426256983240224, "grad_norm": 0.48493045568466187, "learning_rate": 0.00048033613445378154, "loss": 0.4293, "step": 18663 }, { "epoch": 10.4268156424581, "grad_norm": 0.6532963514328003, "learning_rate": 0.00048030812324929975, "loss": 0.4161, "step": 18664 }, { "epoch": 10.427374301675977, "grad_norm": 0.5330410599708557, "learning_rate": 0.0004802801120448179, "loss": 0.3695, "step": 18665 }, { "epoch": 10.427932960893855, "grad_norm": 0.5917158126831055, "learning_rate": 0.00048025210084033616, "loss": 0.4935, "step": 18666 }, { "epoch": 10.428491620111732, "grad_norm": 0.4779118299484253, "learning_rate": 0.00048022408963585436, "loss": 0.3319, "step": 18667 }, { "epoch": 10.429050279329608, "grad_norm": 0.6365251541137695, "learning_rate": 0.00048019607843137257, "loss": 0.4135, "step": 18668 }, { "epoch": 10.429608938547487, "grad_norm": 0.42598897218704224, "learning_rate": 0.0004801680672268908, "loss": 0.2796, "step": 18669 }, { "epoch": 10.430167597765363, "grad_norm": 0.44862398505210876, "learning_rate": 0.000480140056022409, "loss": 0.402, "step": 18670 }, { "epoch": 10.43072625698324, "grad_norm": 0.5557389855384827, "learning_rate": 0.0004801120448179272, "loss": 0.535, "step": 18671 }, { "epoch": 10.431284916201117, "grad_norm": 0.41991230845451355, "learning_rate": 0.0004800840336134454, "loss": 0.3874, "step": 18672 }, { "epoch": 10.431843575418995, "grad_norm": 0.8289154767990112, "learning_rate": 0.0004800560224089636, "loss": 0.463, "step": 18673 }, { "epoch": 10.432402234636871, "grad_norm": 0.47423654794692993, "learning_rate": 0.0004800280112044818, "loss": 0.4877, "step": 18674 }, { "epoch": 10.432960893854748, "grad_norm": 
1.3550552129745483, "learning_rate": 0.00048, "loss": 0.3922, "step": 18675 }, { "epoch": 10.433519553072626, "grad_norm": 0.518094003200531, "learning_rate": 0.0004799719887955182, "loss": 0.4311, "step": 18676 }, { "epoch": 10.434078212290503, "grad_norm": 0.7114032506942749, "learning_rate": 0.0004799439775910364, "loss": 0.4145, "step": 18677 }, { "epoch": 10.43463687150838, "grad_norm": 0.5246087908744812, "learning_rate": 0.0004799159663865547, "loss": 0.4299, "step": 18678 }, { "epoch": 10.435195530726258, "grad_norm": 0.7234489321708679, "learning_rate": 0.00047988795518207284, "loss": 0.5988, "step": 18679 }, { "epoch": 10.435754189944134, "grad_norm": 1.3226077556610107, "learning_rate": 0.00047985994397759104, "loss": 0.4171, "step": 18680 }, { "epoch": 10.436312849162011, "grad_norm": 0.8794574737548828, "learning_rate": 0.00047983193277310925, "loss": 0.51, "step": 18681 }, { "epoch": 10.436871508379888, "grad_norm": 0.6875120997428894, "learning_rate": 0.00047980392156862745, "loss": 0.4661, "step": 18682 }, { "epoch": 10.437430167597766, "grad_norm": 0.5675374865531921, "learning_rate": 0.0004797759103641457, "loss": 0.4183, "step": 18683 }, { "epoch": 10.437988826815642, "grad_norm": 0.4331020712852478, "learning_rate": 0.00047974789915966387, "loss": 0.3466, "step": 18684 }, { "epoch": 10.438547486033519, "grad_norm": 0.4964342713356018, "learning_rate": 0.00047971988795518207, "loss": 0.473, "step": 18685 }, { "epoch": 10.439106145251397, "grad_norm": 0.3521219491958618, "learning_rate": 0.00047969187675070033, "loss": 0.4089, "step": 18686 }, { "epoch": 10.439664804469274, "grad_norm": 0.7384117841720581, "learning_rate": 0.0004796638655462185, "loss": 0.4892, "step": 18687 }, { "epoch": 10.44022346368715, "grad_norm": 0.3999738097190857, "learning_rate": 0.0004796358543417367, "loss": 0.4303, "step": 18688 }, { "epoch": 10.440782122905027, "grad_norm": 0.6329213380813599, "learning_rate": 0.0004796078431372549, "loss": 0.5215, "step": 18689 }, { 
"epoch": 10.441340782122905, "grad_norm": 0.4546065330505371, "learning_rate": 0.0004795798319327731, "loss": 0.42, "step": 18690 }, { "epoch": 10.441899441340782, "grad_norm": 0.5098863840103149, "learning_rate": 0.00047955182072829136, "loss": 0.353, "step": 18691 }, { "epoch": 10.442458100558659, "grad_norm": 0.455161452293396, "learning_rate": 0.0004795238095238095, "loss": 0.3969, "step": 18692 }, { "epoch": 10.443016759776537, "grad_norm": 1.7095807790756226, "learning_rate": 0.0004794957983193277, "loss": 0.5034, "step": 18693 }, { "epoch": 10.443575418994413, "grad_norm": 0.44077131152153015, "learning_rate": 0.000479467787114846, "loss": 0.3477, "step": 18694 }, { "epoch": 10.44413407821229, "grad_norm": 0.5123345851898193, "learning_rate": 0.00047943977591036413, "loss": 0.3586, "step": 18695 }, { "epoch": 10.444692737430168, "grad_norm": 0.6031753420829773, "learning_rate": 0.0004794117647058824, "loss": 0.5442, "step": 18696 }, { "epoch": 10.445251396648045, "grad_norm": 0.49947431683540344, "learning_rate": 0.00047938375350140054, "loss": 0.3883, "step": 18697 }, { "epoch": 10.445810055865921, "grad_norm": 0.5277776122093201, "learning_rate": 0.00047935574229691875, "loss": 0.4561, "step": 18698 }, { "epoch": 10.446368715083798, "grad_norm": 0.680150032043457, "learning_rate": 0.000479327731092437, "loss": 0.5531, "step": 18699 }, { "epoch": 10.446927374301676, "grad_norm": 0.8029719591140747, "learning_rate": 0.00047929971988795516, "loss": 0.3867, "step": 18700 }, { "epoch": 10.447486033519553, "grad_norm": 1.9289336204528809, "learning_rate": 0.0004792717086834734, "loss": 0.4727, "step": 18701 }, { "epoch": 10.44804469273743, "grad_norm": 0.9800059199333191, "learning_rate": 0.00047924369747899163, "loss": 0.4341, "step": 18702 }, { "epoch": 10.448603351955308, "grad_norm": 0.39864057302474976, "learning_rate": 0.0004792156862745098, "loss": 0.3679, "step": 18703 }, { "epoch": 10.449162011173184, "grad_norm": 0.4866752326488495, "learning_rate": 
0.00047918767507002804, "loss": 0.4556, "step": 18704 }, { "epoch": 10.449720670391061, "grad_norm": 0.8988238573074341, "learning_rate": 0.0004791596638655462, "loss": 0.4758, "step": 18705 }, { "epoch": 10.45027932960894, "grad_norm": 0.39659643173217773, "learning_rate": 0.00047913165266106445, "loss": 0.3788, "step": 18706 }, { "epoch": 10.450837988826816, "grad_norm": 0.4448933005332947, "learning_rate": 0.00047910364145658266, "loss": 0.434, "step": 18707 }, { "epoch": 10.451396648044692, "grad_norm": 0.45187652111053467, "learning_rate": 0.0004790756302521008, "loss": 0.3899, "step": 18708 }, { "epoch": 10.451955307262569, "grad_norm": 0.406544953584671, "learning_rate": 0.00047904761904761907, "loss": 0.4306, "step": 18709 }, { "epoch": 10.452513966480447, "grad_norm": 0.4826112687587738, "learning_rate": 0.0004790196078431373, "loss": 0.5228, "step": 18710 }, { "epoch": 10.453072625698324, "grad_norm": 1.4266471862792969, "learning_rate": 0.0004789915966386555, "loss": 0.4651, "step": 18711 }, { "epoch": 10.4536312849162, "grad_norm": 0.47993677854537964, "learning_rate": 0.0004789635854341737, "loss": 0.3941, "step": 18712 }, { "epoch": 10.454189944134079, "grad_norm": 0.4121052026748657, "learning_rate": 0.00047893557422969184, "loss": 0.4249, "step": 18713 }, { "epoch": 10.454748603351955, "grad_norm": 0.5191360116004944, "learning_rate": 0.0004789075630252101, "loss": 0.4523, "step": 18714 }, { "epoch": 10.455307262569832, "grad_norm": 2.145153522491455, "learning_rate": 0.0004788795518207283, "loss": 0.3553, "step": 18715 }, { "epoch": 10.45586592178771, "grad_norm": 0.5439107418060303, "learning_rate": 0.0004788515406162465, "loss": 0.4296, "step": 18716 }, { "epoch": 10.456424581005587, "grad_norm": 0.5652459263801575, "learning_rate": 0.0004788235294117647, "loss": 0.4462, "step": 18717 }, { "epoch": 10.456983240223463, "grad_norm": 0.9325499534606934, "learning_rate": 0.0004787955182072829, "loss": 0.4919, "step": 18718 }, { "epoch": 
10.45754189944134, "grad_norm": 0.6296287178993225, "learning_rate": 0.00047876750700280113, "loss": 0.5705, "step": 18719 }, { "epoch": 10.458100558659218, "grad_norm": 0.44084757566452026, "learning_rate": 0.00047873949579831934, "loss": 0.4614, "step": 18720 }, { "epoch": 10.458659217877095, "grad_norm": 0.40304630994796753, "learning_rate": 0.00047871148459383754, "loss": 0.3985, "step": 18721 }, { "epoch": 10.459217877094972, "grad_norm": 0.9078214168548584, "learning_rate": 0.00047868347338935575, "loss": 0.4369, "step": 18722 }, { "epoch": 10.45977653631285, "grad_norm": 0.7024386525154114, "learning_rate": 0.00047865546218487395, "loss": 0.3923, "step": 18723 }, { "epoch": 10.460335195530726, "grad_norm": 0.49723172187805176, "learning_rate": 0.00047862745098039216, "loss": 0.5408, "step": 18724 }, { "epoch": 10.460893854748603, "grad_norm": 0.626876711845398, "learning_rate": 0.00047859943977591037, "loss": 0.6996, "step": 18725 }, { "epoch": 10.461452513966481, "grad_norm": 0.4237803518772125, "learning_rate": 0.0004785714285714286, "loss": 0.3362, "step": 18726 }, { "epoch": 10.462011173184358, "grad_norm": 0.4565712809562683, "learning_rate": 0.0004785434173669468, "loss": 0.3611, "step": 18727 }, { "epoch": 10.462569832402234, "grad_norm": 2.418124198913574, "learning_rate": 0.000478515406162465, "loss": 0.3753, "step": 18728 }, { "epoch": 10.463128491620111, "grad_norm": 0.3883202373981476, "learning_rate": 0.0004784873949579832, "loss": 0.3862, "step": 18729 }, { "epoch": 10.46368715083799, "grad_norm": 0.3922578692436218, "learning_rate": 0.0004784593837535014, "loss": 0.3851, "step": 18730 }, { "epoch": 10.464245810055866, "grad_norm": 0.4642290771007538, "learning_rate": 0.00047843137254901966, "loss": 0.3958, "step": 18731 }, { "epoch": 10.464804469273743, "grad_norm": 0.47723084688186646, "learning_rate": 0.0004784033613445378, "loss": 0.4896, "step": 18732 }, { "epoch": 10.46536312849162, "grad_norm": 0.41552916169166565, "learning_rate": 
0.000478375350140056, "loss": 0.3977, "step": 18733 }, { "epoch": 10.465921787709497, "grad_norm": 0.572848916053772, "learning_rate": 0.0004783473389355743, "loss": 0.3607, "step": 18734 }, { "epoch": 10.466480446927374, "grad_norm": 0.4564259648323059, "learning_rate": 0.0004783193277310924, "loss": 0.4351, "step": 18735 }, { "epoch": 10.46703910614525, "grad_norm": 0.9085177779197693, "learning_rate": 0.0004782913165266107, "loss": 0.4619, "step": 18736 }, { "epoch": 10.467597765363129, "grad_norm": 0.9216148257255554, "learning_rate": 0.00047826330532212884, "loss": 0.4802, "step": 18737 }, { "epoch": 10.468156424581005, "grad_norm": 0.6707174181938171, "learning_rate": 0.00047823529411764704, "loss": 0.3866, "step": 18738 }, { "epoch": 10.468715083798882, "grad_norm": 0.699027955532074, "learning_rate": 0.0004782072829131653, "loss": 0.4813, "step": 18739 }, { "epoch": 10.46927374301676, "grad_norm": 0.6003559231758118, "learning_rate": 0.00047817927170868346, "loss": 0.5355, "step": 18740 }, { "epoch": 10.469832402234637, "grad_norm": 0.5621857643127441, "learning_rate": 0.0004781512605042017, "loss": 0.3269, "step": 18741 }, { "epoch": 10.470391061452514, "grad_norm": 0.7335500121116638, "learning_rate": 0.0004781232492997199, "loss": 0.406, "step": 18742 }, { "epoch": 10.470949720670392, "grad_norm": 2.226038694381714, "learning_rate": 0.0004780952380952381, "loss": 0.4694, "step": 18743 }, { "epoch": 10.471508379888268, "grad_norm": 1.3616586923599243, "learning_rate": 0.00047806722689075633, "loss": 0.42, "step": 18744 }, { "epoch": 10.472067039106145, "grad_norm": 0.562004566192627, "learning_rate": 0.0004780392156862745, "loss": 0.4225, "step": 18745 }, { "epoch": 10.472625698324022, "grad_norm": 0.7751689553260803, "learning_rate": 0.00047801120448179275, "loss": 0.4119, "step": 18746 }, { "epoch": 10.4731843575419, "grad_norm": 0.6460176110267639, "learning_rate": 0.00047798319327731095, "loss": 0.5587, "step": 18747 }, { "epoch": 10.473743016759776, 
"grad_norm": 0.6039220690727234, "learning_rate": 0.0004779551820728291, "loss": 0.5091, "step": 18748 }, { "epoch": 10.474301675977653, "grad_norm": 0.5342893004417419, "learning_rate": 0.00047792717086834736, "loss": 0.3973, "step": 18749 }, { "epoch": 10.474860335195531, "grad_norm": 1.0211554765701294, "learning_rate": 0.00047789915966386557, "loss": 0.4281, "step": 18750 }, { "epoch": 10.475418994413408, "grad_norm": 0.4941284954547882, "learning_rate": 0.0004778711484593838, "loss": 0.3737, "step": 18751 }, { "epoch": 10.475977653631285, "grad_norm": 0.4917150139808655, "learning_rate": 0.000477843137254902, "loss": 0.4507, "step": 18752 }, { "epoch": 10.476536312849163, "grad_norm": 0.3774113953113556, "learning_rate": 0.00047781512605042013, "loss": 0.3939, "step": 18753 }, { "epoch": 10.47709497206704, "grad_norm": 0.6499609351158142, "learning_rate": 0.0004777871148459384, "loss": 0.4722, "step": 18754 }, { "epoch": 10.477653631284916, "grad_norm": 0.9482568502426147, "learning_rate": 0.0004777591036414566, "loss": 0.5944, "step": 18755 }, { "epoch": 10.478212290502793, "grad_norm": 0.6117006540298462, "learning_rate": 0.0004777310924369748, "loss": 0.4002, "step": 18756 }, { "epoch": 10.478770949720671, "grad_norm": 0.4160557687282562, "learning_rate": 0.000477703081232493, "loss": 0.4083, "step": 18757 }, { "epoch": 10.479329608938547, "grad_norm": 0.5017178654670715, "learning_rate": 0.0004776750700280112, "loss": 0.3784, "step": 18758 }, { "epoch": 10.479888268156424, "grad_norm": 0.6609714031219482, "learning_rate": 0.0004776470588235294, "loss": 0.4072, "step": 18759 }, { "epoch": 10.480446927374302, "grad_norm": 0.6508843302726746, "learning_rate": 0.00047761904761904763, "loss": 0.3628, "step": 18760 }, { "epoch": 10.481005586592179, "grad_norm": 0.9584119915962219, "learning_rate": 0.00047759103641456584, "loss": 0.5122, "step": 18761 }, { "epoch": 10.481564245810056, "grad_norm": 0.5339064002037048, "learning_rate": 0.00047756302521008404, 
"loss": 0.509, "step": 18762 }, { "epoch": 10.482122905027932, "grad_norm": 0.3849220275878906, "learning_rate": 0.00047753501400560225, "loss": 0.4129, "step": 18763 }, { "epoch": 10.48268156424581, "grad_norm": 0.5345190167427063, "learning_rate": 0.00047750700280112045, "loss": 0.3398, "step": 18764 }, { "epoch": 10.483240223463687, "grad_norm": 1.053317666053772, "learning_rate": 0.00047747899159663866, "loss": 0.3873, "step": 18765 }, { "epoch": 10.483798882681564, "grad_norm": 0.554428219795227, "learning_rate": 0.0004774509803921569, "loss": 0.3478, "step": 18766 }, { "epoch": 10.484357541899442, "grad_norm": 0.624885082244873, "learning_rate": 0.00047742296918767507, "loss": 0.5484, "step": 18767 }, { "epoch": 10.484916201117318, "grad_norm": 0.4288087785243988, "learning_rate": 0.0004773949579831933, "loss": 0.4341, "step": 18768 }, { "epoch": 10.485474860335195, "grad_norm": 0.4760594964027405, "learning_rate": 0.0004773669467787115, "loss": 0.3866, "step": 18769 }, { "epoch": 10.486033519553073, "grad_norm": 0.9241908192634583, "learning_rate": 0.0004773389355742297, "loss": 0.5935, "step": 18770 }, { "epoch": 10.48659217877095, "grad_norm": 0.5899703502655029, "learning_rate": 0.00047731092436974795, "loss": 0.4963, "step": 18771 }, { "epoch": 10.487150837988827, "grad_norm": 0.5740683078765869, "learning_rate": 0.0004772829131652661, "loss": 0.4553, "step": 18772 }, { "epoch": 10.487709497206703, "grad_norm": 0.6702711582183838, "learning_rate": 0.0004772549019607843, "loss": 0.5234, "step": 18773 }, { "epoch": 10.488268156424581, "grad_norm": 1.3503100872039795, "learning_rate": 0.00047722689075630257, "loss": 0.5251, "step": 18774 }, { "epoch": 10.488826815642458, "grad_norm": 0.42755404114723206, "learning_rate": 0.0004771988795518207, "loss": 0.4722, "step": 18775 }, { "epoch": 10.489385474860335, "grad_norm": 1.348813772201538, "learning_rate": 0.000477170868347339, "loss": 0.4844, "step": 18776 }, { "epoch": 10.489944134078213, "grad_norm": 
0.3657236397266388, "learning_rate": 0.00047714285714285713, "loss": 0.3683, "step": 18777 }, { "epoch": 10.49050279329609, "grad_norm": 0.42276760935783386, "learning_rate": 0.00047711484593837534, "loss": 0.438, "step": 18778 }, { "epoch": 10.491061452513966, "grad_norm": 0.5383284687995911, "learning_rate": 0.0004770868347338936, "loss": 0.6893, "step": 18779 }, { "epoch": 10.491620111731844, "grad_norm": 0.3489207625389099, "learning_rate": 0.00047705882352941175, "loss": 0.3293, "step": 18780 }, { "epoch": 10.492178770949721, "grad_norm": 0.3855345547199249, "learning_rate": 0.00047703081232493, "loss": 0.3083, "step": 18781 }, { "epoch": 10.492737430167598, "grad_norm": 0.6857742667198181, "learning_rate": 0.0004770028011204482, "loss": 0.4679, "step": 18782 }, { "epoch": 10.493296089385474, "grad_norm": 0.6259848475456238, "learning_rate": 0.00047697478991596637, "loss": 0.4955, "step": 18783 }, { "epoch": 10.493854748603352, "grad_norm": 0.3654603064060211, "learning_rate": 0.00047694677871148463, "loss": 0.4107, "step": 18784 }, { "epoch": 10.494413407821229, "grad_norm": 0.9129793643951416, "learning_rate": 0.0004769187675070028, "loss": 0.3774, "step": 18785 }, { "epoch": 10.494972067039106, "grad_norm": 0.4993589520454407, "learning_rate": 0.00047689075630252104, "loss": 0.3951, "step": 18786 }, { "epoch": 10.495530726256984, "grad_norm": 0.4195460081100464, "learning_rate": 0.00047686274509803925, "loss": 0.5221, "step": 18787 }, { "epoch": 10.49608938547486, "grad_norm": 0.5069847106933594, "learning_rate": 0.0004768347338935574, "loss": 0.3223, "step": 18788 }, { "epoch": 10.496648044692737, "grad_norm": 0.47360026836395264, "learning_rate": 0.00047680672268907566, "loss": 0.3753, "step": 18789 }, { "epoch": 10.497206703910614, "grad_norm": 0.45140454173088074, "learning_rate": 0.00047677871148459386, "loss": 0.5297, "step": 18790 }, { "epoch": 10.497765363128492, "grad_norm": 0.47519221901893616, "learning_rate": 0.00047675070028011207, "loss": 
0.516, "step": 18791 }, { "epoch": 10.498324022346369, "grad_norm": 0.4776628017425537, "learning_rate": 0.0004767226890756303, "loss": 0.5112, "step": 18792 }, { "epoch": 10.498882681564245, "grad_norm": 0.4391259253025055, "learning_rate": 0.00047669467787114843, "loss": 0.4241, "step": 18793 }, { "epoch": 10.499441340782123, "grad_norm": Infinity, "learning_rate": 0.00047669467787114843, "loss": 0.44, "step": 18794 }, { "epoch": 10.5, "grad_norm": 1.7532165050506592, "learning_rate": 0.0004766666666666667, "loss": 0.5259, "step": 18795 }, { "epoch": 10.500558659217877, "grad_norm": 0.8160237669944763, "learning_rate": 0.0004766386554621849, "loss": 0.4555, "step": 18796 }, { "epoch": 10.501117318435755, "grad_norm": 0.5965665578842163, "learning_rate": 0.0004766106442577031, "loss": 0.4871, "step": 18797 }, { "epoch": 10.501675977653631, "grad_norm": 0.39612504839897156, "learning_rate": 0.0004765826330532213, "loss": 0.3631, "step": 18798 }, { "epoch": 10.502234636871508, "grad_norm": 0.5372137427330017, "learning_rate": 0.0004765546218487395, "loss": 0.4, "step": 18799 }, { "epoch": 10.502793296089386, "grad_norm": 0.531446099281311, "learning_rate": 0.0004765266106442577, "loss": 0.3325, "step": 18800 }, { "epoch": 10.503351955307263, "grad_norm": 0.6240618228912354, "learning_rate": 0.0004764985994397759, "loss": 0.3937, "step": 18801 }, { "epoch": 10.50391061452514, "grad_norm": 0.5450451970100403, "learning_rate": 0.0004764705882352941, "loss": 0.4917, "step": 18802 }, { "epoch": 10.504469273743016, "grad_norm": 14.657256126403809, "learning_rate": 0.00047644257703081234, "loss": 0.4132, "step": 18803 }, { "epoch": 10.505027932960894, "grad_norm": 0.6727287769317627, "learning_rate": 0.00047641456582633054, "loss": 0.4998, "step": 18804 }, { "epoch": 10.505586592178771, "grad_norm": 0.4053119719028473, "learning_rate": 0.00047638655462184875, "loss": 0.4285, "step": 18805 }, { "epoch": 10.506145251396648, "grad_norm": 1.2318850755691528, "learning_rate": 
0.00047635854341736695, "loss": 0.3816, "step": 18806 }, { "epoch": 10.506703910614526, "grad_norm": 0.5370670557022095, "learning_rate": 0.00047633053221288516, "loss": 0.443, "step": 18807 }, { "epoch": 10.507262569832402, "grad_norm": 0.5725485682487488, "learning_rate": 0.00047630252100840337, "loss": 0.3975, "step": 18808 }, { "epoch": 10.507821229050279, "grad_norm": 1.87415611743927, "learning_rate": 0.00047627450980392157, "loss": 0.4543, "step": 18809 }, { "epoch": 10.508379888268156, "grad_norm": 0.5333583354949951, "learning_rate": 0.0004762464985994398, "loss": 0.4313, "step": 18810 }, { "epoch": 10.508938547486034, "grad_norm": 0.4548228085041046, "learning_rate": 0.000476218487394958, "loss": 0.4386, "step": 18811 }, { "epoch": 10.50949720670391, "grad_norm": 0.6111850142478943, "learning_rate": 0.0004761904761904762, "loss": 0.4234, "step": 18812 }, { "epoch": 10.510055865921787, "grad_norm": 0.4857129156589508, "learning_rate": 0.0004761624649859944, "loss": 0.4298, "step": 18813 }, { "epoch": 10.510614525139665, "grad_norm": 1.2505449056625366, "learning_rate": 0.0004761344537815126, "loss": 0.3958, "step": 18814 }, { "epoch": 10.511173184357542, "grad_norm": 0.5724055171012878, "learning_rate": 0.00047610644257703086, "loss": 0.4273, "step": 18815 }, { "epoch": 10.511731843575419, "grad_norm": 0.46530163288116455, "learning_rate": 0.000476078431372549, "loss": 0.3641, "step": 18816 }, { "epoch": 10.512290502793297, "grad_norm": 5.036036491394043, "learning_rate": 0.0004760504201680672, "loss": 0.4276, "step": 18817 }, { "epoch": 10.512849162011173, "grad_norm": 0.4140670895576477, "learning_rate": 0.0004760224089635854, "loss": 0.4816, "step": 18818 }, { "epoch": 10.51340782122905, "grad_norm": 0.5452653169631958, "learning_rate": 0.00047599439775910363, "loss": 0.4379, "step": 18819 }, { "epoch": 10.513966480446927, "grad_norm": 0.5115152597427368, "learning_rate": 0.0004759663865546219, "loss": 0.476, "step": 18820 }, { "epoch": 
10.514525139664805, "grad_norm": 0.4841180443763733, "learning_rate": 0.00047593837535014004, "loss": 0.4552, "step": 18821 }, { "epoch": 10.515083798882682, "grad_norm": 0.4810382127761841, "learning_rate": 0.00047591036414565825, "loss": 0.3098, "step": 18822 }, { "epoch": 10.515642458100558, "grad_norm": 0.7918359637260437, "learning_rate": 0.0004758823529411765, "loss": 0.4614, "step": 18823 }, { "epoch": 10.516201117318436, "grad_norm": 0.5926598310470581, "learning_rate": 0.00047585434173669466, "loss": 0.3873, "step": 18824 }, { "epoch": 10.516759776536313, "grad_norm": 3.9362900257110596, "learning_rate": 0.0004758263305322129, "loss": 0.4953, "step": 18825 }, { "epoch": 10.51731843575419, "grad_norm": 0.5332849621772766, "learning_rate": 0.0004757983193277311, "loss": 0.4205, "step": 18826 }, { "epoch": 10.517877094972068, "grad_norm": 2.4900104999542236, "learning_rate": 0.0004757703081232493, "loss": 0.4053, "step": 18827 }, { "epoch": 10.518435754189944, "grad_norm": 0.5412886738777161, "learning_rate": 0.00047574229691876754, "loss": 0.3219, "step": 18828 }, { "epoch": 10.518994413407821, "grad_norm": 0.33584755659103394, "learning_rate": 0.0004757142857142857, "loss": 0.3529, "step": 18829 }, { "epoch": 10.519553072625698, "grad_norm": 0.39209020137786865, "learning_rate": 0.00047568627450980395, "loss": 0.3778, "step": 18830 }, { "epoch": 10.520111731843576, "grad_norm": 0.4009786546230316, "learning_rate": 0.00047565826330532216, "loss": 0.3357, "step": 18831 }, { "epoch": 10.520670391061453, "grad_norm": 0.4256860911846161, "learning_rate": 0.0004756302521008403, "loss": 0.437, "step": 18832 }, { "epoch": 10.521229050279329, "grad_norm": 1.2227140665054321, "learning_rate": 0.00047560224089635857, "loss": 0.4364, "step": 18833 }, { "epoch": 10.521787709497207, "grad_norm": 0.4340318441390991, "learning_rate": 0.0004755742296918767, "loss": 0.3346, "step": 18834 }, { "epoch": 10.522346368715084, "grad_norm": 1.3686751127243042, "learning_rate": 
0.000475546218487395, "loss": 0.4971, "step": 18835 }, { "epoch": 10.52290502793296, "grad_norm": 4.517106056213379, "learning_rate": 0.0004755182072829132, "loss": 0.5161, "step": 18836 }, { "epoch": 10.523463687150837, "grad_norm": 0.4715244472026825, "learning_rate": 0.00047549019607843134, "loss": 0.3535, "step": 18837 }, { "epoch": 10.524022346368715, "grad_norm": 0.5791744589805603, "learning_rate": 0.0004754621848739496, "loss": 0.3698, "step": 18838 }, { "epoch": 10.524581005586592, "grad_norm": 1.0592786073684692, "learning_rate": 0.0004754341736694678, "loss": 0.3173, "step": 18839 }, { "epoch": 10.525139664804469, "grad_norm": 0.5131112933158875, "learning_rate": 0.000475406162464986, "loss": 0.4733, "step": 18840 }, { "epoch": 10.525698324022347, "grad_norm": 0.48055174946784973, "learning_rate": 0.0004753781512605042, "loss": 0.4621, "step": 18841 }, { "epoch": 10.526256983240224, "grad_norm": 0.5353572964668274, "learning_rate": 0.00047535014005602237, "loss": 0.5686, "step": 18842 }, { "epoch": 10.5268156424581, "grad_norm": 0.5716177225112915, "learning_rate": 0.00047532212885154063, "loss": 0.5158, "step": 18843 }, { "epoch": 10.527374301675978, "grad_norm": 1.1526488065719604, "learning_rate": 0.00047529411764705884, "loss": 0.3879, "step": 18844 }, { "epoch": 10.527932960893855, "grad_norm": 0.5159631967544556, "learning_rate": 0.00047526610644257704, "loss": 0.428, "step": 18845 }, { "epoch": 10.528491620111732, "grad_norm": 0.3464567959308624, "learning_rate": 0.00047523809523809525, "loss": 0.4076, "step": 18846 }, { "epoch": 10.529050279329608, "grad_norm": 0.5847417712211609, "learning_rate": 0.00047521008403361345, "loss": 0.4315, "step": 18847 }, { "epoch": 10.529608938547486, "grad_norm": 0.43284904956817627, "learning_rate": 0.00047518207282913166, "loss": 0.3904, "step": 18848 }, { "epoch": 10.530167597765363, "grad_norm": 0.5853347778320312, "learning_rate": 0.00047515406162464987, "loss": 0.5065, "step": 18849 }, { "epoch": 
10.53072625698324, "grad_norm": 8.45334243774414, "learning_rate": 0.00047512605042016807, "loss": 0.4646, "step": 18850 }, { "epoch": 10.531284916201118, "grad_norm": 0.42541107535362244, "learning_rate": 0.0004750980392156863, "loss": 0.4477, "step": 18851 }, { "epoch": 10.531843575418995, "grad_norm": 0.36790648102760315, "learning_rate": 0.0004750700280112045, "loss": 0.4281, "step": 18852 }, { "epoch": 10.532402234636871, "grad_norm": 0.46153897047042847, "learning_rate": 0.0004750420168067227, "loss": 0.417, "step": 18853 }, { "epoch": 10.53296089385475, "grad_norm": 0.6567018628120422, "learning_rate": 0.0004750140056022409, "loss": 0.5118, "step": 18854 }, { "epoch": 10.533519553072626, "grad_norm": 0.5465989112854004, "learning_rate": 0.00047498599439775916, "loss": 0.3924, "step": 18855 }, { "epoch": 10.534078212290503, "grad_norm": 0.8438539505004883, "learning_rate": 0.0004749579831932773, "loss": 0.5306, "step": 18856 }, { "epoch": 10.53463687150838, "grad_norm": 0.49790751934051514, "learning_rate": 0.0004749299719887955, "loss": 0.482, "step": 18857 }, { "epoch": 10.535195530726257, "grad_norm": 0.4106896221637726, "learning_rate": 0.0004749019607843137, "loss": 0.4669, "step": 18858 }, { "epoch": 10.535754189944134, "grad_norm": 0.42277494072914124, "learning_rate": 0.0004748739495798319, "loss": 0.4363, "step": 18859 }, { "epoch": 10.53631284916201, "grad_norm": 0.712372362613678, "learning_rate": 0.0004748459383753502, "loss": 0.3562, "step": 18860 }, { "epoch": 10.536871508379889, "grad_norm": 0.6095969676971436, "learning_rate": 0.00047481792717086834, "loss": 0.4157, "step": 18861 }, { "epoch": 10.537430167597766, "grad_norm": 0.4902176558971405, "learning_rate": 0.00047478991596638654, "loss": 0.4956, "step": 18862 }, { "epoch": 10.537988826815642, "grad_norm": 0.5600163340568542, "learning_rate": 0.0004747619047619048, "loss": 0.4604, "step": 18863 }, { "epoch": 10.538547486033519, "grad_norm": 0.53652024269104, "learning_rate": 
0.00047473389355742296, "loss": 0.3888, "step": 18864 }, { "epoch": 10.539106145251397, "grad_norm": 0.950908899307251, "learning_rate": 0.0004747058823529412, "loss": 0.6144, "step": 18865 }, { "epoch": 10.539664804469274, "grad_norm": 1.8051820993423462, "learning_rate": 0.00047467787114845937, "loss": 0.3821, "step": 18866 }, { "epoch": 10.54022346368715, "grad_norm": 0.35427772998809814, "learning_rate": 0.0004746498599439776, "loss": 0.3084, "step": 18867 }, { "epoch": 10.540782122905028, "grad_norm": 0.42422544956207275, "learning_rate": 0.00047462184873949583, "loss": 0.5019, "step": 18868 }, { "epoch": 10.541340782122905, "grad_norm": 0.6202530860900879, "learning_rate": 0.000474593837535014, "loss": 0.583, "step": 18869 }, { "epoch": 10.541899441340782, "grad_norm": 0.4086197316646576, "learning_rate": 0.00047456582633053225, "loss": 0.441, "step": 18870 }, { "epoch": 10.54245810055866, "grad_norm": 0.4702913165092468, "learning_rate": 0.00047453781512605045, "loss": 0.3333, "step": 18871 }, { "epoch": 10.543016759776537, "grad_norm": 0.420767217874527, "learning_rate": 0.0004745098039215686, "loss": 0.4638, "step": 18872 }, { "epoch": 10.543575418994413, "grad_norm": 0.43494337797164917, "learning_rate": 0.00047448179271708686, "loss": 0.4266, "step": 18873 }, { "epoch": 10.544134078212291, "grad_norm": 0.4264630973339081, "learning_rate": 0.000474453781512605, "loss": 0.4766, "step": 18874 }, { "epoch": 10.544692737430168, "grad_norm": 0.693032443523407, "learning_rate": 0.0004744257703081233, "loss": 0.4934, "step": 18875 }, { "epoch": 10.545251396648045, "grad_norm": 0.4030987024307251, "learning_rate": 0.0004743977591036415, "loss": 0.4482, "step": 18876 }, { "epoch": 10.545810055865921, "grad_norm": 0.4038350284099579, "learning_rate": 0.00047436974789915963, "loss": 0.4302, "step": 18877 }, { "epoch": 10.5463687150838, "grad_norm": 3.9995579719543457, "learning_rate": 0.0004743417366946779, "loss": 0.3512, "step": 18878 }, { "epoch": 
10.546927374301676, "grad_norm": 0.6617869138717651, "learning_rate": 0.0004743137254901961, "loss": 0.5645, "step": 18879 }, { "epoch": 10.547486033519553, "grad_norm": 0.6665805578231812, "learning_rate": 0.0004742857142857143, "loss": 0.4271, "step": 18880 }, { "epoch": 10.548044692737431, "grad_norm": 0.43734288215637207, "learning_rate": 0.0004742577030812325, "loss": 0.396, "step": 18881 }, { "epoch": 10.548603351955308, "grad_norm": 0.5726372003555298, "learning_rate": 0.00047422969187675066, "loss": 0.4307, "step": 18882 }, { "epoch": 10.549162011173184, "grad_norm": 0.8013854026794434, "learning_rate": 0.0004742016806722689, "loss": 0.4529, "step": 18883 }, { "epoch": 10.54972067039106, "grad_norm": 0.3717445433139801, "learning_rate": 0.00047417366946778713, "loss": 0.3774, "step": 18884 }, { "epoch": 10.550279329608939, "grad_norm": 0.5159956812858582, "learning_rate": 0.00047414565826330534, "loss": 0.3561, "step": 18885 }, { "epoch": 10.550837988826816, "grad_norm": 0.44090867042541504, "learning_rate": 0.00047411764705882354, "loss": 0.4146, "step": 18886 }, { "epoch": 10.551396648044692, "grad_norm": 0.4632580578327179, "learning_rate": 0.00047408963585434175, "loss": 0.428, "step": 18887 }, { "epoch": 10.55195530726257, "grad_norm": 0.5727682113647461, "learning_rate": 0.00047406162464985995, "loss": 0.5145, "step": 18888 }, { "epoch": 10.552513966480447, "grad_norm": 0.5884736180305481, "learning_rate": 0.00047403361344537816, "loss": 0.4905, "step": 18889 }, { "epoch": 10.553072625698324, "grad_norm": 0.8395491242408752, "learning_rate": 0.0004740056022408964, "loss": 0.455, "step": 18890 }, { "epoch": 10.553631284916202, "grad_norm": 0.5693738460540771, "learning_rate": 0.00047397759103641457, "loss": 0.5395, "step": 18891 }, { "epoch": 10.554189944134079, "grad_norm": 0.6569846272468567, "learning_rate": 0.0004739495798319328, "loss": 0.5621, "step": 18892 }, { "epoch": 10.554748603351955, "grad_norm": 0.7034730315208435, "learning_rate": 
0.000473921568627451, "loss": 0.5091, "step": 18893 }, { "epoch": 10.555307262569832, "grad_norm": 0.8309515714645386, "learning_rate": 0.0004738935574229692, "loss": 0.4025, "step": 18894 }, { "epoch": 10.55586592178771, "grad_norm": 1.3515961170196533, "learning_rate": 0.00047386554621848745, "loss": 0.4052, "step": 18895 }, { "epoch": 10.556424581005587, "grad_norm": 0.4653107821941376, "learning_rate": 0.0004738375350140056, "loss": 0.3627, "step": 18896 }, { "epoch": 10.556983240223463, "grad_norm": 0.6703113317489624, "learning_rate": 0.0004738095238095238, "loss": 0.4167, "step": 18897 }, { "epoch": 10.557541899441341, "grad_norm": 0.6055869460105896, "learning_rate": 0.00047378151260504207, "loss": 0.5314, "step": 18898 }, { "epoch": 10.558100558659218, "grad_norm": 0.47751203179359436, "learning_rate": 0.0004737535014005602, "loss": 0.4254, "step": 18899 }, { "epoch": 10.558659217877095, "grad_norm": 0.5802996754646301, "learning_rate": 0.0004737254901960785, "loss": 0.3882, "step": 18900 }, { "epoch": 10.559217877094973, "grad_norm": 0.5866131782531738, "learning_rate": 0.00047369747899159663, "loss": 0.4328, "step": 18901 }, { "epoch": 10.55977653631285, "grad_norm": 0.6149768233299255, "learning_rate": 0.00047366946778711484, "loss": 0.4349, "step": 18902 }, { "epoch": 10.560335195530726, "grad_norm": 0.4731472432613373, "learning_rate": 0.0004736414565826331, "loss": 0.5065, "step": 18903 }, { "epoch": 10.560893854748603, "grad_norm": 1.0369417667388916, "learning_rate": 0.00047361344537815125, "loss": 0.5574, "step": 18904 }, { "epoch": 10.561452513966481, "grad_norm": 0.5057957768440247, "learning_rate": 0.0004735854341736695, "loss": 0.497, "step": 18905 }, { "epoch": 10.562011173184358, "grad_norm": 0.7955731153488159, "learning_rate": 0.0004735574229691877, "loss": 0.5806, "step": 18906 }, { "epoch": 10.562569832402234, "grad_norm": 1.3785350322723389, "learning_rate": 0.00047352941176470587, "loss": 0.6099, "step": 18907 }, { "epoch": 
10.563128491620112, "grad_norm": 1.5240799188613892, "learning_rate": 0.00047350140056022413, "loss": 0.493, "step": 18908 }, { "epoch": 10.563687150837989, "grad_norm": 0.989829421043396, "learning_rate": 0.0004734733893557423, "loss": 0.5169, "step": 18909 }, { "epoch": 10.564245810055866, "grad_norm": 0.41497549414634705, "learning_rate": 0.00047344537815126054, "loss": 0.4873, "step": 18910 }, { "epoch": 10.564804469273742, "grad_norm": 0.40413084626197815, "learning_rate": 0.00047341736694677875, "loss": 0.3753, "step": 18911 }, { "epoch": 10.56536312849162, "grad_norm": 0.36074674129486084, "learning_rate": 0.0004733893557422969, "loss": 0.3289, "step": 18912 }, { "epoch": 10.565921787709497, "grad_norm": 0.8511361479759216, "learning_rate": 0.00047336134453781516, "loss": 0.4386, "step": 18913 }, { "epoch": 10.566480446927374, "grad_norm": 0.5242519378662109, "learning_rate": 0.00047333333333333336, "loss": 0.4989, "step": 18914 }, { "epoch": 10.567039106145252, "grad_norm": 0.5416566729545593, "learning_rate": 0.0004733053221288515, "loss": 0.4007, "step": 18915 }, { "epoch": 10.567597765363129, "grad_norm": 0.49826931953430176, "learning_rate": 0.0004732773109243698, "loss": 0.4561, "step": 18916 }, { "epoch": 10.568156424581005, "grad_norm": 0.6546459197998047, "learning_rate": 0.00047324929971988793, "loss": 0.3232, "step": 18917 }, { "epoch": 10.568715083798883, "grad_norm": 0.6619869470596313, "learning_rate": 0.0004732212885154062, "loss": 0.3627, "step": 18918 }, { "epoch": 10.56927374301676, "grad_norm": 0.720614492893219, "learning_rate": 0.0004731932773109244, "loss": 0.4592, "step": 18919 }, { "epoch": 10.569832402234637, "grad_norm": 0.6503860950469971, "learning_rate": 0.00047316526610644255, "loss": 0.3995, "step": 18920 }, { "epoch": 10.570391061452513, "grad_norm": 0.45763805508613586, "learning_rate": 0.0004731372549019608, "loss": 0.3341, "step": 18921 }, { "epoch": 10.570949720670392, "grad_norm": 0.5539149045944214, "learning_rate": 
0.000473109243697479, "loss": 0.4985, "step": 18922 }, { "epoch": 10.571508379888268, "grad_norm": 0.3869083523750305, "learning_rate": 0.0004730812324929972, "loss": 0.3519, "step": 18923 }, { "epoch": 10.572067039106145, "grad_norm": 0.45173728466033936, "learning_rate": 0.0004730532212885154, "loss": 0.5502, "step": 18924 }, { "epoch": 10.572625698324023, "grad_norm": 0.47461721301078796, "learning_rate": 0.0004730252100840336, "loss": 0.3923, "step": 18925 }, { "epoch": 10.5731843575419, "grad_norm": 0.7339091897010803, "learning_rate": 0.00047299719887955184, "loss": 0.4437, "step": 18926 }, { "epoch": 10.573743016759776, "grad_norm": 0.4360916316509247, "learning_rate": 0.00047296918767507004, "loss": 0.354, "step": 18927 }, { "epoch": 10.574301675977654, "grad_norm": 0.882824718952179, "learning_rate": 0.00047294117647058825, "loss": 0.417, "step": 18928 }, { "epoch": 10.574860335195531, "grad_norm": 0.5574464797973633, "learning_rate": 0.00047291316526610645, "loss": 0.5044, "step": 18929 }, { "epoch": 10.575418994413408, "grad_norm": 0.8435484170913696, "learning_rate": 0.00047288515406162466, "loss": 0.4042, "step": 18930 }, { "epoch": 10.575977653631284, "grad_norm": 0.6394529938697815, "learning_rate": 0.00047285714285714287, "loss": 0.3711, "step": 18931 }, { "epoch": 10.576536312849163, "grad_norm": 0.9052165746688843, "learning_rate": 0.00047282913165266107, "loss": 0.4115, "step": 18932 }, { "epoch": 10.577094972067039, "grad_norm": 0.5005930066108704, "learning_rate": 0.0004728011204481793, "loss": 0.4264, "step": 18933 }, { "epoch": 10.577653631284916, "grad_norm": 0.8237519264221191, "learning_rate": 0.0004727731092436975, "loss": 0.4763, "step": 18934 }, { "epoch": 10.578212290502794, "grad_norm": 1.2540061473846436, "learning_rate": 0.0004727450980392157, "loss": 0.6188, "step": 18935 }, { "epoch": 10.57877094972067, "grad_norm": 1.9022924900054932, "learning_rate": 0.0004727170868347339, "loss": 0.4723, "step": 18936 }, { "epoch": 
10.579329608938547, "grad_norm": 0.49300217628479004, "learning_rate": 0.0004726890756302521, "loss": 0.3986, "step": 18937 }, { "epoch": 10.579888268156424, "grad_norm": 5.604837417602539, "learning_rate": 0.00047266106442577036, "loss": 0.4071, "step": 18938 }, { "epoch": 10.580446927374302, "grad_norm": 0.6023059487342834, "learning_rate": 0.0004726330532212885, "loss": 0.466, "step": 18939 }, { "epoch": 10.581005586592179, "grad_norm": 0.8436830043792725, "learning_rate": 0.0004726050420168067, "loss": 0.3965, "step": 18940 }, { "epoch": 10.581564245810055, "grad_norm": 0.4717503488063812, "learning_rate": 0.0004725770308123249, "loss": 0.4241, "step": 18941 }, { "epoch": 10.582122905027934, "grad_norm": 0.818313717842102, "learning_rate": 0.00047254901960784313, "loss": 0.4704, "step": 18942 }, { "epoch": 10.58268156424581, "grad_norm": 0.45900532603263855, "learning_rate": 0.0004725210084033614, "loss": 0.46, "step": 18943 }, { "epoch": 10.583240223463687, "grad_norm": 0.47023844718933105, "learning_rate": 0.00047249299719887954, "loss": 0.4157, "step": 18944 }, { "epoch": 10.583798882681565, "grad_norm": 0.5904989838600159, "learning_rate": 0.00047246498599439775, "loss": 0.4479, "step": 18945 }, { "epoch": 10.584357541899442, "grad_norm": 0.5782806873321533, "learning_rate": 0.000472436974789916, "loss": 0.4258, "step": 18946 }, { "epoch": 10.584916201117318, "grad_norm": 0.9767335653305054, "learning_rate": 0.00047240896358543416, "loss": 0.4605, "step": 18947 }, { "epoch": 10.585474860335196, "grad_norm": 1.2483271360397339, "learning_rate": 0.0004723809523809524, "loss": 0.5678, "step": 18948 }, { "epoch": 10.586033519553073, "grad_norm": 0.3802148997783661, "learning_rate": 0.0004723529411764706, "loss": 0.4135, "step": 18949 }, { "epoch": 10.58659217877095, "grad_norm": 0.6582540273666382, "learning_rate": 0.0004723249299719888, "loss": 0.4118, "step": 18950 }, { "epoch": 10.587150837988826, "grad_norm": 0.6382595896720886, "learning_rate": 
0.00047229691876750704, "loss": 0.43, "step": 18951 }, { "epoch": 10.587709497206705, "grad_norm": 0.43404585123062134, "learning_rate": 0.0004722689075630252, "loss": 0.4691, "step": 18952 }, { "epoch": 10.588268156424581, "grad_norm": 1.5711432695388794, "learning_rate": 0.00047224089635854345, "loss": 0.4473, "step": 18953 }, { "epoch": 10.588826815642458, "grad_norm": 0.5249388217926025, "learning_rate": 0.00047221288515406166, "loss": 0.3789, "step": 18954 }, { "epoch": 10.589385474860336, "grad_norm": 1.7224370241165161, "learning_rate": 0.0004721848739495798, "loss": 0.4746, "step": 18955 }, { "epoch": 10.589944134078213, "grad_norm": 0.6495590806007385, "learning_rate": 0.00047215686274509807, "loss": 0.4295, "step": 18956 }, { "epoch": 10.59050279329609, "grad_norm": 0.6040468215942383, "learning_rate": 0.0004721288515406162, "loss": 0.5346, "step": 18957 }, { "epoch": 10.591061452513966, "grad_norm": 0.6303696036338806, "learning_rate": 0.0004721008403361345, "loss": 0.38, "step": 18958 }, { "epoch": 10.591620111731844, "grad_norm": 0.5952167510986328, "learning_rate": 0.0004720728291316527, "loss": 0.4043, "step": 18959 }, { "epoch": 10.59217877094972, "grad_norm": 0.42092835903167725, "learning_rate": 0.00047204481792717084, "loss": 0.4298, "step": 18960 }, { "epoch": 10.592737430167597, "grad_norm": 3.261221408843994, "learning_rate": 0.0004720168067226891, "loss": 0.4501, "step": 18961 }, { "epoch": 10.593296089385476, "grad_norm": 1.8740071058273315, "learning_rate": 0.0004719887955182073, "loss": 0.4757, "step": 18962 }, { "epoch": 10.593854748603352, "grad_norm": 0.4286591708660126, "learning_rate": 0.0004719607843137255, "loss": 0.3307, "step": 18963 }, { "epoch": 10.594413407821229, "grad_norm": 1.3903018236160278, "learning_rate": 0.0004719327731092437, "loss": 0.4135, "step": 18964 }, { "epoch": 10.594972067039105, "grad_norm": 0.5239282846450806, "learning_rate": 0.00047190476190476187, "loss": 0.3613, "step": 18965 }, { "epoch": 
10.595530726256984, "grad_norm": 0.5262951850891113, "learning_rate": 0.00047187675070028013, "loss": 0.4185, "step": 18966 }, { "epoch": 10.59608938547486, "grad_norm": 0.7183969616889954, "learning_rate": 0.00047184873949579834, "loss": 0.3164, "step": 18967 }, { "epoch": 10.596648044692737, "grad_norm": 0.7471548318862915, "learning_rate": 0.00047182072829131654, "loss": 0.4525, "step": 18968 }, { "epoch": 10.597206703910615, "grad_norm": 0.4025716483592987, "learning_rate": 0.00047179271708683475, "loss": 0.4073, "step": 18969 }, { "epoch": 10.597765363128492, "grad_norm": 0.33605897426605225, "learning_rate": 0.00047176470588235295, "loss": 0.3077, "step": 18970 }, { "epoch": 10.598324022346368, "grad_norm": 0.38188832998275757, "learning_rate": 0.00047173669467787116, "loss": 0.4213, "step": 18971 }, { "epoch": 10.598882681564247, "grad_norm": 0.3840387165546417, "learning_rate": 0.00047170868347338937, "loss": 0.4146, "step": 18972 }, { "epoch": 10.599441340782123, "grad_norm": 0.32684099674224854, "learning_rate": 0.00047168067226890757, "loss": 0.3573, "step": 18973 }, { "epoch": 10.6, "grad_norm": 0.4086560308933258, "learning_rate": 0.0004716526610644258, "loss": 0.3264, "step": 18974 }, { "epoch": 10.600558659217878, "grad_norm": 0.953455924987793, "learning_rate": 0.000471624649859944, "loss": 0.3764, "step": 18975 }, { "epoch": 10.601117318435755, "grad_norm": 1.29454505443573, "learning_rate": 0.0004715966386554622, "loss": 0.4365, "step": 18976 }, { "epoch": 10.601675977653631, "grad_norm": 0.34539467096328735, "learning_rate": 0.0004715686274509804, "loss": 0.3728, "step": 18977 }, { "epoch": 10.602234636871508, "grad_norm": 0.7120475769042969, "learning_rate": 0.00047154061624649866, "loss": 0.5695, "step": 18978 }, { "epoch": 10.602793296089386, "grad_norm": 1.0958791971206665, "learning_rate": 0.0004715126050420168, "loss": 0.4471, "step": 18979 }, { "epoch": 10.603351955307263, "grad_norm": 0.5674501657485962, "learning_rate": 
0.000471484593837535, "loss": 0.3892, "step": 18980 }, { "epoch": 10.60391061452514, "grad_norm": 2.854255199432373, "learning_rate": 0.0004714565826330532, "loss": 0.4022, "step": 18981 }, { "epoch": 10.604469273743018, "grad_norm": 0.5068051815032959, "learning_rate": 0.0004714285714285714, "loss": 0.4523, "step": 18982 }, { "epoch": 10.605027932960894, "grad_norm": 0.44000405073165894, "learning_rate": 0.0004714005602240897, "loss": 0.458, "step": 18983 }, { "epoch": 10.60558659217877, "grad_norm": 0.508782148361206, "learning_rate": 0.00047137254901960784, "loss": 0.3501, "step": 18984 }, { "epoch": 10.606145251396647, "grad_norm": 0.6872757077217102, "learning_rate": 0.00047134453781512604, "loss": 0.4488, "step": 18985 }, { "epoch": 10.606703910614526, "grad_norm": 0.7772918939590454, "learning_rate": 0.0004713165266106443, "loss": 0.34, "step": 18986 }, { "epoch": 10.607262569832402, "grad_norm": 1.1493984460830688, "learning_rate": 0.00047128851540616246, "loss": 0.3969, "step": 18987 }, { "epoch": 10.607821229050279, "grad_norm": 0.5101087689399719, "learning_rate": 0.0004712605042016807, "loss": 0.4444, "step": 18988 }, { "epoch": 10.608379888268157, "grad_norm": 0.354507178068161, "learning_rate": 0.00047123249299719887, "loss": 0.384, "step": 18989 }, { "epoch": 10.608938547486034, "grad_norm": 0.5652009844779968, "learning_rate": 0.0004712044817927171, "loss": 0.4034, "step": 18990 }, { "epoch": 10.60949720670391, "grad_norm": 1.1361922025680542, "learning_rate": 0.00047117647058823533, "loss": 0.5921, "step": 18991 }, { "epoch": 10.610055865921789, "grad_norm": 0.4728354215621948, "learning_rate": 0.0004711484593837535, "loss": 0.4261, "step": 18992 }, { "epoch": 10.610614525139665, "grad_norm": 0.40010306239128113, "learning_rate": 0.00047112044817927175, "loss": 0.4103, "step": 18993 }, { "epoch": 10.611173184357542, "grad_norm": 13.812115669250488, "learning_rate": 0.00047109243697478995, "loss": 0.4396, "step": 18994 }, { "epoch": 
10.611731843575418, "grad_norm": 0.5026459693908691, "learning_rate": 0.0004710644257703081, "loss": 0.4042, "step": 18995 }, { "epoch": 10.612290502793297, "grad_norm": 0.4620753824710846, "learning_rate": 0.00047103641456582636, "loss": 0.4696, "step": 18996 }, { "epoch": 10.612849162011173, "grad_norm": 0.864603579044342, "learning_rate": 0.0004710084033613445, "loss": 0.5172, "step": 18997 }, { "epoch": 10.61340782122905, "grad_norm": 2.830671787261963, "learning_rate": 0.0004709803921568628, "loss": 0.3802, "step": 18998 }, { "epoch": 10.613966480446928, "grad_norm": 0.5160785913467407, "learning_rate": 0.000470952380952381, "loss": 0.5051, "step": 18999 }, { "epoch": 10.614525139664805, "grad_norm": 8.048046112060547, "learning_rate": 0.00047092436974789913, "loss": 0.417, "step": 19000 }, { "epoch": 10.614525139664805, "eval_cer": 0.08926603602718951, "eval_loss": 0.3345629572868347, "eval_runtime": 55.3272, "eval_samples_per_second": 82.021, "eval_steps_per_second": 5.133, "eval_wer": 0.35300689924862433, "step": 19000 }, { "epoch": 10.615083798882681, "grad_norm": 0.415183961391449, "learning_rate": 0.0004708963585434174, "loss": 0.5224, "step": 19001 }, { "epoch": 10.61564245810056, "grad_norm": 0.5513841509819031, "learning_rate": 0.0004708683473389356, "loss": 0.4743, "step": 19002 }, { "epoch": 10.616201117318436, "grad_norm": 1.6740401983261108, "learning_rate": 0.0004708403361344538, "loss": 0.5792, "step": 19003 }, { "epoch": 10.616759776536313, "grad_norm": 0.4966926574707031, "learning_rate": 0.000470812324929972, "loss": 0.4092, "step": 19004 }, { "epoch": 10.61731843575419, "grad_norm": 0.5723060369491577, "learning_rate": 0.00047078431372549016, "loss": 0.4056, "step": 19005 }, { "epoch": 10.617877094972068, "grad_norm": 0.4851645231246948, "learning_rate": 0.0004707563025210084, "loss": 0.4239, "step": 19006 }, { "epoch": 10.618435754189944, "grad_norm": 1.4907587766647339, "learning_rate": 0.00047072829131652663, "loss": 0.4596, "step": 19007 
}, { "epoch": 10.61899441340782, "grad_norm": 0.5339739918708801, "learning_rate": 0.00047070028011204484, "loss": 0.3565, "step": 19008 }, { "epoch": 10.619553072625699, "grad_norm": 0.6172865033149719, "learning_rate": 0.00047067226890756304, "loss": 0.3941, "step": 19009 }, { "epoch": 10.620111731843576, "grad_norm": 0.7645795941352844, "learning_rate": 0.00047064425770308125, "loss": 0.5722, "step": 19010 }, { "epoch": 10.620670391061452, "grad_norm": 0.5453724265098572, "learning_rate": 0.00047061624649859945, "loss": 0.5565, "step": 19011 }, { "epoch": 10.621229050279329, "grad_norm": 0.6668143272399902, "learning_rate": 0.00047058823529411766, "loss": 0.4093, "step": 19012 }, { "epoch": 10.621787709497207, "grad_norm": 2.8203282356262207, "learning_rate": 0.00047056022408963587, "loss": 0.5476, "step": 19013 }, { "epoch": 10.622346368715084, "grad_norm": 0.5626693367958069, "learning_rate": 0.00047053221288515407, "loss": 0.4818, "step": 19014 }, { "epoch": 10.62290502793296, "grad_norm": 0.4832860827445984, "learning_rate": 0.0004705042016806723, "loss": 0.4534, "step": 19015 }, { "epoch": 10.623463687150839, "grad_norm": 0.6268319487571716, "learning_rate": 0.0004704761904761905, "loss": 0.3492, "step": 19016 }, { "epoch": 10.624022346368715, "grad_norm": 0.43191006779670715, "learning_rate": 0.0004704481792717087, "loss": 0.4319, "step": 19017 }, { "epoch": 10.624581005586592, "grad_norm": 0.6733386516571045, "learning_rate": 0.00047042016806722695, "loss": 0.546, "step": 19018 }, { "epoch": 10.62513966480447, "grad_norm": 0.3711135983467102, "learning_rate": 0.0004703921568627451, "loss": 0.3334, "step": 19019 }, { "epoch": 10.625698324022347, "grad_norm": 0.37633016705513, "learning_rate": 0.0004703641456582633, "loss": 0.374, "step": 19020 }, { "epoch": 10.626256983240223, "grad_norm": 0.6560848951339722, "learning_rate": 0.0004703361344537815, "loss": 0.4801, "step": 19021 }, { "epoch": 10.6268156424581, "grad_norm": 0.5283510088920593, 
"learning_rate": 0.0004703081232492997, "loss": 0.4622, "step": 19022 }, { "epoch": 10.627374301675978, "grad_norm": 0.62628573179245, "learning_rate": 0.0004702801120448179, "loss": 0.4052, "step": 19023 }, { "epoch": 10.627932960893855, "grad_norm": 0.9499527215957642, "learning_rate": 0.00047025210084033613, "loss": 0.4045, "step": 19024 }, { "epoch": 10.628491620111731, "grad_norm": 0.4918128550052643, "learning_rate": 0.00047022408963585434, "loss": 0.3417, "step": 19025 }, { "epoch": 10.62905027932961, "grad_norm": 2.0121538639068604, "learning_rate": 0.0004701960784313726, "loss": 0.4088, "step": 19026 }, { "epoch": 10.629608938547486, "grad_norm": 0.3911835253238678, "learning_rate": 0.00047016806722689075, "loss": 0.3493, "step": 19027 }, { "epoch": 10.630167597765363, "grad_norm": 0.519016444683075, "learning_rate": 0.00047014005602240896, "loss": 0.4779, "step": 19028 }, { "epoch": 10.630726256983241, "grad_norm": 2.3829596042633057, "learning_rate": 0.00047011204481792716, "loss": 0.4247, "step": 19029 }, { "epoch": 10.631284916201118, "grad_norm": 0.5463372468948364, "learning_rate": 0.00047008403361344537, "loss": 0.4618, "step": 19030 }, { "epoch": 10.631843575418994, "grad_norm": 0.564556896686554, "learning_rate": 0.00047005602240896363, "loss": 0.4602, "step": 19031 }, { "epoch": 10.63240223463687, "grad_norm": 1.4305754899978638, "learning_rate": 0.0004700280112044818, "loss": 0.4028, "step": 19032 }, { "epoch": 10.632960893854749, "grad_norm": 0.5458186864852905, "learning_rate": 0.00047, "loss": 0.4983, "step": 19033 }, { "epoch": 10.633519553072626, "grad_norm": 1.3629544973373413, "learning_rate": 0.00046997198879551825, "loss": 0.7202, "step": 19034 }, { "epoch": 10.634078212290502, "grad_norm": 0.40971869230270386, "learning_rate": 0.0004699439775910364, "loss": 0.4323, "step": 19035 }, { "epoch": 10.63463687150838, "grad_norm": 0.5169001221656799, "learning_rate": 0.00046991596638655466, "loss": 0.3726, "step": 19036 }, { "epoch": 
10.635195530726257, "grad_norm": 5.221506118774414, "learning_rate": 0.0004698879551820728, "loss": 0.4903, "step": 19037 }, { "epoch": 10.635754189944134, "grad_norm": 0.4050253629684448, "learning_rate": 0.000469859943977591, "loss": 0.4436, "step": 19038 }, { "epoch": 10.63631284916201, "grad_norm": 0.3868137300014496, "learning_rate": 0.0004698319327731093, "loss": 0.366, "step": 19039 }, { "epoch": 10.636871508379889, "grad_norm": 55.83261489868164, "learning_rate": 0.00046980392156862743, "loss": 0.4528, "step": 19040 }, { "epoch": 10.637430167597765, "grad_norm": 1.1053935289382935, "learning_rate": 0.0004697759103641457, "loss": 0.4221, "step": 19041 }, { "epoch": 10.637988826815642, "grad_norm": 1.2547225952148438, "learning_rate": 0.0004697478991596639, "loss": 0.3847, "step": 19042 }, { "epoch": 10.63854748603352, "grad_norm": 0.48490244150161743, "learning_rate": 0.00046971988795518205, "loss": 0.3517, "step": 19043 }, { "epoch": 10.639106145251397, "grad_norm": 0.3545001149177551, "learning_rate": 0.0004696918767507003, "loss": 0.4162, "step": 19044 }, { "epoch": 10.639664804469273, "grad_norm": 0.5452660322189331, "learning_rate": 0.00046966386554621846, "loss": 0.3535, "step": 19045 }, { "epoch": 10.640223463687152, "grad_norm": 0.3560701906681061, "learning_rate": 0.0004696358543417367, "loss": 0.4084, "step": 19046 }, { "epoch": 10.640782122905028, "grad_norm": 0.32805687189102173, "learning_rate": 0.0004696078431372549, "loss": 0.4022, "step": 19047 }, { "epoch": 10.641340782122905, "grad_norm": 0.5181145668029785, "learning_rate": 0.0004695798319327731, "loss": 0.4451, "step": 19048 }, { "epoch": 10.641899441340783, "grad_norm": 0.49063727259635925, "learning_rate": 0.00046955182072829134, "loss": 0.5393, "step": 19049 }, { "epoch": 10.64245810055866, "grad_norm": 0.6111874580383301, "learning_rate": 0.00046952380952380954, "loss": 0.385, "step": 19050 }, { "epoch": 10.643016759776536, "grad_norm": 0.5436881184577942, "learning_rate": 
0.00046949579831932775, "loss": 0.4127, "step": 19051 }, { "epoch": 10.643575418994413, "grad_norm": 0.8494965434074402, "learning_rate": 0.00046946778711484595, "loss": 0.4366, "step": 19052 }, { "epoch": 10.644134078212291, "grad_norm": 0.4573822021484375, "learning_rate": 0.0004694397759103641, "loss": 0.4255, "step": 19053 }, { "epoch": 10.644692737430168, "grad_norm": 0.3451094925403595, "learning_rate": 0.00046941176470588237, "loss": 0.3417, "step": 19054 }, { "epoch": 10.645251396648044, "grad_norm": 0.36737358570098877, "learning_rate": 0.00046938375350140057, "loss": 0.4192, "step": 19055 }, { "epoch": 10.645810055865923, "grad_norm": 0.5293173789978027, "learning_rate": 0.0004693557422969188, "loss": 0.4484, "step": 19056 }, { "epoch": 10.6463687150838, "grad_norm": 0.44499993324279785, "learning_rate": 0.000469327731092437, "loss": 0.4155, "step": 19057 }, { "epoch": 10.646927374301676, "grad_norm": 0.6773020625114441, "learning_rate": 0.0004692997198879552, "loss": 0.4605, "step": 19058 }, { "epoch": 10.647486033519552, "grad_norm": 0.4028243124485016, "learning_rate": 0.0004692717086834734, "loss": 0.3437, "step": 19059 }, { "epoch": 10.64804469273743, "grad_norm": 0.6818606853485107, "learning_rate": 0.0004692436974789916, "loss": 0.4542, "step": 19060 }, { "epoch": 10.648603351955307, "grad_norm": 2.3583338260650635, "learning_rate": 0.0004692156862745098, "loss": 0.5257, "step": 19061 }, { "epoch": 10.649162011173184, "grad_norm": 0.4159480929374695, "learning_rate": 0.000469187675070028, "loss": 0.4626, "step": 19062 }, { "epoch": 10.649720670391062, "grad_norm": 0.44404101371765137, "learning_rate": 0.0004691596638655462, "loss": 0.3446, "step": 19063 }, { "epoch": 10.650279329608939, "grad_norm": 1.7233636379241943, "learning_rate": 0.0004691316526610644, "loss": 0.292, "step": 19064 }, { "epoch": 10.650837988826815, "grad_norm": 1.3191317319869995, "learning_rate": 0.00046910364145658263, "loss": 0.4955, "step": 19065 }, { "epoch": 
10.651396648044694, "grad_norm": 0.7556615471839905, "learning_rate": 0.0004690756302521009, "loss": 0.4689, "step": 19066 }, { "epoch": 10.65195530726257, "grad_norm": 0.43262651562690735, "learning_rate": 0.00046904761904761904, "loss": 0.4419, "step": 19067 }, { "epoch": 10.652513966480447, "grad_norm": 0.4974231421947479, "learning_rate": 0.00046901960784313725, "loss": 0.3429, "step": 19068 }, { "epoch": 10.653072625698323, "grad_norm": 0.5129490494728088, "learning_rate": 0.00046899159663865546, "loss": 0.5127, "step": 19069 }, { "epoch": 10.653631284916202, "grad_norm": 0.5030393004417419, "learning_rate": 0.00046896358543417366, "loss": 0.4539, "step": 19070 }, { "epoch": 10.654189944134078, "grad_norm": 0.4107878506183624, "learning_rate": 0.0004689355742296919, "loss": 0.3833, "step": 19071 }, { "epoch": 10.654748603351955, "grad_norm": 0.46170878410339355, "learning_rate": 0.0004689075630252101, "loss": 0.3309, "step": 19072 }, { "epoch": 10.655307262569833, "grad_norm": 0.8438941240310669, "learning_rate": 0.0004688795518207283, "loss": 0.5224, "step": 19073 }, { "epoch": 10.65586592178771, "grad_norm": 0.7511799335479736, "learning_rate": 0.00046885154061624654, "loss": 0.4567, "step": 19074 }, { "epoch": 10.656424581005586, "grad_norm": 2.3624446392059326, "learning_rate": 0.0004688235294117647, "loss": 0.4491, "step": 19075 }, { "epoch": 10.656983240223465, "grad_norm": 0.5792214274406433, "learning_rate": 0.00046879551820728295, "loss": 0.4607, "step": 19076 }, { "epoch": 10.657541899441341, "grad_norm": 0.8547051548957825, "learning_rate": 0.0004687675070028011, "loss": 0.4094, "step": 19077 }, { "epoch": 10.658100558659218, "grad_norm": 0.6848317980766296, "learning_rate": 0.0004687394957983193, "loss": 0.4159, "step": 19078 }, { "epoch": 10.658659217877094, "grad_norm": 0.5106977224349976, "learning_rate": 0.00046871148459383757, "loss": 0.5057, "step": 19079 }, { "epoch": 10.659217877094973, "grad_norm": 0.6351894736289978, "learning_rate": 
0.0004686834733893557, "loss": 0.369, "step": 19080 }, { "epoch": 10.65977653631285, "grad_norm": 0.5743239521980286, "learning_rate": 0.000468655462184874, "loss": 0.5266, "step": 19081 }, { "epoch": 10.660335195530726, "grad_norm": 0.5067649483680725, "learning_rate": 0.0004686274509803922, "loss": 0.4256, "step": 19082 }, { "epoch": 10.660893854748604, "grad_norm": 0.41858527064323425, "learning_rate": 0.00046859943977591034, "loss": 0.4271, "step": 19083 }, { "epoch": 10.66145251396648, "grad_norm": 0.7740573883056641, "learning_rate": 0.0004685714285714286, "loss": 0.5517, "step": 19084 }, { "epoch": 10.662011173184357, "grad_norm": 0.49993398785591125, "learning_rate": 0.00046854341736694675, "loss": 0.4022, "step": 19085 }, { "epoch": 10.662569832402234, "grad_norm": 0.49791771173477173, "learning_rate": 0.000468515406162465, "loss": 0.4383, "step": 19086 }, { "epoch": 10.663128491620112, "grad_norm": 0.8350551128387451, "learning_rate": 0.0004684873949579832, "loss": 0.4659, "step": 19087 }, { "epoch": 10.663687150837989, "grad_norm": 1.5306137800216675, "learning_rate": 0.00046845938375350137, "loss": 0.3581, "step": 19088 }, { "epoch": 10.664245810055865, "grad_norm": 0.6567550301551819, "learning_rate": 0.00046843137254901963, "loss": 0.3781, "step": 19089 }, { "epoch": 10.664804469273744, "grad_norm": 0.8038487434387207, "learning_rate": 0.00046840336134453784, "loss": 0.5969, "step": 19090 }, { "epoch": 10.66536312849162, "grad_norm": 0.6921828389167786, "learning_rate": 0.00046837535014005604, "loss": 0.3682, "step": 19091 }, { "epoch": 10.665921787709497, "grad_norm": 0.4302246868610382, "learning_rate": 0.00046834733893557425, "loss": 0.3922, "step": 19092 }, { "epoch": 10.666480446927375, "grad_norm": 0.5351213812828064, "learning_rate": 0.0004683193277310924, "loss": 0.5228, "step": 19093 }, { "epoch": 10.667039106145252, "grad_norm": 0.4133875370025635, "learning_rate": 0.00046829131652661066, "loss": 0.4169, "step": 19094 }, { "epoch": 
10.667597765363128, "grad_norm": 0.6462990045547485, "learning_rate": 0.00046826330532212887, "loss": 0.5254, "step": 19095 }, { "epoch": 10.668156424581005, "grad_norm": 1.2833349704742432, "learning_rate": 0.00046823529411764707, "loss": 0.3988, "step": 19096 }, { "epoch": 10.668715083798883, "grad_norm": 0.7234786152839661, "learning_rate": 0.0004682072829131653, "loss": 0.4706, "step": 19097 }, { "epoch": 10.66927374301676, "grad_norm": 0.7720224857330322, "learning_rate": 0.0004681792717086835, "loss": 0.5365, "step": 19098 }, { "epoch": 10.669832402234636, "grad_norm": 0.4070813059806824, "learning_rate": 0.0004681512605042017, "loss": 0.3656, "step": 19099 }, { "epoch": 10.670391061452515, "grad_norm": 0.45597848296165466, "learning_rate": 0.0004681232492997199, "loss": 0.4102, "step": 19100 }, { "epoch": 10.670949720670391, "grad_norm": 0.5677588582038879, "learning_rate": 0.00046809523809523816, "loss": 0.5528, "step": 19101 }, { "epoch": 10.671508379888268, "grad_norm": 0.5414113402366638, "learning_rate": 0.0004680672268907563, "loss": 0.4445, "step": 19102 }, { "epoch": 10.672067039106146, "grad_norm": 0.47070086002349854, "learning_rate": 0.0004680392156862745, "loss": 0.4174, "step": 19103 }, { "epoch": 10.672625698324023, "grad_norm": 0.6463845372200012, "learning_rate": 0.0004680112044817927, "loss": 0.3636, "step": 19104 }, { "epoch": 10.6731843575419, "grad_norm": 0.34276607632637024, "learning_rate": 0.0004679831932773109, "loss": 0.3433, "step": 19105 }, { "epoch": 10.673743016759776, "grad_norm": 0.47840774059295654, "learning_rate": 0.0004679551820728292, "loss": 0.3954, "step": 19106 }, { "epoch": 10.674301675977654, "grad_norm": 0.7085204720497131, "learning_rate": 0.00046792717086834734, "loss": 0.3905, "step": 19107 }, { "epoch": 10.67486033519553, "grad_norm": 1.1861666440963745, "learning_rate": 0.00046789915966386554, "loss": 0.4145, "step": 19108 }, { "epoch": 10.675418994413407, "grad_norm": 0.3899077773094177, "learning_rate": 
0.0004678711484593838, "loss": 0.3976, "step": 19109 }, { "epoch": 10.675977653631286, "grad_norm": 0.45743659138679504, "learning_rate": 0.00046784313725490196, "loss": 0.3251, "step": 19110 }, { "epoch": 10.676536312849162, "grad_norm": 0.45469725131988525, "learning_rate": 0.0004678151260504202, "loss": 0.463, "step": 19111 }, { "epoch": 10.677094972067039, "grad_norm": 0.3300894498825073, "learning_rate": 0.00046778711484593837, "loss": 0.385, "step": 19112 }, { "epoch": 10.677653631284915, "grad_norm": 0.4054047465324402, "learning_rate": 0.0004677591036414566, "loss": 0.4341, "step": 19113 }, { "epoch": 10.678212290502794, "grad_norm": 0.47632908821105957, "learning_rate": 0.00046773109243697483, "loss": 0.2723, "step": 19114 }, { "epoch": 10.67877094972067, "grad_norm": 1.205341100692749, "learning_rate": 0.000467703081232493, "loss": 0.5243, "step": 19115 }, { "epoch": 10.679329608938547, "grad_norm": 0.3325849771499634, "learning_rate": 0.00046767507002801125, "loss": 0.3289, "step": 19116 }, { "epoch": 10.679888268156425, "grad_norm": 0.5517097115516663, "learning_rate": 0.00046764705882352945, "loss": 0.3591, "step": 19117 }, { "epoch": 10.680446927374302, "grad_norm": 2.2804384231567383, "learning_rate": 0.0004676190476190476, "loss": 0.3, "step": 19118 }, { "epoch": 10.681005586592178, "grad_norm": 0.45297345519065857, "learning_rate": 0.00046759103641456586, "loss": 0.3275, "step": 19119 }, { "epoch": 10.681564245810057, "grad_norm": 0.47264283895492554, "learning_rate": 0.000467563025210084, "loss": 0.4513, "step": 19120 }, { "epoch": 10.682122905027933, "grad_norm": 0.6037371754646301, "learning_rate": 0.0004675350140056023, "loss": 0.3924, "step": 19121 }, { "epoch": 10.68268156424581, "grad_norm": 0.44970548152923584, "learning_rate": 0.0004675070028011205, "loss": 0.4976, "step": 19122 }, { "epoch": 10.683240223463688, "grad_norm": 0.38732364773750305, "learning_rate": 0.00046747899159663863, "loss": 0.423, "step": 19123 }, { "epoch": 
10.683798882681565, "grad_norm": 0.6231667995452881, "learning_rate": 0.0004674509803921569, "loss": 0.5034, "step": 19124 }, { "epoch": 10.684357541899441, "grad_norm": 0.4319717288017273, "learning_rate": 0.0004674229691876751, "loss": 0.3598, "step": 19125 }, { "epoch": 10.684916201117318, "grad_norm": 4.4659013748168945, "learning_rate": 0.0004673949579831933, "loss": 0.4826, "step": 19126 }, { "epoch": 10.685474860335196, "grad_norm": 0.4170956611633301, "learning_rate": 0.0004673669467787115, "loss": 0.4104, "step": 19127 }, { "epoch": 10.686033519553073, "grad_norm": 0.554125964641571, "learning_rate": 0.00046733893557422966, "loss": 0.4121, "step": 19128 }, { "epoch": 10.68659217877095, "grad_norm": 0.7523870468139648, "learning_rate": 0.0004673109243697479, "loss": 0.7048, "step": 19129 }, { "epoch": 10.687150837988828, "grad_norm": 0.44762739539146423, "learning_rate": 0.00046728291316526613, "loss": 0.4358, "step": 19130 }, { "epoch": 10.687709497206704, "grad_norm": 0.5005160570144653, "learning_rate": 0.00046725490196078434, "loss": 0.4308, "step": 19131 }, { "epoch": 10.68826815642458, "grad_norm": 0.3986276388168335, "learning_rate": 0.00046722689075630254, "loss": 0.3784, "step": 19132 }, { "epoch": 10.688826815642457, "grad_norm": 0.41815945506095886, "learning_rate": 0.00046719887955182075, "loss": 0.3808, "step": 19133 }, { "epoch": 10.689385474860336, "grad_norm": 0.47712522745132446, "learning_rate": 0.00046717086834733895, "loss": 0.4194, "step": 19134 }, { "epoch": 10.689944134078212, "grad_norm": 0.534307599067688, "learning_rate": 0.00046714285714285716, "loss": 0.3459, "step": 19135 }, { "epoch": 10.690502793296089, "grad_norm": 0.37046006321907043, "learning_rate": 0.0004671148459383753, "loss": 0.4017, "step": 19136 }, { "epoch": 10.691061452513967, "grad_norm": 0.6235345602035522, "learning_rate": 0.00046708683473389357, "loss": 0.3615, "step": 19137 }, { "epoch": 10.691620111731844, "grad_norm": 1.132794976234436, "learning_rate": 
0.0004670588235294118, "loss": 0.4584, "step": 19138 }, { "epoch": 10.69217877094972, "grad_norm": 0.5851292610168457, "learning_rate": 0.00046703081232493, "loss": 0.3411, "step": 19139 }, { "epoch": 10.692737430167599, "grad_norm": 0.5423732995986938, "learning_rate": 0.0004670028011204482, "loss": 0.4639, "step": 19140 }, { "epoch": 10.693296089385475, "grad_norm": 1.5003374814987183, "learning_rate": 0.0004669747899159664, "loss": 0.3638, "step": 19141 }, { "epoch": 10.693854748603352, "grad_norm": 0.5174238085746765, "learning_rate": 0.0004669467787114846, "loss": 0.5133, "step": 19142 }, { "epoch": 10.694413407821228, "grad_norm": 0.5892506837844849, "learning_rate": 0.0004669187675070028, "loss": 0.5287, "step": 19143 }, { "epoch": 10.694972067039107, "grad_norm": 0.4213191866874695, "learning_rate": 0.000466890756302521, "loss": 0.4115, "step": 19144 }, { "epoch": 10.695530726256983, "grad_norm": 0.41962504386901855, "learning_rate": 0.0004668627450980392, "loss": 0.4542, "step": 19145 }, { "epoch": 10.69608938547486, "grad_norm": 0.6039366722106934, "learning_rate": 0.0004668347338935574, "loss": 0.4822, "step": 19146 }, { "epoch": 10.696648044692738, "grad_norm": 1.4804936647415161, "learning_rate": 0.00046680672268907563, "loss": 0.586, "step": 19147 }, { "epoch": 10.697206703910615, "grad_norm": 0.5975735187530518, "learning_rate": 0.00046677871148459384, "loss": 0.4225, "step": 19148 }, { "epoch": 10.697765363128491, "grad_norm": 0.9269648790359497, "learning_rate": 0.0004667507002801121, "loss": 0.6056, "step": 19149 }, { "epoch": 10.69832402234637, "grad_norm": 4.277854919433594, "learning_rate": 0.00046672268907563025, "loss": 0.4429, "step": 19150 }, { "epoch": 10.698882681564246, "grad_norm": 0.5252466797828674, "learning_rate": 0.00046669467787114846, "loss": 0.4758, "step": 19151 }, { "epoch": 10.699441340782123, "grad_norm": 0.4072783291339874, "learning_rate": 0.00046666666666666666, "loss": 0.4086, "step": 19152 }, { "epoch": 10.7, 
"grad_norm": 0.5791612863540649, "learning_rate": 0.00046663865546218487, "loss": 0.4137, "step": 19153 }, { "epoch": 10.700558659217878, "grad_norm": 0.841476321220398, "learning_rate": 0.00046661064425770313, "loss": 0.4458, "step": 19154 }, { "epoch": 10.701117318435754, "grad_norm": 4.3205790519714355, "learning_rate": 0.0004665826330532213, "loss": 0.3965, "step": 19155 }, { "epoch": 10.70167597765363, "grad_norm": 1.5909810066223145, "learning_rate": 0.0004665546218487395, "loss": 0.3965, "step": 19156 }, { "epoch": 10.702234636871509, "grad_norm": 0.4131898880004883, "learning_rate": 0.00046652661064425775, "loss": 0.3952, "step": 19157 }, { "epoch": 10.702793296089386, "grad_norm": 0.9349154233932495, "learning_rate": 0.0004664985994397759, "loss": 0.39, "step": 19158 }, { "epoch": 10.703351955307262, "grad_norm": 0.5243114233016968, "learning_rate": 0.00046647058823529416, "loss": 0.4094, "step": 19159 }, { "epoch": 10.703910614525139, "grad_norm": 0.3909927308559418, "learning_rate": 0.0004664425770308123, "loss": 0.4031, "step": 19160 }, { "epoch": 10.704469273743017, "grad_norm": 0.38771721720695496, "learning_rate": 0.0004664145658263305, "loss": 0.3625, "step": 19161 }, { "epoch": 10.705027932960894, "grad_norm": 0.5744292736053467, "learning_rate": 0.0004663865546218488, "loss": 0.4711, "step": 19162 }, { "epoch": 10.70558659217877, "grad_norm": 0.6499484181404114, "learning_rate": 0.00046635854341736693, "loss": 0.322, "step": 19163 }, { "epoch": 10.706145251396649, "grad_norm": 0.4552537500858307, "learning_rate": 0.0004663305322128852, "loss": 0.3913, "step": 19164 }, { "epoch": 10.706703910614525, "grad_norm": 3.466280460357666, "learning_rate": 0.0004663025210084034, "loss": 0.54, "step": 19165 }, { "epoch": 10.707262569832402, "grad_norm": 0.6018717288970947, "learning_rate": 0.00046627450980392155, "loss": 0.4178, "step": 19166 }, { "epoch": 10.70782122905028, "grad_norm": 1.7603206634521484, "learning_rate": 0.0004662464985994398, "loss": 
0.404, "step": 19167 }, { "epoch": 10.708379888268157, "grad_norm": 0.4864920377731323, "learning_rate": 0.00046621848739495796, "loss": 0.4645, "step": 19168 }, { "epoch": 10.708938547486033, "grad_norm": 0.49840936064720154, "learning_rate": 0.0004661904761904762, "loss": 0.4351, "step": 19169 }, { "epoch": 10.70949720670391, "grad_norm": 0.36510297656059265, "learning_rate": 0.0004661624649859944, "loss": 0.4488, "step": 19170 }, { "epoch": 10.710055865921788, "grad_norm": 1.0267553329467773, "learning_rate": 0.0004661344537815126, "loss": 0.3793, "step": 19171 }, { "epoch": 10.710614525139665, "grad_norm": 1.7075783014297485, "learning_rate": 0.00046610644257703084, "loss": 0.4351, "step": 19172 }, { "epoch": 10.711173184357541, "grad_norm": 0.41012072563171387, "learning_rate": 0.00046607843137254904, "loss": 0.3765, "step": 19173 }, { "epoch": 10.71173184357542, "grad_norm": 0.4107798635959625, "learning_rate": 0.00046605042016806725, "loss": 0.4827, "step": 19174 }, { "epoch": 10.712290502793296, "grad_norm": 0.5288774967193604, "learning_rate": 0.00046602240896358545, "loss": 0.4132, "step": 19175 }, { "epoch": 10.712849162011173, "grad_norm": 0.34799283742904663, "learning_rate": 0.0004659943977591036, "loss": 0.3065, "step": 19176 }, { "epoch": 10.713407821229051, "grad_norm": 0.5456564426422119, "learning_rate": 0.00046596638655462187, "loss": 0.419, "step": 19177 }, { "epoch": 10.713966480446928, "grad_norm": 0.4334261417388916, "learning_rate": 0.00046593837535014007, "loss": 0.3324, "step": 19178 }, { "epoch": 10.714525139664804, "grad_norm": 2.0333592891693115, "learning_rate": 0.0004659103641456583, "loss": 0.3161, "step": 19179 }, { "epoch": 10.71508379888268, "grad_norm": 0.40038445591926575, "learning_rate": 0.0004658823529411765, "loss": 0.3818, "step": 19180 }, { "epoch": 10.71564245810056, "grad_norm": 0.3658725321292877, "learning_rate": 0.0004658543417366947, "loss": 0.377, "step": 19181 }, { "epoch": 10.716201117318436, "grad_norm": 
1.070756435394287, "learning_rate": 0.0004658263305322129, "loss": 0.3694, "step": 19182 }, { "epoch": 10.716759776536312, "grad_norm": 0.588098406791687, "learning_rate": 0.0004657983193277311, "loss": 0.3662, "step": 19183 }, { "epoch": 10.71731843575419, "grad_norm": 0.6097560524940491, "learning_rate": 0.0004657703081232493, "loss": 0.3477, "step": 19184 }, { "epoch": 10.717877094972067, "grad_norm": 0.42431163787841797, "learning_rate": 0.0004657422969187675, "loss": 0.3821, "step": 19185 }, { "epoch": 10.718435754189944, "grad_norm": 0.48769694566726685, "learning_rate": 0.0004657142857142857, "loss": 0.363, "step": 19186 }, { "epoch": 10.71899441340782, "grad_norm": 0.5009562373161316, "learning_rate": 0.0004656862745098039, "loss": 0.3721, "step": 19187 }, { "epoch": 10.719553072625699, "grad_norm": 0.5543577671051025, "learning_rate": 0.00046565826330532213, "loss": 0.3801, "step": 19188 }, { "epoch": 10.720111731843575, "grad_norm": 0.3637104630470276, "learning_rate": 0.0004656302521008404, "loss": 0.3793, "step": 19189 }, { "epoch": 10.720670391061452, "grad_norm": 0.6036620140075684, "learning_rate": 0.00046560224089635854, "loss": 0.3778, "step": 19190 }, { "epoch": 10.72122905027933, "grad_norm": 0.4180900752544403, "learning_rate": 0.00046557422969187675, "loss": 0.5679, "step": 19191 }, { "epoch": 10.721787709497207, "grad_norm": 0.7165327072143555, "learning_rate": 0.00046554621848739496, "loss": 0.454, "step": 19192 }, { "epoch": 10.722346368715083, "grad_norm": 0.34337225556373596, "learning_rate": 0.00046551820728291316, "loss": 0.4537, "step": 19193 }, { "epoch": 10.722905027932962, "grad_norm": 0.47642239928245544, "learning_rate": 0.0004654901960784314, "loss": 0.464, "step": 19194 }, { "epoch": 10.723463687150838, "grad_norm": 0.4889824390411377, "learning_rate": 0.0004654621848739496, "loss": 0.4008, "step": 19195 }, { "epoch": 10.724022346368715, "grad_norm": 0.6504570245742798, "learning_rate": 0.0004654341736694678, "loss": 0.4218, 
"step": 19196 }, { "epoch": 10.724581005586593, "grad_norm": 0.5067644715309143, "learning_rate": 0.00046540616246498604, "loss": 0.467, "step": 19197 }, { "epoch": 10.72513966480447, "grad_norm": 0.6029285192489624, "learning_rate": 0.0004653781512605042, "loss": 0.3726, "step": 19198 }, { "epoch": 10.725698324022346, "grad_norm": 0.4918954372406006, "learning_rate": 0.00046535014005602245, "loss": 0.4649, "step": 19199 }, { "epoch": 10.726256983240223, "grad_norm": 0.5521659851074219, "learning_rate": 0.0004653221288515406, "loss": 0.5541, "step": 19200 }, { "epoch": 10.726815642458101, "grad_norm": 0.3751126825809479, "learning_rate": 0.0004652941176470588, "loss": 0.4037, "step": 19201 }, { "epoch": 10.727374301675978, "grad_norm": 0.4620317816734314, "learning_rate": 0.00046526610644257707, "loss": 0.3312, "step": 19202 }, { "epoch": 10.727932960893854, "grad_norm": 2.065751075744629, "learning_rate": 0.0004652380952380952, "loss": 0.5624, "step": 19203 }, { "epoch": 10.728491620111733, "grad_norm": 0.4712981879711151, "learning_rate": 0.0004652100840336135, "loss": 0.3809, "step": 19204 }, { "epoch": 10.72905027932961, "grad_norm": 0.5137621164321899, "learning_rate": 0.0004651820728291317, "loss": 0.5207, "step": 19205 }, { "epoch": 10.729608938547486, "grad_norm": 0.5231024622917175, "learning_rate": 0.00046515406162464984, "loss": 0.424, "step": 19206 }, { "epoch": 10.730167597765362, "grad_norm": 0.3687092065811157, "learning_rate": 0.0004651260504201681, "loss": 0.415, "step": 19207 }, { "epoch": 10.73072625698324, "grad_norm": 0.4202491343021393, "learning_rate": 0.00046509803921568625, "loss": 0.4009, "step": 19208 }, { "epoch": 10.731284916201117, "grad_norm": 1.6730176210403442, "learning_rate": 0.0004650700280112045, "loss": 0.3533, "step": 19209 }, { "epoch": 10.731843575418994, "grad_norm": 1.897058129310608, "learning_rate": 0.0004650420168067227, "loss": 0.4782, "step": 19210 }, { "epoch": 10.732402234636872, "grad_norm": 0.7021287679672241, 
"learning_rate": 0.00046501400560224087, "loss": 0.387, "step": 19211 }, { "epoch": 10.732960893854749, "grad_norm": 0.29879865050315857, "learning_rate": 0.00046498599439775913, "loss": 0.3061, "step": 19212 }, { "epoch": 10.733519553072625, "grad_norm": 1.0450514554977417, "learning_rate": 0.00046495798319327734, "loss": 0.3492, "step": 19213 }, { "epoch": 10.734078212290502, "grad_norm": 1.3027139902114868, "learning_rate": 0.00046492997198879554, "loss": 0.4735, "step": 19214 }, { "epoch": 10.73463687150838, "grad_norm": 0.501135528087616, "learning_rate": 0.00046490196078431375, "loss": 0.3938, "step": 19215 }, { "epoch": 10.735195530726257, "grad_norm": 0.43792861700057983, "learning_rate": 0.0004648739495798319, "loss": 0.5522, "step": 19216 }, { "epoch": 10.735754189944133, "grad_norm": 0.8639389276504517, "learning_rate": 0.00046484593837535016, "loss": 0.4747, "step": 19217 }, { "epoch": 10.736312849162012, "grad_norm": 0.5563386082649231, "learning_rate": 0.00046481792717086837, "loss": 0.4683, "step": 19218 }, { "epoch": 10.736871508379888, "grad_norm": 0.5339515209197998, "learning_rate": 0.00046478991596638657, "loss": 0.4707, "step": 19219 }, { "epoch": 10.737430167597765, "grad_norm": 0.8274433612823486, "learning_rate": 0.0004647619047619048, "loss": 0.5157, "step": 19220 }, { "epoch": 10.737988826815643, "grad_norm": 0.5010893940925598, "learning_rate": 0.000464733893557423, "loss": 0.4804, "step": 19221 }, { "epoch": 10.73854748603352, "grad_norm": 0.42481040954589844, "learning_rate": 0.0004647058823529412, "loss": 0.459, "step": 19222 }, { "epoch": 10.739106145251396, "grad_norm": 0.4648483395576477, "learning_rate": 0.0004646778711484594, "loss": 0.4011, "step": 19223 }, { "epoch": 10.739664804469275, "grad_norm": 0.5737546682357788, "learning_rate": 0.0004646498599439776, "loss": 0.539, "step": 19224 }, { "epoch": 10.740223463687151, "grad_norm": 0.9265821576118469, "learning_rate": 0.0004646218487394958, "loss": 0.4534, "step": 19225 }, { 
"epoch": 10.740782122905028, "grad_norm": 0.5134668946266174, "learning_rate": 0.000464593837535014, "loss": 0.4256, "step": 19226 }, { "epoch": 10.741340782122904, "grad_norm": 0.49933183193206787, "learning_rate": 0.0004645658263305322, "loss": 0.4049, "step": 19227 }, { "epoch": 10.741899441340783, "grad_norm": 0.49328574538230896, "learning_rate": 0.0004645378151260504, "loss": 0.5104, "step": 19228 }, { "epoch": 10.74245810055866, "grad_norm": 3.4530794620513916, "learning_rate": 0.0004645098039215687, "loss": 0.4194, "step": 19229 }, { "epoch": 10.743016759776536, "grad_norm": 0.7529295086860657, "learning_rate": 0.00046448179271708684, "loss": 0.3637, "step": 19230 }, { "epoch": 10.743575418994414, "grad_norm": 0.9591073989868164, "learning_rate": 0.00046445378151260504, "loss": 0.3951, "step": 19231 }, { "epoch": 10.74413407821229, "grad_norm": 0.643728494644165, "learning_rate": 0.00046442577030812325, "loss": 0.5053, "step": 19232 }, { "epoch": 10.744692737430167, "grad_norm": 0.8256698846817017, "learning_rate": 0.00046439775910364146, "loss": 0.4126, "step": 19233 }, { "epoch": 10.745251396648044, "grad_norm": 0.3897912800312042, "learning_rate": 0.0004643697478991597, "loss": 0.3792, "step": 19234 }, { "epoch": 10.745810055865922, "grad_norm": 1.7647591829299927, "learning_rate": 0.00046434173669467787, "loss": 0.4318, "step": 19235 }, { "epoch": 10.746368715083799, "grad_norm": 0.5318904519081116, "learning_rate": 0.0004643137254901961, "loss": 0.3695, "step": 19236 }, { "epoch": 10.746927374301675, "grad_norm": 0.6479517221450806, "learning_rate": 0.00046428571428571433, "loss": 0.3973, "step": 19237 }, { "epoch": 10.747486033519554, "grad_norm": 0.38081344962120056, "learning_rate": 0.0004642577030812325, "loss": 0.458, "step": 19238 }, { "epoch": 10.74804469273743, "grad_norm": 0.5555077791213989, "learning_rate": 0.00046422969187675075, "loss": 0.4492, "step": 19239 }, { "epoch": 10.748603351955307, "grad_norm": 0.8116068243980408, 
"learning_rate": 0.0004642016806722689, "loss": 0.3887, "step": 19240 }, { "epoch": 10.749162011173185, "grad_norm": 0.46870726346969604, "learning_rate": 0.0004641736694677871, "loss": 0.4963, "step": 19241 }, { "epoch": 10.749720670391062, "grad_norm": 0.424879789352417, "learning_rate": 0.00046414565826330536, "loss": 0.4673, "step": 19242 }, { "epoch": 10.750279329608938, "grad_norm": 4.5148820877075195, "learning_rate": 0.0004641176470588235, "loss": 0.5258, "step": 19243 }, { "epoch": 10.750837988826815, "grad_norm": 0.6333886981010437, "learning_rate": 0.0004640896358543418, "loss": 0.4546, "step": 19244 }, { "epoch": 10.751396648044693, "grad_norm": 2.736905097961426, "learning_rate": 0.00046406162464986, "loss": 0.3824, "step": 19245 }, { "epoch": 10.75195530726257, "grad_norm": 0.4120141863822937, "learning_rate": 0.00046403361344537813, "loss": 0.3597, "step": 19246 }, { "epoch": 10.752513966480446, "grad_norm": 0.709510862827301, "learning_rate": 0.0004640056022408964, "loss": 0.4432, "step": 19247 }, { "epoch": 10.753072625698325, "grad_norm": 0.4020548164844513, "learning_rate": 0.00046397759103641455, "loss": 0.4354, "step": 19248 }, { "epoch": 10.753631284916201, "grad_norm": 6.594186305999756, "learning_rate": 0.00046394957983193275, "loss": 0.3966, "step": 19249 }, { "epoch": 10.754189944134078, "grad_norm": 0.4017973840236664, "learning_rate": 0.000463921568627451, "loss": 0.4345, "step": 19250 }, { "epoch": 10.754748603351956, "grad_norm": 1.7600725889205933, "learning_rate": 0.00046389355742296916, "loss": 0.5708, "step": 19251 }, { "epoch": 10.755307262569833, "grad_norm": 0.4477103054523468, "learning_rate": 0.0004638655462184874, "loss": 0.5661, "step": 19252 }, { "epoch": 10.75586592178771, "grad_norm": 0.923815131187439, "learning_rate": 0.00046383753501400563, "loss": 0.5779, "step": 19253 }, { "epoch": 10.756424581005586, "grad_norm": 0.395528644323349, "learning_rate": 0.0004638095238095238, "loss": 0.4131, "step": 19254 }, { "epoch": 
10.756983240223464, "grad_norm": 0.5090433359146118, "learning_rate": 0.00046378151260504204, "loss": 0.4368, "step": 19255 }, { "epoch": 10.75754189944134, "grad_norm": 0.7328028678894043, "learning_rate": 0.0004637535014005602, "loss": 0.6518, "step": 19256 }, { "epoch": 10.758100558659217, "grad_norm": 0.7842195630073547, "learning_rate": 0.00046372549019607845, "loss": 0.4425, "step": 19257 }, { "epoch": 10.758659217877096, "grad_norm": 0.4387664198875427, "learning_rate": 0.00046369747899159666, "loss": 0.3423, "step": 19258 }, { "epoch": 10.759217877094972, "grad_norm": 0.6029638051986694, "learning_rate": 0.0004636694677871148, "loss": 0.5207, "step": 19259 }, { "epoch": 10.759776536312849, "grad_norm": 0.43201056122779846, "learning_rate": 0.00046364145658263307, "loss": 0.4241, "step": 19260 }, { "epoch": 10.760335195530725, "grad_norm": 2.0606930255889893, "learning_rate": 0.0004636134453781513, "loss": 0.4359, "step": 19261 }, { "epoch": 10.760893854748604, "grad_norm": 0.5689659714698792, "learning_rate": 0.0004635854341736695, "loss": 0.3455, "step": 19262 }, { "epoch": 10.76145251396648, "grad_norm": 1.1604888439178467, "learning_rate": 0.0004635574229691877, "loss": 0.4831, "step": 19263 }, { "epoch": 10.762011173184357, "grad_norm": 0.467805951833725, "learning_rate": 0.00046352941176470584, "loss": 0.4345, "step": 19264 }, { "epoch": 10.762569832402235, "grad_norm": 13.2154541015625, "learning_rate": 0.0004635014005602241, "loss": 0.4018, "step": 19265 }, { "epoch": 10.763128491620112, "grad_norm": 0.435615599155426, "learning_rate": 0.0004634733893557423, "loss": 0.4652, "step": 19266 }, { "epoch": 10.763687150837988, "grad_norm": 0.39532455801963806, "learning_rate": 0.0004634453781512605, "loss": 0.3569, "step": 19267 }, { "epoch": 10.764245810055867, "grad_norm": 0.4446870982646942, "learning_rate": 0.0004634173669467787, "loss": 0.3963, "step": 19268 }, { "epoch": 10.764804469273743, "grad_norm": 0.7825211882591248, "learning_rate": 
0.0004633893557422969, "loss": 0.4749, "step": 19269 }, { "epoch": 10.76536312849162, "grad_norm": 0.6228837370872498, "learning_rate": 0.00046336134453781513, "loss": 0.3874, "step": 19270 }, { "epoch": 10.765921787709498, "grad_norm": 1.3253917694091797, "learning_rate": 0.00046333333333333334, "loss": 0.5808, "step": 19271 }, { "epoch": 10.766480446927375, "grad_norm": 3.4840729236602783, "learning_rate": 0.00046330532212885154, "loss": 0.4553, "step": 19272 }, { "epoch": 10.767039106145251, "grad_norm": 0.46659964323043823, "learning_rate": 0.00046327731092436975, "loss": 0.4037, "step": 19273 }, { "epoch": 10.767597765363128, "grad_norm": 0.37165331840515137, "learning_rate": 0.00046324929971988796, "loss": 0.4175, "step": 19274 }, { "epoch": 10.768156424581006, "grad_norm": 0.6347272992134094, "learning_rate": 0.00046322128851540616, "loss": 0.497, "step": 19275 }, { "epoch": 10.768715083798883, "grad_norm": 1.4658212661743164, "learning_rate": 0.00046319327731092437, "loss": 0.4301, "step": 19276 }, { "epoch": 10.76927374301676, "grad_norm": 0.3680577874183655, "learning_rate": 0.00046316526610644263, "loss": 0.3865, "step": 19277 }, { "epoch": 10.769832402234638, "grad_norm": 0.7368124127388, "learning_rate": 0.0004631372549019608, "loss": 0.3583, "step": 19278 }, { "epoch": 10.770391061452514, "grad_norm": 0.4230891466140747, "learning_rate": 0.000463109243697479, "loss": 0.5101, "step": 19279 }, { "epoch": 10.77094972067039, "grad_norm": 1.1111137866973877, "learning_rate": 0.0004630812324929972, "loss": 0.397, "step": 19280 }, { "epoch": 10.771508379888267, "grad_norm": 0.42726680636405945, "learning_rate": 0.0004630532212885154, "loss": 0.4537, "step": 19281 }, { "epoch": 10.772067039106146, "grad_norm": 0.4583140015602112, "learning_rate": 0.00046302521008403366, "loss": 0.5424, "step": 19282 }, { "epoch": 10.772625698324022, "grad_norm": 0.6325045824050903, "learning_rate": 0.0004629971988795518, "loss": 0.4279, "step": 19283 }, { "epoch": 
10.773184357541899, "grad_norm": 0.8716033101081848, "learning_rate": 0.00046296918767507, "loss": 0.5713, "step": 19284 }, { "epoch": 10.773743016759777, "grad_norm": 0.6241661310195923, "learning_rate": 0.0004629411764705883, "loss": 0.4899, "step": 19285 }, { "epoch": 10.774301675977654, "grad_norm": 0.7427290678024292, "learning_rate": 0.00046291316526610643, "loss": 0.4353, "step": 19286 }, { "epoch": 10.77486033519553, "grad_norm": 0.5163410902023315, "learning_rate": 0.0004628851540616247, "loss": 0.5552, "step": 19287 }, { "epoch": 10.775418994413407, "grad_norm": 0.4569401741027832, "learning_rate": 0.00046285714285714284, "loss": 0.3789, "step": 19288 }, { "epoch": 10.775977653631285, "grad_norm": 0.5449912548065186, "learning_rate": 0.00046282913165266105, "loss": 0.3555, "step": 19289 }, { "epoch": 10.776536312849162, "grad_norm": 0.4345206022262573, "learning_rate": 0.0004628011204481793, "loss": 0.3725, "step": 19290 }, { "epoch": 10.777094972067038, "grad_norm": 0.40944766998291016, "learning_rate": 0.00046277310924369746, "loss": 0.4106, "step": 19291 }, { "epoch": 10.777653631284917, "grad_norm": 0.4483919143676758, "learning_rate": 0.0004627450980392157, "loss": 0.4336, "step": 19292 }, { "epoch": 10.778212290502793, "grad_norm": 0.4667958617210388, "learning_rate": 0.0004627170868347339, "loss": 0.4359, "step": 19293 }, { "epoch": 10.77877094972067, "grad_norm": 0.5636364817619324, "learning_rate": 0.0004626890756302521, "loss": 0.4771, "step": 19294 }, { "epoch": 10.779329608938548, "grad_norm": 0.619242250919342, "learning_rate": 0.00046266106442577034, "loss": 0.5751, "step": 19295 }, { "epoch": 10.779888268156425, "grad_norm": 0.5496019721031189, "learning_rate": 0.0004626330532212885, "loss": 0.3757, "step": 19296 }, { "epoch": 10.780446927374301, "grad_norm": 0.5858065485954285, "learning_rate": 0.00046260504201680675, "loss": 0.5946, "step": 19297 }, { "epoch": 10.78100558659218, "grad_norm": 0.7319847941398621, "learning_rate": 
0.00046257703081232495, "loss": 0.4712, "step": 19298 }, { "epoch": 10.781564245810056, "grad_norm": 0.6593323945999146, "learning_rate": 0.0004625490196078431, "loss": 0.4029, "step": 19299 }, { "epoch": 10.782122905027933, "grad_norm": 0.4027957320213318, "learning_rate": 0.00046252100840336137, "loss": 0.4971, "step": 19300 }, { "epoch": 10.78268156424581, "grad_norm": 0.4077528417110443, "learning_rate": 0.00046249299719887957, "loss": 0.4621, "step": 19301 }, { "epoch": 10.783240223463688, "grad_norm": 0.3862953186035156, "learning_rate": 0.0004624649859943978, "loss": 0.3546, "step": 19302 }, { "epoch": 10.783798882681564, "grad_norm": 0.414044052362442, "learning_rate": 0.000462436974789916, "loss": 0.4204, "step": 19303 }, { "epoch": 10.78435754189944, "grad_norm": 0.5123696327209473, "learning_rate": 0.00046240896358543414, "loss": 0.5273, "step": 19304 }, { "epoch": 10.78491620111732, "grad_norm": 0.47156015038490295, "learning_rate": 0.0004623809523809524, "loss": 0.4362, "step": 19305 }, { "epoch": 10.785474860335196, "grad_norm": 0.9246543645858765, "learning_rate": 0.0004623529411764706, "loss": 0.4105, "step": 19306 }, { "epoch": 10.786033519553072, "grad_norm": 1.2858933210372925, "learning_rate": 0.0004623249299719888, "loss": 0.5014, "step": 19307 }, { "epoch": 10.786592178770949, "grad_norm": 0.6493561863899231, "learning_rate": 0.000462296918767507, "loss": 0.5241, "step": 19308 }, { "epoch": 10.787150837988827, "grad_norm": 0.48784035444259644, "learning_rate": 0.0004622689075630252, "loss": 0.4331, "step": 19309 }, { "epoch": 10.787709497206704, "grad_norm": 0.39675384759902954, "learning_rate": 0.0004622408963585434, "loss": 0.3875, "step": 19310 }, { "epoch": 10.78826815642458, "grad_norm": 0.49113771319389343, "learning_rate": 0.00046221288515406163, "loss": 0.3633, "step": 19311 }, { "epoch": 10.788826815642459, "grad_norm": 0.6159145832061768, "learning_rate": 0.00046218487394957984, "loss": 0.4832, "step": 19312 }, { "epoch": 
10.789385474860335, "grad_norm": 0.8310093283653259, "learning_rate": 0.00046215686274509804, "loss": 0.4591, "step": 19313 }, { "epoch": 10.789944134078212, "grad_norm": 0.3561941385269165, "learning_rate": 0.00046212885154061625, "loss": 0.304, "step": 19314 }, { "epoch": 10.79050279329609, "grad_norm": 0.4736616909503937, "learning_rate": 0.00046210084033613446, "loss": 0.4513, "step": 19315 }, { "epoch": 10.791061452513967, "grad_norm": 0.5063034892082214, "learning_rate": 0.00046207282913165266, "loss": 0.4467, "step": 19316 }, { "epoch": 10.791620111731843, "grad_norm": 0.45543110370635986, "learning_rate": 0.0004620448179271709, "loss": 0.4513, "step": 19317 }, { "epoch": 10.79217877094972, "grad_norm": 0.6345983743667603, "learning_rate": 0.0004620168067226891, "loss": 0.5543, "step": 19318 }, { "epoch": 10.792737430167598, "grad_norm": 0.3974752128124237, "learning_rate": 0.0004619887955182073, "loss": 0.3946, "step": 19319 }, { "epoch": 10.793296089385475, "grad_norm": 1.067287802696228, "learning_rate": 0.0004619607843137255, "loss": 0.4421, "step": 19320 }, { "epoch": 10.793854748603351, "grad_norm": 1.379459261894226, "learning_rate": 0.0004619327731092437, "loss": 0.3857, "step": 19321 }, { "epoch": 10.79441340782123, "grad_norm": 1.1232118606567383, "learning_rate": 0.00046190476190476195, "loss": 0.4589, "step": 19322 }, { "epoch": 10.794972067039106, "grad_norm": 0.5565237998962402, "learning_rate": 0.0004618767507002801, "loss": 0.3886, "step": 19323 }, { "epoch": 10.795530726256983, "grad_norm": 2.12746524810791, "learning_rate": 0.0004618487394957983, "loss": 0.4434, "step": 19324 }, { "epoch": 10.796089385474861, "grad_norm": 0.4889336824417114, "learning_rate": 0.00046182072829131657, "loss": 0.5011, "step": 19325 }, { "epoch": 10.796648044692738, "grad_norm": 1.116256594657898, "learning_rate": 0.0004617927170868347, "loss": 0.3395, "step": 19326 }, { "epoch": 10.797206703910614, "grad_norm": 1.0063740015029907, "learning_rate": 
0.000461764705882353, "loss": 0.5212, "step": 19327 }, { "epoch": 10.797765363128491, "grad_norm": 0.5698748230934143, "learning_rate": 0.00046173669467787113, "loss": 0.4851, "step": 19328 }, { "epoch": 10.79832402234637, "grad_norm": 0.4347343444824219, "learning_rate": 0.00046170868347338934, "loss": 0.3816, "step": 19329 }, { "epoch": 10.798882681564246, "grad_norm": 1.0486772060394287, "learning_rate": 0.0004616806722689076, "loss": 0.4106, "step": 19330 }, { "epoch": 10.799441340782122, "grad_norm": 0.5287651419639587, "learning_rate": 0.00046165266106442575, "loss": 0.3629, "step": 19331 }, { "epoch": 10.8, "grad_norm": 0.48144909739494324, "learning_rate": 0.000461624649859944, "loss": 0.4464, "step": 19332 }, { "epoch": 10.800558659217877, "grad_norm": 0.40220141410827637, "learning_rate": 0.0004615966386554622, "loss": 0.4883, "step": 19333 }, { "epoch": 10.801117318435754, "grad_norm": 1.6693894863128662, "learning_rate": 0.00046156862745098037, "loss": 0.3668, "step": 19334 }, { "epoch": 10.80167597765363, "grad_norm": 0.47027599811553955, "learning_rate": 0.00046154061624649863, "loss": 0.4018, "step": 19335 }, { "epoch": 10.802234636871509, "grad_norm": 0.5801828503608704, "learning_rate": 0.0004615126050420168, "loss": 0.4979, "step": 19336 }, { "epoch": 10.802793296089385, "grad_norm": 0.42702654004096985, "learning_rate": 0.00046148459383753504, "loss": 0.3765, "step": 19337 }, { "epoch": 10.803351955307262, "grad_norm": 0.5157850384712219, "learning_rate": 0.00046145658263305325, "loss": 0.4205, "step": 19338 }, { "epoch": 10.80391061452514, "grad_norm": 0.6636309027671814, "learning_rate": 0.0004614285714285714, "loss": 0.5256, "step": 19339 }, { "epoch": 10.804469273743017, "grad_norm": 1.0942429304122925, "learning_rate": 0.00046140056022408966, "loss": 0.4275, "step": 19340 }, { "epoch": 10.805027932960893, "grad_norm": 1.4030613899230957, "learning_rate": 0.00046137254901960787, "loss": 0.3863, "step": 19341 }, { "epoch": 10.805586592178772, 
"grad_norm": 0.5275806784629822, "learning_rate": 0.00046134453781512607, "loss": 0.4348, "step": 19342 }, { "epoch": 10.806145251396648, "grad_norm": 0.44086840748786926, "learning_rate": 0.0004613165266106443, "loss": 0.5261, "step": 19343 }, { "epoch": 10.806703910614525, "grad_norm": 0.5196534395217896, "learning_rate": 0.00046128851540616243, "loss": 0.33, "step": 19344 }, { "epoch": 10.807262569832401, "grad_norm": 0.9620259404182434, "learning_rate": 0.0004612605042016807, "loss": 0.3992, "step": 19345 }, { "epoch": 10.80782122905028, "grad_norm": 1.5872310400009155, "learning_rate": 0.0004612324929971989, "loss": 0.3695, "step": 19346 }, { "epoch": 10.808379888268156, "grad_norm": 0.544471800327301, "learning_rate": 0.0004612044817927171, "loss": 0.5001, "step": 19347 }, { "epoch": 10.808938547486033, "grad_norm": 0.3735467195510864, "learning_rate": 0.0004611764705882353, "loss": 0.4871, "step": 19348 }, { "epoch": 10.809497206703911, "grad_norm": 0.38875165581703186, "learning_rate": 0.0004611484593837535, "loss": 0.4097, "step": 19349 }, { "epoch": 10.810055865921788, "grad_norm": 0.36824342608451843, "learning_rate": 0.0004611204481792717, "loss": 0.4045, "step": 19350 }, { "epoch": 10.810614525139664, "grad_norm": 1.3030104637145996, "learning_rate": 0.0004610924369747899, "loss": 0.4166, "step": 19351 }, { "epoch": 10.811173184357543, "grad_norm": 0.5162912607192993, "learning_rate": 0.0004610644257703082, "loss": 0.4594, "step": 19352 }, { "epoch": 10.81173184357542, "grad_norm": 0.402105450630188, "learning_rate": 0.00046103641456582634, "loss": 0.3883, "step": 19353 }, { "epoch": 10.812290502793296, "grad_norm": 0.4113750159740448, "learning_rate": 0.00046100840336134454, "loss": 0.3873, "step": 19354 }, { "epoch": 10.812849162011172, "grad_norm": 1.1792447566986084, "learning_rate": 0.00046098039215686275, "loss": 0.4022, "step": 19355 }, { "epoch": 10.81340782122905, "grad_norm": 0.4565427601337433, "learning_rate": 0.00046095238095238096, 
"loss": 0.3777, "step": 19356 }, { "epoch": 10.813966480446927, "grad_norm": 0.473994642496109, "learning_rate": 0.00046092436974789916, "loss": 0.4238, "step": 19357 }, { "epoch": 10.814525139664804, "grad_norm": 0.43914365768432617, "learning_rate": 0.00046089635854341737, "loss": 0.4176, "step": 19358 }, { "epoch": 10.815083798882682, "grad_norm": 0.37854260206222534, "learning_rate": 0.0004608683473389356, "loss": 0.4129, "step": 19359 }, { "epoch": 10.815642458100559, "grad_norm": 1.0585401058197021, "learning_rate": 0.00046084033613445383, "loss": 0.3534, "step": 19360 }, { "epoch": 10.816201117318435, "grad_norm": 0.938851535320282, "learning_rate": 0.000460812324929972, "loss": 0.4993, "step": 19361 }, { "epoch": 10.816759776536312, "grad_norm": 0.45085254311561584, "learning_rate": 0.0004607843137254902, "loss": 0.3952, "step": 19362 }, { "epoch": 10.81731843575419, "grad_norm": 0.7993432879447937, "learning_rate": 0.0004607563025210084, "loss": 0.6465, "step": 19363 }, { "epoch": 10.817877094972067, "grad_norm": 0.38208821415901184, "learning_rate": 0.0004607282913165266, "loss": 0.4511, "step": 19364 }, { "epoch": 10.818435754189943, "grad_norm": 0.5797911286354065, "learning_rate": 0.00046070028011204486, "loss": 0.4004, "step": 19365 }, { "epoch": 10.818994413407822, "grad_norm": 0.3544953465461731, "learning_rate": 0.000460672268907563, "loss": 0.3816, "step": 19366 }, { "epoch": 10.819553072625698, "grad_norm": 0.37194350361824036, "learning_rate": 0.0004606442577030812, "loss": 0.3402, "step": 19367 }, { "epoch": 10.820111731843575, "grad_norm": 0.45848625898361206, "learning_rate": 0.0004606162464985995, "loss": 0.4818, "step": 19368 }, { "epoch": 10.820670391061453, "grad_norm": 0.5389447212219238, "learning_rate": 0.00046058823529411763, "loss": 0.4482, "step": 19369 }, { "epoch": 10.82122905027933, "grad_norm": 0.6187888383865356, "learning_rate": 0.0004605602240896359, "loss": 0.5049, "step": 19370 }, { "epoch": 10.821787709497206, "grad_norm": 
0.4174392521381378, "learning_rate": 0.00046053221288515405, "loss": 0.3571, "step": 19371 }, { "epoch": 10.822346368715085, "grad_norm": 1.2650152444839478, "learning_rate": 0.00046050420168067225, "loss": 0.3678, "step": 19372 }, { "epoch": 10.822905027932961, "grad_norm": 0.38347044587135315, "learning_rate": 0.0004604761904761905, "loss": 0.3014, "step": 19373 }, { "epoch": 10.823463687150838, "grad_norm": 1.2803605794906616, "learning_rate": 0.00046044817927170866, "loss": 0.3693, "step": 19374 }, { "epoch": 10.824022346368714, "grad_norm": 0.41358181834220886, "learning_rate": 0.0004604201680672269, "loss": 0.3828, "step": 19375 }, { "epoch": 10.824581005586593, "grad_norm": 0.46894052624702454, "learning_rate": 0.00046039215686274513, "loss": 0.5347, "step": 19376 }, { "epoch": 10.82513966480447, "grad_norm": 0.49601656198501587, "learning_rate": 0.0004603641456582633, "loss": 0.4682, "step": 19377 }, { "epoch": 10.825698324022346, "grad_norm": 0.3628762364387512, "learning_rate": 0.00046033613445378154, "loss": 0.3663, "step": 19378 }, { "epoch": 10.826256983240224, "grad_norm": 0.9737803936004639, "learning_rate": 0.0004603081232492997, "loss": 0.3428, "step": 19379 }, { "epoch": 10.8268156424581, "grad_norm": 0.43125107884407043, "learning_rate": 0.00046028011204481795, "loss": 0.4197, "step": 19380 }, { "epoch": 10.827374301675977, "grad_norm": 0.8552989959716797, "learning_rate": 0.00046025210084033616, "loss": 0.4036, "step": 19381 }, { "epoch": 10.827932960893854, "grad_norm": 0.3894343078136444, "learning_rate": 0.0004602240896358543, "loss": 0.3402, "step": 19382 }, { "epoch": 10.828491620111732, "grad_norm": 2.2663543224334717, "learning_rate": 0.00046019607843137257, "loss": 0.3912, "step": 19383 }, { "epoch": 10.829050279329609, "grad_norm": 0.489553838968277, "learning_rate": 0.0004601680672268908, "loss": 0.4027, "step": 19384 }, { "epoch": 10.829608938547485, "grad_norm": 0.4080771207809448, "learning_rate": 0.000460140056022409, "loss": 
0.3698, "step": 19385 }, { "epoch": 10.830167597765364, "grad_norm": 0.4230101704597473, "learning_rate": 0.0004601120448179272, "loss": 0.4366, "step": 19386 }, { "epoch": 10.83072625698324, "grad_norm": 0.5856796503067017, "learning_rate": 0.00046008403361344534, "loss": 0.5126, "step": 19387 }, { "epoch": 10.831284916201117, "grad_norm": 0.4532826840877533, "learning_rate": 0.0004600560224089636, "loss": 0.4678, "step": 19388 }, { "epoch": 10.831843575418995, "grad_norm": 1.1305073499679565, "learning_rate": 0.0004600280112044818, "loss": 0.3604, "step": 19389 }, { "epoch": 10.832402234636872, "grad_norm": 8.677497863769531, "learning_rate": 0.00046, "loss": 0.4249, "step": 19390 }, { "epoch": 10.832960893854748, "grad_norm": 0.4060731530189514, "learning_rate": 0.0004599719887955182, "loss": 0.4867, "step": 19391 }, { "epoch": 10.833519553072625, "grad_norm": 0.5433281660079956, "learning_rate": 0.0004599439775910364, "loss": 0.4187, "step": 19392 }, { "epoch": 10.834078212290503, "grad_norm": 0.7483600378036499, "learning_rate": 0.00045991596638655463, "loss": 0.4404, "step": 19393 }, { "epoch": 10.83463687150838, "grad_norm": 0.4292261004447937, "learning_rate": 0.00045988795518207284, "loss": 0.4674, "step": 19394 }, { "epoch": 10.835195530726256, "grad_norm": 3.1649255752563477, "learning_rate": 0.00045985994397759104, "loss": 0.3724, "step": 19395 }, { "epoch": 10.835754189944135, "grad_norm": 1.2901256084442139, "learning_rate": 0.00045983193277310925, "loss": 0.4943, "step": 19396 }, { "epoch": 10.836312849162011, "grad_norm": 0.6612398028373718, "learning_rate": 0.00045980392156862746, "loss": 0.3116, "step": 19397 }, { "epoch": 10.836871508379888, "grad_norm": 0.5664133429527283, "learning_rate": 0.00045977591036414566, "loss": 0.5129, "step": 19398 }, { "epoch": 10.837430167597766, "grad_norm": 0.501022458076477, "learning_rate": 0.00045974789915966387, "loss": 0.4489, "step": 19399 }, { "epoch": 10.837988826815643, "grad_norm": 0.6660512685775757, 
"learning_rate": 0.00045971988795518213, "loss": 0.3566, "step": 19400 }, { "epoch": 10.83854748603352, "grad_norm": 0.5224534869194031, "learning_rate": 0.0004596918767507003, "loss": 0.4298, "step": 19401 }, { "epoch": 10.839106145251396, "grad_norm": 0.6409733891487122, "learning_rate": 0.0004596638655462185, "loss": 0.4639, "step": 19402 }, { "epoch": 10.839664804469274, "grad_norm": 0.5712777972221375, "learning_rate": 0.0004596358543417367, "loss": 0.4727, "step": 19403 }, { "epoch": 10.84022346368715, "grad_norm": 0.7224316000938416, "learning_rate": 0.0004596078431372549, "loss": 0.4512, "step": 19404 }, { "epoch": 10.840782122905027, "grad_norm": 1.6012191772460938, "learning_rate": 0.00045957983193277316, "loss": 0.6956, "step": 19405 }, { "epoch": 10.841340782122906, "grad_norm": 0.41232651472091675, "learning_rate": 0.0004595518207282913, "loss": 0.3873, "step": 19406 }, { "epoch": 10.841899441340782, "grad_norm": 0.4890575408935547, "learning_rate": 0.0004595238095238095, "loss": 0.3495, "step": 19407 }, { "epoch": 10.842458100558659, "grad_norm": 0.3697237968444824, "learning_rate": 0.0004594957983193278, "loss": 0.3459, "step": 19408 }, { "epoch": 10.843016759776535, "grad_norm": 0.40435993671417236, "learning_rate": 0.00045946778711484593, "loss": 0.384, "step": 19409 }, { "epoch": 10.843575418994414, "grad_norm": 0.6315296292304993, "learning_rate": 0.0004594397759103642, "loss": 0.4325, "step": 19410 }, { "epoch": 10.84413407821229, "grad_norm": 1.816961646080017, "learning_rate": 0.00045941176470588234, "loss": 0.4791, "step": 19411 }, { "epoch": 10.844692737430167, "grad_norm": 1.417698860168457, "learning_rate": 0.00045938375350140055, "loss": 0.44, "step": 19412 }, { "epoch": 10.845251396648045, "grad_norm": 0.5425516366958618, "learning_rate": 0.0004593557422969188, "loss": 0.5434, "step": 19413 }, { "epoch": 10.845810055865922, "grad_norm": 0.5822159051895142, "learning_rate": 0.00045932773109243696, "loss": 0.3976, "step": 19414 }, { 
"epoch": 10.846368715083798, "grad_norm": 0.5106617212295532, "learning_rate": 0.0004592997198879552, "loss": 0.4486, "step": 19415 }, { "epoch": 10.846927374301677, "grad_norm": 0.5801783204078674, "learning_rate": 0.0004592717086834734, "loss": 0.3663, "step": 19416 }, { "epoch": 10.847486033519553, "grad_norm": 0.5556785464286804, "learning_rate": 0.0004592436974789916, "loss": 0.4236, "step": 19417 }, { "epoch": 10.84804469273743, "grad_norm": 0.39270102977752686, "learning_rate": 0.00045921568627450984, "loss": 0.4367, "step": 19418 }, { "epoch": 10.848603351955306, "grad_norm": 0.7479550838470459, "learning_rate": 0.000459187675070028, "loss": 0.4018, "step": 19419 }, { "epoch": 10.849162011173185, "grad_norm": 0.3583800792694092, "learning_rate": 0.00045915966386554625, "loss": 0.3817, "step": 19420 }, { "epoch": 10.849720670391061, "grad_norm": 0.5465046167373657, "learning_rate": 0.00045913165266106445, "loss": 0.4551, "step": 19421 }, { "epoch": 10.850279329608938, "grad_norm": 0.46662572026252747, "learning_rate": 0.0004591036414565826, "loss": 0.42, "step": 19422 }, { "epoch": 10.850837988826816, "grad_norm": 0.5551797151565552, "learning_rate": 0.00045907563025210087, "loss": 0.3999, "step": 19423 }, { "epoch": 10.851396648044693, "grad_norm": 0.4343171715736389, "learning_rate": 0.00045904761904761907, "loss": 0.367, "step": 19424 }, { "epoch": 10.85195530726257, "grad_norm": 0.5589561462402344, "learning_rate": 0.0004590196078431373, "loss": 0.4261, "step": 19425 }, { "epoch": 10.852513966480448, "grad_norm": 0.7213286757469177, "learning_rate": 0.0004589915966386555, "loss": 0.4521, "step": 19426 }, { "epoch": 10.853072625698324, "grad_norm": 0.4499867856502533, "learning_rate": 0.00045896358543417364, "loss": 0.4668, "step": 19427 }, { "epoch": 10.8536312849162, "grad_norm": 1.3919888734817505, "learning_rate": 0.0004589355742296919, "loss": 0.4661, "step": 19428 }, { "epoch": 10.854189944134077, "grad_norm": 1.2904208898544312, "learning_rate": 
0.0004589075630252101, "loss": 0.3783, "step": 19429 }, { "epoch": 10.854748603351956, "grad_norm": 1.404299020767212, "learning_rate": 0.0004588795518207283, "loss": 0.3654, "step": 19430 }, { "epoch": 10.855307262569832, "grad_norm": 0.7671793103218079, "learning_rate": 0.0004588515406162465, "loss": 0.5028, "step": 19431 }, { "epoch": 10.855865921787709, "grad_norm": 0.6422199606895447, "learning_rate": 0.0004588235294117647, "loss": 0.3595, "step": 19432 }, { "epoch": 10.856424581005587, "grad_norm": 0.48911821842193604, "learning_rate": 0.0004587955182072829, "loss": 0.452, "step": 19433 }, { "epoch": 10.856983240223464, "grad_norm": 0.3717813491821289, "learning_rate": 0.00045876750700280113, "loss": 0.3661, "step": 19434 }, { "epoch": 10.85754189944134, "grad_norm": 0.5459839105606079, "learning_rate": 0.00045873949579831934, "loss": 0.3389, "step": 19435 }, { "epoch": 10.858100558659217, "grad_norm": 0.5253312587738037, "learning_rate": 0.00045871148459383754, "loss": 0.5371, "step": 19436 }, { "epoch": 10.858659217877095, "grad_norm": 0.44571197032928467, "learning_rate": 0.00045868347338935575, "loss": 0.5183, "step": 19437 }, { "epoch": 10.859217877094972, "grad_norm": 0.3728795647621155, "learning_rate": 0.00045865546218487396, "loss": 0.3295, "step": 19438 }, { "epoch": 10.859776536312848, "grad_norm": 0.5091723203659058, "learning_rate": 0.00045862745098039216, "loss": 0.4881, "step": 19439 }, { "epoch": 10.860335195530727, "grad_norm": 12.88525390625, "learning_rate": 0.0004585994397759104, "loss": 0.3995, "step": 19440 }, { "epoch": 10.860893854748603, "grad_norm": 0.40066832304000854, "learning_rate": 0.0004585714285714286, "loss": 0.4227, "step": 19441 }, { "epoch": 10.86145251396648, "grad_norm": 1.599927544593811, "learning_rate": 0.0004585434173669468, "loss": 0.4424, "step": 19442 }, { "epoch": 10.862011173184358, "grad_norm": 4.741647720336914, "learning_rate": 0.000458515406162465, "loss": 0.4309, "step": 19443 }, { "epoch": 
10.862569832402235, "grad_norm": 0.7968552708625793, "learning_rate": 0.0004584873949579832, "loss": 0.5603, "step": 19444 }, { "epoch": 10.863128491620111, "grad_norm": 0.5232609510421753, "learning_rate": 0.00045845938375350145, "loss": 0.4461, "step": 19445 }, { "epoch": 10.86368715083799, "grad_norm": 0.892772376537323, "learning_rate": 0.0004584313725490196, "loss": 0.3901, "step": 19446 }, { "epoch": 10.864245810055866, "grad_norm": 0.7650253772735596, "learning_rate": 0.0004584033613445378, "loss": 0.4357, "step": 19447 }, { "epoch": 10.864804469273743, "grad_norm": 0.8138840794563293, "learning_rate": 0.00045837535014005607, "loss": 0.4213, "step": 19448 }, { "epoch": 10.86536312849162, "grad_norm": 0.7876865863800049, "learning_rate": 0.0004583473389355742, "loss": 0.4421, "step": 19449 }, { "epoch": 10.865921787709498, "grad_norm": 0.5165186524391174, "learning_rate": 0.0004583193277310925, "loss": 0.408, "step": 19450 }, { "epoch": 10.866480446927374, "grad_norm": 0.8853394985198975, "learning_rate": 0.00045829131652661063, "loss": 0.5033, "step": 19451 }, { "epoch": 10.867039106145251, "grad_norm": 0.5527562499046326, "learning_rate": 0.00045826330532212884, "loss": 0.4613, "step": 19452 }, { "epoch": 10.86759776536313, "grad_norm": 0.45241275429725647, "learning_rate": 0.0004582352941176471, "loss": 0.4677, "step": 19453 }, { "epoch": 10.868156424581006, "grad_norm": 0.39702826738357544, "learning_rate": 0.00045820728291316525, "loss": 0.3307, "step": 19454 }, { "epoch": 10.868715083798882, "grad_norm": 0.9053224325180054, "learning_rate": 0.0004581792717086835, "loss": 0.3657, "step": 19455 }, { "epoch": 10.869273743016759, "grad_norm": 2.9940106868743896, "learning_rate": 0.0004581512605042017, "loss": 0.4027, "step": 19456 }, { "epoch": 10.869832402234637, "grad_norm": 0.6413319706916809, "learning_rate": 0.00045812324929971987, "loss": 0.4932, "step": 19457 }, { "epoch": 10.870391061452514, "grad_norm": 0.735305666923523, "learning_rate": 
0.00045809523809523813, "loss": 0.5561, "step": 19458 }, { "epoch": 10.87094972067039, "grad_norm": 0.5540690422058105, "learning_rate": 0.0004580672268907563, "loss": 0.3939, "step": 19459 }, { "epoch": 10.871508379888269, "grad_norm": 0.48930805921554565, "learning_rate": 0.00045803921568627454, "loss": 0.3325, "step": 19460 }, { "epoch": 10.872067039106145, "grad_norm": 0.5332046747207642, "learning_rate": 0.00045801120448179275, "loss": 0.4251, "step": 19461 }, { "epoch": 10.872625698324022, "grad_norm": 0.4967195689678192, "learning_rate": 0.0004579831932773109, "loss": 0.3769, "step": 19462 }, { "epoch": 10.8731843575419, "grad_norm": 0.8713586330413818, "learning_rate": 0.00045795518207282916, "loss": 0.324, "step": 19463 }, { "epoch": 10.873743016759777, "grad_norm": 0.5224945545196533, "learning_rate": 0.00045792717086834737, "loss": 0.4552, "step": 19464 }, { "epoch": 10.874301675977653, "grad_norm": 0.5018672943115234, "learning_rate": 0.00045789915966386557, "loss": 0.4526, "step": 19465 }, { "epoch": 10.87486033519553, "grad_norm": 0.4567224681377411, "learning_rate": 0.0004578711484593838, "loss": 0.518, "step": 19466 }, { "epoch": 10.875418994413408, "grad_norm": 0.6916138529777527, "learning_rate": 0.00045784313725490193, "loss": 0.4227, "step": 19467 }, { "epoch": 10.875977653631285, "grad_norm": 0.40818580985069275, "learning_rate": 0.0004578151260504202, "loss": 0.4148, "step": 19468 }, { "epoch": 10.876536312849161, "grad_norm": 0.5612096786499023, "learning_rate": 0.0004577871148459384, "loss": 0.4683, "step": 19469 }, { "epoch": 10.87709497206704, "grad_norm": 0.4822520315647125, "learning_rate": 0.00045775910364145655, "loss": 0.4671, "step": 19470 }, { "epoch": 10.877653631284916, "grad_norm": 0.7350682020187378, "learning_rate": 0.0004577310924369748, "loss": 0.4428, "step": 19471 }, { "epoch": 10.878212290502793, "grad_norm": 0.4277208149433136, "learning_rate": 0.000457703081232493, "loss": 0.4075, "step": 19472 }, { "epoch": 
10.878770949720671, "grad_norm": 0.9943501353263855, "learning_rate": 0.0004576750700280112, "loss": 0.4584, "step": 19473 }, { "epoch": 10.879329608938548, "grad_norm": 2.121504545211792, "learning_rate": 0.0004576470588235294, "loss": 0.3357, "step": 19474 }, { "epoch": 10.879888268156424, "grad_norm": 0.5127196311950684, "learning_rate": 0.0004576190476190476, "loss": 0.3678, "step": 19475 }, { "epoch": 10.880446927374301, "grad_norm": 0.5205025672912598, "learning_rate": 0.00045759103641456584, "loss": 0.3766, "step": 19476 }, { "epoch": 10.88100558659218, "grad_norm": 0.768817126750946, "learning_rate": 0.00045756302521008404, "loss": 0.3878, "step": 19477 }, { "epoch": 10.881564245810056, "grad_norm": 0.3431500792503357, "learning_rate": 0.00045753501400560225, "loss": 0.3893, "step": 19478 }, { "epoch": 10.882122905027932, "grad_norm": 0.3882765769958496, "learning_rate": 0.00045750700280112046, "loss": 0.3951, "step": 19479 }, { "epoch": 10.88268156424581, "grad_norm": 1.219213604927063, "learning_rate": 0.00045747899159663866, "loss": 0.4234, "step": 19480 }, { "epoch": 10.883240223463687, "grad_norm": 0.40456315875053406, "learning_rate": 0.00045745098039215687, "loss": 0.4455, "step": 19481 }, { "epoch": 10.883798882681564, "grad_norm": 0.43122079968452454, "learning_rate": 0.0004574229691876751, "loss": 0.4727, "step": 19482 }, { "epoch": 10.88435754189944, "grad_norm": 0.5437071323394775, "learning_rate": 0.0004573949579831933, "loss": 0.3954, "step": 19483 }, { "epoch": 10.884916201117319, "grad_norm": 0.4350295662879944, "learning_rate": 0.0004573669467787115, "loss": 0.4002, "step": 19484 }, { "epoch": 10.885474860335195, "grad_norm": 0.5720260143280029, "learning_rate": 0.0004573389355742297, "loss": 0.5156, "step": 19485 }, { "epoch": 10.886033519553072, "grad_norm": 0.686155378818512, "learning_rate": 0.0004573109243697479, "loss": 0.6256, "step": 19486 }, { "epoch": 10.88659217877095, "grad_norm": 0.7230165004730225, "learning_rate": 
0.0004572829131652661, "loss": 0.5493, "step": 19487 }, { "epoch": 10.887150837988827, "grad_norm": 0.4863119125366211, "learning_rate": 0.00045725490196078436, "loss": 0.5854, "step": 19488 }, { "epoch": 10.887709497206703, "grad_norm": 1.0747337341308594, "learning_rate": 0.0004572268907563025, "loss": 0.5878, "step": 19489 }, { "epoch": 10.888268156424582, "grad_norm": 0.9444344639778137, "learning_rate": 0.0004571988795518207, "loss": 0.4221, "step": 19490 }, { "epoch": 10.888826815642458, "grad_norm": 0.5995955467224121, "learning_rate": 0.00045717086834733893, "loss": 0.5155, "step": 19491 }, { "epoch": 10.889385474860335, "grad_norm": 0.9069985747337341, "learning_rate": 0.00045714285714285713, "loss": 0.4391, "step": 19492 }, { "epoch": 10.889944134078211, "grad_norm": 0.4465606212615967, "learning_rate": 0.0004571148459383754, "loss": 0.5045, "step": 19493 }, { "epoch": 10.89050279329609, "grad_norm": 0.38655421137809753, "learning_rate": 0.00045708683473389355, "loss": 0.3466, "step": 19494 }, { "epoch": 10.891061452513966, "grad_norm": 1.4529123306274414, "learning_rate": 0.00045705882352941175, "loss": 0.4651, "step": 19495 }, { "epoch": 10.891620111731843, "grad_norm": 0.7777120471000671, "learning_rate": 0.00045703081232493, "loss": 0.4409, "step": 19496 }, { "epoch": 10.892178770949721, "grad_norm": 0.43112510442733765, "learning_rate": 0.00045700280112044816, "loss": 0.5001, "step": 19497 }, { "epoch": 10.892737430167598, "grad_norm": 0.581341564655304, "learning_rate": 0.0004569747899159664, "loss": 0.527, "step": 19498 }, { "epoch": 10.893296089385474, "grad_norm": 0.5028506517410278, "learning_rate": 0.0004569467787114846, "loss": 0.494, "step": 19499 }, { "epoch": 10.893854748603353, "grad_norm": 0.3946606516838074, "learning_rate": 0.0004569187675070028, "loss": 0.3967, "step": 19500 }, { "epoch": 10.893854748603353, "eval_cer": 0.0880767678090188, "eval_loss": 0.3288307785987854, "eval_runtime": 55.5374, "eval_samples_per_second": 81.711, 
"eval_steps_per_second": 5.114, "eval_wer": 0.3508561213373928, "step": 19500 }, { "epoch": 10.89441340782123, "grad_norm": 0.41130995750427246, "learning_rate": 0.00045689075630252104, "loss": 0.3713, "step": 19501 }, { "epoch": 10.894972067039106, "grad_norm": 0.5164546966552734, "learning_rate": 0.0004568627450980392, "loss": 0.3377, "step": 19502 }, { "epoch": 10.895530726256982, "grad_norm": 0.5026293992996216, "learning_rate": 0.00045683473389355745, "loss": 0.4165, "step": 19503 }, { "epoch": 10.89608938547486, "grad_norm": 0.49078041315078735, "learning_rate": 0.00045680672268907566, "loss": 0.395, "step": 19504 }, { "epoch": 10.896648044692737, "grad_norm": 0.43103906512260437, "learning_rate": 0.0004567787114845938, "loss": 0.4234, "step": 19505 }, { "epoch": 10.897206703910614, "grad_norm": 0.5015199780464172, "learning_rate": 0.00045675070028011207, "loss": 0.6125, "step": 19506 }, { "epoch": 10.897765363128492, "grad_norm": 0.401161253452301, "learning_rate": 0.0004567226890756302, "loss": 0.4157, "step": 19507 }, { "epoch": 10.898324022346369, "grad_norm": 2.0513744354248047, "learning_rate": 0.0004566946778711485, "loss": 0.5296, "step": 19508 }, { "epoch": 10.898882681564245, "grad_norm": 0.6313964128494263, "learning_rate": 0.0004566666666666667, "loss": 0.27, "step": 19509 }, { "epoch": 10.899441340782122, "grad_norm": 0.3549821972846985, "learning_rate": 0.00045663865546218484, "loss": 0.4157, "step": 19510 }, { "epoch": 10.9, "grad_norm": 0.33573731780052185, "learning_rate": 0.0004566106442577031, "loss": 0.2892, "step": 19511 }, { "epoch": 10.900558659217877, "grad_norm": 0.8451279401779175, "learning_rate": 0.0004565826330532213, "loss": 0.4456, "step": 19512 }, { "epoch": 10.901117318435753, "grad_norm": 0.5604871511459351, "learning_rate": 0.0004565546218487395, "loss": 0.5025, "step": 19513 }, { "epoch": 10.901675977653632, "grad_norm": 1.018762230873108, "learning_rate": 0.0004565266106442577, "loss": 0.4146, "step": 19514 }, { "epoch": 
10.902234636871508, "grad_norm": 0.459634393453598, "learning_rate": 0.00045649859943977587, "loss": 0.4496, "step": 19515 }, { "epoch": 10.902793296089385, "grad_norm": 0.47295621037483215, "learning_rate": 0.00045647058823529413, "loss": 0.523, "step": 19516 }, { "epoch": 10.903351955307263, "grad_norm": 0.4551648795604706, "learning_rate": 0.00045644257703081234, "loss": 0.5013, "step": 19517 }, { "epoch": 10.90391061452514, "grad_norm": 0.41703560948371887, "learning_rate": 0.00045641456582633054, "loss": 0.3904, "step": 19518 }, { "epoch": 10.904469273743016, "grad_norm": 7.310099124908447, "learning_rate": 0.00045638655462184875, "loss": 0.5181, "step": 19519 }, { "epoch": 10.905027932960895, "grad_norm": 0.4674391746520996, "learning_rate": 0.00045635854341736696, "loss": 0.3536, "step": 19520 }, { "epoch": 10.905586592178771, "grad_norm": 0.5428652167320251, "learning_rate": 0.00045633053221288516, "loss": 0.5864, "step": 19521 }, { "epoch": 10.906145251396648, "grad_norm": 0.9354425668716431, "learning_rate": 0.00045630252100840337, "loss": 0.4639, "step": 19522 }, { "epoch": 10.906703910614524, "grad_norm": 0.4676138162612915, "learning_rate": 0.0004562745098039216, "loss": 0.3484, "step": 19523 }, { "epoch": 10.907262569832403, "grad_norm": 0.5535160899162292, "learning_rate": 0.0004562464985994398, "loss": 0.4784, "step": 19524 }, { "epoch": 10.90782122905028, "grad_norm": 0.7905294299125671, "learning_rate": 0.000456218487394958, "loss": 0.4046, "step": 19525 }, { "epoch": 10.908379888268156, "grad_norm": 0.8160191178321838, "learning_rate": 0.0004561904761904762, "loss": 0.4403, "step": 19526 }, { "epoch": 10.908938547486034, "grad_norm": 0.6045393943786621, "learning_rate": 0.0004561624649859944, "loss": 0.3802, "step": 19527 }, { "epoch": 10.90949720670391, "grad_norm": 0.9762187004089355, "learning_rate": 0.00045613445378151266, "loss": 0.4623, "step": 19528 }, { "epoch": 10.910055865921787, "grad_norm": 0.8041545152664185, "learning_rate": 
0.0004561064425770308, "loss": 0.3273, "step": 19529 }, { "epoch": 10.910614525139664, "grad_norm": 2.0979504585266113, "learning_rate": 0.000456078431372549, "loss": 0.3865, "step": 19530 }, { "epoch": 10.911173184357542, "grad_norm": 0.32649150490760803, "learning_rate": 0.0004560504201680672, "loss": 0.3553, "step": 19531 }, { "epoch": 10.911731843575419, "grad_norm": 0.44879236817359924, "learning_rate": 0.00045602240896358543, "loss": 0.3794, "step": 19532 }, { "epoch": 10.912290502793295, "grad_norm": 0.5523907542228699, "learning_rate": 0.0004559943977591037, "loss": 0.4494, "step": 19533 }, { "epoch": 10.912849162011174, "grad_norm": 0.7766351699829102, "learning_rate": 0.00045596638655462184, "loss": 0.4794, "step": 19534 }, { "epoch": 10.91340782122905, "grad_norm": 0.47743871808052063, "learning_rate": 0.00045593837535014005, "loss": 0.4017, "step": 19535 }, { "epoch": 10.913966480446927, "grad_norm": 0.4808497726917267, "learning_rate": 0.0004559103641456583, "loss": 0.5614, "step": 19536 }, { "epoch": 10.914525139664804, "grad_norm": 0.6021647453308105, "learning_rate": 0.00045588235294117646, "loss": 0.3827, "step": 19537 }, { "epoch": 10.915083798882682, "grad_norm": 0.435291588306427, "learning_rate": 0.0004558543417366947, "loss": 0.4085, "step": 19538 }, { "epoch": 10.915642458100558, "grad_norm": 0.38414716720581055, "learning_rate": 0.00045582633053221287, "loss": 0.3872, "step": 19539 }, { "epoch": 10.916201117318435, "grad_norm": 1.3766226768493652, "learning_rate": 0.0004557983193277311, "loss": 0.4004, "step": 19540 }, { "epoch": 10.916759776536313, "grad_norm": 0.4242534637451172, "learning_rate": 0.00045577030812324934, "loss": 0.377, "step": 19541 }, { "epoch": 10.91731843575419, "grad_norm": 0.5262918472290039, "learning_rate": 0.0004557422969187675, "loss": 0.4333, "step": 19542 }, { "epoch": 10.917877094972066, "grad_norm": 0.7887991070747375, "learning_rate": 0.00045571428571428575, "loss": 0.3603, "step": 19543 }, { "epoch": 
10.918435754189945, "grad_norm": 0.7010632753372192, "learning_rate": 0.00045568627450980395, "loss": 0.4901, "step": 19544 }, { "epoch": 10.918994413407821, "grad_norm": 1.2077034711837769, "learning_rate": 0.0004556582633053221, "loss": 0.348, "step": 19545 }, { "epoch": 10.919553072625698, "grad_norm": 0.41414928436279297, "learning_rate": 0.00045563025210084037, "loss": 0.3163, "step": 19546 }, { "epoch": 10.920111731843576, "grad_norm": 0.4330168068408966, "learning_rate": 0.0004556022408963585, "loss": 0.4114, "step": 19547 }, { "epoch": 10.920670391061453, "grad_norm": 0.7332313060760498, "learning_rate": 0.0004555742296918768, "loss": 0.4734, "step": 19548 }, { "epoch": 10.92122905027933, "grad_norm": 0.815731942653656, "learning_rate": 0.000455546218487395, "loss": 0.4167, "step": 19549 }, { "epoch": 10.921787709497206, "grad_norm": 0.41119226813316345, "learning_rate": 0.00045551820728291314, "loss": 0.4512, "step": 19550 }, { "epoch": 10.922346368715084, "grad_norm": 0.38646405935287476, "learning_rate": 0.0004554901960784314, "loss": 0.3779, "step": 19551 }, { "epoch": 10.922905027932961, "grad_norm": 2.6060380935668945, "learning_rate": 0.0004554621848739496, "loss": 0.4122, "step": 19552 }, { "epoch": 10.923463687150837, "grad_norm": 0.403250515460968, "learning_rate": 0.0004554341736694678, "loss": 0.4977, "step": 19553 }, { "epoch": 10.924022346368716, "grad_norm": 3.6118671894073486, "learning_rate": 0.000455406162464986, "loss": 0.4153, "step": 19554 }, { "epoch": 10.924581005586592, "grad_norm": 0.34593239426612854, "learning_rate": 0.00045537815126050417, "loss": 0.4242, "step": 19555 }, { "epoch": 10.925139664804469, "grad_norm": 2.0285918712615967, "learning_rate": 0.0004553501400560224, "loss": 0.3982, "step": 19556 }, { "epoch": 10.925698324022346, "grad_norm": 0.35781940817832947, "learning_rate": 0.00045532212885154063, "loss": 0.3475, "step": 19557 }, { "epoch": 10.926256983240224, "grad_norm": 0.5869525074958801, "learning_rate": 
0.00045529411764705884, "loss": 0.4845, "step": 19558 }, { "epoch": 10.9268156424581, "grad_norm": 0.390906423330307, "learning_rate": 0.00045526610644257704, "loss": 0.4572, "step": 19559 }, { "epoch": 10.927374301675977, "grad_norm": 0.6141411662101746, "learning_rate": 0.00045523809523809525, "loss": 0.4154, "step": 19560 }, { "epoch": 10.927932960893855, "grad_norm": 0.4375908672809601, "learning_rate": 0.00045521008403361346, "loss": 0.3124, "step": 19561 }, { "epoch": 10.928491620111732, "grad_norm": 0.5885324478149414, "learning_rate": 0.00045518207282913166, "loss": 0.5413, "step": 19562 }, { "epoch": 10.929050279329608, "grad_norm": 0.5527775287628174, "learning_rate": 0.0004551540616246499, "loss": 0.4553, "step": 19563 }, { "epoch": 10.929608938547487, "grad_norm": 0.4902518689632416, "learning_rate": 0.0004551260504201681, "loss": 0.3416, "step": 19564 }, { "epoch": 10.930167597765363, "grad_norm": 0.5091426372528076, "learning_rate": 0.0004550980392156863, "loss": 0.4116, "step": 19565 }, { "epoch": 10.93072625698324, "grad_norm": 0.5823405385017395, "learning_rate": 0.0004550700280112045, "loss": 0.4464, "step": 19566 }, { "epoch": 10.931284916201117, "grad_norm": 1.0579520463943481, "learning_rate": 0.0004550420168067227, "loss": 0.3851, "step": 19567 }, { "epoch": 10.931843575418995, "grad_norm": 0.5456339120864868, "learning_rate": 0.00045501400560224095, "loss": 0.4308, "step": 19568 }, { "epoch": 10.932402234636871, "grad_norm": 0.4888848662376404, "learning_rate": 0.0004549859943977591, "loss": 0.4242, "step": 19569 }, { "epoch": 10.932960893854748, "grad_norm": 0.5546827912330627, "learning_rate": 0.0004549579831932773, "loss": 0.478, "step": 19570 }, { "epoch": 10.933519553072626, "grad_norm": 0.5477078557014465, "learning_rate": 0.00045492997198879557, "loss": 0.531, "step": 19571 }, { "epoch": 10.934078212290503, "grad_norm": 0.3058905601501465, "learning_rate": 0.0004549019607843137, "loss": 0.3468, "step": 19572 }, { "epoch": 
10.93463687150838, "grad_norm": 1.571875810623169, "learning_rate": 0.000454873949579832, "loss": 0.4637, "step": 19573 }, { "epoch": 10.935195530726258, "grad_norm": 0.4084053337574005, "learning_rate": 0.00045484593837535013, "loss": 0.4177, "step": 19574 }, { "epoch": 10.935754189944134, "grad_norm": 0.48023900389671326, "learning_rate": 0.00045481792717086834, "loss": 0.4431, "step": 19575 }, { "epoch": 10.936312849162011, "grad_norm": 0.36626118421554565, "learning_rate": 0.0004547899159663866, "loss": 0.4176, "step": 19576 }, { "epoch": 10.936871508379888, "grad_norm": 0.6702055335044861, "learning_rate": 0.00045476190476190475, "loss": 0.4911, "step": 19577 }, { "epoch": 10.937430167597766, "grad_norm": 0.6162322759628296, "learning_rate": 0.000454733893557423, "loss": 0.4082, "step": 19578 }, { "epoch": 10.937988826815642, "grad_norm": 4.33035135269165, "learning_rate": 0.0004547058823529412, "loss": 0.355, "step": 19579 }, { "epoch": 10.938547486033519, "grad_norm": 0.5423091650009155, "learning_rate": 0.00045467787114845937, "loss": 0.4879, "step": 19580 }, { "epoch": 10.939106145251397, "grad_norm": 1.243164300918579, "learning_rate": 0.00045464985994397763, "loss": 0.3925, "step": 19581 }, { "epoch": 10.939664804469274, "grad_norm": 0.675317645072937, "learning_rate": 0.0004546218487394958, "loss": 0.5342, "step": 19582 }, { "epoch": 10.94022346368715, "grad_norm": 0.5734110474586487, "learning_rate": 0.000454593837535014, "loss": 0.4463, "step": 19583 }, { "epoch": 10.940782122905027, "grad_norm": 0.4258284866809845, "learning_rate": 0.00045456582633053225, "loss": 0.3748, "step": 19584 }, { "epoch": 10.941340782122905, "grad_norm": 2.7716588973999023, "learning_rate": 0.0004545378151260504, "loss": 0.4513, "step": 19585 }, { "epoch": 10.941899441340782, "grad_norm": 0.9812430739402771, "learning_rate": 0.00045450980392156866, "loss": 0.3437, "step": 19586 }, { "epoch": 10.942458100558659, "grad_norm": 0.4784356951713562, "learning_rate": 
0.00045448179271708687, "loss": 0.4679, "step": 19587 }, { "epoch": 10.943016759776537, "grad_norm": 0.6685768961906433, "learning_rate": 0.000454453781512605, "loss": 0.5758, "step": 19588 }, { "epoch": 10.943575418994413, "grad_norm": 5.183349132537842, "learning_rate": 0.0004544257703081233, "loss": 0.4057, "step": 19589 }, { "epoch": 10.94413407821229, "grad_norm": 0.5789082646369934, "learning_rate": 0.00045439775910364143, "loss": 0.5449, "step": 19590 }, { "epoch": 10.944692737430168, "grad_norm": 1.1220394372940063, "learning_rate": 0.0004543697478991597, "loss": 0.3977, "step": 19591 }, { "epoch": 10.945251396648045, "grad_norm": 0.7104514241218567, "learning_rate": 0.0004543417366946779, "loss": 0.4354, "step": 19592 }, { "epoch": 10.945810055865921, "grad_norm": 1.457007646560669, "learning_rate": 0.00045431372549019605, "loss": 0.4375, "step": 19593 }, { "epoch": 10.946368715083798, "grad_norm": 0.5466610789299011, "learning_rate": 0.0004542857142857143, "loss": 0.4628, "step": 19594 }, { "epoch": 10.946927374301676, "grad_norm": 0.7462083697319031, "learning_rate": 0.0004542577030812325, "loss": 0.4249, "step": 19595 }, { "epoch": 10.947486033519553, "grad_norm": 0.43555358052253723, "learning_rate": 0.0004542296918767507, "loss": 0.3507, "step": 19596 }, { "epoch": 10.94804469273743, "grad_norm": 0.45313170552253723, "learning_rate": 0.0004542016806722689, "loss": 0.431, "step": 19597 }, { "epoch": 10.948603351955308, "grad_norm": 1.132649540901184, "learning_rate": 0.0004541736694677871, "loss": 0.4257, "step": 19598 }, { "epoch": 10.949162011173184, "grad_norm": 0.3683547377586365, "learning_rate": 0.00045414565826330534, "loss": 0.4093, "step": 19599 }, { "epoch": 10.949720670391061, "grad_norm": 0.6518691778182983, "learning_rate": 0.00045411764705882354, "loss": 0.4432, "step": 19600 }, { "epoch": 10.95027932960894, "grad_norm": 0.47441366314888, "learning_rate": 0.00045408963585434175, "loss": 0.3596, "step": 19601 }, { "epoch": 
10.950837988826816, "grad_norm": 0.3779560327529907, "learning_rate": 0.00045406162464985996, "loss": 0.3943, "step": 19602 }, { "epoch": 10.951396648044692, "grad_norm": 0.5354276299476624, "learning_rate": 0.00045403361344537816, "loss": 0.4064, "step": 19603 }, { "epoch": 10.951955307262569, "grad_norm": 6.9575300216674805, "learning_rate": 0.00045400560224089637, "loss": 0.4225, "step": 19604 }, { "epoch": 10.952513966480447, "grad_norm": 0.33621883392333984, "learning_rate": 0.0004539775910364146, "loss": 0.3562, "step": 19605 }, { "epoch": 10.953072625698324, "grad_norm": 1.0441874265670776, "learning_rate": 0.0004539495798319328, "loss": 0.5699, "step": 19606 }, { "epoch": 10.9536312849162, "grad_norm": 0.7098649144172668, "learning_rate": 0.000453921568627451, "loss": 0.5251, "step": 19607 }, { "epoch": 10.954189944134079, "grad_norm": 0.6170877814292908, "learning_rate": 0.0004538935574229692, "loss": 0.4174, "step": 19608 }, { "epoch": 10.954748603351955, "grad_norm": 0.6251637935638428, "learning_rate": 0.0004538655462184874, "loss": 0.4187, "step": 19609 }, { "epoch": 10.955307262569832, "grad_norm": 0.5573031306266785, "learning_rate": 0.0004538375350140056, "loss": 0.447, "step": 19610 }, { "epoch": 10.955865921787709, "grad_norm": 2.5758886337280273, "learning_rate": 0.00045380952380952386, "loss": 0.3442, "step": 19611 }, { "epoch": 10.956424581005587, "grad_norm": 0.5471271276473999, "learning_rate": 0.000453781512605042, "loss": 0.4465, "step": 19612 }, { "epoch": 10.956983240223463, "grad_norm": 0.5629642605781555, "learning_rate": 0.0004537535014005602, "loss": 0.3749, "step": 19613 }, { "epoch": 10.95754189944134, "grad_norm": 0.7012425065040588, "learning_rate": 0.00045372549019607843, "loss": 0.4484, "step": 19614 }, { "epoch": 10.958100558659218, "grad_norm": 0.5014681220054626, "learning_rate": 0.00045369747899159663, "loss": 0.3617, "step": 19615 }, { "epoch": 10.958659217877095, "grad_norm": 0.4093373119831085, "learning_rate": 
0.0004536694677871149, "loss": 0.44, "step": 19616 }, { "epoch": 10.959217877094972, "grad_norm": 0.44349992275238037, "learning_rate": 0.00045364145658263305, "loss": 0.3918, "step": 19617 }, { "epoch": 10.95977653631285, "grad_norm": 0.4647926688194275, "learning_rate": 0.00045361344537815125, "loss": 0.5832, "step": 19618 }, { "epoch": 10.960335195530726, "grad_norm": 0.5177651047706604, "learning_rate": 0.0004535854341736695, "loss": 0.3546, "step": 19619 }, { "epoch": 10.960893854748603, "grad_norm": 3.884995698928833, "learning_rate": 0.00045355742296918766, "loss": 0.4733, "step": 19620 }, { "epoch": 10.961452513966481, "grad_norm": 0.3276974558830261, "learning_rate": 0.0004535294117647059, "loss": 0.3642, "step": 19621 }, { "epoch": 10.962011173184358, "grad_norm": 0.4807997941970825, "learning_rate": 0.0004535014005602241, "loss": 0.3803, "step": 19622 }, { "epoch": 10.962569832402234, "grad_norm": 0.40378236770629883, "learning_rate": 0.0004534733893557423, "loss": 0.4202, "step": 19623 }, { "epoch": 10.963128491620111, "grad_norm": 0.7819784879684448, "learning_rate": 0.00045344537815126054, "loss": 0.4873, "step": 19624 }, { "epoch": 10.96368715083799, "grad_norm": 0.6481210589408875, "learning_rate": 0.0004534173669467787, "loss": 0.5176, "step": 19625 }, { "epoch": 10.964245810055866, "grad_norm": 0.4645385146141052, "learning_rate": 0.00045338935574229695, "loss": 0.3849, "step": 19626 }, { "epoch": 10.964804469273743, "grad_norm": 0.40287497639656067, "learning_rate": 0.00045336134453781516, "loss": 0.375, "step": 19627 }, { "epoch": 10.96536312849162, "grad_norm": 0.507799506187439, "learning_rate": 0.0004533333333333333, "loss": 0.4873, "step": 19628 }, { "epoch": 10.965921787709497, "grad_norm": 1.1175541877746582, "learning_rate": 0.00045330532212885157, "loss": 0.5788, "step": 19629 }, { "epoch": 10.966480446927374, "grad_norm": 0.6667212843894958, "learning_rate": 0.0004532773109243697, "loss": 0.4776, "step": 19630 }, { "epoch": 
10.96703910614525, "grad_norm": 0.5277851819992065, "learning_rate": 0.000453249299719888, "loss": 0.4856, "step": 19631 }, { "epoch": 10.967597765363129, "grad_norm": 1.7395663261413574, "learning_rate": 0.0004532212885154062, "loss": 0.4985, "step": 19632 }, { "epoch": 10.968156424581005, "grad_norm": 0.7707605957984924, "learning_rate": 0.00045319327731092434, "loss": 0.3627, "step": 19633 }, { "epoch": 10.968715083798882, "grad_norm": 0.462990403175354, "learning_rate": 0.0004531652661064426, "loss": 0.4145, "step": 19634 }, { "epoch": 10.96927374301676, "grad_norm": 0.7646647095680237, "learning_rate": 0.0004531372549019608, "loss": 0.4486, "step": 19635 }, { "epoch": 10.969832402234637, "grad_norm": 0.7143821120262146, "learning_rate": 0.000453109243697479, "loss": 0.4108, "step": 19636 }, { "epoch": 10.970391061452514, "grad_norm": 0.3491571247577667, "learning_rate": 0.0004530812324929972, "loss": 0.3908, "step": 19637 }, { "epoch": 10.970949720670392, "grad_norm": 0.5693480968475342, "learning_rate": 0.00045305322128851537, "loss": 0.4161, "step": 19638 }, { "epoch": 10.971508379888268, "grad_norm": 0.4876122772693634, "learning_rate": 0.00045302521008403363, "loss": 0.582, "step": 19639 }, { "epoch": 10.972067039106145, "grad_norm": 1.2239868640899658, "learning_rate": 0.00045299719887955184, "loss": 0.394, "step": 19640 }, { "epoch": 10.972625698324022, "grad_norm": 0.4342758357524872, "learning_rate": 0.00045296918767507004, "loss": 0.4103, "step": 19641 }, { "epoch": 10.9731843575419, "grad_norm": 0.7984012961387634, "learning_rate": 0.00045294117647058825, "loss": 0.5361, "step": 19642 }, { "epoch": 10.973743016759776, "grad_norm": 0.4931205213069916, "learning_rate": 0.00045291316526610646, "loss": 0.3766, "step": 19643 }, { "epoch": 10.974301675977653, "grad_norm": 0.36186158657073975, "learning_rate": 0.00045288515406162466, "loss": 0.3871, "step": 19644 }, { "epoch": 10.974860335195531, "grad_norm": 0.4351528286933899, "learning_rate": 
0.00045285714285714287, "loss": 0.4285, "step": 19645 }, { "epoch": 10.975418994413408, "grad_norm": 0.44537338614463806, "learning_rate": 0.0004528291316526611, "loss": 0.3881, "step": 19646 }, { "epoch": 10.975977653631285, "grad_norm": 0.4209382236003876, "learning_rate": 0.0004528011204481793, "loss": 0.3895, "step": 19647 }, { "epoch": 10.976536312849163, "grad_norm": 3.2156825065612793, "learning_rate": 0.0004527731092436975, "loss": 0.4198, "step": 19648 }, { "epoch": 10.97709497206704, "grad_norm": 0.36199796199798584, "learning_rate": 0.0004527450980392157, "loss": 0.399, "step": 19649 }, { "epoch": 10.977653631284916, "grad_norm": 1.9104856252670288, "learning_rate": 0.0004527170868347339, "loss": 0.429, "step": 19650 }, { "epoch": 10.978212290502793, "grad_norm": 0.6647889614105225, "learning_rate": 0.00045268907563025216, "loss": 0.3044, "step": 19651 }, { "epoch": 10.978770949720671, "grad_norm": 0.3544287383556366, "learning_rate": 0.0004526610644257703, "loss": 0.3189, "step": 19652 }, { "epoch": 10.979329608938547, "grad_norm": 0.489487886428833, "learning_rate": 0.0004526330532212885, "loss": 0.4892, "step": 19653 }, { "epoch": 10.979888268156424, "grad_norm": 0.4223870635032654, "learning_rate": 0.0004526050420168067, "loss": 0.4181, "step": 19654 }, { "epoch": 10.980446927374302, "grad_norm": 0.6175500750541687, "learning_rate": 0.00045257703081232493, "loss": 0.4078, "step": 19655 }, { "epoch": 10.981005586592179, "grad_norm": 1.7539653778076172, "learning_rate": 0.0004525490196078432, "loss": 0.4003, "step": 19656 }, { "epoch": 10.981564245810056, "grad_norm": 0.4580148160457611, "learning_rate": 0.00045252100840336134, "loss": 0.5029, "step": 19657 }, { "epoch": 10.982122905027932, "grad_norm": 14.965265274047852, "learning_rate": 0.00045249299719887955, "loss": 0.4467, "step": 19658 }, { "epoch": 10.98268156424581, "grad_norm": 0.43277639150619507, "learning_rate": 0.0004524649859943978, "loss": 0.4291, "step": 19659 }, { "epoch": 
10.983240223463687, "grad_norm": 0.610205888748169, "learning_rate": 0.00045243697478991596, "loss": 0.3418, "step": 19660 }, { "epoch": 10.983798882681564, "grad_norm": 2.1606523990631104, "learning_rate": 0.0004524089635854342, "loss": 0.4099, "step": 19661 }, { "epoch": 10.984357541899442, "grad_norm": 1.0042824745178223, "learning_rate": 0.00045238095238095237, "loss": 0.3864, "step": 19662 }, { "epoch": 10.984916201117318, "grad_norm": 0.5823230147361755, "learning_rate": 0.0004523529411764706, "loss": 0.4438, "step": 19663 }, { "epoch": 10.985474860335195, "grad_norm": 0.5017644762992859, "learning_rate": 0.00045232492997198884, "loss": 0.3367, "step": 19664 }, { "epoch": 10.986033519553073, "grad_norm": 0.3199414312839508, "learning_rate": 0.000452296918767507, "loss": 0.3371, "step": 19665 }, { "epoch": 10.98659217877095, "grad_norm": 0.5106126666069031, "learning_rate": 0.00045226890756302525, "loss": 0.41, "step": 19666 }, { "epoch": 10.987150837988827, "grad_norm": 0.4596010744571686, "learning_rate": 0.00045224089635854345, "loss": 0.3919, "step": 19667 }, { "epoch": 10.987709497206703, "grad_norm": 3.0303168296813965, "learning_rate": 0.0004522128851540616, "loss": 0.5195, "step": 19668 }, { "epoch": 10.988268156424581, "grad_norm": 0.38064494729042053, "learning_rate": 0.00045218487394957987, "loss": 0.4028, "step": 19669 }, { "epoch": 10.988826815642458, "grad_norm": 0.44305285811424255, "learning_rate": 0.000452156862745098, "loss": 0.423, "step": 19670 }, { "epoch": 10.989385474860335, "grad_norm": 0.4474530816078186, "learning_rate": 0.0004521288515406163, "loss": 0.5321, "step": 19671 }, { "epoch": 10.989944134078213, "grad_norm": 0.5138105154037476, "learning_rate": 0.0004521008403361345, "loss": 0.4436, "step": 19672 }, { "epoch": 10.99050279329609, "grad_norm": 1.440447211265564, "learning_rate": 0.00045207282913165264, "loss": 0.4462, "step": 19673 }, { "epoch": 10.991061452513966, "grad_norm": 1.712256669998169, "learning_rate": 
0.0004520448179271709, "loss": 0.5729, "step": 19674 }, { "epoch": 10.991620111731844, "grad_norm": 0.6162176132202148, "learning_rate": 0.0004520168067226891, "loss": 0.4236, "step": 19675 }, { "epoch": 10.992178770949721, "grad_norm": 0.46638524532318115, "learning_rate": 0.0004519887955182073, "loss": 0.3226, "step": 19676 }, { "epoch": 10.992737430167598, "grad_norm": 0.39813655614852905, "learning_rate": 0.0004519607843137255, "loss": 0.4374, "step": 19677 }, { "epoch": 10.993296089385474, "grad_norm": 0.6996338367462158, "learning_rate": 0.00045193277310924367, "loss": 0.4331, "step": 19678 }, { "epoch": 10.993854748603352, "grad_norm": 0.5107343196868896, "learning_rate": 0.0004519047619047619, "loss": 0.4094, "step": 19679 }, { "epoch": 10.994413407821229, "grad_norm": 0.5090128183364868, "learning_rate": 0.00045187675070028013, "loss": 0.4573, "step": 19680 }, { "epoch": 10.994972067039106, "grad_norm": 0.5859510898590088, "learning_rate": 0.00045184873949579834, "loss": 0.3451, "step": 19681 }, { "epoch": 10.995530726256984, "grad_norm": 0.7955729961395264, "learning_rate": 0.00045182072829131654, "loss": 0.3594, "step": 19682 }, { "epoch": 10.99608938547486, "grad_norm": 0.3718481957912445, "learning_rate": 0.00045179271708683475, "loss": 0.4275, "step": 19683 }, { "epoch": 10.996648044692737, "grad_norm": 0.4856449067592621, "learning_rate": 0.00045176470588235296, "loss": 0.4408, "step": 19684 }, { "epoch": 10.997206703910614, "grad_norm": 0.3392998278141022, "learning_rate": 0.00045173669467787116, "loss": 0.4583, "step": 19685 }, { "epoch": 10.997765363128492, "grad_norm": 0.3918527066707611, "learning_rate": 0.00045170868347338937, "loss": 0.4144, "step": 19686 }, { "epoch": 10.998324022346369, "grad_norm": 0.43196964263916016, "learning_rate": 0.0004516806722689076, "loss": 0.4474, "step": 19687 }, { "epoch": 10.998882681564245, "grad_norm": 0.5626681447029114, "learning_rate": 0.0004516526610644258, "loss": 0.3745, "step": 19688 }, { "epoch": 
10.999441340782123, "grad_norm": 0.3662945628166199, "learning_rate": 0.000451624649859944, "loss": 0.3981, "step": 19689 }, { "epoch": 11.0, "grad_norm": 0.3969745934009552, "learning_rate": 0.0004515966386554622, "loss": 0.4305, "step": 19690 }, { "epoch": 11.000558659217877, "grad_norm": 0.5482631325721741, "learning_rate": 0.00045156862745098045, "loss": 0.5046, "step": 19691 }, { "epoch": 11.001117318435755, "grad_norm": 0.6897459626197815, "learning_rate": 0.0004515406162464986, "loss": 0.6416, "step": 19692 }, { "epoch": 11.001675977653631, "grad_norm": 0.9451212882995605, "learning_rate": 0.0004515126050420168, "loss": 0.3999, "step": 19693 }, { "epoch": 11.002234636871508, "grad_norm": 0.8297941088676453, "learning_rate": 0.000451484593837535, "loss": 0.4482, "step": 19694 }, { "epoch": 11.002793296089385, "grad_norm": 1.6589319705963135, "learning_rate": 0.0004514565826330532, "loss": 0.4373, "step": 19695 }, { "epoch": 11.003351955307263, "grad_norm": 0.4920943081378937, "learning_rate": 0.00045142857142857143, "loss": 0.3615, "step": 19696 }, { "epoch": 11.00391061452514, "grad_norm": 0.5959135890007019, "learning_rate": 0.00045140056022408963, "loss": 0.5231, "step": 19697 }, { "epoch": 11.004469273743016, "grad_norm": 0.527924120426178, "learning_rate": 0.00045137254901960784, "loss": 0.58, "step": 19698 }, { "epoch": 11.005027932960894, "grad_norm": 0.3551040291786194, "learning_rate": 0.0004513445378151261, "loss": 0.3689, "step": 19699 }, { "epoch": 11.005586592178771, "grad_norm": 2.1573057174682617, "learning_rate": 0.00045131652661064425, "loss": 0.3492, "step": 19700 }, { "epoch": 11.006145251396648, "grad_norm": 0.8560137748718262, "learning_rate": 0.00045128851540616246, "loss": 0.5545, "step": 19701 }, { "epoch": 11.006703910614526, "grad_norm": 0.7332513332366943, "learning_rate": 0.00045126050420168066, "loss": 0.5847, "step": 19702 }, { "epoch": 11.007262569832402, "grad_norm": 0.4452804625034332, "learning_rate": 0.00045123249299719887, 
"loss": 0.5877, "step": 19703 }, { "epoch": 11.007821229050279, "grad_norm": 0.48486676812171936, "learning_rate": 0.00045120448179271713, "loss": 0.3465, "step": 19704 }, { "epoch": 11.008379888268156, "grad_norm": 0.5134686827659607, "learning_rate": 0.0004511764705882353, "loss": 0.4179, "step": 19705 }, { "epoch": 11.008938547486034, "grad_norm": 0.5725839138031006, "learning_rate": 0.0004511484593837535, "loss": 0.65, "step": 19706 }, { "epoch": 11.00949720670391, "grad_norm": 0.54770827293396, "learning_rate": 0.00045112044817927175, "loss": 0.4343, "step": 19707 }, { "epoch": 11.010055865921787, "grad_norm": 0.6473379731178284, "learning_rate": 0.0004510924369747899, "loss": 0.4204, "step": 19708 }, { "epoch": 11.010614525139665, "grad_norm": 0.4246036112308502, "learning_rate": 0.00045106442577030816, "loss": 0.476, "step": 19709 }, { "epoch": 11.011173184357542, "grad_norm": 0.4953240156173706, "learning_rate": 0.0004510364145658263, "loss": 0.427, "step": 19710 }, { "epoch": 11.011731843575419, "grad_norm": 0.6312206387519836, "learning_rate": 0.0004510084033613445, "loss": 0.3995, "step": 19711 }, { "epoch": 11.012290502793297, "grad_norm": 0.5234584808349609, "learning_rate": 0.0004509803921568628, "loss": 0.4363, "step": 19712 }, { "epoch": 11.012849162011173, "grad_norm": 0.6393572092056274, "learning_rate": 0.00045095238095238093, "loss": 0.463, "step": 19713 }, { "epoch": 11.01340782122905, "grad_norm": 0.5865660905838013, "learning_rate": 0.0004509243697478992, "loss": 0.4178, "step": 19714 }, { "epoch": 11.013966480446927, "grad_norm": 0.49153652787208557, "learning_rate": 0.0004508963585434174, "loss": 0.4386, "step": 19715 }, { "epoch": 11.014525139664805, "grad_norm": 1.3606570959091187, "learning_rate": 0.00045086834733893555, "loss": 0.3972, "step": 19716 }, { "epoch": 11.015083798882682, "grad_norm": 0.44186320900917053, "learning_rate": 0.0004508403361344538, "loss": 0.522, "step": 19717 }, { "epoch": 11.015642458100558, "grad_norm": 
0.355182021856308, "learning_rate": 0.00045081232492997196, "loss": 0.3898, "step": 19718 }, { "epoch": 11.016201117318436, "grad_norm": 0.8190295696258545, "learning_rate": 0.0004507843137254902, "loss": 0.4114, "step": 19719 }, { "epoch": 11.016759776536313, "grad_norm": 0.5568543076515198, "learning_rate": 0.0004507563025210084, "loss": 0.4222, "step": 19720 }, { "epoch": 11.01731843575419, "grad_norm": 0.7755473256111145, "learning_rate": 0.0004507282913165266, "loss": 0.4088, "step": 19721 }, { "epoch": 11.017877094972068, "grad_norm": 0.7063185572624207, "learning_rate": 0.00045070028011204484, "loss": 0.5097, "step": 19722 }, { "epoch": 11.018435754189944, "grad_norm": 1.5051993131637573, "learning_rate": 0.00045067226890756304, "loss": 0.4066, "step": 19723 }, { "epoch": 11.018994413407821, "grad_norm": 0.6611260771751404, "learning_rate": 0.00045064425770308125, "loss": 0.3836, "step": 19724 }, { "epoch": 11.019553072625698, "grad_norm": 0.6900299191474915, "learning_rate": 0.00045061624649859946, "loss": 0.3303, "step": 19725 }, { "epoch": 11.020111731843576, "grad_norm": 0.8698602914810181, "learning_rate": 0.0004505882352941176, "loss": 0.5044, "step": 19726 }, { "epoch": 11.020670391061453, "grad_norm": 0.5802265405654907, "learning_rate": 0.00045056022408963587, "loss": 0.578, "step": 19727 }, { "epoch": 11.021229050279329, "grad_norm": 0.4247480034828186, "learning_rate": 0.0004505322128851541, "loss": 0.4857, "step": 19728 }, { "epoch": 11.021787709497207, "grad_norm": 0.601398229598999, "learning_rate": 0.0004505042016806723, "loss": 0.5832, "step": 19729 }, { "epoch": 11.022346368715084, "grad_norm": 0.6081108450889587, "learning_rate": 0.0004504761904761905, "loss": 0.3957, "step": 19730 }, { "epoch": 11.02290502793296, "grad_norm": 0.5422305464744568, "learning_rate": 0.0004504481792717087, "loss": 0.3656, "step": 19731 }, { "epoch": 11.023463687150837, "grad_norm": 0.5673686861991882, "learning_rate": 0.0004504201680672269, "loss": 0.3877, 
"step": 19732 }, { "epoch": 11.024022346368715, "grad_norm": 0.42768824100494385, "learning_rate": 0.0004503921568627451, "loss": 0.3224, "step": 19733 }, { "epoch": 11.024581005586592, "grad_norm": 0.4438234865665436, "learning_rate": 0.0004503641456582633, "loss": 0.4193, "step": 19734 }, { "epoch": 11.025139664804469, "grad_norm": 3.423943519592285, "learning_rate": 0.0004503361344537815, "loss": 0.4265, "step": 19735 }, { "epoch": 11.025698324022347, "grad_norm": 0.4046982526779175, "learning_rate": 0.0004503081232492997, "loss": 0.416, "step": 19736 }, { "epoch": 11.026256983240224, "grad_norm": 0.8301132917404175, "learning_rate": 0.00045028011204481793, "loss": 0.4521, "step": 19737 }, { "epoch": 11.0268156424581, "grad_norm": 0.7326377630233765, "learning_rate": 0.00045025210084033613, "loss": 0.4588, "step": 19738 }, { "epoch": 11.027374301675978, "grad_norm": 0.7532413005828857, "learning_rate": 0.0004502240896358544, "loss": 0.4204, "step": 19739 }, { "epoch": 11.027932960893855, "grad_norm": 1.4342658519744873, "learning_rate": 0.00045019607843137255, "loss": 0.3627, "step": 19740 }, { "epoch": 11.028491620111732, "grad_norm": 0.39807072281837463, "learning_rate": 0.00045016806722689075, "loss": 0.4182, "step": 19741 }, { "epoch": 11.029050279329608, "grad_norm": 0.6088711023330688, "learning_rate": 0.00045014005602240896, "loss": 0.5232, "step": 19742 }, { "epoch": 11.029608938547486, "grad_norm": 0.6009140014648438, "learning_rate": 0.00045011204481792716, "loss": 0.4527, "step": 19743 }, { "epoch": 11.030167597765363, "grad_norm": 0.3853987753391266, "learning_rate": 0.0004500840336134454, "loss": 0.3275, "step": 19744 }, { "epoch": 11.03072625698324, "grad_norm": 0.5026534795761108, "learning_rate": 0.0004500560224089636, "loss": 0.4687, "step": 19745 }, { "epoch": 11.031284916201118, "grad_norm": 3.3531880378723145, "learning_rate": 0.0004500280112044818, "loss": 0.4827, "step": 19746 }, { "epoch": 11.031843575418995, "grad_norm": 
0.41678839921951294, "learning_rate": 0.00045000000000000004, "loss": 0.45, "step": 19747 }, { "epoch": 11.032402234636871, "grad_norm": 1.356294870376587, "learning_rate": 0.0004499719887955182, "loss": 0.456, "step": 19748 }, { "epoch": 11.03296089385475, "grad_norm": 0.5405905842781067, "learning_rate": 0.00044994397759103645, "loss": 0.4509, "step": 19749 }, { "epoch": 11.033519553072626, "grad_norm": 1.7592824697494507, "learning_rate": 0.0004499159663865546, "loss": 0.4872, "step": 19750 }, { "epoch": 11.034078212290503, "grad_norm": 0.8724478483200073, "learning_rate": 0.0004498879551820728, "loss": 0.4421, "step": 19751 }, { "epoch": 11.03463687150838, "grad_norm": 1.4287068843841553, "learning_rate": 0.00044985994397759107, "loss": 0.4321, "step": 19752 }, { "epoch": 11.035195530726257, "grad_norm": 0.5304585695266724, "learning_rate": 0.0004498319327731092, "loss": 0.4313, "step": 19753 }, { "epoch": 11.035754189944134, "grad_norm": 0.44525033235549927, "learning_rate": 0.0004498039215686275, "loss": 0.4617, "step": 19754 }, { "epoch": 11.03631284916201, "grad_norm": 0.4671137034893036, "learning_rate": 0.0004497759103641457, "loss": 0.4735, "step": 19755 }, { "epoch": 11.036871508379889, "grad_norm": 0.4294011890888214, "learning_rate": 0.00044974789915966384, "loss": 0.5439, "step": 19756 }, { "epoch": 11.037430167597766, "grad_norm": 2.162557363510132, "learning_rate": 0.0004497198879551821, "loss": 0.4158, "step": 19757 }, { "epoch": 11.037988826815642, "grad_norm": 0.5015937685966492, "learning_rate": 0.00044969187675070025, "loss": 0.4842, "step": 19758 }, { "epoch": 11.03854748603352, "grad_norm": 0.3503091037273407, "learning_rate": 0.0004496638655462185, "loss": 0.3708, "step": 19759 }, { "epoch": 11.039106145251397, "grad_norm": 0.6573071479797363, "learning_rate": 0.0004496358543417367, "loss": 0.4353, "step": 19760 }, { "epoch": 11.039664804469274, "grad_norm": 0.672507643699646, "learning_rate": 0.00044960784313725487, "loss": 0.4525, "step": 
19761 }, { "epoch": 11.04022346368715, "grad_norm": 0.5295575261116028, "learning_rate": 0.00044957983193277313, "loss": 0.4883, "step": 19762 }, { "epoch": 11.040782122905028, "grad_norm": 6.157355308532715, "learning_rate": 0.00044955182072829134, "loss": 0.3086, "step": 19763 }, { "epoch": 11.041340782122905, "grad_norm": 2.1904854774475098, "learning_rate": 0.00044952380952380954, "loss": 0.3571, "step": 19764 }, { "epoch": 11.041899441340782, "grad_norm": 0.5147450566291809, "learning_rate": 0.00044949579831932775, "loss": 0.3984, "step": 19765 }, { "epoch": 11.04245810055866, "grad_norm": 6.060114860534668, "learning_rate": 0.0004494677871148459, "loss": 0.6153, "step": 19766 }, { "epoch": 11.043016759776537, "grad_norm": 0.7461329698562622, "learning_rate": 0.00044943977591036416, "loss": 0.3992, "step": 19767 }, { "epoch": 11.043575418994413, "grad_norm": 0.9306190609931946, "learning_rate": 0.00044941176470588237, "loss": 0.3516, "step": 19768 }, { "epoch": 11.04413407821229, "grad_norm": 0.4361603260040283, "learning_rate": 0.0004493837535014006, "loss": 0.4271, "step": 19769 }, { "epoch": 11.044692737430168, "grad_norm": 0.7545832991600037, "learning_rate": 0.0004493557422969188, "loss": 0.4637, "step": 19770 }, { "epoch": 11.045251396648045, "grad_norm": 0.4830663502216339, "learning_rate": 0.000449327731092437, "loss": 0.4796, "step": 19771 }, { "epoch": 11.045810055865921, "grad_norm": 2.228134870529175, "learning_rate": 0.0004492997198879552, "loss": 0.4392, "step": 19772 }, { "epoch": 11.0463687150838, "grad_norm": 0.4732452929019928, "learning_rate": 0.0004492717086834734, "loss": 0.3396, "step": 19773 }, { "epoch": 11.046927374301676, "grad_norm": 0.631354808807373, "learning_rate": 0.00044924369747899166, "loss": 0.444, "step": 19774 }, { "epoch": 11.047486033519553, "grad_norm": 0.6586755514144897, "learning_rate": 0.0004492156862745098, "loss": 0.4068, "step": 19775 }, { "epoch": 11.048044692737431, "grad_norm": 0.7469296455383301, 
"learning_rate": 0.000449187675070028, "loss": 0.3993, "step": 19776 }, { "epoch": 11.048603351955308, "grad_norm": 0.6499115824699402, "learning_rate": 0.0004491596638655462, "loss": 0.5036, "step": 19777 }, { "epoch": 11.049162011173184, "grad_norm": 0.4841870367527008, "learning_rate": 0.00044913165266106443, "loss": 0.3586, "step": 19778 }, { "epoch": 11.04972067039106, "grad_norm": 0.34010013937950134, "learning_rate": 0.0004491036414565827, "loss": 0.3428, "step": 19779 }, { "epoch": 11.050279329608939, "grad_norm": 0.48760271072387695, "learning_rate": 0.00044907563025210084, "loss": 0.4189, "step": 19780 }, { "epoch": 11.050837988826816, "grad_norm": 0.5678296685218811, "learning_rate": 0.00044904761904761905, "loss": 0.4008, "step": 19781 }, { "epoch": 11.051396648044692, "grad_norm": 0.6407414078712463, "learning_rate": 0.0004490196078431373, "loss": 0.4333, "step": 19782 }, { "epoch": 11.05195530726257, "grad_norm": 0.7218817472457886, "learning_rate": 0.00044899159663865546, "loss": 0.4397, "step": 19783 }, { "epoch": 11.052513966480447, "grad_norm": 0.6650747060775757, "learning_rate": 0.0004489635854341737, "loss": 0.5319, "step": 19784 }, { "epoch": 11.053072625698324, "grad_norm": 0.4200950860977173, "learning_rate": 0.00044893557422969187, "loss": 0.3557, "step": 19785 }, { "epoch": 11.053631284916202, "grad_norm": 0.52342289686203, "learning_rate": 0.0004489075630252101, "loss": 0.4107, "step": 19786 }, { "epoch": 11.054189944134079, "grad_norm": 0.4744264483451843, "learning_rate": 0.00044887955182072834, "loss": 0.3897, "step": 19787 }, { "epoch": 11.054748603351955, "grad_norm": 0.43665143847465515, "learning_rate": 0.0004488515406162465, "loss": 0.3561, "step": 19788 }, { "epoch": 11.055307262569832, "grad_norm": 0.3322528898715973, "learning_rate": 0.00044882352941176475, "loss": 0.3173, "step": 19789 }, { "epoch": 11.05586592178771, "grad_norm": 0.7613691687583923, "learning_rate": 0.00044879551820728295, "loss": 0.4838, "step": 19790 }, { 
"epoch": 11.056424581005587, "grad_norm": 0.5752625465393066, "learning_rate": 0.0004487675070028011, "loss": 0.5196, "step": 19791 }, { "epoch": 11.056983240223463, "grad_norm": 0.4546201229095459, "learning_rate": 0.00044873949579831937, "loss": 0.4134, "step": 19792 }, { "epoch": 11.057541899441341, "grad_norm": 0.46799811720848083, "learning_rate": 0.0004487114845938375, "loss": 0.3838, "step": 19793 }, { "epoch": 11.058100558659218, "grad_norm": 0.6219011545181274, "learning_rate": 0.0004486834733893558, "loss": 0.3627, "step": 19794 }, { "epoch": 11.058659217877095, "grad_norm": 0.5327205061912537, "learning_rate": 0.000448655462184874, "loss": 0.4026, "step": 19795 }, { "epoch": 11.059217877094973, "grad_norm": 0.46416011452674866, "learning_rate": 0.00044862745098039214, "loss": 0.444, "step": 19796 }, { "epoch": 11.05977653631285, "grad_norm": 0.4102841913700104, "learning_rate": 0.0004485994397759104, "loss": 0.4167, "step": 19797 }, { "epoch": 11.060335195530726, "grad_norm": 0.8700477480888367, "learning_rate": 0.0004485714285714286, "loss": 0.4323, "step": 19798 }, { "epoch": 11.060893854748603, "grad_norm": 0.7467467784881592, "learning_rate": 0.0004485434173669468, "loss": 0.3808, "step": 19799 }, { "epoch": 11.061452513966481, "grad_norm": 1.0814316272735596, "learning_rate": 0.000448515406162465, "loss": 0.5042, "step": 19800 }, { "epoch": 11.062011173184358, "grad_norm": 0.6582698822021484, "learning_rate": 0.00044848739495798317, "loss": 0.3572, "step": 19801 }, { "epoch": 11.062569832402234, "grad_norm": 0.7060786485671997, "learning_rate": 0.0004484593837535014, "loss": 0.4106, "step": 19802 }, { "epoch": 11.063128491620112, "grad_norm": 1.318556785583496, "learning_rate": 0.00044843137254901963, "loss": 0.5086, "step": 19803 }, { "epoch": 11.063687150837989, "grad_norm": 0.6132367253303528, "learning_rate": 0.0004484033613445378, "loss": 0.4418, "step": 19804 }, { "epoch": 11.064245810055866, "grad_norm": 0.5333831906318665, "learning_rate": 
0.00044837535014005604, "loss": 0.3852, "step": 19805 }, { "epoch": 11.064804469273742, "grad_norm": 0.5851463675498962, "learning_rate": 0.00044834733893557425, "loss": 0.5034, "step": 19806 }, { "epoch": 11.06536312849162, "grad_norm": 0.399917334318161, "learning_rate": 0.00044831932773109246, "loss": 0.434, "step": 19807 }, { "epoch": 11.065921787709497, "grad_norm": 2.6709144115448, "learning_rate": 0.00044829131652661066, "loss": 0.3053, "step": 19808 }, { "epoch": 11.066480446927374, "grad_norm": 0.646639347076416, "learning_rate": 0.0004482633053221288, "loss": 0.5963, "step": 19809 }, { "epoch": 11.067039106145252, "grad_norm": 0.4972863793373108, "learning_rate": 0.0004482352941176471, "loss": 0.4244, "step": 19810 }, { "epoch": 11.067597765363129, "grad_norm": 0.39430907368659973, "learning_rate": 0.0004482072829131653, "loss": 0.4065, "step": 19811 }, { "epoch": 11.068156424581005, "grad_norm": 0.3925905227661133, "learning_rate": 0.0004481792717086835, "loss": 0.4313, "step": 19812 }, { "epoch": 11.068715083798883, "grad_norm": 0.36944013833999634, "learning_rate": 0.0004481512605042017, "loss": 0.3659, "step": 19813 }, { "epoch": 11.06927374301676, "grad_norm": 0.49206021428108215, "learning_rate": 0.0004481232492997199, "loss": 0.3996, "step": 19814 }, { "epoch": 11.069832402234637, "grad_norm": 0.38169413805007935, "learning_rate": 0.0004480952380952381, "loss": 0.3325, "step": 19815 }, { "epoch": 11.070391061452513, "grad_norm": 0.38647326827049255, "learning_rate": 0.0004480672268907563, "loss": 0.3054, "step": 19816 }, { "epoch": 11.070949720670392, "grad_norm": 0.6288229823112488, "learning_rate": 0.0004480392156862745, "loss": 0.4725, "step": 19817 }, { "epoch": 11.071508379888268, "grad_norm": 0.43710818886756897, "learning_rate": 0.0004480112044817927, "loss": 0.2975, "step": 19818 }, { "epoch": 11.072067039106145, "grad_norm": 0.536785364151001, "learning_rate": 0.00044798319327731093, "loss": 0.3972, "step": 19819 }, { "epoch": 
11.072625698324023, "grad_norm": 0.6924615502357483, "learning_rate": 0.00044795518207282913, "loss": 0.5361, "step": 19820 }, { "epoch": 11.0731843575419, "grad_norm": 1.0973560810089111, "learning_rate": 0.00044792717086834734, "loss": 0.3645, "step": 19821 }, { "epoch": 11.073743016759776, "grad_norm": 0.40427523851394653, "learning_rate": 0.0004478991596638656, "loss": 0.4499, "step": 19822 }, { "epoch": 11.074301675977654, "grad_norm": 0.7949575185775757, "learning_rate": 0.00044787114845938375, "loss": 0.4101, "step": 19823 }, { "epoch": 11.074860335195531, "grad_norm": 0.7031083703041077, "learning_rate": 0.00044784313725490196, "loss": 0.6343, "step": 19824 }, { "epoch": 11.075418994413408, "grad_norm": 0.7106063961982727, "learning_rate": 0.00044781512605042016, "loss": 0.4775, "step": 19825 }, { "epoch": 11.075977653631284, "grad_norm": 0.37123459577560425, "learning_rate": 0.00044778711484593837, "loss": 0.4577, "step": 19826 }, { "epoch": 11.076536312849163, "grad_norm": 0.4597998857498169, "learning_rate": 0.00044775910364145663, "loss": 0.4196, "step": 19827 }, { "epoch": 11.077094972067039, "grad_norm": 0.742706298828125, "learning_rate": 0.0004477310924369748, "loss": 0.4793, "step": 19828 }, { "epoch": 11.077653631284916, "grad_norm": 1.2470176219940186, "learning_rate": 0.000447703081232493, "loss": 0.4176, "step": 19829 }, { "epoch": 11.078212290502794, "grad_norm": 0.43885621428489685, "learning_rate": 0.00044767507002801125, "loss": 0.5263, "step": 19830 }, { "epoch": 11.07877094972067, "grad_norm": 0.5389963984489441, "learning_rate": 0.0004476470588235294, "loss": 0.3911, "step": 19831 }, { "epoch": 11.079329608938547, "grad_norm": 0.5724214911460876, "learning_rate": 0.00044761904761904766, "loss": 0.3604, "step": 19832 }, { "epoch": 11.079888268156424, "grad_norm": 6.444626331329346, "learning_rate": 0.0004475910364145658, "loss": 0.4119, "step": 19833 }, { "epoch": 11.080446927374302, "grad_norm": 0.6999254822731018, "learning_rate": 
0.000447563025210084, "loss": 0.4683, "step": 19834 }, { "epoch": 11.081005586592179, "grad_norm": 0.30982500314712524, "learning_rate": 0.0004475350140056023, "loss": 0.2887, "step": 19835 }, { "epoch": 11.081564245810055, "grad_norm": 0.548913300037384, "learning_rate": 0.00044750700280112043, "loss": 0.2762, "step": 19836 }, { "epoch": 11.082122905027934, "grad_norm": 0.7421655058860779, "learning_rate": 0.0004474789915966387, "loss": 0.4327, "step": 19837 }, { "epoch": 11.08268156424581, "grad_norm": 0.6502736210823059, "learning_rate": 0.0004474509803921569, "loss": 0.4731, "step": 19838 }, { "epoch": 11.083240223463687, "grad_norm": 0.6469879746437073, "learning_rate": 0.00044742296918767505, "loss": 0.5146, "step": 19839 }, { "epoch": 11.083798882681565, "grad_norm": 0.7766003012657166, "learning_rate": 0.0004473949579831933, "loss": 0.4411, "step": 19840 }, { "epoch": 11.084357541899442, "grad_norm": 0.6554601788520813, "learning_rate": 0.00044736694677871146, "loss": 0.5981, "step": 19841 }, { "epoch": 11.084916201117318, "grad_norm": 0.3707920014858246, "learning_rate": 0.0004473389355742297, "loss": 0.3617, "step": 19842 }, { "epoch": 11.085474860335195, "grad_norm": 0.6389076709747314, "learning_rate": 0.0004473109243697479, "loss": 0.4439, "step": 19843 }, { "epoch": 11.086033519553073, "grad_norm": 0.5022822022438049, "learning_rate": 0.0004472829131652661, "loss": 0.4366, "step": 19844 }, { "epoch": 11.08659217877095, "grad_norm": 0.6682159900665283, "learning_rate": 0.00044725490196078434, "loss": 0.3834, "step": 19845 }, { "epoch": 11.087150837988826, "grad_norm": 0.43023720383644104, "learning_rate": 0.00044722689075630254, "loss": 0.447, "step": 19846 }, { "epoch": 11.087709497206705, "grad_norm": 0.5825912952423096, "learning_rate": 0.00044719887955182075, "loss": 0.5824, "step": 19847 }, { "epoch": 11.088268156424581, "grad_norm": 0.41087085008621216, "learning_rate": 0.00044717086834733896, "loss": 0.5243, "step": 19848 }, { "epoch": 
11.088826815642458, "grad_norm": 0.40642911195755005, "learning_rate": 0.0004471428571428571, "loss": 0.3839, "step": 19849 }, { "epoch": 11.089385474860336, "grad_norm": 0.3995497524738312, "learning_rate": 0.00044711484593837537, "loss": 0.3825, "step": 19850 }, { "epoch": 11.089944134078213, "grad_norm": 0.7660233378410339, "learning_rate": 0.0004470868347338936, "loss": 0.4725, "step": 19851 }, { "epoch": 11.09050279329609, "grad_norm": 0.4556220769882202, "learning_rate": 0.0004470588235294118, "loss": 0.3796, "step": 19852 }, { "epoch": 11.091061452513966, "grad_norm": 0.47020071744918823, "learning_rate": 0.00044703081232493, "loss": 0.4272, "step": 19853 }, { "epoch": 11.091620111731844, "grad_norm": 0.46526384353637695, "learning_rate": 0.0004470028011204482, "loss": 0.4588, "step": 19854 }, { "epoch": 11.09217877094972, "grad_norm": 0.7570987939834595, "learning_rate": 0.0004469747899159664, "loss": 0.5176, "step": 19855 }, { "epoch": 11.092737430167597, "grad_norm": 0.37922248244285583, "learning_rate": 0.0004469467787114846, "loss": 0.3792, "step": 19856 }, { "epoch": 11.093296089385476, "grad_norm": 0.45460763573646545, "learning_rate": 0.0004469187675070028, "loss": 0.4851, "step": 19857 }, { "epoch": 11.093854748603352, "grad_norm": 1.700627326965332, "learning_rate": 0.000446890756302521, "loss": 0.3735, "step": 19858 }, { "epoch": 11.094413407821229, "grad_norm": 0.5089961290359497, "learning_rate": 0.0004468627450980392, "loss": 0.4219, "step": 19859 }, { "epoch": 11.094972067039107, "grad_norm": 0.4787507653236389, "learning_rate": 0.00044683473389355743, "loss": 0.3761, "step": 19860 }, { "epoch": 11.095530726256984, "grad_norm": 0.6233643293380737, "learning_rate": 0.00044680672268907563, "loss": 0.5126, "step": 19861 }, { "epoch": 11.09608938547486, "grad_norm": 1.026381015777588, "learning_rate": 0.0004467787114845939, "loss": 0.3771, "step": 19862 }, { "epoch": 11.096648044692737, "grad_norm": 0.3337191939353943, "learning_rate": 
0.00044675070028011205, "loss": 0.3513, "step": 19863 }, { "epoch": 11.097206703910615, "grad_norm": 0.37014496326446533, "learning_rate": 0.00044672268907563025, "loss": 0.3559, "step": 19864 }, { "epoch": 11.097765363128492, "grad_norm": 0.41693469882011414, "learning_rate": 0.00044669467787114846, "loss": 0.4258, "step": 19865 }, { "epoch": 11.098324022346368, "grad_norm": 0.5736404657363892, "learning_rate": 0.00044666666666666666, "loss": 0.4507, "step": 19866 }, { "epoch": 11.098882681564247, "grad_norm": 0.50935298204422, "learning_rate": 0.0004466386554621849, "loss": 0.3954, "step": 19867 }, { "epoch": 11.099441340782123, "grad_norm": 0.9463884830474854, "learning_rate": 0.0004466106442577031, "loss": 0.3927, "step": 19868 }, { "epoch": 11.1, "grad_norm": 0.5462876558303833, "learning_rate": 0.0004465826330532213, "loss": 0.4026, "step": 19869 }, { "epoch": 11.100558659217878, "grad_norm": 2.7741801738739014, "learning_rate": 0.00044655462184873954, "loss": 0.3712, "step": 19870 }, { "epoch": 11.101117318435755, "grad_norm": 0.3957228660583496, "learning_rate": 0.0004465266106442577, "loss": 0.4742, "step": 19871 }, { "epoch": 11.101675977653631, "grad_norm": 0.664828896522522, "learning_rate": 0.00044649859943977595, "loss": 0.5621, "step": 19872 }, { "epoch": 11.102234636871508, "grad_norm": 0.9680612087249756, "learning_rate": 0.0004464705882352941, "loss": 0.4785, "step": 19873 }, { "epoch": 11.102793296089386, "grad_norm": 0.6311147809028625, "learning_rate": 0.0004464425770308123, "loss": 0.4577, "step": 19874 }, { "epoch": 11.103351955307263, "grad_norm": 0.8508023023605347, "learning_rate": 0.00044641456582633057, "loss": 0.376, "step": 19875 }, { "epoch": 11.10391061452514, "grad_norm": 13.140066146850586, "learning_rate": 0.0004463865546218487, "loss": 0.4167, "step": 19876 }, { "epoch": 11.104469273743018, "grad_norm": 0.689651608467102, "learning_rate": 0.000446358543417367, "loss": 0.5557, "step": 19877 }, { "epoch": 11.105027932960894, 
"grad_norm": 0.40504926443099976, "learning_rate": 0.0004463305322128852, "loss": 0.4504, "step": 19878 }, { "epoch": 11.10558659217877, "grad_norm": 0.4543781578540802, "learning_rate": 0.00044630252100840334, "loss": 0.4336, "step": 19879 }, { "epoch": 11.106145251396647, "grad_norm": 0.6363242268562317, "learning_rate": 0.0004462745098039216, "loss": 0.3772, "step": 19880 }, { "epoch": 11.106703910614526, "grad_norm": 0.4519440233707428, "learning_rate": 0.00044624649859943975, "loss": 0.527, "step": 19881 }, { "epoch": 11.107262569832402, "grad_norm": 0.3944387435913086, "learning_rate": 0.000446218487394958, "loss": 0.3321, "step": 19882 }, { "epoch": 11.107821229050279, "grad_norm": 0.45843812823295593, "learning_rate": 0.0004461904761904762, "loss": 0.4609, "step": 19883 }, { "epoch": 11.108379888268157, "grad_norm": 0.5077905654907227, "learning_rate": 0.00044616246498599437, "loss": 0.4625, "step": 19884 }, { "epoch": 11.108938547486034, "grad_norm": 0.6277848482131958, "learning_rate": 0.00044613445378151263, "loss": 0.444, "step": 19885 }, { "epoch": 11.10949720670391, "grad_norm": 4.500689506530762, "learning_rate": 0.00044610644257703084, "loss": 0.4412, "step": 19886 }, { "epoch": 11.110055865921789, "grad_norm": 0.6838209629058838, "learning_rate": 0.00044607843137254904, "loss": 0.5411, "step": 19887 }, { "epoch": 11.110614525139665, "grad_norm": 0.45014968514442444, "learning_rate": 0.00044605042016806725, "loss": 0.397, "step": 19888 }, { "epoch": 11.111173184357542, "grad_norm": 0.4000174403190613, "learning_rate": 0.0004460224089635854, "loss": 0.3811, "step": 19889 }, { "epoch": 11.111731843575418, "grad_norm": 3.1476850509643555, "learning_rate": 0.00044599439775910366, "loss": 0.5675, "step": 19890 }, { "epoch": 11.112290502793297, "grad_norm": 0.4931184947490692, "learning_rate": 0.00044596638655462187, "loss": 0.3352, "step": 19891 }, { "epoch": 11.112849162011173, "grad_norm": 0.38772642612457275, "learning_rate": 0.0004459383753501401, 
"loss": 0.3462, "step": 19892 }, { "epoch": 11.11340782122905, "grad_norm": 0.37878426909446716, "learning_rate": 0.0004459103641456583, "loss": 0.3791, "step": 19893 }, { "epoch": 11.113966480446928, "grad_norm": 0.36854350566864014, "learning_rate": 0.0004458823529411765, "loss": 0.4479, "step": 19894 }, { "epoch": 11.114525139664805, "grad_norm": 0.5654700994491577, "learning_rate": 0.0004458543417366947, "loss": 0.4475, "step": 19895 }, { "epoch": 11.115083798882681, "grad_norm": 1.123820185661316, "learning_rate": 0.0004458263305322129, "loss": 0.4679, "step": 19896 }, { "epoch": 11.11564245810056, "grad_norm": 0.5729300379753113, "learning_rate": 0.0004457983193277311, "loss": 0.4097, "step": 19897 }, { "epoch": 11.116201117318436, "grad_norm": 0.3579021096229553, "learning_rate": 0.0004457703081232493, "loss": 0.3735, "step": 19898 }, { "epoch": 11.116759776536313, "grad_norm": 0.3928564190864563, "learning_rate": 0.0004457422969187675, "loss": 0.3832, "step": 19899 }, { "epoch": 11.11731843575419, "grad_norm": 0.4491223394870758, "learning_rate": 0.0004457142857142857, "loss": 0.5005, "step": 19900 }, { "epoch": 11.117877094972068, "grad_norm": 1.066507339477539, "learning_rate": 0.00044568627450980393, "loss": 0.414, "step": 19901 }, { "epoch": 11.118435754189944, "grad_norm": 0.4069530665874481, "learning_rate": 0.0004456582633053222, "loss": 0.4845, "step": 19902 }, { "epoch": 11.11899441340782, "grad_norm": 0.3738228678703308, "learning_rate": 0.00044563025210084034, "loss": 0.3443, "step": 19903 }, { "epoch": 11.119553072625699, "grad_norm": 0.5073080658912659, "learning_rate": 0.00044560224089635855, "loss": 0.3996, "step": 19904 }, { "epoch": 11.120111731843576, "grad_norm": 0.34824588894844055, "learning_rate": 0.00044557422969187675, "loss": 0.3185, "step": 19905 }, { "epoch": 11.120670391061452, "grad_norm": 0.7932087182998657, "learning_rate": 0.00044554621848739496, "loss": 0.4965, "step": 19906 }, { "epoch": 11.121229050279329, "grad_norm": 
0.8209249973297119, "learning_rate": 0.0004455182072829132, "loss": 0.394, "step": 19907 }, { "epoch": 11.121787709497207, "grad_norm": 0.4790240228176117, "learning_rate": 0.00044549019607843137, "loss": 0.4401, "step": 19908 }, { "epoch": 11.122346368715084, "grad_norm": 0.48526036739349365, "learning_rate": 0.0004454621848739496, "loss": 0.4143, "step": 19909 }, { "epoch": 11.12290502793296, "grad_norm": 0.4222531020641327, "learning_rate": 0.00044543417366946784, "loss": 0.4277, "step": 19910 }, { "epoch": 11.123463687150839, "grad_norm": 0.5396952629089355, "learning_rate": 0.000445406162464986, "loss": 0.5434, "step": 19911 }, { "epoch": 11.124022346368715, "grad_norm": 0.3793957233428955, "learning_rate": 0.00044537815126050425, "loss": 0.404, "step": 19912 }, { "epoch": 11.124581005586592, "grad_norm": 0.7523414492607117, "learning_rate": 0.0004453501400560224, "loss": 0.4877, "step": 19913 }, { "epoch": 11.12513966480447, "grad_norm": 0.48998716473579407, "learning_rate": 0.0004453221288515406, "loss": 0.3564, "step": 19914 }, { "epoch": 11.125698324022347, "grad_norm": 2.81388521194458, "learning_rate": 0.00044529411764705887, "loss": 0.5428, "step": 19915 }, { "epoch": 11.126256983240223, "grad_norm": 0.9800290465354919, "learning_rate": 0.000445266106442577, "loss": 0.5023, "step": 19916 }, { "epoch": 11.1268156424581, "grad_norm": 0.6452310681343079, "learning_rate": 0.0004452380952380952, "loss": 0.7114, "step": 19917 }, { "epoch": 11.127374301675978, "grad_norm": 0.4753224849700928, "learning_rate": 0.0004452100840336135, "loss": 0.4169, "step": 19918 }, { "epoch": 11.127932960893855, "grad_norm": 2.317497730255127, "learning_rate": 0.00044518207282913164, "loss": 0.4244, "step": 19919 }, { "epoch": 11.128491620111731, "grad_norm": 0.6090908050537109, "learning_rate": 0.0004451540616246499, "loss": 0.461, "step": 19920 }, { "epoch": 11.12905027932961, "grad_norm": 0.556962251663208, "learning_rate": 0.00044512605042016805, "loss": 0.3918, "step": 
19921 }, { "epoch": 11.129608938547486, "grad_norm": 0.7319069504737854, "learning_rate": 0.00044509803921568625, "loss": 0.3506, "step": 19922 }, { "epoch": 11.130167597765363, "grad_norm": 0.4105057716369629, "learning_rate": 0.0004450700280112045, "loss": 0.4249, "step": 19923 }, { "epoch": 11.130726256983241, "grad_norm": 1.0867705345153809, "learning_rate": 0.00044504201680672267, "loss": 0.4178, "step": 19924 }, { "epoch": 11.131284916201118, "grad_norm": 0.7936561703681946, "learning_rate": 0.0004450140056022409, "loss": 0.3833, "step": 19925 }, { "epoch": 11.131843575418994, "grad_norm": 12.50045394897461, "learning_rate": 0.00044498599439775913, "loss": 0.5131, "step": 19926 }, { "epoch": 11.13240223463687, "grad_norm": 0.46691498160362244, "learning_rate": 0.0004449579831932773, "loss": 0.4661, "step": 19927 }, { "epoch": 11.132960893854749, "grad_norm": 0.5960050821304321, "learning_rate": 0.00044492997198879554, "loss": 0.4234, "step": 19928 }, { "epoch": 11.133519553072626, "grad_norm": 1.6895347833633423, "learning_rate": 0.0004449019607843137, "loss": 0.4954, "step": 19929 }, { "epoch": 11.134078212290502, "grad_norm": 0.5937805771827698, "learning_rate": 0.00044487394957983196, "loss": 0.4785, "step": 19930 }, { "epoch": 11.13463687150838, "grad_norm": 0.4939204752445221, "learning_rate": 0.00044484593837535016, "loss": 0.4696, "step": 19931 }, { "epoch": 11.135195530726257, "grad_norm": 0.3809947967529297, "learning_rate": 0.0004448179271708683, "loss": 0.4102, "step": 19932 }, { "epoch": 11.135754189944134, "grad_norm": 8.647324562072754, "learning_rate": 0.0004447899159663866, "loss": 0.4694, "step": 19933 }, { "epoch": 11.136312849162012, "grad_norm": 0.41407257318496704, "learning_rate": 0.0004447619047619048, "loss": 0.4039, "step": 19934 }, { "epoch": 11.136871508379889, "grad_norm": 0.47642093896865845, "learning_rate": 0.000444733893557423, "loss": 0.387, "step": 19935 }, { "epoch": 11.137430167597765, "grad_norm": 0.5021941661834717, 
"learning_rate": 0.0004447058823529412, "loss": 0.3938, "step": 19936 }, { "epoch": 11.137988826815642, "grad_norm": 0.5977472066879272, "learning_rate": 0.00044467787114845934, "loss": 0.5128, "step": 19937 }, { "epoch": 11.13854748603352, "grad_norm": 2.3964288234710693, "learning_rate": 0.0004446498599439776, "loss": 0.3804, "step": 19938 }, { "epoch": 11.139106145251397, "grad_norm": 0.9563775062561035, "learning_rate": 0.0004446218487394958, "loss": 0.4082, "step": 19939 }, { "epoch": 11.139664804469273, "grad_norm": 0.4272240400314331, "learning_rate": 0.000444593837535014, "loss": 0.4287, "step": 19940 }, { "epoch": 11.140223463687152, "grad_norm": 1.8745205402374268, "learning_rate": 0.0004445658263305322, "loss": 0.3488, "step": 19941 }, { "epoch": 11.140782122905028, "grad_norm": 5.201748371124268, "learning_rate": 0.00044453781512605043, "loss": 0.436, "step": 19942 }, { "epoch": 11.141340782122905, "grad_norm": 0.5420041680335999, "learning_rate": 0.00044450980392156863, "loss": 0.4879, "step": 19943 }, { "epoch": 11.141899441340781, "grad_norm": 0.4293597340583801, "learning_rate": 0.00044448179271708684, "loss": 0.3684, "step": 19944 }, { "epoch": 11.14245810055866, "grad_norm": 0.4900834262371063, "learning_rate": 0.00044445378151260505, "loss": 0.6012, "step": 19945 }, { "epoch": 11.143016759776536, "grad_norm": 1.0333806276321411, "learning_rate": 0.00044442577030812325, "loss": 0.5209, "step": 19946 }, { "epoch": 11.143575418994413, "grad_norm": 0.36754944920539856, "learning_rate": 0.00044439775910364146, "loss": 0.3517, "step": 19947 }, { "epoch": 11.144134078212291, "grad_norm": 0.37449154257774353, "learning_rate": 0.00044436974789915966, "loss": 0.4389, "step": 19948 }, { "epoch": 11.144692737430168, "grad_norm": 0.6703065633773804, "learning_rate": 0.00044434173669467787, "loss": 0.4326, "step": 19949 }, { "epoch": 11.145251396648044, "grad_norm": 0.6214983463287354, "learning_rate": 0.00044431372549019613, "loss": 0.4155, "step": 19950 }, { 
"epoch": 11.145810055865923, "grad_norm": 0.4014701247215271, "learning_rate": 0.0004442857142857143, "loss": 0.3575, "step": 19951 }, { "epoch": 11.1463687150838, "grad_norm": 0.5953041911125183, "learning_rate": 0.0004442577030812325, "loss": 0.4555, "step": 19952 }, { "epoch": 11.146927374301676, "grad_norm": 0.4547383189201355, "learning_rate": 0.0004442296918767507, "loss": 0.4249, "step": 19953 }, { "epoch": 11.147486033519552, "grad_norm": 0.4030759334564209, "learning_rate": 0.0004442016806722689, "loss": 0.418, "step": 19954 }, { "epoch": 11.14804469273743, "grad_norm": 0.48749279975891113, "learning_rate": 0.00044417366946778716, "loss": 0.5147, "step": 19955 }, { "epoch": 11.148603351955307, "grad_norm": 0.43067467212677, "learning_rate": 0.0004441456582633053, "loss": 0.3918, "step": 19956 }, { "epoch": 11.149162011173184, "grad_norm": 0.5205173492431641, "learning_rate": 0.0004441176470588235, "loss": 0.4978, "step": 19957 }, { "epoch": 11.149720670391062, "grad_norm": 1.1520217657089233, "learning_rate": 0.0004440896358543418, "loss": 0.4844, "step": 19958 }, { "epoch": 11.150279329608939, "grad_norm": 0.4912845194339752, "learning_rate": 0.00044406162464985993, "loss": 0.5776, "step": 19959 }, { "epoch": 11.150837988826815, "grad_norm": 0.6092059016227722, "learning_rate": 0.0004440336134453782, "loss": 0.5862, "step": 19960 }, { "epoch": 11.151396648044694, "grad_norm": 0.7184321880340576, "learning_rate": 0.00044400560224089634, "loss": 0.4388, "step": 19961 }, { "epoch": 11.15195530726257, "grad_norm": 0.40275368094444275, "learning_rate": 0.00044397759103641455, "loss": 0.3847, "step": 19962 }, { "epoch": 11.152513966480447, "grad_norm": 0.3934175968170166, "learning_rate": 0.0004439495798319328, "loss": 0.4448, "step": 19963 }, { "epoch": 11.153072625698323, "grad_norm": 0.8231940269470215, "learning_rate": 0.00044392156862745096, "loss": 0.4, "step": 19964 }, { "epoch": 11.153631284916202, "grad_norm": 0.714524507522583, "learning_rate": 
0.0004438935574229692, "loss": 0.4606, "step": 19965 }, { "epoch": 11.154189944134078, "grad_norm": 0.44008609652519226, "learning_rate": 0.0004438655462184874, "loss": 0.4001, "step": 19966 }, { "epoch": 11.154748603351955, "grad_norm": 0.44089022278785706, "learning_rate": 0.0004438375350140056, "loss": 0.4447, "step": 19967 }, { "epoch": 11.155307262569833, "grad_norm": 0.370942622423172, "learning_rate": 0.00044380952380952384, "loss": 0.3814, "step": 19968 }, { "epoch": 11.15586592178771, "grad_norm": 1.131296157836914, "learning_rate": 0.000443781512605042, "loss": 0.3927, "step": 19969 }, { "epoch": 11.156424581005586, "grad_norm": 0.5789841413497925, "learning_rate": 0.00044375350140056025, "loss": 0.4624, "step": 19970 }, { "epoch": 11.156983240223465, "grad_norm": 0.5582478642463684, "learning_rate": 0.00044372549019607846, "loss": 0.5091, "step": 19971 }, { "epoch": 11.157541899441341, "grad_norm": 0.5246102809906006, "learning_rate": 0.0004436974789915966, "loss": 0.414, "step": 19972 }, { "epoch": 11.158100558659218, "grad_norm": 0.9045739769935608, "learning_rate": 0.00044366946778711487, "loss": 0.4948, "step": 19973 }, { "epoch": 11.158659217877094, "grad_norm": 0.38677120208740234, "learning_rate": 0.0004436414565826331, "loss": 0.421, "step": 19974 }, { "epoch": 11.159217877094973, "grad_norm": 1.924946665763855, "learning_rate": 0.0004436134453781513, "loss": 0.3525, "step": 19975 }, { "epoch": 11.15977653631285, "grad_norm": 0.3892768621444702, "learning_rate": 0.0004435854341736695, "loss": 0.4033, "step": 19976 }, { "epoch": 11.160335195530726, "grad_norm": 0.4049968719482422, "learning_rate": 0.00044355742296918764, "loss": 0.3679, "step": 19977 }, { "epoch": 11.160893854748604, "grad_norm": 0.45830294489860535, "learning_rate": 0.0004435294117647059, "loss": 0.4137, "step": 19978 }, { "epoch": 11.16145251396648, "grad_norm": 0.7053163647651672, "learning_rate": 0.0004435014005602241, "loss": 0.4338, "step": 19979 }, { "epoch": 
11.162011173184357, "grad_norm": 0.4060651659965515, "learning_rate": 0.0004434733893557423, "loss": 0.4131, "step": 19980 }, { "epoch": 11.162569832402234, "grad_norm": 0.5380635857582092, "learning_rate": 0.0004434453781512605, "loss": 0.54, "step": 19981 }, { "epoch": 11.163128491620112, "grad_norm": 0.4502077102661133, "learning_rate": 0.0004434173669467787, "loss": 0.3791, "step": 19982 }, { "epoch": 11.163687150837989, "grad_norm": 0.4709026515483856, "learning_rate": 0.00044338935574229693, "loss": 0.3722, "step": 19983 }, { "epoch": 11.164245810055865, "grad_norm": 0.53351891040802, "learning_rate": 0.00044336134453781513, "loss": 0.4523, "step": 19984 }, { "epoch": 11.164804469273744, "grad_norm": 0.4962504208087921, "learning_rate": 0.00044333333333333334, "loss": 0.4393, "step": 19985 }, { "epoch": 11.16536312849162, "grad_norm": 0.4671856760978699, "learning_rate": 0.00044330532212885155, "loss": 0.3741, "step": 19986 }, { "epoch": 11.165921787709497, "grad_norm": 0.44896411895751953, "learning_rate": 0.00044327731092436975, "loss": 0.4095, "step": 19987 }, { "epoch": 11.166480446927375, "grad_norm": 0.6326718330383301, "learning_rate": 0.00044324929971988796, "loss": 0.3112, "step": 19988 }, { "epoch": 11.167039106145252, "grad_norm": 0.4235628843307495, "learning_rate": 0.00044322128851540616, "loss": 0.3399, "step": 19989 }, { "epoch": 11.167597765363128, "grad_norm": 0.48997122049331665, "learning_rate": 0.0004431932773109244, "loss": 0.3867, "step": 19990 }, { "epoch": 11.168156424581005, "grad_norm": 0.4693921208381653, "learning_rate": 0.0004431652661064426, "loss": 0.5065, "step": 19991 }, { "epoch": 11.168715083798883, "grad_norm": 0.6533905267715454, "learning_rate": 0.0004431372549019608, "loss": 0.4556, "step": 19992 }, { "epoch": 11.16927374301676, "grad_norm": 0.6487933397293091, "learning_rate": 0.000443109243697479, "loss": 0.3861, "step": 19993 }, { "epoch": 11.169832402234636, "grad_norm": 0.5712664127349854, "learning_rate": 
0.0004430812324929972, "loss": 0.3827, "step": 19994 }, { "epoch": 11.170391061452515, "grad_norm": 0.8969149589538574, "learning_rate": 0.00044305322128851545, "loss": 0.421, "step": 19995 }, { "epoch": 11.170949720670391, "grad_norm": 0.3209882080554962, "learning_rate": 0.0004430252100840336, "loss": 0.3328, "step": 19996 }, { "epoch": 11.171508379888268, "grad_norm": 0.42669442296028137, "learning_rate": 0.0004429971988795518, "loss": 0.3969, "step": 19997 }, { "epoch": 11.172067039106146, "grad_norm": 0.42877790331840515, "learning_rate": 0.00044296918767507007, "loss": 0.3806, "step": 19998 }, { "epoch": 11.172625698324023, "grad_norm": 0.7065102458000183, "learning_rate": 0.0004429411764705882, "loss": 0.4662, "step": 19999 }, { "epoch": 11.1731843575419, "grad_norm": 1.6102545261383057, "learning_rate": 0.0004429131652661065, "loss": 0.3371, "step": 20000 }, { "epoch": 11.1731843575419, "eval_cer": 0.08795674991544195, "eval_loss": 0.3355470299720764, "eval_runtime": 55.5822, "eval_samples_per_second": 81.645, "eval_steps_per_second": 5.11, "eval_wer": 0.34795117454819696, "step": 20000 }, { "epoch": 11.173743016759776, "grad_norm": 1.0497506856918335, "learning_rate": 0.00044288515406162464, "loss": 0.4268, "step": 20001 }, { "epoch": 11.174301675977654, "grad_norm": 0.4438062906265259, "learning_rate": 0.00044285714285714284, "loss": 0.3994, "step": 20002 }, { "epoch": 11.17486033519553, "grad_norm": 0.4353538751602173, "learning_rate": 0.0004428291316526611, "loss": 0.4089, "step": 20003 }, { "epoch": 11.175418994413407, "grad_norm": 0.7603114247322083, "learning_rate": 0.00044280112044817925, "loss": 0.471, "step": 20004 }, { "epoch": 11.175977653631286, "grad_norm": 0.5623716115951538, "learning_rate": 0.0004427731092436975, "loss": 0.5002, "step": 20005 }, { "epoch": 11.176536312849162, "grad_norm": 0.34662359952926636, "learning_rate": 0.0004427450980392157, "loss": 0.2911, "step": 20006 }, { "epoch": 11.177094972067039, "grad_norm": 
0.4288632869720459, "learning_rate": 0.00044271708683473387, "loss": 0.3179, "step": 20007 }, { "epoch": 11.177653631284917, "grad_norm": 0.8289805054664612, "learning_rate": 0.00044268907563025213, "loss": 0.4186, "step": 20008 }, { "epoch": 11.178212290502794, "grad_norm": 0.5202844738960266, "learning_rate": 0.0004426610644257703, "loss": 0.3907, "step": 20009 }, { "epoch": 11.17877094972067, "grad_norm": 0.4844478368759155, "learning_rate": 0.00044263305322128854, "loss": 0.3755, "step": 20010 }, { "epoch": 11.179329608938547, "grad_norm": 0.45475879311561584, "learning_rate": 0.00044260504201680675, "loss": 0.4255, "step": 20011 }, { "epoch": 11.179888268156425, "grad_norm": 0.6086148619651794, "learning_rate": 0.0004425770308123249, "loss": 0.482, "step": 20012 }, { "epoch": 11.180446927374302, "grad_norm": 0.5118682980537415, "learning_rate": 0.00044254901960784316, "loss": 0.4234, "step": 20013 }, { "epoch": 11.181005586592178, "grad_norm": 0.44856396317481995, "learning_rate": 0.00044252100840336137, "loss": 0.3857, "step": 20014 }, { "epoch": 11.181564245810057, "grad_norm": 0.6991450190544128, "learning_rate": 0.0004424929971988796, "loss": 0.4822, "step": 20015 }, { "epoch": 11.182122905027933, "grad_norm": 0.4700288474559784, "learning_rate": 0.0004424649859943978, "loss": 0.3751, "step": 20016 }, { "epoch": 11.18268156424581, "grad_norm": 5.777561187744141, "learning_rate": 0.00044243697478991593, "loss": 0.4723, "step": 20017 }, { "epoch": 11.183240223463686, "grad_norm": 1.6418412923812866, "learning_rate": 0.0004424089635854342, "loss": 0.4915, "step": 20018 }, { "epoch": 11.183798882681565, "grad_norm": 0.7723186612129211, "learning_rate": 0.0004423809523809524, "loss": 0.4334, "step": 20019 }, { "epoch": 11.184357541899441, "grad_norm": 2.2807466983795166, "learning_rate": 0.0004423529411764706, "loss": 0.5472, "step": 20020 }, { "epoch": 11.184916201117318, "grad_norm": 9.813264846801758, "learning_rate": 0.0004423249299719888, "loss": 0.4824, 
"step": 20021 }, { "epoch": 11.185474860335196, "grad_norm": 1.8531826734542847, "learning_rate": 0.000442296918767507, "loss": 0.437, "step": 20022 }, { "epoch": 11.186033519553073, "grad_norm": 0.5479413866996765, "learning_rate": 0.0004422689075630252, "loss": 0.6534, "step": 20023 }, { "epoch": 11.18659217877095, "grad_norm": 0.4835755527019501, "learning_rate": 0.00044224089635854343, "loss": 0.3739, "step": 20024 }, { "epoch": 11.187150837988828, "grad_norm": 0.8052713871002197, "learning_rate": 0.0004422128851540617, "loss": 0.4332, "step": 20025 }, { "epoch": 11.187709497206704, "grad_norm": 0.46861356496810913, "learning_rate": 0.00044218487394957984, "loss": 0.4136, "step": 20026 }, { "epoch": 11.18826815642458, "grad_norm": 0.5138416290283203, "learning_rate": 0.00044215686274509805, "loss": 0.4212, "step": 20027 }, { "epoch": 11.188826815642457, "grad_norm": 0.4061744213104248, "learning_rate": 0.00044212885154061625, "loss": 0.3101, "step": 20028 }, { "epoch": 11.189385474860336, "grad_norm": 0.5533204078674316, "learning_rate": 0.00044210084033613446, "loss": 0.3935, "step": 20029 }, { "epoch": 11.189944134078212, "grad_norm": 0.8668876886367798, "learning_rate": 0.00044207282913165266, "loss": 0.4078, "step": 20030 }, { "epoch": 11.190502793296089, "grad_norm": 0.4431772530078888, "learning_rate": 0.00044204481792717087, "loss": 0.3593, "step": 20031 }, { "epoch": 11.191061452513967, "grad_norm": 0.5002390146255493, "learning_rate": 0.0004420168067226891, "loss": 0.4997, "step": 20032 }, { "epoch": 11.191620111731844, "grad_norm": 0.5600509643554688, "learning_rate": 0.00044198879551820734, "loss": 0.5281, "step": 20033 }, { "epoch": 11.19217877094972, "grad_norm": 0.41320517659187317, "learning_rate": 0.0004419607843137255, "loss": 0.3685, "step": 20034 }, { "epoch": 11.192737430167599, "grad_norm": 0.46651437878608704, "learning_rate": 0.0004419327731092437, "loss": 0.3518, "step": 20035 }, { "epoch": 11.193296089385475, "grad_norm": 
0.649560809135437, "learning_rate": 0.0004419047619047619, "loss": 0.3718, "step": 20036 }, { "epoch": 11.193854748603352, "grad_norm": 1.3389005661010742, "learning_rate": 0.0004418767507002801, "loss": 0.4594, "step": 20037 }, { "epoch": 11.194413407821228, "grad_norm": 3.4143636226654053, "learning_rate": 0.00044184873949579837, "loss": 0.4759, "step": 20038 }, { "epoch": 11.194972067039107, "grad_norm": 1.9919544458389282, "learning_rate": 0.0004418207282913165, "loss": 0.3858, "step": 20039 }, { "epoch": 11.195530726256983, "grad_norm": 1.37981379032135, "learning_rate": 0.0004417927170868347, "loss": 0.4451, "step": 20040 }, { "epoch": 11.19608938547486, "grad_norm": 0.38582009077072144, "learning_rate": 0.000441764705882353, "loss": 0.428, "step": 20041 }, { "epoch": 11.196648044692738, "grad_norm": 0.3976221978664398, "learning_rate": 0.00044173669467787114, "loss": 0.4256, "step": 20042 }, { "epoch": 11.197206703910615, "grad_norm": 0.8136613965034485, "learning_rate": 0.0004417086834733894, "loss": 0.5439, "step": 20043 }, { "epoch": 11.197765363128491, "grad_norm": 0.49499455094337463, "learning_rate": 0.00044168067226890755, "loss": 0.4703, "step": 20044 }, { "epoch": 11.19832402234637, "grad_norm": 0.4361111521720886, "learning_rate": 0.00044165266106442575, "loss": 0.3807, "step": 20045 }, { "epoch": 11.198882681564246, "grad_norm": 0.5261693000793457, "learning_rate": 0.000441624649859944, "loss": 0.2976, "step": 20046 }, { "epoch": 11.199441340782123, "grad_norm": 0.6489242315292358, "learning_rate": 0.00044159663865546217, "loss": 0.418, "step": 20047 }, { "epoch": 11.2, "grad_norm": 4.530487060546875, "learning_rate": 0.0004415686274509804, "loss": 0.7306, "step": 20048 }, { "epoch": 11.200558659217878, "grad_norm": 0.4683583676815033, "learning_rate": 0.00044154061624649863, "loss": 0.4467, "step": 20049 }, { "epoch": 11.201117318435754, "grad_norm": 2.0726184844970703, "learning_rate": 0.0004415126050420168, "loss": 0.3234, "step": 20050 }, { 
"epoch": 11.20167597765363, "grad_norm": 0.5123502612113953, "learning_rate": 0.00044148459383753504, "loss": 0.5108, "step": 20051 }, { "epoch": 11.202234636871509, "grad_norm": 0.4397718012332916, "learning_rate": 0.0004414565826330532, "loss": 0.4451, "step": 20052 }, { "epoch": 11.202793296089386, "grad_norm": 0.4337514638900757, "learning_rate": 0.00044142857142857146, "loss": 0.4575, "step": 20053 }, { "epoch": 11.203351955307262, "grad_norm": 0.39273107051849365, "learning_rate": 0.00044140056022408966, "loss": 0.498, "step": 20054 }, { "epoch": 11.203910614525139, "grad_norm": 0.5106762647628784, "learning_rate": 0.0004413725490196078, "loss": 0.4031, "step": 20055 }, { "epoch": 11.204469273743017, "grad_norm": 0.6422686576843262, "learning_rate": 0.0004413445378151261, "loss": 0.3635, "step": 20056 }, { "epoch": 11.205027932960894, "grad_norm": 0.5077261924743652, "learning_rate": 0.0004413165266106443, "loss": 0.4331, "step": 20057 }, { "epoch": 11.20558659217877, "grad_norm": 0.5573102831840515, "learning_rate": 0.0004412885154061625, "loss": 0.3637, "step": 20058 }, { "epoch": 11.206145251396649, "grad_norm": 0.48876655101776123, "learning_rate": 0.0004412605042016807, "loss": 0.4376, "step": 20059 }, { "epoch": 11.206703910614525, "grad_norm": 0.47710248827934265, "learning_rate": 0.00044123249299719884, "loss": 0.3698, "step": 20060 }, { "epoch": 11.207262569832402, "grad_norm": 0.5610468983650208, "learning_rate": 0.0004412044817927171, "loss": 0.5781, "step": 20061 }, { "epoch": 11.20782122905028, "grad_norm": 0.3984878957271576, "learning_rate": 0.0004411764705882353, "loss": 0.4171, "step": 20062 }, { "epoch": 11.208379888268157, "grad_norm": 0.557945966720581, "learning_rate": 0.0004411484593837535, "loss": 0.3684, "step": 20063 }, { "epoch": 11.208938547486033, "grad_norm": 2.785871982574463, "learning_rate": 0.0004411204481792717, "loss": 0.4418, "step": 20064 }, { "epoch": 11.20949720670391, "grad_norm": 2.873032569885254, "learning_rate": 
0.00044109243697478993, "loss": 0.4568, "step": 20065 }, { "epoch": 11.210055865921788, "grad_norm": 0.5205723643302917, "learning_rate": 0.00044106442577030813, "loss": 0.4417, "step": 20066 }, { "epoch": 11.210614525139665, "grad_norm": 0.5439926385879517, "learning_rate": 0.00044103641456582634, "loss": 0.4095, "step": 20067 }, { "epoch": 11.211173184357541, "grad_norm": 0.46510207653045654, "learning_rate": 0.00044100840336134455, "loss": 0.49, "step": 20068 }, { "epoch": 11.21173184357542, "grad_norm": 0.4424624741077423, "learning_rate": 0.00044098039215686275, "loss": 0.3856, "step": 20069 }, { "epoch": 11.212290502793296, "grad_norm": 0.6569352746009827, "learning_rate": 0.00044095238095238096, "loss": 0.3589, "step": 20070 }, { "epoch": 11.212849162011173, "grad_norm": 0.3509271740913391, "learning_rate": 0.00044092436974789916, "loss": 0.407, "step": 20071 }, { "epoch": 11.213407821229051, "grad_norm": 0.45570892095565796, "learning_rate": 0.00044089635854341737, "loss": 0.4675, "step": 20072 }, { "epoch": 11.213966480446928, "grad_norm": 0.8975301384925842, "learning_rate": 0.00044086834733893563, "loss": 0.448, "step": 20073 }, { "epoch": 11.214525139664804, "grad_norm": 0.4648612141609192, "learning_rate": 0.0004408403361344538, "loss": 0.4848, "step": 20074 }, { "epoch": 11.21508379888268, "grad_norm": 0.689265787601471, "learning_rate": 0.000440812324929972, "loss": 0.3949, "step": 20075 }, { "epoch": 11.21564245810056, "grad_norm": 0.6003746390342712, "learning_rate": 0.0004407843137254902, "loss": 0.4982, "step": 20076 }, { "epoch": 11.216201117318436, "grad_norm": 0.6151201128959656, "learning_rate": 0.0004407563025210084, "loss": 0.4687, "step": 20077 }, { "epoch": 11.216759776536312, "grad_norm": 0.428731769323349, "learning_rate": 0.00044072829131652666, "loss": 0.4462, "step": 20078 }, { "epoch": 11.21731843575419, "grad_norm": 0.5962857604026794, "learning_rate": 0.0004407002801120448, "loss": 0.3454, "step": 20079 }, { "epoch": 
11.217877094972067, "grad_norm": 0.35154932737350464, "learning_rate": 0.000440672268907563, "loss": 0.4113, "step": 20080 }, { "epoch": 11.218435754189944, "grad_norm": 0.5738747715950012, "learning_rate": 0.0004406442577030813, "loss": 0.4235, "step": 20081 }, { "epoch": 11.21899441340782, "grad_norm": 0.4769916236400604, "learning_rate": 0.00044061624649859943, "loss": 0.4319, "step": 20082 }, { "epoch": 11.219553072625699, "grad_norm": 0.5487616658210754, "learning_rate": 0.0004405882352941177, "loss": 0.3855, "step": 20083 }, { "epoch": 11.220111731843575, "grad_norm": 3.416422128677368, "learning_rate": 0.00044056022408963584, "loss": 0.4535, "step": 20084 }, { "epoch": 11.220670391061452, "grad_norm": 0.45399385690689087, "learning_rate": 0.00044053221288515405, "loss": 0.3677, "step": 20085 }, { "epoch": 11.22122905027933, "grad_norm": 0.3997017443180084, "learning_rate": 0.0004405042016806723, "loss": 0.4031, "step": 20086 }, { "epoch": 11.221787709497207, "grad_norm": 0.6133577823638916, "learning_rate": 0.00044047619047619046, "loss": 0.4233, "step": 20087 }, { "epoch": 11.222346368715083, "grad_norm": 0.5656393766403198, "learning_rate": 0.0004404481792717087, "loss": 0.4745, "step": 20088 }, { "epoch": 11.222905027932962, "grad_norm": 0.42285895347595215, "learning_rate": 0.0004404201680672269, "loss": 0.3373, "step": 20089 }, { "epoch": 11.223463687150838, "grad_norm": 0.747688353061676, "learning_rate": 0.0004403921568627451, "loss": 0.4509, "step": 20090 }, { "epoch": 11.224022346368715, "grad_norm": 0.5643873810768127, "learning_rate": 0.00044036414565826334, "loss": 0.4337, "step": 20091 }, { "epoch": 11.224581005586591, "grad_norm": 0.4267115592956543, "learning_rate": 0.0004403361344537815, "loss": 0.3616, "step": 20092 }, { "epoch": 11.22513966480447, "grad_norm": 0.5259825587272644, "learning_rate": 0.00044030812324929975, "loss": 0.4448, "step": 20093 }, { "epoch": 11.225698324022346, "grad_norm": 0.6425420641899109, "learning_rate": 
0.00044028011204481796, "loss": 0.439, "step": 20094 }, { "epoch": 11.226256983240223, "grad_norm": 0.5163062810897827, "learning_rate": 0.0004402521008403361, "loss": 0.498, "step": 20095 }, { "epoch": 11.226815642458101, "grad_norm": 0.4639945924282074, "learning_rate": 0.00044022408963585437, "loss": 0.3496, "step": 20096 }, { "epoch": 11.227374301675978, "grad_norm": 0.5147891044616699, "learning_rate": 0.0004401960784313726, "loss": 0.3996, "step": 20097 }, { "epoch": 11.227932960893854, "grad_norm": 0.6439340114593506, "learning_rate": 0.0004401680672268908, "loss": 0.3965, "step": 20098 }, { "epoch": 11.228491620111733, "grad_norm": 0.8488362431526184, "learning_rate": 0.000440140056022409, "loss": 0.475, "step": 20099 }, { "epoch": 11.22905027932961, "grad_norm": 0.4989221394062042, "learning_rate": 0.00044011204481792714, "loss": 0.4403, "step": 20100 }, { "epoch": 11.229608938547486, "grad_norm": 0.9705707430839539, "learning_rate": 0.0004400840336134454, "loss": 0.3873, "step": 20101 }, { "epoch": 11.230167597765362, "grad_norm": 0.7108225226402283, "learning_rate": 0.0004400560224089636, "loss": 0.3668, "step": 20102 }, { "epoch": 11.23072625698324, "grad_norm": 0.5318310856819153, "learning_rate": 0.0004400280112044818, "loss": 0.4534, "step": 20103 }, { "epoch": 11.231284916201117, "grad_norm": 0.5211368799209595, "learning_rate": 0.00044, "loss": 0.6341, "step": 20104 }, { "epoch": 11.231843575418994, "grad_norm": 0.5484535694122314, "learning_rate": 0.0004399719887955182, "loss": 0.4812, "step": 20105 }, { "epoch": 11.232402234636872, "grad_norm": 0.5162181258201599, "learning_rate": 0.00043994397759103643, "loss": 0.6242, "step": 20106 }, { "epoch": 11.232960893854749, "grad_norm": 0.5498502254486084, "learning_rate": 0.00043991596638655463, "loss": 0.5494, "step": 20107 }, { "epoch": 11.233519553072625, "grad_norm": 0.9606100916862488, "learning_rate": 0.00043988795518207284, "loss": 0.4041, "step": 20108 }, { "epoch": 11.234078212290504, 
"grad_norm": 0.7064275145530701, "learning_rate": 0.00043985994397759105, "loss": 0.4119, "step": 20109 }, { "epoch": 11.23463687150838, "grad_norm": 0.5120725631713867, "learning_rate": 0.00043983193277310925, "loss": 0.4046, "step": 20110 }, { "epoch": 11.235195530726257, "grad_norm": 0.4233984351158142, "learning_rate": 0.00043980392156862746, "loss": 0.4601, "step": 20111 }, { "epoch": 11.235754189944133, "grad_norm": 0.47705525159835815, "learning_rate": 0.00043977591036414566, "loss": 0.4644, "step": 20112 }, { "epoch": 11.236312849162012, "grad_norm": 0.5872893929481506, "learning_rate": 0.0004397478991596639, "loss": 0.5413, "step": 20113 }, { "epoch": 11.236871508379888, "grad_norm": 0.4676712453365326, "learning_rate": 0.0004397198879551821, "loss": 0.3803, "step": 20114 }, { "epoch": 11.237430167597765, "grad_norm": 2.7982687950134277, "learning_rate": 0.0004396918767507003, "loss": 0.3956, "step": 20115 }, { "epoch": 11.237988826815643, "grad_norm": 0.3906034827232361, "learning_rate": 0.0004396638655462185, "loss": 0.3147, "step": 20116 }, { "epoch": 11.23854748603352, "grad_norm": 0.4676041901111603, "learning_rate": 0.0004396358543417367, "loss": 0.4466, "step": 20117 }, { "epoch": 11.239106145251396, "grad_norm": 1.093482494354248, "learning_rate": 0.00043960784313725495, "loss": 0.4907, "step": 20118 }, { "epoch": 11.239664804469275, "grad_norm": 0.583071231842041, "learning_rate": 0.0004395798319327731, "loss": 0.403, "step": 20119 }, { "epoch": 11.240223463687151, "grad_norm": 0.45677652955055237, "learning_rate": 0.0004395518207282913, "loss": 0.3397, "step": 20120 }, { "epoch": 11.240782122905028, "grad_norm": 0.7649537324905396, "learning_rate": 0.00043952380952380957, "loss": 0.4074, "step": 20121 }, { "epoch": 11.241340782122904, "grad_norm": 2.905608892440796, "learning_rate": 0.0004394957983193277, "loss": 0.4069, "step": 20122 }, { "epoch": 11.241899441340783, "grad_norm": 2.4164202213287354, "learning_rate": 0.000439467787114846, "loss": 
0.4268, "step": 20123 }, { "epoch": 11.24245810055866, "grad_norm": 0.44573885202407837, "learning_rate": 0.00043943977591036414, "loss": 0.4055, "step": 20124 }, { "epoch": 11.243016759776536, "grad_norm": 0.41281893849372864, "learning_rate": 0.00043941176470588234, "loss": 0.4484, "step": 20125 }, { "epoch": 11.243575418994414, "grad_norm": 1.6746652126312256, "learning_rate": 0.0004393837535014006, "loss": 0.5086, "step": 20126 }, { "epoch": 11.24413407821229, "grad_norm": 0.4273971617221832, "learning_rate": 0.00043935574229691875, "loss": 0.3718, "step": 20127 }, { "epoch": 11.244692737430167, "grad_norm": 1.4274877309799194, "learning_rate": 0.000439327731092437, "loss": 0.4878, "step": 20128 }, { "epoch": 11.245251396648044, "grad_norm": 0.44678208231925964, "learning_rate": 0.0004392997198879552, "loss": 0.3721, "step": 20129 }, { "epoch": 11.245810055865922, "grad_norm": 0.4419698715209961, "learning_rate": 0.00043927170868347337, "loss": 0.3436, "step": 20130 }, { "epoch": 11.246368715083799, "grad_norm": 0.4058476686477661, "learning_rate": 0.00043924369747899163, "loss": 0.3984, "step": 20131 }, { "epoch": 11.246927374301675, "grad_norm": 0.4887247383594513, "learning_rate": 0.0004392156862745098, "loss": 0.3782, "step": 20132 }, { "epoch": 11.247486033519554, "grad_norm": 0.6937375664710999, "learning_rate": 0.00043918767507002804, "loss": 0.4758, "step": 20133 }, { "epoch": 11.24804469273743, "grad_norm": 0.49732986092567444, "learning_rate": 0.00043915966386554625, "loss": 0.5085, "step": 20134 }, { "epoch": 11.248603351955307, "grad_norm": 0.6458745002746582, "learning_rate": 0.0004391316526610644, "loss": 0.3339, "step": 20135 }, { "epoch": 11.249162011173185, "grad_norm": 0.555120587348938, "learning_rate": 0.00043910364145658266, "loss": 0.4745, "step": 20136 }, { "epoch": 11.249720670391062, "grad_norm": 3.8175039291381836, "learning_rate": 0.00043907563025210087, "loss": 0.4063, "step": 20137 }, { "epoch": 11.250279329608938, "grad_norm": 
0.4628995358943939, "learning_rate": 0.000439047619047619, "loss": 0.4174, "step": 20138 }, { "epoch": 11.250837988826815, "grad_norm": 0.44974204897880554, "learning_rate": 0.0004390196078431373, "loss": 0.3945, "step": 20139 }, { "epoch": 11.251396648044693, "grad_norm": 6.7041425704956055, "learning_rate": 0.00043899159663865543, "loss": 0.3884, "step": 20140 }, { "epoch": 11.25195530726257, "grad_norm": 0.4144757091999054, "learning_rate": 0.0004389635854341737, "loss": 0.4072, "step": 20141 }, { "epoch": 11.252513966480446, "grad_norm": 0.5192364454269409, "learning_rate": 0.0004389355742296919, "loss": 0.4504, "step": 20142 }, { "epoch": 11.253072625698325, "grad_norm": 0.8089028000831604, "learning_rate": 0.00043890756302521005, "loss": 0.3536, "step": 20143 }, { "epoch": 11.253631284916201, "grad_norm": 0.42806190252304077, "learning_rate": 0.0004388795518207283, "loss": 0.3846, "step": 20144 }, { "epoch": 11.254189944134078, "grad_norm": 0.6048259735107422, "learning_rate": 0.0004388515406162465, "loss": 0.2979, "step": 20145 }, { "epoch": 11.254748603351956, "grad_norm": 0.4058989882469177, "learning_rate": 0.0004388235294117647, "loss": 0.4547, "step": 20146 }, { "epoch": 11.255307262569833, "grad_norm": 0.3987780213356018, "learning_rate": 0.00043879551820728293, "loss": 0.3823, "step": 20147 }, { "epoch": 11.25586592178771, "grad_norm": 0.41179659962654114, "learning_rate": 0.0004387675070028011, "loss": 0.4133, "step": 20148 }, { "epoch": 11.256424581005586, "grad_norm": 0.5493605136871338, "learning_rate": 0.00043873949579831934, "loss": 0.4084, "step": 20149 }, { "epoch": 11.256983240223464, "grad_norm": 1.3462769985198975, "learning_rate": 0.00043871148459383755, "loss": 0.4169, "step": 20150 }, { "epoch": 11.25754189944134, "grad_norm": 0.4494118392467499, "learning_rate": 0.00043868347338935575, "loss": 0.4997, "step": 20151 }, { "epoch": 11.258100558659217, "grad_norm": 0.842294454574585, "learning_rate": 0.00043865546218487396, "loss": 0.4013, 
"step": 20152 }, { "epoch": 11.258659217877096, "grad_norm": 0.5919236540794373, "learning_rate": 0.00043862745098039216, "loss": 0.4914, "step": 20153 }, { "epoch": 11.259217877094972, "grad_norm": 0.4723402261734009, "learning_rate": 0.00043859943977591037, "loss": 0.4451, "step": 20154 }, { "epoch": 11.259776536312849, "grad_norm": 1.2802542448043823, "learning_rate": 0.0004385714285714286, "loss": 0.4232, "step": 20155 }, { "epoch": 11.260335195530725, "grad_norm": 0.5456537008285522, "learning_rate": 0.0004385434173669468, "loss": 0.3986, "step": 20156 }, { "epoch": 11.260893854748604, "grad_norm": 0.42404642701148987, "learning_rate": 0.000438515406162465, "loss": 0.4002, "step": 20157 }, { "epoch": 11.26145251396648, "grad_norm": 0.4189004600048065, "learning_rate": 0.0004384873949579832, "loss": 0.4459, "step": 20158 }, { "epoch": 11.262011173184357, "grad_norm": 0.43470510840415955, "learning_rate": 0.0004384593837535014, "loss": 0.4298, "step": 20159 }, { "epoch": 11.262569832402235, "grad_norm": 0.787608802318573, "learning_rate": 0.0004384313725490196, "loss": 0.399, "step": 20160 }, { "epoch": 11.263128491620112, "grad_norm": 0.509988009929657, "learning_rate": 0.00043840336134453787, "loss": 0.4686, "step": 20161 }, { "epoch": 11.263687150837988, "grad_norm": 0.6208260655403137, "learning_rate": 0.000438375350140056, "loss": 0.4392, "step": 20162 }, { "epoch": 11.264245810055867, "grad_norm": 0.40109121799468994, "learning_rate": 0.0004383473389355742, "loss": 0.3987, "step": 20163 }, { "epoch": 11.264804469273743, "grad_norm": 2.0710699558258057, "learning_rate": 0.00043831932773109243, "loss": 0.723, "step": 20164 }, { "epoch": 11.26536312849162, "grad_norm": 2.471428871154785, "learning_rate": 0.00043829131652661064, "loss": 0.403, "step": 20165 }, { "epoch": 11.265921787709496, "grad_norm": 0.6072873473167419, "learning_rate": 0.0004382633053221289, "loss": 0.456, "step": 20166 }, { "epoch": 11.266480446927375, "grad_norm": 0.38765987753868103, 
"learning_rate": 0.00043823529411764705, "loss": 0.3871, "step": 20167 }, { "epoch": 11.267039106145251, "grad_norm": 0.4405870735645294, "learning_rate": 0.00043820728291316525, "loss": 0.5005, "step": 20168 }, { "epoch": 11.267597765363128, "grad_norm": 1.8352257013320923, "learning_rate": 0.0004381792717086835, "loss": 0.3751, "step": 20169 }, { "epoch": 11.268156424581006, "grad_norm": 1.0349719524383545, "learning_rate": 0.00043815126050420167, "loss": 0.4266, "step": 20170 }, { "epoch": 11.268715083798883, "grad_norm": 0.558402419090271, "learning_rate": 0.0004381232492997199, "loss": 0.3411, "step": 20171 }, { "epoch": 11.26927374301676, "grad_norm": 0.4799840450286865, "learning_rate": 0.0004380952380952381, "loss": 0.4086, "step": 20172 }, { "epoch": 11.269832402234638, "grad_norm": 0.611286997795105, "learning_rate": 0.0004380672268907563, "loss": 0.5257, "step": 20173 }, { "epoch": 11.270391061452514, "grad_norm": 0.5433061122894287, "learning_rate": 0.00043803921568627454, "loss": 0.4249, "step": 20174 }, { "epoch": 11.27094972067039, "grad_norm": 0.6384894251823425, "learning_rate": 0.0004380112044817927, "loss": 0.4175, "step": 20175 }, { "epoch": 11.271508379888267, "grad_norm": 0.6537908911705017, "learning_rate": 0.00043798319327731096, "loss": 0.6883, "step": 20176 }, { "epoch": 11.272067039106146, "grad_norm": 0.8897542953491211, "learning_rate": 0.00043795518207282916, "loss": 0.4312, "step": 20177 }, { "epoch": 11.272625698324022, "grad_norm": 0.3867975175380707, "learning_rate": 0.0004379271708683473, "loss": 0.4023, "step": 20178 }, { "epoch": 11.273184357541899, "grad_norm": 0.8229814767837524, "learning_rate": 0.0004378991596638656, "loss": 0.3449, "step": 20179 }, { "epoch": 11.273743016759777, "grad_norm": 1.3613076210021973, "learning_rate": 0.0004378711484593837, "loss": 0.3285, "step": 20180 }, { "epoch": 11.274301675977654, "grad_norm": 0.5792914628982544, "learning_rate": 0.000437843137254902, "loss": 0.4653, "step": 20181 }, { 
"epoch": 11.27486033519553, "grad_norm": 0.6109485030174255, "learning_rate": 0.0004378151260504202, "loss": 0.4795, "step": 20182 }, { "epoch": 11.275418994413409, "grad_norm": 0.4183536469936371, "learning_rate": 0.00043778711484593834, "loss": 0.3641, "step": 20183 }, { "epoch": 11.275977653631285, "grad_norm": 0.41660964488983154, "learning_rate": 0.0004377591036414566, "loss": 0.4267, "step": 20184 }, { "epoch": 11.276536312849162, "grad_norm": 7.3677144050598145, "learning_rate": 0.0004377310924369748, "loss": 0.4426, "step": 20185 }, { "epoch": 11.277094972067038, "grad_norm": 0.575406014919281, "learning_rate": 0.000437703081232493, "loss": 0.4732, "step": 20186 }, { "epoch": 11.277653631284917, "grad_norm": 0.39118093252182007, "learning_rate": 0.0004376750700280112, "loss": 0.4539, "step": 20187 }, { "epoch": 11.278212290502793, "grad_norm": 0.5141621828079224, "learning_rate": 0.0004376470588235294, "loss": 0.4559, "step": 20188 }, { "epoch": 11.27877094972067, "grad_norm": 0.818881094455719, "learning_rate": 0.00043761904761904763, "loss": 0.4649, "step": 20189 }, { "epoch": 11.279329608938548, "grad_norm": 0.6279159188270569, "learning_rate": 0.00043759103641456584, "loss": 0.4411, "step": 20190 }, { "epoch": 11.279888268156425, "grad_norm": 0.4050810635089874, "learning_rate": 0.00043756302521008405, "loss": 0.4597, "step": 20191 }, { "epoch": 11.280446927374301, "grad_norm": 0.7124335169792175, "learning_rate": 0.00043753501400560225, "loss": 0.4177, "step": 20192 }, { "epoch": 11.28100558659218, "grad_norm": 0.6197972893714905, "learning_rate": 0.00043750700280112046, "loss": 0.3634, "step": 20193 }, { "epoch": 11.281564245810056, "grad_norm": 0.637822687625885, "learning_rate": 0.00043747899159663866, "loss": 0.5034, "step": 20194 }, { "epoch": 11.282122905027933, "grad_norm": 0.563567042350769, "learning_rate": 0.00043745098039215687, "loss": 0.4783, "step": 20195 }, { "epoch": 11.28268156424581, "grad_norm": 0.8783479332923889, "learning_rate": 
0.0004374229691876751, "loss": 0.382, "step": 20196 }, { "epoch": 11.283240223463688, "grad_norm": 0.854576587677002, "learning_rate": 0.0004373949579831933, "loss": 0.6866, "step": 20197 }, { "epoch": 11.283798882681564, "grad_norm": 0.5092287659645081, "learning_rate": 0.0004373669467787115, "loss": 0.4377, "step": 20198 }, { "epoch": 11.28435754189944, "grad_norm": 0.37525826692581177, "learning_rate": 0.0004373389355742297, "loss": 0.4989, "step": 20199 }, { "epoch": 11.28491620111732, "grad_norm": 0.5437232851982117, "learning_rate": 0.0004373109243697479, "loss": 0.6531, "step": 20200 }, { "epoch": 11.285474860335196, "grad_norm": 0.7121294736862183, "learning_rate": 0.00043728291316526616, "loss": 0.6267, "step": 20201 }, { "epoch": 11.286033519553072, "grad_norm": 0.41168221831321716, "learning_rate": 0.0004372549019607843, "loss": 0.437, "step": 20202 }, { "epoch": 11.286592178770949, "grad_norm": 0.4956684112548828, "learning_rate": 0.0004372268907563025, "loss": 0.4571, "step": 20203 }, { "epoch": 11.287150837988827, "grad_norm": 0.46635955572128296, "learning_rate": 0.0004371988795518207, "loss": 0.3708, "step": 20204 }, { "epoch": 11.287709497206704, "grad_norm": 0.5189955234527588, "learning_rate": 0.00043717086834733893, "loss": 0.4038, "step": 20205 }, { "epoch": 11.28826815642458, "grad_norm": 0.6829469799995422, "learning_rate": 0.0004371428571428572, "loss": 0.7769, "step": 20206 }, { "epoch": 11.288826815642459, "grad_norm": 0.3898891806602478, "learning_rate": 0.00043711484593837534, "loss": 0.3301, "step": 20207 }, { "epoch": 11.289385474860335, "grad_norm": 1.0952848196029663, "learning_rate": 0.00043708683473389355, "loss": 0.575, "step": 20208 }, { "epoch": 11.289944134078212, "grad_norm": 0.4770362675189972, "learning_rate": 0.0004370588235294118, "loss": 0.3375, "step": 20209 }, { "epoch": 11.29050279329609, "grad_norm": 0.46579399704933167, "learning_rate": 0.00043703081232492996, "loss": 0.3755, "step": 20210 }, { "epoch": 
11.291061452513967, "grad_norm": 4.949335098266602, "learning_rate": 0.0004370028011204482, "loss": 0.4253, "step": 20211 }, { "epoch": 11.291620111731843, "grad_norm": 0.48682326078414917, "learning_rate": 0.00043697478991596637, "loss": 0.4022, "step": 20212 }, { "epoch": 11.29217877094972, "grad_norm": 0.573822557926178, "learning_rate": 0.0004369467787114846, "loss": 0.4693, "step": 20213 }, { "epoch": 11.292737430167598, "grad_norm": 0.4340304732322693, "learning_rate": 0.00043691876750700284, "loss": 0.3648, "step": 20214 }, { "epoch": 11.293296089385475, "grad_norm": 0.8414255976676941, "learning_rate": 0.000436890756302521, "loss": 0.4972, "step": 20215 }, { "epoch": 11.293854748603351, "grad_norm": 0.6106770634651184, "learning_rate": 0.00043686274509803925, "loss": 0.4185, "step": 20216 }, { "epoch": 11.29441340782123, "grad_norm": 0.47612854838371277, "learning_rate": 0.00043683473389355746, "loss": 0.4969, "step": 20217 }, { "epoch": 11.294972067039106, "grad_norm": 0.5907502174377441, "learning_rate": 0.0004368067226890756, "loss": 0.5458, "step": 20218 }, { "epoch": 11.295530726256983, "grad_norm": 0.7037196755409241, "learning_rate": 0.00043677871148459387, "loss": 0.3775, "step": 20219 }, { "epoch": 11.296089385474861, "grad_norm": 0.8624817728996277, "learning_rate": 0.000436750700280112, "loss": 0.3347, "step": 20220 }, { "epoch": 11.296648044692738, "grad_norm": 0.42462775111198425, "learning_rate": 0.0004367226890756303, "loss": 0.4554, "step": 20221 }, { "epoch": 11.297206703910614, "grad_norm": 0.4273512661457062, "learning_rate": 0.0004366946778711485, "loss": 0.4585, "step": 20222 }, { "epoch": 11.297765363128491, "grad_norm": 0.8187879323959351, "learning_rate": 0.00043666666666666664, "loss": 0.4186, "step": 20223 }, { "epoch": 11.29832402234637, "grad_norm": 1.2574671506881714, "learning_rate": 0.0004366386554621849, "loss": 0.4469, "step": 20224 }, { "epoch": 11.298882681564246, "grad_norm": 0.5985570549964905, "learning_rate": 
0.0004366106442577031, "loss": 0.3576, "step": 20225 }, { "epoch": 11.299441340782122, "grad_norm": 0.47140201926231384, "learning_rate": 0.0004365826330532213, "loss": 0.4792, "step": 20226 }, { "epoch": 11.3, "grad_norm": 0.8115884065628052, "learning_rate": 0.0004365546218487395, "loss": 0.5552, "step": 20227 }, { "epoch": 11.300558659217877, "grad_norm": 0.4647372364997864, "learning_rate": 0.00043652661064425767, "loss": 0.4706, "step": 20228 }, { "epoch": 11.301117318435754, "grad_norm": 1.0155870914459229, "learning_rate": 0.00043649859943977593, "loss": 0.4154, "step": 20229 }, { "epoch": 11.30167597765363, "grad_norm": 0.7085390686988831, "learning_rate": 0.00043647058823529413, "loss": 0.446, "step": 20230 }, { "epoch": 11.302234636871509, "grad_norm": 1.5133395195007324, "learning_rate": 0.00043644257703081234, "loss": 0.3566, "step": 20231 }, { "epoch": 11.302793296089385, "grad_norm": 1.619756817817688, "learning_rate": 0.00043641456582633055, "loss": 0.5994, "step": 20232 }, { "epoch": 11.303351955307262, "grad_norm": 0.45566222071647644, "learning_rate": 0.00043638655462184875, "loss": 0.5035, "step": 20233 }, { "epoch": 11.30391061452514, "grad_norm": 0.4176664352416992, "learning_rate": 0.00043635854341736696, "loss": 0.4385, "step": 20234 }, { "epoch": 11.304469273743017, "grad_norm": 0.9250991940498352, "learning_rate": 0.00043633053221288516, "loss": 0.4199, "step": 20235 }, { "epoch": 11.305027932960893, "grad_norm": 0.34724947810173035, "learning_rate": 0.0004363025210084034, "loss": 0.3563, "step": 20236 }, { "epoch": 11.305586592178772, "grad_norm": 0.7343859076499939, "learning_rate": 0.0004362745098039216, "loss": 0.5057, "step": 20237 }, { "epoch": 11.306145251396648, "grad_norm": 0.3646984100341797, "learning_rate": 0.0004362464985994398, "loss": 0.317, "step": 20238 }, { "epoch": 11.306703910614525, "grad_norm": 0.4432050585746765, "learning_rate": 0.000436218487394958, "loss": 0.4004, "step": 20239 }, { "epoch": 11.307262569832401, 
"grad_norm": 1.527118444442749, "learning_rate": 0.0004361904761904762, "loss": 0.4745, "step": 20240 }, { "epoch": 11.30782122905028, "grad_norm": 0.5312463641166687, "learning_rate": 0.00043616246498599445, "loss": 0.564, "step": 20241 }, { "epoch": 11.308379888268156, "grad_norm": 0.4000355005264282, "learning_rate": 0.0004361344537815126, "loss": 0.3696, "step": 20242 }, { "epoch": 11.308938547486033, "grad_norm": 0.7436264753341675, "learning_rate": 0.0004361064425770308, "loss": 0.333, "step": 20243 }, { "epoch": 11.309497206703911, "grad_norm": 0.4654131233692169, "learning_rate": 0.00043607843137254907, "loss": 0.4086, "step": 20244 }, { "epoch": 11.310055865921788, "grad_norm": 0.5024712681770325, "learning_rate": 0.0004360504201680672, "loss": 0.3552, "step": 20245 }, { "epoch": 11.310614525139664, "grad_norm": 0.2869241237640381, "learning_rate": 0.0004360224089635855, "loss": 0.2277, "step": 20246 }, { "epoch": 11.311173184357543, "grad_norm": 0.431352436542511, "learning_rate": 0.00043599439775910364, "loss": 0.4093, "step": 20247 }, { "epoch": 11.31173184357542, "grad_norm": 0.5880358815193176, "learning_rate": 0.00043596638655462184, "loss": 0.4375, "step": 20248 }, { "epoch": 11.312290502793296, "grad_norm": 1.8696519136428833, "learning_rate": 0.0004359383753501401, "loss": 0.5504, "step": 20249 }, { "epoch": 11.312849162011172, "grad_norm": 1.9979270696640015, "learning_rate": 0.00043591036414565825, "loss": 0.3871, "step": 20250 }, { "epoch": 11.31340782122905, "grad_norm": 0.6203386187553406, "learning_rate": 0.00043588235294117646, "loss": 0.437, "step": 20251 }, { "epoch": 11.313966480446927, "grad_norm": 0.926114559173584, "learning_rate": 0.0004358543417366947, "loss": 0.4037, "step": 20252 }, { "epoch": 11.314525139664804, "grad_norm": 0.5830777883529663, "learning_rate": 0.00043582633053221287, "loss": 0.4406, "step": 20253 }, { "epoch": 11.315083798882682, "grad_norm": 0.6230756640434265, "learning_rate": 0.00043579831932773113, "loss": 
0.3607, "step": 20254 }, { "epoch": 11.315642458100559, "grad_norm": 0.730209231376648, "learning_rate": 0.0004357703081232493, "loss": 0.4044, "step": 20255 }, { "epoch": 11.316201117318435, "grad_norm": 0.36078113317489624, "learning_rate": 0.0004357422969187675, "loss": 0.4099, "step": 20256 }, { "epoch": 11.316759776536314, "grad_norm": 0.9884338974952698, "learning_rate": 0.00043571428571428575, "loss": 0.4508, "step": 20257 }, { "epoch": 11.31731843575419, "grad_norm": 25.895471572875977, "learning_rate": 0.0004356862745098039, "loss": 0.3458, "step": 20258 }, { "epoch": 11.317877094972067, "grad_norm": 0.5229678750038147, "learning_rate": 0.00043565826330532216, "loss": 0.4585, "step": 20259 }, { "epoch": 11.318435754189943, "grad_norm": 0.4289412200450897, "learning_rate": 0.00043563025210084037, "loss": 0.521, "step": 20260 }, { "epoch": 11.318994413407822, "grad_norm": 0.6651564836502075, "learning_rate": 0.0004356022408963585, "loss": 0.4817, "step": 20261 }, { "epoch": 11.319553072625698, "grad_norm": 0.5189133286476135, "learning_rate": 0.0004355742296918768, "loss": 0.5234, "step": 20262 }, { "epoch": 11.320111731843575, "grad_norm": 1.1015655994415283, "learning_rate": 0.00043554621848739493, "loss": 0.4493, "step": 20263 }, { "epoch": 11.320670391061453, "grad_norm": 0.3143925666809082, "learning_rate": 0.0004355182072829132, "loss": 0.3268, "step": 20264 }, { "epoch": 11.32122905027933, "grad_norm": 0.7230599522590637, "learning_rate": 0.0004354901960784314, "loss": 0.3853, "step": 20265 }, { "epoch": 11.321787709497206, "grad_norm": 1.5055962800979614, "learning_rate": 0.00043546218487394955, "loss": 0.4847, "step": 20266 }, { "epoch": 11.322346368715085, "grad_norm": 0.4309387505054474, "learning_rate": 0.0004354341736694678, "loss": 0.3525, "step": 20267 }, { "epoch": 11.322905027932961, "grad_norm": 0.5553687810897827, "learning_rate": 0.000435406162464986, "loss": 0.4343, "step": 20268 }, { "epoch": 11.323463687150838, "grad_norm": 
5.23468017578125, "learning_rate": 0.0004353781512605042, "loss": 0.4469, "step": 20269 }, { "epoch": 11.324022346368714, "grad_norm": 0.4122627377510071, "learning_rate": 0.00043535014005602243, "loss": 0.3864, "step": 20270 }, { "epoch": 11.324581005586593, "grad_norm": 0.9588366150856018, "learning_rate": 0.0004353221288515406, "loss": 0.4719, "step": 20271 }, { "epoch": 11.32513966480447, "grad_norm": 0.5181857943534851, "learning_rate": 0.00043529411764705884, "loss": 0.4342, "step": 20272 }, { "epoch": 11.325698324022346, "grad_norm": 0.8975045680999756, "learning_rate": 0.00043526610644257705, "loss": 0.4456, "step": 20273 }, { "epoch": 11.326256983240224, "grad_norm": 0.5405422449111938, "learning_rate": 0.00043523809523809525, "loss": 0.3905, "step": 20274 }, { "epoch": 11.3268156424581, "grad_norm": 0.8306060433387756, "learning_rate": 0.00043521008403361346, "loss": 0.3984, "step": 20275 }, { "epoch": 11.327374301675977, "grad_norm": 0.504985511302948, "learning_rate": 0.00043518207282913166, "loss": 0.4037, "step": 20276 }, { "epoch": 11.327932960893854, "grad_norm": 0.4910610318183899, "learning_rate": 0.00043515406162464987, "loss": 0.4551, "step": 20277 }, { "epoch": 11.328491620111732, "grad_norm": 0.712019681930542, "learning_rate": 0.0004351260504201681, "loss": 0.506, "step": 20278 }, { "epoch": 11.329050279329609, "grad_norm": 0.44327428936958313, "learning_rate": 0.0004350980392156863, "loss": 0.3396, "step": 20279 }, { "epoch": 11.329608938547485, "grad_norm": 2.1232106685638428, "learning_rate": 0.0004350700280112045, "loss": 0.3874, "step": 20280 }, { "epoch": 11.330167597765364, "grad_norm": 0.4614521265029907, "learning_rate": 0.0004350420168067227, "loss": 0.4225, "step": 20281 }, { "epoch": 11.33072625698324, "grad_norm": 0.3407973647117615, "learning_rate": 0.0004350140056022409, "loss": 0.4285, "step": 20282 }, { "epoch": 11.331284916201117, "grad_norm": 0.46013057231903076, "learning_rate": 0.0004349859943977591, "loss": 0.4959, 
"step": 20283 }, { "epoch": 11.331843575418995, "grad_norm": 0.5374694466590881, "learning_rate": 0.00043495798319327737, "loss": 0.4444, "step": 20284 }, { "epoch": 11.332402234636872, "grad_norm": 0.4497166872024536, "learning_rate": 0.0004349299719887955, "loss": 0.4405, "step": 20285 }, { "epoch": 11.332960893854748, "grad_norm": 4.576594352722168, "learning_rate": 0.0004349019607843137, "loss": 0.3169, "step": 20286 }, { "epoch": 11.333519553072625, "grad_norm": 0.4949341416358948, "learning_rate": 0.00043487394957983193, "loss": 0.4425, "step": 20287 }, { "epoch": 11.334078212290503, "grad_norm": 0.5314093828201294, "learning_rate": 0.00043484593837535014, "loss": 0.4037, "step": 20288 }, { "epoch": 11.33463687150838, "grad_norm": 0.5761443376541138, "learning_rate": 0.0004348179271708684, "loss": 0.3584, "step": 20289 }, { "epoch": 11.335195530726256, "grad_norm": 0.7590041756629944, "learning_rate": 0.00043478991596638655, "loss": 0.4299, "step": 20290 }, { "epoch": 11.335754189944135, "grad_norm": 0.8453172445297241, "learning_rate": 0.00043476190476190475, "loss": 0.4523, "step": 20291 }, { "epoch": 11.336312849162011, "grad_norm": 0.3963601291179657, "learning_rate": 0.000434733893557423, "loss": 0.527, "step": 20292 }, { "epoch": 11.336871508379888, "grad_norm": 0.4410884976387024, "learning_rate": 0.00043470588235294117, "loss": 0.4116, "step": 20293 }, { "epoch": 11.337430167597766, "grad_norm": 0.6327182054519653, "learning_rate": 0.0004346778711484594, "loss": 0.3284, "step": 20294 }, { "epoch": 11.337988826815643, "grad_norm": 0.5186989903450012, "learning_rate": 0.0004346498599439776, "loss": 0.3263, "step": 20295 }, { "epoch": 11.33854748603352, "grad_norm": 0.8051605820655823, "learning_rate": 0.0004346218487394958, "loss": 0.399, "step": 20296 }, { "epoch": 11.339106145251396, "grad_norm": 0.4688992500305176, "learning_rate": 0.00043459383753501404, "loss": 0.4468, "step": 20297 }, { "epoch": 11.339664804469274, "grad_norm": 
0.47650954127311707, "learning_rate": 0.0004345658263305322, "loss": 0.4508, "step": 20298 }, { "epoch": 11.34022346368715, "grad_norm": 0.42817622423171997, "learning_rate": 0.00043453781512605046, "loss": 0.3343, "step": 20299 }, { "epoch": 11.340782122905027, "grad_norm": 0.42224690318107605, "learning_rate": 0.00043450980392156866, "loss": 0.5413, "step": 20300 }, { "epoch": 11.341340782122906, "grad_norm": 0.8090122938156128, "learning_rate": 0.0004344817927170868, "loss": 0.4035, "step": 20301 }, { "epoch": 11.341899441340782, "grad_norm": 0.5524550080299377, "learning_rate": 0.0004344537815126051, "loss": 0.4138, "step": 20302 }, { "epoch": 11.342458100558659, "grad_norm": 0.550574779510498, "learning_rate": 0.0004344257703081232, "loss": 0.4671, "step": 20303 }, { "epoch": 11.343016759776535, "grad_norm": 0.560962438583374, "learning_rate": 0.0004343977591036415, "loss": 0.3193, "step": 20304 }, { "epoch": 11.343575418994414, "grad_norm": 1.720365285873413, "learning_rate": 0.0004343697478991597, "loss": 0.3345, "step": 20305 }, { "epoch": 11.34413407821229, "grad_norm": 0.6047013998031616, "learning_rate": 0.00043434173669467784, "loss": 0.4359, "step": 20306 }, { "epoch": 11.344692737430167, "grad_norm": 0.4994995892047882, "learning_rate": 0.0004343137254901961, "loss": 0.3999, "step": 20307 }, { "epoch": 11.345251396648045, "grad_norm": 1.1507071256637573, "learning_rate": 0.0004342857142857143, "loss": 0.3648, "step": 20308 }, { "epoch": 11.345810055865922, "grad_norm": 0.5431200265884399, "learning_rate": 0.0004342577030812325, "loss": 0.483, "step": 20309 }, { "epoch": 11.346368715083798, "grad_norm": 1.2475614547729492, "learning_rate": 0.0004342296918767507, "loss": 0.3695, "step": 20310 }, { "epoch": 11.346927374301677, "grad_norm": 0.4749751091003418, "learning_rate": 0.0004342016806722689, "loss": 0.3798, "step": 20311 }, { "epoch": 11.347486033519553, "grad_norm": 0.4504222869873047, "learning_rate": 0.00043417366946778713, "loss": 0.3549, 
"step": 20312 }, { "epoch": 11.34804469273743, "grad_norm": 1.5910853147506714, "learning_rate": 0.00043414565826330534, "loss": 0.4002, "step": 20313 }, { "epoch": 11.348603351955306, "grad_norm": 0.6830914616584778, "learning_rate": 0.00043411764705882355, "loss": 0.3999, "step": 20314 }, { "epoch": 11.349162011173185, "grad_norm": 0.5797119140625, "learning_rate": 0.00043408963585434175, "loss": 0.4553, "step": 20315 }, { "epoch": 11.349720670391061, "grad_norm": 0.6772024631500244, "learning_rate": 0.00043406162464985996, "loss": 0.3756, "step": 20316 }, { "epoch": 11.350279329608938, "grad_norm": 0.4456321895122528, "learning_rate": 0.00043403361344537816, "loss": 0.3555, "step": 20317 }, { "epoch": 11.350837988826816, "grad_norm": 0.5499882698059082, "learning_rate": 0.00043400560224089637, "loss": 0.357, "step": 20318 }, { "epoch": 11.351396648044693, "grad_norm": 0.7099266648292542, "learning_rate": 0.0004339775910364146, "loss": 0.4934, "step": 20319 }, { "epoch": 11.35195530726257, "grad_norm": 0.48719528317451477, "learning_rate": 0.0004339495798319328, "loss": 0.3907, "step": 20320 }, { "epoch": 11.352513966480448, "grad_norm": 0.48918482661247253, "learning_rate": 0.000433921568627451, "loss": 0.4743, "step": 20321 }, { "epoch": 11.353072625698324, "grad_norm": 0.4085235893726349, "learning_rate": 0.0004338935574229692, "loss": 0.4434, "step": 20322 }, { "epoch": 11.3536312849162, "grad_norm": 0.7279258370399475, "learning_rate": 0.0004338655462184874, "loss": 0.5687, "step": 20323 }, { "epoch": 11.354189944134077, "grad_norm": 0.48025304079055786, "learning_rate": 0.00043383753501400566, "loss": 0.3214, "step": 20324 }, { "epoch": 11.354748603351956, "grad_norm": 0.4051593840122223, "learning_rate": 0.0004338095238095238, "loss": 0.4089, "step": 20325 }, { "epoch": 11.355307262569832, "grad_norm": 1.1692802906036377, "learning_rate": 0.000433781512605042, "loss": 0.3266, "step": 20326 }, { "epoch": 11.355865921787709, "grad_norm": 0.6688805818557739, 
"learning_rate": 0.0004337535014005602, "loss": 0.6353, "step": 20327 }, { "epoch": 11.356424581005587, "grad_norm": 0.4597741365432739, "learning_rate": 0.00043372549019607843, "loss": 0.4367, "step": 20328 }, { "epoch": 11.356983240223464, "grad_norm": 0.45323848724365234, "learning_rate": 0.0004336974789915967, "loss": 0.4132, "step": 20329 }, { "epoch": 11.35754189944134, "grad_norm": 0.5354863405227661, "learning_rate": 0.00043366946778711484, "loss": 0.5552, "step": 20330 }, { "epoch": 11.358100558659217, "grad_norm": 0.46319136023521423, "learning_rate": 0.00043364145658263305, "loss": 0.3433, "step": 20331 }, { "epoch": 11.358659217877095, "grad_norm": 3.5432674884796143, "learning_rate": 0.0004336134453781513, "loss": 0.3622, "step": 20332 }, { "epoch": 11.359217877094972, "grad_norm": 0.3947017192840576, "learning_rate": 0.00043358543417366946, "loss": 0.4026, "step": 20333 }, { "epoch": 11.359776536312848, "grad_norm": 0.6255600452423096, "learning_rate": 0.0004335574229691877, "loss": 0.5444, "step": 20334 }, { "epoch": 11.360335195530727, "grad_norm": 0.32233691215515137, "learning_rate": 0.00043352941176470587, "loss": 0.4493, "step": 20335 }, { "epoch": 11.360893854748603, "grad_norm": 0.4895597994327545, "learning_rate": 0.0004335014005602241, "loss": 0.4253, "step": 20336 }, { "epoch": 11.36145251396648, "grad_norm": 0.7000439167022705, "learning_rate": 0.00043347338935574234, "loss": 0.5388, "step": 20337 }, { "epoch": 11.362011173184358, "grad_norm": 0.5233309864997864, "learning_rate": 0.0004334453781512605, "loss": 0.3871, "step": 20338 }, { "epoch": 11.362569832402235, "grad_norm": 0.5473671555519104, "learning_rate": 0.00043341736694677875, "loss": 0.4168, "step": 20339 }, { "epoch": 11.363128491620111, "grad_norm": 0.4244515597820282, "learning_rate": 0.00043338935574229696, "loss": 0.4895, "step": 20340 }, { "epoch": 11.363687150837988, "grad_norm": 0.461105078458786, "learning_rate": 0.0004333613445378151, "loss": 0.4639, "step": 20341 }, 
{ "epoch": 11.364245810055866, "grad_norm": 0.9202731251716614, "learning_rate": 0.00043333333333333337, "loss": 0.3506, "step": 20342 }, { "epoch": 11.364804469273743, "grad_norm": 0.577087938785553, "learning_rate": 0.0004333053221288515, "loss": 0.3851, "step": 20343 }, { "epoch": 11.36536312849162, "grad_norm": 0.5346361398696899, "learning_rate": 0.0004332773109243698, "loss": 0.4443, "step": 20344 }, { "epoch": 11.365921787709498, "grad_norm": 0.4487300217151642, "learning_rate": 0.000433249299719888, "loss": 0.413, "step": 20345 }, { "epoch": 11.366480446927374, "grad_norm": 0.48424363136291504, "learning_rate": 0.00043322128851540614, "loss": 0.3914, "step": 20346 }, { "epoch": 11.367039106145251, "grad_norm": 0.38616514205932617, "learning_rate": 0.0004331932773109244, "loss": 0.3891, "step": 20347 }, { "epoch": 11.36759776536313, "grad_norm": 0.7378391623497009, "learning_rate": 0.0004331652661064426, "loss": 0.449, "step": 20348 }, { "epoch": 11.368156424581006, "grad_norm": 1.0481181144714355, "learning_rate": 0.0004331372549019608, "loss": 0.5323, "step": 20349 }, { "epoch": 11.368715083798882, "grad_norm": 0.5536805987358093, "learning_rate": 0.000433109243697479, "loss": 0.4726, "step": 20350 }, { "epoch": 11.369273743016759, "grad_norm": 0.5338822603225708, "learning_rate": 0.00043308123249299717, "loss": 0.4927, "step": 20351 }, { "epoch": 11.369832402234637, "grad_norm": 0.3848881125450134, "learning_rate": 0.00043305322128851543, "loss": 0.4178, "step": 20352 }, { "epoch": 11.370391061452514, "grad_norm": 0.5526819825172424, "learning_rate": 0.00043302521008403363, "loss": 0.4285, "step": 20353 }, { "epoch": 11.37094972067039, "grad_norm": 0.44540566205978394, "learning_rate": 0.00043299719887955184, "loss": 0.3048, "step": 20354 }, { "epoch": 11.371508379888269, "grad_norm": 0.8788999319076538, "learning_rate": 0.00043296918767507005, "loss": 0.3888, "step": 20355 }, { "epoch": 11.372067039106145, "grad_norm": 0.5624000430107117, 
"learning_rate": 0.00043294117647058825, "loss": 0.369, "step": 20356 }, { "epoch": 11.372625698324022, "grad_norm": 0.9151301383972168, "learning_rate": 0.00043291316526610646, "loss": 0.6925, "step": 20357 }, { "epoch": 11.3731843575419, "grad_norm": 0.5433090925216675, "learning_rate": 0.00043288515406162466, "loss": 0.4438, "step": 20358 }, { "epoch": 11.373743016759777, "grad_norm": 0.5443361401557922, "learning_rate": 0.00043285714285714287, "loss": 0.2824, "step": 20359 }, { "epoch": 11.374301675977653, "grad_norm": 2.2954258918762207, "learning_rate": 0.0004328291316526611, "loss": 0.5234, "step": 20360 }, { "epoch": 11.37486033519553, "grad_norm": 0.5968257784843445, "learning_rate": 0.0004328011204481793, "loss": 0.417, "step": 20361 }, { "epoch": 11.375418994413408, "grad_norm": 0.9542478322982788, "learning_rate": 0.0004327731092436975, "loss": 0.3149, "step": 20362 }, { "epoch": 11.375977653631285, "grad_norm": 0.5495153665542603, "learning_rate": 0.0004327450980392157, "loss": 0.3557, "step": 20363 }, { "epoch": 11.376536312849161, "grad_norm": 2.898282766342163, "learning_rate": 0.0004327170868347339, "loss": 0.6315, "step": 20364 }, { "epoch": 11.37709497206704, "grad_norm": 1.5180634260177612, "learning_rate": 0.0004326890756302521, "loss": 0.4758, "step": 20365 }, { "epoch": 11.377653631284916, "grad_norm": 0.4966793358325958, "learning_rate": 0.0004326610644257703, "loss": 0.4433, "step": 20366 }, { "epoch": 11.378212290502793, "grad_norm": 0.4382321536540985, "learning_rate": 0.0004326330532212885, "loss": 0.4331, "step": 20367 }, { "epoch": 11.378770949720671, "grad_norm": 0.4112316966056824, "learning_rate": 0.0004326050420168067, "loss": 0.3642, "step": 20368 }, { "epoch": 11.379329608938548, "grad_norm": 0.5049471259117126, "learning_rate": 0.00043257703081232493, "loss": 0.4079, "step": 20369 }, { "epoch": 11.379888268156424, "grad_norm": 0.467467337846756, "learning_rate": 0.00043254901960784314, "loss": 0.3643, "step": 20370 }, { "epoch": 
11.380446927374301, "grad_norm": 0.3777334988117218, "learning_rate": 0.00043252100840336134, "loss": 0.3886, "step": 20371 }, { "epoch": 11.38100558659218, "grad_norm": 0.4159626364707947, "learning_rate": 0.0004324929971988796, "loss": 0.4021, "step": 20372 }, { "epoch": 11.381564245810056, "grad_norm": 0.5214095115661621, "learning_rate": 0.00043246498599439775, "loss": 0.4936, "step": 20373 }, { "epoch": 11.382122905027932, "grad_norm": 0.48910412192344666, "learning_rate": 0.00043243697478991596, "loss": 0.3898, "step": 20374 }, { "epoch": 11.38268156424581, "grad_norm": 0.30974388122558594, "learning_rate": 0.00043240896358543417, "loss": 0.3703, "step": 20375 }, { "epoch": 11.383240223463687, "grad_norm": 0.3718155026435852, "learning_rate": 0.00043238095238095237, "loss": 0.3675, "step": 20376 }, { "epoch": 11.383798882681564, "grad_norm": 1.363289713859558, "learning_rate": 0.00043235294117647063, "loss": 0.3934, "step": 20377 }, { "epoch": 11.38435754189944, "grad_norm": 1.0593390464782715, "learning_rate": 0.0004323249299719888, "loss": 0.3917, "step": 20378 }, { "epoch": 11.384916201117319, "grad_norm": 1.3262704610824585, "learning_rate": 0.000432296918767507, "loss": 0.2937, "step": 20379 }, { "epoch": 11.385474860335195, "grad_norm": 0.8163474202156067, "learning_rate": 0.00043226890756302525, "loss": 0.515, "step": 20380 }, { "epoch": 11.386033519553072, "grad_norm": 0.37344738841056824, "learning_rate": 0.0004322408963585434, "loss": 0.3521, "step": 20381 }, { "epoch": 11.38659217877095, "grad_norm": 0.6237058639526367, "learning_rate": 0.00043221288515406166, "loss": 0.4177, "step": 20382 }, { "epoch": 11.387150837988827, "grad_norm": 0.7022706270217896, "learning_rate": 0.0004321848739495798, "loss": 0.4078, "step": 20383 }, { "epoch": 11.387709497206703, "grad_norm": 0.7716498374938965, "learning_rate": 0.000432156862745098, "loss": 0.4292, "step": 20384 }, { "epoch": 11.388268156424582, "grad_norm": 0.46022769808769226, "learning_rate": 
0.0004321288515406163, "loss": 0.3802, "step": 20385 }, { "epoch": 11.388826815642458, "grad_norm": 0.5086798667907715, "learning_rate": 0.00043210084033613443, "loss": 0.3647, "step": 20386 }, { "epoch": 11.389385474860335, "grad_norm": 0.40174588561058044, "learning_rate": 0.0004320728291316527, "loss": 0.4151, "step": 20387 }, { "epoch": 11.389944134078211, "grad_norm": 0.5052075982093811, "learning_rate": 0.0004320448179271709, "loss": 0.4673, "step": 20388 }, { "epoch": 11.39050279329609, "grad_norm": 0.4945640563964844, "learning_rate": 0.00043201680672268905, "loss": 0.4161, "step": 20389 }, { "epoch": 11.391061452513966, "grad_norm": 1.064143419265747, "learning_rate": 0.0004319887955182073, "loss": 0.4443, "step": 20390 }, { "epoch": 11.391620111731843, "grad_norm": 1.2059634923934937, "learning_rate": 0.00043196078431372546, "loss": 0.448, "step": 20391 }, { "epoch": 11.392178770949721, "grad_norm": 0.5958070755004883, "learning_rate": 0.0004319327731092437, "loss": 0.4951, "step": 20392 }, { "epoch": 11.392737430167598, "grad_norm": 0.45267143845558167, "learning_rate": 0.00043190476190476193, "loss": 0.3704, "step": 20393 }, { "epoch": 11.393296089385474, "grad_norm": 0.41555067896842957, "learning_rate": 0.0004318767507002801, "loss": 0.3979, "step": 20394 }, { "epoch": 11.393854748603353, "grad_norm": 0.44813475012779236, "learning_rate": 0.00043184873949579834, "loss": 0.4527, "step": 20395 }, { "epoch": 11.39441340782123, "grad_norm": 0.5134300589561462, "learning_rate": 0.00043182072829131655, "loss": 0.5213, "step": 20396 }, { "epoch": 11.394972067039106, "grad_norm": 0.4372962713241577, "learning_rate": 0.00043179271708683475, "loss": 0.3296, "step": 20397 }, { "epoch": 11.395530726256982, "grad_norm": 5.657855033874512, "learning_rate": 0.00043176470588235296, "loss": 0.3748, "step": 20398 }, { "epoch": 11.39608938547486, "grad_norm": 0.7380702495574951, "learning_rate": 0.0004317366946778711, "loss": 0.4264, "step": 20399 }, { "epoch": 
11.396648044692737, "grad_norm": 0.5794607400894165, "learning_rate": 0.00043170868347338937, "loss": 0.4823, "step": 20400 }, { "epoch": 11.397206703910614, "grad_norm": 0.602072536945343, "learning_rate": 0.0004316806722689076, "loss": 0.3035, "step": 20401 }, { "epoch": 11.397765363128492, "grad_norm": 0.5261669158935547, "learning_rate": 0.0004316526610644258, "loss": 0.4707, "step": 20402 }, { "epoch": 11.398324022346369, "grad_norm": 0.4762754440307617, "learning_rate": 0.000431624649859944, "loss": 0.3798, "step": 20403 }, { "epoch": 11.398882681564245, "grad_norm": 0.6748090982437134, "learning_rate": 0.0004315966386554622, "loss": 0.5468, "step": 20404 }, { "epoch": 11.399441340782122, "grad_norm": 0.5334838032722473, "learning_rate": 0.0004315686274509804, "loss": 0.4193, "step": 20405 }, { "epoch": 11.4, "grad_norm": 0.68052077293396, "learning_rate": 0.0004315406162464986, "loss": 0.502, "step": 20406 }, { "epoch": 11.400558659217877, "grad_norm": 0.6950442790985107, "learning_rate": 0.0004315126050420168, "loss": 0.3883, "step": 20407 }, { "epoch": 11.401117318435753, "grad_norm": 0.6587467193603516, "learning_rate": 0.000431484593837535, "loss": 0.4802, "step": 20408 }, { "epoch": 11.401675977653632, "grad_norm": 0.5734756588935852, "learning_rate": 0.0004314565826330532, "loss": 0.4865, "step": 20409 }, { "epoch": 11.402234636871508, "grad_norm": 0.7394313812255859, "learning_rate": 0.00043142857142857143, "loss": 0.5259, "step": 20410 }, { "epoch": 11.402793296089385, "grad_norm": 0.46636223793029785, "learning_rate": 0.00043140056022408964, "loss": 0.519, "step": 20411 }, { "epoch": 11.403351955307263, "grad_norm": 0.3804307281970978, "learning_rate": 0.0004313725490196079, "loss": 0.3641, "step": 20412 }, { "epoch": 11.40391061452514, "grad_norm": 0.5207472443580627, "learning_rate": 0.00043134453781512605, "loss": 0.4119, "step": 20413 }, { "epoch": 11.404469273743016, "grad_norm": 0.687709629535675, "learning_rate": 0.00043131652661064425, 
"loss": 0.4611, "step": 20414 }, { "epoch": 11.405027932960893, "grad_norm": 0.7759419679641724, "learning_rate": 0.00043128851540616246, "loss": 0.3967, "step": 20415 }, { "epoch": 11.405586592178771, "grad_norm": 0.8317773342132568, "learning_rate": 0.00043126050420168067, "loss": 0.3992, "step": 20416 }, { "epoch": 11.406145251396648, "grad_norm": 0.47342318296432495, "learning_rate": 0.0004312324929971989, "loss": 0.4551, "step": 20417 }, { "epoch": 11.406703910614524, "grad_norm": 0.519398033618927, "learning_rate": 0.0004312044817927171, "loss": 0.4847, "step": 20418 }, { "epoch": 11.407262569832403, "grad_norm": 0.641997218132019, "learning_rate": 0.0004311764705882353, "loss": 0.3771, "step": 20419 }, { "epoch": 11.40782122905028, "grad_norm": 0.7098687291145325, "learning_rate": 0.00043114845938375354, "loss": 0.48, "step": 20420 }, { "epoch": 11.408379888268156, "grad_norm": 0.6648222208023071, "learning_rate": 0.0004311204481792717, "loss": 0.3898, "step": 20421 }, { "epoch": 11.408938547486034, "grad_norm": 0.734363853931427, "learning_rate": 0.00043109243697478996, "loss": 0.5481, "step": 20422 }, { "epoch": 11.40949720670391, "grad_norm": 1.2085016965866089, "learning_rate": 0.0004310644257703081, "loss": 0.3938, "step": 20423 }, { "epoch": 11.410055865921787, "grad_norm": 0.6026791930198669, "learning_rate": 0.0004310364145658263, "loss": 0.668, "step": 20424 }, { "epoch": 11.410614525139664, "grad_norm": 0.46169158816337585, "learning_rate": 0.0004310084033613446, "loss": 0.4686, "step": 20425 }, { "epoch": 11.411173184357542, "grad_norm": 0.519842803478241, "learning_rate": 0.0004309803921568627, "loss": 0.3675, "step": 20426 }, { "epoch": 11.411731843575419, "grad_norm": 0.5539289116859436, "learning_rate": 0.000430952380952381, "loss": 0.3825, "step": 20427 }, { "epoch": 11.412290502793295, "grad_norm": 0.5642621517181396, "learning_rate": 0.0004309243697478992, "loss": 0.4798, "step": 20428 }, { "epoch": 11.412849162011174, "grad_norm": 
0.4757636487483978, "learning_rate": 0.00043089635854341734, "loss": 0.3381, "step": 20429 }, { "epoch": 11.41340782122905, "grad_norm": 0.7182319760322571, "learning_rate": 0.0004308683473389356, "loss": 0.517, "step": 20430 }, { "epoch": 11.413966480446927, "grad_norm": 1.3319298028945923, "learning_rate": 0.00043084033613445376, "loss": 0.3738, "step": 20431 }, { "epoch": 11.414525139664805, "grad_norm": 0.9071935415267944, "learning_rate": 0.000430812324929972, "loss": 0.4779, "step": 20432 }, { "epoch": 11.415083798882682, "grad_norm": 0.42323726415634155, "learning_rate": 0.0004307843137254902, "loss": 0.4384, "step": 20433 }, { "epoch": 11.415642458100558, "grad_norm": 0.5242413282394409, "learning_rate": 0.0004307563025210084, "loss": 0.4699, "step": 20434 }, { "epoch": 11.416201117318435, "grad_norm": 0.4536595046520233, "learning_rate": 0.00043072829131652663, "loss": 0.4191, "step": 20435 }, { "epoch": 11.416759776536313, "grad_norm": 0.5623801946640015, "learning_rate": 0.00043070028011204484, "loss": 0.4698, "step": 20436 }, { "epoch": 11.41731843575419, "grad_norm": 0.37475845217704773, "learning_rate": 0.00043067226890756305, "loss": 0.3837, "step": 20437 }, { "epoch": 11.417877094972066, "grad_norm": 0.6459819674491882, "learning_rate": 0.00043064425770308125, "loss": 0.4446, "step": 20438 }, { "epoch": 11.418435754189945, "grad_norm": 0.4501931965351105, "learning_rate": 0.0004306162464985994, "loss": 0.4452, "step": 20439 }, { "epoch": 11.418994413407821, "grad_norm": 1.2586090564727783, "learning_rate": 0.00043058823529411766, "loss": 0.4561, "step": 20440 }, { "epoch": 11.419553072625698, "grad_norm": 0.4796806275844574, "learning_rate": 0.00043056022408963587, "loss": 0.5221, "step": 20441 }, { "epoch": 11.420111731843576, "grad_norm": 0.485820472240448, "learning_rate": 0.0004305322128851541, "loss": 0.4164, "step": 20442 }, { "epoch": 11.420670391061453, "grad_norm": 0.6834775805473328, "learning_rate": 0.0004305042016806723, "loss": 0.448, 
"step": 20443 }, { "epoch": 11.42122905027933, "grad_norm": 0.45969659090042114, "learning_rate": 0.0004304761904761905, "loss": 0.4925, "step": 20444 }, { "epoch": 11.421787709497206, "grad_norm": 0.3783314526081085, "learning_rate": 0.0004304481792717087, "loss": 0.3769, "step": 20445 }, { "epoch": 11.422346368715084, "grad_norm": 0.3382120430469513, "learning_rate": 0.0004304201680672269, "loss": 0.3181, "step": 20446 }, { "epoch": 11.422905027932961, "grad_norm": 1.0975066423416138, "learning_rate": 0.0004303921568627451, "loss": 0.3762, "step": 20447 }, { "epoch": 11.423463687150837, "grad_norm": 1.7357056140899658, "learning_rate": 0.0004303641456582633, "loss": 0.3632, "step": 20448 }, { "epoch": 11.424022346368716, "grad_norm": 0.511623203754425, "learning_rate": 0.0004303361344537815, "loss": 0.499, "step": 20449 }, { "epoch": 11.424581005586592, "grad_norm": 1.1244245767593384, "learning_rate": 0.0004303081232492997, "loss": 0.3796, "step": 20450 }, { "epoch": 11.425139664804469, "grad_norm": 0.4435977339744568, "learning_rate": 0.00043028011204481793, "loss": 0.4031, "step": 20451 }, { "epoch": 11.425698324022346, "grad_norm": 0.35999220609664917, "learning_rate": 0.0004302521008403362, "loss": 0.3835, "step": 20452 }, { "epoch": 11.426256983240224, "grad_norm": 0.4780190885066986, "learning_rate": 0.00043022408963585434, "loss": 0.3443, "step": 20453 }, { "epoch": 11.4268156424581, "grad_norm": 0.4865695536136627, "learning_rate": 0.00043019607843137255, "loss": 0.3709, "step": 20454 }, { "epoch": 11.427374301675977, "grad_norm": 0.44215822219848633, "learning_rate": 0.00043016806722689075, "loss": 0.3697, "step": 20455 }, { "epoch": 11.427932960893855, "grad_norm": 0.6452834010124207, "learning_rate": 0.00043014005602240896, "loss": 0.5452, "step": 20456 }, { "epoch": 11.428491620111732, "grad_norm": 2.594749927520752, "learning_rate": 0.0004301120448179272, "loss": 0.3503, "step": 20457 }, { "epoch": 11.429050279329608, "grad_norm": 
1.3245359659194946, "learning_rate": 0.00043008403361344537, "loss": 0.5101, "step": 20458 }, { "epoch": 11.429608938547487, "grad_norm": 0.6216909289360046, "learning_rate": 0.0004300560224089636, "loss": 0.5023, "step": 20459 }, { "epoch": 11.430167597765363, "grad_norm": 0.3987646698951721, "learning_rate": 0.00043002801120448184, "loss": 0.5, "step": 20460 }, { "epoch": 11.43072625698324, "grad_norm": 0.8627544045448303, "learning_rate": 0.00043, "loss": 0.4369, "step": 20461 }, { "epoch": 11.431284916201117, "grad_norm": 1.1489179134368896, "learning_rate": 0.00042997198879551825, "loss": 0.3656, "step": 20462 }, { "epoch": 11.431843575418995, "grad_norm": 0.4449630379676819, "learning_rate": 0.0004299439775910364, "loss": 0.4089, "step": 20463 }, { "epoch": 11.432402234636871, "grad_norm": 2.871068239212036, "learning_rate": 0.0004299159663865546, "loss": 0.3754, "step": 20464 }, { "epoch": 11.432960893854748, "grad_norm": 0.8406011462211609, "learning_rate": 0.00042988795518207287, "loss": 0.4799, "step": 20465 }, { "epoch": 11.433519553072626, "grad_norm": 0.5874283909797668, "learning_rate": 0.000429859943977591, "loss": 0.4136, "step": 20466 }, { "epoch": 11.434078212290503, "grad_norm": 0.6754739284515381, "learning_rate": 0.0004298319327731093, "loss": 0.6977, "step": 20467 }, { "epoch": 11.43463687150838, "grad_norm": 0.4682725667953491, "learning_rate": 0.0004298039215686275, "loss": 0.4273, "step": 20468 }, { "epoch": 11.435195530726258, "grad_norm": 2.1621012687683105, "learning_rate": 0.00042977591036414564, "loss": 0.3661, "step": 20469 }, { "epoch": 11.435754189944134, "grad_norm": 0.34660494327545166, "learning_rate": 0.0004297478991596639, "loss": 0.4264, "step": 20470 }, { "epoch": 11.436312849162011, "grad_norm": 0.622973620891571, "learning_rate": 0.00042971988795518205, "loss": 0.3835, "step": 20471 }, { "epoch": 11.436871508379888, "grad_norm": 0.5168856978416443, "learning_rate": 0.0004296918767507003, "loss": 0.4199, "step": 20472 }, { 
"epoch": 11.437430167597766, "grad_norm": 5.298727035522461, "learning_rate": 0.0004296638655462185, "loss": 0.5567, "step": 20473 }, { "epoch": 11.437988826815642, "grad_norm": 0.8622888326644897, "learning_rate": 0.00042963585434173667, "loss": 0.4301, "step": 20474 }, { "epoch": 11.438547486033519, "grad_norm": 0.3953629732131958, "learning_rate": 0.00042960784313725493, "loss": 0.3623, "step": 20475 }, { "epoch": 11.439106145251397, "grad_norm": 0.4186992943286896, "learning_rate": 0.00042957983193277313, "loss": 0.345, "step": 20476 }, { "epoch": 11.439664804469274, "grad_norm": 0.3621259033679962, "learning_rate": 0.0004295518207282913, "loss": 0.3733, "step": 20477 }, { "epoch": 11.44022346368715, "grad_norm": 0.4001814126968384, "learning_rate": 0.00042952380952380955, "loss": 0.4272, "step": 20478 }, { "epoch": 11.440782122905027, "grad_norm": 0.45869895815849304, "learning_rate": 0.0004294957983193277, "loss": 0.498, "step": 20479 }, { "epoch": 11.441340782122905, "grad_norm": 1.2094117403030396, "learning_rate": 0.00042946778711484596, "loss": 0.4492, "step": 20480 }, { "epoch": 11.441899441340782, "grad_norm": 0.46167707443237305, "learning_rate": 0.00042943977591036416, "loss": 0.3967, "step": 20481 }, { "epoch": 11.442458100558659, "grad_norm": 0.35813504457473755, "learning_rate": 0.0004294117647058823, "loss": 0.4525, "step": 20482 }, { "epoch": 11.443016759776537, "grad_norm": 0.4364907741546631, "learning_rate": 0.0004293837535014006, "loss": 0.3388, "step": 20483 }, { "epoch": 11.443575418994413, "grad_norm": 1.050809383392334, "learning_rate": 0.0004293557422969188, "loss": 0.46, "step": 20484 }, { "epoch": 11.44413407821229, "grad_norm": 0.44603869318962097, "learning_rate": 0.000429327731092437, "loss": 0.4162, "step": 20485 }, { "epoch": 11.444692737430168, "grad_norm": 0.40829548239707947, "learning_rate": 0.0004292997198879552, "loss": 0.3276, "step": 20486 }, { "epoch": 11.445251396648045, "grad_norm": 0.3749415874481201, "learning_rate": 
0.00042927170868347335, "loss": 0.3637, "step": 20487 }, { "epoch": 11.445810055865921, "grad_norm": 0.4271961748600006, "learning_rate": 0.0004292436974789916, "loss": 0.3762, "step": 20488 }, { "epoch": 11.446368715083798, "grad_norm": 0.44431793689727783, "learning_rate": 0.0004292156862745098, "loss": 0.3894, "step": 20489 }, { "epoch": 11.446927374301676, "grad_norm": 2.429311513900757, "learning_rate": 0.000429187675070028, "loss": 0.5131, "step": 20490 }, { "epoch": 11.447486033519553, "grad_norm": 0.6587164998054504, "learning_rate": 0.0004291596638655462, "loss": 0.4741, "step": 20491 }, { "epoch": 11.44804469273743, "grad_norm": 4.382190227508545, "learning_rate": 0.00042913165266106443, "loss": 0.3677, "step": 20492 }, { "epoch": 11.448603351955308, "grad_norm": 0.566003680229187, "learning_rate": 0.00042910364145658264, "loss": 0.4863, "step": 20493 }, { "epoch": 11.449162011173184, "grad_norm": 0.5580727458000183, "learning_rate": 0.00042907563025210084, "loss": 0.3704, "step": 20494 }, { "epoch": 11.449720670391061, "grad_norm": 0.5175549387931824, "learning_rate": 0.0004290476190476191, "loss": 0.3775, "step": 20495 }, { "epoch": 11.45027932960894, "grad_norm": 0.674576997756958, "learning_rate": 0.00042901960784313725, "loss": 0.4498, "step": 20496 }, { "epoch": 11.450837988826816, "grad_norm": 0.5521635413169861, "learning_rate": 0.00042899159663865546, "loss": 0.444, "step": 20497 }, { "epoch": 11.451396648044692, "grad_norm": 0.7391864657402039, "learning_rate": 0.00042896358543417367, "loss": 0.3582, "step": 20498 }, { "epoch": 11.451955307262569, "grad_norm": 0.4824247360229492, "learning_rate": 0.00042893557422969187, "loss": 0.2599, "step": 20499 }, { "epoch": 11.452513966480447, "grad_norm": 0.3248860239982605, "learning_rate": 0.00042890756302521013, "loss": 0.3387, "step": 20500 }, { "epoch": 11.452513966480447, "eval_cer": 0.08737302652395448, "eval_loss": 0.33205243945121765, "eval_runtime": 55.7602, "eval_samples_per_second": 81.384, 
"eval_steps_per_second": 5.093, "eval_wer": 0.345437278288316, "step": 20500 }, { "epoch": 11.453072625698324, "grad_norm": 0.7746720314025879, "learning_rate": 0.0004288795518207283, "loss": 0.3719, "step": 20501 }, { "epoch": 11.4536312849162, "grad_norm": 0.37366852164268494, "learning_rate": 0.0004288515406162465, "loss": 0.4067, "step": 20502 }, { "epoch": 11.454189944134079, "grad_norm": 0.5273412466049194, "learning_rate": 0.00042882352941176475, "loss": 0.3561, "step": 20503 }, { "epoch": 11.454748603351955, "grad_norm": 8.741072654724121, "learning_rate": 0.0004287955182072829, "loss": 0.4188, "step": 20504 }, { "epoch": 11.455307262569832, "grad_norm": 0.7163769602775574, "learning_rate": 0.00042876750700280116, "loss": 0.38, "step": 20505 }, { "epoch": 11.45586592178771, "grad_norm": 0.36931779980659485, "learning_rate": 0.0004287394957983193, "loss": 0.3797, "step": 20506 }, { "epoch": 11.456424581005587, "grad_norm": 0.5512630343437195, "learning_rate": 0.0004287114845938375, "loss": 0.4726, "step": 20507 }, { "epoch": 11.456983240223463, "grad_norm": 0.4833839237689972, "learning_rate": 0.0004286834733893558, "loss": 0.45, "step": 20508 }, { "epoch": 11.45754189944134, "grad_norm": 0.6387859582901001, "learning_rate": 0.00042865546218487393, "loss": 0.4567, "step": 20509 }, { "epoch": 11.458100558659218, "grad_norm": 0.34747910499572754, "learning_rate": 0.0004286274509803922, "loss": 0.2904, "step": 20510 }, { "epoch": 11.458659217877095, "grad_norm": 0.4075853228569031, "learning_rate": 0.0004285994397759104, "loss": 0.4634, "step": 20511 }, { "epoch": 11.459217877094972, "grad_norm": 0.5226553082466125, "learning_rate": 0.00042857142857142855, "loss": 0.3698, "step": 20512 }, { "epoch": 11.45977653631285, "grad_norm": 0.7752663493156433, "learning_rate": 0.0004285434173669468, "loss": 0.5457, "step": 20513 }, { "epoch": 11.460335195530726, "grad_norm": 0.7579894065856934, "learning_rate": 0.00042851540616246496, "loss": 0.3865, "step": 20514 }, { 
"epoch": 11.460893854748603, "grad_norm": 0.5567745566368103, "learning_rate": 0.0004284873949579832, "loss": 0.3046, "step": 20515 }, { "epoch": 11.461452513966481, "grad_norm": 0.6356061697006226, "learning_rate": 0.00042845938375350143, "loss": 0.4375, "step": 20516 }, { "epoch": 11.462011173184358, "grad_norm": 0.5166113972663879, "learning_rate": 0.0004284313725490196, "loss": 0.4939, "step": 20517 }, { "epoch": 11.462569832402234, "grad_norm": 0.3722655177116394, "learning_rate": 0.00042840336134453784, "loss": 0.3356, "step": 20518 }, { "epoch": 11.463128491620111, "grad_norm": 0.5552321076393127, "learning_rate": 0.00042837535014005605, "loss": 0.417, "step": 20519 }, { "epoch": 11.46368715083799, "grad_norm": 0.8605543971061707, "learning_rate": 0.00042834733893557425, "loss": 0.4364, "step": 20520 }, { "epoch": 11.464245810055866, "grad_norm": 0.9286550283432007, "learning_rate": 0.00042831932773109246, "loss": 0.3419, "step": 20521 }, { "epoch": 11.464804469273743, "grad_norm": 2.1893012523651123, "learning_rate": 0.0004282913165266106, "loss": 0.4016, "step": 20522 }, { "epoch": 11.46536312849162, "grad_norm": 0.5527318120002747, "learning_rate": 0.00042826330532212887, "loss": 0.4268, "step": 20523 }, { "epoch": 11.465921787709497, "grad_norm": 1.7358183860778809, "learning_rate": 0.0004282352941176471, "loss": 0.4319, "step": 20524 }, { "epoch": 11.466480446927374, "grad_norm": 0.8993737101554871, "learning_rate": 0.0004282072829131653, "loss": 0.4777, "step": 20525 }, { "epoch": 11.46703910614525, "grad_norm": 0.5329033136367798, "learning_rate": 0.0004281792717086835, "loss": 0.4646, "step": 20526 }, { "epoch": 11.467597765363129, "grad_norm": 0.5158475041389465, "learning_rate": 0.0004281512605042017, "loss": 0.4838, "step": 20527 }, { "epoch": 11.468156424581005, "grad_norm": 2.5568575859069824, "learning_rate": 0.0004281232492997199, "loss": 0.5044, "step": 20528 }, { "epoch": 11.468715083798882, "grad_norm": 0.389017254114151, "learning_rate": 
0.0004280952380952381, "loss": 0.3942, "step": 20529 }, { "epoch": 11.46927374301676, "grad_norm": 0.5271598100662231, "learning_rate": 0.0004280672268907563, "loss": 0.3744, "step": 20530 }, { "epoch": 11.469832402234637, "grad_norm": 0.8620637059211731, "learning_rate": 0.0004280392156862745, "loss": 0.4213, "step": 20531 }, { "epoch": 11.470391061452514, "grad_norm": 0.5783917903900146, "learning_rate": 0.0004280112044817927, "loss": 0.4436, "step": 20532 }, { "epoch": 11.470949720670392, "grad_norm": 0.5613778233528137, "learning_rate": 0.00042798319327731093, "loss": 0.4387, "step": 20533 }, { "epoch": 11.471508379888268, "grad_norm": 0.6934236288070679, "learning_rate": 0.00042795518207282914, "loss": 0.5286, "step": 20534 }, { "epoch": 11.472067039106145, "grad_norm": 0.502943217754364, "learning_rate": 0.0004279271708683474, "loss": 0.4772, "step": 20535 }, { "epoch": 11.472625698324022, "grad_norm": 0.4045088589191437, "learning_rate": 0.00042789915966386555, "loss": 0.41, "step": 20536 }, { "epoch": 11.4731843575419, "grad_norm": 0.675239622592926, "learning_rate": 0.00042787114845938375, "loss": 0.4631, "step": 20537 }, { "epoch": 11.473743016759776, "grad_norm": 0.9886194467544556, "learning_rate": 0.00042784313725490196, "loss": 0.3601, "step": 20538 }, { "epoch": 11.474301675977653, "grad_norm": 1.2970094680786133, "learning_rate": 0.00042781512605042017, "loss": 0.4427, "step": 20539 }, { "epoch": 11.474860335195531, "grad_norm": 0.7917665243148804, "learning_rate": 0.0004277871148459384, "loss": 0.5053, "step": 20540 }, { "epoch": 11.475418994413408, "grad_norm": 0.47673848271369934, "learning_rate": 0.0004277591036414566, "loss": 0.4823, "step": 20541 }, { "epoch": 11.475977653631285, "grad_norm": 0.9772157669067383, "learning_rate": 0.0004277310924369748, "loss": 0.4771, "step": 20542 }, { "epoch": 11.476536312849163, "grad_norm": 0.4587653577327728, "learning_rate": 0.00042770308123249304, "loss": 0.3909, "step": 20543 }, { "epoch": 
11.47709497206704, "grad_norm": 1.3717995882034302, "learning_rate": 0.0004276750700280112, "loss": 0.4589, "step": 20544 }, { "epoch": 11.477653631284916, "grad_norm": 0.5064394474029541, "learning_rate": 0.00042764705882352946, "loss": 0.4088, "step": 20545 }, { "epoch": 11.478212290502793, "grad_norm": 0.451831191778183, "learning_rate": 0.0004276190476190476, "loss": 0.4928, "step": 20546 }, { "epoch": 11.478770949720671, "grad_norm": 0.49596506357192993, "learning_rate": 0.0004275910364145658, "loss": 0.4663, "step": 20547 }, { "epoch": 11.479329608938547, "grad_norm": 0.5681385397911072, "learning_rate": 0.0004275630252100841, "loss": 0.509, "step": 20548 }, { "epoch": 11.479888268156424, "grad_norm": 0.4054035544395447, "learning_rate": 0.0004275350140056022, "loss": 0.3317, "step": 20549 }, { "epoch": 11.480446927374302, "grad_norm": 0.9036941528320312, "learning_rate": 0.0004275070028011205, "loss": 0.4982, "step": 20550 }, { "epoch": 11.481005586592179, "grad_norm": 0.4798491895198822, "learning_rate": 0.0004274789915966387, "loss": 0.411, "step": 20551 }, { "epoch": 11.481564245810056, "grad_norm": 0.6063157916069031, "learning_rate": 0.00042745098039215684, "loss": 0.5997, "step": 20552 }, { "epoch": 11.482122905027932, "grad_norm": 0.5110703706741333, "learning_rate": 0.0004274229691876751, "loss": 0.4079, "step": 20553 }, { "epoch": 11.48268156424581, "grad_norm": 0.5609031319618225, "learning_rate": 0.00042739495798319326, "loss": 0.5443, "step": 20554 }, { "epoch": 11.483240223463687, "grad_norm": 0.5750454068183899, "learning_rate": 0.0004273669467787115, "loss": 0.4339, "step": 20555 }, { "epoch": 11.483798882681564, "grad_norm": 0.4941383898258209, "learning_rate": 0.0004273389355742297, "loss": 0.523, "step": 20556 }, { "epoch": 11.484357541899442, "grad_norm": 3.4129364490509033, "learning_rate": 0.0004273109243697479, "loss": 0.3486, "step": 20557 }, { "epoch": 11.484916201117318, "grad_norm": 0.5427618622779846, "learning_rate": 
0.00042728291316526613, "loss": 0.4667, "step": 20558 }, { "epoch": 11.485474860335195, "grad_norm": 0.7671644687652588, "learning_rate": 0.00042725490196078434, "loss": 0.3694, "step": 20559 }, { "epoch": 11.486033519553073, "grad_norm": 0.6954537034034729, "learning_rate": 0.00042722689075630255, "loss": 0.4228, "step": 20560 }, { "epoch": 11.48659217877095, "grad_norm": 0.810337245464325, "learning_rate": 0.00042719887955182075, "loss": 0.3424, "step": 20561 }, { "epoch": 11.487150837988827, "grad_norm": 0.5496257543563843, "learning_rate": 0.0004271708683473389, "loss": 0.5107, "step": 20562 }, { "epoch": 11.487709497206703, "grad_norm": 2.4249541759490967, "learning_rate": 0.00042714285714285716, "loss": 0.3634, "step": 20563 }, { "epoch": 11.488268156424581, "grad_norm": 0.6596747040748596, "learning_rate": 0.00042711484593837537, "loss": 0.5431, "step": 20564 }, { "epoch": 11.488826815642458, "grad_norm": 6.44435977935791, "learning_rate": 0.0004270868347338936, "loss": 0.4203, "step": 20565 }, { "epoch": 11.489385474860335, "grad_norm": 0.5959506630897522, "learning_rate": 0.0004270588235294118, "loss": 0.493, "step": 20566 }, { "epoch": 11.489944134078213, "grad_norm": 0.5207118988037109, "learning_rate": 0.00042703081232493, "loss": 0.4061, "step": 20567 }, { "epoch": 11.49050279329609, "grad_norm": 1.3582121133804321, "learning_rate": 0.0004270028011204482, "loss": 0.4175, "step": 20568 }, { "epoch": 11.491061452513966, "grad_norm": 0.551295816898346, "learning_rate": 0.0004269747899159664, "loss": 0.4301, "step": 20569 }, { "epoch": 11.491620111731844, "grad_norm": 0.3976898193359375, "learning_rate": 0.0004269467787114846, "loss": 0.4097, "step": 20570 }, { "epoch": 11.492178770949721, "grad_norm": 0.6724882125854492, "learning_rate": 0.0004269187675070028, "loss": 0.4524, "step": 20571 }, { "epoch": 11.492737430167598, "grad_norm": 0.4446307122707367, "learning_rate": 0.000426890756302521, "loss": 0.3682, "step": 20572 }, { "epoch": 
11.493296089385474, "grad_norm": 0.6518819332122803, "learning_rate": 0.0004268627450980392, "loss": 0.5342, "step": 20573 }, { "epoch": 11.493854748603352, "grad_norm": 0.38295605778694153, "learning_rate": 0.00042683473389355743, "loss": 0.4583, "step": 20574 }, { "epoch": 11.494413407821229, "grad_norm": 2.055450677871704, "learning_rate": 0.0004268067226890757, "loss": 0.412, "step": 20575 }, { "epoch": 11.494972067039106, "grad_norm": 0.41317152976989746, "learning_rate": 0.00042677871148459384, "loss": 0.4511, "step": 20576 }, { "epoch": 11.495530726256984, "grad_norm": 0.42795634269714355, "learning_rate": 0.00042675070028011205, "loss": 0.3204, "step": 20577 }, { "epoch": 11.49608938547486, "grad_norm": 4.33543586730957, "learning_rate": 0.00042672268907563025, "loss": 0.336, "step": 20578 }, { "epoch": 11.496648044692737, "grad_norm": 0.3578462600708008, "learning_rate": 0.00042669467787114846, "loss": 0.3054, "step": 20579 }, { "epoch": 11.497206703910614, "grad_norm": 2.628692150115967, "learning_rate": 0.0004266666666666667, "loss": 0.4077, "step": 20580 }, { "epoch": 11.497765363128492, "grad_norm": 0.5811374187469482, "learning_rate": 0.00042663865546218487, "loss": 0.3342, "step": 20581 }, { "epoch": 11.498324022346369, "grad_norm": 0.4065400958061218, "learning_rate": 0.0004266106442577031, "loss": 0.389, "step": 20582 }, { "epoch": 11.498882681564245, "grad_norm": 0.543093740940094, "learning_rate": 0.00042658263305322134, "loss": 0.4708, "step": 20583 }, { "epoch": 11.499441340782123, "grad_norm": 8.488954544067383, "learning_rate": 0.0004265546218487395, "loss": 0.368, "step": 20584 }, { "epoch": 11.5, "grad_norm": 0.43113794922828674, "learning_rate": 0.0004265266106442577, "loss": 0.4222, "step": 20585 }, { "epoch": 11.500558659217877, "grad_norm": 0.4199307858943939, "learning_rate": 0.0004264985994397759, "loss": 0.3772, "step": 20586 }, { "epoch": 11.501117318435755, "grad_norm": 1.419294834136963, "learning_rate": 0.0004264705882352941, 
"loss": 0.3452, "step": 20587 }, { "epoch": 11.501675977653631, "grad_norm": 0.38322269916534424, "learning_rate": 0.00042644257703081237, "loss": 0.4025, "step": 20588 }, { "epoch": 11.502234636871508, "grad_norm": 0.4934639632701874, "learning_rate": 0.0004264145658263305, "loss": 0.3242, "step": 20589 }, { "epoch": 11.502793296089386, "grad_norm": 0.41777971386909485, "learning_rate": 0.0004263865546218487, "loss": 0.4451, "step": 20590 }, { "epoch": 11.503351955307263, "grad_norm": 0.32535412907600403, "learning_rate": 0.000426358543417367, "loss": 0.345, "step": 20591 }, { "epoch": 11.50391061452514, "grad_norm": 0.4173828661441803, "learning_rate": 0.00042633053221288514, "loss": 0.3212, "step": 20592 }, { "epoch": 11.504469273743016, "grad_norm": 0.5191079378128052, "learning_rate": 0.0004263025210084034, "loss": 0.5303, "step": 20593 }, { "epoch": 11.505027932960894, "grad_norm": 0.4365408718585968, "learning_rate": 0.00042627450980392155, "loss": 0.2847, "step": 20594 }, { "epoch": 11.505586592178771, "grad_norm": 0.6085261702537537, "learning_rate": 0.00042624649859943976, "loss": 0.406, "step": 20595 }, { "epoch": 11.506145251396648, "grad_norm": 0.9300827980041504, "learning_rate": 0.000426218487394958, "loss": 0.3382, "step": 20596 }, { "epoch": 11.506703910614526, "grad_norm": 0.6728081703186035, "learning_rate": 0.00042619047619047617, "loss": 0.4516, "step": 20597 }, { "epoch": 11.507262569832402, "grad_norm": 0.8834640979766846, "learning_rate": 0.00042616246498599443, "loss": 0.3371, "step": 20598 }, { "epoch": 11.507821229050279, "grad_norm": 0.40679025650024414, "learning_rate": 0.00042613445378151263, "loss": 0.2974, "step": 20599 }, { "epoch": 11.508379888268156, "grad_norm": 1.0970028638839722, "learning_rate": 0.0004261064425770308, "loss": 0.4262, "step": 20600 }, { "epoch": 11.508938547486034, "grad_norm": 0.42507368326187134, "learning_rate": 0.00042607843137254905, "loss": 0.4422, "step": 20601 }, { "epoch": 11.50949720670391, 
"grad_norm": 0.7584292888641357, "learning_rate": 0.0004260504201680672, "loss": 0.4584, "step": 20602 }, { "epoch": 11.510055865921787, "grad_norm": 2.2101552486419678, "learning_rate": 0.00042602240896358546, "loss": 0.448, "step": 20603 }, { "epoch": 11.510614525139665, "grad_norm": 0.7655834555625916, "learning_rate": 0.00042599439775910366, "loss": 0.5018, "step": 20604 }, { "epoch": 11.511173184357542, "grad_norm": 0.8183310627937317, "learning_rate": 0.0004259663865546218, "loss": 0.5154, "step": 20605 }, { "epoch": 11.511731843575419, "grad_norm": 0.45593780279159546, "learning_rate": 0.0004259383753501401, "loss": 0.3614, "step": 20606 }, { "epoch": 11.512290502793297, "grad_norm": 0.4433230459690094, "learning_rate": 0.0004259103641456583, "loss": 0.2986, "step": 20607 }, { "epoch": 11.512849162011173, "grad_norm": 0.4816119968891144, "learning_rate": 0.0004258823529411765, "loss": 0.4038, "step": 20608 }, { "epoch": 11.51340782122905, "grad_norm": 0.4995867609977722, "learning_rate": 0.0004258543417366947, "loss": 0.4413, "step": 20609 }, { "epoch": 11.513966480446927, "grad_norm": 0.39168286323547363, "learning_rate": 0.00042582633053221285, "loss": 0.4418, "step": 20610 }, { "epoch": 11.514525139664805, "grad_norm": 0.34210118651390076, "learning_rate": 0.0004257983193277311, "loss": 0.3392, "step": 20611 }, { "epoch": 11.515083798882682, "grad_norm": 0.39874303340911865, "learning_rate": 0.0004257703081232493, "loss": 0.3339, "step": 20612 }, { "epoch": 11.515642458100558, "grad_norm": 0.34214985370635986, "learning_rate": 0.0004257422969187675, "loss": 0.4134, "step": 20613 }, { "epoch": 11.516201117318436, "grad_norm": 0.40317386388778687, "learning_rate": 0.0004257142857142857, "loss": 0.3663, "step": 20614 }, { "epoch": 11.516759776536313, "grad_norm": 0.4051573872566223, "learning_rate": 0.00042568627450980393, "loss": 0.3973, "step": 20615 }, { "epoch": 11.51731843575419, "grad_norm": 0.6257272362709045, "learning_rate": 0.00042565826330532214, 
"loss": 0.3988, "step": 20616 }, { "epoch": 11.517877094972068, "grad_norm": 0.5876336693763733, "learning_rate": 0.00042563025210084034, "loss": 0.4521, "step": 20617 }, { "epoch": 11.518435754189944, "grad_norm": 0.3877696096897125, "learning_rate": 0.00042560224089635855, "loss": 0.3665, "step": 20618 }, { "epoch": 11.518994413407821, "grad_norm": 0.803636372089386, "learning_rate": 0.00042557422969187675, "loss": 0.4727, "step": 20619 }, { "epoch": 11.519553072625698, "grad_norm": 0.5632331967353821, "learning_rate": 0.00042554621848739496, "loss": 0.3677, "step": 20620 }, { "epoch": 11.520111731843576, "grad_norm": 0.6445800065994263, "learning_rate": 0.00042551820728291317, "loss": 0.4206, "step": 20621 }, { "epoch": 11.520670391061453, "grad_norm": 0.7842410802841187, "learning_rate": 0.00042549019607843137, "loss": 0.3914, "step": 20622 }, { "epoch": 11.521229050279329, "grad_norm": 0.3753604292869568, "learning_rate": 0.00042546218487394963, "loss": 0.349, "step": 20623 }, { "epoch": 11.521787709497207, "grad_norm": 0.5364634990692139, "learning_rate": 0.0004254341736694678, "loss": 0.4359, "step": 20624 }, { "epoch": 11.522346368715084, "grad_norm": 0.41717830300331116, "learning_rate": 0.000425406162464986, "loss": 0.4191, "step": 20625 }, { "epoch": 11.52290502793296, "grad_norm": 0.4487048387527466, "learning_rate": 0.0004253781512605042, "loss": 0.4097, "step": 20626 }, { "epoch": 11.523463687150837, "grad_norm": 0.5949258208274841, "learning_rate": 0.0004253501400560224, "loss": 0.4337, "step": 20627 }, { "epoch": 11.524022346368715, "grad_norm": 0.49007198214530945, "learning_rate": 0.00042532212885154066, "loss": 0.5302, "step": 20628 }, { "epoch": 11.524581005586592, "grad_norm": 0.5633420944213867, "learning_rate": 0.0004252941176470588, "loss": 0.3146, "step": 20629 }, { "epoch": 11.525139664804469, "grad_norm": 0.5080938339233398, "learning_rate": 0.000425266106442577, "loss": 0.3896, "step": 20630 }, { "epoch": 11.525698324022347, "grad_norm": 
0.5473659038543701, "learning_rate": 0.0004252380952380953, "loss": 0.465, "step": 20631 }, { "epoch": 11.526256983240224, "grad_norm": 0.43636035919189453, "learning_rate": 0.00042521008403361343, "loss": 0.4344, "step": 20632 }, { "epoch": 11.5268156424581, "grad_norm": 0.46388959884643555, "learning_rate": 0.0004251820728291317, "loss": 0.414, "step": 20633 }, { "epoch": 11.527374301675978, "grad_norm": 0.41416123509407043, "learning_rate": 0.00042515406162464984, "loss": 0.3742, "step": 20634 }, { "epoch": 11.527932960893855, "grad_norm": 0.5078537464141846, "learning_rate": 0.00042512605042016805, "loss": 0.4377, "step": 20635 }, { "epoch": 11.528491620111732, "grad_norm": 0.4472281038761139, "learning_rate": 0.0004250980392156863, "loss": 0.3161, "step": 20636 }, { "epoch": 11.529050279329608, "grad_norm": 0.4106791317462921, "learning_rate": 0.00042507002801120446, "loss": 0.4361, "step": 20637 }, { "epoch": 11.529608938547486, "grad_norm": 0.48135775327682495, "learning_rate": 0.0004250420168067227, "loss": 0.4131, "step": 20638 }, { "epoch": 11.530167597765363, "grad_norm": 0.4622468054294586, "learning_rate": 0.00042501400560224093, "loss": 0.3848, "step": 20639 }, { "epoch": 11.53072625698324, "grad_norm": 0.3971022069454193, "learning_rate": 0.0004249859943977591, "loss": 0.3508, "step": 20640 }, { "epoch": 11.531284916201118, "grad_norm": 0.5203205347061157, "learning_rate": 0.00042495798319327734, "loss": 0.389, "step": 20641 }, { "epoch": 11.531843575418995, "grad_norm": 0.38899949193000793, "learning_rate": 0.0004249299719887955, "loss": 0.3718, "step": 20642 }, { "epoch": 11.532402234636871, "grad_norm": 0.40717244148254395, "learning_rate": 0.00042490196078431375, "loss": 0.3621, "step": 20643 }, { "epoch": 11.53296089385475, "grad_norm": 0.7253324389457703, "learning_rate": 0.00042487394957983196, "loss": 0.3396, "step": 20644 }, { "epoch": 11.533519553072626, "grad_norm": 0.4019280970096588, "learning_rate": 0.0004248459383753501, "loss": 
0.3684, "step": 20645 }, { "epoch": 11.534078212290503, "grad_norm": 0.41470420360565186, "learning_rate": 0.00042481792717086837, "loss": 0.3996, "step": 20646 }, { "epoch": 11.53463687150838, "grad_norm": 0.9583471417427063, "learning_rate": 0.0004247899159663866, "loss": 0.596, "step": 20647 }, { "epoch": 11.535195530726257, "grad_norm": 0.8679529428482056, "learning_rate": 0.0004247619047619048, "loss": 0.4855, "step": 20648 }, { "epoch": 11.535754189944134, "grad_norm": 5.495964527130127, "learning_rate": 0.000424733893557423, "loss": 0.3894, "step": 20649 }, { "epoch": 11.53631284916201, "grad_norm": 0.39126309752464294, "learning_rate": 0.00042470588235294114, "loss": 0.3242, "step": 20650 }, { "epoch": 11.536871508379889, "grad_norm": 0.5814370512962341, "learning_rate": 0.0004246778711484594, "loss": 0.4276, "step": 20651 }, { "epoch": 11.537430167597766, "grad_norm": 0.5931792855262756, "learning_rate": 0.0004246498599439776, "loss": 0.4052, "step": 20652 }, { "epoch": 11.537988826815642, "grad_norm": 0.7188152074813843, "learning_rate": 0.0004246218487394958, "loss": 0.3873, "step": 20653 }, { "epoch": 11.538547486033519, "grad_norm": 0.28363490104675293, "learning_rate": 0.000424593837535014, "loss": 0.3008, "step": 20654 }, { "epoch": 11.539106145251397, "grad_norm": 0.5203449130058289, "learning_rate": 0.0004245658263305322, "loss": 0.4843, "step": 20655 }, { "epoch": 11.539664804469274, "grad_norm": 0.43934735655784607, "learning_rate": 0.00042453781512605043, "loss": 0.4092, "step": 20656 }, { "epoch": 11.54022346368715, "grad_norm": 0.48860037326812744, "learning_rate": 0.00042450980392156864, "loss": 0.4898, "step": 20657 }, { "epoch": 11.540782122905028, "grad_norm": 0.4542331397533417, "learning_rate": 0.00042448179271708684, "loss": 0.4047, "step": 20658 }, { "epoch": 11.541340782122905, "grad_norm": 0.4746894836425781, "learning_rate": 0.00042445378151260505, "loss": 0.5379, "step": 20659 }, { "epoch": 11.541899441340782, "grad_norm": 
null, "learning_rate": 0.00042445378151260505, "loss": 0.547, "step": 20660 }, { "epoch": 11.54245810055866, "grad_norm": 0.4919686019420624, "learning_rate": 0.00042442577030812325, "loss": 0.5296, "step": 20661 }, { "epoch": 11.543016759776537, "grad_norm": 0.30281633138656616, "learning_rate": 0.00042439775910364146, "loss": 0.3203, "step": 20662 }, { "epoch": 11.543575418994413, "grad_norm": 0.9004039168357849, "learning_rate": 0.00042436974789915967, "loss": 0.4759, "step": 20663 }, { "epoch": 11.544134078212291, "grad_norm": 0.6114302277565002, "learning_rate": 0.0004243417366946779, "loss": 0.5435, "step": 20664 }, { "epoch": 11.544692737430168, "grad_norm": 0.4828857183456421, "learning_rate": 0.0004243137254901961, "loss": 0.4714, "step": 20665 }, { "epoch": 11.545251396648045, "grad_norm": 0.6116296052932739, "learning_rate": 0.0004242857142857143, "loss": 0.3286, "step": 20666 }, { "epoch": 11.545810055865921, "grad_norm": 0.8764639496803284, "learning_rate": 0.0004242577030812325, "loss": 0.4248, "step": 20667 }, { "epoch": 11.5463687150838, "grad_norm": 0.55197674036026, "learning_rate": 0.0004242296918767507, "loss": 0.5273, "step": 20668 }, { "epoch": 11.546927374301676, "grad_norm": 0.8051908612251282, "learning_rate": 0.00042420168067226896, "loss": 0.4746, "step": 20669 }, { "epoch": 11.547486033519553, "grad_norm": 0.625347375869751, "learning_rate": 0.0004241736694677871, "loss": 0.4496, "step": 20670 }, { "epoch": 11.548044692737431, "grad_norm": 0.7284619808197021, "learning_rate": 0.0004241456582633053, "loss": 0.6122, "step": 20671 }, { "epoch": 11.548603351955308, "grad_norm": 0.6920905709266663, "learning_rate": 0.0004241176470588236, "loss": 0.4777, "step": 20672 }, { "epoch": 11.549162011173184, "grad_norm": 0.565899670124054, "learning_rate": 0.0004240896358543417, "loss": 0.3905, "step": 20673 }, { "epoch": 11.54972067039106, "grad_norm": 0.4656364619731903, "learning_rate": 0.00042406162464986, "loss": 0.4232, "step": 20674 }, { 
"epoch": 11.550279329608939, "grad_norm": 0.4026971161365509, "learning_rate": 0.00042403361344537814, "loss": 0.385, "step": 20675 }, { "epoch": 11.550837988826816, "grad_norm": 0.4009784460067749, "learning_rate": 0.00042400560224089634, "loss": 0.3341, "step": 20676 }, { "epoch": 11.551396648044692, "grad_norm": 1.2701104879379272, "learning_rate": 0.0004239775910364146, "loss": 0.3925, "step": 20677 }, { "epoch": 11.55195530726257, "grad_norm": 0.5902116298675537, "learning_rate": 0.00042394957983193276, "loss": 0.4186, "step": 20678 }, { "epoch": 11.552513966480447, "grad_norm": 1.1804386377334595, "learning_rate": 0.000423921568627451, "loss": 0.3698, "step": 20679 }, { "epoch": 11.553072625698324, "grad_norm": 1.080917239189148, "learning_rate": 0.0004238935574229692, "loss": 0.5306, "step": 20680 }, { "epoch": 11.553631284916202, "grad_norm": 0.5245029926300049, "learning_rate": 0.0004238655462184874, "loss": 0.5989, "step": 20681 }, { "epoch": 11.554189944134079, "grad_norm": 0.5329370498657227, "learning_rate": 0.00042383753501400563, "loss": 0.3679, "step": 20682 }, { "epoch": 11.554748603351955, "grad_norm": 1.1451594829559326, "learning_rate": 0.0004238095238095238, "loss": 0.4497, "step": 20683 }, { "epoch": 11.555307262569832, "grad_norm": 0.4891843795776367, "learning_rate": 0.00042378151260504205, "loss": 0.3997, "step": 20684 }, { "epoch": 11.55586592178771, "grad_norm": 0.42326244711875916, "learning_rate": 0.00042375350140056025, "loss": 0.4702, "step": 20685 }, { "epoch": 11.556424581005587, "grad_norm": 0.688256025314331, "learning_rate": 0.0004237254901960784, "loss": 0.402, "step": 20686 }, { "epoch": 11.556983240223463, "grad_norm": 0.4372667074203491, "learning_rate": 0.00042369747899159666, "loss": 0.5072, "step": 20687 }, { "epoch": 11.557541899441341, "grad_norm": 0.7290706038475037, "learning_rate": 0.00042366946778711487, "loss": 0.5547, "step": 20688 }, { "epoch": 11.558100558659218, "grad_norm": 0.5263441205024719, "learning_rate": 
0.0004236414565826331, "loss": 0.4542, "step": 20689 }, { "epoch": 11.558659217877095, "grad_norm": 0.573900043964386, "learning_rate": 0.0004236134453781513, "loss": 0.4285, "step": 20690 }, { "epoch": 11.559217877094973, "grad_norm": 0.5307414531707764, "learning_rate": 0.00042358543417366943, "loss": 0.4331, "step": 20691 }, { "epoch": 11.55977653631285, "grad_norm": 0.41378021240234375, "learning_rate": 0.0004235574229691877, "loss": 0.48, "step": 20692 }, { "epoch": 11.560335195530726, "grad_norm": 0.5707724690437317, "learning_rate": 0.0004235294117647059, "loss": 0.2765, "step": 20693 }, { "epoch": 11.560893854748603, "grad_norm": 0.7068626880645752, "learning_rate": 0.0004235014005602241, "loss": 0.4568, "step": 20694 }, { "epoch": 11.561452513966481, "grad_norm": 0.60820472240448, "learning_rate": 0.0004234733893557423, "loss": 0.3903, "step": 20695 }, { "epoch": 11.562011173184358, "grad_norm": 0.622393012046814, "learning_rate": 0.0004234453781512605, "loss": 0.3972, "step": 20696 }, { "epoch": 11.562569832402234, "grad_norm": 0.593914270401001, "learning_rate": 0.0004234173669467787, "loss": 0.5067, "step": 20697 }, { "epoch": 11.563128491620112, "grad_norm": 0.39853671193122864, "learning_rate": 0.00042338935574229693, "loss": 0.4047, "step": 20698 }, { "epoch": 11.563687150837989, "grad_norm": 0.5406050682067871, "learning_rate": 0.0004233613445378151, "loss": 0.4034, "step": 20699 }, { "epoch": 11.564245810055866, "grad_norm": 0.503292441368103, "learning_rate": 0.00042333333333333334, "loss": 0.3445, "step": 20700 }, { "epoch": 11.564804469273742, "grad_norm": 0.41019079089164734, "learning_rate": 0.00042330532212885155, "loss": 0.4172, "step": 20701 }, { "epoch": 11.56536312849162, "grad_norm": 0.4900561571121216, "learning_rate": 0.00042327731092436975, "loss": 0.6158, "step": 20702 }, { "epoch": 11.565921787709497, "grad_norm": 5.624237537384033, "learning_rate": 0.00042324929971988796, "loss": 0.4684, "step": 20703 }, { "epoch": 
11.566480446927374, "grad_norm": 0.7807705998420715, "learning_rate": 0.00042322128851540617, "loss": 0.4705, "step": 20704 }, { "epoch": 11.567039106145252, "grad_norm": 0.4776996970176697, "learning_rate": 0.00042319327731092437, "loss": 0.2959, "step": 20705 }, { "epoch": 11.567597765363129, "grad_norm": 0.679042398929596, "learning_rate": 0.0004231652661064426, "loss": 0.4381, "step": 20706 }, { "epoch": 11.568156424581005, "grad_norm": 0.44969356060028076, "learning_rate": 0.00042313725490196084, "loss": 0.4179, "step": 20707 }, { "epoch": 11.568715083798883, "grad_norm": 0.48446929454803467, "learning_rate": 0.000423109243697479, "loss": 0.403, "step": 20708 }, { "epoch": 11.56927374301676, "grad_norm": 0.36746180057525635, "learning_rate": 0.0004230812324929972, "loss": 0.3835, "step": 20709 }, { "epoch": 11.569832402234637, "grad_norm": 0.34233328700065613, "learning_rate": 0.0004230532212885154, "loss": 0.3753, "step": 20710 }, { "epoch": 11.570391061452513, "grad_norm": 0.7545540928840637, "learning_rate": 0.0004230252100840336, "loss": 0.3771, "step": 20711 }, { "epoch": 11.570949720670392, "grad_norm": 0.7465047240257263, "learning_rate": 0.00042299719887955187, "loss": 0.424, "step": 20712 }, { "epoch": 11.571508379888268, "grad_norm": 2.300227642059326, "learning_rate": 0.00042296918767507, "loss": 0.3609, "step": 20713 }, { "epoch": 11.572067039106145, "grad_norm": 0.4362581670284271, "learning_rate": 0.0004229411764705882, "loss": 0.4067, "step": 20714 }, { "epoch": 11.572625698324023, "grad_norm": 0.38785016536712646, "learning_rate": 0.0004229131652661065, "loss": 0.3483, "step": 20715 }, { "epoch": 11.5731843575419, "grad_norm": 0.9845558404922485, "learning_rate": 0.00042288515406162464, "loss": 0.3439, "step": 20716 }, { "epoch": 11.573743016759776, "grad_norm": 0.3465559184551239, "learning_rate": 0.0004228571428571429, "loss": 0.3822, "step": 20717 }, { "epoch": 11.574301675977654, "grad_norm": 1.5967515707015991, "learning_rate": 
0.00042282913165266105, "loss": 0.5245, "step": 20718 }, { "epoch": 11.574860335195531, "grad_norm": 0.6658174991607666, "learning_rate": 0.00042280112044817926, "loss": 0.431, "step": 20719 }, { "epoch": 11.575418994413408, "grad_norm": 1.569270133972168, "learning_rate": 0.0004227731092436975, "loss": 0.4943, "step": 20720 }, { "epoch": 11.575977653631284, "grad_norm": 0.5644358396530151, "learning_rate": 0.00042274509803921567, "loss": 0.454, "step": 20721 }, { "epoch": 11.576536312849163, "grad_norm": 2.7591288089752197, "learning_rate": 0.00042271708683473393, "loss": 0.4999, "step": 20722 }, { "epoch": 11.577094972067039, "grad_norm": 1.3475178480148315, "learning_rate": 0.00042268907563025213, "loss": 0.5448, "step": 20723 }, { "epoch": 11.577653631284916, "grad_norm": 0.6630269289016724, "learning_rate": 0.0004226610644257703, "loss": 0.4617, "step": 20724 }, { "epoch": 11.578212290502794, "grad_norm": 6.117388725280762, "learning_rate": 0.00042263305322128855, "loss": 0.5274, "step": 20725 }, { "epoch": 11.57877094972067, "grad_norm": 0.6152281761169434, "learning_rate": 0.0004226050420168067, "loss": 0.5729, "step": 20726 }, { "epoch": 11.579329608938547, "grad_norm": 0.48115888237953186, "learning_rate": 0.00042257703081232496, "loss": 0.531, "step": 20727 }, { "epoch": 11.579888268156424, "grad_norm": 0.46707797050476074, "learning_rate": 0.00042254901960784316, "loss": 0.4932, "step": 20728 }, { "epoch": 11.580446927374302, "grad_norm": 0.41524726152420044, "learning_rate": 0.0004225210084033613, "loss": 0.3432, "step": 20729 }, { "epoch": 11.581005586592179, "grad_norm": 0.4186629354953766, "learning_rate": 0.0004224929971988796, "loss": 0.4565, "step": 20730 }, { "epoch": 11.581564245810055, "grad_norm": 0.39581429958343506, "learning_rate": 0.0004224649859943978, "loss": 0.3454, "step": 20731 }, { "epoch": 11.582122905027934, "grad_norm": 0.4746589958667755, "learning_rate": 0.000422436974789916, "loss": 0.3529, "step": 20732 }, { "epoch": 
11.58268156424581, "grad_norm": 0.7876299619674683, "learning_rate": 0.0004224089635854342, "loss": 0.4922, "step": 20733 }, { "epoch": 11.583240223463687, "grad_norm": 0.5797983407974243, "learning_rate": 0.00042238095238095235, "loss": 0.3779, "step": 20734 }, { "epoch": 11.583798882681565, "grad_norm": 0.5217782258987427, "learning_rate": 0.0004223529411764706, "loss": 0.4248, "step": 20735 }, { "epoch": 11.584357541899442, "grad_norm": 0.637617290019989, "learning_rate": 0.0004223249299719888, "loss": 0.4861, "step": 20736 }, { "epoch": 11.584916201117318, "grad_norm": 0.5186054706573486, "learning_rate": 0.000422296918767507, "loss": 0.4451, "step": 20737 }, { "epoch": 11.585474860335196, "grad_norm": 4.0225324630737305, "learning_rate": 0.0004222689075630252, "loss": 0.4671, "step": 20738 }, { "epoch": 11.586033519553073, "grad_norm": 1.0567153692245483, "learning_rate": 0.00042224089635854343, "loss": 0.4672, "step": 20739 }, { "epoch": 11.58659217877095, "grad_norm": 0.6557429432868958, "learning_rate": 0.00042221288515406164, "loss": 0.3473, "step": 20740 }, { "epoch": 11.587150837988826, "grad_norm": 1.4269450902938843, "learning_rate": 0.00042218487394957984, "loss": 0.4872, "step": 20741 }, { "epoch": 11.587709497206705, "grad_norm": 0.6591324210166931, "learning_rate": 0.00042215686274509805, "loss": 0.4492, "step": 20742 }, { "epoch": 11.588268156424581, "grad_norm": 0.4599229395389557, "learning_rate": 0.00042212885154061625, "loss": 0.4154, "step": 20743 }, { "epoch": 11.588826815642458, "grad_norm": 0.5963237285614014, "learning_rate": 0.00042210084033613446, "loss": 0.4679, "step": 20744 }, { "epoch": 11.589385474860336, "grad_norm": 0.44597873091697693, "learning_rate": 0.00042207282913165267, "loss": 0.4093, "step": 20745 }, { "epoch": 11.589944134078213, "grad_norm": 0.6215892434120178, "learning_rate": 0.00042204481792717087, "loss": 0.3445, "step": 20746 }, { "epoch": 11.59050279329609, "grad_norm": 0.44218289852142334, "learning_rate": 
0.00042201680672268913, "loss": 0.3988, "step": 20747 }, { "epoch": 11.591061452513966, "grad_norm": 0.30041131377220154, "learning_rate": 0.0004219887955182073, "loss": 0.3357, "step": 20748 }, { "epoch": 11.591620111731844, "grad_norm": 0.5011849999427795, "learning_rate": 0.0004219607843137255, "loss": 0.3558, "step": 20749 }, { "epoch": 11.59217877094972, "grad_norm": 0.5519015789031982, "learning_rate": 0.0004219327731092437, "loss": 0.4369, "step": 20750 }, { "epoch": 11.592737430167597, "grad_norm": 0.4621015787124634, "learning_rate": 0.0004219047619047619, "loss": 0.3814, "step": 20751 }, { "epoch": 11.593296089385476, "grad_norm": 0.4769175350666046, "learning_rate": 0.00042187675070028016, "loss": 0.3719, "step": 20752 }, { "epoch": 11.593854748603352, "grad_norm": 0.9842724204063416, "learning_rate": 0.0004218487394957983, "loss": 0.4707, "step": 20753 }, { "epoch": 11.594413407821229, "grad_norm": 0.5011136531829834, "learning_rate": 0.0004218207282913165, "loss": 0.3874, "step": 20754 }, { "epoch": 11.594972067039105, "grad_norm": 0.5640696287155151, "learning_rate": 0.0004217927170868348, "loss": 0.4147, "step": 20755 }, { "epoch": 11.595530726256984, "grad_norm": 0.6944668292999268, "learning_rate": 0.00042176470588235293, "loss": 0.4971, "step": 20756 }, { "epoch": 11.59608938547486, "grad_norm": 3.0826215744018555, "learning_rate": 0.0004217366946778712, "loss": 0.4951, "step": 20757 }, { "epoch": 11.596648044692737, "grad_norm": 0.6589071154594421, "learning_rate": 0.00042170868347338934, "loss": 0.6799, "step": 20758 }, { "epoch": 11.597206703910615, "grad_norm": 0.49689385294914246, "learning_rate": 0.00042168067226890755, "loss": 0.665, "step": 20759 }, { "epoch": 11.597765363128492, "grad_norm": 2.7962138652801514, "learning_rate": 0.0004216526610644258, "loss": 0.378, "step": 20760 }, { "epoch": 11.598324022346368, "grad_norm": 1.0508677959442139, "learning_rate": 0.00042162464985994396, "loss": 0.5141, "step": 20761 }, { "epoch": 
11.598882681564247, "grad_norm": 0.9208343029022217, "learning_rate": 0.0004215966386554622, "loss": 0.403, "step": 20762 }, { "epoch": 11.599441340782123, "grad_norm": 0.4969636797904968, "learning_rate": 0.00042156862745098043, "loss": 0.4329, "step": 20763 }, { "epoch": 11.6, "grad_norm": 0.597708523273468, "learning_rate": 0.0004215406162464986, "loss": 0.3487, "step": 20764 }, { "epoch": 11.600558659217878, "grad_norm": 3.5610849857330322, "learning_rate": 0.00042151260504201684, "loss": 0.4488, "step": 20765 }, { "epoch": 11.601117318435755, "grad_norm": 0.4976906478404999, "learning_rate": 0.000421484593837535, "loss": 0.4382, "step": 20766 }, { "epoch": 11.601675977653631, "grad_norm": 1.0895826816558838, "learning_rate": 0.00042145658263305325, "loss": 0.4961, "step": 20767 }, { "epoch": 11.602234636871508, "grad_norm": 0.38939765095710754, "learning_rate": 0.00042142857142857146, "loss": 0.4, "step": 20768 }, { "epoch": 11.602793296089386, "grad_norm": 0.8267750144004822, "learning_rate": 0.0004214005602240896, "loss": 0.5018, "step": 20769 }, { "epoch": 11.603351955307263, "grad_norm": 0.5503513216972351, "learning_rate": 0.00042137254901960787, "loss": 0.464, "step": 20770 }, { "epoch": 11.60391061452514, "grad_norm": 0.45620280504226685, "learning_rate": 0.0004213445378151261, "loss": 0.4632, "step": 20771 }, { "epoch": 11.604469273743018, "grad_norm": 0.4537382125854492, "learning_rate": 0.0004213165266106443, "loss": 0.312, "step": 20772 }, { "epoch": 11.605027932960894, "grad_norm": 0.5610388517379761, "learning_rate": 0.0004212885154061625, "loss": 0.4445, "step": 20773 }, { "epoch": 11.60558659217877, "grad_norm": 0.7527524828910828, "learning_rate": 0.00042126050420168064, "loss": 0.3672, "step": 20774 }, { "epoch": 11.606145251396647, "grad_norm": 2.493600845336914, "learning_rate": 0.0004212324929971989, "loss": 0.4032, "step": 20775 }, { "epoch": 11.606703910614526, "grad_norm": 0.4892067313194275, "learning_rate": 0.0004212044817927171, 
"loss": 0.2884, "step": 20776 }, { "epoch": 11.607262569832402, "grad_norm": 0.8390831351280212, "learning_rate": 0.0004211764705882353, "loss": 0.6152, "step": 20777 }, { "epoch": 11.607821229050279, "grad_norm": 0.4732486307621002, "learning_rate": 0.0004211484593837535, "loss": 0.4295, "step": 20778 }, { "epoch": 11.608379888268157, "grad_norm": 0.8024941086769104, "learning_rate": 0.0004211204481792717, "loss": 0.4585, "step": 20779 }, { "epoch": 11.608938547486034, "grad_norm": 0.5505977869033813, "learning_rate": 0.00042109243697478993, "loss": 0.3713, "step": 20780 }, { "epoch": 11.60949720670391, "grad_norm": 0.7352420687675476, "learning_rate": 0.00042106442577030814, "loss": 0.5876, "step": 20781 }, { "epoch": 11.610055865921789, "grad_norm": 0.5054692029953003, "learning_rate": 0.00042103641456582634, "loss": 0.4342, "step": 20782 }, { "epoch": 11.610614525139665, "grad_norm": 4.087777614593506, "learning_rate": 0.00042100840336134455, "loss": 0.5471, "step": 20783 }, { "epoch": 11.611173184357542, "grad_norm": 0.6946427226066589, "learning_rate": 0.00042098039215686275, "loss": 0.4172, "step": 20784 }, { "epoch": 11.611731843575418, "grad_norm": 0.4584228992462158, "learning_rate": 0.00042095238095238096, "loss": 0.369, "step": 20785 }, { "epoch": 11.612290502793297, "grad_norm": 0.6100547909736633, "learning_rate": 0.00042092436974789917, "loss": 0.4526, "step": 20786 }, { "epoch": 11.612849162011173, "grad_norm": 1.6176875829696655, "learning_rate": 0.0004208963585434174, "loss": 0.4305, "step": 20787 }, { "epoch": 11.61340782122905, "grad_norm": 0.5075684189796448, "learning_rate": 0.0004208683473389356, "loss": 0.3849, "step": 20788 }, { "epoch": 11.613966480446928, "grad_norm": 0.5725739002227783, "learning_rate": 0.0004208403361344538, "loss": 0.3565, "step": 20789 }, { "epoch": 11.614525139664805, "grad_norm": 0.5863872170448303, "learning_rate": 0.000420812324929972, "loss": 0.5095, "step": 20790 }, { "epoch": 11.615083798882681, "grad_norm": 
0.8431749939918518, "learning_rate": 0.0004207843137254902, "loss": 0.3658, "step": 20791 }, { "epoch": 11.61564245810056, "grad_norm": 0.5147542357444763, "learning_rate": 0.00042075630252100846, "loss": 0.5801, "step": 20792 }, { "epoch": 11.616201117318436, "grad_norm": 0.5380616188049316, "learning_rate": 0.0004207282913165266, "loss": 0.6028, "step": 20793 }, { "epoch": 11.616759776536313, "grad_norm": 0.7210173010826111, "learning_rate": 0.0004207002801120448, "loss": 0.5253, "step": 20794 }, { "epoch": 11.61731843575419, "grad_norm": 0.34263548254966736, "learning_rate": 0.0004206722689075631, "loss": 0.4054, "step": 20795 }, { "epoch": 11.617877094972068, "grad_norm": 0.4094128906726837, "learning_rate": 0.0004206442577030812, "loss": 0.3966, "step": 20796 }, { "epoch": 11.618435754189944, "grad_norm": 0.4657425284385681, "learning_rate": 0.0004206162464985995, "loss": 0.4782, "step": 20797 }, { "epoch": 11.61899441340782, "grad_norm": 0.5022130012512207, "learning_rate": 0.00042058823529411764, "loss": 0.4481, "step": 20798 }, { "epoch": 11.619553072625699, "grad_norm": 0.5468015670776367, "learning_rate": 0.00042056022408963584, "loss": 0.4901, "step": 20799 }, { "epoch": 11.620111731843576, "grad_norm": 0.5638743042945862, "learning_rate": 0.0004205322128851541, "loss": 0.5493, "step": 20800 }, { "epoch": 11.620670391061452, "grad_norm": 0.5176316499710083, "learning_rate": 0.00042050420168067226, "loss": 0.4455, "step": 20801 }, { "epoch": 11.621229050279329, "grad_norm": 0.4255404770374298, "learning_rate": 0.0004204761904761905, "loss": 0.4306, "step": 20802 }, { "epoch": 11.621787709497207, "grad_norm": 0.39125552773475647, "learning_rate": 0.0004204481792717087, "loss": 0.4491, "step": 20803 }, { "epoch": 11.622346368715084, "grad_norm": 0.46689918637275696, "learning_rate": 0.0004204201680672269, "loss": 0.4205, "step": 20804 }, { "epoch": 11.62290502793296, "grad_norm": 0.5403127074241638, "learning_rate": 0.00042039215686274513, "loss": 0.4873, 
"step": 20805 }, { "epoch": 11.623463687150839, "grad_norm": 0.3441616892814636, "learning_rate": 0.0004203641456582633, "loss": 0.3205, "step": 20806 }, { "epoch": 11.624022346368715, "grad_norm": 0.7106980681419373, "learning_rate": 0.00042033613445378155, "loss": 0.4552, "step": 20807 }, { "epoch": 11.624581005586592, "grad_norm": 1.0218454599380493, "learning_rate": 0.00042030812324929975, "loss": 0.4789, "step": 20808 }, { "epoch": 11.62513966480447, "grad_norm": 0.7296574115753174, "learning_rate": 0.0004202801120448179, "loss": 0.3677, "step": 20809 }, { "epoch": 11.625698324022347, "grad_norm": 0.42564648389816284, "learning_rate": 0.00042025210084033616, "loss": 0.4097, "step": 20810 }, { "epoch": 11.626256983240223, "grad_norm": 0.7909795641899109, "learning_rate": 0.00042022408963585437, "loss": 0.3307, "step": 20811 }, { "epoch": 11.6268156424581, "grad_norm": 0.4138389527797699, "learning_rate": 0.0004201960784313725, "loss": 0.4124, "step": 20812 }, { "epoch": 11.627374301675978, "grad_norm": 0.3971346616744995, "learning_rate": 0.0004201680672268908, "loss": 0.4456, "step": 20813 }, { "epoch": 11.627932960893855, "grad_norm": 0.5548760890960693, "learning_rate": 0.00042014005602240893, "loss": 0.3383, "step": 20814 }, { "epoch": 11.628491620111731, "grad_norm": 0.38403066992759705, "learning_rate": 0.0004201120448179272, "loss": 0.3961, "step": 20815 }, { "epoch": 11.62905027932961, "grad_norm": 1.153908133506775, "learning_rate": 0.0004200840336134454, "loss": 0.4114, "step": 20816 }, { "epoch": 11.629608938547486, "grad_norm": 0.4572281241416931, "learning_rate": 0.00042005602240896355, "loss": 0.3735, "step": 20817 }, { "epoch": 11.630167597765363, "grad_norm": 1.088218331336975, "learning_rate": 0.0004200280112044818, "loss": 0.5125, "step": 20818 }, { "epoch": 11.630726256983241, "grad_norm": 0.4902636706829071, "learning_rate": 0.00042, "loss": 0.4338, "step": 20819 }, { "epoch": 11.631284916201118, "grad_norm": 0.828087329864502, 
"learning_rate": 0.0004199719887955182, "loss": 0.4828, "step": 20820 }, { "epoch": 11.631843575418994, "grad_norm": 0.4409154951572418, "learning_rate": 0.00041994397759103643, "loss": 0.4241, "step": 20821 }, { "epoch": 11.63240223463687, "grad_norm": 3.852677583694458, "learning_rate": 0.0004199159663865546, "loss": 0.521, "step": 20822 }, { "epoch": 11.632960893854749, "grad_norm": 0.43827804923057556, "learning_rate": 0.00041988795518207284, "loss": 0.3276, "step": 20823 }, { "epoch": 11.633519553072626, "grad_norm": 0.8542401194572449, "learning_rate": 0.00041985994397759105, "loss": 0.5294, "step": 20824 }, { "epoch": 11.634078212290502, "grad_norm": 0.6204919219017029, "learning_rate": 0.00041983193277310925, "loss": 0.4354, "step": 20825 }, { "epoch": 11.63463687150838, "grad_norm": 1.001447081565857, "learning_rate": 0.00041980392156862746, "loss": 0.4689, "step": 20826 }, { "epoch": 11.635195530726257, "grad_norm": 0.4870937168598175, "learning_rate": 0.00041977591036414567, "loss": 0.4152, "step": 20827 }, { "epoch": 11.635754189944134, "grad_norm": 0.5477774739265442, "learning_rate": 0.00041974789915966387, "loss": 0.3665, "step": 20828 }, { "epoch": 11.63631284916201, "grad_norm": 0.4083455502986908, "learning_rate": 0.0004197198879551821, "loss": 0.4155, "step": 20829 }, { "epoch": 11.636871508379889, "grad_norm": 0.32426324486732483, "learning_rate": 0.0004196918767507003, "loss": 0.3393, "step": 20830 }, { "epoch": 11.637430167597765, "grad_norm": 0.446688175201416, "learning_rate": 0.0004196638655462185, "loss": 0.4328, "step": 20831 }, { "epoch": 11.637988826815642, "grad_norm": 0.44993913173675537, "learning_rate": 0.0004196358543417367, "loss": 0.5483, "step": 20832 }, { "epoch": 11.63854748603352, "grad_norm": 0.5969384908676147, "learning_rate": 0.0004196078431372549, "loss": 0.4346, "step": 20833 }, { "epoch": 11.639106145251397, "grad_norm": 0.4580068290233612, "learning_rate": 0.0004195798319327731, "loss": 0.3369, "step": 20834 }, { 
"epoch": 11.639664804469273, "grad_norm": 3.9589877128601074, "learning_rate": 0.00041955182072829137, "loss": 0.4299, "step": 20835 }, { "epoch": 11.640223463687152, "grad_norm": 0.41882380843162537, "learning_rate": 0.0004195238095238095, "loss": 0.364, "step": 20836 }, { "epoch": 11.640782122905028, "grad_norm": 6.274776458740234, "learning_rate": 0.0004194957983193277, "loss": 0.421, "step": 20837 }, { "epoch": 11.641340782122905, "grad_norm": 0.3929876983165741, "learning_rate": 0.00041946778711484593, "loss": 0.4225, "step": 20838 }, { "epoch": 11.641899441340783, "grad_norm": 3.386552333831787, "learning_rate": 0.00041943977591036414, "loss": 0.6064, "step": 20839 }, { "epoch": 11.64245810055866, "grad_norm": 0.498435914516449, "learning_rate": 0.0004194117647058824, "loss": 0.5534, "step": 20840 }, { "epoch": 11.643016759776536, "grad_norm": 0.45741790533065796, "learning_rate": 0.00041938375350140055, "loss": 0.3762, "step": 20841 }, { "epoch": 11.643575418994413, "grad_norm": 0.742429256439209, "learning_rate": 0.00041935574229691876, "loss": 0.5266, "step": 20842 }, { "epoch": 11.644134078212291, "grad_norm": 0.9417940378189087, "learning_rate": 0.000419327731092437, "loss": 0.439, "step": 20843 }, { "epoch": 11.644692737430168, "grad_norm": 1.004478096961975, "learning_rate": 0.00041929971988795517, "loss": 0.4475, "step": 20844 }, { "epoch": 11.645251396648044, "grad_norm": 0.962350606918335, "learning_rate": 0.00041927170868347343, "loss": 0.5052, "step": 20845 }, { "epoch": 11.645810055865923, "grad_norm": 0.5542930364608765, "learning_rate": 0.0004192436974789916, "loss": 0.4201, "step": 20846 }, { "epoch": 11.6463687150838, "grad_norm": 2.6939172744750977, "learning_rate": 0.0004192156862745098, "loss": 0.5011, "step": 20847 }, { "epoch": 11.646927374301676, "grad_norm": 0.44292253255844116, "learning_rate": 0.00041918767507002805, "loss": 0.3903, "step": 20848 }, { "epoch": 11.647486033519552, "grad_norm": 0.510962963104248, "learning_rate": 
0.0004191596638655462, "loss": 0.5295, "step": 20849 }, { "epoch": 11.64804469273743, "grad_norm": 0.5129163861274719, "learning_rate": 0.00041913165266106446, "loss": 0.4276, "step": 20850 }, { "epoch": 11.648603351955307, "grad_norm": 0.4973587989807129, "learning_rate": 0.00041910364145658266, "loss": 0.4428, "step": 20851 }, { "epoch": 11.649162011173184, "grad_norm": 0.5627951622009277, "learning_rate": 0.0004190756302521008, "loss": 0.4555, "step": 20852 }, { "epoch": 11.649720670391062, "grad_norm": 2.3687398433685303, "learning_rate": 0.0004190476190476191, "loss": 0.4367, "step": 20853 }, { "epoch": 11.650279329608939, "grad_norm": 22.65110206604004, "learning_rate": 0.00041901960784313723, "loss": 0.5558, "step": 20854 }, { "epoch": 11.650837988826815, "grad_norm": 0.5034905076026917, "learning_rate": 0.0004189915966386555, "loss": 0.3427, "step": 20855 }, { "epoch": 11.651396648044694, "grad_norm": 0.4828323721885681, "learning_rate": 0.0004189635854341737, "loss": 0.3909, "step": 20856 }, { "epoch": 11.65195530726257, "grad_norm": 0.3465307652950287, "learning_rate": 0.00041893557422969185, "loss": 0.3121, "step": 20857 }, { "epoch": 11.652513966480447, "grad_norm": 0.6500255465507507, "learning_rate": 0.0004189075630252101, "loss": 0.5043, "step": 20858 }, { "epoch": 11.653072625698323, "grad_norm": 0.4145534336566925, "learning_rate": 0.0004188795518207283, "loss": 0.4569, "step": 20859 }, { "epoch": 11.653631284916202, "grad_norm": 0.4527547061443329, "learning_rate": 0.0004188515406162465, "loss": 0.4054, "step": 20860 }, { "epoch": 11.654189944134078, "grad_norm": 0.5085048675537109, "learning_rate": 0.0004188235294117647, "loss": 0.5424, "step": 20861 }, { "epoch": 11.654748603351955, "grad_norm": 0.6608612537384033, "learning_rate": 0.0004187955182072829, "loss": 0.5669, "step": 20862 }, { "epoch": 11.655307262569833, "grad_norm": 0.45789891481399536, "learning_rate": 0.00041876750700280114, "loss": 0.3768, "step": 20863 }, { "epoch": 
11.65586592178771, "grad_norm": 0.3989100456237793, "learning_rate": 0.00041873949579831934, "loss": 0.3732, "step": 20864 }, { "epoch": 11.656424581005586, "grad_norm": 0.4907222092151642, "learning_rate": 0.00041871148459383755, "loss": 0.4106, "step": 20865 }, { "epoch": 11.656983240223465, "grad_norm": 0.9251715540885925, "learning_rate": 0.00041868347338935575, "loss": 0.4696, "step": 20866 }, { "epoch": 11.657541899441341, "grad_norm": 1.0267250537872314, "learning_rate": 0.00041865546218487396, "loss": 0.5998, "step": 20867 }, { "epoch": 11.658100558659218, "grad_norm": 4.684356689453125, "learning_rate": 0.00041862745098039217, "loss": 0.3843, "step": 20868 }, { "epoch": 11.658659217877094, "grad_norm": 0.4320904612541199, "learning_rate": 0.00041859943977591037, "loss": 0.4287, "step": 20869 }, { "epoch": 11.659217877094973, "grad_norm": 5.6273627281188965, "learning_rate": 0.0004185714285714286, "loss": 0.3861, "step": 20870 }, { "epoch": 11.65977653631285, "grad_norm": 0.40378838777542114, "learning_rate": 0.0004185434173669468, "loss": 0.3187, "step": 20871 }, { "epoch": 11.660335195530726, "grad_norm": 0.5093754529953003, "learning_rate": 0.000418515406162465, "loss": 0.363, "step": 20872 }, { "epoch": 11.660893854748604, "grad_norm": 0.7717400789260864, "learning_rate": 0.0004184873949579832, "loss": 0.4281, "step": 20873 }, { "epoch": 11.66145251396648, "grad_norm": 0.6055014729499817, "learning_rate": 0.0004184593837535014, "loss": 0.4512, "step": 20874 }, { "epoch": 11.662011173184357, "grad_norm": 0.4012812674045563, "learning_rate": 0.00041843137254901966, "loss": 0.3706, "step": 20875 }, { "epoch": 11.662569832402234, "grad_norm": 0.3984925150871277, "learning_rate": 0.0004184033613445378, "loss": 0.4649, "step": 20876 }, { "epoch": 11.663128491620112, "grad_norm": 0.5753477215766907, "learning_rate": 0.000418375350140056, "loss": 0.31, "step": 20877 }, { "epoch": 11.663687150837989, "grad_norm": 2.1902623176574707, "learning_rate": 
0.0004183473389355742, "loss": 0.5273, "step": 20878 }, { "epoch": 11.664245810055865, "grad_norm": 0.4382067024707794, "learning_rate": 0.00041831932773109243, "loss": 0.4245, "step": 20879 }, { "epoch": 11.664804469273744, "grad_norm": 0.8010841608047485, "learning_rate": 0.0004182913165266107, "loss": 0.4593, "step": 20880 }, { "epoch": 11.66536312849162, "grad_norm": 0.46820876002311707, "learning_rate": 0.00041826330532212884, "loss": 0.4346, "step": 20881 }, { "epoch": 11.665921787709497, "grad_norm": 0.5519371032714844, "learning_rate": 0.00041823529411764705, "loss": 0.4786, "step": 20882 }, { "epoch": 11.666480446927375, "grad_norm": 0.45646730065345764, "learning_rate": 0.0004182072829131653, "loss": 0.3799, "step": 20883 }, { "epoch": 11.667039106145252, "grad_norm": 0.5841661691665649, "learning_rate": 0.00041817927170868346, "loss": 0.4647, "step": 20884 }, { "epoch": 11.667597765363128, "grad_norm": 0.5996007919311523, "learning_rate": 0.0004181512605042017, "loss": 0.3582, "step": 20885 }, { "epoch": 11.668156424581005, "grad_norm": 0.4924525022506714, "learning_rate": 0.0004181232492997199, "loss": 0.334, "step": 20886 }, { "epoch": 11.668715083798883, "grad_norm": 0.5998673439025879, "learning_rate": 0.0004180952380952381, "loss": 0.4265, "step": 20887 }, { "epoch": 11.66927374301676, "grad_norm": 0.4154660999774933, "learning_rate": 0.00041806722689075634, "loss": 0.3879, "step": 20888 }, { "epoch": 11.669832402234636, "grad_norm": 0.7532088756561279, "learning_rate": 0.0004180392156862745, "loss": 0.4445, "step": 20889 }, { "epoch": 11.670391061452515, "grad_norm": 0.4688023626804352, "learning_rate": 0.00041801120448179275, "loss": 0.4407, "step": 20890 }, { "epoch": 11.670949720670391, "grad_norm": 1.3237495422363281, "learning_rate": 0.00041798319327731096, "loss": 0.4689, "step": 20891 }, { "epoch": 11.671508379888268, "grad_norm": 1.004561424255371, "learning_rate": 0.0004179551820728291, "loss": 0.5923, "step": 20892 }, { "epoch": 
11.672067039106146, "grad_norm": 0.4932515621185303, "learning_rate": 0.00041792717086834737, "loss": 0.4147, "step": 20893 }, { "epoch": 11.672625698324023, "grad_norm": 0.406093567609787, "learning_rate": 0.0004178991596638655, "loss": 0.4448, "step": 20894 }, { "epoch": 11.6731843575419, "grad_norm": 1.1091928482055664, "learning_rate": 0.0004178711484593838, "loss": 0.4067, "step": 20895 }, { "epoch": 11.673743016759776, "grad_norm": 0.699067234992981, "learning_rate": 0.000417843137254902, "loss": 0.3972, "step": 20896 }, { "epoch": 11.674301675977654, "grad_norm": 1.3785072565078735, "learning_rate": 0.00041781512605042014, "loss": 0.4695, "step": 20897 }, { "epoch": 11.67486033519553, "grad_norm": 0.5263012647628784, "learning_rate": 0.0004177871148459384, "loss": 0.3302, "step": 20898 }, { "epoch": 11.675418994413407, "grad_norm": 0.4251067638397217, "learning_rate": 0.0004177591036414566, "loss": 0.4764, "step": 20899 }, { "epoch": 11.675977653631286, "grad_norm": 0.47318360209465027, "learning_rate": 0.0004177310924369748, "loss": 0.5472, "step": 20900 }, { "epoch": 11.676536312849162, "grad_norm": 0.3908814489841461, "learning_rate": 0.000417703081232493, "loss": 0.5137, "step": 20901 }, { "epoch": 11.677094972067039, "grad_norm": 0.6362344622612, "learning_rate": 0.00041767507002801117, "loss": 0.4494, "step": 20902 }, { "epoch": 11.677653631284915, "grad_norm": 0.5055546164512634, "learning_rate": 0.00041764705882352943, "loss": 0.488, "step": 20903 }, { "epoch": 11.678212290502794, "grad_norm": 0.5171534419059753, "learning_rate": 0.00041761904761904764, "loss": 0.484, "step": 20904 }, { "epoch": 11.67877094972067, "grad_norm": 2.00119686126709, "learning_rate": 0.00041759103641456584, "loss": 0.4824, "step": 20905 }, { "epoch": 11.679329608938547, "grad_norm": 0.6595990657806396, "learning_rate": 0.00041756302521008405, "loss": 0.4405, "step": 20906 }, { "epoch": 11.679888268156425, "grad_norm": 0.5611948370933533, "learning_rate": 
0.00041753501400560225, "loss": 0.4683, "step": 20907 }, { "epoch": 11.680446927374302, "grad_norm": 0.37523218989372253, "learning_rate": 0.00041750700280112046, "loss": 0.3346, "step": 20908 }, { "epoch": 11.681005586592178, "grad_norm": 0.5734403729438782, "learning_rate": 0.00041747899159663867, "loss": 0.4778, "step": 20909 }, { "epoch": 11.681564245810057, "grad_norm": 0.5474722385406494, "learning_rate": 0.00041745098039215687, "loss": 0.4319, "step": 20910 }, { "epoch": 11.682122905027933, "grad_norm": 0.5411948561668396, "learning_rate": 0.0004174229691876751, "loss": 0.4802, "step": 20911 }, { "epoch": 11.68268156424581, "grad_norm": 0.5044373273849487, "learning_rate": 0.0004173949579831933, "loss": 0.3734, "step": 20912 }, { "epoch": 11.683240223463688, "grad_norm": 0.3963293135166168, "learning_rate": 0.0004173669467787115, "loss": 0.3806, "step": 20913 }, { "epoch": 11.683798882681565, "grad_norm": 0.5499990582466125, "learning_rate": 0.0004173389355742297, "loss": 0.4071, "step": 20914 }, { "epoch": 11.684357541899441, "grad_norm": 3.9386253356933594, "learning_rate": 0.00041731092436974796, "loss": 0.5305, "step": 20915 }, { "epoch": 11.684916201117318, "grad_norm": 1.509673833847046, "learning_rate": 0.0004172829131652661, "loss": 0.4179, "step": 20916 }, { "epoch": 11.685474860335196, "grad_norm": 0.8844584226608276, "learning_rate": 0.0004172549019607843, "loss": 0.4317, "step": 20917 }, { "epoch": 11.686033519553073, "grad_norm": 0.43616244196891785, "learning_rate": 0.0004172268907563025, "loss": 0.4323, "step": 20918 }, { "epoch": 11.68659217877095, "grad_norm": 0.4146277904510498, "learning_rate": 0.0004171988795518207, "loss": 0.3762, "step": 20919 }, { "epoch": 11.687150837988828, "grad_norm": 0.6163322925567627, "learning_rate": 0.00041717086834733893, "loss": 0.4465, "step": 20920 }, { "epoch": 11.687709497206704, "grad_norm": 0.5647704005241394, "learning_rate": 0.00041714285714285714, "loss": 0.4416, "step": 20921 }, { "epoch": 
11.68826815642458, "grad_norm": 0.5847733020782471, "learning_rate": 0.00041711484593837534, "loss": 0.4299, "step": 20922 }, { "epoch": 11.688826815642457, "grad_norm": 0.4575791358947754, "learning_rate": 0.0004170868347338936, "loss": 0.4421, "step": 20923 }, { "epoch": 11.689385474860336, "grad_norm": 0.7809575796127319, "learning_rate": 0.00041705882352941176, "loss": 0.4015, "step": 20924 }, { "epoch": 11.689944134078212, "grad_norm": 0.4937390685081482, "learning_rate": 0.00041703081232492996, "loss": 0.4945, "step": 20925 }, { "epoch": 11.690502793296089, "grad_norm": 0.518347442150116, "learning_rate": 0.00041700280112044817, "loss": 0.3909, "step": 20926 }, { "epoch": 11.691061452513967, "grad_norm": 0.4789871573448181, "learning_rate": 0.0004169747899159664, "loss": 0.538, "step": 20927 }, { "epoch": 11.691620111731844, "grad_norm": 0.5728869438171387, "learning_rate": 0.00041694677871148463, "loss": 0.4441, "step": 20928 }, { "epoch": 11.69217877094972, "grad_norm": 0.4812524914741516, "learning_rate": 0.0004169187675070028, "loss": 0.5193, "step": 20929 }, { "epoch": 11.692737430167599, "grad_norm": 0.5269397497177124, "learning_rate": 0.000416890756302521, "loss": 0.4327, "step": 20930 }, { "epoch": 11.693296089385475, "grad_norm": 0.5200487971305847, "learning_rate": 0.00041686274509803925, "loss": 0.4149, "step": 20931 }, { "epoch": 11.693854748603352, "grad_norm": 0.5177311301231384, "learning_rate": 0.0004168347338935574, "loss": 0.5222, "step": 20932 }, { "epoch": 11.694413407821228, "grad_norm": 1.0114914178848267, "learning_rate": 0.00041680672268907566, "loss": 0.4473, "step": 20933 }, { "epoch": 11.694972067039107, "grad_norm": 0.39139434695243835, "learning_rate": 0.0004167787114845938, "loss": 0.4353, "step": 20934 }, { "epoch": 11.695530726256983, "grad_norm": 0.671846866607666, "learning_rate": 0.000416750700280112, "loss": 0.5476, "step": 20935 }, { "epoch": 11.69608938547486, "grad_norm": 2.2795498371124268, "learning_rate": 
0.0004167226890756303, "loss": 0.4468, "step": 20936 }, { "epoch": 11.696648044692738, "grad_norm": 0.4834044277667999, "learning_rate": 0.00041669467787114843, "loss": 0.4177, "step": 20937 }, { "epoch": 11.697206703910615, "grad_norm": 0.7285804748535156, "learning_rate": 0.0004166666666666667, "loss": 0.5578, "step": 20938 }, { "epoch": 11.697765363128491, "grad_norm": 4.1155104637146, "learning_rate": 0.0004166386554621849, "loss": 0.4506, "step": 20939 }, { "epoch": 11.69832402234637, "grad_norm": 0.43977445363998413, "learning_rate": 0.00041661064425770305, "loss": 0.4407, "step": 20940 }, { "epoch": 11.698882681564246, "grad_norm": 0.363665372133255, "learning_rate": 0.0004165826330532213, "loss": 0.2814, "step": 20941 }, { "epoch": 11.699441340782123, "grad_norm": 1.0198465585708618, "learning_rate": 0.00041655462184873946, "loss": 0.3967, "step": 20942 }, { "epoch": 11.7, "grad_norm": 1.0242189168930054, "learning_rate": 0.0004165266106442577, "loss": 0.4392, "step": 20943 }, { "epoch": 11.700558659217878, "grad_norm": 0.815778911113739, "learning_rate": 0.00041649859943977593, "loss": 0.4214, "step": 20944 }, { "epoch": 11.701117318435754, "grad_norm": 1.8759633302688599, "learning_rate": 0.0004164705882352941, "loss": 0.4485, "step": 20945 }, { "epoch": 11.70167597765363, "grad_norm": 1.4267017841339111, "learning_rate": 0.00041644257703081234, "loss": 0.3579, "step": 20946 }, { "epoch": 11.702234636871509, "grad_norm": 0.706054151058197, "learning_rate": 0.00041641456582633055, "loss": 0.3676, "step": 20947 }, { "epoch": 11.702793296089386, "grad_norm": 1.0912322998046875, "learning_rate": 0.00041638655462184875, "loss": 0.457, "step": 20948 }, { "epoch": 11.703351955307262, "grad_norm": 0.4204034209251404, "learning_rate": 0.00041635854341736696, "loss": 0.4386, "step": 20949 }, { "epoch": 11.703910614525139, "grad_norm": 1.5568163394927979, "learning_rate": 0.0004163305322128851, "loss": 0.4777, "step": 20950 }, { "epoch": 11.704469273743017, 
"grad_norm": 0.7590762972831726, "learning_rate": 0.00041630252100840337, "loss": 0.4947, "step": 20951 }, { "epoch": 11.705027932960894, "grad_norm": 0.5208397507667542, "learning_rate": 0.0004162745098039216, "loss": 0.3341, "step": 20952 }, { "epoch": 11.70558659217877, "grad_norm": 0.5330596566200256, "learning_rate": 0.0004162464985994398, "loss": 0.5348, "step": 20953 }, { "epoch": 11.706145251396649, "grad_norm": 0.5282301306724548, "learning_rate": 0.000416218487394958, "loss": 0.4584, "step": 20954 }, { "epoch": 11.706703910614525, "grad_norm": 0.6201488375663757, "learning_rate": 0.0004161904761904762, "loss": 0.462, "step": 20955 }, { "epoch": 11.707262569832402, "grad_norm": 0.5473606586456299, "learning_rate": 0.0004161624649859944, "loss": 0.4445, "step": 20956 }, { "epoch": 11.70782122905028, "grad_norm": 0.42217910289764404, "learning_rate": 0.0004161344537815126, "loss": 0.4231, "step": 20957 }, { "epoch": 11.708379888268157, "grad_norm": 0.5057543516159058, "learning_rate": 0.00041610644257703087, "loss": 0.3717, "step": 20958 }, { "epoch": 11.708938547486033, "grad_norm": 0.35181501507759094, "learning_rate": 0.000416078431372549, "loss": 0.3805, "step": 20959 }, { "epoch": 11.70949720670391, "grad_norm": 0.3671259582042694, "learning_rate": 0.0004160504201680672, "loss": 0.3887, "step": 20960 }, { "epoch": 11.710055865921788, "grad_norm": 0.5064424872398376, "learning_rate": 0.00041602240896358543, "loss": 0.5408, "step": 20961 }, { "epoch": 11.710614525139665, "grad_norm": 0.49026134610176086, "learning_rate": 0.00041599439775910364, "loss": 0.3025, "step": 20962 }, { "epoch": 11.711173184357541, "grad_norm": 3.980346202850342, "learning_rate": 0.0004159663865546219, "loss": 0.3122, "step": 20963 }, { "epoch": 11.71173184357542, "grad_norm": 0.5637076497077942, "learning_rate": 0.00041593837535014005, "loss": 0.5266, "step": 20964 }, { "epoch": 11.712290502793296, "grad_norm": 0.6552236080169678, "learning_rate": 0.00041591036414565826, "loss": 
0.4445, "step": 20965 }, { "epoch": 11.712849162011173, "grad_norm": 0.8326232433319092, "learning_rate": 0.0004158823529411765, "loss": 0.3617, "step": 20966 }, { "epoch": 11.713407821229051, "grad_norm": 0.4824349582195282, "learning_rate": 0.00041585434173669467, "loss": 0.3881, "step": 20967 }, { "epoch": 11.713966480446928, "grad_norm": 3.46415114402771, "learning_rate": 0.00041582633053221293, "loss": 0.3791, "step": 20968 }, { "epoch": 11.714525139664804, "grad_norm": 0.4594406485557556, "learning_rate": 0.0004157983193277311, "loss": 0.3782, "step": 20969 }, { "epoch": 11.71508379888268, "grad_norm": 0.47127431631088257, "learning_rate": 0.0004157703081232493, "loss": 0.3632, "step": 20970 }, { "epoch": 11.71564245810056, "grad_norm": 0.6570918560028076, "learning_rate": 0.00041574229691876755, "loss": 0.4198, "step": 20971 }, { "epoch": 11.716201117318436, "grad_norm": 0.4378843605518341, "learning_rate": 0.0004157142857142857, "loss": 0.3927, "step": 20972 }, { "epoch": 11.716759776536312, "grad_norm": 0.540259599685669, "learning_rate": 0.00041568627450980396, "loss": 0.445, "step": 20973 }, { "epoch": 11.71731843575419, "grad_norm": 1.033901572227478, "learning_rate": 0.00041565826330532216, "loss": 0.4615, "step": 20974 }, { "epoch": 11.717877094972067, "grad_norm": 0.7708370685577393, "learning_rate": 0.0004156302521008403, "loss": 0.5225, "step": 20975 }, { "epoch": 11.718435754189944, "grad_norm": 0.7920206189155579, "learning_rate": 0.0004156022408963586, "loss": 0.4068, "step": 20976 }, { "epoch": 11.71899441340782, "grad_norm": 0.9534438848495483, "learning_rate": 0.00041557422969187673, "loss": 0.4742, "step": 20977 }, { "epoch": 11.719553072625699, "grad_norm": 0.5032855272293091, "learning_rate": 0.000415546218487395, "loss": 0.4401, "step": 20978 }, { "epoch": 11.720111731843575, "grad_norm": 0.3207739293575287, "learning_rate": 0.0004155182072829132, "loss": 0.2921, "step": 20979 }, { "epoch": 11.720670391061452, "grad_norm": 
0.3972409665584564, "learning_rate": 0.00041549019607843135, "loss": 0.4261, "step": 20980 }, { "epoch": 11.72122905027933, "grad_norm": 1.6158636808395386, "learning_rate": 0.0004154621848739496, "loss": 0.5182, "step": 20981 }, { "epoch": 11.721787709497207, "grad_norm": 0.5116075873374939, "learning_rate": 0.0004154341736694678, "loss": 0.4188, "step": 20982 }, { "epoch": 11.722346368715083, "grad_norm": 0.6547574996948242, "learning_rate": 0.000415406162464986, "loss": 0.5221, "step": 20983 }, { "epoch": 11.722905027932962, "grad_norm": 9.015817642211914, "learning_rate": 0.0004153781512605042, "loss": 0.4194, "step": 20984 }, { "epoch": 11.723463687150838, "grad_norm": 0.5121548175811768, "learning_rate": 0.0004153501400560224, "loss": 0.4588, "step": 20985 }, { "epoch": 11.724022346368715, "grad_norm": 0.6617794036865234, "learning_rate": 0.00041532212885154064, "loss": 0.5173, "step": 20986 }, { "epoch": 11.724581005586593, "grad_norm": 0.4122159779071808, "learning_rate": 0.00041529411764705884, "loss": 0.5111, "step": 20987 }, { "epoch": 11.72513966480447, "grad_norm": 1.077163815498352, "learning_rate": 0.00041526610644257705, "loss": 0.4942, "step": 20988 }, { "epoch": 11.725698324022346, "grad_norm": 0.9594558477401733, "learning_rate": 0.00041523809523809525, "loss": 0.6044, "step": 20989 }, { "epoch": 11.726256983240223, "grad_norm": 0.5694657564163208, "learning_rate": 0.00041521008403361346, "loss": 0.4677, "step": 20990 }, { "epoch": 11.726815642458101, "grad_norm": 0.5785322785377502, "learning_rate": 0.00041518207282913167, "loss": 0.5567, "step": 20991 }, { "epoch": 11.727374301675978, "grad_norm": 0.449042946100235, "learning_rate": 0.00041515406162464987, "loss": 0.4259, "step": 20992 }, { "epoch": 11.727932960893854, "grad_norm": 0.5365514159202576, "learning_rate": 0.0004151260504201681, "loss": 0.4718, "step": 20993 }, { "epoch": 11.728491620111733, "grad_norm": 1.9636155366897583, "learning_rate": 0.0004150980392156863, "loss": 0.451, 
"step": 20994 }, { "epoch": 11.72905027932961, "grad_norm": 0.8794490098953247, "learning_rate": 0.0004150700280112045, "loss": 0.3745, "step": 20995 }, { "epoch": 11.729608938547486, "grad_norm": 1.2629022598266602, "learning_rate": 0.0004150420168067227, "loss": 0.4083, "step": 20996 }, { "epoch": 11.730167597765362, "grad_norm": 0.4553702473640442, "learning_rate": 0.0004150140056022409, "loss": 0.3784, "step": 20997 }, { "epoch": 11.73072625698324, "grad_norm": 0.5279976725578308, "learning_rate": 0.00041498599439775916, "loss": 0.3578, "step": 20998 }, { "epoch": 11.731284916201117, "grad_norm": 1.3618521690368652, "learning_rate": 0.0004149579831932773, "loss": 0.3737, "step": 20999 }, { "epoch": 11.731843575418994, "grad_norm": 0.4541051685810089, "learning_rate": 0.0004149299719887955, "loss": 0.4536, "step": 21000 }, { "epoch": 11.731843575418994, "eval_cer": 0.08786400881585982, "eval_loss": 0.33108505606651306, "eval_runtime": 56.2558, "eval_samples_per_second": 80.667, "eval_steps_per_second": 5.048, "eval_wer": 0.34750425965755144, "step": 21000 }, { "epoch": 11.732402234636872, "grad_norm": 0.7673790454864502, "learning_rate": 0.0004149019607843137, "loss": 0.5946, "step": 21001 }, { "epoch": 11.732960893854749, "grad_norm": 0.3800128400325775, "learning_rate": 0.00041487394957983193, "loss": 0.4643, "step": 21002 }, { "epoch": 11.733519553072625, "grad_norm": 0.4540000557899475, "learning_rate": 0.0004148459383753502, "loss": 0.4183, "step": 21003 }, { "epoch": 11.734078212290502, "grad_norm": 0.7685021162033081, "learning_rate": 0.00041481792717086834, "loss": 0.4576, "step": 21004 }, { "epoch": 11.73463687150838, "grad_norm": 0.3957034647464752, "learning_rate": 0.00041478991596638655, "loss": 0.425, "step": 21005 }, { "epoch": 11.735195530726257, "grad_norm": 0.866580605506897, "learning_rate": 0.0004147619047619048, "loss": 0.4591, "step": 21006 }, { "epoch": 11.735754189944133, "grad_norm": 0.8084816336631775, "learning_rate": 
0.00041473389355742296, "loss": 0.4333, "step": 21007 }, { "epoch": 11.736312849162012, "grad_norm": 0.5898215770721436, "learning_rate": 0.0004147058823529412, "loss": 0.4038, "step": 21008 }, { "epoch": 11.736871508379888, "grad_norm": 0.387378454208374, "learning_rate": 0.0004146778711484594, "loss": 0.4309, "step": 21009 }, { "epoch": 11.737430167597765, "grad_norm": 0.38650548458099365, "learning_rate": 0.0004146498599439776, "loss": 0.4005, "step": 21010 }, { "epoch": 11.737988826815643, "grad_norm": 0.5134881734848022, "learning_rate": 0.00041462184873949584, "loss": 0.4904, "step": 21011 }, { "epoch": 11.73854748603352, "grad_norm": 1.4361058473587036, "learning_rate": 0.000414593837535014, "loss": 0.3718, "step": 21012 }, { "epoch": 11.739106145251396, "grad_norm": 0.3456028699874878, "learning_rate": 0.00041456582633053225, "loss": 0.3193, "step": 21013 }, { "epoch": 11.739664804469275, "grad_norm": 0.49402251839637756, "learning_rate": 0.00041453781512605046, "loss": 0.3571, "step": 21014 }, { "epoch": 11.740223463687151, "grad_norm": 0.615706205368042, "learning_rate": 0.0004145098039215686, "loss": 0.6151, "step": 21015 }, { "epoch": 11.740782122905028, "grad_norm": 0.43488016724586487, "learning_rate": 0.00041448179271708687, "loss": 0.4743, "step": 21016 }, { "epoch": 11.741340782122904, "grad_norm": 0.7332260608673096, "learning_rate": 0.000414453781512605, "loss": 0.4763, "step": 21017 }, { "epoch": 11.741899441340783, "grad_norm": 0.49806851148605347, "learning_rate": 0.0004144257703081233, "loss": 0.449, "step": 21018 }, { "epoch": 11.74245810055866, "grad_norm": 0.622251570224762, "learning_rate": 0.0004143977591036415, "loss": 0.4362, "step": 21019 }, { "epoch": 11.743016759776536, "grad_norm": 3.3908772468566895, "learning_rate": 0.00041436974789915964, "loss": 0.3371, "step": 21020 }, { "epoch": 11.743575418994414, "grad_norm": 3.059241771697998, "learning_rate": 0.0004143417366946779, "loss": 0.4249, "step": 21021 }, { "epoch": 
11.74413407821229, "grad_norm": 2.3904757499694824, "learning_rate": 0.0004143137254901961, "loss": 0.4964, "step": 21022 }, { "epoch": 11.744692737430167, "grad_norm": 0.6678722500801086, "learning_rate": 0.0004142857142857143, "loss": 0.3736, "step": 21023 }, { "epoch": 11.745251396648044, "grad_norm": 0.9124069213867188, "learning_rate": 0.0004142577030812325, "loss": 0.4052, "step": 21024 }, { "epoch": 11.745810055865922, "grad_norm": 2.4555346965789795, "learning_rate": 0.00041422969187675067, "loss": 0.3867, "step": 21025 }, { "epoch": 11.746368715083799, "grad_norm": 0.7680983543395996, "learning_rate": 0.00041420168067226893, "loss": 0.4906, "step": 21026 }, { "epoch": 11.746927374301675, "grad_norm": 0.5838503241539001, "learning_rate": 0.00041417366946778714, "loss": 0.4238, "step": 21027 }, { "epoch": 11.747486033519554, "grad_norm": 0.4185757339000702, "learning_rate": 0.00041414565826330534, "loss": 0.4598, "step": 21028 }, { "epoch": 11.74804469273743, "grad_norm": 0.45984694361686707, "learning_rate": 0.00041411764705882355, "loss": 0.4286, "step": 21029 }, { "epoch": 11.748603351955307, "grad_norm": 0.5626301169395447, "learning_rate": 0.00041408963585434175, "loss": 0.42, "step": 21030 }, { "epoch": 11.749162011173185, "grad_norm": 0.6233865022659302, "learning_rate": 0.00041406162464985996, "loss": 0.4049, "step": 21031 }, { "epoch": 11.749720670391062, "grad_norm": 0.483267217874527, "learning_rate": 0.00041403361344537817, "loss": 0.3646, "step": 21032 }, { "epoch": 11.750279329608938, "grad_norm": 0.4188838005065918, "learning_rate": 0.0004140056022408963, "loss": 0.4286, "step": 21033 }, { "epoch": 11.750837988826815, "grad_norm": 0.43827927112579346, "learning_rate": 0.0004139775910364146, "loss": 0.3417, "step": 21034 }, { "epoch": 11.751396648044693, "grad_norm": 0.5048608779907227, "learning_rate": 0.0004139495798319328, "loss": 0.4599, "step": 21035 }, { "epoch": 11.75195530726257, "grad_norm": 1.21855890750885, "learning_rate": 
0.000413921568627451, "loss": 0.4923, "step": 21036 }, { "epoch": 11.752513966480446, "grad_norm": 0.6267884373664856, "learning_rate": 0.0004138935574229692, "loss": 0.4708, "step": 21037 }, { "epoch": 11.753072625698325, "grad_norm": 0.6575756669044495, "learning_rate": 0.0004138655462184874, "loss": 0.3788, "step": 21038 }, { "epoch": 11.753631284916201, "grad_norm": 0.6032079458236694, "learning_rate": 0.0004138375350140056, "loss": 0.4088, "step": 21039 }, { "epoch": 11.754189944134078, "grad_norm": 0.3609331250190735, "learning_rate": 0.0004138095238095238, "loss": 0.4419, "step": 21040 }, { "epoch": 11.754748603351956, "grad_norm": 0.4986215829849243, "learning_rate": 0.000413781512605042, "loss": 0.4019, "step": 21041 }, { "epoch": 11.755307262569833, "grad_norm": 0.626741886138916, "learning_rate": 0.0004137535014005602, "loss": 0.6113, "step": 21042 }, { "epoch": 11.75586592178771, "grad_norm": 0.9297717809677124, "learning_rate": 0.00041372549019607843, "loss": 0.42, "step": 21043 }, { "epoch": 11.756424581005586, "grad_norm": 0.4717349112033844, "learning_rate": 0.00041369747899159664, "loss": 0.3961, "step": 21044 }, { "epoch": 11.756983240223464, "grad_norm": 0.48344510793685913, "learning_rate": 0.00041366946778711484, "loss": 0.4034, "step": 21045 }, { "epoch": 11.75754189944134, "grad_norm": 0.7169546484947205, "learning_rate": 0.0004136414565826331, "loss": 0.4604, "step": 21046 }, { "epoch": 11.758100558659217, "grad_norm": 0.5718005299568176, "learning_rate": 0.00041361344537815126, "loss": 0.3963, "step": 21047 }, { "epoch": 11.758659217877096, "grad_norm": 0.4658730626106262, "learning_rate": 0.00041358543417366946, "loss": 0.4585, "step": 21048 }, { "epoch": 11.759217877094972, "grad_norm": 0.47685444355010986, "learning_rate": 0.00041355742296918767, "loss": 0.3827, "step": 21049 }, { "epoch": 11.759776536312849, "grad_norm": 0.47769901156425476, "learning_rate": 0.0004135294117647059, "loss": 0.4263, "step": 21050 }, { "epoch": 
11.760335195530725, "grad_norm": 0.4674915373325348, "learning_rate": 0.00041350140056022413, "loss": 0.3213, "step": 21051 }, { "epoch": 11.760893854748604, "grad_norm": 0.7401136755943298, "learning_rate": 0.0004134733893557423, "loss": 0.3675, "step": 21052 }, { "epoch": 11.76145251396648, "grad_norm": 0.5001193881034851, "learning_rate": 0.0004134453781512605, "loss": 0.4973, "step": 21053 }, { "epoch": 11.762011173184357, "grad_norm": 0.4197474420070648, "learning_rate": 0.00041341736694677875, "loss": 0.3978, "step": 21054 }, { "epoch": 11.762569832402235, "grad_norm": 0.6338546276092529, "learning_rate": 0.0004133893557422969, "loss": 0.4277, "step": 21055 }, { "epoch": 11.763128491620112, "grad_norm": 0.5549630522727966, "learning_rate": 0.00041336134453781516, "loss": 0.3819, "step": 21056 }, { "epoch": 11.763687150837988, "grad_norm": 0.35421422123908997, "learning_rate": 0.0004133333333333333, "loss": 0.4265, "step": 21057 }, { "epoch": 11.764245810055867, "grad_norm": 0.637296736240387, "learning_rate": 0.0004133053221288515, "loss": 0.4988, "step": 21058 }, { "epoch": 11.764804469273743, "grad_norm": 0.7246748805046082, "learning_rate": 0.0004132773109243698, "loss": 0.4287, "step": 21059 }, { "epoch": 11.76536312849162, "grad_norm": 0.7421596050262451, "learning_rate": 0.00041324929971988793, "loss": 0.4299, "step": 21060 }, { "epoch": 11.765921787709498, "grad_norm": 0.4843403398990631, "learning_rate": 0.0004132212885154062, "loss": 0.4074, "step": 21061 }, { "epoch": 11.766480446927375, "grad_norm": 0.4219281077384949, "learning_rate": 0.0004131932773109244, "loss": 0.3862, "step": 21062 }, { "epoch": 11.767039106145251, "grad_norm": 0.40698567032814026, "learning_rate": 0.00041316526610644255, "loss": 0.3779, "step": 21063 }, { "epoch": 11.767597765363128, "grad_norm": 0.3869565427303314, "learning_rate": 0.0004131372549019608, "loss": 0.4651, "step": 21064 }, { "epoch": 11.768156424581006, "grad_norm": 0.49842697381973267, "learning_rate": 
0.00041310924369747896, "loss": 0.4472, "step": 21065 }, { "epoch": 11.768715083798883, "grad_norm": 0.7378855347633362, "learning_rate": 0.0004130812324929972, "loss": 0.6714, "step": 21066 }, { "epoch": 11.76927374301676, "grad_norm": 0.5357637405395508, "learning_rate": 0.00041305322128851543, "loss": 0.3991, "step": 21067 }, { "epoch": 11.769832402234638, "grad_norm": 2.3240857124328613, "learning_rate": 0.0004130252100840336, "loss": 0.4187, "step": 21068 }, { "epoch": 11.770391061452514, "grad_norm": 0.42079317569732666, "learning_rate": 0.00041299719887955184, "loss": 0.3602, "step": 21069 }, { "epoch": 11.77094972067039, "grad_norm": 0.4476144313812256, "learning_rate": 0.00041296918767507005, "loss": 0.3387, "step": 21070 }, { "epoch": 11.771508379888267, "grad_norm": 0.5272865295410156, "learning_rate": 0.00041294117647058825, "loss": 0.4694, "step": 21071 }, { "epoch": 11.772067039106146, "grad_norm": 0.48232021927833557, "learning_rate": 0.00041291316526610646, "loss": 0.3466, "step": 21072 }, { "epoch": 11.772625698324022, "grad_norm": 0.8820331692695618, "learning_rate": 0.0004128851540616246, "loss": 0.4046, "step": 21073 }, { "epoch": 11.773184357541899, "grad_norm": 0.5438117980957031, "learning_rate": 0.00041285714285714287, "loss": 0.4588, "step": 21074 }, { "epoch": 11.773743016759777, "grad_norm": 0.38747113943099976, "learning_rate": 0.0004128291316526611, "loss": 0.44, "step": 21075 }, { "epoch": 11.774301675977654, "grad_norm": 4.289586544036865, "learning_rate": 0.0004128011204481793, "loss": 0.5024, "step": 21076 }, { "epoch": 11.77486033519553, "grad_norm": 0.5946493744850159, "learning_rate": 0.0004127731092436975, "loss": 0.3977, "step": 21077 }, { "epoch": 11.775418994413407, "grad_norm": 2.347083330154419, "learning_rate": 0.0004127450980392157, "loss": 0.4292, "step": 21078 }, { "epoch": 11.775977653631285, "grad_norm": 0.6738131046295166, "learning_rate": 0.0004127170868347339, "loss": 0.5922, "step": 21079 }, { "epoch": 
11.776536312849162, "grad_norm": 0.3936261534690857, "learning_rate": 0.0004126890756302521, "loss": 0.3009, "step": 21080 }, { "epoch": 11.777094972067038, "grad_norm": 0.854155421257019, "learning_rate": 0.0004126610644257703, "loss": 0.3567, "step": 21081 }, { "epoch": 11.777653631284917, "grad_norm": 0.4538637101650238, "learning_rate": 0.0004126330532212885, "loss": 0.3475, "step": 21082 }, { "epoch": 11.778212290502793, "grad_norm": 0.5938655734062195, "learning_rate": 0.0004126050420168067, "loss": 0.3873, "step": 21083 }, { "epoch": 11.77877094972067, "grad_norm": 0.5250483155250549, "learning_rate": 0.00041257703081232493, "loss": 0.4788, "step": 21084 }, { "epoch": 11.779329608938548, "grad_norm": 1.1067556142807007, "learning_rate": 0.00041254901960784314, "loss": 0.5936, "step": 21085 }, { "epoch": 11.779888268156425, "grad_norm": 0.4631839692592621, "learning_rate": 0.0004125210084033614, "loss": 0.4123, "step": 21086 }, { "epoch": 11.780446927374301, "grad_norm": 0.3276774287223816, "learning_rate": 0.00041249299719887955, "loss": 0.3682, "step": 21087 }, { "epoch": 11.78100558659218, "grad_norm": 0.4596749246120453, "learning_rate": 0.00041246498599439776, "loss": 0.3901, "step": 21088 }, { "epoch": 11.781564245810056, "grad_norm": 0.5934166312217712, "learning_rate": 0.00041243697478991596, "loss": 0.454, "step": 21089 }, { "epoch": 11.782122905027933, "grad_norm": 0.5500884652137756, "learning_rate": 0.00041240896358543417, "loss": 0.4513, "step": 21090 }, { "epoch": 11.78268156424581, "grad_norm": 0.7103753685951233, "learning_rate": 0.00041238095238095243, "loss": 0.3383, "step": 21091 }, { "epoch": 11.783240223463688, "grad_norm": 0.6784194111824036, "learning_rate": 0.0004123529411764706, "loss": 0.3965, "step": 21092 }, { "epoch": 11.783798882681564, "grad_norm": 0.5971254706382751, "learning_rate": 0.0004123249299719888, "loss": 0.68, "step": 21093 }, { "epoch": 11.78435754189944, "grad_norm": 0.5012035369873047, "learning_rate": 
0.00041229691876750705, "loss": 0.5393, "step": 21094 }, { "epoch": 11.78491620111732, "grad_norm": 0.3639828562736511, "learning_rate": 0.0004122689075630252, "loss": 0.3184, "step": 21095 }, { "epoch": 11.785474860335196, "grad_norm": 0.4151918292045593, "learning_rate": 0.00041224089635854346, "loss": 0.3936, "step": 21096 }, { "epoch": 11.786033519553072, "grad_norm": 0.6073093414306641, "learning_rate": 0.0004122128851540616, "loss": 0.4088, "step": 21097 }, { "epoch": 11.786592178770949, "grad_norm": 0.9797057509422302, "learning_rate": 0.0004121848739495798, "loss": 0.3837, "step": 21098 }, { "epoch": 11.787150837988827, "grad_norm": 0.3993837237358093, "learning_rate": 0.0004121568627450981, "loss": 0.405, "step": 21099 }, { "epoch": 11.787709497206704, "grad_norm": 1.075048804283142, "learning_rate": 0.00041212885154061623, "loss": 0.5063, "step": 21100 }, { "epoch": 11.78826815642458, "grad_norm": 0.6167490482330322, "learning_rate": 0.0004121008403361345, "loss": 0.5004, "step": 21101 }, { "epoch": 11.788826815642459, "grad_norm": 0.38762179017066956, "learning_rate": 0.0004120728291316527, "loss": 0.3749, "step": 21102 }, { "epoch": 11.789385474860335, "grad_norm": 0.832881510257721, "learning_rate": 0.00041204481792717085, "loss": 0.3566, "step": 21103 }, { "epoch": 11.789944134078212, "grad_norm": 0.5114133358001709, "learning_rate": 0.0004120168067226891, "loss": 0.4031, "step": 21104 }, { "epoch": 11.79050279329609, "grad_norm": 0.47123122215270996, "learning_rate": 0.00041198879551820726, "loss": 0.5073, "step": 21105 }, { "epoch": 11.791061452513967, "grad_norm": 0.4798649549484253, "learning_rate": 0.0004119607843137255, "loss": 0.5166, "step": 21106 }, { "epoch": 11.791620111731843, "grad_norm": 0.3645530343055725, "learning_rate": 0.0004119327731092437, "loss": 0.3972, "step": 21107 }, { "epoch": 11.79217877094972, "grad_norm": 0.8375932574272156, "learning_rate": 0.0004119047619047619, "loss": 0.5054, "step": 21108 }, { "epoch": 
11.792737430167598, "grad_norm": 0.41399019956588745, "learning_rate": 0.00041187675070028014, "loss": 0.4554, "step": 21109 }, { "epoch": 11.793296089385475, "grad_norm": 0.3531157076358795, "learning_rate": 0.00041184873949579834, "loss": 0.3153, "step": 21110 }, { "epoch": 11.793854748603351, "grad_norm": 0.4534890651702881, "learning_rate": 0.00041182072829131655, "loss": 0.6091, "step": 21111 }, { "epoch": 11.79441340782123, "grad_norm": 0.47167131304740906, "learning_rate": 0.00041179271708683475, "loss": 0.5476, "step": 21112 }, { "epoch": 11.794972067039106, "grad_norm": 0.553837239742279, "learning_rate": 0.0004117647058823529, "loss": 0.5244, "step": 21113 }, { "epoch": 11.795530726256983, "grad_norm": 0.5114364624023438, "learning_rate": 0.00041173669467787117, "loss": 0.3889, "step": 21114 }, { "epoch": 11.796089385474861, "grad_norm": 5.881773471832275, "learning_rate": 0.00041170868347338937, "loss": 0.4724, "step": 21115 }, { "epoch": 11.796648044692738, "grad_norm": 0.4173142910003662, "learning_rate": 0.0004116806722689076, "loss": 0.3378, "step": 21116 }, { "epoch": 11.797206703910614, "grad_norm": 0.4318006634712219, "learning_rate": 0.0004116526610644258, "loss": 0.448, "step": 21117 }, { "epoch": 11.797765363128491, "grad_norm": 1.1033917665481567, "learning_rate": 0.000411624649859944, "loss": 0.3458, "step": 21118 }, { "epoch": 11.79832402234637, "grad_norm": 0.3786972165107727, "learning_rate": 0.0004115966386554622, "loss": 0.4512, "step": 21119 }, { "epoch": 11.798882681564246, "grad_norm": 0.3894638419151306, "learning_rate": 0.0004115686274509804, "loss": 0.3944, "step": 21120 }, { "epoch": 11.799441340782122, "grad_norm": 0.4841580092906952, "learning_rate": 0.0004115406162464986, "loss": 0.421, "step": 21121 }, { "epoch": 11.8, "grad_norm": 0.3173466920852661, "learning_rate": 0.0004115126050420168, "loss": 0.2402, "step": 21122 }, { "epoch": 11.800558659217877, "grad_norm": 0.5621463656425476, "learning_rate": 0.000411484593837535, 
"loss": 0.4858, "step": 21123 }, { "epoch": 11.801117318435754, "grad_norm": 0.5451611876487732, "learning_rate": 0.0004114565826330532, "loss": 0.3647, "step": 21124 }, { "epoch": 11.80167597765363, "grad_norm": 0.4159115254878998, "learning_rate": 0.00041142857142857143, "loss": 0.3247, "step": 21125 }, { "epoch": 11.802234636871509, "grad_norm": 0.8276838660240173, "learning_rate": 0.0004114005602240897, "loss": 0.5077, "step": 21126 }, { "epoch": 11.802793296089385, "grad_norm": 0.45653706789016724, "learning_rate": 0.00041137254901960784, "loss": 0.3926, "step": 21127 }, { "epoch": 11.803351955307262, "grad_norm": 0.9907598495483398, "learning_rate": 0.00041134453781512605, "loss": 0.4088, "step": 21128 }, { "epoch": 11.80391061452514, "grad_norm": 0.9811581373214722, "learning_rate": 0.00041131652661064426, "loss": 0.3878, "step": 21129 }, { "epoch": 11.804469273743017, "grad_norm": 0.5454853773117065, "learning_rate": 0.00041128851540616246, "loss": 0.401, "step": 21130 }, { "epoch": 11.805027932960893, "grad_norm": 0.6509480476379395, "learning_rate": 0.0004112605042016807, "loss": 0.4322, "step": 21131 }, { "epoch": 11.805586592178772, "grad_norm": 0.4455661475658417, "learning_rate": 0.0004112324929971989, "loss": 0.414, "step": 21132 }, { "epoch": 11.806145251396648, "grad_norm": 0.8075137734413147, "learning_rate": 0.0004112044817927171, "loss": 0.4282, "step": 21133 }, { "epoch": 11.806703910614525, "grad_norm": 1.9101922512054443, "learning_rate": 0.00041117647058823534, "loss": 0.4059, "step": 21134 }, { "epoch": 11.807262569832401, "grad_norm": 1.0270003080368042, "learning_rate": 0.0004111484593837535, "loss": 0.336, "step": 21135 }, { "epoch": 11.80782122905028, "grad_norm": 0.5320864319801331, "learning_rate": 0.00041112044817927175, "loss": 0.5753, "step": 21136 }, { "epoch": 11.808379888268156, "grad_norm": 0.4468209445476532, "learning_rate": 0.0004110924369747899, "loss": 0.397, "step": 21137 }, { "epoch": 11.808938547486033, "grad_norm": 
0.5232101678848267, "learning_rate": 0.0004110644257703081, "loss": 0.4884, "step": 21138 }, { "epoch": 11.809497206703911, "grad_norm": 0.6122629046440125, "learning_rate": 0.00041103641456582637, "loss": 0.4768, "step": 21139 }, { "epoch": 11.810055865921788, "grad_norm": 0.5297895669937134, "learning_rate": 0.0004110084033613445, "loss": 0.4466, "step": 21140 }, { "epoch": 11.810614525139664, "grad_norm": 0.7453494071960449, "learning_rate": 0.0004109803921568628, "loss": 0.6733, "step": 21141 }, { "epoch": 11.811173184357543, "grad_norm": 0.488770991563797, "learning_rate": 0.000410952380952381, "loss": 0.3585, "step": 21142 }, { "epoch": 11.81173184357542, "grad_norm": 0.5269141793251038, "learning_rate": 0.00041092436974789914, "loss": 0.4053, "step": 21143 }, { "epoch": 11.812290502793296, "grad_norm": 0.6079568266868591, "learning_rate": 0.0004108963585434174, "loss": 0.4506, "step": 21144 }, { "epoch": 11.812849162011172, "grad_norm": 0.7228178381919861, "learning_rate": 0.00041086834733893555, "loss": 0.4481, "step": 21145 }, { "epoch": 11.81340782122905, "grad_norm": 0.5010695457458496, "learning_rate": 0.00041084033613445376, "loss": 0.3148, "step": 21146 }, { "epoch": 11.813966480446927, "grad_norm": 0.43662238121032715, "learning_rate": 0.000410812324929972, "loss": 0.3595, "step": 21147 }, { "epoch": 11.814525139664804, "grad_norm": 4.003963947296143, "learning_rate": 0.00041078431372549017, "loss": 0.3745, "step": 21148 }, { "epoch": 11.815083798882682, "grad_norm": 0.37572428584098816, "learning_rate": 0.00041075630252100843, "loss": 0.4348, "step": 21149 }, { "epoch": 11.815642458100559, "grad_norm": 0.5043729543685913, "learning_rate": 0.00041072829131652664, "loss": 0.5413, "step": 21150 }, { "epoch": 11.816201117318435, "grad_norm": 0.8976406455039978, "learning_rate": 0.0004107002801120448, "loss": 0.4063, "step": 21151 }, { "epoch": 11.816759776536312, "grad_norm": 0.3893214464187622, "learning_rate": 0.00041067226890756305, "loss": 0.37, 
"step": 21152 }, { "epoch": 11.81731843575419, "grad_norm": 0.7534773945808411, "learning_rate": 0.0004106442577030812, "loss": 0.5862, "step": 21153 }, { "epoch": 11.817877094972067, "grad_norm": 0.48971685767173767, "learning_rate": 0.00041061624649859946, "loss": 0.5326, "step": 21154 }, { "epoch": 11.818435754189943, "grad_norm": 0.8797467947006226, "learning_rate": 0.00041058823529411767, "loss": 0.5168, "step": 21155 }, { "epoch": 11.818994413407822, "grad_norm": 0.5101203918457031, "learning_rate": 0.0004105602240896358, "loss": 0.4865, "step": 21156 }, { "epoch": 11.819553072625698, "grad_norm": 0.9895678758621216, "learning_rate": 0.0004105322128851541, "loss": 0.7454, "step": 21157 }, { "epoch": 11.820111731843575, "grad_norm": 1.2504035234451294, "learning_rate": 0.0004105042016806723, "loss": 0.4513, "step": 21158 }, { "epoch": 11.820670391061453, "grad_norm": 0.8474510312080383, "learning_rate": 0.0004104761904761905, "loss": 0.4064, "step": 21159 }, { "epoch": 11.82122905027933, "grad_norm": 0.44312119483947754, "learning_rate": 0.0004104481792717087, "loss": 0.4153, "step": 21160 }, { "epoch": 11.821787709497206, "grad_norm": 0.6110444664955139, "learning_rate": 0.00041042016806722685, "loss": 0.4067, "step": 21161 }, { "epoch": 11.822346368715085, "grad_norm": 0.4869738221168518, "learning_rate": 0.0004103921568627451, "loss": 0.453, "step": 21162 }, { "epoch": 11.822905027932961, "grad_norm": 0.8458188772201538, "learning_rate": 0.0004103641456582633, "loss": 0.4881, "step": 21163 }, { "epoch": 11.823463687150838, "grad_norm": 1.248897910118103, "learning_rate": 0.0004103361344537815, "loss": 0.3721, "step": 21164 }, { "epoch": 11.824022346368714, "grad_norm": 0.9057021737098694, "learning_rate": 0.0004103081232492997, "loss": 0.245, "step": 21165 }, { "epoch": 11.824581005586593, "grad_norm": 0.45632514357566833, "learning_rate": 0.00041028011204481793, "loss": 0.3486, "step": 21166 }, { "epoch": 11.82513966480447, "grad_norm": 0.6380709409713745, 
"learning_rate": 0.00041025210084033614, "loss": 0.4601, "step": 21167 }, { "epoch": 11.825698324022346, "grad_norm": 1.0564601421356201, "learning_rate": 0.00041022408963585434, "loss": 0.3373, "step": 21168 }, { "epoch": 11.826256983240224, "grad_norm": 0.43833258748054504, "learning_rate": 0.0004101960784313726, "loss": 0.3901, "step": 21169 }, { "epoch": 11.8268156424581, "grad_norm": 0.38899490237236023, "learning_rate": 0.00041016806722689076, "loss": 0.4047, "step": 21170 }, { "epoch": 11.827374301675977, "grad_norm": 0.4540770351886749, "learning_rate": 0.00041014005602240896, "loss": 0.3086, "step": 21171 }, { "epoch": 11.827932960893854, "grad_norm": 1.2066417932510376, "learning_rate": 0.00041011204481792717, "loss": 0.4631, "step": 21172 }, { "epoch": 11.828491620111732, "grad_norm": 0.9145241975784302, "learning_rate": 0.0004100840336134454, "loss": 0.3756, "step": 21173 }, { "epoch": 11.829050279329609, "grad_norm": 0.7616272568702698, "learning_rate": 0.00041005602240896363, "loss": 0.3951, "step": 21174 }, { "epoch": 11.829608938547485, "grad_norm": 2.4852514266967773, "learning_rate": 0.0004100280112044818, "loss": 0.4663, "step": 21175 }, { "epoch": 11.830167597765364, "grad_norm": 0.3470335006713867, "learning_rate": 0.00041, "loss": 0.3709, "step": 21176 }, { "epoch": 11.83072625698324, "grad_norm": 0.4376351237297058, "learning_rate": 0.00040997198879551825, "loss": 0.4154, "step": 21177 }, { "epoch": 11.831284916201117, "grad_norm": 0.5533244609832764, "learning_rate": 0.0004099439775910364, "loss": 0.5036, "step": 21178 }, { "epoch": 11.831843575418995, "grad_norm": 0.47184041142463684, "learning_rate": 0.00040991596638655466, "loss": 0.4714, "step": 21179 }, { "epoch": 11.832402234636872, "grad_norm": 0.64646315574646, "learning_rate": 0.0004098879551820728, "loss": 0.5599, "step": 21180 }, { "epoch": 11.832960893854748, "grad_norm": 0.9532169699668884, "learning_rate": 0.000409859943977591, "loss": 0.4352, "step": 21181 }, { "epoch": 
11.833519553072625, "grad_norm": 0.4523061513900757, "learning_rate": 0.0004098319327731093, "loss": 0.3722, "step": 21182 }, { "epoch": 11.834078212290503, "grad_norm": 0.7187719941139221, "learning_rate": 0.00040980392156862743, "loss": 0.3737, "step": 21183 }, { "epoch": 11.83463687150838, "grad_norm": 0.5003236532211304, "learning_rate": 0.0004097759103641457, "loss": 0.5246, "step": 21184 }, { "epoch": 11.835195530726256, "grad_norm": 0.6886492371559143, "learning_rate": 0.0004097478991596639, "loss": 0.5718, "step": 21185 }, { "epoch": 11.835754189944135, "grad_norm": 0.39473968744277954, "learning_rate": 0.00040971988795518205, "loss": 0.3909, "step": 21186 }, { "epoch": 11.836312849162011, "grad_norm": 0.4980083405971527, "learning_rate": 0.0004096918767507003, "loss": 0.3819, "step": 21187 }, { "epoch": 11.836871508379888, "grad_norm": 0.4633629620075226, "learning_rate": 0.00040966386554621846, "loss": 0.3969, "step": 21188 }, { "epoch": 11.837430167597766, "grad_norm": 0.425487220287323, "learning_rate": 0.0004096358543417367, "loss": 0.39, "step": 21189 }, { "epoch": 11.837988826815643, "grad_norm": 6.8655829429626465, "learning_rate": 0.00040960784313725493, "loss": 0.5207, "step": 21190 }, { "epoch": 11.83854748603352, "grad_norm": 0.4440138041973114, "learning_rate": 0.0004095798319327731, "loss": 0.4067, "step": 21191 }, { "epoch": 11.839106145251396, "grad_norm": 0.5871977210044861, "learning_rate": 0.00040955182072829134, "loss": 0.5089, "step": 21192 }, { "epoch": 11.839664804469274, "grad_norm": 0.3644394874572754, "learning_rate": 0.00040952380952380955, "loss": 0.3505, "step": 21193 }, { "epoch": 11.84022346368715, "grad_norm": 0.6120206117630005, "learning_rate": 0.00040949579831932775, "loss": 0.4932, "step": 21194 }, { "epoch": 11.840782122905027, "grad_norm": 0.6395864486694336, "learning_rate": 0.00040946778711484596, "loss": 0.5225, "step": 21195 }, { "epoch": 11.841340782122906, "grad_norm": 0.5474733710289001, "learning_rate": 
0.0004094397759103641, "loss": 0.3859, "step": 21196 }, { "epoch": 11.841899441340782, "grad_norm": 0.6462367177009583, "learning_rate": 0.00040941176470588237, "loss": 0.5595, "step": 21197 }, { "epoch": 11.842458100558659, "grad_norm": 0.6597458720207214, "learning_rate": 0.0004093837535014006, "loss": 0.3896, "step": 21198 }, { "epoch": 11.843016759776535, "grad_norm": 0.7283481359481812, "learning_rate": 0.0004093557422969188, "loss": 0.4628, "step": 21199 }, { "epoch": 11.843575418994414, "grad_norm": 0.5071769952774048, "learning_rate": 0.000409327731092437, "loss": 0.4068, "step": 21200 }, { "epoch": 11.84413407821229, "grad_norm": 0.7466905117034912, "learning_rate": 0.0004092997198879552, "loss": 0.528, "step": 21201 }, { "epoch": 11.844692737430167, "grad_norm": 0.5412207245826721, "learning_rate": 0.0004092717086834734, "loss": 0.4866, "step": 21202 }, { "epoch": 11.845251396648045, "grad_norm": 0.445582777261734, "learning_rate": 0.0004092436974789916, "loss": 0.3945, "step": 21203 }, { "epoch": 11.845810055865922, "grad_norm": 0.7843878269195557, "learning_rate": 0.0004092156862745098, "loss": 0.4853, "step": 21204 }, { "epoch": 11.846368715083798, "grad_norm": 0.7705541849136353, "learning_rate": 0.000409187675070028, "loss": 0.383, "step": 21205 }, { "epoch": 11.846927374301677, "grad_norm": 0.700949490070343, "learning_rate": 0.0004091596638655462, "loss": 0.3974, "step": 21206 }, { "epoch": 11.847486033519553, "grad_norm": 1.0537673234939575, "learning_rate": 0.00040913165266106443, "loss": 0.4724, "step": 21207 }, { "epoch": 11.84804469273743, "grad_norm": 0.6811145544052124, "learning_rate": 0.00040910364145658264, "loss": 0.5616, "step": 21208 }, { "epoch": 11.848603351955306, "grad_norm": 0.5758842825889587, "learning_rate": 0.0004090756302521009, "loss": 0.4885, "step": 21209 }, { "epoch": 11.849162011173185, "grad_norm": 0.5112441778182983, "learning_rate": 0.00040904761904761905, "loss": 0.5056, "step": 21210 }, { "epoch": 
11.849720670391061, "grad_norm": 0.3904726803302765, "learning_rate": 0.00040901960784313726, "loss": 0.4036, "step": 21211 }, { "epoch": 11.850279329608938, "grad_norm": 1.1251496076583862, "learning_rate": 0.00040899159663865546, "loss": 0.5042, "step": 21212 }, { "epoch": 11.850837988826816, "grad_norm": 0.48130232095718384, "learning_rate": 0.00040896358543417367, "loss": 0.4126, "step": 21213 }, { "epoch": 11.851396648044693, "grad_norm": 0.6811621785163879, "learning_rate": 0.00040893557422969193, "loss": 0.4029, "step": 21214 }, { "epoch": 11.85195530726257, "grad_norm": 0.46350258588790894, "learning_rate": 0.0004089075630252101, "loss": 0.3203, "step": 21215 }, { "epoch": 11.852513966480448, "grad_norm": 0.5114133954048157, "learning_rate": 0.0004088795518207283, "loss": 0.3945, "step": 21216 }, { "epoch": 11.853072625698324, "grad_norm": 0.49909162521362305, "learning_rate": 0.00040885154061624655, "loss": 0.5361, "step": 21217 }, { "epoch": 11.8536312849162, "grad_norm": 0.3986165225505829, "learning_rate": 0.0004088235294117647, "loss": 0.4049, "step": 21218 }, { "epoch": 11.854189944134077, "grad_norm": 0.8127726912498474, "learning_rate": 0.00040879551820728296, "loss": 0.4125, "step": 21219 }, { "epoch": 11.854748603351956, "grad_norm": 0.4818750023841858, "learning_rate": 0.0004087675070028011, "loss": 0.3824, "step": 21220 }, { "epoch": 11.855307262569832, "grad_norm": 0.43652278184890747, "learning_rate": 0.0004087394957983193, "loss": 0.4454, "step": 21221 }, { "epoch": 11.855865921787709, "grad_norm": 0.6294071078300476, "learning_rate": 0.0004087114845938376, "loss": 0.5113, "step": 21222 }, { "epoch": 11.856424581005587, "grad_norm": 0.7095273733139038, "learning_rate": 0.00040868347338935573, "loss": 0.3583, "step": 21223 }, { "epoch": 11.856983240223464, "grad_norm": 0.453199565410614, "learning_rate": 0.000408655462184874, "loss": 0.3652, "step": 21224 }, { "epoch": 11.85754189944134, "grad_norm": 0.64100182056427, "learning_rate": 
0.0004086274509803922, "loss": 0.3838, "step": 21225 }, { "epoch": 11.858100558659217, "grad_norm": 0.42354270815849304, "learning_rate": 0.00040859943977591035, "loss": 0.4516, "step": 21226 }, { "epoch": 11.858659217877095, "grad_norm": 0.3842369019985199, "learning_rate": 0.0004085714285714286, "loss": 0.3885, "step": 21227 }, { "epoch": 11.859217877094972, "grad_norm": 0.6809428930282593, "learning_rate": 0.00040854341736694676, "loss": 0.4357, "step": 21228 }, { "epoch": 11.859776536312848, "grad_norm": 0.5415769219398499, "learning_rate": 0.000408515406162465, "loss": 0.5023, "step": 21229 }, { "epoch": 11.860335195530727, "grad_norm": 0.3625841438770294, "learning_rate": 0.0004084873949579832, "loss": 0.329, "step": 21230 }, { "epoch": 11.860893854748603, "grad_norm": 0.5092974305152893, "learning_rate": 0.0004084593837535014, "loss": 0.4724, "step": 21231 }, { "epoch": 11.86145251396648, "grad_norm": 0.6173868179321289, "learning_rate": 0.00040843137254901964, "loss": 0.5882, "step": 21232 }, { "epoch": 11.862011173184358, "grad_norm": 0.331398069858551, "learning_rate": 0.00040840336134453784, "loss": 0.3486, "step": 21233 }, { "epoch": 11.862569832402235, "grad_norm": 0.7325568199157715, "learning_rate": 0.00040837535014005605, "loss": 0.466, "step": 21234 }, { "epoch": 11.863128491620111, "grad_norm": 0.318058580160141, "learning_rate": 0.00040834733893557425, "loss": 0.2464, "step": 21235 }, { "epoch": 11.86368715083799, "grad_norm": 0.7323331832885742, "learning_rate": 0.0004083193277310924, "loss": 0.4964, "step": 21236 }, { "epoch": 11.864245810055866, "grad_norm": 0.5039665102958679, "learning_rate": 0.00040829131652661067, "loss": 0.3552, "step": 21237 }, { "epoch": 11.864804469273743, "grad_norm": 1.4152615070343018, "learning_rate": 0.00040826330532212887, "loss": 0.4769, "step": 21238 }, { "epoch": 11.86536312849162, "grad_norm": 0.324214369058609, "learning_rate": 0.0004082352941176471, "loss": 0.3355, "step": 21239 }, { "epoch": 
11.865921787709498, "grad_norm": 2.453305721282959, "learning_rate": 0.0004082072829131653, "loss": 0.4643, "step": 21240 }, { "epoch": 11.866480446927374, "grad_norm": 0.7970339059829712, "learning_rate": 0.0004081792717086835, "loss": 0.6719, "step": 21241 }, { "epoch": 11.867039106145251, "grad_norm": 0.7285621166229248, "learning_rate": 0.0004081512605042017, "loss": 0.3942, "step": 21242 }, { "epoch": 11.86759776536313, "grad_norm": 0.8324102759361267, "learning_rate": 0.0004081232492997199, "loss": 0.535, "step": 21243 }, { "epoch": 11.868156424581006, "grad_norm": 0.7698560953140259, "learning_rate": 0.0004080952380952381, "loss": 0.5079, "step": 21244 }, { "epoch": 11.868715083798882, "grad_norm": 1.0058929920196533, "learning_rate": 0.0004080672268907563, "loss": 0.323, "step": 21245 }, { "epoch": 11.869273743016759, "grad_norm": 0.5254631638526917, "learning_rate": 0.0004080392156862745, "loss": 0.3405, "step": 21246 }, { "epoch": 11.869832402234637, "grad_norm": 0.41968396306037903, "learning_rate": 0.0004080112044817927, "loss": 0.4673, "step": 21247 }, { "epoch": 11.870391061452514, "grad_norm": 0.4429382383823395, "learning_rate": 0.00040798319327731093, "loss": 0.3835, "step": 21248 }, { "epoch": 11.87094972067039, "grad_norm": 0.4723098576068878, "learning_rate": 0.0004079551820728292, "loss": 0.3512, "step": 21249 }, { "epoch": 11.871508379888269, "grad_norm": 0.5303624272346497, "learning_rate": 0.00040792717086834734, "loss": 0.4501, "step": 21250 }, { "epoch": 11.872067039106145, "grad_norm": 1.0976248979568481, "learning_rate": 0.00040789915966386555, "loss": 0.5495, "step": 21251 }, { "epoch": 11.872625698324022, "grad_norm": 0.831189751625061, "learning_rate": 0.00040787114845938376, "loss": 0.4487, "step": 21252 }, { "epoch": 11.8731843575419, "grad_norm": 0.47820669412612915, "learning_rate": 0.00040784313725490196, "loss": 0.4165, "step": 21253 }, { "epoch": 11.873743016759777, "grad_norm": 0.3727686107158661, "learning_rate": 
0.0004078151260504202, "loss": 0.4361, "step": 21254 }, { "epoch": 11.874301675977653, "grad_norm": 3.9249637126922607, "learning_rate": 0.0004077871148459384, "loss": 0.3182, "step": 21255 }, { "epoch": 11.87486033519553, "grad_norm": 0.540917158126831, "learning_rate": 0.0004077591036414566, "loss": 0.3422, "step": 21256 }, { "epoch": 11.875418994413408, "grad_norm": 0.6028260588645935, "learning_rate": 0.00040773109243697484, "loss": 0.4084, "step": 21257 }, { "epoch": 11.875977653631285, "grad_norm": 0.842046856880188, "learning_rate": 0.000407703081232493, "loss": 0.5078, "step": 21258 }, { "epoch": 11.876536312849161, "grad_norm": 0.5099661946296692, "learning_rate": 0.0004076750700280112, "loss": 0.3774, "step": 21259 }, { "epoch": 11.87709497206704, "grad_norm": 0.5762701630592346, "learning_rate": 0.0004076470588235294, "loss": 0.4432, "step": 21260 }, { "epoch": 11.877653631284916, "grad_norm": 1.1062768697738647, "learning_rate": 0.0004076190476190476, "loss": 0.4896, "step": 21261 }, { "epoch": 11.878212290502793, "grad_norm": 0.42448118329048157, "learning_rate": 0.00040759103641456587, "loss": 0.3162, "step": 21262 }, { "epoch": 11.878770949720671, "grad_norm": 0.8198091983795166, "learning_rate": 0.000407563025210084, "loss": 0.5053, "step": 21263 }, { "epoch": 11.879329608938548, "grad_norm": 0.612362265586853, "learning_rate": 0.00040753501400560223, "loss": 0.4886, "step": 21264 }, { "epoch": 11.879888268156424, "grad_norm": 0.33380112051963806, "learning_rate": 0.0004075070028011205, "loss": 0.2727, "step": 21265 }, { "epoch": 11.880446927374301, "grad_norm": 1.6635680198669434, "learning_rate": 0.00040747899159663864, "loss": 0.4065, "step": 21266 }, { "epoch": 11.88100558659218, "grad_norm": 1.0300723314285278, "learning_rate": 0.0004074509803921569, "loss": 0.3884, "step": 21267 }, { "epoch": 11.881564245810056, "grad_norm": 0.5811362862586975, "learning_rate": 0.00040742296918767505, "loss": 0.3729, "step": 21268 }, { "epoch": 
11.882122905027932, "grad_norm": 1.3218542337417603, "learning_rate": 0.00040739495798319326, "loss": 0.4302, "step": 21269 }, { "epoch": 11.88268156424581, "grad_norm": 0.4359176754951477, "learning_rate": 0.0004073669467787115, "loss": 0.2741, "step": 21270 }, { "epoch": 11.883240223463687, "grad_norm": 0.5745676159858704, "learning_rate": 0.00040733893557422967, "loss": 0.5311, "step": 21271 }, { "epoch": 11.883798882681564, "grad_norm": 0.4885067343711853, "learning_rate": 0.00040731092436974793, "loss": 0.4657, "step": 21272 }, { "epoch": 11.88435754189944, "grad_norm": 0.3968677520751953, "learning_rate": 0.00040728291316526614, "loss": 0.3218, "step": 21273 }, { "epoch": 11.884916201117319, "grad_norm": 0.5480905175209045, "learning_rate": 0.0004072549019607843, "loss": 0.3741, "step": 21274 }, { "epoch": 11.885474860335195, "grad_norm": 1.2292654514312744, "learning_rate": 0.00040722689075630255, "loss": 0.3282, "step": 21275 }, { "epoch": 11.886033519553072, "grad_norm": 0.8086422085762024, "learning_rate": 0.0004071988795518207, "loss": 0.3245, "step": 21276 }, { "epoch": 11.88659217877095, "grad_norm": 1.569286584854126, "learning_rate": 0.00040717086834733896, "loss": 0.4709, "step": 21277 }, { "epoch": 11.887150837988827, "grad_norm": 1.3830845355987549, "learning_rate": 0.00040714285714285717, "loss": 0.3265, "step": 21278 }, { "epoch": 11.887709497206703, "grad_norm": 0.3987477123737335, "learning_rate": 0.0004071148459383753, "loss": 0.4086, "step": 21279 }, { "epoch": 11.888268156424582, "grad_norm": 0.622778594493866, "learning_rate": 0.0004070868347338936, "loss": 0.51, "step": 21280 }, { "epoch": 11.888826815642458, "grad_norm": 0.6695683598518372, "learning_rate": 0.0004070588235294118, "loss": 0.5908, "step": 21281 }, { "epoch": 11.889385474860335, "grad_norm": 0.6142347455024719, "learning_rate": 0.00040703081232493, "loss": 0.48, "step": 21282 }, { "epoch": 11.889944134078211, "grad_norm": 0.6269024014472961, "learning_rate": 
0.0004070028011204482, "loss": 0.5048, "step": 21283 }, { "epoch": 11.89050279329609, "grad_norm": 0.4405671954154968, "learning_rate": 0.00040697478991596635, "loss": 0.3683, "step": 21284 }, { "epoch": 11.891061452513966, "grad_norm": 0.46253782510757446, "learning_rate": 0.0004069467787114846, "loss": 0.3849, "step": 21285 }, { "epoch": 11.891620111731843, "grad_norm": 0.8965322375297546, "learning_rate": 0.0004069187675070028, "loss": 0.4877, "step": 21286 }, { "epoch": 11.892178770949721, "grad_norm": 11.137365341186523, "learning_rate": 0.000406890756302521, "loss": 0.5581, "step": 21287 }, { "epoch": 11.892737430167598, "grad_norm": 0.5854904651641846, "learning_rate": 0.0004068627450980392, "loss": 0.3538, "step": 21288 }, { "epoch": 11.893296089385474, "grad_norm": 0.427871972322464, "learning_rate": 0.00040683473389355743, "loss": 0.4615, "step": 21289 }, { "epoch": 11.893854748603353, "grad_norm": 0.40088075399398804, "learning_rate": 0.00040680672268907564, "loss": 0.3706, "step": 21290 }, { "epoch": 11.89441340782123, "grad_norm": 0.6828593611717224, "learning_rate": 0.00040677871148459384, "loss": 0.4686, "step": 21291 }, { "epoch": 11.894972067039106, "grad_norm": 0.9827330708503723, "learning_rate": 0.00040675070028011205, "loss": 0.4403, "step": 21292 }, { "epoch": 11.895530726256982, "grad_norm": 0.4057422876358032, "learning_rate": 0.00040672268907563026, "loss": 0.3575, "step": 21293 }, { "epoch": 11.89608938547486, "grad_norm": 0.5105399489402771, "learning_rate": 0.00040669467787114846, "loss": 0.5119, "step": 21294 }, { "epoch": 11.896648044692737, "grad_norm": 1.131582260131836, "learning_rate": 0.00040666666666666667, "loss": 0.3716, "step": 21295 }, { "epoch": 11.897206703910614, "grad_norm": 0.990439236164093, "learning_rate": 0.0004066386554621849, "loss": 0.3286, "step": 21296 }, { "epoch": 11.897765363128492, "grad_norm": 0.4834293723106384, "learning_rate": 0.00040661064425770313, "loss": 0.5017, "step": 21297 }, { "epoch": 
11.898324022346369, "grad_norm": 0.4201735258102417, "learning_rate": 0.0004065826330532213, "loss": 0.3577, "step": 21298 }, { "epoch": 11.898882681564245, "grad_norm": 0.3657025992870331, "learning_rate": 0.0004065546218487395, "loss": 0.4109, "step": 21299 }, { "epoch": 11.899441340782122, "grad_norm": 1.0979835987091064, "learning_rate": 0.0004065266106442577, "loss": 0.3041, "step": 21300 }, { "epoch": 11.9, "grad_norm": 0.5186835527420044, "learning_rate": 0.0004064985994397759, "loss": 0.3369, "step": 21301 }, { "epoch": 11.900558659217877, "grad_norm": 0.5685844421386719, "learning_rate": 0.00040647058823529416, "loss": 0.4358, "step": 21302 }, { "epoch": 11.901117318435753, "grad_norm": 0.5636425018310547, "learning_rate": 0.0004064425770308123, "loss": 0.4475, "step": 21303 }, { "epoch": 11.901675977653632, "grad_norm": 0.4192626178264618, "learning_rate": 0.0004064145658263305, "loss": 0.3539, "step": 21304 }, { "epoch": 11.902234636871508, "grad_norm": 11.530969619750977, "learning_rate": 0.0004063865546218488, "loss": 0.5548, "step": 21305 }, { "epoch": 11.902793296089385, "grad_norm": 3.8384783267974854, "learning_rate": 0.00040635854341736693, "loss": 0.3482, "step": 21306 }, { "epoch": 11.903351955307263, "grad_norm": 1.4641417264938354, "learning_rate": 0.0004063305322128852, "loss": 0.4488, "step": 21307 }, { "epoch": 11.90391061452514, "grad_norm": 0.6003350019454956, "learning_rate": 0.00040630252100840335, "loss": 0.4896, "step": 21308 }, { "epoch": 11.904469273743016, "grad_norm": 0.5757750272750854, "learning_rate": 0.00040627450980392155, "loss": 0.5085, "step": 21309 }, { "epoch": 11.905027932960895, "grad_norm": 0.4466097354888916, "learning_rate": 0.0004062464985994398, "loss": 0.3448, "step": 21310 }, { "epoch": 11.905586592178771, "grad_norm": 16.73419189453125, "learning_rate": 0.00040621848739495796, "loss": 0.4516, "step": 21311 }, { "epoch": 11.906145251396648, "grad_norm": 0.5923506617546082, "learning_rate": 0.0004061904761904762, 
"loss": 0.4252, "step": 21312 }, { "epoch": 11.906703910614524, "grad_norm": 2.602841377258301, "learning_rate": 0.00040616246498599443, "loss": 0.322, "step": 21313 }, { "epoch": 11.907262569832403, "grad_norm": 1.155871868133545, "learning_rate": 0.0004061344537815126, "loss": 0.4191, "step": 21314 }, { "epoch": 11.90782122905028, "grad_norm": 0.4974883198738098, "learning_rate": 0.00040610644257703084, "loss": 0.4063, "step": 21315 }, { "epoch": 11.908379888268156, "grad_norm": 3.6762585639953613, "learning_rate": 0.000406078431372549, "loss": 0.3998, "step": 21316 }, { "epoch": 11.908938547486034, "grad_norm": 0.6080918908119202, "learning_rate": 0.00040605042016806725, "loss": 0.5263, "step": 21317 }, { "epoch": 11.90949720670391, "grad_norm": 2.4871015548706055, "learning_rate": 0.00040602240896358546, "loss": 0.4439, "step": 21318 }, { "epoch": 11.910055865921787, "grad_norm": 0.39962974190711975, "learning_rate": 0.0004059943977591036, "loss": 0.3624, "step": 21319 }, { "epoch": 11.910614525139664, "grad_norm": 0.4608193635940552, "learning_rate": 0.00040596638655462187, "loss": 0.3423, "step": 21320 }, { "epoch": 11.911173184357542, "grad_norm": 0.41747844219207764, "learning_rate": 0.0004059383753501401, "loss": 0.3799, "step": 21321 }, { "epoch": 11.911731843575419, "grad_norm": 0.624941885471344, "learning_rate": 0.0004059103641456583, "loss": 0.5725, "step": 21322 }, { "epoch": 11.912290502793295, "grad_norm": 0.41783246397972107, "learning_rate": 0.0004058823529411765, "loss": 0.4539, "step": 21323 }, { "epoch": 11.912849162011174, "grad_norm": 0.37872254848480225, "learning_rate": 0.00040585434173669464, "loss": 0.4947, "step": 21324 }, { "epoch": 11.91340782122905, "grad_norm": 0.5078139901161194, "learning_rate": 0.0004058263305322129, "loss": 0.4066, "step": 21325 }, { "epoch": 11.913966480446927, "grad_norm": 0.6185089349746704, "learning_rate": 0.0004057983193277311, "loss": 0.3433, "step": 21326 }, { "epoch": 11.914525139664804, "grad_norm": 
0.5614469647407532, "learning_rate": 0.0004057703081232493, "loss": 0.4078, "step": 21327 }, { "epoch": 11.915083798882682, "grad_norm": 0.3830602467060089, "learning_rate": 0.0004057422969187675, "loss": 0.2841, "step": 21328 }, { "epoch": 11.915642458100558, "grad_norm": 0.43531695008277893, "learning_rate": 0.0004057142857142857, "loss": 0.4895, "step": 21329 }, { "epoch": 11.916201117318435, "grad_norm": 1.2865535020828247, "learning_rate": 0.00040568627450980393, "loss": 0.4536, "step": 21330 }, { "epoch": 11.916759776536313, "grad_norm": 0.7823388576507568, "learning_rate": 0.00040565826330532214, "loss": 0.4322, "step": 21331 }, { "epoch": 11.91731843575419, "grad_norm": 0.9831047654151917, "learning_rate": 0.00040563025210084034, "loss": 0.4312, "step": 21332 }, { "epoch": 11.917877094972066, "grad_norm": 0.4631880223751068, "learning_rate": 0.00040560224089635855, "loss": 0.3827, "step": 21333 }, { "epoch": 11.918435754189945, "grad_norm": 0.48576414585113525, "learning_rate": 0.00040557422969187676, "loss": 0.4894, "step": 21334 }, { "epoch": 11.918994413407821, "grad_norm": 0.5551460981369019, "learning_rate": 0.00040554621848739496, "loss": 0.4013, "step": 21335 }, { "epoch": 11.919553072625698, "grad_norm": 0.39262422919273376, "learning_rate": 0.00040551820728291317, "loss": 0.331, "step": 21336 }, { "epoch": 11.920111731843576, "grad_norm": 0.4389094412326813, "learning_rate": 0.00040549019607843143, "loss": 0.3676, "step": 21337 }, { "epoch": 11.920670391061453, "grad_norm": 0.4645405411720276, "learning_rate": 0.0004054621848739496, "loss": 0.522, "step": 21338 }, { "epoch": 11.92122905027933, "grad_norm": 0.6992455124855042, "learning_rate": 0.0004054341736694678, "loss": 0.4609, "step": 21339 }, { "epoch": 11.921787709497206, "grad_norm": 1.2163187265396118, "learning_rate": 0.000405406162464986, "loss": 0.4299, "step": 21340 }, { "epoch": 11.922346368715084, "grad_norm": 0.548793375492096, "learning_rate": 0.0004053781512605042, "loss": 0.544, 
"step": 21341 }, { "epoch": 11.922905027932961, "grad_norm": 0.9195559024810791, "learning_rate": 0.00040535014005602246, "loss": 0.4118, "step": 21342 }, { "epoch": 11.923463687150837, "grad_norm": 0.4788474440574646, "learning_rate": 0.0004053221288515406, "loss": 0.4488, "step": 21343 }, { "epoch": 11.924022346368716, "grad_norm": 0.5037703514099121, "learning_rate": 0.0004052941176470588, "loss": 0.3988, "step": 21344 }, { "epoch": 11.924581005586592, "grad_norm": 0.3578023314476013, "learning_rate": 0.0004052661064425771, "loss": 0.4265, "step": 21345 }, { "epoch": 11.925139664804469, "grad_norm": 0.6640678644180298, "learning_rate": 0.00040523809523809523, "loss": 0.4028, "step": 21346 }, { "epoch": 11.925698324022346, "grad_norm": 0.41473472118377686, "learning_rate": 0.0004052100840336135, "loss": 0.4429, "step": 21347 }, { "epoch": 11.926256983240224, "grad_norm": 0.44315069913864136, "learning_rate": 0.00040518207282913164, "loss": 0.3701, "step": 21348 }, { "epoch": 11.9268156424581, "grad_norm": 0.43728509545326233, "learning_rate": 0.00040515406162464985, "loss": 0.4413, "step": 21349 }, { "epoch": 11.927374301675977, "grad_norm": 0.7392879128456116, "learning_rate": 0.0004051260504201681, "loss": 0.4249, "step": 21350 }, { "epoch": 11.927932960893855, "grad_norm": 0.4067839980125427, "learning_rate": 0.00040509803921568626, "loss": 0.3119, "step": 21351 }, { "epoch": 11.928491620111732, "grad_norm": 0.4650818407535553, "learning_rate": 0.0004050700280112045, "loss": 0.458, "step": 21352 }, { "epoch": 11.929050279329608, "grad_norm": 0.9499150514602661, "learning_rate": 0.0004050420168067227, "loss": 0.4244, "step": 21353 }, { "epoch": 11.929608938547487, "grad_norm": 0.5892748832702637, "learning_rate": 0.0004050140056022409, "loss": 0.423, "step": 21354 }, { "epoch": 11.930167597765363, "grad_norm": 0.6039199233055115, "learning_rate": 0.00040498599439775914, "loss": 0.5248, "step": 21355 }, { "epoch": 11.93072625698324, "grad_norm": 
0.9062708616256714, "learning_rate": 0.0004049579831932773, "loss": 0.5105, "step": 21356 }, { "epoch": 11.931284916201117, "grad_norm": 0.5044767260551453, "learning_rate": 0.00040492997198879555, "loss": 0.3958, "step": 21357 }, { "epoch": 11.931843575418995, "grad_norm": 0.7158891558647156, "learning_rate": 0.00040490196078431375, "loss": 0.513, "step": 21358 }, { "epoch": 11.932402234636871, "grad_norm": 2.45357084274292, "learning_rate": 0.0004048739495798319, "loss": 0.3832, "step": 21359 }, { "epoch": 11.932960893854748, "grad_norm": 0.657284677028656, "learning_rate": 0.00040484593837535017, "loss": 0.4027, "step": 21360 }, { "epoch": 11.933519553072626, "grad_norm": 1.0045700073242188, "learning_rate": 0.00040481792717086837, "loss": 0.552, "step": 21361 }, { "epoch": 11.934078212290503, "grad_norm": 0.3644641935825348, "learning_rate": 0.0004047899159663866, "loss": 0.3589, "step": 21362 }, { "epoch": 11.93463687150838, "grad_norm": 0.9093109965324402, "learning_rate": 0.0004047619047619048, "loss": 0.4456, "step": 21363 }, { "epoch": 11.935195530726258, "grad_norm": 0.4375036656856537, "learning_rate": 0.00040473389355742294, "loss": 0.4686, "step": 21364 }, { "epoch": 11.935754189944134, "grad_norm": 0.4587719142436981, "learning_rate": 0.0004047058823529412, "loss": 0.4899, "step": 21365 }, { "epoch": 11.936312849162011, "grad_norm": 0.6220703125, "learning_rate": 0.0004046778711484594, "loss": 0.4008, "step": 21366 }, { "epoch": 11.936871508379888, "grad_norm": 1.466020107269287, "learning_rate": 0.00040464985994397755, "loss": 0.4538, "step": 21367 }, { "epoch": 11.937430167597766, "grad_norm": 0.5692799091339111, "learning_rate": 0.0004046218487394958, "loss": 0.5368, "step": 21368 }, { "epoch": 11.937988826815642, "grad_norm": 0.45667150616645813, "learning_rate": 0.000404593837535014, "loss": 0.4653, "step": 21369 }, { "epoch": 11.938547486033519, "grad_norm": 0.4480198919773102, "learning_rate": 0.0004045658263305322, "loss": 0.5272, "step": 
21370 }, { "epoch": 11.939106145251397, "grad_norm": 1.0969327688217163, "learning_rate": 0.00040453781512605043, "loss": 0.3963, "step": 21371 }, { "epoch": 11.939664804469274, "grad_norm": 0.6369842886924744, "learning_rate": 0.0004045098039215686, "loss": 0.5154, "step": 21372 }, { "epoch": 11.94022346368715, "grad_norm": 0.5188466310501099, "learning_rate": 0.00040448179271708684, "loss": 0.4026, "step": 21373 }, { "epoch": 11.940782122905027, "grad_norm": 0.471169650554657, "learning_rate": 0.00040445378151260505, "loss": 0.4585, "step": 21374 }, { "epoch": 11.941340782122905, "grad_norm": 0.44957196712493896, "learning_rate": 0.00040442577030812326, "loss": 0.49, "step": 21375 }, { "epoch": 11.941899441340782, "grad_norm": 5.944665431976318, "learning_rate": 0.00040439775910364146, "loss": 0.447, "step": 21376 }, { "epoch": 11.942458100558659, "grad_norm": 0.48717060685157776, "learning_rate": 0.00040436974789915967, "loss": 0.3551, "step": 21377 }, { "epoch": 11.943016759776537, "grad_norm": 0.6072143912315369, "learning_rate": 0.0004043417366946779, "loss": 0.4667, "step": 21378 }, { "epoch": 11.943575418994413, "grad_norm": 1.032568335533142, "learning_rate": 0.0004043137254901961, "loss": 0.3677, "step": 21379 }, { "epoch": 11.94413407821229, "grad_norm": 0.4335152208805084, "learning_rate": 0.0004042857142857143, "loss": 0.4538, "step": 21380 }, { "epoch": 11.944692737430168, "grad_norm": 0.4862251877784729, "learning_rate": 0.0004042577030812325, "loss": 0.5147, "step": 21381 }, { "epoch": 11.945251396648045, "grad_norm": 0.840763509273529, "learning_rate": 0.0004042296918767507, "loss": 0.4164, "step": 21382 }, { "epoch": 11.945810055865921, "grad_norm": 0.4739285111427307, "learning_rate": 0.0004042016806722689, "loss": 0.4818, "step": 21383 }, { "epoch": 11.946368715083798, "grad_norm": 23.829708099365234, "learning_rate": 0.0004041736694677871, "loss": 0.4786, "step": 21384 }, { "epoch": 11.946927374301676, "grad_norm": 0.6557050943374634, 
"learning_rate": 0.00040414565826330537, "loss": 0.4389, "step": 21385 }, { "epoch": 11.947486033519553, "grad_norm": 0.3654273748397827, "learning_rate": 0.0004041176470588235, "loss": 0.3677, "step": 21386 }, { "epoch": 11.94804469273743, "grad_norm": 0.552470862865448, "learning_rate": 0.00040408963585434173, "loss": 0.4975, "step": 21387 }, { "epoch": 11.948603351955308, "grad_norm": 0.39677512645721436, "learning_rate": 0.00040406162464985993, "loss": 0.4021, "step": 21388 }, { "epoch": 11.949162011173184, "grad_norm": 0.4119769334793091, "learning_rate": 0.00040403361344537814, "loss": 0.4177, "step": 21389 }, { "epoch": 11.949720670391061, "grad_norm": 0.40992507338523865, "learning_rate": 0.0004040056022408964, "loss": 0.427, "step": 21390 }, { "epoch": 11.95027932960894, "grad_norm": 0.8193026781082153, "learning_rate": 0.00040397759103641455, "loss": 0.4525, "step": 21391 }, { "epoch": 11.950837988826816, "grad_norm": 0.4050707519054413, "learning_rate": 0.00040394957983193276, "loss": 0.4839, "step": 21392 }, { "epoch": 11.951396648044692, "grad_norm": 0.4837300181388855, "learning_rate": 0.000403921568627451, "loss": 0.3684, "step": 21393 }, { "epoch": 11.951955307262569, "grad_norm": 0.38019701838493347, "learning_rate": 0.00040389355742296917, "loss": 0.3643, "step": 21394 }, { "epoch": 11.952513966480447, "grad_norm": 0.535209059715271, "learning_rate": 0.00040386554621848743, "loss": 0.3914, "step": 21395 }, { "epoch": 11.953072625698324, "grad_norm": 0.5260859727859497, "learning_rate": 0.0004038375350140056, "loss": 0.4598, "step": 21396 }, { "epoch": 11.9536312849162, "grad_norm": 0.870768666267395, "learning_rate": 0.0004038095238095238, "loss": 0.3948, "step": 21397 }, { "epoch": 11.954189944134079, "grad_norm": 0.44812509417533875, "learning_rate": 0.00040378151260504205, "loss": 0.4447, "step": 21398 }, { "epoch": 11.954748603351955, "grad_norm": 0.49573367834091187, "learning_rate": 0.0004037535014005602, "loss": 0.404, "step": 21399 }, { 
"epoch": 11.955307262569832, "grad_norm": 0.3819088041782379, "learning_rate": 0.00040372549019607846, "loss": 0.4229, "step": 21400 }, { "epoch": 11.955865921787709, "grad_norm": 0.7327035665512085, "learning_rate": 0.00040369747899159667, "loss": 0.4963, "step": 21401 }, { "epoch": 11.956424581005587, "grad_norm": 0.7574234008789062, "learning_rate": 0.0004036694677871148, "loss": 0.3949, "step": 21402 }, { "epoch": 11.956983240223463, "grad_norm": 0.5388967394828796, "learning_rate": 0.0004036414565826331, "loss": 0.4076, "step": 21403 }, { "epoch": 11.95754189944134, "grad_norm": 1.0600173473358154, "learning_rate": 0.00040361344537815123, "loss": 0.4323, "step": 21404 }, { "epoch": 11.958100558659218, "grad_norm": 0.3955949544906616, "learning_rate": 0.0004035854341736695, "loss": 0.3476, "step": 21405 }, { "epoch": 11.958659217877095, "grad_norm": 0.7843801975250244, "learning_rate": 0.0004035574229691877, "loss": 0.5156, "step": 21406 }, { "epoch": 11.959217877094972, "grad_norm": 0.563435971736908, "learning_rate": 0.00040352941176470585, "loss": 0.4189, "step": 21407 }, { "epoch": 11.95977653631285, "grad_norm": 0.8962976336479187, "learning_rate": 0.0004035014005602241, "loss": 0.3651, "step": 21408 }, { "epoch": 11.960335195530726, "grad_norm": 0.659602165222168, "learning_rate": 0.0004034733893557423, "loss": 0.4733, "step": 21409 }, { "epoch": 11.960893854748603, "grad_norm": 0.6104586720466614, "learning_rate": 0.0004034453781512605, "loss": 0.3869, "step": 21410 }, { "epoch": 11.961452513966481, "grad_norm": 0.952088475227356, "learning_rate": 0.0004034173669467787, "loss": 0.5045, "step": 21411 }, { "epoch": 11.962011173184358, "grad_norm": 1.0782089233398438, "learning_rate": 0.0004033893557422969, "loss": 0.4553, "step": 21412 }, { "epoch": 11.962569832402234, "grad_norm": 0.8350033164024353, "learning_rate": 0.00040336134453781514, "loss": 0.5616, "step": 21413 }, { "epoch": 11.963128491620111, "grad_norm": 1.7913035154342651, "learning_rate": 
0.00040333333333333334, "loss": 0.571, "step": 21414 }, { "epoch": 11.96368715083799, "grad_norm": 0.5928846597671509, "learning_rate": 0.00040330532212885155, "loss": 0.4653, "step": 21415 }, { "epoch": 11.964245810055866, "grad_norm": 0.3542138934135437, "learning_rate": 0.00040327731092436976, "loss": 0.3622, "step": 21416 }, { "epoch": 11.964804469273743, "grad_norm": 0.855762243270874, "learning_rate": 0.00040324929971988796, "loss": 0.3851, "step": 21417 }, { "epoch": 11.96536312849162, "grad_norm": 1.241692066192627, "learning_rate": 0.00040322128851540617, "loss": 0.3584, "step": 21418 }, { "epoch": 11.965921787709497, "grad_norm": 0.5246260762214661, "learning_rate": 0.0004031932773109244, "loss": 0.4798, "step": 21419 }, { "epoch": 11.966480446927374, "grad_norm": 0.5350035429000854, "learning_rate": 0.00040316526610644263, "loss": 0.424, "step": 21420 }, { "epoch": 11.96703910614525, "grad_norm": 0.48071035742759705, "learning_rate": 0.0004031372549019608, "loss": 0.4337, "step": 21421 }, { "epoch": 11.967597765363129, "grad_norm": 0.6035120487213135, "learning_rate": 0.000403109243697479, "loss": 0.4308, "step": 21422 }, { "epoch": 11.968156424581005, "grad_norm": 0.481770783662796, "learning_rate": 0.0004030812324929972, "loss": 0.4139, "step": 21423 }, { "epoch": 11.968715083798882, "grad_norm": 0.6137310266494751, "learning_rate": 0.0004030532212885154, "loss": 0.5393, "step": 21424 }, { "epoch": 11.96927374301676, "grad_norm": 0.5707393288612366, "learning_rate": 0.00040302521008403366, "loss": 0.3689, "step": 21425 }, { "epoch": 11.969832402234637, "grad_norm": 0.6308573484420776, "learning_rate": 0.0004029971988795518, "loss": 0.4094, "step": 21426 }, { "epoch": 11.970391061452514, "grad_norm": 0.6052660942077637, "learning_rate": 0.00040296918767507, "loss": 0.3737, "step": 21427 }, { "epoch": 11.970949720670392, "grad_norm": 0.5102715492248535, "learning_rate": 0.0004029411764705883, "loss": 0.4508, "step": 21428 }, { "epoch": 
11.971508379888268, "grad_norm": 0.4994828999042511, "learning_rate": 0.00040291316526610643, "loss": 0.3651, "step": 21429 }, { "epoch": 11.972067039106145, "grad_norm": 0.48235514760017395, "learning_rate": 0.0004028851540616247, "loss": 0.4765, "step": 21430 }, { "epoch": 11.972625698324022, "grad_norm": 0.6641901135444641, "learning_rate": 0.00040285714285714285, "loss": 0.4346, "step": 21431 }, { "epoch": 11.9731843575419, "grad_norm": 0.5821069478988647, "learning_rate": 0.00040282913165266105, "loss": 0.526, "step": 21432 }, { "epoch": 11.973743016759776, "grad_norm": 0.9090389609336853, "learning_rate": 0.0004028011204481793, "loss": 0.308, "step": 21433 }, { "epoch": 11.974301675977653, "grad_norm": 0.5727768540382385, "learning_rate": 0.00040277310924369746, "loss": 0.4178, "step": 21434 }, { "epoch": 11.974860335195531, "grad_norm": 0.4237106442451477, "learning_rate": 0.0004027450980392157, "loss": 0.3878, "step": 21435 }, { "epoch": 11.975418994413408, "grad_norm": 0.4511132538318634, "learning_rate": 0.00040271708683473393, "loss": 0.4352, "step": 21436 }, { "epoch": 11.975977653631285, "grad_norm": 0.9254518747329712, "learning_rate": 0.0004026890756302521, "loss": 0.494, "step": 21437 }, { "epoch": 11.976536312849163, "grad_norm": 1.2570074796676636, "learning_rate": 0.00040266106442577034, "loss": 0.4191, "step": 21438 }, { "epoch": 11.97709497206704, "grad_norm": 0.35035431385040283, "learning_rate": 0.0004026330532212885, "loss": 0.401, "step": 21439 }, { "epoch": 11.977653631284916, "grad_norm": 0.5531349778175354, "learning_rate": 0.00040260504201680675, "loss": 0.5512, "step": 21440 }, { "epoch": 11.978212290502793, "grad_norm": 0.7655909061431885, "learning_rate": 0.00040257703081232496, "loss": 0.563, "step": 21441 }, { "epoch": 11.978770949720671, "grad_norm": 0.44826754927635193, "learning_rate": 0.0004025490196078431, "loss": 0.4718, "step": 21442 }, { "epoch": 11.979329608938547, "grad_norm": 0.5259056091308594, "learning_rate": 
0.00040252100840336137, "loss": 0.4015, "step": 21443 }, { "epoch": 11.979888268156424, "grad_norm": 0.5748715400695801, "learning_rate": 0.0004024929971988796, "loss": 0.4365, "step": 21444 }, { "epoch": 11.980446927374302, "grad_norm": 0.8144130706787109, "learning_rate": 0.0004024649859943978, "loss": 0.3662, "step": 21445 }, { "epoch": 11.981005586592179, "grad_norm": 0.3493744432926178, "learning_rate": 0.000402436974789916, "loss": 0.379, "step": 21446 }, { "epoch": 11.981564245810056, "grad_norm": 0.7372487187385559, "learning_rate": 0.00040240896358543414, "loss": 0.6159, "step": 21447 }, { "epoch": 11.982122905027932, "grad_norm": 1.4101358652114868, "learning_rate": 0.0004023809523809524, "loss": 0.3666, "step": 21448 }, { "epoch": 11.98268156424581, "grad_norm": 8.36441421508789, "learning_rate": 0.0004023529411764706, "loss": 0.349, "step": 21449 }, { "epoch": 11.983240223463687, "grad_norm": 0.5477078557014465, "learning_rate": 0.0004023249299719888, "loss": 0.445, "step": 21450 }, { "epoch": 11.983798882681564, "grad_norm": 0.5610117316246033, "learning_rate": 0.000402296918767507, "loss": 0.3944, "step": 21451 }, { "epoch": 11.984357541899442, "grad_norm": 0.3802027702331543, "learning_rate": 0.0004022689075630252, "loss": 0.3331, "step": 21452 }, { "epoch": 11.984916201117318, "grad_norm": 0.58562171459198, "learning_rate": 0.00040224089635854343, "loss": 0.4818, "step": 21453 }, { "epoch": 11.985474860335195, "grad_norm": 0.4802526831626892, "learning_rate": 0.00040221288515406164, "loss": 0.5043, "step": 21454 }, { "epoch": 11.986033519553073, "grad_norm": 0.9251011610031128, "learning_rate": 0.00040218487394957984, "loss": 0.4765, "step": 21455 }, { "epoch": 11.98659217877095, "grad_norm": 0.7091112732887268, "learning_rate": 0.00040215686274509805, "loss": 0.478, "step": 21456 }, { "epoch": 11.987150837988827, "grad_norm": 0.9871140718460083, "learning_rate": 0.00040212885154061626, "loss": 0.4323, "step": 21457 }, { "epoch": 11.987709497206703, 
"grad_norm": 0.9973250031471252, "learning_rate": 0.00040210084033613446, "loss": 0.3803, "step": 21458 }, { "epoch": 11.988268156424581, "grad_norm": 0.6726212501525879, "learning_rate": 0.00040207282913165267, "loss": 0.4414, "step": 21459 }, { "epoch": 11.988826815642458, "grad_norm": 0.35632383823394775, "learning_rate": 0.00040204481792717093, "loss": 0.3801, "step": 21460 }, { "epoch": 11.989385474860335, "grad_norm": 0.5990647673606873, "learning_rate": 0.0004020168067226891, "loss": 0.5631, "step": 21461 }, { "epoch": 11.989944134078213, "grad_norm": 1.3093101978302002, "learning_rate": 0.0004019887955182073, "loss": 0.4107, "step": 21462 }, { "epoch": 11.99050279329609, "grad_norm": 0.6561041474342346, "learning_rate": 0.0004019607843137255, "loss": 0.6508, "step": 21463 }, { "epoch": 11.991061452513966, "grad_norm": 0.5097846984863281, "learning_rate": 0.0004019327731092437, "loss": 0.4593, "step": 21464 }, { "epoch": 11.991620111731844, "grad_norm": 0.41337135434150696, "learning_rate": 0.00040190476190476196, "loss": 0.5292, "step": 21465 }, { "epoch": 11.992178770949721, "grad_norm": 0.46019884943962097, "learning_rate": 0.0004018767507002801, "loss": 0.3881, "step": 21466 }, { "epoch": 11.992737430167598, "grad_norm": 0.5083510875701904, "learning_rate": 0.0004018487394957983, "loss": 0.4658, "step": 21467 }, { "epoch": 11.993296089385474, "grad_norm": 0.43040454387664795, "learning_rate": 0.0004018207282913166, "loss": 0.404, "step": 21468 }, { "epoch": 11.993854748603352, "grad_norm": 0.7379305958747864, "learning_rate": 0.0004017927170868347, "loss": 0.3714, "step": 21469 }, { "epoch": 11.994413407821229, "grad_norm": 0.49640461802482605, "learning_rate": 0.000401764705882353, "loss": 0.411, "step": 21470 }, { "epoch": 11.994972067039106, "grad_norm": 0.8307761549949646, "learning_rate": 0.00040173669467787114, "loss": 0.3684, "step": 21471 }, { "epoch": 11.995530726256984, "grad_norm": 1.5874541997909546, "learning_rate": 0.00040170868347338935, 
"loss": 0.3373, "step": 21472 }, { "epoch": 11.99608938547486, "grad_norm": 0.43602287769317627, "learning_rate": 0.0004016806722689076, "loss": 0.4466, "step": 21473 }, { "epoch": 11.996648044692737, "grad_norm": 0.5021793246269226, "learning_rate": 0.00040165266106442576, "loss": 0.3648, "step": 21474 }, { "epoch": 11.997206703910614, "grad_norm": 0.47018346190452576, "learning_rate": 0.000401624649859944, "loss": 0.4914, "step": 21475 }, { "epoch": 11.997765363128492, "grad_norm": 0.8784552812576294, "learning_rate": 0.0004015966386554622, "loss": 0.5085, "step": 21476 }, { "epoch": 11.998324022346369, "grad_norm": 0.5817292928695679, "learning_rate": 0.0004015686274509804, "loss": 0.4076, "step": 21477 }, { "epoch": 11.998882681564245, "grad_norm": 0.6286437511444092, "learning_rate": 0.00040154061624649864, "loss": 0.4085, "step": 21478 }, { "epoch": 11.999441340782123, "grad_norm": 0.47691115736961365, "learning_rate": 0.0004015126050420168, "loss": 0.4291, "step": 21479 }, { "epoch": 12.0, "grad_norm": 0.44007375836372375, "learning_rate": 0.000401484593837535, "loss": 0.3999, "step": 21480 }, { "epoch": 12.000558659217877, "grad_norm": 4.054800510406494, "learning_rate": 0.00040145658263305325, "loss": 0.4104, "step": 21481 }, { "epoch": 12.001117318435755, "grad_norm": 0.5809652805328369, "learning_rate": 0.0004014285714285714, "loss": 0.4753, "step": 21482 }, { "epoch": 12.001675977653631, "grad_norm": 7.96524715423584, "learning_rate": 0.00040140056022408967, "loss": 0.4473, "step": 21483 }, { "epoch": 12.002234636871508, "grad_norm": 0.8553932905197144, "learning_rate": 0.00040137254901960787, "loss": 0.4088, "step": 21484 }, { "epoch": 12.002793296089385, "grad_norm": 1.017467975616455, "learning_rate": 0.000401344537815126, "loss": 0.4812, "step": 21485 }, { "epoch": 12.003351955307263, "grad_norm": 0.414492666721344, "learning_rate": 0.0004013165266106443, "loss": 0.3337, "step": 21486 }, { "epoch": 12.00391061452514, "grad_norm": 1.8006995916366577, 
"learning_rate": 0.00040128851540616243, "loss": 0.5128, "step": 21487 }, { "epoch": 12.004469273743016, "grad_norm": 0.5660163164138794, "learning_rate": 0.0004012605042016807, "loss": 0.3852, "step": 21488 }, { "epoch": 12.005027932960894, "grad_norm": 0.5661418437957764, "learning_rate": 0.0004012324929971989, "loss": 0.3802, "step": 21489 }, { "epoch": 12.005586592178771, "grad_norm": 0.6057156324386597, "learning_rate": 0.00040120448179271705, "loss": 0.3801, "step": 21490 }, { "epoch": 12.006145251396648, "grad_norm": 0.5141134262084961, "learning_rate": 0.0004011764705882353, "loss": 0.5154, "step": 21491 }, { "epoch": 12.006703910614526, "grad_norm": 0.8683204054832458, "learning_rate": 0.0004011484593837535, "loss": 0.4275, "step": 21492 }, { "epoch": 12.007262569832402, "grad_norm": 0.576714277267456, "learning_rate": 0.0004011204481792717, "loss": 0.3941, "step": 21493 }, { "epoch": 12.007821229050279, "grad_norm": 0.6502825021743774, "learning_rate": 0.00040109243697478993, "loss": 0.5747, "step": 21494 }, { "epoch": 12.008379888268156, "grad_norm": 0.5278416275978088, "learning_rate": 0.0004010644257703081, "loss": 0.4673, "step": 21495 }, { "epoch": 12.008938547486034, "grad_norm": 0.3412959575653076, "learning_rate": 0.00040103641456582634, "loss": 0.3165, "step": 21496 }, { "epoch": 12.00949720670391, "grad_norm": 0.6991682052612305, "learning_rate": 0.00040100840336134455, "loss": 0.4227, "step": 21497 }, { "epoch": 12.010055865921787, "grad_norm": 1.3554376363754272, "learning_rate": 0.00040098039215686276, "loss": 0.4333, "step": 21498 }, { "epoch": 12.010614525139665, "grad_norm": 0.7261898517608643, "learning_rate": 0.00040095238095238096, "loss": 0.4795, "step": 21499 }, { "epoch": 12.011173184357542, "grad_norm": 0.6800056099891663, "learning_rate": 0.00040092436974789917, "loss": 0.3799, "step": 21500 }, { "epoch": 12.011173184357542, "eval_cer": 0.08738393724155237, "eval_loss": 0.33092448115348816, "eval_runtime": 55.623, 
"eval_samples_per_second": 81.585, "eval_steps_per_second": 5.106, "eval_wer": 0.34373341526772994, "step": 21500 }, { "epoch": 12.011731843575419, "grad_norm": 3.178755283355713, "learning_rate": 0.0004008963585434174, "loss": 0.4439, "step": 21501 }, { "epoch": 12.012290502793297, "grad_norm": 0.40753012895584106, "learning_rate": 0.0004008683473389356, "loss": 0.3713, "step": 21502 }, { "epoch": 12.012849162011173, "grad_norm": 0.44675058126449585, "learning_rate": 0.0004008403361344538, "loss": 0.3506, "step": 21503 }, { "epoch": 12.01340782122905, "grad_norm": 0.8386898040771484, "learning_rate": 0.000400812324929972, "loss": 0.5338, "step": 21504 }, { "epoch": 12.013966480446927, "grad_norm": 0.3318725526332855, "learning_rate": 0.0004007843137254902, "loss": 0.291, "step": 21505 }, { "epoch": 12.014525139664805, "grad_norm": 0.6348558068275452, "learning_rate": 0.0004007563025210084, "loss": 0.5524, "step": 21506 }, { "epoch": 12.015083798882682, "grad_norm": 1.133283257484436, "learning_rate": 0.0004007282913165266, "loss": 0.5196, "step": 21507 }, { "epoch": 12.015642458100558, "grad_norm": 0.4056191146373749, "learning_rate": 0.00040070028011204487, "loss": 0.2897, "step": 21508 }, { "epoch": 12.016201117318436, "grad_norm": 0.5457218289375305, "learning_rate": 0.000400672268907563, "loss": 0.3484, "step": 21509 }, { "epoch": 12.016759776536313, "grad_norm": 0.4435984790325165, "learning_rate": 0.0004006442577030812, "loss": 0.4898, "step": 21510 }, { "epoch": 12.01731843575419, "grad_norm": 0.3934905529022217, "learning_rate": 0.00040061624649859943, "loss": 0.4794, "step": 21511 }, { "epoch": 12.017877094972068, "grad_norm": 1.8118194341659546, "learning_rate": 0.00040058823529411764, "loss": 0.3515, "step": 21512 }, { "epoch": 12.018435754189944, "grad_norm": 0.626777172088623, "learning_rate": 0.0004005602240896359, "loss": 0.4214, "step": 21513 }, { "epoch": 12.018994413407821, "grad_norm": 1.8108199834823608, "learning_rate": 0.00040053221288515405, 
"loss": 0.5368, "step": 21514 }, { "epoch": 12.019553072625698, "grad_norm": 0.5234801173210144, "learning_rate": 0.00040050420168067226, "loss": 0.5102, "step": 21515 }, { "epoch": 12.020111731843576, "grad_norm": 0.612450122833252, "learning_rate": 0.0004004761904761905, "loss": 0.3754, "step": 21516 }, { "epoch": 12.020670391061453, "grad_norm": 0.5905392169952393, "learning_rate": 0.00040044817927170867, "loss": 0.4769, "step": 21517 }, { "epoch": 12.021229050279329, "grad_norm": 0.5143051147460938, "learning_rate": 0.00040042016806722693, "loss": 0.371, "step": 21518 }, { "epoch": 12.021787709497207, "grad_norm": 0.8932301998138428, "learning_rate": 0.0004003921568627451, "loss": 0.4304, "step": 21519 }, { "epoch": 12.022346368715084, "grad_norm": 0.407192200422287, "learning_rate": 0.0004003641456582633, "loss": 0.3741, "step": 21520 }, { "epoch": 12.02290502793296, "grad_norm": 2.492185115814209, "learning_rate": 0.00040033613445378155, "loss": 0.3627, "step": 21521 }, { "epoch": 12.023463687150837, "grad_norm": 0.5570883750915527, "learning_rate": 0.0004003081232492997, "loss": 0.4403, "step": 21522 }, { "epoch": 12.024022346368715, "grad_norm": 0.6157993674278259, "learning_rate": 0.00040028011204481796, "loss": 0.3439, "step": 21523 }, { "epoch": 12.024581005586592, "grad_norm": 0.5208712220191956, "learning_rate": 0.00040025210084033617, "loss": 0.6999, "step": 21524 }, { "epoch": 12.025139664804469, "grad_norm": 0.46682968735694885, "learning_rate": 0.0004002240896358543, "loss": 0.4223, "step": 21525 }, { "epoch": 12.025698324022347, "grad_norm": 0.6710468530654907, "learning_rate": 0.0004001960784313726, "loss": 0.3785, "step": 21526 }, { "epoch": 12.026256983240224, "grad_norm": 0.38457804918289185, "learning_rate": 0.00040016806722689073, "loss": 0.4177, "step": 21527 }, { "epoch": 12.0268156424581, "grad_norm": 0.7539198994636536, "learning_rate": 0.000400140056022409, "loss": 0.5561, "step": 21528 }, { "epoch": 12.027374301675978, "grad_norm": 
1.5708000659942627, "learning_rate": 0.0004001120448179272, "loss": 0.4673, "step": 21529 }, { "epoch": 12.027932960893855, "grad_norm": 0.4048934876918793, "learning_rate": 0.00040008403361344535, "loss": 0.5231, "step": 21530 }, { "epoch": 12.028491620111732, "grad_norm": 0.3864879608154297, "learning_rate": 0.0004000560224089636, "loss": 0.3949, "step": 21531 }, { "epoch": 12.029050279329608, "grad_norm": 0.38573333621025085, "learning_rate": 0.0004000280112044818, "loss": 0.3977, "step": 21532 }, { "epoch": 12.029608938547486, "grad_norm": 0.5003073811531067, "learning_rate": 0.0004, "loss": 0.4605, "step": 21533 }, { "epoch": 12.030167597765363, "grad_norm": 6.38754415512085, "learning_rate": 0.0003999719887955182, "loss": 0.4038, "step": 21534 }, { "epoch": 12.03072625698324, "grad_norm": 11.37295913696289, "learning_rate": 0.0003999439775910364, "loss": 0.4571, "step": 21535 }, { "epoch": 12.031284916201118, "grad_norm": 4.7879438400268555, "learning_rate": 0.00039991596638655464, "loss": 0.4373, "step": 21536 }, { "epoch": 12.031843575418995, "grad_norm": 0.9730218648910522, "learning_rate": 0.00039988795518207284, "loss": 0.3795, "step": 21537 }, { "epoch": 12.032402234636871, "grad_norm": 0.5231620073318481, "learning_rate": 0.00039985994397759105, "loss": 0.4207, "step": 21538 }, { "epoch": 12.03296089385475, "grad_norm": 0.5141884684562683, "learning_rate": 0.00039983193277310926, "loss": 0.3889, "step": 21539 }, { "epoch": 12.033519553072626, "grad_norm": 0.4972028434276581, "learning_rate": 0.00039980392156862746, "loss": 0.4673, "step": 21540 }, { "epoch": 12.034078212290503, "grad_norm": 0.9289790987968445, "learning_rate": 0.00039977591036414567, "loss": 0.4573, "step": 21541 }, { "epoch": 12.03463687150838, "grad_norm": 3.2927439212799072, "learning_rate": 0.0003997478991596639, "loss": 0.3546, "step": 21542 }, { "epoch": 12.035195530726257, "grad_norm": 0.7546213269233704, "learning_rate": 0.0003997198879551821, "loss": 0.4986, "step": 21543 }, { 
"epoch": 12.035754189944134, "grad_norm": 0.4614641070365906, "learning_rate": 0.0003996918767507003, "loss": 0.4107, "step": 21544 }, { "epoch": 12.03631284916201, "grad_norm": 1.9791784286499023, "learning_rate": 0.0003996638655462185, "loss": 0.4615, "step": 21545 }, { "epoch": 12.036871508379889, "grad_norm": 0.5705390572547913, "learning_rate": 0.0003996358543417367, "loss": 0.4557, "step": 21546 }, { "epoch": 12.037430167597766, "grad_norm": 0.6609106659889221, "learning_rate": 0.0003996078431372549, "loss": 0.5273, "step": 21547 }, { "epoch": 12.037988826815642, "grad_norm": 1.7435355186462402, "learning_rate": 0.00039957983193277316, "loss": 0.4297, "step": 21548 }, { "epoch": 12.03854748603352, "grad_norm": 0.6595619320869446, "learning_rate": 0.0003995518207282913, "loss": 0.5128, "step": 21549 }, { "epoch": 12.039106145251397, "grad_norm": 0.5085327625274658, "learning_rate": 0.0003995238095238095, "loss": 0.4025, "step": 21550 }, { "epoch": 12.039664804469274, "grad_norm": 0.43308913707733154, "learning_rate": 0.0003994957983193277, "loss": 0.3433, "step": 21551 }, { "epoch": 12.04022346368715, "grad_norm": 0.5915241837501526, "learning_rate": 0.00039946778711484593, "loss": 0.3883, "step": 21552 }, { "epoch": 12.040782122905028, "grad_norm": 0.49047043919563293, "learning_rate": 0.0003994397759103642, "loss": 0.4529, "step": 21553 }, { "epoch": 12.041340782122905, "grad_norm": 0.9699426889419556, "learning_rate": 0.00039941176470588235, "loss": 0.5972, "step": 21554 }, { "epoch": 12.041899441340782, "grad_norm": 0.330026239156723, "learning_rate": 0.00039938375350140055, "loss": 0.4121, "step": 21555 }, { "epoch": 12.04245810055866, "grad_norm": 0.7895104885101318, "learning_rate": 0.0003993557422969188, "loss": 0.4655, "step": 21556 }, { "epoch": 12.043016759776537, "grad_norm": 0.419988214969635, "learning_rate": 0.00039932773109243696, "loss": 0.393, "step": 21557 }, { "epoch": 12.043575418994413, "grad_norm": 0.5506265163421631, "learning_rate": 
0.0003992997198879552, "loss": 0.3533, "step": 21558 }, { "epoch": 12.04413407821229, "grad_norm": 0.49021315574645996, "learning_rate": 0.0003992717086834734, "loss": 0.3966, "step": 21559 }, { "epoch": 12.044692737430168, "grad_norm": 0.6519712209701538, "learning_rate": 0.0003992436974789916, "loss": 0.4206, "step": 21560 }, { "epoch": 12.045251396648045, "grad_norm": 0.6331649422645569, "learning_rate": 0.00039921568627450984, "loss": 0.4176, "step": 21561 }, { "epoch": 12.045810055865921, "grad_norm": 0.794137179851532, "learning_rate": 0.000399187675070028, "loss": 0.429, "step": 21562 }, { "epoch": 12.0463687150838, "grad_norm": 1.9765437841415405, "learning_rate": 0.00039915966386554625, "loss": 0.4986, "step": 21563 }, { "epoch": 12.046927374301676, "grad_norm": 1.6925710439682007, "learning_rate": 0.00039913165266106446, "loss": 0.423, "step": 21564 }, { "epoch": 12.047486033519553, "grad_norm": 1.5677542686462402, "learning_rate": 0.0003991036414565826, "loss": 0.4193, "step": 21565 }, { "epoch": 12.048044692737431, "grad_norm": 0.7156217098236084, "learning_rate": 0.00039907563025210087, "loss": 0.4831, "step": 21566 }, { "epoch": 12.048603351955308, "grad_norm": 0.6260681748390198, "learning_rate": 0.000399047619047619, "loss": 0.4537, "step": 21567 }, { "epoch": 12.049162011173184, "grad_norm": 0.48465362191200256, "learning_rate": 0.0003990196078431373, "loss": 0.4521, "step": 21568 }, { "epoch": 12.04972067039106, "grad_norm": 0.753529965877533, "learning_rate": 0.0003989915966386555, "loss": 0.4158, "step": 21569 }, { "epoch": 12.050279329608939, "grad_norm": 0.4164171814918518, "learning_rate": 0.00039896358543417364, "loss": 0.4175, "step": 21570 }, { "epoch": 12.050837988826816, "grad_norm": 0.733792781829834, "learning_rate": 0.0003989355742296919, "loss": 0.5642, "step": 21571 }, { "epoch": 12.051396648044692, "grad_norm": 0.3996419906616211, "learning_rate": 0.0003989075630252101, "loss": 0.399, "step": 21572 }, { "epoch": 12.05195530726257, 
"grad_norm": 0.8819694519042969, "learning_rate": 0.0003988795518207283, "loss": 0.392, "step": 21573 }, { "epoch": 12.052513966480447, "grad_norm": 3.5123233795166016, "learning_rate": 0.0003988515406162465, "loss": 0.4584, "step": 21574 }, { "epoch": 12.053072625698324, "grad_norm": 0.358894944190979, "learning_rate": 0.00039882352941176467, "loss": 0.403, "step": 21575 }, { "epoch": 12.053631284916202, "grad_norm": 0.4959317445755005, "learning_rate": 0.00039879551820728293, "loss": 0.4753, "step": 21576 }, { "epoch": 12.054189944134079, "grad_norm": 0.3478318750858307, "learning_rate": 0.00039876750700280114, "loss": 0.3805, "step": 21577 }, { "epoch": 12.054748603351955, "grad_norm": 0.6848805546760559, "learning_rate": 0.00039873949579831934, "loss": 0.5441, "step": 21578 }, { "epoch": 12.055307262569832, "grad_norm": 0.5436789989471436, "learning_rate": 0.00039871148459383755, "loss": 0.428, "step": 21579 }, { "epoch": 12.05586592178771, "grad_norm": 0.3651781380176544, "learning_rate": 0.00039868347338935576, "loss": 0.3435, "step": 21580 }, { "epoch": 12.056424581005587, "grad_norm": 0.4721587002277374, "learning_rate": 0.00039865546218487396, "loss": 0.5477, "step": 21581 }, { "epoch": 12.056983240223463, "grad_norm": 0.6732951402664185, "learning_rate": 0.00039862745098039217, "loss": 0.4458, "step": 21582 }, { "epoch": 12.057541899441341, "grad_norm": 0.8742517232894897, "learning_rate": 0.0003985994397759104, "loss": 0.3513, "step": 21583 }, { "epoch": 12.058100558659218, "grad_norm": 0.68865966796875, "learning_rate": 0.0003985714285714286, "loss": 0.7124, "step": 21584 }, { "epoch": 12.058659217877095, "grad_norm": 0.5192102789878845, "learning_rate": 0.0003985434173669468, "loss": 0.3528, "step": 21585 }, { "epoch": 12.059217877094973, "grad_norm": 0.4324816167354584, "learning_rate": 0.000398515406162465, "loss": 0.3938, "step": 21586 }, { "epoch": 12.05977653631285, "grad_norm": 0.5224592685699463, "learning_rate": 0.0003984873949579832, "loss": 
0.4083, "step": 21587 }, { "epoch": 12.060335195530726, "grad_norm": 0.3568752706050873, "learning_rate": 0.00039845938375350146, "loss": 0.3534, "step": 21588 }, { "epoch": 12.060893854748603, "grad_norm": 3.1321470737457275, "learning_rate": 0.0003984313725490196, "loss": 0.3082, "step": 21589 }, { "epoch": 12.061452513966481, "grad_norm": 0.6076870560646057, "learning_rate": 0.0003984033613445378, "loss": 0.5191, "step": 21590 }, { "epoch": 12.062011173184358, "grad_norm": 0.8609892129898071, "learning_rate": 0.000398375350140056, "loss": 0.4518, "step": 21591 }, { "epoch": 12.062569832402234, "grad_norm": 0.5587970018386841, "learning_rate": 0.0003983473389355742, "loss": 0.4176, "step": 21592 }, { "epoch": 12.063128491620112, "grad_norm": 0.4230564534664154, "learning_rate": 0.00039831932773109243, "loss": 0.5093, "step": 21593 }, { "epoch": 12.063687150837989, "grad_norm": 0.5761800408363342, "learning_rate": 0.00039829131652661064, "loss": 0.4522, "step": 21594 }, { "epoch": 12.064245810055866, "grad_norm": 3.0153326988220215, "learning_rate": 0.00039826330532212885, "loss": 0.5196, "step": 21595 }, { "epoch": 12.064804469273742, "grad_norm": 0.5321272611618042, "learning_rate": 0.0003982352941176471, "loss": 0.3619, "step": 21596 }, { "epoch": 12.06536312849162, "grad_norm": 0.48106786608695984, "learning_rate": 0.00039820728291316526, "loss": 0.4905, "step": 21597 }, { "epoch": 12.065921787709497, "grad_norm": 0.7509476542472839, "learning_rate": 0.00039817927170868346, "loss": 0.35, "step": 21598 }, { "epoch": 12.066480446927374, "grad_norm": 0.6596659421920776, "learning_rate": 0.00039815126050420167, "loss": 0.4304, "step": 21599 }, { "epoch": 12.067039106145252, "grad_norm": 0.503322422504425, "learning_rate": 0.0003981232492997199, "loss": 0.4014, "step": 21600 }, { "epoch": 12.067597765363129, "grad_norm": 1.1880236864089966, "learning_rate": 0.00039809523809523814, "loss": 0.4543, "step": 21601 }, { "epoch": 12.068156424581005, "grad_norm": 
0.39636269211769104, "learning_rate": 0.0003980672268907563, "loss": 0.3756, "step": 21602 }, { "epoch": 12.068715083798883, "grad_norm": 7.900854110717773, "learning_rate": 0.0003980392156862745, "loss": 0.3677, "step": 21603 }, { "epoch": 12.06927374301676, "grad_norm": 0.7973554134368896, "learning_rate": 0.00039801120448179275, "loss": 0.3423, "step": 21604 }, { "epoch": 12.069832402234637, "grad_norm": 0.6639948487281799, "learning_rate": 0.0003979831932773109, "loss": 0.4873, "step": 21605 }, { "epoch": 12.070391061452513, "grad_norm": 0.4054809510707855, "learning_rate": 0.00039795518207282917, "loss": 0.4029, "step": 21606 }, { "epoch": 12.070949720670392, "grad_norm": 0.8257238268852234, "learning_rate": 0.0003979271708683473, "loss": 0.4838, "step": 21607 }, { "epoch": 12.071508379888268, "grad_norm": 0.6885493993759155, "learning_rate": 0.0003978991596638655, "loss": 0.5635, "step": 21608 }, { "epoch": 12.072067039106145, "grad_norm": 0.4674849510192871, "learning_rate": 0.0003978711484593838, "loss": 0.4613, "step": 21609 }, { "epoch": 12.072625698324023, "grad_norm": 1.5057246685028076, "learning_rate": 0.00039784313725490193, "loss": 0.419, "step": 21610 }, { "epoch": 12.0731843575419, "grad_norm": 0.5516594052314758, "learning_rate": 0.0003978151260504202, "loss": 0.5002, "step": 21611 }, { "epoch": 12.073743016759776, "grad_norm": 0.37535205483436584, "learning_rate": 0.0003977871148459384, "loss": 0.4082, "step": 21612 }, { "epoch": 12.074301675977654, "grad_norm": 0.9043879508972168, "learning_rate": 0.00039775910364145655, "loss": 0.5307, "step": 21613 }, { "epoch": 12.074860335195531, "grad_norm": 0.8339896202087402, "learning_rate": 0.0003977310924369748, "loss": 0.4068, "step": 21614 }, { "epoch": 12.075418994413408, "grad_norm": 0.8790621757507324, "learning_rate": 0.00039770308123249296, "loss": 0.5259, "step": 21615 }, { "epoch": 12.075977653631284, "grad_norm": 0.5176562070846558, "learning_rate": 0.0003976750700280112, "loss": 0.5019, 
"step": 21616 }, { "epoch": 12.076536312849163, "grad_norm": 2.765859842300415, "learning_rate": 0.00039764705882352943, "loss": 0.6903, "step": 21617 }, { "epoch": 12.077094972067039, "grad_norm": 0.49033039808273315, "learning_rate": 0.0003976190476190476, "loss": 0.3566, "step": 21618 }, { "epoch": 12.077653631284916, "grad_norm": 0.596653938293457, "learning_rate": 0.00039759103641456584, "loss": 0.492, "step": 21619 }, { "epoch": 12.078212290502794, "grad_norm": 0.877156138420105, "learning_rate": 0.00039756302521008405, "loss": 0.3871, "step": 21620 }, { "epoch": 12.07877094972067, "grad_norm": 3.1856210231781006, "learning_rate": 0.00039753501400560226, "loss": 0.3289, "step": 21621 }, { "epoch": 12.079329608938547, "grad_norm": 0.5671728849411011, "learning_rate": 0.00039750700280112046, "loss": 0.4278, "step": 21622 }, { "epoch": 12.079888268156424, "grad_norm": 0.9842540621757507, "learning_rate": 0.0003974789915966386, "loss": 0.4035, "step": 21623 }, { "epoch": 12.080446927374302, "grad_norm": 0.413460910320282, "learning_rate": 0.0003974509803921569, "loss": 0.3879, "step": 21624 }, { "epoch": 12.081005586592179, "grad_norm": 0.5376746654510498, "learning_rate": 0.0003974229691876751, "loss": 0.4039, "step": 21625 }, { "epoch": 12.081564245810055, "grad_norm": 0.6759371757507324, "learning_rate": 0.0003973949579831933, "loss": 0.4685, "step": 21626 }, { "epoch": 12.082122905027934, "grad_norm": 0.4697682857513428, "learning_rate": 0.0003973669467787115, "loss": 0.3497, "step": 21627 }, { "epoch": 12.08268156424581, "grad_norm": 0.5227476954460144, "learning_rate": 0.0003973389355742297, "loss": 0.4569, "step": 21628 }, { "epoch": 12.083240223463687, "grad_norm": 0.5758565664291382, "learning_rate": 0.0003973109243697479, "loss": 0.3827, "step": 21629 }, { "epoch": 12.083798882681565, "grad_norm": 0.4359157979488373, "learning_rate": 0.0003972829131652661, "loss": 0.4406, "step": 21630 }, { "epoch": 12.084357541899442, "grad_norm": 0.6225452423095703, 
"learning_rate": 0.00039725490196078437, "loss": 0.4388, "step": 21631 }, { "epoch": 12.084916201117318, "grad_norm": 0.6389302015304565, "learning_rate": 0.0003972268907563025, "loss": 0.3945, "step": 21632 }, { "epoch": 12.085474860335195, "grad_norm": 1.9121235609054565, "learning_rate": 0.0003971988795518207, "loss": 0.4263, "step": 21633 }, { "epoch": 12.086033519553073, "grad_norm": 0.5296115875244141, "learning_rate": 0.00039717086834733893, "loss": 0.3678, "step": 21634 }, { "epoch": 12.08659217877095, "grad_norm": 0.4797914922237396, "learning_rate": 0.00039714285714285714, "loss": 0.4622, "step": 21635 }, { "epoch": 12.087150837988826, "grad_norm": 0.5294155478477478, "learning_rate": 0.0003971148459383754, "loss": 0.3964, "step": 21636 }, { "epoch": 12.087709497206705, "grad_norm": 1.4047764539718628, "learning_rate": 0.00039708683473389355, "loss": 0.4143, "step": 21637 }, { "epoch": 12.088268156424581, "grad_norm": 0.738347589969635, "learning_rate": 0.00039705882352941176, "loss": 0.3618, "step": 21638 }, { "epoch": 12.088826815642458, "grad_norm": 0.47197914123535156, "learning_rate": 0.00039703081232493, "loss": 0.4172, "step": 21639 }, { "epoch": 12.089385474860336, "grad_norm": 0.41236546635627747, "learning_rate": 0.00039700280112044817, "loss": 0.4524, "step": 21640 }, { "epoch": 12.089944134078213, "grad_norm": 0.9284387230873108, "learning_rate": 0.00039697478991596643, "loss": 0.3191, "step": 21641 }, { "epoch": 12.09050279329609, "grad_norm": 0.40866610407829285, "learning_rate": 0.0003969467787114846, "loss": 0.4294, "step": 21642 }, { "epoch": 12.091061452513966, "grad_norm": 0.3907952308654785, "learning_rate": 0.0003969187675070028, "loss": 0.3367, "step": 21643 }, { "epoch": 12.091620111731844, "grad_norm": 0.6081427335739136, "learning_rate": 0.00039689075630252105, "loss": 0.3373, "step": 21644 }, { "epoch": 12.09217877094972, "grad_norm": 0.525584876537323, "learning_rate": 0.0003968627450980392, "loss": 0.4389, "step": 21645 }, { 
"epoch": 12.092737430167597, "grad_norm": 0.3987991511821747, "learning_rate": 0.00039683473389355746, "loss": 0.3855, "step": 21646 }, { "epoch": 12.093296089385476, "grad_norm": 1.655609130859375, "learning_rate": 0.00039680672268907567, "loss": 0.4978, "step": 21647 }, { "epoch": 12.093854748603352, "grad_norm": 1.1261324882507324, "learning_rate": 0.0003967787114845938, "loss": 0.3963, "step": 21648 }, { "epoch": 12.094413407821229, "grad_norm": 0.34029123187065125, "learning_rate": 0.0003967507002801121, "loss": 0.322, "step": 21649 }, { "epoch": 12.094972067039107, "grad_norm": 0.4872231185436249, "learning_rate": 0.00039672268907563023, "loss": 0.4012, "step": 21650 }, { "epoch": 12.095530726256984, "grad_norm": 1.4977905750274658, "learning_rate": 0.0003966946778711485, "loss": 0.481, "step": 21651 }, { "epoch": 12.09608938547486, "grad_norm": 0.47866091132164, "learning_rate": 0.0003966666666666667, "loss": 0.4402, "step": 21652 }, { "epoch": 12.096648044692737, "grad_norm": 1.3456724882125854, "learning_rate": 0.00039663865546218485, "loss": 0.391, "step": 21653 }, { "epoch": 12.097206703910615, "grad_norm": 0.5541782379150391, "learning_rate": 0.0003966106442577031, "loss": 0.5628, "step": 21654 }, { "epoch": 12.097765363128492, "grad_norm": 0.42005544900894165, "learning_rate": 0.0003965826330532213, "loss": 0.4257, "step": 21655 }, { "epoch": 12.098324022346368, "grad_norm": 0.5318379402160645, "learning_rate": 0.0003965546218487395, "loss": 0.5421, "step": 21656 }, { "epoch": 12.098882681564247, "grad_norm": 0.4454403817653656, "learning_rate": 0.0003965266106442577, "loss": 0.3015, "step": 21657 }, { "epoch": 12.099441340782123, "grad_norm": 0.3577471971511841, "learning_rate": 0.0003964985994397759, "loss": 0.3636, "step": 21658 }, { "epoch": 12.1, "grad_norm": 1.3671613931655884, "learning_rate": 0.00039647058823529414, "loss": 0.3838, "step": 21659 }, { "epoch": 12.100558659217878, "grad_norm": 0.6704123616218567, "learning_rate": 
0.00039644257703081234, "loss": 0.3494, "step": 21660 }, { "epoch": 12.101117318435755, "grad_norm": 0.5111764669418335, "learning_rate": 0.00039641456582633055, "loss": 0.4476, "step": 21661 }, { "epoch": 12.101675977653631, "grad_norm": 0.5964788794517517, "learning_rate": 0.00039638655462184876, "loss": 0.5387, "step": 21662 }, { "epoch": 12.102234636871508, "grad_norm": 1.0383455753326416, "learning_rate": 0.00039635854341736696, "loss": 0.4288, "step": 21663 }, { "epoch": 12.102793296089386, "grad_norm": 0.9435370564460754, "learning_rate": 0.00039633053221288517, "loss": 0.3505, "step": 21664 }, { "epoch": 12.103351955307263, "grad_norm": 0.5997491478919983, "learning_rate": 0.0003963025210084034, "loss": 0.458, "step": 21665 }, { "epoch": 12.10391061452514, "grad_norm": 1.4295728206634521, "learning_rate": 0.0003962745098039216, "loss": 0.4292, "step": 21666 }, { "epoch": 12.104469273743018, "grad_norm": 0.6672996878623962, "learning_rate": 0.0003962464985994398, "loss": 0.4339, "step": 21667 }, { "epoch": 12.105027932960894, "grad_norm": 2.0946834087371826, "learning_rate": 0.000396218487394958, "loss": 0.3834, "step": 21668 }, { "epoch": 12.10558659217877, "grad_norm": 0.5532956719398499, "learning_rate": 0.0003961904761904762, "loss": 0.4761, "step": 21669 }, { "epoch": 12.106145251396647, "grad_norm": 0.5710166096687317, "learning_rate": 0.0003961624649859944, "loss": 0.4348, "step": 21670 }, { "epoch": 12.106703910614526, "grad_norm": 0.5889133810997009, "learning_rate": 0.00039613445378151266, "loss": 0.484, "step": 21671 }, { "epoch": 12.107262569832402, "grad_norm": 0.40772464871406555, "learning_rate": 0.0003961064425770308, "loss": 0.3391, "step": 21672 }, { "epoch": 12.107821229050279, "grad_norm": 0.5061005353927612, "learning_rate": 0.000396078431372549, "loss": 0.4248, "step": 21673 }, { "epoch": 12.108379888268157, "grad_norm": 0.36669039726257324, "learning_rate": 0.0003960504201680672, "loss": 0.4144, "step": 21674 }, { "epoch": 
12.108938547486034, "grad_norm": 1.1601930856704712, "learning_rate": 0.00039602240896358543, "loss": 0.4211, "step": 21675 }, { "epoch": 12.10949720670391, "grad_norm": 0.5963717103004456, "learning_rate": 0.0003959943977591037, "loss": 0.426, "step": 21676 }, { "epoch": 12.110055865921789, "grad_norm": 0.6810588836669922, "learning_rate": 0.00039596638655462185, "loss": 0.4785, "step": 21677 }, { "epoch": 12.110614525139665, "grad_norm": 1.5960572957992554, "learning_rate": 0.00039593837535014005, "loss": 0.45, "step": 21678 }, { "epoch": 12.111173184357542, "grad_norm": 0.4219338893890381, "learning_rate": 0.0003959103641456583, "loss": 0.3176, "step": 21679 }, { "epoch": 12.111731843575418, "grad_norm": 0.6799542307853699, "learning_rate": 0.00039588235294117646, "loss": 0.4181, "step": 21680 }, { "epoch": 12.112290502793297, "grad_norm": 0.6910079121589661, "learning_rate": 0.0003958543417366947, "loss": 0.495, "step": 21681 }, { "epoch": 12.112849162011173, "grad_norm": 0.6280542016029358, "learning_rate": 0.0003958263305322129, "loss": 0.512, "step": 21682 }, { "epoch": 12.11340782122905, "grad_norm": 0.615943968296051, "learning_rate": 0.0003957983193277311, "loss": 0.358, "step": 21683 }, { "epoch": 12.113966480446928, "grad_norm": 0.6539126038551331, "learning_rate": 0.00039577030812324934, "loss": 0.3918, "step": 21684 }, { "epoch": 12.114525139664805, "grad_norm": 0.5996832847595215, "learning_rate": 0.0003957422969187675, "loss": 0.4776, "step": 21685 }, { "epoch": 12.115083798882681, "grad_norm": 0.5373105406761169, "learning_rate": 0.00039571428571428575, "loss": 0.3535, "step": 21686 }, { "epoch": 12.11564245810056, "grad_norm": 5.450588703155518, "learning_rate": 0.00039568627450980396, "loss": 0.3515, "step": 21687 }, { "epoch": 12.116201117318436, "grad_norm": 0.6423935294151306, "learning_rate": 0.0003956582633053221, "loss": 0.3753, "step": 21688 }, { "epoch": 12.116759776536313, "grad_norm": 0.48174917697906494, "learning_rate": 
0.00039563025210084037, "loss": 0.4535, "step": 21689 }, { "epoch": 12.11731843575419, "grad_norm": 0.41791272163391113, "learning_rate": 0.0003956022408963585, "loss": 0.4169, "step": 21690 }, { "epoch": 12.117877094972068, "grad_norm": 1.2938919067382812, "learning_rate": 0.0003955742296918768, "loss": 0.5359, "step": 21691 }, { "epoch": 12.118435754189944, "grad_norm": 0.713721752166748, "learning_rate": 0.000395546218487395, "loss": 0.5202, "step": 21692 }, { "epoch": 12.11899441340782, "grad_norm": 0.35895228385925293, "learning_rate": 0.00039551820728291314, "loss": 0.4554, "step": 21693 }, { "epoch": 12.119553072625699, "grad_norm": 0.7618927359580994, "learning_rate": 0.0003954901960784314, "loss": 0.5268, "step": 21694 }, { "epoch": 12.120111731843576, "grad_norm": 0.9843438863754272, "learning_rate": 0.0003954621848739496, "loss": 0.4075, "step": 21695 }, { "epoch": 12.120670391061452, "grad_norm": 0.4600014090538025, "learning_rate": 0.0003954341736694678, "loss": 0.4734, "step": 21696 }, { "epoch": 12.121229050279329, "grad_norm": 1.582340955734253, "learning_rate": 0.000395406162464986, "loss": 0.3683, "step": 21697 }, { "epoch": 12.121787709497207, "grad_norm": 0.5499454140663147, "learning_rate": 0.00039537815126050417, "loss": 0.3954, "step": 21698 }, { "epoch": 12.122346368715084, "grad_norm": 0.5066421031951904, "learning_rate": 0.00039535014005602243, "loss": 0.3895, "step": 21699 }, { "epoch": 12.12290502793296, "grad_norm": 1.064005732536316, "learning_rate": 0.00039532212885154064, "loss": 0.3704, "step": 21700 }, { "epoch": 12.123463687150839, "grad_norm": 0.516848087310791, "learning_rate": 0.0003952941176470588, "loss": 0.4433, "step": 21701 }, { "epoch": 12.124022346368715, "grad_norm": 0.8953462839126587, "learning_rate": 0.00039526610644257705, "loss": 0.479, "step": 21702 }, { "epoch": 12.124581005586592, "grad_norm": 3.992640733718872, "learning_rate": 0.00039523809523809526, "loss": 0.3705, "step": 21703 }, { "epoch": 
12.12513966480447, "grad_norm": 0.8260511159896851, "learning_rate": 0.00039521008403361346, "loss": 0.4977, "step": 21704 }, { "epoch": 12.125698324022347, "grad_norm": 0.5871229767799377, "learning_rate": 0.00039518207282913167, "loss": 0.4156, "step": 21705 }, { "epoch": 12.126256983240223, "grad_norm": 1.9053579568862915, "learning_rate": 0.0003951540616246498, "loss": 0.5071, "step": 21706 }, { "epoch": 12.1268156424581, "grad_norm": 0.5244836211204529, "learning_rate": 0.0003951260504201681, "loss": 0.4865, "step": 21707 }, { "epoch": 12.127374301675978, "grad_norm": 0.4213542938232422, "learning_rate": 0.0003950980392156863, "loss": 0.3706, "step": 21708 }, { "epoch": 12.127932960893855, "grad_norm": 1.2047513723373413, "learning_rate": 0.0003950700280112045, "loss": 0.5565, "step": 21709 }, { "epoch": 12.128491620111731, "grad_norm": 0.4915350675582886, "learning_rate": 0.0003950420168067227, "loss": 0.3024, "step": 21710 }, { "epoch": 12.12905027932961, "grad_norm": 1.2328293323516846, "learning_rate": 0.0003950140056022409, "loss": 0.3636, "step": 21711 }, { "epoch": 12.129608938547486, "grad_norm": 0.5793976783752441, "learning_rate": 0.0003949859943977591, "loss": 0.339, "step": 21712 }, { "epoch": 12.130167597765363, "grad_norm": 0.5257045030593872, "learning_rate": 0.0003949579831932773, "loss": 0.6747, "step": 21713 }, { "epoch": 12.130726256983241, "grad_norm": 0.45344027876853943, "learning_rate": 0.0003949299719887955, "loss": 0.3846, "step": 21714 }, { "epoch": 12.131284916201118, "grad_norm": 0.4222092628479004, "learning_rate": 0.0003949019607843137, "loss": 0.3812, "step": 21715 }, { "epoch": 12.131843575418994, "grad_norm": 0.43243011832237244, "learning_rate": 0.00039487394957983193, "loss": 0.3886, "step": 21716 }, { "epoch": 12.13240223463687, "grad_norm": 0.40126949548721313, "learning_rate": 0.00039484593837535014, "loss": 0.4995, "step": 21717 }, { "epoch": 12.132960893854749, "grad_norm": 0.6041075587272644, "learning_rate": 
0.00039481792717086835, "loss": 0.4616, "step": 21718 }, { "epoch": 12.133519553072626, "grad_norm": 0.4662020206451416, "learning_rate": 0.0003947899159663866, "loss": 0.4267, "step": 21719 }, { "epoch": 12.134078212290502, "grad_norm": 0.5810329914093018, "learning_rate": 0.00039476190476190476, "loss": 0.3977, "step": 21720 }, { "epoch": 12.13463687150838, "grad_norm": 0.4164109528064728, "learning_rate": 0.00039473389355742296, "loss": 0.3633, "step": 21721 }, { "epoch": 12.135195530726257, "grad_norm": 0.7491818070411682, "learning_rate": 0.00039470588235294117, "loss": 0.3673, "step": 21722 }, { "epoch": 12.135754189944134, "grad_norm": 0.7716153264045715, "learning_rate": 0.0003946778711484594, "loss": 0.4072, "step": 21723 }, { "epoch": 12.136312849162012, "grad_norm": 2.2200896739959717, "learning_rate": 0.00039464985994397764, "loss": 0.4544, "step": 21724 }, { "epoch": 12.136871508379889, "grad_norm": 0.6296133995056152, "learning_rate": 0.0003946218487394958, "loss": 0.4959, "step": 21725 }, { "epoch": 12.137430167597765, "grad_norm": 0.7471591830253601, "learning_rate": 0.000394593837535014, "loss": 0.4906, "step": 21726 }, { "epoch": 12.137988826815642, "grad_norm": 0.7727543115615845, "learning_rate": 0.00039456582633053225, "loss": 0.6243, "step": 21727 }, { "epoch": 12.13854748603352, "grad_norm": 0.5286030173301697, "learning_rate": 0.0003945378151260504, "loss": 0.4391, "step": 21728 }, { "epoch": 12.139106145251397, "grad_norm": 0.6781374216079712, "learning_rate": 0.00039450980392156867, "loss": 0.4692, "step": 21729 }, { "epoch": 12.139664804469273, "grad_norm": 0.5865398049354553, "learning_rate": 0.0003944817927170868, "loss": 0.2876, "step": 21730 }, { "epoch": 12.140223463687152, "grad_norm": 0.36739417910575867, "learning_rate": 0.000394453781512605, "loss": 0.3569, "step": 21731 }, { "epoch": 12.140782122905028, "grad_norm": 2.0289204120635986, "learning_rate": 0.0003944257703081233, "loss": 0.4431, "step": 21732 }, { "epoch": 
12.141340782122905, "grad_norm": 0.4773618280887604, "learning_rate": 0.00039439775910364143, "loss": 0.5413, "step": 21733 }, { "epoch": 12.141899441340781, "grad_norm": 0.438223659992218, "learning_rate": 0.0003943697478991597, "loss": 0.3063, "step": 21734 }, { "epoch": 12.14245810055866, "grad_norm": 9.199708938598633, "learning_rate": 0.0003943417366946779, "loss": 0.3716, "step": 21735 }, { "epoch": 12.143016759776536, "grad_norm": 0.41729608178138733, "learning_rate": 0.00039431372549019605, "loss": 0.4976, "step": 21736 }, { "epoch": 12.143575418994413, "grad_norm": 0.7056612372398376, "learning_rate": 0.0003942857142857143, "loss": 0.4576, "step": 21737 }, { "epoch": 12.144134078212291, "grad_norm": 0.42185676097869873, "learning_rate": 0.00039425770308123246, "loss": 0.3542, "step": 21738 }, { "epoch": 12.144692737430168, "grad_norm": 3.568018913269043, "learning_rate": 0.0003942296918767507, "loss": 0.4505, "step": 21739 }, { "epoch": 12.145251396648044, "grad_norm": 0.6252919435501099, "learning_rate": 0.00039420168067226893, "loss": 0.5047, "step": 21740 }, { "epoch": 12.145810055865923, "grad_norm": 1.604628324508667, "learning_rate": 0.0003941736694677871, "loss": 0.3986, "step": 21741 }, { "epoch": 12.1463687150838, "grad_norm": 0.4350687265396118, "learning_rate": 0.00039414565826330534, "loss": 0.3925, "step": 21742 }, { "epoch": 12.146927374301676, "grad_norm": 0.41298022866249084, "learning_rate": 0.00039411764705882355, "loss": 0.4036, "step": 21743 }, { "epoch": 12.147486033519552, "grad_norm": 3.243593692779541, "learning_rate": 0.00039408963585434176, "loss": 0.4881, "step": 21744 }, { "epoch": 12.14804469273743, "grad_norm": 0.48016592860221863, "learning_rate": 0.00039406162464985996, "loss": 0.421, "step": 21745 }, { "epoch": 12.148603351955307, "grad_norm": 0.4243204891681671, "learning_rate": 0.0003940336134453781, "loss": 0.4075, "step": 21746 }, { "epoch": 12.149162011173184, "grad_norm": 0.37057727575302124, "learning_rate": 
0.0003940056022408964, "loss": 0.3608, "step": 21747 }, { "epoch": 12.149720670391062, "grad_norm": 0.9835109710693359, "learning_rate": 0.0003939775910364146, "loss": 0.4239, "step": 21748 }, { "epoch": 12.150279329608939, "grad_norm": 0.6910563707351685, "learning_rate": 0.0003939495798319328, "loss": 0.4765, "step": 21749 }, { "epoch": 12.150837988826815, "grad_norm": 0.5976811051368713, "learning_rate": 0.000393921568627451, "loss": 0.4454, "step": 21750 }, { "epoch": 12.151396648044694, "grad_norm": 0.5772486329078674, "learning_rate": 0.0003938935574229692, "loss": 0.3372, "step": 21751 }, { "epoch": 12.15195530726257, "grad_norm": 0.9358725547790527, "learning_rate": 0.0003938655462184874, "loss": 0.4362, "step": 21752 }, { "epoch": 12.152513966480447, "grad_norm": 1.1724869012832642, "learning_rate": 0.0003938375350140056, "loss": 0.462, "step": 21753 }, { "epoch": 12.153072625698323, "grad_norm": 0.5281693339347839, "learning_rate": 0.0003938095238095238, "loss": 0.4371, "step": 21754 }, { "epoch": 12.153631284916202, "grad_norm": 0.579055905342102, "learning_rate": 0.000393781512605042, "loss": 0.4903, "step": 21755 }, { "epoch": 12.154189944134078, "grad_norm": 0.33133259415626526, "learning_rate": 0.0003937535014005602, "loss": 0.378, "step": 21756 }, { "epoch": 12.154748603351955, "grad_norm": 0.46535757184028625, "learning_rate": 0.00039372549019607843, "loss": 0.4663, "step": 21757 }, { "epoch": 12.155307262569833, "grad_norm": 1.664006233215332, "learning_rate": 0.00039369747899159664, "loss": 0.3621, "step": 21758 }, { "epoch": 12.15586592178771, "grad_norm": 0.6977758407592773, "learning_rate": 0.0003936694677871149, "loss": 0.4525, "step": 21759 }, { "epoch": 12.156424581005586, "grad_norm": 0.4795726537704468, "learning_rate": 0.00039364145658263305, "loss": 0.3693, "step": 21760 }, { "epoch": 12.156983240223465, "grad_norm": 0.39268234372138977, "learning_rate": 0.00039361344537815126, "loss": 0.3372, "step": 21761 }, { "epoch": 
12.157541899441341, "grad_norm": 0.4742097556591034, "learning_rate": 0.00039358543417366946, "loss": 0.3502, "step": 21762 }, { "epoch": 12.158100558659218, "grad_norm": 0.5306321978569031, "learning_rate": 0.00039355742296918767, "loss": 0.401, "step": 21763 }, { "epoch": 12.158659217877094, "grad_norm": 0.4546867311000824, "learning_rate": 0.00039352941176470593, "loss": 0.4108, "step": 21764 }, { "epoch": 12.159217877094973, "grad_norm": 0.4598774015903473, "learning_rate": 0.0003935014005602241, "loss": 0.4106, "step": 21765 }, { "epoch": 12.15977653631285, "grad_norm": 1.1472396850585938, "learning_rate": 0.0003934733893557423, "loss": 0.329, "step": 21766 }, { "epoch": 12.160335195530726, "grad_norm": 2.278527021408081, "learning_rate": 0.00039344537815126055, "loss": 0.3068, "step": 21767 }, { "epoch": 12.160893854748604, "grad_norm": 0.5542152523994446, "learning_rate": 0.0003934173669467787, "loss": 0.5449, "step": 21768 }, { "epoch": 12.16145251396648, "grad_norm": 0.5256080627441406, "learning_rate": 0.00039338935574229696, "loss": 0.3507, "step": 21769 }, { "epoch": 12.162011173184357, "grad_norm": 0.41452229022979736, "learning_rate": 0.0003933613445378151, "loss": 0.376, "step": 21770 }, { "epoch": 12.162569832402234, "grad_norm": 1.3607451915740967, "learning_rate": 0.0003933333333333333, "loss": 0.505, "step": 21771 }, { "epoch": 12.163128491620112, "grad_norm": 0.38324180245399475, "learning_rate": 0.0003933053221288516, "loss": 0.2948, "step": 21772 }, { "epoch": 12.163687150837989, "grad_norm": 0.9718806147575378, "learning_rate": 0.00039327731092436973, "loss": 0.3485, "step": 21773 }, { "epoch": 12.164245810055865, "grad_norm": 0.6692209839820862, "learning_rate": 0.000393249299719888, "loss": 0.4018, "step": 21774 }, { "epoch": 12.164804469273744, "grad_norm": 0.3984488248825073, "learning_rate": 0.0003932212885154062, "loss": 0.434, "step": 21775 }, { "epoch": 12.16536312849162, "grad_norm": 0.5287492275238037, "learning_rate": 
0.00039319327731092435, "loss": 0.5105, "step": 21776 }, { "epoch": 12.165921787709497, "grad_norm": 0.7724483609199524, "learning_rate": 0.0003931652661064426, "loss": 0.4274, "step": 21777 }, { "epoch": 12.166480446927375, "grad_norm": 1.267682433128357, "learning_rate": 0.00039313725490196076, "loss": 0.3106, "step": 21778 }, { "epoch": 12.167039106145252, "grad_norm": 0.48012182116508484, "learning_rate": 0.000393109243697479, "loss": 0.4039, "step": 21779 }, { "epoch": 12.167597765363128, "grad_norm": 0.4909907281398773, "learning_rate": 0.0003930812324929972, "loss": 0.3256, "step": 21780 }, { "epoch": 12.168156424581005, "grad_norm": 0.7097208499908447, "learning_rate": 0.0003930532212885154, "loss": 0.6106, "step": 21781 }, { "epoch": 12.168715083798883, "grad_norm": 0.5284512042999268, "learning_rate": 0.00039302521008403364, "loss": 0.2588, "step": 21782 }, { "epoch": 12.16927374301676, "grad_norm": 0.7502999305725098, "learning_rate": 0.00039299719887955184, "loss": 0.358, "step": 21783 }, { "epoch": 12.169832402234636, "grad_norm": 0.5383977890014648, "learning_rate": 0.00039296918767507005, "loss": 0.521, "step": 21784 }, { "epoch": 12.170391061452515, "grad_norm": 1.2534509897232056, "learning_rate": 0.00039294117647058826, "loss": 0.55, "step": 21785 }, { "epoch": 12.170949720670391, "grad_norm": 0.3851598799228668, "learning_rate": 0.0003929131652661064, "loss": 0.3642, "step": 21786 }, { "epoch": 12.171508379888268, "grad_norm": 0.6332042813301086, "learning_rate": 0.00039288515406162467, "loss": 0.4268, "step": 21787 }, { "epoch": 12.172067039106146, "grad_norm": 0.592551589012146, "learning_rate": 0.0003928571428571429, "loss": 0.3564, "step": 21788 }, { "epoch": 12.172625698324023, "grad_norm": 1.0004148483276367, "learning_rate": 0.0003928291316526611, "loss": 0.4341, "step": 21789 }, { "epoch": 12.1731843575419, "grad_norm": 0.4812932014465332, "learning_rate": 0.0003928011204481793, "loss": 0.4541, "step": 21790 }, { "epoch": 
12.173743016759776, "grad_norm": 0.41774073243141174, "learning_rate": 0.0003927731092436975, "loss": 0.3839, "step": 21791 }, { "epoch": 12.174301675977654, "grad_norm": 0.4575801193714142, "learning_rate": 0.0003927450980392157, "loss": 0.4468, "step": 21792 }, { "epoch": 12.17486033519553, "grad_norm": 0.47126543521881104, "learning_rate": 0.0003927170868347339, "loss": 0.3536, "step": 21793 }, { "epoch": 12.175418994413407, "grad_norm": 0.7147666811943054, "learning_rate": 0.0003926890756302521, "loss": 0.4329, "step": 21794 }, { "epoch": 12.175977653631286, "grad_norm": 1.9386667013168335, "learning_rate": 0.0003926610644257703, "loss": 0.4113, "step": 21795 }, { "epoch": 12.176536312849162, "grad_norm": 0.49150145053863525, "learning_rate": 0.0003926330532212885, "loss": 0.3867, "step": 21796 }, { "epoch": 12.177094972067039, "grad_norm": 0.478884220123291, "learning_rate": 0.0003926050420168067, "loss": 0.4083, "step": 21797 }, { "epoch": 12.177653631284917, "grad_norm": 1.033188819885254, "learning_rate": 0.00039257703081232493, "loss": 0.422, "step": 21798 }, { "epoch": 12.178212290502794, "grad_norm": 0.6465140581130981, "learning_rate": 0.0003925490196078432, "loss": 0.4363, "step": 21799 }, { "epoch": 12.17877094972067, "grad_norm": 1.037026286125183, "learning_rate": 0.00039252100840336135, "loss": 0.4185, "step": 21800 }, { "epoch": 12.179329608938547, "grad_norm": 0.36252695322036743, "learning_rate": 0.00039249299719887955, "loss": 0.3977, "step": 21801 }, { "epoch": 12.179888268156425, "grad_norm": 0.7120213508605957, "learning_rate": 0.00039246498599439776, "loss": 0.381, "step": 21802 }, { "epoch": 12.180446927374302, "grad_norm": 1.418805480003357, "learning_rate": 0.00039243697478991596, "loss": 0.3741, "step": 21803 }, { "epoch": 12.181005586592178, "grad_norm": 0.3128635287284851, "learning_rate": 0.0003924089635854342, "loss": 0.312, "step": 21804 }, { "epoch": 12.181564245810057, "grad_norm": 1.3979904651641846, "learning_rate": 
0.0003923809523809524, "loss": 0.3047, "step": 21805 }, { "epoch": 12.182122905027933, "grad_norm": 0.5204529166221619, "learning_rate": 0.0003923529411764706, "loss": 0.4432, "step": 21806 }, { "epoch": 12.18268156424581, "grad_norm": 0.5199275016784668, "learning_rate": 0.00039232492997198884, "loss": 0.5098, "step": 21807 }, { "epoch": 12.183240223463686, "grad_norm": 1.4085497856140137, "learning_rate": 0.000392296918767507, "loss": 0.3794, "step": 21808 }, { "epoch": 12.183798882681565, "grad_norm": 0.40714210271835327, "learning_rate": 0.00039226890756302525, "loss": 0.4829, "step": 21809 }, { "epoch": 12.184357541899441, "grad_norm": 0.6840947270393372, "learning_rate": 0.0003922408963585434, "loss": 0.5658, "step": 21810 }, { "epoch": 12.184916201117318, "grad_norm": 0.3958841562271118, "learning_rate": 0.0003922128851540616, "loss": 0.2873, "step": 21811 }, { "epoch": 12.185474860335196, "grad_norm": 0.5028731822967529, "learning_rate": 0.00039218487394957987, "loss": 0.3564, "step": 21812 }, { "epoch": 12.186033519553073, "grad_norm": 0.3707486391067505, "learning_rate": 0.000392156862745098, "loss": 0.3626, "step": 21813 }, { "epoch": 12.18659217877095, "grad_norm": 0.4465099573135376, "learning_rate": 0.00039212885154061623, "loss": 0.3649, "step": 21814 }, { "epoch": 12.187150837988828, "grad_norm": 0.5002136826515198, "learning_rate": 0.0003921008403361345, "loss": 0.45, "step": 21815 }, { "epoch": 12.187709497206704, "grad_norm": 0.4699725806713104, "learning_rate": 0.00039207282913165264, "loss": 0.3947, "step": 21816 }, { "epoch": 12.18826815642458, "grad_norm": 0.3924983739852905, "learning_rate": 0.0003920448179271709, "loss": 0.4159, "step": 21817 }, { "epoch": 12.188826815642457, "grad_norm": 0.4827716648578644, "learning_rate": 0.00039201680672268905, "loss": 0.5022, "step": 21818 }, { "epoch": 12.189385474860336, "grad_norm": 1.0485687255859375, "learning_rate": 0.00039198879551820726, "loss": 0.4069, "step": 21819 }, { "epoch": 
12.189944134078212, "grad_norm": 0.6386340856552124, "learning_rate": 0.0003919607843137255, "loss": 0.2849, "step": 21820 }, { "epoch": 12.190502793296089, "grad_norm": 0.9350286722183228, "learning_rate": 0.00039193277310924367, "loss": 0.3835, "step": 21821 }, { "epoch": 12.191061452513967, "grad_norm": 1.1977585554122925, "learning_rate": 0.00039190476190476193, "loss": 0.4274, "step": 21822 }, { "epoch": 12.191620111731844, "grad_norm": 0.6727390289306641, "learning_rate": 0.00039187675070028014, "loss": 0.4387, "step": 21823 }, { "epoch": 12.19217877094972, "grad_norm": 1.412946343421936, "learning_rate": 0.0003918487394957983, "loss": 0.4394, "step": 21824 }, { "epoch": 12.192737430167599, "grad_norm": 1.2503514289855957, "learning_rate": 0.00039182072829131655, "loss": 0.3633, "step": 21825 }, { "epoch": 12.193296089385475, "grad_norm": 0.476701945066452, "learning_rate": 0.0003917927170868347, "loss": 0.43, "step": 21826 }, { "epoch": 12.193854748603352, "grad_norm": 0.4076438248157501, "learning_rate": 0.00039176470588235296, "loss": 0.4223, "step": 21827 }, { "epoch": 12.194413407821228, "grad_norm": 0.4849054515361786, "learning_rate": 0.00039173669467787117, "loss": 0.4347, "step": 21828 }, { "epoch": 12.194972067039107, "grad_norm": 0.4874263107776642, "learning_rate": 0.0003917086834733893, "loss": 0.4409, "step": 21829 }, { "epoch": 12.195530726256983, "grad_norm": 0.4042741060256958, "learning_rate": 0.0003916806722689076, "loss": 0.3468, "step": 21830 }, { "epoch": 12.19608938547486, "grad_norm": 0.4822596311569214, "learning_rate": 0.0003916526610644258, "loss": 0.4527, "step": 21831 }, { "epoch": 12.196648044692738, "grad_norm": 0.6793760657310486, "learning_rate": 0.000391624649859944, "loss": 0.5699, "step": 21832 }, { "epoch": 12.197206703910615, "grad_norm": 0.532418429851532, "learning_rate": 0.0003915966386554622, "loss": 0.4835, "step": 21833 }, { "epoch": 12.197765363128491, "grad_norm": 0.36411771178245544, "learning_rate": 
0.00039156862745098035, "loss": 0.3571, "step": 21834 }, { "epoch": 12.19832402234637, "grad_norm": 0.7971248030662537, "learning_rate": 0.0003915406162464986, "loss": 0.4263, "step": 21835 }, { "epoch": 12.198882681564246, "grad_norm": 0.8669003248214722, "learning_rate": 0.0003915126050420168, "loss": 0.4788, "step": 21836 }, { "epoch": 12.199441340782123, "grad_norm": 0.4638930559158325, "learning_rate": 0.000391484593837535, "loss": 0.3206, "step": 21837 }, { "epoch": 12.2, "grad_norm": 0.5332055687904358, "learning_rate": 0.0003914565826330532, "loss": 0.4596, "step": 21838 }, { "epoch": 12.200558659217878, "grad_norm": 0.8995439410209656, "learning_rate": 0.00039142857142857143, "loss": 0.3574, "step": 21839 }, { "epoch": 12.201117318435754, "grad_norm": 2.44504714012146, "learning_rate": 0.00039140056022408964, "loss": 0.3655, "step": 21840 }, { "epoch": 12.20167597765363, "grad_norm": 2.5652449131011963, "learning_rate": 0.00039137254901960784, "loss": 0.3832, "step": 21841 }, { "epoch": 12.202234636871509, "grad_norm": 0.5657622814178467, "learning_rate": 0.0003913445378151261, "loss": 0.5945, "step": 21842 }, { "epoch": 12.202793296089386, "grad_norm": 0.4920353591442108, "learning_rate": 0.00039131652661064426, "loss": 0.4678, "step": 21843 }, { "epoch": 12.203351955307262, "grad_norm": 1.2122029066085815, "learning_rate": 0.00039128851540616246, "loss": 0.3435, "step": 21844 }, { "epoch": 12.203910614525139, "grad_norm": 0.500775933265686, "learning_rate": 0.00039126050420168067, "loss": 0.3498, "step": 21845 }, { "epoch": 12.204469273743017, "grad_norm": 0.45466670393943787, "learning_rate": 0.0003912324929971989, "loss": 0.3981, "step": 21846 }, { "epoch": 12.205027932960894, "grad_norm": 0.37359780073165894, "learning_rate": 0.00039120448179271714, "loss": 0.4142, "step": 21847 }, { "epoch": 12.20558659217877, "grad_norm": 0.5335162281990051, "learning_rate": 0.0003911764705882353, "loss": 0.4375, "step": 21848 }, { "epoch": 12.206145251396649, 
"grad_norm": 0.5312724113464355, "learning_rate": 0.0003911484593837535, "loss": 0.3921, "step": 21849 }, { "epoch": 12.206703910614525, "grad_norm": 0.44479164481163025, "learning_rate": 0.00039112044817927175, "loss": 0.4252, "step": 21850 }, { "epoch": 12.207262569832402, "grad_norm": 0.35407671332359314, "learning_rate": 0.0003910924369747899, "loss": 0.2967, "step": 21851 }, { "epoch": 12.20782122905028, "grad_norm": 4.332983493804932, "learning_rate": 0.00039106442577030817, "loss": 0.3265, "step": 21852 }, { "epoch": 12.208379888268157, "grad_norm": 0.4648149907588959, "learning_rate": 0.0003910364145658263, "loss": 0.4087, "step": 21853 }, { "epoch": 12.208938547486033, "grad_norm": 0.6404945254325867, "learning_rate": 0.0003910084033613445, "loss": 0.4507, "step": 21854 }, { "epoch": 12.20949720670391, "grad_norm": 0.8594422340393066, "learning_rate": 0.0003909803921568628, "loss": 0.3098, "step": 21855 }, { "epoch": 12.210055865921788, "grad_norm": 7.797704696655273, "learning_rate": 0.00039095238095238093, "loss": 0.409, "step": 21856 }, { "epoch": 12.210614525139665, "grad_norm": 0.5687383413314819, "learning_rate": 0.0003909243697478992, "loss": 0.6765, "step": 21857 }, { "epoch": 12.211173184357541, "grad_norm": 7.988146781921387, "learning_rate": 0.0003908963585434174, "loss": 0.4515, "step": 21858 }, { "epoch": 12.21173184357542, "grad_norm": 1.9037461280822754, "learning_rate": 0.00039086834733893555, "loss": 0.3847, "step": 21859 }, { "epoch": 12.212290502793296, "grad_norm": 0.4811187982559204, "learning_rate": 0.0003908403361344538, "loss": 0.6082, "step": 21860 }, { "epoch": 12.212849162011173, "grad_norm": 0.5357823967933655, "learning_rate": 0.00039081232492997196, "loss": 0.4657, "step": 21861 }, { "epoch": 12.213407821229051, "grad_norm": 0.6308609247207642, "learning_rate": 0.0003907843137254902, "loss": 0.3716, "step": 21862 }, { "epoch": 12.213966480446928, "grad_norm": 0.4544468820095062, "learning_rate": 0.00039075630252100843, "loss": 
0.4571, "step": 21863 }, { "epoch": 12.214525139664804, "grad_norm": 0.5269650816917419, "learning_rate": 0.0003907282913165266, "loss": 0.3746, "step": 21864 }, { "epoch": 12.21508379888268, "grad_norm": 0.6236300468444824, "learning_rate": 0.00039070028011204484, "loss": 0.4047, "step": 21865 }, { "epoch": 12.21564245810056, "grad_norm": 0.5601384043693542, "learning_rate": 0.00039067226890756305, "loss": 0.4223, "step": 21866 }, { "epoch": 12.216201117318436, "grad_norm": 0.44696560502052307, "learning_rate": 0.00039064425770308126, "loss": 0.4471, "step": 21867 }, { "epoch": 12.216759776536312, "grad_norm": 1.1227378845214844, "learning_rate": 0.00039061624649859946, "loss": 0.4578, "step": 21868 }, { "epoch": 12.21731843575419, "grad_norm": 0.6919159293174744, "learning_rate": 0.0003905882352941176, "loss": 0.4738, "step": 21869 }, { "epoch": 12.217877094972067, "grad_norm": 1.015834927558899, "learning_rate": 0.0003905602240896359, "loss": 0.5356, "step": 21870 }, { "epoch": 12.218435754189944, "grad_norm": 2.636415719985962, "learning_rate": 0.0003905322128851541, "loss": 0.4774, "step": 21871 }, { "epoch": 12.21899441340782, "grad_norm": 0.4044748544692993, "learning_rate": 0.0003905042016806723, "loss": 0.3946, "step": 21872 }, { "epoch": 12.219553072625699, "grad_norm": 0.6548739075660706, "learning_rate": 0.0003904761904761905, "loss": 0.4648, "step": 21873 }, { "epoch": 12.220111731843575, "grad_norm": 0.42191267013549805, "learning_rate": 0.0003904481792717087, "loss": 0.5553, "step": 21874 }, { "epoch": 12.220670391061452, "grad_norm": 0.9451420903205872, "learning_rate": 0.0003904201680672269, "loss": 0.452, "step": 21875 }, { "epoch": 12.22122905027933, "grad_norm": 0.572333037853241, "learning_rate": 0.0003903921568627451, "loss": 0.414, "step": 21876 }, { "epoch": 12.221787709497207, "grad_norm": 0.39203354716300964, "learning_rate": 0.0003903641456582633, "loss": 0.526, "step": 21877 }, { "epoch": 12.222346368715083, "grad_norm": 
8.167726516723633, "learning_rate": 0.0003903361344537815, "loss": 0.4928, "step": 21878 }, { "epoch": 12.222905027932962, "grad_norm": 0.468376100063324, "learning_rate": 0.0003903081232492997, "loss": 0.4226, "step": 21879 }, { "epoch": 12.223463687150838, "grad_norm": 0.5577556490898132, "learning_rate": 0.00039028011204481793, "loss": 0.3841, "step": 21880 }, { "epoch": 12.224022346368715, "grad_norm": 0.6264350414276123, "learning_rate": 0.00039025210084033614, "loss": 0.4877, "step": 21881 }, { "epoch": 12.224581005586591, "grad_norm": 0.5471148490905762, "learning_rate": 0.0003902240896358544, "loss": 0.4355, "step": 21882 }, { "epoch": 12.22513966480447, "grad_norm": 0.4771871566772461, "learning_rate": 0.00039019607843137255, "loss": 0.3246, "step": 21883 }, { "epoch": 12.225698324022346, "grad_norm": 0.6908655166625977, "learning_rate": 0.00039016806722689076, "loss": 0.3336, "step": 21884 }, { "epoch": 12.226256983240223, "grad_norm": 0.4203045666217804, "learning_rate": 0.00039014005602240896, "loss": 0.3613, "step": 21885 }, { "epoch": 12.226815642458101, "grad_norm": 0.522285521030426, "learning_rate": 0.00039011204481792717, "loss": 0.4583, "step": 21886 }, { "epoch": 12.227374301675978, "grad_norm": 0.6972352266311646, "learning_rate": 0.00039008403361344543, "loss": 0.326, "step": 21887 }, { "epoch": 12.227932960893854, "grad_norm": 1.22415030002594, "learning_rate": 0.0003900560224089636, "loss": 0.4579, "step": 21888 }, { "epoch": 12.228491620111733, "grad_norm": 2.0768392086029053, "learning_rate": 0.0003900280112044818, "loss": 0.341, "step": 21889 }, { "epoch": 12.22905027932961, "grad_norm": 0.5469762682914734, "learning_rate": 0.00039000000000000005, "loss": 0.5079, "step": 21890 }, { "epoch": 12.229608938547486, "grad_norm": 1.2877812385559082, "learning_rate": 0.0003899719887955182, "loss": 0.3778, "step": 21891 }, { "epoch": 12.230167597765362, "grad_norm": 1.5677930116653442, "learning_rate": 0.00038994397759103646, "loss": 0.4174, 
"step": 21892 }, { "epoch": 12.23072625698324, "grad_norm": 0.7079290747642517, "learning_rate": 0.0003899159663865546, "loss": 0.5094, "step": 21893 }, { "epoch": 12.231284916201117, "grad_norm": 0.4397013485431671, "learning_rate": 0.0003898879551820728, "loss": 0.4256, "step": 21894 }, { "epoch": 12.231843575418994, "grad_norm": 0.3371831476688385, "learning_rate": 0.0003898599439775911, "loss": 0.382, "step": 21895 }, { "epoch": 12.232402234636872, "grad_norm": 0.5015655755996704, "learning_rate": 0.00038983193277310923, "loss": 0.464, "step": 21896 }, { "epoch": 12.232960893854749, "grad_norm": 0.4748333990573883, "learning_rate": 0.0003898039215686275, "loss": 0.5532, "step": 21897 }, { "epoch": 12.233519553072625, "grad_norm": 0.40756455063819885, "learning_rate": 0.0003897759103641457, "loss": 0.383, "step": 21898 }, { "epoch": 12.234078212290504, "grad_norm": 0.4960795044898987, "learning_rate": 0.00038974789915966385, "loss": 0.3991, "step": 21899 }, { "epoch": 12.23463687150838, "grad_norm": 0.4264710247516632, "learning_rate": 0.0003897198879551821, "loss": 0.4124, "step": 21900 }, { "epoch": 12.235195530726257, "grad_norm": 1.2038625478744507, "learning_rate": 0.00038969187675070026, "loss": 0.335, "step": 21901 }, { "epoch": 12.235754189944133, "grad_norm": 0.4595504403114319, "learning_rate": 0.0003896638655462185, "loss": 0.2935, "step": 21902 }, { "epoch": 12.236312849162012, "grad_norm": 0.36419862508773804, "learning_rate": 0.0003896358543417367, "loss": 0.4935, "step": 21903 }, { "epoch": 12.236871508379888, "grad_norm": 0.47434210777282715, "learning_rate": 0.0003896078431372549, "loss": 0.3734, "step": 21904 }, { "epoch": 12.237430167597765, "grad_norm": 0.4589269757270813, "learning_rate": 0.00038957983193277314, "loss": 0.4204, "step": 21905 }, { "epoch": 12.237988826815643, "grad_norm": 0.527160108089447, "learning_rate": 0.00038955182072829134, "loss": 0.4681, "step": 21906 }, { "epoch": 12.23854748603352, "grad_norm": 0.3831530511379242, 
"learning_rate": 0.00038952380952380955, "loss": 0.3478, "step": 21907 }, { "epoch": 12.239106145251396, "grad_norm": 2.4585824012756348, "learning_rate": 0.00038949579831932776, "loss": 0.4246, "step": 21908 }, { "epoch": 12.239664804469275, "grad_norm": 1.2817026376724243, "learning_rate": 0.0003894677871148459, "loss": 0.3695, "step": 21909 }, { "epoch": 12.240223463687151, "grad_norm": 3.9584319591522217, "learning_rate": 0.00038943977591036417, "loss": 0.4669, "step": 21910 }, { "epoch": 12.240782122905028, "grad_norm": 0.5206130146980286, "learning_rate": 0.0003894117647058824, "loss": 0.4447, "step": 21911 }, { "epoch": 12.241340782122904, "grad_norm": 0.6187414526939392, "learning_rate": 0.0003893837535014006, "loss": 0.3849, "step": 21912 }, { "epoch": 12.241899441340783, "grad_norm": 0.6064357161521912, "learning_rate": 0.0003893557422969188, "loss": 0.4901, "step": 21913 }, { "epoch": 12.24245810055866, "grad_norm": 0.4968681037425995, "learning_rate": 0.000389327731092437, "loss": 0.5264, "step": 21914 }, { "epoch": 12.243016759776536, "grad_norm": 0.47898945212364197, "learning_rate": 0.0003892997198879552, "loss": 0.3699, "step": 21915 }, { "epoch": 12.243575418994414, "grad_norm": 0.6146661639213562, "learning_rate": 0.0003892717086834734, "loss": 0.5232, "step": 21916 }, { "epoch": 12.24413407821229, "grad_norm": 1.489929437637329, "learning_rate": 0.0003892436974789916, "loss": 0.4723, "step": 21917 }, { "epoch": 12.244692737430167, "grad_norm": 9.172572135925293, "learning_rate": 0.0003892156862745098, "loss": 0.3544, "step": 21918 }, { "epoch": 12.245251396648044, "grad_norm": 0.6824479103088379, "learning_rate": 0.000389187675070028, "loss": 0.5045, "step": 21919 }, { "epoch": 12.245810055865922, "grad_norm": 0.6482643485069275, "learning_rate": 0.0003891596638655462, "loss": 0.4159, "step": 21920 }, { "epoch": 12.246368715083799, "grad_norm": 0.8398950099945068, "learning_rate": 0.00038913165266106443, "loss": 0.4938, "step": 21921 }, { 
"epoch": 12.246927374301675, "grad_norm": 0.48368746042251587, "learning_rate": 0.0003891036414565827, "loss": 0.486, "step": 21922 }, { "epoch": 12.247486033519554, "grad_norm": 1.9520163536071777, "learning_rate": 0.00038907563025210084, "loss": 0.3875, "step": 21923 }, { "epoch": 12.24804469273743, "grad_norm": 0.3982439935207367, "learning_rate": 0.00038904761904761905, "loss": 0.3845, "step": 21924 }, { "epoch": 12.248603351955307, "grad_norm": 0.6793547868728638, "learning_rate": 0.00038901960784313726, "loss": 0.6255, "step": 21925 }, { "epoch": 12.249162011173185, "grad_norm": 1.007798194885254, "learning_rate": 0.00038899159663865546, "loss": 0.4387, "step": 21926 }, { "epoch": 12.249720670391062, "grad_norm": 0.501794159412384, "learning_rate": 0.00038896358543417367, "loss": 0.4539, "step": 21927 }, { "epoch": 12.250279329608938, "grad_norm": 0.7932931780815125, "learning_rate": 0.0003889355742296919, "loss": 0.6936, "step": 21928 }, { "epoch": 12.250837988826815, "grad_norm": 0.478601336479187, "learning_rate": 0.0003889075630252101, "loss": 0.5219, "step": 21929 }, { "epoch": 12.251396648044693, "grad_norm": 0.8970796465873718, "learning_rate": 0.00038887955182072834, "loss": 0.6378, "step": 21930 }, { "epoch": 12.25195530726257, "grad_norm": 0.4218604266643524, "learning_rate": 0.0003888515406162465, "loss": 0.4637, "step": 21931 }, { "epoch": 12.252513966480446, "grad_norm": 0.5470377802848816, "learning_rate": 0.0003888235294117647, "loss": 0.3953, "step": 21932 }, { "epoch": 12.253072625698325, "grad_norm": 0.6472342014312744, "learning_rate": 0.0003887955182072829, "loss": 0.4612, "step": 21933 }, { "epoch": 12.253631284916201, "grad_norm": 0.6655737161636353, "learning_rate": 0.0003887675070028011, "loss": 0.4249, "step": 21934 }, { "epoch": 12.254189944134078, "grad_norm": 0.4794478714466095, "learning_rate": 0.00038873949579831937, "loss": 0.4755, "step": 21935 }, { "epoch": 12.254748603351956, "grad_norm": 0.37384161353111267, "learning_rate": 
0.0003887114845938375, "loss": 0.3093, "step": 21936 }, { "epoch": 12.255307262569833, "grad_norm": 0.47591012716293335, "learning_rate": 0.00038868347338935573, "loss": 0.5251, "step": 21937 }, { "epoch": 12.25586592178771, "grad_norm": 1.1966019868850708, "learning_rate": 0.000388655462184874, "loss": 0.5843, "step": 21938 }, { "epoch": 12.256424581005586, "grad_norm": 0.5154739618301392, "learning_rate": 0.00038862745098039214, "loss": 0.4434, "step": 21939 }, { "epoch": 12.256983240223464, "grad_norm": 0.43514811992645264, "learning_rate": 0.0003885994397759104, "loss": 0.4456, "step": 21940 }, { "epoch": 12.25754189944134, "grad_norm": 0.9524070620536804, "learning_rate": 0.00038857142857142855, "loss": 0.4501, "step": 21941 }, { "epoch": 12.258100558659217, "grad_norm": 0.5993633270263672, "learning_rate": 0.00038854341736694676, "loss": 0.3877, "step": 21942 }, { "epoch": 12.258659217877096, "grad_norm": 7.890388488769531, "learning_rate": 0.000388515406162465, "loss": 0.418, "step": 21943 }, { "epoch": 12.259217877094972, "grad_norm": 0.47539111971855164, "learning_rate": 0.00038848739495798317, "loss": 0.412, "step": 21944 }, { "epoch": 12.259776536312849, "grad_norm": 0.5194594860076904, "learning_rate": 0.00038845938375350143, "loss": 0.4101, "step": 21945 }, { "epoch": 12.260335195530725, "grad_norm": 1.1685597896575928, "learning_rate": 0.00038843137254901964, "loss": 0.4934, "step": 21946 }, { "epoch": 12.260893854748604, "grad_norm": 0.3871569037437439, "learning_rate": 0.0003884033613445378, "loss": 0.4218, "step": 21947 }, { "epoch": 12.26145251396648, "grad_norm": 0.44887304306030273, "learning_rate": 0.00038837535014005605, "loss": 0.4318, "step": 21948 }, { "epoch": 12.262011173184357, "grad_norm": 0.5415121912956238, "learning_rate": 0.0003883473389355742, "loss": 0.6098, "step": 21949 }, { "epoch": 12.262569832402235, "grad_norm": 0.5002323389053345, "learning_rate": 0.00038831932773109246, "loss": 0.3703, "step": 21950 }, { "epoch": 
12.263128491620112, "grad_norm": 2.433748960494995, "learning_rate": 0.00038829131652661067, "loss": 0.4141, "step": 21951 }, { "epoch": 12.263687150837988, "grad_norm": 1.0336363315582275, "learning_rate": 0.0003882633053221288, "loss": 0.4337, "step": 21952 }, { "epoch": 12.264245810055867, "grad_norm": 2.1985156536102295, "learning_rate": 0.0003882352941176471, "loss": 0.3889, "step": 21953 }, { "epoch": 12.264804469273743, "grad_norm": 0.4408109486103058, "learning_rate": 0.0003882072829131653, "loss": 0.3566, "step": 21954 }, { "epoch": 12.26536312849162, "grad_norm": 0.7503076791763306, "learning_rate": 0.0003881792717086835, "loss": 0.5297, "step": 21955 }, { "epoch": 12.265921787709496, "grad_norm": 0.5883480310440063, "learning_rate": 0.0003881512605042017, "loss": 0.4237, "step": 21956 }, { "epoch": 12.266480446927375, "grad_norm": 0.5162433981895447, "learning_rate": 0.00038812324929971985, "loss": 0.5023, "step": 21957 }, { "epoch": 12.267039106145251, "grad_norm": 0.5030010342597961, "learning_rate": 0.0003880952380952381, "loss": 0.4108, "step": 21958 }, { "epoch": 12.267597765363128, "grad_norm": 0.8635503649711609, "learning_rate": 0.0003880672268907563, "loss": 0.4756, "step": 21959 }, { "epoch": 12.268156424581006, "grad_norm": 1.5629216432571411, "learning_rate": 0.0003880392156862745, "loss": 0.478, "step": 21960 }, { "epoch": 12.268715083798883, "grad_norm": 0.45288270711898804, "learning_rate": 0.0003880112044817927, "loss": 0.4164, "step": 21961 }, { "epoch": 12.26927374301676, "grad_norm": 0.37146326899528503, "learning_rate": 0.00038798319327731093, "loss": 0.3652, "step": 21962 }, { "epoch": 12.269832402234638, "grad_norm": 0.7616936564445496, "learning_rate": 0.00038795518207282914, "loss": 0.4825, "step": 21963 }, { "epoch": 12.270391061452514, "grad_norm": 0.4957352876663208, "learning_rate": 0.00038792717086834734, "loss": 0.4551, "step": 21964 }, { "epoch": 12.27094972067039, "grad_norm": 0.47230660915374756, "learning_rate": 
0.00038789915966386555, "loss": 0.4663, "step": 21965 }, { "epoch": 12.271508379888267, "grad_norm": 0.6230714321136475, "learning_rate": 0.00038787114845938376, "loss": 0.4603, "step": 21966 }, { "epoch": 12.272067039106146, "grad_norm": 0.35418015718460083, "learning_rate": 0.00038784313725490196, "loss": 0.3535, "step": 21967 }, { "epoch": 12.272625698324022, "grad_norm": 0.4451562762260437, "learning_rate": 0.00038781512605042017, "loss": 0.4465, "step": 21968 }, { "epoch": 12.273184357541899, "grad_norm": 0.4882161319255829, "learning_rate": 0.0003877871148459384, "loss": 0.3678, "step": 21969 }, { "epoch": 12.273743016759777, "grad_norm": 4.364351749420166, "learning_rate": 0.00038775910364145664, "loss": 0.339, "step": 21970 }, { "epoch": 12.274301675977654, "grad_norm": 0.4866425395011902, "learning_rate": 0.0003877310924369748, "loss": 0.4275, "step": 21971 }, { "epoch": 12.27486033519553, "grad_norm": 0.48463326692581177, "learning_rate": 0.000387703081232493, "loss": 0.4512, "step": 21972 }, { "epoch": 12.275418994413409, "grad_norm": 0.8256913423538208, "learning_rate": 0.0003876750700280112, "loss": 0.4908, "step": 21973 }, { "epoch": 12.275977653631285, "grad_norm": 0.4348258674144745, "learning_rate": 0.0003876470588235294, "loss": 0.419, "step": 21974 }, { "epoch": 12.276536312849162, "grad_norm": 0.44900065660476685, "learning_rate": 0.00038761904761904767, "loss": 0.4138, "step": 21975 }, { "epoch": 12.277094972067038, "grad_norm": 0.6732671856880188, "learning_rate": 0.0003875910364145658, "loss": 0.4057, "step": 21976 }, { "epoch": 12.277653631284917, "grad_norm": 0.7130894064903259, "learning_rate": 0.000387563025210084, "loss": 0.4044, "step": 21977 }, { "epoch": 12.278212290502793, "grad_norm": 0.3504762649536133, "learning_rate": 0.0003875350140056023, "loss": 0.3917, "step": 21978 }, { "epoch": 12.27877094972067, "grad_norm": 1.5566868782043457, "learning_rate": 0.00038750700280112043, "loss": 0.3734, "step": 21979 }, { "epoch": 
12.279329608938548, "grad_norm": 0.8719233870506287, "learning_rate": 0.0003874789915966387, "loss": 0.4389, "step": 21980 }, { "epoch": 12.279888268156425, "grad_norm": 4.940595626831055, "learning_rate": 0.00038745098039215685, "loss": 0.4811, "step": 21981 }, { "epoch": 12.280446927374301, "grad_norm": 0.39729028940200806, "learning_rate": 0.00038742296918767505, "loss": 0.4077, "step": 21982 }, { "epoch": 12.28100558659218, "grad_norm": 0.6331236362457275, "learning_rate": 0.0003873949579831933, "loss": 0.5556, "step": 21983 }, { "epoch": 12.281564245810056, "grad_norm": 2.016129493713379, "learning_rate": 0.00038736694677871146, "loss": 0.3951, "step": 21984 }, { "epoch": 12.282122905027933, "grad_norm": 0.6197336316108704, "learning_rate": 0.0003873389355742297, "loss": 0.3922, "step": 21985 }, { "epoch": 12.28268156424581, "grad_norm": 0.38644084334373474, "learning_rate": 0.00038731092436974793, "loss": 0.4405, "step": 21986 }, { "epoch": 12.283240223463688, "grad_norm": 0.29793092608451843, "learning_rate": 0.0003872829131652661, "loss": 0.2778, "step": 21987 }, { "epoch": 12.283798882681564, "grad_norm": 0.39753955602645874, "learning_rate": 0.00038725490196078434, "loss": 0.3941, "step": 21988 }, { "epoch": 12.28435754189944, "grad_norm": 0.5762141346931458, "learning_rate": 0.0003872268907563025, "loss": 0.415, "step": 21989 }, { "epoch": 12.28491620111732, "grad_norm": 0.44524964690208435, "learning_rate": 0.00038719887955182076, "loss": 0.3884, "step": 21990 }, { "epoch": 12.285474860335196, "grad_norm": 0.4412602186203003, "learning_rate": 0.00038717086834733896, "loss": 0.4031, "step": 21991 }, { "epoch": 12.286033519553072, "grad_norm": 0.4033774733543396, "learning_rate": 0.0003871428571428571, "loss": 0.3993, "step": 21992 }, { "epoch": 12.286592178770949, "grad_norm": 0.4507732093334198, "learning_rate": 0.0003871148459383754, "loss": 0.4622, "step": 21993 }, { "epoch": 12.287150837988827, "grad_norm": 0.3614402711391449, "learning_rate": 
0.0003870868347338936, "loss": 0.3849, "step": 21994 }, { "epoch": 12.287709497206704, "grad_norm": 0.6344165205955505, "learning_rate": 0.0003870588235294118, "loss": 0.4199, "step": 21995 }, { "epoch": 12.28826815642458, "grad_norm": 0.44309067726135254, "learning_rate": 0.00038703081232493, "loss": 0.3603, "step": 21996 }, { "epoch": 12.288826815642459, "grad_norm": 0.5011874437332153, "learning_rate": 0.00038700280112044814, "loss": 0.5027, "step": 21997 }, { "epoch": 12.289385474860335, "grad_norm": 0.40383729338645935, "learning_rate": 0.0003869747899159664, "loss": 0.3343, "step": 21998 }, { "epoch": 12.289944134078212, "grad_norm": 2.4490933418273926, "learning_rate": 0.0003869467787114846, "loss": 0.6019, "step": 21999 }, { "epoch": 12.29050279329609, "grad_norm": 0.5161100625991821, "learning_rate": 0.0003869187675070028, "loss": 0.3194, "step": 22000 }, { "epoch": 12.29050279329609, "eval_cer": 0.08725846398917657, "eval_loss": 0.33356931805610657, "eval_runtime": 55.2916, "eval_samples_per_second": 82.074, "eval_steps_per_second": 5.136, "eval_wer": 0.3433702969190805, "step": 22000 }, { "epoch": 12.291061452513967, "grad_norm": 0.5101909637451172, "learning_rate": 0.000386890756302521, "loss": 0.5044, "step": 22001 }, { "epoch": 12.291620111731843, "grad_norm": 0.5509570837020874, "learning_rate": 0.0003868627450980392, "loss": 0.503, "step": 22002 }, { "epoch": 12.29217877094972, "grad_norm": 0.4949786365032196, "learning_rate": 0.00038683473389355743, "loss": 0.4503, "step": 22003 }, { "epoch": 12.292737430167598, "grad_norm": 0.41839534044265747, "learning_rate": 0.00038680672268907564, "loss": 0.4616, "step": 22004 }, { "epoch": 12.293296089385475, "grad_norm": 0.4735906422138214, "learning_rate": 0.00038677871148459384, "loss": 0.3397, "step": 22005 }, { "epoch": 12.293854748603351, "grad_norm": 0.569817841053009, "learning_rate": 0.00038675070028011205, "loss": 0.389, "step": 22006 }, { "epoch": 12.29441340782123, "grad_norm": 0.5748640298843384, 
"learning_rate": 0.00038672268907563026, "loss": 0.4819, "step": 22007 }, { "epoch": 12.294972067039106, "grad_norm": 0.34630876779556274, "learning_rate": 0.00038669467787114846, "loss": 0.3394, "step": 22008 }, { "epoch": 12.295530726256983, "grad_norm": 0.30318352580070496, "learning_rate": 0.00038666666666666667, "loss": 0.3045, "step": 22009 }, { "epoch": 12.296089385474861, "grad_norm": 20.493606567382812, "learning_rate": 0.00038663865546218493, "loss": 0.4628, "step": 22010 }, { "epoch": 12.296648044692738, "grad_norm": 0.6786620616912842, "learning_rate": 0.0003866106442577031, "loss": 0.403, "step": 22011 }, { "epoch": 12.297206703910614, "grad_norm": 1.9632556438446045, "learning_rate": 0.0003865826330532213, "loss": 0.54, "step": 22012 }, { "epoch": 12.297765363128491, "grad_norm": 0.435391902923584, "learning_rate": 0.0003865546218487395, "loss": 0.3931, "step": 22013 }, { "epoch": 12.29832402234637, "grad_norm": 0.5964452624320984, "learning_rate": 0.0003865266106442577, "loss": 0.4574, "step": 22014 }, { "epoch": 12.298882681564246, "grad_norm": 0.6367564797401428, "learning_rate": 0.00038649859943977596, "loss": 0.4706, "step": 22015 }, { "epoch": 12.299441340782122, "grad_norm": 0.4371486306190491, "learning_rate": 0.0003864705882352941, "loss": 0.3904, "step": 22016 }, { "epoch": 12.3, "grad_norm": 1.2134084701538086, "learning_rate": 0.0003864425770308123, "loss": 0.3511, "step": 22017 }, { "epoch": 12.300558659217877, "grad_norm": 1.6539559364318848, "learning_rate": 0.0003864145658263306, "loss": 0.4262, "step": 22018 }, { "epoch": 12.301117318435754, "grad_norm": 0.42026272416114807, "learning_rate": 0.00038638655462184873, "loss": 0.4589, "step": 22019 }, { "epoch": 12.30167597765363, "grad_norm": 0.5202992558479309, "learning_rate": 0.000386358543417367, "loss": 0.4315, "step": 22020 }, { "epoch": 12.302234636871509, "grad_norm": 0.467745840549469, "learning_rate": 0.00038633053221288514, "loss": 0.3924, "step": 22021 }, { "epoch": 
12.302793296089385, "grad_norm": 2.2003281116485596, "learning_rate": 0.00038630252100840335, "loss": 0.4182, "step": 22022 }, { "epoch": 12.303351955307262, "grad_norm": 0.9660027027130127, "learning_rate": 0.0003862745098039216, "loss": 0.3344, "step": 22023 }, { "epoch": 12.30391061452514, "grad_norm": 1.4834377765655518, "learning_rate": 0.00038624649859943976, "loss": 0.4439, "step": 22024 }, { "epoch": 12.304469273743017, "grad_norm": 0.4662884473800659, "learning_rate": 0.000386218487394958, "loss": 0.4835, "step": 22025 }, { "epoch": 12.305027932960893, "grad_norm": 0.4322328269481659, "learning_rate": 0.0003861904761904762, "loss": 0.5046, "step": 22026 }, { "epoch": 12.305586592178772, "grad_norm": 1.4548461437225342, "learning_rate": 0.0003861624649859944, "loss": 0.5176, "step": 22027 }, { "epoch": 12.306145251396648, "grad_norm": 1.0186183452606201, "learning_rate": 0.00038613445378151264, "loss": 0.3485, "step": 22028 }, { "epoch": 12.306703910614525, "grad_norm": 0.4916955232620239, "learning_rate": 0.0003861064425770308, "loss": 0.4494, "step": 22029 }, { "epoch": 12.307262569832401, "grad_norm": 0.4791578948497772, "learning_rate": 0.00038607843137254905, "loss": 0.3474, "step": 22030 }, { "epoch": 12.30782122905028, "grad_norm": 0.718127965927124, "learning_rate": 0.00038605042016806726, "loss": 0.5104, "step": 22031 }, { "epoch": 12.308379888268156, "grad_norm": 0.7696358561515808, "learning_rate": 0.0003860224089635854, "loss": 0.6354, "step": 22032 }, { "epoch": 12.308938547486033, "grad_norm": 0.46793481707572937, "learning_rate": 0.00038599439775910367, "loss": 0.4074, "step": 22033 }, { "epoch": 12.309497206703911, "grad_norm": 0.6133039593696594, "learning_rate": 0.0003859663865546219, "loss": 0.3894, "step": 22034 }, { "epoch": 12.310055865921788, "grad_norm": 0.8257023692131042, "learning_rate": 0.0003859383753501401, "loss": 0.3952, "step": 22035 }, { "epoch": 12.310614525139664, "grad_norm": 0.5027741193771362, "learning_rate": 
0.0003859103641456583, "loss": 0.572, "step": 22036 }, { "epoch": 12.311173184357543, "grad_norm": 6.1996612548828125, "learning_rate": 0.00038588235294117644, "loss": 0.5828, "step": 22037 }, { "epoch": 12.31173184357542, "grad_norm": 0.4067029356956482, "learning_rate": 0.0003858543417366947, "loss": 0.4379, "step": 22038 }, { "epoch": 12.312290502793296, "grad_norm": 2.2580740451812744, "learning_rate": 0.0003858263305322129, "loss": 0.4404, "step": 22039 }, { "epoch": 12.312849162011172, "grad_norm": 0.3362097442150116, "learning_rate": 0.00038579831932773105, "loss": 0.4378, "step": 22040 }, { "epoch": 12.31340782122905, "grad_norm": 0.5648675560951233, "learning_rate": 0.0003857703081232493, "loss": 0.5131, "step": 22041 }, { "epoch": 12.313966480446927, "grad_norm": 0.8387340307235718, "learning_rate": 0.0003857422969187675, "loss": 0.336, "step": 22042 }, { "epoch": 12.314525139664804, "grad_norm": 0.5210297107696533, "learning_rate": 0.0003857142857142857, "loss": 0.4216, "step": 22043 }, { "epoch": 12.315083798882682, "grad_norm": 1.334911584854126, "learning_rate": 0.00038568627450980393, "loss": 0.3932, "step": 22044 }, { "epoch": 12.315642458100559, "grad_norm": 0.5634598135948181, "learning_rate": 0.0003856582633053221, "loss": 0.432, "step": 22045 }, { "epoch": 12.316201117318435, "grad_norm": 0.526818573474884, "learning_rate": 0.00038563025210084034, "loss": 0.4862, "step": 22046 }, { "epoch": 12.316759776536314, "grad_norm": 0.41560494899749756, "learning_rate": 0.00038560224089635855, "loss": 0.4336, "step": 22047 }, { "epoch": 12.31731843575419, "grad_norm": 0.676376461982727, "learning_rate": 0.00038557422969187676, "loss": 0.6179, "step": 22048 }, { "epoch": 12.317877094972067, "grad_norm": 1.035886287689209, "learning_rate": 0.00038554621848739496, "loss": 0.5195, "step": 22049 }, { "epoch": 12.318435754189943, "grad_norm": 1.0377691984176636, "learning_rate": 0.00038551820728291317, "loss": 0.3766, "step": 22050 }, { "epoch": 
12.318994413407822, "grad_norm": 1.0605108737945557, "learning_rate": 0.0003854901960784314, "loss": 0.5, "step": 22051 }, { "epoch": 12.319553072625698, "grad_norm": 1.2051405906677246, "learning_rate": 0.0003854621848739496, "loss": 0.4617, "step": 22052 }, { "epoch": 12.320111731843575, "grad_norm": 1.3574597835540771, "learning_rate": 0.0003854341736694678, "loss": 0.4602, "step": 22053 }, { "epoch": 12.320670391061453, "grad_norm": 0.4773907959461212, "learning_rate": 0.000385406162464986, "loss": 0.3053, "step": 22054 }, { "epoch": 12.32122905027933, "grad_norm": 0.4621227979660034, "learning_rate": 0.0003853781512605042, "loss": 0.4155, "step": 22055 }, { "epoch": 12.321787709497206, "grad_norm": 1.189773440361023, "learning_rate": 0.0003853501400560224, "loss": 0.3165, "step": 22056 }, { "epoch": 12.322346368715085, "grad_norm": 0.571053147315979, "learning_rate": 0.0003853221288515406, "loss": 0.3451, "step": 22057 }, { "epoch": 12.322905027932961, "grad_norm": 0.6405549645423889, "learning_rate": 0.00038529411764705887, "loss": 0.4913, "step": 22058 }, { "epoch": 12.323463687150838, "grad_norm": 0.7246174812316895, "learning_rate": 0.000385266106442577, "loss": 0.5107, "step": 22059 }, { "epoch": 12.324022346368714, "grad_norm": 0.36921757459640503, "learning_rate": 0.00038523809523809523, "loss": 0.5043, "step": 22060 }, { "epoch": 12.324581005586593, "grad_norm": 0.785773754119873, "learning_rate": 0.00038521008403361343, "loss": 0.417, "step": 22061 }, { "epoch": 12.32513966480447, "grad_norm": 0.33367887139320374, "learning_rate": 0.00038518207282913164, "loss": 0.35, "step": 22062 }, { "epoch": 12.325698324022346, "grad_norm": 3.0431160926818848, "learning_rate": 0.0003851540616246499, "loss": 0.4313, "step": 22063 }, { "epoch": 12.326256983240224, "grad_norm": 0.46786046028137207, "learning_rate": 0.00038512605042016805, "loss": 0.3928, "step": 22064 }, { "epoch": 12.3268156424581, "grad_norm": 1.3510156869888306, "learning_rate": 
0.00038509803921568626, "loss": 0.4261, "step": 22065 }, { "epoch": 12.327374301675977, "grad_norm": 0.6182519197463989, "learning_rate": 0.0003850700280112045, "loss": 0.3767, "step": 22066 }, { "epoch": 12.327932960893854, "grad_norm": 0.49069443345069885, "learning_rate": 0.00038504201680672267, "loss": 0.4449, "step": 22067 }, { "epoch": 12.328491620111732, "grad_norm": 0.427541583776474, "learning_rate": 0.00038501400560224093, "loss": 0.3877, "step": 22068 }, { "epoch": 12.329050279329609, "grad_norm": 0.44517752528190613, "learning_rate": 0.0003849859943977591, "loss": 0.404, "step": 22069 }, { "epoch": 12.329608938547485, "grad_norm": 0.4926365315914154, "learning_rate": 0.0003849579831932773, "loss": 0.4675, "step": 22070 }, { "epoch": 12.330167597765364, "grad_norm": 0.6203492283821106, "learning_rate": 0.00038492997198879555, "loss": 0.4113, "step": 22071 }, { "epoch": 12.33072625698324, "grad_norm": 0.61383455991745, "learning_rate": 0.0003849019607843137, "loss": 0.5191, "step": 22072 }, { "epoch": 12.331284916201117, "grad_norm": 0.5258894562721252, "learning_rate": 0.00038487394957983196, "loss": 0.4, "step": 22073 }, { "epoch": 12.331843575418995, "grad_norm": 0.5182380676269531, "learning_rate": 0.00038484593837535017, "loss": 0.4227, "step": 22074 }, { "epoch": 12.332402234636872, "grad_norm": 0.5613095760345459, "learning_rate": 0.0003848179271708683, "loss": 0.5018, "step": 22075 }, { "epoch": 12.332960893854748, "grad_norm": 0.46325626969337463, "learning_rate": 0.0003847899159663866, "loss": 0.3538, "step": 22076 }, { "epoch": 12.333519553072625, "grad_norm": 0.7671961188316345, "learning_rate": 0.00038476190476190473, "loss": 0.6775, "step": 22077 }, { "epoch": 12.334078212290503, "grad_norm": 0.3868919312953949, "learning_rate": 0.000384733893557423, "loss": 0.326, "step": 22078 }, { "epoch": 12.33463687150838, "grad_norm": 0.49969127774238586, "learning_rate": 0.0003847058823529412, "loss": 0.6402, "step": 22079 }, { "epoch": 
12.335195530726256, "grad_norm": 0.73891681432724, "learning_rate": 0.00038467787114845935, "loss": 0.3978, "step": 22080 }, { "epoch": 12.335754189944135, "grad_norm": 1.2865787744522095, "learning_rate": 0.0003846498599439776, "loss": 0.5412, "step": 22081 }, { "epoch": 12.336312849162011, "grad_norm": 1.1106302738189697, "learning_rate": 0.0003846218487394958, "loss": 0.368, "step": 22082 }, { "epoch": 12.336871508379888, "grad_norm": 0.5012155771255493, "learning_rate": 0.000384593837535014, "loss": 0.3491, "step": 22083 }, { "epoch": 12.337430167597766, "grad_norm": 0.40300559997558594, "learning_rate": 0.0003845658263305322, "loss": 0.4637, "step": 22084 }, { "epoch": 12.337988826815643, "grad_norm": 0.3870534300804138, "learning_rate": 0.0003845378151260504, "loss": 0.4074, "step": 22085 }, { "epoch": 12.33854748603352, "grad_norm": 0.32733431458473206, "learning_rate": 0.00038450980392156864, "loss": 0.3158, "step": 22086 }, { "epoch": 12.339106145251396, "grad_norm": 0.4238924980163574, "learning_rate": 0.00038448179271708684, "loss": 0.5426, "step": 22087 }, { "epoch": 12.339664804469274, "grad_norm": 0.7052682042121887, "learning_rate": 0.00038445378151260505, "loss": 0.3858, "step": 22088 }, { "epoch": 12.34022346368715, "grad_norm": 0.3622893989086151, "learning_rate": 0.00038442577030812326, "loss": 0.3668, "step": 22089 }, { "epoch": 12.340782122905027, "grad_norm": 4.846566200256348, "learning_rate": 0.00038439775910364146, "loss": 0.3675, "step": 22090 }, { "epoch": 12.341340782122906, "grad_norm": 0.4018767178058624, "learning_rate": 0.00038436974789915967, "loss": 0.4772, "step": 22091 }, { "epoch": 12.341899441340782, "grad_norm": 0.7569673657417297, "learning_rate": 0.0003843417366946779, "loss": 0.5905, "step": 22092 }, { "epoch": 12.342458100558659, "grad_norm": 0.47620949149131775, "learning_rate": 0.00038431372549019614, "loss": 0.4197, "step": 22093 }, { "epoch": 12.343016759776535, "grad_norm": 0.4854866862297058, "learning_rate": 
0.0003842857142857143, "loss": 0.3195, "step": 22094 }, { "epoch": 12.343575418994414, "grad_norm": 0.44752511382102966, "learning_rate": 0.0003842577030812325, "loss": 0.3688, "step": 22095 }, { "epoch": 12.34413407821229, "grad_norm": 1.077589511871338, "learning_rate": 0.0003842296918767507, "loss": 0.5776, "step": 22096 }, { "epoch": 12.344692737430167, "grad_norm": 0.7049968242645264, "learning_rate": 0.0003842016806722689, "loss": 0.4006, "step": 22097 }, { "epoch": 12.345251396648045, "grad_norm": 0.5065101981163025, "learning_rate": 0.00038417366946778717, "loss": 0.3826, "step": 22098 }, { "epoch": 12.345810055865922, "grad_norm": 0.43125924468040466, "learning_rate": 0.0003841456582633053, "loss": 0.4319, "step": 22099 }, { "epoch": 12.346368715083798, "grad_norm": 0.756377637386322, "learning_rate": 0.0003841176470588235, "loss": 0.4943, "step": 22100 }, { "epoch": 12.346927374301677, "grad_norm": 11.611139297485352, "learning_rate": 0.0003840896358543418, "loss": 0.4376, "step": 22101 }, { "epoch": 12.347486033519553, "grad_norm": 3.8073177337646484, "learning_rate": 0.00038406162464985993, "loss": 0.2788, "step": 22102 }, { "epoch": 12.34804469273743, "grad_norm": 0.471903920173645, "learning_rate": 0.0003840336134453782, "loss": 0.4464, "step": 22103 }, { "epoch": 12.348603351955306, "grad_norm": 3.4419453144073486, "learning_rate": 0.00038400560224089635, "loss": 0.4921, "step": 22104 }, { "epoch": 12.349162011173185, "grad_norm": 0.4697394073009491, "learning_rate": 0.00038397759103641455, "loss": 0.5058, "step": 22105 }, { "epoch": 12.349720670391061, "grad_norm": 0.41084542870521545, "learning_rate": 0.0003839495798319328, "loss": 0.3971, "step": 22106 }, { "epoch": 12.350279329608938, "grad_norm": 0.5376641750335693, "learning_rate": 0.00038392156862745096, "loss": 0.5377, "step": 22107 }, { "epoch": 12.350837988826816, "grad_norm": 0.4319959282875061, "learning_rate": 0.0003838935574229692, "loss": 0.4161, "step": 22108 }, { "epoch": 
12.351396648044693, "grad_norm": 0.427133172750473, "learning_rate": 0.00038386554621848743, "loss": 0.4302, "step": 22109 }, { "epoch": 12.35195530726257, "grad_norm": 1.3137071132659912, "learning_rate": 0.0003838375350140056, "loss": 0.4051, "step": 22110 }, { "epoch": 12.352513966480448, "grad_norm": 0.4548395276069641, "learning_rate": 0.00038380952380952384, "loss": 0.3753, "step": 22111 }, { "epoch": 12.353072625698324, "grad_norm": 0.4807973802089691, "learning_rate": 0.000383781512605042, "loss": 0.3288, "step": 22112 }, { "epoch": 12.3536312849162, "grad_norm": 0.49493372440338135, "learning_rate": 0.00038375350140056026, "loss": 0.3117, "step": 22113 }, { "epoch": 12.354189944134077, "grad_norm": 5.739772319793701, "learning_rate": 0.00038372549019607846, "loss": 0.3862, "step": 22114 }, { "epoch": 12.354748603351956, "grad_norm": 0.5182005763053894, "learning_rate": 0.0003836974789915966, "loss": 0.4909, "step": 22115 }, { "epoch": 12.355307262569832, "grad_norm": 0.3472490608692169, "learning_rate": 0.0003836694677871149, "loss": 0.3603, "step": 22116 }, { "epoch": 12.355865921787709, "grad_norm": 0.408651202917099, "learning_rate": 0.0003836414565826331, "loss": 0.5001, "step": 22117 }, { "epoch": 12.356424581005587, "grad_norm": 0.540635883808136, "learning_rate": 0.0003836134453781513, "loss": 0.4973, "step": 22118 }, { "epoch": 12.356983240223464, "grad_norm": 0.48477354645729065, "learning_rate": 0.0003835854341736695, "loss": 0.3761, "step": 22119 }, { "epoch": 12.35754189944134, "grad_norm": 11.91270637512207, "learning_rate": 0.00038355742296918764, "loss": 0.4147, "step": 22120 }, { "epoch": 12.358100558659217, "grad_norm": 0.9062607288360596, "learning_rate": 0.0003835294117647059, "loss": 0.4661, "step": 22121 }, { "epoch": 12.358659217877095, "grad_norm": 0.4032188951969147, "learning_rate": 0.0003835014005602241, "loss": 0.4517, "step": 22122 }, { "epoch": 12.359217877094972, "grad_norm": 0.41880401968955994, "learning_rate": 
0.0003834733893557423, "loss": 0.3438, "step": 22123 }, { "epoch": 12.359776536312848, "grad_norm": 2.1132102012634277, "learning_rate": 0.0003834453781512605, "loss": 0.4405, "step": 22124 }, { "epoch": 12.360335195530727, "grad_norm": 0.496634304523468, "learning_rate": 0.0003834173669467787, "loss": 0.3625, "step": 22125 }, { "epoch": 12.360893854748603, "grad_norm": 0.46067485213279724, "learning_rate": 0.00038338935574229693, "loss": 0.4481, "step": 22126 }, { "epoch": 12.36145251396648, "grad_norm": 0.4707186222076416, "learning_rate": 0.00038336134453781514, "loss": 0.4023, "step": 22127 }, { "epoch": 12.362011173184358, "grad_norm": 2.328150749206543, "learning_rate": 0.00038333333333333334, "loss": 0.4081, "step": 22128 }, { "epoch": 12.362569832402235, "grad_norm": 0.6449905037879944, "learning_rate": 0.00038330532212885155, "loss": 0.4883, "step": 22129 }, { "epoch": 12.363128491620111, "grad_norm": 0.5139626860618591, "learning_rate": 0.00038327731092436976, "loss": 0.4341, "step": 22130 }, { "epoch": 12.363687150837988, "grad_norm": 1.5703986883163452, "learning_rate": 0.00038324929971988796, "loss": 0.461, "step": 22131 }, { "epoch": 12.364245810055866, "grad_norm": 0.4426910877227783, "learning_rate": 0.00038322128851540617, "loss": 0.4139, "step": 22132 }, { "epoch": 12.364804469273743, "grad_norm": 0.49806609749794006, "learning_rate": 0.00038319327731092443, "loss": 0.3995, "step": 22133 }, { "epoch": 12.36536312849162, "grad_norm": 0.48100659251213074, "learning_rate": 0.0003831652661064426, "loss": 0.4737, "step": 22134 }, { "epoch": 12.365921787709498, "grad_norm": 0.3108961284160614, "learning_rate": 0.0003831372549019608, "loss": 0.3132, "step": 22135 }, { "epoch": 12.366480446927374, "grad_norm": 0.49499815702438354, "learning_rate": 0.000383109243697479, "loss": 0.3311, "step": 22136 }, { "epoch": 12.367039106145251, "grad_norm": 0.7148417234420776, "learning_rate": 0.0003830812324929972, "loss": 0.3893, "step": 22137 }, { "epoch": 
12.36759776536313, "grad_norm": 0.7804326415061951, "learning_rate": 0.00038305322128851546, "loss": 0.3815, "step": 22138 }, { "epoch": 12.368156424581006, "grad_norm": 0.6722231507301331, "learning_rate": 0.0003830252100840336, "loss": 0.4323, "step": 22139 }, { "epoch": 12.368715083798882, "grad_norm": 0.645348846912384, "learning_rate": 0.0003829971988795518, "loss": 0.569, "step": 22140 }, { "epoch": 12.369273743016759, "grad_norm": 0.6104491353034973, "learning_rate": 0.0003829691876750701, "loss": 0.5616, "step": 22141 }, { "epoch": 12.369832402234637, "grad_norm": 1.1796618700027466, "learning_rate": 0.00038294117647058823, "loss": 0.4672, "step": 22142 }, { "epoch": 12.370391061452514, "grad_norm": 0.8682754635810852, "learning_rate": 0.0003829131652661065, "loss": 0.4328, "step": 22143 }, { "epoch": 12.37094972067039, "grad_norm": 0.5486952662467957, "learning_rate": 0.00038288515406162464, "loss": 0.4437, "step": 22144 }, { "epoch": 12.371508379888269, "grad_norm": 0.5833659172058105, "learning_rate": 0.00038285714285714285, "loss": 0.4126, "step": 22145 }, { "epoch": 12.372067039106145, "grad_norm": 0.6327529549598694, "learning_rate": 0.0003828291316526611, "loss": 0.3344, "step": 22146 }, { "epoch": 12.372625698324022, "grad_norm": 0.6232379674911499, "learning_rate": 0.00038280112044817926, "loss": 0.485, "step": 22147 }, { "epoch": 12.3731843575419, "grad_norm": 0.5624763369560242, "learning_rate": 0.00038277310924369746, "loss": 0.4345, "step": 22148 }, { "epoch": 12.373743016759777, "grad_norm": 3.373488664627075, "learning_rate": 0.0003827450980392157, "loss": 0.5175, "step": 22149 }, { "epoch": 12.374301675977653, "grad_norm": 10.65363883972168, "learning_rate": 0.0003827170868347339, "loss": 0.4708, "step": 22150 }, { "epoch": 12.37486033519553, "grad_norm": 0.5036160349845886, "learning_rate": 0.00038268907563025214, "loss": 0.3163, "step": 22151 }, { "epoch": 12.375418994413408, "grad_norm": 0.47631320357322693, "learning_rate": 
0.0003826610644257703, "loss": 0.5565, "step": 22152 }, { "epoch": 12.375977653631285, "grad_norm": 0.6353400945663452, "learning_rate": 0.0003826330532212885, "loss": 0.4484, "step": 22153 }, { "epoch": 12.376536312849161, "grad_norm": 0.46554893255233765, "learning_rate": 0.00038260504201680675, "loss": 0.4478, "step": 22154 }, { "epoch": 12.37709497206704, "grad_norm": 0.6046351790428162, "learning_rate": 0.0003825770308123249, "loss": 0.4124, "step": 22155 }, { "epoch": 12.377653631284916, "grad_norm": 0.5346139073371887, "learning_rate": 0.00038254901960784317, "loss": 0.5369, "step": 22156 }, { "epoch": 12.378212290502793, "grad_norm": 0.4957559108734131, "learning_rate": 0.0003825210084033614, "loss": 0.4417, "step": 22157 }, { "epoch": 12.378770949720671, "grad_norm": 0.5550747513771057, "learning_rate": 0.0003824929971988795, "loss": 0.418, "step": 22158 }, { "epoch": 12.379329608938548, "grad_norm": 0.42560771107673645, "learning_rate": 0.0003824649859943978, "loss": 0.2678, "step": 22159 }, { "epoch": 12.379888268156424, "grad_norm": 0.62616366147995, "learning_rate": 0.00038243697478991594, "loss": 0.3804, "step": 22160 }, { "epoch": 12.380446927374301, "grad_norm": 0.5516969561576843, "learning_rate": 0.0003824089635854342, "loss": 0.4018, "step": 22161 }, { "epoch": 12.38100558659218, "grad_norm": 0.9069478511810303, "learning_rate": 0.0003823809523809524, "loss": 0.5022, "step": 22162 }, { "epoch": 12.381564245810056, "grad_norm": 0.45761433243751526, "learning_rate": 0.00038235294117647055, "loss": 0.5687, "step": 22163 }, { "epoch": 12.382122905027932, "grad_norm": 0.6873829960823059, "learning_rate": 0.0003823249299719888, "loss": 0.4693, "step": 22164 }, { "epoch": 12.38268156424581, "grad_norm": 0.3658673167228699, "learning_rate": 0.000382296918767507, "loss": 0.381, "step": 22165 }, { "epoch": 12.383240223463687, "grad_norm": 2.664102077484131, "learning_rate": 0.0003822689075630252, "loss": 0.4665, "step": 22166 }, { "epoch": 
12.383798882681564, "grad_norm": 4.503544807434082, "learning_rate": 0.00038224089635854343, "loss": 0.5338, "step": 22167 }, { "epoch": 12.38435754189944, "grad_norm": 0.5606874227523804, "learning_rate": 0.0003822128851540616, "loss": 0.4669, "step": 22168 }, { "epoch": 12.384916201117319, "grad_norm": 0.6650618314743042, "learning_rate": 0.00038218487394957984, "loss": 0.455, "step": 22169 }, { "epoch": 12.385474860335195, "grad_norm": 0.6762911677360535, "learning_rate": 0.00038215686274509805, "loss": 0.4971, "step": 22170 }, { "epoch": 12.386033519553072, "grad_norm": 0.6417225003242493, "learning_rate": 0.00038212885154061626, "loss": 0.6135, "step": 22171 }, { "epoch": 12.38659217877095, "grad_norm": 0.7029066681861877, "learning_rate": 0.00038210084033613446, "loss": 0.4607, "step": 22172 }, { "epoch": 12.387150837988827, "grad_norm": 0.49348974227905273, "learning_rate": 0.00038207282913165267, "loss": 0.4209, "step": 22173 }, { "epoch": 12.387709497206703, "grad_norm": 0.5835763216018677, "learning_rate": 0.0003820448179271709, "loss": 0.45, "step": 22174 }, { "epoch": 12.388268156424582, "grad_norm": 1.3397552967071533, "learning_rate": 0.0003820168067226891, "loss": 0.3866, "step": 22175 }, { "epoch": 12.388826815642458, "grad_norm": 3.6497960090637207, "learning_rate": 0.0003819887955182073, "loss": 0.4991, "step": 22176 }, { "epoch": 12.389385474860335, "grad_norm": 0.478920578956604, "learning_rate": 0.0003819607843137255, "loss": 0.4764, "step": 22177 }, { "epoch": 12.389944134078211, "grad_norm": 0.5885761976242065, "learning_rate": 0.0003819327731092437, "loss": 0.4812, "step": 22178 }, { "epoch": 12.39050279329609, "grad_norm": 0.38666030764579773, "learning_rate": 0.0003819047619047619, "loss": 0.3301, "step": 22179 }, { "epoch": 12.391061452513966, "grad_norm": 0.5020449757575989, "learning_rate": 0.0003818767507002801, "loss": 0.4738, "step": 22180 }, { "epoch": 12.391620111731843, "grad_norm": 0.4787828326225281, "learning_rate": 
0.00038184873949579837, "loss": 0.434, "step": 22181 }, { "epoch": 12.392178770949721, "grad_norm": 3.974708318710327, "learning_rate": 0.0003818207282913165, "loss": 0.4115, "step": 22182 }, { "epoch": 12.392737430167598, "grad_norm": 0.6274554133415222, "learning_rate": 0.00038179271708683473, "loss": 0.479, "step": 22183 }, { "epoch": 12.393296089385474, "grad_norm": 0.41928815841674805, "learning_rate": 0.00038176470588235293, "loss": 0.4133, "step": 22184 }, { "epoch": 12.393854748603353, "grad_norm": 0.42631253600120544, "learning_rate": 0.00038173669467787114, "loss": 0.4161, "step": 22185 }, { "epoch": 12.39441340782123, "grad_norm": 0.3683593273162842, "learning_rate": 0.0003817086834733894, "loss": 0.4397, "step": 22186 }, { "epoch": 12.394972067039106, "grad_norm": 0.6384584307670593, "learning_rate": 0.00038168067226890755, "loss": 0.4684, "step": 22187 }, { "epoch": 12.395530726256982, "grad_norm": 0.3789200484752655, "learning_rate": 0.00038165266106442576, "loss": 0.3977, "step": 22188 }, { "epoch": 12.39608938547486, "grad_norm": 0.8033278584480286, "learning_rate": 0.000381624649859944, "loss": 0.3437, "step": 22189 }, { "epoch": 12.396648044692737, "grad_norm": 0.7364555597305298, "learning_rate": 0.00038159663865546217, "loss": 0.4174, "step": 22190 }, { "epoch": 12.397206703910614, "grad_norm": 0.5501554012298584, "learning_rate": 0.00038156862745098043, "loss": 0.5531, "step": 22191 }, { "epoch": 12.397765363128492, "grad_norm": 0.5520037412643433, "learning_rate": 0.0003815406162464986, "loss": 0.5227, "step": 22192 }, { "epoch": 12.398324022346369, "grad_norm": 0.43416768312454224, "learning_rate": 0.0003815126050420168, "loss": 0.4131, "step": 22193 }, { "epoch": 12.398882681564245, "grad_norm": 0.6907766461372375, "learning_rate": 0.00038148459383753505, "loss": 0.4374, "step": 22194 }, { "epoch": 12.399441340782122, "grad_norm": 0.6432982683181763, "learning_rate": 0.0003814565826330532, "loss": 0.4399, "step": 22195 }, { "epoch": 12.4, 
"grad_norm": 0.3986281454563141, "learning_rate": 0.00038142857142857146, "loss": 0.3806, "step": 22196 }, { "epoch": 12.400558659217877, "grad_norm": 0.6119687557220459, "learning_rate": 0.00038140056022408967, "loss": 0.6667, "step": 22197 }, { "epoch": 12.401117318435753, "grad_norm": 0.397361695766449, "learning_rate": 0.0003813725490196078, "loss": 0.4009, "step": 22198 }, { "epoch": 12.401675977653632, "grad_norm": 0.5475913286209106, "learning_rate": 0.0003813445378151261, "loss": 0.4018, "step": 22199 }, { "epoch": 12.402234636871508, "grad_norm": 2.4565999507904053, "learning_rate": 0.00038131652661064423, "loss": 0.5485, "step": 22200 }, { "epoch": 12.402793296089385, "grad_norm": 0.49206361174583435, "learning_rate": 0.0003812885154061625, "loss": 0.485, "step": 22201 }, { "epoch": 12.403351955307263, "grad_norm": 0.689730703830719, "learning_rate": 0.0003812605042016807, "loss": 0.5028, "step": 22202 }, { "epoch": 12.40391061452514, "grad_norm": 0.4874515235424042, "learning_rate": 0.00038123249299719885, "loss": 0.5252, "step": 22203 }, { "epoch": 12.404469273743016, "grad_norm": 0.663447380065918, "learning_rate": 0.0003812044817927171, "loss": 0.5117, "step": 22204 }, { "epoch": 12.405027932960893, "grad_norm": 0.7527303099632263, "learning_rate": 0.0003811764705882353, "loss": 0.2986, "step": 22205 }, { "epoch": 12.405586592178771, "grad_norm": 0.650242805480957, "learning_rate": 0.0003811484593837535, "loss": 0.5176, "step": 22206 }, { "epoch": 12.406145251396648, "grad_norm": 0.8362577557563782, "learning_rate": 0.0003811204481792717, "loss": 0.366, "step": 22207 }, { "epoch": 12.406703910614524, "grad_norm": 0.5000497698783875, "learning_rate": 0.0003810924369747899, "loss": 0.4623, "step": 22208 }, { "epoch": 12.407262569832403, "grad_norm": 0.43475061655044556, "learning_rate": 0.00038106442577030814, "loss": 0.3866, "step": 22209 }, { "epoch": 12.40782122905028, "grad_norm": 0.4988260567188263, "learning_rate": 0.00038103641456582634, "loss": 
0.3685, "step": 22210 }, { "epoch": 12.408379888268156, "grad_norm": 0.4560915231704712, "learning_rate": 0.00038100840336134455, "loss": 0.4678, "step": 22211 }, { "epoch": 12.408938547486034, "grad_norm": 0.929999589920044, "learning_rate": 0.00038098039215686276, "loss": 0.3801, "step": 22212 }, { "epoch": 12.40949720670391, "grad_norm": 0.3650863766670227, "learning_rate": 0.00038095238095238096, "loss": 0.4155, "step": 22213 }, { "epoch": 12.410055865921787, "grad_norm": 0.593860387802124, "learning_rate": 0.00038092436974789917, "loss": 0.3116, "step": 22214 }, { "epoch": 12.410614525139664, "grad_norm": 0.3912438154220581, "learning_rate": 0.0003808963585434174, "loss": 0.4783, "step": 22215 }, { "epoch": 12.411173184357542, "grad_norm": 0.5202620625495911, "learning_rate": 0.0003808683473389356, "loss": 0.4655, "step": 22216 }, { "epoch": 12.411731843575419, "grad_norm": 0.445864200592041, "learning_rate": 0.0003808403361344538, "loss": 0.5187, "step": 22217 }, { "epoch": 12.412290502793295, "grad_norm": 0.4446728229522705, "learning_rate": 0.000380812324929972, "loss": 0.3209, "step": 22218 }, { "epoch": 12.412849162011174, "grad_norm": 3.3452131748199463, "learning_rate": 0.0003807843137254902, "loss": 0.6367, "step": 22219 }, { "epoch": 12.41340782122905, "grad_norm": 0.5621975064277649, "learning_rate": 0.0003807563025210084, "loss": 0.4413, "step": 22220 }, { "epoch": 12.413966480446927, "grad_norm": 0.41152825951576233, "learning_rate": 0.00038072829131652667, "loss": 0.4247, "step": 22221 }, { "epoch": 12.414525139664805, "grad_norm": 0.4380573034286499, "learning_rate": 0.0003807002801120448, "loss": 0.4063, "step": 22222 }, { "epoch": 12.415083798882682, "grad_norm": 0.4403732419013977, "learning_rate": 0.000380672268907563, "loss": 0.3676, "step": 22223 }, { "epoch": 12.415642458100558, "grad_norm": 0.4695834219455719, "learning_rate": 0.00038064425770308123, "loss": 0.4188, "step": 22224 }, { "epoch": 12.416201117318435, "grad_norm": 
4.389756202697754, "learning_rate": 0.00038061624649859943, "loss": 0.3637, "step": 22225 }, { "epoch": 12.416759776536313, "grad_norm": 0.4033837914466858, "learning_rate": 0.0003805882352941177, "loss": 0.3565, "step": 22226 }, { "epoch": 12.41731843575419, "grad_norm": 0.4082547724246979, "learning_rate": 0.00038056022408963585, "loss": 0.4156, "step": 22227 }, { "epoch": 12.417877094972066, "grad_norm": 0.4367124140262604, "learning_rate": 0.00038053221288515405, "loss": 0.3405, "step": 22228 }, { "epoch": 12.418435754189945, "grad_norm": 0.7928216457366943, "learning_rate": 0.0003805042016806723, "loss": 0.5301, "step": 22229 }, { "epoch": 12.418994413407821, "grad_norm": 0.5452948212623596, "learning_rate": 0.00038047619047619046, "loss": 0.4489, "step": 22230 }, { "epoch": 12.419553072625698, "grad_norm": 0.44494733214378357, "learning_rate": 0.0003804481792717087, "loss": 0.3705, "step": 22231 }, { "epoch": 12.420111731843576, "grad_norm": 0.5175156593322754, "learning_rate": 0.0003804201680672269, "loss": 0.397, "step": 22232 }, { "epoch": 12.420670391061453, "grad_norm": 0.6962254643440247, "learning_rate": 0.0003803921568627451, "loss": 0.3377, "step": 22233 }, { "epoch": 12.42122905027933, "grad_norm": 2.20906662940979, "learning_rate": 0.00038036414565826334, "loss": 0.4602, "step": 22234 }, { "epoch": 12.421787709497206, "grad_norm": 0.5983732342720032, "learning_rate": 0.0003803361344537815, "loss": 0.4225, "step": 22235 }, { "epoch": 12.422346368715084, "grad_norm": 0.6306063532829285, "learning_rate": 0.00038030812324929975, "loss": 0.486, "step": 22236 }, { "epoch": 12.422905027932961, "grad_norm": 0.6764522194862366, "learning_rate": 0.00038028011204481796, "loss": 0.4634, "step": 22237 }, { "epoch": 12.423463687150837, "grad_norm": 0.40836602449417114, "learning_rate": 0.0003802521008403361, "loss": 0.432, "step": 22238 }, { "epoch": 12.424022346368716, "grad_norm": 0.32523348927497864, "learning_rate": 0.0003802240896358544, "loss": 0.3161, 
"step": 22239 }, { "epoch": 12.424581005586592, "grad_norm": 0.603829562664032, "learning_rate": 0.0003801960784313725, "loss": 0.4671, "step": 22240 }, { "epoch": 12.425139664804469, "grad_norm": 1.2986538410186768, "learning_rate": 0.0003801680672268908, "loss": 0.5123, "step": 22241 }, { "epoch": 12.425698324022346, "grad_norm": 0.513359010219574, "learning_rate": 0.000380140056022409, "loss": 0.3993, "step": 22242 }, { "epoch": 12.426256983240224, "grad_norm": 0.4499400556087494, "learning_rate": 0.00038011204481792714, "loss": 0.3895, "step": 22243 }, { "epoch": 12.4268156424581, "grad_norm": 0.8446559906005859, "learning_rate": 0.0003800840336134454, "loss": 0.434, "step": 22244 }, { "epoch": 12.427374301675977, "grad_norm": 1.5230028629302979, "learning_rate": 0.0003800560224089636, "loss": 0.4332, "step": 22245 }, { "epoch": 12.427932960893855, "grad_norm": 5.320984363555908, "learning_rate": 0.0003800280112044818, "loss": 0.3902, "step": 22246 }, { "epoch": 12.428491620111732, "grad_norm": 0.39092665910720825, "learning_rate": 0.00038, "loss": 0.4122, "step": 22247 }, { "epoch": 12.429050279329608, "grad_norm": 2.8248467445373535, "learning_rate": 0.00037997198879551817, "loss": 0.4669, "step": 22248 }, { "epoch": 12.429608938547487, "grad_norm": 0.5952622890472412, "learning_rate": 0.00037994397759103643, "loss": 0.4862, "step": 22249 }, { "epoch": 12.430167597765363, "grad_norm": 0.46401551365852356, "learning_rate": 0.00037991596638655464, "loss": 0.3724, "step": 22250 }, { "epoch": 12.43072625698324, "grad_norm": 0.49525415897369385, "learning_rate": 0.00037988795518207284, "loss": 0.3572, "step": 22251 }, { "epoch": 12.431284916201117, "grad_norm": 0.678400993347168, "learning_rate": 0.00037985994397759105, "loss": 0.4558, "step": 22252 }, { "epoch": 12.431843575418995, "grad_norm": 5.064430236816406, "learning_rate": 0.00037983193277310926, "loss": 0.31, "step": 22253 }, { "epoch": 12.432402234636871, "grad_norm": 0.5133535861968994, "learning_rate": 
0.00037980392156862746, "loss": 0.5087, "step": 22254 }, { "epoch": 12.432960893854748, "grad_norm": 0.4685398042201996, "learning_rate": 0.00037977591036414567, "loss": 0.4688, "step": 22255 }, { "epoch": 12.433519553072626, "grad_norm": 0.6730914115905762, "learning_rate": 0.0003797478991596639, "loss": 0.5012, "step": 22256 }, { "epoch": 12.434078212290503, "grad_norm": 0.5985603332519531, "learning_rate": 0.0003797198879551821, "loss": 0.4412, "step": 22257 }, { "epoch": 12.43463687150838, "grad_norm": 0.8360685110092163, "learning_rate": 0.0003796918767507003, "loss": 0.4078, "step": 22258 }, { "epoch": 12.435195530726258, "grad_norm": 0.5260396003723145, "learning_rate": 0.0003796638655462185, "loss": 0.4636, "step": 22259 }, { "epoch": 12.435754189944134, "grad_norm": 1.8112053871154785, "learning_rate": 0.0003796358543417367, "loss": 0.4253, "step": 22260 }, { "epoch": 12.436312849162011, "grad_norm": 0.4029355049133301, "learning_rate": 0.0003796078431372549, "loss": 0.4499, "step": 22261 }, { "epoch": 12.436871508379888, "grad_norm": 0.3339584171772003, "learning_rate": 0.0003795798319327731, "loss": 0.2828, "step": 22262 }, { "epoch": 12.437430167597766, "grad_norm": 1.3417764902114868, "learning_rate": 0.0003795518207282913, "loss": 0.4251, "step": 22263 }, { "epoch": 12.437988826815642, "grad_norm": 0.4353804290294647, "learning_rate": 0.0003795238095238095, "loss": 0.4752, "step": 22264 }, { "epoch": 12.438547486033519, "grad_norm": 0.6685983538627625, "learning_rate": 0.00037949579831932773, "loss": 0.3468, "step": 22265 }, { "epoch": 12.439106145251397, "grad_norm": 0.45141395926475525, "learning_rate": 0.00037946778711484593, "loss": 0.4109, "step": 22266 }, { "epoch": 12.439664804469274, "grad_norm": 0.41326484084129333, "learning_rate": 0.00037943977591036414, "loss": 0.4098, "step": 22267 }, { "epoch": 12.44022346368715, "grad_norm": 0.5875102281570435, "learning_rate": 0.00037941176470588235, "loss": 0.4737, "step": 22268 }, { "epoch": 
12.440782122905027, "grad_norm": 0.4500752389431, "learning_rate": 0.0003793837535014006, "loss": 0.4482, "step": 22269 }, { "epoch": 12.441340782122905, "grad_norm": 3.4752230644226074, "learning_rate": 0.00037935574229691876, "loss": 0.4877, "step": 22270 }, { "epoch": 12.441899441340782, "grad_norm": 1.4074817895889282, "learning_rate": 0.00037932773109243696, "loss": 0.544, "step": 22271 }, { "epoch": 12.442458100558659, "grad_norm": 1.156740427017212, "learning_rate": 0.00037929971988795517, "loss": 0.3871, "step": 22272 }, { "epoch": 12.443016759776537, "grad_norm": 0.43119776248931885, "learning_rate": 0.0003792717086834734, "loss": 0.3439, "step": 22273 }, { "epoch": 12.443575418994413, "grad_norm": 0.5617696046829224, "learning_rate": 0.00037924369747899164, "loss": 0.4565, "step": 22274 }, { "epoch": 12.44413407821229, "grad_norm": 0.5131112337112427, "learning_rate": 0.0003792156862745098, "loss": 0.4534, "step": 22275 }, { "epoch": 12.444692737430168, "grad_norm": 0.48486822843551636, "learning_rate": 0.000379187675070028, "loss": 0.4206, "step": 22276 }, { "epoch": 12.445251396648045, "grad_norm": 0.6418407559394836, "learning_rate": 0.00037915966386554625, "loss": 0.4712, "step": 22277 }, { "epoch": 12.445810055865921, "grad_norm": 0.7483553886413574, "learning_rate": 0.0003791316526610644, "loss": 0.5092, "step": 22278 }, { "epoch": 12.446368715083798, "grad_norm": 0.641573965549469, "learning_rate": 0.00037910364145658267, "loss": 0.4297, "step": 22279 }, { "epoch": 12.446927374301676, "grad_norm": 1.379223346710205, "learning_rate": 0.0003790756302521008, "loss": 0.3879, "step": 22280 }, { "epoch": 12.447486033519553, "grad_norm": 0.7533921003341675, "learning_rate": 0.000379047619047619, "loss": 0.5088, "step": 22281 }, { "epoch": 12.44804469273743, "grad_norm": 0.5609806180000305, "learning_rate": 0.0003790196078431373, "loss": 0.4506, "step": 22282 }, { "epoch": 12.448603351955308, "grad_norm": 0.42809367179870605, "learning_rate": 
0.00037899159663865544, "loss": 0.4643, "step": 22283 }, { "epoch": 12.449162011173184, "grad_norm": 0.8922832608222961, "learning_rate": 0.0003789635854341737, "loss": 0.4519, "step": 22284 }, { "epoch": 12.449720670391061, "grad_norm": 0.6599184274673462, "learning_rate": 0.0003789355742296919, "loss": 0.3862, "step": 22285 }, { "epoch": 12.45027932960894, "grad_norm": 0.6763696670532227, "learning_rate": 0.00037890756302521005, "loss": 0.5211, "step": 22286 }, { "epoch": 12.450837988826816, "grad_norm": 0.4426666498184204, "learning_rate": 0.0003788795518207283, "loss": 0.3664, "step": 22287 }, { "epoch": 12.451396648044692, "grad_norm": 0.6292835474014282, "learning_rate": 0.00037885154061624647, "loss": 0.4443, "step": 22288 }, { "epoch": 12.451955307262569, "grad_norm": 0.6607323288917542, "learning_rate": 0.0003788235294117647, "loss": 0.3689, "step": 22289 }, { "epoch": 12.452513966480447, "grad_norm": 0.46668827533721924, "learning_rate": 0.00037879551820728293, "loss": 0.307, "step": 22290 }, { "epoch": 12.453072625698324, "grad_norm": 0.4630614221096039, "learning_rate": 0.0003787675070028011, "loss": 0.3133, "step": 22291 }, { "epoch": 12.4536312849162, "grad_norm": 0.4242039918899536, "learning_rate": 0.00037873949579831934, "loss": 0.3941, "step": 22292 }, { "epoch": 12.454189944134079, "grad_norm": 0.45228031277656555, "learning_rate": 0.00037871148459383755, "loss": 0.4242, "step": 22293 }, { "epoch": 12.454748603351955, "grad_norm": 0.6884719133377075, "learning_rate": 0.00037868347338935576, "loss": 0.383, "step": 22294 }, { "epoch": 12.455307262569832, "grad_norm": 0.3652999997138977, "learning_rate": 0.00037865546218487396, "loss": 0.3999, "step": 22295 }, { "epoch": 12.45586592178771, "grad_norm": 0.43142572045326233, "learning_rate": 0.0003786274509803921, "loss": 0.4736, "step": 22296 }, { "epoch": 12.456424581005587, "grad_norm": 0.5138300061225891, "learning_rate": 0.0003785994397759104, "loss": 0.4097, "step": 22297 }, { "epoch": 
12.456983240223463, "grad_norm": 0.5320408344268799, "learning_rate": 0.0003785714285714286, "loss": 0.4147, "step": 22298 }, { "epoch": 12.45754189944134, "grad_norm": 1.596933364868164, "learning_rate": 0.0003785434173669468, "loss": 0.3769, "step": 22299 }, { "epoch": 12.458100558659218, "grad_norm": 1.4512317180633545, "learning_rate": 0.000378515406162465, "loss": 0.6775, "step": 22300 }, { "epoch": 12.458659217877095, "grad_norm": 4.665525436401367, "learning_rate": 0.0003784873949579832, "loss": 0.3617, "step": 22301 }, { "epoch": 12.459217877094972, "grad_norm": 0.6398403644561768, "learning_rate": 0.0003784593837535014, "loss": 0.4885, "step": 22302 }, { "epoch": 12.45977653631285, "grad_norm": 0.6020791530609131, "learning_rate": 0.0003784313725490196, "loss": 0.4942, "step": 22303 }, { "epoch": 12.460335195530726, "grad_norm": 0.5313255786895752, "learning_rate": 0.00037840336134453787, "loss": 0.3656, "step": 22304 }, { "epoch": 12.460893854748603, "grad_norm": 0.37811189889907837, "learning_rate": 0.000378375350140056, "loss": 0.4451, "step": 22305 }, { "epoch": 12.461452513966481, "grad_norm": 0.5424933433532715, "learning_rate": 0.00037834733893557423, "loss": 0.4903, "step": 22306 }, { "epoch": 12.462011173184358, "grad_norm": 0.631622314453125, "learning_rate": 0.00037831932773109243, "loss": 0.3897, "step": 22307 }, { "epoch": 12.462569832402234, "grad_norm": 0.46100568771362305, "learning_rate": 0.00037829131652661064, "loss": 0.4792, "step": 22308 }, { "epoch": 12.463128491620111, "grad_norm": 0.6238300800323486, "learning_rate": 0.0003782633053221289, "loss": 0.3555, "step": 22309 }, { "epoch": 12.46368715083799, "grad_norm": 2.455458164215088, "learning_rate": 0.00037823529411764705, "loss": 0.5361, "step": 22310 }, { "epoch": 12.464245810055866, "grad_norm": 0.5340656638145447, "learning_rate": 0.00037820728291316526, "loss": 0.405, "step": 22311 }, { "epoch": 12.464804469273743, "grad_norm": 0.4993051588535309, "learning_rate": 
0.0003781792717086835, "loss": 0.3895, "step": 22312 }, { "epoch": 12.46536312849162, "grad_norm": 0.4735506474971771, "learning_rate": 0.00037815126050420167, "loss": 0.429, "step": 22313 }, { "epoch": 12.465921787709497, "grad_norm": 0.43139177560806274, "learning_rate": 0.00037812324929971993, "loss": 0.4543, "step": 22314 }, { "epoch": 12.466480446927374, "grad_norm": 0.7186444401741028, "learning_rate": 0.0003780952380952381, "loss": 0.3158, "step": 22315 }, { "epoch": 12.46703910614525, "grad_norm": 0.37876617908477783, "learning_rate": 0.0003780672268907563, "loss": 0.3068, "step": 22316 }, { "epoch": 12.467597765363129, "grad_norm": 0.6739128232002258, "learning_rate": 0.00037803921568627455, "loss": 0.395, "step": 22317 }, { "epoch": 12.468156424581005, "grad_norm": 0.4372479021549225, "learning_rate": 0.0003780112044817927, "loss": 0.4616, "step": 22318 }, { "epoch": 12.468715083798882, "grad_norm": 0.476572722196579, "learning_rate": 0.00037798319327731096, "loss": 0.3813, "step": 22319 }, { "epoch": 12.46927374301676, "grad_norm": 0.313006192445755, "learning_rate": 0.00037795518207282917, "loss": 0.3633, "step": 22320 }, { "epoch": 12.469832402234637, "grad_norm": 0.42260926961898804, "learning_rate": 0.0003779271708683473, "loss": 0.379, "step": 22321 }, { "epoch": 12.470391061452514, "grad_norm": 0.547232449054718, "learning_rate": 0.0003778991596638656, "loss": 0.5649, "step": 22322 }, { "epoch": 12.470949720670392, "grad_norm": 0.5282040238380432, "learning_rate": 0.00037787114845938373, "loss": 0.6735, "step": 22323 }, { "epoch": 12.471508379888268, "grad_norm": 0.9079980254173279, "learning_rate": 0.000377843137254902, "loss": 0.4699, "step": 22324 }, { "epoch": 12.472067039106145, "grad_norm": 0.5286577939987183, "learning_rate": 0.0003778151260504202, "loss": 0.5274, "step": 22325 }, { "epoch": 12.472625698324022, "grad_norm": 0.5055443048477173, "learning_rate": 0.00037778711484593835, "loss": 0.427, "step": 22326 }, { "epoch": 
12.4731843575419, "grad_norm": 0.5971497893333435, "learning_rate": 0.0003777591036414566, "loss": 0.3526, "step": 22327 }, { "epoch": 12.473743016759776, "grad_norm": 0.8135359287261963, "learning_rate": 0.0003777310924369748, "loss": 0.5703, "step": 22328 }, { "epoch": 12.474301675977653, "grad_norm": 0.4614641070365906, "learning_rate": 0.000377703081232493, "loss": 0.5145, "step": 22329 }, { "epoch": 12.474860335195531, "grad_norm": 0.5052471160888672, "learning_rate": 0.0003776750700280112, "loss": 0.4039, "step": 22330 }, { "epoch": 12.475418994413408, "grad_norm": 0.4049806296825409, "learning_rate": 0.0003776470588235294, "loss": 0.3715, "step": 22331 }, { "epoch": 12.475977653631285, "grad_norm": 0.5324528217315674, "learning_rate": 0.00037761904761904764, "loss": 0.3494, "step": 22332 }, { "epoch": 12.476536312849163, "grad_norm": 0.5156274437904358, "learning_rate": 0.00037759103641456584, "loss": 0.3729, "step": 22333 }, { "epoch": 12.47709497206704, "grad_norm": 0.34459835290908813, "learning_rate": 0.00037756302521008405, "loss": 0.3578, "step": 22334 }, { "epoch": 12.477653631284916, "grad_norm": 0.46284016966819763, "learning_rate": 0.00037753501400560226, "loss": 0.4223, "step": 22335 }, { "epoch": 12.478212290502793, "grad_norm": 0.6220923066139221, "learning_rate": 0.00037750700280112046, "loss": 0.4281, "step": 22336 }, { "epoch": 12.478770949720671, "grad_norm": 0.6622411012649536, "learning_rate": 0.00037747899159663867, "loss": 0.4348, "step": 22337 }, { "epoch": 12.479329608938547, "grad_norm": 0.43630436062812805, "learning_rate": 0.0003774509803921569, "loss": 0.4421, "step": 22338 }, { "epoch": 12.479888268156424, "grad_norm": 0.4875757396221161, "learning_rate": 0.0003774229691876751, "loss": 0.4511, "step": 22339 }, { "epoch": 12.480446927374302, "grad_norm": 0.5452539324760437, "learning_rate": 0.0003773949579831933, "loss": 0.4978, "step": 22340 }, { "epoch": 12.481005586592179, "grad_norm": 0.7059219479560852, "learning_rate": 
0.0003773669467787115, "loss": 0.6353, "step": 22341 }, { "epoch": 12.481564245810056, "grad_norm": 0.3773597478866577, "learning_rate": 0.0003773389355742297, "loss": 0.3723, "step": 22342 }, { "epoch": 12.482122905027932, "grad_norm": 0.4117157757282257, "learning_rate": 0.0003773109243697479, "loss": 0.4021, "step": 22343 }, { "epoch": 12.48268156424581, "grad_norm": 0.7017507553100586, "learning_rate": 0.00037728291316526617, "loss": 0.4756, "step": 22344 }, { "epoch": 12.483240223463687, "grad_norm": 0.7124935984611511, "learning_rate": 0.0003772549019607843, "loss": 0.5007, "step": 22345 }, { "epoch": 12.483798882681564, "grad_norm": 0.5178902745246887, "learning_rate": 0.0003772268907563025, "loss": 0.3957, "step": 22346 }, { "epoch": 12.484357541899442, "grad_norm": 0.8460575938224792, "learning_rate": 0.00037719887955182073, "loss": 0.4692, "step": 22347 }, { "epoch": 12.484916201117318, "grad_norm": 0.48622167110443115, "learning_rate": 0.00037717086834733893, "loss": 0.4074, "step": 22348 }, { "epoch": 12.485474860335195, "grad_norm": 0.5220047235488892, "learning_rate": 0.0003771428571428572, "loss": 0.3927, "step": 22349 }, { "epoch": 12.486033519553073, "grad_norm": 0.6508392095565796, "learning_rate": 0.00037711484593837535, "loss": 0.381, "step": 22350 }, { "epoch": 12.48659217877095, "grad_norm": 1.6685177087783813, "learning_rate": 0.00037708683473389355, "loss": 0.4279, "step": 22351 }, { "epoch": 12.487150837988827, "grad_norm": 0.5280059576034546, "learning_rate": 0.0003770588235294118, "loss": 0.3787, "step": 22352 }, { "epoch": 12.487709497206703, "grad_norm": 1.572139024734497, "learning_rate": 0.00037703081232492996, "loss": 0.4289, "step": 22353 }, { "epoch": 12.488268156424581, "grad_norm": 0.7184972167015076, "learning_rate": 0.0003770028011204482, "loss": 0.3445, "step": 22354 }, { "epoch": 12.488826815642458, "grad_norm": 0.5787521004676819, "learning_rate": 0.0003769747899159664, "loss": 0.4458, "step": 22355 }, { "epoch": 
12.489385474860335, "grad_norm": 1.8154767751693726, "learning_rate": 0.0003769467787114846, "loss": 0.3592, "step": 22356 }, { "epoch": 12.489944134078213, "grad_norm": 0.5421994924545288, "learning_rate": 0.00037691876750700284, "loss": 0.5095, "step": 22357 }, { "epoch": 12.49050279329609, "grad_norm": 0.4253360629081726, "learning_rate": 0.000376890756302521, "loss": 0.4143, "step": 22358 }, { "epoch": 12.491061452513966, "grad_norm": 0.4831967055797577, "learning_rate": 0.00037686274509803925, "loss": 0.3922, "step": 22359 }, { "epoch": 12.491620111731844, "grad_norm": 0.5803911685943604, "learning_rate": 0.00037683473389355746, "loss": 0.3825, "step": 22360 }, { "epoch": 12.492178770949721, "grad_norm": 0.5731937289237976, "learning_rate": 0.0003768067226890756, "loss": 0.3661, "step": 22361 }, { "epoch": 12.492737430167598, "grad_norm": 4.877864837646484, "learning_rate": 0.0003767787114845939, "loss": 0.4392, "step": 22362 }, { "epoch": 12.493296089385474, "grad_norm": 0.8301437497138977, "learning_rate": 0.000376750700280112, "loss": 0.5388, "step": 22363 }, { "epoch": 12.493854748603352, "grad_norm": 0.5814093947410583, "learning_rate": 0.0003767226890756303, "loss": 0.4255, "step": 22364 }, { "epoch": 12.494413407821229, "grad_norm": 0.8469949960708618, "learning_rate": 0.0003766946778711485, "loss": 0.5702, "step": 22365 }, { "epoch": 12.494972067039106, "grad_norm": 0.40682846307754517, "learning_rate": 0.00037666666666666664, "loss": 0.3448, "step": 22366 }, { "epoch": 12.495530726256984, "grad_norm": 0.5928486585617065, "learning_rate": 0.0003766386554621849, "loss": 0.4481, "step": 22367 }, { "epoch": 12.49608938547486, "grad_norm": 0.5610290765762329, "learning_rate": 0.0003766106442577031, "loss": 0.4467, "step": 22368 }, { "epoch": 12.496648044692737, "grad_norm": 0.6019773483276367, "learning_rate": 0.0003765826330532213, "loss": 0.3853, "step": 22369 }, { "epoch": 12.497206703910614, "grad_norm": 0.8053200244903564, "learning_rate": 
0.0003765546218487395, "loss": 0.4132, "step": 22370 }, { "epoch": 12.497765363128492, "grad_norm": 0.7026535868644714, "learning_rate": 0.00037652661064425767, "loss": 0.3959, "step": 22371 }, { "epoch": 12.498324022346369, "grad_norm": 0.3878311812877655, "learning_rate": 0.00037649859943977593, "loss": 0.312, "step": 22372 }, { "epoch": 12.498882681564245, "grad_norm": 1.6654926538467407, "learning_rate": 0.00037647058823529414, "loss": 0.4812, "step": 22373 }, { "epoch": 12.499441340782123, "grad_norm": 0.6538389325141907, "learning_rate": 0.0003764425770308123, "loss": 0.4063, "step": 22374 }, { "epoch": 12.5, "grad_norm": 0.7435426712036133, "learning_rate": 0.00037641456582633055, "loss": 0.3508, "step": 22375 }, { "epoch": 12.500558659217877, "grad_norm": 6.916824817657471, "learning_rate": 0.00037638655462184876, "loss": 0.4492, "step": 22376 }, { "epoch": 12.501117318435755, "grad_norm": 0.5683846473693848, "learning_rate": 0.00037635854341736696, "loss": 0.4936, "step": 22377 }, { "epoch": 12.501675977653631, "grad_norm": 0.5606112480163574, "learning_rate": 0.00037633053221288517, "loss": 0.415, "step": 22378 }, { "epoch": 12.502234636871508, "grad_norm": 0.6635780334472656, "learning_rate": 0.0003763025210084033, "loss": 0.3643, "step": 22379 }, { "epoch": 12.502793296089386, "grad_norm": 2.184018611907959, "learning_rate": 0.0003762745098039216, "loss": 0.4611, "step": 22380 }, { "epoch": 12.503351955307263, "grad_norm": 0.44623345136642456, "learning_rate": 0.0003762464985994398, "loss": 0.4458, "step": 22381 }, { "epoch": 12.50391061452514, "grad_norm": 0.3886384665966034, "learning_rate": 0.000376218487394958, "loss": 0.3932, "step": 22382 }, { "epoch": 12.504469273743016, "grad_norm": 0.5578534603118896, "learning_rate": 0.0003761904761904762, "loss": 0.4275, "step": 22383 }, { "epoch": 12.505027932960894, "grad_norm": 0.6590132713317871, "learning_rate": 0.0003761624649859944, "loss": 0.4364, "step": 22384 }, { "epoch": 12.505586592178771, 
"grad_norm": 0.48075932264328003, "learning_rate": 0.0003761344537815126, "loss": 0.3414, "step": 22385 }, { "epoch": 12.506145251396648, "grad_norm": 0.38701489567756653, "learning_rate": 0.0003761064425770308, "loss": 0.439, "step": 22386 }, { "epoch": 12.506703910614526, "grad_norm": 3.3040475845336914, "learning_rate": 0.000376078431372549, "loss": 0.5619, "step": 22387 }, { "epoch": 12.507262569832402, "grad_norm": 0.5540610551834106, "learning_rate": 0.00037605042016806723, "loss": 0.5202, "step": 22388 }, { "epoch": 12.507821229050279, "grad_norm": 0.45256611704826355, "learning_rate": 0.00037602240896358543, "loss": 0.4175, "step": 22389 }, { "epoch": 12.508379888268156, "grad_norm": 4.7016377449035645, "learning_rate": 0.00037599439775910364, "loss": 0.4584, "step": 22390 }, { "epoch": 12.508938547486034, "grad_norm": 0.37993180751800537, "learning_rate": 0.00037596638655462185, "loss": 0.3877, "step": 22391 }, { "epoch": 12.50949720670391, "grad_norm": 0.37144502997398376, "learning_rate": 0.0003759383753501401, "loss": 0.3715, "step": 22392 }, { "epoch": 12.510055865921787, "grad_norm": 0.3228665888309479, "learning_rate": 0.00037591036414565826, "loss": 0.2991, "step": 22393 }, { "epoch": 12.510614525139665, "grad_norm": 0.39813941717147827, "learning_rate": 0.00037588235294117646, "loss": 0.3389, "step": 22394 }, { "epoch": 12.511173184357542, "grad_norm": 0.7362692952156067, "learning_rate": 0.00037585434173669467, "loss": 0.441, "step": 22395 }, { "epoch": 12.511731843575419, "grad_norm": 0.6402891874313354, "learning_rate": 0.0003758263305322129, "loss": 0.471, "step": 22396 }, { "epoch": 12.512290502793297, "grad_norm": 0.9931805729866028, "learning_rate": 0.00037579831932773114, "loss": 0.4023, "step": 22397 }, { "epoch": 12.512849162011173, "grad_norm": 0.9622296094894409, "learning_rate": 0.0003757703081232493, "loss": 0.3927, "step": 22398 }, { "epoch": 12.51340782122905, "grad_norm": 0.521777331829071, "learning_rate": 0.0003757422969187675, 
"loss": 0.3422, "step": 22399 }, { "epoch": 12.513966480446927, "grad_norm": 0.49033617973327637, "learning_rate": 0.00037571428571428575, "loss": 0.3294, "step": 22400 }, { "epoch": 12.514525139664805, "grad_norm": 0.5281840562820435, "learning_rate": 0.0003756862745098039, "loss": 0.3958, "step": 22401 }, { "epoch": 12.515083798882682, "grad_norm": 0.4699585735797882, "learning_rate": 0.00037565826330532217, "loss": 0.4798, "step": 22402 }, { "epoch": 12.515642458100558, "grad_norm": 0.8030683994293213, "learning_rate": 0.0003756302521008403, "loss": 0.4972, "step": 22403 }, { "epoch": 12.516201117318436, "grad_norm": 1.1272097826004028, "learning_rate": 0.0003756022408963585, "loss": 0.3031, "step": 22404 }, { "epoch": 12.516759776536313, "grad_norm": 0.4536973237991333, "learning_rate": 0.0003755742296918768, "loss": 0.3132, "step": 22405 }, { "epoch": 12.51731843575419, "grad_norm": 0.7675778269767761, "learning_rate": 0.00037554621848739494, "loss": 0.4384, "step": 22406 }, { "epoch": 12.517877094972068, "grad_norm": 1.0920019149780273, "learning_rate": 0.0003755182072829132, "loss": 0.4312, "step": 22407 }, { "epoch": 12.518435754189944, "grad_norm": 1.5458168983459473, "learning_rate": 0.0003754901960784314, "loss": 0.4865, "step": 22408 }, { "epoch": 12.518994413407821, "grad_norm": 1.7475107908248901, "learning_rate": 0.00037546218487394955, "loss": 0.4396, "step": 22409 }, { "epoch": 12.519553072625698, "grad_norm": 0.39102673530578613, "learning_rate": 0.0003754341736694678, "loss": 0.2953, "step": 22410 }, { "epoch": 12.520111731843576, "grad_norm": 0.3776918649673462, "learning_rate": 0.00037540616246498597, "loss": 0.4415, "step": 22411 }, { "epoch": 12.520670391061453, "grad_norm": 0.48570331931114197, "learning_rate": 0.0003753781512605042, "loss": 0.511, "step": 22412 }, { "epoch": 12.521229050279329, "grad_norm": 1.24405038356781, "learning_rate": 0.00037535014005602243, "loss": 0.4123, "step": 22413 }, { "epoch": 12.521787709497207, "grad_norm": 
0.7005947232246399, "learning_rate": 0.0003753221288515406, "loss": 0.4271, "step": 22414 }, { "epoch": 12.522346368715084, "grad_norm": 0.5677045583724976, "learning_rate": 0.00037529411764705884, "loss": 0.5057, "step": 22415 }, { "epoch": 12.52290502793296, "grad_norm": 0.41748666763305664, "learning_rate": 0.00037526610644257705, "loss": 0.4178, "step": 22416 }, { "epoch": 12.523463687150837, "grad_norm": 0.4275798201560974, "learning_rate": 0.00037523809523809526, "loss": 0.4096, "step": 22417 }, { "epoch": 12.524022346368715, "grad_norm": 0.4180930554866791, "learning_rate": 0.00037521008403361346, "loss": 0.4446, "step": 22418 }, { "epoch": 12.524581005586592, "grad_norm": 9.960973739624023, "learning_rate": 0.0003751820728291316, "loss": 0.5341, "step": 22419 }, { "epoch": 12.525139664804469, "grad_norm": 3.246262311935425, "learning_rate": 0.0003751540616246499, "loss": 0.4872, "step": 22420 }, { "epoch": 12.525698324022347, "grad_norm": 0.584867537021637, "learning_rate": 0.0003751260504201681, "loss": 0.5785, "step": 22421 }, { "epoch": 12.526256983240224, "grad_norm": 0.5116361379623413, "learning_rate": 0.0003750980392156863, "loss": 0.3505, "step": 22422 }, { "epoch": 12.5268156424581, "grad_norm": 0.3268989026546478, "learning_rate": 0.0003750700280112045, "loss": 0.3381, "step": 22423 }, { "epoch": 12.527374301675978, "grad_norm": 0.40259113907814026, "learning_rate": 0.0003750420168067227, "loss": 0.3822, "step": 22424 }, { "epoch": 12.527932960893855, "grad_norm": 0.48108726739883423, "learning_rate": 0.0003750140056022409, "loss": 0.4306, "step": 22425 }, { "epoch": 12.528491620111732, "grad_norm": 0.601531982421875, "learning_rate": 0.0003749859943977591, "loss": 0.5006, "step": 22426 }, { "epoch": 12.529050279329608, "grad_norm": 0.47578126192092896, "learning_rate": 0.0003749579831932773, "loss": 0.4961, "step": 22427 }, { "epoch": 12.529608938547486, "grad_norm": 0.5015585422515869, "learning_rate": 0.0003749299719887955, "loss": 0.4548, 
"step": 22428 }, { "epoch": 12.530167597765363, "grad_norm": 0.7206169962882996, "learning_rate": 0.00037490196078431373, "loss": 0.4622, "step": 22429 }, { "epoch": 12.53072625698324, "grad_norm": 0.4819530248641968, "learning_rate": 0.00037487394957983193, "loss": 0.4867, "step": 22430 }, { "epoch": 12.531284916201118, "grad_norm": 0.8435218930244446, "learning_rate": 0.00037484593837535014, "loss": 0.3721, "step": 22431 }, { "epoch": 12.531843575418995, "grad_norm": 0.6111214756965637, "learning_rate": 0.0003748179271708684, "loss": 0.457, "step": 22432 }, { "epoch": 12.532402234636871, "grad_norm": 0.44760215282440186, "learning_rate": 0.00037478991596638655, "loss": 0.3866, "step": 22433 }, { "epoch": 12.53296089385475, "grad_norm": 0.5960481762886047, "learning_rate": 0.00037476190476190476, "loss": 0.415, "step": 22434 }, { "epoch": 12.533519553072626, "grad_norm": 0.9160645604133606, "learning_rate": 0.00037473389355742296, "loss": 0.3975, "step": 22435 }, { "epoch": 12.534078212290503, "grad_norm": 1.0403597354888916, "learning_rate": 0.00037470588235294117, "loss": 0.3687, "step": 22436 }, { "epoch": 12.53463687150838, "grad_norm": 1.976467490196228, "learning_rate": 0.00037467787114845943, "loss": 0.4413, "step": 22437 }, { "epoch": 12.535195530726257, "grad_norm": 0.39749184250831604, "learning_rate": 0.0003746498599439776, "loss": 0.5165, "step": 22438 }, { "epoch": 12.535754189944134, "grad_norm": 0.4099052846431732, "learning_rate": 0.0003746218487394958, "loss": 0.3771, "step": 22439 }, { "epoch": 12.53631284916201, "grad_norm": 0.491862028837204, "learning_rate": 0.00037459383753501405, "loss": 0.3967, "step": 22440 }, { "epoch": 12.536871508379889, "grad_norm": 0.5626294612884521, "learning_rate": 0.0003745658263305322, "loss": 0.3193, "step": 22441 }, { "epoch": 12.537430167597766, "grad_norm": 0.3762354552745819, "learning_rate": 0.00037453781512605046, "loss": 0.4102, "step": 22442 }, { "epoch": 12.537988826815642, "grad_norm": 
0.7769125699996948, "learning_rate": 0.0003745098039215686, "loss": 0.5664, "step": 22443 }, { "epoch": 12.538547486033519, "grad_norm": 0.40960627794265747, "learning_rate": 0.0003744817927170868, "loss": 0.4173, "step": 22444 }, { "epoch": 12.539106145251397, "grad_norm": 0.40965136885643005, "learning_rate": 0.0003744537815126051, "loss": 0.4408, "step": 22445 }, { "epoch": 12.539664804469274, "grad_norm": 0.7106426358222961, "learning_rate": 0.00037442577030812323, "loss": 0.4431, "step": 22446 }, { "epoch": 12.54022346368715, "grad_norm": 1.4433718919754028, "learning_rate": 0.0003743977591036415, "loss": 0.4828, "step": 22447 }, { "epoch": 12.540782122905028, "grad_norm": 0.38598471879959106, "learning_rate": 0.0003743697478991597, "loss": 0.3986, "step": 22448 }, { "epoch": 12.541340782122905, "grad_norm": 0.33879542350769043, "learning_rate": 0.00037434173669467785, "loss": 0.3909, "step": 22449 }, { "epoch": 12.541899441340782, "grad_norm": 0.4508078396320343, "learning_rate": 0.0003743137254901961, "loss": 0.4385, "step": 22450 }, { "epoch": 12.54245810055866, "grad_norm": 0.7343797087669373, "learning_rate": 0.00037428571428571426, "loss": 0.4046, "step": 22451 }, { "epoch": 12.543016759776537, "grad_norm": 3.512624740600586, "learning_rate": 0.0003742577030812325, "loss": 0.3836, "step": 22452 }, { "epoch": 12.543575418994413, "grad_norm": 0.4800110459327698, "learning_rate": 0.0003742296918767507, "loss": 0.3622, "step": 22453 }, { "epoch": 12.544134078212291, "grad_norm": 0.4378245770931244, "learning_rate": 0.0003742016806722689, "loss": 0.3643, "step": 22454 }, { "epoch": 12.544692737430168, "grad_norm": 0.5712656378746033, "learning_rate": 0.00037417366946778714, "loss": 0.4772, "step": 22455 }, { "epoch": 12.545251396648045, "grad_norm": 0.40475600957870483, "learning_rate": 0.00037414565826330534, "loss": 0.3983, "step": 22456 }, { "epoch": 12.545810055865921, "grad_norm": 0.3769834041595459, "learning_rate": 0.00037411764705882355, "loss": 
0.4161, "step": 22457 }, { "epoch": 12.5463687150838, "grad_norm": 0.45288193225860596, "learning_rate": 0.00037408963585434176, "loss": 0.4712, "step": 22458 }, { "epoch": 12.546927374301676, "grad_norm": 0.6904294490814209, "learning_rate": 0.0003740616246498599, "loss": 0.467, "step": 22459 }, { "epoch": 12.547486033519553, "grad_norm": 0.667701780796051, "learning_rate": 0.00037403361344537817, "loss": 0.4746, "step": 22460 }, { "epoch": 12.548044692737431, "grad_norm": 0.4270884692668915, "learning_rate": 0.0003740056022408964, "loss": 0.4712, "step": 22461 }, { "epoch": 12.548603351955308, "grad_norm": 0.6284533143043518, "learning_rate": 0.0003739775910364146, "loss": 0.3833, "step": 22462 }, { "epoch": 12.549162011173184, "grad_norm": 1.2914358377456665, "learning_rate": 0.0003739495798319328, "loss": 0.3785, "step": 22463 }, { "epoch": 12.54972067039106, "grad_norm": 1.172956943511963, "learning_rate": 0.000373921568627451, "loss": 0.3974, "step": 22464 }, { "epoch": 12.550279329608939, "grad_norm": 0.7549270391464233, "learning_rate": 0.0003738935574229692, "loss": 0.372, "step": 22465 }, { "epoch": 12.550837988826816, "grad_norm": 0.4130135774612427, "learning_rate": 0.0003738655462184874, "loss": 0.3578, "step": 22466 }, { "epoch": 12.551396648044692, "grad_norm": 0.40527695417404175, "learning_rate": 0.0003738375350140056, "loss": 0.3401, "step": 22467 }, { "epoch": 12.55195530726257, "grad_norm": 0.6868615746498108, "learning_rate": 0.0003738095238095238, "loss": 0.3728, "step": 22468 }, { "epoch": 12.552513966480447, "grad_norm": 0.6799153089523315, "learning_rate": 0.000373781512605042, "loss": 0.3624, "step": 22469 }, { "epoch": 12.553072625698324, "grad_norm": 0.5361668467521667, "learning_rate": 0.00037375350140056023, "loss": 0.4261, "step": 22470 }, { "epoch": 12.553631284916202, "grad_norm": 0.5403088331222534, "learning_rate": 0.00037372549019607843, "loss": 0.4815, "step": 22471 }, { "epoch": 12.554189944134079, "grad_norm": 
1.6084001064300537, "learning_rate": 0.0003736974789915967, "loss": 0.4416, "step": 22472 }, { "epoch": 12.554748603351955, "grad_norm": 0.41897672414779663, "learning_rate": 0.00037366946778711485, "loss": 0.3341, "step": 22473 }, { "epoch": 12.555307262569832, "grad_norm": 0.4434489607810974, "learning_rate": 0.00037364145658263305, "loss": 0.4018, "step": 22474 }, { "epoch": 12.55586592178771, "grad_norm": 0.3878321349620819, "learning_rate": 0.00037361344537815126, "loss": 0.3278, "step": 22475 }, { "epoch": 12.556424581005587, "grad_norm": 0.3136431872844696, "learning_rate": 0.00037358543417366946, "loss": 0.4089, "step": 22476 }, { "epoch": 12.556983240223463, "grad_norm": 0.42402777075767517, "learning_rate": 0.0003735574229691877, "loss": 0.3563, "step": 22477 }, { "epoch": 12.557541899441341, "grad_norm": 0.49588528275489807, "learning_rate": 0.0003735294117647059, "loss": 0.3614, "step": 22478 }, { "epoch": 12.558100558659218, "grad_norm": 0.48343682289123535, "learning_rate": 0.0003735014005602241, "loss": 0.4303, "step": 22479 }, { "epoch": 12.558659217877095, "grad_norm": 0.5517815351486206, "learning_rate": 0.00037347338935574234, "loss": 0.4309, "step": 22480 }, { "epoch": 12.559217877094973, "grad_norm": 0.588929295539856, "learning_rate": 0.0003734453781512605, "loss": 0.5073, "step": 22481 }, { "epoch": 12.55977653631285, "grad_norm": 0.5685851573944092, "learning_rate": 0.0003734173669467787, "loss": 0.36, "step": 22482 }, { "epoch": 12.560335195530726, "grad_norm": 1.3220291137695312, "learning_rate": 0.0003733893557422969, "loss": 0.4066, "step": 22483 }, { "epoch": 12.560893854748603, "grad_norm": 0.5837740302085876, "learning_rate": 0.0003733613445378151, "loss": 0.5779, "step": 22484 }, { "epoch": 12.561452513966481, "grad_norm": 0.596911609172821, "learning_rate": 0.0003733333333333334, "loss": 0.3997, "step": 22485 }, { "epoch": 12.562011173184358, "grad_norm": 1.013767957687378, "learning_rate": 0.0003733053221288515, "loss": 0.5074, 
"step": 22486 }, { "epoch": 12.562569832402234, "grad_norm": 0.6253725290298462, "learning_rate": 0.00037327731092436973, "loss": 0.3561, "step": 22487 }, { "epoch": 12.563128491620112, "grad_norm": 0.47042229771614075, "learning_rate": 0.000373249299719888, "loss": 0.3947, "step": 22488 }, { "epoch": 12.563687150837989, "grad_norm": 0.41710326075553894, "learning_rate": 0.00037322128851540614, "loss": 0.3406, "step": 22489 }, { "epoch": 12.564245810055866, "grad_norm": 0.46124324202537537, "learning_rate": 0.0003731932773109244, "loss": 0.313, "step": 22490 }, { "epoch": 12.564804469273742, "grad_norm": 0.8389171361923218, "learning_rate": 0.00037316526610644255, "loss": 0.4081, "step": 22491 }, { "epoch": 12.56536312849162, "grad_norm": 0.8235301971435547, "learning_rate": 0.00037313725490196076, "loss": 0.4478, "step": 22492 }, { "epoch": 12.565921787709497, "grad_norm": 0.7095931172370911, "learning_rate": 0.000373109243697479, "loss": 0.5667, "step": 22493 }, { "epoch": 12.566480446927374, "grad_norm": 0.5218303799629211, "learning_rate": 0.00037308123249299717, "loss": 0.3155, "step": 22494 }, { "epoch": 12.567039106145252, "grad_norm": 0.41921132802963257, "learning_rate": 0.00037305322128851543, "loss": 0.373, "step": 22495 }, { "epoch": 12.567597765363129, "grad_norm": 0.46741050481796265, "learning_rate": 0.00037302521008403364, "loss": 0.4563, "step": 22496 }, { "epoch": 12.568156424581005, "grad_norm": 0.6967788338661194, "learning_rate": 0.0003729971988795518, "loss": 0.522, "step": 22497 }, { "epoch": 12.568715083798883, "grad_norm": 0.8631910085678101, "learning_rate": 0.00037296918767507005, "loss": 0.4022, "step": 22498 }, { "epoch": 12.56927374301676, "grad_norm": 0.4822239279747009, "learning_rate": 0.0003729411764705882, "loss": 0.4814, "step": 22499 }, { "epoch": 12.569832402234637, "grad_norm": 1.4286555051803589, "learning_rate": 0.00037291316526610646, "loss": 0.3527, "step": 22500 }, { "epoch": 12.569832402234637, "eval_cer": 
0.0869911514080281, "eval_loss": 0.33226311206817627, "eval_runtime": 55.5702, "eval_samples_per_second": 81.662, "eval_steps_per_second": 5.111, "eval_wer": 0.3423368062344627, "step": 22500 }, { "epoch": 12.570391061452513, "grad_norm": 0.33531805872917175, "learning_rate": 0.00037288515406162467, "loss": 0.4203, "step": 22501 }, { "epoch": 12.570949720670392, "grad_norm": 0.40229159593582153, "learning_rate": 0.0003728571428571428, "loss": 0.3863, "step": 22502 }, { "epoch": 12.571508379888268, "grad_norm": 1.1792356967926025, "learning_rate": 0.0003728291316526611, "loss": 0.4736, "step": 22503 }, { "epoch": 12.572067039106145, "grad_norm": 0.5437342524528503, "learning_rate": 0.0003728011204481793, "loss": 0.3849, "step": 22504 }, { "epoch": 12.572625698324023, "grad_norm": 0.7495285272598267, "learning_rate": 0.0003727731092436975, "loss": 0.4733, "step": 22505 }, { "epoch": 12.5731843575419, "grad_norm": 0.6106339693069458, "learning_rate": 0.0003727450980392157, "loss": 0.3952, "step": 22506 }, { "epoch": 12.573743016759776, "grad_norm": 0.6154392957687378, "learning_rate": 0.00037271708683473385, "loss": 0.3716, "step": 22507 }, { "epoch": 12.574301675977654, "grad_norm": 0.5503832697868347, "learning_rate": 0.0003726890756302521, "loss": 0.4636, "step": 22508 }, { "epoch": 12.574860335195531, "grad_norm": 0.4107510447502136, "learning_rate": 0.0003726610644257703, "loss": 0.364, "step": 22509 }, { "epoch": 12.575418994413408, "grad_norm": 0.470242440700531, "learning_rate": 0.0003726330532212885, "loss": 0.3888, "step": 22510 }, { "epoch": 12.575977653631284, "grad_norm": 0.6612895131111145, "learning_rate": 0.00037260504201680673, "loss": 0.7543, "step": 22511 }, { "epoch": 12.576536312849163, "grad_norm": 0.5041276216506958, "learning_rate": 0.00037257703081232493, "loss": 0.3962, "step": 22512 }, { "epoch": 12.577094972067039, "grad_norm": 0.7390826940536499, "learning_rate": 0.00037254901960784314, "loss": 0.4902, "step": 22513 }, { "epoch": 
12.577653631284916, "grad_norm": 0.9431338906288147, "learning_rate": 0.00037252100840336135, "loss": 0.4666, "step": 22514 }, { "epoch": 12.578212290502794, "grad_norm": 0.5420820713043213, "learning_rate": 0.00037249299719887955, "loss": 0.3939, "step": 22515 }, { "epoch": 12.57877094972067, "grad_norm": 0.48682859539985657, "learning_rate": 0.00037246498599439776, "loss": 0.3618, "step": 22516 }, { "epoch": 12.579329608938547, "grad_norm": 0.39323559403419495, "learning_rate": 0.00037243697478991596, "loss": 0.4543, "step": 22517 }, { "epoch": 12.579888268156424, "grad_norm": 0.5573785305023193, "learning_rate": 0.00037240896358543417, "loss": 0.4314, "step": 22518 }, { "epoch": 12.580446927374302, "grad_norm": 1.7468023300170898, "learning_rate": 0.0003723809523809524, "loss": 0.451, "step": 22519 }, { "epoch": 12.581005586592179, "grad_norm": 0.8025546073913574, "learning_rate": 0.00037235294117647064, "loss": 0.5438, "step": 22520 }, { "epoch": 12.581564245810055, "grad_norm": 0.4134397506713867, "learning_rate": 0.0003723249299719888, "loss": 0.3929, "step": 22521 }, { "epoch": 12.582122905027934, "grad_norm": 0.47350767254829407, "learning_rate": 0.000372296918767507, "loss": 0.4415, "step": 22522 }, { "epoch": 12.58268156424581, "grad_norm": 0.48985230922698975, "learning_rate": 0.0003722689075630252, "loss": 0.4311, "step": 22523 }, { "epoch": 12.583240223463687, "grad_norm": 0.3546047806739807, "learning_rate": 0.0003722408963585434, "loss": 0.4008, "step": 22524 }, { "epoch": 12.583798882681565, "grad_norm": 0.8964436650276184, "learning_rate": 0.00037221288515406167, "loss": 0.3502, "step": 22525 }, { "epoch": 12.584357541899442, "grad_norm": 0.3794797956943512, "learning_rate": 0.0003721848739495798, "loss": 0.3348, "step": 22526 }, { "epoch": 12.584916201117318, "grad_norm": 0.6527863144874573, "learning_rate": 0.000372156862745098, "loss": 0.5589, "step": 22527 }, { "epoch": 12.585474860335196, "grad_norm": 0.3487827181816101, "learning_rate": 
0.0003721288515406163, "loss": 0.3433, "step": 22528 }, { "epoch": 12.586033519553073, "grad_norm": 0.43349307775497437, "learning_rate": 0.00037210084033613444, "loss": 0.4171, "step": 22529 }, { "epoch": 12.58659217877095, "grad_norm": 0.39403101801872253, "learning_rate": 0.0003720728291316527, "loss": 0.3671, "step": 22530 }, { "epoch": 12.587150837988826, "grad_norm": 0.6479273438453674, "learning_rate": 0.00037204481792717085, "loss": 0.3234, "step": 22531 }, { "epoch": 12.587709497206705, "grad_norm": 0.379276305437088, "learning_rate": 0.00037201680672268905, "loss": 0.3705, "step": 22532 }, { "epoch": 12.588268156424581, "grad_norm": 0.608769416809082, "learning_rate": 0.0003719887955182073, "loss": 0.5333, "step": 22533 }, { "epoch": 12.588826815642458, "grad_norm": 4.696609973907471, "learning_rate": 0.00037196078431372547, "loss": 0.3334, "step": 22534 }, { "epoch": 12.589385474860336, "grad_norm": 2.129253625869751, "learning_rate": 0.0003719327731092437, "loss": 0.4207, "step": 22535 }, { "epoch": 12.589944134078213, "grad_norm": 0.5442612767219543, "learning_rate": 0.00037190476190476193, "loss": 0.4343, "step": 22536 }, { "epoch": 12.59050279329609, "grad_norm": 0.8003788590431213, "learning_rate": 0.0003718767507002801, "loss": 0.4659, "step": 22537 }, { "epoch": 12.591061452513966, "grad_norm": 0.6233965158462524, "learning_rate": 0.00037184873949579834, "loss": 0.4769, "step": 22538 }, { "epoch": 12.591620111731844, "grad_norm": 0.30588507652282715, "learning_rate": 0.0003718207282913165, "loss": 0.3835, "step": 22539 }, { "epoch": 12.59217877094972, "grad_norm": 0.6887941360473633, "learning_rate": 0.00037179271708683476, "loss": 0.4423, "step": 22540 }, { "epoch": 12.592737430167597, "grad_norm": 0.6773592233657837, "learning_rate": 0.00037176470588235296, "loss": 0.6422, "step": 22541 }, { "epoch": 12.593296089385476, "grad_norm": 0.466437965631485, "learning_rate": 0.0003717366946778711, "loss": 0.3935, "step": 22542 }, { "epoch": 
12.593854748603352, "grad_norm": 0.45137104392051697, "learning_rate": 0.0003717086834733894, "loss": 0.343, "step": 22543 }, { "epoch": 12.594413407821229, "grad_norm": 0.5953842997550964, "learning_rate": 0.0003716806722689076, "loss": 0.4223, "step": 22544 }, { "epoch": 12.594972067039105, "grad_norm": 0.5215619802474976, "learning_rate": 0.0003716526610644258, "loss": 0.5252, "step": 22545 }, { "epoch": 12.595530726256984, "grad_norm": 0.4695204794406891, "learning_rate": 0.000371624649859944, "loss": 0.4253, "step": 22546 }, { "epoch": 12.59608938547486, "grad_norm": 0.4120956063270569, "learning_rate": 0.00037159663865546214, "loss": 0.3966, "step": 22547 }, { "epoch": 12.596648044692737, "grad_norm": 1.2609736919403076, "learning_rate": 0.0003715686274509804, "loss": 0.4993, "step": 22548 }, { "epoch": 12.597206703910615, "grad_norm": 0.3601849675178528, "learning_rate": 0.0003715406162464986, "loss": 0.4309, "step": 22549 }, { "epoch": 12.597765363128492, "grad_norm": 0.44100022315979004, "learning_rate": 0.0003715126050420168, "loss": 0.446, "step": 22550 }, { "epoch": 12.598324022346368, "grad_norm": 0.5519166588783264, "learning_rate": 0.000371484593837535, "loss": 0.4023, "step": 22551 }, { "epoch": 12.598882681564247, "grad_norm": 0.4104040563106537, "learning_rate": 0.00037145658263305323, "loss": 0.3614, "step": 22552 }, { "epoch": 12.599441340782123, "grad_norm": 2.154728412628174, "learning_rate": 0.00037142857142857143, "loss": 0.3741, "step": 22553 }, { "epoch": 12.6, "grad_norm": 0.4629014730453491, "learning_rate": 0.00037140056022408964, "loss": 0.4071, "step": 22554 }, { "epoch": 12.600558659217878, "grad_norm": 0.45616215467453003, "learning_rate": 0.0003713725490196079, "loss": 0.3089, "step": 22555 }, { "epoch": 12.601117318435755, "grad_norm": 0.5728338956832886, "learning_rate": 0.00037134453781512605, "loss": 0.4909, "step": 22556 }, { "epoch": 12.601675977653631, "grad_norm": 0.5634094476699829, "learning_rate": 0.00037131652661064426, 
"loss": 0.3893, "step": 22557 }, { "epoch": 12.602234636871508, "grad_norm": 3.124847650527954, "learning_rate": 0.00037128851540616246, "loss": 0.6828, "step": 22558 }, { "epoch": 12.602793296089386, "grad_norm": 0.39011862874031067, "learning_rate": 0.00037126050420168067, "loss": 0.3127, "step": 22559 }, { "epoch": 12.603351955307263, "grad_norm": 0.37355750799179077, "learning_rate": 0.00037123249299719893, "loss": 0.4067, "step": 22560 }, { "epoch": 12.60391061452514, "grad_norm": 0.5913501381874084, "learning_rate": 0.0003712044817927171, "loss": 0.3871, "step": 22561 }, { "epoch": 12.604469273743018, "grad_norm": 0.6833252906799316, "learning_rate": 0.0003711764705882353, "loss": 0.4781, "step": 22562 }, { "epoch": 12.605027932960894, "grad_norm": 0.44945746660232544, "learning_rate": 0.00037114845938375355, "loss": 0.3824, "step": 22563 }, { "epoch": 12.60558659217877, "grad_norm": 0.6070159077644348, "learning_rate": 0.0003711204481792717, "loss": 0.3528, "step": 22564 }, { "epoch": 12.606145251396647, "grad_norm": 0.7241159677505493, "learning_rate": 0.00037109243697478996, "loss": 0.5414, "step": 22565 }, { "epoch": 12.606703910614526, "grad_norm": 0.4528590738773346, "learning_rate": 0.0003710644257703081, "loss": 0.3055, "step": 22566 }, { "epoch": 12.607262569832402, "grad_norm": 1.4933909177780151, "learning_rate": 0.0003710364145658263, "loss": 0.3507, "step": 22567 }, { "epoch": 12.607821229050279, "grad_norm": 0.5839433670043945, "learning_rate": 0.0003710084033613446, "loss": 0.504, "step": 22568 }, { "epoch": 12.608379888268157, "grad_norm": 0.37618908286094666, "learning_rate": 0.00037098039215686273, "loss": 0.3388, "step": 22569 }, { "epoch": 12.608938547486034, "grad_norm": 0.5161586999893188, "learning_rate": 0.000370952380952381, "loss": 0.3468, "step": 22570 }, { "epoch": 12.60949720670391, "grad_norm": 0.5248129963874817, "learning_rate": 0.0003709243697478992, "loss": 0.4103, "step": 22571 }, { "epoch": 12.610055865921789, "grad_norm": 
0.5380997657775879, "learning_rate": 0.00037089635854341735, "loss": 0.3879, "step": 22572 }, { "epoch": 12.610614525139665, "grad_norm": 0.7444873452186584, "learning_rate": 0.0003708683473389356, "loss": 0.6215, "step": 22573 }, { "epoch": 12.611173184357542, "grad_norm": 0.3966597020626068, "learning_rate": 0.00037084033613445376, "loss": 0.4008, "step": 22574 }, { "epoch": 12.611731843575418, "grad_norm": 0.9485647678375244, "learning_rate": 0.000370812324929972, "loss": 0.421, "step": 22575 }, { "epoch": 12.612290502793297, "grad_norm": 0.5700395703315735, "learning_rate": 0.0003707843137254902, "loss": 0.4223, "step": 22576 }, { "epoch": 12.612849162011173, "grad_norm": 0.6104342937469482, "learning_rate": 0.0003707563025210084, "loss": 0.3854, "step": 22577 }, { "epoch": 12.61340782122905, "grad_norm": 0.5377269387245178, "learning_rate": 0.00037072829131652664, "loss": 0.4307, "step": 22578 }, { "epoch": 12.613966480446928, "grad_norm": 0.696079671382904, "learning_rate": 0.00037070028011204484, "loss": 0.3261, "step": 22579 }, { "epoch": 12.614525139664805, "grad_norm": 0.4880560040473938, "learning_rate": 0.00037067226890756305, "loss": 0.4687, "step": 22580 }, { "epoch": 12.615083798882681, "grad_norm": 0.4921223223209381, "learning_rate": 0.00037064425770308126, "loss": 0.4409, "step": 22581 }, { "epoch": 12.61564245810056, "grad_norm": 0.6960219740867615, "learning_rate": 0.0003706162464985994, "loss": 0.4429, "step": 22582 }, { "epoch": 12.616201117318436, "grad_norm": 0.5629876255989075, "learning_rate": 0.00037058823529411767, "loss": 0.5916, "step": 22583 }, { "epoch": 12.616759776536313, "grad_norm": 0.4182674288749695, "learning_rate": 0.0003705602240896359, "loss": 0.4767, "step": 22584 }, { "epoch": 12.61731843575419, "grad_norm": 9.708643913269043, "learning_rate": 0.0003705322128851541, "loss": 0.4888, "step": 22585 }, { "epoch": 12.617877094972068, "grad_norm": 0.5720090270042419, "learning_rate": 0.0003705042016806723, "loss": 0.4589, 
"step": 22586 }, { "epoch": 12.618435754189944, "grad_norm": 0.673067033290863, "learning_rate": 0.0003704761904761905, "loss": 0.3328, "step": 22587 }, { "epoch": 12.61899441340782, "grad_norm": 0.6160628795623779, "learning_rate": 0.0003704481792717087, "loss": 0.4705, "step": 22588 }, { "epoch": 12.619553072625699, "grad_norm": 0.4490644931793213, "learning_rate": 0.0003704201680672269, "loss": 0.3173, "step": 22589 }, { "epoch": 12.620111731843576, "grad_norm": 1.8294893503189087, "learning_rate": 0.0003703921568627451, "loss": 0.421, "step": 22590 }, { "epoch": 12.620670391061452, "grad_norm": 0.4117777943611145, "learning_rate": 0.0003703641456582633, "loss": 0.3146, "step": 22591 }, { "epoch": 12.621229050279329, "grad_norm": 0.5099352598190308, "learning_rate": 0.0003703361344537815, "loss": 0.4525, "step": 22592 }, { "epoch": 12.621787709497207, "grad_norm": 0.6766747832298279, "learning_rate": 0.00037030812324929973, "loss": 0.5214, "step": 22593 }, { "epoch": 12.622346368715084, "grad_norm": 0.47820141911506653, "learning_rate": 0.00037028011204481793, "loss": 0.3575, "step": 22594 }, { "epoch": 12.62290502793296, "grad_norm": 0.9130155444145203, "learning_rate": 0.00037025210084033614, "loss": 0.4422, "step": 22595 }, { "epoch": 12.623463687150839, "grad_norm": 0.4260018765926361, "learning_rate": 0.00037022408963585435, "loss": 0.467, "step": 22596 }, { "epoch": 12.624022346368715, "grad_norm": 0.9684244394302368, "learning_rate": 0.00037019607843137255, "loss": 0.4052, "step": 22597 }, { "epoch": 12.624581005586592, "grad_norm": 0.7404807209968567, "learning_rate": 0.00037016806722689076, "loss": 0.3478, "step": 22598 }, { "epoch": 12.62513966480447, "grad_norm": 0.5057596564292908, "learning_rate": 0.00037014005602240896, "loss": 0.3585, "step": 22599 }, { "epoch": 12.625698324022347, "grad_norm": 0.4183289110660553, "learning_rate": 0.00037011204481792717, "loss": 0.3725, "step": 22600 }, { "epoch": 12.626256983240223, "grad_norm": 
2.5891125202178955, "learning_rate": 0.0003700840336134454, "loss": 0.4142, "step": 22601 }, { "epoch": 12.6268156424581, "grad_norm": 2.5536437034606934, "learning_rate": 0.0003700560224089636, "loss": 0.4403, "step": 22602 }, { "epoch": 12.627374301675978, "grad_norm": 0.4288052022457123, "learning_rate": 0.00037002801120448184, "loss": 0.4337, "step": 22603 }, { "epoch": 12.627932960893855, "grad_norm": 1.304640293121338, "learning_rate": 0.00037, "loss": 0.4519, "step": 22604 }, { "epoch": 12.628491620111731, "grad_norm": 0.4733700752258301, "learning_rate": 0.0003699719887955182, "loss": 0.3852, "step": 22605 }, { "epoch": 12.62905027932961, "grad_norm": 0.6350199580192566, "learning_rate": 0.0003699439775910364, "loss": 0.4585, "step": 22606 }, { "epoch": 12.629608938547486, "grad_norm": 0.5748293995857239, "learning_rate": 0.0003699159663865546, "loss": 0.4301, "step": 22607 }, { "epoch": 12.630167597765363, "grad_norm": 3.6047136783599854, "learning_rate": 0.00036988795518207287, "loss": 0.5009, "step": 22608 }, { "epoch": 12.630726256983241, "grad_norm": 0.392987996339798, "learning_rate": 0.000369859943977591, "loss": 0.358, "step": 22609 }, { "epoch": 12.631284916201118, "grad_norm": 0.5424877405166626, "learning_rate": 0.00036983193277310923, "loss": 0.3397, "step": 22610 }, { "epoch": 12.631843575418994, "grad_norm": 0.4962330758571625, "learning_rate": 0.0003698039215686275, "loss": 0.3657, "step": 22611 }, { "epoch": 12.63240223463687, "grad_norm": 0.5226778984069824, "learning_rate": 0.00036977591036414564, "loss": 0.4884, "step": 22612 }, { "epoch": 12.632960893854749, "grad_norm": 0.630646288394928, "learning_rate": 0.0003697478991596639, "loss": 0.4892, "step": 22613 }, { "epoch": 12.633519553072626, "grad_norm": 0.4850040674209595, "learning_rate": 0.00036971988795518205, "loss": 0.512, "step": 22614 }, { "epoch": 12.634078212290502, "grad_norm": 0.4644118547439575, "learning_rate": 0.00036969187675070026, "loss": 0.3383, "step": 22615 }, { 
"epoch": 12.63463687150838, "grad_norm": 1.8969917297363281, "learning_rate": 0.0003696638655462185, "loss": 0.4529, "step": 22616 }, { "epoch": 12.635195530726257, "grad_norm": 0.4594047963619232, "learning_rate": 0.00036963585434173667, "loss": 0.4195, "step": 22617 }, { "epoch": 12.635754189944134, "grad_norm": 0.5045561194419861, "learning_rate": 0.00036960784313725493, "loss": 0.49, "step": 22618 }, { "epoch": 12.63631284916201, "grad_norm": 0.8643025159835815, "learning_rate": 0.00036957983193277314, "loss": 0.426, "step": 22619 }, { "epoch": 12.636871508379889, "grad_norm": 0.38268062472343445, "learning_rate": 0.0003695518207282913, "loss": 0.4288, "step": 22620 }, { "epoch": 12.637430167597765, "grad_norm": 0.477268785238266, "learning_rate": 0.00036952380952380955, "loss": 0.3189, "step": 22621 }, { "epoch": 12.637988826815642, "grad_norm": 0.5417362451553345, "learning_rate": 0.0003694957983193277, "loss": 0.4972, "step": 22622 }, { "epoch": 12.63854748603352, "grad_norm": 3.8287198543548584, "learning_rate": 0.00036946778711484596, "loss": 0.4789, "step": 22623 }, { "epoch": 12.639106145251397, "grad_norm": 0.36878421902656555, "learning_rate": 0.00036943977591036417, "loss": 0.3783, "step": 22624 }, { "epoch": 12.639664804469273, "grad_norm": 0.6996465921401978, "learning_rate": 0.0003694117647058823, "loss": 0.4531, "step": 22625 }, { "epoch": 12.640223463687152, "grad_norm": 0.6451622247695923, "learning_rate": 0.0003693837535014006, "loss": 0.6835, "step": 22626 }, { "epoch": 12.640782122905028, "grad_norm": 2.6806771755218506, "learning_rate": 0.0003693557422969188, "loss": 0.3358, "step": 22627 }, { "epoch": 12.641340782122905, "grad_norm": 0.44551214575767517, "learning_rate": 0.000369327731092437, "loss": 0.5094, "step": 22628 }, { "epoch": 12.641899441340783, "grad_norm": 1.0081738233566284, "learning_rate": 0.0003692997198879552, "loss": 0.5406, "step": 22629 }, { "epoch": 12.64245810055866, "grad_norm": 0.9619008898735046, "learning_rate": 
0.00036927170868347335, "loss": 0.4743, "step": 22630 }, { "epoch": 12.643016759776536, "grad_norm": 0.9496386051177979, "learning_rate": 0.0003692436974789916, "loss": 0.7209, "step": 22631 }, { "epoch": 12.643575418994413, "grad_norm": 0.5079296827316284, "learning_rate": 0.0003692156862745098, "loss": 0.4111, "step": 22632 }, { "epoch": 12.644134078212291, "grad_norm": 0.5236098170280457, "learning_rate": 0.000369187675070028, "loss": 0.3777, "step": 22633 }, { "epoch": 12.644692737430168, "grad_norm": 0.4201522767543793, "learning_rate": 0.00036915966386554623, "loss": 0.401, "step": 22634 }, { "epoch": 12.645251396648044, "grad_norm": 1.2196553945541382, "learning_rate": 0.00036913165266106443, "loss": 0.4771, "step": 22635 }, { "epoch": 12.645810055865923, "grad_norm": 0.37933313846588135, "learning_rate": 0.00036910364145658264, "loss": 0.3819, "step": 22636 }, { "epoch": 12.6463687150838, "grad_norm": 0.6158190965652466, "learning_rate": 0.00036907563025210085, "loss": 0.7561, "step": 22637 }, { "epoch": 12.646927374301676, "grad_norm": 0.8624254465103149, "learning_rate": 0.00036904761904761905, "loss": 0.5712, "step": 22638 }, { "epoch": 12.647486033519552, "grad_norm": 0.6276279091835022, "learning_rate": 0.00036901960784313726, "loss": 0.3783, "step": 22639 }, { "epoch": 12.64804469273743, "grad_norm": 0.5077083706855774, "learning_rate": 0.00036899159663865546, "loss": 0.4371, "step": 22640 }, { "epoch": 12.648603351955307, "grad_norm": 0.8366996049880981, "learning_rate": 0.00036896358543417367, "loss": 0.4396, "step": 22641 }, { "epoch": 12.649162011173184, "grad_norm": 0.6168509721755981, "learning_rate": 0.0003689355742296919, "loss": 0.4076, "step": 22642 }, { "epoch": 12.649720670391062, "grad_norm": 0.4172072410583496, "learning_rate": 0.00036890756302521014, "loss": 0.3869, "step": 22643 }, { "epoch": 12.650279329608939, "grad_norm": 0.5623812675476074, "learning_rate": 0.0003688795518207283, "loss": 0.3181, "step": 22644 }, { "epoch": 
12.650837988826815, "grad_norm": 0.4591881036758423, "learning_rate": 0.0003688515406162465, "loss": 0.3614, "step": 22645 }, { "epoch": 12.651396648044694, "grad_norm": 0.47652843594551086, "learning_rate": 0.0003688235294117647, "loss": 0.3753, "step": 22646 }, { "epoch": 12.65195530726257, "grad_norm": 0.42527127265930176, "learning_rate": 0.0003687955182072829, "loss": 0.4329, "step": 22647 }, { "epoch": 12.652513966480447, "grad_norm": 0.4065473675727844, "learning_rate": 0.00036876750700280117, "loss": 0.4287, "step": 22648 }, { "epoch": 12.653072625698323, "grad_norm": 0.5770248770713806, "learning_rate": 0.0003687394957983193, "loss": 0.3847, "step": 22649 }, { "epoch": 12.653631284916202, "grad_norm": 0.4943188428878784, "learning_rate": 0.0003687114845938375, "loss": 0.4073, "step": 22650 }, { "epoch": 12.654189944134078, "grad_norm": 0.7674317955970764, "learning_rate": 0.0003686834733893558, "loss": 0.3779, "step": 22651 }, { "epoch": 12.654748603351955, "grad_norm": 0.45545870065689087, "learning_rate": 0.00036865546218487394, "loss": 0.5403, "step": 22652 }, { "epoch": 12.655307262569833, "grad_norm": 0.4823314845561981, "learning_rate": 0.0003686274509803922, "loss": 0.4919, "step": 22653 }, { "epoch": 12.65586592178771, "grad_norm": 0.4444320797920227, "learning_rate": 0.00036859943977591035, "loss": 0.4732, "step": 22654 }, { "epoch": 12.656424581005586, "grad_norm": 0.48194339871406555, "learning_rate": 0.00036857142857142855, "loss": 0.371, "step": 22655 }, { "epoch": 12.656983240223465, "grad_norm": 0.9455886483192444, "learning_rate": 0.0003685434173669468, "loss": 0.4799, "step": 22656 }, { "epoch": 12.657541899441341, "grad_norm": 0.5675630569458008, "learning_rate": 0.00036851540616246497, "loss": 0.5449, "step": 22657 }, { "epoch": 12.658100558659218, "grad_norm": 1.1131759881973267, "learning_rate": 0.0003684873949579832, "loss": 0.5092, "step": 22658 }, { "epoch": 12.658659217877094, "grad_norm": 0.3302125632762909, "learning_rate": 
0.00036845938375350143, "loss": 0.4138, "step": 22659 }, { "epoch": 12.659217877094973, "grad_norm": 1.188501238822937, "learning_rate": 0.0003684313725490196, "loss": 0.3869, "step": 22660 }, { "epoch": 12.65977653631285, "grad_norm": 0.4855312705039978, "learning_rate": 0.00036840336134453784, "loss": 0.4653, "step": 22661 }, { "epoch": 12.660335195530726, "grad_norm": 2.120654344558716, "learning_rate": 0.000368375350140056, "loss": 0.4393, "step": 22662 }, { "epoch": 12.660893854748604, "grad_norm": 0.40007635951042175, "learning_rate": 0.00036834733893557426, "loss": 0.3682, "step": 22663 }, { "epoch": 12.66145251396648, "grad_norm": 0.43272078037261963, "learning_rate": 0.00036831932773109246, "loss": 0.4097, "step": 22664 }, { "epoch": 12.662011173184357, "grad_norm": 0.8415060639381409, "learning_rate": 0.0003682913165266106, "loss": 0.4651, "step": 22665 }, { "epoch": 12.662569832402234, "grad_norm": 0.5167807936668396, "learning_rate": 0.0003682633053221289, "loss": 0.437, "step": 22666 }, { "epoch": 12.663128491620112, "grad_norm": 0.5113229751586914, "learning_rate": 0.0003682352941176471, "loss": 0.5192, "step": 22667 }, { "epoch": 12.663687150837989, "grad_norm": 0.4238426983356476, "learning_rate": 0.0003682072829131653, "loss": 0.3454, "step": 22668 }, { "epoch": 12.664245810055865, "grad_norm": 4.335702896118164, "learning_rate": 0.0003681792717086835, "loss": 0.4291, "step": 22669 }, { "epoch": 12.664804469273744, "grad_norm": 0.4370681047439575, "learning_rate": 0.00036815126050420164, "loss": 0.4076, "step": 22670 }, { "epoch": 12.66536312849162, "grad_norm": 0.570739209651947, "learning_rate": 0.0003681232492997199, "loss": 0.3993, "step": 22671 }, { "epoch": 12.665921787709497, "grad_norm": 0.9036211371421814, "learning_rate": 0.0003680952380952381, "loss": 0.587, "step": 22672 }, { "epoch": 12.666480446927375, "grad_norm": 0.5403720140457153, "learning_rate": 0.0003680672268907563, "loss": 0.4692, "step": 22673 }, { "epoch": 
12.667039106145252, "grad_norm": 0.4469272792339325, "learning_rate": 0.0003680392156862745, "loss": 0.558, "step": 22674 }, { "epoch": 12.667597765363128, "grad_norm": 2.5028650760650635, "learning_rate": 0.00036801120448179273, "loss": 0.4619, "step": 22675 }, { "epoch": 12.668156424581005, "grad_norm": 0.8241000175476074, "learning_rate": 0.00036798319327731093, "loss": 0.4284, "step": 22676 }, { "epoch": 12.668715083798883, "grad_norm": 0.4155273139476776, "learning_rate": 0.00036795518207282914, "loss": 0.4793, "step": 22677 }, { "epoch": 12.66927374301676, "grad_norm": 0.8346865177154541, "learning_rate": 0.00036792717086834735, "loss": 0.4944, "step": 22678 }, { "epoch": 12.669832402234636, "grad_norm": 0.3694761395454407, "learning_rate": 0.00036789915966386555, "loss": 0.4222, "step": 22679 }, { "epoch": 12.670391061452515, "grad_norm": 0.6826328635215759, "learning_rate": 0.00036787114845938376, "loss": 0.5012, "step": 22680 }, { "epoch": 12.670949720670391, "grad_norm": 0.7971574068069458, "learning_rate": 0.00036784313725490196, "loss": 0.4402, "step": 22681 }, { "epoch": 12.671508379888268, "grad_norm": 0.4051872193813324, "learning_rate": 0.00036781512605042017, "loss": 0.3355, "step": 22682 }, { "epoch": 12.672067039106146, "grad_norm": 0.44159600138664246, "learning_rate": 0.00036778711484593843, "loss": 0.3613, "step": 22683 }, { "epoch": 12.672625698324023, "grad_norm": 0.6788287162780762, "learning_rate": 0.0003677591036414566, "loss": 0.4363, "step": 22684 }, { "epoch": 12.6731843575419, "grad_norm": 0.4831417500972748, "learning_rate": 0.0003677310924369748, "loss": 0.4171, "step": 22685 }, { "epoch": 12.673743016759776, "grad_norm": 0.4618101418018341, "learning_rate": 0.000367703081232493, "loss": 0.5157, "step": 22686 }, { "epoch": 12.674301675977654, "grad_norm": 0.4800151586532593, "learning_rate": 0.0003676750700280112, "loss": 0.4015, "step": 22687 }, { "epoch": 12.67486033519553, "grad_norm": 0.6734005212783813, "learning_rate": 
0.00036764705882352946, "loss": 0.5868, "step": 22688 }, { "epoch": 12.675418994413407, "grad_norm": 13.141242027282715, "learning_rate": 0.0003676190476190476, "loss": 0.365, "step": 22689 }, { "epoch": 12.675977653631286, "grad_norm": 0.6268458962440491, "learning_rate": 0.0003675910364145658, "loss": 0.3227, "step": 22690 }, { "epoch": 12.676536312849162, "grad_norm": 0.5545414686203003, "learning_rate": 0.0003675630252100841, "loss": 0.4361, "step": 22691 }, { "epoch": 12.677094972067039, "grad_norm": 0.40543267130851746, "learning_rate": 0.00036753501400560223, "loss": 0.3819, "step": 22692 }, { "epoch": 12.677653631284915, "grad_norm": 0.5659895539283752, "learning_rate": 0.0003675070028011205, "loss": 0.5817, "step": 22693 }, { "epoch": 12.678212290502794, "grad_norm": 0.38993850350379944, "learning_rate": 0.00036747899159663864, "loss": 0.4236, "step": 22694 }, { "epoch": 12.67877094972067, "grad_norm": 0.4423362910747528, "learning_rate": 0.00036745098039215685, "loss": 0.3407, "step": 22695 }, { "epoch": 12.679329608938547, "grad_norm": 0.432059645652771, "learning_rate": 0.0003674229691876751, "loss": 0.4736, "step": 22696 }, { "epoch": 12.679888268156425, "grad_norm": 0.6562801003456116, "learning_rate": 0.00036739495798319326, "loss": 0.389, "step": 22697 }, { "epoch": 12.680446927374302, "grad_norm": 0.9713101387023926, "learning_rate": 0.0003673669467787115, "loss": 0.502, "step": 22698 }, { "epoch": 12.681005586592178, "grad_norm": 0.83583664894104, "learning_rate": 0.0003673389355742297, "loss": 0.4063, "step": 22699 }, { "epoch": 12.681564245810057, "grad_norm": 0.3693397343158722, "learning_rate": 0.0003673109243697479, "loss": 0.3886, "step": 22700 }, { "epoch": 12.682122905027933, "grad_norm": 0.5915853977203369, "learning_rate": 0.00036728291316526614, "loss": 0.4183, "step": 22701 }, { "epoch": 12.68268156424581, "grad_norm": 1.1918507814407349, "learning_rate": 0.0003672549019607843, "loss": 0.6949, "step": 22702 }, { "epoch": 
12.683240223463688, "grad_norm": 1.0061208009719849, "learning_rate": 0.00036722689075630255, "loss": 0.5751, "step": 22703 }, { "epoch": 12.683798882681565, "grad_norm": 0.4901619851589203, "learning_rate": 0.00036719887955182076, "loss": 0.4106, "step": 22704 }, { "epoch": 12.684357541899441, "grad_norm": 0.4705338478088379, "learning_rate": 0.0003671708683473389, "loss": 0.4329, "step": 22705 }, { "epoch": 12.684916201117318, "grad_norm": 0.4751470386981964, "learning_rate": 0.00036714285714285717, "loss": 0.406, "step": 22706 }, { "epoch": 12.685474860335196, "grad_norm": 0.5742958188056946, "learning_rate": 0.0003671148459383754, "loss": 0.4762, "step": 22707 }, { "epoch": 12.686033519553073, "grad_norm": 0.42881226539611816, "learning_rate": 0.0003670868347338935, "loss": 0.3553, "step": 22708 }, { "epoch": 12.68659217877095, "grad_norm": 0.44101089239120483, "learning_rate": 0.0003670588235294118, "loss": 0.4532, "step": 22709 }, { "epoch": 12.687150837988828, "grad_norm": 0.4362788200378418, "learning_rate": 0.00036703081232492994, "loss": 0.4415, "step": 22710 }, { "epoch": 12.687709497206704, "grad_norm": 0.46455127000808716, "learning_rate": 0.0003670028011204482, "loss": 0.4227, "step": 22711 }, { "epoch": 12.68826815642458, "grad_norm": 1.557179570198059, "learning_rate": 0.0003669747899159664, "loss": 0.3986, "step": 22712 }, { "epoch": 12.688826815642457, "grad_norm": 1.4577324390411377, "learning_rate": 0.00036694677871148456, "loss": 0.4091, "step": 22713 }, { "epoch": 12.689385474860336, "grad_norm": 0.4985298812389374, "learning_rate": 0.0003669187675070028, "loss": 0.4474, "step": 22714 }, { "epoch": 12.689944134078212, "grad_norm": 1.6187843084335327, "learning_rate": 0.000366890756302521, "loss": 0.5188, "step": 22715 }, { "epoch": 12.690502793296089, "grad_norm": 0.47952544689178467, "learning_rate": 0.00036686274509803923, "loss": 0.4026, "step": 22716 }, { "epoch": 12.691061452513967, "grad_norm": 0.5362875461578369, "learning_rate": 
0.00036683473389355743, "loss": 0.4839, "step": 22717 }, { "epoch": 12.691620111731844, "grad_norm": 0.48344671726226807, "learning_rate": 0.0003668067226890756, "loss": 0.4729, "step": 22718 }, { "epoch": 12.69217877094972, "grad_norm": 0.4527988135814667, "learning_rate": 0.00036677871148459385, "loss": 0.4319, "step": 22719 }, { "epoch": 12.692737430167599, "grad_norm": 0.45833876729011536, "learning_rate": 0.00036675070028011205, "loss": 0.4193, "step": 22720 }, { "epoch": 12.693296089385475, "grad_norm": 0.4064957797527313, "learning_rate": 0.00036672268907563026, "loss": 0.4351, "step": 22721 }, { "epoch": 12.693854748603352, "grad_norm": 0.5433568954467773, "learning_rate": 0.00036669467787114846, "loss": 0.5695, "step": 22722 }, { "epoch": 12.694413407821228, "grad_norm": 0.40819454193115234, "learning_rate": 0.00036666666666666667, "loss": 0.3407, "step": 22723 }, { "epoch": 12.694972067039107, "grad_norm": 1.3608016967773438, "learning_rate": 0.0003666386554621849, "loss": 0.3421, "step": 22724 }, { "epoch": 12.695530726256983, "grad_norm": 0.4803275465965271, "learning_rate": 0.0003666106442577031, "loss": 0.4272, "step": 22725 }, { "epoch": 12.69608938547486, "grad_norm": 0.6079450249671936, "learning_rate": 0.0003665826330532213, "loss": 0.3481, "step": 22726 }, { "epoch": 12.696648044692738, "grad_norm": 0.3614291250705719, "learning_rate": 0.0003665546218487395, "loss": 0.4092, "step": 22727 }, { "epoch": 12.697206703910615, "grad_norm": 0.3670042157173157, "learning_rate": 0.0003665266106442577, "loss": 0.3459, "step": 22728 }, { "epoch": 12.697765363128491, "grad_norm": 0.3679581880569458, "learning_rate": 0.0003664985994397759, "loss": 0.3265, "step": 22729 }, { "epoch": 12.69832402234637, "grad_norm": 0.47507980465888977, "learning_rate": 0.0003664705882352941, "loss": 0.4102, "step": 22730 }, { "epoch": 12.698882681564246, "grad_norm": 0.6242567896842957, "learning_rate": 0.00036644257703081237, "loss": 0.3666, "step": 22731 }, { "epoch": 
12.699441340782123, "grad_norm": 0.5092839598655701, "learning_rate": 0.0003664145658263305, "loss": 0.3389, "step": 22732 }, { "epoch": 12.7, "grad_norm": 0.6447473168373108, "learning_rate": 0.00036638655462184873, "loss": 0.5652, "step": 22733 }, { "epoch": 12.700558659217878, "grad_norm": 1.5840729475021362, "learning_rate": 0.00036635854341736694, "loss": 0.3944, "step": 22734 }, { "epoch": 12.701117318435754, "grad_norm": 0.4584546685218811, "learning_rate": 0.00036633053221288514, "loss": 0.3827, "step": 22735 }, { "epoch": 12.70167597765363, "grad_norm": 0.44648319482803345, "learning_rate": 0.0003663025210084034, "loss": 0.4522, "step": 22736 }, { "epoch": 12.702234636871509, "grad_norm": 1.1005603075027466, "learning_rate": 0.00036627450980392155, "loss": 0.4222, "step": 22737 }, { "epoch": 12.702793296089386, "grad_norm": 0.7060168385505676, "learning_rate": 0.00036624649859943976, "loss": 0.4703, "step": 22738 }, { "epoch": 12.703351955307262, "grad_norm": 0.42375731468200684, "learning_rate": 0.000366218487394958, "loss": 0.488, "step": 22739 }, { "epoch": 12.703910614525139, "grad_norm": 0.5345640778541565, "learning_rate": 0.00036619047619047617, "loss": 0.5145, "step": 22740 }, { "epoch": 12.704469273743017, "grad_norm": 0.3759945034980774, "learning_rate": 0.00036616246498599443, "loss": 0.3848, "step": 22741 }, { "epoch": 12.705027932960894, "grad_norm": 0.5260798335075378, "learning_rate": 0.0003661344537815126, "loss": 0.3616, "step": 22742 }, { "epoch": 12.70558659217877, "grad_norm": 0.6330503821372986, "learning_rate": 0.0003661064425770308, "loss": 0.5406, "step": 22743 }, { "epoch": 12.706145251396649, "grad_norm": 0.4426876902580261, "learning_rate": 0.00036607843137254905, "loss": 0.3502, "step": 22744 }, { "epoch": 12.706703910614525, "grad_norm": 0.37959563732147217, "learning_rate": 0.0003660504201680672, "loss": 0.3262, "step": 22745 }, { "epoch": 12.707262569832402, "grad_norm": 0.4771775007247925, "learning_rate": 
0.00036602240896358546, "loss": 0.4442, "step": 22746 }, { "epoch": 12.70782122905028, "grad_norm": 1.4068987369537354, "learning_rate": 0.00036599439775910367, "loss": 0.4878, "step": 22747 }, { "epoch": 12.708379888268157, "grad_norm": 0.649357795715332, "learning_rate": 0.0003659663865546218, "loss": 0.4428, "step": 22748 }, { "epoch": 12.708938547486033, "grad_norm": 0.5546964406967163, "learning_rate": 0.0003659383753501401, "loss": 0.4378, "step": 22749 }, { "epoch": 12.70949720670391, "grad_norm": 0.5037041306495667, "learning_rate": 0.00036591036414565823, "loss": 0.3744, "step": 22750 }, { "epoch": 12.710055865921788, "grad_norm": 0.4368305504322052, "learning_rate": 0.0003658823529411765, "loss": 0.443, "step": 22751 }, { "epoch": 12.710614525139665, "grad_norm": 0.42577028274536133, "learning_rate": 0.0003658543417366947, "loss": 0.4505, "step": 22752 }, { "epoch": 12.711173184357541, "grad_norm": 0.48462414741516113, "learning_rate": 0.00036582633053221285, "loss": 0.3704, "step": 22753 }, { "epoch": 12.71173184357542, "grad_norm": 1.2744355201721191, "learning_rate": 0.0003657983193277311, "loss": 0.3861, "step": 22754 }, { "epoch": 12.712290502793296, "grad_norm": 7.955973148345947, "learning_rate": 0.0003657703081232493, "loss": 0.4129, "step": 22755 }, { "epoch": 12.712849162011173, "grad_norm": 0.5515907406806946, "learning_rate": 0.0003657422969187675, "loss": 0.4737, "step": 22756 }, { "epoch": 12.713407821229051, "grad_norm": 0.41590380668640137, "learning_rate": 0.00036571428571428573, "loss": 0.4009, "step": 22757 }, { "epoch": 12.713966480446928, "grad_norm": 0.5429763793945312, "learning_rate": 0.0003656862745098039, "loss": 0.4156, "step": 22758 }, { "epoch": 12.714525139664804, "grad_norm": 0.43287140130996704, "learning_rate": 0.00036565826330532214, "loss": 0.4542, "step": 22759 }, { "epoch": 12.71508379888268, "grad_norm": 0.3885638415813446, "learning_rate": 0.00036563025210084035, "loss": 0.3889, "step": 22760 }, { "epoch": 
12.71564245810056, "grad_norm": 1.02476966381073, "learning_rate": 0.00036560224089635855, "loss": 0.4165, "step": 22761 }, { "epoch": 12.716201117318436, "grad_norm": 0.34999823570251465, "learning_rate": 0.00036557422969187676, "loss": 0.3533, "step": 22762 }, { "epoch": 12.716759776536312, "grad_norm": 0.6705333590507507, "learning_rate": 0.00036554621848739496, "loss": 0.3529, "step": 22763 }, { "epoch": 12.71731843575419, "grad_norm": 0.5039358735084534, "learning_rate": 0.00036551820728291317, "loss": 0.3938, "step": 22764 }, { "epoch": 12.717877094972067, "grad_norm": 0.34424564242362976, "learning_rate": 0.0003654901960784314, "loss": 0.2597, "step": 22765 }, { "epoch": 12.718435754189944, "grad_norm": 1.6279664039611816, "learning_rate": 0.00036546218487394964, "loss": 0.4814, "step": 22766 }, { "epoch": 12.71899441340782, "grad_norm": 1.525254726409912, "learning_rate": 0.0003654341736694678, "loss": 0.4829, "step": 22767 }, { "epoch": 12.719553072625699, "grad_norm": 0.4523603022098541, "learning_rate": 0.000365406162464986, "loss": 0.4156, "step": 22768 }, { "epoch": 12.720111731843575, "grad_norm": 0.43614131212234497, "learning_rate": 0.0003653781512605042, "loss": 0.4833, "step": 22769 }, { "epoch": 12.720670391061452, "grad_norm": 0.34730708599090576, "learning_rate": 0.0003653501400560224, "loss": 0.3606, "step": 22770 }, { "epoch": 12.72122905027933, "grad_norm": 0.40569254755973816, "learning_rate": 0.00036532212885154067, "loss": 0.3819, "step": 22771 }, { "epoch": 12.721787709497207, "grad_norm": 0.916185200214386, "learning_rate": 0.0003652941176470588, "loss": 0.458, "step": 22772 }, { "epoch": 12.722346368715083, "grad_norm": 0.6924963593482971, "learning_rate": 0.000365266106442577, "loss": 0.4698, "step": 22773 }, { "epoch": 12.722905027932962, "grad_norm": 0.5590592622756958, "learning_rate": 0.0003652380952380953, "loss": 0.3562, "step": 22774 }, { "epoch": 12.723463687150838, "grad_norm": 0.42891693115234375, "learning_rate": 
0.00036521008403361344, "loss": 0.3461, "step": 22775 }, { "epoch": 12.724022346368715, "grad_norm": 0.7148812413215637, "learning_rate": 0.0003651820728291317, "loss": 0.354, "step": 22776 }, { "epoch": 12.724581005586593, "grad_norm": 5.826832294464111, "learning_rate": 0.00036515406162464985, "loss": 0.454, "step": 22777 }, { "epoch": 12.72513966480447, "grad_norm": 0.6415731310844421, "learning_rate": 0.00036512605042016805, "loss": 0.4511, "step": 22778 }, { "epoch": 12.725698324022346, "grad_norm": 0.7285545468330383, "learning_rate": 0.0003650980392156863, "loss": 0.5046, "step": 22779 }, { "epoch": 12.726256983240223, "grad_norm": 0.5698167681694031, "learning_rate": 0.00036507002801120447, "loss": 0.4213, "step": 22780 }, { "epoch": 12.726815642458101, "grad_norm": 0.5560302734375, "learning_rate": 0.0003650420168067227, "loss": 0.409, "step": 22781 }, { "epoch": 12.727374301675978, "grad_norm": 0.3795311748981476, "learning_rate": 0.00036501400560224093, "loss": 0.3622, "step": 22782 }, { "epoch": 12.727932960893854, "grad_norm": 0.4959107041358948, "learning_rate": 0.0003649859943977591, "loss": 0.5039, "step": 22783 }, { "epoch": 12.728491620111733, "grad_norm": 1.0322513580322266, "learning_rate": 0.00036495798319327734, "loss": 0.5891, "step": 22784 }, { "epoch": 12.72905027932961, "grad_norm": 0.4532834589481354, "learning_rate": 0.0003649299719887955, "loss": 0.3771, "step": 22785 }, { "epoch": 12.729608938547486, "grad_norm": 0.43512043356895447, "learning_rate": 0.00036490196078431376, "loss": 0.3772, "step": 22786 }, { "epoch": 12.730167597765362, "grad_norm": 0.45097917318344116, "learning_rate": 0.00036487394957983196, "loss": 0.4052, "step": 22787 }, { "epoch": 12.73072625698324, "grad_norm": 10.076991081237793, "learning_rate": 0.0003648459383753501, "loss": 0.4625, "step": 22788 }, { "epoch": 12.731284916201117, "grad_norm": 0.5700581669807434, "learning_rate": 0.0003648179271708684, "loss": 0.3814, "step": 22789 }, { "epoch": 
12.731843575418994, "grad_norm": 0.8671738505363464, "learning_rate": 0.0003647899159663866, "loss": 0.4473, "step": 22790 }, { "epoch": 12.732402234636872, "grad_norm": 0.39406871795654297, "learning_rate": 0.0003647619047619048, "loss": 0.374, "step": 22791 }, { "epoch": 12.732960893854749, "grad_norm": 0.3909502625465393, "learning_rate": 0.000364733893557423, "loss": 0.4332, "step": 22792 }, { "epoch": 12.733519553072625, "grad_norm": 0.561922550201416, "learning_rate": 0.00036470588235294114, "loss": 0.388, "step": 22793 }, { "epoch": 12.734078212290502, "grad_norm": 1.209590196609497, "learning_rate": 0.0003646778711484594, "loss": 0.3889, "step": 22794 }, { "epoch": 12.73463687150838, "grad_norm": 0.591681182384491, "learning_rate": 0.0003646498599439776, "loss": 0.3201, "step": 22795 }, { "epoch": 12.735195530726257, "grad_norm": 2.4374759197235107, "learning_rate": 0.0003646218487394958, "loss": 0.3817, "step": 22796 }, { "epoch": 12.735754189944133, "grad_norm": 0.6187816858291626, "learning_rate": 0.000364593837535014, "loss": 0.3821, "step": 22797 }, { "epoch": 12.736312849162012, "grad_norm": 0.3471083343029022, "learning_rate": 0.00036456582633053223, "loss": 0.4019, "step": 22798 }, { "epoch": 12.736871508379888, "grad_norm": 0.3987346589565277, "learning_rate": 0.00036453781512605043, "loss": 0.4526, "step": 22799 }, { "epoch": 12.737430167597765, "grad_norm": 0.5573232173919678, "learning_rate": 0.00036450980392156864, "loss": 0.5068, "step": 22800 }, { "epoch": 12.737988826815643, "grad_norm": 0.7180553674697876, "learning_rate": 0.00036448179271708685, "loss": 0.4944, "step": 22801 }, { "epoch": 12.73854748603352, "grad_norm": 0.45446667075157166, "learning_rate": 0.00036445378151260505, "loss": 0.431, "step": 22802 }, { "epoch": 12.739106145251396, "grad_norm": 4.7218708992004395, "learning_rate": 0.00036442577030812326, "loss": 0.3109, "step": 22803 }, { "epoch": 12.739664804469275, "grad_norm": 0.5455268025398254, "learning_rate": 
0.00036439775910364146, "loss": 0.5835, "step": 22804 }, { "epoch": 12.740223463687151, "grad_norm": 0.6029795408248901, "learning_rate": 0.00036436974789915967, "loss": 0.4175, "step": 22805 }, { "epoch": 12.740782122905028, "grad_norm": 1.880365014076233, "learning_rate": 0.00036434173669467793, "loss": 0.4707, "step": 22806 }, { "epoch": 12.741340782122904, "grad_norm": 0.50532066822052, "learning_rate": 0.0003643137254901961, "loss": 0.4525, "step": 22807 }, { "epoch": 12.741899441340783, "grad_norm": 0.5116075277328491, "learning_rate": 0.0003642857142857143, "loss": 0.473, "step": 22808 }, { "epoch": 12.74245810055866, "grad_norm": 0.6967884302139282, "learning_rate": 0.0003642577030812325, "loss": 0.6258, "step": 22809 }, { "epoch": 12.743016759776536, "grad_norm": 0.6727312803268433, "learning_rate": 0.0003642296918767507, "loss": 0.3504, "step": 22810 }, { "epoch": 12.743575418994414, "grad_norm": 0.6625656485557556, "learning_rate": 0.00036420168067226896, "loss": 0.6394, "step": 22811 }, { "epoch": 12.74413407821229, "grad_norm": 0.567778468132019, "learning_rate": 0.0003641736694677871, "loss": 0.5532, "step": 22812 }, { "epoch": 12.744692737430167, "grad_norm": 2.000669240951538, "learning_rate": 0.0003641456582633053, "loss": 0.3604, "step": 22813 }, { "epoch": 12.745251396648044, "grad_norm": 1.4496207237243652, "learning_rate": 0.0003641176470588236, "loss": 0.455, "step": 22814 }, { "epoch": 12.745810055865922, "grad_norm": 0.5075682997703552, "learning_rate": 0.00036408963585434173, "loss": 0.3656, "step": 22815 }, { "epoch": 12.746368715083799, "grad_norm": 0.41381144523620605, "learning_rate": 0.00036406162464985994, "loss": 0.4398, "step": 22816 }, { "epoch": 12.746927374301675, "grad_norm": 0.4415242075920105, "learning_rate": 0.00036403361344537814, "loss": 0.443, "step": 22817 }, { "epoch": 12.747486033519554, "grad_norm": 1.0416975021362305, "learning_rate": 0.00036400560224089635, "loss": 0.4336, "step": 22818 }, { "epoch": 
12.74804469273743, "grad_norm": 0.6818392276763916, "learning_rate": 0.0003639775910364146, "loss": 0.4606, "step": 22819 }, { "epoch": 12.748603351955307, "grad_norm": 0.4275626838207245, "learning_rate": 0.00036394957983193276, "loss": 0.4179, "step": 22820 }, { "epoch": 12.749162011173185, "grad_norm": 0.6371297240257263, "learning_rate": 0.00036392156862745097, "loss": 0.4707, "step": 22821 }, { "epoch": 12.749720670391062, "grad_norm": 0.6007310152053833, "learning_rate": 0.0003638935574229692, "loss": 0.5097, "step": 22822 }, { "epoch": 12.750279329608938, "grad_norm": 0.7302827835083008, "learning_rate": 0.0003638655462184874, "loss": 0.4123, "step": 22823 }, { "epoch": 12.750837988826815, "grad_norm": 0.4860741198062897, "learning_rate": 0.00036383753501400564, "loss": 0.4368, "step": 22824 }, { "epoch": 12.751396648044693, "grad_norm": 0.377386212348938, "learning_rate": 0.0003638095238095238, "loss": 0.4169, "step": 22825 }, { "epoch": 12.75195530726257, "grad_norm": 0.7121120691299438, "learning_rate": 0.000363781512605042, "loss": 0.4321, "step": 22826 }, { "epoch": 12.752513966480446, "grad_norm": 0.44249460101127625, "learning_rate": 0.00036375350140056026, "loss": 0.4036, "step": 22827 }, { "epoch": 12.753072625698325, "grad_norm": 0.6371538639068604, "learning_rate": 0.0003637254901960784, "loss": 0.5075, "step": 22828 }, { "epoch": 12.753631284916201, "grad_norm": 1.1038780212402344, "learning_rate": 0.00036369747899159667, "loss": 0.3929, "step": 22829 }, { "epoch": 12.754189944134078, "grad_norm": 0.6869402527809143, "learning_rate": 0.0003636694677871149, "loss": 0.3606, "step": 22830 }, { "epoch": 12.754748603351956, "grad_norm": 0.5216014981269836, "learning_rate": 0.000363641456582633, "loss": 0.3238, "step": 22831 }, { "epoch": 12.755307262569833, "grad_norm": 0.3676243722438812, "learning_rate": 0.0003636134453781513, "loss": 0.3371, "step": 22832 }, { "epoch": 12.75586592178771, "grad_norm": 0.4803237020969391, "learning_rate": 
0.00036358543417366944, "loss": 0.4185, "step": 22833 }, { "epoch": 12.756424581005586, "grad_norm": 0.4397362768650055, "learning_rate": 0.0003635574229691877, "loss": 0.4349, "step": 22834 }, { "epoch": 12.756983240223464, "grad_norm": 0.9142764210700989, "learning_rate": 0.0003635294117647059, "loss": 0.4274, "step": 22835 }, { "epoch": 12.75754189944134, "grad_norm": 0.6187541484832764, "learning_rate": 0.00036350140056022406, "loss": 0.2993, "step": 22836 }, { "epoch": 12.758100558659217, "grad_norm": 0.6531397104263306, "learning_rate": 0.0003634733893557423, "loss": 0.4021, "step": 22837 }, { "epoch": 12.758659217877096, "grad_norm": 1.6469725370407104, "learning_rate": 0.0003634453781512605, "loss": 0.4299, "step": 22838 }, { "epoch": 12.759217877094972, "grad_norm": 0.42806869745254517, "learning_rate": 0.00036341736694677873, "loss": 0.3581, "step": 22839 }, { "epoch": 12.759776536312849, "grad_norm": 0.6976181268692017, "learning_rate": 0.00036338935574229693, "loss": 0.4947, "step": 22840 }, { "epoch": 12.760335195530725, "grad_norm": 0.469889760017395, "learning_rate": 0.0003633613445378151, "loss": 0.3771, "step": 22841 }, { "epoch": 12.760893854748604, "grad_norm": 0.5360656380653381, "learning_rate": 0.00036333333333333335, "loss": 0.4259, "step": 22842 }, { "epoch": 12.76145251396648, "grad_norm": 0.8430083394050598, "learning_rate": 0.00036330532212885155, "loss": 0.4319, "step": 22843 }, { "epoch": 12.762011173184357, "grad_norm": 0.5406758189201355, "learning_rate": 0.00036327731092436976, "loss": 0.4406, "step": 22844 }, { "epoch": 12.762569832402235, "grad_norm": 0.5195489525794983, "learning_rate": 0.00036324929971988796, "loss": 0.2963, "step": 22845 }, { "epoch": 12.763128491620112, "grad_norm": 0.5169768333435059, "learning_rate": 0.00036322128851540617, "loss": 0.3281, "step": 22846 }, { "epoch": 12.763687150837988, "grad_norm": 0.39881834387779236, "learning_rate": 0.0003631932773109244, "loss": 0.3603, "step": 22847 }, { "epoch": 
12.764245810055867, "grad_norm": 0.4261707365512848, "learning_rate": 0.0003631652661064426, "loss": 0.449, "step": 22848 }, { "epoch": 12.764804469273743, "grad_norm": 0.41769784688949585, "learning_rate": 0.0003631372549019608, "loss": 0.3626, "step": 22849 }, { "epoch": 12.76536312849162, "grad_norm": 0.4486564099788666, "learning_rate": 0.000363109243697479, "loss": 0.4275, "step": 22850 }, { "epoch": 12.765921787709498, "grad_norm": 0.4733645021915436, "learning_rate": 0.0003630812324929972, "loss": 0.3359, "step": 22851 }, { "epoch": 12.766480446927375, "grad_norm": 0.5320999622344971, "learning_rate": 0.0003630532212885154, "loss": 0.4027, "step": 22852 }, { "epoch": 12.767039106145251, "grad_norm": 0.48368027806282043, "learning_rate": 0.0003630252100840336, "loss": 0.3766, "step": 22853 }, { "epoch": 12.767597765363128, "grad_norm": 0.5685689449310303, "learning_rate": 0.00036299719887955187, "loss": 0.4203, "step": 22854 }, { "epoch": 12.768156424581006, "grad_norm": 0.40302178263664246, "learning_rate": 0.00036296918767507, "loss": 0.3017, "step": 22855 }, { "epoch": 12.768715083798883, "grad_norm": 0.7145180702209473, "learning_rate": 0.00036294117647058823, "loss": 0.3863, "step": 22856 }, { "epoch": 12.76927374301676, "grad_norm": 0.48648586869239807, "learning_rate": 0.00036291316526610644, "loss": 0.3131, "step": 22857 }, { "epoch": 12.769832402234638, "grad_norm": 0.574644148349762, "learning_rate": 0.00036288515406162464, "loss": 0.3018, "step": 22858 }, { "epoch": 12.770391061452514, "grad_norm": 0.4302155673503876, "learning_rate": 0.0003628571428571429, "loss": 0.4593, "step": 22859 }, { "epoch": 12.77094972067039, "grad_norm": 0.49935561418533325, "learning_rate": 0.00036282913165266105, "loss": 0.333, "step": 22860 }, { "epoch": 12.771508379888267, "grad_norm": 0.5210148692131042, "learning_rate": 0.00036280112044817926, "loss": 0.3756, "step": 22861 }, { "epoch": 12.772067039106146, "grad_norm": 0.4947070777416229, "learning_rate": 
0.0003627731092436975, "loss": 0.4755, "step": 22862 }, { "epoch": 12.772625698324022, "grad_norm": 0.3514373302459717, "learning_rate": 0.00036274509803921567, "loss": 0.3161, "step": 22863 }, { "epoch": 12.773184357541899, "grad_norm": 0.4451432228088379, "learning_rate": 0.00036271708683473393, "loss": 0.4335, "step": 22864 }, { "epoch": 12.773743016759777, "grad_norm": 0.7320363521575928, "learning_rate": 0.0003626890756302521, "loss": 0.4721, "step": 22865 }, { "epoch": 12.774301675977654, "grad_norm": 0.41618093848228455, "learning_rate": 0.0003626610644257703, "loss": 0.3359, "step": 22866 }, { "epoch": 12.77486033519553, "grad_norm": 1.3386528491973877, "learning_rate": 0.00036263305322128855, "loss": 0.517, "step": 22867 }, { "epoch": 12.775418994413407, "grad_norm": 0.38059747219085693, "learning_rate": 0.0003626050420168067, "loss": 0.3733, "step": 22868 }, { "epoch": 12.775977653631285, "grad_norm": 0.8537725806236267, "learning_rate": 0.00036257703081232496, "loss": 0.3632, "step": 22869 }, { "epoch": 12.776536312849162, "grad_norm": 0.7909241318702698, "learning_rate": 0.00036254901960784317, "loss": 0.5222, "step": 22870 }, { "epoch": 12.777094972067038, "grad_norm": 0.5452353358268738, "learning_rate": 0.0003625210084033613, "loss": 0.4052, "step": 22871 }, { "epoch": 12.777653631284917, "grad_norm": 0.5343122482299805, "learning_rate": 0.0003624929971988796, "loss": 0.4686, "step": 22872 }, { "epoch": 12.778212290502793, "grad_norm": 0.4161309003829956, "learning_rate": 0.00036246498599439773, "loss": 0.3909, "step": 22873 }, { "epoch": 12.77877094972067, "grad_norm": 0.5088701248168945, "learning_rate": 0.000362436974789916, "loss": 0.4229, "step": 22874 }, { "epoch": 12.779329608938548, "grad_norm": 0.6684455275535583, "learning_rate": 0.0003624089635854342, "loss": 0.4262, "step": 22875 }, { "epoch": 12.779888268156425, "grad_norm": 0.39843887090682983, "learning_rate": 0.00036238095238095235, "loss": 0.3187, "step": 22876 }, { "epoch": 
12.780446927374301, "grad_norm": 0.4985075294971466, "learning_rate": 0.0003623529411764706, "loss": 0.3945, "step": 22877 }, { "epoch": 12.78100558659218, "grad_norm": 0.33783870935440063, "learning_rate": 0.0003623249299719888, "loss": 0.3238, "step": 22878 }, { "epoch": 12.781564245810056, "grad_norm": 0.5383481383323669, "learning_rate": 0.000362296918767507, "loss": 0.3598, "step": 22879 }, { "epoch": 12.782122905027933, "grad_norm": 0.40740877389907837, "learning_rate": 0.00036226890756302523, "loss": 0.3878, "step": 22880 }, { "epoch": 12.78268156424581, "grad_norm": 3.1352524757385254, "learning_rate": 0.0003622408963585434, "loss": 0.4102, "step": 22881 }, { "epoch": 12.783240223463688, "grad_norm": 0.5671041011810303, "learning_rate": 0.00036221288515406164, "loss": 0.5092, "step": 22882 }, { "epoch": 12.783798882681564, "grad_norm": 0.6266922950744629, "learning_rate": 0.00036218487394957985, "loss": 0.4234, "step": 22883 }, { "epoch": 12.78435754189944, "grad_norm": 4.076899528503418, "learning_rate": 0.00036215686274509805, "loss": 0.3562, "step": 22884 }, { "epoch": 12.78491620111732, "grad_norm": 0.8048804402351379, "learning_rate": 0.00036212885154061626, "loss": 0.581, "step": 22885 }, { "epoch": 12.785474860335196, "grad_norm": 0.4180176854133606, "learning_rate": 0.00036210084033613446, "loss": 0.4466, "step": 22886 }, { "epoch": 12.786033519553072, "grad_norm": 1.5027180910110474, "learning_rate": 0.00036207282913165267, "loss": 0.4641, "step": 22887 }, { "epoch": 12.786592178770949, "grad_norm": 0.49694186449050903, "learning_rate": 0.0003620448179271709, "loss": 0.3743, "step": 22888 }, { "epoch": 12.787150837988827, "grad_norm": 0.569611668586731, "learning_rate": 0.0003620168067226891, "loss": 0.4991, "step": 22889 }, { "epoch": 12.787709497206704, "grad_norm": 0.4208122491836548, "learning_rate": 0.0003619887955182073, "loss": 0.4373, "step": 22890 }, { "epoch": 12.78826815642458, "grad_norm": 0.350697785615921, "learning_rate": 
0.0003619607843137255, "loss": 0.336, "step": 22891 }, { "epoch": 12.788826815642459, "grad_norm": 0.39987149834632874, "learning_rate": 0.0003619327731092437, "loss": 0.3555, "step": 22892 }, { "epoch": 12.789385474860335, "grad_norm": 0.4770813584327698, "learning_rate": 0.0003619047619047619, "loss": 0.3663, "step": 22893 }, { "epoch": 12.789944134078212, "grad_norm": 0.4965968728065491, "learning_rate": 0.00036187675070028017, "loss": 0.3886, "step": 22894 }, { "epoch": 12.79050279329609, "grad_norm": 0.5654256939888, "learning_rate": 0.0003618487394957983, "loss": 0.4519, "step": 22895 }, { "epoch": 12.791061452513967, "grad_norm": 0.39105844497680664, "learning_rate": 0.0003618207282913165, "loss": 0.3464, "step": 22896 }, { "epoch": 12.791620111731843, "grad_norm": 0.39361634850502014, "learning_rate": 0.00036179271708683473, "loss": 0.3567, "step": 22897 }, { "epoch": 12.79217877094972, "grad_norm": 0.5732298493385315, "learning_rate": 0.00036176470588235294, "loss": 0.3563, "step": 22898 }, { "epoch": 12.792737430167598, "grad_norm": 0.8701691627502441, "learning_rate": 0.0003617366946778712, "loss": 0.4507, "step": 22899 }, { "epoch": 12.793296089385475, "grad_norm": 0.83237624168396, "learning_rate": 0.00036170868347338935, "loss": 0.4936, "step": 22900 }, { "epoch": 12.793854748603351, "grad_norm": 0.666620135307312, "learning_rate": 0.00036168067226890755, "loss": 0.465, "step": 22901 }, { "epoch": 12.79441340782123, "grad_norm": 0.42541056871414185, "learning_rate": 0.0003616526610644258, "loss": 0.3368, "step": 22902 }, { "epoch": 12.794972067039106, "grad_norm": 0.462836891412735, "learning_rate": 0.00036162464985994397, "loss": 0.4149, "step": 22903 }, { "epoch": 12.795530726256983, "grad_norm": 2.050976514816284, "learning_rate": 0.0003615966386554622, "loss": 0.8036, "step": 22904 }, { "epoch": 12.796089385474861, "grad_norm": 0.47639819979667664, "learning_rate": 0.0003615686274509804, "loss": 0.5363, "step": 22905 }, { "epoch": 
12.796648044692738, "grad_norm": 3.898019790649414, "learning_rate": 0.0003615406162464986, "loss": 0.4147, "step": 22906 }, { "epoch": 12.797206703910614, "grad_norm": 0.48763981461524963, "learning_rate": 0.00036151260504201684, "loss": 0.5244, "step": 22907 }, { "epoch": 12.797765363128491, "grad_norm": 0.6783661246299744, "learning_rate": 0.000361484593837535, "loss": 0.3365, "step": 22908 }, { "epoch": 12.79832402234637, "grad_norm": 0.5876408219337463, "learning_rate": 0.00036145658263305326, "loss": 0.4194, "step": 22909 }, { "epoch": 12.798882681564246, "grad_norm": 0.44247081875801086, "learning_rate": 0.00036142857142857146, "loss": 0.4621, "step": 22910 }, { "epoch": 12.799441340782122, "grad_norm": 0.3946887254714966, "learning_rate": 0.0003614005602240896, "loss": 0.3529, "step": 22911 }, { "epoch": 12.8, "grad_norm": 0.6611924171447754, "learning_rate": 0.0003613725490196079, "loss": 0.4283, "step": 22912 }, { "epoch": 12.800558659217877, "grad_norm": 0.8747823238372803, "learning_rate": 0.000361344537815126, "loss": 0.6711, "step": 22913 }, { "epoch": 12.801117318435754, "grad_norm": 0.8581296801567078, "learning_rate": 0.0003613165266106443, "loss": 0.3636, "step": 22914 }, { "epoch": 12.80167597765363, "grad_norm": 0.4482992887496948, "learning_rate": 0.0003612885154061625, "loss": 0.4435, "step": 22915 }, { "epoch": 12.802234636871509, "grad_norm": 0.4412821829319, "learning_rate": 0.00036126050420168064, "loss": 0.4349, "step": 22916 }, { "epoch": 12.802793296089385, "grad_norm": 0.5532223582267761, "learning_rate": 0.0003612324929971989, "loss": 0.3913, "step": 22917 }, { "epoch": 12.803351955307262, "grad_norm": 3.270271062850952, "learning_rate": 0.0003612044817927171, "loss": 0.4952, "step": 22918 }, { "epoch": 12.80391061452514, "grad_norm": 0.8067111372947693, "learning_rate": 0.0003611764705882353, "loss": 0.5013, "step": 22919 }, { "epoch": 12.804469273743017, "grad_norm": 0.7011149525642395, "learning_rate": 0.0003611484593837535, 
"loss": 0.496, "step": 22920 }, { "epoch": 12.805027932960893, "grad_norm": 0.6165128946304321, "learning_rate": 0.0003611204481792717, "loss": 0.4906, "step": 22921 }, { "epoch": 12.805586592178772, "grad_norm": 0.3503299653530121, "learning_rate": 0.00036109243697478993, "loss": 0.2899, "step": 22922 }, { "epoch": 12.806145251396648, "grad_norm": 0.5084133744239807, "learning_rate": 0.00036106442577030814, "loss": 0.4163, "step": 22923 }, { "epoch": 12.806703910614525, "grad_norm": 0.7758415341377258, "learning_rate": 0.00036103641456582635, "loss": 0.3803, "step": 22924 }, { "epoch": 12.807262569832401, "grad_norm": 0.43046730756759644, "learning_rate": 0.00036100840336134455, "loss": 0.3943, "step": 22925 }, { "epoch": 12.80782122905028, "grad_norm": 0.7915734052658081, "learning_rate": 0.00036098039215686276, "loss": 0.3862, "step": 22926 }, { "epoch": 12.808379888268156, "grad_norm": 0.8833620548248291, "learning_rate": 0.00036095238095238096, "loss": 0.407, "step": 22927 }, { "epoch": 12.808938547486033, "grad_norm": 2.662686824798584, "learning_rate": 0.00036092436974789917, "loss": 0.5277, "step": 22928 }, { "epoch": 12.809497206703911, "grad_norm": 0.7573880553245544, "learning_rate": 0.0003608963585434173, "loss": 0.4419, "step": 22929 }, { "epoch": 12.810055865921788, "grad_norm": 0.4178950786590576, "learning_rate": 0.0003608683473389356, "loss": 0.4042, "step": 22930 }, { "epoch": 12.810614525139664, "grad_norm": 2.9414188861846924, "learning_rate": 0.0003608403361344538, "loss": 0.4143, "step": 22931 }, { "epoch": 12.811173184357543, "grad_norm": 0.43912386894226074, "learning_rate": 0.000360812324929972, "loss": 0.3899, "step": 22932 }, { "epoch": 12.81173184357542, "grad_norm": 1.2010248899459839, "learning_rate": 0.0003607843137254902, "loss": 0.4636, "step": 22933 }, { "epoch": 12.812290502793296, "grad_norm": 0.5587520599365234, "learning_rate": 0.0003607563025210084, "loss": 0.329, "step": 22934 }, { "epoch": 12.812849162011172, "grad_norm": 
0.44351106882095337, "learning_rate": 0.0003607282913165266, "loss": 0.4201, "step": 22935 }, { "epoch": 12.81340782122905, "grad_norm": 0.5449080467224121, "learning_rate": 0.0003607002801120448, "loss": 0.3573, "step": 22936 }, { "epoch": 12.813966480446927, "grad_norm": 1.6266264915466309, "learning_rate": 0.000360672268907563, "loss": 0.485, "step": 22937 }, { "epoch": 12.814525139664804, "grad_norm": 0.44367125630378723, "learning_rate": 0.00036064425770308123, "loss": 0.3097, "step": 22938 }, { "epoch": 12.815083798882682, "grad_norm": 0.37211188673973083, "learning_rate": 0.00036061624649859944, "loss": 0.3652, "step": 22939 }, { "epoch": 12.815642458100559, "grad_norm": 0.8515840768814087, "learning_rate": 0.00036058823529411764, "loss": 0.5518, "step": 22940 }, { "epoch": 12.816201117318435, "grad_norm": 0.7962684035301208, "learning_rate": 0.00036056022408963585, "loss": 0.3652, "step": 22941 }, { "epoch": 12.816759776536312, "grad_norm": 0.37861326336860657, "learning_rate": 0.0003605322128851541, "loss": 0.4161, "step": 22942 }, { "epoch": 12.81731843575419, "grad_norm": 0.3585122525691986, "learning_rate": 0.00036050420168067226, "loss": 0.3956, "step": 22943 }, { "epoch": 12.817877094972067, "grad_norm": 0.634141206741333, "learning_rate": 0.00036047619047619047, "loss": 0.3857, "step": 22944 }, { "epoch": 12.818435754189943, "grad_norm": 0.5188333988189697, "learning_rate": 0.00036044817927170867, "loss": 0.4066, "step": 22945 }, { "epoch": 12.818994413407822, "grad_norm": 0.5290367007255554, "learning_rate": 0.0003604201680672269, "loss": 0.3227, "step": 22946 }, { "epoch": 12.819553072625698, "grad_norm": 0.7687883973121643, "learning_rate": 0.00036039215686274514, "loss": 0.4216, "step": 22947 }, { "epoch": 12.820111731843575, "grad_norm": 0.4438192844390869, "learning_rate": 0.0003603641456582633, "loss": 0.5563, "step": 22948 }, { "epoch": 12.820670391061453, "grad_norm": 0.5439226031303406, "learning_rate": 0.0003603361344537815, "loss": 
0.3598, "step": 22949 }, { "epoch": 12.82122905027933, "grad_norm": 0.5417304039001465, "learning_rate": 0.00036030812324929976, "loss": 0.4334, "step": 22950 }, { "epoch": 12.821787709497206, "grad_norm": 0.4443223178386688, "learning_rate": 0.0003602801120448179, "loss": 0.4263, "step": 22951 }, { "epoch": 12.822346368715085, "grad_norm": 0.3897508978843689, "learning_rate": 0.00036025210084033617, "loss": 0.3734, "step": 22952 }, { "epoch": 12.822905027932961, "grad_norm": 0.4214841425418854, "learning_rate": 0.0003602240896358543, "loss": 0.4257, "step": 22953 }, { "epoch": 12.823463687150838, "grad_norm": 0.4963054060935974, "learning_rate": 0.0003601960784313725, "loss": 0.3706, "step": 22954 }, { "epoch": 12.824022346368714, "grad_norm": 0.8631716370582581, "learning_rate": 0.0003601680672268908, "loss": 0.3748, "step": 22955 }, { "epoch": 12.824581005586593, "grad_norm": 0.49519115686416626, "learning_rate": 0.00036014005602240894, "loss": 0.3873, "step": 22956 }, { "epoch": 12.82513966480447, "grad_norm": 1.7033321857452393, "learning_rate": 0.0003601120448179272, "loss": 0.5147, "step": 22957 }, { "epoch": 12.825698324022346, "grad_norm": 0.5086231827735901, "learning_rate": 0.0003600840336134454, "loss": 0.3751, "step": 22958 }, { "epoch": 12.826256983240224, "grad_norm": 2.3101048469543457, "learning_rate": 0.00036005602240896356, "loss": 0.3818, "step": 22959 }, { "epoch": 12.8268156424581, "grad_norm": 0.3681088387966156, "learning_rate": 0.0003600280112044818, "loss": 0.345, "step": 22960 }, { "epoch": 12.827374301675977, "grad_norm": 0.560340166091919, "learning_rate": 0.00035999999999999997, "loss": 0.3633, "step": 22961 }, { "epoch": 12.827932960893854, "grad_norm": 0.9640902280807495, "learning_rate": 0.00035997198879551823, "loss": 0.426, "step": 22962 }, { "epoch": 12.828491620111732, "grad_norm": 0.5351158380508423, "learning_rate": 0.00035994397759103643, "loss": 0.3972, "step": 22963 }, { "epoch": 12.829050279329609, "grad_norm": 
0.5665839910507202, "learning_rate": 0.0003599159663865546, "loss": 0.4003, "step": 22964 }, { "epoch": 12.829608938547485, "grad_norm": 0.48218879103660583, "learning_rate": 0.00035988795518207285, "loss": 0.3298, "step": 22965 }, { "epoch": 12.830167597765364, "grad_norm": 0.5230398178100586, "learning_rate": 0.00035985994397759105, "loss": 0.5165, "step": 22966 }, { "epoch": 12.83072625698324, "grad_norm": 0.6515301465988159, "learning_rate": 0.00035983193277310926, "loss": 0.4461, "step": 22967 }, { "epoch": 12.831284916201117, "grad_norm": 0.8557658195495605, "learning_rate": 0.00035980392156862746, "loss": 0.4364, "step": 22968 }, { "epoch": 12.831843575418995, "grad_norm": 0.4500856101512909, "learning_rate": 0.0003597759103641456, "loss": 0.3393, "step": 22969 }, { "epoch": 12.832402234636872, "grad_norm": 0.5157396793365479, "learning_rate": 0.0003597478991596639, "loss": 0.3789, "step": 22970 }, { "epoch": 12.832960893854748, "grad_norm": 1.061504602432251, "learning_rate": 0.0003597198879551821, "loss": 0.4093, "step": 22971 }, { "epoch": 12.833519553072625, "grad_norm": 1.2061611413955688, "learning_rate": 0.0003596918767507003, "loss": 0.3858, "step": 22972 }, { "epoch": 12.834078212290503, "grad_norm": 0.369796484708786, "learning_rate": 0.0003596638655462185, "loss": 0.3654, "step": 22973 }, { "epoch": 12.83463687150838, "grad_norm": 0.5278509259223938, "learning_rate": 0.0003596358543417367, "loss": 0.4444, "step": 22974 }, { "epoch": 12.835195530726256, "grad_norm": 0.7993066906929016, "learning_rate": 0.0003596078431372549, "loss": 0.435, "step": 22975 }, { "epoch": 12.835754189944135, "grad_norm": 0.4012240171432495, "learning_rate": 0.0003595798319327731, "loss": 0.3085, "step": 22976 }, { "epoch": 12.836312849162011, "grad_norm": 0.49167600274086, "learning_rate": 0.0003595518207282913, "loss": 0.4289, "step": 22977 }, { "epoch": 12.836871508379888, "grad_norm": 0.6099218130111694, "learning_rate": 0.0003595238095238095, "loss": 0.4413, "step": 
22978 }, { "epoch": 12.837430167597766, "grad_norm": 0.38181930780410767, "learning_rate": 0.00035949579831932773, "loss": 0.341, "step": 22979 }, { "epoch": 12.837988826815643, "grad_norm": 0.3030468821525574, "learning_rate": 0.00035946778711484594, "loss": 0.3439, "step": 22980 }, { "epoch": 12.83854748603352, "grad_norm": 0.5551451444625854, "learning_rate": 0.00035943977591036414, "loss": 0.3628, "step": 22981 }, { "epoch": 12.839106145251396, "grad_norm": 0.5391368269920349, "learning_rate": 0.0003594117647058824, "loss": 0.4453, "step": 22982 }, { "epoch": 12.839664804469274, "grad_norm": 0.4426036477088928, "learning_rate": 0.00035938375350140055, "loss": 0.4381, "step": 22983 }, { "epoch": 12.84022346368715, "grad_norm": 0.7820568680763245, "learning_rate": 0.00035935574229691876, "loss": 0.4928, "step": 22984 }, { "epoch": 12.840782122905027, "grad_norm": 0.5407456755638123, "learning_rate": 0.00035932773109243697, "loss": 0.4277, "step": 22985 }, { "epoch": 12.841340782122906, "grad_norm": 0.44331902265548706, "learning_rate": 0.00035929971988795517, "loss": 0.3223, "step": 22986 }, { "epoch": 12.841899441340782, "grad_norm": 0.6615450382232666, "learning_rate": 0.00035927170868347343, "loss": 0.5106, "step": 22987 }, { "epoch": 12.842458100558659, "grad_norm": 0.6417344212532043, "learning_rate": 0.0003592436974789916, "loss": 0.4632, "step": 22988 }, { "epoch": 12.843016759776535, "grad_norm": 0.4910949170589447, "learning_rate": 0.0003592156862745098, "loss": 0.4686, "step": 22989 }, { "epoch": 12.843575418994414, "grad_norm": 0.6603783965110779, "learning_rate": 0.00035918767507002805, "loss": 0.4865, "step": 22990 }, { "epoch": 12.84413407821229, "grad_norm": 0.36142870783805847, "learning_rate": 0.0003591596638655462, "loss": 0.3994, "step": 22991 }, { "epoch": 12.844692737430167, "grad_norm": 0.3348754048347473, "learning_rate": 0.00035913165266106446, "loss": 0.2924, "step": 22992 }, { "epoch": 12.845251396648045, "grad_norm": 
0.38223209977149963, "learning_rate": 0.0003591036414565826, "loss": 0.3638, "step": 22993 }, { "epoch": 12.845810055865922, "grad_norm": 4.185999393463135, "learning_rate": 0.0003590756302521008, "loss": 0.3598, "step": 22994 }, { "epoch": 12.846368715083798, "grad_norm": 0.5923586487770081, "learning_rate": 0.0003590476190476191, "loss": 0.5234, "step": 22995 }, { "epoch": 12.846927374301677, "grad_norm": 0.4538070857524872, "learning_rate": 0.00035901960784313723, "loss": 0.3444, "step": 22996 }, { "epoch": 12.847486033519553, "grad_norm": 0.4295232892036438, "learning_rate": 0.0003589915966386555, "loss": 0.3595, "step": 22997 }, { "epoch": 12.84804469273743, "grad_norm": 0.4248203933238983, "learning_rate": 0.0003589635854341737, "loss": 0.4056, "step": 22998 }, { "epoch": 12.848603351955306, "grad_norm": 0.4672292470932007, "learning_rate": 0.00035893557422969185, "loss": 0.4036, "step": 22999 }, { "epoch": 12.849162011173185, "grad_norm": 0.5464761257171631, "learning_rate": 0.0003589075630252101, "loss": 0.4748, "step": 23000 }, { "epoch": 12.849162011173185, "eval_cer": 0.08653290126891645, "eval_loss": 0.3306316137313843, "eval_runtime": 55.5835, "eval_samples_per_second": 81.643, "eval_steps_per_second": 5.109, "eval_wer": 0.34015809614256587, "step": 23000 }, { "epoch": 12.849720670391061, "grad_norm": 0.40832653641700745, "learning_rate": 0.00035887955182072826, "loss": 0.3351, "step": 23001 }, { "epoch": 12.850279329608938, "grad_norm": 0.419050395488739, "learning_rate": 0.0003588515406162465, "loss": 0.3767, "step": 23002 }, { "epoch": 12.850837988826816, "grad_norm": 0.4054844379425049, "learning_rate": 0.00035882352941176473, "loss": 0.3843, "step": 23003 }, { "epoch": 12.851396648044693, "grad_norm": 0.979784369468689, "learning_rate": 0.0003587955182072829, "loss": 0.3915, "step": 23004 }, { "epoch": 12.85195530726257, "grad_norm": 0.8679656982421875, "learning_rate": 0.00035876750700280114, "loss": 0.3773, "step": 23005 }, { "epoch": 
12.852513966480448, "grad_norm": 0.4106649160385132, "learning_rate": 0.00035873949579831935, "loss": 0.4321, "step": 23006 }, { "epoch": 12.853072625698324, "grad_norm": 0.4178391695022583, "learning_rate": 0.00035871148459383755, "loss": 0.4791, "step": 23007 }, { "epoch": 12.8536312849162, "grad_norm": 1.3904122114181519, "learning_rate": 0.00035868347338935576, "loss": 0.3309, "step": 23008 }, { "epoch": 12.854189944134077, "grad_norm": 0.5001409649848938, "learning_rate": 0.0003586554621848739, "loss": 0.4192, "step": 23009 }, { "epoch": 12.854748603351956, "grad_norm": 0.5209900736808777, "learning_rate": 0.00035862745098039217, "loss": 0.4104, "step": 23010 }, { "epoch": 12.855307262569832, "grad_norm": 0.4196328818798065, "learning_rate": 0.0003585994397759104, "loss": 0.3401, "step": 23011 }, { "epoch": 12.855865921787709, "grad_norm": 0.5003356337547302, "learning_rate": 0.0003585714285714286, "loss": 0.5244, "step": 23012 }, { "epoch": 12.856424581005587, "grad_norm": 0.46145743131637573, "learning_rate": 0.0003585434173669468, "loss": 0.3921, "step": 23013 }, { "epoch": 12.856983240223464, "grad_norm": 0.5887634754180908, "learning_rate": 0.000358515406162465, "loss": 0.4759, "step": 23014 }, { "epoch": 12.85754189944134, "grad_norm": 0.9846682548522949, "learning_rate": 0.0003584873949579832, "loss": 0.4528, "step": 23015 }, { "epoch": 12.858100558659217, "grad_norm": 2.030968427658081, "learning_rate": 0.0003584593837535014, "loss": 0.4218, "step": 23016 }, { "epoch": 12.858659217877095, "grad_norm": 0.9259823560714722, "learning_rate": 0.00035843137254901967, "loss": 0.6358, "step": 23017 }, { "epoch": 12.859217877094972, "grad_norm": 0.5755375623703003, "learning_rate": 0.0003584033613445378, "loss": 0.4643, "step": 23018 }, { "epoch": 12.859776536312848, "grad_norm": 2.5767340660095215, "learning_rate": 0.000358375350140056, "loss": 0.4682, "step": 23019 }, { "epoch": 12.860335195530727, "grad_norm": 0.5438586473464966, "learning_rate": 
0.00035834733893557423, "loss": 0.575, "step": 23020 }, { "epoch": 12.860893854748603, "grad_norm": 1.0188018083572388, "learning_rate": 0.00035831932773109244, "loss": 0.5526, "step": 23021 }, { "epoch": 12.86145251396648, "grad_norm": 0.6130913496017456, "learning_rate": 0.0003582913165266107, "loss": 0.4491, "step": 23022 }, { "epoch": 12.862011173184358, "grad_norm": 0.6643819212913513, "learning_rate": 0.00035826330532212885, "loss": 0.5539, "step": 23023 }, { "epoch": 12.862569832402235, "grad_norm": 0.3844551146030426, "learning_rate": 0.00035823529411764705, "loss": 0.3479, "step": 23024 }, { "epoch": 12.863128491620111, "grad_norm": 1.7012395858764648, "learning_rate": 0.0003582072829131653, "loss": 0.4918, "step": 23025 }, { "epoch": 12.86368715083799, "grad_norm": 0.6724205613136292, "learning_rate": 0.00035817927170868347, "loss": 0.3504, "step": 23026 }, { "epoch": 12.864245810055866, "grad_norm": 0.6052526235580444, "learning_rate": 0.0003581512605042017, "loss": 0.5405, "step": 23027 }, { "epoch": 12.864804469273743, "grad_norm": 0.707908570766449, "learning_rate": 0.0003581232492997199, "loss": 0.3254, "step": 23028 }, { "epoch": 12.86536312849162, "grad_norm": 0.3449500799179077, "learning_rate": 0.0003580952380952381, "loss": 0.3633, "step": 23029 }, { "epoch": 12.865921787709498, "grad_norm": 6.125621795654297, "learning_rate": 0.00035806722689075634, "loss": 0.5264, "step": 23030 }, { "epoch": 12.866480446927374, "grad_norm": 0.4929667115211487, "learning_rate": 0.0003580392156862745, "loss": 0.37, "step": 23031 }, { "epoch": 12.867039106145251, "grad_norm": 0.6589158177375793, "learning_rate": 0.00035801120448179276, "loss": 0.4408, "step": 23032 }, { "epoch": 12.86759776536313, "grad_norm": 0.5033377408981323, "learning_rate": 0.00035798319327731096, "loss": 0.4684, "step": 23033 }, { "epoch": 12.868156424581006, "grad_norm": 0.9584864974021912, "learning_rate": 0.0003579551820728291, "loss": 0.3205, "step": 23034 }, { "epoch": 
12.868715083798882, "grad_norm": 2.052773952484131, "learning_rate": 0.0003579271708683474, "loss": 0.3615, "step": 23035 }, { "epoch": 12.869273743016759, "grad_norm": 0.3121645450592041, "learning_rate": 0.0003578991596638655, "loss": 0.2918, "step": 23036 }, { "epoch": 12.869832402234637, "grad_norm": 0.501052737236023, "learning_rate": 0.0003578711484593838, "loss": 0.3291, "step": 23037 }, { "epoch": 12.870391061452514, "grad_norm": 0.699521541595459, "learning_rate": 0.000357843137254902, "loss": 0.3831, "step": 23038 }, { "epoch": 12.87094972067039, "grad_norm": 0.5161980986595154, "learning_rate": 0.00035781512605042014, "loss": 0.4041, "step": 23039 }, { "epoch": 12.871508379888269, "grad_norm": 1.0380103588104248, "learning_rate": 0.0003577871148459384, "loss": 0.4158, "step": 23040 }, { "epoch": 12.872067039106145, "grad_norm": 0.4340762495994568, "learning_rate": 0.0003577591036414566, "loss": 0.3768, "step": 23041 }, { "epoch": 12.872625698324022, "grad_norm": 1.3014675378799438, "learning_rate": 0.00035773109243697476, "loss": 0.4367, "step": 23042 }, { "epoch": 12.8731843575419, "grad_norm": 0.4287246763706207, "learning_rate": 0.000357703081232493, "loss": 0.3806, "step": 23043 }, { "epoch": 12.873743016759777, "grad_norm": 0.43329617381095886, "learning_rate": 0.0003576750700280112, "loss": 0.3864, "step": 23044 }, { "epoch": 12.874301675977653, "grad_norm": 0.40220052003860474, "learning_rate": 0.00035764705882352943, "loss": 0.3513, "step": 23045 }, { "epoch": 12.87486033519553, "grad_norm": 11.074256896972656, "learning_rate": 0.00035761904761904764, "loss": 0.3399, "step": 23046 }, { "epoch": 12.875418994413408, "grad_norm": 0.5627591609954834, "learning_rate": 0.0003575910364145658, "loss": 0.402, "step": 23047 }, { "epoch": 12.875977653631285, "grad_norm": 0.3989485204219818, "learning_rate": 0.00035756302521008405, "loss": 0.4485, "step": 23048 }, { "epoch": 12.876536312849161, "grad_norm": 0.8534471988677979, "learning_rate": 
0.00035753501400560226, "loss": 0.4582, "step": 23049 }, { "epoch": 12.87709497206704, "grad_norm": 1.5509145259857178, "learning_rate": 0.00035750700280112046, "loss": 0.3113, "step": 23050 }, { "epoch": 12.877653631284916, "grad_norm": 0.5349453687667847, "learning_rate": 0.00035747899159663867, "loss": 0.3604, "step": 23051 }, { "epoch": 12.878212290502793, "grad_norm": 0.5117002725601196, "learning_rate": 0.0003574509803921568, "loss": 0.5092, "step": 23052 }, { "epoch": 12.878770949720671, "grad_norm": 2.3256473541259766, "learning_rate": 0.0003574229691876751, "loss": 0.4547, "step": 23053 }, { "epoch": 12.879329608938548, "grad_norm": 0.5117779970169067, "learning_rate": 0.0003573949579831933, "loss": 0.483, "step": 23054 }, { "epoch": 12.879888268156424, "grad_norm": 0.48029854893684387, "learning_rate": 0.0003573669467787115, "loss": 0.3635, "step": 23055 }, { "epoch": 12.880446927374301, "grad_norm": 0.4011361300945282, "learning_rate": 0.0003573389355742297, "loss": 0.4259, "step": 23056 }, { "epoch": 12.88100558659218, "grad_norm": 0.5677130818367004, "learning_rate": 0.0003573109243697479, "loss": 0.5313, "step": 23057 }, { "epoch": 12.881564245810056, "grad_norm": 0.48748213052749634, "learning_rate": 0.0003572829131652661, "loss": 0.4961, "step": 23058 }, { "epoch": 12.882122905027932, "grad_norm": 0.4813239574432373, "learning_rate": 0.0003572549019607843, "loss": 0.3753, "step": 23059 }, { "epoch": 12.88268156424581, "grad_norm": 0.44387978315353394, "learning_rate": 0.0003572268907563025, "loss": 0.4695, "step": 23060 }, { "epoch": 12.883240223463687, "grad_norm": 0.43491995334625244, "learning_rate": 0.00035719887955182073, "loss": 0.3946, "step": 23061 }, { "epoch": 12.883798882681564, "grad_norm": 0.5463988780975342, "learning_rate": 0.00035717086834733894, "loss": 0.4322, "step": 23062 }, { "epoch": 12.88435754189944, "grad_norm": 0.6533722877502441, "learning_rate": 0.00035714285714285714, "loss": 0.3946, "step": 23063 }, { "epoch": 
12.884916201117319, "grad_norm": 0.43979984521865845, "learning_rate": 0.00035711484593837535, "loss": 0.4218, "step": 23064 }, { "epoch": 12.885474860335195, "grad_norm": 0.4594962000846863, "learning_rate": 0.0003570868347338936, "loss": 0.5339, "step": 23065 }, { "epoch": 12.886033519553072, "grad_norm": 0.5154690146446228, "learning_rate": 0.00035705882352941176, "loss": 0.3145, "step": 23066 }, { "epoch": 12.88659217877095, "grad_norm": 0.5820730328559875, "learning_rate": 0.00035703081232492997, "loss": 0.4964, "step": 23067 }, { "epoch": 12.887150837988827, "grad_norm": 0.5742661952972412, "learning_rate": 0.00035700280112044817, "loss": 0.436, "step": 23068 }, { "epoch": 12.887709497206703, "grad_norm": 0.45133382081985474, "learning_rate": 0.0003569747899159664, "loss": 0.3889, "step": 23069 }, { "epoch": 12.888268156424582, "grad_norm": 2.184577703475952, "learning_rate": 0.00035694677871148464, "loss": 0.4346, "step": 23070 }, { "epoch": 12.888826815642458, "grad_norm": 0.5054942965507507, "learning_rate": 0.0003569187675070028, "loss": 0.3372, "step": 23071 }, { "epoch": 12.889385474860335, "grad_norm": 0.5646455883979797, "learning_rate": 0.000356890756302521, "loss": 0.4239, "step": 23072 }, { "epoch": 12.889944134078211, "grad_norm": 0.47510045766830444, "learning_rate": 0.00035686274509803926, "loss": 0.3814, "step": 23073 }, { "epoch": 12.89050279329609, "grad_norm": 0.3632448613643646, "learning_rate": 0.0003568347338935574, "loss": 0.4174, "step": 23074 }, { "epoch": 12.891061452513966, "grad_norm": 0.5906867384910583, "learning_rate": 0.00035680672268907567, "loss": 0.5197, "step": 23075 }, { "epoch": 12.891620111731843, "grad_norm": 0.4474334120750427, "learning_rate": 0.0003567787114845938, "loss": 0.4522, "step": 23076 }, { "epoch": 12.892178770949721, "grad_norm": 0.7047088742256165, "learning_rate": 0.000356750700280112, "loss": 0.3908, "step": 23077 }, { "epoch": 12.892737430167598, "grad_norm": 4.913631916046143, "learning_rate": 
0.0003567226890756303, "loss": 0.4105, "step": 23078 }, { "epoch": 12.893296089385474, "grad_norm": 0.3191641569137573, "learning_rate": 0.00035669467787114844, "loss": 0.2841, "step": 23079 }, { "epoch": 12.893854748603353, "grad_norm": 0.5091173052787781, "learning_rate": 0.0003566666666666667, "loss": 0.5345, "step": 23080 }, { "epoch": 12.89441340782123, "grad_norm": 0.9211945533752441, "learning_rate": 0.0003566386554621849, "loss": 0.4845, "step": 23081 }, { "epoch": 12.894972067039106, "grad_norm": 0.5055422186851501, "learning_rate": 0.00035661064425770306, "loss": 0.4182, "step": 23082 }, { "epoch": 12.895530726256982, "grad_norm": 0.8235592842102051, "learning_rate": 0.0003565826330532213, "loss": 0.4562, "step": 23083 }, { "epoch": 12.89608938547486, "grad_norm": 0.4405900835990906, "learning_rate": 0.00035655462184873947, "loss": 0.4622, "step": 23084 }, { "epoch": 12.896648044692737, "grad_norm": 2.4743685722351074, "learning_rate": 0.00035652661064425773, "loss": 0.3816, "step": 23085 }, { "epoch": 12.897206703910614, "grad_norm": 0.6617835164070129, "learning_rate": 0.00035649859943977593, "loss": 0.4124, "step": 23086 }, { "epoch": 12.897765363128492, "grad_norm": 0.36478501558303833, "learning_rate": 0.0003564705882352941, "loss": 0.3631, "step": 23087 }, { "epoch": 12.898324022346369, "grad_norm": 0.7991409301757812, "learning_rate": 0.00035644257703081235, "loss": 0.4525, "step": 23088 }, { "epoch": 12.898882681564245, "grad_norm": 0.7258668541908264, "learning_rate": 0.00035641456582633055, "loss": 0.4349, "step": 23089 }, { "epoch": 12.899441340782122, "grad_norm": 0.33859020471572876, "learning_rate": 0.00035638655462184876, "loss": 0.4368, "step": 23090 }, { "epoch": 12.9, "grad_norm": 0.3822377920150757, "learning_rate": 0.00035635854341736696, "loss": 0.4163, "step": 23091 }, { "epoch": 12.900558659217877, "grad_norm": 0.7589262127876282, "learning_rate": 0.0003563305322128851, "loss": 0.4327, "step": 23092 }, { "epoch": 12.901117318435753, 
"grad_norm": 0.7192412614822388, "learning_rate": 0.0003563025210084034, "loss": 0.3176, "step": 23093 }, { "epoch": 12.901675977653632, "grad_norm": 1.1496564149856567, "learning_rate": 0.0003562745098039216, "loss": 0.4879, "step": 23094 }, { "epoch": 12.902234636871508, "grad_norm": 0.40294763445854187, "learning_rate": 0.0003562464985994398, "loss": 0.3654, "step": 23095 }, { "epoch": 12.902793296089385, "grad_norm": 0.5239118933677673, "learning_rate": 0.000356218487394958, "loss": 0.479, "step": 23096 }, { "epoch": 12.903351955307263, "grad_norm": 0.43779295682907104, "learning_rate": 0.0003561904761904762, "loss": 0.3349, "step": 23097 }, { "epoch": 12.90391061452514, "grad_norm": 0.6125030517578125, "learning_rate": 0.0003561624649859944, "loss": 0.3977, "step": 23098 }, { "epoch": 12.904469273743016, "grad_norm": 0.39839741587638855, "learning_rate": 0.0003561344537815126, "loss": 0.2895, "step": 23099 }, { "epoch": 12.905027932960895, "grad_norm": 5.303694248199463, "learning_rate": 0.0003561064425770308, "loss": 0.4634, "step": 23100 }, { "epoch": 12.905586592178771, "grad_norm": 1.5205161571502686, "learning_rate": 0.000356078431372549, "loss": 0.4677, "step": 23101 }, { "epoch": 12.906145251396648, "grad_norm": 0.39830920100212097, "learning_rate": 0.00035605042016806723, "loss": 0.384, "step": 23102 }, { "epoch": 12.906703910614524, "grad_norm": 1.0327866077423096, "learning_rate": 0.00035602240896358544, "loss": 0.3333, "step": 23103 }, { "epoch": 12.907262569832403, "grad_norm": 0.9274595379829407, "learning_rate": 0.00035599439775910364, "loss": 0.5955, "step": 23104 }, { "epoch": 12.90782122905028, "grad_norm": 0.45628514885902405, "learning_rate": 0.0003559663865546219, "loss": 0.3501, "step": 23105 }, { "epoch": 12.908379888268156, "grad_norm": 0.4545165002346039, "learning_rate": 0.00035593837535014005, "loss": 0.3669, "step": 23106 }, { "epoch": 12.908938547486034, "grad_norm": 3.164350748062134, "learning_rate": 0.00035591036414565826, 
"loss": 0.3939, "step": 23107 }, { "epoch": 12.90949720670391, "grad_norm": 0.5116806626319885, "learning_rate": 0.00035588235294117647, "loss": 0.4668, "step": 23108 }, { "epoch": 12.910055865921787, "grad_norm": 0.3244902491569519, "learning_rate": 0.00035585434173669467, "loss": 0.3115, "step": 23109 }, { "epoch": 12.910614525139664, "grad_norm": 0.6180371046066284, "learning_rate": 0.00035582633053221293, "loss": 0.4601, "step": 23110 }, { "epoch": 12.911173184357542, "grad_norm": 0.40666645765304565, "learning_rate": 0.0003557983193277311, "loss": 0.35, "step": 23111 }, { "epoch": 12.911731843575419, "grad_norm": 0.6671972274780273, "learning_rate": 0.0003557703081232493, "loss": 0.3457, "step": 23112 }, { "epoch": 12.912290502793295, "grad_norm": 0.4347480237483978, "learning_rate": 0.00035574229691876755, "loss": 0.3041, "step": 23113 }, { "epoch": 12.912849162011174, "grad_norm": 0.39989516139030457, "learning_rate": 0.0003557142857142857, "loss": 0.4196, "step": 23114 }, { "epoch": 12.91340782122905, "grad_norm": 0.44127461314201355, "learning_rate": 0.00035568627450980396, "loss": 0.3872, "step": 23115 }, { "epoch": 12.913966480446927, "grad_norm": 0.5842620134353638, "learning_rate": 0.0003556582633053221, "loss": 0.489, "step": 23116 }, { "epoch": 12.914525139664804, "grad_norm": 0.9391373991966248, "learning_rate": 0.0003556302521008403, "loss": 0.5214, "step": 23117 }, { "epoch": 12.915083798882682, "grad_norm": 0.7196061611175537, "learning_rate": 0.0003556022408963586, "loss": 0.3244, "step": 23118 }, { "epoch": 12.915642458100558, "grad_norm": 0.630444347858429, "learning_rate": 0.00035557422969187673, "loss": 0.5156, "step": 23119 }, { "epoch": 12.916201117318435, "grad_norm": 0.7112671732902527, "learning_rate": 0.000355546218487395, "loss": 0.4456, "step": 23120 }, { "epoch": 12.916759776536313, "grad_norm": 0.4061303436756134, "learning_rate": 0.0003555182072829132, "loss": 0.3914, "step": 23121 }, { "epoch": 12.91731843575419, "grad_norm": 
0.6440028548240662, "learning_rate": 0.00035549019607843135, "loss": 0.4317, "step": 23122 }, { "epoch": 12.917877094972066, "grad_norm": 5.443751335144043, "learning_rate": 0.0003554621848739496, "loss": 0.3821, "step": 23123 }, { "epoch": 12.918435754189945, "grad_norm": 0.4395512342453003, "learning_rate": 0.00035543417366946776, "loss": 0.4109, "step": 23124 }, { "epoch": 12.918994413407821, "grad_norm": 0.6705725789070129, "learning_rate": 0.000355406162464986, "loss": 0.4778, "step": 23125 }, { "epoch": 12.919553072625698, "grad_norm": 0.669812023639679, "learning_rate": 0.00035537815126050423, "loss": 0.3954, "step": 23126 }, { "epoch": 12.920111731843576, "grad_norm": 6.2944865226745605, "learning_rate": 0.0003553501400560224, "loss": 0.5463, "step": 23127 }, { "epoch": 12.920670391061453, "grad_norm": 0.34514120221138, "learning_rate": 0.00035532212885154064, "loss": 0.3698, "step": 23128 }, { "epoch": 12.92122905027933, "grad_norm": 0.4975703954696655, "learning_rate": 0.00035529411764705885, "loss": 0.54, "step": 23129 }, { "epoch": 12.921787709497206, "grad_norm": 0.5599039196968079, "learning_rate": 0.00035526610644257705, "loss": 0.4116, "step": 23130 }, { "epoch": 12.922346368715084, "grad_norm": 0.5498924255371094, "learning_rate": 0.00035523809523809526, "loss": 0.4792, "step": 23131 }, { "epoch": 12.922905027932961, "grad_norm": 0.5034617781639099, "learning_rate": 0.0003552100840336134, "loss": 0.4584, "step": 23132 }, { "epoch": 12.923463687150837, "grad_norm": 0.6296890377998352, "learning_rate": 0.00035518207282913167, "loss": 0.4363, "step": 23133 }, { "epoch": 12.924022346368716, "grad_norm": 5.106089115142822, "learning_rate": 0.0003551540616246499, "loss": 0.3957, "step": 23134 }, { "epoch": 12.924581005586592, "grad_norm": 0.8492132425308228, "learning_rate": 0.0003551260504201681, "loss": 0.3406, "step": 23135 }, { "epoch": 12.925139664804469, "grad_norm": 1.0980751514434814, "learning_rate": 0.0003550980392156863, "loss": 0.401, "step": 
23136 }, { "epoch": 12.925698324022346, "grad_norm": 0.5778860449790955, "learning_rate": 0.0003550700280112045, "loss": 0.4991, "step": 23137 }, { "epoch": 12.926256983240224, "grad_norm": 0.5193675756454468, "learning_rate": 0.0003550420168067227, "loss": 0.3746, "step": 23138 }, { "epoch": 12.9268156424581, "grad_norm": 1.41737699508667, "learning_rate": 0.0003550140056022409, "loss": 0.3229, "step": 23139 }, { "epoch": 12.927374301675977, "grad_norm": 1.3138048648834229, "learning_rate": 0.0003549859943977591, "loss": 0.4481, "step": 23140 }, { "epoch": 12.927932960893855, "grad_norm": 0.411640465259552, "learning_rate": 0.0003549579831932773, "loss": 0.4498, "step": 23141 }, { "epoch": 12.928491620111732, "grad_norm": 0.6565712094306946, "learning_rate": 0.0003549299719887955, "loss": 0.5351, "step": 23142 }, { "epoch": 12.929050279329608, "grad_norm": 0.5316324830055237, "learning_rate": 0.00035490196078431373, "loss": 0.3879, "step": 23143 }, { "epoch": 12.929608938547487, "grad_norm": 0.4305028021335602, "learning_rate": 0.00035487394957983194, "loss": 0.3697, "step": 23144 }, { "epoch": 12.930167597765363, "grad_norm": 0.5711145401000977, "learning_rate": 0.0003548459383753502, "loss": 0.3749, "step": 23145 }, { "epoch": 12.93072625698324, "grad_norm": 0.6483446955680847, "learning_rate": 0.00035481792717086835, "loss": 0.5618, "step": 23146 }, { "epoch": 12.931284916201117, "grad_norm": 0.4535178542137146, "learning_rate": 0.00035478991596638655, "loss": 0.4038, "step": 23147 }, { "epoch": 12.931843575418995, "grad_norm": 0.6860631108283997, "learning_rate": 0.00035476190476190476, "loss": 0.5147, "step": 23148 }, { "epoch": 12.932402234636871, "grad_norm": 0.5085126757621765, "learning_rate": 0.00035473389355742297, "loss": 0.4812, "step": 23149 }, { "epoch": 12.932960893854748, "grad_norm": 0.37389346957206726, "learning_rate": 0.0003547058823529412, "loss": 0.3888, "step": 23150 }, { "epoch": 12.933519553072626, "grad_norm": 0.4842715561389923, 
"learning_rate": 0.0003546778711484594, "loss": 0.5799, "step": 23151 }, { "epoch": 12.934078212290503, "grad_norm": 0.3209175169467926, "learning_rate": 0.0003546498599439776, "loss": 0.3765, "step": 23152 }, { "epoch": 12.93463687150838, "grad_norm": 0.371474951505661, "learning_rate": 0.00035462184873949584, "loss": 0.4479, "step": 23153 }, { "epoch": 12.935195530726258, "grad_norm": 0.46804267168045044, "learning_rate": 0.000354593837535014, "loss": 0.3831, "step": 23154 }, { "epoch": 12.935754189944134, "grad_norm": 0.43284279108047485, "learning_rate": 0.0003545658263305322, "loss": 0.4451, "step": 23155 }, { "epoch": 12.936312849162011, "grad_norm": 0.41407787799835205, "learning_rate": 0.0003545378151260504, "loss": 0.3459, "step": 23156 }, { "epoch": 12.936871508379888, "grad_norm": 0.39946722984313965, "learning_rate": 0.0003545098039215686, "loss": 0.3971, "step": 23157 }, { "epoch": 12.937430167597766, "grad_norm": 2.627351760864258, "learning_rate": 0.0003544817927170869, "loss": 0.4448, "step": 23158 }, { "epoch": 12.937988826815642, "grad_norm": 0.5211644172668457, "learning_rate": 0.000354453781512605, "loss": 0.4807, "step": 23159 }, { "epoch": 12.938547486033519, "grad_norm": 0.4213368594646454, "learning_rate": 0.00035442577030812323, "loss": 0.4305, "step": 23160 }, { "epoch": 12.939106145251397, "grad_norm": 0.49920469522476196, "learning_rate": 0.0003543977591036415, "loss": 0.4454, "step": 23161 }, { "epoch": 12.939664804469274, "grad_norm": 0.6270092129707336, "learning_rate": 0.00035436974789915964, "loss": 0.3566, "step": 23162 }, { "epoch": 12.94022346368715, "grad_norm": 0.40327441692352295, "learning_rate": 0.0003543417366946779, "loss": 0.3618, "step": 23163 }, { "epoch": 12.940782122905027, "grad_norm": 0.3988189697265625, "learning_rate": 0.00035431372549019606, "loss": 0.4086, "step": 23164 }, { "epoch": 12.941340782122905, "grad_norm": 0.41284316778182983, "learning_rate": 0.00035428571428571426, "loss": 0.406, "step": 23165 }, { 
"epoch": 12.941899441340782, "grad_norm": 0.5242443680763245, "learning_rate": 0.0003542577030812325, "loss": 0.4835, "step": 23166 }, { "epoch": 12.942458100558659, "grad_norm": 0.37576621770858765, "learning_rate": 0.0003542296918767507, "loss": 0.3588, "step": 23167 }, { "epoch": 12.943016759776537, "grad_norm": 0.5130129456520081, "learning_rate": 0.00035420168067226893, "loss": 0.3783, "step": 23168 }, { "epoch": 12.943575418994413, "grad_norm": 0.5981467962265015, "learning_rate": 0.00035417366946778714, "loss": 0.3826, "step": 23169 }, { "epoch": 12.94413407821229, "grad_norm": 0.6164994835853577, "learning_rate": 0.0003541456582633053, "loss": 0.3734, "step": 23170 }, { "epoch": 12.944692737430168, "grad_norm": 5.469063758850098, "learning_rate": 0.00035411764705882355, "loss": 0.3224, "step": 23171 }, { "epoch": 12.945251396648045, "grad_norm": 0.7147475481033325, "learning_rate": 0.0003540896358543417, "loss": 0.4188, "step": 23172 }, { "epoch": 12.945810055865921, "grad_norm": 0.38063937425613403, "learning_rate": 0.00035406162464985996, "loss": 0.3715, "step": 23173 }, { "epoch": 12.946368715083798, "grad_norm": 0.748613178730011, "learning_rate": 0.00035403361344537817, "loss": 0.3842, "step": 23174 }, { "epoch": 12.946927374301676, "grad_norm": 0.4692458510398865, "learning_rate": 0.0003540056022408963, "loss": 0.5025, "step": 23175 }, { "epoch": 12.947486033519553, "grad_norm": 0.549880862236023, "learning_rate": 0.0003539775910364146, "loss": 0.4091, "step": 23176 }, { "epoch": 12.94804469273743, "grad_norm": 0.6346397399902344, "learning_rate": 0.0003539495798319328, "loss": 0.4154, "step": 23177 }, { "epoch": 12.948603351955308, "grad_norm": 0.5505090951919556, "learning_rate": 0.000353921568627451, "loss": 0.396, "step": 23178 }, { "epoch": 12.949162011173184, "grad_norm": 0.6531315445899963, "learning_rate": 0.0003538935574229692, "loss": 0.6022, "step": 23179 }, { "epoch": 12.949720670391061, "grad_norm": 0.48369401693344116, "learning_rate": 
0.00035386554621848735, "loss": 0.4315, "step": 23180 }, { "epoch": 12.95027932960894, "grad_norm": 0.6149060130119324, "learning_rate": 0.0003538375350140056, "loss": 0.5696, "step": 23181 }, { "epoch": 12.950837988826816, "grad_norm": 0.6020926833152771, "learning_rate": 0.0003538095238095238, "loss": 0.4266, "step": 23182 }, { "epoch": 12.951396648044692, "grad_norm": 0.8118475675582886, "learning_rate": 0.000353781512605042, "loss": 0.4989, "step": 23183 }, { "epoch": 12.951955307262569, "grad_norm": 0.5111149549484253, "learning_rate": 0.00035375350140056023, "loss": 0.3074, "step": 23184 }, { "epoch": 12.952513966480447, "grad_norm": 0.4930194318294525, "learning_rate": 0.00035372549019607844, "loss": 0.3905, "step": 23185 }, { "epoch": 12.953072625698324, "grad_norm": 0.44921284914016724, "learning_rate": 0.00035369747899159664, "loss": 0.3908, "step": 23186 }, { "epoch": 12.9536312849162, "grad_norm": 0.4587908983230591, "learning_rate": 0.00035366946778711485, "loss": 0.3576, "step": 23187 }, { "epoch": 12.954189944134079, "grad_norm": 0.6486285328865051, "learning_rate": 0.00035364145658263305, "loss": 0.3579, "step": 23188 }, { "epoch": 12.954748603351955, "grad_norm": 1.1239144802093506, "learning_rate": 0.00035361344537815126, "loss": 0.3341, "step": 23189 }, { "epoch": 12.955307262569832, "grad_norm": 0.537642776966095, "learning_rate": 0.00035358543417366947, "loss": 0.4835, "step": 23190 }, { "epoch": 12.955865921787709, "grad_norm": 0.7267135977745056, "learning_rate": 0.00035355742296918767, "loss": 0.3661, "step": 23191 }, { "epoch": 12.956424581005587, "grad_norm": 0.5360385775566101, "learning_rate": 0.0003535294117647059, "loss": 0.4747, "step": 23192 }, { "epoch": 12.956983240223463, "grad_norm": 1.1423780918121338, "learning_rate": 0.00035350140056022414, "loss": 0.4156, "step": 23193 }, { "epoch": 12.95754189944134, "grad_norm": 0.48165175318717957, "learning_rate": 0.0003534733893557423, "loss": 0.4929, "step": 23194 }, { "epoch": 
12.958100558659218, "grad_norm": 0.5315850973129272, "learning_rate": 0.0003534453781512605, "loss": 0.463, "step": 23195 }, { "epoch": 12.958659217877095, "grad_norm": 0.6171473264694214, "learning_rate": 0.0003534173669467787, "loss": 0.3475, "step": 23196 }, { "epoch": 12.959217877094972, "grad_norm": 0.6063719391822815, "learning_rate": 0.0003533893557422969, "loss": 0.4375, "step": 23197 }, { "epoch": 12.95977653631285, "grad_norm": 2.2525529861450195, "learning_rate": 0.00035336134453781517, "loss": 0.4534, "step": 23198 }, { "epoch": 12.960335195530726, "grad_norm": 0.9050586223602295, "learning_rate": 0.0003533333333333333, "loss": 0.5288, "step": 23199 }, { "epoch": 12.960893854748603, "grad_norm": 5.178910732269287, "learning_rate": 0.0003533053221288515, "loss": 0.4181, "step": 23200 }, { "epoch": 12.961452513966481, "grad_norm": 0.6808676719665527, "learning_rate": 0.0003532773109243698, "loss": 0.4808, "step": 23201 }, { "epoch": 12.962011173184358, "grad_norm": 0.49515166878700256, "learning_rate": 0.00035324929971988794, "loss": 0.458, "step": 23202 }, { "epoch": 12.962569832402234, "grad_norm": 3.1959047317504883, "learning_rate": 0.0003532212885154062, "loss": 0.412, "step": 23203 }, { "epoch": 12.963128491620111, "grad_norm": 0.556829571723938, "learning_rate": 0.00035319327731092435, "loss": 0.3096, "step": 23204 }, { "epoch": 12.96368715083799, "grad_norm": 3.763230562210083, "learning_rate": 0.00035316526610644256, "loss": 0.3793, "step": 23205 }, { "epoch": 12.964245810055866, "grad_norm": 0.4567667245864868, "learning_rate": 0.0003531372549019608, "loss": 0.424, "step": 23206 }, { "epoch": 12.964804469273743, "grad_norm": 0.5227357149124146, "learning_rate": 0.00035310924369747897, "loss": 0.4248, "step": 23207 }, { "epoch": 12.96536312849162, "grad_norm": 0.6566517353057861, "learning_rate": 0.00035308123249299723, "loss": 0.6314, "step": 23208 }, { "epoch": 12.965921787709497, "grad_norm": 0.4390355050563812, "learning_rate": 
0.00035305322128851543, "loss": 0.4633, "step": 23209 }, { "epoch": 12.966480446927374, "grad_norm": 0.4011116623878479, "learning_rate": 0.0003530252100840336, "loss": 0.3925, "step": 23210 }, { "epoch": 12.96703910614525, "grad_norm": 0.36842766404151917, "learning_rate": 0.00035299719887955185, "loss": 0.4092, "step": 23211 }, { "epoch": 12.967597765363129, "grad_norm": 0.47220832109451294, "learning_rate": 0.00035296918767507, "loss": 0.3117, "step": 23212 }, { "epoch": 12.968156424581005, "grad_norm": 3.219080924987793, "learning_rate": 0.00035294117647058826, "loss": 0.7143, "step": 23213 }, { "epoch": 12.968715083798882, "grad_norm": 1.0819718837738037, "learning_rate": 0.00035291316526610646, "loss": 0.3358, "step": 23214 }, { "epoch": 12.96927374301676, "grad_norm": 0.6638665795326233, "learning_rate": 0.0003528851540616246, "loss": 0.4422, "step": 23215 }, { "epoch": 12.969832402234637, "grad_norm": 0.3828807771205902, "learning_rate": 0.0003528571428571429, "loss": 0.3964, "step": 23216 }, { "epoch": 12.970391061452514, "grad_norm": 0.5035955309867859, "learning_rate": 0.0003528291316526611, "loss": 0.4449, "step": 23217 }, { "epoch": 12.970949720670392, "grad_norm": 0.5694970488548279, "learning_rate": 0.0003528011204481793, "loss": 0.4365, "step": 23218 }, { "epoch": 12.971508379888268, "grad_norm": 2.992596387863159, "learning_rate": 0.0003527731092436975, "loss": 0.3939, "step": 23219 }, { "epoch": 12.972067039106145, "grad_norm": 0.407412052154541, "learning_rate": 0.00035274509803921565, "loss": 0.3656, "step": 23220 }, { "epoch": 12.972625698324022, "grad_norm": 1.1889253854751587, "learning_rate": 0.0003527170868347339, "loss": 0.367, "step": 23221 }, { "epoch": 12.9731843575419, "grad_norm": 0.5009036660194397, "learning_rate": 0.0003526890756302521, "loss": 0.3725, "step": 23222 }, { "epoch": 12.973743016759776, "grad_norm": 0.4660651385784149, "learning_rate": 0.0003526610644257703, "loss": 0.3455, "step": 23223 }, { "epoch": 
12.974301675977653, "grad_norm": 0.540440559387207, "learning_rate": 0.0003526330532212885, "loss": 0.456, "step": 23224 }, { "epoch": 12.974860335195531, "grad_norm": 0.46635663509368896, "learning_rate": 0.00035260504201680673, "loss": 0.3262, "step": 23225 }, { "epoch": 12.975418994413408, "grad_norm": 0.5682308673858643, "learning_rate": 0.00035257703081232494, "loss": 0.3867, "step": 23226 }, { "epoch": 12.975977653631285, "grad_norm": 3.2422878742218018, "learning_rate": 0.00035254901960784314, "loss": 0.4573, "step": 23227 }, { "epoch": 12.976536312849163, "grad_norm": 0.4941241443157196, "learning_rate": 0.0003525210084033614, "loss": 0.3987, "step": 23228 }, { "epoch": 12.97709497206704, "grad_norm": 0.7356036305427551, "learning_rate": 0.00035249299719887955, "loss": 0.4712, "step": 23229 }, { "epoch": 12.977653631284916, "grad_norm": 0.45608341693878174, "learning_rate": 0.00035246498599439776, "loss": 0.4382, "step": 23230 }, { "epoch": 12.978212290502793, "grad_norm": 0.449614018201828, "learning_rate": 0.00035243697478991597, "loss": 0.4004, "step": 23231 }, { "epoch": 12.978770949720671, "grad_norm": 0.4191884994506836, "learning_rate": 0.00035240896358543417, "loss": 0.3354, "step": 23232 }, { "epoch": 12.979329608938547, "grad_norm": 0.4265230596065521, "learning_rate": 0.00035238095238095243, "loss": 0.4674, "step": 23233 }, { "epoch": 12.979888268156424, "grad_norm": 0.4582882225513458, "learning_rate": 0.0003523529411764706, "loss": 0.4423, "step": 23234 }, { "epoch": 12.980446927374302, "grad_norm": 1.04096257686615, "learning_rate": 0.0003523249299719888, "loss": 0.3586, "step": 23235 }, { "epoch": 12.981005586592179, "grad_norm": 0.38066405057907104, "learning_rate": 0.00035229691876750705, "loss": 0.4026, "step": 23236 }, { "epoch": 12.981564245810056, "grad_norm": 0.6009653806686401, "learning_rate": 0.0003522689075630252, "loss": 0.5469, "step": 23237 }, { "epoch": 12.982122905027932, "grad_norm": 0.38071930408477783, "learning_rate": 
0.00035224089635854346, "loss": 0.4713, "step": 23238 }, { "epoch": 12.98268156424581, "grad_norm": 0.620356559753418, "learning_rate": 0.0003522128851540616, "loss": 0.4746, "step": 23239 }, { "epoch": 12.983240223463687, "grad_norm": 0.4416988492012024, "learning_rate": 0.0003521848739495798, "loss": 0.3741, "step": 23240 }, { "epoch": 12.983798882681564, "grad_norm": 0.5249208807945251, "learning_rate": 0.0003521568627450981, "loss": 0.4015, "step": 23241 }, { "epoch": 12.984357541899442, "grad_norm": 0.4516470730304718, "learning_rate": 0.00035212885154061623, "loss": 0.4788, "step": 23242 }, { "epoch": 12.984916201117318, "grad_norm": 0.6191099882125854, "learning_rate": 0.0003521008403361345, "loss": 0.4432, "step": 23243 }, { "epoch": 12.985474860335195, "grad_norm": 0.6576861143112183, "learning_rate": 0.0003520728291316527, "loss": 0.6266, "step": 23244 }, { "epoch": 12.986033519553073, "grad_norm": 10.930916786193848, "learning_rate": 0.00035204481792717085, "loss": 0.3527, "step": 23245 }, { "epoch": 12.98659217877095, "grad_norm": 0.44888150691986084, "learning_rate": 0.0003520168067226891, "loss": 0.4743, "step": 23246 }, { "epoch": 12.987150837988827, "grad_norm": 0.3685510456562042, "learning_rate": 0.00035198879551820726, "loss": 0.3969, "step": 23247 }, { "epoch": 12.987709497206703, "grad_norm": 0.6509757041931152, "learning_rate": 0.0003519607843137255, "loss": 0.4456, "step": 23248 }, { "epoch": 12.988268156424581, "grad_norm": 2.5802578926086426, "learning_rate": 0.00035193277310924373, "loss": 0.4512, "step": 23249 }, { "epoch": 12.988826815642458, "grad_norm": 0.4414239823818207, "learning_rate": 0.0003519047619047619, "loss": 0.4157, "step": 23250 }, { "epoch": 12.989385474860335, "grad_norm": 0.5902441740036011, "learning_rate": 0.00035187675070028014, "loss": 0.5264, "step": 23251 }, { "epoch": 12.989944134078213, "grad_norm": 0.6017928123474121, "learning_rate": 0.00035184873949579835, "loss": 0.3992, "step": 23252 }, { "epoch": 
12.99050279329609, "grad_norm": 0.4962887465953827, "learning_rate": 0.00035182072829131655, "loss": 0.4205, "step": 23253 }, { "epoch": 12.991061452513966, "grad_norm": 1.87726628780365, "learning_rate": 0.00035179271708683476, "loss": 0.4861, "step": 23254 }, { "epoch": 12.991620111731844, "grad_norm": 0.412777304649353, "learning_rate": 0.0003517647058823529, "loss": 0.3152, "step": 23255 }, { "epoch": 12.992178770949721, "grad_norm": 0.5835171341896057, "learning_rate": 0.00035173669467787117, "loss": 0.555, "step": 23256 }, { "epoch": 12.992737430167598, "grad_norm": 0.44555264711380005, "learning_rate": 0.0003517086834733894, "loss": 0.4496, "step": 23257 }, { "epoch": 12.993296089385474, "grad_norm": 0.6477193236351013, "learning_rate": 0.0003516806722689076, "loss": 0.3407, "step": 23258 }, { "epoch": 12.993854748603352, "grad_norm": 0.29367247223854065, "learning_rate": 0.0003516526610644258, "loss": 0.2749, "step": 23259 }, { "epoch": 12.994413407821229, "grad_norm": 0.40685129165649414, "learning_rate": 0.000351624649859944, "loss": 0.3807, "step": 23260 }, { "epoch": 12.994972067039106, "grad_norm": 1.059714436531067, "learning_rate": 0.0003515966386554622, "loss": 0.4873, "step": 23261 }, { "epoch": 12.995530726256984, "grad_norm": 0.6272127628326416, "learning_rate": 0.0003515686274509804, "loss": 0.3504, "step": 23262 }, { "epoch": 12.99608938547486, "grad_norm": 0.4392222464084625, "learning_rate": 0.00035154061624649856, "loss": 0.4357, "step": 23263 }, { "epoch": 12.996648044692737, "grad_norm": 0.41262519359588623, "learning_rate": 0.0003515126050420168, "loss": 0.4543, "step": 23264 }, { "epoch": 12.997206703910614, "grad_norm": 0.43222057819366455, "learning_rate": 0.000351484593837535, "loss": 0.4734, "step": 23265 }, { "epoch": 12.997765363128492, "grad_norm": 0.5488288402557373, "learning_rate": 0.00035145658263305323, "loss": 0.3411, "step": 23266 }, { "epoch": 12.998324022346369, "grad_norm": 0.6002556681632996, "learning_rate": 
0.00035142857142857144, "loss": 0.3443, "step": 23267 }, { "epoch": 12.998882681564245, "grad_norm": 1.0528843402862549, "learning_rate": 0.00035140056022408964, "loss": 0.4884, "step": 23268 }, { "epoch": 12.999441340782123, "grad_norm": 2.401143789291382, "learning_rate": 0.00035137254901960785, "loss": 0.5111, "step": 23269 }, { "epoch": 13.0, "grad_norm": 0.46162986755371094, "learning_rate": 0.00035134453781512605, "loss": 0.591, "step": 23270 }, { "epoch": 13.000558659217877, "grad_norm": 0.47466105222702026, "learning_rate": 0.00035131652661064426, "loss": 0.3738, "step": 23271 }, { "epoch": 13.001117318435755, "grad_norm": 0.35235080122947693, "learning_rate": 0.00035128851540616247, "loss": 0.3253, "step": 23272 }, { "epoch": 13.001675977653631, "grad_norm": 0.9307236075401306, "learning_rate": 0.00035126050420168067, "loss": 0.5879, "step": 23273 }, { "epoch": 13.002234636871508, "grad_norm": 0.590955913066864, "learning_rate": 0.0003512324929971989, "loss": 0.424, "step": 23274 }, { "epoch": 13.002793296089385, "grad_norm": 0.47806814312934875, "learning_rate": 0.0003512044817927171, "loss": 0.4274, "step": 23275 }, { "epoch": 13.003351955307263, "grad_norm": 0.35463306307792664, "learning_rate": 0.00035117647058823534, "loss": 0.3433, "step": 23276 }, { "epoch": 13.00391061452514, "grad_norm": 1.6759039163589478, "learning_rate": 0.0003511484593837535, "loss": 0.3917, "step": 23277 }, { "epoch": 13.004469273743016, "grad_norm": 1.6848140954971313, "learning_rate": 0.0003511204481792717, "loss": 0.4547, "step": 23278 }, { "epoch": 13.005027932960894, "grad_norm": 0.5859642624855042, "learning_rate": 0.0003510924369747899, "loss": 0.3993, "step": 23279 }, { "epoch": 13.005586592178771, "grad_norm": 0.40881794691085815, "learning_rate": 0.0003510644257703081, "loss": 0.4588, "step": 23280 }, { "epoch": 13.006145251396648, "grad_norm": 0.548321008682251, "learning_rate": 0.0003510364145658264, "loss": 0.35, "step": 23281 }, { "epoch": 13.006703910614526, 
"grad_norm": 0.6069298386573792, "learning_rate": 0.0003510084033613445, "loss": 0.3775, "step": 23282 }, { "epoch": 13.007262569832402, "grad_norm": 4.028810977935791, "learning_rate": 0.00035098039215686273, "loss": 0.3571, "step": 23283 }, { "epoch": 13.007821229050279, "grad_norm": 0.42840051651000977, "learning_rate": 0.000350952380952381, "loss": 0.3795, "step": 23284 }, { "epoch": 13.008379888268156, "grad_norm": 9.171091079711914, "learning_rate": 0.00035092436974789914, "loss": 0.452, "step": 23285 }, { "epoch": 13.008938547486034, "grad_norm": 0.3818887770175934, "learning_rate": 0.0003508963585434174, "loss": 0.3893, "step": 23286 }, { "epoch": 13.00949720670391, "grad_norm": 0.4687611162662506, "learning_rate": 0.00035086834733893556, "loss": 0.4417, "step": 23287 }, { "epoch": 13.010055865921787, "grad_norm": 0.41950637102127075, "learning_rate": 0.00035084033613445376, "loss": 0.4926, "step": 23288 }, { "epoch": 13.010614525139665, "grad_norm": 0.44910523295402527, "learning_rate": 0.000350812324929972, "loss": 0.3968, "step": 23289 }, { "epoch": 13.011173184357542, "grad_norm": 2.7767529487609863, "learning_rate": 0.0003507843137254902, "loss": 0.6029, "step": 23290 }, { "epoch": 13.011731843575419, "grad_norm": 1.1533838510513306, "learning_rate": 0.00035075630252100843, "loss": 0.409, "step": 23291 }, { "epoch": 13.012290502793297, "grad_norm": 0.5757137537002563, "learning_rate": 0.00035072829131652664, "loss": 0.5104, "step": 23292 }, { "epoch": 13.012849162011173, "grad_norm": 0.4171714782714844, "learning_rate": 0.0003507002801120448, "loss": 0.4045, "step": 23293 }, { "epoch": 13.01340782122905, "grad_norm": 0.49656033515930176, "learning_rate": 0.00035067226890756305, "loss": 0.4049, "step": 23294 }, { "epoch": 13.013966480446927, "grad_norm": 0.5441023111343384, "learning_rate": 0.0003506442577030812, "loss": 0.4317, "step": 23295 }, { "epoch": 13.014525139664805, "grad_norm": 0.6322762370109558, "learning_rate": 0.00035061624649859946, 
"loss": 0.4018, "step": 23296 }, { "epoch": 13.015083798882682, "grad_norm": 0.4555903673171997, "learning_rate": 0.00035058823529411767, "loss": 0.405, "step": 23297 }, { "epoch": 13.015642458100558, "grad_norm": 3.7376105785369873, "learning_rate": 0.0003505602240896358, "loss": 0.3974, "step": 23298 }, { "epoch": 13.016201117318436, "grad_norm": 0.4773577153682709, "learning_rate": 0.0003505322128851541, "loss": 0.4286, "step": 23299 }, { "epoch": 13.016759776536313, "grad_norm": 0.5603949427604675, "learning_rate": 0.0003505042016806723, "loss": 0.4018, "step": 23300 }, { "epoch": 13.01731843575419, "grad_norm": 0.38223135471343994, "learning_rate": 0.0003504761904761905, "loss": 0.3693, "step": 23301 }, { "epoch": 13.017877094972068, "grad_norm": 0.5161375999450684, "learning_rate": 0.0003504481792717087, "loss": 0.4125, "step": 23302 }, { "epoch": 13.018435754189944, "grad_norm": 0.6084408760070801, "learning_rate": 0.00035042016806722685, "loss": 0.4995, "step": 23303 }, { "epoch": 13.018994413407821, "grad_norm": 0.6102217435836792, "learning_rate": 0.0003503921568627451, "loss": 0.4814, "step": 23304 }, { "epoch": 13.019553072625698, "grad_norm": 0.49996086955070496, "learning_rate": 0.0003503641456582633, "loss": 0.4424, "step": 23305 }, { "epoch": 13.020111731843576, "grad_norm": 0.44509491324424744, "learning_rate": 0.0003503361344537815, "loss": 0.3695, "step": 23306 }, { "epoch": 13.020670391061453, "grad_norm": 0.5539101362228394, "learning_rate": 0.00035030812324929973, "loss": 0.3544, "step": 23307 }, { "epoch": 13.021229050279329, "grad_norm": 0.49441125988960266, "learning_rate": 0.00035028011204481794, "loss": 0.4398, "step": 23308 }, { "epoch": 13.021787709497207, "grad_norm": 0.35702961683273315, "learning_rate": 0.00035025210084033614, "loss": 0.4556, "step": 23309 }, { "epoch": 13.022346368715084, "grad_norm": 0.5351783037185669, "learning_rate": 0.00035022408963585435, "loss": 0.4357, "step": 23310 }, { "epoch": 13.02290502793296, 
"grad_norm": 0.5267682671546936, "learning_rate": 0.00035019607843137255, "loss": 0.3789, "step": 23311 }, { "epoch": 13.023463687150837, "grad_norm": 0.609285831451416, "learning_rate": 0.00035016806722689076, "loss": 0.4698, "step": 23312 }, { "epoch": 13.024022346368715, "grad_norm": 0.47037866711616516, "learning_rate": 0.00035014005602240897, "loss": 0.3499, "step": 23313 }, { "epoch": 13.024581005586592, "grad_norm": 0.7390353083610535, "learning_rate": 0.00035011204481792717, "loss": 0.413, "step": 23314 }, { "epoch": 13.025139664804469, "grad_norm": 2.7140743732452393, "learning_rate": 0.0003500840336134454, "loss": 0.3623, "step": 23315 }, { "epoch": 13.025698324022347, "grad_norm": 2.934598207473755, "learning_rate": 0.00035005602240896364, "loss": 0.5114, "step": 23316 }, { "epoch": 13.026256983240224, "grad_norm": 2.202272653579712, "learning_rate": 0.0003500280112044818, "loss": 0.3985, "step": 23317 }, { "epoch": 13.0268156424581, "grad_norm": 0.31507304310798645, "learning_rate": 0.00035, "loss": 0.3448, "step": 23318 }, { "epoch": 13.027374301675978, "grad_norm": 0.53544020652771, "learning_rate": 0.0003499719887955182, "loss": 0.3566, "step": 23319 }, { "epoch": 13.027932960893855, "grad_norm": 0.6698783040046692, "learning_rate": 0.0003499439775910364, "loss": 0.4161, "step": 23320 }, { "epoch": 13.028491620111732, "grad_norm": 2.000349760055542, "learning_rate": 0.00034991596638655467, "loss": 0.4796, "step": 23321 }, { "epoch": 13.029050279329608, "grad_norm": 0.45821481943130493, "learning_rate": 0.0003498879551820728, "loss": 0.4374, "step": 23322 }, { "epoch": 13.029608938547486, "grad_norm": 0.44443824887275696, "learning_rate": 0.000349859943977591, "loss": 0.3859, "step": 23323 }, { "epoch": 13.030167597765363, "grad_norm": 11.175480842590332, "learning_rate": 0.0003498319327731093, "loss": 0.3849, "step": 23324 }, { "epoch": 13.03072625698324, "grad_norm": 0.8641663789749146, "learning_rate": 0.00034980392156862744, "loss": 0.5376, 
"step": 23325 }, { "epoch": 13.031284916201118, "grad_norm": 0.8429691791534424, "learning_rate": 0.0003497759103641457, "loss": 0.3326, "step": 23326 }, { "epoch": 13.031843575418995, "grad_norm": 0.4473841190338135, "learning_rate": 0.00034974789915966385, "loss": 0.4164, "step": 23327 }, { "epoch": 13.032402234636871, "grad_norm": 0.45497047901153564, "learning_rate": 0.00034971988795518206, "loss": 0.3431, "step": 23328 }, { "epoch": 13.03296089385475, "grad_norm": 0.4669978618621826, "learning_rate": 0.0003496918767507003, "loss": 0.365, "step": 23329 }, { "epoch": 13.033519553072626, "grad_norm": 0.40230894088745117, "learning_rate": 0.00034966386554621847, "loss": 0.3178, "step": 23330 }, { "epoch": 13.034078212290503, "grad_norm": 0.5160427689552307, "learning_rate": 0.00034963585434173673, "loss": 0.4612, "step": 23331 }, { "epoch": 13.03463687150838, "grad_norm": 0.754610002040863, "learning_rate": 0.00034960784313725493, "loss": 0.5468, "step": 23332 }, { "epoch": 13.035195530726257, "grad_norm": 0.3806002140045166, "learning_rate": 0.0003495798319327731, "loss": 0.4028, "step": 23333 }, { "epoch": 13.035754189944134, "grad_norm": 0.3582924008369446, "learning_rate": 0.00034955182072829135, "loss": 0.3286, "step": 23334 }, { "epoch": 13.03631284916201, "grad_norm": 0.5125653147697449, "learning_rate": 0.0003495238095238095, "loss": 0.4791, "step": 23335 }, { "epoch": 13.036871508379889, "grad_norm": 0.47860410809516907, "learning_rate": 0.00034949579831932776, "loss": 0.4265, "step": 23336 }, { "epoch": 13.037430167597766, "grad_norm": 0.38151678442955017, "learning_rate": 0.00034946778711484596, "loss": 0.3546, "step": 23337 }, { "epoch": 13.037988826815642, "grad_norm": 0.37241029739379883, "learning_rate": 0.0003494397759103641, "loss": 0.4061, "step": 23338 }, { "epoch": 13.03854748603352, "grad_norm": 0.47382745146751404, "learning_rate": 0.0003494117647058824, "loss": 0.4034, "step": 23339 }, { "epoch": 13.039106145251397, "grad_norm": 
0.5245199799537659, "learning_rate": 0.0003493837535014006, "loss": 0.4215, "step": 23340 }, { "epoch": 13.039664804469274, "grad_norm": 0.40086638927459717, "learning_rate": 0.0003493557422969188, "loss": 0.5141, "step": 23341 }, { "epoch": 13.04022346368715, "grad_norm": 0.4952842891216278, "learning_rate": 0.000349327731092437, "loss": 0.3414, "step": 23342 }, { "epoch": 13.040782122905028, "grad_norm": 0.6562835574150085, "learning_rate": 0.00034929971988795515, "loss": 0.4555, "step": 23343 }, { "epoch": 13.041340782122905, "grad_norm": 0.5985842943191528, "learning_rate": 0.0003492717086834734, "loss": 0.4716, "step": 23344 }, { "epoch": 13.041899441340782, "grad_norm": 0.4215427339076996, "learning_rate": 0.0003492436974789916, "loss": 0.522, "step": 23345 }, { "epoch": 13.04245810055866, "grad_norm": 0.7224948406219482, "learning_rate": 0.0003492156862745098, "loss": 0.5043, "step": 23346 }, { "epoch": 13.043016759776537, "grad_norm": 0.7831788659095764, "learning_rate": 0.000349187675070028, "loss": 0.3888, "step": 23347 }, { "epoch": 13.043575418994413, "grad_norm": 0.4009150266647339, "learning_rate": 0.00034915966386554623, "loss": 0.3701, "step": 23348 }, { "epoch": 13.04413407821229, "grad_norm": 0.4335838854312897, "learning_rate": 0.00034913165266106444, "loss": 0.4256, "step": 23349 }, { "epoch": 13.044692737430168, "grad_norm": 0.5390592813491821, "learning_rate": 0.00034910364145658264, "loss": 0.582, "step": 23350 }, { "epoch": 13.045251396648045, "grad_norm": 0.48130103945732117, "learning_rate": 0.00034907563025210085, "loss": 0.3127, "step": 23351 }, { "epoch": 13.045810055865921, "grad_norm": 0.518240213394165, "learning_rate": 0.00034904761904761905, "loss": 0.4259, "step": 23352 }, { "epoch": 13.0463687150838, "grad_norm": 0.39265748858451843, "learning_rate": 0.00034901960784313726, "loss": 0.3201, "step": 23353 }, { "epoch": 13.046927374301676, "grad_norm": 0.6923423409461975, "learning_rate": 0.00034899159663865547, "loss": 0.4541, 
"step": 23354 }, { "epoch": 13.047486033519553, "grad_norm": 0.4615306258201599, "learning_rate": 0.00034896358543417367, "loss": 0.4092, "step": 23355 }, { "epoch": 13.048044692737431, "grad_norm": 0.4727894961833954, "learning_rate": 0.00034893557422969193, "loss": 0.5077, "step": 23356 }, { "epoch": 13.048603351955308, "grad_norm": 0.554659903049469, "learning_rate": 0.0003489075630252101, "loss": 0.4187, "step": 23357 }, { "epoch": 13.049162011173184, "grad_norm": 0.9945338368415833, "learning_rate": 0.0003488795518207283, "loss": 0.3427, "step": 23358 }, { "epoch": 13.04972067039106, "grad_norm": 0.5358831286430359, "learning_rate": 0.0003488515406162465, "loss": 0.4445, "step": 23359 }, { "epoch": 13.050279329608939, "grad_norm": 0.527206540107727, "learning_rate": 0.0003488235294117647, "loss": 0.4761, "step": 23360 }, { "epoch": 13.050837988826816, "grad_norm": 0.5932209491729736, "learning_rate": 0.00034879551820728296, "loss": 0.3437, "step": 23361 }, { "epoch": 13.051396648044692, "grad_norm": 0.5323039293289185, "learning_rate": 0.0003487675070028011, "loss": 0.3247, "step": 23362 }, { "epoch": 13.05195530726257, "grad_norm": 2.791893243789673, "learning_rate": 0.0003487394957983193, "loss": 0.4139, "step": 23363 }, { "epoch": 13.052513966480447, "grad_norm": 1.1677329540252686, "learning_rate": 0.0003487114845938376, "loss": 0.4465, "step": 23364 }, { "epoch": 13.053072625698324, "grad_norm": 0.35761281847953796, "learning_rate": 0.00034868347338935573, "loss": 0.3995, "step": 23365 }, { "epoch": 13.053631284916202, "grad_norm": 4.081105709075928, "learning_rate": 0.000348655462184874, "loss": 0.3641, "step": 23366 }, { "epoch": 13.054189944134079, "grad_norm": 0.6329918503761292, "learning_rate": 0.00034862745098039214, "loss": 0.3823, "step": 23367 }, { "epoch": 13.054748603351955, "grad_norm": 0.5143391489982605, "learning_rate": 0.00034859943977591035, "loss": 0.3968, "step": 23368 }, { "epoch": 13.055307262569832, "grad_norm": 0.3964098393917084, 
"learning_rate": 0.0003485714285714286, "loss": 0.3502, "step": 23369 }, { "epoch": 13.05586592178771, "grad_norm": 0.49288180470466614, "learning_rate": 0.00034854341736694676, "loss": 0.442, "step": 23370 }, { "epoch": 13.056424581005587, "grad_norm": 0.4393397569656372, "learning_rate": 0.000348515406162465, "loss": 0.305, "step": 23371 }, { "epoch": 13.056983240223463, "grad_norm": 0.44056546688079834, "learning_rate": 0.00034848739495798323, "loss": 0.428, "step": 23372 }, { "epoch": 13.057541899441341, "grad_norm": 0.5214044451713562, "learning_rate": 0.0003484593837535014, "loss": 0.4495, "step": 23373 }, { "epoch": 13.058100558659218, "grad_norm": 0.5635223388671875, "learning_rate": 0.00034843137254901964, "loss": 0.504, "step": 23374 }, { "epoch": 13.058659217877095, "grad_norm": 0.37817564606666565, "learning_rate": 0.0003484033613445378, "loss": 0.3689, "step": 23375 }, { "epoch": 13.059217877094973, "grad_norm": 0.35967835783958435, "learning_rate": 0.000348375350140056, "loss": 0.372, "step": 23376 }, { "epoch": 13.05977653631285, "grad_norm": 5.484134197235107, "learning_rate": 0.00034834733893557426, "loss": 0.4478, "step": 23377 }, { "epoch": 13.060335195530726, "grad_norm": 0.40357133746147156, "learning_rate": 0.0003483193277310924, "loss": 0.3885, "step": 23378 }, { "epoch": 13.060893854748603, "grad_norm": 0.401522696018219, "learning_rate": 0.00034829131652661067, "loss": 0.4024, "step": 23379 }, { "epoch": 13.061452513966481, "grad_norm": 1.2607015371322632, "learning_rate": 0.0003482633053221289, "loss": 0.2955, "step": 23380 }, { "epoch": 13.062011173184358, "grad_norm": 0.5900859832763672, "learning_rate": 0.00034823529411764703, "loss": 0.4783, "step": 23381 }, { "epoch": 13.062569832402234, "grad_norm": 0.6093268990516663, "learning_rate": 0.0003482072829131653, "loss": 0.3935, "step": 23382 }, { "epoch": 13.063128491620112, "grad_norm": 0.5110654234886169, "learning_rate": 0.00034817927170868344, "loss": 0.4844, "step": 23383 }, { 
"epoch": 13.063687150837989, "grad_norm": 0.6068913340568542, "learning_rate": 0.0003481512605042017, "loss": 0.4082, "step": 23384 }, { "epoch": 13.064245810055866, "grad_norm": 0.6574090719223022, "learning_rate": 0.0003481232492997199, "loss": 0.535, "step": 23385 }, { "epoch": 13.064804469273742, "grad_norm": 0.5582006573677063, "learning_rate": 0.00034809523809523806, "loss": 0.44, "step": 23386 }, { "epoch": 13.06536312849162, "grad_norm": 1.0597667694091797, "learning_rate": 0.0003480672268907563, "loss": 0.431, "step": 23387 }, { "epoch": 13.065921787709497, "grad_norm": 0.6355597972869873, "learning_rate": 0.0003480392156862745, "loss": 0.4666, "step": 23388 }, { "epoch": 13.066480446927374, "grad_norm": 3.466019630432129, "learning_rate": 0.00034801120448179273, "loss": 0.516, "step": 23389 }, { "epoch": 13.067039106145252, "grad_norm": 0.421601265668869, "learning_rate": 0.00034798319327731094, "loss": 0.3844, "step": 23390 }, { "epoch": 13.067597765363129, "grad_norm": 0.4610384404659271, "learning_rate": 0.0003479551820728291, "loss": 0.4689, "step": 23391 }, { "epoch": 13.068156424581005, "grad_norm": 4.263872146606445, "learning_rate": 0.00034792717086834735, "loss": 0.4303, "step": 23392 }, { "epoch": 13.068715083798883, "grad_norm": 0.8405833840370178, "learning_rate": 0.00034789915966386555, "loss": 0.355, "step": 23393 }, { "epoch": 13.06927374301676, "grad_norm": 4.102142810821533, "learning_rate": 0.00034787114845938376, "loss": 0.4393, "step": 23394 }, { "epoch": 13.069832402234637, "grad_norm": 0.5717090368270874, "learning_rate": 0.00034784313725490197, "loss": 0.5269, "step": 23395 }, { "epoch": 13.070391061452513, "grad_norm": 0.5308569073677063, "learning_rate": 0.00034781512605042017, "loss": 0.4395, "step": 23396 }, { "epoch": 13.070949720670392, "grad_norm": 0.40274500846862793, "learning_rate": 0.0003477871148459384, "loss": 0.3977, "step": 23397 }, { "epoch": 13.071508379888268, "grad_norm": 0.6104032397270203, "learning_rate": 
0.0003477591036414566, "loss": 0.4386, "step": 23398 }, { "epoch": 13.072067039106145, "grad_norm": 0.3510996699333191, "learning_rate": 0.0003477310924369748, "loss": 0.4816, "step": 23399 }, { "epoch": 13.072625698324023, "grad_norm": 0.4035705327987671, "learning_rate": 0.000347703081232493, "loss": 0.3293, "step": 23400 }, { "epoch": 13.0731843575419, "grad_norm": 0.45566388964653015, "learning_rate": 0.0003476750700280112, "loss": 0.4375, "step": 23401 }, { "epoch": 13.073743016759776, "grad_norm": 0.4309523403644562, "learning_rate": 0.0003476470588235294, "loss": 0.38, "step": 23402 }, { "epoch": 13.074301675977654, "grad_norm": 0.3658255934715271, "learning_rate": 0.0003476190476190476, "loss": 0.3627, "step": 23403 }, { "epoch": 13.074860335195531, "grad_norm": 0.3523833155632019, "learning_rate": 0.0003475910364145659, "loss": 0.4075, "step": 23404 }, { "epoch": 13.075418994413408, "grad_norm": 0.4560738801956177, "learning_rate": 0.000347563025210084, "loss": 0.412, "step": 23405 }, { "epoch": 13.075977653631284, "grad_norm": 1.2952686548233032, "learning_rate": 0.00034753501400560223, "loss": 0.3877, "step": 23406 }, { "epoch": 13.076536312849163, "grad_norm": 0.3817651569843292, "learning_rate": 0.00034750700280112044, "loss": 0.3739, "step": 23407 }, { "epoch": 13.077094972067039, "grad_norm": 0.5688372850418091, "learning_rate": 0.00034747899159663864, "loss": 0.402, "step": 23408 }, { "epoch": 13.077653631284916, "grad_norm": 0.43046021461486816, "learning_rate": 0.0003474509803921569, "loss": 0.3634, "step": 23409 }, { "epoch": 13.078212290502794, "grad_norm": 0.3656826317310333, "learning_rate": 0.00034742296918767506, "loss": 0.3151, "step": 23410 }, { "epoch": 13.07877094972067, "grad_norm": 0.4676840305328369, "learning_rate": 0.00034739495798319326, "loss": 0.4161, "step": 23411 }, { "epoch": 13.079329608938547, "grad_norm": 0.48933640122413635, "learning_rate": 0.0003473669467787115, "loss": 0.5191, "step": 23412 }, { "epoch": 
13.079888268156424, "grad_norm": 0.6550039649009705, "learning_rate": 0.0003473389355742297, "loss": 0.3779, "step": 23413 }, { "epoch": 13.080446927374302, "grad_norm": 0.5361563563346863, "learning_rate": 0.00034731092436974793, "loss": 0.4868, "step": 23414 }, { "epoch": 13.081005586592179, "grad_norm": 0.4756554961204529, "learning_rate": 0.0003472829131652661, "loss": 0.5032, "step": 23415 }, { "epoch": 13.081564245810055, "grad_norm": 0.527836799621582, "learning_rate": 0.0003472549019607843, "loss": 0.5133, "step": 23416 }, { "epoch": 13.082122905027934, "grad_norm": 0.7004073262214661, "learning_rate": 0.00034722689075630255, "loss": 0.3439, "step": 23417 }, { "epoch": 13.08268156424581, "grad_norm": 0.5956624746322632, "learning_rate": 0.0003471988795518207, "loss": 0.3976, "step": 23418 }, { "epoch": 13.083240223463687, "grad_norm": 0.4042103886604309, "learning_rate": 0.00034717086834733896, "loss": 0.3206, "step": 23419 }, { "epoch": 13.083798882681565, "grad_norm": 0.4299623966217041, "learning_rate": 0.00034714285714285717, "loss": 0.3968, "step": 23420 }, { "epoch": 13.084357541899442, "grad_norm": 2.804033041000366, "learning_rate": 0.0003471148459383753, "loss": 0.4656, "step": 23421 }, { "epoch": 13.084916201117318, "grad_norm": 0.42832913994789124, "learning_rate": 0.0003470868347338936, "loss": 0.4292, "step": 23422 }, { "epoch": 13.085474860335195, "grad_norm": 0.8991368412971497, "learning_rate": 0.00034705882352941173, "loss": 0.5082, "step": 23423 }, { "epoch": 13.086033519553073, "grad_norm": 0.4675876498222351, "learning_rate": 0.00034703081232493, "loss": 0.4235, "step": 23424 }, { "epoch": 13.08659217877095, "grad_norm": 0.5286150574684143, "learning_rate": 0.0003470028011204482, "loss": 0.3207, "step": 23425 }, { "epoch": 13.087150837988826, "grad_norm": 0.5329135656356812, "learning_rate": 0.00034697478991596635, "loss": 0.4508, "step": 23426 }, { "epoch": 13.087709497206705, "grad_norm": 0.48325949907302856, "learning_rate": 
0.0003469467787114846, "loss": 0.4762, "step": 23427 }, { "epoch": 13.088268156424581, "grad_norm": 0.776962161064148, "learning_rate": 0.0003469187675070028, "loss": 0.3713, "step": 23428 }, { "epoch": 13.088826815642458, "grad_norm": 0.45045456290245056, "learning_rate": 0.000346890756302521, "loss": 0.4427, "step": 23429 }, { "epoch": 13.089385474860336, "grad_norm": 0.6555449366569519, "learning_rate": 0.00034686274509803923, "loss": 0.3658, "step": 23430 }, { "epoch": 13.089944134078213, "grad_norm": 0.5370076894760132, "learning_rate": 0.0003468347338935574, "loss": 0.3792, "step": 23431 }, { "epoch": 13.09050279329609, "grad_norm": 0.7479352951049805, "learning_rate": 0.00034680672268907564, "loss": 0.5671, "step": 23432 }, { "epoch": 13.091061452513966, "grad_norm": 1.0681031942367554, "learning_rate": 0.00034677871148459385, "loss": 0.4474, "step": 23433 }, { "epoch": 13.091620111731844, "grad_norm": 0.4158102869987488, "learning_rate": 0.00034675070028011205, "loss": 0.3915, "step": 23434 }, { "epoch": 13.09217877094972, "grad_norm": 8.41711711883545, "learning_rate": 0.00034672268907563026, "loss": 0.4198, "step": 23435 }, { "epoch": 13.092737430167597, "grad_norm": 0.4590073227882385, "learning_rate": 0.00034669467787114847, "loss": 0.4278, "step": 23436 }, { "epoch": 13.093296089385476, "grad_norm": 0.5667222738265991, "learning_rate": 0.00034666666666666667, "loss": 0.3699, "step": 23437 }, { "epoch": 13.093854748603352, "grad_norm": 0.46746981143951416, "learning_rate": 0.0003466386554621849, "loss": 0.3222, "step": 23438 }, { "epoch": 13.094413407821229, "grad_norm": 0.3760402500629425, "learning_rate": 0.00034661064425770314, "loss": 0.3534, "step": 23439 }, { "epoch": 13.094972067039107, "grad_norm": 0.608992338180542, "learning_rate": 0.0003465826330532213, "loss": 0.4393, "step": 23440 }, { "epoch": 13.095530726256984, "grad_norm": 0.749060332775116, "learning_rate": 0.0003465546218487395, "loss": 0.3955, "step": 23441 }, { "epoch": 
13.09608938547486, "grad_norm": 1.5715206861495972, "learning_rate": 0.0003465266106442577, "loss": 0.4733, "step": 23442 }, { "epoch": 13.096648044692737, "grad_norm": 0.45165273547172546, "learning_rate": 0.0003464985994397759, "loss": 0.3453, "step": 23443 }, { "epoch": 13.097206703910615, "grad_norm": 0.6978132128715515, "learning_rate": 0.00034647058823529417, "loss": 0.3558, "step": 23444 }, { "epoch": 13.097765363128492, "grad_norm": 0.4665333330631256, "learning_rate": 0.0003464425770308123, "loss": 0.4694, "step": 23445 }, { "epoch": 13.098324022346368, "grad_norm": 0.47941717505455017, "learning_rate": 0.0003464145658263305, "loss": 0.4267, "step": 23446 }, { "epoch": 13.098882681564247, "grad_norm": 0.7825055122375488, "learning_rate": 0.0003463865546218488, "loss": 0.4784, "step": 23447 }, { "epoch": 13.099441340782123, "grad_norm": 0.7211691737174988, "learning_rate": 0.00034635854341736694, "loss": 0.3982, "step": 23448 }, { "epoch": 13.1, "grad_norm": 0.41671818494796753, "learning_rate": 0.0003463305322128852, "loss": 0.3724, "step": 23449 }, { "epoch": 13.100558659217878, "grad_norm": 0.5795016288757324, "learning_rate": 0.00034630252100840335, "loss": 0.4754, "step": 23450 }, { "epoch": 13.101117318435755, "grad_norm": 0.5600852966308594, "learning_rate": 0.00034627450980392156, "loss": 0.5074, "step": 23451 }, { "epoch": 13.101675977653631, "grad_norm": 0.34581464529037476, "learning_rate": 0.0003462464985994398, "loss": 0.3788, "step": 23452 }, { "epoch": 13.102234636871508, "grad_norm": 0.4762992858886719, "learning_rate": 0.00034621848739495797, "loss": 0.4919, "step": 23453 }, { "epoch": 13.102793296089386, "grad_norm": 0.43008309602737427, "learning_rate": 0.00034619047619047623, "loss": 0.3592, "step": 23454 }, { "epoch": 13.103351955307263, "grad_norm": 0.48411741852760315, "learning_rate": 0.00034616246498599443, "loss": 0.4635, "step": 23455 }, { "epoch": 13.10391061452514, "grad_norm": 0.7330057621002197, "learning_rate": 
0.0003461344537815126, "loss": 0.3535, "step": 23456 }, { "epoch": 13.104469273743018, "grad_norm": 0.43695884943008423, "learning_rate": 0.00034610644257703085, "loss": 0.361, "step": 23457 }, { "epoch": 13.105027932960894, "grad_norm": 1.7426176071166992, "learning_rate": 0.000346078431372549, "loss": 0.3413, "step": 23458 }, { "epoch": 13.10558659217877, "grad_norm": 0.3531784117221832, "learning_rate": 0.00034605042016806726, "loss": 0.3509, "step": 23459 }, { "epoch": 13.106145251396647, "grad_norm": 0.39489856362342834, "learning_rate": 0.00034602240896358546, "loss": 0.4728, "step": 23460 }, { "epoch": 13.106703910614526, "grad_norm": 0.5669115781784058, "learning_rate": 0.0003459943977591036, "loss": 0.4042, "step": 23461 }, { "epoch": 13.107262569832402, "grad_norm": 0.4808441698551178, "learning_rate": 0.0003459663865546219, "loss": 0.4154, "step": 23462 }, { "epoch": 13.107821229050279, "grad_norm": 0.8627301454544067, "learning_rate": 0.0003459383753501401, "loss": 0.319, "step": 23463 }, { "epoch": 13.108379888268157, "grad_norm": 0.3973948061466217, "learning_rate": 0.0003459103641456583, "loss": 0.4312, "step": 23464 }, { "epoch": 13.108938547486034, "grad_norm": 0.6303219795227051, "learning_rate": 0.0003458823529411765, "loss": 0.3947, "step": 23465 }, { "epoch": 13.10949720670391, "grad_norm": 0.44183576107025146, "learning_rate": 0.00034585434173669465, "loss": 0.4253, "step": 23466 }, { "epoch": 13.110055865921789, "grad_norm": 0.3627544939517975, "learning_rate": 0.0003458263305322129, "loss": 0.445, "step": 23467 }, { "epoch": 13.110614525139665, "grad_norm": 0.406851202249527, "learning_rate": 0.0003457983193277311, "loss": 0.3856, "step": 23468 }, { "epoch": 13.111173184357542, "grad_norm": 1.479478120803833, "learning_rate": 0.0003457703081232493, "loss": 0.3414, "step": 23469 }, { "epoch": 13.111731843575418, "grad_norm": 0.6937291622161865, "learning_rate": 0.0003457422969187675, "loss": 0.3947, "step": 23470 }, { "epoch": 
13.112290502793297, "grad_norm": 0.3560657799243927, "learning_rate": 0.00034571428571428573, "loss": 0.3749, "step": 23471 }, { "epoch": 13.112849162011173, "grad_norm": 0.3498425781726837, "learning_rate": 0.00034568627450980394, "loss": 0.3003, "step": 23472 }, { "epoch": 13.11340782122905, "grad_norm": 0.46245017647743225, "learning_rate": 0.00034565826330532214, "loss": 0.4369, "step": 23473 }, { "epoch": 13.113966480446928, "grad_norm": 0.7830196022987366, "learning_rate": 0.00034563025210084035, "loss": 0.4534, "step": 23474 }, { "epoch": 13.114525139664805, "grad_norm": 0.5229582786560059, "learning_rate": 0.00034560224089635855, "loss": 0.496, "step": 23475 }, { "epoch": 13.115083798882681, "grad_norm": 0.7278667688369751, "learning_rate": 0.00034557422969187676, "loss": 0.4963, "step": 23476 }, { "epoch": 13.11564245810056, "grad_norm": 2.452320098876953, "learning_rate": 0.00034554621848739497, "loss": 0.326, "step": 23477 }, { "epoch": 13.116201117318436, "grad_norm": 2.0859055519104004, "learning_rate": 0.00034551820728291317, "loss": 0.5025, "step": 23478 }, { "epoch": 13.116759776536313, "grad_norm": 0.6509073376655579, "learning_rate": 0.00034549019607843143, "loss": 0.4261, "step": 23479 }, { "epoch": 13.11731843575419, "grad_norm": 0.4105704426765442, "learning_rate": 0.0003454621848739496, "loss": 0.4669, "step": 23480 }, { "epoch": 13.117877094972068, "grad_norm": 0.3937210738658905, "learning_rate": 0.0003454341736694678, "loss": 0.3452, "step": 23481 }, { "epoch": 13.118435754189944, "grad_norm": 0.5726685523986816, "learning_rate": 0.000345406162464986, "loss": 0.4207, "step": 23482 }, { "epoch": 13.11899441340782, "grad_norm": 0.5402323007583618, "learning_rate": 0.0003453781512605042, "loss": 0.4147, "step": 23483 }, { "epoch": 13.119553072625699, "grad_norm": 0.5098580121994019, "learning_rate": 0.00034535014005602246, "loss": 0.4349, "step": 23484 }, { "epoch": 13.120111731843576, "grad_norm": 0.5631532073020935, "learning_rate": 
0.0003453221288515406, "loss": 0.3586, "step": 23485 }, { "epoch": 13.120670391061452, "grad_norm": 0.6478902101516724, "learning_rate": 0.0003452941176470588, "loss": 0.5709, "step": 23486 }, { "epoch": 13.121229050279329, "grad_norm": 0.35418611764907837, "learning_rate": 0.0003452661064425771, "loss": 0.3667, "step": 23487 }, { "epoch": 13.121787709497207, "grad_norm": 0.6710458397865295, "learning_rate": 0.00034523809523809523, "loss": 0.459, "step": 23488 }, { "epoch": 13.122346368715084, "grad_norm": 0.9335286617279053, "learning_rate": 0.00034521008403361344, "loss": 0.4511, "step": 23489 }, { "epoch": 13.12290502793296, "grad_norm": 0.39675742387771606, "learning_rate": 0.00034518207282913164, "loss": 0.4662, "step": 23490 }, { "epoch": 13.123463687150839, "grad_norm": 2.5451738834381104, "learning_rate": 0.00034515406162464985, "loss": 0.4111, "step": 23491 }, { "epoch": 13.124022346368715, "grad_norm": 0.6817076206207275, "learning_rate": 0.0003451260504201681, "loss": 0.4472, "step": 23492 }, { "epoch": 13.124581005586592, "grad_norm": 0.597777783870697, "learning_rate": 0.00034509803921568626, "loss": 0.4242, "step": 23493 }, { "epoch": 13.12513966480447, "grad_norm": 0.38116705417633057, "learning_rate": 0.00034507002801120447, "loss": 0.3524, "step": 23494 }, { "epoch": 13.125698324022347, "grad_norm": 0.39808329939842224, "learning_rate": 0.00034504201680672273, "loss": 0.3632, "step": 23495 }, { "epoch": 13.126256983240223, "grad_norm": 0.787678062915802, "learning_rate": 0.0003450140056022409, "loss": 0.5201, "step": 23496 }, { "epoch": 13.1268156424581, "grad_norm": 0.4769321084022522, "learning_rate": 0.00034498599439775914, "loss": 0.4292, "step": 23497 }, { "epoch": 13.127374301675978, "grad_norm": 0.8958873748779297, "learning_rate": 0.0003449579831932773, "loss": 0.4356, "step": 23498 }, { "epoch": 13.127932960893855, "grad_norm": 0.38787928223609924, "learning_rate": 0.0003449299719887955, "loss": 0.4488, "step": 23499 }, { "epoch": 
13.128491620111731, "grad_norm": 3.619854688644409, "learning_rate": 0.00034490196078431376, "loss": 0.462, "step": 23500 }, { "epoch": 13.128491620111731, "eval_cer": 0.08704024963721864, "eval_loss": 0.32847243547439575, "eval_runtime": 55.6347, "eval_samples_per_second": 81.568, "eval_steps_per_second": 5.105, "eval_wer": 0.3444037876036982, "step": 23500 }, { "epoch": 13.12905027932961, "grad_norm": 0.6810756325721741, "learning_rate": 0.0003448739495798319, "loss": 0.3625, "step": 23501 }, { "epoch": 13.129608938547486, "grad_norm": 26.42056655883789, "learning_rate": 0.00034484593837535017, "loss": 0.3343, "step": 23502 }, { "epoch": 13.130167597765363, "grad_norm": 0.6155726313591003, "learning_rate": 0.0003448179271708684, "loss": 0.4215, "step": 23503 }, { "epoch": 13.130726256983241, "grad_norm": 0.585280179977417, "learning_rate": 0.00034478991596638653, "loss": 0.4785, "step": 23504 }, { "epoch": 13.131284916201118, "grad_norm": 0.7052249908447266, "learning_rate": 0.0003447619047619048, "loss": 0.3592, "step": 23505 }, { "epoch": 13.131843575418994, "grad_norm": 0.3439772129058838, "learning_rate": 0.00034473389355742294, "loss": 0.3636, "step": 23506 }, { "epoch": 13.13240223463687, "grad_norm": 0.5768067836761475, "learning_rate": 0.0003447058823529412, "loss": 0.4817, "step": 23507 }, { "epoch": 13.132960893854749, "grad_norm": 0.48665159940719604, "learning_rate": 0.0003446778711484594, "loss": 0.4066, "step": 23508 }, { "epoch": 13.133519553072626, "grad_norm": 0.6033996939659119, "learning_rate": 0.00034464985994397756, "loss": 0.451, "step": 23509 }, { "epoch": 13.134078212290502, "grad_norm": 0.35850170254707336, "learning_rate": 0.0003446218487394958, "loss": 0.3341, "step": 23510 }, { "epoch": 13.13463687150838, "grad_norm": 0.34263238310813904, "learning_rate": 0.000344593837535014, "loss": 0.3625, "step": 23511 }, { "epoch": 13.135195530726257, "grad_norm": 0.4308985769748688, "learning_rate": 0.00034456582633053223, "loss": 0.4469, "step": 
23512 }, { "epoch": 13.135754189944134, "grad_norm": 0.3533570170402527, "learning_rate": 0.00034453781512605044, "loss": 0.2687, "step": 23513 }, { "epoch": 13.136312849162012, "grad_norm": 0.4198404550552368, "learning_rate": 0.0003445098039215686, "loss": 0.4803, "step": 23514 }, { "epoch": 13.136871508379889, "grad_norm": 1.7108770608901978, "learning_rate": 0.00034448179271708685, "loss": 0.4725, "step": 23515 }, { "epoch": 13.137430167597765, "grad_norm": 0.6219730377197266, "learning_rate": 0.00034445378151260505, "loss": 0.4563, "step": 23516 }, { "epoch": 13.137988826815642, "grad_norm": 0.8826090097427368, "learning_rate": 0.00034442577030812326, "loss": 0.3904, "step": 23517 }, { "epoch": 13.13854748603352, "grad_norm": 0.37981295585632324, "learning_rate": 0.00034439775910364147, "loss": 0.3292, "step": 23518 }, { "epoch": 13.139106145251397, "grad_norm": 0.5105723142623901, "learning_rate": 0.00034436974789915967, "loss": 0.3904, "step": 23519 }, { "epoch": 13.139664804469273, "grad_norm": 0.3931066393852234, "learning_rate": 0.0003443417366946779, "loss": 0.4326, "step": 23520 }, { "epoch": 13.140223463687152, "grad_norm": 0.44986462593078613, "learning_rate": 0.0003443137254901961, "loss": 0.4258, "step": 23521 }, { "epoch": 13.140782122905028, "grad_norm": 0.5707066655158997, "learning_rate": 0.0003442857142857143, "loss": 0.5597, "step": 23522 }, { "epoch": 13.141340782122905, "grad_norm": 0.5863339304924011, "learning_rate": 0.0003442577030812325, "loss": 0.427, "step": 23523 }, { "epoch": 13.141899441340781, "grad_norm": 0.615108847618103, "learning_rate": 0.0003442296918767507, "loss": 0.4913, "step": 23524 }, { "epoch": 13.14245810055866, "grad_norm": 0.5812993049621582, "learning_rate": 0.0003442016806722689, "loss": 0.394, "step": 23525 }, { "epoch": 13.143016759776536, "grad_norm": 1.0453574657440186, "learning_rate": 0.0003441736694677871, "loss": 0.4934, "step": 23526 }, { "epoch": 13.143575418994413, "grad_norm": 0.7637466192245483, 
"learning_rate": 0.0003441456582633054, "loss": 0.4815, "step": 23527 }, { "epoch": 13.144134078212291, "grad_norm": 0.7954033613204956, "learning_rate": 0.0003441176470588235, "loss": 0.3582, "step": 23528 }, { "epoch": 13.144692737430168, "grad_norm": 4.8569016456604, "learning_rate": 0.00034408963585434173, "loss": 0.483, "step": 23529 }, { "epoch": 13.145251396648044, "grad_norm": 0.3829418420791626, "learning_rate": 0.00034406162464985994, "loss": 0.4215, "step": 23530 }, { "epoch": 13.145810055865923, "grad_norm": 0.4325534403324127, "learning_rate": 0.00034403361344537814, "loss": 0.4284, "step": 23531 }, { "epoch": 13.1463687150838, "grad_norm": 0.8124381899833679, "learning_rate": 0.0003440056022408964, "loss": 0.4473, "step": 23532 }, { "epoch": 13.146927374301676, "grad_norm": 0.35435134172439575, "learning_rate": 0.00034397759103641456, "loss": 0.3891, "step": 23533 }, { "epoch": 13.147486033519552, "grad_norm": 0.4271470308303833, "learning_rate": 0.00034394957983193276, "loss": 0.4484, "step": 23534 }, { "epoch": 13.14804469273743, "grad_norm": 0.5407982468605042, "learning_rate": 0.000343921568627451, "loss": 0.4232, "step": 23535 }, { "epoch": 13.148603351955307, "grad_norm": 0.5933109521865845, "learning_rate": 0.0003438935574229692, "loss": 0.4179, "step": 23536 }, { "epoch": 13.149162011173184, "grad_norm": 0.5191864967346191, "learning_rate": 0.00034386554621848743, "loss": 0.4349, "step": 23537 }, { "epoch": 13.149720670391062, "grad_norm": 1.0307084321975708, "learning_rate": 0.0003438375350140056, "loss": 0.6456, "step": 23538 }, { "epoch": 13.150279329608939, "grad_norm": 0.31632161140441895, "learning_rate": 0.0003438095238095238, "loss": 0.3445, "step": 23539 }, { "epoch": 13.150837988826815, "grad_norm": 0.36762794852256775, "learning_rate": 0.00034378151260504205, "loss": 0.3837, "step": 23540 }, { "epoch": 13.151396648044694, "grad_norm": 0.454878568649292, "learning_rate": 0.0003437535014005602, "loss": 0.4148, "step": 23541 }, { 
"epoch": 13.15195530726257, "grad_norm": 0.7697305679321289, "learning_rate": 0.00034372549019607846, "loss": 0.5602, "step": 23542 }, { "epoch": 13.152513966480447, "grad_norm": 0.44395360350608826, "learning_rate": 0.00034369747899159667, "loss": 0.3639, "step": 23543 }, { "epoch": 13.153072625698323, "grad_norm": 0.47492894530296326, "learning_rate": 0.0003436694677871148, "loss": 0.4208, "step": 23544 }, { "epoch": 13.153631284916202, "grad_norm": 0.36974403262138367, "learning_rate": 0.0003436414565826331, "loss": 0.3543, "step": 23545 }, { "epoch": 13.154189944134078, "grad_norm": 0.7163898944854736, "learning_rate": 0.00034361344537815123, "loss": 0.5361, "step": 23546 }, { "epoch": 13.154748603351955, "grad_norm": 0.7160236835479736, "learning_rate": 0.0003435854341736695, "loss": 0.4204, "step": 23547 }, { "epoch": 13.155307262569833, "grad_norm": 0.4247840940952301, "learning_rate": 0.0003435574229691877, "loss": 0.3608, "step": 23548 }, { "epoch": 13.15586592178771, "grad_norm": 0.3794199824333191, "learning_rate": 0.00034352941176470585, "loss": 0.3944, "step": 23549 }, { "epoch": 13.156424581005586, "grad_norm": 0.6485399603843689, "learning_rate": 0.0003435014005602241, "loss": 0.4774, "step": 23550 }, { "epoch": 13.156983240223465, "grad_norm": 0.4471887946128845, "learning_rate": 0.0003434733893557423, "loss": 0.38, "step": 23551 }, { "epoch": 13.157541899441341, "grad_norm": 0.8795566558837891, "learning_rate": 0.0003434453781512605, "loss": 0.4458, "step": 23552 }, { "epoch": 13.158100558659218, "grad_norm": 0.4335973560810089, "learning_rate": 0.00034341736694677873, "loss": 0.4366, "step": 23553 }, { "epoch": 13.158659217877094, "grad_norm": 1.1662137508392334, "learning_rate": 0.0003433893557422969, "loss": 0.5791, "step": 23554 }, { "epoch": 13.159217877094973, "grad_norm": 0.7169398069381714, "learning_rate": 0.00034336134453781514, "loss": 0.3939, "step": 23555 }, { "epoch": 13.15977653631285, "grad_norm": 2.2903497219085693, 
"learning_rate": 0.00034333333333333335, "loss": 0.7638, "step": 23556 }, { "epoch": 13.160335195530726, "grad_norm": 0.46845901012420654, "learning_rate": 0.00034330532212885155, "loss": 0.4014, "step": 23557 }, { "epoch": 13.160893854748604, "grad_norm": 0.5294485092163086, "learning_rate": 0.00034327731092436976, "loss": 0.3926, "step": 23558 }, { "epoch": 13.16145251396648, "grad_norm": 0.6986362338066101, "learning_rate": 0.00034324929971988797, "loss": 0.4263, "step": 23559 }, { "epoch": 13.162011173184357, "grad_norm": 0.7757440805435181, "learning_rate": 0.00034322128851540617, "loss": 0.4805, "step": 23560 }, { "epoch": 13.162569832402234, "grad_norm": 0.6362632513046265, "learning_rate": 0.0003431932773109244, "loss": 0.5545, "step": 23561 }, { "epoch": 13.163128491620112, "grad_norm": 0.5044678449630737, "learning_rate": 0.0003431652661064426, "loss": 0.3777, "step": 23562 }, { "epoch": 13.163687150837989, "grad_norm": 0.49659910798072815, "learning_rate": 0.0003431372549019608, "loss": 0.3936, "step": 23563 }, { "epoch": 13.164245810055865, "grad_norm": 0.6571908593177795, "learning_rate": 0.000343109243697479, "loss": 0.5951, "step": 23564 }, { "epoch": 13.164804469273744, "grad_norm": 0.972420334815979, "learning_rate": 0.0003430812324929972, "loss": 0.4665, "step": 23565 }, { "epoch": 13.16536312849162, "grad_norm": 0.5162473917007446, "learning_rate": 0.0003430532212885154, "loss": 0.387, "step": 23566 }, { "epoch": 13.165921787709497, "grad_norm": 0.5060432553291321, "learning_rate": 0.00034302521008403367, "loss": 0.3819, "step": 23567 }, { "epoch": 13.166480446927375, "grad_norm": 3.235938787460327, "learning_rate": 0.0003429971988795518, "loss": 0.5093, "step": 23568 }, { "epoch": 13.167039106145252, "grad_norm": 0.40038686990737915, "learning_rate": 0.00034296918767507, "loss": 0.3489, "step": 23569 }, { "epoch": 13.167597765363128, "grad_norm": 0.4324215054512024, "learning_rate": 0.00034294117647058823, "loss": 0.3479, "step": 23570 }, { 
"epoch": 13.168156424581005, "grad_norm": 0.3749639689922333, "learning_rate": 0.00034291316526610644, "loss": 0.3425, "step": 23571 }, { "epoch": 13.168715083798883, "grad_norm": 0.42007380723953247, "learning_rate": 0.0003428851540616247, "loss": 0.4433, "step": 23572 }, { "epoch": 13.16927374301676, "grad_norm": 3.701608657836914, "learning_rate": 0.00034285714285714285, "loss": 0.6659, "step": 23573 }, { "epoch": 13.169832402234636, "grad_norm": 0.6986651420593262, "learning_rate": 0.00034282913165266106, "loss": 0.5085, "step": 23574 }, { "epoch": 13.170391061452515, "grad_norm": 0.5721229910850525, "learning_rate": 0.0003428011204481793, "loss": 0.4654, "step": 23575 }, { "epoch": 13.170949720670391, "grad_norm": 0.5399644374847412, "learning_rate": 0.00034277310924369747, "loss": 0.3784, "step": 23576 }, { "epoch": 13.171508379888268, "grad_norm": 0.5080203413963318, "learning_rate": 0.00034274509803921573, "loss": 0.5532, "step": 23577 }, { "epoch": 13.172067039106146, "grad_norm": 0.5806756615638733, "learning_rate": 0.0003427170868347339, "loss": 0.4165, "step": 23578 }, { "epoch": 13.172625698324023, "grad_norm": 0.6362572312355042, "learning_rate": 0.0003426890756302521, "loss": 0.5603, "step": 23579 }, { "epoch": 13.1731843575419, "grad_norm": 0.3594598174095154, "learning_rate": 0.00034266106442577035, "loss": 0.4381, "step": 23580 }, { "epoch": 13.173743016759776, "grad_norm": 0.863064706325531, "learning_rate": 0.0003426330532212885, "loss": 0.5102, "step": 23581 }, { "epoch": 13.174301675977654, "grad_norm": 36.3951530456543, "learning_rate": 0.00034260504201680676, "loss": 0.3892, "step": 23582 }, { "epoch": 13.17486033519553, "grad_norm": 0.6443665623664856, "learning_rate": 0.00034257703081232496, "loss": 0.3876, "step": 23583 }, { "epoch": 13.175418994413407, "grad_norm": 0.4082416594028473, "learning_rate": 0.0003425490196078431, "loss": 0.2843, "step": 23584 }, { "epoch": 13.175977653631286, "grad_norm": 0.4457329511642456, "learning_rate": 
0.0003425210084033614, "loss": 0.3766, "step": 23585 }, { "epoch": 13.176536312849162, "grad_norm": 0.370343953371048, "learning_rate": 0.00034249299719887953, "loss": 0.4197, "step": 23586 }, { "epoch": 13.177094972067039, "grad_norm": 0.3777053952217102, "learning_rate": 0.0003424649859943978, "loss": 0.4416, "step": 23587 }, { "epoch": 13.177653631284917, "grad_norm": 0.454223096370697, "learning_rate": 0.000342436974789916, "loss": 0.3687, "step": 23588 }, { "epoch": 13.178212290502794, "grad_norm": 0.4108923673629761, "learning_rate": 0.00034240896358543415, "loss": 0.379, "step": 23589 }, { "epoch": 13.17877094972067, "grad_norm": 0.5979317426681519, "learning_rate": 0.0003423809523809524, "loss": 0.4423, "step": 23590 }, { "epoch": 13.179329608938547, "grad_norm": 0.867979884147644, "learning_rate": 0.0003423529411764706, "loss": 0.4397, "step": 23591 }, { "epoch": 13.179888268156425, "grad_norm": 0.5001507997512817, "learning_rate": 0.0003423249299719888, "loss": 0.4925, "step": 23592 }, { "epoch": 13.180446927374302, "grad_norm": 0.5346710085868835, "learning_rate": 0.000342296918767507, "loss": 0.4079, "step": 23593 }, { "epoch": 13.181005586592178, "grad_norm": 0.5234153866767883, "learning_rate": 0.0003422689075630252, "loss": 0.362, "step": 23594 }, { "epoch": 13.181564245810057, "grad_norm": 0.6689703464508057, "learning_rate": 0.00034224089635854344, "loss": 0.43, "step": 23595 }, { "epoch": 13.182122905027933, "grad_norm": 0.4779629111289978, "learning_rate": 0.00034221288515406164, "loss": 0.4067, "step": 23596 }, { "epoch": 13.18268156424581, "grad_norm": 0.6178259253501892, "learning_rate": 0.0003421848739495798, "loss": 0.4587, "step": 23597 }, { "epoch": 13.183240223463686, "grad_norm": 0.42503488063812256, "learning_rate": 0.00034215686274509805, "loss": 0.3733, "step": 23598 }, { "epoch": 13.183798882681565, "grad_norm": 1.5793695449829102, "learning_rate": 0.00034212885154061626, "loss": 0.4202, "step": 23599 }, { "epoch": 
13.184357541899441, "grad_norm": 0.45896288752555847, "learning_rate": 0.00034210084033613447, "loss": 0.6113, "step": 23600 }, { "epoch": 13.184916201117318, "grad_norm": 0.46592289209365845, "learning_rate": 0.00034207282913165267, "loss": 0.4403, "step": 23601 }, { "epoch": 13.185474860335196, "grad_norm": 0.49267512559890747, "learning_rate": 0.0003420448179271708, "loss": 0.4694, "step": 23602 }, { "epoch": 13.186033519553073, "grad_norm": 0.35972535610198975, "learning_rate": 0.0003420168067226891, "loss": 0.3231, "step": 23603 }, { "epoch": 13.18659217877095, "grad_norm": 0.4554131031036377, "learning_rate": 0.0003419887955182073, "loss": 0.4257, "step": 23604 }, { "epoch": 13.187150837988828, "grad_norm": 0.3862420916557312, "learning_rate": 0.0003419607843137255, "loss": 0.4125, "step": 23605 }, { "epoch": 13.187709497206704, "grad_norm": 0.5158444046974182, "learning_rate": 0.0003419327731092437, "loss": 0.4389, "step": 23606 }, { "epoch": 13.18826815642458, "grad_norm": 0.683224081993103, "learning_rate": 0.0003419047619047619, "loss": 0.4378, "step": 23607 }, { "epoch": 13.188826815642457, "grad_norm": 1.318318247795105, "learning_rate": 0.0003418767507002801, "loss": 0.3757, "step": 23608 }, { "epoch": 13.189385474860336, "grad_norm": 1.172843337059021, "learning_rate": 0.0003418487394957983, "loss": 0.3676, "step": 23609 }, { "epoch": 13.189944134078212, "grad_norm": 0.3646121621131897, "learning_rate": 0.0003418207282913165, "loss": 0.3366, "step": 23610 }, { "epoch": 13.190502793296089, "grad_norm": 0.5097604393959045, "learning_rate": 0.00034179271708683473, "loss": 0.433, "step": 23611 }, { "epoch": 13.191061452513967, "grad_norm": 1.0712482929229736, "learning_rate": 0.00034176470588235294, "loss": 0.4321, "step": 23612 }, { "epoch": 13.191620111731844, "grad_norm": 0.6039049625396729, "learning_rate": 0.00034173669467787114, "loss": 0.4369, "step": 23613 }, { "epoch": 13.19217877094972, "grad_norm": 0.6225924491882324, "learning_rate": 
0.00034170868347338935, "loss": 0.449, "step": 23614 }, { "epoch": 13.192737430167599, "grad_norm": 0.44095805287361145, "learning_rate": 0.0003416806722689076, "loss": 0.2924, "step": 23615 }, { "epoch": 13.193296089385475, "grad_norm": 0.5484685301780701, "learning_rate": 0.00034165266106442576, "loss": 0.4602, "step": 23616 }, { "epoch": 13.193854748603352, "grad_norm": 0.37980830669403076, "learning_rate": 0.00034162464985994397, "loss": 0.4425, "step": 23617 }, { "epoch": 13.194413407821228, "grad_norm": 0.3541870415210724, "learning_rate": 0.0003415966386554622, "loss": 0.388, "step": 23618 }, { "epoch": 13.194972067039107, "grad_norm": 2.8225080966949463, "learning_rate": 0.0003415686274509804, "loss": 0.423, "step": 23619 }, { "epoch": 13.195530726256983, "grad_norm": 0.4947691857814789, "learning_rate": 0.00034154061624649864, "loss": 0.4027, "step": 23620 }, { "epoch": 13.19608938547486, "grad_norm": 0.5435184836387634, "learning_rate": 0.0003415126050420168, "loss": 0.3603, "step": 23621 }, { "epoch": 13.196648044692738, "grad_norm": 0.5195969343185425, "learning_rate": 0.000341484593837535, "loss": 0.397, "step": 23622 }, { "epoch": 13.197206703910615, "grad_norm": 0.3950599431991577, "learning_rate": 0.00034145658263305326, "loss": 0.4146, "step": 23623 }, { "epoch": 13.197765363128491, "grad_norm": 0.5738654732704163, "learning_rate": 0.0003414285714285714, "loss": 0.3531, "step": 23624 }, { "epoch": 13.19832402234637, "grad_norm": 0.593899667263031, "learning_rate": 0.00034140056022408967, "loss": 0.4783, "step": 23625 }, { "epoch": 13.198882681564246, "grad_norm": 0.4480629861354828, "learning_rate": 0.0003413725490196078, "loss": 0.469, "step": 23626 }, { "epoch": 13.199441340782123, "grad_norm": 1.3859435319900513, "learning_rate": 0.00034134453781512603, "loss": 0.4182, "step": 23627 }, { "epoch": 13.2, "grad_norm": 0.7241740822792053, "learning_rate": 0.0003413165266106443, "loss": 0.4453, "step": 23628 }, { "epoch": 13.200558659217878, 
"grad_norm": 0.4517875909805298, "learning_rate": 0.00034128851540616244, "loss": 0.5206, "step": 23629 }, { "epoch": 13.201117318435754, "grad_norm": 0.37582507729530334, "learning_rate": 0.0003412605042016807, "loss": 0.4036, "step": 23630 }, { "epoch": 13.20167597765363, "grad_norm": 0.5926438570022583, "learning_rate": 0.0003412324929971989, "loss": 0.4185, "step": 23631 }, { "epoch": 13.202234636871509, "grad_norm": 0.4149555265903473, "learning_rate": 0.00034120448179271706, "loss": 0.3489, "step": 23632 }, { "epoch": 13.202793296089386, "grad_norm": 0.8887465000152588, "learning_rate": 0.0003411764705882353, "loss": 0.4132, "step": 23633 }, { "epoch": 13.203351955307262, "grad_norm": 2.309170961380005, "learning_rate": 0.00034114845938375347, "loss": 0.4776, "step": 23634 }, { "epoch": 13.203910614525139, "grad_norm": 0.663011908531189, "learning_rate": 0.00034112044817927173, "loss": 0.4734, "step": 23635 }, { "epoch": 13.204469273743017, "grad_norm": 0.8255983591079712, "learning_rate": 0.00034109243697478994, "loss": 0.4683, "step": 23636 }, { "epoch": 13.205027932960894, "grad_norm": 0.48494043946266174, "learning_rate": 0.0003410644257703081, "loss": 0.4544, "step": 23637 }, { "epoch": 13.20558659217877, "grad_norm": 0.544628381729126, "learning_rate": 0.00034103641456582635, "loss": 0.4623, "step": 23638 }, { "epoch": 13.206145251396649, "grad_norm": 0.6140303611755371, "learning_rate": 0.00034100840336134455, "loss": 0.471, "step": 23639 }, { "epoch": 13.206703910614525, "grad_norm": 0.3705027401447296, "learning_rate": 0.00034098039215686276, "loss": 0.4034, "step": 23640 }, { "epoch": 13.207262569832402, "grad_norm": 0.39516913890838623, "learning_rate": 0.00034095238095238097, "loss": 0.4429, "step": 23641 }, { "epoch": 13.20782122905028, "grad_norm": 0.4192878007888794, "learning_rate": 0.0003409243697478991, "loss": 0.4398, "step": 23642 }, { "epoch": 13.208379888268157, "grad_norm": 0.7160155177116394, "learning_rate": 0.0003408963585434174, 
"loss": 0.4594, "step": 23643 }, { "epoch": 13.208938547486033, "grad_norm": 1.7860888242721558, "learning_rate": 0.0003408683473389356, "loss": 0.3091, "step": 23644 }, { "epoch": 13.20949720670391, "grad_norm": 0.7349260449409485, "learning_rate": 0.0003408403361344538, "loss": 0.496, "step": 23645 }, { "epoch": 13.210055865921788, "grad_norm": 0.6736895442008972, "learning_rate": 0.000340812324929972, "loss": 0.4373, "step": 23646 }, { "epoch": 13.210614525139665, "grad_norm": 0.7749654054641724, "learning_rate": 0.0003407843137254902, "loss": 0.5233, "step": 23647 }, { "epoch": 13.211173184357541, "grad_norm": 1.0293306112289429, "learning_rate": 0.0003407563025210084, "loss": 0.4343, "step": 23648 }, { "epoch": 13.21173184357542, "grad_norm": 0.5538996458053589, "learning_rate": 0.0003407282913165266, "loss": 0.3533, "step": 23649 }, { "epoch": 13.212290502793296, "grad_norm": 0.3994947075843811, "learning_rate": 0.0003407002801120448, "loss": 0.4113, "step": 23650 }, { "epoch": 13.212849162011173, "grad_norm": 0.44770339131355286, "learning_rate": 0.000340672268907563, "loss": 0.3631, "step": 23651 }, { "epoch": 13.213407821229051, "grad_norm": 0.4695807695388794, "learning_rate": 0.00034064425770308123, "loss": 0.4945, "step": 23652 }, { "epoch": 13.213966480446928, "grad_norm": 0.8152663111686707, "learning_rate": 0.00034061624649859944, "loss": 0.3494, "step": 23653 }, { "epoch": 13.214525139664804, "grad_norm": 0.7754992842674255, "learning_rate": 0.00034058823529411764, "loss": 0.4156, "step": 23654 }, { "epoch": 13.21508379888268, "grad_norm": 1.0545365810394287, "learning_rate": 0.0003405602240896359, "loss": 0.4503, "step": 23655 }, { "epoch": 13.21564245810056, "grad_norm": 0.3590008020401001, "learning_rate": 0.00034053221288515406, "loss": 0.3142, "step": 23656 }, { "epoch": 13.216201117318436, "grad_norm": 0.6116838455200195, "learning_rate": 0.00034050420168067226, "loss": 0.5446, "step": 23657 }, { "epoch": 13.216759776536312, "grad_norm": 
0.5947260856628418, "learning_rate": 0.00034047619047619047, "loss": 0.3706, "step": 23658 }, { "epoch": 13.21731843575419, "grad_norm": 0.43871936202049255, "learning_rate": 0.0003404481792717087, "loss": 0.3462, "step": 23659 }, { "epoch": 13.217877094972067, "grad_norm": 0.47355180978775024, "learning_rate": 0.00034042016806722693, "loss": 0.4153, "step": 23660 }, { "epoch": 13.218435754189944, "grad_norm": 0.5599240660667419, "learning_rate": 0.0003403921568627451, "loss": 0.4207, "step": 23661 }, { "epoch": 13.21899441340782, "grad_norm": 2.1302199363708496, "learning_rate": 0.0003403641456582633, "loss": 0.4603, "step": 23662 }, { "epoch": 13.219553072625699, "grad_norm": 1.1983551979064941, "learning_rate": 0.00034033613445378155, "loss": 0.4666, "step": 23663 }, { "epoch": 13.220111731843575, "grad_norm": 2.491521120071411, "learning_rate": 0.0003403081232492997, "loss": 0.7406, "step": 23664 }, { "epoch": 13.220670391061452, "grad_norm": 0.5156349539756775, "learning_rate": 0.00034028011204481796, "loss": 0.6736, "step": 23665 }, { "epoch": 13.22122905027933, "grad_norm": 1.0162335634231567, "learning_rate": 0.0003402521008403361, "loss": 0.4549, "step": 23666 }, { "epoch": 13.221787709497207, "grad_norm": 2.56878662109375, "learning_rate": 0.0003402240896358543, "loss": 0.3905, "step": 23667 }, { "epoch": 13.222346368715083, "grad_norm": 0.45578983426094055, "learning_rate": 0.0003401960784313726, "loss": 0.4045, "step": 23668 }, { "epoch": 13.222905027932962, "grad_norm": 0.4824478030204773, "learning_rate": 0.00034016806722689073, "loss": 0.3391, "step": 23669 }, { "epoch": 13.223463687150838, "grad_norm": 2.444816827774048, "learning_rate": 0.000340140056022409, "loss": 0.3401, "step": 23670 }, { "epoch": 13.224022346368715, "grad_norm": 0.438673198223114, "learning_rate": 0.0003401120448179272, "loss": 0.3308, "step": 23671 }, { "epoch": 13.224581005586591, "grad_norm": 0.6557748317718506, "learning_rate": 0.00034008403361344535, "loss": 0.4582, 
"step": 23672 }, { "epoch": 13.22513966480447, "grad_norm": 0.43666499853134155, "learning_rate": 0.0003400560224089636, "loss": 0.4359, "step": 23673 }, { "epoch": 13.225698324022346, "grad_norm": 0.4532405138015747, "learning_rate": 0.00034002801120448176, "loss": 0.4513, "step": 23674 }, { "epoch": 13.226256983240223, "grad_norm": 0.5603696703910828, "learning_rate": 0.00034, "loss": 0.5223, "step": 23675 }, { "epoch": 13.226815642458101, "grad_norm": 0.3744250535964966, "learning_rate": 0.00033997198879551823, "loss": 0.3511, "step": 23676 }, { "epoch": 13.227374301675978, "grad_norm": 0.4103121757507324, "learning_rate": 0.0003399439775910364, "loss": 0.458, "step": 23677 }, { "epoch": 13.227932960893854, "grad_norm": 3.3088722229003906, "learning_rate": 0.00033991596638655464, "loss": 0.5882, "step": 23678 }, { "epoch": 13.228491620111733, "grad_norm": 0.3987123668193817, "learning_rate": 0.00033988795518207285, "loss": 0.3491, "step": 23679 }, { "epoch": 13.22905027932961, "grad_norm": 0.8478341698646545, "learning_rate": 0.00033985994397759105, "loss": 0.4476, "step": 23680 }, { "epoch": 13.229608938547486, "grad_norm": 0.8431574106216431, "learning_rate": 0.00033983193277310926, "loss": 0.4342, "step": 23681 }, { "epoch": 13.230167597765362, "grad_norm": 0.41380441188812256, "learning_rate": 0.0003398039215686274, "loss": 0.4658, "step": 23682 }, { "epoch": 13.23072625698324, "grad_norm": 1.3141186237335205, "learning_rate": 0.00033977591036414567, "loss": 0.379, "step": 23683 }, { "epoch": 13.231284916201117, "grad_norm": 4.500753402709961, "learning_rate": 0.0003397478991596639, "loss": 0.429, "step": 23684 }, { "epoch": 13.231843575418994, "grad_norm": 0.41592085361480713, "learning_rate": 0.0003397198879551821, "loss": 0.3795, "step": 23685 }, { "epoch": 13.232402234636872, "grad_norm": 0.5778118371963501, "learning_rate": 0.0003396918767507003, "loss": 0.5639, "step": 23686 }, { "epoch": 13.232960893854749, "grad_norm": 0.7451493144035339, 
"learning_rate": 0.0003396638655462185, "loss": 0.3903, "step": 23687 }, { "epoch": 13.233519553072625, "grad_norm": 0.8031677603721619, "learning_rate": 0.0003396358543417367, "loss": 0.499, "step": 23688 }, { "epoch": 13.234078212290504, "grad_norm": 0.3959466218948364, "learning_rate": 0.0003396078431372549, "loss": 0.3404, "step": 23689 }, { "epoch": 13.23463687150838, "grad_norm": 0.4078703224658966, "learning_rate": 0.00033957983193277317, "loss": 0.4294, "step": 23690 }, { "epoch": 13.235195530726257, "grad_norm": 0.49590003490448, "learning_rate": 0.0003395518207282913, "loss": 0.3412, "step": 23691 }, { "epoch": 13.235754189944133, "grad_norm": 0.36874130368232727, "learning_rate": 0.0003395238095238095, "loss": 0.3443, "step": 23692 }, { "epoch": 13.236312849162012, "grad_norm": 0.5423593521118164, "learning_rate": 0.00033949579831932773, "loss": 0.4818, "step": 23693 }, { "epoch": 13.236871508379888, "grad_norm": 0.4223531186580658, "learning_rate": 0.00033946778711484594, "loss": 0.325, "step": 23694 }, { "epoch": 13.237430167597765, "grad_norm": 0.4977760910987854, "learning_rate": 0.0003394397759103642, "loss": 0.404, "step": 23695 }, { "epoch": 13.237988826815643, "grad_norm": 2.3316402435302734, "learning_rate": 0.00033941176470588235, "loss": 0.457, "step": 23696 }, { "epoch": 13.23854748603352, "grad_norm": 0.37392234802246094, "learning_rate": 0.00033938375350140056, "loss": 0.4031, "step": 23697 }, { "epoch": 13.239106145251396, "grad_norm": 2.352795362472534, "learning_rate": 0.0003393557422969188, "loss": 0.5183, "step": 23698 }, { "epoch": 13.239664804469275, "grad_norm": 0.5329104661941528, "learning_rate": 0.00033932773109243697, "loss": 0.6079, "step": 23699 }, { "epoch": 13.240223463687151, "grad_norm": 0.3084893822669983, "learning_rate": 0.00033929971988795523, "loss": 0.3625, "step": 23700 }, { "epoch": 13.240782122905028, "grad_norm": 0.7818188071250916, "learning_rate": 0.0003392717086834734, "loss": 0.396, "step": 23701 }, { 
"epoch": 13.241340782122904, "grad_norm": 0.3785753548145294, "learning_rate": 0.0003392436974789916, "loss": 0.38, "step": 23702 }, { "epoch": 13.241899441340783, "grad_norm": 0.4625031054019928, "learning_rate": 0.00033921568627450985, "loss": 0.5357, "step": 23703 }, { "epoch": 13.24245810055866, "grad_norm": 0.39104342460632324, "learning_rate": 0.000339187675070028, "loss": 0.3973, "step": 23704 }, { "epoch": 13.243016759776536, "grad_norm": 0.4392891228199005, "learning_rate": 0.00033915966386554626, "loss": 0.4813, "step": 23705 }, { "epoch": 13.243575418994414, "grad_norm": 0.6085929870605469, "learning_rate": 0.00033913165266106446, "loss": 0.3963, "step": 23706 }, { "epoch": 13.24413407821229, "grad_norm": 0.380363792181015, "learning_rate": 0.0003391036414565826, "loss": 0.3603, "step": 23707 }, { "epoch": 13.244692737430167, "grad_norm": 0.5867030620574951, "learning_rate": 0.0003390756302521009, "loss": 0.4762, "step": 23708 }, { "epoch": 13.245251396648044, "grad_norm": 0.6263246536254883, "learning_rate": 0.00033904761904761903, "loss": 0.4342, "step": 23709 }, { "epoch": 13.245810055865922, "grad_norm": 0.4621928334236145, "learning_rate": 0.00033901960784313723, "loss": 0.4349, "step": 23710 }, { "epoch": 13.246368715083799, "grad_norm": 0.523230791091919, "learning_rate": 0.0003389915966386555, "loss": 0.4075, "step": 23711 }, { "epoch": 13.246927374301675, "grad_norm": 0.40113189816474915, "learning_rate": 0.00033896358543417365, "loss": 0.4657, "step": 23712 }, { "epoch": 13.247486033519554, "grad_norm": 0.946519672870636, "learning_rate": 0.0003389355742296919, "loss": 0.3781, "step": 23713 }, { "epoch": 13.24804469273743, "grad_norm": 0.39682650566101074, "learning_rate": 0.0003389075630252101, "loss": 0.3894, "step": 23714 }, { "epoch": 13.248603351955307, "grad_norm": 0.4995368421077728, "learning_rate": 0.00033887955182072826, "loss": 0.3952, "step": 23715 }, { "epoch": 13.249162011173185, "grad_norm": 0.47496095299720764, "learning_rate": 
0.0003388515406162465, "loss": 0.4611, "step": 23716 }, { "epoch": 13.249720670391062, "grad_norm": 0.32721957564353943, "learning_rate": 0.0003388235294117647, "loss": 0.3812, "step": 23717 }, { "epoch": 13.250279329608938, "grad_norm": 0.5261251926422119, "learning_rate": 0.00033879551820728294, "loss": 0.4817, "step": 23718 }, { "epoch": 13.250837988826815, "grad_norm": 0.5836412906646729, "learning_rate": 0.00033876750700280114, "loss": 0.6486, "step": 23719 }, { "epoch": 13.251396648044693, "grad_norm": 0.6512834429740906, "learning_rate": 0.0003387394957983193, "loss": 0.5161, "step": 23720 }, { "epoch": 13.25195530726257, "grad_norm": 0.4200536608695984, "learning_rate": 0.00033871148459383755, "loss": 0.4173, "step": 23721 }, { "epoch": 13.252513966480446, "grad_norm": 0.41412511467933655, "learning_rate": 0.00033868347338935576, "loss": 0.3841, "step": 23722 }, { "epoch": 13.253072625698325, "grad_norm": 0.9450453519821167, "learning_rate": 0.00033865546218487397, "loss": 0.3596, "step": 23723 }, { "epoch": 13.253631284916201, "grad_norm": 0.6311314105987549, "learning_rate": 0.00033862745098039217, "loss": 0.4675, "step": 23724 }, { "epoch": 13.254189944134078, "grad_norm": 1.0011779069900513, "learning_rate": 0.0003385994397759103, "loss": 0.4963, "step": 23725 }, { "epoch": 13.254748603351956, "grad_norm": 0.7265097498893738, "learning_rate": 0.0003385714285714286, "loss": 0.3648, "step": 23726 }, { "epoch": 13.255307262569833, "grad_norm": 0.5350236892700195, "learning_rate": 0.0003385434173669468, "loss": 0.5527, "step": 23727 }, { "epoch": 13.25586592178771, "grad_norm": 0.3682211935520172, "learning_rate": 0.000338515406162465, "loss": 0.4746, "step": 23728 }, { "epoch": 13.256424581005586, "grad_norm": 0.3880104720592499, "learning_rate": 0.0003384873949579832, "loss": 0.546, "step": 23729 }, { "epoch": 13.256983240223464, "grad_norm": 0.5494697093963623, "learning_rate": 0.0003384593837535014, "loss": 0.5341, "step": 23730 }, { "epoch": 
13.25754189944134, "grad_norm": 0.5074079036712646, "learning_rate": 0.0003384313725490196, "loss": 0.4142, "step": 23731 }, { "epoch": 13.258100558659217, "grad_norm": 0.582423210144043, "learning_rate": 0.0003384033613445378, "loss": 0.5867, "step": 23732 }, { "epoch": 13.258659217877096, "grad_norm": 0.9407914876937866, "learning_rate": 0.000338375350140056, "loss": 0.3889, "step": 23733 }, { "epoch": 13.259217877094972, "grad_norm": 0.46638309955596924, "learning_rate": 0.00033834733893557423, "loss": 0.3626, "step": 23734 }, { "epoch": 13.259776536312849, "grad_norm": 0.47919225692749023, "learning_rate": 0.00033831932773109244, "loss": 0.6177, "step": 23735 }, { "epoch": 13.260335195530725, "grad_norm": 0.4282124638557434, "learning_rate": 0.00033829131652661064, "loss": 0.4576, "step": 23736 }, { "epoch": 13.260893854748604, "grad_norm": 0.6756613254547119, "learning_rate": 0.00033826330532212885, "loss": 0.4171, "step": 23737 }, { "epoch": 13.26145251396648, "grad_norm": 0.49720460176467896, "learning_rate": 0.0003382352941176471, "loss": 0.3788, "step": 23738 }, { "epoch": 13.262011173184357, "grad_norm": 0.963648796081543, "learning_rate": 0.00033820728291316526, "loss": 0.3591, "step": 23739 }, { "epoch": 13.262569832402235, "grad_norm": 0.5625825524330139, "learning_rate": 0.00033817927170868347, "loss": 0.4253, "step": 23740 }, { "epoch": 13.263128491620112, "grad_norm": 0.6193555593490601, "learning_rate": 0.0003381512605042017, "loss": 0.4243, "step": 23741 }, { "epoch": 13.263687150837988, "grad_norm": 0.48198628425598145, "learning_rate": 0.0003381232492997199, "loss": 0.3295, "step": 23742 }, { "epoch": 13.264245810055867, "grad_norm": 0.3768001198768616, "learning_rate": 0.00033809523809523814, "loss": 0.3951, "step": 23743 }, { "epoch": 13.264804469273743, "grad_norm": 7.285225868225098, "learning_rate": 0.0003380672268907563, "loss": 0.488, "step": 23744 }, { "epoch": 13.26536312849162, "grad_norm": 0.5495749711990356, "learning_rate": 
0.0003380392156862745, "loss": 0.4656, "step": 23745 }, { "epoch": 13.265921787709496, "grad_norm": 0.3612942099571228, "learning_rate": 0.00033801120448179276, "loss": 0.4098, "step": 23746 }, { "epoch": 13.266480446927375, "grad_norm": 0.411364883184433, "learning_rate": 0.0003379831932773109, "loss": 0.4022, "step": 23747 }, { "epoch": 13.267039106145251, "grad_norm": 0.565970242023468, "learning_rate": 0.00033795518207282917, "loss": 0.5006, "step": 23748 }, { "epoch": 13.267597765363128, "grad_norm": 0.4536724090576172, "learning_rate": 0.0003379271708683473, "loss": 0.5085, "step": 23749 }, { "epoch": 13.268156424581006, "grad_norm": 0.29209285974502563, "learning_rate": 0.00033789915966386553, "loss": 0.3827, "step": 23750 }, { "epoch": 13.268715083798883, "grad_norm": 0.5238004326820374, "learning_rate": 0.0003378711484593838, "loss": 0.5162, "step": 23751 }, { "epoch": 13.26927374301676, "grad_norm": 0.5797522068023682, "learning_rate": 0.00033784313725490194, "loss": 0.3792, "step": 23752 }, { "epoch": 13.269832402234638, "grad_norm": 3.573162317276001, "learning_rate": 0.0003378151260504202, "loss": 0.4015, "step": 23753 }, { "epoch": 13.270391061452514, "grad_norm": 1.1292129755020142, "learning_rate": 0.0003377871148459384, "loss": 0.3854, "step": 23754 }, { "epoch": 13.27094972067039, "grad_norm": 6.190539836883545, "learning_rate": 0.00033775910364145656, "loss": 0.5266, "step": 23755 }, { "epoch": 13.271508379888267, "grad_norm": 0.41582804918289185, "learning_rate": 0.0003377310924369748, "loss": 0.2796, "step": 23756 }, { "epoch": 13.272067039106146, "grad_norm": 2.211156129837036, "learning_rate": 0.00033770308123249297, "loss": 0.5429, "step": 23757 }, { "epoch": 13.272625698324022, "grad_norm": 0.7692956924438477, "learning_rate": 0.00033767507002801123, "loss": 0.3791, "step": 23758 }, { "epoch": 13.273184357541899, "grad_norm": 0.4514099955558777, "learning_rate": 0.00033764705882352944, "loss": 0.4525, "step": 23759 }, { "epoch": 
13.273743016759777, "grad_norm": 0.3639926612377167, "learning_rate": 0.0003376190476190476, "loss": 0.4728, "step": 23760 }, { "epoch": 13.274301675977654, "grad_norm": 0.5912122130393982, "learning_rate": 0.00033759103641456585, "loss": 0.4062, "step": 23761 }, { "epoch": 13.27486033519553, "grad_norm": 0.9222456216812134, "learning_rate": 0.00033756302521008405, "loss": 0.4132, "step": 23762 }, { "epoch": 13.275418994413409, "grad_norm": 0.2927217185497284, "learning_rate": 0.00033753501400560226, "loss": 0.3699, "step": 23763 }, { "epoch": 13.275977653631285, "grad_norm": 0.7229845523834229, "learning_rate": 0.00033750700280112047, "loss": 0.5858, "step": 23764 }, { "epoch": 13.276536312849162, "grad_norm": 0.8083800673484802, "learning_rate": 0.0003374789915966386, "loss": 0.436, "step": 23765 }, { "epoch": 13.277094972067038, "grad_norm": 0.42317280173301697, "learning_rate": 0.0003374509803921569, "loss": 0.4916, "step": 23766 }, { "epoch": 13.277653631284917, "grad_norm": 0.5788655281066895, "learning_rate": 0.0003374229691876751, "loss": 0.4597, "step": 23767 }, { "epoch": 13.278212290502793, "grad_norm": 0.4634612500667572, "learning_rate": 0.0003373949579831933, "loss": 0.3903, "step": 23768 }, { "epoch": 13.27877094972067, "grad_norm": 0.3420405089855194, "learning_rate": 0.0003373669467787115, "loss": 0.3958, "step": 23769 }, { "epoch": 13.279329608938548, "grad_norm": 0.5979591608047485, "learning_rate": 0.0003373389355742297, "loss": 0.4618, "step": 23770 }, { "epoch": 13.279888268156425, "grad_norm": 0.5105229020118713, "learning_rate": 0.0003373109243697479, "loss": 0.4256, "step": 23771 }, { "epoch": 13.280446927374301, "grad_norm": 0.45555105805397034, "learning_rate": 0.0003372829131652661, "loss": 0.3752, "step": 23772 }, { "epoch": 13.28100558659218, "grad_norm": 0.5491973161697388, "learning_rate": 0.0003372549019607843, "loss": 0.5126, "step": 23773 }, { "epoch": 13.281564245810056, "grad_norm": 0.4162377715110779, "learning_rate": 
0.0003372268907563025, "loss": 0.3753, "step": 23774 }, { "epoch": 13.282122905027933, "grad_norm": 0.5493960976600647, "learning_rate": 0.00033719887955182073, "loss": 0.4632, "step": 23775 }, { "epoch": 13.28268156424581, "grad_norm": 0.4823024868965149, "learning_rate": 0.00033717086834733894, "loss": 0.4191, "step": 23776 }, { "epoch": 13.283240223463688, "grad_norm": 0.608932614326477, "learning_rate": 0.00033714285714285714, "loss": 0.5072, "step": 23777 }, { "epoch": 13.283798882681564, "grad_norm": 0.3941212296485901, "learning_rate": 0.0003371148459383754, "loss": 0.4176, "step": 23778 }, { "epoch": 13.28435754189944, "grad_norm": 0.7590417861938477, "learning_rate": 0.00033708683473389356, "loss": 0.5109, "step": 23779 }, { "epoch": 13.28491620111732, "grad_norm": 0.48589059710502625, "learning_rate": 0.00033705882352941176, "loss": 0.5136, "step": 23780 }, { "epoch": 13.285474860335196, "grad_norm": 0.6259783506393433, "learning_rate": 0.00033703081232492997, "loss": 0.4997, "step": 23781 }, { "epoch": 13.286033519553072, "grad_norm": 0.3962301015853882, "learning_rate": 0.0003370028011204482, "loss": 0.3443, "step": 23782 }, { "epoch": 13.286592178770949, "grad_norm": 1.0924508571624756, "learning_rate": 0.00033697478991596643, "loss": 0.3593, "step": 23783 }, { "epoch": 13.287150837988827, "grad_norm": 0.46605604887008667, "learning_rate": 0.0003369467787114846, "loss": 0.4109, "step": 23784 }, { "epoch": 13.287709497206704, "grad_norm": 0.6827029585838318, "learning_rate": 0.0003369187675070028, "loss": 0.3317, "step": 23785 }, { "epoch": 13.28826815642458, "grad_norm": 0.5276693105697632, "learning_rate": 0.00033689075630252105, "loss": 0.3585, "step": 23786 }, { "epoch": 13.288826815642459, "grad_norm": 1.05462646484375, "learning_rate": 0.0003368627450980392, "loss": 0.3805, "step": 23787 }, { "epoch": 13.289385474860335, "grad_norm": 0.34285661578178406, "learning_rate": 0.00033683473389355746, "loss": 0.3501, "step": 23788 }, { "epoch": 
13.289944134078212, "grad_norm": 0.5167961716651917, "learning_rate": 0.0003368067226890756, "loss": 0.5222, "step": 23789 }, { "epoch": 13.29050279329609, "grad_norm": 0.611379861831665, "learning_rate": 0.0003367787114845938, "loss": 0.371, "step": 23790 }, { "epoch": 13.291061452513967, "grad_norm": 1.8814785480499268, "learning_rate": 0.0003367507002801121, "loss": 0.4354, "step": 23791 }, { "epoch": 13.291620111731843, "grad_norm": 0.5234327912330627, "learning_rate": 0.00033672268907563023, "loss": 0.3668, "step": 23792 }, { "epoch": 13.29217877094972, "grad_norm": 0.5477795004844666, "learning_rate": 0.0003366946778711485, "loss": 0.627, "step": 23793 }, { "epoch": 13.292737430167598, "grad_norm": 1.0924941301345825, "learning_rate": 0.0003366666666666667, "loss": 0.3692, "step": 23794 }, { "epoch": 13.293296089385475, "grad_norm": 0.8735679984092712, "learning_rate": 0.00033663865546218485, "loss": 0.417, "step": 23795 }, { "epoch": 13.293854748603351, "grad_norm": 0.5475407838821411, "learning_rate": 0.0003366106442577031, "loss": 0.463, "step": 23796 }, { "epoch": 13.29441340782123, "grad_norm": 0.3959656357765198, "learning_rate": 0.00033658263305322126, "loss": 0.3657, "step": 23797 }, { "epoch": 13.294972067039106, "grad_norm": 0.6313350200653076, "learning_rate": 0.0003365546218487395, "loss": 0.437, "step": 23798 }, { "epoch": 13.295530726256983, "grad_norm": 2.0455846786499023, "learning_rate": 0.00033652661064425773, "loss": 0.4531, "step": 23799 }, { "epoch": 13.296089385474861, "grad_norm": 1.8393518924713135, "learning_rate": 0.0003364985994397759, "loss": 0.3803, "step": 23800 }, { "epoch": 13.296648044692738, "grad_norm": 0.45601314306259155, "learning_rate": 0.00033647058823529414, "loss": 0.3257, "step": 23801 }, { "epoch": 13.297206703910614, "grad_norm": 0.5860977172851562, "learning_rate": 0.00033644257703081235, "loss": 0.4253, "step": 23802 }, { "epoch": 13.297765363128491, "grad_norm": 0.5303998589515686, "learning_rate": 
0.00033641456582633055, "loss": 0.3477, "step": 23803 }, { "epoch": 13.29832402234637, "grad_norm": 0.4638810455799103, "learning_rate": 0.00033638655462184876, "loss": 0.3804, "step": 23804 }, { "epoch": 13.298882681564246, "grad_norm": 0.6082403063774109, "learning_rate": 0.0003363585434173669, "loss": 0.5631, "step": 23805 }, { "epoch": 13.299441340782122, "grad_norm": 0.41769352555274963, "learning_rate": 0.00033633053221288517, "loss": 0.3649, "step": 23806 }, { "epoch": 13.3, "grad_norm": 0.4883352518081665, "learning_rate": 0.0003363025210084034, "loss": 0.3777, "step": 23807 }, { "epoch": 13.300558659217877, "grad_norm": 0.37725093960762024, "learning_rate": 0.0003362745098039216, "loss": 0.424, "step": 23808 }, { "epoch": 13.301117318435754, "grad_norm": 0.41677770018577576, "learning_rate": 0.0003362464985994398, "loss": 0.3543, "step": 23809 }, { "epoch": 13.30167597765363, "grad_norm": 0.37945252656936646, "learning_rate": 0.000336218487394958, "loss": 0.3537, "step": 23810 }, { "epoch": 13.302234636871509, "grad_norm": 0.5119186639785767, "learning_rate": 0.0003361904761904762, "loss": 0.4028, "step": 23811 }, { "epoch": 13.302793296089385, "grad_norm": 0.49954649806022644, "learning_rate": 0.0003361624649859944, "loss": 0.4695, "step": 23812 }, { "epoch": 13.303351955307262, "grad_norm": 1.107476830482483, "learning_rate": 0.0003361344537815126, "loss": 0.6901, "step": 23813 }, { "epoch": 13.30391061452514, "grad_norm": 1.364524006843567, "learning_rate": 0.0003361064425770308, "loss": 0.4671, "step": 23814 }, { "epoch": 13.304469273743017, "grad_norm": 0.6046756505966187, "learning_rate": 0.000336078431372549, "loss": 0.5035, "step": 23815 }, { "epoch": 13.305027932960893, "grad_norm": 1.9447007179260254, "learning_rate": 0.00033605042016806723, "loss": 0.3759, "step": 23816 }, { "epoch": 13.305586592178772, "grad_norm": 3.609431266784668, "learning_rate": 0.00033602240896358544, "loss": 0.4887, "step": 23817 }, { "epoch": 13.306145251396648, 
"grad_norm": 0.5841872096061707, "learning_rate": 0.0003359943977591037, "loss": 0.4432, "step": 23818 }, { "epoch": 13.306703910614525, "grad_norm": 0.4049026370048523, "learning_rate": 0.00033596638655462185, "loss": 0.4052, "step": 23819 }, { "epoch": 13.307262569832401, "grad_norm": 0.37242940068244934, "learning_rate": 0.00033593837535014006, "loss": 0.3739, "step": 23820 }, { "epoch": 13.30782122905028, "grad_norm": 0.434740275144577, "learning_rate": 0.00033591036414565826, "loss": 0.4814, "step": 23821 }, { "epoch": 13.308379888268156, "grad_norm": 0.5802294015884399, "learning_rate": 0.00033588235294117647, "loss": 0.5231, "step": 23822 }, { "epoch": 13.308938547486033, "grad_norm": 0.5402165651321411, "learning_rate": 0.0003358543417366947, "loss": 0.5756, "step": 23823 }, { "epoch": 13.309497206703911, "grad_norm": 0.46558958292007446, "learning_rate": 0.0003358263305322129, "loss": 0.4299, "step": 23824 }, { "epoch": 13.310055865921788, "grad_norm": 0.3648386001586914, "learning_rate": 0.0003357983193277311, "loss": 0.3439, "step": 23825 }, { "epoch": 13.310614525139664, "grad_norm": 0.47914642095565796, "learning_rate": 0.00033577030812324935, "loss": 0.3986, "step": 23826 }, { "epoch": 13.311173184357543, "grad_norm": 0.4992138743400574, "learning_rate": 0.0003357422969187675, "loss": 0.4313, "step": 23827 }, { "epoch": 13.31173184357542, "grad_norm": 0.3443225622177124, "learning_rate": 0.0003357142857142857, "loss": 0.3626, "step": 23828 }, { "epoch": 13.312290502793296, "grad_norm": 0.5787002444267273, "learning_rate": 0.0003356862745098039, "loss": 0.3574, "step": 23829 }, { "epoch": 13.312849162011172, "grad_norm": 0.43956124782562256, "learning_rate": 0.0003356582633053221, "loss": 0.5116, "step": 23830 }, { "epoch": 13.31340782122905, "grad_norm": 0.7588776350021362, "learning_rate": 0.0003356302521008404, "loss": 0.3789, "step": 23831 }, { "epoch": 13.313966480446927, "grad_norm": 0.4662046432495117, "learning_rate": 0.00033560224089635853, 
"loss": 0.4886, "step": 23832 }, { "epoch": 13.314525139664804, "grad_norm": 0.47922998666763306, "learning_rate": 0.00033557422969187673, "loss": 0.4323, "step": 23833 }, { "epoch": 13.315083798882682, "grad_norm": 0.9394093751907349, "learning_rate": 0.000335546218487395, "loss": 0.3502, "step": 23834 }, { "epoch": 13.315642458100559, "grad_norm": 0.35608112812042236, "learning_rate": 0.00033551820728291315, "loss": 0.4018, "step": 23835 }, { "epoch": 13.316201117318435, "grad_norm": 0.3313712775707245, "learning_rate": 0.0003354901960784314, "loss": 0.3623, "step": 23836 }, { "epoch": 13.316759776536314, "grad_norm": 0.44091737270355225, "learning_rate": 0.00033546218487394956, "loss": 0.3845, "step": 23837 }, { "epoch": 13.31731843575419, "grad_norm": 1.1808913946151733, "learning_rate": 0.00033543417366946776, "loss": 0.4446, "step": 23838 }, { "epoch": 13.317877094972067, "grad_norm": 0.6786538362503052, "learning_rate": 0.000335406162464986, "loss": 0.6643, "step": 23839 }, { "epoch": 13.318435754189943, "grad_norm": 1.007463812828064, "learning_rate": 0.0003353781512605042, "loss": 0.4263, "step": 23840 }, { "epoch": 13.318994413407822, "grad_norm": 0.4319334030151367, "learning_rate": 0.00033535014005602244, "loss": 0.3863, "step": 23841 }, { "epoch": 13.319553072625698, "grad_norm": 0.8270582556724548, "learning_rate": 0.00033532212885154064, "loss": 0.5159, "step": 23842 }, { "epoch": 13.320111731843575, "grad_norm": 1.0376777648925781, "learning_rate": 0.0003352941176470588, "loss": 0.4225, "step": 23843 }, { "epoch": 13.320670391061453, "grad_norm": 0.47982004284858704, "learning_rate": 0.00033526610644257705, "loss": 0.3449, "step": 23844 }, { "epoch": 13.32122905027933, "grad_norm": 0.40144264698028564, "learning_rate": 0.0003352380952380952, "loss": 0.3341, "step": 23845 }, { "epoch": 13.321787709497206, "grad_norm": 0.6232517957687378, "learning_rate": 0.00033521008403361347, "loss": 0.3658, "step": 23846 }, { "epoch": 13.322346368715085, 
"grad_norm": 0.5487639307975769, "learning_rate": 0.00033518207282913167, "loss": 0.4097, "step": 23847 }, { "epoch": 13.322905027932961, "grad_norm": 0.31972163915634155, "learning_rate": 0.0003351540616246498, "loss": 0.3973, "step": 23848 }, { "epoch": 13.323463687150838, "grad_norm": 0.45323118567466736, "learning_rate": 0.0003351260504201681, "loss": 0.5238, "step": 23849 }, { "epoch": 13.324022346368714, "grad_norm": 0.5316075682640076, "learning_rate": 0.0003350980392156863, "loss": 0.4174, "step": 23850 }, { "epoch": 13.324581005586593, "grad_norm": 0.5013684630393982, "learning_rate": 0.0003350700280112045, "loss": 0.4098, "step": 23851 }, { "epoch": 13.32513966480447, "grad_norm": 0.5039564967155457, "learning_rate": 0.0003350420168067227, "loss": 0.3542, "step": 23852 }, { "epoch": 13.325698324022346, "grad_norm": 0.5305153727531433, "learning_rate": 0.00033501400560224085, "loss": 0.4429, "step": 23853 }, { "epoch": 13.326256983240224, "grad_norm": 0.7472844123840332, "learning_rate": 0.0003349859943977591, "loss": 0.3688, "step": 23854 }, { "epoch": 13.3268156424581, "grad_norm": 1.5326067209243774, "learning_rate": 0.0003349579831932773, "loss": 0.4184, "step": 23855 }, { "epoch": 13.327374301675977, "grad_norm": 0.3773473799228668, "learning_rate": 0.0003349299719887955, "loss": 0.3738, "step": 23856 }, { "epoch": 13.327932960893854, "grad_norm": 0.39498671889305115, "learning_rate": 0.00033490196078431373, "loss": 0.4291, "step": 23857 }, { "epoch": 13.328491620111732, "grad_norm": 0.4979417324066162, "learning_rate": 0.00033487394957983194, "loss": 0.4405, "step": 23858 }, { "epoch": 13.329050279329609, "grad_norm": 0.5262969136238098, "learning_rate": 0.00033484593837535014, "loss": 0.5357, "step": 23859 }, { "epoch": 13.329608938547485, "grad_norm": 0.3744189739227295, "learning_rate": 0.00033481792717086835, "loss": 0.3762, "step": 23860 }, { "epoch": 13.330167597765364, "grad_norm": 0.3269403874874115, "learning_rate": 0.00033478991596638656, 
"loss": 0.3229, "step": 23861 }, { "epoch": 13.33072625698324, "grad_norm": 0.6849920153617859, "learning_rate": 0.00033476190476190476, "loss": 0.5005, "step": 23862 }, { "epoch": 13.331284916201117, "grad_norm": 1.2992875576019287, "learning_rate": 0.00033473389355742297, "loss": 0.4249, "step": 23863 }, { "epoch": 13.331843575418995, "grad_norm": 0.7522691488265991, "learning_rate": 0.0003347058823529412, "loss": 0.485, "step": 23864 }, { "epoch": 13.332402234636872, "grad_norm": 0.5542646050453186, "learning_rate": 0.0003346778711484594, "loss": 0.5863, "step": 23865 }, { "epoch": 13.332960893854748, "grad_norm": 0.4124361574649811, "learning_rate": 0.00033464985994397764, "loss": 0.4196, "step": 23866 }, { "epoch": 13.333519553072625, "grad_norm": 7.529343605041504, "learning_rate": 0.0003346218487394958, "loss": 0.3452, "step": 23867 }, { "epoch": 13.334078212290503, "grad_norm": 0.6973263025283813, "learning_rate": 0.000334593837535014, "loss": 0.3848, "step": 23868 }, { "epoch": 13.33463687150838, "grad_norm": 0.5479353070259094, "learning_rate": 0.0003345658263305322, "loss": 0.3435, "step": 23869 }, { "epoch": 13.335195530726256, "grad_norm": 0.4201735556125641, "learning_rate": 0.0003345378151260504, "loss": 0.403, "step": 23870 }, { "epoch": 13.335754189944135, "grad_norm": 0.3778197169303894, "learning_rate": 0.00033450980392156867, "loss": 0.4235, "step": 23871 }, { "epoch": 13.336312849162011, "grad_norm": 0.43880122900009155, "learning_rate": 0.0003344817927170868, "loss": 0.3849, "step": 23872 }, { "epoch": 13.336871508379888, "grad_norm": 0.38169172406196594, "learning_rate": 0.00033445378151260503, "loss": 0.4012, "step": 23873 }, { "epoch": 13.337430167597766, "grad_norm": 0.46411970257759094, "learning_rate": 0.0003344257703081233, "loss": 0.4198, "step": 23874 }, { "epoch": 13.337988826815643, "grad_norm": 0.6051220893859863, "learning_rate": 0.00033439775910364144, "loss": 0.484, "step": 23875 }, { "epoch": 13.33854748603352, "grad_norm": 
0.5200245976448059, "learning_rate": 0.0003343697478991597, "loss": 0.3722, "step": 23876 }, { "epoch": 13.339106145251396, "grad_norm": 0.4520384669303894, "learning_rate": 0.00033434173669467785, "loss": 0.3846, "step": 23877 }, { "epoch": 13.339664804469274, "grad_norm": 0.3697698414325714, "learning_rate": 0.00033431372549019606, "loss": 0.4308, "step": 23878 }, { "epoch": 13.34022346368715, "grad_norm": 0.8896191716194153, "learning_rate": 0.0003342857142857143, "loss": 0.391, "step": 23879 }, { "epoch": 13.340782122905027, "grad_norm": 0.4311777949333191, "learning_rate": 0.00033425770308123247, "loss": 0.3403, "step": 23880 }, { "epoch": 13.341340782122906, "grad_norm": 1.1365503072738647, "learning_rate": 0.00033422969187675073, "loss": 0.5369, "step": 23881 }, { "epoch": 13.341899441340782, "grad_norm": 1.3326804637908936, "learning_rate": 0.00033420168067226894, "loss": 0.3207, "step": 23882 }, { "epoch": 13.342458100558659, "grad_norm": 0.3907860517501831, "learning_rate": 0.0003341736694677871, "loss": 0.4018, "step": 23883 }, { "epoch": 13.343016759776535, "grad_norm": 0.40447160601615906, "learning_rate": 0.00033414565826330535, "loss": 0.4004, "step": 23884 }, { "epoch": 13.343575418994414, "grad_norm": 1.5601035356521606, "learning_rate": 0.0003341176470588235, "loss": 0.4241, "step": 23885 }, { "epoch": 13.34413407821229, "grad_norm": 0.5868250727653503, "learning_rate": 0.00033408963585434176, "loss": 0.379, "step": 23886 }, { "epoch": 13.344692737430167, "grad_norm": 0.4469394385814667, "learning_rate": 0.00033406162464985997, "loss": 0.2715, "step": 23887 }, { "epoch": 13.345251396648045, "grad_norm": 0.4678853452205658, "learning_rate": 0.0003340336134453781, "loss": 0.3468, "step": 23888 }, { "epoch": 13.345810055865922, "grad_norm": 0.7323243618011475, "learning_rate": 0.0003340056022408964, "loss": 0.3656, "step": 23889 }, { "epoch": 13.346368715083798, "grad_norm": 0.6104510426521301, "learning_rate": 0.0003339775910364146, "loss": 0.4768, 
"step": 23890 }, { "epoch": 13.346927374301677, "grad_norm": 2.0912399291992188, "learning_rate": 0.0003339495798319328, "loss": 0.3526, "step": 23891 }, { "epoch": 13.347486033519553, "grad_norm": 0.4993850886821747, "learning_rate": 0.000333921568627451, "loss": 0.4722, "step": 23892 }, { "epoch": 13.34804469273743, "grad_norm": 0.3902897834777832, "learning_rate": 0.00033389355742296915, "loss": 0.4615, "step": 23893 }, { "epoch": 13.348603351955306, "grad_norm": 0.40381699800491333, "learning_rate": 0.0003338655462184874, "loss": 0.4241, "step": 23894 }, { "epoch": 13.349162011173185, "grad_norm": 0.5576342940330505, "learning_rate": 0.0003338375350140056, "loss": 0.3997, "step": 23895 }, { "epoch": 13.349720670391061, "grad_norm": 0.4372580945491791, "learning_rate": 0.0003338095238095238, "loss": 0.5035, "step": 23896 }, { "epoch": 13.350279329608938, "grad_norm": 0.42239633202552795, "learning_rate": 0.000333781512605042, "loss": 0.3994, "step": 23897 }, { "epoch": 13.350837988826816, "grad_norm": 1.4621309041976929, "learning_rate": 0.00033375350140056023, "loss": 0.4073, "step": 23898 }, { "epoch": 13.351396648044693, "grad_norm": 0.41640138626098633, "learning_rate": 0.00033372549019607844, "loss": 0.4049, "step": 23899 }, { "epoch": 13.35195530726257, "grad_norm": 0.7657338380813599, "learning_rate": 0.00033369747899159664, "loss": 0.39, "step": 23900 }, { "epoch": 13.352513966480448, "grad_norm": 0.7223778367042542, "learning_rate": 0.0003336694677871149, "loss": 0.5628, "step": 23901 }, { "epoch": 13.353072625698324, "grad_norm": 0.48033207654953003, "learning_rate": 0.00033364145658263306, "loss": 0.4704, "step": 23902 }, { "epoch": 13.3536312849162, "grad_norm": 0.4417782723903656, "learning_rate": 0.00033361344537815126, "loss": 0.3768, "step": 23903 }, { "epoch": 13.354189944134077, "grad_norm": 0.5472639203071594, "learning_rate": 0.00033358543417366947, "loss": 0.3224, "step": 23904 }, { "epoch": 13.354748603351956, "grad_norm": 
0.4203943610191345, "learning_rate": 0.0003335574229691877, "loss": 0.4523, "step": 23905 }, { "epoch": 13.355307262569832, "grad_norm": 0.40968528389930725, "learning_rate": 0.00033352941176470593, "loss": 0.348, "step": 23906 }, { "epoch": 13.355865921787709, "grad_norm": 2.3902251720428467, "learning_rate": 0.0003335014005602241, "loss": 0.3167, "step": 23907 }, { "epoch": 13.356424581005587, "grad_norm": 5.612401485443115, "learning_rate": 0.0003334733893557423, "loss": 0.4434, "step": 23908 }, { "epoch": 13.356983240223464, "grad_norm": 0.6918250918388367, "learning_rate": 0.00033344537815126055, "loss": 0.477, "step": 23909 }, { "epoch": 13.35754189944134, "grad_norm": 0.42146405577659607, "learning_rate": 0.0003334173669467787, "loss": 0.4734, "step": 23910 }, { "epoch": 13.358100558659217, "grad_norm": 0.6265257000923157, "learning_rate": 0.00033338935574229696, "loss": 0.3636, "step": 23911 }, { "epoch": 13.358659217877095, "grad_norm": 0.5151063799858093, "learning_rate": 0.0003333613445378151, "loss": 0.5935, "step": 23912 }, { "epoch": 13.359217877094972, "grad_norm": 1.145356297492981, "learning_rate": 0.0003333333333333333, "loss": 0.4679, "step": 23913 }, { "epoch": 13.359776536312848, "grad_norm": 0.34669482707977295, "learning_rate": 0.0003333053221288516, "loss": 0.363, "step": 23914 }, { "epoch": 13.360335195530727, "grad_norm": 0.4645282030105591, "learning_rate": 0.00033327731092436973, "loss": 0.4035, "step": 23915 }, { "epoch": 13.360893854748603, "grad_norm": 0.4767138361930847, "learning_rate": 0.000333249299719888, "loss": 0.4715, "step": 23916 }, { "epoch": 13.36145251396648, "grad_norm": 0.5608267784118652, "learning_rate": 0.0003332212885154062, "loss": 0.3601, "step": 23917 }, { "epoch": 13.362011173184358, "grad_norm": 0.5498733520507812, "learning_rate": 0.00033319327731092435, "loss": 0.6437, "step": 23918 }, { "epoch": 13.362569832402235, "grad_norm": 0.5856558084487915, "learning_rate": 0.0003331652661064426, "loss": 0.5159, 
"step": 23919 }, { "epoch": 13.363128491620111, "grad_norm": 0.6594109535217285, "learning_rate": 0.00033313725490196076, "loss": 0.4361, "step": 23920 }, { "epoch": 13.363687150837988, "grad_norm": 1.2163207530975342, "learning_rate": 0.000333109243697479, "loss": 0.4369, "step": 23921 }, { "epoch": 13.364245810055866, "grad_norm": 0.46256163716316223, "learning_rate": 0.00033308123249299723, "loss": 0.3462, "step": 23922 }, { "epoch": 13.364804469273743, "grad_norm": 0.7978337407112122, "learning_rate": 0.0003330532212885154, "loss": 0.3853, "step": 23923 }, { "epoch": 13.36536312849162, "grad_norm": 0.48576152324676514, "learning_rate": 0.00033302521008403364, "loss": 0.3777, "step": 23924 }, { "epoch": 13.365921787709498, "grad_norm": 0.5249726176261902, "learning_rate": 0.00033299719887955185, "loss": 0.4314, "step": 23925 }, { "epoch": 13.366480446927374, "grad_norm": 0.4241422116756439, "learning_rate": 0.00033296918767507005, "loss": 0.3921, "step": 23926 }, { "epoch": 13.367039106145251, "grad_norm": 0.5657902956008911, "learning_rate": 0.00033294117647058826, "loss": 0.4705, "step": 23927 }, { "epoch": 13.36759776536313, "grad_norm": 0.5743637084960938, "learning_rate": 0.0003329131652661064, "loss": 0.3546, "step": 23928 }, { "epoch": 13.368156424581006, "grad_norm": 0.7750608325004578, "learning_rate": 0.00033288515406162467, "loss": 0.5385, "step": 23929 }, { "epoch": 13.368715083798882, "grad_norm": 0.5233944058418274, "learning_rate": 0.0003328571428571429, "loss": 0.4268, "step": 23930 }, { "epoch": 13.369273743016759, "grad_norm": 0.423409104347229, "learning_rate": 0.0003328291316526611, "loss": 0.3835, "step": 23931 }, { "epoch": 13.369832402234637, "grad_norm": 0.4611133933067322, "learning_rate": 0.0003328011204481793, "loss": 0.4779, "step": 23932 }, { "epoch": 13.370391061452514, "grad_norm": 0.6168695092201233, "learning_rate": 0.0003327731092436975, "loss": 0.4186, "step": 23933 }, { "epoch": 13.37094972067039, "grad_norm": 
0.5267783403396606, "learning_rate": 0.0003327450980392157, "loss": 0.3806, "step": 23934 }, { "epoch": 13.371508379888269, "grad_norm": 0.728651225566864, "learning_rate": 0.0003327170868347339, "loss": 0.413, "step": 23935 }, { "epoch": 13.372067039106145, "grad_norm": 0.5308983325958252, "learning_rate": 0.00033268907563025206, "loss": 0.5084, "step": 23936 }, { "epoch": 13.372625698324022, "grad_norm": 0.9741657972335815, "learning_rate": 0.0003326610644257703, "loss": 0.506, "step": 23937 }, { "epoch": 13.3731843575419, "grad_norm": 0.37411120533943176, "learning_rate": 0.0003326330532212885, "loss": 0.3495, "step": 23938 }, { "epoch": 13.373743016759777, "grad_norm": 0.5017676949501038, "learning_rate": 0.00033260504201680673, "loss": 0.37, "step": 23939 }, { "epoch": 13.374301675977653, "grad_norm": 0.8682034611701965, "learning_rate": 0.00033257703081232494, "loss": 0.5199, "step": 23940 }, { "epoch": 13.37486033519553, "grad_norm": 0.5218703150749207, "learning_rate": 0.00033254901960784314, "loss": 0.5387, "step": 23941 }, { "epoch": 13.375418994413408, "grad_norm": 0.5939416289329529, "learning_rate": 0.00033252100840336135, "loss": 0.4932, "step": 23942 }, { "epoch": 13.375977653631285, "grad_norm": 0.6838260293006897, "learning_rate": 0.00033249299719887956, "loss": 0.3823, "step": 23943 }, { "epoch": 13.376536312849161, "grad_norm": 0.4618566632270813, "learning_rate": 0.00033246498599439776, "loss": 0.5363, "step": 23944 }, { "epoch": 13.37709497206704, "grad_norm": 0.6762872934341431, "learning_rate": 0.00033243697478991597, "loss": 0.4902, "step": 23945 }, { "epoch": 13.377653631284916, "grad_norm": 0.39185744524002075, "learning_rate": 0.0003324089635854342, "loss": 0.4039, "step": 23946 }, { "epoch": 13.378212290502793, "grad_norm": 0.4100474417209625, "learning_rate": 0.0003323809523809524, "loss": 0.3251, "step": 23947 }, { "epoch": 13.378770949720671, "grad_norm": 0.4624863564968109, "learning_rate": 0.0003323529411764706, "loss": 0.3983, 
"step": 23948 }, { "epoch": 13.379329608938548, "grad_norm": 0.9086792469024658, "learning_rate": 0.00033232492997198885, "loss": 0.4873, "step": 23949 }, { "epoch": 13.379888268156424, "grad_norm": 1.9930933713912964, "learning_rate": 0.000332296918767507, "loss": 0.4458, "step": 23950 }, { "epoch": 13.380446927374301, "grad_norm": 0.6225258708000183, "learning_rate": 0.0003322689075630252, "loss": 0.3734, "step": 23951 }, { "epoch": 13.38100558659218, "grad_norm": 0.4544983208179474, "learning_rate": 0.0003322408963585434, "loss": 0.4438, "step": 23952 }, { "epoch": 13.381564245810056, "grad_norm": 0.8222813606262207, "learning_rate": 0.0003322128851540616, "loss": 0.63, "step": 23953 }, { "epoch": 13.382122905027932, "grad_norm": 0.5289430022239685, "learning_rate": 0.0003321848739495799, "loss": 0.3208, "step": 23954 }, { "epoch": 13.38268156424581, "grad_norm": 0.3467426300048828, "learning_rate": 0.00033215686274509803, "loss": 0.3111, "step": 23955 }, { "epoch": 13.383240223463687, "grad_norm": 2.535980463027954, "learning_rate": 0.00033212885154061623, "loss": 0.5181, "step": 23956 }, { "epoch": 13.383798882681564, "grad_norm": 1.7109472751617432, "learning_rate": 0.0003321008403361345, "loss": 0.4645, "step": 23957 }, { "epoch": 13.38435754189944, "grad_norm": 0.6140024065971375, "learning_rate": 0.00033207282913165265, "loss": 0.3611, "step": 23958 }, { "epoch": 13.384916201117319, "grad_norm": 0.39160990715026855, "learning_rate": 0.0003320448179271709, "loss": 0.4454, "step": 23959 }, { "epoch": 13.385474860335195, "grad_norm": 0.7750301957130432, "learning_rate": 0.00033201680672268906, "loss": 0.4169, "step": 23960 }, { "epoch": 13.386033519553072, "grad_norm": 0.8618877530097961, "learning_rate": 0.00033198879551820726, "loss": 0.5585, "step": 23961 }, { "epoch": 13.38659217877095, "grad_norm": 1.4556639194488525, "learning_rate": 0.0003319607843137255, "loss": 0.446, "step": 23962 }, { "epoch": 13.387150837988827, "grad_norm": 0.37511712312698364, 
"learning_rate": 0.0003319327731092437, "loss": 0.4248, "step": 23963 }, { "epoch": 13.387709497206703, "grad_norm": 0.6858104467391968, "learning_rate": 0.00033190476190476194, "loss": 0.4957, "step": 23964 }, { "epoch": 13.388268156424582, "grad_norm": 0.6047878265380859, "learning_rate": 0.00033187675070028014, "loss": 0.4738, "step": 23965 }, { "epoch": 13.388826815642458, "grad_norm": 0.44325828552246094, "learning_rate": 0.0003318487394957983, "loss": 0.3719, "step": 23966 }, { "epoch": 13.389385474860335, "grad_norm": 0.724241316318512, "learning_rate": 0.00033182072829131655, "loss": 0.3483, "step": 23967 }, { "epoch": 13.389944134078211, "grad_norm": 0.5017548203468323, "learning_rate": 0.0003317927170868347, "loss": 0.4482, "step": 23968 }, { "epoch": 13.39050279329609, "grad_norm": 0.43123143911361694, "learning_rate": 0.00033176470588235297, "loss": 0.4151, "step": 23969 }, { "epoch": 13.391061452513966, "grad_norm": 0.31168562173843384, "learning_rate": 0.00033173669467787117, "loss": 0.297, "step": 23970 }, { "epoch": 13.391620111731843, "grad_norm": 0.49120715260505676, "learning_rate": 0.0003317086834733893, "loss": 0.3888, "step": 23971 }, { "epoch": 13.392178770949721, "grad_norm": 1.9605176448822021, "learning_rate": 0.0003316806722689076, "loss": 0.3904, "step": 23972 }, { "epoch": 13.392737430167598, "grad_norm": 0.5817265510559082, "learning_rate": 0.0003316526610644258, "loss": 0.4741, "step": 23973 }, { "epoch": 13.393296089385474, "grad_norm": 0.42018619179725647, "learning_rate": 0.000331624649859944, "loss": 0.3869, "step": 23974 }, { "epoch": 13.393854748603353, "grad_norm": 0.7574966549873352, "learning_rate": 0.0003315966386554622, "loss": 0.4373, "step": 23975 }, { "epoch": 13.39441340782123, "grad_norm": 0.5552080869674683, "learning_rate": 0.00033156862745098035, "loss": 0.342, "step": 23976 }, { "epoch": 13.394972067039106, "grad_norm": 0.42406103014945984, "learning_rate": 0.0003315406162464986, "loss": 0.4161, "step": 23977 }, { 
"epoch": 13.395530726256982, "grad_norm": 1.149976134300232, "learning_rate": 0.0003315126050420168, "loss": 0.3485, "step": 23978 }, { "epoch": 13.39608938547486, "grad_norm": 0.43855980038642883, "learning_rate": 0.000331484593837535, "loss": 0.4103, "step": 23979 }, { "epoch": 13.396648044692737, "grad_norm": 0.5216570496559143, "learning_rate": 0.00033145658263305323, "loss": 0.3253, "step": 23980 }, { "epoch": 13.397206703910614, "grad_norm": 0.408236563205719, "learning_rate": 0.00033142857142857144, "loss": 0.3827, "step": 23981 }, { "epoch": 13.397765363128492, "grad_norm": 0.8788466453552246, "learning_rate": 0.00033140056022408964, "loss": 0.4947, "step": 23982 }, { "epoch": 13.398324022346369, "grad_norm": 0.5359235405921936, "learning_rate": 0.00033137254901960785, "loss": 0.3661, "step": 23983 }, { "epoch": 13.398882681564245, "grad_norm": 0.5464350581169128, "learning_rate": 0.00033134453781512606, "loss": 0.4578, "step": 23984 }, { "epoch": 13.399441340782122, "grad_norm": 0.43618351221084595, "learning_rate": 0.00033131652661064426, "loss": 0.3375, "step": 23985 }, { "epoch": 13.4, "grad_norm": 0.4241359531879425, "learning_rate": 0.00033128851540616247, "loss": 0.3676, "step": 23986 }, { "epoch": 13.400558659217877, "grad_norm": 0.6320966482162476, "learning_rate": 0.0003312605042016807, "loss": 0.3961, "step": 23987 }, { "epoch": 13.401117318435753, "grad_norm": 0.479246586561203, "learning_rate": 0.0003312324929971989, "loss": 0.4815, "step": 23988 }, { "epoch": 13.401675977653632, "grad_norm": 0.34296807646751404, "learning_rate": 0.00033120448179271714, "loss": 0.2678, "step": 23989 }, { "epoch": 13.402234636871508, "grad_norm": 0.5537387728691101, "learning_rate": 0.0003311764705882353, "loss": 0.5158, "step": 23990 }, { "epoch": 13.402793296089385, "grad_norm": 0.7673983573913574, "learning_rate": 0.0003311484593837535, "loss": 0.5018, "step": 23991 }, { "epoch": 13.403351955307263, "grad_norm": 0.3897479772567749, "learning_rate": 
0.0003311204481792717, "loss": 0.3878, "step": 23992 }, { "epoch": 13.40391061452514, "grad_norm": 1.1340206861495972, "learning_rate": 0.0003310924369747899, "loss": 0.3561, "step": 23993 }, { "epoch": 13.404469273743016, "grad_norm": 0.5545897483825684, "learning_rate": 0.00033106442577030817, "loss": 0.3832, "step": 23994 }, { "epoch": 13.405027932960893, "grad_norm": 0.9974201321601868, "learning_rate": 0.0003310364145658263, "loss": 0.3523, "step": 23995 }, { "epoch": 13.405586592178771, "grad_norm": 0.4552745819091797, "learning_rate": 0.00033100840336134453, "loss": 0.4373, "step": 23996 }, { "epoch": 13.406145251396648, "grad_norm": 0.3898215889930725, "learning_rate": 0.0003309803921568628, "loss": 0.4002, "step": 23997 }, { "epoch": 13.406703910614524, "grad_norm": 0.8718129396438599, "learning_rate": 0.00033095238095238094, "loss": 0.6538, "step": 23998 }, { "epoch": 13.407262569832403, "grad_norm": 0.4390983283519745, "learning_rate": 0.0003309243697478992, "loss": 0.3123, "step": 23999 }, { "epoch": 13.40782122905028, "grad_norm": 0.5923282504081726, "learning_rate": 0.00033089635854341735, "loss": 0.5346, "step": 24000 }, { "epoch": 13.40782122905028, "eval_cer": 0.08641833873413854, "eval_loss": 0.33153823018074036, "eval_runtime": 55.6683, "eval_samples_per_second": 81.519, "eval_steps_per_second": 5.102, "eval_wer": 0.3398229099745817, "step": 24000 }, { "epoch": 13.408379888268156, "grad_norm": 0.8365556001663208, "learning_rate": 0.00033086834733893556, "loss": 0.4261, "step": 24001 }, { "epoch": 13.408938547486034, "grad_norm": 0.551045835018158, "learning_rate": 0.0003308403361344538, "loss": 0.3676, "step": 24002 }, { "epoch": 13.40949720670391, "grad_norm": 0.628197431564331, "learning_rate": 0.00033081232492997197, "loss": 0.4124, "step": 24003 }, { "epoch": 13.410055865921787, "grad_norm": 0.5075826644897461, "learning_rate": 0.00033078431372549023, "loss": 0.4317, "step": 24004 }, { "epoch": 13.410614525139664, "grad_norm": 
5.493040561676025, "learning_rate": 0.00033075630252100844, "loss": 0.6192, "step": 24005 }, { "epoch": 13.411173184357542, "grad_norm": 0.4345131516456604, "learning_rate": 0.0003307282913165266, "loss": 0.3825, "step": 24006 }, { "epoch": 13.411731843575419, "grad_norm": 0.37854650616645813, "learning_rate": 0.00033070028011204485, "loss": 0.3553, "step": 24007 }, { "epoch": 13.412290502793295, "grad_norm": 0.629016101360321, "learning_rate": 0.000330672268907563, "loss": 0.3608, "step": 24008 }, { "epoch": 13.412849162011174, "grad_norm": 0.48460161685943604, "learning_rate": 0.00033064425770308126, "loss": 0.4466, "step": 24009 }, { "epoch": 13.41340782122905, "grad_norm": 0.5453964471817017, "learning_rate": 0.00033061624649859947, "loss": 0.3567, "step": 24010 }, { "epoch": 13.413966480446927, "grad_norm": 0.5664440393447876, "learning_rate": 0.0003305882352941176, "loss": 0.4554, "step": 24011 }, { "epoch": 13.414525139664805, "grad_norm": 0.5190845131874084, "learning_rate": 0.0003305602240896359, "loss": 0.4445, "step": 24012 }, { "epoch": 13.415083798882682, "grad_norm": 0.49040234088897705, "learning_rate": 0.0003305322128851541, "loss": 0.3752, "step": 24013 }, { "epoch": 13.415642458100558, "grad_norm": 0.5478666424751282, "learning_rate": 0.0003305042016806723, "loss": 0.476, "step": 24014 }, { "epoch": 13.416201117318435, "grad_norm": 2.912370204925537, "learning_rate": 0.0003304761904761905, "loss": 0.5006, "step": 24015 }, { "epoch": 13.416759776536313, "grad_norm": 0.6135877370834351, "learning_rate": 0.00033044817927170865, "loss": 0.5034, "step": 24016 }, { "epoch": 13.41731843575419, "grad_norm": 0.8700762391090393, "learning_rate": 0.0003304201680672269, "loss": 0.6384, "step": 24017 }, { "epoch": 13.417877094972066, "grad_norm": 0.47633153200149536, "learning_rate": 0.0003303921568627451, "loss": 0.3772, "step": 24018 }, { "epoch": 13.418435754189945, "grad_norm": 0.4083118438720703, "learning_rate": 0.0003303641456582633, "loss": 0.3795, 
"step": 24019 }, { "epoch": 13.418994413407821, "grad_norm": 0.45499464869499207, "learning_rate": 0.0003303361344537815, "loss": 0.4086, "step": 24020 }, { "epoch": 13.419553072625698, "grad_norm": 0.46390077471733093, "learning_rate": 0.00033030812324929973, "loss": 0.445, "step": 24021 }, { "epoch": 13.420111731843576, "grad_norm": 0.5300405025482178, "learning_rate": 0.00033028011204481794, "loss": 0.3861, "step": 24022 }, { "epoch": 13.420670391061453, "grad_norm": 0.5467058420181274, "learning_rate": 0.00033025210084033614, "loss": 0.5471, "step": 24023 }, { "epoch": 13.42122905027933, "grad_norm": 0.5927534699440002, "learning_rate": 0.00033022408963585435, "loss": 0.3758, "step": 24024 }, { "epoch": 13.421787709497206, "grad_norm": 0.7953538298606873, "learning_rate": 0.00033019607843137256, "loss": 0.5591, "step": 24025 }, { "epoch": 13.422346368715084, "grad_norm": 0.5789852738380432, "learning_rate": 0.00033016806722689076, "loss": 0.5116, "step": 24026 }, { "epoch": 13.422905027932961, "grad_norm": 0.37751510739326477, "learning_rate": 0.00033014005602240897, "loss": 0.3924, "step": 24027 }, { "epoch": 13.423463687150837, "grad_norm": 0.3508763909339905, "learning_rate": 0.0003301120448179272, "loss": 0.4381, "step": 24028 }, { "epoch": 13.424022346368716, "grad_norm": 0.458859920501709, "learning_rate": 0.00033008403361344543, "loss": 0.4542, "step": 24029 }, { "epoch": 13.424581005586592, "grad_norm": 0.45266950130462646, "learning_rate": 0.0003300560224089636, "loss": 0.3955, "step": 24030 }, { "epoch": 13.425139664804469, "grad_norm": 0.5199288129806519, "learning_rate": 0.0003300280112044818, "loss": 0.319, "step": 24031 }, { "epoch": 13.425698324022346, "grad_norm": 9.08499813079834, "learning_rate": 0.00033, "loss": 0.4953, "step": 24032 }, { "epoch": 13.426256983240224, "grad_norm": 0.37150460481643677, "learning_rate": 0.0003299719887955182, "loss": 0.3951, "step": 24033 }, { "epoch": 13.4268156424581, "grad_norm": 0.7141701579093933, 
"learning_rate": 0.00032994397759103646, "loss": 0.4892, "step": 24034 }, { "epoch": 13.427374301675977, "grad_norm": 0.6042700409889221, "learning_rate": 0.0003299159663865546, "loss": 0.4408, "step": 24035 }, { "epoch": 13.427932960893855, "grad_norm": 0.5827171206474304, "learning_rate": 0.0003298879551820728, "loss": 0.3516, "step": 24036 }, { "epoch": 13.428491620111732, "grad_norm": 0.45501765608787537, "learning_rate": 0.0003298599439775911, "loss": 0.5182, "step": 24037 }, { "epoch": 13.429050279329608, "grad_norm": 0.6984469890594482, "learning_rate": 0.00032983193277310923, "loss": 0.5069, "step": 24038 }, { "epoch": 13.429608938547487, "grad_norm": 1.102880835533142, "learning_rate": 0.0003298039215686275, "loss": 0.357, "step": 24039 }, { "epoch": 13.430167597765363, "grad_norm": 0.38930967450141907, "learning_rate": 0.00032977591036414565, "loss": 0.3601, "step": 24040 }, { "epoch": 13.43072625698324, "grad_norm": 0.8170896172523499, "learning_rate": 0.00032974789915966385, "loss": 0.4151, "step": 24041 }, { "epoch": 13.431284916201117, "grad_norm": 0.34871599078178406, "learning_rate": 0.0003297198879551821, "loss": 0.4465, "step": 24042 }, { "epoch": 13.431843575418995, "grad_norm": 0.4154914915561676, "learning_rate": 0.00032969187675070026, "loss": 0.3849, "step": 24043 }, { "epoch": 13.432402234636871, "grad_norm": 1.2494512796401978, "learning_rate": 0.00032966386554621847, "loss": 0.4804, "step": 24044 }, { "epoch": 13.432960893854748, "grad_norm": 2.529845952987671, "learning_rate": 0.00032963585434173673, "loss": 0.4597, "step": 24045 }, { "epoch": 13.433519553072626, "grad_norm": 0.6845638155937195, "learning_rate": 0.0003296078431372549, "loss": 0.4631, "step": 24046 }, { "epoch": 13.434078212290503, "grad_norm": 0.7565985321998596, "learning_rate": 0.00032957983193277314, "loss": 0.3217, "step": 24047 }, { "epoch": 13.43463687150838, "grad_norm": 0.3648121953010559, "learning_rate": 0.0003295518207282913, "loss": 0.4119, "step": 24048 }, { 
"epoch": 13.435195530726258, "grad_norm": 0.5148236155509949, "learning_rate": 0.0003295238095238095, "loss": 0.4098, "step": 24049 }, { "epoch": 13.435754189944134, "grad_norm": 6.18539571762085, "learning_rate": 0.00032949579831932776, "loss": 0.473, "step": 24050 }, { "epoch": 13.436312849162011, "grad_norm": 0.45961225032806396, "learning_rate": 0.0003294677871148459, "loss": 0.4264, "step": 24051 }, { "epoch": 13.436871508379888, "grad_norm": 0.5115296244621277, "learning_rate": 0.00032943977591036417, "loss": 0.3598, "step": 24052 }, { "epoch": 13.437430167597766, "grad_norm": 0.45676034688949585, "learning_rate": 0.0003294117647058824, "loss": 0.4193, "step": 24053 }, { "epoch": 13.437988826815642, "grad_norm": 0.6057080030441284, "learning_rate": 0.00032938375350140053, "loss": 0.4042, "step": 24054 }, { "epoch": 13.438547486033519, "grad_norm": 0.5430832505226135, "learning_rate": 0.0003293557422969188, "loss": 0.3292, "step": 24055 }, { "epoch": 13.439106145251397, "grad_norm": 0.4006073772907257, "learning_rate": 0.00032932773109243694, "loss": 0.3254, "step": 24056 }, { "epoch": 13.439664804469274, "grad_norm": 3.4763219356536865, "learning_rate": 0.0003292997198879552, "loss": 0.4068, "step": 24057 }, { "epoch": 13.44022346368715, "grad_norm": 0.4126766622066498, "learning_rate": 0.0003292717086834734, "loss": 0.3275, "step": 24058 }, { "epoch": 13.440782122905027, "grad_norm": 0.3994097411632538, "learning_rate": 0.00032924369747899156, "loss": 0.3569, "step": 24059 }, { "epoch": 13.441340782122905, "grad_norm": 0.49512559175491333, "learning_rate": 0.0003292156862745098, "loss": 0.3286, "step": 24060 }, { "epoch": 13.441899441340782, "grad_norm": 0.6281354427337646, "learning_rate": 0.000329187675070028, "loss": 0.4282, "step": 24061 }, { "epoch": 13.442458100558659, "grad_norm": 0.8131765723228455, "learning_rate": 0.00032915966386554623, "loss": 0.4747, "step": 24062 }, { "epoch": 13.443016759776537, "grad_norm": 0.6101727485656738, 
"learning_rate": 0.00032913165266106444, "loss": 0.4062, "step": 24063 }, { "epoch": 13.443575418994413, "grad_norm": 0.4951469600200653, "learning_rate": 0.0003291036414565826, "loss": 0.4507, "step": 24064 }, { "epoch": 13.44413407821229, "grad_norm": 0.5197064876556396, "learning_rate": 0.00032907563025210085, "loss": 0.4306, "step": 24065 }, { "epoch": 13.444692737430168, "grad_norm": 3.8119289875030518, "learning_rate": 0.00032904761904761906, "loss": 0.3867, "step": 24066 }, { "epoch": 13.445251396648045, "grad_norm": 0.5981159806251526, "learning_rate": 0.00032901960784313726, "loss": 0.6226, "step": 24067 }, { "epoch": 13.445810055865921, "grad_norm": 0.4323895275592804, "learning_rate": 0.00032899159663865547, "loss": 0.3518, "step": 24068 }, { "epoch": 13.446368715083798, "grad_norm": 0.5299617052078247, "learning_rate": 0.0003289635854341737, "loss": 0.6003, "step": 24069 }, { "epoch": 13.446927374301676, "grad_norm": 0.5881797671318054, "learning_rate": 0.0003289355742296919, "loss": 0.2923, "step": 24070 }, { "epoch": 13.447486033519553, "grad_norm": 0.5459182262420654, "learning_rate": 0.0003289075630252101, "loss": 0.3705, "step": 24071 }, { "epoch": 13.44804469273743, "grad_norm": 0.6495555639266968, "learning_rate": 0.0003288795518207283, "loss": 0.3372, "step": 24072 }, { "epoch": 13.448603351955308, "grad_norm": 0.5846161842346191, "learning_rate": 0.0003288515406162465, "loss": 0.3325, "step": 24073 }, { "epoch": 13.449162011173184, "grad_norm": 0.44714757800102234, "learning_rate": 0.0003288235294117647, "loss": 0.4131, "step": 24074 }, { "epoch": 13.449720670391061, "grad_norm": 0.5594474077224731, "learning_rate": 0.0003287955182072829, "loss": 0.6322, "step": 24075 }, { "epoch": 13.45027932960894, "grad_norm": 0.4771212637424469, "learning_rate": 0.0003287675070028011, "loss": 0.4221, "step": 24076 }, { "epoch": 13.450837988826816, "grad_norm": 0.461176335811615, "learning_rate": 0.0003287394957983194, "loss": 0.3213, "step": 24077 }, { 
"epoch": 13.451396648044692, "grad_norm": 1.8055278062820435, "learning_rate": 0.00032871148459383753, "loss": 0.3278, "step": 24078 }, { "epoch": 13.451955307262569, "grad_norm": 1.654625415802002, "learning_rate": 0.00032868347338935573, "loss": 0.4666, "step": 24079 }, { "epoch": 13.452513966480447, "grad_norm": 0.6786640882492065, "learning_rate": 0.00032865546218487394, "loss": 0.4018, "step": 24080 }, { "epoch": 13.453072625698324, "grad_norm": 0.7715920805931091, "learning_rate": 0.00032862745098039215, "loss": 0.5213, "step": 24081 }, { "epoch": 13.4536312849162, "grad_norm": 0.5421979427337646, "learning_rate": 0.0003285994397759104, "loss": 0.4043, "step": 24082 }, { "epoch": 13.454189944134079, "grad_norm": 0.7266767024993896, "learning_rate": 0.00032857142857142856, "loss": 0.3895, "step": 24083 }, { "epoch": 13.454748603351955, "grad_norm": 9.650465965270996, "learning_rate": 0.00032854341736694676, "loss": 0.378, "step": 24084 }, { "epoch": 13.455307262569832, "grad_norm": 0.44124212861061096, "learning_rate": 0.000328515406162465, "loss": 0.3872, "step": 24085 }, { "epoch": 13.45586592178771, "grad_norm": 0.9369888305664062, "learning_rate": 0.0003284873949579832, "loss": 0.4032, "step": 24086 }, { "epoch": 13.456424581005587, "grad_norm": 0.4573241174221039, "learning_rate": 0.00032845938375350144, "loss": 0.4073, "step": 24087 }, { "epoch": 13.456983240223463, "grad_norm": 0.35886985063552856, "learning_rate": 0.0003284313725490196, "loss": 0.3779, "step": 24088 }, { "epoch": 13.45754189944134, "grad_norm": 0.7889412641525269, "learning_rate": 0.0003284033613445378, "loss": 0.756, "step": 24089 }, { "epoch": 13.458100558659218, "grad_norm": 0.7551457285881042, "learning_rate": 0.00032837535014005605, "loss": 0.3321, "step": 24090 }, { "epoch": 13.458659217877095, "grad_norm": 0.4197429120540619, "learning_rate": 0.0003283473389355742, "loss": 0.4102, "step": 24091 }, { "epoch": 13.459217877094972, "grad_norm": 0.7796857953071594, "learning_rate": 
0.00032831932773109247, "loss": 0.5002, "step": 24092 }, { "epoch": 13.45977653631285, "grad_norm": 2.507544755935669, "learning_rate": 0.00032829131652661067, "loss": 0.4085, "step": 24093 }, { "epoch": 13.460335195530726, "grad_norm": 2.926431894302368, "learning_rate": 0.0003282633053221288, "loss": 0.4222, "step": 24094 }, { "epoch": 13.460893854748603, "grad_norm": 0.5870780348777771, "learning_rate": 0.0003282352941176471, "loss": 0.4732, "step": 24095 }, { "epoch": 13.461452513966481, "grad_norm": 0.43072089552879333, "learning_rate": 0.00032820728291316524, "loss": 0.3823, "step": 24096 }, { "epoch": 13.462011173184358, "grad_norm": 0.4563128650188446, "learning_rate": 0.0003281792717086835, "loss": 0.4411, "step": 24097 }, { "epoch": 13.462569832402234, "grad_norm": 1.0716365575790405, "learning_rate": 0.0003281512605042017, "loss": 0.4848, "step": 24098 }, { "epoch": 13.463128491620111, "grad_norm": 1.2820457220077515, "learning_rate": 0.00032812324929971985, "loss": 0.4763, "step": 24099 }, { "epoch": 13.46368715083799, "grad_norm": 0.3371823728084564, "learning_rate": 0.0003280952380952381, "loss": 0.3929, "step": 24100 }, { "epoch": 13.464245810055866, "grad_norm": 0.6280838847160339, "learning_rate": 0.0003280672268907563, "loss": 0.3704, "step": 24101 }, { "epoch": 13.464804469273743, "grad_norm": 2.7057723999023438, "learning_rate": 0.0003280392156862745, "loss": 0.3824, "step": 24102 }, { "epoch": 13.46536312849162, "grad_norm": 0.832786500453949, "learning_rate": 0.00032801120448179273, "loss": 0.5244, "step": 24103 }, { "epoch": 13.465921787709497, "grad_norm": 0.4125325083732605, "learning_rate": 0.0003279831932773109, "loss": 0.3519, "step": 24104 }, { "epoch": 13.466480446927374, "grad_norm": 0.5188695192337036, "learning_rate": 0.00032795518207282914, "loss": 0.5037, "step": 24105 }, { "epoch": 13.46703910614525, "grad_norm": 0.517441987991333, "learning_rate": 0.00032792717086834735, "loss": 0.453, "step": 24106 }, { "epoch": 
13.467597765363129, "grad_norm": 0.890071451663971, "learning_rate": 0.00032789915966386556, "loss": 0.4698, "step": 24107 }, { "epoch": 13.468156424581005, "grad_norm": 0.4595129191875458, "learning_rate": 0.00032787114845938376, "loss": 0.4612, "step": 24108 }, { "epoch": 13.468715083798882, "grad_norm": 0.47704362869262695, "learning_rate": 0.00032784313725490197, "loss": 0.4529, "step": 24109 }, { "epoch": 13.46927374301676, "grad_norm": 0.5455402135848999, "learning_rate": 0.0003278151260504202, "loss": 0.4508, "step": 24110 }, { "epoch": 13.469832402234637, "grad_norm": 0.5782555937767029, "learning_rate": 0.0003277871148459384, "loss": 0.4033, "step": 24111 }, { "epoch": 13.470391061452514, "grad_norm": 0.4718027412891388, "learning_rate": 0.0003277591036414566, "loss": 0.4518, "step": 24112 }, { "epoch": 13.470949720670392, "grad_norm": 0.5594518780708313, "learning_rate": 0.0003277310924369748, "loss": 0.3742, "step": 24113 }, { "epoch": 13.471508379888268, "grad_norm": 0.4130031168460846, "learning_rate": 0.000327703081232493, "loss": 0.402, "step": 24114 }, { "epoch": 13.472067039106145, "grad_norm": 0.3438200354576111, "learning_rate": 0.0003276750700280112, "loss": 0.3988, "step": 24115 }, { "epoch": 13.472625698324022, "grad_norm": 0.37586405873298645, "learning_rate": 0.0003276470588235294, "loss": 0.3869, "step": 24116 }, { "epoch": 13.4731843575419, "grad_norm": 0.5238877534866333, "learning_rate": 0.00032761904761904767, "loss": 0.572, "step": 24117 }, { "epoch": 13.473743016759776, "grad_norm": 0.5923094153404236, "learning_rate": 0.0003275910364145658, "loss": 0.3986, "step": 24118 }, { "epoch": 13.474301675977653, "grad_norm": 2.852379322052002, "learning_rate": 0.00032756302521008403, "loss": 0.5165, "step": 24119 }, { "epoch": 13.474860335195531, "grad_norm": 0.4096725583076477, "learning_rate": 0.00032753501400560223, "loss": 0.4266, "step": 24120 }, { "epoch": 13.475418994413408, "grad_norm": 0.5331655144691467, "learning_rate": 
0.00032750700280112044, "loss": 0.4184, "step": 24121 }, { "epoch": 13.475977653631285, "grad_norm": 2.1263222694396973, "learning_rate": 0.0003274789915966387, "loss": 0.3689, "step": 24122 }, { "epoch": 13.476536312849163, "grad_norm": 0.6322385668754578, "learning_rate": 0.00032745098039215685, "loss": 0.4613, "step": 24123 }, { "epoch": 13.47709497206704, "grad_norm": 0.4124833941459656, "learning_rate": 0.00032742296918767506, "loss": 0.359, "step": 24124 }, { "epoch": 13.477653631284916, "grad_norm": 0.5539673566818237, "learning_rate": 0.0003273949579831933, "loss": 0.5617, "step": 24125 }, { "epoch": 13.478212290502793, "grad_norm": 2.7253148555755615, "learning_rate": 0.00032736694677871147, "loss": 0.2838, "step": 24126 }, { "epoch": 13.478770949720671, "grad_norm": 0.44082266092300415, "learning_rate": 0.00032733893557422973, "loss": 0.4267, "step": 24127 }, { "epoch": 13.479329608938547, "grad_norm": 0.3895180821418762, "learning_rate": 0.0003273109243697479, "loss": 0.3356, "step": 24128 }, { "epoch": 13.479888268156424, "grad_norm": 0.4109691083431244, "learning_rate": 0.0003272829131652661, "loss": 0.4625, "step": 24129 }, { "epoch": 13.480446927374302, "grad_norm": 0.46554839611053467, "learning_rate": 0.00032725490196078435, "loss": 0.4282, "step": 24130 }, { "epoch": 13.481005586592179, "grad_norm": 0.3763386607170105, "learning_rate": 0.0003272268907563025, "loss": 0.4413, "step": 24131 }, { "epoch": 13.481564245810056, "grad_norm": 0.4315921664237976, "learning_rate": 0.00032719887955182076, "loss": 0.4097, "step": 24132 }, { "epoch": 13.482122905027932, "grad_norm": 0.4852002263069153, "learning_rate": 0.00032717086834733897, "loss": 0.4, "step": 24133 }, { "epoch": 13.48268156424581, "grad_norm": 0.5241253972053528, "learning_rate": 0.0003271428571428571, "loss": 0.3607, "step": 24134 }, { "epoch": 13.483240223463687, "grad_norm": 0.4029459059238434, "learning_rate": 0.0003271148459383754, "loss": 0.4342, "step": 24135 }, { "epoch": 
13.483798882681564, "grad_norm": 0.5683883428573608, "learning_rate": 0.00032708683473389353, "loss": 0.3894, "step": 24136 }, { "epoch": 13.484357541899442, "grad_norm": 0.4287756085395813, "learning_rate": 0.0003270588235294118, "loss": 0.3678, "step": 24137 }, { "epoch": 13.484916201117318, "grad_norm": 0.9509091377258301, "learning_rate": 0.00032703081232493, "loss": 0.4939, "step": 24138 }, { "epoch": 13.485474860335195, "grad_norm": 0.5072173476219177, "learning_rate": 0.00032700280112044815, "loss": 0.3109, "step": 24139 }, { "epoch": 13.486033519553073, "grad_norm": 0.36319008469581604, "learning_rate": 0.0003269747899159664, "loss": 0.2862, "step": 24140 }, { "epoch": 13.48659217877095, "grad_norm": 0.6130936741828918, "learning_rate": 0.0003269467787114846, "loss": 0.4215, "step": 24141 }, { "epoch": 13.487150837988827, "grad_norm": 0.380995512008667, "learning_rate": 0.0003269187675070028, "loss": 0.4091, "step": 24142 }, { "epoch": 13.487709497206703, "grad_norm": 0.42395463585853577, "learning_rate": 0.000326890756302521, "loss": 0.5316, "step": 24143 }, { "epoch": 13.488268156424581, "grad_norm": 0.40420690178871155, "learning_rate": 0.0003268627450980392, "loss": 0.3688, "step": 24144 }, { "epoch": 13.488826815642458, "grad_norm": 0.5257751941680908, "learning_rate": 0.00032683473389355744, "loss": 0.3836, "step": 24145 }, { "epoch": 13.489385474860335, "grad_norm": 0.8360605835914612, "learning_rate": 0.00032680672268907564, "loss": 0.3395, "step": 24146 }, { "epoch": 13.489944134078213, "grad_norm": 0.627238929271698, "learning_rate": 0.00032677871148459385, "loss": 0.2734, "step": 24147 }, { "epoch": 13.49050279329609, "grad_norm": 0.4085390269756317, "learning_rate": 0.00032675070028011206, "loss": 0.4635, "step": 24148 }, { "epoch": 13.491061452513966, "grad_norm": 0.46530553698539734, "learning_rate": 0.00032672268907563026, "loss": 0.4391, "step": 24149 }, { "epoch": 13.491620111731844, "grad_norm": 0.3951616883277893, "learning_rate": 
0.00032669467787114847, "loss": 0.4111, "step": 24150 }, { "epoch": 13.492178770949721, "grad_norm": 0.5235318541526794, "learning_rate": 0.0003266666666666667, "loss": 0.4743, "step": 24151 }, { "epoch": 13.492737430167598, "grad_norm": 0.4140463173389435, "learning_rate": 0.00032663865546218493, "loss": 0.3888, "step": 24152 }, { "epoch": 13.493296089385474, "grad_norm": 1.059981346130371, "learning_rate": 0.0003266106442577031, "loss": 0.4244, "step": 24153 }, { "epoch": 13.493854748603352, "grad_norm": 0.4322214424610138, "learning_rate": 0.0003265826330532213, "loss": 0.3555, "step": 24154 }, { "epoch": 13.494413407821229, "grad_norm": 0.4388740658760071, "learning_rate": 0.0003265546218487395, "loss": 0.4535, "step": 24155 }, { "epoch": 13.494972067039106, "grad_norm": 0.39848706126213074, "learning_rate": 0.0003265266106442577, "loss": 0.3855, "step": 24156 }, { "epoch": 13.495530726256984, "grad_norm": 0.5250911116600037, "learning_rate": 0.0003264985994397759, "loss": 0.3727, "step": 24157 }, { "epoch": 13.49608938547486, "grad_norm": 0.837040901184082, "learning_rate": 0.0003264705882352941, "loss": 0.435, "step": 24158 }, { "epoch": 13.496648044692737, "grad_norm": 0.575750470161438, "learning_rate": 0.0003264425770308123, "loss": 0.5221, "step": 24159 }, { "epoch": 13.497206703910614, "grad_norm": 1.2366598844528198, "learning_rate": 0.0003264145658263306, "loss": 0.403, "step": 24160 }, { "epoch": 13.497765363128492, "grad_norm": 0.4141302704811096, "learning_rate": 0.00032638655462184873, "loss": 0.5009, "step": 24161 }, { "epoch": 13.498324022346369, "grad_norm": 0.6292719841003418, "learning_rate": 0.00032635854341736694, "loss": 0.4022, "step": 24162 }, { "epoch": 13.498882681564245, "grad_norm": 0.4115258455276489, "learning_rate": 0.00032633053221288515, "loss": 0.4545, "step": 24163 }, { "epoch": 13.499441340782123, "grad_norm": 0.43598470091819763, "learning_rate": 0.00032630252100840335, "loss": 0.3849, "step": 24164 }, { "epoch": 13.5, 
"grad_norm": 0.4456072449684143, "learning_rate": 0.0003262745098039216, "loss": 0.3516, "step": 24165 }, { "epoch": 13.500558659217877, "grad_norm": 0.3481283485889435, "learning_rate": 0.00032624649859943976, "loss": 0.3444, "step": 24166 }, { "epoch": 13.501117318435755, "grad_norm": 0.45081251859664917, "learning_rate": 0.00032621848739495797, "loss": 0.3664, "step": 24167 }, { "epoch": 13.501675977653631, "grad_norm": 0.39018774032592773, "learning_rate": 0.00032619047619047623, "loss": 0.5551, "step": 24168 }, { "epoch": 13.502234636871508, "grad_norm": 0.5667728185653687, "learning_rate": 0.0003261624649859944, "loss": 0.425, "step": 24169 }, { "epoch": 13.502793296089386, "grad_norm": 0.3822039067745209, "learning_rate": 0.00032613445378151264, "loss": 0.3998, "step": 24170 }, { "epoch": 13.503351955307263, "grad_norm": 0.3626020550727844, "learning_rate": 0.0003261064425770308, "loss": 0.4132, "step": 24171 }, { "epoch": 13.50391061452514, "grad_norm": 0.5741115212440491, "learning_rate": 0.000326078431372549, "loss": 0.4491, "step": 24172 }, { "epoch": 13.504469273743016, "grad_norm": 10.957284927368164, "learning_rate": 0.00032605042016806726, "loss": 0.4135, "step": 24173 }, { "epoch": 13.505027932960894, "grad_norm": 0.3592619299888611, "learning_rate": 0.0003260224089635854, "loss": 0.3967, "step": 24174 }, { "epoch": 13.505586592178771, "grad_norm": 0.5009909868240356, "learning_rate": 0.00032599439775910367, "loss": 0.4171, "step": 24175 }, { "epoch": 13.506145251396648, "grad_norm": 0.7640971541404724, "learning_rate": 0.0003259663865546219, "loss": 0.4059, "step": 24176 }, { "epoch": 13.506703910614526, "grad_norm": 0.6335141062736511, "learning_rate": 0.00032593837535014003, "loss": 0.401, "step": 24177 }, { "epoch": 13.507262569832402, "grad_norm": 0.4155081510543823, "learning_rate": 0.0003259103641456583, "loss": 0.39, "step": 24178 }, { "epoch": 13.507821229050279, "grad_norm": 0.5095494985580444, "learning_rate": 0.00032588235294117644, 
"loss": 0.3249, "step": 24179 }, { "epoch": 13.508379888268156, "grad_norm": 0.565673291683197, "learning_rate": 0.0003258543417366947, "loss": 0.5632, "step": 24180 }, { "epoch": 13.508938547486034, "grad_norm": 0.6595878005027771, "learning_rate": 0.0003258263305322129, "loss": 0.4268, "step": 24181 }, { "epoch": 13.50949720670391, "grad_norm": 0.6628794074058533, "learning_rate": 0.00032579831932773106, "loss": 0.5202, "step": 24182 }, { "epoch": 13.510055865921787, "grad_norm": 0.37234166264533997, "learning_rate": 0.0003257703081232493, "loss": 0.4072, "step": 24183 }, { "epoch": 13.510614525139665, "grad_norm": 0.42800527811050415, "learning_rate": 0.0003257422969187675, "loss": 0.3678, "step": 24184 }, { "epoch": 13.511173184357542, "grad_norm": 0.603776752948761, "learning_rate": 0.00032571428571428573, "loss": 0.5497, "step": 24185 }, { "epoch": 13.511731843575419, "grad_norm": 1.179127812385559, "learning_rate": 0.00032568627450980394, "loss": 0.4688, "step": 24186 }, { "epoch": 13.512290502793297, "grad_norm": 8.016297340393066, "learning_rate": 0.0003256582633053221, "loss": 0.4433, "step": 24187 }, { "epoch": 13.512849162011173, "grad_norm": 0.9016374945640564, "learning_rate": 0.00032563025210084035, "loss": 0.5232, "step": 24188 }, { "epoch": 13.51340782122905, "grad_norm": 0.45515188574790955, "learning_rate": 0.00032560224089635856, "loss": 0.419, "step": 24189 }, { "epoch": 13.513966480446927, "grad_norm": 0.5851801633834839, "learning_rate": 0.00032557422969187676, "loss": 0.4626, "step": 24190 }, { "epoch": 13.514525139664805, "grad_norm": 0.6761608123779297, "learning_rate": 0.00032554621848739497, "loss": 0.4628, "step": 24191 }, { "epoch": 13.515083798882682, "grad_norm": 0.4658829867839813, "learning_rate": 0.0003255182072829132, "loss": 0.3779, "step": 24192 }, { "epoch": 13.515642458100558, "grad_norm": 0.3431715667247772, "learning_rate": 0.0003254901960784314, "loss": 0.3791, "step": 24193 }, { "epoch": 13.516201117318436, "grad_norm": 
0.32139307260513306, "learning_rate": 0.0003254621848739496, "loss": 0.3361, "step": 24194 }, { "epoch": 13.516759776536313, "grad_norm": 0.49882182478904724, "learning_rate": 0.0003254341736694678, "loss": 0.4824, "step": 24195 }, { "epoch": 13.51731843575419, "grad_norm": 0.391493022441864, "learning_rate": 0.000325406162464986, "loss": 0.3547, "step": 24196 }, { "epoch": 13.517877094972068, "grad_norm": 0.7438197731971741, "learning_rate": 0.0003253781512605042, "loss": 0.5398, "step": 24197 }, { "epoch": 13.518435754189944, "grad_norm": 0.5835862755775452, "learning_rate": 0.0003253501400560224, "loss": 0.5447, "step": 24198 }, { "epoch": 13.518994413407821, "grad_norm": 0.5489450693130493, "learning_rate": 0.0003253221288515406, "loss": 0.4434, "step": 24199 }, { "epoch": 13.519553072625698, "grad_norm": 1.1164907217025757, "learning_rate": 0.0003252941176470589, "loss": 0.5135, "step": 24200 }, { "epoch": 13.520111731843576, "grad_norm": 0.532439112663269, "learning_rate": 0.00032526610644257703, "loss": 0.5785, "step": 24201 }, { "epoch": 13.520670391061453, "grad_norm": 0.3819122910499573, "learning_rate": 0.00032523809523809523, "loss": 0.3747, "step": 24202 }, { "epoch": 13.521229050279329, "grad_norm": 0.49957165122032166, "learning_rate": 0.00032521008403361344, "loss": 0.3758, "step": 24203 }, { "epoch": 13.521787709497207, "grad_norm": 0.44187700748443604, "learning_rate": 0.00032518207282913165, "loss": 0.4464, "step": 24204 }, { "epoch": 13.522346368715084, "grad_norm": 0.37818682193756104, "learning_rate": 0.0003251540616246499, "loss": 0.3839, "step": 24205 }, { "epoch": 13.52290502793296, "grad_norm": 0.4456169903278351, "learning_rate": 0.00032512605042016806, "loss": 0.4275, "step": 24206 }, { "epoch": 13.523463687150837, "grad_norm": 0.9655728936195374, "learning_rate": 0.00032509803921568626, "loss": 0.4851, "step": 24207 }, { "epoch": 13.524022346368715, "grad_norm": 0.6421684622764587, "learning_rate": 0.0003250700280112045, "loss": 0.4451, 
"step": 24208 }, { "epoch": 13.524581005586592, "grad_norm": 0.3974587917327881, "learning_rate": 0.0003250420168067227, "loss": 0.4256, "step": 24209 }, { "epoch": 13.525139664804469, "grad_norm": 0.4115317165851593, "learning_rate": 0.00032501400560224094, "loss": 0.38, "step": 24210 }, { "epoch": 13.525698324022347, "grad_norm": 0.4842468500137329, "learning_rate": 0.0003249859943977591, "loss": 0.5274, "step": 24211 }, { "epoch": 13.526256983240224, "grad_norm": 0.3966115117073059, "learning_rate": 0.0003249579831932773, "loss": 0.3963, "step": 24212 }, { "epoch": 13.5268156424581, "grad_norm": 2.8386292457580566, "learning_rate": 0.00032492997198879555, "loss": 0.3592, "step": 24213 }, { "epoch": 13.527374301675978, "grad_norm": 0.8059162497520447, "learning_rate": 0.0003249019607843137, "loss": 0.5126, "step": 24214 }, { "epoch": 13.527932960893855, "grad_norm": 0.6205608248710632, "learning_rate": 0.00032487394957983197, "loss": 0.4321, "step": 24215 }, { "epoch": 13.528491620111732, "grad_norm": 0.4475083649158478, "learning_rate": 0.00032484593837535017, "loss": 0.3884, "step": 24216 }, { "epoch": 13.529050279329608, "grad_norm": 0.5723434686660767, "learning_rate": 0.0003248179271708683, "loss": 0.4876, "step": 24217 }, { "epoch": 13.529608938547486, "grad_norm": 1.1262656450271606, "learning_rate": 0.0003247899159663866, "loss": 0.4277, "step": 24218 }, { "epoch": 13.530167597765363, "grad_norm": 0.6936318278312683, "learning_rate": 0.00032476190476190474, "loss": 0.4547, "step": 24219 }, { "epoch": 13.53072625698324, "grad_norm": 0.5107802152633667, "learning_rate": 0.000324733893557423, "loss": 0.4598, "step": 24220 }, { "epoch": 13.531284916201118, "grad_norm": 1.8162590265274048, "learning_rate": 0.0003247058823529412, "loss": 0.4705, "step": 24221 }, { "epoch": 13.531843575418995, "grad_norm": 0.7601991295814514, "learning_rate": 0.00032467787114845935, "loss": 0.4054, "step": 24222 }, { "epoch": 13.532402234636871, "grad_norm": 0.42457419633865356, 
"learning_rate": 0.0003246498599439776, "loss": 0.4487, "step": 24223 }, { "epoch": 13.53296089385475, "grad_norm": 0.49485647678375244, "learning_rate": 0.0003246218487394958, "loss": 0.4042, "step": 24224 }, { "epoch": 13.533519553072626, "grad_norm": 0.45416122674942017, "learning_rate": 0.000324593837535014, "loss": 0.4325, "step": 24225 }, { "epoch": 13.534078212290503, "grad_norm": 0.43602144718170166, "learning_rate": 0.00032456582633053223, "loss": 0.3925, "step": 24226 }, { "epoch": 13.53463687150838, "grad_norm": 0.6646022796630859, "learning_rate": 0.0003245378151260504, "loss": 0.4351, "step": 24227 }, { "epoch": 13.535195530726257, "grad_norm": 0.6392934918403625, "learning_rate": 0.00032450980392156864, "loss": 0.339, "step": 24228 }, { "epoch": 13.535754189944134, "grad_norm": 0.538756787776947, "learning_rate": 0.00032448179271708685, "loss": 0.4694, "step": 24229 }, { "epoch": 13.53631284916201, "grad_norm": 0.35067659616470337, "learning_rate": 0.00032445378151260506, "loss": 0.2582, "step": 24230 }, { "epoch": 13.536871508379889, "grad_norm": 0.4100213944911957, "learning_rate": 0.00032442577030812326, "loss": 0.4139, "step": 24231 }, { "epoch": 13.537430167597766, "grad_norm": 0.979207456111908, "learning_rate": 0.00032439775910364147, "loss": 0.4613, "step": 24232 }, { "epoch": 13.537988826815642, "grad_norm": 0.7991422414779663, "learning_rate": 0.0003243697478991597, "loss": 0.476, "step": 24233 }, { "epoch": 13.538547486033519, "grad_norm": 0.5174069404602051, "learning_rate": 0.0003243417366946779, "loss": 0.4082, "step": 24234 }, { "epoch": 13.539106145251397, "grad_norm": 0.90731281042099, "learning_rate": 0.0003243137254901961, "loss": 0.4528, "step": 24235 }, { "epoch": 13.539664804469274, "grad_norm": 0.7654157876968384, "learning_rate": 0.0003242857142857143, "loss": 0.5313, "step": 24236 }, { "epoch": 13.54022346368715, "grad_norm": 0.36062702536582947, "learning_rate": 0.0003242577030812325, "loss": 0.4262, "step": 24237 }, { 
"epoch": 13.540782122905028, "grad_norm": 0.5776169300079346, "learning_rate": 0.0003242296918767507, "loss": 0.3506, "step": 24238 }, { "epoch": 13.541340782122905, "grad_norm": 0.5203577876091003, "learning_rate": 0.0003242016806722689, "loss": 0.5883, "step": 24239 }, { "epoch": 13.541899441340782, "grad_norm": 0.47661954164505005, "learning_rate": 0.00032417366946778717, "loss": 0.4938, "step": 24240 }, { "epoch": 13.54245810055866, "grad_norm": 0.462184339761734, "learning_rate": 0.0003241456582633053, "loss": 0.4259, "step": 24241 }, { "epoch": 13.543016759776537, "grad_norm": 0.6133396625518799, "learning_rate": 0.00032411764705882353, "loss": 0.4095, "step": 24242 }, { "epoch": 13.543575418994413, "grad_norm": 0.7531257271766663, "learning_rate": 0.00032408963585434173, "loss": 0.5045, "step": 24243 }, { "epoch": 13.544134078212291, "grad_norm": 0.5929487347602844, "learning_rate": 0.00032406162464985994, "loss": 0.4323, "step": 24244 }, { "epoch": 13.544692737430168, "grad_norm": 0.3269234299659729, "learning_rate": 0.0003240336134453782, "loss": 0.3957, "step": 24245 }, { "epoch": 13.545251396648045, "grad_norm": 1.4112555980682373, "learning_rate": 0.00032400560224089635, "loss": 0.4331, "step": 24246 }, { "epoch": 13.545810055865921, "grad_norm": 0.5134170651435852, "learning_rate": 0.00032397759103641456, "loss": 0.4151, "step": 24247 }, { "epoch": 13.5463687150838, "grad_norm": 0.5318816304206848, "learning_rate": 0.0003239495798319328, "loss": 0.4119, "step": 24248 }, { "epoch": 13.546927374301676, "grad_norm": 0.6621688604354858, "learning_rate": 0.00032392156862745097, "loss": 0.6012, "step": 24249 }, { "epoch": 13.547486033519553, "grad_norm": 0.7528454661369324, "learning_rate": 0.00032389355742296923, "loss": 0.5409, "step": 24250 }, { "epoch": 13.548044692737431, "grad_norm": 0.4618639647960663, "learning_rate": 0.0003238655462184874, "loss": 0.4708, "step": 24251 }, { "epoch": 13.548603351955308, "grad_norm": 0.4162149727344513, 
"learning_rate": 0.0003238375350140056, "loss": 0.3975, "step": 24252 }, { "epoch": 13.549162011173184, "grad_norm": 0.975376307964325, "learning_rate": 0.00032380952380952385, "loss": 0.3584, "step": 24253 }, { "epoch": 13.54972067039106, "grad_norm": 0.46845823526382446, "learning_rate": 0.000323781512605042, "loss": 0.5591, "step": 24254 }, { "epoch": 13.550279329608939, "grad_norm": 1.4530911445617676, "learning_rate": 0.00032375350140056026, "loss": 0.5976, "step": 24255 }, { "epoch": 13.550837988826816, "grad_norm": 0.39533713459968567, "learning_rate": 0.00032372549019607847, "loss": 0.4209, "step": 24256 }, { "epoch": 13.551396648044692, "grad_norm": 1.1704386472702026, "learning_rate": 0.0003236974789915966, "loss": 0.6624, "step": 24257 }, { "epoch": 13.55195530726257, "grad_norm": 0.8711075186729431, "learning_rate": 0.0003236694677871149, "loss": 0.4374, "step": 24258 }, { "epoch": 13.552513966480447, "grad_norm": 0.32882529497146606, "learning_rate": 0.00032364145658263303, "loss": 0.4534, "step": 24259 }, { "epoch": 13.553072625698324, "grad_norm": 0.644980251789093, "learning_rate": 0.0003236134453781513, "loss": 0.4965, "step": 24260 }, { "epoch": 13.553631284916202, "grad_norm": 0.5811072587966919, "learning_rate": 0.0003235854341736695, "loss": 0.7347, "step": 24261 }, { "epoch": 13.554189944134079, "grad_norm": 0.4524349570274353, "learning_rate": 0.00032355742296918765, "loss": 0.462, "step": 24262 }, { "epoch": 13.554748603351955, "grad_norm": 0.5441485047340393, "learning_rate": 0.0003235294117647059, "loss": 0.479, "step": 24263 }, { "epoch": 13.555307262569832, "grad_norm": 0.41202443838119507, "learning_rate": 0.0003235014005602241, "loss": 0.3806, "step": 24264 }, { "epoch": 13.55586592178771, "grad_norm": 0.8137189745903015, "learning_rate": 0.0003234733893557423, "loss": 0.4186, "step": 24265 }, { "epoch": 13.556424581005587, "grad_norm": 0.4145328402519226, "learning_rate": 0.0003234453781512605, "loss": 0.432, "step": 24266 }, { 
"epoch": 13.556983240223463, "grad_norm": 0.38000452518463135, "learning_rate": 0.0003234173669467787, "loss": 0.4581, "step": 24267 }, { "epoch": 13.557541899441341, "grad_norm": 0.43896549940109253, "learning_rate": 0.00032338935574229694, "loss": 0.4317, "step": 24268 }, { "epoch": 13.558100558659218, "grad_norm": 1.0027886629104614, "learning_rate": 0.00032336134453781514, "loss": 0.398, "step": 24269 }, { "epoch": 13.558659217877095, "grad_norm": 1.0002566576004028, "learning_rate": 0.0003233333333333333, "loss": 0.3899, "step": 24270 }, { "epoch": 13.559217877094973, "grad_norm": 3.61797833442688, "learning_rate": 0.00032330532212885156, "loss": 0.5673, "step": 24271 }, { "epoch": 13.55977653631285, "grad_norm": 0.6382842659950256, "learning_rate": 0.00032327731092436976, "loss": 0.4118, "step": 24272 }, { "epoch": 13.560335195530726, "grad_norm": 1.1657254695892334, "learning_rate": 0.00032324929971988797, "loss": 0.4881, "step": 24273 }, { "epoch": 13.560893854748603, "grad_norm": 0.410346120595932, "learning_rate": 0.0003232212885154062, "loss": 0.3876, "step": 24274 }, { "epoch": 13.561452513966481, "grad_norm": 0.9743613600730896, "learning_rate": 0.0003231932773109243, "loss": 0.3926, "step": 24275 }, { "epoch": 13.562011173184358, "grad_norm": 2.832747220993042, "learning_rate": 0.0003231652661064426, "loss": 0.368, "step": 24276 }, { "epoch": 13.562569832402234, "grad_norm": 0.4697764813899994, "learning_rate": 0.0003231372549019608, "loss": 0.4078, "step": 24277 }, { "epoch": 13.563128491620112, "grad_norm": 0.5244917869567871, "learning_rate": 0.000323109243697479, "loss": 0.4865, "step": 24278 }, { "epoch": 13.563687150837989, "grad_norm": 0.6743093132972717, "learning_rate": 0.0003230812324929972, "loss": 0.4442, "step": 24279 }, { "epoch": 13.564245810055866, "grad_norm": 0.4919312298297882, "learning_rate": 0.0003230532212885154, "loss": 0.4084, "step": 24280 }, { "epoch": 13.564804469273742, "grad_norm": 0.5810346603393555, "learning_rate": 
0.0003230252100840336, "loss": 0.4218, "step": 24281 }, { "epoch": 13.56536312849162, "grad_norm": 0.5394034385681152, "learning_rate": 0.0003229971988795518, "loss": 0.5409, "step": 24282 }, { "epoch": 13.565921787709497, "grad_norm": 1.5001826286315918, "learning_rate": 0.00032296918767507003, "loss": 0.4208, "step": 24283 }, { "epoch": 13.566480446927374, "grad_norm": 0.4995175302028656, "learning_rate": 0.00032294117647058823, "loss": 0.4024, "step": 24284 }, { "epoch": 13.567039106145252, "grad_norm": 0.9005759358406067, "learning_rate": 0.00032291316526610644, "loss": 0.5347, "step": 24285 }, { "epoch": 13.567597765363129, "grad_norm": 0.3349456489086151, "learning_rate": 0.00032288515406162465, "loss": 0.2691, "step": 24286 }, { "epoch": 13.568156424581005, "grad_norm": 0.4442967176437378, "learning_rate": 0.00032285714285714285, "loss": 0.4984, "step": 24287 }, { "epoch": 13.568715083798883, "grad_norm": 0.5860431790351868, "learning_rate": 0.0003228291316526611, "loss": 0.442, "step": 24288 }, { "epoch": 13.56927374301676, "grad_norm": 0.7263837456703186, "learning_rate": 0.00032280112044817926, "loss": 0.4136, "step": 24289 }, { "epoch": 13.569832402234637, "grad_norm": 0.6092385649681091, "learning_rate": 0.00032277310924369747, "loss": 0.427, "step": 24290 }, { "epoch": 13.570391061452513, "grad_norm": 0.3892316520214081, "learning_rate": 0.0003227450980392157, "loss": 0.4188, "step": 24291 }, { "epoch": 13.570949720670392, "grad_norm": 0.8783763647079468, "learning_rate": 0.0003227170868347339, "loss": 0.3693, "step": 24292 }, { "epoch": 13.571508379888268, "grad_norm": 0.4158749282360077, "learning_rate": 0.00032268907563025214, "loss": 0.4154, "step": 24293 }, { "epoch": 13.572067039106145, "grad_norm": 0.3377149701118469, "learning_rate": 0.0003226610644257703, "loss": 0.4184, "step": 24294 }, { "epoch": 13.572625698324023, "grad_norm": 0.430833101272583, "learning_rate": 0.0003226330532212885, "loss": 0.4628, "step": 24295 }, { "epoch": 
13.5731843575419, "grad_norm": 0.8284791707992554, "learning_rate": 0.00032260504201680676, "loss": 0.3164, "step": 24296 }, { "epoch": 13.573743016759776, "grad_norm": 1.172363042831421, "learning_rate": 0.0003225770308123249, "loss": 0.4507, "step": 24297 }, { "epoch": 13.574301675977654, "grad_norm": 0.7132647037506104, "learning_rate": 0.00032254901960784317, "loss": 0.3534, "step": 24298 }, { "epoch": 13.574860335195531, "grad_norm": 0.47527194023132324, "learning_rate": 0.0003225210084033613, "loss": 0.3989, "step": 24299 }, { "epoch": 13.575418994413408, "grad_norm": 0.6327033042907715, "learning_rate": 0.00032249299719887953, "loss": 0.4595, "step": 24300 }, { "epoch": 13.575977653631284, "grad_norm": 0.41773825883865356, "learning_rate": 0.0003224649859943978, "loss": 0.3774, "step": 24301 }, { "epoch": 13.576536312849163, "grad_norm": 0.41878068447113037, "learning_rate": 0.00032243697478991594, "loss": 0.3821, "step": 24302 }, { "epoch": 13.577094972067039, "grad_norm": 0.6169646978378296, "learning_rate": 0.0003224089635854342, "loss": 0.4287, "step": 24303 }, { "epoch": 13.577653631284916, "grad_norm": 0.5389190912246704, "learning_rate": 0.0003223809523809524, "loss": 0.4591, "step": 24304 }, { "epoch": 13.578212290502794, "grad_norm": 0.6131604909896851, "learning_rate": 0.00032235294117647056, "loss": 0.3759, "step": 24305 }, { "epoch": 13.57877094972067, "grad_norm": 0.5030089020729065, "learning_rate": 0.0003223249299719888, "loss": 0.4815, "step": 24306 }, { "epoch": 13.579329608938547, "grad_norm": 0.5005931854248047, "learning_rate": 0.00032229691876750697, "loss": 0.53, "step": 24307 }, { "epoch": 13.579888268156424, "grad_norm": 0.43076246976852417, "learning_rate": 0.00032226890756302523, "loss": 0.4261, "step": 24308 }, { "epoch": 13.580446927374302, "grad_norm": 0.594862699508667, "learning_rate": 0.00032224089635854344, "loss": 0.3809, "step": 24309 }, { "epoch": 13.581005586592179, "grad_norm": 0.33322015404701233, "learning_rate": 
0.0003222128851540616, "loss": 0.3735, "step": 24310 }, { "epoch": 13.581564245810055, "grad_norm": 5.958134174346924, "learning_rate": 0.00032218487394957985, "loss": 0.4254, "step": 24311 }, { "epoch": 13.582122905027934, "grad_norm": 0.45530906319618225, "learning_rate": 0.00032215686274509806, "loss": 0.3942, "step": 24312 }, { "epoch": 13.58268156424581, "grad_norm": 0.5442169904708862, "learning_rate": 0.00032212885154061626, "loss": 0.5347, "step": 24313 }, { "epoch": 13.583240223463687, "grad_norm": 1.794421911239624, "learning_rate": 0.00032210084033613447, "loss": 0.4279, "step": 24314 }, { "epoch": 13.583798882681565, "grad_norm": 0.48989158868789673, "learning_rate": 0.0003220728291316526, "loss": 0.387, "step": 24315 }, { "epoch": 13.584357541899442, "grad_norm": 0.7065900564193726, "learning_rate": 0.0003220448179271709, "loss": 0.4707, "step": 24316 }, { "epoch": 13.584916201117318, "grad_norm": 0.4515487253665924, "learning_rate": 0.0003220168067226891, "loss": 0.4152, "step": 24317 }, { "epoch": 13.585474860335196, "grad_norm": 0.8893181681632996, "learning_rate": 0.0003219887955182073, "loss": 0.4448, "step": 24318 }, { "epoch": 13.586033519553073, "grad_norm": 0.6195316314697266, "learning_rate": 0.0003219607843137255, "loss": 0.5344, "step": 24319 }, { "epoch": 13.58659217877095, "grad_norm": 0.9954841136932373, "learning_rate": 0.0003219327731092437, "loss": 0.4736, "step": 24320 }, { "epoch": 13.587150837988826, "grad_norm": 0.9266394972801208, "learning_rate": 0.0003219047619047619, "loss": 0.4849, "step": 24321 }, { "epoch": 13.587709497206705, "grad_norm": 0.6230759620666504, "learning_rate": 0.0003218767507002801, "loss": 0.5423, "step": 24322 }, { "epoch": 13.588268156424581, "grad_norm": 0.5001469850540161, "learning_rate": 0.0003218487394957983, "loss": 0.3997, "step": 24323 }, { "epoch": 13.588826815642458, "grad_norm": 1.8093211650848389, "learning_rate": 0.00032182072829131653, "loss": 0.41, "step": 24324 }, { "epoch": 
13.589385474860336, "grad_norm": 0.37465524673461914, "learning_rate": 0.00032179271708683473, "loss": 0.4085, "step": 24325 }, { "epoch": 13.589944134078213, "grad_norm": 0.423692911863327, "learning_rate": 0.00032176470588235294, "loss": 0.3438, "step": 24326 }, { "epoch": 13.59050279329609, "grad_norm": 1.3446416854858398, "learning_rate": 0.00032173669467787115, "loss": 0.5167, "step": 24327 }, { "epoch": 13.591061452513966, "grad_norm": 0.47073447704315186, "learning_rate": 0.0003217086834733894, "loss": 0.3583, "step": 24328 }, { "epoch": 13.591620111731844, "grad_norm": 0.51042640209198, "learning_rate": 0.00032168067226890756, "loss": 0.3911, "step": 24329 }, { "epoch": 13.59217877094972, "grad_norm": 0.45660144090652466, "learning_rate": 0.00032165266106442576, "loss": 0.57, "step": 24330 }, { "epoch": 13.592737430167597, "grad_norm": 0.605329692363739, "learning_rate": 0.00032162464985994397, "loss": 0.5513, "step": 24331 }, { "epoch": 13.593296089385476, "grad_norm": 0.4736612141132355, "learning_rate": 0.0003215966386554622, "loss": 0.3644, "step": 24332 }, { "epoch": 13.593854748603352, "grad_norm": 0.5587393641471863, "learning_rate": 0.00032156862745098044, "loss": 0.4166, "step": 24333 }, { "epoch": 13.594413407821229, "grad_norm": 0.36336472630500793, "learning_rate": 0.0003215406162464986, "loss": 0.3603, "step": 24334 }, { "epoch": 13.594972067039105, "grad_norm": 1.369785189628601, "learning_rate": 0.0003215126050420168, "loss": 0.4314, "step": 24335 }, { "epoch": 13.595530726256984, "grad_norm": 0.4977770149707794, "learning_rate": 0.00032148459383753505, "loss": 0.3219, "step": 24336 }, { "epoch": 13.59608938547486, "grad_norm": 0.6435325741767883, "learning_rate": 0.0003214565826330532, "loss": 0.5181, "step": 24337 }, { "epoch": 13.596648044692737, "grad_norm": 0.36096060276031494, "learning_rate": 0.00032142857142857147, "loss": 0.344, "step": 24338 }, { "epoch": 13.597206703910615, "grad_norm": 0.3784453272819519, "learning_rate": 
0.0003214005602240896, "loss": 0.4263, "step": 24339 }, { "epoch": 13.597765363128492, "grad_norm": 0.4730847477912903, "learning_rate": 0.0003213725490196078, "loss": 0.367, "step": 24340 }, { "epoch": 13.598324022346368, "grad_norm": 0.44273480772972107, "learning_rate": 0.0003213445378151261, "loss": 0.4394, "step": 24341 }, { "epoch": 13.598882681564247, "grad_norm": 0.37002480030059814, "learning_rate": 0.00032131652661064424, "loss": 0.4155, "step": 24342 }, { "epoch": 13.599441340782123, "grad_norm": 7.938429355621338, "learning_rate": 0.0003212885154061625, "loss": 0.459, "step": 24343 }, { "epoch": 13.6, "grad_norm": 0.4473360478878021, "learning_rate": 0.0003212605042016807, "loss": 0.3792, "step": 24344 }, { "epoch": 13.600558659217878, "grad_norm": 0.3718496859073639, "learning_rate": 0.00032123249299719885, "loss": 0.3437, "step": 24345 }, { "epoch": 13.601117318435755, "grad_norm": 0.6617511510848999, "learning_rate": 0.0003212044817927171, "loss": 0.3902, "step": 24346 }, { "epoch": 13.601675977653631, "grad_norm": 0.42920982837677, "learning_rate": 0.00032117647058823527, "loss": 0.5534, "step": 24347 }, { "epoch": 13.602234636871508, "grad_norm": 1.6077117919921875, "learning_rate": 0.0003211484593837535, "loss": 0.3793, "step": 24348 }, { "epoch": 13.602793296089386, "grad_norm": 0.37592613697052, "learning_rate": 0.00032112044817927173, "loss": 0.3851, "step": 24349 }, { "epoch": 13.603351955307263, "grad_norm": 0.44684112071990967, "learning_rate": 0.0003210924369747899, "loss": 0.3179, "step": 24350 }, { "epoch": 13.60391061452514, "grad_norm": 0.5253171324729919, "learning_rate": 0.00032106442577030814, "loss": 0.4973, "step": 24351 }, { "epoch": 13.604469273743018, "grad_norm": 0.9252895712852478, "learning_rate": 0.00032103641456582635, "loss": 0.4572, "step": 24352 }, { "epoch": 13.605027932960894, "grad_norm": 1.9745129346847534, "learning_rate": 0.00032100840336134456, "loss": 0.4288, "step": 24353 }, { "epoch": 13.60558659217877, 
"grad_norm": 0.5356171727180481, "learning_rate": 0.00032098039215686276, "loss": 0.3872, "step": 24354 }, { "epoch": 13.606145251396647, "grad_norm": 0.5641050338745117, "learning_rate": 0.0003209523809523809, "loss": 0.3483, "step": 24355 }, { "epoch": 13.606703910614526, "grad_norm": 0.40061479806900024, "learning_rate": 0.0003209243697478992, "loss": 0.3435, "step": 24356 }, { "epoch": 13.607262569832402, "grad_norm": 0.46367666125297546, "learning_rate": 0.0003208963585434174, "loss": 0.3087, "step": 24357 }, { "epoch": 13.607821229050279, "grad_norm": 0.4955611228942871, "learning_rate": 0.0003208683473389356, "loss": 0.4245, "step": 24358 }, { "epoch": 13.608379888268157, "grad_norm": 0.5085077881813049, "learning_rate": 0.0003208403361344538, "loss": 0.433, "step": 24359 }, { "epoch": 13.608938547486034, "grad_norm": 0.36777132749557495, "learning_rate": 0.000320812324929972, "loss": 0.3491, "step": 24360 }, { "epoch": 13.60949720670391, "grad_norm": 0.5223658084869385, "learning_rate": 0.0003207843137254902, "loss": 0.3592, "step": 24361 }, { "epoch": 13.610055865921789, "grad_norm": 0.5470029711723328, "learning_rate": 0.0003207563025210084, "loss": 0.4424, "step": 24362 }, { "epoch": 13.610614525139665, "grad_norm": 0.38278546929359436, "learning_rate": 0.00032072829131652667, "loss": 0.3623, "step": 24363 }, { "epoch": 13.611173184357542, "grad_norm": 0.3846319019794464, "learning_rate": 0.0003207002801120448, "loss": 0.2749, "step": 24364 }, { "epoch": 13.611731843575418, "grad_norm": 0.9957003593444824, "learning_rate": 0.00032067226890756303, "loss": 0.3384, "step": 24365 }, { "epoch": 13.612290502793297, "grad_norm": 0.5243500471115112, "learning_rate": 0.00032064425770308123, "loss": 0.3717, "step": 24366 }, { "epoch": 13.612849162011173, "grad_norm": 0.4746619164943695, "learning_rate": 0.00032061624649859944, "loss": 0.435, "step": 24367 }, { "epoch": 13.61340782122905, "grad_norm": 1.226295828819275, "learning_rate": 0.0003205882352941177, 
"loss": 0.4555, "step": 24368 }, { "epoch": 13.613966480446928, "grad_norm": 1.6121059656143188, "learning_rate": 0.00032056022408963585, "loss": 0.3599, "step": 24369 }, { "epoch": 13.614525139664805, "grad_norm": 0.45315811038017273, "learning_rate": 0.00032053221288515406, "loss": 0.5751, "step": 24370 }, { "epoch": 13.615083798882681, "grad_norm": 1.625594973564148, "learning_rate": 0.0003205042016806723, "loss": 0.4484, "step": 24371 }, { "epoch": 13.61564245810056, "grad_norm": 0.5459540486335754, "learning_rate": 0.00032047619047619047, "loss": 0.3991, "step": 24372 }, { "epoch": 13.616201117318436, "grad_norm": 0.35960251092910767, "learning_rate": 0.00032044817927170873, "loss": 0.3721, "step": 24373 }, { "epoch": 13.616759776536313, "grad_norm": 0.4609623849391937, "learning_rate": 0.0003204201680672269, "loss": 0.4145, "step": 24374 }, { "epoch": 13.61731843575419, "grad_norm": 0.4776493310928345, "learning_rate": 0.0003203921568627451, "loss": 0.4558, "step": 24375 }, { "epoch": 13.617877094972068, "grad_norm": 0.6469979882240295, "learning_rate": 0.00032036414565826335, "loss": 0.3647, "step": 24376 }, { "epoch": 13.618435754189944, "grad_norm": 1.4890589714050293, "learning_rate": 0.0003203361344537815, "loss": 0.5229, "step": 24377 }, { "epoch": 13.61899441340782, "grad_norm": 0.6840735077857971, "learning_rate": 0.0003203081232492997, "loss": 0.4278, "step": 24378 }, { "epoch": 13.619553072625699, "grad_norm": 0.4018539488315582, "learning_rate": 0.00032028011204481797, "loss": 0.3919, "step": 24379 }, { "epoch": 13.620111731843576, "grad_norm": 0.37476009130477905, "learning_rate": 0.0003202521008403361, "loss": 0.456, "step": 24380 }, { "epoch": 13.620670391061452, "grad_norm": 0.37753865122795105, "learning_rate": 0.0003202240896358544, "loss": 0.3466, "step": 24381 }, { "epoch": 13.621229050279329, "grad_norm": 0.46557021141052246, "learning_rate": 0.00032019607843137253, "loss": 0.4142, "step": 24382 }, { "epoch": 13.621787709497207, 
"grad_norm": 4.799345970153809, "learning_rate": 0.00032016806722689074, "loss": 0.4271, "step": 24383 }, { "epoch": 13.622346368715084, "grad_norm": 0.6641995906829834, "learning_rate": 0.000320140056022409, "loss": 0.5034, "step": 24384 }, { "epoch": 13.62290502793296, "grad_norm": 0.6856821179389954, "learning_rate": 0.00032011204481792715, "loss": 0.5518, "step": 24385 }, { "epoch": 13.623463687150839, "grad_norm": 0.48791933059692383, "learning_rate": 0.0003200840336134454, "loss": 0.4211, "step": 24386 }, { "epoch": 13.624022346368715, "grad_norm": 0.5542627573013306, "learning_rate": 0.0003200560224089636, "loss": 0.4499, "step": 24387 }, { "epoch": 13.624581005586592, "grad_norm": 0.4851677715778351, "learning_rate": 0.00032002801120448177, "loss": 0.4039, "step": 24388 }, { "epoch": 13.62513966480447, "grad_norm": 0.4310801327228546, "learning_rate": 0.00032, "loss": 0.3953, "step": 24389 }, { "epoch": 13.625698324022347, "grad_norm": 0.4289790689945221, "learning_rate": 0.0003199719887955182, "loss": 0.4124, "step": 24390 }, { "epoch": 13.626256983240223, "grad_norm": 0.3752892017364502, "learning_rate": 0.00031994397759103644, "loss": 0.4201, "step": 24391 }, { "epoch": 13.6268156424581, "grad_norm": 0.5055199861526489, "learning_rate": 0.00031991596638655464, "loss": 0.442, "step": 24392 }, { "epoch": 13.627374301675978, "grad_norm": 0.5431085824966431, "learning_rate": 0.0003198879551820728, "loss": 0.4966, "step": 24393 }, { "epoch": 13.627932960893855, "grad_norm": 0.5029839873313904, "learning_rate": 0.00031985994397759106, "loss": 0.4532, "step": 24394 }, { "epoch": 13.628491620111731, "grad_norm": 0.4633367955684662, "learning_rate": 0.00031983193277310926, "loss": 0.4598, "step": 24395 }, { "epoch": 13.62905027932961, "grad_norm": 0.479522705078125, "learning_rate": 0.00031980392156862747, "loss": 0.3988, "step": 24396 }, { "epoch": 13.629608938547486, "grad_norm": 22.345001220703125, "learning_rate": 0.0003197759103641457, "loss": 0.4134, 
"step": 24397 }, { "epoch": 13.630167597765363, "grad_norm": 0.49456721544265747, "learning_rate": 0.0003197478991596638, "loss": 0.3621, "step": 24398 }, { "epoch": 13.630726256983241, "grad_norm": 1.0717533826828003, "learning_rate": 0.0003197198879551821, "loss": 0.3406, "step": 24399 }, { "epoch": 13.631284916201118, "grad_norm": 0.4595997929573059, "learning_rate": 0.0003196918767507003, "loss": 0.3349, "step": 24400 }, { "epoch": 13.631843575418994, "grad_norm": 0.4430665075778961, "learning_rate": 0.0003196638655462185, "loss": 0.4916, "step": 24401 }, { "epoch": 13.63240223463687, "grad_norm": 0.41209807991981506, "learning_rate": 0.0003196358543417367, "loss": 0.3897, "step": 24402 }, { "epoch": 13.632960893854749, "grad_norm": 0.47212961316108704, "learning_rate": 0.0003196078431372549, "loss": 0.2838, "step": 24403 }, { "epoch": 13.633519553072626, "grad_norm": 0.36477741599082947, "learning_rate": 0.0003195798319327731, "loss": 0.3098, "step": 24404 }, { "epoch": 13.634078212290502, "grad_norm": 0.3986670672893524, "learning_rate": 0.0003195518207282913, "loss": 0.4196, "step": 24405 }, { "epoch": 13.63463687150838, "grad_norm": 0.49524441361427307, "learning_rate": 0.00031952380952380953, "loss": 0.4196, "step": 24406 }, { "epoch": 13.635195530726257, "grad_norm": 1.3900222778320312, "learning_rate": 0.00031949579831932773, "loss": 0.3833, "step": 24407 }, { "epoch": 13.635754189944134, "grad_norm": 0.5785476565361023, "learning_rate": 0.00031946778711484594, "loss": 0.408, "step": 24408 }, { "epoch": 13.63631284916201, "grad_norm": 0.5940086245536804, "learning_rate": 0.00031943977591036415, "loss": 0.4067, "step": 24409 }, { "epoch": 13.636871508379889, "grad_norm": 2.657275676727295, "learning_rate": 0.00031941176470588235, "loss": 0.4608, "step": 24410 }, { "epoch": 13.637430167597765, "grad_norm": 0.6407803297042847, "learning_rate": 0.0003193837535014006, "loss": 0.5222, "step": 24411 }, { "epoch": 13.637988826815642, "grad_norm": 
0.4277299642562866, "learning_rate": 0.00031935574229691876, "loss": 0.3432, "step": 24412 }, { "epoch": 13.63854748603352, "grad_norm": 0.8213837742805481, "learning_rate": 0.00031932773109243697, "loss": 0.4603, "step": 24413 }, { "epoch": 13.639106145251397, "grad_norm": 0.5171836614608765, "learning_rate": 0.0003192997198879552, "loss": 0.3866, "step": 24414 }, { "epoch": 13.639664804469273, "grad_norm": 0.494873046875, "learning_rate": 0.0003192717086834734, "loss": 0.4306, "step": 24415 }, { "epoch": 13.640223463687152, "grad_norm": 0.2952917814254761, "learning_rate": 0.00031924369747899164, "loss": 0.2794, "step": 24416 }, { "epoch": 13.640782122905028, "grad_norm": 0.3938904404640198, "learning_rate": 0.0003192156862745098, "loss": 0.5472, "step": 24417 }, { "epoch": 13.641340782122905, "grad_norm": 0.8669130206108093, "learning_rate": 0.000319187675070028, "loss": 0.3086, "step": 24418 }, { "epoch": 13.641899441340783, "grad_norm": 0.39553967118263245, "learning_rate": 0.00031915966386554626, "loss": 0.3864, "step": 24419 }, { "epoch": 13.64245810055866, "grad_norm": 0.40750664472579956, "learning_rate": 0.0003191316526610644, "loss": 0.5224, "step": 24420 }, { "epoch": 13.643016759776536, "grad_norm": 0.6431204676628113, "learning_rate": 0.00031910364145658267, "loss": 0.4321, "step": 24421 }, { "epoch": 13.643575418994413, "grad_norm": 0.5904327034950256, "learning_rate": 0.0003190756302521008, "loss": 0.6309, "step": 24422 }, { "epoch": 13.644134078212291, "grad_norm": 0.40578797459602356, "learning_rate": 0.00031904761904761903, "loss": 0.3865, "step": 24423 }, { "epoch": 13.644692737430168, "grad_norm": 0.4975600838661194, "learning_rate": 0.0003190196078431373, "loss": 0.4856, "step": 24424 }, { "epoch": 13.645251396648044, "grad_norm": 1.1321818828582764, "learning_rate": 0.00031899159663865544, "loss": 0.3811, "step": 24425 }, { "epoch": 13.645810055865923, "grad_norm": 0.7656422257423401, "learning_rate": 0.0003189635854341737, "loss": 0.444, 
"step": 24426 }, { "epoch": 13.6463687150838, "grad_norm": 0.412936806678772, "learning_rate": 0.0003189355742296919, "loss": 0.3771, "step": 24427 }, { "epoch": 13.646927374301676, "grad_norm": 0.5320417881011963, "learning_rate": 0.00031890756302521006, "loss": 0.6079, "step": 24428 }, { "epoch": 13.647486033519552, "grad_norm": 0.3536711633205414, "learning_rate": 0.0003188795518207283, "loss": 0.4358, "step": 24429 }, { "epoch": 13.64804469273743, "grad_norm": 0.5484802722930908, "learning_rate": 0.00031885154061624647, "loss": 0.5392, "step": 24430 }, { "epoch": 13.648603351955307, "grad_norm": 0.4762323498725891, "learning_rate": 0.00031882352941176473, "loss": 0.3719, "step": 24431 }, { "epoch": 13.649162011173184, "grad_norm": 0.7804234027862549, "learning_rate": 0.00031879551820728294, "loss": 0.3759, "step": 24432 }, { "epoch": 13.649720670391062, "grad_norm": 0.4490956962108612, "learning_rate": 0.0003187675070028011, "loss": 0.4596, "step": 24433 }, { "epoch": 13.650279329608939, "grad_norm": 0.5034146904945374, "learning_rate": 0.00031873949579831935, "loss": 0.3284, "step": 24434 }, { "epoch": 13.650837988826815, "grad_norm": 0.5368829369544983, "learning_rate": 0.00031871148459383756, "loss": 0.3822, "step": 24435 }, { "epoch": 13.651396648044694, "grad_norm": 0.48756930232048035, "learning_rate": 0.00031868347338935576, "loss": 0.413, "step": 24436 }, { "epoch": 13.65195530726257, "grad_norm": 0.5027840733528137, "learning_rate": 0.00031865546218487397, "loss": 0.3747, "step": 24437 }, { "epoch": 13.652513966480447, "grad_norm": 0.8145759105682373, "learning_rate": 0.0003186274509803921, "loss": 0.3451, "step": 24438 }, { "epoch": 13.653072625698323, "grad_norm": 1.5784149169921875, "learning_rate": 0.0003185994397759104, "loss": 0.4663, "step": 24439 }, { "epoch": 13.653631284916202, "grad_norm": 0.4120514392852783, "learning_rate": 0.0003185714285714286, "loss": 0.5175, "step": 24440 }, { "epoch": 13.654189944134078, "grad_norm": 
0.9323202967643738, "learning_rate": 0.0003185434173669468, "loss": 0.6399, "step": 24441 }, { "epoch": 13.654748603351955, "grad_norm": 6.808210372924805, "learning_rate": 0.000318515406162465, "loss": 0.4985, "step": 24442 }, { "epoch": 13.655307262569833, "grad_norm": 0.3559451699256897, "learning_rate": 0.0003184873949579832, "loss": 0.4026, "step": 24443 }, { "epoch": 13.65586592178771, "grad_norm": 0.391656756401062, "learning_rate": 0.0003184593837535014, "loss": 0.4318, "step": 24444 }, { "epoch": 13.656424581005586, "grad_norm": 0.9895601272583008, "learning_rate": 0.0003184313725490196, "loss": 0.4206, "step": 24445 }, { "epoch": 13.656983240223465, "grad_norm": 0.7245948314666748, "learning_rate": 0.0003184033613445378, "loss": 0.3907, "step": 24446 }, { "epoch": 13.657541899441341, "grad_norm": 6.7192792892456055, "learning_rate": 0.00031837535014005603, "loss": 0.3928, "step": 24447 }, { "epoch": 13.658100558659218, "grad_norm": 1.7429684400558472, "learning_rate": 0.00031834733893557423, "loss": 0.4669, "step": 24448 }, { "epoch": 13.658659217877094, "grad_norm": 0.46691322326660156, "learning_rate": 0.00031831932773109244, "loss": 0.4856, "step": 24449 }, { "epoch": 13.659217877094973, "grad_norm": 0.47422346472740173, "learning_rate": 0.00031829131652661065, "loss": 0.3313, "step": 24450 }, { "epoch": 13.65977653631285, "grad_norm": 0.8414809703826904, "learning_rate": 0.0003182633053221289, "loss": 0.3155, "step": 24451 }, { "epoch": 13.660335195530726, "grad_norm": 0.3099393844604492, "learning_rate": 0.00031823529411764706, "loss": 0.341, "step": 24452 }, { "epoch": 13.660893854748604, "grad_norm": 0.6694900393486023, "learning_rate": 0.00031820728291316526, "loss": 0.4794, "step": 24453 }, { "epoch": 13.66145251396648, "grad_norm": 0.5306008458137512, "learning_rate": 0.00031817927170868347, "loss": 0.4684, "step": 24454 }, { "epoch": 13.662011173184357, "grad_norm": 3.5476903915405273, "learning_rate": 0.0003181512605042017, "loss": 0.4257, 
"step": 24455 }, { "epoch": 13.662569832402234, "grad_norm": 0.7237918376922607, "learning_rate": 0.00031812324929971994, "loss": 0.4807, "step": 24456 }, { "epoch": 13.663128491620112, "grad_norm": 0.4164571464061737, "learning_rate": 0.0003180952380952381, "loss": 0.3361, "step": 24457 }, { "epoch": 13.663687150837989, "grad_norm": 0.3449452817440033, "learning_rate": 0.0003180672268907563, "loss": 0.3426, "step": 24458 }, { "epoch": 13.664245810055865, "grad_norm": 0.699264645576477, "learning_rate": 0.00031803921568627455, "loss": 0.4645, "step": 24459 }, { "epoch": 13.664804469273744, "grad_norm": 0.6480334401130676, "learning_rate": 0.0003180112044817927, "loss": 0.4169, "step": 24460 }, { "epoch": 13.66536312849162, "grad_norm": 0.3999210596084595, "learning_rate": 0.00031798319327731097, "loss": 0.4488, "step": 24461 }, { "epoch": 13.665921787709497, "grad_norm": 0.42112091183662415, "learning_rate": 0.0003179551820728291, "loss": 0.3699, "step": 24462 }, { "epoch": 13.666480446927375, "grad_norm": 0.38431501388549805, "learning_rate": 0.0003179271708683473, "loss": 0.3726, "step": 24463 }, { "epoch": 13.667039106145252, "grad_norm": 0.6671479940414429, "learning_rate": 0.0003178991596638656, "loss": 0.3603, "step": 24464 }, { "epoch": 13.667597765363128, "grad_norm": 0.49649402499198914, "learning_rate": 0.00031787114845938374, "loss": 0.5209, "step": 24465 }, { "epoch": 13.668156424581005, "grad_norm": 0.5430819988250732, "learning_rate": 0.000317843137254902, "loss": 0.4982, "step": 24466 }, { "epoch": 13.668715083798883, "grad_norm": 0.5576528310775757, "learning_rate": 0.0003178151260504202, "loss": 0.3561, "step": 24467 }, { "epoch": 13.66927374301676, "grad_norm": 0.8587684631347656, "learning_rate": 0.00031778711484593835, "loss": 0.4854, "step": 24468 }, { "epoch": 13.669832402234636, "grad_norm": 1.5889253616333008, "learning_rate": 0.0003177591036414566, "loss": 0.3674, "step": 24469 }, { "epoch": 13.670391061452515, "grad_norm": 
0.4303986430168152, "learning_rate": 0.00031773109243697477, "loss": 0.4575, "step": 24470 }, { "epoch": 13.670949720670391, "grad_norm": 1.6989390850067139, "learning_rate": 0.000317703081232493, "loss": 0.4801, "step": 24471 }, { "epoch": 13.671508379888268, "grad_norm": 0.4176202714443207, "learning_rate": 0.00031767507002801123, "loss": 0.4061, "step": 24472 }, { "epoch": 13.672067039106146, "grad_norm": 0.48989418148994446, "learning_rate": 0.0003176470588235294, "loss": 0.3934, "step": 24473 }, { "epoch": 13.672625698324023, "grad_norm": 0.5725489854812622, "learning_rate": 0.00031761904761904764, "loss": 0.5604, "step": 24474 }, { "epoch": 13.6731843575419, "grad_norm": 0.5144396424293518, "learning_rate": 0.00031759103641456585, "loss": 0.4342, "step": 24475 }, { "epoch": 13.673743016759776, "grad_norm": 0.4507734775543213, "learning_rate": 0.00031756302521008406, "loss": 0.3368, "step": 24476 }, { "epoch": 13.674301675977654, "grad_norm": 0.4781704843044281, "learning_rate": 0.00031753501400560226, "loss": 0.4302, "step": 24477 }, { "epoch": 13.67486033519553, "grad_norm": 0.532047688961029, "learning_rate": 0.0003175070028011204, "loss": 0.5614, "step": 24478 }, { "epoch": 13.675418994413407, "grad_norm": 0.5552019476890564, "learning_rate": 0.0003174789915966387, "loss": 0.4627, "step": 24479 }, { "epoch": 13.675977653631286, "grad_norm": 0.7362289428710938, "learning_rate": 0.0003174509803921569, "loss": 0.4887, "step": 24480 }, { "epoch": 13.676536312849162, "grad_norm": 0.43260982632637024, "learning_rate": 0.0003174229691876751, "loss": 0.4835, "step": 24481 }, { "epoch": 13.677094972067039, "grad_norm": 0.5450297594070435, "learning_rate": 0.0003173949579831933, "loss": 0.5921, "step": 24482 }, { "epoch": 13.677653631284915, "grad_norm": 0.4004102051258087, "learning_rate": 0.0003173669467787115, "loss": 0.3766, "step": 24483 }, { "epoch": 13.678212290502794, "grad_norm": 0.5877929329872131, "learning_rate": 0.0003173389355742297, "loss": 0.3767, 
"step": 24484 }, { "epoch": 13.67877094972067, "grad_norm": 0.38973262906074524, "learning_rate": 0.0003173109243697479, "loss": 0.451, "step": 24485 }, { "epoch": 13.679329608938547, "grad_norm": 0.6328718066215515, "learning_rate": 0.0003172829131652661, "loss": 0.5041, "step": 24486 }, { "epoch": 13.679888268156425, "grad_norm": 0.6081767678260803, "learning_rate": 0.0003172549019607843, "loss": 0.3969, "step": 24487 }, { "epoch": 13.680446927374302, "grad_norm": 0.3715354800224304, "learning_rate": 0.00031722689075630253, "loss": 0.3678, "step": 24488 }, { "epoch": 13.681005586592178, "grad_norm": 0.39989131689071655, "learning_rate": 0.00031719887955182073, "loss": 0.3856, "step": 24489 }, { "epoch": 13.681564245810057, "grad_norm": 0.4814515709877014, "learning_rate": 0.00031717086834733894, "loss": 0.3994, "step": 24490 }, { "epoch": 13.682122905027933, "grad_norm": 0.4881913363933563, "learning_rate": 0.00031714285714285715, "loss": 0.371, "step": 24491 }, { "epoch": 13.68268156424581, "grad_norm": 0.8198256492614746, "learning_rate": 0.00031711484593837535, "loss": 0.5498, "step": 24492 }, { "epoch": 13.683240223463688, "grad_norm": 0.36178117990493774, "learning_rate": 0.00031708683473389356, "loss": 0.3272, "step": 24493 }, { "epoch": 13.683798882681565, "grad_norm": 0.3471197187900543, "learning_rate": 0.00031705882352941176, "loss": 0.3755, "step": 24494 }, { "epoch": 13.684357541899441, "grad_norm": 0.5136399269104004, "learning_rate": 0.00031703081232492997, "loss": 0.5028, "step": 24495 }, { "epoch": 13.684916201117318, "grad_norm": 0.7959884405136108, "learning_rate": 0.0003170028011204482, "loss": 0.4318, "step": 24496 }, { "epoch": 13.685474860335196, "grad_norm": 0.3880917727947235, "learning_rate": 0.0003169747899159664, "loss": 0.4967, "step": 24497 }, { "epoch": 13.686033519553073, "grad_norm": 0.48721611499786377, "learning_rate": 0.0003169467787114846, "loss": 0.3599, "step": 24498 }, { "epoch": 13.68659217877095, "grad_norm": 
0.48447638750076294, "learning_rate": 0.00031691876750700285, "loss": 0.3612, "step": 24499 }, { "epoch": 13.687150837988828, "grad_norm": 0.5575209856033325, "learning_rate": 0.000316890756302521, "loss": 0.3255, "step": 24500 }, { "epoch": 13.687150837988828, "eval_cer": 0.08666928523889016, "eval_loss": 0.32727304100990295, "eval_runtime": 55.4243, "eval_samples_per_second": 81.877, "eval_steps_per_second": 5.124, "eval_wer": 0.3433982290997458, "step": 24500 }, { "epoch": 13.687709497206704, "grad_norm": 0.667323887348175, "learning_rate": 0.0003168627450980392, "loss": 0.4303, "step": 24501 }, { "epoch": 13.68826815642458, "grad_norm": 0.6596452593803406, "learning_rate": 0.0003168347338935574, "loss": 0.4048, "step": 24502 }, { "epoch": 13.688826815642457, "grad_norm": 0.6369730234146118, "learning_rate": 0.0003168067226890756, "loss": 0.4133, "step": 24503 }, { "epoch": 13.689385474860336, "grad_norm": 0.3578794598579407, "learning_rate": 0.0003167787114845939, "loss": 0.3067, "step": 24504 }, { "epoch": 13.689944134078212, "grad_norm": 0.7099540829658508, "learning_rate": 0.00031675070028011203, "loss": 0.5211, "step": 24505 }, { "epoch": 13.690502793296089, "grad_norm": 0.6109747290611267, "learning_rate": 0.00031672268907563024, "loss": 0.6047, "step": 24506 }, { "epoch": 13.691061452513967, "grad_norm": 0.45618754625320435, "learning_rate": 0.0003166946778711485, "loss": 0.3851, "step": 24507 }, { "epoch": 13.691620111731844, "grad_norm": 1.5236531496047974, "learning_rate": 0.00031666666666666665, "loss": 0.4273, "step": 24508 }, { "epoch": 13.69217877094972, "grad_norm": 3.017887830734253, "learning_rate": 0.0003166386554621849, "loss": 0.4465, "step": 24509 }, { "epoch": 13.692737430167599, "grad_norm": 0.806997537612915, "learning_rate": 0.00031661064425770306, "loss": 0.3675, "step": 24510 }, { "epoch": 13.693296089385475, "grad_norm": 0.5202308893203735, "learning_rate": 0.00031658263305322127, "loss": 0.4247, "step": 24511 }, { "epoch": 
13.693854748603352, "grad_norm": 0.4889890253543854, "learning_rate": 0.0003165546218487395, "loss": 0.4221, "step": 24512 }, { "epoch": 13.694413407821228, "grad_norm": 0.32687246799468994, "learning_rate": 0.0003165266106442577, "loss": 0.397, "step": 24513 }, { "epoch": 13.694972067039107, "grad_norm": 7.667118549346924, "learning_rate": 0.00031649859943977594, "loss": 0.4309, "step": 24514 }, { "epoch": 13.695530726256983, "grad_norm": 0.41886138916015625, "learning_rate": 0.00031647058823529414, "loss": 0.472, "step": 24515 }, { "epoch": 13.69608938547486, "grad_norm": 0.7411496639251709, "learning_rate": 0.0003164425770308123, "loss": 0.373, "step": 24516 }, { "epoch": 13.696648044692738, "grad_norm": 0.5268422961235046, "learning_rate": 0.00031641456582633056, "loss": 0.3805, "step": 24517 }, { "epoch": 13.697206703910615, "grad_norm": 0.6885746717453003, "learning_rate": 0.0003163865546218487, "loss": 0.4061, "step": 24518 }, { "epoch": 13.697765363128491, "grad_norm": 0.5006151795387268, "learning_rate": 0.00031635854341736697, "loss": 0.3743, "step": 24519 }, { "epoch": 13.69832402234637, "grad_norm": 0.3941054046154022, "learning_rate": 0.0003163305322128852, "loss": 0.405, "step": 24520 }, { "epoch": 13.698882681564246, "grad_norm": 0.6299375295639038, "learning_rate": 0.0003163025210084033, "loss": 0.421, "step": 24521 }, { "epoch": 13.699441340782123, "grad_norm": 0.8714436888694763, "learning_rate": 0.0003162745098039216, "loss": 0.4772, "step": 24522 }, { "epoch": 13.7, "grad_norm": 0.32743626832962036, "learning_rate": 0.0003162464985994398, "loss": 0.2963, "step": 24523 }, { "epoch": 13.700558659217878, "grad_norm": 0.4370838403701782, "learning_rate": 0.000316218487394958, "loss": 0.3863, "step": 24524 }, { "epoch": 13.701117318435754, "grad_norm": 0.40452754497528076, "learning_rate": 0.0003161904761904762, "loss": 0.3614, "step": 24525 }, { "epoch": 13.70167597765363, "grad_norm": 0.5632776618003845, "learning_rate": 0.00031616246498599436, 
"loss": 0.4264, "step": 24526 }, { "epoch": 13.702234636871509, "grad_norm": 0.6831368803977966, "learning_rate": 0.0003161344537815126, "loss": 0.4612, "step": 24527 }, { "epoch": 13.702793296089386, "grad_norm": 0.5946988463401794, "learning_rate": 0.0003161064425770308, "loss": 0.5087, "step": 24528 }, { "epoch": 13.703351955307262, "grad_norm": 0.4070788323879242, "learning_rate": 0.00031607843137254903, "loss": 0.3535, "step": 24529 }, { "epoch": 13.703910614525139, "grad_norm": 0.44555631279945374, "learning_rate": 0.00031605042016806723, "loss": 0.4467, "step": 24530 }, { "epoch": 13.704469273743017, "grad_norm": 0.5728515386581421, "learning_rate": 0.00031602240896358544, "loss": 0.4443, "step": 24531 }, { "epoch": 13.705027932960894, "grad_norm": 0.6138285994529724, "learning_rate": 0.00031599439775910365, "loss": 0.363, "step": 24532 }, { "epoch": 13.70558659217877, "grad_norm": 0.39038747549057007, "learning_rate": 0.00031596638655462185, "loss": 0.4248, "step": 24533 }, { "epoch": 13.706145251396649, "grad_norm": 0.4148624837398529, "learning_rate": 0.00031593837535014006, "loss": 0.4268, "step": 24534 }, { "epoch": 13.706703910614525, "grad_norm": 0.4829498529434204, "learning_rate": 0.00031591036414565826, "loss": 0.4783, "step": 24535 }, { "epoch": 13.707262569832402, "grad_norm": 0.4681767225265503, "learning_rate": 0.00031588235294117647, "loss": 0.4102, "step": 24536 }, { "epoch": 13.70782122905028, "grad_norm": 0.39798927307128906, "learning_rate": 0.0003158543417366947, "loss": 0.3487, "step": 24537 }, { "epoch": 13.708379888268157, "grad_norm": 0.4582329988479614, "learning_rate": 0.0003158263305322129, "loss": 0.3083, "step": 24538 }, { "epoch": 13.708938547486033, "grad_norm": 0.48490220308303833, "learning_rate": 0.00031579831932773114, "loss": 0.4, "step": 24539 }, { "epoch": 13.70949720670391, "grad_norm": 0.38172608613967896, "learning_rate": 0.0003157703081232493, "loss": 0.532, "step": 24540 }, { "epoch": 13.710055865921788, 
"grad_norm": 0.35551315546035767, "learning_rate": 0.0003157422969187675, "loss": 0.3808, "step": 24541 }, { "epoch": 13.710614525139665, "grad_norm": 0.288070946931839, "learning_rate": 0.0003157142857142857, "loss": 0.2339, "step": 24542 }, { "epoch": 13.711173184357541, "grad_norm": 0.5457075238227844, "learning_rate": 0.0003156862745098039, "loss": 0.6342, "step": 24543 }, { "epoch": 13.71173184357542, "grad_norm": 0.3962937295436859, "learning_rate": 0.00031565826330532217, "loss": 0.3717, "step": 24544 }, { "epoch": 13.712290502793296, "grad_norm": 0.4008278548717499, "learning_rate": 0.0003156302521008403, "loss": 0.3572, "step": 24545 }, { "epoch": 13.712849162011173, "grad_norm": 0.9183762073516846, "learning_rate": 0.00031560224089635853, "loss": 0.3559, "step": 24546 }, { "epoch": 13.713407821229051, "grad_norm": 0.2623822093009949, "learning_rate": 0.0003155742296918768, "loss": 0.2553, "step": 24547 }, { "epoch": 13.713966480446928, "grad_norm": 0.5116667151451111, "learning_rate": 0.00031554621848739494, "loss": 0.4813, "step": 24548 }, { "epoch": 13.714525139664804, "grad_norm": 0.5181401968002319, "learning_rate": 0.0003155182072829132, "loss": 0.4459, "step": 24549 }, { "epoch": 13.71508379888268, "grad_norm": 0.5862475037574768, "learning_rate": 0.00031549019607843135, "loss": 0.4588, "step": 24550 }, { "epoch": 13.71564245810056, "grad_norm": 0.5037033557891846, "learning_rate": 0.00031546218487394956, "loss": 0.4587, "step": 24551 }, { "epoch": 13.716201117318436, "grad_norm": 0.5580079555511475, "learning_rate": 0.0003154341736694678, "loss": 0.4885, "step": 24552 }, { "epoch": 13.716759776536312, "grad_norm": 0.5429263114929199, "learning_rate": 0.00031540616246498597, "loss": 0.3158, "step": 24553 }, { "epoch": 13.71731843575419, "grad_norm": 0.5533357858657837, "learning_rate": 0.00031537815126050423, "loss": 0.6756, "step": 24554 }, { "epoch": 13.717877094972067, "grad_norm": 1.255815863609314, "learning_rate": 0.00031535014005602244, 
"loss": 0.3585, "step": 24555 }, { "epoch": 13.718435754189944, "grad_norm": 0.5340338945388794, "learning_rate": 0.0003153221288515406, "loss": 0.3875, "step": 24556 }, { "epoch": 13.71899441340782, "grad_norm": 0.4080115854740143, "learning_rate": 0.00031529411764705885, "loss": 0.316, "step": 24557 }, { "epoch": 13.719553072625699, "grad_norm": 0.6611988544464111, "learning_rate": 0.000315266106442577, "loss": 0.4606, "step": 24558 }, { "epoch": 13.720111731843575, "grad_norm": 0.3810964524745941, "learning_rate": 0.00031523809523809526, "loss": 0.4552, "step": 24559 }, { "epoch": 13.720670391061452, "grad_norm": 1.0721698999404907, "learning_rate": 0.00031521008403361347, "loss": 0.4714, "step": 24560 }, { "epoch": 13.72122905027933, "grad_norm": 0.7423052787780762, "learning_rate": 0.0003151820728291316, "loss": 0.3403, "step": 24561 }, { "epoch": 13.721787709497207, "grad_norm": 0.49939483404159546, "learning_rate": 0.0003151540616246499, "loss": 0.5856, "step": 24562 }, { "epoch": 13.722346368715083, "grad_norm": 0.44711172580718994, "learning_rate": 0.0003151260504201681, "loss": 0.3844, "step": 24563 }, { "epoch": 13.722905027932962, "grad_norm": 0.4472387135028839, "learning_rate": 0.0003150980392156863, "loss": 0.4338, "step": 24564 }, { "epoch": 13.723463687150838, "grad_norm": 0.4355800747871399, "learning_rate": 0.0003150700280112045, "loss": 0.3082, "step": 24565 }, { "epoch": 13.724022346368715, "grad_norm": 0.4703373610973358, "learning_rate": 0.00031504201680672265, "loss": 0.4255, "step": 24566 }, { "epoch": 13.724581005586593, "grad_norm": 0.6501026153564453, "learning_rate": 0.0003150140056022409, "loss": 0.3076, "step": 24567 }, { "epoch": 13.72513966480447, "grad_norm": 0.5645608901977539, "learning_rate": 0.0003149859943977591, "loss": 0.5815, "step": 24568 }, { "epoch": 13.725698324022346, "grad_norm": 0.46356844902038574, "learning_rate": 0.0003149579831932773, "loss": 0.313, "step": 24569 }, { "epoch": 13.726256983240223, "grad_norm": 
0.38465526700019836, "learning_rate": 0.00031492997198879553, "loss": 0.4331, "step": 24570 }, { "epoch": 13.726815642458101, "grad_norm": 0.515557587146759, "learning_rate": 0.00031490196078431373, "loss": 0.3329, "step": 24571 }, { "epoch": 13.727374301675978, "grad_norm": 0.5560299754142761, "learning_rate": 0.00031487394957983194, "loss": 0.5442, "step": 24572 }, { "epoch": 13.727932960893854, "grad_norm": 0.5835617184638977, "learning_rate": 0.00031484593837535015, "loss": 0.3897, "step": 24573 }, { "epoch": 13.728491620111733, "grad_norm": 0.5543268322944641, "learning_rate": 0.00031481792717086835, "loss": 0.413, "step": 24574 }, { "epoch": 13.72905027932961, "grad_norm": 0.9479879140853882, "learning_rate": 0.00031478991596638656, "loss": 0.4342, "step": 24575 }, { "epoch": 13.729608938547486, "grad_norm": 0.40735381841659546, "learning_rate": 0.00031476190476190476, "loss": 0.4029, "step": 24576 }, { "epoch": 13.730167597765362, "grad_norm": 0.43447422981262207, "learning_rate": 0.00031473389355742297, "loss": 0.3302, "step": 24577 }, { "epoch": 13.73072625698324, "grad_norm": 0.500577986240387, "learning_rate": 0.0003147058823529412, "loss": 0.3815, "step": 24578 }, { "epoch": 13.731284916201117, "grad_norm": 0.39958199858665466, "learning_rate": 0.00031467787114845944, "loss": 0.378, "step": 24579 }, { "epoch": 13.731843575418994, "grad_norm": 0.3292566239833832, "learning_rate": 0.0003146498599439776, "loss": 0.4458, "step": 24580 }, { "epoch": 13.732402234636872, "grad_norm": 0.4399887025356293, "learning_rate": 0.0003146218487394958, "loss": 0.4577, "step": 24581 }, { "epoch": 13.732960893854749, "grad_norm": 0.5361818671226501, "learning_rate": 0.000314593837535014, "loss": 0.4597, "step": 24582 }, { "epoch": 13.733519553072625, "grad_norm": 0.3752124011516571, "learning_rate": 0.0003145658263305322, "loss": 0.3746, "step": 24583 }, { "epoch": 13.734078212290502, "grad_norm": 0.8983011841773987, "learning_rate": 0.00031453781512605047, "loss": 
0.4075, "step": 24584 }, { "epoch": 13.73463687150838, "grad_norm": 0.42087504267692566, "learning_rate": 0.0003145098039215686, "loss": 0.4069, "step": 24585 }, { "epoch": 13.735195530726257, "grad_norm": 0.684815526008606, "learning_rate": 0.0003144817927170868, "loss": 0.5309, "step": 24586 }, { "epoch": 13.735754189944133, "grad_norm": 0.48172667622566223, "learning_rate": 0.0003144537815126051, "loss": 0.4075, "step": 24587 }, { "epoch": 13.736312849162012, "grad_norm": 0.3652772307395935, "learning_rate": 0.00031442577030812324, "loss": 0.3708, "step": 24588 }, { "epoch": 13.736871508379888, "grad_norm": 0.5555179119110107, "learning_rate": 0.0003143977591036415, "loss": 0.4989, "step": 24589 }, { "epoch": 13.737430167597765, "grad_norm": 1.068904161453247, "learning_rate": 0.00031436974789915965, "loss": 0.4675, "step": 24590 }, { "epoch": 13.737988826815643, "grad_norm": 0.6132938861846924, "learning_rate": 0.00031434173669467785, "loss": 0.4282, "step": 24591 }, { "epoch": 13.73854748603352, "grad_norm": 0.41793474555015564, "learning_rate": 0.0003143137254901961, "loss": 0.3708, "step": 24592 }, { "epoch": 13.739106145251396, "grad_norm": 0.47035282850265503, "learning_rate": 0.00031428571428571427, "loss": 0.4435, "step": 24593 }, { "epoch": 13.739664804469275, "grad_norm": 0.4950382709503174, "learning_rate": 0.0003142577030812325, "loss": 0.3104, "step": 24594 }, { "epoch": 13.740223463687151, "grad_norm": 0.5601530075073242, "learning_rate": 0.00031422969187675073, "loss": 0.3845, "step": 24595 }, { "epoch": 13.740782122905028, "grad_norm": 0.7217606902122498, "learning_rate": 0.0003142016806722689, "loss": 0.38, "step": 24596 }, { "epoch": 13.741340782122904, "grad_norm": 0.7079225182533264, "learning_rate": 0.00031417366946778714, "loss": 0.4124, "step": 24597 }, { "epoch": 13.741899441340783, "grad_norm": 0.36488285660743713, "learning_rate": 0.0003141456582633053, "loss": 0.352, "step": 24598 }, { "epoch": 13.74245810055866, "grad_norm": 
0.6521407961845398, "learning_rate": 0.00031411764705882356, "loss": 0.3825, "step": 24599 }, { "epoch": 13.743016759776536, "grad_norm": 0.9532288312911987, "learning_rate": 0.00031408963585434176, "loss": 0.3708, "step": 24600 }, { "epoch": 13.743575418994414, "grad_norm": 1.2292375564575195, "learning_rate": 0.0003140616246498599, "loss": 0.4161, "step": 24601 }, { "epoch": 13.74413407821229, "grad_norm": 0.7507383227348328, "learning_rate": 0.0003140336134453782, "loss": 0.3888, "step": 24602 }, { "epoch": 13.744692737430167, "grad_norm": 0.4155767858028412, "learning_rate": 0.0003140056022408964, "loss": 0.4368, "step": 24603 }, { "epoch": 13.745251396648044, "grad_norm": 0.4549953043460846, "learning_rate": 0.00031397759103641453, "loss": 0.4851, "step": 24604 }, { "epoch": 13.745810055865922, "grad_norm": 0.5594121217727661, "learning_rate": 0.0003139495798319328, "loss": 0.3797, "step": 24605 }, { "epoch": 13.746368715083799, "grad_norm": 0.7021816968917847, "learning_rate": 0.00031392156862745094, "loss": 0.5175, "step": 24606 }, { "epoch": 13.746927374301675, "grad_norm": 1.0182263851165771, "learning_rate": 0.0003138935574229692, "loss": 0.3267, "step": 24607 }, { "epoch": 13.747486033519554, "grad_norm": 0.379764586687088, "learning_rate": 0.0003138655462184874, "loss": 0.3446, "step": 24608 }, { "epoch": 13.74804469273743, "grad_norm": 0.4251542389392853, "learning_rate": 0.00031383753501400556, "loss": 0.3929, "step": 24609 }, { "epoch": 13.748603351955307, "grad_norm": 0.3923177421092987, "learning_rate": 0.0003138095238095238, "loss": 0.3921, "step": 24610 }, { "epoch": 13.749162011173185, "grad_norm": 0.47387468814849854, "learning_rate": 0.00031378151260504203, "loss": 0.4049, "step": 24611 }, { "epoch": 13.749720670391062, "grad_norm": 0.4466247856616974, "learning_rate": 0.00031375350140056023, "loss": 0.4826, "step": 24612 }, { "epoch": 13.750279329608938, "grad_norm": 0.5341809391975403, "learning_rate": 0.00031372549019607844, "loss": 0.5005, 
"step": 24613 }, { "epoch": 13.750837988826815, "grad_norm": 0.6019898653030396, "learning_rate": 0.0003136974789915966, "loss": 0.3905, "step": 24614 }, { "epoch": 13.751396648044693, "grad_norm": 0.36196956038475037, "learning_rate": 0.00031366946778711485, "loss": 0.3505, "step": 24615 }, { "epoch": 13.75195530726257, "grad_norm": 0.5079441070556641, "learning_rate": 0.00031364145658263306, "loss": 0.4109, "step": 24616 }, { "epoch": 13.752513966480446, "grad_norm": 0.46043869853019714, "learning_rate": 0.00031361344537815126, "loss": 0.3734, "step": 24617 }, { "epoch": 13.753072625698325, "grad_norm": 0.8851267695426941, "learning_rate": 0.00031358543417366947, "loss": 0.4339, "step": 24618 }, { "epoch": 13.753631284916201, "grad_norm": 0.47812914848327637, "learning_rate": 0.0003135574229691877, "loss": 0.3874, "step": 24619 }, { "epoch": 13.754189944134078, "grad_norm": 0.34665754437446594, "learning_rate": 0.0003135294117647059, "loss": 0.3923, "step": 24620 }, { "epoch": 13.754748603351956, "grad_norm": 0.8204054832458496, "learning_rate": 0.0003135014005602241, "loss": 0.3932, "step": 24621 }, { "epoch": 13.755307262569833, "grad_norm": 0.6049415469169617, "learning_rate": 0.00031347338935574235, "loss": 0.3562, "step": 24622 }, { "epoch": 13.75586592178771, "grad_norm": 0.3725706934928894, "learning_rate": 0.0003134453781512605, "loss": 0.3359, "step": 24623 }, { "epoch": 13.756424581005586, "grad_norm": 0.3988759517669678, "learning_rate": 0.0003134173669467787, "loss": 0.376, "step": 24624 }, { "epoch": 13.756983240223464, "grad_norm": 0.4468362331390381, "learning_rate": 0.0003133893557422969, "loss": 0.4223, "step": 24625 }, { "epoch": 13.75754189944134, "grad_norm": 5.098926067352295, "learning_rate": 0.0003133613445378151, "loss": 0.4952, "step": 24626 }, { "epoch": 13.758100558659217, "grad_norm": 0.45199814438819885, "learning_rate": 0.0003133333333333334, "loss": 0.3626, "step": 24627 }, { "epoch": 13.758659217877096, "grad_norm": 
0.5131231546401978, "learning_rate": 0.00031330532212885153, "loss": 0.4385, "step": 24628 }, { "epoch": 13.759217877094972, "grad_norm": 0.8979408740997314, "learning_rate": 0.00031327731092436974, "loss": 0.4279, "step": 24629 }, { "epoch": 13.759776536312849, "grad_norm": 0.8850966691970825, "learning_rate": 0.000313249299719888, "loss": 0.4999, "step": 24630 }, { "epoch": 13.760335195530725, "grad_norm": 0.44493335485458374, "learning_rate": 0.00031322128851540615, "loss": 0.4163, "step": 24631 }, { "epoch": 13.760893854748604, "grad_norm": 0.5462016463279724, "learning_rate": 0.0003131932773109244, "loss": 0.3428, "step": 24632 }, { "epoch": 13.76145251396648, "grad_norm": 0.4455803632736206, "learning_rate": 0.00031316526610644256, "loss": 0.4576, "step": 24633 }, { "epoch": 13.762011173184357, "grad_norm": 0.49655285477638245, "learning_rate": 0.00031313725490196077, "loss": 0.353, "step": 24634 }, { "epoch": 13.762569832402235, "grad_norm": 0.5043083429336548, "learning_rate": 0.000313109243697479, "loss": 0.4215, "step": 24635 }, { "epoch": 13.763128491620112, "grad_norm": 0.4754570424556732, "learning_rate": 0.0003130812324929972, "loss": 0.3636, "step": 24636 }, { "epoch": 13.763687150837988, "grad_norm": 0.9718914031982422, "learning_rate": 0.00031305322128851544, "loss": 0.498, "step": 24637 }, { "epoch": 13.764245810055867, "grad_norm": 0.3998493254184723, "learning_rate": 0.00031302521008403364, "loss": 0.3539, "step": 24638 }, { "epoch": 13.764804469273743, "grad_norm": 0.6009743809700012, "learning_rate": 0.0003129971988795518, "loss": 0.4535, "step": 24639 }, { "epoch": 13.76536312849162, "grad_norm": 0.9182845950126648, "learning_rate": 0.00031296918767507006, "loss": 0.432, "step": 24640 }, { "epoch": 13.765921787709498, "grad_norm": 0.6865869164466858, "learning_rate": 0.0003129411764705882, "loss": 0.342, "step": 24641 }, { "epoch": 13.766480446927375, "grad_norm": 0.402852326631546, "learning_rate": 0.00031291316526610647, "loss": 0.4819, 
"step": 24642 }, { "epoch": 13.767039106145251, "grad_norm": 0.4187225103378296, "learning_rate": 0.0003128851540616247, "loss": 0.4365, "step": 24643 }, { "epoch": 13.767597765363128, "grad_norm": 2.5152461528778076, "learning_rate": 0.0003128571428571428, "loss": 0.4728, "step": 24644 }, { "epoch": 13.768156424581006, "grad_norm": 0.4061499536037445, "learning_rate": 0.0003128291316526611, "loss": 0.4361, "step": 24645 }, { "epoch": 13.768715083798883, "grad_norm": 0.4748583436012268, "learning_rate": 0.0003128011204481793, "loss": 0.4209, "step": 24646 }, { "epoch": 13.76927374301676, "grad_norm": 0.5399193167686462, "learning_rate": 0.0003127731092436975, "loss": 0.3765, "step": 24647 }, { "epoch": 13.769832402234638, "grad_norm": 0.417892724275589, "learning_rate": 0.0003127450980392157, "loss": 0.4622, "step": 24648 }, { "epoch": 13.770391061452514, "grad_norm": 3.1574244499206543, "learning_rate": 0.00031271708683473386, "loss": 0.5254, "step": 24649 }, { "epoch": 13.77094972067039, "grad_norm": 0.6645531058311462, "learning_rate": 0.0003126890756302521, "loss": 0.6908, "step": 24650 }, { "epoch": 13.771508379888267, "grad_norm": 0.33625566959381104, "learning_rate": 0.0003126610644257703, "loss": 0.3904, "step": 24651 }, { "epoch": 13.772067039106146, "grad_norm": 0.8538450002670288, "learning_rate": 0.00031263305322128853, "loss": 0.4866, "step": 24652 }, { "epoch": 13.772625698324022, "grad_norm": 0.43366724252700806, "learning_rate": 0.00031260504201680673, "loss": 0.4575, "step": 24653 }, { "epoch": 13.773184357541899, "grad_norm": 0.5008127093315125, "learning_rate": 0.00031257703081232494, "loss": 0.4043, "step": 24654 }, { "epoch": 13.773743016759777, "grad_norm": 0.734473705291748, "learning_rate": 0.00031254901960784315, "loss": 0.4041, "step": 24655 }, { "epoch": 13.774301675977654, "grad_norm": 0.6329787373542786, "learning_rate": 0.00031252100840336135, "loss": 0.456, "step": 24656 }, { "epoch": 13.77486033519553, "grad_norm": 
0.5221911668777466, "learning_rate": 0.00031249299719887956, "loss": 0.4338, "step": 24657 }, { "epoch": 13.775418994413407, "grad_norm": 0.47905802726745605, "learning_rate": 0.00031246498599439776, "loss": 0.3983, "step": 24658 }, { "epoch": 13.775977653631285, "grad_norm": 0.4423077702522278, "learning_rate": 0.00031243697478991597, "loss": 0.4912, "step": 24659 }, { "epoch": 13.776536312849162, "grad_norm": 0.568717360496521, "learning_rate": 0.0003124089635854342, "loss": 0.5354, "step": 24660 }, { "epoch": 13.777094972067038, "grad_norm": 0.6811214685440063, "learning_rate": 0.0003123809523809524, "loss": 0.4527, "step": 24661 }, { "epoch": 13.777653631284917, "grad_norm": 0.3925004303455353, "learning_rate": 0.00031235294117647064, "loss": 0.3469, "step": 24662 }, { "epoch": 13.778212290502793, "grad_norm": 0.4180448651313782, "learning_rate": 0.0003123249299719888, "loss": 0.4302, "step": 24663 }, { "epoch": 13.77877094972067, "grad_norm": 0.35556793212890625, "learning_rate": 0.000312296918767507, "loss": 0.3462, "step": 24664 }, { "epoch": 13.779329608938548, "grad_norm": 0.5247901678085327, "learning_rate": 0.0003122689075630252, "loss": 0.4785, "step": 24665 }, { "epoch": 13.779888268156425, "grad_norm": 0.6282362341880798, "learning_rate": 0.0003122408963585434, "loss": 0.4286, "step": 24666 }, { "epoch": 13.780446927374301, "grad_norm": 3.9609861373901367, "learning_rate": 0.00031221288515406167, "loss": 0.3743, "step": 24667 }, { "epoch": 13.78100558659218, "grad_norm": 0.5430408716201782, "learning_rate": 0.0003121848739495798, "loss": 0.4937, "step": 24668 }, { "epoch": 13.781564245810056, "grad_norm": 1.0549169778823853, "learning_rate": 0.00031215686274509803, "loss": 0.4234, "step": 24669 }, { "epoch": 13.782122905027933, "grad_norm": 0.4465056359767914, "learning_rate": 0.0003121288515406163, "loss": 0.4192, "step": 24670 }, { "epoch": 13.78268156424581, "grad_norm": 0.5028314590454102, "learning_rate": 0.00031210084033613444, "loss": 0.4091, 
"step": 24671 }, { "epoch": 13.783240223463688, "grad_norm": 0.690117597579956, "learning_rate": 0.0003120728291316527, "loss": 0.3422, "step": 24672 }, { "epoch": 13.783798882681564, "grad_norm": 2.79848051071167, "learning_rate": 0.00031204481792717085, "loss": 0.4305, "step": 24673 }, { "epoch": 13.78435754189944, "grad_norm": 0.548388659954071, "learning_rate": 0.00031201680672268906, "loss": 0.4641, "step": 24674 }, { "epoch": 13.78491620111732, "grad_norm": 0.41758206486701965, "learning_rate": 0.0003119887955182073, "loss": 0.4391, "step": 24675 }, { "epoch": 13.785474860335196, "grad_norm": 0.5833352208137512, "learning_rate": 0.00031196078431372547, "loss": 0.3802, "step": 24676 }, { "epoch": 13.786033519553072, "grad_norm": 0.5385997891426086, "learning_rate": 0.00031193277310924373, "loss": 0.3592, "step": 24677 }, { "epoch": 13.786592178770949, "grad_norm": 0.40148165822029114, "learning_rate": 0.00031190476190476194, "loss": 0.3826, "step": 24678 }, { "epoch": 13.787150837988827, "grad_norm": 0.4532122313976288, "learning_rate": 0.0003118767507002801, "loss": 0.3265, "step": 24679 }, { "epoch": 13.787709497206704, "grad_norm": 0.6617640852928162, "learning_rate": 0.00031184873949579835, "loss": 0.3516, "step": 24680 }, { "epoch": 13.78826815642458, "grad_norm": 0.3093830347061157, "learning_rate": 0.0003118207282913165, "loss": 0.3361, "step": 24681 }, { "epoch": 13.788826815642459, "grad_norm": 0.629340410232544, "learning_rate": 0.00031179271708683476, "loss": 0.5207, "step": 24682 }, { "epoch": 13.789385474860335, "grad_norm": 0.5637341141700745, "learning_rate": 0.00031176470588235297, "loss": 0.4489, "step": 24683 }, { "epoch": 13.789944134078212, "grad_norm": 0.6393017172813416, "learning_rate": 0.0003117366946778711, "loss": 0.5139, "step": 24684 }, { "epoch": 13.79050279329609, "grad_norm": 0.38375043869018555, "learning_rate": 0.0003117086834733894, "loss": 0.3241, "step": 24685 }, { "epoch": 13.791061452513967, "grad_norm": 
0.39735087752342224, "learning_rate": 0.0003116806722689076, "loss": 0.2916, "step": 24686 }, { "epoch": 13.791620111731843, "grad_norm": 0.578385055065155, "learning_rate": 0.0003116526610644258, "loss": 0.406, "step": 24687 }, { "epoch": 13.79217877094972, "grad_norm": 0.7574548721313477, "learning_rate": 0.000311624649859944, "loss": 0.4503, "step": 24688 }, { "epoch": 13.792737430167598, "grad_norm": 0.621619462966919, "learning_rate": 0.00031159663865546215, "loss": 0.4245, "step": 24689 }, { "epoch": 13.793296089385475, "grad_norm": 0.7814710736274719, "learning_rate": 0.0003115686274509804, "loss": 0.636, "step": 24690 }, { "epoch": 13.793854748603351, "grad_norm": 0.6537702083587646, "learning_rate": 0.0003115406162464986, "loss": 0.4781, "step": 24691 }, { "epoch": 13.79441340782123, "grad_norm": 0.5536796450614929, "learning_rate": 0.0003115126050420168, "loss": 0.4541, "step": 24692 }, { "epoch": 13.794972067039106, "grad_norm": 0.557115912437439, "learning_rate": 0.00031148459383753503, "loss": 0.4299, "step": 24693 }, { "epoch": 13.795530726256983, "grad_norm": 0.4359208047389984, "learning_rate": 0.00031145658263305323, "loss": 0.4022, "step": 24694 }, { "epoch": 13.796089385474861, "grad_norm": 0.5477483868598938, "learning_rate": 0.00031142857142857144, "loss": 0.2924, "step": 24695 }, { "epoch": 13.796648044692738, "grad_norm": 0.38812774419784546, "learning_rate": 0.00031140056022408965, "loss": 0.3855, "step": 24696 }, { "epoch": 13.797206703910614, "grad_norm": 0.3890925943851471, "learning_rate": 0.00031137254901960785, "loss": 0.4509, "step": 24697 }, { "epoch": 13.797765363128491, "grad_norm": 0.9076298475265503, "learning_rate": 0.00031134453781512606, "loss": 0.5379, "step": 24698 }, { "epoch": 13.79832402234637, "grad_norm": 0.7020007371902466, "learning_rate": 0.00031131652661064426, "loss": 0.3944, "step": 24699 }, { "epoch": 13.798882681564246, "grad_norm": 0.4714997112751007, "learning_rate": 0.00031128851540616247, "loss": 0.426, 
"step": 24700 }, { "epoch": 13.799441340782122, "grad_norm": 0.39724090695381165, "learning_rate": 0.0003112605042016807, "loss": 0.4327, "step": 24701 }, { "epoch": 13.8, "grad_norm": 0.4750833213329315, "learning_rate": 0.00031123249299719894, "loss": 0.4446, "step": 24702 }, { "epoch": 13.800558659217877, "grad_norm": 0.5161691904067993, "learning_rate": 0.0003112044817927171, "loss": 0.3625, "step": 24703 }, { "epoch": 13.801117318435754, "grad_norm": 0.4361058175563812, "learning_rate": 0.0003111764705882353, "loss": 0.4515, "step": 24704 }, { "epoch": 13.80167597765363, "grad_norm": 0.38783717155456543, "learning_rate": 0.0003111484593837535, "loss": 0.3889, "step": 24705 }, { "epoch": 13.802234636871509, "grad_norm": 0.4028116464614868, "learning_rate": 0.0003111204481792717, "loss": 0.3657, "step": 24706 }, { "epoch": 13.802793296089385, "grad_norm": 0.5284596085548401, "learning_rate": 0.00031109243697478997, "loss": 0.4298, "step": 24707 }, { "epoch": 13.803351955307262, "grad_norm": 0.5655832290649414, "learning_rate": 0.0003110644257703081, "loss": 0.4333, "step": 24708 }, { "epoch": 13.80391061452514, "grad_norm": 0.5062133073806763, "learning_rate": 0.0003110364145658263, "loss": 0.5057, "step": 24709 }, { "epoch": 13.804469273743017, "grad_norm": 0.43898701667785645, "learning_rate": 0.0003110084033613446, "loss": 0.3977, "step": 24710 }, { "epoch": 13.805027932960893, "grad_norm": 0.8681417107582092, "learning_rate": 0.00031098039215686274, "loss": 0.5185, "step": 24711 }, { "epoch": 13.805586592178772, "grad_norm": 0.4172993004322052, "learning_rate": 0.000310952380952381, "loss": 0.4523, "step": 24712 }, { "epoch": 13.806145251396648, "grad_norm": 0.4801238775253296, "learning_rate": 0.00031092436974789915, "loss": 0.4043, "step": 24713 }, { "epoch": 13.806703910614525, "grad_norm": 0.4862687587738037, "learning_rate": 0.00031089635854341735, "loss": 0.4016, "step": 24714 }, { "epoch": 13.807262569832401, "grad_norm": 0.30294865369796753, 
"learning_rate": 0.0003108683473389356, "loss": 0.3559, "step": 24715 }, { "epoch": 13.80782122905028, "grad_norm": 0.4659216105937958, "learning_rate": 0.00031084033613445377, "loss": 0.3914, "step": 24716 }, { "epoch": 13.808379888268156, "grad_norm": 0.30654600262641907, "learning_rate": 0.00031081232492997197, "loss": 0.3372, "step": 24717 }, { "epoch": 13.808938547486033, "grad_norm": 0.4536445736885071, "learning_rate": 0.00031078431372549023, "loss": 0.472, "step": 24718 }, { "epoch": 13.809497206703911, "grad_norm": 1.4842451810836792, "learning_rate": 0.0003107563025210084, "loss": 0.4134, "step": 24719 }, { "epoch": 13.810055865921788, "grad_norm": 0.5635777711868286, "learning_rate": 0.00031072829131652664, "loss": 0.3248, "step": 24720 }, { "epoch": 13.810614525139664, "grad_norm": 0.42820364236831665, "learning_rate": 0.0003107002801120448, "loss": 0.3508, "step": 24721 }, { "epoch": 13.811173184357543, "grad_norm": 0.4716918468475342, "learning_rate": 0.000310672268907563, "loss": 0.4834, "step": 24722 }, { "epoch": 13.81173184357542, "grad_norm": 2.3274002075195312, "learning_rate": 0.00031064425770308126, "loss": 0.3529, "step": 24723 }, { "epoch": 13.812290502793296, "grad_norm": 0.5653722882270813, "learning_rate": 0.0003106162464985994, "loss": 0.491, "step": 24724 }, { "epoch": 13.812849162011172, "grad_norm": 1.42947256565094, "learning_rate": 0.0003105882352941177, "loss": 0.3504, "step": 24725 }, { "epoch": 13.81340782122905, "grad_norm": 1.928516149520874, "learning_rate": 0.0003105602240896359, "loss": 0.4699, "step": 24726 }, { "epoch": 13.813966480446927, "grad_norm": 0.40962472558021545, "learning_rate": 0.00031053221288515403, "loss": 0.3122, "step": 24727 }, { "epoch": 13.814525139664804, "grad_norm": 1.3945581912994385, "learning_rate": 0.0003105042016806723, "loss": 0.4259, "step": 24728 }, { "epoch": 13.815083798882682, "grad_norm": 1.3861957788467407, "learning_rate": 0.00031047619047619044, "loss": 0.3504, "step": 24729 }, { 
"epoch": 13.815642458100559, "grad_norm": 0.7788215279579163, "learning_rate": 0.0003104481792717087, "loss": 0.3956, "step": 24730 }, { "epoch": 13.816201117318435, "grad_norm": 0.4320719540119171, "learning_rate": 0.0003104201680672269, "loss": 0.3542, "step": 24731 }, { "epoch": 13.816759776536312, "grad_norm": 0.3784499168395996, "learning_rate": 0.00031039215686274506, "loss": 0.4004, "step": 24732 }, { "epoch": 13.81731843575419, "grad_norm": 0.4452926516532898, "learning_rate": 0.0003103641456582633, "loss": 0.3821, "step": 24733 }, { "epoch": 13.817877094972067, "grad_norm": 1.0932354927062988, "learning_rate": 0.00031033613445378153, "loss": 0.5385, "step": 24734 }, { "epoch": 13.818435754189943, "grad_norm": 0.4239197075366974, "learning_rate": 0.00031030812324929973, "loss": 0.4186, "step": 24735 }, { "epoch": 13.818994413407822, "grad_norm": 0.5929141044616699, "learning_rate": 0.00031028011204481794, "loss": 0.4336, "step": 24736 }, { "epoch": 13.819553072625698, "grad_norm": 0.36009687185287476, "learning_rate": 0.0003102521008403361, "loss": 0.3616, "step": 24737 }, { "epoch": 13.820111731843575, "grad_norm": 3.307466983795166, "learning_rate": 0.00031022408963585435, "loss": 0.3815, "step": 24738 }, { "epoch": 13.820670391061453, "grad_norm": 0.35800763964653015, "learning_rate": 0.00031019607843137256, "loss": 0.3141, "step": 24739 }, { "epoch": 13.82122905027933, "grad_norm": 0.4880959391593933, "learning_rate": 0.00031016806722689076, "loss": 0.4333, "step": 24740 }, { "epoch": 13.821787709497206, "grad_norm": 2.480283737182617, "learning_rate": 0.00031014005602240897, "loss": 0.4749, "step": 24741 }, { "epoch": 13.822346368715085, "grad_norm": 0.6007377505302429, "learning_rate": 0.0003101120448179272, "loss": 0.5857, "step": 24742 }, { "epoch": 13.822905027932961, "grad_norm": 1.4767669439315796, "learning_rate": 0.0003100840336134454, "loss": 0.4434, "step": 24743 }, { "epoch": 13.823463687150838, "grad_norm": 0.39550453424453735, 
"learning_rate": 0.0003100560224089636, "loss": 0.4314, "step": 24744 }, { "epoch": 13.824022346368714, "grad_norm": 1.0598410367965698, "learning_rate": 0.0003100280112044818, "loss": 0.3635, "step": 24745 }, { "epoch": 13.824581005586593, "grad_norm": 0.9734326004981995, "learning_rate": 0.00031, "loss": 0.4689, "step": 24746 }, { "epoch": 13.82513966480447, "grad_norm": 0.5384419560432434, "learning_rate": 0.0003099719887955182, "loss": 0.3902, "step": 24747 }, { "epoch": 13.825698324022346, "grad_norm": 2.225046396255493, "learning_rate": 0.0003099439775910364, "loss": 0.3756, "step": 24748 }, { "epoch": 13.826256983240224, "grad_norm": 10.570537567138672, "learning_rate": 0.0003099159663865546, "loss": 0.372, "step": 24749 }, { "epoch": 13.8268156424581, "grad_norm": 0.43944209814071655, "learning_rate": 0.0003098879551820729, "loss": 0.376, "step": 24750 }, { "epoch": 13.827374301675977, "grad_norm": 0.39851173758506775, "learning_rate": 0.00030985994397759103, "loss": 0.3766, "step": 24751 }, { "epoch": 13.827932960893854, "grad_norm": 0.4189709424972534, "learning_rate": 0.00030983193277310924, "loss": 0.4647, "step": 24752 }, { "epoch": 13.828491620111732, "grad_norm": 0.6913696527481079, "learning_rate": 0.00030980392156862744, "loss": 0.353, "step": 24753 }, { "epoch": 13.829050279329609, "grad_norm": 0.4704824388027191, "learning_rate": 0.00030977591036414565, "loss": 0.3919, "step": 24754 }, { "epoch": 13.829608938547485, "grad_norm": 0.49627575278282166, "learning_rate": 0.0003097478991596639, "loss": 0.3783, "step": 24755 }, { "epoch": 13.830167597765364, "grad_norm": 0.574061393737793, "learning_rate": 0.00030971988795518206, "loss": 0.5715, "step": 24756 }, { "epoch": 13.83072625698324, "grad_norm": 0.5746279954910278, "learning_rate": 0.00030969187675070027, "loss": 0.4117, "step": 24757 }, { "epoch": 13.831284916201117, "grad_norm": 0.5381438136100769, "learning_rate": 0.0003096638655462185, "loss": 0.3408, "step": 24758 }, { "epoch": 
13.831843575418995, "grad_norm": 0.5281462669372559, "learning_rate": 0.0003096358543417367, "loss": 0.3985, "step": 24759 }, { "epoch": 13.832402234636872, "grad_norm": 0.448641300201416, "learning_rate": 0.00030960784313725494, "loss": 0.3417, "step": 24760 }, { "epoch": 13.832960893854748, "grad_norm": 0.5278318524360657, "learning_rate": 0.0003095798319327731, "loss": 0.3657, "step": 24761 }, { "epoch": 13.833519553072625, "grad_norm": 0.5017572641372681, "learning_rate": 0.0003095518207282913, "loss": 0.5063, "step": 24762 }, { "epoch": 13.834078212290503, "grad_norm": 0.5167214274406433, "learning_rate": 0.00030952380952380956, "loss": 0.5081, "step": 24763 }, { "epoch": 13.83463687150838, "grad_norm": 0.38477057218551636, "learning_rate": 0.0003094957983193277, "loss": 0.2677, "step": 24764 }, { "epoch": 13.835195530726256, "grad_norm": 0.5728855133056641, "learning_rate": 0.00030946778711484597, "loss": 0.3538, "step": 24765 }, { "epoch": 13.835754189944135, "grad_norm": 0.5977369546890259, "learning_rate": 0.0003094397759103642, "loss": 0.6885, "step": 24766 }, { "epoch": 13.836312849162011, "grad_norm": 0.3712436854839325, "learning_rate": 0.0003094117647058823, "loss": 0.3134, "step": 24767 }, { "epoch": 13.836871508379888, "grad_norm": 0.4216599762439728, "learning_rate": 0.0003093837535014006, "loss": 0.3811, "step": 24768 }, { "epoch": 13.837430167597766, "grad_norm": 0.6151779890060425, "learning_rate": 0.00030935574229691874, "loss": 0.3746, "step": 24769 }, { "epoch": 13.837988826815643, "grad_norm": 0.36623167991638184, "learning_rate": 0.000309327731092437, "loss": 0.3668, "step": 24770 }, { "epoch": 13.83854748603352, "grad_norm": 0.5187537670135498, "learning_rate": 0.0003092997198879552, "loss": 0.4323, "step": 24771 }, { "epoch": 13.839106145251396, "grad_norm": 0.478880912065506, "learning_rate": 0.00030927170868347336, "loss": 0.4179, "step": 24772 }, { "epoch": 13.839664804469274, "grad_norm": 1.3582699298858643, "learning_rate": 
0.0003092436974789916, "loss": 0.3941, "step": 24773 }, { "epoch": 13.84022346368715, "grad_norm": 0.3994075059890747, "learning_rate": 0.0003092156862745098, "loss": 0.429, "step": 24774 }, { "epoch": 13.840782122905027, "grad_norm": 0.3768007159233093, "learning_rate": 0.00030918767507002803, "loss": 0.3608, "step": 24775 }, { "epoch": 13.841340782122906, "grad_norm": 0.4348650574684143, "learning_rate": 0.00030915966386554623, "loss": 0.3659, "step": 24776 }, { "epoch": 13.841899441340782, "grad_norm": 0.6999574303627014, "learning_rate": 0.0003091316526610644, "loss": 0.5593, "step": 24777 }, { "epoch": 13.842458100558659, "grad_norm": 0.6640080809593201, "learning_rate": 0.00030910364145658265, "loss": 0.6605, "step": 24778 }, { "epoch": 13.843016759776535, "grad_norm": 0.4254578948020935, "learning_rate": 0.00030907563025210085, "loss": 0.3679, "step": 24779 }, { "epoch": 13.843575418994414, "grad_norm": 0.7168781161308289, "learning_rate": 0.00030904761904761906, "loss": 0.3735, "step": 24780 }, { "epoch": 13.84413407821229, "grad_norm": 0.7321787476539612, "learning_rate": 0.00030901960784313726, "loss": 0.416, "step": 24781 }, { "epoch": 13.844692737430167, "grad_norm": 0.8198249936103821, "learning_rate": 0.00030899159663865547, "loss": 0.3627, "step": 24782 }, { "epoch": 13.845251396648045, "grad_norm": 0.4430847465991974, "learning_rate": 0.0003089635854341737, "loss": 0.4394, "step": 24783 }, { "epoch": 13.845810055865922, "grad_norm": 0.4459068179130554, "learning_rate": 0.0003089355742296919, "loss": 0.3838, "step": 24784 }, { "epoch": 13.846368715083798, "grad_norm": 0.3711383044719696, "learning_rate": 0.0003089075630252101, "loss": 0.4061, "step": 24785 }, { "epoch": 13.846927374301677, "grad_norm": 1.381853699684143, "learning_rate": 0.0003088795518207283, "loss": 0.4183, "step": 24786 }, { "epoch": 13.847486033519553, "grad_norm": 0.45864179730415344, "learning_rate": 0.0003088515406162465, "loss": 0.3913, "step": 24787 }, { "epoch": 
13.84804469273743, "grad_norm": 0.37968701124191284, "learning_rate": 0.0003088235294117647, "loss": 0.339, "step": 24788 }, { "epoch": 13.848603351955306, "grad_norm": 0.5475267171859741, "learning_rate": 0.0003087955182072829, "loss": 0.3488, "step": 24789 }, { "epoch": 13.849162011173185, "grad_norm": 0.6172983646392822, "learning_rate": 0.00030876750700280117, "loss": 0.419, "step": 24790 }, { "epoch": 13.849720670391061, "grad_norm": 0.8614798784255981, "learning_rate": 0.0003087394957983193, "loss": 0.4723, "step": 24791 }, { "epoch": 13.850279329608938, "grad_norm": 0.3572481572628021, "learning_rate": 0.00030871148459383753, "loss": 0.323, "step": 24792 }, { "epoch": 13.850837988826816, "grad_norm": 0.7472407817840576, "learning_rate": 0.00030868347338935574, "loss": 0.5763, "step": 24793 }, { "epoch": 13.851396648044693, "grad_norm": 1.253983736038208, "learning_rate": 0.00030865546218487394, "loss": 0.3707, "step": 24794 }, { "epoch": 13.85195530726257, "grad_norm": 0.6515014171600342, "learning_rate": 0.0003086274509803922, "loss": 0.4965, "step": 24795 }, { "epoch": 13.852513966480448, "grad_norm": 0.5535890460014343, "learning_rate": 0.00030859943977591035, "loss": 0.4612, "step": 24796 }, { "epoch": 13.853072625698324, "grad_norm": 0.6500381231307983, "learning_rate": 0.00030857142857142856, "loss": 0.4783, "step": 24797 }, { "epoch": 13.8536312849162, "grad_norm": 1.5299204587936401, "learning_rate": 0.0003085434173669468, "loss": 0.3689, "step": 24798 }, { "epoch": 13.854189944134077, "grad_norm": 0.9100631475448608, "learning_rate": 0.00030851540616246497, "loss": 0.4797, "step": 24799 }, { "epoch": 13.854748603351956, "grad_norm": 0.5053530931472778, "learning_rate": 0.00030848739495798323, "loss": 0.6667, "step": 24800 }, { "epoch": 13.855307262569832, "grad_norm": 0.31468626856803894, "learning_rate": 0.0003084593837535014, "loss": 0.3313, "step": 24801 }, { "epoch": 13.855865921787709, "grad_norm": 0.7528792023658752, "learning_rate": 
0.0003084313725490196, "loss": 0.4396, "step": 24802 }, { "epoch": 13.856424581005587, "grad_norm": 0.5894933342933655, "learning_rate": 0.00030840336134453785, "loss": 0.472, "step": 24803 }, { "epoch": 13.856983240223464, "grad_norm": 0.3429962992668152, "learning_rate": 0.000308375350140056, "loss": 0.4024, "step": 24804 }, { "epoch": 13.85754189944134, "grad_norm": 0.5356067419052124, "learning_rate": 0.00030834733893557426, "loss": 0.4173, "step": 24805 }, { "epoch": 13.858100558659217, "grad_norm": 0.5016393065452576, "learning_rate": 0.00030831932773109247, "loss": 0.5019, "step": 24806 }, { "epoch": 13.858659217877095, "grad_norm": 0.4478253722190857, "learning_rate": 0.0003082913165266106, "loss": 0.3586, "step": 24807 }, { "epoch": 13.859217877094972, "grad_norm": 0.48870691657066345, "learning_rate": 0.0003082633053221289, "loss": 0.5072, "step": 24808 }, { "epoch": 13.859776536312848, "grad_norm": 0.4953252077102661, "learning_rate": 0.00030823529411764703, "loss": 0.4422, "step": 24809 }, { "epoch": 13.860335195530727, "grad_norm": 0.6184070110321045, "learning_rate": 0.0003082072829131653, "loss": 0.4862, "step": 24810 }, { "epoch": 13.860893854748603, "grad_norm": 0.3630295693874359, "learning_rate": 0.0003081792717086835, "loss": 0.4107, "step": 24811 }, { "epoch": 13.86145251396648, "grad_norm": 0.5265270471572876, "learning_rate": 0.00030815126050420165, "loss": 0.3247, "step": 24812 }, { "epoch": 13.862011173184358, "grad_norm": 1.2840449810028076, "learning_rate": 0.0003081232492997199, "loss": 0.3798, "step": 24813 }, { "epoch": 13.862569832402235, "grad_norm": 0.6314387917518616, "learning_rate": 0.0003080952380952381, "loss": 0.3891, "step": 24814 }, { "epoch": 13.863128491620111, "grad_norm": 0.6427621245384216, "learning_rate": 0.0003080672268907563, "loss": 0.4861, "step": 24815 }, { "epoch": 13.86368715083799, "grad_norm": 0.5162996053695679, "learning_rate": 0.00030803921568627453, "loss": 0.4164, "step": 24816 }, { "epoch": 
13.864245810055866, "grad_norm": 0.48597386479377747, "learning_rate": 0.0003080112044817927, "loss": 0.4909, "step": 24817 }, { "epoch": 13.864804469273743, "grad_norm": 0.4007278084754944, "learning_rate": 0.00030798319327731094, "loss": 0.4153, "step": 24818 }, { "epoch": 13.86536312849162, "grad_norm": 0.514100968837738, "learning_rate": 0.00030795518207282915, "loss": 0.3756, "step": 24819 }, { "epoch": 13.865921787709498, "grad_norm": 0.4739219546318054, "learning_rate": 0.00030792717086834735, "loss": 0.4713, "step": 24820 }, { "epoch": 13.866480446927374, "grad_norm": 0.42733922600746155, "learning_rate": 0.00030789915966386556, "loss": 0.3627, "step": 24821 }, { "epoch": 13.867039106145251, "grad_norm": 0.7179304361343384, "learning_rate": 0.00030787114845938376, "loss": 0.4428, "step": 24822 }, { "epoch": 13.86759776536313, "grad_norm": 0.9533480405807495, "learning_rate": 0.00030784313725490197, "loss": 0.3956, "step": 24823 }, { "epoch": 13.868156424581006, "grad_norm": 0.47056421637535095, "learning_rate": 0.0003078151260504202, "loss": 0.4164, "step": 24824 }, { "epoch": 13.868715083798882, "grad_norm": 0.6332651972770691, "learning_rate": 0.00030778711484593833, "loss": 0.4094, "step": 24825 }, { "epoch": 13.869273743016759, "grad_norm": 0.5415308475494385, "learning_rate": 0.0003077591036414566, "loss": 0.4968, "step": 24826 }, { "epoch": 13.869832402234637, "grad_norm": 0.4044300615787506, "learning_rate": 0.0003077310924369748, "loss": 0.3796, "step": 24827 }, { "epoch": 13.870391061452514, "grad_norm": 0.37973079085350037, "learning_rate": 0.000307703081232493, "loss": 0.3064, "step": 24828 }, { "epoch": 13.87094972067039, "grad_norm": 0.4267829954624176, "learning_rate": 0.0003076750700280112, "loss": 0.4511, "step": 24829 }, { "epoch": 13.871508379888269, "grad_norm": 0.5870457291603088, "learning_rate": 0.0003076470588235294, "loss": 0.5604, "step": 24830 }, { "epoch": 13.872067039106145, "grad_norm": 2.063067674636841, "learning_rate": 
0.0003076190476190476, "loss": 0.3536, "step": 24831 }, { "epoch": 13.872625698324022, "grad_norm": 0.49359625577926636, "learning_rate": 0.0003075910364145658, "loss": 0.4329, "step": 24832 }, { "epoch": 13.8731843575419, "grad_norm": 0.7901489734649658, "learning_rate": 0.0003075630252100841, "loss": 0.4224, "step": 24833 }, { "epoch": 13.873743016759777, "grad_norm": 0.366642564535141, "learning_rate": 0.00030753501400560224, "loss": 0.3397, "step": 24834 }, { "epoch": 13.874301675977653, "grad_norm": 0.3388896584510803, "learning_rate": 0.00030750700280112044, "loss": 0.3714, "step": 24835 }, { "epoch": 13.87486033519553, "grad_norm": 0.3611159324645996, "learning_rate": 0.00030747899159663865, "loss": 0.3747, "step": 24836 }, { "epoch": 13.875418994413408, "grad_norm": 0.5765059590339661, "learning_rate": 0.00030745098039215685, "loss": 0.4235, "step": 24837 }, { "epoch": 13.875977653631285, "grad_norm": 0.5157936811447144, "learning_rate": 0.0003074229691876751, "loss": 0.4584, "step": 24838 }, { "epoch": 13.876536312849161, "grad_norm": 0.5755608081817627, "learning_rate": 0.00030739495798319327, "loss": 0.4803, "step": 24839 }, { "epoch": 13.87709497206704, "grad_norm": 0.7004003524780273, "learning_rate": 0.00030736694677871147, "loss": 0.4791, "step": 24840 }, { "epoch": 13.877653631284916, "grad_norm": 0.5604896545410156, "learning_rate": 0.00030733893557422973, "loss": 0.3583, "step": 24841 }, { "epoch": 13.878212290502793, "grad_norm": 0.4400005042552948, "learning_rate": 0.0003073109243697479, "loss": 0.3697, "step": 24842 }, { "epoch": 13.878770949720671, "grad_norm": 0.7079864144325256, "learning_rate": 0.00030728291316526614, "loss": 0.4265, "step": 24843 }, { "epoch": 13.879329608938548, "grad_norm": 0.5235856175422668, "learning_rate": 0.0003072549019607843, "loss": 0.3183, "step": 24844 }, { "epoch": 13.879888268156424, "grad_norm": 0.3624257743358612, "learning_rate": 0.0003072268907563025, "loss": 0.363, "step": 24845 }, { "epoch": 
13.880446927374301, "grad_norm": 0.5465137362480164, "learning_rate": 0.00030719887955182076, "loss": 0.4307, "step": 24846 }, { "epoch": 13.88100558659218, "grad_norm": 0.45373231172561646, "learning_rate": 0.0003071708683473389, "loss": 0.5498, "step": 24847 }, { "epoch": 13.881564245810056, "grad_norm": 0.5142821073532104, "learning_rate": 0.0003071428571428572, "loss": 0.4475, "step": 24848 }, { "epoch": 13.882122905027932, "grad_norm": 0.37014129757881165, "learning_rate": 0.0003071148459383754, "loss": 0.4255, "step": 24849 }, { "epoch": 13.88268156424581, "grad_norm": 0.3725886046886444, "learning_rate": 0.00030708683473389353, "loss": 0.3451, "step": 24850 }, { "epoch": 13.883240223463687, "grad_norm": 0.45471274852752686, "learning_rate": 0.0003070588235294118, "loss": 0.4549, "step": 24851 }, { "epoch": 13.883798882681564, "grad_norm": 0.3539445996284485, "learning_rate": 0.00030703081232492994, "loss": 0.3404, "step": 24852 }, { "epoch": 13.88435754189944, "grad_norm": 1.4144705533981323, "learning_rate": 0.0003070028011204482, "loss": 0.4332, "step": 24853 }, { "epoch": 13.884916201117319, "grad_norm": 0.5037001967430115, "learning_rate": 0.0003069747899159664, "loss": 0.3877, "step": 24854 }, { "epoch": 13.885474860335195, "grad_norm": 0.4294300377368927, "learning_rate": 0.00030694677871148456, "loss": 0.3402, "step": 24855 }, { "epoch": 13.886033519553072, "grad_norm": 0.542075514793396, "learning_rate": 0.0003069187675070028, "loss": 0.3218, "step": 24856 }, { "epoch": 13.88659217877095, "grad_norm": 0.7178765535354614, "learning_rate": 0.00030689075630252103, "loss": 0.4026, "step": 24857 }, { "epoch": 13.887150837988827, "grad_norm": 0.34286803007125854, "learning_rate": 0.00030686274509803923, "loss": 0.3226, "step": 24858 }, { "epoch": 13.887709497206703, "grad_norm": 0.47864800691604614, "learning_rate": 0.00030683473389355744, "loss": 0.4391, "step": 24859 }, { "epoch": 13.888268156424582, "grad_norm": 0.5917994976043701, "learning_rate": 
0.0003068067226890756, "loss": 0.3859, "step": 24860 }, { "epoch": 13.888826815642458, "grad_norm": 0.4243963360786438, "learning_rate": 0.00030677871148459385, "loss": 0.4309, "step": 24861 }, { "epoch": 13.889385474860335, "grad_norm": 0.5597795248031616, "learning_rate": 0.00030675070028011206, "loss": 0.3925, "step": 24862 }, { "epoch": 13.889944134078211, "grad_norm": 0.38727062940597534, "learning_rate": 0.00030672268907563026, "loss": 0.3336, "step": 24863 }, { "epoch": 13.89050279329609, "grad_norm": 0.42713820934295654, "learning_rate": 0.00030669467787114847, "loss": 0.408, "step": 24864 }, { "epoch": 13.891061452513966, "grad_norm": 0.6606540083885193, "learning_rate": 0.0003066666666666667, "loss": 0.2949, "step": 24865 }, { "epoch": 13.891620111731843, "grad_norm": 0.5189084410667419, "learning_rate": 0.0003066386554621849, "loss": 0.4535, "step": 24866 }, { "epoch": 13.892178770949721, "grad_norm": 0.3938692808151245, "learning_rate": 0.0003066106442577031, "loss": 0.3846, "step": 24867 }, { "epoch": 13.892737430167598, "grad_norm": 0.5946329236030579, "learning_rate": 0.0003065826330532213, "loss": 0.4372, "step": 24868 }, { "epoch": 13.893296089385474, "grad_norm": 0.41366398334503174, "learning_rate": 0.0003065546218487395, "loss": 0.4985, "step": 24869 }, { "epoch": 13.893854748603353, "grad_norm": 0.34584948420524597, "learning_rate": 0.0003065266106442577, "loss": 0.3786, "step": 24870 }, { "epoch": 13.89441340782123, "grad_norm": 0.6611112356185913, "learning_rate": 0.0003064985994397759, "loss": 0.3314, "step": 24871 }, { "epoch": 13.894972067039106, "grad_norm": 0.4873482286930084, "learning_rate": 0.0003064705882352941, "loss": 0.4504, "step": 24872 }, { "epoch": 13.895530726256982, "grad_norm": 0.3984193503856659, "learning_rate": 0.0003064425770308124, "loss": 0.3731, "step": 24873 }, { "epoch": 13.89608938547486, "grad_norm": 0.6287557482719421, "learning_rate": 0.00030641456582633053, "loss": 0.463, "step": 24874 }, { "epoch": 
13.896648044692737, "grad_norm": 0.4631573259830475, "learning_rate": 0.00030638655462184874, "loss": 0.3476, "step": 24875 }, { "epoch": 13.897206703910614, "grad_norm": 0.6214492321014404, "learning_rate": 0.00030635854341736694, "loss": 0.4585, "step": 24876 }, { "epoch": 13.897765363128492, "grad_norm": 1.2695749998092651, "learning_rate": 0.00030633053221288515, "loss": 0.4332, "step": 24877 }, { "epoch": 13.898324022346369, "grad_norm": 0.543743371963501, "learning_rate": 0.0003063025210084034, "loss": 0.4681, "step": 24878 }, { "epoch": 13.898882681564245, "grad_norm": 0.6609926223754883, "learning_rate": 0.00030627450980392156, "loss": 0.4241, "step": 24879 }, { "epoch": 13.899441340782122, "grad_norm": 0.44550999999046326, "learning_rate": 0.00030624649859943977, "loss": 0.4329, "step": 24880 }, { "epoch": 13.9, "grad_norm": 0.6306529641151428, "learning_rate": 0.000306218487394958, "loss": 0.5281, "step": 24881 }, { "epoch": 13.900558659217877, "grad_norm": 0.7403978705406189, "learning_rate": 0.0003061904761904762, "loss": 0.4134, "step": 24882 }, { "epoch": 13.901117318435753, "grad_norm": 0.4836862087249756, "learning_rate": 0.00030616246498599444, "loss": 0.4792, "step": 24883 }, { "epoch": 13.901675977653632, "grad_norm": 0.8319069743156433, "learning_rate": 0.0003061344537815126, "loss": 0.469, "step": 24884 }, { "epoch": 13.902234636871508, "grad_norm": 0.5553714632987976, "learning_rate": 0.0003061064425770308, "loss": 0.4946, "step": 24885 }, { "epoch": 13.902793296089385, "grad_norm": 0.36867955327033997, "learning_rate": 0.00030607843137254906, "loss": 0.4309, "step": 24886 }, { "epoch": 13.903351955307263, "grad_norm": 0.3950061798095703, "learning_rate": 0.0003060504201680672, "loss": 0.4647, "step": 24887 }, { "epoch": 13.90391061452514, "grad_norm": 0.42105087637901306, "learning_rate": 0.00030602240896358547, "loss": 0.4269, "step": 24888 }, { "epoch": 13.904469273743016, "grad_norm": 0.4055873453617096, "learning_rate": 
0.0003059943977591037, "loss": 0.4052, "step": 24889 }, { "epoch": 13.905027932960895, "grad_norm": 0.3744962215423584, "learning_rate": 0.0003059663865546218, "loss": 0.3786, "step": 24890 }, { "epoch": 13.905586592178771, "grad_norm": 0.48850616812705994, "learning_rate": 0.0003059383753501401, "loss": 0.4241, "step": 24891 }, { "epoch": 13.906145251396648, "grad_norm": 0.3959115743637085, "learning_rate": 0.00030591036414565824, "loss": 0.3752, "step": 24892 }, { "epoch": 13.906703910614524, "grad_norm": 0.39288461208343506, "learning_rate": 0.0003058823529411765, "loss": 0.428, "step": 24893 }, { "epoch": 13.907262569832403, "grad_norm": 0.4193260967731476, "learning_rate": 0.0003058543417366947, "loss": 0.3372, "step": 24894 }, { "epoch": 13.90782122905028, "grad_norm": 0.4617643654346466, "learning_rate": 0.00030582633053221286, "loss": 0.4536, "step": 24895 }, { "epoch": 13.908379888268156, "grad_norm": 0.5762104988098145, "learning_rate": 0.0003057983193277311, "loss": 0.3844, "step": 24896 }, { "epoch": 13.908938547486034, "grad_norm": 0.31485405564308167, "learning_rate": 0.0003057703081232493, "loss": 0.3736, "step": 24897 }, { "epoch": 13.90949720670391, "grad_norm": 0.4969572424888611, "learning_rate": 0.00030574229691876753, "loss": 0.3703, "step": 24898 }, { "epoch": 13.910055865921787, "grad_norm": 0.35041072964668274, "learning_rate": 0.00030571428571428573, "loss": 0.3233, "step": 24899 }, { "epoch": 13.910614525139664, "grad_norm": 0.48628121614456177, "learning_rate": 0.0003056862745098039, "loss": 0.3707, "step": 24900 }, { "epoch": 13.911173184357542, "grad_norm": 0.6263110041618347, "learning_rate": 0.00030565826330532215, "loss": 0.4527, "step": 24901 }, { "epoch": 13.911731843575419, "grad_norm": 0.49387502670288086, "learning_rate": 0.00030563025210084035, "loss": 0.4809, "step": 24902 }, { "epoch": 13.912290502793295, "grad_norm": 0.4616723954677582, "learning_rate": 0.00030560224089635856, "loss": 0.3766, "step": 24903 }, { "epoch": 
13.912849162011174, "grad_norm": 0.3935795724391937, "learning_rate": 0.00030557422969187676, "loss": 0.3186, "step": 24904 }, { "epoch": 13.91340782122905, "grad_norm": 0.7374389171600342, "learning_rate": 0.00030554621848739497, "loss": 0.5969, "step": 24905 }, { "epoch": 13.913966480446927, "grad_norm": 0.4294931888580322, "learning_rate": 0.0003055182072829132, "loss": 0.3713, "step": 24906 }, { "epoch": 13.914525139664804, "grad_norm": 0.5246458053588867, "learning_rate": 0.0003054901960784314, "loss": 0.426, "step": 24907 }, { "epoch": 13.915083798882682, "grad_norm": 0.35554391145706177, "learning_rate": 0.0003054621848739496, "loss": 0.3892, "step": 24908 }, { "epoch": 13.915642458100558, "grad_norm": 0.3771948218345642, "learning_rate": 0.0003054341736694678, "loss": 0.4549, "step": 24909 }, { "epoch": 13.916201117318435, "grad_norm": 0.4551900029182434, "learning_rate": 0.000305406162464986, "loss": 0.3678, "step": 24910 }, { "epoch": 13.916759776536313, "grad_norm": 0.4652528762817383, "learning_rate": 0.0003053781512605042, "loss": 0.4989, "step": 24911 }, { "epoch": 13.91731843575419, "grad_norm": 0.40176817774772644, "learning_rate": 0.0003053501400560224, "loss": 0.4417, "step": 24912 }, { "epoch": 13.917877094972066, "grad_norm": 0.4590175449848175, "learning_rate": 0.00030532212885154067, "loss": 0.334, "step": 24913 }, { "epoch": 13.918435754189945, "grad_norm": 0.25038766860961914, "learning_rate": 0.0003052941176470588, "loss": 0.2776, "step": 24914 }, { "epoch": 13.918994413407821, "grad_norm": 1.717965006828308, "learning_rate": 0.00030526610644257703, "loss": 0.3425, "step": 24915 }, { "epoch": 13.919553072625698, "grad_norm": 0.44046440720558167, "learning_rate": 0.00030523809523809524, "loss": 0.4157, "step": 24916 }, { "epoch": 13.920111731843576, "grad_norm": 0.7244234681129456, "learning_rate": 0.00030521008403361344, "loss": 0.3469, "step": 24917 }, { "epoch": 13.920670391061453, "grad_norm": 0.447412371635437, "learning_rate": 
0.0003051820728291317, "loss": 0.4717, "step": 24918 }, { "epoch": 13.92122905027933, "grad_norm": 0.41557204723358154, "learning_rate": 0.00030515406162464985, "loss": 0.3163, "step": 24919 }, { "epoch": 13.921787709497206, "grad_norm": 0.39765432476997375, "learning_rate": 0.00030512605042016806, "loss": 0.3783, "step": 24920 }, { "epoch": 13.922346368715084, "grad_norm": 0.4708682894706726, "learning_rate": 0.0003050980392156863, "loss": 0.4489, "step": 24921 }, { "epoch": 13.922905027932961, "grad_norm": 1.637816071510315, "learning_rate": 0.00030507002801120447, "loss": 0.574, "step": 24922 }, { "epoch": 13.923463687150837, "grad_norm": 0.5205154418945312, "learning_rate": 0.00030504201680672273, "loss": 0.3754, "step": 24923 }, { "epoch": 13.924022346368716, "grad_norm": 0.4843134880065918, "learning_rate": 0.0003050140056022409, "loss": 0.4248, "step": 24924 }, { "epoch": 13.924581005586592, "grad_norm": 5.135484218597412, "learning_rate": 0.0003049859943977591, "loss": 0.4979, "step": 24925 }, { "epoch": 13.925139664804469, "grad_norm": 0.4674665033817291, "learning_rate": 0.00030495798319327735, "loss": 0.4985, "step": 24926 }, { "epoch": 13.925698324022346, "grad_norm": 0.4445206820964813, "learning_rate": 0.0003049299719887955, "loss": 0.3612, "step": 24927 }, { "epoch": 13.926256983240224, "grad_norm": 0.4883038103580475, "learning_rate": 0.00030490196078431376, "loss": 0.494, "step": 24928 }, { "epoch": 13.9268156424581, "grad_norm": 0.6263957619667053, "learning_rate": 0.00030487394957983197, "loss": 0.4728, "step": 24929 }, { "epoch": 13.927374301675977, "grad_norm": 7.5520524978637695, "learning_rate": 0.0003048459383753501, "loss": 0.3862, "step": 24930 }, { "epoch": 13.927932960893855, "grad_norm": 0.44578176736831665, "learning_rate": 0.0003048179271708684, "loss": 0.4137, "step": 24931 }, { "epoch": 13.928491620111732, "grad_norm": 0.9556331634521484, "learning_rate": 0.00030478991596638653, "loss": 0.4159, "step": 24932 }, { "epoch": 
13.929050279329608, "grad_norm": 0.48974746465682983, "learning_rate": 0.0003047619047619048, "loss": 0.3416, "step": 24933 }, { "epoch": 13.929608938547487, "grad_norm": 0.931921660900116, "learning_rate": 0.000304733893557423, "loss": 0.4043, "step": 24934 }, { "epoch": 13.930167597765363, "grad_norm": 2.0274505615234375, "learning_rate": 0.00030470588235294115, "loss": 0.3542, "step": 24935 }, { "epoch": 13.93072625698324, "grad_norm": 1.0719321966171265, "learning_rate": 0.0003046778711484594, "loss": 0.3991, "step": 24936 }, { "epoch": 13.931284916201117, "grad_norm": 0.3949093222618103, "learning_rate": 0.0003046498599439776, "loss": 0.3916, "step": 24937 }, { "epoch": 13.931843575418995, "grad_norm": 2.330949068069458, "learning_rate": 0.00030462184873949577, "loss": 0.4245, "step": 24938 }, { "epoch": 13.932402234636871, "grad_norm": 0.5379676818847656, "learning_rate": 0.00030459383753501403, "loss": 0.4337, "step": 24939 }, { "epoch": 13.932960893854748, "grad_norm": 0.4951185882091522, "learning_rate": 0.0003045658263305322, "loss": 0.456, "step": 24940 }, { "epoch": 13.933519553072626, "grad_norm": 0.6574488878250122, "learning_rate": 0.00030453781512605044, "loss": 0.4314, "step": 24941 }, { "epoch": 13.934078212290503, "grad_norm": 0.38705697655677795, "learning_rate": 0.00030450980392156865, "loss": 0.4273, "step": 24942 }, { "epoch": 13.93463687150838, "grad_norm": 0.840924859046936, "learning_rate": 0.0003044817927170868, "loss": 0.4625, "step": 24943 }, { "epoch": 13.935195530726258, "grad_norm": 1.2168173789978027, "learning_rate": 0.00030445378151260506, "loss": 0.2766, "step": 24944 }, { "epoch": 13.935754189944134, "grad_norm": 0.43147024512290955, "learning_rate": 0.00030442577030812326, "loss": 0.5715, "step": 24945 }, { "epoch": 13.936312849162011, "grad_norm": 0.5921577215194702, "learning_rate": 0.00030439775910364147, "loss": 0.4653, "step": 24946 }, { "epoch": 13.936871508379888, "grad_norm": 0.43765193223953247, "learning_rate": 
0.0003043697478991597, "loss": 0.3826, "step": 24947 }, { "epoch": 13.937430167597766, "grad_norm": 0.4704474210739136, "learning_rate": 0.00030434173669467783, "loss": 0.3918, "step": 24948 }, { "epoch": 13.937988826815642, "grad_norm": 0.4440448582172394, "learning_rate": 0.0003043137254901961, "loss": 0.43, "step": 24949 }, { "epoch": 13.938547486033519, "grad_norm": 0.40606898069381714, "learning_rate": 0.0003042857142857143, "loss": 0.4245, "step": 24950 }, { "epoch": 13.939106145251397, "grad_norm": 1.4436674118041992, "learning_rate": 0.0003042577030812325, "loss": 0.4639, "step": 24951 }, { "epoch": 13.939664804469274, "grad_norm": 0.485685259103775, "learning_rate": 0.0003042296918767507, "loss": 0.4152, "step": 24952 }, { "epoch": 13.94022346368715, "grad_norm": 0.6809135675430298, "learning_rate": 0.0003042016806722689, "loss": 0.4493, "step": 24953 }, { "epoch": 13.940782122905027, "grad_norm": 0.7552197575569153, "learning_rate": 0.0003041736694677871, "loss": 0.4958, "step": 24954 }, { "epoch": 13.941340782122905, "grad_norm": 1.0604698657989502, "learning_rate": 0.0003041456582633053, "loss": 0.3753, "step": 24955 }, { "epoch": 13.941899441340782, "grad_norm": 0.44197893142700195, "learning_rate": 0.00030411764705882353, "loss": 0.506, "step": 24956 }, { "epoch": 13.942458100558659, "grad_norm": 0.34298622608184814, "learning_rate": 0.00030408963585434174, "loss": 0.2879, "step": 24957 }, { "epoch": 13.943016759776537, "grad_norm": 0.7418647408485413, "learning_rate": 0.00030406162464985994, "loss": 0.6121, "step": 24958 }, { "epoch": 13.943575418994413, "grad_norm": 1.0117508172988892, "learning_rate": 0.00030403361344537815, "loss": 0.3255, "step": 24959 }, { "epoch": 13.94413407821229, "grad_norm": 0.8493671417236328, "learning_rate": 0.00030400560224089635, "loss": 0.3605, "step": 24960 }, { "epoch": 13.944692737430168, "grad_norm": 0.3530765473842621, "learning_rate": 0.0003039775910364146, "loss": 0.3804, "step": 24961 }, { "epoch": 
13.945251396648045, "grad_norm": 0.6087768077850342, "learning_rate": 0.00030394957983193277, "loss": 0.5235, "step": 24962 }, { "epoch": 13.945810055865921, "grad_norm": 0.7930576801300049, "learning_rate": 0.00030392156862745097, "loss": 0.3845, "step": 24963 }, { "epoch": 13.946368715083798, "grad_norm": 0.44346028566360474, "learning_rate": 0.0003038935574229692, "loss": 0.4258, "step": 24964 }, { "epoch": 13.946927374301676, "grad_norm": 2.600170850753784, "learning_rate": 0.0003038655462184874, "loss": 0.3171, "step": 24965 }, { "epoch": 13.947486033519553, "grad_norm": 1.5927622318267822, "learning_rate": 0.00030383753501400564, "loss": 0.3892, "step": 24966 }, { "epoch": 13.94804469273743, "grad_norm": 0.894919216632843, "learning_rate": 0.0003038095238095238, "loss": 0.4701, "step": 24967 }, { "epoch": 13.948603351955308, "grad_norm": 0.4815196990966797, "learning_rate": 0.000303781512605042, "loss": 0.3697, "step": 24968 }, { "epoch": 13.949162011173184, "grad_norm": 0.3941391408443451, "learning_rate": 0.00030375350140056026, "loss": 0.3169, "step": 24969 }, { "epoch": 13.949720670391061, "grad_norm": 0.4568084478378296, "learning_rate": 0.0003037254901960784, "loss": 0.4355, "step": 24970 }, { "epoch": 13.95027932960894, "grad_norm": 0.8687947988510132, "learning_rate": 0.0003036974789915967, "loss": 0.5036, "step": 24971 }, { "epoch": 13.950837988826816, "grad_norm": 0.5170503854751587, "learning_rate": 0.0003036694677871148, "loss": 0.4282, "step": 24972 }, { "epoch": 13.951396648044692, "grad_norm": 0.4385678768157959, "learning_rate": 0.00030364145658263303, "loss": 0.4937, "step": 24973 }, { "epoch": 13.951955307262569, "grad_norm": 0.6030340194702148, "learning_rate": 0.0003036134453781513, "loss": 0.5574, "step": 24974 }, { "epoch": 13.952513966480447, "grad_norm": 0.8687620759010315, "learning_rate": 0.00030358543417366944, "loss": 0.3401, "step": 24975 }, { "epoch": 13.953072625698324, "grad_norm": 0.5189350247383118, "learning_rate": 
0.0003035574229691877, "loss": 0.4109, "step": 24976 }, { "epoch": 13.9536312849162, "grad_norm": 0.36550942063331604, "learning_rate": 0.0003035294117647059, "loss": 0.3059, "step": 24977 }, { "epoch": 13.954189944134079, "grad_norm": 0.43972718715667725, "learning_rate": 0.00030350140056022406, "loss": 0.4123, "step": 24978 }, { "epoch": 13.954748603351955, "grad_norm": 0.9455841183662415, "learning_rate": 0.0003034733893557423, "loss": 0.4193, "step": 24979 }, { "epoch": 13.955307262569832, "grad_norm": 0.7085368037223816, "learning_rate": 0.0003034453781512605, "loss": 0.3641, "step": 24980 }, { "epoch": 13.955865921787709, "grad_norm": 0.3997029960155487, "learning_rate": 0.00030341736694677873, "loss": 0.3421, "step": 24981 }, { "epoch": 13.956424581005587, "grad_norm": 0.46714967489242554, "learning_rate": 0.00030338935574229694, "loss": 0.3272, "step": 24982 }, { "epoch": 13.956983240223463, "grad_norm": 0.4718601107597351, "learning_rate": 0.0003033613445378151, "loss": 0.4274, "step": 24983 }, { "epoch": 13.95754189944134, "grad_norm": 2.4217123985290527, "learning_rate": 0.00030333333333333335, "loss": 0.5249, "step": 24984 }, { "epoch": 13.958100558659218, "grad_norm": 0.6807698011398315, "learning_rate": 0.00030330532212885156, "loss": 0.416, "step": 24985 }, { "epoch": 13.958659217877095, "grad_norm": 0.5843945741653442, "learning_rate": 0.00030327731092436976, "loss": 0.3969, "step": 24986 }, { "epoch": 13.959217877094972, "grad_norm": 0.5690789222717285, "learning_rate": 0.00030324929971988797, "loss": 0.4249, "step": 24987 }, { "epoch": 13.95977653631285, "grad_norm": 0.3401868939399719, "learning_rate": 0.0003032212885154061, "loss": 0.3148, "step": 24988 }, { "epoch": 13.960335195530726, "grad_norm": 0.9192000031471252, "learning_rate": 0.0003031932773109244, "loss": 0.3858, "step": 24989 }, { "epoch": 13.960893854748603, "grad_norm": 1.3651618957519531, "learning_rate": 0.0003031652661064426, "loss": 0.4264, "step": 24990 }, { "epoch": 
13.961452513966481, "grad_norm": 0.4703902304172516, "learning_rate": 0.0003031372549019608, "loss": 0.3557, "step": 24991 }, { "epoch": 13.962011173184358, "grad_norm": 1.926825761795044, "learning_rate": 0.000303109243697479, "loss": 0.4822, "step": 24992 }, { "epoch": 13.962569832402234, "grad_norm": 0.5890985727310181, "learning_rate": 0.0003030812324929972, "loss": 0.3963, "step": 24993 }, { "epoch": 13.963128491620111, "grad_norm": 0.3517052233219147, "learning_rate": 0.0003030532212885154, "loss": 0.4021, "step": 24994 }, { "epoch": 13.96368715083799, "grad_norm": 0.6159536838531494, "learning_rate": 0.0003030252100840336, "loss": 0.4574, "step": 24995 }, { "epoch": 13.964245810055866, "grad_norm": 0.4008338153362274, "learning_rate": 0.0003029971988795518, "loss": 0.4137, "step": 24996 }, { "epoch": 13.964804469273743, "grad_norm": 0.3515673577785492, "learning_rate": 0.00030296918767507003, "loss": 0.3177, "step": 24997 }, { "epoch": 13.96536312849162, "grad_norm": 0.3601021468639374, "learning_rate": 0.00030294117647058824, "loss": 0.3553, "step": 24998 }, { "epoch": 13.965921787709497, "grad_norm": 2.735264778137207, "learning_rate": 0.00030291316526610644, "loss": 0.3803, "step": 24999 }, { "epoch": 13.966480446927374, "grad_norm": 0.7660241723060608, "learning_rate": 0.00030288515406162465, "loss": 0.3471, "step": 25000 }, { "epoch": 13.966480446927374, "eval_cer": 0.08627104404656694, "eval_loss": 0.3287004232406616, "eval_runtime": 55.6728, "eval_samples_per_second": 81.512, "eval_steps_per_second": 5.101, "eval_wer": 0.33990670651657773, "step": 25000 }, { "epoch": 13.96703910614525, "grad_norm": 0.5212233662605286, "learning_rate": 0.0003028571428571429, "loss": 0.3975, "step": 25001 }, { "epoch": 13.967597765363129, "grad_norm": 0.5010490417480469, "learning_rate": 0.00030282913165266106, "loss": 0.268, "step": 25002 }, { "epoch": 13.968156424581005, "grad_norm": 0.3625105321407318, "learning_rate": 0.00030280112044817927, "loss": 0.415, "step": 
25003 }, { "epoch": 13.968715083798882, "grad_norm": 0.544319212436676, "learning_rate": 0.00030277310924369747, "loss": 0.5051, "step": 25004 }, { "epoch": 13.96927374301676, "grad_norm": 0.8994757533073425, "learning_rate": 0.0003027450980392157, "loss": 0.4631, "step": 25005 }, { "epoch": 13.969832402234637, "grad_norm": 0.6352178454399109, "learning_rate": 0.00030271708683473394, "loss": 0.4535, "step": 25006 }, { "epoch": 13.970391061452514, "grad_norm": 0.5342041850090027, "learning_rate": 0.0003026890756302521, "loss": 0.4037, "step": 25007 }, { "epoch": 13.970949720670392, "grad_norm": 0.6092376112937927, "learning_rate": 0.0003026610644257703, "loss": 0.5237, "step": 25008 }, { "epoch": 13.971508379888268, "grad_norm": 1.606554627418518, "learning_rate": 0.00030263305322128856, "loss": 0.479, "step": 25009 }, { "epoch": 13.972067039106145, "grad_norm": 0.6056253910064697, "learning_rate": 0.0003026050420168067, "loss": 0.5034, "step": 25010 }, { "epoch": 13.972625698324022, "grad_norm": 0.4374372661113739, "learning_rate": 0.00030257703081232497, "loss": 0.491, "step": 25011 }, { "epoch": 13.9731843575419, "grad_norm": 0.5344729423522949, "learning_rate": 0.0003025490196078431, "loss": 0.3837, "step": 25012 }, { "epoch": 13.973743016759776, "grad_norm": 0.8057110905647278, "learning_rate": 0.0003025210084033613, "loss": 0.42, "step": 25013 }, { "epoch": 13.974301675977653, "grad_norm": 1.5922526121139526, "learning_rate": 0.0003024929971988796, "loss": 0.3962, "step": 25014 }, { "epoch": 13.974860335195531, "grad_norm": 0.42678239941596985, "learning_rate": 0.00030246498599439774, "loss": 0.3899, "step": 25015 }, { "epoch": 13.975418994413408, "grad_norm": 0.7659466862678528, "learning_rate": 0.000302436974789916, "loss": 0.3991, "step": 25016 }, { "epoch": 13.975977653631285, "grad_norm": 0.702599048614502, "learning_rate": 0.0003024089635854342, "loss": 0.3449, "step": 25017 }, { "epoch": 13.976536312849163, "grad_norm": 0.7584528923034668, 
"learning_rate": 0.00030238095238095236, "loss": 0.5014, "step": 25018 }, { "epoch": 13.97709497206704, "grad_norm": 0.42104771733283997, "learning_rate": 0.0003023529411764706, "loss": 0.4854, "step": 25019 }, { "epoch": 13.977653631284916, "grad_norm": 0.8232793211936951, "learning_rate": 0.00030232492997198877, "loss": 0.4124, "step": 25020 }, { "epoch": 13.978212290502793, "grad_norm": 0.557181179523468, "learning_rate": 0.00030229691876750703, "loss": 0.3953, "step": 25021 }, { "epoch": 13.978770949720671, "grad_norm": 0.4982050061225891, "learning_rate": 0.00030226890756302523, "loss": 0.3152, "step": 25022 }, { "epoch": 13.979329608938547, "grad_norm": 0.41561731696128845, "learning_rate": 0.0003022408963585434, "loss": 0.3955, "step": 25023 }, { "epoch": 13.979888268156424, "grad_norm": 0.5033066272735596, "learning_rate": 0.00030221288515406165, "loss": 0.432, "step": 25024 }, { "epoch": 13.980446927374302, "grad_norm": 0.5226442217826843, "learning_rate": 0.00030218487394957985, "loss": 0.4477, "step": 25025 }, { "epoch": 13.981005586592179, "grad_norm": 0.3596704304218292, "learning_rate": 0.00030215686274509806, "loss": 0.4072, "step": 25026 }, { "epoch": 13.981564245810056, "grad_norm": 0.43071648478507996, "learning_rate": 0.00030212885154061626, "loss": 0.4094, "step": 25027 }, { "epoch": 13.982122905027932, "grad_norm": 0.8383761048316956, "learning_rate": 0.0003021008403361344, "loss": 0.4696, "step": 25028 }, { "epoch": 13.98268156424581, "grad_norm": 0.5035290718078613, "learning_rate": 0.0003020728291316527, "loss": 0.4363, "step": 25029 }, { "epoch": 13.983240223463687, "grad_norm": 0.4246644973754883, "learning_rate": 0.0003020448179271709, "loss": 0.5145, "step": 25030 }, { "epoch": 13.983798882681564, "grad_norm": 1.333490252494812, "learning_rate": 0.0003020168067226891, "loss": 0.5115, "step": 25031 }, { "epoch": 13.984357541899442, "grad_norm": 0.4504704773426056, "learning_rate": 0.0003019887955182073, "loss": 0.3872, "step": 25032 }, { 
"epoch": 13.984916201117318, "grad_norm": 0.35326847434043884, "learning_rate": 0.0003019607843137255, "loss": 0.3361, "step": 25033 }, { "epoch": 13.985474860335195, "grad_norm": 0.41942453384399414, "learning_rate": 0.0003019327731092437, "loss": 0.3568, "step": 25034 }, { "epoch": 13.986033519553073, "grad_norm": 0.574332058429718, "learning_rate": 0.0003019047619047619, "loss": 0.3928, "step": 25035 }, { "epoch": 13.98659217877095, "grad_norm": 0.41776958107948303, "learning_rate": 0.0003018767507002801, "loss": 0.4517, "step": 25036 }, { "epoch": 13.987150837988827, "grad_norm": 0.535555899143219, "learning_rate": 0.0003018487394957983, "loss": 0.3967, "step": 25037 }, { "epoch": 13.987709497206703, "grad_norm": 0.5311372876167297, "learning_rate": 0.00030182072829131653, "loss": 0.4513, "step": 25038 }, { "epoch": 13.988268156424581, "grad_norm": 1.0931395292282104, "learning_rate": 0.00030179271708683474, "loss": 0.7758, "step": 25039 }, { "epoch": 13.988826815642458, "grad_norm": 0.4387125074863434, "learning_rate": 0.00030176470588235294, "loss": 0.3698, "step": 25040 }, { "epoch": 13.989385474860335, "grad_norm": 0.8777655363082886, "learning_rate": 0.0003017366946778712, "loss": 0.4809, "step": 25041 }, { "epoch": 13.989944134078213, "grad_norm": 0.6878267526626587, "learning_rate": 0.00030170868347338935, "loss": 0.3858, "step": 25042 }, { "epoch": 13.99050279329609, "grad_norm": 0.3940977156162262, "learning_rate": 0.00030168067226890756, "loss": 0.4235, "step": 25043 }, { "epoch": 13.991061452513966, "grad_norm": 0.7464665174484253, "learning_rate": 0.00030165266106442577, "loss": 0.5448, "step": 25044 }, { "epoch": 13.991620111731844, "grad_norm": 0.45642080903053284, "learning_rate": 0.00030162464985994397, "loss": 0.4523, "step": 25045 }, { "epoch": 13.992178770949721, "grad_norm": 0.7723276019096375, "learning_rate": 0.00030159663865546223, "loss": 0.3269, "step": 25046 }, { "epoch": 13.992737430167598, "grad_norm": 0.41004931926727295, 
"learning_rate": 0.0003015686274509804, "loss": 0.3492, "step": 25047 }, { "epoch": 13.993296089385474, "grad_norm": 0.5904877185821533, "learning_rate": 0.0003015406162464986, "loss": 0.3897, "step": 25048 }, { "epoch": 13.993854748603352, "grad_norm": 0.4126007556915283, "learning_rate": 0.00030151260504201685, "loss": 0.3872, "step": 25049 }, { "epoch": 13.994413407821229, "grad_norm": 0.4562183618545532, "learning_rate": 0.000301484593837535, "loss": 0.4694, "step": 25050 }, { "epoch": 13.994972067039106, "grad_norm": 3.9524052143096924, "learning_rate": 0.0003014565826330532, "loss": 0.4037, "step": 25051 }, { "epoch": 13.995530726256984, "grad_norm": 0.589347779750824, "learning_rate": 0.0003014285714285714, "loss": 0.4321, "step": 25052 }, { "epoch": 13.99608938547486, "grad_norm": 0.43606725335121155, "learning_rate": 0.0003014005602240896, "loss": 0.2877, "step": 25053 }, { "epoch": 13.996648044692737, "grad_norm": 0.4309036433696747, "learning_rate": 0.0003013725490196079, "loss": 0.4147, "step": 25054 }, { "epoch": 13.997206703910614, "grad_norm": 0.5048011541366577, "learning_rate": 0.00030134453781512603, "loss": 0.3415, "step": 25055 }, { "epoch": 13.997765363128492, "grad_norm": 0.37226057052612305, "learning_rate": 0.00030131652661064424, "loss": 0.2313, "step": 25056 }, { "epoch": 13.998324022346369, "grad_norm": 0.4966714680194855, "learning_rate": 0.0003012885154061625, "loss": 0.3837, "step": 25057 }, { "epoch": 13.998882681564245, "grad_norm": 0.6564077734947205, "learning_rate": 0.00030126050420168065, "loss": 0.4731, "step": 25058 }, { "epoch": 13.999441340782123, "grad_norm": 0.7023360133171082, "learning_rate": 0.0003012324929971989, "loss": 0.4921, "step": 25059 }, { "epoch": 14.0, "grad_norm": 0.42959120869636536, "learning_rate": 0.00030120448179271706, "loss": 0.3585, "step": 25060 }, { "epoch": 14.000558659217877, "grad_norm": 0.4712943434715271, "learning_rate": 0.00030117647058823527, "loss": 0.4188, "step": 25061 }, { "epoch": 
14.001117318435755, "grad_norm": 0.5183335542678833, "learning_rate": 0.00030114845938375353, "loss": 0.3994, "step": 25062 }, { "epoch": 14.001675977653631, "grad_norm": 18.417646408081055, "learning_rate": 0.0003011204481792717, "loss": 0.3296, "step": 25063 }, { "epoch": 14.002234636871508, "grad_norm": 0.44175639748573303, "learning_rate": 0.00030109243697478994, "loss": 0.3516, "step": 25064 }, { "epoch": 14.002793296089385, "grad_norm": 0.9682335257530212, "learning_rate": 0.00030106442577030815, "loss": 0.3866, "step": 25065 }, { "epoch": 14.003351955307263, "grad_norm": 0.7077897191047668, "learning_rate": 0.0003010364145658263, "loss": 0.4485, "step": 25066 }, { "epoch": 14.00391061452514, "grad_norm": 0.5629830360412598, "learning_rate": 0.00030100840336134456, "loss": 0.4298, "step": 25067 }, { "epoch": 14.004469273743016, "grad_norm": 0.8316035270690918, "learning_rate": 0.0003009803921568627, "loss": 0.367, "step": 25068 }, { "epoch": 14.005027932960894, "grad_norm": 0.399234414100647, "learning_rate": 0.00030095238095238097, "loss": 0.3361, "step": 25069 }, { "epoch": 14.005586592178771, "grad_norm": 0.3416390120983124, "learning_rate": 0.0003009243697478992, "loss": 0.3499, "step": 25070 }, { "epoch": 14.006145251396648, "grad_norm": 0.5388157963752747, "learning_rate": 0.00030089635854341733, "loss": 0.4587, "step": 25071 }, { "epoch": 14.006703910614526, "grad_norm": 0.9829431176185608, "learning_rate": 0.0003008683473389356, "loss": 0.4418, "step": 25072 }, { "epoch": 14.007262569832402, "grad_norm": 0.39367660880088806, "learning_rate": 0.0003008403361344538, "loss": 0.402, "step": 25073 }, { "epoch": 14.007821229050279, "grad_norm": 0.3428211212158203, "learning_rate": 0.000300812324929972, "loss": 0.3656, "step": 25074 }, { "epoch": 14.008379888268156, "grad_norm": 0.3543752133846283, "learning_rate": 0.0003007843137254902, "loss": 0.4425, "step": 25075 }, { "epoch": 14.008938547486034, "grad_norm": 0.4108964204788208, "learning_rate": 
0.00030075630252100836, "loss": 0.4204, "step": 25076 }, { "epoch": 14.00949720670391, "grad_norm": 0.6321560144424438, "learning_rate": 0.0003007282913165266, "loss": 0.385, "step": 25077 }, { "epoch": 14.010055865921787, "grad_norm": 0.4434875249862671, "learning_rate": 0.0003007002801120448, "loss": 0.3945, "step": 25078 }, { "epoch": 14.010614525139665, "grad_norm": 0.3013342022895813, "learning_rate": 0.00030067226890756303, "loss": 0.3081, "step": 25079 }, { "epoch": 14.011173184357542, "grad_norm": 0.5292012691497803, "learning_rate": 0.00030064425770308124, "loss": 0.4856, "step": 25080 }, { "epoch": 14.011731843575419, "grad_norm": 0.38223472237586975, "learning_rate": 0.00030061624649859944, "loss": 0.4097, "step": 25081 }, { "epoch": 14.012290502793297, "grad_norm": 0.3560939431190491, "learning_rate": 0.00030058823529411765, "loss": 0.4264, "step": 25082 }, { "epoch": 14.012849162011173, "grad_norm": 0.5588728189468384, "learning_rate": 0.00030056022408963585, "loss": 0.3723, "step": 25083 }, { "epoch": 14.01340782122905, "grad_norm": 0.4066706895828247, "learning_rate": 0.0003005322128851541, "loss": 0.4191, "step": 25084 }, { "epoch": 14.013966480446927, "grad_norm": 0.4044496715068817, "learning_rate": 0.00030050420168067227, "loss": 0.3767, "step": 25085 }, { "epoch": 14.014525139664805, "grad_norm": 0.43806135654449463, "learning_rate": 0.00030047619047619047, "loss": 0.4753, "step": 25086 }, { "epoch": 14.015083798882682, "grad_norm": 0.42578983306884766, "learning_rate": 0.0003004481792717087, "loss": 0.3864, "step": 25087 }, { "epoch": 14.015642458100558, "grad_norm": 0.4107792377471924, "learning_rate": 0.0003004201680672269, "loss": 0.4069, "step": 25088 }, { "epoch": 14.016201117318436, "grad_norm": 0.4648132622241974, "learning_rate": 0.00030039215686274514, "loss": 0.4098, "step": 25089 }, { "epoch": 14.016759776536313, "grad_norm": 0.3882954716682434, "learning_rate": 0.0003003641456582633, "loss": 0.3972, "step": 25090 }, { "epoch": 
14.01731843575419, "grad_norm": 0.4436319172382355, "learning_rate": 0.0003003361344537815, "loss": 0.4274, "step": 25091 }, { "epoch": 14.017877094972068, "grad_norm": 0.521126925945282, "learning_rate": 0.00030030812324929976, "loss": 0.2999, "step": 25092 }, { "epoch": 14.018435754189944, "grad_norm": 0.5451239943504333, "learning_rate": 0.0003002801120448179, "loss": 0.4994, "step": 25093 }, { "epoch": 14.018994413407821, "grad_norm": 0.45273008942604065, "learning_rate": 0.0003002521008403362, "loss": 0.3398, "step": 25094 }, { "epoch": 14.019553072625698, "grad_norm": 0.6346138715744019, "learning_rate": 0.0003002240896358543, "loss": 0.5223, "step": 25095 }, { "epoch": 14.020111731843576, "grad_norm": 1.1171289682388306, "learning_rate": 0.00030019607843137253, "loss": 0.4852, "step": 25096 }, { "epoch": 14.020670391061453, "grad_norm": 0.4387471675872803, "learning_rate": 0.0003001680672268908, "loss": 0.4918, "step": 25097 }, { "epoch": 14.021229050279329, "grad_norm": 0.653271496295929, "learning_rate": 0.00030014005602240894, "loss": 0.4949, "step": 25098 }, { "epoch": 14.021787709497207, "grad_norm": 0.519095778465271, "learning_rate": 0.0003001120448179272, "loss": 0.4279, "step": 25099 }, { "epoch": 14.022346368715084, "grad_norm": 0.48979949951171875, "learning_rate": 0.0003000840336134454, "loss": 0.4172, "step": 25100 }, { "epoch": 14.02290502793296, "grad_norm": 0.519146740436554, "learning_rate": 0.00030005602240896356, "loss": 0.4088, "step": 25101 }, { "epoch": 14.023463687150837, "grad_norm": 0.3801347315311432, "learning_rate": 0.0003000280112044818, "loss": 0.3158, "step": 25102 }, { "epoch": 14.024022346368715, "grad_norm": 0.8574338555335999, "learning_rate": 0.0003, "loss": 0.3592, "step": 25103 }, { "epoch": 14.024581005586592, "grad_norm": 0.45281165838241577, "learning_rate": 0.00029997198879551823, "loss": 0.4392, "step": 25104 }, { "epoch": 14.025139664804469, "grad_norm": 0.3741462528705597, "learning_rate": 0.00029994397759103644, 
"loss": 0.4564, "step": 25105 }, { "epoch": 14.025698324022347, "grad_norm": 0.3896619975566864, "learning_rate": 0.0002999159663865546, "loss": 0.3316, "step": 25106 }, { "epoch": 14.026256983240224, "grad_norm": 0.3635427951812744, "learning_rate": 0.00029988795518207285, "loss": 0.3794, "step": 25107 }, { "epoch": 14.0268156424581, "grad_norm": 0.5178716778755188, "learning_rate": 0.00029985994397759106, "loss": 0.478, "step": 25108 }, { "epoch": 14.027374301675978, "grad_norm": 1.5163228511810303, "learning_rate": 0.00029983193277310926, "loss": 0.3813, "step": 25109 }, { "epoch": 14.027932960893855, "grad_norm": 0.41071194410324097, "learning_rate": 0.00029980392156862747, "loss": 0.389, "step": 25110 }, { "epoch": 14.028491620111732, "grad_norm": 0.5828934907913208, "learning_rate": 0.0002997759103641456, "loss": 0.5433, "step": 25111 }, { "epoch": 14.029050279329608, "grad_norm": 0.43605664372444153, "learning_rate": 0.0002997478991596639, "loss": 0.4031, "step": 25112 }, { "epoch": 14.029608938547486, "grad_norm": 0.32368379831314087, "learning_rate": 0.0002997198879551821, "loss": 0.3372, "step": 25113 }, { "epoch": 14.030167597765363, "grad_norm": 0.3229929208755493, "learning_rate": 0.0002996918767507003, "loss": 0.3311, "step": 25114 }, { "epoch": 14.03072625698324, "grad_norm": 0.5266735553741455, "learning_rate": 0.0002996638655462185, "loss": 0.5404, "step": 25115 }, { "epoch": 14.031284916201118, "grad_norm": 0.3858741521835327, "learning_rate": 0.0002996358543417367, "loss": 0.3795, "step": 25116 }, { "epoch": 14.031843575418995, "grad_norm": 0.48159995675086975, "learning_rate": 0.0002996078431372549, "loss": 0.4463, "step": 25117 }, { "epoch": 14.032402234636871, "grad_norm": 1.2363423109054565, "learning_rate": 0.0002995798319327731, "loss": 0.4726, "step": 25118 }, { "epoch": 14.03296089385475, "grad_norm": 0.8779730796813965, "learning_rate": 0.0002995518207282913, "loss": 0.4443, "step": 25119 }, { "epoch": 14.033519553072626, "grad_norm": 
0.5478161573410034, "learning_rate": 0.00029952380952380953, "loss": 0.3233, "step": 25120 }, { "epoch": 14.034078212290503, "grad_norm": 0.294427752494812, "learning_rate": 0.00029949579831932774, "loss": 0.2965, "step": 25121 }, { "epoch": 14.03463687150838, "grad_norm": 0.5613181591033936, "learning_rate": 0.00029946778711484594, "loss": 0.4103, "step": 25122 }, { "epoch": 14.035195530726257, "grad_norm": 0.3509247303009033, "learning_rate": 0.00029943977591036415, "loss": 0.399, "step": 25123 }, { "epoch": 14.035754189944134, "grad_norm": 0.4378091096878052, "learning_rate": 0.0002994117647058824, "loss": 0.3974, "step": 25124 }, { "epoch": 14.03631284916201, "grad_norm": 0.5401489734649658, "learning_rate": 0.00029938375350140056, "loss": 0.3839, "step": 25125 }, { "epoch": 14.036871508379889, "grad_norm": 0.5004332065582275, "learning_rate": 0.00029935574229691877, "loss": 0.5193, "step": 25126 }, { "epoch": 14.037430167597766, "grad_norm": 0.4608827531337738, "learning_rate": 0.00029932773109243697, "loss": 0.4458, "step": 25127 }, { "epoch": 14.037988826815642, "grad_norm": 0.5581133365631104, "learning_rate": 0.0002992997198879552, "loss": 0.3701, "step": 25128 }, { "epoch": 14.03854748603352, "grad_norm": 0.5472137928009033, "learning_rate": 0.00029927170868347344, "loss": 0.3666, "step": 25129 }, { "epoch": 14.039106145251397, "grad_norm": 0.4342600405216217, "learning_rate": 0.0002992436974789916, "loss": 0.4168, "step": 25130 }, { "epoch": 14.039664804469274, "grad_norm": 0.48705387115478516, "learning_rate": 0.0002992156862745098, "loss": 0.5495, "step": 25131 }, { "epoch": 14.04022346368715, "grad_norm": 0.6571857929229736, "learning_rate": 0.00029918767507002806, "loss": 0.3733, "step": 25132 }, { "epoch": 14.040782122905028, "grad_norm": 0.9972319006919861, "learning_rate": 0.0002991596638655462, "loss": 0.4395, "step": 25133 }, { "epoch": 14.041340782122905, "grad_norm": 1.2861734628677368, "learning_rate": 0.00029913165266106447, "loss": 0.429, 
"step": 25134 }, { "epoch": 14.041899441340782, "grad_norm": 0.6150367259979248, "learning_rate": 0.0002991036414565826, "loss": 0.4162, "step": 25135 }, { "epoch": 14.04245810055866, "grad_norm": 0.5176380276679993, "learning_rate": 0.0002990756302521008, "loss": 0.4357, "step": 25136 }, { "epoch": 14.043016759776537, "grad_norm": 0.46156901121139526, "learning_rate": 0.0002990476190476191, "loss": 0.3647, "step": 25137 }, { "epoch": 14.043575418994413, "grad_norm": 0.4424963593482971, "learning_rate": 0.00029901960784313724, "loss": 0.2935, "step": 25138 }, { "epoch": 14.04413407821229, "grad_norm": 0.5505861639976501, "learning_rate": 0.0002989915966386555, "loss": 0.6266, "step": 25139 }, { "epoch": 14.044692737430168, "grad_norm": 0.5297809839248657, "learning_rate": 0.0002989635854341737, "loss": 0.4665, "step": 25140 }, { "epoch": 14.045251396648045, "grad_norm": 0.3823828101158142, "learning_rate": 0.00029893557422969186, "loss": 0.3453, "step": 25141 }, { "epoch": 14.045810055865921, "grad_norm": 0.6137543320655823, "learning_rate": 0.0002989075630252101, "loss": 0.4747, "step": 25142 }, { "epoch": 14.0463687150838, "grad_norm": 0.5718567967414856, "learning_rate": 0.00029887955182072827, "loss": 0.6341, "step": 25143 }, { "epoch": 14.046927374301676, "grad_norm": 0.5188270211219788, "learning_rate": 0.00029885154061624653, "loss": 0.5634, "step": 25144 }, { "epoch": 14.047486033519553, "grad_norm": 0.44254305958747864, "learning_rate": 0.00029882352941176473, "loss": 0.3635, "step": 25145 }, { "epoch": 14.048044692737431, "grad_norm": 0.6161893010139465, "learning_rate": 0.0002987955182072829, "loss": 0.491, "step": 25146 }, { "epoch": 14.048603351955308, "grad_norm": 1.4533168077468872, "learning_rate": 0.00029876750700280115, "loss": 0.3027, "step": 25147 }, { "epoch": 14.049162011173184, "grad_norm": 0.8176302909851074, "learning_rate": 0.00029873949579831935, "loss": 0.3143, "step": 25148 }, { "epoch": 14.04972067039106, "grad_norm": 
0.6239168047904968, "learning_rate": 0.00029871148459383756, "loss": 0.3537, "step": 25149 }, { "epoch": 14.050279329608939, "grad_norm": 5.831343173980713, "learning_rate": 0.00029868347338935576, "loss": 0.3829, "step": 25150 }, { "epoch": 14.050837988826816, "grad_norm": 0.6405991911888123, "learning_rate": 0.0002986554621848739, "loss": 0.4646, "step": 25151 }, { "epoch": 14.051396648044692, "grad_norm": 0.3845250904560089, "learning_rate": 0.0002986274509803922, "loss": 0.3478, "step": 25152 }, { "epoch": 14.05195530726257, "grad_norm": 0.7091770172119141, "learning_rate": 0.0002985994397759104, "loss": 0.4759, "step": 25153 }, { "epoch": 14.052513966480447, "grad_norm": 0.4209301173686981, "learning_rate": 0.0002985714285714286, "loss": 0.4502, "step": 25154 }, { "epoch": 14.053072625698324, "grad_norm": 1.1112370491027832, "learning_rate": 0.0002985434173669468, "loss": 0.4293, "step": 25155 }, { "epoch": 14.053631284916202, "grad_norm": 0.5396345853805542, "learning_rate": 0.000298515406162465, "loss": 0.4657, "step": 25156 }, { "epoch": 14.054189944134079, "grad_norm": 0.6367893218994141, "learning_rate": 0.0002984873949579832, "loss": 0.5284, "step": 25157 }, { "epoch": 14.054748603351955, "grad_norm": 0.49026769399642944, "learning_rate": 0.0002984593837535014, "loss": 0.4015, "step": 25158 }, { "epoch": 14.055307262569832, "grad_norm": 0.3736318051815033, "learning_rate": 0.00029843137254901956, "loss": 0.2683, "step": 25159 }, { "epoch": 14.05586592178771, "grad_norm": 0.9663935303688049, "learning_rate": 0.0002984033613445378, "loss": 0.7196, "step": 25160 }, { "epoch": 14.056424581005587, "grad_norm": 0.9790797829627991, "learning_rate": 0.00029837535014005603, "loss": 0.4882, "step": 25161 }, { "epoch": 14.056983240223463, "grad_norm": 0.5219529271125793, "learning_rate": 0.00029834733893557424, "loss": 0.5522, "step": 25162 }, { "epoch": 14.057541899441341, "grad_norm": 11.369020462036133, "learning_rate": 0.00029831932773109244, "loss": 0.3799, 
"step": 25163 }, { "epoch": 14.058100558659218, "grad_norm": 0.7957562804222107, "learning_rate": 0.00029829131652661065, "loss": 0.3867, "step": 25164 }, { "epoch": 14.058659217877095, "grad_norm": 0.592454195022583, "learning_rate": 0.00029826330532212885, "loss": 0.5343, "step": 25165 }, { "epoch": 14.059217877094973, "grad_norm": 0.841611385345459, "learning_rate": 0.00029823529411764706, "loss": 0.3398, "step": 25166 }, { "epoch": 14.05977653631285, "grad_norm": 0.6596607565879822, "learning_rate": 0.00029820728291316527, "loss": 0.3552, "step": 25167 }, { "epoch": 14.060335195530726, "grad_norm": 0.8836166858673096, "learning_rate": 0.00029817927170868347, "loss": 0.376, "step": 25168 }, { "epoch": 14.060893854748603, "grad_norm": 19.35335922241211, "learning_rate": 0.0002981512605042017, "loss": 0.3215, "step": 25169 }, { "epoch": 14.061452513966481, "grad_norm": 0.5205940008163452, "learning_rate": 0.0002981232492997199, "loss": 0.4637, "step": 25170 }, { "epoch": 14.062011173184358, "grad_norm": 0.5076216459274292, "learning_rate": 0.0002980952380952381, "loss": 0.5615, "step": 25171 }, { "epoch": 14.062569832402234, "grad_norm": 0.40266963839530945, "learning_rate": 0.00029806722689075635, "loss": 0.3353, "step": 25172 }, { "epoch": 14.063128491620112, "grad_norm": 0.5750336050987244, "learning_rate": 0.0002980392156862745, "loss": 0.4524, "step": 25173 }, { "epoch": 14.063687150837989, "grad_norm": 0.525634229183197, "learning_rate": 0.0002980112044817927, "loss": 0.4119, "step": 25174 }, { "epoch": 14.064245810055866, "grad_norm": 1.1602169275283813, "learning_rate": 0.0002979831932773109, "loss": 0.3954, "step": 25175 }, { "epoch": 14.064804469273742, "grad_norm": 0.5978020429611206, "learning_rate": 0.0002979551820728291, "loss": 0.4465, "step": 25176 }, { "epoch": 14.06536312849162, "grad_norm": 0.4733113646507263, "learning_rate": 0.0002979271708683474, "loss": 0.4775, "step": 25177 }, { "epoch": 14.065921787709497, "grad_norm": 0.6798698902130127, 
"learning_rate": 0.00029789915966386553, "loss": 0.6171, "step": 25178 }, { "epoch": 14.066480446927374, "grad_norm": 0.5701090097427368, "learning_rate": 0.00029787114845938374, "loss": 0.4569, "step": 25179 }, { "epoch": 14.067039106145252, "grad_norm": 0.4863775670528412, "learning_rate": 0.000297843137254902, "loss": 0.38, "step": 25180 }, { "epoch": 14.067597765363129, "grad_norm": 0.6646408438682556, "learning_rate": 0.00029781512605042015, "loss": 0.6488, "step": 25181 }, { "epoch": 14.068156424581005, "grad_norm": 0.4765235483646393, "learning_rate": 0.0002977871148459384, "loss": 0.4397, "step": 25182 }, { "epoch": 14.068715083798883, "grad_norm": 2.3887763023376465, "learning_rate": 0.00029775910364145656, "loss": 0.4544, "step": 25183 }, { "epoch": 14.06927374301676, "grad_norm": 0.6155056953430176, "learning_rate": 0.00029773109243697477, "loss": 0.3791, "step": 25184 }, { "epoch": 14.069832402234637, "grad_norm": 0.49900445342063904, "learning_rate": 0.00029770308123249303, "loss": 0.3861, "step": 25185 }, { "epoch": 14.070391061452513, "grad_norm": 0.9303284883499146, "learning_rate": 0.0002976750700280112, "loss": 0.3847, "step": 25186 }, { "epoch": 14.070949720670392, "grad_norm": 0.5667380094528198, "learning_rate": 0.00029764705882352944, "loss": 0.4252, "step": 25187 }, { "epoch": 14.071508379888268, "grad_norm": 0.5309032201766968, "learning_rate": 0.00029761904761904765, "loss": 0.4725, "step": 25188 }, { "epoch": 14.072067039106145, "grad_norm": 0.597527027130127, "learning_rate": 0.0002975910364145658, "loss": 0.4007, "step": 25189 }, { "epoch": 14.072625698324023, "grad_norm": 0.5219390988349915, "learning_rate": 0.00029756302521008406, "loss": 0.5085, "step": 25190 }, { "epoch": 14.0731843575419, "grad_norm": 0.35437649488449097, "learning_rate": 0.0002975350140056022, "loss": 0.3983, "step": 25191 }, { "epoch": 14.073743016759776, "grad_norm": 0.3699656128883362, "learning_rate": 0.00029750700280112047, "loss": 0.3124, "step": 25192 }, { 
"epoch": 14.074301675977654, "grad_norm": 0.4022884964942932, "learning_rate": 0.0002974789915966387, "loss": 0.3703, "step": 25193 }, { "epoch": 14.074860335195531, "grad_norm": 0.37731048464775085, "learning_rate": 0.00029745098039215683, "loss": 0.4088, "step": 25194 }, { "epoch": 14.075418994413408, "grad_norm": 0.5332436561584473, "learning_rate": 0.0002974229691876751, "loss": 0.4706, "step": 25195 }, { "epoch": 14.075977653631284, "grad_norm": 0.4692066013813019, "learning_rate": 0.0002973949579831933, "loss": 0.4376, "step": 25196 }, { "epoch": 14.076536312849163, "grad_norm": 0.5193145871162415, "learning_rate": 0.0002973669467787115, "loss": 0.3453, "step": 25197 }, { "epoch": 14.077094972067039, "grad_norm": 0.5056783556938171, "learning_rate": 0.0002973389355742297, "loss": 0.4136, "step": 25198 }, { "epoch": 14.077653631284916, "grad_norm": 0.3452467918395996, "learning_rate": 0.00029731092436974786, "loss": 0.3302, "step": 25199 }, { "epoch": 14.078212290502794, "grad_norm": 0.8596594929695129, "learning_rate": 0.0002972829131652661, "loss": 0.4093, "step": 25200 }, { "epoch": 14.07877094972067, "grad_norm": 0.6025142669677734, "learning_rate": 0.0002972549019607843, "loss": 0.4807, "step": 25201 }, { "epoch": 14.079329608938547, "grad_norm": 0.48050639033317566, "learning_rate": 0.00029722689075630253, "loss": 0.5147, "step": 25202 }, { "epoch": 14.079888268156424, "grad_norm": 0.6344686150550842, "learning_rate": 0.00029719887955182074, "loss": 0.4303, "step": 25203 }, { "epoch": 14.080446927374302, "grad_norm": 0.6649397611618042, "learning_rate": 0.00029717086834733894, "loss": 0.4077, "step": 25204 }, { "epoch": 14.081005586592179, "grad_norm": 0.4509686529636383, "learning_rate": 0.00029714285714285715, "loss": 0.3855, "step": 25205 }, { "epoch": 14.081564245810055, "grad_norm": 0.4079494774341583, "learning_rate": 0.00029711484593837535, "loss": 0.3264, "step": 25206 }, { "epoch": 14.082122905027934, "grad_norm": 0.4381329119205475, 
"learning_rate": 0.00029708683473389356, "loss": 0.4357, "step": 25207 }, { "epoch": 14.08268156424581, "grad_norm": 0.4486906826496124, "learning_rate": 0.00029705882352941177, "loss": 0.4582, "step": 25208 }, { "epoch": 14.083240223463687, "grad_norm": 0.38026419281959534, "learning_rate": 0.00029703081232492997, "loss": 0.3547, "step": 25209 }, { "epoch": 14.083798882681565, "grad_norm": 6.7017822265625, "learning_rate": 0.0002970028011204482, "loss": 0.416, "step": 25210 }, { "epoch": 14.084357541899442, "grad_norm": 0.46478271484375, "learning_rate": 0.0002969747899159664, "loss": 0.3734, "step": 25211 }, { "epoch": 14.084916201117318, "grad_norm": 0.4860726594924927, "learning_rate": 0.00029694677871148464, "loss": 0.455, "step": 25212 }, { "epoch": 14.085474860335195, "grad_norm": 0.5200951099395752, "learning_rate": 0.0002969187675070028, "loss": 0.3661, "step": 25213 }, { "epoch": 14.086033519553073, "grad_norm": 0.4196780323982239, "learning_rate": 0.000296890756302521, "loss": 0.4539, "step": 25214 }, { "epoch": 14.08659217877095, "grad_norm": 0.5268884897232056, "learning_rate": 0.0002968627450980392, "loss": 0.4419, "step": 25215 }, { "epoch": 14.087150837988826, "grad_norm": 0.5414919853210449, "learning_rate": 0.0002968347338935574, "loss": 0.5052, "step": 25216 }, { "epoch": 14.087709497206705, "grad_norm": 0.4272289276123047, "learning_rate": 0.0002968067226890757, "loss": 0.2713, "step": 25217 }, { "epoch": 14.088268156424581, "grad_norm": 0.38965895771980286, "learning_rate": 0.0002967787114845938, "loss": 0.3698, "step": 25218 }, { "epoch": 14.088826815642458, "grad_norm": 0.39560815691947937, "learning_rate": 0.00029675070028011203, "loss": 0.3644, "step": 25219 }, { "epoch": 14.089385474860336, "grad_norm": 0.6007598042488098, "learning_rate": 0.0002967226890756303, "loss": 0.4668, "step": 25220 }, { "epoch": 14.089944134078213, "grad_norm": 0.42788320779800415, "learning_rate": 0.00029669467787114844, "loss": 0.3759, "step": 25221 }, { 
"epoch": 14.09050279329609, "grad_norm": 0.396360844373703, "learning_rate": 0.0002966666666666667, "loss": 0.3699, "step": 25222 }, { "epoch": 14.091061452513966, "grad_norm": 3.4097747802734375, "learning_rate": 0.00029663865546218486, "loss": 0.5345, "step": 25223 }, { "epoch": 14.091620111731844, "grad_norm": 0.4605167806148529, "learning_rate": 0.00029661064425770306, "loss": 0.3749, "step": 25224 }, { "epoch": 14.09217877094972, "grad_norm": 0.5276898145675659, "learning_rate": 0.0002965826330532213, "loss": 0.4286, "step": 25225 }, { "epoch": 14.092737430167597, "grad_norm": 0.417802095413208, "learning_rate": 0.0002965546218487395, "loss": 0.4423, "step": 25226 }, { "epoch": 14.093296089385476, "grad_norm": 0.5498452186584473, "learning_rate": 0.00029652661064425773, "loss": 0.3645, "step": 25227 }, { "epoch": 14.093854748603352, "grad_norm": 0.6556692719459534, "learning_rate": 0.00029649859943977594, "loss": 0.3947, "step": 25228 }, { "epoch": 14.094413407821229, "grad_norm": 0.5161360502243042, "learning_rate": 0.0002964705882352941, "loss": 0.4575, "step": 25229 }, { "epoch": 14.094972067039107, "grad_norm": 0.5679553151130676, "learning_rate": 0.00029644257703081235, "loss": 0.5336, "step": 25230 }, { "epoch": 14.095530726256984, "grad_norm": 0.49548977613449097, "learning_rate": 0.0002964145658263305, "loss": 0.3869, "step": 25231 }, { "epoch": 14.09608938547486, "grad_norm": 1.4515784978866577, "learning_rate": 0.00029638655462184876, "loss": 0.3497, "step": 25232 }, { "epoch": 14.096648044692737, "grad_norm": 0.3503189980983734, "learning_rate": 0.00029635854341736697, "loss": 0.3812, "step": 25233 }, { "epoch": 14.097206703910615, "grad_norm": 0.39500895142555237, "learning_rate": 0.0002963305322128851, "loss": 0.4219, "step": 25234 }, { "epoch": 14.097765363128492, "grad_norm": 0.687580406665802, "learning_rate": 0.0002963025210084034, "loss": 0.3774, "step": 25235 }, { "epoch": 14.098324022346368, "grad_norm": 0.6148037314414978, "learning_rate": 
0.0002962745098039216, "loss": 0.4375, "step": 25236 }, { "epoch": 14.098882681564247, "grad_norm": 0.46995699405670166, "learning_rate": 0.0002962464985994398, "loss": 0.4371, "step": 25237 }, { "epoch": 14.099441340782123, "grad_norm": 0.4028025269508362, "learning_rate": 0.000296218487394958, "loss": 0.4175, "step": 25238 }, { "epoch": 14.1, "grad_norm": 1.3604490756988525, "learning_rate": 0.00029619047619047615, "loss": 0.5418, "step": 25239 }, { "epoch": 14.100558659217878, "grad_norm": 0.35674938559532166, "learning_rate": 0.0002961624649859944, "loss": 0.3923, "step": 25240 }, { "epoch": 14.101117318435755, "grad_norm": 0.4669075310230255, "learning_rate": 0.0002961344537815126, "loss": 0.4426, "step": 25241 }, { "epoch": 14.101675977653631, "grad_norm": 0.4117047190666199, "learning_rate": 0.0002961064425770308, "loss": 0.4196, "step": 25242 }, { "epoch": 14.102234636871508, "grad_norm": 1.3927124738693237, "learning_rate": 0.00029607843137254903, "loss": 0.531, "step": 25243 }, { "epoch": 14.102793296089386, "grad_norm": 0.42453867197036743, "learning_rate": 0.00029605042016806724, "loss": 0.4364, "step": 25244 }, { "epoch": 14.103351955307263, "grad_norm": 0.44160357117652893, "learning_rate": 0.00029602240896358544, "loss": 0.3726, "step": 25245 }, { "epoch": 14.10391061452514, "grad_norm": 0.4080667793750763, "learning_rate": 0.00029599439775910365, "loss": 0.3693, "step": 25246 }, { "epoch": 14.104469273743018, "grad_norm": 0.5016002058982849, "learning_rate": 0.00029596638655462185, "loss": 0.4269, "step": 25247 }, { "epoch": 14.105027932960894, "grad_norm": 0.5420082211494446, "learning_rate": 0.00029593837535014006, "loss": 0.6439, "step": 25248 }, { "epoch": 14.10558659217877, "grad_norm": 0.4951266348361969, "learning_rate": 0.00029591036414565827, "loss": 0.3099, "step": 25249 }, { "epoch": 14.106145251396647, "grad_norm": 0.4550786018371582, "learning_rate": 0.00029588235294117647, "loss": 0.3699, "step": 25250 }, { "epoch": 14.106703910614526, 
"grad_norm": 0.6337207555770874, "learning_rate": 0.0002958543417366947, "loss": 0.4013, "step": 25251 }, { "epoch": 14.107262569832402, "grad_norm": 2.4398343563079834, "learning_rate": 0.00029582633053221294, "loss": 0.4477, "step": 25252 }, { "epoch": 14.107821229050279, "grad_norm": 0.4273487627506256, "learning_rate": 0.0002957983193277311, "loss": 0.4273, "step": 25253 }, { "epoch": 14.108379888268157, "grad_norm": 0.5015369653701782, "learning_rate": 0.0002957703081232493, "loss": 0.4909, "step": 25254 }, { "epoch": 14.108938547486034, "grad_norm": 0.47109749913215637, "learning_rate": 0.0002957422969187675, "loss": 0.4461, "step": 25255 }, { "epoch": 14.10949720670391, "grad_norm": 0.5753278732299805, "learning_rate": 0.0002957142857142857, "loss": 0.6006, "step": 25256 }, { "epoch": 14.110055865921789, "grad_norm": 0.39780813455581665, "learning_rate": 0.00029568627450980397, "loss": 0.4196, "step": 25257 }, { "epoch": 14.110614525139665, "grad_norm": 0.6434287428855896, "learning_rate": 0.0002956582633053221, "loss": 0.408, "step": 25258 }, { "epoch": 14.111173184357542, "grad_norm": 0.5035482048988342, "learning_rate": 0.0002956302521008403, "loss": 0.4585, "step": 25259 }, { "epoch": 14.111731843575418, "grad_norm": 0.7466797232627869, "learning_rate": 0.0002956022408963586, "loss": 0.4973, "step": 25260 }, { "epoch": 14.112290502793297, "grad_norm": 0.5112746953964233, "learning_rate": 0.00029557422969187674, "loss": 0.5799, "step": 25261 }, { "epoch": 14.112849162011173, "grad_norm": 1.0010509490966797, "learning_rate": 0.000295546218487395, "loss": 0.4776, "step": 25262 }, { "epoch": 14.11340782122905, "grad_norm": 0.7096441388130188, "learning_rate": 0.00029551820728291315, "loss": 0.4699, "step": 25263 }, { "epoch": 14.113966480446928, "grad_norm": 1.4034569263458252, "learning_rate": 0.00029549019607843136, "loss": 0.4724, "step": 25264 }, { "epoch": 14.114525139664805, "grad_norm": 1.9724229574203491, "learning_rate": 0.0002954621848739496, 
"loss": 0.4356, "step": 25265 }, { "epoch": 14.115083798882681, "grad_norm": 1.2837929725646973, "learning_rate": 0.00029543417366946777, "loss": 0.4072, "step": 25266 }, { "epoch": 14.11564245810056, "grad_norm": 0.4360194802284241, "learning_rate": 0.00029540616246498603, "loss": 0.4394, "step": 25267 }, { "epoch": 14.116201117318436, "grad_norm": 0.653012216091156, "learning_rate": 0.00029537815126050423, "loss": 0.5023, "step": 25268 }, { "epoch": 14.116759776536313, "grad_norm": 0.4755963981151581, "learning_rate": 0.0002953501400560224, "loss": 0.4031, "step": 25269 }, { "epoch": 14.11731843575419, "grad_norm": 0.529951810836792, "learning_rate": 0.00029532212885154065, "loss": 0.3702, "step": 25270 }, { "epoch": 14.117877094972068, "grad_norm": 0.985953688621521, "learning_rate": 0.0002952941176470588, "loss": 0.4476, "step": 25271 }, { "epoch": 14.118435754189944, "grad_norm": 0.4121471345424652, "learning_rate": 0.000295266106442577, "loss": 0.3754, "step": 25272 }, { "epoch": 14.11899441340782, "grad_norm": 0.3403964340686798, "learning_rate": 0.00029523809523809526, "loss": 0.4224, "step": 25273 }, { "epoch": 14.119553072625699, "grad_norm": 0.44252824783325195, "learning_rate": 0.0002952100840336134, "loss": 0.3419, "step": 25274 }, { "epoch": 14.120111731843576, "grad_norm": 3.4991135597229004, "learning_rate": 0.0002951820728291317, "loss": 0.3548, "step": 25275 }, { "epoch": 14.120670391061452, "grad_norm": 0.35608962178230286, "learning_rate": 0.0002951540616246499, "loss": 0.4404, "step": 25276 }, { "epoch": 14.121229050279329, "grad_norm": 0.47937682271003723, "learning_rate": 0.00029512605042016803, "loss": 0.4, "step": 25277 }, { "epoch": 14.121787709497207, "grad_norm": 0.738074779510498, "learning_rate": 0.0002950980392156863, "loss": 0.4152, "step": 25278 }, { "epoch": 14.122346368715084, "grad_norm": 0.778804361820221, "learning_rate": 0.00029507002801120445, "loss": 0.4486, "step": 25279 }, { "epoch": 14.12290502793296, "grad_norm": 
0.3959718644618988, "learning_rate": 0.0002950420168067227, "loss": 0.3151, "step": 25280 }, { "epoch": 14.123463687150839, "grad_norm": 4.583657264709473, "learning_rate": 0.0002950140056022409, "loss": 0.3263, "step": 25281 }, { "epoch": 14.124022346368715, "grad_norm": 2.1122794151306152, "learning_rate": 0.00029498599439775906, "loss": 0.4463, "step": 25282 }, { "epoch": 14.124581005586592, "grad_norm": 0.45545968413352966, "learning_rate": 0.0002949579831932773, "loss": 0.4253, "step": 25283 }, { "epoch": 14.12513966480447, "grad_norm": 0.47403988242149353, "learning_rate": 0.00029492997198879553, "loss": 0.4362, "step": 25284 }, { "epoch": 14.125698324022347, "grad_norm": 1.6997489929199219, "learning_rate": 0.00029490196078431374, "loss": 0.3871, "step": 25285 }, { "epoch": 14.126256983240223, "grad_norm": 0.9154916405677795, "learning_rate": 0.00029487394957983194, "loss": 0.4911, "step": 25286 }, { "epoch": 14.1268156424581, "grad_norm": 1.0880929231643677, "learning_rate": 0.0002948459383753501, "loss": 0.4515, "step": 25287 }, { "epoch": 14.127374301675978, "grad_norm": 0.3965624272823334, "learning_rate": 0.00029481792717086835, "loss": 0.3945, "step": 25288 }, { "epoch": 14.127932960893855, "grad_norm": 0.5198319554328918, "learning_rate": 0.00029478991596638656, "loss": 0.4914, "step": 25289 }, { "epoch": 14.128491620111731, "grad_norm": 0.4856901466846466, "learning_rate": 0.00029476190476190477, "loss": 0.3462, "step": 25290 }, { "epoch": 14.12905027932961, "grad_norm": 1.0101051330566406, "learning_rate": 0.00029473389355742297, "loss": 0.3211, "step": 25291 }, { "epoch": 14.129608938547486, "grad_norm": 1.641786813735962, "learning_rate": 0.0002947058823529412, "loss": 0.4097, "step": 25292 }, { "epoch": 14.130167597765363, "grad_norm": 0.4728354215621948, "learning_rate": 0.0002946778711484594, "loss": 0.3946, "step": 25293 }, { "epoch": 14.130726256983241, "grad_norm": 1.2953299283981323, "learning_rate": 0.0002946498599439776, "loss": 0.3475, 
"step": 25294 }, { "epoch": 14.131284916201118, "grad_norm": 0.6651237607002258, "learning_rate": 0.00029462184873949585, "loss": 0.4749, "step": 25295 }, { "epoch": 14.131843575418994, "grad_norm": 0.4762365520000458, "learning_rate": 0.000294593837535014, "loss": 0.3566, "step": 25296 }, { "epoch": 14.13240223463687, "grad_norm": 0.3909949064254761, "learning_rate": 0.0002945658263305322, "loss": 0.4144, "step": 25297 }, { "epoch": 14.132960893854749, "grad_norm": 0.5893816351890564, "learning_rate": 0.0002945378151260504, "loss": 0.4077, "step": 25298 }, { "epoch": 14.133519553072626, "grad_norm": 0.4073117673397064, "learning_rate": 0.0002945098039215686, "loss": 0.4622, "step": 25299 }, { "epoch": 14.134078212290502, "grad_norm": 0.41223081946372986, "learning_rate": 0.0002944817927170869, "loss": 0.4471, "step": 25300 }, { "epoch": 14.13463687150838, "grad_norm": 0.2972390651702881, "learning_rate": 0.00029445378151260503, "loss": 0.2445, "step": 25301 }, { "epoch": 14.135195530726257, "grad_norm": 0.4496445059776306, "learning_rate": 0.00029442577030812324, "loss": 0.3742, "step": 25302 }, { "epoch": 14.135754189944134, "grad_norm": 0.5851024389266968, "learning_rate": 0.0002943977591036415, "loss": 0.3323, "step": 25303 }, { "epoch": 14.136312849162012, "grad_norm": 0.5123774409294128, "learning_rate": 0.00029436974789915965, "loss": 0.4662, "step": 25304 }, { "epoch": 14.136871508379889, "grad_norm": 0.5973889231681824, "learning_rate": 0.0002943417366946779, "loss": 0.533, "step": 25305 }, { "epoch": 14.137430167597765, "grad_norm": 0.7023487687110901, "learning_rate": 0.00029431372549019606, "loss": 0.4516, "step": 25306 }, { "epoch": 14.137988826815642, "grad_norm": 0.44419729709625244, "learning_rate": 0.00029428571428571427, "loss": 0.4859, "step": 25307 }, { "epoch": 14.13854748603352, "grad_norm": 0.4277520775794983, "learning_rate": 0.00029425770308123253, "loss": 0.3727, "step": 25308 }, { "epoch": 14.139106145251397, "grad_norm": 
0.8788644075393677, "learning_rate": 0.0002942296918767507, "loss": 0.4663, "step": 25309 }, { "epoch": 14.139664804469273, "grad_norm": 0.9085835218429565, "learning_rate": 0.00029420168067226894, "loss": 0.5411, "step": 25310 }, { "epoch": 14.140223463687152, "grad_norm": 3.711705446243286, "learning_rate": 0.00029417366946778715, "loss": 0.4121, "step": 25311 }, { "epoch": 14.140782122905028, "grad_norm": 0.5772017240524292, "learning_rate": 0.0002941456582633053, "loss": 0.5762, "step": 25312 }, { "epoch": 14.141340782122905, "grad_norm": 0.5865617394447327, "learning_rate": 0.00029411764705882356, "loss": 0.3212, "step": 25313 }, { "epoch": 14.141899441340781, "grad_norm": 0.6632071733474731, "learning_rate": 0.0002940896358543417, "loss": 0.5052, "step": 25314 }, { "epoch": 14.14245810055866, "grad_norm": 0.6682087182998657, "learning_rate": 0.00029406162464985997, "loss": 0.5174, "step": 25315 }, { "epoch": 14.143016759776536, "grad_norm": 0.3512507975101471, "learning_rate": 0.0002940336134453782, "loss": 0.3698, "step": 25316 }, { "epoch": 14.143575418994413, "grad_norm": 0.3714883625507355, "learning_rate": 0.00029400560224089633, "loss": 0.2646, "step": 25317 }, { "epoch": 14.144134078212291, "grad_norm": 0.4298558235168457, "learning_rate": 0.0002939775910364146, "loss": 0.3523, "step": 25318 }, { "epoch": 14.144692737430168, "grad_norm": 0.49693670868873596, "learning_rate": 0.0002939495798319328, "loss": 0.4972, "step": 25319 }, { "epoch": 14.145251396648044, "grad_norm": 0.5760936737060547, "learning_rate": 0.000293921568627451, "loss": 0.3955, "step": 25320 }, { "epoch": 14.145810055865923, "grad_norm": 0.7711797952651978, "learning_rate": 0.0002938935574229692, "loss": 0.5422, "step": 25321 }, { "epoch": 14.1463687150838, "grad_norm": 0.5558648705482483, "learning_rate": 0.00029386554621848736, "loss": 0.4265, "step": 25322 }, { "epoch": 14.146927374301676, "grad_norm": 0.4073849022388458, "learning_rate": 0.0002938375350140056, "loss": 0.3977, 
"step": 25323 }, { "epoch": 14.147486033519552, "grad_norm": 0.5167980790138245, "learning_rate": 0.0002938095238095238, "loss": 0.5153, "step": 25324 }, { "epoch": 14.14804469273743, "grad_norm": 0.36196261644363403, "learning_rate": 0.00029378151260504203, "loss": 0.4237, "step": 25325 }, { "epoch": 14.148603351955307, "grad_norm": 0.679480254650116, "learning_rate": 0.00029375350140056024, "loss": 0.4334, "step": 25326 }, { "epoch": 14.149162011173184, "grad_norm": 0.2971903383731842, "learning_rate": 0.00029372549019607844, "loss": 0.3687, "step": 25327 }, { "epoch": 14.149720670391062, "grad_norm": 1.4860907793045044, "learning_rate": 0.00029369747899159665, "loss": 0.3838, "step": 25328 }, { "epoch": 14.150279329608939, "grad_norm": 0.48321208357810974, "learning_rate": 0.00029366946778711485, "loss": 0.3966, "step": 25329 }, { "epoch": 14.150837988826815, "grad_norm": 0.749348521232605, "learning_rate": 0.00029364145658263306, "loss": 0.5299, "step": 25330 }, { "epoch": 14.151396648044694, "grad_norm": 0.7418469786643982, "learning_rate": 0.00029361344537815127, "loss": 0.4166, "step": 25331 }, { "epoch": 14.15195530726257, "grad_norm": 0.43929678201675415, "learning_rate": 0.00029358543417366947, "loss": 0.3452, "step": 25332 }, { "epoch": 14.152513966480447, "grad_norm": 0.39026811718940735, "learning_rate": 0.0002935574229691877, "loss": 0.3996, "step": 25333 }, { "epoch": 14.153072625698323, "grad_norm": 0.42382487654685974, "learning_rate": 0.0002935294117647059, "loss": 0.391, "step": 25334 }, { "epoch": 14.153631284916202, "grad_norm": 0.4462238550186157, "learning_rate": 0.00029350140056022414, "loss": 0.4032, "step": 25335 }, { "epoch": 14.154189944134078, "grad_norm": 0.43746742606163025, "learning_rate": 0.0002934733893557423, "loss": 0.3956, "step": 25336 }, { "epoch": 14.154748603351955, "grad_norm": 0.4640008211135864, "learning_rate": 0.0002934453781512605, "loss": 0.4619, "step": 25337 }, { "epoch": 14.155307262569833, "grad_norm": 
0.5463054180145264, "learning_rate": 0.0002934173669467787, "loss": 0.4103, "step": 25338 }, { "epoch": 14.15586592178771, "grad_norm": 0.4512375593185425, "learning_rate": 0.0002933893557422969, "loss": 0.3586, "step": 25339 }, { "epoch": 14.156424581005586, "grad_norm": 0.6465870141983032, "learning_rate": 0.0002933613445378152, "loss": 0.3646, "step": 25340 }, { "epoch": 14.156983240223465, "grad_norm": 0.4685108959674835, "learning_rate": 0.0002933333333333333, "loss": 0.5483, "step": 25341 }, { "epoch": 14.157541899441341, "grad_norm": 0.5340759754180908, "learning_rate": 0.00029330532212885153, "loss": 0.3275, "step": 25342 }, { "epoch": 14.158100558659218, "grad_norm": 0.46942225098609924, "learning_rate": 0.0002932773109243698, "loss": 0.5873, "step": 25343 }, { "epoch": 14.158659217877094, "grad_norm": 0.5129939317703247, "learning_rate": 0.00029324929971988794, "loss": 0.4062, "step": 25344 }, { "epoch": 14.159217877094973, "grad_norm": 0.3288388252258301, "learning_rate": 0.0002932212885154062, "loss": 0.3835, "step": 25345 }, { "epoch": 14.15977653631285, "grad_norm": 0.4304754436016083, "learning_rate": 0.00029319327731092436, "loss": 0.543, "step": 25346 }, { "epoch": 14.160335195530726, "grad_norm": 0.4583442509174347, "learning_rate": 0.00029316526610644256, "loss": 0.6142, "step": 25347 }, { "epoch": 14.160893854748604, "grad_norm": 1.060774803161621, "learning_rate": 0.0002931372549019608, "loss": 0.4083, "step": 25348 }, { "epoch": 14.16145251396648, "grad_norm": 0.4252772331237793, "learning_rate": 0.000293109243697479, "loss": 0.3002, "step": 25349 }, { "epoch": 14.162011173184357, "grad_norm": 0.459568589925766, "learning_rate": 0.00029308123249299723, "loss": 0.4, "step": 25350 }, { "epoch": 14.162569832402234, "grad_norm": 0.6705222725868225, "learning_rate": 0.00029305322128851544, "loss": 0.4242, "step": 25351 }, { "epoch": 14.163128491620112, "grad_norm": 0.39991599321365356, "learning_rate": 0.0002930252100840336, "loss": 0.434, "step": 
25352 }, { "epoch": 14.163687150837989, "grad_norm": 0.41154345870018005, "learning_rate": 0.00029299719887955185, "loss": 0.3575, "step": 25353 }, { "epoch": 14.164245810055865, "grad_norm": 0.7604439854621887, "learning_rate": 0.00029296918767507, "loss": 0.5188, "step": 25354 }, { "epoch": 14.164804469273744, "grad_norm": 0.4710926413536072, "learning_rate": 0.00029294117647058826, "loss": 0.3541, "step": 25355 }, { "epoch": 14.16536312849162, "grad_norm": 0.5217876434326172, "learning_rate": 0.00029291316526610647, "loss": 0.4285, "step": 25356 }, { "epoch": 14.165921787709497, "grad_norm": 0.7819264531135559, "learning_rate": 0.0002928851540616246, "loss": 0.3806, "step": 25357 }, { "epoch": 14.166480446927375, "grad_norm": 0.552517294883728, "learning_rate": 0.0002928571428571429, "loss": 0.3913, "step": 25358 }, { "epoch": 14.167039106145252, "grad_norm": 0.40774044394493103, "learning_rate": 0.0002928291316526611, "loss": 0.4348, "step": 25359 }, { "epoch": 14.167597765363128, "grad_norm": 1.6148439645767212, "learning_rate": 0.0002928011204481793, "loss": 0.4019, "step": 25360 }, { "epoch": 14.168156424581005, "grad_norm": 0.7223689556121826, "learning_rate": 0.0002927731092436975, "loss": 0.5353, "step": 25361 }, { "epoch": 14.168715083798883, "grad_norm": 0.5304924249649048, "learning_rate": 0.00029274509803921565, "loss": 0.4043, "step": 25362 }, { "epoch": 14.16927374301676, "grad_norm": 0.28764864802360535, "learning_rate": 0.0002927170868347339, "loss": 0.2598, "step": 25363 }, { "epoch": 14.169832402234636, "grad_norm": 0.5099817514419556, "learning_rate": 0.0002926890756302521, "loss": 0.3896, "step": 25364 }, { "epoch": 14.170391061452515, "grad_norm": 1.29264497756958, "learning_rate": 0.0002926610644257703, "loss": 0.3744, "step": 25365 }, { "epoch": 14.170949720670391, "grad_norm": 12.400774002075195, "learning_rate": 0.00029263305322128853, "loss": 0.3973, "step": 25366 }, { "epoch": 14.171508379888268, "grad_norm": 1.8185019493103027, 
"learning_rate": 0.00029260504201680674, "loss": 0.3709, "step": 25367 }, { "epoch": 14.172067039106146, "grad_norm": 0.6673020720481873, "learning_rate": 0.00029257703081232494, "loss": 0.385, "step": 25368 }, { "epoch": 14.172625698324023, "grad_norm": 0.505962610244751, "learning_rate": 0.00029254901960784315, "loss": 0.4431, "step": 25369 }, { "epoch": 14.1731843575419, "grad_norm": 0.815013587474823, "learning_rate": 0.00029252100840336135, "loss": 0.3551, "step": 25370 }, { "epoch": 14.173743016759776, "grad_norm": 0.6648625731468201, "learning_rate": 0.00029249299719887956, "loss": 0.5268, "step": 25371 }, { "epoch": 14.174301675977654, "grad_norm": 0.4008505940437317, "learning_rate": 0.00029246498599439777, "loss": 0.4143, "step": 25372 }, { "epoch": 14.17486033519553, "grad_norm": 1.815342903137207, "learning_rate": 0.00029243697478991597, "loss": 0.4116, "step": 25373 }, { "epoch": 14.175418994413407, "grad_norm": 1.129390835762024, "learning_rate": 0.0002924089635854342, "loss": 0.3374, "step": 25374 }, { "epoch": 14.175977653631286, "grad_norm": 0.4192420542240143, "learning_rate": 0.00029238095238095244, "loss": 0.3758, "step": 25375 }, { "epoch": 14.176536312849162, "grad_norm": 0.40174534916877747, "learning_rate": 0.0002923529411764706, "loss": 0.442, "step": 25376 }, { "epoch": 14.177094972067039, "grad_norm": 1.539506435394287, "learning_rate": 0.0002923249299719888, "loss": 0.5041, "step": 25377 }, { "epoch": 14.177653631284917, "grad_norm": 0.45347777009010315, "learning_rate": 0.000292296918767507, "loss": 0.5917, "step": 25378 }, { "epoch": 14.178212290502794, "grad_norm": 0.4287858307361603, "learning_rate": 0.0002922689075630252, "loss": 0.4937, "step": 25379 }, { "epoch": 14.17877094972067, "grad_norm": 0.603649377822876, "learning_rate": 0.00029224089635854347, "loss": 0.4027, "step": 25380 }, { "epoch": 14.179329608938547, "grad_norm": 0.4463514983654022, "learning_rate": 0.0002922128851540616, "loss": 0.508, "step": 25381 }, { "epoch": 
14.179888268156425, "grad_norm": 0.43069174885749817, "learning_rate": 0.0002921848739495798, "loss": 0.3804, "step": 25382 }, { "epoch": 14.180446927374302, "grad_norm": 0.4212243854999542, "learning_rate": 0.0002921568627450981, "loss": 0.4008, "step": 25383 }, { "epoch": 14.181005586592178, "grad_norm": 0.43528735637664795, "learning_rate": 0.00029212885154061624, "loss": 0.2751, "step": 25384 }, { "epoch": 14.181564245810057, "grad_norm": 0.3767814040184021, "learning_rate": 0.00029210084033613444, "loss": 0.4111, "step": 25385 }, { "epoch": 14.182122905027933, "grad_norm": 0.393604576587677, "learning_rate": 0.00029207282913165265, "loss": 0.3957, "step": 25386 }, { "epoch": 14.18268156424581, "grad_norm": 0.3708493411540985, "learning_rate": 0.00029204481792717086, "loss": 0.4047, "step": 25387 }, { "epoch": 14.183240223463686, "grad_norm": 0.9301588535308838, "learning_rate": 0.0002920168067226891, "loss": 0.5294, "step": 25388 }, { "epoch": 14.183798882681565, "grad_norm": 0.49596869945526123, "learning_rate": 0.00029198879551820727, "loss": 0.536, "step": 25389 }, { "epoch": 14.184357541899441, "grad_norm": 2.0865442752838135, "learning_rate": 0.0002919607843137255, "loss": 0.4239, "step": 25390 }, { "epoch": 14.184916201117318, "grad_norm": 2.171962261199951, "learning_rate": 0.00029193277310924373, "loss": 0.3256, "step": 25391 }, { "epoch": 14.185474860335196, "grad_norm": 2.2568492889404297, "learning_rate": 0.0002919047619047619, "loss": 0.4235, "step": 25392 }, { "epoch": 14.186033519553073, "grad_norm": 0.3644927442073822, "learning_rate": 0.00029187675070028015, "loss": 0.3703, "step": 25393 }, { "epoch": 14.18659217877095, "grad_norm": 0.47552794218063354, "learning_rate": 0.0002918487394957983, "loss": 0.4287, "step": 25394 }, { "epoch": 14.187150837988828, "grad_norm": 0.3807581961154938, "learning_rate": 0.0002918207282913165, "loss": 0.3197, "step": 25395 }, { "epoch": 14.187709497206704, "grad_norm": 0.3990229666233063, "learning_rate": 
0.00029179271708683476, "loss": 0.3832, "step": 25396 }, { "epoch": 14.18826815642458, "grad_norm": 0.37736618518829346, "learning_rate": 0.0002917647058823529, "loss": 0.4209, "step": 25397 }, { "epoch": 14.188826815642457, "grad_norm": 0.40924301743507385, "learning_rate": 0.0002917366946778712, "loss": 0.5158, "step": 25398 }, { "epoch": 14.189385474860336, "grad_norm": 0.46802961826324463, "learning_rate": 0.0002917086834733894, "loss": 0.5239, "step": 25399 }, { "epoch": 14.189944134078212, "grad_norm": 0.4457922577857971, "learning_rate": 0.00029168067226890753, "loss": 0.3584, "step": 25400 }, { "epoch": 14.190502793296089, "grad_norm": 0.43479225039482117, "learning_rate": 0.0002916526610644258, "loss": 0.4077, "step": 25401 }, { "epoch": 14.191061452513967, "grad_norm": 0.5818706154823303, "learning_rate": 0.00029162464985994395, "loss": 0.6792, "step": 25402 }, { "epoch": 14.191620111731844, "grad_norm": 1.5353957414627075, "learning_rate": 0.0002915966386554622, "loss": 0.4007, "step": 25403 }, { "epoch": 14.19217877094972, "grad_norm": 0.8117145895957947, "learning_rate": 0.0002915686274509804, "loss": 0.4783, "step": 25404 }, { "epoch": 14.192737430167599, "grad_norm": 1.470420241355896, "learning_rate": 0.00029154061624649856, "loss": 0.4613, "step": 25405 }, { "epoch": 14.193296089385475, "grad_norm": 0.3942779302597046, "learning_rate": 0.0002915126050420168, "loss": 0.3891, "step": 25406 }, { "epoch": 14.193854748603352, "grad_norm": 2.4024617671966553, "learning_rate": 0.00029148459383753503, "loss": 0.4088, "step": 25407 }, { "epoch": 14.194413407821228, "grad_norm": 0.6227216720581055, "learning_rate": 0.00029145658263305324, "loss": 0.3696, "step": 25408 }, { "epoch": 14.194972067039107, "grad_norm": 0.540982186794281, "learning_rate": 0.00029142857142857144, "loss": 0.4258, "step": 25409 }, { "epoch": 14.195530726256983, "grad_norm": 0.6376860737800598, "learning_rate": 0.0002914005602240896, "loss": 0.3395, "step": 25410 }, { "epoch": 
14.19608938547486, "grad_norm": 0.5503140091896057, "learning_rate": 0.00029137254901960785, "loss": 0.5466, "step": 25411 }, { "epoch": 14.196648044692738, "grad_norm": 0.4306468963623047, "learning_rate": 0.00029134453781512606, "loss": 0.351, "step": 25412 }, { "epoch": 14.197206703910615, "grad_norm": 0.5129110813140869, "learning_rate": 0.00029131652661064427, "loss": 0.4, "step": 25413 }, { "epoch": 14.197765363128491, "grad_norm": 0.4232270419597626, "learning_rate": 0.00029128851540616247, "loss": 0.3758, "step": 25414 }, { "epoch": 14.19832402234637, "grad_norm": 0.32299908995628357, "learning_rate": 0.0002912605042016807, "loss": 0.4056, "step": 25415 }, { "epoch": 14.198882681564246, "grad_norm": 0.5747291445732117, "learning_rate": 0.0002912324929971989, "loss": 0.4724, "step": 25416 }, { "epoch": 14.199441340782123, "grad_norm": 0.662925124168396, "learning_rate": 0.0002912044817927171, "loss": 0.4932, "step": 25417 }, { "epoch": 14.2, "grad_norm": 0.5969512462615967, "learning_rate": 0.0002911764705882353, "loss": 0.4246, "step": 25418 }, { "epoch": 14.200558659217878, "grad_norm": 0.35183316469192505, "learning_rate": 0.0002911484593837535, "loss": 0.3795, "step": 25419 }, { "epoch": 14.201117318435754, "grad_norm": 0.8971480131149292, "learning_rate": 0.0002911204481792717, "loss": 0.4092, "step": 25420 }, { "epoch": 14.20167597765363, "grad_norm": 0.5833323001861572, "learning_rate": 0.0002910924369747899, "loss": 0.3542, "step": 25421 }, { "epoch": 14.202234636871509, "grad_norm": 0.6520475149154663, "learning_rate": 0.0002910644257703081, "loss": 0.4713, "step": 25422 }, { "epoch": 14.202793296089386, "grad_norm": 0.6420779824256897, "learning_rate": 0.0002910364145658264, "loss": 0.482, "step": 25423 }, { "epoch": 14.203351955307262, "grad_norm": 1.9195599555969238, "learning_rate": 0.00029100840336134453, "loss": 0.3527, "step": 25424 }, { "epoch": 14.203910614525139, "grad_norm": 0.5164729952812195, "learning_rate": 0.00029098039215686274, 
"loss": 0.3458, "step": 25425 }, { "epoch": 14.204469273743017, "grad_norm": 0.3451891541481018, "learning_rate": 0.00029095238095238094, "loss": 0.3937, "step": 25426 }, { "epoch": 14.205027932960894, "grad_norm": 2.548146963119507, "learning_rate": 0.00029092436974789915, "loss": 0.3416, "step": 25427 }, { "epoch": 14.20558659217877, "grad_norm": 0.5128240585327148, "learning_rate": 0.0002908963585434174, "loss": 0.523, "step": 25428 }, { "epoch": 14.206145251396649, "grad_norm": 0.5100475549697876, "learning_rate": 0.00029086834733893556, "loss": 0.3806, "step": 25429 }, { "epoch": 14.206703910614525, "grad_norm": 0.7261170148849487, "learning_rate": 0.00029084033613445377, "loss": 0.2921, "step": 25430 }, { "epoch": 14.207262569832402, "grad_norm": 0.3787108361721039, "learning_rate": 0.00029081232492997203, "loss": 0.4531, "step": 25431 }, { "epoch": 14.20782122905028, "grad_norm": 0.428067147731781, "learning_rate": 0.0002907843137254902, "loss": 0.3762, "step": 25432 }, { "epoch": 14.208379888268157, "grad_norm": 0.759437084197998, "learning_rate": 0.00029075630252100844, "loss": 0.4209, "step": 25433 }, { "epoch": 14.208938547486033, "grad_norm": 0.3542296290397644, "learning_rate": 0.0002907282913165266, "loss": 0.3556, "step": 25434 }, { "epoch": 14.20949720670391, "grad_norm": 0.42031148076057434, "learning_rate": 0.0002907002801120448, "loss": 0.3731, "step": 25435 }, { "epoch": 14.210055865921788, "grad_norm": 0.9290695190429688, "learning_rate": 0.00029067226890756306, "loss": 0.3655, "step": 25436 }, { "epoch": 14.210614525139665, "grad_norm": 1.0854949951171875, "learning_rate": 0.0002906442577030812, "loss": 0.4211, "step": 25437 }, { "epoch": 14.211173184357541, "grad_norm": 0.46389836072921753, "learning_rate": 0.00029061624649859947, "loss": 0.4988, "step": 25438 }, { "epoch": 14.21173184357542, "grad_norm": 0.458755761384964, "learning_rate": 0.0002905882352941177, "loss": 0.4781, "step": 25439 }, { "epoch": 14.212290502793296, "grad_norm": 
0.6610090136528015, "learning_rate": 0.00029056022408963583, "loss": 0.3479, "step": 25440 }, { "epoch": 14.212849162011173, "grad_norm": 0.4801051616668701, "learning_rate": 0.0002905322128851541, "loss": 0.3652, "step": 25441 }, { "epoch": 14.213407821229051, "grad_norm": 1.0304220914840698, "learning_rate": 0.00029050420168067224, "loss": 0.3939, "step": 25442 }, { "epoch": 14.213966480446928, "grad_norm": 0.5730315446853638, "learning_rate": 0.0002904761904761905, "loss": 0.5498, "step": 25443 }, { "epoch": 14.214525139664804, "grad_norm": 0.5775060057640076, "learning_rate": 0.0002904481792717087, "loss": 0.4653, "step": 25444 }, { "epoch": 14.21508379888268, "grad_norm": 0.7271833419799805, "learning_rate": 0.00029042016806722686, "loss": 0.4736, "step": 25445 }, { "epoch": 14.21564245810056, "grad_norm": 0.3662284016609192, "learning_rate": 0.0002903921568627451, "loss": 0.3464, "step": 25446 }, { "epoch": 14.216201117318436, "grad_norm": 0.6032304167747498, "learning_rate": 0.0002903641456582633, "loss": 0.3747, "step": 25447 }, { "epoch": 14.216759776536312, "grad_norm": 0.42111560702323914, "learning_rate": 0.00029033613445378153, "loss": 0.4004, "step": 25448 }, { "epoch": 14.21731843575419, "grad_norm": 0.4776552617549896, "learning_rate": 0.00029030812324929974, "loss": 0.3661, "step": 25449 }, { "epoch": 14.217877094972067, "grad_norm": 0.7991938591003418, "learning_rate": 0.0002902801120448179, "loss": 0.5408, "step": 25450 }, { "epoch": 14.218435754189944, "grad_norm": 0.4549073874950409, "learning_rate": 0.00029025210084033615, "loss": 0.3762, "step": 25451 }, { "epoch": 14.21899441340782, "grad_norm": 0.3923225998878479, "learning_rate": 0.00029022408963585435, "loss": 0.4157, "step": 25452 }, { "epoch": 14.219553072625699, "grad_norm": 0.8494863510131836, "learning_rate": 0.00029019607843137256, "loss": 0.3551, "step": 25453 }, { "epoch": 14.220111731843575, "grad_norm": 0.4054602086544037, "learning_rate": 0.00029016806722689077, "loss": 0.3353, 
"step": 25454 }, { "epoch": 14.220670391061452, "grad_norm": 0.5391204953193665, "learning_rate": 0.00029014005602240897, "loss": 0.5238, "step": 25455 }, { "epoch": 14.22122905027933, "grad_norm": 0.5281268358230591, "learning_rate": 0.0002901120448179272, "loss": 0.3553, "step": 25456 }, { "epoch": 14.221787709497207, "grad_norm": 0.5622842907905579, "learning_rate": 0.0002900840336134454, "loss": 0.446, "step": 25457 }, { "epoch": 14.222346368715083, "grad_norm": 0.4416981041431427, "learning_rate": 0.0002900560224089636, "loss": 0.4989, "step": 25458 }, { "epoch": 14.222905027932962, "grad_norm": 0.5113020539283752, "learning_rate": 0.0002900280112044818, "loss": 0.5118, "step": 25459 }, { "epoch": 14.223463687150838, "grad_norm": 0.62982577085495, "learning_rate": 0.00029, "loss": 0.4557, "step": 25460 }, { "epoch": 14.224022346368715, "grad_norm": 0.5984874367713928, "learning_rate": 0.0002899719887955182, "loss": 0.4455, "step": 25461 }, { "epoch": 14.224581005586591, "grad_norm": 0.6335060596466064, "learning_rate": 0.0002899439775910364, "loss": 0.3539, "step": 25462 }, { "epoch": 14.22513966480447, "grad_norm": 0.5951938033103943, "learning_rate": 0.0002899159663865547, "loss": 0.4757, "step": 25463 }, { "epoch": 14.225698324022346, "grad_norm": 0.46993544697761536, "learning_rate": 0.0002898879551820728, "loss": 0.497, "step": 25464 }, { "epoch": 14.226256983240223, "grad_norm": 0.5990579128265381, "learning_rate": 0.00028985994397759103, "loss": 0.4243, "step": 25465 }, { "epoch": 14.226815642458101, "grad_norm": 0.685468316078186, "learning_rate": 0.00028983193277310924, "loss": 0.4395, "step": 25466 }, { "epoch": 14.227374301675978, "grad_norm": 0.6801133155822754, "learning_rate": 0.00028980392156862744, "loss": 0.3139, "step": 25467 }, { "epoch": 14.227932960893854, "grad_norm": 0.9943726062774658, "learning_rate": 0.0002897759103641457, "loss": 0.4103, "step": 25468 }, { "epoch": 14.228491620111733, "grad_norm": 1.1428391933441162, "learning_rate": 
0.00028974789915966386, "loss": 0.4004, "step": 25469 }, { "epoch": 14.22905027932961, "grad_norm": 0.5141860842704773, "learning_rate": 0.00028971988795518206, "loss": 0.4099, "step": 25470 }, { "epoch": 14.229608938547486, "grad_norm": 0.4192003905773163, "learning_rate": 0.0002896918767507003, "loss": 0.4695, "step": 25471 }, { "epoch": 14.230167597765362, "grad_norm": 0.5188270807266235, "learning_rate": 0.0002896638655462185, "loss": 0.4887, "step": 25472 }, { "epoch": 14.23072625698324, "grad_norm": 0.3818208873271942, "learning_rate": 0.00028963585434173673, "loss": 0.4184, "step": 25473 }, { "epoch": 14.231284916201117, "grad_norm": 0.5171835422515869, "learning_rate": 0.0002896078431372549, "loss": 0.3886, "step": 25474 }, { "epoch": 14.231843575418994, "grad_norm": 0.4434897005558014, "learning_rate": 0.0002895798319327731, "loss": 0.4194, "step": 25475 }, { "epoch": 14.232402234636872, "grad_norm": 0.4490929841995239, "learning_rate": 0.00028955182072829135, "loss": 0.41, "step": 25476 }, { "epoch": 14.232960893854749, "grad_norm": 0.5383500456809998, "learning_rate": 0.0002895238095238095, "loss": 0.4215, "step": 25477 }, { "epoch": 14.233519553072625, "grad_norm": 2.6780238151550293, "learning_rate": 0.00028949579831932776, "loss": 0.5994, "step": 25478 }, { "epoch": 14.234078212290504, "grad_norm": 1.0847309827804565, "learning_rate": 0.00028946778711484597, "loss": 0.4374, "step": 25479 }, { "epoch": 14.23463687150838, "grad_norm": 0.5482456684112549, "learning_rate": 0.0002894397759103641, "loss": 0.4361, "step": 25480 }, { "epoch": 14.235195530726257, "grad_norm": 3.7909281253814697, "learning_rate": 0.0002894117647058824, "loss": 0.4548, "step": 25481 }, { "epoch": 14.235754189944133, "grad_norm": 2.6463077068328857, "learning_rate": 0.00028938375350140053, "loss": 0.4261, "step": 25482 }, { "epoch": 14.236312849162012, "grad_norm": 0.4804808795452118, "learning_rate": 0.0002893557422969188, "loss": 0.4383, "step": 25483 }, { "epoch": 
14.236871508379888, "grad_norm": 1.7638521194458008, "learning_rate": 0.000289327731092437, "loss": 0.6639, "step": 25484 }, { "epoch": 14.237430167597765, "grad_norm": 0.4069303870201111, "learning_rate": 0.00028929971988795515, "loss": 0.394, "step": 25485 }, { "epoch": 14.237988826815643, "grad_norm": 2.883249044418335, "learning_rate": 0.0002892717086834734, "loss": 0.5772, "step": 25486 }, { "epoch": 14.23854748603352, "grad_norm": 0.4020165205001831, "learning_rate": 0.0002892436974789916, "loss": 0.3719, "step": 25487 }, { "epoch": 14.239106145251396, "grad_norm": 0.43882298469543457, "learning_rate": 0.0002892156862745098, "loss": 0.4669, "step": 25488 }, { "epoch": 14.239664804469275, "grad_norm": 0.7697341442108154, "learning_rate": 0.00028918767507002803, "loss": 0.384, "step": 25489 }, { "epoch": 14.240223463687151, "grad_norm": 0.4405876696109772, "learning_rate": 0.0002891596638655462, "loss": 0.4291, "step": 25490 }, { "epoch": 14.240782122905028, "grad_norm": 0.5887637138366699, "learning_rate": 0.00028913165266106444, "loss": 0.4115, "step": 25491 }, { "epoch": 14.241340782122904, "grad_norm": 0.4987044930458069, "learning_rate": 0.00028910364145658265, "loss": 0.2933, "step": 25492 }, { "epoch": 14.241899441340783, "grad_norm": 0.38464289903640747, "learning_rate": 0.00028907563025210085, "loss": 0.4263, "step": 25493 }, { "epoch": 14.24245810055866, "grad_norm": 0.42155346274375916, "learning_rate": 0.00028904761904761906, "loss": 0.3301, "step": 25494 }, { "epoch": 14.243016759776536, "grad_norm": 0.5381590127944946, "learning_rate": 0.00028901960784313727, "loss": 0.5916, "step": 25495 }, { "epoch": 14.243575418994414, "grad_norm": 0.4940604865550995, "learning_rate": 0.00028899159663865547, "loss": 0.3237, "step": 25496 }, { "epoch": 14.24413407821229, "grad_norm": 0.4953722357749939, "learning_rate": 0.0002889635854341737, "loss": 0.4075, "step": 25497 }, { "epoch": 14.244692737430167, "grad_norm": 0.58608078956604, "learning_rate": 
0.00028893557422969183, "loss": 0.4047, "step": 25498 }, { "epoch": 14.245251396648044, "grad_norm": 0.6618332862854004, "learning_rate": 0.0002889075630252101, "loss": 0.4907, "step": 25499 }, { "epoch": 14.245810055865922, "grad_norm": 0.5740599632263184, "learning_rate": 0.0002888795518207283, "loss": 0.3338, "step": 25500 }, { "epoch": 14.245810055865922, "eval_cer": 0.08616193687058799, "eval_loss": 0.3270304501056671, "eval_runtime": 55.6267, "eval_samples_per_second": 81.58, "eval_steps_per_second": 5.105, "eval_wer": 0.34046535012988466, "step": 25500 }, { "epoch": 14.246368715083799, "grad_norm": 0.752598226070404, "learning_rate": 0.0002888515406162465, "loss": 0.5034, "step": 25501 }, { "epoch": 14.246927374301675, "grad_norm": 0.38785606622695923, "learning_rate": 0.0002888235294117647, "loss": 0.3528, "step": 25502 }, { "epoch": 14.247486033519554, "grad_norm": 2.519864320755005, "learning_rate": 0.0002887955182072829, "loss": 0.4753, "step": 25503 }, { "epoch": 14.24804469273743, "grad_norm": 0.5507903695106506, "learning_rate": 0.0002887675070028011, "loss": 0.3467, "step": 25504 }, { "epoch": 14.248603351955307, "grad_norm": 1.0111862421035767, "learning_rate": 0.0002887394957983193, "loss": 0.4537, "step": 25505 }, { "epoch": 14.249162011173185, "grad_norm": 0.5406572818756104, "learning_rate": 0.0002887114845938376, "loss": 0.3905, "step": 25506 }, { "epoch": 14.249720670391062, "grad_norm": 0.638052225112915, "learning_rate": 0.00028868347338935574, "loss": 0.3996, "step": 25507 }, { "epoch": 14.250279329608938, "grad_norm": 0.560427188873291, "learning_rate": 0.00028865546218487394, "loss": 0.422, "step": 25508 }, { "epoch": 14.250837988826815, "grad_norm": 0.8192435503005981, "learning_rate": 0.00028862745098039215, "loss": 0.393, "step": 25509 }, { "epoch": 14.251396648044693, "grad_norm": 0.43932002782821655, "learning_rate": 0.00028859943977591036, "loss": 0.4577, "step": 25510 }, { "epoch": 14.25195530726257, "grad_norm": 0.422860324382782, 
"learning_rate": 0.0002885714285714286, "loss": 0.3799, "step": 25511 }, { "epoch": 14.252513966480446, "grad_norm": 0.7656521201133728, "learning_rate": 0.00028854341736694677, "loss": 0.5352, "step": 25512 }, { "epoch": 14.253072625698325, "grad_norm": 0.3323248028755188, "learning_rate": 0.000288515406162465, "loss": 0.3841, "step": 25513 }, { "epoch": 14.253631284916201, "grad_norm": 0.4313906133174896, "learning_rate": 0.00028848739495798323, "loss": 0.4854, "step": 25514 }, { "epoch": 14.254189944134078, "grad_norm": 10.918773651123047, "learning_rate": 0.0002884593837535014, "loss": 0.4138, "step": 25515 }, { "epoch": 14.254748603351956, "grad_norm": 0.867256760597229, "learning_rate": 0.00028843137254901965, "loss": 0.543, "step": 25516 }, { "epoch": 14.255307262569833, "grad_norm": 0.49338144063949585, "learning_rate": 0.0002884033613445378, "loss": 0.3523, "step": 25517 }, { "epoch": 14.25586592178771, "grad_norm": 0.7883240580558777, "learning_rate": 0.000288375350140056, "loss": 0.3198, "step": 25518 }, { "epoch": 14.256424581005586, "grad_norm": 0.5634401440620422, "learning_rate": 0.00028834733893557426, "loss": 0.4217, "step": 25519 }, { "epoch": 14.256983240223464, "grad_norm": 0.37593311071395874, "learning_rate": 0.0002883193277310924, "loss": 0.349, "step": 25520 }, { "epoch": 14.25754189944134, "grad_norm": 0.3959060609340668, "learning_rate": 0.0002882913165266107, "loss": 0.3389, "step": 25521 }, { "epoch": 14.258100558659217, "grad_norm": 5.378292083740234, "learning_rate": 0.0002882633053221289, "loss": 0.4151, "step": 25522 }, { "epoch": 14.258659217877096, "grad_norm": 0.5760030746459961, "learning_rate": 0.00028823529411764703, "loss": 0.4536, "step": 25523 }, { "epoch": 14.259217877094972, "grad_norm": 0.46774008870124817, "learning_rate": 0.0002882072829131653, "loss": 0.4568, "step": 25524 }, { "epoch": 14.259776536312849, "grad_norm": 0.6029528975486755, "learning_rate": 0.00028817927170868345, "loss": 0.3828, "step": 25525 }, { 
"epoch": 14.260335195530725, "grad_norm": 0.5174428224563599, "learning_rate": 0.0002881512605042017, "loss": 0.375, "step": 25526 }, { "epoch": 14.260893854748604, "grad_norm": 0.5050787329673767, "learning_rate": 0.0002881232492997199, "loss": 0.4171, "step": 25527 }, { "epoch": 14.26145251396648, "grad_norm": 0.4592553675174713, "learning_rate": 0.00028809523809523806, "loss": 0.418, "step": 25528 }, { "epoch": 14.262011173184357, "grad_norm": 1.9049334526062012, "learning_rate": 0.0002880672268907563, "loss": 0.3889, "step": 25529 }, { "epoch": 14.262569832402235, "grad_norm": 0.40549713373184204, "learning_rate": 0.00028803921568627453, "loss": 0.3566, "step": 25530 }, { "epoch": 14.263128491620112, "grad_norm": 0.9373443722724915, "learning_rate": 0.00028801120448179274, "loss": 0.415, "step": 25531 }, { "epoch": 14.263687150837988, "grad_norm": 0.41337016224861145, "learning_rate": 0.00028798319327731094, "loss": 0.3713, "step": 25532 }, { "epoch": 14.264245810055867, "grad_norm": 0.600817084312439, "learning_rate": 0.0002879551820728291, "loss": 0.4654, "step": 25533 }, { "epoch": 14.264804469273743, "grad_norm": 4.329148769378662, "learning_rate": 0.00028792717086834735, "loss": 0.3514, "step": 25534 }, { "epoch": 14.26536312849162, "grad_norm": 0.3803815543651581, "learning_rate": 0.00028789915966386556, "loss": 0.3725, "step": 25535 }, { "epoch": 14.265921787709496, "grad_norm": 0.5153924822807312, "learning_rate": 0.00028787114845938377, "loss": 0.4011, "step": 25536 }, { "epoch": 14.266480446927375, "grad_norm": 0.38321420550346375, "learning_rate": 0.00028784313725490197, "loss": 0.3501, "step": 25537 }, { "epoch": 14.267039106145251, "grad_norm": 0.5944235324859619, "learning_rate": 0.0002878151260504202, "loss": 0.4261, "step": 25538 }, { "epoch": 14.267597765363128, "grad_norm": 0.6112844944000244, "learning_rate": 0.0002877871148459384, "loss": 0.3842, "step": 25539 }, { "epoch": 14.268156424581006, "grad_norm": 0.5148619413375854, 
"learning_rate": 0.0002877591036414566, "loss": 0.3592, "step": 25540 }, { "epoch": 14.268715083798883, "grad_norm": 0.40505728125572205, "learning_rate": 0.0002877310924369748, "loss": 0.3265, "step": 25541 }, { "epoch": 14.26927374301676, "grad_norm": 0.4315570294857025, "learning_rate": 0.000287703081232493, "loss": 0.3915, "step": 25542 }, { "epoch": 14.269832402234638, "grad_norm": 1.0758757591247559, "learning_rate": 0.0002876750700280112, "loss": 0.8022, "step": 25543 }, { "epoch": 14.270391061452514, "grad_norm": 0.3557911217212677, "learning_rate": 0.0002876470588235294, "loss": 0.3871, "step": 25544 }, { "epoch": 14.27094972067039, "grad_norm": 0.6048030257225037, "learning_rate": 0.0002876190476190476, "loss": 0.4904, "step": 25545 }, { "epoch": 14.271508379888267, "grad_norm": 0.5858570337295532, "learning_rate": 0.0002875910364145659, "loss": 0.4267, "step": 25546 }, { "epoch": 14.272067039106146, "grad_norm": 0.5784070491790771, "learning_rate": 0.00028756302521008403, "loss": 0.5145, "step": 25547 }, { "epoch": 14.272625698324022, "grad_norm": 0.42552152276039124, "learning_rate": 0.00028753501400560224, "loss": 0.3919, "step": 25548 }, { "epoch": 14.273184357541899, "grad_norm": 0.4977792501449585, "learning_rate": 0.00028750700280112044, "loss": 0.5396, "step": 25549 }, { "epoch": 14.273743016759777, "grad_norm": 0.3209351599216461, "learning_rate": 0.00028747899159663865, "loss": 0.3126, "step": 25550 }, { "epoch": 14.274301675977654, "grad_norm": 0.5076764822006226, "learning_rate": 0.0002874509803921569, "loss": 0.4115, "step": 25551 }, { "epoch": 14.27486033519553, "grad_norm": 0.4412117600440979, "learning_rate": 0.00028742296918767506, "loss": 0.3798, "step": 25552 }, { "epoch": 14.275418994413409, "grad_norm": 0.5591453313827515, "learning_rate": 0.00028739495798319327, "loss": 0.3711, "step": 25553 }, { "epoch": 14.275977653631285, "grad_norm": 0.41338402032852173, "learning_rate": 0.00028736694677871153, "loss": 0.4216, "step": 25554 }, { 
"epoch": 14.276536312849162, "grad_norm": 0.8007129430770874, "learning_rate": 0.0002873389355742297, "loss": 0.5284, "step": 25555 }, { "epoch": 14.277094972067038, "grad_norm": 0.3463747501373291, "learning_rate": 0.00028731092436974794, "loss": 0.3978, "step": 25556 }, { "epoch": 14.277653631284917, "grad_norm": 0.4747637212276459, "learning_rate": 0.0002872829131652661, "loss": 0.4384, "step": 25557 }, { "epoch": 14.278212290502793, "grad_norm": 0.3862406611442566, "learning_rate": 0.0002872549019607843, "loss": 0.3211, "step": 25558 }, { "epoch": 14.27877094972067, "grad_norm": 0.47372931241989136, "learning_rate": 0.00028722689075630256, "loss": 0.3553, "step": 25559 }, { "epoch": 14.279329608938548, "grad_norm": 1.2283505201339722, "learning_rate": 0.0002871988795518207, "loss": 0.4279, "step": 25560 }, { "epoch": 14.279888268156425, "grad_norm": 0.7186576724052429, "learning_rate": 0.00028717086834733897, "loss": 0.3839, "step": 25561 }, { "epoch": 14.280446927374301, "grad_norm": 0.49575498700141907, "learning_rate": 0.0002871428571428572, "loss": 0.3703, "step": 25562 }, { "epoch": 14.28100558659218, "grad_norm": 0.9154319167137146, "learning_rate": 0.00028711484593837533, "loss": 0.4882, "step": 25563 }, { "epoch": 14.281564245810056, "grad_norm": 1.0527592897415161, "learning_rate": 0.0002870868347338936, "loss": 0.4304, "step": 25564 }, { "epoch": 14.282122905027933, "grad_norm": 0.6844486594200134, "learning_rate": 0.00028705882352941174, "loss": 0.4457, "step": 25565 }, { "epoch": 14.28268156424581, "grad_norm": 0.5184553861618042, "learning_rate": 0.00028703081232493, "loss": 0.3888, "step": 25566 }, { "epoch": 14.283240223463688, "grad_norm": 0.5193313360214233, "learning_rate": 0.0002870028011204482, "loss": 0.4474, "step": 25567 }, { "epoch": 14.283798882681564, "grad_norm": 0.48808997869491577, "learning_rate": 0.00028697478991596636, "loss": 0.3456, "step": 25568 }, { "epoch": 14.28435754189944, "grad_norm": 0.39619943499565125, 
"learning_rate": 0.0002869467787114846, "loss": 0.3287, "step": 25569 }, { "epoch": 14.28491620111732, "grad_norm": 0.7508231997489929, "learning_rate": 0.0002869187675070028, "loss": 0.3599, "step": 25570 }, { "epoch": 14.285474860335196, "grad_norm": 0.73044753074646, "learning_rate": 0.00028689075630252103, "loss": 0.4251, "step": 25571 }, { "epoch": 14.286033519553072, "grad_norm": 1.4588490724563599, "learning_rate": 0.00028686274509803924, "loss": 0.4485, "step": 25572 }, { "epoch": 14.286592178770949, "grad_norm": 0.46958646178245544, "learning_rate": 0.0002868347338935574, "loss": 0.3903, "step": 25573 }, { "epoch": 14.287150837988827, "grad_norm": 0.43628770112991333, "learning_rate": 0.00028680672268907565, "loss": 0.5557, "step": 25574 }, { "epoch": 14.287709497206704, "grad_norm": 0.645496129989624, "learning_rate": 0.00028677871148459385, "loss": 0.3772, "step": 25575 }, { "epoch": 14.28826815642458, "grad_norm": 0.44594693183898926, "learning_rate": 0.00028675070028011206, "loss": 0.4574, "step": 25576 }, { "epoch": 14.288826815642459, "grad_norm": 0.368770033121109, "learning_rate": 0.00028672268907563027, "loss": 0.3557, "step": 25577 }, { "epoch": 14.289385474860335, "grad_norm": 0.521491289138794, "learning_rate": 0.00028669467787114847, "loss": 0.4678, "step": 25578 }, { "epoch": 14.289944134078212, "grad_norm": 0.6467028856277466, "learning_rate": 0.0002866666666666667, "loss": 0.4318, "step": 25579 }, { "epoch": 14.29050279329609, "grad_norm": 0.334685742855072, "learning_rate": 0.0002866386554621849, "loss": 0.3157, "step": 25580 }, { "epoch": 14.291061452513967, "grad_norm": 0.4182282090187073, "learning_rate": 0.0002866106442577031, "loss": 0.4883, "step": 25581 }, { "epoch": 14.291620111731843, "grad_norm": 0.575793445110321, "learning_rate": 0.0002865826330532213, "loss": 0.4264, "step": 25582 }, { "epoch": 14.29217877094972, "grad_norm": 0.3642539978027344, "learning_rate": 0.0002865546218487395, "loss": 0.3273, "step": 25583 }, { 
"epoch": 14.292737430167598, "grad_norm": 5.717525959014893, "learning_rate": 0.0002865266106442577, "loss": 0.3787, "step": 25584 }, { "epoch": 14.293296089385475, "grad_norm": 0.7128199338912964, "learning_rate": 0.0002864985994397759, "loss": 0.5182, "step": 25585 }, { "epoch": 14.293854748603351, "grad_norm": 0.45790258049964905, "learning_rate": 0.0002864705882352942, "loss": 0.405, "step": 25586 }, { "epoch": 14.29441340782123, "grad_norm": 0.3644290268421173, "learning_rate": 0.0002864425770308123, "loss": 0.3695, "step": 25587 }, { "epoch": 14.294972067039106, "grad_norm": 2.33027982711792, "learning_rate": 0.00028641456582633053, "loss": 0.3861, "step": 25588 }, { "epoch": 14.295530726256983, "grad_norm": 0.4087968170642853, "learning_rate": 0.00028638655462184874, "loss": 0.4143, "step": 25589 }, { "epoch": 14.296089385474861, "grad_norm": 0.43147313594818115, "learning_rate": 0.00028635854341736694, "loss": 0.3789, "step": 25590 }, { "epoch": 14.296648044692738, "grad_norm": 0.37114840745925903, "learning_rate": 0.0002863305322128852, "loss": 0.3139, "step": 25591 }, { "epoch": 14.297206703910614, "grad_norm": 0.431992769241333, "learning_rate": 0.00028630252100840336, "loss": 0.4288, "step": 25592 }, { "epoch": 14.297765363128491, "grad_norm": 3.008676528930664, "learning_rate": 0.00028627450980392156, "loss": 0.2972, "step": 25593 }, { "epoch": 14.29832402234637, "grad_norm": 0.6362246870994568, "learning_rate": 0.0002862464985994398, "loss": 0.4963, "step": 25594 }, { "epoch": 14.298882681564246, "grad_norm": 0.8416968584060669, "learning_rate": 0.000286218487394958, "loss": 0.3519, "step": 25595 }, { "epoch": 14.299441340782122, "grad_norm": 0.4806748628616333, "learning_rate": 0.00028619047619047623, "loss": 0.4421, "step": 25596 }, { "epoch": 14.3, "grad_norm": 0.3764563202857971, "learning_rate": 0.0002861624649859944, "loss": 0.4204, "step": 25597 }, { "epoch": 14.300558659217877, "grad_norm": 0.7359580993652344, "learning_rate": 
0.0002861344537815126, "loss": 0.6259, "step": 25598 }, { "epoch": 14.301117318435754, "grad_norm": 0.41040635108947754, "learning_rate": 0.00028610644257703085, "loss": 0.3854, "step": 25599 }, { "epoch": 14.30167597765363, "grad_norm": 0.31745120882987976, "learning_rate": 0.000286078431372549, "loss": 0.392, "step": 25600 }, { "epoch": 14.302234636871509, "grad_norm": 0.7701444029808044, "learning_rate": 0.00028605042016806726, "loss": 0.3671, "step": 25601 }, { "epoch": 14.302793296089385, "grad_norm": 0.6117039322853088, "learning_rate": 0.00028602240896358547, "loss": 0.3445, "step": 25602 }, { "epoch": 14.303351955307262, "grad_norm": 0.4762633442878723, "learning_rate": 0.0002859943977591036, "loss": 0.3223, "step": 25603 }, { "epoch": 14.30391061452514, "grad_norm": 0.43308648467063904, "learning_rate": 0.0002859663865546219, "loss": 0.3964, "step": 25604 }, { "epoch": 14.304469273743017, "grad_norm": 0.40438348054885864, "learning_rate": 0.00028593837535014003, "loss": 0.3421, "step": 25605 }, { "epoch": 14.305027932960893, "grad_norm": 0.6806602478027344, "learning_rate": 0.00028591036414565824, "loss": 0.4041, "step": 25606 }, { "epoch": 14.305586592178772, "grad_norm": 0.5020909309387207, "learning_rate": 0.0002858823529411765, "loss": 0.4181, "step": 25607 }, { "epoch": 14.306145251396648, "grad_norm": 0.5129014849662781, "learning_rate": 0.00028585434173669465, "loss": 0.4666, "step": 25608 }, { "epoch": 14.306703910614525, "grad_norm": 0.4376325011253357, "learning_rate": 0.0002858263305322129, "loss": 0.4673, "step": 25609 }, { "epoch": 14.307262569832401, "grad_norm": 0.6989833116531372, "learning_rate": 0.0002857983193277311, "loss": 0.4142, "step": 25610 }, { "epoch": 14.30782122905028, "grad_norm": 0.8938417434692383, "learning_rate": 0.00028577030812324927, "loss": 0.4017, "step": 25611 }, { "epoch": 14.308379888268156, "grad_norm": 0.3371240794658661, "learning_rate": 0.00028574229691876753, "loss": 0.346, "step": 25612 }, { "epoch": 
14.308938547486033, "grad_norm": 0.57198566198349, "learning_rate": 0.0002857142857142857, "loss": 0.3265, "step": 25613 }, { "epoch": 14.309497206703911, "grad_norm": 0.4454578161239624, "learning_rate": 0.00028568627450980394, "loss": 0.4923, "step": 25614 }, { "epoch": 14.310055865921788, "grad_norm": 0.3879300355911255, "learning_rate": 0.00028565826330532215, "loss": 0.3488, "step": 25615 }, { "epoch": 14.310614525139664, "grad_norm": 0.4444729685783386, "learning_rate": 0.0002856302521008403, "loss": 0.4609, "step": 25616 }, { "epoch": 14.311173184357543, "grad_norm": 0.8942714929580688, "learning_rate": 0.00028560224089635856, "loss": 0.4281, "step": 25617 }, { "epoch": 14.31173184357542, "grad_norm": 0.4284249544143677, "learning_rate": 0.00028557422969187677, "loss": 0.3466, "step": 25618 }, { "epoch": 14.312290502793296, "grad_norm": 0.6086063385009766, "learning_rate": 0.00028554621848739497, "loss": 0.4871, "step": 25619 }, { "epoch": 14.312849162011172, "grad_norm": 1.3937976360321045, "learning_rate": 0.0002855182072829132, "loss": 0.3765, "step": 25620 }, { "epoch": 14.31340782122905, "grad_norm": 0.45092374086380005, "learning_rate": 0.00028549019607843133, "loss": 0.3977, "step": 25621 }, { "epoch": 14.313966480446927, "grad_norm": 0.45951715111732483, "learning_rate": 0.0002854621848739496, "loss": 0.416, "step": 25622 }, { "epoch": 14.314525139664804, "grad_norm": 0.9599772095680237, "learning_rate": 0.0002854341736694678, "loss": 0.4227, "step": 25623 }, { "epoch": 14.315083798882682, "grad_norm": 0.7276136875152588, "learning_rate": 0.000285406162464986, "loss": 0.3497, "step": 25624 }, { "epoch": 14.315642458100559, "grad_norm": 1.7187039852142334, "learning_rate": 0.0002853781512605042, "loss": 0.4396, "step": 25625 }, { "epoch": 14.316201117318435, "grad_norm": 0.46159234642982483, "learning_rate": 0.0002853501400560224, "loss": 0.4092, "step": 25626 }, { "epoch": 14.316759776536314, "grad_norm": 0.514132022857666, "learning_rate": 
0.0002853221288515406, "loss": 0.4296, "step": 25627 }, { "epoch": 14.31731843575419, "grad_norm": 0.47145363688468933, "learning_rate": 0.0002852941176470588, "loss": 0.3638, "step": 25628 }, { "epoch": 14.317877094972067, "grad_norm": 0.40994134545326233, "learning_rate": 0.00028526610644257703, "loss": 0.3472, "step": 25629 }, { "epoch": 14.318435754189943, "grad_norm": 0.7872180938720703, "learning_rate": 0.00028523809523809524, "loss": 0.4858, "step": 25630 }, { "epoch": 14.318994413407822, "grad_norm": 0.5457873940467834, "learning_rate": 0.00028521008403361344, "loss": 0.3691, "step": 25631 }, { "epoch": 14.319553072625698, "grad_norm": 0.3201582133769989, "learning_rate": 0.00028518207282913165, "loss": 0.3506, "step": 25632 }, { "epoch": 14.320111731843575, "grad_norm": 0.6611894369125366, "learning_rate": 0.00028515406162464986, "loss": 0.4559, "step": 25633 }, { "epoch": 14.320670391061453, "grad_norm": 0.3534206449985504, "learning_rate": 0.0002851260504201681, "loss": 0.3857, "step": 25634 }, { "epoch": 14.32122905027933, "grad_norm": 0.6890993118286133, "learning_rate": 0.00028509803921568627, "loss": 0.4612, "step": 25635 }, { "epoch": 14.321787709497206, "grad_norm": 0.6053663492202759, "learning_rate": 0.0002850700280112045, "loss": 0.4293, "step": 25636 }, { "epoch": 14.322346368715085, "grad_norm": 0.8205382823944092, "learning_rate": 0.0002850420168067227, "loss": 0.4353, "step": 25637 }, { "epoch": 14.322905027932961, "grad_norm": 0.4678000211715698, "learning_rate": 0.0002850140056022409, "loss": 0.4414, "step": 25638 }, { "epoch": 14.323463687150838, "grad_norm": 0.9820969700813293, "learning_rate": 0.00028498599439775915, "loss": 0.3453, "step": 25639 }, { "epoch": 14.324022346368714, "grad_norm": 0.6170769929885864, "learning_rate": 0.0002849579831932773, "loss": 0.4082, "step": 25640 }, { "epoch": 14.324581005586593, "grad_norm": 0.3387000262737274, "learning_rate": 0.0002849299719887955, "loss": 0.343, "step": 25641 }, { "epoch": 
14.32513966480447, "grad_norm": 0.41351622343063354, "learning_rate": 0.00028490196078431376, "loss": 0.4604, "step": 25642 }, { "epoch": 14.325698324022346, "grad_norm": 0.4139968454837799, "learning_rate": 0.0002848739495798319, "loss": 0.3738, "step": 25643 }, { "epoch": 14.326256983240224, "grad_norm": 0.5786769986152649, "learning_rate": 0.0002848459383753502, "loss": 0.3749, "step": 25644 }, { "epoch": 14.3268156424581, "grad_norm": 1.082821011543274, "learning_rate": 0.00028481792717086833, "loss": 0.5075, "step": 25645 }, { "epoch": 14.327374301675977, "grad_norm": 0.8283697366714478, "learning_rate": 0.00028478991596638653, "loss": 0.4805, "step": 25646 }, { "epoch": 14.327932960893854, "grad_norm": 0.4066693186759949, "learning_rate": 0.0002847619047619048, "loss": 0.5018, "step": 25647 }, { "epoch": 14.328491620111732, "grad_norm": 0.43121862411499023, "learning_rate": 0.00028473389355742295, "loss": 0.371, "step": 25648 }, { "epoch": 14.329050279329609, "grad_norm": 0.6932399272918701, "learning_rate": 0.0002847058823529412, "loss": 0.3533, "step": 25649 }, { "epoch": 14.329608938547485, "grad_norm": 0.62120121717453, "learning_rate": 0.0002846778711484594, "loss": 0.4223, "step": 25650 }, { "epoch": 14.330167597765364, "grad_norm": 1.1668896675109863, "learning_rate": 0.00028464985994397756, "loss": 0.4433, "step": 25651 }, { "epoch": 14.33072625698324, "grad_norm": 0.7521544098854065, "learning_rate": 0.0002846218487394958, "loss": 0.4454, "step": 25652 }, { "epoch": 14.331284916201117, "grad_norm": 0.4208326041698456, "learning_rate": 0.000284593837535014, "loss": 0.501, "step": 25653 }, { "epoch": 14.331843575418995, "grad_norm": 0.5409586429595947, "learning_rate": 0.00028456582633053224, "loss": 0.4698, "step": 25654 }, { "epoch": 14.332402234636872, "grad_norm": 0.597471296787262, "learning_rate": 0.00028453781512605044, "loss": 0.3547, "step": 25655 }, { "epoch": 14.332960893854748, "grad_norm": 0.7143123149871826, "learning_rate": 
0.0002845098039215686, "loss": 0.3982, "step": 25656 }, { "epoch": 14.333519553072625, "grad_norm": 0.4022220969200134, "learning_rate": 0.00028448179271708685, "loss": 0.4146, "step": 25657 }, { "epoch": 14.334078212290503, "grad_norm": 0.4564114511013031, "learning_rate": 0.00028445378151260506, "loss": 0.4246, "step": 25658 }, { "epoch": 14.33463687150838, "grad_norm": 0.5670762658119202, "learning_rate": 0.00028442577030812327, "loss": 0.5039, "step": 25659 }, { "epoch": 14.335195530726256, "grad_norm": 0.5910559892654419, "learning_rate": 0.00028439775910364147, "loss": 0.3681, "step": 25660 }, { "epoch": 14.335754189944135, "grad_norm": 0.6039672493934631, "learning_rate": 0.0002843697478991596, "loss": 0.5049, "step": 25661 }, { "epoch": 14.336312849162011, "grad_norm": 0.3645922541618347, "learning_rate": 0.0002843417366946779, "loss": 0.3103, "step": 25662 }, { "epoch": 14.336871508379888, "grad_norm": 0.3067604601383209, "learning_rate": 0.0002843137254901961, "loss": 0.3533, "step": 25663 }, { "epoch": 14.337430167597766, "grad_norm": 0.46452659368515015, "learning_rate": 0.0002842857142857143, "loss": 0.3276, "step": 25664 }, { "epoch": 14.337988826815643, "grad_norm": 0.44438958168029785, "learning_rate": 0.0002842577030812325, "loss": 0.426, "step": 25665 }, { "epoch": 14.33854748603352, "grad_norm": 0.492631196975708, "learning_rate": 0.0002842296918767507, "loss": 0.394, "step": 25666 }, { "epoch": 14.339106145251396, "grad_norm": 0.7714545726776123, "learning_rate": 0.0002842016806722689, "loss": 0.5024, "step": 25667 }, { "epoch": 14.339664804469274, "grad_norm": 0.4022531509399414, "learning_rate": 0.0002841736694677871, "loss": 0.4186, "step": 25668 }, { "epoch": 14.34022346368715, "grad_norm": 5.050975322723389, "learning_rate": 0.0002841456582633053, "loss": 0.4058, "step": 25669 }, { "epoch": 14.340782122905027, "grad_norm": 0.37994644045829773, "learning_rate": 0.00028411764705882353, "loss": 0.4224, "step": 25670 }, { "epoch": 
14.341340782122906, "grad_norm": 0.9045817852020264, "learning_rate": 0.00028408963585434174, "loss": 0.353, "step": 25671 }, { "epoch": 14.341899441340782, "grad_norm": 0.49429816007614136, "learning_rate": 0.00028406162464985994, "loss": 0.4733, "step": 25672 }, { "epoch": 14.342458100558659, "grad_norm": 0.3610917627811432, "learning_rate": 0.00028403361344537815, "loss": 0.2923, "step": 25673 }, { "epoch": 14.343016759776535, "grad_norm": 2.681816816329956, "learning_rate": 0.0002840056022408964, "loss": 0.3733, "step": 25674 }, { "epoch": 14.343575418994414, "grad_norm": 0.5871475338935852, "learning_rate": 0.00028397759103641456, "loss": 0.4014, "step": 25675 }, { "epoch": 14.34413407821229, "grad_norm": 0.44170141220092773, "learning_rate": 0.00028394957983193277, "loss": 0.4157, "step": 25676 }, { "epoch": 14.344692737430167, "grad_norm": 0.8048399090766907, "learning_rate": 0.000283921568627451, "loss": 0.4336, "step": 25677 }, { "epoch": 14.345251396648045, "grad_norm": 0.5510793328285217, "learning_rate": 0.0002838935574229692, "loss": 0.4143, "step": 25678 }, { "epoch": 14.345810055865922, "grad_norm": 0.6959816813468933, "learning_rate": 0.00028386554621848744, "loss": 0.3851, "step": 25679 }, { "epoch": 14.346368715083798, "grad_norm": 0.46577441692352295, "learning_rate": 0.0002838375350140056, "loss": 0.2991, "step": 25680 }, { "epoch": 14.346927374301677, "grad_norm": 0.44089773297309875, "learning_rate": 0.0002838095238095238, "loss": 0.4154, "step": 25681 }, { "epoch": 14.347486033519553, "grad_norm": 0.5041240453720093, "learning_rate": 0.00028378151260504206, "loss": 0.414, "step": 25682 }, { "epoch": 14.34804469273743, "grad_norm": 0.5451045632362366, "learning_rate": 0.0002837535014005602, "loss": 0.4117, "step": 25683 }, { "epoch": 14.348603351955306, "grad_norm": 0.5523211359977722, "learning_rate": 0.00028372549019607847, "loss": 0.4144, "step": 25684 }, { "epoch": 14.349162011173185, "grad_norm": 0.7593986392021179, "learning_rate": 
0.0002836974789915966, "loss": 0.3524, "step": 25685 }, { "epoch": 14.349720670391061, "grad_norm": 1.9889676570892334, "learning_rate": 0.00028366946778711483, "loss": 0.4046, "step": 25686 }, { "epoch": 14.350279329608938, "grad_norm": 0.48265495896339417, "learning_rate": 0.0002836414565826331, "loss": 0.5398, "step": 25687 }, { "epoch": 14.350837988826816, "grad_norm": 0.43482232093811035, "learning_rate": 0.00028361344537815124, "loss": 0.4597, "step": 25688 }, { "epoch": 14.351396648044693, "grad_norm": 0.3500218689441681, "learning_rate": 0.0002835854341736695, "loss": 0.3708, "step": 25689 }, { "epoch": 14.35195530726257, "grad_norm": 0.5408735275268555, "learning_rate": 0.0002835574229691877, "loss": 0.4819, "step": 25690 }, { "epoch": 14.352513966480448, "grad_norm": 0.6805570721626282, "learning_rate": 0.00028352941176470586, "loss": 0.4361, "step": 25691 }, { "epoch": 14.353072625698324, "grad_norm": 0.7888262271881104, "learning_rate": 0.0002835014005602241, "loss": 0.4249, "step": 25692 }, { "epoch": 14.3536312849162, "grad_norm": 0.32939043641090393, "learning_rate": 0.00028347338935574227, "loss": 0.3919, "step": 25693 }, { "epoch": 14.354189944134077, "grad_norm": 0.6589708924293518, "learning_rate": 0.00028344537815126053, "loss": 0.4001, "step": 25694 }, { "epoch": 14.354748603351956, "grad_norm": 0.39256951212882996, "learning_rate": 0.00028341736694677874, "loss": 0.3093, "step": 25695 }, { "epoch": 14.355307262569832, "grad_norm": 0.31894832849502563, "learning_rate": 0.0002833893557422969, "loss": 0.319, "step": 25696 }, { "epoch": 14.355865921787709, "grad_norm": 0.4189417362213135, "learning_rate": 0.00028336134453781515, "loss": 0.4303, "step": 25697 }, { "epoch": 14.356424581005587, "grad_norm": 0.49348950386047363, "learning_rate": 0.00028333333333333335, "loss": 0.4588, "step": 25698 }, { "epoch": 14.356983240223464, "grad_norm": 0.380424439907074, "learning_rate": 0.00028330532212885156, "loss": 0.3469, "step": 25699 }, { "epoch": 
14.35754189944134, "grad_norm": 0.4521121680736542, "learning_rate": 0.00028327731092436977, "loss": 0.5093, "step": 25700 }, { "epoch": 14.358100558659217, "grad_norm": 0.38366076350212097, "learning_rate": 0.0002832492997198879, "loss": 0.3765, "step": 25701 }, { "epoch": 14.358659217877095, "grad_norm": 0.41023173928260803, "learning_rate": 0.0002832212885154062, "loss": 0.4308, "step": 25702 }, { "epoch": 14.359217877094972, "grad_norm": 0.8428417444229126, "learning_rate": 0.0002831932773109244, "loss": 0.3418, "step": 25703 }, { "epoch": 14.359776536312848, "grad_norm": 3.7292654514312744, "learning_rate": 0.0002831652661064426, "loss": 0.481, "step": 25704 }, { "epoch": 14.360335195530727, "grad_norm": 0.7049896717071533, "learning_rate": 0.0002831372549019608, "loss": 0.3586, "step": 25705 }, { "epoch": 14.360893854748603, "grad_norm": 0.5074928998947144, "learning_rate": 0.000283109243697479, "loss": 0.4894, "step": 25706 }, { "epoch": 14.36145251396648, "grad_norm": 0.4286574721336365, "learning_rate": 0.0002830812324929972, "loss": 0.3388, "step": 25707 }, { "epoch": 14.362011173184358, "grad_norm": 0.34454187750816345, "learning_rate": 0.0002830532212885154, "loss": 0.347, "step": 25708 }, { "epoch": 14.362569832402235, "grad_norm": 0.7793242335319519, "learning_rate": 0.0002830252100840336, "loss": 0.3957, "step": 25709 }, { "epoch": 14.363128491620111, "grad_norm": 0.41481536626815796, "learning_rate": 0.0002829971988795518, "loss": 0.4063, "step": 25710 }, { "epoch": 14.363687150837988, "grad_norm": 0.5770106315612793, "learning_rate": 0.00028296918767507003, "loss": 0.3488, "step": 25711 }, { "epoch": 14.364245810055866, "grad_norm": 0.4796459972858429, "learning_rate": 0.00028294117647058824, "loss": 0.4223, "step": 25712 }, { "epoch": 14.364804469273743, "grad_norm": 0.4658036231994629, "learning_rate": 0.00028291316526610644, "loss": 0.3649, "step": 25713 }, { "epoch": 14.36536312849162, "grad_norm": 0.4907352030277252, "learning_rate": 
0.0002828851540616247, "loss": 0.4932, "step": 25714 }, { "epoch": 14.365921787709498, "grad_norm": 0.43476834893226624, "learning_rate": 0.00028285714285714286, "loss": 0.5205, "step": 25715 }, { "epoch": 14.366480446927374, "grad_norm": 0.3576247990131378, "learning_rate": 0.00028282913165266106, "loss": 0.3031, "step": 25716 }, { "epoch": 14.367039106145251, "grad_norm": 0.405793696641922, "learning_rate": 0.00028280112044817927, "loss": 0.3406, "step": 25717 }, { "epoch": 14.36759776536313, "grad_norm": 0.6483950018882751, "learning_rate": 0.0002827731092436975, "loss": 0.4059, "step": 25718 }, { "epoch": 14.368156424581006, "grad_norm": 0.3726096749305725, "learning_rate": 0.0002827450980392157, "loss": 0.3593, "step": 25719 }, { "epoch": 14.368715083798882, "grad_norm": 0.36157068610191345, "learning_rate": 0.0002827170868347339, "loss": 0.3736, "step": 25720 }, { "epoch": 14.369273743016759, "grad_norm": 0.41069304943084717, "learning_rate": 0.0002826890756302521, "loss": 0.3953, "step": 25721 }, { "epoch": 14.369832402234637, "grad_norm": 0.5591342449188232, "learning_rate": 0.00028266106442577035, "loss": 0.2942, "step": 25722 }, { "epoch": 14.370391061452514, "grad_norm": 0.7573222517967224, "learning_rate": 0.0002826330532212885, "loss": 0.4293, "step": 25723 }, { "epoch": 14.37094972067039, "grad_norm": 0.5397464632987976, "learning_rate": 0.0002826050420168067, "loss": 0.5697, "step": 25724 }, { "epoch": 14.371508379888269, "grad_norm": 0.5947046875953674, "learning_rate": 0.0002825770308123249, "loss": 0.4275, "step": 25725 }, { "epoch": 14.372067039106145, "grad_norm": 0.576654314994812, "learning_rate": 0.0002825490196078431, "loss": 0.4647, "step": 25726 }, { "epoch": 14.372625698324022, "grad_norm": 0.4334881603717804, "learning_rate": 0.0002825210084033614, "loss": 0.4593, "step": 25727 }, { "epoch": 14.3731843575419, "grad_norm": 0.37201419472694397, "learning_rate": 0.00028249299719887953, "loss": 0.3517, "step": 25728 }, { "epoch": 
14.373743016759777, "grad_norm": 0.8242093920707703, "learning_rate": 0.00028246498599439774, "loss": 0.3823, "step": 25729 }, { "epoch": 14.374301675977653, "grad_norm": 0.7555853128433228, "learning_rate": 0.000282436974789916, "loss": 0.4939, "step": 25730 }, { "epoch": 14.37486033519553, "grad_norm": 1.2133944034576416, "learning_rate": 0.00028240896358543415, "loss": 0.4774, "step": 25731 }, { "epoch": 14.375418994413408, "grad_norm": 0.3893866240978241, "learning_rate": 0.0002823809523809524, "loss": 0.4104, "step": 25732 }, { "epoch": 14.375977653631285, "grad_norm": 2.329319715499878, "learning_rate": 0.00028235294117647056, "loss": 0.4933, "step": 25733 }, { "epoch": 14.376536312849161, "grad_norm": 5.495023727416992, "learning_rate": 0.00028232492997198877, "loss": 0.4191, "step": 25734 }, { "epoch": 14.37709497206704, "grad_norm": 0.47597333788871765, "learning_rate": 0.00028229691876750703, "loss": 0.5126, "step": 25735 }, { "epoch": 14.377653631284916, "grad_norm": 0.34918737411499023, "learning_rate": 0.0002822689075630252, "loss": 0.3895, "step": 25736 }, { "epoch": 14.378212290502793, "grad_norm": 3.2512359619140625, "learning_rate": 0.00028224089635854344, "loss": 0.5718, "step": 25737 }, { "epoch": 14.378770949720671, "grad_norm": 0.6092514395713806, "learning_rate": 0.00028221288515406165, "loss": 0.4242, "step": 25738 }, { "epoch": 14.379329608938548, "grad_norm": 0.41264331340789795, "learning_rate": 0.0002821848739495798, "loss": 0.4264, "step": 25739 }, { "epoch": 14.379888268156424, "grad_norm": 0.6225352883338928, "learning_rate": 0.00028215686274509806, "loss": 0.3416, "step": 25740 }, { "epoch": 14.380446927374301, "grad_norm": 0.3158023953437805, "learning_rate": 0.0002821288515406162, "loss": 0.4095, "step": 25741 }, { "epoch": 14.38100558659218, "grad_norm": 0.5627813935279846, "learning_rate": 0.00028210084033613447, "loss": 0.4102, "step": 25742 }, { "epoch": 14.381564245810056, "grad_norm": 0.8792022466659546, "learning_rate": 
0.0002820728291316527, "loss": 0.3798, "step": 25743 }, { "epoch": 14.382122905027932, "grad_norm": 0.40875422954559326, "learning_rate": 0.00028204481792717083, "loss": 0.4946, "step": 25744 }, { "epoch": 14.38268156424581, "grad_norm": 0.4875701665878296, "learning_rate": 0.0002820168067226891, "loss": 0.4873, "step": 25745 }, { "epoch": 14.383240223463687, "grad_norm": 0.5964968204498291, "learning_rate": 0.0002819887955182073, "loss": 0.3611, "step": 25746 }, { "epoch": 14.383798882681564, "grad_norm": 0.5391162633895874, "learning_rate": 0.0002819607843137255, "loss": 0.469, "step": 25747 }, { "epoch": 14.38435754189944, "grad_norm": 0.49238142371177673, "learning_rate": 0.0002819327731092437, "loss": 0.4111, "step": 25748 }, { "epoch": 14.384916201117319, "grad_norm": 0.42598405480384827, "learning_rate": 0.00028190476190476186, "loss": 0.4641, "step": 25749 }, { "epoch": 14.385474860335195, "grad_norm": 0.5593146085739136, "learning_rate": 0.0002818767507002801, "loss": 0.4146, "step": 25750 }, { "epoch": 14.386033519553072, "grad_norm": 0.6897799372673035, "learning_rate": 0.0002818487394957983, "loss": 0.4757, "step": 25751 }, { "epoch": 14.38659217877095, "grad_norm": 0.33926549553871155, "learning_rate": 0.00028182072829131653, "loss": 0.3405, "step": 25752 }, { "epoch": 14.387150837988827, "grad_norm": 0.38387221097946167, "learning_rate": 0.00028179271708683474, "loss": 0.3435, "step": 25753 }, { "epoch": 14.387709497206703, "grad_norm": 0.8094502687454224, "learning_rate": 0.00028176470588235294, "loss": 0.5851, "step": 25754 }, { "epoch": 14.388268156424582, "grad_norm": 1.2071977853775024, "learning_rate": 0.00028173669467787115, "loss": 0.4762, "step": 25755 }, { "epoch": 14.388826815642458, "grad_norm": 0.3796028792858124, "learning_rate": 0.00028170868347338936, "loss": 0.4228, "step": 25756 }, { "epoch": 14.389385474860335, "grad_norm": 0.6941390633583069, "learning_rate": 0.0002816806722689076, "loss": 0.4467, "step": 25757 }, { "epoch": 
14.389944134078211, "grad_norm": 0.6343345642089844, "learning_rate": 0.00028165266106442577, "loss": 0.4957, "step": 25758 }, { "epoch": 14.39050279329609, "grad_norm": 0.5664315223693848, "learning_rate": 0.000281624649859944, "loss": 0.4144, "step": 25759 }, { "epoch": 14.391061452513966, "grad_norm": 0.4608016908168793, "learning_rate": 0.0002815966386554622, "loss": 0.3555, "step": 25760 }, { "epoch": 14.391620111731843, "grad_norm": 1.5384972095489502, "learning_rate": 0.0002815686274509804, "loss": 0.462, "step": 25761 }, { "epoch": 14.392178770949721, "grad_norm": 0.4720909595489502, "learning_rate": 0.00028154061624649865, "loss": 0.4471, "step": 25762 }, { "epoch": 14.392737430167598, "grad_norm": 0.36641809344291687, "learning_rate": 0.0002815126050420168, "loss": 0.2873, "step": 25763 }, { "epoch": 14.393296089385474, "grad_norm": 0.4277470111846924, "learning_rate": 0.000281484593837535, "loss": 0.351, "step": 25764 }, { "epoch": 14.393854748603353, "grad_norm": 0.5178866982460022, "learning_rate": 0.00028145658263305326, "loss": 0.4064, "step": 25765 }, { "epoch": 14.39441340782123, "grad_norm": 0.532721996307373, "learning_rate": 0.0002814285714285714, "loss": 0.4609, "step": 25766 }, { "epoch": 14.394972067039106, "grad_norm": 0.3775053024291992, "learning_rate": 0.0002814005602240897, "loss": 0.3408, "step": 25767 }, { "epoch": 14.395530726256982, "grad_norm": 0.48296311497688293, "learning_rate": 0.00028137254901960783, "loss": 0.513, "step": 25768 }, { "epoch": 14.39608938547486, "grad_norm": 0.6062502264976501, "learning_rate": 0.00028134453781512603, "loss": 0.5569, "step": 25769 }, { "epoch": 14.396648044692737, "grad_norm": 1.783350944519043, "learning_rate": 0.0002813165266106443, "loss": 0.4486, "step": 25770 }, { "epoch": 14.397206703910614, "grad_norm": 2.1369588375091553, "learning_rate": 0.00028128851540616245, "loss": 0.4258, "step": 25771 }, { "epoch": 14.397765363128492, "grad_norm": 0.665153980255127, "learning_rate": 
0.0002812605042016807, "loss": 0.3787, "step": 25772 }, { "epoch": 14.398324022346369, "grad_norm": 0.6627637147903442, "learning_rate": 0.0002812324929971989, "loss": 0.5166, "step": 25773 }, { "epoch": 14.398882681564245, "grad_norm": 0.4719976484775543, "learning_rate": 0.00028120448179271706, "loss": 0.4381, "step": 25774 }, { "epoch": 14.399441340782122, "grad_norm": 0.40011176466941833, "learning_rate": 0.0002811764705882353, "loss": 0.4025, "step": 25775 }, { "epoch": 14.4, "grad_norm": 0.3883362412452698, "learning_rate": 0.0002811484593837535, "loss": 0.4517, "step": 25776 }, { "epoch": 14.400558659217877, "grad_norm": 0.5507895946502686, "learning_rate": 0.00028112044817927174, "loss": 0.3458, "step": 25777 }, { "epoch": 14.401117318435753, "grad_norm": 0.39327993988990784, "learning_rate": 0.00028109243697478994, "loss": 0.4834, "step": 25778 }, { "epoch": 14.401675977653632, "grad_norm": 0.4275578558444977, "learning_rate": 0.0002810644257703081, "loss": 0.3585, "step": 25779 }, { "epoch": 14.402234636871508, "grad_norm": 0.7927836179733276, "learning_rate": 0.00028103641456582635, "loss": 0.4295, "step": 25780 }, { "epoch": 14.402793296089385, "grad_norm": 0.6239094138145447, "learning_rate": 0.00028100840336134456, "loss": 0.2793, "step": 25781 }, { "epoch": 14.403351955307263, "grad_norm": 0.5948808789253235, "learning_rate": 0.00028098039215686277, "loss": 0.6748, "step": 25782 }, { "epoch": 14.40391061452514, "grad_norm": 0.579407274723053, "learning_rate": 0.00028095238095238097, "loss": 0.4861, "step": 25783 }, { "epoch": 14.404469273743016, "grad_norm": 0.4269624650478363, "learning_rate": 0.0002809243697478991, "loss": 0.4071, "step": 25784 }, { "epoch": 14.405027932960893, "grad_norm": 0.4624919295310974, "learning_rate": 0.0002808963585434174, "loss": 0.4408, "step": 25785 }, { "epoch": 14.405586592178771, "grad_norm": 0.3998176157474518, "learning_rate": 0.0002808683473389356, "loss": 0.3213, "step": 25786 }, { "epoch": 14.406145251396648, 
"grad_norm": 0.5063607692718506, "learning_rate": 0.0002808403361344538, "loss": 0.3633, "step": 25787 }, { "epoch": 14.406703910614524, "grad_norm": 2.788221836090088, "learning_rate": 0.000280812324929972, "loss": 0.3837, "step": 25788 }, { "epoch": 14.407262569832403, "grad_norm": 0.8124858140945435, "learning_rate": 0.0002807843137254902, "loss": 0.559, "step": 25789 }, { "epoch": 14.40782122905028, "grad_norm": 0.4693353772163391, "learning_rate": 0.0002807563025210084, "loss": 0.4196, "step": 25790 }, { "epoch": 14.408379888268156, "grad_norm": 1.7660160064697266, "learning_rate": 0.0002807282913165266, "loss": 0.3712, "step": 25791 }, { "epoch": 14.408938547486034, "grad_norm": 0.8365939259529114, "learning_rate": 0.0002807002801120448, "loss": 0.5345, "step": 25792 }, { "epoch": 14.40949720670391, "grad_norm": 0.37916257977485657, "learning_rate": 0.00028067226890756303, "loss": 0.3536, "step": 25793 }, { "epoch": 14.410055865921787, "grad_norm": 0.5092312693595886, "learning_rate": 0.00028064425770308124, "loss": 0.4232, "step": 25794 }, { "epoch": 14.410614525139664, "grad_norm": 0.6536540389060974, "learning_rate": 0.00028061624649859944, "loss": 0.358, "step": 25795 }, { "epoch": 14.411173184357542, "grad_norm": 4.4830546379089355, "learning_rate": 0.00028058823529411765, "loss": 0.3305, "step": 25796 }, { "epoch": 14.411731843575419, "grad_norm": 0.4286189377307892, "learning_rate": 0.0002805602240896359, "loss": 0.3636, "step": 25797 }, { "epoch": 14.412290502793295, "grad_norm": 0.8312407732009888, "learning_rate": 0.00028053221288515406, "loss": 0.4554, "step": 25798 }, { "epoch": 14.412849162011174, "grad_norm": 0.5654194951057434, "learning_rate": 0.00028050420168067227, "loss": 0.4181, "step": 25799 }, { "epoch": 14.41340782122905, "grad_norm": 0.5863842964172363, "learning_rate": 0.0002804761904761905, "loss": 0.4628, "step": 25800 }, { "epoch": 14.413966480446927, "grad_norm": 0.3270134925842285, "learning_rate": 0.0002804481792717087, "loss": 
0.2687, "step": 25801 }, { "epoch": 14.414525139664805, "grad_norm": 0.8150222897529602, "learning_rate": 0.00028042016806722694, "loss": 0.4096, "step": 25802 }, { "epoch": 14.415083798882682, "grad_norm": 0.44506585597991943, "learning_rate": 0.0002803921568627451, "loss": 0.5477, "step": 25803 }, { "epoch": 14.415642458100558, "grad_norm": 0.3880547881126404, "learning_rate": 0.0002803641456582633, "loss": 0.3942, "step": 25804 }, { "epoch": 14.416201117318435, "grad_norm": 0.5805116295814514, "learning_rate": 0.00028033613445378156, "loss": 0.5443, "step": 25805 }, { "epoch": 14.416759776536313, "grad_norm": 0.4237102270126343, "learning_rate": 0.0002803081232492997, "loss": 0.4433, "step": 25806 }, { "epoch": 14.41731843575419, "grad_norm": 0.5702316761016846, "learning_rate": 0.00028028011204481797, "loss": 0.4432, "step": 25807 }, { "epoch": 14.417877094972066, "grad_norm": 0.4755604565143585, "learning_rate": 0.0002802521008403361, "loss": 0.4003, "step": 25808 }, { "epoch": 14.418435754189945, "grad_norm": 0.41497841477394104, "learning_rate": 0.00028022408963585433, "loss": 0.4138, "step": 25809 }, { "epoch": 14.418994413407821, "grad_norm": 0.4244304299354553, "learning_rate": 0.0002801960784313726, "loss": 0.3975, "step": 25810 }, { "epoch": 14.419553072625698, "grad_norm": 0.4146798253059387, "learning_rate": 0.00028016806722689074, "loss": 0.3978, "step": 25811 }, { "epoch": 14.420111731843576, "grad_norm": 0.5190150737762451, "learning_rate": 0.000280140056022409, "loss": 0.4463, "step": 25812 }, { "epoch": 14.420670391061453, "grad_norm": 0.6149578094482422, "learning_rate": 0.0002801120448179272, "loss": 0.5793, "step": 25813 }, { "epoch": 14.42122905027933, "grad_norm": 0.6687585115432739, "learning_rate": 0.00028008403361344536, "loss": 0.3749, "step": 25814 }, { "epoch": 14.421787709497206, "grad_norm": 1.8690475225448608, "learning_rate": 0.0002800560224089636, "loss": 0.4547, "step": 25815 }, { "epoch": 14.422346368715084, "grad_norm": 
0.42321157455444336, "learning_rate": 0.00028002801120448177, "loss": 0.4473, "step": 25816 }, { "epoch": 14.422905027932961, "grad_norm": 2.1452319622039795, "learning_rate": 0.00028000000000000003, "loss": 0.3988, "step": 25817 }, { "epoch": 14.423463687150837, "grad_norm": 0.5502938628196716, "learning_rate": 0.00027997198879551824, "loss": 0.4806, "step": 25818 }, { "epoch": 14.424022346368716, "grad_norm": 0.49775195121765137, "learning_rate": 0.0002799439775910364, "loss": 0.4999, "step": 25819 }, { "epoch": 14.424581005586592, "grad_norm": 0.4695853292942047, "learning_rate": 0.00027991596638655465, "loss": 0.3727, "step": 25820 }, { "epoch": 14.425139664804469, "grad_norm": 0.7304810881614685, "learning_rate": 0.00027988795518207285, "loss": 0.4343, "step": 25821 }, { "epoch": 14.425698324022346, "grad_norm": 1.0314608812332153, "learning_rate": 0.00027985994397759106, "loss": 0.3913, "step": 25822 }, { "epoch": 14.426256983240224, "grad_norm": 2.5969812870025635, "learning_rate": 0.00027983193277310927, "loss": 0.4345, "step": 25823 }, { "epoch": 14.4268156424581, "grad_norm": 0.6922586560249329, "learning_rate": 0.0002798039215686274, "loss": 0.3476, "step": 25824 }, { "epoch": 14.427374301675977, "grad_norm": 1.0971225500106812, "learning_rate": 0.0002797759103641457, "loss": 0.4155, "step": 25825 }, { "epoch": 14.427932960893855, "grad_norm": 0.6877304911613464, "learning_rate": 0.0002797478991596639, "loss": 0.5204, "step": 25826 }, { "epoch": 14.428491620111732, "grad_norm": 0.7078418731689453, "learning_rate": 0.0002797198879551821, "loss": 0.4386, "step": 25827 }, { "epoch": 14.429050279329608, "grad_norm": 1.0341423749923706, "learning_rate": 0.0002796918767507003, "loss": 0.5362, "step": 25828 }, { "epoch": 14.429608938547487, "grad_norm": 0.5058976411819458, "learning_rate": 0.0002796638655462185, "loss": 0.3344, "step": 25829 }, { "epoch": 14.430167597765363, "grad_norm": 1.0340579748153687, "learning_rate": 0.0002796358543417367, "loss": 0.385, 
"step": 25830 }, { "epoch": 14.43072625698324, "grad_norm": 0.5209304094314575, "learning_rate": 0.0002796078431372549, "loss": 0.474, "step": 25831 }, { "epoch": 14.431284916201117, "grad_norm": 0.6722506284713745, "learning_rate": 0.00027957983193277307, "loss": 0.5624, "step": 25832 }, { "epoch": 14.431843575418995, "grad_norm": 2.0200576782226562, "learning_rate": 0.0002795518207282913, "loss": 0.403, "step": 25833 }, { "epoch": 14.432402234636871, "grad_norm": 0.4758749306201935, "learning_rate": 0.00027952380952380953, "loss": 0.4527, "step": 25834 }, { "epoch": 14.432960893854748, "grad_norm": 1.201953411102295, "learning_rate": 0.00027949579831932774, "loss": 0.3817, "step": 25835 }, { "epoch": 14.433519553072626, "grad_norm": 0.4644990861415863, "learning_rate": 0.00027946778711484594, "loss": 0.4767, "step": 25836 }, { "epoch": 14.434078212290503, "grad_norm": 3.7290847301483154, "learning_rate": 0.00027943977591036415, "loss": 0.3681, "step": 25837 }, { "epoch": 14.43463687150838, "grad_norm": 0.7345566749572754, "learning_rate": 0.00027941176470588236, "loss": 0.4311, "step": 25838 }, { "epoch": 14.435195530726258, "grad_norm": 0.684956431388855, "learning_rate": 0.00027938375350140056, "loss": 0.3851, "step": 25839 }, { "epoch": 14.435754189944134, "grad_norm": 1.3335058689117432, "learning_rate": 0.00027935574229691877, "loss": 0.4385, "step": 25840 }, { "epoch": 14.436312849162011, "grad_norm": 0.5183255076408386, "learning_rate": 0.000279327731092437, "loss": 0.3662, "step": 25841 }, { "epoch": 14.436871508379888, "grad_norm": 0.40338483452796936, "learning_rate": 0.0002792997198879552, "loss": 0.4074, "step": 25842 }, { "epoch": 14.437430167597766, "grad_norm": 0.40220940113067627, "learning_rate": 0.0002792717086834734, "loss": 0.2954, "step": 25843 }, { "epoch": 14.437988826815642, "grad_norm": 0.47030654549598694, "learning_rate": 0.0002792436974789916, "loss": 0.3435, "step": 25844 }, { "epoch": 14.438547486033519, "grad_norm": 
0.3183976411819458, "learning_rate": 0.00027921568627450985, "loss": 0.2667, "step": 25845 }, { "epoch": 14.439106145251397, "grad_norm": 0.5989043116569519, "learning_rate": 0.000279187675070028, "loss": 0.3914, "step": 25846 }, { "epoch": 14.439664804469274, "grad_norm": 0.3644499182701111, "learning_rate": 0.0002791596638655462, "loss": 0.4763, "step": 25847 }, { "epoch": 14.44022346368715, "grad_norm": 0.5210332870483398, "learning_rate": 0.0002791316526610644, "loss": 0.3578, "step": 25848 }, { "epoch": 14.440782122905027, "grad_norm": 0.5334910750389099, "learning_rate": 0.0002791036414565826, "loss": 0.5267, "step": 25849 }, { "epoch": 14.441340782122905, "grad_norm": 0.6800428628921509, "learning_rate": 0.0002790756302521009, "loss": 0.4044, "step": 25850 }, { "epoch": 14.441899441340782, "grad_norm": 0.4861067235469818, "learning_rate": 0.00027904761904761903, "loss": 0.4885, "step": 25851 }, { "epoch": 14.442458100558659, "grad_norm": 0.7069694995880127, "learning_rate": 0.00027901960784313724, "loss": 0.3833, "step": 25852 }, { "epoch": 14.443016759776537, "grad_norm": 0.5879365801811218, "learning_rate": 0.0002789915966386555, "loss": 0.42, "step": 25853 }, { "epoch": 14.443575418994413, "grad_norm": 1.8649920225143433, "learning_rate": 0.00027896358543417365, "loss": 0.4743, "step": 25854 }, { "epoch": 14.44413407821229, "grad_norm": 0.3928588032722473, "learning_rate": 0.0002789355742296919, "loss": 0.4167, "step": 25855 }, { "epoch": 14.444692737430168, "grad_norm": 0.4184569716453552, "learning_rate": 0.00027890756302521006, "loss": 0.386, "step": 25856 }, { "epoch": 14.445251396648045, "grad_norm": 0.37697815895080566, "learning_rate": 0.00027887955182072827, "loss": 0.3515, "step": 25857 }, { "epoch": 14.445810055865921, "grad_norm": 0.4627286195755005, "learning_rate": 0.00027885154061624653, "loss": 0.3727, "step": 25858 }, { "epoch": 14.446368715083798, "grad_norm": 0.4251154363155365, "learning_rate": 0.0002788235294117647, "loss": 0.3987, 
"step": 25859 }, { "epoch": 14.446927374301676, "grad_norm": 0.44241055846214294, "learning_rate": 0.00027879551820728294, "loss": 0.4119, "step": 25860 }, { "epoch": 14.447486033519553, "grad_norm": 0.5731030702590942, "learning_rate": 0.00027876750700280115, "loss": 0.4532, "step": 25861 }, { "epoch": 14.44804469273743, "grad_norm": 0.31942126154899597, "learning_rate": 0.0002787394957983193, "loss": 0.2581, "step": 25862 }, { "epoch": 14.448603351955308, "grad_norm": 0.5858067870140076, "learning_rate": 0.00027871148459383756, "loss": 0.3868, "step": 25863 }, { "epoch": 14.449162011173184, "grad_norm": 0.5176591277122498, "learning_rate": 0.0002786834733893557, "loss": 0.6416, "step": 25864 }, { "epoch": 14.449720670391061, "grad_norm": 0.5952520370483398, "learning_rate": 0.00027865546218487397, "loss": 0.3012, "step": 25865 }, { "epoch": 14.45027932960894, "grad_norm": 0.4955611228942871, "learning_rate": 0.0002786274509803922, "loss": 0.3949, "step": 25866 }, { "epoch": 14.450837988826816, "grad_norm": 0.45805174112319946, "learning_rate": 0.00027859943977591033, "loss": 0.6283, "step": 25867 }, { "epoch": 14.451396648044692, "grad_norm": 2.1716721057891846, "learning_rate": 0.0002785714285714286, "loss": 0.4934, "step": 25868 }, { "epoch": 14.451955307262569, "grad_norm": 0.35805487632751465, "learning_rate": 0.0002785434173669468, "loss": 0.2824, "step": 25869 }, { "epoch": 14.452513966480447, "grad_norm": 0.5593825578689575, "learning_rate": 0.000278515406162465, "loss": 0.464, "step": 25870 }, { "epoch": 14.453072625698324, "grad_norm": 0.46954384446144104, "learning_rate": 0.0002784873949579832, "loss": 0.4405, "step": 25871 }, { "epoch": 14.4536312849162, "grad_norm": 2.8768081665039062, "learning_rate": 0.00027845938375350136, "loss": 0.4671, "step": 25872 }, { "epoch": 14.454189944134079, "grad_norm": 0.7915114164352417, "learning_rate": 0.0002784313725490196, "loss": 0.4529, "step": 25873 }, { "epoch": 14.454748603351955, "grad_norm": 
0.6863232254981995, "learning_rate": 0.0002784033613445378, "loss": 0.408, "step": 25874 }, { "epoch": 14.455307262569832, "grad_norm": 0.47793740034103394, "learning_rate": 0.00027837535014005603, "loss": 0.3627, "step": 25875 }, { "epoch": 14.45586592178771, "grad_norm": 0.39980530738830566, "learning_rate": 0.00027834733893557424, "loss": 0.3887, "step": 25876 }, { "epoch": 14.456424581005587, "grad_norm": 1.248163104057312, "learning_rate": 0.00027831932773109244, "loss": 0.4542, "step": 25877 }, { "epoch": 14.456983240223463, "grad_norm": 0.34120243787765503, "learning_rate": 0.00027829131652661065, "loss": 0.4245, "step": 25878 }, { "epoch": 14.45754189944134, "grad_norm": 0.388263463973999, "learning_rate": 0.00027826330532212886, "loss": 0.454, "step": 25879 }, { "epoch": 14.458100558659218, "grad_norm": 0.42011892795562744, "learning_rate": 0.00027823529411764706, "loss": 0.3985, "step": 25880 }, { "epoch": 14.458659217877095, "grad_norm": 0.7509288787841797, "learning_rate": 0.00027820728291316527, "loss": 0.404, "step": 25881 }, { "epoch": 14.459217877094972, "grad_norm": 0.600434422492981, "learning_rate": 0.0002781792717086835, "loss": 0.427, "step": 25882 }, { "epoch": 14.45977653631285, "grad_norm": 0.6243824362754822, "learning_rate": 0.0002781512605042017, "loss": 0.3565, "step": 25883 }, { "epoch": 14.460335195530726, "grad_norm": 0.5877174735069275, "learning_rate": 0.0002781232492997199, "loss": 0.3793, "step": 25884 }, { "epoch": 14.460893854748603, "grad_norm": 0.6933143138885498, "learning_rate": 0.00027809523809523815, "loss": 0.3625, "step": 25885 }, { "epoch": 14.461452513966481, "grad_norm": 0.3809208869934082, "learning_rate": 0.0002780672268907563, "loss": 0.3219, "step": 25886 }, { "epoch": 14.462011173184358, "grad_norm": 0.4341868460178375, "learning_rate": 0.0002780392156862745, "loss": 0.4553, "step": 25887 }, { "epoch": 14.462569832402234, "grad_norm": 0.3498751223087311, "learning_rate": 0.0002780112044817927, "loss": 0.3918, 
"step": 25888 }, { "epoch": 14.463128491620111, "grad_norm": 0.5370792746543884, "learning_rate": 0.0002779831932773109, "loss": 0.4683, "step": 25889 }, { "epoch": 14.46368715083799, "grad_norm": 2.3911874294281006, "learning_rate": 0.0002779551820728292, "loss": 0.3675, "step": 25890 }, { "epoch": 14.464245810055866, "grad_norm": 0.4159843623638153, "learning_rate": 0.00027792717086834733, "loss": 0.518, "step": 25891 }, { "epoch": 14.464804469273743, "grad_norm": 0.42622682452201843, "learning_rate": 0.00027789915966386553, "loss": 0.3819, "step": 25892 }, { "epoch": 14.46536312849162, "grad_norm": 0.48557355999946594, "learning_rate": 0.0002778711484593838, "loss": 0.3972, "step": 25893 }, { "epoch": 14.465921787709497, "grad_norm": 0.6152389049530029, "learning_rate": 0.00027784313725490195, "loss": 0.3862, "step": 25894 }, { "epoch": 14.466480446927374, "grad_norm": 0.44748613238334656, "learning_rate": 0.0002778151260504202, "loss": 0.4744, "step": 25895 }, { "epoch": 14.46703910614525, "grad_norm": 0.6692421436309814, "learning_rate": 0.00027778711484593836, "loss": 0.4342, "step": 25896 }, { "epoch": 14.467597765363129, "grad_norm": 4.49300479888916, "learning_rate": 0.00027775910364145656, "loss": 0.3707, "step": 25897 }, { "epoch": 14.468156424581005, "grad_norm": 0.5097029805183411, "learning_rate": 0.0002777310924369748, "loss": 0.5343, "step": 25898 }, { "epoch": 14.468715083798882, "grad_norm": 0.55149245262146, "learning_rate": 0.000277703081232493, "loss": 0.4433, "step": 25899 }, { "epoch": 14.46927374301676, "grad_norm": 0.44171226024627686, "learning_rate": 0.00027767507002801124, "loss": 0.4507, "step": 25900 }, { "epoch": 14.469832402234637, "grad_norm": 0.4970145523548126, "learning_rate": 0.00027764705882352944, "loss": 0.4557, "step": 25901 }, { "epoch": 14.470391061452514, "grad_norm": 0.5052556991577148, "learning_rate": 0.0002776190476190476, "loss": 0.4334, "step": 25902 }, { "epoch": 14.470949720670392, "grad_norm": 1.618317723274231, 
"learning_rate": 0.00027759103641456585, "loss": 0.4407, "step": 25903 }, { "epoch": 14.471508379888268, "grad_norm": 0.34399062395095825, "learning_rate": 0.000277563025210084, "loss": 0.2987, "step": 25904 }, { "epoch": 14.472067039106145, "grad_norm": 0.36642301082611084, "learning_rate": 0.00027753501400560227, "loss": 0.3539, "step": 25905 }, { "epoch": 14.472625698324022, "grad_norm": 0.8578351736068726, "learning_rate": 0.00027750700280112047, "loss": 0.3539, "step": 25906 }, { "epoch": 14.4731843575419, "grad_norm": 0.413003146648407, "learning_rate": 0.0002774789915966386, "loss": 0.4346, "step": 25907 }, { "epoch": 14.473743016759776, "grad_norm": 0.6205025911331177, "learning_rate": 0.0002774509803921569, "loss": 0.4073, "step": 25908 }, { "epoch": 14.474301675977653, "grad_norm": 0.41772159934043884, "learning_rate": 0.0002774229691876751, "loss": 0.3453, "step": 25909 }, { "epoch": 14.474860335195531, "grad_norm": 1.549654483795166, "learning_rate": 0.0002773949579831933, "loss": 0.388, "step": 25910 }, { "epoch": 14.475418994413408, "grad_norm": 0.5830680727958679, "learning_rate": 0.0002773669467787115, "loss": 0.4695, "step": 25911 }, { "epoch": 14.475977653631285, "grad_norm": 0.4966842830181122, "learning_rate": 0.00027733893557422965, "loss": 0.5402, "step": 25912 }, { "epoch": 14.476536312849163, "grad_norm": 0.4180568754673004, "learning_rate": 0.0002773109243697479, "loss": 0.4398, "step": 25913 }, { "epoch": 14.47709497206704, "grad_norm": 3.233367919921875, "learning_rate": 0.0002772829131652661, "loss": 0.357, "step": 25914 }, { "epoch": 14.477653631284916, "grad_norm": 0.40725085139274597, "learning_rate": 0.0002772549019607843, "loss": 0.3687, "step": 25915 }, { "epoch": 14.478212290502793, "grad_norm": 0.5402584075927734, "learning_rate": 0.00027722689075630253, "loss": 0.3921, "step": 25916 }, { "epoch": 14.478770949720671, "grad_norm": 1.2117303609848022, "learning_rate": 0.00027719887955182074, "loss": 0.4623, "step": 25917 }, { 
"epoch": 14.479329608938547, "grad_norm": 0.4490962326526642, "learning_rate": 0.00027717086834733894, "loss": 0.4384, "step": 25918 }, { "epoch": 14.479888268156424, "grad_norm": 0.43865489959716797, "learning_rate": 0.00027714285714285715, "loss": 0.3899, "step": 25919 }, { "epoch": 14.480446927374302, "grad_norm": 0.44409558176994324, "learning_rate": 0.00027711484593837536, "loss": 0.4186, "step": 25920 }, { "epoch": 14.481005586592179, "grad_norm": 0.5111070275306702, "learning_rate": 0.00027708683473389356, "loss": 0.6634, "step": 25921 }, { "epoch": 14.481564245810056, "grad_norm": 0.4883441925048828, "learning_rate": 0.00027705882352941177, "loss": 0.4135, "step": 25922 }, { "epoch": 14.482122905027932, "grad_norm": 0.5916086435317993, "learning_rate": 0.00027703081232493, "loss": 0.3109, "step": 25923 }, { "epoch": 14.48268156424581, "grad_norm": 0.4902220666408539, "learning_rate": 0.0002770028011204482, "loss": 0.3081, "step": 25924 }, { "epoch": 14.483240223463687, "grad_norm": 0.3402598202228546, "learning_rate": 0.00027697478991596644, "loss": 0.3663, "step": 25925 }, { "epoch": 14.483798882681564, "grad_norm": 0.46187663078308105, "learning_rate": 0.0002769467787114846, "loss": 0.4908, "step": 25926 }, { "epoch": 14.484357541899442, "grad_norm": 1.052712321281433, "learning_rate": 0.0002769187675070028, "loss": 0.5474, "step": 25927 }, { "epoch": 14.484916201117318, "grad_norm": 0.6220926642417908, "learning_rate": 0.000276890756302521, "loss": 0.6341, "step": 25928 }, { "epoch": 14.485474860335195, "grad_norm": 1.2511452436447144, "learning_rate": 0.0002768627450980392, "loss": 0.4107, "step": 25929 }, { "epoch": 14.486033519553073, "grad_norm": 0.5480039715766907, "learning_rate": 0.00027683473389355747, "loss": 0.3444, "step": 25930 }, { "epoch": 14.48659217877095, "grad_norm": 0.8300253748893738, "learning_rate": 0.0002768067226890756, "loss": 0.3393, "step": 25931 }, { "epoch": 14.487150837988827, "grad_norm": 0.577490508556366, "learning_rate": 
0.00027677871148459383, "loss": 0.5192, "step": 25932 }, { "epoch": 14.487709497206703, "grad_norm": 0.6029434204101562, "learning_rate": 0.0002767507002801121, "loss": 0.5011, "step": 25933 }, { "epoch": 14.488268156424581, "grad_norm": 0.44405749440193176, "learning_rate": 0.00027672268907563024, "loss": 0.4414, "step": 25934 }, { "epoch": 14.488826815642458, "grad_norm": 0.5729031562805176, "learning_rate": 0.0002766946778711485, "loss": 0.3957, "step": 25935 }, { "epoch": 14.489385474860335, "grad_norm": 1.158888339996338, "learning_rate": 0.00027666666666666665, "loss": 0.4868, "step": 25936 }, { "epoch": 14.489944134078213, "grad_norm": 0.42603081464767456, "learning_rate": 0.00027663865546218486, "loss": 0.4826, "step": 25937 }, { "epoch": 14.49050279329609, "grad_norm": 1.5587899684906006, "learning_rate": 0.0002766106442577031, "loss": 0.3082, "step": 25938 }, { "epoch": 14.491061452513966, "grad_norm": 0.7136504650115967, "learning_rate": 0.00027658263305322127, "loss": 0.3839, "step": 25939 }, { "epoch": 14.491620111731844, "grad_norm": 0.4167581796646118, "learning_rate": 0.0002765546218487395, "loss": 0.3553, "step": 25940 }, { "epoch": 14.492178770949721, "grad_norm": 1.5550014972686768, "learning_rate": 0.00027652661064425774, "loss": 0.416, "step": 25941 }, { "epoch": 14.492737430167598, "grad_norm": 0.4727814793586731, "learning_rate": 0.0002764985994397759, "loss": 0.4125, "step": 25942 }, { "epoch": 14.493296089385474, "grad_norm": 0.6300815939903259, "learning_rate": 0.00027647058823529415, "loss": 0.4168, "step": 25943 }, { "epoch": 14.493854748603352, "grad_norm": 0.38475531339645386, "learning_rate": 0.0002764425770308123, "loss": 0.4369, "step": 25944 }, { "epoch": 14.494413407821229, "grad_norm": 0.5899533033370972, "learning_rate": 0.0002764145658263305, "loss": 0.3714, "step": 25945 }, { "epoch": 14.494972067039106, "grad_norm": 1.1216338872909546, "learning_rate": 0.00027638655462184877, "loss": 0.4638, "step": 25946 }, { "epoch": 
14.495530726256984, "grad_norm": 0.4806707799434662, "learning_rate": 0.0002763585434173669, "loss": 0.4203, "step": 25947 }, { "epoch": 14.49608938547486, "grad_norm": 0.4335712194442749, "learning_rate": 0.0002763305322128852, "loss": 0.4131, "step": 25948 }, { "epoch": 14.496648044692737, "grad_norm": 0.6214283108711243, "learning_rate": 0.0002763025210084034, "loss": 0.3542, "step": 25949 }, { "epoch": 14.497206703910614, "grad_norm": 0.4234263598918915, "learning_rate": 0.00027627450980392154, "loss": 0.4529, "step": 25950 }, { "epoch": 14.497765363128492, "grad_norm": 0.5320940613746643, "learning_rate": 0.0002762464985994398, "loss": 0.4522, "step": 25951 }, { "epoch": 14.498324022346369, "grad_norm": 0.44390252232551575, "learning_rate": 0.00027621848739495795, "loss": 0.4595, "step": 25952 }, { "epoch": 14.498882681564245, "grad_norm": 0.540688157081604, "learning_rate": 0.0002761904761904762, "loss": 0.3776, "step": 25953 }, { "epoch": 14.499441340782123, "grad_norm": 0.6436562538146973, "learning_rate": 0.0002761624649859944, "loss": 0.6551, "step": 25954 }, { "epoch": 14.5, "grad_norm": 1.3538720607757568, "learning_rate": 0.00027613445378151257, "loss": 0.3443, "step": 25955 }, { "epoch": 14.500558659217877, "grad_norm": 0.9569316506385803, "learning_rate": 0.0002761064425770308, "loss": 0.4618, "step": 25956 }, { "epoch": 14.501117318435755, "grad_norm": 0.5849122405052185, "learning_rate": 0.00027607843137254903, "loss": 0.3982, "step": 25957 }, { "epoch": 14.501675977653631, "grad_norm": 1.7833435535430908, "learning_rate": 0.00027605042016806724, "loss": 0.3608, "step": 25958 }, { "epoch": 14.502234636871508, "grad_norm": 0.9267893433570862, "learning_rate": 0.00027602240896358544, "loss": 0.4844, "step": 25959 }, { "epoch": 14.502793296089386, "grad_norm": 0.586439847946167, "learning_rate": 0.0002759943977591036, "loss": 0.6485, "step": 25960 }, { "epoch": 14.503351955307263, "grad_norm": 0.472647100687027, "learning_rate": 
0.00027596638655462186, "loss": 0.4161, "step": 25961 }, { "epoch": 14.50391061452514, "grad_norm": 0.46137022972106934, "learning_rate": 0.00027593837535014006, "loss": 0.3481, "step": 25962 }, { "epoch": 14.504469273743016, "grad_norm": 0.4774845540523529, "learning_rate": 0.00027591036414565827, "loss": 0.374, "step": 25963 }, { "epoch": 14.505027932960894, "grad_norm": 0.7174018025398254, "learning_rate": 0.0002758823529411765, "loss": 0.3755, "step": 25964 }, { "epoch": 14.505586592178771, "grad_norm": 0.39748233556747437, "learning_rate": 0.0002758543417366947, "loss": 0.465, "step": 25965 }, { "epoch": 14.506145251396648, "grad_norm": 0.5003325939178467, "learning_rate": 0.0002758263305322129, "loss": 0.534, "step": 25966 }, { "epoch": 14.506703910614526, "grad_norm": 0.7247323989868164, "learning_rate": 0.0002757983193277311, "loss": 0.3158, "step": 25967 }, { "epoch": 14.507262569832402, "grad_norm": 0.40612685680389404, "learning_rate": 0.00027577030812324935, "loss": 0.4028, "step": 25968 }, { "epoch": 14.507821229050279, "grad_norm": 0.6557098627090454, "learning_rate": 0.0002757422969187675, "loss": 0.4125, "step": 25969 }, { "epoch": 14.508379888268156, "grad_norm": 0.7481595277786255, "learning_rate": 0.0002757142857142857, "loss": 0.2999, "step": 25970 }, { "epoch": 14.508938547486034, "grad_norm": 0.5338840484619141, "learning_rate": 0.0002756862745098039, "loss": 0.4391, "step": 25971 }, { "epoch": 14.50949720670391, "grad_norm": 0.4046436548233032, "learning_rate": 0.0002756582633053221, "loss": 0.4779, "step": 25972 }, { "epoch": 14.510055865921787, "grad_norm": 0.49413642287254333, "learning_rate": 0.0002756302521008404, "loss": 0.4386, "step": 25973 }, { "epoch": 14.510614525139665, "grad_norm": 1.3335566520690918, "learning_rate": 0.00027560224089635853, "loss": 0.3907, "step": 25974 }, { "epoch": 14.511173184357542, "grad_norm": 0.8605470657348633, "learning_rate": 0.00027557422969187674, "loss": 0.4689, "step": 25975 }, { "epoch": 
14.511731843575419, "grad_norm": 0.4894128143787384, "learning_rate": 0.000275546218487395, "loss": 0.4374, "step": 25976 }, { "epoch": 14.512290502793297, "grad_norm": 0.4011082351207733, "learning_rate": 0.00027551820728291315, "loss": 0.4145, "step": 25977 }, { "epoch": 14.512849162011173, "grad_norm": 1.0278226137161255, "learning_rate": 0.0002754901960784314, "loss": 0.3024, "step": 25978 }, { "epoch": 14.51340782122905, "grad_norm": 0.36048823595046997, "learning_rate": 0.00027546218487394956, "loss": 0.4077, "step": 25979 }, { "epoch": 14.513966480446927, "grad_norm": 0.3824455440044403, "learning_rate": 0.00027543417366946777, "loss": 0.4161, "step": 25980 }, { "epoch": 14.514525139664805, "grad_norm": 1.2201869487762451, "learning_rate": 0.00027540616246498603, "loss": 0.3678, "step": 25981 }, { "epoch": 14.515083798882682, "grad_norm": 0.6133846044540405, "learning_rate": 0.0002753781512605042, "loss": 0.5822, "step": 25982 }, { "epoch": 14.515642458100558, "grad_norm": 3.5307374000549316, "learning_rate": 0.00027535014005602244, "loss": 0.4668, "step": 25983 }, { "epoch": 14.516201117318436, "grad_norm": 0.4548972249031067, "learning_rate": 0.00027532212885154065, "loss": 0.4188, "step": 25984 }, { "epoch": 14.516759776536313, "grad_norm": 0.5998306274414062, "learning_rate": 0.0002752941176470588, "loss": 0.573, "step": 25985 }, { "epoch": 14.51731843575419, "grad_norm": 0.5475751757621765, "learning_rate": 0.00027526610644257706, "loss": 0.3595, "step": 25986 }, { "epoch": 14.517877094972068, "grad_norm": 0.3263009190559387, "learning_rate": 0.0002752380952380952, "loss": 0.3734, "step": 25987 }, { "epoch": 14.518435754189944, "grad_norm": 0.6386430859565735, "learning_rate": 0.00027521008403361347, "loss": 0.4613, "step": 25988 }, { "epoch": 14.518994413407821, "grad_norm": 0.5398362874984741, "learning_rate": 0.0002751820728291317, "loss": 0.5471, "step": 25989 }, { "epoch": 14.519553072625698, "grad_norm": 0.3478747010231018, "learning_rate": 
0.00027515406162464983, "loss": 0.3831, "step": 25990 }, { "epoch": 14.520111731843576, "grad_norm": 0.48594221472740173, "learning_rate": 0.0002751260504201681, "loss": 0.3831, "step": 25991 }, { "epoch": 14.520670391061453, "grad_norm": 0.31570881605148315, "learning_rate": 0.0002750980392156863, "loss": 0.3458, "step": 25992 }, { "epoch": 14.521229050279329, "grad_norm": 0.4659821391105652, "learning_rate": 0.0002750700280112045, "loss": 0.421, "step": 25993 }, { "epoch": 14.521787709497207, "grad_norm": 0.5973386168479919, "learning_rate": 0.0002750420168067227, "loss": 0.4089, "step": 25994 }, { "epoch": 14.522346368715084, "grad_norm": 0.5446675419807434, "learning_rate": 0.00027501400560224086, "loss": 0.4108, "step": 25995 }, { "epoch": 14.52290502793296, "grad_norm": 2.5869674682617188, "learning_rate": 0.0002749859943977591, "loss": 0.4668, "step": 25996 }, { "epoch": 14.523463687150837, "grad_norm": 0.5437316298484802, "learning_rate": 0.0002749579831932773, "loss": 0.3679, "step": 25997 }, { "epoch": 14.524022346368715, "grad_norm": 0.4237959682941437, "learning_rate": 0.00027492997198879553, "loss": 0.4254, "step": 25998 }, { "epoch": 14.524581005586592, "grad_norm": 0.47599485516548157, "learning_rate": 0.00027490196078431374, "loss": 0.3897, "step": 25999 }, { "epoch": 14.525139664804469, "grad_norm": 0.4000786244869232, "learning_rate": 0.00027487394957983194, "loss": 0.4103, "step": 26000 }, { "epoch": 14.525139664804469, "eval_cer": 0.08566549921988369, "eval_loss": 0.32592344284057617, "eval_runtime": 55.5069, "eval_samples_per_second": 81.756, "eval_steps_per_second": 5.116, "eval_wer": 0.33959945252925894, "step": 26000 }, { "epoch": 14.525698324022347, "grad_norm": 1.9771556854248047, "learning_rate": 0.00027484593837535015, "loss": 0.3924, "step": 26001 }, { "epoch": 14.526256983240224, "grad_norm": 0.522955596446991, "learning_rate": 0.00027481792717086836, "loss": 0.4107, "step": 26002 }, { "epoch": 14.5268156424581, "grad_norm": 
0.7025735974311829, "learning_rate": 0.00027478991596638656, "loss": 0.3578, "step": 26003 }, { "epoch": 14.527374301675978, "grad_norm": 0.43271005153656006, "learning_rate": 0.00027476190476190477, "loss": 0.4055, "step": 26004 }, { "epoch": 14.527932960893855, "grad_norm": 0.5428447127342224, "learning_rate": 0.000274733893557423, "loss": 0.498, "step": 26005 }, { "epoch": 14.528491620111732, "grad_norm": 0.5781406760215759, "learning_rate": 0.0002747058823529412, "loss": 0.3292, "step": 26006 }, { "epoch": 14.529050279329608, "grad_norm": 2.1886701583862305, "learning_rate": 0.0002746778711484594, "loss": 0.4734, "step": 26007 }, { "epoch": 14.529608938547486, "grad_norm": 0.742739200592041, "learning_rate": 0.00027464985994397765, "loss": 0.3742, "step": 26008 }, { "epoch": 14.530167597765363, "grad_norm": 1.6004784107208252, "learning_rate": 0.0002746218487394958, "loss": 0.4654, "step": 26009 }, { "epoch": 14.53072625698324, "grad_norm": 0.40614578127861023, "learning_rate": 0.000274593837535014, "loss": 0.4035, "step": 26010 }, { "epoch": 14.531284916201118, "grad_norm": 0.6169610023498535, "learning_rate": 0.0002745658263305322, "loss": 0.4775, "step": 26011 }, { "epoch": 14.531843575418995, "grad_norm": 0.6450771689414978, "learning_rate": 0.0002745378151260504, "loss": 0.4256, "step": 26012 }, { "epoch": 14.532402234636871, "grad_norm": 0.42523083090782166, "learning_rate": 0.0002745098039215687, "loss": 0.3226, "step": 26013 }, { "epoch": 14.53296089385475, "grad_norm": 0.6148983240127563, "learning_rate": 0.00027448179271708683, "loss": 0.4975, "step": 26014 }, { "epoch": 14.533519553072626, "grad_norm": 0.5217316150665283, "learning_rate": 0.00027445378151260503, "loss": 0.4175, "step": 26015 }, { "epoch": 14.534078212290503, "grad_norm": 0.7151603698730469, "learning_rate": 0.0002744257703081233, "loss": 0.4726, "step": 26016 }, { "epoch": 14.53463687150838, "grad_norm": 0.4330414831638336, "learning_rate": 0.00027439775910364145, "loss": 0.4007, 
"step": 26017 }, { "epoch": 14.535195530726257, "grad_norm": 0.38777121901512146, "learning_rate": 0.0002743697478991597, "loss": 0.4533, "step": 26018 }, { "epoch": 14.535754189944134, "grad_norm": 3.000777244567871, "learning_rate": 0.00027434173669467786, "loss": 0.4623, "step": 26019 }, { "epoch": 14.53631284916201, "grad_norm": 1.1478471755981445, "learning_rate": 0.00027431372549019606, "loss": 0.4353, "step": 26020 }, { "epoch": 14.536871508379889, "grad_norm": 1.5430406332015991, "learning_rate": 0.0002742857142857143, "loss": 0.3321, "step": 26021 }, { "epoch": 14.537430167597766, "grad_norm": 0.4634488523006439, "learning_rate": 0.0002742577030812325, "loss": 0.405, "step": 26022 }, { "epoch": 14.537988826815642, "grad_norm": 0.5155308246612549, "learning_rate": 0.00027422969187675074, "loss": 0.5002, "step": 26023 }, { "epoch": 14.538547486033519, "grad_norm": 0.4073270559310913, "learning_rate": 0.00027420168067226894, "loss": 0.3, "step": 26024 }, { "epoch": 14.539106145251397, "grad_norm": 0.6153872013092041, "learning_rate": 0.0002741736694677871, "loss": 0.3977, "step": 26025 }, { "epoch": 14.539664804469274, "grad_norm": 0.6380208730697632, "learning_rate": 0.00027414565826330535, "loss": 0.5049, "step": 26026 }, { "epoch": 14.54022346368715, "grad_norm": 0.41897475719451904, "learning_rate": 0.0002741176470588235, "loss": 0.3928, "step": 26027 }, { "epoch": 14.540782122905028, "grad_norm": 1.5104429721832275, "learning_rate": 0.00027408963585434177, "loss": 0.3214, "step": 26028 }, { "epoch": 14.541340782122905, "grad_norm": 1.6623996496200562, "learning_rate": 0.00027406162464985997, "loss": 0.4312, "step": 26029 }, { "epoch": 14.541899441340782, "grad_norm": 0.3700019419193268, "learning_rate": 0.0002740336134453781, "loss": 0.3656, "step": 26030 }, { "epoch": 14.54245810055866, "grad_norm": 0.4379046857357025, "learning_rate": 0.0002740056022408964, "loss": 0.3629, "step": 26031 }, { "epoch": 14.543016759776537, "grad_norm": 0.8033010363578796, 
"learning_rate": 0.0002739775910364146, "loss": 0.4527, "step": 26032 }, { "epoch": 14.543575418994413, "grad_norm": 0.7021380662918091, "learning_rate": 0.0002739495798319328, "loss": 0.3193, "step": 26033 }, { "epoch": 14.544134078212291, "grad_norm": 0.8823875784873962, "learning_rate": 0.000273921568627451, "loss": 0.3757, "step": 26034 }, { "epoch": 14.544692737430168, "grad_norm": 0.7063277959823608, "learning_rate": 0.00027389355742296915, "loss": 0.7309, "step": 26035 }, { "epoch": 14.545251396648045, "grad_norm": 0.49394771456718445, "learning_rate": 0.0002738655462184874, "loss": 0.5467, "step": 26036 }, { "epoch": 14.545810055865921, "grad_norm": 0.40190622210502625, "learning_rate": 0.0002738375350140056, "loss": 0.3621, "step": 26037 }, { "epoch": 14.5463687150838, "grad_norm": 0.7106718420982361, "learning_rate": 0.0002738095238095238, "loss": 0.4182, "step": 26038 }, { "epoch": 14.546927374301676, "grad_norm": 0.5196418166160583, "learning_rate": 0.00027378151260504203, "loss": 0.4571, "step": 26039 }, { "epoch": 14.547486033519553, "grad_norm": 0.3982641100883484, "learning_rate": 0.00027375350140056024, "loss": 0.3458, "step": 26040 }, { "epoch": 14.548044692737431, "grad_norm": 0.5754070281982422, "learning_rate": 0.00027372549019607844, "loss": 0.4574, "step": 26041 }, { "epoch": 14.548603351955308, "grad_norm": 0.5434890389442444, "learning_rate": 0.00027369747899159665, "loss": 0.3786, "step": 26042 }, { "epoch": 14.549162011173184, "grad_norm": 0.4313388764858246, "learning_rate": 0.00027366946778711486, "loss": 0.4345, "step": 26043 }, { "epoch": 14.54972067039106, "grad_norm": 0.48550620675086975, "learning_rate": 0.00027364145658263306, "loss": 0.503, "step": 26044 }, { "epoch": 14.550279329608939, "grad_norm": 0.40159523487091064, "learning_rate": 0.00027361344537815127, "loss": 0.4624, "step": 26045 }, { "epoch": 14.550837988826816, "grad_norm": 0.37571531534194946, "learning_rate": 0.0002735854341736695, "loss": 0.4472, "step": 26046 }, 
{ "epoch": 14.551396648044692, "grad_norm": 0.5323110222816467, "learning_rate": 0.0002735574229691877, "loss": 0.3221, "step": 26047 }, { "epoch": 14.55195530726257, "grad_norm": 0.7403799891471863, "learning_rate": 0.00027352941176470594, "loss": 0.4074, "step": 26048 }, { "epoch": 14.552513966480447, "grad_norm": 0.32933682203292847, "learning_rate": 0.0002735014005602241, "loss": 0.3303, "step": 26049 }, { "epoch": 14.553072625698324, "grad_norm": 0.38775405287742615, "learning_rate": 0.0002734733893557423, "loss": 0.4085, "step": 26050 }, { "epoch": 14.553631284916202, "grad_norm": 0.83772212266922, "learning_rate": 0.0002734453781512605, "loss": 0.3432, "step": 26051 }, { "epoch": 14.554189944134079, "grad_norm": 0.8909516334533691, "learning_rate": 0.0002734173669467787, "loss": 0.5251, "step": 26052 }, { "epoch": 14.554748603351955, "grad_norm": 0.4273841977119446, "learning_rate": 0.0002733893557422969, "loss": 0.4467, "step": 26053 }, { "epoch": 14.555307262569832, "grad_norm": 0.46706312894821167, "learning_rate": 0.0002733613445378151, "loss": 0.3305, "step": 26054 }, { "epoch": 14.55586592178771, "grad_norm": 1.3802083730697632, "learning_rate": 0.00027333333333333333, "loss": 0.5234, "step": 26055 }, { "epoch": 14.556424581005587, "grad_norm": 0.5829143524169922, "learning_rate": 0.0002733053221288516, "loss": 0.5211, "step": 26056 }, { "epoch": 14.556983240223463, "grad_norm": 0.3526742160320282, "learning_rate": 0.00027327731092436974, "loss": 0.411, "step": 26057 }, { "epoch": 14.557541899441341, "grad_norm": 0.6135927438735962, "learning_rate": 0.00027324929971988795, "loss": 0.4243, "step": 26058 }, { "epoch": 14.558100558659218, "grad_norm": 0.4651064872741699, "learning_rate": 0.00027322128851540615, "loss": 0.492, "step": 26059 }, { "epoch": 14.558659217877095, "grad_norm": 0.39348316192626953, "learning_rate": 0.00027319327731092436, "loss": 0.4161, "step": 26060 }, { "epoch": 14.559217877094973, "grad_norm": 0.40608805418014526, 
"learning_rate": 0.0002731652661064426, "loss": 0.3723, "step": 26061 }, { "epoch": 14.55977653631285, "grad_norm": 0.8289069533348083, "learning_rate": 0.00027313725490196077, "loss": 0.4473, "step": 26062 }, { "epoch": 14.560335195530726, "grad_norm": 0.5005795359611511, "learning_rate": 0.000273109243697479, "loss": 0.4202, "step": 26063 }, { "epoch": 14.560893854748603, "grad_norm": 0.4409407377243042, "learning_rate": 0.00027308123249299724, "loss": 0.405, "step": 26064 }, { "epoch": 14.561452513966481, "grad_norm": 0.6019088625907898, "learning_rate": 0.0002730532212885154, "loss": 0.4844, "step": 26065 }, { "epoch": 14.562011173184358, "grad_norm": 0.537895679473877, "learning_rate": 0.00027302521008403365, "loss": 0.357, "step": 26066 }, { "epoch": 14.562569832402234, "grad_norm": 0.34745514392852783, "learning_rate": 0.0002729971988795518, "loss": 0.3399, "step": 26067 }, { "epoch": 14.563128491620112, "grad_norm": 0.9350487589836121, "learning_rate": 0.00027296918767507, "loss": 0.3352, "step": 26068 }, { "epoch": 14.563687150837989, "grad_norm": 0.6089915037155151, "learning_rate": 0.00027294117647058827, "loss": 0.4378, "step": 26069 }, { "epoch": 14.564245810055866, "grad_norm": 0.47654271125793457, "learning_rate": 0.0002729131652661064, "loss": 0.3825, "step": 26070 }, { "epoch": 14.564804469273742, "grad_norm": 0.52055424451828, "learning_rate": 0.0002728851540616247, "loss": 0.4719, "step": 26071 }, { "epoch": 14.56536312849162, "grad_norm": 1.17324697971344, "learning_rate": 0.0002728571428571429, "loss": 0.5105, "step": 26072 }, { "epoch": 14.565921787709497, "grad_norm": 0.3932095766067505, "learning_rate": 0.00027282913165266104, "loss": 0.445, "step": 26073 }, { "epoch": 14.566480446927374, "grad_norm": 0.6846091747283936, "learning_rate": 0.0002728011204481793, "loss": 0.419, "step": 26074 }, { "epoch": 14.567039106145252, "grad_norm": 0.37739360332489014, "learning_rate": 0.00027277310924369745, "loss": 0.4032, "step": 26075 }, { "epoch": 
14.567597765363129, "grad_norm": 0.36128780245780945, "learning_rate": 0.0002727450980392157, "loss": 0.327, "step": 26076 }, { "epoch": 14.568156424581005, "grad_norm": 0.5941394567489624, "learning_rate": 0.0002727170868347339, "loss": 0.408, "step": 26077 }, { "epoch": 14.568715083798883, "grad_norm": 2.184309720993042, "learning_rate": 0.00027268907563025207, "loss": 0.4819, "step": 26078 }, { "epoch": 14.56927374301676, "grad_norm": 0.4476054012775421, "learning_rate": 0.0002726610644257703, "loss": 0.4246, "step": 26079 }, { "epoch": 14.569832402234637, "grad_norm": 0.5083453059196472, "learning_rate": 0.00027263305322128853, "loss": 0.3647, "step": 26080 }, { "epoch": 14.570391061452513, "grad_norm": 0.3089964985847473, "learning_rate": 0.00027260504201680674, "loss": 0.3666, "step": 26081 }, { "epoch": 14.570949720670392, "grad_norm": 1.3355246782302856, "learning_rate": 0.00027257703081232494, "loss": 0.4315, "step": 26082 }, { "epoch": 14.571508379888268, "grad_norm": 0.49300894141197205, "learning_rate": 0.0002725490196078431, "loss": 0.2956, "step": 26083 }, { "epoch": 14.572067039106145, "grad_norm": 0.3968302011489868, "learning_rate": 0.00027252100840336136, "loss": 0.2935, "step": 26084 }, { "epoch": 14.572625698324023, "grad_norm": 0.4763304591178894, "learning_rate": 0.00027249299719887956, "loss": 0.622, "step": 26085 }, { "epoch": 14.5731843575419, "grad_norm": 0.3786044418811798, "learning_rate": 0.00027246498599439777, "loss": 0.4332, "step": 26086 }, { "epoch": 14.573743016759776, "grad_norm": 0.46189144253730774, "learning_rate": 0.000272436974789916, "loss": 0.6368, "step": 26087 }, { "epoch": 14.574301675977654, "grad_norm": 0.7109042406082153, "learning_rate": 0.0002724089635854342, "loss": 0.4257, "step": 26088 }, { "epoch": 14.574860335195531, "grad_norm": 0.3672035336494446, "learning_rate": 0.0002723809523809524, "loss": 0.3468, "step": 26089 }, { "epoch": 14.575418994413408, "grad_norm": 0.3219582438468933, "learning_rate": 
0.0002723529411764706, "loss": 0.4181, "step": 26090 }, { "epoch": 14.575977653631284, "grad_norm": 0.49108415842056274, "learning_rate": 0.0002723249299719888, "loss": 0.5212, "step": 26091 }, { "epoch": 14.576536312849163, "grad_norm": 0.6956894397735596, "learning_rate": 0.000272296918767507, "loss": 0.3895, "step": 26092 }, { "epoch": 14.577094972067039, "grad_norm": 0.5838638544082642, "learning_rate": 0.0002722689075630252, "loss": 0.6173, "step": 26093 }, { "epoch": 14.577653631284916, "grad_norm": 0.3746076226234436, "learning_rate": 0.0002722408963585434, "loss": 0.4911, "step": 26094 }, { "epoch": 14.578212290502794, "grad_norm": 1.9903069734573364, "learning_rate": 0.0002722128851540616, "loss": 0.4245, "step": 26095 }, { "epoch": 14.57877094972067, "grad_norm": 1.221889615058899, "learning_rate": 0.0002721848739495799, "loss": 0.5497, "step": 26096 }, { "epoch": 14.579329608938547, "grad_norm": 0.9008015990257263, "learning_rate": 0.00027215686274509803, "loss": 0.3349, "step": 26097 }, { "epoch": 14.579888268156424, "grad_norm": 0.5886290073394775, "learning_rate": 0.00027212885154061624, "loss": 0.3793, "step": 26098 }, { "epoch": 14.580446927374302, "grad_norm": 10.349678039550781, "learning_rate": 0.00027210084033613445, "loss": 0.3556, "step": 26099 }, { "epoch": 14.581005586592179, "grad_norm": 0.7075499296188354, "learning_rate": 0.00027207282913165265, "loss": 0.4967, "step": 26100 }, { "epoch": 14.581564245810055, "grad_norm": 0.659697949886322, "learning_rate": 0.0002720448179271709, "loss": 0.4812, "step": 26101 }, { "epoch": 14.582122905027934, "grad_norm": 0.8686216473579407, "learning_rate": 0.00027201680672268906, "loss": 0.4624, "step": 26102 }, { "epoch": 14.58268156424581, "grad_norm": 0.5101198554039001, "learning_rate": 0.00027198879551820727, "loss": 0.4314, "step": 26103 }, { "epoch": 14.583240223463687, "grad_norm": 5.199033260345459, "learning_rate": 0.00027196078431372553, "loss": 0.3412, "step": 26104 }, { "epoch": 
14.583798882681565, "grad_norm": 0.5274847745895386, "learning_rate": 0.0002719327731092437, "loss": 0.3988, "step": 26105 }, { "epoch": 14.584357541899442, "grad_norm": 0.45590347051620483, "learning_rate": 0.00027190476190476194, "loss": 0.4705, "step": 26106 }, { "epoch": 14.584916201117318, "grad_norm": 1.5884829759597778, "learning_rate": 0.0002718767507002801, "loss": 0.3979, "step": 26107 }, { "epoch": 14.585474860335196, "grad_norm": 0.39603689312934875, "learning_rate": 0.0002718487394957983, "loss": 0.3367, "step": 26108 }, { "epoch": 14.586033519553073, "grad_norm": 0.552118718624115, "learning_rate": 0.00027182072829131656, "loss": 0.4641, "step": 26109 }, { "epoch": 14.58659217877095, "grad_norm": 0.9286280274391174, "learning_rate": 0.0002717927170868347, "loss": 0.4111, "step": 26110 }, { "epoch": 14.587150837988826, "grad_norm": 0.5307616591453552, "learning_rate": 0.00027176470588235297, "loss": 0.4479, "step": 26111 }, { "epoch": 14.587709497206705, "grad_norm": 0.38906583189964294, "learning_rate": 0.0002717366946778712, "loss": 0.3181, "step": 26112 }, { "epoch": 14.588268156424581, "grad_norm": 0.41692179441452026, "learning_rate": 0.00027170868347338933, "loss": 0.3959, "step": 26113 }, { "epoch": 14.588826815642458, "grad_norm": 0.45565518736839294, "learning_rate": 0.0002716806722689076, "loss": 0.5004, "step": 26114 }, { "epoch": 14.589385474860336, "grad_norm": 0.5528531074523926, "learning_rate": 0.00027165266106442574, "loss": 0.3311, "step": 26115 }, { "epoch": 14.589944134078213, "grad_norm": 0.35898053646087646, "learning_rate": 0.000271624649859944, "loss": 0.3291, "step": 26116 }, { "epoch": 14.59050279329609, "grad_norm": 0.4807465672492981, "learning_rate": 0.0002715966386554622, "loss": 0.4286, "step": 26117 }, { "epoch": 14.591061452513966, "grad_norm": 0.42465221881866455, "learning_rate": 0.00027156862745098036, "loss": 0.4198, "step": 26118 }, { "epoch": 14.591620111731844, "grad_norm": 0.41445204615592957, "learning_rate": 
0.0002715406162464986, "loss": 0.3419, "step": 26119 }, { "epoch": 14.59217877094972, "grad_norm": 0.32837793231010437, "learning_rate": 0.0002715126050420168, "loss": 0.4119, "step": 26120 }, { "epoch": 14.592737430167597, "grad_norm": 0.3237442374229431, "learning_rate": 0.00027148459383753503, "loss": 0.3136, "step": 26121 }, { "epoch": 14.593296089385476, "grad_norm": 0.42453381419181824, "learning_rate": 0.00027145658263305324, "loss": 0.3574, "step": 26122 }, { "epoch": 14.593854748603352, "grad_norm": 1.4846601486206055, "learning_rate": 0.0002714285714285714, "loss": 0.3973, "step": 26123 }, { "epoch": 14.594413407821229, "grad_norm": 0.4344806671142578, "learning_rate": 0.00027140056022408965, "loss": 0.4062, "step": 26124 }, { "epoch": 14.594972067039105, "grad_norm": 0.38445907831192017, "learning_rate": 0.00027137254901960786, "loss": 0.3543, "step": 26125 }, { "epoch": 14.595530726256984, "grad_norm": 0.5752466320991516, "learning_rate": 0.00027134453781512606, "loss": 0.4638, "step": 26126 }, { "epoch": 14.59608938547486, "grad_norm": 0.8387674689292908, "learning_rate": 0.00027131652661064427, "loss": 0.3897, "step": 26127 }, { "epoch": 14.596648044692737, "grad_norm": 0.705547034740448, "learning_rate": 0.0002712885154061625, "loss": 0.3764, "step": 26128 }, { "epoch": 14.597206703910615, "grad_norm": 0.5294306874275208, "learning_rate": 0.0002712605042016807, "loss": 0.5623, "step": 26129 }, { "epoch": 14.597765363128492, "grad_norm": 0.3328593373298645, "learning_rate": 0.0002712324929971989, "loss": 0.3736, "step": 26130 }, { "epoch": 14.598324022346368, "grad_norm": 0.40915560722351074, "learning_rate": 0.0002712044817927171, "loss": 0.3548, "step": 26131 }, { "epoch": 14.598882681564247, "grad_norm": 0.4741741716861725, "learning_rate": 0.0002711764705882353, "loss": 0.4751, "step": 26132 }, { "epoch": 14.599441340782123, "grad_norm": 0.32831376791000366, "learning_rate": 0.0002711484593837535, "loss": 0.3642, "step": 26133 }, { "epoch": 14.6, 
"grad_norm": 0.49157190322875977, "learning_rate": 0.0002711204481792717, "loss": 0.5136, "step": 26134 }, { "epoch": 14.600558659217878, "grad_norm": 0.5309893488883972, "learning_rate": 0.0002710924369747899, "loss": 0.3769, "step": 26135 }, { "epoch": 14.601117318435755, "grad_norm": 3.6335575580596924, "learning_rate": 0.0002710644257703082, "loss": 0.5501, "step": 26136 }, { "epoch": 14.601675977653631, "grad_norm": 0.45638343691825867, "learning_rate": 0.00027103641456582633, "loss": 0.4078, "step": 26137 }, { "epoch": 14.602234636871508, "grad_norm": 1.5849069356918335, "learning_rate": 0.00027100840336134453, "loss": 0.3802, "step": 26138 }, { "epoch": 14.602793296089386, "grad_norm": 0.6146363019943237, "learning_rate": 0.00027098039215686274, "loss": 0.4778, "step": 26139 }, { "epoch": 14.603351955307263, "grad_norm": 0.6562778353691101, "learning_rate": 0.00027095238095238095, "loss": 0.4588, "step": 26140 }, { "epoch": 14.60391061452514, "grad_norm": 0.5333754420280457, "learning_rate": 0.0002709243697478992, "loss": 0.4616, "step": 26141 }, { "epoch": 14.604469273743018, "grad_norm": 0.40937182307243347, "learning_rate": 0.00027089635854341736, "loss": 0.3876, "step": 26142 }, { "epoch": 14.605027932960894, "grad_norm": 0.37132003903388977, "learning_rate": 0.00027086834733893556, "loss": 0.3164, "step": 26143 }, { "epoch": 14.60558659217877, "grad_norm": 0.45623156428337097, "learning_rate": 0.0002708403361344538, "loss": 0.4682, "step": 26144 }, { "epoch": 14.606145251396647, "grad_norm": 0.3326408267021179, "learning_rate": 0.000270812324929972, "loss": 0.4154, "step": 26145 }, { "epoch": 14.606703910614526, "grad_norm": 0.4545988440513611, "learning_rate": 0.00027078431372549024, "loss": 0.4711, "step": 26146 }, { "epoch": 14.607262569832402, "grad_norm": 0.4900790750980377, "learning_rate": 0.0002707563025210084, "loss": 0.5832, "step": 26147 }, { "epoch": 14.607821229050279, "grad_norm": 0.7301142811775208, "learning_rate": 0.0002707282913165266, 
"loss": 0.3963, "step": 26148 }, { "epoch": 14.608379888268157, "grad_norm": 0.3953668177127838, "learning_rate": 0.00027070028011204485, "loss": 0.4456, "step": 26149 }, { "epoch": 14.608938547486034, "grad_norm": 0.5311002731323242, "learning_rate": 0.000270672268907563, "loss": 0.3338, "step": 26150 }, { "epoch": 14.60949720670391, "grad_norm": 0.40936607122421265, "learning_rate": 0.00027064425770308127, "loss": 0.3963, "step": 26151 }, { "epoch": 14.610055865921789, "grad_norm": 42.05873107910156, "learning_rate": 0.00027061624649859947, "loss": 0.4933, "step": 26152 }, { "epoch": 14.610614525139665, "grad_norm": 0.7966561913490295, "learning_rate": 0.0002705882352941176, "loss": 0.4037, "step": 26153 }, { "epoch": 14.611173184357542, "grad_norm": 0.49473223090171814, "learning_rate": 0.0002705602240896359, "loss": 0.4048, "step": 26154 }, { "epoch": 14.611731843575418, "grad_norm": 0.5742973685264587, "learning_rate": 0.00027053221288515404, "loss": 0.5214, "step": 26155 }, { "epoch": 14.612290502793297, "grad_norm": 0.4434870183467865, "learning_rate": 0.0002705042016806723, "loss": 0.3567, "step": 26156 }, { "epoch": 14.612849162011173, "grad_norm": 0.6392762064933777, "learning_rate": 0.0002704761904761905, "loss": 0.4417, "step": 26157 }, { "epoch": 14.61340782122905, "grad_norm": 0.4371165335178375, "learning_rate": 0.00027044817927170865, "loss": 0.398, "step": 26158 }, { "epoch": 14.613966480446928, "grad_norm": 0.40892982482910156, "learning_rate": 0.0002704201680672269, "loss": 0.4626, "step": 26159 }, { "epoch": 14.614525139664805, "grad_norm": 0.3397248387336731, "learning_rate": 0.0002703921568627451, "loss": 0.3824, "step": 26160 }, { "epoch": 14.615083798882681, "grad_norm": 0.3726533353328705, "learning_rate": 0.0002703641456582633, "loss": 0.3862, "step": 26161 }, { "epoch": 14.61564245810056, "grad_norm": 0.3600512146949768, "learning_rate": 0.00027033613445378153, "loss": 0.3747, "step": 26162 }, { "epoch": 14.616201117318436, "grad_norm": 
0.6502101421356201, "learning_rate": 0.0002703081232492997, "loss": 0.516, "step": 26163 }, { "epoch": 14.616759776536313, "grad_norm": 0.40174487233161926, "learning_rate": 0.00027028011204481794, "loss": 0.4119, "step": 26164 }, { "epoch": 14.61731843575419, "grad_norm": 0.8021479249000549, "learning_rate": 0.00027025210084033615, "loss": 0.4718, "step": 26165 }, { "epoch": 14.617877094972068, "grad_norm": 0.8731991648674011, "learning_rate": 0.0002702240896358543, "loss": 0.5393, "step": 26166 }, { "epoch": 14.618435754189944, "grad_norm": 0.4460891783237457, "learning_rate": 0.00027019607843137256, "loss": 0.2959, "step": 26167 }, { "epoch": 14.61899441340782, "grad_norm": 0.4560711085796356, "learning_rate": 0.00027016806722689077, "loss": 0.3374, "step": 26168 }, { "epoch": 14.619553072625699, "grad_norm": 0.45966285467147827, "learning_rate": 0.000270140056022409, "loss": 0.4145, "step": 26169 }, { "epoch": 14.620111731843576, "grad_norm": 0.5002447962760925, "learning_rate": 0.0002701120448179272, "loss": 0.3981, "step": 26170 }, { "epoch": 14.620670391061452, "grad_norm": 0.4015636146068573, "learning_rate": 0.00027008403361344533, "loss": 0.4184, "step": 26171 }, { "epoch": 14.621229050279329, "grad_norm": 0.6292645335197449, "learning_rate": 0.0002700560224089636, "loss": 0.4649, "step": 26172 }, { "epoch": 14.621787709497207, "grad_norm": 0.41048961877822876, "learning_rate": 0.0002700280112044818, "loss": 0.4324, "step": 26173 }, { "epoch": 14.622346368715084, "grad_norm": 0.5327526926994324, "learning_rate": 0.00027, "loss": 0.403, "step": 26174 }, { "epoch": 14.62290502793296, "grad_norm": 0.6859604716300964, "learning_rate": 0.0002699719887955182, "loss": 0.5822, "step": 26175 }, { "epoch": 14.623463687150839, "grad_norm": 1.352912187576294, "learning_rate": 0.0002699439775910364, "loss": 0.3686, "step": 26176 }, { "epoch": 14.624022346368715, "grad_norm": 0.5483441948890686, "learning_rate": 0.0002699159663865546, "loss": 0.3689, "step": 26177 }, { 
"epoch": 14.624581005586592, "grad_norm": 0.5091087222099304, "learning_rate": 0.00026988795518207283, "loss": 0.3912, "step": 26178 }, { "epoch": 14.62513966480447, "grad_norm": 0.5993852019309998, "learning_rate": 0.00026985994397759103, "loss": 0.3838, "step": 26179 }, { "epoch": 14.625698324022347, "grad_norm": 0.45645418763160706, "learning_rate": 0.00026983193277310924, "loss": 0.3715, "step": 26180 }, { "epoch": 14.626256983240223, "grad_norm": 0.4045490026473999, "learning_rate": 0.00026980392156862745, "loss": 0.484, "step": 26181 }, { "epoch": 14.6268156424581, "grad_norm": 0.3664165735244751, "learning_rate": 0.00026977591036414565, "loss": 0.4915, "step": 26182 }, { "epoch": 14.627374301675978, "grad_norm": 0.4844832122325897, "learning_rate": 0.00026974789915966386, "loss": 0.3243, "step": 26183 }, { "epoch": 14.627932960893855, "grad_norm": 0.34051236510276794, "learning_rate": 0.0002697198879551821, "loss": 0.4092, "step": 26184 }, { "epoch": 14.628491620111731, "grad_norm": 0.3885672986507416, "learning_rate": 0.00026969187675070027, "loss": 0.4048, "step": 26185 }, { "epoch": 14.62905027932961, "grad_norm": 1.0388611555099487, "learning_rate": 0.0002696638655462185, "loss": 0.4823, "step": 26186 }, { "epoch": 14.629608938547486, "grad_norm": 0.309492290019989, "learning_rate": 0.0002696358543417367, "loss": 0.3374, "step": 26187 }, { "epoch": 14.630167597765363, "grad_norm": 0.5820735692977905, "learning_rate": 0.0002696078431372549, "loss": 0.4226, "step": 26188 }, { "epoch": 14.630726256983241, "grad_norm": 0.557466447353363, "learning_rate": 0.00026957983193277315, "loss": 0.3548, "step": 26189 }, { "epoch": 14.631284916201118, "grad_norm": 0.40924587845802307, "learning_rate": 0.0002695518207282913, "loss": 0.3518, "step": 26190 }, { "epoch": 14.631843575418994, "grad_norm": 0.499137818813324, "learning_rate": 0.0002695238095238095, "loss": 0.3912, "step": 26191 }, { "epoch": 14.63240223463687, "grad_norm": 0.3898703455924988, "learning_rate": 
0.00026949579831932777, "loss": 0.3434, "step": 26192 }, { "epoch": 14.632960893854749, "grad_norm": 0.40127143263816833, "learning_rate": 0.0002694677871148459, "loss": 0.42, "step": 26193 }, { "epoch": 14.633519553072626, "grad_norm": 22.813966751098633, "learning_rate": 0.0002694397759103642, "loss": 0.3111, "step": 26194 }, { "epoch": 14.634078212290502, "grad_norm": 1.0975518226623535, "learning_rate": 0.00026941176470588233, "loss": 0.3402, "step": 26195 }, { "epoch": 14.63463687150838, "grad_norm": 0.4779471457004547, "learning_rate": 0.00026938375350140054, "loss": 0.4725, "step": 26196 }, { "epoch": 14.635195530726257, "grad_norm": 0.6070505976676941, "learning_rate": 0.0002693557422969188, "loss": 0.432, "step": 26197 }, { "epoch": 14.635754189944134, "grad_norm": 0.5175840854644775, "learning_rate": 0.00026932773109243695, "loss": 0.5037, "step": 26198 }, { "epoch": 14.63631284916201, "grad_norm": 0.5430041551589966, "learning_rate": 0.0002692997198879552, "loss": 0.3568, "step": 26199 }, { "epoch": 14.636871508379889, "grad_norm": 1.4578442573547363, "learning_rate": 0.0002692717086834734, "loss": 0.4893, "step": 26200 }, { "epoch": 14.637430167597765, "grad_norm": 0.5916971564292908, "learning_rate": 0.00026924369747899157, "loss": 0.3631, "step": 26201 }, { "epoch": 14.637988826815642, "grad_norm": 1.3853681087493896, "learning_rate": 0.0002692156862745098, "loss": 0.5891, "step": 26202 }, { "epoch": 14.63854748603352, "grad_norm": 0.3974631726741791, "learning_rate": 0.000269187675070028, "loss": 0.3572, "step": 26203 }, { "epoch": 14.639106145251397, "grad_norm": 0.6649497151374817, "learning_rate": 0.00026915966386554624, "loss": 0.4976, "step": 26204 }, { "epoch": 14.639664804469273, "grad_norm": 0.4823826849460602, "learning_rate": 0.00026913165266106444, "loss": 0.437, "step": 26205 }, { "epoch": 14.640223463687152, "grad_norm": 0.668759286403656, "learning_rate": 0.0002691036414565826, "loss": 0.4794, "step": 26206 }, { "epoch": 
14.640782122905028, "grad_norm": 1.0757184028625488, "learning_rate": 0.00026907563025210086, "loss": 0.3916, "step": 26207 }, { "epoch": 14.641340782122905, "grad_norm": 2.5164954662323, "learning_rate": 0.00026904761904761906, "loss": 0.5758, "step": 26208 }, { "epoch": 14.641899441340783, "grad_norm": 0.41021421551704407, "learning_rate": 0.00026901960784313727, "loss": 0.4502, "step": 26209 }, { "epoch": 14.64245810055866, "grad_norm": 2.5434138774871826, "learning_rate": 0.0002689915966386555, "loss": 0.4813, "step": 26210 }, { "epoch": 14.643016759776536, "grad_norm": 0.5545975565910339, "learning_rate": 0.0002689635854341736, "loss": 0.4348, "step": 26211 }, { "epoch": 14.643575418994413, "grad_norm": 1.3623853921890259, "learning_rate": 0.0002689355742296919, "loss": 0.3345, "step": 26212 }, { "epoch": 14.644134078212291, "grad_norm": 1.0093461275100708, "learning_rate": 0.0002689075630252101, "loss": 0.5207, "step": 26213 }, { "epoch": 14.644692737430168, "grad_norm": 0.3987479507923126, "learning_rate": 0.0002688795518207283, "loss": 0.492, "step": 26214 }, { "epoch": 14.645251396648044, "grad_norm": 0.4499443471431732, "learning_rate": 0.0002688515406162465, "loss": 0.4389, "step": 26215 }, { "epoch": 14.645810055865923, "grad_norm": 0.8201692700386047, "learning_rate": 0.0002688235294117647, "loss": 0.4016, "step": 26216 }, { "epoch": 14.6463687150838, "grad_norm": 0.6254597306251526, "learning_rate": 0.0002687955182072829, "loss": 0.4247, "step": 26217 }, { "epoch": 14.646927374301676, "grad_norm": 0.32390525937080383, "learning_rate": 0.0002687675070028011, "loss": 0.3831, "step": 26218 }, { "epoch": 14.647486033519552, "grad_norm": 0.4069139361381531, "learning_rate": 0.0002687394957983194, "loss": 0.4152, "step": 26219 }, { "epoch": 14.64804469273743, "grad_norm": 0.37570512294769287, "learning_rate": 0.00026871148459383753, "loss": 0.4136, "step": 26220 }, { "epoch": 14.648603351955307, "grad_norm": 0.6094589829444885, "learning_rate": 
0.00026868347338935574, "loss": 0.5027, "step": 26221 }, { "epoch": 14.649162011173184, "grad_norm": 0.39261001348495483, "learning_rate": 0.00026865546218487395, "loss": 0.4587, "step": 26222 }, { "epoch": 14.649720670391062, "grad_norm": 0.5667718648910522, "learning_rate": 0.00026862745098039215, "loss": 0.5456, "step": 26223 }, { "epoch": 14.650279329608939, "grad_norm": 0.7404841780662537, "learning_rate": 0.0002685994397759104, "loss": 0.4915, "step": 26224 }, { "epoch": 14.650837988826815, "grad_norm": 0.6316376328468323, "learning_rate": 0.00026857142857142856, "loss": 0.4496, "step": 26225 }, { "epoch": 14.651396648044694, "grad_norm": 0.4138798713684082, "learning_rate": 0.00026854341736694677, "loss": 0.4422, "step": 26226 }, { "epoch": 14.65195530726257, "grad_norm": 0.5987324118614197, "learning_rate": 0.00026851540616246503, "loss": 0.3764, "step": 26227 }, { "epoch": 14.652513966480447, "grad_norm": 0.5430089831352234, "learning_rate": 0.0002684873949579832, "loss": 0.4924, "step": 26228 }, { "epoch": 14.653072625698323, "grad_norm": 0.37948867678642273, "learning_rate": 0.00026845938375350144, "loss": 0.3416, "step": 26229 }, { "epoch": 14.653631284916202, "grad_norm": 1.177739143371582, "learning_rate": 0.0002684313725490196, "loss": 0.4222, "step": 26230 }, { "epoch": 14.654189944134078, "grad_norm": 1.028996467590332, "learning_rate": 0.0002684033613445378, "loss": 0.403, "step": 26231 }, { "epoch": 14.654748603351955, "grad_norm": 0.3851401209831238, "learning_rate": 0.00026837535014005606, "loss": 0.3784, "step": 26232 }, { "epoch": 14.655307262569833, "grad_norm": 0.6418029069900513, "learning_rate": 0.0002683473389355742, "loss": 0.4123, "step": 26233 }, { "epoch": 14.65586592178771, "grad_norm": 0.49686112999916077, "learning_rate": 0.00026831932773109247, "loss": 0.6794, "step": 26234 }, { "epoch": 14.656424581005586, "grad_norm": 0.5954158902168274, "learning_rate": 0.0002682913165266107, "loss": 0.3666, "step": 26235 }, { "epoch": 
14.656983240223465, "grad_norm": 0.5749161243438721, "learning_rate": 0.00026826330532212883, "loss": 0.4424, "step": 26236 }, { "epoch": 14.657541899441341, "grad_norm": 0.4638644754886627, "learning_rate": 0.0002682352941176471, "loss": 0.4241, "step": 26237 }, { "epoch": 14.658100558659218, "grad_norm": 0.3733034133911133, "learning_rate": 0.00026820728291316524, "loss": 0.3891, "step": 26238 }, { "epoch": 14.658659217877094, "grad_norm": 0.3820943832397461, "learning_rate": 0.0002681792717086835, "loss": 0.4636, "step": 26239 }, { "epoch": 14.659217877094973, "grad_norm": 0.47946685552597046, "learning_rate": 0.0002681512605042017, "loss": 0.4383, "step": 26240 }, { "epoch": 14.65977653631285, "grad_norm": 0.9269028306007385, "learning_rate": 0.00026812324929971986, "loss": 0.4399, "step": 26241 }, { "epoch": 14.660335195530726, "grad_norm": 0.5003195405006409, "learning_rate": 0.0002680952380952381, "loss": 0.375, "step": 26242 }, { "epoch": 14.660893854748604, "grad_norm": 0.5342738032341003, "learning_rate": 0.0002680672268907563, "loss": 0.6067, "step": 26243 }, { "epoch": 14.66145251396648, "grad_norm": 0.47403308749198914, "learning_rate": 0.00026803921568627453, "loss": 0.4541, "step": 26244 }, { "epoch": 14.662011173184357, "grad_norm": 0.6981449127197266, "learning_rate": 0.00026801120448179274, "loss": 0.4106, "step": 26245 }, { "epoch": 14.662569832402234, "grad_norm": 0.3924463093280792, "learning_rate": 0.0002679831932773109, "loss": 0.4244, "step": 26246 }, { "epoch": 14.663128491620112, "grad_norm": 0.5522960424423218, "learning_rate": 0.00026795518207282915, "loss": 0.4407, "step": 26247 }, { "epoch": 14.663687150837989, "grad_norm": 0.5107319355010986, "learning_rate": 0.00026792717086834736, "loss": 0.4159, "step": 26248 }, { "epoch": 14.664245810055865, "grad_norm": 4.181353569030762, "learning_rate": 0.00026789915966386556, "loss": 0.3792, "step": 26249 }, { "epoch": 14.664804469273744, "grad_norm": 0.6626802086830139, "learning_rate": 
0.00026787114845938377, "loss": 0.4671, "step": 26250 }, { "epoch": 14.66536312849162, "grad_norm": 0.3738318085670471, "learning_rate": 0.000267843137254902, "loss": 0.4495, "step": 26251 }, { "epoch": 14.665921787709497, "grad_norm": 0.5367510914802551, "learning_rate": 0.0002678151260504202, "loss": 0.3019, "step": 26252 }, { "epoch": 14.666480446927375, "grad_norm": 0.5658826231956482, "learning_rate": 0.0002677871148459384, "loss": 0.5239, "step": 26253 }, { "epoch": 14.667039106145252, "grad_norm": 0.4039609730243683, "learning_rate": 0.0002677591036414566, "loss": 0.4327, "step": 26254 }, { "epoch": 14.667597765363128, "grad_norm": 0.42840760946273804, "learning_rate": 0.0002677310924369748, "loss": 0.4675, "step": 26255 }, { "epoch": 14.668156424581005, "grad_norm": 0.47770094871520996, "learning_rate": 0.000267703081232493, "loss": 0.5503, "step": 26256 }, { "epoch": 14.668715083798883, "grad_norm": 0.588168740272522, "learning_rate": 0.0002676750700280112, "loss": 0.437, "step": 26257 }, { "epoch": 14.66927374301676, "grad_norm": 0.5643516182899475, "learning_rate": 0.0002676470588235294, "loss": 0.4007, "step": 26258 }, { "epoch": 14.669832402234636, "grad_norm": 0.49603214859962463, "learning_rate": 0.0002676190476190477, "loss": 0.3827, "step": 26259 }, { "epoch": 14.670391061452515, "grad_norm": 0.47800755500793457, "learning_rate": 0.00026759103641456583, "loss": 0.3589, "step": 26260 }, { "epoch": 14.670949720670391, "grad_norm": 0.32600730657577515, "learning_rate": 0.00026756302521008403, "loss": 0.3427, "step": 26261 }, { "epoch": 14.671508379888268, "grad_norm": 0.5929835438728333, "learning_rate": 0.00026753501400560224, "loss": 0.4558, "step": 26262 }, { "epoch": 14.672067039106146, "grad_norm": 0.40991365909576416, "learning_rate": 0.00026750700280112045, "loss": 0.477, "step": 26263 }, { "epoch": 14.672625698324023, "grad_norm": 0.5074556469917297, "learning_rate": 0.0002674789915966387, "loss": 0.4633, "step": 26264 }, { "epoch": 
14.6731843575419, "grad_norm": 0.5383959412574768, "learning_rate": 0.00026745098039215686, "loss": 0.4367, "step": 26265 }, { "epoch": 14.673743016759776, "grad_norm": 0.6705188155174255, "learning_rate": 0.00026742296918767506, "loss": 0.393, "step": 26266 }, { "epoch": 14.674301675977654, "grad_norm": 0.5704450607299805, "learning_rate": 0.0002673949579831933, "loss": 0.4979, "step": 26267 }, { "epoch": 14.67486033519553, "grad_norm": 0.381169855594635, "learning_rate": 0.0002673669467787115, "loss": 0.3592, "step": 26268 }, { "epoch": 14.675418994413407, "grad_norm": 0.4601171612739563, "learning_rate": 0.00026733893557422974, "loss": 0.4048, "step": 26269 }, { "epoch": 14.675977653631286, "grad_norm": 0.4392685890197754, "learning_rate": 0.0002673109243697479, "loss": 0.4004, "step": 26270 }, { "epoch": 14.676536312849162, "grad_norm": 0.32139235734939575, "learning_rate": 0.0002672829131652661, "loss": 0.3192, "step": 26271 }, { "epoch": 14.677094972067039, "grad_norm": 0.3459639847278595, "learning_rate": 0.00026725490196078435, "loss": 0.3346, "step": 26272 }, { "epoch": 14.677653631284915, "grad_norm": 0.37067097425460815, "learning_rate": 0.0002672268907563025, "loss": 0.3428, "step": 26273 }, { "epoch": 14.678212290502794, "grad_norm": 0.578516960144043, "learning_rate": 0.00026719887955182077, "loss": 0.4317, "step": 26274 }, { "epoch": 14.67877094972067, "grad_norm": 0.7438474893569946, "learning_rate": 0.00026717086834733897, "loss": 0.3312, "step": 26275 }, { "epoch": 14.679329608938547, "grad_norm": 0.857010543346405, "learning_rate": 0.0002671428571428571, "loss": 0.3472, "step": 26276 }, { "epoch": 14.679888268156425, "grad_norm": 0.4259175658226013, "learning_rate": 0.0002671148459383754, "loss": 0.4157, "step": 26277 }, { "epoch": 14.680446927374302, "grad_norm": 0.5690827369689941, "learning_rate": 0.00026708683473389354, "loss": 0.5219, "step": 26278 }, { "epoch": 14.681005586592178, "grad_norm": 0.3690986633300781, "learning_rate": 
0.00026705882352941174, "loss": 0.3812, "step": 26279 }, { "epoch": 14.681564245810057, "grad_norm": 0.5454613566398621, "learning_rate": 0.00026703081232493, "loss": 0.4324, "step": 26280 }, { "epoch": 14.682122905027933, "grad_norm": 1.6613110303878784, "learning_rate": 0.00026700280112044815, "loss": 0.4891, "step": 26281 }, { "epoch": 14.68268156424581, "grad_norm": 0.501953125, "learning_rate": 0.0002669747899159664, "loss": 0.2629, "step": 26282 }, { "epoch": 14.683240223463688, "grad_norm": 0.483599454164505, "learning_rate": 0.0002669467787114846, "loss": 0.377, "step": 26283 }, { "epoch": 14.683798882681565, "grad_norm": 0.4072704017162323, "learning_rate": 0.00026691876750700277, "loss": 0.4607, "step": 26284 }, { "epoch": 14.684357541899441, "grad_norm": 0.5300388932228088, "learning_rate": 0.00026689075630252103, "loss": 0.4773, "step": 26285 }, { "epoch": 14.684916201117318, "grad_norm": 0.7170148491859436, "learning_rate": 0.0002668627450980392, "loss": 0.5422, "step": 26286 }, { "epoch": 14.685474860335196, "grad_norm": 0.45028436183929443, "learning_rate": 0.00026683473389355744, "loss": 0.308, "step": 26287 }, { "epoch": 14.686033519553073, "grad_norm": 2.491564989089966, "learning_rate": 0.00026680672268907565, "loss": 0.4194, "step": 26288 }, { "epoch": 14.68659217877095, "grad_norm": 1.1377471685409546, "learning_rate": 0.0002667787114845938, "loss": 0.3476, "step": 26289 }, { "epoch": 14.687150837988828, "grad_norm": 1.7199230194091797, "learning_rate": 0.00026675070028011206, "loss": 0.3383, "step": 26290 }, { "epoch": 14.687709497206704, "grad_norm": 0.4311559498310089, "learning_rate": 0.00026672268907563027, "loss": 0.4418, "step": 26291 }, { "epoch": 14.68826815642458, "grad_norm": 0.3717029094696045, "learning_rate": 0.0002666946778711485, "loss": 0.396, "step": 26292 }, { "epoch": 14.688826815642457, "grad_norm": 0.6051945686340332, "learning_rate": 0.0002666666666666667, "loss": 0.3519, "step": 26293 }, { "epoch": 14.689385474860336, 
"grad_norm": 0.4016841650009155, "learning_rate": 0.00026663865546218483, "loss": 0.4889, "step": 26294 }, { "epoch": 14.689944134078212, "grad_norm": 1.0330736637115479, "learning_rate": 0.0002666106442577031, "loss": 0.4406, "step": 26295 }, { "epoch": 14.690502793296089, "grad_norm": 1.0698660612106323, "learning_rate": 0.0002665826330532213, "loss": 0.4468, "step": 26296 }, { "epoch": 14.691061452513967, "grad_norm": 1.1493093967437744, "learning_rate": 0.0002665546218487395, "loss": 0.4364, "step": 26297 }, { "epoch": 14.691620111731844, "grad_norm": 0.29861214756965637, "learning_rate": 0.0002665266106442577, "loss": 0.2787, "step": 26298 }, { "epoch": 14.69217877094972, "grad_norm": 0.8863983154296875, "learning_rate": 0.0002664985994397759, "loss": 0.3846, "step": 26299 }, { "epoch": 14.692737430167599, "grad_norm": 0.6042300462722778, "learning_rate": 0.0002664705882352941, "loss": 0.3744, "step": 26300 }, { "epoch": 14.693296089385475, "grad_norm": 0.4075743556022644, "learning_rate": 0.00026644257703081233, "loss": 0.4425, "step": 26301 }, { "epoch": 14.693854748603352, "grad_norm": 0.4086613655090332, "learning_rate": 0.00026641456582633053, "loss": 0.4512, "step": 26302 }, { "epoch": 14.694413407821228, "grad_norm": 0.6771743297576904, "learning_rate": 0.00026638655462184874, "loss": 0.5386, "step": 26303 }, { "epoch": 14.694972067039107, "grad_norm": 0.6762493252754211, "learning_rate": 0.00026635854341736695, "loss": 0.3726, "step": 26304 }, { "epoch": 14.695530726256983, "grad_norm": 0.7674379944801331, "learning_rate": 0.00026633053221288515, "loss": 0.3424, "step": 26305 }, { "epoch": 14.69608938547486, "grad_norm": 0.4134715497493744, "learning_rate": 0.00026630252100840336, "loss": 0.4478, "step": 26306 }, { "epoch": 14.696648044692738, "grad_norm": 0.7435882091522217, "learning_rate": 0.0002662745098039216, "loss": 0.4822, "step": 26307 }, { "epoch": 14.697206703910615, "grad_norm": 0.5122054219245911, "learning_rate": 0.00026624649859943977, 
"loss": 0.4371, "step": 26308 }, { "epoch": 14.697765363128491, "grad_norm": 0.5374253988265991, "learning_rate": 0.000266218487394958, "loss": 0.5478, "step": 26309 }, { "epoch": 14.69832402234637, "grad_norm": 0.392660528421402, "learning_rate": 0.0002661904761904762, "loss": 0.4816, "step": 26310 }, { "epoch": 14.698882681564246, "grad_norm": 0.7365900874137878, "learning_rate": 0.0002661624649859944, "loss": 0.4212, "step": 26311 }, { "epoch": 14.699441340782123, "grad_norm": 1.0071672201156616, "learning_rate": 0.00026613445378151265, "loss": 0.3028, "step": 26312 }, { "epoch": 14.7, "grad_norm": 0.4007118344306946, "learning_rate": 0.0002661064425770308, "loss": 0.4014, "step": 26313 }, { "epoch": 14.700558659217878, "grad_norm": 0.37256067991256714, "learning_rate": 0.000266078431372549, "loss": 0.3697, "step": 26314 }, { "epoch": 14.701117318435754, "grad_norm": 0.4756200313568115, "learning_rate": 0.00026605042016806727, "loss": 0.4586, "step": 26315 }, { "epoch": 14.70167597765363, "grad_norm": 0.5360227227210999, "learning_rate": 0.0002660224089635854, "loss": 0.4186, "step": 26316 }, { "epoch": 14.702234636871509, "grad_norm": 0.46384015679359436, "learning_rate": 0.0002659943977591037, "loss": 0.3584, "step": 26317 }, { "epoch": 14.702793296089386, "grad_norm": 0.4285893142223358, "learning_rate": 0.00026596638655462183, "loss": 0.3876, "step": 26318 }, { "epoch": 14.703351955307262, "grad_norm": 0.46843311190605164, "learning_rate": 0.00026593837535014004, "loss": 0.386, "step": 26319 }, { "epoch": 14.703910614525139, "grad_norm": 0.34277692437171936, "learning_rate": 0.0002659103641456583, "loss": 0.3437, "step": 26320 }, { "epoch": 14.704469273743017, "grad_norm": 0.5339342355728149, "learning_rate": 0.00026588235294117645, "loss": 0.3317, "step": 26321 }, { "epoch": 14.705027932960894, "grad_norm": 0.8739034533500671, "learning_rate": 0.0002658543417366947, "loss": 0.4804, "step": 26322 }, { "epoch": 14.70558659217877, "grad_norm": 
0.6812794804573059, "learning_rate": 0.0002658263305322129, "loss": 0.4258, "step": 26323 }, { "epoch": 14.706145251396649, "grad_norm": 0.3642677366733551, "learning_rate": 0.00026579831932773107, "loss": 0.5258, "step": 26324 }, { "epoch": 14.706703910614525, "grad_norm": 0.44004127383232117, "learning_rate": 0.0002657703081232493, "loss": 0.4812, "step": 26325 }, { "epoch": 14.707262569832402, "grad_norm": 0.9689213037490845, "learning_rate": 0.0002657422969187675, "loss": 0.4773, "step": 26326 }, { "epoch": 14.70782122905028, "grad_norm": 0.5342394709587097, "learning_rate": 0.00026571428571428574, "loss": 0.4071, "step": 26327 }, { "epoch": 14.708379888268157, "grad_norm": 0.37837132811546326, "learning_rate": 0.00026568627450980394, "loss": 0.3604, "step": 26328 }, { "epoch": 14.708938547486033, "grad_norm": 0.381149560213089, "learning_rate": 0.0002656582633053221, "loss": 0.3815, "step": 26329 }, { "epoch": 14.70949720670391, "grad_norm": 0.5717688202857971, "learning_rate": 0.00026563025210084036, "loss": 0.3629, "step": 26330 }, { "epoch": 14.710055865921788, "grad_norm": 23.346769332885742, "learning_rate": 0.00026560224089635856, "loss": 0.5919, "step": 26331 }, { "epoch": 14.710614525139665, "grad_norm": 0.48594433069229126, "learning_rate": 0.00026557422969187677, "loss": 0.4452, "step": 26332 }, { "epoch": 14.711173184357541, "grad_norm": 0.7142903208732605, "learning_rate": 0.000265546218487395, "loss": 0.4418, "step": 26333 }, { "epoch": 14.71173184357542, "grad_norm": 0.5551584959030151, "learning_rate": 0.0002655182072829131, "loss": 0.4243, "step": 26334 }, { "epoch": 14.712290502793296, "grad_norm": 0.41218405961990356, "learning_rate": 0.0002654901960784314, "loss": 0.469, "step": 26335 }, { "epoch": 14.712849162011173, "grad_norm": 0.7614871859550476, "learning_rate": 0.0002654621848739496, "loss": 0.3932, "step": 26336 }, { "epoch": 14.713407821229051, "grad_norm": 0.45670029520988464, "learning_rate": 0.0002654341736694678, "loss": 0.5043, 
"step": 26337 }, { "epoch": 14.713966480446928, "grad_norm": 0.4401882290840149, "learning_rate": 0.000265406162464986, "loss": 0.3796, "step": 26338 }, { "epoch": 14.714525139664804, "grad_norm": 0.6125187873840332, "learning_rate": 0.0002653781512605042, "loss": 0.3395, "step": 26339 }, { "epoch": 14.71508379888268, "grad_norm": 0.5906593203544617, "learning_rate": 0.0002653501400560224, "loss": 0.5487, "step": 26340 }, { "epoch": 14.71564245810056, "grad_norm": 0.7808915376663208, "learning_rate": 0.0002653221288515406, "loss": 0.4719, "step": 26341 }, { "epoch": 14.716201117318436, "grad_norm": 0.3673170506954193, "learning_rate": 0.00026529411764705883, "loss": 0.399, "step": 26342 }, { "epoch": 14.716759776536312, "grad_norm": 0.5387362241744995, "learning_rate": 0.00026526610644257703, "loss": 0.4279, "step": 26343 }, { "epoch": 14.71731843575419, "grad_norm": 0.518286943435669, "learning_rate": 0.00026523809523809524, "loss": 0.4499, "step": 26344 }, { "epoch": 14.717877094972067, "grad_norm": 1.57997727394104, "learning_rate": 0.00026521008403361345, "loss": 0.3926, "step": 26345 }, { "epoch": 14.718435754189944, "grad_norm": 0.5612686276435852, "learning_rate": 0.00026518207282913165, "loss": 0.3542, "step": 26346 }, { "epoch": 14.71899441340782, "grad_norm": 0.44491249322891235, "learning_rate": 0.0002651540616246499, "loss": 0.3814, "step": 26347 }, { "epoch": 14.719553072625699, "grad_norm": 0.4358351528644562, "learning_rate": 0.00026512605042016806, "loss": 0.4099, "step": 26348 }, { "epoch": 14.720111731843575, "grad_norm": 0.5698142051696777, "learning_rate": 0.00026509803921568627, "loss": 0.428, "step": 26349 }, { "epoch": 14.720670391061452, "grad_norm": 1.27790367603302, "learning_rate": 0.0002650700280112045, "loss": 0.3452, "step": 26350 }, { "epoch": 14.72122905027933, "grad_norm": 0.5082082748413086, "learning_rate": 0.0002650420168067227, "loss": 0.3512, "step": 26351 }, { "epoch": 14.721787709497207, "grad_norm": 0.6949784755706787, 
"learning_rate": 0.00026501400560224094, "loss": 0.4782, "step": 26352 }, { "epoch": 14.722346368715083, "grad_norm": 0.5166410803794861, "learning_rate": 0.0002649859943977591, "loss": 0.3365, "step": 26353 }, { "epoch": 14.722905027932962, "grad_norm": 0.3750188946723938, "learning_rate": 0.0002649579831932773, "loss": 0.3802, "step": 26354 }, { "epoch": 14.723463687150838, "grad_norm": 0.45565059781074524, "learning_rate": 0.00026492997198879556, "loss": 0.4176, "step": 26355 }, { "epoch": 14.724022346368715, "grad_norm": 3.277662992477417, "learning_rate": 0.0002649019607843137, "loss": 0.3548, "step": 26356 }, { "epoch": 14.724581005586593, "grad_norm": 0.9732744097709656, "learning_rate": 0.00026487394957983197, "loss": 0.4092, "step": 26357 }, { "epoch": 14.72513966480447, "grad_norm": 0.38876616954803467, "learning_rate": 0.0002648459383753501, "loss": 0.4006, "step": 26358 }, { "epoch": 14.725698324022346, "grad_norm": 0.5247053503990173, "learning_rate": 0.00026481792717086833, "loss": 0.4387, "step": 26359 }, { "epoch": 14.726256983240223, "grad_norm": 0.5945095419883728, "learning_rate": 0.0002647899159663866, "loss": 0.3847, "step": 26360 }, { "epoch": 14.726815642458101, "grad_norm": 0.5156911611557007, "learning_rate": 0.00026476190476190474, "loss": 0.3732, "step": 26361 }, { "epoch": 14.727374301675978, "grad_norm": 0.4860561490058899, "learning_rate": 0.000264733893557423, "loss": 0.5057, "step": 26362 }, { "epoch": 14.727932960893854, "grad_norm": 1.3079116344451904, "learning_rate": 0.0002647058823529412, "loss": 0.5015, "step": 26363 }, { "epoch": 14.728491620111733, "grad_norm": 0.38649171590805054, "learning_rate": 0.00026467787114845936, "loss": 0.404, "step": 26364 }, { "epoch": 14.72905027932961, "grad_norm": 0.42645263671875, "learning_rate": 0.0002646498599439776, "loss": 0.4418, "step": 26365 }, { "epoch": 14.729608938547486, "grad_norm": 0.42595547437667847, "learning_rate": 0.00026462184873949577, "loss": 0.3842, "step": 26366 }, { 
"epoch": 14.730167597765362, "grad_norm": 4.825133800506592, "learning_rate": 0.00026459383753501403, "loss": 0.5028, "step": 26367 }, { "epoch": 14.73072625698324, "grad_norm": 0.40822741389274597, "learning_rate": 0.00026456582633053224, "loss": 0.4193, "step": 26368 }, { "epoch": 14.731284916201117, "grad_norm": 0.7829367518424988, "learning_rate": 0.0002645378151260504, "loss": 0.5717, "step": 26369 }, { "epoch": 14.731843575418994, "grad_norm": 1.4182761907577515, "learning_rate": 0.00026450980392156865, "loss": 0.4274, "step": 26370 }, { "epoch": 14.732402234636872, "grad_norm": 2.1350560188293457, "learning_rate": 0.00026448179271708686, "loss": 0.3759, "step": 26371 }, { "epoch": 14.732960893854749, "grad_norm": 0.42354243993759155, "learning_rate": 0.00026445378151260506, "loss": 0.4954, "step": 26372 }, { "epoch": 14.733519553072625, "grad_norm": 0.35651227831840515, "learning_rate": 0.00026442577030812327, "loss": 0.3238, "step": 26373 }, { "epoch": 14.734078212290502, "grad_norm": 0.6394851803779602, "learning_rate": 0.0002643977591036414, "loss": 0.4724, "step": 26374 }, { "epoch": 14.73463687150838, "grad_norm": 0.38952717185020447, "learning_rate": 0.0002643697478991597, "loss": 0.4467, "step": 26375 }, { "epoch": 14.735195530726257, "grad_norm": 0.7485803365707397, "learning_rate": 0.0002643417366946779, "loss": 0.4046, "step": 26376 }, { "epoch": 14.735754189944133, "grad_norm": 0.5602471232414246, "learning_rate": 0.0002643137254901961, "loss": 0.3492, "step": 26377 }, { "epoch": 14.736312849162012, "grad_norm": 0.9912500381469727, "learning_rate": 0.0002642857142857143, "loss": 0.3639, "step": 26378 }, { "epoch": 14.736871508379888, "grad_norm": 0.6238000988960266, "learning_rate": 0.0002642577030812325, "loss": 0.3999, "step": 26379 }, { "epoch": 14.737430167597765, "grad_norm": 0.7098654508590698, "learning_rate": 0.0002642296918767507, "loss": 0.6118, "step": 26380 }, { "epoch": 14.737988826815643, "grad_norm": 0.38345661759376526, 
"learning_rate": 0.0002642016806722689, "loss": 0.3558, "step": 26381 }, { "epoch": 14.73854748603352, "grad_norm": 0.5556296706199646, "learning_rate": 0.0002641736694677871, "loss": 0.4296, "step": 26382 }, { "epoch": 14.739106145251396, "grad_norm": 0.5271759629249573, "learning_rate": 0.00026414565826330533, "loss": 0.4747, "step": 26383 }, { "epoch": 14.739664804469275, "grad_norm": 0.5730955600738525, "learning_rate": 0.00026411764705882353, "loss": 0.4096, "step": 26384 }, { "epoch": 14.740223463687151, "grad_norm": 0.5778507590293884, "learning_rate": 0.00026408963585434174, "loss": 0.4117, "step": 26385 }, { "epoch": 14.740782122905028, "grad_norm": 0.3964424729347229, "learning_rate": 0.00026406162464985995, "loss": 0.3721, "step": 26386 }, { "epoch": 14.741340782122904, "grad_norm": 0.7801333069801331, "learning_rate": 0.00026403361344537815, "loss": 0.4514, "step": 26387 }, { "epoch": 14.741899441340783, "grad_norm": 0.42704489827156067, "learning_rate": 0.00026400560224089636, "loss": 0.4384, "step": 26388 }, { "epoch": 14.74245810055866, "grad_norm": 0.6510639786720276, "learning_rate": 0.00026397759103641456, "loss": 0.5364, "step": 26389 }, { "epoch": 14.743016759776536, "grad_norm": 0.4674013555049896, "learning_rate": 0.00026394957983193277, "loss": 0.5052, "step": 26390 }, { "epoch": 14.743575418994414, "grad_norm": 0.3671068847179413, "learning_rate": 0.000263921568627451, "loss": 0.339, "step": 26391 }, { "epoch": 14.74413407821229, "grad_norm": 0.6780019402503967, "learning_rate": 0.0002638935574229692, "loss": 0.4556, "step": 26392 }, { "epoch": 14.744692737430167, "grad_norm": 1.0539917945861816, "learning_rate": 0.0002638655462184874, "loss": 0.5037, "step": 26393 }, { "epoch": 14.745251396648044, "grad_norm": 0.7187385559082031, "learning_rate": 0.0002638375350140056, "loss": 0.414, "step": 26394 }, { "epoch": 14.745810055865922, "grad_norm": 1.1761001348495483, "learning_rate": 0.00026380952380952385, "loss": 0.6764, "step": 26395 }, { 
"epoch": 14.746368715083799, "grad_norm": 1.4172708988189697, "learning_rate": 0.000263781512605042, "loss": 0.3297, "step": 26396 }, { "epoch": 14.746927374301675, "grad_norm": 1.9798983335494995, "learning_rate": 0.0002637535014005602, "loss": 0.4297, "step": 26397 }, { "epoch": 14.747486033519554, "grad_norm": 1.5195233821868896, "learning_rate": 0.0002637254901960784, "loss": 0.3061, "step": 26398 }, { "epoch": 14.74804469273743, "grad_norm": 0.36168909072875977, "learning_rate": 0.0002636974789915966, "loss": 0.396, "step": 26399 }, { "epoch": 14.748603351955307, "grad_norm": 0.5995067358016968, "learning_rate": 0.0002636694677871149, "loss": 0.6228, "step": 26400 }, { "epoch": 14.749162011173185, "grad_norm": 1.1359343528747559, "learning_rate": 0.00026364145658263304, "loss": 0.4636, "step": 26401 }, { "epoch": 14.749720670391062, "grad_norm": 0.4448784589767456, "learning_rate": 0.00026361344537815124, "loss": 0.3464, "step": 26402 }, { "epoch": 14.750279329608938, "grad_norm": 0.7094453573226929, "learning_rate": 0.0002635854341736695, "loss": 0.4707, "step": 26403 }, { "epoch": 14.750837988826815, "grad_norm": 0.4814882278442383, "learning_rate": 0.00026355742296918765, "loss": 0.3963, "step": 26404 }, { "epoch": 14.751396648044693, "grad_norm": 0.4036135971546173, "learning_rate": 0.0002635294117647059, "loss": 0.3986, "step": 26405 }, { "epoch": 14.75195530726257, "grad_norm": 3.5774288177490234, "learning_rate": 0.00026350140056022407, "loss": 0.4137, "step": 26406 }, { "epoch": 14.752513966480446, "grad_norm": 0.5589652061462402, "learning_rate": 0.00026347338935574227, "loss": 0.3292, "step": 26407 }, { "epoch": 14.753072625698325, "grad_norm": 0.382071316242218, "learning_rate": 0.00026344537815126053, "loss": 0.3506, "step": 26408 }, { "epoch": 14.753631284916201, "grad_norm": 0.3391832113265991, "learning_rate": 0.0002634173669467787, "loss": 0.4145, "step": 26409 }, { "epoch": 14.754189944134078, "grad_norm": 0.37645387649536133, "learning_rate": 
0.00026338935574229694, "loss": 0.4505, "step": 26410 }, { "epoch": 14.754748603351956, "grad_norm": 0.4456816613674164, "learning_rate": 0.00026336134453781515, "loss": 0.4733, "step": 26411 }, { "epoch": 14.755307262569833, "grad_norm": 1.2704954147338867, "learning_rate": 0.0002633333333333333, "loss": 0.2975, "step": 26412 }, { "epoch": 14.75586592178771, "grad_norm": 0.5297966003417969, "learning_rate": 0.00026330532212885156, "loss": 0.6294, "step": 26413 }, { "epoch": 14.756424581005586, "grad_norm": 0.36453700065612793, "learning_rate": 0.0002632773109243697, "loss": 0.4206, "step": 26414 }, { "epoch": 14.756983240223464, "grad_norm": 0.45416614413261414, "learning_rate": 0.000263249299719888, "loss": 0.4805, "step": 26415 }, { "epoch": 14.75754189944134, "grad_norm": 0.8847922086715698, "learning_rate": 0.0002632212885154062, "loss": 0.4523, "step": 26416 }, { "epoch": 14.758100558659217, "grad_norm": 0.5305850505828857, "learning_rate": 0.00026319327731092433, "loss": 0.3817, "step": 26417 }, { "epoch": 14.758659217877096, "grad_norm": 0.7760154604911804, "learning_rate": 0.0002631652661064426, "loss": 0.4457, "step": 26418 }, { "epoch": 14.759217877094972, "grad_norm": 0.6772235631942749, "learning_rate": 0.0002631372549019608, "loss": 0.4001, "step": 26419 }, { "epoch": 14.759776536312849, "grad_norm": 0.6633051633834839, "learning_rate": 0.000263109243697479, "loss": 0.4248, "step": 26420 }, { "epoch": 14.760335195530725, "grad_norm": 0.2931738793849945, "learning_rate": 0.0002630812324929972, "loss": 0.297, "step": 26421 }, { "epoch": 14.760893854748604, "grad_norm": 0.41645562648773193, "learning_rate": 0.00026305322128851536, "loss": 0.432, "step": 26422 }, { "epoch": 14.76145251396648, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002630252100840336, "loss": 0.3172, "step": 26423 }, { "epoch": 14.762011173184357, "grad_norm": 0.4276534616947174, "learning_rate": 0.00026299719887955183, "loss": 0.3534, "step": 26424 }, { "epoch": 
14.762569832402235, "grad_norm": 1.4451985359191895, "learning_rate": 0.00026296918767507003, "loss": 0.5441, "step": 26425 }, { "epoch": 14.763128491620112, "grad_norm": 0.41230452060699463, "learning_rate": 0.00026294117647058824, "loss": 0.3959, "step": 26426 }, { "epoch": 14.763687150837988, "grad_norm": 0.39443573355674744, "learning_rate": 0.00026291316526610645, "loss": 0.2974, "step": 26427 }, { "epoch": 14.764245810055867, "grad_norm": 0.4632950723171234, "learning_rate": 0.00026288515406162465, "loss": 0.3959, "step": 26428 }, { "epoch": 14.764804469273743, "grad_norm": 0.4690471291542053, "learning_rate": 0.00026285714285714286, "loss": 0.4213, "step": 26429 }, { "epoch": 14.76536312849162, "grad_norm": 0.8644055128097534, "learning_rate": 0.0002628291316526611, "loss": 0.3816, "step": 26430 }, { "epoch": 14.765921787709498, "grad_norm": 0.5297317504882812, "learning_rate": 0.00026280112044817927, "loss": 0.3842, "step": 26431 }, { "epoch": 14.766480446927375, "grad_norm": 0.36095932126045227, "learning_rate": 0.0002627731092436975, "loss": 0.38, "step": 26432 }, { "epoch": 14.767039106145251, "grad_norm": 0.5994721055030823, "learning_rate": 0.0002627450980392157, "loss": 0.3954, "step": 26433 }, { "epoch": 14.767597765363128, "grad_norm": 0.42066147923469543, "learning_rate": 0.0002627170868347339, "loss": 0.4586, "step": 26434 }, { "epoch": 14.768156424581006, "grad_norm": 0.397690087556839, "learning_rate": 0.00026268907563025215, "loss": 0.4562, "step": 26435 }, { "epoch": 14.768715083798883, "grad_norm": 0.431347131729126, "learning_rate": 0.0002626610644257703, "loss": 0.4554, "step": 26436 }, { "epoch": 14.76927374301676, "grad_norm": 1.1868691444396973, "learning_rate": 0.0002626330532212885, "loss": 0.4128, "step": 26437 }, { "epoch": 14.769832402234638, "grad_norm": 0.44891518354415894, "learning_rate": 0.00026260504201680677, "loss": 0.3847, "step": 26438 }, { "epoch": 14.770391061452514, "grad_norm": 0.6369753479957581, "learning_rate": 
0.0002625770308123249, "loss": 0.4246, "step": 26439 }, { "epoch": 14.77094972067039, "grad_norm": 0.7375242710113525, "learning_rate": 0.0002625490196078432, "loss": 0.3811, "step": 26440 }, { "epoch": 14.771508379888267, "grad_norm": 0.39455243945121765, "learning_rate": 0.00026252100840336133, "loss": 0.4143, "step": 26441 }, { "epoch": 14.772067039106146, "grad_norm": 0.6123532652854919, "learning_rate": 0.00026249299719887954, "loss": 0.453, "step": 26442 }, { "epoch": 14.772625698324022, "grad_norm": 0.3983652591705322, "learning_rate": 0.0002624649859943978, "loss": 0.3656, "step": 26443 }, { "epoch": 14.773184357541899, "grad_norm": 0.43648362159729004, "learning_rate": 0.00026243697478991595, "loss": 0.447, "step": 26444 }, { "epoch": 14.773743016759777, "grad_norm": 3.296373128890991, "learning_rate": 0.0002624089635854342, "loss": 0.5242, "step": 26445 }, { "epoch": 14.774301675977654, "grad_norm": 0.40457525849342346, "learning_rate": 0.0002623809523809524, "loss": 0.3515, "step": 26446 }, { "epoch": 14.77486033519553, "grad_norm": 0.5199089050292969, "learning_rate": 0.00026235294117647057, "loss": 0.429, "step": 26447 }, { "epoch": 14.775418994413407, "grad_norm": 0.4300305247306824, "learning_rate": 0.0002623249299719888, "loss": 0.3581, "step": 26448 }, { "epoch": 14.775977653631285, "grad_norm": 0.6626937985420227, "learning_rate": 0.000262296918767507, "loss": 0.4883, "step": 26449 }, { "epoch": 14.776536312849162, "grad_norm": 0.5347988605499268, "learning_rate": 0.00026226890756302524, "loss": 0.4604, "step": 26450 }, { "epoch": 14.777094972067038, "grad_norm": 0.42290830612182617, "learning_rate": 0.00026224089635854344, "loss": 0.4415, "step": 26451 }, { "epoch": 14.777653631284917, "grad_norm": 0.4398691654205322, "learning_rate": 0.0002622128851540616, "loss": 0.4993, "step": 26452 }, { "epoch": 14.778212290502793, "grad_norm": 0.4389786124229431, "learning_rate": 0.00026218487394957986, "loss": 0.4277, "step": 26453 }, { "epoch": 
14.77877094972067, "grad_norm": 0.5745500922203064, "learning_rate": 0.00026215686274509806, "loss": 0.3642, "step": 26454 }, { "epoch": 14.779329608938548, "grad_norm": 6.254465579986572, "learning_rate": 0.00026212885154061627, "loss": 0.4274, "step": 26455 }, { "epoch": 14.779888268156425, "grad_norm": 0.4897795021533966, "learning_rate": 0.0002621008403361345, "loss": 0.3883, "step": 26456 }, { "epoch": 14.780446927374301, "grad_norm": 0.5450849533081055, "learning_rate": 0.0002620728291316526, "loss": 0.4014, "step": 26457 }, { "epoch": 14.78100558659218, "grad_norm": 0.49721452593803406, "learning_rate": 0.0002620448179271709, "loss": 0.4233, "step": 26458 }, { "epoch": 14.781564245810056, "grad_norm": 0.36960384249687195, "learning_rate": 0.0002620168067226891, "loss": 0.3521, "step": 26459 }, { "epoch": 14.782122905027933, "grad_norm": 0.438244104385376, "learning_rate": 0.0002619887955182073, "loss": 0.3493, "step": 26460 }, { "epoch": 14.78268156424581, "grad_norm": 0.3668515384197235, "learning_rate": 0.0002619607843137255, "loss": 0.378, "step": 26461 }, { "epoch": 14.783240223463688, "grad_norm": 0.6461626887321472, "learning_rate": 0.0002619327731092437, "loss": 0.4054, "step": 26462 }, { "epoch": 14.783798882681564, "grad_norm": 0.3935241997241974, "learning_rate": 0.0002619047619047619, "loss": 0.4462, "step": 26463 }, { "epoch": 14.78435754189944, "grad_norm": 0.37472477555274963, "learning_rate": 0.0002618767507002801, "loss": 0.3536, "step": 26464 }, { "epoch": 14.78491620111732, "grad_norm": 0.47504526376724243, "learning_rate": 0.00026184873949579833, "loss": 0.4127, "step": 26465 }, { "epoch": 14.785474860335196, "grad_norm": 0.9019039273262024, "learning_rate": 0.00026182072829131653, "loss": 0.6654, "step": 26466 }, { "epoch": 14.786033519553072, "grad_norm": 0.42635321617126465, "learning_rate": 0.00026179271708683474, "loss": 0.2354, "step": 26467 }, { "epoch": 14.786592178770949, "grad_norm": 1.1489777565002441, "learning_rate": 
0.00026176470588235295, "loss": 0.4523, "step": 26468 }, { "epoch": 14.787150837988827, "grad_norm": 1.1180977821350098, "learning_rate": 0.00026173669467787115, "loss": 0.4413, "step": 26469 }, { "epoch": 14.787709497206704, "grad_norm": 8.92027759552002, "learning_rate": 0.0002617086834733894, "loss": 0.3618, "step": 26470 }, { "epoch": 14.78826815642458, "grad_norm": 0.5446832180023193, "learning_rate": 0.00026168067226890756, "loss": 0.3575, "step": 26471 }, { "epoch": 14.788826815642459, "grad_norm": 0.32228612899780273, "learning_rate": 0.00026165266106442577, "loss": 0.3231, "step": 26472 }, { "epoch": 14.789385474860335, "grad_norm": 1.173027753829956, "learning_rate": 0.000261624649859944, "loss": 0.4278, "step": 26473 }, { "epoch": 14.789944134078212, "grad_norm": 0.5928820967674255, "learning_rate": 0.0002615966386554622, "loss": 0.393, "step": 26474 }, { "epoch": 14.79050279329609, "grad_norm": 0.5773325562477112, "learning_rate": 0.00026156862745098044, "loss": 0.3756, "step": 26475 }, { "epoch": 14.791061452513967, "grad_norm": 0.5384437441825867, "learning_rate": 0.0002615406162464986, "loss": 0.4622, "step": 26476 }, { "epoch": 14.791620111731843, "grad_norm": 0.5310943126678467, "learning_rate": 0.0002615126050420168, "loss": 0.3923, "step": 26477 }, { "epoch": 14.79217877094972, "grad_norm": 0.48452699184417725, "learning_rate": 0.00026148459383753506, "loss": 0.475, "step": 26478 }, { "epoch": 14.792737430167598, "grad_norm": 1.5395199060440063, "learning_rate": 0.0002614565826330532, "loss": 0.3946, "step": 26479 }, { "epoch": 14.793296089385475, "grad_norm": 0.4771752953529358, "learning_rate": 0.00026142857142857147, "loss": 0.4126, "step": 26480 }, { "epoch": 14.793854748603351, "grad_norm": 0.5794165730476379, "learning_rate": 0.0002614005602240896, "loss": 0.4703, "step": 26481 }, { "epoch": 14.79441340782123, "grad_norm": 0.4600181579589844, "learning_rate": 0.00026137254901960783, "loss": 0.456, "step": 26482 }, { "epoch": 
14.794972067039106, "grad_norm": 0.38606157898902893, "learning_rate": 0.0002613445378151261, "loss": 0.4349, "step": 26483 }, { "epoch": 14.795530726256983, "grad_norm": 0.3928118348121643, "learning_rate": 0.00026131652661064424, "loss": 0.4459, "step": 26484 }, { "epoch": 14.796089385474861, "grad_norm": 0.5269759297370911, "learning_rate": 0.0002612885154061625, "loss": 0.4311, "step": 26485 }, { "epoch": 14.796648044692738, "grad_norm": 0.32078155875205994, "learning_rate": 0.0002612605042016807, "loss": 0.348, "step": 26486 }, { "epoch": 14.797206703910614, "grad_norm": 0.6130132079124451, "learning_rate": 0.00026123249299719886, "loss": 0.4218, "step": 26487 }, { "epoch": 14.797765363128491, "grad_norm": 0.5869680643081665, "learning_rate": 0.0002612044817927171, "loss": 0.4812, "step": 26488 }, { "epoch": 14.79832402234637, "grad_norm": 0.8428412675857544, "learning_rate": 0.00026117647058823527, "loss": 0.393, "step": 26489 }, { "epoch": 14.798882681564246, "grad_norm": 0.4352872967720032, "learning_rate": 0.00026114845938375353, "loss": 0.415, "step": 26490 }, { "epoch": 14.799441340782122, "grad_norm": 0.4212241768836975, "learning_rate": 0.00026112044817927174, "loss": 0.5269, "step": 26491 }, { "epoch": 14.8, "grad_norm": 0.5292167663574219, "learning_rate": 0.0002610924369747899, "loss": 0.4262, "step": 26492 }, { "epoch": 14.800558659217877, "grad_norm": 0.5012488961219788, "learning_rate": 0.00026106442577030815, "loss": 0.4212, "step": 26493 }, { "epoch": 14.801117318435754, "grad_norm": 0.35870733857154846, "learning_rate": 0.00026103641456582636, "loss": 0.3737, "step": 26494 }, { "epoch": 14.80167597765363, "grad_norm": 5.093686103820801, "learning_rate": 0.00026100840336134456, "loss": 0.494, "step": 26495 }, { "epoch": 14.802234636871509, "grad_norm": 0.4757404625415802, "learning_rate": 0.00026098039215686277, "loss": 0.4494, "step": 26496 }, { "epoch": 14.802793296089385, "grad_norm": 0.5189264416694641, "learning_rate": 
0.0002609523809523809, "loss": 0.3618, "step": 26497 }, { "epoch": 14.803351955307262, "grad_norm": 0.41683512926101685, "learning_rate": 0.0002609243697478992, "loss": 0.395, "step": 26498 }, { "epoch": 14.80391061452514, "grad_norm": 0.4827743172645569, "learning_rate": 0.0002608963585434174, "loss": 0.422, "step": 26499 }, { "epoch": 14.804469273743017, "grad_norm": 0.545688271522522, "learning_rate": 0.00026086834733893554, "loss": 0.5109, "step": 26500 }, { "epoch": 14.804469273743017, "eval_cer": 0.08619466902338167, "eval_loss": 0.325408011674881, "eval_runtime": 55.5061, "eval_samples_per_second": 81.757, "eval_steps_per_second": 5.117, "eval_wer": 0.34066087539454204, "step": 26500 }, { "epoch": 14.805027932960893, "grad_norm": 0.5007805228233337, "learning_rate": 0.0002608403361344538, "loss": 0.4515, "step": 26501 }, { "epoch": 14.805586592178772, "grad_norm": 0.3946542739868164, "learning_rate": 0.000260812324929972, "loss": 0.364, "step": 26502 }, { "epoch": 14.806145251396648, "grad_norm": 0.5230787992477417, "learning_rate": 0.0002607843137254902, "loss": 0.474, "step": 26503 }, { "epoch": 14.806703910614525, "grad_norm": 0.43557125329971313, "learning_rate": 0.0002607563025210084, "loss": 0.4314, "step": 26504 }, { "epoch": 14.807262569832401, "grad_norm": 0.41160711646080017, "learning_rate": 0.00026072829131652657, "loss": 0.3895, "step": 26505 }, { "epoch": 14.80782122905028, "grad_norm": 0.4903390109539032, "learning_rate": 0.00026070028011204483, "loss": 0.4233, "step": 26506 }, { "epoch": 14.808379888268156, "grad_norm": 0.40855300426483154, "learning_rate": 0.00026067226890756303, "loss": 0.4342, "step": 26507 }, { "epoch": 14.808938547486033, "grad_norm": 0.466184139251709, "learning_rate": 0.00026064425770308124, "loss": 0.4318, "step": 26508 }, { "epoch": 14.809497206703911, "grad_norm": 0.8855137228965759, "learning_rate": 0.00026061624649859945, "loss": 0.4194, "step": 26509 }, { "epoch": 14.810055865921788, "grad_norm": 
0.428856760263443, "learning_rate": 0.00026058823529411765, "loss": 0.4312, "step": 26510 }, { "epoch": 14.810614525139664, "grad_norm": 0.386977881193161, "learning_rate": 0.00026056022408963586, "loss": 0.4247, "step": 26511 }, { "epoch": 14.811173184357543, "grad_norm": 0.3859940767288208, "learning_rate": 0.00026053221288515406, "loss": 0.4209, "step": 26512 }, { "epoch": 14.81173184357542, "grad_norm": 0.4737122058868408, "learning_rate": 0.00026050420168067227, "loss": 0.3469, "step": 26513 }, { "epoch": 14.812290502793296, "grad_norm": 0.3521960973739624, "learning_rate": 0.0002604761904761905, "loss": 0.4099, "step": 26514 }, { "epoch": 14.812849162011172, "grad_norm": 0.4565404951572418, "learning_rate": 0.0002604481792717087, "loss": 0.4718, "step": 26515 }, { "epoch": 14.81340782122905, "grad_norm": 0.37796464562416077, "learning_rate": 0.0002604201680672269, "loss": 0.355, "step": 26516 }, { "epoch": 14.813966480446927, "grad_norm": 0.9435752034187317, "learning_rate": 0.0002603921568627451, "loss": 0.5082, "step": 26517 }, { "epoch": 14.814525139664804, "grad_norm": 0.4492003619670868, "learning_rate": 0.00026036414565826335, "loss": 0.4171, "step": 26518 }, { "epoch": 14.815083798882682, "grad_norm": 0.39688143134117126, "learning_rate": 0.0002603361344537815, "loss": 0.3527, "step": 26519 }, { "epoch": 14.815642458100559, "grad_norm": 0.48747146129608154, "learning_rate": 0.0002603081232492997, "loss": 0.4021, "step": 26520 }, { "epoch": 14.816201117318435, "grad_norm": 0.42979180812835693, "learning_rate": 0.0002602801120448179, "loss": 0.4155, "step": 26521 }, { "epoch": 14.816759776536312, "grad_norm": 0.5853160619735718, "learning_rate": 0.0002602521008403361, "loss": 0.3914, "step": 26522 }, { "epoch": 14.81731843575419, "grad_norm": 0.3751629590988159, "learning_rate": 0.0002602240896358544, "loss": 0.428, "step": 26523 }, { "epoch": 14.817877094972067, "grad_norm": 0.408683717250824, "learning_rate": 0.00026019607843137254, "loss": 0.4366, 
"step": 26524 }, { "epoch": 14.818435754189943, "grad_norm": 0.5187716484069824, "learning_rate": 0.00026016806722689074, "loss": 0.5159, "step": 26525 }, { "epoch": 14.818994413407822, "grad_norm": 0.6641644239425659, "learning_rate": 0.000260140056022409, "loss": 0.4877, "step": 26526 }, { "epoch": 14.819553072625698, "grad_norm": 0.39999130368232727, "learning_rate": 0.00026011204481792715, "loss": 0.355, "step": 26527 }, { "epoch": 14.820111731843575, "grad_norm": 0.3978792726993561, "learning_rate": 0.0002600840336134454, "loss": 0.2909, "step": 26528 }, { "epoch": 14.820670391061453, "grad_norm": 0.5720070600509644, "learning_rate": 0.00026005602240896357, "loss": 0.4694, "step": 26529 }, { "epoch": 14.82122905027933, "grad_norm": 0.6114780902862549, "learning_rate": 0.00026002801120448177, "loss": 0.4328, "step": 26530 }, { "epoch": 14.821787709497206, "grad_norm": 0.38012170791625977, "learning_rate": 0.00026000000000000003, "loss": 0.3127, "step": 26531 }, { "epoch": 14.822346368715085, "grad_norm": 0.579748272895813, "learning_rate": 0.0002599719887955182, "loss": 0.4388, "step": 26532 }, { "epoch": 14.822905027932961, "grad_norm": 0.6629649996757507, "learning_rate": 0.00025994397759103644, "loss": 0.4505, "step": 26533 }, { "epoch": 14.823463687150838, "grad_norm": 0.571614682674408, "learning_rate": 0.00025991596638655465, "loss": 0.4311, "step": 26534 }, { "epoch": 14.824022346368714, "grad_norm": null, "learning_rate": 0.00025991596638655465, "loss": 0.507, "step": 26535 }, { "epoch": 14.824581005586593, "grad_norm": 0.5229687094688416, "learning_rate": 0.0002598879551820728, "loss": 0.3442, "step": 26536 }, { "epoch": 14.82513966480447, "grad_norm": 0.5492231845855713, "learning_rate": 0.00025985994397759106, "loss": 0.4506, "step": 26537 }, { "epoch": 14.825698324022346, "grad_norm": 0.3985249698162079, "learning_rate": 0.0002598319327731092, "loss": 0.3811, "step": 26538 }, { "epoch": 14.826256983240224, "grad_norm": 0.4100886583328247, 
"learning_rate": 0.0002598039215686275, "loss": 0.5578, "step": 26539 }, { "epoch": 14.8268156424581, "grad_norm": 0.4358460605144501, "learning_rate": 0.0002597759103641457, "loss": 0.4025, "step": 26540 }, { "epoch": 14.827374301675977, "grad_norm": 1.138991355895996, "learning_rate": 0.00025974789915966383, "loss": 0.3615, "step": 26541 }, { "epoch": 14.827932960893854, "grad_norm": 0.393299400806427, "learning_rate": 0.0002597198879551821, "loss": 0.2862, "step": 26542 }, { "epoch": 14.828491620111732, "grad_norm": 0.4582339823246002, "learning_rate": 0.0002596918767507003, "loss": 0.4444, "step": 26543 }, { "epoch": 14.829050279329609, "grad_norm": 0.704105794429779, "learning_rate": 0.0002596638655462185, "loss": 0.7172, "step": 26544 }, { "epoch": 14.829608938547485, "grad_norm": 1.9063905477523804, "learning_rate": 0.0002596358543417367, "loss": 0.4766, "step": 26545 }, { "epoch": 14.830167597765364, "grad_norm": 0.4756879508495331, "learning_rate": 0.00025960784313725486, "loss": 0.4232, "step": 26546 }, { "epoch": 14.83072625698324, "grad_norm": 0.8283573389053345, "learning_rate": 0.0002595798319327731, "loss": 0.3762, "step": 26547 }, { "epoch": 14.831284916201117, "grad_norm": 0.3527793884277344, "learning_rate": 0.00025955182072829133, "loss": 0.319, "step": 26548 }, { "epoch": 14.831843575418995, "grad_norm": 0.5375033020973206, "learning_rate": 0.00025952380952380953, "loss": 0.4744, "step": 26549 }, { "epoch": 14.832402234636872, "grad_norm": 0.4578782916069031, "learning_rate": 0.00025949579831932774, "loss": 0.4204, "step": 26550 }, { "epoch": 14.832960893854748, "grad_norm": 0.5997194647789001, "learning_rate": 0.00025946778711484595, "loss": 0.464, "step": 26551 }, { "epoch": 14.833519553072625, "grad_norm": 2.0308713912963867, "learning_rate": 0.00025943977591036415, "loss": 0.3984, "step": 26552 }, { "epoch": 14.834078212290503, "grad_norm": 0.688795804977417, "learning_rate": 0.00025941176470588236, "loss": 0.5233, "step": 26553 }, { 
"epoch": 14.83463687150838, "grad_norm": 0.4516580104827881, "learning_rate": 0.00025938375350140056, "loss": 0.4823, "step": 26554 }, { "epoch": 14.835195530726256, "grad_norm": 0.4836987853050232, "learning_rate": 0.00025935574229691877, "loss": 0.4322, "step": 26555 }, { "epoch": 14.835754189944135, "grad_norm": 1.104026198387146, "learning_rate": 0.000259327731092437, "loss": 0.3095, "step": 26556 }, { "epoch": 14.836312849162011, "grad_norm": 0.36835381388664246, "learning_rate": 0.0002592997198879552, "loss": 0.4003, "step": 26557 }, { "epoch": 14.836871508379888, "grad_norm": 0.4588301479816437, "learning_rate": 0.0002592717086834734, "loss": 0.4628, "step": 26558 }, { "epoch": 14.837430167597766, "grad_norm": 0.4947979152202606, "learning_rate": 0.00025924369747899165, "loss": 0.3289, "step": 26559 }, { "epoch": 14.837988826815643, "grad_norm": 0.2932858467102051, "learning_rate": 0.0002592156862745098, "loss": 0.2663, "step": 26560 }, { "epoch": 14.83854748603352, "grad_norm": 0.44763603806495667, "learning_rate": 0.000259187675070028, "loss": 0.4778, "step": 26561 }, { "epoch": 14.839106145251396, "grad_norm": 0.3396928906440735, "learning_rate": 0.0002591596638655462, "loss": 0.3839, "step": 26562 }, { "epoch": 14.839664804469274, "grad_norm": 3.5120232105255127, "learning_rate": 0.0002591316526610644, "loss": 0.3143, "step": 26563 }, { "epoch": 14.84022346368715, "grad_norm": 0.9616038203239441, "learning_rate": 0.0002591036414565827, "loss": 0.5485, "step": 26564 }, { "epoch": 14.840782122905027, "grad_norm": 0.3105078637599945, "learning_rate": 0.00025907563025210083, "loss": 0.2576, "step": 26565 }, { "epoch": 14.841340782122906, "grad_norm": 1.8114765882492065, "learning_rate": 0.00025904761904761904, "loss": 0.4819, "step": 26566 }, { "epoch": 14.841899441340782, "grad_norm": 0.3861510455608368, "learning_rate": 0.0002590196078431373, "loss": 0.3238, "step": 26567 }, { "epoch": 14.842458100558659, "grad_norm": 0.3394841253757477, "learning_rate": 
0.00025899159663865545, "loss": 0.3101, "step": 26568 }, { "epoch": 14.843016759776535, "grad_norm": 0.4576930105686188, "learning_rate": 0.0002589635854341737, "loss": 0.3933, "step": 26569 }, { "epoch": 14.843575418994414, "grad_norm": 0.41548705101013184, "learning_rate": 0.00025893557422969186, "loss": 0.4054, "step": 26570 }, { "epoch": 14.84413407821229, "grad_norm": 0.41307491064071655, "learning_rate": 0.00025890756302521007, "loss": 0.3915, "step": 26571 }, { "epoch": 14.844692737430167, "grad_norm": 0.43612322211265564, "learning_rate": 0.0002588795518207283, "loss": 0.4657, "step": 26572 }, { "epoch": 14.845251396648045, "grad_norm": 0.36599600315093994, "learning_rate": 0.0002588515406162465, "loss": 0.3577, "step": 26573 }, { "epoch": 14.845810055865922, "grad_norm": 0.5370457768440247, "learning_rate": 0.00025882352941176474, "loss": 0.4093, "step": 26574 }, { "epoch": 14.846368715083798, "grad_norm": 0.5047298073768616, "learning_rate": 0.00025879551820728294, "loss": 0.4968, "step": 26575 }, { "epoch": 14.846927374301677, "grad_norm": 0.490090012550354, "learning_rate": 0.0002587675070028011, "loss": 0.4724, "step": 26576 }, { "epoch": 14.847486033519553, "grad_norm": 0.4883776307106018, "learning_rate": 0.00025873949579831936, "loss": 0.33, "step": 26577 }, { "epoch": 14.84804469273743, "grad_norm": 0.33621323108673096, "learning_rate": 0.0002587114845938375, "loss": 0.3866, "step": 26578 }, { "epoch": 14.848603351955306, "grad_norm": 0.3998306691646576, "learning_rate": 0.00025868347338935577, "loss": 0.4417, "step": 26579 }, { "epoch": 14.849162011173185, "grad_norm": 0.32000869512557983, "learning_rate": 0.000258655462184874, "loss": 0.3701, "step": 26580 }, { "epoch": 14.849720670391061, "grad_norm": 0.4071613550186157, "learning_rate": 0.0002586274509803921, "loss": 0.4253, "step": 26581 }, { "epoch": 14.850279329608938, "grad_norm": 2.027844190597534, "learning_rate": 0.0002585994397759104, "loss": 0.5256, "step": 26582 }, { "epoch": 
14.850837988826816, "grad_norm": 0.5331873297691345, "learning_rate": 0.0002585714285714286, "loss": 0.4831, "step": 26583 }, { "epoch": 14.851396648044693, "grad_norm": 2.0115225315093994, "learning_rate": 0.0002585434173669468, "loss": 0.3547, "step": 26584 }, { "epoch": 14.85195530726257, "grad_norm": 0.8113182187080383, "learning_rate": 0.000258515406162465, "loss": 0.3894, "step": 26585 }, { "epoch": 14.852513966480448, "grad_norm": 0.43342965841293335, "learning_rate": 0.00025848739495798316, "loss": 0.4269, "step": 26586 }, { "epoch": 14.853072625698324, "grad_norm": 0.3013548254966736, "learning_rate": 0.0002584593837535014, "loss": 0.3265, "step": 26587 }, { "epoch": 14.8536312849162, "grad_norm": 0.40642988681793213, "learning_rate": 0.0002584313725490196, "loss": 0.4122, "step": 26588 }, { "epoch": 14.854189944134077, "grad_norm": 0.3633423149585724, "learning_rate": 0.00025840336134453783, "loss": 0.3916, "step": 26589 }, { "epoch": 14.854748603351956, "grad_norm": 0.3396340012550354, "learning_rate": 0.00025837535014005603, "loss": 0.3476, "step": 26590 }, { "epoch": 14.855307262569832, "grad_norm": 0.47138839960098267, "learning_rate": 0.00025834733893557424, "loss": 0.3865, "step": 26591 }, { "epoch": 14.855865921787709, "grad_norm": 1.229844093322754, "learning_rate": 0.00025831932773109245, "loss": 0.4567, "step": 26592 }, { "epoch": 14.856424581005587, "grad_norm": 0.592532217502594, "learning_rate": 0.00025829131652661065, "loss": 0.4355, "step": 26593 }, { "epoch": 14.856983240223464, "grad_norm": 0.5965381264686584, "learning_rate": 0.00025826330532212886, "loss": 0.3752, "step": 26594 }, { "epoch": 14.85754189944134, "grad_norm": 0.5160749554634094, "learning_rate": 0.00025823529411764706, "loss": 0.5177, "step": 26595 }, { "epoch": 14.858100558659217, "grad_norm": 0.41821354627609253, "learning_rate": 0.00025820728291316527, "loss": 0.4024, "step": 26596 }, { "epoch": 14.858659217877095, "grad_norm": 0.5512333512306213, "learning_rate": 
0.0002581792717086835, "loss": 0.3825, "step": 26597 }, { "epoch": 14.859217877094972, "grad_norm": 1.4245907068252563, "learning_rate": 0.0002581512605042017, "loss": 0.3753, "step": 26598 }, { "epoch": 14.859776536312848, "grad_norm": 1.4051717519760132, "learning_rate": 0.00025812324929971994, "loss": 0.3699, "step": 26599 }, { "epoch": 14.860335195530727, "grad_norm": 0.4880584478378296, "learning_rate": 0.0002580952380952381, "loss": 0.3435, "step": 26600 }, { "epoch": 14.860893854748603, "grad_norm": 0.40549179911613464, "learning_rate": 0.0002580672268907563, "loss": 0.4291, "step": 26601 }, { "epoch": 14.86145251396648, "grad_norm": 0.6553271412849426, "learning_rate": 0.0002580392156862745, "loss": 0.6506, "step": 26602 }, { "epoch": 14.862011173184358, "grad_norm": 0.3544323742389679, "learning_rate": 0.0002580112044817927, "loss": 0.3726, "step": 26603 }, { "epoch": 14.862569832402235, "grad_norm": 2.8086049556732178, "learning_rate": 0.00025798319327731097, "loss": 0.408, "step": 26604 }, { "epoch": 14.863128491620111, "grad_norm": 0.333035409450531, "learning_rate": 0.0002579551820728291, "loss": 0.2596, "step": 26605 }, { "epoch": 14.86368715083799, "grad_norm": 0.4436178207397461, "learning_rate": 0.00025792717086834733, "loss": 0.3856, "step": 26606 }, { "epoch": 14.864245810055866, "grad_norm": 0.5466976165771484, "learning_rate": 0.0002578991596638656, "loss": 0.4044, "step": 26607 }, { "epoch": 14.864804469273743, "grad_norm": 0.37446752190589905, "learning_rate": 0.00025787114845938374, "loss": 0.4531, "step": 26608 }, { "epoch": 14.86536312849162, "grad_norm": 0.4338454008102417, "learning_rate": 0.000257843137254902, "loss": 0.3605, "step": 26609 }, { "epoch": 14.865921787709498, "grad_norm": 0.6497722864151001, "learning_rate": 0.00025781512605042015, "loss": 0.3824, "step": 26610 }, { "epoch": 14.866480446927374, "grad_norm": 0.41234156489372253, "learning_rate": 0.00025778711484593836, "loss": 0.5167, "step": 26611 }, { "epoch": 
14.867039106145251, "grad_norm": 0.45685410499572754, "learning_rate": 0.0002577591036414566, "loss": 0.3876, "step": 26612 }, { "epoch": 14.86759776536313, "grad_norm": 0.3604154586791992, "learning_rate": 0.00025773109243697477, "loss": 0.406, "step": 26613 }, { "epoch": 14.868156424581006, "grad_norm": 0.4332268238067627, "learning_rate": 0.000257703081232493, "loss": 0.4247, "step": 26614 }, { "epoch": 14.868715083798882, "grad_norm": 0.557467520236969, "learning_rate": 0.00025767507002801124, "loss": 0.4297, "step": 26615 }, { "epoch": 14.869273743016759, "grad_norm": 0.41732969880104065, "learning_rate": 0.0002576470588235294, "loss": 0.3656, "step": 26616 }, { "epoch": 14.869832402234637, "grad_norm": 0.42851725220680237, "learning_rate": 0.00025761904761904765, "loss": 0.3817, "step": 26617 }, { "epoch": 14.870391061452514, "grad_norm": 0.5509878993034363, "learning_rate": 0.0002575910364145658, "loss": 0.5425, "step": 26618 }, { "epoch": 14.87094972067039, "grad_norm": 0.7281415462493896, "learning_rate": 0.000257563025210084, "loss": 0.43, "step": 26619 }, { "epoch": 14.871508379888269, "grad_norm": 0.40808019042015076, "learning_rate": 0.00025753501400560227, "loss": 0.3613, "step": 26620 }, { "epoch": 14.872067039106145, "grad_norm": 0.6743311285972595, "learning_rate": 0.0002575070028011204, "loss": 0.4565, "step": 26621 }, { "epoch": 14.872625698324022, "grad_norm": 1.0449023246765137, "learning_rate": 0.0002574789915966387, "loss": 0.4927, "step": 26622 }, { "epoch": 14.8731843575419, "grad_norm": 0.3304521143436432, "learning_rate": 0.0002574509803921569, "loss": 0.3313, "step": 26623 }, { "epoch": 14.873743016759777, "grad_norm": 0.476204514503479, "learning_rate": 0.00025742296918767504, "loss": 0.4571, "step": 26624 }, { "epoch": 14.874301675977653, "grad_norm": 0.3807516098022461, "learning_rate": 0.0002573949579831933, "loss": 0.3532, "step": 26625 }, { "epoch": 14.87486033519553, "grad_norm": 0.535419762134552, "learning_rate": 
0.00025736694677871145, "loss": 0.454, "step": 26626 }, { "epoch": 14.875418994413408, "grad_norm": 0.39224204421043396, "learning_rate": 0.0002573389355742297, "loss": 0.3741, "step": 26627 }, { "epoch": 14.875977653631285, "grad_norm": 0.5574172735214233, "learning_rate": 0.0002573109243697479, "loss": 0.4204, "step": 26628 }, { "epoch": 14.876536312849161, "grad_norm": 0.5498204231262207, "learning_rate": 0.00025728291316526607, "loss": 0.357, "step": 26629 }, { "epoch": 14.87709497206704, "grad_norm": 0.34681206941604614, "learning_rate": 0.00025725490196078433, "loss": 0.3775, "step": 26630 }, { "epoch": 14.877653631284916, "grad_norm": 0.3369964361190796, "learning_rate": 0.00025722689075630253, "loss": 0.3381, "step": 26631 }, { "epoch": 14.878212290502793, "grad_norm": 0.3674883544445038, "learning_rate": 0.00025719887955182074, "loss": 0.3944, "step": 26632 }, { "epoch": 14.878770949720671, "grad_norm": 0.4161687195301056, "learning_rate": 0.00025717086834733895, "loss": 0.3686, "step": 26633 }, { "epoch": 14.879329608938548, "grad_norm": 0.3734763562679291, "learning_rate": 0.0002571428571428571, "loss": 0.3381, "step": 26634 }, { "epoch": 14.879888268156424, "grad_norm": 0.6255685091018677, "learning_rate": 0.00025711484593837536, "loss": 0.3103, "step": 26635 }, { "epoch": 14.880446927374301, "grad_norm": 0.4141809940338135, "learning_rate": 0.00025708683473389356, "loss": 0.3873, "step": 26636 }, { "epoch": 14.88100558659218, "grad_norm": 0.5900827646255493, "learning_rate": 0.00025705882352941177, "loss": 0.5042, "step": 26637 }, { "epoch": 14.881564245810056, "grad_norm": 0.5379325747489929, "learning_rate": 0.00025703081232493, "loss": 0.4259, "step": 26638 }, { "epoch": 14.882122905027932, "grad_norm": 0.47491633892059326, "learning_rate": 0.0002570028011204482, "loss": 0.423, "step": 26639 }, { "epoch": 14.88268156424581, "grad_norm": 0.49043571949005127, "learning_rate": 0.0002569747899159664, "loss": 0.4991, "step": 26640 }, { "epoch": 
14.883240223463687, "grad_norm": 0.8420180678367615, "learning_rate": 0.0002569467787114846, "loss": 0.4139, "step": 26641 }, { "epoch": 14.883798882681564, "grad_norm": 0.6261454820632935, "learning_rate": 0.0002569187675070028, "loss": 0.5389, "step": 26642 }, { "epoch": 14.88435754189944, "grad_norm": 1.0918277502059937, "learning_rate": 0.000256890756302521, "loss": 0.2714, "step": 26643 }, { "epoch": 14.884916201117319, "grad_norm": 0.3174363374710083, "learning_rate": 0.0002568627450980392, "loss": 0.3878, "step": 26644 }, { "epoch": 14.885474860335195, "grad_norm": 0.6370895504951477, "learning_rate": 0.0002568347338935574, "loss": 0.5015, "step": 26645 }, { "epoch": 14.886033519553072, "grad_norm": 0.44760754704475403, "learning_rate": 0.0002568067226890756, "loss": 0.3652, "step": 26646 }, { "epoch": 14.88659217877095, "grad_norm": 0.42924797534942627, "learning_rate": 0.0002567787114845939, "loss": 0.2802, "step": 26647 }, { "epoch": 14.887150837988827, "grad_norm": 1.5820657014846802, "learning_rate": 0.00025675070028011204, "loss": 0.3777, "step": 26648 }, { "epoch": 14.887709497206703, "grad_norm": 0.48219895362854004, "learning_rate": 0.00025672268907563024, "loss": 0.4461, "step": 26649 }, { "epoch": 14.888268156424582, "grad_norm": 0.7413076162338257, "learning_rate": 0.00025669467787114845, "loss": 0.504, "step": 26650 }, { "epoch": 14.888826815642458, "grad_norm": 4.097867488861084, "learning_rate": 0.00025666666666666665, "loss": 0.4297, "step": 26651 }, { "epoch": 14.889385474860335, "grad_norm": 0.5052071809768677, "learning_rate": 0.0002566386554621849, "loss": 0.461, "step": 26652 }, { "epoch": 14.889944134078211, "grad_norm": 0.43386533856391907, "learning_rate": 0.00025661064425770307, "loss": 0.411, "step": 26653 }, { "epoch": 14.89050279329609, "grad_norm": 0.3635751008987427, "learning_rate": 0.00025658263305322127, "loss": 0.3706, "step": 26654 }, { "epoch": 14.891061452513966, "grad_norm": 0.5499553084373474, "learning_rate": 
0.00025655462184873953, "loss": 0.4579, "step": 26655 }, { "epoch": 14.891620111731843, "grad_norm": 1.0164464712142944, "learning_rate": 0.0002565266106442577, "loss": 0.4046, "step": 26656 }, { "epoch": 14.892178770949721, "grad_norm": 0.5983941555023193, "learning_rate": 0.00025649859943977594, "loss": 0.402, "step": 26657 }, { "epoch": 14.892737430167598, "grad_norm": 0.49708226323127747, "learning_rate": 0.0002564705882352941, "loss": 0.355, "step": 26658 }, { "epoch": 14.893296089385474, "grad_norm": 0.5279198288917542, "learning_rate": 0.0002564425770308123, "loss": 0.4718, "step": 26659 }, { "epoch": 14.893854748603353, "grad_norm": 0.5096535682678223, "learning_rate": 0.00025641456582633056, "loss": 0.5149, "step": 26660 }, { "epoch": 14.89441340782123, "grad_norm": 0.6974937319755554, "learning_rate": 0.0002563865546218487, "loss": 0.4487, "step": 26661 }, { "epoch": 14.894972067039106, "grad_norm": 0.9840856790542603, "learning_rate": 0.000256358543417367, "loss": 0.3418, "step": 26662 }, { "epoch": 14.895530726256982, "grad_norm": 0.42200687527656555, "learning_rate": 0.0002563305322128852, "loss": 0.4218, "step": 26663 }, { "epoch": 14.89608938547486, "grad_norm": 0.5543202757835388, "learning_rate": 0.00025630252100840333, "loss": 0.3908, "step": 26664 }, { "epoch": 14.896648044692737, "grad_norm": 0.42498838901519775, "learning_rate": 0.0002562745098039216, "loss": 0.4553, "step": 26665 }, { "epoch": 14.897206703910614, "grad_norm": 2.4699130058288574, "learning_rate": 0.00025624649859943974, "loss": 0.5016, "step": 26666 }, { "epoch": 14.897765363128492, "grad_norm": 0.5625272393226624, "learning_rate": 0.000256218487394958, "loss": 0.406, "step": 26667 }, { "epoch": 14.898324022346369, "grad_norm": 0.5316697359085083, "learning_rate": 0.0002561904761904762, "loss": 0.2879, "step": 26668 }, { "epoch": 14.898882681564245, "grad_norm": 0.3590381443500519, "learning_rate": 0.00025616246498599436, "loss": 0.5206, "step": 26669 }, { "epoch": 
14.899441340782122, "grad_norm": 0.4346022605895996, "learning_rate": 0.0002561344537815126, "loss": 0.4501, "step": 26670 }, { "epoch": 14.9, "grad_norm": 0.47714418172836304, "learning_rate": 0.00025610644257703083, "loss": 0.4973, "step": 26671 }, { "epoch": 14.900558659217877, "grad_norm": 0.48734551668167114, "learning_rate": 0.00025607843137254903, "loss": 0.4386, "step": 26672 }, { "epoch": 14.901117318435753, "grad_norm": 0.5241233706474304, "learning_rate": 0.00025605042016806724, "loss": 0.3414, "step": 26673 }, { "epoch": 14.901675977653632, "grad_norm": 0.5248932838439941, "learning_rate": 0.0002560224089635854, "loss": 0.4846, "step": 26674 }, { "epoch": 14.902234636871508, "grad_norm": 0.4646275043487549, "learning_rate": 0.00025599439775910365, "loss": 0.4283, "step": 26675 }, { "epoch": 14.902793296089385, "grad_norm": 0.4028272032737732, "learning_rate": 0.00025596638655462186, "loss": 0.342, "step": 26676 }, { "epoch": 14.903351955307263, "grad_norm": 2.1062960624694824, "learning_rate": 0.00025593837535014006, "loss": 0.4469, "step": 26677 }, { "epoch": 14.90391061452514, "grad_norm": 2.1968324184417725, "learning_rate": 0.00025591036414565827, "loss": 0.3279, "step": 26678 }, { "epoch": 14.904469273743016, "grad_norm": 0.6872121095657349, "learning_rate": 0.0002558823529411765, "loss": 0.3232, "step": 26679 }, { "epoch": 14.905027932960895, "grad_norm": 0.37037986516952515, "learning_rate": 0.0002558543417366947, "loss": 0.3142, "step": 26680 }, { "epoch": 14.905586592178771, "grad_norm": 0.6516431570053101, "learning_rate": 0.0002558263305322129, "loss": 0.4806, "step": 26681 }, { "epoch": 14.906145251396648, "grad_norm": 0.4694614112377167, "learning_rate": 0.00025579831932773115, "loss": 0.4179, "step": 26682 }, { "epoch": 14.906703910614524, "grad_norm": 0.545961320400238, "learning_rate": 0.0002557703081232493, "loss": 0.3839, "step": 26683 }, { "epoch": 14.907262569832403, "grad_norm": 0.40497687458992004, "learning_rate": 
0.0002557422969187675, "loss": 0.2871, "step": 26684 }, { "epoch": 14.90782122905028, "grad_norm": 0.8895620107650757, "learning_rate": 0.0002557142857142857, "loss": 0.3958, "step": 26685 }, { "epoch": 14.908379888268156, "grad_norm": 0.6328848600387573, "learning_rate": 0.0002556862745098039, "loss": 0.4879, "step": 26686 }, { "epoch": 14.908938547486034, "grad_norm": 0.37975776195526123, "learning_rate": 0.0002556582633053222, "loss": 0.4112, "step": 26687 }, { "epoch": 14.90949720670391, "grad_norm": 0.5444965958595276, "learning_rate": 0.00025563025210084033, "loss": 0.4435, "step": 26688 }, { "epoch": 14.910055865921787, "grad_norm": 0.8743622303009033, "learning_rate": 0.00025560224089635854, "loss": 0.3393, "step": 26689 }, { "epoch": 14.910614525139664, "grad_norm": 0.40520724654197693, "learning_rate": 0.0002555742296918768, "loss": 0.4175, "step": 26690 }, { "epoch": 14.911173184357542, "grad_norm": 0.507301926612854, "learning_rate": 0.00025554621848739495, "loss": 0.4894, "step": 26691 }, { "epoch": 14.911731843575419, "grad_norm": 0.3801904320716858, "learning_rate": 0.0002555182072829132, "loss": 0.3553, "step": 26692 }, { "epoch": 14.912290502793295, "grad_norm": 0.42334821820259094, "learning_rate": 0.00025549019607843136, "loss": 0.4115, "step": 26693 }, { "epoch": 14.912849162011174, "grad_norm": 0.3584507703781128, "learning_rate": 0.00025546218487394957, "loss": 0.424, "step": 26694 }, { "epoch": 14.91340782122905, "grad_norm": 0.44555243849754333, "learning_rate": 0.0002554341736694678, "loss": 0.4661, "step": 26695 }, { "epoch": 14.913966480446927, "grad_norm": 0.47240155935287476, "learning_rate": 0.000255406162464986, "loss": 0.5293, "step": 26696 }, { "epoch": 14.914525139664804, "grad_norm": 3.6318836212158203, "learning_rate": 0.00025537815126050424, "loss": 0.4274, "step": 26697 }, { "epoch": 14.915083798882682, "grad_norm": 1.208521842956543, "learning_rate": 0.00025535014005602244, "loss": 0.502, "step": 26698 }, { "epoch": 
14.915642458100558, "grad_norm": 0.4105777442455292, "learning_rate": 0.0002553221288515406, "loss": 0.3547, "step": 26699 }, { "epoch": 14.916201117318435, "grad_norm": 0.38693636655807495, "learning_rate": 0.00025529411764705886, "loss": 0.422, "step": 26700 }, { "epoch": 14.916759776536313, "grad_norm": 0.5913032293319702, "learning_rate": 0.000255266106442577, "loss": 0.4376, "step": 26701 }, { "epoch": 14.91731843575419, "grad_norm": 2.128818988800049, "learning_rate": 0.00025523809523809527, "loss": 0.337, "step": 26702 }, { "epoch": 14.917877094972066, "grad_norm": 0.44014424085617065, "learning_rate": 0.0002552100840336135, "loss": 0.3936, "step": 26703 }, { "epoch": 14.918435754189945, "grad_norm": 0.3193798065185547, "learning_rate": 0.0002551820728291316, "loss": 0.3646, "step": 26704 }, { "epoch": 14.918994413407821, "grad_norm": 0.5399397015571594, "learning_rate": 0.0002551540616246499, "loss": 0.4451, "step": 26705 }, { "epoch": 14.919553072625698, "grad_norm": 0.4297410845756531, "learning_rate": 0.0002551260504201681, "loss": 0.4076, "step": 26706 }, { "epoch": 14.920111731843576, "grad_norm": 0.5189684629440308, "learning_rate": 0.0002550980392156863, "loss": 0.3843, "step": 26707 }, { "epoch": 14.920670391061453, "grad_norm": 0.5604249238967896, "learning_rate": 0.0002550700280112045, "loss": 0.4171, "step": 26708 }, { "epoch": 14.92122905027933, "grad_norm": 1.0083544254302979, "learning_rate": 0.00025504201680672266, "loss": 0.4521, "step": 26709 }, { "epoch": 14.921787709497206, "grad_norm": 0.7064391374588013, "learning_rate": 0.0002550140056022409, "loss": 0.353, "step": 26710 }, { "epoch": 14.922346368715084, "grad_norm": 0.3665640950202942, "learning_rate": 0.0002549859943977591, "loss": 0.3566, "step": 26711 }, { "epoch": 14.922905027932961, "grad_norm": 0.4071871042251587, "learning_rate": 0.00025495798319327733, "loss": 0.3435, "step": 26712 }, { "epoch": 14.923463687150837, "grad_norm": 0.44359061121940613, "learning_rate": 
0.00025492997198879553, "loss": 0.4729, "step": 26713 }, { "epoch": 14.924022346368716, "grad_norm": 0.6107977032661438, "learning_rate": 0.00025490196078431374, "loss": 0.4939, "step": 26714 }, { "epoch": 14.924581005586592, "grad_norm": 0.5190136432647705, "learning_rate": 0.00025487394957983195, "loss": 0.3887, "step": 26715 }, { "epoch": 14.925139664804469, "grad_norm": 0.38806429505348206, "learning_rate": 0.00025484593837535015, "loss": 0.3133, "step": 26716 }, { "epoch": 14.925698324022346, "grad_norm": 0.42224401235580444, "learning_rate": 0.00025481792717086836, "loss": 0.4102, "step": 26717 }, { "epoch": 14.926256983240224, "grad_norm": 0.5717446804046631, "learning_rate": 0.00025478991596638656, "loss": 0.4744, "step": 26718 }, { "epoch": 14.9268156424581, "grad_norm": 0.6142580509185791, "learning_rate": 0.00025476190476190477, "loss": 0.4914, "step": 26719 }, { "epoch": 14.927374301675977, "grad_norm": 0.3627018630504608, "learning_rate": 0.000254733893557423, "loss": 0.374, "step": 26720 }, { "epoch": 14.927932960893855, "grad_norm": 0.3908005356788635, "learning_rate": 0.0002547058823529412, "loss": 0.4962, "step": 26721 }, { "epoch": 14.928491620111732, "grad_norm": 0.4286330044269562, "learning_rate": 0.0002546778711484594, "loss": 0.4596, "step": 26722 }, { "epoch": 14.929050279329608, "grad_norm": 0.41611284017562866, "learning_rate": 0.0002546498599439776, "loss": 0.4051, "step": 26723 }, { "epoch": 14.929608938547487, "grad_norm": 0.531650722026825, "learning_rate": 0.0002546218487394958, "loss": 0.5239, "step": 26724 }, { "epoch": 14.930167597765363, "grad_norm": 0.35428231954574585, "learning_rate": 0.000254593837535014, "loss": 0.4362, "step": 26725 }, { "epoch": 14.93072625698324, "grad_norm": 0.4525468945503235, "learning_rate": 0.0002545658263305322, "loss": 0.3122, "step": 26726 }, { "epoch": 14.931284916201117, "grad_norm": 0.6107298135757446, "learning_rate": 0.0002545378151260504, "loss": 0.4408, "step": 26727 }, { "epoch": 
14.931843575418995, "grad_norm": 0.38961026072502136, "learning_rate": 0.0002545098039215686, "loss": 0.444, "step": 26728 }, { "epoch": 14.932402234636871, "grad_norm": 0.4186474680900574, "learning_rate": 0.00025448179271708683, "loss": 0.4527, "step": 26729 }, { "epoch": 14.932960893854748, "grad_norm": 0.4926489293575287, "learning_rate": 0.0002544537815126051, "loss": 0.5086, "step": 26730 }, { "epoch": 14.933519553072626, "grad_norm": 0.44995221495628357, "learning_rate": 0.00025442577030812324, "loss": 0.4627, "step": 26731 }, { "epoch": 14.934078212290503, "grad_norm": 0.3999173045158386, "learning_rate": 0.00025439775910364145, "loss": 0.5113, "step": 26732 }, { "epoch": 14.93463687150838, "grad_norm": 0.5775466561317444, "learning_rate": 0.00025436974789915965, "loss": 0.3226, "step": 26733 }, { "epoch": 14.935195530726258, "grad_norm": 3.547893762588501, "learning_rate": 0.00025434173669467786, "loss": 0.3124, "step": 26734 }, { "epoch": 14.935754189944134, "grad_norm": 0.4250103831291199, "learning_rate": 0.0002543137254901961, "loss": 0.3083, "step": 26735 }, { "epoch": 14.936312849162011, "grad_norm": 0.585394561290741, "learning_rate": 0.00025428571428571427, "loss": 0.4156, "step": 26736 }, { "epoch": 14.936871508379888, "grad_norm": 0.4130701422691345, "learning_rate": 0.0002542577030812325, "loss": 0.3007, "step": 26737 }, { "epoch": 14.937430167597766, "grad_norm": 0.43073514103889465, "learning_rate": 0.00025422969187675074, "loss": 0.4547, "step": 26738 }, { "epoch": 14.937988826815642, "grad_norm": 0.5746600031852722, "learning_rate": 0.0002542016806722689, "loss": 0.4421, "step": 26739 }, { "epoch": 14.938547486033519, "grad_norm": 0.41955921053886414, "learning_rate": 0.00025417366946778715, "loss": 0.4599, "step": 26740 }, { "epoch": 14.939106145251397, "grad_norm": 0.3666926324367523, "learning_rate": 0.0002541456582633053, "loss": 0.4538, "step": 26741 }, { "epoch": 14.939664804469274, "grad_norm": 1.398512601852417, "learning_rate": 
0.0002541176470588235, "loss": 0.6172, "step": 26742 }, { "epoch": 14.94022346368715, "grad_norm": 0.4980083107948303, "learning_rate": 0.00025408963585434177, "loss": 0.4001, "step": 26743 }, { "epoch": 14.940782122905027, "grad_norm": 0.34092041850090027, "learning_rate": 0.0002540616246498599, "loss": 0.3721, "step": 26744 }, { "epoch": 14.941340782122905, "grad_norm": 0.37353256344795227, "learning_rate": 0.0002540336134453782, "loss": 0.3692, "step": 26745 }, { "epoch": 14.941899441340782, "grad_norm": 0.3240382969379425, "learning_rate": 0.0002540056022408964, "loss": 0.3553, "step": 26746 }, { "epoch": 14.942458100558659, "grad_norm": 0.568525493144989, "learning_rate": 0.00025397759103641454, "loss": 0.4068, "step": 26747 }, { "epoch": 14.943016759776537, "grad_norm": 0.42363086342811584, "learning_rate": 0.0002539495798319328, "loss": 0.4224, "step": 26748 }, { "epoch": 14.943575418994413, "grad_norm": 0.3492322564125061, "learning_rate": 0.00025392156862745095, "loss": 0.3949, "step": 26749 }, { "epoch": 14.94413407821229, "grad_norm": 0.42833009362220764, "learning_rate": 0.0002538935574229692, "loss": 0.5427, "step": 26750 }, { "epoch": 14.944692737430168, "grad_norm": 0.4058558940887451, "learning_rate": 0.0002538655462184874, "loss": 0.3925, "step": 26751 }, { "epoch": 14.945251396648045, "grad_norm": 0.4290007948875427, "learning_rate": 0.00025383753501400557, "loss": 0.3184, "step": 26752 }, { "epoch": 14.945810055865921, "grad_norm": 0.46052247285842896, "learning_rate": 0.00025380952380952383, "loss": 0.4291, "step": 26753 }, { "epoch": 14.946368715083798, "grad_norm": 0.785688579082489, "learning_rate": 0.00025378151260504203, "loss": 0.3504, "step": 26754 }, { "epoch": 14.946927374301676, "grad_norm": 0.3419393301010132, "learning_rate": 0.00025375350140056024, "loss": 0.4197, "step": 26755 }, { "epoch": 14.947486033519553, "grad_norm": 0.4605628550052643, "learning_rate": 0.00025372549019607845, "loss": 0.4112, "step": 26756 }, { "epoch": 
14.94804469273743, "grad_norm": 0.5389098525047302, "learning_rate": 0.0002536974789915966, "loss": 0.4735, "step": 26757 }, { "epoch": 14.948603351955308, "grad_norm": 1.1555759906768799, "learning_rate": 0.00025366946778711486, "loss": 0.3496, "step": 26758 }, { "epoch": 14.949162011173184, "grad_norm": 0.44598883390426636, "learning_rate": 0.00025364145658263306, "loss": 0.4304, "step": 26759 }, { "epoch": 14.949720670391061, "grad_norm": 0.5165014266967773, "learning_rate": 0.00025361344537815127, "loss": 0.3378, "step": 26760 }, { "epoch": 14.95027932960894, "grad_norm": 0.4520178735256195, "learning_rate": 0.0002535854341736695, "loss": 0.508, "step": 26761 }, { "epoch": 14.950837988826816, "grad_norm": 0.36179137229919434, "learning_rate": 0.0002535574229691877, "loss": 0.3353, "step": 26762 }, { "epoch": 14.951396648044692, "grad_norm": 0.5026635527610779, "learning_rate": 0.0002535294117647059, "loss": 0.3965, "step": 26763 }, { "epoch": 14.951955307262569, "grad_norm": 0.38730064034461975, "learning_rate": 0.0002535014005602241, "loss": 0.3938, "step": 26764 }, { "epoch": 14.952513966480447, "grad_norm": 0.43364083766937256, "learning_rate": 0.0002534733893557423, "loss": 0.4268, "step": 26765 }, { "epoch": 14.953072625698324, "grad_norm": 0.4257733225822449, "learning_rate": 0.0002534453781512605, "loss": 0.4203, "step": 26766 }, { "epoch": 14.9536312849162, "grad_norm": 0.3969273567199707, "learning_rate": 0.0002534173669467787, "loss": 0.3456, "step": 26767 }, { "epoch": 14.954189944134079, "grad_norm": 0.551905632019043, "learning_rate": 0.0002533893557422969, "loss": 0.4225, "step": 26768 }, { "epoch": 14.954748603351955, "grad_norm": 0.4354240298271179, "learning_rate": 0.0002533613445378151, "loss": 0.4485, "step": 26769 }, { "epoch": 14.955307262569832, "grad_norm": 0.5617095232009888, "learning_rate": 0.0002533333333333334, "loss": 0.3515, "step": 26770 }, { "epoch": 14.955865921787709, "grad_norm": 0.4644061028957367, "learning_rate": 
0.00025330532212885154, "loss": 0.4768, "step": 26771 }, { "epoch": 14.956424581005587, "grad_norm": 0.48649847507476807, "learning_rate": 0.00025327731092436974, "loss": 0.4254, "step": 26772 }, { "epoch": 14.956983240223463, "grad_norm": 0.5363298654556274, "learning_rate": 0.00025324929971988795, "loss": 0.4469, "step": 26773 }, { "epoch": 14.95754189944134, "grad_norm": 1.2023565769195557, "learning_rate": 0.00025322128851540615, "loss": 0.2982, "step": 26774 }, { "epoch": 14.958100558659218, "grad_norm": 0.6308413147926331, "learning_rate": 0.0002531932773109244, "loss": 0.4076, "step": 26775 }, { "epoch": 14.958659217877095, "grad_norm": 0.484457403421402, "learning_rate": 0.00025316526610644257, "loss": 0.2778, "step": 26776 }, { "epoch": 14.959217877094972, "grad_norm": 0.5184215903282166, "learning_rate": 0.00025313725490196077, "loss": 0.3792, "step": 26777 }, { "epoch": 14.95977653631285, "grad_norm": 0.400098592042923, "learning_rate": 0.00025310924369747903, "loss": 0.4089, "step": 26778 }, { "epoch": 14.960335195530726, "grad_norm": 0.6525869369506836, "learning_rate": 0.0002530812324929972, "loss": 0.5099, "step": 26779 }, { "epoch": 14.960893854748603, "grad_norm": 0.5836340188980103, "learning_rate": 0.00025305322128851544, "loss": 0.458, "step": 26780 }, { "epoch": 14.961452513966481, "grad_norm": 0.5297774076461792, "learning_rate": 0.0002530252100840336, "loss": 0.5553, "step": 26781 }, { "epoch": 14.962011173184358, "grad_norm": 0.35775622725486755, "learning_rate": 0.0002529971988795518, "loss": 0.3315, "step": 26782 }, { "epoch": 14.962569832402234, "grad_norm": 0.4294080436229706, "learning_rate": 0.00025296918767507006, "loss": 0.4052, "step": 26783 }, { "epoch": 14.963128491620111, "grad_norm": 0.401673823595047, "learning_rate": 0.0002529411764705882, "loss": 0.3491, "step": 26784 }, { "epoch": 14.96368715083799, "grad_norm": 0.5867675542831421, "learning_rate": 0.0002529131652661065, "loss": 0.4043, "step": 26785 }, { "epoch": 
14.964245810055866, "grad_norm": 0.46166130900382996, "learning_rate": 0.0002528851540616247, "loss": 0.5118, "step": 26786 }, { "epoch": 14.964804469273743, "grad_norm": 0.9252844452857971, "learning_rate": 0.00025285714285714283, "loss": 0.3603, "step": 26787 }, { "epoch": 14.96536312849162, "grad_norm": 0.9635888338088989, "learning_rate": 0.0002528291316526611, "loss": 0.2898, "step": 26788 }, { "epoch": 14.965921787709497, "grad_norm": 0.4883910119533539, "learning_rate": 0.00025280112044817924, "loss": 0.5281, "step": 26789 }, { "epoch": 14.966480446927374, "grad_norm": 0.5012143850326538, "learning_rate": 0.0002527731092436975, "loss": 0.3591, "step": 26790 }, { "epoch": 14.96703910614525, "grad_norm": 0.38108256459236145, "learning_rate": 0.0002527450980392157, "loss": 0.4126, "step": 26791 }, { "epoch": 14.967597765363129, "grad_norm": 0.6020956635475159, "learning_rate": 0.00025271708683473386, "loss": 0.4843, "step": 26792 }, { "epoch": 14.968156424581005, "grad_norm": 0.6270828247070312, "learning_rate": 0.0002526890756302521, "loss": 0.5059, "step": 26793 }, { "epoch": 14.968715083798882, "grad_norm": 2.904100179672241, "learning_rate": 0.00025266106442577033, "loss": 0.4719, "step": 26794 }, { "epoch": 14.96927374301676, "grad_norm": 0.5739738345146179, "learning_rate": 0.00025263305322128853, "loss": 0.3722, "step": 26795 }, { "epoch": 14.969832402234637, "grad_norm": 0.2860737442970276, "learning_rate": 0.00025260504201680674, "loss": 0.2672, "step": 26796 }, { "epoch": 14.970391061452514, "grad_norm": 0.3919770419597626, "learning_rate": 0.0002525770308123249, "loss": 0.4256, "step": 26797 }, { "epoch": 14.970949720670392, "grad_norm": 0.3273077607154846, "learning_rate": 0.00025254901960784315, "loss": 0.3114, "step": 26798 }, { "epoch": 14.971508379888268, "grad_norm": 0.4783824384212494, "learning_rate": 0.00025252100840336136, "loss": 0.3996, "step": 26799 }, { "epoch": 14.972067039106145, "grad_norm": 1.8253722190856934, "learning_rate": 
0.00025249299719887956, "loss": 0.4797, "step": 26800 }, { "epoch": 14.972625698324022, "grad_norm": 0.5969604253768921, "learning_rate": 0.00025246498599439777, "loss": 0.5129, "step": 26801 }, { "epoch": 14.9731843575419, "grad_norm": 0.7188569903373718, "learning_rate": 0.000252436974789916, "loss": 0.4162, "step": 26802 }, { "epoch": 14.973743016759776, "grad_norm": 0.38322216272354126, "learning_rate": 0.0002524089635854342, "loss": 0.3425, "step": 26803 }, { "epoch": 14.974301675977653, "grad_norm": 0.5722339749336243, "learning_rate": 0.0002523809523809524, "loss": 0.5008, "step": 26804 }, { "epoch": 14.974860335195531, "grad_norm": 0.37354356050491333, "learning_rate": 0.0002523529411764706, "loss": 0.3874, "step": 26805 }, { "epoch": 14.975418994413408, "grad_norm": 0.40424275398254395, "learning_rate": 0.0002523249299719888, "loss": 0.4555, "step": 26806 }, { "epoch": 14.975977653631285, "grad_norm": 0.5796392560005188, "learning_rate": 0.000252296918767507, "loss": 0.4723, "step": 26807 }, { "epoch": 14.976536312849163, "grad_norm": 0.6225695610046387, "learning_rate": 0.0002522689075630252, "loss": 0.4004, "step": 26808 }, { "epoch": 14.97709497206704, "grad_norm": 1.1140315532684326, "learning_rate": 0.0002522408963585434, "loss": 0.4631, "step": 26809 }, { "epoch": 14.977653631284916, "grad_norm": 0.3727976083755493, "learning_rate": 0.0002522128851540617, "loss": 0.3779, "step": 26810 }, { "epoch": 14.978212290502793, "grad_norm": 0.5018436908721924, "learning_rate": 0.00025218487394957983, "loss": 0.2911, "step": 26811 }, { "epoch": 14.978770949720671, "grad_norm": 1.0696431398391724, "learning_rate": 0.00025215686274509804, "loss": 0.384, "step": 26812 }, { "epoch": 14.979329608938547, "grad_norm": 0.5404345393180847, "learning_rate": 0.00025212885154061624, "loss": 0.425, "step": 26813 }, { "epoch": 14.979888268156424, "grad_norm": 0.8666000366210938, "learning_rate": 0.00025210084033613445, "loss": 0.589, "step": 26814 }, { "epoch": 
14.980446927374302, "grad_norm": 0.4966289699077606, "learning_rate": 0.0002520728291316527, "loss": 0.4294, "step": 26815 }, { "epoch": 14.981005586592179, "grad_norm": 0.7560771107673645, "learning_rate": 0.00025204481792717086, "loss": 0.4236, "step": 26816 }, { "epoch": 14.981564245810056, "grad_norm": 0.6763277053833008, "learning_rate": 0.00025201680672268907, "loss": 0.4047, "step": 26817 }, { "epoch": 14.982122905027932, "grad_norm": 0.5242845416069031, "learning_rate": 0.0002519887955182073, "loss": 0.3426, "step": 26818 }, { "epoch": 14.98268156424581, "grad_norm": 0.6580713987350464, "learning_rate": 0.0002519607843137255, "loss": 0.4821, "step": 26819 }, { "epoch": 14.983240223463687, "grad_norm": 0.3427670896053314, "learning_rate": 0.00025193277310924374, "loss": 0.3155, "step": 26820 }, { "epoch": 14.983798882681564, "grad_norm": 0.5841524004936218, "learning_rate": 0.0002519047619047619, "loss": 0.4119, "step": 26821 }, { "epoch": 14.984357541899442, "grad_norm": 0.6364976167678833, "learning_rate": 0.0002518767507002801, "loss": 0.4236, "step": 26822 }, { "epoch": 14.984916201117318, "grad_norm": 0.3674076795578003, "learning_rate": 0.00025184873949579836, "loss": 0.4101, "step": 26823 }, { "epoch": 14.985474860335195, "grad_norm": 1.5305808782577515, "learning_rate": 0.0002518207282913165, "loss": 0.4783, "step": 26824 }, { "epoch": 14.986033519553073, "grad_norm": 0.5475207567214966, "learning_rate": 0.00025179271708683477, "loss": 0.3689, "step": 26825 }, { "epoch": 14.98659217877095, "grad_norm": 0.5333097577095032, "learning_rate": 0.000251764705882353, "loss": 0.4149, "step": 26826 }, { "epoch": 14.987150837988827, "grad_norm": 0.4091620147228241, "learning_rate": 0.0002517366946778711, "loss": 0.41, "step": 26827 }, { "epoch": 14.987709497206703, "grad_norm": 0.40512973070144653, "learning_rate": 0.0002517086834733894, "loss": 0.3662, "step": 26828 }, { "epoch": 14.988268156424581, "grad_norm": 0.6344616413116455, "learning_rate": 
0.00025168067226890754, "loss": 0.3644, "step": 26829 }, { "epoch": 14.988826815642458, "grad_norm": 0.6171712875366211, "learning_rate": 0.0002516526610644258, "loss": 0.4622, "step": 26830 }, { "epoch": 14.989385474860335, "grad_norm": 0.48157447576522827, "learning_rate": 0.000251624649859944, "loss": 0.4254, "step": 26831 }, { "epoch": 14.989944134078213, "grad_norm": 1.4633336067199707, "learning_rate": 0.00025159663865546216, "loss": 0.406, "step": 26832 }, { "epoch": 14.99050279329609, "grad_norm": 0.4426704943180084, "learning_rate": 0.0002515686274509804, "loss": 0.3601, "step": 26833 }, { "epoch": 14.991061452513966, "grad_norm": 0.9304420351982117, "learning_rate": 0.0002515406162464986, "loss": 0.3857, "step": 26834 }, { "epoch": 14.991620111731844, "grad_norm": 0.47383683919906616, "learning_rate": 0.0002515126050420168, "loss": 0.4666, "step": 26835 }, { "epoch": 14.992178770949721, "grad_norm": 0.8339270353317261, "learning_rate": 0.00025148459383753503, "loss": 0.4184, "step": 26836 }, { "epoch": 14.992737430167598, "grad_norm": 4.5467329025268555, "learning_rate": 0.0002514565826330532, "loss": 0.4058, "step": 26837 }, { "epoch": 14.993296089385474, "grad_norm": 1.2365411520004272, "learning_rate": 0.00025142857142857145, "loss": 0.3475, "step": 26838 }, { "epoch": 14.993854748603352, "grad_norm": 0.7254947423934937, "learning_rate": 0.00025140056022408965, "loss": 0.4221, "step": 26839 }, { "epoch": 14.994413407821229, "grad_norm": 0.3923681676387787, "learning_rate": 0.0002513725490196078, "loss": 0.3795, "step": 26840 }, { "epoch": 14.994972067039106, "grad_norm": 1.1441880464553833, "learning_rate": 0.00025134453781512606, "loss": 0.3237, "step": 26841 }, { "epoch": 14.995530726256984, "grad_norm": 0.613080620765686, "learning_rate": 0.00025131652661064427, "loss": 0.3942, "step": 26842 }, { "epoch": 14.99608938547486, "grad_norm": 0.47400137782096863, "learning_rate": 0.0002512885154061625, "loss": 0.3835, "step": 26843 }, { "epoch": 
14.996648044692737, "grad_norm": 0.8768868446350098, "learning_rate": 0.0002512605042016807, "loss": 0.593, "step": 26844 }, { "epoch": 14.997206703910614, "grad_norm": 0.6574596762657166, "learning_rate": 0.00025123249299719883, "loss": 0.5053, "step": 26845 }, { "epoch": 14.997765363128492, "grad_norm": 1.4516459703445435, "learning_rate": 0.0002512044817927171, "loss": 0.3991, "step": 26846 }, { "epoch": 14.998324022346369, "grad_norm": 0.4115583300590515, "learning_rate": 0.0002511764705882353, "loss": 0.4428, "step": 26847 }, { "epoch": 14.998882681564245, "grad_norm": 3.324688673019409, "learning_rate": 0.0002511484593837535, "loss": 0.3352, "step": 26848 }, { "epoch": 14.999441340782123, "grad_norm": 0.33019959926605225, "learning_rate": 0.0002511204481792717, "loss": 0.3327, "step": 26849 }, { "epoch": 15.0, "grad_norm": 0.45191580057144165, "learning_rate": 0.0002510924369747899, "loss": 0.3564, "step": 26850 }, { "epoch": 15.000558659217877, "grad_norm": 0.4028445780277252, "learning_rate": 0.0002510644257703081, "loss": 0.4243, "step": 26851 }, { "epoch": 15.001117318435755, "grad_norm": 0.6346094012260437, "learning_rate": 0.00025103641456582633, "loss": 0.5177, "step": 26852 }, { "epoch": 15.001675977653631, "grad_norm": 0.36450257897377014, "learning_rate": 0.00025100840336134454, "loss": 0.3586, "step": 26853 }, { "epoch": 15.002234636871508, "grad_norm": 0.3948482573032379, "learning_rate": 0.00025098039215686274, "loss": 0.4466, "step": 26854 }, { "epoch": 15.002793296089385, "grad_norm": 0.6200559139251709, "learning_rate": 0.00025095238095238095, "loss": 0.428, "step": 26855 }, { "epoch": 15.003351955307263, "grad_norm": 0.372334361076355, "learning_rate": 0.00025092436974789915, "loss": 0.3406, "step": 26856 }, { "epoch": 15.00391061452514, "grad_norm": 0.5261492133140564, "learning_rate": 0.00025089635854341736, "loss": 0.5081, "step": 26857 }, { "epoch": 15.004469273743016, "grad_norm": 0.4601307809352875, "learning_rate": 
0.0002508683473389356, "loss": 0.4763, "step": 26858 }, { "epoch": 15.005027932960894, "grad_norm": 0.877083957195282, "learning_rate": 0.00025084033613445377, "loss": 0.4749, "step": 26859 }, { "epoch": 15.005586592178771, "grad_norm": 0.3876457214355469, "learning_rate": 0.000250812324929972, "loss": 0.3666, "step": 26860 }, { "epoch": 15.006145251396648, "grad_norm": 0.4225273132324219, "learning_rate": 0.0002507843137254902, "loss": 0.3234, "step": 26861 }, { "epoch": 15.006703910614526, "grad_norm": 0.4907268285751343, "learning_rate": 0.0002507563025210084, "loss": 0.4391, "step": 26862 }, { "epoch": 15.007262569832402, "grad_norm": 0.5842012166976929, "learning_rate": 0.00025072829131652665, "loss": 0.3832, "step": 26863 }, { "epoch": 15.007821229050279, "grad_norm": 1.8074244260787964, "learning_rate": 0.0002507002801120448, "loss": 0.3827, "step": 26864 }, { "epoch": 15.008379888268156, "grad_norm": 0.5633991360664368, "learning_rate": 0.000250672268907563, "loss": 0.3771, "step": 26865 }, { "epoch": 15.008938547486034, "grad_norm": 0.47548773884773254, "learning_rate": 0.00025064425770308127, "loss": 0.3853, "step": 26866 }, { "epoch": 15.00949720670391, "grad_norm": 0.3436129689216614, "learning_rate": 0.0002506162464985994, "loss": 0.4163, "step": 26867 }, { "epoch": 15.010055865921787, "grad_norm": 0.37591785192489624, "learning_rate": 0.0002505882352941177, "loss": 0.3768, "step": 26868 }, { "epoch": 15.010614525139665, "grad_norm": 0.6241902709007263, "learning_rate": 0.00025056022408963583, "loss": 0.4407, "step": 26869 }, { "epoch": 15.011173184357542, "grad_norm": 0.4844948947429657, "learning_rate": 0.00025053221288515404, "loss": 0.3645, "step": 26870 }, { "epoch": 15.011731843575419, "grad_norm": 0.44438520073890686, "learning_rate": 0.0002505042016806723, "loss": 0.4263, "step": 26871 }, { "epoch": 15.012290502793297, "grad_norm": 0.32489725947380066, "learning_rate": 0.00025047619047619045, "loss": 0.3289, "step": 26872 }, { "epoch": 
15.012849162011173, "grad_norm": 0.4891221523284912, "learning_rate": 0.0002504481792717087, "loss": 0.4708, "step": 26873 }, { "epoch": 15.01340782122905, "grad_norm": 0.45323818922042847, "learning_rate": 0.0002504201680672269, "loss": 0.3345, "step": 26874 }, { "epoch": 15.013966480446927, "grad_norm": 0.43679043650627136, "learning_rate": 0.00025039215686274507, "loss": 0.3651, "step": 26875 }, { "epoch": 15.014525139664805, "grad_norm": 0.5581892728805542, "learning_rate": 0.00025036414565826333, "loss": 0.3524, "step": 26876 }, { "epoch": 15.015083798882682, "grad_norm": 0.4508143663406372, "learning_rate": 0.0002503361344537815, "loss": 0.4639, "step": 26877 }, { "epoch": 15.015642458100558, "grad_norm": 0.7554802298545837, "learning_rate": 0.00025030812324929974, "loss": 0.3419, "step": 26878 }, { "epoch": 15.016201117318436, "grad_norm": 1.1287566423416138, "learning_rate": 0.00025028011204481795, "loss": 0.3986, "step": 26879 }, { "epoch": 15.016759776536313, "grad_norm": 0.4794694781303406, "learning_rate": 0.0002502521008403361, "loss": 0.4132, "step": 26880 }, { "epoch": 15.01731843575419, "grad_norm": 0.4565139710903168, "learning_rate": 0.00025022408963585436, "loss": 0.4118, "step": 26881 }, { "epoch": 15.017877094972068, "grad_norm": 0.44634249806404114, "learning_rate": 0.00025019607843137256, "loss": 0.4228, "step": 26882 }, { "epoch": 15.018435754189944, "grad_norm": 0.5857980847358704, "learning_rate": 0.00025016806722689077, "loss": 0.3714, "step": 26883 }, { "epoch": 15.018994413407821, "grad_norm": 0.4245086908340454, "learning_rate": 0.000250140056022409, "loss": 0.3279, "step": 26884 }, { "epoch": 15.019553072625698, "grad_norm": 0.44360044598579407, "learning_rate": 0.0002501120448179271, "loss": 0.4227, "step": 26885 }, { "epoch": 15.020111731843576, "grad_norm": 1.079078197479248, "learning_rate": 0.0002500840336134454, "loss": 0.434, "step": 26886 }, { "epoch": 15.020670391061453, "grad_norm": 0.5442601442337036, "learning_rate": 
0.0002500560224089636, "loss": 0.5015, "step": 26887 }, { "epoch": 15.021229050279329, "grad_norm": 0.7010372877120972, "learning_rate": 0.0002500280112044818, "loss": 0.4554, "step": 26888 }, { "epoch": 15.021787709497207, "grad_norm": 0.36108502745628357, "learning_rate": 0.00025, "loss": 0.352, "step": 26889 }, { "epoch": 15.022346368715084, "grad_norm": 2.8384745121002197, "learning_rate": 0.0002499719887955182, "loss": 0.5172, "step": 26890 }, { "epoch": 15.02290502793296, "grad_norm": 0.5143072605133057, "learning_rate": 0.0002499439775910364, "loss": 0.3654, "step": 26891 }, { "epoch": 15.023463687150837, "grad_norm": 0.46261823177337646, "learning_rate": 0.0002499159663865546, "loss": 0.3173, "step": 26892 }, { "epoch": 15.024022346368715, "grad_norm": 0.4659637212753296, "learning_rate": 0.00024988795518207283, "loss": 0.3228, "step": 26893 }, { "epoch": 15.024581005586592, "grad_norm": 0.4876371920108795, "learning_rate": 0.00024985994397759104, "loss": 0.4118, "step": 26894 }, { "epoch": 15.025139664804469, "grad_norm": 1.7116811275482178, "learning_rate": 0.00024983193277310924, "loss": 0.4531, "step": 26895 }, { "epoch": 15.025698324022347, "grad_norm": 10.244782447814941, "learning_rate": 0.00024980392156862745, "loss": 0.4112, "step": 26896 }, { "epoch": 15.026256983240224, "grad_norm": 0.5708367824554443, "learning_rate": 0.00024977591036414565, "loss": 0.5848, "step": 26897 }, { "epoch": 15.0268156424581, "grad_norm": 0.37682056427001953, "learning_rate": 0.00024974789915966386, "loss": 0.3565, "step": 26898 }, { "epoch": 15.027374301675978, "grad_norm": 0.43208223581314087, "learning_rate": 0.00024971988795518207, "loss": 0.4851, "step": 26899 }, { "epoch": 15.027932960893855, "grad_norm": 0.5319404602050781, "learning_rate": 0.00024969187675070027, "loss": 0.5145, "step": 26900 }, { "epoch": 15.028491620111732, "grad_norm": 0.48241758346557617, "learning_rate": 0.00024966386554621853, "loss": 0.3877, "step": 26901 }, { "epoch": 
15.029050279329608, "grad_norm": 1.4814679622650146, "learning_rate": 0.0002496358543417367, "loss": 0.2605, "step": 26902 }, { "epoch": 15.029608938547486, "grad_norm": 0.37490302324295044, "learning_rate": 0.0002496078431372549, "loss": 0.3707, "step": 26903 }, { "epoch": 15.030167597765363, "grad_norm": 0.8533223867416382, "learning_rate": 0.0002495798319327731, "loss": 0.3715, "step": 26904 }, { "epoch": 15.03072625698324, "grad_norm": 0.45826980471611023, "learning_rate": 0.00024955182072829136, "loss": 0.3716, "step": 26905 }, { "epoch": 15.031284916201118, "grad_norm": 0.6275171041488647, "learning_rate": 0.00024952380952380956, "loss": 0.3928, "step": 26906 }, { "epoch": 15.031843575418995, "grad_norm": 0.8012722134590149, "learning_rate": 0.0002494957983193277, "loss": 0.3866, "step": 26907 }, { "epoch": 15.032402234636871, "grad_norm": 0.5740479230880737, "learning_rate": 0.0002494677871148459, "loss": 0.4378, "step": 26908 }, { "epoch": 15.03296089385475, "grad_norm": 0.5773026347160339, "learning_rate": 0.0002494397759103642, "loss": 0.3659, "step": 26909 }, { "epoch": 15.033519553072626, "grad_norm": 0.49632546305656433, "learning_rate": 0.0002494117647058824, "loss": 0.5214, "step": 26910 }, { "epoch": 15.034078212290503, "grad_norm": 0.5711356401443481, "learning_rate": 0.0002493837535014006, "loss": 0.4924, "step": 26911 }, { "epoch": 15.03463687150838, "grad_norm": 0.8759272694587708, "learning_rate": 0.00024935574229691874, "loss": 0.3887, "step": 26912 }, { "epoch": 15.035195530726257, "grad_norm": 0.649247944355011, "learning_rate": 0.000249327731092437, "loss": 0.2928, "step": 26913 }, { "epoch": 15.035754189944134, "grad_norm": 1.206963300704956, "learning_rate": 0.0002492997198879552, "loss": 0.4736, "step": 26914 }, { "epoch": 15.03631284916201, "grad_norm": 0.9956198930740356, "learning_rate": 0.0002492717086834734, "loss": 0.4638, "step": 26915 }, { "epoch": 15.036871508379889, "grad_norm": 0.5089482665061951, "learning_rate": 
0.0002492436974789916, "loss": 0.4372, "step": 26916 }, { "epoch": 15.037430167597766, "grad_norm": 0.4803427755832672, "learning_rate": 0.00024921568627450983, "loss": 0.6179, "step": 26917 }, { "epoch": 15.037988826815642, "grad_norm": 0.4337618947029114, "learning_rate": 0.00024918767507002803, "loss": 0.4552, "step": 26918 }, { "epoch": 15.03854748603352, "grad_norm": 0.3171527087688446, "learning_rate": 0.00024915966386554624, "loss": 0.2976, "step": 26919 }, { "epoch": 15.039106145251397, "grad_norm": 0.443774938583374, "learning_rate": 0.00024913165266106445, "loss": 0.5081, "step": 26920 }, { "epoch": 15.039664804469274, "grad_norm": 0.431959331035614, "learning_rate": 0.00024910364145658265, "loss": 0.3502, "step": 26921 }, { "epoch": 15.04022346368715, "grad_norm": 2.338465452194214, "learning_rate": 0.00024907563025210086, "loss": 0.4069, "step": 26922 }, { "epoch": 15.040782122905028, "grad_norm": 0.37305769324302673, "learning_rate": 0.00024904761904761906, "loss": 0.3302, "step": 26923 }, { "epoch": 15.041340782122905, "grad_norm": 0.4058387875556946, "learning_rate": 0.00024901960784313727, "loss": 0.3688, "step": 26924 }, { "epoch": 15.041899441340782, "grad_norm": 0.3835179805755615, "learning_rate": 0.0002489915966386555, "loss": 0.3507, "step": 26925 }, { "epoch": 15.04245810055866, "grad_norm": 0.764991283416748, "learning_rate": 0.0002489635854341737, "loss": 0.4481, "step": 26926 }, { "epoch": 15.043016759776537, "grad_norm": 0.4784719944000244, "learning_rate": 0.0002489355742296919, "loss": 0.4478, "step": 26927 }, { "epoch": 15.043575418994413, "grad_norm": 0.3947021961212158, "learning_rate": 0.0002489075630252101, "loss": 0.3699, "step": 26928 }, { "epoch": 15.04413407821229, "grad_norm": 0.40625664591789246, "learning_rate": 0.0002488795518207283, "loss": 0.3561, "step": 26929 }, { "epoch": 15.044692737430168, "grad_norm": 0.7485128045082092, "learning_rate": 0.0002488515406162465, "loss": 0.4737, "step": 26930 }, { "epoch": 
15.045251396648045, "grad_norm": 0.5016077756881714, "learning_rate": 0.0002488235294117647, "loss": 0.4761, "step": 26931 }, { "epoch": 15.045810055865921, "grad_norm": 0.40478381514549255, "learning_rate": 0.0002487955182072829, "loss": 0.4094, "step": 26932 }, { "epoch": 15.0463687150838, "grad_norm": 0.5221636295318604, "learning_rate": 0.0002487675070028011, "loss": 0.4465, "step": 26933 }, { "epoch": 15.046927374301676, "grad_norm": 0.5584438443183899, "learning_rate": 0.00024873949579831933, "loss": 0.3425, "step": 26934 }, { "epoch": 15.047486033519553, "grad_norm": 0.3996504545211792, "learning_rate": 0.00024871148459383754, "loss": 0.454, "step": 26935 }, { "epoch": 15.048044692737431, "grad_norm": 0.5384317636489868, "learning_rate": 0.00024868347338935574, "loss": 0.5427, "step": 26936 }, { "epoch": 15.048603351955308, "grad_norm": 1.0164932012557983, "learning_rate": 0.00024865546218487395, "loss": 0.4771, "step": 26937 }, { "epoch": 15.049162011173184, "grad_norm": 0.7885221838951111, "learning_rate": 0.00024862745098039215, "loss": 0.3507, "step": 26938 }, { "epoch": 15.04972067039106, "grad_norm": 0.5892857313156128, "learning_rate": 0.00024859943977591036, "loss": 0.4302, "step": 26939 }, { "epoch": 15.050279329608939, "grad_norm": 0.40044379234313965, "learning_rate": 0.00024857142857142857, "loss": 0.4147, "step": 26940 }, { "epoch": 15.050837988826816, "grad_norm": 0.4539005756378174, "learning_rate": 0.00024854341736694677, "loss": 0.3913, "step": 26941 }, { "epoch": 15.051396648044692, "grad_norm": 0.6547682881355286, "learning_rate": 0.000248515406162465, "loss": 0.4286, "step": 26942 }, { "epoch": 15.05195530726257, "grad_norm": 0.40776658058166504, "learning_rate": 0.0002484873949579832, "loss": 0.4446, "step": 26943 }, { "epoch": 15.052513966480447, "grad_norm": 8.208586692810059, "learning_rate": 0.0002484593837535014, "loss": 0.3818, "step": 26944 }, { "epoch": 15.053072625698324, "grad_norm": 0.5905068516731262, "learning_rate": 
0.00024843137254901965, "loss": 0.4897, "step": 26945 }, { "epoch": 15.053631284916202, "grad_norm": 0.5988060235977173, "learning_rate": 0.0002484033613445378, "loss": 0.3098, "step": 26946 }, { "epoch": 15.054189944134079, "grad_norm": 0.5745384693145752, "learning_rate": 0.000248375350140056, "loss": 0.5645, "step": 26947 }, { "epoch": 15.054748603351955, "grad_norm": 1.0670571327209473, "learning_rate": 0.0002483473389355742, "loss": 0.455, "step": 26948 }, { "epoch": 15.055307262569832, "grad_norm": 0.544528603553772, "learning_rate": 0.0002483193277310925, "loss": 0.399, "step": 26949 }, { "epoch": 15.05586592178771, "grad_norm": 0.3786354959011078, "learning_rate": 0.0002482913165266107, "loss": 0.3785, "step": 26950 }, { "epoch": 15.056424581005587, "grad_norm": 0.6824361085891724, "learning_rate": 0.00024826330532212883, "loss": 0.4394, "step": 26951 }, { "epoch": 15.056983240223463, "grad_norm": 0.3922429084777832, "learning_rate": 0.00024823529411764704, "loss": 0.4059, "step": 26952 }, { "epoch": 15.057541899441341, "grad_norm": 0.4455968737602234, "learning_rate": 0.0002482072829131653, "loss": 0.3817, "step": 26953 }, { "epoch": 15.058100558659218, "grad_norm": 0.467678964138031, "learning_rate": 0.0002481792717086835, "loss": 0.3993, "step": 26954 }, { "epoch": 15.058659217877095, "grad_norm": 0.3912041187286377, "learning_rate": 0.0002481512605042017, "loss": 0.3281, "step": 26955 }, { "epoch": 15.059217877094973, "grad_norm": 0.828059196472168, "learning_rate": 0.00024812324929971986, "loss": 0.3237, "step": 26956 }, { "epoch": 15.05977653631285, "grad_norm": 0.37380895018577576, "learning_rate": 0.0002480952380952381, "loss": 0.3674, "step": 26957 }, { "epoch": 15.060335195530726, "grad_norm": 0.4876594841480255, "learning_rate": 0.00024806722689075633, "loss": 0.369, "step": 26958 }, { "epoch": 15.060893854748603, "grad_norm": 0.2946512997150421, "learning_rate": 0.00024803921568627453, "loss": 0.2426, "step": 26959 }, { "epoch": 
15.061452513966481, "grad_norm": 0.4818108081817627, "learning_rate": 0.00024801120448179274, "loss": 0.367, "step": 26960 }, { "epoch": 15.062011173184358, "grad_norm": 4.84298849105835, "learning_rate": 0.00024798319327731095, "loss": 0.3869, "step": 26961 }, { "epoch": 15.062569832402234, "grad_norm": 0.45844560861587524, "learning_rate": 0.00024795518207282915, "loss": 0.3902, "step": 26962 }, { "epoch": 15.063128491620112, "grad_norm": 0.3915700912475586, "learning_rate": 0.00024792717086834736, "loss": 0.4014, "step": 26963 }, { "epoch": 15.063687150837989, "grad_norm": 0.8484997153282166, "learning_rate": 0.00024789915966386556, "loss": 0.3755, "step": 26964 }, { "epoch": 15.064245810055866, "grad_norm": 0.397550493478775, "learning_rate": 0.00024787114845938377, "loss": 0.4255, "step": 26965 }, { "epoch": 15.064804469273742, "grad_norm": 0.36226579546928406, "learning_rate": 0.000247843137254902, "loss": 0.3431, "step": 26966 }, { "epoch": 15.06536312849162, "grad_norm": 0.586125910282135, "learning_rate": 0.0002478151260504202, "loss": 0.4467, "step": 26967 }, { "epoch": 15.065921787709497, "grad_norm": 0.42878472805023193, "learning_rate": 0.0002477871148459384, "loss": 0.3871, "step": 26968 }, { "epoch": 15.066480446927374, "grad_norm": 1.2628026008605957, "learning_rate": 0.0002477591036414566, "loss": 0.4338, "step": 26969 }, { "epoch": 15.067039106145252, "grad_norm": 0.5514110326766968, "learning_rate": 0.0002477310924369748, "loss": 0.4237, "step": 26970 }, { "epoch": 15.067597765363129, "grad_norm": 0.3650316298007965, "learning_rate": 0.000247703081232493, "loss": 0.3568, "step": 26971 }, { "epoch": 15.068156424581005, "grad_norm": 0.4012395143508911, "learning_rate": 0.0002476750700280112, "loss": 0.3276, "step": 26972 }, { "epoch": 15.068715083798883, "grad_norm": 0.4632839560508728, "learning_rate": 0.0002476470588235294, "loss": 0.4044, "step": 26973 }, { "epoch": 15.06927374301676, "grad_norm": 0.9237446188926697, "learning_rate": 
0.0002476190476190476, "loss": 0.4136, "step": 26974 }, { "epoch": 15.069832402234637, "grad_norm": 0.4552759528160095, "learning_rate": 0.00024759103641456583, "loss": 0.4752, "step": 26975 }, { "epoch": 15.070391061452513, "grad_norm": 0.48059314489364624, "learning_rate": 0.00024756302521008404, "loss": 0.3667, "step": 26976 }, { "epoch": 15.070949720670392, "grad_norm": 0.4038139283657074, "learning_rate": 0.00024753501400560224, "loss": 0.3644, "step": 26977 }, { "epoch": 15.071508379888268, "grad_norm": 0.4506329894065857, "learning_rate": 0.00024750700280112045, "loss": 0.5779, "step": 26978 }, { "epoch": 15.072067039106145, "grad_norm": 0.3530978560447693, "learning_rate": 0.00024747899159663865, "loss": 0.4101, "step": 26979 }, { "epoch": 15.072625698324023, "grad_norm": 1.1197973489761353, "learning_rate": 0.00024745098039215686, "loss": 0.4629, "step": 26980 }, { "epoch": 15.0731843575419, "grad_norm": 1.083927869796753, "learning_rate": 0.00024742296918767507, "loss": 0.5189, "step": 26981 }, { "epoch": 15.073743016759776, "grad_norm": 0.579332709312439, "learning_rate": 0.00024739495798319327, "loss": 0.3133, "step": 26982 }, { "epoch": 15.074301675977654, "grad_norm": 0.6353923678398132, "learning_rate": 0.0002473669467787115, "loss": 0.4556, "step": 26983 }, { "epoch": 15.074860335195531, "grad_norm": 0.6581987142562866, "learning_rate": 0.0002473389355742297, "loss": 0.5038, "step": 26984 }, { "epoch": 15.075418994413408, "grad_norm": 0.682462751865387, "learning_rate": 0.0002473109243697479, "loss": 0.4109, "step": 26985 }, { "epoch": 15.075977653631284, "grad_norm": 1.4290266036987305, "learning_rate": 0.0002472829131652661, "loss": 0.3351, "step": 26986 }, { "epoch": 15.076536312849163, "grad_norm": 1.4346846342086792, "learning_rate": 0.0002472549019607843, "loss": 0.5061, "step": 26987 }, { "epoch": 15.077094972067039, "grad_norm": 0.32376420497894287, "learning_rate": 0.0002472268907563025, "loss": 0.3993, "step": 26988 }, { "epoch": 
15.077653631284916, "grad_norm": 0.7088323831558228, "learning_rate": 0.00024719887955182077, "loss": 0.3565, "step": 26989 }, { "epoch": 15.078212290502794, "grad_norm": 0.5038853287696838, "learning_rate": 0.0002471708683473389, "loss": 0.425, "step": 26990 }, { "epoch": 15.07877094972067, "grad_norm": 1.1923103332519531, "learning_rate": 0.0002471428571428571, "loss": 0.485, "step": 26991 }, { "epoch": 15.079329608938547, "grad_norm": 0.3794712424278259, "learning_rate": 0.00024711484593837533, "loss": 0.4284, "step": 26992 }, { "epoch": 15.079888268156424, "grad_norm": 0.36067497730255127, "learning_rate": 0.0002470868347338936, "loss": 0.2901, "step": 26993 }, { "epoch": 15.080446927374302, "grad_norm": 0.4367261230945587, "learning_rate": 0.0002470588235294118, "loss": 0.4183, "step": 26994 }, { "epoch": 15.081005586592179, "grad_norm": 0.3977581262588501, "learning_rate": 0.00024703081232492995, "loss": 0.3706, "step": 26995 }, { "epoch": 15.081564245810055, "grad_norm": 0.4364907145500183, "learning_rate": 0.00024700280112044816, "loss": 0.3747, "step": 26996 }, { "epoch": 15.082122905027934, "grad_norm": 0.5650056004524231, "learning_rate": 0.0002469747899159664, "loss": 0.4664, "step": 26997 }, { "epoch": 15.08268156424581, "grad_norm": 0.4446296989917755, "learning_rate": 0.0002469467787114846, "loss": 0.584, "step": 26998 }, { "epoch": 15.083240223463687, "grad_norm": 0.45611563324928284, "learning_rate": 0.00024691876750700283, "loss": 0.501, "step": 26999 }, { "epoch": 15.083798882681565, "grad_norm": 0.4472367465496063, "learning_rate": 0.000246890756302521, "loss": 0.473, "step": 27000 }, { "epoch": 15.083798882681565, "eval_cer": 0.0859709993126248, "eval_loss": 0.32471296191215515, "eval_runtime": 55.5103, "eval_samples_per_second": 81.751, "eval_steps_per_second": 5.116, "eval_wer": 0.3408843328398648, "step": 27000 }, { "epoch": 15.084357541899442, "grad_norm": 0.46555376052856445, "learning_rate": 0.00024686274509803924, "loss": 0.5431, "step": 
27001 }, { "epoch": 15.084916201117318, "grad_norm": 0.9119547605514526, "learning_rate": 0.00024683473389355745, "loss": 0.4241, "step": 27002 }, { "epoch": 15.085474860335195, "grad_norm": 0.42160847783088684, "learning_rate": 0.00024680672268907565, "loss": 0.3458, "step": 27003 }, { "epoch": 15.086033519553073, "grad_norm": 0.486910343170166, "learning_rate": 0.00024677871148459386, "loss": 0.4357, "step": 27004 }, { "epoch": 15.08659217877095, "grad_norm": 0.3392389714717865, "learning_rate": 0.00024675070028011206, "loss": 0.2862, "step": 27005 }, { "epoch": 15.087150837988826, "grad_norm": 0.9551404714584351, "learning_rate": 0.00024672268907563027, "loss": 0.306, "step": 27006 }, { "epoch": 15.087709497206705, "grad_norm": 0.6245831847190857, "learning_rate": 0.0002466946778711485, "loss": 0.4347, "step": 27007 }, { "epoch": 15.088268156424581, "grad_norm": 0.46229642629623413, "learning_rate": 0.0002466666666666667, "loss": 0.4521, "step": 27008 }, { "epoch": 15.088826815642458, "grad_norm": 0.3591441512107849, "learning_rate": 0.0002466386554621849, "loss": 0.4022, "step": 27009 }, { "epoch": 15.089385474860336, "grad_norm": 0.5125793814659119, "learning_rate": 0.0002466106442577031, "loss": 0.3479, "step": 27010 }, { "epoch": 15.089944134078213, "grad_norm": 0.5043748617172241, "learning_rate": 0.0002465826330532213, "loss": 0.4097, "step": 27011 }, { "epoch": 15.09050279329609, "grad_norm": 0.346449613571167, "learning_rate": 0.0002465546218487395, "loss": 0.3676, "step": 27012 }, { "epoch": 15.091061452513966, "grad_norm": 0.5347242951393127, "learning_rate": 0.0002465266106442577, "loss": 0.5437, "step": 27013 }, { "epoch": 15.091620111731844, "grad_norm": 0.5479172468185425, "learning_rate": 0.0002464985994397759, "loss": 0.4798, "step": 27014 }, { "epoch": 15.09217877094972, "grad_norm": 2.947571277618408, "learning_rate": 0.0002464705882352941, "loss": 0.3597, "step": 27015 }, { "epoch": 15.092737430167597, "grad_norm": 0.442832350730896, 
"learning_rate": 0.00024644257703081233, "loss": 0.3518, "step": 27016 }, { "epoch": 15.093296089385476, "grad_norm": 0.3627994656562805, "learning_rate": 0.00024641456582633054, "loss": 0.3948, "step": 27017 }, { "epoch": 15.093854748603352, "grad_norm": 0.7864519953727722, "learning_rate": 0.00024638655462184874, "loss": 0.4945, "step": 27018 }, { "epoch": 15.094413407821229, "grad_norm": 0.4636199474334717, "learning_rate": 0.00024635854341736695, "loss": 0.359, "step": 27019 }, { "epoch": 15.094972067039107, "grad_norm": 0.9081053137779236, "learning_rate": 0.00024633053221288515, "loss": 0.3362, "step": 27020 }, { "epoch": 15.095530726256984, "grad_norm": 0.44295617938041687, "learning_rate": 0.00024630252100840336, "loss": 0.4572, "step": 27021 }, { "epoch": 15.09608938547486, "grad_norm": 0.48747849464416504, "learning_rate": 0.00024627450980392157, "loss": 0.5052, "step": 27022 }, { "epoch": 15.096648044692737, "grad_norm": 8.42707347869873, "learning_rate": 0.00024624649859943977, "loss": 0.371, "step": 27023 }, { "epoch": 15.097206703910615, "grad_norm": 0.6480430364608765, "learning_rate": 0.000246218487394958, "loss": 0.4103, "step": 27024 }, { "epoch": 15.097765363128492, "grad_norm": 0.3670368790626526, "learning_rate": 0.0002461904761904762, "loss": 0.4084, "step": 27025 }, { "epoch": 15.098324022346368, "grad_norm": 0.6102190017700195, "learning_rate": 0.0002461624649859944, "loss": 0.3836, "step": 27026 }, { "epoch": 15.098882681564247, "grad_norm": 0.46493402123451233, "learning_rate": 0.0002461344537815126, "loss": 0.4032, "step": 27027 }, { "epoch": 15.099441340782123, "grad_norm": 3.027873992919922, "learning_rate": 0.0002461064425770308, "loss": 0.3891, "step": 27028 }, { "epoch": 15.1, "grad_norm": 0.4689657390117645, "learning_rate": 0.000246078431372549, "loss": 0.3921, "step": 27029 }, { "epoch": 15.100558659217878, "grad_norm": 0.34698885679244995, "learning_rate": 0.0002460504201680672, "loss": 0.3879, "step": 27030 }, { "epoch": 
15.101117318435755, "grad_norm": 0.371849924325943, "learning_rate": 0.0002460224089635854, "loss": 0.3345, "step": 27031 }, { "epoch": 15.101675977653631, "grad_norm": 0.6494749784469604, "learning_rate": 0.0002459943977591036, "loss": 0.3818, "step": 27032 }, { "epoch": 15.102234636871508, "grad_norm": 0.4158179759979248, "learning_rate": 0.0002459663865546219, "loss": 0.3808, "step": 27033 }, { "epoch": 15.102793296089386, "grad_norm": 0.45474332571029663, "learning_rate": 0.00024593837535014004, "loss": 0.4842, "step": 27034 }, { "epoch": 15.103351955307263, "grad_norm": 0.5171681046485901, "learning_rate": 0.00024591036414565824, "loss": 0.3572, "step": 27035 }, { "epoch": 15.10391061452514, "grad_norm": 0.4828847050666809, "learning_rate": 0.00024588235294117645, "loss": 0.4643, "step": 27036 }, { "epoch": 15.104469273743018, "grad_norm": 0.45485252141952515, "learning_rate": 0.0002458543417366947, "loss": 0.5088, "step": 27037 }, { "epoch": 15.105027932960894, "grad_norm": 0.42376044392585754, "learning_rate": 0.0002458263305322129, "loss": 0.5013, "step": 27038 }, { "epoch": 15.10558659217877, "grad_norm": 0.5620428323745728, "learning_rate": 0.00024579831932773107, "loss": 0.2976, "step": 27039 }, { "epoch": 15.106145251396647, "grad_norm": 0.721748411655426, "learning_rate": 0.0002457703081232493, "loss": 0.3624, "step": 27040 }, { "epoch": 15.106703910614526, "grad_norm": 0.34397101402282715, "learning_rate": 0.00024574229691876753, "loss": 0.3228, "step": 27041 }, { "epoch": 15.107262569832402, "grad_norm": 3.018893241882324, "learning_rate": 0.00024571428571428574, "loss": 0.5769, "step": 27042 }, { "epoch": 15.107821229050279, "grad_norm": 0.6660287976264954, "learning_rate": 0.00024568627450980395, "loss": 0.435, "step": 27043 }, { "epoch": 15.108379888268157, "grad_norm": 0.43279436230659485, "learning_rate": 0.0002456582633053221, "loss": 0.5081, "step": 27044 }, { "epoch": 15.108938547486034, "grad_norm": 1.3323110342025757, "learning_rate": 
0.00024563025210084036, "loss": 0.4175, "step": 27045 }, { "epoch": 15.10949720670391, "grad_norm": 0.6359979510307312, "learning_rate": 0.00024560224089635856, "loss": 0.3153, "step": 27046 }, { "epoch": 15.110055865921789, "grad_norm": 0.4616943299770355, "learning_rate": 0.00024557422969187677, "loss": 0.4214, "step": 27047 }, { "epoch": 15.110614525139665, "grad_norm": 0.5856008529663086, "learning_rate": 0.000245546218487395, "loss": 0.3628, "step": 27048 }, { "epoch": 15.111173184357542, "grad_norm": 0.40157410502433777, "learning_rate": 0.0002455182072829132, "loss": 0.2982, "step": 27049 }, { "epoch": 15.111731843575418, "grad_norm": 0.6337383389472961, "learning_rate": 0.0002454901960784314, "loss": 0.3596, "step": 27050 }, { "epoch": 15.112290502793297, "grad_norm": 0.45262759923934937, "learning_rate": 0.0002454621848739496, "loss": 0.3678, "step": 27051 }, { "epoch": 15.112849162011173, "grad_norm": 0.825549840927124, "learning_rate": 0.0002454341736694678, "loss": 0.3288, "step": 27052 }, { "epoch": 15.11340782122905, "grad_norm": 0.39222854375839233, "learning_rate": 0.000245406162464986, "loss": 0.3562, "step": 27053 }, { "epoch": 15.113966480446928, "grad_norm": 0.41247180104255676, "learning_rate": 0.0002453781512605042, "loss": 0.3624, "step": 27054 }, { "epoch": 15.114525139664805, "grad_norm": 0.7672168016433716, "learning_rate": 0.0002453501400560224, "loss": 0.501, "step": 27055 }, { "epoch": 15.115083798882681, "grad_norm": 0.41790902614593506, "learning_rate": 0.0002453221288515406, "loss": 0.3563, "step": 27056 }, { "epoch": 15.11564245810056, "grad_norm": 0.4824005365371704, "learning_rate": 0.00024529411764705883, "loss": 0.388, "step": 27057 }, { "epoch": 15.116201117318436, "grad_norm": 0.49185648560523987, "learning_rate": 0.00024526610644257704, "loss": 0.5412, "step": 27058 }, { "epoch": 15.116759776536313, "grad_norm": 0.5381485819816589, "learning_rate": 0.00024523809523809524, "loss": 0.4175, "step": 27059 }, { "epoch": 
15.11731843575419, "grad_norm": 0.41351935267448425, "learning_rate": 0.00024521008403361345, "loss": 0.4051, "step": 27060 }, { "epoch": 15.117877094972068, "grad_norm": 0.5445733666419983, "learning_rate": 0.00024518207282913165, "loss": 0.4901, "step": 27061 }, { "epoch": 15.118435754189944, "grad_norm": 0.5667095184326172, "learning_rate": 0.00024515406162464986, "loss": 0.4413, "step": 27062 }, { "epoch": 15.11899441340782, "grad_norm": 0.44915780425071716, "learning_rate": 0.00024512605042016807, "loss": 0.492, "step": 27063 }, { "epoch": 15.119553072625699, "grad_norm": 1.5476468801498413, "learning_rate": 0.00024509803921568627, "loss": 0.5613, "step": 27064 }, { "epoch": 15.120111731843576, "grad_norm": 0.32369574904441833, "learning_rate": 0.0002450700280112045, "loss": 0.3763, "step": 27065 }, { "epoch": 15.120670391061452, "grad_norm": 0.5431506037712097, "learning_rate": 0.0002450420168067227, "loss": 0.4661, "step": 27066 }, { "epoch": 15.121229050279329, "grad_norm": 0.4893670976161957, "learning_rate": 0.0002450140056022409, "loss": 0.3427, "step": 27067 }, { "epoch": 15.121787709497207, "grad_norm": 1.858221411705017, "learning_rate": 0.0002449859943977591, "loss": 0.3877, "step": 27068 }, { "epoch": 15.122346368715084, "grad_norm": 0.31977301836013794, "learning_rate": 0.0002449579831932773, "loss": 0.3144, "step": 27069 }, { "epoch": 15.12290502793296, "grad_norm": 1.4009097814559937, "learning_rate": 0.0002449299719887955, "loss": 0.3541, "step": 27070 }, { "epoch": 15.123463687150839, "grad_norm": 0.44302475452423096, "learning_rate": 0.0002449019607843137, "loss": 0.3841, "step": 27071 }, { "epoch": 15.124022346368715, "grad_norm": 0.3800410330295563, "learning_rate": 0.0002448739495798319, "loss": 0.3564, "step": 27072 }, { "epoch": 15.124581005586592, "grad_norm": 0.7728914022445679, "learning_rate": 0.0002448459383753502, "loss": 0.3359, "step": 27073 }, { "epoch": 15.12513966480447, "grad_norm": 0.4465992748737335, "learning_rate": 
0.00024481792717086833, "loss": 0.4417, "step": 27074 }, { "epoch": 15.125698324022347, "grad_norm": 0.4095946252346039, "learning_rate": 0.00024478991596638654, "loss": 0.3654, "step": 27075 }, { "epoch": 15.126256983240223, "grad_norm": 0.5216981768608093, "learning_rate": 0.00024476190476190474, "loss": 0.4827, "step": 27076 }, { "epoch": 15.1268156424581, "grad_norm": 0.5526641011238098, "learning_rate": 0.000244733893557423, "loss": 0.4068, "step": 27077 }, { "epoch": 15.127374301675978, "grad_norm": 0.572192907333374, "learning_rate": 0.0002447058823529412, "loss": 0.5303, "step": 27078 }, { "epoch": 15.127932960893855, "grad_norm": 1.3528988361358643, "learning_rate": 0.00024467787114845936, "loss": 0.5299, "step": 27079 }, { "epoch": 15.128491620111731, "grad_norm": 0.3897814452648163, "learning_rate": 0.00024464985994397757, "loss": 0.3441, "step": 27080 }, { "epoch": 15.12905027932961, "grad_norm": 0.49050432443618774, "learning_rate": 0.00024462184873949583, "loss": 0.5119, "step": 27081 }, { "epoch": 15.129608938547486, "grad_norm": 0.4133954644203186, "learning_rate": 0.00024459383753501403, "loss": 0.3326, "step": 27082 }, { "epoch": 15.130167597765363, "grad_norm": 0.47314685583114624, "learning_rate": 0.00024456582633053224, "loss": 0.4209, "step": 27083 }, { "epoch": 15.130726256983241, "grad_norm": 0.3983404338359833, "learning_rate": 0.0002445378151260504, "loss": 0.4779, "step": 27084 }, { "epoch": 15.131284916201118, "grad_norm": 0.4799702763557434, "learning_rate": 0.00024450980392156865, "loss": 0.4399, "step": 27085 }, { "epoch": 15.131843575418994, "grad_norm": 0.38889190554618835, "learning_rate": 0.00024448179271708686, "loss": 0.4156, "step": 27086 }, { "epoch": 15.13240223463687, "grad_norm": 0.34335896372795105, "learning_rate": 0.00024445378151260506, "loss": 0.3567, "step": 27087 }, { "epoch": 15.132960893854749, "grad_norm": 1.9918476343154907, "learning_rate": 0.0002444257703081232, "loss": 0.2787, "step": 27088 }, { "epoch": 
15.133519553072626, "grad_norm": 0.39316263794898987, "learning_rate": 0.0002443977591036415, "loss": 0.3886, "step": 27089 }, { "epoch": 15.134078212290502, "grad_norm": 0.4902094900608063, "learning_rate": 0.0002443697478991597, "loss": 0.4207, "step": 27090 }, { "epoch": 15.13463687150838, "grad_norm": 0.5925796627998352, "learning_rate": 0.0002443417366946779, "loss": 0.7048, "step": 27091 }, { "epoch": 15.135195530726257, "grad_norm": 0.6203573346138, "learning_rate": 0.0002443137254901961, "loss": 0.4067, "step": 27092 }, { "epoch": 15.135754189944134, "grad_norm": 0.4595261812210083, "learning_rate": 0.0002442857142857143, "loss": 0.4124, "step": 27093 }, { "epoch": 15.136312849162012, "grad_norm": 0.5828721523284912, "learning_rate": 0.0002442577030812325, "loss": 0.4914, "step": 27094 }, { "epoch": 15.136871508379889, "grad_norm": 0.45298030972480774, "learning_rate": 0.0002442296918767507, "loss": 0.4889, "step": 27095 }, { "epoch": 15.137430167597765, "grad_norm": 0.6941516399383545, "learning_rate": 0.0002442016806722689, "loss": 0.4544, "step": 27096 }, { "epoch": 15.137988826815642, "grad_norm": 0.48597705364227295, "learning_rate": 0.0002441736694677871, "loss": 0.4048, "step": 27097 }, { "epoch": 15.13854748603352, "grad_norm": 0.5737661719322205, "learning_rate": 0.00024414565826330533, "loss": 0.5196, "step": 27098 }, { "epoch": 15.139106145251397, "grad_norm": 0.37921103835105896, "learning_rate": 0.00024411764705882354, "loss": 0.3723, "step": 27099 }, { "epoch": 15.139664804469273, "grad_norm": 0.5729641318321228, "learning_rate": 0.00024408963585434174, "loss": 0.3805, "step": 27100 }, { "epoch": 15.140223463687152, "grad_norm": 0.3232637047767639, "learning_rate": 0.00024406162464985995, "loss": 0.3659, "step": 27101 }, { "epoch": 15.140782122905028, "grad_norm": 6.596498966217041, "learning_rate": 0.00024403361344537815, "loss": 0.3631, "step": 27102 }, { "epoch": 15.141340782122905, "grad_norm": 0.7614020705223083, "learning_rate": 
0.00024400560224089636, "loss": 0.4226, "step": 27103 }, { "epoch": 15.141899441340781, "grad_norm": 0.3891860246658325, "learning_rate": 0.00024397759103641457, "loss": 0.4035, "step": 27104 }, { "epoch": 15.14245810055866, "grad_norm": 1.087943196296692, "learning_rate": 0.00024394957983193277, "loss": 0.4083, "step": 27105 }, { "epoch": 15.143016759776536, "grad_norm": 0.33843058347702026, "learning_rate": 0.00024392156862745098, "loss": 0.3387, "step": 27106 }, { "epoch": 15.143575418994413, "grad_norm": 0.5082100629806519, "learning_rate": 0.0002438935574229692, "loss": 0.3525, "step": 27107 }, { "epoch": 15.144134078212291, "grad_norm": 0.40680164098739624, "learning_rate": 0.0002438655462184874, "loss": 0.381, "step": 27108 }, { "epoch": 15.144692737430168, "grad_norm": 0.333112508058548, "learning_rate": 0.0002438375350140056, "loss": 0.3623, "step": 27109 }, { "epoch": 15.145251396648044, "grad_norm": 0.4140739142894745, "learning_rate": 0.0002438095238095238, "loss": 0.3258, "step": 27110 }, { "epoch": 15.145810055865923, "grad_norm": 0.4597927927970886, "learning_rate": 0.00024378151260504203, "loss": 0.3499, "step": 27111 }, { "epoch": 15.1463687150838, "grad_norm": 0.43684789538383484, "learning_rate": 0.00024375350140056024, "loss": 0.3495, "step": 27112 }, { "epoch": 15.146927374301676, "grad_norm": 0.3556741774082184, "learning_rate": 0.00024372549019607842, "loss": 0.4257, "step": 27113 }, { "epoch": 15.147486033519552, "grad_norm": 0.46280211210250854, "learning_rate": 0.00024369747899159663, "loss": 0.4655, "step": 27114 }, { "epoch": 15.14804469273743, "grad_norm": 0.4690878391265869, "learning_rate": 0.00024366946778711486, "loss": 0.3436, "step": 27115 }, { "epoch": 15.148603351955307, "grad_norm": 0.477507621049881, "learning_rate": 0.00024364145658263306, "loss": 0.447, "step": 27116 }, { "epoch": 15.149162011173184, "grad_norm": 0.3387720584869385, "learning_rate": 0.00024361344537815127, "loss": 0.3915, "step": 27117 }, { "epoch": 
15.149720670391062, "grad_norm": 0.476091593503952, "learning_rate": 0.00024358543417366945, "loss": 0.4431, "step": 27118 }, { "epoch": 15.150279329608939, "grad_norm": 2.260544776916504, "learning_rate": 0.00024355742296918768, "loss": 0.384, "step": 27119 }, { "epoch": 15.150837988826815, "grad_norm": 0.5228288173675537, "learning_rate": 0.0002435294117647059, "loss": 0.5477, "step": 27120 }, { "epoch": 15.151396648044694, "grad_norm": 0.6211914420127869, "learning_rate": 0.0002435014005602241, "loss": 0.5798, "step": 27121 }, { "epoch": 15.15195530726257, "grad_norm": 0.35126590728759766, "learning_rate": 0.00024347338935574233, "loss": 0.3928, "step": 27122 }, { "epoch": 15.152513966480447, "grad_norm": 0.48009130358695984, "learning_rate": 0.0002434453781512605, "loss": 0.5798, "step": 27123 }, { "epoch": 15.153072625698323, "grad_norm": 0.6149377226829529, "learning_rate": 0.0002434173669467787, "loss": 0.4288, "step": 27124 }, { "epoch": 15.153631284916202, "grad_norm": 0.798728883266449, "learning_rate": 0.00024338935574229692, "loss": 0.5143, "step": 27125 }, { "epoch": 15.154189944134078, "grad_norm": 0.5272912383079529, "learning_rate": 0.00024336134453781515, "loss": 0.4576, "step": 27126 }, { "epoch": 15.154748603351955, "grad_norm": 0.3481975793838501, "learning_rate": 0.00024333333333333336, "loss": 0.446, "step": 27127 }, { "epoch": 15.155307262569833, "grad_norm": 0.7295636534690857, "learning_rate": 0.00024330532212885154, "loss": 0.4959, "step": 27128 }, { "epoch": 15.15586592178771, "grad_norm": 0.5239155888557434, "learning_rate": 0.00024327731092436974, "loss": 0.504, "step": 27129 }, { "epoch": 15.156424581005586, "grad_norm": 0.8161219358444214, "learning_rate": 0.00024324929971988798, "loss": 0.3815, "step": 27130 }, { "epoch": 15.156983240223465, "grad_norm": 0.3975829780101776, "learning_rate": 0.00024322128851540618, "loss": 0.4839, "step": 27131 }, { "epoch": 15.157541899441341, "grad_norm": 0.35358983278274536, "learning_rate": 
0.0002431932773109244, "loss": 0.3193, "step": 27132 }, { "epoch": 15.158100558659218, "grad_norm": 0.6780710220336914, "learning_rate": 0.00024316526610644257, "loss": 0.4728, "step": 27133 }, { "epoch": 15.158659217877094, "grad_norm": 0.5312126278877258, "learning_rate": 0.0002431372549019608, "loss": 0.3756, "step": 27134 }, { "epoch": 15.159217877094973, "grad_norm": 1.957397699356079, "learning_rate": 0.000243109243697479, "loss": 0.3745, "step": 27135 }, { "epoch": 15.15977653631285, "grad_norm": 0.42384931445121765, "learning_rate": 0.0002430812324929972, "loss": 0.3645, "step": 27136 }, { "epoch": 15.160335195530726, "grad_norm": 0.4002234935760498, "learning_rate": 0.00024305322128851542, "loss": 0.2955, "step": 27137 }, { "epoch": 15.160893854748604, "grad_norm": 1.1644526720046997, "learning_rate": 0.00024302521008403362, "loss": 0.4545, "step": 27138 }, { "epoch": 15.16145251396648, "grad_norm": 0.4228382110595703, "learning_rate": 0.00024299719887955183, "loss": 0.3317, "step": 27139 }, { "epoch": 15.162011173184357, "grad_norm": 0.4425305724143982, "learning_rate": 0.00024296918767507004, "loss": 0.4211, "step": 27140 }, { "epoch": 15.162569832402234, "grad_norm": 12.102898597717285, "learning_rate": 0.00024294117647058824, "loss": 0.4857, "step": 27141 }, { "epoch": 15.163128491620112, "grad_norm": 0.46449530124664307, "learning_rate": 0.00024291316526610645, "loss": 0.3903, "step": 27142 }, { "epoch": 15.163687150837989, "grad_norm": 0.4231211245059967, "learning_rate": 0.00024288515406162465, "loss": 0.4803, "step": 27143 }, { "epoch": 15.164245810055865, "grad_norm": 0.5800387859344482, "learning_rate": 0.00024285714285714286, "loss": 0.4655, "step": 27144 }, { "epoch": 15.164804469273744, "grad_norm": 0.4049888253211975, "learning_rate": 0.00024282913165266107, "loss": 0.4367, "step": 27145 }, { "epoch": 15.16536312849162, "grad_norm": 0.39952030777931213, "learning_rate": 0.0002428011204481793, "loss": 0.5176, "step": 27146 }, { "epoch": 
15.165921787709497, "grad_norm": 0.42050716280937195, "learning_rate": 0.00024277310924369748, "loss": 0.3216, "step": 27147 }, { "epoch": 15.166480446927375, "grad_norm": 0.38417303562164307, "learning_rate": 0.00024274509803921568, "loss": 0.5151, "step": 27148 }, { "epoch": 15.167039106145252, "grad_norm": 0.4076318144798279, "learning_rate": 0.0002427170868347339, "loss": 0.3785, "step": 27149 }, { "epoch": 15.167597765363128, "grad_norm": 0.6365754008293152, "learning_rate": 0.00024268907563025212, "loss": 0.411, "step": 27150 }, { "epoch": 15.168156424581005, "grad_norm": 2.771817922592163, "learning_rate": 0.00024266106442577033, "loss": 0.4113, "step": 27151 }, { "epoch": 15.168715083798883, "grad_norm": 0.7303066253662109, "learning_rate": 0.0002426330532212885, "loss": 0.3963, "step": 27152 }, { "epoch": 15.16927374301676, "grad_norm": 0.42182886600494385, "learning_rate": 0.0002426050420168067, "loss": 0.4615, "step": 27153 }, { "epoch": 15.169832402234636, "grad_norm": 0.8525797724723816, "learning_rate": 0.00024257703081232495, "loss": 0.482, "step": 27154 }, { "epoch": 15.170391061452515, "grad_norm": 0.6056182980537415, "learning_rate": 0.00024254901960784315, "loss": 0.5641, "step": 27155 }, { "epoch": 15.170949720670391, "grad_norm": 0.4830930233001709, "learning_rate": 0.00024252100840336136, "loss": 0.4042, "step": 27156 }, { "epoch": 15.171508379888268, "grad_norm": 0.42725157737731934, "learning_rate": 0.00024249299719887954, "loss": 0.3956, "step": 27157 }, { "epoch": 15.172067039106146, "grad_norm": 0.4476284682750702, "learning_rate": 0.00024246498599439777, "loss": 0.3605, "step": 27158 }, { "epoch": 15.172625698324023, "grad_norm": 0.3446992337703705, "learning_rate": 0.00024243697478991598, "loss": 0.3235, "step": 27159 }, { "epoch": 15.1731843575419, "grad_norm": 0.43621525168418884, "learning_rate": 0.00024240896358543418, "loss": 0.4406, "step": 27160 }, { "epoch": 15.173743016759776, "grad_norm": 4.17518424987793, "learning_rate": 
0.0002423809523809524, "loss": 0.3035, "step": 27161 }, { "epoch": 15.174301675977654, "grad_norm": 0.5337277054786682, "learning_rate": 0.0002423529411764706, "loss": 0.3987, "step": 27162 }, { "epoch": 15.17486033519553, "grad_norm": 0.6094560623168945, "learning_rate": 0.0002423249299719888, "loss": 0.4167, "step": 27163 }, { "epoch": 15.175418994413407, "grad_norm": 0.4864679276943207, "learning_rate": 0.000242296918767507, "loss": 0.3847, "step": 27164 }, { "epoch": 15.175977653631286, "grad_norm": 0.3789011836051941, "learning_rate": 0.0002422689075630252, "loss": 0.3829, "step": 27165 }, { "epoch": 15.176536312849162, "grad_norm": 0.38551798462867737, "learning_rate": 0.00024224089635854345, "loss": 0.3448, "step": 27166 }, { "epoch": 15.177094972067039, "grad_norm": 0.3869551122188568, "learning_rate": 0.00024221288515406162, "loss": 0.42, "step": 27167 }, { "epoch": 15.177653631284917, "grad_norm": 0.6148666739463806, "learning_rate": 0.00024218487394957983, "loss": 0.4083, "step": 27168 }, { "epoch": 15.178212290502794, "grad_norm": 0.6997018456459045, "learning_rate": 0.00024215686274509804, "loss": 0.4197, "step": 27169 }, { "epoch": 15.17877094972067, "grad_norm": 0.3802711069583893, "learning_rate": 0.00024212885154061627, "loss": 0.3537, "step": 27170 }, { "epoch": 15.179329608938547, "grad_norm": 0.5877300500869751, "learning_rate": 0.00024210084033613448, "loss": 0.3941, "step": 27171 }, { "epoch": 15.179888268156425, "grad_norm": 0.6996586918830872, "learning_rate": 0.00024207282913165265, "loss": 0.4312, "step": 27172 }, { "epoch": 15.180446927374302, "grad_norm": 0.6761998534202576, "learning_rate": 0.00024204481792717086, "loss": 0.4189, "step": 27173 }, { "epoch": 15.181005586592178, "grad_norm": 0.3972788155078888, "learning_rate": 0.0002420168067226891, "loss": 0.4239, "step": 27174 }, { "epoch": 15.181564245810057, "grad_norm": 0.4193545877933502, "learning_rate": 0.0002419887955182073, "loss": 0.3773, "step": 27175 }, { "epoch": 
15.182122905027933, "grad_norm": 0.5859658718109131, "learning_rate": 0.0002419607843137255, "loss": 0.3704, "step": 27176 }, { "epoch": 15.18268156424581, "grad_norm": 0.46136415004730225, "learning_rate": 0.00024193277310924368, "loss": 0.3605, "step": 27177 }, { "epoch": 15.183240223463686, "grad_norm": 0.7353127598762512, "learning_rate": 0.00024190476190476192, "loss": 0.4138, "step": 27178 }, { "epoch": 15.183798882681565, "grad_norm": 0.43678024411201477, "learning_rate": 0.00024187675070028012, "loss": 0.3953, "step": 27179 }, { "epoch": 15.184357541899441, "grad_norm": 0.5883775353431702, "learning_rate": 0.00024184873949579833, "loss": 0.4165, "step": 27180 }, { "epoch": 15.184916201117318, "grad_norm": 0.36661794781684875, "learning_rate": 0.00024182072829131654, "loss": 0.3876, "step": 27181 }, { "epoch": 15.185474860335196, "grad_norm": 0.48775869607925415, "learning_rate": 0.00024179271708683474, "loss": 0.4548, "step": 27182 }, { "epoch": 15.186033519553073, "grad_norm": 0.34850120544433594, "learning_rate": 0.00024176470588235295, "loss": 0.4022, "step": 27183 }, { "epoch": 15.18659217877095, "grad_norm": 0.34781205654144287, "learning_rate": 0.00024173669467787115, "loss": 0.4063, "step": 27184 }, { "epoch": 15.187150837988828, "grad_norm": 0.49330341815948486, "learning_rate": 0.00024170868347338936, "loss": 0.4032, "step": 27185 }, { "epoch": 15.187709497206704, "grad_norm": 0.48733147978782654, "learning_rate": 0.0002416806722689076, "loss": 0.3515, "step": 27186 }, { "epoch": 15.18826815642458, "grad_norm": 0.9458677768707275, "learning_rate": 0.00024165266106442577, "loss": 0.3293, "step": 27187 }, { "epoch": 15.188826815642457, "grad_norm": 0.43216556310653687, "learning_rate": 0.00024162464985994398, "loss": 0.4078, "step": 27188 }, { "epoch": 15.189385474860336, "grad_norm": 19.454490661621094, "learning_rate": 0.00024159663865546218, "loss": 0.4541, "step": 27189 }, { "epoch": 15.189944134078212, "grad_norm": 0.4774804711341858, 
"learning_rate": 0.00024156862745098042, "loss": 0.344, "step": 27190 }, { "epoch": 15.190502793296089, "grad_norm": 0.5158601999282837, "learning_rate": 0.00024154061624649862, "loss": 0.4167, "step": 27191 }, { "epoch": 15.191061452513967, "grad_norm": 1.0138782262802124, "learning_rate": 0.0002415126050420168, "loss": 0.4612, "step": 27192 }, { "epoch": 15.191620111731844, "grad_norm": 0.5057836771011353, "learning_rate": 0.000241484593837535, "loss": 0.5445, "step": 27193 }, { "epoch": 15.19217877094972, "grad_norm": 0.3696221113204956, "learning_rate": 0.00024145658263305324, "loss": 0.4381, "step": 27194 }, { "epoch": 15.192737430167599, "grad_norm": 0.41393572092056274, "learning_rate": 0.00024142857142857145, "loss": 0.2883, "step": 27195 }, { "epoch": 15.193296089385475, "grad_norm": 0.35773953795433044, "learning_rate": 0.00024140056022408963, "loss": 0.3677, "step": 27196 }, { "epoch": 15.193854748603352, "grad_norm": 1.09688138961792, "learning_rate": 0.00024137254901960783, "loss": 0.4206, "step": 27197 }, { "epoch": 15.194413407821228, "grad_norm": 0.44415491819381714, "learning_rate": 0.00024134453781512606, "loss": 0.5319, "step": 27198 }, { "epoch": 15.194972067039107, "grad_norm": 0.5444587469100952, "learning_rate": 0.00024131652661064427, "loss": 0.3968, "step": 27199 }, { "epoch": 15.195530726256983, "grad_norm": 0.5443543791770935, "learning_rate": 0.00024128851540616248, "loss": 0.4141, "step": 27200 }, { "epoch": 15.19608938547486, "grad_norm": 0.714827299118042, "learning_rate": 0.00024126050420168066, "loss": 0.5278, "step": 27201 }, { "epoch": 15.196648044692738, "grad_norm": 0.3461098372936249, "learning_rate": 0.0002412324929971989, "loss": 0.3499, "step": 27202 }, { "epoch": 15.197206703910615, "grad_norm": 0.5639128088951111, "learning_rate": 0.0002412044817927171, "loss": 0.4245, "step": 27203 }, { "epoch": 15.197765363128491, "grad_norm": 0.504483699798584, "learning_rate": 0.0002411764705882353, "loss": 0.4451, "step": 27204 }, { 
"epoch": 15.19832402234637, "grad_norm": 0.4270140826702118, "learning_rate": 0.0002411484593837535, "loss": 0.3925, "step": 27205 }, { "epoch": 15.198882681564246, "grad_norm": 0.40988901257514954, "learning_rate": 0.0002411204481792717, "loss": 0.3553, "step": 27206 }, { "epoch": 15.199441340782123, "grad_norm": 0.3633969724178314, "learning_rate": 0.00024109243697478992, "loss": 0.3032, "step": 27207 }, { "epoch": 15.2, "grad_norm": 0.3978801965713501, "learning_rate": 0.00024106442577030812, "loss": 0.4529, "step": 27208 }, { "epoch": 15.200558659217878, "grad_norm": 0.36953550577163696, "learning_rate": 0.00024103641456582633, "loss": 0.3472, "step": 27209 }, { "epoch": 15.201117318435754, "grad_norm": 0.7324294447898865, "learning_rate": 0.00024100840336134456, "loss": 0.4125, "step": 27210 }, { "epoch": 15.20167597765363, "grad_norm": 0.38765665888786316, "learning_rate": 0.00024098039215686274, "loss": 0.3249, "step": 27211 }, { "epoch": 15.202234636871509, "grad_norm": 0.37759941816329956, "learning_rate": 0.00024095238095238095, "loss": 0.343, "step": 27212 }, { "epoch": 15.202793296089386, "grad_norm": 0.5258983969688416, "learning_rate": 0.00024092436974789915, "loss": 0.4944, "step": 27213 }, { "epoch": 15.203351955307262, "grad_norm": 0.5161382555961609, "learning_rate": 0.0002408963585434174, "loss": 0.4012, "step": 27214 }, { "epoch": 15.203910614525139, "grad_norm": 0.5770924687385559, "learning_rate": 0.0002408683473389356, "loss": 0.3351, "step": 27215 }, { "epoch": 15.204469273743017, "grad_norm": 0.4455718994140625, "learning_rate": 0.00024084033613445377, "loss": 0.4163, "step": 27216 }, { "epoch": 15.205027932960894, "grad_norm": 0.7514647841453552, "learning_rate": 0.00024081232492997198, "loss": 0.4094, "step": 27217 }, { "epoch": 15.20558659217877, "grad_norm": 0.4450298249721527, "learning_rate": 0.0002407843137254902, "loss": 0.3284, "step": 27218 }, { "epoch": 15.206145251396649, "grad_norm": 0.3868100941181183, "learning_rate": 
0.00024075630252100842, "loss": 0.328, "step": 27219 }, { "epoch": 15.206703910614525, "grad_norm": 0.6516246199607849, "learning_rate": 0.00024072829131652662, "loss": 0.369, "step": 27220 }, { "epoch": 15.207262569832402, "grad_norm": 0.43434417247772217, "learning_rate": 0.0002407002801120448, "loss": 0.4005, "step": 27221 }, { "epoch": 15.20782122905028, "grad_norm": 0.770481526851654, "learning_rate": 0.00024067226890756304, "loss": 0.4172, "step": 27222 }, { "epoch": 15.208379888268157, "grad_norm": 0.38107791543006897, "learning_rate": 0.00024064425770308124, "loss": 0.3336, "step": 27223 }, { "epoch": 15.208938547486033, "grad_norm": 1.1834373474121094, "learning_rate": 0.00024061624649859945, "loss": 0.3957, "step": 27224 }, { "epoch": 15.20949720670391, "grad_norm": 0.6148368120193481, "learning_rate": 0.00024058823529411765, "loss": 0.5837, "step": 27225 }, { "epoch": 15.210055865921788, "grad_norm": 0.43569695949554443, "learning_rate": 0.00024056022408963586, "loss": 0.4507, "step": 27226 }, { "epoch": 15.210614525139665, "grad_norm": 0.4305015206336975, "learning_rate": 0.00024053221288515407, "loss": 0.3597, "step": 27227 }, { "epoch": 15.211173184357541, "grad_norm": 3.1079533100128174, "learning_rate": 0.00024050420168067227, "loss": 0.6983, "step": 27228 }, { "epoch": 15.21173184357542, "grad_norm": 0.4841807782649994, "learning_rate": 0.00024047619047619048, "loss": 0.3871, "step": 27229 }, { "epoch": 15.212290502793296, "grad_norm": 0.4471410810947418, "learning_rate": 0.0002404481792717087, "loss": 0.3148, "step": 27230 }, { "epoch": 15.212849162011173, "grad_norm": 0.639410138130188, "learning_rate": 0.0002404201680672269, "loss": 0.4899, "step": 27231 }, { "epoch": 15.213407821229051, "grad_norm": 0.5526193976402283, "learning_rate": 0.0002403921568627451, "loss": 0.4907, "step": 27232 }, { "epoch": 15.213966480446928, "grad_norm": 0.4611920416355133, "learning_rate": 0.0002403641456582633, "loss": 0.4557, "step": 27233 }, { "epoch": 
15.214525139664804, "grad_norm": 0.3367268741130829, "learning_rate": 0.00024033613445378153, "loss": 0.4035, "step": 27234 }, { "epoch": 15.21508379888268, "grad_norm": 0.3978441655635834, "learning_rate": 0.00024030812324929974, "loss": 0.4656, "step": 27235 }, { "epoch": 15.21564245810056, "grad_norm": 0.43446311354637146, "learning_rate": 0.00024028011204481792, "loss": 0.4488, "step": 27236 }, { "epoch": 15.216201117318436, "grad_norm": 0.43057745695114136, "learning_rate": 0.00024025210084033613, "loss": 0.4285, "step": 27237 }, { "epoch": 15.216759776536312, "grad_norm": 0.3746228814125061, "learning_rate": 0.00024022408963585436, "loss": 0.385, "step": 27238 }, { "epoch": 15.21731843575419, "grad_norm": 0.5968604683876038, "learning_rate": 0.00024019607843137256, "loss": 0.3689, "step": 27239 }, { "epoch": 15.217877094972067, "grad_norm": 0.4818195402622223, "learning_rate": 0.00024016806722689077, "loss": 0.3884, "step": 27240 }, { "epoch": 15.218435754189944, "grad_norm": 0.41954466700553894, "learning_rate": 0.00024014005602240895, "loss": 0.3841, "step": 27241 }, { "epoch": 15.21899441340782, "grad_norm": 0.41375160217285156, "learning_rate": 0.00024011204481792718, "loss": 0.4826, "step": 27242 }, { "epoch": 15.219553072625699, "grad_norm": 0.44006311893463135, "learning_rate": 0.0002400840336134454, "loss": 0.3743, "step": 27243 }, { "epoch": 15.220111731843575, "grad_norm": 0.44837290048599243, "learning_rate": 0.0002400560224089636, "loss": 0.4754, "step": 27244 }, { "epoch": 15.220670391061452, "grad_norm": 0.5105819702148438, "learning_rate": 0.0002400280112044818, "loss": 0.3687, "step": 27245 }, { "epoch": 15.22122905027933, "grad_norm": 0.4841049909591675, "learning_rate": 0.00024, "loss": 0.4296, "step": 27246 }, { "epoch": 15.221787709497207, "grad_norm": 0.37922903895378113, "learning_rate": 0.0002399719887955182, "loss": 0.4071, "step": 27247 }, { "epoch": 15.222346368715083, "grad_norm": 0.43594953417778015, "learning_rate": 
0.00023994397759103642, "loss": 0.3984, "step": 27248 }, { "epoch": 15.222905027932962, "grad_norm": 0.512876570224762, "learning_rate": 0.00023991596638655462, "loss": 0.4027, "step": 27249 }, { "epoch": 15.223463687150838, "grad_norm": 0.7205897569656372, "learning_rate": 0.00023988795518207286, "loss": 0.463, "step": 27250 }, { "epoch": 15.224022346368715, "grad_norm": 0.43102848529815674, "learning_rate": 0.00023985994397759104, "loss": 0.5187, "step": 27251 }, { "epoch": 15.224581005586591, "grad_norm": 0.47895359992980957, "learning_rate": 0.00023983193277310924, "loss": 0.4228, "step": 27252 }, { "epoch": 15.22513966480447, "grad_norm": 0.5614460110664368, "learning_rate": 0.00023980392156862745, "loss": 0.4512, "step": 27253 }, { "epoch": 15.225698324022346, "grad_norm": 0.5633761882781982, "learning_rate": 0.00023977591036414568, "loss": 0.5342, "step": 27254 }, { "epoch": 15.226256983240223, "grad_norm": 0.5918115377426147, "learning_rate": 0.00023974789915966386, "loss": 0.3865, "step": 27255 }, { "epoch": 15.226815642458101, "grad_norm": 0.8485002517700195, "learning_rate": 0.00023971988795518207, "loss": 0.6571, "step": 27256 }, { "epoch": 15.227374301675978, "grad_norm": 0.42712152004241943, "learning_rate": 0.00023969187675070027, "loss": 0.3879, "step": 27257 }, { "epoch": 15.227932960893854, "grad_norm": 0.47626161575317383, "learning_rate": 0.0002396638655462185, "loss": 0.4658, "step": 27258 }, { "epoch": 15.228491620111733, "grad_norm": 0.5027827620506287, "learning_rate": 0.0002396358543417367, "loss": 0.559, "step": 27259 }, { "epoch": 15.22905027932961, "grad_norm": 2.392272472381592, "learning_rate": 0.0002396078431372549, "loss": 0.2938, "step": 27260 }, { "epoch": 15.229608938547486, "grad_norm": 0.49962446093559265, "learning_rate": 0.0002395798319327731, "loss": 0.428, "step": 27261 }, { "epoch": 15.230167597765362, "grad_norm": 0.4339938163757324, "learning_rate": 0.00023955182072829133, "loss": 0.4497, "step": 27262 }, { "epoch": 
15.23072625698324, "grad_norm": 0.6094764471054077, "learning_rate": 0.00023952380952380954, "loss": 0.3842, "step": 27263 }, { "epoch": 15.231284916201117, "grad_norm": 0.4525226354598999, "learning_rate": 0.00023949579831932774, "loss": 0.4778, "step": 27264 }, { "epoch": 15.231843575418994, "grad_norm": 0.4125398099422455, "learning_rate": 0.00023946778711484592, "loss": 0.3265, "step": 27265 }, { "epoch": 15.232402234636872, "grad_norm": 0.5147817134857178, "learning_rate": 0.00023943977591036415, "loss": 0.4861, "step": 27266 }, { "epoch": 15.232960893854749, "grad_norm": 1.3364759683609009, "learning_rate": 0.00023941176470588236, "loss": 0.4358, "step": 27267 }, { "epoch": 15.233519553072625, "grad_norm": 0.44241103529930115, "learning_rate": 0.00023938375350140057, "loss": 0.4308, "step": 27268 }, { "epoch": 15.234078212290504, "grad_norm": 0.3694785535335541, "learning_rate": 0.00023935574229691877, "loss": 0.3838, "step": 27269 }, { "epoch": 15.23463687150838, "grad_norm": 0.41772425174713135, "learning_rate": 0.00023932773109243698, "loss": 0.3951, "step": 27270 }, { "epoch": 15.235195530726257, "grad_norm": 0.7389469742774963, "learning_rate": 0.00023929971988795518, "loss": 0.7359, "step": 27271 }, { "epoch": 15.235754189944133, "grad_norm": 0.49500662088394165, "learning_rate": 0.0002392717086834734, "loss": 0.4697, "step": 27272 }, { "epoch": 15.236312849162012, "grad_norm": 0.5220922231674194, "learning_rate": 0.0002392436974789916, "loss": 0.468, "step": 27273 }, { "epoch": 15.236871508379888, "grad_norm": 0.4546958804130554, "learning_rate": 0.00023921568627450983, "loss": 0.4194, "step": 27274 }, { "epoch": 15.237430167597765, "grad_norm": 0.3412879705429077, "learning_rate": 0.000239187675070028, "loss": 0.4142, "step": 27275 }, { "epoch": 15.237988826815643, "grad_norm": 0.40437960624694824, "learning_rate": 0.0002391596638655462, "loss": 0.4745, "step": 27276 }, { "epoch": 15.23854748603352, "grad_norm": 0.5808608531951904, "learning_rate": 
0.00023913165266106442, "loss": 0.3694, "step": 27277 }, { "epoch": 15.239106145251396, "grad_norm": 0.46674829721450806, "learning_rate": 0.00023910364145658265, "loss": 0.3589, "step": 27278 }, { "epoch": 15.239664804469275, "grad_norm": 0.8124476075172424, "learning_rate": 0.00023907563025210086, "loss": 0.3476, "step": 27279 }, { "epoch": 15.240223463687151, "grad_norm": 0.35851016640663147, "learning_rate": 0.00023904761904761904, "loss": 0.3176, "step": 27280 }, { "epoch": 15.240782122905028, "grad_norm": 0.5494292378425598, "learning_rate": 0.00023901960784313724, "loss": 0.4843, "step": 27281 }, { "epoch": 15.241340782122904, "grad_norm": 0.38169774413108826, "learning_rate": 0.00023899159663865548, "loss": 0.4697, "step": 27282 }, { "epoch": 15.241899441340783, "grad_norm": 0.476744145154953, "learning_rate": 0.00023896358543417368, "loss": 0.4574, "step": 27283 }, { "epoch": 15.24245810055866, "grad_norm": 0.5318389534950256, "learning_rate": 0.0002389355742296919, "loss": 0.3775, "step": 27284 }, { "epoch": 15.243016759776536, "grad_norm": 0.6290057301521301, "learning_rate": 0.00023890756302521007, "loss": 0.3823, "step": 27285 }, { "epoch": 15.243575418994414, "grad_norm": 4.899540424346924, "learning_rate": 0.0002388795518207283, "loss": 0.4312, "step": 27286 }, { "epoch": 15.24413407821229, "grad_norm": 0.3728036880493164, "learning_rate": 0.0002388515406162465, "loss": 0.3267, "step": 27287 }, { "epoch": 15.244692737430167, "grad_norm": 0.38292205333709717, "learning_rate": 0.0002388235294117647, "loss": 0.3905, "step": 27288 }, { "epoch": 15.245251396648044, "grad_norm": 0.4266759157180786, "learning_rate": 0.00023879551820728292, "loss": 0.4193, "step": 27289 }, { "epoch": 15.245810055865922, "grad_norm": 0.47007545828819275, "learning_rate": 0.00023876750700280112, "loss": 0.3629, "step": 27290 }, { "epoch": 15.246368715083799, "grad_norm": 1.3062148094177246, "learning_rate": 0.00023873949579831933, "loss": 0.4357, "step": 27291 }, { "epoch": 
15.246927374301675, "grad_norm": 0.6552423238754272, "learning_rate": 0.00023871148459383754, "loss": 0.4535, "step": 27292 }, { "epoch": 15.247486033519554, "grad_norm": 0.4915752708911896, "learning_rate": 0.00023868347338935574, "loss": 0.5503, "step": 27293 }, { "epoch": 15.24804469273743, "grad_norm": 0.382807195186615, "learning_rate": 0.00023865546218487398, "loss": 0.3209, "step": 27294 }, { "epoch": 15.248603351955307, "grad_norm": 0.4989481270313263, "learning_rate": 0.00023862745098039215, "loss": 0.5095, "step": 27295 }, { "epoch": 15.249162011173185, "grad_norm": 0.34435802698135376, "learning_rate": 0.00023859943977591036, "loss": 0.3609, "step": 27296 }, { "epoch": 15.249720670391062, "grad_norm": 0.39178192615509033, "learning_rate": 0.00023857142857142857, "loss": 0.4913, "step": 27297 }, { "epoch": 15.250279329608938, "grad_norm": 0.39510196447372437, "learning_rate": 0.0002385434173669468, "loss": 0.3389, "step": 27298 }, { "epoch": 15.250837988826815, "grad_norm": 0.289592444896698, "learning_rate": 0.000238515406162465, "loss": 0.2906, "step": 27299 }, { "epoch": 15.251396648044693, "grad_norm": 0.4477510154247284, "learning_rate": 0.00023848739495798318, "loss": 0.4611, "step": 27300 }, { "epoch": 15.25195530726257, "grad_norm": 0.4839337170124054, "learning_rate": 0.0002384593837535014, "loss": 0.3498, "step": 27301 }, { "epoch": 15.252513966480446, "grad_norm": 0.7617443799972534, "learning_rate": 0.00023843137254901962, "loss": 0.3765, "step": 27302 }, { "epoch": 15.253072625698325, "grad_norm": 0.6577860713005066, "learning_rate": 0.00023840336134453783, "loss": 0.4719, "step": 27303 }, { "epoch": 15.253631284916201, "grad_norm": 0.4975976347923279, "learning_rate": 0.00023837535014005604, "loss": 0.3287, "step": 27304 }, { "epoch": 15.254189944134078, "grad_norm": 0.44093024730682373, "learning_rate": 0.00023834733893557421, "loss": 0.3799, "step": 27305 }, { "epoch": 15.254748603351956, "grad_norm": 1.389951229095459, "learning_rate": 
0.00023831932773109245, "loss": 0.3356, "step": 27306 }, { "epoch": 15.255307262569833, "grad_norm": 0.3966330289840698, "learning_rate": 0.00023829131652661065, "loss": 0.3848, "step": 27307 }, { "epoch": 15.25586592178771, "grad_norm": 0.4381747543811798, "learning_rate": 0.00023826330532212886, "loss": 0.4327, "step": 27308 }, { "epoch": 15.256424581005586, "grad_norm": 0.7367750406265259, "learning_rate": 0.00023823529411764704, "loss": 0.3501, "step": 27309 }, { "epoch": 15.256983240223464, "grad_norm": 0.34817299246788025, "learning_rate": 0.00023820728291316527, "loss": 0.3811, "step": 27310 }, { "epoch": 15.25754189944134, "grad_norm": 0.3882730007171631, "learning_rate": 0.00023817927170868348, "loss": 0.3842, "step": 27311 }, { "epoch": 15.258100558659217, "grad_norm": 0.49196770787239075, "learning_rate": 0.00023815126050420168, "loss": 0.3593, "step": 27312 }, { "epoch": 15.258659217877096, "grad_norm": 1.9919036626815796, "learning_rate": 0.0002381232492997199, "loss": 0.5058, "step": 27313 }, { "epoch": 15.259217877094972, "grad_norm": 0.6281132698059082, "learning_rate": 0.0002380952380952381, "loss": 0.4263, "step": 27314 }, { "epoch": 15.259776536312849, "grad_norm": 0.45565271377563477, "learning_rate": 0.0002380672268907563, "loss": 0.449, "step": 27315 }, { "epoch": 15.260335195530725, "grad_norm": 0.5083568096160889, "learning_rate": 0.0002380392156862745, "loss": 0.4098, "step": 27316 }, { "epoch": 15.260893854748604, "grad_norm": 0.5633454918861389, "learning_rate": 0.0002380112044817927, "loss": 0.3126, "step": 27317 }, { "epoch": 15.26145251396648, "grad_norm": 0.5010203123092651, "learning_rate": 0.00023798319327731095, "loss": 0.4285, "step": 27318 }, { "epoch": 15.262011173184357, "grad_norm": 0.3600017726421356, "learning_rate": 0.00023795518207282913, "loss": 0.3855, "step": 27319 }, { "epoch": 15.262569832402235, "grad_norm": 0.5928458571434021, "learning_rate": 0.00023792717086834733, "loss": 0.4163, "step": 27320 }, { "epoch": 
15.263128491620112, "grad_norm": 0.34623974561691284, "learning_rate": 0.00023789915966386554, "loss": 0.3586, "step": 27321 }, { "epoch": 15.263687150837988, "grad_norm": 0.4261847138404846, "learning_rate": 0.00023787114845938377, "loss": 0.3601, "step": 27322 }, { "epoch": 15.264245810055867, "grad_norm": 0.45708662271499634, "learning_rate": 0.00023784313725490198, "loss": 0.4331, "step": 27323 }, { "epoch": 15.264804469273743, "grad_norm": 0.4622381925582886, "learning_rate": 0.00023781512605042015, "loss": 0.4676, "step": 27324 }, { "epoch": 15.26536312849162, "grad_norm": 0.4170280694961548, "learning_rate": 0.00023778711484593836, "loss": 0.4359, "step": 27325 }, { "epoch": 15.265921787709496, "grad_norm": 0.6590286493301392, "learning_rate": 0.0002377591036414566, "loss": 0.5291, "step": 27326 }, { "epoch": 15.266480446927375, "grad_norm": 0.7865777015686035, "learning_rate": 0.0002377310924369748, "loss": 0.3311, "step": 27327 }, { "epoch": 15.267039106145251, "grad_norm": 0.4268190264701843, "learning_rate": 0.000237703081232493, "loss": 0.498, "step": 27328 }, { "epoch": 15.267597765363128, "grad_norm": 0.41497716307640076, "learning_rate": 0.00023767507002801118, "loss": 0.4136, "step": 27329 }, { "epoch": 15.268156424581006, "grad_norm": 0.34212756156921387, "learning_rate": 0.00023764705882352942, "loss": 0.4468, "step": 27330 }, { "epoch": 15.268715083798883, "grad_norm": 0.5231161117553711, "learning_rate": 0.00023761904761904762, "loss": 0.4593, "step": 27331 }, { "epoch": 15.26927374301676, "grad_norm": 0.5548396110534668, "learning_rate": 0.00023759103641456583, "loss": 0.5669, "step": 27332 }, { "epoch": 15.269832402234638, "grad_norm": 0.4548458158969879, "learning_rate": 0.00023756302521008404, "loss": 0.4052, "step": 27333 }, { "epoch": 15.270391061452514, "grad_norm": 0.5036672949790955, "learning_rate": 0.00023753501400560224, "loss": 0.4673, "step": 27334 }, { "epoch": 15.27094972067039, "grad_norm": 0.9956867694854736, "learning_rate": 
0.00023750700280112045, "loss": 0.3387, "step": 27335 }, { "epoch": 15.271508379888267, "grad_norm": 0.3966291844844818, "learning_rate": 0.00023747899159663865, "loss": 0.42, "step": 27336 }, { "epoch": 15.272067039106146, "grad_norm": 0.36904725432395935, "learning_rate": 0.00023745098039215686, "loss": 0.4877, "step": 27337 }, { "epoch": 15.272625698324022, "grad_norm": 0.46454742550849915, "learning_rate": 0.0002374229691876751, "loss": 0.4637, "step": 27338 }, { "epoch": 15.273184357541899, "grad_norm": 0.3837496042251587, "learning_rate": 0.00023739495798319327, "loss": 0.3795, "step": 27339 }, { "epoch": 15.273743016759777, "grad_norm": 0.41461503505706787, "learning_rate": 0.00023736694677871148, "loss": 0.4349, "step": 27340 }, { "epoch": 15.274301675977654, "grad_norm": 0.47250935435295105, "learning_rate": 0.00023733893557422968, "loss": 0.459, "step": 27341 }, { "epoch": 15.27486033519553, "grad_norm": 0.42125189304351807, "learning_rate": 0.00023731092436974792, "loss": 0.4918, "step": 27342 }, { "epoch": 15.275418994413409, "grad_norm": 2.8062045574188232, "learning_rate": 0.00023728291316526612, "loss": 0.4098, "step": 27343 }, { "epoch": 15.275977653631285, "grad_norm": 0.7166767716407776, "learning_rate": 0.0002372549019607843, "loss": 0.4191, "step": 27344 }, { "epoch": 15.276536312849162, "grad_norm": 0.6807219982147217, "learning_rate": 0.0002372268907563025, "loss": 0.4433, "step": 27345 }, { "epoch": 15.277094972067038, "grad_norm": 0.4246392250061035, "learning_rate": 0.00023719887955182074, "loss": 0.452, "step": 27346 }, { "epoch": 15.277653631284917, "grad_norm": 0.4423331022262573, "learning_rate": 0.00023717086834733895, "loss": 0.3894, "step": 27347 }, { "epoch": 15.278212290502793, "grad_norm": 0.7535455822944641, "learning_rate": 0.00023714285714285715, "loss": 0.4578, "step": 27348 }, { "epoch": 15.27877094972067, "grad_norm": 0.4807535707950592, "learning_rate": 0.00023711484593837533, "loss": 0.3998, "step": 27349 }, { "epoch": 
15.279329608938548, "grad_norm": 2.586956262588501, "learning_rate": 0.00023708683473389357, "loss": 0.4998, "step": 27350 }, { "epoch": 15.279888268156425, "grad_norm": 0.40266501903533936, "learning_rate": 0.00023705882352941177, "loss": 0.3149, "step": 27351 }, { "epoch": 15.280446927374301, "grad_norm": 0.6439770460128784, "learning_rate": 0.00023703081232492998, "loss": 0.4691, "step": 27352 }, { "epoch": 15.28100558659218, "grad_norm": 1.7154874801635742, "learning_rate": 0.0002370028011204482, "loss": 0.3783, "step": 27353 }, { "epoch": 15.281564245810056, "grad_norm": 0.5013905763626099, "learning_rate": 0.0002369747899159664, "loss": 0.366, "step": 27354 }, { "epoch": 15.282122905027933, "grad_norm": 0.4838288426399231, "learning_rate": 0.0002369467787114846, "loss": 0.4225, "step": 27355 }, { "epoch": 15.28268156424581, "grad_norm": 0.48759859800338745, "learning_rate": 0.0002369187675070028, "loss": 0.4907, "step": 27356 }, { "epoch": 15.283240223463688, "grad_norm": 0.636256217956543, "learning_rate": 0.00023689075630252103, "loss": 0.4596, "step": 27357 }, { "epoch": 15.283798882681564, "grad_norm": 0.3870348632335663, "learning_rate": 0.00023686274509803924, "loss": 0.3714, "step": 27358 }, { "epoch": 15.28435754189944, "grad_norm": 0.6482207775115967, "learning_rate": 0.00023683473389355742, "loss": 0.5904, "step": 27359 }, { "epoch": 15.28491620111732, "grad_norm": 0.3728819787502289, "learning_rate": 0.00023680672268907562, "loss": 0.432, "step": 27360 }, { "epoch": 15.285474860335196, "grad_norm": 1.8959358930587769, "learning_rate": 0.00023677871148459386, "loss": 0.4745, "step": 27361 }, { "epoch": 15.286033519553072, "grad_norm": 2.601167917251587, "learning_rate": 0.00023675070028011206, "loss": 0.3805, "step": 27362 }, { "epoch": 15.286592178770949, "grad_norm": 0.6279693841934204, "learning_rate": 0.00023672268907563027, "loss": 0.3181, "step": 27363 }, { "epoch": 15.287150837988827, "grad_norm": 0.6313248872756958, "learning_rate": 
0.00023669467787114845, "loss": 0.394, "step": 27364 }, { "epoch": 15.287709497206704, "grad_norm": 0.569640576839447, "learning_rate": 0.00023666666666666668, "loss": 0.4009, "step": 27365 }, { "epoch": 15.28826815642458, "grad_norm": 0.45173370838165283, "learning_rate": 0.0002366386554621849, "loss": 0.3805, "step": 27366 }, { "epoch": 15.288826815642459, "grad_norm": 0.9054532051086426, "learning_rate": 0.0002366106442577031, "loss": 0.3985, "step": 27367 }, { "epoch": 15.289385474860335, "grad_norm": 0.37061354517936707, "learning_rate": 0.00023658263305322127, "loss": 0.531, "step": 27368 }, { "epoch": 15.289944134078212, "grad_norm": 0.35593268275260925, "learning_rate": 0.0002365546218487395, "loss": 0.3574, "step": 27369 }, { "epoch": 15.29050279329609, "grad_norm": 0.3411157727241516, "learning_rate": 0.0002365266106442577, "loss": 0.3868, "step": 27370 }, { "epoch": 15.291061452513967, "grad_norm": 0.45763590931892395, "learning_rate": 0.00023649859943977592, "loss": 0.4686, "step": 27371 }, { "epoch": 15.291620111731843, "grad_norm": 0.42536401748657227, "learning_rate": 0.00023647058823529412, "loss": 0.4015, "step": 27372 }, { "epoch": 15.29217877094972, "grad_norm": 0.5526837706565857, "learning_rate": 0.00023644257703081233, "loss": 0.4297, "step": 27373 }, { "epoch": 15.292737430167598, "grad_norm": 0.3557010889053345, "learning_rate": 0.00023641456582633054, "loss": 0.3044, "step": 27374 }, { "epoch": 15.293296089385475, "grad_norm": 0.43715643882751465, "learning_rate": 0.00023638655462184874, "loss": 0.4324, "step": 27375 }, { "epoch": 15.293854748603351, "grad_norm": 0.40180277824401855, "learning_rate": 0.00023635854341736695, "loss": 0.3925, "step": 27376 }, { "epoch": 15.29441340782123, "grad_norm": 0.3401314616203308, "learning_rate": 0.00023633053221288518, "loss": 0.3272, "step": 27377 }, { "epoch": 15.294972067039106, "grad_norm": 3.161470413208008, "learning_rate": 0.00023630252100840336, "loss": 0.4517, "step": 27378 }, { "epoch": 
15.295530726256983, "grad_norm": 0.34121567010879517, "learning_rate": 0.00023627450980392157, "loss": 0.2791, "step": 27379 }, { "epoch": 15.296089385474861, "grad_norm": 0.5235820412635803, "learning_rate": 0.00023624649859943977, "loss": 0.5952, "step": 27380 }, { "epoch": 15.296648044692738, "grad_norm": 0.47562819719314575, "learning_rate": 0.000236218487394958, "loss": 0.4544, "step": 27381 }, { "epoch": 15.297206703910614, "grad_norm": 0.5631003379821777, "learning_rate": 0.0002361904761904762, "loss": 0.4569, "step": 27382 }, { "epoch": 15.297765363128491, "grad_norm": 0.4859277606010437, "learning_rate": 0.0002361624649859944, "loss": 0.5742, "step": 27383 }, { "epoch": 15.29832402234637, "grad_norm": 0.36334100365638733, "learning_rate": 0.0002361344537815126, "loss": 0.4525, "step": 27384 }, { "epoch": 15.298882681564246, "grad_norm": 0.3904019594192505, "learning_rate": 0.00023610644257703083, "loss": 0.417, "step": 27385 }, { "epoch": 15.299441340782122, "grad_norm": 0.43609458208084106, "learning_rate": 0.00023607843137254904, "loss": 0.3987, "step": 27386 }, { "epoch": 15.3, "grad_norm": 0.48538318276405334, "learning_rate": 0.00023605042016806724, "loss": 0.3534, "step": 27387 }, { "epoch": 15.300558659217877, "grad_norm": 2.9375693798065186, "learning_rate": 0.00023602240896358542, "loss": 0.3829, "step": 27388 }, { "epoch": 15.301117318435754, "grad_norm": 0.4311273992061615, "learning_rate": 0.00023599439775910365, "loss": 0.4089, "step": 27389 }, { "epoch": 15.30167597765363, "grad_norm": 0.3711971938610077, "learning_rate": 0.00023596638655462186, "loss": 0.3456, "step": 27390 }, { "epoch": 15.302234636871509, "grad_norm": 0.764815628528595, "learning_rate": 0.00023593837535014007, "loss": 0.4087, "step": 27391 }, { "epoch": 15.302793296089385, "grad_norm": 0.3980580270290375, "learning_rate": 0.00023591036414565827, "loss": 0.3805, "step": 27392 }, { "epoch": 15.303351955307262, "grad_norm": 0.5145567655563354, "learning_rate": 
0.00023588235294117648, "loss": 0.3964, "step": 27393 }, { "epoch": 15.30391061452514, "grad_norm": 2.1635966300964355, "learning_rate": 0.00023585434173669468, "loss": 0.4846, "step": 27394 }, { "epoch": 15.304469273743017, "grad_norm": 0.7815500497817993, "learning_rate": 0.0002358263305322129, "loss": 0.4059, "step": 27395 }, { "epoch": 15.305027932960893, "grad_norm": 1.560543179512024, "learning_rate": 0.0002357983193277311, "loss": 0.4489, "step": 27396 }, { "epoch": 15.305586592178772, "grad_norm": 0.39423465728759766, "learning_rate": 0.00023577030812324933, "loss": 0.3675, "step": 27397 }, { "epoch": 15.306145251396648, "grad_norm": 0.39616483449935913, "learning_rate": 0.0002357422969187675, "loss": 0.4807, "step": 27398 }, { "epoch": 15.306703910614525, "grad_norm": 0.5009031891822815, "learning_rate": 0.0002357142857142857, "loss": 0.4109, "step": 27399 }, { "epoch": 15.307262569832401, "grad_norm": 0.5291558504104614, "learning_rate": 0.00023568627450980392, "loss": 0.4489, "step": 27400 }, { "epoch": 15.30782122905028, "grad_norm": 0.7831292152404785, "learning_rate": 0.00023565826330532215, "loss": 0.3388, "step": 27401 }, { "epoch": 15.308379888268156, "grad_norm": 0.5438055992126465, "learning_rate": 0.00023563025210084036, "loss": 0.4295, "step": 27402 }, { "epoch": 15.308938547486033, "grad_norm": 0.48060014843940735, "learning_rate": 0.00023560224089635854, "loss": 0.4782, "step": 27403 }, { "epoch": 15.309497206703911, "grad_norm": 0.5952041745185852, "learning_rate": 0.00023557422969187674, "loss": 0.3667, "step": 27404 }, { "epoch": 15.310055865921788, "grad_norm": 0.5067158937454224, "learning_rate": 0.00023554621848739498, "loss": 0.401, "step": 27405 }, { "epoch": 15.310614525139664, "grad_norm": 1.7469717264175415, "learning_rate": 0.00023551820728291318, "loss": 0.4151, "step": 27406 }, { "epoch": 15.311173184357543, "grad_norm": 0.43438252806663513, "learning_rate": 0.0002354901960784314, "loss": 0.4133, "step": 27407 }, { "epoch": 
15.31173184357542, "grad_norm": 0.6350743174552917, "learning_rate": 0.00023546218487394957, "loss": 0.4113, "step": 27408 }, { "epoch": 15.312290502793296, "grad_norm": 0.4433327317237854, "learning_rate": 0.0002354341736694678, "loss": 0.3555, "step": 27409 }, { "epoch": 15.312849162011172, "grad_norm": 0.4220203161239624, "learning_rate": 0.000235406162464986, "loss": 0.4514, "step": 27410 }, { "epoch": 15.31340782122905, "grad_norm": 1.5481551885604858, "learning_rate": 0.0002353781512605042, "loss": 0.3857, "step": 27411 }, { "epoch": 15.313966480446927, "grad_norm": 0.5273675918579102, "learning_rate": 0.00023535014005602242, "loss": 0.5126, "step": 27412 }, { "epoch": 15.314525139664804, "grad_norm": 0.4492024779319763, "learning_rate": 0.00023532212885154062, "loss": 0.3989, "step": 27413 }, { "epoch": 15.315083798882682, "grad_norm": 1.1539660692214966, "learning_rate": 0.00023529411764705883, "loss": 0.4852, "step": 27414 }, { "epoch": 15.315642458100559, "grad_norm": 3.3286173343658447, "learning_rate": 0.00023526610644257704, "loss": 0.3654, "step": 27415 }, { "epoch": 15.316201117318435, "grad_norm": 0.43987223505973816, "learning_rate": 0.00023523809523809524, "loss": 0.4506, "step": 27416 }, { "epoch": 15.316759776536314, "grad_norm": 0.4094041883945465, "learning_rate": 0.00023521008403361348, "loss": 0.4581, "step": 27417 }, { "epoch": 15.31731843575419, "grad_norm": 0.5308538675308228, "learning_rate": 0.00023518207282913165, "loss": 0.4487, "step": 27418 }, { "epoch": 15.317877094972067, "grad_norm": 0.4632507264614105, "learning_rate": 0.00023515406162464986, "loss": 0.4111, "step": 27419 }, { "epoch": 15.318435754189943, "grad_norm": 0.370576947927475, "learning_rate": 0.00023512605042016807, "loss": 0.3633, "step": 27420 }, { "epoch": 15.318994413407822, "grad_norm": 0.5131366848945618, "learning_rate": 0.0002350980392156863, "loss": 0.4594, "step": 27421 }, { "epoch": 15.319553072625698, "grad_norm": 1.1148682832717896, "learning_rate": 
0.00023507002801120448, "loss": 0.3624, "step": 27422 }, { "epoch": 15.320111731843575, "grad_norm": 0.47584646940231323, "learning_rate": 0.00023504201680672268, "loss": 0.4187, "step": 27423 }, { "epoch": 15.320670391061453, "grad_norm": 0.6456692218780518, "learning_rate": 0.0002350140056022409, "loss": 0.5032, "step": 27424 }, { "epoch": 15.32122905027933, "grad_norm": 0.5331834554672241, "learning_rate": 0.00023498599439775912, "loss": 0.4274, "step": 27425 }, { "epoch": 15.321787709497206, "grad_norm": 0.4325638711452484, "learning_rate": 0.00023495798319327733, "loss": 0.391, "step": 27426 }, { "epoch": 15.322346368715085, "grad_norm": 0.4980892539024353, "learning_rate": 0.0002349299719887955, "loss": 0.4272, "step": 27427 }, { "epoch": 15.322905027932961, "grad_norm": 0.6630984544754028, "learning_rate": 0.00023490196078431371, "loss": 0.471, "step": 27428 }, { "epoch": 15.323463687150838, "grad_norm": 0.5613689422607422, "learning_rate": 0.00023487394957983195, "loss": 0.4831, "step": 27429 }, { "epoch": 15.324022346368714, "grad_norm": 0.5826461315155029, "learning_rate": 0.00023484593837535015, "loss": 0.3759, "step": 27430 }, { "epoch": 15.324581005586593, "grad_norm": 0.38186895847320557, "learning_rate": 0.00023481792717086836, "loss": 0.3279, "step": 27431 }, { "epoch": 15.32513966480447, "grad_norm": 0.8778761625289917, "learning_rate": 0.00023478991596638654, "loss": 0.4397, "step": 27432 }, { "epoch": 15.325698324022346, "grad_norm": 0.40523669123649597, "learning_rate": 0.00023476190476190477, "loss": 0.3948, "step": 27433 }, { "epoch": 15.326256983240224, "grad_norm": 0.4401901364326477, "learning_rate": 0.00023473389355742298, "loss": 0.51, "step": 27434 }, { "epoch": 15.3268156424581, "grad_norm": 0.32560133934020996, "learning_rate": 0.00023470588235294118, "loss": 0.3507, "step": 27435 }, { "epoch": 15.327374301675977, "grad_norm": 0.9157285094261169, "learning_rate": 0.0002346778711484594, "loss": 0.37, "step": 27436 }, { "epoch": 
15.327932960893854, "grad_norm": 0.4645446836948395, "learning_rate": 0.0002346498599439776, "loss": 0.4232, "step": 27437 }, { "epoch": 15.328491620111732, "grad_norm": 0.48706427216529846, "learning_rate": 0.0002346218487394958, "loss": 0.4315, "step": 27438 }, { "epoch": 15.329050279329609, "grad_norm": 0.38807767629623413, "learning_rate": 0.000234593837535014, "loss": 0.4093, "step": 27439 }, { "epoch": 15.329608938547485, "grad_norm": 0.40641501545906067, "learning_rate": 0.0002345658263305322, "loss": 0.2849, "step": 27440 }, { "epoch": 15.330167597765364, "grad_norm": 0.5182761549949646, "learning_rate": 0.00023453781512605045, "loss": 0.4397, "step": 27441 }, { "epoch": 15.33072625698324, "grad_norm": 0.4478176236152649, "learning_rate": 0.00023450980392156862, "loss": 0.4226, "step": 27442 }, { "epoch": 15.331284916201117, "grad_norm": 0.6990725994110107, "learning_rate": 0.00023448179271708683, "loss": 0.4019, "step": 27443 }, { "epoch": 15.331843575418995, "grad_norm": 0.3783870041370392, "learning_rate": 0.00023445378151260504, "loss": 0.3643, "step": 27444 }, { "epoch": 15.332402234636872, "grad_norm": 1.6086512804031372, "learning_rate": 0.00023442577030812327, "loss": 0.4534, "step": 27445 }, { "epoch": 15.332960893854748, "grad_norm": 0.444332480430603, "learning_rate": 0.00023439775910364148, "loss": 0.4341, "step": 27446 }, { "epoch": 15.333519553072625, "grad_norm": 0.40880537033081055, "learning_rate": 0.00023436974789915965, "loss": 0.388, "step": 27447 }, { "epoch": 15.334078212290503, "grad_norm": 0.40572336316108704, "learning_rate": 0.00023434173669467786, "loss": 0.447, "step": 27448 }, { "epoch": 15.33463687150838, "grad_norm": 0.5504960417747498, "learning_rate": 0.0002343137254901961, "loss": 0.4271, "step": 27449 }, { "epoch": 15.335195530726256, "grad_norm": 0.3967116177082062, "learning_rate": 0.0002342857142857143, "loss": 0.442, "step": 27450 }, { "epoch": 15.335754189944135, "grad_norm": 1.4559507369995117, "learning_rate": 
0.0002342577030812325, "loss": 0.3804, "step": 27451 }, { "epoch": 15.336312849162011, "grad_norm": 0.5857447981834412, "learning_rate": 0.00023422969187675068, "loss": 0.4832, "step": 27452 }, { "epoch": 15.336871508379888, "grad_norm": 1.276358962059021, "learning_rate": 0.00023420168067226892, "loss": 0.4484, "step": 27453 }, { "epoch": 15.337430167597766, "grad_norm": 0.47999775409698486, "learning_rate": 0.00023417366946778712, "loss": 0.3877, "step": 27454 }, { "epoch": 15.337988826815643, "grad_norm": 0.459094762802124, "learning_rate": 0.00023414565826330533, "loss": 0.3145, "step": 27455 }, { "epoch": 15.33854748603352, "grad_norm": 3.09444522857666, "learning_rate": 0.00023411764705882354, "loss": 0.4672, "step": 27456 }, { "epoch": 15.339106145251396, "grad_norm": 0.9074259996414185, "learning_rate": 0.00023408963585434174, "loss": 0.4762, "step": 27457 }, { "epoch": 15.339664804469274, "grad_norm": 0.49666330218315125, "learning_rate": 0.00023406162464985995, "loss": 0.5451, "step": 27458 }, { "epoch": 15.34022346368715, "grad_norm": 0.6739406585693359, "learning_rate": 0.00023403361344537815, "loss": 0.3897, "step": 27459 }, { "epoch": 15.340782122905027, "grad_norm": 0.49086982011795044, "learning_rate": 0.00023400560224089636, "loss": 0.3274, "step": 27460 }, { "epoch": 15.341340782122906, "grad_norm": 0.3382643163204193, "learning_rate": 0.0002339775910364146, "loss": 0.3374, "step": 27461 }, { "epoch": 15.341899441340782, "grad_norm": 0.5720822811126709, "learning_rate": 0.00023394957983193277, "loss": 0.447, "step": 27462 }, { "epoch": 15.342458100558659, "grad_norm": 0.39611977338790894, "learning_rate": 0.00023392156862745098, "loss": 0.4714, "step": 27463 }, { "epoch": 15.343016759776535, "grad_norm": 0.6396960020065308, "learning_rate": 0.00023389355742296918, "loss": 0.527, "step": 27464 }, { "epoch": 15.343575418994414, "grad_norm": 0.38685473799705505, "learning_rate": 0.00023386554621848742, "loss": 0.3946, "step": 27465 }, { "epoch": 
15.34413407821229, "grad_norm": 0.36341413855552673, "learning_rate": 0.00023383753501400562, "loss": 0.3138, "step": 27466 }, { "epoch": 15.344692737430167, "grad_norm": 0.7965563535690308, "learning_rate": 0.0002338095238095238, "loss": 0.5117, "step": 27467 }, { "epoch": 15.345251396648045, "grad_norm": 0.41985607147216797, "learning_rate": 0.000233781512605042, "loss": 0.542, "step": 27468 }, { "epoch": 15.345810055865922, "grad_norm": 0.38819757103919983, "learning_rate": 0.00023375350140056024, "loss": 0.383, "step": 27469 }, { "epoch": 15.346368715083798, "grad_norm": 0.5193238258361816, "learning_rate": 0.00023372549019607845, "loss": 0.4748, "step": 27470 }, { "epoch": 15.346927374301677, "grad_norm": 0.4518737494945526, "learning_rate": 0.00023369747899159665, "loss": 0.3561, "step": 27471 }, { "epoch": 15.347486033519553, "grad_norm": 0.8575886487960815, "learning_rate": 0.00023366946778711483, "loss": 0.4776, "step": 27472 }, { "epoch": 15.34804469273743, "grad_norm": 0.3266448378562927, "learning_rate": 0.00023364145658263307, "loss": 0.2543, "step": 27473 }, { "epoch": 15.348603351955306, "grad_norm": 1.4699512720108032, "learning_rate": 0.00023361344537815127, "loss": 0.5686, "step": 27474 }, { "epoch": 15.349162011173185, "grad_norm": 0.35907161235809326, "learning_rate": 0.00023358543417366948, "loss": 0.3322, "step": 27475 }, { "epoch": 15.349720670391061, "grad_norm": 0.5248056054115295, "learning_rate": 0.00023355742296918766, "loss": 0.3766, "step": 27476 }, { "epoch": 15.350279329608938, "grad_norm": 0.447727769613266, "learning_rate": 0.0002335294117647059, "loss": 0.4298, "step": 27477 }, { "epoch": 15.350837988826816, "grad_norm": 0.6774367094039917, "learning_rate": 0.0002335014005602241, "loss": 0.4092, "step": 27478 }, { "epoch": 15.351396648044693, "grad_norm": 0.385184109210968, "learning_rate": 0.0002334733893557423, "loss": 0.3924, "step": 27479 }, { "epoch": 15.35195530726257, "grad_norm": 0.5629845857620239, "learning_rate": 
0.0002334453781512605, "loss": 0.4709, "step": 27480 }, { "epoch": 15.352513966480448, "grad_norm": 2.9537107944488525, "learning_rate": 0.0002334173669467787, "loss": 0.4158, "step": 27481 }, { "epoch": 15.353072625698324, "grad_norm": 2.4394400119781494, "learning_rate": 0.00023338935574229692, "loss": 0.3627, "step": 27482 }, { "epoch": 15.3536312849162, "grad_norm": 0.49361085891723633, "learning_rate": 0.00023336134453781512, "loss": 0.4637, "step": 27483 }, { "epoch": 15.354189944134077, "grad_norm": 0.40729281306266785, "learning_rate": 0.00023333333333333333, "loss": 0.3397, "step": 27484 }, { "epoch": 15.354748603351956, "grad_norm": 0.41827327013015747, "learning_rate": 0.00023330532212885156, "loss": 0.3922, "step": 27485 }, { "epoch": 15.355307262569832, "grad_norm": 0.7532612681388855, "learning_rate": 0.00023327731092436974, "loss": 0.456, "step": 27486 }, { "epoch": 15.355865921787709, "grad_norm": 0.654574453830719, "learning_rate": 0.00023324929971988795, "loss": 0.4426, "step": 27487 }, { "epoch": 15.356424581005587, "grad_norm": 0.7738398909568787, "learning_rate": 0.00023322128851540615, "loss": 0.3934, "step": 27488 }, { "epoch": 15.356983240223464, "grad_norm": 0.4268757700920105, "learning_rate": 0.0002331932773109244, "loss": 0.4565, "step": 27489 }, { "epoch": 15.35754189944134, "grad_norm": 0.5913812518119812, "learning_rate": 0.0002331652661064426, "loss": 0.4944, "step": 27490 }, { "epoch": 15.358100558659217, "grad_norm": 0.9433100819587708, "learning_rate": 0.00023313725490196077, "loss": 0.4831, "step": 27491 }, { "epoch": 15.358659217877095, "grad_norm": 0.3646106719970703, "learning_rate": 0.00023310924369747898, "loss": 0.3796, "step": 27492 }, { "epoch": 15.359217877094972, "grad_norm": 1.3304804563522339, "learning_rate": 0.0002330812324929972, "loss": 0.6214, "step": 27493 }, { "epoch": 15.359776536312848, "grad_norm": 0.6899111270904541, "learning_rate": 0.00023305322128851542, "loss": 0.4288, "step": 27494 }, { "epoch": 
15.360335195530727, "grad_norm": 0.5661269426345825, "learning_rate": 0.00023302521008403362, "loss": 0.4828, "step": 27495 }, { "epoch": 15.360893854748603, "grad_norm": 0.3339625597000122, "learning_rate": 0.0002329971988795518, "loss": 0.3689, "step": 27496 }, { "epoch": 15.36145251396648, "grad_norm": 0.4951328933238983, "learning_rate": 0.00023296918767507004, "loss": 0.4463, "step": 27497 }, { "epoch": 15.362011173184358, "grad_norm": 0.44402211904525757, "learning_rate": 0.00023294117647058824, "loss": 0.4084, "step": 27498 }, { "epoch": 15.362569832402235, "grad_norm": 0.6998577117919922, "learning_rate": 0.00023291316526610645, "loss": 0.6134, "step": 27499 }, { "epoch": 15.363128491620111, "grad_norm": 0.39879918098449707, "learning_rate": 0.00023288515406162465, "loss": 0.3641, "step": 27500 }, { "epoch": 15.363128491620111, "eval_cer": 0.08602009754181532, "eval_loss": 0.3251490890979767, "eval_runtime": 55.761, "eval_samples_per_second": 81.383, "eval_steps_per_second": 5.093, "eval_wer": 0.33909667327728277, "step": 27500 }, { "epoch": 15.363687150837988, "grad_norm": 0.44014763832092285, "learning_rate": 0.00023285714285714286, "loss": 0.3847, "step": 27501 }, { "epoch": 15.364245810055866, "grad_norm": 0.450112909078598, "learning_rate": 0.00023282913165266107, "loss": 0.5513, "step": 27502 }, { "epoch": 15.364804469273743, "grad_norm": 0.42415347695350647, "learning_rate": 0.00023280112044817927, "loss": 0.4425, "step": 27503 }, { "epoch": 15.36536312849162, "grad_norm": 0.5702974200248718, "learning_rate": 0.00023277310924369748, "loss": 0.2753, "step": 27504 }, { "epoch": 15.365921787709498, "grad_norm": 7.577267169952393, "learning_rate": 0.0002327450980392157, "loss": 0.47, "step": 27505 }, { "epoch": 15.366480446927374, "grad_norm": 0.6406427025794983, "learning_rate": 0.0002327170868347339, "loss": 0.4608, "step": 27506 }, { "epoch": 15.367039106145251, "grad_norm": 0.6958872079849243, "learning_rate": 0.0002326890756302521, "loss": 0.5317, 
"step": 27507 }, { "epoch": 15.36759776536313, "grad_norm": 0.751998782157898, "learning_rate": 0.0002326610644257703, "loss": 0.4889, "step": 27508 }, { "epoch": 15.368156424581006, "grad_norm": 0.7922876477241516, "learning_rate": 0.00023263305322128854, "loss": 0.3869, "step": 27509 }, { "epoch": 15.368715083798882, "grad_norm": 0.4760020673274994, "learning_rate": 0.00023260504201680674, "loss": 0.4252, "step": 27510 }, { "epoch": 15.369273743016759, "grad_norm": 0.6070950031280518, "learning_rate": 0.00023257703081232492, "loss": 0.529, "step": 27511 }, { "epoch": 15.369832402234637, "grad_norm": 0.4527904689311981, "learning_rate": 0.00023254901960784313, "loss": 0.4788, "step": 27512 }, { "epoch": 15.370391061452514, "grad_norm": 0.5024074912071228, "learning_rate": 0.00023252100840336136, "loss": 0.4286, "step": 27513 }, { "epoch": 15.37094972067039, "grad_norm": 0.6211361885070801, "learning_rate": 0.00023249299719887957, "loss": 0.5245, "step": 27514 }, { "epoch": 15.371508379888269, "grad_norm": 0.38150015473365784, "learning_rate": 0.00023246498599439777, "loss": 0.3711, "step": 27515 }, { "epoch": 15.372067039106145, "grad_norm": 0.42352208495140076, "learning_rate": 0.00023243697478991595, "loss": 0.4494, "step": 27516 }, { "epoch": 15.372625698324022, "grad_norm": 0.7814956903457642, "learning_rate": 0.00023240896358543418, "loss": 0.4101, "step": 27517 }, { "epoch": 15.3731843575419, "grad_norm": 0.4737066924571991, "learning_rate": 0.0002323809523809524, "loss": 0.401, "step": 27518 }, { "epoch": 15.373743016759777, "grad_norm": 0.6003987789154053, "learning_rate": 0.0002323529411764706, "loss": 0.4746, "step": 27519 }, { "epoch": 15.374301675977653, "grad_norm": 0.4094335436820984, "learning_rate": 0.0002323249299719888, "loss": 0.4024, "step": 27520 }, { "epoch": 15.37486033519553, "grad_norm": 0.33603543043136597, "learning_rate": 0.000232296918767507, "loss": 0.3844, "step": 27521 }, { "epoch": 15.375418994413408, "grad_norm": 
0.5068058967590332, "learning_rate": 0.0002322689075630252, "loss": 0.4591, "step": 27522 }, { "epoch": 15.375977653631285, "grad_norm": 0.48952800035476685, "learning_rate": 0.00023224089635854342, "loss": 0.5202, "step": 27523 }, { "epoch": 15.376536312849161, "grad_norm": 1.1990879774093628, "learning_rate": 0.00023221288515406162, "loss": 0.4939, "step": 27524 }, { "epoch": 15.37709497206704, "grad_norm": 0.3486270308494568, "learning_rate": 0.00023218487394957986, "loss": 0.4496, "step": 27525 }, { "epoch": 15.377653631284916, "grad_norm": 0.41330480575561523, "learning_rate": 0.00023215686274509804, "loss": 0.3874, "step": 27526 }, { "epoch": 15.378212290502793, "grad_norm": 0.42523783445358276, "learning_rate": 0.00023212885154061624, "loss": 0.4283, "step": 27527 }, { "epoch": 15.378770949720671, "grad_norm": 4.902891635894775, "learning_rate": 0.00023210084033613445, "loss": 0.5188, "step": 27528 }, { "epoch": 15.379329608938548, "grad_norm": 0.5284238457679749, "learning_rate": 0.00023207282913165268, "loss": 0.495, "step": 27529 }, { "epoch": 15.379888268156424, "grad_norm": 0.6698698997497559, "learning_rate": 0.0002320448179271709, "loss": 0.4149, "step": 27530 }, { "epoch": 15.380446927374301, "grad_norm": 0.4035865366458893, "learning_rate": 0.00023201680672268907, "loss": 0.3462, "step": 27531 }, { "epoch": 15.38100558659218, "grad_norm": 0.5787323713302612, "learning_rate": 0.00023198879551820727, "loss": 0.4516, "step": 27532 }, { "epoch": 15.381564245810056, "grad_norm": 0.4580431878566742, "learning_rate": 0.0002319607843137255, "loss": 0.4168, "step": 27533 }, { "epoch": 15.382122905027932, "grad_norm": 0.4119787812232971, "learning_rate": 0.0002319327731092437, "loss": 0.3618, "step": 27534 }, { "epoch": 15.38268156424581, "grad_norm": 0.5767946243286133, "learning_rate": 0.0002319047619047619, "loss": 0.4409, "step": 27535 }, { "epoch": 15.383240223463687, "grad_norm": 0.3838423788547516, "learning_rate": 0.0002318767507002801, "loss": 
0.3623, "step": 27536 }, { "epoch": 15.383798882681564, "grad_norm": 0.47653728723526, "learning_rate": 0.00023184873949579833, "loss": 0.4988, "step": 27537 }, { "epoch": 15.38435754189944, "grad_norm": 0.670563280582428, "learning_rate": 0.00023182072829131654, "loss": 0.3875, "step": 27538 }, { "epoch": 15.384916201117319, "grad_norm": 0.7059043645858765, "learning_rate": 0.00023179271708683474, "loss": 0.5393, "step": 27539 }, { "epoch": 15.385474860335195, "grad_norm": 0.5428305864334106, "learning_rate": 0.00023176470588235292, "loss": 0.3491, "step": 27540 }, { "epoch": 15.386033519553072, "grad_norm": 1.0532699823379517, "learning_rate": 0.00023173669467787115, "loss": 0.3515, "step": 27541 }, { "epoch": 15.38659217877095, "grad_norm": 0.8265397548675537, "learning_rate": 0.00023170868347338936, "loss": 0.4631, "step": 27542 }, { "epoch": 15.387150837988827, "grad_norm": 0.30975237488746643, "learning_rate": 0.00023168067226890757, "loss": 0.3779, "step": 27543 }, { "epoch": 15.387709497206703, "grad_norm": 0.5013826489448547, "learning_rate": 0.00023165266106442577, "loss": 0.494, "step": 27544 }, { "epoch": 15.388268156424582, "grad_norm": 0.5017936825752258, "learning_rate": 0.00023162464985994398, "loss": 0.4105, "step": 27545 }, { "epoch": 15.388826815642458, "grad_norm": 0.543145477771759, "learning_rate": 0.00023159663865546218, "loss": 0.3445, "step": 27546 }, { "epoch": 15.389385474860335, "grad_norm": 2.5300023555755615, "learning_rate": 0.0002315686274509804, "loss": 0.3829, "step": 27547 }, { "epoch": 15.389944134078211, "grad_norm": 0.4205876886844635, "learning_rate": 0.0002315406162464986, "loss": 0.4657, "step": 27548 }, { "epoch": 15.39050279329609, "grad_norm": 0.30096274614334106, "learning_rate": 0.00023151260504201683, "loss": 0.352, "step": 27549 }, { "epoch": 15.391061452513966, "grad_norm": 0.42650893330574036, "learning_rate": 0.000231484593837535, "loss": 0.3451, "step": 27550 }, { "epoch": 15.391620111731843, "grad_norm": 
0.3083764314651489, "learning_rate": 0.00023145658263305321, "loss": 0.3978, "step": 27551 }, { "epoch": 15.392178770949721, "grad_norm": 0.4789768159389496, "learning_rate": 0.00023142857142857142, "loss": 0.3737, "step": 27552 }, { "epoch": 15.392737430167598, "grad_norm": 0.5054426789283752, "learning_rate": 0.00023140056022408965, "loss": 0.41, "step": 27553 }, { "epoch": 15.393296089385474, "grad_norm": 2.1758203506469727, "learning_rate": 0.00023137254901960786, "loss": 0.4607, "step": 27554 }, { "epoch": 15.393854748603353, "grad_norm": 0.39306533336639404, "learning_rate": 0.00023134453781512604, "loss": 0.3265, "step": 27555 }, { "epoch": 15.39441340782123, "grad_norm": 0.7310125827789307, "learning_rate": 0.00023131652661064424, "loss": 0.481, "step": 27556 }, { "epoch": 15.394972067039106, "grad_norm": 0.674607515335083, "learning_rate": 0.00023128851540616248, "loss": 0.3696, "step": 27557 }, { "epoch": 15.395530726256982, "grad_norm": 0.637444257736206, "learning_rate": 0.00023126050420168068, "loss": 0.5676, "step": 27558 }, { "epoch": 15.39608938547486, "grad_norm": 0.2863808572292328, "learning_rate": 0.0002312324929971989, "loss": 0.2788, "step": 27559 }, { "epoch": 15.396648044692737, "grad_norm": 0.5081846714019775, "learning_rate": 0.00023120448179271707, "loss": 0.4052, "step": 27560 }, { "epoch": 15.397206703910614, "grad_norm": 0.33326709270477295, "learning_rate": 0.0002311764705882353, "loss": 0.377, "step": 27561 }, { "epoch": 15.397765363128492, "grad_norm": 0.33083057403564453, "learning_rate": 0.0002311484593837535, "loss": 0.2984, "step": 27562 }, { "epoch": 15.398324022346369, "grad_norm": 0.45589569211006165, "learning_rate": 0.0002311204481792717, "loss": 0.4344, "step": 27563 }, { "epoch": 15.398882681564245, "grad_norm": 0.38245394825935364, "learning_rate": 0.00023109243697478992, "loss": 0.3602, "step": 27564 }, { "epoch": 15.399441340782122, "grad_norm": 0.39443495869636536, "learning_rate": 0.00023106442577030812, "loss": 
0.3611, "step": 27565 }, { "epoch": 15.4, "grad_norm": 0.4073416590690613, "learning_rate": 0.00023103641456582633, "loss": 0.4346, "step": 27566 }, { "epoch": 15.400558659217877, "grad_norm": 0.5925625562667847, "learning_rate": 0.00023100840336134454, "loss": 0.476, "step": 27567 }, { "epoch": 15.401117318435753, "grad_norm": 0.522760808467865, "learning_rate": 0.00023098039215686274, "loss": 0.4629, "step": 27568 }, { "epoch": 15.401675977653632, "grad_norm": 0.737259030342102, "learning_rate": 0.00023095238095238098, "loss": 0.3871, "step": 27569 }, { "epoch": 15.402234636871508, "grad_norm": 0.42474088072776794, "learning_rate": 0.00023092436974789915, "loss": 0.4824, "step": 27570 }, { "epoch": 15.402793296089385, "grad_norm": 0.5107524394989014, "learning_rate": 0.00023089635854341736, "loss": 0.4556, "step": 27571 }, { "epoch": 15.403351955307263, "grad_norm": 0.7858421802520752, "learning_rate": 0.00023086834733893557, "loss": 0.4925, "step": 27572 }, { "epoch": 15.40391061452514, "grad_norm": 0.5118883848190308, "learning_rate": 0.0002308403361344538, "loss": 0.3384, "step": 27573 }, { "epoch": 15.404469273743016, "grad_norm": 0.5859266519546509, "learning_rate": 0.000230812324929972, "loss": 0.3735, "step": 27574 }, { "epoch": 15.405027932960893, "grad_norm": 0.6551598906517029, "learning_rate": 0.00023078431372549018, "loss": 0.4377, "step": 27575 }, { "epoch": 15.405586592178771, "grad_norm": 1.1171528100967407, "learning_rate": 0.0002307563025210084, "loss": 0.4845, "step": 27576 }, { "epoch": 15.406145251396648, "grad_norm": 0.39609792828559875, "learning_rate": 0.00023072829131652662, "loss": 0.5033, "step": 27577 }, { "epoch": 15.406703910614524, "grad_norm": 0.39364534616470337, "learning_rate": 0.00023070028011204483, "loss": 0.3599, "step": 27578 }, { "epoch": 15.407262569832403, "grad_norm": 0.5504368543624878, "learning_rate": 0.00023067226890756304, "loss": 0.751, "step": 27579 }, { "epoch": 15.40782122905028, "grad_norm": 0.3931092917919159, 
"learning_rate": 0.00023064425770308121, "loss": 0.2672, "step": 27580 }, { "epoch": 15.408379888268156, "grad_norm": 0.55729740858078, "learning_rate": 0.00023061624649859945, "loss": 0.4177, "step": 27581 }, { "epoch": 15.408938547486034, "grad_norm": 0.4239175319671631, "learning_rate": 0.00023058823529411765, "loss": 0.451, "step": 27582 }, { "epoch": 15.40949720670391, "grad_norm": 0.4770985245704651, "learning_rate": 0.00023056022408963586, "loss": 0.3901, "step": 27583 }, { "epoch": 15.410055865921787, "grad_norm": 0.6786672472953796, "learning_rate": 0.0002305322128851541, "loss": 0.4976, "step": 27584 }, { "epoch": 15.410614525139664, "grad_norm": 0.582874596118927, "learning_rate": 0.00023050420168067227, "loss": 0.3703, "step": 27585 }, { "epoch": 15.411173184357542, "grad_norm": 0.8771749138832092, "learning_rate": 0.00023047619047619048, "loss": 0.3139, "step": 27586 }, { "epoch": 15.411731843575419, "grad_norm": 0.5681930780410767, "learning_rate": 0.00023044817927170868, "loss": 0.3656, "step": 27587 }, { "epoch": 15.412290502793295, "grad_norm": 0.3547438383102417, "learning_rate": 0.00023042016806722692, "loss": 0.3947, "step": 27588 }, { "epoch": 15.412849162011174, "grad_norm": 0.42386239767074585, "learning_rate": 0.0002303921568627451, "loss": 0.3042, "step": 27589 }, { "epoch": 15.41340782122905, "grad_norm": 2.890068531036377, "learning_rate": 0.0002303641456582633, "loss": 0.3073, "step": 27590 }, { "epoch": 15.413966480446927, "grad_norm": 2.2874836921691895, "learning_rate": 0.0002303361344537815, "loss": 0.618, "step": 27591 }, { "epoch": 15.414525139664805, "grad_norm": 0.43156924843788147, "learning_rate": 0.00023030812324929974, "loss": 0.331, "step": 27592 }, { "epoch": 15.415083798882682, "grad_norm": 2.0707414150238037, "learning_rate": 0.00023028011204481795, "loss": 0.3955, "step": 27593 }, { "epoch": 15.415642458100558, "grad_norm": 0.8385738730430603, "learning_rate": 0.00023025210084033613, "loss": 0.3018, "step": 27594 }, { 
"epoch": 15.416201117318435, "grad_norm": 0.5015643239021301, "learning_rate": 0.00023022408963585433, "loss": 0.3306, "step": 27595 }, { "epoch": 15.416759776536313, "grad_norm": 1.2832459211349487, "learning_rate": 0.00023019607843137256, "loss": 0.3434, "step": 27596 }, { "epoch": 15.41731843575419, "grad_norm": 0.4637226462364197, "learning_rate": 0.00023016806722689077, "loss": 0.4113, "step": 27597 }, { "epoch": 15.417877094972066, "grad_norm": 0.6637462973594666, "learning_rate": 0.00023014005602240898, "loss": 0.4451, "step": 27598 }, { "epoch": 15.418435754189945, "grad_norm": 0.3724496364593506, "learning_rate": 0.00023011204481792716, "loss": 0.3291, "step": 27599 }, { "epoch": 15.418994413407821, "grad_norm": 0.3903222382068634, "learning_rate": 0.0002300840336134454, "loss": 0.3271, "step": 27600 }, { "epoch": 15.419553072625698, "grad_norm": 4.25506067276001, "learning_rate": 0.0002300560224089636, "loss": 0.4898, "step": 27601 }, { "epoch": 15.420111731843576, "grad_norm": 0.41180357336997986, "learning_rate": 0.0002300280112044818, "loss": 0.322, "step": 27602 }, { "epoch": 15.420670391061453, "grad_norm": 0.6076601147651672, "learning_rate": 0.00023, "loss": 0.4253, "step": 27603 }, { "epoch": 15.42122905027933, "grad_norm": 0.7646205425262451, "learning_rate": 0.0002299719887955182, "loss": 0.4349, "step": 27604 }, { "epoch": 15.421787709497206, "grad_norm": 0.3251868784427643, "learning_rate": 0.00022994397759103642, "loss": 0.2879, "step": 27605 }, { "epoch": 15.422346368715084, "grad_norm": 0.6212793588638306, "learning_rate": 0.00022991596638655462, "loss": 0.3858, "step": 27606 }, { "epoch": 15.422905027932961, "grad_norm": 0.46519187092781067, "learning_rate": 0.00022988795518207283, "loss": 0.4382, "step": 27607 }, { "epoch": 15.423463687150837, "grad_norm": 0.8211582899093628, "learning_rate": 0.00022985994397759106, "loss": 0.3877, "step": 27608 }, { "epoch": 15.424022346368716, "grad_norm": 0.4072891175746918, "learning_rate": 
0.00022983193277310924, "loss": 0.3507, "step": 27609 }, { "epoch": 15.424581005586592, "grad_norm": 0.5349559783935547, "learning_rate": 0.00022980392156862745, "loss": 0.4666, "step": 27610 }, { "epoch": 15.425139664804469, "grad_norm": 0.7876636981964111, "learning_rate": 0.00022977591036414565, "loss": 0.4664, "step": 27611 }, { "epoch": 15.425698324022346, "grad_norm": 0.3492244482040405, "learning_rate": 0.0002297478991596639, "loss": 0.4843, "step": 27612 }, { "epoch": 15.426256983240224, "grad_norm": 0.3406340777873993, "learning_rate": 0.0002297198879551821, "loss": 0.3334, "step": 27613 }, { "epoch": 15.4268156424581, "grad_norm": 0.7022618055343628, "learning_rate": 0.00022969187675070027, "loss": 0.4076, "step": 27614 }, { "epoch": 15.427374301675977, "grad_norm": 0.7164230346679688, "learning_rate": 0.00022966386554621848, "loss": 0.3755, "step": 27615 }, { "epoch": 15.427932960893855, "grad_norm": 0.5902921557426453, "learning_rate": 0.0002296358543417367, "loss": 0.3245, "step": 27616 }, { "epoch": 15.428491620111732, "grad_norm": 0.38953524827957153, "learning_rate": 0.00022960784313725492, "loss": 0.3491, "step": 27617 }, { "epoch": 15.429050279329608, "grad_norm": 0.48069486021995544, "learning_rate": 0.00022957983193277312, "loss": 0.3791, "step": 27618 }, { "epoch": 15.429608938547487, "grad_norm": 0.7019156217575073, "learning_rate": 0.0002295518207282913, "loss": 0.4645, "step": 27619 }, { "epoch": 15.430167597765363, "grad_norm": 0.4355953335762024, "learning_rate": 0.00022952380952380954, "loss": 0.3615, "step": 27620 }, { "epoch": 15.43072625698324, "grad_norm": 1.006739854812622, "learning_rate": 0.00022949579831932774, "loss": 0.7369, "step": 27621 }, { "epoch": 15.431284916201117, "grad_norm": 0.4114830493927002, "learning_rate": 0.00022946778711484595, "loss": 0.4501, "step": 27622 }, { "epoch": 15.431843575418995, "grad_norm": 0.5296586155891418, "learning_rate": 0.00022943977591036415, "loss": 0.3829, "step": 27623 }, { "epoch": 
15.432402234636871, "grad_norm": 0.6578084230422974, "learning_rate": 0.00022941176470588236, "loss": 0.4193, "step": 27624 }, { "epoch": 15.432960893854748, "grad_norm": 0.4390453100204468, "learning_rate": 0.00022938375350140057, "loss": 0.4166, "step": 27625 }, { "epoch": 15.433519553072626, "grad_norm": 0.3733435273170471, "learning_rate": 0.00022935574229691877, "loss": 0.3638, "step": 27626 }, { "epoch": 15.434078212290503, "grad_norm": 0.7661147713661194, "learning_rate": 0.00022932773109243698, "loss": 0.4204, "step": 27627 }, { "epoch": 15.43463687150838, "grad_norm": 0.46562889218330383, "learning_rate": 0.0002292997198879552, "loss": 0.3369, "step": 27628 }, { "epoch": 15.435195530726258, "grad_norm": 0.38845834136009216, "learning_rate": 0.0002292717086834734, "loss": 0.4245, "step": 27629 }, { "epoch": 15.435754189944134, "grad_norm": 0.3744841516017914, "learning_rate": 0.0002292436974789916, "loss": 0.3507, "step": 27630 }, { "epoch": 15.436312849162011, "grad_norm": 0.8743610978126526, "learning_rate": 0.0002292156862745098, "loss": 0.362, "step": 27631 }, { "epoch": 15.436871508379888, "grad_norm": 0.4833931028842926, "learning_rate": 0.00022918767507002804, "loss": 0.4614, "step": 27632 }, { "epoch": 15.437430167597766, "grad_norm": 0.42429786920547485, "learning_rate": 0.00022915966386554624, "loss": 0.4175, "step": 27633 }, { "epoch": 15.437988826815642, "grad_norm": 0.6156449317932129, "learning_rate": 0.00022913165266106442, "loss": 0.418, "step": 27634 }, { "epoch": 15.438547486033519, "grad_norm": 1.6276191473007202, "learning_rate": 0.00022910364145658263, "loss": 0.4153, "step": 27635 }, { "epoch": 15.439106145251397, "grad_norm": 0.5266149044036865, "learning_rate": 0.00022907563025210086, "loss": 0.3583, "step": 27636 }, { "epoch": 15.439664804469274, "grad_norm": 11.407163619995117, "learning_rate": 0.00022904761904761906, "loss": 0.4658, "step": 27637 }, { "epoch": 15.44022346368715, "grad_norm": 0.41940897703170776, "learning_rate": 
0.00022901960784313727, "loss": 0.4096, "step": 27638 }, { "epoch": 15.440782122905027, "grad_norm": 2.3815360069274902, "learning_rate": 0.00022899159663865545, "loss": 0.4761, "step": 27639 }, { "epoch": 15.441340782122905, "grad_norm": 0.427706241607666, "learning_rate": 0.00022896358543417368, "loss": 0.35, "step": 27640 }, { "epoch": 15.441899441340782, "grad_norm": 0.4956286549568176, "learning_rate": 0.0002289355742296919, "loss": 0.5104, "step": 27641 }, { "epoch": 15.442458100558659, "grad_norm": 0.99051433801651, "learning_rate": 0.0002289075630252101, "loss": 0.4523, "step": 27642 }, { "epoch": 15.443016759776537, "grad_norm": 0.5525454878807068, "learning_rate": 0.00022887955182072827, "loss": 0.3786, "step": 27643 }, { "epoch": 15.443575418994413, "grad_norm": 1.143302083015442, "learning_rate": 0.0002288515406162465, "loss": 0.7081, "step": 27644 }, { "epoch": 15.44413407821229, "grad_norm": 0.40622884035110474, "learning_rate": 0.0002288235294117647, "loss": 0.3518, "step": 27645 }, { "epoch": 15.444692737430168, "grad_norm": 0.5262824892997742, "learning_rate": 0.00022879551820728292, "loss": 0.551, "step": 27646 }, { "epoch": 15.445251396648045, "grad_norm": 0.5108913779258728, "learning_rate": 0.00022876750700280112, "loss": 0.4627, "step": 27647 }, { "epoch": 15.445810055865921, "grad_norm": 0.4353354871273041, "learning_rate": 0.00022873949579831933, "loss": 0.3024, "step": 27648 }, { "epoch": 15.446368715083798, "grad_norm": 0.36251866817474365, "learning_rate": 0.00022871148459383754, "loss": 0.3678, "step": 27649 }, { "epoch": 15.446927374301676, "grad_norm": 0.43140944838523865, "learning_rate": 0.00022868347338935574, "loss": 0.3017, "step": 27650 }, { "epoch": 15.447486033519553, "grad_norm": 0.6407912969589233, "learning_rate": 0.00022865546218487395, "loss": 0.4401, "step": 27651 }, { "epoch": 15.44804469273743, "grad_norm": 0.49221348762512207, "learning_rate": 0.00022862745098039218, "loss": 0.4441, "step": 27652 }, { "epoch": 
15.448603351955308, "grad_norm": 0.3979596197605133, "learning_rate": 0.00022859943977591036, "loss": 0.3996, "step": 27653 }, { "epoch": 15.449162011173184, "grad_norm": 0.4379895329475403, "learning_rate": 0.00022857142857142857, "loss": 0.3407, "step": 27654 }, { "epoch": 15.449720670391061, "grad_norm": 0.4559272825717926, "learning_rate": 0.00022854341736694677, "loss": 0.4442, "step": 27655 }, { "epoch": 15.45027932960894, "grad_norm": 0.4939734637737274, "learning_rate": 0.000228515406162465, "loss": 0.351, "step": 27656 }, { "epoch": 15.450837988826816, "grad_norm": 0.6032205820083618, "learning_rate": 0.0002284873949579832, "loss": 0.3944, "step": 27657 }, { "epoch": 15.451396648044692, "grad_norm": 0.45249828696250916, "learning_rate": 0.0002284593837535014, "loss": 0.4493, "step": 27658 }, { "epoch": 15.451955307262569, "grad_norm": 0.6539236903190613, "learning_rate": 0.0002284313725490196, "loss": 0.3956, "step": 27659 }, { "epoch": 15.452513966480447, "grad_norm": 1.2492121458053589, "learning_rate": 0.00022840336134453783, "loss": 0.3869, "step": 27660 }, { "epoch": 15.453072625698324, "grad_norm": 0.627498984336853, "learning_rate": 0.00022837535014005604, "loss": 0.4193, "step": 27661 }, { "epoch": 15.4536312849162, "grad_norm": 0.33169886469841003, "learning_rate": 0.00022834733893557424, "loss": 0.2996, "step": 27662 }, { "epoch": 15.454189944134079, "grad_norm": 0.3716643452644348, "learning_rate": 0.00022831932773109242, "loss": 0.4158, "step": 27663 }, { "epoch": 15.454748603351955, "grad_norm": 0.4901297986507416, "learning_rate": 0.00022829131652661065, "loss": 0.3603, "step": 27664 }, { "epoch": 15.455307262569832, "grad_norm": 0.5403488278388977, "learning_rate": 0.00022826330532212886, "loss": 0.3732, "step": 27665 }, { "epoch": 15.45586592178771, "grad_norm": 0.4044787883758545, "learning_rate": 0.00022823529411764707, "loss": 0.3437, "step": 27666 }, { "epoch": 15.456424581005587, "grad_norm": 0.5171582698822021, "learning_rate": 
0.00022820728291316527, "loss": 0.4816, "step": 27667 }, { "epoch": 15.456983240223463, "grad_norm": 3.2613813877105713, "learning_rate": 0.00022817927170868348, "loss": 0.3555, "step": 27668 }, { "epoch": 15.45754189944134, "grad_norm": 0.6673614978790283, "learning_rate": 0.00022815126050420168, "loss": 0.3789, "step": 27669 }, { "epoch": 15.458100558659218, "grad_norm": 0.581941545009613, "learning_rate": 0.0002281232492997199, "loss": 0.4786, "step": 27670 }, { "epoch": 15.458659217877095, "grad_norm": 0.5647519826889038, "learning_rate": 0.0002280952380952381, "loss": 0.4133, "step": 27671 }, { "epoch": 15.459217877094972, "grad_norm": 1.0003045797348022, "learning_rate": 0.00022806722689075633, "loss": 0.4272, "step": 27672 }, { "epoch": 15.45977653631285, "grad_norm": 0.779039204120636, "learning_rate": 0.0002280392156862745, "loss": 0.5387, "step": 27673 }, { "epoch": 15.460335195530726, "grad_norm": 0.47317731380462646, "learning_rate": 0.00022801120448179271, "loss": 0.5029, "step": 27674 }, { "epoch": 15.460893854748603, "grad_norm": 1.9812017679214478, "learning_rate": 0.00022798319327731092, "loss": 0.3512, "step": 27675 }, { "epoch": 15.461452513966481, "grad_norm": 0.5163731575012207, "learning_rate": 0.00022795518207282915, "loss": 0.3531, "step": 27676 }, { "epoch": 15.462011173184358, "grad_norm": 0.4579041302204132, "learning_rate": 0.00022792717086834736, "loss": 0.4395, "step": 27677 }, { "epoch": 15.462569832402234, "grad_norm": 0.4563809633255005, "learning_rate": 0.00022789915966386554, "loss": 0.4023, "step": 27678 }, { "epoch": 15.463128491620111, "grad_norm": 0.7590668201446533, "learning_rate": 0.00022787114845938374, "loss": 0.4024, "step": 27679 }, { "epoch": 15.46368715083799, "grad_norm": 7.259964942932129, "learning_rate": 0.00022784313725490198, "loss": 0.3955, "step": 27680 }, { "epoch": 15.464245810055866, "grad_norm": 0.4260775148868561, "learning_rate": 0.00022781512605042018, "loss": 0.5114, "step": 27681 }, { "epoch": 
15.464804469273743, "grad_norm": 0.5505006909370422, "learning_rate": 0.0002277871148459384, "loss": 0.4232, "step": 27682 }, { "epoch": 15.46536312849162, "grad_norm": 0.5028451681137085, "learning_rate": 0.00022775910364145657, "loss": 0.3986, "step": 27683 }, { "epoch": 15.465921787709497, "grad_norm": 0.5197755694389343, "learning_rate": 0.0002277310924369748, "loss": 0.4804, "step": 27684 }, { "epoch": 15.466480446927374, "grad_norm": 0.5973348021507263, "learning_rate": 0.000227703081232493, "loss": 0.4683, "step": 27685 }, { "epoch": 15.46703910614525, "grad_norm": 0.5457064509391785, "learning_rate": 0.0002276750700280112, "loss": 0.4634, "step": 27686 }, { "epoch": 15.467597765363129, "grad_norm": 0.47269371151924133, "learning_rate": 0.00022764705882352942, "loss": 0.3978, "step": 27687 }, { "epoch": 15.468156424581005, "grad_norm": 7.242918491363525, "learning_rate": 0.00022761904761904762, "loss": 0.4553, "step": 27688 }, { "epoch": 15.468715083798882, "grad_norm": 0.5599194169044495, "learning_rate": 0.00022759103641456583, "loss": 0.4651, "step": 27689 }, { "epoch": 15.46927374301676, "grad_norm": 0.43262985348701477, "learning_rate": 0.00022756302521008404, "loss": 0.429, "step": 27690 }, { "epoch": 15.469832402234637, "grad_norm": 0.322427362203598, "learning_rate": 0.00022753501400560224, "loss": 0.3351, "step": 27691 }, { "epoch": 15.470391061452514, "grad_norm": 0.43876051902770996, "learning_rate": 0.00022750700280112048, "loss": 0.3816, "step": 27692 }, { "epoch": 15.470949720670392, "grad_norm": 0.4143429696559906, "learning_rate": 0.00022747899159663865, "loss": 0.6289, "step": 27693 }, { "epoch": 15.471508379888268, "grad_norm": 0.46920105814933777, "learning_rate": 0.00022745098039215686, "loss": 0.4933, "step": 27694 }, { "epoch": 15.472067039106145, "grad_norm": 0.522365152835846, "learning_rate": 0.00022742296918767507, "loss": 0.3471, "step": 27695 }, { "epoch": 15.472625698324022, "grad_norm": 0.6207307577133179, "learning_rate": 
0.0002273949579831933, "loss": 0.4391, "step": 27696 }, { "epoch": 15.4731843575419, "grad_norm": 0.4017002582550049, "learning_rate": 0.0002273669467787115, "loss": 0.408, "step": 27697 }, { "epoch": 15.473743016759776, "grad_norm": 0.5419505834579468, "learning_rate": 0.00022733893557422968, "loss": 0.3766, "step": 27698 }, { "epoch": 15.474301675977653, "grad_norm": 0.43403494358062744, "learning_rate": 0.0002273109243697479, "loss": 0.4243, "step": 27699 }, { "epoch": 15.474860335195531, "grad_norm": 0.6490126252174377, "learning_rate": 0.00022728291316526612, "loss": 0.4771, "step": 27700 }, { "epoch": 15.475418994413408, "grad_norm": 0.8881585597991943, "learning_rate": 0.00022725490196078433, "loss": 0.4652, "step": 27701 }, { "epoch": 15.475977653631285, "grad_norm": 0.3979702293872833, "learning_rate": 0.0002272268907563025, "loss": 0.3543, "step": 27702 }, { "epoch": 15.476536312849163, "grad_norm": 2.262089490890503, "learning_rate": 0.00022719887955182071, "loss": 0.383, "step": 27703 }, { "epoch": 15.47709497206704, "grad_norm": 0.49367737770080566, "learning_rate": 0.00022717086834733895, "loss": 0.4739, "step": 27704 }, { "epoch": 15.477653631284916, "grad_norm": 0.5165473818778992, "learning_rate": 0.00022714285714285715, "loss": 0.3618, "step": 27705 }, { "epoch": 15.478212290502793, "grad_norm": 0.5490630269050598, "learning_rate": 0.00022711484593837536, "loss": 0.4626, "step": 27706 }, { "epoch": 15.478770949720671, "grad_norm": 0.67984938621521, "learning_rate": 0.00022708683473389354, "loss": 0.4194, "step": 27707 }, { "epoch": 15.479329608938547, "grad_norm": 0.48702386021614075, "learning_rate": 0.00022705882352941177, "loss": 0.4674, "step": 27708 }, { "epoch": 15.479888268156424, "grad_norm": 0.7793223261833191, "learning_rate": 0.00022703081232492998, "loss": 0.4446, "step": 27709 }, { "epoch": 15.480446927374302, "grad_norm": 0.749777615070343, "learning_rate": 0.00022700280112044818, "loss": 0.467, "step": 27710 }, { "epoch": 
15.481005586592179, "grad_norm": 1.3378573656082153, "learning_rate": 0.0002269747899159664, "loss": 0.3866, "step": 27711 }, { "epoch": 15.481564245810056, "grad_norm": 0.41602206230163574, "learning_rate": 0.0002269467787114846, "loss": 0.4069, "step": 27712 }, { "epoch": 15.482122905027932, "grad_norm": 0.5027822256088257, "learning_rate": 0.0002269187675070028, "loss": 0.3923, "step": 27713 }, { "epoch": 15.48268156424581, "grad_norm": 6.99078893661499, "learning_rate": 0.000226890756302521, "loss": 0.4555, "step": 27714 }, { "epoch": 15.483240223463687, "grad_norm": 0.3848678469657898, "learning_rate": 0.00022686274509803921, "loss": 0.32, "step": 27715 }, { "epoch": 15.483798882681564, "grad_norm": 0.742813229560852, "learning_rate": 0.00022683473389355745, "loss": 0.3903, "step": 27716 }, { "epoch": 15.484357541899442, "grad_norm": 0.7288814783096313, "learning_rate": 0.00022680672268907563, "loss": 0.3523, "step": 27717 }, { "epoch": 15.484916201117318, "grad_norm": 0.46087270975112915, "learning_rate": 0.00022677871148459383, "loss": 0.47, "step": 27718 }, { "epoch": 15.485474860335195, "grad_norm": 4.229316711425781, "learning_rate": 0.00022675070028011204, "loss": 0.3998, "step": 27719 }, { "epoch": 15.486033519553073, "grad_norm": 0.8021203875541687, "learning_rate": 0.00022672268907563027, "loss": 0.4289, "step": 27720 }, { "epoch": 15.48659217877095, "grad_norm": 0.3956725001335144, "learning_rate": 0.00022669467787114848, "loss": 0.3374, "step": 27721 }, { "epoch": 15.487150837988827, "grad_norm": 0.6719968318939209, "learning_rate": 0.00022666666666666666, "loss": 0.4233, "step": 27722 }, { "epoch": 15.487709497206703, "grad_norm": 0.4739748537540436, "learning_rate": 0.00022663865546218486, "loss": 0.4605, "step": 27723 }, { "epoch": 15.488268156424581, "grad_norm": 0.6410335302352905, "learning_rate": 0.0002266106442577031, "loss": 0.4411, "step": 27724 }, { "epoch": 15.488826815642458, "grad_norm": 0.5351026654243469, "learning_rate": 
0.0002265826330532213, "loss": 0.3417, "step": 27725 }, { "epoch": 15.489385474860335, "grad_norm": 0.40366125106811523, "learning_rate": 0.0002265546218487395, "loss": 0.4236, "step": 27726 }, { "epoch": 15.489944134078213, "grad_norm": 0.5542619228363037, "learning_rate": 0.00022652661064425769, "loss": 0.3679, "step": 27727 }, { "epoch": 15.49050279329609, "grad_norm": 0.5849606394767761, "learning_rate": 0.00022649859943977592, "loss": 0.3836, "step": 27728 }, { "epoch": 15.491061452513966, "grad_norm": 0.4847210645675659, "learning_rate": 0.00022647058823529412, "loss": 0.3922, "step": 27729 }, { "epoch": 15.491620111731844, "grad_norm": 0.6907528042793274, "learning_rate": 0.00022644257703081233, "loss": 0.3566, "step": 27730 }, { "epoch": 15.492178770949721, "grad_norm": 0.4100522994995117, "learning_rate": 0.00022641456582633054, "loss": 0.4029, "step": 27731 }, { "epoch": 15.492737430167598, "grad_norm": 0.4595806300640106, "learning_rate": 0.00022638655462184874, "loss": 0.4208, "step": 27732 }, { "epoch": 15.493296089385474, "grad_norm": 0.3797292709350586, "learning_rate": 0.00022635854341736695, "loss": 0.3549, "step": 27733 }, { "epoch": 15.493854748603352, "grad_norm": 0.42252305150032043, "learning_rate": 0.00022633053221288515, "loss": 0.355, "step": 27734 }, { "epoch": 15.494413407821229, "grad_norm": 0.90904700756073, "learning_rate": 0.00022630252100840336, "loss": 0.3713, "step": 27735 }, { "epoch": 15.494972067039106, "grad_norm": 0.5891319513320923, "learning_rate": 0.0002262745098039216, "loss": 0.4912, "step": 27736 }, { "epoch": 15.495530726256984, "grad_norm": 0.4711788296699524, "learning_rate": 0.00022624649859943977, "loss": 0.3273, "step": 27737 }, { "epoch": 15.49608938547486, "grad_norm": 0.9140803813934326, "learning_rate": 0.00022621848739495798, "loss": 0.4454, "step": 27738 }, { "epoch": 15.496648044692737, "grad_norm": 1.2518978118896484, "learning_rate": 0.00022619047619047618, "loss": 0.3853, "step": 27739 }, { "epoch": 
15.497206703910614, "grad_norm": 4.439642429351807, "learning_rate": 0.00022616246498599442, "loss": 0.6196, "step": 27740 }, { "epoch": 15.497765363128492, "grad_norm": 0.5441060662269592, "learning_rate": 0.00022613445378151262, "loss": 0.4056, "step": 27741 }, { "epoch": 15.498324022346369, "grad_norm": 0.46169713139533997, "learning_rate": 0.0002261064425770308, "loss": 0.3582, "step": 27742 }, { "epoch": 15.498882681564245, "grad_norm": 0.42566612362861633, "learning_rate": 0.000226078431372549, "loss": 0.3208, "step": 27743 }, { "epoch": 15.499441340782123, "grad_norm": 0.5000613927841187, "learning_rate": 0.00022605042016806724, "loss": 0.3902, "step": 27744 }, { "epoch": 15.5, "grad_norm": 1.6427463293075562, "learning_rate": 0.00022602240896358545, "loss": 0.4906, "step": 27745 }, { "epoch": 15.500558659217877, "grad_norm": 1.1477903127670288, "learning_rate": 0.00022599439775910365, "loss": 0.4149, "step": 27746 }, { "epoch": 15.501117318435755, "grad_norm": 0.33115386962890625, "learning_rate": 0.00022596638655462183, "loss": 0.3167, "step": 27747 }, { "epoch": 15.501675977653631, "grad_norm": 0.40444689989089966, "learning_rate": 0.00022593837535014007, "loss": 0.3464, "step": 27748 }, { "epoch": 15.502234636871508, "grad_norm": 0.7617838382720947, "learning_rate": 0.00022591036414565827, "loss": 0.4017, "step": 27749 }, { "epoch": 15.502793296089386, "grad_norm": 0.4813630282878876, "learning_rate": 0.00022588235294117648, "loss": 0.3969, "step": 27750 }, { "epoch": 15.503351955307263, "grad_norm": 0.3566363453865051, "learning_rate": 0.00022585434173669468, "loss": 0.3564, "step": 27751 }, { "epoch": 15.50391061452514, "grad_norm": 0.4510447382926941, "learning_rate": 0.0002258263305322129, "loss": 0.4681, "step": 27752 }, { "epoch": 15.504469273743016, "grad_norm": 0.5560593008995056, "learning_rate": 0.0002257983193277311, "loss": 0.5418, "step": 27753 }, { "epoch": 15.505027932960894, "grad_norm": 0.48497235774993896, "learning_rate": 
0.0002257703081232493, "loss": 0.2578, "step": 27754 }, { "epoch": 15.505586592178771, "grad_norm": 0.7682862281799316, "learning_rate": 0.0002257422969187675, "loss": 0.3326, "step": 27755 }, { "epoch": 15.506145251396648, "grad_norm": 0.49692872166633606, "learning_rate": 0.00022571428571428571, "loss": 0.3487, "step": 27756 }, { "epoch": 15.506703910614526, "grad_norm": 0.5352974534034729, "learning_rate": 0.00022568627450980392, "loss": 0.3662, "step": 27757 }, { "epoch": 15.507262569832402, "grad_norm": 0.4714043140411377, "learning_rate": 0.00022565826330532213, "loss": 0.3866, "step": 27758 }, { "epoch": 15.507821229050279, "grad_norm": 0.39776623249053955, "learning_rate": 0.00022563025210084033, "loss": 0.5038, "step": 27759 }, { "epoch": 15.508379888268156, "grad_norm": 0.9410495162010193, "learning_rate": 0.00022560224089635856, "loss": 0.3989, "step": 27760 }, { "epoch": 15.508938547486034, "grad_norm": 0.47694334387779236, "learning_rate": 0.00022557422969187674, "loss": 0.3357, "step": 27761 }, { "epoch": 15.50949720670391, "grad_norm": 0.4025542736053467, "learning_rate": 0.00022554621848739495, "loss": 0.4093, "step": 27762 }, { "epoch": 15.510055865921787, "grad_norm": 6.801734447479248, "learning_rate": 0.00022551820728291316, "loss": 0.4862, "step": 27763 }, { "epoch": 15.510614525139665, "grad_norm": 0.7451063394546509, "learning_rate": 0.0002254901960784314, "loss": 0.791, "step": 27764 }, { "epoch": 15.511173184357542, "grad_norm": 0.6250303387641907, "learning_rate": 0.0002254621848739496, "loss": 0.4077, "step": 27765 }, { "epoch": 15.511731843575419, "grad_norm": 0.6530276536941528, "learning_rate": 0.00022543417366946777, "loss": 0.5877, "step": 27766 }, { "epoch": 15.512290502793297, "grad_norm": 0.4405488073825836, "learning_rate": 0.00022540616246498598, "loss": 0.5036, "step": 27767 }, { "epoch": 15.512849162011173, "grad_norm": 1.9687271118164062, "learning_rate": 0.0002253781512605042, "loss": 0.4081, "step": 27768 }, { "epoch": 
15.51340782122905, "grad_norm": 0.9267881512641907, "learning_rate": 0.00022535014005602242, "loss": 0.3863, "step": 27769 }, { "epoch": 15.513966480446927, "grad_norm": 0.3652385473251343, "learning_rate": 0.00022532212885154062, "loss": 0.3228, "step": 27770 }, { "epoch": 15.514525139664805, "grad_norm": 0.46286118030548096, "learning_rate": 0.0002252941176470588, "loss": 0.4435, "step": 27771 }, { "epoch": 15.515083798882682, "grad_norm": 0.5767965912818909, "learning_rate": 0.00022526610644257704, "loss": 0.3195, "step": 27772 }, { "epoch": 15.515642458100558, "grad_norm": 0.5513387322425842, "learning_rate": 0.00022523809523809524, "loss": 0.4673, "step": 27773 }, { "epoch": 15.516201117318436, "grad_norm": 2.7823116779327393, "learning_rate": 0.00022521008403361345, "loss": 0.415, "step": 27774 }, { "epoch": 15.516759776536313, "grad_norm": 0.5855982899665833, "learning_rate": 0.00022518207282913165, "loss": 0.6397, "step": 27775 }, { "epoch": 15.51731843575419, "grad_norm": 0.6598750948905945, "learning_rate": 0.00022515406162464986, "loss": 0.4776, "step": 27776 }, { "epoch": 15.517877094972068, "grad_norm": 0.8199924826622009, "learning_rate": 0.00022512605042016807, "loss": 0.554, "step": 27777 }, { "epoch": 15.518435754189944, "grad_norm": 0.4444223642349243, "learning_rate": 0.00022509803921568627, "loss": 0.3705, "step": 27778 }, { "epoch": 15.518994413407821, "grad_norm": 0.45687365531921387, "learning_rate": 0.00022507002801120448, "loss": 0.3419, "step": 27779 }, { "epoch": 15.519553072625698, "grad_norm": 0.40366584062576294, "learning_rate": 0.0002250420168067227, "loss": 0.5157, "step": 27780 }, { "epoch": 15.520111731843576, "grad_norm": 0.7116847634315491, "learning_rate": 0.0002250140056022409, "loss": 0.3981, "step": 27781 }, { "epoch": 15.520670391061453, "grad_norm": 0.3483697772026062, "learning_rate": 0.0002249859943977591, "loss": 0.4021, "step": 27782 }, { "epoch": 15.521229050279329, "grad_norm": 0.419830858707428, "learning_rate": 
0.0002249579831932773, "loss": 0.331, "step": 27783 }, { "epoch": 15.521787709497207, "grad_norm": 0.39615291357040405, "learning_rate": 0.00022492997198879554, "loss": 0.4366, "step": 27784 }, { "epoch": 15.522346368715084, "grad_norm": 0.667262077331543, "learning_rate": 0.00022490196078431374, "loss": 0.5225, "step": 27785 }, { "epoch": 15.52290502793296, "grad_norm": 0.3409097194671631, "learning_rate": 0.00022487394957983192, "loss": 0.4403, "step": 27786 }, { "epoch": 15.523463687150837, "grad_norm": 1.2697312831878662, "learning_rate": 0.00022484593837535013, "loss": 0.4086, "step": 27787 }, { "epoch": 15.524022346368715, "grad_norm": 0.39927807450294495, "learning_rate": 0.00022481792717086836, "loss": 0.3901, "step": 27788 }, { "epoch": 15.524581005586592, "grad_norm": 0.7561572194099426, "learning_rate": 0.00022478991596638657, "loss": 0.4048, "step": 27789 }, { "epoch": 15.525139664804469, "grad_norm": 1.0124382972717285, "learning_rate": 0.00022476190476190477, "loss": 0.4465, "step": 27790 }, { "epoch": 15.525698324022347, "grad_norm": 0.7594790458679199, "learning_rate": 0.00022473389355742295, "loss": 0.4258, "step": 27791 }, { "epoch": 15.526256983240224, "grad_norm": 0.46628737449645996, "learning_rate": 0.00022470588235294118, "loss": 0.4281, "step": 27792 }, { "epoch": 15.5268156424581, "grad_norm": 0.9521386623382568, "learning_rate": 0.0002246778711484594, "loss": 0.4199, "step": 27793 }, { "epoch": 15.527374301675978, "grad_norm": 0.3501659631729126, "learning_rate": 0.0002246498599439776, "loss": 0.4267, "step": 27794 }, { "epoch": 15.527932960893855, "grad_norm": 0.3712787330150604, "learning_rate": 0.00022462184873949583, "loss": 0.3779, "step": 27795 }, { "epoch": 15.528491620111732, "grad_norm": 0.44694897532463074, "learning_rate": 0.000224593837535014, "loss": 0.3585, "step": 27796 }, { "epoch": 15.529050279329608, "grad_norm": 0.46219462156295776, "learning_rate": 0.00022456582633053221, "loss": 0.3315, "step": 27797 }, { "epoch": 
15.529608938547486, "grad_norm": 0.46868833899497986, "learning_rate": 0.00022453781512605042, "loss": 0.3413, "step": 27798 }, { "epoch": 15.530167597765363, "grad_norm": 0.3779137134552002, "learning_rate": 0.00022450980392156865, "loss": 0.3353, "step": 27799 }, { "epoch": 15.53072625698324, "grad_norm": 0.320011168718338, "learning_rate": 0.00022448179271708686, "loss": 0.338, "step": 27800 }, { "epoch": 15.531284916201118, "grad_norm": 0.47712403535842896, "learning_rate": 0.00022445378151260504, "loss": 0.4771, "step": 27801 }, { "epoch": 15.531843575418995, "grad_norm": 0.955322802066803, "learning_rate": 0.00022442577030812324, "loss": 0.3127, "step": 27802 }, { "epoch": 15.532402234636871, "grad_norm": 0.43112605810165405, "learning_rate": 0.00022439775910364148, "loss": 0.4107, "step": 27803 }, { "epoch": 15.53296089385475, "grad_norm": 0.5075454115867615, "learning_rate": 0.00022436974789915968, "loss": 0.4834, "step": 27804 }, { "epoch": 15.533519553072626, "grad_norm": 0.46545740962028503, "learning_rate": 0.0002243417366946779, "loss": 0.4086, "step": 27805 }, { "epoch": 15.534078212290503, "grad_norm": 0.6279367804527283, "learning_rate": 0.00022431372549019607, "loss": 0.3753, "step": 27806 }, { "epoch": 15.53463687150838, "grad_norm": 1.413002848625183, "learning_rate": 0.0002242857142857143, "loss": 0.3897, "step": 27807 }, { "epoch": 15.535195530726257, "grad_norm": 0.43945133686065674, "learning_rate": 0.0002242577030812325, "loss": 0.473, "step": 27808 }, { "epoch": 15.535754189944134, "grad_norm": 0.4629015326499939, "learning_rate": 0.0002242296918767507, "loss": 0.4382, "step": 27809 }, { "epoch": 15.53631284916201, "grad_norm": 0.4480507969856262, "learning_rate": 0.0002242016806722689, "loss": 0.4411, "step": 27810 }, { "epoch": 15.536871508379889, "grad_norm": 0.39406436681747437, "learning_rate": 0.00022417366946778712, "loss": 0.3934, "step": 27811 }, { "epoch": 15.537430167597766, "grad_norm": 0.4244726300239563, "learning_rate": 
0.00022414565826330533, "loss": 0.4296, "step": 27812 }, { "epoch": 15.537988826815642, "grad_norm": 0.7130715250968933, "learning_rate": 0.00022411764705882354, "loss": 0.4231, "step": 27813 }, { "epoch": 15.538547486033519, "grad_norm": 0.37719523906707764, "learning_rate": 0.00022408963585434174, "loss": 0.3713, "step": 27814 }, { "epoch": 15.539106145251397, "grad_norm": 0.4484434723854065, "learning_rate": 0.00022406162464985995, "loss": 0.4769, "step": 27815 }, { "epoch": 15.539664804469274, "grad_norm": 0.507165253162384, "learning_rate": 0.00022403361344537815, "loss": 0.4794, "step": 27816 }, { "epoch": 15.54022346368715, "grad_norm": 0.3663724958896637, "learning_rate": 0.00022400560224089636, "loss": 0.3152, "step": 27817 }, { "epoch": 15.540782122905028, "grad_norm": 0.5082835555076599, "learning_rate": 0.00022397759103641457, "loss": 0.3894, "step": 27818 }, { "epoch": 15.541340782122905, "grad_norm": 0.5757226943969727, "learning_rate": 0.0002239495798319328, "loss": 0.3396, "step": 27819 }, { "epoch": 15.541899441340782, "grad_norm": 2.986664295196533, "learning_rate": 0.00022392156862745098, "loss": 0.5302, "step": 27820 }, { "epoch": 15.54245810055866, "grad_norm": 0.5417391061782837, "learning_rate": 0.00022389355742296918, "loss": 0.3914, "step": 27821 }, { "epoch": 15.543016759776537, "grad_norm": 0.5475592017173767, "learning_rate": 0.0002238655462184874, "loss": 0.5516, "step": 27822 }, { "epoch": 15.543575418994413, "grad_norm": 0.9667094349861145, "learning_rate": 0.00022383753501400562, "loss": 0.4107, "step": 27823 }, { "epoch": 15.544134078212291, "grad_norm": 0.8609445095062256, "learning_rate": 0.00022380952380952383, "loss": 0.4726, "step": 27824 }, { "epoch": 15.544692737430168, "grad_norm": 0.7537353038787842, "learning_rate": 0.000223781512605042, "loss": 0.4655, "step": 27825 }, { "epoch": 15.545251396648045, "grad_norm": 1.0021028518676758, "learning_rate": 0.00022375350140056021, "loss": 0.4195, "step": 27826 }, { "epoch": 
15.545810055865921, "grad_norm": 1.3827221393585205, "learning_rate": 0.00022372549019607845, "loss": 0.4152, "step": 27827 }, { "epoch": 15.5463687150838, "grad_norm": 0.3408071994781494, "learning_rate": 0.00022369747899159665, "loss": 0.325, "step": 27828 }, { "epoch": 15.546927374301676, "grad_norm": 0.7138128280639648, "learning_rate": 0.00022366946778711486, "loss": 0.3637, "step": 27829 }, { "epoch": 15.547486033519553, "grad_norm": 0.4434405267238617, "learning_rate": 0.00022364145658263304, "loss": 0.4552, "step": 27830 }, { "epoch": 15.548044692737431, "grad_norm": 0.7046041488647461, "learning_rate": 0.00022361344537815127, "loss": 0.4774, "step": 27831 }, { "epoch": 15.548603351955308, "grad_norm": 0.5138440728187561, "learning_rate": 0.00022358543417366948, "loss": 0.4148, "step": 27832 }, { "epoch": 15.549162011173184, "grad_norm": 0.5399837493896484, "learning_rate": 0.00022355742296918768, "loss": 0.3205, "step": 27833 }, { "epoch": 15.54972067039106, "grad_norm": 0.5108838081359863, "learning_rate": 0.0002235294117647059, "loss": 0.433, "step": 27834 }, { "epoch": 15.550279329608939, "grad_norm": 0.5395323634147644, "learning_rate": 0.0002235014005602241, "loss": 0.4069, "step": 27835 }, { "epoch": 15.550837988826816, "grad_norm": 0.4377346336841583, "learning_rate": 0.0002234733893557423, "loss": 0.4045, "step": 27836 }, { "epoch": 15.551396648044692, "grad_norm": 4.8756103515625, "learning_rate": 0.0002234453781512605, "loss": 0.3721, "step": 27837 }, { "epoch": 15.55195530726257, "grad_norm": 0.4428098499774933, "learning_rate": 0.00022341736694677871, "loss": 0.3483, "step": 27838 }, { "epoch": 15.552513966480447, "grad_norm": 0.8837556838989258, "learning_rate": 0.00022338935574229695, "loss": 0.3811, "step": 27839 }, { "epoch": 15.553072625698324, "grad_norm": 1.031833291053772, "learning_rate": 0.00022336134453781513, "loss": 0.4121, "step": 27840 }, { "epoch": 15.553631284916202, "grad_norm": 0.5359768867492676, "learning_rate": 
0.00022333333333333333, "loss": 0.4345, "step": 27841 }, { "epoch": 15.554189944134079, "grad_norm": 0.4339999854564667, "learning_rate": 0.00022330532212885154, "loss": 0.3908, "step": 27842 }, { "epoch": 15.554748603351955, "grad_norm": 0.49947911500930786, "learning_rate": 0.00022327731092436977, "loss": 0.4705, "step": 27843 }, { "epoch": 15.555307262569832, "grad_norm": 0.7107603549957275, "learning_rate": 0.00022324929971988798, "loss": 0.4691, "step": 27844 }, { "epoch": 15.55586592178771, "grad_norm": 0.3989611566066742, "learning_rate": 0.00022322128851540616, "loss": 0.448, "step": 27845 }, { "epoch": 15.556424581005587, "grad_norm": 0.6919142603874207, "learning_rate": 0.00022319327731092436, "loss": 0.3891, "step": 27846 }, { "epoch": 15.556983240223463, "grad_norm": 0.4450211524963379, "learning_rate": 0.0002231652661064426, "loss": 0.3426, "step": 27847 }, { "epoch": 15.557541899441341, "grad_norm": 0.5051398873329163, "learning_rate": 0.0002231372549019608, "loss": 0.3624, "step": 27848 }, { "epoch": 15.558100558659218, "grad_norm": 0.3704480826854706, "learning_rate": 0.000223109243697479, "loss": 0.4175, "step": 27849 }, { "epoch": 15.558659217877095, "grad_norm": 0.4108850955963135, "learning_rate": 0.00022308123249299719, "loss": 0.4215, "step": 27850 }, { "epoch": 15.559217877094973, "grad_norm": 0.3740463852882385, "learning_rate": 0.00022305322128851542, "loss": 0.436, "step": 27851 }, { "epoch": 15.55977653631285, "grad_norm": 0.577150821685791, "learning_rate": 0.00022302521008403362, "loss": 0.3592, "step": 27852 }, { "epoch": 15.560335195530726, "grad_norm": 0.5524119138717651, "learning_rate": 0.00022299719887955183, "loss": 0.3251, "step": 27853 }, { "epoch": 15.560893854748603, "grad_norm": 0.4201485514640808, "learning_rate": 0.00022296918767507004, "loss": 0.3999, "step": 27854 }, { "epoch": 15.561452513966481, "grad_norm": 0.45837342739105225, "learning_rate": 0.00022294117647058824, "loss": 0.4121, "step": 27855 }, { "epoch": 
15.562011173184358, "grad_norm": 0.43424102663993835, "learning_rate": 0.00022291316526610645, "loss": 0.4174, "step": 27856 }, { "epoch": 15.562569832402234, "grad_norm": 0.7538143992424011, "learning_rate": 0.00022288515406162465, "loss": 0.3682, "step": 27857 }, { "epoch": 15.563128491620112, "grad_norm": 0.41381534934043884, "learning_rate": 0.00022285714285714286, "loss": 0.3739, "step": 27858 }, { "epoch": 15.563687150837989, "grad_norm": 0.4533923268318176, "learning_rate": 0.0002228291316526611, "loss": 0.3686, "step": 27859 }, { "epoch": 15.564245810055866, "grad_norm": 0.34350019693374634, "learning_rate": 0.00022280112044817927, "loss": 0.3715, "step": 27860 }, { "epoch": 15.564804469273742, "grad_norm": 0.5557906031608582, "learning_rate": 0.00022277310924369748, "loss": 0.5958, "step": 27861 }, { "epoch": 15.56536312849162, "grad_norm": 0.6481451988220215, "learning_rate": 0.00022274509803921568, "loss": 0.3957, "step": 27862 }, { "epoch": 15.565921787709497, "grad_norm": 0.5459092259407043, "learning_rate": 0.00022271708683473392, "loss": 0.3216, "step": 27863 }, { "epoch": 15.566480446927374, "grad_norm": 0.7596459984779358, "learning_rate": 0.00022268907563025212, "loss": 0.3183, "step": 27864 }, { "epoch": 15.567039106145252, "grad_norm": 0.6075207591056824, "learning_rate": 0.0002226610644257703, "loss": 0.5228, "step": 27865 }, { "epoch": 15.567597765363129, "grad_norm": 0.8762142658233643, "learning_rate": 0.0002226330532212885, "loss": 0.3887, "step": 27866 }, { "epoch": 15.568156424581005, "grad_norm": 0.4284616708755493, "learning_rate": 0.00022260504201680674, "loss": 0.6057, "step": 27867 }, { "epoch": 15.568715083798883, "grad_norm": 0.5207067131996155, "learning_rate": 0.00022257703081232495, "loss": 0.3693, "step": 27868 }, { "epoch": 15.56927374301676, "grad_norm": 0.4390489459037781, "learning_rate": 0.00022254901960784313, "loss": 0.3953, "step": 27869 }, { "epoch": 15.569832402234637, "grad_norm": 2.9083826541900635, "learning_rate": 
0.00022252100840336133, "loss": 0.3678, "step": 27870 }, { "epoch": 15.570391061452513, "grad_norm": 0.5766394138336182, "learning_rate": 0.00022249299719887957, "loss": 0.5632, "step": 27871 }, { "epoch": 15.570949720670392, "grad_norm": 0.4161745011806488, "learning_rate": 0.00022246498599439777, "loss": 0.4148, "step": 27872 }, { "epoch": 15.571508379888268, "grad_norm": 1.5840708017349243, "learning_rate": 0.00022243697478991598, "loss": 0.3985, "step": 27873 }, { "epoch": 15.572067039106145, "grad_norm": 0.6482981443405151, "learning_rate": 0.00022240896358543416, "loss": 0.3563, "step": 27874 }, { "epoch": 15.572625698324023, "grad_norm": 0.596347987651825, "learning_rate": 0.0002223809523809524, "loss": 0.3524, "step": 27875 }, { "epoch": 15.5731843575419, "grad_norm": 0.4068700075149536, "learning_rate": 0.0002223529411764706, "loss": 0.415, "step": 27876 }, { "epoch": 15.573743016759776, "grad_norm": 0.394394189119339, "learning_rate": 0.0002223249299719888, "loss": 0.3586, "step": 27877 }, { "epoch": 15.574301675977654, "grad_norm": 0.4117099940776825, "learning_rate": 0.000222296918767507, "loss": 0.3585, "step": 27878 }, { "epoch": 15.574860335195531, "grad_norm": 0.5155060887336731, "learning_rate": 0.00022226890756302521, "loss": 0.4698, "step": 27879 }, { "epoch": 15.575418994413408, "grad_norm": 0.4252316355705261, "learning_rate": 0.00022224089635854342, "loss": 0.4955, "step": 27880 }, { "epoch": 15.575977653631284, "grad_norm": 0.3797030448913574, "learning_rate": 0.00022221288515406163, "loss": 0.4122, "step": 27881 }, { "epoch": 15.576536312849163, "grad_norm": 0.33882996439933777, "learning_rate": 0.00022218487394957983, "loss": 0.339, "step": 27882 }, { "epoch": 15.577094972067039, "grad_norm": 0.37392720580101013, "learning_rate": 0.00022215686274509806, "loss": 0.356, "step": 27883 }, { "epoch": 15.577653631284916, "grad_norm": 0.4503822922706604, "learning_rate": 0.00022212885154061624, "loss": 0.3986, "step": 27884 }, { "epoch": 
15.578212290502794, "grad_norm": 0.45349517464637756, "learning_rate": 0.00022210084033613445, "loss": 0.3729, "step": 27885 }, { "epoch": 15.57877094972067, "grad_norm": 0.6705023646354675, "learning_rate": 0.00022207282913165266, "loss": 0.4701, "step": 27886 }, { "epoch": 15.579329608938547, "grad_norm": 0.6574171781539917, "learning_rate": 0.0002220448179271709, "loss": 0.5386, "step": 27887 }, { "epoch": 15.579888268156424, "grad_norm": 1.4103906154632568, "learning_rate": 0.0002220168067226891, "loss": 0.3012, "step": 27888 }, { "epoch": 15.580446927374302, "grad_norm": 0.3313217759132385, "learning_rate": 0.00022198879551820727, "loss": 0.3573, "step": 27889 }, { "epoch": 15.581005586592179, "grad_norm": 0.32307830452919006, "learning_rate": 0.00022196078431372548, "loss": 0.3587, "step": 27890 }, { "epoch": 15.581564245810055, "grad_norm": 0.5686635971069336, "learning_rate": 0.0002219327731092437, "loss": 0.4455, "step": 27891 }, { "epoch": 15.582122905027934, "grad_norm": 4.520925521850586, "learning_rate": 0.00022190476190476192, "loss": 0.4614, "step": 27892 }, { "epoch": 15.58268156424581, "grad_norm": 0.4929114878177643, "learning_rate": 0.00022187675070028012, "loss": 0.3922, "step": 27893 }, { "epoch": 15.583240223463687, "grad_norm": 7.434657096862793, "learning_rate": 0.0002218487394957983, "loss": 0.3705, "step": 27894 }, { "epoch": 15.583798882681565, "grad_norm": 0.6490254998207092, "learning_rate": 0.00022182072829131654, "loss": 0.4126, "step": 27895 }, { "epoch": 15.584357541899442, "grad_norm": 3.9909751415252686, "learning_rate": 0.00022179271708683474, "loss": 0.3589, "step": 27896 }, { "epoch": 15.584916201117318, "grad_norm": 0.41026800870895386, "learning_rate": 0.00022176470588235295, "loss": 0.3939, "step": 27897 }, { "epoch": 15.585474860335196, "grad_norm": 0.6016550660133362, "learning_rate": 0.00022173669467787115, "loss": 0.4498, "step": 27898 }, { "epoch": 15.586033519553073, "grad_norm": 1.143162488937378, "learning_rate": 
0.00022170868347338936, "loss": 0.6263, "step": 27899 }, { "epoch": 15.58659217877095, "grad_norm": 0.8973687887191772, "learning_rate": 0.00022168067226890757, "loss": 0.4008, "step": 27900 }, { "epoch": 15.587150837988826, "grad_norm": 1.216586947441101, "learning_rate": 0.00022165266106442577, "loss": 0.4845, "step": 27901 }, { "epoch": 15.587709497206705, "grad_norm": 0.4037569761276245, "learning_rate": 0.00022162464985994398, "loss": 0.3714, "step": 27902 }, { "epoch": 15.588268156424581, "grad_norm": 0.4583616852760315, "learning_rate": 0.0002215966386554622, "loss": 0.4034, "step": 27903 }, { "epoch": 15.588826815642458, "grad_norm": 0.3377223610877991, "learning_rate": 0.0002215686274509804, "loss": 0.3138, "step": 27904 }, { "epoch": 15.589385474860336, "grad_norm": 0.4331744909286499, "learning_rate": 0.0002215406162464986, "loss": 0.5142, "step": 27905 }, { "epoch": 15.589944134078213, "grad_norm": 0.4325265884399414, "learning_rate": 0.0002215126050420168, "loss": 0.453, "step": 27906 }, { "epoch": 15.59050279329609, "grad_norm": 0.422418475151062, "learning_rate": 0.00022148459383753504, "loss": 0.3625, "step": 27907 }, { "epoch": 15.591061452513966, "grad_norm": 0.4533475637435913, "learning_rate": 0.00022145658263305324, "loss": 0.3877, "step": 27908 }, { "epoch": 15.591620111731844, "grad_norm": 0.43079784512519836, "learning_rate": 0.00022142857142857142, "loss": 0.4155, "step": 27909 }, { "epoch": 15.59217877094972, "grad_norm": 0.4105457067489624, "learning_rate": 0.00022140056022408963, "loss": 0.3429, "step": 27910 }, { "epoch": 15.592737430167597, "grad_norm": 0.34282201528549194, "learning_rate": 0.00022137254901960786, "loss": 0.4353, "step": 27911 }, { "epoch": 15.593296089385476, "grad_norm": 0.38764122128486633, "learning_rate": 0.00022134453781512607, "loss": 0.3986, "step": 27912 }, { "epoch": 15.593854748603352, "grad_norm": 0.9187127351760864, "learning_rate": 0.00022131652661064427, "loss": 0.3587, "step": 27913 }, { "epoch": 
15.594413407821229, "grad_norm": 0.37504643201828003, "learning_rate": 0.00022128851540616245, "loss": 0.3478, "step": 27914 }, { "epoch": 15.594972067039105, "grad_norm": 1.2339084148406982, "learning_rate": 0.00022126050420168068, "loss": 0.4146, "step": 27915 }, { "epoch": 15.595530726256984, "grad_norm": 0.6316666603088379, "learning_rate": 0.0002212324929971989, "loss": 0.345, "step": 27916 }, { "epoch": 15.59608938547486, "grad_norm": 0.5449196100234985, "learning_rate": 0.0002212044817927171, "loss": 0.5052, "step": 27917 }, { "epoch": 15.596648044692737, "grad_norm": 0.5349736213684082, "learning_rate": 0.0002211764705882353, "loss": 0.6448, "step": 27918 }, { "epoch": 15.597206703910615, "grad_norm": 0.37893494963645935, "learning_rate": 0.0002211484593837535, "loss": 0.3733, "step": 27919 }, { "epoch": 15.597765363128492, "grad_norm": 0.3856014311313629, "learning_rate": 0.00022112044817927171, "loss": 0.3823, "step": 27920 }, { "epoch": 15.598324022346368, "grad_norm": 0.8640221357345581, "learning_rate": 0.00022109243697478992, "loss": 0.4212, "step": 27921 }, { "epoch": 15.598882681564247, "grad_norm": 0.5253803730010986, "learning_rate": 0.00022106442577030813, "loss": 0.5596, "step": 27922 }, { "epoch": 15.599441340782123, "grad_norm": 0.4020250141620636, "learning_rate": 0.00022103641456582633, "loss": 0.3587, "step": 27923 }, { "epoch": 15.6, "grad_norm": 0.41350749135017395, "learning_rate": 0.00022100840336134454, "loss": 0.3881, "step": 27924 }, { "epoch": 15.600558659217878, "grad_norm": 0.48249930143356323, "learning_rate": 0.00022098039215686274, "loss": 0.4405, "step": 27925 }, { "epoch": 15.601117318435755, "grad_norm": 0.47828221321105957, "learning_rate": 0.00022095238095238095, "loss": 0.466, "step": 27926 }, { "epoch": 15.601675977653631, "grad_norm": 0.45245322585105896, "learning_rate": 0.00022092436974789918, "loss": 0.5831, "step": 27927 }, { "epoch": 15.602234636871508, "grad_norm": 0.9160994291305542, "learning_rate": 
0.00022089635854341736, "loss": 0.5035, "step": 27928 }, { "epoch": 15.602793296089386, "grad_norm": 0.5818053483963013, "learning_rate": 0.00022086834733893557, "loss": 0.3685, "step": 27929 }, { "epoch": 15.603351955307263, "grad_norm": 23.26161003112793, "learning_rate": 0.00022084033613445377, "loss": 0.3014, "step": 27930 }, { "epoch": 15.60391061452514, "grad_norm": 0.2935068905353546, "learning_rate": 0.000220812324929972, "loss": 0.3349, "step": 27931 }, { "epoch": 15.604469273743018, "grad_norm": 0.327740341424942, "learning_rate": 0.0002207843137254902, "loss": 0.3889, "step": 27932 }, { "epoch": 15.605027932960894, "grad_norm": 0.4711613357067108, "learning_rate": 0.0002207563025210084, "loss": 0.3951, "step": 27933 }, { "epoch": 15.60558659217877, "grad_norm": 0.5951714515686035, "learning_rate": 0.0002207282913165266, "loss": 0.5704, "step": 27934 }, { "epoch": 15.606145251396647, "grad_norm": 0.6404430270195007, "learning_rate": 0.00022070028011204483, "loss": 0.5611, "step": 27935 }, { "epoch": 15.606703910614526, "grad_norm": 0.7200407385826111, "learning_rate": 0.00022067226890756304, "loss": 0.4001, "step": 27936 }, { "epoch": 15.607262569832402, "grad_norm": 0.5036531686782837, "learning_rate": 0.00022064425770308124, "loss": 0.3539, "step": 27937 }, { "epoch": 15.607821229050279, "grad_norm": 0.409136027097702, "learning_rate": 0.00022061624649859942, "loss": 0.3768, "step": 27938 }, { "epoch": 15.608379888268157, "grad_norm": 0.4895537197589874, "learning_rate": 0.00022058823529411765, "loss": 0.4071, "step": 27939 }, { "epoch": 15.608938547486034, "grad_norm": 0.437532514333725, "learning_rate": 0.00022056022408963586, "loss": 0.3595, "step": 27940 }, { "epoch": 15.60949720670391, "grad_norm": 0.3874495327472687, "learning_rate": 0.00022053221288515407, "loss": 0.2997, "step": 27941 }, { "epoch": 15.610055865921789, "grad_norm": 0.41679462790489197, "learning_rate": 0.00022050420168067227, "loss": 0.3278, "step": 27942 }, { "epoch": 
15.610614525139665, "grad_norm": 0.39926114678382874, "learning_rate": 0.00022047619047619048, "loss": 0.4615, "step": 27943 }, { "epoch": 15.611173184357542, "grad_norm": 0.5115527510643005, "learning_rate": 0.00022044817927170868, "loss": 0.4595, "step": 27944 }, { "epoch": 15.611731843575418, "grad_norm": 0.4073830246925354, "learning_rate": 0.0002204201680672269, "loss": 0.3783, "step": 27945 }, { "epoch": 15.612290502793297, "grad_norm": 0.452438086271286, "learning_rate": 0.0002203921568627451, "loss": 0.3064, "step": 27946 }, { "epoch": 15.612849162011173, "grad_norm": 0.44046100974082947, "learning_rate": 0.00022036414565826333, "loss": 0.4469, "step": 27947 }, { "epoch": 15.61340782122905, "grad_norm": 0.38222116231918335, "learning_rate": 0.0002203361344537815, "loss": 0.2998, "step": 27948 }, { "epoch": 15.613966480446928, "grad_norm": 0.586675226688385, "learning_rate": 0.00022030812324929971, "loss": 0.3807, "step": 27949 }, { "epoch": 15.614525139664805, "grad_norm": 0.3349973261356354, "learning_rate": 0.00022028011204481792, "loss": 0.3483, "step": 27950 }, { "epoch": 15.615083798882681, "grad_norm": 0.7216817736625671, "learning_rate": 0.00022025210084033615, "loss": 0.3771, "step": 27951 }, { "epoch": 15.61564245810056, "grad_norm": 0.768157958984375, "learning_rate": 0.00022022408963585436, "loss": 0.356, "step": 27952 }, { "epoch": 15.616201117318436, "grad_norm": 0.8046454191207886, "learning_rate": 0.00022019607843137254, "loss": 0.4699, "step": 27953 }, { "epoch": 15.616759776536313, "grad_norm": 0.5023537278175354, "learning_rate": 0.00022016806722689074, "loss": 0.3096, "step": 27954 }, { "epoch": 15.61731843575419, "grad_norm": 0.3565543293952942, "learning_rate": 0.00022014005602240898, "loss": 0.3845, "step": 27955 }, { "epoch": 15.617877094972068, "grad_norm": 0.44465214014053345, "learning_rate": 0.00022011204481792718, "loss": 0.4387, "step": 27956 }, { "epoch": 15.618435754189944, "grad_norm": 0.5596281290054321, "learning_rate": 
0.0002200840336134454, "loss": 0.4045, "step": 27957 }, { "epoch": 15.61899441340782, "grad_norm": 0.54207444190979, "learning_rate": 0.00022005602240896357, "loss": 0.5789, "step": 27958 }, { "epoch": 15.619553072625699, "grad_norm": 0.4174506664276123, "learning_rate": 0.0002200280112044818, "loss": 0.3504, "step": 27959 }, { "epoch": 15.620111731843576, "grad_norm": 0.5959535241127014, "learning_rate": 0.00022, "loss": 0.4715, "step": 27960 }, { "epoch": 15.620670391061452, "grad_norm": 0.3997792601585388, "learning_rate": 0.00021997198879551821, "loss": 0.4131, "step": 27961 }, { "epoch": 15.621229050279329, "grad_norm": 0.7493446469306946, "learning_rate": 0.00021994397759103642, "loss": 0.3882, "step": 27962 }, { "epoch": 15.621787709497207, "grad_norm": 0.5321084260940552, "learning_rate": 0.00021991596638655463, "loss": 0.4481, "step": 27963 }, { "epoch": 15.622346368715084, "grad_norm": 0.5192870497703552, "learning_rate": 0.00021988795518207283, "loss": 0.4059, "step": 27964 }, { "epoch": 15.62290502793296, "grad_norm": 0.9302773475646973, "learning_rate": 0.00021985994397759104, "loss": 0.3524, "step": 27965 }, { "epoch": 15.623463687150839, "grad_norm": 0.7968913316726685, "learning_rate": 0.00021983193277310924, "loss": 0.3187, "step": 27966 }, { "epoch": 15.624022346368715, "grad_norm": 0.3571932911872864, "learning_rate": 0.00021980392156862748, "loss": 0.3511, "step": 27967 }, { "epoch": 15.624581005586592, "grad_norm": 1.3209444284439087, "learning_rate": 0.00021977591036414566, "loss": 0.5391, "step": 27968 }, { "epoch": 15.62513966480447, "grad_norm": 0.7576371431350708, "learning_rate": 0.00021974789915966386, "loss": 0.4232, "step": 27969 }, { "epoch": 15.625698324022347, "grad_norm": 3.180560350418091, "learning_rate": 0.00021971988795518207, "loss": 0.3515, "step": 27970 }, { "epoch": 15.626256983240223, "grad_norm": 0.5635356903076172, "learning_rate": 0.0002196918767507003, "loss": 0.4208, "step": 27971 }, { "epoch": 15.6268156424581, 
"grad_norm": 0.8229154348373413, "learning_rate": 0.0002196638655462185, "loss": 0.3811, "step": 27972 }, { "epoch": 15.627374301675978, "grad_norm": 0.6761255860328674, "learning_rate": 0.00021963585434173669, "loss": 0.5865, "step": 27973 }, { "epoch": 15.627932960893855, "grad_norm": 0.4207446873188019, "learning_rate": 0.0002196078431372549, "loss": 0.3162, "step": 27974 }, { "epoch": 15.628491620111731, "grad_norm": 0.5189700126647949, "learning_rate": 0.00021957983193277312, "loss": 0.4833, "step": 27975 }, { "epoch": 15.62905027932961, "grad_norm": 0.4031204879283905, "learning_rate": 0.00021955182072829133, "loss": 0.4975, "step": 27976 }, { "epoch": 15.629608938547486, "grad_norm": 0.6120550036430359, "learning_rate": 0.0002195238095238095, "loss": 0.4854, "step": 27977 }, { "epoch": 15.630167597765363, "grad_norm": 0.3992401361465454, "learning_rate": 0.00021949579831932772, "loss": 0.353, "step": 27978 }, { "epoch": 15.630726256983241, "grad_norm": 1.417589783668518, "learning_rate": 0.00021946778711484595, "loss": 0.4176, "step": 27979 }, { "epoch": 15.631284916201118, "grad_norm": 0.673062801361084, "learning_rate": 0.00021943977591036415, "loss": 0.3865, "step": 27980 }, { "epoch": 15.631843575418994, "grad_norm": 0.6185072064399719, "learning_rate": 0.00021941176470588236, "loss": 0.3642, "step": 27981 }, { "epoch": 15.63240223463687, "grad_norm": 0.4298047423362732, "learning_rate": 0.00021938375350140054, "loss": 0.4035, "step": 27982 }, { "epoch": 15.632960893854749, "grad_norm": 0.8356462717056274, "learning_rate": 0.00021935574229691877, "loss": 0.4261, "step": 27983 }, { "epoch": 15.633519553072626, "grad_norm": 0.5218881368637085, "learning_rate": 0.00021932773109243698, "loss": 0.4292, "step": 27984 }, { "epoch": 15.634078212290502, "grad_norm": 0.6895882487297058, "learning_rate": 0.00021929971988795518, "loss": 0.3489, "step": 27985 }, { "epoch": 15.63463687150838, "grad_norm": 0.5630412697792053, "learning_rate": 0.0002192717086834734, 
"loss": 0.4754, "step": 27986 }, { "epoch": 15.635195530726257, "grad_norm": 2.5687711238861084, "learning_rate": 0.0002192436974789916, "loss": 0.337, "step": 27987 }, { "epoch": 15.635754189944134, "grad_norm": 0.38433295488357544, "learning_rate": 0.0002192156862745098, "loss": 0.3681, "step": 27988 }, { "epoch": 15.63631284916201, "grad_norm": 0.4970787465572357, "learning_rate": 0.000219187675070028, "loss": 0.4212, "step": 27989 }, { "epoch": 15.636871508379889, "grad_norm": 0.7202358841896057, "learning_rate": 0.00021915966386554621, "loss": 0.414, "step": 27990 }, { "epoch": 15.637430167597765, "grad_norm": 0.4074876606464386, "learning_rate": 0.00021913165266106445, "loss": 0.3623, "step": 27991 }, { "epoch": 15.637988826815642, "grad_norm": 0.5222742557525635, "learning_rate": 0.00021910364145658263, "loss": 0.342, "step": 27992 }, { "epoch": 15.63854748603352, "grad_norm": 1.1061675548553467, "learning_rate": 0.00021907563025210083, "loss": 0.6238, "step": 27993 }, { "epoch": 15.639106145251397, "grad_norm": 0.3918350040912628, "learning_rate": 0.00021904761904761904, "loss": 0.5253, "step": 27994 }, { "epoch": 15.639664804469273, "grad_norm": 0.33751118183135986, "learning_rate": 0.00021901960784313727, "loss": 0.3473, "step": 27995 }, { "epoch": 15.640223463687152, "grad_norm": 0.4809604585170746, "learning_rate": 0.00021899159663865548, "loss": 0.5169, "step": 27996 }, { "epoch": 15.640782122905028, "grad_norm": 0.4874924421310425, "learning_rate": 0.00021896358543417366, "loss": 0.3319, "step": 27997 }, { "epoch": 15.641340782122905, "grad_norm": 0.38321277499198914, "learning_rate": 0.00021893557422969186, "loss": 0.3139, "step": 27998 }, { "epoch": 15.641899441340783, "grad_norm": 0.6106351017951965, "learning_rate": 0.0002189075630252101, "loss": 0.5398, "step": 27999 }, { "epoch": 15.64245810055866, "grad_norm": 0.4765563905239105, "learning_rate": 0.0002188795518207283, "loss": 0.4245, "step": 28000 }, { "epoch": 15.64245810055866, "eval_cer": 
0.08562731170829105, "eval_loss": 0.3225233256816864, "eval_runtime": 55.5941, "eval_samples_per_second": 81.627, "eval_steps_per_second": 5.108, "eval_wer": 0.33839836876064916, "step": 28000 }, { "epoch": 15.643016759776536, "grad_norm": 0.5473787784576416, "learning_rate": 0.0002188515406162465, "loss": 0.3087, "step": 28001 }, { "epoch": 15.643575418994413, "grad_norm": 0.32055461406707764, "learning_rate": 0.0002188235294117647, "loss": 0.3049, "step": 28002 }, { "epoch": 15.644134078212291, "grad_norm": 0.3655672073364258, "learning_rate": 0.00021879551820728292, "loss": 0.3107, "step": 28003 }, { "epoch": 15.644692737430168, "grad_norm": 0.35586389899253845, "learning_rate": 0.00021876750700280113, "loss": 0.3541, "step": 28004 }, { "epoch": 15.645251396648044, "grad_norm": 0.33578261733055115, "learning_rate": 0.00021873949579831933, "loss": 0.3611, "step": 28005 }, { "epoch": 15.645810055865923, "grad_norm": 2.0723326206207275, "learning_rate": 0.00021871148459383754, "loss": 0.4157, "step": 28006 }, { "epoch": 15.6463687150838, "grad_norm": 0.4603142738342285, "learning_rate": 0.00021868347338935574, "loss": 0.4801, "step": 28007 }, { "epoch": 15.646927374301676, "grad_norm": 0.5859696269035339, "learning_rate": 0.00021865546218487395, "loss": 0.3282, "step": 28008 }, { "epoch": 15.647486033519552, "grad_norm": 0.6254460215568542, "learning_rate": 0.00021862745098039216, "loss": 0.4269, "step": 28009 }, { "epoch": 15.64804469273743, "grad_norm": 0.618705153465271, "learning_rate": 0.00021859943977591036, "loss": 0.3526, "step": 28010 }, { "epoch": 15.648603351955307, "grad_norm": 0.6330431699752808, "learning_rate": 0.0002185714285714286, "loss": 0.6561, "step": 28011 }, { "epoch": 15.649162011173184, "grad_norm": 0.6152411699295044, "learning_rate": 0.00021854341736694677, "loss": 0.359, "step": 28012 }, { "epoch": 15.649720670391062, "grad_norm": 0.520406186580658, "learning_rate": 0.00021851540616246498, "loss": 0.4962, "step": 28013 }, { "epoch": 
15.650279329608939, "grad_norm": 0.459607869386673, "learning_rate": 0.00021848739495798319, "loss": 0.4322, "step": 28014 }, { "epoch": 15.650837988826815, "grad_norm": 0.7368574738502502, "learning_rate": 0.00021845938375350142, "loss": 0.4397, "step": 28015 }, { "epoch": 15.651396648044694, "grad_norm": 0.4270786941051483, "learning_rate": 0.00021843137254901962, "loss": 0.3793, "step": 28016 }, { "epoch": 15.65195530726257, "grad_norm": 0.4606087803840637, "learning_rate": 0.0002184033613445378, "loss": 0.3899, "step": 28017 }, { "epoch": 15.652513966480447, "grad_norm": 0.5311670303344727, "learning_rate": 0.000218375350140056, "loss": 0.3743, "step": 28018 }, { "epoch": 15.653072625698323, "grad_norm": 0.3264901041984558, "learning_rate": 0.00021834733893557424, "loss": 0.3546, "step": 28019 }, { "epoch": 15.653631284916202, "grad_norm": 0.7805134057998657, "learning_rate": 0.00021831932773109245, "loss": 0.4282, "step": 28020 }, { "epoch": 15.654189944134078, "grad_norm": 0.6998165249824524, "learning_rate": 0.00021829131652661065, "loss": 0.4396, "step": 28021 }, { "epoch": 15.654748603351955, "grad_norm": 0.33380597829818726, "learning_rate": 0.00021826330532212883, "loss": 0.3619, "step": 28022 }, { "epoch": 15.655307262569833, "grad_norm": 0.40164363384246826, "learning_rate": 0.00021823529411764707, "loss": 0.3855, "step": 28023 }, { "epoch": 15.65586592178771, "grad_norm": 0.9354643821716309, "learning_rate": 0.00021820728291316527, "loss": 0.4513, "step": 28024 }, { "epoch": 15.656424581005586, "grad_norm": 10.054797172546387, "learning_rate": 0.00021817927170868348, "loss": 0.4721, "step": 28025 }, { "epoch": 15.656983240223465, "grad_norm": 0.40198424458503723, "learning_rate": 0.0002181512605042017, "loss": 0.4358, "step": 28026 }, { "epoch": 15.657541899441341, "grad_norm": 0.5293678045272827, "learning_rate": 0.0002181232492997199, "loss": 0.4397, "step": 28027 }, { "epoch": 15.658100558659218, "grad_norm": 0.7725968956947327, "learning_rate": 
0.0002180952380952381, "loss": 0.4925, "step": 28028 }, { "epoch": 15.658659217877094, "grad_norm": 0.4105333089828491, "learning_rate": 0.0002180672268907563, "loss": 0.3911, "step": 28029 }, { "epoch": 15.659217877094973, "grad_norm": 0.3805825412273407, "learning_rate": 0.00021803921568627454, "loss": 0.3913, "step": 28030 }, { "epoch": 15.65977653631285, "grad_norm": 0.43646717071533203, "learning_rate": 0.00021801120448179274, "loss": 0.4768, "step": 28031 }, { "epoch": 15.660335195530726, "grad_norm": 0.6447239518165588, "learning_rate": 0.00021798319327731092, "loss": 0.4063, "step": 28032 }, { "epoch": 15.660893854748604, "grad_norm": 0.79170161485672, "learning_rate": 0.00021795518207282913, "loss": 0.4174, "step": 28033 }, { "epoch": 15.66145251396648, "grad_norm": 0.3485768437385559, "learning_rate": 0.00021792717086834736, "loss": 0.3205, "step": 28034 }, { "epoch": 15.662011173184357, "grad_norm": 0.6954795122146606, "learning_rate": 0.00021789915966386557, "loss": 0.5848, "step": 28035 }, { "epoch": 15.662569832402234, "grad_norm": 0.7661239504814148, "learning_rate": 0.00021787114845938374, "loss": 0.346, "step": 28036 }, { "epoch": 15.663128491620112, "grad_norm": 0.5549522638320923, "learning_rate": 0.00021784313725490195, "loss": 0.4464, "step": 28037 }, { "epoch": 15.663687150837989, "grad_norm": 0.4942370653152466, "learning_rate": 0.00021781512605042018, "loss": 0.3929, "step": 28038 }, { "epoch": 15.664245810055865, "grad_norm": 0.40910083055496216, "learning_rate": 0.0002177871148459384, "loss": 0.3208, "step": 28039 }, { "epoch": 15.664804469273744, "grad_norm": 0.3546142578125, "learning_rate": 0.0002177591036414566, "loss": 0.4221, "step": 28040 }, { "epoch": 15.66536312849162, "grad_norm": 0.4588821828365326, "learning_rate": 0.00021773109243697477, "loss": 0.4319, "step": 28041 }, { "epoch": 15.665921787709497, "grad_norm": 0.4927888810634613, "learning_rate": 0.000217703081232493, "loss": 0.3312, "step": 28042 }, { "epoch": 
15.666480446927375, "grad_norm": 1.637519121170044, "learning_rate": 0.00021767507002801121, "loss": 0.4159, "step": 28043 }, { "epoch": 15.667039106145252, "grad_norm": 0.6051217317581177, "learning_rate": 0.00021764705882352942, "loss": 0.4074, "step": 28044 }, { "epoch": 15.667597765363128, "grad_norm": 0.4173748791217804, "learning_rate": 0.00021761904761904763, "loss": 0.4952, "step": 28045 }, { "epoch": 15.668156424581005, "grad_norm": 0.39418622851371765, "learning_rate": 0.00021759103641456583, "loss": 0.3816, "step": 28046 }, { "epoch": 15.668715083798883, "grad_norm": 0.3564317524433136, "learning_rate": 0.00021756302521008404, "loss": 0.3327, "step": 28047 }, { "epoch": 15.66927374301676, "grad_norm": 0.9859527945518494, "learning_rate": 0.00021753501400560224, "loss": 0.4629, "step": 28048 }, { "epoch": 15.669832402234636, "grad_norm": 3.0253565311431885, "learning_rate": 0.00021750700280112045, "loss": 0.4269, "step": 28049 }, { "epoch": 15.670391061452515, "grad_norm": 2.299283266067505, "learning_rate": 0.00021747899159663868, "loss": 0.3088, "step": 28050 }, { "epoch": 15.670949720670391, "grad_norm": 0.9328839182853699, "learning_rate": 0.00021745098039215686, "loss": 0.4022, "step": 28051 }, { "epoch": 15.671508379888268, "grad_norm": 0.44723692536354065, "learning_rate": 0.00021742296918767507, "loss": 0.3944, "step": 28052 }, { "epoch": 15.672067039106146, "grad_norm": 0.39097368717193604, "learning_rate": 0.00021739495798319327, "loss": 0.435, "step": 28053 }, { "epoch": 15.672625698324023, "grad_norm": 0.5249546766281128, "learning_rate": 0.0002173669467787115, "loss": 0.3699, "step": 28054 }, { "epoch": 15.6731843575419, "grad_norm": 0.932850182056427, "learning_rate": 0.0002173389355742297, "loss": 0.4172, "step": 28055 }, { "epoch": 15.673743016759776, "grad_norm": 1.1159703731536865, "learning_rate": 0.0002173109243697479, "loss": 0.4471, "step": 28056 }, { "epoch": 15.674301675977654, "grad_norm": 0.36395493149757385, "learning_rate": 
0.0002172829131652661, "loss": 0.38, "step": 28057 }, { "epoch": 15.67486033519553, "grad_norm": 0.5341206192970276, "learning_rate": 0.00021725490196078433, "loss": 0.4892, "step": 28058 }, { "epoch": 15.675418994413407, "grad_norm": 0.5427635908126831, "learning_rate": 0.00021722689075630254, "loss": 0.4812, "step": 28059 }, { "epoch": 15.675977653631286, "grad_norm": 0.5830546617507935, "learning_rate": 0.00021719887955182074, "loss": 0.481, "step": 28060 }, { "epoch": 15.676536312849162, "grad_norm": 0.5074461102485657, "learning_rate": 0.00021717086834733892, "loss": 0.457, "step": 28061 }, { "epoch": 15.677094972067039, "grad_norm": 0.3944685757160187, "learning_rate": 0.00021714285714285715, "loss": 0.3819, "step": 28062 }, { "epoch": 15.677653631284915, "grad_norm": 0.34566086530685425, "learning_rate": 0.00021711484593837536, "loss": 0.3471, "step": 28063 }, { "epoch": 15.678212290502794, "grad_norm": 0.4306388199329376, "learning_rate": 0.00021708683473389357, "loss": 0.4118, "step": 28064 }, { "epoch": 15.67877094972067, "grad_norm": 0.4727766811847687, "learning_rate": 0.00021705882352941177, "loss": 0.4856, "step": 28065 }, { "epoch": 15.679329608938547, "grad_norm": 0.36203840374946594, "learning_rate": 0.00021703081232492998, "loss": 0.3532, "step": 28066 }, { "epoch": 15.679888268156425, "grad_norm": 0.6653933525085449, "learning_rate": 0.00021700280112044818, "loss": 0.4496, "step": 28067 }, { "epoch": 15.680446927374302, "grad_norm": 0.5298588275909424, "learning_rate": 0.0002169747899159664, "loss": 0.4147, "step": 28068 }, { "epoch": 15.681005586592178, "grad_norm": 7.28579568862915, "learning_rate": 0.0002169467787114846, "loss": 0.4534, "step": 28069 }, { "epoch": 15.681564245810057, "grad_norm": 0.7363472580909729, "learning_rate": 0.00021691876750700283, "loss": 0.5128, "step": 28070 }, { "epoch": 15.682122905027933, "grad_norm": 0.9153317213058472, "learning_rate": 0.000216890756302521, "loss": 0.5014, "step": 28071 }, { "epoch": 
15.68268156424581, "grad_norm": 1.3480807542800903, "learning_rate": 0.00021686274509803921, "loss": 0.4138, "step": 28072 }, { "epoch": 15.683240223463688, "grad_norm": 0.648037850856781, "learning_rate": 0.00021683473389355742, "loss": 0.4395, "step": 28073 }, { "epoch": 15.683798882681565, "grad_norm": 0.44283750653266907, "learning_rate": 0.00021680672268907565, "loss": 0.4652, "step": 28074 }, { "epoch": 15.684357541899441, "grad_norm": 0.4221786558628082, "learning_rate": 0.00021677871148459386, "loss": 0.3767, "step": 28075 }, { "epoch": 15.684916201117318, "grad_norm": 0.5067715644836426, "learning_rate": 0.00021675070028011204, "loss": 0.4935, "step": 28076 }, { "epoch": 15.685474860335196, "grad_norm": 0.46660977602005005, "learning_rate": 0.00021672268907563024, "loss": 0.2776, "step": 28077 }, { "epoch": 15.686033519553073, "grad_norm": 0.41411831974983215, "learning_rate": 0.00021669467787114848, "loss": 0.4438, "step": 28078 }, { "epoch": 15.68659217877095, "grad_norm": 0.9013859629631042, "learning_rate": 0.00021666666666666668, "loss": 0.6201, "step": 28079 }, { "epoch": 15.687150837988828, "grad_norm": 0.5020959377288818, "learning_rate": 0.0002166386554621849, "loss": 0.4511, "step": 28080 }, { "epoch": 15.687709497206704, "grad_norm": 0.6807861924171448, "learning_rate": 0.00021661064425770307, "loss": 0.3768, "step": 28081 }, { "epoch": 15.68826815642458, "grad_norm": 1.3946880102157593, "learning_rate": 0.0002165826330532213, "loss": 0.397, "step": 28082 }, { "epoch": 15.688826815642457, "grad_norm": 0.4447402060031891, "learning_rate": 0.0002165546218487395, "loss": 0.4648, "step": 28083 }, { "epoch": 15.689385474860336, "grad_norm": 0.5153530240058899, "learning_rate": 0.00021652661064425771, "loss": 0.3659, "step": 28084 }, { "epoch": 15.689944134078212, "grad_norm": 0.47520315647125244, "learning_rate": 0.00021649859943977592, "loss": 0.4042, "step": 28085 }, { "epoch": 15.690502793296089, "grad_norm": 0.3638395071029663, "learning_rate": 
0.00021647058823529413, "loss": 0.3662, "step": 28086 }, { "epoch": 15.691061452513967, "grad_norm": 0.3841032385826111, "learning_rate": 0.00021644257703081233, "loss": 0.4512, "step": 28087 }, { "epoch": 15.691620111731844, "grad_norm": 1.059695839881897, "learning_rate": 0.00021641456582633054, "loss": 0.3874, "step": 28088 }, { "epoch": 15.69217877094972, "grad_norm": 0.48372459411621094, "learning_rate": 0.00021638655462184874, "loss": 0.4759, "step": 28089 }, { "epoch": 15.692737430167599, "grad_norm": 0.7638027667999268, "learning_rate": 0.00021635854341736695, "loss": 0.4961, "step": 28090 }, { "epoch": 15.693296089385475, "grad_norm": 0.5282920598983765, "learning_rate": 0.00021633053221288516, "loss": 0.4017, "step": 28091 }, { "epoch": 15.693854748603352, "grad_norm": 1.376887321472168, "learning_rate": 0.00021630252100840336, "loss": 0.4267, "step": 28092 }, { "epoch": 15.694413407821228, "grad_norm": 0.8021776080131531, "learning_rate": 0.00021627450980392157, "loss": 0.3466, "step": 28093 }, { "epoch": 15.694972067039107, "grad_norm": 0.40849369764328003, "learning_rate": 0.0002162464985994398, "loss": 0.4051, "step": 28094 }, { "epoch": 15.695530726256983, "grad_norm": 0.41417962312698364, "learning_rate": 0.00021621848739495798, "loss": 0.3558, "step": 28095 }, { "epoch": 15.69608938547486, "grad_norm": 0.4883786141872406, "learning_rate": 0.00021619047619047619, "loss": 0.3759, "step": 28096 }, { "epoch": 15.696648044692738, "grad_norm": 0.4189356565475464, "learning_rate": 0.0002161624649859944, "loss": 0.3405, "step": 28097 }, { "epoch": 15.697206703910615, "grad_norm": 0.3570777475833893, "learning_rate": 0.00021613445378151262, "loss": 0.3945, "step": 28098 }, { "epoch": 15.697765363128491, "grad_norm": 0.4587579369544983, "learning_rate": 0.00021610644257703083, "loss": 0.3518, "step": 28099 }, { "epoch": 15.69832402234637, "grad_norm": 0.6413249373435974, "learning_rate": 0.000216078431372549, "loss": 0.3829, "step": 28100 }, { "epoch": 
15.698882681564246, "grad_norm": 0.5313349366188049, "learning_rate": 0.00021605042016806722, "loss": 0.3975, "step": 28101 }, { "epoch": 15.699441340782123, "grad_norm": 1.1013466119766235, "learning_rate": 0.00021602240896358545, "loss": 0.5159, "step": 28102 }, { "epoch": 15.7, "grad_norm": 3.016965627670288, "learning_rate": 0.00021599439775910365, "loss": 0.4462, "step": 28103 }, { "epoch": 15.700558659217878, "grad_norm": 5.754360198974609, "learning_rate": 0.00021596638655462186, "loss": 0.4059, "step": 28104 }, { "epoch": 15.701117318435754, "grad_norm": 0.36752769351005554, "learning_rate": 0.00021593837535014004, "loss": 0.311, "step": 28105 }, { "epoch": 15.70167597765363, "grad_norm": 0.5617859959602356, "learning_rate": 0.00021591036414565827, "loss": 0.4049, "step": 28106 }, { "epoch": 15.702234636871509, "grad_norm": 0.8458712100982666, "learning_rate": 0.00021588235294117648, "loss": 0.4099, "step": 28107 }, { "epoch": 15.702793296089386, "grad_norm": 0.44229528307914734, "learning_rate": 0.00021585434173669468, "loss": 0.4034, "step": 28108 }, { "epoch": 15.703351955307262, "grad_norm": 0.4449937045574188, "learning_rate": 0.0002158263305322129, "loss": 0.3363, "step": 28109 }, { "epoch": 15.703910614525139, "grad_norm": 0.49023550748825073, "learning_rate": 0.0002157983193277311, "loss": 0.4212, "step": 28110 }, { "epoch": 15.704469273743017, "grad_norm": 0.415931761264801, "learning_rate": 0.0002157703081232493, "loss": 0.4225, "step": 28111 }, { "epoch": 15.705027932960894, "grad_norm": 0.42796462774276733, "learning_rate": 0.0002157422969187675, "loss": 0.3966, "step": 28112 }, { "epoch": 15.70558659217877, "grad_norm": 0.45776891708374023, "learning_rate": 0.00021571428571428571, "loss": 0.5357, "step": 28113 }, { "epoch": 15.706145251396649, "grad_norm": 0.607927143573761, "learning_rate": 0.00021568627450980395, "loss": 0.5675, "step": 28114 }, { "epoch": 15.706703910614525, "grad_norm": 0.4122005105018616, "learning_rate": 
0.00021565826330532213, "loss": 0.4158, "step": 28115 }, { "epoch": 15.707262569832402, "grad_norm": 0.3309580087661743, "learning_rate": 0.00021563025210084033, "loss": 0.2945, "step": 28116 }, { "epoch": 15.70782122905028, "grad_norm": 1.7135727405548096, "learning_rate": 0.00021560224089635854, "loss": 0.5501, "step": 28117 }, { "epoch": 15.708379888268157, "grad_norm": 0.3926011919975281, "learning_rate": 0.00021557422969187677, "loss": 0.3926, "step": 28118 }, { "epoch": 15.708938547486033, "grad_norm": 1.5358628034591675, "learning_rate": 0.00021554621848739498, "loss": 0.3873, "step": 28119 }, { "epoch": 15.70949720670391, "grad_norm": 0.4119482934474945, "learning_rate": 0.00021551820728291316, "loss": 0.3999, "step": 28120 }, { "epoch": 15.710055865921788, "grad_norm": 4.981225967407227, "learning_rate": 0.00021549019607843136, "loss": 0.4542, "step": 28121 }, { "epoch": 15.710614525139665, "grad_norm": 0.48065581917762756, "learning_rate": 0.0002154621848739496, "loss": 0.3545, "step": 28122 }, { "epoch": 15.711173184357541, "grad_norm": 0.454269677400589, "learning_rate": 0.0002154341736694678, "loss": 0.4192, "step": 28123 }, { "epoch": 15.71173184357542, "grad_norm": 0.4448856711387634, "learning_rate": 0.000215406162464986, "loss": 0.4102, "step": 28124 }, { "epoch": 15.712290502793296, "grad_norm": 0.44434693455696106, "learning_rate": 0.0002153781512605042, "loss": 0.3084, "step": 28125 }, { "epoch": 15.712849162011173, "grad_norm": 0.8203147649765015, "learning_rate": 0.00021535014005602242, "loss": 0.3925, "step": 28126 }, { "epoch": 15.713407821229051, "grad_norm": 0.42819273471832275, "learning_rate": 0.00021532212885154063, "loss": 0.5081, "step": 28127 }, { "epoch": 15.713966480446928, "grad_norm": 0.40749242901802063, "learning_rate": 0.00021529411764705883, "loss": 0.4591, "step": 28128 }, { "epoch": 15.714525139664804, "grad_norm": 0.42136573791503906, "learning_rate": 0.00021526610644257704, "loss": 0.3978, "step": 28129 }, { "epoch": 
15.71508379888268, "grad_norm": 0.37534379959106445, "learning_rate": 0.00021523809523809524, "loss": 0.4432, "step": 28130 }, { "epoch": 15.71564245810056, "grad_norm": 0.6140737533569336, "learning_rate": 0.00021521008403361345, "loss": 0.4645, "step": 28131 }, { "epoch": 15.716201117318436, "grad_norm": 0.43461814522743225, "learning_rate": 0.00021518207282913166, "loss": 0.4257, "step": 28132 }, { "epoch": 15.716759776536312, "grad_norm": 0.5296183228492737, "learning_rate": 0.00021515406162464986, "loss": 0.429, "step": 28133 }, { "epoch": 15.71731843575419, "grad_norm": 0.4991846978664398, "learning_rate": 0.0002151260504201681, "loss": 0.3247, "step": 28134 }, { "epoch": 15.717877094972067, "grad_norm": 0.6997066140174866, "learning_rate": 0.00021509803921568627, "loss": 0.3217, "step": 28135 }, { "epoch": 15.718435754189944, "grad_norm": 0.54915851354599, "learning_rate": 0.00021507002801120448, "loss": 0.3599, "step": 28136 }, { "epoch": 15.71899441340782, "grad_norm": 0.49522387981414795, "learning_rate": 0.00021504201680672269, "loss": 0.6005, "step": 28137 }, { "epoch": 15.719553072625699, "grad_norm": 1.1229275465011597, "learning_rate": 0.00021501400560224092, "loss": 0.4658, "step": 28138 }, { "epoch": 15.720111731843575, "grad_norm": 3.6634035110473633, "learning_rate": 0.00021498599439775912, "loss": 0.4077, "step": 28139 }, { "epoch": 15.720670391061452, "grad_norm": 0.48935940861701965, "learning_rate": 0.0002149579831932773, "loss": 0.4222, "step": 28140 }, { "epoch": 15.72122905027933, "grad_norm": 0.5736417770385742, "learning_rate": 0.0002149299719887955, "loss": 0.461, "step": 28141 }, { "epoch": 15.721787709497207, "grad_norm": 0.8085067868232727, "learning_rate": 0.00021490196078431374, "loss": 0.4278, "step": 28142 }, { "epoch": 15.722346368715083, "grad_norm": 0.39364534616470337, "learning_rate": 0.00021487394957983195, "loss": 0.4347, "step": 28143 }, { "epoch": 15.722905027932962, "grad_norm": 0.35484471917152405, "learning_rate": 
0.00021484593837535015, "loss": 0.3141, "step": 28144 }, { "epoch": 15.723463687150838, "grad_norm": 0.384712815284729, "learning_rate": 0.00021481792717086833, "loss": 0.4192, "step": 28145 }, { "epoch": 15.724022346368715, "grad_norm": 1.2662056684494019, "learning_rate": 0.00021478991596638657, "loss": 0.5098, "step": 28146 }, { "epoch": 15.724581005586593, "grad_norm": 0.5253733992576599, "learning_rate": 0.00021476190476190477, "loss": 0.3821, "step": 28147 }, { "epoch": 15.72513966480447, "grad_norm": 0.5488473176956177, "learning_rate": 0.00021473389355742298, "loss": 0.3729, "step": 28148 }, { "epoch": 15.725698324022346, "grad_norm": 0.5362403988838196, "learning_rate": 0.00021470588235294116, "loss": 0.355, "step": 28149 }, { "epoch": 15.726256983240223, "grad_norm": 0.48557308316230774, "learning_rate": 0.0002146778711484594, "loss": 0.475, "step": 28150 }, { "epoch": 15.726815642458101, "grad_norm": 1.0692726373672485, "learning_rate": 0.0002146498599439776, "loss": 0.3537, "step": 28151 }, { "epoch": 15.727374301675978, "grad_norm": 1.6404802799224854, "learning_rate": 0.0002146218487394958, "loss": 0.4396, "step": 28152 }, { "epoch": 15.727932960893854, "grad_norm": 1.2937458753585815, "learning_rate": 0.000214593837535014, "loss": 0.3433, "step": 28153 }, { "epoch": 15.728491620111733, "grad_norm": 0.7173959016799927, "learning_rate": 0.00021456582633053221, "loss": 0.3316, "step": 28154 }, { "epoch": 15.72905027932961, "grad_norm": 3.468048334121704, "learning_rate": 0.00021453781512605042, "loss": 0.3969, "step": 28155 }, { "epoch": 15.729608938547486, "grad_norm": 0.7456526160240173, "learning_rate": 0.00021450980392156863, "loss": 0.385, "step": 28156 }, { "epoch": 15.730167597765362, "grad_norm": 25.07265281677246, "learning_rate": 0.00021448179271708683, "loss": 0.3852, "step": 28157 }, { "epoch": 15.73072625698324, "grad_norm": 0.5502532124519348, "learning_rate": 0.00021445378151260507, "loss": 0.5549, "step": 28158 }, { "epoch": 
15.731284916201117, "grad_norm": 0.4713345170021057, "learning_rate": 0.00021442577030812324, "loss": 0.4081, "step": 28159 }, { "epoch": 15.731843575418994, "grad_norm": 2.0924980640411377, "learning_rate": 0.00021439775910364145, "loss": 0.4937, "step": 28160 }, { "epoch": 15.732402234636872, "grad_norm": 0.9878963232040405, "learning_rate": 0.00021436974789915966, "loss": 0.4308, "step": 28161 }, { "epoch": 15.732960893854749, "grad_norm": 1.0944411754608154, "learning_rate": 0.0002143417366946779, "loss": 0.4274, "step": 28162 }, { "epoch": 15.733519553072625, "grad_norm": 0.387623131275177, "learning_rate": 0.0002143137254901961, "loss": 0.3801, "step": 28163 }, { "epoch": 15.734078212290502, "grad_norm": 0.4180534780025482, "learning_rate": 0.00021428571428571427, "loss": 0.3458, "step": 28164 }, { "epoch": 15.73463687150838, "grad_norm": 0.3599781394004822, "learning_rate": 0.00021425770308123248, "loss": 0.3793, "step": 28165 }, { "epoch": 15.735195530726257, "grad_norm": 0.5250738263130188, "learning_rate": 0.00021422969187675071, "loss": 0.5147, "step": 28166 }, { "epoch": 15.735754189944133, "grad_norm": 0.37296831607818604, "learning_rate": 0.00021420168067226892, "loss": 0.4184, "step": 28167 }, { "epoch": 15.736312849162012, "grad_norm": 0.8904727101325989, "learning_rate": 0.00021417366946778713, "loss": 0.58, "step": 28168 }, { "epoch": 15.736871508379888, "grad_norm": 0.6159899830818176, "learning_rate": 0.0002141456582633053, "loss": 0.3904, "step": 28169 }, { "epoch": 15.737430167597765, "grad_norm": 0.48762694001197815, "learning_rate": 0.00021411764705882354, "loss": 0.4652, "step": 28170 }, { "epoch": 15.737988826815643, "grad_norm": 0.49352702498435974, "learning_rate": 0.00021408963585434174, "loss": 0.5325, "step": 28171 }, { "epoch": 15.73854748603352, "grad_norm": 0.45881393551826477, "learning_rate": 0.00021406162464985995, "loss": 0.365, "step": 28172 }, { "epoch": 15.739106145251396, "grad_norm": 0.4733392000198364, "learning_rate": 
0.00021403361344537816, "loss": 0.3535, "step": 28173 }, { "epoch": 15.739664804469275, "grad_norm": 0.5747881531715393, "learning_rate": 0.00021400560224089636, "loss": 0.3286, "step": 28174 }, { "epoch": 15.740223463687151, "grad_norm": 0.5354949831962585, "learning_rate": 0.00021397759103641457, "loss": 0.4344, "step": 28175 }, { "epoch": 15.740782122905028, "grad_norm": 0.6348032355308533, "learning_rate": 0.00021394957983193277, "loss": 0.5637, "step": 28176 }, { "epoch": 15.741340782122904, "grad_norm": 0.45050469040870667, "learning_rate": 0.00021392156862745098, "loss": 0.4639, "step": 28177 }, { "epoch": 15.741899441340783, "grad_norm": 1.7818286418914795, "learning_rate": 0.0002138935574229692, "loss": 0.3982, "step": 28178 }, { "epoch": 15.74245810055866, "grad_norm": 0.37337547540664673, "learning_rate": 0.0002138655462184874, "loss": 0.4243, "step": 28179 }, { "epoch": 15.743016759776536, "grad_norm": 0.6851099133491516, "learning_rate": 0.0002138375350140056, "loss": 0.5206, "step": 28180 }, { "epoch": 15.743575418994414, "grad_norm": 0.7506327629089355, "learning_rate": 0.0002138095238095238, "loss": 0.3762, "step": 28181 }, { "epoch": 15.74413407821229, "grad_norm": 0.44795480370521545, "learning_rate": 0.00021378151260504204, "loss": 0.38, "step": 28182 }, { "epoch": 15.744692737430167, "grad_norm": 0.7723276019096375, "learning_rate": 0.00021375350140056024, "loss": 0.4501, "step": 28183 }, { "epoch": 15.745251396648044, "grad_norm": 0.49385908246040344, "learning_rate": 0.00021372549019607842, "loss": 0.3646, "step": 28184 }, { "epoch": 15.745810055865922, "grad_norm": 1.3371938467025757, "learning_rate": 0.00021369747899159663, "loss": 0.4058, "step": 28185 }, { "epoch": 15.746368715083799, "grad_norm": 0.8355948328971863, "learning_rate": 0.00021366946778711486, "loss": 0.3837, "step": 28186 }, { "epoch": 15.746927374301675, "grad_norm": 0.5903971791267395, "learning_rate": 0.00021364145658263307, "loss": 0.537, "step": 28187 }, { "epoch": 
15.747486033519554, "grad_norm": 0.47570276260375977, "learning_rate": 0.00021361344537815127, "loss": 0.4582, "step": 28188 }, { "epoch": 15.74804469273743, "grad_norm": 0.46108779311180115, "learning_rate": 0.00021358543417366945, "loss": 0.3613, "step": 28189 }, { "epoch": 15.748603351955307, "grad_norm": 0.3710097074508667, "learning_rate": 0.00021355742296918768, "loss": 0.4422, "step": 28190 }, { "epoch": 15.749162011173185, "grad_norm": 0.616888701915741, "learning_rate": 0.0002135294117647059, "loss": 0.4114, "step": 28191 }, { "epoch": 15.749720670391062, "grad_norm": 0.5004860162734985, "learning_rate": 0.0002135014005602241, "loss": 0.5068, "step": 28192 }, { "epoch": 15.750279329608938, "grad_norm": 0.38989344239234924, "learning_rate": 0.0002134733893557423, "loss": 0.4164, "step": 28193 }, { "epoch": 15.750837988826815, "grad_norm": 1.829546570777893, "learning_rate": 0.0002134453781512605, "loss": 0.3906, "step": 28194 }, { "epoch": 15.751396648044693, "grad_norm": 1.5404324531555176, "learning_rate": 0.00021341736694677871, "loss": 0.3922, "step": 28195 }, { "epoch": 15.75195530726257, "grad_norm": 0.541739821434021, "learning_rate": 0.00021338935574229692, "loss": 0.3306, "step": 28196 }, { "epoch": 15.752513966480446, "grad_norm": 2.049353837966919, "learning_rate": 0.00021336134453781513, "loss": 0.4648, "step": 28197 }, { "epoch": 15.753072625698325, "grad_norm": 0.5836424827575684, "learning_rate": 0.00021333333333333336, "loss": 0.3823, "step": 28198 }, { "epoch": 15.753631284916201, "grad_norm": 0.33068305253982544, "learning_rate": 0.00021330532212885154, "loss": 0.3958, "step": 28199 }, { "epoch": 15.754189944134078, "grad_norm": 0.325416624546051, "learning_rate": 0.00021327731092436974, "loss": 0.3946, "step": 28200 }, { "epoch": 15.754748603351956, "grad_norm": 0.38331055641174316, "learning_rate": 0.00021324929971988795, "loss": 0.436, "step": 28201 }, { "epoch": 15.755307262569833, "grad_norm": 1.1591451168060303, "learning_rate": 
0.00021322128851540618, "loss": 0.3803, "step": 28202 }, { "epoch": 15.75586592178771, "grad_norm": 0.4944038689136505, "learning_rate": 0.00021319327731092436, "loss": 0.4557, "step": 28203 }, { "epoch": 15.756424581005586, "grad_norm": 0.3757638931274414, "learning_rate": 0.00021316526610644257, "loss": 0.2937, "step": 28204 }, { "epoch": 15.756983240223464, "grad_norm": 5.244970321655273, "learning_rate": 0.00021313725490196077, "loss": 0.3399, "step": 28205 }, { "epoch": 15.75754189944134, "grad_norm": 0.33353477716445923, "learning_rate": 0.000213109243697479, "loss": 0.305, "step": 28206 }, { "epoch": 15.758100558659217, "grad_norm": 0.5653721690177917, "learning_rate": 0.00021308123249299721, "loss": 0.4854, "step": 28207 }, { "epoch": 15.758659217877096, "grad_norm": 0.4542103409767151, "learning_rate": 0.0002130532212885154, "loss": 0.3424, "step": 28208 }, { "epoch": 15.759217877094972, "grad_norm": 0.4675729274749756, "learning_rate": 0.0002130252100840336, "loss": 0.3804, "step": 28209 }, { "epoch": 15.759776536312849, "grad_norm": 0.7025383114814758, "learning_rate": 0.00021299719887955183, "loss": 0.4995, "step": 28210 }, { "epoch": 15.760335195530725, "grad_norm": 0.32371261715888977, "learning_rate": 0.00021296918767507004, "loss": 0.3095, "step": 28211 }, { "epoch": 15.760893854748604, "grad_norm": 1.2456889152526855, "learning_rate": 0.00021294117647058824, "loss": 0.3937, "step": 28212 }, { "epoch": 15.76145251396648, "grad_norm": 0.4105624854564667, "learning_rate": 0.00021291316526610642, "loss": 0.3572, "step": 28213 }, { "epoch": 15.762011173184357, "grad_norm": 4.164335250854492, "learning_rate": 0.00021288515406162466, "loss": 0.4034, "step": 28214 }, { "epoch": 15.762569832402235, "grad_norm": 0.7588809728622437, "learning_rate": 0.00021285714285714286, "loss": 0.4862, "step": 28215 }, { "epoch": 15.763128491620112, "grad_norm": 0.38554051518440247, "learning_rate": 0.00021282913165266107, "loss": 0.3176, "step": 28216 }, { "epoch": 
15.763687150837988, "grad_norm": 0.6313602328300476, "learning_rate": 0.00021280112044817927, "loss": 0.5529, "step": 28217 }, { "epoch": 15.764245810055867, "grad_norm": 0.37413835525512695, "learning_rate": 0.00021277310924369748, "loss": 0.4657, "step": 28218 }, { "epoch": 15.764804469273743, "grad_norm": 0.567621648311615, "learning_rate": 0.00021274509803921569, "loss": 0.4256, "step": 28219 }, { "epoch": 15.76536312849162, "grad_norm": 0.7270967364311218, "learning_rate": 0.0002127170868347339, "loss": 0.4057, "step": 28220 }, { "epoch": 15.765921787709498, "grad_norm": 0.5513010621070862, "learning_rate": 0.0002126890756302521, "loss": 0.4731, "step": 28221 }, { "epoch": 15.766480446927375, "grad_norm": 0.38643893599510193, "learning_rate": 0.00021266106442577033, "loss": 0.4341, "step": 28222 }, { "epoch": 15.767039106145251, "grad_norm": 0.6488580107688904, "learning_rate": 0.0002126330532212885, "loss": 0.3671, "step": 28223 }, { "epoch": 15.767597765363128, "grad_norm": 0.6248840093612671, "learning_rate": 0.00021260504201680672, "loss": 0.4997, "step": 28224 }, { "epoch": 15.768156424581006, "grad_norm": 0.3813264071941376, "learning_rate": 0.00021257703081232492, "loss": 0.3437, "step": 28225 }, { "epoch": 15.768715083798883, "grad_norm": 0.4814385771751404, "learning_rate": 0.00021254901960784315, "loss": 0.3562, "step": 28226 }, { "epoch": 15.76927374301676, "grad_norm": 0.3675447702407837, "learning_rate": 0.00021252100840336136, "loss": 0.3952, "step": 28227 }, { "epoch": 15.769832402234638, "grad_norm": 0.6319012641906738, "learning_rate": 0.00021249299719887954, "loss": 0.4785, "step": 28228 }, { "epoch": 15.770391061452514, "grad_norm": 4.534488201141357, "learning_rate": 0.00021246498599439775, "loss": 0.3524, "step": 28229 }, { "epoch": 15.77094972067039, "grad_norm": 0.4247336685657501, "learning_rate": 0.00021243697478991598, "loss": 0.3756, "step": 28230 }, { "epoch": 15.771508379888267, "grad_norm": 0.5720586180686951, "learning_rate": 
0.00021240896358543418, "loss": 0.4587, "step": 28231 }, { "epoch": 15.772067039106146, "grad_norm": 0.42110154032707214, "learning_rate": 0.0002123809523809524, "loss": 0.3331, "step": 28232 }, { "epoch": 15.772625698324022, "grad_norm": 0.5136241316795349, "learning_rate": 0.00021235294117647057, "loss": 0.4474, "step": 28233 }, { "epoch": 15.773184357541899, "grad_norm": 0.6020116806030273, "learning_rate": 0.0002123249299719888, "loss": 0.4761, "step": 28234 }, { "epoch": 15.773743016759777, "grad_norm": 0.5410043001174927, "learning_rate": 0.000212296918767507, "loss": 0.4962, "step": 28235 }, { "epoch": 15.774301675977654, "grad_norm": 0.471684068441391, "learning_rate": 0.00021226890756302521, "loss": 0.4167, "step": 28236 }, { "epoch": 15.77486033519553, "grad_norm": 0.44435322284698486, "learning_rate": 0.00021224089635854342, "loss": 0.4428, "step": 28237 }, { "epoch": 15.775418994413407, "grad_norm": 0.5288370251655579, "learning_rate": 0.00021221288515406163, "loss": 0.325, "step": 28238 }, { "epoch": 15.775977653631285, "grad_norm": 0.5087246298789978, "learning_rate": 0.00021218487394957983, "loss": 0.4106, "step": 28239 }, { "epoch": 15.776536312849162, "grad_norm": 0.4656437933444977, "learning_rate": 0.00021215686274509804, "loss": 0.4707, "step": 28240 }, { "epoch": 15.777094972067038, "grad_norm": 0.41650253534317017, "learning_rate": 0.00021212885154061624, "loss": 0.4246, "step": 28241 }, { "epoch": 15.777653631284917, "grad_norm": 0.543643057346344, "learning_rate": 0.00021210084033613448, "loss": 0.3894, "step": 28242 }, { "epoch": 15.778212290502793, "grad_norm": 0.9209433197975159, "learning_rate": 0.00021207282913165266, "loss": 0.3783, "step": 28243 }, { "epoch": 15.77877094972067, "grad_norm": 0.6223825216293335, "learning_rate": 0.00021204481792717086, "loss": 0.4041, "step": 28244 }, { "epoch": 15.779329608938548, "grad_norm": 0.3460981845855713, "learning_rate": 0.00021201680672268907, "loss": 0.3028, "step": 28245 }, { "epoch": 
15.779888268156425, "grad_norm": 0.7730225324630737, "learning_rate": 0.0002119887955182073, "loss": 0.4101, "step": 28246 }, { "epoch": 15.780446927374301, "grad_norm": 0.42222270369529724, "learning_rate": 0.0002119607843137255, "loss": 0.4014, "step": 28247 }, { "epoch": 15.78100558659218, "grad_norm": 0.4425014555454254, "learning_rate": 0.0002119327731092437, "loss": 0.4345, "step": 28248 }, { "epoch": 15.781564245810056, "grad_norm": 0.7121943831443787, "learning_rate": 0.0002119047619047619, "loss": 0.5221, "step": 28249 }, { "epoch": 15.782122905027933, "grad_norm": 0.6115889549255371, "learning_rate": 0.00021187675070028013, "loss": 0.5173, "step": 28250 }, { "epoch": 15.78268156424581, "grad_norm": 0.44649675488471985, "learning_rate": 0.00021184873949579833, "loss": 0.4346, "step": 28251 }, { "epoch": 15.783240223463688, "grad_norm": 0.3846137523651123, "learning_rate": 0.00021182072829131654, "loss": 0.4231, "step": 28252 }, { "epoch": 15.783798882681564, "grad_norm": 2.3018479347229004, "learning_rate": 0.00021179271708683472, "loss": 0.3659, "step": 28253 }, { "epoch": 15.78435754189944, "grad_norm": 0.5861935615539551, "learning_rate": 0.00021176470588235295, "loss": 0.41, "step": 28254 }, { "epoch": 15.78491620111732, "grad_norm": 5.909586429595947, "learning_rate": 0.00021173669467787116, "loss": 0.4884, "step": 28255 }, { "epoch": 15.785474860335196, "grad_norm": 0.3388601839542389, "learning_rate": 0.00021170868347338936, "loss": 0.283, "step": 28256 }, { "epoch": 15.786033519553072, "grad_norm": 0.5771458148956299, "learning_rate": 0.00021168067226890754, "loss": 0.3824, "step": 28257 }, { "epoch": 15.786592178770949, "grad_norm": 0.5275522470474243, "learning_rate": 0.00021165266106442577, "loss": 0.3793, "step": 28258 }, { "epoch": 15.787150837988827, "grad_norm": 0.6667299270629883, "learning_rate": 0.00021162464985994398, "loss": 0.5232, "step": 28259 }, { "epoch": 15.787709497206704, "grad_norm": 0.45518627762794495, "learning_rate": 
0.00021159663865546219, "loss": 0.4345, "step": 28260 }, { "epoch": 15.78826815642458, "grad_norm": 0.7462858557701111, "learning_rate": 0.00021156862745098042, "loss": 0.412, "step": 28261 }, { "epoch": 15.788826815642459, "grad_norm": 1.8235708475112915, "learning_rate": 0.0002115406162464986, "loss": 0.4048, "step": 28262 }, { "epoch": 15.789385474860335, "grad_norm": 0.5864009261131287, "learning_rate": 0.0002115126050420168, "loss": 0.3942, "step": 28263 }, { "epoch": 15.789944134078212, "grad_norm": 0.4313289225101471, "learning_rate": 0.000211484593837535, "loss": 0.4062, "step": 28264 }, { "epoch": 15.79050279329609, "grad_norm": 0.7372799515724182, "learning_rate": 0.00021145658263305324, "loss": 0.4278, "step": 28265 }, { "epoch": 15.791061452513967, "grad_norm": 0.4913404583930969, "learning_rate": 0.00021142857142857145, "loss": 0.4769, "step": 28266 }, { "epoch": 15.791620111731843, "grad_norm": 0.4150962233543396, "learning_rate": 0.00021140056022408963, "loss": 0.3899, "step": 28267 }, { "epoch": 15.79217877094972, "grad_norm": 0.3714819550514221, "learning_rate": 0.00021137254901960783, "loss": 0.415, "step": 28268 }, { "epoch": 15.792737430167598, "grad_norm": 0.44067323207855225, "learning_rate": 0.00021134453781512607, "loss": 0.3846, "step": 28269 }, { "epoch": 15.793296089385475, "grad_norm": 0.48309043049812317, "learning_rate": 0.00021131652661064427, "loss": 0.4095, "step": 28270 }, { "epoch": 15.793854748603351, "grad_norm": 0.9010888338088989, "learning_rate": 0.00021128851540616248, "loss": 0.6459, "step": 28271 }, { "epoch": 15.79441340782123, "grad_norm": 0.463115394115448, "learning_rate": 0.00021126050420168066, "loss": 0.4062, "step": 28272 }, { "epoch": 15.794972067039106, "grad_norm": 0.3948679566383362, "learning_rate": 0.0002112324929971989, "loss": 0.4239, "step": 28273 }, { "epoch": 15.795530726256983, "grad_norm": 0.6633087396621704, "learning_rate": 0.0002112044817927171, "loss": 0.3746, "step": 28274 }, { "epoch": 
15.796089385474861, "grad_norm": 0.4883731007575989, "learning_rate": 0.0002111764705882353, "loss": 0.3441, "step": 28275 }, { "epoch": 15.796648044692738, "grad_norm": 0.45647338032722473, "learning_rate": 0.0002111484593837535, "loss": 0.4526, "step": 28276 }, { "epoch": 15.797206703910614, "grad_norm": 0.38070204854011536, "learning_rate": 0.00021112044817927171, "loss": 0.364, "step": 28277 }, { "epoch": 15.797765363128491, "grad_norm": 0.4591529667377472, "learning_rate": 0.00021109243697478992, "loss": 0.377, "step": 28278 }, { "epoch": 15.79832402234637, "grad_norm": 0.5029637217521667, "learning_rate": 0.00021106442577030813, "loss": 0.4278, "step": 28279 }, { "epoch": 15.798882681564246, "grad_norm": 0.4572776257991791, "learning_rate": 0.00021103641456582633, "loss": 0.4632, "step": 28280 }, { "epoch": 15.799441340782122, "grad_norm": 0.5541945695877075, "learning_rate": 0.00021100840336134457, "loss": 0.4269, "step": 28281 }, { "epoch": 15.8, "grad_norm": 0.5428129434585571, "learning_rate": 0.00021098039215686274, "loss": 0.4126, "step": 28282 }, { "epoch": 15.800558659217877, "grad_norm": 0.3842190206050873, "learning_rate": 0.00021095238095238095, "loss": 0.3171, "step": 28283 }, { "epoch": 15.801117318435754, "grad_norm": 3.594968557357788, "learning_rate": 0.00021092436974789916, "loss": 0.3944, "step": 28284 }, { "epoch": 15.80167597765363, "grad_norm": 0.45112496614456177, "learning_rate": 0.0002108963585434174, "loss": 0.3855, "step": 28285 }, { "epoch": 15.802234636871509, "grad_norm": 0.7227235436439514, "learning_rate": 0.0002108683473389356, "loss": 0.5105, "step": 28286 }, { "epoch": 15.802793296089385, "grad_norm": 0.685396671295166, "learning_rate": 0.00021084033613445377, "loss": 0.5296, "step": 28287 }, { "epoch": 15.803351955307262, "grad_norm": 0.3453178405761719, "learning_rate": 0.00021081232492997198, "loss": 0.3888, "step": 28288 }, { "epoch": 15.80391061452514, "grad_norm": 0.4115144908428192, "learning_rate": 
0.00021078431372549021, "loss": 0.4003, "step": 28289 }, { "epoch": 15.804469273743017, "grad_norm": 0.36292344331741333, "learning_rate": 0.00021075630252100842, "loss": 0.3397, "step": 28290 }, { "epoch": 15.805027932960893, "grad_norm": 0.8145294189453125, "learning_rate": 0.00021072829131652663, "loss": 0.4216, "step": 28291 }, { "epoch": 15.805586592178772, "grad_norm": 2.1440043449401855, "learning_rate": 0.0002107002801120448, "loss": 0.3865, "step": 28292 }, { "epoch": 15.806145251396648, "grad_norm": 0.4146145284175873, "learning_rate": 0.00021067226890756304, "loss": 0.3604, "step": 28293 }, { "epoch": 15.806703910614525, "grad_norm": 0.34399181604385376, "learning_rate": 0.00021064425770308124, "loss": 0.2795, "step": 28294 }, { "epoch": 15.807262569832401, "grad_norm": 0.5868501663208008, "learning_rate": 0.00021061624649859945, "loss": 0.3938, "step": 28295 }, { "epoch": 15.80782122905028, "grad_norm": 0.5085304975509644, "learning_rate": 0.00021058823529411766, "loss": 0.5137, "step": 28296 }, { "epoch": 15.808379888268156, "grad_norm": 0.46111851930618286, "learning_rate": 0.00021056022408963586, "loss": 0.3755, "step": 28297 }, { "epoch": 15.808938547486033, "grad_norm": 0.6453214287757874, "learning_rate": 0.00021053221288515407, "loss": 0.3752, "step": 28298 }, { "epoch": 15.809497206703911, "grad_norm": 0.410430371761322, "learning_rate": 0.00021050420168067227, "loss": 0.2885, "step": 28299 }, { "epoch": 15.810055865921788, "grad_norm": 0.40357017517089844, "learning_rate": 0.00021047619047619048, "loss": 0.3957, "step": 28300 }, { "epoch": 15.810614525139664, "grad_norm": 0.5667738318443298, "learning_rate": 0.0002104481792717087, "loss": 0.3917, "step": 28301 }, { "epoch": 15.811173184357543, "grad_norm": 8.480268478393555, "learning_rate": 0.0002104201680672269, "loss": 0.3927, "step": 28302 }, { "epoch": 15.81173184357542, "grad_norm": 0.5042136907577515, "learning_rate": 0.0002103921568627451, "loss": 0.4054, "step": 28303 }, { "epoch": 
15.812290502793296, "grad_norm": 0.43359267711639404, "learning_rate": 0.0002103641456582633, "loss": 0.2964, "step": 28304 }, { "epoch": 15.812849162011172, "grad_norm": 0.46846503019332886, "learning_rate": 0.00021033613445378154, "loss": 0.4553, "step": 28305 }, { "epoch": 15.81340782122905, "grad_norm": 0.43995222449302673, "learning_rate": 0.00021030812324929974, "loss": 0.4142, "step": 28306 }, { "epoch": 15.813966480446927, "grad_norm": 0.43687763810157776, "learning_rate": 0.00021028011204481792, "loss": 0.4013, "step": 28307 }, { "epoch": 15.814525139664804, "grad_norm": 1.1201419830322266, "learning_rate": 0.00021025210084033613, "loss": 0.3124, "step": 28308 }, { "epoch": 15.815083798882682, "grad_norm": 0.4475044310092926, "learning_rate": 0.00021022408963585436, "loss": 0.5161, "step": 28309 }, { "epoch": 15.815642458100559, "grad_norm": 1.9716365337371826, "learning_rate": 0.00021019607843137257, "loss": 0.3639, "step": 28310 }, { "epoch": 15.816201117318435, "grad_norm": 0.5290347933769226, "learning_rate": 0.00021016806722689077, "loss": 0.4378, "step": 28311 }, { "epoch": 15.816759776536312, "grad_norm": 0.4921661615371704, "learning_rate": 0.00021014005602240895, "loss": 0.4831, "step": 28312 }, { "epoch": 15.81731843575419, "grad_norm": 0.47815224528312683, "learning_rate": 0.00021011204481792718, "loss": 0.3903, "step": 28313 }, { "epoch": 15.817877094972067, "grad_norm": 0.5091875195503235, "learning_rate": 0.0002100840336134454, "loss": 0.3688, "step": 28314 }, { "epoch": 15.818435754189943, "grad_norm": 0.6837301254272461, "learning_rate": 0.0002100560224089636, "loss": 0.4595, "step": 28315 }, { "epoch": 15.818994413407822, "grad_norm": 0.6364619135856628, "learning_rate": 0.00021002801120448178, "loss": 0.4679, "step": 28316 }, { "epoch": 15.819553072625698, "grad_norm": 0.6363828778266907, "learning_rate": 0.00021, "loss": 0.4398, "step": 28317 }, { "epoch": 15.820111731843575, "grad_norm": 0.40923407673835754, "learning_rate": 
0.00020997198879551821, "loss": 0.3526, "step": 28318 }, { "epoch": 15.820670391061453, "grad_norm": 0.6104234457015991, "learning_rate": 0.00020994397759103642, "loss": 0.4608, "step": 28319 }, { "epoch": 15.82122905027933, "grad_norm": 1.1124622821807861, "learning_rate": 0.00020991596638655463, "loss": 0.3707, "step": 28320 }, { "epoch": 15.821787709497206, "grad_norm": 0.427314817905426, "learning_rate": 0.00020988795518207283, "loss": 0.4528, "step": 28321 }, { "epoch": 15.822346368715085, "grad_norm": 0.381928414106369, "learning_rate": 0.00020985994397759104, "loss": 0.3779, "step": 28322 }, { "epoch": 15.822905027932961, "grad_norm": 0.4302935302257538, "learning_rate": 0.00020983193277310924, "loss": 0.3966, "step": 28323 }, { "epoch": 15.823463687150838, "grad_norm": 0.6560373902320862, "learning_rate": 0.00020980392156862745, "loss": 0.4123, "step": 28324 }, { "epoch": 15.824022346368714, "grad_norm": 0.47805914282798767, "learning_rate": 0.00020977591036414568, "loss": 0.402, "step": 28325 }, { "epoch": 15.824581005586593, "grad_norm": 0.4818778336048126, "learning_rate": 0.00020974789915966386, "loss": 0.3634, "step": 28326 }, { "epoch": 15.82513966480447, "grad_norm": 0.3727802634239197, "learning_rate": 0.00020971988795518207, "loss": 0.3497, "step": 28327 }, { "epoch": 15.825698324022346, "grad_norm": 0.46278542280197144, "learning_rate": 0.00020969187675070027, "loss": 0.3682, "step": 28328 }, { "epoch": 15.826256983240224, "grad_norm": 0.39895567297935486, "learning_rate": 0.0002096638655462185, "loss": 0.3823, "step": 28329 }, { "epoch": 15.8268156424581, "grad_norm": 0.5496333241462708, "learning_rate": 0.00020963585434173671, "loss": 0.4496, "step": 28330 }, { "epoch": 15.827374301675977, "grad_norm": 0.8188040256500244, "learning_rate": 0.0002096078431372549, "loss": 0.3558, "step": 28331 }, { "epoch": 15.827932960893854, "grad_norm": 0.4682527780532837, "learning_rate": 0.0002095798319327731, "loss": 0.4711, "step": 28332 }, { "epoch": 
15.828491620111732, "grad_norm": 0.47295722365379333, "learning_rate": 0.00020955182072829133, "loss": 0.407, "step": 28333 }, { "epoch": 15.829050279329609, "grad_norm": 0.5079707503318787, "learning_rate": 0.00020952380952380954, "loss": 0.3624, "step": 28334 }, { "epoch": 15.829608938547485, "grad_norm": 0.3931850492954254, "learning_rate": 0.00020949579831932774, "loss": 0.3333, "step": 28335 }, { "epoch": 15.830167597765364, "grad_norm": 0.36695098876953125, "learning_rate": 0.00020946778711484592, "loss": 0.3201, "step": 28336 }, { "epoch": 15.83072625698324, "grad_norm": 0.4932332932949066, "learning_rate": 0.00020943977591036416, "loss": 0.4186, "step": 28337 }, { "epoch": 15.831284916201117, "grad_norm": 0.37566468119621277, "learning_rate": 0.00020941176470588236, "loss": 0.3577, "step": 28338 }, { "epoch": 15.831843575418995, "grad_norm": 0.4285581409931183, "learning_rate": 0.00020938375350140057, "loss": 0.406, "step": 28339 }, { "epoch": 15.832402234636872, "grad_norm": 0.3985428810119629, "learning_rate": 0.00020935574229691877, "loss": 0.406, "step": 28340 }, { "epoch": 15.832960893854748, "grad_norm": 1.2113806009292603, "learning_rate": 0.00020932773109243698, "loss": 0.4083, "step": 28341 }, { "epoch": 15.833519553072625, "grad_norm": 1.4710288047790527, "learning_rate": 0.00020929971988795519, "loss": 0.4782, "step": 28342 }, { "epoch": 15.834078212290503, "grad_norm": 0.350473552942276, "learning_rate": 0.0002092717086834734, "loss": 0.403, "step": 28343 }, { "epoch": 15.83463687150838, "grad_norm": 0.4966563880443573, "learning_rate": 0.0002092436974789916, "loss": 0.4612, "step": 28344 }, { "epoch": 15.835195530726256, "grad_norm": 0.3245506286621094, "learning_rate": 0.00020921568627450983, "loss": 0.3364, "step": 28345 }, { "epoch": 15.835754189944135, "grad_norm": 0.3747156262397766, "learning_rate": 0.000209187675070028, "loss": 0.4897, "step": 28346 }, { "epoch": 15.836312849162011, "grad_norm": 1.2024601697921753, "learning_rate": 
0.00020915966386554622, "loss": 0.3126, "step": 28347 }, { "epoch": 15.836871508379888, "grad_norm": 0.4942377805709839, "learning_rate": 0.00020913165266106442, "loss": 0.3626, "step": 28348 }, { "epoch": 15.837430167597766, "grad_norm": 0.4200831949710846, "learning_rate": 0.00020910364145658265, "loss": 0.3739, "step": 28349 }, { "epoch": 15.837988826815643, "grad_norm": 0.4163464307785034, "learning_rate": 0.00020907563025210086, "loss": 0.4032, "step": 28350 }, { "epoch": 15.83854748603352, "grad_norm": 0.48109230399131775, "learning_rate": 0.00020904761904761904, "loss": 0.3262, "step": 28351 }, { "epoch": 15.839106145251396, "grad_norm": 0.3733949661254883, "learning_rate": 0.00020901960784313725, "loss": 0.4544, "step": 28352 }, { "epoch": 15.839664804469274, "grad_norm": 0.3351680040359497, "learning_rate": 0.00020899159663865548, "loss": 0.4556, "step": 28353 }, { "epoch": 15.84022346368715, "grad_norm": 0.5531509518623352, "learning_rate": 0.00020896358543417368, "loss": 0.3882, "step": 28354 }, { "epoch": 15.840782122905027, "grad_norm": 0.6404728889465332, "learning_rate": 0.0002089355742296919, "loss": 0.3724, "step": 28355 }, { "epoch": 15.841340782122906, "grad_norm": 0.3734499216079712, "learning_rate": 0.00020890756302521007, "loss": 0.4391, "step": 28356 }, { "epoch": 15.841899441340782, "grad_norm": 0.39347684383392334, "learning_rate": 0.0002088795518207283, "loss": 0.4276, "step": 28357 }, { "epoch": 15.842458100558659, "grad_norm": 0.5413459539413452, "learning_rate": 0.0002088515406162465, "loss": 0.486, "step": 28358 }, { "epoch": 15.843016759776535, "grad_norm": 1.31234610080719, "learning_rate": 0.00020882352941176471, "loss": 0.3833, "step": 28359 }, { "epoch": 15.843575418994414, "grad_norm": 0.7396610975265503, "learning_rate": 0.00020879551820728292, "loss": 0.3721, "step": 28360 }, { "epoch": 15.84413407821229, "grad_norm": 0.45909854769706726, "learning_rate": 0.00020876750700280113, "loss": 0.4952, "step": 28361 }, { "epoch": 
15.844692737430167, "grad_norm": 0.8502050042152405, "learning_rate": 0.00020873949579831933, "loss": 0.4997, "step": 28362 }, { "epoch": 15.845251396648045, "grad_norm": 0.8959946036338806, "learning_rate": 0.00020871148459383754, "loss": 0.5762, "step": 28363 }, { "epoch": 15.845810055865922, "grad_norm": 0.5829812288284302, "learning_rate": 0.00020868347338935574, "loss": 0.5075, "step": 28364 }, { "epoch": 15.846368715083798, "grad_norm": 0.4223109185695648, "learning_rate": 0.00020865546218487398, "loss": 0.3932, "step": 28365 }, { "epoch": 15.846927374301677, "grad_norm": 0.46028634905815125, "learning_rate": 0.00020862745098039216, "loss": 0.4617, "step": 28366 }, { "epoch": 15.847486033519553, "grad_norm": 0.5849369764328003, "learning_rate": 0.00020859943977591036, "loss": 0.3527, "step": 28367 }, { "epoch": 15.84804469273743, "grad_norm": 0.5332579612731934, "learning_rate": 0.00020857142857142857, "loss": 0.4275, "step": 28368 }, { "epoch": 15.848603351955306, "grad_norm": 0.5800167322158813, "learning_rate": 0.0002085434173669468, "loss": 0.336, "step": 28369 }, { "epoch": 15.849162011173185, "grad_norm": 0.33230382204055786, "learning_rate": 0.00020851540616246498, "loss": 0.366, "step": 28370 }, { "epoch": 15.849720670391061, "grad_norm": 0.5483872890472412, "learning_rate": 0.0002084873949579832, "loss": 0.3585, "step": 28371 }, { "epoch": 15.850279329608938, "grad_norm": 0.5005813837051392, "learning_rate": 0.0002084593837535014, "loss": 0.3808, "step": 28372 }, { "epoch": 15.850837988826816, "grad_norm": 11.123309135437012, "learning_rate": 0.00020843137254901963, "loss": 0.408, "step": 28373 }, { "epoch": 15.851396648044693, "grad_norm": 0.489734411239624, "learning_rate": 0.00020840336134453783, "loss": 0.4529, "step": 28374 }, { "epoch": 15.85195530726257, "grad_norm": 0.36484530568122864, "learning_rate": 0.000208375350140056, "loss": 0.3717, "step": 28375 }, { "epoch": 15.852513966480448, "grad_norm": 0.4622708559036255, "learning_rate": 
0.00020834733893557422, "loss": 0.4871, "step": 28376 }, { "epoch": 15.853072625698324, "grad_norm": 0.4826257526874542, "learning_rate": 0.00020831932773109245, "loss": 0.4698, "step": 28377 }, { "epoch": 15.8536312849162, "grad_norm": 0.4868282377719879, "learning_rate": 0.00020829131652661066, "loss": 0.4612, "step": 28378 }, { "epoch": 15.854189944134077, "grad_norm": 0.4474319517612457, "learning_rate": 0.00020826330532212886, "loss": 0.5293, "step": 28379 }, { "epoch": 15.854748603351956, "grad_norm": 0.6853349208831787, "learning_rate": 0.00020823529411764704, "loss": 0.3449, "step": 28380 }, { "epoch": 15.855307262569832, "grad_norm": 0.5232422351837158, "learning_rate": 0.00020820728291316527, "loss": 0.4543, "step": 28381 }, { "epoch": 15.855865921787709, "grad_norm": 0.4874227046966553, "learning_rate": 0.00020817927170868348, "loss": 0.44, "step": 28382 }, { "epoch": 15.856424581005587, "grad_norm": 0.3655456006526947, "learning_rate": 0.00020815126050420169, "loss": 0.3741, "step": 28383 }, { "epoch": 15.856983240223464, "grad_norm": 0.3759661912918091, "learning_rate": 0.0002081232492997199, "loss": 0.3301, "step": 28384 }, { "epoch": 15.85754189944134, "grad_norm": 0.6729076504707336, "learning_rate": 0.0002080952380952381, "loss": 0.4547, "step": 28385 }, { "epoch": 15.858100558659217, "grad_norm": 0.5717796087265015, "learning_rate": 0.0002080672268907563, "loss": 0.3132, "step": 28386 }, { "epoch": 15.858659217877095, "grad_norm": 0.616753876209259, "learning_rate": 0.0002080392156862745, "loss": 0.4034, "step": 28387 }, { "epoch": 15.859217877094972, "grad_norm": 0.48488298058509827, "learning_rate": 0.00020801120448179272, "loss": 0.4667, "step": 28388 }, { "epoch": 15.859776536312848, "grad_norm": 0.3956802785396576, "learning_rate": 0.00020798319327731095, "loss": 0.3445, "step": 28389 }, { "epoch": 15.860335195530727, "grad_norm": 0.3456871211528778, "learning_rate": 0.00020795518207282913, "loss": 0.3577, "step": 28390 }, { "epoch": 
15.860893854748603, "grad_norm": 0.4096421003341675, "learning_rate": 0.00020792717086834733, "loss": 0.4156, "step": 28391 }, { "epoch": 15.86145251396648, "grad_norm": 0.385881632566452, "learning_rate": 0.00020789915966386554, "loss": 0.3469, "step": 28392 }, { "epoch": 15.862011173184358, "grad_norm": 0.3828909397125244, "learning_rate": 0.00020787114845938377, "loss": 0.3857, "step": 28393 }, { "epoch": 15.862569832402235, "grad_norm": 0.36904406547546387, "learning_rate": 0.00020784313725490198, "loss": 0.3603, "step": 28394 }, { "epoch": 15.863128491620111, "grad_norm": 0.3796258568763733, "learning_rate": 0.00020781512605042016, "loss": 0.3879, "step": 28395 }, { "epoch": 15.86368715083799, "grad_norm": 0.3728046417236328, "learning_rate": 0.00020778711484593836, "loss": 0.4918, "step": 28396 }, { "epoch": 15.864245810055866, "grad_norm": 1.1431180238723755, "learning_rate": 0.0002077591036414566, "loss": 0.4027, "step": 28397 }, { "epoch": 15.864804469273743, "grad_norm": 2.3157832622528076, "learning_rate": 0.0002077310924369748, "loss": 0.4078, "step": 28398 }, { "epoch": 15.86536312849162, "grad_norm": 0.3106030225753784, "learning_rate": 0.000207703081232493, "loss": 0.3404, "step": 28399 }, { "epoch": 15.865921787709498, "grad_norm": 0.6065434813499451, "learning_rate": 0.0002076750700280112, "loss": 0.7019, "step": 28400 }, { "epoch": 15.866480446927374, "grad_norm": 0.46118006110191345, "learning_rate": 0.00020764705882352942, "loss": 0.3393, "step": 28401 }, { "epoch": 15.867039106145251, "grad_norm": 0.5767713785171509, "learning_rate": 0.00020761904761904763, "loss": 0.4381, "step": 28402 }, { "epoch": 15.86759776536313, "grad_norm": 0.4891776740550995, "learning_rate": 0.00020759103641456583, "loss": 0.3676, "step": 28403 }, { "epoch": 15.868156424581006, "grad_norm": 0.3744615912437439, "learning_rate": 0.00020756302521008404, "loss": 0.3022, "step": 28404 }, { "epoch": 15.868715083798882, "grad_norm": 0.7736131548881531, "learning_rate": 
0.00020753501400560224, "loss": 0.4942, "step": 28405 }, { "epoch": 15.869273743016759, "grad_norm": 1.4315904378890991, "learning_rate": 0.00020750700280112045, "loss": 0.3647, "step": 28406 }, { "epoch": 15.869832402234637, "grad_norm": 0.3626892864704132, "learning_rate": 0.00020747899159663866, "loss": 0.4165, "step": 28407 }, { "epoch": 15.870391061452514, "grad_norm": 0.7442923784255981, "learning_rate": 0.00020745098039215686, "loss": 0.4806, "step": 28408 }, { "epoch": 15.87094972067039, "grad_norm": 2.0274085998535156, "learning_rate": 0.0002074229691876751, "loss": 0.3876, "step": 28409 }, { "epoch": 15.871508379888269, "grad_norm": 0.5515741109848022, "learning_rate": 0.00020739495798319327, "loss": 0.3988, "step": 28410 }, { "epoch": 15.872067039106145, "grad_norm": 0.44420647621154785, "learning_rate": 0.00020736694677871148, "loss": 0.4475, "step": 28411 }, { "epoch": 15.872625698324022, "grad_norm": 0.42270877957344055, "learning_rate": 0.0002073389355742297, "loss": 0.4018, "step": 28412 }, { "epoch": 15.8731843575419, "grad_norm": 0.8761048913002014, "learning_rate": 0.00020731092436974792, "loss": 0.3788, "step": 28413 }, { "epoch": 15.873743016759777, "grad_norm": 0.7267293930053711, "learning_rate": 0.00020728291316526613, "loss": 0.4004, "step": 28414 }, { "epoch": 15.874301675977653, "grad_norm": 0.4068256616592407, "learning_rate": 0.0002072549019607843, "loss": 0.3254, "step": 28415 }, { "epoch": 15.87486033519553, "grad_norm": 0.4016488194465637, "learning_rate": 0.0002072268907563025, "loss": 0.4844, "step": 28416 }, { "epoch": 15.875418994413408, "grad_norm": 0.3279385566711426, "learning_rate": 0.00020719887955182074, "loss": 0.3282, "step": 28417 }, { "epoch": 15.875977653631285, "grad_norm": 0.5887762904167175, "learning_rate": 0.00020717086834733895, "loss": 0.4306, "step": 28418 }, { "epoch": 15.876536312849161, "grad_norm": 0.350024938583374, "learning_rate": 0.00020714285714285716, "loss": 0.3529, "step": 28419 }, { "epoch": 
15.87709497206704, "grad_norm": 0.5459478497505188, "learning_rate": 0.00020711484593837533, "loss": 0.3658, "step": 28420 }, { "epoch": 15.877653631284916, "grad_norm": 0.6664366722106934, "learning_rate": 0.00020708683473389357, "loss": 0.3459, "step": 28421 }, { "epoch": 15.878212290502793, "grad_norm": 15.537731170654297, "learning_rate": 0.00020705882352941177, "loss": 0.4829, "step": 28422 }, { "epoch": 15.878770949720671, "grad_norm": 0.4693450927734375, "learning_rate": 0.00020703081232492998, "loss": 0.3265, "step": 28423 }, { "epoch": 15.879329608938548, "grad_norm": 0.46675968170166016, "learning_rate": 0.00020700280112044816, "loss": 0.3966, "step": 28424 }, { "epoch": 15.879888268156424, "grad_norm": 0.39274975657463074, "learning_rate": 0.0002069747899159664, "loss": 0.3731, "step": 28425 }, { "epoch": 15.880446927374301, "grad_norm": 0.6091746687889099, "learning_rate": 0.0002069467787114846, "loss": 0.4976, "step": 28426 }, { "epoch": 15.88100558659218, "grad_norm": 0.4482857882976532, "learning_rate": 0.0002069187675070028, "loss": 0.3346, "step": 28427 }, { "epoch": 15.881564245810056, "grad_norm": 1.0685772895812988, "learning_rate": 0.000206890756302521, "loss": 0.4871, "step": 28428 }, { "epoch": 15.882122905027932, "grad_norm": 0.57100909948349, "learning_rate": 0.00020686274509803922, "loss": 0.3607, "step": 28429 }, { "epoch": 15.88268156424581, "grad_norm": 0.440228134393692, "learning_rate": 0.00020683473389355742, "loss": 0.4409, "step": 28430 }, { "epoch": 15.883240223463687, "grad_norm": 1.2606966495513916, "learning_rate": 0.00020680672268907563, "loss": 0.6222, "step": 28431 }, { "epoch": 15.883798882681564, "grad_norm": 0.5349351763725281, "learning_rate": 0.00020677871148459383, "loss": 0.3509, "step": 28432 }, { "epoch": 15.88435754189944, "grad_norm": 0.33402013778686523, "learning_rate": 0.00020675070028011207, "loss": 0.3488, "step": 28433 }, { "epoch": 15.884916201117319, "grad_norm": 2.497631072998047, "learning_rate": 
0.00020672268907563025, "loss": 0.3966, "step": 28434 }, { "epoch": 15.885474860335195, "grad_norm": 0.5972374677658081, "learning_rate": 0.00020669467787114845, "loss": 0.616, "step": 28435 }, { "epoch": 15.886033519553072, "grad_norm": 0.5541279315948486, "learning_rate": 0.00020666666666666666, "loss": 0.3521, "step": 28436 }, { "epoch": 15.88659217877095, "grad_norm": 0.8467921018600464, "learning_rate": 0.0002066386554621849, "loss": 0.2775, "step": 28437 }, { "epoch": 15.887150837988827, "grad_norm": 0.6436362266540527, "learning_rate": 0.0002066106442577031, "loss": 0.3813, "step": 28438 }, { "epoch": 15.887709497206703, "grad_norm": 0.44945985078811646, "learning_rate": 0.00020658263305322128, "loss": 0.5514, "step": 28439 }, { "epoch": 15.888268156424582, "grad_norm": 0.3350696563720703, "learning_rate": 0.00020655462184873948, "loss": 0.3674, "step": 28440 }, { "epoch": 15.888826815642458, "grad_norm": 0.3166445195674896, "learning_rate": 0.00020652661064425771, "loss": 0.3327, "step": 28441 }, { "epoch": 15.889385474860335, "grad_norm": 0.6495802402496338, "learning_rate": 0.00020649859943977592, "loss": 0.4819, "step": 28442 }, { "epoch": 15.889944134078211, "grad_norm": 2.76810359954834, "learning_rate": 0.00020647058823529413, "loss": 0.5147, "step": 28443 }, { "epoch": 15.89050279329609, "grad_norm": 0.5136860013008118, "learning_rate": 0.0002064425770308123, "loss": 0.3636, "step": 28444 }, { "epoch": 15.891061452513966, "grad_norm": 0.43074607849121094, "learning_rate": 0.00020641456582633054, "loss": 0.4142, "step": 28445 }, { "epoch": 15.891620111731843, "grad_norm": 0.4100888967514038, "learning_rate": 0.00020638655462184874, "loss": 0.4511, "step": 28446 }, { "epoch": 15.892178770949721, "grad_norm": 0.6936303377151489, "learning_rate": 0.00020635854341736695, "loss": 0.4032, "step": 28447 }, { "epoch": 15.892737430167598, "grad_norm": 0.38012710213661194, "learning_rate": 0.00020633053221288516, "loss": 0.3854, "step": 28448 }, { "epoch": 
15.893296089385474, "grad_norm": 0.7620035409927368, "learning_rate": 0.00020630252100840336, "loss": 0.4582, "step": 28449 }, { "epoch": 15.893854748603353, "grad_norm": 0.4981892704963684, "learning_rate": 0.00020627450980392157, "loss": 0.569, "step": 28450 }, { "epoch": 15.89441340782123, "grad_norm": 0.5805944800376892, "learning_rate": 0.00020624649859943977, "loss": 0.474, "step": 28451 }, { "epoch": 15.894972067039106, "grad_norm": 0.37508833408355713, "learning_rate": 0.00020621848739495798, "loss": 0.4374, "step": 28452 }, { "epoch": 15.895530726256982, "grad_norm": 1.0173367261886597, "learning_rate": 0.00020619047619047621, "loss": 0.4204, "step": 28453 }, { "epoch": 15.89608938547486, "grad_norm": 0.7939687967300415, "learning_rate": 0.0002061624649859944, "loss": 0.365, "step": 28454 }, { "epoch": 15.896648044692737, "grad_norm": 0.8371119499206543, "learning_rate": 0.0002061344537815126, "loss": 0.4268, "step": 28455 }, { "epoch": 15.897206703910614, "grad_norm": 0.4625296890735626, "learning_rate": 0.0002061064425770308, "loss": 0.4179, "step": 28456 }, { "epoch": 15.897765363128492, "grad_norm": 0.34698665142059326, "learning_rate": 0.00020607843137254904, "loss": 0.3675, "step": 28457 }, { "epoch": 15.898324022346369, "grad_norm": 6.101527690887451, "learning_rate": 0.00020605042016806724, "loss": 0.4231, "step": 28458 }, { "epoch": 15.898882681564245, "grad_norm": 0.5622072815895081, "learning_rate": 0.00020602240896358542, "loss": 0.4838, "step": 28459 }, { "epoch": 15.899441340782122, "grad_norm": 0.924141526222229, "learning_rate": 0.00020599439775910363, "loss": 0.3551, "step": 28460 }, { "epoch": 15.9, "grad_norm": 4.007843017578125, "learning_rate": 0.00020596638655462186, "loss": 0.4115, "step": 28461 }, { "epoch": 15.900558659217877, "grad_norm": 1.081787347793579, "learning_rate": 0.00020593837535014007, "loss": 0.6797, "step": 28462 }, { "epoch": 15.901117318435753, "grad_norm": 0.4717196822166443, "learning_rate": 
0.00020591036414565827, "loss": 0.2993, "step": 28463 }, { "epoch": 15.901675977653632, "grad_norm": 0.9129984974861145, "learning_rate": 0.00020588235294117645, "loss": 0.5495, "step": 28464 }, { "epoch": 15.902234636871508, "grad_norm": 0.7169912457466125, "learning_rate": 0.00020585434173669469, "loss": 0.5595, "step": 28465 }, { "epoch": 15.902793296089385, "grad_norm": 0.4903715252876282, "learning_rate": 0.0002058263305322129, "loss": 0.4882, "step": 28466 }, { "epoch": 15.903351955307263, "grad_norm": 0.5238422155380249, "learning_rate": 0.0002057983193277311, "loss": 0.4049, "step": 28467 }, { "epoch": 15.90391061452514, "grad_norm": 0.6813641786575317, "learning_rate": 0.0002057703081232493, "loss": 0.3379, "step": 28468 }, { "epoch": 15.904469273743016, "grad_norm": 0.9063427448272705, "learning_rate": 0.0002057422969187675, "loss": 0.4149, "step": 28469 }, { "epoch": 15.905027932960895, "grad_norm": 0.43170052766799927, "learning_rate": 0.00020571428571428572, "loss": 0.3482, "step": 28470 }, { "epoch": 15.905586592178771, "grad_norm": 0.3269258141517639, "learning_rate": 0.00020568627450980392, "loss": 0.3935, "step": 28471 }, { "epoch": 15.906145251396648, "grad_norm": 0.5334846377372742, "learning_rate": 0.00020565826330532213, "loss": 0.3952, "step": 28472 }, { "epoch": 15.906703910614524, "grad_norm": 0.8405916094779968, "learning_rate": 0.00020563025210084036, "loss": 0.4517, "step": 28473 }, { "epoch": 15.907262569832403, "grad_norm": 0.6547182202339172, "learning_rate": 0.00020560224089635854, "loss": 0.4063, "step": 28474 }, { "epoch": 15.90782122905028, "grad_norm": 0.45974281430244446, "learning_rate": 0.00020557422969187675, "loss": 0.3768, "step": 28475 }, { "epoch": 15.908379888268156, "grad_norm": 2.6047375202178955, "learning_rate": 0.00020554621848739495, "loss": 0.4796, "step": 28476 }, { "epoch": 15.908938547486034, "grad_norm": 1.2541228532791138, "learning_rate": 0.00020551820728291318, "loss": 0.4766, "step": 28477 }, { "epoch": 
15.90949720670391, "grad_norm": 1.216201901435852, "learning_rate": 0.0002054901960784314, "loss": 0.485, "step": 28478 }, { "epoch": 15.910055865921787, "grad_norm": 16.248367309570312, "learning_rate": 0.00020546218487394957, "loss": 0.3789, "step": 28479 }, { "epoch": 15.910614525139664, "grad_norm": 0.6656233072280884, "learning_rate": 0.00020543417366946778, "loss": 0.4885, "step": 28480 }, { "epoch": 15.911173184357542, "grad_norm": 0.43571439385414124, "learning_rate": 0.000205406162464986, "loss": 0.4797, "step": 28481 }, { "epoch": 15.911731843575419, "grad_norm": 1.9898213148117065, "learning_rate": 0.00020537815126050421, "loss": 0.423, "step": 28482 }, { "epoch": 15.912290502793295, "grad_norm": 0.5237147212028503, "learning_rate": 0.0002053501400560224, "loss": 0.3472, "step": 28483 }, { "epoch": 15.912849162011174, "grad_norm": 0.4679156541824341, "learning_rate": 0.0002053221288515406, "loss": 0.5599, "step": 28484 }, { "epoch": 15.91340782122905, "grad_norm": 0.5738799571990967, "learning_rate": 0.00020529411764705883, "loss": 0.5695, "step": 28485 }, { "epoch": 15.913966480446927, "grad_norm": 0.385433167219162, "learning_rate": 0.00020526610644257704, "loss": 0.4009, "step": 28486 }, { "epoch": 15.914525139664804, "grad_norm": 0.4707374572753906, "learning_rate": 0.00020523809523809524, "loss": 0.4851, "step": 28487 }, { "epoch": 15.915083798882682, "grad_norm": 0.5555111169815063, "learning_rate": 0.00020521008403361342, "loss": 0.4917, "step": 28488 }, { "epoch": 15.915642458100558, "grad_norm": 0.4589138627052307, "learning_rate": 0.00020518207282913166, "loss": 0.3896, "step": 28489 }, { "epoch": 15.916201117318435, "grad_norm": 0.42126110196113586, "learning_rate": 0.00020515406162464986, "loss": 0.4569, "step": 28490 }, { "epoch": 15.916759776536313, "grad_norm": 0.4824746251106262, "learning_rate": 0.00020512605042016807, "loss": 0.3677, "step": 28491 }, { "epoch": 15.91731843575419, "grad_norm": 0.6603912711143494, "learning_rate": 
0.0002050980392156863, "loss": 0.4111, "step": 28492 }, { "epoch": 15.917877094972066, "grad_norm": 1.5749597549438477, "learning_rate": 0.00020507002801120448, "loss": 0.3415, "step": 28493 }, { "epoch": 15.918435754189945, "grad_norm": 0.4526945650577545, "learning_rate": 0.0002050420168067227, "loss": 0.2991, "step": 28494 }, { "epoch": 15.918994413407821, "grad_norm": 0.4534764289855957, "learning_rate": 0.0002050140056022409, "loss": 0.3299, "step": 28495 }, { "epoch": 15.919553072625698, "grad_norm": 0.5210875272750854, "learning_rate": 0.00020498599439775913, "loss": 0.4253, "step": 28496 }, { "epoch": 15.920111731843576, "grad_norm": 0.5250996351242065, "learning_rate": 0.00020495798319327733, "loss": 0.4201, "step": 28497 }, { "epoch": 15.920670391061453, "grad_norm": 0.6987993717193604, "learning_rate": 0.0002049299719887955, "loss": 0.5055, "step": 28498 }, { "epoch": 15.92122905027933, "grad_norm": 0.9810032844543457, "learning_rate": 0.00020490196078431372, "loss": 0.3642, "step": 28499 }, { "epoch": 15.921787709497206, "grad_norm": 0.4533756971359253, "learning_rate": 0.00020487394957983195, "loss": 0.3648, "step": 28500 }, { "epoch": 15.921787709497206, "eval_cer": 0.08613466007659323, "eval_loss": 0.3235432505607605, "eval_runtime": 58.068, "eval_samples_per_second": 78.15, "eval_steps_per_second": 4.891, "eval_wer": 0.3415547051758331, "step": 28500 }, { "epoch": 15.922346368715084, "grad_norm": 0.5654794573783875, "learning_rate": 0.00020484593837535016, "loss": 0.426, "step": 28501 }, { "epoch": 15.922905027932961, "grad_norm": 0.5199263095855713, "learning_rate": 0.00020481792717086836, "loss": 0.3793, "step": 28502 }, { "epoch": 15.923463687150837, "grad_norm": 2.2113380432128906, "learning_rate": 0.00020478991596638654, "loss": 0.4895, "step": 28503 }, { "epoch": 15.924022346368716, "grad_norm": 0.6097524166107178, "learning_rate": 0.00020476190476190477, "loss": 0.4814, "step": 28504 }, { "epoch": 15.924581005586592, "grad_norm": 
0.6493017077445984, "learning_rate": 0.00020473389355742298, "loss": 0.3894, "step": 28505 }, { "epoch": 15.925139664804469, "grad_norm": 0.4637574255466461, "learning_rate": 0.00020470588235294119, "loss": 0.3997, "step": 28506 }, { "epoch": 15.925698324022346, "grad_norm": 0.4985971450805664, "learning_rate": 0.0002046778711484594, "loss": 0.4104, "step": 28507 }, { "epoch": 15.926256983240224, "grad_norm": 1.911176323890686, "learning_rate": 0.0002046498599439776, "loss": 0.3783, "step": 28508 }, { "epoch": 15.9268156424581, "grad_norm": 0.5623754262924194, "learning_rate": 0.0002046218487394958, "loss": 0.4578, "step": 28509 }, { "epoch": 15.927374301675977, "grad_norm": 0.7828503847122192, "learning_rate": 0.000204593837535014, "loss": 0.6944, "step": 28510 }, { "epoch": 15.927932960893855, "grad_norm": 0.3718717694282532, "learning_rate": 0.00020456582633053222, "loss": 0.3955, "step": 28511 }, { "epoch": 15.928491620111732, "grad_norm": 0.6445087790489197, "learning_rate": 0.00020453781512605045, "loss": 0.6679, "step": 28512 }, { "epoch": 15.929050279329608, "grad_norm": 0.47356128692626953, "learning_rate": 0.00020450980392156863, "loss": 0.3253, "step": 28513 }, { "epoch": 15.929608938547487, "grad_norm": 0.4766961932182312, "learning_rate": 0.00020448179271708683, "loss": 0.544, "step": 28514 }, { "epoch": 15.930167597765363, "grad_norm": 0.8592237830162048, "learning_rate": 0.00020445378151260504, "loss": 0.374, "step": 28515 }, { "epoch": 15.93072625698324, "grad_norm": 1.002304196357727, "learning_rate": 0.00020442577030812327, "loss": 0.4401, "step": 28516 }, { "epoch": 15.931284916201117, "grad_norm": 0.5882537961006165, "learning_rate": 0.00020439775910364148, "loss": 0.375, "step": 28517 }, { "epoch": 15.931843575418995, "grad_norm": 2.5620806217193604, "learning_rate": 0.00020436974789915966, "loss": 0.505, "step": 28518 }, { "epoch": 15.932402234636871, "grad_norm": 0.40555885434150696, "learning_rate": 0.00020434173669467786, "loss": 0.4169, 
"step": 28519 }, { "epoch": 15.932960893854748, "grad_norm": 0.6523237824440002, "learning_rate": 0.0002043137254901961, "loss": 0.4334, "step": 28520 }, { "epoch": 15.933519553072626, "grad_norm": 0.5478083491325378, "learning_rate": 0.0002042857142857143, "loss": 0.5454, "step": 28521 }, { "epoch": 15.934078212290503, "grad_norm": 0.4292314052581787, "learning_rate": 0.0002042577030812325, "loss": 0.3919, "step": 28522 }, { "epoch": 15.93463687150838, "grad_norm": 0.4856822192668915, "learning_rate": 0.0002042296918767507, "loss": 0.3842, "step": 28523 }, { "epoch": 15.935195530726258, "grad_norm": 0.4929467439651489, "learning_rate": 0.00020420168067226892, "loss": 0.4751, "step": 28524 }, { "epoch": 15.935754189944134, "grad_norm": 0.3763562738895416, "learning_rate": 0.00020417366946778713, "loss": 0.3052, "step": 28525 }, { "epoch": 15.936312849162011, "grad_norm": 0.484101265668869, "learning_rate": 0.00020414565826330533, "loss": 0.4347, "step": 28526 }, { "epoch": 15.936871508379888, "grad_norm": 0.5910486578941345, "learning_rate": 0.00020411764705882354, "loss": 0.4826, "step": 28527 }, { "epoch": 15.937430167597766, "grad_norm": 0.660768985748291, "learning_rate": 0.00020408963585434174, "loss": 0.3767, "step": 28528 }, { "epoch": 15.937988826815642, "grad_norm": 0.47091758251190186, "learning_rate": 0.00020406162464985995, "loss": 0.5084, "step": 28529 }, { "epoch": 15.938547486033519, "grad_norm": 0.46020060777664185, "learning_rate": 0.00020403361344537816, "loss": 0.3624, "step": 28530 }, { "epoch": 15.939106145251397, "grad_norm": 0.4867001175880432, "learning_rate": 0.00020400560224089636, "loss": 0.3611, "step": 28531 }, { "epoch": 15.939664804469274, "grad_norm": 0.392189621925354, "learning_rate": 0.0002039775910364146, "loss": 0.4259, "step": 28532 }, { "epoch": 15.94022346368715, "grad_norm": 0.4954374134540558, "learning_rate": 0.00020394957983193277, "loss": 0.4123, "step": 28533 }, { "epoch": 15.940782122905027, "grad_norm": 
0.7099990844726562, "learning_rate": 0.00020392156862745098, "loss": 0.4378, "step": 28534 }, { "epoch": 15.941340782122905, "grad_norm": 4.950645446777344, "learning_rate": 0.0002038935574229692, "loss": 0.5561, "step": 28535 }, { "epoch": 15.941899441340782, "grad_norm": 0.4951377511024475, "learning_rate": 0.00020386554621848742, "loss": 0.3797, "step": 28536 }, { "epoch": 15.942458100558659, "grad_norm": 0.953783392906189, "learning_rate": 0.0002038375350140056, "loss": 0.3479, "step": 28537 }, { "epoch": 15.943016759776537, "grad_norm": 0.38955867290496826, "learning_rate": 0.0002038095238095238, "loss": 0.388, "step": 28538 }, { "epoch": 15.943575418994413, "grad_norm": 0.6025633215904236, "learning_rate": 0.000203781512605042, "loss": 0.4626, "step": 28539 }, { "epoch": 15.94413407821229, "grad_norm": 0.39215004444122314, "learning_rate": 0.00020375350140056024, "loss": 0.3648, "step": 28540 }, { "epoch": 15.944692737430168, "grad_norm": 0.416963130235672, "learning_rate": 0.00020372549019607845, "loss": 0.4433, "step": 28541 }, { "epoch": 15.945251396648045, "grad_norm": 0.6851239800453186, "learning_rate": 0.00020369747899159663, "loss": 0.5279, "step": 28542 }, { "epoch": 15.945810055865921, "grad_norm": 0.37526416778564453, "learning_rate": 0.00020366946778711483, "loss": 0.3689, "step": 28543 }, { "epoch": 15.946368715083798, "grad_norm": 0.4368479251861572, "learning_rate": 0.00020364145658263307, "loss": 0.4656, "step": 28544 }, { "epoch": 15.946927374301676, "grad_norm": 0.9038490653038025, "learning_rate": 0.00020361344537815127, "loss": 0.5051, "step": 28545 }, { "epoch": 15.947486033519553, "grad_norm": 0.7822328805923462, "learning_rate": 0.00020358543417366948, "loss": 0.4648, "step": 28546 }, { "epoch": 15.94804469273743, "grad_norm": 1.0376384258270264, "learning_rate": 0.00020355742296918766, "loss": 0.5006, "step": 28547 }, { "epoch": 15.948603351955308, "grad_norm": 1.508028507232666, "learning_rate": 0.0002035294117647059, "loss": 0.4648, 
"step": 28548 }, { "epoch": 15.949162011173184, "grad_norm": 0.49336063861846924, "learning_rate": 0.0002035014005602241, "loss": 0.3744, "step": 28549 }, { "epoch": 15.949720670391061, "grad_norm": 1.7102192640304565, "learning_rate": 0.0002034733893557423, "loss": 0.3585, "step": 28550 }, { "epoch": 15.95027932960894, "grad_norm": 0.5059553384780884, "learning_rate": 0.0002034453781512605, "loss": 0.4075, "step": 28551 }, { "epoch": 15.950837988826816, "grad_norm": 0.35427260398864746, "learning_rate": 0.00020341736694677872, "loss": 0.3601, "step": 28552 }, { "epoch": 15.951396648044692, "grad_norm": 0.5888104438781738, "learning_rate": 0.00020338935574229692, "loss": 0.5204, "step": 28553 }, { "epoch": 15.951955307262569, "grad_norm": 1.101461410522461, "learning_rate": 0.00020336134453781513, "loss": 0.2755, "step": 28554 }, { "epoch": 15.952513966480447, "grad_norm": 0.36998382210731506, "learning_rate": 0.00020333333333333333, "loss": 0.4071, "step": 28555 }, { "epoch": 15.953072625698324, "grad_norm": 0.44662296772003174, "learning_rate": 0.00020330532212885157, "loss": 0.4485, "step": 28556 }, { "epoch": 15.9536312849162, "grad_norm": 0.4298152029514313, "learning_rate": 0.00020327731092436975, "loss": 0.5044, "step": 28557 }, { "epoch": 15.954189944134079, "grad_norm": 0.7141160368919373, "learning_rate": 0.00020324929971988795, "loss": 0.3431, "step": 28558 }, { "epoch": 15.954748603351955, "grad_norm": 0.35678035020828247, "learning_rate": 0.00020322128851540616, "loss": 0.2986, "step": 28559 }, { "epoch": 15.955307262569832, "grad_norm": 0.435062438249588, "learning_rate": 0.0002031932773109244, "loss": 0.3811, "step": 28560 }, { "epoch": 15.955865921787709, "grad_norm": 0.40991783142089844, "learning_rate": 0.0002031652661064426, "loss": 0.441, "step": 28561 }, { "epoch": 15.956424581005587, "grad_norm": 0.4384680688381195, "learning_rate": 0.00020313725490196078, "loss": 0.4559, "step": 28562 }, { "epoch": 15.956983240223463, "grad_norm": 
0.5405470728874207, "learning_rate": 0.00020310924369747898, "loss": 0.4067, "step": 28563 }, { "epoch": 15.95754189944134, "grad_norm": 0.3395642042160034, "learning_rate": 0.00020308123249299721, "loss": 0.336, "step": 28564 }, { "epoch": 15.958100558659218, "grad_norm": 0.3948809504508972, "learning_rate": 0.00020305322128851542, "loss": 0.4156, "step": 28565 }, { "epoch": 15.958659217877095, "grad_norm": 0.42704781889915466, "learning_rate": 0.00020302521008403363, "loss": 0.3176, "step": 28566 }, { "epoch": 15.959217877094972, "grad_norm": 1.9850234985351562, "learning_rate": 0.0002029971988795518, "loss": 0.4029, "step": 28567 }, { "epoch": 15.95977653631285, "grad_norm": 0.42603111267089844, "learning_rate": 0.00020296918767507004, "loss": 0.3581, "step": 28568 }, { "epoch": 15.960335195530726, "grad_norm": 0.452487975358963, "learning_rate": 0.00020294117647058824, "loss": 0.4134, "step": 28569 }, { "epoch": 15.960893854748603, "grad_norm": 0.7981101870536804, "learning_rate": 0.00020291316526610645, "loss": 0.4401, "step": 28570 }, { "epoch": 15.961452513966481, "grad_norm": 0.9477629661560059, "learning_rate": 0.00020288515406162466, "loss": 0.3739, "step": 28571 }, { "epoch": 15.962011173184358, "grad_norm": 0.5518683195114136, "learning_rate": 0.00020285714285714286, "loss": 0.5674, "step": 28572 }, { "epoch": 15.962569832402234, "grad_norm": 0.43295642733573914, "learning_rate": 0.00020282913165266107, "loss": 0.307, "step": 28573 }, { "epoch": 15.963128491620111, "grad_norm": 0.35875412821769714, "learning_rate": 0.00020280112044817927, "loss": 0.3457, "step": 28574 }, { "epoch": 15.96368715083799, "grad_norm": 0.5405577421188354, "learning_rate": 0.00020277310924369748, "loss": 0.3574, "step": 28575 }, { "epoch": 15.964245810055866, "grad_norm": 0.44104719161987305, "learning_rate": 0.00020274509803921571, "loss": 0.4515, "step": 28576 }, { "epoch": 15.964804469273743, "grad_norm": 9.683589935302734, "learning_rate": 0.0002027170868347339, "loss": 
0.3122, "step": 28577 }, { "epoch": 15.96536312849162, "grad_norm": 1.4077775478363037, "learning_rate": 0.0002026890756302521, "loss": 0.3856, "step": 28578 }, { "epoch": 15.965921787709497, "grad_norm": 0.3799853026866913, "learning_rate": 0.0002026610644257703, "loss": 0.4183, "step": 28579 }, { "epoch": 15.966480446927374, "grad_norm": 1.7218818664550781, "learning_rate": 0.00020263305322128854, "loss": 0.3339, "step": 28580 }, { "epoch": 15.96703910614525, "grad_norm": 8.140228271484375, "learning_rate": 0.00020260504201680674, "loss": 0.3363, "step": 28581 }, { "epoch": 15.967597765363129, "grad_norm": 0.7309381365776062, "learning_rate": 0.00020257703081232492, "loss": 0.4697, "step": 28582 }, { "epoch": 15.968156424581005, "grad_norm": 0.5796902179718018, "learning_rate": 0.00020254901960784313, "loss": 0.4133, "step": 28583 }, { "epoch": 15.968715083798882, "grad_norm": 0.4000356197357178, "learning_rate": 0.00020252100840336136, "loss": 0.3832, "step": 28584 }, { "epoch": 15.96927374301676, "grad_norm": 0.7208417057991028, "learning_rate": 0.00020249299719887957, "loss": 0.3891, "step": 28585 }, { "epoch": 15.969832402234637, "grad_norm": 1.0270278453826904, "learning_rate": 0.00020246498599439777, "loss": 0.4176, "step": 28586 }, { "epoch": 15.970391061452514, "grad_norm": 0.4895149767398834, "learning_rate": 0.00020243697478991595, "loss": 0.4114, "step": 28587 }, { "epoch": 15.970949720670392, "grad_norm": 0.6412506699562073, "learning_rate": 0.00020240896358543419, "loss": 0.4179, "step": 28588 }, { "epoch": 15.971508379888268, "grad_norm": 0.42856088280677795, "learning_rate": 0.0002023809523809524, "loss": 0.3925, "step": 28589 }, { "epoch": 15.972067039106145, "grad_norm": 0.6094840168952942, "learning_rate": 0.0002023529411764706, "loss": 0.4559, "step": 28590 }, { "epoch": 15.972625698324022, "grad_norm": 0.4153468608856201, "learning_rate": 0.00020232492997198878, "loss": 0.3764, "step": 28591 }, { "epoch": 15.9731843575419, "grad_norm": 
0.6538140177726746, "learning_rate": 0.000202296918767507, "loss": 0.7245, "step": 28592 }, { "epoch": 15.973743016759776, "grad_norm": 0.9328212738037109, "learning_rate": 0.00020226890756302522, "loss": 0.8007, "step": 28593 }, { "epoch": 15.974301675977653, "grad_norm": 0.4586169421672821, "learning_rate": 0.00020224089635854342, "loss": 0.3397, "step": 28594 }, { "epoch": 15.974860335195531, "grad_norm": 30.752887725830078, "learning_rate": 0.00020221288515406163, "loss": 0.4215, "step": 28595 }, { "epoch": 15.975418994413408, "grad_norm": 0.48116356134414673, "learning_rate": 0.00020218487394957983, "loss": 0.4404, "step": 28596 }, { "epoch": 15.975977653631285, "grad_norm": 0.9074668884277344, "learning_rate": 0.00020215686274509804, "loss": 0.4648, "step": 28597 }, { "epoch": 15.976536312849163, "grad_norm": 0.6799079775810242, "learning_rate": 0.00020212885154061625, "loss": 0.5253, "step": 28598 }, { "epoch": 15.97709497206704, "grad_norm": 0.6789929866790771, "learning_rate": 0.00020210084033613445, "loss": 0.3991, "step": 28599 }, { "epoch": 15.977653631284916, "grad_norm": 0.43006426095962524, "learning_rate": 0.00020207282913165268, "loss": 0.3803, "step": 28600 }, { "epoch": 15.978212290502793, "grad_norm": 1.7170820236206055, "learning_rate": 0.00020204481792717086, "loss": 0.4897, "step": 28601 }, { "epoch": 15.978770949720671, "grad_norm": 0.6101322174072266, "learning_rate": 0.00020201680672268907, "loss": 0.3751, "step": 28602 }, { "epoch": 15.979329608938547, "grad_norm": 0.4622575044631958, "learning_rate": 0.00020198879551820728, "loss": 0.3692, "step": 28603 }, { "epoch": 15.979888268156424, "grad_norm": 3.59499454498291, "learning_rate": 0.0002019607843137255, "loss": 0.3856, "step": 28604 }, { "epoch": 15.980446927374302, "grad_norm": 0.3898753225803375, "learning_rate": 0.00020193277310924371, "loss": 0.4428, "step": 28605 }, { "epoch": 15.981005586592179, "grad_norm": 0.4102843403816223, "learning_rate": 0.0002019047619047619, "loss": 
0.4631, "step": 28606 }, { "epoch": 15.981564245810056, "grad_norm": 0.38137349486351013, "learning_rate": 0.0002018767507002801, "loss": 0.4184, "step": 28607 }, { "epoch": 15.982122905027932, "grad_norm": 0.5560269951820374, "learning_rate": 0.00020184873949579833, "loss": 0.4638, "step": 28608 }, { "epoch": 15.98268156424581, "grad_norm": 1.2141250371932983, "learning_rate": 0.00020182072829131654, "loss": 0.416, "step": 28609 }, { "epoch": 15.983240223463687, "grad_norm": 0.8336212635040283, "learning_rate": 0.00020179271708683474, "loss": 0.4315, "step": 28610 }, { "epoch": 15.983798882681564, "grad_norm": 0.7413162589073181, "learning_rate": 0.00020176470588235292, "loss": 0.3989, "step": 28611 }, { "epoch": 15.984357541899442, "grad_norm": 0.5239019393920898, "learning_rate": 0.00020173669467787116, "loss": 0.3504, "step": 28612 }, { "epoch": 15.984916201117318, "grad_norm": 0.5917267203330994, "learning_rate": 0.00020170868347338936, "loss": 0.3389, "step": 28613 }, { "epoch": 15.985474860335195, "grad_norm": 0.4086204767227173, "learning_rate": 0.00020168067226890757, "loss": 0.3238, "step": 28614 }, { "epoch": 15.986033519553073, "grad_norm": 0.3828534185886383, "learning_rate": 0.00020165266106442577, "loss": 0.4007, "step": 28615 }, { "epoch": 15.98659217877095, "grad_norm": 0.45286455750465393, "learning_rate": 0.00020162464985994398, "loss": 0.4141, "step": 28616 }, { "epoch": 15.987150837988827, "grad_norm": 1.5492680072784424, "learning_rate": 0.0002015966386554622, "loss": 0.4797, "step": 28617 }, { "epoch": 15.987709497206703, "grad_norm": 0.4872051477432251, "learning_rate": 0.0002015686274509804, "loss": 0.3872, "step": 28618 }, { "epoch": 15.988268156424581, "grad_norm": 0.4941769540309906, "learning_rate": 0.0002015406162464986, "loss": 0.3462, "step": 28619 }, { "epoch": 15.988826815642458, "grad_norm": 0.4956414997577667, "learning_rate": 0.00020151260504201683, "loss": 0.3027, "step": 28620 }, { "epoch": 15.989385474860335, "grad_norm": 
0.4612584412097931, "learning_rate": 0.000201484593837535, "loss": 0.4028, "step": 28621 }, { "epoch": 15.989944134078213, "grad_norm": 0.4827750027179718, "learning_rate": 0.00020145658263305322, "loss": 0.3857, "step": 28622 }, { "epoch": 15.99050279329609, "grad_norm": 0.40966519713401794, "learning_rate": 0.00020142857142857142, "loss": 0.4001, "step": 28623 }, { "epoch": 15.991061452513966, "grad_norm": 0.38131317496299744, "learning_rate": 0.00020140056022408966, "loss": 0.3941, "step": 28624 }, { "epoch": 15.991620111731844, "grad_norm": 0.4684199094772339, "learning_rate": 0.00020137254901960786, "loss": 0.4498, "step": 28625 }, { "epoch": 15.992178770949721, "grad_norm": 0.4183073341846466, "learning_rate": 0.00020134453781512604, "loss": 0.394, "step": 28626 }, { "epoch": 15.992737430167598, "grad_norm": 0.4430552124977112, "learning_rate": 0.00020131652661064425, "loss": 0.3761, "step": 28627 }, { "epoch": 15.993296089385474, "grad_norm": 0.4162876605987549, "learning_rate": 0.00020128851540616248, "loss": 0.3759, "step": 28628 }, { "epoch": 15.993854748603352, "grad_norm": 0.48116421699523926, "learning_rate": 0.00020126050420168069, "loss": 0.2989, "step": 28629 }, { "epoch": 15.994413407821229, "grad_norm": 0.48768338561058044, "learning_rate": 0.0002012324929971989, "loss": 0.4296, "step": 28630 }, { "epoch": 15.994972067039106, "grad_norm": 1.4282417297363281, "learning_rate": 0.00020120448179271707, "loss": 0.3903, "step": 28631 }, { "epoch": 15.995530726256984, "grad_norm": 4.254456520080566, "learning_rate": 0.0002011764705882353, "loss": 0.3804, "step": 28632 }, { "epoch": 15.99608938547486, "grad_norm": 0.8755951523780823, "learning_rate": 0.0002011484593837535, "loss": 0.3919, "step": 28633 }, { "epoch": 15.996648044692737, "grad_norm": 0.480814129114151, "learning_rate": 0.00020112044817927172, "loss": 0.3455, "step": 28634 }, { "epoch": 15.997206703910614, "grad_norm": 0.4574722349643707, "learning_rate": 0.00020109243697478992, "loss": 
0.3223, "step": 28635 }, { "epoch": 15.997765363128492, "grad_norm": 0.31181037425994873, "learning_rate": 0.00020106442577030813, "loss": 0.3316, "step": 28636 }, { "epoch": 15.998324022346369, "grad_norm": 0.5989413261413574, "learning_rate": 0.00020103641456582633, "loss": 0.4521, "step": 28637 }, { "epoch": 15.998882681564245, "grad_norm": 2.2446444034576416, "learning_rate": 0.00020100840336134454, "loss": 0.438, "step": 28638 }, { "epoch": 15.999441340782123, "grad_norm": 37.44776916503906, "learning_rate": 0.00020098039215686275, "loss": 0.3661, "step": 28639 }, { "epoch": 16.0, "grad_norm": 0.43943220376968384, "learning_rate": 0.00020095238095238098, "loss": 0.4565, "step": 28640 }, { "epoch": 16.000558659217877, "grad_norm": 0.7048699855804443, "learning_rate": 0.00020092436974789916, "loss": 0.3605, "step": 28641 }, { "epoch": 16.001117318435753, "grad_norm": 0.5793527960777283, "learning_rate": 0.00020089635854341736, "loss": 0.4319, "step": 28642 }, { "epoch": 16.00167597765363, "grad_norm": 0.9626024961471558, "learning_rate": 0.00020086834733893557, "loss": 0.3648, "step": 28643 }, { "epoch": 16.00223463687151, "grad_norm": 0.37225890159606934, "learning_rate": 0.0002008403361344538, "loss": 0.413, "step": 28644 }, { "epoch": 16.002793296089386, "grad_norm": 0.39024972915649414, "learning_rate": 0.000200812324929972, "loss": 0.3994, "step": 28645 }, { "epoch": 16.003351955307263, "grad_norm": 0.72527015209198, "learning_rate": 0.0002007843137254902, "loss": 0.3831, "step": 28646 }, { "epoch": 16.00391061452514, "grad_norm": 0.38242390751838684, "learning_rate": 0.0002007563025210084, "loss": 0.4909, "step": 28647 }, { "epoch": 16.004469273743016, "grad_norm": 0.34653082489967346, "learning_rate": 0.00020072829131652663, "loss": 0.2554, "step": 28648 }, { "epoch": 16.005027932960893, "grad_norm": 0.4286128878593445, "learning_rate": 0.00020070028011204483, "loss": 0.416, "step": 28649 }, { "epoch": 16.00558659217877, "grad_norm": 0.8708988428115845, 
"learning_rate": 0.000200672268907563, "loss": 0.4155, "step": 28650 }, { "epoch": 16.00614525139665, "grad_norm": 0.34916868805885315, "learning_rate": 0.00020064425770308122, "loss": 0.3637, "step": 28651 }, { "epoch": 16.006703910614526, "grad_norm": 1.1638103723526, "learning_rate": 0.00020061624649859945, "loss": 0.4343, "step": 28652 }, { "epoch": 16.007262569832402, "grad_norm": 1.565123200416565, "learning_rate": 0.00020058823529411766, "loss": 0.4478, "step": 28653 }, { "epoch": 16.00782122905028, "grad_norm": 0.43218564987182617, "learning_rate": 0.00020056022408963586, "loss": 0.435, "step": 28654 }, { "epoch": 16.008379888268156, "grad_norm": 0.587452232837677, "learning_rate": 0.00020053221288515404, "loss": 0.547, "step": 28655 }, { "epoch": 16.008938547486032, "grad_norm": 0.5117811560630798, "learning_rate": 0.00020050420168067227, "loss": 0.4295, "step": 28656 }, { "epoch": 16.009497206703912, "grad_norm": 0.49994993209838867, "learning_rate": 0.00020047619047619048, "loss": 0.4401, "step": 28657 }, { "epoch": 16.01005586592179, "grad_norm": 0.5301174521446228, "learning_rate": 0.0002004481792717087, "loss": 0.3344, "step": 28658 }, { "epoch": 16.010614525139665, "grad_norm": 0.39890173077583313, "learning_rate": 0.0002004201680672269, "loss": 0.3962, "step": 28659 }, { "epoch": 16.011173184357542, "grad_norm": 0.8612424731254578, "learning_rate": 0.0002003921568627451, "loss": 0.3873, "step": 28660 }, { "epoch": 16.01173184357542, "grad_norm": 0.46927180886268616, "learning_rate": 0.0002003641456582633, "loss": 0.4122, "step": 28661 }, { "epoch": 16.012290502793295, "grad_norm": 0.4179631769657135, "learning_rate": 0.0002003361344537815, "loss": 0.4842, "step": 28662 }, { "epoch": 16.01284916201117, "grad_norm": 0.5834910273551941, "learning_rate": 0.00020030812324929972, "loss": 0.4836, "step": 28663 }, { "epoch": 16.013407821229052, "grad_norm": 0.555940568447113, "learning_rate": 0.00020028011204481795, "loss": 0.3847, "step": 28664 }, { 
"epoch": 16.01396648044693, "grad_norm": 0.5529289245605469, "learning_rate": 0.00020025210084033613, "loss": 0.355, "step": 28665 }, { "epoch": 16.014525139664805, "grad_norm": 0.5286394953727722, "learning_rate": 0.00020022408963585433, "loss": 0.4983, "step": 28666 }, { "epoch": 16.01508379888268, "grad_norm": 0.5944266319274902, "learning_rate": 0.00020019607843137254, "loss": 0.4084, "step": 28667 }, { "epoch": 16.015642458100558, "grad_norm": 0.6405765414237976, "learning_rate": 0.00020016806722689077, "loss": 0.3636, "step": 28668 }, { "epoch": 16.016201117318435, "grad_norm": 0.4832776188850403, "learning_rate": 0.00020014005602240898, "loss": 0.3652, "step": 28669 }, { "epoch": 16.01675977653631, "grad_norm": 0.43088850378990173, "learning_rate": 0.00020011204481792716, "loss": 0.3936, "step": 28670 }, { "epoch": 16.01731843575419, "grad_norm": 2.6670849323272705, "learning_rate": 0.00020008403361344536, "loss": 0.4082, "step": 28671 }, { "epoch": 16.017877094972068, "grad_norm": 0.3858379125595093, "learning_rate": 0.0002000560224089636, "loss": 0.336, "step": 28672 }, { "epoch": 16.018435754189944, "grad_norm": 0.4809342622756958, "learning_rate": 0.0002000280112044818, "loss": 0.4306, "step": 28673 }, { "epoch": 16.01899441340782, "grad_norm": 0.40753260254859924, "learning_rate": 0.0002, "loss": 0.4286, "step": 28674 }, { "epoch": 16.019553072625698, "grad_norm": 0.5665051341056824, "learning_rate": 0.0001999719887955182, "loss": 0.4737, "step": 28675 }, { "epoch": 16.020111731843574, "grad_norm": 1.057637095451355, "learning_rate": 0.00019994397759103642, "loss": 0.4571, "step": 28676 }, { "epoch": 16.02067039106145, "grad_norm": 0.49633321166038513, "learning_rate": 0.00019991596638655463, "loss": 0.439, "step": 28677 }, { "epoch": 16.02122905027933, "grad_norm": 0.33009234070777893, "learning_rate": 0.00019988795518207283, "loss": 0.4024, "step": 28678 }, { "epoch": 16.021787709497207, "grad_norm": 1.06159245967865, "learning_rate": 
0.00019985994397759104, "loss": 0.5011, "step": 28679 }, { "epoch": 16.022346368715084, "grad_norm": 0.5506209135055542, "learning_rate": 0.00019983193277310925, "loss": 0.4544, "step": 28680 }, { "epoch": 16.02290502793296, "grad_norm": 0.4042088985443115, "learning_rate": 0.00019980392156862745, "loss": 0.4188, "step": 28681 }, { "epoch": 16.023463687150837, "grad_norm": 0.8546011447906494, "learning_rate": 0.00019977591036414566, "loss": 0.3441, "step": 28682 }, { "epoch": 16.024022346368714, "grad_norm": 0.4159787595272064, "learning_rate": 0.00019974789915966386, "loss": 0.4095, "step": 28683 }, { "epoch": 16.024581005586594, "grad_norm": 0.34117525815963745, "learning_rate": 0.0001997198879551821, "loss": 0.3602, "step": 28684 }, { "epoch": 16.02513966480447, "grad_norm": 0.6552532911300659, "learning_rate": 0.00019969187675070028, "loss": 0.424, "step": 28685 }, { "epoch": 16.025698324022347, "grad_norm": 0.4073229134082794, "learning_rate": 0.00019966386554621848, "loss": 0.3652, "step": 28686 }, { "epoch": 16.026256983240224, "grad_norm": 0.351581335067749, "learning_rate": 0.0001996358543417367, "loss": 0.3459, "step": 28687 }, { "epoch": 16.0268156424581, "grad_norm": 0.9441070556640625, "learning_rate": 0.00019960784313725492, "loss": 0.5215, "step": 28688 }, { "epoch": 16.027374301675977, "grad_norm": 0.5241767764091492, "learning_rate": 0.00019957983193277313, "loss": 0.4037, "step": 28689 }, { "epoch": 16.027932960893853, "grad_norm": 0.46423688530921936, "learning_rate": 0.0001995518207282913, "loss": 0.3779, "step": 28690 }, { "epoch": 16.028491620111733, "grad_norm": 5.712769031524658, "learning_rate": 0.0001995238095238095, "loss": 0.3881, "step": 28691 }, { "epoch": 16.02905027932961, "grad_norm": 0.6212025880813599, "learning_rate": 0.00019949579831932774, "loss": 0.4035, "step": 28692 }, { "epoch": 16.029608938547486, "grad_norm": 0.6730415225028992, "learning_rate": 0.00019946778711484595, "loss": 0.3349, "step": 28693 }, { "epoch": 
16.030167597765363, "grad_norm": 0.37275049090385437, "learning_rate": 0.00019943977591036416, "loss": 0.3216, "step": 28694 }, { "epoch": 16.03072625698324, "grad_norm": 2.095198154449463, "learning_rate": 0.00019941176470588234, "loss": 0.3938, "step": 28695 }, { "epoch": 16.031284916201116, "grad_norm": 0.7965613007545471, "learning_rate": 0.00019938375350140057, "loss": 0.4118, "step": 28696 }, { "epoch": 16.031843575418993, "grad_norm": 0.38307318091392517, "learning_rate": 0.00019935574229691877, "loss": 0.3259, "step": 28697 }, { "epoch": 16.032402234636873, "grad_norm": 0.7534868121147156, "learning_rate": 0.00019932773109243698, "loss": 0.3655, "step": 28698 }, { "epoch": 16.03296089385475, "grad_norm": 0.310043066740036, "learning_rate": 0.0001992997198879552, "loss": 0.3398, "step": 28699 }, { "epoch": 16.033519553072626, "grad_norm": 1.157518744468689, "learning_rate": 0.0001992717086834734, "loss": 0.3228, "step": 28700 }, { "epoch": 16.034078212290503, "grad_norm": 2.2935333251953125, "learning_rate": 0.0001992436974789916, "loss": 0.4583, "step": 28701 }, { "epoch": 16.03463687150838, "grad_norm": 0.5491610169410706, "learning_rate": 0.0001992156862745098, "loss": 0.4437, "step": 28702 }, { "epoch": 16.035195530726256, "grad_norm": 0.49655038118362427, "learning_rate": 0.000199187675070028, "loss": 0.448, "step": 28703 }, { "epoch": 16.035754189944136, "grad_norm": 1.1616835594177246, "learning_rate": 0.00019915966386554622, "loss": 0.3663, "step": 28704 }, { "epoch": 16.036312849162012, "grad_norm": 0.48957470059394836, "learning_rate": 0.00019913165266106442, "loss": 0.3926, "step": 28705 }, { "epoch": 16.03687150837989, "grad_norm": 0.40391018986701965, "learning_rate": 0.00019910364145658263, "loss": 0.4804, "step": 28706 }, { "epoch": 16.037430167597766, "grad_norm": 0.4524936378002167, "learning_rate": 0.00019907563025210083, "loss": 0.2925, "step": 28707 }, { "epoch": 16.037988826815642, "grad_norm": 0.31568586826324463, "learning_rate": 
0.00019904761904761907, "loss": 0.3227, "step": 28708 }, { "epoch": 16.03854748603352, "grad_norm": 0.5016072392463684, "learning_rate": 0.00019901960784313725, "loss": 0.4659, "step": 28709 }, { "epoch": 16.039106145251395, "grad_norm": 0.524066686630249, "learning_rate": 0.00019899159663865545, "loss": 0.4895, "step": 28710 }, { "epoch": 16.039664804469275, "grad_norm": 0.5096714496612549, "learning_rate": 0.00019896358543417366, "loss": 0.4801, "step": 28711 }, { "epoch": 16.040223463687152, "grad_norm": 0.5921947360038757, "learning_rate": 0.0001989355742296919, "loss": 0.4793, "step": 28712 }, { "epoch": 16.04078212290503, "grad_norm": 0.8980619311332703, "learning_rate": 0.0001989075630252101, "loss": 0.4051, "step": 28713 }, { "epoch": 16.041340782122905, "grad_norm": 0.6189582943916321, "learning_rate": 0.00019887955182072828, "loss": 0.3373, "step": 28714 }, { "epoch": 16.04189944134078, "grad_norm": 0.5464439392089844, "learning_rate": 0.00019885154061624648, "loss": 0.4072, "step": 28715 }, { "epoch": 16.042458100558658, "grad_norm": 3.057891368865967, "learning_rate": 0.00019882352941176472, "loss": 0.3639, "step": 28716 }, { "epoch": 16.043016759776535, "grad_norm": 0.4201343059539795, "learning_rate": 0.00019879551820728292, "loss": 0.426, "step": 28717 }, { "epoch": 16.043575418994415, "grad_norm": 0.808809757232666, "learning_rate": 0.00019876750700280113, "loss": 0.4774, "step": 28718 }, { "epoch": 16.04413407821229, "grad_norm": 0.42565008997917175, "learning_rate": 0.0001987394957983193, "loss": 0.3174, "step": 28719 }, { "epoch": 16.044692737430168, "grad_norm": 0.6673728823661804, "learning_rate": 0.00019871148459383754, "loss": 0.3693, "step": 28720 }, { "epoch": 16.045251396648045, "grad_norm": 0.4921528398990631, "learning_rate": 0.00019868347338935575, "loss": 0.3409, "step": 28721 }, { "epoch": 16.04581005586592, "grad_norm": 0.9265057444572449, "learning_rate": 0.00019865546218487395, "loss": 0.4993, "step": 28722 }, { "epoch": 
16.046368715083798, "grad_norm": 14.272374153137207, "learning_rate": 0.00019862745098039218, "loss": 0.4597, "step": 28723 }, { "epoch": 16.046927374301674, "grad_norm": 0.8591921329498291, "learning_rate": 0.00019859943977591036, "loss": 0.3781, "step": 28724 }, { "epoch": 16.047486033519554, "grad_norm": 0.45600244402885437, "learning_rate": 0.00019857142857142857, "loss": 0.3751, "step": 28725 }, { "epoch": 16.04804469273743, "grad_norm": 1.3724937438964844, "learning_rate": 0.00019854341736694678, "loss": 0.5224, "step": 28726 }, { "epoch": 16.048603351955308, "grad_norm": 0.5757851600646973, "learning_rate": 0.000198515406162465, "loss": 0.431, "step": 28727 }, { "epoch": 16.049162011173184, "grad_norm": 0.5403292179107666, "learning_rate": 0.00019848739495798321, "loss": 0.475, "step": 28728 }, { "epoch": 16.04972067039106, "grad_norm": 1.0260902643203735, "learning_rate": 0.0001984593837535014, "loss": 0.3883, "step": 28729 }, { "epoch": 16.050279329608937, "grad_norm": 0.469302773475647, "learning_rate": 0.0001984313725490196, "loss": 0.3064, "step": 28730 }, { "epoch": 16.050837988826817, "grad_norm": 0.5761861801147461, "learning_rate": 0.00019840336134453783, "loss": 0.4788, "step": 28731 }, { "epoch": 16.051396648044694, "grad_norm": 0.5332999229431152, "learning_rate": 0.00019837535014005604, "loss": 0.3847, "step": 28732 }, { "epoch": 16.05195530726257, "grad_norm": 0.35240307450294495, "learning_rate": 0.00019834733893557424, "loss": 0.4331, "step": 28733 }, { "epoch": 16.052513966480447, "grad_norm": 0.5284339785575867, "learning_rate": 0.00019831932773109242, "loss": 0.5328, "step": 28734 }, { "epoch": 16.053072625698324, "grad_norm": 0.7995294332504272, "learning_rate": 0.00019829131652661066, "loss": 0.4426, "step": 28735 }, { "epoch": 16.0536312849162, "grad_norm": 0.4651493430137634, "learning_rate": 0.00019826330532212886, "loss": 0.332, "step": 28736 }, { "epoch": 16.054189944134077, "grad_norm": 0.4823979437351227, "learning_rate": 
0.00019823529411764707, "loss": 0.4345, "step": 28737 }, { "epoch": 16.054748603351957, "grad_norm": 1.1699432134628296, "learning_rate": 0.00019820728291316527, "loss": 0.4165, "step": 28738 }, { "epoch": 16.055307262569833, "grad_norm": 0.48355185985565186, "learning_rate": 0.00019817927170868348, "loss": 0.379, "step": 28739 }, { "epoch": 16.05586592178771, "grad_norm": 0.8748696446418762, "learning_rate": 0.0001981512605042017, "loss": 0.8124, "step": 28740 }, { "epoch": 16.056424581005587, "grad_norm": 0.3409229815006256, "learning_rate": 0.0001981232492997199, "loss": 0.3976, "step": 28741 }, { "epoch": 16.056983240223463, "grad_norm": 0.3500695824623108, "learning_rate": 0.0001980952380952381, "loss": 0.3491, "step": 28742 }, { "epoch": 16.05754189944134, "grad_norm": 0.5360079407691956, "learning_rate": 0.00019806722689075633, "loss": 0.4036, "step": 28743 }, { "epoch": 16.058100558659216, "grad_norm": 0.9907260537147522, "learning_rate": 0.0001980392156862745, "loss": 0.3982, "step": 28744 }, { "epoch": 16.058659217877096, "grad_norm": 0.3772161900997162, "learning_rate": 0.00019801120448179272, "loss": 0.4175, "step": 28745 }, { "epoch": 16.059217877094973, "grad_norm": 0.48105260729789734, "learning_rate": 0.00019798319327731092, "loss": 0.4074, "step": 28746 }, { "epoch": 16.05977653631285, "grad_norm": 0.4142693877220154, "learning_rate": 0.00019795518207282916, "loss": 0.4006, "step": 28747 }, { "epoch": 16.060335195530726, "grad_norm": 11.138873100280762, "learning_rate": 0.00019792717086834736, "loss": 0.4017, "step": 28748 }, { "epoch": 16.060893854748603, "grad_norm": 0.47995811700820923, "learning_rate": 0.00019789915966386554, "loss": 0.4257, "step": 28749 }, { "epoch": 16.06145251396648, "grad_norm": 0.4389776885509491, "learning_rate": 0.00019787114845938375, "loss": 0.3558, "step": 28750 }, { "epoch": 16.062011173184356, "grad_norm": 2.0461952686309814, "learning_rate": 0.00019784313725490198, "loss": 0.624, "step": 28751 }, { "epoch": 
16.062569832402236, "grad_norm": 0.6170380115509033, "learning_rate": 0.00019781512605042019, "loss": 0.4538, "step": 28752 }, { "epoch": 16.063128491620112, "grad_norm": 0.5412031412124634, "learning_rate": 0.0001977871148459384, "loss": 0.3106, "step": 28753 }, { "epoch": 16.06368715083799, "grad_norm": 0.4140807092189789, "learning_rate": 0.00019775910364145657, "loss": 0.3162, "step": 28754 }, { "epoch": 16.064245810055866, "grad_norm": 2.8879294395446777, "learning_rate": 0.0001977310924369748, "loss": 0.4743, "step": 28755 }, { "epoch": 16.064804469273742, "grad_norm": 0.49047958850860596, "learning_rate": 0.000197703081232493, "loss": 0.427, "step": 28756 }, { "epoch": 16.06536312849162, "grad_norm": 0.6948040127754211, "learning_rate": 0.00019767507002801122, "loss": 0.3439, "step": 28757 }, { "epoch": 16.0659217877095, "grad_norm": 1.8124775886535645, "learning_rate": 0.0001976470588235294, "loss": 0.3917, "step": 28758 }, { "epoch": 16.066480446927375, "grad_norm": 0.424328476190567, "learning_rate": 0.00019761904761904763, "loss": 0.4055, "step": 28759 }, { "epoch": 16.067039106145252, "grad_norm": 0.5531435012817383, "learning_rate": 0.00019759103641456583, "loss": 0.417, "step": 28760 }, { "epoch": 16.06759776536313, "grad_norm": 0.34919580817222595, "learning_rate": 0.00019756302521008404, "loss": 0.3044, "step": 28761 }, { "epoch": 16.068156424581005, "grad_norm": 0.42807140946388245, "learning_rate": 0.00019753501400560225, "loss": 0.372, "step": 28762 }, { "epoch": 16.06871508379888, "grad_norm": 0.5811235904693604, "learning_rate": 0.00019750700280112045, "loss": 0.8069, "step": 28763 }, { "epoch": 16.06927374301676, "grad_norm": 0.47905027866363525, "learning_rate": 0.00019747899159663866, "loss": 0.5167, "step": 28764 }, { "epoch": 16.06983240223464, "grad_norm": 0.41478869318962097, "learning_rate": 0.00019745098039215686, "loss": 0.4004, "step": 28765 }, { "epoch": 16.070391061452515, "grad_norm": 1.1165934801101685, "learning_rate": 
0.00019742296918767507, "loss": 0.3736, "step": 28766 }, { "epoch": 16.07094972067039, "grad_norm": 0.39963629841804504, "learning_rate": 0.0001973949579831933, "loss": 0.439, "step": 28767 }, { "epoch": 16.071508379888268, "grad_norm": 0.950663685798645, "learning_rate": 0.00019736694677871148, "loss": 0.6034, "step": 28768 }, { "epoch": 16.072067039106145, "grad_norm": 0.6046480536460876, "learning_rate": 0.0001973389355742297, "loss": 0.4055, "step": 28769 }, { "epoch": 16.07262569832402, "grad_norm": 0.3774864375591278, "learning_rate": 0.0001973109243697479, "loss": 0.4279, "step": 28770 }, { "epoch": 16.073184357541898, "grad_norm": 0.4944782257080078, "learning_rate": 0.00019728291316526613, "loss": 0.3541, "step": 28771 }, { "epoch": 16.073743016759778, "grad_norm": 0.8915618062019348, "learning_rate": 0.00019725490196078433, "loss": 0.4295, "step": 28772 }, { "epoch": 16.074301675977654, "grad_norm": 0.48576030135154724, "learning_rate": 0.0001972268907563025, "loss": 0.4015, "step": 28773 }, { "epoch": 16.07486033519553, "grad_norm": 0.4235306978225708, "learning_rate": 0.00019719887955182072, "loss": 0.5565, "step": 28774 }, { "epoch": 16.075418994413408, "grad_norm": 0.3478255569934845, "learning_rate": 0.00019717086834733895, "loss": 0.39, "step": 28775 }, { "epoch": 16.075977653631284, "grad_norm": 0.44412699341773987, "learning_rate": 0.00019714285714285716, "loss": 0.4141, "step": 28776 }, { "epoch": 16.07653631284916, "grad_norm": 1.8947107791900635, "learning_rate": 0.00019711484593837536, "loss": 0.4653, "step": 28777 }, { "epoch": 16.07709497206704, "grad_norm": 0.7067160606384277, "learning_rate": 0.00019708683473389354, "loss": 0.4177, "step": 28778 }, { "epoch": 16.077653631284917, "grad_norm": 0.5734338164329529, "learning_rate": 0.00019705882352941177, "loss": 0.4436, "step": 28779 }, { "epoch": 16.078212290502794, "grad_norm": 2.026653289794922, "learning_rate": 0.00019703081232492998, "loss": 0.364, "step": 28780 }, { "epoch": 
16.07877094972067, "grad_norm": 0.3616013824939728, "learning_rate": 0.0001970028011204482, "loss": 0.3936, "step": 28781 }, { "epoch": 16.079329608938547, "grad_norm": 0.4312297999858856, "learning_rate": 0.0001969747899159664, "loss": 0.3756, "step": 28782 }, { "epoch": 16.079888268156424, "grad_norm": 0.45226210355758667, "learning_rate": 0.0001969467787114846, "loss": 0.3535, "step": 28783 }, { "epoch": 16.0804469273743, "grad_norm": 0.4514426290988922, "learning_rate": 0.0001969187675070028, "loss": 0.4161, "step": 28784 }, { "epoch": 16.08100558659218, "grad_norm": 0.42622092366218567, "learning_rate": 0.000196890756302521, "loss": 0.4053, "step": 28785 }, { "epoch": 16.081564245810057, "grad_norm": 0.6194861531257629, "learning_rate": 0.00019686274509803922, "loss": 0.3239, "step": 28786 }, { "epoch": 16.082122905027934, "grad_norm": 0.6120905876159668, "learning_rate": 0.00019683473389355745, "loss": 0.3877, "step": 28787 }, { "epoch": 16.08268156424581, "grad_norm": 4.206735134124756, "learning_rate": 0.00019680672268907563, "loss": 0.4098, "step": 28788 }, { "epoch": 16.083240223463687, "grad_norm": 0.33900880813598633, "learning_rate": 0.00019677871148459383, "loss": 0.3221, "step": 28789 }, { "epoch": 16.083798882681563, "grad_norm": 1.4210306406021118, "learning_rate": 0.00019675070028011204, "loss": 0.6155, "step": 28790 }, { "epoch": 16.08435754189944, "grad_norm": 0.5302776098251343, "learning_rate": 0.00019672268907563027, "loss": 0.6129, "step": 28791 }, { "epoch": 16.08491620111732, "grad_norm": 0.7050266861915588, "learning_rate": 0.00019669467787114848, "loss": 0.447, "step": 28792 }, { "epoch": 16.085474860335196, "grad_norm": 0.3712307810783386, "learning_rate": 0.00019666666666666666, "loss": 0.3587, "step": 28793 }, { "epoch": 16.086033519553073, "grad_norm": 0.5986878275871277, "learning_rate": 0.00019663865546218486, "loss": 0.4713, "step": 28794 }, { "epoch": 16.08659217877095, "grad_norm": 0.4047383964061737, "learning_rate": 
0.0001966106442577031, "loss": 0.3765, "step": 28795 }, { "epoch": 16.087150837988826, "grad_norm": 0.4967782497406006, "learning_rate": 0.0001965826330532213, "loss": 0.3973, "step": 28796 }, { "epoch": 16.087709497206703, "grad_norm": 0.45842745900154114, "learning_rate": 0.0001965546218487395, "loss": 0.4939, "step": 28797 }, { "epoch": 16.08826815642458, "grad_norm": 0.3373325765132904, "learning_rate": 0.0001965266106442577, "loss": 0.3848, "step": 28798 }, { "epoch": 16.08882681564246, "grad_norm": 0.4641217291355133, "learning_rate": 0.00019649859943977592, "loss": 0.3516, "step": 28799 }, { "epoch": 16.089385474860336, "grad_norm": 0.4103059470653534, "learning_rate": 0.00019647058823529413, "loss": 0.5263, "step": 28800 }, { "epoch": 16.089944134078213, "grad_norm": 0.4553210437297821, "learning_rate": 0.00019644257703081233, "loss": 0.4864, "step": 28801 }, { "epoch": 16.09050279329609, "grad_norm": 0.33146676421165466, "learning_rate": 0.00019641456582633054, "loss": 0.3926, "step": 28802 }, { "epoch": 16.091061452513966, "grad_norm": 5.626728534698486, "learning_rate": 0.00019638655462184875, "loss": 0.3473, "step": 28803 }, { "epoch": 16.091620111731842, "grad_norm": 0.4232625961303711, "learning_rate": 0.00019635854341736695, "loss": 0.4914, "step": 28804 }, { "epoch": 16.092178770949722, "grad_norm": 1.3321236371994019, "learning_rate": 0.00019633053221288516, "loss": 0.3322, "step": 28805 }, { "epoch": 16.0927374301676, "grad_norm": 0.7122716307640076, "learning_rate": 0.00019630252100840336, "loss": 0.3935, "step": 28806 }, { "epoch": 16.093296089385476, "grad_norm": 0.46890321373939514, "learning_rate": 0.0001962745098039216, "loss": 0.4412, "step": 28807 }, { "epoch": 16.093854748603352, "grad_norm": 0.48771265149116516, "learning_rate": 0.00019624649859943978, "loss": 0.5041, "step": 28808 }, { "epoch": 16.09441340782123, "grad_norm": 0.4522497057914734, "learning_rate": 0.00019621848739495798, "loss": 0.4605, "step": 28809 }, { "epoch": 
16.094972067039105, "grad_norm": 0.743918240070343, "learning_rate": 0.0001961904761904762, "loss": 0.3463, "step": 28810 }, { "epoch": 16.095530726256982, "grad_norm": 6.346516132354736, "learning_rate": 0.00019616246498599442, "loss": 0.3904, "step": 28811 }, { "epoch": 16.096089385474862, "grad_norm": 0.45202475786209106, "learning_rate": 0.00019613445378151263, "loss": 0.3986, "step": 28812 }, { "epoch": 16.09664804469274, "grad_norm": 5.965319633483887, "learning_rate": 0.0001961064425770308, "loss": 0.4171, "step": 28813 }, { "epoch": 16.097206703910615, "grad_norm": 0.3890116810798645, "learning_rate": 0.000196078431372549, "loss": 0.4416, "step": 28814 }, { "epoch": 16.09776536312849, "grad_norm": 0.3330911099910736, "learning_rate": 0.00019605042016806724, "loss": 0.4189, "step": 28815 }, { "epoch": 16.098324022346368, "grad_norm": 0.3990189731121063, "learning_rate": 0.00019602240896358545, "loss": 0.382, "step": 28816 }, { "epoch": 16.098882681564245, "grad_norm": 0.5373325347900391, "learning_rate": 0.00019599439775910363, "loss": 0.4148, "step": 28817 }, { "epoch": 16.09944134078212, "grad_norm": 0.6801446080207825, "learning_rate": 0.00019596638655462184, "loss": 0.4598, "step": 28818 }, { "epoch": 16.1, "grad_norm": 0.3618169128894806, "learning_rate": 0.00019593837535014007, "loss": 0.2463, "step": 28819 }, { "epoch": 16.100558659217878, "grad_norm": 0.5345287919044495, "learning_rate": 0.00019591036414565827, "loss": 0.4374, "step": 28820 }, { "epoch": 16.101117318435755, "grad_norm": 0.5600844025611877, "learning_rate": 0.00019588235294117648, "loss": 0.3632, "step": 28821 }, { "epoch": 16.10167597765363, "grad_norm": 0.42011335492134094, "learning_rate": 0.00019585434173669466, "loss": 0.405, "step": 28822 }, { "epoch": 16.102234636871508, "grad_norm": 0.775844931602478, "learning_rate": 0.0001958263305322129, "loss": 0.4732, "step": 28823 }, { "epoch": 16.102793296089384, "grad_norm": 0.47399938106536865, "learning_rate": 0.0001957983193277311, 
"loss": 0.3936, "step": 28824 }, { "epoch": 16.10335195530726, "grad_norm": 0.42256438732147217, "learning_rate": 0.0001957703081232493, "loss": 0.4568, "step": 28825 }, { "epoch": 16.10391061452514, "grad_norm": 0.570817232131958, "learning_rate": 0.0001957422969187675, "loss": 0.5531, "step": 28826 }, { "epoch": 16.104469273743018, "grad_norm": 0.4960199296474457, "learning_rate": 0.00019571428571428572, "loss": 0.3889, "step": 28827 }, { "epoch": 16.105027932960894, "grad_norm": 0.5583524107933044, "learning_rate": 0.00019568627450980392, "loss": 0.4265, "step": 28828 }, { "epoch": 16.10558659217877, "grad_norm": 0.43021684885025024, "learning_rate": 0.00019565826330532213, "loss": 0.4337, "step": 28829 }, { "epoch": 16.106145251396647, "grad_norm": 0.7041333317756653, "learning_rate": 0.00019563025210084033, "loss": 0.4424, "step": 28830 }, { "epoch": 16.106703910614524, "grad_norm": 0.4659338891506195, "learning_rate": 0.00019560224089635857, "loss": 0.4191, "step": 28831 }, { "epoch": 16.107262569832404, "grad_norm": 0.5400794148445129, "learning_rate": 0.00019557422969187675, "loss": 0.4044, "step": 28832 }, { "epoch": 16.10782122905028, "grad_norm": 0.4359588921070099, "learning_rate": 0.00019554621848739495, "loss": 0.3595, "step": 28833 }, { "epoch": 16.108379888268157, "grad_norm": 0.9698153138160706, "learning_rate": 0.00019551820728291316, "loss": 0.3382, "step": 28834 }, { "epoch": 16.108938547486034, "grad_norm": 0.6758460402488708, "learning_rate": 0.0001954901960784314, "loss": 0.4053, "step": 28835 }, { "epoch": 16.10949720670391, "grad_norm": 0.49740511178970337, "learning_rate": 0.0001954621848739496, "loss": 0.4474, "step": 28836 }, { "epoch": 16.110055865921787, "grad_norm": 0.5072407126426697, "learning_rate": 0.00019543417366946778, "loss": 0.3999, "step": 28837 }, { "epoch": 16.110614525139663, "grad_norm": 0.5797218084335327, "learning_rate": 0.00019540616246498598, "loss": 0.3937, "step": 28838 }, { "epoch": 16.111173184357543, 
"grad_norm": 0.48711439967155457, "learning_rate": 0.00019537815126050422, "loss": 0.3525, "step": 28839 }, { "epoch": 16.11173184357542, "grad_norm": 0.2812398374080658, "learning_rate": 0.00019535014005602242, "loss": 0.3679, "step": 28840 }, { "epoch": 16.112290502793297, "grad_norm": 0.4230554699897766, "learning_rate": 0.00019532212885154063, "loss": 0.3439, "step": 28841 }, { "epoch": 16.112849162011173, "grad_norm": 0.8616529107093811, "learning_rate": 0.0001952941176470588, "loss": 0.3844, "step": 28842 }, { "epoch": 16.11340782122905, "grad_norm": 0.8501707315444946, "learning_rate": 0.00019526610644257704, "loss": 0.4702, "step": 28843 }, { "epoch": 16.113966480446926, "grad_norm": 0.6084223389625549, "learning_rate": 0.00019523809523809525, "loss": 0.7092, "step": 28844 }, { "epoch": 16.114525139664803, "grad_norm": 0.7039982080459595, "learning_rate": 0.00019521008403361345, "loss": 0.3849, "step": 28845 }, { "epoch": 16.115083798882683, "grad_norm": 0.3611606955528259, "learning_rate": 0.00019518207282913166, "loss": 0.3713, "step": 28846 }, { "epoch": 16.11564245810056, "grad_norm": 0.4002581238746643, "learning_rate": 0.00019515406162464986, "loss": 0.4499, "step": 28847 }, { "epoch": 16.116201117318436, "grad_norm": 4.773829460144043, "learning_rate": 0.00019512605042016807, "loss": 0.3019, "step": 28848 }, { "epoch": 16.116759776536313, "grad_norm": 0.5328097343444824, "learning_rate": 0.00019509803921568628, "loss": 0.4014, "step": 28849 }, { "epoch": 16.11731843575419, "grad_norm": 0.5776244401931763, "learning_rate": 0.00019507002801120448, "loss": 0.4332, "step": 28850 }, { "epoch": 16.117877094972066, "grad_norm": 1.8821289539337158, "learning_rate": 0.00019504201680672271, "loss": 0.4398, "step": 28851 }, { "epoch": 16.118435754189946, "grad_norm": 0.5329691767692566, "learning_rate": 0.0001950140056022409, "loss": 0.4845, "step": 28852 }, { "epoch": 16.118994413407822, "grad_norm": 0.6767594218254089, "learning_rate": 0.0001949859943977591, 
"loss": 0.4634, "step": 28853 }, { "epoch": 16.1195530726257, "grad_norm": 1.8163594007492065, "learning_rate": 0.0001949579831932773, "loss": 0.5032, "step": 28854 }, { "epoch": 16.120111731843576, "grad_norm": 0.36950555443763733, "learning_rate": 0.00019492997198879554, "loss": 0.3995, "step": 28855 }, { "epoch": 16.120670391061452, "grad_norm": 0.47612613439559937, "learning_rate": 0.00019490196078431374, "loss": 0.4771, "step": 28856 }, { "epoch": 16.12122905027933, "grad_norm": 0.419756680727005, "learning_rate": 0.00019487394957983192, "loss": 0.3311, "step": 28857 }, { "epoch": 16.121787709497205, "grad_norm": 1.678883671760559, "learning_rate": 0.00019484593837535013, "loss": 0.3767, "step": 28858 }, { "epoch": 16.122346368715085, "grad_norm": 0.36759650707244873, "learning_rate": 0.00019481792717086836, "loss": 0.3838, "step": 28859 }, { "epoch": 16.122905027932962, "grad_norm": 0.39833423495292664, "learning_rate": 0.00019478991596638657, "loss": 0.3711, "step": 28860 }, { "epoch": 16.12346368715084, "grad_norm": 0.4569665491580963, "learning_rate": 0.00019476190476190477, "loss": 0.3828, "step": 28861 }, { "epoch": 16.124022346368715, "grad_norm": 0.6211580038070679, "learning_rate": 0.00019473389355742295, "loss": 0.3842, "step": 28862 }, { "epoch": 16.12458100558659, "grad_norm": 0.41862139105796814, "learning_rate": 0.0001947058823529412, "loss": 0.4467, "step": 28863 }, { "epoch": 16.12513966480447, "grad_norm": 0.41824907064437866, "learning_rate": 0.0001946778711484594, "loss": 0.393, "step": 28864 }, { "epoch": 16.125698324022345, "grad_norm": 0.3791068494319916, "learning_rate": 0.0001946498599439776, "loss": 0.4825, "step": 28865 }, { "epoch": 16.126256983240225, "grad_norm": 1.915249228477478, "learning_rate": 0.0001946218487394958, "loss": 0.4422, "step": 28866 }, { "epoch": 16.1268156424581, "grad_norm": 0.7629830241203308, "learning_rate": 0.000194593837535014, "loss": 0.4596, "step": 28867 }, { "epoch": 16.127374301675978, "grad_norm": 
0.4331672191619873, "learning_rate": 0.00019456582633053222, "loss": 0.3667, "step": 28868 }, { "epoch": 16.127932960893855, "grad_norm": 0.524337887763977, "learning_rate": 0.00019453781512605042, "loss": 0.455, "step": 28869 }, { "epoch": 16.12849162011173, "grad_norm": 0.3574155867099762, "learning_rate": 0.00019450980392156863, "loss": 0.3914, "step": 28870 }, { "epoch": 16.129050279329608, "grad_norm": 0.535084068775177, "learning_rate": 0.00019448179271708683, "loss": 0.5184, "step": 28871 }, { "epoch": 16.129608938547484, "grad_norm": 0.3467373549938202, "learning_rate": 0.00019445378151260504, "loss": 0.3454, "step": 28872 }, { "epoch": 16.130167597765364, "grad_norm": 0.4609588384628296, "learning_rate": 0.00019442577030812325, "loss": 0.4708, "step": 28873 }, { "epoch": 16.13072625698324, "grad_norm": 0.9235809445381165, "learning_rate": 0.00019439775910364145, "loss": 0.3322, "step": 28874 }, { "epoch": 16.131284916201118, "grad_norm": 0.6614897847175598, "learning_rate": 0.00019436974789915969, "loss": 0.377, "step": 28875 }, { "epoch": 16.131843575418994, "grad_norm": 0.42744559049606323, "learning_rate": 0.00019434173669467786, "loss": 0.422, "step": 28876 }, { "epoch": 16.13240223463687, "grad_norm": 0.4130760431289673, "learning_rate": 0.00019431372549019607, "loss": 0.4242, "step": 28877 }, { "epoch": 16.132960893854747, "grad_norm": 0.5075344443321228, "learning_rate": 0.00019428571428571428, "loss": 0.4315, "step": 28878 }, { "epoch": 16.133519553072627, "grad_norm": 0.8356727361679077, "learning_rate": 0.0001942577030812325, "loss": 0.3516, "step": 28879 }, { "epoch": 16.134078212290504, "grad_norm": 2.4321515560150146, "learning_rate": 0.00019422969187675072, "loss": 0.4694, "step": 28880 }, { "epoch": 16.13463687150838, "grad_norm": 1.457574486732483, "learning_rate": 0.0001942016806722689, "loss": 0.3689, "step": 28881 }, { "epoch": 16.135195530726257, "grad_norm": 0.3981468975543976, "learning_rate": 0.0001941736694677871, "loss": 0.3455, 
"step": 28882 }, { "epoch": 16.135754189944134, "grad_norm": 1.4101759195327759, "learning_rate": 0.00019414565826330533, "loss": 0.4785, "step": 28883 }, { "epoch": 16.13631284916201, "grad_norm": 0.7086668014526367, "learning_rate": 0.00019411764705882354, "loss": 0.4477, "step": 28884 }, { "epoch": 16.136871508379887, "grad_norm": 0.6071313619613647, "learning_rate": 0.00019408963585434175, "loss": 0.5448, "step": 28885 }, { "epoch": 16.137430167597767, "grad_norm": 0.3937363922595978, "learning_rate": 0.00019406162464985992, "loss": 0.382, "step": 28886 }, { "epoch": 16.137988826815644, "grad_norm": 0.34242311120033264, "learning_rate": 0.00019403361344537816, "loss": 0.3983, "step": 28887 }, { "epoch": 16.13854748603352, "grad_norm": 0.6401903629302979, "learning_rate": 0.00019400560224089636, "loss": 0.392, "step": 28888 }, { "epoch": 16.139106145251397, "grad_norm": 0.712629497051239, "learning_rate": 0.00019397759103641457, "loss": 0.3884, "step": 28889 }, { "epoch": 16.139664804469273, "grad_norm": 0.500311017036438, "learning_rate": 0.00019394957983193278, "loss": 0.367, "step": 28890 }, { "epoch": 16.14022346368715, "grad_norm": 0.5342960357666016, "learning_rate": 0.00019392156862745098, "loss": 0.4055, "step": 28891 }, { "epoch": 16.140782122905026, "grad_norm": 0.543070375919342, "learning_rate": 0.0001938935574229692, "loss": 0.3509, "step": 28892 }, { "epoch": 16.141340782122906, "grad_norm": 0.3987923264503479, "learning_rate": 0.0001938655462184874, "loss": 0.3665, "step": 28893 }, { "epoch": 16.141899441340783, "grad_norm": 0.5370165705680847, "learning_rate": 0.0001938375350140056, "loss": 0.4208, "step": 28894 }, { "epoch": 16.14245810055866, "grad_norm": 0.38203269243240356, "learning_rate": 0.00019380952380952383, "loss": 0.3763, "step": 28895 }, { "epoch": 16.143016759776536, "grad_norm": 0.4971391260623932, "learning_rate": 0.000193781512605042, "loss": 0.4206, "step": 28896 }, { "epoch": 16.143575418994413, "grad_norm": 1.1526046991348267, 
"learning_rate": 0.00019375350140056022, "loss": 0.3644, "step": 28897 }, { "epoch": 16.14413407821229, "grad_norm": 0.33874574303627014, "learning_rate": 0.00019372549019607842, "loss": 0.3382, "step": 28898 }, { "epoch": 16.144692737430166, "grad_norm": 0.35815566778182983, "learning_rate": 0.00019369747899159666, "loss": 0.3562, "step": 28899 }, { "epoch": 16.145251396648046, "grad_norm": 0.36037832498550415, "learning_rate": 0.00019366946778711486, "loss": 0.3481, "step": 28900 }, { "epoch": 16.145810055865923, "grad_norm": 0.6234013438224792, "learning_rate": 0.00019364145658263304, "loss": 0.4623, "step": 28901 }, { "epoch": 16.1463687150838, "grad_norm": 0.4487958252429962, "learning_rate": 0.00019361344537815125, "loss": 0.3994, "step": 28902 }, { "epoch": 16.146927374301676, "grad_norm": 0.45689263939857483, "learning_rate": 0.00019358543417366948, "loss": 0.5541, "step": 28903 }, { "epoch": 16.147486033519552, "grad_norm": 0.45454445481300354, "learning_rate": 0.0001935574229691877, "loss": 0.4564, "step": 28904 }, { "epoch": 16.14804469273743, "grad_norm": 0.592432975769043, "learning_rate": 0.0001935294117647059, "loss": 0.4554, "step": 28905 }, { "epoch": 16.14860335195531, "grad_norm": 0.8670316934585571, "learning_rate": 0.00019350140056022407, "loss": 0.3391, "step": 28906 }, { "epoch": 16.149162011173186, "grad_norm": 1.1139332056045532, "learning_rate": 0.0001934733893557423, "loss": 0.4316, "step": 28907 }, { "epoch": 16.149720670391062, "grad_norm": 0.34945961833000183, "learning_rate": 0.0001934453781512605, "loss": 0.2629, "step": 28908 }, { "epoch": 16.15027932960894, "grad_norm": 0.3951212167739868, "learning_rate": 0.00019341736694677872, "loss": 0.3899, "step": 28909 }, { "epoch": 16.150837988826815, "grad_norm": 0.34655168652534485, "learning_rate": 0.00019338935574229692, "loss": 0.3558, "step": 28910 }, { "epoch": 16.15139664804469, "grad_norm": 0.3671613335609436, "learning_rate": 0.00019336134453781513, "loss": 0.3536, "step": 28911 
}, { "epoch": 16.15195530726257, "grad_norm": 0.3306417465209961, "learning_rate": 0.00019333333333333333, "loss": 0.3765, "step": 28912 }, { "epoch": 16.15251396648045, "grad_norm": 0.4065249264240265, "learning_rate": 0.00019330532212885154, "loss": 0.3667, "step": 28913 }, { "epoch": 16.153072625698325, "grad_norm": 0.4873636066913605, "learning_rate": 0.00019327731092436975, "loss": 0.361, "step": 28914 }, { "epoch": 16.1536312849162, "grad_norm": 0.42552927136421204, "learning_rate": 0.00019324929971988798, "loss": 0.5713, "step": 28915 }, { "epoch": 16.154189944134078, "grad_norm": 1.208526849746704, "learning_rate": 0.00019322128851540616, "loss": 0.3693, "step": 28916 }, { "epoch": 16.154748603351955, "grad_norm": 0.660973072052002, "learning_rate": 0.00019319327731092436, "loss": 0.3673, "step": 28917 }, { "epoch": 16.15530726256983, "grad_norm": 0.35808494687080383, "learning_rate": 0.00019316526610644257, "loss": 0.3344, "step": 28918 }, { "epoch": 16.155865921787708, "grad_norm": 0.38039055466651917, "learning_rate": 0.0001931372549019608, "loss": 0.4074, "step": 28919 }, { "epoch": 16.156424581005588, "grad_norm": 0.5032056570053101, "learning_rate": 0.000193109243697479, "loss": 0.4776, "step": 28920 }, { "epoch": 16.156983240223465, "grad_norm": 0.5298901200294495, "learning_rate": 0.0001930812324929972, "loss": 0.4434, "step": 28921 }, { "epoch": 16.15754189944134, "grad_norm": 0.6850790977478027, "learning_rate": 0.0001930532212885154, "loss": 0.3908, "step": 28922 }, { "epoch": 16.158100558659218, "grad_norm": 0.8807922601699829, "learning_rate": 0.00019302521008403363, "loss": 0.4267, "step": 28923 }, { "epoch": 16.158659217877094, "grad_norm": 0.4602443277835846, "learning_rate": 0.00019299719887955183, "loss": 0.4627, "step": 28924 }, { "epoch": 16.15921787709497, "grad_norm": 0.34916195273399353, "learning_rate": 0.00019296918767507004, "loss": 0.2844, "step": 28925 }, { "epoch": 16.159776536312847, "grad_norm": 0.4090387523174286, 
"learning_rate": 0.00019294117647058822, "loss": 0.5318, "step": 28926 }, { "epoch": 16.160335195530728, "grad_norm": 1.464445948600769, "learning_rate": 0.00019291316526610645, "loss": 0.3715, "step": 28927 }, { "epoch": 16.160893854748604, "grad_norm": 0.383338987827301, "learning_rate": 0.00019288515406162466, "loss": 0.4827, "step": 28928 }, { "epoch": 16.16145251396648, "grad_norm": 2.73823618888855, "learning_rate": 0.00019285714285714286, "loss": 0.443, "step": 28929 }, { "epoch": 16.162011173184357, "grad_norm": 1.5407174825668335, "learning_rate": 0.00019282913165266104, "loss": 0.3322, "step": 28930 }, { "epoch": 16.162569832402234, "grad_norm": 0.46490007638931274, "learning_rate": 0.00019280112044817928, "loss": 0.3855, "step": 28931 }, { "epoch": 16.16312849162011, "grad_norm": 0.4543697237968445, "learning_rate": 0.00019277310924369748, "loss": 0.4277, "step": 28932 }, { "epoch": 16.16368715083799, "grad_norm": 0.36964547634124756, "learning_rate": 0.0001927450980392157, "loss": 0.3994, "step": 28933 }, { "epoch": 16.164245810055867, "grad_norm": 0.533072292804718, "learning_rate": 0.0001927170868347339, "loss": 0.3803, "step": 28934 }, { "epoch": 16.164804469273744, "grad_norm": 0.471686989068985, "learning_rate": 0.0001926890756302521, "loss": 0.4216, "step": 28935 }, { "epoch": 16.16536312849162, "grad_norm": 0.4997316300868988, "learning_rate": 0.0001926610644257703, "loss": 0.3474, "step": 28936 }, { "epoch": 16.165921787709497, "grad_norm": 0.49094322323799133, "learning_rate": 0.0001926330532212885, "loss": 0.4135, "step": 28937 }, { "epoch": 16.166480446927373, "grad_norm": 0.43086546659469604, "learning_rate": 0.00019260504201680672, "loss": 0.5377, "step": 28938 }, { "epoch": 16.16703910614525, "grad_norm": 4.299013614654541, "learning_rate": 0.00019257703081232495, "loss": 0.4876, "step": 28939 }, { "epoch": 16.16759776536313, "grad_norm": 0.48508575558662415, "learning_rate": 0.00019254901960784313, "loss": 0.413, "step": 28940 }, { 
"epoch": 16.168156424581007, "grad_norm": 0.4109722375869751, "learning_rate": 0.00019252100840336134, "loss": 0.3512, "step": 28941 }, { "epoch": 16.168715083798883, "grad_norm": 0.4435434639453888, "learning_rate": 0.00019249299719887954, "loss": 0.3758, "step": 28942 }, { "epoch": 16.16927374301676, "grad_norm": 2.2694084644317627, "learning_rate": 0.00019246498599439777, "loss": 0.4886, "step": 28943 }, { "epoch": 16.169832402234636, "grad_norm": 0.3850485682487488, "learning_rate": 0.00019243697478991598, "loss": 0.38, "step": 28944 }, { "epoch": 16.170391061452513, "grad_norm": 0.5183414220809937, "learning_rate": 0.00019240896358543416, "loss": 0.4263, "step": 28945 }, { "epoch": 16.17094972067039, "grad_norm": 1.2390666007995605, "learning_rate": 0.00019238095238095237, "loss": 0.3359, "step": 28946 }, { "epoch": 16.17150837988827, "grad_norm": 0.6483945846557617, "learning_rate": 0.0001923529411764706, "loss": 0.4244, "step": 28947 }, { "epoch": 16.172067039106146, "grad_norm": 5.2322258949279785, "learning_rate": 0.0001923249299719888, "loss": 0.4502, "step": 28948 }, { "epoch": 16.172625698324023, "grad_norm": 0.7982333898544312, "learning_rate": 0.000192296918767507, "loss": 0.4099, "step": 28949 }, { "epoch": 16.1731843575419, "grad_norm": 0.4285476505756378, "learning_rate": 0.0001922689075630252, "loss": 0.4088, "step": 28950 }, { "epoch": 16.173743016759776, "grad_norm": 5.1269989013671875, "learning_rate": 0.00019224089635854342, "loss": 0.3385, "step": 28951 }, { "epoch": 16.174301675977652, "grad_norm": 1.214417576789856, "learning_rate": 0.00019221288515406163, "loss": 0.413, "step": 28952 }, { "epoch": 16.174860335195532, "grad_norm": 0.46083489060401917, "learning_rate": 0.00019218487394957983, "loss": 0.3782, "step": 28953 }, { "epoch": 16.17541899441341, "grad_norm": 0.366047203540802, "learning_rate": 0.00019215686274509807, "loss": 0.3492, "step": 28954 }, { "epoch": 16.175977653631286, "grad_norm": 3.1637935638427734, "learning_rate": 
0.00019212885154061625, "loss": 0.3387, "step": 28955 }, { "epoch": 16.176536312849162, "grad_norm": 0.5342099666595459, "learning_rate": 0.00019210084033613445, "loss": 0.334, "step": 28956 }, { "epoch": 16.17709497206704, "grad_norm": 0.6804115176200867, "learning_rate": 0.00019207282913165266, "loss": 0.437, "step": 28957 }, { "epoch": 16.177653631284915, "grad_norm": 0.574478805065155, "learning_rate": 0.0001920448179271709, "loss": 0.4401, "step": 28958 }, { "epoch": 16.178212290502792, "grad_norm": 0.4467751383781433, "learning_rate": 0.0001920168067226891, "loss": 0.5054, "step": 28959 }, { "epoch": 16.178770949720672, "grad_norm": 0.35941118001937866, "learning_rate": 0.00019198879551820728, "loss": 0.396, "step": 28960 }, { "epoch": 16.17932960893855, "grad_norm": 0.3976753354072571, "learning_rate": 0.00019196078431372548, "loss": 0.3442, "step": 28961 }, { "epoch": 16.179888268156425, "grad_norm": 1.8833200931549072, "learning_rate": 0.00019193277310924372, "loss": 0.4503, "step": 28962 }, { "epoch": 16.1804469273743, "grad_norm": 0.5222572088241577, "learning_rate": 0.00019190476190476192, "loss": 0.4895, "step": 28963 }, { "epoch": 16.18100558659218, "grad_norm": 1.4167481660842896, "learning_rate": 0.00019187675070028013, "loss": 0.3876, "step": 28964 }, { "epoch": 16.181564245810055, "grad_norm": 0.4568377733230591, "learning_rate": 0.0001918487394957983, "loss": 0.4048, "step": 28965 }, { "epoch": 16.18212290502793, "grad_norm": 0.351442813873291, "learning_rate": 0.00019182072829131654, "loss": 0.3562, "step": 28966 }, { "epoch": 16.18268156424581, "grad_norm": 0.32675302028656006, "learning_rate": 0.00019179271708683475, "loss": 0.362, "step": 28967 }, { "epoch": 16.183240223463688, "grad_norm": 0.44293713569641113, "learning_rate": 0.00019176470588235295, "loss": 0.4205, "step": 28968 }, { "epoch": 16.183798882681565, "grad_norm": 0.36270028352737427, "learning_rate": 0.00019173669467787116, "loss": 0.4336, "step": 28969 }, { "epoch": 
16.18435754189944, "grad_norm": 1.8359348773956299, "learning_rate": 0.00019170868347338936, "loss": 0.3976, "step": 28970 }, { "epoch": 16.184916201117318, "grad_norm": 0.5431550145149231, "learning_rate": 0.00019168067226890757, "loss": 0.452, "step": 28971 }, { "epoch": 16.185474860335194, "grad_norm": 0.4765489995479584, "learning_rate": 0.00019165266106442578, "loss": 0.4306, "step": 28972 }, { "epoch": 16.18603351955307, "grad_norm": 0.5705583095550537, "learning_rate": 0.00019162464985994398, "loss": 0.5865, "step": 28973 }, { "epoch": 16.18659217877095, "grad_norm": 0.6350952386856079, "learning_rate": 0.00019159663865546221, "loss": 0.3381, "step": 28974 }, { "epoch": 16.187150837988828, "grad_norm": 0.5249627232551575, "learning_rate": 0.0001915686274509804, "loss": 0.4851, "step": 28975 }, { "epoch": 16.187709497206704, "grad_norm": 3.483253002166748, "learning_rate": 0.0001915406162464986, "loss": 0.4001, "step": 28976 }, { "epoch": 16.18826815642458, "grad_norm": 0.6546013355255127, "learning_rate": 0.0001915126050420168, "loss": 0.4291, "step": 28977 }, { "epoch": 16.188826815642457, "grad_norm": 0.4770772457122803, "learning_rate": 0.00019148459383753504, "loss": 0.4826, "step": 28978 }, { "epoch": 16.189385474860334, "grad_norm": 0.6366468667984009, "learning_rate": 0.00019145658263305324, "loss": 0.4469, "step": 28979 }, { "epoch": 16.189944134078214, "grad_norm": 0.6755110621452332, "learning_rate": 0.00019142857142857142, "loss": 0.4744, "step": 28980 }, { "epoch": 16.19050279329609, "grad_norm": 0.8556031584739685, "learning_rate": 0.00019140056022408963, "loss": 0.5351, "step": 28981 }, { "epoch": 16.191061452513967, "grad_norm": 0.6915291547775269, "learning_rate": 0.00019137254901960786, "loss": 0.6405, "step": 28982 }, { "epoch": 16.191620111731844, "grad_norm": 0.6408063769340515, "learning_rate": 0.00019134453781512607, "loss": 0.6071, "step": 28983 }, { "epoch": 16.19217877094972, "grad_norm": 0.39270055294036865, "learning_rate": 
0.00019131652661064425, "loss": 0.4022, "step": 28984 }, { "epoch": 16.192737430167597, "grad_norm": 0.4455922842025757, "learning_rate": 0.00019128851540616245, "loss": 0.4118, "step": 28985 }, { "epoch": 16.193296089385473, "grad_norm": 0.4017958343029022, "learning_rate": 0.0001912605042016807, "loss": 0.4036, "step": 28986 }, { "epoch": 16.193854748603353, "grad_norm": 0.4968282878398895, "learning_rate": 0.0001912324929971989, "loss": 0.4791, "step": 28987 }, { "epoch": 16.19441340782123, "grad_norm": 0.3548322021961212, "learning_rate": 0.0001912044817927171, "loss": 0.3629, "step": 28988 }, { "epoch": 16.194972067039107, "grad_norm": 0.3978058993816376, "learning_rate": 0.00019117647058823528, "loss": 0.408, "step": 28989 }, { "epoch": 16.195530726256983, "grad_norm": 0.4985060393810272, "learning_rate": 0.0001911484593837535, "loss": 0.4459, "step": 28990 }, { "epoch": 16.19608938547486, "grad_norm": 1.926979422569275, "learning_rate": 0.00019112044817927172, "loss": 0.478, "step": 28991 }, { "epoch": 16.196648044692736, "grad_norm": 0.41042885184288025, "learning_rate": 0.00019109243697478992, "loss": 0.2723, "step": 28992 }, { "epoch": 16.197206703910613, "grad_norm": 0.3781493604183197, "learning_rate": 0.00019106442577030813, "loss": 0.385, "step": 28993 }, { "epoch": 16.197765363128493, "grad_norm": 0.36626923084259033, "learning_rate": 0.00019103641456582633, "loss": 0.3516, "step": 28994 }, { "epoch": 16.19832402234637, "grad_norm": 0.45216286182403564, "learning_rate": 0.00019100840336134454, "loss": 0.4302, "step": 28995 }, { "epoch": 16.198882681564246, "grad_norm": 0.8124073147773743, "learning_rate": 0.00019098039215686275, "loss": 0.3794, "step": 28996 }, { "epoch": 16.199441340782123, "grad_norm": 0.7732542753219604, "learning_rate": 0.00019095238095238095, "loss": 0.3908, "step": 28997 }, { "epoch": 16.2, "grad_norm": 0.6467917561531067, "learning_rate": 0.00019092436974789919, "loss": 0.4249, "step": 28998 }, { "epoch": 16.200558659217876, 
"grad_norm": 0.3322466313838959, "learning_rate": 0.00019089635854341736, "loss": 0.3154, "step": 28999 }, { "epoch": 16.201117318435756, "grad_norm": 0.7633682489395142, "learning_rate": 0.00019086834733893557, "loss": 0.339, "step": 29000 }, { "epoch": 16.201117318435756, "eval_cer": 0.08562731170829105, "eval_loss": 0.3217138350009918, "eval_runtime": 55.4967, "eval_samples_per_second": 81.771, "eval_steps_per_second": 5.117, "eval_wer": 0.3395156559872629, "step": 29000 }, { "epoch": 16.201675977653633, "grad_norm": 0.49037250876426697, "learning_rate": 0.00019084033613445378, "loss": 0.425, "step": 29001 }, { "epoch": 16.20223463687151, "grad_norm": 0.3645758032798767, "learning_rate": 0.000190812324929972, "loss": 0.3603, "step": 29002 }, { "epoch": 16.202793296089386, "grad_norm": 0.41297295689582825, "learning_rate": 0.00019078431372549022, "loss": 0.4849, "step": 29003 }, { "epoch": 16.203351955307262, "grad_norm": 0.36511531472206116, "learning_rate": 0.0001907563025210084, "loss": 0.3893, "step": 29004 }, { "epoch": 16.20391061452514, "grad_norm": 0.5171051621437073, "learning_rate": 0.0001907282913165266, "loss": 0.3123, "step": 29005 }, { "epoch": 16.204469273743015, "grad_norm": 0.4663669466972351, "learning_rate": 0.00019070028011204483, "loss": 0.3906, "step": 29006 }, { "epoch": 16.205027932960895, "grad_norm": 0.49591711163520813, "learning_rate": 0.00019067226890756304, "loss": 0.4604, "step": 29007 }, { "epoch": 16.205586592178772, "grad_norm": 0.7694417834281921, "learning_rate": 0.00019064425770308125, "loss": 0.3802, "step": 29008 }, { "epoch": 16.20614525139665, "grad_norm": 1.1275218725204468, "learning_rate": 0.00019061624649859942, "loss": 0.5457, "step": 29009 }, { "epoch": 16.206703910614525, "grad_norm": 0.6767483949661255, "learning_rate": 0.00019058823529411766, "loss": 0.4056, "step": 29010 }, { "epoch": 16.2072625698324, "grad_norm": 1.1713223457336426, "learning_rate": 0.00019056022408963586, "loss": 0.32, "step": 29011 }, { 
"epoch": 16.20782122905028, "grad_norm": 0.41864219307899475, "learning_rate": 0.00019053221288515407, "loss": 0.4077, "step": 29012 }, { "epoch": 16.208379888268155, "grad_norm": 0.6184785962104797, "learning_rate": 0.00019050420168067228, "loss": 0.3934, "step": 29013 }, { "epoch": 16.208938547486035, "grad_norm": 0.449820339679718, "learning_rate": 0.00019047619047619048, "loss": 0.4662, "step": 29014 }, { "epoch": 16.20949720670391, "grad_norm": 0.6108324527740479, "learning_rate": 0.0001904481792717087, "loss": 0.6847, "step": 29015 }, { "epoch": 16.210055865921788, "grad_norm": 0.45040029287338257, "learning_rate": 0.0001904201680672269, "loss": 0.379, "step": 29016 }, { "epoch": 16.210614525139665, "grad_norm": 0.6352062821388245, "learning_rate": 0.0001903921568627451, "loss": 0.4382, "step": 29017 }, { "epoch": 16.21117318435754, "grad_norm": 0.6224572658538818, "learning_rate": 0.00019036414565826333, "loss": 0.4887, "step": 29018 }, { "epoch": 16.211731843575418, "grad_norm": 0.4349299967288971, "learning_rate": 0.0001903361344537815, "loss": 0.3016, "step": 29019 }, { "epoch": 16.212290502793294, "grad_norm": 0.5045689344406128, "learning_rate": 0.00019030812324929972, "loss": 0.4532, "step": 29020 }, { "epoch": 16.212849162011175, "grad_norm": 0.6894789338111877, "learning_rate": 0.00019028011204481792, "loss": 0.5491, "step": 29021 }, { "epoch": 16.21340782122905, "grad_norm": 0.5629291534423828, "learning_rate": 0.00019025210084033616, "loss": 0.3674, "step": 29022 }, { "epoch": 16.213966480446928, "grad_norm": 0.49713167548179626, "learning_rate": 0.00019022408963585436, "loss": 0.439, "step": 29023 }, { "epoch": 16.214525139664804, "grad_norm": 0.9506420493125916, "learning_rate": 0.00019019607843137254, "loss": 0.5599, "step": 29024 }, { "epoch": 16.21508379888268, "grad_norm": 0.39019033312797546, "learning_rate": 0.00019016806722689075, "loss": 0.4051, "step": 29025 }, { "epoch": 16.215642458100557, "grad_norm": 0.3452981114387512, 
"learning_rate": 0.00019014005602240898, "loss": 0.4062, "step": 29026 }, { "epoch": 16.216201117318437, "grad_norm": 1.3234254121780396, "learning_rate": 0.0001901120448179272, "loss": 0.3791, "step": 29027 }, { "epoch": 16.216759776536314, "grad_norm": 0.46408969163894653, "learning_rate": 0.0001900840336134454, "loss": 0.3053, "step": 29028 }, { "epoch": 16.21731843575419, "grad_norm": 0.6138445734977722, "learning_rate": 0.00019005602240896357, "loss": 0.2913, "step": 29029 }, { "epoch": 16.217877094972067, "grad_norm": 0.5210884809494019, "learning_rate": 0.0001900280112044818, "loss": 0.4327, "step": 29030 }, { "epoch": 16.218435754189944, "grad_norm": 0.4872722625732422, "learning_rate": 0.00019, "loss": 0.3336, "step": 29031 }, { "epoch": 16.21899441340782, "grad_norm": 0.3628700077533722, "learning_rate": 0.00018997198879551822, "loss": 0.3824, "step": 29032 }, { "epoch": 16.219553072625697, "grad_norm": 0.3308964967727661, "learning_rate": 0.00018994397759103642, "loss": 0.3566, "step": 29033 }, { "epoch": 16.220111731843577, "grad_norm": 0.918548047542572, "learning_rate": 0.00018991596638655463, "loss": 0.5195, "step": 29034 }, { "epoch": 16.220670391061454, "grad_norm": 0.7363218069076538, "learning_rate": 0.00018988795518207283, "loss": 0.3971, "step": 29035 }, { "epoch": 16.22122905027933, "grad_norm": 0.39213788509368896, "learning_rate": 0.00018985994397759104, "loss": 0.3311, "step": 29036 }, { "epoch": 16.221787709497207, "grad_norm": 0.42629334330558777, "learning_rate": 0.00018983193277310925, "loss": 0.4387, "step": 29037 }, { "epoch": 16.222346368715083, "grad_norm": 0.7424758672714233, "learning_rate": 0.00018980392156862745, "loss": 0.5691, "step": 29038 }, { "epoch": 16.22290502793296, "grad_norm": 0.7804065346717834, "learning_rate": 0.00018977591036414566, "loss": 0.441, "step": 29039 }, { "epoch": 16.223463687150836, "grad_norm": 0.43857377767562866, "learning_rate": 0.00018974789915966386, "loss": 0.4612, "step": 29040 }, { "epoch": 
16.224022346368717, "grad_norm": 0.5992130637168884, "learning_rate": 0.00018971988795518207, "loss": 0.4271, "step": 29041 }, { "epoch": 16.224581005586593, "grad_norm": 0.4632066786289215, "learning_rate": 0.0001896918767507003, "loss": 0.4237, "step": 29042 }, { "epoch": 16.22513966480447, "grad_norm": 0.4995499551296234, "learning_rate": 0.00018966386554621848, "loss": 0.3592, "step": 29043 }, { "epoch": 16.225698324022346, "grad_norm": 0.40829551219940186, "learning_rate": 0.0001896358543417367, "loss": 0.3492, "step": 29044 }, { "epoch": 16.226256983240223, "grad_norm": 0.34807339310646057, "learning_rate": 0.0001896078431372549, "loss": 0.3914, "step": 29045 }, { "epoch": 16.2268156424581, "grad_norm": 5.844367027282715, "learning_rate": 0.00018957983193277313, "loss": 0.4812, "step": 29046 }, { "epoch": 16.227374301675976, "grad_norm": 0.4908343255519867, "learning_rate": 0.00018955182072829133, "loss": 0.4931, "step": 29047 }, { "epoch": 16.227932960893856, "grad_norm": 1.6199932098388672, "learning_rate": 0.0001895238095238095, "loss": 0.6624, "step": 29048 }, { "epoch": 16.228491620111733, "grad_norm": 0.5040088295936584, "learning_rate": 0.00018949579831932772, "loss": 0.4234, "step": 29049 }, { "epoch": 16.22905027932961, "grad_norm": 0.40048477053642273, "learning_rate": 0.00018946778711484595, "loss": 0.4307, "step": 29050 }, { "epoch": 16.229608938547486, "grad_norm": 0.46609658002853394, "learning_rate": 0.00018943977591036416, "loss": 0.5186, "step": 29051 }, { "epoch": 16.230167597765362, "grad_norm": 0.37821826338768005, "learning_rate": 0.00018941176470588236, "loss": 0.3089, "step": 29052 }, { "epoch": 16.23072625698324, "grad_norm": 0.5133246779441833, "learning_rate": 0.00018938375350140054, "loss": 0.5561, "step": 29053 }, { "epoch": 16.23128491620112, "grad_norm": 0.6027588844299316, "learning_rate": 0.00018935574229691878, "loss": 0.6304, "step": 29054 }, { "epoch": 16.231843575418996, "grad_norm": 2.0489094257354736, "learning_rate": 
0.00018932773109243698, "loss": 0.4385, "step": 29055 }, { "epoch": 16.232402234636872, "grad_norm": 0.45070499181747437, "learning_rate": 0.0001892997198879552, "loss": 0.4004, "step": 29056 }, { "epoch": 16.23296089385475, "grad_norm": 0.32728439569473267, "learning_rate": 0.0001892717086834734, "loss": 0.3516, "step": 29057 }, { "epoch": 16.233519553072625, "grad_norm": 0.45389971137046814, "learning_rate": 0.0001892436974789916, "loss": 0.4836, "step": 29058 }, { "epoch": 16.234078212290502, "grad_norm": 2.1861369609832764, "learning_rate": 0.0001892156862745098, "loss": 0.3212, "step": 29059 }, { "epoch": 16.23463687150838, "grad_norm": 0.5019142031669617, "learning_rate": 0.000189187675070028, "loss": 0.3969, "step": 29060 }, { "epoch": 16.23519553072626, "grad_norm": 0.9010175466537476, "learning_rate": 0.00018915966386554622, "loss": 0.3848, "step": 29061 }, { "epoch": 16.235754189944135, "grad_norm": 0.3512330949306488, "learning_rate": 0.00018913165266106445, "loss": 0.3354, "step": 29062 }, { "epoch": 16.23631284916201, "grad_norm": 0.4336191713809967, "learning_rate": 0.00018910364145658263, "loss": 0.4292, "step": 29063 }, { "epoch": 16.23687150837989, "grad_norm": 0.4691496789455414, "learning_rate": 0.00018907563025210084, "loss": 0.4035, "step": 29064 }, { "epoch": 16.237430167597765, "grad_norm": 0.3892434537410736, "learning_rate": 0.00018904761904761904, "loss": 0.3894, "step": 29065 }, { "epoch": 16.23798882681564, "grad_norm": 0.4409516751766205, "learning_rate": 0.00018901960784313727, "loss": 0.3611, "step": 29066 }, { "epoch": 16.238547486033518, "grad_norm": 1.963951826095581, "learning_rate": 0.00018899159663865548, "loss": 0.376, "step": 29067 }, { "epoch": 16.239106145251398, "grad_norm": 0.48292070627212524, "learning_rate": 0.00018896358543417366, "loss": 0.4659, "step": 29068 }, { "epoch": 16.239664804469275, "grad_norm": 0.4007108211517334, "learning_rate": 0.00018893557422969187, "loss": 0.4485, "step": 29069 }, { "epoch": 
16.24022346368715, "grad_norm": 0.32865309715270996, "learning_rate": 0.0001889075630252101, "loss": 0.2822, "step": 29070 }, { "epoch": 16.240782122905028, "grad_norm": 2.792288064956665, "learning_rate": 0.0001888795518207283, "loss": 0.531, "step": 29071 }, { "epoch": 16.241340782122904, "grad_norm": 1.0920830965042114, "learning_rate": 0.0001888515406162465, "loss": 0.4286, "step": 29072 }, { "epoch": 16.24189944134078, "grad_norm": 0.43305540084838867, "learning_rate": 0.0001888235294117647, "loss": 0.4685, "step": 29073 }, { "epoch": 16.242458100558657, "grad_norm": 0.45835575461387634, "learning_rate": 0.00018879551820728292, "loss": 0.4401, "step": 29074 }, { "epoch": 16.243016759776538, "grad_norm": 0.45942288637161255, "learning_rate": 0.00018876750700280113, "loss": 0.3581, "step": 29075 }, { "epoch": 16.243575418994414, "grad_norm": 1.5817164182662964, "learning_rate": 0.00018873949579831933, "loss": 0.3596, "step": 29076 }, { "epoch": 16.24413407821229, "grad_norm": 0.3791969120502472, "learning_rate": 0.00018871148459383754, "loss": 0.317, "step": 29077 }, { "epoch": 16.244692737430167, "grad_norm": 0.6134214401245117, "learning_rate": 0.00018868347338935575, "loss": 0.4473, "step": 29078 }, { "epoch": 16.245251396648044, "grad_norm": 0.9240784645080566, "learning_rate": 0.00018865546218487395, "loss": 0.4651, "step": 29079 }, { "epoch": 16.24581005586592, "grad_norm": 0.42309319972991943, "learning_rate": 0.00018862745098039216, "loss": 0.3474, "step": 29080 }, { "epoch": 16.2463687150838, "grad_norm": 0.6950482726097107, "learning_rate": 0.00018859943977591036, "loss": 0.45, "step": 29081 }, { "epoch": 16.246927374301677, "grad_norm": 0.48081183433532715, "learning_rate": 0.0001885714285714286, "loss": 0.2722, "step": 29082 }, { "epoch": 16.247486033519554, "grad_norm": 0.9204301238059998, "learning_rate": 0.00018854341736694678, "loss": 0.4694, "step": 29083 }, { "epoch": 16.24804469273743, "grad_norm": 0.5251319408416748, "learning_rate": 
0.00018851540616246498, "loss": 0.4333, "step": 29084 }, { "epoch": 16.248603351955307, "grad_norm": 1.100652813911438, "learning_rate": 0.0001884873949579832, "loss": 0.5262, "step": 29085 }, { "epoch": 16.249162011173183, "grad_norm": 1.8652902841567993, "learning_rate": 0.00018845938375350142, "loss": 0.4302, "step": 29086 }, { "epoch": 16.24972067039106, "grad_norm": 1.2497718334197998, "learning_rate": 0.00018843137254901963, "loss": 0.404, "step": 29087 }, { "epoch": 16.25027932960894, "grad_norm": 0.4833080768585205, "learning_rate": 0.0001884033613445378, "loss": 0.4255, "step": 29088 }, { "epoch": 16.250837988826817, "grad_norm": 0.5544809699058533, "learning_rate": 0.000188375350140056, "loss": 0.365, "step": 29089 }, { "epoch": 16.251396648044693, "grad_norm": 0.3277098536491394, "learning_rate": 0.00018834733893557425, "loss": 0.3109, "step": 29090 }, { "epoch": 16.25195530726257, "grad_norm": 2.0763208866119385, "learning_rate": 0.00018831932773109245, "loss": 0.3628, "step": 29091 }, { "epoch": 16.252513966480446, "grad_norm": 5.025925159454346, "learning_rate": 0.00018829131652661066, "loss": 0.347, "step": 29092 }, { "epoch": 16.253072625698323, "grad_norm": 0.4993504583835602, "learning_rate": 0.00018826330532212884, "loss": 0.4132, "step": 29093 }, { "epoch": 16.2536312849162, "grad_norm": 0.3727685213088989, "learning_rate": 0.00018823529411764707, "loss": 0.3961, "step": 29094 }, { "epoch": 16.25418994413408, "grad_norm": 0.5742563605308533, "learning_rate": 0.00018820728291316528, "loss": 0.4418, "step": 29095 }, { "epoch": 16.254748603351956, "grad_norm": 0.35151973366737366, "learning_rate": 0.00018817927170868348, "loss": 0.4017, "step": 29096 }, { "epoch": 16.255307262569833, "grad_norm": 0.515477180480957, "learning_rate": 0.00018815126050420166, "loss": 0.5386, "step": 29097 }, { "epoch": 16.25586592178771, "grad_norm": 0.4228178858757019, "learning_rate": 0.0001881232492997199, "loss": 0.3413, "step": 29098 }, { "epoch": 
16.256424581005586, "grad_norm": 0.40588104724884033, "learning_rate": 0.0001880952380952381, "loss": 0.3689, "step": 29099 }, { "epoch": 16.256983240223462, "grad_norm": 1.015671730041504, "learning_rate": 0.0001880672268907563, "loss": 0.3922, "step": 29100 }, { "epoch": 16.257541899441343, "grad_norm": 0.3605565130710602, "learning_rate": 0.0001880392156862745, "loss": 0.3401, "step": 29101 }, { "epoch": 16.25810055865922, "grad_norm": 0.9874143600463867, "learning_rate": 0.00018801120448179272, "loss": 0.4106, "step": 29102 }, { "epoch": 16.258659217877096, "grad_norm": 0.4626317620277405, "learning_rate": 0.00018798319327731092, "loss": 0.3945, "step": 29103 }, { "epoch": 16.259217877094972, "grad_norm": 0.3932786285877228, "learning_rate": 0.00018795518207282913, "loss": 0.4554, "step": 29104 }, { "epoch": 16.25977653631285, "grad_norm": 0.5947979688644409, "learning_rate": 0.00018792717086834734, "loss": 0.4747, "step": 29105 }, { "epoch": 16.260335195530725, "grad_norm": 0.45465967059135437, "learning_rate": 0.00018789915966386557, "loss": 0.4238, "step": 29106 }, { "epoch": 16.260893854748602, "grad_norm": 0.38727518916130066, "learning_rate": 0.00018787114845938375, "loss": 0.3378, "step": 29107 }, { "epoch": 16.261452513966482, "grad_norm": 0.546984076499939, "learning_rate": 0.00018784313725490195, "loss": 0.3193, "step": 29108 }, { "epoch": 16.26201117318436, "grad_norm": 0.6047913432121277, "learning_rate": 0.00018781512605042016, "loss": 0.3548, "step": 29109 }, { "epoch": 16.262569832402235, "grad_norm": 0.5609403848648071, "learning_rate": 0.0001877871148459384, "loss": 0.44, "step": 29110 }, { "epoch": 16.26312849162011, "grad_norm": 0.3945479393005371, "learning_rate": 0.0001877591036414566, "loss": 0.428, "step": 29111 }, { "epoch": 16.26368715083799, "grad_norm": 3.5814199447631836, "learning_rate": 0.00018773109243697478, "loss": 0.3967, "step": 29112 }, { "epoch": 16.264245810055865, "grad_norm": 0.5166468024253845, "learning_rate": 
0.00018770308123249298, "loss": 0.4152, "step": 29113 }, { "epoch": 16.26480446927374, "grad_norm": 0.6188220381736755, "learning_rate": 0.00018767507002801122, "loss": 0.4333, "step": 29114 }, { "epoch": 16.26536312849162, "grad_norm": 0.43525469303131104, "learning_rate": 0.00018764705882352942, "loss": 0.3802, "step": 29115 }, { "epoch": 16.265921787709498, "grad_norm": 0.6135135889053345, "learning_rate": 0.00018761904761904763, "loss": 0.4858, "step": 29116 }, { "epoch": 16.266480446927375, "grad_norm": 1.1723921298980713, "learning_rate": 0.0001875910364145658, "loss": 0.4417, "step": 29117 }, { "epoch": 16.26703910614525, "grad_norm": 0.39319777488708496, "learning_rate": 0.00018756302521008404, "loss": 0.4916, "step": 29118 }, { "epoch": 16.267597765363128, "grad_norm": 0.5077816247940063, "learning_rate": 0.00018753501400560225, "loss": 0.3119, "step": 29119 }, { "epoch": 16.268156424581004, "grad_norm": 0.5423672199249268, "learning_rate": 0.00018750700280112045, "loss": 0.3795, "step": 29120 }, { "epoch": 16.26871508379888, "grad_norm": 0.5061722993850708, "learning_rate": 0.00018747899159663866, "loss": 0.4888, "step": 29121 }, { "epoch": 16.26927374301676, "grad_norm": 16.067617416381836, "learning_rate": 0.00018745098039215686, "loss": 0.3969, "step": 29122 }, { "epoch": 16.269832402234638, "grad_norm": 0.3667023777961731, "learning_rate": 0.00018742296918767507, "loss": 0.3164, "step": 29123 }, { "epoch": 16.270391061452514, "grad_norm": 0.3833708167076111, "learning_rate": 0.00018739495798319328, "loss": 0.3643, "step": 29124 }, { "epoch": 16.27094972067039, "grad_norm": 0.42458459734916687, "learning_rate": 0.00018736694677871148, "loss": 0.3894, "step": 29125 }, { "epoch": 16.271508379888267, "grad_norm": 0.8859478235244751, "learning_rate": 0.00018733893557422972, "loss": 0.4356, "step": 29126 }, { "epoch": 16.272067039106144, "grad_norm": 0.3674730360507965, "learning_rate": 0.0001873109243697479, "loss": 0.3278, "step": 29127 }, { "epoch": 
16.272625698324024, "grad_norm": 0.3688463568687439, "learning_rate": 0.0001872829131652661, "loss": 0.4002, "step": 29128 }, { "epoch": 16.2731843575419, "grad_norm": 2.6958277225494385, "learning_rate": 0.0001872549019607843, "loss": 0.5277, "step": 29129 }, { "epoch": 16.273743016759777, "grad_norm": 0.4835015535354614, "learning_rate": 0.00018722689075630254, "loss": 0.5684, "step": 29130 }, { "epoch": 16.274301675977654, "grad_norm": 0.46925756335258484, "learning_rate": 0.00018719887955182075, "loss": 0.5322, "step": 29131 }, { "epoch": 16.27486033519553, "grad_norm": 0.8536108136177063, "learning_rate": 0.00018717086834733892, "loss": 0.3429, "step": 29132 }, { "epoch": 16.275418994413407, "grad_norm": 0.42519521713256836, "learning_rate": 0.00018714285714285713, "loss": 0.3869, "step": 29133 }, { "epoch": 16.275977653631283, "grad_norm": 0.5542572140693665, "learning_rate": 0.00018711484593837536, "loss": 0.5038, "step": 29134 }, { "epoch": 16.276536312849164, "grad_norm": 0.4148680567741394, "learning_rate": 0.00018708683473389357, "loss": 0.4158, "step": 29135 }, { "epoch": 16.27709497206704, "grad_norm": 0.39515072107315063, "learning_rate": 0.00018705882352941178, "loss": 0.3911, "step": 29136 }, { "epoch": 16.277653631284917, "grad_norm": 0.637635350227356, "learning_rate": 0.00018703081232492995, "loss": 0.3914, "step": 29137 }, { "epoch": 16.278212290502793, "grad_norm": 0.5663643479347229, "learning_rate": 0.0001870028011204482, "loss": 0.3522, "step": 29138 }, { "epoch": 16.27877094972067, "grad_norm": 0.47910791635513306, "learning_rate": 0.0001869747899159664, "loss": 0.412, "step": 29139 }, { "epoch": 16.279329608938546, "grad_norm": 0.3700598478317261, "learning_rate": 0.0001869467787114846, "loss": 0.4322, "step": 29140 }, { "epoch": 16.279888268156423, "grad_norm": 0.4291694760322571, "learning_rate": 0.0001869187675070028, "loss": 0.5087, "step": 29141 }, { "epoch": 16.280446927374303, "grad_norm": 0.4926217198371887, "learning_rate": 
0.000186890756302521, "loss": 0.3972, "step": 29142 }, { "epoch": 16.28100558659218, "grad_norm": 0.39969736337661743, "learning_rate": 0.00018686274509803922, "loss": 0.4101, "step": 29143 }, { "epoch": 16.281564245810056, "grad_norm": 0.3326007127761841, "learning_rate": 0.00018683473389355742, "loss": 0.3498, "step": 29144 }, { "epoch": 16.282122905027933, "grad_norm": 0.43563219904899597, "learning_rate": 0.00018680672268907563, "loss": 0.388, "step": 29145 }, { "epoch": 16.28268156424581, "grad_norm": 0.3399113118648529, "learning_rate": 0.00018677871148459386, "loss": 0.3723, "step": 29146 }, { "epoch": 16.283240223463686, "grad_norm": 0.5271950960159302, "learning_rate": 0.00018675070028011204, "loss": 0.3757, "step": 29147 }, { "epoch": 16.283798882681563, "grad_norm": 0.4606555998325348, "learning_rate": 0.00018672268907563025, "loss": 0.4286, "step": 29148 }, { "epoch": 16.284357541899443, "grad_norm": 0.49080440402030945, "learning_rate": 0.00018669467787114845, "loss": 0.3785, "step": 29149 }, { "epoch": 16.28491620111732, "grad_norm": 0.8866600394248962, "learning_rate": 0.0001866666666666667, "loss": 0.4363, "step": 29150 }, { "epoch": 16.285474860335196, "grad_norm": 0.8600183725357056, "learning_rate": 0.00018663865546218487, "loss": 0.4588, "step": 29151 }, { "epoch": 16.286033519553072, "grad_norm": 0.3469720780849457, "learning_rate": 0.00018661064425770307, "loss": 0.421, "step": 29152 }, { "epoch": 16.28659217877095, "grad_norm": 0.44809409976005554, "learning_rate": 0.00018658263305322128, "loss": 0.3602, "step": 29153 }, { "epoch": 16.287150837988825, "grad_norm": 1.6205464601516724, "learning_rate": 0.0001865546218487395, "loss": 0.5199, "step": 29154 }, { "epoch": 16.287709497206706, "grad_norm": 0.6207685470581055, "learning_rate": 0.00018652661064425772, "loss": 0.3592, "step": 29155 }, { "epoch": 16.288268156424582, "grad_norm": 0.4336671531200409, "learning_rate": 0.0001864985994397759, "loss": 0.3146, "step": 29156 }, { "epoch": 
16.28882681564246, "grad_norm": 0.504715085029602, "learning_rate": 0.0001864705882352941, "loss": 0.4937, "step": 29157 }, { "epoch": 16.289385474860335, "grad_norm": 0.930429995059967, "learning_rate": 0.00018644257703081233, "loss": 0.4622, "step": 29158 }, { "epoch": 16.289944134078212, "grad_norm": 0.38197097182273865, "learning_rate": 0.00018641456582633054, "loss": 0.4488, "step": 29159 }, { "epoch": 16.29050279329609, "grad_norm": 0.36801236867904663, "learning_rate": 0.00018638655462184875, "loss": 0.3556, "step": 29160 }, { "epoch": 16.291061452513965, "grad_norm": 0.38204339146614075, "learning_rate": 0.00018635854341736693, "loss": 0.3351, "step": 29161 }, { "epoch": 16.291620111731845, "grad_norm": 0.47354158759117126, "learning_rate": 0.00018633053221288516, "loss": 0.4342, "step": 29162 }, { "epoch": 16.29217877094972, "grad_norm": 0.5487963557243347, "learning_rate": 0.00018630252100840336, "loss": 0.535, "step": 29163 }, { "epoch": 16.2927374301676, "grad_norm": 0.4645974636077881, "learning_rate": 0.00018627450980392157, "loss": 0.3823, "step": 29164 }, { "epoch": 16.293296089385475, "grad_norm": 0.6940818428993225, "learning_rate": 0.00018624649859943978, "loss": 0.4547, "step": 29165 }, { "epoch": 16.29385474860335, "grad_norm": 0.3313988745212555, "learning_rate": 0.00018621848739495798, "loss": 0.3685, "step": 29166 }, { "epoch": 16.294413407821228, "grad_norm": 0.542900025844574, "learning_rate": 0.0001861904761904762, "loss": 0.3499, "step": 29167 }, { "epoch": 16.294972067039105, "grad_norm": 0.3856269419193268, "learning_rate": 0.0001861624649859944, "loss": 0.4374, "step": 29168 }, { "epoch": 16.295530726256985, "grad_norm": 0.5980980396270752, "learning_rate": 0.0001861344537815126, "loss": 0.5256, "step": 29169 }, { "epoch": 16.29608938547486, "grad_norm": 0.4706897437572479, "learning_rate": 0.00018610644257703083, "loss": 0.4057, "step": 29170 }, { "epoch": 16.296648044692738, "grad_norm": 0.3908962309360504, "learning_rate": 
0.000186078431372549, "loss": 0.3912, "step": 29171 }, { "epoch": 16.297206703910614, "grad_norm": 5.841452598571777, "learning_rate": 0.00018605042016806722, "loss": 0.5153, "step": 29172 }, { "epoch": 16.29776536312849, "grad_norm": 0.48757871985435486, "learning_rate": 0.00018602240896358542, "loss": 0.6541, "step": 29173 }, { "epoch": 16.298324022346367, "grad_norm": 0.7620923519134521, "learning_rate": 0.00018599439775910366, "loss": 0.3578, "step": 29174 }, { "epoch": 16.298882681564244, "grad_norm": 0.33920931816101074, "learning_rate": 0.00018596638655462186, "loss": 0.272, "step": 29175 }, { "epoch": 16.299441340782124, "grad_norm": 1.5870195627212524, "learning_rate": 0.00018593837535014004, "loss": 0.3656, "step": 29176 }, { "epoch": 16.3, "grad_norm": 0.9805966019630432, "learning_rate": 0.00018591036414565825, "loss": 0.3682, "step": 29177 }, { "epoch": 16.300558659217877, "grad_norm": 0.379983127117157, "learning_rate": 0.00018588235294117648, "loss": 0.3775, "step": 29178 }, { "epoch": 16.301117318435754, "grad_norm": 0.6545946002006531, "learning_rate": 0.0001858543417366947, "loss": 0.541, "step": 29179 }, { "epoch": 16.30167597765363, "grad_norm": 0.37663015723228455, "learning_rate": 0.0001858263305322129, "loss": 0.3774, "step": 29180 }, { "epoch": 16.302234636871507, "grad_norm": 0.3958780765533447, "learning_rate": 0.00018579831932773107, "loss": 0.4102, "step": 29181 }, { "epoch": 16.302793296089387, "grad_norm": 0.5670944452285767, "learning_rate": 0.0001857703081232493, "loss": 0.3944, "step": 29182 }, { "epoch": 16.303351955307264, "grad_norm": 0.4776594042778015, "learning_rate": 0.0001857422969187675, "loss": 0.3873, "step": 29183 }, { "epoch": 16.30391061452514, "grad_norm": 0.4428180754184723, "learning_rate": 0.00018571428571428572, "loss": 0.4654, "step": 29184 }, { "epoch": 16.304469273743017, "grad_norm": 0.5051175355911255, "learning_rate": 0.00018568627450980395, "loss": 0.4051, "step": 29185 }, { "epoch": 16.305027932960893, 
"grad_norm": 0.39730992913246155, "learning_rate": 0.00018565826330532213, "loss": 0.3407, "step": 29186 }, { "epoch": 16.30558659217877, "grad_norm": 0.4471661448478699, "learning_rate": 0.00018563025210084034, "loss": 0.4246, "step": 29187 }, { "epoch": 16.306145251396647, "grad_norm": 0.6260039210319519, "learning_rate": 0.00018560224089635854, "loss": 0.4354, "step": 29188 }, { "epoch": 16.306703910614527, "grad_norm": 0.4061700403690338, "learning_rate": 0.00018557422969187677, "loss": 0.3366, "step": 29189 }, { "epoch": 16.307262569832403, "grad_norm": 0.8906798362731934, "learning_rate": 0.00018554621848739498, "loss": 0.465, "step": 29190 }, { "epoch": 16.30782122905028, "grad_norm": 0.3904813826084137, "learning_rate": 0.00018551820728291316, "loss": 0.4831, "step": 29191 }, { "epoch": 16.308379888268156, "grad_norm": 0.3188614547252655, "learning_rate": 0.00018549019607843137, "loss": 0.378, "step": 29192 }, { "epoch": 16.308938547486033, "grad_norm": 0.4340461790561676, "learning_rate": 0.0001854621848739496, "loss": 0.3103, "step": 29193 }, { "epoch": 16.30949720670391, "grad_norm": 0.5216889977455139, "learning_rate": 0.0001854341736694678, "loss": 0.4056, "step": 29194 }, { "epoch": 16.310055865921786, "grad_norm": 0.40086278319358826, "learning_rate": 0.000185406162464986, "loss": 0.3585, "step": 29195 }, { "epoch": 16.310614525139666, "grad_norm": 0.42081013321876526, "learning_rate": 0.0001853781512605042, "loss": 0.3651, "step": 29196 }, { "epoch": 16.311173184357543, "grad_norm": 0.4767129123210907, "learning_rate": 0.00018535014005602242, "loss": 0.3725, "step": 29197 }, { "epoch": 16.31173184357542, "grad_norm": 0.5047635436058044, "learning_rate": 0.00018532212885154063, "loss": 0.4219, "step": 29198 }, { "epoch": 16.312290502793296, "grad_norm": 0.3997997045516968, "learning_rate": 0.00018529411764705883, "loss": 0.4053, "step": 29199 }, { "epoch": 16.312849162011172, "grad_norm": 0.4263671040534973, "learning_rate": 0.00018526610644257704, 
"loss": 0.3475, "step": 29200 }, { "epoch": 16.31340782122905, "grad_norm": 0.4201339781284332, "learning_rate": 0.00018523809523809525, "loss": 0.3649, "step": 29201 }, { "epoch": 16.31396648044693, "grad_norm": 0.4680660665035248, "learning_rate": 0.00018521008403361345, "loss": 0.3479, "step": 29202 }, { "epoch": 16.314525139664806, "grad_norm": 0.4918220043182373, "learning_rate": 0.00018518207282913166, "loss": 0.4292, "step": 29203 }, { "epoch": 16.315083798882682, "grad_norm": 0.6135198473930359, "learning_rate": 0.00018515406162464986, "loss": 0.4139, "step": 29204 }, { "epoch": 16.31564245810056, "grad_norm": 0.4507567882537842, "learning_rate": 0.00018512605042016807, "loss": 0.3462, "step": 29205 }, { "epoch": 16.316201117318435, "grad_norm": 0.33337104320526123, "learning_rate": 0.00018509803921568628, "loss": 0.3737, "step": 29206 }, { "epoch": 16.316759776536312, "grad_norm": 0.4143950641155243, "learning_rate": 0.00018507002801120448, "loss": 0.3631, "step": 29207 }, { "epoch": 16.31731843575419, "grad_norm": 0.39553341269493103, "learning_rate": 0.0001850420168067227, "loss": 0.3884, "step": 29208 }, { "epoch": 16.31787709497207, "grad_norm": 0.4273497462272644, "learning_rate": 0.00018501400560224092, "loss": 0.4903, "step": 29209 }, { "epoch": 16.318435754189945, "grad_norm": 0.8866323232650757, "learning_rate": 0.0001849859943977591, "loss": 0.5057, "step": 29210 }, { "epoch": 16.31899441340782, "grad_norm": 0.5312885642051697, "learning_rate": 0.0001849579831932773, "loss": 0.4901, "step": 29211 }, { "epoch": 16.3195530726257, "grad_norm": 0.3801111876964569, "learning_rate": 0.0001849299719887955, "loss": 0.3127, "step": 29212 }, { "epoch": 16.320111731843575, "grad_norm": 0.42810311913490295, "learning_rate": 0.00018490196078431375, "loss": 0.5119, "step": 29213 }, { "epoch": 16.32067039106145, "grad_norm": 0.6221145987510681, "learning_rate": 0.00018487394957983195, "loss": 0.3868, "step": 29214 }, { "epoch": 16.321229050279328, "grad_norm": 
2.6309096813201904, "learning_rate": 0.00018484593837535013, "loss": 0.4351, "step": 29215 }, { "epoch": 16.321787709497208, "grad_norm": 0.46629124879837036, "learning_rate": 0.00018481792717086834, "loss": 0.4432, "step": 29216 }, { "epoch": 16.322346368715085, "grad_norm": 0.4800971746444702, "learning_rate": 0.00018478991596638657, "loss": 0.3808, "step": 29217 }, { "epoch": 16.32290502793296, "grad_norm": 0.45208385586738586, "learning_rate": 0.00018476190476190478, "loss": 0.3619, "step": 29218 }, { "epoch": 16.323463687150838, "grad_norm": 0.36703911423683167, "learning_rate": 0.00018473389355742298, "loss": 0.3848, "step": 29219 }, { "epoch": 16.324022346368714, "grad_norm": 0.38982218503952026, "learning_rate": 0.00018470588235294116, "loss": 0.4822, "step": 29220 }, { "epoch": 16.32458100558659, "grad_norm": 0.3825012147426605, "learning_rate": 0.0001846778711484594, "loss": 0.3929, "step": 29221 }, { "epoch": 16.325139664804468, "grad_norm": 0.526433527469635, "learning_rate": 0.0001846498599439776, "loss": 0.4149, "step": 29222 }, { "epoch": 16.325698324022348, "grad_norm": 0.3424449563026428, "learning_rate": 0.0001846218487394958, "loss": 0.5157, "step": 29223 }, { "epoch": 16.326256983240224, "grad_norm": 0.397943913936615, "learning_rate": 0.000184593837535014, "loss": 0.4104, "step": 29224 }, { "epoch": 16.3268156424581, "grad_norm": 0.5023960471153259, "learning_rate": 0.00018456582633053222, "loss": 0.3657, "step": 29225 }, { "epoch": 16.327374301675977, "grad_norm": 3.254836082458496, "learning_rate": 0.00018453781512605042, "loss": 0.4406, "step": 29226 }, { "epoch": 16.327932960893854, "grad_norm": 1.1098432540893555, "learning_rate": 0.00018450980392156863, "loss": 0.6464, "step": 29227 }, { "epoch": 16.32849162011173, "grad_norm": 0.46687057614326477, "learning_rate": 0.00018448179271708684, "loss": 0.4494, "step": 29228 }, { "epoch": 16.32905027932961, "grad_norm": 0.5460361838340759, "learning_rate": 0.00018445378151260507, "loss": 0.4602, 
"step": 29229 }, { "epoch": 16.329608938547487, "grad_norm": 0.44901618361473083, "learning_rate": 0.00018442577030812325, "loss": 0.3199, "step": 29230 }, { "epoch": 16.330167597765364, "grad_norm": 0.40359994769096375, "learning_rate": 0.00018439775910364145, "loss": 0.3447, "step": 29231 }, { "epoch": 16.33072625698324, "grad_norm": 3.2685322761535645, "learning_rate": 0.00018436974789915966, "loss": 0.44, "step": 29232 }, { "epoch": 16.331284916201117, "grad_norm": 0.3578014671802521, "learning_rate": 0.0001843417366946779, "loss": 0.3838, "step": 29233 }, { "epoch": 16.331843575418993, "grad_norm": 0.5542742609977722, "learning_rate": 0.0001843137254901961, "loss": 0.4238, "step": 29234 }, { "epoch": 16.33240223463687, "grad_norm": 0.3870325982570648, "learning_rate": 0.00018428571428571428, "loss": 0.4147, "step": 29235 }, { "epoch": 16.33296089385475, "grad_norm": 0.563289225101471, "learning_rate": 0.00018425770308123248, "loss": 0.4619, "step": 29236 }, { "epoch": 16.333519553072627, "grad_norm": 0.42293792963027954, "learning_rate": 0.00018422969187675072, "loss": 0.3667, "step": 29237 }, { "epoch": 16.334078212290503, "grad_norm": 0.906248927116394, "learning_rate": 0.00018420168067226892, "loss": 0.4891, "step": 29238 }, { "epoch": 16.33463687150838, "grad_norm": 0.4047188460826874, "learning_rate": 0.00018417366946778713, "loss": 0.3169, "step": 29239 }, { "epoch": 16.335195530726256, "grad_norm": 0.6282626390457153, "learning_rate": 0.0001841456582633053, "loss": 0.3237, "step": 29240 }, { "epoch": 16.335754189944133, "grad_norm": 0.6191372275352478, "learning_rate": 0.00018411764705882354, "loss": 0.3981, "step": 29241 }, { "epoch": 16.33631284916201, "grad_norm": 0.9407033920288086, "learning_rate": 0.00018408963585434175, "loss": 0.4274, "step": 29242 }, { "epoch": 16.33687150837989, "grad_norm": 0.3497813940048218, "learning_rate": 0.00018406162464985995, "loss": 0.4113, "step": 29243 }, { "epoch": 16.337430167597766, "grad_norm": 
0.4571531116962433, "learning_rate": 0.00018403361344537816, "loss": 0.3937, "step": 29244 }, { "epoch": 16.337988826815643, "grad_norm": 0.4238032102584839, "learning_rate": 0.00018400560224089636, "loss": 0.3561, "step": 29245 }, { "epoch": 16.33854748603352, "grad_norm": 0.6335416436195374, "learning_rate": 0.00018397759103641457, "loss": 0.55, "step": 29246 }, { "epoch": 16.339106145251396, "grad_norm": 1.5848215818405151, "learning_rate": 0.00018394957983193278, "loss": 0.4107, "step": 29247 }, { "epoch": 16.339664804469272, "grad_norm": 0.47834599018096924, "learning_rate": 0.00018392156862745098, "loss": 0.3934, "step": 29248 }, { "epoch": 16.340223463687153, "grad_norm": 3.7983195781707764, "learning_rate": 0.00018389355742296922, "loss": 0.5218, "step": 29249 }, { "epoch": 16.34078212290503, "grad_norm": 0.4493592381477356, "learning_rate": 0.0001838655462184874, "loss": 0.5024, "step": 29250 }, { "epoch": 16.341340782122906, "grad_norm": 0.41889163851737976, "learning_rate": 0.0001838375350140056, "loss": 0.3673, "step": 29251 }, { "epoch": 16.341899441340782, "grad_norm": 0.42403653264045715, "learning_rate": 0.0001838095238095238, "loss": 0.3908, "step": 29252 }, { "epoch": 16.34245810055866, "grad_norm": 0.44208356738090515, "learning_rate": 0.00018378151260504204, "loss": 0.4522, "step": 29253 }, { "epoch": 16.343016759776535, "grad_norm": 0.5989401340484619, "learning_rate": 0.00018375350140056025, "loss": 0.38, "step": 29254 }, { "epoch": 16.343575418994412, "grad_norm": 0.31500527262687683, "learning_rate": 0.00018372549019607842, "loss": 0.3827, "step": 29255 }, { "epoch": 16.344134078212292, "grad_norm": 1.2640317678451538, "learning_rate": 0.00018369747899159663, "loss": 0.4623, "step": 29256 }, { "epoch": 16.34469273743017, "grad_norm": 0.45024389028549194, "learning_rate": 0.00018366946778711486, "loss": 0.4668, "step": 29257 }, { "epoch": 16.345251396648045, "grad_norm": 0.34603625535964966, "learning_rate": 0.00018364145658263307, "loss": 
0.4083, "step": 29258 }, { "epoch": 16.345810055865922, "grad_norm": 1.5504465103149414, "learning_rate": 0.00018361344537815128, "loss": 0.4711, "step": 29259 }, { "epoch": 16.3463687150838, "grad_norm": 0.3613249957561493, "learning_rate": 0.00018358543417366945, "loss": 0.3456, "step": 29260 }, { "epoch": 16.346927374301675, "grad_norm": 0.6513382196426392, "learning_rate": 0.0001835574229691877, "loss": 0.4329, "step": 29261 }, { "epoch": 16.34748603351955, "grad_norm": 0.35389411449432373, "learning_rate": 0.0001835294117647059, "loss": 0.3403, "step": 29262 }, { "epoch": 16.34804469273743, "grad_norm": 0.6265043020248413, "learning_rate": 0.0001835014005602241, "loss": 0.6289, "step": 29263 }, { "epoch": 16.34860335195531, "grad_norm": 0.5129328370094299, "learning_rate": 0.00018347338935574228, "loss": 0.4474, "step": 29264 }, { "epoch": 16.349162011173185, "grad_norm": 0.5313289761543274, "learning_rate": 0.0001834453781512605, "loss": 0.3894, "step": 29265 }, { "epoch": 16.34972067039106, "grad_norm": 0.44400593638420105, "learning_rate": 0.00018341736694677872, "loss": 0.3965, "step": 29266 }, { "epoch": 16.350279329608938, "grad_norm": 0.5046951770782471, "learning_rate": 0.00018338935574229692, "loss": 0.51, "step": 29267 }, { "epoch": 16.350837988826814, "grad_norm": 0.38331663608551025, "learning_rate": 0.00018336134453781513, "loss": 0.3484, "step": 29268 }, { "epoch": 16.35139664804469, "grad_norm": 0.38684171438217163, "learning_rate": 0.00018333333333333334, "loss": 0.4309, "step": 29269 }, { "epoch": 16.35195530726257, "grad_norm": 0.2986755073070526, "learning_rate": 0.00018330532212885154, "loss": 0.2516, "step": 29270 }, { "epoch": 16.352513966480448, "grad_norm": 0.38292479515075684, "learning_rate": 0.00018327731092436975, "loss": 0.4192, "step": 29271 }, { "epoch": 16.353072625698324, "grad_norm": 0.428937703371048, "learning_rate": 0.00018324929971988795, "loss": 0.3361, "step": 29272 }, { "epoch": 16.3536312849162, "grad_norm": 
1.4777461290359497, "learning_rate": 0.00018322128851540619, "loss": 0.4556, "step": 29273 }, { "epoch": 16.354189944134077, "grad_norm": 0.3609444200992584, "learning_rate": 0.00018319327731092437, "loss": 0.3869, "step": 29274 }, { "epoch": 16.354748603351954, "grad_norm": 0.29203441739082336, "learning_rate": 0.00018316526610644257, "loss": 0.3545, "step": 29275 }, { "epoch": 16.355307262569834, "grad_norm": 0.5496408343315125, "learning_rate": 0.00018313725490196078, "loss": 0.42, "step": 29276 }, { "epoch": 16.35586592178771, "grad_norm": 0.42638736963272095, "learning_rate": 0.000183109243697479, "loss": 0.4328, "step": 29277 }, { "epoch": 16.356424581005587, "grad_norm": 0.44181400537490845, "learning_rate": 0.00018308123249299722, "loss": 0.3923, "step": 29278 }, { "epoch": 16.356983240223464, "grad_norm": 0.48514965176582336, "learning_rate": 0.0001830532212885154, "loss": 0.4978, "step": 29279 }, { "epoch": 16.35754189944134, "grad_norm": 0.5489807724952698, "learning_rate": 0.0001830252100840336, "loss": 0.452, "step": 29280 }, { "epoch": 16.358100558659217, "grad_norm": 0.4797283411026001, "learning_rate": 0.00018299719887955183, "loss": 0.4269, "step": 29281 }, { "epoch": 16.358659217877094, "grad_norm": 1.541698694229126, "learning_rate": 0.00018296918767507004, "loss": 0.6974, "step": 29282 }, { "epoch": 16.359217877094974, "grad_norm": 0.4843508005142212, "learning_rate": 0.00018294117647058825, "loss": 0.4573, "step": 29283 }, { "epoch": 16.35977653631285, "grad_norm": 0.6121612787246704, "learning_rate": 0.00018291316526610643, "loss": 0.4276, "step": 29284 }, { "epoch": 16.360335195530727, "grad_norm": 0.4368060231208801, "learning_rate": 0.00018288515406162466, "loss": 0.4165, "step": 29285 }, { "epoch": 16.360893854748603, "grad_norm": 0.4716063141822815, "learning_rate": 0.00018285714285714286, "loss": 0.4793, "step": 29286 }, { "epoch": 16.36145251396648, "grad_norm": 0.4299421012401581, "learning_rate": 0.00018282913165266107, "loss": 
0.4326, "step": 29287 }, { "epoch": 16.362011173184356, "grad_norm": 0.40455934405326843, "learning_rate": 0.00018280112044817928, "loss": 0.3515, "step": 29288 }, { "epoch": 16.362569832402233, "grad_norm": 0.5767871141433716, "learning_rate": 0.00018277310924369748, "loss": 0.4018, "step": 29289 }, { "epoch": 16.363128491620113, "grad_norm": 0.37597858905792236, "learning_rate": 0.0001827450980392157, "loss": 0.3562, "step": 29290 }, { "epoch": 16.36368715083799, "grad_norm": 2.1146843433380127, "learning_rate": 0.0001827170868347339, "loss": 0.4456, "step": 29291 }, { "epoch": 16.364245810055866, "grad_norm": 0.4962692856788635, "learning_rate": 0.0001826890756302521, "loss": 0.4764, "step": 29292 }, { "epoch": 16.364804469273743, "grad_norm": 0.314089298248291, "learning_rate": 0.00018266106442577033, "loss": 0.3385, "step": 29293 }, { "epoch": 16.36536312849162, "grad_norm": 0.6499539017677307, "learning_rate": 0.0001826330532212885, "loss": 0.3931, "step": 29294 }, { "epoch": 16.365921787709496, "grad_norm": 0.43791627883911133, "learning_rate": 0.00018260504201680672, "loss": 0.4438, "step": 29295 }, { "epoch": 16.366480446927373, "grad_norm": 0.3982679843902588, "learning_rate": 0.00018257703081232492, "loss": 0.3195, "step": 29296 }, { "epoch": 16.367039106145253, "grad_norm": 2.6786413192749023, "learning_rate": 0.00018254901960784316, "loss": 0.3847, "step": 29297 }, { "epoch": 16.36759776536313, "grad_norm": 0.6924843192100525, "learning_rate": 0.00018252100840336136, "loss": 0.4085, "step": 29298 }, { "epoch": 16.368156424581006, "grad_norm": 0.5009807348251343, "learning_rate": 0.00018249299719887954, "loss": 0.4202, "step": 29299 }, { "epoch": 16.368715083798882, "grad_norm": 0.4068930149078369, "learning_rate": 0.00018246498599439775, "loss": 0.3709, "step": 29300 }, { "epoch": 16.36927374301676, "grad_norm": 0.414574533700943, "learning_rate": 0.00018243697478991598, "loss": 0.5027, "step": 29301 }, { "epoch": 16.369832402234636, "grad_norm": 
0.38836005330085754, "learning_rate": 0.0001824089635854342, "loss": 0.2532, "step": 29302 }, { "epoch": 16.370391061452516, "grad_norm": 0.36547163128852844, "learning_rate": 0.0001823809523809524, "loss": 0.3456, "step": 29303 }, { "epoch": 16.370949720670392, "grad_norm": 1.076255202293396, "learning_rate": 0.00018235294117647057, "loss": 0.639, "step": 29304 }, { "epoch": 16.37150837988827, "grad_norm": 1.2171363830566406, "learning_rate": 0.0001823249299719888, "loss": 0.3471, "step": 29305 }, { "epoch": 16.372067039106145, "grad_norm": 0.3890760540962219, "learning_rate": 0.000182296918767507, "loss": 0.4504, "step": 29306 }, { "epoch": 16.372625698324022, "grad_norm": 0.4601249694824219, "learning_rate": 0.00018226890756302522, "loss": 0.4602, "step": 29307 }, { "epoch": 16.3731843575419, "grad_norm": 0.37602803111076355, "learning_rate": 0.00018224089635854342, "loss": 0.414, "step": 29308 }, { "epoch": 16.373743016759775, "grad_norm": 0.3871692419052124, "learning_rate": 0.00018221288515406163, "loss": 0.3911, "step": 29309 }, { "epoch": 16.374301675977655, "grad_norm": 0.681891143321991, "learning_rate": 0.00018218487394957984, "loss": 0.5098, "step": 29310 }, { "epoch": 16.37486033519553, "grad_norm": 0.45180290937423706, "learning_rate": 0.00018215686274509804, "loss": 0.4606, "step": 29311 }, { "epoch": 16.37541899441341, "grad_norm": 0.31876417994499207, "learning_rate": 0.00018212885154061625, "loss": 0.4339, "step": 29312 }, { "epoch": 16.375977653631285, "grad_norm": 0.5003113150596619, "learning_rate": 0.00018210084033613448, "loss": 0.4175, "step": 29313 }, { "epoch": 16.37653631284916, "grad_norm": 0.3924187421798706, "learning_rate": 0.00018207282913165266, "loss": 0.3779, "step": 29314 }, { "epoch": 16.377094972067038, "grad_norm": 1.0952647924423218, "learning_rate": 0.00018204481792717087, "loss": 0.3918, "step": 29315 }, { "epoch": 16.377653631284915, "grad_norm": 0.36226415634155273, "learning_rate": 0.00018201680672268907, "loss": 0.354, 
"step": 29316 }, { "epoch": 16.378212290502795, "grad_norm": 0.7102446556091309, "learning_rate": 0.0001819887955182073, "loss": 0.3695, "step": 29317 }, { "epoch": 16.37877094972067, "grad_norm": 0.4620143175125122, "learning_rate": 0.00018196078431372548, "loss": 0.4195, "step": 29318 }, { "epoch": 16.379329608938548, "grad_norm": 0.5799338817596436, "learning_rate": 0.0001819327731092437, "loss": 0.3947, "step": 29319 }, { "epoch": 16.379888268156424, "grad_norm": 0.6911343932151794, "learning_rate": 0.0001819047619047619, "loss": 0.3326, "step": 29320 }, { "epoch": 16.3804469273743, "grad_norm": 0.547643780708313, "learning_rate": 0.00018187675070028013, "loss": 0.5668, "step": 29321 }, { "epoch": 16.381005586592178, "grad_norm": 0.6426225304603577, "learning_rate": 0.00018184873949579833, "loss": 0.4453, "step": 29322 }, { "epoch": 16.381564245810054, "grad_norm": 0.501176118850708, "learning_rate": 0.0001818207282913165, "loss": 0.3989, "step": 29323 }, { "epoch": 16.382122905027934, "grad_norm": 0.7337033748626709, "learning_rate": 0.00018179271708683472, "loss": 0.4347, "step": 29324 }, { "epoch": 16.38268156424581, "grad_norm": 0.3888240158557892, "learning_rate": 0.00018176470588235295, "loss": 0.3696, "step": 29325 }, { "epoch": 16.383240223463687, "grad_norm": 2.0745716094970703, "learning_rate": 0.00018173669467787116, "loss": 0.4853, "step": 29326 }, { "epoch": 16.383798882681564, "grad_norm": 1.0249732732772827, "learning_rate": 0.00018170868347338936, "loss": 0.3249, "step": 29327 }, { "epoch": 16.38435754189944, "grad_norm": 0.35450994968414307, "learning_rate": 0.00018168067226890754, "loss": 0.3556, "step": 29328 }, { "epoch": 16.384916201117317, "grad_norm": 0.8636338710784912, "learning_rate": 0.00018165266106442578, "loss": 0.3969, "step": 29329 }, { "epoch": 16.385474860335197, "grad_norm": 0.39495307207107544, "learning_rate": 0.00018162464985994398, "loss": 0.3413, "step": 29330 }, { "epoch": 16.386033519553074, "grad_norm": 
0.8098691701889038, "learning_rate": 0.0001815966386554622, "loss": 0.3852, "step": 29331 }, { "epoch": 16.38659217877095, "grad_norm": 0.47679540514945984, "learning_rate": 0.0001815686274509804, "loss": 0.3945, "step": 29332 }, { "epoch": 16.387150837988827, "grad_norm": 1.0485734939575195, "learning_rate": 0.0001815406162464986, "loss": 0.3792, "step": 29333 }, { "epoch": 16.387709497206703, "grad_norm": 0.5419967770576477, "learning_rate": 0.0001815126050420168, "loss": 0.4015, "step": 29334 }, { "epoch": 16.38826815642458, "grad_norm": 0.38929253816604614, "learning_rate": 0.000181484593837535, "loss": 0.343, "step": 29335 }, { "epoch": 16.388826815642457, "grad_norm": 0.41058382391929626, "learning_rate": 0.00018145658263305322, "loss": 0.4496, "step": 29336 }, { "epoch": 16.389385474860337, "grad_norm": 0.7400023341178894, "learning_rate": 0.00018142857142857145, "loss": 0.3882, "step": 29337 }, { "epoch": 16.389944134078213, "grad_norm": 0.44622188806533813, "learning_rate": 0.00018140056022408963, "loss": 0.4985, "step": 29338 }, { "epoch": 16.39050279329609, "grad_norm": 0.43652012944221497, "learning_rate": 0.00018137254901960784, "loss": 0.3915, "step": 29339 }, { "epoch": 16.391061452513966, "grad_norm": 0.4128139913082123, "learning_rate": 0.00018134453781512604, "loss": 0.363, "step": 29340 }, { "epoch": 16.391620111731843, "grad_norm": 0.33831775188446045, "learning_rate": 0.00018131652661064428, "loss": 0.382, "step": 29341 }, { "epoch": 16.39217877094972, "grad_norm": 0.5503801107406616, "learning_rate": 0.00018128851540616248, "loss": 0.3605, "step": 29342 }, { "epoch": 16.392737430167596, "grad_norm": 0.391715943813324, "learning_rate": 0.00018126050420168066, "loss": 0.4508, "step": 29343 }, { "epoch": 16.393296089385476, "grad_norm": 0.37690484523773193, "learning_rate": 0.00018123249299719887, "loss": 0.3141, "step": 29344 }, { "epoch": 16.393854748603353, "grad_norm": 0.46313005685806274, "learning_rate": 0.0001812044817927171, "loss": 
0.365, "step": 29345 }, { "epoch": 16.39441340782123, "grad_norm": 0.3934359848499298, "learning_rate": 0.0001811764705882353, "loss": 0.3592, "step": 29346 }, { "epoch": 16.394972067039106, "grad_norm": 0.3994815945625305, "learning_rate": 0.0001811484593837535, "loss": 0.3421, "step": 29347 }, { "epoch": 16.395530726256982, "grad_norm": 0.49727535247802734, "learning_rate": 0.0001811204481792717, "loss": 0.3814, "step": 29348 }, { "epoch": 16.39608938547486, "grad_norm": 0.4964869022369385, "learning_rate": 0.00018109243697478992, "loss": 0.3946, "step": 29349 }, { "epoch": 16.39664804469274, "grad_norm": 2.4565200805664062, "learning_rate": 0.00018106442577030813, "loss": 0.3296, "step": 29350 }, { "epoch": 16.397206703910616, "grad_norm": 0.46368148922920227, "learning_rate": 0.00018103641456582634, "loss": 0.4344, "step": 29351 }, { "epoch": 16.397765363128492, "grad_norm": 0.9674256443977356, "learning_rate": 0.00018100840336134454, "loss": 0.4751, "step": 29352 }, { "epoch": 16.39832402234637, "grad_norm": 0.5346485376358032, "learning_rate": 0.00018098039215686275, "loss": 0.3908, "step": 29353 }, { "epoch": 16.398882681564245, "grad_norm": 0.401483416557312, "learning_rate": 0.00018095238095238095, "loss": 0.3718, "step": 29354 }, { "epoch": 16.399441340782122, "grad_norm": 1.0723282098770142, "learning_rate": 0.00018092436974789916, "loss": 0.459, "step": 29355 }, { "epoch": 16.4, "grad_norm": 0.6425642371177673, "learning_rate": 0.00018089635854341737, "loss": 0.4133, "step": 29356 }, { "epoch": 16.40055865921788, "grad_norm": 0.46530115604400635, "learning_rate": 0.0001808683473389356, "loss": 0.4477, "step": 29357 }, { "epoch": 16.401117318435755, "grad_norm": 0.5972253084182739, "learning_rate": 0.00018084033613445378, "loss": 0.3748, "step": 29358 }, { "epoch": 16.401675977653632, "grad_norm": 0.564058244228363, "learning_rate": 0.00018081232492997198, "loss": 0.5235, "step": 29359 }, { "epoch": 16.40223463687151, "grad_norm": 0.46438759565353394, 
"learning_rate": 0.0001807843137254902, "loss": 0.4149, "step": 29360 }, { "epoch": 16.402793296089385, "grad_norm": 0.5858387351036072, "learning_rate": 0.00018075630252100842, "loss": 0.6283, "step": 29361 }, { "epoch": 16.40335195530726, "grad_norm": 1.0293725728988647, "learning_rate": 0.00018072829131652663, "loss": 0.4761, "step": 29362 }, { "epoch": 16.403910614525138, "grad_norm": 0.5233083963394165, "learning_rate": 0.0001807002801120448, "loss": 0.5338, "step": 29363 }, { "epoch": 16.404469273743018, "grad_norm": 0.39073577523231506, "learning_rate": 0.000180672268907563, "loss": 0.4256, "step": 29364 }, { "epoch": 16.405027932960895, "grad_norm": 1.2213108539581299, "learning_rate": 0.00018064425770308125, "loss": 0.5129, "step": 29365 }, { "epoch": 16.40558659217877, "grad_norm": 0.42700281739234924, "learning_rate": 0.00018061624649859945, "loss": 0.4128, "step": 29366 }, { "epoch": 16.406145251396648, "grad_norm": 0.4969269037246704, "learning_rate": 0.00018058823529411766, "loss": 0.3885, "step": 29367 }, { "epoch": 16.406703910614524, "grad_norm": 6.9765238761901855, "learning_rate": 0.00018056022408963584, "loss": 0.3337, "step": 29368 }, { "epoch": 16.4072625698324, "grad_norm": 0.6105775237083435, "learning_rate": 0.00018053221288515407, "loss": 0.4606, "step": 29369 }, { "epoch": 16.407821229050278, "grad_norm": 0.47373780608177185, "learning_rate": 0.00018050420168067228, "loss": 0.4018, "step": 29370 }, { "epoch": 16.408379888268158, "grad_norm": 0.48614877462387085, "learning_rate": 0.00018047619047619048, "loss": 0.3678, "step": 29371 }, { "epoch": 16.408938547486034, "grad_norm": 0.5047931671142578, "learning_rate": 0.00018044817927170866, "loss": 0.4625, "step": 29372 }, { "epoch": 16.40949720670391, "grad_norm": 0.4170670211315155, "learning_rate": 0.0001804201680672269, "loss": 0.3593, "step": 29373 }, { "epoch": 16.410055865921787, "grad_norm": 0.3998320996761322, "learning_rate": 0.0001803921568627451, "loss": 0.3599, "step": 29374 }, 
{ "epoch": 16.410614525139664, "grad_norm": 0.4551156461238861, "learning_rate": 0.0001803641456582633, "loss": 0.4183, "step": 29375 }, { "epoch": 16.41117318435754, "grad_norm": 1.0150521993637085, "learning_rate": 0.0001803361344537815, "loss": 0.3646, "step": 29376 }, { "epoch": 16.41173184357542, "grad_norm": 1.6198301315307617, "learning_rate": 0.00018030812324929972, "loss": 0.5356, "step": 29377 }, { "epoch": 16.412290502793297, "grad_norm": 0.46991240978240967, "learning_rate": 0.00018028011204481792, "loss": 0.4629, "step": 29378 }, { "epoch": 16.412849162011174, "grad_norm": 0.4913531243801117, "learning_rate": 0.00018025210084033613, "loss": 0.4256, "step": 29379 }, { "epoch": 16.41340782122905, "grad_norm": 0.8520797491073608, "learning_rate": 0.00018022408963585434, "loss": 0.3958, "step": 29380 }, { "epoch": 16.413966480446927, "grad_norm": 1.7837859392166138, "learning_rate": 0.00018019607843137257, "loss": 0.4377, "step": 29381 }, { "epoch": 16.414525139664804, "grad_norm": 0.35356056690216064, "learning_rate": 0.00018016806722689075, "loss": 0.3857, "step": 29382 }, { "epoch": 16.41508379888268, "grad_norm": 1.4621249437332153, "learning_rate": 0.00018014005602240895, "loss": 0.4112, "step": 29383 }, { "epoch": 16.41564245810056, "grad_norm": 0.5419853925704956, "learning_rate": 0.00018011204481792716, "loss": 0.4548, "step": 29384 }, { "epoch": 16.416201117318437, "grad_norm": 0.347685843706131, "learning_rate": 0.0001800840336134454, "loss": 0.3322, "step": 29385 }, { "epoch": 16.416759776536313, "grad_norm": 0.4668864607810974, "learning_rate": 0.0001800560224089636, "loss": 0.3549, "step": 29386 }, { "epoch": 16.41731843575419, "grad_norm": 0.8550448417663574, "learning_rate": 0.00018002801120448178, "loss": 0.4192, "step": 29387 }, { "epoch": 16.417877094972066, "grad_norm": 0.4354788362979889, "learning_rate": 0.00017999999999999998, "loss": 0.3668, "step": 29388 }, { "epoch": 16.418435754189943, "grad_norm": 0.3606705367565155, 
"learning_rate": 0.00017997198879551822, "loss": 0.3758, "step": 29389 }, { "epoch": 16.41899441340782, "grad_norm": 0.43496471643447876, "learning_rate": 0.00017994397759103642, "loss": 0.3764, "step": 29390 }, { "epoch": 16.4195530726257, "grad_norm": 1.3305996656417847, "learning_rate": 0.00017991596638655463, "loss": 0.5198, "step": 29391 }, { "epoch": 16.420111731843576, "grad_norm": 0.4233115613460541, "learning_rate": 0.0001798879551820728, "loss": 0.4372, "step": 29392 }, { "epoch": 16.420670391061453, "grad_norm": 0.35455557703971863, "learning_rate": 0.00017985994397759104, "loss": 0.3625, "step": 29393 }, { "epoch": 16.42122905027933, "grad_norm": 0.6690486669540405, "learning_rate": 0.00017983193277310925, "loss": 0.6452, "step": 29394 }, { "epoch": 16.421787709497206, "grad_norm": 0.43061983585357666, "learning_rate": 0.00017980392156862745, "loss": 0.3574, "step": 29395 }, { "epoch": 16.422346368715083, "grad_norm": 0.6338505744934082, "learning_rate": 0.00017977591036414566, "loss": 0.3963, "step": 29396 }, { "epoch": 16.422905027932963, "grad_norm": 0.4692417085170746, "learning_rate": 0.00017974789915966387, "loss": 0.4116, "step": 29397 }, { "epoch": 16.42346368715084, "grad_norm": 1.1381837129592896, "learning_rate": 0.00017971988795518207, "loss": 0.3987, "step": 29398 }, { "epoch": 16.424022346368716, "grad_norm": 0.44889065623283386, "learning_rate": 0.00017969187675070028, "loss": 0.3823, "step": 29399 }, { "epoch": 16.424581005586592, "grad_norm": 0.37629449367523193, "learning_rate": 0.00017966386554621848, "loss": 0.3933, "step": 29400 }, { "epoch": 16.42513966480447, "grad_norm": 0.380341112613678, "learning_rate": 0.00017963585434173672, "loss": 0.3203, "step": 29401 }, { "epoch": 16.425698324022346, "grad_norm": 0.3731003701686859, "learning_rate": 0.0001796078431372549, "loss": 0.3838, "step": 29402 }, { "epoch": 16.426256983240222, "grad_norm": 0.40624523162841797, "learning_rate": 0.0001795798319327731, "loss": 0.4334, "step": 29403 
}, { "epoch": 16.426815642458102, "grad_norm": 0.4896109104156494, "learning_rate": 0.0001795518207282913, "loss": 0.3675, "step": 29404 }, { "epoch": 16.42737430167598, "grad_norm": 0.7210534811019897, "learning_rate": 0.00017952380952380954, "loss": 0.4412, "step": 29405 }, { "epoch": 16.427932960893855, "grad_norm": 0.4975736737251282, "learning_rate": 0.00017949579831932775, "loss": 0.4504, "step": 29406 }, { "epoch": 16.428491620111732, "grad_norm": 0.7467525601387024, "learning_rate": 0.00017946778711484593, "loss": 0.4592, "step": 29407 }, { "epoch": 16.42905027932961, "grad_norm": 0.5787555575370789, "learning_rate": 0.00017943977591036413, "loss": 0.4144, "step": 29408 }, { "epoch": 16.429608938547485, "grad_norm": 0.9575775265693665, "learning_rate": 0.00017941176470588236, "loss": 0.5782, "step": 29409 }, { "epoch": 16.43016759776536, "grad_norm": 0.45134004950523376, "learning_rate": 0.00017938375350140057, "loss": 0.4362, "step": 29410 }, { "epoch": 16.43072625698324, "grad_norm": 0.3954046368598938, "learning_rate": 0.00017935574229691878, "loss": 0.3687, "step": 29411 }, { "epoch": 16.43128491620112, "grad_norm": 0.39862683415412903, "learning_rate": 0.00017932773109243696, "loss": 0.3575, "step": 29412 }, { "epoch": 16.431843575418995, "grad_norm": 5.257641792297363, "learning_rate": 0.0001792997198879552, "loss": 0.6435, "step": 29413 }, { "epoch": 16.43240223463687, "grad_norm": 0.47216475009918213, "learning_rate": 0.0001792717086834734, "loss": 0.4379, "step": 29414 }, { "epoch": 16.432960893854748, "grad_norm": 0.41238969564437866, "learning_rate": 0.0001792436974789916, "loss": 0.387, "step": 29415 }, { "epoch": 16.433519553072625, "grad_norm": 0.3795824348926544, "learning_rate": 0.00017921568627450983, "loss": 0.3687, "step": 29416 }, { "epoch": 16.4340782122905, "grad_norm": 1.1302629709243774, "learning_rate": 0.000179187675070028, "loss": 0.3983, "step": 29417 }, { "epoch": 16.43463687150838, "grad_norm": 0.4332362115383148, 
"learning_rate": 0.00017915966386554622, "loss": 0.4238, "step": 29418 }, { "epoch": 16.435195530726258, "grad_norm": 0.36636883020401, "learning_rate": 0.00017913165266106442, "loss": 0.3809, "step": 29419 }, { "epoch": 16.435754189944134, "grad_norm": 0.39438527822494507, "learning_rate": 0.00017910364145658266, "loss": 0.3385, "step": 29420 }, { "epoch": 16.43631284916201, "grad_norm": 0.4470037519931793, "learning_rate": 0.00017907563025210086, "loss": 0.4038, "step": 29421 }, { "epoch": 16.436871508379888, "grad_norm": 0.35678696632385254, "learning_rate": 0.00017904761904761904, "loss": 0.4445, "step": 29422 }, { "epoch": 16.437430167597764, "grad_norm": 39.401817321777344, "learning_rate": 0.00017901960784313725, "loss": 0.3245, "step": 29423 }, { "epoch": 16.43798882681564, "grad_norm": 0.47048747539520264, "learning_rate": 0.00017899159663865548, "loss": 0.3636, "step": 29424 }, { "epoch": 16.43854748603352, "grad_norm": 0.60615473985672, "learning_rate": 0.0001789635854341737, "loss": 0.3999, "step": 29425 }, { "epoch": 16.439106145251397, "grad_norm": 0.49049490690231323, "learning_rate": 0.0001789355742296919, "loss": 0.3878, "step": 29426 }, { "epoch": 16.439664804469274, "grad_norm": 0.5932494401931763, "learning_rate": 0.00017890756302521007, "loss": 0.5057, "step": 29427 }, { "epoch": 16.44022346368715, "grad_norm": 0.41386163234710693, "learning_rate": 0.0001788795518207283, "loss": 0.3113, "step": 29428 }, { "epoch": 16.440782122905027, "grad_norm": 0.39143890142440796, "learning_rate": 0.0001788515406162465, "loss": 0.3211, "step": 29429 }, { "epoch": 16.441340782122904, "grad_norm": 0.47978535294532776, "learning_rate": 0.00017882352941176472, "loss": 0.4291, "step": 29430 }, { "epoch": 16.441899441340784, "grad_norm": 0.4087378680706024, "learning_rate": 0.0001787955182072829, "loss": 0.4357, "step": 29431 }, { "epoch": 16.44245810055866, "grad_norm": 0.4008702337741852, "learning_rate": 0.00017876750700280113, "loss": 0.4008, "step": 29432 }, 
{ "epoch": 16.443016759776537, "grad_norm": 0.666835606098175, "learning_rate": 0.00017873949579831934, "loss": 0.4823, "step": 29433 }, { "epoch": 16.443575418994413, "grad_norm": 0.3378714323043823, "learning_rate": 0.00017871148459383754, "loss": 0.311, "step": 29434 }, { "epoch": 16.44413407821229, "grad_norm": 6.9305100440979, "learning_rate": 0.00017868347338935575, "loss": 0.8419, "step": 29435 }, { "epoch": 16.444692737430167, "grad_norm": 0.5294859409332275, "learning_rate": 0.00017865546218487395, "loss": 0.3849, "step": 29436 }, { "epoch": 16.445251396648043, "grad_norm": 0.47561389207839966, "learning_rate": 0.00017862745098039216, "loss": 0.4249, "step": 29437 }, { "epoch": 16.445810055865923, "grad_norm": 0.9982561469078064, "learning_rate": 0.00017859943977591037, "loss": 0.4407, "step": 29438 }, { "epoch": 16.4463687150838, "grad_norm": 0.5340924859046936, "learning_rate": 0.00017857142857142857, "loss": 0.4228, "step": 29439 }, { "epoch": 16.446927374301676, "grad_norm": 0.661406397819519, "learning_rate": 0.0001785434173669468, "loss": 0.4038, "step": 29440 }, { "epoch": 16.447486033519553, "grad_norm": 0.5189449787139893, "learning_rate": 0.00017851540616246498, "loss": 0.4325, "step": 29441 }, { "epoch": 16.44804469273743, "grad_norm": 0.45457956194877625, "learning_rate": 0.0001784873949579832, "loss": 0.3436, "step": 29442 }, { "epoch": 16.448603351955306, "grad_norm": 0.5822194218635559, "learning_rate": 0.0001784593837535014, "loss": 0.5424, "step": 29443 }, { "epoch": 16.449162011173183, "grad_norm": 0.6378493309020996, "learning_rate": 0.00017843137254901963, "loss": 0.5246, "step": 29444 }, { "epoch": 16.449720670391063, "grad_norm": 0.967460572719574, "learning_rate": 0.00017840336134453783, "loss": 0.3991, "step": 29445 }, { "epoch": 16.45027932960894, "grad_norm": 8.89358139038086, "learning_rate": 0.000178375350140056, "loss": 0.3686, "step": 29446 }, { "epoch": 16.450837988826816, "grad_norm": 0.5115857720375061, "learning_rate": 
0.00017834733893557422, "loss": 0.5709, "step": 29447 }, { "epoch": 16.451396648044692, "grad_norm": 1.2989612817764282, "learning_rate": 0.00017831932773109245, "loss": 0.544, "step": 29448 }, { "epoch": 16.45195530726257, "grad_norm": 0.4012458324432373, "learning_rate": 0.00017829131652661066, "loss": 0.4386, "step": 29449 }, { "epoch": 16.452513966480446, "grad_norm": 0.4779501259326935, "learning_rate": 0.00017826330532212886, "loss": 0.4002, "step": 29450 }, { "epoch": 16.453072625698326, "grad_norm": 0.36083459854125977, "learning_rate": 0.00017823529411764704, "loss": 0.3723, "step": 29451 }, { "epoch": 16.453631284916202, "grad_norm": 2.731084108352661, "learning_rate": 0.00017820728291316528, "loss": 0.48, "step": 29452 }, { "epoch": 16.45418994413408, "grad_norm": 0.5182414650917053, "learning_rate": 0.00017817927170868348, "loss": 0.449, "step": 29453 }, { "epoch": 16.454748603351955, "grad_norm": 0.32445117831230164, "learning_rate": 0.0001781512605042017, "loss": 0.319, "step": 29454 }, { "epoch": 16.455307262569832, "grad_norm": 0.6372569799423218, "learning_rate": 0.0001781232492997199, "loss": 0.3511, "step": 29455 }, { "epoch": 16.45586592178771, "grad_norm": 0.4452030658721924, "learning_rate": 0.0001780952380952381, "loss": 0.3397, "step": 29456 }, { "epoch": 16.456424581005585, "grad_norm": 0.4560225307941437, "learning_rate": 0.0001780672268907563, "loss": 0.394, "step": 29457 }, { "epoch": 16.456983240223465, "grad_norm": 0.5394875407218933, "learning_rate": 0.0001780392156862745, "loss": 0.5881, "step": 29458 }, { "epoch": 16.457541899441342, "grad_norm": 0.48844999074935913, "learning_rate": 0.00017801120448179272, "loss": 0.3734, "step": 29459 }, { "epoch": 16.45810055865922, "grad_norm": 0.4158102869987488, "learning_rate": 0.00017798319327731095, "loss": 0.2226, "step": 29460 }, { "epoch": 16.458659217877095, "grad_norm": 0.6215737462043762, "learning_rate": 0.00017795518207282913, "loss": 0.4187, "step": 29461 }, { "epoch": 
16.45921787709497, "grad_norm": 1.918904423713684, "learning_rate": 0.00017792717086834734, "loss": 0.3467, "step": 29462 }, { "epoch": 16.459776536312848, "grad_norm": 0.4171788692474365, "learning_rate": 0.00017789915966386554, "loss": 0.3431, "step": 29463 }, { "epoch": 16.460335195530725, "grad_norm": 0.36005398631095886, "learning_rate": 0.00017787114845938378, "loss": 0.3756, "step": 29464 }, { "epoch": 16.460893854748605, "grad_norm": 0.528662919998169, "learning_rate": 0.00017784313725490198, "loss": 0.5389, "step": 29465 }, { "epoch": 16.46145251396648, "grad_norm": 0.44890251755714417, "learning_rate": 0.00017781512605042016, "loss": 0.3353, "step": 29466 }, { "epoch": 16.462011173184358, "grad_norm": 0.33808767795562744, "learning_rate": 0.00017778711484593837, "loss": 0.4475, "step": 29467 }, { "epoch": 16.462569832402234, "grad_norm": 0.44032731652259827, "learning_rate": 0.0001777591036414566, "loss": 0.4351, "step": 29468 }, { "epoch": 16.46312849162011, "grad_norm": 3.4745051860809326, "learning_rate": 0.0001777310924369748, "loss": 0.4358, "step": 29469 }, { "epoch": 16.463687150837988, "grad_norm": 0.4250917434692383, "learning_rate": 0.000177703081232493, "loss": 0.5355, "step": 29470 }, { "epoch": 16.464245810055864, "grad_norm": 0.33331140875816345, "learning_rate": 0.0001776750700280112, "loss": 0.3472, "step": 29471 }, { "epoch": 16.464804469273744, "grad_norm": 1.1574751138687134, "learning_rate": 0.00017764705882352942, "loss": 0.4325, "step": 29472 }, { "epoch": 16.46536312849162, "grad_norm": 0.48738908767700195, "learning_rate": 0.00017761904761904763, "loss": 0.3956, "step": 29473 }, { "epoch": 16.465921787709497, "grad_norm": 0.41552281379699707, "learning_rate": 0.00017759103641456584, "loss": 0.3715, "step": 29474 }, { "epoch": 16.466480446927374, "grad_norm": 1.4205689430236816, "learning_rate": 0.00017756302521008404, "loss": 0.354, "step": 29475 }, { "epoch": 16.46703910614525, "grad_norm": 0.4148227274417877, "learning_rate": 
0.00017753501400560225, "loss": 0.2938, "step": 29476 }, { "epoch": 16.467597765363127, "grad_norm": 0.4061793386936188, "learning_rate": 0.00017750700280112045, "loss": 0.4087, "step": 29477 }, { "epoch": 16.468156424581007, "grad_norm": 0.43411895632743835, "learning_rate": 0.00017747899159663866, "loss": 0.3314, "step": 29478 }, { "epoch": 16.468715083798884, "grad_norm": 0.4680617153644562, "learning_rate": 0.00017745098039215687, "loss": 0.4388, "step": 29479 }, { "epoch": 16.46927374301676, "grad_norm": 0.424232542514801, "learning_rate": 0.0001774229691876751, "loss": 0.5143, "step": 29480 }, { "epoch": 16.469832402234637, "grad_norm": 0.5051302313804626, "learning_rate": 0.00017739495798319328, "loss": 0.6205, "step": 29481 }, { "epoch": 16.470391061452514, "grad_norm": 0.48615598678588867, "learning_rate": 0.00017736694677871148, "loss": 0.3775, "step": 29482 }, { "epoch": 16.47094972067039, "grad_norm": 0.42735230922698975, "learning_rate": 0.0001773389355742297, "loss": 0.3789, "step": 29483 }, { "epoch": 16.471508379888267, "grad_norm": 1.5181976556777954, "learning_rate": 0.00017731092436974792, "loss": 0.4173, "step": 29484 }, { "epoch": 16.472067039106147, "grad_norm": 0.8731113076210022, "learning_rate": 0.0001772829131652661, "loss": 0.4225, "step": 29485 }, { "epoch": 16.472625698324023, "grad_norm": 0.46482938528060913, "learning_rate": 0.0001772549019607843, "loss": 0.452, "step": 29486 }, { "epoch": 16.4731843575419, "grad_norm": 0.36842676997184753, "learning_rate": 0.0001772268907563025, "loss": 0.4023, "step": 29487 }, { "epoch": 16.473743016759776, "grad_norm": 0.3433108627796173, "learning_rate": 0.00017719887955182075, "loss": 0.3581, "step": 29488 }, { "epoch": 16.474301675977653, "grad_norm": 0.4484669268131256, "learning_rate": 0.00017717086834733895, "loss": 0.4462, "step": 29489 }, { "epoch": 16.47486033519553, "grad_norm": 20.87667465209961, "learning_rate": 0.00017714285714285713, "loss": 0.5043, "step": 29490 }, { "epoch": 
16.475418994413406, "grad_norm": 0.6367224454879761, "learning_rate": 0.00017711484593837534, "loss": 0.4368, "step": 29491 }, { "epoch": 16.475977653631286, "grad_norm": 1.043039083480835, "learning_rate": 0.00017708683473389357, "loss": 0.5384, "step": 29492 }, { "epoch": 16.476536312849163, "grad_norm": 0.3519802391529083, "learning_rate": 0.00017705882352941178, "loss": 0.3477, "step": 29493 }, { "epoch": 16.47709497206704, "grad_norm": 0.3984992802143097, "learning_rate": 0.00017703081232492998, "loss": 0.4305, "step": 29494 }, { "epoch": 16.477653631284916, "grad_norm": 1.0287542343139648, "learning_rate": 0.00017700280112044816, "loss": 0.6056, "step": 29495 }, { "epoch": 16.478212290502793, "grad_norm": 0.530757486820221, "learning_rate": 0.0001769747899159664, "loss": 0.3759, "step": 29496 }, { "epoch": 16.47877094972067, "grad_norm": 0.3497740626335144, "learning_rate": 0.0001769467787114846, "loss": 0.3848, "step": 29497 }, { "epoch": 16.47932960893855, "grad_norm": 0.6007401347160339, "learning_rate": 0.0001769187675070028, "loss": 0.4708, "step": 29498 }, { "epoch": 16.479888268156426, "grad_norm": 0.4128912091255188, "learning_rate": 0.000176890756302521, "loss": 0.3202, "step": 29499 }, { "epoch": 16.480446927374302, "grad_norm": 0.3892780542373657, "learning_rate": 0.00017686274509803922, "loss": 0.3687, "step": 29500 }, { "epoch": 16.480446927374302, "eval_cer": 0.08526180266876153, "eval_loss": 0.32208189368247986, "eval_runtime": 55.4806, "eval_samples_per_second": 81.794, "eval_steps_per_second": 5.119, "eval_wer": 0.3367783022820592, "step": 29500 }, { "epoch": 16.48100558659218, "grad_norm": 0.5070006847381592, "learning_rate": 0.00017683473389355742, "loss": 0.5553, "step": 29501 }, { "epoch": 16.481564245810056, "grad_norm": 0.5133941173553467, "learning_rate": 0.00017680672268907563, "loss": 0.3917, "step": 29502 }, { "epoch": 16.482122905027932, "grad_norm": 0.4133491516113281, "learning_rate": 0.00017677871148459384, "loss": 0.4881, 
"step": 29503 }, { "epoch": 16.48268156424581, "grad_norm": 1.5901294946670532, "learning_rate": 0.00017675070028011207, "loss": 0.3937, "step": 29504 }, { "epoch": 16.48324022346369, "grad_norm": 0.3636549711227417, "learning_rate": 0.00017672268907563025, "loss": 0.2974, "step": 29505 }, { "epoch": 16.483798882681565, "grad_norm": 0.8664739727973938, "learning_rate": 0.00017669467787114845, "loss": 0.561, "step": 29506 }, { "epoch": 16.484357541899442, "grad_norm": 0.9924991726875305, "learning_rate": 0.00017666666666666666, "loss": 0.5394, "step": 29507 }, { "epoch": 16.48491620111732, "grad_norm": 0.3601953089237213, "learning_rate": 0.0001766386554621849, "loss": 0.3947, "step": 29508 }, { "epoch": 16.485474860335195, "grad_norm": 0.39514946937561035, "learning_rate": 0.0001766106442577031, "loss": 0.3079, "step": 29509 }, { "epoch": 16.48603351955307, "grad_norm": 0.4760145843029022, "learning_rate": 0.00017658263305322128, "loss": 0.3853, "step": 29510 }, { "epoch": 16.486592178770948, "grad_norm": 0.9267053008079529, "learning_rate": 0.00017655462184873948, "loss": 0.4347, "step": 29511 }, { "epoch": 16.48715083798883, "grad_norm": 0.5790505409240723, "learning_rate": 0.00017652661064425772, "loss": 0.4244, "step": 29512 }, { "epoch": 16.487709497206705, "grad_norm": 0.5494601726531982, "learning_rate": 0.00017649859943977592, "loss": 0.5681, "step": 29513 }, { "epoch": 16.48826815642458, "grad_norm": 0.43294307589530945, "learning_rate": 0.00017647058823529413, "loss": 0.404, "step": 29514 }, { "epoch": 16.488826815642458, "grad_norm": 0.5143566131591797, "learning_rate": 0.0001764425770308123, "loss": 0.4754, "step": 29515 }, { "epoch": 16.489385474860335, "grad_norm": 1.9302611351013184, "learning_rate": 0.00017641456582633054, "loss": 0.3845, "step": 29516 }, { "epoch": 16.48994413407821, "grad_norm": 0.5820269584655762, "learning_rate": 0.00017638655462184875, "loss": 0.3088, "step": 29517 }, { "epoch": 16.490502793296088, "grad_norm": 
0.37830305099487305, "learning_rate": 0.00017635854341736695, "loss": 0.4489, "step": 29518 }, { "epoch": 16.491061452513968, "grad_norm": 0.4058821201324463, "learning_rate": 0.00017633053221288516, "loss": 0.429, "step": 29519 }, { "epoch": 16.491620111731844, "grad_norm": 0.3129950761795044, "learning_rate": 0.00017630252100840337, "loss": 0.3669, "step": 29520 }, { "epoch": 16.49217877094972, "grad_norm": 0.4125111401081085, "learning_rate": 0.00017627450980392157, "loss": 0.3638, "step": 29521 }, { "epoch": 16.492737430167598, "grad_norm": 0.850448727607727, "learning_rate": 0.00017624649859943978, "loss": 0.3668, "step": 29522 }, { "epoch": 16.493296089385474, "grad_norm": 1.0357177257537842, "learning_rate": 0.00017621848739495798, "loss": 0.3691, "step": 29523 }, { "epoch": 16.49385474860335, "grad_norm": 0.6750121116638184, "learning_rate": 0.00017619047619047622, "loss": 0.4634, "step": 29524 }, { "epoch": 16.49441340782123, "grad_norm": 0.40457144379615784, "learning_rate": 0.0001761624649859944, "loss": 0.4212, "step": 29525 }, { "epoch": 16.494972067039107, "grad_norm": 0.3802253007888794, "learning_rate": 0.0001761344537815126, "loss": 0.4377, "step": 29526 }, { "epoch": 16.495530726256984, "grad_norm": 0.5436435341835022, "learning_rate": 0.0001761064425770308, "loss": 0.3979, "step": 29527 }, { "epoch": 16.49608938547486, "grad_norm": 0.8033078908920288, "learning_rate": 0.00017607843137254904, "loss": 0.4785, "step": 29528 }, { "epoch": 16.496648044692737, "grad_norm": 0.46778449416160583, "learning_rate": 0.00017605042016806725, "loss": 0.4041, "step": 29529 }, { "epoch": 16.497206703910614, "grad_norm": 0.38254237174987793, "learning_rate": 0.00017602240896358543, "loss": 0.4057, "step": 29530 }, { "epoch": 16.49776536312849, "grad_norm": 0.4245761036872864, "learning_rate": 0.00017599439775910363, "loss": 0.3903, "step": 29531 }, { "epoch": 16.49832402234637, "grad_norm": 0.8905037045478821, "learning_rate": 0.00017596638655462186, "loss": 
0.3991, "step": 29532 }, { "epoch": 16.498882681564247, "grad_norm": 1.5270904302597046, "learning_rate": 0.00017593837535014007, "loss": 0.3446, "step": 29533 }, { "epoch": 16.499441340782123, "grad_norm": 1.6669657230377197, "learning_rate": 0.00017591036414565828, "loss": 0.3426, "step": 29534 }, { "epoch": 16.5, "grad_norm": 0.34264275431632996, "learning_rate": 0.00017588235294117646, "loss": 0.3028, "step": 29535 }, { "epoch": 16.500558659217877, "grad_norm": 0.4609209895133972, "learning_rate": 0.0001758543417366947, "loss": 0.3911, "step": 29536 }, { "epoch": 16.501117318435753, "grad_norm": 0.8710081577301025, "learning_rate": 0.0001758263305322129, "loss": 0.4124, "step": 29537 }, { "epoch": 16.50167597765363, "grad_norm": 0.4963414967060089, "learning_rate": 0.0001757983193277311, "loss": 0.365, "step": 29538 }, { "epoch": 16.50223463687151, "grad_norm": 0.8065279126167297, "learning_rate": 0.00017577030812324928, "loss": 0.367, "step": 29539 }, { "epoch": 16.502793296089386, "grad_norm": 0.4079006016254425, "learning_rate": 0.0001757422969187675, "loss": 0.4052, "step": 29540 }, { "epoch": 16.503351955307263, "grad_norm": 0.47505417466163635, "learning_rate": 0.00017571428571428572, "loss": 0.5816, "step": 29541 }, { "epoch": 16.50391061452514, "grad_norm": 0.49536171555519104, "learning_rate": 0.00017568627450980392, "loss": 0.4312, "step": 29542 }, { "epoch": 16.504469273743016, "grad_norm": 11.939494132995605, "learning_rate": 0.00017565826330532213, "loss": 0.4046, "step": 29543 }, { "epoch": 16.505027932960893, "grad_norm": 0.44392749667167664, "learning_rate": 0.00017563025210084034, "loss": 0.3759, "step": 29544 }, { "epoch": 16.505586592178773, "grad_norm": 0.3380763828754425, "learning_rate": 0.00017560224089635854, "loss": 0.3819, "step": 29545 }, { "epoch": 16.50614525139665, "grad_norm": 0.44330930709838867, "learning_rate": 0.00017557422969187675, "loss": 0.3929, "step": 29546 }, { "epoch": 16.506703910614526, "grad_norm": 
0.5489369034767151, "learning_rate": 0.00017554621848739495, "loss": 0.4064, "step": 29547 }, { "epoch": 16.507262569832402, "grad_norm": 0.46115559339523315, "learning_rate": 0.0001755182072829132, "loss": 0.4147, "step": 29548 }, { "epoch": 16.50782122905028, "grad_norm": 0.9333981275558472, "learning_rate": 0.00017549019607843137, "loss": 0.4228, "step": 29549 }, { "epoch": 16.508379888268156, "grad_norm": 0.512912929058075, "learning_rate": 0.00017546218487394957, "loss": 0.396, "step": 29550 }, { "epoch": 16.508938547486032, "grad_norm": 0.383973628282547, "learning_rate": 0.00017543417366946778, "loss": 0.3502, "step": 29551 }, { "epoch": 16.509497206703912, "grad_norm": 0.7826922535896301, "learning_rate": 0.000175406162464986, "loss": 0.5093, "step": 29552 }, { "epoch": 16.51005586592179, "grad_norm": 1.2715567350387573, "learning_rate": 0.00017537815126050422, "loss": 0.4078, "step": 29553 }, { "epoch": 16.510614525139665, "grad_norm": 0.7431827783584595, "learning_rate": 0.0001753501400560224, "loss": 0.3956, "step": 29554 }, { "epoch": 16.511173184357542, "grad_norm": 0.6131908893585205, "learning_rate": 0.0001753221288515406, "loss": 0.4175, "step": 29555 }, { "epoch": 16.51173184357542, "grad_norm": 0.3459623157978058, "learning_rate": 0.00017529411764705884, "loss": 0.382, "step": 29556 }, { "epoch": 16.512290502793295, "grad_norm": 0.3784160912036896, "learning_rate": 0.00017526610644257704, "loss": 0.4012, "step": 29557 }, { "epoch": 16.51284916201117, "grad_norm": 0.8570274710655212, "learning_rate": 0.00017523809523809525, "loss": 0.4445, "step": 29558 }, { "epoch": 16.513407821229052, "grad_norm": 0.3129422962665558, "learning_rate": 0.00017521008403361343, "loss": 0.4027, "step": 29559 }, { "epoch": 16.51396648044693, "grad_norm": 0.5028800964355469, "learning_rate": 0.00017518207282913166, "loss": 0.347, "step": 29560 }, { "epoch": 16.514525139664805, "grad_norm": 0.45034411549568176, "learning_rate": 0.00017515406162464987, "loss": 0.3658, 
"step": 29561 }, { "epoch": 16.51508379888268, "grad_norm": 0.49262264370918274, "learning_rate": 0.00017512605042016807, "loss": 0.3676, "step": 29562 }, { "epoch": 16.515642458100558, "grad_norm": 0.549745500087738, "learning_rate": 0.00017509803921568628, "loss": 0.3593, "step": 29563 }, { "epoch": 16.516201117318435, "grad_norm": 0.7150022387504578, "learning_rate": 0.00017507002801120448, "loss": 0.3858, "step": 29564 }, { "epoch": 16.51675977653631, "grad_norm": 0.3563117980957031, "learning_rate": 0.0001750420168067227, "loss": 0.3856, "step": 29565 }, { "epoch": 16.51731843575419, "grad_norm": 0.44275540113449097, "learning_rate": 0.0001750140056022409, "loss": 0.3878, "step": 29566 }, { "epoch": 16.517877094972068, "grad_norm": 0.6995197534561157, "learning_rate": 0.0001749859943977591, "loss": 0.3762, "step": 29567 }, { "epoch": 16.518435754189944, "grad_norm": 0.5018677115440369, "learning_rate": 0.00017495798319327733, "loss": 0.4436, "step": 29568 }, { "epoch": 16.51899441340782, "grad_norm": 0.5029260516166687, "learning_rate": 0.0001749299719887955, "loss": 0.4151, "step": 29569 }, { "epoch": 16.519553072625698, "grad_norm": 0.4033641815185547, "learning_rate": 0.00017490196078431372, "loss": 0.4366, "step": 29570 }, { "epoch": 16.520111731843574, "grad_norm": 0.777968168258667, "learning_rate": 0.00017487394957983193, "loss": 0.3825, "step": 29571 }, { "epoch": 16.52067039106145, "grad_norm": 0.4416103959083557, "learning_rate": 0.00017484593837535016, "loss": 0.4224, "step": 29572 }, { "epoch": 16.52122905027933, "grad_norm": 11.347146987915039, "learning_rate": 0.00017481792717086836, "loss": 0.4416, "step": 29573 }, { "epoch": 16.521787709497207, "grad_norm": 0.4550493061542511, "learning_rate": 0.00017478991596638654, "loss": 0.3259, "step": 29574 }, { "epoch": 16.522346368715084, "grad_norm": 0.6317253112792969, "learning_rate": 0.00017476190476190475, "loss": 0.3822, "step": 29575 }, { "epoch": 16.52290502793296, "grad_norm": 
0.6731358170509338, "learning_rate": 0.00017473389355742298, "loss": 0.4104, "step": 29576 }, { "epoch": 16.523463687150837, "grad_norm": 0.7160224914550781, "learning_rate": 0.0001747058823529412, "loss": 0.2865, "step": 29577 }, { "epoch": 16.524022346368714, "grad_norm": 0.4603045880794525, "learning_rate": 0.0001746778711484594, "loss": 0.3875, "step": 29578 }, { "epoch": 16.524581005586594, "grad_norm": 0.43759676814079285, "learning_rate": 0.00017464985994397757, "loss": 0.4154, "step": 29579 }, { "epoch": 16.52513966480447, "grad_norm": 0.39633890986442566, "learning_rate": 0.0001746218487394958, "loss": 0.4358, "step": 29580 }, { "epoch": 16.525698324022347, "grad_norm": 0.3954674303531647, "learning_rate": 0.000174593837535014, "loss": 0.4003, "step": 29581 }, { "epoch": 16.526256983240224, "grad_norm": 0.39999040961265564, "learning_rate": 0.00017456582633053222, "loss": 0.4105, "step": 29582 }, { "epoch": 16.5268156424581, "grad_norm": 4.218301773071289, "learning_rate": 0.00017453781512605042, "loss": 0.4567, "step": 29583 }, { "epoch": 16.527374301675977, "grad_norm": 2.76589035987854, "learning_rate": 0.00017450980392156863, "loss": 0.5069, "step": 29584 }, { "epoch": 16.527932960893853, "grad_norm": 0.4792419970035553, "learning_rate": 0.00017448179271708684, "loss": 0.399, "step": 29585 }, { "epoch": 16.528491620111733, "grad_norm": 0.5354159474372864, "learning_rate": 0.00017445378151260504, "loss": 0.4727, "step": 29586 }, { "epoch": 16.52905027932961, "grad_norm": 0.38616445660591125, "learning_rate": 0.00017442577030812325, "loss": 0.3983, "step": 29587 }, { "epoch": 16.529608938547486, "grad_norm": 0.4955047369003296, "learning_rate": 0.00017439775910364148, "loss": 0.394, "step": 29588 }, { "epoch": 16.530167597765363, "grad_norm": 0.3584757149219513, "learning_rate": 0.00017436974789915966, "loss": 0.362, "step": 29589 }, { "epoch": 16.53072625698324, "grad_norm": 0.40841972827911377, "learning_rate": 0.00017434173669467787, "loss": 0.3846, 
"step": 29590 }, { "epoch": 16.531284916201116, "grad_norm": 0.9143375158309937, "learning_rate": 0.00017431372549019607, "loss": 0.4077, "step": 29591 }, { "epoch": 16.531843575418993, "grad_norm": 0.4993152916431427, "learning_rate": 0.0001742857142857143, "loss": 0.4283, "step": 29592 }, { "epoch": 16.532402234636873, "grad_norm": 15.13156795501709, "learning_rate": 0.0001742577030812325, "loss": 0.417, "step": 29593 }, { "epoch": 16.53296089385475, "grad_norm": 0.5883806943893433, "learning_rate": 0.0001742296918767507, "loss": 0.3737, "step": 29594 }, { "epoch": 16.533519553072626, "grad_norm": 0.4294358193874359, "learning_rate": 0.0001742016806722689, "loss": 0.4663, "step": 29595 }, { "epoch": 16.534078212290503, "grad_norm": 0.5711474418640137, "learning_rate": 0.00017417366946778713, "loss": 0.3758, "step": 29596 }, { "epoch": 16.53463687150838, "grad_norm": 0.45019978284835815, "learning_rate": 0.00017414565826330534, "loss": 0.4607, "step": 29597 }, { "epoch": 16.535195530726256, "grad_norm": 0.45127925276756287, "learning_rate": 0.00017411764705882351, "loss": 0.5163, "step": 29598 }, { "epoch": 16.535754189944136, "grad_norm": 7.641594886779785, "learning_rate": 0.00017408963585434172, "loss": 0.3904, "step": 29599 }, { "epoch": 16.536312849162012, "grad_norm": 0.4126541316509247, "learning_rate": 0.00017406162464985995, "loss": 0.4359, "step": 29600 }, { "epoch": 16.53687150837989, "grad_norm": 0.4351097643375397, "learning_rate": 0.00017403361344537816, "loss": 0.4096, "step": 29601 }, { "epoch": 16.537430167597766, "grad_norm": 3.101602077484131, "learning_rate": 0.00017400560224089637, "loss": 0.4188, "step": 29602 }, { "epoch": 16.537988826815642, "grad_norm": 0.5362922549247742, "learning_rate": 0.00017397759103641454, "loss": 0.3835, "step": 29603 }, { "epoch": 16.53854748603352, "grad_norm": 0.44642752408981323, "learning_rate": 0.00017394957983193278, "loss": 0.373, "step": 29604 }, { "epoch": 16.539106145251395, "grad_norm": 
0.4679783284664154, "learning_rate": 0.00017392156862745098, "loss": 0.3426, "step": 29605 }, { "epoch": 16.539664804469275, "grad_norm": 1.766141414642334, "learning_rate": 0.0001738935574229692, "loss": 0.3562, "step": 29606 }, { "epoch": 16.540223463687152, "grad_norm": 1.2823249101638794, "learning_rate": 0.0001738655462184874, "loss": 0.4688, "step": 29607 }, { "epoch": 16.54078212290503, "grad_norm": 1.7663451433181763, "learning_rate": 0.0001738375350140056, "loss": 0.4044, "step": 29608 }, { "epoch": 16.541340782122905, "grad_norm": 1.6318271160125732, "learning_rate": 0.0001738095238095238, "loss": 0.5122, "step": 29609 }, { "epoch": 16.54189944134078, "grad_norm": 0.9168058633804321, "learning_rate": 0.000173781512605042, "loss": 0.3594, "step": 29610 }, { "epoch": 16.542458100558658, "grad_norm": 0.5199082493782043, "learning_rate": 0.00017375350140056022, "loss": 0.4907, "step": 29611 }, { "epoch": 16.543016759776535, "grad_norm": 0.6399844288825989, "learning_rate": 0.00017372549019607845, "loss": 0.5804, "step": 29612 }, { "epoch": 16.543575418994415, "grad_norm": 0.9436054825782776, "learning_rate": 0.00017369747899159663, "loss": 0.2925, "step": 29613 }, { "epoch": 16.54413407821229, "grad_norm": 0.544022262096405, "learning_rate": 0.00017366946778711484, "loss": 0.4792, "step": 29614 }, { "epoch": 16.544692737430168, "grad_norm": 0.4976023733615875, "learning_rate": 0.00017364145658263304, "loss": 0.4114, "step": 29615 }, { "epoch": 16.545251396648045, "grad_norm": 1.0800968408584595, "learning_rate": 0.00017361344537815128, "loss": 0.3403, "step": 29616 }, { "epoch": 16.54581005586592, "grad_norm": 0.517621636390686, "learning_rate": 0.00017358543417366948, "loss": 0.3453, "step": 29617 }, { "epoch": 16.546368715083798, "grad_norm": 1.2114115953445435, "learning_rate": 0.00017355742296918766, "loss": 0.3807, "step": 29618 }, { "epoch": 16.546927374301674, "grad_norm": 0.39131876826286316, "learning_rate": 0.00017352941176470587, "loss": 0.3461, 
"step": 29619 }, { "epoch": 16.547486033519554, "grad_norm": 1.1608264446258545, "learning_rate": 0.0001735014005602241, "loss": 0.5039, "step": 29620 }, { "epoch": 16.54804469273743, "grad_norm": 0.32489052414894104, "learning_rate": 0.0001734733893557423, "loss": 0.4094, "step": 29621 }, { "epoch": 16.548603351955308, "grad_norm": 0.4785040318965912, "learning_rate": 0.0001734453781512605, "loss": 0.4733, "step": 29622 }, { "epoch": 16.549162011173184, "grad_norm": 0.37516117095947266, "learning_rate": 0.0001734173669467787, "loss": 0.3945, "step": 29623 }, { "epoch": 16.54972067039106, "grad_norm": 0.4778275191783905, "learning_rate": 0.00017338935574229692, "loss": 0.4298, "step": 29624 }, { "epoch": 16.550279329608937, "grad_norm": 0.4768771231174469, "learning_rate": 0.00017336134453781513, "loss": 0.3231, "step": 29625 }, { "epoch": 16.550837988826817, "grad_norm": 0.48715725541114807, "learning_rate": 0.00017333333333333334, "loss": 0.4273, "step": 29626 }, { "epoch": 16.551396648044694, "grad_norm": 0.6316798329353333, "learning_rate": 0.00017330532212885157, "loss": 0.4241, "step": 29627 }, { "epoch": 16.55195530726257, "grad_norm": 1.118650197982788, "learning_rate": 0.00017327731092436975, "loss": 0.4511, "step": 29628 }, { "epoch": 16.552513966480447, "grad_norm": 2.3481647968292236, "learning_rate": 0.00017324929971988795, "loss": 0.4329, "step": 29629 }, { "epoch": 16.553072625698324, "grad_norm": 0.3932090103626251, "learning_rate": 0.00017322128851540616, "loss": 0.4335, "step": 29630 }, { "epoch": 16.5536312849162, "grad_norm": 0.5164644718170166, "learning_rate": 0.0001731932773109244, "loss": 0.514, "step": 29631 }, { "epoch": 16.554189944134077, "grad_norm": 0.6813526153564453, "learning_rate": 0.0001731652661064426, "loss": 0.4278, "step": 29632 }, { "epoch": 16.554748603351957, "grad_norm": 0.5018157958984375, "learning_rate": 0.00017313725490196078, "loss": 0.4374, "step": 29633 }, { "epoch": 16.555307262569833, "grad_norm": 
0.4966731369495392, "learning_rate": 0.00017310924369747898, "loss": 0.5462, "step": 29634 }, { "epoch": 16.55586592178771, "grad_norm": 0.307049959897995, "learning_rate": 0.00017308123249299722, "loss": 0.3618, "step": 29635 }, { "epoch": 16.556424581005587, "grad_norm": 0.5479479432106018, "learning_rate": 0.00017305322128851542, "loss": 0.355, "step": 29636 }, { "epoch": 16.556983240223463, "grad_norm": 1.2515720129013062, "learning_rate": 0.00017302521008403363, "loss": 0.3501, "step": 29637 }, { "epoch": 16.55754189944134, "grad_norm": 0.7564569711685181, "learning_rate": 0.0001729971988795518, "loss": 0.4521, "step": 29638 }, { "epoch": 16.558100558659216, "grad_norm": 0.6315253376960754, "learning_rate": 0.00017296918767507004, "loss": 0.4293, "step": 29639 }, { "epoch": 16.558659217877096, "grad_norm": 0.4353763163089752, "learning_rate": 0.00017294117647058825, "loss": 0.2829, "step": 29640 }, { "epoch": 16.559217877094973, "grad_norm": 0.518596887588501, "learning_rate": 0.00017291316526610645, "loss": 0.3557, "step": 29641 }, { "epoch": 16.55977653631285, "grad_norm": 0.5975028872489929, "learning_rate": 0.00017288515406162466, "loss": 0.3438, "step": 29642 }, { "epoch": 16.560335195530726, "grad_norm": 0.4311278164386749, "learning_rate": 0.00017285714285714287, "loss": 0.3413, "step": 29643 }, { "epoch": 16.560893854748603, "grad_norm": 0.42580920457839966, "learning_rate": 0.00017282913165266107, "loss": 0.4041, "step": 29644 }, { "epoch": 16.56145251396648, "grad_norm": 0.40019556879997253, "learning_rate": 0.00017280112044817928, "loss": 0.4361, "step": 29645 }, { "epoch": 16.56201117318436, "grad_norm": 1.3777707815170288, "learning_rate": 0.00017277310924369748, "loss": 0.3613, "step": 29646 }, { "epoch": 16.562569832402236, "grad_norm": 0.45521843433380127, "learning_rate": 0.00017274509803921572, "loss": 0.4192, "step": 29647 }, { "epoch": 16.563128491620112, "grad_norm": 0.9244868159294128, "learning_rate": 0.0001727170868347339, "loss": 
0.501, "step": 29648 }, { "epoch": 16.56368715083799, "grad_norm": 1.7845511436462402, "learning_rate": 0.0001726890756302521, "loss": 0.432, "step": 29649 }, { "epoch": 16.564245810055866, "grad_norm": 0.7052385210990906, "learning_rate": 0.0001726610644257703, "loss": 0.5091, "step": 29650 }, { "epoch": 16.564804469273742, "grad_norm": 1.2290674448013306, "learning_rate": 0.00017263305322128854, "loss": 0.5519, "step": 29651 }, { "epoch": 16.56536312849162, "grad_norm": 2.1546733379364014, "learning_rate": 0.00017260504201680672, "loss": 0.4083, "step": 29652 }, { "epoch": 16.5659217877095, "grad_norm": 0.5240933299064636, "learning_rate": 0.00017257703081232493, "loss": 0.4367, "step": 29653 }, { "epoch": 16.566480446927375, "grad_norm": 1.0836304426193237, "learning_rate": 0.00017254901960784313, "loss": 0.5049, "step": 29654 }, { "epoch": 16.567039106145252, "grad_norm": 0.48378872871398926, "learning_rate": 0.00017252100840336136, "loss": 0.4602, "step": 29655 }, { "epoch": 16.56759776536313, "grad_norm": 0.6799153089523315, "learning_rate": 0.00017249299719887957, "loss": 0.4034, "step": 29656 }, { "epoch": 16.568156424581005, "grad_norm": 1.4692351818084717, "learning_rate": 0.00017246498599439775, "loss": 0.5495, "step": 29657 }, { "epoch": 16.56871508379888, "grad_norm": 0.4837232828140259, "learning_rate": 0.00017243697478991596, "loss": 0.3757, "step": 29658 }, { "epoch": 16.56927374301676, "grad_norm": 0.39382028579711914, "learning_rate": 0.0001724089635854342, "loss": 0.3576, "step": 29659 }, { "epoch": 16.56983240223464, "grad_norm": 0.4457899034023285, "learning_rate": 0.0001723809523809524, "loss": 0.3659, "step": 29660 }, { "epoch": 16.570391061452515, "grad_norm": 0.5575380325317383, "learning_rate": 0.0001723529411764706, "loss": 0.478, "step": 29661 }, { "epoch": 16.57094972067039, "grad_norm": 0.9752169847488403, "learning_rate": 0.00017232492997198878, "loss": 0.3735, "step": 29662 }, { "epoch": 16.571508379888268, "grad_norm": 
0.40215227007865906, "learning_rate": 0.000172296918767507, "loss": 0.4444, "step": 29663 }, { "epoch": 16.572067039106145, "grad_norm": 0.5985216498374939, "learning_rate": 0.00017226890756302522, "loss": 0.3131, "step": 29664 }, { "epoch": 16.57262569832402, "grad_norm": 0.5278451442718506, "learning_rate": 0.00017224089635854342, "loss": 0.3822, "step": 29665 }, { "epoch": 16.573184357541898, "grad_norm": 0.4382477104663849, "learning_rate": 0.00017221288515406163, "loss": 0.4592, "step": 29666 }, { "epoch": 16.573743016759778, "grad_norm": 0.9168773889541626, "learning_rate": 0.00017218487394957984, "loss": 0.5365, "step": 29667 }, { "epoch": 16.574301675977654, "grad_norm": 0.41254740953445435, "learning_rate": 0.00017215686274509804, "loss": 0.3662, "step": 29668 }, { "epoch": 16.57486033519553, "grad_norm": 0.7508476972579956, "learning_rate": 0.00017212885154061625, "loss": 0.6102, "step": 29669 }, { "epoch": 16.575418994413408, "grad_norm": 0.4705517590045929, "learning_rate": 0.00017210084033613445, "loss": 0.4243, "step": 29670 }, { "epoch": 16.575977653631284, "grad_norm": 0.5159240365028381, "learning_rate": 0.0001720728291316527, "loss": 0.3668, "step": 29671 }, { "epoch": 16.57653631284916, "grad_norm": 2.45832896232605, "learning_rate": 0.00017204481792717087, "loss": 0.3968, "step": 29672 }, { "epoch": 16.577094972067037, "grad_norm": 0.47152748703956604, "learning_rate": 0.00017201680672268907, "loss": 0.3763, "step": 29673 }, { "epoch": 16.577653631284917, "grad_norm": 0.49372634291648865, "learning_rate": 0.00017198879551820728, "loss": 0.3271, "step": 29674 }, { "epoch": 16.578212290502794, "grad_norm": 0.37817874550819397, "learning_rate": 0.0001719607843137255, "loss": 0.4885, "step": 29675 }, { "epoch": 16.57877094972067, "grad_norm": 0.7956822514533997, "learning_rate": 0.00017193277310924372, "loss": 0.4818, "step": 29676 }, { "epoch": 16.579329608938547, "grad_norm": 0.4049944281578064, "learning_rate": 0.0001719047619047619, "loss": 
0.3986, "step": 29677 }, { "epoch": 16.579888268156424, "grad_norm": 0.4858841001987457, "learning_rate": 0.0001718767507002801, "loss": 0.3722, "step": 29678 }, { "epoch": 16.5804469273743, "grad_norm": 0.5916111469268799, "learning_rate": 0.00017184873949579834, "loss": 0.4235, "step": 29679 }, { "epoch": 16.58100558659218, "grad_norm": 0.6348965167999268, "learning_rate": 0.00017182072829131654, "loss": 0.4009, "step": 29680 }, { "epoch": 16.581564245810057, "grad_norm": 0.45960313081741333, "learning_rate": 0.00017179271708683475, "loss": 0.4042, "step": 29681 }, { "epoch": 16.582122905027934, "grad_norm": 0.4433102309703827, "learning_rate": 0.00017176470588235293, "loss": 0.3529, "step": 29682 }, { "epoch": 16.58268156424581, "grad_norm": 0.34756675362586975, "learning_rate": 0.00017173669467787116, "loss": 0.3749, "step": 29683 }, { "epoch": 16.583240223463687, "grad_norm": 0.4158267676830292, "learning_rate": 0.00017170868347338937, "loss": 0.44, "step": 29684 }, { "epoch": 16.583798882681563, "grad_norm": 0.6636767983436584, "learning_rate": 0.00017168067226890757, "loss": 0.3907, "step": 29685 }, { "epoch": 16.58435754189944, "grad_norm": 3.3351364135742188, "learning_rate": 0.00017165266106442578, "loss": 0.3563, "step": 29686 }, { "epoch": 16.58491620111732, "grad_norm": 0.5583810210227966, "learning_rate": 0.00017162464985994398, "loss": 0.3593, "step": 29687 }, { "epoch": 16.585474860335196, "grad_norm": 0.4328242540359497, "learning_rate": 0.0001715966386554622, "loss": 0.4542, "step": 29688 }, { "epoch": 16.586033519553073, "grad_norm": 2.0626401901245117, "learning_rate": 0.0001715686274509804, "loss": 0.7094, "step": 29689 }, { "epoch": 16.58659217877095, "grad_norm": 0.37423473596572876, "learning_rate": 0.0001715406162464986, "loss": 0.3501, "step": 29690 }, { "epoch": 16.587150837988826, "grad_norm": 0.5140286087989807, "learning_rate": 0.00017151260504201683, "loss": 0.3606, "step": 29691 }, { "epoch": 16.587709497206703, "grad_norm": 
0.5087933540344238, "learning_rate": 0.000171484593837535, "loss": 0.3522, "step": 29692 }, { "epoch": 16.58826815642458, "grad_norm": 0.5980960726737976, "learning_rate": 0.00017145658263305322, "loss": 0.4719, "step": 29693 }, { "epoch": 16.58882681564246, "grad_norm": 1.4996987581253052, "learning_rate": 0.00017142857142857143, "loss": 0.512, "step": 29694 }, { "epoch": 16.589385474860336, "grad_norm": 0.6197682619094849, "learning_rate": 0.00017140056022408966, "loss": 0.6934, "step": 29695 }, { "epoch": 16.589944134078213, "grad_norm": 1.5981677770614624, "learning_rate": 0.00017137254901960786, "loss": 0.5227, "step": 29696 }, { "epoch": 16.59050279329609, "grad_norm": 1.542922854423523, "learning_rate": 0.00017134453781512604, "loss": 0.5042, "step": 29697 }, { "epoch": 16.591061452513966, "grad_norm": 0.36248210072517395, "learning_rate": 0.00017131652661064425, "loss": 0.3431, "step": 29698 }, { "epoch": 16.591620111731842, "grad_norm": 0.4703643023967743, "learning_rate": 0.00017128851540616248, "loss": 0.4712, "step": 29699 }, { "epoch": 16.592178770949722, "grad_norm": 0.5619949102401733, "learning_rate": 0.0001712605042016807, "loss": 0.3888, "step": 29700 }, { "epoch": 16.5927374301676, "grad_norm": 0.42303141951560974, "learning_rate": 0.0001712324929971989, "loss": 0.4616, "step": 29701 }, { "epoch": 16.593296089385476, "grad_norm": 0.44406846165657043, "learning_rate": 0.00017120448179271707, "loss": 0.4879, "step": 29702 }, { "epoch": 16.593854748603352, "grad_norm": 0.8510697484016418, "learning_rate": 0.0001711764705882353, "loss": 0.4943, "step": 29703 }, { "epoch": 16.59441340782123, "grad_norm": 0.3986489772796631, "learning_rate": 0.0001711484593837535, "loss": 0.3103, "step": 29704 }, { "epoch": 16.594972067039105, "grad_norm": 0.4285847246646881, "learning_rate": 0.00017112044817927172, "loss": 0.4788, "step": 29705 }, { "epoch": 16.595530726256982, "grad_norm": 0.33925896883010864, "learning_rate": 0.0001710924369747899, "loss": 0.4475, 
"step": 29706 }, { "epoch": 16.596089385474862, "grad_norm": 4.6819586753845215, "learning_rate": 0.00017106442577030813, "loss": 0.4485, "step": 29707 }, { "epoch": 16.59664804469274, "grad_norm": 0.4595019519329071, "learning_rate": 0.00017103641456582634, "loss": 0.3516, "step": 29708 }, { "epoch": 16.597206703910615, "grad_norm": 0.4340721368789673, "learning_rate": 0.00017100840336134454, "loss": 0.2841, "step": 29709 }, { "epoch": 16.59776536312849, "grad_norm": 0.5420017838478088, "learning_rate": 0.00017098039215686275, "loss": 0.3986, "step": 29710 }, { "epoch": 16.598324022346368, "grad_norm": 0.5166442394256592, "learning_rate": 0.00017095238095238095, "loss": 0.3902, "step": 29711 }, { "epoch": 16.598882681564245, "grad_norm": 0.38179582357406616, "learning_rate": 0.00017092436974789916, "loss": 0.3904, "step": 29712 }, { "epoch": 16.59944134078212, "grad_norm": 0.36862191557884216, "learning_rate": 0.00017089635854341737, "loss": 0.3877, "step": 29713 }, { "epoch": 16.6, "grad_norm": 0.6554352641105652, "learning_rate": 0.00017086834733893557, "loss": 0.4739, "step": 29714 }, { "epoch": 16.600558659217878, "grad_norm": 1.3777062892913818, "learning_rate": 0.0001708403361344538, "loss": 0.8374, "step": 29715 }, { "epoch": 16.601117318435755, "grad_norm": 0.5427790880203247, "learning_rate": 0.00017081232492997198, "loss": 0.4328, "step": 29716 }, { "epoch": 16.60167597765363, "grad_norm": 0.4491827189922333, "learning_rate": 0.0001707843137254902, "loss": 0.3879, "step": 29717 }, { "epoch": 16.602234636871508, "grad_norm": 0.4406115710735321, "learning_rate": 0.0001707563025210084, "loss": 0.3991, "step": 29718 }, { "epoch": 16.602793296089384, "grad_norm": 0.5698709487915039, "learning_rate": 0.00017072829131652663, "loss": 0.6151, "step": 29719 }, { "epoch": 16.60335195530726, "grad_norm": 0.4022897183895111, "learning_rate": 0.00017070028011204484, "loss": 0.3703, "step": 29720 }, { "epoch": 16.60391061452514, "grad_norm": 0.3980019986629486, 
"learning_rate": 0.00017067226890756301, "loss": 0.4458, "step": 29721 }, { "epoch": 16.604469273743018, "grad_norm": 3.4253838062286377, "learning_rate": 0.00017064425770308122, "loss": 0.4251, "step": 29722 }, { "epoch": 16.605027932960894, "grad_norm": 0.35719001293182373, "learning_rate": 0.00017061624649859945, "loss": 0.37, "step": 29723 }, { "epoch": 16.60558659217877, "grad_norm": 0.5619191527366638, "learning_rate": 0.00017058823529411766, "loss": 0.352, "step": 29724 }, { "epoch": 16.606145251396647, "grad_norm": 0.34911277890205383, "learning_rate": 0.00017056022408963587, "loss": 0.3919, "step": 29725 }, { "epoch": 16.606703910614524, "grad_norm": 0.5471906661987305, "learning_rate": 0.00017053221288515404, "loss": 0.5871, "step": 29726 }, { "epoch": 16.607262569832404, "grad_norm": 0.4728212058544159, "learning_rate": 0.00017050420168067228, "loss": 0.3922, "step": 29727 }, { "epoch": 16.60782122905028, "grad_norm": 0.344344824552536, "learning_rate": 0.00017047619047619048, "loss": 0.3806, "step": 29728 }, { "epoch": 16.608379888268157, "grad_norm": 0.9261954426765442, "learning_rate": 0.0001704481792717087, "loss": 0.4156, "step": 29729 }, { "epoch": 16.608938547486034, "grad_norm": 0.3845740556716919, "learning_rate": 0.0001704201680672269, "loss": 0.3719, "step": 29730 }, { "epoch": 16.60949720670391, "grad_norm": 0.3786081373691559, "learning_rate": 0.0001703921568627451, "loss": 0.4619, "step": 29731 }, { "epoch": 16.610055865921787, "grad_norm": 9.380817413330078, "learning_rate": 0.0001703641456582633, "loss": 0.4, "step": 29732 }, { "epoch": 16.610614525139663, "grad_norm": 1.563375473022461, "learning_rate": 0.0001703361344537815, "loss": 0.5395, "step": 29733 }, { "epoch": 16.611173184357543, "grad_norm": 0.7948753237724304, "learning_rate": 0.00017030812324929972, "loss": 0.3641, "step": 29734 }, { "epoch": 16.61173184357542, "grad_norm": 0.8591349124908447, "learning_rate": 0.00017028011204481795, "loss": 0.4049, "step": 29735 }, { 
"epoch": 16.612290502793297, "grad_norm": 0.8327168822288513, "learning_rate": 0.00017025210084033613, "loss": 0.5116, "step": 29736 }, { "epoch": 16.612849162011173, "grad_norm": 0.4438212215900421, "learning_rate": 0.00017022408963585434, "loss": 0.3541, "step": 29737 }, { "epoch": 16.61340782122905, "grad_norm": 0.5014461278915405, "learning_rate": 0.00017019607843137254, "loss": 0.3421, "step": 29738 }, { "epoch": 16.613966480446926, "grad_norm": 1.445834994316101, "learning_rate": 0.00017016806722689078, "loss": 0.3864, "step": 29739 }, { "epoch": 16.614525139664803, "grad_norm": 0.4010203778743744, "learning_rate": 0.00017014005602240898, "loss": 0.4058, "step": 29740 }, { "epoch": 16.615083798882683, "grad_norm": 1.4026222229003906, "learning_rate": 0.00017011204481792716, "loss": 0.3821, "step": 29741 }, { "epoch": 16.61564245810056, "grad_norm": 1.3032084703445435, "learning_rate": 0.00017008403361344537, "loss": 0.5126, "step": 29742 }, { "epoch": 16.616201117318436, "grad_norm": 0.4361550509929657, "learning_rate": 0.0001700560224089636, "loss": 0.4642, "step": 29743 }, { "epoch": 16.616759776536313, "grad_norm": 0.4836312234401703, "learning_rate": 0.0001700280112044818, "loss": 0.3256, "step": 29744 }, { "epoch": 16.61731843575419, "grad_norm": 0.4750947654247284, "learning_rate": 0.00017, "loss": 0.4166, "step": 29745 }, { "epoch": 16.617877094972066, "grad_norm": 1.03825044631958, "learning_rate": 0.0001699719887955182, "loss": 0.4849, "step": 29746 }, { "epoch": 16.618435754189946, "grad_norm": 0.4988580346107483, "learning_rate": 0.00016994397759103642, "loss": 0.4587, "step": 29747 }, { "epoch": 16.618994413407822, "grad_norm": 0.8459121584892273, "learning_rate": 0.00016991596638655463, "loss": 0.4798, "step": 29748 }, { "epoch": 16.6195530726257, "grad_norm": 0.3887941837310791, "learning_rate": 0.00016988795518207284, "loss": 0.3233, "step": 29749 }, { "epoch": 16.620111731843576, "grad_norm": 0.6093942523002625, "learning_rate": 
0.00016985994397759104, "loss": 0.3486, "step": 29750 }, { "epoch": 16.620670391061452, "grad_norm": 0.3763768970966339, "learning_rate": 0.00016983193277310925, "loss": 0.4905, "step": 29751 }, { "epoch": 16.62122905027933, "grad_norm": 0.47057199478149414, "learning_rate": 0.00016980392156862745, "loss": 0.4818, "step": 29752 }, { "epoch": 16.621787709497205, "grad_norm": 0.46999314427375793, "learning_rate": 0.00016977591036414566, "loss": 0.3309, "step": 29753 }, { "epoch": 16.622346368715085, "grad_norm": 0.4252774715423584, "learning_rate": 0.00016974789915966387, "loss": 0.3791, "step": 29754 }, { "epoch": 16.622905027932962, "grad_norm": 0.33144935965538025, "learning_rate": 0.0001697198879551821, "loss": 0.3416, "step": 29755 }, { "epoch": 16.62346368715084, "grad_norm": 1.0748701095581055, "learning_rate": 0.00016969187675070028, "loss": 0.4465, "step": 29756 }, { "epoch": 16.624022346368715, "grad_norm": 0.46357840299606323, "learning_rate": 0.00016966386554621848, "loss": 0.5094, "step": 29757 }, { "epoch": 16.62458100558659, "grad_norm": 1.2137809991836548, "learning_rate": 0.0001696358543417367, "loss": 0.4933, "step": 29758 }, { "epoch": 16.62513966480447, "grad_norm": 0.48791569471359253, "learning_rate": 0.00016960784313725492, "loss": 0.4891, "step": 29759 }, { "epoch": 16.625698324022345, "grad_norm": 0.6240214705467224, "learning_rate": 0.00016957983193277313, "loss": 0.462, "step": 29760 }, { "epoch": 16.626256983240225, "grad_norm": 0.4574168920516968, "learning_rate": 0.0001695518207282913, "loss": 0.4368, "step": 29761 }, { "epoch": 16.6268156424581, "grad_norm": 2.8725969791412354, "learning_rate": 0.00016952380952380951, "loss": 0.3746, "step": 29762 }, { "epoch": 16.627374301675978, "grad_norm": 0.3899432420730591, "learning_rate": 0.00016949579831932775, "loss": 0.3437, "step": 29763 }, { "epoch": 16.627932960893855, "grad_norm": 0.48070505261421204, "learning_rate": 0.00016946778711484595, "loss": 0.4328, "step": 29764 }, { "epoch": 
16.62849162011173, "grad_norm": 0.9653042554855347, "learning_rate": 0.00016943977591036413, "loss": 0.4131, "step": 29765 }, { "epoch": 16.629050279329608, "grad_norm": 0.5093980431556702, "learning_rate": 0.00016941176470588234, "loss": 0.3805, "step": 29766 }, { "epoch": 16.629608938547484, "grad_norm": 0.5078274011611938, "learning_rate": 0.00016938375350140057, "loss": 0.5362, "step": 29767 }, { "epoch": 16.630167597765364, "grad_norm": 0.3415151834487915, "learning_rate": 0.00016935574229691878, "loss": 0.3357, "step": 29768 }, { "epoch": 16.63072625698324, "grad_norm": 1.2921910285949707, "learning_rate": 0.00016932773109243698, "loss": 0.4118, "step": 29769 }, { "epoch": 16.631284916201118, "grad_norm": 0.8225568532943726, "learning_rate": 0.00016929971988795516, "loss": 0.6127, "step": 29770 }, { "epoch": 16.631843575418994, "grad_norm": 0.41550955176353455, "learning_rate": 0.0001692717086834734, "loss": 0.4673, "step": 29771 }, { "epoch": 16.63240223463687, "grad_norm": 0.5405202507972717, "learning_rate": 0.0001692436974789916, "loss": 0.3966, "step": 29772 }, { "epoch": 16.632960893854747, "grad_norm": 0.2869426906108856, "learning_rate": 0.0001692156862745098, "loss": 0.3843, "step": 29773 }, { "epoch": 16.633519553072627, "grad_norm": 0.6287881135940552, "learning_rate": 0.000169187675070028, "loss": 0.4232, "step": 29774 }, { "epoch": 16.634078212290504, "grad_norm": 0.6040198802947998, "learning_rate": 0.00016915966386554622, "loss": 0.3733, "step": 29775 }, { "epoch": 16.63463687150838, "grad_norm": 0.47310498356819153, "learning_rate": 0.00016913165266106443, "loss": 0.5953, "step": 29776 }, { "epoch": 16.635195530726257, "grad_norm": 0.2993278205394745, "learning_rate": 0.00016910364145658263, "loss": 0.3394, "step": 29777 }, { "epoch": 16.635754189944134, "grad_norm": 0.4342913031578064, "learning_rate": 0.00016907563025210084, "loss": 0.3745, "step": 29778 }, { "epoch": 16.63631284916201, "grad_norm": 0.4923255145549774, "learning_rate": 
0.00016904761904761907, "loss": 0.5028, "step": 29779 }, { "epoch": 16.636871508379887, "grad_norm": 0.4501801133155823, "learning_rate": 0.00016901960784313725, "loss": 0.3889, "step": 29780 }, { "epoch": 16.637430167597767, "grad_norm": 0.31149882078170776, "learning_rate": 0.00016899159663865546, "loss": 0.3579, "step": 29781 }, { "epoch": 16.637988826815644, "grad_norm": 1.1276620626449585, "learning_rate": 0.00016896358543417366, "loss": 0.3462, "step": 29782 }, { "epoch": 16.63854748603352, "grad_norm": 0.6334343552589417, "learning_rate": 0.0001689355742296919, "loss": 0.4647, "step": 29783 }, { "epoch": 16.639106145251397, "grad_norm": 0.36434948444366455, "learning_rate": 0.0001689075630252101, "loss": 0.3855, "step": 29784 }, { "epoch": 16.639664804469273, "grad_norm": 2.5770599842071533, "learning_rate": 0.00016887955182072828, "loss": 0.3896, "step": 29785 }, { "epoch": 16.64022346368715, "grad_norm": 0.4854937195777893, "learning_rate": 0.00016885154061624649, "loss": 0.4692, "step": 29786 }, { "epoch": 16.640782122905026, "grad_norm": 0.4809033274650574, "learning_rate": 0.00016882352941176472, "loss": 0.4189, "step": 29787 }, { "epoch": 16.641340782122906, "grad_norm": 0.5006118416786194, "learning_rate": 0.00016879551820728292, "loss": 0.4356, "step": 29788 }, { "epoch": 16.641899441340783, "grad_norm": 0.5263915657997131, "learning_rate": 0.00016876750700280113, "loss": 0.3806, "step": 29789 }, { "epoch": 16.64245810055866, "grad_norm": 0.6017934679985046, "learning_rate": 0.0001687394957983193, "loss": 0.2537, "step": 29790 }, { "epoch": 16.643016759776536, "grad_norm": 0.6212185621261597, "learning_rate": 0.00016871148459383754, "loss": 0.5214, "step": 29791 }, { "epoch": 16.643575418994413, "grad_norm": 0.4498072862625122, "learning_rate": 0.00016868347338935575, "loss": 0.3417, "step": 29792 }, { "epoch": 16.64413407821229, "grad_norm": 0.418268084526062, "learning_rate": 0.00016865546218487395, "loss": 0.4021, "step": 29793 }, { "epoch": 
16.64469273743017, "grad_norm": 0.3742298483848572, "learning_rate": 0.00016862745098039216, "loss": 0.3386, "step": 29794 }, { "epoch": 16.645251396648046, "grad_norm": 0.4824741780757904, "learning_rate": 0.00016859943977591037, "loss": 0.3532, "step": 29795 }, { "epoch": 16.645810055865923, "grad_norm": 0.3796785771846771, "learning_rate": 0.00016857142857142857, "loss": 0.3778, "step": 29796 }, { "epoch": 16.6463687150838, "grad_norm": 0.3775938153266907, "learning_rate": 0.00016854341736694678, "loss": 0.4137, "step": 29797 }, { "epoch": 16.646927374301676, "grad_norm": 0.42650899291038513, "learning_rate": 0.00016851540616246498, "loss": 0.3709, "step": 29798 }, { "epoch": 16.647486033519552, "grad_norm": 0.4216354787349701, "learning_rate": 0.00016848739495798322, "loss": 0.3708, "step": 29799 }, { "epoch": 16.64804469273743, "grad_norm": 0.2871125042438507, "learning_rate": 0.0001684593837535014, "loss": 0.3613, "step": 29800 }, { "epoch": 16.64860335195531, "grad_norm": 0.393095999956131, "learning_rate": 0.0001684313725490196, "loss": 0.3516, "step": 29801 }, { "epoch": 16.649162011173186, "grad_norm": 0.3895621597766876, "learning_rate": 0.0001684033613445378, "loss": 0.4114, "step": 29802 }, { "epoch": 16.649720670391062, "grad_norm": 0.35037121176719666, "learning_rate": 0.00016837535014005604, "loss": 0.4198, "step": 29803 }, { "epoch": 16.65027932960894, "grad_norm": 0.5421853065490723, "learning_rate": 0.00016834733893557425, "loss": 0.5941, "step": 29804 }, { "epoch": 16.650837988826815, "grad_norm": 0.845901370048523, "learning_rate": 0.00016831932773109243, "loss": 0.4613, "step": 29805 }, { "epoch": 16.65139664804469, "grad_norm": 0.485249787569046, "learning_rate": 0.00016829131652661063, "loss": 0.3706, "step": 29806 }, { "epoch": 16.65195530726257, "grad_norm": 0.45605310797691345, "learning_rate": 0.00016826330532212887, "loss": 0.3898, "step": 29807 }, { "epoch": 16.65251396648045, "grad_norm": 0.4363083839416504, "learning_rate": 
0.00016823529411764707, "loss": 0.3734, "step": 29808 }, { "epoch": 16.653072625698325, "grad_norm": 0.3861304223537445, "learning_rate": 0.00016820728291316528, "loss": 0.3746, "step": 29809 }, { "epoch": 16.6536312849162, "grad_norm": 0.939969003200531, "learning_rate": 0.00016817927170868346, "loss": 0.332, "step": 29810 }, { "epoch": 16.654189944134078, "grad_norm": 0.42532482743263245, "learning_rate": 0.0001681512605042017, "loss": 0.3903, "step": 29811 }, { "epoch": 16.654748603351955, "grad_norm": 0.3521217703819275, "learning_rate": 0.0001681232492997199, "loss": 0.3263, "step": 29812 }, { "epoch": 16.65530726256983, "grad_norm": 3.500936508178711, "learning_rate": 0.0001680952380952381, "loss": 0.4264, "step": 29813 }, { "epoch": 16.655865921787708, "grad_norm": 0.46213197708129883, "learning_rate": 0.0001680672268907563, "loss": 0.4305, "step": 29814 }, { "epoch": 16.656424581005588, "grad_norm": 0.6043916344642639, "learning_rate": 0.0001680392156862745, "loss": 0.3263, "step": 29815 }, { "epoch": 16.656983240223465, "grad_norm": 0.3290908634662628, "learning_rate": 0.00016801120448179272, "loss": 0.2953, "step": 29816 }, { "epoch": 16.65754189944134, "grad_norm": 0.45729339122772217, "learning_rate": 0.00016798319327731093, "loss": 0.483, "step": 29817 }, { "epoch": 16.658100558659218, "grad_norm": 0.4936226010322571, "learning_rate": 0.00016795518207282913, "loss": 0.4003, "step": 29818 }, { "epoch": 16.658659217877094, "grad_norm": 0.5775694847106934, "learning_rate": 0.00016792717086834734, "loss": 0.3776, "step": 29819 }, { "epoch": 16.65921787709497, "grad_norm": 0.38737642765045166, "learning_rate": 0.00016789915966386554, "loss": 0.3505, "step": 29820 }, { "epoch": 16.659776536312847, "grad_norm": 0.4338393807411194, "learning_rate": 0.00016787114845938375, "loss": 0.391, "step": 29821 }, { "epoch": 16.660335195530728, "grad_norm": 0.39069968461990356, "learning_rate": 0.00016784313725490196, "loss": 0.3365, "step": 29822 }, { "epoch": 
16.660893854748604, "grad_norm": 0.5240722298622131, "learning_rate": 0.0001678151260504202, "loss": 0.4258, "step": 29823 }, { "epoch": 16.66145251396648, "grad_norm": 2.5170700550079346, "learning_rate": 0.00016778711484593837, "loss": 0.3693, "step": 29824 }, { "epoch": 16.662011173184357, "grad_norm": 0.520591139793396, "learning_rate": 0.00016775910364145657, "loss": 0.4493, "step": 29825 }, { "epoch": 16.662569832402234, "grad_norm": 0.37775611877441406, "learning_rate": 0.00016773109243697478, "loss": 0.4546, "step": 29826 }, { "epoch": 16.66312849162011, "grad_norm": 0.9459759593009949, "learning_rate": 0.000167703081232493, "loss": 0.4518, "step": 29827 }, { "epoch": 16.66368715083799, "grad_norm": 0.3614331781864166, "learning_rate": 0.00016767507002801122, "loss": 0.3635, "step": 29828 }, { "epoch": 16.664245810055867, "grad_norm": 0.4394908547401428, "learning_rate": 0.0001676470588235294, "loss": 0.262, "step": 29829 }, { "epoch": 16.664804469273744, "grad_norm": 4.644527912139893, "learning_rate": 0.0001676190476190476, "loss": 0.4004, "step": 29830 }, { "epoch": 16.66536312849162, "grad_norm": 0.4521452784538269, "learning_rate": 0.00016759103641456584, "loss": 0.4257, "step": 29831 }, { "epoch": 16.665921787709497, "grad_norm": 0.4307730197906494, "learning_rate": 0.00016756302521008404, "loss": 0.3873, "step": 29832 }, { "epoch": 16.666480446927373, "grad_norm": 0.4104289412498474, "learning_rate": 0.00016753501400560225, "loss": 0.3279, "step": 29833 }, { "epoch": 16.66703910614525, "grad_norm": 1.1417956352233887, "learning_rate": 0.00016750700280112043, "loss": 0.3868, "step": 29834 }, { "epoch": 16.66759776536313, "grad_norm": 0.3976530134677887, "learning_rate": 0.00016747899159663866, "loss": 0.3356, "step": 29835 }, { "epoch": 16.668156424581007, "grad_norm": 4.729708671569824, "learning_rate": 0.00016745098039215687, "loss": 0.4438, "step": 29836 }, { "epoch": 16.668715083798883, "grad_norm": 0.4817903935909271, "learning_rate": 
0.00016742296918767507, "loss": 0.3501, "step": 29837 }, { "epoch": 16.66927374301676, "grad_norm": 0.38200893998146057, "learning_rate": 0.00016739495798319328, "loss": 0.4531, "step": 29838 }, { "epoch": 16.669832402234636, "grad_norm": 0.48971059918403625, "learning_rate": 0.00016736694677871148, "loss": 0.3622, "step": 29839 }, { "epoch": 16.670391061452513, "grad_norm": 0.9083506464958191, "learning_rate": 0.0001673389355742297, "loss": 0.4264, "step": 29840 }, { "epoch": 16.67094972067039, "grad_norm": 0.3592814803123474, "learning_rate": 0.0001673109243697479, "loss": 0.3323, "step": 29841 }, { "epoch": 16.67150837988827, "grad_norm": 1.5707166194915771, "learning_rate": 0.0001672829131652661, "loss": 0.4032, "step": 29842 }, { "epoch": 16.672067039106146, "grad_norm": 0.3624243438243866, "learning_rate": 0.00016725490196078434, "loss": 0.4164, "step": 29843 }, { "epoch": 16.672625698324023, "grad_norm": 0.5553063750267029, "learning_rate": 0.00016722689075630251, "loss": 0.3849, "step": 29844 }, { "epoch": 16.6731843575419, "grad_norm": 0.3137574791908264, "learning_rate": 0.00016719887955182072, "loss": 0.3201, "step": 29845 }, { "epoch": 16.673743016759776, "grad_norm": 0.39533865451812744, "learning_rate": 0.00016717086834733893, "loss": 0.3956, "step": 29846 }, { "epoch": 16.674301675977652, "grad_norm": 1.0763453245162964, "learning_rate": 0.00016714285714285716, "loss": 0.4178, "step": 29847 }, { "epoch": 16.674860335195532, "grad_norm": 0.4174930453300476, "learning_rate": 0.00016711484593837537, "loss": 0.4033, "step": 29848 }, { "epoch": 16.67541899441341, "grad_norm": 3.591134548187256, "learning_rate": 0.00016708683473389354, "loss": 0.4537, "step": 29849 }, { "epoch": 16.675977653631286, "grad_norm": 0.5874401926994324, "learning_rate": 0.00016705882352941175, "loss": 0.4534, "step": 29850 }, { "epoch": 16.676536312849162, "grad_norm": 0.5107418894767761, "learning_rate": 0.00016703081232492998, "loss": 0.4688, "step": 29851 }, { "epoch": 
16.67709497206704, "grad_norm": 0.3686980903148651, "learning_rate": 0.0001670028011204482, "loss": 0.2746, "step": 29852 }, { "epoch": 16.677653631284915, "grad_norm": 0.3866333067417145, "learning_rate": 0.0001669747899159664, "loss": 0.3548, "step": 29853 }, { "epoch": 16.678212290502792, "grad_norm": 0.42212390899658203, "learning_rate": 0.00016694677871148457, "loss": 0.4262, "step": 29854 }, { "epoch": 16.678770949720672, "grad_norm": 13.590853691101074, "learning_rate": 0.0001669187675070028, "loss": 0.426, "step": 29855 }, { "epoch": 16.67932960893855, "grad_norm": 0.5182271003723145, "learning_rate": 0.000166890756302521, "loss": 0.3961, "step": 29856 }, { "epoch": 16.679888268156425, "grad_norm": 0.4262494444847107, "learning_rate": 0.00016686274509803922, "loss": 0.3991, "step": 29857 }, { "epoch": 16.6804469273743, "grad_norm": 0.3989584743976593, "learning_rate": 0.00016683473389355745, "loss": 0.2982, "step": 29858 }, { "epoch": 16.68100558659218, "grad_norm": 0.48111674189567566, "learning_rate": 0.00016680672268907563, "loss": 0.4127, "step": 29859 }, { "epoch": 16.681564245810055, "grad_norm": 0.7447097301483154, "learning_rate": 0.00016677871148459384, "loss": 0.373, "step": 29860 }, { "epoch": 16.68212290502793, "grad_norm": 0.48700830340385437, "learning_rate": 0.00016675070028011204, "loss": 0.3883, "step": 29861 }, { "epoch": 16.68268156424581, "grad_norm": 0.40347445011138916, "learning_rate": 0.00016672268907563028, "loss": 0.4454, "step": 29862 }, { "epoch": 16.683240223463688, "grad_norm": 0.5050028562545776, "learning_rate": 0.00016669467787114848, "loss": 0.4196, "step": 29863 }, { "epoch": 16.683798882681565, "grad_norm": 0.4193721115589142, "learning_rate": 0.00016666666666666666, "loss": 0.3724, "step": 29864 }, { "epoch": 16.68435754189944, "grad_norm": 0.37018245458602905, "learning_rate": 0.00016663865546218487, "loss": 0.3552, "step": 29865 }, { "epoch": 16.684916201117318, "grad_norm": 0.407537043094635, "learning_rate": 
0.0001666106442577031, "loss": 0.4217, "step": 29866 }, { "epoch": 16.685474860335194, "grad_norm": 7.895242691040039, "learning_rate": 0.0001665826330532213, "loss": 0.3836, "step": 29867 }, { "epoch": 16.68603351955307, "grad_norm": 0.5751171112060547, "learning_rate": 0.0001665546218487395, "loss": 0.3033, "step": 29868 }, { "epoch": 16.68659217877095, "grad_norm": 0.5942823886871338, "learning_rate": 0.0001665266106442577, "loss": 0.4354, "step": 29869 }, { "epoch": 16.687150837988828, "grad_norm": 0.30008044838905334, "learning_rate": 0.00016649859943977592, "loss": 0.3086, "step": 29870 }, { "epoch": 16.687709497206704, "grad_norm": 0.34768733382225037, "learning_rate": 0.00016647058823529413, "loss": 0.3689, "step": 29871 }, { "epoch": 16.68826815642458, "grad_norm": 0.45363518595695496, "learning_rate": 0.00016644257703081234, "loss": 0.3591, "step": 29872 }, { "epoch": 16.688826815642457, "grad_norm": 0.4299505054950714, "learning_rate": 0.00016641456582633054, "loss": 0.4411, "step": 29873 }, { "epoch": 16.689385474860334, "grad_norm": 1.0740561485290527, "learning_rate": 0.00016638655462184875, "loss": 0.4172, "step": 29874 }, { "epoch": 16.689944134078214, "grad_norm": 0.6301817893981934, "learning_rate": 0.00016635854341736695, "loss": 0.4125, "step": 29875 }, { "epoch": 16.69050279329609, "grad_norm": 0.6941254138946533, "learning_rate": 0.00016633053221288516, "loss": 0.5804, "step": 29876 }, { "epoch": 16.691061452513967, "grad_norm": 0.46025583148002625, "learning_rate": 0.00016630252100840337, "loss": 0.4331, "step": 29877 }, { "epoch": 16.691620111731844, "grad_norm": 0.6945750117301941, "learning_rate": 0.00016627450980392157, "loss": 0.4416, "step": 29878 }, { "epoch": 16.69217877094972, "grad_norm": 0.375480055809021, "learning_rate": 0.00016624649859943978, "loss": 0.3182, "step": 29879 }, { "epoch": 16.692737430167597, "grad_norm": 0.5400424003601074, "learning_rate": 0.00016621848739495798, "loss": 0.3523, "step": 29880 }, { "epoch": 
16.693296089385473, "grad_norm": 0.4541155993938446, "learning_rate": 0.0001661904761904762, "loss": 0.4909, "step": 29881 }, { "epoch": 16.693854748603353, "grad_norm": 0.27958911657333374, "learning_rate": 0.00016616246498599442, "loss": 0.2921, "step": 29882 }, { "epoch": 16.69441340782123, "grad_norm": 1.3257254362106323, "learning_rate": 0.0001661344537815126, "loss": 0.4412, "step": 29883 }, { "epoch": 16.694972067039107, "grad_norm": 0.45688945055007935, "learning_rate": 0.0001661064425770308, "loss": 0.4096, "step": 29884 }, { "epoch": 16.695530726256983, "grad_norm": 0.9085695147514343, "learning_rate": 0.00016607843137254901, "loss": 0.3687, "step": 29885 }, { "epoch": 16.69608938547486, "grad_norm": 0.5619651079177856, "learning_rate": 0.00016605042016806725, "loss": 0.3901, "step": 29886 }, { "epoch": 16.696648044692736, "grad_norm": 0.5347960591316223, "learning_rate": 0.00016602240896358545, "loss": 0.4193, "step": 29887 }, { "epoch": 16.697206703910613, "grad_norm": 1.0598918199539185, "learning_rate": 0.00016599439775910363, "loss": 0.5168, "step": 29888 }, { "epoch": 16.697765363128493, "grad_norm": 0.4130520522594452, "learning_rate": 0.00016596638655462184, "loss": 0.4165, "step": 29889 }, { "epoch": 16.69832402234637, "grad_norm": 0.3421899974346161, "learning_rate": 0.00016593837535014007, "loss": 0.3401, "step": 29890 }, { "epoch": 16.698882681564246, "grad_norm": 0.3486146032810211, "learning_rate": 0.00016591036414565828, "loss": 0.3371, "step": 29891 }, { "epoch": 16.699441340782123, "grad_norm": 0.43327903747558594, "learning_rate": 0.00016588235294117648, "loss": 0.4299, "step": 29892 }, { "epoch": 16.7, "grad_norm": 0.6202404499053955, "learning_rate": 0.00016585434173669466, "loss": 0.6182, "step": 29893 }, { "epoch": 16.700558659217876, "grad_norm": 0.42607107758522034, "learning_rate": 0.0001658263305322129, "loss": 0.4443, "step": 29894 }, { "epoch": 16.701117318435756, "grad_norm": 0.40272432565689087, "learning_rate": 
0.0001657983193277311, "loss": 0.4303, "step": 29895 }, { "epoch": 16.701675977653633, "grad_norm": 0.47172215580940247, "learning_rate": 0.0001657703081232493, "loss": 0.3609, "step": 29896 }, { "epoch": 16.70223463687151, "grad_norm": 0.38696345686912537, "learning_rate": 0.0001657422969187675, "loss": 0.3198, "step": 29897 }, { "epoch": 16.702793296089386, "grad_norm": 1.5591745376586914, "learning_rate": 0.00016571428571428572, "loss": 0.3752, "step": 29898 }, { "epoch": 16.703351955307262, "grad_norm": 0.9605649709701538, "learning_rate": 0.00016568627450980393, "loss": 0.4239, "step": 29899 }, { "epoch": 16.70391061452514, "grad_norm": 0.44754770398139954, "learning_rate": 0.00016565826330532213, "loss": 0.4025, "step": 29900 }, { "epoch": 16.704469273743015, "grad_norm": 0.4150021970272064, "learning_rate": 0.00016563025210084034, "loss": 0.4145, "step": 29901 }, { "epoch": 16.705027932960895, "grad_norm": 6.039492607116699, "learning_rate": 0.00016560224089635857, "loss": 0.3618, "step": 29902 }, { "epoch": 16.705586592178772, "grad_norm": 1.223336100578308, "learning_rate": 0.00016557422969187675, "loss": 0.5, "step": 29903 }, { "epoch": 16.70614525139665, "grad_norm": 4.616772174835205, "learning_rate": 0.00016554621848739496, "loss": 0.3983, "step": 29904 }, { "epoch": 16.706703910614525, "grad_norm": 0.8068564534187317, "learning_rate": 0.00016551820728291316, "loss": 0.3283, "step": 29905 }, { "epoch": 16.7072625698324, "grad_norm": 0.38786745071411133, "learning_rate": 0.0001654901960784314, "loss": 0.4067, "step": 29906 }, { "epoch": 16.70782122905028, "grad_norm": 1.4356131553649902, "learning_rate": 0.0001654621848739496, "loss": 0.3633, "step": 29907 }, { "epoch": 16.708379888268155, "grad_norm": 0.4727135896682739, "learning_rate": 0.00016543417366946778, "loss": 0.47, "step": 29908 }, { "epoch": 16.708938547486035, "grad_norm": 0.402080237865448, "learning_rate": 0.00016540616246498599, "loss": 0.3399, "step": 29909 }, { "epoch": 
16.70949720670391, "grad_norm": 0.5463772416114807, "learning_rate": 0.00016537815126050422, "loss": 0.3137, "step": 29910 }, { "epoch": 16.710055865921788, "grad_norm": 0.4279260039329529, "learning_rate": 0.00016535014005602242, "loss": 0.4554, "step": 29911 }, { "epoch": 16.710614525139665, "grad_norm": 0.44774922728538513, "learning_rate": 0.00016532212885154063, "loss": 0.4614, "step": 29912 }, { "epoch": 16.71117318435754, "grad_norm": 0.3404584228992462, "learning_rate": 0.0001652941176470588, "loss": 0.3071, "step": 29913 }, { "epoch": 16.711731843575418, "grad_norm": 0.4860134720802307, "learning_rate": 0.00016526610644257704, "loss": 0.3868, "step": 29914 }, { "epoch": 16.712290502793294, "grad_norm": 0.6186960935592651, "learning_rate": 0.00016523809523809525, "loss": 0.4002, "step": 29915 }, { "epoch": 16.712849162011175, "grad_norm": 0.486005574464798, "learning_rate": 0.00016521008403361345, "loss": 0.4736, "step": 29916 }, { "epoch": 16.71340782122905, "grad_norm": 0.9943147301673889, "learning_rate": 0.00016518207282913166, "loss": 0.4369, "step": 29917 }, { "epoch": 16.713966480446928, "grad_norm": 6.732193946838379, "learning_rate": 0.00016515406162464987, "loss": 0.3906, "step": 29918 }, { "epoch": 16.714525139664804, "grad_norm": 0.5688100457191467, "learning_rate": 0.00016512605042016807, "loss": 0.4562, "step": 29919 }, { "epoch": 16.71508379888268, "grad_norm": 0.6026413440704346, "learning_rate": 0.00016509803921568628, "loss": 0.4667, "step": 29920 }, { "epoch": 16.715642458100557, "grad_norm": 0.8118782043457031, "learning_rate": 0.00016507002801120448, "loss": 0.3906, "step": 29921 }, { "epoch": 16.716201117318434, "grad_norm": 1.3486980199813843, "learning_rate": 0.00016504201680672272, "loss": 0.4707, "step": 29922 }, { "epoch": 16.716759776536314, "grad_norm": 0.6077152490615845, "learning_rate": 0.0001650140056022409, "loss": 0.4093, "step": 29923 }, { "epoch": 16.71731843575419, "grad_norm": 0.8175409436225891, "learning_rate": 
0.0001649859943977591, "loss": 0.4454, "step": 29924 }, { "epoch": 16.717877094972067, "grad_norm": 0.39631110429763794, "learning_rate": 0.0001649579831932773, "loss": 0.2738, "step": 29925 }, { "epoch": 16.718435754189944, "grad_norm": 1.8991374969482422, "learning_rate": 0.00016492997198879554, "loss": 0.3533, "step": 29926 }, { "epoch": 16.71899441340782, "grad_norm": 0.4475594460964203, "learning_rate": 0.00016490196078431375, "loss": 0.3457, "step": 29927 }, { "epoch": 16.719553072625697, "grad_norm": 0.7632883787155151, "learning_rate": 0.00016487394957983193, "loss": 0.396, "step": 29928 }, { "epoch": 16.720111731843577, "grad_norm": 0.7284535765647888, "learning_rate": 0.00016484593837535013, "loss": 0.5019, "step": 29929 }, { "epoch": 16.720670391061454, "grad_norm": 0.5170512795448303, "learning_rate": 0.00016481792717086837, "loss": 0.4317, "step": 29930 }, { "epoch": 16.72122905027933, "grad_norm": 0.7504491806030273, "learning_rate": 0.00016478991596638657, "loss": 0.4665, "step": 29931 }, { "epoch": 16.721787709497207, "grad_norm": 0.31554993987083435, "learning_rate": 0.00016476190476190475, "loss": 0.3206, "step": 29932 }, { "epoch": 16.722346368715083, "grad_norm": 0.4275408685207367, "learning_rate": 0.00016473389355742296, "loss": 0.424, "step": 29933 }, { "epoch": 16.72290502793296, "grad_norm": 0.40644797682762146, "learning_rate": 0.0001647058823529412, "loss": 0.4387, "step": 29934 }, { "epoch": 16.723463687150836, "grad_norm": 0.5162314772605896, "learning_rate": 0.0001646778711484594, "loss": 0.3359, "step": 29935 }, { "epoch": 16.724022346368717, "grad_norm": 0.3356877267360687, "learning_rate": 0.0001646498599439776, "loss": 0.2943, "step": 29936 }, { "epoch": 16.724581005586593, "grad_norm": 0.3958683907985687, "learning_rate": 0.00016462184873949578, "loss": 0.4764, "step": 29937 }, { "epoch": 16.72513966480447, "grad_norm": 0.3934794068336487, "learning_rate": 0.000164593837535014, "loss": 0.4396, "step": 29938 }, { "epoch": 
16.725698324022346, "grad_norm": 0.43442875146865845, "learning_rate": 0.00016456582633053222, "loss": 0.4255, "step": 29939 }, { "epoch": 16.726256983240223, "grad_norm": 0.3928596079349518, "learning_rate": 0.00016453781512605043, "loss": 0.4206, "step": 29940 }, { "epoch": 16.7268156424581, "grad_norm": 0.6808320879936218, "learning_rate": 0.00016450980392156863, "loss": 0.4186, "step": 29941 }, { "epoch": 16.727374301675976, "grad_norm": 0.4956154227256775, "learning_rate": 0.00016448179271708684, "loss": 0.3829, "step": 29942 }, { "epoch": 16.727932960893856, "grad_norm": 0.577748715877533, "learning_rate": 0.00016445378151260504, "loss": 0.497, "step": 29943 }, { "epoch": 16.728491620111733, "grad_norm": 0.38291576504707336, "learning_rate": 0.00016442577030812325, "loss": 0.3831, "step": 29944 }, { "epoch": 16.72905027932961, "grad_norm": 0.6551175713539124, "learning_rate": 0.00016439775910364146, "loss": 0.4421, "step": 29945 }, { "epoch": 16.729608938547486, "grad_norm": 0.4217638075351715, "learning_rate": 0.0001643697478991597, "loss": 0.3422, "step": 29946 }, { "epoch": 16.730167597765362, "grad_norm": 0.39045143127441406, "learning_rate": 0.00016434173669467787, "loss": 0.3302, "step": 29947 }, { "epoch": 16.73072625698324, "grad_norm": 1.9984890222549438, "learning_rate": 0.00016431372549019607, "loss": 0.4164, "step": 29948 }, { "epoch": 16.73128491620112, "grad_norm": 0.5829303860664368, "learning_rate": 0.00016428571428571428, "loss": 0.3996, "step": 29949 }, { "epoch": 16.731843575418996, "grad_norm": 0.6386532783508301, "learning_rate": 0.0001642577030812325, "loss": 0.4314, "step": 29950 }, { "epoch": 16.732402234636872, "grad_norm": 0.790768027305603, "learning_rate": 0.00016422969187675072, "loss": 0.4433, "step": 29951 }, { "epoch": 16.73296089385475, "grad_norm": 0.5277051329612732, "learning_rate": 0.0001642016806722689, "loss": 0.4143, "step": 29952 }, { "epoch": 16.733519553072625, "grad_norm": 0.4073973596096039, "learning_rate": 
0.0001641736694677871, "loss": 0.412, "step": 29953 }, { "epoch": 16.734078212290502, "grad_norm": 0.4140506684780121, "learning_rate": 0.00016414565826330534, "loss": 0.3921, "step": 29954 }, { "epoch": 16.73463687150838, "grad_norm": 0.4317987263202667, "learning_rate": 0.00016411764705882354, "loss": 0.3557, "step": 29955 }, { "epoch": 16.73519553072626, "grad_norm": 0.5608370304107666, "learning_rate": 0.00016408963585434175, "loss": 0.4136, "step": 29956 }, { "epoch": 16.735754189944135, "grad_norm": 1.1222494840621948, "learning_rate": 0.00016406162464985993, "loss": 0.4794, "step": 29957 }, { "epoch": 16.73631284916201, "grad_norm": 0.3921917676925659, "learning_rate": 0.00016403361344537816, "loss": 0.4389, "step": 29958 }, { "epoch": 16.73687150837989, "grad_norm": 0.40632399916648865, "learning_rate": 0.00016400560224089637, "loss": 0.4393, "step": 29959 }, { "epoch": 16.737430167597765, "grad_norm": 0.3970141112804413, "learning_rate": 0.00016397759103641457, "loss": 0.421, "step": 29960 }, { "epoch": 16.73798882681564, "grad_norm": 0.5261026620864868, "learning_rate": 0.00016394957983193278, "loss": 0.3715, "step": 29961 }, { "epoch": 16.738547486033518, "grad_norm": 0.7265753746032715, "learning_rate": 0.00016392156862745098, "loss": 0.4421, "step": 29962 }, { "epoch": 16.739106145251398, "grad_norm": 1.650052785873413, "learning_rate": 0.0001638935574229692, "loss": 0.5266, "step": 29963 }, { "epoch": 16.739664804469275, "grad_norm": 0.5999802350997925, "learning_rate": 0.0001638655462184874, "loss": 0.306, "step": 29964 }, { "epoch": 16.74022346368715, "grad_norm": 1.1444833278656006, "learning_rate": 0.0001638375350140056, "loss": 0.2927, "step": 29965 }, { "epoch": 16.740782122905028, "grad_norm": 0.4256893992424011, "learning_rate": 0.00016380952380952384, "loss": 0.4032, "step": 29966 }, { "epoch": 16.741340782122904, "grad_norm": 1.132124662399292, "learning_rate": 0.00016378151260504201, "loss": 0.3511, "step": 29967 }, { "epoch": 
16.74189944134078, "grad_norm": 0.3812745213508606, "learning_rate": 0.00016375350140056022, "loss": 0.3724, "step": 29968 }, { "epoch": 16.742458100558657, "grad_norm": 0.4091469347476959, "learning_rate": 0.00016372549019607843, "loss": 0.4531, "step": 29969 }, { "epoch": 16.743016759776538, "grad_norm": 0.5499061942100525, "learning_rate": 0.00016369747899159666, "loss": 0.3622, "step": 29970 }, { "epoch": 16.743575418994414, "grad_norm": 0.35281631350517273, "learning_rate": 0.00016366946778711487, "loss": 0.4009, "step": 29971 }, { "epoch": 16.74413407821229, "grad_norm": 0.4959126114845276, "learning_rate": 0.00016364145658263304, "loss": 0.4931, "step": 29972 }, { "epoch": 16.744692737430167, "grad_norm": 0.37805166840553284, "learning_rate": 0.00016361344537815125, "loss": 0.4348, "step": 29973 }, { "epoch": 16.745251396648044, "grad_norm": 0.7124146819114685, "learning_rate": 0.00016358543417366948, "loss": 0.392, "step": 29974 }, { "epoch": 16.74581005586592, "grad_norm": 0.5156207084655762, "learning_rate": 0.0001635574229691877, "loss": 0.3511, "step": 29975 }, { "epoch": 16.7463687150838, "grad_norm": 0.4222303330898285, "learning_rate": 0.0001635294117647059, "loss": 0.3414, "step": 29976 }, { "epoch": 16.746927374301677, "grad_norm": 0.97475266456604, "learning_rate": 0.00016350140056022407, "loss": 0.3733, "step": 29977 }, { "epoch": 16.747486033519554, "grad_norm": 0.4579882025718689, "learning_rate": 0.0001634733893557423, "loss": 0.3679, "step": 29978 }, { "epoch": 16.74804469273743, "grad_norm": 0.4228186309337616, "learning_rate": 0.0001634453781512605, "loss": 0.4364, "step": 29979 }, { "epoch": 16.748603351955307, "grad_norm": 0.4264308512210846, "learning_rate": 0.00016341736694677872, "loss": 0.5012, "step": 29980 }, { "epoch": 16.749162011173183, "grad_norm": 0.40354928374290466, "learning_rate": 0.00016338935574229693, "loss": 0.3544, "step": 29981 }, { "epoch": 16.74972067039106, "grad_norm": 0.42568692564964294, "learning_rate": 
0.00016336134453781513, "loss": 0.3594, "step": 29982 }, { "epoch": 16.75027932960894, "grad_norm": 0.5387842059135437, "learning_rate": 0.00016333333333333334, "loss": 0.5039, "step": 29983 }, { "epoch": 16.750837988826817, "grad_norm": 0.5158648490905762, "learning_rate": 0.00016330532212885154, "loss": 0.4734, "step": 29984 }, { "epoch": 16.751396648044693, "grad_norm": 0.5180208086967468, "learning_rate": 0.00016327731092436975, "loss": 0.2838, "step": 29985 }, { "epoch": 16.75195530726257, "grad_norm": 0.7454515695571899, "learning_rate": 0.00016324929971988796, "loss": 0.4632, "step": 29986 }, { "epoch": 16.752513966480446, "grad_norm": 0.5076193809509277, "learning_rate": 0.00016322128851540616, "loss": 0.5489, "step": 29987 }, { "epoch": 16.753072625698323, "grad_norm": 0.4938351511955261, "learning_rate": 0.00016319327731092437, "loss": 0.3702, "step": 29988 }, { "epoch": 16.7536312849162, "grad_norm": 0.9141116142272949, "learning_rate": 0.00016316526610644257, "loss": 0.4474, "step": 29989 }, { "epoch": 16.75418994413408, "grad_norm": 0.48057761788368225, "learning_rate": 0.0001631372549019608, "loss": 0.3621, "step": 29990 }, { "epoch": 16.754748603351956, "grad_norm": 0.4921472370624542, "learning_rate": 0.00016310924369747899, "loss": 0.2904, "step": 29991 }, { "epoch": 16.755307262569833, "grad_norm": 1.0851823091506958, "learning_rate": 0.0001630812324929972, "loss": 0.4379, "step": 29992 }, { "epoch": 16.75586592178771, "grad_norm": 0.5022264719009399, "learning_rate": 0.0001630532212885154, "loss": 0.4459, "step": 29993 }, { "epoch": 16.756424581005586, "grad_norm": 0.42536184191703796, "learning_rate": 0.00016302521008403363, "loss": 0.4271, "step": 29994 }, { "epoch": 16.756983240223462, "grad_norm": 0.41575920581817627, "learning_rate": 0.00016299719887955184, "loss": 0.5693, "step": 29995 }, { "epoch": 16.757541899441343, "grad_norm": 0.5380123853683472, "learning_rate": 0.00016296918767507002, "loss": 0.3829, "step": 29996 }, { "epoch": 
16.75810055865922, "grad_norm": 0.4218633770942688, "learning_rate": 0.00016294117647058822, "loss": 0.3902, "step": 29997 }, { "epoch": 16.758659217877096, "grad_norm": 0.3621273934841156, "learning_rate": 0.00016291316526610645, "loss": 0.3344, "step": 29998 }, { "epoch": 16.759217877094972, "grad_norm": 0.37574502825737, "learning_rate": 0.00016288515406162466, "loss": 0.4674, "step": 29999 }, { "epoch": 16.75977653631285, "grad_norm": 0.5005525350570679, "learning_rate": 0.00016285714285714287, "loss": 0.4329, "step": 30000 }, { "epoch": 16.75977653631285, "eval_cer": 0.08611283864139745, "eval_loss": 0.32325825095176697, "eval_runtime": 55.6706, "eval_samples_per_second": 81.515, "eval_steps_per_second": 5.101, "eval_wer": 0.34105192592385686, "step": 30000 }, { "epoch": 16.760335195530725, "grad_norm": 0.4052548408508301, "learning_rate": 0.00016282913165266104, "loss": 0.363, "step": 30001 }, { "epoch": 16.760893854748602, "grad_norm": 0.49155187606811523, "learning_rate": 0.00016280112044817928, "loss": 0.4062, "step": 30002 }, { "epoch": 16.761452513966482, "grad_norm": 0.39574292302131653, "learning_rate": 0.00016277310924369748, "loss": 0.3745, "step": 30003 }, { "epoch": 16.76201117318436, "grad_norm": 3.2734153270721436, "learning_rate": 0.0001627450980392157, "loss": 0.429, "step": 30004 }, { "epoch": 16.762569832402235, "grad_norm": 0.6843207478523254, "learning_rate": 0.0001627170868347339, "loss": 0.4213, "step": 30005 }, { "epoch": 16.76312849162011, "grad_norm": 0.6496402621269226, "learning_rate": 0.0001626890756302521, "loss": 0.4037, "step": 30006 }, { "epoch": 16.76368715083799, "grad_norm": 0.38369643688201904, "learning_rate": 0.0001626610644257703, "loss": 0.3781, "step": 30007 }, { "epoch": 16.764245810055865, "grad_norm": 0.789936900138855, "learning_rate": 0.00016263305322128851, "loss": 0.3429, "step": 30008 }, { "epoch": 16.76480446927374, "grad_norm": 0.3861772418022156, "learning_rate": 0.00016260504201680672, "loss": 0.3419, 
"step": 30009 }, { "epoch": 16.76536312849162, "grad_norm": 0.38788917660713196, "learning_rate": 0.00016257703081232495, "loss": 0.4038, "step": 30010 }, { "epoch": 16.765921787709498, "grad_norm": 0.4419642984867096, "learning_rate": 0.00016254901960784313, "loss": 0.2792, "step": 30011 }, { "epoch": 16.766480446927375, "grad_norm": 0.4525263011455536, "learning_rate": 0.00016252100840336134, "loss": 0.3405, "step": 30012 }, { "epoch": 16.76703910614525, "grad_norm": 4.382625579833984, "learning_rate": 0.00016249299719887954, "loss": 0.4036, "step": 30013 }, { "epoch": 16.767597765363128, "grad_norm": 0.444553405046463, "learning_rate": 0.00016246498599439778, "loss": 0.3746, "step": 30014 }, { "epoch": 16.768156424581004, "grad_norm": 1.0176643133163452, "learning_rate": 0.00016243697478991598, "loss": 0.4712, "step": 30015 }, { "epoch": 16.76871508379888, "grad_norm": 0.3747853934764862, "learning_rate": 0.00016240896358543416, "loss": 0.342, "step": 30016 }, { "epoch": 16.76927374301676, "grad_norm": 0.48713281750679016, "learning_rate": 0.00016238095238095237, "loss": 0.462, "step": 30017 }, { "epoch": 16.769832402234638, "grad_norm": 0.5657044053077698, "learning_rate": 0.0001623529411764706, "loss": 0.4339, "step": 30018 }, { "epoch": 16.770391061452514, "grad_norm": 0.40071308612823486, "learning_rate": 0.0001623249299719888, "loss": 0.3091, "step": 30019 }, { "epoch": 16.77094972067039, "grad_norm": 0.44520989060401917, "learning_rate": 0.000162296918767507, "loss": 0.3461, "step": 30020 }, { "epoch": 16.771508379888267, "grad_norm": 1.4213981628417969, "learning_rate": 0.0001622689075630252, "loss": 0.4164, "step": 30021 }, { "epoch": 16.772067039106144, "grad_norm": 0.5215101838111877, "learning_rate": 0.00016224089635854343, "loss": 0.4527, "step": 30022 }, { "epoch": 16.772625698324024, "grad_norm": 0.4174841046333313, "learning_rate": 0.00016221288515406163, "loss": 0.4103, "step": 30023 }, { "epoch": 16.7731843575419, "grad_norm": 
0.47039493918418884, "learning_rate": 0.00016218487394957984, "loss": 0.3948, "step": 30024 }, { "epoch": 16.773743016759777, "grad_norm": 0.5664065480232239, "learning_rate": 0.00016215686274509804, "loss": 0.3949, "step": 30025 }, { "epoch": 16.774301675977654, "grad_norm": 0.7055777311325073, "learning_rate": 0.00016212885154061625, "loss": 0.3805, "step": 30026 }, { "epoch": 16.77486033519553, "grad_norm": 0.4658617377281189, "learning_rate": 0.00016210084033613446, "loss": 0.3533, "step": 30027 }, { "epoch": 16.775418994413407, "grad_norm": 1.507992148399353, "learning_rate": 0.00016207282913165266, "loss": 0.3788, "step": 30028 }, { "epoch": 16.775977653631283, "grad_norm": 0.31773972511291504, "learning_rate": 0.00016204481792717087, "loss": 0.374, "step": 30029 }, { "epoch": 16.776536312849164, "grad_norm": 0.4965648651123047, "learning_rate": 0.0001620168067226891, "loss": 0.5032, "step": 30030 }, { "epoch": 16.77709497206704, "grad_norm": 0.45034071803092957, "learning_rate": 0.00016198879551820728, "loss": 0.4356, "step": 30031 }, { "epoch": 16.777653631284917, "grad_norm": 0.5066399574279785, "learning_rate": 0.00016196078431372549, "loss": 0.4806, "step": 30032 }, { "epoch": 16.778212290502793, "grad_norm": 0.4164505898952484, "learning_rate": 0.0001619327731092437, "loss": 0.3765, "step": 30033 }, { "epoch": 16.77877094972067, "grad_norm": 1.2076771259307861, "learning_rate": 0.00016190476190476192, "loss": 0.3275, "step": 30034 }, { "epoch": 16.779329608938546, "grad_norm": 0.3843424618244171, "learning_rate": 0.00016187675070028013, "loss": 0.3921, "step": 30035 }, { "epoch": 16.779888268156423, "grad_norm": 0.5174873471260071, "learning_rate": 0.0001618487394957983, "loss": 0.4649, "step": 30036 }, { "epoch": 16.780446927374303, "grad_norm": 0.5740247368812561, "learning_rate": 0.00016182072829131652, "loss": 0.4952, "step": 30037 }, { "epoch": 16.78100558659218, "grad_norm": 0.6851624250411987, "learning_rate": 0.00016179271708683475, "loss": 
0.4165, "step": 30038 }, { "epoch": 16.781564245810056, "grad_norm": 0.4838239252567291, "learning_rate": 0.00016176470588235295, "loss": 0.5026, "step": 30039 }, { "epoch": 16.782122905027933, "grad_norm": 0.5639671683311462, "learning_rate": 0.00016173669467787116, "loss": 0.5074, "step": 30040 }, { "epoch": 16.78268156424581, "grad_norm": 0.5394155979156494, "learning_rate": 0.00016170868347338934, "loss": 0.3443, "step": 30041 }, { "epoch": 16.783240223463686, "grad_norm": 0.4927348494529724, "learning_rate": 0.00016168067226890757, "loss": 0.4462, "step": 30042 }, { "epoch": 16.783798882681566, "grad_norm": 0.5866169333457947, "learning_rate": 0.00016165266106442578, "loss": 0.3407, "step": 30043 }, { "epoch": 16.784357541899443, "grad_norm": 0.6173146963119507, "learning_rate": 0.00016162464985994398, "loss": 0.2543, "step": 30044 }, { "epoch": 16.78491620111732, "grad_norm": 1.2588061094284058, "learning_rate": 0.00016159663865546216, "loss": 0.5018, "step": 30045 }, { "epoch": 16.785474860335196, "grad_norm": 0.5672191381454468, "learning_rate": 0.0001615686274509804, "loss": 0.4313, "step": 30046 }, { "epoch": 16.786033519553072, "grad_norm": 0.4832766354084015, "learning_rate": 0.0001615406162464986, "loss": 0.2464, "step": 30047 }, { "epoch": 16.78659217877095, "grad_norm": 1.892886757850647, "learning_rate": 0.0001615126050420168, "loss": 0.283, "step": 30048 }, { "epoch": 16.787150837988825, "grad_norm": 0.6519088745117188, "learning_rate": 0.00016148459383753501, "loss": 0.4246, "step": 30049 }, { "epoch": 16.787709497206706, "grad_norm": 0.44339805841445923, "learning_rate": 0.00016145658263305322, "loss": 0.3872, "step": 30050 }, { "epoch": 16.788268156424582, "grad_norm": 0.6117792129516602, "learning_rate": 0.00016142857142857143, "loss": 0.3754, "step": 30051 }, { "epoch": 16.78882681564246, "grad_norm": 0.39605194330215454, "learning_rate": 0.00016140056022408963, "loss": 0.4391, "step": 30052 }, { "epoch": 16.789385474860335, "grad_norm": 
0.5022445917129517, "learning_rate": 0.00016137254901960784, "loss": 0.3587, "step": 30053 }, { "epoch": 16.789944134078212, "grad_norm": 0.5878540873527527, "learning_rate": 0.00016134453781512607, "loss": 0.5111, "step": 30054 }, { "epoch": 16.79050279329609, "grad_norm": 0.33780530095100403, "learning_rate": 0.00016131652661064425, "loss": 0.366, "step": 30055 }, { "epoch": 16.791061452513965, "grad_norm": 2.6099483966827393, "learning_rate": 0.00016128851540616246, "loss": 0.3523, "step": 30056 }, { "epoch": 16.791620111731845, "grad_norm": 0.5597836971282959, "learning_rate": 0.00016126050420168066, "loss": 0.511, "step": 30057 }, { "epoch": 16.79217877094972, "grad_norm": 0.6871532201766968, "learning_rate": 0.0001612324929971989, "loss": 0.4292, "step": 30058 }, { "epoch": 16.7927374301676, "grad_norm": 0.4103935658931732, "learning_rate": 0.0001612044817927171, "loss": 0.4976, "step": 30059 }, { "epoch": 16.793296089385475, "grad_norm": 0.6675010323524475, "learning_rate": 0.00016117647058823528, "loss": 0.4376, "step": 30060 }, { "epoch": 16.79385474860335, "grad_norm": 0.41142186522483826, "learning_rate": 0.00016114845938375349, "loss": 0.3611, "step": 30061 }, { "epoch": 16.794413407821228, "grad_norm": 0.511282742023468, "learning_rate": 0.00016112044817927172, "loss": 0.3495, "step": 30062 }, { "epoch": 16.794972067039105, "grad_norm": 0.6248022317886353, "learning_rate": 0.00016109243697478993, "loss": 0.415, "step": 30063 }, { "epoch": 16.795530726256985, "grad_norm": 0.4924640953540802, "learning_rate": 0.00016106442577030813, "loss": 0.4743, "step": 30064 }, { "epoch": 16.79608938547486, "grad_norm": 0.603979229927063, "learning_rate": 0.0001610364145658263, "loss": 0.3523, "step": 30065 }, { "epoch": 16.796648044692738, "grad_norm": 0.5137979984283447, "learning_rate": 0.00016100840336134454, "loss": 0.5143, "step": 30066 }, { "epoch": 16.797206703910614, "grad_norm": 0.5229576230049133, "learning_rate": 0.00016098039215686275, "loss": 0.452, 
"step": 30067 }, { "epoch": 16.79776536312849, "grad_norm": 0.49414879083633423, "learning_rate": 0.00016095238095238096, "loss": 0.4395, "step": 30068 }, { "epoch": 16.798324022346367, "grad_norm": 0.4139558970928192, "learning_rate": 0.00016092436974789916, "loss": 0.365, "step": 30069 }, { "epoch": 16.798882681564244, "grad_norm": 0.41835296154022217, "learning_rate": 0.00016089635854341737, "loss": 0.3999, "step": 30070 }, { "epoch": 16.799441340782124, "grad_norm": 0.4176795482635498, "learning_rate": 0.00016086834733893557, "loss": 0.4125, "step": 30071 }, { "epoch": 16.8, "grad_norm": 0.44688260555267334, "learning_rate": 0.00016084033613445378, "loss": 0.3575, "step": 30072 }, { "epoch": 16.800558659217877, "grad_norm": 0.4810837507247925, "learning_rate": 0.00016081232492997199, "loss": 0.451, "step": 30073 }, { "epoch": 16.801117318435754, "grad_norm": 0.5799554586410522, "learning_rate": 0.00016078431372549022, "loss": 0.4201, "step": 30074 }, { "epoch": 16.80167597765363, "grad_norm": 0.35912781953811646, "learning_rate": 0.0001607563025210084, "loss": 0.4278, "step": 30075 }, { "epoch": 16.802234636871507, "grad_norm": 0.3864504098892212, "learning_rate": 0.0001607282913165266, "loss": 0.4273, "step": 30076 }, { "epoch": 16.802793296089387, "grad_norm": 0.5275647640228271, "learning_rate": 0.0001607002801120448, "loss": 0.3651, "step": 30077 }, { "epoch": 16.803351955307264, "grad_norm": 0.41697949171066284, "learning_rate": 0.00016067226890756304, "loss": 0.4145, "step": 30078 }, { "epoch": 16.80391061452514, "grad_norm": 0.433200478553772, "learning_rate": 0.00016064425770308125, "loss": 0.3225, "step": 30079 }, { "epoch": 16.804469273743017, "grad_norm": 0.3597385883331299, "learning_rate": 0.00016061624649859943, "loss": 0.3647, "step": 30080 }, { "epoch": 16.805027932960893, "grad_norm": 0.6093882322311401, "learning_rate": 0.00016058823529411763, "loss": 0.6441, "step": 30081 }, { "epoch": 16.80558659217877, "grad_norm": 0.9391186833381653, 
"learning_rate": 0.00016056022408963587, "loss": 0.3832, "step": 30082 }, { "epoch": 16.806145251396647, "grad_norm": 0.3506004512310028, "learning_rate": 0.00016053221288515407, "loss": 0.4148, "step": 30083 }, { "epoch": 16.806703910614527, "grad_norm": 0.36887064576148987, "learning_rate": 0.00016050420168067228, "loss": 0.3311, "step": 30084 }, { "epoch": 16.807262569832403, "grad_norm": 6.3074727058410645, "learning_rate": 0.00016047619047619046, "loss": 0.4321, "step": 30085 }, { "epoch": 16.80782122905028, "grad_norm": 0.5146413445472717, "learning_rate": 0.0001604481792717087, "loss": 0.5419, "step": 30086 }, { "epoch": 16.808379888268156, "grad_norm": 0.41056182980537415, "learning_rate": 0.0001604201680672269, "loss": 0.3547, "step": 30087 }, { "epoch": 16.808938547486033, "grad_norm": 1.769498586654663, "learning_rate": 0.0001603921568627451, "loss": 0.4257, "step": 30088 }, { "epoch": 16.80949720670391, "grad_norm": 0.35737600922584534, "learning_rate": 0.00016036414565826334, "loss": 0.3283, "step": 30089 }, { "epoch": 16.810055865921786, "grad_norm": 3.807839870452881, "learning_rate": 0.00016033613445378151, "loss": 0.4933, "step": 30090 }, { "epoch": 16.810614525139666, "grad_norm": 0.40298232436180115, "learning_rate": 0.00016030812324929972, "loss": 0.3827, "step": 30091 }, { "epoch": 16.811173184357543, "grad_norm": 0.34603506326675415, "learning_rate": 0.00016028011204481793, "loss": 0.37, "step": 30092 }, { "epoch": 16.81173184357542, "grad_norm": 0.4428398609161377, "learning_rate": 0.00016025210084033616, "loss": 0.4672, "step": 30093 }, { "epoch": 16.812290502793296, "grad_norm": 0.3467986285686493, "learning_rate": 0.00016022408963585437, "loss": 0.3648, "step": 30094 }, { "epoch": 16.812849162011172, "grad_norm": 0.38408535718917847, "learning_rate": 0.00016019607843137254, "loss": 0.392, "step": 30095 }, { "epoch": 16.81340782122905, "grad_norm": 0.40280744433403015, "learning_rate": 0.00016016806722689075, "loss": 0.3634, "step": 30096 
}, { "epoch": 16.81396648044693, "grad_norm": 0.5805290937423706, "learning_rate": 0.00016014005602240898, "loss": 0.4533, "step": 30097 }, { "epoch": 16.814525139664806, "grad_norm": 0.3980065882205963, "learning_rate": 0.0001601120448179272, "loss": 0.3685, "step": 30098 }, { "epoch": 16.815083798882682, "grad_norm": 0.40838387608528137, "learning_rate": 0.00016008403361344537, "loss": 0.4539, "step": 30099 }, { "epoch": 16.81564245810056, "grad_norm": 0.4192756712436676, "learning_rate": 0.00016005602240896357, "loss": 0.3942, "step": 30100 }, { "epoch": 16.816201117318435, "grad_norm": 0.4256346523761749, "learning_rate": 0.0001600280112044818, "loss": 0.3048, "step": 30101 }, { "epoch": 16.816759776536312, "grad_norm": 0.4652436077594757, "learning_rate": 0.00016, "loss": 0.3463, "step": 30102 }, { "epoch": 16.81731843575419, "grad_norm": 0.4597277343273163, "learning_rate": 0.00015997198879551822, "loss": 0.3428, "step": 30103 }, { "epoch": 16.81787709497207, "grad_norm": 0.3622557520866394, "learning_rate": 0.0001599439775910364, "loss": 0.3968, "step": 30104 }, { "epoch": 16.818435754189945, "grad_norm": 0.4852827489376068, "learning_rate": 0.00015991596638655463, "loss": 0.4643, "step": 30105 }, { "epoch": 16.81899441340782, "grad_norm": 0.5440395474433899, "learning_rate": 0.00015988795518207284, "loss": 0.534, "step": 30106 }, { "epoch": 16.8195530726257, "grad_norm": 0.3743683695793152, "learning_rate": 0.00015985994397759104, "loss": 0.3925, "step": 30107 }, { "epoch": 16.820111731843575, "grad_norm": 4.565816879272461, "learning_rate": 0.00015983193277310925, "loss": 0.2968, "step": 30108 }, { "epoch": 16.82067039106145, "grad_norm": 0.4137653410434723, "learning_rate": 0.00015980392156862746, "loss": 0.373, "step": 30109 }, { "epoch": 16.821229050279328, "grad_norm": 0.3782500624656677, "learning_rate": 0.00015977591036414566, "loss": 0.4872, "step": 30110 }, { "epoch": 16.821787709497208, "grad_norm": 0.374213308095932, "learning_rate": 
0.00015974789915966387, "loss": 0.3315, "step": 30111 }, { "epoch": 16.822346368715085, "grad_norm": 0.7055694460868835, "learning_rate": 0.00015971988795518207, "loss": 0.4757, "step": 30112 }, { "epoch": 16.82290502793296, "grad_norm": 0.478340744972229, "learning_rate": 0.0001596918767507003, "loss": 0.4939, "step": 30113 }, { "epoch": 16.823463687150838, "grad_norm": 0.3142969310283661, "learning_rate": 0.00015966386554621849, "loss": 0.259, "step": 30114 }, { "epoch": 16.824022346368714, "grad_norm": 0.5844735503196716, "learning_rate": 0.0001596358543417367, "loss": 0.4245, "step": 30115 }, { "epoch": 16.82458100558659, "grad_norm": 0.3984036147594452, "learning_rate": 0.0001596078431372549, "loss": 0.3528, "step": 30116 }, { "epoch": 16.825139664804468, "grad_norm": 4.019460201263428, "learning_rate": 0.00015957983193277313, "loss": 0.4194, "step": 30117 }, { "epoch": 16.825698324022348, "grad_norm": 0.42472344636917114, "learning_rate": 0.00015955182072829134, "loss": 0.3925, "step": 30118 }, { "epoch": 16.826256983240224, "grad_norm": 0.6465110182762146, "learning_rate": 0.00015952380952380951, "loss": 0.4051, "step": 30119 }, { "epoch": 16.8268156424581, "grad_norm": 0.5962534546852112, "learning_rate": 0.00015949579831932772, "loss": 0.4239, "step": 30120 }, { "epoch": 16.827374301675977, "grad_norm": 0.6170143485069275, "learning_rate": 0.00015946778711484595, "loss": 0.4946, "step": 30121 }, { "epoch": 16.827932960893854, "grad_norm": 0.5440694689750671, "learning_rate": 0.00015943977591036416, "loss": 0.4483, "step": 30122 }, { "epoch": 16.82849162011173, "grad_norm": 0.4368651211261749, "learning_rate": 0.00015941176470588237, "loss": 0.3626, "step": 30123 }, { "epoch": 16.82905027932961, "grad_norm": 0.7729046940803528, "learning_rate": 0.00015938375350140054, "loss": 0.334, "step": 30124 }, { "epoch": 16.829608938547487, "grad_norm": 0.5720878839492798, "learning_rate": 0.00015935574229691878, "loss": 0.377, "step": 30125 }, { "epoch": 
16.830167597765364, "grad_norm": 1.4981003999710083, "learning_rate": 0.00015932773109243698, "loss": 0.3569, "step": 30126 }, { "epoch": 16.83072625698324, "grad_norm": 0.4393386244773865, "learning_rate": 0.0001592997198879552, "loss": 0.398, "step": 30127 }, { "epoch": 16.831284916201117, "grad_norm": 0.40800952911376953, "learning_rate": 0.0001592717086834734, "loss": 0.4818, "step": 30128 }, { "epoch": 16.831843575418993, "grad_norm": 1.3066201210021973, "learning_rate": 0.0001592436974789916, "loss": 0.3661, "step": 30129 }, { "epoch": 16.83240223463687, "grad_norm": 0.43286192417144775, "learning_rate": 0.0001592156862745098, "loss": 0.3214, "step": 30130 }, { "epoch": 16.83296089385475, "grad_norm": 0.4904375970363617, "learning_rate": 0.00015918767507002801, "loss": 0.3733, "step": 30131 }, { "epoch": 16.833519553072627, "grad_norm": 0.44405966997146606, "learning_rate": 0.00015915966386554622, "loss": 0.3916, "step": 30132 }, { "epoch": 16.834078212290503, "grad_norm": 0.5740193128585815, "learning_rate": 0.00015913165266106445, "loss": 0.4114, "step": 30133 }, { "epoch": 16.83463687150838, "grad_norm": 0.4858759939670563, "learning_rate": 0.00015910364145658263, "loss": 0.365, "step": 30134 }, { "epoch": 16.835195530726256, "grad_norm": 0.5334282517433167, "learning_rate": 0.00015907563025210084, "loss": 0.4353, "step": 30135 }, { "epoch": 16.835754189944133, "grad_norm": 0.5499429106712341, "learning_rate": 0.00015904761904761904, "loss": 0.3757, "step": 30136 }, { "epoch": 16.83631284916201, "grad_norm": 1.394912600517273, "learning_rate": 0.00015901960784313728, "loss": 0.634, "step": 30137 }, { "epoch": 16.83687150837989, "grad_norm": 0.4118703603744507, "learning_rate": 0.00015899159663865548, "loss": 0.3574, "step": 30138 }, { "epoch": 16.837430167597766, "grad_norm": 0.6004096269607544, "learning_rate": 0.00015896358543417366, "loss": 0.4295, "step": 30139 }, { "epoch": 16.837988826815643, "grad_norm": 0.40506094694137573, "learning_rate": 
0.00015893557422969187, "loss": 0.3435, "step": 30140 }, { "epoch": 16.83854748603352, "grad_norm": 0.3610875904560089, "learning_rate": 0.0001589075630252101, "loss": 0.4478, "step": 30141 }, { "epoch": 16.839106145251396, "grad_norm": 0.39817145466804504, "learning_rate": 0.0001588795518207283, "loss": 0.3696, "step": 30142 }, { "epoch": 16.839664804469272, "grad_norm": 0.5740687847137451, "learning_rate": 0.0001588515406162465, "loss": 0.39, "step": 30143 }, { "epoch": 16.840223463687153, "grad_norm": 0.4253646731376648, "learning_rate": 0.0001588235294117647, "loss": 0.313, "step": 30144 }, { "epoch": 16.84078212290503, "grad_norm": 1.1679350137710571, "learning_rate": 0.00015879551820728293, "loss": 0.572, "step": 30145 }, { "epoch": 16.841340782122906, "grad_norm": 0.3970259130001068, "learning_rate": 0.00015876750700280113, "loss": 0.3984, "step": 30146 }, { "epoch": 16.841899441340782, "grad_norm": 0.5139328837394714, "learning_rate": 0.00015873949579831934, "loss": 0.4816, "step": 30147 }, { "epoch": 16.84245810055866, "grad_norm": 0.7918537259101868, "learning_rate": 0.00015871148459383754, "loss": 0.5831, "step": 30148 }, { "epoch": 16.843016759776535, "grad_norm": 0.4558841288089752, "learning_rate": 0.00015868347338935575, "loss": 0.3435, "step": 30149 }, { "epoch": 16.843575418994412, "grad_norm": 0.3359465003013611, "learning_rate": 0.00015865546218487396, "loss": 0.3466, "step": 30150 }, { "epoch": 16.844134078212292, "grad_norm": 0.38350799679756165, "learning_rate": 0.00015862745098039216, "loss": 0.4271, "step": 30151 }, { "epoch": 16.84469273743017, "grad_norm": 0.3991387188434601, "learning_rate": 0.00015859943977591037, "loss": 0.39, "step": 30152 }, { "epoch": 16.845251396648045, "grad_norm": 0.3865835964679718, "learning_rate": 0.00015857142857142857, "loss": 0.4485, "step": 30153 }, { "epoch": 16.845810055865922, "grad_norm": 0.4502410888671875, "learning_rate": 0.00015854341736694678, "loss": 0.4842, "step": 30154 }, { "epoch": 
16.8463687150838, "grad_norm": 0.4129258990287781, "learning_rate": 0.00015851540616246499, "loss": 0.4452, "step": 30155 }, { "epoch": 16.846927374301675, "grad_norm": 1.3322232961654663, "learning_rate": 0.0001584873949579832, "loss": 0.3218, "step": 30156 }, { "epoch": 16.84748603351955, "grad_norm": 0.5254073739051819, "learning_rate": 0.00015845938375350142, "loss": 0.4675, "step": 30157 }, { "epoch": 16.84804469273743, "grad_norm": 0.580143392086029, "learning_rate": 0.0001584313725490196, "loss": 0.4845, "step": 30158 }, { "epoch": 16.84860335195531, "grad_norm": 0.3895910978317261, "learning_rate": 0.0001584033613445378, "loss": 0.3993, "step": 30159 }, { "epoch": 16.849162011173185, "grad_norm": 0.36416247487068176, "learning_rate": 0.00015837535014005601, "loss": 0.3807, "step": 30160 }, { "epoch": 16.84972067039106, "grad_norm": 0.5567619204521179, "learning_rate": 0.00015834733893557425, "loss": 0.4068, "step": 30161 }, { "epoch": 16.850279329608938, "grad_norm": 0.444638729095459, "learning_rate": 0.00015831932773109245, "loss": 0.4625, "step": 30162 }, { "epoch": 16.850837988826814, "grad_norm": 1.1667346954345703, "learning_rate": 0.00015829131652661063, "loss": 0.4223, "step": 30163 }, { "epoch": 16.85139664804469, "grad_norm": 0.5068751573562622, "learning_rate": 0.00015826330532212884, "loss": 0.5173, "step": 30164 }, { "epoch": 16.85195530726257, "grad_norm": 0.44086870551109314, "learning_rate": 0.00015823529411764707, "loss": 0.4567, "step": 30165 }, { "epoch": 16.852513966480448, "grad_norm": 0.6275837421417236, "learning_rate": 0.00015820728291316528, "loss": 0.5035, "step": 30166 }, { "epoch": 16.853072625698324, "grad_norm": 0.5532208681106567, "learning_rate": 0.00015817927170868348, "loss": 0.4662, "step": 30167 }, { "epoch": 16.8536312849162, "grad_norm": 13.293482780456543, "learning_rate": 0.00015815126050420166, "loss": 0.331, "step": 30168 }, { "epoch": 16.854189944134077, "grad_norm": 1.044973373413086, "learning_rate": 
0.0001581232492997199, "loss": 0.3656, "step": 30169 }, { "epoch": 16.854748603351954, "grad_norm": 0.456415057182312, "learning_rate": 0.0001580952380952381, "loss": 0.3218, "step": 30170 }, { "epoch": 16.85530726256983, "grad_norm": 0.5060498714447021, "learning_rate": 0.0001580672268907563, "loss": 0.5017, "step": 30171 }, { "epoch": 16.85586592178771, "grad_norm": 0.6787229776382446, "learning_rate": 0.00015803921568627451, "loss": 0.5713, "step": 30172 }, { "epoch": 16.856424581005587, "grad_norm": 0.8607981204986572, "learning_rate": 0.00015801120448179272, "loss": 0.421, "step": 30173 }, { "epoch": 16.856983240223464, "grad_norm": 1.5900905132293701, "learning_rate": 0.00015798319327731093, "loss": 0.4379, "step": 30174 }, { "epoch": 16.85754189944134, "grad_norm": 0.7126103639602661, "learning_rate": 0.00015795518207282913, "loss": 0.3682, "step": 30175 }, { "epoch": 16.858100558659217, "grad_norm": 1.5982716083526611, "learning_rate": 0.00015792717086834734, "loss": 0.3406, "step": 30176 }, { "epoch": 16.858659217877094, "grad_norm": 0.5052145719528198, "learning_rate": 0.00015789915966386557, "loss": 0.3449, "step": 30177 }, { "epoch": 16.859217877094974, "grad_norm": 0.41603681445121765, "learning_rate": 0.00015787114845938375, "loss": 0.4301, "step": 30178 }, { "epoch": 16.85977653631285, "grad_norm": 0.468876451253891, "learning_rate": 0.00015784313725490196, "loss": 0.4057, "step": 30179 }, { "epoch": 16.860335195530727, "grad_norm": 0.3810173571109772, "learning_rate": 0.00015781512605042016, "loss": 0.446, "step": 30180 }, { "epoch": 16.860893854748603, "grad_norm": 0.3950745165348053, "learning_rate": 0.0001577871148459384, "loss": 0.3828, "step": 30181 }, { "epoch": 16.86145251396648, "grad_norm": 0.4344445765018463, "learning_rate": 0.0001577591036414566, "loss": 0.4228, "step": 30182 }, { "epoch": 16.862011173184356, "grad_norm": 0.488749623298645, "learning_rate": 0.00015773109243697478, "loss": 0.395, "step": 30183 }, { "epoch": 
16.862569832402233, "grad_norm": 1.1128497123718262, "learning_rate": 0.00015770308123249299, "loss": 0.5235, "step": 30184 }, { "epoch": 16.863128491620113, "grad_norm": 0.8008594512939453, "learning_rate": 0.00015767507002801122, "loss": 0.3943, "step": 30185 }, { "epoch": 16.86368715083799, "grad_norm": 0.4214664399623871, "learning_rate": 0.00015764705882352943, "loss": 0.4004, "step": 30186 }, { "epoch": 16.864245810055866, "grad_norm": 0.7226083874702454, "learning_rate": 0.00015761904761904763, "loss": 0.4682, "step": 30187 }, { "epoch": 16.864804469273743, "grad_norm": 0.7479549050331116, "learning_rate": 0.0001575910364145658, "loss": 0.4372, "step": 30188 }, { "epoch": 16.86536312849162, "grad_norm": 1.2650318145751953, "learning_rate": 0.00015756302521008404, "loss": 0.4072, "step": 30189 }, { "epoch": 16.865921787709496, "grad_norm": 1.8159083127975464, "learning_rate": 0.00015753501400560225, "loss": 0.4335, "step": 30190 }, { "epoch": 16.866480446927373, "grad_norm": 0.424147367477417, "learning_rate": 0.00015750700280112046, "loss": 0.4016, "step": 30191 }, { "epoch": 16.867039106145253, "grad_norm": 0.4480076730251312, "learning_rate": 0.00015747899159663866, "loss": 0.347, "step": 30192 }, { "epoch": 16.86759776536313, "grad_norm": 0.44959908723831177, "learning_rate": 0.00015745098039215687, "loss": 0.342, "step": 30193 }, { "epoch": 16.868156424581006, "grad_norm": 0.7538745403289795, "learning_rate": 0.00015742296918767507, "loss": 0.6072, "step": 30194 }, { "epoch": 16.868715083798882, "grad_norm": 0.34363195300102234, "learning_rate": 0.00015739495798319328, "loss": 0.3381, "step": 30195 }, { "epoch": 16.86927374301676, "grad_norm": 0.5263931155204773, "learning_rate": 0.00015736694677871149, "loss": 0.4159, "step": 30196 }, { "epoch": 16.869832402234636, "grad_norm": 0.6510556936264038, "learning_rate": 0.00015733893557422972, "loss": 0.4327, "step": 30197 }, { "epoch": 16.870391061452516, "grad_norm": 0.40254583954811096, "learning_rate": 
0.0001573109243697479, "loss": 0.3398, "step": 30198 }, { "epoch": 16.870949720670392, "grad_norm": 0.492439329624176, "learning_rate": 0.0001572829131652661, "loss": 0.5842, "step": 30199 }, { "epoch": 16.87150837988827, "grad_norm": 0.5890607833862305, "learning_rate": 0.0001572549019607843, "loss": 0.4168, "step": 30200 }, { "epoch": 16.872067039106145, "grad_norm": 0.44668495655059814, "learning_rate": 0.00015722689075630254, "loss": 0.3928, "step": 30201 }, { "epoch": 16.872625698324022, "grad_norm": 0.5565999746322632, "learning_rate": 0.00015719887955182075, "loss": 0.4087, "step": 30202 }, { "epoch": 16.8731843575419, "grad_norm": 0.44597819447517395, "learning_rate": 0.00015717086834733893, "loss": 0.4349, "step": 30203 }, { "epoch": 16.873743016759775, "grad_norm": 0.35262104868888855, "learning_rate": 0.00015714285714285713, "loss": 0.3524, "step": 30204 }, { "epoch": 16.874301675977655, "grad_norm": 0.37330934405326843, "learning_rate": 0.00015711484593837537, "loss": 0.3336, "step": 30205 }, { "epoch": 16.87486033519553, "grad_norm": 0.4421864151954651, "learning_rate": 0.00015708683473389357, "loss": 0.3197, "step": 30206 }, { "epoch": 16.87541899441341, "grad_norm": 0.4005056321620941, "learning_rate": 0.00015705882352941178, "loss": 0.5008, "step": 30207 }, { "epoch": 16.875977653631285, "grad_norm": 0.6955085396766663, "learning_rate": 0.00015703081232492996, "loss": 0.4083, "step": 30208 }, { "epoch": 16.87653631284916, "grad_norm": 0.48278653621673584, "learning_rate": 0.0001570028011204482, "loss": 0.4086, "step": 30209 }, { "epoch": 16.877094972067038, "grad_norm": 0.38105377554893494, "learning_rate": 0.0001569747899159664, "loss": 0.3203, "step": 30210 }, { "epoch": 16.877653631284915, "grad_norm": 0.3725895881652832, "learning_rate": 0.0001569467787114846, "loss": 0.3453, "step": 30211 }, { "epoch": 16.878212290502795, "grad_norm": 0.7716445326805115, "learning_rate": 0.00015691876750700278, "loss": 0.4119, "step": 30212 }, { "epoch": 
16.87877094972067, "grad_norm": 0.8801575899124146, "learning_rate": 0.00015689075630252101, "loss": 0.4677, "step": 30213 }, { "epoch": 16.879329608938548, "grad_norm": 0.3447099030017853, "learning_rate": 0.00015686274509803922, "loss": 0.3712, "step": 30214 }, { "epoch": 16.879888268156424, "grad_norm": 0.5271446108818054, "learning_rate": 0.00015683473389355743, "loss": 0.4254, "step": 30215 }, { "epoch": 16.8804469273743, "grad_norm": 0.4342610239982605, "learning_rate": 0.00015680672268907563, "loss": 0.4123, "step": 30216 }, { "epoch": 16.881005586592178, "grad_norm": 0.4275245666503906, "learning_rate": 0.00015677871148459384, "loss": 0.4504, "step": 30217 }, { "epoch": 16.881564245810054, "grad_norm": 5.342784881591797, "learning_rate": 0.00015675070028011204, "loss": 0.41, "step": 30218 }, { "epoch": 16.882122905027934, "grad_norm": 0.35894110798835754, "learning_rate": 0.00015672268907563025, "loss": 0.3602, "step": 30219 }, { "epoch": 16.88268156424581, "grad_norm": 0.5169207453727722, "learning_rate": 0.00015669467787114846, "loss": 0.5066, "step": 30220 }, { "epoch": 16.883240223463687, "grad_norm": 1.1114915609359741, "learning_rate": 0.0001566666666666667, "loss": 0.3083, "step": 30221 }, { "epoch": 16.883798882681564, "grad_norm": 0.4362267851829529, "learning_rate": 0.00015663865546218487, "loss": 0.3687, "step": 30222 }, { "epoch": 16.88435754189944, "grad_norm": 0.5804873704910278, "learning_rate": 0.00015661064425770307, "loss": 0.4614, "step": 30223 }, { "epoch": 16.884916201117317, "grad_norm": 0.45495694875717163, "learning_rate": 0.00015658263305322128, "loss": 0.4684, "step": 30224 }, { "epoch": 16.885474860335197, "grad_norm": 2.383920192718506, "learning_rate": 0.0001565546218487395, "loss": 0.712, "step": 30225 }, { "epoch": 16.886033519553074, "grad_norm": 0.9333776831626892, "learning_rate": 0.00015652661064425772, "loss": 0.3584, "step": 30226 }, { "epoch": 16.88659217877095, "grad_norm": 0.8368858695030212, "learning_rate": 
0.0001564985994397759, "loss": 0.4805, "step": 30227 }, { "epoch": 16.887150837988827, "grad_norm": 0.38262975215911865, "learning_rate": 0.0001564705882352941, "loss": 0.3999, "step": 30228 }, { "epoch": 16.887709497206703, "grad_norm": 0.7130293846130371, "learning_rate": 0.00015644257703081234, "loss": 0.4329, "step": 30229 }, { "epoch": 16.88826815642458, "grad_norm": 0.3868766725063324, "learning_rate": 0.00015641456582633054, "loss": 0.3311, "step": 30230 }, { "epoch": 16.888826815642457, "grad_norm": 0.6025223731994629, "learning_rate": 0.00015638655462184875, "loss": 0.612, "step": 30231 }, { "epoch": 16.889385474860337, "grad_norm": 0.39621472358703613, "learning_rate": 0.00015635854341736693, "loss": 0.3461, "step": 30232 }, { "epoch": 16.889944134078213, "grad_norm": 0.5080248713493347, "learning_rate": 0.00015633053221288516, "loss": 0.4839, "step": 30233 }, { "epoch": 16.89050279329609, "grad_norm": 0.8841781616210938, "learning_rate": 0.00015630252100840337, "loss": 0.4909, "step": 30234 }, { "epoch": 16.891061452513966, "grad_norm": 0.48106616735458374, "learning_rate": 0.00015627450980392157, "loss": 0.4152, "step": 30235 }, { "epoch": 16.891620111731843, "grad_norm": 0.4235338568687439, "learning_rate": 0.00015624649859943978, "loss": 0.4636, "step": 30236 }, { "epoch": 16.89217877094972, "grad_norm": 0.4886437654495239, "learning_rate": 0.00015621848739495798, "loss": 0.3888, "step": 30237 }, { "epoch": 16.892737430167596, "grad_norm": 0.6446411609649658, "learning_rate": 0.0001561904761904762, "loss": 0.413, "step": 30238 }, { "epoch": 16.893296089385476, "grad_norm": 0.4011625647544861, "learning_rate": 0.0001561624649859944, "loss": 0.4109, "step": 30239 }, { "epoch": 16.893854748603353, "grad_norm": 0.39117443561553955, "learning_rate": 0.0001561344537815126, "loss": 0.3616, "step": 30240 }, { "epoch": 16.89441340782123, "grad_norm": 0.38766980171203613, "learning_rate": 0.00015610644257703084, "loss": 0.3917, "step": 30241 }, { "epoch": 
16.894972067039106, "grad_norm": 0.40482330322265625, "learning_rate": 0.00015607843137254901, "loss": 0.4667, "step": 30242 }, { "epoch": 16.895530726256982, "grad_norm": 0.5429258346557617, "learning_rate": 0.00015605042016806722, "loss": 0.3795, "step": 30243 }, { "epoch": 16.89608938547486, "grad_norm": 2.321565628051758, "learning_rate": 0.00015602240896358543, "loss": 0.3792, "step": 30244 }, { "epoch": 16.89664804469274, "grad_norm": 0.38894128799438477, "learning_rate": 0.00015599439775910366, "loss": 0.3649, "step": 30245 }, { "epoch": 16.897206703910616, "grad_norm": 0.5999763011932373, "learning_rate": 0.00015596638655462187, "loss": 0.3026, "step": 30246 }, { "epoch": 16.897765363128492, "grad_norm": 0.6594188213348389, "learning_rate": 0.00015593837535014004, "loss": 0.3825, "step": 30247 }, { "epoch": 16.89832402234637, "grad_norm": 0.48847028613090515, "learning_rate": 0.00015591036414565825, "loss": 0.4173, "step": 30248 }, { "epoch": 16.898882681564245, "grad_norm": 0.3831975758075714, "learning_rate": 0.00015588235294117648, "loss": 0.316, "step": 30249 }, { "epoch": 16.899441340782122, "grad_norm": 0.547111988067627, "learning_rate": 0.0001558543417366947, "loss": 0.4064, "step": 30250 }, { "epoch": 16.9, "grad_norm": 0.3626510798931122, "learning_rate": 0.0001558263305322129, "loss": 0.3189, "step": 30251 }, { "epoch": 16.90055865921788, "grad_norm": 0.40763407945632935, "learning_rate": 0.00015579831932773107, "loss": 0.4641, "step": 30252 }, { "epoch": 16.901117318435755, "grad_norm": 1.1521021127700806, "learning_rate": 0.0001557703081232493, "loss": 0.4325, "step": 30253 }, { "epoch": 16.901675977653632, "grad_norm": 0.4214644730091095, "learning_rate": 0.00015574229691876751, "loss": 0.3507, "step": 30254 }, { "epoch": 16.90223463687151, "grad_norm": 0.3495875895023346, "learning_rate": 0.00015571428571428572, "loss": 0.3517, "step": 30255 }, { "epoch": 16.902793296089385, "grad_norm": 0.31127431988716125, "learning_rate": 
0.00015568627450980393, "loss": 0.2604, "step": 30256 }, { "epoch": 16.90335195530726, "grad_norm": 0.3663713335990906, "learning_rate": 0.00015565826330532213, "loss": 0.3759, "step": 30257 }, { "epoch": 16.903910614525138, "grad_norm": 0.7469764351844788, "learning_rate": 0.00015563025210084034, "loss": 0.6233, "step": 30258 }, { "epoch": 16.904469273743018, "grad_norm": 0.4186480939388275, "learning_rate": 0.00015560224089635854, "loss": 0.3789, "step": 30259 }, { "epoch": 16.905027932960895, "grad_norm": 0.47238972783088684, "learning_rate": 0.00015557422969187675, "loss": 0.429, "step": 30260 }, { "epoch": 16.90558659217877, "grad_norm": 0.6932469010353088, "learning_rate": 0.00015554621848739498, "loss": 0.4313, "step": 30261 }, { "epoch": 16.906145251396648, "grad_norm": 0.8271285891532898, "learning_rate": 0.00015551820728291316, "loss": 0.3734, "step": 30262 }, { "epoch": 16.906703910614524, "grad_norm": 0.5990018248558044, "learning_rate": 0.00015549019607843137, "loss": 0.3683, "step": 30263 }, { "epoch": 16.9072625698324, "grad_norm": 0.625999391078949, "learning_rate": 0.00015546218487394957, "loss": 0.4034, "step": 30264 }, { "epoch": 16.907821229050278, "grad_norm": 0.4199620187282562, "learning_rate": 0.0001554341736694678, "loss": 0.3603, "step": 30265 }, { "epoch": 16.908379888268158, "grad_norm": 2.2511370182037354, "learning_rate": 0.00015540616246498599, "loss": 0.5155, "step": 30266 }, { "epoch": 16.908938547486034, "grad_norm": 0.45186132192611694, "learning_rate": 0.0001553781512605042, "loss": 0.4384, "step": 30267 }, { "epoch": 16.90949720670391, "grad_norm": 0.4772550165653229, "learning_rate": 0.0001553501400560224, "loss": 0.4245, "step": 30268 }, { "epoch": 16.910055865921787, "grad_norm": 0.6822510361671448, "learning_rate": 0.00015532212885154063, "loss": 0.4253, "step": 30269 }, { "epoch": 16.910614525139664, "grad_norm": 1.310558795928955, "learning_rate": 0.00015529411764705884, "loss": 0.3596, "step": 30270 }, { "epoch": 
16.91117318435754, "grad_norm": 0.42638224363327026, "learning_rate": 0.00015526610644257702, "loss": 0.4199, "step": 30271 }, { "epoch": 16.91173184357542, "grad_norm": 0.44095078110694885, "learning_rate": 0.00015523809523809522, "loss": 0.475, "step": 30272 }, { "epoch": 16.912290502793297, "grad_norm": 1.823797583580017, "learning_rate": 0.00015521008403361346, "loss": 0.4956, "step": 30273 }, { "epoch": 16.912849162011174, "grad_norm": 0.5275580286979675, "learning_rate": 0.00015518207282913166, "loss": 0.4656, "step": 30274 }, { "epoch": 16.91340782122905, "grad_norm": 0.8070825934410095, "learning_rate": 0.00015515406162464987, "loss": 0.382, "step": 30275 }, { "epoch": 16.913966480446927, "grad_norm": 0.40196824073791504, "learning_rate": 0.00015512605042016805, "loss": 0.5042, "step": 30276 }, { "epoch": 16.914525139664804, "grad_norm": 0.5920143127441406, "learning_rate": 0.00015509803921568628, "loss": 0.5957, "step": 30277 }, { "epoch": 16.91508379888268, "grad_norm": 0.5059862732887268, "learning_rate": 0.00015507002801120448, "loss": 0.495, "step": 30278 }, { "epoch": 16.91564245810056, "grad_norm": 0.372800350189209, "learning_rate": 0.0001550420168067227, "loss": 0.3789, "step": 30279 }, { "epoch": 16.916201117318437, "grad_norm": 0.5092388391494751, "learning_rate": 0.0001550140056022409, "loss": 0.363, "step": 30280 }, { "epoch": 16.916759776536313, "grad_norm": 0.5395246744155884, "learning_rate": 0.0001549859943977591, "loss": 0.5217, "step": 30281 }, { "epoch": 16.91731843575419, "grad_norm": 0.8115045428276062, "learning_rate": 0.0001549579831932773, "loss": 0.42, "step": 30282 }, { "epoch": 16.917877094972066, "grad_norm": 0.4640989899635315, "learning_rate": 0.00015492997198879551, "loss": 0.3768, "step": 30283 }, { "epoch": 16.918435754189943, "grad_norm": 0.4571166932582855, "learning_rate": 0.00015490196078431372, "loss": 0.4004, "step": 30284 }, { "epoch": 16.91899441340782, "grad_norm": 0.34246352314949036, "learning_rate": 
0.00015487394957983195, "loss": 0.2978, "step": 30285 }, { "epoch": 16.9195530726257, "grad_norm": 0.510298490524292, "learning_rate": 0.00015484593837535013, "loss": 0.4448, "step": 30286 }, { "epoch": 16.920111731843576, "grad_norm": 0.375177264213562, "learning_rate": 0.00015481792717086834, "loss": 0.3154, "step": 30287 }, { "epoch": 16.920670391061453, "grad_norm": 0.48991602659225464, "learning_rate": 0.00015478991596638654, "loss": 0.3548, "step": 30288 }, { "epoch": 16.92122905027933, "grad_norm": 0.3705797493457794, "learning_rate": 0.00015476190476190478, "loss": 0.4276, "step": 30289 }, { "epoch": 16.921787709497206, "grad_norm": 0.40698692202568054, "learning_rate": 0.00015473389355742298, "loss": 0.3508, "step": 30290 }, { "epoch": 16.922346368715083, "grad_norm": 0.5141226053237915, "learning_rate": 0.00015470588235294116, "loss": 0.3915, "step": 30291 }, { "epoch": 16.922905027932963, "grad_norm": 0.4309077262878418, "learning_rate": 0.00015467787114845937, "loss": 0.4075, "step": 30292 }, { "epoch": 16.92346368715084, "grad_norm": 0.6018301844596863, "learning_rate": 0.0001546498599439776, "loss": 0.4251, "step": 30293 }, { "epoch": 16.924022346368716, "grad_norm": 0.4072677493095398, "learning_rate": 0.0001546218487394958, "loss": 0.473, "step": 30294 }, { "epoch": 16.924581005586592, "grad_norm": 0.5321144461631775, "learning_rate": 0.00015459383753501401, "loss": 0.4535, "step": 30295 }, { "epoch": 16.92513966480447, "grad_norm": 0.4155421257019043, "learning_rate": 0.0001545658263305322, "loss": 0.4496, "step": 30296 }, { "epoch": 16.925698324022346, "grad_norm": 0.4917810261249542, "learning_rate": 0.00015453781512605043, "loss": 0.3631, "step": 30297 }, { "epoch": 16.926256983240222, "grad_norm": 0.9854460954666138, "learning_rate": 0.00015450980392156863, "loss": 0.392, "step": 30298 }, { "epoch": 16.926815642458102, "grad_norm": 0.4849650263786316, "learning_rate": 0.00015448179271708684, "loss": 0.3442, "step": 30299 }, { "epoch": 
16.92737430167598, "grad_norm": 0.49455657601356506, "learning_rate": 0.00015445378151260504, "loss": 0.3862, "step": 30300 }, { "epoch": 16.927932960893855, "grad_norm": 0.3992047607898712, "learning_rate": 0.00015442577030812325, "loss": 0.3025, "step": 30301 }, { "epoch": 16.928491620111732, "grad_norm": 0.5470736026763916, "learning_rate": 0.00015439775910364146, "loss": 0.4432, "step": 30302 }, { "epoch": 16.92905027932961, "grad_norm": 0.3519626557826996, "learning_rate": 0.00015436974789915966, "loss": 0.3512, "step": 30303 }, { "epoch": 16.929608938547485, "grad_norm": 0.3889520764350891, "learning_rate": 0.00015434173669467787, "loss": 0.3501, "step": 30304 }, { "epoch": 16.93016759776536, "grad_norm": 0.34396892786026, "learning_rate": 0.0001543137254901961, "loss": 0.3762, "step": 30305 }, { "epoch": 16.93072625698324, "grad_norm": 0.6689139604568481, "learning_rate": 0.00015428571428571428, "loss": 0.4965, "step": 30306 }, { "epoch": 16.93128491620112, "grad_norm": 0.41385045647621155, "learning_rate": 0.00015425770308123249, "loss": 0.4382, "step": 30307 }, { "epoch": 16.931843575418995, "grad_norm": 0.38909074664115906, "learning_rate": 0.0001542296918767507, "loss": 0.387, "step": 30308 }, { "epoch": 16.93240223463687, "grad_norm": 0.3993358016014099, "learning_rate": 0.00015420168067226893, "loss": 0.4423, "step": 30309 }, { "epoch": 16.932960893854748, "grad_norm": 0.4193381667137146, "learning_rate": 0.00015417366946778713, "loss": 0.3351, "step": 30310 }, { "epoch": 16.933519553072625, "grad_norm": 0.461887001991272, "learning_rate": 0.0001541456582633053, "loss": 0.5152, "step": 30311 }, { "epoch": 16.9340782122905, "grad_norm": 0.4148399233818054, "learning_rate": 0.00015411764705882352, "loss": 0.4208, "step": 30312 }, { "epoch": 16.93463687150838, "grad_norm": 0.6164987087249756, "learning_rate": 0.00015408963585434175, "loss": 0.3846, "step": 30313 }, { "epoch": 16.935195530726258, "grad_norm": 0.4818493127822876, "learning_rate": 
0.00015406162464985996, "loss": 0.2934, "step": 30314 }, { "epoch": 16.935754189944134, "grad_norm": 0.4571642279624939, "learning_rate": 0.00015403361344537816, "loss": 0.3984, "step": 30315 }, { "epoch": 16.93631284916201, "grad_norm": 0.5050204396247864, "learning_rate": 0.00015400560224089634, "loss": 0.3245, "step": 30316 }, { "epoch": 16.936871508379888, "grad_norm": 0.3212690055370331, "learning_rate": 0.00015397759103641457, "loss": 0.3152, "step": 30317 }, { "epoch": 16.937430167597764, "grad_norm": 0.46357548236846924, "learning_rate": 0.00015394957983193278, "loss": 0.4281, "step": 30318 }, { "epoch": 16.93798882681564, "grad_norm": 0.4582130014896393, "learning_rate": 0.00015392156862745098, "loss": 0.447, "step": 30319 }, { "epoch": 16.93854748603352, "grad_norm": 0.4889698624610901, "learning_rate": 0.00015389355742296916, "loss": 0.4215, "step": 30320 }, { "epoch": 16.939106145251397, "grad_norm": 0.67316073179245, "learning_rate": 0.0001538655462184874, "loss": 0.5077, "step": 30321 }, { "epoch": 16.939664804469274, "grad_norm": 0.5229091048240662, "learning_rate": 0.0001538375350140056, "loss": 0.5161, "step": 30322 }, { "epoch": 16.94022346368715, "grad_norm": 0.7249429225921631, "learning_rate": 0.0001538095238095238, "loss": 0.4171, "step": 30323 }, { "epoch": 16.940782122905027, "grad_norm": 0.5083596110343933, "learning_rate": 0.00015378151260504204, "loss": 0.5654, "step": 30324 }, { "epoch": 16.941340782122904, "grad_norm": 0.3124542236328125, "learning_rate": 0.00015375350140056022, "loss": 0.299, "step": 30325 }, { "epoch": 16.941899441340784, "grad_norm": 0.41659268736839294, "learning_rate": 0.00015372549019607843, "loss": 0.3377, "step": 30326 }, { "epoch": 16.94245810055866, "grad_norm": 0.42169126868247986, "learning_rate": 0.00015369747899159663, "loss": 0.4597, "step": 30327 }, { "epoch": 16.943016759776537, "grad_norm": 0.4793253242969513, "learning_rate": 0.00015366946778711487, "loss": 0.4167, "step": 30328 }, { "epoch": 
16.943575418994413, "grad_norm": 0.4617081582546234, "learning_rate": 0.00015364145658263307, "loss": 0.373, "step": 30329 }, { "epoch": 16.94413407821229, "grad_norm": 0.5602740049362183, "learning_rate": 0.00015361344537815125, "loss": 0.3198, "step": 30330 }, { "epoch": 16.944692737430167, "grad_norm": 1.0916534662246704, "learning_rate": 0.00015358543417366946, "loss": 0.4097, "step": 30331 }, { "epoch": 16.945251396648043, "grad_norm": 1.9058390855789185, "learning_rate": 0.0001535574229691877, "loss": 0.5161, "step": 30332 }, { "epoch": 16.945810055865923, "grad_norm": 0.44394323229789734, "learning_rate": 0.0001535294117647059, "loss": 0.4215, "step": 30333 }, { "epoch": 16.9463687150838, "grad_norm": 0.33620184659957886, "learning_rate": 0.0001535014005602241, "loss": 0.3181, "step": 30334 }, { "epoch": 16.946927374301676, "grad_norm": 0.7556557655334473, "learning_rate": 0.00015347338935574228, "loss": 0.4124, "step": 30335 }, { "epoch": 16.947486033519553, "grad_norm": 0.6048688292503357, "learning_rate": 0.00015344537815126051, "loss": 0.409, "step": 30336 }, { "epoch": 16.94804469273743, "grad_norm": 0.983925461769104, "learning_rate": 0.00015341736694677872, "loss": 0.3841, "step": 30337 }, { "epoch": 16.948603351955306, "grad_norm": 0.5724963545799255, "learning_rate": 0.00015338935574229693, "loss": 0.4612, "step": 30338 }, { "epoch": 16.949162011173183, "grad_norm": 0.45660504698753357, "learning_rate": 0.00015336134453781513, "loss": 0.2795, "step": 30339 }, { "epoch": 16.949720670391063, "grad_norm": 0.40456148982048035, "learning_rate": 0.00015333333333333334, "loss": 0.5845, "step": 30340 }, { "epoch": 16.95027932960894, "grad_norm": 0.5004631876945496, "learning_rate": 0.00015330532212885154, "loss": 0.3564, "step": 30341 }, { "epoch": 16.950837988826816, "grad_norm": 0.3486461937427521, "learning_rate": 0.00015327731092436975, "loss": 0.3205, "step": 30342 }, { "epoch": 16.951396648044692, "grad_norm": 0.7220545411109924, "learning_rate": 
0.00015324929971988796, "loss": 0.4924, "step": 30343 }, { "epoch": 16.95195530726257, "grad_norm": 5.430187702178955, "learning_rate": 0.0001532212885154062, "loss": 0.4761, "step": 30344 }, { "epoch": 16.952513966480446, "grad_norm": 0.47808152437210083, "learning_rate": 0.00015319327731092437, "loss": 0.4208, "step": 30345 }, { "epoch": 16.953072625698326, "grad_norm": 0.5832805633544922, "learning_rate": 0.00015316526610644257, "loss": 0.2934, "step": 30346 }, { "epoch": 16.953631284916202, "grad_norm": 0.5149906873703003, "learning_rate": 0.00015313725490196078, "loss": 0.3609, "step": 30347 }, { "epoch": 16.95418994413408, "grad_norm": 0.5477957725524902, "learning_rate": 0.000153109243697479, "loss": 0.4756, "step": 30348 }, { "epoch": 16.954748603351955, "grad_norm": 0.7830228805541992, "learning_rate": 0.00015308123249299722, "loss": 0.3902, "step": 30349 }, { "epoch": 16.955307262569832, "grad_norm": 0.4080927073955536, "learning_rate": 0.0001530532212885154, "loss": 0.4187, "step": 30350 }, { "epoch": 16.95586592178771, "grad_norm": 0.5628142356872559, "learning_rate": 0.0001530252100840336, "loss": 0.4375, "step": 30351 }, { "epoch": 16.956424581005585, "grad_norm": 0.5363519787788391, "learning_rate": 0.00015299719887955184, "loss": 0.4159, "step": 30352 }, { "epoch": 16.956983240223465, "grad_norm": 0.5226016044616699, "learning_rate": 0.00015296918767507004, "loss": 0.407, "step": 30353 }, { "epoch": 16.957541899441342, "grad_norm": 0.4318486452102661, "learning_rate": 0.00015294117647058825, "loss": 0.3876, "step": 30354 }, { "epoch": 16.95810055865922, "grad_norm": 0.5389750599861145, "learning_rate": 0.00015291316526610643, "loss": 0.4849, "step": 30355 }, { "epoch": 16.958659217877095, "grad_norm": 0.45806819200515747, "learning_rate": 0.00015288515406162466, "loss": 0.3435, "step": 30356 }, { "epoch": 16.95921787709497, "grad_norm": 5.623819828033447, "learning_rate": 0.00015285714285714287, "loss": 0.4569, "step": 30357 }, { "epoch": 
16.959776536312848, "grad_norm": 0.4806057810783386, "learning_rate": 0.00015282913165266107, "loss": 0.4605, "step": 30358 }, { "epoch": 16.960335195530725, "grad_norm": 0.49418771266937256, "learning_rate": 0.00015280112044817928, "loss": 0.3411, "step": 30359 }, { "epoch": 16.960893854748605, "grad_norm": 0.532681405544281, "learning_rate": 0.00015277310924369748, "loss": 0.4761, "step": 30360 }, { "epoch": 16.96145251396648, "grad_norm": 0.3914929926395416, "learning_rate": 0.0001527450980392157, "loss": 0.3678, "step": 30361 }, { "epoch": 16.962011173184358, "grad_norm": 0.3841231167316437, "learning_rate": 0.0001527170868347339, "loss": 0.3611, "step": 30362 }, { "epoch": 16.962569832402234, "grad_norm": 0.6541624069213867, "learning_rate": 0.0001526890756302521, "loss": 0.3907, "step": 30363 }, { "epoch": 16.96312849162011, "grad_norm": 0.46786296367645264, "learning_rate": 0.00015266106442577034, "loss": 0.4288, "step": 30364 }, { "epoch": 16.963687150837988, "grad_norm": 0.5675880312919617, "learning_rate": 0.00015263305322128851, "loss": 0.4288, "step": 30365 }, { "epoch": 16.964245810055864, "grad_norm": 0.38062313199043274, "learning_rate": 0.00015260504201680672, "loss": 0.4316, "step": 30366 }, { "epoch": 16.964804469273744, "grad_norm": 0.7660247087478638, "learning_rate": 0.00015257703081232493, "loss": 0.4691, "step": 30367 }, { "epoch": 16.96536312849162, "grad_norm": 0.272076815366745, "learning_rate": 0.00015254901960784316, "loss": 0.3178, "step": 30368 }, { "epoch": 16.965921787709497, "grad_norm": 0.7283949255943298, "learning_rate": 0.00015252100840336137, "loss": 0.4403, "step": 30369 }, { "epoch": 16.966480446927374, "grad_norm": 0.391897588968277, "learning_rate": 0.00015249299719887954, "loss": 0.3679, "step": 30370 }, { "epoch": 16.96703910614525, "grad_norm": 0.410977303981781, "learning_rate": 0.00015246498599439775, "loss": 0.3527, "step": 30371 }, { "epoch": 16.967597765363127, "grad_norm": 0.48569798469543457, "learning_rate": 
0.00015243697478991598, "loss": 0.3172, "step": 30372 }, { "epoch": 16.968156424581007, "grad_norm": 0.6881005764007568, "learning_rate": 0.0001524089635854342, "loss": 0.3689, "step": 30373 }, { "epoch": 16.968715083798884, "grad_norm": 0.5051879286766052, "learning_rate": 0.0001523809523809524, "loss": 0.4217, "step": 30374 }, { "epoch": 16.96927374301676, "grad_norm": 0.7523027658462524, "learning_rate": 0.00015235294117647057, "loss": 0.4059, "step": 30375 }, { "epoch": 16.969832402234637, "grad_norm": 0.3337520956993103, "learning_rate": 0.0001523249299719888, "loss": 0.4102, "step": 30376 }, { "epoch": 16.970391061452514, "grad_norm": 0.6148476600646973, "learning_rate": 0.00015229691876750701, "loss": 0.3719, "step": 30377 }, { "epoch": 16.97094972067039, "grad_norm": 0.5315216779708862, "learning_rate": 0.00015226890756302522, "loss": 0.4742, "step": 30378 }, { "epoch": 16.971508379888267, "grad_norm": 0.42165207862854004, "learning_rate": 0.0001522408963585434, "loss": 0.389, "step": 30379 }, { "epoch": 16.972067039106147, "grad_norm": 0.5636337399482727, "learning_rate": 0.00015221288515406163, "loss": 0.4001, "step": 30380 }, { "epoch": 16.972625698324023, "grad_norm": 0.4556921124458313, "learning_rate": 0.00015218487394957984, "loss": 0.374, "step": 30381 }, { "epoch": 16.9731843575419, "grad_norm": 0.5564090609550476, "learning_rate": 0.00015215686274509804, "loss": 0.4175, "step": 30382 }, { "epoch": 16.973743016759776, "grad_norm": 0.40993061661720276, "learning_rate": 0.00015212885154061625, "loss": 0.426, "step": 30383 }, { "epoch": 16.974301675977653, "grad_norm": 0.4953514337539673, "learning_rate": 0.00015210084033613446, "loss": 0.4035, "step": 30384 }, { "epoch": 16.97486033519553, "grad_norm": 0.45152053236961365, "learning_rate": 0.00015207282913165266, "loss": 0.3084, "step": 30385 }, { "epoch": 16.975418994413406, "grad_norm": 1.408462643623352, "learning_rate": 0.00015204481792717087, "loss": 0.4266, "step": 30386 }, { "epoch": 
16.975977653631286, "grad_norm": 2.6643075942993164, "learning_rate": 0.00015201680672268907, "loss": 0.3615, "step": 30387 }, { "epoch": 16.976536312849163, "grad_norm": 0.40733802318573, "learning_rate": 0.0001519887955182073, "loss": 0.3587, "step": 30388 }, { "epoch": 16.97709497206704, "grad_norm": 0.4195544123649597, "learning_rate": 0.00015196078431372549, "loss": 0.3177, "step": 30389 }, { "epoch": 16.977653631284916, "grad_norm": 0.48221084475517273, "learning_rate": 0.0001519327731092437, "loss": 0.5664, "step": 30390 }, { "epoch": 16.978212290502793, "grad_norm": 0.494863361120224, "learning_rate": 0.0001519047619047619, "loss": 0.4674, "step": 30391 }, { "epoch": 16.97877094972067, "grad_norm": 0.303216814994812, "learning_rate": 0.00015187675070028013, "loss": 0.3052, "step": 30392 }, { "epoch": 16.97932960893855, "grad_norm": 0.5198042392730713, "learning_rate": 0.00015184873949579834, "loss": 0.3815, "step": 30393 }, { "epoch": 16.979888268156426, "grad_norm": 0.5553575158119202, "learning_rate": 0.00015182072829131652, "loss": 0.3854, "step": 30394 }, { "epoch": 16.980446927374302, "grad_norm": 0.5964141488075256, "learning_rate": 0.00015179271708683472, "loss": 0.4508, "step": 30395 }, { "epoch": 16.98100558659218, "grad_norm": 0.44770148396492004, "learning_rate": 0.00015176470588235295, "loss": 0.4239, "step": 30396 }, { "epoch": 16.981564245810056, "grad_norm": 0.43785104155540466, "learning_rate": 0.00015173669467787116, "loss": 0.3668, "step": 30397 }, { "epoch": 16.982122905027932, "grad_norm": 0.4443635046482086, "learning_rate": 0.00015170868347338937, "loss": 0.4358, "step": 30398 }, { "epoch": 16.98268156424581, "grad_norm": 0.5695051550865173, "learning_rate": 0.00015168067226890755, "loss": 0.4331, "step": 30399 }, { "epoch": 16.98324022346369, "grad_norm": 0.49616923928260803, "learning_rate": 0.00015165266106442578, "loss": 0.3587, "step": 30400 }, { "epoch": 16.983798882681565, "grad_norm": 0.4713943898677826, "learning_rate": 
0.00015162464985994398, "loss": 0.3646, "step": 30401 }, { "epoch": 16.984357541899442, "grad_norm": 0.45692238211631775, "learning_rate": 0.0001515966386554622, "loss": 0.3509, "step": 30402 }, { "epoch": 16.98491620111732, "grad_norm": 0.3453557789325714, "learning_rate": 0.0001515686274509804, "loss": 0.3372, "step": 30403 }, { "epoch": 16.985474860335195, "grad_norm": 0.789562463760376, "learning_rate": 0.0001515406162464986, "loss": 0.4327, "step": 30404 }, { "epoch": 16.98603351955307, "grad_norm": 0.3741300404071808, "learning_rate": 0.0001515126050420168, "loss": 0.3358, "step": 30405 }, { "epoch": 16.986592178770948, "grad_norm": 0.5981579422950745, "learning_rate": 0.00015148459383753501, "loss": 0.3589, "step": 30406 }, { "epoch": 16.98715083798883, "grad_norm": 0.4594210088253021, "learning_rate": 0.00015145658263305322, "loss": 0.4083, "step": 30407 }, { "epoch": 16.987709497206705, "grad_norm": 0.5064340233802795, "learning_rate": 0.00015142857142857145, "loss": 0.4726, "step": 30408 }, { "epoch": 16.98826815642458, "grad_norm": 8.44332218170166, "learning_rate": 0.00015140056022408963, "loss": 0.4203, "step": 30409 }, { "epoch": 16.988826815642458, "grad_norm": 0.5397350788116455, "learning_rate": 0.00015137254901960784, "loss": 0.4103, "step": 30410 }, { "epoch": 16.989385474860335, "grad_norm": 0.5714200735092163, "learning_rate": 0.00015134453781512604, "loss": 0.5161, "step": 30411 }, { "epoch": 16.98994413407821, "grad_norm": 0.8399409651756287, "learning_rate": 0.00015131652661064428, "loss": 0.4342, "step": 30412 }, { "epoch": 16.990502793296088, "grad_norm": 1.0706945657730103, "learning_rate": 0.00015128851540616248, "loss": 0.3302, "step": 30413 }, { "epoch": 16.991061452513968, "grad_norm": 0.47315743565559387, "learning_rate": 0.00015126050420168066, "loss": 0.4044, "step": 30414 }, { "epoch": 16.991620111731844, "grad_norm": 0.5722789764404297, "learning_rate": 0.00015123249299719887, "loss": 0.4494, "step": 30415 }, { "epoch": 
16.99217877094972, "grad_norm": 0.5723875761032104, "learning_rate": 0.0001512044817927171, "loss": 0.4076, "step": 30416 }, { "epoch": 16.992737430167598, "grad_norm": 0.6388134360313416, "learning_rate": 0.0001511764705882353, "loss": 0.4836, "step": 30417 }, { "epoch": 16.993296089385474, "grad_norm": 0.40612682700157166, "learning_rate": 0.00015114845938375351, "loss": 0.4231, "step": 30418 }, { "epoch": 16.99385474860335, "grad_norm": 0.7671379446983337, "learning_rate": 0.0001511204481792717, "loss": 0.4081, "step": 30419 }, { "epoch": 16.994413407821227, "grad_norm": 0.45651549100875854, "learning_rate": 0.00015109243697478993, "loss": 0.3477, "step": 30420 }, { "epoch": 16.994972067039107, "grad_norm": 0.5423356890678406, "learning_rate": 0.00015106442577030813, "loss": 0.4501, "step": 30421 }, { "epoch": 16.995530726256984, "grad_norm": 0.42410463094711304, "learning_rate": 0.00015103641456582634, "loss": 0.3896, "step": 30422 }, { "epoch": 16.99608938547486, "grad_norm": 0.4299747347831726, "learning_rate": 0.00015100840336134454, "loss": 0.4271, "step": 30423 }, { "epoch": 16.996648044692737, "grad_norm": 0.41651228070259094, "learning_rate": 0.00015098039215686275, "loss": 0.3995, "step": 30424 }, { "epoch": 16.997206703910614, "grad_norm": 0.44958311319351196, "learning_rate": 0.00015095238095238096, "loss": 0.4121, "step": 30425 }, { "epoch": 16.99776536312849, "grad_norm": 0.46983492374420166, "learning_rate": 0.00015092436974789916, "loss": 0.4017, "step": 30426 }, { "epoch": 16.99832402234637, "grad_norm": 0.5891770124435425, "learning_rate": 0.00015089635854341737, "loss": 0.458, "step": 30427 }, { "epoch": 16.998882681564247, "grad_norm": 0.5173524022102356, "learning_rate": 0.0001508683473389356, "loss": 0.5606, "step": 30428 }, { "epoch": 16.999441340782123, "grad_norm": 1.2778003215789795, "learning_rate": 0.00015084033613445378, "loss": 0.4813, "step": 30429 }, { "epoch": 17.0, "grad_norm": 0.662997841835022, "learning_rate": 
0.00015081232492997199, "loss": 0.3612, "step": 30430 }, { "epoch": 17.000558659217877, "grad_norm": 0.36778971552848816, "learning_rate": 0.0001507843137254902, "loss": 0.2869, "step": 30431 }, { "epoch": 17.001117318435753, "grad_norm": 0.5440883636474609, "learning_rate": 0.00015075630252100843, "loss": 0.4328, "step": 30432 }, { "epoch": 17.00167597765363, "grad_norm": 0.33694374561309814, "learning_rate": 0.0001507282913165266, "loss": 0.4027, "step": 30433 }, { "epoch": 17.00223463687151, "grad_norm": 0.5987569093704224, "learning_rate": 0.0001507002801120448, "loss": 0.3475, "step": 30434 }, { "epoch": 17.002793296089386, "grad_norm": 0.39774081110954285, "learning_rate": 0.00015067226890756302, "loss": 0.3922, "step": 30435 }, { "epoch": 17.003351955307263, "grad_norm": 2.299609661102295, "learning_rate": 0.00015064425770308125, "loss": 0.3822, "step": 30436 }, { "epoch": 17.00391061452514, "grad_norm": 4.180192470550537, "learning_rate": 0.00015061624649859945, "loss": 0.4177, "step": 30437 }, { "epoch": 17.004469273743016, "grad_norm": 0.46989870071411133, "learning_rate": 0.00015058823529411763, "loss": 0.3707, "step": 30438 }, { "epoch": 17.005027932960893, "grad_norm": 0.37000805139541626, "learning_rate": 0.00015056022408963584, "loss": 0.3177, "step": 30439 }, { "epoch": 17.00558659217877, "grad_norm": 0.44988906383514404, "learning_rate": 0.00015053221288515407, "loss": 0.3763, "step": 30440 }, { "epoch": 17.00614525139665, "grad_norm": 0.4812779128551483, "learning_rate": 0.00015050420168067228, "loss": 0.3948, "step": 30441 }, { "epoch": 17.006703910614526, "grad_norm": 0.7407799363136292, "learning_rate": 0.00015047619047619048, "loss": 0.3995, "step": 30442 }, { "epoch": 17.007262569832402, "grad_norm": 0.3529624044895172, "learning_rate": 0.00015044817927170866, "loss": 0.385, "step": 30443 }, { "epoch": 17.00782122905028, "grad_norm": 0.3553699254989624, "learning_rate": 0.0001504201680672269, "loss": 0.4685, "step": 30444 }, { "epoch": 
17.008379888268156, "grad_norm": 0.4063166081905365, "learning_rate": 0.0001503921568627451, "loss": 0.4675, "step": 30445 }, { "epoch": 17.008938547486032, "grad_norm": 1.3298100233078003, "learning_rate": 0.0001503641456582633, "loss": 0.3456, "step": 30446 }, { "epoch": 17.009497206703912, "grad_norm": 3.5663909912109375, "learning_rate": 0.00015033613445378151, "loss": 0.4187, "step": 30447 }, { "epoch": 17.01005586592179, "grad_norm": 1.5872750282287598, "learning_rate": 0.00015030812324929972, "loss": 0.353, "step": 30448 }, { "epoch": 17.010614525139665, "grad_norm": 0.4045100510120392, "learning_rate": 0.00015028011204481793, "loss": 0.2705, "step": 30449 }, { "epoch": 17.011173184357542, "grad_norm": 1.197712779045105, "learning_rate": 0.00015025210084033613, "loss": 0.5144, "step": 30450 }, { "epoch": 17.01173184357542, "grad_norm": 0.615078330039978, "learning_rate": 0.00015022408963585434, "loss": 0.4217, "step": 30451 }, { "epoch": 17.012290502793295, "grad_norm": 0.7409213781356812, "learning_rate": 0.00015019607843137257, "loss": 0.3797, "step": 30452 }, { "epoch": 17.01284916201117, "grad_norm": 3.4061059951782227, "learning_rate": 0.00015016806722689075, "loss": 0.5353, "step": 30453 }, { "epoch": 17.013407821229052, "grad_norm": 0.5427066087722778, "learning_rate": 0.00015014005602240896, "loss": 0.3286, "step": 30454 }, { "epoch": 17.01396648044693, "grad_norm": 8.338967323303223, "learning_rate": 0.00015011204481792716, "loss": 0.4225, "step": 30455 }, { "epoch": 17.014525139664805, "grad_norm": 0.6157800555229187, "learning_rate": 0.0001500840336134454, "loss": 0.4572, "step": 30456 }, { "epoch": 17.01508379888268, "grad_norm": 1.7625092267990112, "learning_rate": 0.0001500560224089636, "loss": 0.4084, "step": 30457 }, { "epoch": 17.015642458100558, "grad_norm": 0.6848914623260498, "learning_rate": 0.00015002801120448178, "loss": 0.4316, "step": 30458 }, { "epoch": 17.016201117318435, "grad_norm": 1.7069450616836548, "learning_rate": 0.00015, 
"loss": 0.3977, "step": 30459 }, { "epoch": 17.01675977653631, "grad_norm": 0.8589963912963867, "learning_rate": 0.00014997198879551822, "loss": 0.4355, "step": 30460 }, { "epoch": 17.01731843575419, "grad_norm": 0.471879243850708, "learning_rate": 0.00014994397759103643, "loss": 0.3755, "step": 30461 }, { "epoch": 17.017877094972068, "grad_norm": 4.113809585571289, "learning_rate": 0.00014991596638655463, "loss": 0.507, "step": 30462 }, { "epoch": 17.018435754189944, "grad_norm": 0.3370513617992401, "learning_rate": 0.0001498879551820728, "loss": 0.3231, "step": 30463 }, { "epoch": 17.01899441340782, "grad_norm": 0.3638036847114563, "learning_rate": 0.00014985994397759104, "loss": 0.31, "step": 30464 }, { "epoch": 17.019553072625698, "grad_norm": 0.3504454493522644, "learning_rate": 0.00014983193277310925, "loss": 0.3139, "step": 30465 }, { "epoch": 17.020111731843574, "grad_norm": 0.3819427192211151, "learning_rate": 0.00014980392156862746, "loss": 0.3707, "step": 30466 }, { "epoch": 17.02067039106145, "grad_norm": 0.5636350512504578, "learning_rate": 0.00014977591036414566, "loss": 0.3306, "step": 30467 }, { "epoch": 17.02122905027933, "grad_norm": 1.5958689451217651, "learning_rate": 0.00014974789915966387, "loss": 0.4532, "step": 30468 }, { "epoch": 17.021787709497207, "grad_norm": 0.3493449091911316, "learning_rate": 0.00014971988795518207, "loss": 0.422, "step": 30469 }, { "epoch": 17.022346368715084, "grad_norm": 2.130363702774048, "learning_rate": 0.00014969187675070028, "loss": 0.633, "step": 30470 }, { "epoch": 17.02290502793296, "grad_norm": 0.5868925452232361, "learning_rate": 0.00014966386554621849, "loss": 0.4658, "step": 30471 }, { "epoch": 17.023463687150837, "grad_norm": 0.40896233916282654, "learning_rate": 0.00014963585434173672, "loss": 0.3636, "step": 30472 }, { "epoch": 17.024022346368714, "grad_norm": 0.3784361779689789, "learning_rate": 0.0001496078431372549, "loss": 0.3997, "step": 30473 }, { "epoch": 17.024581005586594, "grad_norm": 
0.5266556143760681, "learning_rate": 0.0001495798319327731, "loss": 0.4098, "step": 30474 }, { "epoch": 17.02513966480447, "grad_norm": 0.4023912847042084, "learning_rate": 0.0001495518207282913, "loss": 0.4364, "step": 30475 }, { "epoch": 17.025698324022347, "grad_norm": 1.3155419826507568, "learning_rate": 0.00014952380952380954, "loss": 0.4446, "step": 30476 }, { "epoch": 17.026256983240224, "grad_norm": 0.3635464906692505, "learning_rate": 0.00014949579831932775, "loss": 0.4578, "step": 30477 }, { "epoch": 17.0268156424581, "grad_norm": 1.4340434074401855, "learning_rate": 0.00014946778711484593, "loss": 0.3187, "step": 30478 }, { "epoch": 17.027374301675977, "grad_norm": 0.6198726892471313, "learning_rate": 0.00014943977591036413, "loss": 0.4259, "step": 30479 }, { "epoch": 17.027932960893853, "grad_norm": 1.1278038024902344, "learning_rate": 0.00014941176470588237, "loss": 0.3197, "step": 30480 }, { "epoch": 17.028491620111733, "grad_norm": 0.46070167422294617, "learning_rate": 0.00014938375350140057, "loss": 0.3955, "step": 30481 }, { "epoch": 17.02905027932961, "grad_norm": 0.5326356291770935, "learning_rate": 0.00014935574229691878, "loss": 0.4687, "step": 30482 }, { "epoch": 17.029608938547486, "grad_norm": 0.42663297057151794, "learning_rate": 0.00014932773109243696, "loss": 0.4572, "step": 30483 }, { "epoch": 17.030167597765363, "grad_norm": 0.36611148715019226, "learning_rate": 0.0001492997198879552, "loss": 0.3564, "step": 30484 }, { "epoch": 17.03072625698324, "grad_norm": 0.4728602468967438, "learning_rate": 0.0001492717086834734, "loss": 0.4383, "step": 30485 }, { "epoch": 17.031284916201116, "grad_norm": 0.4630252718925476, "learning_rate": 0.0001492436974789916, "loss": 0.3734, "step": 30486 }, { "epoch": 17.031843575418993, "grad_norm": 0.4830436110496521, "learning_rate": 0.00014921568627450978, "loss": 0.4712, "step": 30487 }, { "epoch": 17.032402234636873, "grad_norm": 0.3279505670070648, "learning_rate": 0.00014918767507002801, "loss": 
0.2992, "step": 30488 }, { "epoch": 17.03296089385475, "grad_norm": 0.4427335858345032, "learning_rate": 0.00014915966386554622, "loss": 0.4434, "step": 30489 }, { "epoch": 17.033519553072626, "grad_norm": 0.5309723019599915, "learning_rate": 0.00014913165266106443, "loss": 0.4698, "step": 30490 }, { "epoch": 17.034078212290503, "grad_norm": 0.5725134611129761, "learning_rate": 0.00014910364145658263, "loss": 0.4241, "step": 30491 }, { "epoch": 17.03463687150838, "grad_norm": 14.767130851745605, "learning_rate": 0.00014907563025210084, "loss": 0.5801, "step": 30492 }, { "epoch": 17.035195530726256, "grad_norm": 0.3527759611606598, "learning_rate": 0.00014904761904761904, "loss": 0.3567, "step": 30493 }, { "epoch": 17.035754189944136, "grad_norm": 0.3585748076438904, "learning_rate": 0.00014901960784313725, "loss": 0.3253, "step": 30494 }, { "epoch": 17.036312849162012, "grad_norm": 0.6002820730209351, "learning_rate": 0.00014899159663865546, "loss": 0.3623, "step": 30495 }, { "epoch": 17.03687150837989, "grad_norm": 0.6385857462882996, "learning_rate": 0.0001489635854341737, "loss": 0.4929, "step": 30496 }, { "epoch": 17.037430167597766, "grad_norm": 1.726499080657959, "learning_rate": 0.00014893557422969187, "loss": 0.4868, "step": 30497 }, { "epoch": 17.037988826815642, "grad_norm": 0.41995203495025635, "learning_rate": 0.00014890756302521007, "loss": 0.3407, "step": 30498 }, { "epoch": 17.03854748603352, "grad_norm": 0.5569109320640564, "learning_rate": 0.00014887955182072828, "loss": 0.5235, "step": 30499 }, { "epoch": 17.039106145251395, "grad_norm": 2.3928635120391846, "learning_rate": 0.00014885154061624651, "loss": 0.5613, "step": 30500 }, { "epoch": 17.039106145251395, "eval_cer": 0.08478718645325303, "eval_loss": 0.32105767726898193, "eval_runtime": 55.2041, "eval_samples_per_second": 82.204, "eval_steps_per_second": 5.145, "eval_wer": 0.3347113209128237, "step": 30500 }, { "epoch": 17.039664804469275, "grad_norm": 0.4282025992870331, "learning_rate": 
0.00014882352941176472, "loss": 0.3997, "step": 30501 }, { "epoch": 17.040223463687152, "grad_norm": 1.9419450759887695, "learning_rate": 0.0001487955182072829, "loss": 0.3019, "step": 30502 }, { "epoch": 17.04078212290503, "grad_norm": 1.7139935493469238, "learning_rate": 0.0001487675070028011, "loss": 0.6062, "step": 30503 }, { "epoch": 17.041340782122905, "grad_norm": 0.3762113153934479, "learning_rate": 0.00014873949579831934, "loss": 0.3172, "step": 30504 }, { "epoch": 17.04189944134078, "grad_norm": 0.3709942698478699, "learning_rate": 0.00014871148459383754, "loss": 0.3543, "step": 30505 }, { "epoch": 17.042458100558658, "grad_norm": 0.5424813032150269, "learning_rate": 0.00014868347338935575, "loss": 0.3489, "step": 30506 }, { "epoch": 17.043016759776535, "grad_norm": 0.35648950934410095, "learning_rate": 0.00014865546218487393, "loss": 0.3092, "step": 30507 }, { "epoch": 17.043575418994415, "grad_norm": 0.5092266798019409, "learning_rate": 0.00014862745098039216, "loss": 0.404, "step": 30508 }, { "epoch": 17.04413407821229, "grad_norm": 0.8773375153541565, "learning_rate": 0.00014859943977591037, "loss": 0.4841, "step": 30509 }, { "epoch": 17.044692737430168, "grad_norm": 0.4752204120159149, "learning_rate": 0.00014857142857142857, "loss": 0.3815, "step": 30510 }, { "epoch": 17.045251396648045, "grad_norm": 0.7021821141242981, "learning_rate": 0.00014854341736694678, "loss": 0.4805, "step": 30511 }, { "epoch": 17.04581005586592, "grad_norm": 0.40230700373649597, "learning_rate": 0.00014851540616246499, "loss": 0.4182, "step": 30512 }, { "epoch": 17.046368715083798, "grad_norm": 0.5683800578117371, "learning_rate": 0.0001484873949579832, "loss": 0.351, "step": 30513 }, { "epoch": 17.046927374301674, "grad_norm": 0.4263964295387268, "learning_rate": 0.0001484593837535014, "loss": 0.4341, "step": 30514 }, { "epoch": 17.047486033519554, "grad_norm": 0.5452007055282593, "learning_rate": 0.0001484313725490196, "loss": 0.4949, "step": 30515 }, { "epoch": 
17.04804469273743, "grad_norm": 0.40486037731170654, "learning_rate": 0.00014840336134453784, "loss": 0.3668, "step": 30516 }, { "epoch": 17.048603351955308, "grad_norm": 0.4338291883468628, "learning_rate": 0.00014837535014005602, "loss": 0.4316, "step": 30517 }, { "epoch": 17.049162011173184, "grad_norm": 0.4577411711215973, "learning_rate": 0.00014834733893557422, "loss": 0.3992, "step": 30518 }, { "epoch": 17.04972067039106, "grad_norm": 0.37868183851242065, "learning_rate": 0.00014831932773109243, "loss": 0.4336, "step": 30519 }, { "epoch": 17.050279329608937, "grad_norm": 0.3750998079776764, "learning_rate": 0.00014829131652661066, "loss": 0.3244, "step": 30520 }, { "epoch": 17.050837988826817, "grad_norm": 0.5478485226631165, "learning_rate": 0.00014826330532212887, "loss": 0.5612, "step": 30521 }, { "epoch": 17.051396648044694, "grad_norm": 0.4271056056022644, "learning_rate": 0.00014823529411764705, "loss": 0.4541, "step": 30522 }, { "epoch": 17.05195530726257, "grad_norm": 0.9498957395553589, "learning_rate": 0.00014820728291316525, "loss": 0.38, "step": 30523 }, { "epoch": 17.052513966480447, "grad_norm": 0.38722601532936096, "learning_rate": 0.00014817927170868348, "loss": 0.3858, "step": 30524 }, { "epoch": 17.053072625698324, "grad_norm": 0.3773421049118042, "learning_rate": 0.0001481512605042017, "loss": 0.391, "step": 30525 }, { "epoch": 17.0536312849162, "grad_norm": 2.046898365020752, "learning_rate": 0.0001481232492997199, "loss": 0.4374, "step": 30526 }, { "epoch": 17.054189944134077, "grad_norm": 0.44194281101226807, "learning_rate": 0.00014809523809523808, "loss": 0.4443, "step": 30527 }, { "epoch": 17.054748603351957, "grad_norm": 0.961051344871521, "learning_rate": 0.0001480672268907563, "loss": 0.5131, "step": 30528 }, { "epoch": 17.055307262569833, "grad_norm": 0.498801052570343, "learning_rate": 0.00014803921568627451, "loss": 0.506, "step": 30529 }, { "epoch": 17.05586592178771, "grad_norm": 0.5123061537742615, "learning_rate": 
0.00014801120448179272, "loss": 0.4112, "step": 30530 }, { "epoch": 17.056424581005587, "grad_norm": 0.38034579157829285, "learning_rate": 0.00014798319327731093, "loss": 0.4355, "step": 30531 }, { "epoch": 17.056983240223463, "grad_norm": 0.38187503814697266, "learning_rate": 0.00014795518207282913, "loss": 0.4039, "step": 30532 }, { "epoch": 17.05754189944134, "grad_norm": 0.43583863973617554, "learning_rate": 0.00014792717086834734, "loss": 0.581, "step": 30533 }, { "epoch": 17.058100558659216, "grad_norm": 0.3752821981906891, "learning_rate": 0.00014789915966386554, "loss": 0.4265, "step": 30534 }, { "epoch": 17.058659217877096, "grad_norm": 2.105147361755371, "learning_rate": 0.00014787114845938375, "loss": 0.4563, "step": 30535 }, { "epoch": 17.059217877094973, "grad_norm": 0.5207562446594238, "learning_rate": 0.00014784313725490198, "loss": 0.3956, "step": 30536 }, { "epoch": 17.05977653631285, "grad_norm": 0.627870500087738, "learning_rate": 0.00014781512605042016, "loss": 0.516, "step": 30537 }, { "epoch": 17.060335195530726, "grad_norm": 0.4861336052417755, "learning_rate": 0.00014778711484593837, "loss": 0.4406, "step": 30538 }, { "epoch": 17.060893854748603, "grad_norm": 3.0468103885650635, "learning_rate": 0.00014775910364145657, "loss": 0.4418, "step": 30539 }, { "epoch": 17.06145251396648, "grad_norm": 0.49870914220809937, "learning_rate": 0.0001477310924369748, "loss": 0.4393, "step": 30540 }, { "epoch": 17.062011173184356, "grad_norm": 2.6195478439331055, "learning_rate": 0.00014770308123249301, "loss": 0.4129, "step": 30541 }, { "epoch": 17.062569832402236, "grad_norm": 0.5501329302787781, "learning_rate": 0.0001476750700280112, "loss": 0.321, "step": 30542 }, { "epoch": 17.063128491620112, "grad_norm": 0.3972786068916321, "learning_rate": 0.0001476470588235294, "loss": 0.3804, "step": 30543 }, { "epoch": 17.06368715083799, "grad_norm": 0.5347886085510254, "learning_rate": 0.00014761904761904763, "loss": 0.4034, "step": 30544 }, { "epoch": 
17.064245810055866, "grad_norm": 0.36315909028053284, "learning_rate": 0.00014759103641456584, "loss": 0.5294, "step": 30545 }, { "epoch": 17.064804469273742, "grad_norm": 1.4645538330078125, "learning_rate": 0.00014756302521008402, "loss": 0.4326, "step": 30546 }, { "epoch": 17.06536312849162, "grad_norm": 0.48635053634643555, "learning_rate": 0.00014753501400560222, "loss": 0.4484, "step": 30547 }, { "epoch": 17.0659217877095, "grad_norm": 0.38818925619125366, "learning_rate": 0.00014750700280112046, "loss": 0.4492, "step": 30548 }, { "epoch": 17.066480446927375, "grad_norm": 1.262762188911438, "learning_rate": 0.00014747899159663866, "loss": 0.4299, "step": 30549 }, { "epoch": 17.067039106145252, "grad_norm": 0.48113033175468445, "learning_rate": 0.00014745098039215687, "loss": 0.4097, "step": 30550 }, { "epoch": 17.06759776536313, "grad_norm": 0.6917311549186707, "learning_rate": 0.00014742296918767505, "loss": 0.3696, "step": 30551 }, { "epoch": 17.068156424581005, "grad_norm": 0.5821406841278076, "learning_rate": 0.00014739495798319328, "loss": 0.3911, "step": 30552 }, { "epoch": 17.06871508379888, "grad_norm": 0.6156346797943115, "learning_rate": 0.00014736694677871149, "loss": 0.4394, "step": 30553 }, { "epoch": 17.06927374301676, "grad_norm": 18.05360221862793, "learning_rate": 0.0001473389355742297, "loss": 0.3847, "step": 30554 }, { "epoch": 17.06983240223464, "grad_norm": 0.5106489658355713, "learning_rate": 0.00014731092436974792, "loss": 0.435, "step": 30555 }, { "epoch": 17.070391061452515, "grad_norm": 0.3439846336841583, "learning_rate": 0.0001472829131652661, "loss": 0.3495, "step": 30556 }, { "epoch": 17.07094972067039, "grad_norm": 0.3897092640399933, "learning_rate": 0.0001472549019607843, "loss": 0.4916, "step": 30557 }, { "epoch": 17.071508379888268, "grad_norm": 0.5481124520301819, "learning_rate": 0.00014722689075630252, "loss": 0.5462, "step": 30558 }, { "epoch": 17.072067039106145, "grad_norm": 0.43974775075912476, "learning_rate": 
0.00014719887955182075, "loss": 0.4289, "step": 30559 }, { "epoch": 17.07262569832402, "grad_norm": 0.602907657623291, "learning_rate": 0.00014717086834733895, "loss": 0.5071, "step": 30560 }, { "epoch": 17.073184357541898, "grad_norm": 0.35608187317848206, "learning_rate": 0.00014714285714285713, "loss": 0.3086, "step": 30561 }, { "epoch": 17.073743016759778, "grad_norm": 0.3388363718986511, "learning_rate": 0.00014711484593837534, "loss": 0.3729, "step": 30562 }, { "epoch": 17.074301675977654, "grad_norm": 0.4461972713470459, "learning_rate": 0.00014708683473389357, "loss": 0.3941, "step": 30563 }, { "epoch": 17.07486033519553, "grad_norm": 0.9617152214050293, "learning_rate": 0.00014705882352941178, "loss": 0.4159, "step": 30564 }, { "epoch": 17.075418994413408, "grad_norm": 0.4029093384742737, "learning_rate": 0.00014703081232492998, "loss": 0.3948, "step": 30565 }, { "epoch": 17.075977653631284, "grad_norm": 0.495516300201416, "learning_rate": 0.00014700280112044816, "loss": 0.4716, "step": 30566 }, { "epoch": 17.07653631284916, "grad_norm": 0.8004716634750366, "learning_rate": 0.0001469747899159664, "loss": 0.5289, "step": 30567 }, { "epoch": 17.07709497206704, "grad_norm": 0.5013948678970337, "learning_rate": 0.0001469467787114846, "loss": 0.4269, "step": 30568 }, { "epoch": 17.077653631284917, "grad_norm": 0.42532581090927124, "learning_rate": 0.0001469187675070028, "loss": 0.4482, "step": 30569 }, { "epoch": 17.078212290502794, "grad_norm": 0.7256349325180054, "learning_rate": 0.00014689075630252101, "loss": 0.5398, "step": 30570 }, { "epoch": 17.07877094972067, "grad_norm": 0.5493570566177368, "learning_rate": 0.00014686274509803922, "loss": 0.3284, "step": 30571 }, { "epoch": 17.079329608938547, "grad_norm": 0.5636252760887146, "learning_rate": 0.00014683473389355743, "loss": 0.4947, "step": 30572 }, { "epoch": 17.079888268156424, "grad_norm": 1.7464534044265747, "learning_rate": 0.00014680672268907563, "loss": 0.3855, "step": 30573 }, { "epoch": 
17.0804469273743, "grad_norm": 0.3875367343425751, "learning_rate": 0.00014677871148459384, "loss": 0.3412, "step": 30574 }, { "epoch": 17.08100558659218, "grad_norm": 0.5242041945457458, "learning_rate": 0.00014675070028011207, "loss": 0.4053, "step": 30575 }, { "epoch": 17.081564245810057, "grad_norm": 0.40674248337745667, "learning_rate": 0.00014672268907563025, "loss": 0.4532, "step": 30576 }, { "epoch": 17.082122905027934, "grad_norm": 0.7609793543815613, "learning_rate": 0.00014669467787114846, "loss": 0.3148, "step": 30577 }, { "epoch": 17.08268156424581, "grad_norm": 0.7364414930343628, "learning_rate": 0.00014666666666666666, "loss": 0.4908, "step": 30578 }, { "epoch": 17.083240223463687, "grad_norm": 0.4483233094215393, "learning_rate": 0.0001466386554621849, "loss": 0.3973, "step": 30579 }, { "epoch": 17.083798882681563, "grad_norm": 0.3973104655742645, "learning_rate": 0.0001466106442577031, "loss": 0.4524, "step": 30580 }, { "epoch": 17.08435754189944, "grad_norm": 0.38178759813308716, "learning_rate": 0.00014658263305322128, "loss": 0.3727, "step": 30581 }, { "epoch": 17.08491620111732, "grad_norm": 5.578076362609863, "learning_rate": 0.0001465546218487395, "loss": 0.4283, "step": 30582 }, { "epoch": 17.085474860335196, "grad_norm": 0.3670594096183777, "learning_rate": 0.00014652661064425772, "loss": 0.4127, "step": 30583 }, { "epoch": 17.086033519553073, "grad_norm": 0.5761085748672485, "learning_rate": 0.00014649859943977593, "loss": 0.3824, "step": 30584 }, { "epoch": 17.08659217877095, "grad_norm": 0.4312434792518616, "learning_rate": 0.00014647058823529413, "loss": 0.3376, "step": 30585 }, { "epoch": 17.087150837988826, "grad_norm": 0.4243375062942505, "learning_rate": 0.0001464425770308123, "loss": 0.3348, "step": 30586 }, { "epoch": 17.087709497206703, "grad_norm": 0.4164976477622986, "learning_rate": 0.00014641456582633054, "loss": 0.3754, "step": 30587 }, { "epoch": 17.08826815642458, "grad_norm": 0.8961573839187622, "learning_rate": 
0.00014638655462184875, "loss": 0.3887, "step": 30588 }, { "epoch": 17.08882681564246, "grad_norm": 0.4520336389541626, "learning_rate": 0.00014635854341736696, "loss": 0.3161, "step": 30589 }, { "epoch": 17.089385474860336, "grad_norm": 0.6728535890579224, "learning_rate": 0.00014633053221288516, "loss": 0.3789, "step": 30590 }, { "epoch": 17.089944134078213, "grad_norm": 0.6951990723609924, "learning_rate": 0.00014630252100840337, "loss": 0.4237, "step": 30591 }, { "epoch": 17.09050279329609, "grad_norm": 0.4518861472606659, "learning_rate": 0.00014627450980392157, "loss": 0.2842, "step": 30592 }, { "epoch": 17.091061452513966, "grad_norm": 0.3565356731414795, "learning_rate": 0.00014624649859943978, "loss": 0.3337, "step": 30593 }, { "epoch": 17.091620111731842, "grad_norm": 0.32045552134513855, "learning_rate": 0.00014621848739495799, "loss": 0.3258, "step": 30594 }, { "epoch": 17.092178770949722, "grad_norm": 0.3928930163383484, "learning_rate": 0.00014619047619047622, "loss": 0.4039, "step": 30595 }, { "epoch": 17.0927374301676, "grad_norm": 0.4180894196033478, "learning_rate": 0.0001461624649859944, "loss": 0.2897, "step": 30596 }, { "epoch": 17.093296089385476, "grad_norm": 0.568274974822998, "learning_rate": 0.0001461344537815126, "loss": 0.4662, "step": 30597 }, { "epoch": 17.093854748603352, "grad_norm": 0.3471246659755707, "learning_rate": 0.0001461064425770308, "loss": 0.3321, "step": 30598 }, { "epoch": 17.09441340782123, "grad_norm": 0.41662198305130005, "learning_rate": 0.00014607843137254904, "loss": 0.3683, "step": 30599 }, { "epoch": 17.094972067039105, "grad_norm": 0.6780016422271729, "learning_rate": 0.00014605042016806722, "loss": 0.519, "step": 30600 }, { "epoch": 17.095530726256982, "grad_norm": 0.8851748108863831, "learning_rate": 0.00014602240896358543, "loss": 0.3277, "step": 30601 }, { "epoch": 17.096089385474862, "grad_norm": 0.6956737041473389, "learning_rate": 0.00014599439775910363, "loss": 0.4344, "step": 30602 }, { "epoch": 
17.09664804469274, "grad_norm": 0.6809481382369995, "learning_rate": 0.00014596638655462187, "loss": 0.3304, "step": 30603 }, { "epoch": 17.097206703910615, "grad_norm": 0.7596341967582703, "learning_rate": 0.00014593837535014007, "loss": 0.4337, "step": 30604 }, { "epoch": 17.09776536312849, "grad_norm": 0.43310680985450745, "learning_rate": 0.00014591036414565825, "loss": 0.5794, "step": 30605 }, { "epoch": 17.098324022346368, "grad_norm": 0.6859529614448547, "learning_rate": 0.00014588235294117646, "loss": 0.3938, "step": 30606 }, { "epoch": 17.098882681564245, "grad_norm": 1.169474482536316, "learning_rate": 0.0001458543417366947, "loss": 0.4018, "step": 30607 }, { "epoch": 17.09944134078212, "grad_norm": 0.4618262052536011, "learning_rate": 0.0001458263305322129, "loss": 0.5233, "step": 30608 }, { "epoch": 17.1, "grad_norm": 0.4496903419494629, "learning_rate": 0.0001457983193277311, "loss": 0.5017, "step": 30609 }, { "epoch": 17.100558659217878, "grad_norm": 0.9016445279121399, "learning_rate": 0.00014577030812324928, "loss": 0.3914, "step": 30610 }, { "epoch": 17.101117318435755, "grad_norm": 0.554819643497467, "learning_rate": 0.00014574229691876751, "loss": 0.5955, "step": 30611 }, { "epoch": 17.10167597765363, "grad_norm": 0.4067293703556061, "learning_rate": 0.00014571428571428572, "loss": 0.5351, "step": 30612 }, { "epoch": 17.102234636871508, "grad_norm": 0.5183228850364685, "learning_rate": 0.00014568627450980393, "loss": 0.4086, "step": 30613 }, { "epoch": 17.102793296089384, "grad_norm": 6.587244987487793, "learning_rate": 0.00014565826330532213, "loss": 0.4892, "step": 30614 }, { "epoch": 17.10335195530726, "grad_norm": 0.9795680642127991, "learning_rate": 0.00014563025210084034, "loss": 0.4459, "step": 30615 }, { "epoch": 17.10391061452514, "grad_norm": 0.5369141101837158, "learning_rate": 0.00014560224089635854, "loss": 0.4167, "step": 30616 }, { "epoch": 17.104469273743018, "grad_norm": 0.36952686309814453, "learning_rate": 
0.00014557422969187675, "loss": 0.3498, "step": 30617 }, { "epoch": 17.105027932960894, "grad_norm": 0.3905816674232483, "learning_rate": 0.00014554621848739496, "loss": 0.3489, "step": 30618 }, { "epoch": 17.10558659217877, "grad_norm": 0.517412006855011, "learning_rate": 0.0001455182072829132, "loss": 0.373, "step": 30619 }, { "epoch": 17.106145251396647, "grad_norm": 0.5572271943092346, "learning_rate": 0.00014549019607843137, "loss": 0.5167, "step": 30620 }, { "epoch": 17.106703910614524, "grad_norm": 0.601365327835083, "learning_rate": 0.00014546218487394957, "loss": 0.3882, "step": 30621 }, { "epoch": 17.107262569832404, "grad_norm": 0.49694252014160156, "learning_rate": 0.00014543417366946778, "loss": 0.4317, "step": 30622 }, { "epoch": 17.10782122905028, "grad_norm": 0.7042227387428284, "learning_rate": 0.00014540616246498601, "loss": 0.424, "step": 30623 }, { "epoch": 17.108379888268157, "grad_norm": 2.348402738571167, "learning_rate": 0.00014537815126050422, "loss": 0.3399, "step": 30624 }, { "epoch": 17.108938547486034, "grad_norm": 0.357658326625824, "learning_rate": 0.0001453501400560224, "loss": 0.3377, "step": 30625 }, { "epoch": 17.10949720670391, "grad_norm": 3.7513959407806396, "learning_rate": 0.0001453221288515406, "loss": 0.3313, "step": 30626 }, { "epoch": 17.110055865921787, "grad_norm": 0.4794785678386688, "learning_rate": 0.00014529411764705884, "loss": 0.4224, "step": 30627 }, { "epoch": 17.110614525139663, "grad_norm": 0.36617326736450195, "learning_rate": 0.00014526610644257704, "loss": 0.4576, "step": 30628 }, { "epoch": 17.111173184357543, "grad_norm": 0.435846745967865, "learning_rate": 0.00014523809523809525, "loss": 0.3935, "step": 30629 }, { "epoch": 17.11173184357542, "grad_norm": 0.4789201021194458, "learning_rate": 0.00014521008403361343, "loss": 0.3303, "step": 30630 }, { "epoch": 17.112290502793297, "grad_norm": 0.6044398546218872, "learning_rate": 0.00014518207282913166, "loss": 0.4645, "step": 30631 }, { "epoch": 
17.112849162011173, "grad_norm": 5.2347540855407715, "learning_rate": 0.00014515406162464987, "loss": 0.4486, "step": 30632 }, { "epoch": 17.11340782122905, "grad_norm": 0.3528764545917511, "learning_rate": 0.00014512605042016807, "loss": 0.3535, "step": 30633 }, { "epoch": 17.113966480446926, "grad_norm": 0.3694852888584137, "learning_rate": 0.00014509803921568628, "loss": 0.385, "step": 30634 }, { "epoch": 17.114525139664803, "grad_norm": 0.2871161699295044, "learning_rate": 0.00014507002801120449, "loss": 0.3449, "step": 30635 }, { "epoch": 17.115083798882683, "grad_norm": 0.3888263404369354, "learning_rate": 0.0001450420168067227, "loss": 0.4752, "step": 30636 }, { "epoch": 17.11564245810056, "grad_norm": 0.44928330183029175, "learning_rate": 0.0001450140056022409, "loss": 0.3576, "step": 30637 }, { "epoch": 17.116201117318436, "grad_norm": 0.44116848707199097, "learning_rate": 0.0001449859943977591, "loss": 0.4291, "step": 30638 }, { "epoch": 17.116759776536313, "grad_norm": 0.8819090723991394, "learning_rate": 0.00014495798319327734, "loss": 0.3813, "step": 30639 }, { "epoch": 17.11731843575419, "grad_norm": 0.4356851279735565, "learning_rate": 0.00014492997198879552, "loss": 0.3799, "step": 30640 }, { "epoch": 17.117877094972066, "grad_norm": 0.6298952698707581, "learning_rate": 0.00014490196078431372, "loss": 0.5793, "step": 30641 }, { "epoch": 17.118435754189946, "grad_norm": 0.6416264176368713, "learning_rate": 0.00014487394957983193, "loss": 0.3043, "step": 30642 }, { "epoch": 17.118994413407822, "grad_norm": 0.4306201934814453, "learning_rate": 0.00014484593837535016, "loss": 0.4532, "step": 30643 }, { "epoch": 17.1195530726257, "grad_norm": 0.6793591380119324, "learning_rate": 0.00014481792717086837, "loss": 0.4275, "step": 30644 }, { "epoch": 17.120111731843576, "grad_norm": 0.41066646575927734, "learning_rate": 0.00014478991596638655, "loss": 0.463, "step": 30645 }, { "epoch": 17.120670391061452, "grad_norm": 0.7772185802459717, "learning_rate": 
0.00014476190476190475, "loss": 0.4528, "step": 30646 }, { "epoch": 17.12122905027933, "grad_norm": 0.43935853242874146, "learning_rate": 0.00014473389355742298, "loss": 0.364, "step": 30647 }, { "epoch": 17.121787709497205, "grad_norm": 0.5706481337547302, "learning_rate": 0.0001447058823529412, "loss": 0.445, "step": 30648 }, { "epoch": 17.122346368715085, "grad_norm": 0.4210303723812103, "learning_rate": 0.0001446778711484594, "loss": 0.3841, "step": 30649 }, { "epoch": 17.122905027932962, "grad_norm": 0.4442594647407532, "learning_rate": 0.00014464985994397758, "loss": 0.3377, "step": 30650 }, { "epoch": 17.12346368715084, "grad_norm": 0.8115764856338501, "learning_rate": 0.0001446218487394958, "loss": 0.328, "step": 30651 }, { "epoch": 17.124022346368715, "grad_norm": 3.739281177520752, "learning_rate": 0.00014459383753501401, "loss": 0.4641, "step": 30652 }, { "epoch": 17.12458100558659, "grad_norm": 0.4313853979110718, "learning_rate": 0.00014456582633053222, "loss": 0.4049, "step": 30653 }, { "epoch": 17.12513966480447, "grad_norm": 0.4225478172302246, "learning_rate": 0.00014453781512605043, "loss": 0.4381, "step": 30654 }, { "epoch": 17.125698324022345, "grad_norm": 0.557887852191925, "learning_rate": 0.00014450980392156863, "loss": 0.389, "step": 30655 }, { "epoch": 17.126256983240225, "grad_norm": 0.6848803162574768, "learning_rate": 0.00014448179271708684, "loss": 0.388, "step": 30656 }, { "epoch": 17.1268156424581, "grad_norm": 0.5033578872680664, "learning_rate": 0.00014445378151260504, "loss": 0.4012, "step": 30657 }, { "epoch": 17.127374301675978, "grad_norm": 0.6004784107208252, "learning_rate": 0.00014442577030812325, "loss": 0.4827, "step": 30658 }, { "epoch": 17.127932960893855, "grad_norm": 0.4511112570762634, "learning_rate": 0.00014439775910364146, "loss": 0.372, "step": 30659 }, { "epoch": 17.12849162011173, "grad_norm": 0.701365053653717, "learning_rate": 0.00014436974789915966, "loss": 0.4164, "step": 30660 }, { "epoch": 
17.129050279329608, "grad_norm": 0.43382611870765686, "learning_rate": 0.00014434173669467787, "loss": 0.3811, "step": 30661 }, { "epoch": 17.129608938547484, "grad_norm": 0.4533500373363495, "learning_rate": 0.00014431372549019607, "loss": 0.3504, "step": 30662 }, { "epoch": 17.130167597765364, "grad_norm": 0.385133296251297, "learning_rate": 0.0001442857142857143, "loss": 0.3699, "step": 30663 }, { "epoch": 17.13072625698324, "grad_norm": 0.7855821847915649, "learning_rate": 0.0001442577030812325, "loss": 0.4541, "step": 30664 }, { "epoch": 17.131284916201118, "grad_norm": 4.786759376525879, "learning_rate": 0.0001442296918767507, "loss": 0.4434, "step": 30665 }, { "epoch": 17.131843575418994, "grad_norm": 0.9977697730064392, "learning_rate": 0.0001442016806722689, "loss": 0.4855, "step": 30666 }, { "epoch": 17.13240223463687, "grad_norm": 0.5389133095741272, "learning_rate": 0.00014417366946778713, "loss": 0.4693, "step": 30667 }, { "epoch": 17.132960893854747, "grad_norm": 0.7849056720733643, "learning_rate": 0.00014414565826330534, "loss": 0.3601, "step": 30668 }, { "epoch": 17.133519553072627, "grad_norm": 0.3958875834941864, "learning_rate": 0.00014411764705882352, "loss": 0.417, "step": 30669 }, { "epoch": 17.134078212290504, "grad_norm": 0.4477807283401489, "learning_rate": 0.00014408963585434172, "loss": 0.3522, "step": 30670 }, { "epoch": 17.13463687150838, "grad_norm": 1.1301321983337402, "learning_rate": 0.00014406162464985996, "loss": 0.4437, "step": 30671 }, { "epoch": 17.135195530726257, "grad_norm": 0.3881736397743225, "learning_rate": 0.00014403361344537816, "loss": 0.3914, "step": 30672 }, { "epoch": 17.135754189944134, "grad_norm": 0.4912889301776886, "learning_rate": 0.00014400560224089637, "loss": 0.419, "step": 30673 }, { "epoch": 17.13631284916201, "grad_norm": 0.4167359173297882, "learning_rate": 0.00014397759103641455, "loss": 0.4036, "step": 30674 }, { "epoch": 17.136871508379887, "grad_norm": 0.762546956539154, "learning_rate": 
0.00014394957983193278, "loss": 0.561, "step": 30675 }, { "epoch": 17.137430167597767, "grad_norm": 0.7804329991340637, "learning_rate": 0.00014392156862745099, "loss": 0.3645, "step": 30676 }, { "epoch": 17.137988826815644, "grad_norm": 0.33971935510635376, "learning_rate": 0.0001438935574229692, "loss": 0.3488, "step": 30677 }, { "epoch": 17.13854748603352, "grad_norm": 0.8154634833335876, "learning_rate": 0.0001438655462184874, "loss": 0.5119, "step": 30678 }, { "epoch": 17.139106145251397, "grad_norm": 0.4247089624404907, "learning_rate": 0.0001438375350140056, "loss": 0.5857, "step": 30679 }, { "epoch": 17.139664804469273, "grad_norm": 0.3276110887527466, "learning_rate": 0.0001438095238095238, "loss": 0.3701, "step": 30680 }, { "epoch": 17.14022346368715, "grad_norm": 0.4172048568725586, "learning_rate": 0.00014378151260504202, "loss": 0.3429, "step": 30681 }, { "epoch": 17.140782122905026, "grad_norm": 0.3361363708972931, "learning_rate": 0.00014375350140056022, "loss": 0.4208, "step": 30682 }, { "epoch": 17.141340782122906, "grad_norm": 9.697089195251465, "learning_rate": 0.00014372549019607845, "loss": 0.4094, "step": 30683 }, { "epoch": 17.141899441340783, "grad_norm": 0.49515315890312195, "learning_rate": 0.00014369747899159663, "loss": 0.3923, "step": 30684 }, { "epoch": 17.14245810055866, "grad_norm": 0.46800726652145386, "learning_rate": 0.00014366946778711484, "loss": 0.3401, "step": 30685 }, { "epoch": 17.143016759776536, "grad_norm": 1.4067541360855103, "learning_rate": 0.00014364145658263305, "loss": 0.3679, "step": 30686 }, { "epoch": 17.143575418994413, "grad_norm": 0.3973587453365326, "learning_rate": 0.00014361344537815128, "loss": 0.374, "step": 30687 }, { "epoch": 17.14413407821229, "grad_norm": 0.3729104995727539, "learning_rate": 0.00014358543417366948, "loss": 0.393, "step": 30688 }, { "epoch": 17.144692737430166, "grad_norm": 0.5097337365150452, "learning_rate": 0.00014355742296918766, "loss": 0.4501, "step": 30689 }, { "epoch": 
17.145251396648046, "grad_norm": 2.895922899246216, "learning_rate": 0.00014352941176470587, "loss": 0.6325, "step": 30690 }, { "epoch": 17.145810055865923, "grad_norm": 0.7796987891197205, "learning_rate": 0.0001435014005602241, "loss": 0.3506, "step": 30691 }, { "epoch": 17.1463687150838, "grad_norm": 0.5188897252082825, "learning_rate": 0.0001434733893557423, "loss": 0.3887, "step": 30692 }, { "epoch": 17.146927374301676, "grad_norm": 2.1030290126800537, "learning_rate": 0.00014344537815126051, "loss": 0.469, "step": 30693 }, { "epoch": 17.147486033519552, "grad_norm": 0.45743533968925476, "learning_rate": 0.0001434173669467787, "loss": 0.3988, "step": 30694 }, { "epoch": 17.14804469273743, "grad_norm": 0.6907396912574768, "learning_rate": 0.00014338935574229693, "loss": 0.4277, "step": 30695 }, { "epoch": 17.14860335195531, "grad_norm": 1.2991725206375122, "learning_rate": 0.00014336134453781513, "loss": 0.3601, "step": 30696 }, { "epoch": 17.149162011173186, "grad_norm": 0.703126847743988, "learning_rate": 0.00014333333333333334, "loss": 0.5457, "step": 30697 }, { "epoch": 17.149720670391062, "grad_norm": 0.4269443452358246, "learning_rate": 0.00014330532212885154, "loss": 0.4236, "step": 30698 }, { "epoch": 17.15027932960894, "grad_norm": 0.5367129445075989, "learning_rate": 0.00014327731092436975, "loss": 0.3983, "step": 30699 }, { "epoch": 17.150837988826815, "grad_norm": 0.37813735008239746, "learning_rate": 0.00014324929971988796, "loss": 0.3945, "step": 30700 }, { "epoch": 17.15139664804469, "grad_norm": 0.8809571266174316, "learning_rate": 0.00014322128851540616, "loss": 0.4923, "step": 30701 }, { "epoch": 17.15195530726257, "grad_norm": 1.254481315612793, "learning_rate": 0.00014319327731092437, "loss": 0.4305, "step": 30702 }, { "epoch": 17.15251396648045, "grad_norm": 0.3826538622379303, "learning_rate": 0.0001431652661064426, "loss": 0.3819, "step": 30703 }, { "epoch": 17.153072625698325, "grad_norm": 0.4359777271747589, "learning_rate": 
0.00014313725490196078, "loss": 0.4034, "step": 30704 }, { "epoch": 17.1536312849162, "grad_norm": 0.7031931281089783, "learning_rate": 0.000143109243697479, "loss": 0.3267, "step": 30705 }, { "epoch": 17.154189944134078, "grad_norm": 0.43798449635505676, "learning_rate": 0.0001430812324929972, "loss": 0.4039, "step": 30706 }, { "epoch": 17.154748603351955, "grad_norm": 0.4789959788322449, "learning_rate": 0.00014305322128851543, "loss": 0.4182, "step": 30707 }, { "epoch": 17.15530726256983, "grad_norm": 0.4141457676887512, "learning_rate": 0.00014302521008403363, "loss": 0.357, "step": 30708 }, { "epoch": 17.155865921787708, "grad_norm": 0.3567759394645691, "learning_rate": 0.0001429971988795518, "loss": 0.3395, "step": 30709 }, { "epoch": 17.156424581005588, "grad_norm": 0.761366605758667, "learning_rate": 0.00014296918767507002, "loss": 0.4784, "step": 30710 }, { "epoch": 17.156983240223465, "grad_norm": 0.4974413812160492, "learning_rate": 0.00014294117647058825, "loss": 0.3842, "step": 30711 }, { "epoch": 17.15754189944134, "grad_norm": 1.084973692893982, "learning_rate": 0.00014291316526610646, "loss": 0.5343, "step": 30712 }, { "epoch": 17.158100558659218, "grad_norm": 0.4046635925769806, "learning_rate": 0.00014288515406162463, "loss": 0.3207, "step": 30713 }, { "epoch": 17.158659217877094, "grad_norm": 0.3601182699203491, "learning_rate": 0.00014285714285714284, "loss": 0.4473, "step": 30714 }, { "epoch": 17.15921787709497, "grad_norm": 0.4990438222885132, "learning_rate": 0.00014282913165266107, "loss": 0.4056, "step": 30715 }, { "epoch": 17.159776536312847, "grad_norm": 0.5641283988952637, "learning_rate": 0.00014280112044817928, "loss": 0.3609, "step": 30716 }, { "epoch": 17.160335195530728, "grad_norm": 0.6007311344146729, "learning_rate": 0.00014277310924369749, "loss": 0.4625, "step": 30717 }, { "epoch": 17.160893854748604, "grad_norm": 0.7267006039619446, "learning_rate": 0.00014274509803921566, "loss": 0.401, "step": 30718 }, { "epoch": 
17.16145251396648, "grad_norm": 0.7893133163452148, "learning_rate": 0.0001427170868347339, "loss": 0.3988, "step": 30719 }, { "epoch": 17.162011173184357, "grad_norm": 0.4442957639694214, "learning_rate": 0.0001426890756302521, "loss": 0.3621, "step": 30720 }, { "epoch": 17.162569832402234, "grad_norm": 0.7172282934188843, "learning_rate": 0.0001426610644257703, "loss": 0.5008, "step": 30721 }, { "epoch": 17.16312849162011, "grad_norm": 0.41543108224868774, "learning_rate": 0.00014263305322128852, "loss": 0.3916, "step": 30722 }, { "epoch": 17.16368715083799, "grad_norm": 0.46276652812957764, "learning_rate": 0.00014260504201680672, "loss": 0.331, "step": 30723 }, { "epoch": 17.164245810055867, "grad_norm": 0.7716822028160095, "learning_rate": 0.00014257703081232493, "loss": 0.3414, "step": 30724 }, { "epoch": 17.164804469273744, "grad_norm": 0.37379390001296997, "learning_rate": 0.00014254901960784313, "loss": 0.392, "step": 30725 }, { "epoch": 17.16536312849162, "grad_norm": 0.5457429885864258, "learning_rate": 0.00014252100840336134, "loss": 0.4658, "step": 30726 }, { "epoch": 17.165921787709497, "grad_norm": 0.4888390004634857, "learning_rate": 0.00014249299719887957, "loss": 0.616, "step": 30727 }, { "epoch": 17.166480446927373, "grad_norm": 0.3112061619758606, "learning_rate": 0.00014246498599439775, "loss": 0.3787, "step": 30728 }, { "epoch": 17.16703910614525, "grad_norm": 3.6695821285247803, "learning_rate": 0.00014243697478991596, "loss": 0.4124, "step": 30729 }, { "epoch": 17.16759776536313, "grad_norm": 1.4364280700683594, "learning_rate": 0.00014240896358543416, "loss": 0.3716, "step": 30730 }, { "epoch": 17.168156424581007, "grad_norm": 0.42415791749954224, "learning_rate": 0.0001423809523809524, "loss": 0.3617, "step": 30731 }, { "epoch": 17.168715083798883, "grad_norm": 0.5223853588104248, "learning_rate": 0.0001423529411764706, "loss": 0.409, "step": 30732 }, { "epoch": 17.16927374301676, "grad_norm": 0.43599650263786316, "learning_rate": 
0.00014232492997198878, "loss": 0.4166, "step": 30733 }, { "epoch": 17.169832402234636, "grad_norm": 3.177008867263794, "learning_rate": 0.000142296918767507, "loss": 0.4631, "step": 30734 }, { "epoch": 17.170391061452513, "grad_norm": 0.7092443704605103, "learning_rate": 0.00014226890756302522, "loss": 0.3763, "step": 30735 }, { "epoch": 17.17094972067039, "grad_norm": 0.503337562084198, "learning_rate": 0.00014224089635854343, "loss": 0.3923, "step": 30736 }, { "epoch": 17.17150837988827, "grad_norm": 0.5022904276847839, "learning_rate": 0.00014221288515406163, "loss": 0.3756, "step": 30737 }, { "epoch": 17.172067039106146, "grad_norm": 0.3345615565776825, "learning_rate": 0.0001421848739495798, "loss": 0.3168, "step": 30738 }, { "epoch": 17.172625698324023, "grad_norm": 2.0736594200134277, "learning_rate": 0.00014215686274509804, "loss": 0.339, "step": 30739 }, { "epoch": 17.1731843575419, "grad_norm": 1.0013874769210815, "learning_rate": 0.00014212885154061625, "loss": 0.3853, "step": 30740 }, { "epoch": 17.173743016759776, "grad_norm": 0.44100654125213623, "learning_rate": 0.00014210084033613446, "loss": 0.2739, "step": 30741 }, { "epoch": 17.174301675977652, "grad_norm": 0.4583946466445923, "learning_rate": 0.00014207282913165266, "loss": 0.3344, "step": 30742 }, { "epoch": 17.174860335195532, "grad_norm": 0.49546849727630615, "learning_rate": 0.00014204481792717087, "loss": 0.3992, "step": 30743 }, { "epoch": 17.17541899441341, "grad_norm": 0.44333434104919434, "learning_rate": 0.00014201680672268907, "loss": 0.4895, "step": 30744 }, { "epoch": 17.175977653631286, "grad_norm": 0.8308997750282288, "learning_rate": 0.00014198879551820728, "loss": 0.371, "step": 30745 }, { "epoch": 17.176536312849162, "grad_norm": 0.4740828573703766, "learning_rate": 0.0001419607843137255, "loss": 0.4085, "step": 30746 }, { "epoch": 17.17709497206704, "grad_norm": 0.44030627608299255, "learning_rate": 0.00014193277310924372, "loss": 0.3475, "step": 30747 }, { "epoch": 
17.177653631284915, "grad_norm": 0.7526219487190247, "learning_rate": 0.0001419047619047619, "loss": 0.3975, "step": 30748 }, { "epoch": 17.178212290502792, "grad_norm": 0.5852035284042358, "learning_rate": 0.0001418767507002801, "loss": 0.5215, "step": 30749 }, { "epoch": 17.178770949720672, "grad_norm": 0.5003010630607605, "learning_rate": 0.0001418487394957983, "loss": 0.5067, "step": 30750 }, { "epoch": 17.17932960893855, "grad_norm": 0.49839702248573303, "learning_rate": 0.00014182072829131654, "loss": 0.3739, "step": 30751 }, { "epoch": 17.179888268156425, "grad_norm": 3.989161491394043, "learning_rate": 0.00014179271708683475, "loss": 0.4152, "step": 30752 }, { "epoch": 17.1804469273743, "grad_norm": 0.6722609996795654, "learning_rate": 0.00014176470588235293, "loss": 0.4152, "step": 30753 }, { "epoch": 17.18100558659218, "grad_norm": 0.9006577730178833, "learning_rate": 0.00014173669467787113, "loss": 0.4545, "step": 30754 }, { "epoch": 17.181564245810055, "grad_norm": 0.9005676507949829, "learning_rate": 0.00014170868347338937, "loss": 0.3867, "step": 30755 }, { "epoch": 17.18212290502793, "grad_norm": 0.3634353578090668, "learning_rate": 0.00014168067226890757, "loss": 0.3688, "step": 30756 }, { "epoch": 17.18268156424581, "grad_norm": 0.5950771570205688, "learning_rate": 0.00014165266106442578, "loss": 0.381, "step": 30757 }, { "epoch": 17.183240223463688, "grad_norm": 0.547524094581604, "learning_rate": 0.00014162464985994396, "loss": 0.5534, "step": 30758 }, { "epoch": 17.183798882681565, "grad_norm": 0.45856061577796936, "learning_rate": 0.0001415966386554622, "loss": 0.4081, "step": 30759 }, { "epoch": 17.18435754189944, "grad_norm": 0.37353959679603577, "learning_rate": 0.0001415686274509804, "loss": 0.3982, "step": 30760 }, { "epoch": 17.184916201117318, "grad_norm": 0.5519049763679504, "learning_rate": 0.0001415406162464986, "loss": 0.6021, "step": 30761 }, { "epoch": 17.185474860335194, "grad_norm": 0.3649415671825409, "learning_rate": 
0.0001415126050420168, "loss": 0.4029, "step": 30762 }, { "epoch": 17.18603351955307, "grad_norm": 0.43485450744628906, "learning_rate": 0.00014148459383753502, "loss": 0.4843, "step": 30763 }, { "epoch": 17.18659217877095, "grad_norm": 0.6240429282188416, "learning_rate": 0.00014145658263305322, "loss": 0.3766, "step": 30764 }, { "epoch": 17.187150837988828, "grad_norm": 0.46352556347846985, "learning_rate": 0.00014142857142857143, "loss": 0.3892, "step": 30765 }, { "epoch": 17.187709497206704, "grad_norm": 0.3896719813346863, "learning_rate": 0.00014140056022408963, "loss": 0.3854, "step": 30766 }, { "epoch": 17.18826815642458, "grad_norm": 0.49799567461013794, "learning_rate": 0.00014137254901960784, "loss": 0.3704, "step": 30767 }, { "epoch": 17.188826815642457, "grad_norm": 0.3745008111000061, "learning_rate": 0.00014134453781512605, "loss": 0.3372, "step": 30768 }, { "epoch": 17.189385474860334, "grad_norm": 0.47652313113212585, "learning_rate": 0.00014131652661064425, "loss": 0.377, "step": 30769 }, { "epoch": 17.189944134078214, "grad_norm": 0.3345956802368164, "learning_rate": 0.00014128851540616246, "loss": 0.3459, "step": 30770 }, { "epoch": 17.19050279329609, "grad_norm": 0.3444382846355438, "learning_rate": 0.0001412605042016807, "loss": 0.3621, "step": 30771 }, { "epoch": 17.191061452513967, "grad_norm": 0.37247714400291443, "learning_rate": 0.00014123249299719887, "loss": 0.3253, "step": 30772 }, { "epoch": 17.191620111731844, "grad_norm": 0.5487529039382935, "learning_rate": 0.00014120448179271708, "loss": 0.4187, "step": 30773 }, { "epoch": 17.19217877094972, "grad_norm": 0.3745158314704895, "learning_rate": 0.00014117647058823528, "loss": 0.4216, "step": 30774 }, { "epoch": 17.192737430167597, "grad_norm": 0.4563126564025879, "learning_rate": 0.00014114845938375351, "loss": 0.4264, "step": 30775 }, { "epoch": 17.193296089385473, "grad_norm": 0.6057673096656799, "learning_rate": 0.00014112044817927172, "loss": 0.5994, "step": 30776 }, { "epoch": 
17.193854748603353, "grad_norm": 0.5246877074241638, "learning_rate": 0.0001410924369747899, "loss": 0.4866, "step": 30777 }, { "epoch": 17.19441340782123, "grad_norm": 1.5769007205963135, "learning_rate": 0.0001410644257703081, "loss": 0.4173, "step": 30778 }, { "epoch": 17.194972067039107, "grad_norm": 0.5270800590515137, "learning_rate": 0.00014103641456582634, "loss": 0.3994, "step": 30779 }, { "epoch": 17.195530726256983, "grad_norm": 0.4798087179660797, "learning_rate": 0.00014100840336134454, "loss": 0.5318, "step": 30780 }, { "epoch": 17.19608938547486, "grad_norm": 0.377145379781723, "learning_rate": 0.00014098039215686275, "loss": 0.4044, "step": 30781 }, { "epoch": 17.196648044692736, "grad_norm": 5.0258941650390625, "learning_rate": 0.00014095238095238093, "loss": 0.3911, "step": 30782 }, { "epoch": 17.197206703910613, "grad_norm": 0.4774393141269684, "learning_rate": 0.00014092436974789916, "loss": 0.3697, "step": 30783 }, { "epoch": 17.197765363128493, "grad_norm": 0.39969557523727417, "learning_rate": 0.00014089635854341737, "loss": 0.3502, "step": 30784 }, { "epoch": 17.19832402234637, "grad_norm": 0.5120758414268494, "learning_rate": 0.00014086834733893557, "loss": 0.3672, "step": 30785 }, { "epoch": 17.198882681564246, "grad_norm": 0.6791298389434814, "learning_rate": 0.0001408403361344538, "loss": 0.3598, "step": 30786 }, { "epoch": 17.199441340782123, "grad_norm": 0.44210800528526306, "learning_rate": 0.000140812324929972, "loss": 0.4542, "step": 30787 }, { "epoch": 17.2, "grad_norm": 0.6122923493385315, "learning_rate": 0.0001407843137254902, "loss": 0.3872, "step": 30788 }, { "epoch": 17.200558659217876, "grad_norm": 0.5188512802124023, "learning_rate": 0.0001407563025210084, "loss": 0.4251, "step": 30789 }, { "epoch": 17.201117318435756, "grad_norm": 0.5056479573249817, "learning_rate": 0.00014072829131652663, "loss": 0.3245, "step": 30790 }, { "epoch": 17.201675977653633, "grad_norm": 0.5134987235069275, "learning_rate": 
0.00014070028011204484, "loss": 0.3031, "step": 30791 }, { "epoch": 17.20223463687151, "grad_norm": 0.766931414604187, "learning_rate": 0.00014067226890756302, "loss": 0.3508, "step": 30792 }, { "epoch": 17.202793296089386, "grad_norm": 0.33603018522262573, "learning_rate": 0.00014064425770308122, "loss": 0.3574, "step": 30793 }, { "epoch": 17.203351955307262, "grad_norm": 0.6310589909553528, "learning_rate": 0.00014061624649859946, "loss": 0.4536, "step": 30794 }, { "epoch": 17.20391061452514, "grad_norm": 0.8279927968978882, "learning_rate": 0.00014058823529411766, "loss": 0.3854, "step": 30795 }, { "epoch": 17.204469273743015, "grad_norm": 0.9566571712493896, "learning_rate": 0.00014056022408963587, "loss": 0.4779, "step": 30796 }, { "epoch": 17.205027932960895, "grad_norm": 0.4382748305797577, "learning_rate": 0.00014053221288515405, "loss": 0.3631, "step": 30797 }, { "epoch": 17.205586592178772, "grad_norm": 0.33860304951667786, "learning_rate": 0.00014050420168067228, "loss": 0.3777, "step": 30798 }, { "epoch": 17.20614525139665, "grad_norm": 0.3949485123157501, "learning_rate": 0.00014047619047619049, "loss": 0.3493, "step": 30799 }, { "epoch": 17.206703910614525, "grad_norm": 0.5826476216316223, "learning_rate": 0.0001404481792717087, "loss": 0.4618, "step": 30800 }, { "epoch": 17.2072625698324, "grad_norm": 0.4941520094871521, "learning_rate": 0.0001404201680672269, "loss": 0.3332, "step": 30801 }, { "epoch": 17.20782122905028, "grad_norm": 0.42459985613822937, "learning_rate": 0.0001403921568627451, "loss": 0.342, "step": 30802 }, { "epoch": 17.208379888268155, "grad_norm": 0.4781554937362671, "learning_rate": 0.0001403641456582633, "loss": 0.3199, "step": 30803 }, { "epoch": 17.208938547486035, "grad_norm": 0.39960116147994995, "learning_rate": 0.00014033613445378152, "loss": 0.4332, "step": 30804 }, { "epoch": 17.20949720670391, "grad_norm": 4.267194747924805, "learning_rate": 0.00014030812324929972, "loss": 0.3692, "step": 30805 }, { "epoch": 
17.210055865921788, "grad_norm": 0.7935893535614014, "learning_rate": 0.00014028011204481795, "loss": 0.3996, "step": 30806 }, { "epoch": 17.210614525139665, "grad_norm": 0.5719877481460571, "learning_rate": 0.00014025210084033613, "loss": 0.5224, "step": 30807 }, { "epoch": 17.21117318435754, "grad_norm": 0.325985848903656, "learning_rate": 0.00014022408963585434, "loss": 0.3939, "step": 30808 }, { "epoch": 17.211731843575418, "grad_norm": 0.46041709184646606, "learning_rate": 0.00014019607843137255, "loss": 0.4268, "step": 30809 }, { "epoch": 17.212290502793294, "grad_norm": 0.8821285367012024, "learning_rate": 0.00014016806722689078, "loss": 0.4616, "step": 30810 }, { "epoch": 17.212849162011175, "grad_norm": 0.563669741153717, "learning_rate": 0.00014014005602240898, "loss": 0.4486, "step": 30811 }, { "epoch": 17.21340782122905, "grad_norm": 0.47116991877555847, "learning_rate": 0.00014011204481792716, "loss": 0.3261, "step": 30812 }, { "epoch": 17.213966480446928, "grad_norm": 0.7829761505126953, "learning_rate": 0.00014008403361344537, "loss": 0.3214, "step": 30813 }, { "epoch": 17.214525139664804, "grad_norm": 0.4250996708869934, "learning_rate": 0.0001400560224089636, "loss": 0.4002, "step": 30814 }, { "epoch": 17.21508379888268, "grad_norm": 0.4736463129520416, "learning_rate": 0.0001400280112044818, "loss": 0.4208, "step": 30815 }, { "epoch": 17.215642458100557, "grad_norm": 0.3516188859939575, "learning_rate": 0.00014000000000000001, "loss": 0.4041, "step": 30816 }, { "epoch": 17.216201117318437, "grad_norm": 0.3567846715450287, "learning_rate": 0.0001399719887955182, "loss": 0.3776, "step": 30817 }, { "epoch": 17.216759776536314, "grad_norm": 0.42406564950942993, "learning_rate": 0.00013994397759103643, "loss": 0.4301, "step": 30818 }, { "epoch": 17.21731843575419, "grad_norm": 0.7535119652748108, "learning_rate": 0.00013991596638655463, "loss": 0.4104, "step": 30819 }, { "epoch": 17.217877094972067, "grad_norm": 0.5563556551933289, "learning_rate": 
0.00013988795518207284, "loss": 0.4862, "step": 30820 }, { "epoch": 17.218435754189944, "grad_norm": 0.5752178430557251, "learning_rate": 0.00013985994397759104, "loss": 0.4008, "step": 30821 }, { "epoch": 17.21899441340782, "grad_norm": 0.3778010606765747, "learning_rate": 0.00013983193277310925, "loss": 0.2946, "step": 30822 }, { "epoch": 17.219553072625697, "grad_norm": 0.5849639177322388, "learning_rate": 0.00013980392156862746, "loss": 0.3877, "step": 30823 }, { "epoch": 17.220111731843577, "grad_norm": 0.4828316569328308, "learning_rate": 0.00013977591036414566, "loss": 0.3599, "step": 30824 }, { "epoch": 17.220670391061454, "grad_norm": 0.3942548632621765, "learning_rate": 0.00013974789915966387, "loss": 0.4413, "step": 30825 }, { "epoch": 17.22122905027933, "grad_norm": 23.958749771118164, "learning_rate": 0.00013971988795518207, "loss": 0.3402, "step": 30826 }, { "epoch": 17.221787709497207, "grad_norm": 1.4602874517440796, "learning_rate": 0.00013969187675070028, "loss": 0.4295, "step": 30827 }, { "epoch": 17.222346368715083, "grad_norm": 0.4744478464126587, "learning_rate": 0.0001396638655462185, "loss": 0.3776, "step": 30828 }, { "epoch": 17.22290502793296, "grad_norm": 0.4322073459625244, "learning_rate": 0.0001396358543417367, "loss": 0.4569, "step": 30829 }, { "epoch": 17.223463687150836, "grad_norm": 0.5271614789962769, "learning_rate": 0.00013960784313725493, "loss": 0.3849, "step": 30830 }, { "epoch": 17.224022346368717, "grad_norm": 0.9581164717674255, "learning_rate": 0.0001395798319327731, "loss": 0.3588, "step": 30831 }, { "epoch": 17.224581005586593, "grad_norm": 0.5064389705657959, "learning_rate": 0.0001395518207282913, "loss": 0.3979, "step": 30832 }, { "epoch": 17.22513966480447, "grad_norm": 0.4320276081562042, "learning_rate": 0.00013952380952380952, "loss": 0.3942, "step": 30833 }, { "epoch": 17.225698324022346, "grad_norm": 0.4634736180305481, "learning_rate": 0.00013949579831932775, "loss": 0.3647, "step": 30834 }, { "epoch": 
17.226256983240223, "grad_norm": 0.715804398059845, "learning_rate": 0.00013946778711484596, "loss": 0.536, "step": 30835 }, { "epoch": 17.2268156424581, "grad_norm": 0.8666571974754333, "learning_rate": 0.00013943977591036413, "loss": 0.3377, "step": 30836 }, { "epoch": 17.227374301675976, "grad_norm": 0.36556607484817505, "learning_rate": 0.00013941176470588234, "loss": 0.364, "step": 30837 }, { "epoch": 17.227932960893856, "grad_norm": 0.4033718407154083, "learning_rate": 0.00013938375350140057, "loss": 0.3177, "step": 30838 }, { "epoch": 17.228491620111733, "grad_norm": 0.43227052688598633, "learning_rate": 0.00013935574229691878, "loss": 0.3495, "step": 30839 }, { "epoch": 17.22905027932961, "grad_norm": 0.6135537624359131, "learning_rate": 0.00013932773109243699, "loss": 0.5143, "step": 30840 }, { "epoch": 17.229608938547486, "grad_norm": 0.4333163797855377, "learning_rate": 0.00013929971988795516, "loss": 0.4698, "step": 30841 }, { "epoch": 17.230167597765362, "grad_norm": 0.5569503903388977, "learning_rate": 0.0001392717086834734, "loss": 0.3978, "step": 30842 }, { "epoch": 17.23072625698324, "grad_norm": 0.6848451495170593, "learning_rate": 0.0001392436974789916, "loss": 0.481, "step": 30843 }, { "epoch": 17.23128491620112, "grad_norm": 0.8422544002532959, "learning_rate": 0.0001392156862745098, "loss": 0.443, "step": 30844 }, { "epoch": 17.231843575418996, "grad_norm": 0.4208816587924957, "learning_rate": 0.00013918767507002802, "loss": 0.3659, "step": 30845 }, { "epoch": 17.232402234636872, "grad_norm": 0.46880191564559937, "learning_rate": 0.00013915966386554622, "loss": 0.4404, "step": 30846 }, { "epoch": 17.23296089385475, "grad_norm": 0.37111273407936096, "learning_rate": 0.00013913165266106443, "loss": 0.3204, "step": 30847 }, { "epoch": 17.233519553072625, "grad_norm": 0.34396710991859436, "learning_rate": 0.00013910364145658263, "loss": 0.3837, "step": 30848 }, { "epoch": 17.234078212290502, "grad_norm": 0.4517327845096588, "learning_rate": 
0.00013907563025210084, "loss": 0.4, "step": 30849 }, { "epoch": 17.23463687150838, "grad_norm": 0.4544031023979187, "learning_rate": 0.00013904761904761907, "loss": 0.3801, "step": 30850 }, { "epoch": 17.23519553072626, "grad_norm": 1.6825200319290161, "learning_rate": 0.00013901960784313725, "loss": 0.3969, "step": 30851 }, { "epoch": 17.235754189944135, "grad_norm": 0.6552950739860535, "learning_rate": 0.00013899159663865546, "loss": 0.4193, "step": 30852 }, { "epoch": 17.23631284916201, "grad_norm": 0.6134896874427795, "learning_rate": 0.00013896358543417366, "loss": 0.4103, "step": 30853 }, { "epoch": 17.23687150837989, "grad_norm": 0.36394381523132324, "learning_rate": 0.0001389355742296919, "loss": 0.3945, "step": 30854 }, { "epoch": 17.237430167597765, "grad_norm": 4.964192867279053, "learning_rate": 0.0001389075630252101, "loss": 0.3259, "step": 30855 }, { "epoch": 17.23798882681564, "grad_norm": 0.4651060700416565, "learning_rate": 0.00013887955182072828, "loss": 0.3964, "step": 30856 }, { "epoch": 17.238547486033518, "grad_norm": 0.5870866179466248, "learning_rate": 0.0001388515406162465, "loss": 0.409, "step": 30857 }, { "epoch": 17.239106145251398, "grad_norm": 0.49224334955215454, "learning_rate": 0.00013882352941176472, "loss": 0.4666, "step": 30858 }, { "epoch": 17.239664804469275, "grad_norm": 1.063842535018921, "learning_rate": 0.00013879551820728293, "loss": 0.3182, "step": 30859 }, { "epoch": 17.24022346368715, "grad_norm": 6.323765754699707, "learning_rate": 0.00013876750700280113, "loss": 0.4017, "step": 30860 }, { "epoch": 17.240782122905028, "grad_norm": 0.4009861946105957, "learning_rate": 0.0001387394957983193, "loss": 0.3945, "step": 30861 }, { "epoch": 17.241340782122904, "grad_norm": 0.36764004826545715, "learning_rate": 0.00013871148459383754, "loss": 0.334, "step": 30862 }, { "epoch": 17.24189944134078, "grad_norm": 0.5300739407539368, "learning_rate": 0.00013868347338935575, "loss": 0.4345, "step": 30863 }, { "epoch": 
17.242458100558657, "grad_norm": 0.6319892406463623, "learning_rate": 0.00013865546218487396, "loss": 0.3874, "step": 30864 }, { "epoch": 17.243016759776538, "grad_norm": 0.3876967132091522, "learning_rate": 0.00013862745098039216, "loss": 0.3318, "step": 30865 }, { "epoch": 17.243575418994414, "grad_norm": 0.6696122288703918, "learning_rate": 0.00013859943977591037, "loss": 0.429, "step": 30866 }, { "epoch": 17.24413407821229, "grad_norm": 0.6907252669334412, "learning_rate": 0.00013857142857142857, "loss": 0.4089, "step": 30867 }, { "epoch": 17.244692737430167, "grad_norm": 0.5244340300559998, "learning_rate": 0.00013854341736694678, "loss": 0.459, "step": 30868 }, { "epoch": 17.245251396648044, "grad_norm": 1.119207501411438, "learning_rate": 0.000138515406162465, "loss": 0.3509, "step": 30869 }, { "epoch": 17.24581005586592, "grad_norm": 0.41680291295051575, "learning_rate": 0.00013848739495798322, "loss": 0.4479, "step": 30870 }, { "epoch": 17.2463687150838, "grad_norm": 0.6145120859146118, "learning_rate": 0.0001384593837535014, "loss": 0.429, "step": 30871 }, { "epoch": 17.246927374301677, "grad_norm": 2.2590606212615967, "learning_rate": 0.0001384313725490196, "loss": 0.415, "step": 30872 }, { "epoch": 17.247486033519554, "grad_norm": 0.4758312404155731, "learning_rate": 0.0001384033613445378, "loss": 0.3951, "step": 30873 }, { "epoch": 17.24804469273743, "grad_norm": 1.1972399950027466, "learning_rate": 0.00013837535014005604, "loss": 0.4254, "step": 30874 }, { "epoch": 17.248603351955307, "grad_norm": 0.3477563261985779, "learning_rate": 0.00013834733893557425, "loss": 0.4614, "step": 30875 }, { "epoch": 17.249162011173183, "grad_norm": 0.34456226229667664, "learning_rate": 0.00013831932773109243, "loss": 0.4409, "step": 30876 }, { "epoch": 17.24972067039106, "grad_norm": 2.0233559608459473, "learning_rate": 0.00013829131652661063, "loss": 0.5128, "step": 30877 }, { "epoch": 17.25027932960894, "grad_norm": 0.44099491834640503, "learning_rate": 
0.00013826330532212887, "loss": 0.3918, "step": 30878 }, { "epoch": 17.250837988826817, "grad_norm": 0.3825315237045288, "learning_rate": 0.00013823529411764707, "loss": 0.3995, "step": 30879 }, { "epoch": 17.251396648044693, "grad_norm": 0.33068642020225525, "learning_rate": 0.00013820728291316525, "loss": 0.3558, "step": 30880 }, { "epoch": 17.25195530726257, "grad_norm": 0.47674262523651123, "learning_rate": 0.00013817927170868346, "loss": 0.3743, "step": 30881 }, { "epoch": 17.252513966480446, "grad_norm": 0.391883909702301, "learning_rate": 0.0001381512605042017, "loss": 0.35, "step": 30882 }, { "epoch": 17.253072625698323, "grad_norm": 0.6043128967285156, "learning_rate": 0.0001381232492997199, "loss": 0.4051, "step": 30883 }, { "epoch": 17.2536312849162, "grad_norm": 0.476500540971756, "learning_rate": 0.0001380952380952381, "loss": 0.4562, "step": 30884 }, { "epoch": 17.25418994413408, "grad_norm": 0.5104117393493652, "learning_rate": 0.00013806722689075628, "loss": 0.2972, "step": 30885 }, { "epoch": 17.254748603351956, "grad_norm": 0.4299020767211914, "learning_rate": 0.00013803921568627452, "loss": 0.3577, "step": 30886 }, { "epoch": 17.255307262569833, "grad_norm": 0.5536918044090271, "learning_rate": 0.00013801120448179272, "loss": 0.5185, "step": 30887 }, { "epoch": 17.25586592178771, "grad_norm": 0.4102650582790375, "learning_rate": 0.00013798319327731093, "loss": 0.4376, "step": 30888 }, { "epoch": 17.256424581005586, "grad_norm": 0.446763813495636, "learning_rate": 0.00013795518207282913, "loss": 0.4226, "step": 30889 }, { "epoch": 17.256983240223462, "grad_norm": 0.32200101017951965, "learning_rate": 0.00013792717086834734, "loss": 0.3654, "step": 30890 }, { "epoch": 17.257541899441343, "grad_norm": 0.8275089263916016, "learning_rate": 0.00013789915966386555, "loss": 0.533, "step": 30891 }, { "epoch": 17.25810055865922, "grad_norm": 0.6773897409439087, "learning_rate": 0.00013787114845938375, "loss": 0.4648, "step": 30892 }, { "epoch": 
17.258659217877096, "grad_norm": 0.3769640028476715, "learning_rate": 0.00013784313725490196, "loss": 0.4887, "step": 30893 }, { "epoch": 17.259217877094972, "grad_norm": 0.5789656043052673, "learning_rate": 0.0001378151260504202, "loss": 0.5579, "step": 30894 }, { "epoch": 17.25977653631285, "grad_norm": 0.44554921984672546, "learning_rate": 0.00013778711484593837, "loss": 0.3955, "step": 30895 }, { "epoch": 17.260335195530725, "grad_norm": 0.49596601724624634, "learning_rate": 0.00013775910364145658, "loss": 0.4066, "step": 30896 }, { "epoch": 17.260893854748602, "grad_norm": 0.6141653060913086, "learning_rate": 0.00013773109243697478, "loss": 0.4103, "step": 30897 }, { "epoch": 17.261452513966482, "grad_norm": 0.4723193645477295, "learning_rate": 0.00013770308123249301, "loss": 0.3924, "step": 30898 }, { "epoch": 17.26201117318436, "grad_norm": 0.37482163310050964, "learning_rate": 0.00013767507002801122, "loss": 0.3048, "step": 30899 }, { "epoch": 17.262569832402235, "grad_norm": 0.3675345182418823, "learning_rate": 0.0001376470588235294, "loss": 0.353, "step": 30900 }, { "epoch": 17.26312849162011, "grad_norm": 0.4940016269683838, "learning_rate": 0.0001376190476190476, "loss": 0.4825, "step": 30901 }, { "epoch": 17.26368715083799, "grad_norm": 0.5392482280731201, "learning_rate": 0.00013759103641456584, "loss": 0.4118, "step": 30902 }, { "epoch": 17.264245810055865, "grad_norm": 1.110404372215271, "learning_rate": 0.00013756302521008404, "loss": 0.3909, "step": 30903 }, { "epoch": 17.26480446927374, "grad_norm": 0.453970730304718, "learning_rate": 0.00013753501400560225, "loss": 0.3649, "step": 30904 }, { "epoch": 17.26536312849162, "grad_norm": 0.39727967977523804, "learning_rate": 0.00013750700280112043, "loss": 0.4499, "step": 30905 }, { "epoch": 17.265921787709498, "grad_norm": 0.4828219413757324, "learning_rate": 0.00013747899159663866, "loss": 0.3408, "step": 30906 }, { "epoch": 17.266480446927375, "grad_norm": 7.320755958557129, "learning_rate": 
0.00013745098039215687, "loss": 0.4585, "step": 30907 }, { "epoch": 17.26703910614525, "grad_norm": 0.5370703339576721, "learning_rate": 0.00013742296918767507, "loss": 0.3453, "step": 30908 }, { "epoch": 17.267597765363128, "grad_norm": 0.42002859711647034, "learning_rate": 0.00013739495798319328, "loss": 0.4502, "step": 30909 }, { "epoch": 17.268156424581004, "grad_norm": 0.34289199113845825, "learning_rate": 0.0001373669467787115, "loss": 0.3302, "step": 30910 }, { "epoch": 17.26871508379888, "grad_norm": 0.5936195254325867, "learning_rate": 0.0001373389355742297, "loss": 0.3739, "step": 30911 }, { "epoch": 17.26927374301676, "grad_norm": 0.39910629391670227, "learning_rate": 0.0001373109243697479, "loss": 0.417, "step": 30912 }, { "epoch": 17.269832402234638, "grad_norm": 0.47082874178886414, "learning_rate": 0.0001372829131652661, "loss": 0.4941, "step": 30913 }, { "epoch": 17.270391061452514, "grad_norm": 0.9759960770606995, "learning_rate": 0.00013725490196078434, "loss": 0.3609, "step": 30914 }, { "epoch": 17.27094972067039, "grad_norm": 2.263122081756592, "learning_rate": 0.00013722689075630252, "loss": 0.4058, "step": 30915 }, { "epoch": 17.271508379888267, "grad_norm": 0.419633150100708, "learning_rate": 0.00013719887955182072, "loss": 0.4724, "step": 30916 }, { "epoch": 17.272067039106144, "grad_norm": 0.37599149346351624, "learning_rate": 0.00013717086834733893, "loss": 0.4537, "step": 30917 }, { "epoch": 17.272625698324024, "grad_norm": 0.39581042528152466, "learning_rate": 0.00013714285714285716, "loss": 0.3674, "step": 30918 }, { "epoch": 17.2731843575419, "grad_norm": 0.5695962905883789, "learning_rate": 0.00013711484593837537, "loss": 0.3849, "step": 30919 }, { "epoch": 17.273743016759777, "grad_norm": 0.857007622718811, "learning_rate": 0.00013708683473389355, "loss": 0.3899, "step": 30920 }, { "epoch": 17.274301675977654, "grad_norm": 7.770477771759033, "learning_rate": 0.00013705882352941175, "loss": 0.3871, "step": 30921 }, { "epoch": 
17.27486033519553, "grad_norm": 0.3573330342769623, "learning_rate": 0.00013703081232492999, "loss": 0.4289, "step": 30922 }, { "epoch": 17.275418994413407, "grad_norm": 0.4509274661540985, "learning_rate": 0.0001370028011204482, "loss": 0.4, "step": 30923 }, { "epoch": 17.275977653631283, "grad_norm": 0.34237974882125854, "learning_rate": 0.0001369747899159664, "loss": 0.4171, "step": 30924 }, { "epoch": 17.276536312849164, "grad_norm": 0.8740954995155334, "learning_rate": 0.00013694677871148458, "loss": 0.5036, "step": 30925 }, { "epoch": 17.27709497206704, "grad_norm": 0.7318565249443054, "learning_rate": 0.0001369187675070028, "loss": 0.5101, "step": 30926 }, { "epoch": 17.277653631284917, "grad_norm": 1.6115286350250244, "learning_rate": 0.00013689075630252102, "loss": 0.29, "step": 30927 }, { "epoch": 17.278212290502793, "grad_norm": 0.5907230377197266, "learning_rate": 0.00013686274509803922, "loss": 0.4436, "step": 30928 }, { "epoch": 17.27877094972067, "grad_norm": 0.3992558419704437, "learning_rate": 0.00013683473389355743, "loss": 0.3901, "step": 30929 }, { "epoch": 17.279329608938546, "grad_norm": 0.5210452675819397, "learning_rate": 0.00013680672268907563, "loss": 0.3434, "step": 30930 }, { "epoch": 17.279888268156423, "grad_norm": 0.40953654050827026, "learning_rate": 0.00013677871148459384, "loss": 0.4168, "step": 30931 }, { "epoch": 17.280446927374303, "grad_norm": 0.41116511821746826, "learning_rate": 0.00013675070028011205, "loss": 0.3767, "step": 30932 }, { "epoch": 17.28100558659218, "grad_norm": 0.4857286214828491, "learning_rate": 0.00013672268907563025, "loss": 0.4451, "step": 30933 }, { "epoch": 17.281564245810056, "grad_norm": 0.3809511959552765, "learning_rate": 0.00013669467787114846, "loss": 0.3726, "step": 30934 }, { "epoch": 17.282122905027933, "grad_norm": 3.765681505203247, "learning_rate": 0.00013666666666666666, "loss": 0.4611, "step": 30935 }, { "epoch": 17.28268156424581, "grad_norm": 0.4003088176250458, "learning_rate": 
0.00013663865546218487, "loss": 0.4332, "step": 30936 }, { "epoch": 17.283240223463686, "grad_norm": 0.42851537466049194, "learning_rate": 0.00013661064425770308, "loss": 0.397, "step": 30937 }, { "epoch": 17.283798882681563, "grad_norm": 0.5623506307601929, "learning_rate": 0.0001365826330532213, "loss": 0.3678, "step": 30938 }, { "epoch": 17.284357541899443, "grad_norm": 0.6187272667884827, "learning_rate": 0.0001365546218487395, "loss": 0.3686, "step": 30939 }, { "epoch": 17.28491620111732, "grad_norm": 1.0401549339294434, "learning_rate": 0.0001365266106442577, "loss": 0.3553, "step": 30940 }, { "epoch": 17.285474860335196, "grad_norm": 0.572690486907959, "learning_rate": 0.0001364985994397759, "loss": 0.4421, "step": 30941 }, { "epoch": 17.286033519553072, "grad_norm": 0.5744448900222778, "learning_rate": 0.00013647058823529413, "loss": 0.4744, "step": 30942 }, { "epoch": 17.28659217877095, "grad_norm": 0.6637737154960632, "learning_rate": 0.00013644257703081234, "loss": 0.4896, "step": 30943 }, { "epoch": 17.287150837988825, "grad_norm": 12.140373229980469, "learning_rate": 0.00013641456582633052, "loss": 0.4325, "step": 30944 }, { "epoch": 17.287709497206706, "grad_norm": 0.5495653748512268, "learning_rate": 0.00013638655462184872, "loss": 0.5844, "step": 30945 }, { "epoch": 17.288268156424582, "grad_norm": 0.5662904977798462, "learning_rate": 0.00013635854341736696, "loss": 0.383, "step": 30946 }, { "epoch": 17.28882681564246, "grad_norm": 0.530608057975769, "learning_rate": 0.00013633053221288516, "loss": 0.4547, "step": 30947 }, { "epoch": 17.289385474860335, "grad_norm": 0.43215397000312805, "learning_rate": 0.00013630252100840337, "loss": 0.4145, "step": 30948 }, { "epoch": 17.289944134078212, "grad_norm": 0.6672047972679138, "learning_rate": 0.00013627450980392155, "loss": 0.3413, "step": 30949 }, { "epoch": 17.29050279329609, "grad_norm": 0.9095503091812134, "learning_rate": 0.00013624649859943978, "loss": 0.497, "step": 30950 }, { "epoch": 
17.291061452513965, "grad_norm": 1.1142430305480957, "learning_rate": 0.000136218487394958, "loss": 0.4701, "step": 30951 }, { "epoch": 17.291620111731845, "grad_norm": 0.4204825460910797, "learning_rate": 0.0001361904761904762, "loss": 0.3896, "step": 30952 }, { "epoch": 17.29217877094972, "grad_norm": 0.8348186612129211, "learning_rate": 0.0001361624649859944, "loss": 0.3278, "step": 30953 }, { "epoch": 17.2927374301676, "grad_norm": 0.3412925601005554, "learning_rate": 0.0001361344537815126, "loss": 0.3412, "step": 30954 }, { "epoch": 17.293296089385475, "grad_norm": 0.4504820704460144, "learning_rate": 0.0001361064425770308, "loss": 0.4762, "step": 30955 }, { "epoch": 17.29385474860335, "grad_norm": 0.8060104250907898, "learning_rate": 0.00013607843137254902, "loss": 0.5149, "step": 30956 }, { "epoch": 17.294413407821228, "grad_norm": 0.40251079201698303, "learning_rate": 0.00013605042016806722, "loss": 0.3836, "step": 30957 }, { "epoch": 17.294972067039105, "grad_norm": 0.3634423017501831, "learning_rate": 0.00013602240896358546, "loss": 0.3616, "step": 30958 }, { "epoch": 17.295530726256985, "grad_norm": 0.5007919073104858, "learning_rate": 0.00013599439775910363, "loss": 0.4911, "step": 30959 }, { "epoch": 17.29608938547486, "grad_norm": 0.48630887269973755, "learning_rate": 0.00013596638655462184, "loss": 0.5072, "step": 30960 }, { "epoch": 17.296648044692738, "grad_norm": 0.5721303224563599, "learning_rate": 0.00013593837535014005, "loss": 0.4613, "step": 30961 }, { "epoch": 17.297206703910614, "grad_norm": 0.3682270050048828, "learning_rate": 0.00013591036414565828, "loss": 0.3556, "step": 30962 }, { "epoch": 17.29776536312849, "grad_norm": 0.44523659348487854, "learning_rate": 0.00013588235294117649, "loss": 0.5177, "step": 30963 }, { "epoch": 17.298324022346367, "grad_norm": 0.5685264468193054, "learning_rate": 0.00013585434173669466, "loss": 0.4693, "step": 30964 }, { "epoch": 17.298882681564244, "grad_norm": 0.6503521800041199, "learning_rate": 
0.00013582633053221287, "loss": 0.574, "step": 30965 }, { "epoch": 17.299441340782124, "grad_norm": 0.5219910144805908, "learning_rate": 0.0001357983193277311, "loss": 0.3701, "step": 30966 }, { "epoch": 17.3, "grad_norm": 0.6246150135993958, "learning_rate": 0.0001357703081232493, "loss": 0.4525, "step": 30967 }, { "epoch": 17.300558659217877, "grad_norm": 1.1186622381210327, "learning_rate": 0.00013574229691876752, "loss": 0.4297, "step": 30968 }, { "epoch": 17.301117318435754, "grad_norm": 0.41805148124694824, "learning_rate": 0.0001357142857142857, "loss": 0.4654, "step": 30969 }, { "epoch": 17.30167597765363, "grad_norm": 0.6026161313056946, "learning_rate": 0.00013568627450980393, "loss": 0.3995, "step": 30970 }, { "epoch": 17.302234636871507, "grad_norm": 0.3659982979297638, "learning_rate": 0.00013565826330532213, "loss": 0.3874, "step": 30971 }, { "epoch": 17.302793296089387, "grad_norm": 2.2221803665161133, "learning_rate": 0.00013563025210084034, "loss": 0.4509, "step": 30972 }, { "epoch": 17.303351955307264, "grad_norm": 0.4099322557449341, "learning_rate": 0.00013560224089635855, "loss": 0.5089, "step": 30973 }, { "epoch": 17.30391061452514, "grad_norm": 0.8470801115036011, "learning_rate": 0.00013557422969187675, "loss": 0.3613, "step": 30974 }, { "epoch": 17.304469273743017, "grad_norm": 0.36109501123428345, "learning_rate": 0.00013554621848739496, "loss": 0.3092, "step": 30975 }, { "epoch": 17.305027932960893, "grad_norm": 1.5990710258483887, "learning_rate": 0.00013551820728291316, "loss": 0.5475, "step": 30976 }, { "epoch": 17.30558659217877, "grad_norm": 0.8227080702781677, "learning_rate": 0.00013549019607843137, "loss": 0.3916, "step": 30977 }, { "epoch": 17.306145251396647, "grad_norm": 0.45544853806495667, "learning_rate": 0.0001354621848739496, "loss": 0.3922, "step": 30978 }, { "epoch": 17.306703910614527, "grad_norm": 0.5445821285247803, "learning_rate": 0.00013543417366946778, "loss": 0.3523, "step": 30979 }, { "epoch": 
17.307262569832403, "grad_norm": 0.4503592848777771, "learning_rate": 0.000135406162464986, "loss": 0.4257, "step": 30980 }, { "epoch": 17.30782122905028, "grad_norm": 0.4236200153827667, "learning_rate": 0.0001353781512605042, "loss": 0.3291, "step": 30981 }, { "epoch": 17.308379888268156, "grad_norm": 0.42220044136047363, "learning_rate": 0.00013535014005602243, "loss": 0.3796, "step": 30982 }, { "epoch": 17.308938547486033, "grad_norm": 0.9984843134880066, "learning_rate": 0.00013532212885154063, "loss": 0.4022, "step": 30983 }, { "epoch": 17.30949720670391, "grad_norm": 0.5241859555244446, "learning_rate": 0.0001352941176470588, "loss": 0.3775, "step": 30984 }, { "epoch": 17.310055865921786, "grad_norm": 0.3857882618904114, "learning_rate": 0.00013526610644257702, "loss": 0.3796, "step": 30985 }, { "epoch": 17.310614525139666, "grad_norm": 1.3354923725128174, "learning_rate": 0.00013523809523809525, "loss": 0.3523, "step": 30986 }, { "epoch": 17.311173184357543, "grad_norm": 0.3864104151725769, "learning_rate": 0.00013521008403361346, "loss": 0.3942, "step": 30987 }, { "epoch": 17.31173184357542, "grad_norm": 1.2963893413543701, "learning_rate": 0.00013518207282913166, "loss": 0.3878, "step": 30988 }, { "epoch": 17.312290502793296, "grad_norm": 0.4099782705307007, "learning_rate": 0.00013515406162464984, "loss": 0.4229, "step": 30989 }, { "epoch": 17.312849162011172, "grad_norm": 0.703102171421051, "learning_rate": 0.00013512605042016807, "loss": 0.4509, "step": 30990 }, { "epoch": 17.31340782122905, "grad_norm": 0.6414778232574463, "learning_rate": 0.00013509803921568628, "loss": 0.427, "step": 30991 }, { "epoch": 17.31396648044693, "grad_norm": 0.34215009212493896, "learning_rate": 0.0001350700280112045, "loss": 0.3436, "step": 30992 }, { "epoch": 17.314525139664806, "grad_norm": 2.1678569316864014, "learning_rate": 0.00013504201680672267, "loss": 0.3776, "step": 30993 }, { "epoch": 17.315083798882682, "grad_norm": 0.3732213079929352, "learning_rate": 
0.0001350140056022409, "loss": 0.4372, "step": 30994 }, { "epoch": 17.31564245810056, "grad_norm": 0.618624210357666, "learning_rate": 0.0001349859943977591, "loss": 0.4475, "step": 30995 }, { "epoch": 17.316201117318435, "grad_norm": 0.36261868476867676, "learning_rate": 0.0001349579831932773, "loss": 0.3479, "step": 30996 }, { "epoch": 17.316759776536312, "grad_norm": 0.5007100701332092, "learning_rate": 0.00013492997198879552, "loss": 0.4405, "step": 30997 }, { "epoch": 17.31731843575419, "grad_norm": 0.4221706986427307, "learning_rate": 0.00013490196078431372, "loss": 0.4509, "step": 30998 }, { "epoch": 17.31787709497207, "grad_norm": 2.1026504039764404, "learning_rate": 0.00013487394957983193, "loss": 0.3968, "step": 30999 }, { "epoch": 17.318435754189945, "grad_norm": 0.5361040234565735, "learning_rate": 0.00013484593837535013, "loss": 0.4392, "step": 31000 }, { "epoch": 17.318435754189945, "eval_cer": 0.08447623100171298, "eval_loss": 0.32168301939964294, "eval_runtime": 55.8575, "eval_samples_per_second": 81.242, "eval_steps_per_second": 5.084, "eval_wer": 0.333035390072903, "step": 31000 }, { "epoch": 17.31899441340782, "grad_norm": 0.4020605683326721, "learning_rate": 0.00013481792717086834, "loss": 0.4416, "step": 31001 }, { "epoch": 17.3195530726257, "grad_norm": 0.46303820610046387, "learning_rate": 0.00013478991596638657, "loss": 0.345, "step": 31002 }, { "epoch": 17.320111731843575, "grad_norm": 0.4367726445198059, "learning_rate": 0.00013476190476190475, "loss": 0.4028, "step": 31003 }, { "epoch": 17.32067039106145, "grad_norm": 0.5548036098480225, "learning_rate": 0.00013473389355742296, "loss": 0.3605, "step": 31004 }, { "epoch": 17.321229050279328, "grad_norm": 0.3474282920360565, "learning_rate": 0.00013470588235294116, "loss": 0.3881, "step": 31005 }, { "epoch": 17.321787709497208, "grad_norm": 0.46288466453552246, "learning_rate": 0.0001346778711484594, "loss": 0.3729, "step": 31006 }, { "epoch": 17.322346368715085, "grad_norm": 
0.7189444303512573, "learning_rate": 0.0001346498599439776, "loss": 0.468, "step": 31007 }, { "epoch": 17.32290502793296, "grad_norm": 1.3265817165374756, "learning_rate": 0.00013462184873949578, "loss": 0.4116, "step": 31008 }, { "epoch": 17.323463687150838, "grad_norm": 0.37299486994743347, "learning_rate": 0.000134593837535014, "loss": 0.3733, "step": 31009 }, { "epoch": 17.324022346368714, "grad_norm": 0.4688703715801239, "learning_rate": 0.00013456582633053222, "loss": 0.489, "step": 31010 }, { "epoch": 17.32458100558659, "grad_norm": 0.30199870467185974, "learning_rate": 0.00013453781512605043, "loss": 0.3427, "step": 31011 }, { "epoch": 17.325139664804468, "grad_norm": 0.8538857102394104, "learning_rate": 0.00013450980392156863, "loss": 0.4749, "step": 31012 }, { "epoch": 17.325698324022348, "grad_norm": 0.4387122094631195, "learning_rate": 0.0001344817927170868, "loss": 0.4268, "step": 31013 }, { "epoch": 17.326256983240224, "grad_norm": 0.8337236046791077, "learning_rate": 0.00013445378151260505, "loss": 0.7094, "step": 31014 }, { "epoch": 17.3268156424581, "grad_norm": 0.6055076122283936, "learning_rate": 0.00013442577030812325, "loss": 0.4725, "step": 31015 }, { "epoch": 17.327374301675977, "grad_norm": 0.46573007106781006, "learning_rate": 0.00013439775910364146, "loss": 0.4359, "step": 31016 }, { "epoch": 17.327932960893854, "grad_norm": 0.8039844632148743, "learning_rate": 0.0001343697478991597, "loss": 0.5274, "step": 31017 }, { "epoch": 17.32849162011173, "grad_norm": 0.5466184616088867, "learning_rate": 0.00013434173669467787, "loss": 0.4516, "step": 31018 }, { "epoch": 17.32905027932961, "grad_norm": 0.8507922291755676, "learning_rate": 0.00013431372549019608, "loss": 0.3562, "step": 31019 }, { "epoch": 17.329608938547487, "grad_norm": 0.6679883003234863, "learning_rate": 0.00013428571428571428, "loss": 0.4599, "step": 31020 }, { "epoch": 17.330167597765364, "grad_norm": 2.3208248615264893, "learning_rate": 0.00013425770308123251, "loss": 0.3567, 
"step": 31021 }, { "epoch": 17.33072625698324, "grad_norm": 9.774212837219238, "learning_rate": 0.00013422969187675072, "loss": 0.3518, "step": 31022 }, { "epoch": 17.331284916201117, "grad_norm": 0.4367597699165344, "learning_rate": 0.0001342016806722689, "loss": 0.3901, "step": 31023 }, { "epoch": 17.331843575418993, "grad_norm": 0.4077896773815155, "learning_rate": 0.0001341736694677871, "loss": 0.4077, "step": 31024 }, { "epoch": 17.33240223463687, "grad_norm": 2.621882200241089, "learning_rate": 0.00013414565826330534, "loss": 0.5258, "step": 31025 }, { "epoch": 17.33296089385475, "grad_norm": 0.374945729970932, "learning_rate": 0.00013411764705882354, "loss": 0.3027, "step": 31026 }, { "epoch": 17.333519553072627, "grad_norm": 0.5708574652671814, "learning_rate": 0.00013408963585434175, "loss": 0.3692, "step": 31027 }, { "epoch": 17.334078212290503, "grad_norm": 0.7426781058311462, "learning_rate": 0.00013406162464985993, "loss": 0.3747, "step": 31028 }, { "epoch": 17.33463687150838, "grad_norm": 0.7198552489280701, "learning_rate": 0.00013403361344537816, "loss": 0.2718, "step": 31029 }, { "epoch": 17.335195530726256, "grad_norm": 0.5650630593299866, "learning_rate": 0.00013400560224089637, "loss": 0.3947, "step": 31030 }, { "epoch": 17.335754189944133, "grad_norm": 0.4685058891773224, "learning_rate": 0.00013397759103641457, "loss": 0.4047, "step": 31031 }, { "epoch": 17.33631284916201, "grad_norm": 0.4511740803718567, "learning_rate": 0.00013394957983193278, "loss": 0.4206, "step": 31032 }, { "epoch": 17.33687150837989, "grad_norm": 0.893106997013092, "learning_rate": 0.000133921568627451, "loss": 0.3377, "step": 31033 }, { "epoch": 17.337430167597766, "grad_norm": 0.4217708706855774, "learning_rate": 0.0001338935574229692, "loss": 0.4595, "step": 31034 }, { "epoch": 17.337988826815643, "grad_norm": 0.47246602177619934, "learning_rate": 0.0001338655462184874, "loss": 0.4727, "step": 31035 }, { "epoch": 17.33854748603352, "grad_norm": 0.43586745858192444, 
"learning_rate": 0.0001338375350140056, "loss": 0.4012, "step": 31036 }, { "epoch": 17.339106145251396, "grad_norm": 1.1428580284118652, "learning_rate": 0.00013380952380952384, "loss": 0.4178, "step": 31037 }, { "epoch": 17.339664804469272, "grad_norm": 0.6881787776947021, "learning_rate": 0.00013378151260504202, "loss": 0.4648, "step": 31038 }, { "epoch": 17.340223463687153, "grad_norm": 0.9605644941329956, "learning_rate": 0.00013375350140056022, "loss": 0.4384, "step": 31039 }, { "epoch": 17.34078212290503, "grad_norm": 0.4451036751270294, "learning_rate": 0.00013372549019607843, "loss": 0.3957, "step": 31040 }, { "epoch": 17.341340782122906, "grad_norm": 0.4208865165710449, "learning_rate": 0.00013369747899159666, "loss": 0.4122, "step": 31041 }, { "epoch": 17.341899441340782, "grad_norm": 0.5682745575904846, "learning_rate": 0.00013366946778711487, "loss": 0.3828, "step": 31042 }, { "epoch": 17.34245810055866, "grad_norm": 0.4290216565132141, "learning_rate": 0.00013364145658263305, "loss": 0.3744, "step": 31043 }, { "epoch": 17.343016759776535, "grad_norm": 0.9240705370903015, "learning_rate": 0.00013361344537815125, "loss": 0.3837, "step": 31044 }, { "epoch": 17.343575418994412, "grad_norm": 0.4382179081439972, "learning_rate": 0.00013358543417366949, "loss": 0.5254, "step": 31045 }, { "epoch": 17.344134078212292, "grad_norm": 0.7130903005599976, "learning_rate": 0.0001335574229691877, "loss": 0.4986, "step": 31046 }, { "epoch": 17.34469273743017, "grad_norm": 0.608599841594696, "learning_rate": 0.00013352941176470587, "loss": 0.3217, "step": 31047 }, { "epoch": 17.345251396648045, "grad_norm": 1.5888854265213013, "learning_rate": 0.00013350140056022408, "loss": 0.5133, "step": 31048 }, { "epoch": 17.345810055865922, "grad_norm": 0.5445583462715149, "learning_rate": 0.0001334733893557423, "loss": 0.3349, "step": 31049 }, { "epoch": 17.3463687150838, "grad_norm": 1.5335725545883179, "learning_rate": 0.00013344537815126052, "loss": 0.3488, "step": 31050 }, { 
"epoch": 17.346927374301675, "grad_norm": 0.35896986722946167, "learning_rate": 0.00013341736694677872, "loss": 0.354, "step": 31051 }, { "epoch": 17.34748603351955, "grad_norm": 0.46245482563972473, "learning_rate": 0.0001333893557422969, "loss": 0.432, "step": 31052 }, { "epoch": 17.34804469273743, "grad_norm": 1.0253490209579468, "learning_rate": 0.00013336134453781513, "loss": 0.4725, "step": 31053 }, { "epoch": 17.34860335195531, "grad_norm": 0.40848150849342346, "learning_rate": 0.00013333333333333334, "loss": 0.3143, "step": 31054 }, { "epoch": 17.349162011173185, "grad_norm": 0.8147862553596497, "learning_rate": 0.00013330532212885155, "loss": 0.4263, "step": 31055 }, { "epoch": 17.34972067039106, "grad_norm": 0.40357330441474915, "learning_rate": 0.00013327731092436975, "loss": 0.3502, "step": 31056 }, { "epoch": 17.350279329608938, "grad_norm": 0.8164263367652893, "learning_rate": 0.00013324929971988796, "loss": 0.5221, "step": 31057 }, { "epoch": 17.350837988826814, "grad_norm": 0.6612587571144104, "learning_rate": 0.00013322128851540616, "loss": 0.4333, "step": 31058 }, { "epoch": 17.35139664804469, "grad_norm": 0.4285007417201996, "learning_rate": 0.00013319327731092437, "loss": 0.3303, "step": 31059 }, { "epoch": 17.35195530726257, "grad_norm": 0.7287948727607727, "learning_rate": 0.00013316526610644258, "loss": 0.3868, "step": 31060 }, { "epoch": 17.352513966480448, "grad_norm": 0.29196247458457947, "learning_rate": 0.0001331372549019608, "loss": 0.3313, "step": 31061 }, { "epoch": 17.353072625698324, "grad_norm": 0.3559337854385376, "learning_rate": 0.000133109243697479, "loss": 0.3722, "step": 31062 }, { "epoch": 17.3536312849162, "grad_norm": 0.5292189121246338, "learning_rate": 0.0001330812324929972, "loss": 0.4242, "step": 31063 }, { "epoch": 17.354189944134077, "grad_norm": 0.37551379203796387, "learning_rate": 0.0001330532212885154, "loss": 0.3307, "step": 31064 }, { "epoch": 17.354748603351954, "grad_norm": 0.5712440609931946, 
"learning_rate": 0.00013302521008403363, "loss": 0.417, "step": 31065 }, { "epoch": 17.355307262569834, "grad_norm": 0.52461177110672, "learning_rate": 0.00013299719887955184, "loss": 0.4474, "step": 31066 }, { "epoch": 17.35586592178771, "grad_norm": 0.5089712142944336, "learning_rate": 0.00013296918767507002, "loss": 0.5588, "step": 31067 }, { "epoch": 17.356424581005587, "grad_norm": 0.6703516840934753, "learning_rate": 0.00013294117647058822, "loss": 0.4169, "step": 31068 }, { "epoch": 17.356983240223464, "grad_norm": 0.4563344717025757, "learning_rate": 0.00013291316526610646, "loss": 0.5132, "step": 31069 }, { "epoch": 17.35754189944134, "grad_norm": 0.9744804501533508, "learning_rate": 0.00013288515406162466, "loss": 0.4448, "step": 31070 }, { "epoch": 17.358100558659217, "grad_norm": 0.4727672338485718, "learning_rate": 0.00013285714285714287, "loss": 0.4282, "step": 31071 }, { "epoch": 17.358659217877094, "grad_norm": 0.38442444801330566, "learning_rate": 0.00013282913165266105, "loss": 0.3821, "step": 31072 }, { "epoch": 17.359217877094974, "grad_norm": 0.5265441536903381, "learning_rate": 0.00013280112044817928, "loss": 0.5058, "step": 31073 }, { "epoch": 17.35977653631285, "grad_norm": 0.4554186761379242, "learning_rate": 0.0001327731092436975, "loss": 0.3197, "step": 31074 }, { "epoch": 17.360335195530727, "grad_norm": 2.4853837490081787, "learning_rate": 0.0001327450980392157, "loss": 0.4539, "step": 31075 }, { "epoch": 17.360893854748603, "grad_norm": 0.4225909411907196, "learning_rate": 0.0001327170868347339, "loss": 0.3803, "step": 31076 }, { "epoch": 17.36145251396648, "grad_norm": 0.44362735748291016, "learning_rate": 0.0001326890756302521, "loss": 0.4426, "step": 31077 }, { "epoch": 17.362011173184356, "grad_norm": 0.5334896445274353, "learning_rate": 0.0001326610644257703, "loss": 0.382, "step": 31078 }, { "epoch": 17.362569832402233, "grad_norm": 0.6462735533714294, "learning_rate": 0.00013263305322128852, "loss": 0.3612, "step": 31079 }, { 
"epoch": 17.363128491620113, "grad_norm": 0.38574841618537903, "learning_rate": 0.00013260504201680672, "loss": 0.5019, "step": 31080 }, { "epoch": 17.36368715083799, "grad_norm": 11.807450294494629, "learning_rate": 0.00013257703081232496, "loss": 0.4538, "step": 31081 }, { "epoch": 17.364245810055866, "grad_norm": 0.3913212716579437, "learning_rate": 0.00013254901960784313, "loss": 0.3783, "step": 31082 }, { "epoch": 17.364804469273743, "grad_norm": 0.3708021640777588, "learning_rate": 0.00013252100840336134, "loss": 0.4296, "step": 31083 }, { "epoch": 17.36536312849162, "grad_norm": 0.29742497205734253, "learning_rate": 0.00013249299719887955, "loss": 0.3355, "step": 31084 }, { "epoch": 17.365921787709496, "grad_norm": 0.3839186131954193, "learning_rate": 0.00013246498599439778, "loss": 0.2789, "step": 31085 }, { "epoch": 17.366480446927373, "grad_norm": 1.1304399967193604, "learning_rate": 0.00013243697478991599, "loss": 0.3587, "step": 31086 }, { "epoch": 17.367039106145253, "grad_norm": 1.2746134996414185, "learning_rate": 0.00013240896358543416, "loss": 0.5103, "step": 31087 }, { "epoch": 17.36759776536313, "grad_norm": 0.4955059587955475, "learning_rate": 0.00013238095238095237, "loss": 0.4981, "step": 31088 }, { "epoch": 17.368156424581006, "grad_norm": 0.48075589537620544, "learning_rate": 0.0001323529411764706, "loss": 0.536, "step": 31089 }, { "epoch": 17.368715083798882, "grad_norm": 0.44370028376579285, "learning_rate": 0.0001323249299719888, "loss": 0.3746, "step": 31090 }, { "epoch": 17.36927374301676, "grad_norm": 0.38735711574554443, "learning_rate": 0.00013229691876750702, "loss": 0.381, "step": 31091 }, { "epoch": 17.369832402234636, "grad_norm": 0.6469260454177856, "learning_rate": 0.0001322689075630252, "loss": 0.4111, "step": 31092 }, { "epoch": 17.370391061452516, "grad_norm": 0.5939928293228149, "learning_rate": 0.00013224089635854343, "loss": 0.4654, "step": 31093 }, { "epoch": 17.370949720670392, "grad_norm": 0.44604727625846863, 
"learning_rate": 0.00013221288515406163, "loss": 0.3473, "step": 31094 }, { "epoch": 17.37150837988827, "grad_norm": 0.5927965044975281, "learning_rate": 0.00013218487394957984, "loss": 0.3626, "step": 31095 }, { "epoch": 17.372067039106145, "grad_norm": 0.4443076550960541, "learning_rate": 0.00013215686274509805, "loss": 0.4574, "step": 31096 }, { "epoch": 17.372625698324022, "grad_norm": 0.6326189041137695, "learning_rate": 0.00013212885154061625, "loss": 0.4052, "step": 31097 }, { "epoch": 17.3731843575419, "grad_norm": 0.41950735449790955, "learning_rate": 0.00013210084033613446, "loss": 0.3325, "step": 31098 }, { "epoch": 17.373743016759775, "grad_norm": 0.842399001121521, "learning_rate": 0.00013207282913165266, "loss": 0.3975, "step": 31099 }, { "epoch": 17.374301675977655, "grad_norm": 0.4795132875442505, "learning_rate": 0.00013204481792717087, "loss": 0.3684, "step": 31100 }, { "epoch": 17.37486033519553, "grad_norm": 0.5365030765533447, "learning_rate": 0.00013201680672268908, "loss": 0.3895, "step": 31101 }, { "epoch": 17.37541899441341, "grad_norm": 0.44384628534317017, "learning_rate": 0.00013198879551820728, "loss": 0.4035, "step": 31102 }, { "epoch": 17.375977653631285, "grad_norm": 0.5448676347732544, "learning_rate": 0.0001319607843137255, "loss": 0.4162, "step": 31103 }, { "epoch": 17.37653631284916, "grad_norm": 0.31774941086769104, "learning_rate": 0.0001319327731092437, "loss": 0.3768, "step": 31104 }, { "epoch": 17.377094972067038, "grad_norm": 0.8972043395042419, "learning_rate": 0.00013190476190476193, "loss": 0.3863, "step": 31105 }, { "epoch": 17.377653631284915, "grad_norm": 1.0762656927108765, "learning_rate": 0.0001318767507002801, "loss": 0.389, "step": 31106 }, { "epoch": 17.378212290502795, "grad_norm": 0.3484049439430237, "learning_rate": 0.0001318487394957983, "loss": 0.3416, "step": 31107 }, { "epoch": 17.37877094972067, "grad_norm": 5.075378894805908, "learning_rate": 0.00013182072829131652, "loss": 0.4321, "step": 31108 }, { 
"epoch": 17.379329608938548, "grad_norm": 1.223828673362732, "learning_rate": 0.00013179271708683475, "loss": 0.4254, "step": 31109 }, { "epoch": 17.379888268156424, "grad_norm": 0.32664188742637634, "learning_rate": 0.00013176470588235296, "loss": 0.3373, "step": 31110 }, { "epoch": 17.3804469273743, "grad_norm": 0.35772499442100525, "learning_rate": 0.00013173669467787114, "loss": 0.3857, "step": 31111 }, { "epoch": 17.381005586592178, "grad_norm": 0.4438253939151764, "learning_rate": 0.00013170868347338934, "loss": 0.403, "step": 31112 }, { "epoch": 17.381564245810054, "grad_norm": 0.5758340954780579, "learning_rate": 0.00013168067226890757, "loss": 0.4319, "step": 31113 }, { "epoch": 17.382122905027934, "grad_norm": 0.4089943468570709, "learning_rate": 0.00013165266106442578, "loss": 0.2961, "step": 31114 }, { "epoch": 17.38268156424581, "grad_norm": 0.5886080861091614, "learning_rate": 0.000131624649859944, "loss": 0.4895, "step": 31115 }, { "epoch": 17.383240223463687, "grad_norm": 1.3975069522857666, "learning_rate": 0.00013159663865546217, "loss": 0.3515, "step": 31116 }, { "epoch": 17.383798882681564, "grad_norm": 0.6111028790473938, "learning_rate": 0.0001315686274509804, "loss": 0.4484, "step": 31117 }, { "epoch": 17.38435754189944, "grad_norm": 0.44044652581214905, "learning_rate": 0.0001315406162464986, "loss": 0.3735, "step": 31118 }, { "epoch": 17.384916201117317, "grad_norm": 1.429068922996521, "learning_rate": 0.0001315126050420168, "loss": 0.4147, "step": 31119 }, { "epoch": 17.385474860335197, "grad_norm": 0.37906938791275024, "learning_rate": 0.00013148459383753502, "loss": 0.3015, "step": 31120 }, { "epoch": 17.386033519553074, "grad_norm": 0.38827309012413025, "learning_rate": 0.00013145658263305322, "loss": 0.381, "step": 31121 }, { "epoch": 17.38659217877095, "grad_norm": 0.7701229453086853, "learning_rate": 0.00013142857142857143, "loss": 0.4689, "step": 31122 }, { "epoch": 17.387150837988827, "grad_norm": 0.346907377243042, 
"learning_rate": 0.00013140056022408963, "loss": 0.4313, "step": 31123 }, { "epoch": 17.387709497206703, "grad_norm": 0.42327284812927246, "learning_rate": 0.00013137254901960784, "loss": 0.4318, "step": 31124 }, { "epoch": 17.38826815642458, "grad_norm": 0.6674845814704895, "learning_rate": 0.00013134453781512607, "loss": 0.3852, "step": 31125 }, { "epoch": 17.388826815642457, "grad_norm": 0.5044666528701782, "learning_rate": 0.00013131652661064425, "loss": 0.335, "step": 31126 }, { "epoch": 17.389385474860337, "grad_norm": 0.7203618884086609, "learning_rate": 0.00013128851540616246, "loss": 0.388, "step": 31127 }, { "epoch": 17.389944134078213, "grad_norm": 1.1940944194793701, "learning_rate": 0.00013126050420168066, "loss": 0.4515, "step": 31128 }, { "epoch": 17.39050279329609, "grad_norm": 0.5435183644294739, "learning_rate": 0.0001312324929971989, "loss": 0.3519, "step": 31129 }, { "epoch": 17.391061452513966, "grad_norm": 0.4717260003089905, "learning_rate": 0.0001312044817927171, "loss": 0.4059, "step": 31130 }, { "epoch": 17.391620111731843, "grad_norm": 0.6585044264793396, "learning_rate": 0.00013117647058823528, "loss": 0.631, "step": 31131 }, { "epoch": 17.39217877094972, "grad_norm": 0.45104146003723145, "learning_rate": 0.0001311484593837535, "loss": 0.3558, "step": 31132 }, { "epoch": 17.392737430167596, "grad_norm": 0.5185086131095886, "learning_rate": 0.00013112044817927172, "loss": 0.5385, "step": 31133 }, { "epoch": 17.393296089385476, "grad_norm": 0.7516040205955505, "learning_rate": 0.00013109243697478993, "loss": 0.4288, "step": 31134 }, { "epoch": 17.393854748603353, "grad_norm": 0.5420228242874146, "learning_rate": 0.00013106442577030813, "loss": 0.744, "step": 31135 }, { "epoch": 17.39441340782123, "grad_norm": 1.1047993898391724, "learning_rate": 0.0001310364145658263, "loss": 0.4011, "step": 31136 }, { "epoch": 17.394972067039106, "grad_norm": 4.153549671173096, "learning_rate": 0.00013100840336134455, "loss": 0.3628, "step": 31137 }, { 
"epoch": 17.395530726256982, "grad_norm": 0.6312436461448669, "learning_rate": 0.00013098039215686275, "loss": 0.4365, "step": 31138 }, { "epoch": 17.39608938547486, "grad_norm": 0.5247658491134644, "learning_rate": 0.00013095238095238096, "loss": 0.371, "step": 31139 }, { "epoch": 17.39664804469274, "grad_norm": 0.3999980092048645, "learning_rate": 0.00013092436974789916, "loss": 0.3387, "step": 31140 }, { "epoch": 17.397206703910616, "grad_norm": 0.5040502548217773, "learning_rate": 0.00013089635854341737, "loss": 0.4086, "step": 31141 }, { "epoch": 17.397765363128492, "grad_norm": 1.0589455366134644, "learning_rate": 0.00013086834733893558, "loss": 0.2532, "step": 31142 }, { "epoch": 17.39832402234637, "grad_norm": 0.3961639106273651, "learning_rate": 0.00013084033613445378, "loss": 0.355, "step": 31143 }, { "epoch": 17.398882681564245, "grad_norm": 0.6299583911895752, "learning_rate": 0.000130812324929972, "loss": 0.4084, "step": 31144 }, { "epoch": 17.399441340782122, "grad_norm": 0.3586007058620453, "learning_rate": 0.00013078431372549022, "loss": 0.3633, "step": 31145 }, { "epoch": 17.4, "grad_norm": 0.3738737404346466, "learning_rate": 0.0001307563025210084, "loss": 0.3777, "step": 31146 }, { "epoch": 17.40055865921788, "grad_norm": 1.038413643836975, "learning_rate": 0.0001307282913165266, "loss": 0.5113, "step": 31147 }, { "epoch": 17.401117318435755, "grad_norm": 0.5648976564407349, "learning_rate": 0.0001307002801120448, "loss": 0.57, "step": 31148 }, { "epoch": 17.401675977653632, "grad_norm": 0.465968519449234, "learning_rate": 0.00013067226890756304, "loss": 0.3386, "step": 31149 }, { "epoch": 17.40223463687151, "grad_norm": 0.5013076663017273, "learning_rate": 0.00013064425770308125, "loss": 0.5146, "step": 31150 }, { "epoch": 17.402793296089385, "grad_norm": 0.869238555431366, "learning_rate": 0.00013061624649859943, "loss": 0.4005, "step": 31151 }, { "epoch": 17.40335195530726, "grad_norm": 0.41563844680786133, "learning_rate": 
0.00013058823529411764, "loss": 0.4106, "step": 31152 }, { "epoch": 17.403910614525138, "grad_norm": 0.4195695221424103, "learning_rate": 0.00013056022408963587, "loss": 0.3817, "step": 31153 }, { "epoch": 17.404469273743018, "grad_norm": 0.37012434005737305, "learning_rate": 0.00013053221288515407, "loss": 0.3547, "step": 31154 }, { "epoch": 17.405027932960895, "grad_norm": 0.34807100892066956, "learning_rate": 0.00013050420168067228, "loss": 0.4019, "step": 31155 }, { "epoch": 17.40558659217877, "grad_norm": 0.7357035279273987, "learning_rate": 0.00013047619047619046, "loss": 0.444, "step": 31156 }, { "epoch": 17.406145251396648, "grad_norm": 0.6481372117996216, "learning_rate": 0.0001304481792717087, "loss": 0.4614, "step": 31157 }, { "epoch": 17.406703910614524, "grad_norm": 0.9928131699562073, "learning_rate": 0.0001304201680672269, "loss": 0.3733, "step": 31158 }, { "epoch": 17.4072625698324, "grad_norm": 0.37836578488349915, "learning_rate": 0.0001303921568627451, "loss": 0.3426, "step": 31159 }, { "epoch": 17.407821229050278, "grad_norm": 0.33758580684661865, "learning_rate": 0.00013036414565826328, "loss": 0.3914, "step": 31160 }, { "epoch": 17.408379888268158, "grad_norm": 0.39689308404922485, "learning_rate": 0.00013033613445378152, "loss": 0.3583, "step": 31161 }, { "epoch": 17.408938547486034, "grad_norm": 0.5006991028785706, "learning_rate": 0.00013030812324929972, "loss": 0.4154, "step": 31162 }, { "epoch": 17.40949720670391, "grad_norm": 0.5362796783447266, "learning_rate": 0.00013028011204481793, "loss": 0.409, "step": 31163 }, { "epoch": 17.410055865921787, "grad_norm": 0.39778801798820496, "learning_rate": 0.00013025210084033613, "loss": 0.4054, "step": 31164 }, { "epoch": 17.410614525139664, "grad_norm": 0.3496702015399933, "learning_rate": 0.00013022408963585434, "loss": 0.3646, "step": 31165 }, { "epoch": 17.41117318435754, "grad_norm": 0.3237707316875458, "learning_rate": 0.00013019607843137255, "loss": 0.3432, "step": 31166 }, { "epoch": 
17.41173184357542, "grad_norm": 0.39388328790664673, "learning_rate": 0.00013016806722689075, "loss": 0.4349, "step": 31167 }, { "epoch": 17.412290502793297, "grad_norm": 0.5242244601249695, "learning_rate": 0.00013014005602240896, "loss": 0.369, "step": 31168 }, { "epoch": 17.412849162011174, "grad_norm": 0.42856332659721375, "learning_rate": 0.0001301120448179272, "loss": 0.4278, "step": 31169 }, { "epoch": 17.41340782122905, "grad_norm": 2.6800715923309326, "learning_rate": 0.00013008403361344537, "loss": 0.4979, "step": 31170 }, { "epoch": 17.413966480446927, "grad_norm": 0.3910575807094574, "learning_rate": 0.00013005602240896358, "loss": 0.4928, "step": 31171 }, { "epoch": 17.414525139664804, "grad_norm": 0.31625622510910034, "learning_rate": 0.00013002801120448178, "loss": 0.2782, "step": 31172 }, { "epoch": 17.41508379888268, "grad_norm": 0.38408100605010986, "learning_rate": 0.00013000000000000002, "loss": 0.4502, "step": 31173 }, { "epoch": 17.41564245810056, "grad_norm": 0.38111215829849243, "learning_rate": 0.00012997198879551822, "loss": 0.3911, "step": 31174 }, { "epoch": 17.416201117318437, "grad_norm": 0.9490625262260437, "learning_rate": 0.0001299439775910364, "loss": 0.4187, "step": 31175 }, { "epoch": 17.416759776536313, "grad_norm": 0.3859139680862427, "learning_rate": 0.0001299159663865546, "loss": 0.4071, "step": 31176 }, { "epoch": 17.41731843575419, "grad_norm": 0.5537550449371338, "learning_rate": 0.00012988795518207284, "loss": 0.5584, "step": 31177 }, { "epoch": 17.417877094972066, "grad_norm": 0.4984540045261383, "learning_rate": 0.00012985994397759105, "loss": 0.5001, "step": 31178 }, { "epoch": 17.418435754189943, "grad_norm": 0.445202499628067, "learning_rate": 0.00012983193277310925, "loss": 0.3912, "step": 31179 }, { "epoch": 17.41899441340782, "grad_norm": 0.42921656370162964, "learning_rate": 0.00012980392156862743, "loss": 0.5417, "step": 31180 }, { "epoch": 17.4195530726257, "grad_norm": 0.47433730959892273, "learning_rate": 
0.00012977591036414566, "loss": 0.4553, "step": 31181 }, { "epoch": 17.420111731843576, "grad_norm": 0.3896671533584595, "learning_rate": 0.00012974789915966387, "loss": 0.3289, "step": 31182 }, { "epoch": 17.420670391061453, "grad_norm": 1.5151444673538208, "learning_rate": 0.00012971988795518208, "loss": 0.4431, "step": 31183 }, { "epoch": 17.42122905027933, "grad_norm": 0.3981206715106964, "learning_rate": 0.00012969187675070028, "loss": 0.3853, "step": 31184 }, { "epoch": 17.421787709497206, "grad_norm": 0.5477587580680847, "learning_rate": 0.0001296638655462185, "loss": 0.4347, "step": 31185 }, { "epoch": 17.422346368715083, "grad_norm": 0.6910151243209839, "learning_rate": 0.0001296358543417367, "loss": 0.3903, "step": 31186 }, { "epoch": 17.422905027932963, "grad_norm": 2.355640172958374, "learning_rate": 0.0001296078431372549, "loss": 0.3713, "step": 31187 }, { "epoch": 17.42346368715084, "grad_norm": 0.42415162920951843, "learning_rate": 0.0001295798319327731, "loss": 0.3866, "step": 31188 }, { "epoch": 17.424022346368716, "grad_norm": 0.4407443404197693, "learning_rate": 0.00012955182072829134, "loss": 0.2818, "step": 31189 }, { "epoch": 17.424581005586592, "grad_norm": 0.49550893902778625, "learning_rate": 0.00012952380952380952, "loss": 0.4505, "step": 31190 }, { "epoch": 17.42513966480447, "grad_norm": 0.5797774791717529, "learning_rate": 0.00012949579831932772, "loss": 0.3807, "step": 31191 }, { "epoch": 17.425698324022346, "grad_norm": 0.4401281774044037, "learning_rate": 0.00012946778711484593, "loss": 0.3979, "step": 31192 }, { "epoch": 17.426256983240222, "grad_norm": 0.6628199219703674, "learning_rate": 0.00012943977591036416, "loss": 0.4008, "step": 31193 }, { "epoch": 17.426815642458102, "grad_norm": 1.6024513244628906, "learning_rate": 0.00012941176470588237, "loss": 0.3079, "step": 31194 }, { "epoch": 17.42737430167598, "grad_norm": 0.34817370772361755, "learning_rate": 0.00012938375350140055, "loss": 0.3773, "step": 31195 }, { "epoch": 
17.427932960893855, "grad_norm": 0.4814103841781616, "learning_rate": 0.00012935574229691875, "loss": 0.4857, "step": 31196 }, { "epoch": 17.428491620111732, "grad_norm": 0.3819580376148224, "learning_rate": 0.000129327731092437, "loss": 0.3314, "step": 31197 }, { "epoch": 17.42905027932961, "grad_norm": 0.3839380741119385, "learning_rate": 0.0001292997198879552, "loss": 0.3444, "step": 31198 }, { "epoch": 17.429608938547485, "grad_norm": 0.4543190598487854, "learning_rate": 0.0001292717086834734, "loss": 0.3227, "step": 31199 }, { "epoch": 17.43016759776536, "grad_norm": 0.6105818152427673, "learning_rate": 0.00012924369747899158, "loss": 0.3553, "step": 31200 }, { "epoch": 17.43072625698324, "grad_norm": 0.4673517048358917, "learning_rate": 0.0001292156862745098, "loss": 0.3473, "step": 31201 }, { "epoch": 17.43128491620112, "grad_norm": 0.3725651204586029, "learning_rate": 0.00012918767507002802, "loss": 0.4416, "step": 31202 }, { "epoch": 17.431843575418995, "grad_norm": 0.4732637405395508, "learning_rate": 0.00012915966386554622, "loss": 0.4132, "step": 31203 }, { "epoch": 17.43240223463687, "grad_norm": 1.4968427419662476, "learning_rate": 0.00012913165266106443, "loss": 0.3913, "step": 31204 }, { "epoch": 17.432960893854748, "grad_norm": 2.410842180252075, "learning_rate": 0.00012910364145658263, "loss": 0.4388, "step": 31205 }, { "epoch": 17.433519553072625, "grad_norm": 0.38205739855766296, "learning_rate": 0.00012907563025210084, "loss": 0.3819, "step": 31206 }, { "epoch": 17.4340782122905, "grad_norm": 0.44525229930877686, "learning_rate": 0.00012904761904761905, "loss": 0.355, "step": 31207 }, { "epoch": 17.43463687150838, "grad_norm": 0.33054521679878235, "learning_rate": 0.00012901960784313725, "loss": 0.3039, "step": 31208 }, { "epoch": 17.435195530726258, "grad_norm": 0.3912530541419983, "learning_rate": 0.00012899159663865549, "loss": 0.4294, "step": 31209 }, { "epoch": 17.435754189944134, "grad_norm": 1.9333300590515137, "learning_rate": 
0.00012896358543417366, "loss": 0.3906, "step": 31210 }, { "epoch": 17.43631284916201, "grad_norm": 0.3383536636829376, "learning_rate": 0.00012893557422969187, "loss": 0.3803, "step": 31211 }, { "epoch": 17.436871508379888, "grad_norm": 0.33888325095176697, "learning_rate": 0.00012890756302521008, "loss": 0.3367, "step": 31212 }, { "epoch": 17.437430167597764, "grad_norm": 0.44426989555358887, "learning_rate": 0.0001288795518207283, "loss": 0.3968, "step": 31213 }, { "epoch": 17.43798882681564, "grad_norm": 0.4917047917842865, "learning_rate": 0.0001288515406162465, "loss": 0.3472, "step": 31214 }, { "epoch": 17.43854748603352, "grad_norm": 0.42818641662597656, "learning_rate": 0.0001288235294117647, "loss": 0.442, "step": 31215 }, { "epoch": 17.439106145251397, "grad_norm": 0.48286232352256775, "learning_rate": 0.0001287955182072829, "loss": 0.4061, "step": 31216 }, { "epoch": 17.439664804469274, "grad_norm": 0.3871276080608368, "learning_rate": 0.00012876750700280113, "loss": 0.4856, "step": 31217 }, { "epoch": 17.44022346368715, "grad_norm": 0.5358783602714539, "learning_rate": 0.00012873949579831934, "loss": 0.3334, "step": 31218 }, { "epoch": 17.440782122905027, "grad_norm": 0.31744393706321716, "learning_rate": 0.00012871148459383752, "loss": 0.3663, "step": 31219 }, { "epoch": 17.441340782122904, "grad_norm": 0.3956749737262726, "learning_rate": 0.00012868347338935572, "loss": 0.369, "step": 31220 }, { "epoch": 17.441899441340784, "grad_norm": 0.44227921962738037, "learning_rate": 0.00012865546218487396, "loss": 0.5505, "step": 31221 }, { "epoch": 17.44245810055866, "grad_norm": 0.5183186531066895, "learning_rate": 0.00012862745098039216, "loss": 0.5041, "step": 31222 }, { "epoch": 17.443016759776537, "grad_norm": 0.7157148718833923, "learning_rate": 0.00012859943977591037, "loss": 0.391, "step": 31223 }, { "epoch": 17.443575418994413, "grad_norm": 1.0879318714141846, "learning_rate": 0.00012857142857142855, "loss": 0.3555, "step": 31224 }, { "epoch": 
17.44413407821229, "grad_norm": 2.4511914253234863, "learning_rate": 0.00012854341736694678, "loss": 0.367, "step": 31225 }, { "epoch": 17.444692737430167, "grad_norm": 0.3168339729309082, "learning_rate": 0.000128515406162465, "loss": 0.3696, "step": 31226 }, { "epoch": 17.445251396648043, "grad_norm": 0.40689313411712646, "learning_rate": 0.0001284873949579832, "loss": 0.3725, "step": 31227 }, { "epoch": 17.445810055865923, "grad_norm": 0.33695682883262634, "learning_rate": 0.0001284593837535014, "loss": 0.3602, "step": 31228 }, { "epoch": 17.4463687150838, "grad_norm": 0.4141903817653656, "learning_rate": 0.0001284313725490196, "loss": 0.3079, "step": 31229 }, { "epoch": 17.446927374301676, "grad_norm": 0.6350756287574768, "learning_rate": 0.0001284033613445378, "loss": 0.4451, "step": 31230 }, { "epoch": 17.447486033519553, "grad_norm": 0.3371887505054474, "learning_rate": 0.00012837535014005602, "loss": 0.3402, "step": 31231 }, { "epoch": 17.44804469273743, "grad_norm": 0.5235417485237122, "learning_rate": 0.00012834733893557422, "loss": 0.4089, "step": 31232 }, { "epoch": 17.448603351955306, "grad_norm": 0.45878708362579346, "learning_rate": 0.00012831932773109246, "loss": 0.4281, "step": 31233 }, { "epoch": 17.449162011173183, "grad_norm": 0.42058709263801575, "learning_rate": 0.00012829131652661064, "loss": 0.3968, "step": 31234 }, { "epoch": 17.449720670391063, "grad_norm": 0.5157774686813354, "learning_rate": 0.00012826330532212884, "loss": 0.3863, "step": 31235 }, { "epoch": 17.45027932960894, "grad_norm": 1.095288634300232, "learning_rate": 0.00012823529411764705, "loss": 0.4447, "step": 31236 }, { "epoch": 17.450837988826816, "grad_norm": 0.5966135263442993, "learning_rate": 0.00012820728291316528, "loss": 0.5351, "step": 31237 }, { "epoch": 17.451396648044692, "grad_norm": 2.243619441986084, "learning_rate": 0.0001281792717086835, "loss": 0.4642, "step": 31238 }, { "epoch": 17.45195530726257, "grad_norm": 0.41884922981262207, "learning_rate": 
0.00012815126050420167, "loss": 0.4461, "step": 31239 }, { "epoch": 17.452513966480446, "grad_norm": 0.6158894300460815, "learning_rate": 0.00012812324929971987, "loss": 0.4425, "step": 31240 }, { "epoch": 17.453072625698326, "grad_norm": 0.564399242401123, "learning_rate": 0.0001280952380952381, "loss": 0.45, "step": 31241 }, { "epoch": 17.453631284916202, "grad_norm": 0.740170419216156, "learning_rate": 0.0001280672268907563, "loss": 0.395, "step": 31242 }, { "epoch": 17.45418994413408, "grad_norm": 0.41129449009895325, "learning_rate": 0.00012803921568627452, "loss": 0.2984, "step": 31243 }, { "epoch": 17.454748603351955, "grad_norm": 0.3733203411102295, "learning_rate": 0.0001280112044817927, "loss": 0.337, "step": 31244 }, { "epoch": 17.455307262569832, "grad_norm": 0.5790176391601562, "learning_rate": 0.00012798319327731093, "loss": 0.3524, "step": 31245 }, { "epoch": 17.45586592178771, "grad_norm": 0.546937882900238, "learning_rate": 0.00012795518207282913, "loss": 0.4131, "step": 31246 }, { "epoch": 17.456424581005585, "grad_norm": 0.3751690685749054, "learning_rate": 0.00012792717086834734, "loss": 0.3928, "step": 31247 }, { "epoch": 17.456983240223465, "grad_norm": 0.7142043709754944, "learning_rate": 0.00012789915966386557, "loss": 0.4499, "step": 31248 }, { "epoch": 17.457541899441342, "grad_norm": 0.36646780371665955, "learning_rate": 0.00012787114845938375, "loss": 0.3072, "step": 31249 }, { "epoch": 17.45810055865922, "grad_norm": 0.8351800441741943, "learning_rate": 0.00012784313725490196, "loss": 0.4419, "step": 31250 }, { "epoch": 17.458659217877095, "grad_norm": 0.4552780091762543, "learning_rate": 0.00012781512605042016, "loss": 0.5186, "step": 31251 }, { "epoch": 17.45921787709497, "grad_norm": 0.4794028699398041, "learning_rate": 0.0001277871148459384, "loss": 0.367, "step": 31252 }, { "epoch": 17.459776536312848, "grad_norm": 1.0033352375030518, "learning_rate": 0.0001277591036414566, "loss": 0.3567, "step": 31253 }, { "epoch": 
17.460335195530725, "grad_norm": 0.9024407863616943, "learning_rate": 0.00012773109243697478, "loss": 0.5256, "step": 31254 }, { "epoch": 17.460893854748605, "grad_norm": 0.5599893927574158, "learning_rate": 0.000127703081232493, "loss": 0.4971, "step": 31255 }, { "epoch": 17.46145251396648, "grad_norm": 0.37672266364097595, "learning_rate": 0.00012767507002801122, "loss": 0.394, "step": 31256 }, { "epoch": 17.462011173184358, "grad_norm": 0.5927656888961792, "learning_rate": 0.00012764705882352943, "loss": 0.5308, "step": 31257 }, { "epoch": 17.462569832402234, "grad_norm": 0.8347872495651245, "learning_rate": 0.00012761904761904763, "loss": 0.3615, "step": 31258 }, { "epoch": 17.46312849162011, "grad_norm": 0.4515547454357147, "learning_rate": 0.0001275910364145658, "loss": 0.4665, "step": 31259 }, { "epoch": 17.463687150837988, "grad_norm": 1.0949366092681885, "learning_rate": 0.00012756302521008405, "loss": 0.457, "step": 31260 }, { "epoch": 17.464245810055864, "grad_norm": 0.5063674449920654, "learning_rate": 0.00012753501400560225, "loss": 0.5971, "step": 31261 }, { "epoch": 17.464804469273744, "grad_norm": 0.3996122181415558, "learning_rate": 0.00012750700280112046, "loss": 0.3813, "step": 31262 }, { "epoch": 17.46536312849162, "grad_norm": 0.4895181655883789, "learning_rate": 0.00012747899159663866, "loss": 0.4412, "step": 31263 }, { "epoch": 17.465921787709497, "grad_norm": 0.679352343082428, "learning_rate": 0.00012745098039215687, "loss": 0.456, "step": 31264 }, { "epoch": 17.466480446927374, "grad_norm": 0.5667781233787537, "learning_rate": 0.00012742296918767508, "loss": 0.4587, "step": 31265 }, { "epoch": 17.46703910614525, "grad_norm": 0.3791618347167969, "learning_rate": 0.00012739495798319328, "loss": 0.3619, "step": 31266 }, { "epoch": 17.467597765363127, "grad_norm": 0.4643804132938385, "learning_rate": 0.0001273669467787115, "loss": 0.3956, "step": 31267 }, { "epoch": 17.468156424581007, "grad_norm": 0.6760391592979431, "learning_rate": 
0.0001273389355742297, "loss": 0.3493, "step": 31268 }, { "epoch": 17.468715083798884, "grad_norm": 0.4165029525756836, "learning_rate": 0.0001273109243697479, "loss": 0.4396, "step": 31269 }, { "epoch": 17.46927374301676, "grad_norm": 0.4410113990306854, "learning_rate": 0.0001272829131652661, "loss": 0.4114, "step": 31270 }, { "epoch": 17.469832402234637, "grad_norm": 0.3582690954208374, "learning_rate": 0.0001272549019607843, "loss": 0.4026, "step": 31271 }, { "epoch": 17.470391061452514, "grad_norm": 0.5017548203468323, "learning_rate": 0.00012722689075630254, "loss": 0.4254, "step": 31272 }, { "epoch": 17.47094972067039, "grad_norm": 0.3959094285964966, "learning_rate": 0.00012719887955182072, "loss": 0.3526, "step": 31273 }, { "epoch": 17.471508379888267, "grad_norm": 0.41697320342063904, "learning_rate": 0.00012717086834733893, "loss": 0.4482, "step": 31274 }, { "epoch": 17.472067039106147, "grad_norm": 0.6840927004814148, "learning_rate": 0.00012714285714285714, "loss": 0.4384, "step": 31275 }, { "epoch": 17.472625698324023, "grad_norm": 0.33184123039245605, "learning_rate": 0.00012711484593837537, "loss": 0.2965, "step": 31276 }, { "epoch": 17.4731843575419, "grad_norm": 2.997969150543213, "learning_rate": 0.00012708683473389357, "loss": 0.3998, "step": 31277 }, { "epoch": 17.473743016759776, "grad_norm": 0.5682587027549744, "learning_rate": 0.00012705882352941175, "loss": 0.4387, "step": 31278 }, { "epoch": 17.474301675977653, "grad_norm": 0.47491955757141113, "learning_rate": 0.00012703081232492996, "loss": 0.5778, "step": 31279 }, { "epoch": 17.47486033519553, "grad_norm": 0.4297771751880646, "learning_rate": 0.0001270028011204482, "loss": 0.3574, "step": 31280 }, { "epoch": 17.475418994413406, "grad_norm": 2.0499913692474365, "learning_rate": 0.0001269747899159664, "loss": 0.3042, "step": 31281 }, { "epoch": 17.475977653631286, "grad_norm": 0.48910534381866455, "learning_rate": 0.0001269467787114846, "loss": 0.4409, "step": 31282 }, { "epoch": 
17.476536312849163, "grad_norm": 0.5102927684783936, "learning_rate": 0.00012691876750700278, "loss": 0.4225, "step": 31283 }, { "epoch": 17.47709497206704, "grad_norm": 0.3281363546848297, "learning_rate": 0.00012689075630252102, "loss": 0.3329, "step": 31284 }, { "epoch": 17.477653631284916, "grad_norm": 0.5468024015426636, "learning_rate": 0.00012686274509803922, "loss": 0.4837, "step": 31285 }, { "epoch": 17.478212290502793, "grad_norm": 0.7422975301742554, "learning_rate": 0.00012683473389355743, "loss": 0.4877, "step": 31286 }, { "epoch": 17.47877094972067, "grad_norm": 0.3469271659851074, "learning_rate": 0.00012680672268907563, "loss": 0.3656, "step": 31287 }, { "epoch": 17.47932960893855, "grad_norm": 0.6495400667190552, "learning_rate": 0.00012677871148459384, "loss": 0.4163, "step": 31288 }, { "epoch": 17.479888268156426, "grad_norm": 0.5434854626655579, "learning_rate": 0.00012675070028011205, "loss": 0.3445, "step": 31289 }, { "epoch": 17.480446927374302, "grad_norm": 3.2187349796295166, "learning_rate": 0.00012672268907563025, "loss": 0.354, "step": 31290 }, { "epoch": 17.48100558659218, "grad_norm": 0.8823065757751465, "learning_rate": 0.00012669467787114846, "loss": 0.4252, "step": 31291 }, { "epoch": 17.481564245810056, "grad_norm": 0.49800223112106323, "learning_rate": 0.0001266666666666667, "loss": 0.2975, "step": 31292 }, { "epoch": 17.482122905027932, "grad_norm": 0.4639713168144226, "learning_rate": 0.00012663865546218487, "loss": 0.4643, "step": 31293 }, { "epoch": 17.48268156424581, "grad_norm": 0.4206647574901581, "learning_rate": 0.00012661064425770308, "loss": 0.3625, "step": 31294 }, { "epoch": 17.48324022346369, "grad_norm": 0.448525995016098, "learning_rate": 0.00012658263305322128, "loss": 0.2995, "step": 31295 }, { "epoch": 17.483798882681565, "grad_norm": 0.391122967004776, "learning_rate": 0.00012655462184873952, "loss": 0.4144, "step": 31296 }, { "epoch": 17.484357541899442, "grad_norm": 1.5043803453445435, "learning_rate": 
0.00012652661064425772, "loss": 0.4243, "step": 31297 }, { "epoch": 17.48491620111732, "grad_norm": 0.3991751968860626, "learning_rate": 0.0001264985994397759, "loss": 0.4054, "step": 31298 }, { "epoch": 17.485474860335195, "grad_norm": 0.5427753925323486, "learning_rate": 0.0001264705882352941, "loss": 0.5228, "step": 31299 }, { "epoch": 17.48603351955307, "grad_norm": 0.3899679481983185, "learning_rate": 0.00012644257703081234, "loss": 0.3706, "step": 31300 }, { "epoch": 17.486592178770948, "grad_norm": 3.9391305446624756, "learning_rate": 0.00012641456582633055, "loss": 0.3294, "step": 31301 }, { "epoch": 17.48715083798883, "grad_norm": 0.4592090845108032, "learning_rate": 0.00012638655462184875, "loss": 0.3922, "step": 31302 }, { "epoch": 17.487709497206705, "grad_norm": 0.5964315533638, "learning_rate": 0.00012635854341736693, "loss": 0.4505, "step": 31303 }, { "epoch": 17.48826815642458, "grad_norm": 0.5506134629249573, "learning_rate": 0.00012633053221288516, "loss": 0.448, "step": 31304 }, { "epoch": 17.488826815642458, "grad_norm": 0.6865684390068054, "learning_rate": 0.00012630252100840337, "loss": 0.5109, "step": 31305 }, { "epoch": 17.489385474860335, "grad_norm": 0.4935726523399353, "learning_rate": 0.00012627450980392158, "loss": 0.4008, "step": 31306 }, { "epoch": 17.48994413407821, "grad_norm": 6.239386558532715, "learning_rate": 0.00012624649859943978, "loss": 0.4636, "step": 31307 }, { "epoch": 17.490502793296088, "grad_norm": 1.9036096334457397, "learning_rate": 0.000126218487394958, "loss": 0.3467, "step": 31308 }, { "epoch": 17.491061452513968, "grad_norm": 1.24658203125, "learning_rate": 0.0001261904761904762, "loss": 0.4893, "step": 31309 }, { "epoch": 17.491620111731844, "grad_norm": 0.615731954574585, "learning_rate": 0.0001261624649859944, "loss": 0.4222, "step": 31310 }, { "epoch": 17.49217877094972, "grad_norm": 0.42554715275764465, "learning_rate": 0.0001261344537815126, "loss": 0.6147, "step": 31311 }, { "epoch": 17.492737430167598, 
"grad_norm": 0.6205093264579773, "learning_rate": 0.00012610644257703084, "loss": 0.3121, "step": 31312 }, { "epoch": 17.493296089385474, "grad_norm": 0.6284263730049133, "learning_rate": 0.00012607843137254902, "loss": 0.4786, "step": 31313 }, { "epoch": 17.49385474860335, "grad_norm": 1.0057072639465332, "learning_rate": 0.00012605042016806722, "loss": 0.5219, "step": 31314 }, { "epoch": 17.49441340782123, "grad_norm": 0.4818427860736847, "learning_rate": 0.00012602240896358543, "loss": 0.5274, "step": 31315 }, { "epoch": 17.494972067039107, "grad_norm": 0.7015307545661926, "learning_rate": 0.00012599439775910366, "loss": 0.4136, "step": 31316 }, { "epoch": 17.495530726256984, "grad_norm": 1.4937360286712646, "learning_rate": 0.00012596638655462187, "loss": 0.4036, "step": 31317 }, { "epoch": 17.49608938547486, "grad_norm": 0.9747006297111511, "learning_rate": 0.00012593837535014005, "loss": 0.3422, "step": 31318 }, { "epoch": 17.496648044692737, "grad_norm": 0.3876058757305145, "learning_rate": 0.00012591036414565825, "loss": 0.3966, "step": 31319 }, { "epoch": 17.497206703910614, "grad_norm": 0.48371076583862305, "learning_rate": 0.0001258823529411765, "loss": 0.3915, "step": 31320 }, { "epoch": 17.49776536312849, "grad_norm": 0.3128376007080078, "learning_rate": 0.0001258543417366947, "loss": 0.3079, "step": 31321 }, { "epoch": 17.49832402234637, "grad_norm": 0.4322524964809418, "learning_rate": 0.0001258263305322129, "loss": 0.3691, "step": 31322 }, { "epoch": 17.498882681564247, "grad_norm": 0.45457175374031067, "learning_rate": 0.00012579831932773108, "loss": 0.4365, "step": 31323 }, { "epoch": 17.499441340782123, "grad_norm": 0.5468364953994751, "learning_rate": 0.0001257703081232493, "loss": 0.3698, "step": 31324 }, { "epoch": 17.5, "grad_norm": 0.9551847577095032, "learning_rate": 0.00012574229691876752, "loss": 0.3923, "step": 31325 }, { "epoch": 17.500558659217877, "grad_norm": 0.3207535445690155, "learning_rate": 0.00012571428571428572, "loss": 0.318, 
"step": 31326 }, { "epoch": 17.501117318435753, "grad_norm": 0.8012207746505737, "learning_rate": 0.0001256862745098039, "loss": 0.466, "step": 31327 }, { "epoch": 17.50167597765363, "grad_norm": 0.42065680027008057, "learning_rate": 0.00012565826330532213, "loss": 0.3795, "step": 31328 }, { "epoch": 17.50223463687151, "grad_norm": 0.45595505833625793, "learning_rate": 0.00012563025210084034, "loss": 0.465, "step": 31329 }, { "epoch": 17.502793296089386, "grad_norm": 0.4388836622238159, "learning_rate": 0.00012560224089635855, "loss": 0.4349, "step": 31330 }, { "epoch": 17.503351955307263, "grad_norm": 0.3341262936592102, "learning_rate": 0.00012557422969187675, "loss": 0.4089, "step": 31331 }, { "epoch": 17.50391061452514, "grad_norm": 0.48142194747924805, "learning_rate": 0.00012554621848739496, "loss": 0.4203, "step": 31332 }, { "epoch": 17.504469273743016, "grad_norm": 0.4287753999233246, "learning_rate": 0.00012551820728291316, "loss": 0.406, "step": 31333 }, { "epoch": 17.505027932960893, "grad_norm": 0.382752925157547, "learning_rate": 0.00012549019607843137, "loss": 0.3868, "step": 31334 }, { "epoch": 17.505586592178773, "grad_norm": 0.4057750403881073, "learning_rate": 0.00012546218487394958, "loss": 0.4076, "step": 31335 }, { "epoch": 17.50614525139665, "grad_norm": 0.4480024576187134, "learning_rate": 0.0001254341736694678, "loss": 0.4189, "step": 31336 }, { "epoch": 17.506703910614526, "grad_norm": 0.42614173889160156, "learning_rate": 0.000125406162464986, "loss": 0.399, "step": 31337 }, { "epoch": 17.507262569832402, "grad_norm": 0.8793990612030029, "learning_rate": 0.0001253781512605042, "loss": 0.4118, "step": 31338 }, { "epoch": 17.50782122905028, "grad_norm": 0.7743203043937683, "learning_rate": 0.0001253501400560224, "loss": 0.3745, "step": 31339 }, { "epoch": 17.508379888268156, "grad_norm": 0.3688618838787079, "learning_rate": 0.00012532212885154063, "loss": 0.4876, "step": 31340 }, { "epoch": 17.508938547486032, "grad_norm": 
0.5979149341583252, "learning_rate": 0.00012529411764705884, "loss": 0.4168, "step": 31341 }, { "epoch": 17.509497206703912, "grad_norm": 0.5943663120269775, "learning_rate": 0.00012526610644257702, "loss": 0.3421, "step": 31342 }, { "epoch": 17.51005586592179, "grad_norm": 4.5099992752075195, "learning_rate": 0.00012523809523809522, "loss": 0.4085, "step": 31343 }, { "epoch": 17.510614525139665, "grad_norm": 0.9150093793869019, "learning_rate": 0.00012521008403361346, "loss": 0.4656, "step": 31344 }, { "epoch": 17.511173184357542, "grad_norm": 0.43181443214416504, "learning_rate": 0.00012518207282913166, "loss": 0.5019, "step": 31345 }, { "epoch": 17.51173184357542, "grad_norm": 0.6618065237998962, "learning_rate": 0.00012515406162464987, "loss": 0.6839, "step": 31346 }, { "epoch": 17.512290502793295, "grad_norm": 0.9445618391036987, "learning_rate": 0.00012512605042016805, "loss": 0.5031, "step": 31347 }, { "epoch": 17.51284916201117, "grad_norm": 0.5031107068061829, "learning_rate": 0.00012509803921568628, "loss": 0.5551, "step": 31348 }, { "epoch": 17.513407821229052, "grad_norm": 0.391179621219635, "learning_rate": 0.0001250700280112045, "loss": 0.4311, "step": 31349 }, { "epoch": 17.51396648044693, "grad_norm": 1.2921807765960693, "learning_rate": 0.0001250420168067227, "loss": 0.4065, "step": 31350 }, { "epoch": 17.514525139664805, "grad_norm": 1.0348345041275024, "learning_rate": 0.0001250140056022409, "loss": 0.3986, "step": 31351 }, { "epoch": 17.51508379888268, "grad_norm": 0.5795475244522095, "learning_rate": 0.0001249859943977591, "loss": 0.4414, "step": 31352 }, { "epoch": 17.515642458100558, "grad_norm": 0.4601471424102783, "learning_rate": 0.0001249579831932773, "loss": 0.4582, "step": 31353 }, { "epoch": 17.516201117318435, "grad_norm": 2.1328625679016113, "learning_rate": 0.00012492997198879552, "loss": 0.3725, "step": 31354 }, { "epoch": 17.51675977653631, "grad_norm": 0.5001599192619324, "learning_rate": 0.00012490196078431372, "loss": 0.438, 
"step": 31355 }, { "epoch": 17.51731843575419, "grad_norm": 0.5486342310905457, "learning_rate": 0.00012487394957983193, "loss": 0.3573, "step": 31356 }, { "epoch": 17.517877094972068, "grad_norm": 0.462859570980072, "learning_rate": 0.00012484593837535014, "loss": 0.4112, "step": 31357 }, { "epoch": 17.518435754189944, "grad_norm": 0.6502798795700073, "learning_rate": 0.00012481792717086834, "loss": 0.4724, "step": 31358 }, { "epoch": 17.51899441340782, "grad_norm": 0.5650374293327332, "learning_rate": 0.00012478991596638655, "loss": 0.4736, "step": 31359 }, { "epoch": 17.519553072625698, "grad_norm": 0.5809507369995117, "learning_rate": 0.00012476190476190478, "loss": 0.5454, "step": 31360 }, { "epoch": 17.520111731843574, "grad_norm": 0.5060024857521057, "learning_rate": 0.00012473389355742296, "loss": 0.4056, "step": 31361 }, { "epoch": 17.52067039106145, "grad_norm": 0.40012040734291077, "learning_rate": 0.0001247058823529412, "loss": 0.4042, "step": 31362 }, { "epoch": 17.52122905027933, "grad_norm": 0.4475395083427429, "learning_rate": 0.00012467787114845937, "loss": 0.324, "step": 31363 }, { "epoch": 17.521787709497207, "grad_norm": 0.6050678491592407, "learning_rate": 0.0001246498599439776, "loss": 0.4074, "step": 31364 }, { "epoch": 17.522346368715084, "grad_norm": 0.8675230145454407, "learning_rate": 0.0001246218487394958, "loss": 0.4499, "step": 31365 }, { "epoch": 17.52290502793296, "grad_norm": 0.39340218901634216, "learning_rate": 0.00012459383753501402, "loss": 0.4347, "step": 31366 }, { "epoch": 17.523463687150837, "grad_norm": 0.5411680340766907, "learning_rate": 0.00012456582633053222, "loss": 0.3305, "step": 31367 }, { "epoch": 17.524022346368714, "grad_norm": 1.6587975025177002, "learning_rate": 0.00012453781512605043, "loss": 0.3332, "step": 31368 }, { "epoch": 17.524581005586594, "grad_norm": 2.565991163253784, "learning_rate": 0.00012450980392156863, "loss": 0.4447, "step": 31369 }, { "epoch": 17.52513966480447, "grad_norm": 
0.4227873980998993, "learning_rate": 0.00012448179271708684, "loss": 0.4126, "step": 31370 }, { "epoch": 17.525698324022347, "grad_norm": 0.45929771661758423, "learning_rate": 0.00012445378151260505, "loss": 0.3582, "step": 31371 }, { "epoch": 17.526256983240224, "grad_norm": 0.5435831546783447, "learning_rate": 0.00012442577030812325, "loss": 0.4984, "step": 31372 }, { "epoch": 17.5268156424581, "grad_norm": 0.41643592715263367, "learning_rate": 0.00012439775910364146, "loss": 0.4074, "step": 31373 }, { "epoch": 17.527374301675977, "grad_norm": 0.43090009689331055, "learning_rate": 0.00012436974789915966, "loss": 0.3958, "step": 31374 }, { "epoch": 17.527932960893853, "grad_norm": 0.9563839435577393, "learning_rate": 0.00012434173669467787, "loss": 0.3905, "step": 31375 }, { "epoch": 17.528491620111733, "grad_norm": 0.43215587735176086, "learning_rate": 0.00012431372549019608, "loss": 0.425, "step": 31376 }, { "epoch": 17.52905027932961, "grad_norm": 0.354337602853775, "learning_rate": 0.00012428571428571428, "loss": 0.4403, "step": 31377 }, { "epoch": 17.529608938547486, "grad_norm": 4.78610897064209, "learning_rate": 0.0001242577030812325, "loss": 0.3937, "step": 31378 }, { "epoch": 17.530167597765363, "grad_norm": 1.036870002746582, "learning_rate": 0.0001242296918767507, "loss": 0.3553, "step": 31379 }, { "epoch": 17.53072625698324, "grad_norm": 0.7375327348709106, "learning_rate": 0.0001242016806722689, "loss": 0.3827, "step": 31380 }, { "epoch": 17.531284916201116, "grad_norm": 0.33264487981796265, "learning_rate": 0.0001241736694677871, "loss": 0.3312, "step": 31381 }, { "epoch": 17.531843575418993, "grad_norm": 0.41684606671333313, "learning_rate": 0.00012414565826330534, "loss": 0.3981, "step": 31382 }, { "epoch": 17.532402234636873, "grad_norm": 0.3767816126346588, "learning_rate": 0.00012411764705882352, "loss": 0.412, "step": 31383 }, { "epoch": 17.53296089385475, "grad_norm": 0.40415525436401367, "learning_rate": 0.00012408963585434175, "loss": 
0.3904, "step": 31384 }, { "epoch": 17.533519553072626, "grad_norm": 0.4591088593006134, "learning_rate": 0.00012406162464985993, "loss": 0.3354, "step": 31385 }, { "epoch": 17.534078212290503, "grad_norm": 0.41783255338668823, "learning_rate": 0.00012403361344537816, "loss": 0.5111, "step": 31386 }, { "epoch": 17.53463687150838, "grad_norm": 0.38515251874923706, "learning_rate": 0.00012400560224089637, "loss": 0.3786, "step": 31387 }, { "epoch": 17.535195530726256, "grad_norm": 0.3406234681606293, "learning_rate": 0.00012397759103641458, "loss": 0.3904, "step": 31388 }, { "epoch": 17.535754189944136, "grad_norm": 0.3470050096511841, "learning_rate": 0.00012394957983193278, "loss": 0.4298, "step": 31389 }, { "epoch": 17.536312849162012, "grad_norm": 0.4810034930706024, "learning_rate": 0.000123921568627451, "loss": 0.3771, "step": 31390 }, { "epoch": 17.53687150837989, "grad_norm": 0.5584728717803955, "learning_rate": 0.0001238935574229692, "loss": 0.3874, "step": 31391 }, { "epoch": 17.537430167597766, "grad_norm": 0.5674973726272583, "learning_rate": 0.0001238655462184874, "loss": 0.3095, "step": 31392 }, { "epoch": 17.537988826815642, "grad_norm": 0.4328550696372986, "learning_rate": 0.0001238375350140056, "loss": 0.3253, "step": 31393 }, { "epoch": 17.53854748603352, "grad_norm": 0.4130370318889618, "learning_rate": 0.0001238095238095238, "loss": 0.3788, "step": 31394 }, { "epoch": 17.539106145251395, "grad_norm": 0.4716345965862274, "learning_rate": 0.00012378151260504202, "loss": 0.4596, "step": 31395 }, { "epoch": 17.539664804469275, "grad_norm": 1.815059781074524, "learning_rate": 0.00012375350140056022, "loss": 0.4453, "step": 31396 }, { "epoch": 17.540223463687152, "grad_norm": 0.28682801127433777, "learning_rate": 0.00012372549019607843, "loss": 0.3091, "step": 31397 }, { "epoch": 17.54078212290503, "grad_norm": 0.4455854892730713, "learning_rate": 0.00012369747899159664, "loss": 0.3273, "step": 31398 }, { "epoch": 17.541340782122905, "grad_norm": 
0.7888973355293274, "learning_rate": 0.00012366946778711484, "loss": 0.4247, "step": 31399 }, { "epoch": 17.54189944134078, "grad_norm": 0.543374240398407, "learning_rate": 0.00012364145658263305, "loss": 0.4549, "step": 31400 }, { "epoch": 17.542458100558658, "grad_norm": 0.7094858288764954, "learning_rate": 0.00012361344537815125, "loss": 0.388, "step": 31401 }, { "epoch": 17.543016759776535, "grad_norm": 0.4381362199783325, "learning_rate": 0.00012358543417366946, "loss": 0.3872, "step": 31402 }, { "epoch": 17.543575418994415, "grad_norm": 0.4672721028327942, "learning_rate": 0.00012355742296918767, "loss": 0.5157, "step": 31403 }, { "epoch": 17.54413407821229, "grad_norm": 0.6241531372070312, "learning_rate": 0.0001235294117647059, "loss": 0.4097, "step": 31404 }, { "epoch": 17.544692737430168, "grad_norm": 1.5492377281188965, "learning_rate": 0.00012350140056022408, "loss": 0.4101, "step": 31405 }, { "epoch": 17.545251396648045, "grad_norm": 0.3204233646392822, "learning_rate": 0.0001234733893557423, "loss": 0.3426, "step": 31406 }, { "epoch": 17.54581005586592, "grad_norm": 0.7466712594032288, "learning_rate": 0.0001234453781512605, "loss": 0.5604, "step": 31407 }, { "epoch": 17.546368715083798, "grad_norm": 0.673948347568512, "learning_rate": 0.00012341736694677872, "loss": 0.3366, "step": 31408 }, { "epoch": 17.546927374301674, "grad_norm": 0.3771499693393707, "learning_rate": 0.00012338935574229693, "loss": 0.3738, "step": 31409 }, { "epoch": 17.547486033519554, "grad_norm": 5.109767436981201, "learning_rate": 0.00012336134453781513, "loss": 0.415, "step": 31410 }, { "epoch": 17.54804469273743, "grad_norm": 0.4377894103527069, "learning_rate": 0.00012333333333333334, "loss": 0.4644, "step": 31411 }, { "epoch": 17.548603351955308, "grad_norm": 0.4229608178138733, "learning_rate": 0.00012330532212885155, "loss": 0.3884, "step": 31412 }, { "epoch": 17.549162011173184, "grad_norm": 0.43517762422561646, "learning_rate": 0.00012327731092436975, "loss": 0.384, 
"step": 31413 }, { "epoch": 17.54972067039106, "grad_norm": 0.4626273810863495, "learning_rate": 0.00012324929971988796, "loss": 0.3851, "step": 31414 }, { "epoch": 17.550279329608937, "grad_norm": 0.3298066258430481, "learning_rate": 0.00012322128851540616, "loss": 0.363, "step": 31415 }, { "epoch": 17.550837988826817, "grad_norm": 4.576408863067627, "learning_rate": 0.00012319327731092437, "loss": 0.4781, "step": 31416 }, { "epoch": 17.551396648044694, "grad_norm": 0.9707555174827576, "learning_rate": 0.00012316526610644258, "loss": 0.4127, "step": 31417 }, { "epoch": 17.55195530726257, "grad_norm": 0.34349972009658813, "learning_rate": 0.00012313725490196078, "loss": 0.3346, "step": 31418 }, { "epoch": 17.552513966480447, "grad_norm": 4.508553981781006, "learning_rate": 0.000123109243697479, "loss": 0.4511, "step": 31419 }, { "epoch": 17.553072625698324, "grad_norm": 0.66888028383255, "learning_rate": 0.0001230812324929972, "loss": 0.3887, "step": 31420 }, { "epoch": 17.5536312849162, "grad_norm": 0.3732622265815735, "learning_rate": 0.0001230532212885154, "loss": 0.3929, "step": 31421 }, { "epoch": 17.554189944134077, "grad_norm": 0.5522032976150513, "learning_rate": 0.0001230252100840336, "loss": 0.4275, "step": 31422 }, { "epoch": 17.554748603351957, "grad_norm": 0.43784770369529724, "learning_rate": 0.0001229971988795518, "loss": 0.4578, "step": 31423 }, { "epoch": 17.555307262569833, "grad_norm": 0.8571161031723022, "learning_rate": 0.00012296918767507002, "loss": 0.4625, "step": 31424 }, { "epoch": 17.55586592178771, "grad_norm": 0.3722347617149353, "learning_rate": 0.00012294117647058822, "loss": 0.3602, "step": 31425 }, { "epoch": 17.556424581005587, "grad_norm": 0.6145003437995911, "learning_rate": 0.00012291316526610646, "loss": 0.4427, "step": 31426 }, { "epoch": 17.556983240223463, "grad_norm": 2.3805315494537354, "learning_rate": 0.00012288515406162464, "loss": 0.446, "step": 31427 }, { "epoch": 17.55754189944134, "grad_norm": 0.40157830715179443, 
"learning_rate": 0.00012285714285714287, "loss": 0.3836, "step": 31428 }, { "epoch": 17.558100558659216, "grad_norm": 0.45903003215789795, "learning_rate": 0.00012282913165266105, "loss": 0.3689, "step": 31429 }, { "epoch": 17.558659217877096, "grad_norm": 0.7679571509361267, "learning_rate": 0.00012280112044817928, "loss": 0.2849, "step": 31430 }, { "epoch": 17.559217877094973, "grad_norm": 0.508935809135437, "learning_rate": 0.0001227731092436975, "loss": 0.3485, "step": 31431 }, { "epoch": 17.55977653631285, "grad_norm": 0.3794126808643341, "learning_rate": 0.0001227450980392157, "loss": 0.308, "step": 31432 }, { "epoch": 17.560335195530726, "grad_norm": 0.5495390892028809, "learning_rate": 0.0001227170868347339, "loss": 0.605, "step": 31433 }, { "epoch": 17.560893854748603, "grad_norm": 0.665381908416748, "learning_rate": 0.0001226890756302521, "loss": 0.5278, "step": 31434 }, { "epoch": 17.56145251396648, "grad_norm": 0.6173123121261597, "learning_rate": 0.0001226610644257703, "loss": 0.3187, "step": 31435 }, { "epoch": 17.56201117318436, "grad_norm": 3.169093608856201, "learning_rate": 0.00012263305322128852, "loss": 0.4349, "step": 31436 }, { "epoch": 17.562569832402236, "grad_norm": 0.4877188205718994, "learning_rate": 0.00012260504201680672, "loss": 0.4138, "step": 31437 }, { "epoch": 17.563128491620112, "grad_norm": 0.3999217450618744, "learning_rate": 0.00012257703081232493, "loss": 0.3624, "step": 31438 }, { "epoch": 17.56368715083799, "grad_norm": 0.4917362928390503, "learning_rate": 0.00012254901960784314, "loss": 0.4646, "step": 31439 }, { "epoch": 17.564245810055866, "grad_norm": 0.39735132455825806, "learning_rate": 0.00012252100840336134, "loss": 0.3664, "step": 31440 }, { "epoch": 17.564804469273742, "grad_norm": 0.4285474419593811, "learning_rate": 0.00012249299719887955, "loss": 0.44, "step": 31441 }, { "epoch": 17.56536312849162, "grad_norm": 0.45227164030075073, "learning_rate": 0.00012246498599439775, "loss": 0.3774, "step": 31442 }, { 
"epoch": 17.5659217877095, "grad_norm": 0.3510522246360779, "learning_rate": 0.00012243697478991596, "loss": 0.2889, "step": 31443 }, { "epoch": 17.566480446927375, "grad_norm": 0.41908979415893555, "learning_rate": 0.00012240896358543417, "loss": 0.4113, "step": 31444 }, { "epoch": 17.567039106145252, "grad_norm": 0.5059147477149963, "learning_rate": 0.00012238095238095237, "loss": 0.4623, "step": 31445 }, { "epoch": 17.56759776536313, "grad_norm": 1.465397834777832, "learning_rate": 0.0001223529411764706, "loss": 0.3556, "step": 31446 }, { "epoch": 17.568156424581005, "grad_norm": 0.4680210053920746, "learning_rate": 0.00012232492997198878, "loss": 0.4147, "step": 31447 }, { "epoch": 17.56871508379888, "grad_norm": 0.38897278904914856, "learning_rate": 0.00012229691876750702, "loss": 0.404, "step": 31448 }, { "epoch": 17.56927374301676, "grad_norm": 0.4207116961479187, "learning_rate": 0.0001222689075630252, "loss": 0.5453, "step": 31449 }, { "epoch": 17.56983240223464, "grad_norm": 1.5752358436584473, "learning_rate": 0.00012224089635854343, "loss": 0.3971, "step": 31450 }, { "epoch": 17.570391061452515, "grad_norm": 0.41147202253341675, "learning_rate": 0.0001222128851540616, "loss": 0.2843, "step": 31451 }, { "epoch": 17.57094972067039, "grad_norm": 0.6159135699272156, "learning_rate": 0.00012218487394957984, "loss": 0.4589, "step": 31452 }, { "epoch": 17.571508379888268, "grad_norm": 0.5537623167037964, "learning_rate": 0.00012215686274509805, "loss": 0.454, "step": 31453 }, { "epoch": 17.572067039106145, "grad_norm": 0.4944468140602112, "learning_rate": 0.00012212885154061625, "loss": 0.4485, "step": 31454 }, { "epoch": 17.57262569832402, "grad_norm": 0.40874210000038147, "learning_rate": 0.00012210084033613446, "loss": 0.3675, "step": 31455 }, { "epoch": 17.573184357541898, "grad_norm": 0.4066227078437805, "learning_rate": 0.00012207282913165266, "loss": 0.3506, "step": 31456 }, { "epoch": 17.573743016759778, "grad_norm": 1.199589490890503, "learning_rate": 
0.00012204481792717087, "loss": 0.3576, "step": 31457 }, { "epoch": 17.574301675977654, "grad_norm": 0.38021135330200195, "learning_rate": 0.00012201680672268908, "loss": 0.3642, "step": 31458 }, { "epoch": 17.57486033519553, "grad_norm": 1.409810185432434, "learning_rate": 0.00012198879551820728, "loss": 0.3535, "step": 31459 }, { "epoch": 17.575418994413408, "grad_norm": 0.5215467810630798, "learning_rate": 0.00012196078431372549, "loss": 0.4374, "step": 31460 }, { "epoch": 17.575977653631284, "grad_norm": 0.5941303968429565, "learning_rate": 0.0001219327731092437, "loss": 0.4692, "step": 31461 }, { "epoch": 17.57653631284916, "grad_norm": 0.3887898921966553, "learning_rate": 0.0001219047619047619, "loss": 0.347, "step": 31462 }, { "epoch": 17.577094972067037, "grad_norm": 0.3999468684196472, "learning_rate": 0.00012187675070028012, "loss": 0.3779, "step": 31463 }, { "epoch": 17.577653631284917, "grad_norm": 0.4554681181907654, "learning_rate": 0.00012184873949579831, "loss": 0.5051, "step": 31464 }, { "epoch": 17.578212290502794, "grad_norm": 0.33645960688591003, "learning_rate": 0.00012182072829131653, "loss": 0.3762, "step": 31465 }, { "epoch": 17.57877094972067, "grad_norm": 0.444515198469162, "learning_rate": 0.00012179271708683472, "loss": 0.3362, "step": 31466 }, { "epoch": 17.579329608938547, "grad_norm": 0.3829556703567505, "learning_rate": 0.00012176470588235294, "loss": 0.427, "step": 31467 }, { "epoch": 17.579888268156424, "grad_norm": 0.4609065055847168, "learning_rate": 0.00012173669467787116, "loss": 0.3865, "step": 31468 }, { "epoch": 17.5804469273743, "grad_norm": 0.4460180699825287, "learning_rate": 0.00012170868347338936, "loss": 0.3777, "step": 31469 }, { "epoch": 17.58100558659218, "grad_norm": 0.5300408601760864, "learning_rate": 0.00012168067226890758, "loss": 0.4202, "step": 31470 }, { "epoch": 17.581564245810057, "grad_norm": 0.37787437438964844, "learning_rate": 0.00012165266106442577, "loss": 0.4468, "step": 31471 }, { "epoch": 
17.582122905027934, "grad_norm": 0.3655270040035248, "learning_rate": 0.00012162464985994399, "loss": 0.3407, "step": 31472 }, { "epoch": 17.58268156424581, "grad_norm": 0.5844898223876953, "learning_rate": 0.0001215966386554622, "loss": 0.6882, "step": 31473 }, { "epoch": 17.583240223463687, "grad_norm": 0.879111647605896, "learning_rate": 0.0001215686274509804, "loss": 0.3737, "step": 31474 }, { "epoch": 17.583798882681563, "grad_norm": 1.4070534706115723, "learning_rate": 0.0001215406162464986, "loss": 0.394, "step": 31475 }, { "epoch": 17.58435754189944, "grad_norm": 0.5160382986068726, "learning_rate": 0.00012151260504201681, "loss": 0.5746, "step": 31476 }, { "epoch": 17.58491620111732, "grad_norm": 0.5294186472892761, "learning_rate": 0.00012148459383753502, "loss": 0.5147, "step": 31477 }, { "epoch": 17.585474860335196, "grad_norm": 0.37465882301330566, "learning_rate": 0.00012145658263305322, "loss": 0.2895, "step": 31478 }, { "epoch": 17.586033519553073, "grad_norm": 3.461212158203125, "learning_rate": 0.00012142857142857143, "loss": 0.4826, "step": 31479 }, { "epoch": 17.58659217877095, "grad_norm": 0.5687753558158875, "learning_rate": 0.00012140056022408965, "loss": 0.5353, "step": 31480 }, { "epoch": 17.587150837988826, "grad_norm": 0.6214829683303833, "learning_rate": 0.00012137254901960784, "loss": 0.384, "step": 31481 }, { "epoch": 17.587709497206703, "grad_norm": 0.4134661555290222, "learning_rate": 0.00012134453781512606, "loss": 0.3514, "step": 31482 }, { "epoch": 17.58826815642458, "grad_norm": 4.23690938949585, "learning_rate": 0.00012131652661064425, "loss": 0.3824, "step": 31483 }, { "epoch": 17.58882681564246, "grad_norm": 0.4844209551811218, "learning_rate": 0.00012128851540616247, "loss": 0.416, "step": 31484 }, { "epoch": 17.589385474860336, "grad_norm": 0.4660329520702362, "learning_rate": 0.00012126050420168068, "loss": 0.5046, "step": 31485 }, { "epoch": 17.589944134078213, "grad_norm": 0.4786379039287567, "learning_rate": 
0.00012123249299719889, "loss": 0.4764, "step": 31486 }, { "epoch": 17.59050279329609, "grad_norm": 1.036396861076355, "learning_rate": 0.00012120448179271709, "loss": 0.8822, "step": 31487 }, { "epoch": 17.591061452513966, "grad_norm": 0.8079026937484741, "learning_rate": 0.0001211764705882353, "loss": 0.3213, "step": 31488 }, { "epoch": 17.591620111731842, "grad_norm": 0.44388240575790405, "learning_rate": 0.0001211484593837535, "loss": 0.4536, "step": 31489 }, { "epoch": 17.592178770949722, "grad_norm": 0.45528748631477356, "learning_rate": 0.00012112044817927172, "loss": 0.5471, "step": 31490 }, { "epoch": 17.5927374301676, "grad_norm": 0.459954172372818, "learning_rate": 0.00012109243697478992, "loss": 0.3513, "step": 31491 }, { "epoch": 17.593296089385476, "grad_norm": 0.5678022503852844, "learning_rate": 0.00012106442577030813, "loss": 0.3945, "step": 31492 }, { "epoch": 17.593854748603352, "grad_norm": 1.2420063018798828, "learning_rate": 0.00012103641456582633, "loss": 0.5049, "step": 31493 }, { "epoch": 17.59441340782123, "grad_norm": 0.4424779415130615, "learning_rate": 0.00012100840336134455, "loss": 0.31, "step": 31494 }, { "epoch": 17.594972067039105, "grad_norm": 0.582234263420105, "learning_rate": 0.00012098039215686275, "loss": 0.4144, "step": 31495 }, { "epoch": 17.595530726256982, "grad_norm": 0.6069568991661072, "learning_rate": 0.00012095238095238096, "loss": 0.3325, "step": 31496 }, { "epoch": 17.596089385474862, "grad_norm": 0.3336666226387024, "learning_rate": 0.00012092436974789916, "loss": 0.4106, "step": 31497 }, { "epoch": 17.59664804469274, "grad_norm": 0.5108823776245117, "learning_rate": 0.00012089635854341737, "loss": 0.3966, "step": 31498 }, { "epoch": 17.597206703910615, "grad_norm": 0.5811411738395691, "learning_rate": 0.00012086834733893558, "loss": 0.4283, "step": 31499 }, { "epoch": 17.59776536312849, "grad_norm": 2.5782835483551025, "learning_rate": 0.0001208403361344538, "loss": 0.4039, "step": 31500 }, { "epoch": 
17.59776536312849, "eval_cer": 0.08528907946275627, "eval_loss": 0.31996604800224304, "eval_runtime": 55.7055, "eval_samples_per_second": 81.464, "eval_steps_per_second": 5.098, "eval_wer": 0.33864975838663725, "step": 31500 }, { "epoch": 17.598324022346368, "grad_norm": 0.524627685546875, "learning_rate": 0.00012081232492997199, "loss": 0.3661, "step": 31501 }, { "epoch": 17.598882681564245, "grad_norm": 0.401043564081192, "learning_rate": 0.00012078431372549021, "loss": 0.4058, "step": 31502 }, { "epoch": 17.59944134078212, "grad_norm": 0.9795319437980652, "learning_rate": 0.0001207563025210084, "loss": 0.4405, "step": 31503 }, { "epoch": 17.6, "grad_norm": 0.3942526876926422, "learning_rate": 0.00012072829131652662, "loss": 0.3337, "step": 31504 }, { "epoch": 17.600558659217878, "grad_norm": 0.548869252204895, "learning_rate": 0.00012070028011204481, "loss": 0.4324, "step": 31505 }, { "epoch": 17.601117318435755, "grad_norm": 0.5122600197792053, "learning_rate": 0.00012067226890756303, "loss": 0.5191, "step": 31506 }, { "epoch": 17.60167597765363, "grad_norm": 0.3822869658470154, "learning_rate": 0.00012064425770308124, "loss": 0.3683, "step": 31507 }, { "epoch": 17.602234636871508, "grad_norm": 0.3101330101490021, "learning_rate": 0.00012061624649859944, "loss": 0.3689, "step": 31508 }, { "epoch": 17.602793296089384, "grad_norm": 0.4185240864753723, "learning_rate": 0.00012058823529411765, "loss": 0.4623, "step": 31509 }, { "epoch": 17.60335195530726, "grad_norm": 0.42487871646881104, "learning_rate": 0.00012056022408963586, "loss": 0.4076, "step": 31510 }, { "epoch": 17.60391061452514, "grad_norm": 0.5407841801643372, "learning_rate": 0.00012053221288515406, "loss": 0.3103, "step": 31511 }, { "epoch": 17.604469273743018, "grad_norm": 0.6585702300071716, "learning_rate": 0.00012050420168067228, "loss": 0.3808, "step": 31512 }, { "epoch": 17.605027932960894, "grad_norm": 0.3549375832080841, "learning_rate": 0.00012047619047619047, "loss": 0.3329, "step": 31513 
}, { "epoch": 17.60558659217877, "grad_norm": 0.42086732387542725, "learning_rate": 0.0001204481792717087, "loss": 0.3541, "step": 31514 }, { "epoch": 17.606145251396647, "grad_norm": 0.6354426741600037, "learning_rate": 0.00012042016806722689, "loss": 0.4, "step": 31515 }, { "epoch": 17.606703910614524, "grad_norm": 6.595485210418701, "learning_rate": 0.0001203921568627451, "loss": 0.5049, "step": 31516 }, { "epoch": 17.607262569832404, "grad_norm": 0.42092061042785645, "learning_rate": 0.00012036414565826331, "loss": 0.4261, "step": 31517 }, { "epoch": 17.60782122905028, "grad_norm": 0.46468386054039, "learning_rate": 0.00012033613445378152, "loss": 0.2787, "step": 31518 }, { "epoch": 17.608379888268157, "grad_norm": 0.5346707105636597, "learning_rate": 0.00012030812324929972, "loss": 0.2795, "step": 31519 }, { "epoch": 17.608938547486034, "grad_norm": 6.596825122833252, "learning_rate": 0.00012028011204481793, "loss": 0.4227, "step": 31520 }, { "epoch": 17.60949720670391, "grad_norm": 0.412166029214859, "learning_rate": 0.00012025210084033614, "loss": 0.4157, "step": 31521 }, { "epoch": 17.610055865921787, "grad_norm": 0.6900970935821533, "learning_rate": 0.00012022408963585436, "loss": 0.4507, "step": 31522 }, { "epoch": 17.610614525139663, "grad_norm": 1.7597662210464478, "learning_rate": 0.00012019607843137255, "loss": 0.3885, "step": 31523 }, { "epoch": 17.611173184357543, "grad_norm": 0.80777907371521, "learning_rate": 0.00012016806722689077, "loss": 0.4676, "step": 31524 }, { "epoch": 17.61173184357542, "grad_norm": 0.42634645104408264, "learning_rate": 0.00012014005602240896, "loss": 0.4036, "step": 31525 }, { "epoch": 17.612290502793297, "grad_norm": 2.0557897090911865, "learning_rate": 0.00012011204481792718, "loss": 0.4193, "step": 31526 }, { "epoch": 17.612849162011173, "grad_norm": 0.5671976804733276, "learning_rate": 0.00012008403361344539, "loss": 0.4688, "step": 31527 }, { "epoch": 17.61340782122905, "grad_norm": 0.6261650919914246, 
"learning_rate": 0.00012005602240896359, "loss": 0.3173, "step": 31528 }, { "epoch": 17.613966480446926, "grad_norm": 0.4165620803833008, "learning_rate": 0.0001200280112044818, "loss": 0.4342, "step": 31529 }, { "epoch": 17.614525139664803, "grad_norm": 0.460334450006485, "learning_rate": 0.00012, "loss": 0.3746, "step": 31530 }, { "epoch": 17.615083798882683, "grad_norm": 1.6202187538146973, "learning_rate": 0.00011997198879551821, "loss": 0.6428, "step": 31531 }, { "epoch": 17.61564245810056, "grad_norm": 0.5665677785873413, "learning_rate": 0.00011994397759103643, "loss": 0.2907, "step": 31532 }, { "epoch": 17.616201117318436, "grad_norm": 0.39944425225257874, "learning_rate": 0.00011991596638655462, "loss": 0.3787, "step": 31533 }, { "epoch": 17.616759776536313, "grad_norm": 0.562082827091217, "learning_rate": 0.00011988795518207284, "loss": 0.3582, "step": 31534 }, { "epoch": 17.61731843575419, "grad_norm": 0.4276149868965149, "learning_rate": 0.00011985994397759103, "loss": 0.4732, "step": 31535 }, { "epoch": 17.617877094972066, "grad_norm": 0.6590036153793335, "learning_rate": 0.00011983193277310925, "loss": 0.3843, "step": 31536 }, { "epoch": 17.618435754189946, "grad_norm": 0.5104238390922546, "learning_rate": 0.00011980392156862745, "loss": 0.5287, "step": 31537 }, { "epoch": 17.618994413407822, "grad_norm": 0.33312758803367615, "learning_rate": 0.00011977591036414566, "loss": 0.3356, "step": 31538 }, { "epoch": 17.6195530726257, "grad_norm": 0.6793311238288879, "learning_rate": 0.00011974789915966387, "loss": 0.4423, "step": 31539 }, { "epoch": 17.620111731843576, "grad_norm": 0.3907807171344757, "learning_rate": 0.00011971988795518208, "loss": 0.4378, "step": 31540 }, { "epoch": 17.620670391061452, "grad_norm": 0.5622068643569946, "learning_rate": 0.00011969187675070028, "loss": 0.4348, "step": 31541 }, { "epoch": 17.62122905027933, "grad_norm": 0.5129549503326416, "learning_rate": 0.00011966386554621849, "loss": 0.4897, "step": 31542 }, { "epoch": 
17.621787709497205, "grad_norm": 0.35348406434059143, "learning_rate": 0.0001196358543417367, "loss": 0.4355, "step": 31543 }, { "epoch": 17.622346368715085, "grad_norm": 0.4193996489048004, "learning_rate": 0.00011960784313725491, "loss": 0.3968, "step": 31544 }, { "epoch": 17.622905027932962, "grad_norm": 0.5911250114440918, "learning_rate": 0.0001195798319327731, "loss": 0.4094, "step": 31545 }, { "epoch": 17.62346368715084, "grad_norm": 0.8757883906364441, "learning_rate": 0.00011955182072829133, "loss": 0.3381, "step": 31546 }, { "epoch": 17.624022346368715, "grad_norm": 0.6150679588317871, "learning_rate": 0.00011952380952380952, "loss": 0.4874, "step": 31547 }, { "epoch": 17.62458100558659, "grad_norm": 0.4737125635147095, "learning_rate": 0.00011949579831932774, "loss": 0.4128, "step": 31548 }, { "epoch": 17.62513966480447, "grad_norm": 1.7527192831039429, "learning_rate": 0.00011946778711484594, "loss": 0.3142, "step": 31549 }, { "epoch": 17.625698324022345, "grad_norm": 0.4082525074481964, "learning_rate": 0.00011943977591036415, "loss": 0.4127, "step": 31550 }, { "epoch": 17.626256983240225, "grad_norm": 0.46964824199676514, "learning_rate": 0.00011941176470588236, "loss": 0.4541, "step": 31551 }, { "epoch": 17.6268156424581, "grad_norm": 0.40015947818756104, "learning_rate": 0.00011938375350140056, "loss": 0.3936, "step": 31552 }, { "epoch": 17.627374301675978, "grad_norm": 0.6811219453811646, "learning_rate": 0.00011935574229691877, "loss": 0.4426, "step": 31553 }, { "epoch": 17.627932960893855, "grad_norm": 0.42463311553001404, "learning_rate": 0.00011932773109243699, "loss": 0.3125, "step": 31554 }, { "epoch": 17.62849162011173, "grad_norm": 0.44518330693244934, "learning_rate": 0.00011929971988795518, "loss": 0.2777, "step": 31555 }, { "epoch": 17.629050279329608, "grad_norm": 0.3771800100803375, "learning_rate": 0.0001192717086834734, "loss": 0.377, "step": 31556 }, { "epoch": 17.629608938547484, "grad_norm": 0.4709591567516327, "learning_rate": 
0.00011924369747899159, "loss": 0.4397, "step": 31557 }, { "epoch": 17.630167597765364, "grad_norm": 0.4042418897151947, "learning_rate": 0.00011921568627450981, "loss": 0.3838, "step": 31558 }, { "epoch": 17.63072625698324, "grad_norm": 0.4670283794403076, "learning_rate": 0.00011918767507002802, "loss": 0.3882, "step": 31559 }, { "epoch": 17.631284916201118, "grad_norm": 0.6441609859466553, "learning_rate": 0.00011915966386554622, "loss": 0.504, "step": 31560 }, { "epoch": 17.631843575418994, "grad_norm": 0.8895352482795715, "learning_rate": 0.00011913165266106443, "loss": 0.451, "step": 31561 }, { "epoch": 17.63240223463687, "grad_norm": 0.35737344622612, "learning_rate": 0.00011910364145658264, "loss": 0.4517, "step": 31562 }, { "epoch": 17.632960893854747, "grad_norm": 0.36211442947387695, "learning_rate": 0.00011907563025210084, "loss": 0.3633, "step": 31563 }, { "epoch": 17.633519553072627, "grad_norm": 0.4149252474308014, "learning_rate": 0.00011904761904761905, "loss": 0.3364, "step": 31564 }, { "epoch": 17.634078212290504, "grad_norm": 0.29160770773887634, "learning_rate": 0.00011901960784313725, "loss": 0.3008, "step": 31565 }, { "epoch": 17.63463687150838, "grad_norm": 0.35701847076416016, "learning_rate": 0.00011899159663865547, "loss": 0.3682, "step": 31566 }, { "epoch": 17.635195530726257, "grad_norm": 0.4197212755680084, "learning_rate": 0.00011896358543417367, "loss": 0.3595, "step": 31567 }, { "epoch": 17.635754189944134, "grad_norm": 0.37884974479675293, "learning_rate": 0.00011893557422969189, "loss": 0.4083, "step": 31568 }, { "epoch": 17.63631284916201, "grad_norm": 0.5068607330322266, "learning_rate": 0.00011890756302521008, "loss": 0.4852, "step": 31569 }, { "epoch": 17.636871508379887, "grad_norm": 0.49007448554039, "learning_rate": 0.0001188795518207283, "loss": 0.3863, "step": 31570 }, { "epoch": 17.637430167597767, "grad_norm": 0.4585886597633362, "learning_rate": 0.0001188515406162465, "loss": 0.4439, "step": 31571 }, { "epoch": 
17.637988826815644, "grad_norm": 0.3608410358428955, "learning_rate": 0.00011882352941176471, "loss": 0.3463, "step": 31572 }, { "epoch": 17.63854748603352, "grad_norm": 0.3823234438896179, "learning_rate": 0.00011879551820728292, "loss": 0.3546, "step": 31573 }, { "epoch": 17.639106145251397, "grad_norm": 1.4093756675720215, "learning_rate": 0.00011876750700280112, "loss": 0.3833, "step": 31574 }, { "epoch": 17.639664804469273, "grad_norm": 0.42624631524086, "learning_rate": 0.00011873949579831933, "loss": 0.3542, "step": 31575 }, { "epoch": 17.64022346368715, "grad_norm": 0.4674459993839264, "learning_rate": 0.00011871148459383755, "loss": 0.3508, "step": 31576 }, { "epoch": 17.640782122905026, "grad_norm": 0.47147318720817566, "learning_rate": 0.00011868347338935574, "loss": 0.367, "step": 31577 }, { "epoch": 17.641340782122906, "grad_norm": 0.45162442326545715, "learning_rate": 0.00011865546218487396, "loss": 0.4238, "step": 31578 }, { "epoch": 17.641899441340783, "grad_norm": 0.6584606170654297, "learning_rate": 0.00011862745098039215, "loss": 0.4001, "step": 31579 }, { "epoch": 17.64245810055866, "grad_norm": 1.55841863155365, "learning_rate": 0.00011859943977591037, "loss": 0.3405, "step": 31580 }, { "epoch": 17.643016759776536, "grad_norm": 0.41311919689178467, "learning_rate": 0.00011857142857142858, "loss": 0.4435, "step": 31581 }, { "epoch": 17.643575418994413, "grad_norm": 0.40032321214675903, "learning_rate": 0.00011854341736694678, "loss": 0.3566, "step": 31582 }, { "epoch": 17.64413407821229, "grad_norm": 0.7496161460876465, "learning_rate": 0.00011851540616246499, "loss": 0.4416, "step": 31583 }, { "epoch": 17.64469273743017, "grad_norm": 0.3911203444004059, "learning_rate": 0.0001184873949579832, "loss": 0.4098, "step": 31584 }, { "epoch": 17.645251396648046, "grad_norm": 0.9852616190910339, "learning_rate": 0.0001184593837535014, "loss": 0.3722, "step": 31585 }, { "epoch": 17.645810055865923, "grad_norm": 0.7721287608146667, "learning_rate": 
0.00011843137254901962, "loss": 0.3411, "step": 31586 }, { "epoch": 17.6463687150838, "grad_norm": 1.2441833019256592, "learning_rate": 0.00011840336134453781, "loss": 0.3388, "step": 31587 }, { "epoch": 17.646927374301676, "grad_norm": 0.5143932700157166, "learning_rate": 0.00011837535014005603, "loss": 0.4098, "step": 31588 }, { "epoch": 17.647486033519552, "grad_norm": 0.348914235830307, "learning_rate": 0.00011834733893557422, "loss": 0.3231, "step": 31589 }, { "epoch": 17.64804469273743, "grad_norm": 0.36880359053611755, "learning_rate": 0.00011831932773109244, "loss": 0.3407, "step": 31590 }, { "epoch": 17.64860335195531, "grad_norm": 0.6653019785881042, "learning_rate": 0.00011829131652661064, "loss": 0.5821, "step": 31591 }, { "epoch": 17.649162011173186, "grad_norm": 0.6810843348503113, "learning_rate": 0.00011826330532212886, "loss": 0.3613, "step": 31592 }, { "epoch": 17.649720670391062, "grad_norm": 0.7818769812583923, "learning_rate": 0.00011823529411764706, "loss": 0.4533, "step": 31593 }, { "epoch": 17.65027932960894, "grad_norm": 0.4716668725013733, "learning_rate": 0.00011820728291316527, "loss": 0.4091, "step": 31594 }, { "epoch": 17.650837988826815, "grad_norm": 11.892498016357422, "learning_rate": 0.00011817927170868347, "loss": 0.3633, "step": 31595 }, { "epoch": 17.65139664804469, "grad_norm": 0.4523873031139374, "learning_rate": 0.00011815126050420168, "loss": 0.4002, "step": 31596 }, { "epoch": 17.65195530726257, "grad_norm": 0.401519238948822, "learning_rate": 0.00011812324929971989, "loss": 0.3617, "step": 31597 }, { "epoch": 17.65251396648045, "grad_norm": 0.6163433194160461, "learning_rate": 0.0001180952380952381, "loss": 0.4538, "step": 31598 }, { "epoch": 17.653072625698325, "grad_norm": 0.4203943610191345, "learning_rate": 0.0001180672268907563, "loss": 0.3265, "step": 31599 }, { "epoch": 17.6536312849162, "grad_norm": 0.6386510729789734, "learning_rate": 0.00011803921568627452, "loss": 0.4398, "step": 31600 }, { "epoch": 
17.654189944134078, "grad_norm": 1.06069815158844, "learning_rate": 0.00011801120448179271, "loss": 0.4286, "step": 31601 }, { "epoch": 17.654748603351955, "grad_norm": 0.3916155695915222, "learning_rate": 0.00011798319327731093, "loss": 0.4547, "step": 31602 }, { "epoch": 17.65530726256983, "grad_norm": 0.4664710462093353, "learning_rate": 0.00011795518207282914, "loss": 0.4157, "step": 31603 }, { "epoch": 17.655865921787708, "grad_norm": 0.6798214316368103, "learning_rate": 0.00011792717086834734, "loss": 0.3903, "step": 31604 }, { "epoch": 17.656424581005588, "grad_norm": 0.7662414312362671, "learning_rate": 0.00011789915966386555, "loss": 0.5906, "step": 31605 }, { "epoch": 17.656983240223465, "grad_norm": 0.381435364484787, "learning_rate": 0.00011787114845938375, "loss": 0.508, "step": 31606 }, { "epoch": 17.65754189944134, "grad_norm": 0.36230552196502686, "learning_rate": 0.00011784313725490196, "loss": 0.4304, "step": 31607 }, { "epoch": 17.658100558659218, "grad_norm": 0.5704683065414429, "learning_rate": 0.00011781512605042018, "loss": 0.4462, "step": 31608 }, { "epoch": 17.658659217877094, "grad_norm": 0.4187014400959015, "learning_rate": 0.00011778711484593837, "loss": 0.3724, "step": 31609 }, { "epoch": 17.65921787709497, "grad_norm": 0.5483432412147522, "learning_rate": 0.00011775910364145659, "loss": 0.5125, "step": 31610 }, { "epoch": 17.659776536312847, "grad_norm": 0.5280621647834778, "learning_rate": 0.00011773109243697478, "loss": 0.5092, "step": 31611 }, { "epoch": 17.660335195530728, "grad_norm": 0.4235619902610779, "learning_rate": 0.000117703081232493, "loss": 0.4665, "step": 31612 }, { "epoch": 17.660893854748604, "grad_norm": 0.5079139471054077, "learning_rate": 0.00011767507002801121, "loss": 0.4052, "step": 31613 }, { "epoch": 17.66145251396648, "grad_norm": 1.5212641954421997, "learning_rate": 0.00011764705882352942, "loss": 0.4093, "step": 31614 }, { "epoch": 17.662011173184357, "grad_norm": 0.5192087888717651, "learning_rate": 
0.00011761904761904762, "loss": 0.4886, "step": 31615 }, { "epoch": 17.662569832402234, "grad_norm": 0.5584065318107605, "learning_rate": 0.00011759103641456583, "loss": 0.4583, "step": 31616 }, { "epoch": 17.66312849162011, "grad_norm": 0.5097191333770752, "learning_rate": 0.00011756302521008403, "loss": 0.5373, "step": 31617 }, { "epoch": 17.66368715083799, "grad_norm": 0.47235700488090515, "learning_rate": 0.00011753501400560224, "loss": 0.446, "step": 31618 }, { "epoch": 17.664245810055867, "grad_norm": 0.42533260583877563, "learning_rate": 0.00011750700280112044, "loss": 0.4097, "step": 31619 }, { "epoch": 17.664804469273744, "grad_norm": 0.35144490003585815, "learning_rate": 0.00011747899159663866, "loss": 0.4372, "step": 31620 }, { "epoch": 17.66536312849162, "grad_norm": 0.3451632857322693, "learning_rate": 0.00011745098039215686, "loss": 0.3348, "step": 31621 }, { "epoch": 17.665921787709497, "grad_norm": 0.474505215883255, "learning_rate": 0.00011742296918767508, "loss": 0.6443, "step": 31622 }, { "epoch": 17.666480446927373, "grad_norm": 0.5839816331863403, "learning_rate": 0.00011739495798319327, "loss": 0.4087, "step": 31623 }, { "epoch": 17.66703910614525, "grad_norm": 0.384731650352478, "learning_rate": 0.00011736694677871149, "loss": 0.4137, "step": 31624 }, { "epoch": 17.66759776536313, "grad_norm": 0.479227215051651, "learning_rate": 0.0001173389355742297, "loss": 0.3764, "step": 31625 }, { "epoch": 17.668156424581007, "grad_norm": 0.6528783440589905, "learning_rate": 0.0001173109243697479, "loss": 0.4521, "step": 31626 }, { "epoch": 17.668715083798883, "grad_norm": 0.9608190655708313, "learning_rate": 0.0001172829131652661, "loss": 0.6482, "step": 31627 }, { "epoch": 17.66927374301676, "grad_norm": 0.43148183822631836, "learning_rate": 0.00011725490196078431, "loss": 0.3637, "step": 31628 }, { "epoch": 17.669832402234636, "grad_norm": 0.4659328758716583, "learning_rate": 0.00011722689075630252, "loss": 0.4142, "step": 31629 }, { "epoch": 
17.670391061452513, "grad_norm": 1.5025144815444946, "learning_rate": 0.00011719887955182074, "loss": 0.4255, "step": 31630 }, { "epoch": 17.67094972067039, "grad_norm": 0.5012494325637817, "learning_rate": 0.00011717086834733893, "loss": 0.3259, "step": 31631 }, { "epoch": 17.67150837988827, "grad_norm": 0.4289581775665283, "learning_rate": 0.00011714285714285715, "loss": 0.4878, "step": 31632 }, { "epoch": 17.672067039106146, "grad_norm": 0.3920581042766571, "learning_rate": 0.00011711484593837534, "loss": 0.3948, "step": 31633 }, { "epoch": 17.672625698324023, "grad_norm": 2.1535732746124268, "learning_rate": 0.00011708683473389356, "loss": 0.3313, "step": 31634 }, { "epoch": 17.6731843575419, "grad_norm": 0.7451744079589844, "learning_rate": 0.00011705882352941177, "loss": 0.4454, "step": 31635 }, { "epoch": 17.673743016759776, "grad_norm": 0.4232161045074463, "learning_rate": 0.00011703081232492997, "loss": 0.5101, "step": 31636 }, { "epoch": 17.674301675977652, "grad_norm": 1.4028230905532837, "learning_rate": 0.00011700280112044818, "loss": 0.4238, "step": 31637 }, { "epoch": 17.674860335195532, "grad_norm": 0.4034656882286072, "learning_rate": 0.00011697478991596639, "loss": 0.4115, "step": 31638 }, { "epoch": 17.67541899441341, "grad_norm": 0.48111510276794434, "learning_rate": 0.00011694677871148459, "loss": 0.3278, "step": 31639 }, { "epoch": 17.675977653631286, "grad_norm": 0.7918464541435242, "learning_rate": 0.00011691876750700281, "loss": 0.3576, "step": 31640 }, { "epoch": 17.676536312849162, "grad_norm": 0.6013913750648499, "learning_rate": 0.000116890756302521, "loss": 0.4078, "step": 31641 }, { "epoch": 17.67709497206704, "grad_norm": 0.7468206286430359, "learning_rate": 0.00011686274509803922, "loss": 0.5465, "step": 31642 }, { "epoch": 17.677653631284915, "grad_norm": 0.6063535213470459, "learning_rate": 0.00011683473389355742, "loss": 0.5013, "step": 31643 }, { "epoch": 17.678212290502792, "grad_norm": 0.3940153121948242, "learning_rate": 
0.00011680672268907564, "loss": 0.4193, "step": 31644 }, { "epoch": 17.678770949720672, "grad_norm": 0.4078555405139923, "learning_rate": 0.00011677871148459383, "loss": 0.3593, "step": 31645 }, { "epoch": 17.67932960893855, "grad_norm": 0.4437078833580017, "learning_rate": 0.00011675070028011205, "loss": 0.4136, "step": 31646 }, { "epoch": 17.679888268156425, "grad_norm": 0.4826681911945343, "learning_rate": 0.00011672268907563025, "loss": 0.4074, "step": 31647 }, { "epoch": 17.6804469273743, "grad_norm": 0.4207814633846283, "learning_rate": 0.00011669467787114846, "loss": 0.4013, "step": 31648 }, { "epoch": 17.68100558659218, "grad_norm": 0.3458710014820099, "learning_rate": 0.00011666666666666667, "loss": 0.3464, "step": 31649 }, { "epoch": 17.681564245810055, "grad_norm": 0.5296543836593628, "learning_rate": 0.00011663865546218487, "loss": 0.424, "step": 31650 }, { "epoch": 17.68212290502793, "grad_norm": 0.555448055267334, "learning_rate": 0.00011661064425770308, "loss": 0.4235, "step": 31651 }, { "epoch": 17.68268156424581, "grad_norm": 0.628456175327301, "learning_rate": 0.0001165826330532213, "loss": 0.6004, "step": 31652 }, { "epoch": 17.683240223463688, "grad_norm": 0.37482598423957825, "learning_rate": 0.00011655462184873949, "loss": 0.3871, "step": 31653 }, { "epoch": 17.683798882681565, "grad_norm": 15.061306953430176, "learning_rate": 0.00011652661064425771, "loss": 0.3937, "step": 31654 }, { "epoch": 17.68435754189944, "grad_norm": 0.43291914463043213, "learning_rate": 0.0001164985994397759, "loss": 0.4344, "step": 31655 }, { "epoch": 17.684916201117318, "grad_norm": 0.4832512140274048, "learning_rate": 0.00011647058823529412, "loss": 0.4133, "step": 31656 }, { "epoch": 17.685474860335194, "grad_norm": 0.918084442615509, "learning_rate": 0.00011644257703081233, "loss": 0.3989, "step": 31657 }, { "epoch": 17.68603351955307, "grad_norm": 0.48510441184043884, "learning_rate": 0.00011641456582633053, "loss": 0.3668, "step": 31658 }, { "epoch": 
17.68659217877095, "grad_norm": 0.37789955735206604, "learning_rate": 0.00011638655462184874, "loss": 0.3973, "step": 31659 }, { "epoch": 17.687150837988828, "grad_norm": 1.3163118362426758, "learning_rate": 0.00011635854341736694, "loss": 0.3282, "step": 31660 }, { "epoch": 17.687709497206704, "grad_norm": 0.4649355709552765, "learning_rate": 0.00011633053221288515, "loss": 0.3604, "step": 31661 }, { "epoch": 17.68826815642458, "grad_norm": 0.27869874238967896, "learning_rate": 0.00011630252100840337, "loss": 0.302, "step": 31662 }, { "epoch": 17.688826815642457, "grad_norm": 0.5352100133895874, "learning_rate": 0.00011627450980392156, "loss": 0.566, "step": 31663 }, { "epoch": 17.689385474860334, "grad_norm": 0.3969566226005554, "learning_rate": 0.00011624649859943978, "loss": 0.2895, "step": 31664 }, { "epoch": 17.689944134078214, "grad_norm": 0.36757737398147583, "learning_rate": 0.00011621848739495797, "loss": 0.3359, "step": 31665 }, { "epoch": 17.69050279329609, "grad_norm": 0.502049446105957, "learning_rate": 0.0001161904761904762, "loss": 0.2775, "step": 31666 }, { "epoch": 17.691061452513967, "grad_norm": 0.44251585006713867, "learning_rate": 0.0001161624649859944, "loss": 0.4396, "step": 31667 }, { "epoch": 17.691620111731844, "grad_norm": 0.5007766485214233, "learning_rate": 0.0001161344537815126, "loss": 0.4285, "step": 31668 }, { "epoch": 17.69217877094972, "grad_norm": 0.3388381600379944, "learning_rate": 0.00011610644257703081, "loss": 0.3408, "step": 31669 }, { "epoch": 17.692737430167597, "grad_norm": 0.6114059090614319, "learning_rate": 0.00011607843137254902, "loss": 0.3896, "step": 31670 }, { "epoch": 17.693296089385473, "grad_norm": 0.42510855197906494, "learning_rate": 0.00011605042016806722, "loss": 0.459, "step": 31671 }, { "epoch": 17.693854748603353, "grad_norm": 0.5139893293380737, "learning_rate": 0.00011602240896358544, "loss": 0.4303, "step": 31672 }, { "epoch": 17.69441340782123, "grad_norm": 1.0428709983825684, "learning_rate": 
0.00011599439775910364, "loss": 0.3736, "step": 31673 }, { "epoch": 17.694972067039107, "grad_norm": 2.1388893127441406, "learning_rate": 0.00011596638655462186, "loss": 0.6118, "step": 31674 }, { "epoch": 17.695530726256983, "grad_norm": 0.5511255860328674, "learning_rate": 0.00011593837535014005, "loss": 0.564, "step": 31675 }, { "epoch": 17.69608938547486, "grad_norm": 0.6469642519950867, "learning_rate": 0.00011591036414565827, "loss": 0.3678, "step": 31676 }, { "epoch": 17.696648044692736, "grad_norm": 0.42850831151008606, "learning_rate": 0.00011588235294117646, "loss": 0.3473, "step": 31677 }, { "epoch": 17.697206703910613, "grad_norm": 0.6640798449516296, "learning_rate": 0.00011585434173669468, "loss": 0.3513, "step": 31678 }, { "epoch": 17.697765363128493, "grad_norm": 1.0331090688705444, "learning_rate": 0.00011582633053221289, "loss": 0.5791, "step": 31679 }, { "epoch": 17.69832402234637, "grad_norm": 0.37808313965797424, "learning_rate": 0.00011579831932773109, "loss": 0.4451, "step": 31680 }, { "epoch": 17.698882681564246, "grad_norm": 0.42985180020332336, "learning_rate": 0.0001157703081232493, "loss": 0.4207, "step": 31681 }, { "epoch": 17.699441340782123, "grad_norm": 0.7293187975883484, "learning_rate": 0.0001157422969187675, "loss": 0.3876, "step": 31682 }, { "epoch": 17.7, "grad_norm": 3.816993236541748, "learning_rate": 0.00011571428571428571, "loss": 0.4487, "step": 31683 }, { "epoch": 17.700558659217876, "grad_norm": 0.33883845806121826, "learning_rate": 0.00011568627450980393, "loss": 0.3798, "step": 31684 }, { "epoch": 17.701117318435756, "grad_norm": 0.4608825445175171, "learning_rate": 0.00011565826330532212, "loss": 0.4232, "step": 31685 }, { "epoch": 17.701675977653633, "grad_norm": 0.9737520813941956, "learning_rate": 0.00011563025210084034, "loss": 0.3791, "step": 31686 }, { "epoch": 17.70223463687151, "grad_norm": 0.44187629222869873, "learning_rate": 0.00011560224089635853, "loss": 0.3185, "step": 31687 }, { "epoch": 
17.702793296089386, "grad_norm": 0.872516393661499, "learning_rate": 0.00011557422969187675, "loss": 0.4443, "step": 31688 }, { "epoch": 17.703351955307262, "grad_norm": 0.548848569393158, "learning_rate": 0.00011554621848739496, "loss": 0.4519, "step": 31689 }, { "epoch": 17.70391061452514, "grad_norm": 0.47159314155578613, "learning_rate": 0.00011551820728291317, "loss": 0.535, "step": 31690 }, { "epoch": 17.704469273743015, "grad_norm": 0.6160970330238342, "learning_rate": 0.00011549019607843137, "loss": 0.3898, "step": 31691 }, { "epoch": 17.705027932960895, "grad_norm": 1.0960477590560913, "learning_rate": 0.00011546218487394958, "loss": 0.4759, "step": 31692 }, { "epoch": 17.705586592178772, "grad_norm": 0.5396501421928406, "learning_rate": 0.00011543417366946778, "loss": 0.3898, "step": 31693 }, { "epoch": 17.70614525139665, "grad_norm": 0.6107489466667175, "learning_rate": 0.000115406162464986, "loss": 0.4173, "step": 31694 }, { "epoch": 17.706703910614525, "grad_norm": 0.5246310830116272, "learning_rate": 0.0001153781512605042, "loss": 0.3455, "step": 31695 }, { "epoch": 17.7072625698324, "grad_norm": 0.48305490612983704, "learning_rate": 0.00011535014005602241, "loss": 0.3954, "step": 31696 }, { "epoch": 17.70782122905028, "grad_norm": 0.5337732434272766, "learning_rate": 0.00011532212885154061, "loss": 0.3865, "step": 31697 }, { "epoch": 17.708379888268155, "grad_norm": 0.7672650218009949, "learning_rate": 0.00011529411764705883, "loss": 0.3603, "step": 31698 }, { "epoch": 17.708938547486035, "grad_norm": 0.687788188457489, "learning_rate": 0.00011526610644257705, "loss": 0.3206, "step": 31699 }, { "epoch": 17.70949720670391, "grad_norm": 1.1132733821868896, "learning_rate": 0.00011523809523809524, "loss": 0.5971, "step": 31700 }, { "epoch": 17.710055865921788, "grad_norm": 0.3982209861278534, "learning_rate": 0.00011521008403361346, "loss": 0.3968, "step": 31701 }, { "epoch": 17.710614525139665, "grad_norm": 0.40567949414253235, "learning_rate": 
0.00011518207282913165, "loss": 0.4902, "step": 31702 }, { "epoch": 17.71117318435754, "grad_norm": 0.9496821165084839, "learning_rate": 0.00011515406162464987, "loss": 0.4258, "step": 31703 }, { "epoch": 17.711731843575418, "grad_norm": 0.3718690276145935, "learning_rate": 0.00011512605042016806, "loss": 0.371, "step": 31704 }, { "epoch": 17.712290502793294, "grad_norm": 0.4091494381427765, "learning_rate": 0.00011509803921568628, "loss": 0.3981, "step": 31705 }, { "epoch": 17.712849162011175, "grad_norm": 0.34816282987594604, "learning_rate": 0.00011507002801120449, "loss": 0.3845, "step": 31706 }, { "epoch": 17.71340782122905, "grad_norm": 0.8170983791351318, "learning_rate": 0.0001150420168067227, "loss": 0.298, "step": 31707 }, { "epoch": 17.713966480446928, "grad_norm": 0.567524790763855, "learning_rate": 0.0001150140056022409, "loss": 0.4071, "step": 31708 }, { "epoch": 17.714525139664804, "grad_norm": 0.4217959940433502, "learning_rate": 0.0001149859943977591, "loss": 0.3282, "step": 31709 }, { "epoch": 17.71508379888268, "grad_norm": 0.3706951141357422, "learning_rate": 0.00011495798319327731, "loss": 0.3237, "step": 31710 }, { "epoch": 17.715642458100557, "grad_norm": 0.4685196578502655, "learning_rate": 0.00011492997198879553, "loss": 0.6083, "step": 31711 }, { "epoch": 17.716201117318434, "grad_norm": 0.5812598466873169, "learning_rate": 0.00011490196078431372, "loss": 0.3995, "step": 31712 }, { "epoch": 17.716759776536314, "grad_norm": 0.36034324765205383, "learning_rate": 0.00011487394957983194, "loss": 0.327, "step": 31713 }, { "epoch": 17.71731843575419, "grad_norm": 0.6904635429382324, "learning_rate": 0.00011484593837535014, "loss": 0.5734, "step": 31714 }, { "epoch": 17.717877094972067, "grad_norm": 0.40325453877449036, "learning_rate": 0.00011481792717086836, "loss": 0.3822, "step": 31715 }, { "epoch": 17.718435754189944, "grad_norm": 0.36447393894195557, "learning_rate": 0.00011478991596638656, "loss": 0.3569, "step": 31716 }, { "epoch": 
17.71899441340782, "grad_norm": 0.6370408535003662, "learning_rate": 0.00011476190476190477, "loss": 0.4883, "step": 31717 }, { "epoch": 17.719553072625697, "grad_norm": 0.37020808458328247, "learning_rate": 0.00011473389355742297, "loss": 0.3948, "step": 31718 }, { "epoch": 17.720111731843577, "grad_norm": 0.34848758578300476, "learning_rate": 0.00011470588235294118, "loss": 0.37, "step": 31719 }, { "epoch": 17.720670391061454, "grad_norm": 0.5552564859390259, "learning_rate": 0.00011467787114845939, "loss": 0.6072, "step": 31720 }, { "epoch": 17.72122905027933, "grad_norm": 0.4255106747150421, "learning_rate": 0.0001146498599439776, "loss": 0.4111, "step": 31721 }, { "epoch": 17.721787709497207, "grad_norm": 0.5258291959762573, "learning_rate": 0.0001146218487394958, "loss": 0.3518, "step": 31722 }, { "epoch": 17.722346368715083, "grad_norm": 0.444899320602417, "learning_rate": 0.00011459383753501402, "loss": 0.4101, "step": 31723 }, { "epoch": 17.72290502793296, "grad_norm": 0.5501758456230164, "learning_rate": 0.00011456582633053221, "loss": 0.3817, "step": 31724 }, { "epoch": 17.723463687150836, "grad_norm": 1.4332919120788574, "learning_rate": 0.00011453781512605043, "loss": 0.3544, "step": 31725 }, { "epoch": 17.724022346368717, "grad_norm": 0.3350711464881897, "learning_rate": 0.00011450980392156864, "loss": 0.3635, "step": 31726 }, { "epoch": 17.724581005586593, "grad_norm": 1.3741832971572876, "learning_rate": 0.00011448179271708684, "loss": 0.4342, "step": 31727 }, { "epoch": 17.72513966480447, "grad_norm": 2.9018564224243164, "learning_rate": 0.00011445378151260505, "loss": 0.4103, "step": 31728 }, { "epoch": 17.725698324022346, "grad_norm": 0.33238524198532104, "learning_rate": 0.00011442577030812325, "loss": 0.332, "step": 31729 }, { "epoch": 17.726256983240223, "grad_norm": 0.39465397596359253, "learning_rate": 0.00011439775910364146, "loss": 0.3943, "step": 31730 }, { "epoch": 17.7268156424581, "grad_norm": 0.6010270118713379, "learning_rate": 
0.00011436974789915967, "loss": 0.4098, "step": 31731 }, { "epoch": 17.727374301675976, "grad_norm": 0.6202162504196167, "learning_rate": 0.00011434173669467787, "loss": 0.6358, "step": 31732 }, { "epoch": 17.727932960893856, "grad_norm": 0.4591814875602722, "learning_rate": 0.00011431372549019609, "loss": 0.442, "step": 31733 }, { "epoch": 17.728491620111733, "grad_norm": 0.5069906115531921, "learning_rate": 0.00011428571428571428, "loss": 0.3449, "step": 31734 }, { "epoch": 17.72905027932961, "grad_norm": 0.3964703679084778, "learning_rate": 0.0001142577030812325, "loss": 0.3507, "step": 31735 }, { "epoch": 17.729608938547486, "grad_norm": 0.7453311085700989, "learning_rate": 0.0001142296918767507, "loss": 0.4001, "step": 31736 }, { "epoch": 17.730167597765362, "grad_norm": 0.42806634306907654, "learning_rate": 0.00011420168067226891, "loss": 0.4508, "step": 31737 }, { "epoch": 17.73072625698324, "grad_norm": 0.6020180583000183, "learning_rate": 0.00011417366946778712, "loss": 0.4229, "step": 31738 }, { "epoch": 17.73128491620112, "grad_norm": 0.5702681541442871, "learning_rate": 0.00011414565826330533, "loss": 0.4787, "step": 31739 }, { "epoch": 17.731843575418996, "grad_norm": 1.486236572265625, "learning_rate": 0.00011411764705882353, "loss": 0.4373, "step": 31740 }, { "epoch": 17.732402234636872, "grad_norm": 2.740108013153076, "learning_rate": 0.00011408963585434174, "loss": 0.4312, "step": 31741 }, { "epoch": 17.73296089385475, "grad_norm": 0.6044945120811462, "learning_rate": 0.00011406162464985994, "loss": 0.3758, "step": 31742 }, { "epoch": 17.733519553072625, "grad_norm": 5.576402187347412, "learning_rate": 0.00011403361344537816, "loss": 0.3877, "step": 31743 }, { "epoch": 17.734078212290502, "grad_norm": 0.49012938141822815, "learning_rate": 0.00011400560224089636, "loss": 0.4137, "step": 31744 }, { "epoch": 17.73463687150838, "grad_norm": 0.3953683376312256, "learning_rate": 0.00011397759103641458, "loss": 0.4363, "step": 31745 }, { "epoch": 
17.73519553072626, "grad_norm": 0.9253637790679932, "learning_rate": 0.00011394957983193277, "loss": 0.3309, "step": 31746 }, { "epoch": 17.735754189944135, "grad_norm": 0.3485408425331116, "learning_rate": 0.00011392156862745099, "loss": 0.4461, "step": 31747 }, { "epoch": 17.73631284916201, "grad_norm": 5.815307140350342, "learning_rate": 0.0001138935574229692, "loss": 0.5169, "step": 31748 }, { "epoch": 17.73687150837989, "grad_norm": 0.5165740847587585, "learning_rate": 0.0001138655462184874, "loss": 0.3959, "step": 31749 }, { "epoch": 17.737430167597765, "grad_norm": 1.4824273586273193, "learning_rate": 0.0001138375350140056, "loss": 0.466, "step": 31750 }, { "epoch": 17.73798882681564, "grad_norm": 0.4407392144203186, "learning_rate": 0.00011380952380952381, "loss": 0.5506, "step": 31751 }, { "epoch": 17.738547486033518, "grad_norm": 0.42283546924591064, "learning_rate": 0.00011378151260504202, "loss": 0.3625, "step": 31752 }, { "epoch": 17.739106145251398, "grad_norm": 1.8034919500350952, "learning_rate": 0.00011375350140056024, "loss": 0.4253, "step": 31753 }, { "epoch": 17.739664804469275, "grad_norm": 0.8658513426780701, "learning_rate": 0.00011372549019607843, "loss": 0.3778, "step": 31754 }, { "epoch": 17.74022346368715, "grad_norm": 0.4768412113189697, "learning_rate": 0.00011369747899159665, "loss": 0.518, "step": 31755 }, { "epoch": 17.740782122905028, "grad_norm": 0.480977863073349, "learning_rate": 0.00011366946778711484, "loss": 0.4105, "step": 31756 }, { "epoch": 17.741340782122904, "grad_norm": 0.4641282558441162, "learning_rate": 0.00011364145658263306, "loss": 0.4152, "step": 31757 }, { "epoch": 17.74189944134078, "grad_norm": 0.5431604385375977, "learning_rate": 0.00011361344537815125, "loss": 0.5353, "step": 31758 }, { "epoch": 17.742458100558657, "grad_norm": 0.41398873925209045, "learning_rate": 0.00011358543417366947, "loss": 0.4273, "step": 31759 }, { "epoch": 17.743016759776538, "grad_norm": 0.452446848154068, "learning_rate": 
0.00011355742296918768, "loss": 0.335, "step": 31760 }, { "epoch": 17.743575418994414, "grad_norm": 0.6121146082878113, "learning_rate": 0.00011352941176470589, "loss": 0.4743, "step": 31761 }, { "epoch": 17.74413407821229, "grad_norm": 1.3366907835006714, "learning_rate": 0.00011350140056022409, "loss": 0.4244, "step": 31762 }, { "epoch": 17.744692737430167, "grad_norm": 0.50154709815979, "learning_rate": 0.0001134733893557423, "loss": 0.3787, "step": 31763 }, { "epoch": 17.745251396648044, "grad_norm": 0.45586681365966797, "learning_rate": 0.0001134453781512605, "loss": 0.3345, "step": 31764 }, { "epoch": 17.74581005586592, "grad_norm": 0.36819028854370117, "learning_rate": 0.00011341736694677872, "loss": 0.3532, "step": 31765 }, { "epoch": 17.7463687150838, "grad_norm": 0.3592306077480316, "learning_rate": 0.00011338935574229692, "loss": 0.4122, "step": 31766 }, { "epoch": 17.746927374301677, "grad_norm": 0.36595141887664795, "learning_rate": 0.00011336134453781514, "loss": 0.4265, "step": 31767 }, { "epoch": 17.747486033519554, "grad_norm": 0.40586674213409424, "learning_rate": 0.00011333333333333333, "loss": 0.4106, "step": 31768 }, { "epoch": 17.74804469273743, "grad_norm": 1.2317415475845337, "learning_rate": 0.00011330532212885155, "loss": 0.3875, "step": 31769 }, { "epoch": 17.748603351955307, "grad_norm": 0.39180266857147217, "learning_rate": 0.00011327731092436975, "loss": 0.2927, "step": 31770 }, { "epoch": 17.749162011173183, "grad_norm": 0.447621613740921, "learning_rate": 0.00011324929971988796, "loss": 0.3638, "step": 31771 }, { "epoch": 17.74972067039106, "grad_norm": 0.3200156092643738, "learning_rate": 0.00011322128851540617, "loss": 0.3719, "step": 31772 }, { "epoch": 17.75027932960894, "grad_norm": 0.3594907522201538, "learning_rate": 0.00011319327731092437, "loss": 0.3404, "step": 31773 }, { "epoch": 17.750837988826817, "grad_norm": 0.3619016408920288, "learning_rate": 0.00011316526610644258, "loss": 0.2689, "step": 31774 }, { "epoch": 
17.751396648044693, "grad_norm": 0.4687427580356598, "learning_rate": 0.0001131372549019608, "loss": 0.3966, "step": 31775 }, { "epoch": 17.75195530726257, "grad_norm": 2.486599922180176, "learning_rate": 0.00011310924369747899, "loss": 0.4061, "step": 31776 }, { "epoch": 17.752513966480446, "grad_norm": 0.5070294141769409, "learning_rate": 0.00011308123249299721, "loss": 0.4258, "step": 31777 }, { "epoch": 17.753072625698323, "grad_norm": 0.8229541182518005, "learning_rate": 0.0001130532212885154, "loss": 0.4483, "step": 31778 }, { "epoch": 17.7536312849162, "grad_norm": 0.4754789173603058, "learning_rate": 0.00011302521008403362, "loss": 0.5531, "step": 31779 }, { "epoch": 17.75418994413408, "grad_norm": 0.8967899680137634, "learning_rate": 0.00011299719887955183, "loss": 0.3463, "step": 31780 }, { "epoch": 17.754748603351956, "grad_norm": 0.4442479908466339, "learning_rate": 0.00011296918767507003, "loss": 0.4539, "step": 31781 }, { "epoch": 17.755307262569833, "grad_norm": 0.48731866478919983, "learning_rate": 0.00011294117647058824, "loss": 0.4408, "step": 31782 }, { "epoch": 17.75586592178771, "grad_norm": 0.3123086094856262, "learning_rate": 0.00011291316526610644, "loss": 0.294, "step": 31783 }, { "epoch": 17.756424581005586, "grad_norm": 0.45249998569488525, "learning_rate": 0.00011288515406162465, "loss": 0.3584, "step": 31784 }, { "epoch": 17.756983240223462, "grad_norm": 0.36949098110198975, "learning_rate": 0.00011285714285714286, "loss": 0.4162, "step": 31785 }, { "epoch": 17.757541899441343, "grad_norm": 0.391250342130661, "learning_rate": 0.00011282913165266106, "loss": 0.3537, "step": 31786 }, { "epoch": 17.75810055865922, "grad_norm": 0.4333246350288391, "learning_rate": 0.00011280112044817928, "loss": 0.3671, "step": 31787 }, { "epoch": 17.758659217877096, "grad_norm": 0.4002021253108978, "learning_rate": 0.00011277310924369747, "loss": 0.4698, "step": 31788 }, { "epoch": 17.759217877094972, "grad_norm": 0.46529802680015564, "learning_rate": 
0.0001127450980392157, "loss": 0.4365, "step": 31789 }, { "epoch": 17.75977653631285, "grad_norm": 0.4416027069091797, "learning_rate": 0.00011271708683473389, "loss": 0.467, "step": 31790 }, { "epoch": 17.760335195530725, "grad_norm": 0.38470885157585144, "learning_rate": 0.0001126890756302521, "loss": 0.4718, "step": 31791 }, { "epoch": 17.760893854748602, "grad_norm": 2.6901586055755615, "learning_rate": 0.00011266106442577031, "loss": 0.3468, "step": 31792 }, { "epoch": 17.761452513966482, "grad_norm": 0.5298575758934021, "learning_rate": 0.00011263305322128852, "loss": 0.4428, "step": 31793 }, { "epoch": 17.76201117318436, "grad_norm": 0.36829259991645813, "learning_rate": 0.00011260504201680672, "loss": 0.3987, "step": 31794 }, { "epoch": 17.762569832402235, "grad_norm": 0.6363039016723633, "learning_rate": 0.00011257703081232493, "loss": 0.496, "step": 31795 }, { "epoch": 17.76312849162011, "grad_norm": 0.37412530183792114, "learning_rate": 0.00011254901960784314, "loss": 0.4066, "step": 31796 }, { "epoch": 17.76368715083799, "grad_norm": 3.429553747177124, "learning_rate": 0.00011252100840336136, "loss": 0.4323, "step": 31797 }, { "epoch": 17.764245810055865, "grad_norm": 1.2768347263336182, "learning_rate": 0.00011249299719887955, "loss": 0.5037, "step": 31798 }, { "epoch": 17.76480446927374, "grad_norm": 0.31131407618522644, "learning_rate": 0.00011246498599439777, "loss": 0.3964, "step": 31799 }, { "epoch": 17.76536312849162, "grad_norm": 0.39107710123062134, "learning_rate": 0.00011243697478991596, "loss": 0.3687, "step": 31800 }, { "epoch": 17.765921787709498, "grad_norm": 0.3947944641113281, "learning_rate": 0.00011240896358543418, "loss": 0.4411, "step": 31801 }, { "epoch": 17.766480446927375, "grad_norm": 0.32954323291778564, "learning_rate": 0.00011238095238095239, "loss": 0.3008, "step": 31802 }, { "epoch": 17.76703910614525, "grad_norm": 0.45163053274154663, "learning_rate": 0.00011235294117647059, "loss": 0.5035, "step": 31803 }, { "epoch": 
17.767597765363128, "grad_norm": 0.3795064091682434, "learning_rate": 0.0001123249299719888, "loss": 0.3447, "step": 31804 }, { "epoch": 17.768156424581004, "grad_norm": 1.524694800376892, "learning_rate": 0.000112296918767507, "loss": 0.4474, "step": 31805 }, { "epoch": 17.76871508379888, "grad_norm": 0.34645721316337585, "learning_rate": 0.00011226890756302521, "loss": 0.3678, "step": 31806 }, { "epoch": 17.76927374301676, "grad_norm": 0.4489235281944275, "learning_rate": 0.00011224089635854343, "loss": 0.4142, "step": 31807 }, { "epoch": 17.769832402234638, "grad_norm": 0.3958072364330292, "learning_rate": 0.00011221288515406162, "loss": 0.458, "step": 31808 }, { "epoch": 17.770391061452514, "grad_norm": 1.2771408557891846, "learning_rate": 0.00011218487394957984, "loss": 0.3309, "step": 31809 }, { "epoch": 17.77094972067039, "grad_norm": 0.3904511332511902, "learning_rate": 0.00011215686274509803, "loss": 0.3707, "step": 31810 }, { "epoch": 17.771508379888267, "grad_norm": 0.3633131682872772, "learning_rate": 0.00011212885154061625, "loss": 0.3726, "step": 31811 }, { "epoch": 17.772067039106144, "grad_norm": 0.5046189427375793, "learning_rate": 0.00011210084033613445, "loss": 0.3883, "step": 31812 }, { "epoch": 17.772625698324024, "grad_norm": 0.42271968722343445, "learning_rate": 0.00011207282913165267, "loss": 0.3196, "step": 31813 }, { "epoch": 17.7731843575419, "grad_norm": 0.3760518729686737, "learning_rate": 0.00011204481792717087, "loss": 0.4372, "step": 31814 }, { "epoch": 17.773743016759777, "grad_norm": 0.9491005539894104, "learning_rate": 0.00011201680672268908, "loss": 0.3523, "step": 31815 }, { "epoch": 17.774301675977654, "grad_norm": 0.4184345304965973, "learning_rate": 0.00011198879551820728, "loss": 0.3349, "step": 31816 }, { "epoch": 17.77486033519553, "grad_norm": 1.4697279930114746, "learning_rate": 0.00011196078431372549, "loss": 0.486, "step": 31817 }, { "epoch": 17.775418994413407, "grad_norm": 0.6576196551322937, "learning_rate": 
0.0001119327731092437, "loss": 0.3433, "step": 31818 }, { "epoch": 17.775977653631283, "grad_norm": 1.0015850067138672, "learning_rate": 0.00011190476190476191, "loss": 0.3722, "step": 31819 }, { "epoch": 17.776536312849164, "grad_norm": 0.5193490982055664, "learning_rate": 0.00011187675070028011, "loss": 0.4575, "step": 31820 }, { "epoch": 17.77709497206704, "grad_norm": 0.3868086040019989, "learning_rate": 0.00011184873949579833, "loss": 0.3511, "step": 31821 }, { "epoch": 17.777653631284917, "grad_norm": 0.6862823963165283, "learning_rate": 0.00011182072829131652, "loss": 0.4379, "step": 31822 }, { "epoch": 17.778212290502793, "grad_norm": 0.5161616802215576, "learning_rate": 0.00011179271708683474, "loss": 0.4775, "step": 31823 }, { "epoch": 17.77877094972067, "grad_norm": 0.5153317451477051, "learning_rate": 0.00011176470588235294, "loss": 0.4161, "step": 31824 }, { "epoch": 17.779329608938546, "grad_norm": 0.4883970618247986, "learning_rate": 0.00011173669467787115, "loss": 0.3423, "step": 31825 }, { "epoch": 17.779888268156423, "grad_norm": 0.5907506346702576, "learning_rate": 0.00011170868347338936, "loss": 0.3985, "step": 31826 }, { "epoch": 17.780446927374303, "grad_norm": 0.7341945171356201, "learning_rate": 0.00011168067226890756, "loss": 0.3556, "step": 31827 }, { "epoch": 17.78100558659218, "grad_norm": 0.2722187638282776, "learning_rate": 0.00011165266106442577, "loss": 0.2608, "step": 31828 }, { "epoch": 17.781564245810056, "grad_norm": 1.0369043350219727, "learning_rate": 0.00011162464985994399, "loss": 0.4905, "step": 31829 }, { "epoch": 17.782122905027933, "grad_norm": 0.3412696123123169, "learning_rate": 0.00011159663865546218, "loss": 0.3563, "step": 31830 }, { "epoch": 17.78268156424581, "grad_norm": 0.3238923251628876, "learning_rate": 0.0001115686274509804, "loss": 0.3186, "step": 31831 }, { "epoch": 17.783240223463686, "grad_norm": 0.872382640838623, "learning_rate": 0.00011154061624649859, "loss": 0.4017, "step": 31832 }, { "epoch": 
17.783798882681566, "grad_norm": 0.3689119815826416, "learning_rate": 0.00011151260504201681, "loss": 0.3306, "step": 31833 }, { "epoch": 17.784357541899443, "grad_norm": 0.4544695317745209, "learning_rate": 0.00011148459383753502, "loss": 0.4932, "step": 31834 }, { "epoch": 17.78491620111732, "grad_norm": 0.451572448015213, "learning_rate": 0.00011145658263305322, "loss": 0.3543, "step": 31835 }, { "epoch": 17.785474860335196, "grad_norm": 0.40825968980789185, "learning_rate": 0.00011142857142857143, "loss": 0.4308, "step": 31836 }, { "epoch": 17.786033519553072, "grad_norm": 2.6506662368774414, "learning_rate": 0.00011140056022408964, "loss": 0.3571, "step": 31837 }, { "epoch": 17.78659217877095, "grad_norm": 0.3947301208972931, "learning_rate": 0.00011137254901960784, "loss": 0.3348, "step": 31838 }, { "epoch": 17.787150837988825, "grad_norm": 2.443533420562744, "learning_rate": 0.00011134453781512606, "loss": 0.3631, "step": 31839 }, { "epoch": 17.787709497206706, "grad_norm": 0.3971710205078125, "learning_rate": 0.00011131652661064425, "loss": 0.3422, "step": 31840 }, { "epoch": 17.788268156424582, "grad_norm": 0.6614204049110413, "learning_rate": 0.00011128851540616247, "loss": 0.3085, "step": 31841 }, { "epoch": 17.78882681564246, "grad_norm": 0.3786182403564453, "learning_rate": 0.00011126050420168067, "loss": 0.3332, "step": 31842 }, { "epoch": 17.789385474860335, "grad_norm": 1.9345414638519287, "learning_rate": 0.00011123249299719889, "loss": 0.4199, "step": 31843 }, { "epoch": 17.789944134078212, "grad_norm": 0.49531999230384827, "learning_rate": 0.00011120448179271708, "loss": 0.5077, "step": 31844 }, { "epoch": 17.79050279329609, "grad_norm": 0.49440038204193115, "learning_rate": 0.0001111764705882353, "loss": 0.4385, "step": 31845 }, { "epoch": 17.791061452513965, "grad_norm": 2.345907688140869, "learning_rate": 0.0001111484593837535, "loss": 0.3708, "step": 31846 }, { "epoch": 17.791620111731845, "grad_norm": 0.780145525932312, "learning_rate": 
0.00011112044817927171, "loss": 0.3446, "step": 31847 }, { "epoch": 17.79217877094972, "grad_norm": 0.4204564690589905, "learning_rate": 0.00011109243697478992, "loss": 0.3942, "step": 31848 }, { "epoch": 17.7927374301676, "grad_norm": 0.4733802080154419, "learning_rate": 0.00011106442577030812, "loss": 0.4914, "step": 31849 }, { "epoch": 17.793296089385475, "grad_norm": 0.49106743931770325, "learning_rate": 0.00011103641456582633, "loss": 0.4141, "step": 31850 }, { "epoch": 17.79385474860335, "grad_norm": 2.615499973297119, "learning_rate": 0.00011100840336134455, "loss": 0.4028, "step": 31851 }, { "epoch": 17.794413407821228, "grad_norm": 0.5444148182868958, "learning_rate": 0.00011098039215686274, "loss": 0.4486, "step": 31852 }, { "epoch": 17.794972067039105, "grad_norm": 0.3481050431728363, "learning_rate": 0.00011095238095238096, "loss": 0.3265, "step": 31853 }, { "epoch": 17.795530726256985, "grad_norm": 0.4157502353191376, "learning_rate": 0.00011092436974789915, "loss": 0.338, "step": 31854 }, { "epoch": 17.79608938547486, "grad_norm": 0.4834425449371338, "learning_rate": 0.00011089635854341737, "loss": 0.4843, "step": 31855 }, { "epoch": 17.796648044692738, "grad_norm": 0.4319273829460144, "learning_rate": 0.00011086834733893558, "loss": 0.3502, "step": 31856 }, { "epoch": 17.797206703910614, "grad_norm": 0.5160530805587769, "learning_rate": 0.00011084033613445378, "loss": 0.4258, "step": 31857 }, { "epoch": 17.79776536312849, "grad_norm": 0.37492382526397705, "learning_rate": 0.00011081232492997199, "loss": 0.3746, "step": 31858 }, { "epoch": 17.798324022346367, "grad_norm": 5.192200660705566, "learning_rate": 0.0001107843137254902, "loss": 0.4337, "step": 31859 }, { "epoch": 17.798882681564244, "grad_norm": 0.6172341108322144, "learning_rate": 0.0001107563025210084, "loss": 0.45, "step": 31860 }, { "epoch": 17.799441340782124, "grad_norm": 0.5336006879806519, "learning_rate": 0.00011072829131652662, "loss": 0.431, "step": 31861 }, { "epoch": 17.8, 
"grad_norm": 0.5429198741912842, "learning_rate": 0.00011070028011204481, "loss": 0.5343, "step": 31862 }, { "epoch": 17.800558659217877, "grad_norm": 0.6353127956390381, "learning_rate": 0.00011067226890756303, "loss": 0.5253, "step": 31863 }, { "epoch": 17.801117318435754, "grad_norm": 1.7069145441055298, "learning_rate": 0.00011064425770308123, "loss": 0.4263, "step": 31864 }, { "epoch": 17.80167597765363, "grad_norm": 0.5213304162025452, "learning_rate": 0.00011061624649859944, "loss": 0.5414, "step": 31865 }, { "epoch": 17.802234636871507, "grad_norm": 6.125345230102539, "learning_rate": 0.00011058823529411765, "loss": 0.3751, "step": 31866 }, { "epoch": 17.802793296089387, "grad_norm": 0.36859026551246643, "learning_rate": 0.00011056022408963586, "loss": 0.4566, "step": 31867 }, { "epoch": 17.803351955307264, "grad_norm": 0.4032394587993622, "learning_rate": 0.00011053221288515406, "loss": 0.489, "step": 31868 }, { "epoch": 17.80391061452514, "grad_norm": 0.45502305030822754, "learning_rate": 0.00011050420168067227, "loss": 0.4236, "step": 31869 }, { "epoch": 17.804469273743017, "grad_norm": 0.4770444631576538, "learning_rate": 0.00011047619047619047, "loss": 0.4102, "step": 31870 }, { "epoch": 17.805027932960893, "grad_norm": 0.3851770758628845, "learning_rate": 0.00011044817927170868, "loss": 0.333, "step": 31871 }, { "epoch": 17.80558659217877, "grad_norm": 0.4486406743526459, "learning_rate": 0.00011042016806722689, "loss": 0.3953, "step": 31872 }, { "epoch": 17.806145251396647, "grad_norm": 0.4979979395866394, "learning_rate": 0.0001103921568627451, "loss": 0.3086, "step": 31873 }, { "epoch": 17.806703910614527, "grad_norm": 0.43344834446907043, "learning_rate": 0.0001103641456582633, "loss": 0.2791, "step": 31874 }, { "epoch": 17.807262569832403, "grad_norm": 0.3375483751296997, "learning_rate": 0.00011033613445378152, "loss": 0.3637, "step": 31875 }, { "epoch": 17.80782122905028, "grad_norm": 0.5579739809036255, "learning_rate": 0.00011030812324929971, 
"loss": 0.3386, "step": 31876 }, { "epoch": 17.808379888268156, "grad_norm": 0.6381669640541077, "learning_rate": 0.00011028011204481793, "loss": 0.4795, "step": 31877 }, { "epoch": 17.808938547486033, "grad_norm": 0.5497968196868896, "learning_rate": 0.00011025210084033614, "loss": 0.5316, "step": 31878 }, { "epoch": 17.80949720670391, "grad_norm": 0.6413907408714294, "learning_rate": 0.00011022408963585434, "loss": 0.2878, "step": 31879 }, { "epoch": 17.810055865921786, "grad_norm": 0.3716050386428833, "learning_rate": 0.00011019607843137255, "loss": 0.3482, "step": 31880 }, { "epoch": 17.810614525139666, "grad_norm": 0.31638702750205994, "learning_rate": 0.00011016806722689075, "loss": 0.3785, "step": 31881 }, { "epoch": 17.811173184357543, "grad_norm": 1.0723401308059692, "learning_rate": 0.00011014005602240896, "loss": 0.3525, "step": 31882 }, { "epoch": 17.81173184357542, "grad_norm": 0.5220655202865601, "learning_rate": 0.00011011204481792718, "loss": 0.4249, "step": 31883 }, { "epoch": 17.812290502793296, "grad_norm": 0.6201823949813843, "learning_rate": 0.00011008403361344537, "loss": 0.3958, "step": 31884 }, { "epoch": 17.812849162011172, "grad_norm": 0.3612859845161438, "learning_rate": 0.00011005602240896359, "loss": 0.3515, "step": 31885 }, { "epoch": 17.81340782122905, "grad_norm": 0.6465184092521667, "learning_rate": 0.00011002801120448178, "loss": 0.4484, "step": 31886 }, { "epoch": 17.81396648044693, "grad_norm": 0.410290390253067, "learning_rate": 0.00011, "loss": 0.3302, "step": 31887 }, { "epoch": 17.814525139664806, "grad_norm": 0.7216017246246338, "learning_rate": 0.00010997198879551821, "loss": 0.6642, "step": 31888 }, { "epoch": 17.815083798882682, "grad_norm": 0.36545509099960327, "learning_rate": 0.00010994397759103642, "loss": 0.4407, "step": 31889 }, { "epoch": 17.81564245810056, "grad_norm": 0.5036261677742004, "learning_rate": 0.00010991596638655462, "loss": 0.4231, "step": 31890 }, { "epoch": 17.816201117318435, "grad_norm": 
0.6891294717788696, "learning_rate": 0.00010988795518207283, "loss": 0.5012, "step": 31891 }, { "epoch": 17.816759776536312, "grad_norm": 0.5240914821624756, "learning_rate": 0.00010985994397759103, "loss": 0.3678, "step": 31892 }, { "epoch": 17.81731843575419, "grad_norm": 0.5042918920516968, "learning_rate": 0.00010983193277310925, "loss": 0.4614, "step": 31893 }, { "epoch": 17.81787709497207, "grad_norm": 0.6323612332344055, "learning_rate": 0.00010980392156862745, "loss": 0.3963, "step": 31894 }, { "epoch": 17.818435754189945, "grad_norm": 0.36174264550209045, "learning_rate": 0.00010977591036414567, "loss": 0.4666, "step": 31895 }, { "epoch": 17.81899441340782, "grad_norm": 0.42684587836265564, "learning_rate": 0.00010974789915966386, "loss": 0.4552, "step": 31896 }, { "epoch": 17.8195530726257, "grad_norm": 0.5217669010162354, "learning_rate": 0.00010971988795518208, "loss": 0.5117, "step": 31897 }, { "epoch": 17.820111731843575, "grad_norm": 0.5353214144706726, "learning_rate": 0.00010969187675070027, "loss": 0.4298, "step": 31898 }, { "epoch": 17.82067039106145, "grad_norm": 0.41908028721809387, "learning_rate": 0.00010966386554621849, "loss": 0.3252, "step": 31899 }, { "epoch": 17.821229050279328, "grad_norm": 2.6000418663024902, "learning_rate": 0.0001096358543417367, "loss": 0.4494, "step": 31900 }, { "epoch": 17.821787709497208, "grad_norm": 0.9905964732170105, "learning_rate": 0.0001096078431372549, "loss": 0.2856, "step": 31901 }, { "epoch": 17.822346368715085, "grad_norm": 0.5244598388671875, "learning_rate": 0.00010957983193277311, "loss": 0.4672, "step": 31902 }, { "epoch": 17.82290502793296, "grad_norm": 0.34664949774742126, "learning_rate": 0.00010955182072829131, "loss": 0.3729, "step": 31903 }, { "epoch": 17.823463687150838, "grad_norm": 0.3882288634777069, "learning_rate": 0.00010952380952380952, "loss": 0.3285, "step": 31904 }, { "epoch": 17.824022346368714, "grad_norm": 0.5065073370933533, "learning_rate": 0.00010949579831932774, "loss": 
0.4638, "step": 31905 }, { "epoch": 17.82458100558659, "grad_norm": 0.41068363189697266, "learning_rate": 0.00010946778711484593, "loss": 0.3416, "step": 31906 }, { "epoch": 17.825139664804468, "grad_norm": 0.31750819087028503, "learning_rate": 0.00010943977591036415, "loss": 0.3399, "step": 31907 }, { "epoch": 17.825698324022348, "grad_norm": 0.8183224201202393, "learning_rate": 0.00010941176470588234, "loss": 0.3684, "step": 31908 }, { "epoch": 17.826256983240224, "grad_norm": 0.5211698412895203, "learning_rate": 0.00010938375350140056, "loss": 0.4143, "step": 31909 }, { "epoch": 17.8268156424581, "grad_norm": 2.1088993549346924, "learning_rate": 0.00010935574229691877, "loss": 0.4852, "step": 31910 }, { "epoch": 17.827374301675977, "grad_norm": 0.5901950001716614, "learning_rate": 0.00010932773109243697, "loss": 0.4276, "step": 31911 }, { "epoch": 17.827932960893854, "grad_norm": 0.5979985594749451, "learning_rate": 0.00010929971988795518, "loss": 0.4371, "step": 31912 }, { "epoch": 17.82849162011173, "grad_norm": 0.39830952882766724, "learning_rate": 0.00010927170868347339, "loss": 0.3631, "step": 31913 }, { "epoch": 17.82905027932961, "grad_norm": 0.43950772285461426, "learning_rate": 0.00010924369747899159, "loss": 0.374, "step": 31914 }, { "epoch": 17.829608938547487, "grad_norm": 0.4283125102519989, "learning_rate": 0.00010921568627450981, "loss": 0.3383, "step": 31915 }, { "epoch": 17.830167597765364, "grad_norm": 0.47540703415870667, "learning_rate": 0.000109187675070028, "loss": 0.4599, "step": 31916 }, { "epoch": 17.83072625698324, "grad_norm": 0.3979719579219818, "learning_rate": 0.00010915966386554622, "loss": 0.2708, "step": 31917 }, { "epoch": 17.831284916201117, "grad_norm": 0.43210309743881226, "learning_rate": 0.00010913165266106442, "loss": 0.4322, "step": 31918 }, { "epoch": 17.831843575418993, "grad_norm": 2.098341226577759, "learning_rate": 0.00010910364145658264, "loss": 0.4979, "step": 31919 }, { "epoch": 17.83240223463687, "grad_norm": 
1.82694673538208, "learning_rate": 0.00010907563025210086, "loss": 0.3639, "step": 31920 }, { "epoch": 17.83296089385475, "grad_norm": 1.404898762702942, "learning_rate": 0.00010904761904761905, "loss": 0.3612, "step": 31921 }, { "epoch": 17.833519553072627, "grad_norm": 0.3981015980243683, "learning_rate": 0.00010901960784313727, "loss": 0.4262, "step": 31922 }, { "epoch": 17.834078212290503, "grad_norm": 10.760833740234375, "learning_rate": 0.00010899159663865546, "loss": 0.4608, "step": 31923 }, { "epoch": 17.83463687150838, "grad_norm": 1.3174183368682861, "learning_rate": 0.00010896358543417368, "loss": 0.425, "step": 31924 }, { "epoch": 17.835195530726256, "grad_norm": 0.4950545132160187, "learning_rate": 0.00010893557422969187, "loss": 0.3357, "step": 31925 }, { "epoch": 17.835754189944133, "grad_norm": 0.46289488673210144, "learning_rate": 0.00010890756302521009, "loss": 0.5155, "step": 31926 }, { "epoch": 17.83631284916201, "grad_norm": 0.4915810823440552, "learning_rate": 0.0001088795518207283, "loss": 0.3832, "step": 31927 }, { "epoch": 17.83687150837989, "grad_norm": 0.4239140748977661, "learning_rate": 0.0001088515406162465, "loss": 0.3538, "step": 31928 }, { "epoch": 17.837430167597766, "grad_norm": 0.4773687720298767, "learning_rate": 0.00010882352941176471, "loss": 0.4977, "step": 31929 }, { "epoch": 17.837988826815643, "grad_norm": 0.4510042667388916, "learning_rate": 0.00010879551820728292, "loss": 0.494, "step": 31930 }, { "epoch": 17.83854748603352, "grad_norm": 0.363717645406723, "learning_rate": 0.00010876750700280112, "loss": 0.4335, "step": 31931 }, { "epoch": 17.839106145251396, "grad_norm": 0.36441436409950256, "learning_rate": 0.00010873949579831934, "loss": 0.3809, "step": 31932 }, { "epoch": 17.839664804469272, "grad_norm": 0.4575157165527344, "learning_rate": 0.00010871148459383753, "loss": 0.4163, "step": 31933 }, { "epoch": 17.840223463687153, "grad_norm": 0.3995513617992401, "learning_rate": 0.00010868347338935575, "loss": 0.3774, 
"step": 31934 }, { "epoch": 17.84078212290503, "grad_norm": 0.46553125977516174, "learning_rate": 0.00010865546218487395, "loss": 0.4469, "step": 31935 }, { "epoch": 17.841340782122906, "grad_norm": 0.5818611979484558, "learning_rate": 0.00010862745098039217, "loss": 0.5001, "step": 31936 }, { "epoch": 17.841899441340782, "grad_norm": 0.5967891216278076, "learning_rate": 0.00010859943977591037, "loss": 0.4058, "step": 31937 }, { "epoch": 17.84245810055866, "grad_norm": 0.3642972707748413, "learning_rate": 0.00010857142857142858, "loss": 0.4445, "step": 31938 }, { "epoch": 17.843016759776535, "grad_norm": 0.7218237519264221, "learning_rate": 0.00010854341736694678, "loss": 0.3182, "step": 31939 }, { "epoch": 17.843575418994412, "grad_norm": 0.49845090508461, "learning_rate": 0.00010851540616246499, "loss": 0.3442, "step": 31940 }, { "epoch": 17.844134078212292, "grad_norm": 0.804343044757843, "learning_rate": 0.0001084873949579832, "loss": 0.3806, "step": 31941 }, { "epoch": 17.84469273743017, "grad_norm": 0.545930802822113, "learning_rate": 0.00010845938375350141, "loss": 0.5129, "step": 31942 }, { "epoch": 17.845251396648045, "grad_norm": 1.05837082862854, "learning_rate": 0.00010843137254901961, "loss": 0.4157, "step": 31943 }, { "epoch": 17.845810055865922, "grad_norm": 0.45503243803977966, "learning_rate": 0.00010840336134453783, "loss": 0.3442, "step": 31944 }, { "epoch": 17.8463687150838, "grad_norm": 0.5501124858856201, "learning_rate": 0.00010837535014005602, "loss": 0.3084, "step": 31945 }, { "epoch": 17.846927374301675, "grad_norm": 0.3977105915546417, "learning_rate": 0.00010834733893557424, "loss": 0.462, "step": 31946 }, { "epoch": 17.84748603351955, "grad_norm": 0.5938095450401306, "learning_rate": 0.00010831932773109244, "loss": 0.5859, "step": 31947 }, { "epoch": 17.84804469273743, "grad_norm": 0.4917389452457428, "learning_rate": 0.00010829131652661065, "loss": 0.3658, "step": 31948 }, { "epoch": 17.84860335195531, "grad_norm": 11.339869499206543, 
"learning_rate": 0.00010826330532212886, "loss": 0.4489, "step": 31949 }, { "epoch": 17.849162011173185, "grad_norm": 0.9019293785095215, "learning_rate": 0.00010823529411764706, "loss": 0.5327, "step": 31950 }, { "epoch": 17.84972067039106, "grad_norm": 0.5398561358451843, "learning_rate": 0.00010820728291316527, "loss": 0.488, "step": 31951 }, { "epoch": 17.850279329608938, "grad_norm": 2.950305938720703, "learning_rate": 0.00010817927170868347, "loss": 0.3323, "step": 31952 }, { "epoch": 17.850837988826814, "grad_norm": 0.4664683938026428, "learning_rate": 0.00010815126050420168, "loss": 0.4722, "step": 31953 }, { "epoch": 17.85139664804469, "grad_norm": 0.8299731612205505, "learning_rate": 0.0001081232492997199, "loss": 0.4752, "step": 31954 }, { "epoch": 17.85195530726257, "grad_norm": 0.39772647619247437, "learning_rate": 0.00010809523809523809, "loss": 0.4135, "step": 31955 }, { "epoch": 17.852513966480448, "grad_norm": 0.9067767262458801, "learning_rate": 0.00010806722689075631, "loss": 0.4144, "step": 31956 }, { "epoch": 17.853072625698324, "grad_norm": 0.4373841881752014, "learning_rate": 0.0001080392156862745, "loss": 0.4244, "step": 31957 }, { "epoch": 17.8536312849162, "grad_norm": 0.6203702092170715, "learning_rate": 0.00010801120448179272, "loss": 0.5448, "step": 31958 }, { "epoch": 17.854189944134077, "grad_norm": 0.40931329131126404, "learning_rate": 0.00010798319327731093, "loss": 0.34, "step": 31959 }, { "epoch": 17.854748603351954, "grad_norm": 0.42782947421073914, "learning_rate": 0.00010795518207282914, "loss": 0.4218, "step": 31960 }, { "epoch": 17.85530726256983, "grad_norm": 0.6662193536758423, "learning_rate": 0.00010792717086834734, "loss": 0.5633, "step": 31961 }, { "epoch": 17.85586592178771, "grad_norm": 0.5704799890518188, "learning_rate": 0.00010789915966386555, "loss": 0.3347, "step": 31962 }, { "epoch": 17.856424581005587, "grad_norm": 1.324694275856018, "learning_rate": 0.00010787114845938375, "loss": 0.4297, "step": 31963 }, { 
"epoch": 17.856983240223464, "grad_norm": 0.4512801468372345, "learning_rate": 0.00010784313725490197, "loss": 0.2948, "step": 31964 }, { "epoch": 17.85754189944134, "grad_norm": 0.9972048401832581, "learning_rate": 0.00010781512605042017, "loss": 0.4534, "step": 31965 }, { "epoch": 17.858100558659217, "grad_norm": 0.4238443076610565, "learning_rate": 0.00010778711484593839, "loss": 0.3982, "step": 31966 }, { "epoch": 17.858659217877094, "grad_norm": 0.5718919038772583, "learning_rate": 0.00010775910364145658, "loss": 0.4961, "step": 31967 }, { "epoch": 17.859217877094974, "grad_norm": 1.6888715028762817, "learning_rate": 0.0001077310924369748, "loss": 0.4457, "step": 31968 }, { "epoch": 17.85977653631285, "grad_norm": 0.49498414993286133, "learning_rate": 0.000107703081232493, "loss": 0.542, "step": 31969 }, { "epoch": 17.860335195530727, "grad_norm": 3.436616897583008, "learning_rate": 0.00010767507002801121, "loss": 0.4906, "step": 31970 }, { "epoch": 17.860893854748603, "grad_norm": 0.4372938275337219, "learning_rate": 0.00010764705882352942, "loss": 0.4205, "step": 31971 }, { "epoch": 17.86145251396648, "grad_norm": 0.4418832063674927, "learning_rate": 0.00010761904761904762, "loss": 0.3754, "step": 31972 }, { "epoch": 17.862011173184356, "grad_norm": 0.9103538393974304, "learning_rate": 0.00010759103641456583, "loss": 0.5945, "step": 31973 }, { "epoch": 17.862569832402233, "grad_norm": 0.9872821569442749, "learning_rate": 0.00010756302521008405, "loss": 0.4801, "step": 31974 }, { "epoch": 17.863128491620113, "grad_norm": 0.45777133107185364, "learning_rate": 0.00010753501400560224, "loss": 0.381, "step": 31975 }, { "epoch": 17.86368715083799, "grad_norm": 0.36995068192481995, "learning_rate": 0.00010750700280112046, "loss": 0.3555, "step": 31976 }, { "epoch": 17.864245810055866, "grad_norm": 0.6736024022102356, "learning_rate": 0.00010747899159663865, "loss": 0.4196, "step": 31977 }, { "epoch": 17.864804469273743, "grad_norm": 0.7733534574508667, 
"learning_rate": 0.00010745098039215687, "loss": 0.5822, "step": 31978 }, { "epoch": 17.86536312849162, "grad_norm": 0.47300904989242554, "learning_rate": 0.00010742296918767508, "loss": 0.463, "step": 31979 }, { "epoch": 17.865921787709496, "grad_norm": 0.4311671853065491, "learning_rate": 0.00010739495798319328, "loss": 0.3113, "step": 31980 }, { "epoch": 17.866480446927373, "grad_norm": 0.5093075633049011, "learning_rate": 0.00010736694677871149, "loss": 0.3407, "step": 31981 }, { "epoch": 17.867039106145253, "grad_norm": 0.4941078722476959, "learning_rate": 0.0001073389355742297, "loss": 0.4441, "step": 31982 }, { "epoch": 17.86759776536313, "grad_norm": 0.7564123272895813, "learning_rate": 0.0001073109243697479, "loss": 0.3644, "step": 31983 }, { "epoch": 17.868156424581006, "grad_norm": 0.4897879660129547, "learning_rate": 0.00010728291316526611, "loss": 0.5605, "step": 31984 }, { "epoch": 17.868715083798882, "grad_norm": 0.36037081480026245, "learning_rate": 0.00010725490196078431, "loss": 0.4279, "step": 31985 }, { "epoch": 17.86927374301676, "grad_norm": 1.0760464668273926, "learning_rate": 0.00010722689075630253, "loss": 0.4798, "step": 31986 }, { "epoch": 17.869832402234636, "grad_norm": 0.39917442202568054, "learning_rate": 0.00010719887955182073, "loss": 0.3588, "step": 31987 }, { "epoch": 17.870391061452516, "grad_norm": 0.33753713965415955, "learning_rate": 0.00010717086834733894, "loss": 0.3469, "step": 31988 }, { "epoch": 17.870949720670392, "grad_norm": 0.5130229592323303, "learning_rate": 0.00010714285714285714, "loss": 0.431, "step": 31989 }, { "epoch": 17.87150837988827, "grad_norm": 0.41078999638557434, "learning_rate": 0.00010711484593837536, "loss": 0.44, "step": 31990 }, { "epoch": 17.872067039106145, "grad_norm": 1.2140178680419922, "learning_rate": 0.00010708683473389356, "loss": 0.3265, "step": 31991 }, { "epoch": 17.872625698324022, "grad_norm": 1.0050920248031616, "learning_rate": 0.00010705882352941177, "loss": 0.3698, "step": 31992 
}, { "epoch": 17.8731843575419, "grad_norm": 0.5039964318275452, "learning_rate": 0.00010703081232492997, "loss": 0.4237, "step": 31993 }, { "epoch": 17.873743016759775, "grad_norm": 0.393161416053772, "learning_rate": 0.00010700280112044818, "loss": 0.3062, "step": 31994 }, { "epoch": 17.874301675977655, "grad_norm": 0.7035702466964722, "learning_rate": 0.00010697478991596639, "loss": 0.4219, "step": 31995 }, { "epoch": 17.87486033519553, "grad_norm": 0.40759238600730896, "learning_rate": 0.0001069467787114846, "loss": 0.3632, "step": 31996 }, { "epoch": 17.87541899441341, "grad_norm": 0.41441529989242554, "learning_rate": 0.0001069187675070028, "loss": 0.3941, "step": 31997 }, { "epoch": 17.875977653631285, "grad_norm": 0.36333101987838745, "learning_rate": 0.00010689075630252102, "loss": 0.4258, "step": 31998 }, { "epoch": 17.87653631284916, "grad_norm": 0.34713229537010193, "learning_rate": 0.00010686274509803921, "loss": 0.3317, "step": 31999 }, { "epoch": 17.877094972067038, "grad_norm": 0.955400824546814, "learning_rate": 0.00010683473389355743, "loss": 0.4111, "step": 32000 }, { "epoch": 17.877094972067038, "eval_cer": 0.08432893631414139, "eval_loss": 0.3200739026069641, "eval_runtime": 55.6201, "eval_samples_per_second": 81.589, "eval_steps_per_second": 5.106, "eval_wer": 0.33325884751822576, "step": 32000 }, { "epoch": 17.877653631284915, "grad_norm": 0.4626900851726532, "learning_rate": 0.00010680672268907564, "loss": 0.4202, "step": 32001 }, { "epoch": 17.878212290502795, "grad_norm": 0.44135206937789917, "learning_rate": 0.00010677871148459384, "loss": 0.344, "step": 32002 }, { "epoch": 17.87877094972067, "grad_norm": 0.4019858241081238, "learning_rate": 0.00010675070028011205, "loss": 0.288, "step": 32003 }, { "epoch": 17.879329608938548, "grad_norm": 0.4524330496788025, "learning_rate": 0.00010672268907563025, "loss": 0.3672, "step": 32004 }, { "epoch": 17.879888268156424, "grad_norm": 0.6566335558891296, "learning_rate": 0.00010669467787114846, 
"loss": 0.4763, "step": 32005 }, { "epoch": 17.8804469273743, "grad_norm": 1.3326184749603271, "learning_rate": 0.00010666666666666668, "loss": 0.5798, "step": 32006 }, { "epoch": 17.881005586592178, "grad_norm": 25.050582885742188, "learning_rate": 0.00010663865546218487, "loss": 0.3253, "step": 32007 }, { "epoch": 17.881564245810054, "grad_norm": 0.5367122888565063, "learning_rate": 0.00010661064425770309, "loss": 0.5938, "step": 32008 }, { "epoch": 17.882122905027934, "grad_norm": 0.39557814598083496, "learning_rate": 0.00010658263305322128, "loss": 0.4841, "step": 32009 }, { "epoch": 17.88268156424581, "grad_norm": 9.567939758300781, "learning_rate": 0.0001065546218487395, "loss": 0.4048, "step": 32010 }, { "epoch": 17.883240223463687, "grad_norm": 0.53277587890625, "learning_rate": 0.0001065266106442577, "loss": 0.6215, "step": 32011 }, { "epoch": 17.883798882681564, "grad_norm": 0.419577032327652, "learning_rate": 0.00010649859943977592, "loss": 0.4554, "step": 32012 }, { "epoch": 17.88435754189944, "grad_norm": 0.4224916994571686, "learning_rate": 0.00010647058823529412, "loss": 0.3955, "step": 32013 }, { "epoch": 17.884916201117317, "grad_norm": 0.8175129294395447, "learning_rate": 0.00010644257703081233, "loss": 0.5371, "step": 32014 }, { "epoch": 17.885474860335197, "grad_norm": 0.4805639684200287, "learning_rate": 0.00010641456582633053, "loss": 0.3878, "step": 32015 }, { "epoch": 17.886033519553074, "grad_norm": 0.450518935918808, "learning_rate": 0.00010638655462184874, "loss": 0.3436, "step": 32016 }, { "epoch": 17.88659217877095, "grad_norm": 0.43353959918022156, "learning_rate": 0.00010635854341736695, "loss": 0.3683, "step": 32017 }, { "epoch": 17.887150837988827, "grad_norm": 0.5608108043670654, "learning_rate": 0.00010633053221288517, "loss": 0.5416, "step": 32018 }, { "epoch": 17.887709497206703, "grad_norm": 0.538255512714386, "learning_rate": 0.00010630252100840336, "loss": 0.3851, "step": 32019 }, { "epoch": 17.88826815642458, "grad_norm": 
0.4306880533695221, "learning_rate": 0.00010627450980392158, "loss": 0.3317, "step": 32020 }, { "epoch": 17.888826815642457, "grad_norm": 0.6945005655288696, "learning_rate": 0.00010624649859943977, "loss": 0.4168, "step": 32021 }, { "epoch": 17.889385474860337, "grad_norm": 0.45939913392066956, "learning_rate": 0.00010621848739495799, "loss": 0.5134, "step": 32022 }, { "epoch": 17.889944134078213, "grad_norm": 0.5442972183227539, "learning_rate": 0.0001061904761904762, "loss": 0.453, "step": 32023 }, { "epoch": 17.89050279329609, "grad_norm": 0.5479558110237122, "learning_rate": 0.0001061624649859944, "loss": 0.4877, "step": 32024 }, { "epoch": 17.891061452513966, "grad_norm": 2.9534685611724854, "learning_rate": 0.00010613445378151261, "loss": 0.4438, "step": 32025 }, { "epoch": 17.891620111731843, "grad_norm": 0.38486379384994507, "learning_rate": 0.00010610644257703081, "loss": 0.3996, "step": 32026 }, { "epoch": 17.89217877094972, "grad_norm": 0.4887557327747345, "learning_rate": 0.00010607843137254902, "loss": 0.3762, "step": 32027 }, { "epoch": 17.892737430167596, "grad_norm": 0.40388065576553345, "learning_rate": 0.00010605042016806724, "loss": 0.4619, "step": 32028 }, { "epoch": 17.893296089385476, "grad_norm": 0.5143378376960754, "learning_rate": 0.00010602240896358543, "loss": 0.3742, "step": 32029 }, { "epoch": 17.893854748603353, "grad_norm": 0.3665013611316681, "learning_rate": 0.00010599439775910365, "loss": 0.4007, "step": 32030 }, { "epoch": 17.89441340782123, "grad_norm": 0.3822100758552551, "learning_rate": 0.00010596638655462184, "loss": 0.3761, "step": 32031 }, { "epoch": 17.894972067039106, "grad_norm": 0.5564274191856384, "learning_rate": 0.00010593837535014006, "loss": 0.3406, "step": 32032 }, { "epoch": 17.895530726256982, "grad_norm": 0.7217164635658264, "learning_rate": 0.00010591036414565827, "loss": 0.3972, "step": 32033 }, { "epoch": 17.89608938547486, "grad_norm": 3.5297625064849854, "learning_rate": 0.00010588235294117647, "loss": 
0.4178, "step": 32034 }, { "epoch": 17.89664804469274, "grad_norm": 0.5031899809837341, "learning_rate": 0.00010585434173669468, "loss": 0.4507, "step": 32035 }, { "epoch": 17.897206703910616, "grad_norm": 1.3595974445343018, "learning_rate": 0.00010582633053221289, "loss": 0.3222, "step": 32036 }, { "epoch": 17.897765363128492, "grad_norm": 1.0752272605895996, "learning_rate": 0.00010579831932773109, "loss": 0.3487, "step": 32037 }, { "epoch": 17.89832402234637, "grad_norm": 0.34939366579055786, "learning_rate": 0.0001057703081232493, "loss": 0.3818, "step": 32038 }, { "epoch": 17.898882681564245, "grad_norm": 0.8856455683708191, "learning_rate": 0.0001057422969187675, "loss": 0.3968, "step": 32039 }, { "epoch": 17.899441340782122, "grad_norm": 0.5000383257865906, "learning_rate": 0.00010571428571428572, "loss": 0.3757, "step": 32040 }, { "epoch": 17.9, "grad_norm": 0.5492093563079834, "learning_rate": 0.00010568627450980392, "loss": 0.3876, "step": 32041 }, { "epoch": 17.90055865921788, "grad_norm": 1.3750840425491333, "learning_rate": 0.00010565826330532214, "loss": 0.3885, "step": 32042 }, { "epoch": 17.901117318435755, "grad_norm": 0.40190553665161133, "learning_rate": 0.00010563025210084033, "loss": 0.3729, "step": 32043 }, { "epoch": 17.901675977653632, "grad_norm": 0.40007996559143066, "learning_rate": 0.00010560224089635855, "loss": 0.4331, "step": 32044 }, { "epoch": 17.90223463687151, "grad_norm": 0.4040498435497284, "learning_rate": 0.00010557422969187675, "loss": 0.3885, "step": 32045 }, { "epoch": 17.902793296089385, "grad_norm": 0.5825757384300232, "learning_rate": 0.00010554621848739496, "loss": 0.3931, "step": 32046 }, { "epoch": 17.90335195530726, "grad_norm": 0.7963008880615234, "learning_rate": 0.00010551820728291317, "loss": 0.2815, "step": 32047 }, { "epoch": 17.903910614525138, "grad_norm": 0.3657061755657196, "learning_rate": 0.00010549019607843137, "loss": 0.2593, "step": 32048 }, { "epoch": 17.904469273743018, "grad_norm": 
0.437903493642807, "learning_rate": 0.00010546218487394958, "loss": 0.4883, "step": 32049 }, { "epoch": 17.905027932960895, "grad_norm": 0.3962153494358063, "learning_rate": 0.0001054341736694678, "loss": 0.3808, "step": 32050 }, { "epoch": 17.90558659217877, "grad_norm": 0.554152250289917, "learning_rate": 0.00010540616246498599, "loss": 0.4805, "step": 32051 }, { "epoch": 17.906145251396648, "grad_norm": 3.1426029205322266, "learning_rate": 0.00010537815126050421, "loss": 0.4685, "step": 32052 }, { "epoch": 17.906703910614524, "grad_norm": 0.34781739115715027, "learning_rate": 0.0001053501400560224, "loss": 0.4164, "step": 32053 }, { "epoch": 17.9072625698324, "grad_norm": 0.45156776905059814, "learning_rate": 0.00010532212885154062, "loss": 0.3965, "step": 32054 }, { "epoch": 17.907821229050278, "grad_norm": 0.37126877903938293, "learning_rate": 0.00010529411764705883, "loss": 0.4043, "step": 32055 }, { "epoch": 17.908379888268158, "grad_norm": 0.5230785012245178, "learning_rate": 0.00010526610644257703, "loss": 0.3767, "step": 32056 }, { "epoch": 17.908938547486034, "grad_norm": 0.3856871426105499, "learning_rate": 0.00010523809523809524, "loss": 0.3922, "step": 32057 }, { "epoch": 17.90949720670391, "grad_norm": 0.46022406220436096, "learning_rate": 0.00010521008403361345, "loss": 0.4806, "step": 32058 }, { "epoch": 17.910055865921787, "grad_norm": 2.3201136589050293, "learning_rate": 0.00010518207282913165, "loss": 0.3462, "step": 32059 }, { "epoch": 17.910614525139664, "grad_norm": 0.429340124130249, "learning_rate": 0.00010515406162464987, "loss": 0.3291, "step": 32060 }, { "epoch": 17.91117318435754, "grad_norm": 0.5402361750602722, "learning_rate": 0.00010512605042016806, "loss": 0.4225, "step": 32061 }, { "epoch": 17.91173184357542, "grad_norm": 0.5591565370559692, "learning_rate": 0.00010509803921568628, "loss": 0.4203, "step": 32062 }, { "epoch": 17.912290502793297, "grad_norm": 3.564390182495117, "learning_rate": 0.00010507002801120448, "loss": 
0.4272, "step": 32063 }, { "epoch": 17.912849162011174, "grad_norm": 1.6119402647018433, "learning_rate": 0.0001050420168067227, "loss": 0.4188, "step": 32064 }, { "epoch": 17.91340782122905, "grad_norm": 0.3713831603527069, "learning_rate": 0.00010501400560224089, "loss": 0.387, "step": 32065 }, { "epoch": 17.913966480446927, "grad_norm": 0.5590749382972717, "learning_rate": 0.00010498599439775911, "loss": 0.3493, "step": 32066 }, { "epoch": 17.914525139664804, "grad_norm": 0.6645670533180237, "learning_rate": 0.00010495798319327731, "loss": 0.4138, "step": 32067 }, { "epoch": 17.91508379888268, "grad_norm": 0.7421476244926453, "learning_rate": 0.00010492997198879552, "loss": 0.4337, "step": 32068 }, { "epoch": 17.91564245810056, "grad_norm": 0.4942847490310669, "learning_rate": 0.00010490196078431373, "loss": 0.441, "step": 32069 }, { "epoch": 17.916201117318437, "grad_norm": 0.3272298574447632, "learning_rate": 0.00010487394957983193, "loss": 0.3237, "step": 32070 }, { "epoch": 17.916759776536313, "grad_norm": 0.4406721293926239, "learning_rate": 0.00010484593837535014, "loss": 0.4312, "step": 32071 }, { "epoch": 17.91731843575419, "grad_norm": 0.35039588809013367, "learning_rate": 0.00010481792717086836, "loss": 0.4062, "step": 32072 }, { "epoch": 17.917877094972066, "grad_norm": 0.4460091292858124, "learning_rate": 0.00010478991596638655, "loss": 0.421, "step": 32073 }, { "epoch": 17.918435754189943, "grad_norm": 0.35254159569740295, "learning_rate": 0.00010476190476190477, "loss": 0.3262, "step": 32074 }, { "epoch": 17.91899441340782, "grad_norm": 0.5050002932548523, "learning_rate": 0.00010473389355742296, "loss": 0.3445, "step": 32075 }, { "epoch": 17.9195530726257, "grad_norm": 0.5239331126213074, "learning_rate": 0.00010470588235294118, "loss": 0.403, "step": 32076 }, { "epoch": 17.920111731843576, "grad_norm": 0.3262229263782501, "learning_rate": 0.00010467787114845939, "loss": 0.3974, "step": 32077 }, { "epoch": 17.920670391061453, "grad_norm": 
6.374235153198242, "learning_rate": 0.00010464985994397759, "loss": 0.397, "step": 32078 }, { "epoch": 17.92122905027933, "grad_norm": 0.44188499450683594, "learning_rate": 0.0001046218487394958, "loss": 0.424, "step": 32079 }, { "epoch": 17.921787709497206, "grad_norm": 0.4864714443683624, "learning_rate": 0.000104593837535014, "loss": 0.3611, "step": 32080 }, { "epoch": 17.922346368715083, "grad_norm": 0.9476110339164734, "learning_rate": 0.00010456582633053221, "loss": 0.3961, "step": 32081 }, { "epoch": 17.922905027932963, "grad_norm": 0.4864995777606964, "learning_rate": 0.00010453781512605043, "loss": 0.3896, "step": 32082 }, { "epoch": 17.92346368715084, "grad_norm": 0.5234395265579224, "learning_rate": 0.00010450980392156862, "loss": 0.3804, "step": 32083 }, { "epoch": 17.924022346368716, "grad_norm": 0.3916802704334259, "learning_rate": 0.00010448179271708684, "loss": 0.323, "step": 32084 }, { "epoch": 17.924581005586592, "grad_norm": 0.5071191787719727, "learning_rate": 0.00010445378151260503, "loss": 0.554, "step": 32085 }, { "epoch": 17.92513966480447, "grad_norm": 0.9988527894020081, "learning_rate": 0.00010442577030812325, "loss": 0.4345, "step": 32086 }, { "epoch": 17.925698324022346, "grad_norm": 0.37349894642829895, "learning_rate": 0.00010439775910364146, "loss": 0.3474, "step": 32087 }, { "epoch": 17.926256983240222, "grad_norm": 0.5939425826072693, "learning_rate": 0.00010436974789915967, "loss": 0.435, "step": 32088 }, { "epoch": 17.926815642458102, "grad_norm": 0.46381595730781555, "learning_rate": 0.00010434173669467787, "loss": 0.5452, "step": 32089 }, { "epoch": 17.92737430167598, "grad_norm": 0.9660611152648926, "learning_rate": 0.00010431372549019608, "loss": 0.3852, "step": 32090 }, { "epoch": 17.927932960893855, "grad_norm": 0.40508294105529785, "learning_rate": 0.00010428571428571428, "loss": 0.4066, "step": 32091 }, { "epoch": 17.928491620111732, "grad_norm": 1.4999148845672607, "learning_rate": 0.00010425770308123249, "loss": 0.383, 
"step": 32092 }, { "epoch": 17.92905027932961, "grad_norm": 0.41329383850097656, "learning_rate": 0.0001042296918767507, "loss": 0.4594, "step": 32093 }, { "epoch": 17.929608938547485, "grad_norm": 0.8257381916046143, "learning_rate": 0.00010420168067226892, "loss": 0.57, "step": 32094 }, { "epoch": 17.93016759776536, "grad_norm": 0.5611152052879333, "learning_rate": 0.00010417366946778711, "loss": 0.3729, "step": 32095 }, { "epoch": 17.93072625698324, "grad_norm": 0.5553175806999207, "learning_rate": 0.00010414565826330533, "loss": 0.491, "step": 32096 }, { "epoch": 17.93128491620112, "grad_norm": 0.5101818442344666, "learning_rate": 0.00010411764705882352, "loss": 0.4563, "step": 32097 }, { "epoch": 17.931843575418995, "grad_norm": 0.42898306250572205, "learning_rate": 0.00010408963585434174, "loss": 0.3305, "step": 32098 }, { "epoch": 17.93240223463687, "grad_norm": 0.5169159770011902, "learning_rate": 0.00010406162464985995, "loss": 0.4026, "step": 32099 }, { "epoch": 17.932960893854748, "grad_norm": 0.40212827920913696, "learning_rate": 0.00010403361344537815, "loss": 0.4071, "step": 32100 }, { "epoch": 17.933519553072625, "grad_norm": 0.5036874413490295, "learning_rate": 0.00010400560224089636, "loss": 0.3974, "step": 32101 }, { "epoch": 17.9340782122905, "grad_norm": 0.48071178793907166, "learning_rate": 0.00010397759103641456, "loss": 0.4066, "step": 32102 }, { "epoch": 17.93463687150838, "grad_norm": 4.051095008850098, "learning_rate": 0.00010394957983193277, "loss": 0.4658, "step": 32103 }, { "epoch": 17.935195530726258, "grad_norm": 0.5427535176277161, "learning_rate": 0.00010392156862745099, "loss": 0.48, "step": 32104 }, { "epoch": 17.935754189944134, "grad_norm": 0.41416987776756287, "learning_rate": 0.00010389355742296918, "loss": 0.3215, "step": 32105 }, { "epoch": 17.93631284916201, "grad_norm": 10.936762809753418, "learning_rate": 0.0001038655462184874, "loss": 0.3667, "step": 32106 }, { "epoch": 17.936871508379888, "grad_norm": 
0.6484376192092896, "learning_rate": 0.0001038375350140056, "loss": 0.4732, "step": 32107 }, { "epoch": 17.937430167597764, "grad_norm": 1.140391230583191, "learning_rate": 0.00010380952380952381, "loss": 0.4438, "step": 32108 }, { "epoch": 17.93798882681564, "grad_norm": 0.38668641448020935, "learning_rate": 0.00010378151260504202, "loss": 0.3633, "step": 32109 }, { "epoch": 17.93854748603352, "grad_norm": 0.5950167775154114, "learning_rate": 0.00010375350140056023, "loss": 0.411, "step": 32110 }, { "epoch": 17.939106145251397, "grad_norm": 0.47897446155548096, "learning_rate": 0.00010372549019607843, "loss": 0.3484, "step": 32111 }, { "epoch": 17.939664804469274, "grad_norm": 1.1663424968719482, "learning_rate": 0.00010369747899159664, "loss": 0.3879, "step": 32112 }, { "epoch": 17.94022346368715, "grad_norm": 0.37962576746940613, "learning_rate": 0.00010366946778711484, "loss": 0.3883, "step": 32113 }, { "epoch": 17.940782122905027, "grad_norm": 0.35679253935813904, "learning_rate": 0.00010364145658263306, "loss": 0.4137, "step": 32114 }, { "epoch": 17.941340782122904, "grad_norm": 0.5075446963310242, "learning_rate": 0.00010361344537815126, "loss": 0.3422, "step": 32115 }, { "epoch": 17.941899441340784, "grad_norm": 0.5077576041221619, "learning_rate": 0.00010358543417366947, "loss": 0.345, "step": 32116 }, { "epoch": 17.94245810055866, "grad_norm": 0.530564546585083, "learning_rate": 0.00010355742296918767, "loss": 0.3702, "step": 32117 }, { "epoch": 17.943016759776537, "grad_norm": 0.5628470778465271, "learning_rate": 0.00010352941176470589, "loss": 0.4678, "step": 32118 }, { "epoch": 17.943575418994413, "grad_norm": 0.5565418601036072, "learning_rate": 0.00010350140056022408, "loss": 0.431, "step": 32119 }, { "epoch": 17.94413407821229, "grad_norm": 0.5384323000907898, "learning_rate": 0.0001034733893557423, "loss": 0.4193, "step": 32120 }, { "epoch": 17.944692737430167, "grad_norm": 0.3861985504627228, "learning_rate": 0.0001034453781512605, "loss": 0.4186, 
"step": 32121 }, { "epoch": 17.945251396648043, "grad_norm": 0.4633825421333313, "learning_rate": 0.00010341736694677871, "loss": 0.4426, "step": 32122 }, { "epoch": 17.945810055865923, "grad_norm": 0.44751685857772827, "learning_rate": 0.00010338935574229692, "loss": 0.4302, "step": 32123 }, { "epoch": 17.9463687150838, "grad_norm": 0.5824738144874573, "learning_rate": 0.00010336134453781512, "loss": 0.5436, "step": 32124 }, { "epoch": 17.946927374301676, "grad_norm": 0.5901375412940979, "learning_rate": 0.00010333333333333333, "loss": 0.5259, "step": 32125 }, { "epoch": 17.947486033519553, "grad_norm": 0.4049050807952881, "learning_rate": 0.00010330532212885155, "loss": 0.379, "step": 32126 }, { "epoch": 17.94804469273743, "grad_norm": 0.9458888173103333, "learning_rate": 0.00010327731092436974, "loss": 0.6673, "step": 32127 }, { "epoch": 17.948603351955306, "grad_norm": 0.46767690777778625, "learning_rate": 0.00010324929971988796, "loss": 0.4629, "step": 32128 }, { "epoch": 17.949162011173183, "grad_norm": 2.2768640518188477, "learning_rate": 0.00010322128851540615, "loss": 0.389, "step": 32129 }, { "epoch": 17.949720670391063, "grad_norm": 0.41384831070899963, "learning_rate": 0.00010319327731092437, "loss": 0.4262, "step": 32130 }, { "epoch": 17.95027932960894, "grad_norm": 1.425829529762268, "learning_rate": 0.00010316526610644258, "loss": 0.2834, "step": 32131 }, { "epoch": 17.950837988826816, "grad_norm": 0.3054933249950409, "learning_rate": 0.00010313725490196078, "loss": 0.2559, "step": 32132 }, { "epoch": 17.951396648044692, "grad_norm": 0.4020155966281891, "learning_rate": 0.00010310924369747899, "loss": 0.5037, "step": 32133 }, { "epoch": 17.95195530726257, "grad_norm": 0.5484250783920288, "learning_rate": 0.0001030812324929972, "loss": 0.3176, "step": 32134 }, { "epoch": 17.952513966480446, "grad_norm": 0.7302910685539246, "learning_rate": 0.0001030532212885154, "loss": 0.5089, "step": 32135 }, { "epoch": 17.953072625698326, "grad_norm": 
0.46535348892211914, "learning_rate": 0.00010302521008403362, "loss": 0.3451, "step": 32136 }, { "epoch": 17.953631284916202, "grad_norm": 0.39149177074432373, "learning_rate": 0.00010299719887955181, "loss": 0.3416, "step": 32137 }, { "epoch": 17.95418994413408, "grad_norm": 0.37270358204841614, "learning_rate": 0.00010296918767507003, "loss": 0.3294, "step": 32138 }, { "epoch": 17.954748603351955, "grad_norm": 0.4724014401435852, "learning_rate": 0.00010294117647058823, "loss": 0.4524, "step": 32139 }, { "epoch": 17.955307262569832, "grad_norm": 0.3210068941116333, "learning_rate": 0.00010291316526610645, "loss": 0.3241, "step": 32140 }, { "epoch": 17.95586592178771, "grad_norm": 7.787713050842285, "learning_rate": 0.00010288515406162465, "loss": 0.4483, "step": 32141 }, { "epoch": 17.956424581005585, "grad_norm": 0.4520329236984253, "learning_rate": 0.00010285714285714286, "loss": 0.3667, "step": 32142 }, { "epoch": 17.956983240223465, "grad_norm": 0.6888320446014404, "learning_rate": 0.00010282913165266106, "loss": 0.4194, "step": 32143 }, { "epoch": 17.957541899441342, "grad_norm": 0.8103201985359192, "learning_rate": 0.00010280112044817927, "loss": 0.3727, "step": 32144 }, { "epoch": 17.95810055865922, "grad_norm": 0.5739715695381165, "learning_rate": 0.00010277310924369748, "loss": 0.4644, "step": 32145 }, { "epoch": 17.958659217877095, "grad_norm": 0.44778817892074585, "learning_rate": 0.0001027450980392157, "loss": 0.4121, "step": 32146 }, { "epoch": 17.95921787709497, "grad_norm": 0.6674813032150269, "learning_rate": 0.00010271708683473389, "loss": 0.3549, "step": 32147 }, { "epoch": 17.959776536312848, "grad_norm": 0.4502522349357605, "learning_rate": 0.00010268907563025211, "loss": 0.4164, "step": 32148 }, { "epoch": 17.960335195530725, "grad_norm": 0.41869762539863586, "learning_rate": 0.0001026610644257703, "loss": 0.4212, "step": 32149 }, { "epoch": 17.960893854748605, "grad_norm": 0.3820033073425293, "learning_rate": 0.00010263305322128852, "loss": 
0.5288, "step": 32150 }, { "epoch": 17.96145251396648, "grad_norm": 0.40751445293426514, "learning_rate": 0.00010260504201680671, "loss": 0.3637, "step": 32151 }, { "epoch": 17.962011173184358, "grad_norm": 0.5653027296066284, "learning_rate": 0.00010257703081232493, "loss": 0.4395, "step": 32152 }, { "epoch": 17.962569832402234, "grad_norm": 1.0940889120101929, "learning_rate": 0.00010254901960784315, "loss": 0.3925, "step": 32153 }, { "epoch": 17.96312849162011, "grad_norm": 0.32194969058036804, "learning_rate": 0.00010252100840336134, "loss": 0.3994, "step": 32154 }, { "epoch": 17.963687150837988, "grad_norm": 0.5637681484222412, "learning_rate": 0.00010249299719887956, "loss": 0.4458, "step": 32155 }, { "epoch": 17.964245810055864, "grad_norm": 0.47407469153404236, "learning_rate": 0.00010246498599439776, "loss": 0.3285, "step": 32156 }, { "epoch": 17.964804469273744, "grad_norm": 0.486642450094223, "learning_rate": 0.00010243697478991597, "loss": 0.3451, "step": 32157 }, { "epoch": 17.96536312849162, "grad_norm": 0.5967510342597961, "learning_rate": 0.00010240896358543418, "loss": 0.3988, "step": 32158 }, { "epoch": 17.965921787709497, "grad_norm": 0.3528304994106293, "learning_rate": 0.00010238095238095239, "loss": 0.3753, "step": 32159 }, { "epoch": 17.966480446927374, "grad_norm": 0.9806635975837708, "learning_rate": 0.00010235294117647059, "loss": 0.4167, "step": 32160 }, { "epoch": 17.96703910614525, "grad_norm": 0.49239224195480347, "learning_rate": 0.0001023249299719888, "loss": 0.4962, "step": 32161 }, { "epoch": 17.967597765363127, "grad_norm": 0.47801443934440613, "learning_rate": 0.000102296918767507, "loss": 0.3625, "step": 32162 }, { "epoch": 17.968156424581007, "grad_norm": 0.5573924779891968, "learning_rate": 0.00010226890756302522, "loss": 0.3267, "step": 32163 }, { "epoch": 17.968715083798884, "grad_norm": 0.5888733267784119, "learning_rate": 0.00010224089635854342, "loss": 0.4356, "step": 32164 }, { "epoch": 17.96927374301676, "grad_norm": 
0.4991845488548279, "learning_rate": 0.00010221288515406164, "loss": 0.3967, "step": 32165 }, { "epoch": 17.969832402234637, "grad_norm": 0.6667455434799194, "learning_rate": 0.00010218487394957983, "loss": 0.3928, "step": 32166 }, { "epoch": 17.970391061452514, "grad_norm": 0.4273636043071747, "learning_rate": 0.00010215686274509805, "loss": 0.441, "step": 32167 }, { "epoch": 17.97094972067039, "grad_norm": 0.4200524389743805, "learning_rate": 0.00010212885154061625, "loss": 0.4129, "step": 32168 }, { "epoch": 17.971508379888267, "grad_norm": 0.4013611972332001, "learning_rate": 0.00010210084033613446, "loss": 0.3675, "step": 32169 }, { "epoch": 17.972067039106147, "grad_norm": 20.487226486206055, "learning_rate": 0.00010207282913165267, "loss": 0.3944, "step": 32170 }, { "epoch": 17.972625698324023, "grad_norm": 0.5312010049819946, "learning_rate": 0.00010204481792717087, "loss": 0.464, "step": 32171 }, { "epoch": 17.9731843575419, "grad_norm": 2.0187249183654785, "learning_rate": 0.00010201680672268908, "loss": 0.5059, "step": 32172 }, { "epoch": 17.973743016759776, "grad_norm": 17.045413970947266, "learning_rate": 0.0001019887955182073, "loss": 0.4262, "step": 32173 }, { "epoch": 17.974301675977653, "grad_norm": 0.9672741293907166, "learning_rate": 0.00010196078431372549, "loss": 0.4437, "step": 32174 }, { "epoch": 17.97486033519553, "grad_norm": 0.5415329933166504, "learning_rate": 0.00010193277310924371, "loss": 0.3903, "step": 32175 }, { "epoch": 17.975418994413406, "grad_norm": 0.6438754796981812, "learning_rate": 0.0001019047619047619, "loss": 0.3886, "step": 32176 }, { "epoch": 17.975977653631286, "grad_norm": 0.5428248047828674, "learning_rate": 0.00010187675070028012, "loss": 0.4948, "step": 32177 }, { "epoch": 17.976536312849163, "grad_norm": 0.4975340962409973, "learning_rate": 0.00010184873949579831, "loss": 0.3714, "step": 32178 }, { "epoch": 17.97709497206704, "grad_norm": 0.35350000858306885, "learning_rate": 0.00010182072829131653, "loss": 0.419, 
"step": 32179 }, { "epoch": 17.977653631284916, "grad_norm": 0.4170933663845062, "learning_rate": 0.00010179271708683474, "loss": 0.3274, "step": 32180 }, { "epoch": 17.978212290502793, "grad_norm": 0.6111339926719666, "learning_rate": 0.00010176470588235295, "loss": 0.5101, "step": 32181 }, { "epoch": 17.97877094972067, "grad_norm": 0.4280765950679779, "learning_rate": 0.00010173669467787115, "loss": 0.3706, "step": 32182 }, { "epoch": 17.97932960893855, "grad_norm": 0.4439769983291626, "learning_rate": 0.00010170868347338936, "loss": 0.3239, "step": 32183 }, { "epoch": 17.979888268156426, "grad_norm": 0.4513072371482849, "learning_rate": 0.00010168067226890756, "loss": 0.3322, "step": 32184 }, { "epoch": 17.980446927374302, "grad_norm": 0.336851567029953, "learning_rate": 0.00010165266106442578, "loss": 0.3525, "step": 32185 }, { "epoch": 17.98100558659218, "grad_norm": 0.3594527244567871, "learning_rate": 0.00010162464985994398, "loss": 0.2935, "step": 32186 }, { "epoch": 17.981564245810056, "grad_norm": 0.4038698077201843, "learning_rate": 0.0001015966386554622, "loss": 0.4512, "step": 32187 }, { "epoch": 17.982122905027932, "grad_norm": 0.35323983430862427, "learning_rate": 0.00010156862745098039, "loss": 0.445, "step": 32188 }, { "epoch": 17.98268156424581, "grad_norm": 0.5118697285652161, "learning_rate": 0.00010154061624649861, "loss": 0.3577, "step": 32189 }, { "epoch": 17.98324022346369, "grad_norm": 0.3399391174316406, "learning_rate": 0.00010151260504201681, "loss": 0.381, "step": 32190 }, { "epoch": 17.983798882681565, "grad_norm": 0.5384077429771423, "learning_rate": 0.00010148459383753502, "loss": 0.3696, "step": 32191 }, { "epoch": 17.984357541899442, "grad_norm": 0.5064133405685425, "learning_rate": 0.00010145658263305323, "loss": 0.3448, "step": 32192 }, { "epoch": 17.98491620111732, "grad_norm": 1.204272985458374, "learning_rate": 0.00010142857142857143, "loss": 0.3644, "step": 32193 }, { "epoch": 17.985474860335195, "grad_norm": 
0.41834303736686707, "learning_rate": 0.00010140056022408964, "loss": 0.3743, "step": 32194 }, { "epoch": 17.98603351955307, "grad_norm": 0.44735148549079895, "learning_rate": 0.00010137254901960786, "loss": 0.5135, "step": 32195 }, { "epoch": 17.986592178770948, "grad_norm": 0.3835451602935791, "learning_rate": 0.00010134453781512605, "loss": 0.3585, "step": 32196 }, { "epoch": 17.98715083798883, "grad_norm": 0.4819982647895813, "learning_rate": 0.00010131652661064427, "loss": 0.4432, "step": 32197 }, { "epoch": 17.987709497206705, "grad_norm": 0.49629509449005127, "learning_rate": 0.00010128851540616246, "loss": 0.5428, "step": 32198 }, { "epoch": 17.98826815642458, "grad_norm": 0.508905291557312, "learning_rate": 0.00010126050420168068, "loss": 0.3444, "step": 32199 }, { "epoch": 17.988826815642458, "grad_norm": 0.64361572265625, "learning_rate": 0.00010123249299719889, "loss": 0.4514, "step": 32200 }, { "epoch": 17.989385474860335, "grad_norm": 1.2176861763000488, "learning_rate": 0.00010120448179271709, "loss": 0.4176, "step": 32201 }, { "epoch": 17.98994413407821, "grad_norm": 0.4100797474384308, "learning_rate": 0.0001011764705882353, "loss": 0.4212, "step": 32202 }, { "epoch": 17.990502793296088, "grad_norm": 0.935918927192688, "learning_rate": 0.0001011484593837535, "loss": 0.5361, "step": 32203 }, { "epoch": 17.991061452513968, "grad_norm": 1.6712472438812256, "learning_rate": 0.00010112044817927171, "loss": 0.4151, "step": 32204 }, { "epoch": 17.991620111731844, "grad_norm": 0.5248129367828369, "learning_rate": 0.00010109243697478992, "loss": 0.4453, "step": 32205 }, { "epoch": 17.99217877094972, "grad_norm": 0.6204363107681274, "learning_rate": 0.00010106442577030812, "loss": 0.3319, "step": 32206 }, { "epoch": 17.992737430167598, "grad_norm": 0.3578352928161621, "learning_rate": 0.00010103641456582634, "loss": 0.3813, "step": 32207 }, { "epoch": 17.993296089385474, "grad_norm": 0.4895043671131134, "learning_rate": 0.00010100840336134453, "loss": 0.422, 
"step": 32208 }, { "epoch": 17.99385474860335, "grad_norm": 0.48461681604385376, "learning_rate": 0.00010098039215686275, "loss": 0.3886, "step": 32209 }, { "epoch": 17.994413407821227, "grad_norm": 2.6344857215881348, "learning_rate": 0.00010095238095238095, "loss": 0.4594, "step": 32210 }, { "epoch": 17.994972067039107, "grad_norm": 0.4512898325920105, "learning_rate": 0.00010092436974789917, "loss": 0.4307, "step": 32211 }, { "epoch": 17.995530726256984, "grad_norm": 0.5038577318191528, "learning_rate": 0.00010089635854341737, "loss": 0.4041, "step": 32212 }, { "epoch": 17.99608938547486, "grad_norm": 1.025885820388794, "learning_rate": 0.00010086834733893558, "loss": 0.3315, "step": 32213 }, { "epoch": 17.996648044692737, "grad_norm": 0.4434353709220886, "learning_rate": 0.00010084033613445378, "loss": 0.34, "step": 32214 }, { "epoch": 17.997206703910614, "grad_norm": 0.4924229681491852, "learning_rate": 0.00010081232492997199, "loss": 0.369, "step": 32215 }, { "epoch": 17.99776536312849, "grad_norm": 0.44298720359802246, "learning_rate": 0.0001007843137254902, "loss": 0.507, "step": 32216 }, { "epoch": 17.99832402234637, "grad_norm": 0.5157743692398071, "learning_rate": 0.00010075630252100842, "loss": 0.3637, "step": 32217 }, { "epoch": 17.998882681564247, "grad_norm": 0.483979731798172, "learning_rate": 0.00010072829131652661, "loss": 0.44, "step": 32218 }, { "epoch": 17.999441340782123, "grad_norm": 0.5765401721000671, "learning_rate": 0.00010070028011204483, "loss": 0.3873, "step": 32219 }, { "epoch": 18.0, "grad_norm": 0.40338215231895447, "learning_rate": 0.00010067226890756302, "loss": 0.3496, "step": 32220 }, { "epoch": 18.000558659217877, "grad_norm": 0.5964576601982117, "learning_rate": 0.00010064425770308124, "loss": 0.3746, "step": 32221 }, { "epoch": 18.001117318435753, "grad_norm": 0.6850411295890808, "learning_rate": 0.00010061624649859945, "loss": 0.5033, "step": 32222 }, { "epoch": 18.00167597765363, "grad_norm": 0.40267351269721985, 
"learning_rate": 0.00010058823529411765, "loss": 0.3866, "step": 32223 }, { "epoch": 18.00223463687151, "grad_norm": 0.37789425253868103, "learning_rate": 0.00010056022408963586, "loss": 0.4156, "step": 32224 }, { "epoch": 18.002793296089386, "grad_norm": 0.3623667061328888, "learning_rate": 0.00010053221288515406, "loss": 0.3125, "step": 32225 }, { "epoch": 18.003351955307263, "grad_norm": 0.3939913511276245, "learning_rate": 0.00010050420168067227, "loss": 0.3503, "step": 32226 }, { "epoch": 18.00391061452514, "grad_norm": 0.42091861367225647, "learning_rate": 0.00010047619047619049, "loss": 0.346, "step": 32227 }, { "epoch": 18.004469273743016, "grad_norm": 0.5072236061096191, "learning_rate": 0.00010044817927170868, "loss": 0.4363, "step": 32228 }, { "epoch": 18.005027932960893, "grad_norm": 0.4480527341365814, "learning_rate": 0.0001004201680672269, "loss": 0.4823, "step": 32229 }, { "epoch": 18.00558659217877, "grad_norm": 0.5972398519515991, "learning_rate": 0.0001003921568627451, "loss": 0.3489, "step": 32230 }, { "epoch": 18.00614525139665, "grad_norm": 0.40248218178749084, "learning_rate": 0.00010036414565826331, "loss": 0.3482, "step": 32231 }, { "epoch": 18.006703910614526, "grad_norm": 0.4852919578552246, "learning_rate": 0.0001003361344537815, "loss": 0.4764, "step": 32232 }, { "epoch": 18.007262569832402, "grad_norm": 0.4575750231742859, "learning_rate": 0.00010030812324929973, "loss": 0.4602, "step": 32233 }, { "epoch": 18.00782122905028, "grad_norm": 0.3594995141029358, "learning_rate": 0.00010028011204481793, "loss": 0.3459, "step": 32234 }, { "epoch": 18.008379888268156, "grad_norm": 0.4226916432380676, "learning_rate": 0.00010025210084033614, "loss": 0.2825, "step": 32235 }, { "epoch": 18.008938547486032, "grad_norm": 0.4638039171695709, "learning_rate": 0.00010022408963585434, "loss": 0.4521, "step": 32236 }, { "epoch": 18.009497206703912, "grad_norm": 1.3567938804626465, "learning_rate": 0.00010019607843137255, "loss": 0.4194, "step": 32237 }, 
{ "epoch": 18.01005586592179, "grad_norm": 0.49408772587776184, "learning_rate": 0.00010016806722689076, "loss": 0.3871, "step": 32238 }, { "epoch": 18.010614525139665, "grad_norm": 0.48068350553512573, "learning_rate": 0.00010014005602240897, "loss": 0.4438, "step": 32239 }, { "epoch": 18.011173184357542, "grad_norm": 0.3814902603626251, "learning_rate": 0.00010011204481792717, "loss": 0.3139, "step": 32240 }, { "epoch": 18.01173184357542, "grad_norm": 0.5485551357269287, "learning_rate": 0.00010008403361344539, "loss": 0.5443, "step": 32241 }, { "epoch": 18.012290502793295, "grad_norm": 0.3657400906085968, "learning_rate": 0.00010005602240896358, "loss": 0.3701, "step": 32242 }, { "epoch": 18.01284916201117, "grad_norm": 0.3984507620334625, "learning_rate": 0.0001000280112044818, "loss": 0.376, "step": 32243 }, { "epoch": 18.013407821229052, "grad_norm": 0.3243999779224396, "learning_rate": 0.0001, "loss": 0.3586, "step": 32244 }, { "epoch": 18.01396648044693, "grad_norm": 0.37670835852622986, "learning_rate": 9.997198879551821e-05, "loss": 0.5086, "step": 32245 }, { "epoch": 18.014525139664805, "grad_norm": 1.4647576808929443, "learning_rate": 9.994397759103642e-05, "loss": 0.4316, "step": 32246 }, { "epoch": 18.01508379888268, "grad_norm": 0.42384421825408936, "learning_rate": 9.991596638655462e-05, "loss": 0.3776, "step": 32247 }, { "epoch": 18.015642458100558, "grad_norm": 0.5377728343009949, "learning_rate": 9.988795518207283e-05, "loss": 0.3353, "step": 32248 }, { "epoch": 18.016201117318435, "grad_norm": 0.4751221239566803, "learning_rate": 9.985994397759105e-05, "loss": 0.427, "step": 32249 }, { "epoch": 18.01675977653631, "grad_norm": 0.43127840757369995, "learning_rate": 9.983193277310924e-05, "loss": 0.3993, "step": 32250 }, { "epoch": 18.01731843575419, "grad_norm": 0.6855712532997131, "learning_rate": 9.980392156862746e-05, "loss": 0.4527, "step": 32251 }, { "epoch": 18.017877094972068, "grad_norm": 0.5313059687614441, "learning_rate": 
9.977591036414565e-05, "loss": 0.3849, "step": 32252 }, { "epoch": 18.018435754189944, "grad_norm": 0.6236023306846619, "learning_rate": 9.974789915966387e-05, "loss": 0.4618, "step": 32253 }, { "epoch": 18.01899441340782, "grad_norm": 0.7799429297447205, "learning_rate": 9.971988795518208e-05, "loss": 0.3672, "step": 32254 }, { "epoch": 18.019553072625698, "grad_norm": 0.5411490797996521, "learning_rate": 9.969187675070028e-05, "loss": 0.4196, "step": 32255 }, { "epoch": 18.020111731843574, "grad_norm": 0.3924950659275055, "learning_rate": 9.966386554621849e-05, "loss": 0.3716, "step": 32256 }, { "epoch": 18.02067039106145, "grad_norm": 0.579119861125946, "learning_rate": 9.96358543417367e-05, "loss": 0.3804, "step": 32257 }, { "epoch": 18.02122905027933, "grad_norm": 0.3702141046524048, "learning_rate": 9.96078431372549e-05, "loss": 0.3669, "step": 32258 }, { "epoch": 18.021787709497207, "grad_norm": 1.1192439794540405, "learning_rate": 9.957983193277311e-05, "loss": 0.352, "step": 32259 }, { "epoch": 18.022346368715084, "grad_norm": 10.042969703674316, "learning_rate": 9.955182072829131e-05, "loss": 0.3675, "step": 32260 }, { "epoch": 18.02290502793296, "grad_norm": 0.515103816986084, "learning_rate": 9.952380952380953e-05, "loss": 0.4227, "step": 32261 }, { "epoch": 18.023463687150837, "grad_norm": 0.5099522471427917, "learning_rate": 9.949579831932773e-05, "loss": 0.4808, "step": 32262 }, { "epoch": 18.024022346368714, "grad_norm": 0.40187525749206543, "learning_rate": 9.946778711484595e-05, "loss": 0.4333, "step": 32263 }, { "epoch": 18.024581005586594, "grad_norm": 0.45274391770362854, "learning_rate": 9.943977591036414e-05, "loss": 0.3857, "step": 32264 }, { "epoch": 18.02513966480447, "grad_norm": 0.524215042591095, "learning_rate": 9.941176470588236e-05, "loss": 0.3685, "step": 32265 }, { "epoch": 18.025698324022347, "grad_norm": 0.46615076065063477, "learning_rate": 9.938375350140056e-05, "loss": 0.2647, "step": 32266 }, { "epoch": 18.026256983240224, 
"grad_norm": 0.54038405418396, "learning_rate": 9.935574229691877e-05, "loss": 0.3059, "step": 32267 }, { "epoch": 18.0268156424581, "grad_norm": 0.4917943477630615, "learning_rate": 9.932773109243698e-05, "loss": 0.3303, "step": 32268 }, { "epoch": 18.027374301675977, "grad_norm": 0.4269266426563263, "learning_rate": 9.929971988795518e-05, "loss": 0.4245, "step": 32269 }, { "epoch": 18.027932960893853, "grad_norm": 0.5459792017936707, "learning_rate": 9.927170868347339e-05, "loss": 0.4266, "step": 32270 }, { "epoch": 18.028491620111733, "grad_norm": 0.3995327353477478, "learning_rate": 9.924369747899161e-05, "loss": 0.4168, "step": 32271 }, { "epoch": 18.02905027932961, "grad_norm": 0.527877688407898, "learning_rate": 9.92156862745098e-05, "loss": 0.4083, "step": 32272 }, { "epoch": 18.029608938547486, "grad_norm": 0.3832007348537445, "learning_rate": 9.918767507002802e-05, "loss": 0.2715, "step": 32273 }, { "epoch": 18.030167597765363, "grad_norm": 0.3343893587589264, "learning_rate": 9.915966386554621e-05, "loss": 0.3998, "step": 32274 }, { "epoch": 18.03072625698324, "grad_norm": 0.43357473611831665, "learning_rate": 9.913165266106443e-05, "loss": 0.4659, "step": 32275 }, { "epoch": 18.031284916201116, "grad_norm": 1.858503818511963, "learning_rate": 9.910364145658264e-05, "loss": 0.3403, "step": 32276 }, { "epoch": 18.031843575418993, "grad_norm": 1.3414182662963867, "learning_rate": 9.907563025210084e-05, "loss": 0.4059, "step": 32277 }, { "epoch": 18.032402234636873, "grad_norm": 0.45445460081100464, "learning_rate": 9.904761904761905e-05, "loss": 0.4071, "step": 32278 }, { "epoch": 18.03296089385475, "grad_norm": 0.5012571811676025, "learning_rate": 9.901960784313726e-05, "loss": 0.5003, "step": 32279 }, { "epoch": 18.033519553072626, "grad_norm": 0.6706053018569946, "learning_rate": 9.899159663865546e-05, "loss": 0.4059, "step": 32280 }, { "epoch": 18.034078212290503, "grad_norm": 0.9332179427146912, "learning_rate": 9.896358543417368e-05, "loss": 0.4242, 
"step": 32281 }, { "epoch": 18.03463687150838, "grad_norm": 0.46652886271476746, "learning_rate": 9.893557422969187e-05, "loss": 0.3825, "step": 32282 }, { "epoch": 18.035195530726256, "grad_norm": 0.32998818159103394, "learning_rate": 9.890756302521009e-05, "loss": 0.3812, "step": 32283 }, { "epoch": 18.035754189944136, "grad_norm": 0.5732955932617188, "learning_rate": 9.887955182072829e-05, "loss": 0.3729, "step": 32284 }, { "epoch": 18.036312849162012, "grad_norm": 0.4227480888366699, "learning_rate": 9.88515406162465e-05, "loss": 0.4074, "step": 32285 }, { "epoch": 18.03687150837989, "grad_norm": 0.7531979084014893, "learning_rate": 9.88235294117647e-05, "loss": 0.4275, "step": 32286 }, { "epoch": 18.037430167597766, "grad_norm": 0.7109859585762024, "learning_rate": 9.879551820728292e-05, "loss": 0.3545, "step": 32287 }, { "epoch": 18.037988826815642, "grad_norm": 0.48333683609962463, "learning_rate": 9.876750700280112e-05, "loss": 0.3377, "step": 32288 }, { "epoch": 18.03854748603352, "grad_norm": 0.3968415856361389, "learning_rate": 9.873949579831933e-05, "loss": 0.4417, "step": 32289 }, { "epoch": 18.039106145251395, "grad_norm": 0.3953340947628021, "learning_rate": 9.871148459383753e-05, "loss": 0.2911, "step": 32290 }, { "epoch": 18.039664804469275, "grad_norm": 0.5416538715362549, "learning_rate": 9.868347338935574e-05, "loss": 0.4176, "step": 32291 }, { "epoch": 18.040223463687152, "grad_norm": 0.44783201813697815, "learning_rate": 9.865546218487395e-05, "loss": 0.4059, "step": 32292 }, { "epoch": 18.04078212290503, "grad_norm": 3.4399209022521973, "learning_rate": 9.862745098039217e-05, "loss": 0.4041, "step": 32293 }, { "epoch": 18.041340782122905, "grad_norm": 0.4822849929332733, "learning_rate": 9.859943977591036e-05, "loss": 0.4081, "step": 32294 }, { "epoch": 18.04189944134078, "grad_norm": 2.9424662590026855, "learning_rate": 9.857142857142858e-05, "loss": 0.3402, "step": 32295 }, { "epoch": 18.042458100558658, "grad_norm": 1.5867671966552734, 
"learning_rate": 9.854341736694677e-05, "loss": 0.5788, "step": 32296 }, { "epoch": 18.043016759776535, "grad_norm": 0.5507910251617432, "learning_rate": 9.851540616246499e-05, "loss": 0.6523, "step": 32297 }, { "epoch": 18.043575418994415, "grad_norm": 0.9780528545379639, "learning_rate": 9.84873949579832e-05, "loss": 0.397, "step": 32298 }, { "epoch": 18.04413407821229, "grad_norm": 0.4034658372402191, "learning_rate": 9.84593837535014e-05, "loss": 0.3861, "step": 32299 }, { "epoch": 18.044692737430168, "grad_norm": 0.3881433308124542, "learning_rate": 9.843137254901961e-05, "loss": 0.3836, "step": 32300 }, { "epoch": 18.045251396648045, "grad_norm": 0.7576314806938171, "learning_rate": 9.840336134453781e-05, "loss": 0.3992, "step": 32301 }, { "epoch": 18.04581005586592, "grad_norm": 0.43571993708610535, "learning_rate": 9.837535014005602e-05, "loss": 0.4538, "step": 32302 }, { "epoch": 18.046368715083798, "grad_norm": 0.3823072016239166, "learning_rate": 9.834733893557424e-05, "loss": 0.4308, "step": 32303 }, { "epoch": 18.046927374301674, "grad_norm": 0.5230814814567566, "learning_rate": 9.831932773109243e-05, "loss": 0.3813, "step": 32304 }, { "epoch": 18.047486033519554, "grad_norm": 0.45276939868927, "learning_rate": 9.829131652661065e-05, "loss": 0.3408, "step": 32305 }, { "epoch": 18.04804469273743, "grad_norm": 0.4107702076435089, "learning_rate": 9.826330532212884e-05, "loss": 0.4543, "step": 32306 }, { "epoch": 18.048603351955308, "grad_norm": 0.5349020957946777, "learning_rate": 9.823529411764706e-05, "loss": 0.3946, "step": 32307 }, { "epoch": 18.049162011173184, "grad_norm": 0.49025577306747437, "learning_rate": 9.820728291316527e-05, "loss": 0.4312, "step": 32308 }, { "epoch": 18.04972067039106, "grad_norm": 0.5161467790603638, "learning_rate": 9.817927170868348e-05, "loss": 0.4998, "step": 32309 }, { "epoch": 18.050279329608937, "grad_norm": 0.3578868508338928, "learning_rate": 9.815126050420168e-05, "loss": 0.3209, "step": 32310 }, { "epoch": 
18.050837988826817, "grad_norm": 0.5973392128944397, "learning_rate": 9.812324929971989e-05, "loss": 0.3495, "step": 32311 }, { "epoch": 18.051396648044694, "grad_norm": 0.3100230097770691, "learning_rate": 9.80952380952381e-05, "loss": 0.3758, "step": 32312 }, { "epoch": 18.05195530726257, "grad_norm": 0.39064323902130127, "learning_rate": 9.806722689075631e-05, "loss": 0.3605, "step": 32313 }, { "epoch": 18.052513966480447, "grad_norm": 0.35504016280174255, "learning_rate": 9.80392156862745e-05, "loss": 0.323, "step": 32314 }, { "epoch": 18.053072625698324, "grad_norm": 1.5023446083068848, "learning_rate": 9.801120448179273e-05, "loss": 0.3491, "step": 32315 }, { "epoch": 18.0536312849162, "grad_norm": 0.4155273139476776, "learning_rate": 9.798319327731092e-05, "loss": 0.4041, "step": 32316 }, { "epoch": 18.054189944134077, "grad_norm": 0.5000426173210144, "learning_rate": 9.795518207282914e-05, "loss": 0.3484, "step": 32317 }, { "epoch": 18.054748603351957, "grad_norm": 0.49676889181137085, "learning_rate": 9.792717086834733e-05, "loss": 0.465, "step": 32318 }, { "epoch": 18.055307262569833, "grad_norm": 0.3657989203929901, "learning_rate": 9.789915966386555e-05, "loss": 0.3029, "step": 32319 }, { "epoch": 18.05586592178771, "grad_norm": 0.34014958143234253, "learning_rate": 9.787114845938376e-05, "loss": 0.3663, "step": 32320 }, { "epoch": 18.056424581005587, "grad_norm": 0.4739871621131897, "learning_rate": 9.784313725490196e-05, "loss": 0.4468, "step": 32321 }, { "epoch": 18.056983240223463, "grad_norm": 0.5413164496421814, "learning_rate": 9.781512605042017e-05, "loss": 0.4806, "step": 32322 }, { "epoch": 18.05754189944134, "grad_norm": 1.307497262954712, "learning_rate": 9.778711484593837e-05, "loss": 0.36, "step": 32323 }, { "epoch": 18.058100558659216, "grad_norm": 3.2699074745178223, "learning_rate": 9.775910364145658e-05, "loss": 0.4242, "step": 32324 }, { "epoch": 18.058659217877096, "grad_norm": 0.7847439050674438, "learning_rate": 
9.77310924369748e-05, "loss": 0.3199, "step": 32325 }, { "epoch": 18.059217877094973, "grad_norm": 0.5128061771392822, "learning_rate": 9.770308123249299e-05, "loss": 0.3954, "step": 32326 }, { "epoch": 18.05977653631285, "grad_norm": 0.7440681457519531, "learning_rate": 9.767507002801121e-05, "loss": 0.548, "step": 32327 }, { "epoch": 18.060335195530726, "grad_norm": 0.3531847298145294, "learning_rate": 9.76470588235294e-05, "loss": 0.3961, "step": 32328 }, { "epoch": 18.060893854748603, "grad_norm": 0.5368345975875854, "learning_rate": 9.761904761904762e-05, "loss": 0.5148, "step": 32329 }, { "epoch": 18.06145251396648, "grad_norm": 0.3315516412258148, "learning_rate": 9.759103641456583e-05, "loss": 0.287, "step": 32330 }, { "epoch": 18.062011173184356, "grad_norm": 0.35034263134002686, "learning_rate": 9.756302521008403e-05, "loss": 0.3237, "step": 32331 }, { "epoch": 18.062569832402236, "grad_norm": 0.7380197644233704, "learning_rate": 9.753501400560224e-05, "loss": 0.5109, "step": 32332 }, { "epoch": 18.063128491620112, "grad_norm": 1.6325008869171143, "learning_rate": 9.750700280112045e-05, "loss": 0.4722, "step": 32333 }, { "epoch": 18.06368715083799, "grad_norm": 0.6074326634407043, "learning_rate": 9.747899159663865e-05, "loss": 0.3467, "step": 32334 }, { "epoch": 18.064245810055866, "grad_norm": 2.7419750690460205, "learning_rate": 9.745098039215687e-05, "loss": 0.4185, "step": 32335 }, { "epoch": 18.064804469273742, "grad_norm": 0.4355568587779999, "learning_rate": 9.742296918767506e-05, "loss": 0.4237, "step": 32336 }, { "epoch": 18.06536312849162, "grad_norm": 0.4071156084537506, "learning_rate": 9.739495798319328e-05, "loss": 0.3871, "step": 32337 }, { "epoch": 18.0659217877095, "grad_norm": 0.4159044921398163, "learning_rate": 9.736694677871148e-05, "loss": 0.428, "step": 32338 }, { "epoch": 18.066480446927375, "grad_norm": 0.4854905903339386, "learning_rate": 9.73389355742297e-05, "loss": 0.3825, "step": 32339 }, { "epoch": 18.067039106145252, 
"grad_norm": 0.7821348309516907, "learning_rate": 9.73109243697479e-05, "loss": 0.437, "step": 32340 }, { "epoch": 18.06759776536313, "grad_norm": 0.320843368768692, "learning_rate": 9.728291316526611e-05, "loss": 0.3524, "step": 32341 }, { "epoch": 18.068156424581005, "grad_norm": 0.4091688096523285, "learning_rate": 9.725490196078431e-05, "loss": 0.4402, "step": 32342 }, { "epoch": 18.06871508379888, "grad_norm": 0.4603174030780792, "learning_rate": 9.722689075630252e-05, "loss": 0.3619, "step": 32343 }, { "epoch": 18.06927374301676, "grad_norm": 0.538090705871582, "learning_rate": 9.719887955182073e-05, "loss": 0.458, "step": 32344 }, { "epoch": 18.06983240223464, "grad_norm": 0.5462102293968201, "learning_rate": 9.717086834733893e-05, "loss": 0.3489, "step": 32345 }, { "epoch": 18.070391061452515, "grad_norm": 0.3876274824142456, "learning_rate": 9.714285714285714e-05, "loss": 0.4015, "step": 32346 }, { "epoch": 18.07094972067039, "grad_norm": 0.5038914084434509, "learning_rate": 9.711484593837536e-05, "loss": 0.3956, "step": 32347 }, { "epoch": 18.071508379888268, "grad_norm": 0.343403697013855, "learning_rate": 9.708683473389355e-05, "loss": 0.3298, "step": 32348 }, { "epoch": 18.072067039106145, "grad_norm": 0.39973217248916626, "learning_rate": 9.705882352941177e-05, "loss": 0.3596, "step": 32349 }, { "epoch": 18.07262569832402, "grad_norm": 0.7613192796707153, "learning_rate": 9.703081232492996e-05, "loss": 0.4188, "step": 32350 }, { "epoch": 18.073184357541898, "grad_norm": 5.275942325592041, "learning_rate": 9.700280112044818e-05, "loss": 0.3812, "step": 32351 }, { "epoch": 18.073743016759778, "grad_norm": 0.4313063323497772, "learning_rate": 9.697478991596639e-05, "loss": 0.3734, "step": 32352 }, { "epoch": 18.074301675977654, "grad_norm": 1.448196530342102, "learning_rate": 9.69467787114846e-05, "loss": 0.4771, "step": 32353 }, { "epoch": 18.07486033519553, "grad_norm": 3.522810459136963, "learning_rate": 9.69187675070028e-05, "loss": 0.4753, "step": 
32354 }, { "epoch": 18.075418994413408, "grad_norm": 1.010437250137329, "learning_rate": 9.6890756302521e-05, "loss": 0.352, "step": 32355 }, { "epoch": 18.075977653631284, "grad_norm": 0.553568959236145, "learning_rate": 9.686274509803921e-05, "loss": 0.4939, "step": 32356 }, { "epoch": 18.07653631284916, "grad_norm": 0.5555480718612671, "learning_rate": 9.683473389355743e-05, "loss": 0.4298, "step": 32357 }, { "epoch": 18.07709497206704, "grad_norm": 0.43442219495773315, "learning_rate": 9.680672268907562e-05, "loss": 0.4521, "step": 32358 }, { "epoch": 18.077653631284917, "grad_norm": 0.6569821238517761, "learning_rate": 9.677871148459384e-05, "loss": 0.4076, "step": 32359 }, { "epoch": 18.078212290502794, "grad_norm": 0.6165484189987183, "learning_rate": 9.675070028011204e-05, "loss": 0.3306, "step": 32360 }, { "epoch": 18.07877094972067, "grad_norm": 0.44647157192230225, "learning_rate": 9.672268907563026e-05, "loss": 0.342, "step": 32361 }, { "epoch": 18.079329608938547, "grad_norm": 0.7071517705917358, "learning_rate": 9.669467787114846e-05, "loss": 0.3781, "step": 32362 }, { "epoch": 18.079888268156424, "grad_norm": 0.47947704792022705, "learning_rate": 9.666666666666667e-05, "loss": 0.3816, "step": 32363 }, { "epoch": 18.0804469273743, "grad_norm": 0.3952135145664215, "learning_rate": 9.663865546218487e-05, "loss": 0.3865, "step": 32364 }, { "epoch": 18.08100558659218, "grad_norm": 0.7275920510292053, "learning_rate": 9.661064425770308e-05, "loss": 0.3736, "step": 32365 }, { "epoch": 18.081564245810057, "grad_norm": 0.3496969938278198, "learning_rate": 9.658263305322129e-05, "loss": 0.4123, "step": 32366 }, { "epoch": 18.082122905027934, "grad_norm": 0.31497928500175476, "learning_rate": 9.65546218487395e-05, "loss": 0.3428, "step": 32367 }, { "epoch": 18.08268156424581, "grad_norm": 0.3751422166824341, "learning_rate": 9.65266106442577e-05, "loss": 0.4505, "step": 32368 }, { "epoch": 18.083240223463687, "grad_norm": 0.6251440644264221, "learning_rate": 
9.649859943977592e-05, "loss": 0.3481, "step": 32369 }, { "epoch": 18.083798882681563, "grad_norm": 0.3780922293663025, "learning_rate": 9.647058823529411e-05, "loss": 0.4113, "step": 32370 }, { "epoch": 18.08435754189944, "grad_norm": 0.5271783471107483, "learning_rate": 9.644257703081233e-05, "loss": 0.4537, "step": 32371 }, { "epoch": 18.08491620111732, "grad_norm": 4.852962970733643, "learning_rate": 9.641456582633052e-05, "loss": 0.4273, "step": 32372 }, { "epoch": 18.085474860335196, "grad_norm": 0.39356476068496704, "learning_rate": 9.638655462184874e-05, "loss": 0.37, "step": 32373 }, { "epoch": 18.086033519553073, "grad_norm": 0.44131913781166077, "learning_rate": 9.635854341736695e-05, "loss": 0.3359, "step": 32374 }, { "epoch": 18.08659217877095, "grad_norm": 2.905324697494507, "learning_rate": 9.633053221288515e-05, "loss": 0.3475, "step": 32375 }, { "epoch": 18.087150837988826, "grad_norm": 0.49924376606941223, "learning_rate": 9.630252100840336e-05, "loss": 0.3393, "step": 32376 }, { "epoch": 18.087709497206703, "grad_norm": 0.650465190410614, "learning_rate": 9.627450980392156e-05, "loss": 0.4384, "step": 32377 }, { "epoch": 18.08826815642458, "grad_norm": 0.35724392533302307, "learning_rate": 9.624649859943977e-05, "loss": 0.4068, "step": 32378 }, { "epoch": 18.08882681564246, "grad_norm": 0.5715853571891785, "learning_rate": 9.621848739495799e-05, "loss": 0.4898, "step": 32379 }, { "epoch": 18.089385474860336, "grad_norm": 0.4248448312282562, "learning_rate": 9.619047619047618e-05, "loss": 0.4238, "step": 32380 }, { "epoch": 18.089944134078213, "grad_norm": 0.7967003583908081, "learning_rate": 9.61624649859944e-05, "loss": 0.6419, "step": 32381 }, { "epoch": 18.09050279329609, "grad_norm": 0.6587761640548706, "learning_rate": 9.61344537815126e-05, "loss": 0.3594, "step": 32382 }, { "epoch": 18.091061452513966, "grad_norm": 0.5312923192977905, "learning_rate": 9.610644257703081e-05, "loss": 0.4188, "step": 32383 }, { "epoch": 18.091620111731842, 
"grad_norm": 0.585952639579773, "learning_rate": 9.607843137254903e-05, "loss": 0.3851, "step": 32384 }, { "epoch": 18.092178770949722, "grad_norm": 0.33357542753219604, "learning_rate": 9.605042016806723e-05, "loss": 0.3851, "step": 32385 }, { "epoch": 18.0927374301676, "grad_norm": 0.39248207211494446, "learning_rate": 9.602240896358545e-05, "loss": 0.3166, "step": 32386 }, { "epoch": 18.093296089385476, "grad_norm": 0.5831889510154724, "learning_rate": 9.599439775910364e-05, "loss": 0.4307, "step": 32387 }, { "epoch": 18.093854748603352, "grad_norm": 0.4109219014644623, "learning_rate": 9.596638655462186e-05, "loss": 0.4067, "step": 32388 }, { "epoch": 18.09441340782123, "grad_norm": 0.4178137183189392, "learning_rate": 9.593837535014006e-05, "loss": 0.3455, "step": 32389 }, { "epoch": 18.094972067039105, "grad_norm": 0.620341420173645, "learning_rate": 9.591036414565827e-05, "loss": 0.5119, "step": 32390 }, { "epoch": 18.095530726256982, "grad_norm": 0.467006117105484, "learning_rate": 9.588235294117648e-05, "loss": 0.3469, "step": 32391 }, { "epoch": 18.096089385474862, "grad_norm": 0.6388922929763794, "learning_rate": 9.585434173669468e-05, "loss": 0.4681, "step": 32392 }, { "epoch": 18.09664804469274, "grad_norm": 0.9081668853759766, "learning_rate": 9.582633053221289e-05, "loss": 0.4724, "step": 32393 }, { "epoch": 18.097206703910615, "grad_norm": 0.4373238682746887, "learning_rate": 9.579831932773111e-05, "loss": 0.3631, "step": 32394 }, { "epoch": 18.09776536312849, "grad_norm": 0.4496663510799408, "learning_rate": 9.57703081232493e-05, "loss": 0.3552, "step": 32395 }, { "epoch": 18.098324022346368, "grad_norm": 0.47216156125068665, "learning_rate": 9.574229691876752e-05, "loss": 0.3933, "step": 32396 }, { "epoch": 18.098882681564245, "grad_norm": 0.34115511178970337, "learning_rate": 9.571428571428571e-05, "loss": 0.443, "step": 32397 }, { "epoch": 18.09944134078212, "grad_norm": 0.6326836347579956, "learning_rate": 9.568627450980393e-05, "loss": 0.4772, 
"step": 32398 }, { "epoch": 18.1, "grad_norm": 0.45279860496520996, "learning_rate": 9.565826330532212e-05, "loss": 0.4123, "step": 32399 }, { "epoch": 18.100558659217878, "grad_norm": 0.5163799524307251, "learning_rate": 9.563025210084034e-05, "loss": 0.3916, "step": 32400 }, { "epoch": 18.101117318435755, "grad_norm": 0.43643999099731445, "learning_rate": 9.560224089635855e-05, "loss": 0.4468, "step": 32401 }, { "epoch": 18.10167597765363, "grad_norm": 0.4300617277622223, "learning_rate": 9.557422969187676e-05, "loss": 0.3304, "step": 32402 }, { "epoch": 18.102234636871508, "grad_norm": 0.35711267590522766, "learning_rate": 9.554621848739496e-05, "loss": 0.3385, "step": 32403 }, { "epoch": 18.102793296089384, "grad_norm": 0.6354407072067261, "learning_rate": 9.551820728291317e-05, "loss": 0.4069, "step": 32404 }, { "epoch": 18.10335195530726, "grad_norm": 0.4284682273864746, "learning_rate": 9.549019607843137e-05, "loss": 0.5048, "step": 32405 }, { "epoch": 18.10391061452514, "grad_norm": 0.3715086877346039, "learning_rate": 9.546218487394959e-05, "loss": 0.3421, "step": 32406 }, { "epoch": 18.104469273743018, "grad_norm": 0.425657719373703, "learning_rate": 9.543417366946779e-05, "loss": 0.4341, "step": 32407 }, { "epoch": 18.105027932960894, "grad_norm": 0.46359753608703613, "learning_rate": 9.5406162464986e-05, "loss": 0.4554, "step": 32408 }, { "epoch": 18.10558659217877, "grad_norm": 0.3752211928367615, "learning_rate": 9.53781512605042e-05, "loss": 0.3942, "step": 32409 }, { "epoch": 18.106145251396647, "grad_norm": 1.7721015214920044, "learning_rate": 9.535014005602242e-05, "loss": 0.4794, "step": 32410 }, { "epoch": 18.106703910614524, "grad_norm": 0.801632285118103, "learning_rate": 9.532212885154062e-05, "loss": 0.6042, "step": 32411 }, { "epoch": 18.107262569832404, "grad_norm": 0.4083986282348633, "learning_rate": 9.529411764705883e-05, "loss": 0.4206, "step": 32412 }, { "epoch": 18.10782122905028, "grad_norm": 0.6771788597106934, "learning_rate": 
9.526610644257703e-05, "loss": 0.6182, "step": 32413 }, { "epoch": 18.108379888268157, "grad_norm": 0.4505431652069092, "learning_rate": 9.523809523809524e-05, "loss": 0.4314, "step": 32414 }, { "epoch": 18.108938547486034, "grad_norm": 0.6466163992881775, "learning_rate": 9.521008403361345e-05, "loss": 0.5239, "step": 32415 }, { "epoch": 18.10949720670391, "grad_norm": 0.4537881910800934, "learning_rate": 9.518207282913167e-05, "loss": 0.3854, "step": 32416 }, { "epoch": 18.110055865921787, "grad_norm": 0.5449619889259338, "learning_rate": 9.515406162464986e-05, "loss": 0.4495, "step": 32417 }, { "epoch": 18.110614525139663, "grad_norm": 0.5234614014625549, "learning_rate": 9.512605042016808e-05, "loss": 0.4212, "step": 32418 }, { "epoch": 18.111173184357543, "grad_norm": 0.3781912326812744, "learning_rate": 9.509803921568627e-05, "loss": 0.3492, "step": 32419 }, { "epoch": 18.11173184357542, "grad_norm": 0.6956316232681274, "learning_rate": 9.507002801120449e-05, "loss": 0.4208, "step": 32420 }, { "epoch": 18.112290502793297, "grad_norm": 0.3979493975639343, "learning_rate": 9.50420168067227e-05, "loss": 0.4432, "step": 32421 }, { "epoch": 18.112849162011173, "grad_norm": 0.41402044892311096, "learning_rate": 9.50140056022409e-05, "loss": 0.4491, "step": 32422 }, { "epoch": 18.11340782122905, "grad_norm": 0.49965232610702515, "learning_rate": 9.498599439775911e-05, "loss": 0.4174, "step": 32423 }, { "epoch": 18.113966480446926, "grad_norm": 0.6669294834136963, "learning_rate": 9.495798319327731e-05, "loss": 0.4533, "step": 32424 }, { "epoch": 18.114525139664803, "grad_norm": 0.41609206795692444, "learning_rate": 9.492997198879552e-05, "loss": 0.3756, "step": 32425 }, { "epoch": 18.115083798882683, "grad_norm": 0.4448949098587036, "learning_rate": 9.490196078431373e-05, "loss": 0.3953, "step": 32426 }, { "epoch": 18.11564245810056, "grad_norm": 0.7255361080169678, "learning_rate": 9.487394957983193e-05, "loss": 0.4487, "step": 32427 }, { "epoch": 
18.116201117318436, "grad_norm": 0.413948655128479, "learning_rate": 9.484593837535015e-05, "loss": 0.3719, "step": 32428 }, { "epoch": 18.116759776536313, "grad_norm": 0.323770135641098, "learning_rate": 9.481792717086834e-05, "loss": 0.4094, "step": 32429 }, { "epoch": 18.11731843575419, "grad_norm": 0.3582766354084015, "learning_rate": 9.478991596638656e-05, "loss": 0.3807, "step": 32430 }, { "epoch": 18.117877094972066, "grad_norm": 0.3958209156990051, "learning_rate": 9.476190476190476e-05, "loss": 0.5188, "step": 32431 }, { "epoch": 18.118435754189946, "grad_norm": 0.7040544748306274, "learning_rate": 9.473389355742298e-05, "loss": 0.406, "step": 32432 }, { "epoch": 18.118994413407822, "grad_norm": 0.40433672070503235, "learning_rate": 9.470588235294118e-05, "loss": 0.3748, "step": 32433 }, { "epoch": 18.1195530726257, "grad_norm": 0.5301108360290527, "learning_rate": 9.467787114845939e-05, "loss": 0.3411, "step": 32434 }, { "epoch": 18.120111731843576, "grad_norm": 0.6196574568748474, "learning_rate": 9.46498599439776e-05, "loss": 0.4587, "step": 32435 }, { "epoch": 18.120670391061452, "grad_norm": 1.0553373098373413, "learning_rate": 9.46218487394958e-05, "loss": 0.5373, "step": 32436 }, { "epoch": 18.12122905027933, "grad_norm": 0.4461023509502411, "learning_rate": 9.4593837535014e-05, "loss": 0.3963, "step": 32437 }, { "epoch": 18.121787709497205, "grad_norm": 0.45119306445121765, "learning_rate": 9.456582633053223e-05, "loss": 0.4016, "step": 32438 }, { "epoch": 18.122346368715085, "grad_norm": 0.3909936547279358, "learning_rate": 9.453781512605042e-05, "loss": 0.3992, "step": 32439 }, { "epoch": 18.122905027932962, "grad_norm": 0.4895066022872925, "learning_rate": 9.450980392156864e-05, "loss": 0.4642, "step": 32440 }, { "epoch": 18.12346368715084, "grad_norm": 0.31271567940711975, "learning_rate": 9.448179271708683e-05, "loss": 0.3423, "step": 32441 }, { "epoch": 18.124022346368715, "grad_norm": 0.6508842706680298, "learning_rate": 
9.445378151260505e-05, "loss": 0.6018, "step": 32442 }, { "epoch": 18.12458100558659, "grad_norm": 0.49566635489463806, "learning_rate": 9.442577030812326e-05, "loss": 0.3558, "step": 32443 }, { "epoch": 18.12513966480447, "grad_norm": 0.4299832582473755, "learning_rate": 9.439775910364146e-05, "loss": 0.4237, "step": 32444 }, { "epoch": 18.125698324022345, "grad_norm": 0.45366498827934265, "learning_rate": 9.436974789915967e-05, "loss": 0.4452, "step": 32445 }, { "epoch": 18.126256983240225, "grad_norm": 0.9918420910835266, "learning_rate": 9.434173669467787e-05, "loss": 0.3383, "step": 32446 }, { "epoch": 18.1268156424581, "grad_norm": 0.41790103912353516, "learning_rate": 9.431372549019608e-05, "loss": 0.3641, "step": 32447 }, { "epoch": 18.127374301675978, "grad_norm": 0.5692659616470337, "learning_rate": 9.42857142857143e-05, "loss": 0.3654, "step": 32448 }, { "epoch": 18.127932960893855, "grad_norm": 0.38307905197143555, "learning_rate": 9.425770308123249e-05, "loss": 0.4579, "step": 32449 }, { "epoch": 18.12849162011173, "grad_norm": 0.7055395245552063, "learning_rate": 9.422969187675071e-05, "loss": 0.4169, "step": 32450 }, { "epoch": 18.129050279329608, "grad_norm": 0.42380666732788086, "learning_rate": 9.42016806722689e-05, "loss": 0.3665, "step": 32451 }, { "epoch": 18.129608938547484, "grad_norm": 0.4086686968803406, "learning_rate": 9.417366946778712e-05, "loss": 0.492, "step": 32452 }, { "epoch": 18.130167597765364, "grad_norm": 0.47182878851890564, "learning_rate": 9.414565826330533e-05, "loss": 0.4582, "step": 32453 }, { "epoch": 18.13072625698324, "grad_norm": 0.8682714700698853, "learning_rate": 9.411764705882353e-05, "loss": 0.4316, "step": 32454 }, { "epoch": 18.131284916201118, "grad_norm": 0.603193461894989, "learning_rate": 9.408963585434174e-05, "loss": 0.3247, "step": 32455 }, { "epoch": 18.131843575418994, "grad_norm": 0.38381659984588623, "learning_rate": 9.406162464985995e-05, "loss": 0.3178, "step": 32456 }, { "epoch": 
18.13240223463687, "grad_norm": 0.46145394444465637, "learning_rate": 9.403361344537815e-05, "loss": 0.3245, "step": 32457 }, { "epoch": 18.132960893854747, "grad_norm": 0.5114855170249939, "learning_rate": 9.400560224089636e-05, "loss": 0.4095, "step": 32458 }, { "epoch": 18.133519553072627, "grad_norm": 1.6515815258026123, "learning_rate": 9.397759103641456e-05, "loss": 0.3389, "step": 32459 }, { "epoch": 18.134078212290504, "grad_norm": 1.1794813871383667, "learning_rate": 9.394957983193278e-05, "loss": 0.5584, "step": 32460 }, { "epoch": 18.13463687150838, "grad_norm": 0.583577573299408, "learning_rate": 9.392156862745098e-05, "loss": 0.4741, "step": 32461 }, { "epoch": 18.135195530726257, "grad_norm": 0.40958139300346375, "learning_rate": 9.38935574229692e-05, "loss": 0.4346, "step": 32462 }, { "epoch": 18.135754189944134, "grad_norm": 0.36646702885627747, "learning_rate": 9.386554621848739e-05, "loss": 0.38, "step": 32463 }, { "epoch": 18.13631284916201, "grad_norm": 0.7411330938339233, "learning_rate": 9.383753501400561e-05, "loss": 0.4981, "step": 32464 }, { "epoch": 18.136871508379887, "grad_norm": 0.3926258981227875, "learning_rate": 9.380952380952381e-05, "loss": 0.3915, "step": 32465 }, { "epoch": 18.137430167597767, "grad_norm": 0.37541258335113525, "learning_rate": 9.378151260504202e-05, "loss": 0.2965, "step": 32466 }, { "epoch": 18.137988826815644, "grad_norm": 0.4508364200592041, "learning_rate": 9.375350140056023e-05, "loss": 0.3885, "step": 32467 }, { "epoch": 18.13854748603352, "grad_norm": 1.0271037817001343, "learning_rate": 9.372549019607843e-05, "loss": 0.3198, "step": 32468 }, { "epoch": 18.139106145251397, "grad_norm": 1.0580130815505981, "learning_rate": 9.369747899159664e-05, "loss": 0.3248, "step": 32469 }, { "epoch": 18.139664804469273, "grad_norm": 0.5313414335250854, "learning_rate": 9.366946778711486e-05, "loss": 0.3455, "step": 32470 }, { "epoch": 18.14022346368715, "grad_norm": 0.4863787293434143, "learning_rate": 
9.364145658263305e-05, "loss": 0.3814, "step": 32471 }, { "epoch": 18.140782122905026, "grad_norm": 0.48073557019233704, "learning_rate": 9.361344537815127e-05, "loss": 0.3625, "step": 32472 }, { "epoch": 18.141340782122906, "grad_norm": 0.37851858139038086, "learning_rate": 9.358543417366946e-05, "loss": 0.3364, "step": 32473 }, { "epoch": 18.141899441340783, "grad_norm": 0.32191070914268494, "learning_rate": 9.355742296918768e-05, "loss": 0.3259, "step": 32474 }, { "epoch": 18.14245810055866, "grad_norm": 0.40157049894332886, "learning_rate": 9.352941176470589e-05, "loss": 0.4028, "step": 32475 }, { "epoch": 18.143016759776536, "grad_norm": 0.3746364712715149, "learning_rate": 9.35014005602241e-05, "loss": 0.3802, "step": 32476 }, { "epoch": 18.143575418994413, "grad_norm": 3.2074031829833984, "learning_rate": 9.34733893557423e-05, "loss": 0.3821, "step": 32477 }, { "epoch": 18.14413407821229, "grad_norm": 0.8361067175865173, "learning_rate": 9.34453781512605e-05, "loss": 0.3681, "step": 32478 }, { "epoch": 18.144692737430166, "grad_norm": 0.4876335561275482, "learning_rate": 9.341736694677871e-05, "loss": 0.5789, "step": 32479 }, { "epoch": 18.145251396648046, "grad_norm": 0.46530160307884216, "learning_rate": 9.338935574229693e-05, "loss": 0.4168, "step": 32480 }, { "epoch": 18.145810055865923, "grad_norm": 0.40378502011299133, "learning_rate": 9.336134453781512e-05, "loss": 0.4796, "step": 32481 }, { "epoch": 18.1463687150838, "grad_norm": 0.9662461876869202, "learning_rate": 9.333333333333334e-05, "loss": 0.3917, "step": 32482 }, { "epoch": 18.146927374301676, "grad_norm": 0.33864277601242065, "learning_rate": 9.330532212885154e-05, "loss": 0.3652, "step": 32483 }, { "epoch": 18.147486033519552, "grad_norm": 0.603958010673523, "learning_rate": 9.327731092436976e-05, "loss": 0.5275, "step": 32484 }, { "epoch": 18.14804469273743, "grad_norm": 0.36513668298721313, "learning_rate": 9.324929971988795e-05, "loss": 0.4782, "step": 32485 }, { "epoch": 
18.14860335195531, "grad_norm": 1.4098597764968872, "learning_rate": 9.322128851540617e-05, "loss": 0.4092, "step": 32486 }, { "epoch": 18.149162011173186, "grad_norm": 0.35510867834091187, "learning_rate": 9.319327731092437e-05, "loss": 0.3667, "step": 32487 }, { "epoch": 18.149720670391062, "grad_norm": 0.9086071848869324, "learning_rate": 9.316526610644258e-05, "loss": 0.467, "step": 32488 }, { "epoch": 18.15027932960894, "grad_norm": 0.4618314504623413, "learning_rate": 9.313725490196079e-05, "loss": 0.375, "step": 32489 }, { "epoch": 18.150837988826815, "grad_norm": 0.649321973323822, "learning_rate": 9.310924369747899e-05, "loss": 0.3544, "step": 32490 }, { "epoch": 18.15139664804469, "grad_norm": 2.0096356868743896, "learning_rate": 9.30812324929972e-05, "loss": 0.499, "step": 32491 }, { "epoch": 18.15195530726257, "grad_norm": 0.5533978343009949, "learning_rate": 9.305322128851542e-05, "loss": 0.4398, "step": 32492 }, { "epoch": 18.15251396648045, "grad_norm": 0.6605199575424194, "learning_rate": 9.302521008403361e-05, "loss": 0.3721, "step": 32493 }, { "epoch": 18.153072625698325, "grad_norm": 0.3735016882419586, "learning_rate": 9.299719887955183e-05, "loss": 0.4213, "step": 32494 }, { "epoch": 18.1536312849162, "grad_norm": 0.42000138759613037, "learning_rate": 9.296918767507002e-05, "loss": 0.4794, "step": 32495 }, { "epoch": 18.154189944134078, "grad_norm": 0.48943063616752625, "learning_rate": 9.294117647058824e-05, "loss": 0.3607, "step": 32496 }, { "epoch": 18.154748603351955, "grad_norm": 1.0324535369873047, "learning_rate": 9.291316526610645e-05, "loss": 0.4688, "step": 32497 }, { "epoch": 18.15530726256983, "grad_norm": 0.5306296348571777, "learning_rate": 9.288515406162465e-05, "loss": 0.3205, "step": 32498 }, { "epoch": 18.155865921787708, "grad_norm": 0.38911643624305725, "learning_rate": 9.285714285714286e-05, "loss": 0.3856, "step": 32499 }, { "epoch": 18.156424581005588, "grad_norm": 0.5162615776062012, "learning_rate": 
9.282913165266106e-05, "loss": 0.3852, "step": 32500 }, { "epoch": 18.156424581005588, "eval_cer": 0.08433439167294032, "eval_loss": 0.32063376903533936, "eval_runtime": 55.2078, "eval_samples_per_second": 82.199, "eval_steps_per_second": 5.144, "eval_wer": 0.3336498980475406, "step": 32500 }, { "epoch": 18.156983240223465, "grad_norm": 0.5900794863700867, "learning_rate": 9.280112044817927e-05, "loss": 0.3841, "step": 32501 }, { "epoch": 18.15754189944134, "grad_norm": 0.37070396542549133, "learning_rate": 9.277310924369749e-05, "loss": 0.3839, "step": 32502 }, { "epoch": 18.158100558659218, "grad_norm": 0.4954962134361267, "learning_rate": 9.274509803921568e-05, "loss": 0.545, "step": 32503 }, { "epoch": 18.158659217877094, "grad_norm": 0.6245876550674438, "learning_rate": 9.27170868347339e-05, "loss": 0.3945, "step": 32504 }, { "epoch": 18.15921787709497, "grad_norm": 0.6478275656700134, "learning_rate": 9.26890756302521e-05, "loss": 0.4555, "step": 32505 }, { "epoch": 18.159776536312847, "grad_norm": 0.36954963207244873, "learning_rate": 9.266106442577031e-05, "loss": 0.4065, "step": 32506 }, { "epoch": 18.160335195530728, "grad_norm": 0.4671505093574524, "learning_rate": 9.263305322128852e-05, "loss": 0.4342, "step": 32507 }, { "epoch": 18.160893854748604, "grad_norm": 0.641503095626831, "learning_rate": 9.260504201680673e-05, "loss": 0.461, "step": 32508 }, { "epoch": 18.16145251396648, "grad_norm": 0.47220394015312195, "learning_rate": 9.257703081232493e-05, "loss": 0.3333, "step": 32509 }, { "epoch": 18.162011173184357, "grad_norm": 0.38197147846221924, "learning_rate": 9.254901960784314e-05, "loss": 0.3194, "step": 32510 }, { "epoch": 18.162569832402234, "grad_norm": 1.2994736433029175, "learning_rate": 9.252100840336134e-05, "loss": 0.4188, "step": 32511 }, { "epoch": 18.16312849162011, "grad_norm": 0.3727455735206604, "learning_rate": 9.249299719887955e-05, "loss": 0.4084, "step": 32512 }, { "epoch": 18.16368715083799, "grad_norm": 0.4289976954460144, 
"learning_rate": 9.246498599439776e-05, "loss": 0.4244, "step": 32513 }, { "epoch": 18.164245810055867, "grad_norm": 1.45867121219635, "learning_rate": 9.243697478991598e-05, "loss": 0.3903, "step": 32514 }, { "epoch": 18.164804469273744, "grad_norm": 1.9103114604949951, "learning_rate": 9.240896358543417e-05, "loss": 0.3926, "step": 32515 }, { "epoch": 18.16536312849162, "grad_norm": 0.8467742800712585, "learning_rate": 9.238095238095239e-05, "loss": 0.4844, "step": 32516 }, { "epoch": 18.165921787709497, "grad_norm": 0.5672905445098877, "learning_rate": 9.235294117647058e-05, "loss": 0.4825, "step": 32517 }, { "epoch": 18.166480446927373, "grad_norm": 0.35855451226234436, "learning_rate": 9.23249299719888e-05, "loss": 0.3361, "step": 32518 }, { "epoch": 18.16703910614525, "grad_norm": 0.3326796889305115, "learning_rate": 9.2296918767507e-05, "loss": 0.3375, "step": 32519 }, { "epoch": 18.16759776536313, "grad_norm": 0.525824785232544, "learning_rate": 9.226890756302521e-05, "loss": 0.3427, "step": 32520 }, { "epoch": 18.168156424581007, "grad_norm": 0.40844160318374634, "learning_rate": 9.224089635854342e-05, "loss": 0.3608, "step": 32521 }, { "epoch": 18.168715083798883, "grad_norm": 0.5018882751464844, "learning_rate": 9.221288515406162e-05, "loss": 0.4218, "step": 32522 }, { "epoch": 18.16927374301676, "grad_norm": 0.38595661520957947, "learning_rate": 9.218487394957983e-05, "loss": 0.342, "step": 32523 }, { "epoch": 18.169832402234636, "grad_norm": 0.49344679713249207, "learning_rate": 9.215686274509805e-05, "loss": 0.43, "step": 32524 }, { "epoch": 18.170391061452513, "grad_norm": 0.3859495520591736, "learning_rate": 9.212885154061624e-05, "loss": 0.3672, "step": 32525 }, { "epoch": 18.17094972067039, "grad_norm": 0.5541929006576538, "learning_rate": 9.210084033613446e-05, "loss": 0.6609, "step": 32526 }, { "epoch": 18.17150837988827, "grad_norm": 0.422832190990448, "learning_rate": 9.207282913165265e-05, "loss": 0.4374, "step": 32527 }, { "epoch": 
18.172067039106146, "grad_norm": 0.3607363998889923, "learning_rate": 9.204481792717087e-05, "loss": 0.4006, "step": 32528 }, { "epoch": 18.172625698324023, "grad_norm": 0.4760386347770691, "learning_rate": 9.201680672268908e-05, "loss": 0.4676, "step": 32529 }, { "epoch": 18.1731843575419, "grad_norm": 0.5883450508117676, "learning_rate": 9.198879551820729e-05, "loss": 0.3944, "step": 32530 }, { "epoch": 18.173743016759776, "grad_norm": 0.4508742690086365, "learning_rate": 9.196078431372549e-05, "loss": 0.4697, "step": 32531 }, { "epoch": 18.174301675977652, "grad_norm": 1.72762131690979, "learning_rate": 9.19327731092437e-05, "loss": 0.3853, "step": 32532 }, { "epoch": 18.174860335195532, "grad_norm": 0.3518361449241638, "learning_rate": 9.19047619047619e-05, "loss": 0.3747, "step": 32533 }, { "epoch": 18.17541899441341, "grad_norm": 3.5360472202301025, "learning_rate": 9.187675070028012e-05, "loss": 0.5803, "step": 32534 }, { "epoch": 18.175977653631286, "grad_norm": 1.5024994611740112, "learning_rate": 9.184873949579832e-05, "loss": 0.3919, "step": 32535 }, { "epoch": 18.176536312849162, "grad_norm": 0.675117015838623, "learning_rate": 9.182072829131653e-05, "loss": 0.3196, "step": 32536 }, { "epoch": 18.17709497206704, "grad_norm": 0.470883846282959, "learning_rate": 9.179271708683473e-05, "loss": 0.3848, "step": 32537 }, { "epoch": 18.177653631284915, "grad_norm": 0.5163912177085876, "learning_rate": 9.176470588235295e-05, "loss": 0.471, "step": 32538 }, { "epoch": 18.178212290502792, "grad_norm": 1.051300287246704, "learning_rate": 9.173669467787114e-05, "loss": 0.4844, "step": 32539 }, { "epoch": 18.178770949720672, "grad_norm": 0.41967248916625977, "learning_rate": 9.170868347338936e-05, "loss": 0.4125, "step": 32540 }, { "epoch": 18.17932960893855, "grad_norm": 6.672290802001953, "learning_rate": 9.168067226890756e-05, "loss": 0.3738, "step": 32541 }, { "epoch": 18.179888268156425, "grad_norm": 0.3774382770061493, "learning_rate": 9.165266106442577e-05, 
"loss": 0.424, "step": 32542 }, { "epoch": 18.1804469273743, "grad_norm": 0.4206157624721527, "learning_rate": 9.162464985994398e-05, "loss": 0.3353, "step": 32543 }, { "epoch": 18.18100558659218, "grad_norm": 0.4097682535648346, "learning_rate": 9.159663865546218e-05, "loss": 0.3576, "step": 32544 }, { "epoch": 18.181564245810055, "grad_norm": 0.9245181679725647, "learning_rate": 9.156862745098039e-05, "loss": 0.4094, "step": 32545 }, { "epoch": 18.18212290502793, "grad_norm": 0.3932160437107086, "learning_rate": 9.154061624649861e-05, "loss": 0.3837, "step": 32546 }, { "epoch": 18.18268156424581, "grad_norm": 0.4510837197303772, "learning_rate": 9.15126050420168e-05, "loss": 0.4662, "step": 32547 }, { "epoch": 18.183240223463688, "grad_norm": 0.7100754976272583, "learning_rate": 9.148459383753502e-05, "loss": 0.4582, "step": 32548 }, { "epoch": 18.183798882681565, "grad_norm": 0.5350834131240845, "learning_rate": 9.145658263305321e-05, "loss": 0.3508, "step": 32549 }, { "epoch": 18.18435754189944, "grad_norm": 1.6078065633773804, "learning_rate": 9.142857142857143e-05, "loss": 0.3739, "step": 32550 }, { "epoch": 18.184916201117318, "grad_norm": 0.5379326343536377, "learning_rate": 9.140056022408964e-05, "loss": 0.394, "step": 32551 }, { "epoch": 18.185474860335194, "grad_norm": 0.44442981481552124, "learning_rate": 9.137254901960784e-05, "loss": 0.489, "step": 32552 }, { "epoch": 18.18603351955307, "grad_norm": 0.38975608348846436, "learning_rate": 9.134453781512605e-05, "loss": 0.4322, "step": 32553 }, { "epoch": 18.18659217877095, "grad_norm": 0.5542870163917542, "learning_rate": 9.131652661064426e-05, "loss": 0.419, "step": 32554 }, { "epoch": 18.187150837988828, "grad_norm": 0.5849865078926086, "learning_rate": 9.128851540616246e-05, "loss": 0.5205, "step": 32555 }, { "epoch": 18.187709497206704, "grad_norm": 0.37388792634010315, "learning_rate": 9.126050420168068e-05, "loss": 0.3099, "step": 32556 }, { "epoch": 18.18826815642458, "grad_norm": 
0.6174965500831604, "learning_rate": 9.123249299719887e-05, "loss": 0.2984, "step": 32557 }, { "epoch": 18.188826815642457, "grad_norm": 0.30588021874427795, "learning_rate": 9.12044817927171e-05, "loss": 0.2786, "step": 32558 }, { "epoch": 18.189385474860334, "grad_norm": 0.23707345128059387, "learning_rate": 9.117647058823529e-05, "loss": 0.2342, "step": 32559 }, { "epoch": 18.189944134078214, "grad_norm": 0.541202962398529, "learning_rate": 9.11484593837535e-05, "loss": 0.3985, "step": 32560 }, { "epoch": 18.19050279329609, "grad_norm": 0.3407653272151947, "learning_rate": 9.112044817927171e-05, "loss": 0.4086, "step": 32561 }, { "epoch": 18.191061452513967, "grad_norm": 0.6027182936668396, "learning_rate": 9.109243697478992e-05, "loss": 0.5373, "step": 32562 }, { "epoch": 18.191620111731844, "grad_norm": 0.3618367910385132, "learning_rate": 9.106442577030812e-05, "loss": 0.3936, "step": 32563 }, { "epoch": 18.19217877094972, "grad_norm": 1.2943954467773438, "learning_rate": 9.103641456582633e-05, "loss": 0.442, "step": 32564 }, { "epoch": 18.192737430167597, "grad_norm": 0.49033835530281067, "learning_rate": 9.100840336134454e-05, "loss": 0.3982, "step": 32565 }, { "epoch": 18.193296089385473, "grad_norm": 0.45318087935447693, "learning_rate": 9.098039215686274e-05, "loss": 0.431, "step": 32566 }, { "epoch": 18.193854748603353, "grad_norm": 0.46391695737838745, "learning_rate": 9.095238095238095e-05, "loss": 0.4346, "step": 32567 }, { "epoch": 18.19441340782123, "grad_norm": 0.3761165738105774, "learning_rate": 9.092436974789917e-05, "loss": 0.3828, "step": 32568 }, { "epoch": 18.194972067039107, "grad_norm": 0.39247897267341614, "learning_rate": 9.089635854341736e-05, "loss": 0.4222, "step": 32569 }, { "epoch": 18.195530726256983, "grad_norm": 0.5490865111351013, "learning_rate": 9.086834733893558e-05, "loss": 0.4431, "step": 32570 }, { "epoch": 18.19608938547486, "grad_norm": 0.43112263083457947, "learning_rate": 9.084033613445377e-05, "loss": 0.377, "step": 
32571 }, { "epoch": 18.196648044692736, "grad_norm": 0.594628632068634, "learning_rate": 9.081232492997199e-05, "loss": 0.3921, "step": 32572 }, { "epoch": 18.197206703910613, "grad_norm": 0.858647882938385, "learning_rate": 9.07843137254902e-05, "loss": 0.4228, "step": 32573 }, { "epoch": 18.197765363128493, "grad_norm": 0.3972368836402893, "learning_rate": 9.07563025210084e-05, "loss": 0.4821, "step": 32574 }, { "epoch": 18.19832402234637, "grad_norm": 0.6365428566932678, "learning_rate": 9.072829131652661e-05, "loss": 0.5124, "step": 32575 }, { "epoch": 18.198882681564246, "grad_norm": 0.3716276288032532, "learning_rate": 9.070028011204482e-05, "loss": 0.3816, "step": 32576 }, { "epoch": 18.199441340782123, "grad_norm": 0.40125203132629395, "learning_rate": 9.067226890756302e-05, "loss": 0.3364, "step": 32577 }, { "epoch": 18.2, "grad_norm": 1.586991548538208, "learning_rate": 9.064425770308124e-05, "loss": 0.3861, "step": 32578 }, { "epoch": 18.200558659217876, "grad_norm": 0.4253595769405365, "learning_rate": 9.061624649859943e-05, "loss": 0.407, "step": 32579 }, { "epoch": 18.201117318435756, "grad_norm": 0.5789925456047058, "learning_rate": 9.058823529411765e-05, "loss": 0.387, "step": 32580 }, { "epoch": 18.201675977653633, "grad_norm": 0.5610730051994324, "learning_rate": 9.056022408963585e-05, "loss": 0.3732, "step": 32581 }, { "epoch": 18.20223463687151, "grad_norm": 0.5564419031143188, "learning_rate": 9.053221288515406e-05, "loss": 0.4879, "step": 32582 }, { "epoch": 18.202793296089386, "grad_norm": 0.3981051743030548, "learning_rate": 9.050420168067227e-05, "loss": 0.3245, "step": 32583 }, { "epoch": 18.203351955307262, "grad_norm": 0.47177278995513916, "learning_rate": 9.047619047619048e-05, "loss": 0.3961, "step": 32584 }, { "epoch": 18.20391061452514, "grad_norm": 0.3759315311908722, "learning_rate": 9.044817927170868e-05, "loss": 0.3418, "step": 32585 }, { "epoch": 18.204469273743015, "grad_norm": 0.9305090308189392, "learning_rate": 
9.042016806722689e-05, "loss": 0.4495, "step": 32586 }, { "epoch": 18.205027932960895, "grad_norm": 1.0677870512008667, "learning_rate": 9.03921568627451e-05, "loss": 0.4175, "step": 32587 }, { "epoch": 18.205586592178772, "grad_norm": 0.3763781785964966, "learning_rate": 9.036414565826331e-05, "loss": 0.3729, "step": 32588 }, { "epoch": 18.20614525139665, "grad_norm": 0.5919235944747925, "learning_rate": 9.03361344537815e-05, "loss": 0.446, "step": 32589 }, { "epoch": 18.206703910614525, "grad_norm": 0.4296429455280304, "learning_rate": 9.030812324929973e-05, "loss": 0.3921, "step": 32590 }, { "epoch": 18.2072625698324, "grad_norm": 0.5748556852340698, "learning_rate": 9.028011204481792e-05, "loss": 0.5197, "step": 32591 }, { "epoch": 18.20782122905028, "grad_norm": 0.48893022537231445, "learning_rate": 9.025210084033614e-05, "loss": 0.5051, "step": 32592 }, { "epoch": 18.208379888268155, "grad_norm": 0.3871202766895294, "learning_rate": 9.022408963585433e-05, "loss": 0.3564, "step": 32593 }, { "epoch": 18.208938547486035, "grad_norm": 0.6607181429862976, "learning_rate": 9.019607843137255e-05, "loss": 0.398, "step": 32594 }, { "epoch": 18.20949720670391, "grad_norm": 0.49910831451416016, "learning_rate": 9.016806722689076e-05, "loss": 0.3749, "step": 32595 }, { "epoch": 18.210055865921788, "grad_norm": 1.2300628423690796, "learning_rate": 9.014005602240896e-05, "loss": 0.543, "step": 32596 }, { "epoch": 18.210614525139665, "grad_norm": 1.643019676208496, "learning_rate": 9.011204481792717e-05, "loss": 0.3282, "step": 32597 }, { "epoch": 18.21117318435754, "grad_norm": 1.0076401233673096, "learning_rate": 9.008403361344537e-05, "loss": 0.5498, "step": 32598 }, { "epoch": 18.211731843575418, "grad_norm": 1.1145869493484497, "learning_rate": 9.005602240896358e-05, "loss": 0.6046, "step": 32599 }, { "epoch": 18.212290502793294, "grad_norm": 0.4166979491710663, "learning_rate": 9.00280112044818e-05, "loss": 0.3985, "step": 32600 }, { "epoch": 18.212849162011175, 
"grad_norm": 0.5675028562545776, "learning_rate": 8.999999999999999e-05, "loss": 0.5742, "step": 32601 }, { "epoch": 18.21340782122905, "grad_norm": 0.4414902627468109, "learning_rate": 8.997198879551821e-05, "loss": 0.3072, "step": 32602 }, { "epoch": 18.213966480446928, "grad_norm": 0.5873119235038757, "learning_rate": 8.99439775910364e-05, "loss": 0.3824, "step": 32603 }, { "epoch": 18.214525139664804, "grad_norm": 0.4763086438179016, "learning_rate": 8.991596638655462e-05, "loss": 0.3903, "step": 32604 }, { "epoch": 18.21508379888268, "grad_norm": 1.431850552558899, "learning_rate": 8.988795518207283e-05, "loss": 0.4268, "step": 32605 }, { "epoch": 18.215642458100557, "grad_norm": 0.47908493876457214, "learning_rate": 8.985994397759104e-05, "loss": 0.3364, "step": 32606 }, { "epoch": 18.216201117318437, "grad_norm": 0.4700092673301697, "learning_rate": 8.983193277310924e-05, "loss": 0.4631, "step": 32607 }, { "epoch": 18.216759776536314, "grad_norm": 1.0191336870193481, "learning_rate": 8.980392156862745e-05, "loss": 0.4135, "step": 32608 }, { "epoch": 18.21731843575419, "grad_norm": 1.3718301057815552, "learning_rate": 8.977591036414565e-05, "loss": 0.4806, "step": 32609 }, { "epoch": 18.217877094972067, "grad_norm": 6.846329689025879, "learning_rate": 8.974789915966387e-05, "loss": 0.3451, "step": 32610 }, { "epoch": 18.218435754189944, "grad_norm": 0.4599190056324005, "learning_rate": 8.971988795518207e-05, "loss": 0.3301, "step": 32611 }, { "epoch": 18.21899441340782, "grad_norm": 1.8401906490325928, "learning_rate": 8.969187675070029e-05, "loss": 0.3158, "step": 32612 }, { "epoch": 18.219553072625697, "grad_norm": 0.36960023641586304, "learning_rate": 8.966386554621848e-05, "loss": 0.2652, "step": 32613 }, { "epoch": 18.220111731843577, "grad_norm": 0.4305665194988251, "learning_rate": 8.96358543417367e-05, "loss": 0.4355, "step": 32614 }, { "epoch": 18.220670391061454, "grad_norm": 0.6946961283683777, "learning_rate": 8.960784313725492e-05, "loss": 
0.4415, "step": 32615 }, { "epoch": 18.22122905027933, "grad_norm": 0.4750472605228424, "learning_rate": 8.957983193277311e-05, "loss": 0.3805, "step": 32616 }, { "epoch": 18.221787709497207, "grad_norm": 0.4733906686306, "learning_rate": 8.955182072829133e-05, "loss": 0.3545, "step": 32617 }, { "epoch": 18.222346368715083, "grad_norm": 0.5815635919570923, "learning_rate": 8.952380952380952e-05, "loss": 0.3927, "step": 32618 }, { "epoch": 18.22290502793296, "grad_norm": 6.846716403961182, "learning_rate": 8.949579831932774e-05, "loss": 0.3762, "step": 32619 }, { "epoch": 18.223463687150836, "grad_norm": 0.4133613109588623, "learning_rate": 8.946778711484595e-05, "loss": 0.3729, "step": 32620 }, { "epoch": 18.224022346368717, "grad_norm": 0.5400164723396301, "learning_rate": 8.943977591036415e-05, "loss": 0.3753, "step": 32621 }, { "epoch": 18.224581005586593, "grad_norm": 1.4985110759735107, "learning_rate": 8.941176470588236e-05, "loss": 0.4775, "step": 32622 }, { "epoch": 18.22513966480447, "grad_norm": 0.592915952205658, "learning_rate": 8.938375350140056e-05, "loss": 0.3905, "step": 32623 }, { "epoch": 18.225698324022346, "grad_norm": 0.5983620882034302, "learning_rate": 8.935574229691877e-05, "loss": 0.4551, "step": 32624 }, { "epoch": 18.226256983240223, "grad_norm": 0.4434497654438019, "learning_rate": 8.932773109243698e-05, "loss": 0.4656, "step": 32625 }, { "epoch": 18.2268156424581, "grad_norm": 0.5307989120483398, "learning_rate": 8.929971988795518e-05, "loss": 0.4007, "step": 32626 }, { "epoch": 18.227374301675976, "grad_norm": 0.721480667591095, "learning_rate": 8.92717086834734e-05, "loss": 0.5197, "step": 32627 }, { "epoch": 18.227932960893856, "grad_norm": 0.45105406641960144, "learning_rate": 8.92436974789916e-05, "loss": 0.418, "step": 32628 }, { "epoch": 18.228491620111733, "grad_norm": 0.48472219705581665, "learning_rate": 8.921568627450981e-05, "loss": 0.4069, "step": 32629 }, { "epoch": 18.22905027932961, "grad_norm": 0.5231115221977234, 
"learning_rate": 8.9187675070028e-05, "loss": 0.4382, "step": 32630 }, { "epoch": 18.229608938547486, "grad_norm": 0.33041539788246155, "learning_rate": 8.915966386554623e-05, "loss": 0.4929, "step": 32631 }, { "epoch": 18.230167597765362, "grad_norm": 1.308843970298767, "learning_rate": 8.913165266106443e-05, "loss": 0.3491, "step": 32632 }, { "epoch": 18.23072625698324, "grad_norm": 0.5758770704269409, "learning_rate": 8.910364145658264e-05, "loss": 0.5197, "step": 32633 }, { "epoch": 18.23128491620112, "grad_norm": 0.3611909747123718, "learning_rate": 8.907563025210084e-05, "loss": 0.3711, "step": 32634 }, { "epoch": 18.231843575418996, "grad_norm": 1.6035256385803223, "learning_rate": 8.904761904761905e-05, "loss": 0.4406, "step": 32635 }, { "epoch": 18.232402234636872, "grad_norm": 0.6332162618637085, "learning_rate": 8.901960784313726e-05, "loss": 0.5166, "step": 32636 }, { "epoch": 18.23296089385475, "grad_norm": 0.651264488697052, "learning_rate": 8.899159663865548e-05, "loss": 0.378, "step": 32637 }, { "epoch": 18.233519553072625, "grad_norm": 0.3985283076763153, "learning_rate": 8.896358543417367e-05, "loss": 0.3369, "step": 32638 }, { "epoch": 18.234078212290502, "grad_norm": 0.3729357123374939, "learning_rate": 8.893557422969189e-05, "loss": 0.3628, "step": 32639 }, { "epoch": 18.23463687150838, "grad_norm": 0.40608903765678406, "learning_rate": 8.890756302521008e-05, "loss": 0.4224, "step": 32640 }, { "epoch": 18.23519553072626, "grad_norm": 0.39620083570480347, "learning_rate": 8.88795518207283e-05, "loss": 0.3865, "step": 32641 }, { "epoch": 18.235754189944135, "grad_norm": 0.35489603877067566, "learning_rate": 8.88515406162465e-05, "loss": 0.325, "step": 32642 }, { "epoch": 18.23631284916201, "grad_norm": 0.6893346309661865, "learning_rate": 8.882352941176471e-05, "loss": 0.4692, "step": 32643 }, { "epoch": 18.23687150837989, "grad_norm": 0.4614734351634979, "learning_rate": 8.879551820728292e-05, "loss": 0.5273, "step": 32644 }, { "epoch": 
18.237430167597765, "grad_norm": 0.7457419037818909, "learning_rate": 8.876750700280112e-05, "loss": 0.4335, "step": 32645 }, { "epoch": 18.23798882681564, "grad_norm": 0.5542821288108826, "learning_rate": 8.873949579831933e-05, "loss": 0.462, "step": 32646 }, { "epoch": 18.238547486033518, "grad_norm": 0.3706904649734497, "learning_rate": 8.871148459383755e-05, "loss": 0.4107, "step": 32647 }, { "epoch": 18.239106145251398, "grad_norm": 0.4472128450870514, "learning_rate": 8.868347338935574e-05, "loss": 0.4742, "step": 32648 }, { "epoch": 18.239664804469275, "grad_norm": 0.6131716966629028, "learning_rate": 8.865546218487396e-05, "loss": 0.3673, "step": 32649 }, { "epoch": 18.24022346368715, "grad_norm": 0.32064637541770935, "learning_rate": 8.862745098039215e-05, "loss": 0.3779, "step": 32650 }, { "epoch": 18.240782122905028, "grad_norm": 0.4744412899017334, "learning_rate": 8.859943977591037e-05, "loss": 0.3862, "step": 32651 }, { "epoch": 18.241340782122904, "grad_norm": 0.5209770202636719, "learning_rate": 8.857142857142857e-05, "loss": 0.4758, "step": 32652 }, { "epoch": 18.24189944134078, "grad_norm": 0.5869362354278564, "learning_rate": 8.854341736694679e-05, "loss": 0.3101, "step": 32653 }, { "epoch": 18.242458100558657, "grad_norm": 0.44523465633392334, "learning_rate": 8.851540616246499e-05, "loss": 0.3581, "step": 32654 }, { "epoch": 18.243016759776538, "grad_norm": 0.3737280070781708, "learning_rate": 8.84873949579832e-05, "loss": 0.4315, "step": 32655 }, { "epoch": 18.243575418994414, "grad_norm": 0.5529664754867554, "learning_rate": 8.84593837535014e-05, "loss": 0.4035, "step": 32656 }, { "epoch": 18.24413407821229, "grad_norm": 0.34042322635650635, "learning_rate": 8.843137254901961e-05, "loss": 0.3044, "step": 32657 }, { "epoch": 18.244692737430167, "grad_norm": 0.4932880997657776, "learning_rate": 8.840336134453782e-05, "loss": 0.3448, "step": 32658 }, { "epoch": 18.245251396648044, "grad_norm": 0.4720737934112549, "learning_rate": 
8.837535014005603e-05, "loss": 0.4894, "step": 32659 }, { "epoch": 18.24581005586592, "grad_norm": 0.5280595421791077, "learning_rate": 8.834733893557423e-05, "loss": 0.5299, "step": 32660 }, { "epoch": 18.2463687150838, "grad_norm": 0.4164494574069977, "learning_rate": 8.831932773109245e-05, "loss": 0.5149, "step": 32661 }, { "epoch": 18.246927374301677, "grad_norm": 1.0335514545440674, "learning_rate": 8.829131652661064e-05, "loss": 0.3933, "step": 32662 }, { "epoch": 18.247486033519554, "grad_norm": 0.3542427718639374, "learning_rate": 8.826330532212886e-05, "loss": 0.3496, "step": 32663 }, { "epoch": 18.24804469273743, "grad_norm": 0.35461345314979553, "learning_rate": 8.823529411764706e-05, "loss": 0.3788, "step": 32664 }, { "epoch": 18.248603351955307, "grad_norm": 0.42494508624076843, "learning_rate": 8.820728291316527e-05, "loss": 0.3909, "step": 32665 }, { "epoch": 18.249162011173183, "grad_norm": 0.49568766355514526, "learning_rate": 8.817927170868348e-05, "loss": 0.5342, "step": 32666 }, { "epoch": 18.24972067039106, "grad_norm": 0.38212326169013977, "learning_rate": 8.815126050420168e-05, "loss": 0.3606, "step": 32667 }, { "epoch": 18.25027932960894, "grad_norm": 0.43844321370124817, "learning_rate": 8.812324929971989e-05, "loss": 0.3283, "step": 32668 }, { "epoch": 18.250837988826817, "grad_norm": 0.5792275667190552, "learning_rate": 8.809523809523811e-05, "loss": 0.5553, "step": 32669 }, { "epoch": 18.251396648044693, "grad_norm": 0.45069336891174316, "learning_rate": 8.80672268907563e-05, "loss": 0.3663, "step": 32670 }, { "epoch": 18.25195530726257, "grad_norm": 0.3351284861564636, "learning_rate": 8.803921568627452e-05, "loss": 0.3379, "step": 32671 }, { "epoch": 18.252513966480446, "grad_norm": 0.3912334144115448, "learning_rate": 8.801120448179271e-05, "loss": 0.4557, "step": 32672 }, { "epoch": 18.253072625698323, "grad_norm": 0.3823745846748352, "learning_rate": 8.798319327731093e-05, "loss": 0.45, "step": 32673 }, { "epoch": 18.2536312849162, 
"grad_norm": 0.5660895109176636, "learning_rate": 8.795518207282914e-05, "loss": 0.4257, "step": 32674 }, { "epoch": 18.25418994413408, "grad_norm": 0.5477224588394165, "learning_rate": 8.792717086834734e-05, "loss": 0.4939, "step": 32675 }, { "epoch": 18.254748603351956, "grad_norm": 0.40360334515571594, "learning_rate": 8.789915966386555e-05, "loss": 0.3895, "step": 32676 }, { "epoch": 18.255307262569833, "grad_norm": 0.5136256814002991, "learning_rate": 8.787114845938376e-05, "loss": 0.3832, "step": 32677 }, { "epoch": 18.25586592178771, "grad_norm": 1.602191686630249, "learning_rate": 8.784313725490196e-05, "loss": 0.4502, "step": 32678 }, { "epoch": 18.256424581005586, "grad_norm": 0.7725604176521301, "learning_rate": 8.781512605042017e-05, "loss": 0.496, "step": 32679 }, { "epoch": 18.256983240223462, "grad_norm": 0.5572128891944885, "learning_rate": 8.778711484593837e-05, "loss": 0.4377, "step": 32680 }, { "epoch": 18.257541899441343, "grad_norm": 0.5574092268943787, "learning_rate": 8.77591036414566e-05, "loss": 0.4732, "step": 32681 }, { "epoch": 18.25810055865922, "grad_norm": 0.35897552967071533, "learning_rate": 8.773109243697479e-05, "loss": 0.4118, "step": 32682 }, { "epoch": 18.258659217877096, "grad_norm": 1.541033148765564, "learning_rate": 8.7703081232493e-05, "loss": 0.3662, "step": 32683 }, { "epoch": 18.259217877094972, "grad_norm": 0.3584447503089905, "learning_rate": 8.76750700280112e-05, "loss": 0.3905, "step": 32684 }, { "epoch": 18.25977653631285, "grad_norm": 0.43582451343536377, "learning_rate": 8.764705882352942e-05, "loss": 0.33, "step": 32685 }, { "epoch": 18.260335195530725, "grad_norm": 1.2134478092193604, "learning_rate": 8.761904761904762e-05, "loss": 0.332, "step": 32686 }, { "epoch": 18.260893854748602, "grad_norm": 0.9116256237030029, "learning_rate": 8.759103641456583e-05, "loss": 0.3652, "step": 32687 }, { "epoch": 18.261452513966482, "grad_norm": 0.44309672713279724, "learning_rate": 8.756302521008404e-05, "loss": 0.4279, 
"step": 32688 }, { "epoch": 18.26201117318436, "grad_norm": 0.44933533668518066, "learning_rate": 8.753501400560224e-05, "loss": 0.3359, "step": 32689 }, { "epoch": 18.262569832402235, "grad_norm": 0.37190139293670654, "learning_rate": 8.750700280112045e-05, "loss": 0.4022, "step": 32690 }, { "epoch": 18.26312849162011, "grad_norm": 0.5863502025604248, "learning_rate": 8.747899159663867e-05, "loss": 0.4016, "step": 32691 }, { "epoch": 18.26368715083799, "grad_norm": 8.179637908935547, "learning_rate": 8.745098039215686e-05, "loss": 0.4442, "step": 32692 }, { "epoch": 18.264245810055865, "grad_norm": 0.4853821098804474, "learning_rate": 8.742296918767508e-05, "loss": 0.4429, "step": 32693 }, { "epoch": 18.26480446927374, "grad_norm": 0.2989823520183563, "learning_rate": 8.739495798319327e-05, "loss": 0.293, "step": 32694 }, { "epoch": 18.26536312849162, "grad_norm": 0.46924665570259094, "learning_rate": 8.736694677871149e-05, "loss": 0.4807, "step": 32695 }, { "epoch": 18.265921787709498, "grad_norm": 0.4112356901168823, "learning_rate": 8.73389355742297e-05, "loss": 0.3483, "step": 32696 }, { "epoch": 18.266480446927375, "grad_norm": 0.46377190947532654, "learning_rate": 8.73109243697479e-05, "loss": 0.3399, "step": 32697 }, { "epoch": 18.26703910614525, "grad_norm": 0.9841479659080505, "learning_rate": 8.728291316526611e-05, "loss": 0.3423, "step": 32698 }, { "epoch": 18.267597765363128, "grad_norm": 0.33804404735565186, "learning_rate": 8.725490196078432e-05, "loss": 0.3303, "step": 32699 }, { "epoch": 18.268156424581004, "grad_norm": 0.48222580552101135, "learning_rate": 8.722689075630252e-05, "loss": 0.4222, "step": 32700 }, { "epoch": 18.26871508379888, "grad_norm": 0.47329995036125183, "learning_rate": 8.719887955182074e-05, "loss": 0.439, "step": 32701 }, { "epoch": 18.26927374301676, "grad_norm": 0.4900064468383789, "learning_rate": 8.717086834733893e-05, "loss": 0.3662, "step": 32702 }, { "epoch": 18.269832402234638, "grad_norm": 0.42072832584381104, 
"learning_rate": 8.714285714285715e-05, "loss": 0.3832, "step": 32703 }, { "epoch": 18.270391061452514, "grad_norm": 0.599050760269165, "learning_rate": 8.711484593837535e-05, "loss": 0.3996, "step": 32704 }, { "epoch": 18.27094972067039, "grad_norm": 0.31566593050956726, "learning_rate": 8.708683473389356e-05, "loss": 0.3375, "step": 32705 }, { "epoch": 18.271508379888267, "grad_norm": 0.4924340844154358, "learning_rate": 8.705882352941176e-05, "loss": 0.4397, "step": 32706 }, { "epoch": 18.272067039106144, "grad_norm": 0.36471566557884216, "learning_rate": 8.703081232492998e-05, "loss": 0.266, "step": 32707 }, { "epoch": 18.272625698324024, "grad_norm": 0.45468661189079285, "learning_rate": 8.700280112044818e-05, "loss": 0.2787, "step": 32708 }, { "epoch": 18.2731843575419, "grad_norm": 0.3292801082134247, "learning_rate": 8.697478991596639e-05, "loss": 0.4341, "step": 32709 }, { "epoch": 18.273743016759777, "grad_norm": 0.705889880657196, "learning_rate": 8.69467787114846e-05, "loss": 0.4941, "step": 32710 }, { "epoch": 18.274301675977654, "grad_norm": 0.32996779680252075, "learning_rate": 8.69187675070028e-05, "loss": 0.3509, "step": 32711 }, { "epoch": 18.27486033519553, "grad_norm": 0.5066474080085754, "learning_rate": 8.6890756302521e-05, "loss": 0.4193, "step": 32712 }, { "epoch": 18.275418994413407, "grad_norm": 0.7488020062446594, "learning_rate": 8.686274509803923e-05, "loss": 0.6275, "step": 32713 }, { "epoch": 18.275977653631283, "grad_norm": 0.3979850709438324, "learning_rate": 8.683473389355742e-05, "loss": 0.3467, "step": 32714 }, { "epoch": 18.276536312849164, "grad_norm": 0.5178669095039368, "learning_rate": 8.680672268907564e-05, "loss": 0.5044, "step": 32715 }, { "epoch": 18.27709497206704, "grad_norm": 0.5792111158370972, "learning_rate": 8.677871148459383e-05, "loss": 0.5073, "step": 32716 }, { "epoch": 18.277653631284917, "grad_norm": 2.007347583770752, "learning_rate": 8.675070028011205e-05, "loss": 0.4509, "step": 32717 }, { "epoch": 
18.278212290502793, "grad_norm": 0.4164412319660187, "learning_rate": 8.672268907563026e-05, "loss": 0.3502, "step": 32718 }, { "epoch": 18.27877094972067, "grad_norm": 0.6618167757987976, "learning_rate": 8.669467787114846e-05, "loss": 0.4215, "step": 32719 }, { "epoch": 18.279329608938546, "grad_norm": 0.36278441548347473, "learning_rate": 8.666666666666667e-05, "loss": 0.4033, "step": 32720 }, { "epoch": 18.279888268156423, "grad_norm": 0.5017735958099365, "learning_rate": 8.663865546218487e-05, "loss": 0.4402, "step": 32721 }, { "epoch": 18.280446927374303, "grad_norm": 0.5115286111831665, "learning_rate": 8.661064425770308e-05, "loss": 0.3058, "step": 32722 }, { "epoch": 18.28100558659218, "grad_norm": 0.5701693296432495, "learning_rate": 8.65826330532213e-05, "loss": 0.4815, "step": 32723 }, { "epoch": 18.281564245810056, "grad_norm": 0.5837891697883606, "learning_rate": 8.655462184873949e-05, "loss": 0.4681, "step": 32724 }, { "epoch": 18.282122905027933, "grad_norm": 0.6313376426696777, "learning_rate": 8.652661064425771e-05, "loss": 0.4246, "step": 32725 }, { "epoch": 18.28268156424581, "grad_norm": 0.4196922481060028, "learning_rate": 8.64985994397759e-05, "loss": 0.3821, "step": 32726 }, { "epoch": 18.283240223463686, "grad_norm": 0.43936988711357117, "learning_rate": 8.647058823529412e-05, "loss": 0.358, "step": 32727 }, { "epoch": 18.283798882681563, "grad_norm": 0.5881524085998535, "learning_rate": 8.644257703081233e-05, "loss": 0.3941, "step": 32728 }, { "epoch": 18.284357541899443, "grad_norm": 0.3587978184223175, "learning_rate": 8.641456582633054e-05, "loss": 0.3715, "step": 32729 }, { "epoch": 18.28491620111732, "grad_norm": 0.38702911138534546, "learning_rate": 8.638655462184874e-05, "loss": 0.3943, "step": 32730 }, { "epoch": 18.285474860335196, "grad_norm": 0.47795331478118896, "learning_rate": 8.635854341736695e-05, "loss": 0.3728, "step": 32731 }, { "epoch": 18.286033519553072, "grad_norm": 0.6315271258354187, "learning_rate": 
8.633053221288515e-05, "loss": 0.3726, "step": 32732 }, { "epoch": 18.28659217877095, "grad_norm": 0.4539512097835541, "learning_rate": 8.630252100840336e-05, "loss": 0.4537, "step": 32733 }, { "epoch": 18.287150837988825, "grad_norm": 0.74102383852005, "learning_rate": 8.627450980392157e-05, "loss": 0.435, "step": 32734 }, { "epoch": 18.287709497206706, "grad_norm": 1.3684748411178589, "learning_rate": 8.624649859943979e-05, "loss": 0.3699, "step": 32735 }, { "epoch": 18.288268156424582, "grad_norm": 0.49205633997917175, "learning_rate": 8.621848739495798e-05, "loss": 0.3227, "step": 32736 }, { "epoch": 18.28882681564246, "grad_norm": 0.4957444965839386, "learning_rate": 8.61904761904762e-05, "loss": 0.4, "step": 32737 }, { "epoch": 18.289385474860335, "grad_norm": 0.5035558342933655, "learning_rate": 8.616246498599439e-05, "loss": 0.3783, "step": 32738 }, { "epoch": 18.289944134078212, "grad_norm": 0.44133779406547546, "learning_rate": 8.613445378151261e-05, "loss": 0.2953, "step": 32739 }, { "epoch": 18.29050279329609, "grad_norm": 0.5431911945343018, "learning_rate": 8.610644257703082e-05, "loss": 0.3909, "step": 32740 }, { "epoch": 18.291061452513965, "grad_norm": 0.4621146321296692, "learning_rate": 8.607843137254902e-05, "loss": 0.3649, "step": 32741 }, { "epoch": 18.291620111731845, "grad_norm": 0.35900264978408813, "learning_rate": 8.605042016806723e-05, "loss": 0.3256, "step": 32742 }, { "epoch": 18.29217877094972, "grad_norm": 1.841981053352356, "learning_rate": 8.602240896358543e-05, "loss": 0.3582, "step": 32743 }, { "epoch": 18.2927374301676, "grad_norm": 1.3663215637207031, "learning_rate": 8.599439775910364e-05, "loss": 0.4403, "step": 32744 }, { "epoch": 18.293296089385475, "grad_norm": 0.4664662778377533, "learning_rate": 8.596638655462186e-05, "loss": 0.4713, "step": 32745 }, { "epoch": 18.29385474860335, "grad_norm": 0.36488622426986694, "learning_rate": 8.593837535014005e-05, "loss": 0.3527, "step": 32746 }, { "epoch": 18.294413407821228, 
"grad_norm": 0.719631016254425, "learning_rate": 8.591036414565827e-05, "loss": 0.565, "step": 32747 }, { "epoch": 18.294972067039105, "grad_norm": 1.0860995054244995, "learning_rate": 8.588235294117646e-05, "loss": 0.6013, "step": 32748 }, { "epoch": 18.295530726256985, "grad_norm": 0.3828640282154083, "learning_rate": 8.585434173669468e-05, "loss": 0.4284, "step": 32749 }, { "epoch": 18.29608938547486, "grad_norm": 0.7656653523445129, "learning_rate": 8.582633053221289e-05, "loss": 0.3263, "step": 32750 }, { "epoch": 18.296648044692738, "grad_norm": 0.4117003381252289, "learning_rate": 8.57983193277311e-05, "loss": 0.4296, "step": 32751 }, { "epoch": 18.297206703910614, "grad_norm": 0.5932239294052124, "learning_rate": 8.57703081232493e-05, "loss": 0.4228, "step": 32752 }, { "epoch": 18.29776536312849, "grad_norm": 0.45200273394584656, "learning_rate": 8.57422969187675e-05, "loss": 0.4143, "step": 32753 }, { "epoch": 18.298324022346367, "grad_norm": 0.5346892476081848, "learning_rate": 8.571428571428571e-05, "loss": 0.4719, "step": 32754 }, { "epoch": 18.298882681564244, "grad_norm": 0.7034209966659546, "learning_rate": 8.568627450980393e-05, "loss": 0.4499, "step": 32755 }, { "epoch": 18.299441340782124, "grad_norm": 1.0168575048446655, "learning_rate": 8.565826330532212e-05, "loss": 0.4135, "step": 32756 }, { "epoch": 18.3, "grad_norm": 0.344670832157135, "learning_rate": 8.563025210084034e-05, "loss": 0.4056, "step": 32757 }, { "epoch": 18.300558659217877, "grad_norm": 0.5479024648666382, "learning_rate": 8.560224089635854e-05, "loss": 0.3813, "step": 32758 }, { "epoch": 18.301117318435754, "grad_norm": 0.6125989556312561, "learning_rate": 8.557422969187676e-05, "loss": 0.3271, "step": 32759 }, { "epoch": 18.30167597765363, "grad_norm": 0.4583778977394104, "learning_rate": 8.554621848739495e-05, "loss": 0.2994, "step": 32760 }, { "epoch": 18.302234636871507, "grad_norm": 0.3797827959060669, "learning_rate": 8.551820728291317e-05, "loss": 0.3217, "step": 32761 
}, { "epoch": 18.302793296089387, "grad_norm": 0.48128241300582886, "learning_rate": 8.549019607843137e-05, "loss": 0.546, "step": 32762 }, { "epoch": 18.303351955307264, "grad_norm": 0.5493959784507751, "learning_rate": 8.546218487394958e-05, "loss": 0.4899, "step": 32763 }, { "epoch": 18.30391061452514, "grad_norm": 0.5889927744865417, "learning_rate": 8.543417366946779e-05, "loss": 0.5364, "step": 32764 }, { "epoch": 18.304469273743017, "grad_norm": 0.42793142795562744, "learning_rate": 8.540616246498599e-05, "loss": 0.4067, "step": 32765 }, { "epoch": 18.305027932960893, "grad_norm": 0.40835610032081604, "learning_rate": 8.53781512605042e-05, "loss": 0.4564, "step": 32766 }, { "epoch": 18.30558659217877, "grad_norm": 0.3972095847129822, "learning_rate": 8.535014005602242e-05, "loss": 0.2937, "step": 32767 }, { "epoch": 18.306145251396647, "grad_norm": 0.6681272983551025, "learning_rate": 8.532212885154061e-05, "loss": 0.4869, "step": 32768 }, { "epoch": 18.306703910614527, "grad_norm": 0.32472676038742065, "learning_rate": 8.529411764705883e-05, "loss": 0.386, "step": 32769 }, { "epoch": 18.307262569832403, "grad_norm": 0.6974824070930481, "learning_rate": 8.526610644257702e-05, "loss": 0.3816, "step": 32770 }, { "epoch": 18.30782122905028, "grad_norm": 5.003784656524658, "learning_rate": 8.523809523809524e-05, "loss": 0.3498, "step": 32771 }, { "epoch": 18.308379888268156, "grad_norm": 0.5939489006996155, "learning_rate": 8.521008403361345e-05, "loss": 0.5552, "step": 32772 }, { "epoch": 18.308938547486033, "grad_norm": 0.39250120520591736, "learning_rate": 8.518207282913165e-05, "loss": 0.4502, "step": 32773 }, { "epoch": 18.30949720670391, "grad_norm": 0.55890291929245, "learning_rate": 8.515406162464986e-05, "loss": 0.432, "step": 32774 }, { "epoch": 18.310055865921786, "grad_norm": 0.4307514429092407, "learning_rate": 8.512605042016807e-05, "loss": 0.4114, "step": 32775 }, { "epoch": 18.310614525139666, "grad_norm": 0.40841779112815857, "learning_rate": 
8.509803921568627e-05, "loss": 0.3903, "step": 32776 }, { "epoch": 18.311173184357543, "grad_norm": 0.576561450958252, "learning_rate": 8.507002801120449e-05, "loss": 0.4155, "step": 32777 }, { "epoch": 18.31173184357542, "grad_norm": 0.6510694026947021, "learning_rate": 8.504201680672268e-05, "loss": 0.3695, "step": 32778 }, { "epoch": 18.312290502793296, "grad_norm": 0.3977453410625458, "learning_rate": 8.50140056022409e-05, "loss": 0.365, "step": 32779 }, { "epoch": 18.312849162011172, "grad_norm": 0.45094066858291626, "learning_rate": 8.49859943977591e-05, "loss": 0.3377, "step": 32780 }, { "epoch": 18.31340782122905, "grad_norm": 0.37779805064201355, "learning_rate": 8.495798319327732e-05, "loss": 0.4165, "step": 32781 }, { "epoch": 18.31396648044693, "grad_norm": 1.4240490198135376, "learning_rate": 8.492997198879552e-05, "loss": 0.3597, "step": 32782 }, { "epoch": 18.314525139664806, "grad_norm": 0.633579432964325, "learning_rate": 8.490196078431373e-05, "loss": 0.3714, "step": 32783 }, { "epoch": 18.315083798882682, "grad_norm": 0.4324328303337097, "learning_rate": 8.487394957983193e-05, "loss": 0.3943, "step": 32784 }, { "epoch": 18.31564245810056, "grad_norm": 0.49630314111709595, "learning_rate": 8.484593837535014e-05, "loss": 0.3625, "step": 32785 }, { "epoch": 18.316201117318435, "grad_norm": 0.3495021462440491, "learning_rate": 8.481792717086835e-05, "loss": 0.391, "step": 32786 }, { "epoch": 18.316759776536312, "grad_norm": 2.732417106628418, "learning_rate": 8.478991596638656e-05, "loss": 0.3685, "step": 32787 }, { "epoch": 18.31731843575419, "grad_norm": 0.3252967894077301, "learning_rate": 8.476190476190476e-05, "loss": 0.3593, "step": 32788 }, { "epoch": 18.31787709497207, "grad_norm": 0.5639015436172485, "learning_rate": 8.473389355742298e-05, "loss": 0.4012, "step": 32789 }, { "epoch": 18.318435754189945, "grad_norm": 0.8142260313034058, "learning_rate": 8.470588235294117e-05, "loss": 0.4648, "step": 32790 }, { "epoch": 18.31899441340782, 
"grad_norm": 0.3550356924533844, "learning_rate": 8.467787114845939e-05, "loss": 0.3674, "step": 32791 }, { "epoch": 18.3195530726257, "grad_norm": 0.4787386655807495, "learning_rate": 8.464985994397758e-05, "loss": 0.46, "step": 32792 }, { "epoch": 18.320111731843575, "grad_norm": 0.39011022448539734, "learning_rate": 8.46218487394958e-05, "loss": 0.3356, "step": 32793 }, { "epoch": 18.32067039106145, "grad_norm": 0.40004441142082214, "learning_rate": 8.4593837535014e-05, "loss": 0.3849, "step": 32794 }, { "epoch": 18.321229050279328, "grad_norm": 0.36449384689331055, "learning_rate": 8.456582633053221e-05, "loss": 0.4433, "step": 32795 }, { "epoch": 18.321787709497208, "grad_norm": 0.37574502825737, "learning_rate": 8.453781512605042e-05, "loss": 0.4069, "step": 32796 }, { "epoch": 18.322346368715085, "grad_norm": 0.5998339653015137, "learning_rate": 8.450980392156862e-05, "loss": 0.6158, "step": 32797 }, { "epoch": 18.32290502793296, "grad_norm": 0.43560925126075745, "learning_rate": 8.448179271708683e-05, "loss": 0.4036, "step": 32798 }, { "epoch": 18.323463687150838, "grad_norm": 0.4975191652774811, "learning_rate": 8.445378151260505e-05, "loss": 0.4369, "step": 32799 }, { "epoch": 18.324022346368714, "grad_norm": 0.6788953542709351, "learning_rate": 8.442577030812324e-05, "loss": 0.3568, "step": 32800 }, { "epoch": 18.32458100558659, "grad_norm": 0.7546923160552979, "learning_rate": 8.439775910364146e-05, "loss": 0.5594, "step": 32801 }, { "epoch": 18.325139664804468, "grad_norm": 0.4436323046684265, "learning_rate": 8.436974789915965e-05, "loss": 0.4003, "step": 32802 }, { "epoch": 18.325698324022348, "grad_norm": 1.403450846672058, "learning_rate": 8.434173669467787e-05, "loss": 0.4736, "step": 32803 }, { "epoch": 18.326256983240224, "grad_norm": 0.4323928654193878, "learning_rate": 8.431372549019608e-05, "loss": 0.3664, "step": 32804 }, { "epoch": 18.3268156424581, "grad_norm": 0.4957042634487152, "learning_rate": 8.428571428571429e-05, "loss": 0.3452, 
"step": 32805 }, { "epoch": 18.327374301675977, "grad_norm": 0.5773230791091919, "learning_rate": 8.425770308123249e-05, "loss": 0.5432, "step": 32806 }, { "epoch": 18.327932960893854, "grad_norm": 0.43703946471214294, "learning_rate": 8.42296918767507e-05, "loss": 0.3724, "step": 32807 }, { "epoch": 18.32849162011173, "grad_norm": 0.6803310513496399, "learning_rate": 8.42016806722689e-05, "loss": 0.415, "step": 32808 }, { "epoch": 18.32905027932961, "grad_norm": 0.4226667284965515, "learning_rate": 8.417366946778712e-05, "loss": 0.3944, "step": 32809 }, { "epoch": 18.329608938547487, "grad_norm": 0.352253258228302, "learning_rate": 8.414565826330532e-05, "loss": 0.3608, "step": 32810 }, { "epoch": 18.330167597765364, "grad_norm": 0.5811667442321777, "learning_rate": 8.411764705882354e-05, "loss": 0.3853, "step": 32811 }, { "epoch": 18.33072625698324, "grad_norm": 1.4271761178970337, "learning_rate": 8.408963585434173e-05, "loss": 0.5111, "step": 32812 }, { "epoch": 18.331284916201117, "grad_norm": 0.5063607096672058, "learning_rate": 8.406162464985995e-05, "loss": 0.5105, "step": 32813 }, { "epoch": 18.331843575418993, "grad_norm": 0.4931904375553131, "learning_rate": 8.403361344537815e-05, "loss": 0.3597, "step": 32814 }, { "epoch": 18.33240223463687, "grad_norm": 1.5323851108551025, "learning_rate": 8.400560224089636e-05, "loss": 0.4469, "step": 32815 }, { "epoch": 18.33296089385475, "grad_norm": 0.38962700963020325, "learning_rate": 8.397759103641457e-05, "loss": 0.4647, "step": 32816 }, { "epoch": 18.333519553072627, "grad_norm": 0.6305058002471924, "learning_rate": 8.394957983193277e-05, "loss": 0.5606, "step": 32817 }, { "epoch": 18.334078212290503, "grad_norm": 0.5131636261940002, "learning_rate": 8.392156862745098e-05, "loss": 0.4307, "step": 32818 }, { "epoch": 18.33463687150838, "grad_norm": 0.3672429025173187, "learning_rate": 8.389355742296918e-05, "loss": 0.3651, "step": 32819 }, { "epoch": 18.335195530726256, "grad_norm": 0.4035200774669647, 
"learning_rate": 8.386554621848739e-05, "loss": 0.417, "step": 32820 }, { "epoch": 18.335754189944133, "grad_norm": 0.479237824678421, "learning_rate": 8.383753501400561e-05, "loss": 0.4214, "step": 32821 }, { "epoch": 18.33631284916201, "grad_norm": 0.3408047556877136, "learning_rate": 8.38095238095238e-05, "loss": 0.3622, "step": 32822 }, { "epoch": 18.33687150837989, "grad_norm": 0.42649906873703003, "learning_rate": 8.378151260504202e-05, "loss": 0.4341, "step": 32823 }, { "epoch": 18.337430167597766, "grad_norm": 3.1019928455352783, "learning_rate": 8.375350140056021e-05, "loss": 0.5003, "step": 32824 }, { "epoch": 18.337988826815643, "grad_norm": 0.49332132935523987, "learning_rate": 8.372549019607843e-05, "loss": 0.3372, "step": 32825 }, { "epoch": 18.33854748603352, "grad_norm": 0.3751504123210907, "learning_rate": 8.369747899159664e-05, "loss": 0.3448, "step": 32826 }, { "epoch": 18.339106145251396, "grad_norm": 0.4341195225715637, "learning_rate": 8.366946778711485e-05, "loss": 0.3913, "step": 32827 }, { "epoch": 18.339664804469272, "grad_norm": 0.4469486474990845, "learning_rate": 8.364145658263305e-05, "loss": 0.4465, "step": 32828 }, { "epoch": 18.340223463687153, "grad_norm": 1.0455799102783203, "learning_rate": 8.361344537815126e-05, "loss": 0.4696, "step": 32829 }, { "epoch": 18.34078212290503, "grad_norm": 0.4647930860519409, "learning_rate": 8.358543417366946e-05, "loss": 0.5153, "step": 32830 }, { "epoch": 18.341340782122906, "grad_norm": 0.39613330364227295, "learning_rate": 8.355742296918768e-05, "loss": 0.4409, "step": 32831 }, { "epoch": 18.341899441340782, "grad_norm": 1.484390377998352, "learning_rate": 8.352941176470588e-05, "loss": 0.3064, "step": 32832 }, { "epoch": 18.34245810055866, "grad_norm": 0.6235575675964355, "learning_rate": 8.35014005602241e-05, "loss": 0.467, "step": 32833 }, { "epoch": 18.343016759776535, "grad_norm": 0.37441450357437134, "learning_rate": 8.347338935574229e-05, "loss": 0.3083, "step": 32834 }, { "epoch": 
18.343575418994412, "grad_norm": 0.4194738566875458, "learning_rate": 8.34453781512605e-05, "loss": 0.3944, "step": 32835 }, { "epoch": 18.344134078212292, "grad_norm": 0.3580695688724518, "learning_rate": 8.341736694677873e-05, "loss": 0.349, "step": 32836 }, { "epoch": 18.34469273743017, "grad_norm": 0.8038898706436157, "learning_rate": 8.338935574229692e-05, "loss": 0.4701, "step": 32837 }, { "epoch": 18.345251396648045, "grad_norm": 1.6893147230148315, "learning_rate": 8.336134453781514e-05, "loss": 0.3396, "step": 32838 }, { "epoch": 18.345810055865922, "grad_norm": 0.5352857708930969, "learning_rate": 8.333333333333333e-05, "loss": 0.4297, "step": 32839 }, { "epoch": 18.3463687150838, "grad_norm": 0.35862624645233154, "learning_rate": 8.330532212885155e-05, "loss": 0.4418, "step": 32840 }, { "epoch": 18.346927374301675, "grad_norm": 0.4197530746459961, "learning_rate": 8.327731092436976e-05, "loss": 0.4265, "step": 32841 }, { "epoch": 18.34748603351955, "grad_norm": 0.3959534466266632, "learning_rate": 8.324929971988796e-05, "loss": 0.4129, "step": 32842 }, { "epoch": 18.34804469273743, "grad_norm": 1.516517996788025, "learning_rate": 8.322128851540617e-05, "loss": 0.438, "step": 32843 }, { "epoch": 18.34860335195531, "grad_norm": 0.3945249021053314, "learning_rate": 8.319327731092437e-05, "loss": 0.2983, "step": 32844 }, { "epoch": 18.349162011173185, "grad_norm": 1.3578836917877197, "learning_rate": 8.316526610644258e-05, "loss": 0.3832, "step": 32845 }, { "epoch": 18.34972067039106, "grad_norm": 0.5925694704055786, "learning_rate": 8.313725490196079e-05, "loss": 0.4247, "step": 32846 }, { "epoch": 18.350279329608938, "grad_norm": 0.40185171365737915, "learning_rate": 8.310924369747899e-05, "loss": 0.3487, "step": 32847 }, { "epoch": 18.350837988826814, "grad_norm": 0.34567567706108093, "learning_rate": 8.308123249299721e-05, "loss": 0.4123, "step": 32848 }, { "epoch": 18.35139664804469, "grad_norm": 0.31427231431007385, "learning_rate": 
8.30532212885154e-05, "loss": 0.3928, "step": 32849 }, { "epoch": 18.35195530726257, "grad_norm": 0.659511148929596, "learning_rate": 8.302521008403362e-05, "loss": 0.4432, "step": 32850 }, { "epoch": 18.352513966480448, "grad_norm": 6.609692573547363, "learning_rate": 8.299719887955182e-05, "loss": 0.371, "step": 32851 }, { "epoch": 18.353072625698324, "grad_norm": 0.6741576194763184, "learning_rate": 8.296918767507004e-05, "loss": 0.354, "step": 32852 }, { "epoch": 18.3536312849162, "grad_norm": 0.5141425132751465, "learning_rate": 8.294117647058824e-05, "loss": 0.3765, "step": 32853 }, { "epoch": 18.354189944134077, "grad_norm": 0.457843154668808, "learning_rate": 8.291316526610645e-05, "loss": 0.3968, "step": 32854 }, { "epoch": 18.354748603351954, "grad_norm": 1.0220999717712402, "learning_rate": 8.288515406162465e-05, "loss": 0.4273, "step": 32855 }, { "epoch": 18.355307262569834, "grad_norm": 1.3391636610031128, "learning_rate": 8.285714285714286e-05, "loss": 0.3777, "step": 32856 }, { "epoch": 18.35586592178771, "grad_norm": 0.43547654151916504, "learning_rate": 8.282913165266107e-05, "loss": 0.4361, "step": 32857 }, { "epoch": 18.356424581005587, "grad_norm": 0.35032373666763306, "learning_rate": 8.280112044817929e-05, "loss": 0.3932, "step": 32858 }, { "epoch": 18.356983240223464, "grad_norm": 1.0634431838989258, "learning_rate": 8.277310924369748e-05, "loss": 0.4109, "step": 32859 }, { "epoch": 18.35754189944134, "grad_norm": 0.7619585990905762, "learning_rate": 8.27450980392157e-05, "loss": 0.3677, "step": 32860 }, { "epoch": 18.358100558659217, "grad_norm": 0.47028061747550964, "learning_rate": 8.271708683473389e-05, "loss": 0.4321, "step": 32861 }, { "epoch": 18.358659217877094, "grad_norm": 0.36756548285484314, "learning_rate": 8.268907563025211e-05, "loss": 0.3337, "step": 32862 }, { "epoch": 18.359217877094974, "grad_norm": 0.5534183979034424, "learning_rate": 8.266106442577032e-05, "loss": 0.4591, "step": 32863 }, { "epoch": 18.35977653631285, 
"grad_norm": 0.39359161257743835, "learning_rate": 8.263305322128852e-05, "loss": 0.3896, "step": 32864 }, { "epoch": 18.360335195530727, "grad_norm": 0.35327041149139404, "learning_rate": 8.260504201680673e-05, "loss": 0.3101, "step": 32865 }, { "epoch": 18.360893854748603, "grad_norm": 0.4520114064216614, "learning_rate": 8.257703081232493e-05, "loss": 0.4383, "step": 32866 }, { "epoch": 18.36145251396648, "grad_norm": 3.7016406059265137, "learning_rate": 8.254901960784314e-05, "loss": 0.3834, "step": 32867 }, { "epoch": 18.362011173184356, "grad_norm": 0.36280062794685364, "learning_rate": 8.252100840336136e-05, "loss": 0.3951, "step": 32868 }, { "epoch": 18.362569832402233, "grad_norm": 0.36964765191078186, "learning_rate": 8.249299719887955e-05, "loss": 0.2614, "step": 32869 }, { "epoch": 18.363128491620113, "grad_norm": 0.38076820969581604, "learning_rate": 8.246498599439777e-05, "loss": 0.2917, "step": 32870 }, { "epoch": 18.36368715083799, "grad_norm": 0.46522969007492065, "learning_rate": 8.243697478991596e-05, "loss": 0.4573, "step": 32871 }, { "epoch": 18.364245810055866, "grad_norm": Infinity, "learning_rate": 8.243697478991596e-05, "loss": 0.3979, "step": 32872 }, { "epoch": 18.364804469273743, "grad_norm": 0.45378348231315613, "learning_rate": 8.240896358543418e-05, "loss": 0.3797, "step": 32873 }, { "epoch": 18.36536312849162, "grad_norm": 0.39367756247520447, "learning_rate": 8.238095238095238e-05, "loss": 0.3964, "step": 32874 }, { "epoch": 18.365921787709496, "grad_norm": 0.3585837781429291, "learning_rate": 8.23529411764706e-05, "loss": 0.373, "step": 32875 }, { "epoch": 18.366480446927373, "grad_norm": 0.454330176115036, "learning_rate": 8.23249299719888e-05, "loss": 0.3201, "step": 32876 }, { "epoch": 18.367039106145253, "grad_norm": 0.5186188220977783, "learning_rate": 8.2296918767507e-05, "loss": 0.3346, "step": 32877 }, { "epoch": 18.36759776536313, "grad_norm": 0.36090168356895447, "learning_rate": 8.226890756302521e-05, "loss": 0.3337, 
"step": 32878 }, { "epoch": 18.368156424581006, "grad_norm": 0.9182324409484863, "learning_rate": 8.224089635854342e-05, "loss": 0.2814, "step": 32879 }, { "epoch": 18.368715083798882, "grad_norm": 0.3522058427333832, "learning_rate": 8.221288515406162e-05, "loss": 0.3593, "step": 32880 }, { "epoch": 18.36927374301676, "grad_norm": 0.373034805059433, "learning_rate": 8.218487394957984e-05, "loss": 0.4735, "step": 32881 }, { "epoch": 18.369832402234636, "grad_norm": 19.711328506469727, "learning_rate": 8.215686274509804e-05, "loss": 0.3152, "step": 32882 }, { "epoch": 18.370391061452516, "grad_norm": 0.3268198072910309, "learning_rate": 8.212885154061626e-05, "loss": 0.3646, "step": 32883 }, { "epoch": 18.370949720670392, "grad_norm": 0.5742087960243225, "learning_rate": 8.210084033613445e-05, "loss": 0.5912, "step": 32884 }, { "epoch": 18.37150837988827, "grad_norm": 0.4913671910762787, "learning_rate": 8.207282913165267e-05, "loss": 0.6029, "step": 32885 }, { "epoch": 18.372067039106145, "grad_norm": 1.0694589614868164, "learning_rate": 8.204481792717087e-05, "loss": 0.395, "step": 32886 }, { "epoch": 18.372625698324022, "grad_norm": 0.4125858247280121, "learning_rate": 8.201680672268908e-05, "loss": 0.3673, "step": 32887 }, { "epoch": 18.3731843575419, "grad_norm": 0.4719606637954712, "learning_rate": 8.198879551820729e-05, "loss": 0.3139, "step": 32888 }, { "epoch": 18.373743016759775, "grad_norm": 0.37062758207321167, "learning_rate": 8.196078431372549e-05, "loss": 0.3392, "step": 32889 }, { "epoch": 18.374301675977655, "grad_norm": 0.4029068052768707, "learning_rate": 8.19327731092437e-05, "loss": 0.3678, "step": 32890 }, { "epoch": 18.37486033519553, "grad_norm": 0.39382800459861755, "learning_rate": 8.190476190476192e-05, "loss": 0.3985, "step": 32891 }, { "epoch": 18.37541899441341, "grad_norm": 0.7422434091567993, "learning_rate": 8.187675070028011e-05, "loss": 0.3468, "step": 32892 }, { "epoch": 18.375977653631285, "grad_norm": 0.33596086502075195, 
"learning_rate": 8.184873949579833e-05, "loss": 0.3888, "step": 32893 }, { "epoch": 18.37653631284916, "grad_norm": 0.5770246386528015, "learning_rate": 8.182072829131652e-05, "loss": 0.3454, "step": 32894 }, { "epoch": 18.377094972067038, "grad_norm": 0.5962408781051636, "learning_rate": 8.179271708683474e-05, "loss": 0.3737, "step": 32895 }, { "epoch": 18.377653631284915, "grad_norm": 1.5015170574188232, "learning_rate": 8.176470588235295e-05, "loss": 0.4487, "step": 32896 }, { "epoch": 18.378212290502795, "grad_norm": 0.5289201736450195, "learning_rate": 8.173669467787115e-05, "loss": 0.4285, "step": 32897 }, { "epoch": 18.37877094972067, "grad_norm": 0.40539059042930603, "learning_rate": 8.170868347338936e-05, "loss": 0.4056, "step": 32898 }, { "epoch": 18.379329608938548, "grad_norm": 0.44266578555107117, "learning_rate": 8.168067226890757e-05, "loss": 0.3546, "step": 32899 }, { "epoch": 18.379888268156424, "grad_norm": 0.31555184721946716, "learning_rate": 8.165266106442577e-05, "loss": 0.3419, "step": 32900 }, { "epoch": 18.3804469273743, "grad_norm": 0.5579817891120911, "learning_rate": 8.162464985994398e-05, "loss": 0.5055, "step": 32901 }, { "epoch": 18.381005586592178, "grad_norm": 0.4791562855243683, "learning_rate": 8.159663865546218e-05, "loss": 0.4083, "step": 32902 }, { "epoch": 18.381564245810054, "grad_norm": 0.4405474364757538, "learning_rate": 8.15686274509804e-05, "loss": 0.3747, "step": 32903 }, { "epoch": 18.382122905027934, "grad_norm": 2.780805826187134, "learning_rate": 8.15406162464986e-05, "loss": 0.3304, "step": 32904 }, { "epoch": 18.38268156424581, "grad_norm": 1.5652447938919067, "learning_rate": 8.151260504201682e-05, "loss": 0.3571, "step": 32905 }, { "epoch": 18.383240223463687, "grad_norm": 1.2004978656768799, "learning_rate": 8.148459383753501e-05, "loss": 0.3378, "step": 32906 }, { "epoch": 18.383798882681564, "grad_norm": 0.47375693917274475, "learning_rate": 8.145658263305323e-05, "loss": 0.394, "step": 32907 }, { "epoch": 
18.38435754189944, "grad_norm": 0.6289718151092529, "learning_rate": 8.142857142857143e-05, "loss": 0.5781, "step": 32908 }, { "epoch": 18.384916201117317, "grad_norm": 0.6018370389938354, "learning_rate": 8.140056022408964e-05, "loss": 0.4072, "step": 32909 }, { "epoch": 18.385474860335197, "grad_norm": 0.48269882798194885, "learning_rate": 8.137254901960785e-05, "loss": 0.412, "step": 32910 }, { "epoch": 18.386033519553074, "grad_norm": 0.39507797360420227, "learning_rate": 8.134453781512605e-05, "loss": 0.338, "step": 32911 }, { "epoch": 18.38659217877095, "grad_norm": 0.4784800112247467, "learning_rate": 8.131652661064426e-05, "loss": 0.4702, "step": 32912 }, { "epoch": 18.387150837988827, "grad_norm": 0.3584432601928711, "learning_rate": 8.128851540616248e-05, "loss": 0.3456, "step": 32913 }, { "epoch": 18.387709497206703, "grad_norm": 0.5144425630569458, "learning_rate": 8.126050420168067e-05, "loss": 0.487, "step": 32914 }, { "epoch": 18.38826815642458, "grad_norm": 0.35907530784606934, "learning_rate": 8.123249299719889e-05, "loss": 0.3876, "step": 32915 }, { "epoch": 18.388826815642457, "grad_norm": 0.38987234234809875, "learning_rate": 8.120448179271708e-05, "loss": 0.3807, "step": 32916 }, { "epoch": 18.389385474860337, "grad_norm": 0.44976285099983215, "learning_rate": 8.11764705882353e-05, "loss": 0.2841, "step": 32917 }, { "epoch": 18.389944134078213, "grad_norm": 1.293975830078125, "learning_rate": 8.11484593837535e-05, "loss": 0.4223, "step": 32918 }, { "epoch": 18.39050279329609, "grad_norm": 0.40058234333992004, "learning_rate": 8.112044817927171e-05, "loss": 0.349, "step": 32919 }, { "epoch": 18.391061452513966, "grad_norm": 0.4394546449184418, "learning_rate": 8.109243697478992e-05, "loss": 0.3986, "step": 32920 }, { "epoch": 18.391620111731843, "grad_norm": 0.4801974594593048, "learning_rate": 8.106442577030812e-05, "loss": 0.4179, "step": 32921 }, { "epoch": 18.39217877094972, "grad_norm": 0.41869810223579407, "learning_rate": 
8.103641456582633e-05, "loss": 0.4264, "step": 32922 }, { "epoch": 18.392737430167596, "grad_norm": 0.5575540661811829, "learning_rate": 8.100840336134455e-05, "loss": 0.3996, "step": 32923 }, { "epoch": 18.393296089385476, "grad_norm": 0.37092867493629456, "learning_rate": 8.098039215686274e-05, "loss": 0.3878, "step": 32924 }, { "epoch": 18.393854748603353, "grad_norm": 1.365338683128357, "learning_rate": 8.095238095238096e-05, "loss": 0.5178, "step": 32925 }, { "epoch": 18.39441340782123, "grad_norm": 0.49847087264060974, "learning_rate": 8.092436974789915e-05, "loss": 0.3857, "step": 32926 }, { "epoch": 18.394972067039106, "grad_norm": 0.36416247487068176, "learning_rate": 8.089635854341737e-05, "loss": 0.4178, "step": 32927 }, { "epoch": 18.395530726256982, "grad_norm": 3.3862669467926025, "learning_rate": 8.086834733893558e-05, "loss": 0.3456, "step": 32928 }, { "epoch": 18.39608938547486, "grad_norm": 0.3563072383403778, "learning_rate": 8.084033613445379e-05, "loss": 0.3933, "step": 32929 }, { "epoch": 18.39664804469274, "grad_norm": 0.8796271681785583, "learning_rate": 8.081232492997199e-05, "loss": 0.4575, "step": 32930 }, { "epoch": 18.397206703910616, "grad_norm": 0.5124216675758362, "learning_rate": 8.07843137254902e-05, "loss": 0.4773, "step": 32931 }, { "epoch": 18.397765363128492, "grad_norm": 0.3278964161872864, "learning_rate": 8.07563025210084e-05, "loss": 0.2702, "step": 32932 }, { "epoch": 18.39832402234637, "grad_norm": 0.41057077050209045, "learning_rate": 8.072829131652661e-05, "loss": 0.4701, "step": 32933 }, { "epoch": 18.398882681564245, "grad_norm": 0.5461214780807495, "learning_rate": 8.070028011204482e-05, "loss": 0.4735, "step": 32934 }, { "epoch": 18.399441340782122, "grad_norm": 0.4644501209259033, "learning_rate": 8.067226890756304e-05, "loss": 0.3917, "step": 32935 }, { "epoch": 18.4, "grad_norm": 0.671927809715271, "learning_rate": 8.064425770308123e-05, "loss": 0.3263, "step": 32936 }, { "epoch": 18.40055865921788, "grad_norm": 
0.7671076655387878, "learning_rate": 8.061624649859945e-05, "loss": 0.4463, "step": 32937 }, { "epoch": 18.401117318435755, "grad_norm": 0.3986946642398834, "learning_rate": 8.058823529411764e-05, "loss": 0.3639, "step": 32938 }, { "epoch": 18.401675977653632, "grad_norm": 0.38581475615501404, "learning_rate": 8.056022408963586e-05, "loss": 0.3927, "step": 32939 }, { "epoch": 18.40223463687151, "grad_norm": 1.1379859447479248, "learning_rate": 8.053221288515407e-05, "loss": 0.417, "step": 32940 }, { "epoch": 18.402793296089385, "grad_norm": 1.1928983926773071, "learning_rate": 8.050420168067227e-05, "loss": 0.4742, "step": 32941 }, { "epoch": 18.40335195530726, "grad_norm": 0.6107305884361267, "learning_rate": 8.047619047619048e-05, "loss": 0.4547, "step": 32942 }, { "epoch": 18.403910614525138, "grad_norm": 0.6340111494064331, "learning_rate": 8.044817927170868e-05, "loss": 0.4346, "step": 32943 }, { "epoch": 18.404469273743018, "grad_norm": 0.8544795513153076, "learning_rate": 8.042016806722689e-05, "loss": 0.4457, "step": 32944 }, { "epoch": 18.405027932960895, "grad_norm": 0.5879116654396057, "learning_rate": 8.039215686274511e-05, "loss": 0.3773, "step": 32945 }, { "epoch": 18.40558659217877, "grad_norm": 0.30842816829681396, "learning_rate": 8.03641456582633e-05, "loss": 0.3092, "step": 32946 }, { "epoch": 18.406145251396648, "grad_norm": 0.4883110225200653, "learning_rate": 8.033613445378152e-05, "loss": 0.4249, "step": 32947 }, { "epoch": 18.406703910614524, "grad_norm": 0.39143693447113037, "learning_rate": 8.030812324929971e-05, "loss": 0.3966, "step": 32948 }, { "epoch": 18.4072625698324, "grad_norm": 0.7340346574783325, "learning_rate": 8.028011204481793e-05, "loss": 0.4919, "step": 32949 }, { "epoch": 18.407821229050278, "grad_norm": 0.5486397743225098, "learning_rate": 8.025210084033614e-05, "loss": 0.4141, "step": 32950 }, { "epoch": 18.408379888268158, "grad_norm": 0.6926745772361755, "learning_rate": 8.022408963585435e-05, "loss": 0.3798, "step": 
32951 }, { "epoch": 18.408938547486034, "grad_norm": 0.4745299816131592, "learning_rate": 8.019607843137255e-05, "loss": 0.4238, "step": 32952 }, { "epoch": 18.40949720670391, "grad_norm": 0.6261916160583496, "learning_rate": 8.016806722689076e-05, "loss": 0.3885, "step": 32953 }, { "epoch": 18.410055865921787, "grad_norm": 0.42988282442092896, "learning_rate": 8.014005602240896e-05, "loss": 0.4396, "step": 32954 }, { "epoch": 18.410614525139664, "grad_norm": 0.3979518711566925, "learning_rate": 8.011204481792718e-05, "loss": 0.3088, "step": 32955 }, { "epoch": 18.41117318435754, "grad_norm": 6.194914817810059, "learning_rate": 8.008403361344538e-05, "loss": 0.5407, "step": 32956 }, { "epoch": 18.41173184357542, "grad_norm": 0.4199903607368469, "learning_rate": 8.00560224089636e-05, "loss": 0.3436, "step": 32957 }, { "epoch": 18.412290502793297, "grad_norm": 1.6032865047454834, "learning_rate": 8.002801120448179e-05, "loss": 0.4505, "step": 32958 }, { "epoch": 18.412849162011174, "grad_norm": 0.33165737986564636, "learning_rate": 8e-05, "loss": 0.3495, "step": 32959 }, { "epoch": 18.41340782122905, "grad_norm": 0.3565899729728699, "learning_rate": 7.99719887955182e-05, "loss": 0.3981, "step": 32960 }, { "epoch": 18.413966480446927, "grad_norm": 0.47874388098716736, "learning_rate": 7.994397759103642e-05, "loss": 0.4094, "step": 32961 }, { "epoch": 18.414525139664804, "grad_norm": 0.4249212145805359, "learning_rate": 7.991596638655462e-05, "loss": 0.3633, "step": 32962 }, { "epoch": 18.41508379888268, "grad_norm": 0.4846232831478119, "learning_rate": 7.988795518207283e-05, "loss": 0.4138, "step": 32963 }, { "epoch": 18.41564245810056, "grad_norm": 0.5512405037879944, "learning_rate": 7.985994397759104e-05, "loss": 0.4146, "step": 32964 }, { "epoch": 18.416201117318437, "grad_norm": 0.370501309633255, "learning_rate": 7.983193277310924e-05, "loss": 0.3038, "step": 32965 }, { "epoch": 18.416759776536313, "grad_norm": 0.5199580788612366, "learning_rate": 
7.980392156862745e-05, "loss": 0.3512, "step": 32966 }, { "epoch": 18.41731843575419, "grad_norm": 1.395495057106018, "learning_rate": 7.977591036414567e-05, "loss": 0.4137, "step": 32967 }, { "epoch": 18.417877094972066, "grad_norm": 0.7687472105026245, "learning_rate": 7.974789915966386e-05, "loss": 0.4476, "step": 32968 }, { "epoch": 18.418435754189943, "grad_norm": 0.5083678364753723, "learning_rate": 7.971988795518208e-05, "loss": 0.3393, "step": 32969 }, { "epoch": 18.41899441340782, "grad_norm": 0.3709186613559723, "learning_rate": 7.969187675070027e-05, "loss": 0.4297, "step": 32970 }, { "epoch": 18.4195530726257, "grad_norm": 0.702735185623169, "learning_rate": 7.966386554621849e-05, "loss": 0.3291, "step": 32971 }, { "epoch": 18.420111731843576, "grad_norm": 0.6108242869377136, "learning_rate": 7.96358543417367e-05, "loss": 0.582, "step": 32972 }, { "epoch": 18.420670391061453, "grad_norm": 0.5116142630577087, "learning_rate": 7.96078431372549e-05, "loss": 0.4132, "step": 32973 }, { "epoch": 18.42122905027933, "grad_norm": 0.428847998380661, "learning_rate": 7.957983193277311e-05, "loss": 0.3916, "step": 32974 }, { "epoch": 18.421787709497206, "grad_norm": 0.6311320662498474, "learning_rate": 7.955182072829132e-05, "loss": 0.4354, "step": 32975 }, { "epoch": 18.422346368715083, "grad_norm": 0.34464508295059204, "learning_rate": 7.952380952380952e-05, "loss": 0.459, "step": 32976 }, { "epoch": 18.422905027932963, "grad_norm": 0.35673147439956665, "learning_rate": 7.949579831932774e-05, "loss": 0.3044, "step": 32977 }, { "epoch": 18.42346368715084, "grad_norm": 0.6588972210884094, "learning_rate": 7.946778711484593e-05, "loss": 0.4345, "step": 32978 }, { "epoch": 18.424022346368716, "grad_norm": 1.5466976165771484, "learning_rate": 7.943977591036415e-05, "loss": 0.4463, "step": 32979 }, { "epoch": 18.424581005586592, "grad_norm": 0.4252294600009918, "learning_rate": 7.941176470588235e-05, "loss": 0.4046, "step": 32980 }, { "epoch": 18.42513966480447, 
"grad_norm": 0.6188439130783081, "learning_rate": 7.938375350140057e-05, "loss": 0.6927, "step": 32981 }, { "epoch": 18.425698324022346, "grad_norm": 0.43326622247695923, "learning_rate": 7.935574229691877e-05, "loss": 0.3682, "step": 32982 }, { "epoch": 18.426256983240222, "grad_norm": 0.5017699599266052, "learning_rate": 7.932773109243698e-05, "loss": 0.3753, "step": 32983 }, { "epoch": 18.426815642458102, "grad_norm": 1.5855077505111694, "learning_rate": 7.929971988795518e-05, "loss": 0.3638, "step": 32984 }, { "epoch": 18.42737430167598, "grad_norm": 0.6004158854484558, "learning_rate": 7.927170868347339e-05, "loss": 0.2602, "step": 32985 }, { "epoch": 18.427932960893855, "grad_norm": 1.3407344818115234, "learning_rate": 7.92436974789916e-05, "loss": 0.3408, "step": 32986 }, { "epoch": 18.428491620111732, "grad_norm": 1.0956668853759766, "learning_rate": 7.92156862745098e-05, "loss": 0.5017, "step": 32987 }, { "epoch": 18.42905027932961, "grad_norm": 3.4091012477874756, "learning_rate": 7.918767507002801e-05, "loss": 0.5011, "step": 32988 }, { "epoch": 18.429608938547485, "grad_norm": 0.6315897703170776, "learning_rate": 7.915966386554623e-05, "loss": 0.4583, "step": 32989 }, { "epoch": 18.43016759776536, "grad_norm": 0.6949681043624878, "learning_rate": 7.913165266106442e-05, "loss": 0.3621, "step": 32990 }, { "epoch": 18.43072625698324, "grad_norm": 3.8538801670074463, "learning_rate": 7.910364145658264e-05, "loss": 0.4234, "step": 32991 }, { "epoch": 18.43128491620112, "grad_norm": 1.2446885108947754, "learning_rate": 7.907563025210083e-05, "loss": 0.4556, "step": 32992 }, { "epoch": 18.431843575418995, "grad_norm": 0.33871883153915405, "learning_rate": 7.904761904761905e-05, "loss": 0.3682, "step": 32993 }, { "epoch": 18.43240223463687, "grad_norm": 0.8030214309692383, "learning_rate": 7.901960784313726e-05, "loss": 0.3618, "step": 32994 }, { "epoch": 18.432960893854748, "grad_norm": 0.34368887543678284, "learning_rate": 7.899159663865546e-05, "loss": 
0.3262, "step": 32995 }, { "epoch": 18.433519553072625, "grad_norm": 1.7237881422042847, "learning_rate": 7.896358543417367e-05, "loss": 0.7043, "step": 32996 }, { "epoch": 18.4340782122905, "grad_norm": 1.5003234148025513, "learning_rate": 7.893557422969187e-05, "loss": 0.33, "step": 32997 }, { "epoch": 18.43463687150838, "grad_norm": 1.2776204347610474, "learning_rate": 7.890756302521008e-05, "loss": 0.4121, "step": 32998 }, { "epoch": 18.435195530726258, "grad_norm": 0.5692400932312012, "learning_rate": 7.88795518207283e-05, "loss": 0.4963, "step": 32999 }, { "epoch": 18.435754189944134, "grad_norm": 0.47316592931747437, "learning_rate": 7.885154061624649e-05, "loss": 0.6218, "step": 33000 }, { "epoch": 18.435754189944134, "eval_cer": 0.08477627573565513, "eval_loss": 0.3203820586204529, "eval_runtime": 55.6542, "eval_samples_per_second": 81.539, "eval_steps_per_second": 5.103, "eval_wer": 0.3347113209128237, "step": 33000 }, { "epoch": 18.43631284916201, "grad_norm": 0.44589757919311523, "learning_rate": 7.882352941176471e-05, "loss": 0.3691, "step": 33001 }, { "epoch": 18.436871508379888, "grad_norm": 0.6666038632392883, "learning_rate": 7.87955182072829e-05, "loss": 0.4973, "step": 33002 }, { "epoch": 18.437430167597764, "grad_norm": 0.38396579027175903, "learning_rate": 7.876750700280112e-05, "loss": 0.3244, "step": 33003 }, { "epoch": 18.43798882681564, "grad_norm": 0.39291146397590637, "learning_rate": 7.873949579831933e-05, "loss": 0.3583, "step": 33004 }, { "epoch": 18.43854748603352, "grad_norm": 0.40219053626060486, "learning_rate": 7.871148459383754e-05, "loss": 0.353, "step": 33005 }, { "epoch": 18.439106145251397, "grad_norm": 0.5306525230407715, "learning_rate": 7.868347338935574e-05, "loss": 0.5015, "step": 33006 }, { "epoch": 18.439664804469274, "grad_norm": 0.5150411128997803, "learning_rate": 7.865546218487395e-05, "loss": 0.5127, "step": 33007 }, { "epoch": 18.44022346368715, "grad_norm": 0.3518139719963074, "learning_rate": 
7.862745098039215e-05, "loss": 0.4225, "step": 33008 }, { "epoch": 18.440782122905027, "grad_norm": 0.49749556183815, "learning_rate": 7.859943977591037e-05, "loss": 0.3997, "step": 33009 }, { "epoch": 18.441340782122904, "grad_norm": 1.749674677848816, "learning_rate": 7.857142857142857e-05, "loss": 0.3951, "step": 33010 }, { "epoch": 18.441899441340784, "grad_norm": 0.5609121918678284, "learning_rate": 7.854341736694679e-05, "loss": 0.4742, "step": 33011 }, { "epoch": 18.44245810055866, "grad_norm": 0.37346526980400085, "learning_rate": 7.851540616246498e-05, "loss": 0.3268, "step": 33012 }, { "epoch": 18.443016759776537, "grad_norm": 0.6323011517524719, "learning_rate": 7.84873949579832e-05, "loss": 0.5413, "step": 33013 }, { "epoch": 18.443575418994413, "grad_norm": 0.7415624856948853, "learning_rate": 7.845938375350139e-05, "loss": 0.5697, "step": 33014 }, { "epoch": 18.44413407821229, "grad_norm": 0.4007977247238159, "learning_rate": 7.843137254901961e-05, "loss": 0.4791, "step": 33015 }, { "epoch": 18.444692737430167, "grad_norm": 0.5531507134437561, "learning_rate": 7.840336134453782e-05, "loss": 0.4185, "step": 33016 }, { "epoch": 18.445251396648043, "grad_norm": 0.4146111011505127, "learning_rate": 7.837535014005602e-05, "loss": 0.3786, "step": 33017 }, { "epoch": 18.445810055865923, "grad_norm": 2.5037317276000977, "learning_rate": 7.834733893557423e-05, "loss": 0.4907, "step": 33018 }, { "epoch": 18.4463687150838, "grad_norm": 0.3815475106239319, "learning_rate": 7.831932773109243e-05, "loss": 0.4057, "step": 33019 }, { "epoch": 18.446927374301676, "grad_norm": 0.34028899669647217, "learning_rate": 7.829131652661064e-05, "loss": 0.3928, "step": 33020 }, { "epoch": 18.447486033519553, "grad_norm": 0.3826596736907959, "learning_rate": 7.826330532212886e-05, "loss": 0.3682, "step": 33021 }, { "epoch": 18.44804469273743, "grad_norm": 0.44373390078544617, "learning_rate": 7.823529411764705e-05, "loss": 0.544, "step": 33022 }, { "epoch": 18.448603351955306, 
"grad_norm": 0.3561420440673828, "learning_rate": 7.820728291316527e-05, "loss": 0.3207, "step": 33023 }, { "epoch": 18.449162011173183, "grad_norm": 0.5077047944068909, "learning_rate": 7.817927170868346e-05, "loss": 0.399, "step": 33024 }, { "epoch": 18.449720670391063, "grad_norm": 0.49721693992614746, "learning_rate": 7.815126050420168e-05, "loss": 0.4016, "step": 33025 }, { "epoch": 18.45027932960894, "grad_norm": 1.33882474899292, "learning_rate": 7.812324929971989e-05, "loss": 0.3393, "step": 33026 }, { "epoch": 18.450837988826816, "grad_norm": 0.4576573073863983, "learning_rate": 7.80952380952381e-05, "loss": 0.4236, "step": 33027 }, { "epoch": 18.451396648044692, "grad_norm": 1.5567392110824585, "learning_rate": 7.80672268907563e-05, "loss": 0.4699, "step": 33028 }, { "epoch": 18.45195530726257, "grad_norm": 0.5081969499588013, "learning_rate": 7.803921568627451e-05, "loss": 0.3925, "step": 33029 }, { "epoch": 18.452513966480446, "grad_norm": 0.47761791944503784, "learning_rate": 7.801120448179271e-05, "loss": 0.3574, "step": 33030 }, { "epoch": 18.453072625698326, "grad_norm": 0.6030970215797424, "learning_rate": 7.798319327731093e-05, "loss": 0.3416, "step": 33031 }, { "epoch": 18.453631284916202, "grad_norm": 0.44586479663848877, "learning_rate": 7.795518207282913e-05, "loss": 0.3261, "step": 33032 }, { "epoch": 18.45418994413408, "grad_norm": 0.4216799736022949, "learning_rate": 7.792717086834735e-05, "loss": 0.4234, "step": 33033 }, { "epoch": 18.454748603351955, "grad_norm": 0.30692920088768005, "learning_rate": 7.789915966386554e-05, "loss": 0.3775, "step": 33034 }, { "epoch": 18.455307262569832, "grad_norm": 0.45264431834220886, "learning_rate": 7.787114845938376e-05, "loss": 0.4657, "step": 33035 }, { "epoch": 18.45586592178771, "grad_norm": 0.4765605330467224, "learning_rate": 7.784313725490196e-05, "loss": 0.5099, "step": 33036 }, { "epoch": 18.456424581005585, "grad_norm": 0.540405809879303, "learning_rate": 7.781512605042017e-05, "loss": 
0.4369, "step": 33037 }, { "epoch": 18.456983240223465, "grad_norm": 0.5709408521652222, "learning_rate": 7.778711484593837e-05, "loss": 0.4126, "step": 33038 }, { "epoch": 18.457541899441342, "grad_norm": 0.48756682872772217, "learning_rate": 7.775910364145658e-05, "loss": 0.4706, "step": 33039 }, { "epoch": 18.45810055865922, "grad_norm": 0.37175390124320984, "learning_rate": 7.773109243697479e-05, "loss": 0.3476, "step": 33040 }, { "epoch": 18.458659217877095, "grad_norm": 0.7057384848594666, "learning_rate": 7.770308123249299e-05, "loss": 0.4767, "step": 33041 }, { "epoch": 18.45921787709497, "grad_norm": 1.2787559032440186, "learning_rate": 7.76750700280112e-05, "loss": 0.5325, "step": 33042 }, { "epoch": 18.459776536312848, "grad_norm": 0.47130322456359863, "learning_rate": 7.764705882352942e-05, "loss": 0.3885, "step": 33043 }, { "epoch": 18.460335195530725, "grad_norm": 0.4600951671600342, "learning_rate": 7.761904761904761e-05, "loss": 0.3801, "step": 33044 }, { "epoch": 18.460893854748605, "grad_norm": 0.5492141842842102, "learning_rate": 7.759103641456583e-05, "loss": 0.4805, "step": 33045 }, { "epoch": 18.46145251396648, "grad_norm": 0.8448355197906494, "learning_rate": 7.756302521008402e-05, "loss": 0.3472, "step": 33046 }, { "epoch": 18.462011173184358, "grad_norm": 0.32858195900917053, "learning_rate": 7.753501400560224e-05, "loss": 0.3936, "step": 33047 }, { "epoch": 18.462569832402234, "grad_norm": 1.110443353652954, "learning_rate": 7.750700280112045e-05, "loss": 0.4981, "step": 33048 }, { "epoch": 18.46312849162011, "grad_norm": 2.133528709411621, "learning_rate": 7.747899159663865e-05, "loss": 0.4248, "step": 33049 }, { "epoch": 18.463687150837988, "grad_norm": 0.81072998046875, "learning_rate": 7.745098039215686e-05, "loss": 0.4564, "step": 33050 }, { "epoch": 18.464245810055864, "grad_norm": 1.1348986625671387, "learning_rate": 7.742296918767507e-05, "loss": 0.3962, "step": 33051 }, { "epoch": 18.464804469273744, "grad_norm": 
0.8548154830932617, "learning_rate": 7.739495798319327e-05, "loss": 0.3693, "step": 33052 }, { "epoch": 18.46536312849162, "grad_norm": 0.3434957265853882, "learning_rate": 7.736694677871149e-05, "loss": 0.3647, "step": 33053 }, { "epoch": 18.465921787709497, "grad_norm": 0.3319074809551239, "learning_rate": 7.733893557422968e-05, "loss": 0.3884, "step": 33054 }, { "epoch": 18.466480446927374, "grad_norm": 0.5831570625305176, "learning_rate": 7.73109243697479e-05, "loss": 0.5707, "step": 33055 }, { "epoch": 18.46703910614525, "grad_norm": 0.5149168372154236, "learning_rate": 7.72829131652661e-05, "loss": 0.3985, "step": 33056 }, { "epoch": 18.467597765363127, "grad_norm": 0.5399753451347351, "learning_rate": 7.725490196078432e-05, "loss": 0.3828, "step": 33057 }, { "epoch": 18.468156424581007, "grad_norm": 0.42691951990127563, "learning_rate": 7.722689075630252e-05, "loss": 0.3773, "step": 33058 }, { "epoch": 18.468715083798884, "grad_norm": 1.6134134531021118, "learning_rate": 7.719887955182073e-05, "loss": 0.3723, "step": 33059 }, { "epoch": 18.46927374301676, "grad_norm": 0.5129427313804626, "learning_rate": 7.717086834733893e-05, "loss": 0.4191, "step": 33060 }, { "epoch": 18.469832402234637, "grad_norm": 0.4188036620616913, "learning_rate": 7.714285714285714e-05, "loss": 0.3696, "step": 33061 }, { "epoch": 18.470391061452514, "grad_norm": 0.3631969392299652, "learning_rate": 7.711484593837535e-05, "loss": 0.3723, "step": 33062 }, { "epoch": 18.47094972067039, "grad_norm": 0.4793473184108734, "learning_rate": 7.708683473389357e-05, "loss": 0.5606, "step": 33063 }, { "epoch": 18.471508379888267, "grad_norm": 0.6509886980056763, "learning_rate": 7.705882352941176e-05, "loss": 0.312, "step": 33064 }, { "epoch": 18.472067039106147, "grad_norm": 0.758695662021637, "learning_rate": 7.703081232492998e-05, "loss": 0.721, "step": 33065 }, { "epoch": 18.472625698324023, "grad_norm": 0.37205594778060913, "learning_rate": 7.700280112044817e-05, "loss": 0.3416, "step": 
33066 }, { "epoch": 18.4731843575419, "grad_norm": 0.3956359326839447, "learning_rate": 7.697478991596639e-05, "loss": 0.4396, "step": 33067 }, { "epoch": 18.473743016759776, "grad_norm": 0.5734394788742065, "learning_rate": 7.694677871148458e-05, "loss": 0.4488, "step": 33068 }, { "epoch": 18.474301675977653, "grad_norm": 0.4282195270061493, "learning_rate": 7.69187675070028e-05, "loss": 0.3303, "step": 33069 }, { "epoch": 18.47486033519553, "grad_norm": 0.39459228515625, "learning_rate": 7.689075630252102e-05, "loss": 0.3917, "step": 33070 }, { "epoch": 18.475418994413406, "grad_norm": 0.4824240505695343, "learning_rate": 7.686274509803921e-05, "loss": 0.5014, "step": 33071 }, { "epoch": 18.475977653631286, "grad_norm": 0.3895677328109741, "learning_rate": 7.683473389355743e-05, "loss": 0.3382, "step": 33072 }, { "epoch": 18.476536312849163, "grad_norm": 0.42169928550720215, "learning_rate": 7.680672268907563e-05, "loss": 0.436, "step": 33073 }, { "epoch": 18.47709497206704, "grad_norm": 0.37625226378440857, "learning_rate": 7.677871148459385e-05, "loss": 0.3906, "step": 33074 }, { "epoch": 18.477653631284916, "grad_norm": 0.4884724020957947, "learning_rate": 7.675070028011205e-05, "loss": 0.398, "step": 33075 }, { "epoch": 18.478212290502793, "grad_norm": 0.9212857484817505, "learning_rate": 7.672268907563026e-05, "loss": 0.3593, "step": 33076 }, { "epoch": 18.47877094972067, "grad_norm": 0.732638955116272, "learning_rate": 7.669467787114846e-05, "loss": 0.3779, "step": 33077 }, { "epoch": 18.47932960893855, "grad_norm": 0.3234267830848694, "learning_rate": 7.666666666666667e-05, "loss": 0.44, "step": 33078 }, { "epoch": 18.479888268156426, "grad_norm": 0.9365727305412292, "learning_rate": 7.663865546218487e-05, "loss": 0.3489, "step": 33079 }, { "epoch": 18.480446927374302, "grad_norm": 0.3906329870223999, "learning_rate": 7.66106442577031e-05, "loss": 0.3596, "step": 33080 }, { "epoch": 18.48100558659218, "grad_norm": 0.37238189578056335, "learning_rate": 
7.658263305322129e-05, "loss": 0.3963, "step": 33081 }, { "epoch": 18.481564245810056, "grad_norm": 0.3529389798641205, "learning_rate": 7.65546218487395e-05, "loss": 0.3651, "step": 33082 }, { "epoch": 18.482122905027932, "grad_norm": 0.5095834732055664, "learning_rate": 7.65266106442577e-05, "loss": 0.411, "step": 33083 }, { "epoch": 18.48268156424581, "grad_norm": 0.46512576937675476, "learning_rate": 7.649859943977592e-05, "loss": 0.5025, "step": 33084 }, { "epoch": 18.48324022346369, "grad_norm": 0.5901860594749451, "learning_rate": 7.647058823529412e-05, "loss": 0.3328, "step": 33085 }, { "epoch": 18.483798882681565, "grad_norm": 0.8037012815475464, "learning_rate": 7.644257703081233e-05, "loss": 0.5664, "step": 33086 }, { "epoch": 18.484357541899442, "grad_norm": 0.4427291750907898, "learning_rate": 7.641456582633054e-05, "loss": 0.3422, "step": 33087 }, { "epoch": 18.48491620111732, "grad_norm": 0.8680294156074524, "learning_rate": 7.638655462184874e-05, "loss": 0.3653, "step": 33088 }, { "epoch": 18.485474860335195, "grad_norm": 1.4251518249511719, "learning_rate": 7.635854341736695e-05, "loss": 0.3637, "step": 33089 }, { "epoch": 18.48603351955307, "grad_norm": 0.5643889904022217, "learning_rate": 7.633053221288517e-05, "loss": 0.4147, "step": 33090 }, { "epoch": 18.486592178770948, "grad_norm": 1.5160716772079468, "learning_rate": 7.630252100840336e-05, "loss": 0.3667, "step": 33091 }, { "epoch": 18.48715083798883, "grad_norm": 0.5278897881507874, "learning_rate": 7.627450980392158e-05, "loss": 0.4094, "step": 33092 }, { "epoch": 18.487709497206705, "grad_norm": 0.5631977915763855, "learning_rate": 7.624649859943977e-05, "loss": 0.3746, "step": 33093 }, { "epoch": 18.48826815642458, "grad_norm": 0.4107406735420227, "learning_rate": 7.621848739495799e-05, "loss": 0.3953, "step": 33094 }, { "epoch": 18.488826815642458, "grad_norm": 0.5018259286880493, "learning_rate": 7.61904761904762e-05, "loss": 0.4387, "step": 33095 }, { "epoch": 18.489385474860335, 
"grad_norm": 0.37330323457717896, "learning_rate": 7.61624649859944e-05, "loss": 0.3784, "step": 33096 }, { "epoch": 18.48994413407821, "grad_norm": 0.3837027847766876, "learning_rate": 7.613445378151261e-05, "loss": 0.3666, "step": 33097 }, { "epoch": 18.490502793296088, "grad_norm": 1.465105414390564, "learning_rate": 7.610644257703082e-05, "loss": 0.3488, "step": 33098 }, { "epoch": 18.491061452513968, "grad_norm": 0.44824424386024475, "learning_rate": 7.607843137254902e-05, "loss": 0.4173, "step": 33099 }, { "epoch": 18.491620111731844, "grad_norm": 0.3605848550796509, "learning_rate": 7.605042016806723e-05, "loss": 0.3523, "step": 33100 }, { "epoch": 18.49217877094972, "grad_norm": 0.7937778830528259, "learning_rate": 7.602240896358543e-05, "loss": 0.3825, "step": 33101 }, { "epoch": 18.492737430167598, "grad_norm": 0.6326472759246826, "learning_rate": 7.599439775910365e-05, "loss": 0.3843, "step": 33102 }, { "epoch": 18.493296089385474, "grad_norm": 0.6514365077018738, "learning_rate": 7.596638655462185e-05, "loss": 0.3958, "step": 33103 }, { "epoch": 18.49385474860335, "grad_norm": 0.9572352766990662, "learning_rate": 7.593837535014007e-05, "loss": 0.3022, "step": 33104 }, { "epoch": 18.49441340782123, "grad_norm": 0.325369656085968, "learning_rate": 7.591036414565826e-05, "loss": 0.3368, "step": 33105 }, { "epoch": 18.494972067039107, "grad_norm": 0.5275483727455139, "learning_rate": 7.588235294117648e-05, "loss": 0.3703, "step": 33106 }, { "epoch": 18.495530726256984, "grad_norm": 0.4663551151752472, "learning_rate": 7.585434173669468e-05, "loss": 0.3267, "step": 33107 }, { "epoch": 18.49608938547486, "grad_norm": 1.1299080848693848, "learning_rate": 7.582633053221289e-05, "loss": 0.4904, "step": 33108 }, { "epoch": 18.496648044692737, "grad_norm": 0.4735184609889984, "learning_rate": 7.57983193277311e-05, "loss": 0.4935, "step": 33109 }, { "epoch": 18.497206703910614, "grad_norm": 0.40381255745887756, "learning_rate": 7.57703081232493e-05, "loss": 0.45, 
"step": 33110 }, { "epoch": 18.49776536312849, "grad_norm": 0.49893704056739807, "learning_rate": 7.574229691876751e-05, "loss": 0.4343, "step": 33111 }, { "epoch": 18.49832402234637, "grad_norm": 0.3621094822883606, "learning_rate": 7.571428571428573e-05, "loss": 0.3422, "step": 33112 }, { "epoch": 18.498882681564247, "grad_norm": 0.5491006970405579, "learning_rate": 7.568627450980392e-05, "loss": 0.3803, "step": 33113 }, { "epoch": 18.499441340782123, "grad_norm": 0.4539957642555237, "learning_rate": 7.565826330532214e-05, "loss": 0.421, "step": 33114 }, { "epoch": 18.5, "grad_norm": 0.47944334149360657, "learning_rate": 7.563025210084033e-05, "loss": 0.4285, "step": 33115 }, { "epoch": 18.500558659217877, "grad_norm": 0.5856572389602661, "learning_rate": 7.560224089635855e-05, "loss": 0.3584, "step": 33116 }, { "epoch": 18.501117318435753, "grad_norm": 0.5330870747566223, "learning_rate": 7.557422969187676e-05, "loss": 0.529, "step": 33117 }, { "epoch": 18.50167597765363, "grad_norm": 0.4092240631580353, "learning_rate": 7.554621848739496e-05, "loss": 0.3492, "step": 33118 }, { "epoch": 18.50223463687151, "grad_norm": 0.7180306315422058, "learning_rate": 7.551820728291317e-05, "loss": 0.3424, "step": 33119 }, { "epoch": 18.502793296089386, "grad_norm": 0.38659730553627014, "learning_rate": 7.549019607843137e-05, "loss": 0.3772, "step": 33120 }, { "epoch": 18.503351955307263, "grad_norm": 0.36830198764801025, "learning_rate": 7.546218487394958e-05, "loss": 0.396, "step": 33121 }, { "epoch": 18.50391061452514, "grad_norm": 0.7057275772094727, "learning_rate": 7.54341736694678e-05, "loss": 0.3826, "step": 33122 }, { "epoch": 18.504469273743016, "grad_norm": 0.8301900625228882, "learning_rate": 7.540616246498599e-05, "loss": 0.3786, "step": 33123 }, { "epoch": 18.505027932960893, "grad_norm": 0.4118098020553589, "learning_rate": 7.537815126050421e-05, "loss": 0.361, "step": 33124 }, { "epoch": 18.505586592178773, "grad_norm": 1.040213704109192, "learning_rate": 
7.53501400560224e-05, "loss": 0.3848, "step": 33125 }, { "epoch": 18.50614525139665, "grad_norm": 0.8100784420967102, "learning_rate": 7.532212885154062e-05, "loss": 0.3111, "step": 33126 }, { "epoch": 18.506703910614526, "grad_norm": 0.8182530403137207, "learning_rate": 7.529411764705882e-05, "loss": 0.3934, "step": 33127 }, { "epoch": 18.507262569832402, "grad_norm": 0.3644542098045349, "learning_rate": 7.526610644257704e-05, "loss": 0.3501, "step": 33128 }, { "epoch": 18.50782122905028, "grad_norm": 0.517069935798645, "learning_rate": 7.523809523809524e-05, "loss": 0.41, "step": 33129 }, { "epoch": 18.508379888268156, "grad_norm": 0.46252840757369995, "learning_rate": 7.521008403361345e-05, "loss": 0.4326, "step": 33130 }, { "epoch": 18.508938547486032, "grad_norm": 0.36550891399383545, "learning_rate": 7.518207282913165e-05, "loss": 0.4662, "step": 33131 }, { "epoch": 18.509497206703912, "grad_norm": 0.6734106540679932, "learning_rate": 7.515406162464986e-05, "loss": 0.3488, "step": 33132 }, { "epoch": 18.51005586592179, "grad_norm": 0.5024005174636841, "learning_rate": 7.512605042016807e-05, "loss": 0.4461, "step": 33133 }, { "epoch": 18.510614525139665, "grad_norm": 0.45493632555007935, "learning_rate": 7.509803921568629e-05, "loss": 0.4329, "step": 33134 }, { "epoch": 18.511173184357542, "grad_norm": 0.3843703866004944, "learning_rate": 7.507002801120448e-05, "loss": 0.3793, "step": 33135 }, { "epoch": 18.51173184357542, "grad_norm": 0.3260156810283661, "learning_rate": 7.50420168067227e-05, "loss": 0.3111, "step": 33136 }, { "epoch": 18.512290502793295, "grad_norm": 0.4309711158275604, "learning_rate": 7.501400560224089e-05, "loss": 0.4148, "step": 33137 }, { "epoch": 18.51284916201117, "grad_norm": 0.42396098375320435, "learning_rate": 7.498599439775911e-05, "loss": 0.4683, "step": 33138 }, { "epoch": 18.513407821229052, "grad_norm": 0.5131839513778687, "learning_rate": 7.495798319327732e-05, "loss": 0.5678, "step": 33139 }, { "epoch": 18.51396648044693, 
"grad_norm": 0.7215701341629028, "learning_rate": 7.492997198879552e-05, "loss": 0.4591, "step": 33140 }, { "epoch": 18.514525139664805, "grad_norm": 0.36599570512771606, "learning_rate": 7.490196078431373e-05, "loss": 0.3928, "step": 33141 }, { "epoch": 18.51508379888268, "grad_norm": 0.5501282811164856, "learning_rate": 7.487394957983193e-05, "loss": 0.4272, "step": 33142 }, { "epoch": 18.515642458100558, "grad_norm": 0.44552475214004517, "learning_rate": 7.484593837535014e-05, "loss": 0.3913, "step": 33143 }, { "epoch": 18.516201117318435, "grad_norm": 0.7973290681838989, "learning_rate": 7.481792717086836e-05, "loss": 0.4686, "step": 33144 }, { "epoch": 18.51675977653631, "grad_norm": 0.5320394039154053, "learning_rate": 7.478991596638655e-05, "loss": 0.382, "step": 33145 }, { "epoch": 18.51731843575419, "grad_norm": 0.8162189722061157, "learning_rate": 7.476190476190477e-05, "loss": 0.4688, "step": 33146 }, { "epoch": 18.517877094972068, "grad_norm": 0.341412216424942, "learning_rate": 7.473389355742296e-05, "loss": 0.377, "step": 33147 }, { "epoch": 18.518435754189944, "grad_norm": 1.8213880062103271, "learning_rate": 7.470588235294118e-05, "loss": 0.4102, "step": 33148 }, { "epoch": 18.51899441340782, "grad_norm": 0.46000349521636963, "learning_rate": 7.467787114845939e-05, "loss": 0.4041, "step": 33149 }, { "epoch": 18.519553072625698, "grad_norm": 0.4847964644432068, "learning_rate": 7.46498599439776e-05, "loss": 0.2782, "step": 33150 }, { "epoch": 18.520111731843574, "grad_norm": 1.4006507396697998, "learning_rate": 7.46218487394958e-05, "loss": 0.2997, "step": 33151 }, { "epoch": 18.52067039106145, "grad_norm": 0.3633391261100769, "learning_rate": 7.459383753501401e-05, "loss": 0.3576, "step": 33152 }, { "epoch": 18.52122905027933, "grad_norm": 0.41202208399772644, "learning_rate": 7.456582633053221e-05, "loss": 0.3238, "step": 33153 }, { "epoch": 18.521787709497207, "grad_norm": 2.5322020053863525, "learning_rate": 7.453781512605042e-05, "loss": 0.4036, 
"step": 33154 }, { "epoch": 18.522346368715084, "grad_norm": 0.45762449502944946, "learning_rate": 7.450980392156863e-05, "loss": 0.3718, "step": 33155 }, { "epoch": 18.52290502793296, "grad_norm": 0.6414396166801453, "learning_rate": 7.448179271708684e-05, "loss": 0.3802, "step": 33156 }, { "epoch": 18.523463687150837, "grad_norm": 0.5207961797714233, "learning_rate": 7.445378151260504e-05, "loss": 0.3864, "step": 33157 }, { "epoch": 18.524022346368714, "grad_norm": 0.6475685238838196, "learning_rate": 7.442577030812326e-05, "loss": 0.429, "step": 33158 }, { "epoch": 18.524581005586594, "grad_norm": 2.0297670364379883, "learning_rate": 7.439775910364145e-05, "loss": 0.4424, "step": 33159 }, { "epoch": 18.52513966480447, "grad_norm": 0.3975844979286194, "learning_rate": 7.436974789915967e-05, "loss": 0.3114, "step": 33160 }, { "epoch": 18.525698324022347, "grad_norm": 0.8499632477760315, "learning_rate": 7.434173669467787e-05, "loss": 0.4036, "step": 33161 }, { "epoch": 18.526256983240224, "grad_norm": 2.8143064975738525, "learning_rate": 7.431372549019608e-05, "loss": 0.3653, "step": 33162 }, { "epoch": 18.5268156424581, "grad_norm": 0.42966195940971375, "learning_rate": 7.428571428571429e-05, "loss": 0.3744, "step": 33163 }, { "epoch": 18.527374301675977, "grad_norm": 2.403418779373169, "learning_rate": 7.425770308123249e-05, "loss": 0.4275, "step": 33164 }, { "epoch": 18.527932960893853, "grad_norm": 0.4748799204826355, "learning_rate": 7.42296918767507e-05, "loss": 0.473, "step": 33165 }, { "epoch": 18.528491620111733, "grad_norm": 0.42772579193115234, "learning_rate": 7.420168067226892e-05, "loss": 0.2686, "step": 33166 }, { "epoch": 18.52905027932961, "grad_norm": 0.32804208993911743, "learning_rate": 7.417366946778711e-05, "loss": 0.389, "step": 33167 }, { "epoch": 18.529608938547486, "grad_norm": 0.5379674434661865, "learning_rate": 7.414565826330533e-05, "loss": 0.5156, "step": 33168 }, { "epoch": 18.530167597765363, "grad_norm": 0.4180110692977905, 
"learning_rate": 7.411764705882352e-05, "loss": 0.4637, "step": 33169 }, { "epoch": 18.53072625698324, "grad_norm": 0.5903040170669556, "learning_rate": 7.408963585434174e-05, "loss": 0.4076, "step": 33170 }, { "epoch": 18.531284916201116, "grad_norm": 0.5031701326370239, "learning_rate": 7.406162464985995e-05, "loss": 0.4007, "step": 33171 }, { "epoch": 18.531843575418993, "grad_norm": 0.4165465235710144, "learning_rate": 7.403361344537815e-05, "loss": 0.3814, "step": 33172 }, { "epoch": 18.532402234636873, "grad_norm": 0.4524424970149994, "learning_rate": 7.400560224089636e-05, "loss": 0.4105, "step": 33173 }, { "epoch": 18.53296089385475, "grad_norm": 1.2550581693649292, "learning_rate": 7.397759103641457e-05, "loss": 0.4557, "step": 33174 }, { "epoch": 18.533519553072626, "grad_norm": 2.22735333442688, "learning_rate": 7.394957983193277e-05, "loss": 0.3991, "step": 33175 }, { "epoch": 18.534078212290503, "grad_norm": 0.5408685803413391, "learning_rate": 7.392156862745099e-05, "loss": 0.4748, "step": 33176 }, { "epoch": 18.53463687150838, "grad_norm": 0.4846052825450897, "learning_rate": 7.389355742296918e-05, "loss": 0.232, "step": 33177 }, { "epoch": 18.535195530726256, "grad_norm": 3.0756566524505615, "learning_rate": 7.38655462184874e-05, "loss": 0.4665, "step": 33178 }, { "epoch": 18.535754189944136, "grad_norm": 0.3997471034526825, "learning_rate": 7.38375350140056e-05, "loss": 0.338, "step": 33179 }, { "epoch": 18.536312849162012, "grad_norm": 0.3297956585884094, "learning_rate": 7.380952380952382e-05, "loss": 0.4415, "step": 33180 }, { "epoch": 18.53687150837989, "grad_norm": 0.8906546235084534, "learning_rate": 7.378151260504201e-05, "loss": 0.4183, "step": 33181 }, { "epoch": 18.537430167597766, "grad_norm": 0.46344131231307983, "learning_rate": 7.375350140056023e-05, "loss": 0.3753, "step": 33182 }, { "epoch": 18.537988826815642, "grad_norm": 0.41927555203437805, "learning_rate": 7.372549019607843e-05, "loss": 0.4176, "step": 33183 }, { "epoch": 
18.53854748603352, "grad_norm": 0.5486651062965393, "learning_rate": 7.369747899159664e-05, "loss": 0.4219, "step": 33184 }, { "epoch": 18.539106145251395, "grad_norm": 0.5035902857780457, "learning_rate": 7.366946778711485e-05, "loss": 0.4534, "step": 33185 }, { "epoch": 18.539664804469275, "grad_norm": 1.6768066883087158, "learning_rate": 7.364145658263305e-05, "loss": 0.629, "step": 33186 }, { "epoch": 18.540223463687152, "grad_norm": 2.1856179237365723, "learning_rate": 7.361344537815126e-05, "loss": 0.3252, "step": 33187 }, { "epoch": 18.54078212290503, "grad_norm": 0.4666445255279541, "learning_rate": 7.358543417366948e-05, "loss": 0.3478, "step": 33188 }, { "epoch": 18.541340782122905, "grad_norm": 0.543511152267456, "learning_rate": 7.355742296918767e-05, "loss": 0.538, "step": 33189 }, { "epoch": 18.54189944134078, "grad_norm": 0.6718709468841553, "learning_rate": 7.352941176470589e-05, "loss": 0.4193, "step": 33190 }, { "epoch": 18.542458100558658, "grad_norm": 0.2732716500759125, "learning_rate": 7.350140056022408e-05, "loss": 0.3295, "step": 33191 }, { "epoch": 18.543016759776535, "grad_norm": 0.35757744312286377, "learning_rate": 7.34733893557423e-05, "loss": 0.3599, "step": 33192 }, { "epoch": 18.543575418994415, "grad_norm": 0.4452117085456848, "learning_rate": 7.344537815126051e-05, "loss": 0.3975, "step": 33193 }, { "epoch": 18.54413407821229, "grad_norm": 0.4574030339717865, "learning_rate": 7.341736694677871e-05, "loss": 0.5115, "step": 33194 }, { "epoch": 18.544692737430168, "grad_norm": 0.588256299495697, "learning_rate": 7.338935574229692e-05, "loss": 0.5974, "step": 33195 }, { "epoch": 18.545251396648045, "grad_norm": 0.56131911277771, "learning_rate": 7.336134453781513e-05, "loss": 0.3411, "step": 33196 }, { "epoch": 18.54581005586592, "grad_norm": 1.006944179534912, "learning_rate": 7.333333333333333e-05, "loss": 0.4188, "step": 33197 }, { "epoch": 18.546368715083798, "grad_norm": 0.8523073196411133, "learning_rate": 7.330532212885155e-05, 
"loss": 0.4112, "step": 33198 }, { "epoch": 18.546927374301674, "grad_norm": 1.447073221206665, "learning_rate": 7.327731092436974e-05, "loss": 0.4438, "step": 33199 }, { "epoch": 18.547486033519554, "grad_norm": 0.3902038335800171, "learning_rate": 7.324929971988796e-05, "loss": 0.402, "step": 33200 }, { "epoch": 18.54804469273743, "grad_norm": 0.5681053400039673, "learning_rate": 7.322128851540616e-05, "loss": 0.3704, "step": 33201 }, { "epoch": 18.548603351955308, "grad_norm": 0.5559684634208679, "learning_rate": 7.319327731092437e-05, "loss": 0.4918, "step": 33202 }, { "epoch": 18.549162011173184, "grad_norm": 1.091188907623291, "learning_rate": 7.316526610644258e-05, "loss": 0.5067, "step": 33203 }, { "epoch": 18.54972067039106, "grad_norm": 0.42429253458976746, "learning_rate": 7.313725490196079e-05, "loss": 0.3281, "step": 33204 }, { "epoch": 18.550279329608937, "grad_norm": 0.5959124565124512, "learning_rate": 7.310924369747899e-05, "loss": 0.378, "step": 33205 }, { "epoch": 18.550837988826817, "grad_norm": 0.6779744625091553, "learning_rate": 7.30812324929972e-05, "loss": 0.3785, "step": 33206 }, { "epoch": 18.551396648044694, "grad_norm": 0.4374370872974396, "learning_rate": 7.30532212885154e-05, "loss": 0.3957, "step": 33207 }, { "epoch": 18.55195530726257, "grad_norm": 0.7064943909645081, "learning_rate": 7.302521008403361e-05, "loss": 0.4395, "step": 33208 }, { "epoch": 18.552513966480447, "grad_norm": 0.28746578097343445, "learning_rate": 7.299719887955182e-05, "loss": 0.2544, "step": 33209 }, { "epoch": 18.553072625698324, "grad_norm": 0.4191751778125763, "learning_rate": 7.296918767507004e-05, "loss": 0.4033, "step": 33210 }, { "epoch": 18.5536312849162, "grad_norm": 0.49161866307258606, "learning_rate": 7.294117647058823e-05, "loss": 0.4372, "step": 33211 }, { "epoch": 18.554189944134077, "grad_norm": 0.3971637189388275, "learning_rate": 7.291316526610645e-05, "loss": 0.4318, "step": 33212 }, { "epoch": 18.554748603351957, "grad_norm": 
1.196581244468689, "learning_rate": 7.288515406162464e-05, "loss": 0.4166, "step": 33213 }, { "epoch": 18.555307262569833, "grad_norm": 0.4599059224128723, "learning_rate": 7.285714285714286e-05, "loss": 0.3712, "step": 33214 }, { "epoch": 18.55586592178771, "grad_norm": 0.4462869465351105, "learning_rate": 7.282913165266107e-05, "loss": 0.4345, "step": 33215 }, { "epoch": 18.556424581005587, "grad_norm": 0.831937313079834, "learning_rate": 7.280112044817927e-05, "loss": 0.3613, "step": 33216 }, { "epoch": 18.556983240223463, "grad_norm": 1.3959224224090576, "learning_rate": 7.277310924369748e-05, "loss": 0.5899, "step": 33217 }, { "epoch": 18.55754189944134, "grad_norm": 0.3580646216869354, "learning_rate": 7.274509803921568e-05, "loss": 0.3501, "step": 33218 }, { "epoch": 18.558100558659216, "grad_norm": 0.5040664672851562, "learning_rate": 7.271708683473389e-05, "loss": 0.332, "step": 33219 }, { "epoch": 18.558659217877096, "grad_norm": 0.937634289264679, "learning_rate": 7.268907563025211e-05, "loss": 0.4182, "step": 33220 }, { "epoch": 18.559217877094973, "grad_norm": 2.6909663677215576, "learning_rate": 7.26610644257703e-05, "loss": 0.4565, "step": 33221 }, { "epoch": 18.55977653631285, "grad_norm": 0.4037894010543823, "learning_rate": 7.263305322128852e-05, "loss": 0.3517, "step": 33222 }, { "epoch": 18.560335195530726, "grad_norm": 0.42995497584342957, "learning_rate": 7.260504201680671e-05, "loss": 0.3998, "step": 33223 }, { "epoch": 18.560893854748603, "grad_norm": 0.37578660249710083, "learning_rate": 7.257703081232493e-05, "loss": 0.4749, "step": 33224 }, { "epoch": 18.56145251396648, "grad_norm": 2.0960230827331543, "learning_rate": 7.254901960784314e-05, "loss": 0.4002, "step": 33225 }, { "epoch": 18.56201117318436, "grad_norm": 0.5072458982467651, "learning_rate": 7.252100840336135e-05, "loss": 0.3718, "step": 33226 }, { "epoch": 18.562569832402236, "grad_norm": 0.7249230742454529, "learning_rate": 7.249299719887955e-05, "loss": 0.3791, "step": 33227 
}, { "epoch": 18.563128491620112, "grad_norm": 0.4565328359603882, "learning_rate": 7.246498599439776e-05, "loss": 0.3983, "step": 33228 }, { "epoch": 18.56368715083799, "grad_norm": 0.4697781801223755, "learning_rate": 7.243697478991596e-05, "loss": 0.3648, "step": 33229 }, { "epoch": 18.564245810055866, "grad_norm": 0.5477104187011719, "learning_rate": 7.240896358543418e-05, "loss": 0.5351, "step": 33230 }, { "epoch": 18.564804469273742, "grad_norm": 0.3905161917209625, "learning_rate": 7.238095238095238e-05, "loss": 0.3054, "step": 33231 }, { "epoch": 18.56536312849162, "grad_norm": 2.3207709789276123, "learning_rate": 7.23529411764706e-05, "loss": 0.3868, "step": 33232 }, { "epoch": 18.5659217877095, "grad_norm": 0.44025224447250366, "learning_rate": 7.232492997198879e-05, "loss": 0.384, "step": 33233 }, { "epoch": 18.566480446927375, "grad_norm": 0.4687928259372711, "learning_rate": 7.229691876750701e-05, "loss": 0.332, "step": 33234 }, { "epoch": 18.567039106145252, "grad_norm": 0.40319857001304626, "learning_rate": 7.226890756302521e-05, "loss": 0.499, "step": 33235 }, { "epoch": 18.56759776536313, "grad_norm": 0.4615161418914795, "learning_rate": 7.224089635854342e-05, "loss": 0.406, "step": 33236 }, { "epoch": 18.568156424581005, "grad_norm": 0.7605828046798706, "learning_rate": 7.221288515406163e-05, "loss": 0.511, "step": 33237 }, { "epoch": 18.56871508379888, "grad_norm": 2.966731071472168, "learning_rate": 7.218487394957983e-05, "loss": 0.4446, "step": 33238 }, { "epoch": 18.56927374301676, "grad_norm": 0.3684213161468506, "learning_rate": 7.215686274509804e-05, "loss": 0.394, "step": 33239 }, { "epoch": 18.56983240223464, "grad_norm": 0.5597073435783386, "learning_rate": 7.212885154061624e-05, "loss": 0.546, "step": 33240 }, { "epoch": 18.570391061452515, "grad_norm": 0.8951592445373535, "learning_rate": 7.210084033613445e-05, "loss": 0.4788, "step": 33241 }, { "epoch": 18.57094972067039, "grad_norm": 0.34841498732566833, "learning_rate": 
7.207282913165267e-05, "loss": 0.4622, "step": 33242 }, { "epoch": 18.571508379888268, "grad_norm": 0.9607884883880615, "learning_rate": 7.204481792717086e-05, "loss": 0.4271, "step": 33243 }, { "epoch": 18.572067039106145, "grad_norm": 0.46509015560150146, "learning_rate": 7.201680672268908e-05, "loss": 0.3516, "step": 33244 }, { "epoch": 18.57262569832402, "grad_norm": 0.4555004835128784, "learning_rate": 7.198879551820727e-05, "loss": 0.5174, "step": 33245 }, { "epoch": 18.573184357541898, "grad_norm": 5.340290069580078, "learning_rate": 7.196078431372549e-05, "loss": 0.4249, "step": 33246 }, { "epoch": 18.573743016759778, "grad_norm": 0.6346580982208252, "learning_rate": 7.19327731092437e-05, "loss": 0.4467, "step": 33247 }, { "epoch": 18.574301675977654, "grad_norm": 0.5110494494438171, "learning_rate": 7.19047619047619e-05, "loss": 0.3731, "step": 33248 }, { "epoch": 18.57486033519553, "grad_norm": 0.36393192410469055, "learning_rate": 7.187675070028011e-05, "loss": 0.3197, "step": 33249 }, { "epoch": 18.575418994413408, "grad_norm": 0.9145272374153137, "learning_rate": 7.184873949579832e-05, "loss": 0.5364, "step": 33250 }, { "epoch": 18.575977653631284, "grad_norm": 0.6487234830856323, "learning_rate": 7.182072829131652e-05, "loss": 0.4829, "step": 33251 }, { "epoch": 18.57653631284916, "grad_norm": 0.384820818901062, "learning_rate": 7.179271708683474e-05, "loss": 0.4202, "step": 33252 }, { "epoch": 18.577094972067037, "grad_norm": 0.793975293636322, "learning_rate": 7.176470588235293e-05, "loss": 0.3957, "step": 33253 }, { "epoch": 18.577653631284917, "grad_norm": 0.6721038222312927, "learning_rate": 7.173669467787115e-05, "loss": 0.3662, "step": 33254 }, { "epoch": 18.578212290502794, "grad_norm": 1.0753592252731323, "learning_rate": 7.170868347338935e-05, "loss": 0.3263, "step": 33255 }, { "epoch": 18.57877094972067, "grad_norm": 0.4046759307384491, "learning_rate": 7.168067226890757e-05, "loss": 0.4993, "step": 33256 }, { "epoch": 18.579329608938547, 
"grad_norm": 1.7801207304000854, "learning_rate": 7.165266106442577e-05, "loss": 0.41, "step": 33257 }, { "epoch": 18.579888268156424, "grad_norm": 1.1364519596099854, "learning_rate": 7.162464985994398e-05, "loss": 0.4858, "step": 33258 }, { "epoch": 18.5804469273743, "grad_norm": 0.5880615711212158, "learning_rate": 7.159663865546218e-05, "loss": 0.4152, "step": 33259 }, { "epoch": 18.58100558659218, "grad_norm": 1.4969029426574707, "learning_rate": 7.156862745098039e-05, "loss": 0.3912, "step": 33260 }, { "epoch": 18.581564245810057, "grad_norm": 0.3177163004875183, "learning_rate": 7.15406162464986e-05, "loss": 0.3105, "step": 33261 }, { "epoch": 18.582122905027934, "grad_norm": 0.39522597193717957, "learning_rate": 7.151260504201682e-05, "loss": 0.4095, "step": 33262 }, { "epoch": 18.58268156424581, "grad_norm": 0.6207083463668823, "learning_rate": 7.148459383753501e-05, "loss": 0.3569, "step": 33263 }, { "epoch": 18.583240223463687, "grad_norm": 0.46913254261016846, "learning_rate": 7.145658263305323e-05, "loss": 0.3818, "step": 33264 }, { "epoch": 18.583798882681563, "grad_norm": 1.3477263450622559, "learning_rate": 7.142857142857142e-05, "loss": 0.4285, "step": 33265 }, { "epoch": 18.58435754189944, "grad_norm": 1.10686457157135, "learning_rate": 7.140056022408964e-05, "loss": 0.351, "step": 33266 }, { "epoch": 18.58491620111732, "grad_norm": 0.3655551075935364, "learning_rate": 7.137254901960783e-05, "loss": 0.3682, "step": 33267 }, { "epoch": 18.585474860335196, "grad_norm": 0.35059067606925964, "learning_rate": 7.134453781512605e-05, "loss": 0.4181, "step": 33268 }, { "epoch": 18.586033519553073, "grad_norm": 0.7126660943031311, "learning_rate": 7.131652661064426e-05, "loss": 0.3384, "step": 33269 }, { "epoch": 18.58659217877095, "grad_norm": 0.39949169754981995, "learning_rate": 7.128851540616246e-05, "loss": 0.3947, "step": 33270 }, { "epoch": 18.587150837988826, "grad_norm": 0.5206325054168701, "learning_rate": 7.126050420168067e-05, "loss": 0.462, 
"step": 33271 }, { "epoch": 18.587709497206703, "grad_norm": 1.393926739692688, "learning_rate": 7.123249299719888e-05, "loss": 0.4513, "step": 33272 }, { "epoch": 18.58826815642458, "grad_norm": 0.3261336088180542, "learning_rate": 7.120448179271708e-05, "loss": 0.3595, "step": 33273 }, { "epoch": 18.58882681564246, "grad_norm": 0.47407636046409607, "learning_rate": 7.11764705882353e-05, "loss": 0.3385, "step": 33274 }, { "epoch": 18.589385474860336, "grad_norm": 0.48043501377105713, "learning_rate": 7.11484593837535e-05, "loss": 0.5154, "step": 33275 }, { "epoch": 18.589944134078213, "grad_norm": 0.6077399849891663, "learning_rate": 7.112044817927171e-05, "loss": 0.3616, "step": 33276 }, { "epoch": 18.59050279329609, "grad_norm": 0.44984865188598633, "learning_rate": 7.10924369747899e-05, "loss": 0.3844, "step": 33277 }, { "epoch": 18.591061452513966, "grad_norm": 1.0627429485321045, "learning_rate": 7.106442577030813e-05, "loss": 0.4101, "step": 33278 }, { "epoch": 18.591620111731842, "grad_norm": 0.4674539268016815, "learning_rate": 7.103641456582633e-05, "loss": 0.3961, "step": 33279 }, { "epoch": 18.592178770949722, "grad_norm": 0.4832356870174408, "learning_rate": 7.100840336134454e-05, "loss": 0.3921, "step": 33280 }, { "epoch": 18.5927374301676, "grad_norm": 0.45875832438468933, "learning_rate": 7.098039215686274e-05, "loss": 0.5165, "step": 33281 }, { "epoch": 18.593296089385476, "grad_norm": 0.5701793432235718, "learning_rate": 7.095238095238095e-05, "loss": 0.5011, "step": 33282 }, { "epoch": 18.593854748603352, "grad_norm": 0.4326432943344116, "learning_rate": 7.092436974789916e-05, "loss": 0.3937, "step": 33283 }, { "epoch": 18.59441340782123, "grad_norm": 0.5247666835784912, "learning_rate": 7.089635854341737e-05, "loss": 0.477, "step": 33284 }, { "epoch": 18.594972067039105, "grad_norm": 0.4139988422393799, "learning_rate": 7.086834733893557e-05, "loss": 0.4307, "step": 33285 }, { "epoch": 18.595530726256982, "grad_norm": 0.3451310396194458, 
"learning_rate": 7.084033613445379e-05, "loss": 0.3372, "step": 33286 }, { "epoch": 18.596089385474862, "grad_norm": 1.644040822982788, "learning_rate": 7.081232492997198e-05, "loss": 0.3664, "step": 33287 }, { "epoch": 18.59664804469274, "grad_norm": 0.6035840511322021, "learning_rate": 7.07843137254902e-05, "loss": 0.4486, "step": 33288 }, { "epoch": 18.597206703910615, "grad_norm": 0.9313774108886719, "learning_rate": 7.07563025210084e-05, "loss": 0.3313, "step": 33289 }, { "epoch": 18.59776536312849, "grad_norm": 2.306288480758667, "learning_rate": 7.072829131652661e-05, "loss": 0.3907, "step": 33290 }, { "epoch": 18.598324022346368, "grad_norm": 0.4268757998943329, "learning_rate": 7.070028011204482e-05, "loss": 0.3032, "step": 33291 }, { "epoch": 18.598882681564245, "grad_norm": 0.42450085282325745, "learning_rate": 7.067226890756302e-05, "loss": 0.3449, "step": 33292 }, { "epoch": 18.59944134078212, "grad_norm": 0.4467846155166626, "learning_rate": 7.064425770308123e-05, "loss": 0.4707, "step": 33293 }, { "epoch": 18.6, "grad_norm": 0.48317572474479675, "learning_rate": 7.061624649859943e-05, "loss": 0.4223, "step": 33294 }, { "epoch": 18.600558659217878, "grad_norm": 0.4549301862716675, "learning_rate": 7.058823529411764e-05, "loss": 0.4177, "step": 33295 }, { "epoch": 18.601117318435755, "grad_norm": 0.48605504631996155, "learning_rate": 7.056022408963586e-05, "loss": 0.3695, "step": 33296 }, { "epoch": 18.60167597765363, "grad_norm": 0.4467478394508362, "learning_rate": 7.053221288515405e-05, "loss": 0.404, "step": 33297 }, { "epoch": 18.602234636871508, "grad_norm": 0.44747933745384216, "learning_rate": 7.050420168067227e-05, "loss": 0.4255, "step": 33298 }, { "epoch": 18.602793296089384, "grad_norm": 0.4978366792201996, "learning_rate": 7.047619047619046e-05, "loss": 0.4848, "step": 33299 }, { "epoch": 18.60335195530726, "grad_norm": 0.39808717370033264, "learning_rate": 7.044817927170868e-05, "loss": 0.3402, "step": 33300 }, { "epoch": 
18.60391061452514, "grad_norm": 0.8843775987625122, "learning_rate": 7.04201680672269e-05, "loss": 0.3535, "step": 33301 }, { "epoch": 18.604469273743018, "grad_norm": 1.2137494087219238, "learning_rate": 7.03921568627451e-05, "loss": 0.56, "step": 33302 }, { "epoch": 18.605027932960894, "grad_norm": 0.44732093811035156, "learning_rate": 7.036414565826332e-05, "loss": 0.4376, "step": 33303 }, { "epoch": 18.60558659217877, "grad_norm": 1.8453917503356934, "learning_rate": 7.033613445378151e-05, "loss": 0.4299, "step": 33304 }, { "epoch": 18.606145251396647, "grad_norm": 0.4487641453742981, "learning_rate": 7.030812324929973e-05, "loss": 0.4115, "step": 33305 }, { "epoch": 18.606703910614524, "grad_norm": 0.806923508644104, "learning_rate": 7.028011204481793e-05, "loss": 0.2797, "step": 33306 }, { "epoch": 18.607262569832404, "grad_norm": 0.5042039155960083, "learning_rate": 7.025210084033614e-05, "loss": 0.353, "step": 33307 }, { "epoch": 18.60782122905028, "grad_norm": 0.40456557273864746, "learning_rate": 7.022408963585435e-05, "loss": 0.378, "step": 33308 }, { "epoch": 18.608379888268157, "grad_norm": 0.6416122913360596, "learning_rate": 7.019607843137255e-05, "loss": 0.4145, "step": 33309 }, { "epoch": 18.608938547486034, "grad_norm": 0.3558356761932373, "learning_rate": 7.016806722689076e-05, "loss": 0.3978, "step": 33310 }, { "epoch": 18.60949720670391, "grad_norm": 1.0025913715362549, "learning_rate": 7.014005602240898e-05, "loss": 0.4903, "step": 33311 }, { "epoch": 18.610055865921787, "grad_norm": 0.5669221878051758, "learning_rate": 7.011204481792717e-05, "loss": 0.6458, "step": 33312 }, { "epoch": 18.610614525139663, "grad_norm": 0.44173604249954224, "learning_rate": 7.008403361344539e-05, "loss": 0.4132, "step": 33313 }, { "epoch": 18.611173184357543, "grad_norm": 0.4549146294593811, "learning_rate": 7.005602240896358e-05, "loss": 0.4963, "step": 33314 }, { "epoch": 18.61173184357542, "grad_norm": 0.3699948489665985, "learning_rate": 
7.00280112044818e-05, "loss": 0.4142, "step": 33315 }, { "epoch": 18.612290502793297, "grad_norm": 0.30903923511505127, "learning_rate": 7.000000000000001e-05, "loss": 0.3222, "step": 33316 }, { "epoch": 18.612849162011173, "grad_norm": 0.426784873008728, "learning_rate": 6.997198879551821e-05, "loss": 0.4562, "step": 33317 }, { "epoch": 18.61340782122905, "grad_norm": 0.46098801493644714, "learning_rate": 6.994397759103642e-05, "loss": 0.436, "step": 33318 }, { "epoch": 18.613966480446926, "grad_norm": 0.4943619966506958, "learning_rate": 6.991596638655463e-05, "loss": 0.4207, "step": 33319 }, { "epoch": 18.614525139664803, "grad_norm": 2.67362380027771, "learning_rate": 6.988795518207283e-05, "loss": 0.4138, "step": 33320 }, { "epoch": 18.615083798882683, "grad_norm": 1.2203537225723267, "learning_rate": 6.985994397759104e-05, "loss": 0.2797, "step": 33321 }, { "epoch": 18.61564245810056, "grad_norm": 0.6407197117805481, "learning_rate": 6.983193277310924e-05, "loss": 0.4509, "step": 33322 }, { "epoch": 18.616201117318436, "grad_norm": 0.44569990038871765, "learning_rate": 6.980392156862746e-05, "loss": 0.3285, "step": 33323 }, { "epoch": 18.616759776536313, "grad_norm": 0.9600493311882019, "learning_rate": 6.977591036414566e-05, "loss": 0.337, "step": 33324 }, { "epoch": 18.61731843575419, "grad_norm": 0.38452187180519104, "learning_rate": 6.974789915966387e-05, "loss": 0.3733, "step": 33325 }, { "epoch": 18.617877094972066, "grad_norm": 0.5779157280921936, "learning_rate": 6.971988795518207e-05, "loss": 0.3544, "step": 33326 }, { "epoch": 18.618435754189946, "grad_norm": 1.3285716772079468, "learning_rate": 6.969187675070029e-05, "loss": 0.435, "step": 33327 }, { "epoch": 18.618994413407822, "grad_norm": 0.6382309198379517, "learning_rate": 6.966386554621849e-05, "loss": 0.4314, "step": 33328 }, { "epoch": 18.6195530726257, "grad_norm": 0.4070654511451721, "learning_rate": 6.96358543417367e-05, "loss": 0.4191, "step": 33329 }, { "epoch": 18.620111731843576, 
"grad_norm": 0.3165675699710846, "learning_rate": 6.96078431372549e-05, "loss": 0.2905, "step": 33330 }, { "epoch": 18.620670391061452, "grad_norm": 0.6993505954742432, "learning_rate": 6.957983193277311e-05, "loss": 0.3288, "step": 33331 }, { "epoch": 18.62122905027933, "grad_norm": 0.3519015312194824, "learning_rate": 6.955182072829132e-05, "loss": 0.3951, "step": 33332 }, { "epoch": 18.621787709497205, "grad_norm": 0.4634532630443573, "learning_rate": 6.952380952380954e-05, "loss": 0.4133, "step": 33333 }, { "epoch": 18.622346368715085, "grad_norm": 0.3909326493740082, "learning_rate": 6.949579831932773e-05, "loss": 0.3508, "step": 33334 }, { "epoch": 18.622905027932962, "grad_norm": 0.3784567415714264, "learning_rate": 6.946778711484595e-05, "loss": 0.3786, "step": 33335 }, { "epoch": 18.62346368715084, "grad_norm": 0.43578144907951355, "learning_rate": 6.943977591036414e-05, "loss": 0.4814, "step": 33336 }, { "epoch": 18.624022346368715, "grad_norm": 0.3682384490966797, "learning_rate": 6.941176470588236e-05, "loss": 0.357, "step": 33337 }, { "epoch": 18.62458100558659, "grad_norm": 0.3547244966030121, "learning_rate": 6.938375350140057e-05, "loss": 0.3404, "step": 33338 }, { "epoch": 18.62513966480447, "grad_norm": 0.42116090655326843, "learning_rate": 6.935574229691877e-05, "loss": 0.4496, "step": 33339 }, { "epoch": 18.625698324022345, "grad_norm": 0.38738059997558594, "learning_rate": 6.932773109243698e-05, "loss": 0.4142, "step": 33340 }, { "epoch": 18.626256983240225, "grad_norm": 0.5341260433197021, "learning_rate": 6.929971988795518e-05, "loss": 0.4304, "step": 33341 }, { "epoch": 18.6268156424581, "grad_norm": 0.5779695510864258, "learning_rate": 6.927170868347339e-05, "loss": 0.6634, "step": 33342 }, { "epoch": 18.627374301675978, "grad_norm": 0.4567739963531494, "learning_rate": 6.924369747899161e-05, "loss": 0.4972, "step": 33343 }, { "epoch": 18.627932960893855, "grad_norm": 0.5166769623756409, "learning_rate": 6.92156862745098e-05, "loss": 
0.4008, "step": 33344 }, { "epoch": 18.62849162011173, "grad_norm": 0.44283583760261536, "learning_rate": 6.918767507002802e-05, "loss": 0.5069, "step": 33345 }, { "epoch": 18.629050279329608, "grad_norm": 0.4220067858695984, "learning_rate": 6.915966386554621e-05, "loss": 0.4044, "step": 33346 }, { "epoch": 18.629608938547484, "grad_norm": 0.5188944339752197, "learning_rate": 6.913165266106443e-05, "loss": 0.3974, "step": 33347 }, { "epoch": 18.630167597765364, "grad_norm": 0.4026779532432556, "learning_rate": 6.910364145658263e-05, "loss": 0.3993, "step": 33348 }, { "epoch": 18.63072625698324, "grad_norm": 0.45261114835739136, "learning_rate": 6.907563025210085e-05, "loss": 0.3556, "step": 33349 }, { "epoch": 18.631284916201118, "grad_norm": 0.4704189896583557, "learning_rate": 6.904761904761905e-05, "loss": 0.372, "step": 33350 }, { "epoch": 18.631843575418994, "grad_norm": 0.43418875336647034, "learning_rate": 6.901960784313726e-05, "loss": 0.5005, "step": 33351 }, { "epoch": 18.63240223463687, "grad_norm": 0.6777607202529907, "learning_rate": 6.899159663865546e-05, "loss": 0.4431, "step": 33352 }, { "epoch": 18.632960893854747, "grad_norm": 0.5315324664115906, "learning_rate": 6.896358543417367e-05, "loss": 0.5795, "step": 33353 }, { "epoch": 18.633519553072627, "grad_norm": 0.4635539948940277, "learning_rate": 6.893557422969188e-05, "loss": 0.3553, "step": 33354 }, { "epoch": 18.634078212290504, "grad_norm": 0.405765175819397, "learning_rate": 6.89075630252101e-05, "loss": 0.3628, "step": 33355 }, { "epoch": 18.63463687150838, "grad_norm": 0.386717826128006, "learning_rate": 6.887955182072829e-05, "loss": 0.4548, "step": 33356 }, { "epoch": 18.635195530726257, "grad_norm": 0.46891769766807556, "learning_rate": 6.885154061624651e-05, "loss": 0.3966, "step": 33357 }, { "epoch": 18.635754189944134, "grad_norm": 0.5598861575126648, "learning_rate": 6.88235294117647e-05, "loss": 0.3838, "step": 33358 }, { "epoch": 18.63631284916201, "grad_norm": 
3.9775185585021973, "learning_rate": 6.879551820728292e-05, "loss": 0.3262, "step": 33359 }, { "epoch": 18.636871508379887, "grad_norm": 0.4295130968093872, "learning_rate": 6.876750700280113e-05, "loss": 0.3648, "step": 33360 }, { "epoch": 18.637430167597767, "grad_norm": 0.38078030943870544, "learning_rate": 6.873949579831933e-05, "loss": 0.4605, "step": 33361 }, { "epoch": 18.637988826815644, "grad_norm": 0.7125481367111206, "learning_rate": 6.871148459383754e-05, "loss": 0.4915, "step": 33362 }, { "epoch": 18.63854748603352, "grad_norm": 0.5079014301300049, "learning_rate": 6.868347338935574e-05, "loss": 0.4306, "step": 33363 }, { "epoch": 18.639106145251397, "grad_norm": 0.37734177708625793, "learning_rate": 6.865546218487395e-05, "loss": 0.2915, "step": 33364 }, { "epoch": 18.639664804469273, "grad_norm": 0.36632850766181946, "learning_rate": 6.862745098039217e-05, "loss": 0.3349, "step": 33365 }, { "epoch": 18.64022346368715, "grad_norm": 0.3756070137023926, "learning_rate": 6.859943977591036e-05, "loss": 0.3728, "step": 33366 }, { "epoch": 18.640782122905026, "grad_norm": 0.4458467960357666, "learning_rate": 6.857142857142858e-05, "loss": 0.4098, "step": 33367 }, { "epoch": 18.641340782122906, "grad_norm": 0.37712183594703674, "learning_rate": 6.854341736694677e-05, "loss": 0.4033, "step": 33368 }, { "epoch": 18.641899441340783, "grad_norm": 0.42903950810432434, "learning_rate": 6.851540616246499e-05, "loss": 0.4192, "step": 33369 }, { "epoch": 18.64245810055866, "grad_norm": 0.5669801831245422, "learning_rate": 6.84873949579832e-05, "loss": 0.3165, "step": 33370 }, { "epoch": 18.643016759776536, "grad_norm": 0.5043022036552429, "learning_rate": 6.84593837535014e-05, "loss": 0.3816, "step": 33371 }, { "epoch": 18.643575418994413, "grad_norm": 0.5248939394950867, "learning_rate": 6.843137254901961e-05, "loss": 0.3789, "step": 33372 }, { "epoch": 18.64413407821229, "grad_norm": 1.111251950263977, "learning_rate": 6.840336134453782e-05, "loss": 0.4261, "step": 
33373 }, { "epoch": 18.64469273743017, "grad_norm": 0.5118752717971802, "learning_rate": 6.837535014005602e-05, "loss": 0.5202, "step": 33374 }, { "epoch": 18.645251396648046, "grad_norm": 0.5092790126800537, "learning_rate": 6.834733893557423e-05, "loss": 0.4774, "step": 33375 }, { "epoch": 18.645810055865923, "grad_norm": 0.4587847590446472, "learning_rate": 6.831932773109243e-05, "loss": 0.3485, "step": 33376 }, { "epoch": 18.6463687150838, "grad_norm": 0.8198220729827881, "learning_rate": 6.829131652661065e-05, "loss": 0.3504, "step": 33377 }, { "epoch": 18.646927374301676, "grad_norm": 0.5205161571502686, "learning_rate": 6.826330532212885e-05, "loss": 0.28, "step": 33378 }, { "epoch": 18.647486033519552, "grad_norm": 0.6236117482185364, "learning_rate": 6.823529411764707e-05, "loss": 0.3962, "step": 33379 }, { "epoch": 18.64804469273743, "grad_norm": 0.5078142881393433, "learning_rate": 6.820728291316526e-05, "loss": 0.4444, "step": 33380 }, { "epoch": 18.64860335195531, "grad_norm": 0.5336299538612366, "learning_rate": 6.817927170868348e-05, "loss": 0.4304, "step": 33381 }, { "epoch": 18.649162011173186, "grad_norm": 1.2762658596038818, "learning_rate": 6.815126050420168e-05, "loss": 0.3632, "step": 33382 }, { "epoch": 18.649720670391062, "grad_norm": 0.3571813106536865, "learning_rate": 6.812324929971989e-05, "loss": 0.409, "step": 33383 }, { "epoch": 18.65027932960894, "grad_norm": 0.9720109105110168, "learning_rate": 6.80952380952381e-05, "loss": 0.4535, "step": 33384 }, { "epoch": 18.650837988826815, "grad_norm": 0.5444110631942749, "learning_rate": 6.80672268907563e-05, "loss": 0.3612, "step": 33385 }, { "epoch": 18.65139664804469, "grad_norm": 0.40758684277534485, "learning_rate": 6.803921568627451e-05, "loss": 0.4329, "step": 33386 }, { "epoch": 18.65195530726257, "grad_norm": 0.6672812104225159, "learning_rate": 6.801120448179273e-05, "loss": 0.3773, "step": 33387 }, { "epoch": 18.65251396648045, "grad_norm": 0.7788664698600769, "learning_rate": 
6.798319327731092e-05, "loss": 0.3264, "step": 33388 }, { "epoch": 18.653072625698325, "grad_norm": 0.6699696779251099, "learning_rate": 6.795518207282914e-05, "loss": 0.5074, "step": 33389 }, { "epoch": 18.6536312849162, "grad_norm": 0.4077255129814148, "learning_rate": 6.792717086834733e-05, "loss": 0.3973, "step": 33390 }, { "epoch": 18.654189944134078, "grad_norm": 0.402718722820282, "learning_rate": 6.789915966386555e-05, "loss": 0.4072, "step": 33391 }, { "epoch": 18.654748603351955, "grad_norm": 0.5116134285926819, "learning_rate": 6.787114845938376e-05, "loss": 0.3528, "step": 33392 }, { "epoch": 18.65530726256983, "grad_norm": 0.4758792519569397, "learning_rate": 6.784313725490196e-05, "loss": 0.3629, "step": 33393 }, { "epoch": 18.655865921787708, "grad_norm": 0.36802932620048523, "learning_rate": 6.781512605042017e-05, "loss": 0.3526, "step": 33394 }, { "epoch": 18.656424581005588, "grad_norm": 0.4252503514289856, "learning_rate": 6.778711484593838e-05, "loss": 0.3415, "step": 33395 }, { "epoch": 18.656983240223465, "grad_norm": 1.050351858139038, "learning_rate": 6.775910364145658e-05, "loss": 0.4402, "step": 33396 }, { "epoch": 18.65754189944134, "grad_norm": 0.43924498558044434, "learning_rate": 6.77310924369748e-05, "loss": 0.3683, "step": 33397 }, { "epoch": 18.658100558659218, "grad_norm": 0.5609034895896912, "learning_rate": 6.7703081232493e-05, "loss": 0.4917, "step": 33398 }, { "epoch": 18.658659217877094, "grad_norm": 0.7274319529533386, "learning_rate": 6.767507002801121e-05, "loss": 0.3173, "step": 33399 }, { "epoch": 18.65921787709497, "grad_norm": 0.9132292866706848, "learning_rate": 6.76470588235294e-05, "loss": 0.5188, "step": 33400 }, { "epoch": 18.659776536312847, "grad_norm": 0.7994263172149658, "learning_rate": 6.761904761904763e-05, "loss": 0.4378, "step": 33401 }, { "epoch": 18.660335195530728, "grad_norm": 0.368822306394577, "learning_rate": 6.759103641456583e-05, "loss": 0.3658, "step": 33402 }, { "epoch": 18.660893854748604, 
"grad_norm": 0.7670091986656189, "learning_rate": 6.756302521008404e-05, "loss": 0.4421, "step": 33403 }, { "epoch": 18.66145251396648, "grad_norm": 1.2274000644683838, "learning_rate": 6.753501400560224e-05, "loss": 0.4916, "step": 33404 }, { "epoch": 18.662011173184357, "grad_norm": 1.5293766260147095, "learning_rate": 6.750700280112045e-05, "loss": 0.4057, "step": 33405 }, { "epoch": 18.662569832402234, "grad_norm": 0.5386022329330444, "learning_rate": 6.747899159663866e-05, "loss": 0.479, "step": 33406 }, { "epoch": 18.66312849162011, "grad_norm": 0.4993986189365387, "learning_rate": 6.745098039215686e-05, "loss": 0.4085, "step": 33407 }, { "epoch": 18.66368715083799, "grad_norm": 0.7581431865692139, "learning_rate": 6.742296918767507e-05, "loss": 0.4864, "step": 33408 }, { "epoch": 18.664245810055867, "grad_norm": 0.39697983860969543, "learning_rate": 6.739495798319329e-05, "loss": 0.5637, "step": 33409 }, { "epoch": 18.664804469273744, "grad_norm": 0.515539288520813, "learning_rate": 6.736694677871148e-05, "loss": 0.4363, "step": 33410 }, { "epoch": 18.66536312849162, "grad_norm": 0.37271395325660706, "learning_rate": 6.73389355742297e-05, "loss": 0.3839, "step": 33411 }, { "epoch": 18.665921787709497, "grad_norm": 0.5110172629356384, "learning_rate": 6.731092436974789e-05, "loss": 0.4851, "step": 33412 }, { "epoch": 18.666480446927373, "grad_norm": 0.43135422468185425, "learning_rate": 6.728291316526611e-05, "loss": 0.4247, "step": 33413 }, { "epoch": 18.66703910614525, "grad_norm": 0.37992382049560547, "learning_rate": 6.725490196078432e-05, "loss": 0.4249, "step": 33414 }, { "epoch": 18.66759776536313, "grad_norm": 0.33368533849716187, "learning_rate": 6.722689075630252e-05, "loss": 0.4076, "step": 33415 }, { "epoch": 18.668156424581007, "grad_norm": 0.41844138503074646, "learning_rate": 6.719887955182073e-05, "loss": 0.5004, "step": 33416 }, { "epoch": 18.668715083798883, "grad_norm": 0.6788742542266846, "learning_rate": 6.717086834733893e-05, "loss": 
0.5111, "step": 33417 }, { "epoch": 18.66927374301676, "grad_norm": 0.467020183801651, "learning_rate": 6.714285714285714e-05, "loss": 0.3777, "step": 33418 }, { "epoch": 18.669832402234636, "grad_norm": 0.3772476315498352, "learning_rate": 6.711484593837536e-05, "loss": 0.4085, "step": 33419 }, { "epoch": 18.670391061452513, "grad_norm": 0.38728058338165283, "learning_rate": 6.708683473389355e-05, "loss": 0.3391, "step": 33420 }, { "epoch": 18.67094972067039, "grad_norm": 0.42252540588378906, "learning_rate": 6.705882352941177e-05, "loss": 0.4304, "step": 33421 }, { "epoch": 18.67150837988827, "grad_norm": 0.42641809582710266, "learning_rate": 6.703081232492996e-05, "loss": 0.4231, "step": 33422 }, { "epoch": 18.672067039106146, "grad_norm": 6.117280960083008, "learning_rate": 6.700280112044818e-05, "loss": 0.3582, "step": 33423 }, { "epoch": 18.672625698324023, "grad_norm": 0.34205129742622375, "learning_rate": 6.697478991596639e-05, "loss": 0.3737, "step": 33424 }, { "epoch": 18.6731843575419, "grad_norm": 0.38349395990371704, "learning_rate": 6.69467787114846e-05, "loss": 0.384, "step": 33425 }, { "epoch": 18.673743016759776, "grad_norm": 5.445019721984863, "learning_rate": 6.69187675070028e-05, "loss": 0.549, "step": 33426 }, { "epoch": 18.674301675977652, "grad_norm": 0.3726084232330322, "learning_rate": 6.689075630252101e-05, "loss": 0.3219, "step": 33427 }, { "epoch": 18.674860335195532, "grad_norm": 0.4386651813983917, "learning_rate": 6.686274509803921e-05, "loss": 0.2697, "step": 33428 }, { "epoch": 18.67541899441341, "grad_norm": 0.35443681478500366, "learning_rate": 6.683473389355743e-05, "loss": 0.388, "step": 33429 }, { "epoch": 18.675977653631286, "grad_norm": 0.5843977928161621, "learning_rate": 6.680672268907563e-05, "loss": 0.492, "step": 33430 }, { "epoch": 18.676536312849162, "grad_norm": 0.37908267974853516, "learning_rate": 6.677871148459385e-05, "loss": 0.3413, "step": 33431 }, { "epoch": 18.67709497206704, "grad_norm": 0.5288969278335571, 
"learning_rate": 6.675070028011204e-05, "loss": 0.4877, "step": 33432 }, { "epoch": 18.677653631284915, "grad_norm": 0.37134626507759094, "learning_rate": 6.672268907563026e-05, "loss": 0.3633, "step": 33433 }, { "epoch": 18.678212290502792, "grad_norm": 0.38918158411979675, "learning_rate": 6.669467787114845e-05, "loss": 0.3626, "step": 33434 }, { "epoch": 18.678770949720672, "grad_norm": 0.3228667378425598, "learning_rate": 6.666666666666667e-05, "loss": 0.4075, "step": 33435 }, { "epoch": 18.67932960893855, "grad_norm": 0.6147653460502625, "learning_rate": 6.663865546218488e-05, "loss": 0.4991, "step": 33436 }, { "epoch": 18.679888268156425, "grad_norm": 1.1263574361801147, "learning_rate": 6.661064425770308e-05, "loss": 0.513, "step": 33437 }, { "epoch": 18.6804469273743, "grad_norm": 0.332773894071579, "learning_rate": 6.658263305322129e-05, "loss": 0.3661, "step": 33438 }, { "epoch": 18.68100558659218, "grad_norm": 0.4812290668487549, "learning_rate": 6.65546218487395e-05, "loss": 0.4563, "step": 33439 }, { "epoch": 18.681564245810055, "grad_norm": 0.35524433851242065, "learning_rate": 6.65266106442577e-05, "loss": 0.3677, "step": 33440 }, { "epoch": 18.68212290502793, "grad_norm": 0.380925714969635, "learning_rate": 6.649859943977592e-05, "loss": 0.5541, "step": 33441 }, { "epoch": 18.68268156424581, "grad_norm": 0.47978392243385315, "learning_rate": 6.647058823529411e-05, "loss": 0.527, "step": 33442 }, { "epoch": 18.683240223463688, "grad_norm": 0.3735135495662689, "learning_rate": 6.644257703081233e-05, "loss": 0.4005, "step": 33443 }, { "epoch": 18.683798882681565, "grad_norm": 0.6373332142829895, "learning_rate": 6.641456582633052e-05, "loss": 0.4329, "step": 33444 }, { "epoch": 18.68435754189944, "grad_norm": 0.3036915063858032, "learning_rate": 6.638655462184874e-05, "loss": 0.2913, "step": 33445 }, { "epoch": 18.684916201117318, "grad_norm": 0.5199482440948486, "learning_rate": 6.635854341736695e-05, "loss": 0.5777, "step": 33446 }, { "epoch": 
18.685474860335194, "grad_norm": 0.36825549602508545, "learning_rate": 6.633053221288516e-05, "loss": 0.3525, "step": 33447 }, { "epoch": 18.68603351955307, "grad_norm": 0.29921406507492065, "learning_rate": 6.630252100840336e-05, "loss": 0.3117, "step": 33448 }, { "epoch": 18.68659217877095, "grad_norm": 0.429211288690567, "learning_rate": 6.627450980392157e-05, "loss": 0.3609, "step": 33449 }, { "epoch": 18.687150837988828, "grad_norm": 0.35947978496551514, "learning_rate": 6.624649859943977e-05, "loss": 0.3826, "step": 33450 }, { "epoch": 18.687709497206704, "grad_norm": 0.4741494059562683, "learning_rate": 6.621848739495799e-05, "loss": 0.3908, "step": 33451 }, { "epoch": 18.68826815642458, "grad_norm": 0.9542248845100403, "learning_rate": 6.619047619047619e-05, "loss": 0.3512, "step": 33452 }, { "epoch": 18.688826815642457, "grad_norm": 0.469257116317749, "learning_rate": 6.61624649859944e-05, "loss": 0.4691, "step": 33453 }, { "epoch": 18.689385474860334, "grad_norm": 0.37024492025375366, "learning_rate": 6.61344537815126e-05, "loss": 0.3029, "step": 33454 }, { "epoch": 18.689944134078214, "grad_norm": 0.38087335228919983, "learning_rate": 6.610644257703082e-05, "loss": 0.4421, "step": 33455 }, { "epoch": 18.69050279329609, "grad_norm": 0.5070014595985413, "learning_rate": 6.607843137254902e-05, "loss": 0.4671, "step": 33456 }, { "epoch": 18.691061452513967, "grad_norm": 0.47173941135406494, "learning_rate": 6.605042016806723e-05, "loss": 0.4226, "step": 33457 }, { "epoch": 18.691620111731844, "grad_norm": 1.1254301071166992, "learning_rate": 6.602240896358543e-05, "loss": 0.5326, "step": 33458 }, { "epoch": 18.69217877094972, "grad_norm": 0.3740171492099762, "learning_rate": 6.599439775910364e-05, "loss": 0.3618, "step": 33459 }, { "epoch": 18.692737430167597, "grad_norm": 1.5500136613845825, "learning_rate": 6.596638655462185e-05, "loss": 0.4863, "step": 33460 }, { "epoch": 18.693296089385473, "grad_norm": 0.40329331159591675, "learning_rate": 
6.593837535014005e-05, "loss": 0.36, "step": 33461 }, { "epoch": 18.693854748603353, "grad_norm": 1.1434741020202637, "learning_rate": 6.591036414565826e-05, "loss": 0.5125, "step": 33462 }, { "epoch": 18.69441340782123, "grad_norm": 0.4510940611362457, "learning_rate": 6.588235294117648e-05, "loss": 0.4034, "step": 33463 }, { "epoch": 18.694972067039107, "grad_norm": 0.37764859199523926, "learning_rate": 6.585434173669467e-05, "loss": 0.42, "step": 33464 }, { "epoch": 18.695530726256983, "grad_norm": 0.4557996392250061, "learning_rate": 6.582633053221289e-05, "loss": 0.3941, "step": 33465 }, { "epoch": 18.69608938547486, "grad_norm": 0.42460697889328003, "learning_rate": 6.579831932773108e-05, "loss": 0.5063, "step": 33466 }, { "epoch": 18.696648044692736, "grad_norm": 1.5521520376205444, "learning_rate": 6.57703081232493e-05, "loss": 0.4302, "step": 33467 }, { "epoch": 18.697206703910613, "grad_norm": 0.6736728549003601, "learning_rate": 6.574229691876751e-05, "loss": 0.376, "step": 33468 }, { "epoch": 18.697765363128493, "grad_norm": 0.5069401860237122, "learning_rate": 6.571428571428571e-05, "loss": 0.6019, "step": 33469 }, { "epoch": 18.69832402234637, "grad_norm": 0.557361900806427, "learning_rate": 6.568627450980392e-05, "loss": 0.4209, "step": 33470 }, { "epoch": 18.698882681564246, "grad_norm": 0.3788871169090271, "learning_rate": 6.565826330532213e-05, "loss": 0.4169, "step": 33471 }, { "epoch": 18.699441340782123, "grad_norm": 0.5408769845962524, "learning_rate": 6.563025210084033e-05, "loss": 0.4089, "step": 33472 }, { "epoch": 18.7, "grad_norm": 0.7183144688606262, "learning_rate": 6.560224089635855e-05, "loss": 0.3244, "step": 33473 }, { "epoch": 18.700558659217876, "grad_norm": 0.38459843397140503, "learning_rate": 6.557422969187674e-05, "loss": 0.4013, "step": 33474 }, { "epoch": 18.701117318435756, "grad_norm": 1.41618812084198, "learning_rate": 6.554621848739496e-05, "loss": 0.5249, "step": 33475 }, { "epoch": 18.701675977653633, "grad_norm": 
0.6083825826644897, "learning_rate": 6.551820728291316e-05, "loss": 0.4764, "step": 33476 }, { "epoch": 18.70223463687151, "grad_norm": 0.49202781915664673, "learning_rate": 6.549019607843138e-05, "loss": 0.4582, "step": 33477 }, { "epoch": 18.702793296089386, "grad_norm": 0.3436841666698456, "learning_rate": 6.546218487394958e-05, "loss": 0.2664, "step": 33478 }, { "epoch": 18.703351955307262, "grad_norm": 0.4124920666217804, "learning_rate": 6.543417366946779e-05, "loss": 0.3201, "step": 33479 }, { "epoch": 18.70391061452514, "grad_norm": 0.45277103781700134, "learning_rate": 6.5406162464986e-05, "loss": 0.423, "step": 33480 }, { "epoch": 18.704469273743015, "grad_norm": 0.38388049602508545, "learning_rate": 6.53781512605042e-05, "loss": 0.3584, "step": 33481 }, { "epoch": 18.705027932960895, "grad_norm": 0.42084765434265137, "learning_rate": 6.53501400560224e-05, "loss": 0.3358, "step": 33482 }, { "epoch": 18.705586592178772, "grad_norm": 0.7023029327392578, "learning_rate": 6.532212885154063e-05, "loss": 0.4595, "step": 33483 }, { "epoch": 18.70614525139665, "grad_norm": 0.7741283178329468, "learning_rate": 6.529411764705882e-05, "loss": 0.4092, "step": 33484 }, { "epoch": 18.706703910614525, "grad_norm": 0.3026845455169678, "learning_rate": 6.526610644257704e-05, "loss": 0.3626, "step": 33485 }, { "epoch": 18.7072625698324, "grad_norm": 0.345252126455307, "learning_rate": 6.523809523809523e-05, "loss": 0.4136, "step": 33486 }, { "epoch": 18.70782122905028, "grad_norm": 0.5867792963981628, "learning_rate": 6.521008403361345e-05, "loss": 0.4503, "step": 33487 }, { "epoch": 18.708379888268155, "grad_norm": 0.358710914850235, "learning_rate": 6.518207282913164e-05, "loss": 0.4142, "step": 33488 }, { "epoch": 18.708938547486035, "grad_norm": 0.6675719618797302, "learning_rate": 6.515406162464986e-05, "loss": 0.2848, "step": 33489 }, { "epoch": 18.70949720670391, "grad_norm": 0.5228134989738464, "learning_rate": 6.512605042016807e-05, "loss": 0.4357, "step": 33490 
}, { "epoch": 18.710055865921788, "grad_norm": 0.8309877514839172, "learning_rate": 6.509803921568627e-05, "loss": 0.3157, "step": 33491 }, { "epoch": 18.710614525139665, "grad_norm": 1.5525248050689697, "learning_rate": 6.507002801120448e-05, "loss": 0.4008, "step": 33492 }, { "epoch": 18.71117318435754, "grad_norm": 0.4749762713909149, "learning_rate": 6.504201680672269e-05, "loss": 0.4309, "step": 33493 }, { "epoch": 18.711731843575418, "grad_norm": 0.44904613494873047, "learning_rate": 6.501400560224089e-05, "loss": 0.4429, "step": 33494 }, { "epoch": 18.712290502793294, "grad_norm": 0.46362900733947754, "learning_rate": 6.498599439775911e-05, "loss": 0.4831, "step": 33495 }, { "epoch": 18.712849162011175, "grad_norm": 0.4536849558353424, "learning_rate": 6.49579831932773e-05, "loss": 0.4639, "step": 33496 }, { "epoch": 18.71340782122905, "grad_norm": 0.6675393581390381, "learning_rate": 6.492997198879552e-05, "loss": 0.3892, "step": 33497 }, { "epoch": 18.713966480446928, "grad_norm": 0.42143043875694275, "learning_rate": 6.490196078431372e-05, "loss": 0.4607, "step": 33498 }, { "epoch": 18.714525139664804, "grad_norm": 0.43682605028152466, "learning_rate": 6.487394957983193e-05, "loss": 0.4065, "step": 33499 }, { "epoch": 18.71508379888268, "grad_norm": 0.4687761664390564, "learning_rate": 6.484593837535014e-05, "loss": 0.4763, "step": 33500 }, { "epoch": 18.71508379888268, "eval_cer": 0.08476536501805723, "eval_loss": 0.3193410336971283, "eval_runtime": 55.6868, "eval_samples_per_second": 81.491, "eval_steps_per_second": 5.1, "eval_wer": 0.3354375576101226, "step": 33500 }, { "epoch": 18.715642458100557, "grad_norm": 0.6815500855445862, "learning_rate": 6.481792717086835e-05, "loss": 0.3896, "step": 33501 }, { "epoch": 18.716201117318434, "grad_norm": 1.6572506427764893, "learning_rate": 6.478991596638655e-05, "loss": 0.4983, "step": 33502 }, { "epoch": 18.716759776536314, "grad_norm": 0.5081488490104675, "learning_rate": 6.476190476190476e-05, "loss": 
0.4617, "step": 33503 }, { "epoch": 18.71731843575419, "grad_norm": 0.3655071258544922, "learning_rate": 6.473389355742296e-05, "loss": 0.3722, "step": 33504 }, { "epoch": 18.717877094972067, "grad_norm": 1.954050898551941, "learning_rate": 6.470588235294118e-05, "loss": 0.3733, "step": 33505 }, { "epoch": 18.718435754189944, "grad_norm": 0.47904837131500244, "learning_rate": 6.467787114845938e-05, "loss": 0.3619, "step": 33506 }, { "epoch": 18.71899441340782, "grad_norm": 0.4262252748012543, "learning_rate": 6.46498599439776e-05, "loss": 0.4215, "step": 33507 }, { "epoch": 18.719553072625697, "grad_norm": 0.6213170289993286, "learning_rate": 6.462184873949579e-05, "loss": 0.4556, "step": 33508 }, { "epoch": 18.720111731843577, "grad_norm": 0.5783782005310059, "learning_rate": 6.459383753501401e-05, "loss": 0.4961, "step": 33509 }, { "epoch": 18.720670391061454, "grad_norm": 0.3850608468055725, "learning_rate": 6.456582633053221e-05, "loss": 0.362, "step": 33510 }, { "epoch": 18.72122905027933, "grad_norm": 0.5463806390762329, "learning_rate": 6.453781512605042e-05, "loss": 0.3582, "step": 33511 }, { "epoch": 18.721787709497207, "grad_norm": 0.45780298113822937, "learning_rate": 6.450980392156863e-05, "loss": 0.3662, "step": 33512 }, { "epoch": 18.722346368715083, "grad_norm": 1.0495574474334717, "learning_rate": 6.448179271708683e-05, "loss": 0.375, "step": 33513 }, { "epoch": 18.72290502793296, "grad_norm": 0.4512307643890381, "learning_rate": 6.445378151260504e-05, "loss": 0.3662, "step": 33514 }, { "epoch": 18.723463687150836, "grad_norm": 0.5555634498596191, "learning_rate": 6.442577030812324e-05, "loss": 0.5181, "step": 33515 }, { "epoch": 18.724022346368717, "grad_norm": 0.5169801712036133, "learning_rate": 6.439775910364145e-05, "loss": 0.3613, "step": 33516 }, { "epoch": 18.724581005586593, "grad_norm": 0.34606289863586426, "learning_rate": 6.436974789915967e-05, "loss": 0.3679, "step": 33517 }, { "epoch": 18.72513966480447, "grad_norm": 
0.4272880554199219, "learning_rate": 6.434173669467786e-05, "loss": 0.3703, "step": 33518 }, { "epoch": 18.725698324022346, "grad_norm": 0.31165993213653564, "learning_rate": 6.431372549019608e-05, "loss": 0.3263, "step": 33519 }, { "epoch": 18.726256983240223, "grad_norm": 1.1470900774002075, "learning_rate": 6.428571428571427e-05, "loss": 0.4411, "step": 33520 }, { "epoch": 18.7268156424581, "grad_norm": 0.40630728006362915, "learning_rate": 6.42577030812325e-05, "loss": 0.3582, "step": 33521 }, { "epoch": 18.727374301675976, "grad_norm": 3.5836431980133057, "learning_rate": 6.42296918767507e-05, "loss": 0.4071, "step": 33522 }, { "epoch": 18.727932960893856, "grad_norm": 1.5012294054031372, "learning_rate": 6.42016806722689e-05, "loss": 0.4231, "step": 33523 }, { "epoch": 18.728491620111733, "grad_norm": 0.46891170740127563, "learning_rate": 6.417366946778711e-05, "loss": 0.5469, "step": 33524 }, { "epoch": 18.72905027932961, "grad_norm": 0.3708672523498535, "learning_rate": 6.414565826330532e-05, "loss": 0.3395, "step": 33525 }, { "epoch": 18.729608938547486, "grad_norm": 0.8656470775604248, "learning_rate": 6.411764705882352e-05, "loss": 0.4004, "step": 33526 }, { "epoch": 18.730167597765362, "grad_norm": 1.1113722324371338, "learning_rate": 6.408963585434174e-05, "loss": 0.2736, "step": 33527 }, { "epoch": 18.73072625698324, "grad_norm": 0.43879860639572144, "learning_rate": 6.406162464985994e-05, "loss": 0.3823, "step": 33528 }, { "epoch": 18.73128491620112, "grad_norm": 0.40990403294563293, "learning_rate": 6.403361344537816e-05, "loss": 0.3664, "step": 33529 }, { "epoch": 18.731843575418996, "grad_norm": 0.3509818911552429, "learning_rate": 6.400560224089635e-05, "loss": 0.3173, "step": 33530 }, { "epoch": 18.732402234636872, "grad_norm": 0.36291226744651794, "learning_rate": 6.397759103641457e-05, "loss": 0.3569, "step": 33531 }, { "epoch": 18.73296089385475, "grad_norm": 0.5059667825698853, "learning_rate": 6.394957983193279e-05, "loss": 0.3845, "step": 
33532 }, { "epoch": 18.733519553072625, "grad_norm": 1.602874755859375, "learning_rate": 6.392156862745098e-05, "loss": 0.387, "step": 33533 }, { "epoch": 18.734078212290502, "grad_norm": 0.50505530834198, "learning_rate": 6.38935574229692e-05, "loss": 0.4263, "step": 33534 }, { "epoch": 18.73463687150838, "grad_norm": 0.554422914981842, "learning_rate": 6.386554621848739e-05, "loss": 0.3165, "step": 33535 }, { "epoch": 18.73519553072626, "grad_norm": 0.410466730594635, "learning_rate": 6.383753501400561e-05, "loss": 0.4478, "step": 33536 }, { "epoch": 18.735754189944135, "grad_norm": 0.4008373022079468, "learning_rate": 6.380952380952382e-05, "loss": 0.4049, "step": 33537 }, { "epoch": 18.73631284916201, "grad_norm": 0.5057961344718933, "learning_rate": 6.378151260504202e-05, "loss": 0.5317, "step": 33538 }, { "epoch": 18.73687150837989, "grad_norm": 0.8561438918113708, "learning_rate": 6.375350140056023e-05, "loss": 0.4027, "step": 33539 }, { "epoch": 18.737430167597765, "grad_norm": 0.4856656491756439, "learning_rate": 6.372549019607843e-05, "loss": 0.3458, "step": 33540 }, { "epoch": 18.73798882681564, "grad_norm": 0.31427201628685, "learning_rate": 6.369747899159664e-05, "loss": 0.4045, "step": 33541 }, { "epoch": 18.738547486033518, "grad_norm": 0.6725631952285767, "learning_rate": 6.366946778711485e-05, "loss": 0.4022, "step": 33542 }, { "epoch": 18.739106145251398, "grad_norm": 0.5081383585929871, "learning_rate": 6.364145658263305e-05, "loss": 0.3974, "step": 33543 }, { "epoch": 18.739664804469275, "grad_norm": 0.3647308051586151, "learning_rate": 6.361344537815127e-05, "loss": 0.3628, "step": 33544 }, { "epoch": 18.74022346368715, "grad_norm": 0.8962703347206116, "learning_rate": 6.358543417366946e-05, "loss": 0.5085, "step": 33545 }, { "epoch": 18.740782122905028, "grad_norm": 0.41111117601394653, "learning_rate": 6.355742296918768e-05, "loss": 0.4359, "step": 33546 }, { "epoch": 18.741340782122904, "grad_norm": 0.3825221061706543, "learning_rate": 
6.352941176470588e-05, "loss": 0.4237, "step": 33547 }, { "epoch": 18.74189944134078, "grad_norm": 2.754225969314575, "learning_rate": 6.35014005602241e-05, "loss": 0.4702, "step": 33548 }, { "epoch": 18.742458100558657, "grad_norm": 0.746883749961853, "learning_rate": 6.34733893557423e-05, "loss": 0.5241, "step": 33549 }, { "epoch": 18.743016759776538, "grad_norm": 0.46698498725891113, "learning_rate": 6.344537815126051e-05, "loss": 0.4536, "step": 33550 }, { "epoch": 18.743575418994414, "grad_norm": 0.32715073227882385, "learning_rate": 6.341736694677871e-05, "loss": 0.3052, "step": 33551 }, { "epoch": 18.74413407821229, "grad_norm": 0.3255140781402588, "learning_rate": 6.338935574229692e-05, "loss": 0.4001, "step": 33552 }, { "epoch": 18.744692737430167, "grad_norm": 1.024924397468567, "learning_rate": 6.336134453781513e-05, "loss": 0.4406, "step": 33553 }, { "epoch": 18.745251396648044, "grad_norm": 0.4181256592273712, "learning_rate": 6.333333333333335e-05, "loss": 0.403, "step": 33554 }, { "epoch": 18.74581005586592, "grad_norm": 0.46969282627105713, "learning_rate": 6.330532212885154e-05, "loss": 0.4011, "step": 33555 }, { "epoch": 18.7463687150838, "grad_norm": 1.2439371347427368, "learning_rate": 6.327731092436976e-05, "loss": 0.2997, "step": 33556 }, { "epoch": 18.746927374301677, "grad_norm": 0.40725943446159363, "learning_rate": 6.324929971988795e-05, "loss": 0.2854, "step": 33557 }, { "epoch": 18.747486033519554, "grad_norm": 0.5683439373970032, "learning_rate": 6.322128851540617e-05, "loss": 0.3898, "step": 33558 }, { "epoch": 18.74804469273743, "grad_norm": 0.45918145775794983, "learning_rate": 6.319327731092438e-05, "loss": 0.3301, "step": 33559 }, { "epoch": 18.748603351955307, "grad_norm": 0.5282741785049438, "learning_rate": 6.316526610644258e-05, "loss": 0.4106, "step": 33560 }, { "epoch": 18.749162011173183, "grad_norm": 0.3408030569553375, "learning_rate": 6.313725490196079e-05, "loss": 0.3614, "step": 33561 }, { "epoch": 18.74972067039106, 
"grad_norm": 0.2686549723148346, "learning_rate": 6.3109243697479e-05, "loss": 0.367, "step": 33562 }, { "epoch": 18.75027932960894, "grad_norm": 0.4324513077735901, "learning_rate": 6.30812324929972e-05, "loss": 0.4271, "step": 33563 }, { "epoch": 18.750837988826817, "grad_norm": 0.44949960708618164, "learning_rate": 6.305322128851542e-05, "loss": 0.448, "step": 33564 }, { "epoch": 18.751396648044693, "grad_norm": 0.994263231754303, "learning_rate": 6.302521008403361e-05, "loss": 0.3576, "step": 33565 }, { "epoch": 18.75195530726257, "grad_norm": 1.548141598701477, "learning_rate": 6.299719887955183e-05, "loss": 0.491, "step": 33566 }, { "epoch": 18.752513966480446, "grad_norm": 1.3650633096694946, "learning_rate": 6.296918767507002e-05, "loss": 0.4545, "step": 33567 }, { "epoch": 18.753072625698323, "grad_norm": 0.41302624344825745, "learning_rate": 6.294117647058824e-05, "loss": 0.4181, "step": 33568 }, { "epoch": 18.7536312849162, "grad_norm": 0.661688506603241, "learning_rate": 6.291316526610645e-05, "loss": 0.4612, "step": 33569 }, { "epoch": 18.75418994413408, "grad_norm": 0.4149094820022583, "learning_rate": 6.288515406162466e-05, "loss": 0.3798, "step": 33570 }, { "epoch": 18.754748603351956, "grad_norm": 1.880969762802124, "learning_rate": 6.285714285714286e-05, "loss": 0.3609, "step": 33571 }, { "epoch": 18.755307262569833, "grad_norm": 0.4125847816467285, "learning_rate": 6.282913165266107e-05, "loss": 0.4451, "step": 33572 }, { "epoch": 18.75586592178771, "grad_norm": 0.4462127685546875, "learning_rate": 6.280112044817927e-05, "loss": 0.3838, "step": 33573 }, { "epoch": 18.756424581005586, "grad_norm": 0.4350358545780182, "learning_rate": 6.277310924369748e-05, "loss": 0.3056, "step": 33574 }, { "epoch": 18.756983240223462, "grad_norm": 0.36763831973075867, "learning_rate": 6.274509803921569e-05, "loss": 0.3435, "step": 33575 }, { "epoch": 18.757541899441343, "grad_norm": 0.5017610192298889, "learning_rate": 6.27170868347339e-05, "loss": 0.3366, 
"step": 33576 }, { "epoch": 18.75810055865922, "grad_norm": 1.6766163110733032, "learning_rate": 6.26890756302521e-05, "loss": 0.398, "step": 33577 }, { "epoch": 18.758659217877096, "grad_norm": 0.9491270780563354, "learning_rate": 6.266106442577032e-05, "loss": 0.333, "step": 33578 }, { "epoch": 18.759217877094972, "grad_norm": 0.3896239101886749, "learning_rate": 6.263305322128851e-05, "loss": 0.326, "step": 33579 }, { "epoch": 18.75977653631285, "grad_norm": 0.8490318655967712, "learning_rate": 6.260504201680673e-05, "loss": 0.3519, "step": 33580 }, { "epoch": 18.760335195530725, "grad_norm": 0.5182773470878601, "learning_rate": 6.257703081232493e-05, "loss": 0.4598, "step": 33581 }, { "epoch": 18.760893854748602, "grad_norm": 0.3434610366821289, "learning_rate": 6.254901960784314e-05, "loss": 0.3383, "step": 33582 }, { "epoch": 18.761452513966482, "grad_norm": 1.0617681741714478, "learning_rate": 6.252100840336135e-05, "loss": 0.4286, "step": 33583 }, { "epoch": 18.76201117318436, "grad_norm": 0.8387086391448975, "learning_rate": 6.249299719887955e-05, "loss": 0.4394, "step": 33584 }, { "epoch": 18.762569832402235, "grad_norm": 0.40427055954933167, "learning_rate": 6.246498599439776e-05, "loss": 0.3566, "step": 33585 }, { "epoch": 18.76312849162011, "grad_norm": 0.3702031970024109, "learning_rate": 6.243697478991596e-05, "loss": 0.3836, "step": 33586 }, { "epoch": 18.76368715083799, "grad_norm": 0.29070088267326355, "learning_rate": 6.240896358543417e-05, "loss": 0.3017, "step": 33587 }, { "epoch": 18.764245810055865, "grad_norm": 0.38933902978897095, "learning_rate": 6.238095238095239e-05, "loss": 0.3978, "step": 33588 }, { "epoch": 18.76480446927374, "grad_norm": 0.470816045999527, "learning_rate": 6.23529411764706e-05, "loss": 0.3778, "step": 33589 }, { "epoch": 18.76536312849162, "grad_norm": 0.4901551604270935, "learning_rate": 6.23249299719888e-05, "loss": 0.3911, "step": 33590 }, { "epoch": 18.765921787709498, "grad_norm": 0.3814218044281006, 
"learning_rate": 6.229691876750701e-05, "loss": 0.3432, "step": 33591 }, { "epoch": 18.766480446927375, "grad_norm": 0.6107118129730225, "learning_rate": 6.226890756302521e-05, "loss": 0.6505, "step": 33592 }, { "epoch": 18.76703910614525, "grad_norm": 0.6372637748718262, "learning_rate": 6.224089635854342e-05, "loss": 0.4389, "step": 33593 }, { "epoch": 18.767597765363128, "grad_norm": 0.5028481483459473, "learning_rate": 6.221288515406163e-05, "loss": 0.4854, "step": 33594 }, { "epoch": 18.768156424581004, "grad_norm": 0.3836221694946289, "learning_rate": 6.218487394957983e-05, "loss": 0.4245, "step": 33595 }, { "epoch": 18.76871508379888, "grad_norm": 0.503173291683197, "learning_rate": 6.215686274509804e-05, "loss": 0.3976, "step": 33596 }, { "epoch": 18.76927374301676, "grad_norm": 0.6945926547050476, "learning_rate": 6.212885154061624e-05, "loss": 0.5094, "step": 33597 }, { "epoch": 18.769832402234638, "grad_norm": 0.4380642771720886, "learning_rate": 6.210084033613445e-05, "loss": 0.4273, "step": 33598 }, { "epoch": 18.770391061452514, "grad_norm": 1.0532293319702148, "learning_rate": 6.207282913165267e-05, "loss": 0.3966, "step": 33599 }, { "epoch": 18.77094972067039, "grad_norm": 0.303750216960907, "learning_rate": 6.204481792717088e-05, "loss": 0.3466, "step": 33600 }, { "epoch": 18.771508379888267, "grad_norm": 0.38815316557884216, "learning_rate": 6.201680672268908e-05, "loss": 0.4295, "step": 33601 }, { "epoch": 18.772067039106144, "grad_norm": 0.48118990659713745, "learning_rate": 6.198879551820729e-05, "loss": 0.5432, "step": 33602 }, { "epoch": 18.772625698324024, "grad_norm": 0.7466734647750854, "learning_rate": 6.19607843137255e-05, "loss": 0.3938, "step": 33603 }, { "epoch": 18.7731843575419, "grad_norm": 0.7467971444129944, "learning_rate": 6.19327731092437e-05, "loss": 0.4443, "step": 33604 }, { "epoch": 18.773743016759777, "grad_norm": 0.6385853290557861, "learning_rate": 6.19047619047619e-05, "loss": 0.3208, "step": 33605 }, { "epoch": 
18.774301675977654, "grad_norm": 0.5847535729408264, "learning_rate": 6.187675070028011e-05, "loss": 0.4505, "step": 33606 }, { "epoch": 18.77486033519553, "grad_norm": 0.6122723817825317, "learning_rate": 6.184873949579832e-05, "loss": 0.4491, "step": 33607 }, { "epoch": 18.775418994413407, "grad_norm": 0.37647995352745056, "learning_rate": 6.182072829131652e-05, "loss": 0.4095, "step": 33608 }, { "epoch": 18.775977653631283, "grad_norm": 0.5081083178520203, "learning_rate": 6.179271708683473e-05, "loss": 0.3845, "step": 33609 }, { "epoch": 18.776536312849164, "grad_norm": 0.8722805380821228, "learning_rate": 6.176470588235295e-05, "loss": 0.2995, "step": 33610 }, { "epoch": 18.77709497206704, "grad_norm": 0.4157169759273529, "learning_rate": 6.173669467787116e-05, "loss": 0.4648, "step": 33611 }, { "epoch": 18.777653631284917, "grad_norm": 0.49111250042915344, "learning_rate": 6.170868347338936e-05, "loss": 0.3866, "step": 33612 }, { "epoch": 18.778212290502793, "grad_norm": 0.4404752552509308, "learning_rate": 6.168067226890757e-05, "loss": 0.4771, "step": 33613 }, { "epoch": 18.77877094972067, "grad_norm": 0.8342339396476746, "learning_rate": 6.165266106442577e-05, "loss": 0.4572, "step": 33614 }, { "epoch": 18.779329608938546, "grad_norm": 0.4370078146457672, "learning_rate": 6.162464985994398e-05, "loss": 0.4462, "step": 33615 }, { "epoch": 18.779888268156423, "grad_norm": 0.8758747577667236, "learning_rate": 6.159663865546219e-05, "loss": 0.4322, "step": 33616 }, { "epoch": 18.780446927374303, "grad_norm": 0.37110209465026855, "learning_rate": 6.156862745098039e-05, "loss": 0.3856, "step": 33617 }, { "epoch": 18.78100558659218, "grad_norm": 0.3819989562034607, "learning_rate": 6.15406162464986e-05, "loss": 0.3869, "step": 33618 }, { "epoch": 18.781564245810056, "grad_norm": 0.41110458970069885, "learning_rate": 6.15126050420168e-05, "loss": 0.3263, "step": 33619 }, { "epoch": 18.782122905027933, "grad_norm": 0.37413454055786133, "learning_rate": 
6.148459383753501e-05, "loss": 0.4122, "step": 33620 }, { "epoch": 18.78268156424581, "grad_norm": 0.4960332214832306, "learning_rate": 6.145658263305323e-05, "loss": 0.383, "step": 33621 }, { "epoch": 18.783240223463686, "grad_norm": 0.30806565284729004, "learning_rate": 6.142857142857143e-05, "loss": 0.3718, "step": 33622 }, { "epoch": 18.783798882681566, "grad_norm": 0.3246197998523712, "learning_rate": 6.140056022408964e-05, "loss": 0.3301, "step": 33623 }, { "epoch": 18.784357541899443, "grad_norm": 0.35392487049102783, "learning_rate": 6.137254901960785e-05, "loss": 0.2859, "step": 33624 }, { "epoch": 18.78491620111732, "grad_norm": 0.44571053981781006, "learning_rate": 6.134453781512605e-05, "loss": 0.3502, "step": 33625 }, { "epoch": 18.785474860335196, "grad_norm": 0.4495675265789032, "learning_rate": 6.131652661064426e-05, "loss": 0.4848, "step": 33626 }, { "epoch": 18.786033519553072, "grad_norm": 1.0844656229019165, "learning_rate": 6.128851540616246e-05, "loss": 0.3464, "step": 33627 }, { "epoch": 18.78659217877095, "grad_norm": 0.5044671297073364, "learning_rate": 6.126050420168067e-05, "loss": 0.4682, "step": 33628 }, { "epoch": 18.787150837988825, "grad_norm": 0.8576611280441284, "learning_rate": 6.123249299719888e-05, "loss": 0.3665, "step": 33629 }, { "epoch": 18.787709497206706, "grad_norm": 0.518936038017273, "learning_rate": 6.120448179271708e-05, "loss": 0.4651, "step": 33630 }, { "epoch": 18.788268156424582, "grad_norm": 0.3987240493297577, "learning_rate": 6.11764705882353e-05, "loss": 0.3649, "step": 33631 }, { "epoch": 18.78882681564246, "grad_norm": 4.616729736328125, "learning_rate": 6.114845938375351e-05, "loss": 0.398, "step": 33632 }, { "epoch": 18.789385474860335, "grad_norm": 0.5554696321487427, "learning_rate": 6.112044817927171e-05, "loss": 0.4504, "step": 33633 }, { "epoch": 18.789944134078212, "grad_norm": 0.47344985604286194, "learning_rate": 6.109243697478992e-05, "loss": 0.3926, "step": 33634 }, { "epoch": 18.79050279329609, 
"grad_norm": 0.5345861315727234, "learning_rate": 6.106442577030813e-05, "loss": 0.4997, "step": 33635 }, { "epoch": 18.791061452513965, "grad_norm": 0.4340493381023407, "learning_rate": 6.103641456582633e-05, "loss": 0.3127, "step": 33636 }, { "epoch": 18.791620111731845, "grad_norm": 0.4726484417915344, "learning_rate": 6.100840336134454e-05, "loss": 0.3954, "step": 33637 }, { "epoch": 18.79217877094972, "grad_norm": 0.4120994806289673, "learning_rate": 6.0980392156862744e-05, "loss": 0.4294, "step": 33638 }, { "epoch": 18.7927374301676, "grad_norm": 0.4894300699234009, "learning_rate": 6.095238095238095e-05, "loss": 0.4215, "step": 33639 }, { "epoch": 18.793296089385475, "grad_norm": 0.5503779053688049, "learning_rate": 6.0924369747899156e-05, "loss": 0.4988, "step": 33640 }, { "epoch": 18.79385474860335, "grad_norm": 6.059313774108887, "learning_rate": 6.089635854341736e-05, "loss": 0.478, "step": 33641 }, { "epoch": 18.794413407821228, "grad_norm": 1.0625801086425781, "learning_rate": 6.086834733893558e-05, "loss": 0.3867, "step": 33642 }, { "epoch": 18.794972067039105, "grad_norm": 0.46304410696029663, "learning_rate": 6.084033613445379e-05, "loss": 0.4384, "step": 33643 }, { "epoch": 18.795530726256985, "grad_norm": 0.8256794214248657, "learning_rate": 6.0812324929971994e-05, "loss": 0.6431, "step": 33644 }, { "epoch": 18.79608938547486, "grad_norm": 0.39776909351348877, "learning_rate": 6.07843137254902e-05, "loss": 0.3513, "step": 33645 }, { "epoch": 18.796648044692738, "grad_norm": 0.40414267778396606, "learning_rate": 6.0756302521008406e-05, "loss": 0.3771, "step": 33646 }, { "epoch": 18.797206703910614, "grad_norm": 0.5518737435340881, "learning_rate": 6.072829131652661e-05, "loss": 0.3924, "step": 33647 }, { "epoch": 18.79776536312849, "grad_norm": 0.3767162561416626, "learning_rate": 6.0700280112044825e-05, "loss": 0.4086, "step": 33648 }, { "epoch": 18.798324022346367, "grad_norm": 0.4354788064956665, "learning_rate": 6.067226890756303e-05, "loss": 
0.416, "step": 33649 }, { "epoch": 18.798882681564244, "grad_norm": 0.4227149188518524, "learning_rate": 6.0644257703081237e-05, "loss": 0.3765, "step": 33650 }, { "epoch": 18.799441340782124, "grad_norm": 0.48564037680625916, "learning_rate": 6.061624649859944e-05, "loss": 0.3945, "step": 33651 }, { "epoch": 18.8, "grad_norm": 0.44414955377578735, "learning_rate": 6.058823529411765e-05, "loss": 0.3253, "step": 33652 }, { "epoch": 18.800558659217877, "grad_norm": 0.5386155843734741, "learning_rate": 6.056022408963586e-05, "loss": 0.4393, "step": 33653 }, { "epoch": 18.801117318435754, "grad_norm": 0.3885471224784851, "learning_rate": 6.053221288515407e-05, "loss": 0.4009, "step": 33654 }, { "epoch": 18.80167597765363, "grad_norm": 0.642909049987793, "learning_rate": 6.050420168067227e-05, "loss": 0.4884, "step": 33655 }, { "epoch": 18.802234636871507, "grad_norm": 0.5241016745567322, "learning_rate": 6.047619047619048e-05, "loss": 0.41, "step": 33656 }, { "epoch": 18.802793296089387, "grad_norm": 0.43042615056037903, "learning_rate": 6.0448179271708685e-05, "loss": 0.4248, "step": 33657 }, { "epoch": 18.803351955307264, "grad_norm": 0.3907735347747803, "learning_rate": 6.04201680672269e-05, "loss": 0.3917, "step": 33658 }, { "epoch": 18.80391061452514, "grad_norm": 0.3452138602733612, "learning_rate": 6.0392156862745104e-05, "loss": 0.3919, "step": 33659 }, { "epoch": 18.804469273743017, "grad_norm": 0.3378983736038208, "learning_rate": 6.036414565826331e-05, "loss": 0.3564, "step": 33660 }, { "epoch": 18.805027932960893, "grad_norm": 0.3364536762237549, "learning_rate": 6.0336134453781516e-05, "loss": 0.352, "step": 33661 }, { "epoch": 18.80558659217877, "grad_norm": 0.41994136571884155, "learning_rate": 6.030812324929972e-05, "loss": 0.4406, "step": 33662 }, { "epoch": 18.806145251396647, "grad_norm": 1.952738642692566, "learning_rate": 6.028011204481793e-05, "loss": 0.4115, "step": 33663 }, { "epoch": 18.806703910614527, "grad_norm": 0.33895957469940186, 
"learning_rate": 6.025210084033614e-05, "loss": 0.341, "step": 33664 }, { "epoch": 18.807262569832403, "grad_norm": 3.4712064266204834, "learning_rate": 6.022408963585435e-05, "loss": 0.4661, "step": 33665 }, { "epoch": 18.80782122905028, "grad_norm": 0.48126447200775146, "learning_rate": 6.019607843137255e-05, "loss": 0.3822, "step": 33666 }, { "epoch": 18.808379888268156, "grad_norm": 0.4363022446632385, "learning_rate": 6.016806722689076e-05, "loss": 0.355, "step": 33667 }, { "epoch": 18.808938547486033, "grad_norm": 0.42777401208877563, "learning_rate": 6.0140056022408965e-05, "loss": 0.464, "step": 33668 }, { "epoch": 18.80949720670391, "grad_norm": 0.6931414008140564, "learning_rate": 6.011204481792718e-05, "loss": 0.3896, "step": 33669 }, { "epoch": 18.810055865921786, "grad_norm": 0.4182848334312439, "learning_rate": 6.0084033613445384e-05, "loss": 0.4577, "step": 33670 }, { "epoch": 18.810614525139666, "grad_norm": 0.43032410740852356, "learning_rate": 6.005602240896359e-05, "loss": 0.4036, "step": 33671 }, { "epoch": 18.811173184357543, "grad_norm": 0.43364471197128296, "learning_rate": 6.0028011204481796e-05, "loss": 0.382, "step": 33672 }, { "epoch": 18.81173184357542, "grad_norm": 0.5476785898208618, "learning_rate": 6e-05, "loss": 0.4643, "step": 33673 }, { "epoch": 18.812290502793296, "grad_norm": 0.3527085781097412, "learning_rate": 5.9971988795518214e-05, "loss": 0.3618, "step": 33674 }, { "epoch": 18.812849162011172, "grad_norm": 0.3852047622203827, "learning_rate": 5.994397759103642e-05, "loss": 0.3997, "step": 33675 }, { "epoch": 18.81340782122905, "grad_norm": 0.4716630280017853, "learning_rate": 5.9915966386554626e-05, "loss": 0.347, "step": 33676 }, { "epoch": 18.81396648044693, "grad_norm": 0.5135959386825562, "learning_rate": 5.988795518207283e-05, "loss": 0.4655, "step": 33677 }, { "epoch": 18.814525139664806, "grad_norm": 0.40920212864875793, "learning_rate": 5.985994397759104e-05, "loss": 0.4981, "step": 33678 }, { "epoch": 
18.815083798882682, "grad_norm": 0.38840556144714355, "learning_rate": 5.9831932773109244e-05, "loss": 0.3182, "step": 33679 }, { "epoch": 18.81564245810056, "grad_norm": 0.4323107898235321, "learning_rate": 5.980392156862746e-05, "loss": 0.4632, "step": 33680 }, { "epoch": 18.816201117318435, "grad_norm": 0.4154958426952362, "learning_rate": 5.977591036414566e-05, "loss": 0.4092, "step": 33681 }, { "epoch": 18.816759776536312, "grad_norm": 0.5845361351966858, "learning_rate": 5.974789915966387e-05, "loss": 0.4858, "step": 33682 }, { "epoch": 18.81731843575419, "grad_norm": 0.5577226281166077, "learning_rate": 5.9719887955182075e-05, "loss": 0.4394, "step": 33683 }, { "epoch": 18.81787709497207, "grad_norm": 0.7876600623130798, "learning_rate": 5.969187675070028e-05, "loss": 0.4335, "step": 33684 }, { "epoch": 18.818435754189945, "grad_norm": 1.0263603925704956, "learning_rate": 5.9663865546218494e-05, "loss": 0.375, "step": 33685 }, { "epoch": 18.81899441340782, "grad_norm": 0.9165291786193848, "learning_rate": 5.96358543417367e-05, "loss": 0.3378, "step": 33686 }, { "epoch": 18.8195530726257, "grad_norm": 0.9924935698509216, "learning_rate": 5.9607843137254906e-05, "loss": 0.3996, "step": 33687 }, { "epoch": 18.820111731843575, "grad_norm": 0.48136764764785767, "learning_rate": 5.957983193277311e-05, "loss": 0.5362, "step": 33688 }, { "epoch": 18.82067039106145, "grad_norm": 0.39025384187698364, "learning_rate": 5.955182072829132e-05, "loss": 0.3445, "step": 33689 }, { "epoch": 18.821229050279328, "grad_norm": 0.4123421609401703, "learning_rate": 5.9523809523809524e-05, "loss": 0.4086, "step": 33690 }, { "epoch": 18.821787709497208, "grad_norm": 0.36732372641563416, "learning_rate": 5.9495798319327737e-05, "loss": 0.3554, "step": 33691 }, { "epoch": 18.822346368715085, "grad_norm": 0.4214653968811035, "learning_rate": 5.946778711484594e-05, "loss": 0.4356, "step": 33692 }, { "epoch": 18.82290502793296, "grad_norm": 0.6310732960700989, "learning_rate": 
5.943977591036415e-05, "loss": 0.5798, "step": 33693 }, { "epoch": 18.823463687150838, "grad_norm": 0.4226304888725281, "learning_rate": 5.9411764705882355e-05, "loss": 0.4193, "step": 33694 }, { "epoch": 18.824022346368714, "grad_norm": 0.4110008776187897, "learning_rate": 5.938375350140056e-05, "loss": 0.5657, "step": 33695 }, { "epoch": 18.82458100558659, "grad_norm": 0.33921244740486145, "learning_rate": 5.935574229691877e-05, "loss": 0.324, "step": 33696 }, { "epoch": 18.825139664804468, "grad_norm": 0.7263866662979126, "learning_rate": 5.932773109243698e-05, "loss": 0.5492, "step": 33697 }, { "epoch": 18.825698324022348, "grad_norm": 0.4138749837875366, "learning_rate": 5.9299719887955185e-05, "loss": 0.3825, "step": 33698 }, { "epoch": 18.826256983240224, "grad_norm": 1.0773077011108398, "learning_rate": 5.927170868347339e-05, "loss": 0.4271, "step": 33699 }, { "epoch": 18.8268156424581, "grad_norm": 1.0225144624710083, "learning_rate": 5.92436974789916e-05, "loss": 0.469, "step": 33700 }, { "epoch": 18.827374301675977, "grad_norm": 0.5519649982452393, "learning_rate": 5.921568627450981e-05, "loss": 0.5048, "step": 33701 }, { "epoch": 18.827932960893854, "grad_norm": 0.4285273849964142, "learning_rate": 5.9187675070028016e-05, "loss": 0.3719, "step": 33702 }, { "epoch": 18.82849162011173, "grad_norm": 0.3183865547180176, "learning_rate": 5.915966386554622e-05, "loss": 0.3398, "step": 33703 }, { "epoch": 18.82905027932961, "grad_norm": 0.35934627056121826, "learning_rate": 5.913165266106443e-05, "loss": 0.2945, "step": 33704 }, { "epoch": 18.829608938547487, "grad_norm": 0.3428328037261963, "learning_rate": 5.9103641456582634e-05, "loss": 0.4252, "step": 33705 }, { "epoch": 18.830167597765364, "grad_norm": 0.399688720703125, "learning_rate": 5.907563025210084e-05, "loss": 0.3838, "step": 33706 }, { "epoch": 18.83072625698324, "grad_norm": 0.43299439549446106, "learning_rate": 5.904761904761905e-05, "loss": 0.5022, "step": 33707 }, { "epoch": 
18.831284916201117, "grad_norm": 0.5337679982185364, "learning_rate": 5.901960784313726e-05, "loss": 0.3524, "step": 33708 }, { "epoch": 18.831843575418993, "grad_norm": 0.5172250866889954, "learning_rate": 5.8991596638655465e-05, "loss": 0.3645, "step": 33709 }, { "epoch": 18.83240223463687, "grad_norm": 0.4401620924472809, "learning_rate": 5.896358543417367e-05, "loss": 0.3656, "step": 33710 }, { "epoch": 18.83296089385475, "grad_norm": 0.4201304018497467, "learning_rate": 5.893557422969188e-05, "loss": 0.4456, "step": 33711 }, { "epoch": 18.833519553072627, "grad_norm": 0.48590970039367676, "learning_rate": 5.890756302521009e-05, "loss": 0.4417, "step": 33712 }, { "epoch": 18.834078212290503, "grad_norm": 0.40995311737060547, "learning_rate": 5.8879551820728296e-05, "loss": 0.3401, "step": 33713 }, { "epoch": 18.83463687150838, "grad_norm": 1.6525832414627075, "learning_rate": 5.88515406162465e-05, "loss": 0.3963, "step": 33714 }, { "epoch": 18.835195530726256, "grad_norm": 0.6423236727714539, "learning_rate": 5.882352941176471e-05, "loss": 0.5462, "step": 33715 }, { "epoch": 18.835754189944133, "grad_norm": 0.39395588636398315, "learning_rate": 5.8795518207282914e-05, "loss": 0.4129, "step": 33716 }, { "epoch": 18.83631284916201, "grad_norm": 0.3613419234752655, "learning_rate": 5.876750700280112e-05, "loss": 0.2915, "step": 33717 }, { "epoch": 18.83687150837989, "grad_norm": 0.5271963477134705, "learning_rate": 5.873949579831933e-05, "loss": 0.4528, "step": 33718 }, { "epoch": 18.837430167597766, "grad_norm": 0.37885165214538574, "learning_rate": 5.871148459383754e-05, "loss": 0.3127, "step": 33719 }, { "epoch": 18.837988826815643, "grad_norm": 0.6319796442985535, "learning_rate": 5.8683473389355744e-05, "loss": 0.4789, "step": 33720 }, { "epoch": 18.83854748603352, "grad_norm": 0.725278913974762, "learning_rate": 5.865546218487395e-05, "loss": 0.4643, "step": 33721 }, { "epoch": 18.839106145251396, "grad_norm": 0.3575795292854309, "learning_rate": 
5.8627450980392156e-05, "loss": 0.3441, "step": 33722 }, { "epoch": 18.839664804469272, "grad_norm": 0.4634858965873718, "learning_rate": 5.859943977591037e-05, "loss": 0.3861, "step": 33723 }, { "epoch": 18.840223463687153, "grad_norm": 0.3979707956314087, "learning_rate": 5.8571428571428575e-05, "loss": 0.3607, "step": 33724 }, { "epoch": 18.84078212290503, "grad_norm": 0.44715383648872375, "learning_rate": 5.854341736694678e-05, "loss": 0.5378, "step": 33725 }, { "epoch": 18.841340782122906, "grad_norm": 0.5906356573104858, "learning_rate": 5.851540616246499e-05, "loss": 0.3927, "step": 33726 }, { "epoch": 18.841899441340782, "grad_norm": 1.4931284189224243, "learning_rate": 5.848739495798319e-05, "loss": 0.4052, "step": 33727 }, { "epoch": 18.84245810055866, "grad_norm": 0.6978968977928162, "learning_rate": 5.8459383753501406e-05, "loss": 0.3972, "step": 33728 }, { "epoch": 18.843016759776535, "grad_norm": 0.4317839741706848, "learning_rate": 5.843137254901961e-05, "loss": 0.3438, "step": 33729 }, { "epoch": 18.843575418994412, "grad_norm": 0.481693834066391, "learning_rate": 5.840336134453782e-05, "loss": 0.4343, "step": 33730 }, { "epoch": 18.844134078212292, "grad_norm": 0.4739903211593628, "learning_rate": 5.8375350140056024e-05, "loss": 0.4162, "step": 33731 }, { "epoch": 18.84469273743017, "grad_norm": 0.37906476855278015, "learning_rate": 5.834733893557423e-05, "loss": 0.4502, "step": 33732 }, { "epoch": 18.845251396648045, "grad_norm": 0.48904070258140564, "learning_rate": 5.8319327731092436e-05, "loss": 0.4318, "step": 33733 }, { "epoch": 18.845810055865922, "grad_norm": 0.4569445848464966, "learning_rate": 5.829131652661065e-05, "loss": 0.4916, "step": 33734 }, { "epoch": 18.8463687150838, "grad_norm": 0.3764616847038269, "learning_rate": 5.8263305322128855e-05, "loss": 0.4485, "step": 33735 }, { "epoch": 18.846927374301675, "grad_norm": 0.5238183736801147, "learning_rate": 5.823529411764706e-05, "loss": 0.4701, "step": 33736 }, { "epoch": 
18.84748603351955, "grad_norm": 0.46046218276023865, "learning_rate": 5.8207282913165266e-05, "loss": 0.5287, "step": 33737 }, { "epoch": 18.84804469273743, "grad_norm": 0.4134523868560791, "learning_rate": 5.817927170868347e-05, "loss": 0.3618, "step": 33738 }, { "epoch": 18.84860335195531, "grad_norm": 0.3827623128890991, "learning_rate": 5.8151260504201685e-05, "loss": 0.3278, "step": 33739 }, { "epoch": 18.849162011173185, "grad_norm": 0.5377877354621887, "learning_rate": 5.812324929971989e-05, "loss": 0.53, "step": 33740 }, { "epoch": 18.84972067039106, "grad_norm": 1.424222707748413, "learning_rate": 5.80952380952381e-05, "loss": 0.4046, "step": 33741 }, { "epoch": 18.850279329608938, "grad_norm": 3.638166666030884, "learning_rate": 5.80672268907563e-05, "loss": 0.4394, "step": 33742 }, { "epoch": 18.850837988826814, "grad_norm": 0.4418627619743347, "learning_rate": 5.803921568627451e-05, "loss": 0.4245, "step": 33743 }, { "epoch": 18.85139664804469, "grad_norm": 0.34589412808418274, "learning_rate": 5.801120448179272e-05, "loss": 0.3761, "step": 33744 }, { "epoch": 18.85195530726257, "grad_norm": 0.4258033037185669, "learning_rate": 5.798319327731093e-05, "loss": 0.4272, "step": 33745 }, { "epoch": 18.852513966480448, "grad_norm": 0.4996253550052643, "learning_rate": 5.7955182072829134e-05, "loss": 0.4417, "step": 33746 }, { "epoch": 18.853072625698324, "grad_norm": 0.3439030051231384, "learning_rate": 5.792717086834734e-05, "loss": 0.3546, "step": 33747 }, { "epoch": 18.8536312849162, "grad_norm": 0.4623798429965973, "learning_rate": 5.7899159663865546e-05, "loss": 0.4037, "step": 33748 }, { "epoch": 18.854189944134077, "grad_norm": 0.6681358814239502, "learning_rate": 5.787114845938375e-05, "loss": 0.4476, "step": 33749 }, { "epoch": 18.854748603351954, "grad_norm": 0.5658302903175354, "learning_rate": 5.7843137254901965e-05, "loss": 0.364, "step": 33750 }, { "epoch": 18.85530726256983, "grad_norm": 0.8510394096374512, "learning_rate": 
5.781512605042017e-05, "loss": 0.3053, "step": 33751 }, { "epoch": 18.85586592178771, "grad_norm": 0.8178472518920898, "learning_rate": 5.778711484593838e-05, "loss": 0.4575, "step": 33752 }, { "epoch": 18.856424581005587, "grad_norm": 0.4230727255344391, "learning_rate": 5.775910364145658e-05, "loss": 0.5357, "step": 33753 }, { "epoch": 18.856983240223464, "grad_norm": 1.4039816856384277, "learning_rate": 5.773109243697479e-05, "loss": 0.3991, "step": 33754 }, { "epoch": 18.85754189944134, "grad_norm": 0.48048731684684753, "learning_rate": 5.7703081232493e-05, "loss": 0.4447, "step": 33755 }, { "epoch": 18.858100558659217, "grad_norm": 0.5927854180335999, "learning_rate": 5.767507002801121e-05, "loss": 0.328, "step": 33756 }, { "epoch": 18.858659217877094, "grad_norm": 0.4228411316871643, "learning_rate": 5.7647058823529413e-05, "loss": 0.3425, "step": 33757 }, { "epoch": 18.859217877094974, "grad_norm": 0.622751772403717, "learning_rate": 5.761904761904762e-05, "loss": 0.3729, "step": 33758 }, { "epoch": 18.85977653631285, "grad_norm": 1.3569228649139404, "learning_rate": 5.7591036414565825e-05, "loss": 0.4215, "step": 33759 }, { "epoch": 18.860335195530727, "grad_norm": 1.6811857223510742, "learning_rate": 5.756302521008403e-05, "loss": 0.4857, "step": 33760 }, { "epoch": 18.860893854748603, "grad_norm": 0.5531189441680908, "learning_rate": 5.7535014005602244e-05, "loss": 0.3257, "step": 33761 }, { "epoch": 18.86145251396648, "grad_norm": 0.4516947865486145, "learning_rate": 5.750700280112045e-05, "loss": 0.2824, "step": 33762 }, { "epoch": 18.862011173184356, "grad_norm": 0.5208219289779663, "learning_rate": 5.7478991596638656e-05, "loss": 0.4062, "step": 33763 }, { "epoch": 18.862569832402233, "grad_norm": 0.5304266214370728, "learning_rate": 5.745098039215686e-05, "loss": 0.4474, "step": 33764 }, { "epoch": 18.863128491620113, "grad_norm": 0.3732846975326538, "learning_rate": 5.742296918767507e-05, "loss": 0.441, "step": 33765 }, { "epoch": 18.86368715083799, 
"grad_norm": 0.6364693641662598, "learning_rate": 5.739495798319328e-05, "loss": 0.401, "step": 33766 }, { "epoch": 18.864245810055866, "grad_norm": 0.36977869272232056, "learning_rate": 5.736694677871149e-05, "loss": 0.3707, "step": 33767 }, { "epoch": 18.864804469273743, "grad_norm": 0.3811185657978058, "learning_rate": 5.733893557422969e-05, "loss": 0.3291, "step": 33768 }, { "epoch": 18.86536312849162, "grad_norm": 0.40819332003593445, "learning_rate": 5.73109243697479e-05, "loss": 0.3453, "step": 33769 }, { "epoch": 18.865921787709496, "grad_norm": 0.6222102642059326, "learning_rate": 5.7282913165266105e-05, "loss": 0.5234, "step": 33770 }, { "epoch": 18.866480446927373, "grad_norm": Infinity, "learning_rate": 5.7282913165266105e-05, "loss": 0.4241, "step": 33771 }, { "epoch": 18.867039106145253, "grad_norm": 0.46164900064468384, "learning_rate": 5.725490196078432e-05, "loss": 0.4707, "step": 33772 }, { "epoch": 18.86759776536313, "grad_norm": 1.1930509805679321, "learning_rate": 5.7226890756302524e-05, "loss": 0.4401, "step": 33773 }, { "epoch": 18.868156424581006, "grad_norm": 0.49368709325790405, "learning_rate": 5.719887955182073e-05, "loss": 0.4313, "step": 33774 }, { "epoch": 18.868715083798882, "grad_norm": 0.4615408480167389, "learning_rate": 5.7170868347338936e-05, "loss": 0.4023, "step": 33775 }, { "epoch": 18.86927374301676, "grad_norm": 0.6049171686172485, "learning_rate": 5.714285714285714e-05, "loss": 0.3122, "step": 33776 }, { "epoch": 18.869832402234636, "grad_norm": 0.4467255175113678, "learning_rate": 5.711484593837535e-05, "loss": 0.413, "step": 33777 }, { "epoch": 18.870391061452516, "grad_norm": 1.2062095403671265, "learning_rate": 5.708683473389356e-05, "loss": 0.2946, "step": 33778 }, { "epoch": 18.870949720670392, "grad_norm": 0.5447296500205994, "learning_rate": 5.7058823529411766e-05, "loss": 0.377, "step": 33779 }, { "epoch": 18.87150837988827, "grad_norm": 0.6711100339889526, "learning_rate": 5.703081232492997e-05, "loss": 0.3874, 
"step": 33780 }, { "epoch": 18.872067039106145, "grad_norm": 0.5505905151367188, "learning_rate": 5.700280112044818e-05, "loss": 0.4287, "step": 33781 }, { "epoch": 18.872625698324022, "grad_norm": 0.43074774742126465, "learning_rate": 5.6974789915966384e-05, "loss": 0.3675, "step": 33782 }, { "epoch": 18.8731843575419, "grad_norm": 0.9471970200538635, "learning_rate": 5.69467787114846e-05, "loss": 0.6955, "step": 33783 }, { "epoch": 18.873743016759775, "grad_norm": 4.350872993469238, "learning_rate": 5.69187675070028e-05, "loss": 0.3992, "step": 33784 }, { "epoch": 18.874301675977655, "grad_norm": 0.943796694278717, "learning_rate": 5.689075630252101e-05, "loss": 0.3821, "step": 33785 }, { "epoch": 18.87486033519553, "grad_norm": 0.4437604248523712, "learning_rate": 5.6862745098039215e-05, "loss": 0.4595, "step": 33786 }, { "epoch": 18.87541899441341, "grad_norm": 0.34584903717041016, "learning_rate": 5.683473389355742e-05, "loss": 0.3689, "step": 33787 }, { "epoch": 18.875977653631285, "grad_norm": 0.5171068906784058, "learning_rate": 5.680672268907563e-05, "loss": 0.2757, "step": 33788 }, { "epoch": 18.87653631284916, "grad_norm": 0.4553881585597992, "learning_rate": 5.677871148459384e-05, "loss": 0.3173, "step": 33789 }, { "epoch": 18.877094972067038, "grad_norm": 0.5202228426933289, "learning_rate": 5.6750700280112046e-05, "loss": 0.4997, "step": 33790 }, { "epoch": 18.877653631284915, "grad_norm": 31.49077033996582, "learning_rate": 5.672268907563025e-05, "loss": 0.4835, "step": 33791 }, { "epoch": 18.878212290502795, "grad_norm": 0.6451875567436218, "learning_rate": 5.669467787114846e-05, "loss": 0.3518, "step": 33792 }, { "epoch": 18.87877094972067, "grad_norm": 1.2403446435928345, "learning_rate": 5.6666666666666664e-05, "loss": 0.4217, "step": 33793 }, { "epoch": 18.879329608938548, "grad_norm": 0.5851247310638428, "learning_rate": 5.663865546218488e-05, "loss": 0.3996, "step": 33794 }, { "epoch": 18.879888268156424, "grad_norm": 1.5194450616836548, 
"learning_rate": 5.661064425770308e-05, "loss": 0.3895, "step": 33795 }, { "epoch": 18.8804469273743, "grad_norm": 1.0575073957443237, "learning_rate": 5.658263305322129e-05, "loss": 0.3865, "step": 33796 }, { "epoch": 18.881005586592178, "grad_norm": 0.6892126798629761, "learning_rate": 5.6554621848739495e-05, "loss": 0.3493, "step": 33797 }, { "epoch": 18.881564245810054, "grad_norm": 0.5927559733390808, "learning_rate": 5.65266106442577e-05, "loss": 0.3257, "step": 33798 }, { "epoch": 18.882122905027934, "grad_norm": 0.40630051493644714, "learning_rate": 5.6498599439775913e-05, "loss": 0.4255, "step": 33799 }, { "epoch": 18.88268156424581, "grad_norm": 0.9027992486953735, "learning_rate": 5.647058823529412e-05, "loss": 0.3708, "step": 33800 }, { "epoch": 18.883240223463687, "grad_norm": 0.46567243337631226, "learning_rate": 5.6442577030812325e-05, "loss": 0.399, "step": 33801 }, { "epoch": 18.883798882681564, "grad_norm": 0.4221014082431793, "learning_rate": 5.641456582633053e-05, "loss": 0.3562, "step": 33802 }, { "epoch": 18.88435754189944, "grad_norm": 0.7294585108757019, "learning_rate": 5.638655462184874e-05, "loss": 0.5107, "step": 33803 }, { "epoch": 18.884916201117317, "grad_norm": 0.5214829444885254, "learning_rate": 5.6358543417366943e-05, "loss": 0.3887, "step": 33804 }, { "epoch": 18.885474860335197, "grad_norm": 0.32403364777565, "learning_rate": 5.6330532212885156e-05, "loss": 0.4068, "step": 33805 }, { "epoch": 18.886033519553074, "grad_norm": 0.37694549560546875, "learning_rate": 5.630252100840336e-05, "loss": 0.4088, "step": 33806 }, { "epoch": 18.88659217877095, "grad_norm": 0.47963759303092957, "learning_rate": 5.627450980392157e-05, "loss": 0.4369, "step": 33807 }, { "epoch": 18.887150837988827, "grad_norm": 1.2743427753448486, "learning_rate": 5.6246498599439774e-05, "loss": 0.4282, "step": 33808 }, { "epoch": 18.887709497206703, "grad_norm": 0.5330156683921814, "learning_rate": 5.621848739495798e-05, "loss": 0.4474, "step": 33809 }, { 
"epoch": 18.88826815642458, "grad_norm": 0.4144650399684906, "learning_rate": 5.619047619047619e-05, "loss": 0.3894, "step": 33810 }, { "epoch": 18.888826815642457, "grad_norm": 0.4847582280635834, "learning_rate": 5.61624649859944e-05, "loss": 0.5132, "step": 33811 }, { "epoch": 18.889385474860337, "grad_norm": 4.575112342834473, "learning_rate": 5.6134453781512605e-05, "loss": 0.6483, "step": 33812 }, { "epoch": 18.889944134078213, "grad_norm": 1.3570356369018555, "learning_rate": 5.610644257703081e-05, "loss": 0.5871, "step": 33813 }, { "epoch": 18.89050279329609, "grad_norm": 0.3947475850582123, "learning_rate": 5.607843137254902e-05, "loss": 0.4254, "step": 33814 }, { "epoch": 18.891061452513966, "grad_norm": 0.4775826036930084, "learning_rate": 5.605042016806722e-05, "loss": 0.4923, "step": 33815 }, { "epoch": 18.891620111731843, "grad_norm": 0.5627162456512451, "learning_rate": 5.6022408963585436e-05, "loss": 0.4182, "step": 33816 }, { "epoch": 18.89217877094972, "grad_norm": 0.9713678956031799, "learning_rate": 5.599439775910364e-05, "loss": 0.5381, "step": 33817 }, { "epoch": 18.892737430167596, "grad_norm": 0.3798504173755646, "learning_rate": 5.596638655462185e-05, "loss": 0.3773, "step": 33818 }, { "epoch": 18.893296089385476, "grad_norm": 1.123907208442688, "learning_rate": 5.5938375350140054e-05, "loss": 0.6038, "step": 33819 }, { "epoch": 18.893854748603353, "grad_norm": 0.29842960834503174, "learning_rate": 5.591036414565826e-05, "loss": 0.2406, "step": 33820 }, { "epoch": 18.89441340782123, "grad_norm": 2.686370611190796, "learning_rate": 5.588235294117647e-05, "loss": 0.3811, "step": 33821 }, { "epoch": 18.894972067039106, "grad_norm": 0.34505489468574524, "learning_rate": 5.585434173669468e-05, "loss": 0.3327, "step": 33822 }, { "epoch": 18.895530726256982, "grad_norm": 0.3997093439102173, "learning_rate": 5.5826330532212884e-05, "loss": 0.4684, "step": 33823 }, { "epoch": 18.89608938547486, "grad_norm": 1.4778010845184326, "learning_rate": 
5.579831932773109e-05, "loss": 0.3432, "step": 33824 }, { "epoch": 18.89664804469274, "grad_norm": 0.4771747291088104, "learning_rate": 5.5770308123249296e-05, "loss": 0.3939, "step": 33825 }, { "epoch": 18.897206703910616, "grad_norm": 0.5368199944496155, "learning_rate": 5.574229691876751e-05, "loss": 0.3633, "step": 33826 }, { "epoch": 18.897765363128492, "grad_norm": 0.8267648816108704, "learning_rate": 5.5714285714285715e-05, "loss": 0.5352, "step": 33827 }, { "epoch": 18.89832402234637, "grad_norm": 0.4943237006664276, "learning_rate": 5.568627450980392e-05, "loss": 0.5406, "step": 33828 }, { "epoch": 18.898882681564245, "grad_norm": 0.882495641708374, "learning_rate": 5.565826330532213e-05, "loss": 0.4723, "step": 33829 }, { "epoch": 18.899441340782122, "grad_norm": 0.39665326476097107, "learning_rate": 5.563025210084033e-05, "loss": 0.4804, "step": 33830 }, { "epoch": 18.9, "grad_norm": 0.8868411183357239, "learning_rate": 5.560224089635854e-05, "loss": 0.461, "step": 33831 }, { "epoch": 18.90055865921788, "grad_norm": 0.4521852433681488, "learning_rate": 5.557422969187675e-05, "loss": 0.4124, "step": 33832 }, { "epoch": 18.901117318435755, "grad_norm": 0.7548196315765381, "learning_rate": 5.554621848739496e-05, "loss": 0.3007, "step": 33833 }, { "epoch": 18.901675977653632, "grad_norm": 0.5111404657363892, "learning_rate": 5.5518207282913164e-05, "loss": 0.5198, "step": 33834 }, { "epoch": 18.90223463687151, "grad_norm": 0.49965816736221313, "learning_rate": 5.549019607843137e-05, "loss": 0.4763, "step": 33835 }, { "epoch": 18.902793296089385, "grad_norm": 0.6660752892494202, "learning_rate": 5.5462184873949576e-05, "loss": 0.4514, "step": 33836 }, { "epoch": 18.90335195530726, "grad_norm": 1.3603583574295044, "learning_rate": 5.543417366946779e-05, "loss": 0.435, "step": 33837 }, { "epoch": 18.903910614525138, "grad_norm": 1.1103458404541016, "learning_rate": 5.5406162464985995e-05, "loss": 0.5685, "step": 33838 }, { "epoch": 18.904469273743018, 
"grad_norm": 0.4276740849018097, "learning_rate": 5.53781512605042e-05, "loss": 0.4152, "step": 33839 }, { "epoch": 18.905027932960895, "grad_norm": 0.3635987937450409, "learning_rate": 5.535014005602241e-05, "loss": 0.3539, "step": 33840 }, { "epoch": 18.90558659217877, "grad_norm": 0.45916834473609924, "learning_rate": 5.532212885154061e-05, "loss": 0.403, "step": 33841 }, { "epoch": 18.906145251396648, "grad_norm": 0.6639769673347473, "learning_rate": 5.5294117647058825e-05, "loss": 0.4668, "step": 33842 }, { "epoch": 18.906703910614524, "grad_norm": 0.45911604166030884, "learning_rate": 5.526610644257703e-05, "loss": 0.4328, "step": 33843 }, { "epoch": 18.9072625698324, "grad_norm": 0.7857806086540222, "learning_rate": 5.523809523809524e-05, "loss": 0.417, "step": 33844 }, { "epoch": 18.907821229050278, "grad_norm": 0.7776517271995544, "learning_rate": 5.5210084033613443e-05, "loss": 0.3677, "step": 33845 }, { "epoch": 18.908379888268158, "grad_norm": 0.4270723760128021, "learning_rate": 5.518207282913165e-05, "loss": 0.3843, "step": 33846 }, { "epoch": 18.908938547486034, "grad_norm": 1.4539105892181396, "learning_rate": 5.5154061624649855e-05, "loss": 0.3672, "step": 33847 }, { "epoch": 18.90949720670391, "grad_norm": 0.41392868757247925, "learning_rate": 5.512605042016807e-05, "loss": 0.3303, "step": 33848 }, { "epoch": 18.910055865921787, "grad_norm": 0.5647719502449036, "learning_rate": 5.5098039215686274e-05, "loss": 0.553, "step": 33849 }, { "epoch": 18.910614525139664, "grad_norm": 0.3424026370048523, "learning_rate": 5.507002801120448e-05, "loss": 0.3443, "step": 33850 }, { "epoch": 18.91117318435754, "grad_norm": 0.5082478523254395, "learning_rate": 5.5042016806722686e-05, "loss": 0.5536, "step": 33851 }, { "epoch": 18.91173184357542, "grad_norm": 0.4064604938030243, "learning_rate": 5.501400560224089e-05, "loss": 0.3801, "step": 33852 }, { "epoch": 18.912290502793297, "grad_norm": 1.052262544631958, "learning_rate": 5.4985994397759105e-05, "loss": 
0.4567, "step": 33853 }, { "epoch": 18.912849162011174, "grad_norm": 0.5106754302978516, "learning_rate": 5.495798319327731e-05, "loss": 0.4851, "step": 33854 }, { "epoch": 18.91340782122905, "grad_norm": 0.436939537525177, "learning_rate": 5.492997198879552e-05, "loss": 0.4729, "step": 33855 }, { "epoch": 18.913966480446927, "grad_norm": 0.5108621716499329, "learning_rate": 5.490196078431372e-05, "loss": 0.3625, "step": 33856 }, { "epoch": 18.914525139664804, "grad_norm": 0.8846465945243835, "learning_rate": 5.487394957983193e-05, "loss": 0.3987, "step": 33857 }, { "epoch": 18.91508379888268, "grad_norm": 3.5128445625305176, "learning_rate": 5.4845938375350135e-05, "loss": 0.5637, "step": 33858 }, { "epoch": 18.91564245810056, "grad_norm": 3.4166648387908936, "learning_rate": 5.481792717086835e-05, "loss": 0.5048, "step": 33859 }, { "epoch": 18.916201117318437, "grad_norm": 0.45042723417282104, "learning_rate": 5.4789915966386554e-05, "loss": 0.4685, "step": 33860 }, { "epoch": 18.916759776536313, "grad_norm": 4.460443496704102, "learning_rate": 5.476190476190476e-05, "loss": 0.3877, "step": 33861 }, { "epoch": 18.91731843575419, "grad_norm": 0.4218735098838806, "learning_rate": 5.4733893557422966e-05, "loss": 0.4458, "step": 33862 }, { "epoch": 18.917877094972066, "grad_norm": 0.3589506447315216, "learning_rate": 5.470588235294117e-05, "loss": 0.3439, "step": 33863 }, { "epoch": 18.918435754189943, "grad_norm": 0.7866973280906677, "learning_rate": 5.4677871148459384e-05, "loss": 0.4355, "step": 33864 }, { "epoch": 18.91899441340782, "grad_norm": 1.1428085565567017, "learning_rate": 5.464985994397759e-05, "loss": 0.3687, "step": 33865 }, { "epoch": 18.9195530726257, "grad_norm": 11.617588996887207, "learning_rate": 5.4621848739495796e-05, "loss": 0.3951, "step": 33866 }, { "epoch": 18.920111731843576, "grad_norm": 0.8480934500694275, "learning_rate": 5.4593837535014e-05, "loss": 0.4449, "step": 33867 }, { "epoch": 18.920670391061453, "grad_norm": 
0.7735621929168701, "learning_rate": 5.456582633053221e-05, "loss": 0.6169, "step": 33868 }, { "epoch": 18.92122905027933, "grad_norm": 0.7786951661109924, "learning_rate": 5.453781512605043e-05, "loss": 0.4327, "step": 33869 }, { "epoch": 18.921787709497206, "grad_norm": 0.3648815453052521, "learning_rate": 5.4509803921568634e-05, "loss": 0.2842, "step": 33870 }, { "epoch": 18.922346368715083, "grad_norm": 0.6461501717567444, "learning_rate": 5.448179271708684e-05, "loss": 0.4989, "step": 33871 }, { "epoch": 18.922905027932963, "grad_norm": 0.37036168575286865, "learning_rate": 5.4453781512605046e-05, "loss": 0.3437, "step": 33872 }, { "epoch": 18.92346368715084, "grad_norm": 0.6820854544639587, "learning_rate": 5.442577030812325e-05, "loss": 0.3563, "step": 33873 }, { "epoch": 18.924022346368716, "grad_norm": 2.24249267578125, "learning_rate": 5.439775910364146e-05, "loss": 0.3787, "step": 33874 }, { "epoch": 18.924581005586592, "grad_norm": 0.39428409934043884, "learning_rate": 5.436974789915967e-05, "loss": 0.346, "step": 33875 }, { "epoch": 18.92513966480447, "grad_norm": 0.47936001420021057, "learning_rate": 5.434173669467788e-05, "loss": 0.4097, "step": 33876 }, { "epoch": 18.925698324022346, "grad_norm": 0.49858567118644714, "learning_rate": 5.431372549019608e-05, "loss": 0.3706, "step": 33877 }, { "epoch": 18.926256983240222, "grad_norm": 0.5014204382896423, "learning_rate": 5.428571428571429e-05, "loss": 0.3831, "step": 33878 }, { "epoch": 18.926815642458102, "grad_norm": 0.5206138491630554, "learning_rate": 5.4257703081232495e-05, "loss": 0.4625, "step": 33879 }, { "epoch": 18.92737430167598, "grad_norm": 1.844326138496399, "learning_rate": 5.422969187675071e-05, "loss": 0.4055, "step": 33880 }, { "epoch": 18.927932960893855, "grad_norm": 0.5907692313194275, "learning_rate": 5.4201680672268913e-05, "loss": 0.4577, "step": 33881 }, { "epoch": 18.928491620111732, "grad_norm": 0.4953712224960327, "learning_rate": 5.417366946778712e-05, "loss": 0.3581, 
"step": 33882 }, { "epoch": 18.92905027932961, "grad_norm": 0.3316100537776947, "learning_rate": 5.4145658263305325e-05, "loss": 0.3284, "step": 33883 }, { "epoch": 18.929608938547485, "grad_norm": 0.48553207516670227, "learning_rate": 5.411764705882353e-05, "loss": 0.3742, "step": 33884 }, { "epoch": 18.93016759776536, "grad_norm": 0.34783920645713806, "learning_rate": 5.408963585434174e-05, "loss": 0.2538, "step": 33885 }, { "epoch": 18.93072625698324, "grad_norm": 0.41213372349739075, "learning_rate": 5.406162464985995e-05, "loss": 0.3792, "step": 33886 }, { "epoch": 18.93128491620112, "grad_norm": 0.5908197164535522, "learning_rate": 5.4033613445378156e-05, "loss": 0.3949, "step": 33887 }, { "epoch": 18.931843575418995, "grad_norm": 0.4517349302768707, "learning_rate": 5.400560224089636e-05, "loss": 0.4406, "step": 33888 }, { "epoch": 18.93240223463687, "grad_norm": 0.930027186870575, "learning_rate": 5.397759103641457e-05, "loss": 0.3281, "step": 33889 }, { "epoch": 18.932960893854748, "grad_norm": 0.551719069480896, "learning_rate": 5.3949579831932774e-05, "loss": 0.3581, "step": 33890 }, { "epoch": 18.933519553072625, "grad_norm": 0.39376363158226013, "learning_rate": 5.392156862745099e-05, "loss": 0.3685, "step": 33891 }, { "epoch": 18.9340782122905, "grad_norm": 1.0118646621704102, "learning_rate": 5.389355742296919e-05, "loss": 0.4237, "step": 33892 }, { "epoch": 18.93463687150838, "grad_norm": 0.33368727564811707, "learning_rate": 5.38655462184874e-05, "loss": 0.2681, "step": 33893 }, { "epoch": 18.935195530726258, "grad_norm": 0.3958822786808014, "learning_rate": 5.3837535014005605e-05, "loss": 0.422, "step": 33894 }, { "epoch": 18.935754189944134, "grad_norm": 0.4539048373699188, "learning_rate": 5.380952380952381e-05, "loss": 0.4047, "step": 33895 }, { "epoch": 18.93631284916201, "grad_norm": 0.5042299032211304, "learning_rate": 5.3781512605042024e-05, "loss": 0.3592, "step": 33896 }, { "epoch": 18.936871508379888, "grad_norm": 0.39602118730545044, 
"learning_rate": 5.375350140056023e-05, "loss": 0.3712, "step": 33897 }, { "epoch": 18.937430167597764, "grad_norm": 0.36842110753059387, "learning_rate": 5.3725490196078436e-05, "loss": 0.3887, "step": 33898 }, { "epoch": 18.93798882681564, "grad_norm": 0.4202346205711365, "learning_rate": 5.369747899159664e-05, "loss": 0.455, "step": 33899 }, { "epoch": 18.93854748603352, "grad_norm": 0.5464991927146912, "learning_rate": 5.366946778711485e-05, "loss": 0.5855, "step": 33900 }, { "epoch": 18.939106145251397, "grad_norm": 0.89593905210495, "learning_rate": 5.3641456582633054e-05, "loss": 0.433, "step": 33901 }, { "epoch": 18.939664804469274, "grad_norm": 0.3702648878097534, "learning_rate": 5.3613445378151266e-05, "loss": 0.4813, "step": 33902 }, { "epoch": 18.94022346368715, "grad_norm": 0.5631015300750732, "learning_rate": 5.358543417366947e-05, "loss": 0.2789, "step": 33903 }, { "epoch": 18.940782122905027, "grad_norm": 0.41592875123023987, "learning_rate": 5.355742296918768e-05, "loss": 0.4394, "step": 33904 }, { "epoch": 18.941340782122904, "grad_norm": 0.35282185673713684, "learning_rate": 5.3529411764705884e-05, "loss": 0.3806, "step": 33905 }, { "epoch": 18.941899441340784, "grad_norm": 0.7095988392829895, "learning_rate": 5.350140056022409e-05, "loss": 0.5656, "step": 33906 }, { "epoch": 18.94245810055866, "grad_norm": 0.34797996282577515, "learning_rate": 5.34733893557423e-05, "loss": 0.336, "step": 33907 }, { "epoch": 18.943016759776537, "grad_norm": 0.41780421137809753, "learning_rate": 5.344537815126051e-05, "loss": 0.4263, "step": 33908 }, { "epoch": 18.943575418994413, "grad_norm": 0.3015372157096863, "learning_rate": 5.3417366946778715e-05, "loss": 0.355, "step": 33909 }, { "epoch": 18.94413407821229, "grad_norm": 0.4017311930656433, "learning_rate": 5.338935574229692e-05, "loss": 0.3908, "step": 33910 }, { "epoch": 18.944692737430167, "grad_norm": 0.4889945983886719, "learning_rate": 5.336134453781513e-05, "loss": 0.5316, "step": 33911 }, { "epoch": 
18.945251396648043, "grad_norm": 0.4016016721725464, "learning_rate": 5.333333333333334e-05, "loss": 0.4654, "step": 33912 }, { "epoch": 18.945810055865923, "grad_norm": 0.5134336352348328, "learning_rate": 5.3305322128851546e-05, "loss": 0.347, "step": 33913 }, { "epoch": 18.9463687150838, "grad_norm": 0.8897731900215149, "learning_rate": 5.327731092436975e-05, "loss": 0.4025, "step": 33914 }, { "epoch": 18.946927374301676, "grad_norm": 0.7334646582603455, "learning_rate": 5.324929971988796e-05, "loss": 0.3937, "step": 33915 }, { "epoch": 18.947486033519553, "grad_norm": 0.48601144552230835, "learning_rate": 5.3221288515406164e-05, "loss": 0.47, "step": 33916 }, { "epoch": 18.94804469273743, "grad_norm": 0.48712092638015747, "learning_rate": 5.319327731092437e-05, "loss": 0.3804, "step": 33917 }, { "epoch": 18.948603351955306, "grad_norm": 0.6057708263397217, "learning_rate": 5.316526610644258e-05, "loss": 0.3118, "step": 33918 }, { "epoch": 18.949162011173183, "grad_norm": 1.0692092180252075, "learning_rate": 5.313725490196079e-05, "loss": 0.3982, "step": 33919 }, { "epoch": 18.949720670391063, "grad_norm": 2.1109750270843506, "learning_rate": 5.3109243697478995e-05, "loss": 0.4418, "step": 33920 }, { "epoch": 18.95027932960894, "grad_norm": 0.3324871063232422, "learning_rate": 5.30812324929972e-05, "loss": 0.3801, "step": 33921 }, { "epoch": 18.950837988826816, "grad_norm": 0.4373445212841034, "learning_rate": 5.305322128851541e-05, "loss": 0.3976, "step": 33922 }, { "epoch": 18.951396648044692, "grad_norm": 0.7524389028549194, "learning_rate": 5.302521008403362e-05, "loss": 0.2836, "step": 33923 }, { "epoch": 18.95195530726257, "grad_norm": 1.6838334798812866, "learning_rate": 5.2997198879551825e-05, "loss": 0.3771, "step": 33924 }, { "epoch": 18.952513966480446, "grad_norm": 0.896115779876709, "learning_rate": 5.296918767507003e-05, "loss": 0.292, "step": 33925 }, { "epoch": 18.953072625698326, "grad_norm": 0.43289652466773987, "learning_rate": 
5.294117647058824e-05, "loss": 0.4667, "step": 33926 }, { "epoch": 18.953631284916202, "grad_norm": 0.5212640762329102, "learning_rate": 5.2913165266106443e-05, "loss": 0.3845, "step": 33927 }, { "epoch": 18.95418994413408, "grad_norm": 0.6807703375816345, "learning_rate": 5.288515406162465e-05, "loss": 0.4292, "step": 33928 }, { "epoch": 18.954748603351955, "grad_norm": 0.4425789713859558, "learning_rate": 5.285714285714286e-05, "loss": 0.4301, "step": 33929 }, { "epoch": 18.955307262569832, "grad_norm": 0.41546064615249634, "learning_rate": 5.282913165266107e-05, "loss": 0.4305, "step": 33930 }, { "epoch": 18.95586592178771, "grad_norm": 0.43749648332595825, "learning_rate": 5.2801120448179274e-05, "loss": 0.4138, "step": 33931 }, { "epoch": 18.956424581005585, "grad_norm": 0.6813123822212219, "learning_rate": 5.277310924369748e-05, "loss": 0.3651, "step": 33932 }, { "epoch": 18.956983240223465, "grad_norm": 0.35313770174980164, "learning_rate": 5.2745098039215686e-05, "loss": 0.4507, "step": 33933 }, { "epoch": 18.957541899441342, "grad_norm": 0.3603627383708954, "learning_rate": 5.27170868347339e-05, "loss": 0.3907, "step": 33934 }, { "epoch": 18.95810055865922, "grad_norm": 0.4522898197174072, "learning_rate": 5.2689075630252105e-05, "loss": 0.4027, "step": 33935 }, { "epoch": 18.958659217877095, "grad_norm": 0.3996206820011139, "learning_rate": 5.266106442577031e-05, "loss": 0.389, "step": 33936 }, { "epoch": 18.95921787709497, "grad_norm": 0.46490445733070374, "learning_rate": 5.263305322128852e-05, "loss": 0.3402, "step": 33937 }, { "epoch": 18.959776536312848, "grad_norm": 0.4886170029640198, "learning_rate": 5.260504201680672e-05, "loss": 0.4489, "step": 33938 }, { "epoch": 18.960335195530725, "grad_norm": 2.693960189819336, "learning_rate": 5.2577030812324936e-05, "loss": 0.3774, "step": 33939 }, { "epoch": 18.960893854748605, "grad_norm": 0.5858724117279053, "learning_rate": 5.254901960784314e-05, "loss": 0.4251, "step": 33940 }, { "epoch": 
18.96145251396648, "grad_norm": 0.3281695246696472, "learning_rate": 5.252100840336135e-05, "loss": 0.362, "step": 33941 }, { "epoch": 18.962011173184358, "grad_norm": 1.1966187953948975, "learning_rate": 5.2492997198879554e-05, "loss": 0.5748, "step": 33942 }, { "epoch": 18.962569832402234, "grad_norm": 0.36663034558296204, "learning_rate": 5.246498599439776e-05, "loss": 0.3643, "step": 33943 }, { "epoch": 18.96312849162011, "grad_norm": 1.103279709815979, "learning_rate": 5.2436974789915966e-05, "loss": 0.3651, "step": 33944 }, { "epoch": 18.963687150837988, "grad_norm": 4.124715328216553, "learning_rate": 5.240896358543418e-05, "loss": 0.4382, "step": 33945 }, { "epoch": 18.964245810055864, "grad_norm": 0.4870852828025818, "learning_rate": 5.2380952380952384e-05, "loss": 0.4002, "step": 33946 }, { "epoch": 18.964804469273744, "grad_norm": 0.34290871024131775, "learning_rate": 5.235294117647059e-05, "loss": 0.4116, "step": 33947 }, { "epoch": 18.96536312849162, "grad_norm": 0.3345028758049011, "learning_rate": 5.2324929971988796e-05, "loss": 0.3448, "step": 33948 }, { "epoch": 18.965921787709497, "grad_norm": 2.3267154693603516, "learning_rate": 5.2296918767507e-05, "loss": 0.4233, "step": 33949 }, { "epoch": 18.966480446927374, "grad_norm": 0.7566601634025574, "learning_rate": 5.2268907563025215e-05, "loss": 0.5048, "step": 33950 }, { "epoch": 18.96703910614525, "grad_norm": 0.37007296085357666, "learning_rate": 5.224089635854342e-05, "loss": 0.3912, "step": 33951 }, { "epoch": 18.967597765363127, "grad_norm": 0.4309180974960327, "learning_rate": 5.221288515406163e-05, "loss": 0.3832, "step": 33952 }, { "epoch": 18.968156424581007, "grad_norm": 3.102376699447632, "learning_rate": 5.218487394957983e-05, "loss": 0.444, "step": 33953 }, { "epoch": 18.968715083798884, "grad_norm": 0.588596522808075, "learning_rate": 5.215686274509804e-05, "loss": 0.3839, "step": 33954 }, { "epoch": 18.96927374301676, "grad_norm": 0.8135894536972046, "learning_rate": 
5.2128851540616245e-05, "loss": 0.4149, "step": 33955 }, { "epoch": 18.969832402234637, "grad_norm": 0.4203833341598511, "learning_rate": 5.210084033613446e-05, "loss": 0.3848, "step": 33956 }, { "epoch": 18.970391061452514, "grad_norm": 0.7620267868041992, "learning_rate": 5.2072829131652664e-05, "loss": 0.4737, "step": 33957 }, { "epoch": 18.97094972067039, "grad_norm": 0.5293670892715454, "learning_rate": 5.204481792717087e-05, "loss": 0.4346, "step": 33958 }, { "epoch": 18.971508379888267, "grad_norm": 0.7390501499176025, "learning_rate": 5.2016806722689076e-05, "loss": 0.3172, "step": 33959 }, { "epoch": 18.972067039106147, "grad_norm": 0.9070444703102112, "learning_rate": 5.198879551820728e-05, "loss": 0.3466, "step": 33960 }, { "epoch": 18.972625698324023, "grad_norm": 0.5488793849945068, "learning_rate": 5.1960784313725495e-05, "loss": 0.4414, "step": 33961 }, { "epoch": 18.9731843575419, "grad_norm": 8.175317764282227, "learning_rate": 5.19327731092437e-05, "loss": 0.405, "step": 33962 }, { "epoch": 18.973743016759776, "grad_norm": 0.8509916663169861, "learning_rate": 5.190476190476191e-05, "loss": 0.3367, "step": 33963 }, { "epoch": 18.974301675977653, "grad_norm": 1.420340895652771, "learning_rate": 5.187675070028011e-05, "loss": 0.4212, "step": 33964 }, { "epoch": 18.97486033519553, "grad_norm": 0.37568145990371704, "learning_rate": 5.184873949579832e-05, "loss": 0.4308, "step": 33965 }, { "epoch": 18.975418994413406, "grad_norm": 0.45119836926460266, "learning_rate": 5.182072829131653e-05, "loss": 0.4295, "step": 33966 }, { "epoch": 18.975977653631286, "grad_norm": 0.4286446273326874, "learning_rate": 5.179271708683474e-05, "loss": 0.3305, "step": 33967 }, { "epoch": 18.976536312849163, "grad_norm": 0.5809363126754761, "learning_rate": 5.1764705882352943e-05, "loss": 0.5267, "step": 33968 }, { "epoch": 18.97709497206704, "grad_norm": 0.4270780682563782, "learning_rate": 5.173669467787115e-05, "loss": 0.3908, "step": 33969 }, { "epoch": 
18.977653631284916, "grad_norm": 0.6030421853065491, "learning_rate": 5.1708683473389355e-05, "loss": 0.4067, "step": 33970 }, { "epoch": 18.978212290502793, "grad_norm": 9.251143455505371, "learning_rate": 5.168067226890756e-05, "loss": 0.4219, "step": 33971 }, { "epoch": 18.97877094972067, "grad_norm": 0.6906352043151855, "learning_rate": 5.1652661064425774e-05, "loss": 0.7465, "step": 33972 }, { "epoch": 18.97932960893855, "grad_norm": 0.5502415299415588, "learning_rate": 5.162464985994398e-05, "loss": 0.4004, "step": 33973 }, { "epoch": 18.979888268156426, "grad_norm": 0.47371426224708557, "learning_rate": 5.1596638655462186e-05, "loss": 0.4125, "step": 33974 }, { "epoch": 18.980446927374302, "grad_norm": 1.191032886505127, "learning_rate": 5.156862745098039e-05, "loss": 0.3484, "step": 33975 }, { "epoch": 18.98100558659218, "grad_norm": 3.6259195804595947, "learning_rate": 5.15406162464986e-05, "loss": 0.3549, "step": 33976 }, { "epoch": 18.981564245810056, "grad_norm": 0.401210755109787, "learning_rate": 5.151260504201681e-05, "loss": 0.3255, "step": 33977 }, { "epoch": 18.982122905027932, "grad_norm": 0.5023775100708008, "learning_rate": 5.148459383753502e-05, "loss": 0.5245, "step": 33978 }, { "epoch": 18.98268156424581, "grad_norm": 0.3667581081390381, "learning_rate": 5.145658263305322e-05, "loss": 0.3686, "step": 33979 }, { "epoch": 18.98324022346369, "grad_norm": 0.44442471861839294, "learning_rate": 5.142857142857143e-05, "loss": 0.3932, "step": 33980 }, { "epoch": 18.983798882681565, "grad_norm": 0.45789602398872375, "learning_rate": 5.1400560224089635e-05, "loss": 0.613, "step": 33981 }, { "epoch": 18.984357541899442, "grad_norm": 0.5256237387657166, "learning_rate": 5.137254901960785e-05, "loss": 0.4683, "step": 33982 }, { "epoch": 18.98491620111732, "grad_norm": 1.2099040746688843, "learning_rate": 5.1344537815126054e-05, "loss": 0.4535, "step": 33983 }, { "epoch": 18.985474860335195, "grad_norm": 0.3841578960418701, "learning_rate": 
5.131652661064426e-05, "loss": 0.3364, "step": 33984 }, { "epoch": 18.98603351955307, "grad_norm": 0.8091573119163513, "learning_rate": 5.1288515406162466e-05, "loss": 0.5057, "step": 33985 }, { "epoch": 18.986592178770948, "grad_norm": 0.5129916071891785, "learning_rate": 5.126050420168067e-05, "loss": 0.4451, "step": 33986 }, { "epoch": 18.98715083798883, "grad_norm": 0.39209139347076416, "learning_rate": 5.123249299719888e-05, "loss": 0.3354, "step": 33987 }, { "epoch": 18.987709497206705, "grad_norm": 0.33828359842300415, "learning_rate": 5.120448179271709e-05, "loss": 0.3366, "step": 33988 }, { "epoch": 18.98826815642458, "grad_norm": 0.6842661499977112, "learning_rate": 5.1176470588235296e-05, "loss": 0.4673, "step": 33989 }, { "epoch": 18.988826815642458, "grad_norm": 0.437063604593277, "learning_rate": 5.11484593837535e-05, "loss": 0.4506, "step": 33990 }, { "epoch": 18.989385474860335, "grad_norm": 0.7568961977958679, "learning_rate": 5.112044817927171e-05, "loss": 0.4899, "step": 33991 }, { "epoch": 18.98994413407821, "grad_norm": 0.4318029582500458, "learning_rate": 5.1092436974789914e-05, "loss": 0.4284, "step": 33992 }, { "epoch": 18.990502793296088, "grad_norm": 0.4273245632648468, "learning_rate": 5.106442577030813e-05, "loss": 0.4964, "step": 33993 }, { "epoch": 18.991061452513968, "grad_norm": 0.42612603306770325, "learning_rate": 5.103641456582633e-05, "loss": 0.3822, "step": 33994 }, { "epoch": 18.991620111731844, "grad_norm": 0.6867136359214783, "learning_rate": 5.100840336134454e-05, "loss": 0.3913, "step": 33995 }, { "epoch": 18.99217877094972, "grad_norm": 0.4082396924495697, "learning_rate": 5.0980392156862745e-05, "loss": 0.4082, "step": 33996 }, { "epoch": 18.992737430167598, "grad_norm": 0.5038939714431763, "learning_rate": 5.095238095238095e-05, "loss": 0.3688, "step": 33997 }, { "epoch": 18.993296089385474, "grad_norm": 6.329892635345459, "learning_rate": 5.092436974789916e-05, "loss": 0.3698, "step": 33998 }, { "epoch": 
18.99385474860335, "grad_norm": 0.4058187007904053, "learning_rate": 5.089635854341737e-05, "loss": 0.3684, "step": 33999 }, { "epoch": 18.994413407821227, "grad_norm": 0.4301515221595764, "learning_rate": 5.0868347338935576e-05, "loss": 0.3489, "step": 34000 }, { "epoch": 18.994413407821227, "eval_cer": 0.08439440061972876, "eval_loss": 0.31858545541763306, "eval_runtime": 55.4392, "eval_samples_per_second": 81.855, "eval_steps_per_second": 5.123, "eval_wer": 0.334655456551493, "step": 34000 }, { "epoch": 18.994972067039107, "grad_norm": 0.696684718132019, "learning_rate": 5.084033613445378e-05, "loss": 0.3138, "step": 34001 }, { "epoch": 18.995530726256984, "grad_norm": 2.6737091541290283, "learning_rate": 5.081232492997199e-05, "loss": 0.2794, "step": 34002 }, { "epoch": 18.99608938547486, "grad_norm": 0.4672900438308716, "learning_rate": 5.0784313725490194e-05, "loss": 0.4445, "step": 34003 }, { "epoch": 18.996648044692737, "grad_norm": 0.38065600395202637, "learning_rate": 5.075630252100841e-05, "loss": 0.2964, "step": 34004 }, { "epoch": 18.997206703910614, "grad_norm": 0.4367673993110657, "learning_rate": 5.072829131652661e-05, "loss": 0.5116, "step": 34005 }, { "epoch": 18.99776536312849, "grad_norm": 1.6056444644927979, "learning_rate": 5.070028011204482e-05, "loss": 0.4629, "step": 34006 }, { "epoch": 18.99832402234637, "grad_norm": 0.3913673162460327, "learning_rate": 5.0672268907563025e-05, "loss": 0.346, "step": 34007 }, { "epoch": 18.998882681564247, "grad_norm": 0.6824917197227478, "learning_rate": 5.064425770308123e-05, "loss": 0.5012, "step": 34008 }, { "epoch": 18.999441340782123, "grad_norm": 1.9792801141738892, "learning_rate": 5.0616246498599443e-05, "loss": 0.4343, "step": 34009 }, { "epoch": 19.0, "grad_norm": 0.838819682598114, "learning_rate": 5.058823529411765e-05, "loss": 0.381, "step": 34010 }, { "epoch": 19.000558659217877, "grad_norm": 0.3558765947818756, "learning_rate": 5.0560224089635855e-05, "loss": 0.3837, "step": 34011 }, { 
"epoch": 19.001117318435753, "grad_norm": 0.8690874576568604, "learning_rate": 5.053221288515406e-05, "loss": 0.4975, "step": 34012 }, { "epoch": 19.00167597765363, "grad_norm": 0.34056082367897034, "learning_rate": 5.050420168067227e-05, "loss": 0.3591, "step": 34013 }, { "epoch": 19.00223463687151, "grad_norm": 0.4120817184448242, "learning_rate": 5.047619047619047e-05, "loss": 0.4373, "step": 34014 }, { "epoch": 19.002793296089386, "grad_norm": 0.612208902835846, "learning_rate": 5.0448179271708686e-05, "loss": 0.3276, "step": 34015 }, { "epoch": 19.003351955307263, "grad_norm": 0.33575963973999023, "learning_rate": 5.042016806722689e-05, "loss": 0.3332, "step": 34016 }, { "epoch": 19.00391061452514, "grad_norm": 0.5213196277618408, "learning_rate": 5.03921568627451e-05, "loss": 0.4321, "step": 34017 }, { "epoch": 19.004469273743016, "grad_norm": 0.4308393597602844, "learning_rate": 5.0364145658263304e-05, "loss": 0.3894, "step": 34018 }, { "epoch": 19.005027932960893, "grad_norm": 1.910498023033142, "learning_rate": 5.033613445378151e-05, "loss": 0.4154, "step": 34019 }, { "epoch": 19.00558659217877, "grad_norm": 0.6421675682067871, "learning_rate": 5.030812324929972e-05, "loss": 0.4603, "step": 34020 }, { "epoch": 19.00614525139665, "grad_norm": 0.37055546045303345, "learning_rate": 5.028011204481793e-05, "loss": 0.332, "step": 34021 }, { "epoch": 19.006703910614526, "grad_norm": 0.427277147769928, "learning_rate": 5.0252100840336135e-05, "loss": 0.4285, "step": 34022 }, { "epoch": 19.007262569832402, "grad_norm": 1.2247799634933472, "learning_rate": 5.022408963585434e-05, "loss": 0.4186, "step": 34023 }, { "epoch": 19.00782122905028, "grad_norm": 0.3420492708683014, "learning_rate": 5.019607843137255e-05, "loss": 0.3582, "step": 34024 }, { "epoch": 19.008379888268156, "grad_norm": 0.4589462876319885, "learning_rate": 5.016806722689075e-05, "loss": 0.353, "step": 34025 }, { "epoch": 19.008938547486032, "grad_norm": 0.3948412537574768, "learning_rate": 
5.0140056022408966e-05, "loss": 0.4366, "step": 34026 }, { "epoch": 19.009497206703912, "grad_norm": 0.3301945626735687, "learning_rate": 5.011204481792717e-05, "loss": 0.3213, "step": 34027 }, { "epoch": 19.01005586592179, "grad_norm": 0.478463351726532, "learning_rate": 5.008403361344538e-05, "loss": 0.5252, "step": 34028 }, { "epoch": 19.010614525139665, "grad_norm": 0.4208948016166687, "learning_rate": 5.0056022408963584e-05, "loss": 0.4559, "step": 34029 }, { "epoch": 19.011173184357542, "grad_norm": 0.7570073008537292, "learning_rate": 5.002801120448179e-05, "loss": 0.4632, "step": 34030 }, { "epoch": 19.01173184357542, "grad_norm": 0.4433969557285309, "learning_rate": 5e-05, "loss": 0.3333, "step": 34031 }, { "epoch": 19.012290502793295, "grad_norm": 0.7575501203536987, "learning_rate": 4.997198879551821e-05, "loss": 0.4447, "step": 34032 }, { "epoch": 19.01284916201117, "grad_norm": 0.6942957043647766, "learning_rate": 4.9943977591036414e-05, "loss": 0.5404, "step": 34033 }, { "epoch": 19.013407821229052, "grad_norm": 0.41899439692497253, "learning_rate": 4.991596638655462e-05, "loss": 0.3727, "step": 34034 }, { "epoch": 19.01396648044693, "grad_norm": 0.43453991413116455, "learning_rate": 4.9887955182072826e-05, "loss": 0.3685, "step": 34035 }, { "epoch": 19.014525139664805, "grad_norm": 0.3726389706134796, "learning_rate": 4.985994397759104e-05, "loss": 0.5042, "step": 34036 }, { "epoch": 19.01508379888268, "grad_norm": 1.0077435970306396, "learning_rate": 4.9831932773109245e-05, "loss": 0.4406, "step": 34037 }, { "epoch": 19.015642458100558, "grad_norm": 0.3891523778438568, "learning_rate": 4.980392156862745e-05, "loss": 0.3346, "step": 34038 }, { "epoch": 19.016201117318435, "grad_norm": 0.4843681752681732, "learning_rate": 4.977591036414566e-05, "loss": 0.3849, "step": 34039 }, { "epoch": 19.01675977653631, "grad_norm": 0.7743499279022217, "learning_rate": 4.974789915966386e-05, "loss": 0.7428, "step": 34040 }, { "epoch": 19.01731843575419, 
"grad_norm": 0.3791869282722473, "learning_rate": 4.971988795518207e-05, "loss": 0.3572, "step": 34041 }, { "epoch": 19.017877094972068, "grad_norm": 0.4791411757469177, "learning_rate": 4.969187675070028e-05, "loss": 0.4102, "step": 34042 }, { "epoch": 19.018435754189944, "grad_norm": 0.4512704610824585, "learning_rate": 4.966386554621849e-05, "loss": 0.4702, "step": 34043 }, { "epoch": 19.01899441340782, "grad_norm": 0.321982204914093, "learning_rate": 4.9635854341736694e-05, "loss": 0.3361, "step": 34044 }, { "epoch": 19.019553072625698, "grad_norm": 0.40492740273475647, "learning_rate": 4.96078431372549e-05, "loss": 0.4158, "step": 34045 }, { "epoch": 19.020111731843574, "grad_norm": 0.4612349569797516, "learning_rate": 4.9579831932773106e-05, "loss": 0.4831, "step": 34046 }, { "epoch": 19.02067039106145, "grad_norm": 0.38113701343536377, "learning_rate": 4.955182072829132e-05, "loss": 0.4136, "step": 34047 }, { "epoch": 19.02122905027933, "grad_norm": 0.4612719714641571, "learning_rate": 4.9523809523809525e-05, "loss": 0.4485, "step": 34048 }, { "epoch": 19.021787709497207, "grad_norm": 0.5298845767974854, "learning_rate": 4.949579831932773e-05, "loss": 0.4222, "step": 34049 }, { "epoch": 19.022346368715084, "grad_norm": 3.101620674133301, "learning_rate": 4.946778711484594e-05, "loss": 0.5287, "step": 34050 }, { "epoch": 19.02290502793296, "grad_norm": 0.33950090408325195, "learning_rate": 4.943977591036414e-05, "loss": 0.3762, "step": 34051 }, { "epoch": 19.023463687150837, "grad_norm": 0.37245798110961914, "learning_rate": 4.941176470588235e-05, "loss": 0.3819, "step": 34052 }, { "epoch": 19.024022346368714, "grad_norm": 0.4824327826499939, "learning_rate": 4.938375350140056e-05, "loss": 0.5237, "step": 34053 }, { "epoch": 19.024581005586594, "grad_norm": 0.8317398428916931, "learning_rate": 4.935574229691877e-05, "loss": 0.3923, "step": 34054 }, { "epoch": 19.02513966480447, "grad_norm": 0.8856366872787476, "learning_rate": 4.932773109243697e-05, "loss": 
0.3664, "step": 34055 }, { "epoch": 19.025698324022347, "grad_norm": 2.81778883934021, "learning_rate": 4.929971988795518e-05, "loss": 0.4356, "step": 34056 }, { "epoch": 19.026256983240224, "grad_norm": 0.667323648929596, "learning_rate": 4.9271708683473385e-05, "loss": 0.4066, "step": 34057 }, { "epoch": 19.0268156424581, "grad_norm": 0.4666479527950287, "learning_rate": 4.92436974789916e-05, "loss": 0.3425, "step": 34058 }, { "epoch": 19.027374301675977, "grad_norm": 0.41187188029289246, "learning_rate": 4.9215686274509804e-05, "loss": 0.4823, "step": 34059 }, { "epoch": 19.027932960893853, "grad_norm": 0.5146870017051697, "learning_rate": 4.918767507002801e-05, "loss": 0.4044, "step": 34060 }, { "epoch": 19.028491620111733, "grad_norm": 0.42817193269729614, "learning_rate": 4.9159663865546216e-05, "loss": 0.3684, "step": 34061 }, { "epoch": 19.02905027932961, "grad_norm": 8.784381866455078, "learning_rate": 4.913165266106442e-05, "loss": 0.4205, "step": 34062 }, { "epoch": 19.029608938547486, "grad_norm": 0.7608481049537659, "learning_rate": 4.9103641456582635e-05, "loss": 0.3371, "step": 34063 }, { "epoch": 19.030167597765363, "grad_norm": 0.453857421875, "learning_rate": 4.907563025210084e-05, "loss": 0.4081, "step": 34064 }, { "epoch": 19.03072625698324, "grad_norm": 0.5360804200172424, "learning_rate": 4.904761904761905e-05, "loss": 0.2983, "step": 34065 }, { "epoch": 19.031284916201116, "grad_norm": 0.8454349040985107, "learning_rate": 4.901960784313725e-05, "loss": 0.4219, "step": 34066 }, { "epoch": 19.031843575418993, "grad_norm": 0.4815874695777893, "learning_rate": 4.899159663865546e-05, "loss": 0.4718, "step": 34067 }, { "epoch": 19.032402234636873, "grad_norm": 0.6291993260383606, "learning_rate": 4.8963585434173665e-05, "loss": 0.3947, "step": 34068 }, { "epoch": 19.03296089385475, "grad_norm": 0.4006820321083069, "learning_rate": 4.893557422969188e-05, "loss": 0.3354, "step": 34069 }, { "epoch": 19.033519553072626, "grad_norm": 1.2439199686050415, 
"learning_rate": 4.8907563025210084e-05, "loss": 0.3404, "step": 34070 }, { "epoch": 19.034078212290503, "grad_norm": 0.552683413028717, "learning_rate": 4.887955182072829e-05, "loss": 0.7117, "step": 34071 }, { "epoch": 19.03463687150838, "grad_norm": 0.386258602142334, "learning_rate": 4.8851540616246496e-05, "loss": 0.3918, "step": 34072 }, { "epoch": 19.035195530726256, "grad_norm": 0.4527541995048523, "learning_rate": 4.88235294117647e-05, "loss": 0.3915, "step": 34073 }, { "epoch": 19.035754189944136, "grad_norm": 0.43757620453834534, "learning_rate": 4.8795518207282914e-05, "loss": 0.4162, "step": 34074 }, { "epoch": 19.036312849162012, "grad_norm": 0.32871389389038086, "learning_rate": 4.876750700280112e-05, "loss": 0.3347, "step": 34075 }, { "epoch": 19.03687150837989, "grad_norm": 0.42162078619003296, "learning_rate": 4.8739495798319326e-05, "loss": 0.4381, "step": 34076 }, { "epoch": 19.037430167597766, "grad_norm": 2.049283266067505, "learning_rate": 4.871148459383753e-05, "loss": 0.333, "step": 34077 }, { "epoch": 19.037988826815642, "grad_norm": 0.9449719786643982, "learning_rate": 4.868347338935574e-05, "loss": 0.4235, "step": 34078 }, { "epoch": 19.03854748603352, "grad_norm": 0.39846327900886536, "learning_rate": 4.865546218487395e-05, "loss": 0.4082, "step": 34079 }, { "epoch": 19.039106145251395, "grad_norm": 0.9023163318634033, "learning_rate": 4.862745098039216e-05, "loss": 0.4537, "step": 34080 }, { "epoch": 19.039664804469275, "grad_norm": 0.6002269387245178, "learning_rate": 4.859943977591036e-05, "loss": 0.4, "step": 34081 }, { "epoch": 19.040223463687152, "grad_norm": 0.4130149483680725, "learning_rate": 4.857142857142857e-05, "loss": 0.4301, "step": 34082 }, { "epoch": 19.04078212290503, "grad_norm": 0.6279932856559753, "learning_rate": 4.8543417366946775e-05, "loss": 0.4048, "step": 34083 }, { "epoch": 19.041340782122905, "grad_norm": 0.862360954284668, "learning_rate": 4.851540616246498e-05, "loss": 0.6508, "step": 34084 }, { "epoch": 
19.04189944134078, "grad_norm": 0.4316216707229614, "learning_rate": 4.8487394957983194e-05, "loss": 0.4083, "step": 34085 }, { "epoch": 19.042458100558658, "grad_norm": 0.39052802324295044, "learning_rate": 4.84593837535014e-05, "loss": 0.4149, "step": 34086 }, { "epoch": 19.043016759776535, "grad_norm": 0.3880631625652313, "learning_rate": 4.8431372549019606e-05, "loss": 0.4942, "step": 34087 }, { "epoch": 19.043575418994415, "grad_norm": 0.3922453224658966, "learning_rate": 4.840336134453781e-05, "loss": 0.3689, "step": 34088 }, { "epoch": 19.04413407821229, "grad_norm": 0.5278199911117554, "learning_rate": 4.837535014005602e-05, "loss": 0.4032, "step": 34089 }, { "epoch": 19.044692737430168, "grad_norm": 0.37316828966140747, "learning_rate": 4.834733893557423e-05, "loss": 0.435, "step": 34090 }, { "epoch": 19.045251396648045, "grad_norm": 0.3822898864746094, "learning_rate": 4.831932773109244e-05, "loss": 0.3519, "step": 34091 }, { "epoch": 19.04581005586592, "grad_norm": 0.5060520172119141, "learning_rate": 4.829131652661064e-05, "loss": 0.3543, "step": 34092 }, { "epoch": 19.046368715083798, "grad_norm": 0.7076753377914429, "learning_rate": 4.826330532212885e-05, "loss": 0.3482, "step": 34093 }, { "epoch": 19.046927374301674, "grad_norm": 0.38288766145706177, "learning_rate": 4.8235294117647055e-05, "loss": 0.4134, "step": 34094 }, { "epoch": 19.047486033519554, "grad_norm": 0.5623285174369812, "learning_rate": 4.820728291316526e-05, "loss": 0.4837, "step": 34095 }, { "epoch": 19.04804469273743, "grad_norm": 0.4888148009777069, "learning_rate": 4.817927170868347e-05, "loss": 0.3216, "step": 34096 }, { "epoch": 19.048603351955308, "grad_norm": 0.6230806708335876, "learning_rate": 4.815126050420168e-05, "loss": 0.5085, "step": 34097 }, { "epoch": 19.049162011173184, "grad_norm": 0.7253946661949158, "learning_rate": 4.8123249299719885e-05, "loss": 0.6701, "step": 34098 }, { "epoch": 19.04972067039106, "grad_norm": 0.6751520037651062, "learning_rate": 
4.809523809523809e-05, "loss": 0.4458, "step": 34099 }, { "epoch": 19.050279329608937, "grad_norm": 0.4840782582759857, "learning_rate": 4.80672268907563e-05, "loss": 0.4558, "step": 34100 }, { "epoch": 19.050837988826817, "grad_norm": 0.5162602663040161, "learning_rate": 4.803921568627452e-05, "loss": 0.3498, "step": 34101 }, { "epoch": 19.051396648044694, "grad_norm": 0.37954264879226685, "learning_rate": 4.801120448179272e-05, "loss": 0.5211, "step": 34102 }, { "epoch": 19.05195530726257, "grad_norm": 0.4165072441101074, "learning_rate": 4.798319327731093e-05, "loss": 0.4125, "step": 34103 }, { "epoch": 19.052513966480447, "grad_norm": 0.5712683796882629, "learning_rate": 4.7955182072829135e-05, "loss": 0.3782, "step": 34104 }, { "epoch": 19.053072625698324, "grad_norm": 0.29432412981987, "learning_rate": 4.792717086834734e-05, "loss": 0.3782, "step": 34105 }, { "epoch": 19.0536312849162, "grad_norm": 0.3098897635936737, "learning_rate": 4.7899159663865554e-05, "loss": 0.3918, "step": 34106 }, { "epoch": 19.054189944134077, "grad_norm": 0.5611530542373657, "learning_rate": 4.787114845938376e-05, "loss": 0.4941, "step": 34107 }, { "epoch": 19.054748603351957, "grad_norm": 1.9435120820999146, "learning_rate": 4.7843137254901966e-05, "loss": 0.4363, "step": 34108 }, { "epoch": 19.055307262569833, "grad_norm": 0.46485641598701477, "learning_rate": 4.781512605042017e-05, "loss": 0.438, "step": 34109 }, { "epoch": 19.05586592178771, "grad_norm": 0.47409337759017944, "learning_rate": 4.778711484593838e-05, "loss": 0.579, "step": 34110 }, { "epoch": 19.056424581005587, "grad_norm": 0.6881526708602905, "learning_rate": 4.7759103641456584e-05, "loss": 0.3658, "step": 34111 }, { "epoch": 19.056983240223463, "grad_norm": 0.468822181224823, "learning_rate": 4.7731092436974796e-05, "loss": 0.4229, "step": 34112 }, { "epoch": 19.05754189944134, "grad_norm": 0.4482509195804596, "learning_rate": 4.7703081232493e-05, "loss": 0.3863, "step": 34113 }, { "epoch": 19.058100558659216, 
"grad_norm": 0.40117621421813965, "learning_rate": 4.767507002801121e-05, "loss": 0.4044, "step": 34114 }, { "epoch": 19.058659217877096, "grad_norm": 0.6154475212097168, "learning_rate": 4.7647058823529414e-05, "loss": 0.4546, "step": 34115 }, { "epoch": 19.059217877094973, "grad_norm": 0.4011020064353943, "learning_rate": 4.761904761904762e-05, "loss": 0.3227, "step": 34116 }, { "epoch": 19.05977653631285, "grad_norm": 0.8815206289291382, "learning_rate": 4.759103641456583e-05, "loss": 0.3934, "step": 34117 }, { "epoch": 19.060335195530726, "grad_norm": 1.187586784362793, "learning_rate": 4.756302521008404e-05, "loss": 0.4635, "step": 34118 }, { "epoch": 19.060893854748603, "grad_norm": 0.7696611881256104, "learning_rate": 4.7535014005602245e-05, "loss": 0.5406, "step": 34119 }, { "epoch": 19.06145251396648, "grad_norm": 0.35524311661720276, "learning_rate": 4.750700280112045e-05, "loss": 0.3825, "step": 34120 }, { "epoch": 19.062011173184356, "grad_norm": 0.42355555295944214, "learning_rate": 4.747899159663866e-05, "loss": 0.3914, "step": 34121 }, { "epoch": 19.062569832402236, "grad_norm": 1.7967973947525024, "learning_rate": 4.745098039215686e-05, "loss": 0.3586, "step": 34122 }, { "epoch": 19.063128491620112, "grad_norm": 0.5909311771392822, "learning_rate": 4.7422969187675076e-05, "loss": 0.5545, "step": 34123 }, { "epoch": 19.06368715083799, "grad_norm": 0.5110454559326172, "learning_rate": 4.739495798319328e-05, "loss": 0.3988, "step": 34124 }, { "epoch": 19.064245810055866, "grad_norm": 0.35232388973236084, "learning_rate": 4.736694677871149e-05, "loss": 0.3914, "step": 34125 }, { "epoch": 19.064804469273742, "grad_norm": 0.3780320882797241, "learning_rate": 4.7338935574229694e-05, "loss": 0.4401, "step": 34126 }, { "epoch": 19.06536312849162, "grad_norm": 0.31993746757507324, "learning_rate": 4.73109243697479e-05, "loss": 0.3499, "step": 34127 }, { "epoch": 19.0659217877095, "grad_norm": 0.7794672846794128, "learning_rate": 4.728291316526611e-05, "loss": 
0.3744, "step": 34128 }, { "epoch": 19.066480446927375, "grad_norm": 0.585939884185791, "learning_rate": 4.725490196078432e-05, "loss": 0.4236, "step": 34129 }, { "epoch": 19.067039106145252, "grad_norm": 0.3240194022655487, "learning_rate": 4.7226890756302525e-05, "loss": 0.3468, "step": 34130 }, { "epoch": 19.06759776536313, "grad_norm": 0.5565316677093506, "learning_rate": 4.719887955182073e-05, "loss": 0.47, "step": 34131 }, { "epoch": 19.068156424581005, "grad_norm": 0.43060302734375, "learning_rate": 4.7170868347338937e-05, "loss": 0.378, "step": 34132 }, { "epoch": 19.06871508379888, "grad_norm": 0.4093746840953827, "learning_rate": 4.714285714285715e-05, "loss": 0.4313, "step": 34133 }, { "epoch": 19.06927374301676, "grad_norm": 0.714434802532196, "learning_rate": 4.7114845938375355e-05, "loss": 0.4189, "step": 34134 }, { "epoch": 19.06983240223464, "grad_norm": 0.484144926071167, "learning_rate": 4.708683473389356e-05, "loss": 0.4037, "step": 34135 }, { "epoch": 19.070391061452515, "grad_norm": 0.388107568025589, "learning_rate": 4.705882352941177e-05, "loss": 0.3177, "step": 34136 }, { "epoch": 19.07094972067039, "grad_norm": 0.4172494411468506, "learning_rate": 4.703081232492997e-05, "loss": 0.3919, "step": 34137 }, { "epoch": 19.071508379888268, "grad_norm": 0.41301459074020386, "learning_rate": 4.700280112044818e-05, "loss": 0.3968, "step": 34138 }, { "epoch": 19.072067039106145, "grad_norm": 0.36860454082489014, "learning_rate": 4.697478991596639e-05, "loss": 0.3644, "step": 34139 }, { "epoch": 19.07262569832402, "grad_norm": 0.36383143067359924, "learning_rate": 4.69467787114846e-05, "loss": 0.3251, "step": 34140 }, { "epoch": 19.073184357541898, "grad_norm": 0.4134189486503601, "learning_rate": 4.6918767507002804e-05, "loss": 0.3419, "step": 34141 }, { "epoch": 19.073743016759778, "grad_norm": 0.6444626450538635, "learning_rate": 4.689075630252101e-05, "loss": 0.3581, "step": 34142 }, { "epoch": 19.074301675977654, "grad_norm": 0.9333446025848389, 
"learning_rate": 4.6862745098039216e-05, "loss": 0.3873, "step": 34143 }, { "epoch": 19.07486033519553, "grad_norm": 0.5211381912231445, "learning_rate": 4.683473389355743e-05, "loss": 0.4522, "step": 34144 }, { "epoch": 19.075418994413408, "grad_norm": 0.6196352243423462, "learning_rate": 4.6806722689075635e-05, "loss": 0.3596, "step": 34145 }, { "epoch": 19.075977653631284, "grad_norm": 0.35157525539398193, "learning_rate": 4.677871148459384e-05, "loss": 0.3365, "step": 34146 }, { "epoch": 19.07653631284916, "grad_norm": 0.4391478896141052, "learning_rate": 4.675070028011205e-05, "loss": 0.3466, "step": 34147 }, { "epoch": 19.07709497206704, "grad_norm": 0.3524358570575714, "learning_rate": 4.672268907563025e-05, "loss": 0.3226, "step": 34148 }, { "epoch": 19.077653631284917, "grad_norm": 0.4521028995513916, "learning_rate": 4.6694677871148466e-05, "loss": 0.3493, "step": 34149 }, { "epoch": 19.078212290502794, "grad_norm": 1.3534393310546875, "learning_rate": 4.666666666666667e-05, "loss": 0.3782, "step": 34150 }, { "epoch": 19.07877094972067, "grad_norm": 1.945213794708252, "learning_rate": 4.663865546218488e-05, "loss": 0.4144, "step": 34151 }, { "epoch": 19.079329608938547, "grad_norm": 1.0611389875411987, "learning_rate": 4.6610644257703084e-05, "loss": 0.4153, "step": 34152 }, { "epoch": 19.079888268156424, "grad_norm": 0.8366313576698303, "learning_rate": 4.658263305322129e-05, "loss": 0.3939, "step": 34153 }, { "epoch": 19.0804469273743, "grad_norm": 0.7407335042953491, "learning_rate": 4.6554621848739496e-05, "loss": 0.4789, "step": 34154 }, { "epoch": 19.08100558659218, "grad_norm": 0.7199921607971191, "learning_rate": 4.652661064425771e-05, "loss": 0.5183, "step": 34155 }, { "epoch": 19.081564245810057, "grad_norm": 1.6046547889709473, "learning_rate": 4.6498599439775914e-05, "loss": 0.4423, "step": 34156 }, { "epoch": 19.082122905027934, "grad_norm": 0.48168522119522095, "learning_rate": 4.647058823529412e-05, "loss": 0.3974, "step": 34157 }, { 
"epoch": 19.08268156424581, "grad_norm": 0.4296114444732666, "learning_rate": 4.6442577030812326e-05, "loss": 0.3463, "step": 34158 }, { "epoch": 19.083240223463687, "grad_norm": 2.2824909687042236, "learning_rate": 4.641456582633053e-05, "loss": 0.3455, "step": 34159 }, { "epoch": 19.083798882681563, "grad_norm": 0.4152123034000397, "learning_rate": 4.6386554621848745e-05, "loss": 0.4699, "step": 34160 }, { "epoch": 19.08435754189944, "grad_norm": 0.7814894318580627, "learning_rate": 4.635854341736695e-05, "loss": 0.458, "step": 34161 }, { "epoch": 19.08491620111732, "grad_norm": 1.4727343320846558, "learning_rate": 4.633053221288516e-05, "loss": 0.4037, "step": 34162 }, { "epoch": 19.085474860335196, "grad_norm": 0.5679681897163391, "learning_rate": 4.630252100840336e-05, "loss": 0.4008, "step": 34163 }, { "epoch": 19.086033519553073, "grad_norm": 0.5303749442100525, "learning_rate": 4.627450980392157e-05, "loss": 0.3487, "step": 34164 }, { "epoch": 19.08659217877095, "grad_norm": 0.5636522173881531, "learning_rate": 4.6246498599439775e-05, "loss": 0.4373, "step": 34165 }, { "epoch": 19.087150837988826, "grad_norm": 5.520827293395996, "learning_rate": 4.621848739495799e-05, "loss": 0.4092, "step": 34166 }, { "epoch": 19.087709497206703, "grad_norm": 0.6318961381912231, "learning_rate": 4.6190476190476194e-05, "loss": 0.3045, "step": 34167 }, { "epoch": 19.08826815642458, "grad_norm": 0.39643406867980957, "learning_rate": 4.61624649859944e-05, "loss": 0.4362, "step": 34168 }, { "epoch": 19.08882681564246, "grad_norm": 0.37128540873527527, "learning_rate": 4.6134453781512606e-05, "loss": 0.345, "step": 34169 }, { "epoch": 19.089385474860336, "grad_norm": 0.5596094727516174, "learning_rate": 4.610644257703081e-05, "loss": 0.3603, "step": 34170 }, { "epoch": 19.089944134078213, "grad_norm": 0.5508023500442505, "learning_rate": 4.6078431372549025e-05, "loss": 0.5016, "step": 34171 }, { "epoch": 19.09050279329609, "grad_norm": 0.6179094314575195, "learning_rate": 
4.605042016806723e-05, "loss": 0.346, "step": 34172 }, { "epoch": 19.091061452513966, "grad_norm": 1.7648438215255737, "learning_rate": 4.6022408963585437e-05, "loss": 0.4677, "step": 34173 }, { "epoch": 19.091620111731842, "grad_norm": 0.401187539100647, "learning_rate": 4.599439775910364e-05, "loss": 0.432, "step": 34174 }, { "epoch": 19.092178770949722, "grad_norm": 0.5296809077262878, "learning_rate": 4.596638655462185e-05, "loss": 0.407, "step": 34175 }, { "epoch": 19.0927374301676, "grad_norm": 0.40033671259880066, "learning_rate": 4.593837535014006e-05, "loss": 0.4148, "step": 34176 }, { "epoch": 19.093296089385476, "grad_norm": 0.76639723777771, "learning_rate": 4.591036414565827e-05, "loss": 0.4094, "step": 34177 }, { "epoch": 19.093854748603352, "grad_norm": 1.1314560174942017, "learning_rate": 4.588235294117647e-05, "loss": 0.3606, "step": 34178 }, { "epoch": 19.09441340782123, "grad_norm": 0.4724883437156677, "learning_rate": 4.585434173669468e-05, "loss": 0.6665, "step": 34179 }, { "epoch": 19.094972067039105, "grad_norm": 0.3455270230770111, "learning_rate": 4.5826330532212885e-05, "loss": 0.4485, "step": 34180 }, { "epoch": 19.095530726256982, "grad_norm": 0.5339173674583435, "learning_rate": 4.579831932773109e-05, "loss": 0.4023, "step": 34181 }, { "epoch": 19.096089385474862, "grad_norm": 0.6792379021644592, "learning_rate": 4.5770308123249304e-05, "loss": 0.3938, "step": 34182 }, { "epoch": 19.09664804469274, "grad_norm": 0.3906536400318146, "learning_rate": 4.574229691876751e-05, "loss": 0.4277, "step": 34183 }, { "epoch": 19.097206703910615, "grad_norm": 0.5333456993103027, "learning_rate": 4.5714285714285716e-05, "loss": 0.394, "step": 34184 }, { "epoch": 19.09776536312849, "grad_norm": 0.6192470192909241, "learning_rate": 4.568627450980392e-05, "loss": 0.391, "step": 34185 }, { "epoch": 19.098324022346368, "grad_norm": 0.2980307340621948, "learning_rate": 4.565826330532213e-05, "loss": 0.3264, "step": 34186 }, { "epoch": 19.098882681564245, 
"grad_norm": 0.7014803886413574, "learning_rate": 4.563025210084034e-05, "loss": 0.3392, "step": 34187 }, { "epoch": 19.09944134078212, "grad_norm": 0.5543592572212219, "learning_rate": 4.560224089635855e-05, "loss": 0.3789, "step": 34188 }, { "epoch": 19.1, "grad_norm": 0.4921249449253082, "learning_rate": 4.557422969187675e-05, "loss": 0.5058, "step": 34189 }, { "epoch": 19.100558659217878, "grad_norm": 0.4610847532749176, "learning_rate": 4.554621848739496e-05, "loss": 0.4497, "step": 34190 }, { "epoch": 19.101117318435755, "grad_norm": 1.6444391012191772, "learning_rate": 4.5518207282913165e-05, "loss": 0.3999, "step": 34191 }, { "epoch": 19.10167597765363, "grad_norm": 0.7008644342422485, "learning_rate": 4.549019607843137e-05, "loss": 0.3456, "step": 34192 }, { "epoch": 19.102234636871508, "grad_norm": 0.4882813096046448, "learning_rate": 4.5462184873949584e-05, "loss": 0.4038, "step": 34193 }, { "epoch": 19.102793296089384, "grad_norm": 0.5796037912368774, "learning_rate": 4.543417366946779e-05, "loss": 0.4282, "step": 34194 }, { "epoch": 19.10335195530726, "grad_norm": 0.5729399919509888, "learning_rate": 4.5406162464985996e-05, "loss": 0.5945, "step": 34195 }, { "epoch": 19.10391061452514, "grad_norm": 0.3941247761249542, "learning_rate": 4.53781512605042e-05, "loss": 0.459, "step": 34196 }, { "epoch": 19.104469273743018, "grad_norm": 1.8043785095214844, "learning_rate": 4.535014005602241e-05, "loss": 0.4556, "step": 34197 }, { "epoch": 19.105027932960894, "grad_norm": 0.4125872850418091, "learning_rate": 4.532212885154062e-05, "loss": 0.398, "step": 34198 }, { "epoch": 19.10558659217877, "grad_norm": 0.3589528203010559, "learning_rate": 4.5294117647058826e-05, "loss": 0.3206, "step": 34199 }, { "epoch": 19.106145251396647, "grad_norm": 0.5594779253005981, "learning_rate": 4.526610644257703e-05, "loss": 0.4212, "step": 34200 }, { "epoch": 19.106703910614524, "grad_norm": 0.5324401259422302, "learning_rate": 4.523809523809524e-05, "loss": 0.3484, "step": 
34201 }, { "epoch": 19.107262569832404, "grad_norm": 0.8759794235229492, "learning_rate": 4.5210084033613444e-05, "loss": 0.3369, "step": 34202 }, { "epoch": 19.10782122905028, "grad_norm": 0.33922043442726135, "learning_rate": 4.518207282913166e-05, "loss": 0.2869, "step": 34203 }, { "epoch": 19.108379888268157, "grad_norm": 0.45556265115737915, "learning_rate": 4.515406162464986e-05, "loss": 0.3603, "step": 34204 }, { "epoch": 19.108938547486034, "grad_norm": 0.773074209690094, "learning_rate": 4.512605042016807e-05, "loss": 0.4423, "step": 34205 }, { "epoch": 19.10949720670391, "grad_norm": 0.40810850262641907, "learning_rate": 4.5098039215686275e-05, "loss": 0.3475, "step": 34206 }, { "epoch": 19.110055865921787, "grad_norm": 0.33923882246017456, "learning_rate": 4.507002801120448e-05, "loss": 0.3494, "step": 34207 }, { "epoch": 19.110614525139663, "grad_norm": 0.47900938987731934, "learning_rate": 4.504201680672269e-05, "loss": 0.6554, "step": 34208 }, { "epoch": 19.111173184357543, "grad_norm": 0.6720021963119507, "learning_rate": 4.50140056022409e-05, "loss": 0.3882, "step": 34209 }, { "epoch": 19.11173184357542, "grad_norm": 2.907823085784912, "learning_rate": 4.4985994397759106e-05, "loss": 0.3919, "step": 34210 }, { "epoch": 19.112290502793297, "grad_norm": 0.43489328026771545, "learning_rate": 4.495798319327731e-05, "loss": 0.3344, "step": 34211 }, { "epoch": 19.112849162011173, "grad_norm": 0.46838128566741943, "learning_rate": 4.492997198879552e-05, "loss": 0.4066, "step": 34212 }, { "epoch": 19.11340782122905, "grad_norm": 0.3710154891014099, "learning_rate": 4.4901960784313724e-05, "loss": 0.3643, "step": 34213 }, { "epoch": 19.113966480446926, "grad_norm": 0.5053613781929016, "learning_rate": 4.4873949579831937e-05, "loss": 0.3608, "step": 34214 }, { "epoch": 19.114525139664803, "grad_norm": 0.5019699931144714, "learning_rate": 4.484593837535014e-05, "loss": 0.3882, "step": 34215 }, { "epoch": 19.115083798882683, "grad_norm": 3.535247802734375, 
"learning_rate": 4.481792717086835e-05, "loss": 0.4134, "step": 34216 }, { "epoch": 19.11564245810056, "grad_norm": 0.5509768724441528, "learning_rate": 4.4789915966386555e-05, "loss": 0.442, "step": 34217 }, { "epoch": 19.116201117318436, "grad_norm": 0.34652987122535706, "learning_rate": 4.476190476190476e-05, "loss": 0.3416, "step": 34218 }, { "epoch": 19.116759776536313, "grad_norm": 0.3768334984779358, "learning_rate": 4.473389355742297e-05, "loss": 0.3529, "step": 34219 }, { "epoch": 19.11731843575419, "grad_norm": 0.48385241627693176, "learning_rate": 4.470588235294118e-05, "loss": 0.3047, "step": 34220 }, { "epoch": 19.117877094972066, "grad_norm": 0.6893239617347717, "learning_rate": 4.4677871148459385e-05, "loss": 0.383, "step": 34221 }, { "epoch": 19.118435754189946, "grad_norm": 0.6909326314926147, "learning_rate": 4.464985994397759e-05, "loss": 0.5975, "step": 34222 }, { "epoch": 19.118994413407822, "grad_norm": 0.440336138010025, "learning_rate": 4.46218487394958e-05, "loss": 0.3798, "step": 34223 }, { "epoch": 19.1195530726257, "grad_norm": 0.34653693437576294, "learning_rate": 4.4593837535014e-05, "loss": 0.3647, "step": 34224 }, { "epoch": 19.120111731843576, "grad_norm": 1.9989093542099, "learning_rate": 4.4565826330532216e-05, "loss": 0.4407, "step": 34225 }, { "epoch": 19.120670391061452, "grad_norm": 0.4312792122364044, "learning_rate": 4.453781512605042e-05, "loss": 0.516, "step": 34226 }, { "epoch": 19.12122905027933, "grad_norm": 0.4135589897632599, "learning_rate": 4.450980392156863e-05, "loss": 0.3385, "step": 34227 }, { "epoch": 19.121787709497205, "grad_norm": 0.4922415316104889, "learning_rate": 4.4481792717086834e-05, "loss": 0.4132, "step": 34228 }, { "epoch": 19.122346368715085, "grad_norm": 0.6351668238639832, "learning_rate": 4.445378151260504e-05, "loss": 0.5048, "step": 34229 }, { "epoch": 19.122905027932962, "grad_norm": 0.676240086555481, "learning_rate": 4.442577030812325e-05, "loss": 0.4116, "step": 34230 }, { "epoch": 
19.12346368715084, "grad_norm": 0.7866749167442322, "learning_rate": 4.439775910364146e-05, "loss": 0.2922, "step": 34231 }, { "epoch": 19.124022346368715, "grad_norm": 1.8758156299591064, "learning_rate": 4.4369747899159665e-05, "loss": 0.4118, "step": 34232 }, { "epoch": 19.12458100558659, "grad_norm": 0.7438153028488159, "learning_rate": 4.434173669467787e-05, "loss": 0.3668, "step": 34233 }, { "epoch": 19.12513966480447, "grad_norm": 0.36478978395462036, "learning_rate": 4.431372549019608e-05, "loss": 0.4436, "step": 34234 }, { "epoch": 19.125698324022345, "grad_norm": 0.37556782364845276, "learning_rate": 4.428571428571428e-05, "loss": 0.4635, "step": 34235 }, { "epoch": 19.126256983240225, "grad_norm": 0.38899460434913635, "learning_rate": 4.4257703081232496e-05, "loss": 0.414, "step": 34236 }, { "epoch": 19.1268156424581, "grad_norm": 0.4715697169303894, "learning_rate": 4.42296918767507e-05, "loss": 0.3814, "step": 34237 }, { "epoch": 19.127374301675978, "grad_norm": 0.5459057688713074, "learning_rate": 4.420168067226891e-05, "loss": 0.4786, "step": 34238 }, { "epoch": 19.127932960893855, "grad_norm": 0.49794602394104004, "learning_rate": 4.4173669467787114e-05, "loss": 0.349, "step": 34239 }, { "epoch": 19.12849162011173, "grad_norm": 0.3777656853199005, "learning_rate": 4.414565826330532e-05, "loss": 0.331, "step": 34240 }, { "epoch": 19.129050279329608, "grad_norm": 0.46212896704673767, "learning_rate": 4.411764705882353e-05, "loss": 0.5189, "step": 34241 }, { "epoch": 19.129608938547484, "grad_norm": 0.4109019935131073, "learning_rate": 4.408963585434174e-05, "loss": 0.4203, "step": 34242 }, { "epoch": 19.130167597765364, "grad_norm": 0.37591230869293213, "learning_rate": 4.4061624649859944e-05, "loss": 0.3335, "step": 34243 }, { "epoch": 19.13072625698324, "grad_norm": 0.5432429909706116, "learning_rate": 4.403361344537815e-05, "loss": 0.3465, "step": 34244 }, { "epoch": 19.131284916201118, "grad_norm": 0.46247363090515137, "learning_rate": 
4.4005602240896356e-05, "loss": 0.4028, "step": 34245 }, { "epoch": 19.131843575418994, "grad_norm": 0.39337995648384094, "learning_rate": 4.397759103641457e-05, "loss": 0.3571, "step": 34246 }, { "epoch": 19.13240223463687, "grad_norm": 0.36914628744125366, "learning_rate": 4.3949579831932775e-05, "loss": 0.4886, "step": 34247 }, { "epoch": 19.132960893854747, "grad_norm": 0.49211585521698, "learning_rate": 4.392156862745098e-05, "loss": 0.4052, "step": 34248 }, { "epoch": 19.133519553072627, "grad_norm": 0.4447023868560791, "learning_rate": 4.389355742296919e-05, "loss": 0.3776, "step": 34249 }, { "epoch": 19.134078212290504, "grad_norm": 1.1806244850158691, "learning_rate": 4.386554621848739e-05, "loss": 0.4135, "step": 34250 }, { "epoch": 19.13463687150838, "grad_norm": 0.9135016798973083, "learning_rate": 4.38375350140056e-05, "loss": 0.4195, "step": 34251 }, { "epoch": 19.135195530726257, "grad_norm": 0.35323119163513184, "learning_rate": 4.380952380952381e-05, "loss": 0.3915, "step": 34252 }, { "epoch": 19.135754189944134, "grad_norm": 0.5484306812286377, "learning_rate": 4.378151260504202e-05, "loss": 0.4333, "step": 34253 }, { "epoch": 19.13631284916201, "grad_norm": 0.921738862991333, "learning_rate": 4.3753501400560224e-05, "loss": 0.4144, "step": 34254 }, { "epoch": 19.136871508379887, "grad_norm": 0.48659461736679077, "learning_rate": 4.372549019607843e-05, "loss": 0.4253, "step": 34255 }, { "epoch": 19.137430167597767, "grad_norm": 0.7575713992118835, "learning_rate": 4.3697478991596636e-05, "loss": 0.3186, "step": 34256 }, { "epoch": 19.137988826815644, "grad_norm": 0.9563173055648804, "learning_rate": 4.366946778711485e-05, "loss": 0.4094, "step": 34257 }, { "epoch": 19.13854748603352, "grad_norm": 0.5873318910598755, "learning_rate": 4.3641456582633055e-05, "loss": 0.4059, "step": 34258 }, { "epoch": 19.139106145251397, "grad_norm": 0.3507838845252991, "learning_rate": 4.361344537815126e-05, "loss": 0.4241, "step": 34259 }, { "epoch": 
19.139664804469273, "grad_norm": 1.0424703359603882, "learning_rate": 4.3585434173669467e-05, "loss": 0.4388, "step": 34260 }, { "epoch": 19.14022346368715, "grad_norm": 0.4615655839443207, "learning_rate": 4.355742296918767e-05, "loss": 0.438, "step": 34261 }, { "epoch": 19.140782122905026, "grad_norm": 0.49827060103416443, "learning_rate": 4.352941176470588e-05, "loss": 0.4324, "step": 34262 }, { "epoch": 19.141340782122906, "grad_norm": 0.4816020131111145, "learning_rate": 4.350140056022409e-05, "loss": 0.4084, "step": 34263 }, { "epoch": 19.141899441340783, "grad_norm": 0.44822368025779724, "learning_rate": 4.34733893557423e-05, "loss": 0.3578, "step": 34264 }, { "epoch": 19.14245810055866, "grad_norm": 0.39726537466049194, "learning_rate": 4.34453781512605e-05, "loss": 0.3666, "step": 34265 }, { "epoch": 19.143016759776536, "grad_norm": 0.47128039598464966, "learning_rate": 4.341736694677871e-05, "loss": 0.3857, "step": 34266 }, { "epoch": 19.143575418994413, "grad_norm": 1.2847801446914673, "learning_rate": 4.3389355742296915e-05, "loss": 0.4591, "step": 34267 }, { "epoch": 19.14413407821229, "grad_norm": 0.6383705735206604, "learning_rate": 4.336134453781513e-05, "loss": 0.4182, "step": 34268 }, { "epoch": 19.144692737430166, "grad_norm": 1.0606571435928345, "learning_rate": 4.3333333333333334e-05, "loss": 0.4121, "step": 34269 }, { "epoch": 19.145251396648046, "grad_norm": 0.6661317944526672, "learning_rate": 4.330532212885154e-05, "loss": 0.4344, "step": 34270 }, { "epoch": 19.145810055865923, "grad_norm": 0.593712329864502, "learning_rate": 4.3277310924369746e-05, "loss": 0.3228, "step": 34271 }, { "epoch": 19.1463687150838, "grad_norm": 0.4032175540924072, "learning_rate": 4.324929971988795e-05, "loss": 0.3686, "step": 34272 }, { "epoch": 19.146927374301676, "grad_norm": 0.49472740292549133, "learning_rate": 4.3221288515406165e-05, "loss": 0.4627, "step": 34273 }, { "epoch": 19.147486033519552, "grad_norm": 0.87626713514328, "learning_rate": 
4.319327731092437e-05, "loss": 0.3669, "step": 34274 }, { "epoch": 19.14804469273743, "grad_norm": 0.477796345949173, "learning_rate": 4.316526610644258e-05, "loss": 0.4801, "step": 34275 }, { "epoch": 19.14860335195531, "grad_norm": 0.461201936006546, "learning_rate": 4.313725490196078e-05, "loss": 0.3879, "step": 34276 }, { "epoch": 19.149162011173186, "grad_norm": 0.6023568511009216, "learning_rate": 4.310924369747899e-05, "loss": 0.5359, "step": 34277 }, { "epoch": 19.149720670391062, "grad_norm": 0.4684387445449829, "learning_rate": 4.3081232492997195e-05, "loss": 0.3828, "step": 34278 }, { "epoch": 19.15027932960894, "grad_norm": 0.3617677688598633, "learning_rate": 4.305322128851541e-05, "loss": 0.4087, "step": 34279 }, { "epoch": 19.150837988826815, "grad_norm": 0.8472923040390015, "learning_rate": 4.3025210084033614e-05, "loss": 0.3591, "step": 34280 }, { "epoch": 19.15139664804469, "grad_norm": 0.6464107632637024, "learning_rate": 4.299719887955182e-05, "loss": 0.3883, "step": 34281 }, { "epoch": 19.15195530726257, "grad_norm": 0.4648975431919098, "learning_rate": 4.2969187675070026e-05, "loss": 0.3634, "step": 34282 }, { "epoch": 19.15251396648045, "grad_norm": 0.41971632838249207, "learning_rate": 4.294117647058823e-05, "loss": 0.4138, "step": 34283 }, { "epoch": 19.153072625698325, "grad_norm": 0.8614796996116638, "learning_rate": 4.2913165266106444e-05, "loss": 0.6349, "step": 34284 }, { "epoch": 19.1536312849162, "grad_norm": 0.4292737543582916, "learning_rate": 4.288515406162465e-05, "loss": 0.4658, "step": 34285 }, { "epoch": 19.154189944134078, "grad_norm": 0.726313054561615, "learning_rate": 4.2857142857142856e-05, "loss": 0.5186, "step": 34286 }, { "epoch": 19.154748603351955, "grad_norm": 0.5681427121162415, "learning_rate": 4.282913165266106e-05, "loss": 0.5047, "step": 34287 }, { "epoch": 19.15530726256983, "grad_norm": 0.39771389961242676, "learning_rate": 4.280112044817927e-05, "loss": 0.3963, "step": 34288 }, { "epoch": 19.155865921787708, 
"grad_norm": 0.3797587752342224, "learning_rate": 4.2773109243697474e-05, "loss": 0.4568, "step": 34289 }, { "epoch": 19.156424581005588, "grad_norm": 0.61158686876297, "learning_rate": 4.274509803921569e-05, "loss": 0.4238, "step": 34290 }, { "epoch": 19.156983240223465, "grad_norm": 2.9981637001037598, "learning_rate": 4.271708683473389e-05, "loss": 0.5333, "step": 34291 }, { "epoch": 19.15754189944134, "grad_norm": 0.923304557800293, "learning_rate": 4.26890756302521e-05, "loss": 0.5107, "step": 34292 }, { "epoch": 19.158100558659218, "grad_norm": 2.4630281925201416, "learning_rate": 4.2661064425770305e-05, "loss": 0.4966, "step": 34293 }, { "epoch": 19.158659217877094, "grad_norm": 0.43496495485305786, "learning_rate": 4.263305322128851e-05, "loss": 0.4827, "step": 34294 }, { "epoch": 19.15921787709497, "grad_norm": 1.0934908390045166, "learning_rate": 4.2605042016806724e-05, "loss": 0.3397, "step": 34295 }, { "epoch": 19.159776536312847, "grad_norm": 0.4419431984424591, "learning_rate": 4.257703081232493e-05, "loss": 0.5075, "step": 34296 }, { "epoch": 19.160335195530728, "grad_norm": 0.6630734801292419, "learning_rate": 4.2549019607843136e-05, "loss": 0.4103, "step": 34297 }, { "epoch": 19.160893854748604, "grad_norm": 0.7188488245010376, "learning_rate": 4.252100840336134e-05, "loss": 0.4085, "step": 34298 }, { "epoch": 19.16145251396648, "grad_norm": 1.0252022743225098, "learning_rate": 4.249299719887955e-05, "loss": 0.5157, "step": 34299 }, { "epoch": 19.162011173184357, "grad_norm": 1.0057063102722168, "learning_rate": 4.246498599439776e-05, "loss": 0.4006, "step": 34300 }, { "epoch": 19.162569832402234, "grad_norm": 1.642436146736145, "learning_rate": 4.2436974789915967e-05, "loss": 0.345, "step": 34301 }, { "epoch": 19.16312849162011, "grad_norm": 0.3450249135494232, "learning_rate": 4.240896358543417e-05, "loss": 0.3652, "step": 34302 }, { "epoch": 19.16368715083799, "grad_norm": 0.3725767135620117, "learning_rate": 4.238095238095238e-05, "loss": 
0.4096, "step": 34303 }, { "epoch": 19.164245810055867, "grad_norm": 1.5294750928878784, "learning_rate": 4.2352941176470585e-05, "loss": 0.3936, "step": 34304 }, { "epoch": 19.164804469273744, "grad_norm": 0.3487869203090668, "learning_rate": 4.232492997198879e-05, "loss": 0.3021, "step": 34305 }, { "epoch": 19.16536312849162, "grad_norm": 1.7775439023971558, "learning_rate": 4.2296918767507e-05, "loss": 0.5655, "step": 34306 }, { "epoch": 19.165921787709497, "grad_norm": 0.40884989500045776, "learning_rate": 4.226890756302521e-05, "loss": 0.3999, "step": 34307 }, { "epoch": 19.166480446927373, "grad_norm": 0.6474719047546387, "learning_rate": 4.2240896358543415e-05, "loss": 0.5398, "step": 34308 }, { "epoch": 19.16703910614525, "grad_norm": 0.5420839786529541, "learning_rate": 4.221288515406162e-05, "loss": 0.4273, "step": 34309 }, { "epoch": 19.16759776536313, "grad_norm": 0.626081109046936, "learning_rate": 4.218487394957983e-05, "loss": 0.4102, "step": 34310 }, { "epoch": 19.168156424581007, "grad_norm": 0.5016055703163147, "learning_rate": 4.215686274509804e-05, "loss": 0.3793, "step": 34311 }, { "epoch": 19.168715083798883, "grad_norm": 0.5190970301628113, "learning_rate": 4.2128851540616246e-05, "loss": 0.4073, "step": 34312 }, { "epoch": 19.16927374301676, "grad_norm": 0.5294607281684875, "learning_rate": 4.210084033613445e-05, "loss": 0.3961, "step": 34313 }, { "epoch": 19.169832402234636, "grad_norm": 0.5505123138427734, "learning_rate": 4.207282913165266e-05, "loss": 0.3457, "step": 34314 }, { "epoch": 19.170391061452513, "grad_norm": 0.4130032956600189, "learning_rate": 4.2044817927170864e-05, "loss": 0.4335, "step": 34315 }, { "epoch": 19.17094972067039, "grad_norm": 0.44169241189956665, "learning_rate": 4.201680672268908e-05, "loss": 0.4047, "step": 34316 }, { "epoch": 19.17150837988827, "grad_norm": 0.38089585304260254, "learning_rate": 4.198879551820728e-05, "loss": 0.4338, "step": 34317 }, { "epoch": 19.172067039106146, "grad_norm": 
5.8323655128479, "learning_rate": 4.196078431372549e-05, "loss": 0.4143, "step": 34318 }, { "epoch": 19.172625698324023, "grad_norm": 1.6275320053100586, "learning_rate": 4.1932773109243695e-05, "loss": 0.3966, "step": 34319 }, { "epoch": 19.1731843575419, "grad_norm": 0.5544790625572205, "learning_rate": 4.19047619047619e-05, "loss": 0.3933, "step": 34320 }, { "epoch": 19.173743016759776, "grad_norm": 0.3950328826904297, "learning_rate": 4.187675070028011e-05, "loss": 0.4861, "step": 34321 }, { "epoch": 19.174301675977652, "grad_norm": 0.4213440716266632, "learning_rate": 4.184873949579832e-05, "loss": 0.4476, "step": 34322 }, { "epoch": 19.174860335195532, "grad_norm": 0.5959659218788147, "learning_rate": 4.1820728291316526e-05, "loss": 0.3459, "step": 34323 }, { "epoch": 19.17541899441341, "grad_norm": 0.9817525148391724, "learning_rate": 4.179271708683473e-05, "loss": 0.442, "step": 34324 }, { "epoch": 19.175977653631286, "grad_norm": 0.4581740200519562, "learning_rate": 4.176470588235294e-05, "loss": 0.4052, "step": 34325 }, { "epoch": 19.176536312849162, "grad_norm": 0.36220991611480713, "learning_rate": 4.1736694677871144e-05, "loss": 0.3479, "step": 34326 }, { "epoch": 19.17709497206704, "grad_norm": 0.5001604557037354, "learning_rate": 4.170868347338936e-05, "loss": 0.4275, "step": 34327 }, { "epoch": 19.177653631284915, "grad_norm": 0.5874848365783691, "learning_rate": 4.168067226890757e-05, "loss": 0.4385, "step": 34328 }, { "epoch": 19.178212290502792, "grad_norm": 0.46055924892425537, "learning_rate": 4.1652661064425775e-05, "loss": 0.3793, "step": 34329 }, { "epoch": 19.178770949720672, "grad_norm": 0.4592307209968567, "learning_rate": 4.162464985994398e-05, "loss": 0.4431, "step": 34330 }, { "epoch": 19.17932960893855, "grad_norm": 0.49831610918045044, "learning_rate": 4.159663865546219e-05, "loss": 0.4617, "step": 34331 }, { "epoch": 19.179888268156425, "grad_norm": 0.4550251066684723, "learning_rate": 4.156862745098039e-05, "loss": 0.4514, "step": 
34332 }, { "epoch": 19.1804469273743, "grad_norm": 1.1201833486557007, "learning_rate": 4.1540616246498606e-05, "loss": 0.423, "step": 34333 }, { "epoch": 19.18100558659218, "grad_norm": 0.752461850643158, "learning_rate": 4.151260504201681e-05, "loss": 0.4612, "step": 34334 }, { "epoch": 19.181564245810055, "grad_norm": 0.9002892374992371, "learning_rate": 4.148459383753502e-05, "loss": 0.3933, "step": 34335 }, { "epoch": 19.18212290502793, "grad_norm": 0.5792464017868042, "learning_rate": 4.1456582633053224e-05, "loss": 0.5658, "step": 34336 }, { "epoch": 19.18268156424581, "grad_norm": 0.38768136501312256, "learning_rate": 4.142857142857143e-05, "loss": 0.3858, "step": 34337 }, { "epoch": 19.183240223463688, "grad_norm": 0.36140042543411255, "learning_rate": 4.140056022408964e-05, "loss": 0.3579, "step": 34338 }, { "epoch": 19.183798882681565, "grad_norm": 1.685960292816162, "learning_rate": 4.137254901960785e-05, "loss": 0.512, "step": 34339 }, { "epoch": 19.18435754189944, "grad_norm": 0.35322847962379456, "learning_rate": 4.1344537815126055e-05, "loss": 0.4037, "step": 34340 }, { "epoch": 19.184916201117318, "grad_norm": 0.672893762588501, "learning_rate": 4.131652661064426e-05, "loss": 0.4224, "step": 34341 }, { "epoch": 19.185474860335194, "grad_norm": 0.5783244967460632, "learning_rate": 4.1288515406162467e-05, "loss": 0.4676, "step": 34342 }, { "epoch": 19.18603351955307, "grad_norm": 6.282768249511719, "learning_rate": 4.126050420168068e-05, "loss": 0.3382, "step": 34343 }, { "epoch": 19.18659217877095, "grad_norm": 0.3705224096775055, "learning_rate": 4.1232492997198885e-05, "loss": 0.3064, "step": 34344 }, { "epoch": 19.187150837988828, "grad_norm": 0.4761348366737366, "learning_rate": 4.120448179271709e-05, "loss": 0.4091, "step": 34345 }, { "epoch": 19.187709497206704, "grad_norm": 1.8969731330871582, "learning_rate": 4.11764705882353e-05, "loss": 0.3728, "step": 34346 }, { "epoch": 19.18826815642458, "grad_norm": 0.5123389959335327, "learning_rate": 
4.11484593837535e-05, "loss": 0.4134, "step": 34347 }, { "epoch": 19.188826815642457, "grad_norm": 0.8245759606361389, "learning_rate": 4.112044817927171e-05, "loss": 0.5437, "step": 34348 }, { "epoch": 19.189385474860334, "grad_norm": 0.41834890842437744, "learning_rate": 4.109243697478992e-05, "loss": 0.3773, "step": 34349 }, { "epoch": 19.189944134078214, "grad_norm": 0.5005658268928528, "learning_rate": 4.106442577030813e-05, "loss": 0.338, "step": 34350 }, { "epoch": 19.19050279329609, "grad_norm": 1.4807153940200806, "learning_rate": 4.1036414565826334e-05, "loss": 0.5031, "step": 34351 }, { "epoch": 19.191061452513967, "grad_norm": 0.5110713839530945, "learning_rate": 4.100840336134454e-05, "loss": 0.3326, "step": 34352 }, { "epoch": 19.191620111731844, "grad_norm": 2.1174850463867188, "learning_rate": 4.0980392156862746e-05, "loss": 0.3338, "step": 34353 }, { "epoch": 19.19217877094972, "grad_norm": 0.8628860712051392, "learning_rate": 4.095238095238096e-05, "loss": 0.3338, "step": 34354 }, { "epoch": 19.192737430167597, "grad_norm": 0.3803478479385376, "learning_rate": 4.0924369747899165e-05, "loss": 0.3605, "step": 34355 }, { "epoch": 19.193296089385473, "grad_norm": 0.4083240330219269, "learning_rate": 4.089635854341737e-05, "loss": 0.3814, "step": 34356 }, { "epoch": 19.193854748603353, "grad_norm": 1.269036054611206, "learning_rate": 4.086834733893558e-05, "loss": 0.4311, "step": 34357 }, { "epoch": 19.19441340782123, "grad_norm": 0.5924358367919922, "learning_rate": 4.084033613445378e-05, "loss": 0.3645, "step": 34358 }, { "epoch": 19.194972067039107, "grad_norm": 0.6092901229858398, "learning_rate": 4.081232492997199e-05, "loss": 0.4134, "step": 34359 }, { "epoch": 19.195530726256983, "grad_norm": 0.5444338917732239, "learning_rate": 4.07843137254902e-05, "loss": 0.4707, "step": 34360 }, { "epoch": 19.19608938547486, "grad_norm": 0.5880433320999146, "learning_rate": 4.075630252100841e-05, "loss": 0.3255, "step": 34361 }, { "epoch": 
19.196648044692736, "grad_norm": 0.3997005820274353, "learning_rate": 4.0728291316526614e-05, "loss": 0.443, "step": 34362 }, { "epoch": 19.197206703910613, "grad_norm": 0.4177122712135315, "learning_rate": 4.070028011204482e-05, "loss": 0.4718, "step": 34363 }, { "epoch": 19.197765363128493, "grad_norm": 0.25567135214805603, "learning_rate": 4.0672268907563026e-05, "loss": 0.3029, "step": 34364 }, { "epoch": 19.19832402234637, "grad_norm": 0.7818362712860107, "learning_rate": 4.064425770308124e-05, "loss": 0.4759, "step": 34365 }, { "epoch": 19.198882681564246, "grad_norm": 1.0677684545516968, "learning_rate": 4.0616246498599444e-05, "loss": 0.3505, "step": 34366 }, { "epoch": 19.199441340782123, "grad_norm": 0.4286250174045563, "learning_rate": 4.058823529411765e-05, "loss": 0.4432, "step": 34367 }, { "epoch": 19.2, "grad_norm": 0.4016687273979187, "learning_rate": 4.0560224089635856e-05, "loss": 0.3748, "step": 34368 }, { "epoch": 19.200558659217876, "grad_norm": 0.8131793141365051, "learning_rate": 4.053221288515406e-05, "loss": 0.3686, "step": 34369 }, { "epoch": 19.201117318435756, "grad_norm": 0.3354417681694031, "learning_rate": 4.0504201680672275e-05, "loss": 0.3442, "step": 34370 }, { "epoch": 19.201675977653633, "grad_norm": 3.6068902015686035, "learning_rate": 4.047619047619048e-05, "loss": 0.3728, "step": 34371 }, { "epoch": 19.20223463687151, "grad_norm": 0.5244073867797852, "learning_rate": 4.044817927170869e-05, "loss": 0.4244, "step": 34372 }, { "epoch": 19.202793296089386, "grad_norm": 0.38286206126213074, "learning_rate": 4.042016806722689e-05, "loss": 0.4356, "step": 34373 }, { "epoch": 19.203351955307262, "grad_norm": 1.1345473527908325, "learning_rate": 4.03921568627451e-05, "loss": 0.3763, "step": 34374 }, { "epoch": 19.20391061452514, "grad_norm": 0.6460477709770203, "learning_rate": 4.0364145658263305e-05, "loss": 0.4427, "step": 34375 }, { "epoch": 19.204469273743015, "grad_norm": 0.5855938792228699, "learning_rate": 4.033613445378152e-05, 
"loss": 0.3745, "step": 34376 }, { "epoch": 19.205027932960895, "grad_norm": 0.47824084758758545, "learning_rate": 4.0308123249299724e-05, "loss": 0.3633, "step": 34377 }, { "epoch": 19.205586592178772, "grad_norm": 0.4581983983516693, "learning_rate": 4.028011204481793e-05, "loss": 0.4907, "step": 34378 }, { "epoch": 19.20614525139665, "grad_norm": 0.9827057719230652, "learning_rate": 4.0252100840336136e-05, "loss": 0.5121, "step": 34379 }, { "epoch": 19.206703910614525, "grad_norm": 0.5318775177001953, "learning_rate": 4.022408963585434e-05, "loss": 0.3904, "step": 34380 }, { "epoch": 19.2072625698324, "grad_norm": 0.31430310010910034, "learning_rate": 4.0196078431372555e-05, "loss": 0.3172, "step": 34381 }, { "epoch": 19.20782122905028, "grad_norm": 0.44189882278442383, "learning_rate": 4.016806722689076e-05, "loss": 0.4017, "step": 34382 }, { "epoch": 19.208379888268155, "grad_norm": 0.6885204315185547, "learning_rate": 4.0140056022408967e-05, "loss": 0.4175, "step": 34383 }, { "epoch": 19.208938547486035, "grad_norm": 0.4150227904319763, "learning_rate": 4.011204481792717e-05, "loss": 0.471, "step": 34384 }, { "epoch": 19.20949720670391, "grad_norm": 4.07744026184082, "learning_rate": 4.008403361344538e-05, "loss": 0.4057, "step": 34385 }, { "epoch": 19.210055865921788, "grad_norm": 0.3927232027053833, "learning_rate": 4.005602240896359e-05, "loss": 0.3981, "step": 34386 }, { "epoch": 19.210614525139665, "grad_norm": 0.4456595778465271, "learning_rate": 4.00280112044818e-05, "loss": 0.4037, "step": 34387 }, { "epoch": 19.21117318435754, "grad_norm": 0.44463491439819336, "learning_rate": 4e-05, "loss": 0.4162, "step": 34388 }, { "epoch": 19.211731843575418, "grad_norm": 0.5158103108406067, "learning_rate": 3.997198879551821e-05, "loss": 0.4227, "step": 34389 }, { "epoch": 19.212290502793294, "grad_norm": 0.4529596269130707, "learning_rate": 3.9943977591036415e-05, "loss": 0.5055, "step": 34390 }, { "epoch": 19.212849162011175, "grad_norm": 0.4801010191440582, 
"learning_rate": 3.991596638655462e-05, "loss": 0.2862, "step": 34391 }, { "epoch": 19.21340782122905, "grad_norm": 0.37588655948638916, "learning_rate": 3.9887955182072834e-05, "loss": 0.3313, "step": 34392 }, { "epoch": 19.213966480446928, "grad_norm": 0.5175050497055054, "learning_rate": 3.985994397759104e-05, "loss": 0.3854, "step": 34393 }, { "epoch": 19.214525139664804, "grad_norm": 0.39390328526496887, "learning_rate": 3.9831932773109246e-05, "loss": 0.3752, "step": 34394 }, { "epoch": 19.21508379888268, "grad_norm": 0.34324905276298523, "learning_rate": 3.980392156862745e-05, "loss": 0.3163, "step": 34395 }, { "epoch": 19.215642458100557, "grad_norm": 0.3557980954647064, "learning_rate": 3.977591036414566e-05, "loss": 0.4621, "step": 34396 }, { "epoch": 19.216201117318437, "grad_norm": 0.3141343593597412, "learning_rate": 3.974789915966387e-05, "loss": 0.3152, "step": 34397 }, { "epoch": 19.216759776536314, "grad_norm": 0.36761367321014404, "learning_rate": 3.971988795518208e-05, "loss": 0.3269, "step": 34398 }, { "epoch": 19.21731843575419, "grad_norm": 0.9165186285972595, "learning_rate": 3.969187675070028e-05, "loss": 0.3777, "step": 34399 }, { "epoch": 19.217877094972067, "grad_norm": 9.963189125061035, "learning_rate": 3.966386554621849e-05, "loss": 0.4152, "step": 34400 }, { "epoch": 19.218435754189944, "grad_norm": 4.866195201873779, "learning_rate": 3.9635854341736695e-05, "loss": 0.6291, "step": 34401 }, { "epoch": 19.21899441340782, "grad_norm": 0.48058149218559265, "learning_rate": 3.96078431372549e-05, "loss": 0.339, "step": 34402 }, { "epoch": 19.219553072625697, "grad_norm": 0.41713157296180725, "learning_rate": 3.9579831932773114e-05, "loss": 0.4617, "step": 34403 }, { "epoch": 19.220111731843577, "grad_norm": 0.3184274733066559, "learning_rate": 3.955182072829132e-05, "loss": 0.3874, "step": 34404 }, { "epoch": 19.220670391061454, "grad_norm": 0.4267182946205139, "learning_rate": 3.9523809523809526e-05, "loss": 0.3292, "step": 34405 }, { 
"epoch": 19.22122905027933, "grad_norm": 0.3267779052257538, "learning_rate": 3.949579831932773e-05, "loss": 0.2746, "step": 34406 }, { "epoch": 19.221787709497207, "grad_norm": 0.8905510306358337, "learning_rate": 3.946778711484594e-05, "loss": 0.4621, "step": 34407 }, { "epoch": 19.222346368715083, "grad_norm": 3.1319580078125, "learning_rate": 3.943977591036415e-05, "loss": 0.3476, "step": 34408 }, { "epoch": 19.22290502793296, "grad_norm": 0.4703458547592163, "learning_rate": 3.9411764705882356e-05, "loss": 0.3801, "step": 34409 }, { "epoch": 19.223463687150836, "grad_norm": 0.3702363073825836, "learning_rate": 3.938375350140056e-05, "loss": 0.3417, "step": 34410 }, { "epoch": 19.224022346368717, "grad_norm": 0.5029980540275574, "learning_rate": 3.935574229691877e-05, "loss": 0.4235, "step": 34411 }, { "epoch": 19.224581005586593, "grad_norm": 0.42715221643447876, "learning_rate": 3.9327731092436974e-05, "loss": 0.4443, "step": 34412 }, { "epoch": 19.22513966480447, "grad_norm": 0.8617134094238281, "learning_rate": 3.929971988795519e-05, "loss": 0.4453, "step": 34413 }, { "epoch": 19.225698324022346, "grad_norm": 0.338504433631897, "learning_rate": 3.927170868347339e-05, "loss": 0.3728, "step": 34414 }, { "epoch": 19.226256983240223, "grad_norm": 0.643187403678894, "learning_rate": 3.92436974789916e-05, "loss": 0.6016, "step": 34415 }, { "epoch": 19.2268156424581, "grad_norm": 0.35219722986221313, "learning_rate": 3.9215686274509805e-05, "loss": 0.2796, "step": 34416 }, { "epoch": 19.227374301675976, "grad_norm": 0.8584226369857788, "learning_rate": 3.918767507002801e-05, "loss": 0.3179, "step": 34417 }, { "epoch": 19.227932960893856, "grad_norm": 0.4579952359199524, "learning_rate": 3.915966386554622e-05, "loss": 0.4259, "step": 34418 }, { "epoch": 19.228491620111733, "grad_norm": 0.4399353861808777, "learning_rate": 3.913165266106443e-05, "loss": 0.3223, "step": 34419 }, { "epoch": 19.22905027932961, "grad_norm": 0.5349218845367432, "learning_rate": 
3.9103641456582636e-05, "loss": 0.4382, "step": 34420 }, { "epoch": 19.229608938547486, "grad_norm": 0.4383047819137573, "learning_rate": 3.907563025210084e-05, "loss": 0.3807, "step": 34421 }, { "epoch": 19.230167597765362, "grad_norm": 0.3751812279224396, "learning_rate": 3.904761904761905e-05, "loss": 0.3856, "step": 34422 }, { "epoch": 19.23072625698324, "grad_norm": 0.6546064019203186, "learning_rate": 3.9019607843137254e-05, "loss": 0.4378, "step": 34423 }, { "epoch": 19.23128491620112, "grad_norm": 0.4113582372665405, "learning_rate": 3.8991596638655467e-05, "loss": 0.3533, "step": 34424 }, { "epoch": 19.231843575418996, "grad_norm": 0.31873294711112976, "learning_rate": 3.896358543417367e-05, "loss": 0.3057, "step": 34425 }, { "epoch": 19.232402234636872, "grad_norm": 2.620142936706543, "learning_rate": 3.893557422969188e-05, "loss": 0.3723, "step": 34426 }, { "epoch": 19.23296089385475, "grad_norm": 0.493437796831131, "learning_rate": 3.8907563025210084e-05, "loss": 0.3218, "step": 34427 }, { "epoch": 19.233519553072625, "grad_norm": 0.4244549870491028, "learning_rate": 3.887955182072829e-05, "loss": 0.4358, "step": 34428 }, { "epoch": 19.234078212290502, "grad_norm": 0.4533943831920624, "learning_rate": 3.8851540616246496e-05, "loss": 0.4536, "step": 34429 }, { "epoch": 19.23463687150838, "grad_norm": 0.4940558969974518, "learning_rate": 3.882352941176471e-05, "loss": 0.418, "step": 34430 }, { "epoch": 19.23519553072626, "grad_norm": 1.029957890510559, "learning_rate": 3.8795518207282915e-05, "loss": 0.3927, "step": 34431 }, { "epoch": 19.235754189944135, "grad_norm": 0.5873790979385376, "learning_rate": 3.876750700280112e-05, "loss": 0.4747, "step": 34432 }, { "epoch": 19.23631284916201, "grad_norm": 2.361293077468872, "learning_rate": 3.873949579831933e-05, "loss": 0.3537, "step": 34433 }, { "epoch": 19.23687150837989, "grad_norm": 2.3189992904663086, "learning_rate": 3.871148459383753e-05, "loss": 0.3931, "step": 34434 }, { "epoch": 19.237430167597765, 
"grad_norm": 0.4077621400356293, "learning_rate": 3.8683473389355746e-05, "loss": 0.3609, "step": 34435 }, { "epoch": 19.23798882681564, "grad_norm": 1.0960241556167603, "learning_rate": 3.865546218487395e-05, "loss": 0.4345, "step": 34436 }, { "epoch": 19.238547486033518, "grad_norm": 0.38314005732536316, "learning_rate": 3.862745098039216e-05, "loss": 0.4599, "step": 34437 }, { "epoch": 19.239106145251398, "grad_norm": 0.7055360674858093, "learning_rate": 3.8599439775910364e-05, "loss": 0.4675, "step": 34438 }, { "epoch": 19.239664804469275, "grad_norm": 0.6184757947921753, "learning_rate": 3.857142857142857e-05, "loss": 0.3714, "step": 34439 }, { "epoch": 19.24022346368715, "grad_norm": 0.611702024936676, "learning_rate": 3.854341736694678e-05, "loss": 0.3398, "step": 34440 }, { "epoch": 19.240782122905028, "grad_norm": 0.37282654643058777, "learning_rate": 3.851540616246499e-05, "loss": 0.412, "step": 34441 }, { "epoch": 19.241340782122904, "grad_norm": 0.5059308409690857, "learning_rate": 3.8487394957983195e-05, "loss": 0.4693, "step": 34442 }, { "epoch": 19.24189944134078, "grad_norm": 0.5726853609085083, "learning_rate": 3.84593837535014e-05, "loss": 0.393, "step": 34443 }, { "epoch": 19.242458100558657, "grad_norm": 0.6580373644828796, "learning_rate": 3.843137254901961e-05, "loss": 0.4918, "step": 34444 }, { "epoch": 19.243016759776538, "grad_norm": 0.3653848171234131, "learning_rate": 3.840336134453781e-05, "loss": 0.3033, "step": 34445 }, { "epoch": 19.243575418994414, "grad_norm": 0.5073138475418091, "learning_rate": 3.8375350140056026e-05, "loss": 0.4398, "step": 34446 }, { "epoch": 19.24413407821229, "grad_norm": 0.4576285183429718, "learning_rate": 3.834733893557423e-05, "loss": 0.4018, "step": 34447 }, { "epoch": 19.244692737430167, "grad_norm": 0.4962644875049591, "learning_rate": 3.831932773109244e-05, "loss": 0.3701, "step": 34448 }, { "epoch": 19.245251396648044, "grad_norm": 0.3840586841106415, "learning_rate": 3.8291316526610643e-05, "loss": 
0.3993, "step": 34449 }, { "epoch": 19.24581005586592, "grad_norm": 0.3287234306335449, "learning_rate": 3.826330532212885e-05, "loss": 0.3254, "step": 34450 }, { "epoch": 19.2463687150838, "grad_norm": 0.37090054154396057, "learning_rate": 3.823529411764706e-05, "loss": 0.2896, "step": 34451 }, { "epoch": 19.246927374301677, "grad_norm": 0.7580893635749817, "learning_rate": 3.820728291316527e-05, "loss": 0.3594, "step": 34452 }, { "epoch": 19.247486033519554, "grad_norm": 0.679252028465271, "learning_rate": 3.8179271708683474e-05, "loss": 0.3687, "step": 34453 }, { "epoch": 19.24804469273743, "grad_norm": 0.6639065146446228, "learning_rate": 3.815126050420168e-05, "loss": 0.2488, "step": 34454 }, { "epoch": 19.248603351955307, "grad_norm": 1.4326963424682617, "learning_rate": 3.8123249299719886e-05, "loss": 0.3029, "step": 34455 }, { "epoch": 19.249162011173183, "grad_norm": 0.39584365487098694, "learning_rate": 3.80952380952381e-05, "loss": 0.4003, "step": 34456 }, { "epoch": 19.24972067039106, "grad_norm": 0.36156165599823, "learning_rate": 3.8067226890756305e-05, "loss": 0.248, "step": 34457 }, { "epoch": 19.25027932960894, "grad_norm": 3.6178572177886963, "learning_rate": 3.803921568627451e-05, "loss": 0.3861, "step": 34458 }, { "epoch": 19.250837988826817, "grad_norm": 0.5734772086143494, "learning_rate": 3.801120448179272e-05, "loss": 0.3264, "step": 34459 }, { "epoch": 19.251396648044693, "grad_norm": 0.5030843019485474, "learning_rate": 3.798319327731092e-05, "loss": 0.485, "step": 34460 }, { "epoch": 19.25195530726257, "grad_norm": 0.5218889713287354, "learning_rate": 3.795518207282913e-05, "loss": 0.3418, "step": 34461 }, { "epoch": 19.252513966480446, "grad_norm": 0.3216029107570648, "learning_rate": 3.792717086834734e-05, "loss": 0.2891, "step": 34462 }, { "epoch": 19.253072625698323, "grad_norm": 0.6246047019958496, "learning_rate": 3.789915966386555e-05, "loss": 0.436, "step": 34463 }, { "epoch": 19.2536312849162, "grad_norm": 0.6183018684387207, 
"learning_rate": 3.7871148459383754e-05, "loss": 0.4096, "step": 34464 }, { "epoch": 19.25418994413408, "grad_norm": 0.406149685382843, "learning_rate": 3.784313725490196e-05, "loss": 0.4364, "step": 34465 }, { "epoch": 19.254748603351956, "grad_norm": 0.5009737014770508, "learning_rate": 3.7815126050420166e-05, "loss": 0.4631, "step": 34466 }, { "epoch": 19.255307262569833, "grad_norm": 0.803261399269104, "learning_rate": 3.778711484593838e-05, "loss": 0.4859, "step": 34467 }, { "epoch": 19.25586592178771, "grad_norm": 0.40794411301612854, "learning_rate": 3.7759103641456584e-05, "loss": 0.3855, "step": 34468 }, { "epoch": 19.256424581005586, "grad_norm": 0.42702820897102356, "learning_rate": 3.773109243697479e-05, "loss": 0.3593, "step": 34469 }, { "epoch": 19.256983240223462, "grad_norm": 0.43986064195632935, "learning_rate": 3.7703081232492996e-05, "loss": 0.4934, "step": 34470 }, { "epoch": 19.257541899441343, "grad_norm": 0.46019378304481506, "learning_rate": 3.76750700280112e-05, "loss": 0.4586, "step": 34471 }, { "epoch": 19.25810055865922, "grad_norm": 0.5878046751022339, "learning_rate": 3.764705882352941e-05, "loss": 0.5377, "step": 34472 }, { "epoch": 19.258659217877096, "grad_norm": 0.4518672525882721, "learning_rate": 3.761904761904762e-05, "loss": 0.4622, "step": 34473 }, { "epoch": 19.259217877094972, "grad_norm": 0.3877251446247101, "learning_rate": 3.759103641456583e-05, "loss": 0.4393, "step": 34474 }, { "epoch": 19.25977653631285, "grad_norm": 0.33077797293663025, "learning_rate": 3.756302521008403e-05, "loss": 0.4212, "step": 34475 }, { "epoch": 19.260335195530725, "grad_norm": 3.9410672187805176, "learning_rate": 3.753501400560224e-05, "loss": 0.3889, "step": 34476 }, { "epoch": 19.260893854748602, "grad_norm": 0.3530593514442444, "learning_rate": 3.7507002801120445e-05, "loss": 0.4301, "step": 34477 }, { "epoch": 19.261452513966482, "grad_norm": 0.29086387157440186, "learning_rate": 3.747899159663866e-05, "loss": 0.3044, "step": 34478 }, { 
"epoch": 19.26201117318436, "grad_norm": 0.5265844464302063, "learning_rate": 3.7450980392156864e-05, "loss": 0.3984, "step": 34479 }, { "epoch": 19.262569832402235, "grad_norm": 1.8132120370864868, "learning_rate": 3.742296918767507e-05, "loss": 0.5091, "step": 34480 }, { "epoch": 19.26312849162011, "grad_norm": 0.6871200799942017, "learning_rate": 3.7394957983193276e-05, "loss": 0.4322, "step": 34481 }, { "epoch": 19.26368715083799, "grad_norm": 4.2410712242126465, "learning_rate": 3.736694677871148e-05, "loss": 0.3524, "step": 34482 }, { "epoch": 19.264245810055865, "grad_norm": 0.3089617192745209, "learning_rate": 3.7338935574229695e-05, "loss": 0.3252, "step": 34483 }, { "epoch": 19.26480446927374, "grad_norm": 0.5155884027481079, "learning_rate": 3.73109243697479e-05, "loss": 0.5773, "step": 34484 }, { "epoch": 19.26536312849162, "grad_norm": 0.459256112575531, "learning_rate": 3.728291316526611e-05, "loss": 0.4601, "step": 34485 }, { "epoch": 19.265921787709498, "grad_norm": 0.5264071226119995, "learning_rate": 3.725490196078431e-05, "loss": 0.4621, "step": 34486 }, { "epoch": 19.266480446927375, "grad_norm": 0.2996077239513397, "learning_rate": 3.722689075630252e-05, "loss": 0.2895, "step": 34487 }, { "epoch": 19.26703910614525, "grad_norm": 0.5657638907432556, "learning_rate": 3.7198879551820725e-05, "loss": 0.3957, "step": 34488 }, { "epoch": 19.267597765363128, "grad_norm": 0.4529856741428375, "learning_rate": 3.717086834733894e-05, "loss": 0.3525, "step": 34489 }, { "epoch": 19.268156424581004, "grad_norm": 0.43867769837379456, "learning_rate": 3.7142857142857143e-05, "loss": 0.4109, "step": 34490 }, { "epoch": 19.26871508379888, "grad_norm": 2.437505006790161, "learning_rate": 3.711484593837535e-05, "loss": 0.3627, "step": 34491 }, { "epoch": 19.26927374301676, "grad_norm": 0.4841361343860626, "learning_rate": 3.7086834733893555e-05, "loss": 0.3623, "step": 34492 }, { "epoch": 19.269832402234638, "grad_norm": 0.5400592088699341, "learning_rate": 
3.705882352941176e-05, "loss": 0.3654, "step": 34493 }, { "epoch": 19.270391061452514, "grad_norm": 1.1143990755081177, "learning_rate": 3.7030812324929974e-05, "loss": 0.4004, "step": 34494 }, { "epoch": 19.27094972067039, "grad_norm": 0.37151867151260376, "learning_rate": 3.700280112044818e-05, "loss": 0.4146, "step": 34495 }, { "epoch": 19.271508379888267, "grad_norm": 0.320733904838562, "learning_rate": 3.6974789915966386e-05, "loss": 0.2426, "step": 34496 }, { "epoch": 19.272067039106144, "grad_norm": 0.4429011344909668, "learning_rate": 3.694677871148459e-05, "loss": 0.3922, "step": 34497 }, { "epoch": 19.272625698324024, "grad_norm": 3.2030532360076904, "learning_rate": 3.69187675070028e-05, "loss": 0.446, "step": 34498 }, { "epoch": 19.2731843575419, "grad_norm": 0.5493384599685669, "learning_rate": 3.6890756302521004e-05, "loss": 0.3944, "step": 34499 }, { "epoch": 19.273743016759777, "grad_norm": 0.5305752158164978, "learning_rate": 3.686274509803922e-05, "loss": 0.3661, "step": 34500 }, { "epoch": 19.273743016759777, "eval_cer": 0.08440531133732665, "eval_loss": 0.31948575377464294, "eval_runtime": 58.7391, "eval_samples_per_second": 77.257, "eval_steps_per_second": 4.835, "eval_wer": 0.3343482025641742, "step": 34500 }, { "epoch": 19.274301675977654, "grad_norm": 0.43951746821403503, "learning_rate": 3.683473389355742e-05, "loss": 0.4555, "step": 34501 }, { "epoch": 19.27486033519553, "grad_norm": 0.5666416883468628, "learning_rate": 3.680672268907563e-05, "loss": 0.4083, "step": 34502 }, { "epoch": 19.275418994413407, "grad_norm": 0.3738899827003479, "learning_rate": 3.6778711484593835e-05, "loss": 0.3931, "step": 34503 }, { "epoch": 19.275977653631283, "grad_norm": 0.8670925498008728, "learning_rate": 3.675070028011204e-05, "loss": 0.3392, "step": 34504 }, { "epoch": 19.276536312849164, "grad_norm": 0.3728310465812683, "learning_rate": 3.6722689075630254e-05, "loss": 0.4187, "step": 34505 }, { "epoch": 19.27709497206704, "grad_norm": 
0.6954748034477234, "learning_rate": 3.669467787114846e-05, "loss": 0.4187, "step": 34506 }, { "epoch": 19.277653631284917, "grad_norm": 0.4802590012550354, "learning_rate": 3.6666666666666666e-05, "loss": 0.3605, "step": 34507 }, { "epoch": 19.278212290502793, "grad_norm": 0.5010871887207031, "learning_rate": 3.663865546218487e-05, "loss": 0.449, "step": 34508 }, { "epoch": 19.27877094972067, "grad_norm": 0.9805195331573486, "learning_rate": 3.661064425770308e-05, "loss": 0.3888, "step": 34509 }, { "epoch": 19.279329608938546, "grad_norm": 0.35273832082748413, "learning_rate": 3.658263305322129e-05, "loss": 0.322, "step": 34510 }, { "epoch": 19.279888268156423, "grad_norm": 0.7681180834770203, "learning_rate": 3.6554621848739496e-05, "loss": 0.4689, "step": 34511 }, { "epoch": 19.280446927374303, "grad_norm": 2.174262046813965, "learning_rate": 3.65266106442577e-05, "loss": 0.4476, "step": 34512 }, { "epoch": 19.28100558659218, "grad_norm": 0.5720050930976868, "learning_rate": 3.649859943977591e-05, "loss": 0.3996, "step": 34513 }, { "epoch": 19.281564245810056, "grad_norm": 0.4021025598049164, "learning_rate": 3.6470588235294114e-05, "loss": 0.4229, "step": 34514 }, { "epoch": 19.282122905027933, "grad_norm": 0.37996503710746765, "learning_rate": 3.644257703081232e-05, "loss": 0.3358, "step": 34515 }, { "epoch": 19.28268156424581, "grad_norm": 0.36724987626075745, "learning_rate": 3.641456582633053e-05, "loss": 0.3909, "step": 34516 }, { "epoch": 19.283240223463686, "grad_norm": 0.3688671886920929, "learning_rate": 3.638655462184874e-05, "loss": 0.3492, "step": 34517 }, { "epoch": 19.283798882681563, "grad_norm": 0.7051141262054443, "learning_rate": 3.6358543417366945e-05, "loss": 0.7063, "step": 34518 }, { "epoch": 19.284357541899443, "grad_norm": 0.47929301857948303, "learning_rate": 3.633053221288515e-05, "loss": 0.3617, "step": 34519 }, { "epoch": 19.28491620111732, "grad_norm": 0.49092692136764526, "learning_rate": 3.630252100840336e-05, "loss": 0.3148, 
"step": 34520 }, { "epoch": 19.285474860335196, "grad_norm": 0.6783444285392761, "learning_rate": 3.627450980392157e-05, "loss": 0.583, "step": 34521 }, { "epoch": 19.286033519553072, "grad_norm": 0.447298139333725, "learning_rate": 3.6246498599439776e-05, "loss": 0.4116, "step": 34522 }, { "epoch": 19.28659217877095, "grad_norm": 2.8305201530456543, "learning_rate": 3.621848739495798e-05, "loss": 0.3925, "step": 34523 }, { "epoch": 19.287150837988825, "grad_norm": 0.45870012044906616, "learning_rate": 3.619047619047619e-05, "loss": 0.3785, "step": 34524 }, { "epoch": 19.287709497206706, "grad_norm": 0.41788142919540405, "learning_rate": 3.6162464985994394e-05, "loss": 0.3599, "step": 34525 }, { "epoch": 19.288268156424582, "grad_norm": 0.4122820794582367, "learning_rate": 3.613445378151261e-05, "loss": 0.4336, "step": 34526 }, { "epoch": 19.28882681564246, "grad_norm": 0.709364116191864, "learning_rate": 3.610644257703081e-05, "loss": 0.3704, "step": 34527 }, { "epoch": 19.289385474860335, "grad_norm": 0.37942859530448914, "learning_rate": 3.607843137254902e-05, "loss": 0.4096, "step": 34528 }, { "epoch": 19.289944134078212, "grad_norm": 0.6213984489440918, "learning_rate": 3.6050420168067225e-05, "loss": 0.4096, "step": 34529 }, { "epoch": 19.29050279329609, "grad_norm": 0.4581393599510193, "learning_rate": 3.602240896358543e-05, "loss": 0.4387, "step": 34530 }, { "epoch": 19.291061452513965, "grad_norm": 0.41568735241889954, "learning_rate": 3.599439775910364e-05, "loss": 0.4137, "step": 34531 }, { "epoch": 19.291620111731845, "grad_norm": 0.3643159866333008, "learning_rate": 3.596638655462185e-05, "loss": 0.4055, "step": 34532 }, { "epoch": 19.29217877094972, "grad_norm": 1.5516700744628906, "learning_rate": 3.5938375350140055e-05, "loss": 0.5469, "step": 34533 }, { "epoch": 19.2927374301676, "grad_norm": 0.3524828255176544, "learning_rate": 3.591036414565826e-05, "loss": 0.3719, "step": 34534 }, { "epoch": 19.293296089385475, "grad_norm": 0.5035155415534973, 
"learning_rate": 3.588235294117647e-05, "loss": 0.4485, "step": 34535 }, { "epoch": 19.29385474860335, "grad_norm": 0.6188202500343323, "learning_rate": 3.5854341736694673e-05, "loss": 0.4273, "step": 34536 }, { "epoch": 19.294413407821228, "grad_norm": 0.4884275496006012, "learning_rate": 3.5826330532212886e-05, "loss": 0.3879, "step": 34537 }, { "epoch": 19.294972067039105, "grad_norm": 0.5118287205696106, "learning_rate": 3.579831932773109e-05, "loss": 0.3497, "step": 34538 }, { "epoch": 19.295530726256985, "grad_norm": 1.9995648860931396, "learning_rate": 3.57703081232493e-05, "loss": 0.2845, "step": 34539 }, { "epoch": 19.29608938547486, "grad_norm": 0.8530018925666809, "learning_rate": 3.5742296918767504e-05, "loss": 0.4404, "step": 34540 }, { "epoch": 19.296648044692738, "grad_norm": 0.7184450626373291, "learning_rate": 3.571428571428571e-05, "loss": 0.3281, "step": 34541 }, { "epoch": 19.297206703910614, "grad_norm": 0.36259332299232483, "learning_rate": 3.5686274509803916e-05, "loss": 0.4072, "step": 34542 }, { "epoch": 19.29776536312849, "grad_norm": 1.0099579095840454, "learning_rate": 3.565826330532213e-05, "loss": 0.5971, "step": 34543 }, { "epoch": 19.298324022346367, "grad_norm": 0.3382643163204193, "learning_rate": 3.5630252100840335e-05, "loss": 0.3694, "step": 34544 }, { "epoch": 19.298882681564244, "grad_norm": 0.5092337727546692, "learning_rate": 3.560224089635854e-05, "loss": 0.4617, "step": 34545 }, { "epoch": 19.299441340782124, "grad_norm": 0.46436750888824463, "learning_rate": 3.557422969187675e-05, "loss": 0.4547, "step": 34546 }, { "epoch": 19.3, "grad_norm": 1.312204360961914, "learning_rate": 3.554621848739495e-05, "loss": 0.3458, "step": 34547 }, { "epoch": 19.300558659217877, "grad_norm": 2.6606457233428955, "learning_rate": 3.5518207282913166e-05, "loss": 0.5411, "step": 34548 }, { "epoch": 19.301117318435754, "grad_norm": 0.3522096276283264, "learning_rate": 3.549019607843137e-05, "loss": 0.3317, "step": 34549 }, { "epoch": 
19.30167597765363, "grad_norm": 1.109816074371338, "learning_rate": 3.546218487394958e-05, "loss": 0.4761, "step": 34550 }, { "epoch": 19.302234636871507, "grad_norm": 0.5125716924667358, "learning_rate": 3.5434173669467784e-05, "loss": 0.4579, "step": 34551 }, { "epoch": 19.302793296089387, "grad_norm": 0.4348616302013397, "learning_rate": 3.540616246498599e-05, "loss": 0.3682, "step": 34552 }, { "epoch": 19.303351955307264, "grad_norm": 0.5202425122261047, "learning_rate": 3.53781512605042e-05, "loss": 0.3406, "step": 34553 }, { "epoch": 19.30391061452514, "grad_norm": 0.32495445013046265, "learning_rate": 3.535014005602241e-05, "loss": 0.3373, "step": 34554 }, { "epoch": 19.304469273743017, "grad_norm": 0.4260399341583252, "learning_rate": 3.5322128851540614e-05, "loss": 0.397, "step": 34555 }, { "epoch": 19.305027932960893, "grad_norm": 0.6243739128112793, "learning_rate": 3.529411764705882e-05, "loss": 0.3791, "step": 34556 }, { "epoch": 19.30558659217877, "grad_norm": 0.881285548210144, "learning_rate": 3.5266106442577026e-05, "loss": 0.4649, "step": 34557 }, { "epoch": 19.306145251396647, "grad_norm": 0.35440558195114136, "learning_rate": 3.523809523809523e-05, "loss": 0.331, "step": 34558 }, { "epoch": 19.306703910614527, "grad_norm": 0.31542378664016724, "learning_rate": 3.521008403361345e-05, "loss": 0.3808, "step": 34559 }, { "epoch": 19.307262569832403, "grad_norm": 0.5489582419395447, "learning_rate": 3.518207282913166e-05, "loss": 0.4356, "step": 34560 }, { "epoch": 19.30782122905028, "grad_norm": 0.41696399450302124, "learning_rate": 3.5154061624649864e-05, "loss": 0.3757, "step": 34561 }, { "epoch": 19.308379888268156, "grad_norm": 0.39428970217704773, "learning_rate": 3.512605042016807e-05, "loss": 0.3902, "step": 34562 }, { "epoch": 19.308938547486033, "grad_norm": 1.334436058998108, "learning_rate": 3.5098039215686276e-05, "loss": 0.4034, "step": 34563 }, { "epoch": 19.30949720670391, "grad_norm": 0.6640145182609558, "learning_rate": 
3.507002801120449e-05, "loss": 0.2782, "step": 34564 }, { "epoch": 19.310055865921786, "grad_norm": 0.45179006457328796, "learning_rate": 3.5042016806722695e-05, "loss": 0.423, "step": 34565 }, { "epoch": 19.310614525139666, "grad_norm": 0.6381816267967224, "learning_rate": 3.50140056022409e-05, "loss": 0.3782, "step": 34566 }, { "epoch": 19.311173184357543, "grad_norm": 0.40780389308929443, "learning_rate": 3.498599439775911e-05, "loss": 0.4524, "step": 34567 }, { "epoch": 19.31173184357542, "grad_norm": 0.4660477340221405, "learning_rate": 3.495798319327731e-05, "loss": 0.4591, "step": 34568 }, { "epoch": 19.312290502793296, "grad_norm": 0.49867674708366394, "learning_rate": 3.492997198879552e-05, "loss": 0.4471, "step": 34569 }, { "epoch": 19.312849162011172, "grad_norm": 0.46156176924705505, "learning_rate": 3.490196078431373e-05, "loss": 0.3721, "step": 34570 }, { "epoch": 19.31340782122905, "grad_norm": 0.46882903575897217, "learning_rate": 3.487394957983194e-05, "loss": 0.4086, "step": 34571 }, { "epoch": 19.31396648044693, "grad_norm": 0.4375666379928589, "learning_rate": 3.4845938375350143e-05, "loss": 0.3814, "step": 34572 }, { "epoch": 19.314525139664806, "grad_norm": 1.0045313835144043, "learning_rate": 3.481792717086835e-05, "loss": 0.35, "step": 34573 }, { "epoch": 19.315083798882682, "grad_norm": 0.5981054306030273, "learning_rate": 3.4789915966386555e-05, "loss": 0.4691, "step": 34574 }, { "epoch": 19.31564245810056, "grad_norm": 0.5679623484611511, "learning_rate": 3.476190476190477e-05, "loss": 0.3854, "step": 34575 }, { "epoch": 19.316201117318435, "grad_norm": 0.5705524682998657, "learning_rate": 3.4733893557422974e-05, "loss": 0.3607, "step": 34576 }, { "epoch": 19.316759776536312, "grad_norm": 0.32652977108955383, "learning_rate": 3.470588235294118e-05, "loss": 0.394, "step": 34577 }, { "epoch": 19.31731843575419, "grad_norm": 2.34604549407959, "learning_rate": 3.4677871148459386e-05, "loss": 0.4035, "step": 34578 }, { "epoch": 
19.31787709497207, "grad_norm": 0.5323050022125244, "learning_rate": 3.464985994397759e-05, "loss": 0.4212, "step": 34579 }, { "epoch": 19.318435754189945, "grad_norm": 0.4590151309967041, "learning_rate": 3.4621848739495805e-05, "loss": 0.4329, "step": 34580 }, { "epoch": 19.31899441340782, "grad_norm": 0.5562870502471924, "learning_rate": 3.459383753501401e-05, "loss": 0.4241, "step": 34581 }, { "epoch": 19.3195530726257, "grad_norm": 0.4686630368232727, "learning_rate": 3.456582633053222e-05, "loss": 0.4644, "step": 34582 }, { "epoch": 19.320111731843575, "grad_norm": 0.3051053583621979, "learning_rate": 3.453781512605042e-05, "loss": 0.3301, "step": 34583 }, { "epoch": 19.32067039106145, "grad_norm": 0.31965896487236023, "learning_rate": 3.450980392156863e-05, "loss": 0.4152, "step": 34584 }, { "epoch": 19.321229050279328, "grad_norm": 0.3505248725414276, "learning_rate": 3.4481792717086835e-05, "loss": 0.4031, "step": 34585 }, { "epoch": 19.321787709497208, "grad_norm": 0.3530297577381134, "learning_rate": 3.445378151260505e-05, "loss": 0.3574, "step": 34586 }, { "epoch": 19.322346368715085, "grad_norm": 0.36637377738952637, "learning_rate": 3.4425770308123254e-05, "loss": 0.4012, "step": 34587 }, { "epoch": 19.32290502793296, "grad_norm": 0.43705517053604126, "learning_rate": 3.439775910364146e-05, "loss": 0.3851, "step": 34588 }, { "epoch": 19.323463687150838, "grad_norm": 2.149904727935791, "learning_rate": 3.4369747899159666e-05, "loss": 0.4042, "step": 34589 }, { "epoch": 19.324022346368714, "grad_norm": 0.5753365159034729, "learning_rate": 3.434173669467787e-05, "loss": 0.3401, "step": 34590 }, { "epoch": 19.32458100558659, "grad_norm": 0.3856475055217743, "learning_rate": 3.4313725490196084e-05, "loss": 0.3026, "step": 34591 }, { "epoch": 19.325139664804468, "grad_norm": 0.3130301237106323, "learning_rate": 3.428571428571429e-05, "loss": 0.3567, "step": 34592 }, { "epoch": 19.325698324022348, "grad_norm": 0.3634701073169708, "learning_rate": 
3.4257703081232496e-05, "loss": 0.3695, "step": 34593 }, { "epoch": 19.326256983240224, "grad_norm": 0.4071289300918579, "learning_rate": 3.42296918767507e-05, "loss": 0.4572, "step": 34594 }, { "epoch": 19.3268156424581, "grad_norm": 0.4620494246482849, "learning_rate": 3.420168067226891e-05, "loss": 0.4014, "step": 34595 }, { "epoch": 19.327374301675977, "grad_norm": 0.4361553192138672, "learning_rate": 3.4173669467787114e-05, "loss": 0.35, "step": 34596 }, { "epoch": 19.327932960893854, "grad_norm": 0.40845033526420593, "learning_rate": 3.414565826330533e-05, "loss": 0.4101, "step": 34597 }, { "epoch": 19.32849162011173, "grad_norm": 7.659552574157715, "learning_rate": 3.411764705882353e-05, "loss": 0.4547, "step": 34598 }, { "epoch": 19.32905027932961, "grad_norm": 0.5099524259567261, "learning_rate": 3.408963585434174e-05, "loss": 0.4258, "step": 34599 }, { "epoch": 19.329608938547487, "grad_norm": 0.40536609292030334, "learning_rate": 3.4061624649859945e-05, "loss": 0.3796, "step": 34600 }, { "epoch": 19.330167597765364, "grad_norm": 0.34829026460647583, "learning_rate": 3.403361344537815e-05, "loss": 0.3913, "step": 34601 }, { "epoch": 19.33072625698324, "grad_norm": 0.7247732877731323, "learning_rate": 3.4005602240896364e-05, "loss": 0.4874, "step": 34602 }, { "epoch": 19.331284916201117, "grad_norm": 1.0088967084884644, "learning_rate": 3.397759103641457e-05, "loss": 0.3509, "step": 34603 }, { "epoch": 19.331843575418993, "grad_norm": 0.8679409027099609, "learning_rate": 3.3949579831932776e-05, "loss": 0.4532, "step": 34604 }, { "epoch": 19.33240223463687, "grad_norm": 0.49338603019714355, "learning_rate": 3.392156862745098e-05, "loss": 0.3935, "step": 34605 }, { "epoch": 19.33296089385475, "grad_norm": 0.5714191794395447, "learning_rate": 3.389355742296919e-05, "loss": 0.43, "step": 34606 }, { "epoch": 19.333519553072627, "grad_norm": 0.4332340955734253, "learning_rate": 3.38655462184874e-05, "loss": 0.3866, "step": 34607 }, { "epoch": 19.334078212290503, 
"grad_norm": 0.5391001105308533, "learning_rate": 3.383753501400561e-05, "loss": 0.5297, "step": 34608 }, { "epoch": 19.33463687150838, "grad_norm": 0.37089234590530396, "learning_rate": 3.380952380952381e-05, "loss": 0.4008, "step": 34609 }, { "epoch": 19.335195530726256, "grad_norm": 2.5591518878936768, "learning_rate": 3.378151260504202e-05, "loss": 0.5271, "step": 34610 }, { "epoch": 19.335754189944133, "grad_norm": 0.5355129241943359, "learning_rate": 3.3753501400560225e-05, "loss": 0.3615, "step": 34611 }, { "epoch": 19.33631284916201, "grad_norm": 0.5871062874794006, "learning_rate": 3.372549019607843e-05, "loss": 0.3361, "step": 34612 }, { "epoch": 19.33687150837989, "grad_norm": 0.5193787813186646, "learning_rate": 3.3697478991596643e-05, "loss": 0.4015, "step": 34613 }, { "epoch": 19.337430167597766, "grad_norm": 0.451000452041626, "learning_rate": 3.366946778711485e-05, "loss": 0.5063, "step": 34614 }, { "epoch": 19.337988826815643, "grad_norm": 1.3041807413101196, "learning_rate": 3.3641456582633055e-05, "loss": 0.3354, "step": 34615 }, { "epoch": 19.33854748603352, "grad_norm": 0.5264590382575989, "learning_rate": 3.361344537815126e-05, "loss": 0.402, "step": 34616 }, { "epoch": 19.339106145251396, "grad_norm": 3.2200613021850586, "learning_rate": 3.358543417366947e-05, "loss": 0.4691, "step": 34617 }, { "epoch": 19.339664804469272, "grad_norm": 0.6092663407325745, "learning_rate": 3.355742296918768e-05, "loss": 0.4262, "step": 34618 }, { "epoch": 19.340223463687153, "grad_norm": 0.9767898321151733, "learning_rate": 3.3529411764705886e-05, "loss": 0.3824, "step": 34619 }, { "epoch": 19.34078212290503, "grad_norm": 0.3635249733924866, "learning_rate": 3.350140056022409e-05, "loss": 0.3741, "step": 34620 }, { "epoch": 19.341340782122906, "grad_norm": 0.5556697249412537, "learning_rate": 3.34733893557423e-05, "loss": 0.4421, "step": 34621 }, { "epoch": 19.341899441340782, "grad_norm": 0.38647469878196716, "learning_rate": 3.3445378151260504e-05, "loss": 
0.4257, "step": 34622 }, { "epoch": 19.34245810055866, "grad_norm": 0.9777635931968689, "learning_rate": 3.341736694677872e-05, "loss": 0.4714, "step": 34623 }, { "epoch": 19.343016759776535, "grad_norm": 0.5502926707267761, "learning_rate": 3.338935574229692e-05, "loss": 0.3947, "step": 34624 }, { "epoch": 19.343575418994412, "grad_norm": 0.49105897545814514, "learning_rate": 3.336134453781513e-05, "loss": 0.3577, "step": 34625 }, { "epoch": 19.344134078212292, "grad_norm": 0.5177717804908752, "learning_rate": 3.3333333333333335e-05, "loss": 0.5035, "step": 34626 }, { "epoch": 19.34469273743017, "grad_norm": 1.2236799001693726, "learning_rate": 3.330532212885154e-05, "loss": 0.3496, "step": 34627 }, { "epoch": 19.345251396648045, "grad_norm": 0.35906124114990234, "learning_rate": 3.327731092436975e-05, "loss": 0.3504, "step": 34628 }, { "epoch": 19.345810055865922, "grad_norm": 0.3925894796848297, "learning_rate": 3.324929971988796e-05, "loss": 0.4446, "step": 34629 }, { "epoch": 19.3463687150838, "grad_norm": 0.4980289936065674, "learning_rate": 3.3221288515406166e-05, "loss": 0.5324, "step": 34630 }, { "epoch": 19.346927374301675, "grad_norm": 0.40415525436401367, "learning_rate": 3.319327731092437e-05, "loss": 0.3328, "step": 34631 }, { "epoch": 19.34748603351955, "grad_norm": 0.37800678610801697, "learning_rate": 3.316526610644258e-05, "loss": 0.4266, "step": 34632 }, { "epoch": 19.34804469273743, "grad_norm": 0.33418819308280945, "learning_rate": 3.3137254901960784e-05, "loss": 0.3856, "step": 34633 }, { "epoch": 19.34860335195531, "grad_norm": 1.0299174785614014, "learning_rate": 3.3109243697478996e-05, "loss": 0.5321, "step": 34634 }, { "epoch": 19.349162011173185, "grad_norm": 0.38019147515296936, "learning_rate": 3.30812324929972e-05, "loss": 0.4656, "step": 34635 }, { "epoch": 19.34972067039106, "grad_norm": 0.5020016431808472, "learning_rate": 3.305322128851541e-05, "loss": 0.3786, "step": 34636 }, { "epoch": 19.350279329608938, "grad_norm": 
0.7795881032943726, "learning_rate": 3.3025210084033614e-05, "loss": 0.6756, "step": 34637 }, { "epoch": 19.350837988826814, "grad_norm": 1.6135830879211426, "learning_rate": 3.299719887955182e-05, "loss": 0.4821, "step": 34638 }, { "epoch": 19.35139664804469, "grad_norm": 0.3907015025615692, "learning_rate": 3.2969187675070026e-05, "loss": 0.3012, "step": 34639 }, { "epoch": 19.35195530726257, "grad_norm": 0.39014896750450134, "learning_rate": 3.294117647058824e-05, "loss": 0.3422, "step": 34640 }, { "epoch": 19.352513966480448, "grad_norm": 0.346729040145874, "learning_rate": 3.2913165266106445e-05, "loss": 0.4306, "step": 34641 }, { "epoch": 19.353072625698324, "grad_norm": 0.4760814309120178, "learning_rate": 3.288515406162465e-05, "loss": 0.4452, "step": 34642 }, { "epoch": 19.3536312849162, "grad_norm": 0.42498233914375305, "learning_rate": 3.285714285714286e-05, "loss": 0.4091, "step": 34643 }, { "epoch": 19.354189944134077, "grad_norm": 1.282052755355835, "learning_rate": 3.282913165266106e-05, "loss": 0.3175, "step": 34644 }, { "epoch": 19.354748603351954, "grad_norm": 0.5970929265022278, "learning_rate": 3.2801120448179276e-05, "loss": 0.5549, "step": 34645 }, { "epoch": 19.355307262569834, "grad_norm": 1.0843144655227661, "learning_rate": 3.277310924369748e-05, "loss": 0.3928, "step": 34646 }, { "epoch": 19.35586592178771, "grad_norm": 0.34287598729133606, "learning_rate": 3.274509803921569e-05, "loss": 0.3053, "step": 34647 }, { "epoch": 19.356424581005587, "grad_norm": 0.36522290110588074, "learning_rate": 3.2717086834733894e-05, "loss": 0.4558, "step": 34648 }, { "epoch": 19.356983240223464, "grad_norm": 0.3644511103630066, "learning_rate": 3.26890756302521e-05, "loss": 0.3109, "step": 34649 }, { "epoch": 19.35754189944134, "grad_norm": 0.4517008662223816, "learning_rate": 3.266106442577031e-05, "loss": 0.4343, "step": 34650 }, { "epoch": 19.358100558659217, "grad_norm": 0.5071067810058594, "learning_rate": 3.263305322128852e-05, "loss": 0.3534, 
"step": 34651 }, { "epoch": 19.358659217877094, "grad_norm": 0.4516407549381256, "learning_rate": 3.2605042016806725e-05, "loss": 0.2933, "step": 34652 }, { "epoch": 19.359217877094974, "grad_norm": 0.37370795011520386, "learning_rate": 3.257703081232493e-05, "loss": 0.3682, "step": 34653 }, { "epoch": 19.35977653631285, "grad_norm": 0.38454169034957886, "learning_rate": 3.254901960784314e-05, "loss": 0.3677, "step": 34654 }, { "epoch": 19.360335195530727, "grad_norm": 0.6930497884750366, "learning_rate": 3.252100840336134e-05, "loss": 0.4225, "step": 34655 }, { "epoch": 19.360893854748603, "grad_norm": 0.4099404513835907, "learning_rate": 3.2492997198879555e-05, "loss": 0.3765, "step": 34656 }, { "epoch": 19.36145251396648, "grad_norm": 0.4370623826980591, "learning_rate": 3.246498599439776e-05, "loss": 0.343, "step": 34657 }, { "epoch": 19.362011173184356, "grad_norm": 0.3761231601238251, "learning_rate": 3.243697478991597e-05, "loss": 0.4067, "step": 34658 }, { "epoch": 19.362569832402233, "grad_norm": 0.46025410294532776, "learning_rate": 3.2408963585434173e-05, "loss": 0.3499, "step": 34659 }, { "epoch": 19.363128491620113, "grad_norm": 1.623153805732727, "learning_rate": 3.238095238095238e-05, "loss": 0.4312, "step": 34660 }, { "epoch": 19.36368715083799, "grad_norm": 0.3973400592803955, "learning_rate": 3.235294117647059e-05, "loss": 0.3336, "step": 34661 }, { "epoch": 19.364245810055866, "grad_norm": 3.6344473361968994, "learning_rate": 3.23249299719888e-05, "loss": 0.4525, "step": 34662 }, { "epoch": 19.364804469273743, "grad_norm": 0.7064842581748962, "learning_rate": 3.2296918767507004e-05, "loss": 0.3495, "step": 34663 }, { "epoch": 19.36536312849162, "grad_norm": 0.36064934730529785, "learning_rate": 3.226890756302521e-05, "loss": 0.4462, "step": 34664 }, { "epoch": 19.365921787709496, "grad_norm": 0.8353272080421448, "learning_rate": 3.2240896358543416e-05, "loss": 0.471, "step": 34665 }, { "epoch": 19.366480446927373, "grad_norm": 0.3255285620689392, 
"learning_rate": 3.221288515406162e-05, "loss": 0.2968, "step": 34666 }, { "epoch": 19.367039106145253, "grad_norm": 0.9225115180015564, "learning_rate": 3.2184873949579835e-05, "loss": 0.3536, "step": 34667 }, { "epoch": 19.36759776536313, "grad_norm": 0.40227606892585754, "learning_rate": 3.215686274509804e-05, "loss": 0.3704, "step": 34668 }, { "epoch": 19.368156424581006, "grad_norm": 0.35369041562080383, "learning_rate": 3.212885154061625e-05, "loss": 0.3284, "step": 34669 }, { "epoch": 19.368715083798882, "grad_norm": 0.4591996371746063, "learning_rate": 3.210084033613445e-05, "loss": 0.4583, "step": 34670 }, { "epoch": 19.36927374301676, "grad_norm": 0.5736231207847595, "learning_rate": 3.207282913165266e-05, "loss": 0.5323, "step": 34671 }, { "epoch": 19.369832402234636, "grad_norm": 0.5638227462768555, "learning_rate": 3.204481792717087e-05, "loss": 0.4511, "step": 34672 }, { "epoch": 19.370391061452516, "grad_norm": 0.4991433620452881, "learning_rate": 3.201680672268908e-05, "loss": 0.5381, "step": 34673 }, { "epoch": 19.370949720670392, "grad_norm": 0.5432763695716858, "learning_rate": 3.1988795518207284e-05, "loss": 0.3771, "step": 34674 }, { "epoch": 19.37150837988827, "grad_norm": 0.42705973982810974, "learning_rate": 3.196078431372549e-05, "loss": 0.4681, "step": 34675 }, { "epoch": 19.372067039106145, "grad_norm": 0.3913818299770355, "learning_rate": 3.1932773109243696e-05, "loss": 0.3428, "step": 34676 }, { "epoch": 19.372625698324022, "grad_norm": 0.385786771774292, "learning_rate": 3.190476190476191e-05, "loss": 0.3126, "step": 34677 }, { "epoch": 19.3731843575419, "grad_norm": 0.4680740535259247, "learning_rate": 3.1876750700280114e-05, "loss": 0.5126, "step": 34678 }, { "epoch": 19.373743016759775, "grad_norm": 0.6564361453056335, "learning_rate": 3.184873949579832e-05, "loss": 0.4606, "step": 34679 }, { "epoch": 19.374301675977655, "grad_norm": 5.351374626159668, "learning_rate": 3.1820728291316526e-05, "loss": 0.3892, "step": 34680 }, { 
"epoch": 19.37486033519553, "grad_norm": 0.3373904824256897, "learning_rate": 3.179271708683473e-05, "loss": 0.4192, "step": 34681 }, { "epoch": 19.37541899441341, "grad_norm": 2.507180690765381, "learning_rate": 3.176470588235294e-05, "loss": 0.4172, "step": 34682 }, { "epoch": 19.375977653631285, "grad_norm": 0.370297908782959, "learning_rate": 3.173669467787115e-05, "loss": 0.3853, "step": 34683 }, { "epoch": 19.37653631284916, "grad_norm": 0.47303617000579834, "learning_rate": 3.170868347338936e-05, "loss": 0.4624, "step": 34684 }, { "epoch": 19.377094972067038, "grad_norm": 0.36894431710243225, "learning_rate": 3.168067226890756e-05, "loss": 0.361, "step": 34685 }, { "epoch": 19.377653631284915, "grad_norm": 0.9282207489013672, "learning_rate": 3.165266106442577e-05, "loss": 0.3989, "step": 34686 }, { "epoch": 19.378212290502795, "grad_norm": 0.4179244935512543, "learning_rate": 3.1624649859943975e-05, "loss": 0.4752, "step": 34687 }, { "epoch": 19.37877094972067, "grad_norm": 0.9252038598060608, "learning_rate": 3.159663865546219e-05, "loss": 0.4406, "step": 34688 }, { "epoch": 19.379329608938548, "grad_norm": 0.5302088856697083, "learning_rate": 3.1568627450980394e-05, "loss": 0.5164, "step": 34689 }, { "epoch": 19.379888268156424, "grad_norm": 1.5509836673736572, "learning_rate": 3.15406162464986e-05, "loss": 0.3704, "step": 34690 }, { "epoch": 19.3804469273743, "grad_norm": 0.4000025987625122, "learning_rate": 3.1512605042016806e-05, "loss": 0.3496, "step": 34691 }, { "epoch": 19.381005586592178, "grad_norm": 0.5078865885734558, "learning_rate": 3.148459383753501e-05, "loss": 0.3737, "step": 34692 }, { "epoch": 19.381564245810054, "grad_norm": 0.7631915807723999, "learning_rate": 3.1456582633053225e-05, "loss": 0.3133, "step": 34693 }, { "epoch": 19.382122905027934, "grad_norm": 0.3892311453819275, "learning_rate": 3.142857142857143e-05, "loss": 0.4643, "step": 34694 }, { "epoch": 19.38268156424581, "grad_norm": 1.2695770263671875, "learning_rate": 
3.140056022408964e-05, "loss": 0.4245, "step": 34695 }, { "epoch": 19.383240223463687, "grad_norm": 0.5960999727249146, "learning_rate": 3.137254901960784e-05, "loss": 0.4573, "step": 34696 }, { "epoch": 19.383798882681564, "grad_norm": 0.5446678996086121, "learning_rate": 3.134453781512605e-05, "loss": 0.3849, "step": 34697 }, { "epoch": 19.38435754189944, "grad_norm": 0.7207553386688232, "learning_rate": 3.1316526610644255e-05, "loss": 0.4716, "step": 34698 }, { "epoch": 19.384916201117317, "grad_norm": 0.44883689284324646, "learning_rate": 3.128851540616247e-05, "loss": 0.5281, "step": 34699 }, { "epoch": 19.385474860335197, "grad_norm": 0.5003238916397095, "learning_rate": 3.1260504201680673e-05, "loss": 0.4242, "step": 34700 }, { "epoch": 19.386033519553074, "grad_norm": 0.3824901878833771, "learning_rate": 3.123249299719888e-05, "loss": 0.3272, "step": 34701 }, { "epoch": 19.38659217877095, "grad_norm": 0.37728121876716614, "learning_rate": 3.1204481792717085e-05, "loss": 0.3884, "step": 34702 }, { "epoch": 19.387150837988827, "grad_norm": 0.5183541178703308, "learning_rate": 3.11764705882353e-05, "loss": 0.4014, "step": 34703 }, { "epoch": 19.387709497206703, "grad_norm": 0.3923890292644501, "learning_rate": 3.1148459383753504e-05, "loss": 0.3382, "step": 34704 }, { "epoch": 19.38826815642458, "grad_norm": 0.5274564623832703, "learning_rate": 3.112044817927171e-05, "loss": 0.2708, "step": 34705 }, { "epoch": 19.388826815642457, "grad_norm": 0.4084869623184204, "learning_rate": 3.1092436974789916e-05, "loss": 0.4536, "step": 34706 }, { "epoch": 19.389385474860337, "grad_norm": 2.950998067855835, "learning_rate": 3.106442577030812e-05, "loss": 0.5644, "step": 34707 }, { "epoch": 19.389944134078213, "grad_norm": 0.45802491903305054, "learning_rate": 3.1036414565826335e-05, "loss": 0.3959, "step": 34708 }, { "epoch": 19.39050279329609, "grad_norm": 0.6680406928062439, "learning_rate": 3.100840336134454e-05, "loss": 0.3962, "step": 34709 }, { "epoch": 
19.391061452513966, "grad_norm": 0.6626935601234436, "learning_rate": 3.098039215686275e-05, "loss": 0.3003, "step": 34710 }, { "epoch": 19.391620111731843, "grad_norm": 0.4294644892215729, "learning_rate": 3.095238095238095e-05, "loss": 0.3527, "step": 34711 }, { "epoch": 19.39217877094972, "grad_norm": 3.947005033493042, "learning_rate": 3.092436974789916e-05, "loss": 0.4912, "step": 34712 }, { "epoch": 19.392737430167596, "grad_norm": 0.5176776051521301, "learning_rate": 3.0896358543417365e-05, "loss": 0.3834, "step": 34713 }, { "epoch": 19.393296089385476, "grad_norm": 0.5808885097503662, "learning_rate": 3.086834733893558e-05, "loss": 0.3733, "step": 34714 }, { "epoch": 19.393854748603353, "grad_norm": 0.49917304515838623, "learning_rate": 3.0840336134453784e-05, "loss": 0.3681, "step": 34715 }, { "epoch": 19.39441340782123, "grad_norm": 0.7487123608589172, "learning_rate": 3.081232492997199e-05, "loss": 0.3508, "step": 34716 }, { "epoch": 19.394972067039106, "grad_norm": 0.44437068700790405, "learning_rate": 3.0784313725490196e-05, "loss": 0.3922, "step": 34717 }, { "epoch": 19.395530726256982, "grad_norm": 0.44631487131118774, "learning_rate": 3.07563025210084e-05, "loss": 0.4115, "step": 34718 }, { "epoch": 19.39608938547486, "grad_norm": 0.6000242233276367, "learning_rate": 3.0728291316526614e-05, "loss": 0.3919, "step": 34719 }, { "epoch": 19.39664804469274, "grad_norm": 0.8256241679191589, "learning_rate": 3.070028011204482e-05, "loss": 0.4147, "step": 34720 }, { "epoch": 19.397206703910616, "grad_norm": 0.6992743015289307, "learning_rate": 3.0672268907563026e-05, "loss": 0.4548, "step": 34721 }, { "epoch": 19.397765363128492, "grad_norm": 0.5466495752334595, "learning_rate": 3.064425770308123e-05, "loss": 0.3992, "step": 34722 }, { "epoch": 19.39832402234637, "grad_norm": 0.392668217420578, "learning_rate": 3.061624649859944e-05, "loss": 0.4177, "step": 34723 }, { "epoch": 19.398882681564245, "grad_norm": 0.7657110095024109, "learning_rate": 
3.058823529411765e-05, "loss": 0.319, "step": 34724 }, { "epoch": 19.399441340782122, "grad_norm": 0.43436890840530396, "learning_rate": 3.056022408963586e-05, "loss": 0.382, "step": 34725 }, { "epoch": 19.4, "grad_norm": 0.47670605778694153, "learning_rate": 3.053221288515406e-05, "loss": 0.3444, "step": 34726 }, { "epoch": 19.40055865921788, "grad_norm": 0.4780365526676178, "learning_rate": 3.050420168067227e-05, "loss": 0.3661, "step": 34727 }, { "epoch": 19.401117318435755, "grad_norm": 0.35390588641166687, "learning_rate": 3.0476190476190475e-05, "loss": 0.3873, "step": 34728 }, { "epoch": 19.401675977653632, "grad_norm": 0.35138699412345886, "learning_rate": 3.044817927170868e-05, "loss": 0.3392, "step": 34729 }, { "epoch": 19.40223463687151, "grad_norm": 1.0493677854537964, "learning_rate": 3.0420168067226894e-05, "loss": 0.6186, "step": 34730 }, { "epoch": 19.402793296089385, "grad_norm": 0.4658297002315521, "learning_rate": 3.03921568627451e-05, "loss": 0.3375, "step": 34731 }, { "epoch": 19.40335195530726, "grad_norm": 1.4820036888122559, "learning_rate": 3.0364145658263306e-05, "loss": 0.4263, "step": 34732 }, { "epoch": 19.403910614525138, "grad_norm": 3.14245867729187, "learning_rate": 3.0336134453781515e-05, "loss": 0.4717, "step": 34733 }, { "epoch": 19.404469273743018, "grad_norm": 0.322102814912796, "learning_rate": 3.030812324929972e-05, "loss": 0.3872, "step": 34734 }, { "epoch": 19.405027932960895, "grad_norm": 0.3833746910095215, "learning_rate": 3.028011204481793e-05, "loss": 0.3989, "step": 34735 }, { "epoch": 19.40558659217877, "grad_norm": 2.2707359790802, "learning_rate": 3.0252100840336137e-05, "loss": 0.3755, "step": 34736 }, { "epoch": 19.406145251396648, "grad_norm": 0.4668048322200775, "learning_rate": 3.0224089635854343e-05, "loss": 0.4137, "step": 34737 }, { "epoch": 19.406703910614524, "grad_norm": 0.4917636811733246, "learning_rate": 3.0196078431372552e-05, "loss": 0.3698, "step": 34738 }, { "epoch": 19.4072625698324, "grad_norm": 
0.49985042214393616, "learning_rate": 3.0168067226890758e-05, "loss": 0.3804, "step": 34739 }, { "epoch": 19.407821229050278, "grad_norm": 0.43190765380859375, "learning_rate": 3.0140056022408964e-05, "loss": 0.4342, "step": 34740 }, { "epoch": 19.408379888268158, "grad_norm": 0.5139693021774292, "learning_rate": 3.0112044817927173e-05, "loss": 0.4178, "step": 34741 }, { "epoch": 19.408938547486034, "grad_norm": 0.4200821816921234, "learning_rate": 3.008403361344538e-05, "loss": 0.3118, "step": 34742 }, { "epoch": 19.40949720670391, "grad_norm": 0.40365833044052124, "learning_rate": 3.005602240896359e-05, "loss": 0.4482, "step": 34743 }, { "epoch": 19.410055865921787, "grad_norm": 0.5570021867752075, "learning_rate": 3.0028011204481795e-05, "loss": 0.434, "step": 34744 }, { "epoch": 19.410614525139664, "grad_norm": 0.5476948022842407, "learning_rate": 3e-05, "loss": 0.3609, "step": 34745 }, { "epoch": 19.41117318435754, "grad_norm": 0.5441157221794128, "learning_rate": 2.997198879551821e-05, "loss": 0.3706, "step": 34746 }, { "epoch": 19.41173184357542, "grad_norm": 0.31823068857192993, "learning_rate": 2.9943977591036416e-05, "loss": 0.283, "step": 34747 }, { "epoch": 19.412290502793297, "grad_norm": 0.36117902398109436, "learning_rate": 2.9915966386554622e-05, "loss": 0.2859, "step": 34748 }, { "epoch": 19.412849162011174, "grad_norm": 0.4033918082714081, "learning_rate": 2.988795518207283e-05, "loss": 0.343, "step": 34749 }, { "epoch": 19.41340782122905, "grad_norm": 0.4970065653324127, "learning_rate": 2.9859943977591038e-05, "loss": 0.3617, "step": 34750 }, { "epoch": 19.413966480446927, "grad_norm": 1.593176007270813, "learning_rate": 2.9831932773109247e-05, "loss": 0.4703, "step": 34751 }, { "epoch": 19.414525139664804, "grad_norm": 0.5126480460166931, "learning_rate": 2.9803921568627453e-05, "loss": 0.5085, "step": 34752 }, { "epoch": 19.41508379888268, "grad_norm": 0.30920934677124023, "learning_rate": 2.977591036414566e-05, "loss": 0.2957, "step": 34753 
}, { "epoch": 19.41564245810056, "grad_norm": 0.49395856261253357, "learning_rate": 2.9747899159663868e-05, "loss": 0.4249, "step": 34754 }, { "epoch": 19.416201117318437, "grad_norm": 1.4000450372695923, "learning_rate": 2.9719887955182074e-05, "loss": 0.4614, "step": 34755 }, { "epoch": 19.416759776536313, "grad_norm": 0.4395942986011505, "learning_rate": 2.969187675070028e-05, "loss": 0.4361, "step": 34756 }, { "epoch": 19.41731843575419, "grad_norm": 0.4884836971759796, "learning_rate": 2.966386554621849e-05, "loss": 0.3278, "step": 34757 }, { "epoch": 19.417877094972066, "grad_norm": 0.3011881411075592, "learning_rate": 2.9635854341736696e-05, "loss": 0.3595, "step": 34758 }, { "epoch": 19.418435754189943, "grad_norm": 0.4406471252441406, "learning_rate": 2.9607843137254905e-05, "loss": 0.4385, "step": 34759 }, { "epoch": 19.41899441340782, "grad_norm": 0.625727117061615, "learning_rate": 2.957983193277311e-05, "loss": 0.4696, "step": 34760 }, { "epoch": 19.4195530726257, "grad_norm": 0.5656685829162598, "learning_rate": 2.9551820728291317e-05, "loss": 0.4403, "step": 34761 }, { "epoch": 19.420111731843576, "grad_norm": 0.40907514095306396, "learning_rate": 2.9523809523809526e-05, "loss": 0.3767, "step": 34762 }, { "epoch": 19.420670391061453, "grad_norm": 0.4315016269683838, "learning_rate": 2.9495798319327732e-05, "loss": 0.3623, "step": 34763 }, { "epoch": 19.42122905027933, "grad_norm": 0.5315988659858704, "learning_rate": 2.946778711484594e-05, "loss": 0.3874, "step": 34764 }, { "epoch": 19.421787709497206, "grad_norm": 0.41649487614631653, "learning_rate": 2.9439775910364148e-05, "loss": 0.3566, "step": 34765 }, { "epoch": 19.422346368715083, "grad_norm": 0.45690855383872986, "learning_rate": 2.9411764705882354e-05, "loss": 0.4189, "step": 34766 }, { "epoch": 19.422905027932963, "grad_norm": 0.5419706702232361, "learning_rate": 2.938375350140056e-05, "loss": 0.392, "step": 34767 }, { "epoch": 19.42346368715084, "grad_norm": 0.39451321959495544, 
"learning_rate": 2.935574229691877e-05, "loss": 0.3383, "step": 34768 }, { "epoch": 19.424022346368716, "grad_norm": 0.34369826316833496, "learning_rate": 2.9327731092436975e-05, "loss": 0.3421, "step": 34769 }, { "epoch": 19.424581005586592, "grad_norm": 1.778489112854004, "learning_rate": 2.9299719887955185e-05, "loss": 0.4537, "step": 34770 }, { "epoch": 19.42513966480447, "grad_norm": 0.48961853981018066, "learning_rate": 2.927170868347339e-05, "loss": 0.4071, "step": 34771 }, { "epoch": 19.425698324022346, "grad_norm": 0.40935471653938293, "learning_rate": 2.9243697478991596e-05, "loss": 0.3474, "step": 34772 }, { "epoch": 19.426256983240222, "grad_norm": 0.37686896324157715, "learning_rate": 2.9215686274509806e-05, "loss": 0.3683, "step": 34773 }, { "epoch": 19.426815642458102, "grad_norm": 0.47350791096687317, "learning_rate": 2.9187675070028012e-05, "loss": 0.4736, "step": 34774 }, { "epoch": 19.42737430167598, "grad_norm": 0.4565175771713257, "learning_rate": 2.9159663865546218e-05, "loss": 0.447, "step": 34775 }, { "epoch": 19.427932960893855, "grad_norm": 0.590487003326416, "learning_rate": 2.9131652661064427e-05, "loss": 0.4295, "step": 34776 }, { "epoch": 19.428491620111732, "grad_norm": 0.6740888357162476, "learning_rate": 2.9103641456582633e-05, "loss": 0.5175, "step": 34777 }, { "epoch": 19.42905027932961, "grad_norm": 0.6588280200958252, "learning_rate": 2.9075630252100843e-05, "loss": 0.4447, "step": 34778 }, { "epoch": 19.429608938547485, "grad_norm": 13.812747955322266, "learning_rate": 2.904761904761905e-05, "loss": 0.4068, "step": 34779 }, { "epoch": 19.43016759776536, "grad_norm": 0.5586323738098145, "learning_rate": 2.9019607843137255e-05, "loss": 0.3489, "step": 34780 }, { "epoch": 19.43072625698324, "grad_norm": 0.5538607239723206, "learning_rate": 2.8991596638655464e-05, "loss": 0.4431, "step": 34781 }, { "epoch": 19.43128491620112, "grad_norm": 0.7430354356765747, "learning_rate": 2.896358543417367e-05, "loss": 0.3899, "step": 34782 }, { 
"epoch": 19.431843575418995, "grad_norm": 0.8361756801605225, "learning_rate": 2.8935574229691876e-05, "loss": 0.4509, "step": 34783 }, { "epoch": 19.43240223463687, "grad_norm": 0.5199877023696899, "learning_rate": 2.8907563025210085e-05, "loss": 0.4836, "step": 34784 }, { "epoch": 19.432960893854748, "grad_norm": 0.3791772425174713, "learning_rate": 2.887955182072829e-05, "loss": 0.2965, "step": 34785 }, { "epoch": 19.433519553072625, "grad_norm": 0.4693761467933655, "learning_rate": 2.88515406162465e-05, "loss": 0.435, "step": 34786 }, { "epoch": 19.4340782122905, "grad_norm": 0.5811636447906494, "learning_rate": 2.8823529411764707e-05, "loss": 0.4304, "step": 34787 }, { "epoch": 19.43463687150838, "grad_norm": 0.5526044964790344, "learning_rate": 2.8795518207282913e-05, "loss": 0.487, "step": 34788 }, { "epoch": 19.435195530726258, "grad_norm": 0.2887870669364929, "learning_rate": 2.8767507002801122e-05, "loss": 0.2608, "step": 34789 }, { "epoch": 19.435754189944134, "grad_norm": 0.39098960161209106, "learning_rate": 2.8739495798319328e-05, "loss": 0.3385, "step": 34790 }, { "epoch": 19.43631284916201, "grad_norm": 0.4343716502189636, "learning_rate": 2.8711484593837534e-05, "loss": 0.4337, "step": 34791 }, { "epoch": 19.436871508379888, "grad_norm": 0.34231317043304443, "learning_rate": 2.8683473389355743e-05, "loss": 0.3784, "step": 34792 }, { "epoch": 19.437430167597764, "grad_norm": 0.4189598858356476, "learning_rate": 2.865546218487395e-05, "loss": 0.4661, "step": 34793 }, { "epoch": 19.43798882681564, "grad_norm": 0.35148534178733826, "learning_rate": 2.862745098039216e-05, "loss": 0.3215, "step": 34794 }, { "epoch": 19.43854748603352, "grad_norm": 0.3340983986854553, "learning_rate": 2.8599439775910365e-05, "loss": 0.3384, "step": 34795 }, { "epoch": 19.439106145251397, "grad_norm": 0.5679505467414856, "learning_rate": 2.857142857142857e-05, "loss": 0.3756, "step": 34796 }, { "epoch": 19.439664804469274, "grad_norm": 0.6255388259887695, "learning_rate": 
2.854341736694678e-05, "loss": 0.3552, "step": 34797 }, { "epoch": 19.44022346368715, "grad_norm": 0.34175461530685425, "learning_rate": 2.8515406162464986e-05, "loss": 0.3551, "step": 34798 }, { "epoch": 19.440782122905027, "grad_norm": 0.40118804574012756, "learning_rate": 2.8487394957983192e-05, "loss": 0.3892, "step": 34799 }, { "epoch": 19.441340782122904, "grad_norm": 0.46519577503204346, "learning_rate": 2.84593837535014e-05, "loss": 0.423, "step": 34800 }, { "epoch": 19.441899441340784, "grad_norm": 0.3849142789840698, "learning_rate": 2.8431372549019608e-05, "loss": 0.4067, "step": 34801 }, { "epoch": 19.44245810055866, "grad_norm": 0.36764630675315857, "learning_rate": 2.8403361344537814e-05, "loss": 0.3341, "step": 34802 }, { "epoch": 19.443016759776537, "grad_norm": 0.3311637043952942, "learning_rate": 2.8375350140056023e-05, "loss": 0.3163, "step": 34803 }, { "epoch": 19.443575418994413, "grad_norm": 0.4395899176597595, "learning_rate": 2.834733893557423e-05, "loss": 0.4324, "step": 34804 }, { "epoch": 19.44413407821229, "grad_norm": 0.417794793844223, "learning_rate": 2.831932773109244e-05, "loss": 0.3394, "step": 34805 }, { "epoch": 19.444692737430167, "grad_norm": 0.5990760326385498, "learning_rate": 2.8291316526610644e-05, "loss": 0.3996, "step": 34806 }, { "epoch": 19.445251396648043, "grad_norm": 0.4159018099308014, "learning_rate": 2.826330532212885e-05, "loss": 0.4901, "step": 34807 }, { "epoch": 19.445810055865923, "grad_norm": 0.6076653599739075, "learning_rate": 2.823529411764706e-05, "loss": 0.4628, "step": 34808 }, { "epoch": 19.4463687150838, "grad_norm": 2.1380856037139893, "learning_rate": 2.8207282913165266e-05, "loss": 0.3825, "step": 34809 }, { "epoch": 19.446927374301676, "grad_norm": 1.1035913228988647, "learning_rate": 2.8179271708683472e-05, "loss": 0.4055, "step": 34810 }, { "epoch": 19.447486033519553, "grad_norm": 0.5296691060066223, "learning_rate": 2.815126050420168e-05, "loss": 0.5932, "step": 34811 }, { "epoch": 
19.44804469273743, "grad_norm": 0.35844045877456665, "learning_rate": 2.8123249299719887e-05, "loss": 0.3516, "step": 34812 }, { "epoch": 19.448603351955306, "grad_norm": 0.8088261485099792, "learning_rate": 2.8095238095238096e-05, "loss": 0.5878, "step": 34813 }, { "epoch": 19.449162011173183, "grad_norm": 0.5955832004547119, "learning_rate": 2.8067226890756302e-05, "loss": 0.4585, "step": 34814 }, { "epoch": 19.449720670391063, "grad_norm": 0.47626441717147827, "learning_rate": 2.803921568627451e-05, "loss": 0.378, "step": 34815 }, { "epoch": 19.45027932960894, "grad_norm": 0.8115229606628418, "learning_rate": 2.8011204481792718e-05, "loss": 0.4401, "step": 34816 }, { "epoch": 19.450837988826816, "grad_norm": 0.4454791843891144, "learning_rate": 2.7983193277310924e-05, "loss": 0.3236, "step": 34817 }, { "epoch": 19.451396648044692, "grad_norm": 0.5104039311408997, "learning_rate": 2.795518207282913e-05, "loss": 0.4567, "step": 34818 }, { "epoch": 19.45195530726257, "grad_norm": 0.6141294240951538, "learning_rate": 2.792717086834734e-05, "loss": 0.5223, "step": 34819 }, { "epoch": 19.452513966480446, "grad_norm": 0.3874828517436981, "learning_rate": 2.7899159663865545e-05, "loss": 0.4499, "step": 34820 }, { "epoch": 19.453072625698326, "grad_norm": 3.971581220626831, "learning_rate": 2.7871148459383755e-05, "loss": 0.3135, "step": 34821 }, { "epoch": 19.453631284916202, "grad_norm": 0.400783509016037, "learning_rate": 2.784313725490196e-05, "loss": 0.3877, "step": 34822 }, { "epoch": 19.45418994413408, "grad_norm": 1.1234560012817383, "learning_rate": 2.7815126050420167e-05, "loss": 0.3208, "step": 34823 }, { "epoch": 19.454748603351955, "grad_norm": 0.7502797842025757, "learning_rate": 2.7787114845938376e-05, "loss": 0.3751, "step": 34824 }, { "epoch": 19.455307262569832, "grad_norm": 0.6240797638893127, "learning_rate": 2.7759103641456582e-05, "loss": 0.5704, "step": 34825 }, { "epoch": 19.45586592178771, "grad_norm": 0.4749441146850586, "learning_rate": 
2.7731092436974788e-05, "loss": 0.4114, "step": 34826 }, { "epoch": 19.456424581005585, "grad_norm": 0.7239744067192078, "learning_rate": 2.7703081232492997e-05, "loss": 0.4083, "step": 34827 }, { "epoch": 19.456983240223465, "grad_norm": 0.7120631337165833, "learning_rate": 2.7675070028011203e-05, "loss": 0.4615, "step": 34828 }, { "epoch": 19.457541899441342, "grad_norm": 0.469102144241333, "learning_rate": 2.7647058823529413e-05, "loss": 0.3855, "step": 34829 }, { "epoch": 19.45810055865922, "grad_norm": 5.470970153808594, "learning_rate": 2.761904761904762e-05, "loss": 0.4748, "step": 34830 }, { "epoch": 19.458659217877095, "grad_norm": 0.3650527894496918, "learning_rate": 2.7591036414565825e-05, "loss": 0.399, "step": 34831 }, { "epoch": 19.45921787709497, "grad_norm": 0.6419007182121277, "learning_rate": 2.7563025210084034e-05, "loss": 0.4371, "step": 34832 }, { "epoch": 19.459776536312848, "grad_norm": 0.3608035147190094, "learning_rate": 2.753501400560224e-05, "loss": 0.4125, "step": 34833 }, { "epoch": 19.460335195530725, "grad_norm": 0.5687878131866455, "learning_rate": 2.7507002801120446e-05, "loss": 0.5592, "step": 34834 }, { "epoch": 19.460893854748605, "grad_norm": 0.6292167901992798, "learning_rate": 2.7478991596638655e-05, "loss": 0.3765, "step": 34835 }, { "epoch": 19.46145251396648, "grad_norm": 0.36823347210884094, "learning_rate": 2.745098039215686e-05, "loss": 0.3864, "step": 34836 }, { "epoch": 19.462011173184358, "grad_norm": 0.980316698551178, "learning_rate": 2.7422969187675067e-05, "loss": 0.4543, "step": 34837 }, { "epoch": 19.462569832402234, "grad_norm": 0.7499711513519287, "learning_rate": 2.7394957983193277e-05, "loss": 0.4236, "step": 34838 }, { "epoch": 19.46312849162011, "grad_norm": 0.5976037383079529, "learning_rate": 2.7366946778711483e-05, "loss": 0.413, "step": 34839 }, { "epoch": 19.463687150837988, "grad_norm": 0.361826628446579, "learning_rate": 2.7338935574229692e-05, "loss": 0.4383, "step": 34840 }, { "epoch": 
19.464245810055864, "grad_norm": 0.5410655736923218, "learning_rate": 2.7310924369747898e-05, "loss": 0.3938, "step": 34841 }, { "epoch": 19.464804469273744, "grad_norm": 0.6119258403778076, "learning_rate": 2.7282913165266104e-05, "loss": 0.5936, "step": 34842 }, { "epoch": 19.46536312849162, "grad_norm": 2.1629810333251953, "learning_rate": 2.7254901960784317e-05, "loss": 0.4456, "step": 34843 }, { "epoch": 19.465921787709497, "grad_norm": 0.49945780634880066, "learning_rate": 2.7226890756302523e-05, "loss": 0.4008, "step": 34844 }, { "epoch": 19.466480446927374, "grad_norm": 0.36867979168891907, "learning_rate": 2.719887955182073e-05, "loss": 0.3071, "step": 34845 }, { "epoch": 19.46703910614525, "grad_norm": 2.1073596477508545, "learning_rate": 2.717086834733894e-05, "loss": 0.4274, "step": 34846 }, { "epoch": 19.467597765363127, "grad_norm": 0.49559345841407776, "learning_rate": 2.7142857142857144e-05, "loss": 0.4909, "step": 34847 }, { "epoch": 19.468156424581007, "grad_norm": 0.4453836977481842, "learning_rate": 2.7114845938375354e-05, "loss": 0.3993, "step": 34848 }, { "epoch": 19.468715083798884, "grad_norm": 0.5157610774040222, "learning_rate": 2.708683473389356e-05, "loss": 0.5475, "step": 34849 }, { "epoch": 19.46927374301676, "grad_norm": 0.5772706866264343, "learning_rate": 2.7058823529411766e-05, "loss": 0.3872, "step": 34850 }, { "epoch": 19.469832402234637, "grad_norm": 2.2015509605407715, "learning_rate": 2.7030812324929975e-05, "loss": 0.4799, "step": 34851 }, { "epoch": 19.470391061452514, "grad_norm": 1.1958978176116943, "learning_rate": 2.700280112044818e-05, "loss": 0.3247, "step": 34852 }, { "epoch": 19.47094972067039, "grad_norm": 0.4813414216041565, "learning_rate": 2.6974789915966387e-05, "loss": 0.4392, "step": 34853 }, { "epoch": 19.471508379888267, "grad_norm": 0.7284502387046814, "learning_rate": 2.6946778711484596e-05, "loss": 0.6559, "step": 34854 }, { "epoch": 19.472067039106147, "grad_norm": 4.674539089202881, "learning_rate": 
2.6918767507002802e-05, "loss": 0.4551, "step": 34855 }, { "epoch": 19.472625698324023, "grad_norm": 0.390680193901062, "learning_rate": 2.6890756302521012e-05, "loss": 0.3195, "step": 34856 }, { "epoch": 19.4731843575419, "grad_norm": 1.7368971109390259, "learning_rate": 2.6862745098039218e-05, "loss": 0.485, "step": 34857 }, { "epoch": 19.473743016759776, "grad_norm": 0.6366631984710693, "learning_rate": 2.6834733893557424e-05, "loss": 0.4, "step": 34858 }, { "epoch": 19.474301675977653, "grad_norm": 0.4273139536380768, "learning_rate": 2.6806722689075633e-05, "loss": 0.446, "step": 34859 }, { "epoch": 19.47486033519553, "grad_norm": 0.4665451943874359, "learning_rate": 2.677871148459384e-05, "loss": 0.4566, "step": 34860 }, { "epoch": 19.475418994413406, "grad_norm": 0.869002640247345, "learning_rate": 2.6750700280112045e-05, "loss": 0.5105, "step": 34861 }, { "epoch": 19.475977653631286, "grad_norm": 0.43392908573150635, "learning_rate": 2.6722689075630255e-05, "loss": 0.3639, "step": 34862 }, { "epoch": 19.476536312849163, "grad_norm": 1.1479334831237793, "learning_rate": 2.669467787114846e-05, "loss": 0.4706, "step": 34863 }, { "epoch": 19.47709497206704, "grad_norm": 0.5081384778022766, "learning_rate": 2.666666666666667e-05, "loss": 0.3925, "step": 34864 }, { "epoch": 19.477653631284916, "grad_norm": 0.40185925364494324, "learning_rate": 2.6638655462184876e-05, "loss": 0.4877, "step": 34865 }, { "epoch": 19.478212290502793, "grad_norm": 0.3650851845741272, "learning_rate": 2.6610644257703082e-05, "loss": 0.3393, "step": 34866 }, { "epoch": 19.47877094972067, "grad_norm": 0.48613956570625305, "learning_rate": 2.658263305322129e-05, "loss": 0.4484, "step": 34867 }, { "epoch": 19.47932960893855, "grad_norm": 0.4635118544101715, "learning_rate": 2.6554621848739497e-05, "loss": 0.3699, "step": 34868 }, { "epoch": 19.479888268156426, "grad_norm": 0.4014201760292053, "learning_rate": 2.6526610644257703e-05, "loss": 0.249, "step": 34869 }, { "epoch": 
19.480446927374302, "grad_norm": 1.2354720830917358, "learning_rate": 2.6498599439775913e-05, "loss": 0.5171, "step": 34870 }, { "epoch": 19.48100558659218, "grad_norm": 0.33539703488349915, "learning_rate": 2.647058823529412e-05, "loss": 0.3626, "step": 34871 }, { "epoch": 19.481564245810056, "grad_norm": 0.6215862035751343, "learning_rate": 2.6442577030812325e-05, "loss": 0.3721, "step": 34872 }, { "epoch": 19.482122905027932, "grad_norm": 0.38046738505363464, "learning_rate": 2.6414565826330534e-05, "loss": 0.4454, "step": 34873 }, { "epoch": 19.48268156424581, "grad_norm": 0.3673492670059204, "learning_rate": 2.638655462184874e-05, "loss": 0.4318, "step": 34874 }, { "epoch": 19.48324022346369, "grad_norm": 0.4376643896102905, "learning_rate": 2.635854341736695e-05, "loss": 0.4907, "step": 34875 }, { "epoch": 19.483798882681565, "grad_norm": 0.3730255961418152, "learning_rate": 2.6330532212885155e-05, "loss": 0.3338, "step": 34876 }, { "epoch": 19.484357541899442, "grad_norm": 0.5789554119110107, "learning_rate": 2.630252100840336e-05, "loss": 0.3071, "step": 34877 }, { "epoch": 19.48491620111732, "grad_norm": 0.4080667495727539, "learning_rate": 2.627450980392157e-05, "loss": 0.3741, "step": 34878 }, { "epoch": 19.485474860335195, "grad_norm": 0.3730865716934204, "learning_rate": 2.6246498599439777e-05, "loss": 0.3944, "step": 34879 }, { "epoch": 19.48603351955307, "grad_norm": 0.369336873292923, "learning_rate": 2.6218487394957983e-05, "loss": 0.4123, "step": 34880 }, { "epoch": 19.486592178770948, "grad_norm": 0.5278117656707764, "learning_rate": 2.6190476190476192e-05, "loss": 0.4091, "step": 34881 }, { "epoch": 19.48715083798883, "grad_norm": 1.122765064239502, "learning_rate": 2.6162464985994398e-05, "loss": 0.3808, "step": 34882 }, { "epoch": 19.487709497206705, "grad_norm": 0.7268039584159851, "learning_rate": 2.6134453781512608e-05, "loss": 0.3949, "step": 34883 }, { "epoch": 19.48826815642458, "grad_norm": 3.184790849685669, "learning_rate": 
2.6106442577030814e-05, "loss": 0.3785, "step": 34884 }, { "epoch": 19.488826815642458, "grad_norm": 0.3668678104877472, "learning_rate": 2.607843137254902e-05, "loss": 0.3556, "step": 34885 }, { "epoch": 19.489385474860335, "grad_norm": 0.40232452750205994, "learning_rate": 2.605042016806723e-05, "loss": 0.4073, "step": 34886 }, { "epoch": 19.48994413407821, "grad_norm": 0.3640769422054291, "learning_rate": 2.6022408963585435e-05, "loss": 0.4083, "step": 34887 }, { "epoch": 19.490502793296088, "grad_norm": 0.42225614190101624, "learning_rate": 2.599439775910364e-05, "loss": 0.51, "step": 34888 }, { "epoch": 19.491061452513968, "grad_norm": 3.6921956539154053, "learning_rate": 2.596638655462185e-05, "loss": 0.5421, "step": 34889 }, { "epoch": 19.491620111731844, "grad_norm": 0.6919388771057129, "learning_rate": 2.5938375350140056e-05, "loss": 0.6297, "step": 34890 }, { "epoch": 19.49217877094972, "grad_norm": 0.6777781844139099, "learning_rate": 2.5910364145658266e-05, "loss": 0.392, "step": 34891 }, { "epoch": 19.492737430167598, "grad_norm": 0.39118492603302, "learning_rate": 2.5882352941176472e-05, "loss": 0.3772, "step": 34892 }, { "epoch": 19.493296089385474, "grad_norm": 0.3688738942146301, "learning_rate": 2.5854341736694678e-05, "loss": 0.3981, "step": 34893 }, { "epoch": 19.49385474860335, "grad_norm": 0.33815377950668335, "learning_rate": 2.5826330532212887e-05, "loss": 0.3424, "step": 34894 }, { "epoch": 19.49441340782123, "grad_norm": 0.34132981300354004, "learning_rate": 2.5798319327731093e-05, "loss": 0.3532, "step": 34895 }, { "epoch": 19.494972067039107, "grad_norm": 2.5869383811950684, "learning_rate": 2.57703081232493e-05, "loss": 0.3117, "step": 34896 }, { "epoch": 19.495530726256984, "grad_norm": 0.6172873973846436, "learning_rate": 2.574229691876751e-05, "loss": 0.3574, "step": 34897 }, { "epoch": 19.49608938547486, "grad_norm": 0.44866862893104553, "learning_rate": 2.5714285714285714e-05, "loss": 0.2921, "step": 34898 }, { "epoch": 
19.496648044692737, "grad_norm": 0.8284403085708618, "learning_rate": 2.5686274509803924e-05, "loss": 0.4283, "step": 34899 }, { "epoch": 19.497206703910614, "grad_norm": 1.1001683473587036, "learning_rate": 2.565826330532213e-05, "loss": 0.4978, "step": 34900 }, { "epoch": 19.49776536312849, "grad_norm": 0.5014494061470032, "learning_rate": 2.5630252100840336e-05, "loss": 0.3411, "step": 34901 }, { "epoch": 19.49832402234637, "grad_norm": 0.5027364492416382, "learning_rate": 2.5602240896358545e-05, "loss": 0.4445, "step": 34902 }, { "epoch": 19.498882681564247, "grad_norm": 0.4541790783405304, "learning_rate": 2.557422969187675e-05, "loss": 0.4546, "step": 34903 }, { "epoch": 19.499441340782123, "grad_norm": 0.44293051958084106, "learning_rate": 2.5546218487394957e-05, "loss": 0.3856, "step": 34904 }, { "epoch": 19.5, "grad_norm": 0.42757755517959595, "learning_rate": 2.5518207282913167e-05, "loss": 0.3473, "step": 34905 }, { "epoch": 19.500558659217877, "grad_norm": 0.8330206274986267, "learning_rate": 2.5490196078431373e-05, "loss": 0.3887, "step": 34906 }, { "epoch": 19.501117318435753, "grad_norm": 0.40243637561798096, "learning_rate": 2.546218487394958e-05, "loss": 0.2796, "step": 34907 }, { "epoch": 19.50167597765363, "grad_norm": 0.39788687229156494, "learning_rate": 2.5434173669467788e-05, "loss": 0.388, "step": 34908 }, { "epoch": 19.50223463687151, "grad_norm": 1.57988440990448, "learning_rate": 2.5406162464985994e-05, "loss": 0.3657, "step": 34909 }, { "epoch": 19.502793296089386, "grad_norm": 0.3384858965873718, "learning_rate": 2.5378151260504203e-05, "loss": 0.3439, "step": 34910 }, { "epoch": 19.503351955307263, "grad_norm": 0.9958170652389526, "learning_rate": 2.535014005602241e-05, "loss": 0.3799, "step": 34911 }, { "epoch": 19.50391061452514, "grad_norm": 0.642697274684906, "learning_rate": 2.5322128851540615e-05, "loss": 0.4593, "step": 34912 }, { "epoch": 19.504469273743016, "grad_norm": 0.3647029995918274, "learning_rate": 
2.5294117647058825e-05, "loss": 0.4773, "step": 34913 }, { "epoch": 19.505027932960893, "grad_norm": 0.6092699766159058, "learning_rate": 2.526610644257703e-05, "loss": 0.4089, "step": 34914 }, { "epoch": 19.505586592178773, "grad_norm": 1.4237099885940552, "learning_rate": 2.5238095238095237e-05, "loss": 0.4038, "step": 34915 }, { "epoch": 19.50614525139665, "grad_norm": 0.4050527811050415, "learning_rate": 2.5210084033613446e-05, "loss": 0.3612, "step": 34916 }, { "epoch": 19.506703910614526, "grad_norm": 0.3727430999279022, "learning_rate": 2.5182072829131652e-05, "loss": 0.3419, "step": 34917 }, { "epoch": 19.507262569832402, "grad_norm": 0.6199318766593933, "learning_rate": 2.515406162464986e-05, "loss": 0.3683, "step": 34918 }, { "epoch": 19.50782122905028, "grad_norm": 0.5812656283378601, "learning_rate": 2.5126050420168067e-05, "loss": 0.2762, "step": 34919 }, { "epoch": 19.508379888268156, "grad_norm": 2.1323699951171875, "learning_rate": 2.5098039215686273e-05, "loss": 0.4134, "step": 34920 }, { "epoch": 19.508938547486032, "grad_norm": 0.6260694861412048, "learning_rate": 2.5070028011204483e-05, "loss": 0.4046, "step": 34921 }, { "epoch": 19.509497206703912, "grad_norm": 0.36617955565452576, "learning_rate": 2.504201680672269e-05, "loss": 0.4208, "step": 34922 }, { "epoch": 19.51005586592179, "grad_norm": 0.37258216738700867, "learning_rate": 2.5014005602240895e-05, "loss": 0.3871, "step": 34923 }, { "epoch": 19.510614525139665, "grad_norm": 0.35397523641586304, "learning_rate": 2.4985994397759104e-05, "loss": 0.4331, "step": 34924 }, { "epoch": 19.511173184357542, "grad_norm": 2.2395682334899902, "learning_rate": 2.495798319327731e-05, "loss": 0.5199, "step": 34925 }, { "epoch": 19.51173184357542, "grad_norm": 0.42774754762649536, "learning_rate": 2.492997198879552e-05, "loss": 0.4441, "step": 34926 }, { "epoch": 19.512290502793295, "grad_norm": 0.5070825815200806, "learning_rate": 2.4901960784313726e-05, "loss": 0.4599, "step": 34927 }, { "epoch": 
19.51284916201117, "grad_norm": 1.0811023712158203, "learning_rate": 2.487394957983193e-05, "loss": 0.408, "step": 34928 }, { "epoch": 19.513407821229052, "grad_norm": 0.42528045177459717, "learning_rate": 2.484593837535014e-05, "loss": 0.383, "step": 34929 }, { "epoch": 19.51396648044693, "grad_norm": 0.4363587200641632, "learning_rate": 2.4817927170868347e-05, "loss": 0.5726, "step": 34930 }, { "epoch": 19.514525139664805, "grad_norm": 0.42945781350135803, "learning_rate": 2.4789915966386553e-05, "loss": 0.4024, "step": 34931 }, { "epoch": 19.51508379888268, "grad_norm": 0.47450146079063416, "learning_rate": 2.4761904761904762e-05, "loss": 0.4144, "step": 34932 }, { "epoch": 19.515642458100558, "grad_norm": 0.4275537133216858, "learning_rate": 2.473389355742297e-05, "loss": 0.4054, "step": 34933 }, { "epoch": 19.516201117318435, "grad_norm": 0.3809202015399933, "learning_rate": 2.4705882352941174e-05, "loss": 0.4281, "step": 34934 }, { "epoch": 19.51675977653631, "grad_norm": 0.38599687814712524, "learning_rate": 2.4677871148459384e-05, "loss": 0.3593, "step": 34935 }, { "epoch": 19.51731843575419, "grad_norm": 0.38828933238983154, "learning_rate": 2.464985994397759e-05, "loss": 0.3805, "step": 34936 }, { "epoch": 19.517877094972068, "grad_norm": 2.617799997329712, "learning_rate": 2.46218487394958e-05, "loss": 0.3243, "step": 34937 }, { "epoch": 19.518435754189944, "grad_norm": 0.44306081533432007, "learning_rate": 2.4593837535014005e-05, "loss": 0.3522, "step": 34938 }, { "epoch": 19.51899441340782, "grad_norm": 0.41663122177124023, "learning_rate": 2.456582633053221e-05, "loss": 0.3711, "step": 34939 }, { "epoch": 19.519553072625698, "grad_norm": 0.40998974442481995, "learning_rate": 2.453781512605042e-05, "loss": 0.3916, "step": 34940 }, { "epoch": 19.520111731843574, "grad_norm": 2.1621267795562744, "learning_rate": 2.4509803921568626e-05, "loss": 0.4845, "step": 34941 }, { "epoch": 19.52067039106145, "grad_norm": 0.4144487977027893, "learning_rate": 
2.4481792717086832e-05, "loss": 0.4182, "step": 34942 }, { "epoch": 19.52122905027933, "grad_norm": 0.8371133208274841, "learning_rate": 2.4453781512605042e-05, "loss": 0.4985, "step": 34943 }, { "epoch": 19.521787709497207, "grad_norm": 0.3932962119579315, "learning_rate": 2.4425770308123248e-05, "loss": 0.4071, "step": 34944 }, { "epoch": 19.522346368715084, "grad_norm": 0.3749922513961792, "learning_rate": 2.4397759103641457e-05, "loss": 0.4593, "step": 34945 }, { "epoch": 19.52290502793296, "grad_norm": 0.7596540451049805, "learning_rate": 2.4369747899159663e-05, "loss": 0.4766, "step": 34946 }, { "epoch": 19.523463687150837, "grad_norm": 0.38084205985069275, "learning_rate": 2.434173669467787e-05, "loss": 0.3785, "step": 34947 }, { "epoch": 19.524022346368714, "grad_norm": 0.34402021765708923, "learning_rate": 2.431372549019608e-05, "loss": 0.4099, "step": 34948 }, { "epoch": 19.524581005586594, "grad_norm": 2.6963329315185547, "learning_rate": 2.4285714285714285e-05, "loss": 0.3745, "step": 34949 }, { "epoch": 19.52513966480447, "grad_norm": 0.5655094981193542, "learning_rate": 2.425770308123249e-05, "loss": 0.4122, "step": 34950 }, { "epoch": 19.525698324022347, "grad_norm": 0.4889974892139435, "learning_rate": 2.42296918767507e-05, "loss": 0.3939, "step": 34951 }, { "epoch": 19.526256983240224, "grad_norm": 0.47463908791542053, "learning_rate": 2.4201680672268906e-05, "loss": 0.359, "step": 34952 }, { "epoch": 19.5268156424581, "grad_norm": 0.39065998792648315, "learning_rate": 2.4173669467787115e-05, "loss": 0.3533, "step": 34953 }, { "epoch": 19.527374301675977, "grad_norm": 0.6336252093315125, "learning_rate": 2.414565826330532e-05, "loss": 0.3926, "step": 34954 }, { "epoch": 19.527932960893853, "grad_norm": 0.6006150245666504, "learning_rate": 2.4117647058823527e-05, "loss": 0.5883, "step": 34955 }, { "epoch": 19.528491620111733, "grad_norm": 0.9705725312232971, "learning_rate": 2.4089635854341737e-05, "loss": 0.3489, "step": 34956 }, { "epoch": 
19.52905027932961, "grad_norm": 0.42252224683761597, "learning_rate": 2.4061624649859943e-05, "loss": 0.4638, "step": 34957 }, { "epoch": 19.529608938547486, "grad_norm": 0.30472639203071594, "learning_rate": 2.403361344537815e-05, "loss": 0.2838, "step": 34958 }, { "epoch": 19.530167597765363, "grad_norm": 0.4616810381412506, "learning_rate": 2.400560224089636e-05, "loss": 0.3634, "step": 34959 }, { "epoch": 19.53072625698324, "grad_norm": 0.2783021926879883, "learning_rate": 2.3977591036414567e-05, "loss": 0.2769, "step": 34960 }, { "epoch": 19.531284916201116, "grad_norm": 0.4363904297351837, "learning_rate": 2.3949579831932777e-05, "loss": 0.3294, "step": 34961 }, { "epoch": 19.531843575418993, "grad_norm": 0.7102211713790894, "learning_rate": 2.3921568627450983e-05, "loss": 0.4197, "step": 34962 }, { "epoch": 19.532402234636873, "grad_norm": 0.6642683744430542, "learning_rate": 2.389355742296919e-05, "loss": 0.4695, "step": 34963 }, { "epoch": 19.53296089385475, "grad_norm": 0.31163617968559265, "learning_rate": 2.3865546218487398e-05, "loss": 0.3135, "step": 34964 }, { "epoch": 19.533519553072626, "grad_norm": 0.37404903769493103, "learning_rate": 2.3837535014005604e-05, "loss": 0.4106, "step": 34965 }, { "epoch": 19.534078212290503, "grad_norm": 0.4966489374637604, "learning_rate": 2.380952380952381e-05, "loss": 0.4069, "step": 34966 }, { "epoch": 19.53463687150838, "grad_norm": 0.5149335861206055, "learning_rate": 2.378151260504202e-05, "loss": 0.3693, "step": 34967 }, { "epoch": 19.535195530726256, "grad_norm": 0.47929149866104126, "learning_rate": 2.3753501400560226e-05, "loss": 0.4462, "step": 34968 }, { "epoch": 19.535754189944136, "grad_norm": 0.4186246395111084, "learning_rate": 2.372549019607843e-05, "loss": 0.3949, "step": 34969 }, { "epoch": 19.536312849162012, "grad_norm": 0.5047544836997986, "learning_rate": 2.369747899159664e-05, "loss": 0.3725, "step": 34970 }, { "epoch": 19.53687150837989, "grad_norm": 0.8106202483177185, "learning_rate": 
2.3669467787114847e-05, "loss": 0.3615, "step": 34971 }, { "epoch": 19.537430167597766, "grad_norm": 0.38692641258239746, "learning_rate": 2.3641456582633056e-05, "loss": 0.3533, "step": 34972 }, { "epoch": 19.537988826815642, "grad_norm": 0.37314605712890625, "learning_rate": 2.3613445378151262e-05, "loss": 0.3369, "step": 34973 }, { "epoch": 19.53854748603352, "grad_norm": 0.41331860423088074, "learning_rate": 2.3585434173669468e-05, "loss": 0.3654, "step": 34974 }, { "epoch": 19.539106145251395, "grad_norm": 0.4739454984664917, "learning_rate": 2.3557422969187678e-05, "loss": 0.4056, "step": 34975 }, { "epoch": 19.539664804469275, "grad_norm": 0.487355500459671, "learning_rate": 2.3529411764705884e-05, "loss": 0.474, "step": 34976 }, { "epoch": 19.540223463687152, "grad_norm": 0.4497007131576538, "learning_rate": 2.350140056022409e-05, "loss": 0.3209, "step": 34977 }, { "epoch": 19.54078212290503, "grad_norm": 0.5294460654258728, "learning_rate": 2.34733893557423e-05, "loss": 0.3927, "step": 34978 }, { "epoch": 19.541340782122905, "grad_norm": 0.776323139667511, "learning_rate": 2.3445378151260505e-05, "loss": 0.3937, "step": 34979 }, { "epoch": 19.54189944134078, "grad_norm": 1.2215746641159058, "learning_rate": 2.3417366946778714e-05, "loss": 0.3795, "step": 34980 }, { "epoch": 19.542458100558658, "grad_norm": 0.6829772591590881, "learning_rate": 2.338935574229692e-05, "loss": 0.4107, "step": 34981 }, { "epoch": 19.543016759776535, "grad_norm": 0.42722100019454956, "learning_rate": 2.3361344537815126e-05, "loss": 0.4278, "step": 34982 }, { "epoch": 19.543575418994415, "grad_norm": 1.1316921710968018, "learning_rate": 2.3333333333333336e-05, "loss": 0.3934, "step": 34983 }, { "epoch": 19.54413407821229, "grad_norm": 0.4051525890827179, "learning_rate": 2.3305322128851542e-05, "loss": 0.2968, "step": 34984 }, { "epoch": 19.544692737430168, "grad_norm": 0.47396060824394226, "learning_rate": 2.3277310924369748e-05, "loss": 0.4394, "step": 34985 }, { "epoch": 
19.545251396648045, "grad_norm": 0.3654646873474121, "learning_rate": 2.3249299719887957e-05, "loss": 0.3884, "step": 34986 }, { "epoch": 19.54581005586592, "grad_norm": 0.6824849843978882, "learning_rate": 2.3221288515406163e-05, "loss": 0.3576, "step": 34987 }, { "epoch": 19.546368715083798, "grad_norm": 1.8359508514404297, "learning_rate": 2.3193277310924373e-05, "loss": 0.4149, "step": 34988 }, { "epoch": 19.546927374301674, "grad_norm": 0.470069944858551, "learning_rate": 2.316526610644258e-05, "loss": 0.317, "step": 34989 }, { "epoch": 19.547486033519554, "grad_norm": 0.3980754315853119, "learning_rate": 2.3137254901960785e-05, "loss": 0.2888, "step": 34990 }, { "epoch": 19.54804469273743, "grad_norm": 0.7938000559806824, "learning_rate": 2.3109243697478994e-05, "loss": 0.4342, "step": 34991 }, { "epoch": 19.548603351955308, "grad_norm": 0.6193335652351379, "learning_rate": 2.30812324929972e-05, "loss": 0.4072, "step": 34992 }, { "epoch": 19.549162011173184, "grad_norm": 0.3488420844078064, "learning_rate": 2.3053221288515406e-05, "loss": 0.3046, "step": 34993 }, { "epoch": 19.54972067039106, "grad_norm": 0.5678327679634094, "learning_rate": 2.3025210084033615e-05, "loss": 0.4197, "step": 34994 }, { "epoch": 19.550279329608937, "grad_norm": 0.4888420104980469, "learning_rate": 2.299719887955182e-05, "loss": 0.4346, "step": 34995 }, { "epoch": 19.550837988826817, "grad_norm": 0.3524285852909088, "learning_rate": 2.296918767507003e-05, "loss": 0.3785, "step": 34996 }, { "epoch": 19.551396648044694, "grad_norm": 0.42418041825294495, "learning_rate": 2.2941176470588237e-05, "loss": 0.3854, "step": 34997 }, { "epoch": 19.55195530726257, "grad_norm": 0.6350045800209045, "learning_rate": 2.2913165266106443e-05, "loss": 0.4108, "step": 34998 }, { "epoch": 19.552513966480447, "grad_norm": 0.48599985241889954, "learning_rate": 2.2885154061624652e-05, "loss": 0.4059, "step": 34999 }, { "epoch": 19.553072625698324, "grad_norm": 4.1164679527282715, "learning_rate": 
2.2857142857142858e-05, "loss": 0.3567, "step": 35000 }, { "epoch": 19.553072625698324, "eval_cer": 0.08433439167294032, "eval_loss": 0.3190707564353943, "eval_runtime": 55.5481, "eval_samples_per_second": 81.695, "eval_steps_per_second": 5.113, "eval_wer": 0.33387335549286334, "step": 35000 }, { "epoch": 19.5536312849162, "grad_norm": 0.6705339550971985, "learning_rate": 2.2829131652661064e-05, "loss": 0.3228, "step": 35001 }, { "epoch": 19.554189944134077, "grad_norm": 0.8339620232582092, "learning_rate": 2.2801120448179273e-05, "loss": 0.3974, "step": 35002 }, { "epoch": 19.554748603351957, "grad_norm": 0.3480084538459778, "learning_rate": 2.277310924369748e-05, "loss": 0.3924, "step": 35003 }, { "epoch": 19.555307262569833, "grad_norm": 0.4313034117221832, "learning_rate": 2.2745098039215685e-05, "loss": 0.4389, "step": 35004 }, { "epoch": 19.55586592178771, "grad_norm": 0.49800875782966614, "learning_rate": 2.2717086834733895e-05, "loss": 0.4895, "step": 35005 }, { "epoch": 19.556424581005587, "grad_norm": 0.35939350724220276, "learning_rate": 2.26890756302521e-05, "loss": 0.4327, "step": 35006 }, { "epoch": 19.556983240223463, "grad_norm": 0.46768736839294434, "learning_rate": 2.266106442577031e-05, "loss": 0.3753, "step": 35007 }, { "epoch": 19.55754189944134, "grad_norm": 1.2855732440948486, "learning_rate": 2.2633053221288516e-05, "loss": 0.3511, "step": 35008 }, { "epoch": 19.558100558659216, "grad_norm": 0.309354305267334, "learning_rate": 2.2605042016806722e-05, "loss": 0.3547, "step": 35009 }, { "epoch": 19.558659217877096, "grad_norm": 0.5491271615028381, "learning_rate": 2.257703081232493e-05, "loss": 0.5688, "step": 35010 }, { "epoch": 19.559217877094973, "grad_norm": 0.4882706105709076, "learning_rate": 2.2549019607843138e-05, "loss": 0.4048, "step": 35011 }, { "epoch": 19.55977653631285, "grad_norm": 2.1504855155944824, "learning_rate": 2.2521008403361344e-05, "loss": 0.3801, "step": 35012 }, { "epoch": 19.560335195530726, "grad_norm": 
0.33867359161376953, "learning_rate": 2.2492997198879553e-05, "loss": 0.3937, "step": 35013 }, { "epoch": 19.560893854748603, "grad_norm": 0.44972822070121765, "learning_rate": 2.246498599439776e-05, "loss": 0.3847, "step": 35014 }, { "epoch": 19.56145251396648, "grad_norm": 0.36333194375038147, "learning_rate": 2.2436974789915968e-05, "loss": 0.346, "step": 35015 }, { "epoch": 19.56201117318436, "grad_norm": 0.7097443342208862, "learning_rate": 2.2408963585434174e-05, "loss": 0.401, "step": 35016 }, { "epoch": 19.562569832402236, "grad_norm": 0.49143320322036743, "learning_rate": 2.238095238095238e-05, "loss": 0.3742, "step": 35017 }, { "epoch": 19.563128491620112, "grad_norm": 0.39414462447166443, "learning_rate": 2.235294117647059e-05, "loss": 0.468, "step": 35018 }, { "epoch": 19.56368715083799, "grad_norm": 0.5225555896759033, "learning_rate": 2.2324929971988796e-05, "loss": 0.422, "step": 35019 }, { "epoch": 19.564245810055866, "grad_norm": 0.5444976091384888, "learning_rate": 2.2296918767507e-05, "loss": 0.4103, "step": 35020 }, { "epoch": 19.564804469273742, "grad_norm": 1.5434752702713013, "learning_rate": 2.226890756302521e-05, "loss": 0.4295, "step": 35021 }, { "epoch": 19.56536312849162, "grad_norm": 0.43311455845832825, "learning_rate": 2.2240896358543417e-05, "loss": 0.4176, "step": 35022 }, { "epoch": 19.5659217877095, "grad_norm": 3.5761990547180176, "learning_rate": 2.2212885154061626e-05, "loss": 0.526, "step": 35023 }, { "epoch": 19.566480446927375, "grad_norm": 0.35101911425590515, "learning_rate": 2.2184873949579832e-05, "loss": 0.4223, "step": 35024 }, { "epoch": 19.567039106145252, "grad_norm": 0.407419890165329, "learning_rate": 2.215686274509804e-05, "loss": 0.3558, "step": 35025 }, { "epoch": 19.56759776536313, "grad_norm": 0.6738312840461731, "learning_rate": 2.2128851540616248e-05, "loss": 0.4056, "step": 35026 }, { "epoch": 19.568156424581005, "grad_norm": 0.3665103614330292, "learning_rate": 2.2100840336134454e-05, "loss": 0.365, 
"step": 35027 }, { "epoch": 19.56871508379888, "grad_norm": 0.4983854293823242, "learning_rate": 2.207282913165266e-05, "loss": 0.3619, "step": 35028 }, { "epoch": 19.56927374301676, "grad_norm": 0.7285831570625305, "learning_rate": 2.204481792717087e-05, "loss": 0.5124, "step": 35029 }, { "epoch": 19.56983240223464, "grad_norm": 0.5841778516769409, "learning_rate": 2.2016806722689075e-05, "loss": 0.4676, "step": 35030 }, { "epoch": 19.570391061452515, "grad_norm": 0.4914669692516327, "learning_rate": 2.1988795518207285e-05, "loss": 0.4527, "step": 35031 }, { "epoch": 19.57094972067039, "grad_norm": 0.5048649311065674, "learning_rate": 2.196078431372549e-05, "loss": 0.3912, "step": 35032 }, { "epoch": 19.571508379888268, "grad_norm": 0.3908544182777405, "learning_rate": 2.1932773109243697e-05, "loss": 0.4351, "step": 35033 }, { "epoch": 19.572067039106145, "grad_norm": 0.4516026973724365, "learning_rate": 2.1904761904761906e-05, "loss": 0.4406, "step": 35034 }, { "epoch": 19.57262569832402, "grad_norm": 0.3828793168067932, "learning_rate": 2.1876750700280112e-05, "loss": 0.3799, "step": 35035 }, { "epoch": 19.573184357541898, "grad_norm": 0.4044423997402191, "learning_rate": 2.1848739495798318e-05, "loss": 0.3958, "step": 35036 }, { "epoch": 19.573743016759778, "grad_norm": 0.32164379954338074, "learning_rate": 2.1820728291316527e-05, "loss": 0.2731, "step": 35037 }, { "epoch": 19.574301675977654, "grad_norm": 0.683765709400177, "learning_rate": 2.1792717086834733e-05, "loss": 0.4894, "step": 35038 }, { "epoch": 19.57486033519553, "grad_norm": 0.4297809898853302, "learning_rate": 2.176470588235294e-05, "loss": 0.3929, "step": 35039 }, { "epoch": 19.575418994413408, "grad_norm": 0.4439571499824524, "learning_rate": 2.173669467787115e-05, "loss": 0.4521, "step": 35040 }, { "epoch": 19.575977653631284, "grad_norm": 0.4166252911090851, "learning_rate": 2.1708683473389355e-05, "loss": 0.3794, "step": 35041 }, { "epoch": 19.57653631284916, "grad_norm": 
0.38812676072120667, "learning_rate": 2.1680672268907564e-05, "loss": 0.3045, "step": 35042 }, { "epoch": 19.577094972067037, "grad_norm": 0.8387113213539124, "learning_rate": 2.165266106442577e-05, "loss": 0.4833, "step": 35043 }, { "epoch": 19.577653631284917, "grad_norm": 0.538284420967102, "learning_rate": 2.1624649859943976e-05, "loss": 0.4566, "step": 35044 }, { "epoch": 19.578212290502794, "grad_norm": 0.407591849565506, "learning_rate": 2.1596638655462185e-05, "loss": 0.3746, "step": 35045 }, { "epoch": 19.57877094972067, "grad_norm": 0.4515630006790161, "learning_rate": 2.156862745098039e-05, "loss": 0.3487, "step": 35046 }, { "epoch": 19.579329608938547, "grad_norm": 0.7752786874771118, "learning_rate": 2.1540616246498597e-05, "loss": 0.6461, "step": 35047 }, { "epoch": 19.579888268156424, "grad_norm": 0.39946651458740234, "learning_rate": 2.1512605042016807e-05, "loss": 0.3889, "step": 35048 }, { "epoch": 19.5804469273743, "grad_norm": 0.6564873456954956, "learning_rate": 2.1484593837535013e-05, "loss": 0.3389, "step": 35049 }, { "epoch": 19.58100558659218, "grad_norm": 0.4666491746902466, "learning_rate": 2.1456582633053222e-05, "loss": 0.3886, "step": 35050 }, { "epoch": 19.581564245810057, "grad_norm": 0.4656910002231598, "learning_rate": 2.1428571428571428e-05, "loss": 0.3586, "step": 35051 }, { "epoch": 19.582122905027934, "grad_norm": 0.5210119485855103, "learning_rate": 2.1400560224089634e-05, "loss": 0.3565, "step": 35052 }, { "epoch": 19.58268156424581, "grad_norm": 0.7861770987510681, "learning_rate": 2.1372549019607844e-05, "loss": 0.4018, "step": 35053 }, { "epoch": 19.583240223463687, "grad_norm": 0.45525625348091125, "learning_rate": 2.134453781512605e-05, "loss": 0.4268, "step": 35054 }, { "epoch": 19.583798882681563, "grad_norm": 0.3897051215171814, "learning_rate": 2.1316526610644256e-05, "loss": 0.4099, "step": 35055 }, { "epoch": 19.58435754189944, "grad_norm": 0.5481089353561401, "learning_rate": 2.1288515406162465e-05, "loss": 
0.4637, "step": 35056 }, { "epoch": 19.58491620111732, "grad_norm": 0.42182138562202454, "learning_rate": 2.126050420168067e-05, "loss": 0.4193, "step": 35057 }, { "epoch": 19.585474860335196, "grad_norm": 1.3122252225875854, "learning_rate": 2.123249299719888e-05, "loss": 0.3148, "step": 35058 }, { "epoch": 19.586033519553073, "grad_norm": 0.2764971852302551, "learning_rate": 2.1204481792717086e-05, "loss": 0.2922, "step": 35059 }, { "epoch": 19.58659217877095, "grad_norm": 0.3993698060512543, "learning_rate": 2.1176470588235292e-05, "loss": 0.394, "step": 35060 }, { "epoch": 19.587150837988826, "grad_norm": 3.5782339572906494, "learning_rate": 2.11484593837535e-05, "loss": 0.3107, "step": 35061 }, { "epoch": 19.587709497206703, "grad_norm": 0.6111013293266296, "learning_rate": 2.1120448179271708e-05, "loss": 0.4053, "step": 35062 }, { "epoch": 19.58826815642458, "grad_norm": 0.5928622484207153, "learning_rate": 2.1092436974789914e-05, "loss": 0.4382, "step": 35063 }, { "epoch": 19.58882681564246, "grad_norm": 0.5645527243614197, "learning_rate": 2.1064425770308123e-05, "loss": 0.3882, "step": 35064 }, { "epoch": 19.589385474860336, "grad_norm": 0.44165825843811035, "learning_rate": 2.103641456582633e-05, "loss": 0.4407, "step": 35065 }, { "epoch": 19.589944134078213, "grad_norm": 0.6013810038566589, "learning_rate": 2.100840336134454e-05, "loss": 0.3858, "step": 35066 }, { "epoch": 19.59050279329609, "grad_norm": 0.5412119030952454, "learning_rate": 2.0980392156862744e-05, "loss": 0.4121, "step": 35067 }, { "epoch": 19.591061452513966, "grad_norm": 0.5015369057655334, "learning_rate": 2.095238095238095e-05, "loss": 0.3899, "step": 35068 }, { "epoch": 19.591620111731842, "grad_norm": 0.39011484384536743, "learning_rate": 2.092436974789916e-05, "loss": 0.4125, "step": 35069 }, { "epoch": 19.592178770949722, "grad_norm": 1.8045885562896729, "learning_rate": 2.0896358543417366e-05, "loss": 0.4131, "step": 35070 }, { "epoch": 19.5927374301676, "grad_norm": 
3.1306211948394775, "learning_rate": 2.0868347338935572e-05, "loss": 0.3615, "step": 35071 }, { "epoch": 19.593296089385476, "grad_norm": 0.4504767060279846, "learning_rate": 2.0840336134453785e-05, "loss": 0.333, "step": 35072 }, { "epoch": 19.593854748603352, "grad_norm": 1.0253909826278687, "learning_rate": 2.081232492997199e-05, "loss": 0.4006, "step": 35073 }, { "epoch": 19.59441340782123, "grad_norm": 0.38497379422187805, "learning_rate": 2.0784313725490197e-05, "loss": 0.36, "step": 35074 }, { "epoch": 19.594972067039105, "grad_norm": 0.414264976978302, "learning_rate": 2.0756302521008406e-05, "loss": 0.3276, "step": 35075 }, { "epoch": 19.595530726256982, "grad_norm": 0.5852633118629456, "learning_rate": 2.0728291316526612e-05, "loss": 0.4064, "step": 35076 }, { "epoch": 19.596089385474862, "grad_norm": 3.511029005050659, "learning_rate": 2.070028011204482e-05, "loss": 0.3782, "step": 35077 }, { "epoch": 19.59664804469274, "grad_norm": 0.32487791776657104, "learning_rate": 2.0672268907563027e-05, "loss": 0.3451, "step": 35078 }, { "epoch": 19.597206703910615, "grad_norm": 0.46119052171707153, "learning_rate": 2.0644257703081233e-05, "loss": 0.3973, "step": 35079 }, { "epoch": 19.59776536312849, "grad_norm": 0.3387055993080139, "learning_rate": 2.0616246498599443e-05, "loss": 0.3683, "step": 35080 }, { "epoch": 19.598324022346368, "grad_norm": 2.1004459857940674, "learning_rate": 2.058823529411765e-05, "loss": 0.4996, "step": 35081 }, { "epoch": 19.598882681564245, "grad_norm": 0.5597602725028992, "learning_rate": 2.0560224089635855e-05, "loss": 0.3378, "step": 35082 }, { "epoch": 19.59944134078212, "grad_norm": 0.4323146641254425, "learning_rate": 2.0532212885154064e-05, "loss": 0.3227, "step": 35083 }, { "epoch": 19.6, "grad_norm": 0.4124544560909271, "learning_rate": 2.050420168067227e-05, "loss": 0.3843, "step": 35084 }, { "epoch": 19.600558659217878, "grad_norm": 0.5325616598129272, "learning_rate": 2.047619047619048e-05, "loss": 0.4052, "step": 35085 
}, { "epoch": 19.601117318435755, "grad_norm": 0.3568335473537445, "learning_rate": 2.0448179271708685e-05, "loss": 0.325, "step": 35086 }, { "epoch": 19.60167597765363, "grad_norm": 0.510695219039917, "learning_rate": 2.042016806722689e-05, "loss": 0.5959, "step": 35087 }, { "epoch": 19.602234636871508, "grad_norm": 1.3934470415115356, "learning_rate": 2.03921568627451e-05, "loss": 0.3895, "step": 35088 }, { "epoch": 19.602793296089384, "grad_norm": 0.3698326349258423, "learning_rate": 2.0364145658263307e-05, "loss": 0.4022, "step": 35089 }, { "epoch": 19.60335195530726, "grad_norm": 0.6723500490188599, "learning_rate": 2.0336134453781513e-05, "loss": 0.4444, "step": 35090 }, { "epoch": 19.60391061452514, "grad_norm": 0.4389816224575043, "learning_rate": 2.0308123249299722e-05, "loss": 0.3771, "step": 35091 }, { "epoch": 19.604469273743018, "grad_norm": 0.5208408832550049, "learning_rate": 2.0280112044817928e-05, "loss": 0.4235, "step": 35092 }, { "epoch": 19.605027932960894, "grad_norm": 0.4646131098270416, "learning_rate": 2.0252100840336138e-05, "loss": 0.3677, "step": 35093 }, { "epoch": 19.60558659217877, "grad_norm": 0.557743489742279, "learning_rate": 2.0224089635854344e-05, "loss": 0.5623, "step": 35094 }, { "epoch": 19.606145251396647, "grad_norm": 0.5055925250053406, "learning_rate": 2.019607843137255e-05, "loss": 0.4085, "step": 35095 }, { "epoch": 19.606703910614524, "grad_norm": 0.5961707830429077, "learning_rate": 2.016806722689076e-05, "loss": 0.5964, "step": 35096 }, { "epoch": 19.607262569832404, "grad_norm": 0.35368698835372925, "learning_rate": 2.0140056022408965e-05, "loss": 0.3608, "step": 35097 }, { "epoch": 19.60782122905028, "grad_norm": 2.274745464324951, "learning_rate": 2.011204481792717e-05, "loss": 0.4127, "step": 35098 }, { "epoch": 19.608379888268157, "grad_norm": 0.353723406791687, "learning_rate": 2.008403361344538e-05, "loss": 0.4429, "step": 35099 }, { "epoch": 19.608938547486034, "grad_norm": 0.337365984916687, "learning_rate": 
2.0056022408963586e-05, "loss": 0.3128, "step": 35100 }, { "epoch": 19.60949720670391, "grad_norm": 0.44131410121917725, "learning_rate": 2.0028011204481796e-05, "loss": 0.4234, "step": 35101 }, { "epoch": 19.610055865921787, "grad_norm": 0.4321846663951874, "learning_rate": 2e-05, "loss": 0.452, "step": 35102 }, { "epoch": 19.610614525139663, "grad_norm": 0.41311925649642944, "learning_rate": 1.9971988795518208e-05, "loss": 0.3764, "step": 35103 }, { "epoch": 19.611173184357543, "grad_norm": 0.4405032694339752, "learning_rate": 1.9943977591036417e-05, "loss": 0.3733, "step": 35104 }, { "epoch": 19.61173184357542, "grad_norm": 0.506595253944397, "learning_rate": 1.9915966386554623e-05, "loss": 0.4627, "step": 35105 }, { "epoch": 19.612290502793297, "grad_norm": 1.0056757926940918, "learning_rate": 1.988795518207283e-05, "loss": 0.3021, "step": 35106 }, { "epoch": 19.612849162011173, "grad_norm": 0.4197084605693817, "learning_rate": 1.985994397759104e-05, "loss": 0.4676, "step": 35107 }, { "epoch": 19.61340782122905, "grad_norm": 0.44864171743392944, "learning_rate": 1.9831932773109244e-05, "loss": 0.3103, "step": 35108 }, { "epoch": 19.613966480446926, "grad_norm": 1.8107091188430786, "learning_rate": 1.980392156862745e-05, "loss": 0.4444, "step": 35109 }, { "epoch": 19.614525139664803, "grad_norm": 0.5745518803596497, "learning_rate": 1.977591036414566e-05, "loss": 0.4349, "step": 35110 }, { "epoch": 19.615083798882683, "grad_norm": 0.43389493227005005, "learning_rate": 1.9747899159663866e-05, "loss": 0.4509, "step": 35111 }, { "epoch": 19.61564245810056, "grad_norm": 0.516075611114502, "learning_rate": 1.9719887955182075e-05, "loss": 0.4, "step": 35112 }, { "epoch": 19.616201117318436, "grad_norm": 0.42388996481895447, "learning_rate": 1.969187675070028e-05, "loss": 0.2859, "step": 35113 }, { "epoch": 19.616759776536313, "grad_norm": 0.4811156094074249, "learning_rate": 1.9663865546218487e-05, "loss": 0.5608, "step": 35114 }, { "epoch": 19.61731843575419, 
"grad_norm": 2.0465471744537354, "learning_rate": 1.9635854341736697e-05, "loss": 0.6116, "step": 35115 }, { "epoch": 19.617877094972066, "grad_norm": 0.6585325598716736, "learning_rate": 1.9607843137254903e-05, "loss": 0.5078, "step": 35116 }, { "epoch": 19.618435754189946, "grad_norm": 0.4917345643043518, "learning_rate": 1.957983193277311e-05, "loss": 0.289, "step": 35117 }, { "epoch": 19.618994413407822, "grad_norm": 1.1267015933990479, "learning_rate": 1.9551820728291318e-05, "loss": 0.573, "step": 35118 }, { "epoch": 19.6195530726257, "grad_norm": 0.3880383372306824, "learning_rate": 1.9523809523809524e-05, "loss": 0.3758, "step": 35119 }, { "epoch": 19.620111731843576, "grad_norm": 2.414391040802002, "learning_rate": 1.9495798319327733e-05, "loss": 0.3644, "step": 35120 }, { "epoch": 19.620670391061452, "grad_norm": 0.5096579790115356, "learning_rate": 1.946778711484594e-05, "loss": 0.4748, "step": 35121 }, { "epoch": 19.62122905027933, "grad_norm": 0.693800687789917, "learning_rate": 1.9439775910364145e-05, "loss": 0.5959, "step": 35122 }, { "epoch": 19.621787709497205, "grad_norm": 0.4663922190666199, "learning_rate": 1.9411764705882355e-05, "loss": 0.3605, "step": 35123 }, { "epoch": 19.622346368715085, "grad_norm": 0.4367486536502838, "learning_rate": 1.938375350140056e-05, "loss": 0.4726, "step": 35124 }, { "epoch": 19.622905027932962, "grad_norm": 0.6484634280204773, "learning_rate": 1.9355742296918767e-05, "loss": 0.4415, "step": 35125 }, { "epoch": 19.62346368715084, "grad_norm": 0.3513046205043793, "learning_rate": 1.9327731092436976e-05, "loss": 0.269, "step": 35126 }, { "epoch": 19.624022346368715, "grad_norm": 0.4450730085372925, "learning_rate": 1.9299719887955182e-05, "loss": 0.4782, "step": 35127 }, { "epoch": 19.62458100558659, "grad_norm": 0.45933833718299866, "learning_rate": 1.927170868347339e-05, "loss": 0.4778, "step": 35128 }, { "epoch": 19.62513966480447, "grad_norm": 3.330951690673828, "learning_rate": 1.9243697478991597e-05, "loss": 
0.5655, "step": 35129 }, { "epoch": 19.625698324022345, "grad_norm": 1.941396713256836, "learning_rate": 1.9215686274509803e-05, "loss": 0.553, "step": 35130 }, { "epoch": 19.626256983240225, "grad_norm": 0.43774154782295227, "learning_rate": 1.9187675070028013e-05, "loss": 0.4074, "step": 35131 }, { "epoch": 19.6268156424581, "grad_norm": 0.43905583024024963, "learning_rate": 1.915966386554622e-05, "loss": 0.457, "step": 35132 }, { "epoch": 19.627374301675978, "grad_norm": 0.45976385474205017, "learning_rate": 1.9131652661064425e-05, "loss": 0.3267, "step": 35133 }, { "epoch": 19.627932960893855, "grad_norm": 0.6096678972244263, "learning_rate": 1.9103641456582634e-05, "loss": 0.4661, "step": 35134 }, { "epoch": 19.62849162011173, "grad_norm": 0.35695183277130127, "learning_rate": 1.907563025210084e-05, "loss": 0.4388, "step": 35135 }, { "epoch": 19.629050279329608, "grad_norm": 0.30571630597114563, "learning_rate": 1.904761904761905e-05, "loss": 0.3864, "step": 35136 }, { "epoch": 19.629608938547484, "grad_norm": 1.6890528202056885, "learning_rate": 1.9019607843137255e-05, "loss": 0.4257, "step": 35137 }, { "epoch": 19.630167597765364, "grad_norm": 0.3722410500049591, "learning_rate": 1.899159663865546e-05, "loss": 0.4071, "step": 35138 }, { "epoch": 19.63072625698324, "grad_norm": 0.4365113079547882, "learning_rate": 1.896358543417367e-05, "loss": 0.5847, "step": 35139 }, { "epoch": 19.631284916201118, "grad_norm": 0.3387068212032318, "learning_rate": 1.8935574229691877e-05, "loss": 0.3363, "step": 35140 }, { "epoch": 19.631843575418994, "grad_norm": 0.49587541818618774, "learning_rate": 1.8907563025210083e-05, "loss": 0.3513, "step": 35141 }, { "epoch": 19.63240223463687, "grad_norm": 0.469573050737381, "learning_rate": 1.8879551820728292e-05, "loss": 0.508, "step": 35142 }, { "epoch": 19.632960893854747, "grad_norm": 0.4225670099258423, "learning_rate": 1.8851540616246498e-05, "loss": 0.4525, "step": 35143 }, { "epoch": 19.633519553072627, "grad_norm": 
0.49111419916152954, "learning_rate": 1.8823529411764704e-05, "loss": 0.4103, "step": 35144 }, { "epoch": 19.634078212290504, "grad_norm": 0.416355699300766, "learning_rate": 1.8795518207282914e-05, "loss": 0.4392, "step": 35145 }, { "epoch": 19.63463687150838, "grad_norm": 0.49225419759750366, "learning_rate": 1.876750700280112e-05, "loss": 0.4132, "step": 35146 }, { "epoch": 19.635195530726257, "grad_norm": 1.0314242839813232, "learning_rate": 1.873949579831933e-05, "loss": 0.3441, "step": 35147 }, { "epoch": 19.635754189944134, "grad_norm": 0.39084526896476746, "learning_rate": 1.8711484593837535e-05, "loss": 0.3476, "step": 35148 }, { "epoch": 19.63631284916201, "grad_norm": 1.059799313545227, "learning_rate": 1.868347338935574e-05, "loss": 0.3516, "step": 35149 }, { "epoch": 19.636871508379887, "grad_norm": 0.4000471234321594, "learning_rate": 1.865546218487395e-05, "loss": 0.3686, "step": 35150 }, { "epoch": 19.637430167597767, "grad_norm": 0.45973414182662964, "learning_rate": 1.8627450980392156e-05, "loss": 0.4799, "step": 35151 }, { "epoch": 19.637988826815644, "grad_norm": 0.875268816947937, "learning_rate": 1.8599439775910362e-05, "loss": 0.3757, "step": 35152 }, { "epoch": 19.63854748603352, "grad_norm": 0.589676558971405, "learning_rate": 1.8571428571428572e-05, "loss": 0.3621, "step": 35153 }, { "epoch": 19.639106145251397, "grad_norm": 0.3321785628795624, "learning_rate": 1.8543417366946778e-05, "loss": 0.2663, "step": 35154 }, { "epoch": 19.639664804469273, "grad_norm": 0.40286576747894287, "learning_rate": 1.8515406162464987e-05, "loss": 0.348, "step": 35155 }, { "epoch": 19.64022346368715, "grad_norm": 0.5802273154258728, "learning_rate": 1.8487394957983193e-05, "loss": 0.3961, "step": 35156 }, { "epoch": 19.640782122905026, "grad_norm": 17.08884620666504, "learning_rate": 1.84593837535014e-05, "loss": 0.3829, "step": 35157 }, { "epoch": 19.641340782122906, "grad_norm": 3.4307515621185303, "learning_rate": 1.843137254901961e-05, "loss": 0.3988, 
"step": 35158 }, { "epoch": 19.641899441340783, "grad_norm": 0.45335379242897034, "learning_rate": 1.8403361344537814e-05, "loss": 0.3577, "step": 35159 }, { "epoch": 19.64245810055866, "grad_norm": 0.44909825921058655, "learning_rate": 1.837535014005602e-05, "loss": 0.4292, "step": 35160 }, { "epoch": 19.643016759776536, "grad_norm": 0.38877925276756287, "learning_rate": 1.834733893557423e-05, "loss": 0.4574, "step": 35161 }, { "epoch": 19.643575418994413, "grad_norm": 0.5969152450561523, "learning_rate": 1.8319327731092436e-05, "loss": 0.5093, "step": 35162 }, { "epoch": 19.64413407821229, "grad_norm": 0.6126071810722351, "learning_rate": 1.8291316526610645e-05, "loss": 0.3742, "step": 35163 }, { "epoch": 19.64469273743017, "grad_norm": 0.7234187722206116, "learning_rate": 1.826330532212885e-05, "loss": 0.5294, "step": 35164 }, { "epoch": 19.645251396648046, "grad_norm": 0.3739009201526642, "learning_rate": 1.8235294117647057e-05, "loss": 0.3257, "step": 35165 }, { "epoch": 19.645810055865923, "grad_norm": 0.35236263275146484, "learning_rate": 1.8207282913165267e-05, "loss": 0.3686, "step": 35166 }, { "epoch": 19.6463687150838, "grad_norm": 0.614061176776886, "learning_rate": 1.8179271708683473e-05, "loss": 0.4206, "step": 35167 }, { "epoch": 19.646927374301676, "grad_norm": 0.37418922781944275, "learning_rate": 1.815126050420168e-05, "loss": 0.448, "step": 35168 }, { "epoch": 19.647486033519552, "grad_norm": 0.456551730632782, "learning_rate": 1.8123249299719888e-05, "loss": 0.3942, "step": 35169 }, { "epoch": 19.64804469273743, "grad_norm": 6.372609615325928, "learning_rate": 1.8095238095238094e-05, "loss": 0.3743, "step": 35170 }, { "epoch": 19.64860335195531, "grad_norm": 0.6308823823928833, "learning_rate": 1.8067226890756303e-05, "loss": 0.5636, "step": 35171 }, { "epoch": 19.649162011173186, "grad_norm": 0.3686907887458801, "learning_rate": 1.803921568627451e-05, "loss": 0.4425, "step": 35172 }, { "epoch": 19.649720670391062, "grad_norm": 
5.129613876342773, "learning_rate": 1.8011204481792715e-05, "loss": 0.3914, "step": 35173 }, { "epoch": 19.65027932960894, "grad_norm": 1.5420482158660889, "learning_rate": 1.7983193277310925e-05, "loss": 0.4004, "step": 35174 }, { "epoch": 19.650837988826815, "grad_norm": 0.517649233341217, "learning_rate": 1.795518207282913e-05, "loss": 0.5575, "step": 35175 }, { "epoch": 19.65139664804469, "grad_norm": 1.0616827011108398, "learning_rate": 1.7927170868347337e-05, "loss": 0.3526, "step": 35176 }, { "epoch": 19.65195530726257, "grad_norm": 0.5183215141296387, "learning_rate": 1.7899159663865546e-05, "loss": 0.4371, "step": 35177 }, { "epoch": 19.65251396648045, "grad_norm": 0.34969913959503174, "learning_rate": 1.7871148459383752e-05, "loss": 0.3941, "step": 35178 }, { "epoch": 19.653072625698325, "grad_norm": 0.3530867099761963, "learning_rate": 1.7843137254901958e-05, "loss": 0.3556, "step": 35179 }, { "epoch": 19.6536312849162, "grad_norm": 0.38902702927589417, "learning_rate": 1.7815126050420167e-05, "loss": 0.4256, "step": 35180 }, { "epoch": 19.654189944134078, "grad_norm": 0.9034125804901123, "learning_rate": 1.7787114845938373e-05, "loss": 0.3844, "step": 35181 }, { "epoch": 19.654748603351955, "grad_norm": 0.7404069900512695, "learning_rate": 1.7759103641456583e-05, "loss": 0.4225, "step": 35182 }, { "epoch": 19.65530726256983, "grad_norm": 0.41993728280067444, "learning_rate": 1.773109243697479e-05, "loss": 0.4018, "step": 35183 }, { "epoch": 19.655865921787708, "grad_norm": 0.6756036877632141, "learning_rate": 1.7703081232492995e-05, "loss": 0.476, "step": 35184 }, { "epoch": 19.656424581005588, "grad_norm": 0.4275253415107727, "learning_rate": 1.7675070028011204e-05, "loss": 0.3528, "step": 35185 }, { "epoch": 19.656983240223465, "grad_norm": 5.222357749938965, "learning_rate": 1.764705882352941e-05, "loss": 0.7583, "step": 35186 }, { "epoch": 19.65754189944134, "grad_norm": 0.6256312131881714, "learning_rate": 1.7619047619047616e-05, "loss": 0.4369, 
"step": 35187 }, { "epoch": 19.658100558659218, "grad_norm": 0.5469828248023987, "learning_rate": 1.759103641456583e-05, "loss": 0.39, "step": 35188 }, { "epoch": 19.658659217877094, "grad_norm": 0.7170218825340271, "learning_rate": 1.7563025210084035e-05, "loss": 0.4195, "step": 35189 }, { "epoch": 19.65921787709497, "grad_norm": 0.5640260577201843, "learning_rate": 1.7535014005602244e-05, "loss": 0.3716, "step": 35190 }, { "epoch": 19.659776536312847, "grad_norm": 0.7066543102264404, "learning_rate": 1.750700280112045e-05, "loss": 0.5033, "step": 35191 }, { "epoch": 19.660335195530728, "grad_norm": 1.5083872079849243, "learning_rate": 1.7478991596638656e-05, "loss": 0.4156, "step": 35192 }, { "epoch": 19.660893854748604, "grad_norm": 0.46740296483039856, "learning_rate": 1.7450980392156866e-05, "loss": 0.4254, "step": 35193 }, { "epoch": 19.66145251396648, "grad_norm": 0.526253342628479, "learning_rate": 1.7422969187675072e-05, "loss": 0.4311, "step": 35194 }, { "epoch": 19.662011173184357, "grad_norm": 0.46243155002593994, "learning_rate": 1.7394957983193278e-05, "loss": 0.3734, "step": 35195 }, { "epoch": 19.662569832402234, "grad_norm": 0.5958356261253357, "learning_rate": 1.7366946778711487e-05, "loss": 0.5343, "step": 35196 }, { "epoch": 19.66312849162011, "grad_norm": 0.6704958081245422, "learning_rate": 1.7338935574229693e-05, "loss": 0.3482, "step": 35197 }, { "epoch": 19.66368715083799, "grad_norm": 1.38182532787323, "learning_rate": 1.7310924369747902e-05, "loss": 0.3423, "step": 35198 }, { "epoch": 19.664245810055867, "grad_norm": 0.4332784116268158, "learning_rate": 1.728291316526611e-05, "loss": 0.3564, "step": 35199 }, { "epoch": 19.664804469273744, "grad_norm": 0.41296276450157166, "learning_rate": 1.7254901960784314e-05, "loss": 0.4204, "step": 35200 }, { "epoch": 19.66536312849162, "grad_norm": 0.5465002059936523, "learning_rate": 1.7226890756302524e-05, "loss": 0.4535, "step": 35201 }, { "epoch": 19.665921787709497, "grad_norm": 
3.1349384784698486, "learning_rate": 1.719887955182073e-05, "loss": 0.4663, "step": 35202 }, { "epoch": 19.666480446927373, "grad_norm": 0.5418531894683838, "learning_rate": 1.7170868347338936e-05, "loss": 0.3629, "step": 35203 }, { "epoch": 19.66703910614525, "grad_norm": 0.41018274426460266, "learning_rate": 1.7142857142857145e-05, "loss": 0.3583, "step": 35204 }, { "epoch": 19.66759776536313, "grad_norm": 1.4892946481704712, "learning_rate": 1.711484593837535e-05, "loss": 0.4989, "step": 35205 }, { "epoch": 19.668156424581007, "grad_norm": 0.3679587244987488, "learning_rate": 1.7086834733893557e-05, "loss": 0.2931, "step": 35206 }, { "epoch": 19.668715083798883, "grad_norm": 0.3493441641330719, "learning_rate": 1.7058823529411767e-05, "loss": 0.3441, "step": 35207 }, { "epoch": 19.66927374301676, "grad_norm": 0.3642297387123108, "learning_rate": 1.7030812324929973e-05, "loss": 0.348, "step": 35208 }, { "epoch": 19.669832402234636, "grad_norm": 0.48526158928871155, "learning_rate": 1.7002801120448182e-05, "loss": 0.4682, "step": 35209 }, { "epoch": 19.670391061452513, "grad_norm": 0.5626773834228516, "learning_rate": 1.6974789915966388e-05, "loss": 0.3355, "step": 35210 }, { "epoch": 19.67094972067039, "grad_norm": 0.3787447214126587, "learning_rate": 1.6946778711484594e-05, "loss": 0.3065, "step": 35211 }, { "epoch": 19.67150837988827, "grad_norm": 0.5290059447288513, "learning_rate": 1.6918767507002803e-05, "loss": 0.3894, "step": 35212 }, { "epoch": 19.672067039106146, "grad_norm": 0.5625268816947937, "learning_rate": 1.689075630252101e-05, "loss": 0.369, "step": 35213 }, { "epoch": 19.672625698324023, "grad_norm": 0.5560200810432434, "learning_rate": 1.6862745098039215e-05, "loss": 0.3481, "step": 35214 }, { "epoch": 19.6731843575419, "grad_norm": 0.4861189126968384, "learning_rate": 1.6834733893557425e-05, "loss": 0.354, "step": 35215 }, { "epoch": 19.673743016759776, "grad_norm": 0.3645785450935364, "learning_rate": 1.680672268907563e-05, "loss": 0.3124, 
"step": 35216 }, { "epoch": 19.674301675977652, "grad_norm": 0.4363611340522766, "learning_rate": 1.677871148459384e-05, "loss": 0.3786, "step": 35217 }, { "epoch": 19.674860335195532, "grad_norm": 0.4242125451564789, "learning_rate": 1.6750700280112046e-05, "loss": 0.3411, "step": 35218 }, { "epoch": 19.67541899441341, "grad_norm": 0.6243457198143005, "learning_rate": 1.6722689075630252e-05, "loss": 0.4185, "step": 35219 }, { "epoch": 19.675977653631286, "grad_norm": 0.38432514667510986, "learning_rate": 1.669467787114846e-05, "loss": 0.3351, "step": 35220 }, { "epoch": 19.676536312849162, "grad_norm": 0.47736856341362, "learning_rate": 1.6666666666666667e-05, "loss": 0.3836, "step": 35221 }, { "epoch": 19.67709497206704, "grad_norm": 0.3078649342060089, "learning_rate": 1.6638655462184873e-05, "loss": 0.302, "step": 35222 }, { "epoch": 19.677653631284915, "grad_norm": 0.44212305545806885, "learning_rate": 1.6610644257703083e-05, "loss": 0.3989, "step": 35223 }, { "epoch": 19.678212290502792, "grad_norm": 0.3291095197200775, "learning_rate": 1.658263305322129e-05, "loss": 0.3245, "step": 35224 }, { "epoch": 19.678770949720672, "grad_norm": 0.5435345768928528, "learning_rate": 1.6554621848739498e-05, "loss": 0.384, "step": 35225 }, { "epoch": 19.67932960893855, "grad_norm": 0.5516030192375183, "learning_rate": 1.6526610644257704e-05, "loss": 0.5009, "step": 35226 }, { "epoch": 19.679888268156425, "grad_norm": 0.29875752329826355, "learning_rate": 1.649859943977591e-05, "loss": 0.3368, "step": 35227 }, { "epoch": 19.6804469273743, "grad_norm": 0.6018951535224915, "learning_rate": 1.647058823529412e-05, "loss": 0.3497, "step": 35228 }, { "epoch": 19.68100558659218, "grad_norm": 0.5232781767845154, "learning_rate": 1.6442577030812326e-05, "loss": 0.4386, "step": 35229 }, { "epoch": 19.681564245810055, "grad_norm": 0.40044742822647095, "learning_rate": 1.641456582633053e-05, "loss": 0.3798, "step": 35230 }, { "epoch": 19.68212290502793, "grad_norm": 0.514755368232727, 
"learning_rate": 1.638655462184874e-05, "loss": 0.5279, "step": 35231 }, { "epoch": 19.68268156424581, "grad_norm": 0.37741488218307495, "learning_rate": 1.6358543417366947e-05, "loss": 0.3553, "step": 35232 }, { "epoch": 19.683240223463688, "grad_norm": 2.0443544387817383, "learning_rate": 1.6330532212885156e-05, "loss": 0.4384, "step": 35233 }, { "epoch": 19.683798882681565, "grad_norm": 0.9448440670967102, "learning_rate": 1.6302521008403362e-05, "loss": 0.3242, "step": 35234 }, { "epoch": 19.68435754189944, "grad_norm": 0.43367841839790344, "learning_rate": 1.627450980392157e-05, "loss": 0.3235, "step": 35235 }, { "epoch": 19.684916201117318, "grad_norm": 0.4780968129634857, "learning_rate": 1.6246498599439778e-05, "loss": 0.4009, "step": 35236 }, { "epoch": 19.685474860335194, "grad_norm": 0.42769503593444824, "learning_rate": 1.6218487394957984e-05, "loss": 0.5156, "step": 35237 }, { "epoch": 19.68603351955307, "grad_norm": 0.4197807312011719, "learning_rate": 1.619047619047619e-05, "loss": 0.3193, "step": 35238 }, { "epoch": 19.68659217877095, "grad_norm": 1.7733579874038696, "learning_rate": 1.61624649859944e-05, "loss": 0.3267, "step": 35239 }, { "epoch": 19.687150837988828, "grad_norm": 0.5177894830703735, "learning_rate": 1.6134453781512605e-05, "loss": 0.3525, "step": 35240 }, { "epoch": 19.687709497206704, "grad_norm": 0.5187222361564636, "learning_rate": 1.610644257703081e-05, "loss": 0.4945, "step": 35241 }, { "epoch": 19.68826815642458, "grad_norm": 0.527868390083313, "learning_rate": 1.607843137254902e-05, "loss": 0.4959, "step": 35242 }, { "epoch": 19.688826815642457, "grad_norm": 1.0807161331176758, "learning_rate": 1.6050420168067226e-05, "loss": 0.3997, "step": 35243 }, { "epoch": 19.689385474860334, "grad_norm": 1.081878662109375, "learning_rate": 1.6022408963585436e-05, "loss": 0.4295, "step": 35244 }, { "epoch": 19.689944134078214, "grad_norm": 0.5213796496391296, "learning_rate": 1.5994397759103642e-05, "loss": 0.4486, "step": 35245 }, { 
"epoch": 19.69050279329609, "grad_norm": 0.9523847103118896, "learning_rate": 1.5966386554621848e-05, "loss": 0.3599, "step": 35246 }, { "epoch": 19.691061452513967, "grad_norm": 0.5482472777366638, "learning_rate": 1.5938375350140057e-05, "loss": 0.3607, "step": 35247 }, { "epoch": 19.691620111731844, "grad_norm": 0.6379703283309937, "learning_rate": 1.5910364145658263e-05, "loss": 0.3173, "step": 35248 }, { "epoch": 19.69217877094972, "grad_norm": 0.4073861539363861, "learning_rate": 1.588235294117647e-05, "loss": 0.3485, "step": 35249 }, { "epoch": 19.692737430167597, "grad_norm": 0.4410066604614258, "learning_rate": 1.585434173669468e-05, "loss": 0.3252, "step": 35250 }, { "epoch": 19.693296089385473, "grad_norm": 0.42806291580200195, "learning_rate": 1.5826330532212885e-05, "loss": 0.4112, "step": 35251 }, { "epoch": 19.693854748603353, "grad_norm": 0.39493927359580994, "learning_rate": 1.5798319327731094e-05, "loss": 0.4115, "step": 35252 }, { "epoch": 19.69441340782123, "grad_norm": 0.30333179235458374, "learning_rate": 1.57703081232493e-05, "loss": 0.3883, "step": 35253 }, { "epoch": 19.694972067039107, "grad_norm": 0.4833950400352478, "learning_rate": 1.5742296918767506e-05, "loss": 0.4372, "step": 35254 }, { "epoch": 19.695530726256983, "grad_norm": 0.4764694571495056, "learning_rate": 1.5714285714285715e-05, "loss": 0.5119, "step": 35255 }, { "epoch": 19.69608938547486, "grad_norm": 0.431545615196228, "learning_rate": 1.568627450980392e-05, "loss": 0.3786, "step": 35256 }, { "epoch": 19.696648044692736, "grad_norm": 0.3747271001338959, "learning_rate": 1.5658263305322127e-05, "loss": 0.3579, "step": 35257 }, { "epoch": 19.697206703910613, "grad_norm": 0.34276649355888367, "learning_rate": 1.5630252100840337e-05, "loss": 0.3806, "step": 35258 }, { "epoch": 19.697765363128493, "grad_norm": 0.47646328806877136, "learning_rate": 1.5602240896358543e-05, "loss": 0.3585, "step": 35259 }, { "epoch": 19.69832402234637, "grad_norm": 0.34929677844047546, 
"learning_rate": 1.5574229691876752e-05, "loss": 0.3619, "step": 35260 }, { "epoch": 19.698882681564246, "grad_norm": 0.8763848543167114, "learning_rate": 1.5546218487394958e-05, "loss": 0.4121, "step": 35261 }, { "epoch": 19.699441340782123, "grad_norm": 0.3957643508911133, "learning_rate": 1.5518207282913167e-05, "loss": 0.3951, "step": 35262 }, { "epoch": 19.7, "grad_norm": 0.48728543519973755, "learning_rate": 1.5490196078431373e-05, "loss": 0.4378, "step": 35263 }, { "epoch": 19.700558659217876, "grad_norm": 0.4075457751750946, "learning_rate": 1.546218487394958e-05, "loss": 0.3792, "step": 35264 }, { "epoch": 19.701117318435756, "grad_norm": 0.7470335364341736, "learning_rate": 1.543417366946779e-05, "loss": 0.4578, "step": 35265 }, { "epoch": 19.701675977653633, "grad_norm": 0.5533445477485657, "learning_rate": 1.5406162464985995e-05, "loss": 0.3696, "step": 35266 }, { "epoch": 19.70223463687151, "grad_norm": 0.3893565535545349, "learning_rate": 1.53781512605042e-05, "loss": 0.4154, "step": 35267 }, { "epoch": 19.702793296089386, "grad_norm": 0.5706729292869568, "learning_rate": 1.535014005602241e-05, "loss": 0.3055, "step": 35268 }, { "epoch": 19.703351955307262, "grad_norm": 0.4026053845882416, "learning_rate": 1.5322128851540616e-05, "loss": 0.3227, "step": 35269 }, { "epoch": 19.70391061452514, "grad_norm": 0.3754120469093323, "learning_rate": 1.5294117647058826e-05, "loss": 0.3245, "step": 35270 }, { "epoch": 19.704469273743015, "grad_norm": 0.5366465449333191, "learning_rate": 1.526610644257703e-05, "loss": 0.3944, "step": 35271 }, { "epoch": 19.705027932960895, "grad_norm": 0.38823482394218445, "learning_rate": 1.5238095238095238e-05, "loss": 0.41, "step": 35272 }, { "epoch": 19.705586592178772, "grad_norm": 0.6000133752822876, "learning_rate": 1.5210084033613447e-05, "loss": 0.3764, "step": 35273 }, { "epoch": 19.70614525139665, "grad_norm": 0.4234936237335205, "learning_rate": 1.5182072829131653e-05, "loss": 0.3829, "step": 35274 }, { "epoch": 
19.706703910614525, "grad_norm": 0.38632890582084656, "learning_rate": 1.515406162464986e-05, "loss": 0.3921, "step": 35275 }, { "epoch": 19.7072625698324, "grad_norm": 0.5408406853675842, "learning_rate": 1.5126050420168068e-05, "loss": 0.4565, "step": 35276 }, { "epoch": 19.70782122905028, "grad_norm": 0.5979124307632446, "learning_rate": 1.5098039215686276e-05, "loss": 0.5407, "step": 35277 }, { "epoch": 19.708379888268155, "grad_norm": 1.4332454204559326, "learning_rate": 1.5070028011204482e-05, "loss": 0.399, "step": 35278 }, { "epoch": 19.708938547486035, "grad_norm": 0.5275501012802124, "learning_rate": 1.504201680672269e-05, "loss": 0.3606, "step": 35279 }, { "epoch": 19.70949720670391, "grad_norm": 0.6827466487884521, "learning_rate": 1.5014005602240897e-05, "loss": 0.4747, "step": 35280 }, { "epoch": 19.710055865921788, "grad_norm": 1.2533493041992188, "learning_rate": 1.4985994397759105e-05, "loss": 0.393, "step": 35281 }, { "epoch": 19.710614525139665, "grad_norm": 0.4410189688205719, "learning_rate": 1.4957983193277311e-05, "loss": 0.3918, "step": 35282 }, { "epoch": 19.71117318435754, "grad_norm": 0.357056587934494, "learning_rate": 1.4929971988795519e-05, "loss": 0.389, "step": 35283 }, { "epoch": 19.711731843575418, "grad_norm": 0.7442764639854431, "learning_rate": 1.4901960784313726e-05, "loss": 0.3385, "step": 35284 }, { "epoch": 19.712290502793294, "grad_norm": 1.0437657833099365, "learning_rate": 1.4873949579831934e-05, "loss": 0.4917, "step": 35285 }, { "epoch": 19.712849162011175, "grad_norm": 0.37906742095947266, "learning_rate": 1.484593837535014e-05, "loss": 0.3227, "step": 35286 }, { "epoch": 19.71340782122905, "grad_norm": 0.5135797262191772, "learning_rate": 1.4817927170868348e-05, "loss": 0.5272, "step": 35287 }, { "epoch": 19.713966480446928, "grad_norm": 0.343237042427063, "learning_rate": 1.4789915966386556e-05, "loss": 0.3696, "step": 35288 }, { "epoch": 19.714525139664804, "grad_norm": 0.5119778513908386, "learning_rate": 
1.4761904761904763e-05, "loss": 0.4165, "step": 35289 }, { "epoch": 19.71508379888268, "grad_norm": 1.148737907409668, "learning_rate": 1.473389355742297e-05, "loss": 0.3871, "step": 35290 }, { "epoch": 19.715642458100557, "grad_norm": 0.5131190419197083, "learning_rate": 1.4705882352941177e-05, "loss": 0.4368, "step": 35291 }, { "epoch": 19.716201117318434, "grad_norm": 0.36445698142051697, "learning_rate": 1.4677871148459385e-05, "loss": 0.4555, "step": 35292 }, { "epoch": 19.716759776536314, "grad_norm": 0.34603628516197205, "learning_rate": 1.4649859943977592e-05, "loss": 0.3412, "step": 35293 }, { "epoch": 19.71731843575419, "grad_norm": 0.5670938491821289, "learning_rate": 1.4621848739495798e-05, "loss": 0.3815, "step": 35294 }, { "epoch": 19.717877094972067, "grad_norm": 0.5026167035102844, "learning_rate": 1.4593837535014006e-05, "loss": 0.3656, "step": 35295 }, { "epoch": 19.718435754189944, "grad_norm": 1.0616168975830078, "learning_rate": 1.4565826330532214e-05, "loss": 0.3349, "step": 35296 }, { "epoch": 19.71899441340782, "grad_norm": 0.8057705163955688, "learning_rate": 1.4537815126050421e-05, "loss": 0.3383, "step": 35297 }, { "epoch": 19.719553072625697, "grad_norm": 0.35158783197402954, "learning_rate": 1.4509803921568627e-05, "loss": 0.3749, "step": 35298 }, { "epoch": 19.720111731843577, "grad_norm": 0.3770917057991028, "learning_rate": 1.4481792717086835e-05, "loss": 0.4597, "step": 35299 }, { "epoch": 19.720670391061454, "grad_norm": 0.3776755630970001, "learning_rate": 1.4453781512605043e-05, "loss": 0.5356, "step": 35300 }, { "epoch": 19.72122905027933, "grad_norm": 1.9294934272766113, "learning_rate": 1.442577030812325e-05, "loss": 0.524, "step": 35301 }, { "epoch": 19.721787709497207, "grad_norm": 0.34345540404319763, "learning_rate": 1.4397759103641456e-05, "loss": 0.3501, "step": 35302 }, { "epoch": 19.722346368715083, "grad_norm": 0.5337271690368652, "learning_rate": 1.4369747899159664e-05, "loss": 0.3747, "step": 35303 }, { "epoch": 
19.72290502793296, "grad_norm": 0.6082429885864258, "learning_rate": 1.4341736694677872e-05, "loss": 0.4915, "step": 35304 }, { "epoch": 19.723463687150836, "grad_norm": 0.793801486492157, "learning_rate": 1.431372549019608e-05, "loss": 0.4021, "step": 35305 }, { "epoch": 19.724022346368717, "grad_norm": 14.263829231262207, "learning_rate": 1.4285714285714285e-05, "loss": 0.7126, "step": 35306 }, { "epoch": 19.724581005586593, "grad_norm": 0.35827380418777466, "learning_rate": 1.4257703081232493e-05, "loss": 0.3766, "step": 35307 }, { "epoch": 19.72513966480447, "grad_norm": 1.4470044374465942, "learning_rate": 1.42296918767507e-05, "loss": 0.3858, "step": 35308 }, { "epoch": 19.725698324022346, "grad_norm": 0.34180471301078796, "learning_rate": 1.4201680672268907e-05, "loss": 0.3456, "step": 35309 }, { "epoch": 19.726256983240223, "grad_norm": 0.6229627728462219, "learning_rate": 1.4173669467787114e-05, "loss": 0.4423, "step": 35310 }, { "epoch": 19.7268156424581, "grad_norm": 0.5809889435768127, "learning_rate": 1.4145658263305322e-05, "loss": 0.4294, "step": 35311 }, { "epoch": 19.727374301675976, "grad_norm": 0.566570520401001, "learning_rate": 1.411764705882353e-05, "loss": 0.295, "step": 35312 }, { "epoch": 19.727932960893856, "grad_norm": 4.236278057098389, "learning_rate": 1.4089635854341736e-05, "loss": 0.3396, "step": 35313 }, { "epoch": 19.728491620111733, "grad_norm": 0.4100249707698822, "learning_rate": 1.4061624649859944e-05, "loss": 0.3831, "step": 35314 }, { "epoch": 19.72905027932961, "grad_norm": 0.38920170068740845, "learning_rate": 1.4033613445378151e-05, "loss": 0.3927, "step": 35315 }, { "epoch": 19.729608938547486, "grad_norm": 0.3384273648262024, "learning_rate": 1.4005602240896359e-05, "loss": 0.2926, "step": 35316 }, { "epoch": 19.730167597765362, "grad_norm": 0.41815540194511414, "learning_rate": 1.3977591036414565e-05, "loss": 0.4532, "step": 35317 }, { "epoch": 19.73072625698324, "grad_norm": 0.3968935012817383, "learning_rate": 
1.3949579831932773e-05, "loss": 0.4812, "step": 35318 }, { "epoch": 19.73128491620112, "grad_norm": 1.058937668800354, "learning_rate": 1.392156862745098e-05, "loss": 0.3715, "step": 35319 }, { "epoch": 19.731843575418996, "grad_norm": 0.43217697739601135, "learning_rate": 1.3893557422969188e-05, "loss": 0.3202, "step": 35320 }, { "epoch": 19.732402234636872, "grad_norm": 0.8799136281013489, "learning_rate": 1.3865546218487394e-05, "loss": 0.4151, "step": 35321 }, { "epoch": 19.73296089385475, "grad_norm": 0.8152925372123718, "learning_rate": 1.3837535014005602e-05, "loss": 0.3754, "step": 35322 }, { "epoch": 19.733519553072625, "grad_norm": 5.013149261474609, "learning_rate": 1.380952380952381e-05, "loss": 0.4126, "step": 35323 }, { "epoch": 19.734078212290502, "grad_norm": 0.43170085549354553, "learning_rate": 1.3781512605042017e-05, "loss": 0.4514, "step": 35324 }, { "epoch": 19.73463687150838, "grad_norm": 0.44063419103622437, "learning_rate": 1.3753501400560223e-05, "loss": 0.4131, "step": 35325 }, { "epoch": 19.73519553072626, "grad_norm": 0.5054619312286377, "learning_rate": 1.372549019607843e-05, "loss": 0.4444, "step": 35326 }, { "epoch": 19.735754189944135, "grad_norm": 0.4828226864337921, "learning_rate": 1.3697478991596638e-05, "loss": 0.4329, "step": 35327 }, { "epoch": 19.73631284916201, "grad_norm": 0.46969398856163025, "learning_rate": 1.3669467787114846e-05, "loss": 0.4188, "step": 35328 }, { "epoch": 19.73687150837989, "grad_norm": 0.4652106761932373, "learning_rate": 1.3641456582633052e-05, "loss": 0.4382, "step": 35329 }, { "epoch": 19.737430167597765, "grad_norm": 0.3739686906337738, "learning_rate": 1.3613445378151261e-05, "loss": 0.4117, "step": 35330 }, { "epoch": 19.73798882681564, "grad_norm": 0.44180986285209656, "learning_rate": 1.358543417366947e-05, "loss": 0.368, "step": 35331 }, { "epoch": 19.738547486033518, "grad_norm": 0.6450395584106445, "learning_rate": 1.3557422969187677e-05, "loss": 0.3496, "step": 35332 }, { "epoch": 
19.739106145251398, "grad_norm": 0.6799238324165344, "learning_rate": 1.3529411764705883e-05, "loss": 0.4199, "step": 35333 }, { "epoch": 19.739664804469275, "grad_norm": 0.35876762866973877, "learning_rate": 1.350140056022409e-05, "loss": 0.3994, "step": 35334 }, { "epoch": 19.74022346368715, "grad_norm": 2.3797507286071777, "learning_rate": 1.3473389355742298e-05, "loss": 0.3226, "step": 35335 }, { "epoch": 19.740782122905028, "grad_norm": 0.4078042805194855, "learning_rate": 1.3445378151260506e-05, "loss": 0.4355, "step": 35336 }, { "epoch": 19.741340782122904, "grad_norm": 1.2120305299758911, "learning_rate": 1.3417366946778712e-05, "loss": 0.4802, "step": 35337 }, { "epoch": 19.74189944134078, "grad_norm": 2.60994029045105, "learning_rate": 1.338935574229692e-05, "loss": 0.3832, "step": 35338 }, { "epoch": 19.742458100558657, "grad_norm": 0.7532150149345398, "learning_rate": 1.3361344537815127e-05, "loss": 0.5767, "step": 35339 }, { "epoch": 19.743016759776538, "grad_norm": 0.33394649624824524, "learning_rate": 1.3333333333333335e-05, "loss": 0.3255, "step": 35340 }, { "epoch": 19.743575418994414, "grad_norm": 0.3346899151802063, "learning_rate": 1.3305322128851541e-05, "loss": 0.3919, "step": 35341 }, { "epoch": 19.74413407821229, "grad_norm": 1.0925260782241821, "learning_rate": 1.3277310924369749e-05, "loss": 0.3314, "step": 35342 }, { "epoch": 19.744692737430167, "grad_norm": 0.384634405374527, "learning_rate": 1.3249299719887956e-05, "loss": 0.341, "step": 35343 }, { "epoch": 19.745251396648044, "grad_norm": 0.8957847356796265, "learning_rate": 1.3221288515406162e-05, "loss": 0.4007, "step": 35344 }, { "epoch": 19.74581005586592, "grad_norm": 0.4554111659526825, "learning_rate": 1.319327731092437e-05, "loss": 0.4457, "step": 35345 }, { "epoch": 19.7463687150838, "grad_norm": 0.8450426459312439, "learning_rate": 1.3165266106442578e-05, "loss": 0.3426, "step": 35346 }, { "epoch": 19.746927374301677, "grad_norm": 0.4269426763057709, "learning_rate": 
1.3137254901960785e-05, "loss": 0.4541, "step": 35347 }, { "epoch": 19.747486033519554, "grad_norm": 0.4236166179180145, "learning_rate": 1.3109243697478991e-05, "loss": 0.4459, "step": 35348 }, { "epoch": 19.74804469273743, "grad_norm": 0.45875146985054016, "learning_rate": 1.3081232492997199e-05, "loss": 0.557, "step": 35349 }, { "epoch": 19.748603351955307, "grad_norm": 0.4377026855945587, "learning_rate": 1.3053221288515407e-05, "loss": 0.4098, "step": 35350 }, { "epoch": 19.749162011173183, "grad_norm": 0.6969612240791321, "learning_rate": 1.3025210084033614e-05, "loss": 0.2941, "step": 35351 }, { "epoch": 19.74972067039106, "grad_norm": 0.3948517441749573, "learning_rate": 1.299719887955182e-05, "loss": 0.4382, "step": 35352 }, { "epoch": 19.75027932960894, "grad_norm": 0.7713333964347839, "learning_rate": 1.2969187675070028e-05, "loss": 0.4586, "step": 35353 }, { "epoch": 19.750837988826817, "grad_norm": 0.6138208508491516, "learning_rate": 1.2941176470588236e-05, "loss": 0.6719, "step": 35354 }, { "epoch": 19.751396648044693, "grad_norm": 0.520180344581604, "learning_rate": 1.2913165266106444e-05, "loss": 0.3966, "step": 35355 }, { "epoch": 19.75195530726257, "grad_norm": 0.39862850308418274, "learning_rate": 1.288515406162465e-05, "loss": 0.3426, "step": 35356 }, { "epoch": 19.752513966480446, "grad_norm": 0.4524399936199188, "learning_rate": 1.2857142857142857e-05, "loss": 0.3527, "step": 35357 }, { "epoch": 19.753072625698323, "grad_norm": 1.5671555995941162, "learning_rate": 1.2829131652661065e-05, "loss": 0.3862, "step": 35358 }, { "epoch": 19.7536312849162, "grad_norm": 0.38465172052383423, "learning_rate": 1.2801120448179273e-05, "loss": 0.3717, "step": 35359 }, { "epoch": 19.75418994413408, "grad_norm": 0.7431902885437012, "learning_rate": 1.2773109243697479e-05, "loss": 0.3524, "step": 35360 }, { "epoch": 19.754748603351956, "grad_norm": 0.8761482834815979, "learning_rate": 1.2745098039215686e-05, "loss": 0.4172, "step": 35361 }, { "epoch": 
19.755307262569833, "grad_norm": 0.42005470395088196, "learning_rate": 1.2717086834733894e-05, "loss": 0.3334, "step": 35362 }, { "epoch": 19.75586592178771, "grad_norm": 0.4235022962093353, "learning_rate": 1.2689075630252102e-05, "loss": 0.3524, "step": 35363 }, { "epoch": 19.756424581005586, "grad_norm": 0.3634481430053711, "learning_rate": 1.2661064425770308e-05, "loss": 0.398, "step": 35364 }, { "epoch": 19.756983240223462, "grad_norm": 0.43742242455482483, "learning_rate": 1.2633053221288515e-05, "loss": 0.3289, "step": 35365 }, { "epoch": 19.757541899441343, "grad_norm": 0.3694218695163727, "learning_rate": 1.2605042016806723e-05, "loss": 0.4362, "step": 35366 }, { "epoch": 19.75810055865922, "grad_norm": 0.4466545581817627, "learning_rate": 1.257703081232493e-05, "loss": 0.463, "step": 35367 }, { "epoch": 19.758659217877096, "grad_norm": 0.5588370561599731, "learning_rate": 1.2549019607843137e-05, "loss": 0.4343, "step": 35368 }, { "epoch": 19.759217877094972, "grad_norm": 0.41058963537216187, "learning_rate": 1.2521008403361344e-05, "loss": 0.3628, "step": 35369 }, { "epoch": 19.75977653631285, "grad_norm": 0.3880702257156372, "learning_rate": 1.2492997198879552e-05, "loss": 0.3759, "step": 35370 }, { "epoch": 19.760335195530725, "grad_norm": 0.6218333840370178, "learning_rate": 1.246498599439776e-05, "loss": 0.3889, "step": 35371 }, { "epoch": 19.760893854748602, "grad_norm": 0.42052242159843445, "learning_rate": 1.2436974789915966e-05, "loss": 0.3498, "step": 35372 }, { "epoch": 19.761452513966482, "grad_norm": 0.7047318816184998, "learning_rate": 1.2408963585434173e-05, "loss": 0.4009, "step": 35373 }, { "epoch": 19.76201117318436, "grad_norm": 0.8518757820129395, "learning_rate": 1.2380952380952381e-05, "loss": 0.3714, "step": 35374 }, { "epoch": 19.762569832402235, "grad_norm": 1.8158037662506104, "learning_rate": 1.2352941176470587e-05, "loss": 0.4962, "step": 35375 }, { "epoch": 19.76312849162011, "grad_norm": 0.3734445869922638, "learning_rate": 
1.2324929971988795e-05, "loss": 0.3927, "step": 35376 }, { "epoch": 19.76368715083799, "grad_norm": 0.6635797023773193, "learning_rate": 1.2296918767507003e-05, "loss": 0.4089, "step": 35377 }, { "epoch": 19.764245810055865, "grad_norm": 0.3037201762199402, "learning_rate": 1.226890756302521e-05, "loss": 0.2901, "step": 35378 }, { "epoch": 19.76480446927374, "grad_norm": 0.34549978375434875, "learning_rate": 1.2240896358543416e-05, "loss": 0.3791, "step": 35379 }, { "epoch": 19.76536312849162, "grad_norm": 0.3665732741355896, "learning_rate": 1.2212885154061624e-05, "loss": 0.3675, "step": 35380 }, { "epoch": 19.765921787709498, "grad_norm": 0.4470747709274292, "learning_rate": 1.2184873949579832e-05, "loss": 0.4414, "step": 35381 }, { "epoch": 19.766480446927375, "grad_norm": 0.42910757660865784, "learning_rate": 1.215686274509804e-05, "loss": 0.4816, "step": 35382 }, { "epoch": 19.76703910614525, "grad_norm": 5.966570854187012, "learning_rate": 1.2128851540616245e-05, "loss": 0.4625, "step": 35383 }, { "epoch": 19.767597765363128, "grad_norm": 0.43506544828414917, "learning_rate": 1.2100840336134453e-05, "loss": 0.4251, "step": 35384 }, { "epoch": 19.768156424581004, "grad_norm": 2.226890802383423, "learning_rate": 1.207282913165266e-05, "loss": 0.3989, "step": 35385 }, { "epoch": 19.76871508379888, "grad_norm": 0.3308287262916565, "learning_rate": 1.2044817927170868e-05, "loss": 0.3876, "step": 35386 }, { "epoch": 19.76927374301676, "grad_norm": 0.42185738682746887, "learning_rate": 1.2016806722689074e-05, "loss": 0.36, "step": 35387 }, { "epoch": 19.769832402234638, "grad_norm": 0.45810985565185547, "learning_rate": 1.1988795518207284e-05, "loss": 0.4387, "step": 35388 }, { "epoch": 19.770391061452514, "grad_norm": 0.4088507294654846, "learning_rate": 1.1960784313725491e-05, "loss": 0.3708, "step": 35389 }, { "epoch": 19.77094972067039, "grad_norm": 0.5535271763801575, "learning_rate": 1.1932773109243699e-05, "loss": 0.639, "step": 35390 }, { "epoch": 
19.771508379888267, "grad_norm": 0.31074050068855286, "learning_rate": 1.1904761904761905e-05, "loss": 0.2974, "step": 35391 }, { "epoch": 19.772067039106144, "grad_norm": 2.0858309268951416, "learning_rate": 1.1876750700280113e-05, "loss": 0.4046, "step": 35392 }, { "epoch": 19.772625698324024, "grad_norm": 1.9705958366394043, "learning_rate": 1.184873949579832e-05, "loss": 0.4135, "step": 35393 }, { "epoch": 19.7731843575419, "grad_norm": 0.4865002930164337, "learning_rate": 1.1820728291316528e-05, "loss": 0.4873, "step": 35394 }, { "epoch": 19.773743016759777, "grad_norm": 0.6817571520805359, "learning_rate": 1.1792717086834734e-05, "loss": 0.4266, "step": 35395 }, { "epoch": 19.774301675977654, "grad_norm": 0.8982945680618286, "learning_rate": 1.1764705882352942e-05, "loss": 0.3915, "step": 35396 }, { "epoch": 19.77486033519553, "grad_norm": 0.3575826585292816, "learning_rate": 1.173669467787115e-05, "loss": 0.3997, "step": 35397 }, { "epoch": 19.775418994413407, "grad_norm": 0.5289197564125061, "learning_rate": 1.1708683473389357e-05, "loss": 0.4958, "step": 35398 }, { "epoch": 19.775977653631283, "grad_norm": 0.6567515730857849, "learning_rate": 1.1680672268907563e-05, "loss": 0.3063, "step": 35399 }, { "epoch": 19.776536312849164, "grad_norm": 2.3138108253479004, "learning_rate": 1.1652661064425771e-05, "loss": 0.3552, "step": 35400 }, { "epoch": 19.77709497206704, "grad_norm": 1.0725176334381104, "learning_rate": 1.1624649859943979e-05, "loss": 0.4822, "step": 35401 }, { "epoch": 19.777653631284917, "grad_norm": 0.38680484890937805, "learning_rate": 1.1596638655462186e-05, "loss": 0.3236, "step": 35402 }, { "epoch": 19.778212290502793, "grad_norm": 0.5377716422080994, "learning_rate": 1.1568627450980392e-05, "loss": 0.3898, "step": 35403 }, { "epoch": 19.77877094972067, "grad_norm": 5.421585559844971, "learning_rate": 1.15406162464986e-05, "loss": 0.3205, "step": 35404 }, { "epoch": 19.779329608938546, "grad_norm": 0.3550727367401123, "learning_rate": 
1.1512605042016808e-05, "loss": 0.346, "step": 35405 }, { "epoch": 19.779888268156423, "grad_norm": 0.43515947461128235, "learning_rate": 1.1484593837535015e-05, "loss": 0.4045, "step": 35406 }, { "epoch": 19.780446927374303, "grad_norm": 0.4157072901725769, "learning_rate": 1.1456582633053221e-05, "loss": 0.3642, "step": 35407 }, { "epoch": 19.78100558659218, "grad_norm": 0.43124279379844666, "learning_rate": 1.1428571428571429e-05, "loss": 0.4, "step": 35408 }, { "epoch": 19.781564245810056, "grad_norm": 0.5944299697875977, "learning_rate": 1.1400560224089637e-05, "loss": 0.4026, "step": 35409 }, { "epoch": 19.782122905027933, "grad_norm": 0.3536844253540039, "learning_rate": 1.1372549019607843e-05, "loss": 0.3354, "step": 35410 }, { "epoch": 19.78268156424581, "grad_norm": 0.42086103558540344, "learning_rate": 1.134453781512605e-05, "loss": 0.3785, "step": 35411 }, { "epoch": 19.783240223463686, "grad_norm": 1.5688191652297974, "learning_rate": 1.1316526610644258e-05, "loss": 0.4498, "step": 35412 }, { "epoch": 19.783798882681566, "grad_norm": 0.3635714054107666, "learning_rate": 1.1288515406162466e-05, "loss": 0.3804, "step": 35413 }, { "epoch": 19.784357541899443, "grad_norm": 0.6300631761550903, "learning_rate": 1.1260504201680672e-05, "loss": 0.4114, "step": 35414 }, { "epoch": 19.78491620111732, "grad_norm": 8.403244972229004, "learning_rate": 1.123249299719888e-05, "loss": 0.3891, "step": 35415 }, { "epoch": 19.785474860335196, "grad_norm": 0.7297216653823853, "learning_rate": 1.1204481792717087e-05, "loss": 0.538, "step": 35416 }, { "epoch": 19.786033519553072, "grad_norm": 0.7535122036933899, "learning_rate": 1.1176470588235295e-05, "loss": 0.3785, "step": 35417 }, { "epoch": 19.78659217877095, "grad_norm": 0.5579131841659546, "learning_rate": 1.11484593837535e-05, "loss": 0.2886, "step": 35418 }, { "epoch": 19.787150837988825, "grad_norm": 0.36793315410614014, "learning_rate": 1.1120448179271709e-05, "loss": 0.3938, "step": 35419 }, { "epoch": 
19.787709497206706, "grad_norm": 0.4455862045288086, "learning_rate": 1.1092436974789916e-05, "loss": 0.3504, "step": 35420 }, { "epoch": 19.788268156424582, "grad_norm": 0.33554697036743164, "learning_rate": 1.1064425770308124e-05, "loss": 0.3082, "step": 35421 }, { "epoch": 19.78882681564246, "grad_norm": 0.42358070611953735, "learning_rate": 1.103641456582633e-05, "loss": 0.4217, "step": 35422 }, { "epoch": 19.789385474860335, "grad_norm": 1.4139267206192017, "learning_rate": 1.1008403361344538e-05, "loss": 0.3496, "step": 35423 }, { "epoch": 19.789944134078212, "grad_norm": 0.5414817929267883, "learning_rate": 1.0980392156862745e-05, "loss": 0.4734, "step": 35424 }, { "epoch": 19.79050279329609, "grad_norm": 0.47942787408828735, "learning_rate": 1.0952380952380953e-05, "loss": 0.4385, "step": 35425 }, { "epoch": 19.791061452513965, "grad_norm": 0.7389019727706909, "learning_rate": 1.0924369747899159e-05, "loss": 0.3776, "step": 35426 }, { "epoch": 19.791620111731845, "grad_norm": 0.3489637076854706, "learning_rate": 1.0896358543417367e-05, "loss": 0.3097, "step": 35427 }, { "epoch": 19.79217877094972, "grad_norm": 0.6101607084274292, "learning_rate": 1.0868347338935574e-05, "loss": 0.4299, "step": 35428 }, { "epoch": 19.7927374301676, "grad_norm": 0.4865139424800873, "learning_rate": 1.0840336134453782e-05, "loss": 0.3274, "step": 35429 }, { "epoch": 19.793296089385475, "grad_norm": 0.38860467076301575, "learning_rate": 1.0812324929971988e-05, "loss": 0.4513, "step": 35430 }, { "epoch": 19.79385474860335, "grad_norm": 7.287206649780273, "learning_rate": 1.0784313725490196e-05, "loss": 0.3786, "step": 35431 }, { "epoch": 19.794413407821228, "grad_norm": 0.36772340536117554, "learning_rate": 1.0756302521008403e-05, "loss": 0.317, "step": 35432 }, { "epoch": 19.794972067039105, "grad_norm": 1.7814099788665771, "learning_rate": 1.0728291316526611e-05, "loss": 0.4732, "step": 35433 }, { "epoch": 19.795530726256985, "grad_norm": 1.896518349647522, "learning_rate": 
1.0700280112044817e-05, "loss": 0.4465, "step": 35434 }, { "epoch": 19.79608938547486, "grad_norm": 0.5008261799812317, "learning_rate": 1.0672268907563025e-05, "loss": 0.4829, "step": 35435 }, { "epoch": 19.796648044692738, "grad_norm": 0.39349105954170227, "learning_rate": 1.0644257703081232e-05, "loss": 0.4175, "step": 35436 }, { "epoch": 19.797206703910614, "grad_norm": 0.483936071395874, "learning_rate": 1.061624649859944e-05, "loss": 0.4006, "step": 35437 }, { "epoch": 19.79776536312849, "grad_norm": 0.7365521788597107, "learning_rate": 1.0588235294117646e-05, "loss": 0.4336, "step": 35438 }, { "epoch": 19.798324022346367, "grad_norm": 0.4665125012397766, "learning_rate": 1.0560224089635854e-05, "loss": 0.3615, "step": 35439 }, { "epoch": 19.798882681564244, "grad_norm": 0.41941338777542114, "learning_rate": 1.0532212885154062e-05, "loss": 0.3516, "step": 35440 }, { "epoch": 19.799441340782124, "grad_norm": 0.3809608519077301, "learning_rate": 1.050420168067227e-05, "loss": 0.3293, "step": 35441 }, { "epoch": 19.8, "grad_norm": 0.7504149079322815, "learning_rate": 1.0476190476190475e-05, "loss": 0.4951, "step": 35442 }, { "epoch": 19.800558659217877, "grad_norm": 0.5064688324928284, "learning_rate": 1.0448179271708683e-05, "loss": 0.4691, "step": 35443 }, { "epoch": 19.801117318435754, "grad_norm": 0.3260011076927185, "learning_rate": 1.0420168067226892e-05, "loss": 0.3691, "step": 35444 }, { "epoch": 19.80167597765363, "grad_norm": 0.4795764088630676, "learning_rate": 1.0392156862745098e-05, "loss": 0.5028, "step": 35445 }, { "epoch": 19.802234636871507, "grad_norm": 0.30835017561912537, "learning_rate": 1.0364145658263306e-05, "loss": 0.3702, "step": 35446 }, { "epoch": 19.802793296089387, "grad_norm": 1.7510360479354858, "learning_rate": 1.0336134453781514e-05, "loss": 0.4261, "step": 35447 }, { "epoch": 19.803351955307264, "grad_norm": 0.5377888083457947, "learning_rate": 1.0308123249299721e-05, "loss": 0.4054, "step": 35448 }, { "epoch": 
19.80391061452514, "grad_norm": 0.5830637216567993, "learning_rate": 1.0280112044817927e-05, "loss": 0.4766, "step": 35449 }, { "epoch": 19.804469273743017, "grad_norm": 1.8150489330291748, "learning_rate": 1.0252100840336135e-05, "loss": 0.4179, "step": 35450 }, { "epoch": 19.805027932960893, "grad_norm": 0.5734506249427795, "learning_rate": 1.0224089635854343e-05, "loss": 0.4011, "step": 35451 }, { "epoch": 19.80558659217877, "grad_norm": 0.513920247554779, "learning_rate": 1.019607843137255e-05, "loss": 0.4805, "step": 35452 }, { "epoch": 19.806145251396647, "grad_norm": 0.36466121673583984, "learning_rate": 1.0168067226890756e-05, "loss": 0.3617, "step": 35453 }, { "epoch": 19.806703910614527, "grad_norm": 0.44612565636634827, "learning_rate": 1.0140056022408964e-05, "loss": 0.4559, "step": 35454 }, { "epoch": 19.807262569832403, "grad_norm": 1.0071829557418823, "learning_rate": 1.0112044817927172e-05, "loss": 0.3683, "step": 35455 }, { "epoch": 19.80782122905028, "grad_norm": 0.8905125260353088, "learning_rate": 1.008403361344538e-05, "loss": 0.2977, "step": 35456 }, { "epoch": 19.808379888268156, "grad_norm": 1.2234464883804321, "learning_rate": 1.0056022408963585e-05, "loss": 0.4066, "step": 35457 }, { "epoch": 19.808938547486033, "grad_norm": 1.0835731029510498, "learning_rate": 1.0028011204481793e-05, "loss": 0.295, "step": 35458 }, { "epoch": 19.80949720670391, "grad_norm": 0.6086183190345764, "learning_rate": 1e-05, "loss": 0.5409, "step": 35459 }, { "epoch": 19.810055865921786, "grad_norm": 0.41774123907089233, "learning_rate": 9.971988795518209e-06, "loss": 0.4359, "step": 35460 }, { "epoch": 19.810614525139666, "grad_norm": 1.0672895908355713, "learning_rate": 9.943977591036415e-06, "loss": 0.4561, "step": 35461 }, { "epoch": 19.811173184357543, "grad_norm": 0.3792206645011902, "learning_rate": 9.915966386554622e-06, "loss": 0.3996, "step": 35462 }, { "epoch": 19.81173184357542, "grad_norm": 0.5730742812156677, "learning_rate": 9.88795518207283e-06, 
"loss": 0.3345, "step": 35463 }, { "epoch": 19.812290502793296, "grad_norm": 0.49028459191322327, "learning_rate": 9.859943977591038e-06, "loss": 0.4197, "step": 35464 }, { "epoch": 19.812849162011172, "grad_norm": 1.1842142343521118, "learning_rate": 9.831932773109244e-06, "loss": 0.5358, "step": 35465 }, { "epoch": 19.81340782122905, "grad_norm": 0.38475149869918823, "learning_rate": 9.803921568627451e-06, "loss": 0.4663, "step": 35466 }, { "epoch": 19.81396648044693, "grad_norm": 0.5872317552566528, "learning_rate": 9.775910364145659e-06, "loss": 0.3847, "step": 35467 }, { "epoch": 19.814525139664806, "grad_norm": 0.7070002555847168, "learning_rate": 9.747899159663867e-06, "loss": 0.4182, "step": 35468 }, { "epoch": 19.815083798882682, "grad_norm": 0.5117806196212769, "learning_rate": 9.719887955182073e-06, "loss": 0.3946, "step": 35469 }, { "epoch": 19.81564245810056, "grad_norm": 1.5071008205413818, "learning_rate": 9.69187675070028e-06, "loss": 0.3947, "step": 35470 }, { "epoch": 19.816201117318435, "grad_norm": 0.36761990189552307, "learning_rate": 9.663865546218488e-06, "loss": 0.3493, "step": 35471 }, { "epoch": 19.816759776536312, "grad_norm": 0.4044910967350006, "learning_rate": 9.635854341736696e-06, "loss": 0.4225, "step": 35472 }, { "epoch": 19.81731843575419, "grad_norm": 0.32278409600257874, "learning_rate": 9.607843137254902e-06, "loss": 0.3502, "step": 35473 }, { "epoch": 19.81787709497207, "grad_norm": 0.4044205844402313, "learning_rate": 9.57983193277311e-06, "loss": 0.4839, "step": 35474 }, { "epoch": 19.818435754189945, "grad_norm": 0.6844619512557983, "learning_rate": 9.551820728291317e-06, "loss": 0.4976, "step": 35475 }, { "epoch": 19.81899441340782, "grad_norm": 1.0425516366958618, "learning_rate": 9.523809523809525e-06, "loss": 0.4034, "step": 35476 }, { "epoch": 19.8195530726257, "grad_norm": 0.4030227065086365, "learning_rate": 9.49579831932773e-06, "loss": 0.4116, "step": 35477 }, { "epoch": 19.820111731843575, "grad_norm": 
0.5286258459091187, "learning_rate": 9.467787114845938e-06, "loss": 0.5643, "step": 35478 }, { "epoch": 19.82067039106145, "grad_norm": 0.3709220588207245, "learning_rate": 9.439775910364146e-06, "loss": 0.3372, "step": 35479 }, { "epoch": 19.821229050279328, "grad_norm": 0.4690113067626953, "learning_rate": 9.411764705882352e-06, "loss": 0.3767, "step": 35480 }, { "epoch": 19.821787709497208, "grad_norm": 0.3882239758968353, "learning_rate": 9.38375350140056e-06, "loss": 0.3847, "step": 35481 }, { "epoch": 19.822346368715085, "grad_norm": 0.8706823587417603, "learning_rate": 9.355742296918767e-06, "loss": 0.6712, "step": 35482 }, { "epoch": 19.82290502793296, "grad_norm": 0.43887171149253845, "learning_rate": 9.327731092436975e-06, "loss": 0.3566, "step": 35483 }, { "epoch": 19.823463687150838, "grad_norm": 0.5384312868118286, "learning_rate": 9.299719887955181e-06, "loss": 0.6602, "step": 35484 }, { "epoch": 19.824022346368714, "grad_norm": 0.42510315775871277, "learning_rate": 9.271708683473389e-06, "loss": 0.3938, "step": 35485 }, { "epoch": 19.82458100558659, "grad_norm": 0.43815770745277405, "learning_rate": 9.243697478991597e-06, "loss": 0.3332, "step": 35486 }, { "epoch": 19.825139664804468, "grad_norm": 0.742708146572113, "learning_rate": 9.215686274509804e-06, "loss": 0.4613, "step": 35487 }, { "epoch": 19.825698324022348, "grad_norm": 0.4642021358013153, "learning_rate": 9.18767507002801e-06, "loss": 0.3769, "step": 35488 }, { "epoch": 19.826256983240224, "grad_norm": 1.5623059272766113, "learning_rate": 9.159663865546218e-06, "loss": 0.4048, "step": 35489 }, { "epoch": 19.8268156424581, "grad_norm": 0.3773129880428314, "learning_rate": 9.131652661064426e-06, "loss": 0.4172, "step": 35490 }, { "epoch": 19.827374301675977, "grad_norm": 0.5068979859352112, "learning_rate": 9.103641456582633e-06, "loss": 0.4614, "step": 35491 }, { "epoch": 19.827932960893854, "grad_norm": 0.5426199436187744, "learning_rate": 9.07563025210084e-06, "loss": 0.4455, "step": 
35492 }, { "epoch": 19.82849162011173, "grad_norm": 0.6335077285766602, "learning_rate": 9.047619047619047e-06, "loss": 0.3756, "step": 35493 }, { "epoch": 19.82905027932961, "grad_norm": 0.48047536611557007, "learning_rate": 9.019607843137255e-06, "loss": 0.4421, "step": 35494 }, { "epoch": 19.829608938547487, "grad_norm": 0.5671623945236206, "learning_rate": 8.991596638655462e-06, "loss": 0.3646, "step": 35495 }, { "epoch": 19.830167597765364, "grad_norm": 0.47398099303245544, "learning_rate": 8.963585434173668e-06, "loss": 0.4053, "step": 35496 }, { "epoch": 19.83072625698324, "grad_norm": 0.7299337387084961, "learning_rate": 8.935574229691876e-06, "loss": 0.4374, "step": 35497 }, { "epoch": 19.831284916201117, "grad_norm": 0.38078492879867554, "learning_rate": 8.907563025210084e-06, "loss": 0.3514, "step": 35498 }, { "epoch": 19.831843575418993, "grad_norm": 0.4372172951698303, "learning_rate": 8.879551820728291e-06, "loss": 0.3925, "step": 35499 }, { "epoch": 19.83240223463687, "grad_norm": 0.4379926323890686, "learning_rate": 8.851540616246497e-06, "loss": 0.3628, "step": 35500 }, { "epoch": 19.83240223463687, "eval_cer": 0.08419800770296662, "eval_loss": 0.3183664083480835, "eval_runtime": 55.5821, "eval_samples_per_second": 81.645, "eval_steps_per_second": 5.11, "eval_wer": 0.33250467864026145, "step": 35500 }, { "epoch": 19.83296089385475, "grad_norm": 0.9604039788246155, "learning_rate": 8.823529411764705e-06, "loss": 0.4113, "step": 35501 }, { "epoch": 19.833519553072627, "grad_norm": 0.6833616495132446, "learning_rate": 8.795518207282914e-06, "loss": 0.3779, "step": 35502 }, { "epoch": 19.834078212290503, "grad_norm": 0.4546038806438446, "learning_rate": 8.767507002801122e-06, "loss": 0.4312, "step": 35503 }, { "epoch": 19.83463687150838, "grad_norm": 0.37255576252937317, "learning_rate": 8.739495798319328e-06, "loss": 0.3509, "step": 35504 }, { "epoch": 19.835195530726256, "grad_norm": 0.374406635761261, "learning_rate": 8.711484593837536e-06, "loss": 
0.3985, "step": 35505 }, { "epoch": 19.835754189944133, "grad_norm": 0.7711275219917297, "learning_rate": 8.683473389355744e-06, "loss": 0.4128, "step": 35506 }, { "epoch": 19.83631284916201, "grad_norm": 0.5150610208511353, "learning_rate": 8.655462184873951e-06, "loss": 0.5035, "step": 35507 }, { "epoch": 19.83687150837989, "grad_norm": 0.436656653881073, "learning_rate": 8.627450980392157e-06, "loss": 0.3825, "step": 35508 }, { "epoch": 19.837430167597766, "grad_norm": 0.5069315433502197, "learning_rate": 8.599439775910365e-06, "loss": 0.3684, "step": 35509 }, { "epoch": 19.837988826815643, "grad_norm": 0.39290910959243774, "learning_rate": 8.571428571428573e-06, "loss": 0.4058, "step": 35510 }, { "epoch": 19.83854748603352, "grad_norm": 0.9138739705085754, "learning_rate": 8.543417366946779e-06, "loss": 0.3596, "step": 35511 }, { "epoch": 19.839106145251396, "grad_norm": 1.8350526094436646, "learning_rate": 8.515406162464986e-06, "loss": 0.3519, "step": 35512 }, { "epoch": 19.839664804469272, "grad_norm": 0.9484584927558899, "learning_rate": 8.487394957983194e-06, "loss": 0.3813, "step": 35513 }, { "epoch": 19.840223463687153, "grad_norm": 0.4576646089553833, "learning_rate": 8.459383753501402e-06, "loss": 0.46, "step": 35514 }, { "epoch": 19.84078212290503, "grad_norm": 0.7134267091751099, "learning_rate": 8.431372549019608e-06, "loss": 0.4448, "step": 35515 }, { "epoch": 19.841340782122906, "grad_norm": 0.4767299294471741, "learning_rate": 8.403361344537815e-06, "loss": 0.4664, "step": 35516 }, { "epoch": 19.841899441340782, "grad_norm": 0.6320598125457764, "learning_rate": 8.375350140056023e-06, "loss": 0.3816, "step": 35517 }, { "epoch": 19.84245810055866, "grad_norm": 0.520431637763977, "learning_rate": 8.34733893557423e-06, "loss": 0.4582, "step": 35518 }, { "epoch": 19.843016759776535, "grad_norm": 0.35879379510879517, "learning_rate": 8.319327731092437e-06, "loss": 0.3502, "step": 35519 }, { "epoch": 19.843575418994412, "grad_norm": 0.3486548960208893, 
"learning_rate": 8.291316526610644e-06, "loss": 0.3546, "step": 35520 }, { "epoch": 19.844134078212292, "grad_norm": 0.37583065032958984, "learning_rate": 8.263305322128852e-06, "loss": 0.4123, "step": 35521 }, { "epoch": 19.84469273743017, "grad_norm": 2.0360541343688965, "learning_rate": 8.23529411764706e-06, "loss": 0.5527, "step": 35522 }, { "epoch": 19.845251396648045, "grad_norm": 0.5614008903503418, "learning_rate": 8.207282913165266e-06, "loss": 0.3478, "step": 35523 }, { "epoch": 19.845810055865922, "grad_norm": 0.404690146446228, "learning_rate": 8.179271708683473e-06, "loss": 0.3642, "step": 35524 }, { "epoch": 19.8463687150838, "grad_norm": 1.168663501739502, "learning_rate": 8.151260504201681e-06, "loss": 0.4079, "step": 35525 }, { "epoch": 19.846927374301675, "grad_norm": 0.3829575181007385, "learning_rate": 8.123249299719889e-06, "loss": 0.3279, "step": 35526 }, { "epoch": 19.84748603351955, "grad_norm": 0.48685383796691895, "learning_rate": 8.095238095238095e-06, "loss": 0.4543, "step": 35527 }, { "epoch": 19.84804469273743, "grad_norm": 1.7935909032821655, "learning_rate": 8.067226890756303e-06, "loss": 0.3568, "step": 35528 }, { "epoch": 19.84860335195531, "grad_norm": 0.43810322880744934, "learning_rate": 8.03921568627451e-06, "loss": 0.4088, "step": 35529 }, { "epoch": 19.849162011173185, "grad_norm": 0.5050700306892395, "learning_rate": 8.011204481792718e-06, "loss": 0.3397, "step": 35530 }, { "epoch": 19.84972067039106, "grad_norm": 0.5490198135375977, "learning_rate": 7.983193277310924e-06, "loss": 0.4668, "step": 35531 }, { "epoch": 19.850279329608938, "grad_norm": 6.344862461090088, "learning_rate": 7.955182072829132e-06, "loss": 0.4066, "step": 35532 }, { "epoch": 19.850837988826814, "grad_norm": 0.5124679207801819, "learning_rate": 7.92717086834734e-06, "loss": 0.4066, "step": 35533 }, { "epoch": 19.85139664804469, "grad_norm": 0.6247010231018066, "learning_rate": 7.899159663865547e-06, "loss": 0.4567, "step": 35534 }, { "epoch": 
19.85195530726257, "grad_norm": 0.3546310365200043, "learning_rate": 7.871148459383753e-06, "loss": 0.3596, "step": 35535 }, { "epoch": 19.852513966480448, "grad_norm": 0.42871859669685364, "learning_rate": 7.84313725490196e-06, "loss": 0.2945, "step": 35536 }, { "epoch": 19.853072625698324, "grad_norm": 1.153154969215393, "learning_rate": 7.815126050420168e-06, "loss": 0.396, "step": 35537 }, { "epoch": 19.8536312849162, "grad_norm": 0.9508769512176514, "learning_rate": 7.787114845938376e-06, "loss": 0.3773, "step": 35538 }, { "epoch": 19.854189944134077, "grad_norm": 0.35451996326446533, "learning_rate": 7.759103641456584e-06, "loss": 0.3973, "step": 35539 }, { "epoch": 19.854748603351954, "grad_norm": 0.40359291434288025, "learning_rate": 7.73109243697479e-06, "loss": 0.4204, "step": 35540 }, { "epoch": 19.85530726256983, "grad_norm": 0.4803354740142822, "learning_rate": 7.703081232492997e-06, "loss": 0.4347, "step": 35541 }, { "epoch": 19.85586592178771, "grad_norm": 0.4973466396331787, "learning_rate": 7.675070028011205e-06, "loss": 0.5028, "step": 35542 }, { "epoch": 19.856424581005587, "grad_norm": 0.7691525816917419, "learning_rate": 7.647058823529413e-06, "loss": 0.3523, "step": 35543 }, { "epoch": 19.856983240223464, "grad_norm": 0.5215095281600952, "learning_rate": 7.619047619047619e-06, "loss": 0.3446, "step": 35544 }, { "epoch": 19.85754189944134, "grad_norm": 1.2336946725845337, "learning_rate": 7.5910364145658265e-06, "loss": 0.3818, "step": 35545 }, { "epoch": 19.858100558659217, "grad_norm": 2.777637243270874, "learning_rate": 7.563025210084034e-06, "loss": 0.3379, "step": 35546 }, { "epoch": 19.858659217877094, "grad_norm": 0.41862061619758606, "learning_rate": 7.535014005602241e-06, "loss": 0.3558, "step": 35547 }, { "epoch": 19.859217877094974, "grad_norm": 0.5710946917533875, "learning_rate": 7.507002801120449e-06, "loss": 0.4097, "step": 35548 }, { "epoch": 19.85977653631285, "grad_norm": 0.46634867787361145, "learning_rate": 
7.4789915966386555e-06, "loss": 0.3715, "step": 35549 }, { "epoch": 19.860335195530727, "grad_norm": 0.32197415828704834, "learning_rate": 7.450980392156863e-06, "loss": 0.3542, "step": 35550 }, { "epoch": 19.860893854748603, "grad_norm": 0.6783162355422974, "learning_rate": 7.42296918767507e-06, "loss": 0.6101, "step": 35551 }, { "epoch": 19.86145251396648, "grad_norm": 0.8543192148208618, "learning_rate": 7.394957983193278e-06, "loss": 0.4123, "step": 35552 }, { "epoch": 19.862011173184356, "grad_norm": 0.3979789614677429, "learning_rate": 7.366946778711485e-06, "loss": 0.3475, "step": 35553 }, { "epoch": 19.862569832402233, "grad_norm": 0.4430939257144928, "learning_rate": 7.338935574229692e-06, "loss": 0.4203, "step": 35554 }, { "epoch": 19.863128491620113, "grad_norm": 2.5592780113220215, "learning_rate": 7.310924369747899e-06, "loss": 0.4729, "step": 35555 }, { "epoch": 19.86368715083799, "grad_norm": 0.40907856822013855, "learning_rate": 7.282913165266107e-06, "loss": 0.4438, "step": 35556 }, { "epoch": 19.864245810055866, "grad_norm": 2.6902639865875244, "learning_rate": 7.254901960784314e-06, "loss": 0.3976, "step": 35557 }, { "epoch": 19.864804469273743, "grad_norm": 0.491024374961853, "learning_rate": 7.226890756302521e-06, "loss": 0.4618, "step": 35558 }, { "epoch": 19.86536312849162, "grad_norm": 0.46603935956954956, "learning_rate": 7.198879551820728e-06, "loss": 0.3655, "step": 35559 }, { "epoch": 19.865921787709496, "grad_norm": 0.6266034245491028, "learning_rate": 7.170868347338936e-06, "loss": 0.6292, "step": 35560 }, { "epoch": 19.866480446927373, "grad_norm": 0.37468862533569336, "learning_rate": 7.142857142857143e-06, "loss": 0.4192, "step": 35561 }, { "epoch": 19.867039106145253, "grad_norm": 0.5093718767166138, "learning_rate": 7.11484593837535e-06, "loss": 0.3296, "step": 35562 }, { "epoch": 19.86759776536313, "grad_norm": 0.6867132186889648, "learning_rate": 7.086834733893557e-06, "loss": 0.4028, "step": 35563 }, { "epoch": 
19.868156424581006, "grad_norm": 0.607503354549408, "learning_rate": 7.058823529411765e-06, "loss": 0.4589, "step": 35564 }, { "epoch": 19.868715083798882, "grad_norm": 0.6798485517501831, "learning_rate": 7.030812324929972e-06, "loss": 0.3539, "step": 35565 }, { "epoch": 19.86927374301676, "grad_norm": 0.5485566854476929, "learning_rate": 7.0028011204481795e-06, "loss": 0.4842, "step": 35566 }, { "epoch": 19.869832402234636, "grad_norm": 0.7571957111358643, "learning_rate": 6.974789915966386e-06, "loss": 0.4923, "step": 35567 }, { "epoch": 19.870391061452516, "grad_norm": 0.6238499283790588, "learning_rate": 6.946778711484594e-06, "loss": 0.5692, "step": 35568 }, { "epoch": 19.870949720670392, "grad_norm": 0.730003833770752, "learning_rate": 6.918767507002801e-06, "loss": 0.4373, "step": 35569 }, { "epoch": 19.87150837988827, "grad_norm": 0.5628697276115417, "learning_rate": 6.8907563025210085e-06, "loss": 0.4717, "step": 35570 }, { "epoch": 19.872067039106145, "grad_norm": 0.47615954279899597, "learning_rate": 6.862745098039215e-06, "loss": 0.367, "step": 35571 }, { "epoch": 19.872625698324022, "grad_norm": 0.5309939384460449, "learning_rate": 6.834733893557423e-06, "loss": 0.2809, "step": 35572 }, { "epoch": 19.8731843575419, "grad_norm": 0.37138187885284424, "learning_rate": 6.806722689075631e-06, "loss": 0.4018, "step": 35573 }, { "epoch": 19.873743016759775, "grad_norm": 1.0609674453735352, "learning_rate": 6.7787114845938384e-06, "loss": 0.3847, "step": 35574 }, { "epoch": 19.874301675977655, "grad_norm": 0.4392301142215729, "learning_rate": 6.750700280112045e-06, "loss": 0.4211, "step": 35575 }, { "epoch": 19.87486033519553, "grad_norm": 0.3797439932823181, "learning_rate": 6.722689075630253e-06, "loss": 0.3247, "step": 35576 }, { "epoch": 19.87541899441341, "grad_norm": 0.40507978200912476, "learning_rate": 6.69467787114846e-06, "loss": 0.3803, "step": 35577 }, { "epoch": 19.875977653631285, "grad_norm": 1.6920356750488281, "learning_rate": 
6.6666666666666675e-06, "loss": 0.3982, "step": 35578 }, { "epoch": 19.87653631284916, "grad_norm": 0.42925766110420227, "learning_rate": 6.638655462184874e-06, "loss": 0.3347, "step": 35579 }, { "epoch": 19.877094972067038, "grad_norm": 0.3393830955028534, "learning_rate": 6.610644257703081e-06, "loss": 0.3983, "step": 35580 }, { "epoch": 19.877653631284915, "grad_norm": 0.8725360035896301, "learning_rate": 6.582633053221289e-06, "loss": 0.3126, "step": 35581 }, { "epoch": 19.878212290502795, "grad_norm": 0.47602248191833496, "learning_rate": 6.554621848739496e-06, "loss": 0.5332, "step": 35582 }, { "epoch": 19.87877094972067, "grad_norm": 0.5204659104347229, "learning_rate": 6.526610644257703e-06, "loss": 0.3323, "step": 35583 }, { "epoch": 19.879329608938548, "grad_norm": 0.39426782727241516, "learning_rate": 6.49859943977591e-06, "loss": 0.3341, "step": 35584 }, { "epoch": 19.879888268156424, "grad_norm": 0.422897070646286, "learning_rate": 6.470588235294118e-06, "loss": 0.4999, "step": 35585 }, { "epoch": 19.8804469273743, "grad_norm": 0.4663355350494385, "learning_rate": 6.442577030812325e-06, "loss": 0.3848, "step": 35586 }, { "epoch": 19.881005586592178, "grad_norm": 0.6233259439468384, "learning_rate": 6.4145658263305325e-06, "loss": 0.4194, "step": 35587 }, { "epoch": 19.881564245810054, "grad_norm": 0.4507158696651459, "learning_rate": 6.386554621848739e-06, "loss": 0.4627, "step": 35588 }, { "epoch": 19.882122905027934, "grad_norm": 0.3214305341243744, "learning_rate": 6.358543417366947e-06, "loss": 0.306, "step": 35589 }, { "epoch": 19.88268156424581, "grad_norm": 0.8551495671272278, "learning_rate": 6.330532212885154e-06, "loss": 0.3744, "step": 35590 }, { "epoch": 19.883240223463687, "grad_norm": 0.44746673107147217, "learning_rate": 6.3025210084033615e-06, "loss": 0.347, "step": 35591 }, { "epoch": 19.883798882681564, "grad_norm": 0.2838791012763977, "learning_rate": 6.274509803921568e-06, "loss": 0.3111, "step": 35592 }, { "epoch": 
19.88435754189944, "grad_norm": 0.3916764557361603, "learning_rate": 6.246498599439776e-06, "loss": 0.3976, "step": 35593 }, { "epoch": 19.884916201117317, "grad_norm": 0.8163996338844299, "learning_rate": 6.218487394957983e-06, "loss": 0.4459, "step": 35594 }, { "epoch": 19.885474860335197, "grad_norm": 0.42282184958457947, "learning_rate": 6.190476190476191e-06, "loss": 0.4298, "step": 35595 }, { "epoch": 19.886033519553074, "grad_norm": 0.4737150967121124, "learning_rate": 6.162464985994397e-06, "loss": 0.3543, "step": 35596 }, { "epoch": 19.88659217877095, "grad_norm": 0.8519539833068848, "learning_rate": 6.134453781512605e-06, "loss": 0.282, "step": 35597 }, { "epoch": 19.887150837988827, "grad_norm": 1.1746183633804321, "learning_rate": 6.106442577030812e-06, "loss": 0.3679, "step": 35598 }, { "epoch": 19.887709497206703, "grad_norm": 0.7324888110160828, "learning_rate": 6.07843137254902e-06, "loss": 0.4486, "step": 35599 }, { "epoch": 19.88826815642458, "grad_norm": 0.360386461019516, "learning_rate": 6.0504201680672265e-06, "loss": 0.4148, "step": 35600 }, { "epoch": 19.888826815642457, "grad_norm": 0.357022225856781, "learning_rate": 6.022408963585434e-06, "loss": 0.3358, "step": 35601 }, { "epoch": 19.889385474860337, "grad_norm": 0.5042193531990051, "learning_rate": 5.994397759103642e-06, "loss": 0.4167, "step": 35602 }, { "epoch": 19.889944134078213, "grad_norm": 0.5186048746109009, "learning_rate": 5.9663865546218495e-06, "loss": 0.298, "step": 35603 }, { "epoch": 19.89050279329609, "grad_norm": 0.6707499027252197, "learning_rate": 5.938375350140056e-06, "loss": 0.4543, "step": 35604 }, { "epoch": 19.891061452513966, "grad_norm": 0.4759967625141144, "learning_rate": 5.910364145658264e-06, "loss": 0.3585, "step": 35605 }, { "epoch": 19.891620111731843, "grad_norm": 0.4389513432979584, "learning_rate": 5.882352941176471e-06, "loss": 0.3579, "step": 35606 }, { "epoch": 19.89217877094972, "grad_norm": 1.225927472114563, "learning_rate": 
5.854341736694679e-06, "loss": 0.3545, "step": 35607 }, { "epoch": 19.892737430167596, "grad_norm": 0.39883729815483093, "learning_rate": 5.8263305322128855e-06, "loss": 0.3716, "step": 35608 }, { "epoch": 19.893296089385476, "grad_norm": 0.33032065629959106, "learning_rate": 5.798319327731093e-06, "loss": 0.3658, "step": 35609 }, { "epoch": 19.893854748603353, "grad_norm": 0.5306668877601624, "learning_rate": 5.7703081232493e-06, "loss": 0.355, "step": 35610 }, { "epoch": 19.89441340782123, "grad_norm": 0.5451876521110535, "learning_rate": 5.742296918767508e-06, "loss": 0.4293, "step": 35611 }, { "epoch": 19.894972067039106, "grad_norm": 0.3425430953502655, "learning_rate": 5.7142857142857145e-06, "loss": 0.3981, "step": 35612 }, { "epoch": 19.895530726256982, "grad_norm": 0.5449585318565369, "learning_rate": 5.686274509803921e-06, "loss": 0.3558, "step": 35613 }, { "epoch": 19.89608938547486, "grad_norm": 0.8266059160232544, "learning_rate": 5.658263305322129e-06, "loss": 0.5354, "step": 35614 }, { "epoch": 19.89664804469274, "grad_norm": 0.4417438805103302, "learning_rate": 5.630252100840336e-06, "loss": 0.4157, "step": 35615 }, { "epoch": 19.897206703910616, "grad_norm": 0.559985339641571, "learning_rate": 5.6022408963585436e-06, "loss": 0.4945, "step": 35616 }, { "epoch": 19.897765363128492, "grad_norm": 5.538567066192627, "learning_rate": 5.57422969187675e-06, "loss": 0.3246, "step": 35617 }, { "epoch": 19.89832402234637, "grad_norm": 0.43555939197540283, "learning_rate": 5.546218487394958e-06, "loss": 0.5071, "step": 35618 }, { "epoch": 19.898882681564245, "grad_norm": 0.57423335313797, "learning_rate": 5.518207282913165e-06, "loss": 0.4386, "step": 35619 }, { "epoch": 19.899441340782122, "grad_norm": 0.8419376015663147, "learning_rate": 5.490196078431373e-06, "loss": 0.3857, "step": 35620 }, { "epoch": 19.9, "grad_norm": 0.4784092605113983, "learning_rate": 5.4621848739495795e-06, "loss": 0.3082, "step": 35621 }, { "epoch": 19.90055865921788, "grad_norm": 
0.8151900172233582, "learning_rate": 5.434173669467787e-06, "loss": 0.3871, "step": 35622 }, { "epoch": 19.901117318435755, "grad_norm": 1.1739178895950317, "learning_rate": 5.406162464985994e-06, "loss": 0.3768, "step": 35623 }, { "epoch": 19.901675977653632, "grad_norm": 0.42103952169418335, "learning_rate": 5.378151260504202e-06, "loss": 0.3949, "step": 35624 }, { "epoch": 19.90223463687151, "grad_norm": 0.4123176336288452, "learning_rate": 5.3501400560224085e-06, "loss": 0.3185, "step": 35625 }, { "epoch": 19.902793296089385, "grad_norm": 0.5617626905441284, "learning_rate": 5.322128851540616e-06, "loss": 0.4178, "step": 35626 }, { "epoch": 19.90335195530726, "grad_norm": 0.4380773901939392, "learning_rate": 5.294117647058823e-06, "loss": 0.4507, "step": 35627 }, { "epoch": 19.903910614525138, "grad_norm": 0.5657081604003906, "learning_rate": 5.266106442577031e-06, "loss": 0.4342, "step": 35628 }, { "epoch": 19.904469273743018, "grad_norm": 0.49421849846839905, "learning_rate": 5.238095238095238e-06, "loss": 0.3753, "step": 35629 }, { "epoch": 19.905027932960895, "grad_norm": 0.33730989694595337, "learning_rate": 5.210084033613446e-06, "loss": 0.3897, "step": 35630 }, { "epoch": 19.90558659217877, "grad_norm": 0.4522286355495453, "learning_rate": 5.182072829131653e-06, "loss": 0.3804, "step": 35631 }, { "epoch": 19.906145251396648, "grad_norm": 0.761443555355072, "learning_rate": 5.154061624649861e-06, "loss": 0.3818, "step": 35632 }, { "epoch": 19.906703910614524, "grad_norm": 0.6169270873069763, "learning_rate": 5.1260504201680675e-06, "loss": 0.3748, "step": 35633 }, { "epoch": 19.9072625698324, "grad_norm": 0.4670906066894531, "learning_rate": 5.098039215686275e-06, "loss": 0.4335, "step": 35634 }, { "epoch": 19.907821229050278, "grad_norm": 0.5424671173095703, "learning_rate": 5.070028011204482e-06, "loss": 0.4197, "step": 35635 }, { "epoch": 19.908379888268158, "grad_norm": 0.48261988162994385, "learning_rate": 5.04201680672269e-06, "loss": 0.3623, 
"step": 35636 }, { "epoch": 19.908938547486034, "grad_norm": 0.4566068947315216, "learning_rate": 5.0140056022408966e-06, "loss": 0.4932, "step": 35637 }, { "epoch": 19.90949720670391, "grad_norm": 0.4355485141277313, "learning_rate": 4.985994397759104e-06, "loss": 0.4828, "step": 35638 }, { "epoch": 19.910055865921787, "grad_norm": 1.133594274520874, "learning_rate": 4.957983193277311e-06, "loss": 0.3703, "step": 35639 }, { "epoch": 19.910614525139664, "grad_norm": 0.6314957737922668, "learning_rate": 4.929971988795519e-06, "loss": 0.4299, "step": 35640 }, { "epoch": 19.91117318435754, "grad_norm": 0.3796870708465576, "learning_rate": 4.901960784313726e-06, "loss": 0.3684, "step": 35641 }, { "epoch": 19.91173184357542, "grad_norm": 0.42551523447036743, "learning_rate": 4.873949579831933e-06, "loss": 0.3439, "step": 35642 }, { "epoch": 19.912290502793297, "grad_norm": 0.7229700684547424, "learning_rate": 4.84593837535014e-06, "loss": 0.4305, "step": 35643 }, { "epoch": 19.912849162011174, "grad_norm": 0.537805438041687, "learning_rate": 4.817927170868348e-06, "loss": 0.3773, "step": 35644 }, { "epoch": 19.91340782122905, "grad_norm": 0.5850977301597595, "learning_rate": 4.789915966386555e-06, "loss": 0.426, "step": 35645 }, { "epoch": 19.913966480446927, "grad_norm": 0.414144366979599, "learning_rate": 4.761904761904762e-06, "loss": 0.3362, "step": 35646 }, { "epoch": 19.914525139664804, "grad_norm": 0.5388544201850891, "learning_rate": 4.733893557422969e-06, "loss": 0.43, "step": 35647 }, { "epoch": 19.91508379888268, "grad_norm": 0.4163450300693512, "learning_rate": 4.705882352941176e-06, "loss": 0.4243, "step": 35648 }, { "epoch": 19.91564245810056, "grad_norm": 0.9192421436309814, "learning_rate": 4.677871148459384e-06, "loss": 0.4005, "step": 35649 }, { "epoch": 19.916201117318437, "grad_norm": 0.4805905222892761, "learning_rate": 4.649859943977591e-06, "loss": 0.3543, "step": 35650 }, { "epoch": 19.916759776536313, "grad_norm": 0.548423171043396, 
"learning_rate": 4.621848739495798e-06, "loss": 0.3764, "step": 35651 }, { "epoch": 19.91731843575419, "grad_norm": 0.5281224250793457, "learning_rate": 4.593837535014005e-06, "loss": 0.4331, "step": 35652 }, { "epoch": 19.917877094972066, "grad_norm": 0.49651938676834106, "learning_rate": 4.565826330532213e-06, "loss": 0.4811, "step": 35653 }, { "epoch": 19.918435754189943, "grad_norm": 0.43199896812438965, "learning_rate": 4.53781512605042e-06, "loss": 0.5455, "step": 35654 }, { "epoch": 19.91899441340782, "grad_norm": 0.45970430970191956, "learning_rate": 4.509803921568627e-06, "loss": 0.3835, "step": 35655 }, { "epoch": 19.9195530726257, "grad_norm": 0.5654606819152832, "learning_rate": 4.481792717086834e-06, "loss": 0.3718, "step": 35656 }, { "epoch": 19.920111731843576, "grad_norm": 1.0351994037628174, "learning_rate": 4.453781512605042e-06, "loss": 0.4673, "step": 35657 }, { "epoch": 19.920670391061453, "grad_norm": 0.474049836397171, "learning_rate": 4.425770308123249e-06, "loss": 0.4639, "step": 35658 }, { "epoch": 19.92122905027933, "grad_norm": 0.4930829107761383, "learning_rate": 4.397759103641457e-06, "loss": 0.4436, "step": 35659 }, { "epoch": 19.921787709497206, "grad_norm": 0.3586808145046234, "learning_rate": 4.369747899159664e-06, "loss": 0.3681, "step": 35660 }, { "epoch": 19.922346368715083, "grad_norm": 1.6363966464996338, "learning_rate": 4.341736694677872e-06, "loss": 0.3905, "step": 35661 }, { "epoch": 19.922905027932963, "grad_norm": 0.35228431224823, "learning_rate": 4.313725490196079e-06, "loss": 0.3522, "step": 35662 }, { "epoch": 19.92346368715084, "grad_norm": 1.6791367530822754, "learning_rate": 4.285714285714286e-06, "loss": 0.3659, "step": 35663 }, { "epoch": 19.924022346368716, "grad_norm": 0.40718963742256165, "learning_rate": 4.257703081232493e-06, "loss": 0.4443, "step": 35664 }, { "epoch": 19.924581005586592, "grad_norm": 0.5448503494262695, "learning_rate": 4.229691876750701e-06, "loss": 0.5231, "step": 35665 }, { "epoch": 
19.92513966480447, "grad_norm": 1.078081727027893, "learning_rate": 4.201680672268908e-06, "loss": 0.3552, "step": 35666 }, { "epoch": 19.925698324022346, "grad_norm": 0.8464499711990356, "learning_rate": 4.173669467787115e-06, "loss": 0.4155, "step": 35667 }, { "epoch": 19.926256983240222, "grad_norm": 0.4724714756011963, "learning_rate": 4.145658263305322e-06, "loss": 0.5077, "step": 35668 }, { "epoch": 19.926815642458102, "grad_norm": 0.3425491750240326, "learning_rate": 4.11764705882353e-06, "loss": 0.326, "step": 35669 }, { "epoch": 19.92737430167598, "grad_norm": 0.49022987484931946, "learning_rate": 4.089635854341737e-06, "loss": 0.4266, "step": 35670 }, { "epoch": 19.927932960893855, "grad_norm": 0.5103769302368164, "learning_rate": 4.0616246498599444e-06, "loss": 0.5553, "step": 35671 }, { "epoch": 19.928491620111732, "grad_norm": 0.42951250076293945, "learning_rate": 4.033613445378151e-06, "loss": 0.3576, "step": 35672 }, { "epoch": 19.92905027932961, "grad_norm": 0.37149930000305176, "learning_rate": 4.005602240896359e-06, "loss": 0.3762, "step": 35673 }, { "epoch": 19.929608938547485, "grad_norm": 0.9715537428855896, "learning_rate": 3.977591036414566e-06, "loss": 0.4102, "step": 35674 }, { "epoch": 19.93016759776536, "grad_norm": 0.4984362721443176, "learning_rate": 3.9495798319327735e-06, "loss": 0.4779, "step": 35675 }, { "epoch": 19.93072625698324, "grad_norm": 0.3813376724720001, "learning_rate": 3.92156862745098e-06, "loss": 0.374, "step": 35676 }, { "epoch": 19.93128491620112, "grad_norm": 0.323688268661499, "learning_rate": 3.893557422969188e-06, "loss": 0.3673, "step": 35677 }, { "epoch": 19.931843575418995, "grad_norm": 0.553429901599884, "learning_rate": 3.865546218487395e-06, "loss": 0.3484, "step": 35678 }, { "epoch": 19.93240223463687, "grad_norm": 0.5127355456352234, "learning_rate": 3.8375350140056026e-06, "loss": 0.3694, "step": 35679 }, { "epoch": 19.932960893854748, "grad_norm": 0.3489966094493866, "learning_rate": 
3.8095238095238094e-06, "loss": 0.4427, "step": 35680 }, { "epoch": 19.933519553072625, "grad_norm": 0.39821743965148926, "learning_rate": 3.781512605042017e-06, "loss": 0.4465, "step": 35681 }, { "epoch": 19.9340782122905, "grad_norm": 0.9026612639427185, "learning_rate": 3.7535014005602243e-06, "loss": 0.4096, "step": 35682 }, { "epoch": 19.93463687150838, "grad_norm": 0.407042533159256, "learning_rate": 3.7254901960784316e-06, "loss": 0.3671, "step": 35683 }, { "epoch": 19.935195530726258, "grad_norm": 0.5701173543930054, "learning_rate": 3.697478991596639e-06, "loss": 0.6763, "step": 35684 }, { "epoch": 19.935754189944134, "grad_norm": 0.538378119468689, "learning_rate": 3.669467787114846e-06, "loss": 0.36, "step": 35685 }, { "epoch": 19.93631284916201, "grad_norm": 0.5826660394668579, "learning_rate": 3.6414565826330534e-06, "loss": 0.4464, "step": 35686 }, { "epoch": 19.936871508379888, "grad_norm": 0.3692992329597473, "learning_rate": 3.6134453781512607e-06, "loss": 0.3863, "step": 35687 }, { "epoch": 19.937430167597764, "grad_norm": 0.5763682126998901, "learning_rate": 3.585434173669468e-06, "loss": 0.5184, "step": 35688 }, { "epoch": 19.93798882681564, "grad_norm": 0.5126556158065796, "learning_rate": 3.557422969187675e-06, "loss": 0.4881, "step": 35689 }, { "epoch": 19.93854748603352, "grad_norm": 0.31983473896980286, "learning_rate": 3.5294117647058825e-06, "loss": 0.3163, "step": 35690 }, { "epoch": 19.939106145251397, "grad_norm": 0.6009042263031006, "learning_rate": 3.5014005602240897e-06, "loss": 0.3812, "step": 35691 }, { "epoch": 19.939664804469274, "grad_norm": 0.4028477072715759, "learning_rate": 3.473389355742297e-06, "loss": 0.3111, "step": 35692 }, { "epoch": 19.94022346368715, "grad_norm": 1.3376309871673584, "learning_rate": 3.4453781512605043e-06, "loss": 0.3678, "step": 35693 }, { "epoch": 19.940782122905027, "grad_norm": 13.20019817352295, "learning_rate": 3.4173669467787115e-06, "loss": 0.4757, "step": 35694 }, { "epoch": 
19.941340782122904, "grad_norm": 0.5198339819908142, "learning_rate": 3.3893557422969192e-06, "loss": 0.3489, "step": 35695 }, { "epoch": 19.941899441340784, "grad_norm": 0.6982374787330627, "learning_rate": 3.3613445378151265e-06, "loss": 0.4379, "step": 35696 }, { "epoch": 19.94245810055866, "grad_norm": 0.45871031284332275, "learning_rate": 3.3333333333333337e-06, "loss": 0.3599, "step": 35697 }, { "epoch": 19.943016759776537, "grad_norm": 0.8334909677505493, "learning_rate": 3.3053221288515406e-06, "loss": 0.5008, "step": 35698 }, { "epoch": 19.943575418994413, "grad_norm": 0.8086140751838684, "learning_rate": 3.277310924369748e-06, "loss": 0.413, "step": 35699 }, { "epoch": 19.94413407821229, "grad_norm": 1.4512261152267456, "learning_rate": 3.249299719887955e-06, "loss": 0.3854, "step": 35700 }, { "epoch": 19.944692737430167, "grad_norm": 0.4095904529094696, "learning_rate": 3.2212885154061624e-06, "loss": 0.2515, "step": 35701 }, { "epoch": 19.945251396648043, "grad_norm": 0.48182395100593567, "learning_rate": 3.1932773109243696e-06, "loss": 0.4824, "step": 35702 }, { "epoch": 19.945810055865923, "grad_norm": 0.9869712591171265, "learning_rate": 3.165266106442577e-06, "loss": 0.3804, "step": 35703 }, { "epoch": 19.9463687150838, "grad_norm": 0.3817277252674103, "learning_rate": 3.137254901960784e-06, "loss": 0.4203, "step": 35704 }, { "epoch": 19.946927374301676, "grad_norm": 0.40958330035209656, "learning_rate": 3.1092436974789914e-06, "loss": 0.3923, "step": 35705 }, { "epoch": 19.947486033519553, "grad_norm": 0.7038190364837646, "learning_rate": 3.0812324929971987e-06, "loss": 0.493, "step": 35706 }, { "epoch": 19.94804469273743, "grad_norm": 0.5374528169631958, "learning_rate": 3.053221288515406e-06, "loss": 0.4533, "step": 35707 }, { "epoch": 19.948603351955306, "grad_norm": 0.3756989538669586, "learning_rate": 3.0252100840336132e-06, "loss": 0.3664, "step": 35708 }, { "epoch": 19.949162011173183, "grad_norm": 0.4476151466369629, "learning_rate": 
2.997198879551821e-06, "loss": 0.4512, "step": 35709 }, { "epoch": 19.949720670391063, "grad_norm": 0.49882346391677856, "learning_rate": 2.969187675070028e-06, "loss": 0.4236, "step": 35710 }, { "epoch": 19.95027932960894, "grad_norm": 0.5060874819755554, "learning_rate": 2.9411764705882355e-06, "loss": 0.3233, "step": 35711 }, { "epoch": 19.950837988826816, "grad_norm": 0.39693549275398254, "learning_rate": 2.9131652661064427e-06, "loss": 0.388, "step": 35712 }, { "epoch": 19.951396648044692, "grad_norm": 0.4244128167629242, "learning_rate": 2.88515406162465e-06, "loss": 0.408, "step": 35713 }, { "epoch": 19.95195530726257, "grad_norm": 0.39799681305885315, "learning_rate": 2.8571428571428573e-06, "loss": 0.3463, "step": 35714 }, { "epoch": 19.952513966480446, "grad_norm": 0.8204896450042725, "learning_rate": 2.8291316526610645e-06, "loss": 0.4184, "step": 35715 }, { "epoch": 19.953072625698326, "grad_norm": 0.3208429217338562, "learning_rate": 2.8011204481792718e-06, "loss": 0.3386, "step": 35716 }, { "epoch": 19.953631284916202, "grad_norm": 0.44770392775535583, "learning_rate": 2.773109243697479e-06, "loss": 0.5423, "step": 35717 }, { "epoch": 19.95418994413408, "grad_norm": 0.5298616886138916, "learning_rate": 2.7450980392156863e-06, "loss": 0.3982, "step": 35718 }, { "epoch": 19.954748603351955, "grad_norm": 0.5084118843078613, "learning_rate": 2.7170868347338936e-06, "loss": 0.3581, "step": 35719 }, { "epoch": 19.955307262569832, "grad_norm": 1.0268547534942627, "learning_rate": 2.689075630252101e-06, "loss": 0.484, "step": 35720 }, { "epoch": 19.95586592178771, "grad_norm": 1.0976309776306152, "learning_rate": 2.661064425770308e-06, "loss": 0.3514, "step": 35721 }, { "epoch": 19.956424581005585, "grad_norm": 0.6385658383369446, "learning_rate": 2.6330532212885154e-06, "loss": 0.2825, "step": 35722 }, { "epoch": 19.956983240223465, "grad_norm": 0.8541482090950012, "learning_rate": 2.605042016806723e-06, "loss": 0.454, "step": 35723 }, { "epoch": 
19.957541899441342, "grad_norm": 0.6795336604118347, "learning_rate": 2.5770308123249303e-06, "loss": 0.4391, "step": 35724 }, { "epoch": 19.95810055865922, "grad_norm": 0.4772208631038666, "learning_rate": 2.5490196078431376e-06, "loss": 0.4127, "step": 35725 }, { "epoch": 19.958659217877095, "grad_norm": 0.3698780834674835, "learning_rate": 2.521008403361345e-06, "loss": 0.4208, "step": 35726 }, { "epoch": 19.95921787709497, "grad_norm": 0.5685043931007385, "learning_rate": 2.492997198879552e-06, "loss": 0.4064, "step": 35727 }, { "epoch": 19.959776536312848, "grad_norm": 0.3657155930995941, "learning_rate": 2.4649859943977594e-06, "loss": 0.4184, "step": 35728 }, { "epoch": 19.960335195530725, "grad_norm": 1.0049821138381958, "learning_rate": 2.4369747899159667e-06, "loss": 0.5792, "step": 35729 }, { "epoch": 19.960893854748605, "grad_norm": 15.256537437438965, "learning_rate": 2.408963585434174e-06, "loss": 0.3905, "step": 35730 }, { "epoch": 19.96145251396648, "grad_norm": 0.45929771661758423, "learning_rate": 2.380952380952381e-06, "loss": 0.4531, "step": 35731 }, { "epoch": 19.962011173184358, "grad_norm": 0.49803638458251953, "learning_rate": 2.352941176470588e-06, "loss": 0.4904, "step": 35732 }, { "epoch": 19.962569832402234, "grad_norm": 0.3768760859966278, "learning_rate": 2.3249299719887953e-06, "loss": 0.3589, "step": 35733 }, { "epoch": 19.96312849162011, "grad_norm": 0.9365134835243225, "learning_rate": 2.2969187675070026e-06, "loss": 0.3522, "step": 35734 }, { "epoch": 19.963687150837988, "grad_norm": 0.41926106810569763, "learning_rate": 2.26890756302521e-06, "loss": 0.4289, "step": 35735 }, { "epoch": 19.964245810055864, "grad_norm": 0.5147438645362854, "learning_rate": 2.240896358543417e-06, "loss": 0.5659, "step": 35736 }, { "epoch": 19.964804469273744, "grad_norm": 0.6456831097602844, "learning_rate": 2.2128851540616244e-06, "loss": 0.3807, "step": 35737 }, { "epoch": 19.96536312849162, "grad_norm": 0.39884018898010254, "learning_rate": 
2.184873949579832e-06, "loss": 0.4295, "step": 35738 }, { "epoch": 19.965921787709497, "grad_norm": 0.7749279737472534, "learning_rate": 2.1568627450980393e-06, "loss": 0.3755, "step": 35739 }, { "epoch": 19.966480446927374, "grad_norm": 0.5296819806098938, "learning_rate": 2.1288515406162466e-06, "loss": 0.5193, "step": 35740 }, { "epoch": 19.96703910614525, "grad_norm": 4.553891658782959, "learning_rate": 2.100840336134454e-06, "loss": 0.4418, "step": 35741 }, { "epoch": 19.967597765363127, "grad_norm": 0.3707868158817291, "learning_rate": 2.072829131652661e-06, "loss": 0.3272, "step": 35742 }, { "epoch": 19.968156424581007, "grad_norm": 0.3603146970272064, "learning_rate": 2.0448179271708684e-06, "loss": 0.3986, "step": 35743 }, { "epoch": 19.968715083798884, "grad_norm": 0.6828670501708984, "learning_rate": 2.0168067226890756e-06, "loss": 0.5284, "step": 35744 }, { "epoch": 19.96927374301676, "grad_norm": 0.352805495262146, "learning_rate": 1.988795518207283e-06, "loss": 0.4627, "step": 35745 }, { "epoch": 19.969832402234637, "grad_norm": 0.5262941718101501, "learning_rate": 1.96078431372549e-06, "loss": 0.5057, "step": 35746 }, { "epoch": 19.970391061452514, "grad_norm": 0.37788015604019165, "learning_rate": 1.9327731092436974e-06, "loss": 0.3568, "step": 35747 }, { "epoch": 19.97094972067039, "grad_norm": 2.0190670490264893, "learning_rate": 1.9047619047619047e-06, "loss": 0.5348, "step": 35748 }, { "epoch": 19.971508379888267, "grad_norm": 0.5013555288314819, "learning_rate": 1.8767507002801122e-06, "loss": 0.419, "step": 35749 }, { "epoch": 19.972067039106147, "grad_norm": 0.38966071605682373, "learning_rate": 1.8487394957983194e-06, "loss": 0.3391, "step": 35750 }, { "epoch": 19.972625698324023, "grad_norm": 0.4424666166305542, "learning_rate": 1.8207282913165267e-06, "loss": 0.5569, "step": 35751 }, { "epoch": 19.9731843575419, "grad_norm": 0.4121440052986145, "learning_rate": 1.792717086834734e-06, "loss": 0.3891, "step": 35752 }, { "epoch": 
19.973743016759776, "grad_norm": 0.4010823369026184, "learning_rate": 1.7647058823529412e-06, "loss": 0.4402, "step": 35753 }, { "epoch": 19.974301675977653, "grad_norm": 0.448160856962204, "learning_rate": 1.7366946778711485e-06, "loss": 0.4184, "step": 35754 }, { "epoch": 19.97486033519553, "grad_norm": 0.4456930458545685, "learning_rate": 1.7086834733893558e-06, "loss": 0.4246, "step": 35755 }, { "epoch": 19.975418994413406, "grad_norm": 0.49775293469429016, "learning_rate": 1.6806722689075632e-06, "loss": 0.3936, "step": 35756 }, { "epoch": 19.975977653631286, "grad_norm": 0.7370997071266174, "learning_rate": 1.6526610644257703e-06, "loss": 0.4992, "step": 35757 }, { "epoch": 19.976536312849163, "grad_norm": 1.8036106824874878, "learning_rate": 1.6246498599439776e-06, "loss": 0.373, "step": 35758 }, { "epoch": 19.97709497206704, "grad_norm": 0.3875807821750641, "learning_rate": 1.5966386554621848e-06, "loss": 0.3729, "step": 35759 }, { "epoch": 19.977653631284916, "grad_norm": 0.4445265233516693, "learning_rate": 1.568627450980392e-06, "loss": 0.3927, "step": 35760 }, { "epoch": 19.978212290502793, "grad_norm": 0.3668687045574188, "learning_rate": 1.5406162464985994e-06, "loss": 0.3326, "step": 35761 }, { "epoch": 19.97877094972067, "grad_norm": 0.4288323223590851, "learning_rate": 1.5126050420168066e-06, "loss": 0.3051, "step": 35762 }, { "epoch": 19.97932960893855, "grad_norm": 0.33855193853378296, "learning_rate": 1.484593837535014e-06, "loss": 0.3957, "step": 35763 }, { "epoch": 19.979888268156426, "grad_norm": 0.4493440091609955, "learning_rate": 1.4565826330532214e-06, "loss": 0.327, "step": 35764 }, { "epoch": 19.980446927374302, "grad_norm": 0.3707232177257538, "learning_rate": 1.4285714285714286e-06, "loss": 0.4448, "step": 35765 }, { "epoch": 19.98100558659218, "grad_norm": 0.3734290599822998, "learning_rate": 1.4005602240896359e-06, "loss": 0.3223, "step": 35766 }, { "epoch": 19.981564245810056, "grad_norm": 6.52301025390625, "learning_rate": 
1.3725490196078432e-06, "loss": 0.3994, "step": 35767 }, { "epoch": 19.982122905027932, "grad_norm": 0.41697150468826294, "learning_rate": 1.3445378151260504e-06, "loss": 0.4148, "step": 35768 }, { "epoch": 19.98268156424581, "grad_norm": 0.5138062834739685, "learning_rate": 1.3165266106442577e-06, "loss": 0.4871, "step": 35769 }, { "epoch": 19.98324022346369, "grad_norm": 0.4564405083656311, "learning_rate": 1.2885154061624652e-06, "loss": 0.3831, "step": 35770 }, { "epoch": 19.983798882681565, "grad_norm": 0.30999866127967834, "learning_rate": 1.2605042016806724e-06, "loss": 0.301, "step": 35771 }, { "epoch": 19.984357541899442, "grad_norm": 0.4386674463748932, "learning_rate": 1.2324929971988797e-06, "loss": 0.4778, "step": 35772 }, { "epoch": 19.98491620111732, "grad_norm": 0.8380294442176819, "learning_rate": 1.204481792717087e-06, "loss": 0.455, "step": 35773 }, { "epoch": 19.985474860335195, "grad_norm": 0.5301977396011353, "learning_rate": 1.176470588235294e-06, "loss": 0.4115, "step": 35774 }, { "epoch": 19.98603351955307, "grad_norm": 0.4730093479156494, "learning_rate": 1.1484593837535013e-06, "loss": 0.2906, "step": 35775 }, { "epoch": 19.986592178770948, "grad_norm": 0.5763904452323914, "learning_rate": 1.1204481792717085e-06, "loss": 0.4095, "step": 35776 }, { "epoch": 19.98715083798883, "grad_norm": 0.798834502696991, "learning_rate": 1.092436974789916e-06, "loss": 0.3321, "step": 35777 }, { "epoch": 19.987709497206705, "grad_norm": 0.42858266830444336, "learning_rate": 1.0644257703081233e-06, "loss": 0.4252, "step": 35778 }, { "epoch": 19.98826815642458, "grad_norm": 0.4689836800098419, "learning_rate": 1.0364145658263306e-06, "loss": 0.5972, "step": 35779 }, { "epoch": 19.988826815642458, "grad_norm": 0.4905235469341278, "learning_rate": 1.0084033613445378e-06, "loss": 0.4909, "step": 35780 }, { "epoch": 19.989385474860335, "grad_norm": 0.34495341777801514, "learning_rate": 9.80392156862745e-07, "loss": 0.3659, "step": 35781 }, { "epoch": 
19.98994413407821, "grad_norm": 0.4195338487625122, "learning_rate": 9.523809523809523e-07, "loss": 0.3451, "step": 35782 }, { "epoch": 19.990502793296088, "grad_norm": 0.40323707461357117, "learning_rate": 9.243697478991597e-07, "loss": 0.4844, "step": 35783 }, { "epoch": 19.991061452513968, "grad_norm": 0.3888605237007141, "learning_rate": 8.96358543417367e-07, "loss": 0.3603, "step": 35784 }, { "epoch": 19.991620111731844, "grad_norm": 0.6263288259506226, "learning_rate": 8.683473389355742e-07, "loss": 0.4473, "step": 35785 }, { "epoch": 19.99217877094972, "grad_norm": 1.1992926597595215, "learning_rate": 8.403361344537816e-07, "loss": 0.4191, "step": 35786 }, { "epoch": 19.992737430167598, "grad_norm": 0.5969231128692627, "learning_rate": 8.123249299719888e-07, "loss": 0.3807, "step": 35787 }, { "epoch": 19.993296089385474, "grad_norm": 0.4747834801673889, "learning_rate": 7.84313725490196e-07, "loss": 0.3464, "step": 35788 }, { "epoch": 19.99385474860335, "grad_norm": 0.4346066117286682, "learning_rate": 7.563025210084033e-07, "loss": 0.4275, "step": 35789 }, { "epoch": 19.994413407821227, "grad_norm": 2.834784507751465, "learning_rate": 7.282913165266107e-07, "loss": 0.557, "step": 35790 }, { "epoch": 19.994972067039107, "grad_norm": 0.6398465633392334, "learning_rate": 7.002801120448179e-07, "loss": 0.4374, "step": 35791 }, { "epoch": 19.995530726256984, "grad_norm": 0.8262577056884766, "learning_rate": 6.722689075630252e-07, "loss": 0.3263, "step": 35792 }, { "epoch": 19.99608938547486, "grad_norm": 0.6604378819465637, "learning_rate": 6.442577030812326e-07, "loss": 0.3759, "step": 35793 }, { "epoch": 19.996648044692737, "grad_norm": 0.6472581028938293, "learning_rate": 6.162464985994398e-07, "loss": 0.4731, "step": 35794 }, { "epoch": 19.997206703910614, "grad_norm": 0.4706597328186035, "learning_rate": 5.88235294117647e-07, "loss": 0.3876, "step": 35795 }, { "epoch": 19.99776536312849, "grad_norm": 0.5563760995864868, "learning_rate": 
5.602240896358543e-07, "loss": 0.3993, "step": 35796 }, { "epoch": 19.99832402234637, "grad_norm": 2.6378302574157715, "learning_rate": 5.322128851540616e-07, "loss": 0.4534, "step": 35797 }, { "epoch": 19.998882681564247, "grad_norm": 0.44761624932289124, "learning_rate": 5.042016806722689e-07, "loss": 0.4278, "step": 35798 }, { "epoch": 19.999441340782123, "grad_norm": 0.47075140476226807, "learning_rate": 4.761904761904762e-07, "loss": 0.5231, "step": 35799 }, { "epoch": 20.0, "grad_norm": 5.764364242553711, "learning_rate": 4.481792717086835e-07, "loss": 0.3833, "step": 35800 }, { "epoch": 20.0, "step": 35800, "total_flos": 5.794046604000698e+20, "train_loss": 0.4541601099669101, "train_runtime": 54968.513, "train_samples_per_second": 20.839, "train_steps_per_second": 0.651 } ], "logging_steps": 1.0, "max_steps": 35800, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.794046604000698e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }