{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14092, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.8587175627969085, "learning_rate": 2.364066193853428e-08, "loss": 0.4041, "step": 1 }, { "epoch": 0.0, "grad_norm": 5.583009923102745, "learning_rate": 4.728132387706856e-08, "loss": 0.9724, "step": 2 }, { "epoch": 0.0, "grad_norm": 6.076889550887127, "learning_rate": 7.092198581560284e-08, "loss": 0.9748, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.958300688747302, "learning_rate": 9.456264775413712e-08, "loss": 0.8295, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.079811480892564, "learning_rate": 1.182033096926714e-07, "loss": 1.0067, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.051209274504425, "learning_rate": 1.4184397163120568e-07, "loss": 0.9317, "step": 6 }, { "epoch": 0.0, "grad_norm": 5.92102203332061, "learning_rate": 1.6548463356973995e-07, "loss": 1.0185, "step": 7 }, { "epoch": 0.0, "grad_norm": 6.498622705489556, "learning_rate": 1.8912529550827425e-07, "loss": 0.9687, "step": 8 }, { "epoch": 0.0, "grad_norm": 5.814821658435063, "learning_rate": 2.1276595744680852e-07, "loss": 0.9695, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.446124476869084, "learning_rate": 2.364066193853428e-07, "loss": 0.8598, "step": 10 }, { "epoch": 0.0, "grad_norm": 5.516335803697005, "learning_rate": 2.6004728132387706e-07, "loss": 0.9629, "step": 11 }, { "epoch": 0.0, "grad_norm": 5.09784018677111, "learning_rate": 2.8368794326241136e-07, "loss": 0.9552, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.029348032951575, "learning_rate": 3.0732860520094566e-07, "loss": 0.9829, "step": 13 }, { "epoch": 0.0, "grad_norm": 5.197871010360706, "learning_rate": 3.309692671394799e-07, "loss": 0.961, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.444068851120301, "learning_rate": 3.5460992907801425e-07, "loss": 0.8559, "step": 15 }, { "epoch": 0.0, "grad_norm": 5.263953889786298, "learning_rate": 3.782505910165485e-07, "loss": 0.8968, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.9308618238654365, "learning_rate": 4.018912529550828e-07, "loss": 1.0012, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.7103527167811602, "learning_rate": 4.2553191489361704e-07, "loss": 0.4147, "step": 18 }, { "epoch": 0.0, "grad_norm": 5.4601398805553085, "learning_rate": 4.4917257683215134e-07, "loss": 1.0205, "step": 19 }, { "epoch": 0.0, "grad_norm": 5.842036311266042, "learning_rate": 4.728132387706856e-07, "loss": 0.9705, "step": 20 }, { "epoch": 0.0, "grad_norm": 8.941207999783432, "learning_rate": 4.964539007092199e-07, "loss": 0.9253, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.199456691240805, "learning_rate": 5.200945626477541e-07, "loss": 0.9801, "step": 22 }, { "epoch": 0.0, "grad_norm": 7.912397961401164, "learning_rate": 5.437352245862885e-07, "loss": 0.9274, "step": 23 }, { "epoch": 0.0, "grad_norm": 4.640194361056253, "learning_rate": 5.673758865248227e-07, "loss": 0.9181, "step": 24 }, { "epoch": 0.0, "grad_norm": 5.096234565296452, "learning_rate": 5.91016548463357e-07, "loss": 0.9708, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.865217896053551, "learning_rate": 6.146572104018913e-07, "loss": 0.9088, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.2449427005788785, "learning_rate": 6.382978723404255e-07, "loss": 0.9388, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.072191074509652, "learning_rate": 6.619385342789598e-07, "loss": 0.9533, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.431695321161389, "learning_rate": 6.855791962174942e-07, "loss": 0.89, "step": 29 }, { "epoch": 0.0, "grad_norm": 3.5165089283738817, "learning_rate": 7.092198581560285e-07, "loss": 0.7818, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.092919331252151, "learning_rate": 7.328605200945627e-07, "loss": 0.8877, "step": 31 }, { "epoch": 0.0, "grad_norm": 4.4152596066655585, "learning_rate": 7.56501182033097e-07, "loss": 0.8657, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.7185456183956519, "learning_rate": 7.801418439716313e-07, "loss": 0.4143, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.2372119629501883, "learning_rate": 8.037825059101656e-07, "loss": 0.8192, "step": 34 }, { "epoch": 0.0, "grad_norm": 4.028327891852994, "learning_rate": 8.274231678486998e-07, "loss": 0.8476, "step": 35 }, { "epoch": 0.0, "grad_norm": 3.5992495168327494, "learning_rate": 8.510638297872341e-07, "loss": 0.7375, "step": 36 }, { "epoch": 0.0, "grad_norm": 3.6656303580340697, "learning_rate": 8.747044917257684e-07, "loss": 0.91, "step": 37 }, { "epoch": 0.0, "grad_norm": 3.565875190750197, "learning_rate": 8.983451536643027e-07, "loss": 0.8641, "step": 38 }, { "epoch": 0.0, "grad_norm": 3.2659699093645176, "learning_rate": 9.219858156028369e-07, "loss": 0.856, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.8480726168680865, "learning_rate": 9.456264775413712e-07, "loss": 0.7334, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.104875322805582, "learning_rate": 9.692671394799055e-07, "loss": 0.7909, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.4218619588018053, "learning_rate": 9.929078014184399e-07, "loss": 0.7058, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.477914199444258, "learning_rate": 1.016548463356974e-06, "loss": 0.7549, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.898055551946699, "learning_rate": 1.0401891252955083e-06, "loss": 0.8078, "step": 44 }, { "epoch": 0.0, "grad_norm": 3.7995673086732182, "learning_rate": 1.0638297872340427e-06, "loss": 0.8143, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.3801635193808197, "learning_rate": 1.087470449172577e-06, "loss": 0.7865, "step": 46 }, { "epoch": 0.0, "grad_norm": 4.202452298258762, "learning_rate": 1.111111111111111e-06, "loss": 0.7604, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.3534765508525606, "learning_rate": 1.1347517730496454e-06, "loss": 0.7317, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.441783143300323, "learning_rate": 1.1583924349881798e-06, "loss": 0.7005, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.855261585744242, "learning_rate": 1.182033096926714e-06, "loss": 0.8033, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.848662778559429, "learning_rate": 1.2056737588652482e-06, "loss": 0.7861, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.702131872957413, "learning_rate": 1.2293144208037826e-06, "loss": 0.8627, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.543996867152144, "learning_rate": 1.2529550827423168e-06, "loss": 0.6894, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.2014595771896075, "learning_rate": 1.276595744680851e-06, "loss": 0.7669, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.311136134810509, "learning_rate": 1.3002364066193854e-06, "loss": 0.8149, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.342133445768906, "learning_rate": 1.3238770685579196e-06, "loss": 0.7447, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.3940363343042708, "learning_rate": 1.347517730496454e-06, "loss": 0.8097, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.756101926155324, "learning_rate": 1.3711583924349884e-06, "loss": 0.7584, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.809278240572465, "learning_rate": 1.3947990543735226e-06, "loss": 0.7809, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.6316174176009817, "learning_rate": 1.418439716312057e-06, "loss": 0.6602, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.3577560926340504, "learning_rate": 1.4420803782505912e-06, "loss": 0.7189, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.051199436921334, "learning_rate": 1.4657210401891254e-06, "loss": 0.6896, "step": 62 }, { "epoch": 0.0, "grad_norm": 2.1557201165820703, "learning_rate": 1.4893617021276596e-06, "loss": 0.7027, "step": 63 }, { "epoch": 0.0, "grad_norm": 2.19275007266616, "learning_rate": 1.513002364066194e-06, "loss": 0.7195, "step": 64 }, { "epoch": 0.0, "grad_norm": 2.2503664757400452, "learning_rate": 1.5366430260047282e-06, "loss": 0.7333, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.9571027147559699, "learning_rate": 1.5602836879432626e-06, "loss": 0.6952, "step": 66 }, { "epoch": 0.0, "grad_norm": 2.145083354315299, "learning_rate": 1.583924349881797e-06, "loss": 0.7175, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.8462498834637493, "learning_rate": 1.6075650118203312e-06, "loss": 0.8097, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.0321790558242383, "learning_rate": 1.6312056737588656e-06, "loss": 0.6543, "step": 69 }, { "epoch": 0.0, "grad_norm": 2.12526700475727, "learning_rate": 1.6548463356973996e-06, "loss": 0.707, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.309430782589029, "learning_rate": 1.678486997635934e-06, "loss": 0.6983, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.9668279965506479, "learning_rate": 1.7021276595744682e-06, "loss": 0.754, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.7288353278623705, "learning_rate": 1.7257683215130026e-06, "loss": 0.4241, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.697370867137166, "learning_rate": 1.7494089834515368e-06, "loss": 0.6779, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.07421670502202, "learning_rate": 1.7730496453900712e-06, "loss": 0.6492, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.138915505095088, "learning_rate": 1.7966903073286054e-06, "loss": 0.7194, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.9771665655126607, "learning_rate": 1.8203309692671398e-06, "loss": 0.7739, "step": 77 }, { "epoch": 0.01, "grad_norm": 3.391421759862296, "learning_rate": 1.8439716312056737e-06, "loss": 0.7211, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.2353836099243853, "learning_rate": 1.8676122931442081e-06, "loss": 0.7786, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.9268430730450932, "learning_rate": 1.8912529550827423e-06, "loss": 0.6035, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.1840594946106324, "learning_rate": 1.9148936170212767e-06, "loss": 0.6269, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.5120810844420296, "learning_rate": 1.938534278959811e-06, "loss": 0.7292, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.089928020757947, "learning_rate": 1.9621749408983455e-06, "loss": 0.6384, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.5159637071868013, "learning_rate": 1.9858156028368797e-06, "loss": 0.6891, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.2092160263245137, "learning_rate": 2.009456264775414e-06, "loss": 0.728, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.976966258807257, "learning_rate": 2.033096926713948e-06, "loss": 0.6447, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.394943049290893, "learning_rate": 2.0567375886524823e-06, "loss": 0.6845, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.5464700605671347, "learning_rate": 2.0803782505910165e-06, "loss": 0.761, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.85921586081652, "learning_rate": 2.104018912529551e-06, "loss": 0.7155, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.119553769728487, "learning_rate": 2.1276595744680853e-06, "loss": 0.6555, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.0326435937348313, "learning_rate": 2.1513002364066195e-06, "loss": 0.6183, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.185959844338953, "learning_rate": 2.174940898345154e-06, "loss": 0.6494, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.0629475084059496, "learning_rate": 2.1985815602836883e-06, "loss": 0.6943, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.1226083857699036, "learning_rate": 2.222222222222222e-06, "loss": 0.6199, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.1156533869945293, "learning_rate": 2.2458628841607567e-06, "loss": 0.7093, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.7452190102948758, "learning_rate": 2.269503546099291e-06, "loss": 0.395, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.0434874621946433, "learning_rate": 2.293144208037825e-06, "loss": 0.56, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.7957635107147354, "learning_rate": 2.3167848699763597e-06, "loss": 0.4229, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.183195640197295, "learning_rate": 2.340425531914894e-06, "loss": 0.708, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.149757424731977, "learning_rate": 2.364066193853428e-06, "loss": 0.6563, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.192369099217086, "learning_rate": 2.3877068557919627e-06, "loss": 0.6403, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.9478973010204859, "learning_rate": 2.4113475177304965e-06, "loss": 0.6792, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.8758522721544448, "learning_rate": 2.4349881796690306e-06, "loss": 0.6538, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.7754239938958422, "learning_rate": 2.4586288416075653e-06, "loss": 0.3938, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.029679982723797, "learning_rate": 2.4822695035460995e-06, "loss": 0.6356, "step": 105 }, { "epoch": 0.01, "grad_norm": 3.3219276832674427, "learning_rate": 2.5059101654846336e-06, "loss": 0.7016, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.321111196586404, "learning_rate": 2.529550827423168e-06, "loss": 0.641, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.8213641361459265, "learning_rate": 2.553191489361702e-06, "loss": 0.6369, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.066186222098806, "learning_rate": 2.5768321513002366e-06, "loss": 0.7249, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.0075314634026085, "learning_rate": 2.600472813238771e-06, "loss": 0.6985, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.009095582419765, "learning_rate": 2.624113475177305e-06, "loss": 0.651, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.8769117515328484, "learning_rate": 2.6477541371158392e-06, "loss": 0.6231, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.349295938224509, "learning_rate": 2.671394799054374e-06, "loss": 0.8078, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.392344586533091, "learning_rate": 2.695035460992908e-06, "loss": 0.7138, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.9772353553735873, "learning_rate": 2.7186761229314422e-06, "loss": 0.6069, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.2605075851878067, "learning_rate": 2.742316784869977e-06, "loss": 0.6807, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.0221386736618547, "learning_rate": 2.765957446808511e-06, "loss": 0.6383, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.8449815904598856, "learning_rate": 2.7895981087470452e-06, "loss": 0.6347, "step": 118 }, { "epoch": 0.01, "grad_norm": 7.182608803469407, "learning_rate": 2.8132387706855794e-06, "loss": 0.6879, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.561970103209323, "learning_rate": 2.836879432624114e-06, "loss": 0.6197, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.659226172166337, "learning_rate": 2.8605200945626482e-06, "loss": 0.5801, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.8008052054278582, "learning_rate": 2.8841607565011824e-06, "loss": 0.4485, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.159645629054842, "learning_rate": 2.907801418439716e-06, "loss": 0.7098, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.4491117080711446, "learning_rate": 2.931442080378251e-06, "loss": 0.7218, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.5344555213359006, "learning_rate": 2.955082742316785e-06, "loss": 0.7083, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.802208160376501, "learning_rate": 2.978723404255319e-06, "loss": 0.6903, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.401066542997042, "learning_rate": 3.0023640661938534e-06, "loss": 0.5544, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.0520881776094186, "learning_rate": 3.026004728132388e-06, "loss": 0.5865, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.02432126603985, "learning_rate": 3.049645390070922e-06, "loss": 0.6963, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.121749012154649, "learning_rate": 3.0732860520094564e-06, "loss": 0.6213, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.256329056344549, "learning_rate": 3.096926713947991e-06, "loss": 0.6434, "step": 131 }, { "epoch": 0.01, "grad_norm": 3.7872109464815242, "learning_rate": 3.120567375886525e-06, "loss": 0.6728, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.006021966296559, "learning_rate": 3.1442080378250594e-06, "loss": 0.6654, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.917709437476842, "learning_rate": 3.167848699763594e-06, "loss": 0.5955, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.1049139535388797, "learning_rate": 3.191489361702128e-06, "loss": 0.6252, "step": 135 }, { "epoch": 0.01, "grad_norm": 2.4296111270711354, "learning_rate": 3.2151300236406624e-06, "loss": 0.6514, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.15678878363444, "learning_rate": 3.2387706855791966e-06, "loss": 0.6498, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.619197676015908, "learning_rate": 3.262411347517731e-06, "loss": 0.6464, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.13284102430054, "learning_rate": 3.286052009456265e-06, "loss": 0.6549, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.6616228935224164, "learning_rate": 3.309692671394799e-06, "loss": 0.608, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.1618771571085063, "learning_rate": 3.3333333333333333e-06, "loss": 0.6672, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.169749292188095, "learning_rate": 3.356973995271868e-06, "loss": 0.6685, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.8560454137735654, "learning_rate": 3.380614657210402e-06, "loss": 0.6295, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.768424806113618, "learning_rate": 3.4042553191489363e-06, "loss": 0.6793, "step": 144 }, { "epoch": 0.01, "grad_norm": 2.1661559577935954, "learning_rate": 3.4278959810874705e-06, "loss": 0.6656, "step": 145 }, { "epoch": 0.01, "grad_norm": 2.3715922653346153, "learning_rate": 3.451536643026005e-06, "loss": 0.6461, "step": 146 }, { "epoch": 0.01, "grad_norm": 3.948531044351373, "learning_rate": 3.4751773049645393e-06, "loss": 0.697, "step": 147 }, { "epoch": 0.01, "grad_norm": 2.2048575774614254, "learning_rate": 3.4988179669030735e-06, "loss": 0.6424, "step": 148 }, { "epoch": 0.01, "grad_norm": 2.6250354018953552, "learning_rate": 3.522458628841608e-06, "loss": 0.6753, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.9528489349432132, "learning_rate": 3.5460992907801423e-06, "loss": 0.5661, "step": 150 }, { "epoch": 0.01, "grad_norm": 6.686830835718987, "learning_rate": 3.5697399527186765e-06, "loss": 0.5791, "step": 151 }, { "epoch": 0.01, "grad_norm": 2.1607801468413625, "learning_rate": 3.5933806146572107e-06, "loss": 0.6341, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.9659459506612003, "learning_rate": 3.6170212765957453e-06, "loss": 0.6487, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.9634120209876291, "learning_rate": 3.6406619385342795e-06, "loss": 0.712, "step": 154 }, { "epoch": 0.01, "grad_norm": 2.3123339067049553, "learning_rate": 3.6643026004728133e-06, "loss": 0.6519, "step": 155 }, { "epoch": 0.01, "grad_norm": 2.036683460177132, "learning_rate": 3.6879432624113475e-06, "loss": 0.6476, "step": 156 }, { "epoch": 0.01, "grad_norm": 2.275478462736982, "learning_rate": 3.711583924349882e-06, "loss": 0.6598, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.0982463942842586, "learning_rate": 3.7352245862884163e-06, "loss": 0.6339, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.0645188410665365, "learning_rate": 3.7588652482269505e-06, "loss": 0.6329, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.7775022337559055, "learning_rate": 3.7825059101654847e-06, "loss": 0.4324, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.148020865745057, "learning_rate": 3.8061465721040193e-06, "loss": 0.6783, "step": 161 }, { "epoch": 0.01, "grad_norm": 2.4111658323753002, "learning_rate": 3.8297872340425535e-06, "loss": 0.7128, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.3040776568311907, "learning_rate": 3.853427895981088e-06, "loss": 0.6391, "step": 163 }, { "epoch": 0.01, "grad_norm": 2.248369138677927, "learning_rate": 3.877068557919622e-06, "loss": 0.6806, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.832519970664543, "learning_rate": 3.9007092198581565e-06, "loss": 0.4929, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.9986252116661374, "learning_rate": 3.924349881796691e-06, "loss": 0.6269, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.2030260568042612, "learning_rate": 3.947990543735225e-06, "loss": 0.6258, "step": 167 }, { "epoch": 0.01, "grad_norm": 2.1303847063562737, "learning_rate": 3.9716312056737595e-06, "loss": 0.6312, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.8692761665553068, "learning_rate": 3.995271867612294e-06, "loss": 0.3962, "step": 169 }, { "epoch": 0.01, "grad_norm": 3.4747041382838306, "learning_rate": 4.018912529550828e-06, "loss": 0.605, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.000020680430264, "learning_rate": 4.042553191489362e-06, "loss": 0.6253, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.9561153891124494, "learning_rate": 4.066193853427896e-06, "loss": 0.5603, "step": 172 }, { "epoch": 0.01, "grad_norm": 2.231992534936014, "learning_rate": 4.08983451536643e-06, "loss": 0.6216, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.8735804858344745, "learning_rate": 4.113475177304965e-06, "loss": 0.5307, "step": 174 }, { "epoch": 0.01, "grad_norm": 2.2950329634408484, "learning_rate": 4.137115839243499e-06, "loss": 0.6554, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.7955483583206069, "learning_rate": 4.160756501182033e-06, "loss": 0.5008, "step": 176 }, { "epoch": 0.01, "grad_norm": 2.065040736707046, "learning_rate": 4.184397163120568e-06, "loss": 0.6066, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.9176646303520286, "learning_rate": 4.208037825059102e-06, "loss": 0.438, "step": 178 }, { "epoch": 0.01, "grad_norm": 2.423573513431105, "learning_rate": 4.231678486997636e-06, "loss": 0.6705, "step": 179 }, { "epoch": 0.01, "grad_norm": 3.912752947175479, "learning_rate": 4.255319148936171e-06, "loss": 0.6361, "step": 180 }, { "epoch": 0.01, "grad_norm": 2.142787170184617, "learning_rate": 4.278959810874705e-06, "loss": 0.5986, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.1243054517956446, "learning_rate": 4.302600472813239e-06, "loss": 0.6819, "step": 182 }, { "epoch": 0.01, "grad_norm": 6.0177085628374165, "learning_rate": 4.326241134751774e-06, "loss": 0.5869, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.4128712648456, "learning_rate": 4.349881796690308e-06, "loss": 0.6538, "step": 184 }, { "epoch": 0.01, "grad_norm": 2.6524776635188654, "learning_rate": 4.373522458628842e-06, "loss": 0.6518, "step": 185 }, { "epoch": 0.01, "grad_norm": 2.2102545510210057, "learning_rate": 4.397163120567377e-06, "loss": 0.5818, "step": 186 }, { "epoch": 0.01, "grad_norm": 2.8059648964021413, "learning_rate": 4.42080378250591e-06, "loss": 0.5986, "step": 187 }, { "epoch": 0.01, "grad_norm": 2.029891636654725, "learning_rate": 4.444444444444444e-06, "loss": 0.6285, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.7976156623824606, "learning_rate": 4.468085106382979e-06, "loss": 0.6518, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.8791569032450737, "learning_rate": 4.491725768321513e-06, "loss": 0.6251, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.604925920135495, "learning_rate": 4.515366430260047e-06, "loss": 0.7178, "step": 191 }, { "epoch": 0.01, "grad_norm": 2.479708832685261, "learning_rate": 4.539007092198582e-06, "loss": 0.6542, "step": 192 }, { "epoch": 0.01, "grad_norm": 4.2313100639606365, "learning_rate": 4.562647754137116e-06, "loss": 0.6172, "step": 193 }, { "epoch": 0.01, "grad_norm": 2.3372270218352504, "learning_rate": 4.58628841607565e-06, "loss": 0.5594, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.9384828561671921, "learning_rate": 4.609929078014185e-06, "loss": 0.4505, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.6589284000373308, "learning_rate": 4.633569739952719e-06, "loss": 0.5785, "step": 196 }, { "epoch": 0.01, "grad_norm": 2.670232525491877, "learning_rate": 4.657210401891253e-06, "loss": 0.6324, "step": 197 }, { "epoch": 0.01, "grad_norm": 6.875992556481169, "learning_rate": 4.680851063829788e-06, "loss": 0.698, "step": 198 }, { "epoch": 0.01, "grad_norm": 2.305184666203248, "learning_rate": 4.704491725768322e-06, "loss": 0.6553, "step": 199 }, { "epoch": 0.01, "grad_norm": 2.254527590111259, "learning_rate": 4.728132387706856e-06, "loss": 0.6835, "step": 200 }, { "epoch": 0.01, "grad_norm": 2.159689917649488, "learning_rate": 4.751773049645391e-06, "loss": 0.617, "step": 201 }, { "epoch": 0.01, "grad_norm": 2.0960135032150142, "learning_rate": 4.775413711583925e-06, "loss": 0.6538, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.0146064500883962, "learning_rate": 4.799054373522459e-06, "loss": 0.4311, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.8639200163765812, "learning_rate": 4.822695035460993e-06, "loss": 0.5789, "step": 204 }, { "epoch": 0.01, "grad_norm": 2.352322437698567, "learning_rate": 4.8463356973995275e-06, "loss": 0.584, "step": 205 }, { "epoch": 0.01, "grad_norm": 2.0169475662646694, "learning_rate": 4.869976359338061e-06, "loss": 0.5818, "step": 206 }, { "epoch": 0.01, "grad_norm": 2.042454197711281, "learning_rate": 4.893617021276596e-06, "loss": 0.5529, "step": 207 }, { "epoch": 0.01, "grad_norm": 3.0202756196873866, "learning_rate": 4.9172576832151305e-06, "loss": 0.63, "step": 208 }, { "epoch": 0.01, "grad_norm": 2.2172392101445784, "learning_rate": 4.940898345153664e-06, "loss": 0.6311, "step": 209 }, { "epoch": 0.01, "grad_norm": 2.431770103937424, "learning_rate": 4.964539007092199e-06, "loss": 0.741, "step": 210 }, { "epoch": 0.01, "grad_norm": 2.0743363874139136, "learning_rate": 4.9881796690307335e-06, "loss": 0.6057, "step": 211 }, { "epoch": 0.02, "grad_norm": 2.1884757140997397, "learning_rate": 5.011820330969267e-06, "loss": 0.6588, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.040171022578918, "learning_rate": 5.035460992907801e-06, "loss": 0.6113, "step": 213 }, { "epoch": 0.02, "grad_norm": 2.1314100402836016, "learning_rate": 5.059101654846336e-06, "loss": 0.6767, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.0336404153087533, "learning_rate": 5.08274231678487e-06, "loss": 0.6434, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.3218487362109674, "learning_rate": 5.106382978723404e-06, "loss": 0.594, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.1145117259166986, "learning_rate": 5.130023640661939e-06, "loss": 0.6236, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.8190050333578367, "learning_rate": 5.153664302600473e-06, "loss": 0.5533, "step": 218 }, { "epoch": 0.02, "grad_norm": 2.0695476922562404, "learning_rate": 5.177304964539007e-06, "loss": 0.5921, "step": 219 }, { "epoch": 0.02, "grad_norm": 4.022709173960421, "learning_rate": 5.200945626477542e-06, "loss": 0.6494, "step": 220 }, { "epoch": 0.02, "grad_norm": 2.0750349216859765, "learning_rate": 5.2245862884160754e-06, "loss": 0.5544, "step": 221 }, { "epoch": 0.02, "grad_norm": 2.048908153034241, "learning_rate": 5.24822695035461e-06, "loss": 0.6785, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.926955254633487, "learning_rate": 5.271867612293145e-06, "loss": 0.6023, "step": 223 }, { "epoch": 0.02, "grad_norm": 0.8322456725055736, "learning_rate": 5.2955082742316784e-06, "loss": 0.433, "step": 224 }, { "epoch": 0.02, "grad_norm": 0.9526387888660328, "learning_rate": 5.319148936170213e-06, "loss": 0.4184, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.279764837851014, "learning_rate": 5.342789598108748e-06, "loss": 0.6319, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.9231079260218826, "learning_rate": 5.3664302600472814e-06, "loss": 0.7165, "step": 227 }, { "epoch": 0.02, "grad_norm": 2.5359330229169426, "learning_rate": 5.390070921985816e-06, "loss": 0.6826, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.3626667464121374, "learning_rate": 5.413711583924351e-06, "loss": 0.6322, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.152996996023829, "learning_rate": 5.4373522458628844e-06, "loss": 0.616, "step": 230 }, { "epoch": 0.02, "grad_norm": 1.7480873509891672, "learning_rate": 5.460992907801419e-06, "loss": 0.5359, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.92330487822303, "learning_rate": 5.484633569739954e-06, "loss": 0.6141, "step": 232 }, { "epoch": 0.02, "grad_norm": 2.4040968360728185, "learning_rate": 5.5082742316784874e-06, "loss": 0.562, "step": 233 }, { "epoch": 0.02, "grad_norm": 2.116920489934228, "learning_rate": 5.531914893617022e-06, "loss": 0.6639, "step": 234 }, { "epoch": 0.02, "grad_norm": 2.222744562531556, "learning_rate": 5.555555555555557e-06, "loss": 0.6301, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.7979351889416966, "learning_rate": 5.5791962174940904e-06, "loss": 0.5751, "step": 236 }, { "epoch": 0.02, "grad_norm": 5.173884141866729, "learning_rate": 5.602836879432625e-06, "loss": 0.6598, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.4420450784075487, "learning_rate": 5.626477541371159e-06, "loss": 0.6309, "step": 238 }, { "epoch": 0.02, "grad_norm": 2.425918388030342, "learning_rate": 5.6501182033096934e-06, "loss": 0.6037, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.7341447896216793, "learning_rate": 5.673758865248228e-06, "loss": 0.6353, "step": 240 }, { "epoch": 0.02, "grad_norm": 2.3018966088550648, "learning_rate": 5.697399527186762e-06, "loss": 0.64, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.3374635210524473, "learning_rate": 5.7210401891252964e-06, "loss": 0.4235, "step": 242 }, { "epoch": 0.02, "grad_norm": 2.282575378237286, "learning_rate": 5.744680851063831e-06, "loss": 0.5711, "step": 243 }, { "epoch": 0.02, "grad_norm": 2.241359881772414, "learning_rate": 5.768321513002365e-06, "loss": 0.7367, "step": 244 }, { "epoch": 0.02, "grad_norm": 0.9589976522347291, "learning_rate": 5.791962174940899e-06, "loss": 0.4299, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.744917319264682, "learning_rate": 5.815602836879432e-06, "loss": 0.622, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.8207386108766157, "learning_rate": 5.839243498817967e-06, "loss": 0.5983, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.8499861172988363, "learning_rate": 5.862884160756502e-06, "loss": 0.612, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.8691772168200202, "learning_rate": 5.886524822695035e-06, "loss": 0.5319, "step": 249 }, { "epoch": 0.02, "grad_norm": 2.6151822742700492, "learning_rate": 5.91016548463357e-06, "loss": 0.647, "step": 250 }, { "epoch": 0.02, "grad_norm": 2.4267246005298952, "learning_rate": 5.933806146572105e-06, "loss": 0.5778, "step": 251 }, { "epoch": 0.02, "grad_norm": 2.4965085672937533, "learning_rate": 5.957446808510638e-06, "loss": 0.6263, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.9438471287419556, "learning_rate": 5.981087470449173e-06, "loss": 0.6042, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.0685659390083897, "learning_rate": 6.004728132387707e-06, "loss": 0.4295, "step": 254 }, { "epoch": 0.02, "grad_norm": 3.027350347918465, "learning_rate": 6.028368794326241e-06, "loss": 0.6276, "step": 255 }, { "epoch": 0.02, "grad_norm": 2.229266355135027, "learning_rate": 6.052009456264776e-06, "loss": 0.595, "step": 256 }, { "epoch": 0.02, "grad_norm": 2.2132558520570584, "learning_rate": 6.07565011820331e-06, "loss": 0.5963, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.953861668104456, "learning_rate": 6.099290780141844e-06, "loss": 0.6, "step": 258 }, { "epoch": 0.02, "grad_norm": 2.1426793603173016, "learning_rate": 6.122931442080379e-06, "loss": 0.6653, "step": 259 }, { "epoch": 0.02, "grad_norm": 2.542720484713686, "learning_rate": 6.146572104018913e-06, "loss": 0.5929, "step": 260 }, { "epoch": 0.02, "grad_norm": 3.3792015730201044, "learning_rate": 6.170212765957447e-06, "loss": 0.6423, "step": 261 }, { "epoch": 0.02, "grad_norm": 3.0640976142489302, "learning_rate": 6.193853427895982e-06, "loss": 0.5904, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.9350334199356625, "learning_rate": 6.217494089834516e-06, "loss": 0.6447, "step": 263 }, { "epoch": 0.02, "grad_norm": 2.3687732731010915, "learning_rate": 6.24113475177305e-06, "loss": 0.6821, "step": 264 }, { "epoch": 0.02, "grad_norm": 0.8943668043857836, "learning_rate": 6.264775413711585e-06, "loss": 0.4209, "step": 265 }, { "epoch": 0.02, "grad_norm": 2.1194396436028202, "learning_rate": 6.288416075650119e-06, "loss": 0.5754, "step": 266 }, { "epoch": 0.02, "grad_norm": 2.0594708970722646, "learning_rate": 6.312056737588653e-06, "loss": 0.7548, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.3497058443209657, "learning_rate": 6.335697399527188e-06, "loss": 0.6459, "step": 268 }, { "epoch": 0.02, "grad_norm": 0.8400306351135198, "learning_rate": 6.359338061465722e-06, "loss": 0.4111, "step": 269 }, { "epoch": 0.02, "grad_norm": 2.929830230212635, "learning_rate": 6.382978723404256e-06, "loss": 0.6433, "step": 270 }, { "epoch": 0.02, "grad_norm": 2.9675168606594404, "learning_rate": 6.40661938534279e-06, "loss": 0.64, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.9387238360710977, "learning_rate": 6.430260047281325e-06, "loss": 0.6548, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.866164478305682, "learning_rate": 6.453900709219859e-06, "loss": 0.628, "step": 273 }, { "epoch": 0.02, "grad_norm": 0.884236064552695, "learning_rate": 6.477541371158393e-06, "loss": 0.4411, "step": 274 }, { "epoch": 0.02, "grad_norm": 2.55431358080111, "learning_rate": 6.501182033096928e-06, "loss": 0.631, "step": 275 }, { "epoch": 0.02, "grad_norm": 2.319321607416945, "learning_rate": 6.524822695035462e-06, "loss": 0.6148, "step": 276 }, { "epoch": 0.02, "grad_norm": 2.117724538033564, "learning_rate": 6.548463356973995e-06, "loss": 0.6242, "step": 277 }, { "epoch": 0.02, "grad_norm": 0.87811584005566, "learning_rate": 6.57210401891253e-06, "loss": 0.4284, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.9310970615008756, "learning_rate": 6.595744680851064e-06, "loss": 0.5737, "step": 279 }, { "epoch": 0.02, "grad_norm": 2.955230745343112, "learning_rate": 6.619385342789598e-06, "loss": 0.6007, "step": 280 }, { "epoch": 0.02, "grad_norm": 6.078262572457781, "learning_rate": 6.643026004728133e-06, "loss": 0.6707, "step": 281 }, { "epoch": 0.02, "grad_norm": 2.696540506411694, "learning_rate": 6.666666666666667e-06, "loss": 0.6274, "step": 282 }, { "epoch": 0.02, "grad_norm": 2.1264118683709414, "learning_rate": 6.690307328605201e-06, "loss": 0.5839, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.9635434754522751, "learning_rate": 6.713947990543736e-06, "loss": 0.6305, "step": 284 }, { "epoch": 0.02, "grad_norm": 2.8179530548994656, "learning_rate": 6.73758865248227e-06, "loss": 0.6109, "step": 285 }, { "epoch": 0.02, "grad_norm": 2.147283340542664, "learning_rate": 6.761229314420804e-06, "loss": 0.6575, "step": 286 }, { "epoch": 0.02, "grad_norm": 2.046292573560414, "learning_rate": 6.784869976359338e-06, "loss": 0.6166, "step": 287 }, { "epoch": 0.02, "grad_norm": 2.2349979813086063, "learning_rate": 6.808510638297873e-06, "loss": 0.5921, "step": 288 }, { "epoch": 0.02, "grad_norm": 6.011143874573497, "learning_rate": 6.832151300236407e-06, "loss": 0.6377, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.7149223117749663, "learning_rate": 6.855791962174941e-06, "loss": 0.5793, "step": 290 }, { "epoch": 0.02, "grad_norm": 2.6870360084163663, "learning_rate": 6.879432624113476e-06, "loss": 0.6359, "step": 291 }, { "epoch": 0.02, "grad_norm": 2.509501069401399, "learning_rate": 6.90307328605201e-06, "loss": 0.6048, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.2675136287944646, "learning_rate": 6.926713947990544e-06, "loss": 0.6566, "step": 293 }, { "epoch": 0.02, "grad_norm": 2.30047382654215, "learning_rate": 6.950354609929079e-06, "loss": 0.6292, "step": 294 }, { "epoch": 0.02, "grad_norm": 2.0424336013253064, "learning_rate": 6.973995271867613e-06, "loss": 0.6222, "step": 295 }, { "epoch": 0.02, "grad_norm": 2.0412213008724183, "learning_rate": 6.997635933806147e-06, "loss": 0.5894, "step": 296 }, { "epoch": 0.02, "grad_norm": 3.3417543290975713, "learning_rate": 7.021276595744682e-06, "loss": 0.5642, "step": 297 }, { "epoch": 0.02, "grad_norm": 2.403106512160002, "learning_rate": 7.044917257683216e-06, "loss": 0.5829, "step": 298 }, { "epoch": 0.02, "grad_norm": 2.090566868722372, "learning_rate": 7.06855791962175e-06, "loss": 0.6031, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.1777723014145836, "learning_rate": 7.092198581560285e-06, "loss": 0.5388, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.748322187303465, "learning_rate": 7.115839243498818e-06, "loss": 0.6316, "step": 301 }, { "epoch": 0.02, "grad_norm": 2.3467489343001207, "learning_rate": 7.139479905437353e-06, "loss": 0.5795, "step": 302 }, { "epoch": 0.02, "grad_norm": 2.6047512781678734, "learning_rate": 7.163120567375888e-06, "loss": 0.6116, "step": 303 }, { "epoch": 0.02, "grad_norm": 2.1125782871518317, "learning_rate": 7.186761229314421e-06, "loss": 0.644, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.993291878592755, "learning_rate": 7.210401891252956e-06, "loss": 0.6822, "step": 305 }, { "epoch": 0.02, "grad_norm": 2.019692812487218, "learning_rate": 7.234042553191491e-06, "loss": 0.6087, "step": 306 }, { "epoch": 0.02, "grad_norm": 2.336444997291636, "learning_rate": 7.257683215130024e-06, "loss": 0.5787, "step": 307 }, { "epoch": 0.02, "grad_norm": 2.0850134094412023, "learning_rate": 7.281323877068559e-06, "loss": 0.6551, "step": 308 }, { "epoch": 0.02, "grad_norm": 2.4225300082135353, "learning_rate": 7.304964539007094e-06, "loss": 0.6174, "step": 309 }, { "epoch": 0.02, "grad_norm": 2.583401647353592, "learning_rate": 7.3286052009456266e-06, "loss": 0.6061, "step": 310 }, { "epoch": 0.02, "grad_norm": 2.6144572594636575, "learning_rate": 7.352245862884161e-06, "loss": 0.6351, "step": 311 }, { "epoch": 0.02, "grad_norm": 2.34160821928125, "learning_rate": 7.375886524822695e-06, "loss": 0.6232, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.8616464175081722, "learning_rate": 7.3995271867612296e-06, "loss": 0.5718, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.8393024740834645, "learning_rate": 7.423167848699764e-06, "loss": 0.634, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.984175163360553, "learning_rate": 7.446808510638298e-06, "loss": 0.6588, "step": 315 }, { "epoch": 0.02, "grad_norm": 4.970645907307493, "learning_rate": 7.4704491725768326e-06, "loss": 0.5868, "step": 316 }, { "epoch": 0.02, "grad_norm": 2.170123670941602, "learning_rate": 7.494089834515367e-06, "loss": 0.5987, "step": 317 }, { "epoch": 0.02, "grad_norm": 2.291316350579009, "learning_rate": 7.517730496453901e-06, "loss": 0.5691, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.9967141003084057, "learning_rate": 7.5413711583924356e-06, "loss": 0.585, "step": 319 }, { "epoch": 0.02, "grad_norm": 2.205025391781694, "learning_rate": 7.565011820330969e-06, "loss": 0.7242, "step": 320 }, { "epoch": 0.02, "grad_norm": 2.143973978339484, "learning_rate": 7.588652482269504e-06, "loss": 0.5935, "step": 321 }, { "epoch": 0.02, "grad_norm": 0.9475879315260549, "learning_rate": 7.6122931442080386e-06, "loss": 0.4418, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.94530812902598, "learning_rate": 7.635933806146573e-06, "loss": 0.446, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.8266913056807288, "learning_rate": 7.659574468085107e-06, "loss": 0.7063, "step": 324 }, { "epoch": 0.02, "grad_norm": 2.463706612258893, "learning_rate": 7.68321513002364e-06, "loss": 0.5881, "step": 325 }, { "epoch": 0.02, "grad_norm": 2.604791553022805, "learning_rate": 7.706855791962176e-06, "loss": 0.6763, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.8924654838469481, "learning_rate": 7.73049645390071e-06, "loss": 0.6021, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.6238009898913615, "learning_rate": 7.754137115839244e-06, "loss": 0.5749, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.0355786303738426, "learning_rate": 7.77777777777778e-06, "loss": 0.5809, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.9207474085031437, "learning_rate": 7.801418439716313e-06, "loss": 0.5797, "step": 330 }, { "epoch": 0.02, "grad_norm": 2.1143960443693732, "learning_rate": 7.825059101654847e-06, "loss": 0.6487, "step": 331 }, { "epoch": 0.02, "grad_norm": 2.033327914147217, "learning_rate": 7.848699763593382e-06, "loss": 0.6823, "step": 332 }, { "epoch": 0.02, "grad_norm": 2.1158852684356924, "learning_rate": 7.872340425531916e-06, "loss": 0.6055, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.880102422329663, "learning_rate": 7.89598108747045e-06, "loss": 0.6515, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.9025149898387945, "learning_rate": 7.919621749408985e-06, "loss": 0.5768, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.1549472944395816, "learning_rate": 7.943262411347519e-06, "loss": 0.6651, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.7436315644493636, "learning_rate": 7.966903073286053e-06, "loss": 0.6324, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.7910781595848788, "learning_rate": 7.990543735224588e-06, "loss": 0.6185, "step": 338 }, { "epoch": 0.02, "grad_norm": 2.671234577697105, "learning_rate": 8.014184397163122e-06, "loss": 0.6658, "step": 339 }, { "epoch": 0.02, "grad_norm": 3.1615541550364177, "learning_rate": 8.037825059101656e-06, "loss": 0.5478, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.1265840907786644, "learning_rate": 8.061465721040191e-06, "loss": 0.441, "step": 341 }, { "epoch": 0.02, "grad_norm": 3.6930899183772192, "learning_rate": 8.085106382978723e-06, "loss": 0.7241, "step": 342 }, { "epoch": 0.02, "grad_norm": 2.1981823695190608, "learning_rate": 8.108747044917257e-06, "loss": 0.5729, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.1312927140441005, "learning_rate": 8.132387706855792e-06, "loss": 0.5973, "step": 344 }, { "epoch": 0.02, "grad_norm": 2.557075663219547, "learning_rate": 8.156028368794326e-06, "loss": 0.5948, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.8753559873805663, "learning_rate": 8.17966903073286e-06, "loss": 0.4182, "step": 346 }, { "epoch": 0.02, "grad_norm": 2.3843164064101305, "learning_rate": 8.203309692671395e-06, "loss": 0.7099, "step": 347 }, { "epoch": 0.02, "grad_norm": 2.226871693466023, "learning_rate": 8.22695035460993e-06, "loss": 0.5838, "step": 348 }, { "epoch": 0.02, "grad_norm": 2.1076826836049203, "learning_rate": 8.250591016548463e-06, "loss": 0.6022, "step": 349 }, { "epoch": 0.02, "grad_norm": 2.380663233509458, "learning_rate": 8.274231678486998e-06, "loss": 0.6895, "step": 350 }, { "epoch": 0.02, "grad_norm": 2.1910461963761962, "learning_rate": 8.297872340425532e-06, "loss": 0.6299, "step": 351 }, { "epoch": 0.02, "grad_norm": 2.1492939857674713, "learning_rate": 8.321513002364066e-06, "loss": 0.6809, "step": 352 }, { "epoch": 0.03, "grad_norm": 2.04669671603823, "learning_rate": 8.345153664302601e-06, "loss": 0.5487, "step": 353 }, { "epoch": 0.03, "grad_norm": 3.193631763129984, "learning_rate": 8.368794326241135e-06, "loss": 0.6054, "step": 354 }, { "epoch": 0.03, "grad_norm": 2.07281163950364, "learning_rate": 8.392434988179669e-06, "loss": 0.5948, "step": 355 }, { "epoch": 0.03, "grad_norm": 2.4745230634498143, "learning_rate": 8.416075650118204e-06, "loss": 0.6859, "step": 356 }, { "epoch": 0.03, "grad_norm": 0.8582869660609473, "learning_rate": 8.439716312056738e-06, "loss": 0.4304, "step": 357 }, { "epoch": 0.03, "grad_norm": 2.2129767939103395, "learning_rate": 8.463356973995272e-06, "loss": 0.5606, "step": 358 }, { "epoch": 0.03, "grad_norm": 2.263933634811897, "learning_rate": 8.486997635933807e-06, "loss": 0.6006, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.9323600318164587, "learning_rate": 8.510638297872341e-06, "loss": 0.6188, "step": 360 }, { "epoch": 0.03, "grad_norm": 4.005975598107289, "learning_rate": 8.534278959810875e-06, "loss": 0.5893, "step": 361 }, { "epoch": 0.03, "grad_norm": 2.127423157847264, "learning_rate": 8.55791962174941e-06, "loss": 0.5931, "step": 362 }, { "epoch": 0.03, "grad_norm": 2.1910497480451565, "learning_rate": 8.581560283687944e-06, "loss": 0.6218, "step": 363 }, { "epoch": 0.03, "grad_norm": 2.1184685435170785, "learning_rate": 8.605200945626478e-06, "loss": 0.6796, "step": 364 }, { "epoch": 0.03, "grad_norm": 1.8487136254519403, "learning_rate": 8.628841607565013e-06, "loss": 0.6597, "step": 365 }, { "epoch": 0.03, "grad_norm": 0.9038437362243, "learning_rate": 8.652482269503547e-06, "loss": 0.4437, "step": 366 }, { "epoch": 0.03, "grad_norm": 2.5798214198073794, "learning_rate": 8.676122931442081e-06, "loss": 0.5401, "step": 367 }, { "epoch": 0.03, "grad_norm": 2.3925206905682925, "learning_rate": 8.699763593380616e-06, "loss": 0.5959, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.9968658815156681, "learning_rate": 8.72340425531915e-06, "loss": 0.6103, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.8336797492105232, "learning_rate": 8.747044917257684e-06, "loss": 0.6316, "step": 370 }, { "epoch": 0.03, "grad_norm": 2.529576850073482, "learning_rate": 8.77068557919622e-06, "loss": 0.6745, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.929251438954493, "learning_rate": 8.794326241134753e-06, "loss": 0.6582, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.8891939284759103, "learning_rate": 8.817966903073287e-06, "loss": 0.6353, "step": 373 }, { "epoch": 0.03, "grad_norm": 2.2843901750783764, "learning_rate": 8.84160756501182e-06, "loss": 0.5779, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.9352115203970868, "learning_rate": 8.865248226950355e-06, "loss": 0.6481, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.7573679707082033, "learning_rate": 8.888888888888888e-06, "loss": 0.6559, "step": 376 }, { "epoch": 0.03, "grad_norm": 2.6362787419821148, "learning_rate": 8.912529550827424e-06, "loss": 0.5897, "step": 377 }, { "epoch": 0.03, "grad_norm": 2.6083820211608773, "learning_rate": 8.936170212765958e-06, "loss": 0.5808, "step": 378 }, { "epoch": 0.03, "grad_norm": 2.093429173347105, "learning_rate": 8.959810874704491e-06, "loss": 0.6066, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.8522287764911876, "learning_rate": 8.983451536643027e-06, "loss": 0.6136, "step": 380 }, { "epoch": 0.03, "grad_norm": 1.9388692480217478, "learning_rate": 9.00709219858156e-06, "loss": 0.6412, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.1316123287352875, "learning_rate": 9.030732860520094e-06, "loss": 0.4386, "step": 382 }, { "epoch": 0.03, "grad_norm": 0.879938641379592, "learning_rate": 9.05437352245863e-06, "loss": 0.4433, "step": 383 }, { "epoch": 0.03, "grad_norm": 2.69067768010082, "learning_rate": 9.078014184397164e-06, "loss": 0.5895, "step": 384 }, { "epoch": 0.03, "grad_norm": 2.0209256009242886, "learning_rate": 9.101654846335697e-06, "loss": 0.5338, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.0397709911903443, "learning_rate": 9.125295508274233e-06, "loss": 0.4457, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.8782836923334245, "learning_rate": 9.148936170212767e-06, "loss": 0.6204, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.8481548702128392, "learning_rate": 9.1725768321513e-06, "loss": 0.5851, "step": 388 }, { "epoch": 0.03, "grad_norm": 2.049637721396946, "learning_rate": 9.196217494089836e-06, "loss": 0.6538, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.8603948156072645, "learning_rate": 9.21985815602837e-06, "loss": 0.5481, "step": 390 }, { "epoch": 0.03, "grad_norm": 2.068030725370351, "learning_rate": 9.243498817966903e-06, "loss": 0.6356, "step": 391 }, { "epoch": 0.03, "grad_norm": 2.6869762348526804, "learning_rate": 9.267139479905439e-06, "loss": 0.5658, "step": 392 }, { "epoch": 0.03, "grad_norm": 3.703195674165023, "learning_rate": 9.290780141843973e-06, "loss": 0.6712, "step": 393 }, { "epoch": 0.03, "grad_norm": 2.146561298913722, "learning_rate": 9.314420803782506e-06, "loss": 0.5824, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.8588813054324174, "learning_rate": 9.338061465721042e-06, "loss": 0.5665, "step": 395 }, { "epoch": 0.03, "grad_norm": 2.4400727930026456, "learning_rate": 9.361702127659576e-06, "loss": 0.5711, "step": 396 }, { "epoch": 0.03, "grad_norm": 2.420514059813741, "learning_rate": 9.38534278959811e-06, "loss": 0.7586, "step": 397 }, { "epoch": 0.03, "grad_norm": 2.106911929269598, "learning_rate": 9.408983451536645e-06, "loss": 0.6493, "step": 398 }, { "epoch": 0.03, "grad_norm": 2.072530122310079, "learning_rate": 9.432624113475179e-06, "loss": 0.5177, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.9551347240993509, "learning_rate": 9.456264775413712e-06, "loss": 0.5946, "step": 400 }, { "epoch": 0.03, "grad_norm": 2.409768171945308, "learning_rate": 9.479905437352248e-06, "loss": 0.5555, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.9317505225205758, "learning_rate": 9.503546099290782e-06, "loss": 0.5903, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.9306826865626512, "learning_rate": 9.527186761229315e-06, "loss": 0.5887, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.724141527162873, "learning_rate": 9.55082742316785e-06, "loss": 0.5717, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.129590676540281, "learning_rate": 9.574468085106385e-06, "loss": 0.4453, "step": 405 }, { "epoch": 0.03, "grad_norm": 2.6993176113650215, "learning_rate": 9.598108747044918e-06, "loss": 0.6613, "step": 406 }, { "epoch": 0.03, "grad_norm": 2.0887868782976944, "learning_rate": 9.621749408983452e-06, "loss": 0.591, "step": 407 }, { "epoch": 0.03, "grad_norm": 2.106431258762348, "learning_rate": 9.645390070921986e-06, "loss": 0.594, "step": 408 }, { "epoch": 0.03, "grad_norm": 2.1784688426064713, "learning_rate": 9.66903073286052e-06, "loss": 0.6298, "step": 409 }, { "epoch": 0.03, "grad_norm": 2.1721541266727655, "learning_rate": 9.692671394799055e-06, "loss": 0.6505, "step": 410 }, { "epoch": 0.03, "grad_norm": 2.1809630033542047, "learning_rate": 9.716312056737589e-06, "loss": 0.6297, "step": 411 }, { "epoch": 0.03, "grad_norm": 0.8424817420723397, "learning_rate": 9.739952718676123e-06, "loss": 0.4238, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.784788182359966, "learning_rate": 9.763593380614658e-06, "loss": 0.5542, "step": 413 }, { "epoch": 0.03, "grad_norm": 2.4377206466890113, "learning_rate": 9.787234042553192e-06, "loss": 0.663, "step": 414 }, { "epoch": 0.03, "grad_norm": 2.050555734404037, "learning_rate": 9.810874704491726e-06, "loss": 0.6063, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.9668111735131868, "learning_rate": 9.834515366430261e-06, "loss": 0.5514, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.859227133343033, "learning_rate": 9.858156028368795e-06, "loss": 0.6234, "step": 417 }, { "epoch": 0.03, "grad_norm": 4.700581455890908, "learning_rate": 9.881796690307329e-06, "loss": 0.5857, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.9895807476281837, "learning_rate": 9.905437352245864e-06, "loss": 0.6442, "step": 419 }, { "epoch": 0.03, "grad_norm": 2.2895671109023112, "learning_rate": 9.929078014184398e-06, "loss": 0.6353, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.6777334805932096, "learning_rate": 9.952718676122932e-06, "loss": 0.523, "step": 421 }, { "epoch": 0.03, "grad_norm": 2.054313557069353, "learning_rate": 9.976359338061467e-06, "loss": 0.6265, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.0850554042760896, "learning_rate": 1e-05, "loss": 0.446, "step": 423 }, { "epoch": 0.03, "grad_norm": 2.1875223299760185, "learning_rate": 9.99999986794153e-06, "loss": 0.6062, "step": 424 }, { "epoch": 0.03, "grad_norm": 2.0855143446816133, "learning_rate": 9.999999471766123e-06, "loss": 0.5843, "step": 425 }, { "epoch": 0.03, "grad_norm": 22.755803049184415, "learning_rate": 9.999998811473801e-06, "loss": 0.6147, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.7034978733491404, "learning_rate": 9.999997887064601e-06, "loss": 0.5475, "step": 427 }, { "epoch": 0.03, "grad_norm": 2.2335003786275074, "learning_rate": 9.99999669853857e-06, "loss": 0.5942, "step": 428 }, { "epoch": 0.03, "grad_norm": 2.5050152320176826, "learning_rate": 9.999995245895772e-06, "loss": 0.6755, "step": 429 }, { "epoch": 0.03, "grad_norm": 2.123235795706473, "learning_rate": 9.999993529136281e-06, "loss": 0.686, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.0549905978769132, "learning_rate": 9.999991548260191e-06, "loss": 0.4642, "step": 431 }, { "epoch": 0.03, "grad_norm": 2.028104503190489, "learning_rate": 9.999989303267605e-06, "loss": 0.5842, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.9980137198882097, "learning_rate": 9.999986794158641e-06, "loss": 0.5883, "step": 433 }, { "epoch": 0.03, "grad_norm": 4.95261947084521, "learning_rate": 9.99998402093343e-06, "loss": 0.6478, "step": 434 }, { "epoch": 0.03, "grad_norm": 0.873147957115486, "learning_rate": 9.999980983592125e-06, "loss": 0.4274, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.962941880908112, "learning_rate": 9.99997768213488e-06, "loss": 0.6601, "step": 436 }, { "epoch": 0.03, "grad_norm": 2.117172694912394, "learning_rate": 9.999974116561872e-06, "loss": 0.6759, "step": 437 }, { "epoch": 0.03, "grad_norm": 2.070729751050917, "learning_rate": 9.999970286873288e-06, "loss": 0.6857, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.792328691351642, "learning_rate": 9.999966193069332e-06, "loss": 0.6117, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.6286698831847481, "learning_rate": 9.999961835150221e-06, "loss": 0.5911, "step": 440 }, { "epoch": 0.03, "grad_norm": 2.290532725641558, "learning_rate": 9.999957213116183e-06, "loss": 0.587, "step": 441 }, { "epoch": 0.03, "grad_norm": 2.0712507395701807, "learning_rate": 9.999952326967462e-06, "loss": 0.6172, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.814219437690518, "learning_rate": 9.999947176704316e-06, "loss": 0.6208, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.8663775511642962, "learning_rate": 9.99994176232702e-06, "loss": 0.6249, "step": 444 }, { "epoch": 0.03, "grad_norm": 2.7514297033447748, "learning_rate": 9.999936083835856e-06, "loss": 0.6375, "step": 445 }, { "epoch": 0.03, "grad_norm": 2.146583255742809, "learning_rate": 9.999930141231127e-06, "loss": 0.6106, "step": 446 }, { "epoch": 0.03, "grad_norm": 3.2936140009088253, "learning_rate": 9.999923934513146e-06, "loss": 0.5486, "step": 447 }, { "epoch": 0.03, "grad_norm": 2.3405766697992267, "learning_rate": 9.999917463682241e-06, "loss": 0.6964, "step": 448 }, { "epoch": 0.03, "grad_norm": 2.4663997834665987, "learning_rate": 9.999910728738753e-06, "loss": 0.6502, "step": 449 }, { "epoch": 0.03, "grad_norm": 2.307899006947246, "learning_rate": 9.999903729683038e-06, "loss": 0.6407, "step": 450 }, { "epoch": 0.03, "grad_norm": 2.2261264949341855, "learning_rate": 9.999896466515466e-06, "loss": 0.6041, "step": 451 }, { "epoch": 0.03, "grad_norm": 3.4670024909888713, "learning_rate": 9.999888939236422e-06, "loss": 0.5708, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.9916134940828316, "learning_rate": 9.999881147846301e-06, "loss": 0.6283, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.8559951887730242, "learning_rate": 9.999873092345516e-06, "loss": 0.5826, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.9715093683395095, "learning_rate": 9.999864772734494e-06, "loss": 0.5921, "step": 455 }, { "epoch": 0.03, "grad_norm": 2.5524785687961073, "learning_rate": 9.999856189013671e-06, "loss": 0.6845, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.5153120227856305, "learning_rate": 9.999847341183501e-06, "loss": 0.4623, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.2564559672472113, "learning_rate": 9.999838229244455e-06, "loss": 0.4419, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.8994015342533994, "learning_rate": 9.999828853197013e-06, "loss": 0.6337, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.8336871676104447, "learning_rate": 9.999819213041665e-06, "loss": 0.5976, "step": 460 }, { "epoch": 0.03, "grad_norm": 2.0508705096936644, "learning_rate": 9.999809308778929e-06, "loss": 0.6666, "step": 461 }, { "epoch": 0.03, "grad_norm": 2.332915698788453, "learning_rate": 9.99979914040932e-06, "loss": 0.6594, "step": 462 }, { "epoch": 0.03, "grad_norm": 2.285609998559129, "learning_rate": 9.999788707933382e-06, "loss": 0.6287, "step": 463 }, { "epoch": 0.03, "grad_norm": 2.2941943555694406, "learning_rate": 9.999778011351661e-06, "loss": 0.5948, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.5501557637749934, "learning_rate": 9.999767050664725e-06, "loss": 0.5672, "step": 465 }, { "epoch": 0.03, "grad_norm": 2.568267448780335, "learning_rate": 9.99975582587315e-06, "loss": 0.621, "step": 466 }, { "epoch": 0.03, "grad_norm": 2.272348231857212, "learning_rate": 9.999744336977532e-06, "loss": 0.662, "step": 467 }, { "epoch": 0.03, "grad_norm": 2.1245652183532777, "learning_rate": 9.999732583978476e-06, "loss": 0.6502, "step": 468 }, { "epoch": 0.03, "grad_norm": 2.092213679057817, "learning_rate": 9.999720566876605e-06, "loss": 0.6341, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.8142001875679095, "learning_rate": 9.999708285672551e-06, "loss": 0.511, "step": 470 }, { "epoch": 0.03, "grad_norm": 2.4631822265911083, "learning_rate": 9.999695740366966e-06, "loss": 0.6368, "step": 471 }, { "epoch": 0.03, "grad_norm": 2.2359457577411708, "learning_rate": 9.999682930960508e-06, "loss": 0.6226, "step": 472 }, { "epoch": 0.03, "grad_norm": 2.3042026477374247, "learning_rate": 9.999669857453857e-06, "loss": 0.6059, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.9471741908991034, "learning_rate": 9.999656519847704e-06, "loss": 0.6653, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.9441546705902637, "learning_rate": 9.99964291814275e-06, "loss": 0.7125, "step": 475 }, { "epoch": 0.03, "grad_norm": 2.0954695452559515, "learning_rate": 9.999629052339719e-06, "loss": 0.5726, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.766923570965179, "learning_rate": 9.99961492243934e-06, "loss": 0.5918, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.925135348327929, "learning_rate": 9.99960052844236e-06, "loss": 0.5814, "step": 478 }, { "epoch": 0.03, "grad_norm": 2.0965818632458926, "learning_rate": 9.999585870349537e-06, "loss": 0.6658, "step": 479 }, { "epoch": 0.03, "grad_norm": 2.196105671896359, "learning_rate": 9.999570948161649e-06, "loss": 0.6154, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.913309831435858, "learning_rate": 9.999555761879482e-06, "loss": 0.5941, "step": 481 }, { "epoch": 0.03, "grad_norm": 2.460971651752271, "learning_rate": 9.99954031150384e-06, "loss": 0.6147, "step": 482 }, { "epoch": 0.03, "grad_norm": 3.0391409946655625, "learning_rate": 9.999524597035535e-06, "loss": 0.6049, "step": 483 }, { "epoch": 0.03, "grad_norm": 2.331122372519916, "learning_rate": 9.999508618475403e-06, "loss": 0.6802, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.8306106286035506, "learning_rate": 9.999492375824285e-06, "loss": 0.5982, "step": 485 }, { "epoch": 0.03, "grad_norm": 2.3546967095804905, "learning_rate": 9.999475869083037e-06, "loss": 0.6507, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.9401689794455603, "learning_rate": 9.999459098252535e-06, "loss": 0.6034, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.8680707621496733, "learning_rate": 9.999442063333663e-06, "loss": 0.6466, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.6710636156417706, "learning_rate": 9.999424764327319e-06, "loss": 0.5802, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.962656405335296, "learning_rate": 9.999407201234422e-06, "loss": 0.576, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.8608754414026427, "learning_rate": 9.999389374055892e-06, "loss": 0.6263, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.7038428042080167, "learning_rate": 9.999371282792678e-06, "loss": 0.6258, "step": 492 }, { "epoch": 0.03, "grad_norm": 2.7299346634445216, "learning_rate": 9.999352927445732e-06, "loss": 0.6221, "step": 493 }, { "epoch": 0.04, "grad_norm": 2.2005390078664777, "learning_rate": 9.999334308016024e-06, "loss": 0.638, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.6855551445316017, "learning_rate": 9.999315424504536e-06, "loss": 0.555, "step": 495 }, { "epoch": 0.04, "grad_norm": 2.3654195102561513, "learning_rate": 9.99929627691227e-06, "loss": 0.6586, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.889557322858207, "learning_rate": 9.999276865240234e-06, "loss": 0.6243, "step": 497 }, { "epoch": 0.04, "grad_norm": 2.039578181457567, "learning_rate": 9.999257189489454e-06, "loss": 0.6757, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.6921587885375207, "learning_rate": 9.99923724966097e-06, "loss": 0.6146, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.9713363629517324, "learning_rate": 9.999217045755833e-06, "loss": 0.6533, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.920753634184261, "learning_rate": 9.999196577775114e-06, "loss": 0.5849, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.7173101022104877, "learning_rate": 9.999175845719891e-06, "loss": 0.6427, "step": 502 }, { "epoch": 0.04, "grad_norm": 0.9243342876661618, "learning_rate": 9.999154849591261e-06, "loss": 0.4675, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.8912624197014798, "learning_rate": 9.999133589390332e-06, "loss": 0.6546, "step": 504 }, { "epoch": 0.04, "grad_norm": 2.267540234103927, "learning_rate": 9.999112065118228e-06, "loss": 0.6377, "step": 505 }, { "epoch": 0.04, "grad_norm": 2.290877658942183, "learning_rate": 9.999090276776084e-06, "loss": 0.5846, "step": 506 }, { "epoch": 0.04, "grad_norm": 2.09920988300228, "learning_rate": 9.999068224365053e-06, "loss": 0.5603, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.7944665830226367, "learning_rate": 9.9990459078863e-06, "loss": 0.5484, "step": 508 }, { "epoch": 0.04, "grad_norm": 3.2535082187929927, "learning_rate": 9.999023327341002e-06, "loss": 0.6804, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.6155536720601074, "learning_rate": 9.999000482730353e-06, "loss": 0.5381, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.8248015303487206, "learning_rate": 9.998977374055561e-06, "loss": 0.5809, "step": 511 }, { "epoch": 0.04, "grad_norm": 1.9393530333731168, "learning_rate": 9.998954001317844e-06, "loss": 0.5644, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.8207521752287739, "learning_rate": 9.998930364518437e-06, "loss": 0.6344, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.7068423797397476, "learning_rate": 9.998906463658591e-06, "loss": 0.6049, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.714576663821938, "learning_rate": 9.998882298739567e-06, "loss": 0.6531, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.5934447260068765, "learning_rate": 9.99885786976264e-06, "loss": 0.4833, "step": 516 }, { "epoch": 0.04, "grad_norm": 2.303727161297358, "learning_rate": 9.998833176729103e-06, "loss": 0.6388, "step": 517 }, { "epoch": 0.04, "grad_norm": 0.9904779351141787, "learning_rate": 9.99880821964026e-06, "loss": 0.4476, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.7985322725256871, "learning_rate": 9.998782998497428e-06, "loss": 0.6203, "step": 519 }, { "epoch": 0.04, "grad_norm": 2.4369476050354653, "learning_rate": 9.99875751330194e-06, "loss": 0.6091, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.9025855553674964, "learning_rate": 9.998731764055141e-06, "loss": 0.6083, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.9441539227336635, "learning_rate": 9.998705750758391e-06, "loss": 0.6258, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.834135829686564, "learning_rate": 9.99867947341307e-06, "loss": 0.5987, "step": 523 }, { "epoch": 0.04, "grad_norm": 2.16897703565224, "learning_rate": 9.998652932020555e-06, "loss": 0.5999, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.8138945986792385, "learning_rate": 9.99862612658226e-06, "loss": 0.609, "step": 525 }, { "epoch": 0.04, "grad_norm": 1.877092314177485, "learning_rate": 9.998599057099592e-06, "loss": 0.5896, "step": 526 }, { "epoch": 0.04, "grad_norm": 0.9385450241747622, "learning_rate": 9.998571723573987e-06, "loss": 0.4504, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.8552989982645332, "learning_rate": 9.998544126006884e-06, "loss": 0.6041, "step": 528 }, { "epoch": 0.04, "grad_norm": 2.4542337740888542, "learning_rate": 9.998516264399742e-06, "loss": 0.6519, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.8807341500407655, "learning_rate": 9.998488138754036e-06, "loss": 0.5871, "step": 530 }, { "epoch": 0.04, "grad_norm": 2.33476139679285, "learning_rate": 9.998459749071248e-06, "loss": 0.6807, "step": 531 }, { "epoch": 0.04, "grad_norm": 0.9618780216788828, "learning_rate": 9.998431095352878e-06, "loss": 0.4318, "step": 532 }, { "epoch": 0.04, "grad_norm": 1.8645312740961977, "learning_rate": 9.998402177600443e-06, "loss": 0.6163, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.9635932627143786, "learning_rate": 9.998372995815466e-06, "loss": 0.6528, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.7344585206969054, "learning_rate": 9.998343549999492e-06, "loss": 0.5932, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.9915528616791083, "learning_rate": 9.998313840154075e-06, "loss": 0.6351, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.9454575437608645, "learning_rate": 9.998283866280784e-06, "loss": 0.6073, "step": 537 }, { "epoch": 0.04, "grad_norm": 2.6847741458596865, "learning_rate": 9.998253628381202e-06, "loss": 0.598, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.908037612932845, "learning_rate": 9.998223126456928e-06, "loss": 0.6649, "step": 539 }, { "epoch": 0.04, "grad_norm": 2.5781733858935634, "learning_rate": 9.99819236050957e-06, "loss": 0.6133, "step": 540 }, { "epoch": 0.04, "grad_norm": 2.9789407731346227, "learning_rate": 9.998161330540759e-06, "loss": 0.5987, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.6956323618645095, "learning_rate": 9.998130036552127e-06, "loss": 0.5806, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.746859060529278, "learning_rate": 9.998098478545332e-06, "loss": 0.5729, "step": 543 }, { "epoch": 0.04, "grad_norm": 2.0316384293136003, "learning_rate": 9.99806665652204e-06, "loss": 0.6008, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.6889384247265664, "learning_rate": 9.99803457048393e-06, "loss": 0.5743, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.1922584396776097, "learning_rate": 9.9980022204327e-06, "loss": 0.4776, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.9205258478117864, "learning_rate": 9.997969606370057e-06, "loss": 0.6789, "step": 547 }, { "epoch": 0.04, "grad_norm": 2.074542947396767, "learning_rate": 9.997936728297722e-06, "loss": 0.6179, "step": 548 }, { "epoch": 0.04, "grad_norm": 2.1640790632383022, "learning_rate": 9.997903586217435e-06, "loss": 0.6156, "step": 549 }, { "epoch": 0.04, "grad_norm": 2.7814762695734596, "learning_rate": 9.997870180130946e-06, "loss": 0.5686, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.9071650776770406, "learning_rate": 9.997836510040018e-06, "loss": 0.636, "step": 551 }, { "epoch": 0.04, "grad_norm": 2.1029054969458296, "learning_rate": 9.997802575946432e-06, "loss": 0.6782, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.894825801614073, "learning_rate": 9.997768377851977e-06, "loss": 0.6462, "step": 553 }, { "epoch": 0.04, "grad_norm": 2.2889123593330307, "learning_rate": 9.997733915758462e-06, "loss": 0.6173, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.9254057223291228, "learning_rate": 9.997699189667707e-06, "loss": 0.6613, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.8511083113169775, "learning_rate": 9.997664199581548e-06, "loss": 0.6074, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.7482663993777725, "learning_rate": 9.997628945501829e-06, "loss": 0.5637, "step": 557 }, { "epoch": 0.04, "grad_norm": 2.010341424619799, "learning_rate": 9.997593427430416e-06, "loss": 0.5821, "step": 558 }, { "epoch": 0.04, "grad_norm": 2.1018379578742588, "learning_rate": 9.997557645369185e-06, "loss": 0.5893, "step": 559 }, { "epoch": 0.04, "grad_norm": 2.1395267964991374, "learning_rate": 9.997521599320023e-06, "loss": 0.6137, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.984533208557336, "learning_rate": 9.997485289284838e-06, "loss": 0.5361, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.7359252405780785, "learning_rate": 9.997448715265546e-06, "loss": 0.6127, "step": 562 }, { "epoch": 0.04, "grad_norm": 2.03761059267238, "learning_rate": 9.997411877264079e-06, "loss": 0.6547, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.8115353981017317, "learning_rate": 9.997374775282383e-06, "loss": 0.5728, "step": 564 }, { "epoch": 0.04, "grad_norm": 2.085702852631562, "learning_rate": 9.997337409322418e-06, "loss": 0.5901, "step": 565 }, { "epoch": 0.04, "grad_norm": 3.27602039888933, "learning_rate": 9.997299779386157e-06, "loss": 0.5453, "step": 566 }, { "epoch": 0.04, "grad_norm": 2.3524361430090353, "learning_rate": 9.99726188547559e-06, "loss": 0.5785, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.8299057271813801, "learning_rate": 9.997223727592717e-06, "loss": 0.5621, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.8186636486616559, "learning_rate": 9.997185305739551e-06, "loss": 0.5667, "step": 569 }, { "epoch": 0.04, "grad_norm": 2.04478643481762, "learning_rate": 9.997146619918127e-06, "loss": 0.5872, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.9500864604801027, "learning_rate": 9.997107670130486e-06, "loss": 0.5196, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.9522090784217032, "learning_rate": 9.997068456378684e-06, "loss": 0.6241, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.7383190161017843, "learning_rate": 9.997028978664796e-06, "loss": 0.6261, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.9759852476669626, "learning_rate": 9.996989236990904e-06, "loss": 0.5949, "step": 574 }, { "epoch": 0.04, "grad_norm": 2.0283355607637072, "learning_rate": 9.996949231359108e-06, "loss": 0.6765, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.7967996396341426, "learning_rate": 9.996908961771521e-06, "loss": 0.5594, "step": 576 }, { "epoch": 0.04, "grad_norm": 2.0374957318020206, "learning_rate": 9.996868428230271e-06, "loss": 0.5022, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.718033533035505, "learning_rate": 9.9968276307375e-06, "loss": 0.5735, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.8645114793545352, "learning_rate": 9.99678656929536e-06, "loss": 0.5832, "step": 579 }, { "epoch": 0.04, "grad_norm": 2.234258206261488, "learning_rate": 9.996745243906026e-06, "loss": 0.6641, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.7111107571978625, "learning_rate": 9.996703654571673e-06, "loss": 0.5942, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.8906290161834944, "learning_rate": 9.996661801294503e-06, "loss": 0.597, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.0415238412573753, "learning_rate": 9.996619684076724e-06, "loss": 0.4621, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.6829251385574737, "learning_rate": 9.996577302920567e-06, "loss": 0.5932, "step": 584 }, { "epoch": 0.04, "grad_norm": 2.247617574473002, "learning_rate": 9.996534657828263e-06, "loss": 0.6249, "step": 585 }, { "epoch": 0.04, "grad_norm": 2.153577055386234, "learning_rate": 9.996491748802067e-06, "loss": 0.6987, "step": 586 }, { "epoch": 0.04, "grad_norm": 0.9914816671035794, "learning_rate": 9.996448575844247e-06, "loss": 0.4795, "step": 587 }, { "epoch": 0.04, "grad_norm": 1.8840199759775895, "learning_rate": 9.996405138957083e-06, "loss": 0.6045, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.8634910862938125, "learning_rate": 9.996361438142872e-06, "loss": 0.5621, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.8643085486343365, "learning_rate": 9.996317473403917e-06, "loss": 0.6012, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.8303324155612233, "learning_rate": 9.996273244742544e-06, "loss": 0.6613, "step": 591 }, { "epoch": 0.04, "grad_norm": 2.1295288089674824, "learning_rate": 9.996228752161087e-06, "loss": 0.6774, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.1452371408227051, "learning_rate": 9.996183995661901e-06, "loss": 0.4749, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.6636188169689397, "learning_rate": 9.996138975247345e-06, "loss": 0.5534, "step": 594 }, { "epoch": 0.04, "grad_norm": 2.0729021118309223, "learning_rate": 9.9960936909198e-06, "loss": 0.6076, "step": 595 }, { "epoch": 0.04, "grad_norm": 2.0614673781884516, "learning_rate": 9.996048142681657e-06, "loss": 0.5615, "step": 596 }, { "epoch": 0.04, "grad_norm": 2.0029918819076733, "learning_rate": 9.99600233053532e-06, "loss": 0.6462, "step": 597 }, { "epoch": 0.04, "grad_norm": 2.609659529329357, "learning_rate": 9.995956254483214e-06, "loss": 0.6315, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.9087203761711684, "learning_rate": 9.995909914527768e-06, "loss": 0.644, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.8829500333621283, "learning_rate": 9.995863310671432e-06, "loss": 0.6312, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.893944875278428, "learning_rate": 9.99581644291667e-06, "loss": 0.5784, "step": 601 }, { "epoch": 0.04, "grad_norm": 2.9048323204193474, "learning_rate": 9.995769311265953e-06, "loss": 0.652, "step": 602 }, { "epoch": 0.04, "grad_norm": 2.2606312619527498, "learning_rate": 9.995721915721774e-06, "loss": 0.6448, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.9306316318555141, "learning_rate": 9.995674256286636e-06, "loss": 0.6581, "step": 604 }, { "epoch": 0.04, "grad_norm": 2.3962532730879595, "learning_rate": 9.995626332963053e-06, "loss": 0.6084, "step": 605 }, { "epoch": 0.04, "grad_norm": 2.2562368539082964, "learning_rate": 9.995578145753563e-06, "loss": 0.5854, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.9210581967569014, "learning_rate": 9.995529694660707e-06, "loss": 0.6846, "step": 607 }, { "epoch": 0.04, "grad_norm": 1.647766560806375, "learning_rate": 9.995480979687046e-06, "loss": 0.6169, "step": 608 }, { "epoch": 0.04, "grad_norm": 2.13837041386148, "learning_rate": 9.995432000835153e-06, "loss": 0.5085, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.9906969154995522, "learning_rate": 9.995382758107612e-06, "loss": 0.6579, "step": 610 }, { "epoch": 0.04, "grad_norm": 2.0689845979235857, "learning_rate": 9.995333251507029e-06, "loss": 0.6563, "step": 611 }, { "epoch": 0.04, "grad_norm": 2.8069274650962255, "learning_rate": 9.995283481036017e-06, "loss": 0.6015, "step": 612 }, { "epoch": 0.04, "grad_norm": 2.4476288037748337, "learning_rate": 9.995233446697206e-06, "loss": 0.6352, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.8319136511654073, "learning_rate": 9.995183148493236e-06, "loss": 0.5968, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.8200796281200393, "learning_rate": 9.99513258642677e-06, "loss": 0.6012, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.882547938777712, "learning_rate": 9.995081760500471e-06, "loss": 0.6052, "step": 616 }, { "epoch": 0.04, "grad_norm": 2.1057028045970614, "learning_rate": 9.99503067071703e-06, "loss": 0.6635, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.9073253252672766, "learning_rate": 9.994979317079143e-06, "loss": 0.6267, "step": 618 }, { "epoch": 0.04, "grad_norm": 0.934776660873475, "learning_rate": 9.994927699589523e-06, "loss": 0.4859, "step": 619 }, { "epoch": 0.04, "grad_norm": 2.166466625934562, "learning_rate": 9.994875818250898e-06, "loss": 0.4815, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.6409978776045064, "learning_rate": 9.994823673066006e-06, "loss": 0.6166, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.8921408716100994, "learning_rate": 9.994771264037602e-06, "loss": 0.6034, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.839631362481778, "learning_rate": 9.994718591168457e-06, "loss": 0.5893, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.58648766790884, "learning_rate": 9.99466565446135e-06, "loss": 0.5901, "step": 624 }, { "epoch": 0.04, "grad_norm": 2.016676054035594, "learning_rate": 9.99461245391908e-06, "loss": 0.5767, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.8959820514271726, "learning_rate": 9.994558989544456e-06, "loss": 0.5902, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.6985999292016627, "learning_rate": 9.9945052613403e-06, "loss": 0.5715, "step": 627 }, { "epoch": 0.04, "grad_norm": 2.58791575547424, "learning_rate": 9.994451269309457e-06, "loss": 0.5815, "step": 628 }, { "epoch": 0.04, "grad_norm": 2.3491442763324626, "learning_rate": 9.99439701345477e-06, "loss": 0.5783, "step": 629 }, { "epoch": 0.04, "grad_norm": 0.9750236674257756, "learning_rate": 9.994342493779112e-06, "loss": 0.4597, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.9419992209003525, "learning_rate": 9.99428771028536e-06, "loss": 0.6231, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.8150080040311811, "learning_rate": 9.994232662976409e-06, "loss": 0.5802, "step": 632 }, { "epoch": 0.04, "grad_norm": 2.201781156406854, "learning_rate": 9.994177351855162e-06, "loss": 0.676, "step": 633 }, { "epoch": 0.04, "grad_norm": 2.0996473010097976, "learning_rate": 9.99412177692455e-06, "loss": 0.548, "step": 634 }, { "epoch": 0.05, "grad_norm": 2.025498163061105, "learning_rate": 9.9940659381875e-06, "loss": 0.5751, "step": 635 }, { "epoch": 0.05, "grad_norm": 10.289524425441089, "learning_rate": 9.994009835646967e-06, "loss": 0.6241, "step": 636 }, { "epoch": 0.05, "grad_norm": 2.1201402461343104, "learning_rate": 9.99395346930591e-06, "loss": 0.5199, "step": 637 }, { "epoch": 0.05, "grad_norm": 2.0171378139086538, "learning_rate": 9.993896839167311e-06, "loss": 0.6237, "step": 638 }, { "epoch": 0.05, "grad_norm": 2.2402563523612335, "learning_rate": 9.993839945234158e-06, "loss": 0.6086, "step": 639 }, { "epoch": 0.05, "grad_norm": 3.5640244424367955, "learning_rate": 9.993782787509458e-06, "loss": 0.5998, "step": 640 }, { "epoch": 0.05, "grad_norm": 3.2672426170269406, "learning_rate": 9.99372536599623e-06, "loss": 0.5954, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.8691792865076111, "learning_rate": 9.993667680697508e-06, "loss": 0.6211, "step": 642 }, { "epoch": 0.05, "grad_norm": 2.4739355123404234, "learning_rate": 9.993609731616338e-06, "loss": 0.6191, "step": 643 }, { "epoch": 0.05, "grad_norm": 3.3412737850790286, "learning_rate": 9.99355151875578e-06, "loss": 0.6296, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.0979368691498586, "learning_rate": 9.99349304211891e-06, "loss": 0.4555, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.889663948757811, "learning_rate": 9.99343430170882e-06, "loss": 0.5793, "step": 646 }, { "epoch": 0.05, "grad_norm": 2.1341357915242445, "learning_rate": 9.99337529752861e-06, "loss": 0.6119, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.7857264782500004, "learning_rate": 9.993316029581394e-06, "loss": 0.6522, "step": 648 }, { "epoch": 0.05, "grad_norm": 1.7820128825840982, "learning_rate": 9.993256497870307e-06, "loss": 0.5664, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.8579943705886592, "learning_rate": 9.993196702398494e-06, "loss": 0.5894, "step": 650 }, { "epoch": 0.05, "grad_norm": 2.096513359023596, "learning_rate": 9.99313664316911e-06, "loss": 0.5434, "step": 651 }, { "epoch": 0.05, "grad_norm": 2.036287397415716, "learning_rate": 9.993076320185329e-06, "loss": 0.6504, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.7364652479511444, "learning_rate": 9.993015733450337e-06, "loss": 0.5249, "step": 653 }, { "epoch": 0.05, "grad_norm": 1.981323459670183, "learning_rate": 9.992954882967339e-06, "loss": 0.6371, "step": 654 }, { "epoch": 0.05, "grad_norm": 3.9594070547795286, "learning_rate": 9.992893768739542e-06, "loss": 0.5844, "step": 655 }, { "epoch": 0.05, "grad_norm": 1.9732400271073796, "learning_rate": 9.99283239077018e-06, "loss": 0.6039, "step": 656 }, { "epoch": 0.05, "grad_norm": 2.054525898926972, "learning_rate": 9.992770749062492e-06, "loss": 0.5595, "step": 657 }, { "epoch": 0.05, "grad_norm": 1.980638941845807, "learning_rate": 9.992708843619737e-06, "loss": 0.6492, "step": 658 }, { "epoch": 0.05, "grad_norm": 2.0620395684410373, "learning_rate": 9.992646674445182e-06, "loss": 0.5939, "step": 659 }, { "epoch": 0.05, "grad_norm": 2.867374980847205, "learning_rate": 9.992584241542112e-06, "loss": 0.6485, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.9988354811696056, "learning_rate": 9.992521544913827e-06, "loss": 0.5973, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.658786690420108, "learning_rate": 9.992458584563635e-06, "loss": 0.5714, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.6651325182711714, "learning_rate": 9.992395360494865e-06, "loss": 0.5298, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.4979047030329098, "learning_rate": 9.992331872710855e-06, "loss": 0.5016, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.96395762051117, "learning_rate": 9.992268121214958e-06, "loss": 0.5802, "step": 665 }, { "epoch": 0.05, "grad_norm": 2.535952452393718, "learning_rate": 9.992204106010544e-06, "loss": 0.6566, "step": 666 }, { "epoch": 0.05, "grad_norm": 1.9773069147119862, "learning_rate": 9.992139827100994e-06, "loss": 0.6176, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.607972912448335, "learning_rate": 9.9920752844897e-06, "loss": 0.5853, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.0613846436850072, "learning_rate": 9.992010478180076e-06, "loss": 0.4776, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.8816910942919807, "learning_rate": 9.991945408175543e-06, "loss": 0.6354, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.6002999125250774, "learning_rate": 9.991880074479538e-06, "loss": 0.4938, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.7764153422047582, "learning_rate": 9.991814477095514e-06, "loss": 0.6328, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.8279768864771238, "learning_rate": 9.991748616026935e-06, "loss": 0.5961, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.1152115665653053, "learning_rate": 9.991682491277277e-06, "loss": 0.4844, "step": 674 }, { "epoch": 0.05, "grad_norm": 2.3295346142732876, "learning_rate": 9.991616102850039e-06, "loss": 0.5781, "step": 675 }, { "epoch": 0.05, "grad_norm": 1.7780347625214579, "learning_rate": 9.991549450748722e-06, "loss": 0.5797, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.68734366488098, "learning_rate": 9.991482534976851e-06, "loss": 0.5634, "step": 677 }, { "epoch": 0.05, "grad_norm": 1.690177466130356, "learning_rate": 9.991415355537956e-06, "loss": 0.5521, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.7402835701927133, "learning_rate": 9.991347912435592e-06, "loss": 0.6335, "step": 679 }, { "epoch": 0.05, "grad_norm": 2.293034203874179, "learning_rate": 9.991280205673317e-06, "loss": 0.6081, "step": 680 }, { "epoch": 0.05, "grad_norm": 2.211166476269173, "learning_rate": 9.991212235254708e-06, "loss": 0.5786, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.7961350064102972, "learning_rate": 9.99114400118336e-06, "loss": 0.6046, "step": 682 }, { "epoch": 0.05, "grad_norm": 2.021768418850511, "learning_rate": 9.991075503462868e-06, "loss": 0.6187, "step": 683 }, { "epoch": 0.05, "grad_norm": 2.136481016666777, "learning_rate": 9.99100674209686e-06, "loss": 0.6157, "step": 684 }, { "epoch": 0.05, "grad_norm": 2.2774946357777157, "learning_rate": 9.990937717088963e-06, "loss": 0.649, "step": 685 }, { "epoch": 0.05, "grad_norm": 1.846335231775904, "learning_rate": 9.990868428442824e-06, "loss": 0.5918, "step": 686 }, { "epoch": 0.05, "grad_norm": 2.0884213031246848, "learning_rate": 9.990798876162102e-06, "loss": 0.5976, "step": 687 }, { "epoch": 0.05, "grad_norm": 1.0405607952938931, "learning_rate": 9.990729060250473e-06, "loss": 0.4713, "step": 688 }, { "epoch": 0.05, "grad_norm": 0.9680776501509842, "learning_rate": 9.990658980711625e-06, "loss": 0.4875, "step": 689 }, { "epoch": 0.05, "grad_norm": 1.721400562805146, "learning_rate": 9.99058863754926e-06, "loss": 0.5808, "step": 690 }, { "epoch": 0.05, "grad_norm": 1.862140322640572, "learning_rate": 9.990518030767089e-06, "loss": 0.6422, "step": 691 }, { "epoch": 0.05, "grad_norm": 1.7741570272621363, "learning_rate": 9.990447160368848e-06, "loss": 0.6235, "step": 692 }, { "epoch": 0.05, "grad_norm": 1.8559871042191343, "learning_rate": 9.990376026358277e-06, "loss": 0.6259, "step": 693 }, { "epoch": 0.05, "grad_norm": 1.0405620118675403, "learning_rate": 9.990304628739135e-06, "loss": 0.4507, "step": 694 }, { "epoch": 0.05, "grad_norm": 1.9698840253662522, "learning_rate": 9.990232967515194e-06, "loss": 0.5742, "step": 695 }, { "epoch": 0.05, "grad_norm": 2.2168067833320473, "learning_rate": 9.990161042690238e-06, "loss": 0.5785, "step": 696 }, { "epoch": 0.05, "grad_norm": 1.9608609007903917, "learning_rate": 9.990088854268067e-06, "loss": 0.5851, "step": 697 }, { "epoch": 0.05, "grad_norm": 1.8542317176827354, "learning_rate": 9.990016402252494e-06, "loss": 0.5832, "step": 698 }, { "epoch": 0.05, "grad_norm": 1.8118017063530996, "learning_rate": 9.989943686647345e-06, "loss": 0.5826, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.8955544786274217, "learning_rate": 9.989870707456463e-06, "loss": 0.6663, "step": 700 }, { "epoch": 0.05, "grad_norm": 1.9250891838900077, "learning_rate": 9.989797464683702e-06, "loss": 0.5863, "step": 701 }, { "epoch": 0.05, "grad_norm": 1.8581517726714682, "learning_rate": 9.98972395833293e-06, "loss": 0.5978, "step": 702 }, { "epoch": 0.05, "grad_norm": 1.9725233008991974, "learning_rate": 9.989650188408034e-06, "loss": 0.6514, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.8821647649277196, "learning_rate": 9.989576154912905e-06, "loss": 0.6198, "step": 704 }, { "epoch": 0.05, "grad_norm": 2.0526053250844285, "learning_rate": 9.989501857851458e-06, "loss": 0.5926, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.7825672812827749, "learning_rate": 9.989427297227615e-06, "loss": 0.6593, "step": 706 }, { "epoch": 0.05, "grad_norm": 2.021422961184575, "learning_rate": 9.989352473045317e-06, "loss": 0.5922, "step": 707 }, { "epoch": 0.05, "grad_norm": 2.620450632686968, "learning_rate": 9.989277385308515e-06, "loss": 0.6479, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.7199596143725084, "learning_rate": 9.989202034021175e-06, "loss": 0.5499, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.8673878608490122, "learning_rate": 9.989126419187276e-06, "loss": 0.5511, "step": 710 }, { "epoch": 0.05, "grad_norm": 2.3538825430962333, "learning_rate": 9.989050540810817e-06, "loss": 0.6498, "step": 711 }, { "epoch": 0.05, "grad_norm": 1.9771713273117246, "learning_rate": 9.988974398895802e-06, "loss": 0.6307, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.8256973729041028, "learning_rate": 9.988897993446255e-06, "loss": 0.5723, "step": 713 }, { "epoch": 0.05, "grad_norm": 2.2457931487501672, "learning_rate": 9.988821324466212e-06, "loss": 0.5963, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.5014812571268752, "learning_rate": 9.98874439195972e-06, "loss": 0.5244, "step": 715 }, { "epoch": 0.05, "grad_norm": 2.0090665835666646, "learning_rate": 9.988667195930846e-06, "loss": 0.5999, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.7499056050524848, "learning_rate": 9.988589736383667e-06, "loss": 0.6345, "step": 717 }, { "epoch": 0.05, "grad_norm": 2.6706843120371166, "learning_rate": 9.988512013322274e-06, "loss": 0.6352, "step": 718 }, { "epoch": 0.05, "grad_norm": 2.0227458133158205, "learning_rate": 9.988434026750773e-06, "loss": 0.5145, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.109659419861857, "learning_rate": 9.988355776673284e-06, "loss": 0.4716, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.9570468692611225, "learning_rate": 9.98827726309394e-06, "loss": 0.5445, "step": 721 }, { "epoch": 0.05, "grad_norm": 2.3009962967242723, "learning_rate": 9.988198486016888e-06, "loss": 0.5733, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.8230708768771242, "learning_rate": 9.988119445446291e-06, "loss": 0.5244, "step": 723 }, { "epoch": 0.05, "grad_norm": 4.791741033426039, "learning_rate": 9.988040141386322e-06, "loss": 0.6092, "step": 724 }, { "epoch": 0.05, "grad_norm": 2.545432878025286, "learning_rate": 9.98796057384117e-06, "loss": 0.6339, "step": 725 }, { "epoch": 0.05, "grad_norm": 2.5015504078125064, "learning_rate": 9.98788074281504e-06, "loss": 0.575, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.8551388928328785, "learning_rate": 9.987800648312146e-06, "loss": 0.6201, "step": 727 }, { "epoch": 0.05, "grad_norm": 2.486619442839847, "learning_rate": 9.987720290336725e-06, "loss": 0.5868, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.9814906054947357, "learning_rate": 9.987639668893015e-06, "loss": 0.6283, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.6182202843582136, "learning_rate": 9.987558783985275e-06, "loss": 0.5799, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.6898071419434377, "learning_rate": 9.987477635617783e-06, "loss": 0.5757, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.8218352562318085, "learning_rate": 9.987396223794822e-06, "loss": 0.6575, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.6672154323012864, "learning_rate": 9.987314548520693e-06, "loss": 0.5817, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.836994107573075, "learning_rate": 9.987232609799709e-06, "loss": 0.5299, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.6276611932796248, "learning_rate": 9.9871504076362e-06, "loss": 0.5623, "step": 735 }, { "epoch": 0.05, "grad_norm": 2.219553753507082, "learning_rate": 9.987067942034507e-06, "loss": 0.5874, "step": 736 }, { "epoch": 0.05, "grad_norm": 2.069500168938439, "learning_rate": 9.98698521299899e-06, "loss": 0.6126, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.286709045343945, "learning_rate": 9.986902220534011e-06, "loss": 0.4744, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.7888718474800211, "learning_rate": 9.986818964643963e-06, "loss": 0.5514, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.8180110567142151, "learning_rate": 9.986735445333238e-06, "loss": 0.6424, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.7807572474024047, "learning_rate": 9.986651662606248e-06, "loss": 0.6102, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.8021843905368713, "learning_rate": 9.986567616467421e-06, "loss": 0.5764, "step": 742 }, { "epoch": 0.05, "grad_norm": 2.533005401427882, "learning_rate": 9.986483306921196e-06, "loss": 0.5939, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.7168049776532563, "learning_rate": 9.986398733972027e-06, "loss": 0.5594, "step": 744 }, { "epoch": 0.05, "grad_norm": 10.65973041477218, "learning_rate": 9.98631389762438e-06, "loss": 0.5972, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.7352238947190146, "learning_rate": 9.986228797882737e-06, "loss": 0.6136, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.942070525514916, "learning_rate": 9.986143434751593e-06, "loss": 0.5834, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.8640144232278262, "learning_rate": 9.986057808235457e-06, "loss": 0.5679, "step": 748 }, { "epoch": 0.05, "grad_norm": 0.9611081640237724, "learning_rate": 9.985971918338854e-06, "loss": 0.472, "step": 749 }, { "epoch": 0.05, "grad_norm": 2.560660193625534, "learning_rate": 9.985885765066319e-06, "loss": 0.5696, "step": 750 }, { "epoch": 0.05, "grad_norm": 2.4607444406644423, "learning_rate": 9.985799348422403e-06, "loss": 0.5839, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.9314279980753932, "learning_rate": 9.98571266841167e-06, "loss": 0.624, "step": 752 }, { "epoch": 0.05, "grad_norm": 2.5415247716604976, "learning_rate": 9.985625725038702e-06, "loss": 0.5845, "step": 753 }, { "epoch": 0.05, "grad_norm": 3.7644850042550524, "learning_rate": 9.985538518308088e-06, "loss": 0.6256, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.6918918476164666, "learning_rate": 9.985451048224437e-06, "loss": 0.5622, "step": 755 }, { "epoch": 0.05, "grad_norm": 2.1889029756086686, "learning_rate": 9.98536331479237e-06, "loss": 0.6217, "step": 756 }, { "epoch": 0.05, "grad_norm": 3.040684723508936, "learning_rate": 9.985275318016517e-06, "loss": 0.6269, "step": 757 }, { "epoch": 0.05, "grad_norm": 1.891838483347082, "learning_rate": 9.98518705790153e-06, "loss": 0.6704, "step": 758 }, { "epoch": 0.05, "grad_norm": 2.523225671008834, "learning_rate": 9.985098534452074e-06, "loss": 0.6696, "step": 759 }, { "epoch": 0.05, "grad_norm": 2.144537749471743, "learning_rate": 9.985009747672818e-06, "loss": 0.5719, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.8199455724926854, "learning_rate": 9.984920697568457e-06, "loss": 0.5347, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.717797303334601, "learning_rate": 9.984831384143692e-06, "loss": 0.5751, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.924909989349552, "learning_rate": 9.984741807403244e-06, "loss": 0.5579, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.8416242690569073, "learning_rate": 9.984651967351842e-06, "loss": 0.592, "step": 764 }, { "epoch": 0.05, "grad_norm": 1.8378304445124731, "learning_rate": 9.984561863994234e-06, "loss": 0.6012, "step": 765 }, { "epoch": 0.05, "grad_norm": 2.146671289848264, "learning_rate": 9.984471497335178e-06, "loss": 0.6279, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.2249572230606032, "learning_rate": 9.984380867379447e-06, "loss": 0.4866, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.8470411036855674, "learning_rate": 9.984289974131829e-06, "loss": 0.5712, "step": 768 }, { "epoch": 0.05, "grad_norm": 0.9512314754598917, "learning_rate": 9.984198817597126e-06, "loss": 0.464, "step": 769 }, { "epoch": 0.05, "grad_norm": 6.29376042153947, "learning_rate": 9.984107397780154e-06, "loss": 0.6041, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.9560030189945388, "learning_rate": 9.984015714685739e-06, "loss": 0.5777, "step": 771 }, { "epoch": 0.05, "grad_norm": 0.9920985477251776, "learning_rate": 9.983923768318727e-06, "loss": 0.4802, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.9873330441301158, "learning_rate": 9.983831558683971e-06, "loss": 0.647, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.7994735729841338, "learning_rate": 9.983739085786347e-06, "loss": 0.5465, "step": 774 }, { "epoch": 0.05, "grad_norm": 2.1591862062181253, "learning_rate": 9.983646349630738e-06, "loss": 0.6523, "step": 775 }, { "epoch": 0.06, "grad_norm": 0.842681928469058, "learning_rate": 9.983553350222042e-06, "loss": 0.4655, "step": 776 }, { "epoch": 0.06, "grad_norm": 0.93618269982594, "learning_rate": 9.983460087565169e-06, "loss": 0.4398, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.732337076785266, "learning_rate": 9.983366561665049e-06, "loss": 0.5877, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.9580441152640422, "learning_rate": 9.983272772526621e-06, "loss": 0.5928, "step": 779 }, { "epoch": 0.06, "grad_norm": 3.2504826064327057, "learning_rate": 9.98317872015484e-06, "loss": 0.6113, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.947286097097674, "learning_rate": 9.983084404554674e-06, "loss": 0.6242, "step": 781 }, { "epoch": 0.06, "grad_norm": 2.225099355263732, "learning_rate": 9.982989825731106e-06, "loss": 0.6366, "step": 782 }, { "epoch": 0.06, "grad_norm": 2.0629680222217424, "learning_rate": 9.982894983689129e-06, "loss": 0.6226, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.7648291403420113, "learning_rate": 9.982799878433754e-06, "loss": 0.536, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.725067037334444, "learning_rate": 9.982704509970007e-06, "loss": 0.5591, "step": 785 }, { "epoch": 0.06, "grad_norm": 2.9583644776020774, "learning_rate": 9.982608878302924e-06, "loss": 0.6604, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.8682453386288844, "learning_rate": 9.982512983437555e-06, "loss": 0.5625, "step": 787 }, { "epoch": 0.06, "grad_norm": 3.162372561126446, "learning_rate": 9.982416825378968e-06, "loss": 0.5895, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.1101950242741496, "learning_rate": 9.982320404132242e-06, "loss": 0.4596, "step": 789 }, { "epoch": 0.06, "grad_norm": 2.4882629568452423, "learning_rate": 9.982223719702469e-06, "loss": 0.5899, "step": 790 }, { "epoch": 0.06, "grad_norm": 3.6303379454036557, "learning_rate": 9.982126772094758e-06, "loss": 0.594, "step": 791 }, { "epoch": 0.06, "grad_norm": 3.672779172491735, "learning_rate": 9.982029561314227e-06, "loss": 0.5792, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.9294736206741199, "learning_rate": 9.981932087366017e-06, "loss": 0.593, "step": 793 }, { "epoch": 0.06, "grad_norm": 0.9767696875647004, "learning_rate": 9.98183435025527e-06, "loss": 0.4484, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.7538415628753308, "learning_rate": 9.981736349987153e-06, "loss": 0.5662, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.687466774691323, "learning_rate": 9.98163808656684e-06, "loss": 0.6094, "step": 796 }, { "epoch": 0.06, "grad_norm": 2.0650637404171346, "learning_rate": 9.981539559999525e-06, "loss": 0.5608, "step": 797 }, { "epoch": 0.06, "grad_norm": 2.187483416040044, "learning_rate": 9.981440770290412e-06, "loss": 0.6347, "step": 798 }, { "epoch": 0.06, "grad_norm": 2.353329825927032, "learning_rate": 9.981341717444715e-06, "loss": 0.6329, "step": 799 }, { "epoch": 0.06, "grad_norm": 2.272046913463745, "learning_rate": 9.98124240146767e-06, "loss": 0.6088, "step": 800 }, { "epoch": 0.06, "grad_norm": 2.0699615660476876, "learning_rate": 9.981142822364524e-06, "loss": 0.6733, "step": 801 }, { "epoch": 0.06, "grad_norm": 1.8126499471437967, "learning_rate": 9.981042980140536e-06, "loss": 0.5618, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.7641807829159009, "learning_rate": 9.980942874800979e-06, "loss": 0.6154, "step": 803 }, { "epoch": 0.06, "grad_norm": 2.160356581384707, "learning_rate": 9.98084250635114e-06, "loss": 0.6402, "step": 804 }, { "epoch": 0.06, "grad_norm": 1.8193463519286226, "learning_rate": 9.980741874796324e-06, "loss": 0.5563, "step": 805 }, { "epoch": 0.06, "grad_norm": 0.9709661037978785, "learning_rate": 9.980640980141844e-06, "loss": 0.4513, "step": 806 }, { "epoch": 0.06, "grad_norm": 1.7775913205613263, "learning_rate": 9.980539822393032e-06, "loss": 0.5215, "step": 807 }, { "epoch": 0.06, "grad_norm": 0.8514190717792197, "learning_rate": 9.980438401555231e-06, "loss": 0.482, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.9522669354872004, "learning_rate": 9.980336717633796e-06, "loss": 0.6854, "step": 809 }, { "epoch": 0.06, "grad_norm": 2.2071671007448836, "learning_rate": 9.9802347706341e-06, "loss": 0.6123, "step": 810 }, { "epoch": 0.06, "grad_norm": 1.8343281883272198, "learning_rate": 9.980132560561528e-06, "loss": 0.7338, "step": 811 }, { "epoch": 0.06, "grad_norm": 1.9079408401876257, "learning_rate": 9.980030087421479e-06, "loss": 0.5817, "step": 812 }, { "epoch": 0.06, "grad_norm": 1.6192142427880043, "learning_rate": 9.979927351219367e-06, "loss": 0.5169, "step": 813 }, { "epoch": 0.06, "grad_norm": 2.5112111558540344, "learning_rate": 9.979824351960617e-06, "loss": 0.6184, "step": 814 }, { "epoch": 0.06, "grad_norm": 2.362258064545396, "learning_rate": 9.979721089650671e-06, "loss": 0.6368, "step": 815 }, { "epoch": 0.06, "grad_norm": 1.9615972940113384, "learning_rate": 9.979617564294986e-06, "loss": 0.6112, "step": 816 }, { "epoch": 0.06, "grad_norm": 1.7435099176695448, "learning_rate": 9.979513775899024e-06, "loss": 0.5284, "step": 817 }, { "epoch": 0.06, "grad_norm": 1.8021295795921246, "learning_rate": 9.979409724468274e-06, "loss": 0.5221, "step": 818 }, { "epoch": 0.06, "grad_norm": 1.6482342150238698, "learning_rate": 9.97930541000823e-06, "loss": 0.564, "step": 819 }, { "epoch": 0.06, "grad_norm": 2.451519029084754, "learning_rate": 9.9792008325244e-06, "loss": 0.6594, "step": 820 }, { "epoch": 0.06, "grad_norm": 2.1937804630356323, "learning_rate": 9.979095992022311e-06, "loss": 0.589, "step": 821 }, { "epoch": 0.06, "grad_norm": 1.790033522649253, "learning_rate": 9.978990888507502e-06, "loss": 0.6604, "step": 822 }, { "epoch": 0.06, "grad_norm": 1.9220007754551103, "learning_rate": 9.978885521985522e-06, "loss": 0.616, "step": 823 }, { "epoch": 0.06, "grad_norm": 2.171765350694403, "learning_rate": 9.978779892461936e-06, "loss": 0.5481, "step": 824 }, { "epoch": 0.06, "grad_norm": 1.9508105940602367, "learning_rate": 9.978673999942328e-06, "loss": 0.6372, "step": 825 }, { "epoch": 0.06, "grad_norm": 1.8989133394027413, "learning_rate": 9.97856784443229e-06, "loss": 0.6587, "step": 826 }, { "epoch": 0.06, "grad_norm": 2.4652631765140485, "learning_rate": 9.978461425937427e-06, "loss": 0.6225, "step": 827 }, { "epoch": 0.06, "grad_norm": 1.8014241908131035, "learning_rate": 9.978354744463361e-06, "loss": 0.6246, "step": 828 }, { "epoch": 0.06, "grad_norm": 1.7241092365717274, "learning_rate": 9.97824780001573e-06, "loss": 0.5554, "step": 829 }, { "epoch": 0.06, "grad_norm": 1.818362165932305, "learning_rate": 9.978140592600182e-06, "loss": 0.6395, "step": 830 }, { "epoch": 0.06, "grad_norm": 2.278539916827366, "learning_rate": 9.978033122222379e-06, "loss": 0.5357, "step": 831 }, { "epoch": 0.06, "grad_norm": 2.0268939508869095, "learning_rate": 9.977925388888e-06, "loss": 0.6488, "step": 832 }, { "epoch": 0.06, "grad_norm": 2.6293847185169166, "learning_rate": 9.977817392602733e-06, "loss": 0.7021, "step": 833 }, { "epoch": 0.06, "grad_norm": 2.0052066649871674, "learning_rate": 9.977709133372284e-06, "loss": 0.6196, "step": 834 }, { "epoch": 0.06, "grad_norm": 1.1784984272427252, "learning_rate": 9.977600611202372e-06, "loss": 0.4805, "step": 835 }, { "epoch": 0.06, "grad_norm": 1.84166732277992, "learning_rate": 9.97749182609873e-06, "loss": 0.5855, "step": 836 }, { "epoch": 0.06, "grad_norm": 1.9709655655215204, "learning_rate": 9.977382778067104e-06, "loss": 0.6126, "step": 837 }, { "epoch": 0.06, "grad_norm": 1.8229458479445437, "learning_rate": 9.977273467113254e-06, "loss": 0.6582, "step": 838 }, { "epoch": 0.06, "grad_norm": 1.166124119682141, "learning_rate": 9.977163893242952e-06, "loss": 0.4634, "step": 839 }, { "epoch": 0.06, "grad_norm": 1.9683453676183154, "learning_rate": 9.97705405646199e-06, "loss": 0.6159, "step": 840 }, { "epoch": 0.06, "grad_norm": 1.8277088811646793, "learning_rate": 9.976943956776168e-06, "loss": 0.566, "step": 841 }, { "epoch": 0.06, "grad_norm": 1.7589739881682507, "learning_rate": 9.976833594191301e-06, "loss": 0.6541, "step": 842 }, { "epoch": 0.06, "grad_norm": 2.465341610938836, "learning_rate": 9.976722968713222e-06, "loss": 0.6112, "step": 843 }, { "epoch": 0.06, "grad_norm": 2.2799024015877056, "learning_rate": 9.97661208034777e-06, "loss": 0.5758, "step": 844 }, { "epoch": 0.06, "grad_norm": 1.692080697305744, "learning_rate": 9.976500929100806e-06, "loss": 0.625, "step": 845 }, { "epoch": 0.06, "grad_norm": 1.6112302465168034, "learning_rate": 9.9763895149782e-06, "loss": 0.5957, "step": 846 }, { "epoch": 0.06, "grad_norm": 1.0915184156344058, "learning_rate": 9.976277837985838e-06, "loss": 0.4287, "step": 847 }, { "epoch": 0.06, "grad_norm": 1.6960835734776885, "learning_rate": 9.976165898129617e-06, "loss": 0.5701, "step": 848 }, { "epoch": 0.06, "grad_norm": 1.8209266226111924, "learning_rate": 9.976053695415453e-06, "loss": 0.5464, "step": 849 }, { "epoch": 0.06, "grad_norm": 1.8962231714399242, "learning_rate": 9.975941229849273e-06, "loss": 0.6345, "step": 850 }, { "epoch": 0.06, "grad_norm": 2.4347057278624904, "learning_rate": 9.975828501437015e-06, "loss": 0.6128, "step": 851 }, { "epoch": 0.06, "grad_norm": 0.9701608457378855, "learning_rate": 9.975715510184633e-06, "loss": 0.4677, "step": 852 }, { "epoch": 0.06, "grad_norm": 1.826048645610474, "learning_rate": 9.975602256098101e-06, "loss": 0.601, "step": 853 }, { "epoch": 0.06, "grad_norm": 1.7635323265430154, "learning_rate": 9.975488739183397e-06, "loss": 0.5631, "step": 854 }, { "epoch": 0.06, "grad_norm": 1.7170652814724954, "learning_rate": 9.975374959446518e-06, "loss": 0.5358, "step": 855 }, { "epoch": 0.06, "grad_norm": 1.82466761165953, "learning_rate": 9.975260916893477e-06, "loss": 0.5581, "step": 856 }, { "epoch": 0.06, "grad_norm": 1.6088892246849174, "learning_rate": 9.975146611530293e-06, "loss": 0.5251, "step": 857 }, { "epoch": 0.06, "grad_norm": 4.317816069250075, "learning_rate": 9.97503204336301e-06, "loss": 0.5963, "step": 858 }, { "epoch": 0.06, "grad_norm": 1.6900684818219267, "learning_rate": 9.974917212397674e-06, "loss": 0.6305, "step": 859 }, { "epoch": 0.06, "grad_norm": 2.025470447529022, "learning_rate": 9.974802118640354e-06, "loss": 0.5842, "step": 860 }, { "epoch": 0.06, "grad_norm": 2.021441255022368, "learning_rate": 9.97468676209713e-06, "loss": 0.5827, "step": 861 }, { "epoch": 0.06, "grad_norm": 1.8610307300944973, "learning_rate": 9.974571142774095e-06, "loss": 0.5934, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.0146787174815457, "learning_rate": 9.974455260677353e-06, "loss": 0.4821, "step": 863 }, { "epoch": 0.06, "grad_norm": 3.019263994741795, "learning_rate": 9.974339115813031e-06, "loss": 0.5763, "step": 864 }, { "epoch": 0.06, "grad_norm": 2.172402342565474, "learning_rate": 9.974222708187263e-06, "loss": 0.608, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.8419787566644, "learning_rate": 9.974106037806195e-06, "loss": 0.5085, "step": 866 }, { "epoch": 0.06, "grad_norm": 1.9829903508564541, "learning_rate": 9.97398910467599e-06, "loss": 0.5915, "step": 867 }, { "epoch": 0.06, "grad_norm": 2.083601631108778, "learning_rate": 9.973871908802828e-06, "loss": 0.6569, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.8609155361391914, "learning_rate": 9.973754450192899e-06, "loss": 0.5951, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.922397781312075, "learning_rate": 9.973636728852406e-06, "loss": 0.6317, "step": 870 }, { "epoch": 0.06, "grad_norm": 2.0937765887126445, "learning_rate": 9.973518744787564e-06, "loss": 0.5528, "step": 871 }, { "epoch": 0.06, "grad_norm": 0.9307070356280323, "learning_rate": 9.973400498004614e-06, "loss": 0.4379, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.9708208473918585, "learning_rate": 9.973281988509797e-06, "loss": 0.6242, "step": 873 }, { "epoch": 0.06, "grad_norm": 2.1716549774775755, "learning_rate": 9.973163216309372e-06, "loss": 0.575, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.9834120094353342, "learning_rate": 9.973044181409615e-06, "loss": 0.6476, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.881757462925116, "learning_rate": 9.972924883816813e-06, "loss": 0.6009, "step": 876 }, { "epoch": 0.06, "grad_norm": 2.147014728035417, "learning_rate": 9.97280532353727e-06, "loss": 0.5882, "step": 877 }, { "epoch": 0.06, "grad_norm": 2.1724960317691444, "learning_rate": 9.972685500577298e-06, "loss": 0.5682, "step": 878 }, { "epoch": 0.06, "grad_norm": 4.282556217063045, "learning_rate": 9.972565414943227e-06, "loss": 0.5298, "step": 879 }, { "epoch": 0.06, "grad_norm": 2.105894236389261, "learning_rate": 9.972445066641402e-06, "loss": 0.585, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.6996022889408693, "learning_rate": 9.972324455678183e-06, "loss": 0.602, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.814945086356654, "learning_rate": 9.972203582059934e-06, "loss": 0.6793, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.905743097274066, "learning_rate": 9.972082445793045e-06, "loss": 0.5951, "step": 883 }, { "epoch": 0.06, "grad_norm": 2.984656564542831, "learning_rate": 9.971961046883913e-06, "loss": 0.5825, "step": 884 }, { "epoch": 0.06, "grad_norm": 0.9527634299624462, "learning_rate": 9.971839385338952e-06, "loss": 0.4644, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.710910228263264, "learning_rate": 9.971717461164587e-06, "loss": 0.6145, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.9693978242875008, "learning_rate": 9.97159527436726e-06, "loss": 0.5932, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.5650670846721049, "learning_rate": 9.971472824953424e-06, "loss": 0.5868, "step": 888 }, { "epoch": 0.06, "grad_norm": 5.887828219587848, "learning_rate": 9.971350112929547e-06, "loss": 0.593, "step": 889 }, { "epoch": 0.06, "grad_norm": 2.4023258415443984, "learning_rate": 9.971227138302112e-06, "loss": 0.589, "step": 890 }, { "epoch": 0.06, "grad_norm": 5.939024582463684, "learning_rate": 9.971103901077617e-06, "loss": 0.5792, "step": 891 }, { "epoch": 0.06, "grad_norm": 2.0785454618638526, "learning_rate": 9.970980401262567e-06, "loss": 0.5687, "step": 892 }, { "epoch": 0.06, "grad_norm": 2.0946012688564672, "learning_rate": 9.97085663886349e-06, "loss": 0.6617, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.752209723044127, "learning_rate": 9.97073261388692e-06, "loss": 0.6015, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.9766817219219843, "learning_rate": 9.970608326339412e-06, "loss": 0.6951, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.9825613559009032, "learning_rate": 9.97048377622753e-06, "loss": 0.5272, "step": 896 }, { "epoch": 0.06, "grad_norm": 8.720347274979087, "learning_rate": 9.970358963557849e-06, "loss": 0.6007, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.7666897572156686, "learning_rate": 9.970233888336969e-06, "loss": 0.6003, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.823990328529463, "learning_rate": 9.970108550571492e-06, "loss": 0.6949, "step": 899 }, { "epoch": 0.06, "grad_norm": 2.0460248580961506, "learning_rate": 9.96998295026804e-06, "loss": 0.6285, "step": 900 }, { "epoch": 0.06, "grad_norm": 2.1444350798914584, "learning_rate": 9.96985708743325e-06, "loss": 0.5693, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.9432446655007625, "learning_rate": 9.969730962073766e-06, "loss": 0.5978, "step": 902 }, { "epoch": 0.06, "grad_norm": 2.407018149499689, "learning_rate": 9.969604574196255e-06, "loss": 0.6224, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.0037136435872804, "learning_rate": 9.96947792380739e-06, "loss": 0.4598, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.8326036091806832, "learning_rate": 9.969351010913863e-06, "loss": 0.7125, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.830851819927731, "learning_rate": 9.969223835522377e-06, "loss": 0.6518, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.7779405420608956, "learning_rate": 9.969096397639647e-06, "loss": 0.6539, "step": 907 }, { "epoch": 0.06, "grad_norm": 2.153927525701028, "learning_rate": 9.968968697272413e-06, "loss": 0.5624, "step": 908 }, { "epoch": 0.06, "grad_norm": 0.9874270094416919, "learning_rate": 9.968840734427411e-06, "loss": 0.457, "step": 909 }, { "epoch": 0.06, "grad_norm": 2.006708477860132, "learning_rate": 9.968712509111405e-06, "loss": 0.5349, "step": 910 }, { "epoch": 0.06, "grad_norm": 1.6891536424160458, "learning_rate": 9.968584021331169e-06, "loss": 0.6229, "step": 911 }, { "epoch": 0.06, "grad_norm": 2.2400003935522346, "learning_rate": 9.96845527109349e-06, "loss": 0.595, "step": 912 }, { "epoch": 0.06, "grad_norm": 1.8197738409863462, "learning_rate": 9.968326258405168e-06, "loss": 0.5783, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.7945323729979659, "learning_rate": 9.968196983273018e-06, "loss": 0.5785, "step": 914 }, { "epoch": 0.06, "grad_norm": 0.8985331650990965, "learning_rate": 9.968067445703866e-06, "loss": 0.4501, "step": 915 }, { "epoch": 0.07, "grad_norm": 2.2284875187402653, "learning_rate": 9.96793764570456e-06, "loss": 0.5747, "step": 916 }, { "epoch": 0.07, "grad_norm": 2.7289751817573573, "learning_rate": 9.967807583281955e-06, "loss": 0.6883, "step": 917 }, { "epoch": 0.07, "grad_norm": 0.9617652798852775, "learning_rate": 9.967677258442918e-06, "loss": 0.4677, "step": 918 }, { "epoch": 0.07, "grad_norm": 1.869470988391731, "learning_rate": 9.967546671194335e-06, "loss": 0.576, "step": 919 }, { "epoch": 0.07, "grad_norm": 1.9119344849532227, "learning_rate": 9.967415821543107e-06, "loss": 0.5712, "step": 920 }, { "epoch": 0.07, "grad_norm": 2.911043116077983, "learning_rate": 9.967284709496142e-06, "loss": 0.6001, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.7565667783594785, "learning_rate": 9.967153335060367e-06, "loss": 0.6843, "step": 922 }, { "epoch": 0.07, "grad_norm": 3.232203497547063, "learning_rate": 9.96702169824272e-06, "loss": 0.6291, "step": 923 }, { "epoch": 0.07, "grad_norm": 1.9603634692549758, "learning_rate": 9.96688979905016e-06, "loss": 0.5934, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.6212481980922946, "learning_rate": 9.966757637489647e-06, "loss": 0.6018, "step": 925 }, { "epoch": 0.07, "grad_norm": 2.1877457261719644, "learning_rate": 9.966625213568169e-06, "loss": 0.5791, "step": 926 }, { "epoch": 0.07, "grad_norm": 1.7952240870885958, "learning_rate": 9.966492527292718e-06, "loss": 0.6318, "step": 927 }, { "epoch": 0.07, "grad_norm": 0.9023042009864433, "learning_rate": 9.966359578670302e-06, "loss": 0.4718, "step": 928 }, { "epoch": 0.07, "grad_norm": 1.886150086637256, "learning_rate": 9.966226367707943e-06, "loss": 0.5645, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.8619355750875033, "learning_rate": 9.966092894412683e-06, "loss": 0.6124, "step": 930 }, { "epoch": 0.07, "grad_norm": 2.3138410422118505, "learning_rate": 9.965959158791564e-06, "loss": 0.6055, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.717862802490277, "learning_rate": 9.96582516085166e-06, "loss": 0.6625, "step": 932 }, { "epoch": 0.07, "grad_norm": 0.8584968622231486, "learning_rate": 9.965690900600042e-06, "loss": 0.4908, "step": 933 }, { "epoch": 0.07, "grad_norm": 1.909652169596075, "learning_rate": 9.965556378043806e-06, "loss": 0.6712, "step": 934 }, { "epoch": 0.07, "grad_norm": 1.6739853923745158, "learning_rate": 9.965421593190055e-06, "loss": 0.6092, "step": 935 }, { "epoch": 0.07, "grad_norm": 3.0279753874320314, "learning_rate": 9.965286546045911e-06, "loss": 0.5493, "step": 936 }, { "epoch": 0.07, "grad_norm": 1.979034693393833, "learning_rate": 9.965151236618506e-06, "loss": 0.6632, "step": 937 }, { "epoch": 0.07, "grad_norm": 0.9898820613403267, "learning_rate": 9.96501566491499e-06, "loss": 0.4634, "step": 938 }, { "epoch": 0.07, "grad_norm": 1.8261873933090123, "learning_rate": 9.964879830942522e-06, "loss": 0.6541, "step": 939 }, { "epoch": 0.07, "grad_norm": 2.0686272120013807, "learning_rate": 9.96474373470828e-06, "loss": 0.5171, "step": 940 }, { "epoch": 0.07, "grad_norm": 1.6688511068608667, "learning_rate": 9.964607376219448e-06, "loss": 0.5622, "step": 941 }, { "epoch": 0.07, "grad_norm": 1.871077513019353, "learning_rate": 9.964470755483233e-06, "loss": 0.6285, "step": 942 }, { "epoch": 0.07, "grad_norm": 2.052486629540008, "learning_rate": 9.964333872506852e-06, "loss": 0.6266, "step": 943 }, { "epoch": 0.07, "grad_norm": 1.786169561760334, "learning_rate": 9.964196727297533e-06, "loss": 0.6136, "step": 944 }, { "epoch": 0.07, "grad_norm": 0.9488176618114591, "learning_rate": 9.964059319862522e-06, "loss": 0.4556, "step": 945 }, { "epoch": 0.07, "grad_norm": 1.713882689591305, "learning_rate": 9.963921650209079e-06, "loss": 0.5916, "step": 946 }, { "epoch": 0.07, "grad_norm": 8.156210591485637, "learning_rate": 9.963783718344471e-06, "loss": 0.5972, "step": 947 }, { "epoch": 0.07, "grad_norm": 1.6735671467686146, "learning_rate": 9.963645524275991e-06, "loss": 0.584, "step": 948 }, { "epoch": 0.07, "grad_norm": 2.276830798852272, "learning_rate": 9.963507068010933e-06, "loss": 0.6418, "step": 949 }, { "epoch": 0.07, "grad_norm": 2.3441157082821746, "learning_rate": 9.963368349556614e-06, "loss": 0.5468, "step": 950 }, { "epoch": 0.07, "grad_norm": 2.342859980279433, "learning_rate": 9.96322936892036e-06, "loss": 0.6946, "step": 951 }, { "epoch": 0.07, "grad_norm": 1.700710644353618, "learning_rate": 9.963090126109513e-06, "loss": 0.5941, "step": 952 }, { "epoch": 0.07, "grad_norm": 1.9769494657080284, "learning_rate": 9.962950621131428e-06, "loss": 0.5949, "step": 953 }, { "epoch": 0.07, "grad_norm": 2.876909040872116, "learning_rate": 9.962810853993476e-06, "loss": 0.5814, "step": 954 }, { "epoch": 0.07, "grad_norm": 1.6465083760938293, "learning_rate": 9.962670824703036e-06, "loss": 0.551, "step": 955 }, { "epoch": 0.07, "grad_norm": 2.1420842510858478, "learning_rate": 9.962530533267509e-06, "loss": 0.6299, "step": 956 }, { "epoch": 0.07, "grad_norm": 2.0857649474567412, "learning_rate": 9.962389979694305e-06, "loss": 0.5453, "step": 957 }, { "epoch": 0.07, "grad_norm": 2.0686946954752052, "learning_rate": 9.962249163990845e-06, "loss": 0.5879, "step": 958 }, { "epoch": 0.07, "grad_norm": 2.2381282580812614, "learning_rate": 9.96210808616457e-06, "loss": 0.5538, "step": 959 }, { "epoch": 0.07, "grad_norm": 1.8980944721668438, "learning_rate": 9.961966746222932e-06, "loss": 0.565, "step": 960 }, { "epoch": 0.07, "grad_norm": 1.1146849728949642, "learning_rate": 9.961825144173398e-06, "loss": 0.4633, "step": 961 }, { "epoch": 0.07, "grad_norm": 1.930090766725209, "learning_rate": 9.961683280023446e-06, "loss": 0.5319, "step": 962 }, { "epoch": 0.07, "grad_norm": 1.9261510435773344, "learning_rate": 9.961541153780571e-06, "loss": 0.5924, "step": 963 }, { "epoch": 0.07, "grad_norm": 2.696804409059998, "learning_rate": 9.961398765452282e-06, "loss": 0.5689, "step": 964 }, { "epoch": 0.07, "grad_norm": 1.667347481743653, "learning_rate": 9.961256115046096e-06, "loss": 0.5659, "step": 965 }, { "epoch": 0.07, "grad_norm": 1.7467911315034046, "learning_rate": 9.961113202569553e-06, "loss": 0.6208, "step": 966 }, { "epoch": 0.07, "grad_norm": 1.9166040700094245, "learning_rate": 9.960970028030199e-06, "loss": 0.5923, "step": 967 }, { "epoch": 0.07, "grad_norm": 1.631080649964886, "learning_rate": 9.960826591435598e-06, "loss": 0.5493, "step": 968 }, { "epoch": 0.07, "grad_norm": 7.753504265636464, "learning_rate": 9.960682892793328e-06, "loss": 0.6408, "step": 969 }, { "epoch": 0.07, "grad_norm": 1.9917995419167682, "learning_rate": 9.960538932110977e-06, "loss": 0.6085, "step": 970 }, { "epoch": 0.07, "grad_norm": 1.5972018439183042, "learning_rate": 9.960394709396152e-06, "loss": 0.5051, "step": 971 }, { "epoch": 0.07, "grad_norm": 1.9839178919532483, "learning_rate": 9.960250224656468e-06, "loss": 0.7072, "step": 972 }, { "epoch": 0.07, "grad_norm": 1.7956244745958974, "learning_rate": 9.960105477899565e-06, "loss": 0.6235, "step": 973 }, { "epoch": 0.07, "grad_norm": 1.9516011221943101, "learning_rate": 9.959960469133079e-06, "loss": 0.5534, "step": 974 }, { "epoch": 0.07, "grad_norm": 2.2355846997661404, "learning_rate": 9.959815198364676e-06, "loss": 0.5872, "step": 975 }, { "epoch": 0.07, "grad_norm": 1.9017082237600156, "learning_rate": 9.959669665602026e-06, "loss": 0.557, "step": 976 }, { "epoch": 0.07, "grad_norm": 1.8077601025295578, "learning_rate": 9.959523870852821e-06, "loss": 0.6623, "step": 977 }, { "epoch": 0.07, "grad_norm": 1.7877833045022467, "learning_rate": 9.959377814124759e-06, "loss": 0.5579, "step": 978 }, { "epoch": 0.07, "grad_norm": 2.1833227795127934, "learning_rate": 9.959231495425557e-06, "loss": 0.6484, "step": 979 }, { "epoch": 0.07, "grad_norm": 1.9092953043499936, "learning_rate": 9.959084914762942e-06, "loss": 0.6368, "step": 980 }, { "epoch": 0.07, "grad_norm": 1.8781677243250605, "learning_rate": 9.958938072144659e-06, "loss": 0.5918, "step": 981 }, { "epoch": 0.07, "grad_norm": 1.6665327415494193, "learning_rate": 9.958790967578463e-06, "loss": 0.5418, "step": 982 }, { "epoch": 0.07, "grad_norm": 1.8307992478320423, "learning_rate": 9.958643601072126e-06, "loss": 0.553, "step": 983 }, { "epoch": 0.07, "grad_norm": 1.9220979981484367, "learning_rate": 9.958495972633431e-06, "loss": 0.5941, "step": 984 }, { "epoch": 0.07, "grad_norm": 1.9457239907818042, "learning_rate": 9.958348082270178e-06, "loss": 0.6743, "step": 985 }, { "epoch": 0.07, "grad_norm": 2.764260761611482, "learning_rate": 9.958199929990178e-06, "loss": 0.6568, "step": 986 }, { "epoch": 0.07, "grad_norm": 1.7856321916081361, "learning_rate": 9.958051515801257e-06, "loss": 0.6022, "step": 987 }, { "epoch": 0.07, "grad_norm": 1.8732212657590062, "learning_rate": 9.957902839711254e-06, "loss": 0.6485, "step": 988 }, { "epoch": 0.07, "grad_norm": 1.8838482369742178, "learning_rate": 9.957753901728024e-06, "loss": 0.5916, "step": 989 }, { "epoch": 0.07, "grad_norm": 2.4272359058088626, "learning_rate": 9.957604701859434e-06, "loss": 0.6082, "step": 990 }, { "epoch": 0.07, "grad_norm": 2.1391578458822056, "learning_rate": 9.957455240113365e-06, "loss": 0.5857, "step": 991 }, { "epoch": 0.07, "grad_norm": 1.8038855192382879, "learning_rate": 9.957305516497712e-06, "loss": 0.5782, "step": 992 }, { "epoch": 0.07, "grad_norm": 2.309851224451441, "learning_rate": 9.957155531020385e-06, "loss": 0.6199, "step": 993 }, { "epoch": 0.07, "grad_norm": 1.8265647357635815, "learning_rate": 9.957005283689303e-06, "loss": 0.5406, "step": 994 }, { "epoch": 0.07, "grad_norm": 1.9328246685906743, "learning_rate": 9.956854774512407e-06, "loss": 0.5896, "step": 995 }, { "epoch": 0.07, "grad_norm": 2.1314023843884473, "learning_rate": 9.956704003497646e-06, "loss": 0.6049, "step": 996 }, { "epoch": 0.07, "grad_norm": 1.9500532067357477, "learning_rate": 9.956552970652984e-06, "loss": 0.6205, "step": 997 }, { "epoch": 0.07, "grad_norm": 1.7889198911358128, "learning_rate": 9.956401675986398e-06, "loss": 0.5817, "step": 998 }, { "epoch": 0.07, "grad_norm": 2.177320840210121, "learning_rate": 9.956250119505881e-06, "loss": 0.6471, "step": 999 }, { "epoch": 0.07, "grad_norm": 2.085154339503883, "learning_rate": 9.956098301219438e-06, "loss": 0.5975, "step": 1000 }, { "epoch": 0.07, "grad_norm": 1.905296200571165, "learning_rate": 9.955946221135091e-06, "loss": 0.5691, "step": 1001 }, { "epoch": 0.07, "grad_norm": 3.0593920515931994, "learning_rate": 9.955793879260872e-06, "loss": 0.6006, "step": 1002 }, { "epoch": 0.07, "grad_norm": 0.9195687243340704, "learning_rate": 9.955641275604825e-06, "loss": 0.4678, "step": 1003 }, { "epoch": 0.07, "grad_norm": 2.264031790407994, "learning_rate": 9.955488410175016e-06, "loss": 0.5508, "step": 1004 }, { "epoch": 0.07, "grad_norm": 1.621076090187733, "learning_rate": 9.955335282979517e-06, "loss": 0.5568, "step": 1005 }, { "epoch": 0.07, "grad_norm": 0.9371986024203656, "learning_rate": 9.955181894026415e-06, "loss": 0.4701, "step": 1006 }, { "epoch": 0.07, "grad_norm": 1.6427943191551786, "learning_rate": 9.95502824332382e-06, "loss": 0.5871, "step": 1007 }, { "epoch": 0.07, "grad_norm": 1.7513905841246618, "learning_rate": 9.954874330879839e-06, "loss": 0.5945, "step": 1008 }, { "epoch": 0.07, "grad_norm": 1.7599051316341732, "learning_rate": 9.95472015670261e-06, "loss": 0.5607, "step": 1009 }, { "epoch": 0.07, "grad_norm": 1.634660555100661, "learning_rate": 9.95456572080027e-06, "loss": 0.518, "step": 1010 }, { "epoch": 0.07, "grad_norm": 1.7147215798274211, "learning_rate": 9.95441102318098e-06, "loss": 0.5868, "step": 1011 }, { "epoch": 0.07, "grad_norm": 2.819909120965664, "learning_rate": 9.954256063852913e-06, "loss": 0.6384, "step": 1012 }, { "epoch": 0.07, "grad_norm": 1.8027608347820943, "learning_rate": 9.954100842824256e-06, "loss": 0.6005, "step": 1013 }, { "epoch": 0.07, "grad_norm": 1.7472294862032016, "learning_rate": 9.953945360103202e-06, "loss": 0.5864, "step": 1014 }, { "epoch": 0.07, "grad_norm": 2.4085909224303546, "learning_rate": 9.95378961569797e-06, "loss": 0.5914, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.8637468950007319, "learning_rate": 9.953633609616786e-06, "loss": 0.6092, "step": 1016 }, { "epoch": 0.07, "grad_norm": 1.0136157150020422, "learning_rate": 9.953477341867887e-06, "loss": 0.4541, "step": 1017 }, { "epoch": 0.07, "grad_norm": 2.0554482270108427, "learning_rate": 9.95332081245953e-06, "loss": 0.5952, "step": 1018 }, { "epoch": 0.07, "grad_norm": 0.8286010717785349, "learning_rate": 9.953164021399986e-06, "loss": 0.4569, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.7710412966403686, "learning_rate": 9.953006968697532e-06, "loss": 0.5974, "step": 1020 }, { "epoch": 0.07, "grad_norm": 2.3860842730220106, "learning_rate": 9.952849654360468e-06, "loss": 0.557, "step": 1021 }, { "epoch": 0.07, "grad_norm": 2.12677653936814, "learning_rate": 9.952692078397104e-06, "loss": 0.5814, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.9015927872337524, "learning_rate": 9.952534240815761e-06, "loss": 0.5362, "step": 1023 }, { "epoch": 0.07, "grad_norm": 2.436318903492822, "learning_rate": 9.952376141624777e-06, "loss": 0.6558, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.746096871488504, "learning_rate": 9.952217780832505e-06, "loss": 0.6069, "step": 1025 }, { "epoch": 0.07, "grad_norm": 2.6366731461385875, "learning_rate": 9.952059158447312e-06, "loss": 0.5427, "step": 1026 }, { "epoch": 0.07, "grad_norm": 5.83223506274411, "learning_rate": 9.951900274477571e-06, "loss": 0.6497, "step": 1027 }, { "epoch": 0.07, "grad_norm": 4.046706841433947, "learning_rate": 9.95174112893168e-06, "loss": 0.6756, "step": 1028 }, { "epoch": 0.07, "grad_norm": 2.407595421247788, "learning_rate": 9.951581721818041e-06, "loss": 0.5719, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.909742886923054, "learning_rate": 9.951422053145081e-06, "loss": 0.632, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.2548387890304005, "learning_rate": 9.951262122921228e-06, "loss": 0.4737, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.7144660024606917, "learning_rate": 9.951101931154933e-06, "loss": 0.5959, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.192050099722398, "learning_rate": 9.950941477854659e-06, "loss": 0.4719, "step": 1033 }, { "epoch": 0.07, "grad_norm": 2.0154896917261853, "learning_rate": 9.950780763028878e-06, "loss": 0.5896, "step": 1034 }, { "epoch": 0.07, "grad_norm": 0.9253854303917076, "learning_rate": 9.950619786686083e-06, "loss": 0.4625, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.7669939912916404, "learning_rate": 9.950458548834775e-06, "loss": 0.6043, "step": 1036 }, { "epoch": 0.07, "grad_norm": 2.106498345073158, "learning_rate": 9.950297049483472e-06, "loss": 0.5116, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.559036732778817, "learning_rate": 9.950135288640706e-06, "loss": 0.5963, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.8934992065845806, "learning_rate": 9.949973266315018e-06, "loss": 0.6238, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.7070859822507563, "learning_rate": 9.949810982514971e-06, "loss": 0.6488, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.9164769488059537, "learning_rate": 9.949648437249138e-06, "loss": 0.5585, "step": 1041 }, { "epoch": 0.07, "grad_norm": 2.215174184219762, "learning_rate": 9.949485630526099e-06, "loss": 0.6298, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.715485794951897, "learning_rate": 9.94932256235446e-06, "loss": 0.6212, "step": 1043 }, { "epoch": 0.07, "grad_norm": 2.1331970908573354, "learning_rate": 9.949159232742832e-06, "loss": 0.526, "step": 1044 }, { "epoch": 0.07, "grad_norm": 2.106084618614398, "learning_rate": 9.948995641699844e-06, "loss": 0.7176, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.7439403995469547, "learning_rate": 9.948831789234135e-06, "loss": 0.566, "step": 1046 }, { "epoch": 0.07, "grad_norm": 2.0162970016182307, "learning_rate": 9.948667675354365e-06, "loss": 0.5867, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.1698415200384558, "learning_rate": 9.948503300069197e-06, "loss": 0.4817, "step": 1048 }, { "epoch": 0.07, "grad_norm": 1.9293637615841233, "learning_rate": 9.948338663387319e-06, "loss": 0.6291, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.741904619623014, "learning_rate": 9.948173765317424e-06, "loss": 0.6026, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.9159397491325307, "learning_rate": 9.948008605868224e-06, "loss": 0.5795, "step": 1051 }, { "epoch": 0.07, "grad_norm": 1.4563528745157615, "learning_rate": 9.947843185048443e-06, "loss": 0.4874, "step": 1052 }, { "epoch": 0.07, "grad_norm": 2.6336049463940436, "learning_rate": 9.94767750286682e-06, "loss": 0.5718, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.9704737575128997, "learning_rate": 9.947511559332107e-06, "loss": 0.6471, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.6352919901322311, "learning_rate": 9.947345354453068e-06, "loss": 0.6399, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.6996076903204365, "learning_rate": 9.947178888238485e-06, "loss": 0.6229, "step": 1056 }, { "epoch": 0.08, "grad_norm": 1.6331791209378932, "learning_rate": 9.947012160697149e-06, "loss": 0.5935, "step": 1057 }, { "epoch": 0.08, "grad_norm": 2.0980869735292678, "learning_rate": 9.946845171837866e-06, "loss": 0.5947, "step": 1058 }, { "epoch": 0.08, "grad_norm": 1.693997810089867, "learning_rate": 9.946677921669462e-06, "loss": 0.5849, "step": 1059 }, { "epoch": 0.08, "grad_norm": 1.802331373003337, "learning_rate": 9.946510410200768e-06, "loss": 0.6111, "step": 1060 }, { "epoch": 0.08, "grad_norm": 0.9928957996677363, "learning_rate": 9.94634263744063e-06, "loss": 0.4903, "step": 1061 }, { "epoch": 0.08, "grad_norm": 1.82141531879438, "learning_rate": 9.946174603397915e-06, "loss": 0.622, "step": 1062 }, { "epoch": 0.08, "grad_norm": 1.7977843041525552, "learning_rate": 9.9460063080815e-06, "loss": 0.6069, "step": 1063 }, { "epoch": 0.08, "grad_norm": 2.0142252920841166, "learning_rate": 9.945837751500269e-06, "loss": 0.5752, "step": 1064 }, { "epoch": 0.08, "grad_norm": 1.6331377771756355, "learning_rate": 9.945668933663131e-06, "loss": 0.5668, "step": 1065 }, { "epoch": 0.08, "grad_norm": 1.7979694804994537, "learning_rate": 9.945499854579002e-06, "loss": 0.5581, "step": 1066 }, { "epoch": 0.08, "grad_norm": 1.6909355623185462, "learning_rate": 9.945330514256811e-06, "loss": 0.5939, "step": 1067 }, { "epoch": 0.08, "grad_norm": 2.0428229522539514, "learning_rate": 9.945160912705507e-06, "loss": 0.59, "step": 1068 }, { "epoch": 0.08, "grad_norm": 0.8589318987832768, "learning_rate": 9.944991049934047e-06, "loss": 0.4554, "step": 1069 }, { "epoch": 0.08, "grad_norm": 2.1013614058080283, "learning_rate": 9.944820925951403e-06, "loss": 0.6036, "step": 1070 }, { "epoch": 0.08, "grad_norm": 1.8467886516297793, "learning_rate": 9.944650540766564e-06, "loss": 0.5506, "step": 1071 }, { "epoch": 0.08, "grad_norm": 1.7721712961746898, "learning_rate": 9.944479894388525e-06, "loss": 0.6259, "step": 1072 }, { "epoch": 0.08, "grad_norm": 1.7833551363556575, "learning_rate": 9.944308986826307e-06, "loss": 0.5991, "step": 1073 }, { "epoch": 0.08, "grad_norm": 1.8629899170228683, "learning_rate": 9.944137818088935e-06, "loss": 0.6456, "step": 1074 }, { "epoch": 0.08, "grad_norm": 2.1191424301878587, "learning_rate": 9.943966388185449e-06, "loss": 0.5331, "step": 1075 }, { "epoch": 0.08, "grad_norm": 1.9581236917185463, "learning_rate": 9.943794697124907e-06, "loss": 0.5616, "step": 1076 }, { "epoch": 0.08, "grad_norm": 1.9378629380033925, "learning_rate": 9.943622744916377e-06, "loss": 0.5834, "step": 1077 }, { "epoch": 0.08, "grad_norm": 1.8417425553959619, "learning_rate": 9.943450531568942e-06, "loss": 0.6063, "step": 1078 }, { "epoch": 0.08, "grad_norm": 3.3334523100909927, "learning_rate": 9.9432780570917e-06, "loss": 0.6144, "step": 1079 }, { "epoch": 0.08, "grad_norm": 1.954724376318318, "learning_rate": 9.943105321493757e-06, "loss": 0.6136, "step": 1080 }, { "epoch": 0.08, "grad_norm": 1.6835267446353195, "learning_rate": 9.942932324784245e-06, "loss": 0.6153, "step": 1081 }, { "epoch": 0.08, "grad_norm": 1.7712250919107193, "learning_rate": 9.942759066972298e-06, "loss": 0.5426, "step": 1082 }, { "epoch": 0.08, "grad_norm": 2.4004138739848844, "learning_rate": 9.942585548067069e-06, "loss": 0.6353, "step": 1083 }, { "epoch": 0.08, "grad_norm": 1.7704680960902197, "learning_rate": 9.942411768077725e-06, "loss": 0.5702, "step": 1084 }, { "epoch": 0.08, "grad_norm": 2.7530620945215216, "learning_rate": 9.942237727013441e-06, "loss": 0.5896, "step": 1085 }, { "epoch": 0.08, "grad_norm": 1.7604046709983363, "learning_rate": 9.942063424883415e-06, "loss": 0.5988, "step": 1086 }, { "epoch": 0.08, "grad_norm": 2.0994886305309226, "learning_rate": 9.941888861696854e-06, "loss": 0.5625, "step": 1087 }, { "epoch": 0.08, "grad_norm": 2.0815867359440117, "learning_rate": 9.941714037462978e-06, "loss": 0.5451, "step": 1088 }, { "epoch": 0.08, "grad_norm": 6.017174916020509, "learning_rate": 9.94153895219102e-06, "loss": 0.5847, "step": 1089 }, { "epoch": 0.08, "grad_norm": 1.7964656136664965, "learning_rate": 9.941363605890231e-06, "loss": 0.5787, "step": 1090 }, { "epoch": 0.08, "grad_norm": 1.8878433405503776, "learning_rate": 9.941187998569874e-06, "loss": 0.6249, "step": 1091 }, { "epoch": 0.08, "grad_norm": 1.7354164545495139, "learning_rate": 9.941012130239223e-06, "loss": 0.6023, "step": 1092 }, { "epoch": 0.08, "grad_norm": 1.7835679311166261, "learning_rate": 9.940836000907567e-06, "loss": 0.521, "step": 1093 }, { "epoch": 0.08, "grad_norm": 1.6435070523996826, "learning_rate": 9.940659610584214e-06, "loss": 0.6245, "step": 1094 }, { "epoch": 0.08, "grad_norm": 1.7810198784808215, "learning_rate": 9.940482959278479e-06, "loss": 0.534, "step": 1095 }, { "epoch": 0.08, "grad_norm": 1.717099991623071, "learning_rate": 9.940306046999693e-06, "loss": 0.5665, "step": 1096 }, { "epoch": 0.08, "grad_norm": 1.5353589015000866, "learning_rate": 9.940128873757199e-06, "loss": 0.5935, "step": 1097 }, { "epoch": 0.08, "grad_norm": 2.360983253856564, "learning_rate": 9.939951439560363e-06, "loss": 0.6579, "step": 1098 }, { "epoch": 0.08, "grad_norm": 1.7229907418353716, "learning_rate": 9.93977374441855e-06, "loss": 0.6084, "step": 1099 }, { "epoch": 0.08, "grad_norm": 1.8055796260521704, "learning_rate": 9.93959578834115e-06, "loss": 0.5497, "step": 1100 }, { "epoch": 0.08, "grad_norm": 2.7722259243748724, "learning_rate": 9.939417571337563e-06, "loss": 0.5765, "step": 1101 }, { "epoch": 0.08, "grad_norm": 2.209330304793147, "learning_rate": 9.939239093417205e-06, "loss": 0.613, "step": 1102 }, { "epoch": 0.08, "grad_norm": 0.9045863058371468, "learning_rate": 9.9390603545895e-06, "loss": 0.4747, "step": 1103 }, { "epoch": 0.08, "grad_norm": 1.7971246380651944, "learning_rate": 9.938881354863891e-06, "loss": 0.5644, "step": 1104 }, { "epoch": 0.08, "grad_norm": 1.7226699472964218, "learning_rate": 9.938702094249833e-06, "loss": 0.5734, "step": 1105 }, { "epoch": 0.08, "grad_norm": 1.5528802933881127, "learning_rate": 9.938522572756798e-06, "loss": 0.5556, "step": 1106 }, { "epoch": 0.08, "grad_norm": 1.738750246448217, "learning_rate": 9.938342790394264e-06, "loss": 0.6161, "step": 1107 }, { "epoch": 0.08, "grad_norm": 2.436858321203428, "learning_rate": 9.93816274717173e-06, "loss": 0.5683, "step": 1108 }, { "epoch": 0.08, "grad_norm": 1.6188958672116516, "learning_rate": 9.937982443098711e-06, "loss": 0.5591, "step": 1109 }, { "epoch": 0.08, "grad_norm": 2.068851672463039, "learning_rate": 9.937801878184725e-06, "loss": 0.6161, "step": 1110 }, { "epoch": 0.08, "grad_norm": 1.5476561188707283, "learning_rate": 9.93762105243931e-06, "loss": 0.5375, "step": 1111 }, { "epoch": 0.08, "grad_norm": 2.233636640510574, "learning_rate": 9.937439965872023e-06, "loss": 0.6375, "step": 1112 }, { "epoch": 0.08, "grad_norm": 2.2153100538571717, "learning_rate": 9.937258618492425e-06, "loss": 0.6251, "step": 1113 }, { "epoch": 0.08, "grad_norm": 2.3580833559884424, "learning_rate": 9.937077010310099e-06, "loss": 0.6024, "step": 1114 }, { "epoch": 0.08, "grad_norm": 1.9381800937110203, "learning_rate": 9.936895141334634e-06, "loss": 0.6607, "step": 1115 }, { "epoch": 0.08, "grad_norm": 1.9912594348204549, "learning_rate": 9.93671301157564e-06, "loss": 0.5838, "step": 1116 }, { "epoch": 0.08, "grad_norm": 1.65084905388092, "learning_rate": 9.936530621042737e-06, "loss": 0.6092, "step": 1117 }, { "epoch": 0.08, "grad_norm": 1.683911459481241, "learning_rate": 9.93634796974556e-06, "loss": 0.5961, "step": 1118 }, { "epoch": 0.08, "grad_norm": 1.9346497166748984, "learning_rate": 9.936165057693756e-06, "loss": 0.5546, "step": 1119 }, { "epoch": 0.08, "grad_norm": 1.846504101914356, "learning_rate": 9.935981884896988e-06, "loss": 0.6791, "step": 1120 }, { "epoch": 0.08, "grad_norm": 1.7724285502998458, "learning_rate": 9.935798451364932e-06, "loss": 0.4809, "step": 1121 }, { "epoch": 0.08, "grad_norm": 1.8488035885343617, "learning_rate": 9.935614757107274e-06, "loss": 0.5465, "step": 1122 }, { "epoch": 0.08, "grad_norm": 2.872094531405468, "learning_rate": 9.935430802133723e-06, "loss": 0.5425, "step": 1123 }, { "epoch": 0.08, "grad_norm": 1.6113517570992404, "learning_rate": 9.935246586453993e-06, "loss": 0.5818, "step": 1124 }, { "epoch": 0.08, "grad_norm": 2.22660263791639, "learning_rate": 9.935062110077815e-06, "loss": 0.5327, "step": 1125 }, { "epoch": 0.08, "grad_norm": 2.1927501725989402, "learning_rate": 9.934877373014935e-06, "loss": 0.599, "step": 1126 }, { "epoch": 0.08, "grad_norm": 2.0535723529000967, "learning_rate": 9.93469237527511e-06, "loss": 0.5966, "step": 1127 }, { "epoch": 0.08, "grad_norm": 0.9977837344199024, "learning_rate": 9.934507116868113e-06, "loss": 0.4613, "step": 1128 }, { "epoch": 0.08, "grad_norm": 1.5215586819981877, "learning_rate": 9.93432159780373e-06, "loss": 0.5781, "step": 1129 }, { "epoch": 0.08, "grad_norm": 1.825181799742569, "learning_rate": 9.934135818091759e-06, "loss": 0.611, "step": 1130 }, { "epoch": 0.08, "grad_norm": 1.866276369445615, "learning_rate": 9.933949777742016e-06, "loss": 0.5768, "step": 1131 }, { "epoch": 0.08, "grad_norm": 1.8979232346918364, "learning_rate": 9.933763476764326e-06, "loss": 0.5588, "step": 1132 }, { "epoch": 0.08, "grad_norm": 4.478385878159797, "learning_rate": 9.933576915168532e-06, "loss": 0.5766, "step": 1133 }, { "epoch": 0.08, "grad_norm": 2.295132543701005, "learning_rate": 9.933390092964488e-06, "loss": 0.6108, "step": 1134 }, { "epoch": 0.08, "grad_norm": 1.9107164350340187, "learning_rate": 9.933203010162064e-06, "loss": 0.573, "step": 1135 }, { "epoch": 0.08, "grad_norm": 1.735183249874809, "learning_rate": 9.93301566677114e-06, "loss": 0.584, "step": 1136 }, { "epoch": 0.08, "grad_norm": 1.8079158541631022, "learning_rate": 9.932828062801614e-06, "loss": 0.5957, "step": 1137 }, { "epoch": 0.08, "grad_norm": 1.963365070271999, "learning_rate": 9.932640198263394e-06, "loss": 0.5687, "step": 1138 }, { "epoch": 0.08, "grad_norm": 1.8180216504765323, "learning_rate": 9.932452073166405e-06, "loss": 0.6131, "step": 1139 }, { "epoch": 0.08, "grad_norm": 1.6566641514284106, "learning_rate": 9.932263687520584e-06, "loss": 0.5534, "step": 1140 }, { "epoch": 0.08, "grad_norm": 2.1486566775981664, "learning_rate": 9.932075041335883e-06, "loss": 0.5289, "step": 1141 }, { "epoch": 0.08, "grad_norm": 0.9987534761509603, "learning_rate": 9.931886134622264e-06, "loss": 0.4631, "step": 1142 }, { "epoch": 0.08, "grad_norm": 1.6462619641048146, "learning_rate": 9.93169696738971e-06, "loss": 0.5451, "step": 1143 }, { "epoch": 0.08, "grad_norm": 0.9818807156548468, "learning_rate": 9.931507539648211e-06, "loss": 0.4503, "step": 1144 }, { "epoch": 0.08, "grad_norm": 1.692565068362615, "learning_rate": 9.931317851407772e-06, "loss": 0.6765, "step": 1145 }, { "epoch": 0.08, "grad_norm": 2.02382650616117, "learning_rate": 9.931127902678415e-06, "loss": 0.6334, "step": 1146 }, { "epoch": 0.08, "grad_norm": 1.7480501384101033, "learning_rate": 9.930937693470175e-06, "loss": 0.5899, "step": 1147 }, { "epoch": 0.08, "grad_norm": 1.779844091651691, "learning_rate": 9.930747223793096e-06, "loss": 0.5488, "step": 1148 }, { "epoch": 0.08, "grad_norm": 1.88738072373629, "learning_rate": 9.930556493657242e-06, "loss": 0.5834, "step": 1149 }, { "epoch": 0.08, "grad_norm": 1.857050056057343, "learning_rate": 9.930365503072686e-06, "loss": 0.5465, "step": 1150 }, { "epoch": 0.08, "grad_norm": 2.0678736367773176, "learning_rate": 9.930174252049518e-06, "loss": 0.5791, "step": 1151 }, { "epoch": 0.08, "grad_norm": 2.6019639043718423, "learning_rate": 9.92998274059784e-06, "loss": 0.545, "step": 1152 }, { "epoch": 0.08, "grad_norm": 2.0098831227896823, "learning_rate": 9.92979096872777e-06, "loss": 0.602, "step": 1153 }, { "epoch": 0.08, "grad_norm": 1.8225948621823336, "learning_rate": 9.929598936449437e-06, "loss": 0.5321, "step": 1154 }, { "epoch": 0.08, "grad_norm": 1.9629747922778744, "learning_rate": 9.929406643772983e-06, "loss": 0.5637, "step": 1155 }, { "epoch": 0.08, "grad_norm": 1.8221778911872186, "learning_rate": 9.929214090708567e-06, "loss": 0.5927, "step": 1156 }, { "epoch": 0.08, "grad_norm": 1.891479906155563, "learning_rate": 9.929021277266359e-06, "loss": 0.575, "step": 1157 }, { "epoch": 0.08, "grad_norm": 1.7005408430546565, "learning_rate": 9.928828203456549e-06, "loss": 0.6286, "step": 1158 }, { "epoch": 0.08, "grad_norm": 1.767571233276235, "learning_rate": 9.928634869289329e-06, "loss": 0.5797, "step": 1159 }, { "epoch": 0.08, "grad_norm": 1.6585840555713292, "learning_rate": 9.928441274774917e-06, "loss": 0.5638, "step": 1160 }, { "epoch": 0.08, "grad_norm": 1.7925367609906053, "learning_rate": 9.928247419923534e-06, "loss": 0.6125, "step": 1161 }, { "epoch": 0.08, "grad_norm": 1.9102301823942354, "learning_rate": 9.928053304745424e-06, "loss": 0.5185, "step": 1162 }, { "epoch": 0.08, "grad_norm": 2.135171401688203, "learning_rate": 9.927858929250843e-06, "loss": 0.6252, "step": 1163 }, { "epoch": 0.08, "grad_norm": 1.759189982336816, "learning_rate": 9.927664293450053e-06, "loss": 0.5464, "step": 1164 }, { "epoch": 0.08, "grad_norm": 2.7766592990231076, "learning_rate": 9.927469397353338e-06, "loss": 0.5508, "step": 1165 }, { "epoch": 0.08, "grad_norm": 2.217478741465085, "learning_rate": 9.927274240970992e-06, "loss": 0.6509, "step": 1166 }, { "epoch": 0.08, "grad_norm": 1.8780759488027114, "learning_rate": 9.927078824313325e-06, "loss": 0.5601, "step": 1167 }, { "epoch": 0.08, "grad_norm": 1.623582245937414, "learning_rate": 9.92688314739066e-06, "loss": 0.5725, "step": 1168 }, { "epoch": 0.08, "grad_norm": 1.7754954655193163, "learning_rate": 9.926687210213332e-06, "loss": 0.5901, "step": 1169 }, { "epoch": 0.08, "grad_norm": 5.923314392363563, "learning_rate": 9.926491012791693e-06, "loss": 0.6128, "step": 1170 }, { "epoch": 0.08, "grad_norm": 1.795334476730306, "learning_rate": 9.926294555136104e-06, "loss": 0.6193, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.6520751082260345, "learning_rate": 9.926097837256945e-06, "loss": 0.5444, "step": 1172 }, { "epoch": 0.08, "grad_norm": 1.749928391994692, "learning_rate": 9.925900859164605e-06, "loss": 0.6031, "step": 1173 }, { "epoch": 0.08, "grad_norm": 2.288482939641416, "learning_rate": 9.925703620869493e-06, "loss": 0.5724, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.7412790608109916, "learning_rate": 9.925506122382022e-06, "loss": 0.574, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.6897988131395134, "learning_rate": 9.92530836371263e-06, "loss": 0.5784, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.7802564119135693, "learning_rate": 9.92511034487176e-06, "loss": 0.5759, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.714082777908771, "learning_rate": 9.924912065869875e-06, "loss": 0.5611, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.928814180599799, "learning_rate": 9.924713526717444e-06, "loss": 0.5761, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.967951884404924, "learning_rate": 9.924514727424959e-06, "loss": 0.5947, "step": 1180 }, { "epoch": 0.08, "grad_norm": 2.227795091230408, "learning_rate": 9.924315668002919e-06, "loss": 0.6123, "step": 1181 }, { "epoch": 0.08, "grad_norm": 1.8413322674707158, "learning_rate": 9.924116348461839e-06, "loss": 0.5827, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.954965713683045, "learning_rate": 9.92391676881225e-06, "loss": 0.5482, "step": 1183 }, { "epoch": 0.08, "grad_norm": 2.22211616620053, "learning_rate": 9.92371692906469e-06, "loss": 0.5996, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.6982658882041028, "learning_rate": 9.92351682922972e-06, "loss": 0.566, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.6806749351358607, "learning_rate": 9.923316469317907e-06, "loss": 0.5706, "step": 1186 }, { "epoch": 0.08, "grad_norm": 2.0701633949252516, "learning_rate": 9.923115849339837e-06, "loss": 0.5618, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.7105605748455022, "learning_rate": 9.922914969306105e-06, "loss": 0.636, "step": 1188 }, { "epoch": 0.08, "grad_norm": 2.369548798387799, "learning_rate": 9.922713829227323e-06, "loss": 0.5742, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.6367537955058469, "learning_rate": 9.922512429114118e-06, "loss": 0.5198, "step": 1190 }, { "epoch": 0.08, "grad_norm": 1.3541741691695732, "learning_rate": 9.922310768977124e-06, "loss": 0.4317, "step": 1191 }, { "epoch": 0.08, "grad_norm": 1.885736070735759, "learning_rate": 9.922108848826997e-06, "loss": 0.5651, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.7935770349506805, "learning_rate": 9.921906668674402e-06, "loss": 0.6909, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.684495593029889, "learning_rate": 9.92170422853002e-06, "loss": 0.5933, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.6077915450346685, "learning_rate": 9.921501528404544e-06, "loss": 0.5379, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.6820414805856694, "learning_rate": 9.92129856830868e-06, "loss": 0.4628, "step": 1196 }, { "epoch": 0.08, "grad_norm": 1.5906974631942323, "learning_rate": 9.92109534825315e-06, "loss": 0.5545, "step": 1197 }, { "epoch": 0.09, "grad_norm": 2.216652218852187, "learning_rate": 9.92089186824869e-06, "loss": 0.652, "step": 1198 }, { "epoch": 0.09, "grad_norm": 1.950804019470793, "learning_rate": 9.920688128306046e-06, "loss": 0.5886, "step": 1199 }, { "epoch": 0.09, "grad_norm": 1.6993982162411818, "learning_rate": 9.92048412843598e-06, "loss": 0.5734, "step": 1200 }, { "epoch": 0.09, "grad_norm": 1.8028462829338006, "learning_rate": 9.920279868649272e-06, "loss": 0.5803, "step": 1201 }, { "epoch": 0.09, "grad_norm": 1.9798459677195217, "learning_rate": 9.920075348956709e-06, "loss": 0.6292, "step": 1202 }, { "epoch": 0.09, "grad_norm": 2.571983882288428, "learning_rate": 9.919870569369094e-06, "loss": 0.5736, "step": 1203 }, { "epoch": 0.09, "grad_norm": 1.6066794568245326, "learning_rate": 9.919665529897244e-06, "loss": 0.5504, "step": 1204 }, { "epoch": 0.09, "grad_norm": 1.981904090340616, "learning_rate": 9.919460230551991e-06, "loss": 0.5939, "step": 1205 }, { "epoch": 0.09, "grad_norm": 2.7284094091991133, "learning_rate": 9.919254671344181e-06, "loss": 0.5978, "step": 1206 }, { "epoch": 0.09, "grad_norm": 3.567729956266612, "learning_rate": 9.919048852284667e-06, "loss": 0.5488, "step": 1207 }, { "epoch": 0.09, "grad_norm": 1.1410972838154188, "learning_rate": 9.918842773384327e-06, "loss": 0.4573, "step": 1208 }, { "epoch": 0.09, "grad_norm": 1.8990024563537673, "learning_rate": 9.918636434654046e-06, "loss": 0.6171, "step": 1209 }, { "epoch": 0.09, "grad_norm": 1.3866009957114651, "learning_rate": 9.918429836104719e-06, "loss": 0.5386, "step": 1210 }, { "epoch": 0.09, "grad_norm": 2.015041694448049, "learning_rate": 9.918222977747263e-06, "loss": 0.5682, "step": 1211 }, { "epoch": 0.09, "grad_norm": 1.830201846485847, "learning_rate": 9.918015859592603e-06, "loss": 0.64, "step": 1212 }, { "epoch": 0.09, "grad_norm": 1.6551911666505976, "learning_rate": 9.917808481651682e-06, "loss": 0.6585, "step": 1213 }, { "epoch": 0.09, "grad_norm": 2.0116896322533435, "learning_rate": 9.917600843935453e-06, "loss": 0.5673, "step": 1214 }, { "epoch": 0.09, "grad_norm": 2.255024758048306, "learning_rate": 9.917392946454885e-06, "loss": 0.6215, "step": 1215 }, { "epoch": 0.09, "grad_norm": 2.07188563069938, "learning_rate": 9.91718478922096e-06, "loss": 0.6746, "step": 1216 }, { "epoch": 0.09, "grad_norm": 1.6651976778973394, "learning_rate": 9.916976372244671e-06, "loss": 0.6075, "step": 1217 }, { "epoch": 0.09, "grad_norm": 1.0984520744620285, "learning_rate": 9.916767695537028e-06, "loss": 0.4631, "step": 1218 }, { "epoch": 0.09, "grad_norm": 1.7370466259576671, "learning_rate": 9.916558759109058e-06, "loss": 0.6415, "step": 1219 }, { "epoch": 0.09, "grad_norm": 2.759782095316019, "learning_rate": 9.916349562971793e-06, "loss": 0.6117, "step": 1220 }, { "epoch": 0.09, "grad_norm": 1.7408920182263712, "learning_rate": 9.916140107136286e-06, "loss": 0.5608, "step": 1221 }, { "epoch": 0.09, "grad_norm": 0.9759958349596218, "learning_rate": 9.9159303916136e-06, "loss": 0.46, "step": 1222 }, { "epoch": 0.09, "grad_norm": 1.7995879385908335, "learning_rate": 9.91572041641481e-06, "loss": 0.6024, "step": 1223 }, { "epoch": 0.09, "grad_norm": 1.7615528811689827, "learning_rate": 9.915510181551015e-06, "loss": 0.556, "step": 1224 }, { "epoch": 0.09, "grad_norm": 2.333570123917567, "learning_rate": 9.915299687033315e-06, "loss": 0.6071, "step": 1225 }, { "epoch": 0.09, "grad_norm": 1.949082528809982, "learning_rate": 9.915088932872831e-06, "loss": 0.6253, "step": 1226 }, { "epoch": 0.09, "grad_norm": 1.8360940132115062, "learning_rate": 9.914877919080695e-06, "loss": 0.5912, "step": 1227 }, { "epoch": 0.09, "grad_norm": 1.750614397700351, "learning_rate": 9.914666645668052e-06, "loss": 0.6334, "step": 1228 }, { "epoch": 0.09, "grad_norm": 1.6776782891901605, "learning_rate": 9.914455112646065e-06, "loss": 0.6117, "step": 1229 }, { "epoch": 0.09, "grad_norm": 1.7194721920488816, "learning_rate": 9.914243320025905e-06, "loss": 0.6016, "step": 1230 }, { "epoch": 0.09, "grad_norm": 1.6936774213108488, "learning_rate": 9.914031267818762e-06, "loss": 0.6329, "step": 1231 }, { "epoch": 0.09, "grad_norm": 1.846935203238646, "learning_rate": 9.913818956035836e-06, "loss": 0.5812, "step": 1232 }, { "epoch": 0.09, "grad_norm": 2.062047162358173, "learning_rate": 9.913606384688344e-06, "loss": 0.6091, "step": 1233 }, { "epoch": 0.09, "grad_norm": 1.8408822602926813, "learning_rate": 9.913393553787512e-06, "loss": 0.5937, "step": 1234 }, { "epoch": 0.09, "grad_norm": 2.037736879102753, "learning_rate": 9.913180463344583e-06, "loss": 0.5851, "step": 1235 }, { "epoch": 0.09, "grad_norm": 1.8710903522498417, "learning_rate": 9.912967113370814e-06, "loss": 0.5807, "step": 1236 }, { "epoch": 0.09, "grad_norm": 2.017638473146537, "learning_rate": 9.912753503877475e-06, "loss": 0.596, "step": 1237 }, { "epoch": 0.09, "grad_norm": 1.6605641861866103, "learning_rate": 9.912539634875851e-06, "loss": 0.5811, "step": 1238 }, { "epoch": 0.09, "grad_norm": 2.2397582130518354, "learning_rate": 9.912325506377236e-06, "loss": 0.5626, "step": 1239 }, { "epoch": 0.09, "grad_norm": 1.887085337247806, "learning_rate": 9.912111118392942e-06, "loss": 0.6438, "step": 1240 }, { "epoch": 0.09, "grad_norm": 2.003306966785893, "learning_rate": 9.911896470934294e-06, "loss": 0.6206, "step": 1241 }, { "epoch": 0.09, "grad_norm": 1.6606324803064856, "learning_rate": 9.911681564012633e-06, "loss": 0.5664, "step": 1242 }, { "epoch": 0.09, "grad_norm": 1.9489965498868007, "learning_rate": 9.911466397639306e-06, "loss": 0.5833, "step": 1243 }, { "epoch": 0.09, "grad_norm": 2.1072711104629858, "learning_rate": 9.911250971825682e-06, "loss": 0.5593, "step": 1244 }, { "epoch": 0.09, "grad_norm": 1.8153025619281533, "learning_rate": 9.911035286583141e-06, "loss": 0.6293, "step": 1245 }, { "epoch": 0.09, "grad_norm": 1.615777170245443, "learning_rate": 9.910819341923074e-06, "loss": 0.5803, "step": 1246 }, { "epoch": 0.09, "grad_norm": 1.6433745346455484, "learning_rate": 9.910603137856892e-06, "loss": 0.5881, "step": 1247 }, { "epoch": 0.09, "grad_norm": 1.788186080786205, "learning_rate": 9.91038667439601e-06, "loss": 0.5882, "step": 1248 }, { "epoch": 0.09, "grad_norm": 1.7964366481156733, "learning_rate": 9.910169951551867e-06, "loss": 0.6109, "step": 1249 }, { "epoch": 0.09, "grad_norm": 1.4686137498066427, "learning_rate": 9.909952969335908e-06, "loss": 0.5142, "step": 1250 }, { "epoch": 0.09, "grad_norm": 1.9730179384567703, "learning_rate": 9.909735727759597e-06, "loss": 0.5083, "step": 1251 }, { "epoch": 0.09, "grad_norm": 1.747316190622302, "learning_rate": 9.909518226834407e-06, "loss": 0.6054, "step": 1252 }, { "epoch": 0.09, "grad_norm": 1.7546645665742489, "learning_rate": 9.90930046657183e-06, "loss": 0.6305, "step": 1253 }, { "epoch": 0.09, "grad_norm": 1.8919168767792636, "learning_rate": 9.909082446983366e-06, "loss": 0.5916, "step": 1254 }, { "epoch": 0.09, "grad_norm": 2.081472423914304, "learning_rate": 9.908864168080535e-06, "loss": 0.5986, "step": 1255 }, { "epoch": 0.09, "grad_norm": 0.9349120666792797, "learning_rate": 9.908645629874864e-06, "loss": 0.4466, "step": 1256 }, { "epoch": 0.09, "grad_norm": 1.0599331890168222, "learning_rate": 9.908426832377898e-06, "loss": 0.481, "step": 1257 }, { "epoch": 0.09, "grad_norm": 2.2925254598846565, "learning_rate": 9.908207775601194e-06, "loss": 0.6263, "step": 1258 }, { "epoch": 0.09, "grad_norm": 1.6594436051462882, "learning_rate": 9.907988459556324e-06, "loss": 0.5469, "step": 1259 }, { "epoch": 0.09, "grad_norm": 2.480169998601102, "learning_rate": 9.907768884254874e-06, "loss": 0.6157, "step": 1260 }, { "epoch": 0.09, "grad_norm": 3.1718315172992004, "learning_rate": 9.907549049708442e-06, "loss": 0.5684, "step": 1261 }, { "epoch": 0.09, "grad_norm": 1.7429427329173932, "learning_rate": 9.907328955928639e-06, "loss": 0.5549, "step": 1262 }, { "epoch": 0.09, "grad_norm": 1.7503000368204438, "learning_rate": 9.907108602927092e-06, "loss": 0.6309, "step": 1263 }, { "epoch": 0.09, "grad_norm": 1.9780421908216177, "learning_rate": 9.906887990715442e-06, "loss": 0.5322, "step": 1264 }, { "epoch": 0.09, "grad_norm": 1.6849970699653378, "learning_rate": 9.906667119305342e-06, "loss": 0.5814, "step": 1265 }, { "epoch": 0.09, "grad_norm": 1.6880254798309657, "learning_rate": 9.906445988708458e-06, "loss": 0.5648, "step": 1266 }, { "epoch": 0.09, "grad_norm": 1.2281946009392568, "learning_rate": 9.906224598936471e-06, "loss": 0.4574, "step": 1267 }, { "epoch": 0.09, "grad_norm": 2.5832065826218846, "learning_rate": 9.906002950001076e-06, "loss": 0.5781, "step": 1268 }, { "epoch": 0.09, "grad_norm": 1.8006861456662047, "learning_rate": 9.905781041913983e-06, "loss": 0.6056, "step": 1269 }, { "epoch": 0.09, "grad_norm": 1.8845317379449849, "learning_rate": 9.905558874686912e-06, "loss": 0.5648, "step": 1270 }, { "epoch": 0.09, "grad_norm": 1.797843254627934, "learning_rate": 9.905336448331598e-06, "loss": 0.5559, "step": 1271 }, { "epoch": 0.09, "grad_norm": 1.7881199631167275, "learning_rate": 9.905113762859792e-06, "loss": 0.5523, "step": 1272 }, { "epoch": 0.09, "grad_norm": 0.8603688583328809, "learning_rate": 9.904890818283255e-06, "loss": 0.4942, "step": 1273 }, { "epoch": 0.09, "grad_norm": 1.6406935850055777, "learning_rate": 9.904667614613766e-06, "loss": 0.5088, "step": 1274 }, { "epoch": 0.09, "grad_norm": 1.7478706271146758, "learning_rate": 9.904444151863116e-06, "loss": 0.6329, "step": 1275 }, { "epoch": 0.09, "grad_norm": 2.0602155586791855, "learning_rate": 9.904220430043104e-06, "loss": 0.6554, "step": 1276 }, { "epoch": 0.09, "grad_norm": 2.2906190483613673, "learning_rate": 9.903996449165552e-06, "loss": 0.6348, "step": 1277 }, { "epoch": 0.09, "grad_norm": 1.7824152376294604, "learning_rate": 9.903772209242291e-06, "loss": 0.5867, "step": 1278 }, { "epoch": 0.09, "grad_norm": 1.831351594269644, "learning_rate": 9.903547710285166e-06, "loss": 0.5791, "step": 1279 }, { "epoch": 0.09, "grad_norm": 2.398692158589471, "learning_rate": 9.903322952306036e-06, "loss": 0.591, "step": 1280 }, { "epoch": 0.09, "grad_norm": 1.8724691409790362, "learning_rate": 9.903097935316772e-06, "loss": 0.6094, "step": 1281 }, { "epoch": 0.09, "grad_norm": 3.384772094190995, "learning_rate": 9.902872659329262e-06, "loss": 0.638, "step": 1282 }, { "epoch": 0.09, "grad_norm": 1.7866847027448565, "learning_rate": 9.902647124355403e-06, "loss": 0.5583, "step": 1283 }, { "epoch": 0.09, "grad_norm": 1.6752402310169443, "learning_rate": 9.902421330407113e-06, "loss": 0.6332, "step": 1284 }, { "epoch": 0.09, "grad_norm": 2.239625640946203, "learning_rate": 9.902195277496314e-06, "loss": 0.5676, "step": 1285 }, { "epoch": 0.09, "grad_norm": 1.8022405602708378, "learning_rate": 9.901968965634951e-06, "loss": 0.6221, "step": 1286 }, { "epoch": 0.09, "grad_norm": 1.9479164162662685, "learning_rate": 9.901742394834978e-06, "loss": 0.6654, "step": 1287 }, { "epoch": 0.09, "grad_norm": 1.6437373470952514, "learning_rate": 9.901515565108362e-06, "loss": 0.6696, "step": 1288 }, { "epoch": 0.09, "grad_norm": 1.649059219926303, "learning_rate": 9.901288476467083e-06, "loss": 0.5872, "step": 1289 }, { "epoch": 0.09, "grad_norm": 1.7142790572321507, "learning_rate": 9.90106112892314e-06, "loss": 0.5768, "step": 1290 }, { "epoch": 0.09, "grad_norm": 1.8303934529150983, "learning_rate": 9.900833522488542e-06, "loss": 0.5599, "step": 1291 }, { "epoch": 0.09, "grad_norm": 1.663633003491383, "learning_rate": 9.900605657175311e-06, "loss": 0.5301, "step": 1292 }, { "epoch": 0.09, "grad_norm": 1.5649316109898668, "learning_rate": 9.900377532995483e-06, "loss": 0.5806, "step": 1293 }, { "epoch": 0.09, "grad_norm": 1.6451674648531116, "learning_rate": 9.90014914996111e-06, "loss": 0.4933, "step": 1294 }, { "epoch": 0.09, "grad_norm": 1.1087209807515346, "learning_rate": 9.899920508084253e-06, "loss": 0.4641, "step": 1295 }, { "epoch": 0.09, "grad_norm": 1.766917043451123, "learning_rate": 9.899691607376994e-06, "loss": 0.6108, "step": 1296 }, { "epoch": 0.09, "grad_norm": 1.7580224903077726, "learning_rate": 9.89946244785142e-06, "loss": 0.5794, "step": 1297 }, { "epoch": 0.09, "grad_norm": 1.9270486022285491, "learning_rate": 9.899233029519639e-06, "loss": 0.6296, "step": 1298 }, { "epoch": 0.09, "grad_norm": 2.230307602772785, "learning_rate": 9.899003352393768e-06, "loss": 0.6009, "step": 1299 }, { "epoch": 0.09, "grad_norm": 1.7316856634087157, "learning_rate": 9.898773416485938e-06, "loss": 0.5994, "step": 1300 }, { "epoch": 0.09, "grad_norm": 1.881291648098643, "learning_rate": 9.898543221808299e-06, "loss": 0.5823, "step": 1301 }, { "epoch": 0.09, "grad_norm": 2.0794535716666958, "learning_rate": 9.898312768373008e-06, "loss": 0.5832, "step": 1302 }, { "epoch": 0.09, "grad_norm": 1.860363783135927, "learning_rate": 9.89808205619224e-06, "loss": 0.5367, "step": 1303 }, { "epoch": 0.09, "grad_norm": 4.7691833475780125, "learning_rate": 9.89785108527818e-06, "loss": 0.544, "step": 1304 }, { "epoch": 0.09, "grad_norm": 1.685349383175745, "learning_rate": 9.897619855643029e-06, "loss": 0.5751, "step": 1305 }, { "epoch": 0.09, "grad_norm": 1.5000550718409968, "learning_rate": 9.897388367299002e-06, "loss": 0.5574, "step": 1306 }, { "epoch": 0.09, "grad_norm": 1.772086774182155, "learning_rate": 9.897156620258326e-06, "loss": 0.53, "step": 1307 }, { "epoch": 0.09, "grad_norm": 2.195287195478925, "learning_rate": 9.896924614533246e-06, "loss": 0.6622, "step": 1308 }, { "epoch": 0.09, "grad_norm": 1.6815398538194033, "learning_rate": 9.896692350136013e-06, "loss": 0.5351, "step": 1309 }, { "epoch": 0.09, "grad_norm": 2.3943270254285483, "learning_rate": 9.896459827078896e-06, "loss": 0.6193, "step": 1310 }, { "epoch": 0.09, "grad_norm": 1.5310504935398441, "learning_rate": 9.896227045374182e-06, "loss": 0.5428, "step": 1311 }, { "epoch": 0.09, "grad_norm": 1.9445873415145851, "learning_rate": 9.895994005034164e-06, "loss": 0.5945, "step": 1312 }, { "epoch": 0.09, "grad_norm": 1.6569725662881276, "learning_rate": 9.895760706071154e-06, "loss": 0.5532, "step": 1313 }, { "epoch": 0.09, "grad_norm": 1.6969636872927831, "learning_rate": 9.895527148497472e-06, "loss": 0.5846, "step": 1314 }, { "epoch": 0.09, "grad_norm": 2.0862062095764475, "learning_rate": 9.89529333232546e-06, "loss": 0.6017, "step": 1315 }, { "epoch": 0.09, "grad_norm": 2.8323648036871965, "learning_rate": 9.895059257567465e-06, "loss": 0.6146, "step": 1316 }, { "epoch": 0.09, "grad_norm": 2.0410247440072546, "learning_rate": 9.894824924235855e-06, "loss": 0.5383, "step": 1317 }, { "epoch": 0.09, "grad_norm": 1.9861623968625974, "learning_rate": 9.894590332343005e-06, "loss": 0.5667, "step": 1318 }, { "epoch": 0.09, "grad_norm": 2.0216831015068206, "learning_rate": 9.89435548190131e-06, "loss": 0.5808, "step": 1319 }, { "epoch": 0.09, "grad_norm": 0.9788110840017549, "learning_rate": 9.894120372923172e-06, "loss": 0.4723, "step": 1320 }, { "epoch": 0.09, "grad_norm": 2.1235272219530383, "learning_rate": 9.893885005421015e-06, "loss": 0.5047, "step": 1321 }, { "epoch": 0.09, "grad_norm": 1.6779220654572204, "learning_rate": 9.893649379407269e-06, "loss": 0.5952, "step": 1322 }, { "epoch": 0.09, "grad_norm": 1.776932945690128, "learning_rate": 9.893413494894378e-06, "loss": 0.6328, "step": 1323 }, { "epoch": 0.09, "grad_norm": 1.839696484408332, "learning_rate": 9.893177351894807e-06, "loss": 0.5846, "step": 1324 }, { "epoch": 0.09, "grad_norm": 1.5927340655025057, "learning_rate": 9.892940950421029e-06, "loss": 0.5873, "step": 1325 }, { "epoch": 0.09, "grad_norm": 1.6463108746052195, "learning_rate": 9.892704290485528e-06, "loss": 0.5417, "step": 1326 }, { "epoch": 0.09, "grad_norm": 2.398331314673184, "learning_rate": 9.892467372100808e-06, "loss": 0.5912, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.6800396101970716, "learning_rate": 9.892230195279386e-06, "loss": 0.5938, "step": 1328 }, { "epoch": 0.09, "grad_norm": 1.8939746797389594, "learning_rate": 9.891992760033786e-06, "loss": 0.6346, "step": 1329 }, { "epoch": 0.09, "grad_norm": 2.962087853264865, "learning_rate": 9.891755066376552e-06, "loss": 0.5937, "step": 1330 }, { "epoch": 0.09, "grad_norm": 1.0186779810421223, "learning_rate": 9.891517114320239e-06, "loss": 0.4831, "step": 1331 }, { "epoch": 0.09, "grad_norm": 1.7525537347974018, "learning_rate": 9.89127890387742e-06, "loss": 0.594, "step": 1332 }, { "epoch": 0.09, "grad_norm": 2.075203807381958, "learning_rate": 9.891040435060672e-06, "loss": 0.6373, "step": 1333 }, { "epoch": 0.09, "grad_norm": 1.640400335641878, "learning_rate": 9.890801707882598e-06, "loss": 0.5802, "step": 1334 }, { "epoch": 0.09, "grad_norm": 1.6790695787142118, "learning_rate": 9.890562722355804e-06, "loss": 0.5218, "step": 1335 }, { "epoch": 0.09, "grad_norm": 1.8131903722706173, "learning_rate": 9.890323478492915e-06, "loss": 0.5598, "step": 1336 }, { "epoch": 0.09, "grad_norm": 2.1050705278076056, "learning_rate": 9.89008397630657e-06, "loss": 0.6923, "step": 1337 }, { "epoch": 0.09, "grad_norm": 1.618339080207912, "learning_rate": 9.889844215809421e-06, "loss": 0.5674, "step": 1338 }, { "epoch": 0.1, "grad_norm": 2.1084757705216957, "learning_rate": 9.889604197014131e-06, "loss": 0.5198, "step": 1339 }, { "epoch": 0.1, "grad_norm": 1.5704978384104147, "learning_rate": 9.889363919933378e-06, "loss": 0.5306, "step": 1340 }, { "epoch": 0.1, "grad_norm": 2.004931824762026, "learning_rate": 9.889123384579856e-06, "loss": 0.6032, "step": 1341 }, { "epoch": 0.1, "grad_norm": 1.5742977725906473, "learning_rate": 9.888882590966268e-06, "loss": 0.5218, "step": 1342 }, { "epoch": 0.1, "grad_norm": 0.9949687699244333, "learning_rate": 9.888641539105338e-06, "loss": 0.4688, "step": 1343 }, { "epoch": 0.1, "grad_norm": 2.1446940836889308, "learning_rate": 9.888400229009796e-06, "loss": 0.5794, "step": 1344 }, { "epoch": 0.1, "grad_norm": 1.7524054776251003, "learning_rate": 9.88815866069239e-06, "loss": 0.5762, "step": 1345 }, { "epoch": 0.1, "grad_norm": 1.8018873812021827, "learning_rate": 9.887916834165881e-06, "loss": 0.5488, "step": 1346 }, { "epoch": 0.1, "grad_norm": 1.8624604865936865, "learning_rate": 9.887674749443041e-06, "loss": 0.6132, "step": 1347 }, { "epoch": 0.1, "grad_norm": 2.1302568016964893, "learning_rate": 9.88743240653666e-06, "loss": 0.5867, "step": 1348 }, { "epoch": 0.1, "grad_norm": 2.0794488835296883, "learning_rate": 9.887189805459538e-06, "loss": 0.6357, "step": 1349 }, { "epoch": 0.1, "grad_norm": 1.9293632110340833, "learning_rate": 9.88694694622449e-06, "loss": 0.5823, "step": 1350 }, { "epoch": 0.1, "grad_norm": 1.9122004757017805, "learning_rate": 9.886703828844346e-06, "loss": 0.5592, "step": 1351 }, { "epoch": 0.1, "grad_norm": 0.9722316927334979, "learning_rate": 9.886460453331947e-06, "loss": 0.478, "step": 1352 }, { "epoch": 0.1, "grad_norm": 1.656189158121428, "learning_rate": 9.88621681970015e-06, "loss": 0.6174, "step": 1353 }, { "epoch": 0.1, "grad_norm": 1.6861959515573761, "learning_rate": 9.885972927961822e-06, "loss": 0.5726, "step": 1354 }, { "epoch": 0.1, "grad_norm": 1.9496077551981468, "learning_rate": 9.88572877812985e-06, "loss": 0.6106, "step": 1355 }, { "epoch": 0.1, "grad_norm": 1.8746979828578094, "learning_rate": 9.885484370217127e-06, "loss": 0.5893, "step": 1356 }, { "epoch": 0.1, "grad_norm": 0.8776452706712202, "learning_rate": 9.885239704236567e-06, "loss": 0.4546, "step": 1357 }, { "epoch": 0.1, "grad_norm": 0.7957081751554597, "learning_rate": 9.884994780201092e-06, "loss": 0.4729, "step": 1358 }, { "epoch": 0.1, "grad_norm": 1.675260193758147, "learning_rate": 9.884749598123638e-06, "loss": 0.5906, "step": 1359 }, { "epoch": 0.1, "grad_norm": 0.9327316732091321, "learning_rate": 9.88450415801716e-06, "loss": 0.4625, "step": 1360 }, { "epoch": 0.1, "grad_norm": 1.5957453136922894, "learning_rate": 9.884258459894622e-06, "loss": 0.6584, "step": 1361 }, { "epoch": 0.1, "grad_norm": 1.6592526460131953, "learning_rate": 9.884012503769e-06, "loss": 0.5582, "step": 1362 }, { "epoch": 0.1, "grad_norm": 2.6922563835888456, "learning_rate": 9.883766289653291e-06, "loss": 0.5549, "step": 1363 }, { "epoch": 0.1, "grad_norm": 1.7760343892069097, "learning_rate": 9.883519817560497e-06, "loss": 0.6305, "step": 1364 }, { "epoch": 0.1, "grad_norm": 0.905601632868781, "learning_rate": 9.883273087503638e-06, "loss": 0.4594, "step": 1365 }, { "epoch": 0.1, "grad_norm": 2.612005952517804, "learning_rate": 9.883026099495749e-06, "loss": 0.4979, "step": 1366 }, { "epoch": 0.1, "grad_norm": 1.7579053370491673, "learning_rate": 9.882778853549874e-06, "loss": 0.5544, "step": 1367 }, { "epoch": 0.1, "grad_norm": 1.9690079749625564, "learning_rate": 9.882531349679076e-06, "loss": 0.6154, "step": 1368 }, { "epoch": 0.1, "grad_norm": 2.0707363390114595, "learning_rate": 9.882283587896426e-06, "loss": 0.6114, "step": 1369 }, { "epoch": 0.1, "grad_norm": 1.9054145000893203, "learning_rate": 9.882035568215016e-06, "loss": 0.5571, "step": 1370 }, { "epoch": 0.1, "grad_norm": 1.7902440879004258, "learning_rate": 9.881787290647943e-06, "loss": 0.6072, "step": 1371 }, { "epoch": 0.1, "grad_norm": 1.7707260498656345, "learning_rate": 9.881538755208324e-06, "loss": 0.6074, "step": 1372 }, { "epoch": 0.1, "grad_norm": 1.523197096700819, "learning_rate": 9.881289961909287e-06, "loss": 0.5912, "step": 1373 }, { "epoch": 0.1, "grad_norm": 2.1116212262284306, "learning_rate": 9.881040910763974e-06, "loss": 0.5876, "step": 1374 }, { "epoch": 0.1, "grad_norm": 1.5726819488627966, "learning_rate": 9.88079160178554e-06, "loss": 0.6164, "step": 1375 }, { "epoch": 0.1, "grad_norm": 1.890174774757809, "learning_rate": 9.880542034987158e-06, "loss": 0.6076, "step": 1376 }, { "epoch": 0.1, "grad_norm": 1.6738298907818343, "learning_rate": 9.880292210382005e-06, "loss": 0.5795, "step": 1377 }, { "epoch": 0.1, "grad_norm": 0.8874690448743272, "learning_rate": 9.880042127983282e-06, "loss": 0.4662, "step": 1378 }, { "epoch": 0.1, "grad_norm": 1.8534001522351713, "learning_rate": 9.879791787804199e-06, "loss": 0.6306, "step": 1379 }, { "epoch": 0.1, "grad_norm": 0.8742899839357069, "learning_rate": 9.879541189857978e-06, "loss": 0.47, "step": 1380 }, { "epoch": 0.1, "grad_norm": 1.8349905137473284, "learning_rate": 9.879290334157857e-06, "loss": 0.6415, "step": 1381 }, { "epoch": 0.1, "grad_norm": 1.6677320992876723, "learning_rate": 9.879039220717089e-06, "loss": 0.5991, "step": 1382 }, { "epoch": 0.1, "grad_norm": 1.7094385414930173, "learning_rate": 9.878787849548934e-06, "loss": 0.5745, "step": 1383 }, { "epoch": 0.1, "grad_norm": 2.7082202864508713, "learning_rate": 9.878536220666674e-06, "loss": 0.5477, "step": 1384 }, { "epoch": 0.1, "grad_norm": 1.904464090837267, "learning_rate": 9.8782843340836e-06, "loss": 0.6494, "step": 1385 }, { "epoch": 0.1, "grad_norm": 2.0391146150216755, "learning_rate": 9.87803218981302e-06, "loss": 0.6839, "step": 1386 }, { "epoch": 0.1, "grad_norm": 1.6545363508881952, "learning_rate": 9.87777978786825e-06, "loss": 0.6067, "step": 1387 }, { "epoch": 0.1, "grad_norm": 2.0163313088154173, "learning_rate": 9.87752712826262e-06, "loss": 0.5801, "step": 1388 }, { "epoch": 0.1, "grad_norm": 2.0057849125615226, "learning_rate": 9.877274211009482e-06, "loss": 0.4967, "step": 1389 }, { "epoch": 0.1, "grad_norm": 1.659019327226779, "learning_rate": 9.877021036122194e-06, "loss": 0.5553, "step": 1390 }, { "epoch": 0.1, "grad_norm": 0.9591829540884204, "learning_rate": 9.87676760361413e-06, "loss": 0.4478, "step": 1391 }, { "epoch": 0.1, "grad_norm": 2.2860450886430064, "learning_rate": 9.876513913498676e-06, "loss": 0.5312, "step": 1392 }, { "epoch": 0.1, "grad_norm": 1.6637381565510574, "learning_rate": 9.876259965789232e-06, "loss": 0.5951, "step": 1393 }, { "epoch": 0.1, "grad_norm": 1.8086496448379972, "learning_rate": 9.876005760499215e-06, "loss": 0.5952, "step": 1394 }, { "epoch": 0.1, "grad_norm": 1.6376146336386441, "learning_rate": 9.87575129764205e-06, "loss": 0.564, "step": 1395 }, { "epoch": 0.1, "grad_norm": 1.6583737900036206, "learning_rate": 9.875496577231181e-06, "loss": 0.6026, "step": 1396 }, { "epoch": 0.1, "grad_norm": 1.6521123902596038, "learning_rate": 9.875241599280063e-06, "loss": 0.5704, "step": 1397 }, { "epoch": 0.1, "grad_norm": 2.0089755274155285, "learning_rate": 9.874986363802163e-06, "loss": 0.5989, "step": 1398 }, { "epoch": 0.1, "grad_norm": 1.901134438407413, "learning_rate": 9.874730870810964e-06, "loss": 0.5195, "step": 1399 }, { "epoch": 0.1, "grad_norm": 1.9062941128993003, "learning_rate": 9.874475120319965e-06, "loss": 0.6028, "step": 1400 }, { "epoch": 0.1, "grad_norm": 1.6511868543735315, "learning_rate": 9.87421911234267e-06, "loss": 0.6108, "step": 1401 }, { "epoch": 0.1, "grad_norm": 1.6704424806548632, "learning_rate": 9.87396284689261e-06, "loss": 0.508, "step": 1402 }, { "epoch": 0.1, "grad_norm": 1.5990269055654875, "learning_rate": 9.873706323983314e-06, "loss": 0.6269, "step": 1403 }, { "epoch": 0.1, "grad_norm": 1.6517162408667507, "learning_rate": 9.873449543628336e-06, "loss": 0.5791, "step": 1404 }, { "epoch": 0.1, "grad_norm": 1.82014702658685, "learning_rate": 9.87319250584124e-06, "loss": 0.6256, "step": 1405 }, { "epoch": 0.1, "grad_norm": 1.1553734039171788, "learning_rate": 9.872935210635602e-06, "loss": 0.4683, "step": 1406 }, { "epoch": 0.1, "grad_norm": 2.9897352590189112, "learning_rate": 9.872677658025014e-06, "loss": 0.6422, "step": 1407 }, { "epoch": 0.1, "grad_norm": 1.631784403667013, "learning_rate": 9.872419848023084e-06, "loss": 0.5606, "step": 1408 }, { "epoch": 0.1, "grad_norm": 2.0214091419497486, "learning_rate": 9.872161780643425e-06, "loss": 0.6265, "step": 1409 }, { "epoch": 0.1, "grad_norm": 1.6810760993906442, "learning_rate": 9.871903455899674e-06, "loss": 0.5728, "step": 1410 }, { "epoch": 0.1, "grad_norm": 1.8721409896794687, "learning_rate": 9.871644873805473e-06, "loss": 0.5998, "step": 1411 }, { "epoch": 0.1, "grad_norm": 1.6225167295846379, "learning_rate": 9.871386034374481e-06, "loss": 0.57, "step": 1412 }, { "epoch": 0.1, "grad_norm": 1.5770990401282194, "learning_rate": 9.871126937620374e-06, "loss": 0.6318, "step": 1413 }, { "epoch": 0.1, "grad_norm": 1.6992957592098146, "learning_rate": 9.870867583556836e-06, "loss": 0.5733, "step": 1414 }, { "epoch": 0.1, "grad_norm": 1.7019954839069762, "learning_rate": 9.870607972197568e-06, "loss": 0.6263, "step": 1415 }, { "epoch": 0.1, "grad_norm": 1.6622363015669066, "learning_rate": 9.870348103556282e-06, "loss": 0.6661, "step": 1416 }, { "epoch": 0.1, "grad_norm": 0.9351899287855089, "learning_rate": 9.870087977646707e-06, "loss": 0.4757, "step": 1417 }, { "epoch": 0.1, "grad_norm": 2.037294074386783, "learning_rate": 9.869827594482582e-06, "loss": 0.6337, "step": 1418 }, { "epoch": 0.1, "grad_norm": 2.373470436973459, "learning_rate": 9.869566954077662e-06, "loss": 0.586, "step": 1419 }, { "epoch": 0.1, "grad_norm": 1.9651152257719244, "learning_rate": 9.869306056445717e-06, "loss": 0.5063, "step": 1420 }, { "epoch": 0.1, "grad_norm": 1.9959977057958365, "learning_rate": 9.869044901600526e-06, "loss": 0.6006, "step": 1421 }, { "epoch": 0.1, "grad_norm": 1.92125259190946, "learning_rate": 9.868783489555883e-06, "loss": 0.5855, "step": 1422 }, { "epoch": 0.1, "grad_norm": 1.716970955198324, "learning_rate": 9.8685218203256e-06, "loss": 0.6074, "step": 1423 }, { "epoch": 0.1, "grad_norm": 2.3122639599490284, "learning_rate": 9.868259893923498e-06, "loss": 0.6465, "step": 1424 }, { "epoch": 0.1, "grad_norm": 0.9404257486096801, "learning_rate": 9.86799771036341e-06, "loss": 0.4639, "step": 1425 }, { "epoch": 0.1, "grad_norm": 9.481099703646525, "learning_rate": 9.867735269659191e-06, "loss": 0.5019, "step": 1426 }, { "epoch": 0.1, "grad_norm": 2.330164184514846, "learning_rate": 9.867472571824699e-06, "loss": 0.6522, "step": 1427 }, { "epoch": 0.1, "grad_norm": 2.3864976308866823, "learning_rate": 9.867209616873815e-06, "loss": 0.608, "step": 1428 }, { "epoch": 0.1, "grad_norm": 1.673787400231851, "learning_rate": 9.866946404820423e-06, "loss": 0.5679, "step": 1429 }, { "epoch": 0.1, "grad_norm": 0.8949518955091295, "learning_rate": 9.866682935678434e-06, "loss": 0.4436, "step": 1430 }, { "epoch": 0.1, "grad_norm": 1.0951109104096572, "learning_rate": 9.866419209461759e-06, "loss": 0.4941, "step": 1431 }, { "epoch": 0.1, "grad_norm": 1.7284872333542305, "learning_rate": 9.866155226184334e-06, "loss": 0.5581, "step": 1432 }, { "epoch": 0.1, "grad_norm": 1.9827106237175938, "learning_rate": 9.865890985860098e-06, "loss": 0.5509, "step": 1433 }, { "epoch": 0.1, "grad_norm": 2.0066137464984792, "learning_rate": 9.865626488503013e-06, "loss": 0.5549, "step": 1434 }, { "epoch": 0.1, "grad_norm": 1.8035790701361751, "learning_rate": 9.865361734127052e-06, "loss": 0.5951, "step": 1435 }, { "epoch": 0.1, "grad_norm": 2.11728905765854, "learning_rate": 9.865096722746197e-06, "loss": 0.6113, "step": 1436 }, { "epoch": 0.1, "grad_norm": 1.7121587680313333, "learning_rate": 9.864831454374449e-06, "loss": 0.6516, "step": 1437 }, { "epoch": 0.1, "grad_norm": 1.6593418535335818, "learning_rate": 9.864565929025817e-06, "loss": 0.54, "step": 1438 }, { "epoch": 0.1, "grad_norm": 2.934403320829197, "learning_rate": 9.864300146714331e-06, "loss": 0.5584, "step": 1439 }, { "epoch": 0.1, "grad_norm": 2.227463207925463, "learning_rate": 9.864034107454028e-06, "loss": 0.5621, "step": 1440 }, { "epoch": 0.1, "grad_norm": 2.7975763799948856, "learning_rate": 9.863767811258961e-06, "loss": 0.5383, "step": 1441 }, { "epoch": 0.1, "grad_norm": 1.5590985199622502, "learning_rate": 9.8635012581432e-06, "loss": 0.5753, "step": 1442 }, { "epoch": 0.1, "grad_norm": 5.121568600115799, "learning_rate": 9.863234448120823e-06, "loss": 0.5054, "step": 1443 }, { "epoch": 0.1, "grad_norm": 1.9906501504891194, "learning_rate": 9.862967381205921e-06, "loss": 0.5811, "step": 1444 }, { "epoch": 0.1, "grad_norm": 1.995421982165682, "learning_rate": 9.862700057412604e-06, "loss": 0.6755, "step": 1445 }, { "epoch": 0.1, "grad_norm": 1.8739437708609863, "learning_rate": 9.862432476754995e-06, "loss": 0.55, "step": 1446 }, { "epoch": 0.1, "grad_norm": 2.519220949222514, "learning_rate": 9.862164639247224e-06, "loss": 0.563, "step": 1447 }, { "epoch": 0.1, "grad_norm": 2.067812305906726, "learning_rate": 9.861896544903443e-06, "loss": 0.5594, "step": 1448 }, { "epoch": 0.1, "grad_norm": 1.861383590271203, "learning_rate": 9.861628193737811e-06, "loss": 0.5543, "step": 1449 }, { "epoch": 0.1, "grad_norm": 1.89907605080924, "learning_rate": 9.861359585764504e-06, "loss": 0.6055, "step": 1450 }, { "epoch": 0.1, "grad_norm": 0.9542722957519214, "learning_rate": 9.861090720997715e-06, "loss": 0.449, "step": 1451 }, { "epoch": 0.1, "grad_norm": 3.7812884214850837, "learning_rate": 9.860821599451639e-06, "loss": 0.5602, "step": 1452 }, { "epoch": 0.1, "grad_norm": 2.028399645690091, "learning_rate": 9.860552221140496e-06, "loss": 0.5897, "step": 1453 }, { "epoch": 0.1, "grad_norm": 2.214139585015646, "learning_rate": 9.860282586078515e-06, "loss": 0.5966, "step": 1454 }, { "epoch": 0.1, "grad_norm": 4.258955655091173, "learning_rate": 9.860012694279939e-06, "loss": 0.6704, "step": 1455 }, { "epoch": 0.1, "grad_norm": 1.9263774385109618, "learning_rate": 9.859742545759025e-06, "loss": 0.5595, "step": 1456 }, { "epoch": 0.1, "grad_norm": 2.136398697505051, "learning_rate": 9.859472140530041e-06, "loss": 0.6063, "step": 1457 }, { "epoch": 0.1, "grad_norm": 1.8434398682311743, "learning_rate": 9.859201478607274e-06, "loss": 0.6289, "step": 1458 }, { "epoch": 0.1, "grad_norm": 2.090590812605694, "learning_rate": 9.85893056000502e-06, "loss": 0.6114, "step": 1459 }, { "epoch": 0.1, "grad_norm": 2.1828092135057124, "learning_rate": 9.858659384737588e-06, "loss": 0.6413, "step": 1460 }, { "epoch": 0.1, "grad_norm": 2.3776245917494676, "learning_rate": 9.858387952819305e-06, "loss": 0.6548, "step": 1461 }, { "epoch": 0.1, "grad_norm": 1.9615480021563265, "learning_rate": 9.858116264264508e-06, "loss": 0.5579, "step": 1462 }, { "epoch": 0.1, "grad_norm": 2.1296370927360777, "learning_rate": 9.857844319087548e-06, "loss": 0.5831, "step": 1463 }, { "epoch": 0.1, "grad_norm": 1.8444820092635612, "learning_rate": 9.85757211730279e-06, "loss": 0.5541, "step": 1464 }, { "epoch": 0.1, "grad_norm": 1.0228963123705634, "learning_rate": 9.857299658924613e-06, "loss": 0.4921, "step": 1465 }, { "epoch": 0.1, "grad_norm": 3.3255706664306106, "learning_rate": 9.85702694396741e-06, "loss": 0.644, "step": 1466 }, { "epoch": 0.1, "grad_norm": 2.359036497887175, "learning_rate": 9.856753972445586e-06, "loss": 0.5947, "step": 1467 }, { "epoch": 0.1, "grad_norm": 3.1193535244041626, "learning_rate": 9.85648074437356e-06, "loss": 0.5265, "step": 1468 }, { "epoch": 0.1, "grad_norm": 1.0093373920591115, "learning_rate": 9.856207259765765e-06, "loss": 0.4615, "step": 1469 }, { "epoch": 0.1, "grad_norm": 2.5590261701788166, "learning_rate": 9.855933518636648e-06, "loss": 0.5964, "step": 1470 }, { "epoch": 0.1, "grad_norm": 0.7952515854499836, "learning_rate": 9.855659521000666e-06, "loss": 0.456, "step": 1471 }, { "epoch": 0.1, "grad_norm": 1.5636008294253774, "learning_rate": 9.855385266872297e-06, "loss": 0.5056, "step": 1472 }, { "epoch": 0.1, "grad_norm": 2.1828141577885463, "learning_rate": 9.855110756266027e-06, "loss": 0.5175, "step": 1473 }, { "epoch": 0.1, "grad_norm": 2.2446849292271036, "learning_rate": 9.854835989196353e-06, "loss": 0.6067, "step": 1474 }, { "epoch": 0.1, "grad_norm": 2.032164353842534, "learning_rate": 9.854560965677793e-06, "loss": 0.5313, "step": 1475 }, { "epoch": 0.1, "grad_norm": 1.8866945212161064, "learning_rate": 9.854285685724874e-06, "loss": 0.5943, "step": 1476 }, { "epoch": 0.1, "grad_norm": 2.0487169262760814, "learning_rate": 9.854010149352136e-06, "loss": 0.5515, "step": 1477 }, { "epoch": 0.1, "grad_norm": 1.6827681019273515, "learning_rate": 9.853734356574133e-06, "loss": 0.549, "step": 1478 }, { "epoch": 0.1, "grad_norm": 2.9756537884457033, "learning_rate": 9.853458307405434e-06, "loss": 0.5952, "step": 1479 }, { "epoch": 0.11, "grad_norm": 1.700323285138724, "learning_rate": 9.853182001860625e-06, "loss": 0.5739, "step": 1480 }, { "epoch": 0.11, "grad_norm": 2.096009277136331, "learning_rate": 9.852905439954294e-06, "loss": 0.5791, "step": 1481 }, { "epoch": 0.11, "grad_norm": 1.8899140128821095, "learning_rate": 9.852628621701058e-06, "loss": 0.5723, "step": 1482 }, { "epoch": 0.11, "grad_norm": 2.045682355152485, "learning_rate": 9.852351547115531e-06, "loss": 0.5662, "step": 1483 }, { "epoch": 0.11, "grad_norm": 2.506298801233158, "learning_rate": 9.852074216212355e-06, "loss": 0.6104, "step": 1484 }, { "epoch": 0.11, "grad_norm": 1.9678981949825465, "learning_rate": 9.851796629006179e-06, "loss": 0.6574, "step": 1485 }, { "epoch": 0.11, "grad_norm": 1.6412805618982835, "learning_rate": 9.851518785511662e-06, "loss": 0.62, "step": 1486 }, { "epoch": 0.11, "grad_norm": 1.7657762269071653, "learning_rate": 9.851240685743486e-06, "loss": 0.5557, "step": 1487 }, { "epoch": 0.11, "grad_norm": 2.093973366966402, "learning_rate": 9.850962329716339e-06, "loss": 0.6025, "step": 1488 }, { "epoch": 0.11, "grad_norm": 1.9111357378609122, "learning_rate": 9.850683717444922e-06, "loss": 0.6162, "step": 1489 }, { "epoch": 0.11, "grad_norm": 1.7179396545791499, "learning_rate": 9.850404848943956e-06, "loss": 0.5121, "step": 1490 }, { "epoch": 0.11, "grad_norm": 2.0316296958324043, "learning_rate": 9.850125724228172e-06, "loss": 0.5919, "step": 1491 }, { "epoch": 0.11, "grad_norm": 3.8979095905076733, "learning_rate": 9.84984634331231e-06, "loss": 0.6592, "step": 1492 }, { "epoch": 0.11, "grad_norm": 3.2880244189660632, "learning_rate": 9.849566706211133e-06, "loss": 0.4916, "step": 1493 }, { "epoch": 0.11, "grad_norm": 1.6657361310543528, "learning_rate": 9.84928681293941e-06, "loss": 0.6745, "step": 1494 }, { "epoch": 0.11, "grad_norm": 1.7937754617560573, "learning_rate": 9.849006663511924e-06, "loss": 0.6375, "step": 1495 }, { "epoch": 0.11, "grad_norm": 2.6951311812239105, "learning_rate": 9.848726257943476e-06, "loss": 0.5786, "step": 1496 }, { "epoch": 0.11, "grad_norm": 2.289863702325345, "learning_rate": 9.848445596248877e-06, "loss": 0.6399, "step": 1497 }, { "epoch": 0.11, "grad_norm": 0.9292179696752689, "learning_rate": 9.848164678442953e-06, "loss": 0.4865, "step": 1498 }, { "epoch": 0.11, "grad_norm": 1.8243361869346137, "learning_rate": 9.847883504540545e-06, "loss": 0.6432, "step": 1499 }, { "epoch": 0.11, "grad_norm": 2.683884634997996, "learning_rate": 9.847602074556502e-06, "loss": 0.5837, "step": 1500 }, { "epoch": 0.11, "grad_norm": 2.1554401485748014, "learning_rate": 9.84732038850569e-06, "loss": 0.6043, "step": 1501 }, { "epoch": 0.11, "grad_norm": 1.6290773936268348, "learning_rate": 9.847038446402992e-06, "loss": 0.5487, "step": 1502 }, { "epoch": 0.11, "grad_norm": 2.3331277257409377, "learning_rate": 9.846756248263299e-06, "loss": 0.6153, "step": 1503 }, { "epoch": 0.11, "grad_norm": 1.9749741572803685, "learning_rate": 9.846473794101517e-06, "loss": 0.634, "step": 1504 }, { "epoch": 0.11, "grad_norm": 1.723253840874343, "learning_rate": 9.846191083932569e-06, "loss": 0.609, "step": 1505 }, { "epoch": 0.11, "grad_norm": 1.844205542323179, "learning_rate": 9.845908117771383e-06, "loss": 0.582, "step": 1506 }, { "epoch": 0.11, "grad_norm": 1.8029672975852855, "learning_rate": 9.845624895632914e-06, "loss": 0.6385, "step": 1507 }, { "epoch": 0.11, "grad_norm": 1.988096938597669, "learning_rate": 9.845341417532117e-06, "loss": 0.6499, "step": 1508 }, { "epoch": 0.11, "grad_norm": 2.0366305174923385, "learning_rate": 9.845057683483969e-06, "loss": 0.5613, "step": 1509 }, { "epoch": 0.11, "grad_norm": 0.821054847799922, "learning_rate": 9.844773693503456e-06, "loss": 0.4616, "step": 1510 }, { "epoch": 0.11, "grad_norm": 0.896355283867609, "learning_rate": 9.84448944760558e-06, "loss": 0.4723, "step": 1511 }, { "epoch": 0.11, "grad_norm": 1.8123919998872928, "learning_rate": 9.844204945805355e-06, "loss": 0.5644, "step": 1512 }, { "epoch": 0.11, "grad_norm": 1.7316559427634801, "learning_rate": 9.843920188117812e-06, "loss": 0.5801, "step": 1513 }, { "epoch": 0.11, "grad_norm": 1.9866674963325475, "learning_rate": 9.843635174557992e-06, "loss": 0.5822, "step": 1514 }, { "epoch": 0.11, "grad_norm": 1.9964800827778924, "learning_rate": 9.84334990514095e-06, "loss": 0.614, "step": 1515 }, { "epoch": 0.11, "grad_norm": 2.2043228155621692, "learning_rate": 9.843064379881751e-06, "loss": 0.619, "step": 1516 }, { "epoch": 0.11, "grad_norm": 1.6488031515615715, "learning_rate": 9.842778598795483e-06, "loss": 0.6427, "step": 1517 }, { "epoch": 0.11, "grad_norm": 1.9720703629603002, "learning_rate": 9.842492561897241e-06, "loss": 0.5264, "step": 1518 }, { "epoch": 0.11, "grad_norm": 2.1347501938543463, "learning_rate": 9.842206269202131e-06, "loss": 0.5913, "step": 1519 }, { "epoch": 0.11, "grad_norm": 1.0228150264712537, "learning_rate": 9.84191972072528e-06, "loss": 0.4788, "step": 1520 }, { "epoch": 0.11, "grad_norm": 1.796899264805008, "learning_rate": 9.841632916481822e-06, "loss": 0.581, "step": 1521 }, { "epoch": 0.11, "grad_norm": 3.4717192049253547, "learning_rate": 9.841345856486907e-06, "loss": 0.5323, "step": 1522 }, { "epoch": 0.11, "grad_norm": 1.9951087058069443, "learning_rate": 9.8410585407557e-06, "loss": 0.5739, "step": 1523 }, { "epoch": 0.11, "grad_norm": 2.2886954156592316, "learning_rate": 9.840770969303377e-06, "loss": 0.6224, "step": 1524 }, { "epoch": 0.11, "grad_norm": 1.9509597226831588, "learning_rate": 9.840483142145129e-06, "loss": 0.51, "step": 1525 }, { "epoch": 0.11, "grad_norm": 1.6522838256087842, "learning_rate": 9.84019505929616e-06, "loss": 0.5836, "step": 1526 }, { "epoch": 0.11, "grad_norm": 2.2459818437562427, "learning_rate": 9.839906720771685e-06, "loss": 0.5695, "step": 1527 }, { "epoch": 0.11, "grad_norm": 2.08916565525036, "learning_rate": 9.839618126586938e-06, "loss": 0.5882, "step": 1528 }, { "epoch": 0.11, "grad_norm": 2.240821146309717, "learning_rate": 9.839329276757163e-06, "loss": 0.6425, "step": 1529 }, { "epoch": 0.11, "grad_norm": 1.8397566958723328, "learning_rate": 9.839040171297619e-06, "loss": 0.5905, "step": 1530 }, { "epoch": 0.11, "grad_norm": 1.6947902142134277, "learning_rate": 9.838750810223575e-06, "loss": 0.5847, "step": 1531 }, { "epoch": 0.11, "grad_norm": 1.7145726321095267, "learning_rate": 9.838461193550317e-06, "loss": 0.5508, "step": 1532 }, { "epoch": 0.11, "grad_norm": 1.8443937488419102, "learning_rate": 9.838171321293145e-06, "loss": 0.536, "step": 1533 }, { "epoch": 0.11, "grad_norm": 1.648708302139436, "learning_rate": 9.83788119346737e-06, "loss": 0.5262, "step": 1534 }, { "epoch": 0.11, "grad_norm": 0.8761204977405773, "learning_rate": 9.837590810088316e-06, "loss": 0.4545, "step": 1535 }, { "epoch": 0.11, "grad_norm": 1.6647307695243208, "learning_rate": 9.837300171171323e-06, "loss": 0.5266, "step": 1536 }, { "epoch": 0.11, "grad_norm": 1.6120160196330557, "learning_rate": 9.837009276731746e-06, "loss": 0.5282, "step": 1537 }, { "epoch": 0.11, "grad_norm": 1.071056511808313, "learning_rate": 9.836718126784948e-06, "loss": 0.4503, "step": 1538 }, { "epoch": 0.11, "grad_norm": 0.8713131886550167, "learning_rate": 9.836426721346308e-06, "loss": 0.4529, "step": 1539 }, { "epoch": 0.11, "grad_norm": 1.7950699385501552, "learning_rate": 9.836135060431223e-06, "loss": 0.5694, "step": 1540 }, { "epoch": 0.11, "grad_norm": 1.855999792749136, "learning_rate": 9.835843144055096e-06, "loss": 0.5851, "step": 1541 }, { "epoch": 0.11, "grad_norm": 1.730557104318448, "learning_rate": 9.835550972233347e-06, "loss": 0.5548, "step": 1542 }, { "epoch": 0.11, "grad_norm": 1.8633466601167028, "learning_rate": 9.835258544981412e-06, "loss": 0.7023, "step": 1543 }, { "epoch": 0.11, "grad_norm": 2.0905976904484556, "learning_rate": 9.834965862314735e-06, "loss": 0.6438, "step": 1544 }, { "epoch": 0.11, "grad_norm": 1.9116822668911706, "learning_rate": 9.83467292424878e-06, "loss": 0.6247, "step": 1545 }, { "epoch": 0.11, "grad_norm": 1.8892253985840513, "learning_rate": 9.834379730799018e-06, "loss": 0.5883, "step": 1546 }, { "epoch": 0.11, "grad_norm": 1.820160483902085, "learning_rate": 9.834086281980939e-06, "loss": 0.6035, "step": 1547 }, { "epoch": 0.11, "grad_norm": 1.6730970553235953, "learning_rate": 9.833792577810039e-06, "loss": 0.5673, "step": 1548 }, { "epoch": 0.11, "grad_norm": 2.035293034977337, "learning_rate": 9.83349861830184e-06, "loss": 0.5326, "step": 1549 }, { "epoch": 0.11, "grad_norm": 1.7761475601402066, "learning_rate": 9.833204403471863e-06, "loss": 0.5769, "step": 1550 }, { "epoch": 0.11, "grad_norm": 1.8766534333938016, "learning_rate": 9.832909933335654e-06, "loss": 0.6043, "step": 1551 }, { "epoch": 0.11, "grad_norm": 3.1027282438101396, "learning_rate": 9.832615207908764e-06, "loss": 0.5968, "step": 1552 }, { "epoch": 0.11, "grad_norm": 2.6293091433880185, "learning_rate": 9.832320227206766e-06, "loss": 0.5999, "step": 1553 }, { "epoch": 0.11, "grad_norm": 1.7368266857328927, "learning_rate": 9.832024991245239e-06, "loss": 0.6164, "step": 1554 }, { "epoch": 0.11, "grad_norm": 1.7576112320855033, "learning_rate": 9.831729500039777e-06, "loss": 0.6785, "step": 1555 }, { "epoch": 0.11, "grad_norm": 1.8307473909175571, "learning_rate": 9.831433753605991e-06, "loss": 0.6213, "step": 1556 }, { "epoch": 0.11, "grad_norm": 1.6772546349476507, "learning_rate": 9.831137751959505e-06, "loss": 0.5818, "step": 1557 }, { "epoch": 0.11, "grad_norm": 1.6016211343928812, "learning_rate": 9.830841495115953e-06, "loss": 0.5786, "step": 1558 }, { "epoch": 0.11, "grad_norm": 1.8589299776562003, "learning_rate": 9.830544983090981e-06, "loss": 0.6085, "step": 1559 }, { "epoch": 0.11, "grad_norm": 1.8260544733557214, "learning_rate": 9.830248215900258e-06, "loss": 0.5315, "step": 1560 }, { "epoch": 0.11, "grad_norm": 1.670022879962204, "learning_rate": 9.829951193559454e-06, "loss": 0.5996, "step": 1561 }, { "epoch": 0.11, "grad_norm": 0.9624733801027308, "learning_rate": 9.829653916084266e-06, "loss": 0.4457, "step": 1562 }, { "epoch": 0.11, "grad_norm": 1.9469722046258957, "learning_rate": 9.82935638349039e-06, "loss": 0.6246, "step": 1563 }, { "epoch": 0.11, "grad_norm": 1.7551382150165016, "learning_rate": 9.829058595793547e-06, "loss": 0.5039, "step": 1564 }, { "epoch": 0.11, "grad_norm": 1.677544766974154, "learning_rate": 9.828760553009465e-06, "loss": 0.595, "step": 1565 }, { "epoch": 0.11, "grad_norm": 1.6836328474471074, "learning_rate": 9.828462255153888e-06, "loss": 0.5477, "step": 1566 }, { "epoch": 0.11, "grad_norm": 1.7724968537330028, "learning_rate": 9.828163702242574e-06, "loss": 0.5929, "step": 1567 }, { "epoch": 0.11, "grad_norm": 1.6949652615383937, "learning_rate": 9.827864894291295e-06, "loss": 0.5635, "step": 1568 }, { "epoch": 0.11, "grad_norm": 1.640802326747633, "learning_rate": 9.827565831315832e-06, "loss": 0.6115, "step": 1569 }, { "epoch": 0.11, "grad_norm": 0.924672089187853, "learning_rate": 9.827266513331983e-06, "loss": 0.4711, "step": 1570 }, { "epoch": 0.11, "grad_norm": 1.7689106298480262, "learning_rate": 9.82696694035556e-06, "loss": 0.6107, "step": 1571 }, { "epoch": 0.11, "grad_norm": 1.7981877657524108, "learning_rate": 9.826667112402389e-06, "loss": 0.5117, "step": 1572 }, { "epoch": 0.11, "grad_norm": 1.7220863478534356, "learning_rate": 9.826367029488305e-06, "loss": 0.526, "step": 1573 }, { "epoch": 0.11, "grad_norm": 2.5018013839811397, "learning_rate": 9.82606669162916e-06, "loss": 0.5547, "step": 1574 }, { "epoch": 0.11, "grad_norm": 1.7725953143831277, "learning_rate": 9.82576609884082e-06, "loss": 0.5089, "step": 1575 }, { "epoch": 0.11, "grad_norm": 1.895532656094033, "learning_rate": 9.825465251139162e-06, "loss": 0.5905, "step": 1576 }, { "epoch": 0.11, "grad_norm": 1.4908339176305159, "learning_rate": 9.825164148540079e-06, "loss": 0.5368, "step": 1577 }, { "epoch": 0.11, "grad_norm": 2.079911526669793, "learning_rate": 9.824862791059476e-06, "loss": 0.6269, "step": 1578 }, { "epoch": 0.11, "grad_norm": 2.859521367961629, "learning_rate": 9.824561178713272e-06, "loss": 0.6996, "step": 1579 }, { "epoch": 0.11, "grad_norm": 2.169168318659681, "learning_rate": 9.824259311517396e-06, "loss": 0.557, "step": 1580 }, { "epoch": 0.11, "grad_norm": 1.88249992411124, "learning_rate": 9.823957189487801e-06, "loss": 0.5377, "step": 1581 }, { "epoch": 0.11, "grad_norm": 1.7778815986272392, "learning_rate": 9.823654812640438e-06, "loss": 0.5912, "step": 1582 }, { "epoch": 0.11, "grad_norm": 1.8539350460700688, "learning_rate": 9.823352180991283e-06, "loss": 0.4998, "step": 1583 }, { "epoch": 0.11, "grad_norm": 1.7673844116002844, "learning_rate": 9.823049294556324e-06, "loss": 0.6286, "step": 1584 }, { "epoch": 0.11, "grad_norm": 1.877665104666314, "learning_rate": 9.822746153351557e-06, "loss": 0.5398, "step": 1585 }, { "epoch": 0.11, "grad_norm": 1.641569988839206, "learning_rate": 9.822442757392996e-06, "loss": 0.5956, "step": 1586 }, { "epoch": 0.11, "grad_norm": 1.785509914706729, "learning_rate": 9.822139106696668e-06, "loss": 0.5902, "step": 1587 }, { "epoch": 0.11, "grad_norm": 1.9349241008709828, "learning_rate": 9.821835201278615e-06, "loss": 0.53, "step": 1588 }, { "epoch": 0.11, "grad_norm": 1.6217658983953225, "learning_rate": 9.821531041154885e-06, "loss": 0.5612, "step": 1589 }, { "epoch": 0.11, "grad_norm": 3.9560051568541152, "learning_rate": 9.821226626341552e-06, "loss": 0.6205, "step": 1590 }, { "epoch": 0.11, "grad_norm": 1.7990213566814486, "learning_rate": 9.82092195685469e-06, "loss": 0.5442, "step": 1591 }, { "epoch": 0.11, "grad_norm": 1.779142961331543, "learning_rate": 9.820617032710393e-06, "loss": 0.6246, "step": 1592 }, { "epoch": 0.11, "grad_norm": 0.8107379121808159, "learning_rate": 9.820311853924771e-06, "loss": 0.4663, "step": 1593 }, { "epoch": 0.11, "grad_norm": 1.7208302145817087, "learning_rate": 9.820006420513944e-06, "loss": 0.578, "step": 1594 }, { "epoch": 0.11, "grad_norm": 2.9151133571688486, "learning_rate": 9.819700732494044e-06, "loss": 0.5764, "step": 1595 }, { "epoch": 0.11, "grad_norm": 0.8662837230965708, "learning_rate": 9.819394789881222e-06, "loss": 0.4665, "step": 1596 }, { "epoch": 0.11, "grad_norm": 1.4904979348105627, "learning_rate": 9.819088592691634e-06, "loss": 0.5445, "step": 1597 }, { "epoch": 0.11, "grad_norm": 1.7093385780023105, "learning_rate": 9.81878214094146e-06, "loss": 0.5896, "step": 1598 }, { "epoch": 0.11, "grad_norm": 1.8134905269016217, "learning_rate": 9.818475434646884e-06, "loss": 0.5286, "step": 1599 }, { "epoch": 0.11, "grad_norm": 1.68852689279498, "learning_rate": 9.818168473824108e-06, "loss": 0.5887, "step": 1600 }, { "epoch": 0.11, "grad_norm": 0.8660182807726676, "learning_rate": 9.817861258489347e-06, "loss": 0.4715, "step": 1601 }, { "epoch": 0.11, "grad_norm": 1.5622628909502618, "learning_rate": 9.81755378865883e-06, "loss": 0.6048, "step": 1602 }, { "epoch": 0.11, "grad_norm": 1.6420845708675185, "learning_rate": 9.817246064348797e-06, "loss": 0.5727, "step": 1603 }, { "epoch": 0.11, "grad_norm": 1.8233446934041162, "learning_rate": 9.816938085575505e-06, "loss": 0.535, "step": 1604 }, { "epoch": 0.11, "grad_norm": 1.4395387955754717, "learning_rate": 9.816629852355219e-06, "loss": 0.5764, "step": 1605 }, { "epoch": 0.11, "grad_norm": 0.8827211499418127, "learning_rate": 9.816321364704227e-06, "loss": 0.4747, "step": 1606 }, { "epoch": 0.11, "grad_norm": 1.7553925011685585, "learning_rate": 9.816012622638817e-06, "loss": 0.5906, "step": 1607 }, { "epoch": 0.11, "grad_norm": 1.8728316307807868, "learning_rate": 9.815703626175303e-06, "loss": 0.5548, "step": 1608 }, { "epoch": 0.11, "grad_norm": 1.9727137353619058, "learning_rate": 9.815394375330005e-06, "loss": 0.5571, "step": 1609 }, { "epoch": 0.11, "grad_norm": 2.1967351453914095, "learning_rate": 9.81508487011926e-06, "loss": 0.5078, "step": 1610 }, { "epoch": 0.11, "grad_norm": 1.7560020321923198, "learning_rate": 9.814775110559415e-06, "loss": 0.6072, "step": 1611 }, { "epoch": 0.11, "grad_norm": 1.6380506234046546, "learning_rate": 9.814465096666837e-06, "loss": 0.5563, "step": 1612 }, { "epoch": 0.11, "grad_norm": 1.4790109115036612, "learning_rate": 9.814154828457895e-06, "loss": 0.5828, "step": 1613 }, { "epoch": 0.11, "grad_norm": 1.6659779725324986, "learning_rate": 9.813844305948984e-06, "loss": 0.6296, "step": 1614 }, { "epoch": 0.11, "grad_norm": 2.075095849674983, "learning_rate": 9.813533529156507e-06, "loss": 0.5573, "step": 1615 }, { "epoch": 0.11, "grad_norm": 1.4698755951975089, "learning_rate": 9.813222498096877e-06, "loss": 0.5986, "step": 1616 }, { "epoch": 0.11, "grad_norm": 1.793025537997058, "learning_rate": 9.812911212786525e-06, "loss": 0.5242, "step": 1617 }, { "epoch": 0.11, "grad_norm": 1.6963956019489854, "learning_rate": 9.812599673241896e-06, "loss": 0.6089, "step": 1618 }, { "epoch": 0.11, "grad_norm": 1.8024032896597002, "learning_rate": 9.812287879479442e-06, "loss": 0.6022, "step": 1619 }, { "epoch": 0.11, "grad_norm": 1.017901370911912, "learning_rate": 9.81197583151564e-06, "loss": 0.4632, "step": 1620 }, { "epoch": 0.12, "grad_norm": 1.8083063949841252, "learning_rate": 9.811663529366966e-06, "loss": 0.5672, "step": 1621 }, { "epoch": 0.12, "grad_norm": 1.9075548432686693, "learning_rate": 9.81135097304992e-06, "loss": 0.6011, "step": 1622 }, { "epoch": 0.12, "grad_norm": 2.1158141447699537, "learning_rate": 9.811038162581015e-06, "loss": 0.6131, "step": 1623 }, { "epoch": 0.12, "grad_norm": 0.9678323502907267, "learning_rate": 9.81072509797677e-06, "loss": 0.4665, "step": 1624 }, { "epoch": 0.12, "grad_norm": 1.5052479744935263, "learning_rate": 9.810411779253725e-06, "loss": 0.6009, "step": 1625 }, { "epoch": 0.12, "grad_norm": 1.8632299582540817, "learning_rate": 9.810098206428432e-06, "loss": 0.5452, "step": 1626 }, { "epoch": 0.12, "grad_norm": 1.5085253336266504, "learning_rate": 9.80978437951745e-06, "loss": 0.5733, "step": 1627 }, { "epoch": 0.12, "grad_norm": 1.6984652842263934, "learning_rate": 9.80947029853736e-06, "loss": 0.5591, "step": 1628 }, { "epoch": 0.12, "grad_norm": 1.7951071243314094, "learning_rate": 9.809155963504753e-06, "loss": 0.551, "step": 1629 }, { "epoch": 0.12, "grad_norm": 3.6827709780243767, "learning_rate": 9.808841374436233e-06, "loss": 0.6769, "step": 1630 }, { "epoch": 0.12, "grad_norm": 1.6784552134007635, "learning_rate": 9.808526531348415e-06, "loss": 0.611, "step": 1631 }, { "epoch": 0.12, "grad_norm": 1.6783044488443546, "learning_rate": 9.808211434257933e-06, "loss": 0.5133, "step": 1632 }, { "epoch": 0.12, "grad_norm": 2.0318226917064615, "learning_rate": 9.807896083181431e-06, "loss": 0.6137, "step": 1633 }, { "epoch": 0.12, "grad_norm": 1.5583388671532634, "learning_rate": 9.807580478135565e-06, "loss": 0.5543, "step": 1634 }, { "epoch": 0.12, "grad_norm": 1.7716049186121556, "learning_rate": 9.80726461913701e-06, "loss": 0.5514, "step": 1635 }, { "epoch": 0.12, "grad_norm": 3.432260157933293, "learning_rate": 9.806948506202446e-06, "loss": 0.5992, "step": 1636 }, { "epoch": 0.12, "grad_norm": 1.504919761136113, "learning_rate": 9.806632139348578e-06, "loss": 0.5316, "step": 1637 }, { "epoch": 0.12, "grad_norm": 1.6976097015709428, "learning_rate": 9.80631551859211e-06, "loss": 0.6231, "step": 1638 }, { "epoch": 0.12, "grad_norm": 1.6266425970762441, "learning_rate": 9.80599864394977e-06, "loss": 0.6289, "step": 1639 }, { "epoch": 0.12, "grad_norm": 1.7771037127262488, "learning_rate": 9.805681515438298e-06, "loss": 0.5222, "step": 1640 }, { "epoch": 0.12, "grad_norm": 0.9208656386535256, "learning_rate": 9.805364133074444e-06, "loss": 0.4447, "step": 1641 }, { "epoch": 0.12, "grad_norm": 1.789268403930533, "learning_rate": 9.805046496874974e-06, "loss": 0.6743, "step": 1642 }, { "epoch": 0.12, "grad_norm": 1.7367432836619436, "learning_rate": 9.804728606856666e-06, "loss": 0.5984, "step": 1643 }, { "epoch": 0.12, "grad_norm": 1.7749041284997291, "learning_rate": 9.804410463036313e-06, "loss": 0.6539, "step": 1644 }, { "epoch": 0.12, "grad_norm": 1.7572920048536245, "learning_rate": 9.80409206543072e-06, "loss": 0.643, "step": 1645 }, { "epoch": 0.12, "grad_norm": 0.9562006887458541, "learning_rate": 9.803773414056703e-06, "loss": 0.4529, "step": 1646 }, { "epoch": 0.12, "grad_norm": 2.036531148913945, "learning_rate": 9.8034545089311e-06, "loss": 0.6162, "step": 1647 }, { "epoch": 0.12, "grad_norm": 1.8886877480460629, "learning_rate": 9.803135350070752e-06, "loss": 0.5699, "step": 1648 }, { "epoch": 0.12, "grad_norm": 1.712058000118822, "learning_rate": 9.802815937492518e-06, "loss": 0.6111, "step": 1649 }, { "epoch": 0.12, "grad_norm": 1.69819920057338, "learning_rate": 9.802496271213276e-06, "loss": 0.5954, "step": 1650 }, { "epoch": 0.12, "grad_norm": 1.5828689901871047, "learning_rate": 9.802176351249905e-06, "loss": 0.5664, "step": 1651 }, { "epoch": 0.12, "grad_norm": 1.7122568104346672, "learning_rate": 9.801856177619307e-06, "loss": 0.5544, "step": 1652 }, { "epoch": 0.12, "grad_norm": 1.6101887488009492, "learning_rate": 9.801535750338395e-06, "loss": 0.6017, "step": 1653 }, { "epoch": 0.12, "grad_norm": 0.8770295904184275, "learning_rate": 9.801215069424097e-06, "loss": 0.4734, "step": 1654 }, { "epoch": 0.12, "grad_norm": 2.5538497684752905, "learning_rate": 9.800894134893348e-06, "loss": 0.5859, "step": 1655 }, { "epoch": 0.12, "grad_norm": 1.773759860776207, "learning_rate": 9.800572946763104e-06, "loss": 0.5987, "step": 1656 }, { "epoch": 0.12, "grad_norm": 0.951090288170339, "learning_rate": 9.800251505050331e-06, "loss": 0.4638, "step": 1657 }, { "epoch": 0.12, "grad_norm": 2.364074739333842, "learning_rate": 9.799929809772007e-06, "loss": 0.6612, "step": 1658 }, { "epoch": 0.12, "grad_norm": 2.4646801114604338, "learning_rate": 9.799607860945126e-06, "loss": 0.5699, "step": 1659 }, { "epoch": 0.12, "grad_norm": 1.8673764833004431, "learning_rate": 9.799285658586695e-06, "loss": 0.6009, "step": 1660 }, { "epoch": 0.12, "grad_norm": 1.6815522093024686, "learning_rate": 9.798963202713734e-06, "loss": 0.5708, "step": 1661 }, { "epoch": 0.12, "grad_norm": 1.9845032090945882, "learning_rate": 9.798640493343274e-06, "loss": 0.5391, "step": 1662 }, { "epoch": 0.12, "grad_norm": 1.6520694580596809, "learning_rate": 9.798317530492365e-06, "loss": 0.5726, "step": 1663 }, { "epoch": 0.12, "grad_norm": 1.5480146687315741, "learning_rate": 9.797994314178063e-06, "loss": 0.5282, "step": 1664 }, { "epoch": 0.12, "grad_norm": 1.8950087085896452, "learning_rate": 9.797670844417444e-06, "loss": 0.582, "step": 1665 }, { "epoch": 0.12, "grad_norm": 2.0085361032706666, "learning_rate": 9.797347121227595e-06, "loss": 0.596, "step": 1666 }, { "epoch": 0.12, "grad_norm": 2.031869936183376, "learning_rate": 9.797023144625615e-06, "loss": 0.6443, "step": 1667 }, { "epoch": 0.12, "grad_norm": 2.0467287298403156, "learning_rate": 9.796698914628618e-06, "loss": 0.638, "step": 1668 }, { "epoch": 0.12, "grad_norm": 4.6093371696203596, "learning_rate": 9.79637443125373e-06, "loss": 0.5975, "step": 1669 }, { "epoch": 0.12, "grad_norm": 1.7774429536077863, "learning_rate": 9.796049694518095e-06, "loss": 0.6005, "step": 1670 }, { "epoch": 0.12, "grad_norm": 3.2054433318233624, "learning_rate": 9.795724704438861e-06, "loss": 0.5675, "step": 1671 }, { "epoch": 0.12, "grad_norm": 0.9681266789265309, "learning_rate": 9.7953994610332e-06, "loss": 0.4546, "step": 1672 }, { "epoch": 0.12, "grad_norm": 1.6134086807317816, "learning_rate": 9.79507396431829e-06, "loss": 0.535, "step": 1673 }, { "epoch": 0.12, "grad_norm": 2.9171573974938694, "learning_rate": 9.794748214311324e-06, "loss": 0.6382, "step": 1674 }, { "epoch": 0.12, "grad_norm": 1.6118727491446523, "learning_rate": 9.794422211029511e-06, "loss": 0.4913, "step": 1675 }, { "epoch": 0.12, "grad_norm": 2.3586547827431494, "learning_rate": 9.79409595449007e-06, "loss": 0.5877, "step": 1676 }, { "epoch": 0.12, "grad_norm": 1.717249667473596, "learning_rate": 9.793769444710239e-06, "loss": 0.5975, "step": 1677 }, { "epoch": 0.12, "grad_norm": 1.757111355760766, "learning_rate": 9.793442681707261e-06, "loss": 0.5307, "step": 1678 }, { "epoch": 0.12, "grad_norm": 1.6920483677746456, "learning_rate": 9.793115665498398e-06, "loss": 0.5652, "step": 1679 }, { "epoch": 0.12, "grad_norm": 1.7033990317954792, "learning_rate": 9.792788396100923e-06, "loss": 0.5937, "step": 1680 }, { "epoch": 0.12, "grad_norm": 1.8810573999902531, "learning_rate": 9.792460873532128e-06, "loss": 0.5032, "step": 1681 }, { "epoch": 0.12, "grad_norm": 1.9533958696397018, "learning_rate": 9.792133097809308e-06, "loss": 0.558, "step": 1682 }, { "epoch": 0.12, "grad_norm": 1.8146314482778534, "learning_rate": 9.79180506894978e-06, "loss": 0.5705, "step": 1683 }, { "epoch": 0.12, "grad_norm": 1.75601315195119, "learning_rate": 9.791476786970873e-06, "loss": 0.6213, "step": 1684 }, { "epoch": 0.12, "grad_norm": 1.6628949380902125, "learning_rate": 9.791148251889924e-06, "loss": 0.6026, "step": 1685 }, { "epoch": 0.12, "grad_norm": 2.1787093040919747, "learning_rate": 9.790819463724292e-06, "loss": 0.5719, "step": 1686 }, { "epoch": 0.12, "grad_norm": 1.9656340111868644, "learning_rate": 9.790490422491342e-06, "loss": 0.5465, "step": 1687 }, { "epoch": 0.12, "grad_norm": 2.0233460014282256, "learning_rate": 9.790161128208453e-06, "loss": 0.6109, "step": 1688 }, { "epoch": 0.12, "grad_norm": 2.219591199813304, "learning_rate": 9.789831580893024e-06, "loss": 0.5704, "step": 1689 }, { "epoch": 0.12, "grad_norm": 1.5528507550985806, "learning_rate": 9.78950178056246e-06, "loss": 0.539, "step": 1690 }, { "epoch": 0.12, "grad_norm": 1.793121006620478, "learning_rate": 9.789171727234184e-06, "loss": 0.6452, "step": 1691 }, { "epoch": 0.12, "grad_norm": 1.812612359250488, "learning_rate": 9.78884142092563e-06, "loss": 0.5684, "step": 1692 }, { "epoch": 0.12, "grad_norm": 1.981753155531472, "learning_rate": 9.788510861654244e-06, "loss": 0.5224, "step": 1693 }, { "epoch": 0.12, "grad_norm": 1.8328008107321687, "learning_rate": 9.788180049437487e-06, "loss": 0.6294, "step": 1694 }, { "epoch": 0.12, "grad_norm": 1.016448791561159, "learning_rate": 9.787848984292838e-06, "loss": 0.4823, "step": 1695 }, { "epoch": 0.12, "grad_norm": 1.606306438319079, "learning_rate": 9.78751766623778e-06, "loss": 0.5298, "step": 1696 }, { "epoch": 0.12, "grad_norm": 1.7539288028720381, "learning_rate": 9.787186095289818e-06, "loss": 0.5923, "step": 1697 }, { "epoch": 0.12, "grad_norm": 1.7902307876041041, "learning_rate": 9.786854271466465e-06, "loss": 0.6406, "step": 1698 }, { "epoch": 0.12, "grad_norm": 8.767421809537648, "learning_rate": 9.78652219478525e-06, "loss": 0.6653, "step": 1699 }, { "epoch": 0.12, "grad_norm": 1.6772022879558186, "learning_rate": 9.786189865263713e-06, "loss": 0.5475, "step": 1700 }, { "epoch": 0.12, "grad_norm": 1.609551122426434, "learning_rate": 9.785857282919409e-06, "loss": 0.5994, "step": 1701 }, { "epoch": 0.12, "grad_norm": 2.1307515829363926, "learning_rate": 9.785524447769907e-06, "loss": 0.5248, "step": 1702 }, { "epoch": 0.12, "grad_norm": 2.447965777138737, "learning_rate": 9.785191359832789e-06, "loss": 0.5236, "step": 1703 }, { "epoch": 0.12, "grad_norm": 1.6619264478435938, "learning_rate": 9.784858019125647e-06, "loss": 0.533, "step": 1704 }, { "epoch": 0.12, "grad_norm": 1.5915751748013487, "learning_rate": 9.784524425666092e-06, "loss": 0.5632, "step": 1705 }, { "epoch": 0.12, "grad_norm": 1.6637265226514524, "learning_rate": 9.784190579471746e-06, "loss": 0.5771, "step": 1706 }, { "epoch": 0.12, "grad_norm": 2.1892585503153517, "learning_rate": 9.78385648056024e-06, "loss": 0.6139, "step": 1707 }, { "epoch": 0.12, "grad_norm": 2.2017523093281706, "learning_rate": 9.783522128949227e-06, "loss": 0.5735, "step": 1708 }, { "epoch": 0.12, "grad_norm": 1.8500997631793252, "learning_rate": 9.783187524656366e-06, "loss": 0.6108, "step": 1709 }, { "epoch": 0.12, "grad_norm": 1.6286744618554194, "learning_rate": 9.782852667699331e-06, "loss": 0.6234, "step": 1710 }, { "epoch": 0.12, "grad_norm": 1.6612408748054623, "learning_rate": 9.782517558095814e-06, "loss": 0.5912, "step": 1711 }, { "epoch": 0.12, "grad_norm": 0.980804823788116, "learning_rate": 9.782182195863513e-06, "loss": 0.4386, "step": 1712 }, { "epoch": 0.12, "grad_norm": 2.6432678732711907, "learning_rate": 9.781846581020142e-06, "loss": 0.6171, "step": 1713 }, { "epoch": 0.12, "grad_norm": 1.6488865030594788, "learning_rate": 9.781510713583433e-06, "loss": 0.5513, "step": 1714 }, { "epoch": 0.12, "grad_norm": 1.9297626843937195, "learning_rate": 9.781174593571127e-06, "loss": 0.626, "step": 1715 }, { "epoch": 0.12, "grad_norm": 2.4946679741702518, "learning_rate": 9.78083822100098e-06, "loss": 0.5752, "step": 1716 }, { "epoch": 0.12, "grad_norm": 1.622967771019889, "learning_rate": 9.780501595890755e-06, "loss": 0.5999, "step": 1717 }, { "epoch": 0.12, "grad_norm": 1.5030803376976007, "learning_rate": 9.780164718258238e-06, "loss": 0.5073, "step": 1718 }, { "epoch": 0.12, "grad_norm": 1.9132037930952, "learning_rate": 9.779827588121223e-06, "loss": 0.5213, "step": 1719 }, { "epoch": 0.12, "grad_norm": 1.5061665763363306, "learning_rate": 9.77949020549752e-06, "loss": 0.5955, "step": 1720 }, { "epoch": 0.12, "grad_norm": 2.751075858961627, "learning_rate": 9.779152570404948e-06, "loss": 0.5999, "step": 1721 }, { "epoch": 0.12, "grad_norm": 1.4720862941923512, "learning_rate": 9.778814682861344e-06, "loss": 0.6689, "step": 1722 }, { "epoch": 0.12, "grad_norm": 2.030438264972945, "learning_rate": 9.778476542884553e-06, "loss": 0.6117, "step": 1723 }, { "epoch": 0.12, "grad_norm": 2.1218447446264057, "learning_rate": 9.778138150492441e-06, "loss": 0.5829, "step": 1724 }, { "epoch": 0.12, "grad_norm": 1.601387980763318, "learning_rate": 9.777799505702883e-06, "loss": 0.5979, "step": 1725 }, { "epoch": 0.12, "grad_norm": 1.9026227421275081, "learning_rate": 9.777460608533765e-06, "loss": 0.5445, "step": 1726 }, { "epoch": 0.12, "grad_norm": 1.7601491290801956, "learning_rate": 9.777121459002987e-06, "loss": 0.4724, "step": 1727 }, { "epoch": 0.12, "grad_norm": 1.5833284767521465, "learning_rate": 9.77678205712847e-06, "loss": 0.5658, "step": 1728 }, { "epoch": 0.12, "grad_norm": 1.7589918083326543, "learning_rate": 9.776442402928137e-06, "loss": 0.6454, "step": 1729 }, { "epoch": 0.12, "grad_norm": 1.9417152826057664, "learning_rate": 9.77610249641993e-06, "loss": 0.5311, "step": 1730 }, { "epoch": 0.12, "grad_norm": 1.62205089531339, "learning_rate": 9.775762337621808e-06, "loss": 0.6092, "step": 1731 }, { "epoch": 0.12, "grad_norm": 1.5843787918497383, "learning_rate": 9.775421926551737e-06, "loss": 0.538, "step": 1732 }, { "epoch": 0.12, "grad_norm": 1.4691147932554922, "learning_rate": 9.775081263227696e-06, "loss": 0.562, "step": 1733 }, { "epoch": 0.12, "grad_norm": 1.712318444821895, "learning_rate": 9.774740347667684e-06, "loss": 0.6503, "step": 1734 }, { "epoch": 0.12, "grad_norm": 1.76385220329107, "learning_rate": 9.774399179889708e-06, "loss": 0.5756, "step": 1735 }, { "epoch": 0.12, "grad_norm": 1.8109453036453846, "learning_rate": 9.774057759911788e-06, "loss": 0.5844, "step": 1736 }, { "epoch": 0.12, "grad_norm": 0.9796204919772943, "learning_rate": 9.773716087751963e-06, "loss": 0.4684, "step": 1737 }, { "epoch": 0.12, "grad_norm": 1.7391490141610613, "learning_rate": 9.773374163428276e-06, "loss": 0.5618, "step": 1738 }, { "epoch": 0.12, "grad_norm": 1.6642568469827328, "learning_rate": 9.773031986958791e-06, "loss": 0.5926, "step": 1739 }, { "epoch": 0.12, "grad_norm": 1.907366466457657, "learning_rate": 9.772689558361585e-06, "loss": 0.5525, "step": 1740 }, { "epoch": 0.12, "grad_norm": 1.9133541465407675, "learning_rate": 9.772346877654746e-06, "loss": 0.5691, "step": 1741 }, { "epoch": 0.12, "grad_norm": 1.7544241640946543, "learning_rate": 9.772003944856371e-06, "loss": 0.6077, "step": 1742 }, { "epoch": 0.12, "grad_norm": 1.7163307483405952, "learning_rate": 9.77166075998458e-06, "loss": 0.5678, "step": 1743 }, { "epoch": 0.12, "grad_norm": 1.5364558271019502, "learning_rate": 9.771317323057498e-06, "loss": 0.57, "step": 1744 }, { "epoch": 0.12, "grad_norm": 0.8491838709009322, "learning_rate": 9.770973634093267e-06, "loss": 0.4801, "step": 1745 }, { "epoch": 0.12, "grad_norm": 0.9488322829777677, "learning_rate": 9.770629693110042e-06, "loss": 0.4862, "step": 1746 }, { "epoch": 0.12, "grad_norm": 1.5608502494769665, "learning_rate": 9.770285500125993e-06, "loss": 0.6013, "step": 1747 }, { "epoch": 0.12, "grad_norm": 1.9447417433967484, "learning_rate": 9.7699410551593e-06, "loss": 0.6285, "step": 1748 }, { "epoch": 0.12, "grad_norm": 1.6900212055833288, "learning_rate": 9.769596358228158e-06, "loss": 0.5384, "step": 1749 }, { "epoch": 0.12, "grad_norm": 2.3974428956127847, "learning_rate": 9.769251409350773e-06, "loss": 0.6307, "step": 1750 }, { "epoch": 0.12, "grad_norm": 1.6235126037514267, "learning_rate": 9.76890620854537e-06, "loss": 0.5694, "step": 1751 }, { "epoch": 0.12, "grad_norm": 1.8290281895591345, "learning_rate": 9.768560755830181e-06, "loss": 0.5884, "step": 1752 }, { "epoch": 0.12, "grad_norm": 1.7045940824613837, "learning_rate": 9.768215051223454e-06, "loss": 0.6855, "step": 1753 }, { "epoch": 0.12, "grad_norm": 1.6657670482430476, "learning_rate": 9.767869094743451e-06, "loss": 0.546, "step": 1754 }, { "epoch": 0.12, "grad_norm": 1.650841482514477, "learning_rate": 9.767522886408449e-06, "loss": 0.5633, "step": 1755 }, { "epoch": 0.12, "grad_norm": 2.460697297290688, "learning_rate": 9.767176426236731e-06, "loss": 0.5462, "step": 1756 }, { "epoch": 0.12, "grad_norm": 1.8564609757309207, "learning_rate": 9.766829714246601e-06, "loss": 0.5147, "step": 1757 }, { "epoch": 0.12, "grad_norm": 2.0074975508382584, "learning_rate": 9.766482750456374e-06, "loss": 0.5542, "step": 1758 }, { "epoch": 0.12, "grad_norm": 3.2097870773622716, "learning_rate": 9.766135534884378e-06, "loss": 0.6294, "step": 1759 }, { "epoch": 0.12, "grad_norm": 1.969401746085967, "learning_rate": 9.765788067548952e-06, "loss": 0.655, "step": 1760 }, { "epoch": 0.12, "grad_norm": 0.9967869336526871, "learning_rate": 9.765440348468452e-06, "loss": 0.4917, "step": 1761 }, { "epoch": 0.13, "grad_norm": 1.8080405259289378, "learning_rate": 9.765092377661244e-06, "loss": 0.6236, "step": 1762 }, { "epoch": 0.13, "grad_norm": 2.001248753149921, "learning_rate": 9.764744155145713e-06, "loss": 0.6595, "step": 1763 }, { "epoch": 0.13, "grad_norm": 1.8313497394619975, "learning_rate": 9.764395680940248e-06, "loss": 0.48, "step": 1764 }, { "epoch": 0.13, "grad_norm": 0.903556680776509, "learning_rate": 9.764046955063261e-06, "loss": 0.4482, "step": 1765 }, { "epoch": 0.13, "grad_norm": 2.186807309459345, "learning_rate": 9.763697977533172e-06, "loss": 0.6006, "step": 1766 }, { "epoch": 0.13, "grad_norm": 1.737625322769082, "learning_rate": 9.763348748368411e-06, "loss": 0.6262, "step": 1767 }, { "epoch": 0.13, "grad_norm": 0.9047360426339274, "learning_rate": 9.76299926758743e-06, "loss": 0.4639, "step": 1768 }, { "epoch": 0.13, "grad_norm": 1.649864737356665, "learning_rate": 9.762649535208689e-06, "loss": 0.5644, "step": 1769 }, { "epoch": 0.13, "grad_norm": 1.8913121564534474, "learning_rate": 9.76229955125066e-06, "loss": 0.5757, "step": 1770 }, { "epoch": 0.13, "grad_norm": 2.7826359725913457, "learning_rate": 9.761949315731834e-06, "loss": 0.5445, "step": 1771 }, { "epoch": 0.13, "grad_norm": 1.5926916333176009, "learning_rate": 9.761598828670707e-06, "loss": 0.5988, "step": 1772 }, { "epoch": 0.13, "grad_norm": 2.150778943168815, "learning_rate": 9.761248090085798e-06, "loss": 0.5639, "step": 1773 }, { "epoch": 0.13, "grad_norm": 1.5887044410235698, "learning_rate": 9.76089709999563e-06, "loss": 0.5251, "step": 1774 }, { "epoch": 0.13, "grad_norm": 1.7625364747691568, "learning_rate": 9.760545858418744e-06, "loss": 0.6303, "step": 1775 }, { "epoch": 0.13, "grad_norm": 1.606159875096524, "learning_rate": 9.760194365373696e-06, "loss": 0.5841, "step": 1776 }, { "epoch": 0.13, "grad_norm": 1.884117181246165, "learning_rate": 9.759842620879053e-06, "loss": 0.619, "step": 1777 }, { "epoch": 0.13, "grad_norm": 2.049831972534073, "learning_rate": 9.759490624953393e-06, "loss": 0.5811, "step": 1778 }, { "epoch": 0.13, "grad_norm": 1.5928390716809229, "learning_rate": 9.759138377615311e-06, "loss": 0.5798, "step": 1779 }, { "epoch": 0.13, "grad_norm": 1.8439494015108664, "learning_rate": 9.758785878883413e-06, "loss": 0.5735, "step": 1780 }, { "epoch": 0.13, "grad_norm": 1.6746342225967408, "learning_rate": 9.758433128776321e-06, "loss": 0.536, "step": 1781 }, { "epoch": 0.13, "grad_norm": 1.705669000634515, "learning_rate": 9.758080127312666e-06, "loss": 0.5219, "step": 1782 }, { "epoch": 0.13, "grad_norm": 2.2142027678037226, "learning_rate": 9.757726874511097e-06, "loss": 0.6629, "step": 1783 }, { "epoch": 0.13, "grad_norm": 1.7625890345692965, "learning_rate": 9.757373370390274e-06, "loss": 0.6193, "step": 1784 }, { "epoch": 0.13, "grad_norm": 2.0942723124065004, "learning_rate": 9.75701961496887e-06, "loss": 0.5819, "step": 1785 }, { "epoch": 0.13, "grad_norm": 1.7134856774426843, "learning_rate": 9.75666560826557e-06, "loss": 0.6335, "step": 1786 }, { "epoch": 0.13, "grad_norm": 1.8311358067731494, "learning_rate": 9.756311350299073e-06, "loss": 0.565, "step": 1787 }, { "epoch": 0.13, "grad_norm": 1.6952227730767107, "learning_rate": 9.755956841088096e-06, "loss": 0.6534, "step": 1788 }, { "epoch": 0.13, "grad_norm": 2.511588474741989, "learning_rate": 9.755602080651363e-06, "loss": 0.6159, "step": 1789 }, { "epoch": 0.13, "grad_norm": 1.6418722437803235, "learning_rate": 9.755247069007611e-06, "loss": 0.6116, "step": 1790 }, { "epoch": 0.13, "grad_norm": 1.8406873494696212, "learning_rate": 9.754891806175599e-06, "loss": 0.568, "step": 1791 }, { "epoch": 0.13, "grad_norm": 1.513150197524101, "learning_rate": 9.754536292174091e-06, "loss": 0.5775, "step": 1792 }, { "epoch": 0.13, "grad_norm": 1.8038826352427186, "learning_rate": 9.754180527021863e-06, "loss": 0.6082, "step": 1793 }, { "epoch": 0.13, "grad_norm": 1.611546032342575, "learning_rate": 9.753824510737711e-06, "loss": 0.5236, "step": 1794 }, { "epoch": 0.13, "grad_norm": 1.5351709866888281, "learning_rate": 9.753468243340441e-06, "loss": 0.6076, "step": 1795 }, { "epoch": 0.13, "grad_norm": 1.6747702563689981, "learning_rate": 9.75311172484887e-06, "loss": 0.5674, "step": 1796 }, { "epoch": 0.13, "grad_norm": 1.648031312017971, "learning_rate": 9.752754955281832e-06, "loss": 0.6199, "step": 1797 }, { "epoch": 0.13, "grad_norm": 1.5706558811508375, "learning_rate": 9.752397934658174e-06, "loss": 0.6213, "step": 1798 }, { "epoch": 0.13, "grad_norm": 2.35205738774739, "learning_rate": 9.752040662996754e-06, "loss": 0.5428, "step": 1799 }, { "epoch": 0.13, "grad_norm": 1.7018239185476982, "learning_rate": 9.751683140316441e-06, "loss": 0.5652, "step": 1800 }, { "epoch": 0.13, "grad_norm": 1.853198227662673, "learning_rate": 9.751325366636127e-06, "loss": 0.5787, "step": 1801 }, { "epoch": 0.13, "grad_norm": 1.4283102049329293, "learning_rate": 9.750967341974706e-06, "loss": 0.5545, "step": 1802 }, { "epoch": 0.13, "grad_norm": 1.7892101508664149, "learning_rate": 9.750609066351092e-06, "loss": 0.5921, "step": 1803 }, { "epoch": 0.13, "grad_norm": 1.9886044622953356, "learning_rate": 9.75025053978421e-06, "loss": 0.5352, "step": 1804 }, { "epoch": 0.13, "grad_norm": 1.790273329919268, "learning_rate": 9.749891762292998e-06, "loss": 0.5972, "step": 1805 }, { "epoch": 0.13, "grad_norm": 1.483443222067405, "learning_rate": 9.749532733896408e-06, "loss": 0.5451, "step": 1806 }, { "epoch": 0.13, "grad_norm": 1.5320377845397621, "learning_rate": 9.749173454613405e-06, "loss": 0.6303, "step": 1807 }, { "epoch": 0.13, "grad_norm": 1.5696239111847572, "learning_rate": 9.74881392446297e-06, "loss": 0.5424, "step": 1808 }, { "epoch": 0.13, "grad_norm": 1.9336253127248402, "learning_rate": 9.74845414346409e-06, "loss": 0.6305, "step": 1809 }, { "epoch": 0.13, "grad_norm": 1.1120712276236346, "learning_rate": 9.748094111635774e-06, "loss": 0.4822, "step": 1810 }, { "epoch": 0.13, "grad_norm": 0.9854686968662069, "learning_rate": 9.747733828997036e-06, "loss": 0.4793, "step": 1811 }, { "epoch": 0.13, "grad_norm": 1.6916323840798329, "learning_rate": 9.747373295566911e-06, "loss": 0.5994, "step": 1812 }, { "epoch": 0.13, "grad_norm": 1.7034161389557745, "learning_rate": 9.747012511364442e-06, "loss": 0.5634, "step": 1813 }, { "epoch": 0.13, "grad_norm": 2.471965522940496, "learning_rate": 9.746651476408687e-06, "loss": 0.4955, "step": 1814 }, { "epoch": 0.13, "grad_norm": 2.319652145767398, "learning_rate": 9.746290190718717e-06, "loss": 0.5425, "step": 1815 }, { "epoch": 0.13, "grad_norm": 1.59692612732707, "learning_rate": 9.745928654313617e-06, "loss": 0.5992, "step": 1816 }, { "epoch": 0.13, "grad_norm": 2.968788286250123, "learning_rate": 9.745566867212483e-06, "loss": 0.6042, "step": 1817 }, { "epoch": 0.13, "grad_norm": 1.7823263910581828, "learning_rate": 9.745204829434429e-06, "loss": 0.6, "step": 1818 }, { "epoch": 0.13, "grad_norm": 1.6930016049551133, "learning_rate": 9.744842540998576e-06, "loss": 0.6289, "step": 1819 }, { "epoch": 0.13, "grad_norm": 3.118471169363396, "learning_rate": 9.744480001924062e-06, "loss": 0.6185, "step": 1820 }, { "epoch": 0.13, "grad_norm": 1.5923845319176075, "learning_rate": 9.744117212230038e-06, "loss": 0.6263, "step": 1821 }, { "epoch": 0.13, "grad_norm": 1.6464883088006244, "learning_rate": 9.743754171935668e-06, "loss": 0.6258, "step": 1822 }, { "epoch": 0.13, "grad_norm": 1.6735617586884581, "learning_rate": 9.743390881060128e-06, "loss": 0.6307, "step": 1823 }, { "epoch": 0.13, "grad_norm": 2.073981150557679, "learning_rate": 9.743027339622609e-06, "loss": 0.6098, "step": 1824 }, { "epoch": 0.13, "grad_norm": 1.8490703665318342, "learning_rate": 9.742663547642314e-06, "loss": 0.5772, "step": 1825 }, { "epoch": 0.13, "grad_norm": 1.6370870315855133, "learning_rate": 9.74229950513846e-06, "loss": 0.5596, "step": 1826 }, { "epoch": 0.13, "grad_norm": 1.7245658535014168, "learning_rate": 9.741935212130277e-06, "loss": 0.5353, "step": 1827 }, { "epoch": 0.13, "grad_norm": 1.76488788251901, "learning_rate": 9.741570668637009e-06, "loss": 0.531, "step": 1828 }, { "epoch": 0.13, "grad_norm": 2.121555657488959, "learning_rate": 9.741205874677912e-06, "loss": 0.6274, "step": 1829 }, { "epoch": 0.13, "grad_norm": 1.6008938025447474, "learning_rate": 9.740840830272253e-06, "loss": 0.5528, "step": 1830 }, { "epoch": 0.13, "grad_norm": 1.1258905454263233, "learning_rate": 9.74047553543932e-06, "loss": 0.4828, "step": 1831 }, { "epoch": 0.13, "grad_norm": 1.036980778288693, "learning_rate": 9.740109990198405e-06, "loss": 0.4817, "step": 1832 }, { "epoch": 0.13, "grad_norm": 1.626082657956783, "learning_rate": 9.73974419456882e-06, "loss": 0.5458, "step": 1833 }, { "epoch": 0.13, "grad_norm": 1.8812303512649025, "learning_rate": 9.739378148569882e-06, "loss": 0.6286, "step": 1834 }, { "epoch": 0.13, "grad_norm": 2.0536167328882935, "learning_rate": 9.739011852220934e-06, "loss": 0.5576, "step": 1835 }, { "epoch": 0.13, "grad_norm": 2.2579630032365956, "learning_rate": 9.738645305541323e-06, "loss": 0.5114, "step": 1836 }, { "epoch": 0.13, "grad_norm": 0.9930640274728167, "learning_rate": 9.738278508550409e-06, "loss": 0.4685, "step": 1837 }, { "epoch": 0.13, "grad_norm": 1.7599488549702391, "learning_rate": 9.737911461267569e-06, "loss": 0.5631, "step": 1838 }, { "epoch": 0.13, "grad_norm": 1.6566367386726666, "learning_rate": 9.737544163712192e-06, "loss": 0.5902, "step": 1839 }, { "epoch": 0.13, "grad_norm": 1.7383763836761734, "learning_rate": 9.737176615903677e-06, "loss": 0.5485, "step": 1840 }, { "epoch": 0.13, "grad_norm": 3.8174449743113494, "learning_rate": 9.736808817861442e-06, "loss": 0.6577, "step": 1841 }, { "epoch": 0.13, "grad_norm": 1.583372161788993, "learning_rate": 9.736440769604917e-06, "loss": 0.6362, "step": 1842 }, { "epoch": 0.13, "grad_norm": 1.47932775671271, "learning_rate": 9.736072471153539e-06, "loss": 0.5893, "step": 1843 }, { "epoch": 0.13, "grad_norm": 1.8968165397017982, "learning_rate": 9.735703922526767e-06, "loss": 0.5705, "step": 1844 }, { "epoch": 0.13, "grad_norm": 1.9733098276832608, "learning_rate": 9.735335123744065e-06, "loss": 0.5465, "step": 1845 }, { "epoch": 0.13, "grad_norm": 1.7147654002984136, "learning_rate": 9.734966074824918e-06, "loss": 0.5625, "step": 1846 }, { "epoch": 0.13, "grad_norm": 1.7218864041970574, "learning_rate": 9.734596775788819e-06, "loss": 0.5844, "step": 1847 }, { "epoch": 0.13, "grad_norm": 2.1545241455697384, "learning_rate": 9.734227226655273e-06, "loss": 0.5952, "step": 1848 }, { "epoch": 0.13, "grad_norm": 1.7034475724160862, "learning_rate": 9.733857427443805e-06, "loss": 0.6328, "step": 1849 }, { "epoch": 0.13, "grad_norm": 1.8168758250070562, "learning_rate": 9.733487378173946e-06, "loss": 0.547, "step": 1850 }, { "epoch": 0.13, "grad_norm": 1.8909581894037046, "learning_rate": 9.733117078865245e-06, "loss": 0.6265, "step": 1851 }, { "epoch": 0.13, "grad_norm": 1.7053347248752695, "learning_rate": 9.732746529537262e-06, "loss": 0.6338, "step": 1852 }, { "epoch": 0.13, "grad_norm": 1.760585292006874, "learning_rate": 9.73237573020957e-06, "loss": 0.5503, "step": 1853 }, { "epoch": 0.13, "grad_norm": 1.5703752416545302, "learning_rate": 9.732004680901758e-06, "loss": 0.5205, "step": 1854 }, { "epoch": 0.13, "grad_norm": 1.7650736345884976, "learning_rate": 9.731633381633422e-06, "loss": 0.583, "step": 1855 }, { "epoch": 0.13, "grad_norm": 1.6297027030570372, "learning_rate": 9.73126183242418e-06, "loss": 0.5837, "step": 1856 }, { "epoch": 0.13, "grad_norm": 2.0250382008703625, "learning_rate": 9.730890033293656e-06, "loss": 0.5748, "step": 1857 }, { "epoch": 0.13, "grad_norm": 2.2960826455462477, "learning_rate": 9.73051798426149e-06, "loss": 0.5834, "step": 1858 }, { "epoch": 0.13, "grad_norm": 2.2600734848769326, "learning_rate": 9.730145685347334e-06, "loss": 0.5832, "step": 1859 }, { "epoch": 0.13, "grad_norm": 1.9689673688637401, "learning_rate": 9.729773136570855e-06, "loss": 0.6047, "step": 1860 }, { "epoch": 0.13, "grad_norm": 1.5493514070529597, "learning_rate": 9.729400337951733e-06, "loss": 0.5293, "step": 1861 }, { "epoch": 0.13, "grad_norm": 3.066894156481255, "learning_rate": 9.729027289509661e-06, "loss": 0.6419, "step": 1862 }, { "epoch": 0.13, "grad_norm": 1.8237152700483332, "learning_rate": 9.72865399126434e-06, "loss": 0.6412, "step": 1863 }, { "epoch": 0.13, "grad_norm": 1.7185727047590058, "learning_rate": 9.728280443235495e-06, "loss": 0.506, "step": 1864 }, { "epoch": 0.13, "grad_norm": 2.1365485331375758, "learning_rate": 9.727906645442855e-06, "loss": 0.5738, "step": 1865 }, { "epoch": 0.13, "grad_norm": 2.655894481515866, "learning_rate": 9.727532597906165e-06, "loss": 0.5059, "step": 1866 }, { "epoch": 0.13, "grad_norm": 1.6134682943932184, "learning_rate": 9.727158300645184e-06, "loss": 0.5201, "step": 1867 }, { "epoch": 0.13, "grad_norm": 1.7467219475266151, "learning_rate": 9.726783753679684e-06, "loss": 0.5976, "step": 1868 }, { "epoch": 0.13, "grad_norm": 2.031132939208805, "learning_rate": 9.726408957029451e-06, "loss": 0.5283, "step": 1869 }, { "epoch": 0.13, "grad_norm": 2.01141606139681, "learning_rate": 9.72603391071428e-06, "loss": 0.5229, "step": 1870 }, { "epoch": 0.13, "grad_norm": 2.2978469798015286, "learning_rate": 9.725658614753983e-06, "loss": 0.6048, "step": 1871 }, { "epoch": 0.13, "grad_norm": 1.8490977385323506, "learning_rate": 9.725283069168387e-06, "loss": 0.6608, "step": 1872 }, { "epoch": 0.13, "grad_norm": 1.6557494887540234, "learning_rate": 9.724907273977328e-06, "loss": 0.606, "step": 1873 }, { "epoch": 0.13, "grad_norm": 1.777194410135706, "learning_rate": 9.724531229200656e-06, "loss": 0.5844, "step": 1874 }, { "epoch": 0.13, "grad_norm": 1.7980145414491835, "learning_rate": 9.724154934858236e-06, "loss": 0.6222, "step": 1875 }, { "epoch": 0.13, "grad_norm": 1.6523558845435453, "learning_rate": 9.723778390969944e-06, "loss": 0.564, "step": 1876 }, { "epoch": 0.13, "grad_norm": 1.9483233555565544, "learning_rate": 9.723401597555671e-06, "loss": 0.5447, "step": 1877 }, { "epoch": 0.13, "grad_norm": 1.814751818228945, "learning_rate": 9.723024554635321e-06, "loss": 0.5982, "step": 1878 }, { "epoch": 0.13, "grad_norm": 2.483563582137913, "learning_rate": 9.722647262228811e-06, "loss": 0.5865, "step": 1879 }, { "epoch": 0.13, "grad_norm": 1.714570618215152, "learning_rate": 9.722269720356068e-06, "loss": 0.5833, "step": 1880 }, { "epoch": 0.13, "grad_norm": 1.5028603977931627, "learning_rate": 9.721891929037039e-06, "loss": 0.5628, "step": 1881 }, { "epoch": 0.13, "grad_norm": 1.7998178818993837, "learning_rate": 9.721513888291677e-06, "loss": 0.545, "step": 1882 }, { "epoch": 0.13, "grad_norm": 1.8400516711438888, "learning_rate": 9.721135598139954e-06, "loss": 0.549, "step": 1883 }, { "epoch": 0.13, "grad_norm": 1.8150691848641167, "learning_rate": 9.72075705860185e-06, "loss": 0.5932, "step": 1884 }, { "epoch": 0.13, "grad_norm": 2.100055762651, "learning_rate": 9.720378269697363e-06, "loss": 0.5723, "step": 1885 }, { "epoch": 0.13, "grad_norm": 1.6386119826372871, "learning_rate": 9.7199992314465e-06, "loss": 0.5679, "step": 1886 }, { "epoch": 0.13, "grad_norm": 2.0120321420240663, "learning_rate": 9.719619943869285e-06, "loss": 0.5796, "step": 1887 }, { "epoch": 0.13, "grad_norm": 1.759082562944045, "learning_rate": 9.719240406985753e-06, "loss": 0.5484, "step": 1888 }, { "epoch": 0.13, "grad_norm": 1.6118194118277673, "learning_rate": 9.718860620815951e-06, "loss": 0.5131, "step": 1889 }, { "epoch": 0.13, "grad_norm": 1.6960940787369176, "learning_rate": 9.71848058537994e-06, "loss": 0.5331, "step": 1890 }, { "epoch": 0.13, "grad_norm": 2.015589647027879, "learning_rate": 9.718100300697797e-06, "loss": 0.588, "step": 1891 }, { "epoch": 0.13, "grad_norm": 2.020843184540193, "learning_rate": 9.71771976678961e-06, "loss": 0.5507, "step": 1892 }, { "epoch": 0.13, "grad_norm": 1.8829302733971391, "learning_rate": 9.717338983675479e-06, "loss": 0.5602, "step": 1893 }, { "epoch": 0.13, "grad_norm": 1.8453451573266069, "learning_rate": 9.716957951375517e-06, "loss": 0.5794, "step": 1894 }, { "epoch": 0.13, "grad_norm": 1.6492758692754435, "learning_rate": 9.716576669909854e-06, "loss": 0.5723, "step": 1895 }, { "epoch": 0.13, "grad_norm": 2.1699982274176697, "learning_rate": 9.716195139298629e-06, "loss": 0.5558, "step": 1896 }, { "epoch": 0.13, "grad_norm": 1.7691051978483867, "learning_rate": 9.715813359561995e-06, "loss": 0.5532, "step": 1897 }, { "epoch": 0.13, "grad_norm": 3.399678167547627, "learning_rate": 9.715431330720121e-06, "loss": 0.6217, "step": 1898 }, { "epoch": 0.13, "grad_norm": 1.8102488774941672, "learning_rate": 9.715049052793187e-06, "loss": 0.5988, "step": 1899 }, { "epoch": 0.13, "grad_norm": 2.099223442300193, "learning_rate": 9.714666525801385e-06, "loss": 0.5887, "step": 1900 }, { "epoch": 0.13, "grad_norm": 1.6393671314051599, "learning_rate": 9.71428374976492e-06, "loss": 0.5324, "step": 1901 }, { "epoch": 0.13, "grad_norm": 1.0684902221418966, "learning_rate": 9.713900724704012e-06, "loss": 0.4965, "step": 1902 }, { "epoch": 0.14, "grad_norm": 1.611745862922129, "learning_rate": 9.713517450638898e-06, "loss": 0.5219, "step": 1903 }, { "epoch": 0.14, "grad_norm": 1.9839823738927387, "learning_rate": 9.71313392758982e-06, "loss": 0.5911, "step": 1904 }, { "epoch": 0.14, "grad_norm": 2.0530300227522003, "learning_rate": 9.712750155577038e-06, "loss": 0.5495, "step": 1905 }, { "epoch": 0.14, "grad_norm": 5.351509868113161, "learning_rate": 9.712366134620822e-06, "loss": 0.6077, "step": 1906 }, { "epoch": 0.14, "grad_norm": 2.1900013432899224, "learning_rate": 9.71198186474146e-06, "loss": 0.5641, "step": 1907 }, { "epoch": 0.14, "grad_norm": 3.342608726423243, "learning_rate": 9.711597345959249e-06, "loss": 0.5781, "step": 1908 }, { "epoch": 0.14, "grad_norm": 2.598274876517335, "learning_rate": 9.711212578294501e-06, "loss": 0.5918, "step": 1909 }, { "epoch": 0.14, "grad_norm": 1.6222691202148796, "learning_rate": 9.710827561767544e-06, "loss": 0.5537, "step": 1910 }, { "epoch": 0.14, "grad_norm": 1.8015162911865747, "learning_rate": 9.71044229639871e-06, "loss": 0.5615, "step": 1911 }, { "epoch": 0.14, "grad_norm": 0.8631502143605907, "learning_rate": 9.710056782208352e-06, "loss": 0.466, "step": 1912 }, { "epoch": 0.14, "grad_norm": 11.24048346996647, "learning_rate": 9.709671019216838e-06, "loss": 0.5897, "step": 1913 }, { "epoch": 0.14, "grad_norm": 1.9351564928950402, "learning_rate": 9.70928500744454e-06, "loss": 0.6004, "step": 1914 }, { "epoch": 0.14, "grad_norm": 2.1877130009984307, "learning_rate": 9.70889874691185e-06, "loss": 0.5135, "step": 1915 }, { "epoch": 0.14, "grad_norm": 1.5355295884907905, "learning_rate": 9.708512237639174e-06, "loss": 0.5874, "step": 1916 }, { "epoch": 0.14, "grad_norm": 2.0231000565028885, "learning_rate": 9.708125479646926e-06, "loss": 0.5659, "step": 1917 }, { "epoch": 0.14, "grad_norm": 2.032614284070895, "learning_rate": 9.707738472955536e-06, "loss": 0.5441, "step": 1918 }, { "epoch": 0.14, "grad_norm": 2.2745889511277335, "learning_rate": 9.70735121758545e-06, "loss": 0.6034, "step": 1919 }, { "epoch": 0.14, "grad_norm": 1.7299876620398278, "learning_rate": 9.70696371355712e-06, "loss": 0.6339, "step": 1920 }, { "epoch": 0.14, "grad_norm": 1.8988464421356506, "learning_rate": 9.706575960891019e-06, "loss": 0.5601, "step": 1921 }, { "epoch": 0.14, "grad_norm": 1.8520466579912147, "learning_rate": 9.706187959607627e-06, "loss": 0.5738, "step": 1922 }, { "epoch": 0.14, "grad_norm": 1.7990614959943028, "learning_rate": 9.70579970972744e-06, "loss": 0.5709, "step": 1923 }, { "epoch": 0.14, "grad_norm": 0.9774675755811699, "learning_rate": 9.705411211270966e-06, "loss": 0.4507, "step": 1924 }, { "epoch": 0.14, "grad_norm": 2.146400391484652, "learning_rate": 9.70502246425873e-06, "loss": 0.6161, "step": 1925 }, { "epoch": 0.14, "grad_norm": 0.8970430579912533, "learning_rate": 9.704633468711262e-06, "loss": 0.4492, "step": 1926 }, { "epoch": 0.14, "grad_norm": 1.9374378448836693, "learning_rate": 9.704244224649116e-06, "loss": 0.5926, "step": 1927 }, { "epoch": 0.14, "grad_norm": 4.649329743622021, "learning_rate": 9.703854732092846e-06, "loss": 0.5931, "step": 1928 }, { "epoch": 0.14, "grad_norm": 1.7104544480288595, "learning_rate": 9.703464991063032e-06, "loss": 0.5394, "step": 1929 }, { "epoch": 0.14, "grad_norm": 2.0388432311162843, "learning_rate": 9.70307500158026e-06, "loss": 0.5898, "step": 1930 }, { "epoch": 0.14, "grad_norm": 1.6285930606764576, "learning_rate": 9.702684763665131e-06, "loss": 0.6785, "step": 1931 }, { "epoch": 0.14, "grad_norm": 1.664734745840734, "learning_rate": 9.702294277338257e-06, "loss": 0.4916, "step": 1932 }, { "epoch": 0.14, "grad_norm": 1.8824961457439187, "learning_rate": 9.701903542620266e-06, "loss": 0.6205, "step": 1933 }, { "epoch": 0.14, "grad_norm": 1.4826762065985195, "learning_rate": 9.701512559531796e-06, "loss": 0.5693, "step": 1934 }, { "epoch": 0.14, "grad_norm": 1.9448054327895665, "learning_rate": 9.701121328093503e-06, "loss": 0.6109, "step": 1935 }, { "epoch": 0.14, "grad_norm": 1.8684682885153396, "learning_rate": 9.700729848326053e-06, "loss": 0.6196, "step": 1936 }, { "epoch": 0.14, "grad_norm": 1.7224208480919332, "learning_rate": 9.700338120250123e-06, "loss": 0.6297, "step": 1937 }, { "epoch": 0.14, "grad_norm": 0.9038897641747816, "learning_rate": 9.699946143886406e-06, "loss": 0.4946, "step": 1938 }, { "epoch": 0.14, "grad_norm": 1.7481379030291944, "learning_rate": 9.699553919255609e-06, "loss": 0.6254, "step": 1939 }, { "epoch": 0.14, "grad_norm": 2.271531649959383, "learning_rate": 9.699161446378449e-06, "loss": 0.4865, "step": 1940 }, { "epoch": 0.14, "grad_norm": 2.03328685203979, "learning_rate": 9.69876872527566e-06, "loss": 0.587, "step": 1941 }, { "epoch": 0.14, "grad_norm": 2.1237433624760302, "learning_rate": 9.698375755967983e-06, "loss": 0.6056, "step": 1942 }, { "epoch": 0.14, "grad_norm": 0.9295434955173402, "learning_rate": 9.69798253847618e-06, "loss": 0.4576, "step": 1943 }, { "epoch": 0.14, "grad_norm": 1.71672010487686, "learning_rate": 9.697589072821022e-06, "loss": 0.5861, "step": 1944 }, { "epoch": 0.14, "grad_norm": 1.5909691941652666, "learning_rate": 9.697195359023287e-06, "loss": 0.5475, "step": 1945 }, { "epoch": 0.14, "grad_norm": 1.796772956489963, "learning_rate": 9.696801397103782e-06, "loss": 0.5856, "step": 1946 }, { "epoch": 0.14, "grad_norm": 2.050801229647497, "learning_rate": 9.696407187083308e-06, "loss": 0.6173, "step": 1947 }, { "epoch": 0.14, "grad_norm": 2.602084215925719, "learning_rate": 9.696012728982695e-06, "loss": 0.5869, "step": 1948 }, { "epoch": 0.14, "grad_norm": 1.9772914360569471, "learning_rate": 9.695618022822778e-06, "loss": 0.5087, "step": 1949 }, { "epoch": 0.14, "grad_norm": 2.1279420134004976, "learning_rate": 9.695223068624403e-06, "loss": 0.5749, "step": 1950 }, { "epoch": 0.14, "grad_norm": 1.8610654370320527, "learning_rate": 9.694827866408438e-06, "loss": 0.5847, "step": 1951 }, { "epoch": 0.14, "grad_norm": 2.396920740028801, "learning_rate": 9.694432416195757e-06, "loss": 0.5395, "step": 1952 }, { "epoch": 0.14, "grad_norm": 1.582001326784994, "learning_rate": 9.694036718007248e-06, "loss": 0.5325, "step": 1953 }, { "epoch": 0.14, "grad_norm": 1.8012946702969606, "learning_rate": 9.693640771863814e-06, "loss": 0.6553, "step": 1954 }, { "epoch": 0.14, "grad_norm": 2.05723679921335, "learning_rate": 9.693244577786372e-06, "loss": 0.5617, "step": 1955 }, { "epoch": 0.14, "grad_norm": 1.941173465825226, "learning_rate": 9.692848135795847e-06, "loss": 0.614, "step": 1956 }, { "epoch": 0.14, "grad_norm": 1.6054274052580888, "learning_rate": 9.692451445913182e-06, "loss": 0.5614, "step": 1957 }, { "epoch": 0.14, "grad_norm": 1.8996110786215692, "learning_rate": 9.692054508159332e-06, "loss": 0.5908, "step": 1958 }, { "epoch": 0.14, "grad_norm": 1.827018735892037, "learning_rate": 9.691657322555264e-06, "loss": 0.5196, "step": 1959 }, { "epoch": 0.14, "grad_norm": 2.1052370586600406, "learning_rate": 9.691259889121958e-06, "loss": 0.5755, "step": 1960 }, { "epoch": 0.14, "grad_norm": 1.572889371364559, "learning_rate": 9.69086220788041e-06, "loss": 0.5068, "step": 1961 }, { "epoch": 0.14, "grad_norm": 1.6116238587579266, "learning_rate": 9.690464278851623e-06, "loss": 0.5952, "step": 1962 }, { "epoch": 0.14, "grad_norm": 1.6474466374896115, "learning_rate": 9.69006610205662e-06, "loss": 0.4883, "step": 1963 }, { "epoch": 0.14, "grad_norm": 1.760255423271644, "learning_rate": 9.689667677516434e-06, "loss": 0.631, "step": 1964 }, { "epoch": 0.14, "grad_norm": 0.9136329100588662, "learning_rate": 9.689269005252112e-06, "loss": 0.4837, "step": 1965 }, { "epoch": 0.14, "grad_norm": 0.9277274326514918, "learning_rate": 9.68887008528471e-06, "loss": 0.4508, "step": 1966 }, { "epoch": 0.14, "grad_norm": 1.6343790951066772, "learning_rate": 9.688470917635302e-06, "loss": 0.6, "step": 1967 }, { "epoch": 0.14, "grad_norm": 1.9277144648580316, "learning_rate": 9.688071502324973e-06, "loss": 0.6062, "step": 1968 }, { "epoch": 0.14, "grad_norm": 1.5874142834021694, "learning_rate": 9.687671839374822e-06, "loss": 0.6886, "step": 1969 }, { "epoch": 0.14, "grad_norm": 1.870420020364407, "learning_rate": 9.68727192880596e-06, "loss": 0.6541, "step": 1970 }, { "epoch": 0.14, "grad_norm": 2.513852617536507, "learning_rate": 9.686871770639514e-06, "loss": 0.5583, "step": 1971 }, { "epoch": 0.14, "grad_norm": 2.085738095223795, "learning_rate": 9.686471364896618e-06, "loss": 0.535, "step": 1972 }, { "epoch": 0.14, "grad_norm": 1.7433581873596748, "learning_rate": 9.686070711598428e-06, "loss": 0.516, "step": 1973 }, { "epoch": 0.14, "grad_norm": 2.300899630958353, "learning_rate": 9.685669810766101e-06, "loss": 0.6691, "step": 1974 }, { "epoch": 0.14, "grad_norm": 2.068259906561549, "learning_rate": 9.68526866242082e-06, "loss": 0.5745, "step": 1975 }, { "epoch": 0.14, "grad_norm": 1.9647331242694106, "learning_rate": 9.684867266583768e-06, "loss": 0.5493, "step": 1976 }, { "epoch": 0.14, "grad_norm": 2.6839497282681584, "learning_rate": 9.684465623276156e-06, "loss": 0.5272, "step": 1977 }, { "epoch": 0.14, "grad_norm": 2.187898736097372, "learning_rate": 9.684063732519195e-06, "loss": 0.5279, "step": 1978 }, { "epoch": 0.14, "grad_norm": 1.66899063538525, "learning_rate": 9.683661594334117e-06, "loss": 0.6323, "step": 1979 }, { "epoch": 0.14, "grad_norm": 2.2623434928564436, "learning_rate": 9.683259208742165e-06, "loss": 0.4858, "step": 1980 }, { "epoch": 0.14, "grad_norm": 1.3725364384574403, "learning_rate": 9.68285657576459e-06, "loss": 0.5917, "step": 1981 }, { "epoch": 0.14, "grad_norm": 1.7046568008476424, "learning_rate": 9.682453695422663e-06, "loss": 0.575, "step": 1982 }, { "epoch": 0.14, "grad_norm": 1.8096622026911067, "learning_rate": 9.682050567737665e-06, "loss": 0.5119, "step": 1983 }, { "epoch": 0.14, "grad_norm": 1.9693565074965238, "learning_rate": 9.681647192730893e-06, "loss": 0.6046, "step": 1984 }, { "epoch": 0.14, "grad_norm": 2.2496369993399457, "learning_rate": 9.681243570423651e-06, "loss": 0.5687, "step": 1985 }, { "epoch": 0.14, "grad_norm": 1.8949705524076383, "learning_rate": 9.680839700837262e-06, "loss": 0.5801, "step": 1986 }, { "epoch": 0.14, "grad_norm": 1.4363768872579783, "learning_rate": 9.68043558399306e-06, "loss": 0.6023, "step": 1987 }, { "epoch": 0.14, "grad_norm": 1.7356690230493972, "learning_rate": 9.68003121991239e-06, "loss": 0.5375, "step": 1988 }, { "epoch": 0.14, "grad_norm": 1.6621643358883686, "learning_rate": 9.679626608616612e-06, "loss": 0.5774, "step": 1989 }, { "epoch": 0.14, "grad_norm": 1.852317528650638, "learning_rate": 9.6792217501271e-06, "loss": 0.5591, "step": 1990 }, { "epoch": 0.14, "grad_norm": 1.6414986513098953, "learning_rate": 9.678816644465242e-06, "loss": 0.591, "step": 1991 }, { "epoch": 0.14, "grad_norm": 1.7635742077856367, "learning_rate": 9.678411291652431e-06, "loss": 0.5377, "step": 1992 }, { "epoch": 0.14, "grad_norm": 1.6372879328334657, "learning_rate": 9.678005691710086e-06, "loss": 0.4852, "step": 1993 }, { "epoch": 0.14, "grad_norm": 2.6022515437347913, "learning_rate": 9.677599844659628e-06, "loss": 0.668, "step": 1994 }, { "epoch": 0.14, "grad_norm": 1.4613814713064646, "learning_rate": 9.677193750522498e-06, "loss": 0.5798, "step": 1995 }, { "epoch": 0.14, "grad_norm": 1.4948216965298493, "learning_rate": 9.676787409320144e-06, "loss": 0.5133, "step": 1996 }, { "epoch": 0.14, "grad_norm": 1.65009310097062, "learning_rate": 9.676380821074033e-06, "loss": 0.5519, "step": 1997 }, { "epoch": 0.14, "grad_norm": 1.8144986595841188, "learning_rate": 9.67597398580564e-06, "loss": 0.5785, "step": 1998 }, { "epoch": 0.14, "grad_norm": 2.335556065755507, "learning_rate": 9.675566903536458e-06, "loss": 0.5406, "step": 1999 }, { "epoch": 0.14, "grad_norm": 1.9993611534751266, "learning_rate": 9.675159574287989e-06, "loss": 0.6308, "step": 2000 }, { "epoch": 0.14, "grad_norm": 2.0519278690989444, "learning_rate": 9.674751998081748e-06, "loss": 0.5789, "step": 2001 }, { "epoch": 0.14, "grad_norm": 1.7465686150660946, "learning_rate": 9.674344174939268e-06, "loss": 0.6299, "step": 2002 }, { "epoch": 0.14, "grad_norm": 1.6411894511358684, "learning_rate": 9.673936104882089e-06, "loss": 0.6229, "step": 2003 }, { "epoch": 0.14, "grad_norm": 1.6629277508832108, "learning_rate": 9.673527787931767e-06, "loss": 0.5698, "step": 2004 }, { "epoch": 0.14, "grad_norm": 4.405637540054362, "learning_rate": 9.673119224109872e-06, "loss": 0.6338, "step": 2005 }, { "epoch": 0.14, "grad_norm": 1.0150111830508741, "learning_rate": 9.672710413437986e-06, "loss": 0.482, "step": 2006 }, { "epoch": 0.14, "grad_norm": 1.0617942680539303, "learning_rate": 9.672301355937702e-06, "loss": 0.4883, "step": 2007 }, { "epoch": 0.14, "grad_norm": 1.6566950913112255, "learning_rate": 9.671892051630627e-06, "loss": 0.5386, "step": 2008 }, { "epoch": 0.14, "grad_norm": 1.8355469645237559, "learning_rate": 9.671482500538384e-06, "loss": 0.5196, "step": 2009 }, { "epoch": 0.14, "grad_norm": 1.5686807144800414, "learning_rate": 9.671072702682607e-06, "loss": 0.5852, "step": 2010 }, { "epoch": 0.14, "grad_norm": 2.0859698506673254, "learning_rate": 9.670662658084942e-06, "loss": 0.5946, "step": 2011 }, { "epoch": 0.14, "grad_norm": 2.072754678346567, "learning_rate": 9.670252366767048e-06, "loss": 0.6484, "step": 2012 }, { "epoch": 0.14, "grad_norm": 1.8979910171079, "learning_rate": 9.669841828750599e-06, "loss": 0.6276, "step": 2013 }, { "epoch": 0.14, "grad_norm": 1.655523270611167, "learning_rate": 9.669431044057281e-06, "loss": 0.5377, "step": 2014 }, { "epoch": 0.14, "grad_norm": 2.2036442088972494, "learning_rate": 9.669020012708794e-06, "loss": 0.6371, "step": 2015 }, { "epoch": 0.14, "grad_norm": 2.2666401151727156, "learning_rate": 9.668608734726849e-06, "loss": 0.5803, "step": 2016 }, { "epoch": 0.14, "grad_norm": 1.5142995922854994, "learning_rate": 9.66819721013317e-06, "loss": 0.4952, "step": 2017 }, { "epoch": 0.14, "grad_norm": 1.7866257040591784, "learning_rate": 9.667785438949497e-06, "loss": 0.5539, "step": 2018 }, { "epoch": 0.14, "grad_norm": 1.6797978213914093, "learning_rate": 9.667373421197581e-06, "loss": 0.5854, "step": 2019 }, { "epoch": 0.14, "grad_norm": 1.6707753347321554, "learning_rate": 9.666961156899187e-06, "loss": 0.5768, "step": 2020 }, { "epoch": 0.14, "grad_norm": 1.5851976861156922, "learning_rate": 9.666548646076087e-06, "loss": 0.5768, "step": 2021 }, { "epoch": 0.14, "grad_norm": 1.668168456898025, "learning_rate": 9.666135888750078e-06, "loss": 0.5441, "step": 2022 }, { "epoch": 0.14, "grad_norm": 1.5251100633394035, "learning_rate": 9.66572288494296e-06, "loss": 0.628, "step": 2023 }, { "epoch": 0.14, "grad_norm": 1.903974018376592, "learning_rate": 9.665309634676551e-06, "loss": 0.6056, "step": 2024 }, { "epoch": 0.14, "grad_norm": 2.0505935745099517, "learning_rate": 9.664896137972677e-06, "loss": 0.5322, "step": 2025 }, { "epoch": 0.14, "grad_norm": 1.9603167205898366, "learning_rate": 9.664482394853181e-06, "loss": 0.5453, "step": 2026 }, { "epoch": 0.14, "grad_norm": 1.8135453717769674, "learning_rate": 9.664068405339921e-06, "loss": 0.6179, "step": 2027 }, { "epoch": 0.14, "grad_norm": 1.6197926221297367, "learning_rate": 9.663654169454765e-06, "loss": 0.5436, "step": 2028 }, { "epoch": 0.14, "grad_norm": 1.5234027741473628, "learning_rate": 9.663239687219591e-06, "loss": 0.5901, "step": 2029 }, { "epoch": 0.14, "grad_norm": 1.7724826769939195, "learning_rate": 9.662824958656297e-06, "loss": 0.6304, "step": 2030 }, { "epoch": 0.14, "grad_norm": 1.5036064684291701, "learning_rate": 9.662409983786788e-06, "loss": 0.5659, "step": 2031 }, { "epoch": 0.14, "grad_norm": 1.9924638243978485, "learning_rate": 9.661994762632985e-06, "loss": 0.5839, "step": 2032 }, { "epoch": 0.14, "grad_norm": 1.7522431124786204, "learning_rate": 9.661579295216823e-06, "loss": 0.4999, "step": 2033 }, { "epoch": 0.14, "grad_norm": 1.436011301037939, "learning_rate": 9.661163581560247e-06, "loss": 0.5881, "step": 2034 }, { "epoch": 0.14, "grad_norm": 1.6021055598775995, "learning_rate": 9.660747621685214e-06, "loss": 0.5803, "step": 2035 }, { "epoch": 0.14, "grad_norm": 2.1006013924281155, "learning_rate": 9.660331415613701e-06, "loss": 0.6104, "step": 2036 }, { "epoch": 0.14, "grad_norm": 1.6350316264620368, "learning_rate": 9.659914963367692e-06, "loss": 0.6015, "step": 2037 }, { "epoch": 0.14, "grad_norm": 1.5371917609626466, "learning_rate": 9.659498264969183e-06, "loss": 0.5337, "step": 2038 }, { "epoch": 0.14, "grad_norm": 2.6581339675955853, "learning_rate": 9.659081320440187e-06, "loss": 0.5661, "step": 2039 }, { "epoch": 0.14, "grad_norm": 1.5231811415838228, "learning_rate": 9.658664129802728e-06, "loss": 0.5841, "step": 2040 }, { "epoch": 0.14, "grad_norm": 1.6672454895742175, "learning_rate": 9.658246693078845e-06, "loss": 0.5412, "step": 2041 }, { "epoch": 0.14, "grad_norm": 1.6334012290134465, "learning_rate": 9.657829010290588e-06, "loss": 0.569, "step": 2042 }, { "epoch": 0.14, "grad_norm": 1.6602505191534778, "learning_rate": 9.65741108146002e-06, "loss": 0.5885, "step": 2043 }, { "epoch": 0.15, "grad_norm": 1.6446863770357623, "learning_rate": 9.656992906609215e-06, "loss": 0.5533, "step": 2044 }, { "epoch": 0.15, "grad_norm": 1.6364036088818459, "learning_rate": 9.656574485760267e-06, "loss": 0.581, "step": 2045 }, { "epoch": 0.15, "grad_norm": 1.5983674236471037, "learning_rate": 9.656155818935275e-06, "loss": 0.571, "step": 2046 }, { "epoch": 0.15, "grad_norm": 1.7967262931593406, "learning_rate": 9.655736906156355e-06, "loss": 0.5308, "step": 2047 }, { "epoch": 0.15, "grad_norm": 1.821770562664974, "learning_rate": 9.655317747445636e-06, "loss": 0.5404, "step": 2048 }, { "epoch": 0.15, "grad_norm": 1.644145447688416, "learning_rate": 9.654898342825261e-06, "loss": 0.5376, "step": 2049 }, { "epoch": 0.15, "grad_norm": 1.9126759109702718, "learning_rate": 9.65447869231738e-06, "loss": 0.5682, "step": 2050 }, { "epoch": 0.15, "grad_norm": 1.660477788633002, "learning_rate": 9.654058795944165e-06, "loss": 0.503, "step": 2051 }, { "epoch": 0.15, "grad_norm": 2.6663209914515105, "learning_rate": 9.653638653727793e-06, "loss": 0.5484, "step": 2052 }, { "epoch": 0.15, "grad_norm": 1.748013609153089, "learning_rate": 9.653218265690458e-06, "loss": 0.5833, "step": 2053 }, { "epoch": 0.15, "grad_norm": 1.7319528083631415, "learning_rate": 9.65279763185437e-06, "loss": 0.5658, "step": 2054 }, { "epoch": 0.15, "grad_norm": 1.6805316558904804, "learning_rate": 9.652376752241743e-06, "loss": 0.5812, "step": 2055 }, { "epoch": 0.15, "grad_norm": 1.66376613805248, "learning_rate": 9.651955626874812e-06, "loss": 0.6364, "step": 2056 }, { "epoch": 0.15, "grad_norm": 2.0905530261252876, "learning_rate": 9.65153425577582e-06, "loss": 0.596, "step": 2057 }, { "epoch": 0.15, "grad_norm": 1.6155690893284218, "learning_rate": 9.651112638967028e-06, "loss": 0.6167, "step": 2058 }, { "epoch": 0.15, "grad_norm": 1.5356914287380137, "learning_rate": 9.650690776470708e-06, "loss": 0.5808, "step": 2059 }, { "epoch": 0.15, "grad_norm": 1.9078423973083523, "learning_rate": 9.650268668309142e-06, "loss": 0.5374, "step": 2060 }, { "epoch": 0.15, "grad_norm": 1.7124192012946258, "learning_rate": 9.649846314504627e-06, "loss": 0.5486, "step": 2061 }, { "epoch": 0.15, "grad_norm": 1.82062343192843, "learning_rate": 9.649423715079474e-06, "loss": 0.6086, "step": 2062 }, { "epoch": 0.15, "grad_norm": 1.498429846532035, "learning_rate": 9.649000870056004e-06, "loss": 0.5706, "step": 2063 }, { "epoch": 0.15, "grad_norm": 2.1568399675893093, "learning_rate": 9.648577779456559e-06, "loss": 0.5738, "step": 2064 }, { "epoch": 0.15, "grad_norm": 1.0699549739599807, "learning_rate": 9.648154443303481e-06, "loss": 0.4914, "step": 2065 }, { "epoch": 0.15, "grad_norm": 1.8142495445685458, "learning_rate": 9.647730861619137e-06, "loss": 0.6066, "step": 2066 }, { "epoch": 0.15, "grad_norm": 1.85639134074057, "learning_rate": 9.647307034425898e-06, "loss": 0.6192, "step": 2067 }, { "epoch": 0.15, "grad_norm": 0.8748757760257014, "learning_rate": 9.646882961746157e-06, "loss": 0.4823, "step": 2068 }, { "epoch": 0.15, "grad_norm": 1.5680903256415388, "learning_rate": 9.646458643602311e-06, "loss": 0.5745, "step": 2069 }, { "epoch": 0.15, "grad_norm": 1.4829305536462372, "learning_rate": 9.646034080016775e-06, "loss": 0.5856, "step": 2070 }, { "epoch": 0.15, "grad_norm": 1.8330741506963368, "learning_rate": 9.645609271011975e-06, "loss": 0.6381, "step": 2071 }, { "epoch": 0.15, "grad_norm": 2.5936991764920876, "learning_rate": 9.645184216610353e-06, "loss": 0.5583, "step": 2072 }, { "epoch": 0.15, "grad_norm": 1.7679033702681357, "learning_rate": 9.644758916834362e-06, "loss": 0.5508, "step": 2073 }, { "epoch": 0.15, "grad_norm": 1.636242475268784, "learning_rate": 9.644333371706465e-06, "loss": 0.588, "step": 2074 }, { "epoch": 0.15, "grad_norm": 1.58409404352087, "learning_rate": 9.643907581249142e-06, "loss": 0.6201, "step": 2075 }, { "epoch": 0.15, "grad_norm": 1.0402329938802013, "learning_rate": 9.643481545484884e-06, "loss": 0.4556, "step": 2076 }, { "epoch": 0.15, "grad_norm": 1.692053828629462, "learning_rate": 9.643055264436198e-06, "loss": 0.6071, "step": 2077 }, { "epoch": 0.15, "grad_norm": 2.2754445985078755, "learning_rate": 9.6426287381256e-06, "loss": 0.6024, "step": 2078 }, { "epoch": 0.15, "grad_norm": 1.690469348086785, "learning_rate": 9.64220196657562e-06, "loss": 0.5908, "step": 2079 }, { "epoch": 0.15, "grad_norm": 1.5783069100901368, "learning_rate": 9.641774949808802e-06, "loss": 0.5091, "step": 2080 }, { "epoch": 0.15, "grad_norm": 1.8049987864415111, "learning_rate": 9.641347687847703e-06, "loss": 0.5436, "step": 2081 }, { "epoch": 0.15, "grad_norm": 0.8545483194862561, "learning_rate": 9.640920180714892e-06, "loss": 0.4694, "step": 2082 }, { "epoch": 0.15, "grad_norm": 0.8260283885519006, "learning_rate": 9.640492428432953e-06, "loss": 0.4745, "step": 2083 }, { "epoch": 0.15, "grad_norm": 1.6222758755214932, "learning_rate": 9.640064431024478e-06, "loss": 0.606, "step": 2084 }, { "epoch": 0.15, "grad_norm": 1.7979913433033554, "learning_rate": 9.639636188512077e-06, "loss": 0.5864, "step": 2085 }, { "epoch": 0.15, "grad_norm": 1.9189532786546784, "learning_rate": 9.639207700918371e-06, "loss": 0.5791, "step": 2086 }, { "epoch": 0.15, "grad_norm": 1.5630318936966947, "learning_rate": 9.638778968265996e-06, "loss": 0.5754, "step": 2087 }, { "epoch": 0.15, "grad_norm": 2.3730909439619365, "learning_rate": 9.638349990577596e-06, "loss": 0.6084, "step": 2088 }, { "epoch": 0.15, "grad_norm": 0.8536363889330592, "learning_rate": 9.637920767875834e-06, "loss": 0.4393, "step": 2089 }, { "epoch": 0.15, "grad_norm": 1.4656946589000912, "learning_rate": 9.637491300183382e-06, "loss": 0.5616, "step": 2090 }, { "epoch": 0.15, "grad_norm": 1.8727987670906867, "learning_rate": 9.637061587522924e-06, "loss": 0.57, "step": 2091 }, { "epoch": 0.15, "grad_norm": 2.2372234488022182, "learning_rate": 9.636631629917161e-06, "loss": 0.5531, "step": 2092 }, { "epoch": 0.15, "grad_norm": 1.8286970423887974, "learning_rate": 9.636201427388805e-06, "loss": 0.602, "step": 2093 }, { "epoch": 0.15, "grad_norm": 1.6313497419588237, "learning_rate": 9.635770979960579e-06, "loss": 0.6238, "step": 2094 }, { "epoch": 0.15, "grad_norm": 1.7222762043488424, "learning_rate": 9.635340287655222e-06, "loss": 0.5491, "step": 2095 }, { "epoch": 0.15, "grad_norm": 1.7784170256613303, "learning_rate": 9.634909350495486e-06, "loss": 0.5248, "step": 2096 }, { "epoch": 0.15, "grad_norm": 1.8970225157715686, "learning_rate": 9.634478168504129e-06, "loss": 0.5685, "step": 2097 }, { "epoch": 0.15, "grad_norm": 1.4375833981504196, "learning_rate": 9.634046741703935e-06, "loss": 0.5383, "step": 2098 }, { "epoch": 0.15, "grad_norm": 2.482557236287892, "learning_rate": 9.633615070117687e-06, "loss": 0.6019, "step": 2099 }, { "epoch": 0.15, "grad_norm": 1.5366986101767295, "learning_rate": 9.633183153768193e-06, "loss": 0.5695, "step": 2100 }, { "epoch": 0.15, "grad_norm": 1.6071249771554728, "learning_rate": 9.632750992678264e-06, "loss": 0.5275, "step": 2101 }, { "epoch": 0.15, "grad_norm": 1.6934258327488794, "learning_rate": 9.63231858687073e-06, "loss": 0.608, "step": 2102 }, { "epoch": 0.15, "grad_norm": 2.1247468322140817, "learning_rate": 9.63188593636843e-06, "loss": 0.595, "step": 2103 }, { "epoch": 0.15, "grad_norm": 6.776780103002606, "learning_rate": 9.631453041194222e-06, "loss": 0.4867, "step": 2104 }, { "epoch": 0.15, "grad_norm": 1.7197899600025235, "learning_rate": 9.63101990137097e-06, "loss": 0.5148, "step": 2105 }, { "epoch": 0.15, "grad_norm": 1.6054649878644405, "learning_rate": 9.630586516921557e-06, "loss": 0.5827, "step": 2106 }, { "epoch": 0.15, "grad_norm": 2.2956106326330814, "learning_rate": 9.63015288786887e-06, "loss": 0.5844, "step": 2107 }, { "epoch": 0.15, "grad_norm": 2.2314178699409575, "learning_rate": 9.629719014235819e-06, "loss": 0.6091, "step": 2108 }, { "epoch": 0.15, "grad_norm": 2.0993559205520014, "learning_rate": 9.629284896045325e-06, "loss": 0.5662, "step": 2109 }, { "epoch": 0.15, "grad_norm": 1.5612362591715663, "learning_rate": 9.628850533320314e-06, "loss": 0.6219, "step": 2110 }, { "epoch": 0.15, "grad_norm": 1.580686264506666, "learning_rate": 9.628415926083734e-06, "loss": 0.5884, "step": 2111 }, { "epoch": 0.15, "grad_norm": 1.7302996820413585, "learning_rate": 9.62798107435854e-06, "loss": 0.609, "step": 2112 }, { "epoch": 0.15, "grad_norm": 1.5580120318008182, "learning_rate": 9.627545978167707e-06, "loss": 0.6051, "step": 2113 }, { "epoch": 0.15, "grad_norm": 1.6924941956661084, "learning_rate": 9.627110637534212e-06, "loss": 0.5084, "step": 2114 }, { "epoch": 0.15, "grad_norm": 2.3619016009512355, "learning_rate": 9.626675052481057e-06, "loss": 0.531, "step": 2115 }, { "epoch": 0.15, "grad_norm": 1.6599927704723276, "learning_rate": 9.626239223031247e-06, "loss": 0.5729, "step": 2116 }, { "epoch": 0.15, "grad_norm": 1.6263928065969147, "learning_rate": 9.625803149207807e-06, "loss": 0.6004, "step": 2117 }, { "epoch": 0.15, "grad_norm": 0.8881195059122535, "learning_rate": 9.625366831033769e-06, "loss": 0.4587, "step": 2118 }, { "epoch": 0.15, "grad_norm": 1.7408135069537287, "learning_rate": 9.62493026853218e-06, "loss": 0.5917, "step": 2119 }, { "epoch": 0.15, "grad_norm": 1.8939483269755766, "learning_rate": 9.624493461726106e-06, "loss": 0.6377, "step": 2120 }, { "epoch": 0.15, "grad_norm": 1.581128405060929, "learning_rate": 9.624056410638616e-06, "loss": 0.5762, "step": 2121 }, { "epoch": 0.15, "grad_norm": 1.6507236737470345, "learning_rate": 9.623619115292798e-06, "loss": 0.5547, "step": 2122 }, { "epoch": 0.15, "grad_norm": 1.8455942656741242, "learning_rate": 9.623181575711751e-06, "loss": 0.5457, "step": 2123 }, { "epoch": 0.15, "grad_norm": 1.5311010973267534, "learning_rate": 9.62274379191859e-06, "loss": 0.6303, "step": 2124 }, { "epoch": 0.15, "grad_norm": 1.8221024399297454, "learning_rate": 9.622305763936435e-06, "loss": 0.6004, "step": 2125 }, { "epoch": 0.15, "grad_norm": 1.8926679994701423, "learning_rate": 9.621867491788429e-06, "loss": 0.6378, "step": 2126 }, { "epoch": 0.15, "grad_norm": 1.4911481752248694, "learning_rate": 9.62142897549772e-06, "loss": 0.5368, "step": 2127 }, { "epoch": 0.15, "grad_norm": 1.6313570042182557, "learning_rate": 9.620990215087474e-06, "loss": 0.5525, "step": 2128 }, { "epoch": 0.15, "grad_norm": 1.6236220596588629, "learning_rate": 9.620551210580864e-06, "loss": 0.6302, "step": 2129 }, { "epoch": 0.15, "grad_norm": 1.5272536152165817, "learning_rate": 9.620111962001085e-06, "loss": 0.5727, "step": 2130 }, { "epoch": 0.15, "grad_norm": 1.5305824802934582, "learning_rate": 9.619672469371338e-06, "loss": 0.6042, "step": 2131 }, { "epoch": 0.15, "grad_norm": 1.6801400949636047, "learning_rate": 9.619232732714836e-06, "loss": 0.5557, "step": 2132 }, { "epoch": 0.15, "grad_norm": 9.648696701443642, "learning_rate": 9.618792752054809e-06, "loss": 0.5719, "step": 2133 }, { "epoch": 0.15, "grad_norm": 1.9730159486097594, "learning_rate": 9.618352527414498e-06, "loss": 0.6492, "step": 2134 }, { "epoch": 0.15, "grad_norm": 1.0281732099371197, "learning_rate": 9.617912058817157e-06, "loss": 0.481, "step": 2135 }, { "epoch": 0.15, "grad_norm": 2.009736177093827, "learning_rate": 9.617471346286056e-06, "loss": 0.5864, "step": 2136 }, { "epoch": 0.15, "grad_norm": 2.1903720771094797, "learning_rate": 9.61703038984447e-06, "loss": 0.5581, "step": 2137 }, { "epoch": 0.15, "grad_norm": 1.9607948746207904, "learning_rate": 9.616589189515696e-06, "loss": 0.6081, "step": 2138 }, { "epoch": 0.15, "grad_norm": 0.896750028865289, "learning_rate": 9.616147745323035e-06, "loss": 0.4836, "step": 2139 }, { "epoch": 0.15, "grad_norm": 1.606011389402128, "learning_rate": 9.61570605728981e-06, "loss": 0.6121, "step": 2140 }, { "epoch": 0.15, "grad_norm": 1.5117191558580692, "learning_rate": 9.615264125439351e-06, "loss": 0.5957, "step": 2141 }, { "epoch": 0.15, "grad_norm": 1.5649312330426122, "learning_rate": 9.614821949795002e-06, "loss": 0.5736, "step": 2142 }, { "epoch": 0.15, "grad_norm": 1.8291966435336662, "learning_rate": 9.614379530380122e-06, "loss": 0.563, "step": 2143 }, { "epoch": 0.15, "grad_norm": 1.6712860530923126, "learning_rate": 9.613936867218078e-06, "loss": 0.5737, "step": 2144 }, { "epoch": 0.15, "grad_norm": 1.7177016092015445, "learning_rate": 9.613493960332253e-06, "loss": 0.5788, "step": 2145 }, { "epoch": 0.15, "grad_norm": 2.0869939856829345, "learning_rate": 9.613050809746048e-06, "loss": 0.6407, "step": 2146 }, { "epoch": 0.15, "grad_norm": 1.6920754054443645, "learning_rate": 9.612607415482867e-06, "loss": 0.5652, "step": 2147 }, { "epoch": 0.15, "grad_norm": 1.9108186505814162, "learning_rate": 9.61216377756613e-06, "loss": 0.6298, "step": 2148 }, { "epoch": 0.15, "grad_norm": 1.7331045141645243, "learning_rate": 9.611719896019275e-06, "loss": 0.5588, "step": 2149 }, { "epoch": 0.15, "grad_norm": 1.446690857447552, "learning_rate": 9.611275770865751e-06, "loss": 0.5253, "step": 2150 }, { "epoch": 0.15, "grad_norm": 1.653167462259553, "learning_rate": 9.610831402129015e-06, "loss": 0.5901, "step": 2151 }, { "epoch": 0.15, "grad_norm": 1.6084795754159313, "learning_rate": 9.610386789832538e-06, "loss": 0.6437, "step": 2152 }, { "epoch": 0.15, "grad_norm": 1.6096116843716342, "learning_rate": 9.609941933999812e-06, "loss": 0.5988, "step": 2153 }, { "epoch": 0.15, "grad_norm": 1.6785044986328697, "learning_rate": 9.60949683465433e-06, "loss": 0.5695, "step": 2154 }, { "epoch": 0.15, "grad_norm": 1.5446225595395824, "learning_rate": 9.609051491819608e-06, "loss": 0.6701, "step": 2155 }, { "epoch": 0.15, "grad_norm": 1.8158818169452011, "learning_rate": 9.608605905519166e-06, "loss": 0.6685, "step": 2156 }, { "epoch": 0.15, "grad_norm": 1.7003016401593005, "learning_rate": 9.608160075776546e-06, "loss": 0.6204, "step": 2157 }, { "epoch": 0.15, "grad_norm": 1.8663887794981275, "learning_rate": 9.607714002615297e-06, "loss": 0.5477, "step": 2158 }, { "epoch": 0.15, "grad_norm": 1.626090381541546, "learning_rate": 9.607267686058979e-06, "loss": 0.5924, "step": 2159 }, { "epoch": 0.15, "grad_norm": 1.5622615076178528, "learning_rate": 9.606821126131171e-06, "loss": 0.6126, "step": 2160 }, { "epoch": 0.15, "grad_norm": 1.8288070540766657, "learning_rate": 9.606374322855463e-06, "loss": 0.602, "step": 2161 }, { "epoch": 0.15, "grad_norm": 2.0390320878695722, "learning_rate": 9.605927276255452e-06, "loss": 0.5465, "step": 2162 }, { "epoch": 0.15, "grad_norm": 1.9293573760376699, "learning_rate": 9.605479986354758e-06, "loss": 0.5518, "step": 2163 }, { "epoch": 0.15, "grad_norm": 0.8671985104927161, "learning_rate": 9.605032453177004e-06, "loss": 0.4886, "step": 2164 }, { "epoch": 0.15, "grad_norm": 1.5067775199868225, "learning_rate": 9.60458467674583e-06, "loss": 0.5802, "step": 2165 }, { "epoch": 0.15, "grad_norm": 1.6642928320115047, "learning_rate": 9.604136657084894e-06, "loss": 0.5977, "step": 2166 }, { "epoch": 0.15, "grad_norm": 2.1621716432547835, "learning_rate": 9.603688394217858e-06, "loss": 0.5872, "step": 2167 }, { "epoch": 0.15, "grad_norm": 1.6533325789139708, "learning_rate": 9.6032398881684e-06, "loss": 0.5935, "step": 2168 }, { "epoch": 0.15, "grad_norm": 1.717078837266353, "learning_rate": 9.602791138960215e-06, "loss": 0.5213, "step": 2169 }, { "epoch": 0.15, "grad_norm": 3.1649378804584956, "learning_rate": 9.602342146617005e-06, "loss": 0.5305, "step": 2170 }, { "epoch": 0.15, "grad_norm": 2.39592306325254, "learning_rate": 9.601892911162488e-06, "loss": 0.4775, "step": 2171 }, { "epoch": 0.15, "grad_norm": 1.882229706455084, "learning_rate": 9.601443432620394e-06, "loss": 0.5365, "step": 2172 }, { "epoch": 0.15, "grad_norm": 1.5975631807307624, "learning_rate": 9.600993711014466e-06, "loss": 0.6411, "step": 2173 }, { "epoch": 0.15, "grad_norm": 1.8314382805967402, "learning_rate": 9.60054374636846e-06, "loss": 0.6603, "step": 2174 }, { "epoch": 0.15, "grad_norm": 2.272155485573536, "learning_rate": 9.600093538706145e-06, "loss": 0.5211, "step": 2175 }, { "epoch": 0.15, "grad_norm": 2.3826410867770718, "learning_rate": 9.599643088051302e-06, "loss": 0.5547, "step": 2176 }, { "epoch": 0.15, "grad_norm": 2.0237113007011134, "learning_rate": 9.599192394427725e-06, "loss": 0.5605, "step": 2177 }, { "epoch": 0.15, "grad_norm": 2.0998989279638667, "learning_rate": 9.598741457859222e-06, "loss": 0.5917, "step": 2178 }, { "epoch": 0.15, "grad_norm": 1.903492470004949, "learning_rate": 9.598290278369613e-06, "loss": 0.583, "step": 2179 }, { "epoch": 0.15, "grad_norm": 0.9237882261782591, "learning_rate": 9.597838855982728e-06, "loss": 0.4996, "step": 2180 }, { "epoch": 0.15, "grad_norm": 1.483211513516119, "learning_rate": 9.597387190722418e-06, "loss": 0.5168, "step": 2181 }, { "epoch": 0.15, "grad_norm": 1.5316054163458794, "learning_rate": 9.596935282612536e-06, "loss": 0.5082, "step": 2182 }, { "epoch": 0.15, "grad_norm": 1.7671303961046758, "learning_rate": 9.596483131676957e-06, "loss": 0.6258, "step": 2183 }, { "epoch": 0.15, "grad_norm": 1.7231237960509258, "learning_rate": 9.596030737939564e-06, "loss": 0.5727, "step": 2184 }, { "epoch": 0.16, "grad_norm": 1.9428453320018664, "learning_rate": 9.595578101424254e-06, "loss": 0.6001, "step": 2185 }, { "epoch": 0.16, "grad_norm": 1.6315944950998447, "learning_rate": 9.595125222154935e-06, "loss": 0.6035, "step": 2186 }, { "epoch": 0.16, "grad_norm": 1.5456402381571213, "learning_rate": 9.594672100155534e-06, "loss": 0.5526, "step": 2187 }, { "epoch": 0.16, "grad_norm": 1.6815021837453648, "learning_rate": 9.594218735449983e-06, "loss": 0.5707, "step": 2188 }, { "epoch": 0.16, "grad_norm": 1.5704510439569273, "learning_rate": 9.59376512806223e-06, "loss": 0.5388, "step": 2189 }, { "epoch": 0.16, "grad_norm": 1.5483910381784542, "learning_rate": 9.593311278016237e-06, "loss": 0.5954, "step": 2190 }, { "epoch": 0.16, "grad_norm": 2.6188675468635787, "learning_rate": 9.592857185335979e-06, "loss": 0.5892, "step": 2191 }, { "epoch": 0.16, "grad_norm": 2.8669501752773914, "learning_rate": 9.592402850045442e-06, "loss": 0.5394, "step": 2192 }, { "epoch": 0.16, "grad_norm": 1.572339027803603, "learning_rate": 9.591948272168625e-06, "loss": 0.5579, "step": 2193 }, { "epoch": 0.16, "grad_norm": 1.5709848061910616, "learning_rate": 9.59149345172954e-06, "loss": 0.5596, "step": 2194 }, { "epoch": 0.16, "grad_norm": 1.8351978432055878, "learning_rate": 9.591038388752214e-06, "loss": 0.6708, "step": 2195 }, { "epoch": 0.16, "grad_norm": 2.0146901269555304, "learning_rate": 9.590583083260684e-06, "loss": 0.6372, "step": 2196 }, { "epoch": 0.16, "grad_norm": 1.6144171052820337, "learning_rate": 9.590127535278999e-06, "loss": 0.5668, "step": 2197 }, { "epoch": 0.16, "grad_norm": 1.6719333168452695, "learning_rate": 9.589671744831224e-06, "loss": 0.5591, "step": 2198 }, { "epoch": 0.16, "grad_norm": 1.645997266619098, "learning_rate": 9.589215711941437e-06, "loss": 0.5313, "step": 2199 }, { "epoch": 0.16, "grad_norm": 1.7735181939874929, "learning_rate": 9.588759436633724e-06, "loss": 0.6255, "step": 2200 }, { "epoch": 0.16, "grad_norm": 2.044477194970411, "learning_rate": 9.58830291893219e-06, "loss": 0.552, "step": 2201 }, { "epoch": 0.16, "grad_norm": 2.0588549482838996, "learning_rate": 9.587846158860949e-06, "loss": 0.6377, "step": 2202 }, { "epoch": 0.16, "grad_norm": 1.8870520211658623, "learning_rate": 9.587389156444126e-06, "loss": 0.5344, "step": 2203 }, { "epoch": 0.16, "grad_norm": 1.7888672922822684, "learning_rate": 9.586931911705865e-06, "loss": 0.5301, "step": 2204 }, { "epoch": 0.16, "grad_norm": 1.9732692034641155, "learning_rate": 9.586474424670318e-06, "loss": 0.5502, "step": 2205 }, { "epoch": 0.16, "grad_norm": 1.6773296842675214, "learning_rate": 9.58601669536165e-06, "loss": 0.588, "step": 2206 }, { "epoch": 0.16, "grad_norm": 2.6869645107956144, "learning_rate": 9.585558723804041e-06, "loss": 0.6322, "step": 2207 }, { "epoch": 0.16, "grad_norm": 2.0759641924705554, "learning_rate": 9.585100510021683e-06, "loss": 0.6007, "step": 2208 }, { "epoch": 0.16, "grad_norm": 1.5992395210403136, "learning_rate": 9.584642054038779e-06, "loss": 0.6028, "step": 2209 }, { "epoch": 0.16, "grad_norm": 1.7686854651254926, "learning_rate": 9.584183355879547e-06, "loss": 0.6328, "step": 2210 }, { "epoch": 0.16, "grad_norm": 3.1315582872671355, "learning_rate": 9.583724415568216e-06, "loss": 0.5506, "step": 2211 }, { "epoch": 0.16, "grad_norm": 1.8433443319996463, "learning_rate": 9.58326523312903e-06, "loss": 0.5845, "step": 2212 }, { "epoch": 0.16, "grad_norm": 1.7898512639951558, "learning_rate": 9.582805808586245e-06, "loss": 0.5632, "step": 2213 }, { "epoch": 0.16, "grad_norm": 0.8855795909154327, "learning_rate": 9.582346141964127e-06, "loss": 0.4868, "step": 2214 }, { "epoch": 0.16, "grad_norm": 1.5522476659849769, "learning_rate": 9.581886233286959e-06, "loss": 0.6007, "step": 2215 }, { "epoch": 0.16, "grad_norm": 1.574129461844822, "learning_rate": 9.581426082579035e-06, "loss": 0.5643, "step": 2216 }, { "epoch": 0.16, "grad_norm": 2.60781980532732, "learning_rate": 9.580965689864662e-06, "loss": 0.5982, "step": 2217 }, { "epoch": 0.16, "grad_norm": 1.8027443807974424, "learning_rate": 9.580505055168158e-06, "loss": 0.6132, "step": 2218 }, { "epoch": 0.16, "grad_norm": 1.7970061567888873, "learning_rate": 9.580044178513857e-06, "loss": 0.6315, "step": 2219 }, { "epoch": 0.16, "grad_norm": 1.67459254205551, "learning_rate": 9.579583059926101e-06, "loss": 0.6191, "step": 2220 }, { "epoch": 0.16, "grad_norm": 1.7009956070806815, "learning_rate": 9.579121699429252e-06, "loss": 0.5576, "step": 2221 }, { "epoch": 0.16, "grad_norm": 1.826134229985322, "learning_rate": 9.578660097047678e-06, "loss": 0.5715, "step": 2222 }, { "epoch": 0.16, "grad_norm": 1.5632386673289096, "learning_rate": 9.578198252805764e-06, "loss": 0.5893, "step": 2223 }, { "epoch": 0.16, "grad_norm": 1.6710227391088774, "learning_rate": 9.577736166727905e-06, "loss": 0.5744, "step": 2224 }, { "epoch": 0.16, "grad_norm": 2.0771503076900384, "learning_rate": 9.57727383883851e-06, "loss": 0.5631, "step": 2225 }, { "epoch": 0.16, "grad_norm": 1.62091495029376, "learning_rate": 9.576811269162e-06, "loss": 0.6058, "step": 2226 }, { "epoch": 0.16, "grad_norm": 3.3448128979292253, "learning_rate": 9.576348457722811e-06, "loss": 0.6018, "step": 2227 }, { "epoch": 0.16, "grad_norm": 1.8967589605265998, "learning_rate": 9.57588540454539e-06, "loss": 0.6134, "step": 2228 }, { "epoch": 0.16, "grad_norm": 2.0537029785752834, "learning_rate": 9.575422109654195e-06, "loss": 0.6005, "step": 2229 }, { "epoch": 0.16, "grad_norm": 1.911467837414719, "learning_rate": 9.574958573073702e-06, "loss": 0.6733, "step": 2230 }, { "epoch": 0.16, "grad_norm": 1.5095570770846123, "learning_rate": 9.574494794828396e-06, "loss": 0.5503, "step": 2231 }, { "epoch": 0.16, "grad_norm": 1.760778089890679, "learning_rate": 9.574030774942773e-06, "loss": 0.6581, "step": 2232 }, { "epoch": 0.16, "grad_norm": 1.5223044814977023, "learning_rate": 9.573566513441347e-06, "loss": 0.5911, "step": 2233 }, { "epoch": 0.16, "grad_norm": 0.8778431511731889, "learning_rate": 9.573102010348639e-06, "loss": 0.4694, "step": 2234 }, { "epoch": 0.16, "grad_norm": 1.8317574087663744, "learning_rate": 9.572637265689187e-06, "loss": 0.577, "step": 2235 }, { "epoch": 0.16, "grad_norm": 1.8848236974022117, "learning_rate": 9.57217227948754e-06, "loss": 0.5466, "step": 2236 }, { "epoch": 0.16, "grad_norm": 1.741569178705166, "learning_rate": 9.571707051768263e-06, "loss": 0.5701, "step": 2237 }, { "epoch": 0.16, "grad_norm": 1.584629981791469, "learning_rate": 9.571241582555925e-06, "loss": 0.5997, "step": 2238 }, { "epoch": 0.16, "grad_norm": 1.7385750394422443, "learning_rate": 9.570775871875119e-06, "loss": 0.5328, "step": 2239 }, { "epoch": 0.16, "grad_norm": 1.6121997739363734, "learning_rate": 9.570309919750445e-06, "loss": 0.6166, "step": 2240 }, { "epoch": 0.16, "grad_norm": 1.0325775857779589, "learning_rate": 9.569843726206513e-06, "loss": 0.4647, "step": 2241 }, { "epoch": 0.16, "grad_norm": 2.061368481924826, "learning_rate": 9.569377291267951e-06, "loss": 0.5906, "step": 2242 }, { "epoch": 0.16, "grad_norm": 1.8702553276692877, "learning_rate": 9.568910614959398e-06, "loss": 0.54, "step": 2243 }, { "epoch": 0.16, "grad_norm": 2.6898081856322613, "learning_rate": 9.568443697305502e-06, "loss": 0.5316, "step": 2244 }, { "epoch": 0.16, "grad_norm": 0.7718021248675229, "learning_rate": 9.567976538330932e-06, "loss": 0.4562, "step": 2245 }, { "epoch": 0.16, "grad_norm": 1.6905313761064888, "learning_rate": 9.567509138060362e-06, "loss": 0.5832, "step": 2246 }, { "epoch": 0.16, "grad_norm": 1.7547169716128395, "learning_rate": 9.567041496518485e-06, "loss": 0.5664, "step": 2247 }, { "epoch": 0.16, "grad_norm": 1.4795891134233017, "learning_rate": 9.566573613729997e-06, "loss": 0.5177, "step": 2248 }, { "epoch": 0.16, "grad_norm": 1.8949248441473716, "learning_rate": 9.56610548971962e-06, "loss": 0.6199, "step": 2249 }, { "epoch": 0.16, "grad_norm": 1.926553837860844, "learning_rate": 9.565637124512078e-06, "loss": 0.5275, "step": 2250 }, { "epoch": 0.16, "grad_norm": 1.7764817792214893, "learning_rate": 9.565168518132112e-06, "loss": 0.5605, "step": 2251 }, { "epoch": 0.16, "grad_norm": 1.835532596312182, "learning_rate": 9.564699670604477e-06, "loss": 0.5921, "step": 2252 }, { "epoch": 0.16, "grad_norm": 0.8417538997575917, "learning_rate": 9.564230581953938e-06, "loss": 0.4619, "step": 2253 }, { "epoch": 0.16, "grad_norm": 7.081020260740657, "learning_rate": 9.563761252205274e-06, "loss": 0.5895, "step": 2254 }, { "epoch": 0.16, "grad_norm": 1.5878941432343703, "learning_rate": 9.563291681383277e-06, "loss": 0.5776, "step": 2255 }, { "epoch": 0.16, "grad_norm": 0.9397587592195954, "learning_rate": 9.56282186951275e-06, "loss": 0.4777, "step": 2256 }, { "epoch": 0.16, "grad_norm": 1.6050895731983348, "learning_rate": 9.562351816618511e-06, "loss": 0.5867, "step": 2257 }, { "epoch": 0.16, "grad_norm": 1.758575022596704, "learning_rate": 9.56188152272539e-06, "loss": 0.5851, "step": 2258 }, { "epoch": 0.16, "grad_norm": 2.045975616697389, "learning_rate": 9.561410987858228e-06, "loss": 0.5362, "step": 2259 }, { "epoch": 0.16, "grad_norm": 1.6928466069758905, "learning_rate": 9.560940212041882e-06, "loss": 0.6096, "step": 2260 }, { "epoch": 0.16, "grad_norm": 1.525379030457798, "learning_rate": 9.56046919530122e-06, "loss": 0.5367, "step": 2261 }, { "epoch": 0.16, "grad_norm": 1.8462895350414437, "learning_rate": 9.559997937661122e-06, "loss": 0.6299, "step": 2262 }, { "epoch": 0.16, "grad_norm": 1.7648216144064959, "learning_rate": 9.559526439146481e-06, "loss": 0.5425, "step": 2263 }, { "epoch": 0.16, "grad_norm": 1.497447950527088, "learning_rate": 9.559054699782204e-06, "loss": 0.5718, "step": 2264 }, { "epoch": 0.16, "grad_norm": 1.6003208778877256, "learning_rate": 9.558582719593211e-06, "loss": 0.6111, "step": 2265 }, { "epoch": 0.16, "grad_norm": 1.6645270149575964, "learning_rate": 9.55811049860443e-06, "loss": 0.4911, "step": 2266 }, { "epoch": 0.16, "grad_norm": 2.1706601368882144, "learning_rate": 9.557638036840809e-06, "loss": 0.5559, "step": 2267 }, { "epoch": 0.16, "grad_norm": 0.8455681766874805, "learning_rate": 9.5571653343273e-06, "loss": 0.4637, "step": 2268 }, { "epoch": 0.16, "grad_norm": 1.7523197952716691, "learning_rate": 9.55669239108888e-06, "loss": 0.5427, "step": 2269 }, { "epoch": 0.16, "grad_norm": 2.8201679885765936, "learning_rate": 9.556219207150527e-06, "loss": 0.5048, "step": 2270 }, { "epoch": 0.16, "grad_norm": 2.803540149610321, "learning_rate": 9.555745782537238e-06, "loss": 0.5977, "step": 2271 }, { "epoch": 0.16, "grad_norm": 2.26316228558787, "learning_rate": 9.555272117274017e-06, "loss": 0.6059, "step": 2272 }, { "epoch": 0.16, "grad_norm": 1.6768609202073599, "learning_rate": 9.55479821138589e-06, "loss": 0.5191, "step": 2273 }, { "epoch": 0.16, "grad_norm": 1.9711481634640542, "learning_rate": 9.554324064897886e-06, "loss": 0.6052, "step": 2274 }, { "epoch": 0.16, "grad_norm": 1.5738268900227714, "learning_rate": 9.553849677835054e-06, "loss": 0.5575, "step": 2275 }, { "epoch": 0.16, "grad_norm": 1.7436015310932238, "learning_rate": 9.553375050222451e-06, "loss": 0.523, "step": 2276 }, { "epoch": 0.16, "grad_norm": 1.8681704949767293, "learning_rate": 9.552900182085148e-06, "loss": 0.6432, "step": 2277 }, { "epoch": 0.16, "grad_norm": 1.9046891759528701, "learning_rate": 9.552425073448231e-06, "loss": 0.5633, "step": 2278 }, { "epoch": 0.16, "grad_norm": 7.567046814384813, "learning_rate": 9.551949724336796e-06, "loss": 0.5314, "step": 2279 }, { "epoch": 0.16, "grad_norm": 0.8532380853835552, "learning_rate": 9.551474134775951e-06, "loss": 0.4945, "step": 2280 }, { "epoch": 0.16, "grad_norm": 1.6764141906563923, "learning_rate": 9.550998304790822e-06, "loss": 0.6226, "step": 2281 }, { "epoch": 0.16, "grad_norm": 1.9309657957773585, "learning_rate": 9.550522234406539e-06, "loss": 0.5896, "step": 2282 }, { "epoch": 0.16, "grad_norm": 1.6499625908058015, "learning_rate": 9.550045923648255e-06, "loss": 0.5864, "step": 2283 }, { "epoch": 0.16, "grad_norm": 1.8110072429982764, "learning_rate": 9.549569372541126e-06, "loss": 0.566, "step": 2284 }, { "epoch": 0.16, "grad_norm": 0.9582411152493978, "learning_rate": 9.549092581110326e-06, "loss": 0.4819, "step": 2285 }, { "epoch": 0.16, "grad_norm": 1.8450713253068163, "learning_rate": 9.548615549381044e-06, "loss": 0.6111, "step": 2286 }, { "epoch": 0.16, "grad_norm": 1.444549650358243, "learning_rate": 9.548138277378474e-06, "loss": 0.5114, "step": 2287 }, { "epoch": 0.16, "grad_norm": 3.5153317888157827, "learning_rate": 9.547660765127828e-06, "loss": 0.5574, "step": 2288 }, { "epoch": 0.16, "grad_norm": 2.066359722809632, "learning_rate": 9.547183012654333e-06, "loss": 0.5588, "step": 2289 }, { "epoch": 0.16, "grad_norm": 1.6305623091568135, "learning_rate": 9.546705019983222e-06, "loss": 0.6113, "step": 2290 }, { "epoch": 0.16, "grad_norm": 1.5801868377912687, "learning_rate": 9.546226787139747e-06, "loss": 0.5991, "step": 2291 }, { "epoch": 0.16, "grad_norm": 1.6969414209449643, "learning_rate": 9.545748314149167e-06, "loss": 0.5764, "step": 2292 }, { "epoch": 0.16, "grad_norm": 1.7697694827073864, "learning_rate": 9.545269601036758e-06, "loss": 0.6768, "step": 2293 }, { "epoch": 0.16, "grad_norm": 0.9425085773043032, "learning_rate": 9.544790647827808e-06, "loss": 0.4692, "step": 2294 }, { "epoch": 0.16, "grad_norm": 1.8764799195706343, "learning_rate": 9.544311454547615e-06, "loss": 0.6233, "step": 2295 }, { "epoch": 0.16, "grad_norm": 0.8325025945740591, "learning_rate": 9.543832021221495e-06, "loss": 0.4423, "step": 2296 }, { "epoch": 0.16, "grad_norm": 2.3341359308923195, "learning_rate": 9.543352347874767e-06, "loss": 0.5281, "step": 2297 }, { "epoch": 0.16, "grad_norm": 1.968979517165017, "learning_rate": 9.542872434532777e-06, "loss": 0.5854, "step": 2298 }, { "epoch": 0.16, "grad_norm": 1.5007280234917677, "learning_rate": 9.54239228122087e-06, "loss": 0.5436, "step": 2299 }, { "epoch": 0.16, "grad_norm": 1.7661531266002837, "learning_rate": 9.541911887964411e-06, "loss": 0.5222, "step": 2300 }, { "epoch": 0.16, "grad_norm": 1.8600673906472942, "learning_rate": 9.541431254788777e-06, "loss": 0.6109, "step": 2301 }, { "epoch": 0.16, "grad_norm": 2.3995035159209324, "learning_rate": 9.540950381719354e-06, "loss": 0.5085, "step": 2302 }, { "epoch": 0.16, "grad_norm": 1.6448540480073275, "learning_rate": 9.540469268781547e-06, "loss": 0.5431, "step": 2303 }, { "epoch": 0.16, "grad_norm": 2.0339863132545495, "learning_rate": 9.539987916000766e-06, "loss": 0.5451, "step": 2304 }, { "epoch": 0.16, "grad_norm": 0.9169628881652333, "learning_rate": 9.53950632340244e-06, "loss": 0.4731, "step": 2305 }, { "epoch": 0.16, "grad_norm": 2.1133686503171796, "learning_rate": 9.539024491012008e-06, "loss": 0.579, "step": 2306 }, { "epoch": 0.16, "grad_norm": 1.8118977628197253, "learning_rate": 9.538542418854923e-06, "loss": 0.5723, "step": 2307 }, { "epoch": 0.16, "grad_norm": 2.2040538902171942, "learning_rate": 9.538060106956648e-06, "loss": 0.5228, "step": 2308 }, { "epoch": 0.16, "grad_norm": 3.117042140835161, "learning_rate": 9.537577555342661e-06, "loss": 0.5867, "step": 2309 }, { "epoch": 0.16, "grad_norm": 2.1791507166901103, "learning_rate": 9.537094764038454e-06, "loss": 0.543, "step": 2310 }, { "epoch": 0.16, "grad_norm": 1.7105894647637536, "learning_rate": 9.536611733069526e-06, "loss": 0.6596, "step": 2311 }, { "epoch": 0.16, "grad_norm": 1.6561209510916075, "learning_rate": 9.536128462461393e-06, "loss": 0.5279, "step": 2312 }, { "epoch": 0.16, "grad_norm": 1.6024225241637267, "learning_rate": 9.535644952239587e-06, "loss": 0.5698, "step": 2313 }, { "epoch": 0.16, "grad_norm": 1.8655417953022526, "learning_rate": 9.535161202429644e-06, "loss": 0.608, "step": 2314 }, { "epoch": 0.16, "grad_norm": 1.6109186024221125, "learning_rate": 9.53467721305712e-06, "loss": 0.5862, "step": 2315 }, { "epoch": 0.16, "grad_norm": 2.0421673838541654, "learning_rate": 9.534192984147579e-06, "loss": 0.659, "step": 2316 }, { "epoch": 0.16, "grad_norm": 2.2089364039580737, "learning_rate": 9.533708515726601e-06, "loss": 0.6131, "step": 2317 }, { "epoch": 0.16, "grad_norm": 1.8999276601250203, "learning_rate": 9.533223807819777e-06, "loss": 0.5364, "step": 2318 }, { "epoch": 0.16, "grad_norm": 1.6010216285103114, "learning_rate": 9.53273886045271e-06, "loss": 0.5575, "step": 2319 }, { "epoch": 0.16, "grad_norm": 1.8098938675614238, "learning_rate": 9.532253673651019e-06, "loss": 0.5323, "step": 2320 }, { "epoch": 0.16, "grad_norm": 1.8270853260375381, "learning_rate": 9.531768247440331e-06, "loss": 0.5857, "step": 2321 }, { "epoch": 0.16, "grad_norm": 1.8707815379764978, "learning_rate": 9.531282581846288e-06, "loss": 0.5775, "step": 2322 }, { "epoch": 0.16, "grad_norm": 1.6018258728666597, "learning_rate": 9.530796676894544e-06, "loss": 0.5362, "step": 2323 }, { "epoch": 0.16, "grad_norm": 1.702538687916517, "learning_rate": 9.53031053261077e-06, "loss": 0.6099, "step": 2324 }, { "epoch": 0.16, "grad_norm": 2.064452673716773, "learning_rate": 9.52982414902064e-06, "loss": 0.6436, "step": 2325 }, { "epoch": 0.17, "grad_norm": 1.5300102986508581, "learning_rate": 9.529337526149851e-06, "loss": 0.5908, "step": 2326 }, { "epoch": 0.17, "grad_norm": 1.1000618819355426, "learning_rate": 9.528850664024106e-06, "loss": 0.4588, "step": 2327 }, { "epoch": 0.17, "grad_norm": 1.7690080936288721, "learning_rate": 9.528363562669122e-06, "loss": 0.6264, "step": 2328 }, { "epoch": 0.17, "grad_norm": 1.8782182668667409, "learning_rate": 9.52787622211063e-06, "loss": 0.5555, "step": 2329 }, { "epoch": 0.17, "grad_norm": 1.923074786664883, "learning_rate": 9.527388642374375e-06, "loss": 0.5828, "step": 2330 }, { "epoch": 0.17, "grad_norm": 2.092065847314824, "learning_rate": 9.526900823486111e-06, "loss": 0.5224, "step": 2331 }, { "epoch": 0.17, "grad_norm": 1.777785016164713, "learning_rate": 9.526412765471606e-06, "loss": 0.6682, "step": 2332 }, { "epoch": 0.17, "grad_norm": 1.5731246622386497, "learning_rate": 9.525924468356641e-06, "loss": 0.5701, "step": 2333 }, { "epoch": 0.17, "grad_norm": 1.764844363487853, "learning_rate": 9.52543593216701e-06, "loss": 0.5693, "step": 2334 }, { "epoch": 0.17, "grad_norm": 1.697014126864203, "learning_rate": 9.52494715692852e-06, "loss": 0.5263, "step": 2335 }, { "epoch": 0.17, "grad_norm": 1.9061245978370185, "learning_rate": 9.524458142666986e-06, "loss": 0.5092, "step": 2336 }, { "epoch": 0.17, "grad_norm": 1.8106454151111813, "learning_rate": 9.523968889408244e-06, "loss": 0.6139, "step": 2337 }, { "epoch": 0.17, "grad_norm": 2.2130575842688014, "learning_rate": 9.523479397178135e-06, "loss": 0.4778, "step": 2338 }, { "epoch": 0.17, "grad_norm": 1.609642801926249, "learning_rate": 9.522989666002516e-06, "loss": 0.5713, "step": 2339 }, { "epoch": 0.17, "grad_norm": 1.7784604859018336, "learning_rate": 9.522499695907256e-06, "loss": 0.5977, "step": 2340 }, { "epoch": 0.17, "grad_norm": 5.797696514364497, "learning_rate": 9.52200948691824e-06, "loss": 0.5925, "step": 2341 }, { "epoch": 0.17, "grad_norm": 1.8901053930844403, "learning_rate": 9.52151903906136e-06, "loss": 0.6032, "step": 2342 }, { "epoch": 0.17, "grad_norm": 5.353562919374689, "learning_rate": 9.521028352362522e-06, "loss": 0.5623, "step": 2343 }, { "epoch": 0.17, "grad_norm": 1.7319738116739942, "learning_rate": 9.520537426847648e-06, "loss": 0.6051, "step": 2344 }, { "epoch": 0.17, "grad_norm": 2.077370649933147, "learning_rate": 9.520046262542671e-06, "loss": 0.6295, "step": 2345 }, { "epoch": 0.17, "grad_norm": 1.7247342881531706, "learning_rate": 9.519554859473532e-06, "loss": 0.5701, "step": 2346 }, { "epoch": 0.17, "grad_norm": 1.761322597026166, "learning_rate": 9.51906321766619e-06, "loss": 0.6101, "step": 2347 }, { "epoch": 0.17, "grad_norm": 2.596272950258052, "learning_rate": 9.518571337146621e-06, "loss": 0.5443, "step": 2348 }, { "epoch": 0.17, "grad_norm": 2.094033754795508, "learning_rate": 9.518079217940799e-06, "loss": 0.5615, "step": 2349 }, { "epoch": 0.17, "grad_norm": 2.053521683117637, "learning_rate": 9.517586860074724e-06, "loss": 0.5793, "step": 2350 }, { "epoch": 0.17, "grad_norm": 1.826958612079418, "learning_rate": 9.517094263574403e-06, "loss": 0.6056, "step": 2351 }, { "epoch": 0.17, "grad_norm": 1.6127263535831167, "learning_rate": 9.516601428465857e-06, "loss": 0.5495, "step": 2352 }, { "epoch": 0.17, "grad_norm": 2.1467744565821385, "learning_rate": 9.51610835477512e-06, "loss": 0.6421, "step": 2353 }, { "epoch": 0.17, "grad_norm": 0.8817592667778203, "learning_rate": 9.515615042528239e-06, "loss": 0.4935, "step": 2354 }, { "epoch": 0.17, "grad_norm": 2.0688745499557646, "learning_rate": 9.515121491751266e-06, "loss": 0.5634, "step": 2355 }, { "epoch": 0.17, "grad_norm": 1.5915495462706053, "learning_rate": 9.51462770247028e-06, "loss": 0.571, "step": 2356 }, { "epoch": 0.17, "grad_norm": 2.123564395079708, "learning_rate": 9.51413367471136e-06, "loss": 0.6312, "step": 2357 }, { "epoch": 0.17, "grad_norm": 3.484944396888248, "learning_rate": 9.513639408500604e-06, "loss": 0.5324, "step": 2358 }, { "epoch": 0.17, "grad_norm": 1.9158530769039848, "learning_rate": 9.513144903864117e-06, "loss": 0.5574, "step": 2359 }, { "epoch": 0.17, "grad_norm": 1.574598276136798, "learning_rate": 9.512650160828027e-06, "loss": 0.5947, "step": 2360 }, { "epoch": 0.17, "grad_norm": 1.8222016352795363, "learning_rate": 9.512155179418463e-06, "loss": 0.6258, "step": 2361 }, { "epoch": 0.17, "grad_norm": 1.958183147702792, "learning_rate": 9.511659959661575e-06, "loss": 0.6181, "step": 2362 }, { "epoch": 0.17, "grad_norm": 1.9894690514747448, "learning_rate": 9.511164501583519e-06, "loss": 0.5062, "step": 2363 }, { "epoch": 0.17, "grad_norm": 1.5584798523936343, "learning_rate": 9.510668805210468e-06, "loss": 0.5024, "step": 2364 }, { "epoch": 0.17, "grad_norm": 1.645465692877003, "learning_rate": 9.510172870568606e-06, "loss": 0.6233, "step": 2365 }, { "epoch": 0.17, "grad_norm": 1.4441956456267315, "learning_rate": 9.509676697684131e-06, "loss": 0.5358, "step": 2366 }, { "epoch": 0.17, "grad_norm": 1.816359462705794, "learning_rate": 9.509180286583253e-06, "loss": 0.5712, "step": 2367 }, { "epoch": 0.17, "grad_norm": 1.5658739742368475, "learning_rate": 9.508683637292192e-06, "loss": 0.6337, "step": 2368 }, { "epoch": 0.17, "grad_norm": 2.561240221894851, "learning_rate": 9.508186749837182e-06, "loss": 0.595, "step": 2369 }, { "epoch": 0.17, "grad_norm": 1.6652886922147188, "learning_rate": 9.507689624244477e-06, "loss": 0.5634, "step": 2370 }, { "epoch": 0.17, "grad_norm": 1.6058241057415983, "learning_rate": 9.507192260540327e-06, "loss": 0.5319, "step": 2371 }, { "epoch": 0.17, "grad_norm": 1.6128326781548763, "learning_rate": 9.506694658751011e-06, "loss": 0.6192, "step": 2372 }, { "epoch": 0.17, "grad_norm": 1.5273121975441475, "learning_rate": 9.506196818902813e-06, "loss": 0.5349, "step": 2373 }, { "epoch": 0.17, "grad_norm": 1.7988139656423823, "learning_rate": 9.50569874102203e-06, "loss": 0.653, "step": 2374 }, { "epoch": 0.17, "grad_norm": 1.577871834516777, "learning_rate": 9.50520042513497e-06, "loss": 0.6027, "step": 2375 }, { "epoch": 0.17, "grad_norm": 1.7341253318314607, "learning_rate": 9.504701871267961e-06, "loss": 0.5152, "step": 2376 }, { "epoch": 0.17, "grad_norm": 1.6647582274055037, "learning_rate": 9.504203079447333e-06, "loss": 0.5751, "step": 2377 }, { "epoch": 0.17, "grad_norm": 1.6271514375630778, "learning_rate": 9.503704049699436e-06, "loss": 0.5853, "step": 2378 }, { "epoch": 0.17, "grad_norm": 1.6956553706206092, "learning_rate": 9.503204782050631e-06, "loss": 0.6051, "step": 2379 }, { "epoch": 0.17, "grad_norm": 1.8799693009905634, "learning_rate": 9.50270527652729e-06, "loss": 0.5817, "step": 2380 }, { "epoch": 0.17, "grad_norm": 1.8132704809033402, "learning_rate": 9.5022055331558e-06, "loss": 0.6311, "step": 2381 }, { "epoch": 0.17, "grad_norm": 1.995185123556651, "learning_rate": 9.501705551962558e-06, "loss": 0.6007, "step": 2382 }, { "epoch": 0.17, "grad_norm": 1.6644974190828545, "learning_rate": 9.501205332973974e-06, "loss": 0.5674, "step": 2383 }, { "epoch": 0.17, "grad_norm": 1.7570429565731864, "learning_rate": 9.500704876216473e-06, "loss": 0.5248, "step": 2384 }, { "epoch": 0.17, "grad_norm": 0.8966532297955477, "learning_rate": 9.50020418171649e-06, "loss": 0.479, "step": 2385 }, { "epoch": 0.17, "grad_norm": 1.6810838603127205, "learning_rate": 9.499703249500473e-06, "loss": 0.6016, "step": 2386 }, { "epoch": 0.17, "grad_norm": 3.4233757754728775, "learning_rate": 9.499202079594884e-06, "loss": 0.5874, "step": 2387 }, { "epoch": 0.17, "grad_norm": 2.49478299949271, "learning_rate": 9.498700672026195e-06, "loss": 0.6048, "step": 2388 }, { "epoch": 0.17, "grad_norm": 1.6611059123839058, "learning_rate": 9.498199026820894e-06, "loss": 0.5815, "step": 2389 }, { "epoch": 0.17, "grad_norm": 2.941658504425458, "learning_rate": 9.497697144005476e-06, "loss": 0.5542, "step": 2390 }, { "epoch": 0.17, "grad_norm": 1.93474719920731, "learning_rate": 9.497195023606457e-06, "loss": 0.4913, "step": 2391 }, { "epoch": 0.17, "grad_norm": 1.711677872143683, "learning_rate": 9.496692665650355e-06, "loss": 0.5308, "step": 2392 }, { "epoch": 0.17, "grad_norm": 3.0114360309634494, "learning_rate": 9.496190070163713e-06, "loss": 0.5771, "step": 2393 }, { "epoch": 0.17, "grad_norm": 1.6358048064465478, "learning_rate": 9.495687237173075e-06, "loss": 0.5726, "step": 2394 }, { "epoch": 0.17, "grad_norm": 1.690474124379073, "learning_rate": 9.495184166705003e-06, "loss": 0.5449, "step": 2395 }, { "epoch": 0.17, "grad_norm": 1.8316524899539741, "learning_rate": 9.494680858786074e-06, "loss": 0.5954, "step": 2396 }, { "epoch": 0.17, "grad_norm": 1.946188987077838, "learning_rate": 9.49417731344287e-06, "loss": 0.5979, "step": 2397 }, { "epoch": 0.17, "grad_norm": 2.006503034739207, "learning_rate": 9.493673530701993e-06, "loss": 0.576, "step": 2398 }, { "epoch": 0.17, "grad_norm": 1.6126666836327572, "learning_rate": 9.493169510590052e-06, "loss": 0.5213, "step": 2399 }, { "epoch": 0.17, "grad_norm": 1.7731688046801604, "learning_rate": 9.492665253133673e-06, "loss": 0.595, "step": 2400 }, { "epoch": 0.17, "grad_norm": 1.5357496729022324, "learning_rate": 9.492160758359491e-06, "loss": 0.5293, "step": 2401 }, { "epoch": 0.17, "grad_norm": 1.7717826287482, "learning_rate": 9.491656026294158e-06, "loss": 0.6017, "step": 2402 }, { "epoch": 0.17, "grad_norm": 2.09912066141132, "learning_rate": 9.491151056964334e-06, "loss": 0.586, "step": 2403 }, { "epoch": 0.17, "grad_norm": 1.7468342686331428, "learning_rate": 9.490645850396693e-06, "loss": 0.5416, "step": 2404 }, { "epoch": 0.17, "grad_norm": 1.8926179760589417, "learning_rate": 9.490140406617921e-06, "loss": 0.6009, "step": 2405 }, { "epoch": 0.17, "grad_norm": 2.1847171493603446, "learning_rate": 9.489634725654718e-06, "loss": 0.5845, "step": 2406 }, { "epoch": 0.17, "grad_norm": 1.4937615229525123, "learning_rate": 9.489128807533795e-06, "loss": 0.5908, "step": 2407 }, { "epoch": 0.17, "grad_norm": 1.6160484915305766, "learning_rate": 9.48862265228188e-06, "loss": 0.5647, "step": 2408 }, { "epoch": 0.17, "grad_norm": 1.884345336285536, "learning_rate": 9.488116259925706e-06, "loss": 0.5971, "step": 2409 }, { "epoch": 0.17, "grad_norm": 2.480142410196229, "learning_rate": 9.487609630492022e-06, "loss": 0.5631, "step": 2410 }, { "epoch": 0.17, "grad_norm": 1.8078728046006136, "learning_rate": 9.487102764007592e-06, "loss": 0.6323, "step": 2411 }, { "epoch": 0.17, "grad_norm": 2.0944809206074964, "learning_rate": 9.486595660499189e-06, "loss": 0.6238, "step": 2412 }, { "epoch": 0.17, "grad_norm": 2.394517765849018, "learning_rate": 9.4860883199936e-06, "loss": 0.5433, "step": 2413 }, { "epoch": 0.17, "grad_norm": 1.8290720132050349, "learning_rate": 9.485580742517628e-06, "loss": 0.5781, "step": 2414 }, { "epoch": 0.17, "grad_norm": 1.9064009309066845, "learning_rate": 9.48507292809808e-06, "loss": 0.6145, "step": 2415 }, { "epoch": 0.17, "grad_norm": 1.8551318214380745, "learning_rate": 9.484564876761781e-06, "loss": 0.5448, "step": 2416 }, { "epoch": 0.17, "grad_norm": 2.8462952433382767, "learning_rate": 9.484056588535572e-06, "loss": 0.5399, "step": 2417 }, { "epoch": 0.17, "grad_norm": 2.0724372539696714, "learning_rate": 9.483548063446298e-06, "loss": 0.5796, "step": 2418 }, { "epoch": 0.17, "grad_norm": 1.6784176301687213, "learning_rate": 9.483039301520824e-06, "loss": 0.5078, "step": 2419 }, { "epoch": 0.17, "grad_norm": 1.64963957147287, "learning_rate": 9.482530302786023e-06, "loss": 0.6285, "step": 2420 }, { "epoch": 0.17, "grad_norm": 2.3877760842652354, "learning_rate": 9.482021067268782e-06, "loss": 0.5282, "step": 2421 }, { "epoch": 0.17, "grad_norm": 1.8976330999110986, "learning_rate": 9.481511594996002e-06, "loss": 0.6747, "step": 2422 }, { "epoch": 0.17, "grad_norm": 2.0969444600793508, "learning_rate": 9.481001885994595e-06, "loss": 0.5325, "step": 2423 }, { "epoch": 0.17, "grad_norm": 0.9100247921519142, "learning_rate": 9.480491940291484e-06, "loss": 0.4672, "step": 2424 }, { "epoch": 0.17, "grad_norm": 0.8936112495772646, "learning_rate": 9.479981757913606e-06, "loss": 0.4736, "step": 2425 }, { "epoch": 0.17, "grad_norm": 1.5875963366773258, "learning_rate": 9.479471338887911e-06, "loss": 0.5744, "step": 2426 }, { "epoch": 0.17, "grad_norm": 1.7139757889933747, "learning_rate": 9.478960683241362e-06, "loss": 0.5372, "step": 2427 }, { "epoch": 0.17, "grad_norm": 1.535768903023024, "learning_rate": 9.478449791000933e-06, "loss": 0.5557, "step": 2428 }, { "epoch": 0.17, "grad_norm": 1.7758958675592578, "learning_rate": 9.47793866219361e-06, "loss": 0.6053, "step": 2429 }, { "epoch": 0.17, "grad_norm": 1.0616471894396087, "learning_rate": 9.477427296846395e-06, "loss": 0.4877, "step": 2430 }, { "epoch": 0.17, "grad_norm": 1.7971349608801896, "learning_rate": 9.4769156949863e-06, "loss": 0.532, "step": 2431 }, { "epoch": 0.17, "grad_norm": 1.7505645109539452, "learning_rate": 9.476403856640345e-06, "loss": 0.576, "step": 2432 }, { "epoch": 0.17, "grad_norm": 1.6537996105470203, "learning_rate": 9.475891781835572e-06, "loss": 0.5429, "step": 2433 }, { "epoch": 0.17, "grad_norm": 1.8266291240575487, "learning_rate": 9.475379470599027e-06, "loss": 0.615, "step": 2434 }, { "epoch": 0.17, "grad_norm": 1.4411971714044132, "learning_rate": 9.474866922957776e-06, "loss": 0.51, "step": 2435 }, { "epoch": 0.17, "grad_norm": 1.4597483685971446, "learning_rate": 9.474354138938888e-06, "loss": 0.538, "step": 2436 }, { "epoch": 0.17, "grad_norm": 2.747895246167512, "learning_rate": 9.473841118569455e-06, "loss": 0.6096, "step": 2437 }, { "epoch": 0.17, "grad_norm": 1.8618997287988055, "learning_rate": 9.473327861876576e-06, "loss": 0.6746, "step": 2438 }, { "epoch": 0.17, "grad_norm": 1.6315229863840135, "learning_rate": 9.47281436888736e-06, "loss": 0.6554, "step": 2439 }, { "epoch": 0.17, "grad_norm": 0.8323197766584988, "learning_rate": 9.472300639628933e-06, "loss": 0.4757, "step": 2440 }, { "epoch": 0.17, "grad_norm": 1.5736540854525471, "learning_rate": 9.471786674128433e-06, "loss": 0.5553, "step": 2441 }, { "epoch": 0.17, "grad_norm": 1.6372213193529723, "learning_rate": 9.47127247241301e-06, "loss": 0.5423, "step": 2442 }, { "epoch": 0.17, "grad_norm": 1.7325522728974998, "learning_rate": 9.47075803450982e-06, "loss": 0.5966, "step": 2443 }, { "epoch": 0.17, "grad_norm": 1.8944352469376458, "learning_rate": 9.470243360446043e-06, "loss": 0.5749, "step": 2444 }, { "epoch": 0.17, "grad_norm": 1.6268722205896935, "learning_rate": 9.469728450248866e-06, "loss": 0.5744, "step": 2445 }, { "epoch": 0.17, "grad_norm": 1.877926411376837, "learning_rate": 9.469213303945486e-06, "loss": 0.5769, "step": 2446 }, { "epoch": 0.17, "grad_norm": 1.8027373940455296, "learning_rate": 9.468697921563115e-06, "loss": 0.6028, "step": 2447 }, { "epoch": 0.17, "grad_norm": 1.629181965037637, "learning_rate": 9.46818230312898e-06, "loss": 0.5655, "step": 2448 }, { "epoch": 0.17, "grad_norm": 1.6439876084894995, "learning_rate": 9.467666448670312e-06, "loss": 0.5465, "step": 2449 }, { "epoch": 0.17, "grad_norm": 1.8740474432866674, "learning_rate": 9.467150358214367e-06, "loss": 0.551, "step": 2450 }, { "epoch": 0.17, "grad_norm": 1.6958393248914008, "learning_rate": 9.466634031788401e-06, "loss": 0.6347, "step": 2451 }, { "epoch": 0.17, "grad_norm": 2.0972736452581824, "learning_rate": 9.466117469419692e-06, "loss": 0.671, "step": 2452 }, { "epoch": 0.17, "grad_norm": 1.6527815487367117, "learning_rate": 9.465600671135524e-06, "loss": 0.5834, "step": 2453 }, { "epoch": 0.17, "grad_norm": 3.293689600144643, "learning_rate": 9.465083636963196e-06, "loss": 0.618, "step": 2454 }, { "epoch": 0.17, "grad_norm": 1.9213563836986014, "learning_rate": 9.464566366930022e-06, "loss": 0.6025, "step": 2455 }, { "epoch": 0.17, "grad_norm": 3.266886281064554, "learning_rate": 9.464048861063324e-06, "loss": 0.5967, "step": 2456 }, { "epoch": 0.17, "grad_norm": 1.5537547728101417, "learning_rate": 9.463531119390439e-06, "loss": 0.5616, "step": 2457 }, { "epoch": 0.17, "grad_norm": 1.6559123481610567, "learning_rate": 9.463013141938717e-06, "loss": 0.601, "step": 2458 }, { "epoch": 0.17, "grad_norm": 1.4638674481982996, "learning_rate": 9.462494928735516e-06, "loss": 0.6046, "step": 2459 }, { "epoch": 0.17, "grad_norm": 1.8826667715435383, "learning_rate": 9.461976479808213e-06, "loss": 0.6134, "step": 2460 }, { "epoch": 0.17, "grad_norm": 1.6524933031858768, "learning_rate": 9.461457795184192e-06, "loss": 0.5403, "step": 2461 }, { "epoch": 0.17, "grad_norm": 1.9242258742765075, "learning_rate": 9.460938874890855e-06, "loss": 0.5526, "step": 2462 }, { "epoch": 0.17, "grad_norm": 1.567211558753031, "learning_rate": 9.46041971895561e-06, "loss": 0.585, "step": 2463 }, { "epoch": 0.17, "grad_norm": 1.7738305324116619, "learning_rate": 9.45990032740588e-06, "loss": 0.5513, "step": 2464 }, { "epoch": 0.17, "grad_norm": 1.0318973246245005, "learning_rate": 9.459380700269104e-06, "loss": 0.4787, "step": 2465 }, { "epoch": 0.17, "grad_norm": 2.208283710445658, "learning_rate": 9.458860837572727e-06, "loss": 0.6566, "step": 2466 }, { "epoch": 0.18, "grad_norm": 1.7286127321962133, "learning_rate": 9.458340739344214e-06, "loss": 0.607, "step": 2467 }, { "epoch": 0.18, "grad_norm": 2.1838260016836855, "learning_rate": 9.457820405611035e-06, "loss": 0.5885, "step": 2468 }, { "epoch": 0.18, "grad_norm": 0.8413501156024874, "learning_rate": 9.45729983640068e-06, "loss": 0.4609, "step": 2469 }, { "epoch": 0.18, "grad_norm": 1.7089212154497195, "learning_rate": 9.456779031740642e-06, "loss": 0.5292, "step": 2470 }, { "epoch": 0.18, "grad_norm": 0.8931667656166066, "learning_rate": 9.456257991658433e-06, "loss": 0.4595, "step": 2471 }, { "epoch": 0.18, "grad_norm": 1.6195460412201779, "learning_rate": 9.455736716181576e-06, "loss": 0.5718, "step": 2472 }, { "epoch": 0.18, "grad_norm": 1.968128516667588, "learning_rate": 9.455215205337612e-06, "loss": 0.5466, "step": 2473 }, { "epoch": 0.18, "grad_norm": 1.830871541462981, "learning_rate": 9.45469345915408e-06, "loss": 0.5387, "step": 2474 }, { "epoch": 0.18, "grad_norm": 1.7696041860215133, "learning_rate": 9.454171477658548e-06, "loss": 0.5541, "step": 2475 }, { "epoch": 0.18, "grad_norm": 5.293294137752951, "learning_rate": 9.453649260878583e-06, "loss": 0.5697, "step": 2476 }, { "epoch": 0.18, "grad_norm": 1.8133653756042332, "learning_rate": 9.453126808841775e-06, "loss": 0.6262, "step": 2477 }, { "epoch": 0.18, "grad_norm": 2.1386056896703494, "learning_rate": 9.45260412157572e-06, "loss": 0.5532, "step": 2478 }, { "epoch": 0.18, "grad_norm": 1.4537159147583336, "learning_rate": 9.452081199108027e-06, "loss": 0.6062, "step": 2479 }, { "epoch": 0.18, "grad_norm": 1.7268718846725477, "learning_rate": 9.45155804146632e-06, "loss": 0.5779, "step": 2480 }, { "epoch": 0.18, "grad_norm": 2.0932544780773954, "learning_rate": 9.451034648678232e-06, "loss": 0.5101, "step": 2481 }, { "epoch": 0.18, "grad_norm": 1.658577362774394, "learning_rate": 9.450511020771413e-06, "loss": 0.532, "step": 2482 }, { "epoch": 0.18, "grad_norm": 2.0597679198817476, "learning_rate": 9.44998715777352e-06, "loss": 0.6297, "step": 2483 }, { "epoch": 0.18, "grad_norm": 1.7359347422340632, "learning_rate": 9.449463059712228e-06, "loss": 0.5324, "step": 2484 }, { "epoch": 0.18, "grad_norm": 1.5653600172103501, "learning_rate": 9.44893872661522e-06, "loss": 0.5413, "step": 2485 }, { "epoch": 0.18, "grad_norm": 1.9908448220712383, "learning_rate": 9.448414158510194e-06, "loss": 0.631, "step": 2486 }, { "epoch": 0.18, "grad_norm": 1.8941959702533835, "learning_rate": 9.447889355424858e-06, "loss": 0.5601, "step": 2487 }, { "epoch": 0.18, "grad_norm": 1.7373479429967977, "learning_rate": 9.447364317386935e-06, "loss": 0.5526, "step": 2488 }, { "epoch": 0.18, "grad_norm": 1.8593675232387306, "learning_rate": 9.446839044424158e-06, "loss": 0.6303, "step": 2489 }, { "epoch": 0.18, "grad_norm": 0.9787391183006106, "learning_rate": 9.446313536564278e-06, "loss": 0.4503, "step": 2490 }, { "epoch": 0.18, "grad_norm": 1.7460381865849395, "learning_rate": 9.445787793835048e-06, "loss": 0.5517, "step": 2491 }, { "epoch": 0.18, "grad_norm": 1.796430503408903, "learning_rate": 9.445261816264243e-06, "loss": 0.5405, "step": 2492 }, { "epoch": 0.18, "grad_norm": 1.6741002548959194, "learning_rate": 9.444735603879646e-06, "loss": 0.5813, "step": 2493 }, { "epoch": 0.18, "grad_norm": 2.065248524577058, "learning_rate": 9.444209156709054e-06, "loss": 0.6159, "step": 2494 }, { "epoch": 0.18, "grad_norm": 1.699939509821089, "learning_rate": 9.443682474780276e-06, "loss": 0.5873, "step": 2495 }, { "epoch": 0.18, "grad_norm": 1.892619412511557, "learning_rate": 9.44315555812113e-06, "loss": 0.5854, "step": 2496 }, { "epoch": 0.18, "grad_norm": 1.7561164698563279, "learning_rate": 9.442628406759453e-06, "loss": 0.5985, "step": 2497 }, { "epoch": 0.18, "grad_norm": 2.0257378001467203, "learning_rate": 9.442101020723091e-06, "loss": 0.5658, "step": 2498 }, { "epoch": 0.18, "grad_norm": 1.9962020537982097, "learning_rate": 9.4415734000399e-06, "loss": 0.5334, "step": 2499 }, { "epoch": 0.18, "grad_norm": 1.8605045222239138, "learning_rate": 9.441045544737754e-06, "loss": 0.607, "step": 2500 }, { "epoch": 0.18, "grad_norm": 1.6947285427500238, "learning_rate": 9.440517454844533e-06, "loss": 0.5146, "step": 2501 }, { "epoch": 0.18, "grad_norm": 1.5738822211526065, "learning_rate": 9.439989130388131e-06, "loss": 0.5358, "step": 2502 }, { "epoch": 0.18, "grad_norm": 1.5443720923897075, "learning_rate": 9.439460571396462e-06, "loss": 0.6456, "step": 2503 }, { "epoch": 0.18, "grad_norm": 1.5259712021550462, "learning_rate": 9.43893177789744e-06, "loss": 0.5691, "step": 2504 }, { "epoch": 0.18, "grad_norm": 2.2429947446680636, "learning_rate": 9.438402749919002e-06, "loss": 0.6197, "step": 2505 }, { "epoch": 0.18, "grad_norm": 3.0806867086018674, "learning_rate": 9.43787348748909e-06, "loss": 0.5443, "step": 2506 }, { "epoch": 0.18, "grad_norm": 1.7401220095691765, "learning_rate": 9.437343990635663e-06, "loss": 0.5429, "step": 2507 }, { "epoch": 0.18, "grad_norm": 2.330594642721548, "learning_rate": 9.436814259386694e-06, "loss": 0.6389, "step": 2508 }, { "epoch": 0.18, "grad_norm": 1.595794243548415, "learning_rate": 9.436284293770157e-06, "loss": 0.5665, "step": 2509 }, { "epoch": 0.18, "grad_norm": 1.8816628668531936, "learning_rate": 9.435754093814053e-06, "loss": 0.549, "step": 2510 }, { "epoch": 0.18, "grad_norm": 1.86407046519396, "learning_rate": 9.435223659546389e-06, "loss": 0.5366, "step": 2511 }, { "epoch": 0.18, "grad_norm": 0.9612606756888484, "learning_rate": 9.434692990995181e-06, "loss": 0.4807, "step": 2512 }, { "epoch": 0.18, "grad_norm": 2.8304292812031946, "learning_rate": 9.434162088188464e-06, "loss": 0.6013, "step": 2513 }, { "epoch": 0.18, "grad_norm": 1.72785573229462, "learning_rate": 9.433630951154278e-06, "loss": 0.6219, "step": 2514 }, { "epoch": 0.18, "grad_norm": 2.0150275618271456, "learning_rate": 9.433099579920686e-06, "loss": 0.6811, "step": 2515 }, { "epoch": 0.18, "grad_norm": 1.9641678805916127, "learning_rate": 9.43256797451575e-06, "loss": 0.5586, "step": 2516 }, { "epoch": 0.18, "grad_norm": 1.6261932823014627, "learning_rate": 9.432036134967552e-06, "loss": 0.5976, "step": 2517 }, { "epoch": 0.18, "grad_norm": 1.5952357192895406, "learning_rate": 9.431504061304191e-06, "loss": 0.5902, "step": 2518 }, { "epoch": 0.18, "grad_norm": 1.6511831482307644, "learning_rate": 9.430971753553768e-06, "loss": 0.586, "step": 2519 }, { "epoch": 0.18, "grad_norm": 1.7341545021843054, "learning_rate": 9.430439211744403e-06, "loss": 0.538, "step": 2520 }, { "epoch": 0.18, "grad_norm": 1.8534417689280291, "learning_rate": 9.429906435904226e-06, "loss": 0.6242, "step": 2521 }, { "epoch": 0.18, "grad_norm": 1.6296209594088824, "learning_rate": 9.429373426061382e-06, "loss": 0.6291, "step": 2522 }, { "epoch": 0.18, "grad_norm": 1.6322217607924494, "learning_rate": 9.428840182244024e-06, "loss": 0.562, "step": 2523 }, { "epoch": 0.18, "grad_norm": 2.1230564844811566, "learning_rate": 9.428306704480322e-06, "loss": 0.6369, "step": 2524 }, { "epoch": 0.18, "grad_norm": 1.8410276193645816, "learning_rate": 9.427772992798452e-06, "loss": 0.566, "step": 2525 }, { "epoch": 0.18, "grad_norm": 1.6649437644322929, "learning_rate": 9.42723904722661e-06, "loss": 0.5851, "step": 2526 }, { "epoch": 0.18, "grad_norm": 2.2667465662235506, "learning_rate": 9.426704867793001e-06, "loss": 0.6399, "step": 2527 }, { "epoch": 0.18, "grad_norm": 3.5862872672793813, "learning_rate": 9.42617045452584e-06, "loss": 0.5796, "step": 2528 }, { "epoch": 0.18, "grad_norm": 1.9409912942537895, "learning_rate": 9.42563580745336e-06, "loss": 0.5914, "step": 2529 }, { "epoch": 0.18, "grad_norm": 1.7506537627776533, "learning_rate": 9.425100926603799e-06, "loss": 0.6026, "step": 2530 }, { "epoch": 0.18, "grad_norm": 2.5094217416300593, "learning_rate": 9.424565812005411e-06, "loss": 0.5734, "step": 2531 }, { "epoch": 0.18, "grad_norm": 4.44513713673995, "learning_rate": 9.424030463686466e-06, "loss": 0.5555, "step": 2532 }, { "epoch": 0.18, "grad_norm": 2.2005453614142296, "learning_rate": 9.423494881675242e-06, "loss": 0.6323, "step": 2533 }, { "epoch": 0.18, "grad_norm": 1.682457195888122, "learning_rate": 9.422959066000029e-06, "loss": 0.5821, "step": 2534 }, { "epoch": 0.18, "grad_norm": 1.8711716474238234, "learning_rate": 9.42242301668913e-06, "loss": 0.5501, "step": 2535 }, { "epoch": 0.18, "grad_norm": 1.5785557628600095, "learning_rate": 9.421886733770863e-06, "loss": 0.5581, "step": 2536 }, { "epoch": 0.18, "grad_norm": 1.7419325115233442, "learning_rate": 9.421350217273555e-06, "loss": 0.6211, "step": 2537 }, { "epoch": 0.18, "grad_norm": 2.283735851004028, "learning_rate": 9.420813467225547e-06, "loss": 0.5394, "step": 2538 }, { "epoch": 0.18, "grad_norm": 0.910097108861766, "learning_rate": 9.420276483655192e-06, "loss": 0.5071, "step": 2539 }, { "epoch": 0.18, "grad_norm": 2.0602061273946553, "learning_rate": 9.419739266590854e-06, "loss": 0.5499, "step": 2540 }, { "epoch": 0.18, "grad_norm": 1.7442449169147471, "learning_rate": 9.419201816060914e-06, "loss": 0.5301, "step": 2541 }, { "epoch": 0.18, "grad_norm": 1.8375513943186386, "learning_rate": 9.41866413209376e-06, "loss": 0.6229, "step": 2542 }, { "epoch": 0.18, "grad_norm": 0.8715300189026357, "learning_rate": 9.418126214717792e-06, "loss": 0.4529, "step": 2543 }, { "epoch": 0.18, "grad_norm": 1.999479686717617, "learning_rate": 9.417588063961428e-06, "loss": 0.61, "step": 2544 }, { "epoch": 0.18, "grad_norm": 1.8746662741308286, "learning_rate": 9.417049679853093e-06, "loss": 0.663, "step": 2545 }, { "epoch": 0.18, "grad_norm": 2.1810014271872875, "learning_rate": 9.416511062421228e-06, "loss": 0.5506, "step": 2546 }, { "epoch": 0.18, "grad_norm": 1.7806649827539918, "learning_rate": 9.415972211694282e-06, "loss": 0.5191, "step": 2547 }, { "epoch": 0.18, "grad_norm": 1.630835975169748, "learning_rate": 9.415433127700722e-06, "loss": 0.6108, "step": 2548 }, { "epoch": 0.18, "grad_norm": 1.7343846734103776, "learning_rate": 9.414893810469021e-06, "loss": 0.5987, "step": 2549 }, { "epoch": 0.18, "grad_norm": 1.9192719975727983, "learning_rate": 9.41435426002767e-06, "loss": 0.5431, "step": 2550 }, { "epoch": 0.18, "grad_norm": 1.596266231846583, "learning_rate": 9.413814476405168e-06, "loss": 0.5753, "step": 2551 }, { "epoch": 0.18, "grad_norm": 2.244285792373908, "learning_rate": 9.41327445963003e-06, "loss": 0.5607, "step": 2552 }, { "epoch": 0.18, "grad_norm": 1.807663120696747, "learning_rate": 9.412734209730782e-06, "loss": 0.5663, "step": 2553 }, { "epoch": 0.18, "grad_norm": 2.6825627736488413, "learning_rate": 9.41219372673596e-06, "loss": 0.5886, "step": 2554 }, { "epoch": 0.18, "grad_norm": 1.7042636129641382, "learning_rate": 9.411653010674114e-06, "loss": 0.5531, "step": 2555 }, { "epoch": 0.18, "grad_norm": 1.6893844849539452, "learning_rate": 9.411112061573808e-06, "loss": 0.5929, "step": 2556 }, { "epoch": 0.18, "grad_norm": 2.03475529720633, "learning_rate": 9.410570879463617e-06, "loss": 0.6023, "step": 2557 }, { "epoch": 0.18, "grad_norm": 1.5236866802479025, "learning_rate": 9.410029464372126e-06, "loss": 0.6155, "step": 2558 }, { "epoch": 0.18, "grad_norm": 1.998713624508661, "learning_rate": 9.409487816327935e-06, "loss": 0.5795, "step": 2559 }, { "epoch": 0.18, "grad_norm": 1.7674547717004658, "learning_rate": 9.408945935359656e-06, "loss": 0.5942, "step": 2560 }, { "epoch": 0.18, "grad_norm": 0.8598829338566049, "learning_rate": 9.408403821495915e-06, "loss": 0.4568, "step": 2561 }, { "epoch": 0.18, "grad_norm": 1.6693526711561928, "learning_rate": 9.407861474765343e-06, "loss": 0.5867, "step": 2562 }, { "epoch": 0.18, "grad_norm": 1.6207470744890085, "learning_rate": 9.407318895196596e-06, "loss": 0.6346, "step": 2563 }, { "epoch": 0.18, "grad_norm": 1.8933049005756721, "learning_rate": 9.406776082818328e-06, "loss": 0.5791, "step": 2564 }, { "epoch": 0.18, "grad_norm": 3.8190919205595097, "learning_rate": 9.406233037659217e-06, "loss": 0.5319, "step": 2565 }, { "epoch": 0.18, "grad_norm": 1.6607986950528482, "learning_rate": 9.405689759747946e-06, "loss": 0.5233, "step": 2566 }, { "epoch": 0.18, "grad_norm": 2.1439716863689484, "learning_rate": 9.405146249113213e-06, "loss": 0.6099, "step": 2567 }, { "epoch": 0.18, "grad_norm": 1.6717084907491306, "learning_rate": 9.404602505783729e-06, "loss": 0.5271, "step": 2568 }, { "epoch": 0.18, "grad_norm": 1.9005525310471492, "learning_rate": 9.404058529788214e-06, "loss": 0.6003, "step": 2569 }, { "epoch": 0.18, "grad_norm": 0.8465762075878627, "learning_rate": 9.403514321155407e-06, "loss": 0.445, "step": 2570 }, { "epoch": 0.18, "grad_norm": 1.7114607612251587, "learning_rate": 9.402969879914051e-06, "loss": 0.5756, "step": 2571 }, { "epoch": 0.18, "grad_norm": 1.5336073288816168, "learning_rate": 9.402425206092906e-06, "loss": 0.5972, "step": 2572 }, { "epoch": 0.18, "grad_norm": 2.3566428881877615, "learning_rate": 9.401880299720747e-06, "loss": 0.5831, "step": 2573 }, { "epoch": 0.18, "grad_norm": 1.5540333849491075, "learning_rate": 9.401335160826352e-06, "loss": 0.5545, "step": 2574 }, { "epoch": 0.18, "grad_norm": 1.7797010533376645, "learning_rate": 9.400789789438523e-06, "loss": 0.5342, "step": 2575 }, { "epoch": 0.18, "grad_norm": 1.626589517172223, "learning_rate": 9.400244185586063e-06, "loss": 0.5823, "step": 2576 }, { "epoch": 0.18, "grad_norm": 2.0485918882118925, "learning_rate": 9.399698349297794e-06, "loss": 0.5319, "step": 2577 }, { "epoch": 0.18, "grad_norm": 0.837404319138409, "learning_rate": 9.399152280602552e-06, "loss": 0.4748, "step": 2578 }, { "epoch": 0.18, "grad_norm": 1.76707225465034, "learning_rate": 9.39860597952918e-06, "loss": 0.582, "step": 2579 }, { "epoch": 0.18, "grad_norm": 1.873390880232523, "learning_rate": 9.398059446106536e-06, "loss": 0.5326, "step": 2580 }, { "epoch": 0.18, "grad_norm": 1.592886485456597, "learning_rate": 9.39751268036349e-06, "loss": 0.5206, "step": 2581 }, { "epoch": 0.18, "grad_norm": 4.968688377926814, "learning_rate": 9.396965682328921e-06, "loss": 0.5266, "step": 2582 }, { "epoch": 0.18, "grad_norm": 1.6004959451998464, "learning_rate": 9.396418452031727e-06, "loss": 0.491, "step": 2583 }, { "epoch": 0.18, "grad_norm": 8.005602093008575, "learning_rate": 9.395870989500813e-06, "loss": 0.5692, "step": 2584 }, { "epoch": 0.18, "grad_norm": 2.7446158299839674, "learning_rate": 9.395323294765098e-06, "loss": 0.5836, "step": 2585 }, { "epoch": 0.18, "grad_norm": 1.761761231406756, "learning_rate": 9.394775367853514e-06, "loss": 0.6417, "step": 2586 }, { "epoch": 0.18, "grad_norm": 1.4918757187191198, "learning_rate": 9.394227208795003e-06, "loss": 0.5622, "step": 2587 }, { "epoch": 0.18, "grad_norm": 4.1218975747136035, "learning_rate": 9.393678817618521e-06, "loss": 0.5997, "step": 2588 }, { "epoch": 0.18, "grad_norm": 1.5175669631492863, "learning_rate": 9.393130194353037e-06, "loss": 0.5458, "step": 2589 }, { "epoch": 0.18, "grad_norm": 1.8118477116796343, "learning_rate": 9.39258133902753e-06, "loss": 0.5306, "step": 2590 }, { "epoch": 0.18, "grad_norm": 2.2659503338473064, "learning_rate": 9.39203225167099e-06, "loss": 0.5671, "step": 2591 }, { "epoch": 0.18, "grad_norm": 2.0327881960757046, "learning_rate": 9.391482932312428e-06, "loss": 0.5995, "step": 2592 }, { "epoch": 0.18, "grad_norm": 2.1207541964283663, "learning_rate": 9.390933380980856e-06, "loss": 0.5097, "step": 2593 }, { "epoch": 0.18, "grad_norm": 1.5349126682520167, "learning_rate": 9.390383597705302e-06, "loss": 0.5493, "step": 2594 }, { "epoch": 0.18, "grad_norm": 1.285321056761365, "learning_rate": 9.389833582514812e-06, "loss": 0.5193, "step": 2595 }, { "epoch": 0.18, "grad_norm": 0.8822277018659853, "learning_rate": 9.389283335438437e-06, "loss": 0.4624, "step": 2596 }, { "epoch": 0.18, "grad_norm": 2.3202671798706858, "learning_rate": 9.388732856505243e-06, "loss": 0.5745, "step": 2597 }, { "epoch": 0.18, "grad_norm": 2.1142874650036854, "learning_rate": 9.388182145744309e-06, "loss": 0.5982, "step": 2598 }, { "epoch": 0.18, "grad_norm": 1.5512397191182885, "learning_rate": 9.387631203184725e-06, "loss": 0.5311, "step": 2599 }, { "epoch": 0.18, "grad_norm": 2.3109479027975364, "learning_rate": 9.38708002885559e-06, "loss": 0.5982, "step": 2600 }, { "epoch": 0.18, "grad_norm": 1.85355755281865, "learning_rate": 9.386528622786027e-06, "loss": 0.5247, "step": 2601 }, { "epoch": 0.18, "grad_norm": 1.55967786379368, "learning_rate": 9.385976985005157e-06, "loss": 0.601, "step": 2602 }, { "epoch": 0.18, "grad_norm": 1.704242366348073, "learning_rate": 9.385425115542121e-06, "loss": 0.6231, "step": 2603 }, { "epoch": 0.18, "grad_norm": 2.4441816375564893, "learning_rate": 9.38487301442607e-06, "loss": 0.659, "step": 2604 }, { "epoch": 0.18, "grad_norm": 2.3802781051591566, "learning_rate": 9.38432068168617e-06, "loss": 0.5739, "step": 2605 }, { "epoch": 0.18, "grad_norm": 1.776515011177324, "learning_rate": 9.383768117351592e-06, "loss": 0.5461, "step": 2606 }, { "epoch": 0.18, "grad_norm": 1.8232847215069217, "learning_rate": 9.38321532145153e-06, "loss": 0.5502, "step": 2607 }, { "epoch": 0.19, "grad_norm": 1.7270063260291788, "learning_rate": 9.382662294015184e-06, "loss": 0.5863, "step": 2608 }, { "epoch": 0.19, "grad_norm": 1.6418939985880066, "learning_rate": 9.382109035071764e-06, "loss": 0.5677, "step": 2609 }, { "epoch": 0.19, "grad_norm": 1.6395678368626774, "learning_rate": 9.381555544650497e-06, "loss": 0.4974, "step": 2610 }, { "epoch": 0.19, "grad_norm": 1.7078904065419123, "learning_rate": 9.381001822780617e-06, "loss": 0.6086, "step": 2611 }, { "epoch": 0.19, "grad_norm": 1.7110560302002797, "learning_rate": 9.380447869491376e-06, "loss": 0.5825, "step": 2612 }, { "epoch": 0.19, "grad_norm": 1.7976698618640448, "learning_rate": 9.379893684812037e-06, "loss": 0.5638, "step": 2613 }, { "epoch": 0.19, "grad_norm": 1.7076217910001894, "learning_rate": 9.379339268771872e-06, "loss": 0.5544, "step": 2614 }, { "epoch": 0.19, "grad_norm": 2.797802868739752, "learning_rate": 9.378784621400167e-06, "loss": 0.5949, "step": 2615 }, { "epoch": 0.19, "grad_norm": 1.6100316016862593, "learning_rate": 9.378229742726222e-06, "loss": 0.5734, "step": 2616 }, { "epoch": 0.19, "grad_norm": 1.7462508333928493, "learning_rate": 9.377674632779345e-06, "loss": 0.5298, "step": 2617 }, { "epoch": 0.19, "grad_norm": 2.6176126953712715, "learning_rate": 9.377119291588863e-06, "loss": 0.5585, "step": 2618 }, { "epoch": 0.19, "grad_norm": 1.7736125071082263, "learning_rate": 9.376563719184106e-06, "loss": 0.5997, "step": 2619 }, { "epoch": 0.19, "grad_norm": 1.5093666569312825, "learning_rate": 9.376007915594425e-06, "loss": 0.5566, "step": 2620 }, { "epoch": 0.19, "grad_norm": 1.7638343018482276, "learning_rate": 9.375451880849177e-06, "loss": 0.5626, "step": 2621 }, { "epoch": 0.19, "grad_norm": 1.3957158352309658, "learning_rate": 9.374895614977735e-06, "loss": 0.5506, "step": 2622 }, { "epoch": 0.19, "grad_norm": 1.6339591563195888, "learning_rate": 9.374339118009482e-06, "loss": 0.5975, "step": 2623 }, { "epoch": 0.19, "grad_norm": 2.1234455541382613, "learning_rate": 9.373782389973814e-06, "loss": 0.5811, "step": 2624 }, { "epoch": 0.19, "grad_norm": 2.1477408365642146, "learning_rate": 9.373225430900142e-06, "loss": 0.5743, "step": 2625 }, { "epoch": 0.19, "grad_norm": 2.0297200595260763, "learning_rate": 9.372668240817882e-06, "loss": 0.5178, "step": 2626 }, { "epoch": 0.19, "grad_norm": 1.634038271434881, "learning_rate": 9.37211081975647e-06, "loss": 0.5434, "step": 2627 }, { "epoch": 0.19, "grad_norm": 3.030124648178972, "learning_rate": 9.37155316774535e-06, "loss": 0.5279, "step": 2628 }, { "epoch": 0.19, "grad_norm": 1.8579014591634935, "learning_rate": 9.370995284813977e-06, "loss": 0.566, "step": 2629 }, { "epoch": 0.19, "grad_norm": 1.6666529706443036, "learning_rate": 9.370437170991824e-06, "loss": 0.6035, "step": 2630 }, { "epoch": 0.19, "grad_norm": 1.7337221909686524, "learning_rate": 9.36987882630837e-06, "loss": 0.584, "step": 2631 }, { "epoch": 0.19, "grad_norm": 1.917474566721065, "learning_rate": 9.369320250793108e-06, "loss": 0.5856, "step": 2632 }, { "epoch": 0.19, "grad_norm": 1.7492187323521373, "learning_rate": 9.368761444475547e-06, "loss": 0.5627, "step": 2633 }, { "epoch": 0.19, "grad_norm": 1.6479695183528194, "learning_rate": 9.368202407385202e-06, "loss": 0.6087, "step": 2634 }, { "epoch": 0.19, "grad_norm": 1.4896338025488556, "learning_rate": 9.367643139551605e-06, "loss": 0.6354, "step": 2635 }, { "epoch": 0.19, "grad_norm": 1.9834919904509418, "learning_rate": 9.367083641004298e-06, "loss": 0.5776, "step": 2636 }, { "epoch": 0.19, "grad_norm": 1.573210213240799, "learning_rate": 9.366523911772833e-06, "loss": 0.5699, "step": 2637 }, { "epoch": 0.19, "grad_norm": 1.6978658747317419, "learning_rate": 9.365963951886783e-06, "loss": 0.5684, "step": 2638 }, { "epoch": 0.19, "grad_norm": 1.5942965702691956, "learning_rate": 9.36540376137572e-06, "loss": 0.5217, "step": 2639 }, { "epoch": 0.19, "grad_norm": 1.5313049684188527, "learning_rate": 9.36484334026924e-06, "loss": 0.5628, "step": 2640 }, { "epoch": 0.19, "grad_norm": 1.7990590356169134, "learning_rate": 9.364282688596944e-06, "loss": 0.5973, "step": 2641 }, { "epoch": 0.19, "grad_norm": 1.5441365433801806, "learning_rate": 9.363721806388448e-06, "loss": 0.5726, "step": 2642 }, { "epoch": 0.19, "grad_norm": 1.6013304826389212, "learning_rate": 9.363160693673381e-06, "loss": 0.6029, "step": 2643 }, { "epoch": 0.19, "grad_norm": 1.8330195954941901, "learning_rate": 9.362599350481381e-06, "loss": 0.5375, "step": 2644 }, { "epoch": 0.19, "grad_norm": 0.9253230096495927, "learning_rate": 9.3620377768421e-06, "loss": 0.4657, "step": 2645 }, { "epoch": 0.19, "grad_norm": 1.6955895606115392, "learning_rate": 9.361475972785204e-06, "loss": 0.5285, "step": 2646 }, { "epoch": 0.19, "grad_norm": 1.6338381961701471, "learning_rate": 9.360913938340367e-06, "loss": 0.6028, "step": 2647 }, { "epoch": 0.19, "grad_norm": 2.5558080064881703, "learning_rate": 9.360351673537281e-06, "loss": 0.5608, "step": 2648 }, { "epoch": 0.19, "grad_norm": 1.8888786418785593, "learning_rate": 9.359789178405645e-06, "loss": 0.611, "step": 2649 }, { "epoch": 0.19, "grad_norm": 1.7465218500521171, "learning_rate": 9.35922645297517e-06, "loss": 0.5865, "step": 2650 }, { "epoch": 0.19, "grad_norm": 1.8012329216709846, "learning_rate": 9.358663497275584e-06, "loss": 0.6239, "step": 2651 }, { "epoch": 0.19, "grad_norm": 1.499749346976142, "learning_rate": 9.35810031133662e-06, "loss": 0.5266, "step": 2652 }, { "epoch": 0.19, "grad_norm": 2.633110205572291, "learning_rate": 9.357536895188032e-06, "loss": 0.6438, "step": 2653 }, { "epoch": 0.19, "grad_norm": 2.0531348147205706, "learning_rate": 9.356973248859582e-06, "loss": 0.58, "step": 2654 }, { "epoch": 0.19, "grad_norm": 1.471793662162461, "learning_rate": 9.356409372381039e-06, "loss": 0.5967, "step": 2655 }, { "epoch": 0.19, "grad_norm": 1.750064614727594, "learning_rate": 9.355845265782192e-06, "loss": 0.5383, "step": 2656 }, { "epoch": 0.19, "grad_norm": 1.938372331552809, "learning_rate": 9.35528092909284e-06, "loss": 0.5725, "step": 2657 }, { "epoch": 0.19, "grad_norm": 1.852632822863334, "learning_rate": 9.35471636234279e-06, "loss": 0.6131, "step": 2658 }, { "epoch": 0.19, "grad_norm": 2.029716513808916, "learning_rate": 9.354151565561866e-06, "loss": 0.5218, "step": 2659 }, { "epoch": 0.19, "grad_norm": 1.5952089424993123, "learning_rate": 9.353586538779904e-06, "loss": 0.5358, "step": 2660 }, { "epoch": 0.19, "grad_norm": 1.8091190944488702, "learning_rate": 9.353021282026746e-06, "loss": 0.5195, "step": 2661 }, { "epoch": 0.19, "grad_norm": 1.7456347832019496, "learning_rate": 9.352455795332256e-06, "loss": 0.5861, "step": 2662 }, { "epoch": 0.19, "grad_norm": 2.367228925100294, "learning_rate": 9.351890078726302e-06, "loss": 0.5339, "step": 2663 }, { "epoch": 0.19, "grad_norm": 0.8503799719453027, "learning_rate": 9.351324132238769e-06, "loss": 0.4918, "step": 2664 }, { "epoch": 0.19, "grad_norm": 1.6050662513289877, "learning_rate": 9.35075795589955e-06, "loss": 0.5781, "step": 2665 }, { "epoch": 0.19, "grad_norm": 1.5132998636556, "learning_rate": 9.350191549738553e-06, "loss": 0.6002, "step": 2666 }, { "epoch": 0.19, "grad_norm": 2.087687216772786, "learning_rate": 9.349624913785698e-06, "loss": 0.5374, "step": 2667 }, { "epoch": 0.19, "grad_norm": 1.532427664951365, "learning_rate": 9.349058048070917e-06, "loss": 0.5825, "step": 2668 }, { "epoch": 0.19, "grad_norm": 1.8306681328865309, "learning_rate": 9.348490952624154e-06, "loss": 0.6313, "step": 2669 }, { "epoch": 0.19, "grad_norm": 1.6244002124866752, "learning_rate": 9.347923627475364e-06, "loss": 0.5784, "step": 2670 }, { "epoch": 0.19, "grad_norm": 0.7325186533487434, "learning_rate": 9.347356072654515e-06, "loss": 0.4917, "step": 2671 }, { "epoch": 0.19, "grad_norm": 1.5886021512734612, "learning_rate": 9.346788288191587e-06, "loss": 0.5674, "step": 2672 }, { "epoch": 0.19, "grad_norm": 2.0005704089100464, "learning_rate": 9.34622027411657e-06, "loss": 0.5647, "step": 2673 }, { "epoch": 0.19, "grad_norm": 1.607453402823058, "learning_rate": 9.345652030459476e-06, "loss": 0.5551, "step": 2674 }, { "epoch": 0.19, "grad_norm": 1.4893400215077714, "learning_rate": 9.345083557250314e-06, "loss": 0.5359, "step": 2675 }, { "epoch": 0.19, "grad_norm": 1.9916520981979555, "learning_rate": 9.344514854519116e-06, "loss": 0.5185, "step": 2676 }, { "epoch": 0.19, "grad_norm": 1.7677222673847466, "learning_rate": 9.343945922295921e-06, "loss": 0.5446, "step": 2677 }, { "epoch": 0.19, "grad_norm": 2.257592909339425, "learning_rate": 9.343376760610784e-06, "loss": 0.5444, "step": 2678 }, { "epoch": 0.19, "grad_norm": 1.6956966298153642, "learning_rate": 9.34280736949377e-06, "loss": 0.5211, "step": 2679 }, { "epoch": 0.19, "grad_norm": 0.9059015333575456, "learning_rate": 9.342237748974955e-06, "loss": 0.4368, "step": 2680 }, { "epoch": 0.19, "grad_norm": 1.6223718405187346, "learning_rate": 9.341667899084428e-06, "loss": 0.5681, "step": 2681 }, { "epoch": 0.19, "grad_norm": 1.93930917580587, "learning_rate": 9.341097819852291e-06, "loss": 0.6123, "step": 2682 }, { "epoch": 0.19, "grad_norm": 1.9454715454201428, "learning_rate": 9.340527511308657e-06, "loss": 0.5691, "step": 2683 }, { "epoch": 0.19, "grad_norm": 1.649709076954933, "learning_rate": 9.339956973483653e-06, "loss": 0.4699, "step": 2684 }, { "epoch": 0.19, "grad_norm": 1.767930572382086, "learning_rate": 9.339386206407415e-06, "loss": 0.6188, "step": 2685 }, { "epoch": 0.19, "grad_norm": 1.713824603689268, "learning_rate": 9.338815210110094e-06, "loss": 0.5829, "step": 2686 }, { "epoch": 0.19, "grad_norm": 2.122288479263122, "learning_rate": 9.338243984621853e-06, "loss": 0.5553, "step": 2687 }, { "epoch": 0.19, "grad_norm": 1.6391471901251369, "learning_rate": 9.337672529972864e-06, "loss": 0.5358, "step": 2688 }, { "epoch": 0.19, "grad_norm": 1.6979606147738098, "learning_rate": 9.337100846193315e-06, "loss": 0.6522, "step": 2689 }, { "epoch": 0.19, "grad_norm": 1.5794117499319513, "learning_rate": 9.336528933313401e-06, "loss": 0.5307, "step": 2690 }, { "epoch": 0.19, "grad_norm": 0.8278962751786917, "learning_rate": 9.335956791363334e-06, "loss": 0.4892, "step": 2691 }, { "epoch": 0.19, "grad_norm": 1.768856805810727, "learning_rate": 9.33538442037334e-06, "loss": 0.613, "step": 2692 }, { "epoch": 0.19, "grad_norm": 1.990152764606004, "learning_rate": 9.33481182037365e-06, "loss": 0.5884, "step": 2693 }, { "epoch": 0.19, "grad_norm": 0.8915895709217105, "learning_rate": 9.33423899139451e-06, "loss": 0.4868, "step": 2694 }, { "epoch": 0.19, "grad_norm": 1.4354521242149685, "learning_rate": 9.33366593346618e-06, "loss": 0.5642, "step": 2695 }, { "epoch": 0.19, "grad_norm": 1.3603694806533402, "learning_rate": 9.333092646618931e-06, "loss": 0.5362, "step": 2696 }, { "epoch": 0.19, "grad_norm": 4.7777472960556855, "learning_rate": 9.332519130883046e-06, "loss": 0.5734, "step": 2697 }, { "epoch": 0.19, "grad_norm": 1.6561012563243063, "learning_rate": 9.331945386288821e-06, "loss": 0.5553, "step": 2698 }, { "epoch": 0.19, "grad_norm": 1.748245783496589, "learning_rate": 9.331371412866561e-06, "loss": 0.5269, "step": 2699 }, { "epoch": 0.19, "grad_norm": 1.767364031410995, "learning_rate": 9.330797210646586e-06, "loss": 0.5688, "step": 2700 }, { "epoch": 0.19, "grad_norm": 1.6544923629096815, "learning_rate": 9.330222779659231e-06, "loss": 0.605, "step": 2701 }, { "epoch": 0.19, "grad_norm": 1.7671716669094415, "learning_rate": 9.329648119934831e-06, "loss": 0.5856, "step": 2702 }, { "epoch": 0.19, "grad_norm": 1.4701893197752867, "learning_rate": 9.329073231503748e-06, "loss": 0.5307, "step": 2703 }, { "epoch": 0.19, "grad_norm": 2.4126540532962952, "learning_rate": 9.32849811439635e-06, "loss": 0.5199, "step": 2704 }, { "epoch": 0.19, "grad_norm": 1.5334625929423407, "learning_rate": 9.327922768643014e-06, "loss": 0.5559, "step": 2705 }, { "epoch": 0.19, "grad_norm": 1.8837425198733049, "learning_rate": 9.327347194274132e-06, "loss": 0.5414, "step": 2706 }, { "epoch": 0.19, "grad_norm": 1.6946195327473563, "learning_rate": 9.326771391320107e-06, "loss": 0.5963, "step": 2707 }, { "epoch": 0.19, "grad_norm": 1.8396022439047297, "learning_rate": 9.326195359811358e-06, "loss": 0.6024, "step": 2708 }, { "epoch": 0.19, "grad_norm": 1.7533513419434477, "learning_rate": 9.325619099778309e-06, "loss": 0.6207, "step": 2709 }, { "epoch": 0.19, "grad_norm": 1.722301659606369, "learning_rate": 9.325042611251402e-06, "loss": 0.5849, "step": 2710 }, { "epoch": 0.19, "grad_norm": 1.582886891750824, "learning_rate": 9.324465894261092e-06, "loss": 0.5153, "step": 2711 }, { "epoch": 0.19, "grad_norm": 1.4863747004972452, "learning_rate": 9.323888948837837e-06, "loss": 0.556, "step": 2712 }, { "epoch": 0.19, "grad_norm": 1.8234970064630815, "learning_rate": 9.323311775012117e-06, "loss": 0.6502, "step": 2713 }, { "epoch": 0.19, "grad_norm": 1.7080485836060524, "learning_rate": 9.322734372814419e-06, "loss": 0.5812, "step": 2714 }, { "epoch": 0.19, "grad_norm": 1.5635875611522034, "learning_rate": 9.322156742275246e-06, "loss": 0.5415, "step": 2715 }, { "epoch": 0.19, "grad_norm": 1.5682992823464865, "learning_rate": 9.321578883425107e-06, "loss": 0.6157, "step": 2716 }, { "epoch": 0.19, "grad_norm": 1.8879359364445825, "learning_rate": 9.321000796294528e-06, "loss": 0.5831, "step": 2717 }, { "epoch": 0.19, "grad_norm": 0.9007530539335525, "learning_rate": 9.320422480914047e-06, "loss": 0.4725, "step": 2718 }, { "epoch": 0.19, "grad_norm": 3.8624337151019157, "learning_rate": 9.319843937314209e-06, "loss": 0.5509, "step": 2719 }, { "epoch": 0.19, "grad_norm": 0.8119596034783825, "learning_rate": 9.319265165525578e-06, "loss": 0.474, "step": 2720 }, { "epoch": 0.19, "grad_norm": 1.7676986712871225, "learning_rate": 9.318686165578723e-06, "loss": 0.5624, "step": 2721 }, { "epoch": 0.19, "grad_norm": 1.8794206953558208, "learning_rate": 9.318106937504233e-06, "loss": 0.587, "step": 2722 }, { "epoch": 0.19, "grad_norm": 2.1736293605127073, "learning_rate": 9.317527481332702e-06, "loss": 0.6039, "step": 2723 }, { "epoch": 0.19, "grad_norm": 1.625501086748244, "learning_rate": 9.316947797094742e-06, "loss": 0.5239, "step": 2724 }, { "epoch": 0.19, "grad_norm": 1.7101962797037158, "learning_rate": 9.316367884820968e-06, "loss": 0.589, "step": 2725 }, { "epoch": 0.19, "grad_norm": 1.5840957800654951, "learning_rate": 9.31578774454202e-06, "loss": 0.6113, "step": 2726 }, { "epoch": 0.19, "grad_norm": 1.7659698660503211, "learning_rate": 9.315207376288535e-06, "loss": 0.588, "step": 2727 }, { "epoch": 0.19, "grad_norm": 1.5562425734213947, "learning_rate": 9.314626780091178e-06, "loss": 0.5898, "step": 2728 }, { "epoch": 0.19, "grad_norm": 1.7443772309489198, "learning_rate": 9.314045955980613e-06, "loss": 0.5514, "step": 2729 }, { "epoch": 0.19, "grad_norm": 1.693981583849735, "learning_rate": 9.313464903987524e-06, "loss": 0.5223, "step": 2730 }, { "epoch": 0.19, "grad_norm": 1.584603245358618, "learning_rate": 9.3128836241426e-06, "loss": 0.5646, "step": 2731 }, { "epoch": 0.19, "grad_norm": 1.5394962604646236, "learning_rate": 9.312302116476552e-06, "loss": 0.598, "step": 2732 }, { "epoch": 0.19, "grad_norm": 1.5676138508998851, "learning_rate": 9.311720381020093e-06, "loss": 0.5469, "step": 2733 }, { "epoch": 0.19, "grad_norm": 1.5681122252900055, "learning_rate": 9.311138417803953e-06, "loss": 0.5413, "step": 2734 }, { "epoch": 0.19, "grad_norm": 1.6633756359732956, "learning_rate": 9.310556226858874e-06, "loss": 0.5579, "step": 2735 }, { "epoch": 0.19, "grad_norm": 1.7936608094995603, "learning_rate": 9.30997380821561e-06, "loss": 0.6586, "step": 2736 }, { "epoch": 0.19, "grad_norm": 1.5687017952092823, "learning_rate": 9.309391161904923e-06, "loss": 0.5779, "step": 2737 }, { "epoch": 0.19, "grad_norm": 1.4469087648770864, "learning_rate": 9.308808287957593e-06, "loss": 0.585, "step": 2738 }, { "epoch": 0.19, "grad_norm": 2.1376181059123085, "learning_rate": 9.308225186404411e-06, "loss": 0.5995, "step": 2739 }, { "epoch": 0.19, "grad_norm": 1.844709613621193, "learning_rate": 9.307641857276175e-06, "loss": 0.6379, "step": 2740 }, { "epoch": 0.19, "grad_norm": 1.5398023019142713, "learning_rate": 9.3070583006037e-06, "loss": 0.5566, "step": 2741 }, { "epoch": 0.19, "grad_norm": 2.3381768289224105, "learning_rate": 9.306474516417811e-06, "loss": 0.6213, "step": 2742 }, { "epoch": 0.19, "grad_norm": 1.9972839329743721, "learning_rate": 9.305890504749347e-06, "loss": 0.6154, "step": 2743 }, { "epoch": 0.19, "grad_norm": 0.991578802275153, "learning_rate": 9.305306265629155e-06, "loss": 0.4661, "step": 2744 }, { "epoch": 0.19, "grad_norm": 1.5947013747185548, "learning_rate": 9.304721799088097e-06, "loss": 0.623, "step": 2745 }, { "epoch": 0.19, "grad_norm": 2.088101787206702, "learning_rate": 9.304137105157049e-06, "loss": 0.524, "step": 2746 }, { "epoch": 0.19, "grad_norm": 1.9508996860461598, "learning_rate": 9.303552183866893e-06, "loss": 0.5553, "step": 2747 }, { "epoch": 0.2, "grad_norm": 1.7034067671472835, "learning_rate": 9.30296703524853e-06, "loss": 0.6173, "step": 2748 }, { "epoch": 0.2, "grad_norm": 0.8510237740879033, "learning_rate": 9.302381659332866e-06, "loss": 0.4478, "step": 2749 }, { "epoch": 0.2, "grad_norm": 1.8232310714905635, "learning_rate": 9.301796056150824e-06, "loss": 0.549, "step": 2750 }, { "epoch": 0.2, "grad_norm": 1.671054990486113, "learning_rate": 9.30121022573334e-06, "loss": 0.54, "step": 2751 }, { "epoch": 0.2, "grad_norm": 2.061084315331839, "learning_rate": 9.300624168111357e-06, "loss": 0.5626, "step": 2752 }, { "epoch": 0.2, "grad_norm": 0.8902155189042619, "learning_rate": 9.30003788331583e-06, "loss": 0.4698, "step": 2753 }, { "epoch": 0.2, "grad_norm": 1.5228198516074885, "learning_rate": 9.299451371377734e-06, "loss": 0.5475, "step": 2754 }, { "epoch": 0.2, "grad_norm": 1.5752239718402736, "learning_rate": 9.298864632328049e-06, "loss": 0.5604, "step": 2755 }, { "epoch": 0.2, "grad_norm": 2.0043172792116377, "learning_rate": 9.298277666197767e-06, "loss": 0.5861, "step": 2756 }, { "epoch": 0.2, "grad_norm": 0.8238068434882334, "learning_rate": 9.29769047301789e-06, "loss": 0.4402, "step": 2757 }, { "epoch": 0.2, "grad_norm": 2.2185094437676742, "learning_rate": 9.297103052819444e-06, "loss": 0.5642, "step": 2758 }, { "epoch": 0.2, "grad_norm": 1.9789034442139897, "learning_rate": 9.296515405633454e-06, "loss": 0.616, "step": 2759 }, { "epoch": 0.2, "grad_norm": 1.5788497012553782, "learning_rate": 9.295927531490961e-06, "loss": 0.5328, "step": 2760 }, { "epoch": 0.2, "grad_norm": 2.780561314582799, "learning_rate": 9.295339430423018e-06, "loss": 0.5737, "step": 2761 }, { "epoch": 0.2, "grad_norm": 2.077415893417806, "learning_rate": 9.294751102460692e-06, "loss": 0.6404, "step": 2762 }, { "epoch": 0.2, "grad_norm": 2.0548604949755305, "learning_rate": 9.294162547635062e-06, "loss": 0.5284, "step": 2763 }, { "epoch": 0.2, "grad_norm": 0.9775033146737689, "learning_rate": 9.293573765977215e-06, "loss": 0.4787, "step": 2764 }, { "epoch": 0.2, "grad_norm": 2.000182574000604, "learning_rate": 9.292984757518253e-06, "loss": 0.5411, "step": 2765 }, { "epoch": 0.2, "grad_norm": 1.6998394501514196, "learning_rate": 9.292395522289288e-06, "loss": 0.4716, "step": 2766 }, { "epoch": 0.2, "grad_norm": 1.7189695574857438, "learning_rate": 9.291806060321449e-06, "loss": 0.5608, "step": 2767 }, { "epoch": 0.2, "grad_norm": 2.110473683346567, "learning_rate": 9.29121637164587e-06, "loss": 0.5241, "step": 2768 }, { "epoch": 0.2, "grad_norm": 0.8534249147036781, "learning_rate": 9.290626456293701e-06, "loss": 0.4485, "step": 2769 }, { "epoch": 0.2, "grad_norm": 2.3891785787577096, "learning_rate": 9.290036314296104e-06, "loss": 0.6317, "step": 2770 }, { "epoch": 0.2, "grad_norm": 1.5502211596037836, "learning_rate": 9.289445945684255e-06, "loss": 0.5107, "step": 2771 }, { "epoch": 0.2, "grad_norm": 2.3289119559217335, "learning_rate": 9.288855350489334e-06, "loss": 0.5443, "step": 2772 }, { "epoch": 0.2, "grad_norm": 1.9206460516814714, "learning_rate": 9.288264528742542e-06, "loss": 0.5944, "step": 2773 }, { "epoch": 0.2, "grad_norm": 2.2887620921149585, "learning_rate": 9.287673480475086e-06, "loss": 0.5925, "step": 2774 }, { "epoch": 0.2, "grad_norm": 1.6678528460446758, "learning_rate": 9.28708220571819e-06, "loss": 0.4989, "step": 2775 }, { "epoch": 0.2, "grad_norm": 1.740611404542252, "learning_rate": 9.286490704503082e-06, "loss": 0.5704, "step": 2776 }, { "epoch": 0.2, "grad_norm": 1.5596327218244386, "learning_rate": 9.285898976861012e-06, "loss": 0.5804, "step": 2777 }, { "epoch": 0.2, "grad_norm": 2.0952128570318793, "learning_rate": 9.285307022823235e-06, "loss": 0.5894, "step": 2778 }, { "epoch": 0.2, "grad_norm": 1.791813107059716, "learning_rate": 9.284714842421022e-06, "loss": 0.5395, "step": 2779 }, { "epoch": 0.2, "grad_norm": 1.444412714915466, "learning_rate": 9.284122435685652e-06, "loss": 0.4949, "step": 2780 }, { "epoch": 0.2, "grad_norm": 2.2455118123722344, "learning_rate": 9.283529802648417e-06, "loss": 0.5367, "step": 2781 }, { "epoch": 0.2, "grad_norm": 2.7313459060112293, "learning_rate": 9.282936943340623e-06, "loss": 0.5786, "step": 2782 }, { "epoch": 0.2, "grad_norm": 1.5245273619710156, "learning_rate": 9.28234385779359e-06, "loss": 0.5678, "step": 2783 }, { "epoch": 0.2, "grad_norm": 2.1017104794512114, "learning_rate": 9.281750546038642e-06, "loss": 0.5955, "step": 2784 }, { "epoch": 0.2, "grad_norm": 1.9365813084822805, "learning_rate": 9.281157008107121e-06, "loss": 0.586, "step": 2785 }, { "epoch": 0.2, "grad_norm": 1.857077446994876, "learning_rate": 9.280563244030381e-06, "loss": 0.6133, "step": 2786 }, { "epoch": 0.2, "grad_norm": 1.9986495017904966, "learning_rate": 9.279969253839785e-06, "loss": 0.5311, "step": 2787 }, { "epoch": 0.2, "grad_norm": 1.7843919570753788, "learning_rate": 9.279375037566712e-06, "loss": 0.5904, "step": 2788 }, { "epoch": 0.2, "grad_norm": 1.9294770715789524, "learning_rate": 9.27878059524255e-06, "loss": 0.5729, "step": 2789 }, { "epoch": 0.2, "grad_norm": 2.0134654628970674, "learning_rate": 9.278185926898696e-06, "loss": 0.5938, "step": 2790 }, { "epoch": 0.2, "grad_norm": 2.515227871471421, "learning_rate": 9.277591032566565e-06, "loss": 0.6, "step": 2791 }, { "epoch": 0.2, "grad_norm": 2.0360184705045654, "learning_rate": 9.276995912277582e-06, "loss": 0.6007, "step": 2792 }, { "epoch": 0.2, "grad_norm": 0.8984035466232516, "learning_rate": 9.276400566063182e-06, "loss": 0.4697, "step": 2793 }, { "epoch": 0.2, "grad_norm": 1.9833654984613605, "learning_rate": 9.275804993954813e-06, "loss": 0.5487, "step": 2794 }, { "epoch": 0.2, "grad_norm": 1.8656881167730752, "learning_rate": 9.275209195983939e-06, "loss": 0.6146, "step": 2795 }, { "epoch": 0.2, "grad_norm": 2.037683052109617, "learning_rate": 9.274613172182025e-06, "loss": 0.6077, "step": 2796 }, { "epoch": 0.2, "grad_norm": 2.121614600583231, "learning_rate": 9.274016922580561e-06, "loss": 0.6099, "step": 2797 }, { "epoch": 0.2, "grad_norm": 1.7251638364429225, "learning_rate": 9.273420447211041e-06, "loss": 0.6012, "step": 2798 }, { "epoch": 0.2, "grad_norm": 1.4061280686221869, "learning_rate": 9.272823746104972e-06, "loss": 0.4862, "step": 2799 }, { "epoch": 0.2, "grad_norm": 1.3959926916935366, "learning_rate": 9.272226819293875e-06, "loss": 0.573, "step": 2800 }, { "epoch": 0.2, "grad_norm": 1.5441517903712119, "learning_rate": 9.27162966680928e-06, "loss": 0.5782, "step": 2801 }, { "epoch": 0.2, "grad_norm": 1.9604951272425812, "learning_rate": 9.271032288682732e-06, "loss": 0.5661, "step": 2802 }, { "epoch": 0.2, "grad_norm": 1.6611840679445224, "learning_rate": 9.270434684945788e-06, "loss": 0.6103, "step": 2803 }, { "epoch": 0.2, "grad_norm": 1.666850473547599, "learning_rate": 9.269836855630012e-06, "loss": 0.5854, "step": 2804 }, { "epoch": 0.2, "grad_norm": 1.9634289015052904, "learning_rate": 9.269238800766984e-06, "loss": 0.546, "step": 2805 }, { "epoch": 0.2, "grad_norm": 2.325465273646718, "learning_rate": 9.268640520388299e-06, "loss": 0.61, "step": 2806 }, { "epoch": 0.2, "grad_norm": 1.6763124790106425, "learning_rate": 9.268042014525556e-06, "loss": 0.5733, "step": 2807 }, { "epoch": 0.2, "grad_norm": 1.871979398344286, "learning_rate": 9.267443283210372e-06, "loss": 0.5879, "step": 2808 }, { "epoch": 0.2, "grad_norm": 1.8762517597804866, "learning_rate": 9.266844326474373e-06, "loss": 0.6609, "step": 2809 }, { "epoch": 0.2, "grad_norm": 1.622471112412664, "learning_rate": 9.266245144349201e-06, "loss": 0.6715, "step": 2810 }, { "epoch": 0.2, "grad_norm": 1.5812503040460713, "learning_rate": 9.265645736866502e-06, "loss": 0.5448, "step": 2811 }, { "epoch": 0.2, "grad_norm": 18.176651167269117, "learning_rate": 9.265046104057943e-06, "loss": 0.5652, "step": 2812 }, { "epoch": 0.2, "grad_norm": 1.7181365579228223, "learning_rate": 9.264446245955196e-06, "loss": 0.641, "step": 2813 }, { "epoch": 0.2, "grad_norm": 1.867773637121081, "learning_rate": 9.263846162589948e-06, "loss": 0.5624, "step": 2814 }, { "epoch": 0.2, "grad_norm": 2.093995861693024, "learning_rate": 9.263245853993899e-06, "loss": 0.5532, "step": 2815 }, { "epoch": 0.2, "grad_norm": 2.8234002771954954, "learning_rate": 9.262645320198757e-06, "loss": 0.5422, "step": 2816 }, { "epoch": 0.2, "grad_norm": 1.7635189003397638, "learning_rate": 9.262044561236246e-06, "loss": 0.5482, "step": 2817 }, { "epoch": 0.2, "grad_norm": 2.962676811626448, "learning_rate": 9.261443577138098e-06, "loss": 0.6296, "step": 2818 }, { "epoch": 0.2, "grad_norm": 1.8032146205806825, "learning_rate": 9.260842367936063e-06, "loss": 0.5946, "step": 2819 }, { "epoch": 0.2, "grad_norm": 1.7676538069669736, "learning_rate": 9.260240933661894e-06, "loss": 0.6219, "step": 2820 }, { "epoch": 0.2, "grad_norm": 2.133037881416358, "learning_rate": 9.259639274347365e-06, "loss": 0.5649, "step": 2821 }, { "epoch": 0.2, "grad_norm": 2.2544743080111105, "learning_rate": 9.259037390024254e-06, "loss": 0.6794, "step": 2822 }, { "epoch": 0.2, "grad_norm": 1.8091130409575953, "learning_rate": 9.258435280724359e-06, "loss": 0.5313, "step": 2823 }, { "epoch": 0.2, "grad_norm": 1.4620843542914332, "learning_rate": 9.257832946479481e-06, "loss": 0.5196, "step": 2824 }, { "epoch": 0.2, "grad_norm": 1.8066344207198557, "learning_rate": 9.25723038732144e-06, "loss": 0.5854, "step": 2825 }, { "epoch": 0.2, "grad_norm": 1.8705484742510068, "learning_rate": 9.256627603282065e-06, "loss": 0.5888, "step": 2826 }, { "epoch": 0.2, "grad_norm": 1.777180208071855, "learning_rate": 9.256024594393195e-06, "loss": 0.6505, "step": 2827 }, { "epoch": 0.2, "grad_norm": 1.6306733296660285, "learning_rate": 9.255421360686685e-06, "loss": 0.5417, "step": 2828 }, { "epoch": 0.2, "grad_norm": 1.6573642234594272, "learning_rate": 9.2548179021944e-06, "loss": 0.5872, "step": 2829 }, { "epoch": 0.2, "grad_norm": 1.865915024745197, "learning_rate": 9.254214218948217e-06, "loss": 0.6183, "step": 2830 }, { "epoch": 0.2, "grad_norm": 0.8541452353607615, "learning_rate": 9.253610310980023e-06, "loss": 0.5014, "step": 2831 }, { "epoch": 0.2, "grad_norm": 0.8707580861874467, "learning_rate": 9.253006178321719e-06, "loss": 0.4758, "step": 2832 }, { "epoch": 0.2, "grad_norm": 1.772135521118272, "learning_rate": 9.252401821005216e-06, "loss": 0.5851, "step": 2833 }, { "epoch": 0.2, "grad_norm": 4.192901032040957, "learning_rate": 9.25179723906244e-06, "loss": 0.6441, "step": 2834 }, { "epoch": 0.2, "grad_norm": 1.5529191963129034, "learning_rate": 9.251192432525328e-06, "loss": 0.5416, "step": 2835 }, { "epoch": 0.2, "grad_norm": 1.5464017444899205, "learning_rate": 9.250587401425828e-06, "loss": 0.5693, "step": 2836 }, { "epoch": 0.2, "grad_norm": 1.907646435883756, "learning_rate": 9.249982145795897e-06, "loss": 0.5664, "step": 2837 }, { "epoch": 0.2, "grad_norm": 14.67019518429775, "learning_rate": 9.249376665667509e-06, "loss": 0.554, "step": 2838 }, { "epoch": 0.2, "grad_norm": 1.6114690998487287, "learning_rate": 9.248770961072647e-06, "loss": 0.5143, "step": 2839 }, { "epoch": 0.2, "grad_norm": 1.6446154935481185, "learning_rate": 9.248165032043304e-06, "loss": 0.5739, "step": 2840 }, { "epoch": 0.2, "grad_norm": 2.290768937714046, "learning_rate": 9.247558878611489e-06, "loss": 0.5079, "step": 2841 }, { "epoch": 0.2, "grad_norm": 1.5423741829977375, "learning_rate": 9.246952500809223e-06, "loss": 0.6527, "step": 2842 }, { "epoch": 0.2, "grad_norm": 1.7169288362807484, "learning_rate": 9.246345898668534e-06, "loss": 0.514, "step": 2843 }, { "epoch": 0.2, "grad_norm": 1.7315466140140183, "learning_rate": 9.245739072221467e-06, "loss": 0.5914, "step": 2844 }, { "epoch": 0.2, "grad_norm": 1.997609481479007, "learning_rate": 9.245132021500076e-06, "loss": 0.6162, "step": 2845 }, { "epoch": 0.2, "grad_norm": 1.8144813243882123, "learning_rate": 9.244524746536427e-06, "loss": 0.5997, "step": 2846 }, { "epoch": 0.2, "grad_norm": 1.6601213351828352, "learning_rate": 9.243917247362597e-06, "loss": 0.5412, "step": 2847 }, { "epoch": 0.2, "grad_norm": 1.8440281798990021, "learning_rate": 9.24330952401068e-06, "loss": 0.6, "step": 2848 }, { "epoch": 0.2, "grad_norm": 1.7971340389663961, "learning_rate": 9.242701576512774e-06, "loss": 0.5972, "step": 2849 }, { "epoch": 0.2, "grad_norm": 2.138139461080658, "learning_rate": 9.242093404900995e-06, "loss": 0.5313, "step": 2850 }, { "epoch": 0.2, "grad_norm": 1.668173632279138, "learning_rate": 9.241485009207469e-06, "loss": 0.5332, "step": 2851 }, { "epoch": 0.2, "grad_norm": 1.6202216299479468, "learning_rate": 9.24087638946433e-06, "loss": 0.5339, "step": 2852 }, { "epoch": 0.2, "grad_norm": 1.250698925091978, "learning_rate": 9.240267545703733e-06, "loss": 0.4896, "step": 2853 }, { "epoch": 0.2, "grad_norm": 1.9421845662123198, "learning_rate": 9.239658477957837e-06, "loss": 0.6124, "step": 2854 }, { "epoch": 0.2, "grad_norm": 1.625590848636501, "learning_rate": 9.239049186258811e-06, "loss": 0.6287, "step": 2855 }, { "epoch": 0.2, "grad_norm": 1.7032259866182387, "learning_rate": 9.238439670638844e-06, "loss": 0.6122, "step": 2856 }, { "epoch": 0.2, "grad_norm": 1.6796453226253125, "learning_rate": 9.237829931130132e-06, "loss": 0.5678, "step": 2857 }, { "epoch": 0.2, "grad_norm": 0.7696402479926759, "learning_rate": 9.237219967764884e-06, "loss": 0.4786, "step": 2858 }, { "epoch": 0.2, "grad_norm": 1.8654624172911678, "learning_rate": 9.236609780575318e-06, "loss": 0.577, "step": 2859 }, { "epoch": 0.2, "grad_norm": 3.012730219312984, "learning_rate": 9.23599936959367e-06, "loss": 0.6212, "step": 2860 }, { "epoch": 0.2, "grad_norm": 1.6802048966716066, "learning_rate": 9.23538873485218e-06, "loss": 0.4906, "step": 2861 }, { "epoch": 0.2, "grad_norm": 1.6741699659185325, "learning_rate": 9.234777876383107e-06, "loss": 0.6046, "step": 2862 }, { "epoch": 0.2, "grad_norm": 1.7592852471663012, "learning_rate": 9.234166794218718e-06, "loss": 0.6263, "step": 2863 }, { "epoch": 0.2, "grad_norm": 1.737544124461475, "learning_rate": 9.233555488391289e-06, "loss": 0.5478, "step": 2864 }, { "epoch": 0.2, "grad_norm": 1.7589584993983127, "learning_rate": 9.232943958933117e-06, "loss": 0.6087, "step": 2865 }, { "epoch": 0.2, "grad_norm": 1.5481233501097502, "learning_rate": 9.232332205876498e-06, "loss": 0.5838, "step": 2866 }, { "epoch": 0.2, "grad_norm": 1.5694267599773661, "learning_rate": 9.231720229253753e-06, "loss": 0.516, "step": 2867 }, { "epoch": 0.2, "grad_norm": 1.4905733704470852, "learning_rate": 9.231108029097207e-06, "loss": 0.5854, "step": 2868 }, { "epoch": 0.2, "grad_norm": 1.6344975619870183, "learning_rate": 9.230495605439195e-06, "loss": 0.5488, "step": 2869 }, { "epoch": 0.2, "grad_norm": 1.6146690577385583, "learning_rate": 9.229882958312074e-06, "loss": 0.5881, "step": 2870 }, { "epoch": 0.2, "grad_norm": 2.538163768919168, "learning_rate": 9.229270087748201e-06, "loss": 0.5996, "step": 2871 }, { "epoch": 0.2, "grad_norm": 2.44517415467902, "learning_rate": 9.22865699377995e-06, "loss": 0.6062, "step": 2872 }, { "epoch": 0.2, "grad_norm": 1.9494840114356795, "learning_rate": 9.22804367643971e-06, "loss": 0.5271, "step": 2873 }, { "epoch": 0.2, "grad_norm": 0.9095871776060641, "learning_rate": 9.227430135759875e-06, "loss": 0.464, "step": 2874 }, { "epoch": 0.2, "grad_norm": 1.6421233792030172, "learning_rate": 9.226816371772855e-06, "loss": 0.539, "step": 2875 }, { "epoch": 0.2, "grad_norm": 1.7979969520696786, "learning_rate": 9.226202384511074e-06, "loss": 0.6137, "step": 2876 }, { "epoch": 0.2, "grad_norm": 2.586468132922063, "learning_rate": 9.225588174006962e-06, "loss": 0.6047, "step": 2877 }, { "epoch": 0.2, "grad_norm": 2.2536909154667213, "learning_rate": 9.224973740292964e-06, "loss": 0.4742, "step": 2878 }, { "epoch": 0.2, "grad_norm": 1.7964701908226992, "learning_rate": 9.224359083401538e-06, "loss": 0.5629, "step": 2879 }, { "epoch": 0.2, "grad_norm": 3.811896545565224, "learning_rate": 9.223744203365148e-06, "loss": 0.5241, "step": 2880 }, { "epoch": 0.2, "grad_norm": 1.8604214087018058, "learning_rate": 9.22312910021628e-06, "loss": 0.5737, "step": 2881 }, { "epoch": 0.2, "grad_norm": 1.8047742776489735, "learning_rate": 9.222513773987422e-06, "loss": 0.6403, "step": 2882 }, { "epoch": 0.2, "grad_norm": 2.0792131050815774, "learning_rate": 9.221898224711078e-06, "loss": 0.5815, "step": 2883 }, { "epoch": 0.2, "grad_norm": 1.8363959958204494, "learning_rate": 9.221282452419767e-06, "loss": 0.5625, "step": 2884 }, { "epoch": 0.2, "grad_norm": 1.696690120125854, "learning_rate": 9.220666457146011e-06, "loss": 0.6006, "step": 2885 }, { "epoch": 0.2, "grad_norm": 1.6124158671281108, "learning_rate": 9.220050238922351e-06, "loss": 0.5504, "step": 2886 }, { "epoch": 0.2, "grad_norm": 1.735127196658405, "learning_rate": 9.219433797781338e-06, "loss": 0.6253, "step": 2887 }, { "epoch": 0.2, "grad_norm": 1.6455567825470072, "learning_rate": 9.218817133755536e-06, "loss": 0.6307, "step": 2888 }, { "epoch": 0.21, "grad_norm": 0.8773132925293483, "learning_rate": 9.218200246877516e-06, "loss": 0.4761, "step": 2889 }, { "epoch": 0.21, "grad_norm": 1.646725963449686, "learning_rate": 9.217583137179864e-06, "loss": 0.5587, "step": 2890 }, { "epoch": 0.21, "grad_norm": 1.6807410412100026, "learning_rate": 9.216965804695182e-06, "loss": 0.5225, "step": 2891 }, { "epoch": 0.21, "grad_norm": 2.839811009167378, "learning_rate": 9.216348249456077e-06, "loss": 0.558, "step": 2892 }, { "epoch": 0.21, "grad_norm": 10.782692947323637, "learning_rate": 9.21573047149517e-06, "loss": 0.5606, "step": 2893 }, { "epoch": 0.21, "grad_norm": 1.501291564678969, "learning_rate": 9.215112470845096e-06, "loss": 0.5906, "step": 2894 }, { "epoch": 0.21, "grad_norm": 1.3754087025992447, "learning_rate": 9.214494247538497e-06, "loss": 0.5207, "step": 2895 }, { "epoch": 0.21, "grad_norm": 2.085231802716382, "learning_rate": 9.213875801608032e-06, "loss": 0.5792, "step": 2896 }, { "epoch": 0.21, "grad_norm": 1.7914285452031427, "learning_rate": 9.213257133086368e-06, "loss": 0.5254, "step": 2897 }, { "epoch": 0.21, "grad_norm": 2.334190065284943, "learning_rate": 9.212638242006186e-06, "loss": 0.647, "step": 2898 }, { "epoch": 0.21, "grad_norm": 0.9447230423904658, "learning_rate": 9.212019128400177e-06, "loss": 0.4836, "step": 2899 }, { "epoch": 0.21, "grad_norm": 1.6731341983341819, "learning_rate": 9.211399792301048e-06, "loss": 0.6168, "step": 2900 }, { "epoch": 0.21, "grad_norm": 1.6201399854113188, "learning_rate": 9.21078023374151e-06, "loss": 0.5598, "step": 2901 }, { "epoch": 0.21, "grad_norm": 1.731615926063343, "learning_rate": 9.210160452754292e-06, "loss": 0.5306, "step": 2902 }, { "epoch": 0.21, "grad_norm": 2.065902959931956, "learning_rate": 9.209540449372132e-06, "loss": 0.5845, "step": 2903 }, { "epoch": 0.21, "grad_norm": 1.8482994456875714, "learning_rate": 9.208920223627781e-06, "loss": 0.5331, "step": 2904 }, { "epoch": 0.21, "grad_norm": 1.652143875836721, "learning_rate": 9.208299775554004e-06, "loss": 0.5631, "step": 2905 }, { "epoch": 0.21, "grad_norm": 1.5254768629588285, "learning_rate": 9.207679105183573e-06, "loss": 0.5309, "step": 2906 }, { "epoch": 0.21, "grad_norm": 1.7927125695773027, "learning_rate": 9.207058212549273e-06, "loss": 0.532, "step": 2907 }, { "epoch": 0.21, "grad_norm": 1.4798839477708898, "learning_rate": 9.206437097683905e-06, "loss": 0.5859, "step": 2908 }, { "epoch": 0.21, "grad_norm": 3.567369089956538, "learning_rate": 9.205815760620274e-06, "loss": 0.5836, "step": 2909 }, { "epoch": 0.21, "grad_norm": 1.9159242475402147, "learning_rate": 9.205194201391204e-06, "loss": 0.6332, "step": 2910 }, { "epoch": 0.21, "grad_norm": 1.5423109971678717, "learning_rate": 9.204572420029527e-06, "loss": 0.5142, "step": 2911 }, { "epoch": 0.21, "grad_norm": 3.308868578908772, "learning_rate": 9.203950416568088e-06, "loss": 0.5626, "step": 2912 }, { "epoch": 0.21, "grad_norm": 1.676845206748622, "learning_rate": 9.203328191039742e-06, "loss": 0.5926, "step": 2913 }, { "epoch": 0.21, "grad_norm": 0.8533260914837673, "learning_rate": 9.20270574347736e-06, "loss": 0.4589, "step": 2914 }, { "epoch": 0.21, "grad_norm": 1.7810328913358453, "learning_rate": 9.20208307391382e-06, "loss": 0.5349, "step": 2915 }, { "epoch": 0.21, "grad_norm": 1.5765708446740923, "learning_rate": 9.201460182382012e-06, "loss": 0.6231, "step": 2916 }, { "epoch": 0.21, "grad_norm": 1.4799066828948022, "learning_rate": 9.20083706891484e-06, "loss": 0.6258, "step": 2917 }, { "epoch": 0.21, "grad_norm": 1.5957737138162542, "learning_rate": 9.200213733545221e-06, "loss": 0.5865, "step": 2918 }, { "epoch": 0.21, "grad_norm": 2.0543416602382885, "learning_rate": 9.19959017630608e-06, "loss": 0.6032, "step": 2919 }, { "epoch": 0.21, "grad_norm": 1.846278467960694, "learning_rate": 9.198966397230356e-06, "loss": 0.6174, "step": 2920 }, { "epoch": 0.21, "grad_norm": 1.6882515879058753, "learning_rate": 9.198342396350998e-06, "loss": 0.5394, "step": 2921 }, { "epoch": 0.21, "grad_norm": 1.977109529774458, "learning_rate": 9.19771817370097e-06, "loss": 0.5054, "step": 2922 }, { "epoch": 0.21, "grad_norm": 1.709262258868334, "learning_rate": 9.197093729313243e-06, "loss": 0.5326, "step": 2923 }, { "epoch": 0.21, "grad_norm": 1.7441989459637544, "learning_rate": 9.196469063220804e-06, "loss": 0.5914, "step": 2924 }, { "epoch": 0.21, "grad_norm": 2.0429338727372692, "learning_rate": 9.195844175456649e-06, "loss": 0.578, "step": 2925 }, { "epoch": 0.21, "grad_norm": 1.8170650906672365, "learning_rate": 9.195219066053787e-06, "loss": 0.6785, "step": 2926 }, { "epoch": 0.21, "grad_norm": 1.5271149899967154, "learning_rate": 9.194593735045242e-06, "loss": 0.5416, "step": 2927 }, { "epoch": 0.21, "grad_norm": 1.6289956453148233, "learning_rate": 9.193968182464039e-06, "loss": 0.5121, "step": 2928 }, { "epoch": 0.21, "grad_norm": 1.8133352683877872, "learning_rate": 9.193342408343227e-06, "loss": 0.5974, "step": 2929 }, { "epoch": 0.21, "grad_norm": 2.1691409271057283, "learning_rate": 9.192716412715858e-06, "loss": 0.5329, "step": 2930 }, { "epoch": 0.21, "grad_norm": 2.3941092741171044, "learning_rate": 9.192090195615004e-06, "loss": 0.6266, "step": 2931 }, { "epoch": 0.21, "grad_norm": 1.8610670101484466, "learning_rate": 9.191463757073739e-06, "loss": 0.5836, "step": 2932 }, { "epoch": 0.21, "grad_norm": 2.617513762639927, "learning_rate": 9.190837097125156e-06, "loss": 0.5215, "step": 2933 }, { "epoch": 0.21, "grad_norm": 1.7045783531611103, "learning_rate": 9.190210215802358e-06, "loss": 0.6331, "step": 2934 }, { "epoch": 0.21, "grad_norm": 1.689146392103963, "learning_rate": 9.189583113138458e-06, "loss": 0.6042, "step": 2935 }, { "epoch": 0.21, "grad_norm": 1.795051603133157, "learning_rate": 9.188955789166582e-06, "loss": 0.5732, "step": 2936 }, { "epoch": 0.21, "grad_norm": 1.8505155464140037, "learning_rate": 9.188328243919866e-06, "loss": 0.5008, "step": 2937 }, { "epoch": 0.21, "grad_norm": 1.6959415603088208, "learning_rate": 9.187700477431461e-06, "loss": 0.6485, "step": 2938 }, { "epoch": 0.21, "grad_norm": 1.5860746687300702, "learning_rate": 9.187072489734525e-06, "loss": 0.5644, "step": 2939 }, { "epoch": 0.21, "grad_norm": 1.656284732460159, "learning_rate": 9.186444280862235e-06, "loss": 0.5986, "step": 2940 }, { "epoch": 0.21, "grad_norm": 1.8266162060923723, "learning_rate": 9.18581585084777e-06, "loss": 0.5277, "step": 2941 }, { "epoch": 0.21, "grad_norm": 1.486235889249101, "learning_rate": 9.18518719972433e-06, "loss": 0.5234, "step": 2942 }, { "epoch": 0.21, "grad_norm": 1.3650488659449154, "learning_rate": 9.184558327525122e-06, "loss": 0.5241, "step": 2943 }, { "epoch": 0.21, "grad_norm": 1.4495895950530286, "learning_rate": 9.183929234283362e-06, "loss": 0.5361, "step": 2944 }, { "epoch": 0.21, "grad_norm": 1.5401359309107288, "learning_rate": 9.183299920032282e-06, "loss": 0.4995, "step": 2945 }, { "epoch": 0.21, "grad_norm": 1.8300843294979359, "learning_rate": 9.182670384805127e-06, "loss": 0.5308, "step": 2946 }, { "epoch": 0.21, "grad_norm": 3.184766244684131, "learning_rate": 9.18204062863515e-06, "loss": 0.5294, "step": 2947 }, { "epoch": 0.21, "grad_norm": 2.501208382325759, "learning_rate": 9.181410651555613e-06, "loss": 0.5696, "step": 2948 }, { "epoch": 0.21, "grad_norm": 2.7010771838225285, "learning_rate": 9.1807804535998e-06, "loss": 0.5228, "step": 2949 }, { "epoch": 0.21, "grad_norm": 1.723230036944767, "learning_rate": 9.180150034800996e-06, "loss": 0.53, "step": 2950 }, { "epoch": 0.21, "grad_norm": 1.5605804266265146, "learning_rate": 9.179519395192503e-06, "loss": 0.4408, "step": 2951 }, { "epoch": 0.21, "grad_norm": 2.1713932337073776, "learning_rate": 9.178888534807633e-06, "loss": 0.6159, "step": 2952 }, { "epoch": 0.21, "grad_norm": 0.8311380107728986, "learning_rate": 9.17825745367971e-06, "loss": 0.4805, "step": 2953 }, { "epoch": 0.21, "grad_norm": 0.7918724966847771, "learning_rate": 9.177626151842072e-06, "loss": 0.4876, "step": 2954 }, { "epoch": 0.21, "grad_norm": 0.9025730819301531, "learning_rate": 9.176994629328064e-06, "loss": 0.4764, "step": 2955 }, { "epoch": 0.21, "grad_norm": 4.221840446075136, "learning_rate": 9.176362886171047e-06, "loss": 0.5358, "step": 2956 }, { "epoch": 0.21, "grad_norm": 1.4546109512245118, "learning_rate": 9.17573092240439e-06, "loss": 0.5404, "step": 2957 }, { "epoch": 0.21, "grad_norm": 1.5904457778917604, "learning_rate": 9.175098738061477e-06, "loss": 0.5931, "step": 2958 }, { "epoch": 0.21, "grad_norm": 1.8026307151274432, "learning_rate": 9.1744663331757e-06, "loss": 0.5396, "step": 2959 }, { "epoch": 0.21, "grad_norm": 1.773431336814164, "learning_rate": 9.173833707780469e-06, "loss": 0.5343, "step": 2960 }, { "epoch": 0.21, "grad_norm": 1.7349186652520812, "learning_rate": 9.173200861909196e-06, "loss": 0.5274, "step": 2961 }, { "epoch": 0.21, "grad_norm": 1.6585229766432203, "learning_rate": 9.172567795595314e-06, "loss": 0.5584, "step": 2962 }, { "epoch": 0.21, "grad_norm": 1.7161871147778, "learning_rate": 9.171934508872262e-06, "loss": 0.5924, "step": 2963 }, { "epoch": 0.21, "grad_norm": 1.856979841271599, "learning_rate": 9.171301001773493e-06, "loss": 0.6003, "step": 2964 }, { "epoch": 0.21, "grad_norm": 1.6700972988581784, "learning_rate": 9.17066727433247e-06, "loss": 0.5651, "step": 2965 }, { "epoch": 0.21, "grad_norm": 1.6977608473129604, "learning_rate": 9.17003332658267e-06, "loss": 0.6016, "step": 2966 }, { "epoch": 0.21, "grad_norm": 1.5624041318138744, "learning_rate": 9.169399158557581e-06, "loss": 0.579, "step": 2967 }, { "epoch": 0.21, "grad_norm": 2.8438849536673403, "learning_rate": 9.1687647702907e-06, "loss": 0.5662, "step": 2968 }, { "epoch": 0.21, "grad_norm": 2.7454043905149943, "learning_rate": 9.168130161815538e-06, "loss": 0.5761, "step": 2969 }, { "epoch": 0.21, "grad_norm": 4.991668780322095, "learning_rate": 9.167495333165615e-06, "loss": 0.5257, "step": 2970 }, { "epoch": 0.21, "grad_norm": 1.6285655025080312, "learning_rate": 9.16686028437447e-06, "loss": 0.5972, "step": 2971 }, { "epoch": 0.21, "grad_norm": 1.7501827111969754, "learning_rate": 9.166225015475645e-06, "loss": 0.5755, "step": 2972 }, { "epoch": 0.21, "grad_norm": 1.6985119952914056, "learning_rate": 9.165589526502696e-06, "loss": 0.5695, "step": 2973 }, { "epoch": 0.21, "grad_norm": 1.7558358586557516, "learning_rate": 9.164953817489195e-06, "loss": 0.6293, "step": 2974 }, { "epoch": 0.21, "grad_norm": 0.9802599679421644, "learning_rate": 9.16431788846872e-06, "loss": 0.48, "step": 2975 }, { "epoch": 0.21, "grad_norm": 1.604141576923345, "learning_rate": 9.163681739474864e-06, "loss": 0.5808, "step": 2976 }, { "epoch": 0.21, "grad_norm": 1.605872711387335, "learning_rate": 9.163045370541229e-06, "loss": 0.5692, "step": 2977 }, { "epoch": 0.21, "grad_norm": 1.529910082676683, "learning_rate": 9.162408781701432e-06, "loss": 0.5131, "step": 2978 }, { "epoch": 0.21, "grad_norm": 1.6729249340760217, "learning_rate": 9.161771972989098e-06, "loss": 0.623, "step": 2979 }, { "epoch": 0.21, "grad_norm": 1.5859562145462096, "learning_rate": 9.161134944437868e-06, "loss": 0.559, "step": 2980 }, { "epoch": 0.21, "grad_norm": 1.6722943051443542, "learning_rate": 9.16049769608139e-06, "loss": 0.5306, "step": 2981 }, { "epoch": 0.21, "grad_norm": 1.7344162292165934, "learning_rate": 9.159860227953325e-06, "loss": 0.5083, "step": 2982 }, { "epoch": 0.21, "grad_norm": 1.487984191718565, "learning_rate": 9.159222540087347e-06, "loss": 0.4905, "step": 2983 }, { "epoch": 0.21, "grad_norm": 1.545422344028103, "learning_rate": 9.158584632517142e-06, "loss": 0.5813, "step": 2984 }, { "epoch": 0.21, "grad_norm": 1.6175615746384093, "learning_rate": 9.157946505276405e-06, "loss": 0.6, "step": 2985 }, { "epoch": 0.21, "grad_norm": 1.6946303444978852, "learning_rate": 9.157308158398846e-06, "loss": 0.579, "step": 2986 }, { "epoch": 0.21, "grad_norm": 2.446521766973696, "learning_rate": 9.15666959191818e-06, "loss": 0.4778, "step": 2987 }, { "epoch": 0.21, "grad_norm": 3.0929677688933928, "learning_rate": 9.156030805868144e-06, "loss": 0.5906, "step": 2988 }, { "epoch": 0.21, "grad_norm": 1.7452174914756822, "learning_rate": 9.155391800282477e-06, "loss": 0.5239, "step": 2989 }, { "epoch": 0.21, "grad_norm": 2.0798375651391963, "learning_rate": 9.154752575194936e-06, "loss": 0.6283, "step": 2990 }, { "epoch": 0.21, "grad_norm": 0.8712352574894433, "learning_rate": 9.154113130639286e-06, "loss": 0.451, "step": 2991 }, { "epoch": 0.21, "grad_norm": 2.0929544050817475, "learning_rate": 9.153473466649303e-06, "loss": 0.5189, "step": 2992 }, { "epoch": 0.21, "grad_norm": 1.5847481238704257, "learning_rate": 9.152833583258778e-06, "loss": 0.5178, "step": 2993 }, { "epoch": 0.21, "grad_norm": 1.603887266491553, "learning_rate": 9.152193480501512e-06, "loss": 0.4602, "step": 2994 }, { "epoch": 0.21, "grad_norm": 2.014600751045698, "learning_rate": 9.151553158411318e-06, "loss": 0.6141, "step": 2995 }, { "epoch": 0.21, "grad_norm": 1.5654636288362136, "learning_rate": 9.150912617022015e-06, "loss": 0.5854, "step": 2996 }, { "epoch": 0.21, "grad_norm": 1.5731182236432362, "learning_rate": 9.150271856367445e-06, "loss": 0.5288, "step": 2997 }, { "epoch": 0.21, "grad_norm": 1.9013412660735625, "learning_rate": 9.149630876481452e-06, "loss": 0.5863, "step": 2998 }, { "epoch": 0.21, "grad_norm": 1.6864579669043867, "learning_rate": 9.148989677397894e-06, "loss": 0.6295, "step": 2999 }, { "epoch": 0.21, "grad_norm": 1.528937163454691, "learning_rate": 9.148348259150645e-06, "loss": 0.5504, "step": 3000 }, { "epoch": 0.21, "grad_norm": 1.7665446986714837, "learning_rate": 9.147706621773583e-06, "loss": 0.568, "step": 3001 }, { "epoch": 0.21, "grad_norm": 1.6248087598036456, "learning_rate": 9.147064765300604e-06, "loss": 0.4851, "step": 3002 }, { "epoch": 0.21, "grad_norm": 1.8089490497103624, "learning_rate": 9.14642268976561e-06, "loss": 0.448, "step": 3003 }, { "epoch": 0.21, "grad_norm": 1.6990973885116376, "learning_rate": 9.145780395202522e-06, "loss": 0.5705, "step": 3004 }, { "epoch": 0.21, "grad_norm": 1.5957960742209898, "learning_rate": 9.145137881645262e-06, "loss": 0.5573, "step": 3005 }, { "epoch": 0.21, "grad_norm": 1.5207597940791149, "learning_rate": 9.144495149127777e-06, "loss": 0.5704, "step": 3006 }, { "epoch": 0.21, "grad_norm": 1.906382297447813, "learning_rate": 9.143852197684013e-06, "loss": 0.5566, "step": 3007 }, { "epoch": 0.21, "grad_norm": 1.006046244726447, "learning_rate": 9.143209027347936e-06, "loss": 0.4858, "step": 3008 }, { "epoch": 0.21, "grad_norm": 0.9325764639786617, "learning_rate": 9.142565638153518e-06, "loss": 0.4498, "step": 3009 }, { "epoch": 0.21, "grad_norm": 4.975213812059827, "learning_rate": 9.141922030134747e-06, "loss": 0.5527, "step": 3010 }, { "epoch": 0.21, "grad_norm": 2.5157513079606777, "learning_rate": 9.141278203325619e-06, "loss": 0.5798, "step": 3011 }, { "epoch": 0.21, "grad_norm": 1.826891708217817, "learning_rate": 9.140634157760142e-06, "loss": 0.5975, "step": 3012 }, { "epoch": 0.21, "grad_norm": 2.0737973727810335, "learning_rate": 9.13998989347234e-06, "loss": 0.5423, "step": 3013 }, { "epoch": 0.21, "grad_norm": 1.8106414184389292, "learning_rate": 9.139345410496244e-06, "loss": 0.5813, "step": 3014 }, { "epoch": 0.21, "grad_norm": 1.7278106931746051, "learning_rate": 9.138700708865898e-06, "loss": 0.5569, "step": 3015 }, { "epoch": 0.21, "grad_norm": 2.3353286441243455, "learning_rate": 9.138055788615355e-06, "loss": 0.5294, "step": 3016 }, { "epoch": 0.21, "grad_norm": 2.373752084948341, "learning_rate": 9.137410649778683e-06, "loss": 0.5667, "step": 3017 }, { "epoch": 0.21, "grad_norm": 1.5920221641691443, "learning_rate": 9.136765292389964e-06, "loss": 0.6153, "step": 3018 }, { "epoch": 0.21, "grad_norm": 1.5521995996842464, "learning_rate": 9.136119716483282e-06, "loss": 0.5316, "step": 3019 }, { "epoch": 0.21, "grad_norm": 1.7468828635103204, "learning_rate": 9.135473922092743e-06, "loss": 0.5768, "step": 3020 }, { "epoch": 0.21, "grad_norm": 1.9542218394067772, "learning_rate": 9.134827909252459e-06, "loss": 0.5349, "step": 3021 }, { "epoch": 0.21, "grad_norm": 1.7371380364882352, "learning_rate": 9.134181677996555e-06, "loss": 0.5789, "step": 3022 }, { "epoch": 0.21, "grad_norm": 1.6346140635644437, "learning_rate": 9.133535228359163e-06, "loss": 0.5584, "step": 3023 }, { "epoch": 0.21, "grad_norm": 2.0171674204507517, "learning_rate": 9.132888560374437e-06, "loss": 0.5166, "step": 3024 }, { "epoch": 0.21, "grad_norm": 2.1916088877102986, "learning_rate": 9.132241674076532e-06, "loss": 0.5822, "step": 3025 }, { "epoch": 0.21, "grad_norm": 2.0843796049119203, "learning_rate": 9.131594569499618e-06, "loss": 0.5895, "step": 3026 }, { "epoch": 0.21, "grad_norm": 1.9325327916467356, "learning_rate": 9.130947246677881e-06, "loss": 0.5577, "step": 3027 }, { "epoch": 0.21, "grad_norm": 1.7191324186902104, "learning_rate": 9.130299705645513e-06, "loss": 0.594, "step": 3028 }, { "epoch": 0.21, "grad_norm": 1.8799810582389074, "learning_rate": 9.12965194643672e-06, "loss": 0.4938, "step": 3029 }, { "epoch": 0.22, "grad_norm": 10.228057749373589, "learning_rate": 9.129003969085716e-06, "loss": 0.6013, "step": 3030 }, { "epoch": 0.22, "grad_norm": 1.8828906269914918, "learning_rate": 9.128355773626732e-06, "loss": 0.5556, "step": 3031 }, { "epoch": 0.22, "grad_norm": 1.3341773231749388, "learning_rate": 9.127707360094007e-06, "loss": 0.5167, "step": 3032 }, { "epoch": 0.22, "grad_norm": 1.5680382447268233, "learning_rate": 9.127058728521794e-06, "loss": 0.52, "step": 3033 }, { "epoch": 0.22, "grad_norm": 2.270468291645423, "learning_rate": 9.126409878944354e-06, "loss": 0.5164, "step": 3034 }, { "epoch": 0.22, "grad_norm": 1.68530375471036, "learning_rate": 9.125760811395961e-06, "loss": 0.5454, "step": 3035 }, { "epoch": 0.22, "grad_norm": 1.5971411973859018, "learning_rate": 9.125111525910902e-06, "loss": 0.5774, "step": 3036 }, { "epoch": 0.22, "grad_norm": 1.680310404666657, "learning_rate": 9.124462022523475e-06, "loss": 0.5864, "step": 3037 }, { "epoch": 0.22, "grad_norm": 1.782543093403225, "learning_rate": 9.12381230126799e-06, "loss": 0.5929, "step": 3038 }, { "epoch": 0.22, "grad_norm": 1.7694127654386875, "learning_rate": 9.123162362178763e-06, "loss": 0.5924, "step": 3039 }, { "epoch": 0.22, "grad_norm": 1.7572762086094833, "learning_rate": 9.122512205290131e-06, "loss": 0.6054, "step": 3040 }, { "epoch": 0.22, "grad_norm": 1.8324623581471517, "learning_rate": 9.121861830636434e-06, "loss": 0.5614, "step": 3041 }, { "epoch": 0.22, "grad_norm": 1.8773230770202354, "learning_rate": 9.121211238252028e-06, "loss": 0.6082, "step": 3042 }, { "epoch": 0.22, "grad_norm": 1.7622687320156118, "learning_rate": 9.120560428171281e-06, "loss": 0.5844, "step": 3043 }, { "epoch": 0.22, "grad_norm": 0.8691041338728247, "learning_rate": 9.11990940042857e-06, "loss": 0.4735, "step": 3044 }, { "epoch": 0.22, "grad_norm": 1.637574757987552, "learning_rate": 9.119258155058284e-06, "loss": 0.6148, "step": 3045 }, { "epoch": 0.22, "grad_norm": 1.4950484904436516, "learning_rate": 9.118606692094823e-06, "loss": 0.5578, "step": 3046 }, { "epoch": 0.22, "grad_norm": 3.1753274639163718, "learning_rate": 9.117955011572601e-06, "loss": 0.5191, "step": 3047 }, { "epoch": 0.22, "grad_norm": 1.5911642832126338, "learning_rate": 9.117303113526044e-06, "loss": 0.5152, "step": 3048 }, { "epoch": 0.22, "grad_norm": 1.7274471061849384, "learning_rate": 9.116650997989584e-06, "loss": 0.5988, "step": 3049 }, { "epoch": 0.22, "grad_norm": 1.7384893576992186, "learning_rate": 9.115998664997667e-06, "loss": 0.5839, "step": 3050 }, { "epoch": 0.22, "grad_norm": 3.744277285075216, "learning_rate": 9.115346114584757e-06, "loss": 0.6705, "step": 3051 }, { "epoch": 0.22, "grad_norm": 2.966783849680409, "learning_rate": 9.11469334678532e-06, "loss": 0.5768, "step": 3052 }, { "epoch": 0.22, "grad_norm": 1.876745539937525, "learning_rate": 9.114040361633836e-06, "loss": 0.4991, "step": 3053 }, { "epoch": 0.22, "grad_norm": 1.7567659054278457, "learning_rate": 9.1133871591648e-06, "loss": 0.462, "step": 3054 }, { "epoch": 0.22, "grad_norm": 1.7899616000209557, "learning_rate": 9.112733739412717e-06, "loss": 0.6415, "step": 3055 }, { "epoch": 0.22, "grad_norm": 1.830926609022795, "learning_rate": 9.112080102412101e-06, "loss": 0.6188, "step": 3056 }, { "epoch": 0.22, "grad_norm": 2.1085799532131833, "learning_rate": 9.111426248197484e-06, "loss": 0.5691, "step": 3057 }, { "epoch": 0.22, "grad_norm": 1.7710242467532016, "learning_rate": 9.110772176803397e-06, "loss": 0.5637, "step": 3058 }, { "epoch": 0.22, "grad_norm": 1.916134213040013, "learning_rate": 9.110117888264398e-06, "loss": 0.4841, "step": 3059 }, { "epoch": 0.22, "grad_norm": 1.7840681423713853, "learning_rate": 9.109463382615042e-06, "loss": 0.5923, "step": 3060 }, { "epoch": 0.22, "grad_norm": 2.2195526516796105, "learning_rate": 9.108808659889906e-06, "loss": 0.523, "step": 3061 }, { "epoch": 0.22, "grad_norm": 1.8849076598026915, "learning_rate": 9.108153720123576e-06, "loss": 0.5638, "step": 3062 }, { "epoch": 0.22, "grad_norm": 1.8728901290843416, "learning_rate": 9.107498563350644e-06, "loss": 0.6224, "step": 3063 }, { "epoch": 0.22, "grad_norm": 1.5080919307313716, "learning_rate": 9.106843189605721e-06, "loss": 0.5547, "step": 3064 }, { "epoch": 0.22, "grad_norm": 1.5080909963625597, "learning_rate": 9.106187598923425e-06, "loss": 0.6043, "step": 3065 }, { "epoch": 0.22, "grad_norm": 1.9581393788959849, "learning_rate": 9.105531791338384e-06, "loss": 0.5505, "step": 3066 }, { "epoch": 0.22, "grad_norm": 1.6855386456156267, "learning_rate": 9.104875766885245e-06, "loss": 0.5715, "step": 3067 }, { "epoch": 0.22, "grad_norm": 1.7009417088530405, "learning_rate": 9.104219525598657e-06, "loss": 0.5999, "step": 3068 }, { "epoch": 0.22, "grad_norm": 2.0000097016941893, "learning_rate": 9.103563067513289e-06, "loss": 0.5904, "step": 3069 }, { "epoch": 0.22, "grad_norm": 2.0738803553262453, "learning_rate": 9.102906392663812e-06, "loss": 0.5837, "step": 3070 }, { "epoch": 0.22, "grad_norm": 1.7028827670068771, "learning_rate": 9.102249501084918e-06, "loss": 0.6051, "step": 3071 }, { "epoch": 0.22, "grad_norm": 1.8530091896005512, "learning_rate": 9.101592392811307e-06, "loss": 0.6064, "step": 3072 }, { "epoch": 0.22, "grad_norm": 2.1651244429805545, "learning_rate": 9.100935067877685e-06, "loss": 0.5033, "step": 3073 }, { "epoch": 0.22, "grad_norm": 1.6567172547293063, "learning_rate": 9.100277526318779e-06, "loss": 0.5601, "step": 3074 }, { "epoch": 0.22, "grad_norm": 2.1674195417716895, "learning_rate": 9.099619768169318e-06, "loss": 0.5464, "step": 3075 }, { "epoch": 0.22, "grad_norm": 1.7140369806289208, "learning_rate": 9.098961793464051e-06, "loss": 0.5528, "step": 3076 }, { "epoch": 0.22, "grad_norm": 1.7321959108250378, "learning_rate": 9.098303602237734e-06, "loss": 0.4875, "step": 3077 }, { "epoch": 0.22, "grad_norm": 2.3027362519402357, "learning_rate": 9.097645194525132e-06, "loss": 0.5923, "step": 3078 }, { "epoch": 0.22, "grad_norm": 3.5778195118329523, "learning_rate": 9.096986570361025e-06, "loss": 0.6187, "step": 3079 }, { "epoch": 0.22, "grad_norm": 1.5452349765821802, "learning_rate": 9.096327729780208e-06, "loss": 0.509, "step": 3080 }, { "epoch": 0.22, "grad_norm": 1.7513362263263774, "learning_rate": 9.09566867281748e-06, "loss": 0.475, "step": 3081 }, { "epoch": 0.22, "grad_norm": 1.6869460697049827, "learning_rate": 9.095009399507651e-06, "loss": 0.5845, "step": 3082 }, { "epoch": 0.22, "grad_norm": 2.1525324766876475, "learning_rate": 9.094349909885553e-06, "loss": 0.6148, "step": 3083 }, { "epoch": 0.22, "grad_norm": 1.4732447370896398, "learning_rate": 9.093690203986018e-06, "loss": 0.5398, "step": 3084 }, { "epoch": 0.22, "grad_norm": 2.121187467243923, "learning_rate": 9.093030281843896e-06, "loss": 0.5999, "step": 3085 }, { "epoch": 0.22, "grad_norm": 1.7718578575274462, "learning_rate": 9.092370143494043e-06, "loss": 0.6064, "step": 3086 }, { "epoch": 0.22, "grad_norm": 1.6220264694283442, "learning_rate": 9.091709788971335e-06, "loss": 0.5375, "step": 3087 }, { "epoch": 0.22, "grad_norm": 0.9744193033401805, "learning_rate": 9.09104921831065e-06, "loss": 0.4611, "step": 3088 }, { "epoch": 0.22, "grad_norm": 1.5289811049002087, "learning_rate": 9.090388431546882e-06, "loss": 0.6079, "step": 3089 }, { "epoch": 0.22, "grad_norm": 1.6565973319460905, "learning_rate": 9.089727428714938e-06, "loss": 0.575, "step": 3090 }, { "epoch": 0.22, "grad_norm": 1.6525236450410181, "learning_rate": 9.089066209849734e-06, "loss": 0.627, "step": 3091 }, { "epoch": 0.22, "grad_norm": 1.4995656755075468, "learning_rate": 9.088404774986197e-06, "loss": 0.5593, "step": 3092 }, { "epoch": 0.22, "grad_norm": 1.7881781445518559, "learning_rate": 9.087743124159265e-06, "loss": 0.5466, "step": 3093 }, { "epoch": 0.22, "grad_norm": 1.7028781818879257, "learning_rate": 9.087081257403891e-06, "loss": 0.5726, "step": 3094 }, { "epoch": 0.22, "grad_norm": 5.093367862754871, "learning_rate": 9.086419174755037e-06, "loss": 0.5511, "step": 3095 }, { "epoch": 0.22, "grad_norm": 2.0645638652715963, "learning_rate": 9.085756876247673e-06, "loss": 0.6291, "step": 3096 }, { "epoch": 0.22, "grad_norm": 1.6252950775012807, "learning_rate": 9.08509436191679e-06, "loss": 0.5694, "step": 3097 }, { "epoch": 0.22, "grad_norm": 1.9635615007090768, "learning_rate": 9.084431631797376e-06, "loss": 0.5548, "step": 3098 }, { "epoch": 0.22, "grad_norm": 0.8718469414504493, "learning_rate": 9.083768685924447e-06, "loss": 0.4481, "step": 3099 }, { "epoch": 0.22, "grad_norm": 1.5713198604391758, "learning_rate": 9.083105524333017e-06, "loss": 0.571, "step": 3100 }, { "epoch": 0.22, "grad_norm": 1.727830394815153, "learning_rate": 9.082442147058118e-06, "loss": 0.548, "step": 3101 }, { "epoch": 0.22, "grad_norm": 2.1173326917806565, "learning_rate": 9.081778554134792e-06, "loss": 0.6017, "step": 3102 }, { "epoch": 0.22, "grad_norm": 1.5990508472947116, "learning_rate": 9.081114745598089e-06, "loss": 0.6131, "step": 3103 }, { "epoch": 0.22, "grad_norm": 1.8748652438810725, "learning_rate": 9.08045072148308e-06, "loss": 0.5421, "step": 3104 }, { "epoch": 0.22, "grad_norm": 1.9680056113957995, "learning_rate": 9.079786481824837e-06, "loss": 0.5655, "step": 3105 }, { "epoch": 0.22, "grad_norm": 1.5654685817106608, "learning_rate": 9.079122026658447e-06, "loss": 0.5789, "step": 3106 }, { "epoch": 0.22, "grad_norm": 1.4785686646714231, "learning_rate": 9.078457356019009e-06, "loss": 0.5951, "step": 3107 }, { "epoch": 0.22, "grad_norm": 1.7930021746256048, "learning_rate": 9.077792469941634e-06, "loss": 0.5721, "step": 3108 }, { "epoch": 0.22, "grad_norm": 1.8132628413020246, "learning_rate": 9.077127368461443e-06, "loss": 0.5646, "step": 3109 }, { "epoch": 0.22, "grad_norm": 2.008328865431607, "learning_rate": 9.076462051613571e-06, "loss": 0.5708, "step": 3110 }, { "epoch": 0.22, "grad_norm": 1.9843715015650085, "learning_rate": 9.07579651943316e-06, "loss": 0.5827, "step": 3111 }, { "epoch": 0.22, "grad_norm": 1.6756450947493984, "learning_rate": 9.075130771955363e-06, "loss": 0.5647, "step": 3112 }, { "epoch": 0.22, "grad_norm": 1.6676190701104636, "learning_rate": 9.074464809215353e-06, "loss": 0.5165, "step": 3113 }, { "epoch": 0.22, "grad_norm": 0.8352266492758975, "learning_rate": 9.073798631248304e-06, "loss": 0.4389, "step": 3114 }, { "epoch": 0.22, "grad_norm": 1.7183609672210136, "learning_rate": 9.07313223808941e-06, "loss": 0.5506, "step": 3115 }, { "epoch": 0.22, "grad_norm": 1.6881205091380187, "learning_rate": 9.072465629773868e-06, "loss": 0.5869, "step": 3116 }, { "epoch": 0.22, "grad_norm": 2.0244066563487637, "learning_rate": 9.071798806336891e-06, "loss": 0.5687, "step": 3117 }, { "epoch": 0.22, "grad_norm": 1.8098370941200939, "learning_rate": 9.071131767813706e-06, "loss": 0.6398, "step": 3118 }, { "epoch": 0.22, "grad_norm": 5.437316812534526, "learning_rate": 9.070464514239546e-06, "loss": 0.5939, "step": 3119 }, { "epoch": 0.22, "grad_norm": 0.9179022188886208, "learning_rate": 9.069797045649657e-06, "loss": 0.4554, "step": 3120 }, { "epoch": 0.22, "grad_norm": 1.9506358614996155, "learning_rate": 9.069129362079297e-06, "loss": 0.5876, "step": 3121 }, { "epoch": 0.22, "grad_norm": 5.486386582874587, "learning_rate": 9.068461463563737e-06, "loss": 0.5857, "step": 3122 }, { "epoch": 0.22, "grad_norm": 1.5037629719161791, "learning_rate": 9.067793350138256e-06, "loss": 0.5586, "step": 3123 }, { "epoch": 0.22, "grad_norm": 1.4931773545957066, "learning_rate": 9.067125021838148e-06, "loss": 0.4816, "step": 3124 }, { "epoch": 0.22, "grad_norm": 1.7887614862282148, "learning_rate": 9.066456478698713e-06, "loss": 0.547, "step": 3125 }, { "epoch": 0.22, "grad_norm": 1.6005205231940767, "learning_rate": 9.06578772075527e-06, "loss": 0.5714, "step": 3126 }, { "epoch": 0.22, "grad_norm": 1.7270619725929421, "learning_rate": 9.06511874804314e-06, "loss": 0.5639, "step": 3127 }, { "epoch": 0.22, "grad_norm": 1.654115368339888, "learning_rate": 9.064449560597665e-06, "loss": 0.6503, "step": 3128 }, { "epoch": 0.22, "grad_norm": 2.0449235359035933, "learning_rate": 9.063780158454192e-06, "loss": 0.6487, "step": 3129 }, { "epoch": 0.22, "grad_norm": 1.5104037662690675, "learning_rate": 9.063110541648082e-06, "loss": 0.5165, "step": 3130 }, { "epoch": 0.22, "grad_norm": 1.5848126807708425, "learning_rate": 9.062440710214705e-06, "loss": 0.5708, "step": 3131 }, { "epoch": 0.22, "grad_norm": 1.736122500201738, "learning_rate": 9.061770664189442e-06, "loss": 0.5551, "step": 3132 }, { "epoch": 0.22, "grad_norm": 3.367558166958934, "learning_rate": 9.061100403607691e-06, "loss": 0.6132, "step": 3133 }, { "epoch": 0.22, "grad_norm": 1.654540643172597, "learning_rate": 9.060429928504857e-06, "loss": 0.6636, "step": 3134 }, { "epoch": 0.22, "grad_norm": 1.6438141788245617, "learning_rate": 9.059759238916353e-06, "loss": 0.5702, "step": 3135 }, { "epoch": 0.22, "grad_norm": 1.7403698333898585, "learning_rate": 9.059088334877611e-06, "loss": 0.582, "step": 3136 }, { "epoch": 0.22, "grad_norm": 1.6329809783966027, "learning_rate": 9.05841721642407e-06, "loss": 0.5948, "step": 3137 }, { "epoch": 0.22, "grad_norm": 2.0586932552627926, "learning_rate": 9.057745883591178e-06, "loss": 0.5378, "step": 3138 }, { "epoch": 0.22, "grad_norm": 1.977287579514969, "learning_rate": 9.0570743364144e-06, "loss": 0.5554, "step": 3139 }, { "epoch": 0.22, "grad_norm": 1.5794454937333202, "learning_rate": 9.05640257492921e-06, "loss": 0.5125, "step": 3140 }, { "epoch": 0.22, "grad_norm": 1.596614052956998, "learning_rate": 9.055730599171089e-06, "loss": 0.554, "step": 3141 }, { "epoch": 0.22, "grad_norm": 1.8867744802359427, "learning_rate": 9.055058409175534e-06, "loss": 0.5656, "step": 3142 }, { "epoch": 0.22, "grad_norm": 1.7118194274901917, "learning_rate": 9.054386004978056e-06, "loss": 0.5474, "step": 3143 }, { "epoch": 0.22, "grad_norm": 0.8479823257073037, "learning_rate": 9.053713386614169e-06, "loss": 0.4517, "step": 3144 }, { "epoch": 0.22, "grad_norm": 1.8656134619377513, "learning_rate": 9.053040554119405e-06, "loss": 0.6026, "step": 3145 }, { "epoch": 0.22, "grad_norm": 1.5209063837707966, "learning_rate": 9.052367507529307e-06, "loss": 0.5906, "step": 3146 }, { "epoch": 0.22, "grad_norm": 2.261674115424456, "learning_rate": 9.051694246879425e-06, "loss": 0.5342, "step": 3147 }, { "epoch": 0.22, "grad_norm": 1.988044036841759, "learning_rate": 9.051020772205323e-06, "loss": 0.4756, "step": 3148 }, { "epoch": 0.22, "grad_norm": 1.4624886894755698, "learning_rate": 9.050347083542579e-06, "loss": 0.5502, "step": 3149 }, { "epoch": 0.22, "grad_norm": 1.879854095228839, "learning_rate": 9.049673180926776e-06, "loss": 0.5773, "step": 3150 }, { "epoch": 0.22, "grad_norm": 0.8808234635941111, "learning_rate": 9.048999064393515e-06, "loss": 0.4609, "step": 3151 }, { "epoch": 0.22, "grad_norm": 1.3100963012882794, "learning_rate": 9.048324733978403e-06, "loss": 0.5195, "step": 3152 }, { "epoch": 0.22, "grad_norm": 1.4767594867468434, "learning_rate": 9.047650189717059e-06, "loss": 0.5254, "step": 3153 }, { "epoch": 0.22, "grad_norm": 1.5961991802631599, "learning_rate": 9.046975431645118e-06, "loss": 0.5706, "step": 3154 }, { "epoch": 0.22, "grad_norm": 1.509472653220536, "learning_rate": 9.046300459798224e-06, "loss": 0.5875, "step": 3155 }, { "epoch": 0.22, "grad_norm": 2.5321711694578064, "learning_rate": 9.045625274212026e-06, "loss": 0.6784, "step": 3156 }, { "epoch": 0.22, "grad_norm": 1.6907334929537008, "learning_rate": 9.044949874922193e-06, "loss": 0.5262, "step": 3157 }, { "epoch": 0.22, "grad_norm": 3.356484201048197, "learning_rate": 9.044274261964402e-06, "loss": 0.5843, "step": 3158 }, { "epoch": 0.22, "grad_norm": 0.8543757115051713, "learning_rate": 9.04359843537434e-06, "loss": 0.4665, "step": 3159 }, { "epoch": 0.22, "grad_norm": 0.7805994287073599, "learning_rate": 9.042922395187707e-06, "loss": 0.4514, "step": 3160 }, { "epoch": 0.22, "grad_norm": 1.949001517945895, "learning_rate": 9.042246141440215e-06, "loss": 0.6188, "step": 3161 }, { "epoch": 0.22, "grad_norm": 1.438168256474461, "learning_rate": 9.041569674167584e-06, "loss": 0.6087, "step": 3162 }, { "epoch": 0.22, "grad_norm": 2.2430618881643043, "learning_rate": 9.040892993405548e-06, "loss": 0.5177, "step": 3163 }, { "epoch": 0.22, "grad_norm": 1.7107059465929517, "learning_rate": 9.040216099189853e-06, "loss": 0.5733, "step": 3164 }, { "epoch": 0.22, "grad_norm": 1.8738235906946177, "learning_rate": 9.039538991556251e-06, "loss": 0.5719, "step": 3165 }, { "epoch": 0.22, "grad_norm": 0.9156006139823603, "learning_rate": 9.038861670540515e-06, "loss": 0.4705, "step": 3166 }, { "epoch": 0.22, "grad_norm": 1.8856080482980109, "learning_rate": 9.038184136178418e-06, "loss": 0.5264, "step": 3167 }, { "epoch": 0.22, "grad_norm": 2.0971354295252937, "learning_rate": 9.037506388505752e-06, "loss": 0.5696, "step": 3168 }, { "epoch": 0.22, "grad_norm": 0.7787253738797677, "learning_rate": 9.036828427558318e-06, "loss": 0.4745, "step": 3169 }, { "epoch": 0.22, "grad_norm": 1.8408295328731465, "learning_rate": 9.036150253371925e-06, "loss": 0.5924, "step": 3170 }, { "epoch": 0.23, "grad_norm": 1.4882093061664055, "learning_rate": 9.035471865982403e-06, "loss": 0.5788, "step": 3171 }, { "epoch": 0.23, "grad_norm": 1.5899424147773533, "learning_rate": 9.034793265425581e-06, "loss": 0.5356, "step": 3172 }, { "epoch": 0.23, "grad_norm": 2.1798846969232093, "learning_rate": 9.034114451737308e-06, "loss": 0.6126, "step": 3173 }, { "epoch": 0.23, "grad_norm": 1.6979950496862446, "learning_rate": 9.03343542495344e-06, "loss": 0.5724, "step": 3174 }, { "epoch": 0.23, "grad_norm": 1.5276463098277349, "learning_rate": 9.032756185109846e-06, "loss": 0.5126, "step": 3175 }, { "epoch": 0.23, "grad_norm": 1.5902395120542454, "learning_rate": 9.032076732242402e-06, "loss": 0.5489, "step": 3176 }, { "epoch": 0.23, "grad_norm": 1.5175120820650618, "learning_rate": 9.031397066387007e-06, "loss": 0.5854, "step": 3177 }, { "epoch": 0.23, "grad_norm": 0.9342772166868714, "learning_rate": 9.030717187579556e-06, "loss": 0.4609, "step": 3178 }, { "epoch": 0.23, "grad_norm": 1.6831787485820295, "learning_rate": 9.030037095855969e-06, "loss": 0.6257, "step": 3179 }, { "epoch": 0.23, "grad_norm": 1.8740556813943692, "learning_rate": 9.029356791252162e-06, "loss": 0.5001, "step": 3180 }, { "epoch": 0.23, "grad_norm": 1.5362067362839642, "learning_rate": 9.028676273804078e-06, "loss": 0.5865, "step": 3181 }, { "epoch": 0.23, "grad_norm": 1.7996518819584841, "learning_rate": 9.027995543547663e-06, "loss": 0.5435, "step": 3182 }, { "epoch": 0.23, "grad_norm": 1.7024688883722179, "learning_rate": 9.027314600518874e-06, "loss": 0.5781, "step": 3183 }, { "epoch": 0.23, "grad_norm": 1.514506012791769, "learning_rate": 9.026633444753681e-06, "loss": 0.6121, "step": 3184 }, { "epoch": 0.23, "grad_norm": 2.08311835939007, "learning_rate": 9.025952076288066e-06, "loss": 0.5907, "step": 3185 }, { "epoch": 0.23, "grad_norm": 1.8302151715246762, "learning_rate": 9.02527049515802e-06, "loss": 0.5682, "step": 3186 }, { "epoch": 0.23, "grad_norm": 1.5123927115275206, "learning_rate": 9.024588701399548e-06, "loss": 0.5855, "step": 3187 }, { "epoch": 0.23, "grad_norm": 1.6105341535733377, "learning_rate": 9.023906695048663e-06, "loss": 0.5223, "step": 3188 }, { "epoch": 0.23, "grad_norm": 1.8365580546536424, "learning_rate": 9.023224476141392e-06, "loss": 0.576, "step": 3189 }, { "epoch": 0.23, "grad_norm": 2.7792567424905457, "learning_rate": 9.022542044713772e-06, "loss": 0.6261, "step": 3190 }, { "epoch": 0.23, "grad_norm": 2.0840321674602618, "learning_rate": 9.021859400801849e-06, "loss": 0.6255, "step": 3191 }, { "epoch": 0.23, "grad_norm": 1.6197389342296826, "learning_rate": 9.021176544441686e-06, "loss": 0.56, "step": 3192 }, { "epoch": 0.23, "grad_norm": 1.668489064341011, "learning_rate": 9.020493475669351e-06, "loss": 0.4814, "step": 3193 }, { "epoch": 0.23, "grad_norm": 2.566645945335119, "learning_rate": 9.019810194520929e-06, "loss": 0.5516, "step": 3194 }, { "epoch": 0.23, "grad_norm": 1.5689433753110227, "learning_rate": 9.01912670103251e-06, "loss": 0.5338, "step": 3195 }, { "epoch": 0.23, "grad_norm": 1.523335427887631, "learning_rate": 9.018442995240203e-06, "loss": 0.5333, "step": 3196 }, { "epoch": 0.23, "grad_norm": 1.6807787147791986, "learning_rate": 9.017759077180117e-06, "loss": 0.5311, "step": 3197 }, { "epoch": 0.23, "grad_norm": 1.649720565030237, "learning_rate": 9.017074946888383e-06, "loss": 0.5973, "step": 3198 }, { "epoch": 0.23, "grad_norm": 2.0174542827834907, "learning_rate": 9.01639060440114e-06, "loss": 0.5156, "step": 3199 }, { "epoch": 0.23, "grad_norm": 2.127181017777319, "learning_rate": 9.015706049754536e-06, "loss": 0.5636, "step": 3200 }, { "epoch": 0.23, "grad_norm": 2.7979103044509253, "learning_rate": 9.015021282984731e-06, "loss": 0.5667, "step": 3201 }, { "epoch": 0.23, "grad_norm": 1.5984173861538533, "learning_rate": 9.014336304127896e-06, "loss": 0.4766, "step": 3202 }, { "epoch": 0.23, "grad_norm": 1.5740663042673293, "learning_rate": 9.013651113220216e-06, "loss": 0.6078, "step": 3203 }, { "epoch": 0.23, "grad_norm": 1.7331960630207204, "learning_rate": 9.012965710297885e-06, "loss": 0.5903, "step": 3204 }, { "epoch": 0.23, "grad_norm": 1.8477794178742395, "learning_rate": 9.012280095397106e-06, "loss": 0.6071, "step": 3205 }, { "epoch": 0.23, "grad_norm": 2.1115818076535753, "learning_rate": 9.011594268554097e-06, "loss": 0.5624, "step": 3206 }, { "epoch": 0.23, "grad_norm": 1.55228582078478, "learning_rate": 9.010908229805086e-06, "loss": 0.574, "step": 3207 }, { "epoch": 0.23, "grad_norm": 1.989748746259987, "learning_rate": 9.01022197918631e-06, "loss": 0.5657, "step": 3208 }, { "epoch": 0.23, "grad_norm": 0.8006187945217477, "learning_rate": 9.009535516734023e-06, "loss": 0.4857, "step": 3209 }, { "epoch": 0.23, "grad_norm": 0.9356296153936299, "learning_rate": 9.008848842484482e-06, "loss": 0.49, "step": 3210 }, { "epoch": 0.23, "grad_norm": 0.8320898937708464, "learning_rate": 9.008161956473962e-06, "loss": 0.4812, "step": 3211 }, { "epoch": 0.23, "grad_norm": 1.747559472593972, "learning_rate": 9.007474858738748e-06, "loss": 0.6297, "step": 3212 }, { "epoch": 0.23, "grad_norm": 2.8690172696322045, "learning_rate": 9.00678754931513e-06, "loss": 0.6248, "step": 3213 }, { "epoch": 0.23, "grad_norm": 1.645209238416417, "learning_rate": 9.006100028239418e-06, "loss": 0.5483, "step": 3214 }, { "epoch": 0.23, "grad_norm": 1.8269677384049103, "learning_rate": 9.005412295547927e-06, "loss": 0.6445, "step": 3215 }, { "epoch": 0.23, "grad_norm": 1.7625315294528323, "learning_rate": 9.004724351276989e-06, "loss": 0.5177, "step": 3216 }, { "epoch": 0.23, "grad_norm": 0.9442415497824801, "learning_rate": 9.004036195462938e-06, "loss": 0.496, "step": 3217 }, { "epoch": 0.23, "grad_norm": 1.573267773611724, "learning_rate": 9.003347828142126e-06, "loss": 0.4935, "step": 3218 }, { "epoch": 0.23, "grad_norm": 1.6265894398535938, "learning_rate": 9.00265924935092e-06, "loss": 0.6053, "step": 3219 }, { "epoch": 0.23, "grad_norm": 2.082022885554193, "learning_rate": 9.001970459125689e-06, "loss": 0.593, "step": 3220 }, { "epoch": 0.23, "grad_norm": 1.7503239234522106, "learning_rate": 9.001281457502818e-06, "loss": 0.6292, "step": 3221 }, { "epoch": 0.23, "grad_norm": 0.808309658184656, "learning_rate": 9.000592244518701e-06, "loss": 0.4479, "step": 3222 }, { "epoch": 0.23, "grad_norm": 1.6458250955201286, "learning_rate": 8.999902820209747e-06, "loss": 0.5052, "step": 3223 }, { "epoch": 0.23, "grad_norm": 1.9860024271040086, "learning_rate": 8.999213184612371e-06, "loss": 0.5959, "step": 3224 }, { "epoch": 0.23, "grad_norm": 1.482142967295738, "learning_rate": 8.998523337763005e-06, "loss": 0.4915, "step": 3225 }, { "epoch": 0.23, "grad_norm": 0.8492377750495772, "learning_rate": 8.997833279698088e-06, "loss": 0.4359, "step": 3226 }, { "epoch": 0.23, "grad_norm": 1.9519200731969515, "learning_rate": 8.997143010454069e-06, "loss": 0.5845, "step": 3227 }, { "epoch": 0.23, "grad_norm": 1.7414482384857435, "learning_rate": 8.996452530067413e-06, "loss": 0.5914, "step": 3228 }, { "epoch": 0.23, "grad_norm": 1.8617919274310695, "learning_rate": 8.995761838574591e-06, "loss": 0.5384, "step": 3229 }, { "epoch": 0.23, "grad_norm": 1.6887187479243637, "learning_rate": 8.99507093601209e-06, "loss": 0.5944, "step": 3230 }, { "epoch": 0.23, "grad_norm": 1.8697429769591354, "learning_rate": 8.994379822416405e-06, "loss": 0.6304, "step": 3231 }, { "epoch": 0.23, "grad_norm": 1.8737299216826195, "learning_rate": 8.993688497824044e-06, "loss": 0.5256, "step": 3232 }, { "epoch": 0.23, "grad_norm": 1.5871851110323085, "learning_rate": 8.992996962271523e-06, "loss": 0.5815, "step": 3233 }, { "epoch": 0.23, "grad_norm": 0.8170394597898282, "learning_rate": 8.992305215795373e-06, "loss": 0.4574, "step": 3234 }, { "epoch": 0.23, "grad_norm": 1.6425415738908165, "learning_rate": 8.991613258432132e-06, "loss": 0.4784, "step": 3235 }, { "epoch": 0.23, "grad_norm": 1.5787527376473423, "learning_rate": 8.990921090218355e-06, "loss": 0.555, "step": 3236 }, { "epoch": 0.23, "grad_norm": 1.9433371348693589, "learning_rate": 8.990228711190603e-06, "loss": 0.4688, "step": 3237 }, { "epoch": 0.23, "grad_norm": 1.723896395017194, "learning_rate": 8.98953612138545e-06, "loss": 0.5957, "step": 3238 }, { "epoch": 0.23, "grad_norm": 1.617093047604041, "learning_rate": 8.98884332083948e-06, "loss": 0.5168, "step": 3239 }, { "epoch": 0.23, "grad_norm": 1.5806182537705642, "learning_rate": 8.98815030958929e-06, "loss": 0.6095, "step": 3240 }, { "epoch": 0.23, "grad_norm": 0.8058443448221734, "learning_rate": 8.987457087671485e-06, "loss": 0.4419, "step": 3241 }, { "epoch": 0.23, "grad_norm": 1.7281444572734632, "learning_rate": 8.986763655122689e-06, "loss": 0.4612, "step": 3242 }, { "epoch": 0.23, "grad_norm": 1.7229801781965843, "learning_rate": 8.986070011979524e-06, "loss": 0.5727, "step": 3243 }, { "epoch": 0.23, "grad_norm": 2.152121734131126, "learning_rate": 8.985376158278636e-06, "loss": 0.5907, "step": 3244 }, { "epoch": 0.23, "grad_norm": 1.7848930296593741, "learning_rate": 8.984682094056676e-06, "loss": 0.5802, "step": 3245 }, { "epoch": 0.23, "grad_norm": 3.2085837570433986, "learning_rate": 8.983987819350303e-06, "loss": 0.6336, "step": 3246 }, { "epoch": 0.23, "grad_norm": 1.823813665256233, "learning_rate": 8.983293334196197e-06, "loss": 0.5401, "step": 3247 }, { "epoch": 0.23, "grad_norm": 1.5727330068275822, "learning_rate": 8.982598638631038e-06, "loss": 0.5829, "step": 3248 }, { "epoch": 0.23, "grad_norm": 1.910220020160539, "learning_rate": 8.981903732691525e-06, "loss": 0.6025, "step": 3249 }, { "epoch": 0.23, "grad_norm": 2.0193488620608098, "learning_rate": 8.981208616414363e-06, "loss": 0.5942, "step": 3250 }, { "epoch": 0.23, "grad_norm": 1.7100276560604157, "learning_rate": 8.980513289836272e-06, "loss": 0.5197, "step": 3251 }, { "epoch": 0.23, "grad_norm": 1.3657679583905071, "learning_rate": 8.979817752993982e-06, "loss": 0.4892, "step": 3252 }, { "epoch": 0.23, "grad_norm": 2.4157532542283082, "learning_rate": 8.979122005924232e-06, "loss": 0.6404, "step": 3253 }, { "epoch": 0.23, "grad_norm": 1.5478093824588186, "learning_rate": 8.978426048663776e-06, "loss": 0.6122, "step": 3254 }, { "epoch": 0.23, "grad_norm": 1.7843355529890397, "learning_rate": 8.977729881249375e-06, "loss": 0.6285, "step": 3255 }, { "epoch": 0.23, "grad_norm": 1.8957762452006313, "learning_rate": 8.977033503717803e-06, "loss": 0.5759, "step": 3256 }, { "epoch": 0.23, "grad_norm": 2.3846621747530152, "learning_rate": 8.976336916105844e-06, "loss": 0.5404, "step": 3257 }, { "epoch": 0.23, "grad_norm": 2.122295679397442, "learning_rate": 8.975640118450297e-06, "loss": 0.5601, "step": 3258 }, { "epoch": 0.23, "grad_norm": 1.8064780726434821, "learning_rate": 8.974943110787968e-06, "loss": 0.614, "step": 3259 }, { "epoch": 0.23, "grad_norm": 0.7978490931661911, "learning_rate": 8.974245893155673e-06, "loss": 0.4461, "step": 3260 }, { "epoch": 0.23, "grad_norm": 1.653306588106065, "learning_rate": 8.973548465590244e-06, "loss": 0.5917, "step": 3261 }, { "epoch": 0.23, "grad_norm": 1.9903412843688097, "learning_rate": 8.97285082812852e-06, "loss": 0.6436, "step": 3262 }, { "epoch": 0.23, "grad_norm": 2.6452481561831447, "learning_rate": 8.972152980807357e-06, "loss": 0.5578, "step": 3263 }, { "epoch": 0.23, "grad_norm": 1.8024570153417603, "learning_rate": 8.971454923663611e-06, "loss": 0.627, "step": 3264 }, { "epoch": 0.23, "grad_norm": 1.7620173399466177, "learning_rate": 8.97075665673416e-06, "loss": 0.5361, "step": 3265 }, { "epoch": 0.23, "grad_norm": 2.3496644424368154, "learning_rate": 8.970058180055887e-06, "loss": 0.548, "step": 3266 }, { "epoch": 0.23, "grad_norm": 2.44453808603195, "learning_rate": 8.969359493665688e-06, "loss": 0.5887, "step": 3267 }, { "epoch": 0.23, "grad_norm": 0.8088939148600929, "learning_rate": 8.968660597600472e-06, "loss": 0.4512, "step": 3268 }, { "epoch": 0.23, "grad_norm": 1.7675821167537464, "learning_rate": 8.967961491897155e-06, "loss": 0.5384, "step": 3269 }, { "epoch": 0.23, "grad_norm": 1.987730266513659, "learning_rate": 8.967262176592665e-06, "loss": 0.6607, "step": 3270 }, { "epoch": 0.23, "grad_norm": 2.820200398762011, "learning_rate": 8.966562651723947e-06, "loss": 0.5531, "step": 3271 }, { "epoch": 0.23, "grad_norm": 1.730497142660675, "learning_rate": 8.965862917327947e-06, "loss": 0.6658, "step": 3272 }, { "epoch": 0.23, "grad_norm": 1.6933763895642082, "learning_rate": 8.96516297344163e-06, "loss": 0.5852, "step": 3273 }, { "epoch": 0.23, "grad_norm": 1.603094362094811, "learning_rate": 8.96446282010197e-06, "loss": 0.5811, "step": 3274 }, { "epoch": 0.23, "grad_norm": 1.6547792106296562, "learning_rate": 8.963762457345948e-06, "loss": 0.5711, "step": 3275 }, { "epoch": 0.23, "grad_norm": 1.7972955789274427, "learning_rate": 8.963061885210563e-06, "loss": 0.5815, "step": 3276 }, { "epoch": 0.23, "grad_norm": 1.843281373499341, "learning_rate": 8.962361103732822e-06, "loss": 0.5329, "step": 3277 }, { "epoch": 0.23, "grad_norm": 1.8133405167481242, "learning_rate": 8.96166011294974e-06, "loss": 0.5404, "step": 3278 }, { "epoch": 0.23, "grad_norm": 1.8177104335973644, "learning_rate": 8.960958912898347e-06, "loss": 0.6811, "step": 3279 }, { "epoch": 0.23, "grad_norm": 1.6230752306347012, "learning_rate": 8.960257503615682e-06, "loss": 0.5814, "step": 3280 }, { "epoch": 0.23, "grad_norm": 1.5580439386903668, "learning_rate": 8.959555885138798e-06, "loss": 0.5811, "step": 3281 }, { "epoch": 0.23, "grad_norm": 2.7026019795984744, "learning_rate": 8.958854057504754e-06, "loss": 0.5171, "step": 3282 }, { "epoch": 0.23, "grad_norm": 1.5972451889926471, "learning_rate": 8.958152020750624e-06, "loss": 0.5405, "step": 3283 }, { "epoch": 0.23, "grad_norm": 1.8961367685518233, "learning_rate": 8.957449774913493e-06, "loss": 0.6329, "step": 3284 }, { "epoch": 0.23, "grad_norm": 2.126515352611859, "learning_rate": 8.956747320030457e-06, "loss": 0.5431, "step": 3285 }, { "epoch": 0.23, "grad_norm": 2.2571681359566056, "learning_rate": 8.956044656138617e-06, "loss": 0.6407, "step": 3286 }, { "epoch": 0.23, "grad_norm": 1.6908411525009268, "learning_rate": 8.955341783275096e-06, "loss": 0.5712, "step": 3287 }, { "epoch": 0.23, "grad_norm": 1.6308039919470703, "learning_rate": 8.954638701477018e-06, "loss": 0.5519, "step": 3288 }, { "epoch": 0.23, "grad_norm": 1.7898155686375137, "learning_rate": 8.953935410781523e-06, "loss": 0.5571, "step": 3289 }, { "epoch": 0.23, "grad_norm": 1.7908501788722158, "learning_rate": 8.953231911225763e-06, "loss": 0.5231, "step": 3290 }, { "epoch": 0.23, "grad_norm": 1.6947720245475335, "learning_rate": 8.952528202846897e-06, "loss": 0.5707, "step": 3291 }, { "epoch": 0.23, "grad_norm": 2.950233266605976, "learning_rate": 8.9518242856821e-06, "loss": 0.6193, "step": 3292 }, { "epoch": 0.23, "grad_norm": 0.8330861751099594, "learning_rate": 8.951120159768553e-06, "loss": 0.4408, "step": 3293 }, { "epoch": 0.23, "grad_norm": 1.6140889342677536, "learning_rate": 8.95041582514345e-06, "loss": 0.578, "step": 3294 }, { "epoch": 0.23, "grad_norm": 1.7151110918857462, "learning_rate": 8.949711281843998e-06, "loss": 0.5773, "step": 3295 }, { "epoch": 0.23, "grad_norm": 1.5358286755630441, "learning_rate": 8.949006529907413e-06, "loss": 0.5822, "step": 3296 }, { "epoch": 0.23, "grad_norm": 1.6785762716337438, "learning_rate": 8.94830156937092e-06, "loss": 0.5059, "step": 3297 }, { "epoch": 0.23, "grad_norm": 1.7224452668726724, "learning_rate": 8.947596400271763e-06, "loss": 0.5991, "step": 3298 }, { "epoch": 0.23, "grad_norm": 2.028695672369617, "learning_rate": 8.946891022647185e-06, "loss": 0.5832, "step": 3299 }, { "epoch": 0.23, "grad_norm": 1.7498686806762282, "learning_rate": 8.946185436534452e-06, "loss": 0.6337, "step": 3300 }, { "epoch": 0.23, "grad_norm": 2.5404427633307125, "learning_rate": 8.94547964197083e-06, "loss": 0.5747, "step": 3301 }, { "epoch": 0.23, "grad_norm": 1.8592883144946082, "learning_rate": 8.944773638993604e-06, "loss": 0.5421, "step": 3302 }, { "epoch": 0.23, "grad_norm": 0.768196745042951, "learning_rate": 8.94406742764007e-06, "loss": 0.4802, "step": 3303 }, { "epoch": 0.23, "grad_norm": 1.8652791121099208, "learning_rate": 8.943361007947529e-06, "loss": 0.6477, "step": 3304 }, { "epoch": 0.23, "grad_norm": 1.697992567049026, "learning_rate": 8.942654379953297e-06, "loss": 0.5566, "step": 3305 }, { "epoch": 0.23, "grad_norm": 4.174228271284152, "learning_rate": 8.941947543694703e-06, "loss": 0.5926, "step": 3306 }, { "epoch": 0.23, "grad_norm": 1.8688475633301662, "learning_rate": 8.94124049920908e-06, "loss": 0.6027, "step": 3307 }, { "epoch": 0.23, "grad_norm": 0.808972929261961, "learning_rate": 8.940533246533781e-06, "loss": 0.4498, "step": 3308 }, { "epoch": 0.23, "grad_norm": 1.8525659670635315, "learning_rate": 8.939825785706163e-06, "loss": 0.5317, "step": 3309 }, { "epoch": 0.23, "grad_norm": 1.5646139743883554, "learning_rate": 8.939118116763597e-06, "loss": 0.6509, "step": 3310 }, { "epoch": 0.23, "grad_norm": 2.1153692934694477, "learning_rate": 8.938410239743465e-06, "loss": 0.5817, "step": 3311 }, { "epoch": 0.24, "grad_norm": 1.5861706958437227, "learning_rate": 8.937702154683159e-06, "loss": 0.5315, "step": 3312 }, { "epoch": 0.24, "grad_norm": 1.6783010290126583, "learning_rate": 8.936993861620081e-06, "loss": 0.5762, "step": 3313 }, { "epoch": 0.24, "grad_norm": 1.4207336485531172, "learning_rate": 8.936285360591648e-06, "loss": 0.5195, "step": 3314 }, { "epoch": 0.24, "grad_norm": 1.4901147640304615, "learning_rate": 8.935576651635285e-06, "loss": 0.5021, "step": 3315 }, { "epoch": 0.24, "grad_norm": 1.8584633026477153, "learning_rate": 8.934867734788427e-06, "loss": 0.5761, "step": 3316 }, { "epoch": 0.24, "grad_norm": 3.4855327972593932, "learning_rate": 8.934158610088521e-06, "loss": 0.5331, "step": 3317 }, { "epoch": 0.24, "grad_norm": 1.6355755804752488, "learning_rate": 8.933449277573028e-06, "loss": 0.619, "step": 3318 }, { "epoch": 0.24, "grad_norm": 2.031665369824033, "learning_rate": 8.932739737279414e-06, "loss": 0.5384, "step": 3319 }, { "epoch": 0.24, "grad_norm": 2.0872524464633355, "learning_rate": 8.932029989245164e-06, "loss": 0.5349, "step": 3320 }, { "epoch": 0.24, "grad_norm": 1.658323116315646, "learning_rate": 8.931320033507765e-06, "loss": 0.4947, "step": 3321 }, { "epoch": 0.24, "grad_norm": 0.8787613605800726, "learning_rate": 8.93060987010472e-06, "loss": 0.4497, "step": 3322 }, { "epoch": 0.24, "grad_norm": 1.7105677114175144, "learning_rate": 8.929899499073542e-06, "loss": 0.5302, "step": 3323 }, { "epoch": 0.24, "grad_norm": 1.6672822051694582, "learning_rate": 8.929188920451759e-06, "loss": 0.4877, "step": 3324 }, { "epoch": 0.24, "grad_norm": 1.7299424805297468, "learning_rate": 8.928478134276902e-06, "loss": 0.5733, "step": 3325 }, { "epoch": 0.24, "grad_norm": 1.756923900437499, "learning_rate": 8.927767140586518e-06, "loss": 0.5907, "step": 3326 }, { "epoch": 0.24, "grad_norm": 1.6963719683103526, "learning_rate": 8.927055939418165e-06, "loss": 0.5676, "step": 3327 }, { "epoch": 0.24, "grad_norm": 2.4182498960978727, "learning_rate": 8.92634453080941e-06, "loss": 0.5551, "step": 3328 }, { "epoch": 0.24, "grad_norm": 1.6592001965495446, "learning_rate": 8.925632914797833e-06, "loss": 0.5682, "step": 3329 }, { "epoch": 0.24, "grad_norm": 1.753568725380367, "learning_rate": 8.924921091421024e-06, "loss": 0.5487, "step": 3330 }, { "epoch": 0.24, "grad_norm": 1.956388164722983, "learning_rate": 8.924209060716583e-06, "loss": 0.6135, "step": 3331 }, { "epoch": 0.24, "grad_norm": 2.099965269566592, "learning_rate": 8.923496822722122e-06, "loss": 0.588, "step": 3332 }, { "epoch": 0.24, "grad_norm": 1.4940765816120898, "learning_rate": 8.922784377475266e-06, "loss": 0.5625, "step": 3333 }, { "epoch": 0.24, "grad_norm": 1.5454030610067244, "learning_rate": 8.922071725013646e-06, "loss": 0.5806, "step": 3334 }, { "epoch": 0.24, "grad_norm": 1.653357446448182, "learning_rate": 8.921358865374908e-06, "loss": 0.5975, "step": 3335 }, { "epoch": 0.24, "grad_norm": 3.3097044656074774, "learning_rate": 8.920645798596705e-06, "loss": 0.5498, "step": 3336 }, { "epoch": 0.24, "grad_norm": 2.081765124086054, "learning_rate": 8.919932524716707e-06, "loss": 0.5942, "step": 3337 }, { "epoch": 0.24, "grad_norm": 1.5045136450084275, "learning_rate": 8.919219043772592e-06, "loss": 0.5252, "step": 3338 }, { "epoch": 0.24, "grad_norm": 1.7119010101119012, "learning_rate": 8.918505355802046e-06, "loss": 0.5382, "step": 3339 }, { "epoch": 0.24, "grad_norm": 1.6773596881699404, "learning_rate": 8.917791460842771e-06, "loss": 0.5272, "step": 3340 }, { "epoch": 0.24, "grad_norm": 1.6090438517733556, "learning_rate": 8.917077358932473e-06, "loss": 0.5696, "step": 3341 }, { "epoch": 0.24, "grad_norm": 1.5445909479200173, "learning_rate": 8.916363050108879e-06, "loss": 0.57, "step": 3342 }, { "epoch": 0.24, "grad_norm": 1.5366314102451488, "learning_rate": 8.915648534409715e-06, "loss": 0.63, "step": 3343 }, { "epoch": 0.24, "grad_norm": 1.5809792752028764, "learning_rate": 8.91493381187273e-06, "loss": 0.5486, "step": 3344 }, { "epoch": 0.24, "grad_norm": 1.8303657366632156, "learning_rate": 8.914218882535675e-06, "loss": 0.5413, "step": 3345 }, { "epoch": 0.24, "grad_norm": 2.12940205536627, "learning_rate": 8.913503746436314e-06, "loss": 0.5493, "step": 3346 }, { "epoch": 0.24, "grad_norm": 1.651953553757289, "learning_rate": 8.912788403612425e-06, "loss": 0.5587, "step": 3347 }, { "epoch": 0.24, "grad_norm": 1.836977329857448, "learning_rate": 8.912072854101794e-06, "loss": 0.5836, "step": 3348 }, { "epoch": 0.24, "grad_norm": 1.8057506398122625, "learning_rate": 8.91135709794222e-06, "loss": 0.5843, "step": 3349 }, { "epoch": 0.24, "grad_norm": 1.9644616700122761, "learning_rate": 8.91064113517151e-06, "loss": 0.5307, "step": 3350 }, { "epoch": 0.24, "grad_norm": 1.5492554918806145, "learning_rate": 8.909924965827485e-06, "loss": 0.5292, "step": 3351 }, { "epoch": 0.24, "grad_norm": 1.7878326015603088, "learning_rate": 8.909208589947973e-06, "loss": 0.5877, "step": 3352 }, { "epoch": 0.24, "grad_norm": 1.474988075861754, "learning_rate": 8.908492007570819e-06, "loss": 0.5416, "step": 3353 }, { "epoch": 0.24, "grad_norm": 1.789721349190109, "learning_rate": 8.907775218733871e-06, "loss": 0.6279, "step": 3354 }, { "epoch": 0.24, "grad_norm": 1.467179814555534, "learning_rate": 8.907058223474996e-06, "loss": 0.6018, "step": 3355 }, { "epoch": 0.24, "grad_norm": 1.670231345152266, "learning_rate": 8.906341021832066e-06, "loss": 0.5894, "step": 3356 }, { "epoch": 0.24, "grad_norm": 1.884425969363166, "learning_rate": 8.905623613842969e-06, "loss": 0.6718, "step": 3357 }, { "epoch": 0.24, "grad_norm": 1.7952880177154242, "learning_rate": 8.904905999545597e-06, "loss": 0.5906, "step": 3358 }, { "epoch": 0.24, "grad_norm": 1.8631934855750314, "learning_rate": 8.904188178977858e-06, "loss": 0.5935, "step": 3359 }, { "epoch": 0.24, "grad_norm": 1.567379934753429, "learning_rate": 8.90347015217767e-06, "loss": 0.6269, "step": 3360 }, { "epoch": 0.24, "grad_norm": 1.6529429353106653, "learning_rate": 8.902751919182963e-06, "loss": 0.5607, "step": 3361 }, { "epoch": 0.24, "grad_norm": 1.6019177893335002, "learning_rate": 8.902033480031675e-06, "loss": 0.5891, "step": 3362 }, { "epoch": 0.24, "grad_norm": 0.9561059703314159, "learning_rate": 8.901314834761756e-06, "loss": 0.459, "step": 3363 }, { "epoch": 0.24, "grad_norm": 1.5701810338608446, "learning_rate": 8.90059598341117e-06, "loss": 0.6448, "step": 3364 }, { "epoch": 0.24, "grad_norm": 1.7579357258391803, "learning_rate": 8.899876926017884e-06, "loss": 0.5768, "step": 3365 }, { "epoch": 0.24, "grad_norm": 1.8822086995804015, "learning_rate": 8.899157662619887e-06, "loss": 0.4743, "step": 3366 }, { "epoch": 0.24, "grad_norm": 1.640191817878203, "learning_rate": 8.898438193255168e-06, "loss": 0.5748, "step": 3367 }, { "epoch": 0.24, "grad_norm": 2.3754973119027984, "learning_rate": 8.897718517961734e-06, "loss": 0.5376, "step": 3368 }, { "epoch": 0.24, "grad_norm": 2.795766066676986, "learning_rate": 8.896998636777602e-06, "loss": 0.6181, "step": 3369 }, { "epoch": 0.24, "grad_norm": 1.756328554450575, "learning_rate": 8.896278549740796e-06, "loss": 0.5956, "step": 3370 }, { "epoch": 0.24, "grad_norm": 1.875577226567001, "learning_rate": 8.895558256889355e-06, "loss": 0.5247, "step": 3371 }, { "epoch": 0.24, "grad_norm": 1.5920212481848186, "learning_rate": 8.894837758261327e-06, "loss": 0.5351, "step": 3372 }, { "epoch": 0.24, "grad_norm": 1.7383617817183121, "learning_rate": 8.89411705389477e-06, "loss": 0.5822, "step": 3373 }, { "epoch": 0.24, "grad_norm": 0.9004882643421701, "learning_rate": 8.893396143827757e-06, "loss": 0.465, "step": 3374 }, { "epoch": 0.24, "grad_norm": 1.973180669785003, "learning_rate": 8.892675028098367e-06, "loss": 0.6266, "step": 3375 }, { "epoch": 0.24, "grad_norm": 1.9163886443697267, "learning_rate": 8.891953706744691e-06, "loss": 0.5053, "step": 3376 }, { "epoch": 0.24, "grad_norm": 1.8373515300485945, "learning_rate": 8.891232179804833e-06, "loss": 0.528, "step": 3377 }, { "epoch": 0.24, "grad_norm": 0.7884193753225434, "learning_rate": 8.890510447316907e-06, "loss": 0.4567, "step": 3378 }, { "epoch": 0.24, "grad_norm": 2.0303612042905983, "learning_rate": 8.889788509319034e-06, "loss": 0.645, "step": 3379 }, { "epoch": 0.24, "grad_norm": 1.7819371578531362, "learning_rate": 8.889066365849352e-06, "loss": 0.5679, "step": 3380 }, { "epoch": 0.24, "grad_norm": 0.812632882646329, "learning_rate": 8.888344016946008e-06, "loss": 0.4913, "step": 3381 }, { "epoch": 0.24, "grad_norm": 1.6883289673978974, "learning_rate": 8.887621462647156e-06, "loss": 0.5341, "step": 3382 }, { "epoch": 0.24, "grad_norm": 2.1197389049767414, "learning_rate": 8.886898702990965e-06, "loss": 0.5816, "step": 3383 }, { "epoch": 0.24, "grad_norm": 1.7154665383166137, "learning_rate": 8.886175738015617e-06, "loss": 0.5666, "step": 3384 }, { "epoch": 0.24, "grad_norm": 2.315699147210011, "learning_rate": 8.885452567759293e-06, "loss": 0.4998, "step": 3385 }, { "epoch": 0.24, "grad_norm": 1.6252700104780606, "learning_rate": 8.884729192260205e-06, "loss": 0.5501, "step": 3386 }, { "epoch": 0.24, "grad_norm": 1.6910362817364695, "learning_rate": 8.884005611556553e-06, "loss": 0.6264, "step": 3387 }, { "epoch": 0.24, "grad_norm": 2.4809789604562, "learning_rate": 8.883281825686565e-06, "loss": 0.5238, "step": 3388 }, { "epoch": 0.24, "grad_norm": 1.6604285330805015, "learning_rate": 8.882557834688473e-06, "loss": 0.5357, "step": 3389 }, { "epoch": 0.24, "grad_norm": 2.509300837483833, "learning_rate": 8.881833638600521e-06, "loss": 0.6907, "step": 3390 }, { "epoch": 0.24, "grad_norm": 6.521583384248977, "learning_rate": 8.881109237460962e-06, "loss": 0.6326, "step": 3391 }, { "epoch": 0.24, "grad_norm": 1.4958846142070614, "learning_rate": 8.880384631308062e-06, "loss": 0.5146, "step": 3392 }, { "epoch": 0.24, "grad_norm": 2.0713138150248116, "learning_rate": 8.879659820180097e-06, "loss": 0.5547, "step": 3393 }, { "epoch": 0.24, "grad_norm": 1.4740386639486667, "learning_rate": 8.878934804115355e-06, "loss": 0.5566, "step": 3394 }, { "epoch": 0.24, "grad_norm": 0.8685163236541731, "learning_rate": 8.878209583152134e-06, "loss": 0.466, "step": 3395 }, { "epoch": 0.24, "grad_norm": 1.7252380446520137, "learning_rate": 8.87748415732874e-06, "loss": 0.6246, "step": 3396 }, { "epoch": 0.24, "grad_norm": 0.8148128572497462, "learning_rate": 8.876758526683494e-06, "loss": 0.4801, "step": 3397 }, { "epoch": 0.24, "grad_norm": 2.0870913257741712, "learning_rate": 8.876032691254725e-06, "loss": 0.6095, "step": 3398 }, { "epoch": 0.24, "grad_norm": 8.13031208517316, "learning_rate": 8.875306651080778e-06, "loss": 0.5549, "step": 3399 }, { "epoch": 0.24, "grad_norm": 3.569192650303241, "learning_rate": 8.874580406200001e-06, "loss": 0.5437, "step": 3400 }, { "epoch": 0.24, "grad_norm": 1.486524647574541, "learning_rate": 8.87385395665076e-06, "loss": 0.5438, "step": 3401 }, { "epoch": 0.24, "grad_norm": 1.6504746906589784, "learning_rate": 8.873127302471425e-06, "loss": 0.5285, "step": 3402 }, { "epoch": 0.24, "grad_norm": 0.8758467923005988, "learning_rate": 8.872400443700383e-06, "loss": 0.4774, "step": 3403 }, { "epoch": 0.24, "grad_norm": 1.8882105758588144, "learning_rate": 8.871673380376026e-06, "loss": 0.6176, "step": 3404 }, { "epoch": 0.24, "grad_norm": 1.935302769428423, "learning_rate": 8.870946112536763e-06, "loss": 0.5188, "step": 3405 }, { "epoch": 0.24, "grad_norm": 1.861928847353188, "learning_rate": 8.870218640221011e-06, "loss": 0.5767, "step": 3406 }, { "epoch": 0.24, "grad_norm": 1.8935851587127654, "learning_rate": 8.869490963467195e-06, "loss": 0.5104, "step": 3407 }, { "epoch": 0.24, "grad_norm": 1.8484801924340701, "learning_rate": 8.868763082313755e-06, "loss": 0.57, "step": 3408 }, { "epoch": 0.24, "grad_norm": 0.7996558087023813, "learning_rate": 8.86803499679914e-06, "loss": 0.4849, "step": 3409 }, { "epoch": 0.24, "grad_norm": 1.6713140938385975, "learning_rate": 8.86730670696181e-06, "loss": 0.5831, "step": 3410 }, { "epoch": 0.24, "grad_norm": 2.017440219714671, "learning_rate": 8.866578212840234e-06, "loss": 0.5203, "step": 3411 }, { "epoch": 0.24, "grad_norm": 1.9987851957854041, "learning_rate": 8.865849514472897e-06, "loss": 0.5589, "step": 3412 }, { "epoch": 0.24, "grad_norm": 2.0285033662316905, "learning_rate": 8.865120611898287e-06, "loss": 0.6466, "step": 3413 }, { "epoch": 0.24, "grad_norm": 1.7931993978893772, "learning_rate": 8.864391505154912e-06, "loss": 0.5257, "step": 3414 }, { "epoch": 0.24, "grad_norm": 0.8831048325066075, "learning_rate": 8.863662194281283e-06, "loss": 0.5023, "step": 3415 }, { "epoch": 0.24, "grad_norm": 2.2260120966845296, "learning_rate": 8.862932679315925e-06, "loss": 0.6673, "step": 3416 }, { "epoch": 0.24, "grad_norm": 1.9230770384485054, "learning_rate": 8.862202960297373e-06, "loss": 0.5856, "step": 3417 }, { "epoch": 0.24, "grad_norm": 1.8525638865704859, "learning_rate": 8.861473037264172e-06, "loss": 0.5947, "step": 3418 }, { "epoch": 0.24, "grad_norm": 1.7987450902675497, "learning_rate": 8.860742910254883e-06, "loss": 0.5676, "step": 3419 }, { "epoch": 0.24, "grad_norm": 1.7501097369893384, "learning_rate": 8.86001257930807e-06, "loss": 0.6023, "step": 3420 }, { "epoch": 0.24, "grad_norm": 1.6669726175076547, "learning_rate": 8.859282044462315e-06, "loss": 0.5478, "step": 3421 }, { "epoch": 0.24, "grad_norm": 1.7995837146222344, "learning_rate": 8.858551305756205e-06, "loss": 0.6506, "step": 3422 }, { "epoch": 0.24, "grad_norm": 1.9642765276053822, "learning_rate": 8.85782036322834e-06, "loss": 0.492, "step": 3423 }, { "epoch": 0.24, "grad_norm": 0.7977435079913767, "learning_rate": 8.85708921691733e-06, "loss": 0.4886, "step": 3424 }, { "epoch": 0.24, "grad_norm": 6.70194082583396, "learning_rate": 8.8563578668618e-06, "loss": 0.5758, "step": 3425 }, { "epoch": 0.24, "grad_norm": 1.6488801009177294, "learning_rate": 8.855626313100379e-06, "loss": 0.5226, "step": 3426 }, { "epoch": 0.24, "grad_norm": 1.7511417443422959, "learning_rate": 8.854894555671712e-06, "loss": 0.5124, "step": 3427 }, { "epoch": 0.24, "grad_norm": 1.7206348938506097, "learning_rate": 8.854162594614452e-06, "loss": 0.5804, "step": 3428 }, { "epoch": 0.24, "grad_norm": 3.3563531547394385, "learning_rate": 8.853430429967264e-06, "loss": 0.6214, "step": 3429 }, { "epoch": 0.24, "grad_norm": 3.9641546307059965, "learning_rate": 8.852698061768824e-06, "loss": 0.6669, "step": 3430 }, { "epoch": 0.24, "grad_norm": 0.7465698912055478, "learning_rate": 8.851965490057817e-06, "loss": 0.4389, "step": 3431 }, { "epoch": 0.24, "grad_norm": 1.9743001609983004, "learning_rate": 8.851232714872941e-06, "loss": 0.5189, "step": 3432 }, { "epoch": 0.24, "grad_norm": 2.0046359008536094, "learning_rate": 8.850499736252905e-06, "loss": 0.5376, "step": 3433 }, { "epoch": 0.24, "grad_norm": 1.761337934695754, "learning_rate": 8.849766554236424e-06, "loss": 0.6417, "step": 3434 }, { "epoch": 0.24, "grad_norm": 2.2105193602497772, "learning_rate": 8.849033168862227e-06, "loss": 0.4733, "step": 3435 }, { "epoch": 0.24, "grad_norm": 1.6234712266133848, "learning_rate": 8.848299580169058e-06, "loss": 0.5681, "step": 3436 }, { "epoch": 0.24, "grad_norm": 2.0103743771062934, "learning_rate": 8.847565788195664e-06, "loss": 0.504, "step": 3437 }, { "epoch": 0.24, "grad_norm": 1.655915699849422, "learning_rate": 8.84683179298081e-06, "loss": 0.581, "step": 3438 }, { "epoch": 0.24, "grad_norm": 1.9135385025949316, "learning_rate": 8.846097594563263e-06, "loss": 0.627, "step": 3439 }, { "epoch": 0.24, "grad_norm": 1.7086543023647893, "learning_rate": 8.84536319298181e-06, "loss": 0.6422, "step": 3440 }, { "epoch": 0.24, "grad_norm": 1.785381852632581, "learning_rate": 8.844628588275244e-06, "loss": 0.5677, "step": 3441 }, { "epoch": 0.24, "grad_norm": 0.7914085017741087, "learning_rate": 8.84389378048237e-06, "loss": 0.4542, "step": 3442 }, { "epoch": 0.24, "grad_norm": 1.7858583924554032, "learning_rate": 8.843158769641997e-06, "loss": 0.5783, "step": 3443 }, { "epoch": 0.24, "grad_norm": 1.8508121264111628, "learning_rate": 8.842423555792959e-06, "loss": 0.594, "step": 3444 }, { "epoch": 0.24, "grad_norm": 1.7288205426254026, "learning_rate": 8.841688138974087e-06, "loss": 0.5837, "step": 3445 }, { "epoch": 0.24, "grad_norm": 2.455679525334658, "learning_rate": 8.840952519224232e-06, "loss": 0.5535, "step": 3446 }, { "epoch": 0.24, "grad_norm": 1.7682201070459607, "learning_rate": 8.84021669658225e-06, "loss": 0.5546, "step": 3447 }, { "epoch": 0.24, "grad_norm": 1.6579213658951488, "learning_rate": 8.839480671087007e-06, "loss": 0.5379, "step": 3448 }, { "epoch": 0.24, "grad_norm": 0.8245617895428267, "learning_rate": 8.838744442777387e-06, "loss": 0.4598, "step": 3449 }, { "epoch": 0.24, "grad_norm": 1.5934922178449806, "learning_rate": 8.838008011692278e-06, "loss": 0.5677, "step": 3450 }, { "epoch": 0.24, "grad_norm": 1.75936575272406, "learning_rate": 8.83727137787058e-06, "loss": 0.5856, "step": 3451 }, { "epoch": 0.24, "grad_norm": 0.7208201357000927, "learning_rate": 8.836534541351207e-06, "loss": 0.4484, "step": 3452 }, { "epoch": 0.25, "grad_norm": 1.6980040234053784, "learning_rate": 8.835797502173077e-06, "loss": 0.5711, "step": 3453 }, { "epoch": 0.25, "grad_norm": 1.8600968149400643, "learning_rate": 8.835060260375128e-06, "loss": 0.5784, "step": 3454 }, { "epoch": 0.25, "grad_norm": 1.7849903875635742, "learning_rate": 8.8343228159963e-06, "loss": 0.6281, "step": 3455 }, { "epoch": 0.25, "grad_norm": 1.8013249183948317, "learning_rate": 8.833585169075549e-06, "loss": 0.6454, "step": 3456 }, { "epoch": 0.25, "grad_norm": 1.9324670045002184, "learning_rate": 8.832847319651838e-06, "loss": 0.5858, "step": 3457 }, { "epoch": 0.25, "grad_norm": 1.5633632684464063, "learning_rate": 8.832109267764146e-06, "loss": 0.5598, "step": 3458 }, { "epoch": 0.25, "grad_norm": 1.5872701807507894, "learning_rate": 8.831371013451456e-06, "loss": 0.5538, "step": 3459 }, { "epoch": 0.25, "grad_norm": 1.541561437277759, "learning_rate": 8.830632556752768e-06, "loss": 0.5574, "step": 3460 }, { "epoch": 0.25, "grad_norm": 2.102746657500715, "learning_rate": 8.829893897707087e-06, "loss": 0.4949, "step": 3461 }, { "epoch": 0.25, "grad_norm": 1.8557942315246043, "learning_rate": 8.829155036353435e-06, "loss": 0.5407, "step": 3462 }, { "epoch": 0.25, "grad_norm": 1.5838015296039594, "learning_rate": 8.828415972730835e-06, "loss": 0.5147, "step": 3463 }, { "epoch": 0.25, "grad_norm": 1.9397121146740615, "learning_rate": 8.827676706878334e-06, "loss": 0.5389, "step": 3464 }, { "epoch": 0.25, "grad_norm": 1.8032931813713424, "learning_rate": 8.82693723883498e-06, "loss": 0.572, "step": 3465 }, { "epoch": 0.25, "grad_norm": 2.1517770548393793, "learning_rate": 8.826197568639832e-06, "loss": 0.5909, "step": 3466 }, { "epoch": 0.25, "grad_norm": 0.9217569071861116, "learning_rate": 8.825457696331964e-06, "loss": 0.4632, "step": 3467 }, { "epoch": 0.25, "grad_norm": 2.0452288666075975, "learning_rate": 8.824717621950457e-06, "loss": 0.5778, "step": 3468 }, { "epoch": 0.25, "grad_norm": 2.202214700435194, "learning_rate": 8.823977345534407e-06, "loss": 0.4832, "step": 3469 }, { "epoch": 0.25, "grad_norm": 1.539471232651271, "learning_rate": 8.823236867122916e-06, "loss": 0.5829, "step": 3470 }, { "epoch": 0.25, "grad_norm": 0.8652601412332672, "learning_rate": 8.822496186755098e-06, "loss": 0.4558, "step": 3471 }, { "epoch": 0.25, "grad_norm": 1.897618882322111, "learning_rate": 8.821755304470078e-06, "loss": 0.5739, "step": 3472 }, { "epoch": 0.25, "grad_norm": 2.354083817730479, "learning_rate": 8.821014220306995e-06, "loss": 0.5728, "step": 3473 }, { "epoch": 0.25, "grad_norm": 1.7150135664999324, "learning_rate": 8.820272934304992e-06, "loss": 0.5161, "step": 3474 }, { "epoch": 0.25, "grad_norm": 1.6994329328027284, "learning_rate": 8.819531446503229e-06, "loss": 0.5637, "step": 3475 }, { "epoch": 0.25, "grad_norm": 1.5721964388322847, "learning_rate": 8.818789756940872e-06, "loss": 0.5373, "step": 3476 }, { "epoch": 0.25, "grad_norm": 1.5352522368072474, "learning_rate": 8.8180478656571e-06, "loss": 0.5292, "step": 3477 }, { "epoch": 0.25, "grad_norm": 1.5997852640084447, "learning_rate": 8.817305772691103e-06, "loss": 0.6079, "step": 3478 }, { "epoch": 0.25, "grad_norm": 1.4661214049391107, "learning_rate": 8.81656347808208e-06, "loss": 0.5085, "step": 3479 }, { "epoch": 0.25, "grad_norm": 1.9624290405753892, "learning_rate": 8.815820981869243e-06, "loss": 0.5288, "step": 3480 }, { "epoch": 0.25, "grad_norm": 1.5262242644345902, "learning_rate": 8.81507828409181e-06, "loss": 0.5548, "step": 3481 }, { "epoch": 0.25, "grad_norm": 1.5870541444861637, "learning_rate": 8.814335384789016e-06, "loss": 0.535, "step": 3482 }, { "epoch": 0.25, "grad_norm": 1.5355892094361028, "learning_rate": 8.813592284000101e-06, "loss": 0.5335, "step": 3483 }, { "epoch": 0.25, "grad_norm": 1.7237255756449659, "learning_rate": 8.812848981764321e-06, "loss": 0.5499, "step": 3484 }, { "epoch": 0.25, "grad_norm": 1.5740628164363135, "learning_rate": 8.812105478120936e-06, "loss": 0.4714, "step": 3485 }, { "epoch": 0.25, "grad_norm": 2.1097888519167722, "learning_rate": 8.811361773109224e-06, "loss": 0.6267, "step": 3486 }, { "epoch": 0.25, "grad_norm": 2.1379227450921263, "learning_rate": 8.810617866768469e-06, "loss": 0.5612, "step": 3487 }, { "epoch": 0.25, "grad_norm": 1.50045944397671, "learning_rate": 8.809873759137966e-06, "loss": 0.6065, "step": 3488 }, { "epoch": 0.25, "grad_norm": 4.18048567606914, "learning_rate": 8.80912945025702e-06, "loss": 0.521, "step": 3489 }, { "epoch": 0.25, "grad_norm": 1.7995178717734759, "learning_rate": 8.80838494016495e-06, "loss": 0.5408, "step": 3490 }, { "epoch": 0.25, "grad_norm": 0.8077891822337105, "learning_rate": 8.807640228901084e-06, "loss": 0.4764, "step": 3491 }, { "epoch": 0.25, "grad_norm": 1.582827847937871, "learning_rate": 8.806895316504757e-06, "loss": 0.5122, "step": 3492 }, { "epoch": 0.25, "grad_norm": 0.7826633836294917, "learning_rate": 8.806150203015322e-06, "loss": 0.4571, "step": 3493 }, { "epoch": 0.25, "grad_norm": 1.9297796702987702, "learning_rate": 8.805404888472135e-06, "loss": 0.5722, "step": 3494 }, { "epoch": 0.25, "grad_norm": 2.946500502481895, "learning_rate": 8.804659372914568e-06, "loss": 0.6609, "step": 3495 }, { "epoch": 0.25, "grad_norm": 1.737788881605149, "learning_rate": 8.803913656382e-06, "loss": 0.5031, "step": 3496 }, { "epoch": 0.25, "grad_norm": 1.5966975589760444, "learning_rate": 8.803167738913824e-06, "loss": 0.5698, "step": 3497 }, { "epoch": 0.25, "grad_norm": 2.0462722042309056, "learning_rate": 8.80242162054944e-06, "loss": 0.495, "step": 3498 }, { "epoch": 0.25, "grad_norm": 1.7553543656359716, "learning_rate": 8.801675301328263e-06, "loss": 0.5906, "step": 3499 }, { "epoch": 0.25, "grad_norm": 1.7334826578400364, "learning_rate": 8.800928781289715e-06, "loss": 0.5813, "step": 3500 }, { "epoch": 0.25, "grad_norm": 1.7819994045250607, "learning_rate": 8.800182060473228e-06, "loss": 0.6215, "step": 3501 }, { "epoch": 0.25, "grad_norm": 1.6444762719282644, "learning_rate": 8.799435138918248e-06, "loss": 0.6026, "step": 3502 }, { "epoch": 0.25, "grad_norm": 2.0387462172174264, "learning_rate": 8.798688016664231e-06, "loss": 0.536, "step": 3503 }, { "epoch": 0.25, "grad_norm": 1.574216448719919, "learning_rate": 8.79794069375064e-06, "loss": 0.5819, "step": 3504 }, { "epoch": 0.25, "grad_norm": 1.710748567411178, "learning_rate": 8.797193170216953e-06, "loss": 0.5713, "step": 3505 }, { "epoch": 0.25, "grad_norm": 1.530247772013323, "learning_rate": 8.796445446102657e-06, "loss": 0.5199, "step": 3506 }, { "epoch": 0.25, "grad_norm": 2.0611077545947203, "learning_rate": 8.795697521447248e-06, "loss": 0.5632, "step": 3507 }, { "epoch": 0.25, "grad_norm": 2.0571110120031584, "learning_rate": 8.794949396290233e-06, "loss": 0.6028, "step": 3508 }, { "epoch": 0.25, "grad_norm": 1.8216311390643178, "learning_rate": 8.794201070671134e-06, "loss": 0.5385, "step": 3509 }, { "epoch": 0.25, "grad_norm": 1.6748061102813903, "learning_rate": 8.793452544629475e-06, "loss": 0.5395, "step": 3510 }, { "epoch": 0.25, "grad_norm": 1.4779900339120047, "learning_rate": 8.7927038182048e-06, "loss": 0.531, "step": 3511 }, { "epoch": 0.25, "grad_norm": 2.4029747250691638, "learning_rate": 8.791954891436658e-06, "loss": 0.5418, "step": 3512 }, { "epoch": 0.25, "grad_norm": 1.7349472507867285, "learning_rate": 8.79120576436461e-06, "loss": 0.5477, "step": 3513 }, { "epoch": 0.25, "grad_norm": 1.5693731850250041, "learning_rate": 8.790456437028228e-06, "loss": 0.4997, "step": 3514 }, { "epoch": 0.25, "grad_norm": 23.3374808625812, "learning_rate": 8.78970690946709e-06, "loss": 0.5701, "step": 3515 }, { "epoch": 0.25, "grad_norm": 1.6067107721998015, "learning_rate": 8.788957181720796e-06, "loss": 0.5767, "step": 3516 }, { "epoch": 0.25, "grad_norm": 3.434441181565452, "learning_rate": 8.788207253828943e-06, "loss": 0.5739, "step": 3517 }, { "epoch": 0.25, "grad_norm": 1.7339622491702287, "learning_rate": 8.787457125831146e-06, "loss": 0.5323, "step": 3518 }, { "epoch": 0.25, "grad_norm": 1.8539633730372977, "learning_rate": 8.78670679776703e-06, "loss": 0.5825, "step": 3519 }, { "epoch": 0.25, "grad_norm": 1.6797740836557598, "learning_rate": 8.78595626967623e-06, "loss": 0.6299, "step": 3520 }, { "epoch": 0.25, "grad_norm": 2.086605116187921, "learning_rate": 8.785205541598391e-06, "loss": 0.5388, "step": 3521 }, { "epoch": 0.25, "grad_norm": 4.604162152847393, "learning_rate": 8.784454613573172e-06, "loss": 0.58, "step": 3522 }, { "epoch": 0.25, "grad_norm": 1.7765282804776734, "learning_rate": 8.783703485640233e-06, "loss": 0.5325, "step": 3523 }, { "epoch": 0.25, "grad_norm": 1.5141759617941786, "learning_rate": 8.782952157839258e-06, "loss": 0.4905, "step": 3524 }, { "epoch": 0.25, "grad_norm": 2.290043947055051, "learning_rate": 8.78220063020993e-06, "loss": 0.5032, "step": 3525 }, { "epoch": 0.25, "grad_norm": 1.948274071610963, "learning_rate": 8.781448902791949e-06, "loss": 0.5588, "step": 3526 }, { "epoch": 0.25, "grad_norm": 1.741558502112802, "learning_rate": 8.780696975625023e-06, "loss": 0.6184, "step": 3527 }, { "epoch": 0.25, "grad_norm": 1.7883370117449195, "learning_rate": 8.779944848748874e-06, "loss": 0.5751, "step": 3528 }, { "epoch": 0.25, "grad_norm": 2.002023239896317, "learning_rate": 8.779192522203229e-06, "loss": 0.5615, "step": 3529 }, { "epoch": 0.25, "grad_norm": 1.555600383358493, "learning_rate": 8.778439996027827e-06, "loss": 0.5592, "step": 3530 }, { "epoch": 0.25, "grad_norm": 1.7099922262436646, "learning_rate": 8.777687270262425e-06, "loss": 0.6077, "step": 3531 }, { "epoch": 0.25, "grad_norm": 2.3038663002315953, "learning_rate": 8.776934344946779e-06, "loss": 0.5292, "step": 3532 }, { "epoch": 0.25, "grad_norm": 1.563421308569948, "learning_rate": 8.776181220120662e-06, "loss": 0.6121, "step": 3533 }, { "epoch": 0.25, "grad_norm": 1.4620536538123339, "learning_rate": 8.775427895823859e-06, "loss": 0.635, "step": 3534 }, { "epoch": 0.25, "grad_norm": 1.0061452867765013, "learning_rate": 8.774674372096161e-06, "loss": 0.4557, "step": 3535 }, { "epoch": 0.25, "grad_norm": 2.4079815511394007, "learning_rate": 8.773920648977371e-06, "loss": 0.6528, "step": 3536 }, { "epoch": 0.25, "grad_norm": 1.7633387585185842, "learning_rate": 8.773166726507307e-06, "loss": 0.5575, "step": 3537 }, { "epoch": 0.25, "grad_norm": 2.3141514005483037, "learning_rate": 8.77241260472579e-06, "loss": 0.676, "step": 3538 }, { "epoch": 0.25, "grad_norm": 5.767353373868913, "learning_rate": 8.771658283672657e-06, "loss": 0.6051, "step": 3539 }, { "epoch": 0.25, "grad_norm": 1.5544666038167252, "learning_rate": 8.770903763387753e-06, "loss": 0.6121, "step": 3540 }, { "epoch": 0.25, "grad_norm": 1.6045519539605706, "learning_rate": 8.770149043910937e-06, "loss": 0.5737, "step": 3541 }, { "epoch": 0.25, "grad_norm": 0.8660916038448847, "learning_rate": 8.76939412528207e-06, "loss": 0.467, "step": 3542 }, { "epoch": 0.25, "grad_norm": 2.052165391738364, "learning_rate": 8.768639007541034e-06, "loss": 0.5898, "step": 3543 }, { "epoch": 0.25, "grad_norm": 2.312284320166707, "learning_rate": 8.767883690727716e-06, "loss": 0.5306, "step": 3544 }, { "epoch": 0.25, "grad_norm": 0.9091648374909655, "learning_rate": 8.767128174882013e-06, "loss": 0.4577, "step": 3545 }, { "epoch": 0.25, "grad_norm": 1.6442467419450388, "learning_rate": 8.766372460043837e-06, "loss": 0.5471, "step": 3546 }, { "epoch": 0.25, "grad_norm": 1.8064950103547923, "learning_rate": 8.765616546253105e-06, "loss": 0.4484, "step": 3547 }, { "epoch": 0.25, "grad_norm": 2.293471862718506, "learning_rate": 8.764860433549747e-06, "loss": 0.5535, "step": 3548 }, { "epoch": 0.25, "grad_norm": 1.7258487168381427, "learning_rate": 8.764104121973702e-06, "loss": 0.6364, "step": 3549 }, { "epoch": 0.25, "grad_norm": 1.495264853913481, "learning_rate": 8.763347611564925e-06, "loss": 0.5826, "step": 3550 }, { "epoch": 0.25, "grad_norm": 1.7052199262358334, "learning_rate": 8.762590902363375e-06, "loss": 0.6062, "step": 3551 }, { "epoch": 0.25, "grad_norm": 1.8488618109319745, "learning_rate": 8.761833994409023e-06, "loss": 0.604, "step": 3552 }, { "epoch": 0.25, "grad_norm": 2.3144500101480365, "learning_rate": 8.761076887741855e-06, "loss": 0.5915, "step": 3553 }, { "epoch": 0.25, "grad_norm": 1.3787992968939957, "learning_rate": 8.760319582401859e-06, "loss": 0.5249, "step": 3554 }, { "epoch": 0.25, "grad_norm": 1.9700810291058084, "learning_rate": 8.759562078429043e-06, "loss": 0.5436, "step": 3555 }, { "epoch": 0.25, "grad_norm": 1.8836617555997572, "learning_rate": 8.758804375863417e-06, "loss": 0.6626, "step": 3556 }, { "epoch": 0.25, "grad_norm": 1.7456918223000262, "learning_rate": 8.758046474745008e-06, "loss": 0.5911, "step": 3557 }, { "epoch": 0.25, "grad_norm": 2.0569664889441714, "learning_rate": 8.75728837511385e-06, "loss": 0.6069, "step": 3558 }, { "epoch": 0.25, "grad_norm": 1.5936160525488972, "learning_rate": 8.756530077009988e-06, "loss": 0.4951, "step": 3559 }, { "epoch": 0.25, "grad_norm": 1.5244428736375484, "learning_rate": 8.75577158047348e-06, "loss": 0.5089, "step": 3560 }, { "epoch": 0.25, "grad_norm": 1.5430495511142288, "learning_rate": 8.755012885544389e-06, "loss": 0.5366, "step": 3561 }, { "epoch": 0.25, "grad_norm": 0.8838013558654617, "learning_rate": 8.754253992262796e-06, "loss": 0.4571, "step": 3562 }, { "epoch": 0.25, "grad_norm": 1.7312084227295266, "learning_rate": 8.753494900668785e-06, "loss": 0.5838, "step": 3563 }, { "epoch": 0.25, "grad_norm": 1.6012524798619687, "learning_rate": 8.752735610802454e-06, "loss": 0.6118, "step": 3564 }, { "epoch": 0.25, "grad_norm": 1.8498373207413248, "learning_rate": 8.751976122703913e-06, "loss": 0.5768, "step": 3565 }, { "epoch": 0.25, "grad_norm": 1.7427152124593832, "learning_rate": 8.75121643641328e-06, "loss": 0.5889, "step": 3566 }, { "epoch": 0.25, "grad_norm": 1.7478268876946392, "learning_rate": 8.750456551970684e-06, "loss": 0.5825, "step": 3567 }, { "epoch": 0.25, "grad_norm": 1.9879869218371424, "learning_rate": 8.749696469416262e-06, "loss": 0.5337, "step": 3568 }, { "epoch": 0.25, "grad_norm": 1.824353563765505, "learning_rate": 8.74893618879017e-06, "loss": 0.5455, "step": 3569 }, { "epoch": 0.25, "grad_norm": 2.2599102801490574, "learning_rate": 8.748175710132562e-06, "loss": 0.52, "step": 3570 }, { "epoch": 0.25, "grad_norm": 1.4896632630326605, "learning_rate": 8.747415033483615e-06, "loss": 0.5257, "step": 3571 }, { "epoch": 0.25, "grad_norm": 3.647331880626189, "learning_rate": 8.746654158883507e-06, "loss": 0.6105, "step": 3572 }, { "epoch": 0.25, "grad_norm": 1.6630603768454533, "learning_rate": 8.745893086372432e-06, "loss": 0.612, "step": 3573 }, { "epoch": 0.25, "grad_norm": 1.7501164151634936, "learning_rate": 8.745131815990591e-06, "loss": 0.5057, "step": 3574 }, { "epoch": 0.25, "grad_norm": 0.8830299607852853, "learning_rate": 8.744370347778198e-06, "loss": 0.452, "step": 3575 }, { "epoch": 0.25, "grad_norm": 1.5974128624558803, "learning_rate": 8.743608681775473e-06, "loss": 0.5617, "step": 3576 }, { "epoch": 0.25, "grad_norm": 2.946811896788621, "learning_rate": 8.742846818022654e-06, "loss": 0.6471, "step": 3577 }, { "epoch": 0.25, "grad_norm": 1.616861307139559, "learning_rate": 8.742084756559984e-06, "loss": 0.5687, "step": 3578 }, { "epoch": 0.25, "grad_norm": 1.741814282848707, "learning_rate": 8.741322497427717e-06, "loss": 0.5762, "step": 3579 }, { "epoch": 0.25, "grad_norm": 1.8522648817421365, "learning_rate": 8.740560040666117e-06, "loss": 0.5924, "step": 3580 }, { "epoch": 0.25, "grad_norm": 1.875710171427392, "learning_rate": 8.739797386315463e-06, "loss": 0.4979, "step": 3581 }, { "epoch": 0.25, "grad_norm": 1.5622426087668504, "learning_rate": 8.739034534416038e-06, "loss": 0.5487, "step": 3582 }, { "epoch": 0.25, "grad_norm": 1.5384162179633796, "learning_rate": 8.73827148500814e-06, "loss": 0.579, "step": 3583 }, { "epoch": 0.25, "grad_norm": 1.7370431979731387, "learning_rate": 8.737508238132074e-06, "loss": 0.5223, "step": 3584 }, { "epoch": 0.25, "grad_norm": 1.874484798726454, "learning_rate": 8.736744793828158e-06, "loss": 0.5775, "step": 3585 }, { "epoch": 0.25, "grad_norm": 1.7004036356599075, "learning_rate": 8.735981152136721e-06, "loss": 0.5382, "step": 3586 }, { "epoch": 0.25, "grad_norm": 1.9004536977696118, "learning_rate": 8.735217313098101e-06, "loss": 0.481, "step": 3587 }, { "epoch": 0.25, "grad_norm": 1.713668624278301, "learning_rate": 8.734453276752646e-06, "loss": 0.5715, "step": 3588 }, { "epoch": 0.25, "grad_norm": 1.6254213474064918, "learning_rate": 8.733689043140713e-06, "loss": 0.5364, "step": 3589 }, { "epoch": 0.25, "grad_norm": 1.784269153006098, "learning_rate": 8.732924612302675e-06, "loss": 0.6085, "step": 3590 }, { "epoch": 0.25, "grad_norm": 1.6464505079732679, "learning_rate": 8.732159984278909e-06, "loss": 0.5949, "step": 3591 }, { "epoch": 0.25, "grad_norm": 3.838063209545325, "learning_rate": 8.731395159109808e-06, "loss": 0.6504, "step": 3592 }, { "epoch": 0.25, "grad_norm": 1.57740299795768, "learning_rate": 8.73063013683577e-06, "loss": 0.5621, "step": 3593 }, { "epoch": 0.26, "grad_norm": 1.6912789431169566, "learning_rate": 8.729864917497206e-06, "loss": 0.5993, "step": 3594 }, { "epoch": 0.26, "grad_norm": 1.494865070165952, "learning_rate": 8.729099501134539e-06, "loss": 0.526, "step": 3595 }, { "epoch": 0.26, "grad_norm": 1.4821482489130822, "learning_rate": 8.728333887788201e-06, "loss": 0.5951, "step": 3596 }, { "epoch": 0.26, "grad_norm": 1.0279595466081974, "learning_rate": 8.727568077498634e-06, "loss": 0.4724, "step": 3597 }, { "epoch": 0.26, "grad_norm": 1.6208396064027573, "learning_rate": 8.726802070306289e-06, "loss": 0.5704, "step": 3598 }, { "epoch": 0.26, "grad_norm": 1.5428340869398243, "learning_rate": 8.726035866251632e-06, "loss": 0.506, "step": 3599 }, { "epoch": 0.26, "grad_norm": 1.4950042363695166, "learning_rate": 8.725269465375135e-06, "loss": 0.5819, "step": 3600 }, { "epoch": 0.26, "grad_norm": 2.2244418817594855, "learning_rate": 8.724502867717281e-06, "loss": 0.5982, "step": 3601 }, { "epoch": 0.26, "grad_norm": 1.5067785678131795, "learning_rate": 8.723736073318565e-06, "loss": 0.5595, "step": 3602 }, { "epoch": 0.26, "grad_norm": 1.6553742766615005, "learning_rate": 8.722969082219494e-06, "loss": 0.5069, "step": 3603 }, { "epoch": 0.26, "grad_norm": 1.9575609355860462, "learning_rate": 8.722201894460577e-06, "loss": 0.5878, "step": 3604 }, { "epoch": 0.26, "grad_norm": 2.506846191681386, "learning_rate": 8.721434510082347e-06, "loss": 0.575, "step": 3605 }, { "epoch": 0.26, "grad_norm": 2.301053292514493, "learning_rate": 8.720666929125335e-06, "loss": 0.5878, "step": 3606 }, { "epoch": 0.26, "grad_norm": 1.9236638537373143, "learning_rate": 8.719899151630088e-06, "loss": 0.5708, "step": 3607 }, { "epoch": 0.26, "grad_norm": 1.6333889889102935, "learning_rate": 8.719131177637165e-06, "loss": 0.5898, "step": 3608 }, { "epoch": 0.26, "grad_norm": 2.124134934066161, "learning_rate": 8.718363007187129e-06, "loss": 0.5832, "step": 3609 }, { "epoch": 0.26, "grad_norm": 1.702871232379821, "learning_rate": 8.717594640320562e-06, "loss": 0.5314, "step": 3610 }, { "epoch": 0.26, "grad_norm": 2.7625790736531948, "learning_rate": 8.716826077078047e-06, "loss": 0.581, "step": 3611 }, { "epoch": 0.26, "grad_norm": 1.8255145779819335, "learning_rate": 8.716057317500183e-06, "loss": 0.5748, "step": 3612 }, { "epoch": 0.26, "grad_norm": 2.898807726093886, "learning_rate": 8.715288361627581e-06, "loss": 0.5606, "step": 3613 }, { "epoch": 0.26, "grad_norm": 1.7745438437899488, "learning_rate": 8.71451920950086e-06, "loss": 0.6497, "step": 3614 }, { "epoch": 0.26, "grad_norm": 2.068276608729876, "learning_rate": 8.713749861160647e-06, "loss": 0.5827, "step": 3615 }, { "epoch": 0.26, "grad_norm": 2.263145071139074, "learning_rate": 8.712980316647582e-06, "loss": 0.6332, "step": 3616 }, { "epoch": 0.26, "grad_norm": 1.4428324207610437, "learning_rate": 8.712210576002316e-06, "loss": 0.5832, "step": 3617 }, { "epoch": 0.26, "grad_norm": 0.8754212296269239, "learning_rate": 8.711440639265507e-06, "loss": 0.4773, "step": 3618 }, { "epoch": 0.26, "grad_norm": 0.8072928209018205, "learning_rate": 8.710670506477829e-06, "loss": 0.456, "step": 3619 }, { "epoch": 0.26, "grad_norm": 2.108762898757041, "learning_rate": 8.709900177679961e-06, "loss": 0.5224, "step": 3620 }, { "epoch": 0.26, "grad_norm": 0.7143354334254715, "learning_rate": 8.709129652912595e-06, "loss": 0.4544, "step": 3621 }, { "epoch": 0.26, "grad_norm": 1.606053300195936, "learning_rate": 8.708358932216431e-06, "loss": 0.5885, "step": 3622 }, { "epoch": 0.26, "grad_norm": 1.7759187246092598, "learning_rate": 8.707588015632184e-06, "loss": 0.5672, "step": 3623 }, { "epoch": 0.26, "grad_norm": 2.63649911595348, "learning_rate": 8.706816903200576e-06, "loss": 0.5776, "step": 3624 }, { "epoch": 0.26, "grad_norm": 0.8646140014532562, "learning_rate": 8.706045594962338e-06, "loss": 0.467, "step": 3625 }, { "epoch": 0.26, "grad_norm": 2.5040473623783774, "learning_rate": 8.705274090958213e-06, "loss": 0.5585, "step": 3626 }, { "epoch": 0.26, "grad_norm": 1.7324541301129097, "learning_rate": 8.704502391228955e-06, "loss": 0.5534, "step": 3627 }, { "epoch": 0.26, "grad_norm": 1.7710468926980585, "learning_rate": 8.70373049581533e-06, "loss": 0.584, "step": 3628 }, { "epoch": 0.26, "grad_norm": 1.9989333625908405, "learning_rate": 8.702958404758109e-06, "loss": 0.5992, "step": 3629 }, { "epoch": 0.26, "grad_norm": 1.7654461142506783, "learning_rate": 8.702186118098076e-06, "loss": 0.5447, "step": 3630 }, { "epoch": 0.26, "grad_norm": 1.7381693940197311, "learning_rate": 8.701413635876031e-06, "loss": 0.5715, "step": 3631 }, { "epoch": 0.26, "grad_norm": 3.3475359920593384, "learning_rate": 8.700640958132774e-06, "loss": 0.5568, "step": 3632 }, { "epoch": 0.26, "grad_norm": 1.5845307990415023, "learning_rate": 8.699868084909122e-06, "loss": 0.5565, "step": 3633 }, { "epoch": 0.26, "grad_norm": 2.0047742271623905, "learning_rate": 8.699095016245901e-06, "loss": 0.5646, "step": 3634 }, { "epoch": 0.26, "grad_norm": 0.7925378875225985, "learning_rate": 8.698321752183946e-06, "loss": 0.4613, "step": 3635 }, { "epoch": 0.26, "grad_norm": 1.7921994629169096, "learning_rate": 8.697548292764106e-06, "loss": 0.5864, "step": 3636 }, { "epoch": 0.26, "grad_norm": 1.7084199426081939, "learning_rate": 8.696774638027237e-06, "loss": 0.4903, "step": 3637 }, { "epoch": 0.26, "grad_norm": 0.9043494285887166, "learning_rate": 8.696000788014203e-06, "loss": 0.4651, "step": 3638 }, { "epoch": 0.26, "grad_norm": 1.7604120874343454, "learning_rate": 8.695226742765886e-06, "loss": 0.6693, "step": 3639 }, { "epoch": 0.26, "grad_norm": 2.189263168230681, "learning_rate": 8.69445250232317e-06, "loss": 0.5724, "step": 3640 }, { "epoch": 0.26, "grad_norm": 1.7532587805774091, "learning_rate": 8.693678066726954e-06, "loss": 0.5295, "step": 3641 }, { "epoch": 0.26, "grad_norm": 1.587597835912209, "learning_rate": 8.692903436018146e-06, "loss": 0.5891, "step": 3642 }, { "epoch": 0.26, "grad_norm": 1.610792688572172, "learning_rate": 8.692128610237666e-06, "loss": 0.6089, "step": 3643 }, { "epoch": 0.26, "grad_norm": 1.8678905178735024, "learning_rate": 8.691353589426443e-06, "loss": 0.5845, "step": 3644 }, { "epoch": 0.26, "grad_norm": 3.2330473809015503, "learning_rate": 8.690578373625414e-06, "loss": 0.5595, "step": 3645 }, { "epoch": 0.26, "grad_norm": 2.218967301274481, "learning_rate": 8.68980296287553e-06, "loss": 0.5914, "step": 3646 }, { "epoch": 0.26, "grad_norm": 1.7549158855386653, "learning_rate": 8.689027357217751e-06, "loss": 0.5471, "step": 3647 }, { "epoch": 0.26, "grad_norm": 1.468988440212857, "learning_rate": 8.688251556693049e-06, "loss": 0.5992, "step": 3648 }, { "epoch": 0.26, "grad_norm": 1.9179764601407747, "learning_rate": 8.6874755613424e-06, "loss": 0.5192, "step": 3649 }, { "epoch": 0.26, "grad_norm": 1.8650251537227889, "learning_rate": 8.686699371206795e-06, "loss": 0.5759, "step": 3650 }, { "epoch": 0.26, "grad_norm": 1.8132813440249933, "learning_rate": 8.68592298632724e-06, "loss": 0.6232, "step": 3651 }, { "epoch": 0.26, "grad_norm": 1.9072099590090135, "learning_rate": 8.685146406744744e-06, "loss": 0.5648, "step": 3652 }, { "epoch": 0.26, "grad_norm": 1.8352834947803371, "learning_rate": 8.684369632500326e-06, "loss": 0.5583, "step": 3653 }, { "epoch": 0.26, "grad_norm": 1.9478162998984117, "learning_rate": 8.68359266363502e-06, "loss": 0.6115, "step": 3654 }, { "epoch": 0.26, "grad_norm": 1.4721815213799276, "learning_rate": 8.682815500189868e-06, "loss": 0.5679, "step": 3655 }, { "epoch": 0.26, "grad_norm": 2.400695922871463, "learning_rate": 8.682038142205922e-06, "loss": 0.5429, "step": 3656 }, { "epoch": 0.26, "grad_norm": 1.8518293260502567, "learning_rate": 8.681260589724245e-06, "loss": 0.5488, "step": 3657 }, { "epoch": 0.26, "grad_norm": 1.575391248252196, "learning_rate": 8.680482842785909e-06, "loss": 0.5391, "step": 3658 }, { "epoch": 0.26, "grad_norm": 1.4891242458232472, "learning_rate": 8.679704901432e-06, "loss": 0.4822, "step": 3659 }, { "epoch": 0.26, "grad_norm": 1.644929364259764, "learning_rate": 8.678926765703609e-06, "loss": 0.5722, "step": 3660 }, { "epoch": 0.26, "grad_norm": 2.1478295783341457, "learning_rate": 8.678148435641837e-06, "loss": 0.558, "step": 3661 }, { "epoch": 0.26, "grad_norm": 2.939418933932354, "learning_rate": 8.677369911287806e-06, "loss": 0.615, "step": 3662 }, { "epoch": 0.26, "grad_norm": 0.7629138496993778, "learning_rate": 8.676591192682633e-06, "loss": 0.4562, "step": 3663 }, { "epoch": 0.26, "grad_norm": 1.7632948497629903, "learning_rate": 8.675812279867457e-06, "loss": 0.5416, "step": 3664 }, { "epoch": 0.26, "grad_norm": 2.0229309801397855, "learning_rate": 8.675033172883419e-06, "loss": 0.5525, "step": 3665 }, { "epoch": 0.26, "grad_norm": 1.7646636554629866, "learning_rate": 8.674253871771677e-06, "loss": 0.6159, "step": 3666 }, { "epoch": 0.26, "grad_norm": 1.8909733644854134, "learning_rate": 8.673474376573396e-06, "loss": 0.6238, "step": 3667 }, { "epoch": 0.26, "grad_norm": 3.8954919363219718, "learning_rate": 8.67269468732975e-06, "loss": 0.5375, "step": 3668 }, { "epoch": 0.26, "grad_norm": 1.8170956115543786, "learning_rate": 8.671914804081927e-06, "loss": 0.5076, "step": 3669 }, { "epoch": 0.26, "grad_norm": 1.3942200401772238, "learning_rate": 8.67113472687112e-06, "loss": 0.4806, "step": 3670 }, { "epoch": 0.26, "grad_norm": 1.7816022807699283, "learning_rate": 8.670354455738539e-06, "loss": 0.5944, "step": 3671 }, { "epoch": 0.26, "grad_norm": 1.6316943758406213, "learning_rate": 8.669573990725399e-06, "loss": 0.5101, "step": 3672 }, { "epoch": 0.26, "grad_norm": 1.4967197781464874, "learning_rate": 8.668793331872925e-06, "loss": 0.5144, "step": 3673 }, { "epoch": 0.26, "grad_norm": 0.8310118763729321, "learning_rate": 8.668012479222356e-06, "loss": 0.4676, "step": 3674 }, { "epoch": 0.26, "grad_norm": 2.1099151954110615, "learning_rate": 8.66723143281494e-06, "loss": 0.5629, "step": 3675 }, { "epoch": 0.26, "grad_norm": 1.9145896579019952, "learning_rate": 8.666450192691932e-06, "loss": 0.554, "step": 3676 }, { "epoch": 0.26, "grad_norm": 1.7022551172668052, "learning_rate": 8.665668758894603e-06, "loss": 0.6488, "step": 3677 }, { "epoch": 0.26, "grad_norm": 0.7522524061469686, "learning_rate": 8.664887131464228e-06, "loss": 0.466, "step": 3678 }, { "epoch": 0.26, "grad_norm": 1.810764119789444, "learning_rate": 8.664105310442098e-06, "loss": 0.5543, "step": 3679 }, { "epoch": 0.26, "grad_norm": 2.267152642938336, "learning_rate": 8.663323295869506e-06, "loss": 0.5938, "step": 3680 }, { "epoch": 0.26, "grad_norm": 1.8566586317689397, "learning_rate": 8.662541087787768e-06, "loss": 0.6377, "step": 3681 }, { "epoch": 0.26, "grad_norm": 1.7522319053649662, "learning_rate": 8.661758686238198e-06, "loss": 0.5816, "step": 3682 }, { "epoch": 0.26, "grad_norm": 1.5241864534516252, "learning_rate": 8.660976091262127e-06, "loss": 0.5115, "step": 3683 }, { "epoch": 0.26, "grad_norm": 1.6348008722108804, "learning_rate": 8.660193302900893e-06, "loss": 0.5689, "step": 3684 }, { "epoch": 0.26, "grad_norm": 1.689777579743413, "learning_rate": 8.659410321195847e-06, "loss": 0.5593, "step": 3685 }, { "epoch": 0.26, "grad_norm": 0.7596973400392164, "learning_rate": 8.658627146188348e-06, "loss": 0.4471, "step": 3686 }, { "epoch": 0.26, "grad_norm": 1.5181636866406203, "learning_rate": 8.657843777919766e-06, "loss": 0.4999, "step": 3687 }, { "epoch": 0.26, "grad_norm": 1.7748232170481197, "learning_rate": 8.657060216431481e-06, "loss": 0.5776, "step": 3688 }, { "epoch": 0.26, "grad_norm": 1.9460874758566298, "learning_rate": 8.656276461764883e-06, "loss": 0.5621, "step": 3689 }, { "epoch": 0.26, "grad_norm": 1.9392380135897436, "learning_rate": 8.655492513961375e-06, "loss": 0.6248, "step": 3690 }, { "epoch": 0.26, "grad_norm": 1.515636531192478, "learning_rate": 8.654708373062364e-06, "loss": 0.6002, "step": 3691 }, { "epoch": 0.26, "grad_norm": 1.6957115793116486, "learning_rate": 8.653924039109274e-06, "loss": 0.6129, "step": 3692 }, { "epoch": 0.26, "grad_norm": 1.5687243412230785, "learning_rate": 8.653139512143534e-06, "loss": 0.5475, "step": 3693 }, { "epoch": 0.26, "grad_norm": 3.2031730074120555, "learning_rate": 8.652354792206588e-06, "loss": 0.5749, "step": 3694 }, { "epoch": 0.26, "grad_norm": 1.6852168019189258, "learning_rate": 8.651569879339886e-06, "loss": 0.4797, "step": 3695 }, { "epoch": 0.26, "grad_norm": 1.533342261614092, "learning_rate": 8.650784773584887e-06, "loss": 0.4829, "step": 3696 }, { "epoch": 0.26, "grad_norm": 1.676613107619779, "learning_rate": 8.649999474983069e-06, "loss": 0.4745, "step": 3697 }, { "epoch": 0.26, "grad_norm": 1.7145646575924403, "learning_rate": 8.649213983575908e-06, "loss": 0.5789, "step": 3698 }, { "epoch": 0.26, "grad_norm": 1.7776950673731338, "learning_rate": 8.6484282994049e-06, "loss": 0.569, "step": 3699 }, { "epoch": 0.26, "grad_norm": 1.9670540542979742, "learning_rate": 8.647642422511549e-06, "loss": 0.5301, "step": 3700 }, { "epoch": 0.26, "grad_norm": 0.9357515197082985, "learning_rate": 8.646856352937363e-06, "loss": 0.4782, "step": 3701 }, { "epoch": 0.26, "grad_norm": 1.7962593882509972, "learning_rate": 8.646070090723867e-06, "loss": 0.5399, "step": 3702 }, { "epoch": 0.26, "grad_norm": 1.9352727051643879, "learning_rate": 8.645283635912596e-06, "loss": 0.5246, "step": 3703 }, { "epoch": 0.26, "grad_norm": 1.5605131436219404, "learning_rate": 8.64449698854509e-06, "loss": 0.605, "step": 3704 }, { "epoch": 0.26, "grad_norm": 1.9971586410050854, "learning_rate": 8.643710148662906e-06, "loss": 0.5997, "step": 3705 }, { "epoch": 0.26, "grad_norm": 0.8047786173159486, "learning_rate": 8.642923116307603e-06, "loss": 0.465, "step": 3706 }, { "epoch": 0.26, "grad_norm": 1.5070847680522728, "learning_rate": 8.642135891520758e-06, "loss": 0.5744, "step": 3707 }, { "epoch": 0.26, "grad_norm": 2.0518531271373974, "learning_rate": 8.641348474343952e-06, "loss": 0.5554, "step": 3708 }, { "epoch": 0.26, "grad_norm": 1.7760824430389066, "learning_rate": 8.640560864818783e-06, "loss": 0.5397, "step": 3709 }, { "epoch": 0.26, "grad_norm": 1.6663242788838637, "learning_rate": 8.639773062986853e-06, "loss": 0.5603, "step": 3710 }, { "epoch": 0.26, "grad_norm": 1.742370999078878, "learning_rate": 8.638985068889776e-06, "loss": 0.595, "step": 3711 }, { "epoch": 0.26, "grad_norm": 1.5722380329580556, "learning_rate": 8.638196882569178e-06, "loss": 0.5317, "step": 3712 }, { "epoch": 0.26, "grad_norm": 1.8662209054568732, "learning_rate": 8.637408504066693e-06, "loss": 0.6059, "step": 3713 }, { "epoch": 0.26, "grad_norm": 1.7239787585650452, "learning_rate": 8.636619933423964e-06, "loss": 0.5829, "step": 3714 }, { "epoch": 0.26, "grad_norm": 1.785041372854532, "learning_rate": 8.635831170682649e-06, "loss": 0.5839, "step": 3715 }, { "epoch": 0.26, "grad_norm": 1.6555696111711897, "learning_rate": 8.63504221588441e-06, "loss": 0.6736, "step": 3716 }, { "epoch": 0.26, "grad_norm": 1.8443604690101627, "learning_rate": 8.634253069070927e-06, "loss": 0.6179, "step": 3717 }, { "epoch": 0.26, "grad_norm": 1.9807881187593537, "learning_rate": 8.63346373028388e-06, "loss": 0.5606, "step": 3718 }, { "epoch": 0.26, "grad_norm": 1.559039335188096, "learning_rate": 8.632674199564967e-06, "loss": 0.5693, "step": 3719 }, { "epoch": 0.26, "grad_norm": 1.779293958552457, "learning_rate": 8.631884476955895e-06, "loss": 0.6245, "step": 3720 }, { "epoch": 0.26, "grad_norm": 1.8713892967362848, "learning_rate": 8.631094562498376e-06, "loss": 0.585, "step": 3721 }, { "epoch": 0.26, "grad_norm": 1.7305979709166854, "learning_rate": 8.630304456234141e-06, "loss": 0.5321, "step": 3722 }, { "epoch": 0.26, "grad_norm": 0.8083812027382703, "learning_rate": 8.629514158204922e-06, "loss": 0.4542, "step": 3723 }, { "epoch": 0.26, "grad_norm": 1.6812901600536156, "learning_rate": 8.628723668452466e-06, "loss": 0.57, "step": 3724 }, { "epoch": 0.26, "grad_norm": 1.4465288763097162, "learning_rate": 8.627932987018532e-06, "loss": 0.5148, "step": 3725 }, { "epoch": 0.26, "grad_norm": 1.5780342521316664, "learning_rate": 8.627142113944884e-06, "loss": 0.5373, "step": 3726 }, { "epoch": 0.26, "grad_norm": 1.7767833381512546, "learning_rate": 8.626351049273298e-06, "loss": 0.568, "step": 3727 }, { "epoch": 0.26, "grad_norm": 1.623595698510962, "learning_rate": 8.625559793045563e-06, "loss": 0.5395, "step": 3728 }, { "epoch": 0.26, "grad_norm": 1.748915763495258, "learning_rate": 8.624768345303474e-06, "loss": 0.6349, "step": 3729 }, { "epoch": 0.26, "grad_norm": 1.6572425768247014, "learning_rate": 8.623976706088838e-06, "loss": 0.5697, "step": 3730 }, { "epoch": 0.26, "grad_norm": 1.612014682458433, "learning_rate": 8.623184875443473e-06, "loss": 0.6026, "step": 3731 }, { "epoch": 0.26, "grad_norm": 2.0850073105829607, "learning_rate": 8.622392853409207e-06, "loss": 0.6395, "step": 3732 }, { "epoch": 0.26, "grad_norm": 2.0968799923538044, "learning_rate": 8.621600640027875e-06, "loss": 0.6327, "step": 3733 }, { "epoch": 0.26, "grad_norm": 2.2329802984870213, "learning_rate": 8.620808235341327e-06, "loss": 0.6201, "step": 3734 }, { "epoch": 0.27, "grad_norm": 1.9200571085684917, "learning_rate": 8.620015639391417e-06, "loss": 0.5581, "step": 3735 }, { "epoch": 0.27, "grad_norm": 1.5383918939692969, "learning_rate": 8.619222852220017e-06, "loss": 0.5391, "step": 3736 }, { "epoch": 0.27, "grad_norm": 0.8706093720026479, "learning_rate": 8.618429873869e-06, "loss": 0.496, "step": 3737 }, { "epoch": 0.27, "grad_norm": 1.5157151226622232, "learning_rate": 8.617636704380258e-06, "loss": 0.5385, "step": 3738 }, { "epoch": 0.27, "grad_norm": 2.0404820421631547, "learning_rate": 8.616843343795687e-06, "loss": 0.5784, "step": 3739 }, { "epoch": 0.27, "grad_norm": 1.6806312285494946, "learning_rate": 8.616049792157194e-06, "loss": 0.5993, "step": 3740 }, { "epoch": 0.27, "grad_norm": 1.7661721927753113, "learning_rate": 8.615256049506697e-06, "loss": 0.5449, "step": 3741 }, { "epoch": 0.27, "grad_norm": 2.013088543803452, "learning_rate": 8.614462115886126e-06, "loss": 0.6335, "step": 3742 }, { "epoch": 0.27, "grad_norm": 1.9803522921385788, "learning_rate": 8.61366799133742e-06, "loss": 0.5432, "step": 3743 }, { "epoch": 0.27, "grad_norm": 1.6518134171212095, "learning_rate": 8.612873675902525e-06, "loss": 0.5367, "step": 3744 }, { "epoch": 0.27, "grad_norm": 0.787514026746798, "learning_rate": 8.6120791696234e-06, "loss": 0.4361, "step": 3745 }, { "epoch": 0.27, "grad_norm": 1.8640576093607606, "learning_rate": 8.611284472542015e-06, "loss": 0.4731, "step": 3746 }, { "epoch": 0.27, "grad_norm": 1.8982704006669104, "learning_rate": 8.610489584700348e-06, "loss": 0.5312, "step": 3747 }, { "epoch": 0.27, "grad_norm": 1.8518180494040857, "learning_rate": 8.609694506140384e-06, "loss": 0.4953, "step": 3748 }, { "epoch": 0.27, "grad_norm": 0.8221609795065573, "learning_rate": 8.608899236904128e-06, "loss": 0.4396, "step": 3749 }, { "epoch": 0.27, "grad_norm": 2.136103030021321, "learning_rate": 8.608103777033585e-06, "loss": 0.506, "step": 3750 }, { "epoch": 0.27, "grad_norm": 0.9100256992087814, "learning_rate": 8.607308126570773e-06, "loss": 0.4629, "step": 3751 }, { "epoch": 0.27, "grad_norm": 1.5416665482159027, "learning_rate": 8.606512285557725e-06, "loss": 0.5922, "step": 3752 }, { "epoch": 0.27, "grad_norm": 1.6206630681358223, "learning_rate": 8.605716254036475e-06, "loss": 0.5684, "step": 3753 }, { "epoch": 0.27, "grad_norm": 1.880549776972307, "learning_rate": 8.604920032049075e-06, "loss": 0.5451, "step": 3754 }, { "epoch": 0.27, "grad_norm": 0.8477750250082801, "learning_rate": 8.604123619637585e-06, "loss": 0.4584, "step": 3755 }, { "epoch": 0.27, "grad_norm": 2.6204696684290933, "learning_rate": 8.603327016844073e-06, "loss": 0.5388, "step": 3756 }, { "epoch": 0.27, "grad_norm": 1.494624616234771, "learning_rate": 8.602530223710619e-06, "loss": 0.6469, "step": 3757 }, { "epoch": 0.27, "grad_norm": 1.7529979709601866, "learning_rate": 8.601733240279308e-06, "loss": 0.5307, "step": 3758 }, { "epoch": 0.27, "grad_norm": 1.6376235693132724, "learning_rate": 8.600936066592247e-06, "loss": 0.5933, "step": 3759 }, { "epoch": 0.27, "grad_norm": 2.173461684596855, "learning_rate": 8.600138702691538e-06, "loss": 0.5793, "step": 3760 }, { "epoch": 0.27, "grad_norm": 6.089061327928108, "learning_rate": 8.599341148619306e-06, "loss": 0.593, "step": 3761 }, { "epoch": 0.27, "grad_norm": 1.8191289446129948, "learning_rate": 8.598543404417678e-06, "loss": 0.5995, "step": 3762 }, { "epoch": 0.27, "grad_norm": 3.1441988238525727, "learning_rate": 8.597745470128794e-06, "loss": 0.5621, "step": 3763 }, { "epoch": 0.27, "grad_norm": 1.8071413387992534, "learning_rate": 8.596947345794801e-06, "loss": 0.5233, "step": 3764 }, { "epoch": 0.27, "grad_norm": 1.5440797659628793, "learning_rate": 8.596149031457865e-06, "loss": 0.531, "step": 3765 }, { "epoch": 0.27, "grad_norm": 2.0068733081174424, "learning_rate": 8.595350527160149e-06, "loss": 0.5612, "step": 3766 }, { "epoch": 0.27, "grad_norm": 1.8228956837951042, "learning_rate": 8.594551832943837e-06, "loss": 0.5501, "step": 3767 }, { "epoch": 0.27, "grad_norm": 2.4745031533624027, "learning_rate": 8.593752948851115e-06, "loss": 0.5942, "step": 3768 }, { "epoch": 0.27, "grad_norm": 2.4569583524023715, "learning_rate": 8.592953874924187e-06, "loss": 0.543, "step": 3769 }, { "epoch": 0.27, "grad_norm": 2.0780232098348477, "learning_rate": 8.59215461120526e-06, "loss": 0.627, "step": 3770 }, { "epoch": 0.27, "grad_norm": 1.485867233128486, "learning_rate": 8.591355157736555e-06, "loss": 0.6047, "step": 3771 }, { "epoch": 0.27, "grad_norm": 1.78619071927407, "learning_rate": 8.5905555145603e-06, "loss": 0.5193, "step": 3772 }, { "epoch": 0.27, "grad_norm": 1.7032473602452767, "learning_rate": 8.589755681718737e-06, "loss": 0.5146, "step": 3773 }, { "epoch": 0.27, "grad_norm": 1.707653761119075, "learning_rate": 8.588955659254117e-06, "loss": 0.5946, "step": 3774 }, { "epoch": 0.27, "grad_norm": 1.6047857631036928, "learning_rate": 8.588155447208695e-06, "loss": 0.5698, "step": 3775 }, { "epoch": 0.27, "grad_norm": 2.9266184126070436, "learning_rate": 8.587355045624746e-06, "loss": 0.57, "step": 3776 }, { "epoch": 0.27, "grad_norm": 1.5711485289073448, "learning_rate": 8.586554454544547e-06, "loss": 0.4948, "step": 3777 }, { "epoch": 0.27, "grad_norm": 3.6648091776827965, "learning_rate": 8.58575367401039e-06, "loss": 0.512, "step": 3778 }, { "epoch": 0.27, "grad_norm": 1.4927965316453047, "learning_rate": 8.584952704064572e-06, "loss": 0.5035, "step": 3779 }, { "epoch": 0.27, "grad_norm": 2.4557268915209147, "learning_rate": 8.584151544749407e-06, "loss": 0.5932, "step": 3780 }, { "epoch": 0.27, "grad_norm": 0.8295266162640058, "learning_rate": 8.583350196107209e-06, "loss": 0.4965, "step": 3781 }, { "epoch": 0.27, "grad_norm": 1.6084824433679665, "learning_rate": 8.582548658180314e-06, "loss": 0.5931, "step": 3782 }, { "epoch": 0.27, "grad_norm": 1.9301530304262977, "learning_rate": 8.581746931011058e-06, "loss": 0.5536, "step": 3783 }, { "epoch": 0.27, "grad_norm": 2.3148343399984417, "learning_rate": 8.580945014641795e-06, "loss": 0.5532, "step": 3784 }, { "epoch": 0.27, "grad_norm": 1.7396558761754162, "learning_rate": 8.580142909114881e-06, "loss": 0.652, "step": 3785 }, { "epoch": 0.27, "grad_norm": 1.8800748154454312, "learning_rate": 8.579340614472688e-06, "loss": 0.5488, "step": 3786 }, { "epoch": 0.27, "grad_norm": 1.5390932382544662, "learning_rate": 8.578538130757596e-06, "loss": 0.501, "step": 3787 }, { "epoch": 0.27, "grad_norm": 2.4376948941532373, "learning_rate": 8.577735458011994e-06, "loss": 0.5726, "step": 3788 }, { "epoch": 0.27, "grad_norm": 2.7578687527353734, "learning_rate": 8.57693259627828e-06, "loss": 0.5151, "step": 3789 }, { "epoch": 0.27, "grad_norm": 3.11395877438302, "learning_rate": 8.576129545598867e-06, "loss": 0.5914, "step": 3790 }, { "epoch": 0.27, "grad_norm": 1.7009293799988963, "learning_rate": 8.575326306016173e-06, "loss": 0.5136, "step": 3791 }, { "epoch": 0.27, "grad_norm": 2.566269272610906, "learning_rate": 8.57452287757263e-06, "loss": 0.5633, "step": 3792 }, { "epoch": 0.27, "grad_norm": 1.60928332987856, "learning_rate": 8.573719260310676e-06, "loss": 0.5138, "step": 3793 }, { "epoch": 0.27, "grad_norm": 1.781189315892919, "learning_rate": 8.572915454272762e-06, "loss": 0.5968, "step": 3794 }, { "epoch": 0.27, "grad_norm": 2.4190379140864304, "learning_rate": 8.572111459501346e-06, "loss": 0.5714, "step": 3795 }, { "epoch": 0.27, "grad_norm": 2.7533159365540176, "learning_rate": 8.571307276038898e-06, "loss": 0.4999, "step": 3796 }, { "epoch": 0.27, "grad_norm": 0.7874564564287987, "learning_rate": 8.5705029039279e-06, "loss": 0.4434, "step": 3797 }, { "epoch": 0.27, "grad_norm": 2.1754908626180747, "learning_rate": 8.56969834321084e-06, "loss": 0.6336, "step": 3798 }, { "epoch": 0.27, "grad_norm": 1.8820861037973569, "learning_rate": 8.568893593930217e-06, "loss": 0.541, "step": 3799 }, { "epoch": 0.27, "grad_norm": 1.6406993139989712, "learning_rate": 8.568088656128542e-06, "loss": 0.571, "step": 3800 }, { "epoch": 0.27, "grad_norm": 1.8096483607658562, "learning_rate": 8.567283529848333e-06, "loss": 0.5067, "step": 3801 }, { "epoch": 0.27, "grad_norm": 1.571279842775095, "learning_rate": 8.566478215132121e-06, "loss": 0.5589, "step": 3802 }, { "epoch": 0.27, "grad_norm": 1.7181970334415946, "learning_rate": 8.565672712022443e-06, "loss": 0.5143, "step": 3803 }, { "epoch": 0.27, "grad_norm": 1.5794291033700176, "learning_rate": 8.564867020561853e-06, "loss": 0.614, "step": 3804 }, { "epoch": 0.27, "grad_norm": 1.6888873843226395, "learning_rate": 8.564061140792905e-06, "loss": 0.5909, "step": 3805 }, { "epoch": 0.27, "grad_norm": 1.8383682005543078, "learning_rate": 8.563255072758174e-06, "loss": 0.619, "step": 3806 }, { "epoch": 0.27, "grad_norm": 1.619699238933111, "learning_rate": 8.562448816500234e-06, "loss": 0.5617, "step": 3807 }, { "epoch": 0.27, "grad_norm": 1.832593864032716, "learning_rate": 8.561642372061676e-06, "loss": 0.5444, "step": 3808 }, { "epoch": 0.27, "grad_norm": 1.5581582719820313, "learning_rate": 8.560835739485101e-06, "loss": 0.6104, "step": 3809 }, { "epoch": 0.27, "grad_norm": 1.778132074344884, "learning_rate": 8.560028918813115e-06, "loss": 0.5947, "step": 3810 }, { "epoch": 0.27, "grad_norm": 0.9323600435113633, "learning_rate": 8.559221910088338e-06, "loss": 0.4877, "step": 3811 }, { "epoch": 0.27, "grad_norm": 1.851837495849415, "learning_rate": 8.558414713353402e-06, "loss": 0.5353, "step": 3812 }, { "epoch": 0.27, "grad_norm": 1.636943974668802, "learning_rate": 8.55760732865094e-06, "loss": 0.541, "step": 3813 }, { "epoch": 0.27, "grad_norm": 1.7723075791422664, "learning_rate": 8.556799756023607e-06, "loss": 0.5618, "step": 3814 }, { "epoch": 0.27, "grad_norm": 0.7992390308027445, "learning_rate": 8.555991995514056e-06, "loss": 0.459, "step": 3815 }, { "epoch": 0.27, "grad_norm": 1.5088602109566804, "learning_rate": 8.555184047164961e-06, "loss": 0.503, "step": 3816 }, { "epoch": 0.27, "grad_norm": 1.7718530971660045, "learning_rate": 8.554375911018997e-06, "loss": 0.5972, "step": 3817 }, { "epoch": 0.27, "grad_norm": 0.7862736833270481, "learning_rate": 8.553567587118853e-06, "loss": 0.451, "step": 3818 }, { "epoch": 0.27, "grad_norm": 3.36103182655997, "learning_rate": 8.552759075507229e-06, "loss": 0.5339, "step": 3819 }, { "epoch": 0.27, "grad_norm": 1.7839866680797638, "learning_rate": 8.551950376226831e-06, "loss": 0.5399, "step": 3820 }, { "epoch": 0.27, "grad_norm": 1.7623223127474545, "learning_rate": 8.55114148932038e-06, "loss": 0.5581, "step": 3821 }, { "epoch": 0.27, "grad_norm": 2.8185050734153316, "learning_rate": 8.550332414830602e-06, "loss": 0.6203, "step": 3822 }, { "epoch": 0.27, "grad_norm": 1.6406746671890167, "learning_rate": 8.549523152800235e-06, "loss": 0.522, "step": 3823 }, { "epoch": 0.27, "grad_norm": 1.6488788624415496, "learning_rate": 8.548713703272029e-06, "loss": 0.5502, "step": 3824 }, { "epoch": 0.27, "grad_norm": 1.7197370923548907, "learning_rate": 8.547904066288738e-06, "loss": 0.5425, "step": 3825 }, { "epoch": 0.27, "grad_norm": 1.7148865951696295, "learning_rate": 8.547094241893135e-06, "loss": 0.5251, "step": 3826 }, { "epoch": 0.27, "grad_norm": 1.8466129668566087, "learning_rate": 8.546284230127994e-06, "loss": 0.5415, "step": 3827 }, { "epoch": 0.27, "grad_norm": 0.8178112851284088, "learning_rate": 8.545474031036102e-06, "loss": 0.4582, "step": 3828 }, { "epoch": 0.27, "grad_norm": 1.9323978237351618, "learning_rate": 8.544663644660261e-06, "loss": 0.5638, "step": 3829 }, { "epoch": 0.27, "grad_norm": 1.884021669557772, "learning_rate": 8.543853071043273e-06, "loss": 0.59, "step": 3830 }, { "epoch": 0.27, "grad_norm": 0.9114240533091512, "learning_rate": 8.54304231022796e-06, "loss": 0.4735, "step": 3831 }, { "epoch": 0.27, "grad_norm": 1.7679818359024413, "learning_rate": 8.542231362257144e-06, "loss": 0.5297, "step": 3832 }, { "epoch": 0.27, "grad_norm": 2.1708690414638694, "learning_rate": 8.541420227173666e-06, "loss": 0.552, "step": 3833 }, { "epoch": 0.27, "grad_norm": 2.2438519295705546, "learning_rate": 8.540608905020372e-06, "loss": 0.5875, "step": 3834 }, { "epoch": 0.27, "grad_norm": 2.213655710984006, "learning_rate": 8.539797395840117e-06, "loss": 0.587, "step": 3835 }, { "epoch": 0.27, "grad_norm": 1.8008708799469018, "learning_rate": 8.53898569967577e-06, "loss": 0.5484, "step": 3836 }, { "epoch": 0.27, "grad_norm": 2.0264993405539036, "learning_rate": 8.538173816570205e-06, "loss": 0.5089, "step": 3837 }, { "epoch": 0.27, "grad_norm": 2.8067602109380463, "learning_rate": 8.537361746566312e-06, "loss": 0.5395, "step": 3838 }, { "epoch": 0.27, "grad_norm": 1.4753260055086794, "learning_rate": 8.536549489706985e-06, "loss": 0.5459, "step": 3839 }, { "epoch": 0.27, "grad_norm": 1.8257957200420598, "learning_rate": 8.535737046035131e-06, "loss": 0.5921, "step": 3840 }, { "epoch": 0.27, "grad_norm": 1.7109200196763055, "learning_rate": 8.534924415593664e-06, "loss": 0.5327, "step": 3841 }, { "epoch": 0.27, "grad_norm": 1.3834273373720936, "learning_rate": 8.534111598425511e-06, "loss": 0.4793, "step": 3842 }, { "epoch": 0.27, "grad_norm": 1.7605628394301451, "learning_rate": 8.533298594573611e-06, "loss": 0.57, "step": 3843 }, { "epoch": 0.27, "grad_norm": 2.1854642283785393, "learning_rate": 8.532485404080905e-06, "loss": 0.5814, "step": 3844 }, { "epoch": 0.27, "grad_norm": 1.852546639449303, "learning_rate": 8.53167202699035e-06, "loss": 0.5651, "step": 3845 }, { "epoch": 0.27, "grad_norm": 1.788869111915241, "learning_rate": 8.530858463344913e-06, "loss": 0.6052, "step": 3846 }, { "epoch": 0.27, "grad_norm": 1.8451966123887178, "learning_rate": 8.530044713187566e-06, "loss": 0.5237, "step": 3847 }, { "epoch": 0.27, "grad_norm": 1.8552291128348275, "learning_rate": 8.529230776561297e-06, "loss": 0.5284, "step": 3848 }, { "epoch": 0.27, "grad_norm": 2.0832717237403875, "learning_rate": 8.5284166535091e-06, "loss": 0.6644, "step": 3849 }, { "epoch": 0.27, "grad_norm": 1.5108183483882374, "learning_rate": 8.527602344073979e-06, "loss": 0.5007, "step": 3850 }, { "epoch": 0.27, "grad_norm": 1.7247616673776254, "learning_rate": 8.526787848298947e-06, "loss": 0.5446, "step": 3851 }, { "epoch": 0.27, "grad_norm": 1.6720954318137098, "learning_rate": 8.525973166227033e-06, "loss": 0.5414, "step": 3852 }, { "epoch": 0.27, "grad_norm": 1.8452923596805926, "learning_rate": 8.525158297901268e-06, "loss": 0.563, "step": 3853 }, { "epoch": 0.27, "grad_norm": 1.4553929112887263, "learning_rate": 8.524343243364697e-06, "loss": 0.5259, "step": 3854 }, { "epoch": 0.27, "grad_norm": 1.591795924587381, "learning_rate": 8.523528002660374e-06, "loss": 0.5442, "step": 3855 }, { "epoch": 0.27, "grad_norm": 1.8604114879662283, "learning_rate": 8.522712575831363e-06, "loss": 0.5204, "step": 3856 }, { "epoch": 0.27, "grad_norm": 1.5923104445159237, "learning_rate": 8.521896962920735e-06, "loss": 0.5193, "step": 3857 }, { "epoch": 0.27, "grad_norm": 1.5970865677416415, "learning_rate": 8.521081163971579e-06, "loss": 0.561, "step": 3858 }, { "epoch": 0.27, "grad_norm": 3.0666850625361888, "learning_rate": 8.520265179026981e-06, "loss": 0.5682, "step": 3859 }, { "epoch": 0.27, "grad_norm": 1.7932012844764185, "learning_rate": 8.51944900813005e-06, "loss": 0.5419, "step": 3860 }, { "epoch": 0.27, "grad_norm": 1.8680871795908036, "learning_rate": 8.518632651323897e-06, "loss": 0.5482, "step": 3861 }, { "epoch": 0.27, "grad_norm": 1.760047374024755, "learning_rate": 8.517816108651644e-06, "loss": 0.6172, "step": 3862 }, { "epoch": 0.27, "grad_norm": 1.8698498765309115, "learning_rate": 8.516999380156426e-06, "loss": 0.6412, "step": 3863 }, { "epoch": 0.27, "grad_norm": 1.6892544366605822, "learning_rate": 8.516182465881381e-06, "loss": 0.583, "step": 3864 }, { "epoch": 0.27, "grad_norm": 1.7038972228868394, "learning_rate": 8.515365365869665e-06, "loss": 0.6086, "step": 3865 }, { "epoch": 0.27, "grad_norm": 0.8723614717023433, "learning_rate": 8.514548080164437e-06, "loss": 0.4765, "step": 3866 }, { "epoch": 0.27, "grad_norm": 1.5350372353366104, "learning_rate": 8.513730608808873e-06, "loss": 0.5429, "step": 3867 }, { "epoch": 0.27, "grad_norm": 1.5025155623128448, "learning_rate": 8.51291295184615e-06, "loss": 0.574, "step": 3868 }, { "epoch": 0.27, "grad_norm": 1.6465849786177045, "learning_rate": 8.512095109319464e-06, "loss": 0.5832, "step": 3869 }, { "epoch": 0.27, "grad_norm": 1.845589811718301, "learning_rate": 8.511277081272011e-06, "loss": 0.5945, "step": 3870 }, { "epoch": 0.27, "grad_norm": 1.8835233713200703, "learning_rate": 8.510458867747007e-06, "loss": 0.558, "step": 3871 }, { "epoch": 0.27, "grad_norm": 1.946078681308966, "learning_rate": 8.509640468787669e-06, "loss": 0.5718, "step": 3872 }, { "epoch": 0.27, "grad_norm": 1.8744379883570677, "learning_rate": 8.508821884437229e-06, "loss": 0.5953, "step": 3873 }, { "epoch": 0.27, "grad_norm": 1.9762076886863043, "learning_rate": 8.50800311473893e-06, "loss": 0.5926, "step": 3874 }, { "epoch": 0.27, "grad_norm": 1.5064085921660668, "learning_rate": 8.507184159736018e-06, "loss": 0.5242, "step": 3875 }, { "epoch": 0.28, "grad_norm": 1.8058809871413941, "learning_rate": 8.506365019471754e-06, "loss": 0.5793, "step": 3876 }, { "epoch": 0.28, "grad_norm": 1.6473431769773348, "learning_rate": 8.505545693989408e-06, "loss": 0.5097, "step": 3877 }, { "epoch": 0.28, "grad_norm": 1.9477424320572152, "learning_rate": 8.504726183332261e-06, "loss": 0.6623, "step": 3878 }, { "epoch": 0.28, "grad_norm": 1.6724791826762484, "learning_rate": 8.503906487543602e-06, "loss": 0.5732, "step": 3879 }, { "epoch": 0.28, "grad_norm": 1.4571240598881876, "learning_rate": 8.503086606666728e-06, "loss": 0.5174, "step": 3880 }, { "epoch": 0.28, "grad_norm": 1.52522568813604, "learning_rate": 8.502266540744951e-06, "loss": 0.5481, "step": 3881 }, { "epoch": 0.28, "grad_norm": 0.9011082182386291, "learning_rate": 8.501446289821588e-06, "loss": 0.4806, "step": 3882 }, { "epoch": 0.28, "grad_norm": 0.8111808156287608, "learning_rate": 8.500625853939965e-06, "loss": 0.471, "step": 3883 }, { "epoch": 0.28, "grad_norm": 1.511034379385225, "learning_rate": 8.499805233143425e-06, "loss": 0.5526, "step": 3884 }, { "epoch": 0.28, "grad_norm": 1.7561610770375933, "learning_rate": 8.498984427475315e-06, "loss": 0.5206, "step": 3885 }, { "epoch": 0.28, "grad_norm": 1.626637988146729, "learning_rate": 8.498163436978988e-06, "loss": 0.6187, "step": 3886 }, { "epoch": 0.28, "grad_norm": 1.445111640340236, "learning_rate": 8.497342261697816e-06, "loss": 0.5252, "step": 3887 }, { "epoch": 0.28, "grad_norm": 2.3743187200672953, "learning_rate": 8.496520901675175e-06, "loss": 0.5749, "step": 3888 }, { "epoch": 0.28, "grad_norm": 1.5949859922185785, "learning_rate": 8.495699356954454e-06, "loss": 0.6175, "step": 3889 }, { "epoch": 0.28, "grad_norm": 1.5550694736753177, "learning_rate": 8.494877627579046e-06, "loss": 0.4961, "step": 3890 }, { "epoch": 0.28, "grad_norm": 1.717102428138178, "learning_rate": 8.49405571359236e-06, "loss": 0.5395, "step": 3891 }, { "epoch": 0.28, "grad_norm": 1.7229315709351922, "learning_rate": 8.493233615037813e-06, "loss": 0.5364, "step": 3892 }, { "epoch": 0.28, "grad_norm": 1.7767331165377906, "learning_rate": 8.492411331958828e-06, "loss": 0.552, "step": 3893 }, { "epoch": 0.28, "grad_norm": 2.296947548331043, "learning_rate": 8.491588864398843e-06, "loss": 0.5626, "step": 3894 }, { "epoch": 0.28, "grad_norm": 1.518250241384418, "learning_rate": 8.490766212401303e-06, "loss": 0.4887, "step": 3895 }, { "epoch": 0.28, "grad_norm": 1.7248719971845967, "learning_rate": 8.489943376009661e-06, "loss": 0.5765, "step": 3896 }, { "epoch": 0.28, "grad_norm": 1.6281892075346627, "learning_rate": 8.489120355267388e-06, "loss": 0.6011, "step": 3897 }, { "epoch": 0.28, "grad_norm": 1.9997774144640543, "learning_rate": 8.488297150217954e-06, "loss": 0.5652, "step": 3898 }, { "epoch": 0.28, "grad_norm": 1.5699654832187757, "learning_rate": 8.487473760904846e-06, "loss": 0.5349, "step": 3899 }, { "epoch": 0.28, "grad_norm": 1.877927373492826, "learning_rate": 8.486650187371553e-06, "loss": 0.5748, "step": 3900 }, { "epoch": 0.28, "grad_norm": 1.7253889097844912, "learning_rate": 8.485826429661586e-06, "loss": 0.623, "step": 3901 }, { "epoch": 0.28, "grad_norm": 1.5622490818407206, "learning_rate": 8.485002487818454e-06, "loss": 0.5204, "step": 3902 }, { "epoch": 0.28, "grad_norm": 1.7386041839141897, "learning_rate": 8.484178361885683e-06, "loss": 0.6397, "step": 3903 }, { "epoch": 0.28, "grad_norm": 2.74408724570797, "learning_rate": 8.483354051906802e-06, "loss": 0.533, "step": 3904 }, { "epoch": 0.28, "grad_norm": 1.7955297498248448, "learning_rate": 8.48252955792536e-06, "loss": 0.559, "step": 3905 }, { "epoch": 0.28, "grad_norm": 1.6497575387211016, "learning_rate": 8.481704879984904e-06, "loss": 0.5891, "step": 3906 }, { "epoch": 0.28, "grad_norm": 2.912391218739193, "learning_rate": 8.480880018129001e-06, "loss": 0.6263, "step": 3907 }, { "epoch": 0.28, "grad_norm": 1.7998160165888166, "learning_rate": 8.48005497240122e-06, "loss": 0.5699, "step": 3908 }, { "epoch": 0.28, "grad_norm": 1.5270859332299838, "learning_rate": 8.479229742845141e-06, "loss": 0.548, "step": 3909 }, { "epoch": 0.28, "grad_norm": 1.4620700207737807, "learning_rate": 8.478404329504359e-06, "loss": 0.535, "step": 3910 }, { "epoch": 0.28, "grad_norm": 1.5574994813497776, "learning_rate": 8.477578732422474e-06, "loss": 0.5261, "step": 3911 }, { "epoch": 0.28, "grad_norm": 1.7367374415785726, "learning_rate": 8.476752951643099e-06, "loss": 0.5373, "step": 3912 }, { "epoch": 0.28, "grad_norm": 1.007821685652148, "learning_rate": 8.47592698720985e-06, "loss": 0.4709, "step": 3913 }, { "epoch": 0.28, "grad_norm": 1.494088857985598, "learning_rate": 8.475100839166359e-06, "loss": 0.5924, "step": 3914 }, { "epoch": 0.28, "grad_norm": 1.824779319289794, "learning_rate": 8.474274507556265e-06, "loss": 0.5928, "step": 3915 }, { "epoch": 0.28, "grad_norm": 1.7551377743456658, "learning_rate": 8.473447992423223e-06, "loss": 0.5624, "step": 3916 }, { "epoch": 0.28, "grad_norm": 6.120771567426235, "learning_rate": 8.472621293810886e-06, "loss": 0.5468, "step": 3917 }, { "epoch": 0.28, "grad_norm": 0.758453223922965, "learning_rate": 8.471794411762927e-06, "loss": 0.4498, "step": 3918 }, { "epoch": 0.28, "grad_norm": 4.975516555456688, "learning_rate": 8.470967346323023e-06, "loss": 0.601, "step": 3919 }, { "epoch": 0.28, "grad_norm": 1.5824313925397464, "learning_rate": 8.47014009753486e-06, "loss": 0.5819, "step": 3920 }, { "epoch": 0.28, "grad_norm": 2.662666106825579, "learning_rate": 8.469312665442141e-06, "loss": 0.5364, "step": 3921 }, { "epoch": 0.28, "grad_norm": 2.3776311231534644, "learning_rate": 8.468485050088571e-06, "loss": 0.5791, "step": 3922 }, { "epoch": 0.28, "grad_norm": 3.5041814939282467, "learning_rate": 8.467657251517866e-06, "loss": 0.5859, "step": 3923 }, { "epoch": 0.28, "grad_norm": 1.9676447211736459, "learning_rate": 8.466829269773757e-06, "loss": 0.4731, "step": 3924 }, { "epoch": 0.28, "grad_norm": 0.8649870130246029, "learning_rate": 8.466001104899978e-06, "loss": 0.451, "step": 3925 }, { "epoch": 0.28, "grad_norm": 1.7628024025302818, "learning_rate": 8.465172756940277e-06, "loss": 0.5385, "step": 3926 }, { "epoch": 0.28, "grad_norm": 1.7076830321863437, "learning_rate": 8.464344225938409e-06, "loss": 0.5693, "step": 3927 }, { "epoch": 0.28, "grad_norm": 0.768814420952646, "learning_rate": 8.463515511938139e-06, "loss": 0.4521, "step": 3928 }, { "epoch": 0.28, "grad_norm": 2.272510162161342, "learning_rate": 8.462686614983245e-06, "loss": 0.5533, "step": 3929 }, { "epoch": 0.28, "grad_norm": 1.6502344708222791, "learning_rate": 8.46185753511751e-06, "loss": 0.5067, "step": 3930 }, { "epoch": 0.28, "grad_norm": 1.612801447754268, "learning_rate": 8.461028272384731e-06, "loss": 0.5817, "step": 3931 }, { "epoch": 0.28, "grad_norm": 1.6191544535317755, "learning_rate": 8.46019882682871e-06, "loss": 0.5873, "step": 3932 }, { "epoch": 0.28, "grad_norm": 1.8253062870012653, "learning_rate": 8.459369198493261e-06, "loss": 0.6302, "step": 3933 }, { "epoch": 0.28, "grad_norm": 1.8381569561678566, "learning_rate": 8.458539387422211e-06, "loss": 0.5883, "step": 3934 }, { "epoch": 0.28, "grad_norm": 0.8546694064714919, "learning_rate": 8.45770939365939e-06, "loss": 0.445, "step": 3935 }, { "epoch": 0.28, "grad_norm": 1.887344553623164, "learning_rate": 8.456879217248643e-06, "loss": 0.5306, "step": 3936 }, { "epoch": 0.28, "grad_norm": 1.598145442201589, "learning_rate": 8.456048858233823e-06, "loss": 0.5321, "step": 3937 }, { "epoch": 0.28, "grad_norm": 1.6047801852214043, "learning_rate": 8.45521831665879e-06, "loss": 0.5298, "step": 3938 }, { "epoch": 0.28, "grad_norm": 2.1569446303112563, "learning_rate": 8.45438759256742e-06, "loss": 0.5746, "step": 3939 }, { "epoch": 0.28, "grad_norm": 1.5203002659017415, "learning_rate": 8.45355668600359e-06, "loss": 0.5476, "step": 3940 }, { "epoch": 0.28, "grad_norm": 1.8265986190107226, "learning_rate": 8.452725597011196e-06, "loss": 0.5699, "step": 3941 }, { "epoch": 0.28, "grad_norm": 1.3931560788630135, "learning_rate": 8.451894325634135e-06, "loss": 0.5585, "step": 3942 }, { "epoch": 0.28, "grad_norm": 1.7811256607720838, "learning_rate": 8.45106287191632e-06, "loss": 0.6562, "step": 3943 }, { "epoch": 0.28, "grad_norm": 1.5548688668389956, "learning_rate": 8.45023123590167e-06, "loss": 0.5232, "step": 3944 }, { "epoch": 0.28, "grad_norm": 0.8346341250724972, "learning_rate": 8.449399417634112e-06, "loss": 0.4693, "step": 3945 }, { "epoch": 0.28, "grad_norm": 1.5912678588200104, "learning_rate": 8.448567417157593e-06, "loss": 0.5796, "step": 3946 }, { "epoch": 0.28, "grad_norm": 1.8086171175386785, "learning_rate": 8.447735234516055e-06, "loss": 0.58, "step": 3947 }, { "epoch": 0.28, "grad_norm": 2.2436880983969254, "learning_rate": 8.446902869753462e-06, "loss": 0.5484, "step": 3948 }, { "epoch": 0.28, "grad_norm": 1.7065584330790715, "learning_rate": 8.446070322913777e-06, "loss": 0.5381, "step": 3949 }, { "epoch": 0.28, "grad_norm": 1.8642895398490482, "learning_rate": 8.445237594040982e-06, "loss": 0.6341, "step": 3950 }, { "epoch": 0.28, "grad_norm": 1.8245125139854665, "learning_rate": 8.444404683179063e-06, "loss": 0.5531, "step": 3951 }, { "epoch": 0.28, "grad_norm": 0.7587551332364196, "learning_rate": 8.443571590372017e-06, "loss": 0.4666, "step": 3952 }, { "epoch": 0.28, "grad_norm": 1.5948167325764442, "learning_rate": 8.442738315663852e-06, "loss": 0.518, "step": 3953 }, { "epoch": 0.28, "grad_norm": 1.616215754833975, "learning_rate": 8.441904859098583e-06, "loss": 0.5112, "step": 3954 }, { "epoch": 0.28, "grad_norm": 1.8309392390876416, "learning_rate": 8.441071220720237e-06, "loss": 0.6067, "step": 3955 }, { "epoch": 0.28, "grad_norm": 2.7529402545762207, "learning_rate": 8.440237400572849e-06, "loss": 0.563, "step": 3956 }, { "epoch": 0.28, "grad_norm": 1.599523928574332, "learning_rate": 8.439403398700464e-06, "loss": 0.508, "step": 3957 }, { "epoch": 0.28, "grad_norm": 1.6236762333599375, "learning_rate": 8.438569215147137e-06, "loss": 0.5234, "step": 3958 }, { "epoch": 0.28, "grad_norm": 2.3110297588151867, "learning_rate": 8.437734849956934e-06, "loss": 0.5694, "step": 3959 }, { "epoch": 0.28, "grad_norm": 1.6356690209199236, "learning_rate": 8.436900303173924e-06, "loss": 0.6264, "step": 3960 }, { "epoch": 0.28, "grad_norm": 2.3535695264636787, "learning_rate": 8.436065574842198e-06, "loss": 0.6027, "step": 3961 }, { "epoch": 0.28, "grad_norm": 1.8487447420515397, "learning_rate": 8.435230665005845e-06, "loss": 0.5813, "step": 3962 }, { "epoch": 0.28, "grad_norm": 1.6676239186164008, "learning_rate": 8.434395573708967e-06, "loss": 0.6328, "step": 3963 }, { "epoch": 0.28, "grad_norm": 2.0493847866688935, "learning_rate": 8.433560300995677e-06, "loss": 0.4732, "step": 3964 }, { "epoch": 0.28, "grad_norm": 1.8108122335139474, "learning_rate": 8.432724846910099e-06, "loss": 0.5873, "step": 3965 }, { "epoch": 0.28, "grad_norm": 1.5987070775906682, "learning_rate": 8.431889211496363e-06, "loss": 0.5984, "step": 3966 }, { "epoch": 0.28, "grad_norm": 1.6288448604635806, "learning_rate": 8.431053394798609e-06, "loss": 0.4614, "step": 3967 }, { "epoch": 0.28, "grad_norm": 2.380907759171672, "learning_rate": 8.43021739686099e-06, "loss": 0.5924, "step": 3968 }, { "epoch": 0.28, "grad_norm": 1.661946727218087, "learning_rate": 8.429381217727663e-06, "loss": 0.5288, "step": 3969 }, { "epoch": 0.28, "grad_norm": 0.8258645136090841, "learning_rate": 8.4285448574428e-06, "loss": 0.4597, "step": 3970 }, { "epoch": 0.28, "grad_norm": 1.797905676763181, "learning_rate": 8.42770831605058e-06, "loss": 0.5756, "step": 3971 }, { "epoch": 0.28, "grad_norm": 1.6991997043360314, "learning_rate": 8.426871593595193e-06, "loss": 0.5835, "step": 3972 }, { "epoch": 0.28, "grad_norm": 1.677133852266488, "learning_rate": 8.426034690120837e-06, "loss": 0.5363, "step": 3973 }, { "epoch": 0.28, "grad_norm": 1.8752645584682852, "learning_rate": 8.425197605671717e-06, "loss": 0.5359, "step": 3974 }, { "epoch": 0.28, "grad_norm": 1.8895212098596288, "learning_rate": 8.424360340292057e-06, "loss": 0.5312, "step": 3975 }, { "epoch": 0.28, "grad_norm": 1.7878730197700838, "learning_rate": 8.42352289402608e-06, "loss": 0.6166, "step": 3976 }, { "epoch": 0.28, "grad_norm": 0.7350754508180388, "learning_rate": 8.42268526691802e-06, "loss": 0.4804, "step": 3977 }, { "epoch": 0.28, "grad_norm": 1.4942816717868859, "learning_rate": 8.42184745901213e-06, "loss": 0.4688, "step": 3978 }, { "epoch": 0.28, "grad_norm": 1.4579405012475048, "learning_rate": 8.421009470352662e-06, "loss": 0.5443, "step": 3979 }, { "epoch": 0.28, "grad_norm": 1.6309329050698778, "learning_rate": 8.42017130098388e-06, "loss": 0.564, "step": 3980 }, { "epoch": 0.28, "grad_norm": 1.6936002756984099, "learning_rate": 8.419332950950061e-06, "loss": 0.5474, "step": 3981 }, { "epoch": 0.28, "grad_norm": 1.5813604606549683, "learning_rate": 8.418494420295491e-06, "loss": 0.5778, "step": 3982 }, { "epoch": 0.28, "grad_norm": 1.9129866474477308, "learning_rate": 8.417655709064461e-06, "loss": 0.5822, "step": 3983 }, { "epoch": 0.28, "grad_norm": 1.610502769212203, "learning_rate": 8.416816817301275e-06, "loss": 0.5782, "step": 3984 }, { "epoch": 0.28, "grad_norm": 1.7316237703167854, "learning_rate": 8.415977745050247e-06, "loss": 0.5445, "step": 3985 }, { "epoch": 0.28, "grad_norm": 2.2661168274225485, "learning_rate": 8.415138492355703e-06, "loss": 0.5267, "step": 3986 }, { "epoch": 0.28, "grad_norm": 1.8503971759713278, "learning_rate": 8.414299059261968e-06, "loss": 0.6599, "step": 3987 }, { "epoch": 0.28, "grad_norm": 1.921606759347505, "learning_rate": 8.41345944581339e-06, "loss": 0.5801, "step": 3988 }, { "epoch": 0.28, "grad_norm": 1.518327733287103, "learning_rate": 8.412619652054316e-06, "loss": 0.5137, "step": 3989 }, { "epoch": 0.28, "grad_norm": 1.7401412001000505, "learning_rate": 8.411779678029109e-06, "loss": 0.5586, "step": 3990 }, { "epoch": 0.28, "grad_norm": 1.6689117094180324, "learning_rate": 8.410939523782138e-06, "loss": 0.5798, "step": 3991 }, { "epoch": 0.28, "grad_norm": 1.608921073458193, "learning_rate": 8.410099189357783e-06, "loss": 0.549, "step": 3992 }, { "epoch": 0.28, "grad_norm": 0.8746926953931841, "learning_rate": 8.409258674800437e-06, "loss": 0.4551, "step": 3993 }, { "epoch": 0.28, "grad_norm": 0.7688932404609239, "learning_rate": 8.408417980154492e-06, "loss": 0.4491, "step": 3994 }, { "epoch": 0.28, "grad_norm": 1.6736915090796793, "learning_rate": 8.407577105464363e-06, "loss": 0.6347, "step": 3995 }, { "epoch": 0.28, "grad_norm": 1.834201287544821, "learning_rate": 8.406736050774462e-06, "loss": 0.5363, "step": 3996 }, { "epoch": 0.28, "grad_norm": 1.5855171945393083, "learning_rate": 8.405894816129222e-06, "loss": 0.5663, "step": 3997 }, { "epoch": 0.28, "grad_norm": 2.52855010657995, "learning_rate": 8.405053401573075e-06, "loss": 0.5572, "step": 3998 }, { "epoch": 0.28, "grad_norm": 1.6939178825061323, "learning_rate": 8.40421180715047e-06, "loss": 0.553, "step": 3999 }, { "epoch": 0.28, "grad_norm": 1.783722669013582, "learning_rate": 8.403370032905862e-06, "loss": 0.5737, "step": 4000 }, { "epoch": 0.28, "grad_norm": 2.573082758748703, "learning_rate": 8.402528078883719e-06, "loss": 0.5973, "step": 4001 }, { "epoch": 0.28, "grad_norm": 1.692190853386022, "learning_rate": 8.40168594512851e-06, "loss": 0.6174, "step": 4002 }, { "epoch": 0.28, "grad_norm": 0.8822906433063223, "learning_rate": 8.400843631684726e-06, "loss": 0.4572, "step": 4003 }, { "epoch": 0.28, "grad_norm": 1.6965229441380114, "learning_rate": 8.400001138596856e-06, "loss": 0.5511, "step": 4004 }, { "epoch": 0.28, "grad_norm": 1.4713655506288976, "learning_rate": 8.399158465909405e-06, "loss": 0.5387, "step": 4005 }, { "epoch": 0.28, "grad_norm": 1.3919539899412228, "learning_rate": 8.398315613666888e-06, "loss": 0.5249, "step": 4006 }, { "epoch": 0.28, "grad_norm": 1.7070397285381365, "learning_rate": 8.39747258191382e-06, "loss": 0.6144, "step": 4007 }, { "epoch": 0.28, "grad_norm": 1.5458134207901157, "learning_rate": 8.396629370694745e-06, "loss": 0.5575, "step": 4008 }, { "epoch": 0.28, "grad_norm": 1.7808046038645111, "learning_rate": 8.395785980054194e-06, "loss": 0.5256, "step": 4009 }, { "epoch": 0.28, "grad_norm": 1.432172348267557, "learning_rate": 8.39494241003672e-06, "loss": 0.5951, "step": 4010 }, { "epoch": 0.28, "grad_norm": 1.9276590628837833, "learning_rate": 8.394098660686884e-06, "loss": 0.5247, "step": 4011 }, { "epoch": 0.28, "grad_norm": 1.6475198730491059, "learning_rate": 8.393254732049259e-06, "loss": 0.5727, "step": 4012 }, { "epoch": 0.28, "grad_norm": 1.5890425304382039, "learning_rate": 8.39241062416842e-06, "loss": 0.603, "step": 4013 }, { "epoch": 0.28, "grad_norm": 1.5224802825709185, "learning_rate": 8.391566337088955e-06, "loss": 0.5591, "step": 4014 }, { "epoch": 0.28, "grad_norm": 1.8771937857811312, "learning_rate": 8.390721870855464e-06, "loss": 0.6913, "step": 4015 }, { "epoch": 0.28, "grad_norm": 1.6959555912894153, "learning_rate": 8.389877225512556e-06, "loss": 0.5441, "step": 4016 }, { "epoch": 0.29, "grad_norm": 4.129041137736984, "learning_rate": 8.389032401104845e-06, "loss": 0.6159, "step": 4017 }, { "epoch": 0.29, "grad_norm": 1.8311820613043999, "learning_rate": 8.388187397676961e-06, "loss": 0.5701, "step": 4018 }, { "epoch": 0.29, "grad_norm": 1.6698919660467186, "learning_rate": 8.387342215273535e-06, "loss": 0.5757, "step": 4019 }, { "epoch": 0.29, "grad_norm": 0.8299990858324686, "learning_rate": 8.38649685393922e-06, "loss": 0.4834, "step": 4020 }, { "epoch": 0.29, "grad_norm": 1.6203114327810626, "learning_rate": 8.385651313718662e-06, "loss": 0.5173, "step": 4021 }, { "epoch": 0.29, "grad_norm": 1.808593882326435, "learning_rate": 8.384805594656528e-06, "loss": 0.6565, "step": 4022 }, { "epoch": 0.29, "grad_norm": 1.893872488302302, "learning_rate": 8.383959696797498e-06, "loss": 0.5142, "step": 4023 }, { "epoch": 0.29, "grad_norm": 1.6008374920711723, "learning_rate": 8.383113620186247e-06, "loss": 0.5952, "step": 4024 }, { "epoch": 0.29, "grad_norm": 1.8134143618797658, "learning_rate": 8.382267364867473e-06, "loss": 0.6024, "step": 4025 }, { "epoch": 0.29, "grad_norm": 1.8130005739396458, "learning_rate": 8.381420930885876e-06, "loss": 0.6091, "step": 4026 }, { "epoch": 0.29, "grad_norm": 0.7948458039177386, "learning_rate": 8.380574318286168e-06, "loss": 0.4674, "step": 4027 }, { "epoch": 0.29, "grad_norm": 1.441637244693293, "learning_rate": 8.379727527113066e-06, "loss": 0.5354, "step": 4028 }, { "epoch": 0.29, "grad_norm": 1.5052493964780909, "learning_rate": 8.378880557411309e-06, "loss": 0.4993, "step": 4029 }, { "epoch": 0.29, "grad_norm": 2.036816078459009, "learning_rate": 8.378033409225627e-06, "loss": 0.5297, "step": 4030 }, { "epoch": 0.29, "grad_norm": 1.6309720788559503, "learning_rate": 8.377186082600776e-06, "loss": 0.5241, "step": 4031 }, { "epoch": 0.29, "grad_norm": 1.6016800468588481, "learning_rate": 8.376338577581513e-06, "loss": 0.5029, "step": 4032 }, { "epoch": 0.29, "grad_norm": 1.5833455797845262, "learning_rate": 8.375490894212607e-06, "loss": 0.5827, "step": 4033 }, { "epoch": 0.29, "grad_norm": 1.5817555522493298, "learning_rate": 8.374643032538832e-06, "loss": 0.5525, "step": 4034 }, { "epoch": 0.29, "grad_norm": 1.6676419961539768, "learning_rate": 8.373794992604978e-06, "loss": 0.5971, "step": 4035 }, { "epoch": 0.29, "grad_norm": 1.6407101931611179, "learning_rate": 8.37294677445584e-06, "loss": 0.5307, "step": 4036 }, { "epoch": 0.29, "grad_norm": 1.7829747704607677, "learning_rate": 8.372098378136226e-06, "loss": 0.5007, "step": 4037 }, { "epoch": 0.29, "grad_norm": 1.6369429228617751, "learning_rate": 8.371249803690948e-06, "loss": 0.5957, "step": 4038 }, { "epoch": 0.29, "grad_norm": 1.7893182738082392, "learning_rate": 8.370401051164832e-06, "loss": 0.5748, "step": 4039 }, { "epoch": 0.29, "grad_norm": 0.9998529409766825, "learning_rate": 8.369552120602713e-06, "loss": 0.4653, "step": 4040 }, { "epoch": 0.29, "grad_norm": 1.6014843490147543, "learning_rate": 8.368703012049432e-06, "loss": 0.5645, "step": 4041 }, { "epoch": 0.29, "grad_norm": 1.602674383453791, "learning_rate": 8.367853725549844e-06, "loss": 0.5021, "step": 4042 }, { "epoch": 0.29, "grad_norm": 2.18057624731136, "learning_rate": 8.36700426114881e-06, "loss": 0.5328, "step": 4043 }, { "epoch": 0.29, "grad_norm": 1.7475455916751526, "learning_rate": 8.366154618891203e-06, "loss": 0.5619, "step": 4044 }, { "epoch": 0.29, "grad_norm": 0.8193706706661844, "learning_rate": 8.3653047988219e-06, "loss": 0.4551, "step": 4045 }, { "epoch": 0.29, "grad_norm": 1.9166170012950614, "learning_rate": 8.364454800985797e-06, "loss": 0.5481, "step": 4046 }, { "epoch": 0.29, "grad_norm": 1.6903385298304876, "learning_rate": 8.36360462542779e-06, "loss": 0.5279, "step": 4047 }, { "epoch": 0.29, "grad_norm": 1.706970159522442, "learning_rate": 8.36275427219279e-06, "loss": 0.5683, "step": 4048 }, { "epoch": 0.29, "grad_norm": 1.9399402860496497, "learning_rate": 8.361903741325716e-06, "loss": 0.6469, "step": 4049 }, { "epoch": 0.29, "grad_norm": 1.4882637441868254, "learning_rate": 8.361053032871494e-06, "loss": 0.5279, "step": 4050 }, { "epoch": 0.29, "grad_norm": 1.4501051210945841, "learning_rate": 8.36020214687506e-06, "loss": 0.6091, "step": 4051 }, { "epoch": 0.29, "grad_norm": 0.811187473831592, "learning_rate": 8.359351083381364e-06, "loss": 0.4465, "step": 4052 }, { "epoch": 0.29, "grad_norm": 4.563611274422315, "learning_rate": 8.358499842435361e-06, "loss": 0.5835, "step": 4053 }, { "epoch": 0.29, "grad_norm": 0.8685622488220026, "learning_rate": 8.357648424082015e-06, "loss": 0.4611, "step": 4054 }, { "epoch": 0.29, "grad_norm": 1.474062514149165, "learning_rate": 8.356796828366303e-06, "loss": 0.5211, "step": 4055 }, { "epoch": 0.29, "grad_norm": 1.3786668605389012, "learning_rate": 8.355945055333207e-06, "loss": 0.5413, "step": 4056 }, { "epoch": 0.29, "grad_norm": 2.1682400213946407, "learning_rate": 8.355093105027723e-06, "loss": 0.6121, "step": 4057 }, { "epoch": 0.29, "grad_norm": 1.7047341521217991, "learning_rate": 8.354240977494852e-06, "loss": 0.6238, "step": 4058 }, { "epoch": 0.29, "grad_norm": 1.7724118439647396, "learning_rate": 8.353388672779607e-06, "loss": 0.5339, "step": 4059 }, { "epoch": 0.29, "grad_norm": 1.6951343825977745, "learning_rate": 8.35253619092701e-06, "loss": 0.563, "step": 4060 }, { "epoch": 0.29, "grad_norm": 1.9331577950577106, "learning_rate": 8.35168353198209e-06, "loss": 0.5805, "step": 4061 }, { "epoch": 0.29, "grad_norm": 1.6835839625034101, "learning_rate": 8.350830695989892e-06, "loss": 0.5152, "step": 4062 }, { "epoch": 0.29, "grad_norm": 0.7828155323290966, "learning_rate": 8.34997768299546e-06, "loss": 0.4543, "step": 4063 }, { "epoch": 0.29, "grad_norm": 1.7340155865912827, "learning_rate": 8.349124493043853e-06, "loss": 0.6282, "step": 4064 }, { "epoch": 0.29, "grad_norm": 1.6893213295076277, "learning_rate": 8.348271126180147e-06, "loss": 0.5495, "step": 4065 }, { "epoch": 0.29, "grad_norm": 1.720498452467447, "learning_rate": 8.347417582449411e-06, "loss": 0.5467, "step": 4066 }, { "epoch": 0.29, "grad_norm": 2.5315930944678313, "learning_rate": 8.346563861896737e-06, "loss": 0.4787, "step": 4067 }, { "epoch": 0.29, "grad_norm": 1.7561425127838546, "learning_rate": 8.345709964567222e-06, "loss": 0.579, "step": 4068 }, { "epoch": 0.29, "grad_norm": 1.616136042265237, "learning_rate": 8.344855890505967e-06, "loss": 0.5247, "step": 4069 }, { "epoch": 0.29, "grad_norm": 1.60879726860104, "learning_rate": 8.344001639758091e-06, "loss": 0.6333, "step": 4070 }, { "epoch": 0.29, "grad_norm": 1.7208072487855657, "learning_rate": 8.343147212368716e-06, "loss": 0.6693, "step": 4071 }, { "epoch": 0.29, "grad_norm": 2.012918535297233, "learning_rate": 8.342292608382977e-06, "loss": 0.569, "step": 4072 }, { "epoch": 0.29, "grad_norm": 1.6070901485426803, "learning_rate": 8.34143782784602e-06, "loss": 0.6281, "step": 4073 }, { "epoch": 0.29, "grad_norm": 1.7723969564215614, "learning_rate": 8.340582870802994e-06, "loss": 0.5468, "step": 4074 }, { "epoch": 0.29, "grad_norm": 1.7306295364803443, "learning_rate": 8.33972773729906e-06, "loss": 0.5522, "step": 4075 }, { "epoch": 0.29, "grad_norm": 1.6071001374093452, "learning_rate": 8.33887242737939e-06, "loss": 0.6152, "step": 4076 }, { "epoch": 0.29, "grad_norm": 1.7598936414064212, "learning_rate": 8.338016941089167e-06, "loss": 0.5662, "step": 4077 }, { "epoch": 0.29, "grad_norm": 2.1568654433272063, "learning_rate": 8.337161278473576e-06, "loss": 0.5323, "step": 4078 }, { "epoch": 0.29, "grad_norm": 2.0037071747572317, "learning_rate": 8.33630543957782e-06, "loss": 0.6723, "step": 4079 }, { "epoch": 0.29, "grad_norm": 1.5245329803549432, "learning_rate": 8.335449424447106e-06, "loss": 0.4889, "step": 4080 }, { "epoch": 0.29, "grad_norm": 1.5967757302811463, "learning_rate": 8.33459323312665e-06, "loss": 0.6005, "step": 4081 }, { "epoch": 0.29, "grad_norm": 1.5680846125588224, "learning_rate": 8.33373686566168e-06, "loss": 0.6203, "step": 4082 }, { "epoch": 0.29, "grad_norm": 1.8050437486633064, "learning_rate": 8.332880322097432e-06, "loss": 0.5773, "step": 4083 }, { "epoch": 0.29, "grad_norm": 1.3672771543109208, "learning_rate": 8.332023602479155e-06, "loss": 0.5277, "step": 4084 }, { "epoch": 0.29, "grad_norm": 0.9450297733274493, "learning_rate": 8.331166706852099e-06, "loss": 0.4704, "step": 4085 }, { "epoch": 0.29, "grad_norm": 8.472961704075287, "learning_rate": 8.330309635261529e-06, "loss": 0.5898, "step": 4086 }, { "epoch": 0.29, "grad_norm": 1.5245774890814165, "learning_rate": 8.32945238775272e-06, "loss": 0.5415, "step": 4087 }, { "epoch": 0.29, "grad_norm": 2.0345273225569636, "learning_rate": 8.328594964370957e-06, "loss": 0.5869, "step": 4088 }, { "epoch": 0.29, "grad_norm": 1.7491823282944452, "learning_rate": 8.327737365161527e-06, "loss": 0.5648, "step": 4089 }, { "epoch": 0.29, "grad_norm": 0.8522423883394311, "learning_rate": 8.326879590169732e-06, "loss": 0.4674, "step": 4090 }, { "epoch": 0.29, "grad_norm": 1.763586874804926, "learning_rate": 8.326021639440886e-06, "loss": 0.5025, "step": 4091 }, { "epoch": 0.29, "grad_norm": 2.0198517030866, "learning_rate": 8.325163513020307e-06, "loss": 0.5605, "step": 4092 }, { "epoch": 0.29, "grad_norm": 1.628033212624208, "learning_rate": 8.324305210953323e-06, "loss": 0.5832, "step": 4093 }, { "epoch": 0.29, "grad_norm": 1.6377785613614986, "learning_rate": 8.323446733285274e-06, "loss": 0.5093, "step": 4094 }, { "epoch": 0.29, "grad_norm": 1.525521517055867, "learning_rate": 8.322588080061506e-06, "loss": 0.5586, "step": 4095 }, { "epoch": 0.29, "grad_norm": 1.6797857632105606, "learning_rate": 8.321729251327379e-06, "loss": 0.5347, "step": 4096 }, { "epoch": 0.29, "grad_norm": 1.7589641907977436, "learning_rate": 8.320870247128257e-06, "loss": 0.5023, "step": 4097 }, { "epoch": 0.29, "grad_norm": 1.6197745062379005, "learning_rate": 8.320011067509515e-06, "loss": 0.5107, "step": 4098 }, { "epoch": 0.29, "grad_norm": 2.0996970864576308, "learning_rate": 8.31915171251654e-06, "loss": 0.5066, "step": 4099 }, { "epoch": 0.29, "grad_norm": 7.833243962681858, "learning_rate": 8.318292182194723e-06, "loss": 0.5562, "step": 4100 }, { "epoch": 0.29, "grad_norm": 1.6865095989599892, "learning_rate": 8.317432476589471e-06, "loss": 0.5656, "step": 4101 }, { "epoch": 0.29, "grad_norm": 1.7701389993132288, "learning_rate": 8.316572595746193e-06, "loss": 0.5896, "step": 4102 }, { "epoch": 0.29, "grad_norm": 2.0803635314486497, "learning_rate": 8.315712539710312e-06, "loss": 0.5443, "step": 4103 }, { "epoch": 0.29, "grad_norm": 1.6380505916811134, "learning_rate": 8.314852308527261e-06, "loss": 0.5053, "step": 4104 }, { "epoch": 0.29, "grad_norm": 1.6002680706945174, "learning_rate": 8.31399190224248e-06, "loss": 0.4818, "step": 4105 }, { "epoch": 0.29, "grad_norm": 1.8824503573752636, "learning_rate": 8.313131320901415e-06, "loss": 0.5624, "step": 4106 }, { "epoch": 0.29, "grad_norm": 3.3461044406946416, "learning_rate": 8.312270564549527e-06, "loss": 0.5217, "step": 4107 }, { "epoch": 0.29, "grad_norm": 1.770712909533739, "learning_rate": 8.311409633232286e-06, "loss": 0.51, "step": 4108 }, { "epoch": 0.29, "grad_norm": 1.5548306704945407, "learning_rate": 8.310548526995166e-06, "loss": 0.5922, "step": 4109 }, { "epoch": 0.29, "grad_norm": 0.7984810414335358, "learning_rate": 8.309687245883657e-06, "loss": 0.468, "step": 4110 }, { "epoch": 0.29, "grad_norm": 1.5025004032114275, "learning_rate": 8.308825789943251e-06, "loss": 0.5817, "step": 4111 }, { "epoch": 0.29, "grad_norm": 1.6244755862428146, "learning_rate": 8.307964159219455e-06, "loss": 0.6257, "step": 4112 }, { "epoch": 0.29, "grad_norm": 2.49584720937432, "learning_rate": 8.307102353757784e-06, "loss": 0.5124, "step": 4113 }, { "epoch": 0.29, "grad_norm": 2.086362009132171, "learning_rate": 8.306240373603761e-06, "loss": 0.5178, "step": 4114 }, { "epoch": 0.29, "grad_norm": 1.8283779148523402, "learning_rate": 8.305378218802918e-06, "loss": 0.576, "step": 4115 }, { "epoch": 0.29, "grad_norm": 1.844264055519146, "learning_rate": 8.304515889400798e-06, "loss": 0.5154, "step": 4116 }, { "epoch": 0.29, "grad_norm": 1.5221188986874985, "learning_rate": 8.30365338544295e-06, "loss": 0.5758, "step": 4117 }, { "epoch": 0.29, "grad_norm": 1.9206088393134741, "learning_rate": 8.302790706974937e-06, "loss": 0.5791, "step": 4118 }, { "epoch": 0.29, "grad_norm": 1.5452784917062203, "learning_rate": 8.301927854042326e-06, "loss": 0.6155, "step": 4119 }, { "epoch": 0.29, "grad_norm": 1.5935381546851894, "learning_rate": 8.3010648266907e-06, "loss": 0.5611, "step": 4120 }, { "epoch": 0.29, "grad_norm": 1.6801042740513492, "learning_rate": 8.300201624965642e-06, "loss": 0.4994, "step": 4121 }, { "epoch": 0.29, "grad_norm": 1.6942249255069135, "learning_rate": 8.299338248912752e-06, "loss": 0.5507, "step": 4122 }, { "epoch": 0.29, "grad_norm": 1.6278980817763506, "learning_rate": 8.298474698577637e-06, "loss": 0.5614, "step": 4123 }, { "epoch": 0.29, "grad_norm": 1.7807889265099264, "learning_rate": 8.29761097400591e-06, "loss": 0.6457, "step": 4124 }, { "epoch": 0.29, "grad_norm": 1.4840676387978882, "learning_rate": 8.296747075243199e-06, "loss": 0.5644, "step": 4125 }, { "epoch": 0.29, "grad_norm": 1.6047686403007575, "learning_rate": 8.295883002335137e-06, "loss": 0.5934, "step": 4126 }, { "epoch": 0.29, "grad_norm": 1.6310454121845173, "learning_rate": 8.295018755327365e-06, "loss": 0.5928, "step": 4127 }, { "epoch": 0.29, "grad_norm": 1.564294637774086, "learning_rate": 8.294154334265539e-06, "loss": 0.5698, "step": 4128 }, { "epoch": 0.29, "grad_norm": 2.0135685027527077, "learning_rate": 8.293289739195318e-06, "loss": 0.5104, "step": 4129 }, { "epoch": 0.29, "grad_norm": 1.4670726892515684, "learning_rate": 8.292424970162374e-06, "loss": 0.5445, "step": 4130 }, { "epoch": 0.29, "grad_norm": 1.7207675500827189, "learning_rate": 8.291560027212386e-06, "loss": 0.5814, "step": 4131 }, { "epoch": 0.29, "grad_norm": 1.9943184225422592, "learning_rate": 8.290694910391046e-06, "loss": 0.6088, "step": 4132 }, { "epoch": 0.29, "grad_norm": 1.450420516516994, "learning_rate": 8.28982961974405e-06, "loss": 0.5045, "step": 4133 }, { "epoch": 0.29, "grad_norm": 1.591027872393627, "learning_rate": 8.288964155317104e-06, "loss": 0.5445, "step": 4134 }, { "epoch": 0.29, "grad_norm": 1.6777776382350158, "learning_rate": 8.288098517155928e-06, "loss": 0.6044, "step": 4135 }, { "epoch": 0.29, "grad_norm": 1.5113751985555637, "learning_rate": 8.287232705306248e-06, "loss": 0.5857, "step": 4136 }, { "epoch": 0.29, "grad_norm": 1.6135071642686558, "learning_rate": 8.286366719813795e-06, "loss": 0.5402, "step": 4137 }, { "epoch": 0.29, "grad_norm": 5.181313733656596, "learning_rate": 8.285500560724317e-06, "loss": 0.5668, "step": 4138 }, { "epoch": 0.29, "grad_norm": 1.672200608491575, "learning_rate": 8.284634228083569e-06, "loss": 0.558, "step": 4139 }, { "epoch": 0.29, "grad_norm": 2.0314066053038857, "learning_rate": 8.28376772193731e-06, "loss": 0.5642, "step": 4140 }, { "epoch": 0.29, "grad_norm": 0.8139778185289855, "learning_rate": 8.282901042331311e-06, "loss": 0.4771, "step": 4141 }, { "epoch": 0.29, "grad_norm": 1.6225810118990223, "learning_rate": 8.282034189311358e-06, "loss": 0.5634, "step": 4142 }, { "epoch": 0.29, "grad_norm": 1.6581110255224374, "learning_rate": 8.281167162923236e-06, "loss": 0.6011, "step": 4143 }, { "epoch": 0.29, "grad_norm": 1.5184795827731392, "learning_rate": 8.280299963212749e-06, "loss": 0.5183, "step": 4144 }, { "epoch": 0.29, "grad_norm": 1.6513978768158977, "learning_rate": 8.279432590225698e-06, "loss": 0.4386, "step": 4145 }, { "epoch": 0.29, "grad_norm": 1.4860005664476885, "learning_rate": 8.278565044007908e-06, "loss": 0.5383, "step": 4146 }, { "epoch": 0.29, "grad_norm": 1.8013062184567505, "learning_rate": 8.277697324605205e-06, "loss": 0.5716, "step": 4147 }, { "epoch": 0.29, "grad_norm": 1.7150381784744386, "learning_rate": 8.27682943206342e-06, "loss": 0.6185, "step": 4148 }, { "epoch": 0.29, "grad_norm": 1.7854723183609789, "learning_rate": 8.275961366428403e-06, "loss": 0.5437, "step": 4149 }, { "epoch": 0.29, "grad_norm": 1.6113544747838433, "learning_rate": 8.275093127746003e-06, "loss": 0.5246, "step": 4150 }, { "epoch": 0.29, "grad_norm": 1.7206988325228054, "learning_rate": 8.274224716062089e-06, "loss": 0.5857, "step": 4151 }, { "epoch": 0.29, "grad_norm": 1.6105741410605208, "learning_rate": 8.273356131422533e-06, "loss": 0.5908, "step": 4152 }, { "epoch": 0.29, "grad_norm": 1.809310917055256, "learning_rate": 8.272487373873211e-06, "loss": 0.6126, "step": 4153 }, { "epoch": 0.29, "grad_norm": 1.4058128394862073, "learning_rate": 8.271618443460018e-06, "loss": 0.5659, "step": 4154 }, { "epoch": 0.29, "grad_norm": 2.0256917228512523, "learning_rate": 8.270749340228855e-06, "loss": 0.6181, "step": 4155 }, { "epoch": 0.29, "grad_norm": 1.5544592446142869, "learning_rate": 8.269880064225627e-06, "loss": 0.6261, "step": 4156 }, { "epoch": 0.29, "grad_norm": 1.4232600505157573, "learning_rate": 8.269010615496253e-06, "loss": 0.5521, "step": 4157 }, { "epoch": 0.3, "grad_norm": 1.8211907323115697, "learning_rate": 8.268140994086663e-06, "loss": 0.623, "step": 4158 }, { "epoch": 0.3, "grad_norm": 1.8713301798248745, "learning_rate": 8.267271200042792e-06, "loss": 0.5932, "step": 4159 }, { "epoch": 0.3, "grad_norm": 1.6874683261639822, "learning_rate": 8.266401233410585e-06, "loss": 0.5628, "step": 4160 }, { "epoch": 0.3, "grad_norm": 1.5580136383448697, "learning_rate": 8.265531094235997e-06, "loss": 0.5636, "step": 4161 }, { "epoch": 0.3, "grad_norm": 1.7939152210109308, "learning_rate": 8.26466078256499e-06, "loss": 0.5812, "step": 4162 }, { "epoch": 0.3, "grad_norm": 0.8805465098353324, "learning_rate": 8.26379029844354e-06, "loss": 0.4694, "step": 4163 }, { "epoch": 0.3, "grad_norm": 1.6306992913118648, "learning_rate": 8.262919641917626e-06, "loss": 0.578, "step": 4164 }, { "epoch": 0.3, "grad_norm": 1.618054579545835, "learning_rate": 8.26204881303324e-06, "loss": 0.5885, "step": 4165 }, { "epoch": 0.3, "grad_norm": 2.957971751574563, "learning_rate": 8.261177811836386e-06, "loss": 0.5854, "step": 4166 }, { "epoch": 0.3, "grad_norm": 1.64962488471696, "learning_rate": 8.260306638373065e-06, "loss": 0.5483, "step": 4167 }, { "epoch": 0.3, "grad_norm": 1.681156296000756, "learning_rate": 8.259435292689302e-06, "loss": 0.5266, "step": 4168 }, { "epoch": 0.3, "grad_norm": 2.2238159053441593, "learning_rate": 8.258563774831122e-06, "loss": 0.6135, "step": 4169 }, { "epoch": 0.3, "grad_norm": 1.7831033463205608, "learning_rate": 8.257692084844562e-06, "loss": 0.5523, "step": 4170 }, { "epoch": 0.3, "grad_norm": 1.9485285350069337, "learning_rate": 8.256820222775667e-06, "loss": 0.5598, "step": 4171 }, { "epoch": 0.3, "grad_norm": 1.659468480362964, "learning_rate": 8.255948188670493e-06, "loss": 0.6267, "step": 4172 }, { "epoch": 0.3, "grad_norm": 1.6062076541930679, "learning_rate": 8.255075982575102e-06, "loss": 0.5949, "step": 4173 }, { "epoch": 0.3, "grad_norm": 1.7605192474485087, "learning_rate": 8.25420360453557e-06, "loss": 0.5539, "step": 4174 }, { "epoch": 0.3, "grad_norm": 1.5306436737346203, "learning_rate": 8.253331054597974e-06, "loss": 0.5991, "step": 4175 }, { "epoch": 0.3, "grad_norm": 1.4960698789876439, "learning_rate": 8.252458332808411e-06, "loss": 0.5941, "step": 4176 }, { "epoch": 0.3, "grad_norm": 1.5532973188477233, "learning_rate": 8.251585439212975e-06, "loss": 0.5318, "step": 4177 }, { "epoch": 0.3, "grad_norm": 0.7384839412581263, "learning_rate": 8.25071237385778e-06, "loss": 0.4501, "step": 4178 }, { "epoch": 0.3, "grad_norm": 1.6589988343129711, "learning_rate": 8.249839136788942e-06, "loss": 0.5413, "step": 4179 }, { "epoch": 0.3, "grad_norm": 2.5953690359593615, "learning_rate": 8.24896572805259e-06, "loss": 0.5143, "step": 4180 }, { "epoch": 0.3, "grad_norm": 1.7398918107833146, "learning_rate": 8.248092147694858e-06, "loss": 0.5924, "step": 4181 }, { "epoch": 0.3, "grad_norm": 1.7930315917438435, "learning_rate": 8.247218395761891e-06, "loss": 0.4919, "step": 4182 }, { "epoch": 0.3, "grad_norm": 1.5766698561857364, "learning_rate": 8.246344472299847e-06, "loss": 0.6004, "step": 4183 }, { "epoch": 0.3, "grad_norm": 1.6331213742360968, "learning_rate": 8.24547037735489e-06, "loss": 0.5546, "step": 4184 }, { "epoch": 0.3, "grad_norm": 1.6150738681792824, "learning_rate": 8.24459611097319e-06, "loss": 0.5925, "step": 4185 }, { "epoch": 0.3, "grad_norm": 1.6684522297024726, "learning_rate": 8.243721673200927e-06, "loss": 0.6121, "step": 4186 }, { "epoch": 0.3, "grad_norm": 1.8655567828720996, "learning_rate": 8.242847064084294e-06, "loss": 0.5874, "step": 4187 }, { "epoch": 0.3, "grad_norm": 1.6653990070026172, "learning_rate": 8.241972283669491e-06, "loss": 0.4547, "step": 4188 }, { "epoch": 0.3, "grad_norm": 1.8035491822098664, "learning_rate": 8.241097332002727e-06, "loss": 0.5524, "step": 4189 }, { "epoch": 0.3, "grad_norm": 1.7536452237578264, "learning_rate": 8.24022220913022e-06, "loss": 0.5854, "step": 4190 }, { "epoch": 0.3, "grad_norm": 1.4971351313299557, "learning_rate": 8.239346915098196e-06, "loss": 0.5642, "step": 4191 }, { "epoch": 0.3, "grad_norm": 1.695306458457725, "learning_rate": 8.238471449952892e-06, "loss": 0.5534, "step": 4192 }, { "epoch": 0.3, "grad_norm": 1.8606063001152102, "learning_rate": 8.237595813740553e-06, "loss": 0.5855, "step": 4193 }, { "epoch": 0.3, "grad_norm": 1.6160097951867147, "learning_rate": 8.236720006507432e-06, "loss": 0.6048, "step": 4194 }, { "epoch": 0.3, "grad_norm": 1.5655950978399322, "learning_rate": 8.235844028299793e-06, "loss": 0.5577, "step": 4195 }, { "epoch": 0.3, "grad_norm": 1.9865616726535127, "learning_rate": 8.234967879163907e-06, "loss": 0.5663, "step": 4196 }, { "epoch": 0.3, "grad_norm": 1.5395279151810901, "learning_rate": 8.234091559146057e-06, "loss": 0.5886, "step": 4197 }, { "epoch": 0.3, "grad_norm": 2.4834580394407877, "learning_rate": 8.233215068292533e-06, "loss": 0.5986, "step": 4198 }, { "epoch": 0.3, "grad_norm": 1.6650319016640125, "learning_rate": 8.232338406649632e-06, "loss": 0.5946, "step": 4199 }, { "epoch": 0.3, "grad_norm": 1.7706518959997848, "learning_rate": 8.231461574263663e-06, "loss": 0.5231, "step": 4200 }, { "epoch": 0.3, "grad_norm": 1.681933722108426, "learning_rate": 8.230584571180947e-06, "loss": 0.6473, "step": 4201 }, { "epoch": 0.3, "grad_norm": 1.7714317340805206, "learning_rate": 8.229707397447806e-06, "loss": 0.5525, "step": 4202 }, { "epoch": 0.3, "grad_norm": 1.6491001523680766, "learning_rate": 8.228830053110575e-06, "loss": 0.5644, "step": 4203 }, { "epoch": 0.3, "grad_norm": 1.6971035318867842, "learning_rate": 8.2279525382156e-06, "loss": 0.6269, "step": 4204 }, { "epoch": 0.3, "grad_norm": 2.010449230676553, "learning_rate": 8.227074852809236e-06, "loss": 0.5874, "step": 4205 }, { "epoch": 0.3, "grad_norm": 1.4831482196527084, "learning_rate": 8.226196996937843e-06, "loss": 0.5829, "step": 4206 }, { "epoch": 0.3, "grad_norm": 1.4061421100911646, "learning_rate": 8.22531897064779e-06, "loss": 0.5415, "step": 4207 }, { "epoch": 0.3, "grad_norm": 1.7465015461277564, "learning_rate": 8.224440773985464e-06, "loss": 0.5093, "step": 4208 }, { "epoch": 0.3, "grad_norm": 1.4933868008570563, "learning_rate": 8.223562406997247e-06, "loss": 0.5438, "step": 4209 }, { "epoch": 0.3, "grad_norm": 1.5726636849504292, "learning_rate": 8.222683869729544e-06, "loss": 0.5664, "step": 4210 }, { "epoch": 0.3, "grad_norm": 1.561597600549059, "learning_rate": 8.221805162228758e-06, "loss": 0.585, "step": 4211 }, { "epoch": 0.3, "grad_norm": 7.0476427591235185, "learning_rate": 8.220926284541305e-06, "loss": 0.5294, "step": 4212 }, { "epoch": 0.3, "grad_norm": 0.9760640543387811, "learning_rate": 8.220047236713612e-06, "loss": 0.4836, "step": 4213 }, { "epoch": 0.3, "grad_norm": 1.7365932216629094, "learning_rate": 8.219168018792114e-06, "loss": 0.6634, "step": 4214 }, { "epoch": 0.3, "grad_norm": 1.9581582190775046, "learning_rate": 8.218288630823252e-06, "loss": 0.5692, "step": 4215 }, { "epoch": 0.3, "grad_norm": 1.789445125597473, "learning_rate": 8.21740907285348e-06, "loss": 0.5079, "step": 4216 }, { "epoch": 0.3, "grad_norm": 19.96682884616191, "learning_rate": 8.216529344929261e-06, "loss": 0.5791, "step": 4217 }, { "epoch": 0.3, "grad_norm": 0.8165971441561088, "learning_rate": 8.21564944709706e-06, "loss": 0.4451, "step": 4218 }, { "epoch": 0.3, "grad_norm": 1.6431279924625684, "learning_rate": 8.214769379403358e-06, "loss": 0.6149, "step": 4219 }, { "epoch": 0.3, "grad_norm": 1.6587493891304463, "learning_rate": 8.213889141894647e-06, "loss": 0.4625, "step": 4220 }, { "epoch": 0.3, "grad_norm": 1.7246095943061173, "learning_rate": 8.21300873461742e-06, "loss": 0.5338, "step": 4221 }, { "epoch": 0.3, "grad_norm": 1.5979234154850817, "learning_rate": 8.212128157618185e-06, "loss": 0.5434, "step": 4222 }, { "epoch": 0.3, "grad_norm": 1.5481385319371779, "learning_rate": 8.211247410943458e-06, "loss": 0.6017, "step": 4223 }, { "epoch": 0.3, "grad_norm": 1.6912640041680673, "learning_rate": 8.210366494639759e-06, "loss": 0.6101, "step": 4224 }, { "epoch": 0.3, "grad_norm": 1.5993382510421668, "learning_rate": 8.209485408753625e-06, "loss": 0.5677, "step": 4225 }, { "epoch": 0.3, "grad_norm": 0.8327717151420451, "learning_rate": 8.208604153331597e-06, "loss": 0.46, "step": 4226 }, { "epoch": 0.3, "grad_norm": 1.7811241456449773, "learning_rate": 8.207722728420225e-06, "loss": 0.5282, "step": 4227 }, { "epoch": 0.3, "grad_norm": 1.6646032140518985, "learning_rate": 8.206841134066067e-06, "loss": 0.5784, "step": 4228 }, { "epoch": 0.3, "grad_norm": 0.7400769419798904, "learning_rate": 8.205959370315696e-06, "loss": 0.4662, "step": 4229 }, { "epoch": 0.3, "grad_norm": 2.3498507275451423, "learning_rate": 8.205077437215687e-06, "loss": 0.4991, "step": 4230 }, { "epoch": 0.3, "grad_norm": 1.545520681577717, "learning_rate": 8.204195334812629e-06, "loss": 0.5724, "step": 4231 }, { "epoch": 0.3, "grad_norm": 1.740156727485627, "learning_rate": 8.203313063153115e-06, "loss": 0.5946, "step": 4232 }, { "epoch": 0.3, "grad_norm": 2.033725873031003, "learning_rate": 8.202430622283751e-06, "loss": 0.5333, "step": 4233 }, { "epoch": 0.3, "grad_norm": 1.4254601630298347, "learning_rate": 8.20154801225115e-06, "loss": 0.5258, "step": 4234 }, { "epoch": 0.3, "grad_norm": 1.8425489787451375, "learning_rate": 8.200665233101934e-06, "loss": 0.5721, "step": 4235 }, { "epoch": 0.3, "grad_norm": 1.6052046750993954, "learning_rate": 8.199782284882737e-06, "loss": 0.5553, "step": 4236 }, { "epoch": 0.3, "grad_norm": 0.7653488722005614, "learning_rate": 8.198899167640195e-06, "loss": 0.4499, "step": 4237 }, { "epoch": 0.3, "grad_norm": 1.4551314111530251, "learning_rate": 8.198015881420961e-06, "loss": 0.5398, "step": 4238 }, { "epoch": 0.3, "grad_norm": 1.8582049839617305, "learning_rate": 8.197132426271691e-06, "loss": 0.6317, "step": 4239 }, { "epoch": 0.3, "grad_norm": 0.8252698423707818, "learning_rate": 8.196248802239056e-06, "loss": 0.4362, "step": 4240 }, { "epoch": 0.3, "grad_norm": 1.5950030675106568, "learning_rate": 8.195365009369725e-06, "loss": 0.5302, "step": 4241 }, { "epoch": 0.3, "grad_norm": 2.3532132951183833, "learning_rate": 8.194481047710388e-06, "loss": 0.5851, "step": 4242 }, { "epoch": 0.3, "grad_norm": 1.5756281479449123, "learning_rate": 8.193596917307738e-06, "loss": 0.6134, "step": 4243 }, { "epoch": 0.3, "grad_norm": 1.7171837602839939, "learning_rate": 8.192712618208475e-06, "loss": 0.552, "step": 4244 }, { "epoch": 0.3, "grad_norm": 2.7593316860099097, "learning_rate": 8.191828150459316e-06, "loss": 0.6062, "step": 4245 }, { "epoch": 0.3, "grad_norm": 0.7696611211097445, "learning_rate": 8.19094351410698e-06, "loss": 0.4915, "step": 4246 }, { "epoch": 0.3, "grad_norm": 1.8138343372343733, "learning_rate": 8.190058709198193e-06, "loss": 0.5292, "step": 4247 }, { "epoch": 0.3, "grad_norm": 1.607835255615697, "learning_rate": 8.189173735779695e-06, "loss": 0.5725, "step": 4248 }, { "epoch": 0.3, "grad_norm": 1.6014220462689603, "learning_rate": 8.188288593898235e-06, "loss": 0.4691, "step": 4249 }, { "epoch": 0.3, "grad_norm": 1.9292006668606876, "learning_rate": 8.187403283600568e-06, "loss": 0.5571, "step": 4250 }, { "epoch": 0.3, "grad_norm": 1.839405984841153, "learning_rate": 8.18651780493346e-06, "loss": 0.5172, "step": 4251 }, { "epoch": 0.3, "grad_norm": 1.8676733100138836, "learning_rate": 8.185632157943682e-06, "loss": 0.5508, "step": 4252 }, { "epoch": 0.3, "grad_norm": 1.6414132682658147, "learning_rate": 8.18474634267802e-06, "loss": 0.5701, "step": 4253 }, { "epoch": 0.3, "grad_norm": 1.6107034650566234, "learning_rate": 8.183860359183264e-06, "loss": 0.5234, "step": 4254 }, { "epoch": 0.3, "grad_norm": 1.5519368281673547, "learning_rate": 8.182974207506218e-06, "loss": 0.5079, "step": 4255 }, { "epoch": 0.3, "grad_norm": 1.450720658454228, "learning_rate": 8.182087887693687e-06, "loss": 0.561, "step": 4256 }, { "epoch": 0.3, "grad_norm": 1.5393574885034085, "learning_rate": 8.181201399792492e-06, "loss": 0.5222, "step": 4257 }, { "epoch": 0.3, "grad_norm": 1.5434876223080358, "learning_rate": 8.180314743849459e-06, "loss": 0.5537, "step": 4258 }, { "epoch": 0.3, "grad_norm": 1.6256919191143686, "learning_rate": 8.179427919911425e-06, "loss": 0.5386, "step": 4259 }, { "epoch": 0.3, "grad_norm": 2.0980988172323096, "learning_rate": 8.178540928025236e-06, "loss": 0.5033, "step": 4260 }, { "epoch": 0.3, "grad_norm": 2.0611411734553857, "learning_rate": 8.177653768237744e-06, "loss": 0.5033, "step": 4261 }, { "epoch": 0.3, "grad_norm": 1.4218449186715503, "learning_rate": 8.176766440595812e-06, "loss": 0.4939, "step": 4262 }, { "epoch": 0.3, "grad_norm": 1.9467803163175568, "learning_rate": 8.175878945146312e-06, "loss": 0.5442, "step": 4263 }, { "epoch": 0.3, "grad_norm": 0.8404212571999479, "learning_rate": 8.174991281936127e-06, "loss": 0.4567, "step": 4264 }, { "epoch": 0.3, "grad_norm": 1.4710047748911266, "learning_rate": 8.174103451012142e-06, "loss": 0.5364, "step": 4265 }, { "epoch": 0.3, "grad_norm": 2.519682572781901, "learning_rate": 8.173215452421259e-06, "loss": 0.6639, "step": 4266 }, { "epoch": 0.3, "grad_norm": 0.8517253187394633, "learning_rate": 8.172327286210382e-06, "loss": 0.4938, "step": 4267 }, { "epoch": 0.3, "grad_norm": 1.637167819783796, "learning_rate": 8.17143895242643e-06, "loss": 0.575, "step": 4268 }, { "epoch": 0.3, "grad_norm": 1.6311873938870587, "learning_rate": 8.170550451116325e-06, "loss": 0.5171, "step": 4269 }, { "epoch": 0.3, "grad_norm": 5.593648480827877, "learning_rate": 8.169661782327002e-06, "loss": 0.5767, "step": 4270 }, { "epoch": 0.3, "grad_norm": 1.7775731214289612, "learning_rate": 8.168772946105403e-06, "loss": 0.5248, "step": 4271 }, { "epoch": 0.3, "grad_norm": 1.886576981047356, "learning_rate": 8.167883942498482e-06, "loss": 0.5555, "step": 4272 }, { "epoch": 0.3, "grad_norm": 2.008619813638456, "learning_rate": 8.166994771553194e-06, "loss": 0.6, "step": 4273 }, { "epoch": 0.3, "grad_norm": 1.618627618842228, "learning_rate": 8.166105433316513e-06, "loss": 0.5722, "step": 4274 }, { "epoch": 0.3, "grad_norm": 1.867981150947837, "learning_rate": 8.165215927835413e-06, "loss": 0.6059, "step": 4275 }, { "epoch": 0.3, "grad_norm": 1.5202703025353805, "learning_rate": 8.164326255156883e-06, "loss": 0.5189, "step": 4276 }, { "epoch": 0.3, "grad_norm": 1.8681792848090986, "learning_rate": 8.163436415327919e-06, "loss": 0.549, "step": 4277 }, { "epoch": 0.3, "grad_norm": 1.996835077982992, "learning_rate": 8.162546408395524e-06, "loss": 0.5401, "step": 4278 }, { "epoch": 0.3, "grad_norm": 2.2644138093798114, "learning_rate": 8.16165623440671e-06, "loss": 0.5474, "step": 4279 }, { "epoch": 0.3, "grad_norm": 1.8336811889449225, "learning_rate": 8.160765893408502e-06, "loss": 0.5762, "step": 4280 }, { "epoch": 0.3, "grad_norm": 1.5400338940958636, "learning_rate": 8.15987538544793e-06, "loss": 0.545, "step": 4281 }, { "epoch": 0.3, "grad_norm": 3.1566621188646162, "learning_rate": 8.158984710572032e-06, "loss": 0.5775, "step": 4282 }, { "epoch": 0.3, "grad_norm": 2.2225292244686936, "learning_rate": 8.158093868827857e-06, "loss": 0.5192, "step": 4283 }, { "epoch": 0.3, "grad_norm": 1.3858511557648974, "learning_rate": 8.157202860262463e-06, "loss": 0.5349, "step": 4284 }, { "epoch": 0.3, "grad_norm": 1.5757117334566655, "learning_rate": 8.156311684922917e-06, "loss": 0.542, "step": 4285 }, { "epoch": 0.3, "grad_norm": 1.5637551674146335, "learning_rate": 8.155420342856292e-06, "loss": 0.5485, "step": 4286 }, { "epoch": 0.3, "grad_norm": 1.8961452206644958, "learning_rate": 8.154528834109674e-06, "loss": 0.507, "step": 4287 }, { "epoch": 0.3, "grad_norm": 1.652404428830408, "learning_rate": 8.153637158730151e-06, "loss": 0.5513, "step": 4288 }, { "epoch": 0.3, "grad_norm": 1.6868275649135815, "learning_rate": 8.152745316764829e-06, "loss": 0.5195, "step": 4289 }, { "epoch": 0.3, "grad_norm": 1.5732812232491098, "learning_rate": 8.151853308260816e-06, "loss": 0.5821, "step": 4290 }, { "epoch": 0.3, "grad_norm": 1.533875686881365, "learning_rate": 8.150961133265232e-06, "loss": 0.5366, "step": 4291 }, { "epoch": 0.3, "grad_norm": 1.4920170387571963, "learning_rate": 8.150068791825202e-06, "loss": 0.542, "step": 4292 }, { "epoch": 0.3, "grad_norm": 1.769599622702397, "learning_rate": 8.149176283987868e-06, "loss": 0.6213, "step": 4293 }, { "epoch": 0.3, "grad_norm": 1.652577711270736, "learning_rate": 8.148283609800369e-06, "loss": 0.6398, "step": 4294 }, { "epoch": 0.3, "grad_norm": 1.6387522681487596, "learning_rate": 8.147390769309863e-06, "loss": 0.4773, "step": 4295 }, { "epoch": 0.3, "grad_norm": 1.5373937683951593, "learning_rate": 8.146497762563512e-06, "loss": 0.5968, "step": 4296 }, { "epoch": 0.3, "grad_norm": 1.5967383973748752, "learning_rate": 8.145604589608487e-06, "loss": 0.5781, "step": 4297 }, { "epoch": 0.3, "grad_norm": 1.510701780990691, "learning_rate": 8.14471125049197e-06, "loss": 0.5184, "step": 4298 }, { "epoch": 0.31, "grad_norm": 1.5535092599365121, "learning_rate": 8.143817745261147e-06, "loss": 0.5519, "step": 4299 }, { "epoch": 0.31, "grad_norm": 0.7549774053823383, "learning_rate": 8.142924073963219e-06, "loss": 0.4817, "step": 4300 }, { "epoch": 0.31, "grad_norm": 1.8057355273154736, "learning_rate": 8.142030236645391e-06, "loss": 0.525, "step": 4301 }, { "epoch": 0.31, "grad_norm": 1.6917808065446482, "learning_rate": 8.141136233354881e-06, "loss": 0.6773, "step": 4302 }, { "epoch": 0.31, "grad_norm": 1.4648906775816386, "learning_rate": 8.140242064138907e-06, "loss": 0.5298, "step": 4303 }, { "epoch": 0.31, "grad_norm": 1.6324164427227914, "learning_rate": 8.13934772904471e-06, "loss": 0.5722, "step": 4304 }, { "epoch": 0.31, "grad_norm": 1.645317849089794, "learning_rate": 8.13845322811953e-06, "loss": 0.5837, "step": 4305 }, { "epoch": 0.31, "grad_norm": 1.5363764928897885, "learning_rate": 8.137558561410615e-06, "loss": 0.5659, "step": 4306 }, { "epoch": 0.31, "grad_norm": 1.883059435769436, "learning_rate": 8.136663728965225e-06, "loss": 0.5584, "step": 4307 }, { "epoch": 0.31, "grad_norm": 1.4677561176895273, "learning_rate": 8.135768730830627e-06, "loss": 0.5098, "step": 4308 }, { "epoch": 0.31, "grad_norm": 1.7454917171214832, "learning_rate": 8.1348735670541e-06, "loss": 0.6005, "step": 4309 }, { "epoch": 0.31, "grad_norm": 1.5148017788920904, "learning_rate": 8.133978237682929e-06, "loss": 0.5031, "step": 4310 }, { "epoch": 0.31, "grad_norm": 1.4807678415021261, "learning_rate": 8.133082742764407e-06, "loss": 0.5702, "step": 4311 }, { "epoch": 0.31, "grad_norm": 1.9473736359406637, "learning_rate": 8.132187082345839e-06, "loss": 0.537, "step": 4312 }, { "epoch": 0.31, "grad_norm": 1.482190164618764, "learning_rate": 8.131291256474536e-06, "loss": 0.4469, "step": 4313 }, { "epoch": 0.31, "grad_norm": 1.4793252157669619, "learning_rate": 8.130395265197819e-06, "loss": 0.5628, "step": 4314 }, { "epoch": 0.31, "grad_norm": 1.830228702671688, "learning_rate": 8.129499108563016e-06, "loss": 0.6829, "step": 4315 }, { "epoch": 0.31, "grad_norm": 1.8170147770072045, "learning_rate": 8.128602786617468e-06, "loss": 0.536, "step": 4316 }, { "epoch": 0.31, "grad_norm": 1.6314135647465575, "learning_rate": 8.127706299408518e-06, "loss": 0.6166, "step": 4317 }, { "epoch": 0.31, "grad_norm": 1.6036639706001015, "learning_rate": 8.126809646983522e-06, "loss": 0.5969, "step": 4318 }, { "epoch": 0.31, "grad_norm": 1.5116670755484711, "learning_rate": 8.125912829389848e-06, "loss": 0.5258, "step": 4319 }, { "epoch": 0.31, "grad_norm": 1.593244433711856, "learning_rate": 8.125015846674864e-06, "loss": 0.5569, "step": 4320 }, { "epoch": 0.31, "grad_norm": 1.795551469012507, "learning_rate": 8.124118698885955e-06, "loss": 0.5996, "step": 4321 }, { "epoch": 0.31, "grad_norm": 1.8392142941702296, "learning_rate": 8.12322138607051e-06, "loss": 0.5044, "step": 4322 }, { "epoch": 0.31, "grad_norm": 1.4514057962638305, "learning_rate": 8.12232390827593e-06, "loss": 0.5987, "step": 4323 }, { "epoch": 0.31, "grad_norm": 1.7045681567609239, "learning_rate": 8.121426265549619e-06, "loss": 0.575, "step": 4324 }, { "epoch": 0.31, "grad_norm": 2.200218573753405, "learning_rate": 8.120528457938998e-06, "loss": 0.4767, "step": 4325 }, { "epoch": 0.31, "grad_norm": 1.6328214606599796, "learning_rate": 8.119630485491487e-06, "loss": 0.5858, "step": 4326 }, { "epoch": 0.31, "grad_norm": 2.278147380065056, "learning_rate": 8.118732348254525e-06, "loss": 0.5181, "step": 4327 }, { "epoch": 0.31, "grad_norm": 1.4936285391255903, "learning_rate": 8.117834046275555e-06, "loss": 0.5234, "step": 4328 }, { "epoch": 0.31, "grad_norm": 1.867204636648518, "learning_rate": 8.116935579602022e-06, "loss": 0.5721, "step": 4329 }, { "epoch": 0.31, "grad_norm": 1.749089005601295, "learning_rate": 8.116036948281392e-06, "loss": 0.5434, "step": 4330 }, { "epoch": 0.31, "grad_norm": 1.6525435481599546, "learning_rate": 8.115138152361132e-06, "loss": 0.4935, "step": 4331 }, { "epoch": 0.31, "grad_norm": 1.5894650705243134, "learning_rate": 8.11423919188872e-06, "loss": 0.5943, "step": 4332 }, { "epoch": 0.31, "grad_norm": 1.7466673496420493, "learning_rate": 8.113340066911639e-06, "loss": 0.5176, "step": 4333 }, { "epoch": 0.31, "grad_norm": 1.5762692869169663, "learning_rate": 8.112440777477388e-06, "loss": 0.5586, "step": 4334 }, { "epoch": 0.31, "grad_norm": 1.7492268742241084, "learning_rate": 8.111541323633467e-06, "loss": 0.6556, "step": 4335 }, { "epoch": 0.31, "grad_norm": 1.7231519079123618, "learning_rate": 8.11064170542739e-06, "loss": 0.6105, "step": 4336 }, { "epoch": 0.31, "grad_norm": 1.7867314867887323, "learning_rate": 8.10974192290668e-06, "loss": 0.5518, "step": 4337 }, { "epoch": 0.31, "grad_norm": 1.587267110552621, "learning_rate": 8.108841976118864e-06, "loss": 0.6104, "step": 4338 }, { "epoch": 0.31, "grad_norm": 1.7486084697077362, "learning_rate": 8.10794186511148e-06, "loss": 0.559, "step": 4339 }, { "epoch": 0.31, "grad_norm": 1.7905809751171622, "learning_rate": 8.107041589932076e-06, "loss": 0.5556, "step": 4340 }, { "epoch": 0.31, "grad_norm": 1.4963486323622954, "learning_rate": 8.106141150628207e-06, "loss": 0.5758, "step": 4341 }, { "epoch": 0.31, "grad_norm": 2.119541546899883, "learning_rate": 8.105240547247438e-06, "loss": 0.537, "step": 4342 }, { "epoch": 0.31, "grad_norm": 1.5021880904208744, "learning_rate": 8.104339779837339e-06, "loss": 0.5884, "step": 4343 }, { "epoch": 0.31, "grad_norm": 1.796368598152394, "learning_rate": 8.103438848445493e-06, "loss": 0.519, "step": 4344 }, { "epoch": 0.31, "grad_norm": 1.611578343880975, "learning_rate": 8.102537753119494e-06, "loss": 0.5802, "step": 4345 }, { "epoch": 0.31, "grad_norm": 1.6164351108482622, "learning_rate": 8.101636493906938e-06, "loss": 0.5571, "step": 4346 }, { "epoch": 0.31, "grad_norm": 1.862393924407118, "learning_rate": 8.100735070855432e-06, "loss": 0.5548, "step": 4347 }, { "epoch": 0.31, "grad_norm": 2.7323161553217568, "learning_rate": 8.099833484012592e-06, "loss": 0.5722, "step": 4348 }, { "epoch": 0.31, "grad_norm": 1.636104383140665, "learning_rate": 8.098931733426045e-06, "loss": 0.5375, "step": 4349 }, { "epoch": 0.31, "grad_norm": 1.4735569546738667, "learning_rate": 8.098029819143422e-06, "loss": 0.5773, "step": 4350 }, { "epoch": 0.31, "grad_norm": 1.5910346449706478, "learning_rate": 8.097127741212369e-06, "loss": 0.5564, "step": 4351 }, { "epoch": 0.31, "grad_norm": 0.9208147353501052, "learning_rate": 8.096225499680532e-06, "loss": 0.4491, "step": 4352 }, { "epoch": 0.31, "grad_norm": 1.5388194020900825, "learning_rate": 8.095323094595574e-06, "loss": 0.5474, "step": 4353 }, { "epoch": 0.31, "grad_norm": 1.7951108183032523, "learning_rate": 8.09442052600516e-06, "loss": 0.5478, "step": 4354 }, { "epoch": 0.31, "grad_norm": 1.818974541988249, "learning_rate": 8.09351779395697e-06, "loss": 0.5918, "step": 4355 }, { "epoch": 0.31, "grad_norm": 0.7267951740901497, "learning_rate": 8.092614898498685e-06, "loss": 0.4505, "step": 4356 }, { "epoch": 0.31, "grad_norm": 1.768425729472655, "learning_rate": 8.091711839678005e-06, "loss": 0.5641, "step": 4357 }, { "epoch": 0.31, "grad_norm": 1.5821051335137488, "learning_rate": 8.090808617542629e-06, "loss": 0.6276, "step": 4358 }, { "epoch": 0.31, "grad_norm": 0.8155148311949443, "learning_rate": 8.089905232140268e-06, "loss": 0.4658, "step": 4359 }, { "epoch": 0.31, "grad_norm": 1.5612878546186217, "learning_rate": 8.089001683518642e-06, "loss": 0.5875, "step": 4360 }, { "epoch": 0.31, "grad_norm": 1.8826165154795167, "learning_rate": 8.088097971725482e-06, "loss": 0.5548, "step": 4361 }, { "epoch": 0.31, "grad_norm": 1.9856631800558473, "learning_rate": 8.087194096808522e-06, "loss": 0.6211, "step": 4362 }, { "epoch": 0.31, "grad_norm": 0.8474577998917612, "learning_rate": 8.086290058815508e-06, "loss": 0.4643, "step": 4363 }, { "epoch": 0.31, "grad_norm": 0.8488359917748365, "learning_rate": 8.085385857794197e-06, "loss": 0.4545, "step": 4364 }, { "epoch": 0.31, "grad_norm": 1.8036750475634358, "learning_rate": 8.08448149379235e-06, "loss": 0.5708, "step": 4365 }, { "epoch": 0.31, "grad_norm": 1.7630502251520652, "learning_rate": 8.083576966857737e-06, "loss": 0.5249, "step": 4366 }, { "epoch": 0.31, "grad_norm": 1.4118149716486545, "learning_rate": 8.082672277038141e-06, "loss": 0.4731, "step": 4367 }, { "epoch": 0.31, "grad_norm": 1.90153931437648, "learning_rate": 8.081767424381353e-06, "loss": 0.5982, "step": 4368 }, { "epoch": 0.31, "grad_norm": 1.768580922152759, "learning_rate": 8.080862408935164e-06, "loss": 0.5875, "step": 4369 }, { "epoch": 0.31, "grad_norm": 1.8480290677550952, "learning_rate": 8.079957230747384e-06, "loss": 0.5581, "step": 4370 }, { "epoch": 0.31, "grad_norm": 1.6468822816223843, "learning_rate": 8.079051889865826e-06, "loss": 0.5592, "step": 4371 }, { "epoch": 0.31, "grad_norm": 1.9295632248382428, "learning_rate": 8.078146386338314e-06, "loss": 0.5876, "step": 4372 }, { "epoch": 0.31, "grad_norm": 2.2475130067083153, "learning_rate": 8.07724072021268e-06, "loss": 0.6384, "step": 4373 }, { "epoch": 0.31, "grad_norm": 1.589495721815848, "learning_rate": 8.076334891536765e-06, "loss": 0.4819, "step": 4374 }, { "epoch": 0.31, "grad_norm": 1.753806853920163, "learning_rate": 8.075428900358415e-06, "loss": 0.5481, "step": 4375 }, { "epoch": 0.31, "grad_norm": 1.7183345256685902, "learning_rate": 8.07452274672549e-06, "loss": 0.4583, "step": 4376 }, { "epoch": 0.31, "grad_norm": 1.7499279176139844, "learning_rate": 8.073616430685858e-06, "loss": 0.5841, "step": 4377 }, { "epoch": 0.31, "grad_norm": 1.6518687474590232, "learning_rate": 8.072709952287388e-06, "loss": 0.6104, "step": 4378 }, { "epoch": 0.31, "grad_norm": 1.589037743724401, "learning_rate": 8.071803311577969e-06, "loss": 0.5922, "step": 4379 }, { "epoch": 0.31, "grad_norm": 1.8547583633356861, "learning_rate": 8.070896508605489e-06, "loss": 0.5578, "step": 4380 }, { "epoch": 0.31, "grad_norm": 1.892417939428766, "learning_rate": 8.06998954341785e-06, "loss": 0.5514, "step": 4381 }, { "epoch": 0.31, "grad_norm": 1.6892545380800559, "learning_rate": 8.069082416062961e-06, "loss": 0.5928, "step": 4382 }, { "epoch": 0.31, "grad_norm": 1.951256835133891, "learning_rate": 8.06817512658874e-06, "loss": 0.5442, "step": 4383 }, { "epoch": 0.31, "grad_norm": 1.8286888663281617, "learning_rate": 8.067267675043112e-06, "loss": 0.5861, "step": 4384 }, { "epoch": 0.31, "grad_norm": 1.5929972160664623, "learning_rate": 8.066360061474013e-06, "loss": 0.6063, "step": 4385 }, { "epoch": 0.31, "grad_norm": 1.5943437337730584, "learning_rate": 8.065452285929383e-06, "loss": 0.5719, "step": 4386 }, { "epoch": 0.31, "grad_norm": 1.706720778916542, "learning_rate": 8.064544348457177e-06, "loss": 0.5786, "step": 4387 }, { "epoch": 0.31, "grad_norm": 1.705566562712168, "learning_rate": 8.063636249105355e-06, "loss": 0.5557, "step": 4388 }, { "epoch": 0.31, "grad_norm": 1.4742874422019507, "learning_rate": 8.062727987921886e-06, "loss": 0.5453, "step": 4389 }, { "epoch": 0.31, "grad_norm": 0.8356546737431582, "learning_rate": 8.061819564954745e-06, "loss": 0.4287, "step": 4390 }, { "epoch": 0.31, "grad_norm": 2.0457617860905035, "learning_rate": 8.06091098025192e-06, "loss": 0.5756, "step": 4391 }, { "epoch": 0.31, "grad_norm": 1.8597639148312166, "learning_rate": 8.060002233861405e-06, "loss": 0.5501, "step": 4392 }, { "epoch": 0.31, "grad_norm": 1.8253868861527018, "learning_rate": 8.059093325831204e-06, "loss": 0.6278, "step": 4393 }, { "epoch": 0.31, "grad_norm": 1.6449548680498411, "learning_rate": 8.05818425620933e-06, "loss": 0.4996, "step": 4394 }, { "epoch": 0.31, "grad_norm": 1.6376573736783098, "learning_rate": 8.057275025043798e-06, "loss": 0.4654, "step": 4395 }, { "epoch": 0.31, "grad_norm": 1.8077132450217883, "learning_rate": 8.05636563238264e-06, "loss": 0.5387, "step": 4396 }, { "epoch": 0.31, "grad_norm": 2.204257323010359, "learning_rate": 8.055456078273894e-06, "loss": 0.4928, "step": 4397 }, { "epoch": 0.31, "grad_norm": 1.851052738382041, "learning_rate": 8.054546362765605e-06, "loss": 0.5683, "step": 4398 }, { "epoch": 0.31, "grad_norm": 1.5655120763307142, "learning_rate": 8.053636485905824e-06, "loss": 0.5768, "step": 4399 }, { "epoch": 0.31, "grad_norm": 2.3229627290818193, "learning_rate": 8.05272644774262e-06, "loss": 0.5171, "step": 4400 }, { "epoch": 0.31, "grad_norm": 1.6653012427154155, "learning_rate": 8.05181624832406e-06, "loss": 0.5744, "step": 4401 }, { "epoch": 0.31, "grad_norm": 1.5920469117217915, "learning_rate": 8.050905887698225e-06, "loss": 0.5128, "step": 4402 }, { "epoch": 0.31, "grad_norm": 1.6416871497745171, "learning_rate": 8.0499953659132e-06, "loss": 0.5976, "step": 4403 }, { "epoch": 0.31, "grad_norm": 1.5054131978557324, "learning_rate": 8.049084683017089e-06, "loss": 0.5857, "step": 4404 }, { "epoch": 0.31, "grad_norm": 1.6527843329747083, "learning_rate": 8.048173839057993e-06, "loss": 0.5786, "step": 4405 }, { "epoch": 0.31, "grad_norm": 1.631687645263775, "learning_rate": 8.047262834084025e-06, "loss": 0.5489, "step": 4406 }, { "epoch": 0.31, "grad_norm": 2.017297972381213, "learning_rate": 8.046351668143308e-06, "loss": 0.609, "step": 4407 }, { "epoch": 0.31, "grad_norm": 1.4032622610033698, "learning_rate": 8.045440341283974e-06, "loss": 0.5441, "step": 4408 }, { "epoch": 0.31, "grad_norm": 1.8902849389334295, "learning_rate": 8.044528853554163e-06, "loss": 0.5373, "step": 4409 }, { "epoch": 0.31, "grad_norm": 2.3570115539865464, "learning_rate": 8.043617205002021e-06, "loss": 0.558, "step": 4410 }, { "epoch": 0.31, "grad_norm": 1.508358673101007, "learning_rate": 8.042705395675706e-06, "loss": 0.4549, "step": 4411 }, { "epoch": 0.31, "grad_norm": 1.6710909172106094, "learning_rate": 8.041793425623379e-06, "loss": 0.5376, "step": 4412 }, { "epoch": 0.31, "grad_norm": 1.5440937493416225, "learning_rate": 8.040881294893219e-06, "loss": 0.5342, "step": 4413 }, { "epoch": 0.31, "grad_norm": 4.5091530763210494, "learning_rate": 8.039969003533402e-06, "loss": 0.577, "step": 4414 }, { "epoch": 0.31, "grad_norm": 1.7801560158150778, "learning_rate": 8.039056551592126e-06, "loss": 0.552, "step": 4415 }, { "epoch": 0.31, "grad_norm": 1.7719604380705163, "learning_rate": 8.038143939117581e-06, "loss": 0.5725, "step": 4416 }, { "epoch": 0.31, "grad_norm": 1.6638886352313746, "learning_rate": 8.037231166157982e-06, "loss": 0.5585, "step": 4417 }, { "epoch": 0.31, "grad_norm": 1.6510351447302818, "learning_rate": 8.036318232761537e-06, "loss": 0.4978, "step": 4418 }, { "epoch": 0.31, "grad_norm": 1.871503231040386, "learning_rate": 8.035405138976478e-06, "loss": 0.5767, "step": 4419 }, { "epoch": 0.31, "grad_norm": 1.555416722844969, "learning_rate": 8.034491884851033e-06, "loss": 0.5659, "step": 4420 }, { "epoch": 0.31, "grad_norm": 3.0922247352723184, "learning_rate": 8.033578470433446e-06, "loss": 0.5018, "step": 4421 }, { "epoch": 0.31, "grad_norm": 1.2813486735182682, "learning_rate": 8.032664895771962e-06, "loss": 0.4989, "step": 4422 }, { "epoch": 0.31, "grad_norm": 1.8741227824420426, "learning_rate": 8.031751160914843e-06, "loss": 0.6166, "step": 4423 }, { "epoch": 0.31, "grad_norm": 1.5000998441967948, "learning_rate": 8.030837265910357e-06, "loss": 0.6041, "step": 4424 }, { "epoch": 0.31, "grad_norm": 2.129383033356509, "learning_rate": 8.029923210806774e-06, "loss": 0.5392, "step": 4425 }, { "epoch": 0.31, "grad_norm": 1.8130086667318466, "learning_rate": 8.029008995652382e-06, "loss": 0.5489, "step": 4426 }, { "epoch": 0.31, "grad_norm": 1.3792791624346519, "learning_rate": 8.02809462049547e-06, "loss": 0.5078, "step": 4427 }, { "epoch": 0.31, "grad_norm": 1.6996148349689444, "learning_rate": 8.02718008538434e-06, "loss": 0.5372, "step": 4428 }, { "epoch": 0.31, "grad_norm": 2.1900426952872643, "learning_rate": 8.026265390367301e-06, "loss": 0.5019, "step": 4429 }, { "epoch": 0.31, "grad_norm": 2.593723290340787, "learning_rate": 8.025350535492668e-06, "loss": 0.5334, "step": 4430 }, { "epoch": 0.31, "grad_norm": 10.44624339850832, "learning_rate": 8.024435520808771e-06, "loss": 0.5334, "step": 4431 }, { "epoch": 0.31, "grad_norm": 1.5701165663393772, "learning_rate": 8.02352034636394e-06, "loss": 0.4924, "step": 4432 }, { "epoch": 0.31, "grad_norm": 1.4352567825167777, "learning_rate": 8.02260501220652e-06, "loss": 0.4856, "step": 4433 }, { "epoch": 0.31, "grad_norm": 1.516147223360259, "learning_rate": 8.021689518384861e-06, "loss": 0.5091, "step": 4434 }, { "epoch": 0.31, "grad_norm": 3.545874455876537, "learning_rate": 8.020773864947322e-06, "loss": 0.5816, "step": 4435 }, { "epoch": 0.31, "grad_norm": 0.8606104695082516, "learning_rate": 8.019858051942272e-06, "loss": 0.471, "step": 4436 }, { "epoch": 0.31, "grad_norm": 1.654508153617837, "learning_rate": 8.018942079418086e-06, "loss": 0.4604, "step": 4437 }, { "epoch": 0.31, "grad_norm": 1.543569895367182, "learning_rate": 8.018025947423152e-06, "loss": 0.5101, "step": 4438 }, { "epoch": 0.32, "grad_norm": 0.7265870282893965, "learning_rate": 8.01710965600586e-06, "loss": 0.4553, "step": 4439 }, { "epoch": 0.32, "grad_norm": 1.6121640993008763, "learning_rate": 8.016193205214611e-06, "loss": 0.5783, "step": 4440 }, { "epoch": 0.32, "grad_norm": 1.7533551803530916, "learning_rate": 8.015276595097819e-06, "loss": 0.5957, "step": 4441 }, { "epoch": 0.32, "grad_norm": 1.6588017944263151, "learning_rate": 8.014359825703899e-06, "loss": 0.5314, "step": 4442 }, { "epoch": 0.32, "grad_norm": 1.9388030653136485, "learning_rate": 8.01344289708128e-06, "loss": 0.5418, "step": 4443 }, { "epoch": 0.32, "grad_norm": 0.9020992811052921, "learning_rate": 8.012525809278395e-06, "loss": 0.4479, "step": 4444 }, { "epoch": 0.32, "grad_norm": 1.9351774861741087, "learning_rate": 8.01160856234369e-06, "loss": 0.5352, "step": 4445 }, { "epoch": 0.32, "grad_norm": 1.4859691769592243, "learning_rate": 8.010691156325615e-06, "loss": 0.5188, "step": 4446 }, { "epoch": 0.32, "grad_norm": 1.5151457406320967, "learning_rate": 8.009773591272632e-06, "loss": 0.5859, "step": 4447 }, { "epoch": 0.32, "grad_norm": 1.5703420199436302, "learning_rate": 8.00885586723321e-06, "loss": 0.5136, "step": 4448 }, { "epoch": 0.32, "grad_norm": 5.396491164384794, "learning_rate": 8.007937984255825e-06, "loss": 0.5428, "step": 4449 }, { "epoch": 0.32, "grad_norm": 1.6465074732482576, "learning_rate": 8.007019942388962e-06, "loss": 0.624, "step": 4450 }, { "epoch": 0.32, "grad_norm": 1.6029632061940269, "learning_rate": 8.006101741681118e-06, "loss": 0.5623, "step": 4451 }, { "epoch": 0.32, "grad_norm": 2.4565659717098747, "learning_rate": 8.005183382180794e-06, "loss": 0.54, "step": 4452 }, { "epoch": 0.32, "grad_norm": 1.555647189325073, "learning_rate": 8.004264863936502e-06, "loss": 0.5572, "step": 4453 }, { "epoch": 0.32, "grad_norm": 1.828365628222904, "learning_rate": 8.003346186996756e-06, "loss": 0.6101, "step": 4454 }, { "epoch": 0.32, "grad_norm": 1.6901478939052879, "learning_rate": 8.00242735141009e-06, "loss": 0.5556, "step": 4455 }, { "epoch": 0.32, "grad_norm": 0.8712724233170649, "learning_rate": 8.00150835722504e-06, "loss": 0.4466, "step": 4456 }, { "epoch": 0.32, "grad_norm": 1.7088807555980134, "learning_rate": 8.000589204490146e-06, "loss": 0.57, "step": 4457 }, { "epoch": 0.32, "grad_norm": 3.2141540731027596, "learning_rate": 7.999669893253962e-06, "loss": 0.5314, "step": 4458 }, { "epoch": 0.32, "grad_norm": 0.6963331081269266, "learning_rate": 7.998750423565051e-06, "loss": 0.4718, "step": 4459 }, { "epoch": 0.32, "grad_norm": 2.17820570800185, "learning_rate": 7.997830795471982e-06, "loss": 0.5976, "step": 4460 }, { "epoch": 0.32, "grad_norm": 1.774226659078033, "learning_rate": 7.996911009023332e-06, "loss": 0.5779, "step": 4461 }, { "epoch": 0.32, "grad_norm": 1.9927187723692574, "learning_rate": 7.995991064267688e-06, "loss": 0.527, "step": 4462 }, { "epoch": 0.32, "grad_norm": 1.7032996985433786, "learning_rate": 7.995070961253643e-06, "loss": 0.5751, "step": 4463 }, { "epoch": 0.32, "grad_norm": 1.5149946396537313, "learning_rate": 7.994150700029802e-06, "loss": 0.5839, "step": 4464 }, { "epoch": 0.32, "grad_norm": 2.4162634144981725, "learning_rate": 7.993230280644776e-06, "loss": 0.541, "step": 4465 }, { "epoch": 0.32, "grad_norm": 2.676480270237703, "learning_rate": 7.992309703147182e-06, "loss": 0.5292, "step": 4466 }, { "epoch": 0.32, "grad_norm": 1.479740266899018, "learning_rate": 7.991388967585651e-06, "loss": 0.6112, "step": 4467 }, { "epoch": 0.32, "grad_norm": 1.7723440645167579, "learning_rate": 7.99046807400882e-06, "loss": 0.6242, "step": 4468 }, { "epoch": 0.32, "grad_norm": 1.59285037317661, "learning_rate": 7.989547022465329e-06, "loss": 0.5629, "step": 4469 }, { "epoch": 0.32, "grad_norm": 1.5945130490743458, "learning_rate": 7.988625813003837e-06, "loss": 0.5705, "step": 4470 }, { "epoch": 0.32, "grad_norm": 1.4857965778624243, "learning_rate": 7.987704445673003e-06, "loss": 0.5371, "step": 4471 }, { "epoch": 0.32, "grad_norm": 2.2266456793826177, "learning_rate": 7.986782920521494e-06, "loss": 0.516, "step": 4472 }, { "epoch": 0.32, "grad_norm": 1.6672977359438765, "learning_rate": 7.985861237597994e-06, "loss": 0.5026, "step": 4473 }, { "epoch": 0.32, "grad_norm": 1.5547223361143907, "learning_rate": 7.984939396951183e-06, "loss": 0.5309, "step": 4474 }, { "epoch": 0.32, "grad_norm": 1.5197904351312341, "learning_rate": 7.98401739862976e-06, "loss": 0.5885, "step": 4475 }, { "epoch": 0.32, "grad_norm": 0.9498977692662619, "learning_rate": 7.983095242682427e-06, "loss": 0.4787, "step": 4476 }, { "epoch": 0.32, "grad_norm": 0.8689202304987352, "learning_rate": 7.982172929157894e-06, "loss": 0.454, "step": 4477 }, { "epoch": 0.32, "grad_norm": 1.6338223805969365, "learning_rate": 7.981250458104884e-06, "loss": 0.5952, "step": 4478 }, { "epoch": 0.32, "grad_norm": 1.7749455954973212, "learning_rate": 7.980327829572121e-06, "loss": 0.5794, "step": 4479 }, { "epoch": 0.32, "grad_norm": 1.6229458195489774, "learning_rate": 7.979405043608343e-06, "loss": 0.6236, "step": 4480 }, { "epoch": 0.32, "grad_norm": 1.6329162251794298, "learning_rate": 7.978482100262296e-06, "loss": 0.6409, "step": 4481 }, { "epoch": 0.32, "grad_norm": 2.193643476827921, "learning_rate": 7.977558999582732e-06, "loss": 0.5802, "step": 4482 }, { "epoch": 0.32, "grad_norm": 1.6749903668566997, "learning_rate": 7.976635741618413e-06, "loss": 0.5301, "step": 4483 }, { "epoch": 0.32, "grad_norm": 1.6615565346152998, "learning_rate": 7.975712326418105e-06, "loss": 0.5751, "step": 4484 }, { "epoch": 0.32, "grad_norm": 1.62038807559947, "learning_rate": 7.974788754030594e-06, "loss": 0.6057, "step": 4485 }, { "epoch": 0.32, "grad_norm": 1.484562265849183, "learning_rate": 7.973865024504657e-06, "loss": 0.5687, "step": 4486 }, { "epoch": 0.32, "grad_norm": 1.6132801034854432, "learning_rate": 7.972941137889091e-06, "loss": 0.5501, "step": 4487 }, { "epoch": 0.32, "grad_norm": 1.6833034714854715, "learning_rate": 7.972017094232704e-06, "loss": 0.5743, "step": 4488 }, { "epoch": 0.32, "grad_norm": 1.7706967026271705, "learning_rate": 7.971092893584303e-06, "loss": 0.6035, "step": 4489 }, { "epoch": 0.32, "grad_norm": 1.7192143859114, "learning_rate": 7.970168535992708e-06, "loss": 0.568, "step": 4490 }, { "epoch": 0.32, "grad_norm": 1.6111346607601964, "learning_rate": 7.969244021506746e-06, "loss": 0.5628, "step": 4491 }, { "epoch": 0.32, "grad_norm": 1.7837576470298593, "learning_rate": 7.968319350175253e-06, "loss": 0.5817, "step": 4492 }, { "epoch": 0.32, "grad_norm": 2.3521576851212807, "learning_rate": 7.967394522047074e-06, "loss": 0.5707, "step": 4493 }, { "epoch": 0.32, "grad_norm": 1.5679528794355437, "learning_rate": 7.966469537171062e-06, "loss": 0.581, "step": 4494 }, { "epoch": 0.32, "grad_norm": 1.4696717201102523, "learning_rate": 7.965544395596078e-06, "loss": 0.5348, "step": 4495 }, { "epoch": 0.32, "grad_norm": 1.9174997555897697, "learning_rate": 7.96461909737099e-06, "loss": 0.5224, "step": 4496 }, { "epoch": 0.32, "grad_norm": 1.654877502960659, "learning_rate": 7.963693642544674e-06, "loss": 0.5754, "step": 4497 }, { "epoch": 0.32, "grad_norm": 1.5030211064366676, "learning_rate": 7.96276803116602e-06, "loss": 0.6039, "step": 4498 }, { "epoch": 0.32, "grad_norm": 1.4837898793545214, "learning_rate": 7.961842263283917e-06, "loss": 0.567, "step": 4499 }, { "epoch": 0.32, "grad_norm": 2.123706498826837, "learning_rate": 7.960916338947272e-06, "loss": 0.5803, "step": 4500 }, { "epoch": 0.32, "grad_norm": 1.5059772850905464, "learning_rate": 7.959990258204991e-06, "loss": 0.5078, "step": 4501 }, { "epoch": 0.32, "grad_norm": 0.779109182585933, "learning_rate": 7.959064021105996e-06, "loss": 0.4295, "step": 4502 }, { "epoch": 0.32, "grad_norm": 1.847811338926398, "learning_rate": 7.958137627699212e-06, "loss": 0.5925, "step": 4503 }, { "epoch": 0.32, "grad_norm": 1.9636184946817203, "learning_rate": 7.957211078033575e-06, "loss": 0.5919, "step": 4504 }, { "epoch": 0.32, "grad_norm": 1.6577629756275731, "learning_rate": 7.956284372158028e-06, "loss": 0.6655, "step": 4505 }, { "epoch": 0.32, "grad_norm": 1.5846649385824587, "learning_rate": 7.955357510121524e-06, "loss": 0.4945, "step": 4506 }, { "epoch": 0.32, "grad_norm": 1.8124067855794213, "learning_rate": 7.954430491973023e-06, "loss": 0.5129, "step": 4507 }, { "epoch": 0.32, "grad_norm": 2.016856522753694, "learning_rate": 7.95350331776149e-06, "loss": 0.5098, "step": 4508 }, { "epoch": 0.32, "grad_norm": 1.6177259720635517, "learning_rate": 7.952575987535907e-06, "loss": 0.5827, "step": 4509 }, { "epoch": 0.32, "grad_norm": 1.6319720356922793, "learning_rate": 7.951648501345252e-06, "loss": 0.5377, "step": 4510 }, { "epoch": 0.32, "grad_norm": 1.478789518858757, "learning_rate": 7.950720859238526e-06, "loss": 0.5319, "step": 4511 }, { "epoch": 0.32, "grad_norm": 0.8025768120310639, "learning_rate": 7.949793061264723e-06, "loss": 0.426, "step": 4512 }, { "epoch": 0.32, "grad_norm": 1.9550486707947519, "learning_rate": 7.948865107472857e-06, "loss": 0.592, "step": 4513 }, { "epoch": 0.32, "grad_norm": 1.4653801111503884, "learning_rate": 7.94793699791194e-06, "loss": 0.5601, "step": 4514 }, { "epoch": 0.32, "grad_norm": 2.0203183975002426, "learning_rate": 7.947008732631006e-06, "loss": 0.5809, "step": 4515 }, { "epoch": 0.32, "grad_norm": 1.655132457114526, "learning_rate": 7.946080311679084e-06, "loss": 0.4701, "step": 4516 }, { "epoch": 0.32, "grad_norm": 1.822212767802605, "learning_rate": 7.945151735105215e-06, "loss": 0.578, "step": 4517 }, { "epoch": 0.32, "grad_norm": 1.589070411424292, "learning_rate": 7.944223002958453e-06, "loss": 0.6412, "step": 4518 }, { "epoch": 0.32, "grad_norm": 2.1119884663302275, "learning_rate": 7.943294115287856e-06, "loss": 0.5859, "step": 4519 }, { "epoch": 0.32, "grad_norm": 1.8199709617786453, "learning_rate": 7.94236507214249e-06, "loss": 0.5654, "step": 4520 }, { "epoch": 0.32, "grad_norm": 1.9863992137782003, "learning_rate": 7.941435873571432e-06, "loss": 0.628, "step": 4521 }, { "epoch": 0.32, "grad_norm": 1.5501456685426465, "learning_rate": 7.940506519623765e-06, "loss": 0.5113, "step": 4522 }, { "epoch": 0.32, "grad_norm": 1.5485640364553208, "learning_rate": 7.939577010348577e-06, "loss": 0.598, "step": 4523 }, { "epoch": 0.32, "grad_norm": 1.692670723414335, "learning_rate": 7.938647345794972e-06, "loss": 0.5609, "step": 4524 }, { "epoch": 0.32, "grad_norm": 1.792265035856386, "learning_rate": 7.937717526012058e-06, "loss": 0.5915, "step": 4525 }, { "epoch": 0.32, "grad_norm": 1.602092458893521, "learning_rate": 7.936787551048949e-06, "loss": 0.4977, "step": 4526 }, { "epoch": 0.32, "grad_norm": 1.4681985428946687, "learning_rate": 7.935857420954769e-06, "loss": 0.6056, "step": 4527 }, { "epoch": 0.32, "grad_norm": 1.7377218132451826, "learning_rate": 7.934927135778654e-06, "loss": 0.6101, "step": 4528 }, { "epoch": 0.32, "grad_norm": 1.875245943356162, "learning_rate": 7.93399669556974e-06, "loss": 0.5279, "step": 4529 }, { "epoch": 0.32, "grad_norm": 1.6336648725416023, "learning_rate": 7.933066100377183e-06, "loss": 0.5212, "step": 4530 }, { "epoch": 0.32, "grad_norm": 1.7624653517696096, "learning_rate": 7.932135350250132e-06, "loss": 0.5701, "step": 4531 }, { "epoch": 0.32, "grad_norm": 1.7437186628510541, "learning_rate": 7.931204445237758e-06, "loss": 0.5833, "step": 4532 }, { "epoch": 0.32, "grad_norm": 1.3818810625459443, "learning_rate": 7.930273385389234e-06, "loss": 0.5409, "step": 4533 }, { "epoch": 0.32, "grad_norm": 0.8948879882738449, "learning_rate": 7.92934217075374e-06, "loss": 0.4343, "step": 4534 }, { "epoch": 0.32, "grad_norm": 1.6940430377133815, "learning_rate": 7.928410801380466e-06, "loss": 0.6134, "step": 4535 }, { "epoch": 0.32, "grad_norm": 1.9890566100859006, "learning_rate": 7.92747927731861e-06, "loss": 0.5464, "step": 4536 }, { "epoch": 0.32, "grad_norm": 1.7230605041486837, "learning_rate": 7.92654759861738e-06, "loss": 0.5584, "step": 4537 }, { "epoch": 0.32, "grad_norm": 0.7690049472874627, "learning_rate": 7.92561576532599e-06, "loss": 0.4479, "step": 4538 }, { "epoch": 0.32, "grad_norm": 1.6070010054957473, "learning_rate": 7.92468377749366e-06, "loss": 0.6161, "step": 4539 }, { "epoch": 0.32, "grad_norm": 1.6997033101963424, "learning_rate": 7.923751635169624e-06, "loss": 0.5192, "step": 4540 }, { "epoch": 0.32, "grad_norm": 1.506006489709928, "learning_rate": 7.92281933840312e-06, "loss": 0.5126, "step": 4541 }, { "epoch": 0.32, "grad_norm": 1.5886827403886812, "learning_rate": 7.921886887243393e-06, "loss": 0.5293, "step": 4542 }, { "epoch": 0.32, "grad_norm": 1.5581380097440565, "learning_rate": 7.920954281739701e-06, "loss": 0.5345, "step": 4543 }, { "epoch": 0.32, "grad_norm": 2.263067295143041, "learning_rate": 7.920021521941305e-06, "loss": 0.5491, "step": 4544 }, { "epoch": 0.32, "grad_norm": 1.6491038997250576, "learning_rate": 7.91908860789748e-06, "loss": 0.579, "step": 4545 }, { "epoch": 0.32, "grad_norm": 1.911170818628138, "learning_rate": 7.918155539657503e-06, "loss": 0.6028, "step": 4546 }, { "epoch": 0.32, "grad_norm": 1.6743111864688445, "learning_rate": 7.917222317270662e-06, "loss": 0.6399, "step": 4547 }, { "epoch": 0.32, "grad_norm": 1.4539380265204296, "learning_rate": 7.916288940786253e-06, "loss": 0.4895, "step": 4548 }, { "epoch": 0.32, "grad_norm": 1.80338165281254, "learning_rate": 7.915355410253582e-06, "loss": 0.5568, "step": 4549 }, { "epoch": 0.32, "grad_norm": 2.6066257252539202, "learning_rate": 7.914421725721957e-06, "loss": 0.621, "step": 4550 }, { "epoch": 0.32, "grad_norm": 2.094983957310518, "learning_rate": 7.913487887240703e-06, "loss": 0.5799, "step": 4551 }, { "epoch": 0.32, "grad_norm": 0.8670385040078306, "learning_rate": 7.912553894859146e-06, "loss": 0.4763, "step": 4552 }, { "epoch": 0.32, "grad_norm": 1.750342217783563, "learning_rate": 7.911619748626622e-06, "loss": 0.4741, "step": 4553 }, { "epoch": 0.32, "grad_norm": 5.999353517009271, "learning_rate": 7.910685448592478e-06, "loss": 0.6589, "step": 4554 }, { "epoch": 0.32, "grad_norm": 1.5034931748475453, "learning_rate": 7.909750994806066e-06, "loss": 0.5266, "step": 4555 }, { "epoch": 0.32, "grad_norm": 1.7745994152209272, "learning_rate": 7.908816387316747e-06, "loss": 0.5339, "step": 4556 }, { "epoch": 0.32, "grad_norm": 0.6888961189850827, "learning_rate": 7.90788162617389e-06, "loss": 0.459, "step": 4557 }, { "epoch": 0.32, "grad_norm": 1.4458786749830184, "learning_rate": 7.906946711426873e-06, "loss": 0.5795, "step": 4558 }, { "epoch": 0.32, "grad_norm": 0.8468161259941396, "learning_rate": 7.906011643125078e-06, "loss": 0.4682, "step": 4559 }, { "epoch": 0.32, "grad_norm": 2.089786054052781, "learning_rate": 7.905076421317904e-06, "loss": 0.5373, "step": 4560 }, { "epoch": 0.32, "grad_norm": 1.730184182601446, "learning_rate": 7.904141046054747e-06, "loss": 0.5386, "step": 4561 }, { "epoch": 0.32, "grad_norm": 1.5609528597825848, "learning_rate": 7.903205517385022e-06, "loss": 0.5828, "step": 4562 }, { "epoch": 0.32, "grad_norm": 2.083619669158488, "learning_rate": 7.902269835358142e-06, "loss": 0.5621, "step": 4563 }, { "epoch": 0.32, "grad_norm": 1.5878764924991424, "learning_rate": 7.901334000023537e-06, "loss": 0.5991, "step": 4564 }, { "epoch": 0.32, "grad_norm": 1.6870373862434531, "learning_rate": 7.900398011430638e-06, "loss": 0.6115, "step": 4565 }, { "epoch": 0.32, "grad_norm": 1.9518547947943425, "learning_rate": 7.899461869628889e-06, "loss": 0.5782, "step": 4566 }, { "epoch": 0.32, "grad_norm": 1.5565734679743028, "learning_rate": 7.898525574667737e-06, "loss": 0.5346, "step": 4567 }, { "epoch": 0.32, "grad_norm": 1.4811358773251708, "learning_rate": 7.897589126596645e-06, "loss": 0.5248, "step": 4568 }, { "epoch": 0.32, "grad_norm": 0.9315930240557068, "learning_rate": 7.896652525465077e-06, "loss": 0.4686, "step": 4569 }, { "epoch": 0.32, "grad_norm": 2.154826606486221, "learning_rate": 7.895715771322505e-06, "loss": 0.5661, "step": 4570 }, { "epoch": 0.32, "grad_norm": 1.757907272582158, "learning_rate": 7.894778864218415e-06, "loss": 0.6543, "step": 4571 }, { "epoch": 0.32, "grad_norm": 0.7945292097449744, "learning_rate": 7.893841804202299e-06, "loss": 0.4514, "step": 4572 }, { "epoch": 0.32, "grad_norm": 1.6825525465514926, "learning_rate": 7.892904591323651e-06, "loss": 0.5476, "step": 4573 }, { "epoch": 0.32, "grad_norm": 4.269125601171483, "learning_rate": 7.891967225631979e-06, "loss": 0.5375, "step": 4574 }, { "epoch": 0.32, "grad_norm": 1.8069818987147572, "learning_rate": 7.8910297071768e-06, "loss": 0.5343, "step": 4575 }, { "epoch": 0.32, "grad_norm": 1.8032211616448546, "learning_rate": 7.890092036007636e-06, "loss": 0.565, "step": 4576 }, { "epoch": 0.32, "grad_norm": 1.5646533408185028, "learning_rate": 7.889154212174018e-06, "loss": 0.5207, "step": 4577 }, { "epoch": 0.32, "grad_norm": 2.056784263535567, "learning_rate": 7.888216235725484e-06, "loss": 0.6256, "step": 4578 }, { "epoch": 0.32, "grad_norm": 1.5199069206278482, "learning_rate": 7.887278106711582e-06, "loss": 0.4808, "step": 4579 }, { "epoch": 0.33, "grad_norm": 1.7348565057699274, "learning_rate": 7.886339825181865e-06, "loss": 0.5661, "step": 4580 }, { "epoch": 0.33, "grad_norm": 1.3889889460426779, "learning_rate": 7.885401391185902e-06, "loss": 0.5195, "step": 4581 }, { "epoch": 0.33, "grad_norm": 1.7677813706179504, "learning_rate": 7.884462804773259e-06, "loss": 0.5004, "step": 4582 }, { "epoch": 0.33, "grad_norm": 1.7372542242985938, "learning_rate": 7.883524065993515e-06, "loss": 0.5755, "step": 4583 }, { "epoch": 0.33, "grad_norm": 3.3901428963683937, "learning_rate": 7.882585174896261e-06, "loss": 0.5182, "step": 4584 }, { "epoch": 0.33, "grad_norm": 5.383043571236327, "learning_rate": 7.88164613153109e-06, "loss": 0.578, "step": 4585 }, { "epoch": 0.33, "grad_norm": 1.5333468773117762, "learning_rate": 7.880706935947605e-06, "loss": 0.545, "step": 4586 }, { "epoch": 0.33, "grad_norm": 1.5583513814657557, "learning_rate": 7.87976758819542e-06, "loss": 0.4879, "step": 4587 }, { "epoch": 0.33, "grad_norm": 1.6635498370437123, "learning_rate": 7.878828088324154e-06, "loss": 0.5659, "step": 4588 }, { "epoch": 0.33, "grad_norm": 1.4655871037612245, "learning_rate": 7.877888436383431e-06, "loss": 0.5789, "step": 4589 }, { "epoch": 0.33, "grad_norm": 1.5815802112081216, "learning_rate": 7.87694863242289e-06, "loss": 0.6199, "step": 4590 }, { "epoch": 0.33, "grad_norm": 0.8583607672441397, "learning_rate": 7.876008676492175e-06, "loss": 0.4775, "step": 4591 }, { "epoch": 0.33, "grad_norm": 0.8064417051506307, "learning_rate": 7.875068568640935e-06, "loss": 0.4671, "step": 4592 }, { "epoch": 0.33, "grad_norm": 1.963628589123776, "learning_rate": 7.874128308918831e-06, "loss": 0.5512, "step": 4593 }, { "epoch": 0.33, "grad_norm": 1.8399619705050907, "learning_rate": 7.873187897375531e-06, "loss": 0.5428, "step": 4594 }, { "epoch": 0.33, "grad_norm": 1.6640215094157382, "learning_rate": 7.872247334060712e-06, "loss": 0.5871, "step": 4595 }, { "epoch": 0.33, "grad_norm": 1.7290593151739144, "learning_rate": 7.871306619024055e-06, "loss": 0.6148, "step": 4596 }, { "epoch": 0.33, "grad_norm": 1.4707844127066123, "learning_rate": 7.870365752315252e-06, "loss": 0.5147, "step": 4597 }, { "epoch": 0.33, "grad_norm": 1.5874864111277935, "learning_rate": 7.869424733984009e-06, "loss": 0.5896, "step": 4598 }, { "epoch": 0.33, "grad_norm": 1.9431718931373823, "learning_rate": 7.868483564080022e-06, "loss": 0.5378, "step": 4599 }, { "epoch": 0.33, "grad_norm": 1.48801425124575, "learning_rate": 7.867542242653017e-06, "loss": 0.5572, "step": 4600 }, { "epoch": 0.33, "grad_norm": 1.4218421316550778, "learning_rate": 7.866600769752714e-06, "loss": 0.5256, "step": 4601 }, { "epoch": 0.33, "grad_norm": 1.7015423958575833, "learning_rate": 7.865659145428843e-06, "loss": 0.5796, "step": 4602 }, { "epoch": 0.33, "grad_norm": 2.1177174559776963, "learning_rate": 7.864717369731148e-06, "loss": 0.5535, "step": 4603 }, { "epoch": 0.33, "grad_norm": 1.557064133080552, "learning_rate": 7.863775442709374e-06, "loss": 0.5607, "step": 4604 }, { "epoch": 0.33, "grad_norm": 1.532061875173884, "learning_rate": 7.862833364413277e-06, "loss": 0.5196, "step": 4605 }, { "epoch": 0.33, "grad_norm": 1.75511028043191, "learning_rate": 7.861891134892621e-06, "loss": 0.5585, "step": 4606 }, { "epoch": 0.33, "grad_norm": 1.631777713071407, "learning_rate": 7.860948754197178e-06, "loss": 0.5916, "step": 4607 }, { "epoch": 0.33, "grad_norm": 1.4974051628079441, "learning_rate": 7.860006222376729e-06, "loss": 0.5338, "step": 4608 }, { "epoch": 0.33, "grad_norm": 1.7751561468747112, "learning_rate": 7.859063539481057e-06, "loss": 0.5735, "step": 4609 }, { "epoch": 0.33, "grad_norm": 1.7379646131914435, "learning_rate": 7.858120705559963e-06, "loss": 0.5607, "step": 4610 }, { "epoch": 0.33, "grad_norm": 1.5623552436581924, "learning_rate": 7.85717772066325e-06, "loss": 0.5734, "step": 4611 }, { "epoch": 0.33, "grad_norm": 1.77886386217676, "learning_rate": 7.856234584840725e-06, "loss": 0.5581, "step": 4612 }, { "epoch": 0.33, "grad_norm": 1.6647345541964371, "learning_rate": 7.855291298142214e-06, "loss": 0.539, "step": 4613 }, { "epoch": 0.33, "grad_norm": 2.7969402416441036, "learning_rate": 7.85434786061754e-06, "loss": 0.594, "step": 4614 }, { "epoch": 0.33, "grad_norm": 1.6498719215756772, "learning_rate": 7.85340427231654e-06, "loss": 0.5527, "step": 4615 }, { "epoch": 0.33, "grad_norm": 1.5256073249994104, "learning_rate": 7.852460533289059e-06, "loss": 0.5636, "step": 4616 }, { "epoch": 0.33, "grad_norm": 1.7930427929133756, "learning_rate": 7.851516643584947e-06, "loss": 0.5453, "step": 4617 }, { "epoch": 0.33, "grad_norm": 4.083199155161327, "learning_rate": 7.850572603254064e-06, "loss": 0.5372, "step": 4618 }, { "epoch": 0.33, "grad_norm": 1.6563014730117527, "learning_rate": 7.849628412346276e-06, "loss": 0.5376, "step": 4619 }, { "epoch": 0.33, "grad_norm": 1.6793216427305293, "learning_rate": 7.848684070911458e-06, "loss": 0.5828, "step": 4620 }, { "epoch": 0.33, "grad_norm": 0.836824767348362, "learning_rate": 7.847739578999496e-06, "loss": 0.4524, "step": 4621 }, { "epoch": 0.33, "grad_norm": 1.5645723098887703, "learning_rate": 7.846794936660281e-06, "loss": 0.5595, "step": 4622 }, { "epoch": 0.33, "grad_norm": 1.4858043384798767, "learning_rate": 7.845850143943709e-06, "loss": 0.5323, "step": 4623 }, { "epoch": 0.33, "grad_norm": 1.5141223949141884, "learning_rate": 7.844905200899693e-06, "loss": 0.4864, "step": 4624 }, { "epoch": 0.33, "grad_norm": 2.1167184510443833, "learning_rate": 7.843960107578142e-06, "loss": 0.5286, "step": 4625 }, { "epoch": 0.33, "grad_norm": 1.5840746989672634, "learning_rate": 7.843014864028981e-06, "loss": 0.5395, "step": 4626 }, { "epoch": 0.33, "grad_norm": 2.668708501608903, "learning_rate": 7.842069470302143e-06, "loss": 0.5782, "step": 4627 }, { "epoch": 0.33, "grad_norm": 1.5108115132468567, "learning_rate": 7.841123926447565e-06, "loss": 0.5882, "step": 4628 }, { "epoch": 0.33, "grad_norm": 1.8768684469320043, "learning_rate": 7.840178232515192e-06, "loss": 0.571, "step": 4629 }, { "epoch": 0.33, "grad_norm": 0.7977295751028157, "learning_rate": 7.839232388554982e-06, "loss": 0.4551, "step": 4630 }, { "epoch": 0.33, "grad_norm": 3.272430883247567, "learning_rate": 7.838286394616898e-06, "loss": 0.5961, "step": 4631 }, { "epoch": 0.33, "grad_norm": 1.66954679938459, "learning_rate": 7.837340250750909e-06, "loss": 0.5214, "step": 4632 }, { "epoch": 0.33, "grad_norm": 1.591472261671352, "learning_rate": 7.836393957006993e-06, "loss": 0.5559, "step": 4633 }, { "epoch": 0.33, "grad_norm": 1.6409916784730205, "learning_rate": 7.835447513435136e-06, "loss": 0.5219, "step": 4634 }, { "epoch": 0.33, "grad_norm": 1.6146693010694948, "learning_rate": 7.834500920085337e-06, "loss": 0.5827, "step": 4635 }, { "epoch": 0.33, "grad_norm": 3.706783098139606, "learning_rate": 7.833554177007592e-06, "loss": 0.5125, "step": 4636 }, { "epoch": 0.33, "grad_norm": 2.0514074933372104, "learning_rate": 7.832607284251913e-06, "loss": 0.5583, "step": 4637 }, { "epoch": 0.33, "grad_norm": 1.9033711019814257, "learning_rate": 7.831660241868322e-06, "loss": 0.5539, "step": 4638 }, { "epoch": 0.33, "grad_norm": 2.2857120424115993, "learning_rate": 7.83071304990684e-06, "loss": 0.6027, "step": 4639 }, { "epoch": 0.33, "grad_norm": 1.4285901373059415, "learning_rate": 7.829765708417503e-06, "loss": 0.5361, "step": 4640 }, { "epoch": 0.33, "grad_norm": 1.5779745956193771, "learning_rate": 7.828818217450355e-06, "loss": 0.5956, "step": 4641 }, { "epoch": 0.33, "grad_norm": 1.697777139088535, "learning_rate": 7.82787057705544e-06, "loss": 0.5605, "step": 4642 }, { "epoch": 0.33, "grad_norm": 2.568037101748435, "learning_rate": 7.826922787282823e-06, "loss": 0.5001, "step": 4643 }, { "epoch": 0.33, "grad_norm": 1.5528532343978865, "learning_rate": 7.825974848182562e-06, "loss": 0.5233, "step": 4644 }, { "epoch": 0.33, "grad_norm": 2.0639953023548263, "learning_rate": 7.825026759804735e-06, "loss": 0.5712, "step": 4645 }, { "epoch": 0.33, "grad_norm": 1.5706428731727844, "learning_rate": 7.824078522199422e-06, "loss": 0.5402, "step": 4646 }, { "epoch": 0.33, "grad_norm": 1.9357097742177605, "learning_rate": 7.823130135416713e-06, "loss": 0.6155, "step": 4647 }, { "epoch": 0.33, "grad_norm": 1.6616978216312759, "learning_rate": 7.822181599506704e-06, "loss": 0.5439, "step": 4648 }, { "epoch": 0.33, "grad_norm": 0.884321109577002, "learning_rate": 7.821232914519498e-06, "loss": 0.4549, "step": 4649 }, { "epoch": 0.33, "grad_norm": 1.5384978611034608, "learning_rate": 7.820284080505211e-06, "loss": 0.5461, "step": 4650 }, { "epoch": 0.33, "grad_norm": 0.7098303035460066, "learning_rate": 7.819335097513962e-06, "loss": 0.4545, "step": 4651 }, { "epoch": 0.33, "grad_norm": 1.5906438671135015, "learning_rate": 7.818385965595883e-06, "loss": 0.5406, "step": 4652 }, { "epoch": 0.33, "grad_norm": 1.4788236724338404, "learning_rate": 7.817436684801102e-06, "loss": 0.5397, "step": 4653 }, { "epoch": 0.33, "grad_norm": 0.7175289152082126, "learning_rate": 7.816487255179775e-06, "loss": 0.4536, "step": 4654 }, { "epoch": 0.33, "grad_norm": 1.5646691128932102, "learning_rate": 7.815537676782043e-06, "loss": 0.5402, "step": 4655 }, { "epoch": 0.33, "grad_norm": 1.6355933689192153, "learning_rate": 7.814587949658073e-06, "loss": 0.5537, "step": 4656 }, { "epoch": 0.33, "grad_norm": 1.5262555981607469, "learning_rate": 7.813638073858027e-06, "loss": 0.5469, "step": 4657 }, { "epoch": 0.33, "grad_norm": 1.7682879304286492, "learning_rate": 7.812688049432087e-06, "loss": 0.5771, "step": 4658 }, { "epoch": 0.33, "grad_norm": 0.7736915895546868, "learning_rate": 7.811737876430432e-06, "loss": 0.4589, "step": 4659 }, { "epoch": 0.33, "grad_norm": 1.7409465534816897, "learning_rate": 7.810787554903257e-06, "loss": 0.5737, "step": 4660 }, { "epoch": 0.33, "grad_norm": 1.6073603053534078, "learning_rate": 7.809837084900757e-06, "loss": 0.5384, "step": 4661 }, { "epoch": 0.33, "grad_norm": 2.2049348617365907, "learning_rate": 7.808886466473142e-06, "loss": 0.5339, "step": 4662 }, { "epoch": 0.33, "grad_norm": 1.5288906332768764, "learning_rate": 7.807935699670625e-06, "loss": 0.5647, "step": 4663 }, { "epoch": 0.33, "grad_norm": 1.6497824133016323, "learning_rate": 7.80698478454343e-06, "loss": 0.6012, "step": 4664 }, { "epoch": 0.33, "grad_norm": 1.9596783955684993, "learning_rate": 7.806033721141788e-06, "loss": 0.5904, "step": 4665 }, { "epoch": 0.33, "grad_norm": 1.8917464137497104, "learning_rate": 7.805082509515937e-06, "loss": 0.6102, "step": 4666 }, { "epoch": 0.33, "grad_norm": 1.7388903932913622, "learning_rate": 7.804131149716123e-06, "loss": 0.5014, "step": 4667 }, { "epoch": 0.33, "grad_norm": 1.9948965654940347, "learning_rate": 7.8031796417926e-06, "loss": 0.5463, "step": 4668 }, { "epoch": 0.33, "grad_norm": 1.5210416185324516, "learning_rate": 7.80222798579563e-06, "loss": 0.5249, "step": 4669 }, { "epoch": 0.33, "grad_norm": 1.6737590323359803, "learning_rate": 7.801276181775482e-06, "loss": 0.5715, "step": 4670 }, { "epoch": 0.33, "grad_norm": 1.6468094923052277, "learning_rate": 7.800324229782432e-06, "loss": 0.5528, "step": 4671 }, { "epoch": 0.33, "grad_norm": 1.6978241200573425, "learning_rate": 7.799372129866768e-06, "loss": 0.6016, "step": 4672 }, { "epoch": 0.33, "grad_norm": 1.720449361605801, "learning_rate": 7.798419882078786e-06, "loss": 0.5923, "step": 4673 }, { "epoch": 0.33, "grad_norm": 2.159172380797641, "learning_rate": 7.797467486468781e-06, "loss": 0.5161, "step": 4674 }, { "epoch": 0.33, "grad_norm": 1.8250232638762027, "learning_rate": 7.796514943087065e-06, "loss": 0.6106, "step": 4675 }, { "epoch": 0.33, "grad_norm": 0.8462319548833942, "learning_rate": 7.795562251983953e-06, "loss": 0.4868, "step": 4676 }, { "epoch": 0.33, "grad_norm": 1.7401367530245215, "learning_rate": 7.794609413209771e-06, "loss": 0.632, "step": 4677 }, { "epoch": 0.33, "grad_norm": 1.675446822150429, "learning_rate": 7.79365642681485e-06, "loss": 0.5492, "step": 4678 }, { "epoch": 0.33, "grad_norm": 1.6077974719649235, "learning_rate": 7.792703292849531e-06, "loss": 0.5525, "step": 4679 }, { "epoch": 0.33, "grad_norm": 1.8902119472429122, "learning_rate": 7.79175001136416e-06, "loss": 0.5148, "step": 4680 }, { "epoch": 0.33, "grad_norm": 1.7726909150681227, "learning_rate": 7.790796582409093e-06, "loss": 0.5685, "step": 4681 }, { "epoch": 0.33, "grad_norm": 1.4733350733383128, "learning_rate": 7.789843006034697e-06, "loss": 0.5679, "step": 4682 }, { "epoch": 0.33, "grad_norm": 2.028608192113872, "learning_rate": 7.788889282291337e-06, "loss": 0.6431, "step": 4683 }, { "epoch": 0.33, "grad_norm": 1.6052019718392103, "learning_rate": 7.787935411229396e-06, "loss": 0.5309, "step": 4684 }, { "epoch": 0.33, "grad_norm": 1.8103108674078487, "learning_rate": 7.78698139289926e-06, "loss": 0.529, "step": 4685 }, { "epoch": 0.33, "grad_norm": 1.5621525548778643, "learning_rate": 7.786027227351324e-06, "loss": 0.601, "step": 4686 }, { "epoch": 0.33, "grad_norm": 2.078850293911053, "learning_rate": 7.78507291463599e-06, "loss": 0.6155, "step": 4687 }, { "epoch": 0.33, "grad_norm": 1.7318182884196307, "learning_rate": 7.784118454803665e-06, "loss": 0.5006, "step": 4688 }, { "epoch": 0.33, "grad_norm": 1.8720910735795302, "learning_rate": 7.783163847904772e-06, "loss": 0.6014, "step": 4689 }, { "epoch": 0.33, "grad_norm": 1.6110359225124027, "learning_rate": 7.782209093989731e-06, "loss": 0.6003, "step": 4690 }, { "epoch": 0.33, "grad_norm": 1.8686830582716514, "learning_rate": 7.78125419310898e-06, "loss": 0.5923, "step": 4691 }, { "epoch": 0.33, "grad_norm": 1.6763669281892364, "learning_rate": 7.780299145312959e-06, "loss": 0.4832, "step": 4692 }, { "epoch": 0.33, "grad_norm": 1.7024436081433396, "learning_rate": 7.779343950652113e-06, "loss": 0.6448, "step": 4693 }, { "epoch": 0.33, "grad_norm": 1.4859738838254628, "learning_rate": 7.778388609176907e-06, "loss": 0.5611, "step": 4694 }, { "epoch": 0.33, "grad_norm": 1.608771143975767, "learning_rate": 7.777433120937797e-06, "loss": 0.5555, "step": 4695 }, { "epoch": 0.33, "grad_norm": 2.0446063984731637, "learning_rate": 7.776477485985258e-06, "loss": 0.6412, "step": 4696 }, { "epoch": 0.33, "grad_norm": 2.403472528777076, "learning_rate": 7.77552170436977e-06, "loss": 0.6635, "step": 4697 }, { "epoch": 0.33, "grad_norm": 1.7455552603559945, "learning_rate": 7.774565776141822e-06, "loss": 0.5759, "step": 4698 }, { "epoch": 0.33, "grad_norm": 0.7483085435675426, "learning_rate": 7.773609701351909e-06, "loss": 0.4671, "step": 4699 }, { "epoch": 0.33, "grad_norm": 2.0204353213160084, "learning_rate": 7.772653480050531e-06, "loss": 0.5978, "step": 4700 }, { "epoch": 0.33, "grad_norm": 1.6211656413041549, "learning_rate": 7.771697112288204e-06, "loss": 0.5429, "step": 4701 }, { "epoch": 0.33, "grad_norm": 2.0738403207620575, "learning_rate": 7.770740598115442e-06, "loss": 0.5899, "step": 4702 }, { "epoch": 0.33, "grad_norm": 0.7118212212258338, "learning_rate": 7.769783937582773e-06, "loss": 0.4183, "step": 4703 }, { "epoch": 0.33, "grad_norm": 1.4709072674335257, "learning_rate": 7.768827130740731e-06, "loss": 0.6237, "step": 4704 }, { "epoch": 0.33, "grad_norm": 1.7383033033246675, "learning_rate": 7.767870177639858e-06, "loss": 0.5343, "step": 4705 }, { "epoch": 0.33, "grad_norm": 1.4054312290736928, "learning_rate": 7.766913078330704e-06, "loss": 0.5789, "step": 4706 }, { "epoch": 0.33, "grad_norm": 1.703327255682883, "learning_rate": 7.765955832863826e-06, "loss": 0.5647, "step": 4707 }, { "epoch": 0.33, "grad_norm": 1.6218603614231288, "learning_rate": 7.764998441289787e-06, "loss": 0.5393, "step": 4708 }, { "epoch": 0.33, "grad_norm": 1.7264969920839324, "learning_rate": 7.764040903659163e-06, "loss": 0.5241, "step": 4709 }, { "epoch": 0.33, "grad_norm": 1.4741356557590197, "learning_rate": 7.763083220022531e-06, "loss": 0.4985, "step": 4710 }, { "epoch": 0.33, "grad_norm": 1.837266823424641, "learning_rate": 7.762125390430482e-06, "loss": 0.6346, "step": 4711 }, { "epoch": 0.33, "grad_norm": 0.7609131160799693, "learning_rate": 7.76116741493361e-06, "loss": 0.4809, "step": 4712 }, { "epoch": 0.33, "grad_norm": 1.4894214595253323, "learning_rate": 7.760209293582518e-06, "loss": 0.548, "step": 4713 }, { "epoch": 0.33, "grad_norm": 1.7235658230972803, "learning_rate": 7.759251026427819e-06, "loss": 0.5465, "step": 4714 }, { "epoch": 0.33, "grad_norm": 1.8244942025338575, "learning_rate": 7.758292613520131e-06, "loss": 0.61, "step": 4715 }, { "epoch": 0.33, "grad_norm": 1.9880235823163546, "learning_rate": 7.757334054910082e-06, "loss": 0.5765, "step": 4716 }, { "epoch": 0.33, "grad_norm": 1.7506339794659247, "learning_rate": 7.756375350648303e-06, "loss": 0.5683, "step": 4717 }, { "epoch": 0.33, "grad_norm": 1.479877763456396, "learning_rate": 7.75541650078544e-06, "loss": 0.4923, "step": 4718 }, { "epoch": 0.33, "grad_norm": 1.6462878786781092, "learning_rate": 7.75445750537214e-06, "loss": 0.6352, "step": 4719 }, { "epoch": 0.33, "grad_norm": 1.7553436849151027, "learning_rate": 7.753498364459062e-06, "loss": 0.5523, "step": 4720 }, { "epoch": 0.34, "grad_norm": 1.650376751990671, "learning_rate": 7.752539078096869e-06, "loss": 0.5208, "step": 4721 }, { "epoch": 0.34, "grad_norm": 0.7900135707471028, "learning_rate": 7.751579646336235e-06, "loss": 0.4382, "step": 4722 }, { "epoch": 0.34, "grad_norm": 2.669318441616321, "learning_rate": 7.750620069227842e-06, "loss": 0.4512, "step": 4723 }, { "epoch": 0.34, "grad_norm": 1.575551780770198, "learning_rate": 7.749660346822377e-06, "loss": 0.5665, "step": 4724 }, { "epoch": 0.34, "grad_norm": 0.8105189893333551, "learning_rate": 7.748700479170532e-06, "loss": 0.4569, "step": 4725 }, { "epoch": 0.34, "grad_norm": 1.5535775044271178, "learning_rate": 7.747740466323016e-06, "loss": 0.497, "step": 4726 }, { "epoch": 0.34, "grad_norm": 1.6290112308344942, "learning_rate": 7.746780308330539e-06, "loss": 0.587, "step": 4727 }, { "epoch": 0.34, "grad_norm": 1.6659619343101402, "learning_rate": 7.745820005243816e-06, "loss": 0.5553, "step": 4728 }, { "epoch": 0.34, "grad_norm": 1.4327982714612173, "learning_rate": 7.744859557113577e-06, "loss": 0.5269, "step": 4729 }, { "epoch": 0.34, "grad_norm": 1.8245366955185995, "learning_rate": 7.743898963990557e-06, "loss": 0.5622, "step": 4730 }, { "epoch": 0.34, "grad_norm": 2.2535716907793173, "learning_rate": 7.742938225925495e-06, "loss": 0.6181, "step": 4731 }, { "epoch": 0.34, "grad_norm": 1.6299817792803417, "learning_rate": 7.741977342969143e-06, "loss": 0.5564, "step": 4732 }, { "epoch": 0.34, "grad_norm": 1.4963578845502292, "learning_rate": 7.741016315172254e-06, "loss": 0.5687, "step": 4733 }, { "epoch": 0.34, "grad_norm": 1.5991613573377579, "learning_rate": 7.740055142585597e-06, "loss": 0.5763, "step": 4734 }, { "epoch": 0.34, "grad_norm": 1.63434430382507, "learning_rate": 7.739093825259943e-06, "loss": 0.5345, "step": 4735 }, { "epoch": 0.34, "grad_norm": 1.8867459904344575, "learning_rate": 7.738132363246072e-06, "loss": 0.5427, "step": 4736 }, { "epoch": 0.34, "grad_norm": 1.6280003008213626, "learning_rate": 7.737170756594771e-06, "loss": 0.5637, "step": 4737 }, { "epoch": 0.34, "grad_norm": 1.6247668558943018, "learning_rate": 7.736209005356835e-06, "loss": 0.5239, "step": 4738 }, { "epoch": 0.34, "grad_norm": 1.849762119696607, "learning_rate": 7.735247109583068e-06, "loss": 0.5676, "step": 4739 }, { "epoch": 0.34, "grad_norm": 1.5924661040327408, "learning_rate": 7.73428506932428e-06, "loss": 0.5506, "step": 4740 }, { "epoch": 0.34, "grad_norm": 0.8303048966748472, "learning_rate": 7.733322884631293e-06, "loss": 0.4793, "step": 4741 }, { "epoch": 0.34, "grad_norm": 1.6071786164591642, "learning_rate": 7.732360555554927e-06, "loss": 0.5286, "step": 4742 }, { "epoch": 0.34, "grad_norm": 1.589172663553812, "learning_rate": 7.731398082146017e-06, "loss": 0.5105, "step": 4743 }, { "epoch": 0.34, "grad_norm": 0.8582754112806343, "learning_rate": 7.730435464455406e-06, "loss": 0.4624, "step": 4744 }, { "epoch": 0.34, "grad_norm": 1.6061851343439688, "learning_rate": 7.729472702533942e-06, "loss": 0.5681, "step": 4745 }, { "epoch": 0.34, "grad_norm": 1.995724518229646, "learning_rate": 7.728509796432482e-06, "loss": 0.641, "step": 4746 }, { "epoch": 0.34, "grad_norm": 2.0959088797657786, "learning_rate": 7.727546746201887e-06, "loss": 0.5437, "step": 4747 }, { "epoch": 0.34, "grad_norm": 1.4560839946130193, "learning_rate": 7.726583551893033e-06, "loss": 0.5337, "step": 4748 }, { "epoch": 0.34, "grad_norm": 1.9447346179638094, "learning_rate": 7.725620213556795e-06, "loss": 0.6548, "step": 4749 }, { "epoch": 0.34, "grad_norm": 1.6064663516213205, "learning_rate": 7.724656731244062e-06, "loss": 0.6082, "step": 4750 }, { "epoch": 0.34, "grad_norm": 1.525867161260493, "learning_rate": 7.723693105005729e-06, "loss": 0.5019, "step": 4751 }, { "epoch": 0.34, "grad_norm": 1.5798112931773591, "learning_rate": 7.722729334892696e-06, "loss": 0.5348, "step": 4752 }, { "epoch": 0.34, "grad_norm": 1.9361459170354864, "learning_rate": 7.721765420955873e-06, "loss": 0.6235, "step": 4753 }, { "epoch": 0.34, "grad_norm": 0.8360129935342129, "learning_rate": 7.72080136324618e-06, "loss": 0.4485, "step": 4754 }, { "epoch": 0.34, "grad_norm": 2.6276472752364644, "learning_rate": 7.719837161814538e-06, "loss": 0.6187, "step": 4755 }, { "epoch": 0.34, "grad_norm": 1.5867184818199906, "learning_rate": 7.718872816711882e-06, "loss": 0.5434, "step": 4756 }, { "epoch": 0.34, "grad_norm": 1.3960992371657015, "learning_rate": 7.717908327989151e-06, "loss": 0.5091, "step": 4757 }, { "epoch": 0.34, "grad_norm": 1.73568491724809, "learning_rate": 7.716943695697292e-06, "loss": 0.6053, "step": 4758 }, { "epoch": 0.34, "grad_norm": 1.4695473747845849, "learning_rate": 7.715978919887261e-06, "loss": 0.5457, "step": 4759 }, { "epoch": 0.34, "grad_norm": 1.9886523567517422, "learning_rate": 7.71501400061002e-06, "loss": 0.5776, "step": 4760 }, { "epoch": 0.34, "grad_norm": 1.6325446847520872, "learning_rate": 7.71404893791654e-06, "loss": 0.5468, "step": 4761 }, { "epoch": 0.34, "grad_norm": 2.0336008845849003, "learning_rate": 7.7130837318578e-06, "loss": 0.5756, "step": 4762 }, { "epoch": 0.34, "grad_norm": 0.7098736558301314, "learning_rate": 7.712118382484783e-06, "loss": 0.4638, "step": 4763 }, { "epoch": 0.34, "grad_norm": 6.20426650736749, "learning_rate": 7.711152889848483e-06, "loss": 0.5422, "step": 4764 }, { "epoch": 0.34, "grad_norm": 0.7449859830134952, "learning_rate": 7.710187253999901e-06, "loss": 0.4208, "step": 4765 }, { "epoch": 0.34, "grad_norm": 1.4314056198982683, "learning_rate": 7.709221474990044e-06, "loss": 0.5646, "step": 4766 }, { "epoch": 0.34, "grad_norm": 1.5274650194258994, "learning_rate": 7.70825555286993e-06, "loss": 0.6155, "step": 4767 }, { "epoch": 0.34, "grad_norm": 1.647708428949298, "learning_rate": 7.70728948769058e-06, "loss": 0.5346, "step": 4768 }, { "epoch": 0.34, "grad_norm": 1.6189750613850027, "learning_rate": 7.706323279503025e-06, "loss": 0.52, "step": 4769 }, { "epoch": 0.34, "grad_norm": 1.9922713262163763, "learning_rate": 7.705356928358306e-06, "loss": 0.4895, "step": 4770 }, { "epoch": 0.34, "grad_norm": 1.4977263557355764, "learning_rate": 7.704390434307465e-06, "loss": 0.5349, "step": 4771 }, { "epoch": 0.34, "grad_norm": 2.5665371378959403, "learning_rate": 7.70342379740156e-06, "loss": 0.5721, "step": 4772 }, { "epoch": 0.34, "grad_norm": 1.5981697390000285, "learning_rate": 7.702457017691647e-06, "loss": 0.5859, "step": 4773 }, { "epoch": 0.34, "grad_norm": 1.6746877287153015, "learning_rate": 7.701490095228799e-06, "loss": 0.5724, "step": 4774 }, { "epoch": 0.34, "grad_norm": 1.686120282672347, "learning_rate": 7.70052303006409e-06, "loss": 0.5677, "step": 4775 }, { "epoch": 0.34, "grad_norm": 1.8140008716657556, "learning_rate": 7.6995558222486e-06, "loss": 0.5822, "step": 4776 }, { "epoch": 0.34, "grad_norm": 1.6893474044655168, "learning_rate": 7.698588471833428e-06, "loss": 0.5697, "step": 4777 }, { "epoch": 0.34, "grad_norm": 1.734751217595584, "learning_rate": 7.69762097886967e-06, "loss": 0.5778, "step": 4778 }, { "epoch": 0.34, "grad_norm": 1.436888762588941, "learning_rate": 7.69665334340843e-06, "loss": 0.5325, "step": 4779 }, { "epoch": 0.34, "grad_norm": 1.7350723068283707, "learning_rate": 7.695685565500823e-06, "loss": 0.5576, "step": 4780 }, { "epoch": 0.34, "grad_norm": 1.7864406869086606, "learning_rate": 7.694717645197968e-06, "loss": 0.565, "step": 4781 }, { "epoch": 0.34, "grad_norm": 1.6261504304099974, "learning_rate": 7.693749582550999e-06, "loss": 0.5156, "step": 4782 }, { "epoch": 0.34, "grad_norm": 1.62301017831468, "learning_rate": 7.692781377611047e-06, "loss": 0.558, "step": 4783 }, { "epoch": 0.34, "grad_norm": 0.7381646704606706, "learning_rate": 7.691813030429261e-06, "loss": 0.4576, "step": 4784 }, { "epoch": 0.34, "grad_norm": 1.5586049965399558, "learning_rate": 7.690844541056787e-06, "loss": 0.5084, "step": 4785 }, { "epoch": 0.34, "grad_norm": 2.083249706596281, "learning_rate": 7.689875909544787e-06, "loss": 0.5892, "step": 4786 }, { "epoch": 0.34, "grad_norm": 1.4237902770939703, "learning_rate": 7.688907135944429e-06, "loss": 0.5123, "step": 4787 }, { "epoch": 0.34, "grad_norm": 1.5416031283184448, "learning_rate": 7.687938220306883e-06, "loss": 0.4911, "step": 4788 }, { "epoch": 0.34, "grad_norm": 2.2552860452664545, "learning_rate": 7.686969162683331e-06, "loss": 0.5714, "step": 4789 }, { "epoch": 0.34, "grad_norm": 1.7237894212475746, "learning_rate": 7.685999963124965e-06, "loss": 0.6169, "step": 4790 }, { "epoch": 0.34, "grad_norm": 1.5028336770741848, "learning_rate": 7.685030621682978e-06, "loss": 0.4715, "step": 4791 }, { "epoch": 0.34, "grad_norm": 1.4007546398825015, "learning_rate": 7.684061138408577e-06, "loss": 0.5168, "step": 4792 }, { "epoch": 0.34, "grad_norm": 1.767005635024293, "learning_rate": 7.683091513352972e-06, "loss": 0.5599, "step": 4793 }, { "epoch": 0.34, "grad_norm": 1.4711843271767562, "learning_rate": 7.68212174656738e-06, "loss": 0.5376, "step": 4794 }, { "epoch": 0.34, "grad_norm": 1.5719253652355647, "learning_rate": 7.68115183810303e-06, "loss": 0.5822, "step": 4795 }, { "epoch": 0.34, "grad_norm": 1.769427713431006, "learning_rate": 7.680181788011156e-06, "loss": 0.6491, "step": 4796 }, { "epoch": 0.34, "grad_norm": 1.5718728251179117, "learning_rate": 7.679211596342997e-06, "loss": 0.5041, "step": 4797 }, { "epoch": 0.34, "grad_norm": 2.2696226753782676, "learning_rate": 7.678241263149802e-06, "loss": 0.5169, "step": 4798 }, { "epoch": 0.34, "grad_norm": 1.572332338164595, "learning_rate": 7.677270788482831e-06, "loss": 0.5247, "step": 4799 }, { "epoch": 0.34, "grad_norm": 1.7618022883940945, "learning_rate": 7.676300172393344e-06, "loss": 0.596, "step": 4800 }, { "epoch": 0.34, "grad_norm": 1.8392300248332514, "learning_rate": 7.675329414932613e-06, "loss": 0.5715, "step": 4801 }, { "epoch": 0.34, "grad_norm": 1.6656413306655593, "learning_rate": 7.674358516151918e-06, "loss": 0.5455, "step": 4802 }, { "epoch": 0.34, "grad_norm": 1.5292499250991674, "learning_rate": 7.673387476102543e-06, "loss": 0.535, "step": 4803 }, { "epoch": 0.34, "grad_norm": 1.594726544482017, "learning_rate": 7.672416294835784e-06, "loss": 0.6505, "step": 4804 }, { "epoch": 0.34, "grad_norm": 0.8593751249336438, "learning_rate": 7.671444972402939e-06, "loss": 0.4607, "step": 4805 }, { "epoch": 0.34, "grad_norm": 2.1173938895811286, "learning_rate": 7.670473508855321e-06, "loss": 0.563, "step": 4806 }, { "epoch": 0.34, "grad_norm": 1.7433131810540359, "learning_rate": 7.669501904244242e-06, "loss": 0.496, "step": 4807 }, { "epoch": 0.34, "grad_norm": 1.483450925235316, "learning_rate": 7.668530158621026e-06, "loss": 0.5235, "step": 4808 }, { "epoch": 0.34, "grad_norm": 1.6297819618183846, "learning_rate": 7.667558272037006e-06, "loss": 0.5459, "step": 4809 }, { "epoch": 0.34, "grad_norm": 1.5683364087194016, "learning_rate": 7.666586244543518e-06, "loss": 0.5795, "step": 4810 }, { "epoch": 0.34, "grad_norm": 1.85860885831951, "learning_rate": 7.665614076191911e-06, "loss": 0.513, "step": 4811 }, { "epoch": 0.34, "grad_norm": 1.7199229605812114, "learning_rate": 7.664641767033536e-06, "loss": 0.5455, "step": 4812 }, { "epoch": 0.34, "grad_norm": 1.5205261425243821, "learning_rate": 7.663669317119751e-06, "loss": 0.5078, "step": 4813 }, { "epoch": 0.34, "grad_norm": 58.834792463460445, "learning_rate": 7.66269672650193e-06, "loss": 0.5082, "step": 4814 }, { "epoch": 0.34, "grad_norm": 1.5668538249849029, "learning_rate": 7.661723995231446e-06, "loss": 0.4899, "step": 4815 }, { "epoch": 0.34, "grad_norm": 1.887973846532742, "learning_rate": 7.660751123359678e-06, "loss": 0.6257, "step": 4816 }, { "epoch": 0.34, "grad_norm": 1.5567215291808731, "learning_rate": 7.659778110938022e-06, "loss": 0.5737, "step": 4817 }, { "epoch": 0.34, "grad_norm": 1.517713406565091, "learning_rate": 7.658804958017873e-06, "loss": 0.5096, "step": 4818 }, { "epoch": 0.34, "grad_norm": 2.058621110147777, "learning_rate": 7.657831664650638e-06, "loss": 0.5615, "step": 4819 }, { "epoch": 0.34, "grad_norm": 1.5959400392090728, "learning_rate": 7.656858230887727e-06, "loss": 0.5266, "step": 4820 }, { "epoch": 0.34, "grad_norm": 1.5666059401846921, "learning_rate": 7.65588465678056e-06, "loss": 0.566, "step": 4821 }, { "epoch": 0.34, "grad_norm": 2.2528006175698367, "learning_rate": 7.65491094238057e-06, "loss": 0.5403, "step": 4822 }, { "epoch": 0.34, "grad_norm": 1.6886750127941925, "learning_rate": 7.653937087739187e-06, "loss": 0.5452, "step": 4823 }, { "epoch": 0.34, "grad_norm": 0.8569873644034576, "learning_rate": 7.652963092907854e-06, "loss": 0.4566, "step": 4824 }, { "epoch": 0.34, "grad_norm": 0.7780920924938105, "learning_rate": 7.65198895793802e-06, "loss": 0.4337, "step": 4825 }, { "epoch": 0.34, "grad_norm": 1.589020997619751, "learning_rate": 7.651014682881142e-06, "loss": 0.5182, "step": 4826 }, { "epoch": 0.34, "grad_norm": 1.50766543556978, "learning_rate": 7.650040267788688e-06, "loss": 0.5189, "step": 4827 }, { "epoch": 0.34, "grad_norm": 1.483784246253127, "learning_rate": 7.649065712712126e-06, "loss": 0.5413, "step": 4828 }, { "epoch": 0.34, "grad_norm": 1.649200571717978, "learning_rate": 7.648091017702937e-06, "loss": 0.5977, "step": 4829 }, { "epoch": 0.34, "grad_norm": 1.525791580060235, "learning_rate": 7.64711618281261e-06, "loss": 0.552, "step": 4830 }, { "epoch": 0.34, "grad_norm": 1.491987629365099, "learning_rate": 7.646141208092634e-06, "loss": 0.5404, "step": 4831 }, { "epoch": 0.34, "grad_norm": 1.567980871378964, "learning_rate": 7.645166093594513e-06, "loss": 0.4947, "step": 4832 }, { "epoch": 0.34, "grad_norm": 0.793874034007292, "learning_rate": 7.644190839369757e-06, "loss": 0.4641, "step": 4833 }, { "epoch": 0.34, "grad_norm": 1.6415951114145668, "learning_rate": 7.643215445469878e-06, "loss": 0.5859, "step": 4834 }, { "epoch": 0.34, "grad_norm": 1.7324865016573279, "learning_rate": 7.642239911946406e-06, "loss": 0.571, "step": 4835 }, { "epoch": 0.34, "grad_norm": 2.324937403560273, "learning_rate": 7.641264238850868e-06, "loss": 0.5585, "step": 4836 }, { "epoch": 0.34, "grad_norm": 1.9538347556298559, "learning_rate": 7.640288426234803e-06, "loss": 0.6562, "step": 4837 }, { "epoch": 0.34, "grad_norm": 1.5214735419090046, "learning_rate": 7.639312474149756e-06, "loss": 0.5171, "step": 4838 }, { "epoch": 0.34, "grad_norm": 1.4970476526237704, "learning_rate": 7.638336382647284e-06, "loss": 0.5608, "step": 4839 }, { "epoch": 0.34, "grad_norm": 1.7210879157361947, "learning_rate": 7.63736015177894e-06, "loss": 0.5276, "step": 4840 }, { "epoch": 0.34, "grad_norm": 1.531741214028701, "learning_rate": 7.6363837815963e-06, "loss": 0.5937, "step": 4841 }, { "epoch": 0.34, "grad_norm": 0.8131576492914907, "learning_rate": 7.635407272150933e-06, "loss": 0.4687, "step": 4842 }, { "epoch": 0.34, "grad_norm": 1.6860656314710765, "learning_rate": 7.634430623494425e-06, "loss": 0.5734, "step": 4843 }, { "epoch": 0.34, "grad_norm": 1.6700986817823413, "learning_rate": 7.633453835678364e-06, "loss": 0.5316, "step": 4844 }, { "epoch": 0.34, "grad_norm": 1.7615374044550225, "learning_rate": 7.632476908754347e-06, "loss": 0.5724, "step": 4845 }, { "epoch": 0.34, "grad_norm": 2.204073366286373, "learning_rate": 7.631499842773981e-06, "loss": 0.5371, "step": 4846 }, { "epoch": 0.34, "grad_norm": 1.8241263858546308, "learning_rate": 7.630522637788878e-06, "loss": 0.6629, "step": 4847 }, { "epoch": 0.34, "grad_norm": 1.5406400587178273, "learning_rate": 7.629545293850653e-06, "loss": 0.6116, "step": 4848 }, { "epoch": 0.34, "grad_norm": 1.8682256651511238, "learning_rate": 7.628567811010937e-06, "loss": 0.6185, "step": 4849 }, { "epoch": 0.34, "grad_norm": 1.6584404498472998, "learning_rate": 7.627590189321363e-06, "loss": 0.5586, "step": 4850 }, { "epoch": 0.34, "grad_norm": 0.8449224191246342, "learning_rate": 7.626612428833571e-06, "loss": 0.4777, "step": 4851 }, { "epoch": 0.34, "grad_norm": 1.830188346864979, "learning_rate": 7.625634529599211e-06, "loss": 0.5255, "step": 4852 }, { "epoch": 0.34, "grad_norm": 1.7914593141053055, "learning_rate": 7.624656491669937e-06, "loss": 0.5576, "step": 4853 }, { "epoch": 0.34, "grad_norm": 1.792578111815124, "learning_rate": 7.6236783150974145e-06, "loss": 0.5895, "step": 4854 }, { "epoch": 0.34, "grad_norm": 1.518646903916901, "learning_rate": 7.622699999933313e-06, "loss": 0.5675, "step": 4855 }, { "epoch": 0.34, "grad_norm": 1.8836355845883688, "learning_rate": 7.621721546229311e-06, "loss": 0.5751, "step": 4856 }, { "epoch": 0.34, "grad_norm": 0.756698389515884, "learning_rate": 7.620742954037091e-06, "loss": 0.4206, "step": 4857 }, { "epoch": 0.34, "grad_norm": 1.7048508117577985, "learning_rate": 7.61976422340835e-06, "loss": 0.6111, "step": 4858 }, { "epoch": 0.34, "grad_norm": 1.5146159304395557, "learning_rate": 7.618785354394786e-06, "loss": 0.5126, "step": 4859 }, { "epoch": 0.34, "grad_norm": 1.6081244336897265, "learning_rate": 7.617806347048104e-06, "loss": 0.5521, "step": 4860 }, { "epoch": 0.34, "grad_norm": 1.6143377344271912, "learning_rate": 7.616827201420023e-06, "loss": 0.6049, "step": 4861 }, { "epoch": 0.35, "grad_norm": 1.653392999902985, "learning_rate": 7.6158479175622605e-06, "loss": 0.4772, "step": 4862 }, { "epoch": 0.35, "grad_norm": 1.575509215885776, "learning_rate": 7.614868495526547e-06, "loss": 0.5316, "step": 4863 }, { "epoch": 0.35, "grad_norm": 3.1368802533512565, "learning_rate": 7.613888935364619e-06, "loss": 0.5572, "step": 4864 }, { "epoch": 0.35, "grad_norm": 1.5620875146107172, "learning_rate": 7.612909237128223e-06, "loss": 0.5678, "step": 4865 }, { "epoch": 0.35, "grad_norm": 1.5828299831330783, "learning_rate": 7.611929400869105e-06, "loss": 0.5209, "step": 4866 }, { "epoch": 0.35, "grad_norm": 1.5809980425438976, "learning_rate": 7.610949426639027e-06, "loss": 0.5669, "step": 4867 }, { "epoch": 0.35, "grad_norm": 1.5766604115891725, "learning_rate": 7.609969314489751e-06, "loss": 0.4951, "step": 4868 }, { "epoch": 0.35, "grad_norm": 1.5849843379862447, "learning_rate": 7.608989064473056e-06, "loss": 0.5668, "step": 4869 }, { "epoch": 0.35, "grad_norm": 1.6476264245736973, "learning_rate": 7.608008676640715e-06, "loss": 0.4885, "step": 4870 }, { "epoch": 0.35, "grad_norm": 1.690886090642633, "learning_rate": 7.607028151044518e-06, "loss": 0.4808, "step": 4871 }, { "epoch": 0.35, "grad_norm": 1.4647931121100588, "learning_rate": 7.606047487736261e-06, "loss": 0.5362, "step": 4872 }, { "epoch": 0.35, "grad_norm": 1.6230576692987182, "learning_rate": 7.6050666867677455e-06, "loss": 0.5413, "step": 4873 }, { "epoch": 0.35, "grad_norm": 1.5127299884626233, "learning_rate": 7.604085748190782e-06, "loss": 0.6063, "step": 4874 }, { "epoch": 0.35, "grad_norm": 2.21297096000894, "learning_rate": 7.6031046720571825e-06, "loss": 0.6045, "step": 4875 }, { "epoch": 0.35, "grad_norm": 0.7899458230224787, "learning_rate": 7.602123458418775e-06, "loss": 0.41, "step": 4876 }, { "epoch": 0.35, "grad_norm": 1.8836586484147022, "learning_rate": 7.6011421073273904e-06, "loss": 0.5807, "step": 4877 }, { "epoch": 0.35, "grad_norm": 1.535184828037114, "learning_rate": 7.600160618834864e-06, "loss": 0.5706, "step": 4878 }, { "epoch": 0.35, "grad_norm": 1.647426060360714, "learning_rate": 7.599178992993044e-06, "loss": 0.5794, "step": 4879 }, { "epoch": 0.35, "grad_norm": 2.2132808145039955, "learning_rate": 7.598197229853783e-06, "loss": 0.5073, "step": 4880 }, { "epoch": 0.35, "grad_norm": 1.65059769814612, "learning_rate": 7.5972153294689385e-06, "loss": 0.5953, "step": 4881 }, { "epoch": 0.35, "grad_norm": 0.7624652726855439, "learning_rate": 7.596233291890382e-06, "loss": 0.4645, "step": 4882 }, { "epoch": 0.35, "grad_norm": 1.7687478706740556, "learning_rate": 7.595251117169985e-06, "loss": 0.5645, "step": 4883 }, { "epoch": 0.35, "grad_norm": 1.682188511344273, "learning_rate": 7.59426880535963e-06, "loss": 0.5578, "step": 4884 }, { "epoch": 0.35, "grad_norm": 1.4856730717691378, "learning_rate": 7.593286356511205e-06, "loss": 0.5835, "step": 4885 }, { "epoch": 0.35, "grad_norm": 1.9641547793085776, "learning_rate": 7.592303770676609e-06, "loss": 0.5525, "step": 4886 }, { "epoch": 0.35, "grad_norm": 2.031810068041524, "learning_rate": 7.591321047907743e-06, "loss": 0.6032, "step": 4887 }, { "epoch": 0.35, "grad_norm": 1.6395849597046783, "learning_rate": 7.59033818825652e-06, "loss": 0.5009, "step": 4888 }, { "epoch": 0.35, "grad_norm": 1.6642475310934781, "learning_rate": 7.589355191774854e-06, "loss": 0.4877, "step": 4889 }, { "epoch": 0.35, "grad_norm": 1.4860254513046747, "learning_rate": 7.588372058514675e-06, "loss": 0.563, "step": 4890 }, { "epoch": 0.35, "grad_norm": 1.725087415236859, "learning_rate": 7.587388788527913e-06, "loss": 0.5354, "step": 4891 }, { "epoch": 0.35, "grad_norm": 0.7799895665880143, "learning_rate": 7.586405381866507e-06, "loss": 0.4377, "step": 4892 }, { "epoch": 0.35, "grad_norm": 1.4600866179856686, "learning_rate": 7.5854218385824055e-06, "loss": 0.5038, "step": 4893 }, { "epoch": 0.35, "grad_norm": 1.8172038741028984, "learning_rate": 7.584438158727561e-06, "loss": 0.6309, "step": 4894 }, { "epoch": 0.35, "grad_norm": 1.9468267825997616, "learning_rate": 7.583454342353938e-06, "loss": 0.5058, "step": 4895 }, { "epoch": 0.35, "grad_norm": 1.9933214355196305, "learning_rate": 7.582470389513501e-06, "loss": 0.545, "step": 4896 }, { "epoch": 0.35, "grad_norm": 1.6726220062117279, "learning_rate": 7.581486300258227e-06, "loss": 0.5493, "step": 4897 }, { "epoch": 0.35, "grad_norm": 1.674637176576178, "learning_rate": 7.5805020746401e-06, "loss": 0.4693, "step": 4898 }, { "epoch": 0.35, "grad_norm": 2.0604815428440504, "learning_rate": 7.579517712711111e-06, "loss": 0.5019, "step": 4899 }, { "epoch": 0.35, "grad_norm": 1.8944459134209932, "learning_rate": 7.578533214523251e-06, "loss": 0.5593, "step": 4900 }, { "epoch": 0.35, "grad_norm": 1.4594232564135154, "learning_rate": 7.577548580128534e-06, "loss": 0.5017, "step": 4901 }, { "epoch": 0.35, "grad_norm": 1.846656522730922, "learning_rate": 7.576563809578965e-06, "loss": 0.5247, "step": 4902 }, { "epoch": 0.35, "grad_norm": 0.9090368230322707, "learning_rate": 7.575578902926567e-06, "loss": 0.4725, "step": 4903 }, { "epoch": 0.35, "grad_norm": 1.6622083830442977, "learning_rate": 7.574593860223362e-06, "loss": 0.5949, "step": 4904 }, { "epoch": 0.35, "grad_norm": 0.7848369419778249, "learning_rate": 7.573608681521386e-06, "loss": 0.4676, "step": 4905 }, { "epoch": 0.35, "grad_norm": 1.7295308827584182, "learning_rate": 7.57262336687268e-06, "loss": 0.5104, "step": 4906 }, { "epoch": 0.35, "grad_norm": 1.5370046683006775, "learning_rate": 7.571637916329289e-06, "loss": 0.5968, "step": 4907 }, { "epoch": 0.35, "grad_norm": 1.704962983040745, "learning_rate": 7.570652329943269e-06, "loss": 0.5496, "step": 4908 }, { "epoch": 0.35, "grad_norm": 1.38820426607879, "learning_rate": 7.569666607766684e-06, "loss": 0.5321, "step": 4909 }, { "epoch": 0.35, "grad_norm": 1.90162217220586, "learning_rate": 7.5686807498516006e-06, "loss": 0.5489, "step": 4910 }, { "epoch": 0.35, "grad_norm": 1.7105389682281333, "learning_rate": 7.567694756250095e-06, "loss": 0.5323, "step": 4911 }, { "epoch": 0.35, "grad_norm": 1.4758240559101432, "learning_rate": 7.566708627014254e-06, "loss": 0.5442, "step": 4912 }, { "epoch": 0.35, "grad_norm": 1.5167299722045913, "learning_rate": 7.565722362196166e-06, "loss": 0.5494, "step": 4913 }, { "epoch": 0.35, "grad_norm": 2.0077780917890005, "learning_rate": 7.5647359618479275e-06, "loss": 0.6149, "step": 4914 }, { "epoch": 0.35, "grad_norm": 0.8352886069598454, "learning_rate": 7.563749426021645e-06, "loss": 0.4581, "step": 4915 }, { "epoch": 0.35, "grad_norm": 1.5483076936575557, "learning_rate": 7.562762754769433e-06, "loss": 0.5901, "step": 4916 }, { "epoch": 0.35, "grad_norm": 2.11355869300127, "learning_rate": 7.561775948143406e-06, "loss": 0.5562, "step": 4917 }, { "epoch": 0.35, "grad_norm": 1.8083120094276892, "learning_rate": 7.560789006195695e-06, "loss": 0.5763, "step": 4918 }, { "epoch": 0.35, "grad_norm": 1.5514303673872076, "learning_rate": 7.55980192897843e-06, "loss": 0.5277, "step": 4919 }, { "epoch": 0.35, "grad_norm": 1.3924644734873377, "learning_rate": 7.5588147165437545e-06, "loss": 0.5172, "step": 4920 }, { "epoch": 0.35, "grad_norm": 1.5291272719523268, "learning_rate": 7.557827368943813e-06, "loss": 0.5186, "step": 4921 }, { "epoch": 0.35, "grad_norm": 0.8064945243578632, "learning_rate": 7.556839886230764e-06, "loss": 0.4471, "step": 4922 }, { "epoch": 0.35, "grad_norm": 1.4702617807130036, "learning_rate": 7.5558522684567685e-06, "loss": 0.5239, "step": 4923 }, { "epoch": 0.35, "grad_norm": 1.7719036413781462, "learning_rate": 7.5548645156739956e-06, "loss": 0.5715, "step": 4924 }, { "epoch": 0.35, "grad_norm": 1.572933350254439, "learning_rate": 7.553876627934622e-06, "loss": 0.5482, "step": 4925 }, { "epoch": 0.35, "grad_norm": 3.066280826583247, "learning_rate": 7.5528886052908315e-06, "loss": 0.5032, "step": 4926 }, { "epoch": 0.35, "grad_norm": 2.0483900044265235, "learning_rate": 7.551900447794815e-06, "loss": 0.5371, "step": 4927 }, { "epoch": 0.35, "grad_norm": 1.743186877143913, "learning_rate": 7.550912155498769e-06, "loss": 0.5955, "step": 4928 }, { "epoch": 0.35, "grad_norm": 0.8129003946479268, "learning_rate": 7.5499237284549e-06, "loss": 0.4724, "step": 4929 }, { "epoch": 0.35, "grad_norm": 1.9423746022551112, "learning_rate": 7.548935166715417e-06, "loss": 0.5149, "step": 4930 }, { "epoch": 0.35, "grad_norm": 0.6529491220368296, "learning_rate": 7.547946470332545e-06, "loss": 0.4728, "step": 4931 }, { "epoch": 0.35, "grad_norm": 1.3950009606297245, "learning_rate": 7.546957639358505e-06, "loss": 0.5495, "step": 4932 }, { "epoch": 0.35, "grad_norm": 1.6374477805869674, "learning_rate": 7.545968673845532e-06, "loss": 0.5225, "step": 4933 }, { "epoch": 0.35, "grad_norm": 1.7583654086060343, "learning_rate": 7.5449795738458675e-06, "loss": 0.6055, "step": 4934 }, { "epoch": 0.35, "grad_norm": 1.4772703776075, "learning_rate": 7.543990339411759e-06, "loss": 0.5756, "step": 4935 }, { "epoch": 0.35, "grad_norm": 1.6543873807501344, "learning_rate": 7.543000970595459e-06, "loss": 0.5479, "step": 4936 }, { "epoch": 0.35, "grad_norm": 1.9606074945705314, "learning_rate": 7.542011467449231e-06, "loss": 0.5609, "step": 4937 }, { "epoch": 0.35, "grad_norm": 1.7274950137747096, "learning_rate": 7.541021830025343e-06, "loss": 0.5861, "step": 4938 }, { "epoch": 0.35, "grad_norm": 1.702757553198122, "learning_rate": 7.540032058376073e-06, "loss": 0.5358, "step": 4939 }, { "epoch": 0.35, "grad_norm": 0.798079476117054, "learning_rate": 7.5390421525537035e-06, "loss": 0.468, "step": 4940 }, { "epoch": 0.35, "grad_norm": 1.521626491847901, "learning_rate": 7.538052112610523e-06, "loss": 0.5589, "step": 4941 }, { "epoch": 0.35, "grad_norm": 2.229442257055242, "learning_rate": 7.537061938598831e-06, "loss": 0.5784, "step": 4942 }, { "epoch": 0.35, "grad_norm": 1.6607005478757135, "learning_rate": 7.536071630570929e-06, "loss": 0.5224, "step": 4943 }, { "epoch": 0.35, "grad_norm": 1.5217738803930063, "learning_rate": 7.535081188579132e-06, "loss": 0.6062, "step": 4944 }, { "epoch": 0.35, "grad_norm": 1.5800854042212251, "learning_rate": 7.534090612675755e-06, "loss": 0.5587, "step": 4945 }, { "epoch": 0.35, "grad_norm": 1.4470038148668924, "learning_rate": 7.533099902913126e-06, "loss": 0.4779, "step": 4946 }, { "epoch": 0.35, "grad_norm": 1.741351604043712, "learning_rate": 7.532109059343575e-06, "loss": 0.5996, "step": 4947 }, { "epoch": 0.35, "grad_norm": 1.7508441489640851, "learning_rate": 7.531118082019446e-06, "loss": 0.5238, "step": 4948 }, { "epoch": 0.35, "grad_norm": 1.4991082480471094, "learning_rate": 7.530126970993082e-06, "loss": 0.5072, "step": 4949 }, { "epoch": 0.35, "grad_norm": 1.6731013834020672, "learning_rate": 7.529135726316839e-06, "loss": 0.5574, "step": 4950 }, { "epoch": 0.35, "grad_norm": 1.6476992505689012, "learning_rate": 7.528144348043077e-06, "loss": 0.5442, "step": 4951 }, { "epoch": 0.35, "grad_norm": 1.5472203379236447, "learning_rate": 7.527152836224163e-06, "loss": 0.5864, "step": 4952 }, { "epoch": 0.35, "grad_norm": 1.5996551487830517, "learning_rate": 7.526161190912473e-06, "loss": 0.5482, "step": 4953 }, { "epoch": 0.35, "grad_norm": 1.483793761845596, "learning_rate": 7.525169412160389e-06, "loss": 0.5692, "step": 4954 }, { "epoch": 0.35, "grad_norm": 1.5138857315791683, "learning_rate": 7.524177500020302e-06, "loss": 0.5507, "step": 4955 }, { "epoch": 0.35, "grad_norm": 1.7583709572096526, "learning_rate": 7.523185454544606e-06, "loss": 0.5549, "step": 4956 }, { "epoch": 0.35, "grad_norm": 1.5472325487721006, "learning_rate": 7.522193275785703e-06, "loss": 0.4801, "step": 4957 }, { "epoch": 0.35, "grad_norm": 1.5678622229374912, "learning_rate": 7.521200963796006e-06, "loss": 0.6104, "step": 4958 }, { "epoch": 0.35, "grad_norm": 1.738830652847831, "learning_rate": 7.52020851862793e-06, "loss": 0.5712, "step": 4959 }, { "epoch": 0.35, "grad_norm": 1.8820902991770343, "learning_rate": 7.519215940333902e-06, "loss": 0.5687, "step": 4960 }, { "epoch": 0.35, "grad_norm": 1.520298011283955, "learning_rate": 7.5182232289663505e-06, "loss": 0.5066, "step": 4961 }, { "epoch": 0.35, "grad_norm": 1.7188046530800938, "learning_rate": 7.517230384577716e-06, "loss": 0.5708, "step": 4962 }, { "epoch": 0.35, "grad_norm": 1.668706866732145, "learning_rate": 7.516237407220444e-06, "loss": 0.5579, "step": 4963 }, { "epoch": 0.35, "grad_norm": 1.5766489134631672, "learning_rate": 7.515244296946984e-06, "loss": 0.5406, "step": 4964 }, { "epoch": 0.35, "grad_norm": 1.624373570102741, "learning_rate": 7.514251053809798e-06, "loss": 0.5542, "step": 4965 }, { "epoch": 0.35, "grad_norm": 1.558097936173771, "learning_rate": 7.51325767786135e-06, "loss": 0.5571, "step": 4966 }, { "epoch": 0.35, "grad_norm": 1.4657423201381425, "learning_rate": 7.5122641691541185e-06, "loss": 0.5195, "step": 4967 }, { "epoch": 0.35, "grad_norm": 3.0556826498403193, "learning_rate": 7.5112705277405794e-06, "loss": 0.5199, "step": 4968 }, { "epoch": 0.35, "grad_norm": 1.4906878926598979, "learning_rate": 7.5102767536732215e-06, "loss": 0.5452, "step": 4969 }, { "epoch": 0.35, "grad_norm": 0.8167054108534888, "learning_rate": 7.50928284700454e-06, "loss": 0.4928, "step": 4970 }, { "epoch": 0.35, "grad_norm": 0.7597169214935108, "learning_rate": 7.508288807787036e-06, "loss": 0.4666, "step": 4971 }, { "epoch": 0.35, "grad_norm": 2.470837969840525, "learning_rate": 7.507294636073219e-06, "loss": 0.5384, "step": 4972 }, { "epoch": 0.35, "grad_norm": 2.000337085821309, "learning_rate": 7.506300331915601e-06, "loss": 0.602, "step": 4973 }, { "epoch": 0.35, "grad_norm": 1.851030355092836, "learning_rate": 7.505305895366708e-06, "loss": 0.5543, "step": 4974 }, { "epoch": 0.35, "grad_norm": 1.5261731235854827, "learning_rate": 7.5043113264790675e-06, "loss": 0.5547, "step": 4975 }, { "epoch": 0.35, "grad_norm": 1.7647624109233802, "learning_rate": 7.503316625305219e-06, "loss": 0.5402, "step": 4976 }, { "epoch": 0.35, "grad_norm": 1.6038540336943767, "learning_rate": 7.502321791897704e-06, "loss": 0.4724, "step": 4977 }, { "epoch": 0.35, "grad_norm": 1.5973719668418904, "learning_rate": 7.501326826309072e-06, "loss": 0.5271, "step": 4978 }, { "epoch": 0.35, "grad_norm": 1.8515615049882899, "learning_rate": 7.500331728591881e-06, "loss": 0.58, "step": 4979 }, { "epoch": 0.35, "grad_norm": 1.5997308167338569, "learning_rate": 7.499336498798696e-06, "loss": 0.5913, "step": 4980 }, { "epoch": 0.35, "grad_norm": 2.969080838798194, "learning_rate": 7.498341136982088e-06, "loss": 0.4946, "step": 4981 }, { "epoch": 0.35, "grad_norm": 1.57953840276802, "learning_rate": 7.497345643194637e-06, "loss": 0.6232, "step": 4982 }, { "epoch": 0.35, "grad_norm": 1.9067978957706715, "learning_rate": 7.496350017488926e-06, "loss": 0.6135, "step": 4983 }, { "epoch": 0.35, "grad_norm": 1.9838491387632888, "learning_rate": 7.495354259917549e-06, "loss": 0.5722, "step": 4984 }, { "epoch": 0.35, "grad_norm": 2.164894351400883, "learning_rate": 7.494358370533103e-06, "loss": 0.619, "step": 4985 }, { "epoch": 0.35, "grad_norm": 1.4831946622568706, "learning_rate": 7.493362349388197e-06, "loss": 0.515, "step": 4986 }, { "epoch": 0.35, "grad_norm": 1.6261291645636096, "learning_rate": 7.492366196535444e-06, "loss": 0.5556, "step": 4987 }, { "epoch": 0.35, "grad_norm": 1.7195571400701262, "learning_rate": 7.491369912027461e-06, "loss": 0.5558, "step": 4988 }, { "epoch": 0.35, "grad_norm": 1.3308378816201414, "learning_rate": 7.490373495916877e-06, "loss": 0.5835, "step": 4989 }, { "epoch": 0.35, "grad_norm": 2.3276485125290676, "learning_rate": 7.489376948256329e-06, "loss": 0.5226, "step": 4990 }, { "epoch": 0.35, "grad_norm": 0.803752856707921, "learning_rate": 7.4883802690984545e-06, "loss": 0.4386, "step": 4991 }, { "epoch": 0.35, "grad_norm": 1.4030465915722392, "learning_rate": 7.4873834584959e-06, "loss": 0.4931, "step": 4992 }, { "epoch": 0.35, "grad_norm": 2.1789682929891505, "learning_rate": 7.486386516501326e-06, "loss": 0.5645, "step": 4993 }, { "epoch": 0.35, "grad_norm": 1.5364438425027147, "learning_rate": 7.48538944316739e-06, "loss": 0.5207, "step": 4994 }, { "epoch": 0.35, "grad_norm": 0.7765283163650484, "learning_rate": 7.484392238546761e-06, "loss": 0.4441, "step": 4995 }, { "epoch": 0.35, "grad_norm": 1.85902299831688, "learning_rate": 7.483394902692117e-06, "loss": 0.578, "step": 4996 }, { "epoch": 0.35, "grad_norm": 1.703086643345389, "learning_rate": 7.482397435656139e-06, "loss": 0.5661, "step": 4997 }, { "epoch": 0.35, "grad_norm": 1.7654416810941216, "learning_rate": 7.481399837491517e-06, "loss": 0.5658, "step": 4998 }, { "epoch": 0.35, "grad_norm": 1.5767385890224384, "learning_rate": 7.480402108250948e-06, "loss": 0.5807, "step": 4999 }, { "epoch": 0.35, "grad_norm": 1.7257442171893418, "learning_rate": 7.4794042479871345e-06, "loss": 0.6021, "step": 5000 }, { "epoch": 0.35, "grad_norm": 1.470674433630983, "learning_rate": 7.478406256752786e-06, "loss": 0.5992, "step": 5001 }, { "epoch": 0.35, "grad_norm": 2.085562794425816, "learning_rate": 7.477408134600623e-06, "loss": 0.5981, "step": 5002 }, { "epoch": 0.36, "grad_norm": 2.3001738224890094, "learning_rate": 7.476409881583367e-06, "loss": 0.5261, "step": 5003 }, { "epoch": 0.36, "grad_norm": 1.7480149260977138, "learning_rate": 7.4754114977537485e-06, "loss": 0.524, "step": 5004 }, { "epoch": 0.36, "grad_norm": 1.6453381850079216, "learning_rate": 7.474412983164508e-06, "loss": 0.5576, "step": 5005 }, { "epoch": 0.36, "grad_norm": 1.5752315524718463, "learning_rate": 7.47341433786839e-06, "loss": 0.6071, "step": 5006 }, { "epoch": 0.36, "grad_norm": 1.7821339909280394, "learning_rate": 7.4724155619181446e-06, "loss": 0.6018, "step": 5007 }, { "epoch": 0.36, "grad_norm": 1.7951400187128286, "learning_rate": 7.4714166553665324e-06, "loss": 0.6187, "step": 5008 }, { "epoch": 0.36, "grad_norm": 2.4237563030985143, "learning_rate": 7.470417618266317e-06, "loss": 0.6096, "step": 5009 }, { "epoch": 0.36, "grad_norm": 1.5264341324406885, "learning_rate": 7.469418450670273e-06, "loss": 0.5515, "step": 5010 }, { "epoch": 0.36, "grad_norm": 1.7397670980544666, "learning_rate": 7.468419152631178e-06, "loss": 0.5732, "step": 5011 }, { "epoch": 0.36, "grad_norm": 1.4520366138100707, "learning_rate": 7.467419724201821e-06, "loss": 0.5992, "step": 5012 }, { "epoch": 0.36, "grad_norm": 1.8042731704910568, "learning_rate": 7.466420165434991e-06, "loss": 0.5603, "step": 5013 }, { "epoch": 0.36, "grad_norm": 1.4563376794033969, "learning_rate": 7.465420476383492e-06, "loss": 0.4738, "step": 5014 }, { "epoch": 0.36, "grad_norm": 1.8855333052266465, "learning_rate": 7.4644206571001285e-06, "loss": 0.5346, "step": 5015 }, { "epoch": 0.36, "grad_norm": 1.630831424369212, "learning_rate": 7.463420707637718e-06, "loss": 0.584, "step": 5016 }, { "epoch": 0.36, "grad_norm": 1.8066907695212817, "learning_rate": 7.462420628049075e-06, "loss": 0.5524, "step": 5017 }, { "epoch": 0.36, "grad_norm": 1.8579376176384792, "learning_rate": 7.461420418387032e-06, "loss": 0.5949, "step": 5018 }, { "epoch": 0.36, "grad_norm": 1.6150997345575844, "learning_rate": 7.460420078704422e-06, "loss": 0.5222, "step": 5019 }, { "epoch": 0.36, "grad_norm": 1.621533932985966, "learning_rate": 7.459419609054086e-06, "loss": 0.4616, "step": 5020 }, { "epoch": 0.36, "grad_norm": 1.634698922668146, "learning_rate": 7.458419009488873e-06, "loss": 0.5806, "step": 5021 }, { "epoch": 0.36, "grad_norm": 1.5735826515374225, "learning_rate": 7.457418280061638e-06, "loss": 0.5472, "step": 5022 }, { "epoch": 0.36, "grad_norm": 2.3356321576567396, "learning_rate": 7.456417420825243e-06, "loss": 0.5784, "step": 5023 }, { "epoch": 0.36, "grad_norm": 1.636215566831808, "learning_rate": 7.455416431832556e-06, "loss": 0.5013, "step": 5024 }, { "epoch": 0.36, "grad_norm": 1.8152254355469455, "learning_rate": 7.454415313136451e-06, "loss": 0.55, "step": 5025 }, { "epoch": 0.36, "grad_norm": 1.9195200821397498, "learning_rate": 7.453414064789815e-06, "loss": 0.5575, "step": 5026 }, { "epoch": 0.36, "grad_norm": 1.7419063276022502, "learning_rate": 7.452412686845533e-06, "loss": 0.5226, "step": 5027 }, { "epoch": 0.36, "grad_norm": 1.4713389483001647, "learning_rate": 7.451411179356505e-06, "loss": 0.471, "step": 5028 }, { "epoch": 0.36, "grad_norm": 0.7792035267372587, "learning_rate": 7.450409542375632e-06, "loss": 0.438, "step": 5029 }, { "epoch": 0.36, "grad_norm": 2.08646622354803, "learning_rate": 7.449407775955822e-06, "loss": 0.5495, "step": 5030 }, { "epoch": 0.36, "grad_norm": 1.5105509580413656, "learning_rate": 7.448405880149995e-06, "loss": 0.6264, "step": 5031 }, { "epoch": 0.36, "grad_norm": 1.8098579051965586, "learning_rate": 7.447403855011072e-06, "loss": 0.6302, "step": 5032 }, { "epoch": 0.36, "grad_norm": 1.8119766172667833, "learning_rate": 7.446401700591983e-06, "loss": 0.5902, "step": 5033 }, { "epoch": 0.36, "grad_norm": 0.7056572005983928, "learning_rate": 7.4453994169456686e-06, "loss": 0.4254, "step": 5034 }, { "epoch": 0.36, "grad_norm": 1.6681456314143854, "learning_rate": 7.444397004125071e-06, "loss": 0.6201, "step": 5035 }, { "epoch": 0.36, "grad_norm": 1.5649564758407886, "learning_rate": 7.4433944621831414e-06, "loss": 0.5959, "step": 5036 }, { "epoch": 0.36, "grad_norm": 1.9541138939854987, "learning_rate": 7.442391791172836e-06, "loss": 0.5296, "step": 5037 }, { "epoch": 0.36, "grad_norm": 1.5801222628015292, "learning_rate": 7.44138899114712e-06, "loss": 0.5183, "step": 5038 }, { "epoch": 0.36, "grad_norm": 1.4941390415153633, "learning_rate": 7.4403860621589665e-06, "loss": 0.5587, "step": 5039 }, { "epoch": 0.36, "grad_norm": 1.4681971076706204, "learning_rate": 7.43938300426135e-06, "loss": 0.5083, "step": 5040 }, { "epoch": 0.36, "grad_norm": 0.7013568903544598, "learning_rate": 7.438379817507257e-06, "loss": 0.4511, "step": 5041 }, { "epoch": 0.36, "grad_norm": 1.5783403908304015, "learning_rate": 7.437376501949683e-06, "loss": 0.5132, "step": 5042 }, { "epoch": 0.36, "grad_norm": 1.5612692797430152, "learning_rate": 7.43637305764162e-06, "loss": 0.5409, "step": 5043 }, { "epoch": 0.36, "grad_norm": 1.6476981173971175, "learning_rate": 7.435369484636079e-06, "loss": 0.56, "step": 5044 }, { "epoch": 0.36, "grad_norm": 1.4983924004016416, "learning_rate": 7.434365782986068e-06, "loss": 0.5096, "step": 5045 }, { "epoch": 0.36, "grad_norm": 1.7208222242592959, "learning_rate": 7.433361952744609e-06, "loss": 0.5943, "step": 5046 }, { "epoch": 0.36, "grad_norm": 0.8198055297706212, "learning_rate": 7.432357993964724e-06, "loss": 0.4455, "step": 5047 }, { "epoch": 0.36, "grad_norm": 1.556242319986356, "learning_rate": 7.43135390669945e-06, "loss": 0.5933, "step": 5048 }, { "epoch": 0.36, "grad_norm": 1.3736043359517, "learning_rate": 7.430349691001823e-06, "loss": 0.5102, "step": 5049 }, { "epoch": 0.36, "grad_norm": 1.7877012383737436, "learning_rate": 7.42934534692489e-06, "loss": 0.5718, "step": 5050 }, { "epoch": 0.36, "grad_norm": 1.9208428585245887, "learning_rate": 7.428340874521705e-06, "loss": 0.593, "step": 5051 }, { "epoch": 0.36, "grad_norm": 1.5911293365793935, "learning_rate": 7.427336273845325e-06, "loss": 0.5671, "step": 5052 }, { "epoch": 0.36, "grad_norm": 1.5519185520736019, "learning_rate": 7.42633154494882e-06, "loss": 0.516, "step": 5053 }, { "epoch": 0.36, "grad_norm": 0.8277417269174736, "learning_rate": 7.42532668788526e-06, "loss": 0.4422, "step": 5054 }, { "epoch": 0.36, "grad_norm": 1.5931974197039245, "learning_rate": 7.424321702707726e-06, "loss": 0.5523, "step": 5055 }, { "epoch": 0.36, "grad_norm": 1.4981885077819668, "learning_rate": 7.423316589469305e-06, "loss": 0.5391, "step": 5056 }, { "epoch": 0.36, "grad_norm": 1.86737196040675, "learning_rate": 7.4223113482230925e-06, "loss": 0.5243, "step": 5057 }, { "epoch": 0.36, "grad_norm": 1.5735333110925382, "learning_rate": 7.421305979022184e-06, "loss": 0.5844, "step": 5058 }, { "epoch": 0.36, "grad_norm": 1.8936972502792857, "learning_rate": 7.420300481919691e-06, "loss": 0.5098, "step": 5059 }, { "epoch": 0.36, "grad_norm": 1.7091201250011898, "learning_rate": 7.419294856968725e-06, "loss": 0.5512, "step": 5060 }, { "epoch": 0.36, "grad_norm": 1.6893090494284482, "learning_rate": 7.418289104222407e-06, "loss": 0.4997, "step": 5061 }, { "epoch": 0.36, "grad_norm": 1.3652340188441736, "learning_rate": 7.417283223733863e-06, "loss": 0.5523, "step": 5062 }, { "epoch": 0.36, "grad_norm": 0.7963990861831172, "learning_rate": 7.41627721555623e-06, "loss": 0.4497, "step": 5063 }, { "epoch": 0.36, "grad_norm": 2.0385136015943415, "learning_rate": 7.415271079742645e-06, "loss": 0.483, "step": 5064 }, { "epoch": 0.36, "grad_norm": 1.7041267938636007, "learning_rate": 7.414264816346259e-06, "loss": 0.6181, "step": 5065 }, { "epoch": 0.36, "grad_norm": 1.9583146215173481, "learning_rate": 7.413258425420226e-06, "loss": 0.5504, "step": 5066 }, { "epoch": 0.36, "grad_norm": 2.083799444491867, "learning_rate": 7.412251907017703e-06, "loss": 0.5848, "step": 5067 }, { "epoch": 0.36, "grad_norm": 1.4516612646830118, "learning_rate": 7.411245261191863e-06, "loss": 0.515, "step": 5068 }, { "epoch": 0.36, "grad_norm": 1.861412295888898, "learning_rate": 7.4102384879958754e-06, "loss": 0.4733, "step": 5069 }, { "epoch": 0.36, "grad_norm": 0.8219853933668342, "learning_rate": 7.409231587482925e-06, "loss": 0.4539, "step": 5070 }, { "epoch": 0.36, "grad_norm": 1.7464077437141738, "learning_rate": 7.408224559706199e-06, "loss": 0.5567, "step": 5071 }, { "epoch": 0.36, "grad_norm": 0.8413106830065632, "learning_rate": 7.407217404718891e-06, "loss": 0.44, "step": 5072 }, { "epoch": 0.36, "grad_norm": 1.5787535642085477, "learning_rate": 7.406210122574203e-06, "loss": 0.5569, "step": 5073 }, { "epoch": 0.36, "grad_norm": 1.8973360122951102, "learning_rate": 7.4052027133253435e-06, "loss": 0.5952, "step": 5074 }, { "epoch": 0.36, "grad_norm": 1.6898192832409547, "learning_rate": 7.404195177025526e-06, "loss": 0.5704, "step": 5075 }, { "epoch": 0.36, "grad_norm": 1.815609419259841, "learning_rate": 7.403187513727973e-06, "loss": 0.5953, "step": 5076 }, { "epoch": 0.36, "grad_norm": 1.595122569914117, "learning_rate": 7.402179723485912e-06, "loss": 0.4757, "step": 5077 }, { "epoch": 0.36, "grad_norm": 1.7779299929072703, "learning_rate": 7.401171806352579e-06, "loss": 0.5509, "step": 5078 }, { "epoch": 0.36, "grad_norm": 1.4698184275334876, "learning_rate": 7.400163762381215e-06, "loss": 0.5592, "step": 5079 }, { "epoch": 0.36, "grad_norm": 5.377279200177793, "learning_rate": 7.399155591625069e-06, "loss": 0.6379, "step": 5080 }, { "epoch": 0.36, "grad_norm": 1.8352221790576715, "learning_rate": 7.398147294137393e-06, "loss": 0.6796, "step": 5081 }, { "epoch": 0.36, "grad_norm": 1.9620228817585146, "learning_rate": 7.397138869971452e-06, "loss": 0.5192, "step": 5082 }, { "epoch": 0.36, "grad_norm": 1.4711246197982666, "learning_rate": 7.396130319180514e-06, "loss": 0.5519, "step": 5083 }, { "epoch": 0.36, "grad_norm": 1.6330037349870214, "learning_rate": 7.395121641817852e-06, "loss": 0.5479, "step": 5084 }, { "epoch": 0.36, "grad_norm": 1.574900373738006, "learning_rate": 7.394112837936749e-06, "loss": 0.501, "step": 5085 }, { "epoch": 0.36, "grad_norm": 2.9529349124845603, "learning_rate": 7.393103907590494e-06, "loss": 0.5827, "step": 5086 }, { "epoch": 0.36, "grad_norm": 2.3026327734499556, "learning_rate": 7.392094850832383e-06, "loss": 0.511, "step": 5087 }, { "epoch": 0.36, "grad_norm": 1.4267357343134968, "learning_rate": 7.391085667715715e-06, "loss": 0.4688, "step": 5088 }, { "epoch": 0.36, "grad_norm": 1.5429265988950776, "learning_rate": 7.390076358293801e-06, "loss": 0.5135, "step": 5089 }, { "epoch": 0.36, "grad_norm": 1.8774359695551555, "learning_rate": 7.389066922619954e-06, "loss": 0.5548, "step": 5090 }, { "epoch": 0.36, "grad_norm": 1.5076159229903583, "learning_rate": 7.388057360747497e-06, "loss": 0.5399, "step": 5091 }, { "epoch": 0.36, "grad_norm": 1.8167313743886233, "learning_rate": 7.387047672729758e-06, "loss": 0.6252, "step": 5092 }, { "epoch": 0.36, "grad_norm": 1.660637060186211, "learning_rate": 7.386037858620074e-06, "loss": 0.5458, "step": 5093 }, { "epoch": 0.36, "grad_norm": 3.0486344464513095, "learning_rate": 7.385027918471783e-06, "loss": 0.5139, "step": 5094 }, { "epoch": 0.36, "grad_norm": 1.6898381578781265, "learning_rate": 7.384017852338239e-06, "loss": 0.533, "step": 5095 }, { "epoch": 0.36, "grad_norm": 1.5892420424942408, "learning_rate": 7.383007660272792e-06, "loss": 0.5227, "step": 5096 }, { "epoch": 0.36, "grad_norm": 5.617944199601051, "learning_rate": 7.381997342328806e-06, "loss": 0.4923, "step": 5097 }, { "epoch": 0.36, "grad_norm": 1.6975411839023307, "learning_rate": 7.380986898559648e-06, "loss": 0.6243, "step": 5098 }, { "epoch": 0.36, "grad_norm": 2.096827019532785, "learning_rate": 7.3799763290186945e-06, "loss": 0.4832, "step": 5099 }, { "epoch": 0.36, "grad_norm": 1.8254177421631068, "learning_rate": 7.378965633759327e-06, "loss": 0.569, "step": 5100 }, { "epoch": 0.36, "grad_norm": 1.7065811373509632, "learning_rate": 7.377954812834933e-06, "loss": 0.5063, "step": 5101 }, { "epoch": 0.36, "grad_norm": 1.5464949360535123, "learning_rate": 7.3769438662989104e-06, "loss": 0.5904, "step": 5102 }, { "epoch": 0.36, "grad_norm": 1.7067199625771512, "learning_rate": 7.375932794204657e-06, "loss": 0.615, "step": 5103 }, { "epoch": 0.36, "grad_norm": 1.5485577943162878, "learning_rate": 7.374921596605584e-06, "loss": 0.5438, "step": 5104 }, { "epoch": 0.36, "grad_norm": 1.6553840929194712, "learning_rate": 7.373910273555102e-06, "loss": 0.5805, "step": 5105 }, { "epoch": 0.36, "grad_norm": 1.769091854499526, "learning_rate": 7.372898825106638e-06, "loss": 0.5515, "step": 5106 }, { "epoch": 0.36, "grad_norm": 1.7164391576368183, "learning_rate": 7.371887251313617e-06, "loss": 0.5663, "step": 5107 }, { "epoch": 0.36, "grad_norm": 1.7118583076209484, "learning_rate": 7.370875552229475e-06, "loss": 0.6394, "step": 5108 }, { "epoch": 0.36, "grad_norm": 0.8047624824745919, "learning_rate": 7.3698637279076515e-06, "loss": 0.443, "step": 5109 }, { "epoch": 0.36, "grad_norm": 0.7870025201378461, "learning_rate": 7.368851778401597e-06, "loss": 0.4628, "step": 5110 }, { "epoch": 0.36, "grad_norm": 1.7165667211619287, "learning_rate": 7.367839703764763e-06, "loss": 0.5545, "step": 5111 }, { "epoch": 0.36, "grad_norm": 0.7964693908986769, "learning_rate": 7.366827504050615e-06, "loss": 0.4579, "step": 5112 }, { "epoch": 0.36, "grad_norm": 0.7648322301693099, "learning_rate": 7.365815179312615e-06, "loss": 0.4559, "step": 5113 }, { "epoch": 0.36, "grad_norm": 2.6015047173429493, "learning_rate": 7.364802729604242e-06, "loss": 0.4836, "step": 5114 }, { "epoch": 0.36, "grad_norm": 1.9447296172232715, "learning_rate": 7.363790154978977e-06, "loss": 0.5493, "step": 5115 }, { "epoch": 0.36, "grad_norm": 2.482232716389896, "learning_rate": 7.362777455490305e-06, "loss": 0.5862, "step": 5116 }, { "epoch": 0.36, "grad_norm": 1.66128418816264, "learning_rate": 7.361764631191723e-06, "loss": 0.5406, "step": 5117 }, { "epoch": 0.36, "grad_norm": 1.7145440436681805, "learning_rate": 7.3607516821367295e-06, "loss": 0.5934, "step": 5118 }, { "epoch": 0.36, "grad_norm": 1.9539714113278976, "learning_rate": 7.359738608378835e-06, "loss": 0.5353, "step": 5119 }, { "epoch": 0.36, "grad_norm": 0.8316434182820134, "learning_rate": 7.358725409971547e-06, "loss": 0.4601, "step": 5120 }, { "epoch": 0.36, "grad_norm": 2.5510956754206897, "learning_rate": 7.357712086968394e-06, "loss": 0.5366, "step": 5121 }, { "epoch": 0.36, "grad_norm": 1.5913794148353335, "learning_rate": 7.356698639422898e-06, "loss": 0.4833, "step": 5122 }, { "epoch": 0.36, "grad_norm": 1.751716413720923, "learning_rate": 7.355685067388595e-06, "loss": 0.5086, "step": 5123 }, { "epoch": 0.36, "grad_norm": 1.4944767364572513, "learning_rate": 7.354671370919024e-06, "loss": 0.5229, "step": 5124 }, { "epoch": 0.36, "grad_norm": 2.470657628710702, "learning_rate": 7.353657550067733e-06, "loss": 0.5829, "step": 5125 }, { "epoch": 0.36, "grad_norm": 1.7930545104503388, "learning_rate": 7.352643604888274e-06, "loss": 0.5526, "step": 5126 }, { "epoch": 0.36, "grad_norm": 1.6268312736568769, "learning_rate": 7.3516295354342096e-06, "loss": 0.519, "step": 5127 }, { "epoch": 0.36, "grad_norm": 1.533149796565539, "learning_rate": 7.350615341759103e-06, "loss": 0.5274, "step": 5128 }, { "epoch": 0.36, "grad_norm": 1.381698644028891, "learning_rate": 7.34960102391653e-06, "loss": 0.4644, "step": 5129 }, { "epoch": 0.36, "grad_norm": 1.5482866459149196, "learning_rate": 7.348586581960068e-06, "loss": 0.5219, "step": 5130 }, { "epoch": 0.36, "grad_norm": 1.7753605855524777, "learning_rate": 7.347572015943307e-06, "loss": 0.5642, "step": 5131 }, { "epoch": 0.36, "grad_norm": 1.854941836258718, "learning_rate": 7.3465573259198365e-06, "loss": 0.5447, "step": 5132 }, { "epoch": 0.36, "grad_norm": 2.2452714700288623, "learning_rate": 7.345542511943257e-06, "loss": 0.6431, "step": 5133 }, { "epoch": 0.36, "grad_norm": 1.6501921438272449, "learning_rate": 7.344527574067174e-06, "loss": 0.561, "step": 5134 }, { "epoch": 0.36, "grad_norm": 1.668503088340659, "learning_rate": 7.343512512345199e-06, "loss": 0.5308, "step": 5135 }, { "epoch": 0.36, "grad_norm": 1.5177840970476002, "learning_rate": 7.342497326830953e-06, "loss": 0.5094, "step": 5136 }, { "epoch": 0.36, "grad_norm": 1.6522433800671026, "learning_rate": 7.341482017578061e-06, "loss": 0.5108, "step": 5137 }, { "epoch": 0.36, "grad_norm": 1.6250208218789302, "learning_rate": 7.3404665846401555e-06, "loss": 0.5351, "step": 5138 }, { "epoch": 0.36, "grad_norm": 1.7921397949737752, "learning_rate": 7.339451028070873e-06, "loss": 0.5419, "step": 5139 }, { "epoch": 0.36, "grad_norm": 2.1160619574622057, "learning_rate": 7.338435347923861e-06, "loss": 0.4881, "step": 5140 }, { "epoch": 0.36, "grad_norm": 1.6440051312739723, "learning_rate": 7.337419544252771e-06, "loss": 0.55, "step": 5141 }, { "epoch": 0.36, "grad_norm": 1.4087838815695306, "learning_rate": 7.336403617111258e-06, "loss": 0.4658, "step": 5142 }, { "epoch": 0.36, "grad_norm": 2.0091094500961226, "learning_rate": 7.335387566552991e-06, "loss": 0.5263, "step": 5143 }, { "epoch": 0.37, "grad_norm": 1.6479128289239722, "learning_rate": 7.334371392631638e-06, "loss": 0.575, "step": 5144 }, { "epoch": 0.37, "grad_norm": 2.407416666329187, "learning_rate": 7.333355095400879e-06, "loss": 0.548, "step": 5145 }, { "epoch": 0.37, "grad_norm": 3.282268061927636, "learning_rate": 7.332338674914398e-06, "loss": 0.5505, "step": 5146 }, { "epoch": 0.37, "grad_norm": 1.6932025316255659, "learning_rate": 7.3313221312258846e-06, "loss": 0.5556, "step": 5147 }, { "epoch": 0.37, "grad_norm": 1.6795663400087641, "learning_rate": 7.330305464389036e-06, "loss": 0.5455, "step": 5148 }, { "epoch": 0.37, "grad_norm": 1.497900612118339, "learning_rate": 7.3292886744575575e-06, "loss": 0.4593, "step": 5149 }, { "epoch": 0.37, "grad_norm": 1.6561951405207849, "learning_rate": 7.328271761485159e-06, "loss": 0.5432, "step": 5150 }, { "epoch": 0.37, "grad_norm": 1.4652624060160537, "learning_rate": 7.327254725525554e-06, "loss": 0.5867, "step": 5151 }, { "epoch": 0.37, "grad_norm": 3.040675410734751, "learning_rate": 7.326237566632471e-06, "loss": 0.5485, "step": 5152 }, { "epoch": 0.37, "grad_norm": 0.8719122593132272, "learning_rate": 7.325220284859638e-06, "loss": 0.4657, "step": 5153 }, { "epoch": 0.37, "grad_norm": 2.9913566725087914, "learning_rate": 7.324202880260789e-06, "loss": 0.5088, "step": 5154 }, { "epoch": 0.37, "grad_norm": 1.6366908610435897, "learning_rate": 7.32318535288967e-06, "loss": 0.5485, "step": 5155 }, { "epoch": 0.37, "grad_norm": 1.5070794050701657, "learning_rate": 7.322167702800027e-06, "loss": 0.4738, "step": 5156 }, { "epoch": 0.37, "grad_norm": 1.601711919125155, "learning_rate": 7.321149930045617e-06, "loss": 0.599, "step": 5157 }, { "epoch": 0.37, "grad_norm": 1.4274056985595942, "learning_rate": 7.3201320346802026e-06, "loss": 0.528, "step": 5158 }, { "epoch": 0.37, "grad_norm": 1.7363807187178635, "learning_rate": 7.319114016757555e-06, "loss": 0.5034, "step": 5159 }, { "epoch": 0.37, "grad_norm": 1.479588039668636, "learning_rate": 7.318095876331445e-06, "loss": 0.5594, "step": 5160 }, { "epoch": 0.37, "grad_norm": 1.707592600216401, "learning_rate": 7.317077613455656e-06, "loss": 0.5339, "step": 5161 }, { "epoch": 0.37, "grad_norm": 2.026485083570643, "learning_rate": 7.316059228183976e-06, "loss": 0.4725, "step": 5162 }, { "epoch": 0.37, "grad_norm": 1.5366769684740051, "learning_rate": 7.3150407205702e-06, "loss": 0.5081, "step": 5163 }, { "epoch": 0.37, "grad_norm": 1.5441804151086802, "learning_rate": 7.3140220906681295e-06, "loss": 0.539, "step": 5164 }, { "epoch": 0.37, "grad_norm": 1.6583269138089602, "learning_rate": 7.313003338531569e-06, "loss": 0.519, "step": 5165 }, { "epoch": 0.37, "grad_norm": 1.9048494192787009, "learning_rate": 7.311984464214337e-06, "loss": 0.6251, "step": 5166 }, { "epoch": 0.37, "grad_norm": 1.4915246845122918, "learning_rate": 7.31096546777025e-06, "loss": 0.5814, "step": 5167 }, { "epoch": 0.37, "grad_norm": 1.4534310573023528, "learning_rate": 7.309946349253138e-06, "loss": 0.5034, "step": 5168 }, { "epoch": 0.37, "grad_norm": 2.0594336362128467, "learning_rate": 7.30892710871683e-06, "loss": 0.544, "step": 5169 }, { "epoch": 0.37, "grad_norm": 2.0545186429146343, "learning_rate": 7.307907746215172e-06, "loss": 0.5242, "step": 5170 }, { "epoch": 0.37, "grad_norm": 0.9532472089213389, "learning_rate": 7.306888261802003e-06, "loss": 0.4698, "step": 5171 }, { "epoch": 0.37, "grad_norm": 1.4808066538116278, "learning_rate": 7.305868655531181e-06, "loss": 0.4775, "step": 5172 }, { "epoch": 0.37, "grad_norm": 1.7245191819532661, "learning_rate": 7.304848927456563e-06, "loss": 0.5338, "step": 5173 }, { "epoch": 0.37, "grad_norm": 1.7539730374032696, "learning_rate": 7.303829077632015e-06, "loss": 0.5417, "step": 5174 }, { "epoch": 0.37, "grad_norm": 2.062984962502777, "learning_rate": 7.302809106111408e-06, "loss": 0.6039, "step": 5175 }, { "epoch": 0.37, "grad_norm": 2.0286543379729762, "learning_rate": 7.301789012948622e-06, "loss": 0.5192, "step": 5176 }, { "epoch": 0.37, "grad_norm": 1.9233514131753247, "learning_rate": 7.300768798197541e-06, "loss": 0.505, "step": 5177 }, { "epoch": 0.37, "grad_norm": 1.8666881431773925, "learning_rate": 7.299748461912054e-06, "loss": 0.5427, "step": 5178 }, { "epoch": 0.37, "grad_norm": 5.655594815968916, "learning_rate": 7.298728004146062e-06, "loss": 0.5977, "step": 5179 }, { "epoch": 0.37, "grad_norm": 0.8678658919741666, "learning_rate": 7.297707424953467e-06, "loss": 0.4475, "step": 5180 }, { "epoch": 0.37, "grad_norm": 1.6001194034820867, "learning_rate": 7.296686724388181e-06, "loss": 0.5428, "step": 5181 }, { "epoch": 0.37, "grad_norm": 2.698882123735311, "learning_rate": 7.29566590250412e-06, "loss": 0.5413, "step": 5182 }, { "epoch": 0.37, "grad_norm": 1.9559582339747006, "learning_rate": 7.294644959355208e-06, "loss": 0.5643, "step": 5183 }, { "epoch": 0.37, "grad_norm": 3.1659993023250506, "learning_rate": 7.293623894995372e-06, "loss": 0.5764, "step": 5184 }, { "epoch": 0.37, "grad_norm": 1.621399765210913, "learning_rate": 7.2926027094785514e-06, "loss": 0.5669, "step": 5185 }, { "epoch": 0.37, "grad_norm": 1.7723799861440888, "learning_rate": 7.291581402858687e-06, "loss": 0.5573, "step": 5186 }, { "epoch": 0.37, "grad_norm": 1.897439195656029, "learning_rate": 7.290559975189727e-06, "loss": 0.5726, "step": 5187 }, { "epoch": 0.37, "grad_norm": 1.906686590248555, "learning_rate": 7.2895384265256285e-06, "loss": 0.5004, "step": 5188 }, { "epoch": 0.37, "grad_norm": 1.5776828158401848, "learning_rate": 7.288516756920353e-06, "loss": 0.5374, "step": 5189 }, { "epoch": 0.37, "grad_norm": 1.50257283572745, "learning_rate": 7.287494966427866e-06, "loss": 0.5972, "step": 5190 }, { "epoch": 0.37, "grad_norm": 1.6084992642095253, "learning_rate": 7.286473055102146e-06, "loss": 0.4864, "step": 5191 }, { "epoch": 0.37, "grad_norm": 1.48636244380556, "learning_rate": 7.28545102299717e-06, "loss": 0.5508, "step": 5192 }, { "epoch": 0.37, "grad_norm": 1.9886120169376162, "learning_rate": 7.2844288701669265e-06, "loss": 0.5642, "step": 5193 }, { "epoch": 0.37, "grad_norm": 1.6116234406067413, "learning_rate": 7.28340659666541e-06, "loss": 0.6063, "step": 5194 }, { "epoch": 0.37, "grad_norm": 1.4642668753608847, "learning_rate": 7.282384202546619e-06, "loss": 0.4531, "step": 5195 }, { "epoch": 0.37, "grad_norm": 1.6667347374187635, "learning_rate": 7.281361687864563e-06, "loss": 0.5795, "step": 5196 }, { "epoch": 0.37, "grad_norm": 1.6105996373018179, "learning_rate": 7.280339052673251e-06, "loss": 0.5433, "step": 5197 }, { "epoch": 0.37, "grad_norm": 1.66798304950578, "learning_rate": 7.279316297026704e-06, "loss": 0.5584, "step": 5198 }, { "epoch": 0.37, "grad_norm": 2.0345685507203704, "learning_rate": 7.278293420978946e-06, "loss": 0.4837, "step": 5199 }, { "epoch": 0.37, "grad_norm": 1.6415103640630697, "learning_rate": 7.27727042458401e-06, "loss": 0.568, "step": 5200 }, { "epoch": 0.37, "grad_norm": 1.5920921863874502, "learning_rate": 7.276247307895933e-06, "loss": 0.5206, "step": 5201 }, { "epoch": 0.37, "grad_norm": 1.5565344878285035, "learning_rate": 7.2752240709687606e-06, "loss": 0.509, "step": 5202 }, { "epoch": 0.37, "grad_norm": 2.0289398979663185, "learning_rate": 7.274200713856543e-06, "loss": 0.5374, "step": 5203 }, { "epoch": 0.37, "grad_norm": 1.8414926599175903, "learning_rate": 7.273177236613338e-06, "loss": 0.4828, "step": 5204 }, { "epoch": 0.37, "grad_norm": 1.4859337274039752, "learning_rate": 7.2721536392932094e-06, "loss": 0.4841, "step": 5205 }, { "epoch": 0.37, "grad_norm": 1.8127811820136075, "learning_rate": 7.2711299219502254e-06, "loss": 0.508, "step": 5206 }, { "epoch": 0.37, "grad_norm": 1.6610388305791963, "learning_rate": 7.270106084638464e-06, "loss": 0.5156, "step": 5207 }, { "epoch": 0.37, "grad_norm": 1.6731451004252327, "learning_rate": 7.269082127412006e-06, "loss": 0.5726, "step": 5208 }, { "epoch": 0.37, "grad_norm": 1.7726816874005962, "learning_rate": 7.268058050324942e-06, "loss": 0.5217, "step": 5209 }, { "epoch": 0.37, "grad_norm": 1.5539592932620485, "learning_rate": 7.267033853431366e-06, "loss": 0.5785, "step": 5210 }, { "epoch": 0.37, "grad_norm": 3.080811855131007, "learning_rate": 7.266009536785381e-06, "loss": 0.5527, "step": 5211 }, { "epoch": 0.37, "grad_norm": 1.6077367704485828, "learning_rate": 7.264985100441093e-06, "loss": 0.5105, "step": 5212 }, { "epoch": 0.37, "grad_norm": 1.6333428742650145, "learning_rate": 7.26396054445262e-06, "loss": 0.5053, "step": 5213 }, { "epoch": 0.37, "grad_norm": 1.8158261170057446, "learning_rate": 7.262935868874077e-06, "loss": 0.5936, "step": 5214 }, { "epoch": 0.37, "grad_norm": 1.9430533592277475, "learning_rate": 7.261911073759594e-06, "loss": 0.518, "step": 5215 }, { "epoch": 0.37, "grad_norm": 1.6560366663261452, "learning_rate": 7.260886159163304e-06, "loss": 0.5179, "step": 5216 }, { "epoch": 0.37, "grad_norm": 1.6113097369922826, "learning_rate": 7.259861125139345e-06, "loss": 0.4799, "step": 5217 }, { "epoch": 0.37, "grad_norm": 1.8758405369358333, "learning_rate": 7.2588359717418645e-06, "loss": 0.5628, "step": 5218 }, { "epoch": 0.37, "grad_norm": 1.9516202428066758, "learning_rate": 7.257810699025016e-06, "loss": 0.5389, "step": 5219 }, { "epoch": 0.37, "grad_norm": 1.883040970444659, "learning_rate": 7.256785307042953e-06, "loss": 0.6298, "step": 5220 }, { "epoch": 0.37, "grad_norm": 1.7351678902081595, "learning_rate": 7.255759795849845e-06, "loss": 0.5768, "step": 5221 }, { "epoch": 0.37, "grad_norm": 1.6121768885000411, "learning_rate": 7.254734165499861e-06, "loss": 0.6096, "step": 5222 }, { "epoch": 0.37, "grad_norm": 1.6629156069350473, "learning_rate": 7.253708416047176e-06, "loss": 0.5844, "step": 5223 }, { "epoch": 0.37, "grad_norm": 0.8233515788424914, "learning_rate": 7.2526825475459775e-06, "loss": 0.4604, "step": 5224 }, { "epoch": 0.37, "grad_norm": 0.7696184197121986, "learning_rate": 7.251656560050454e-06, "loss": 0.4621, "step": 5225 }, { "epoch": 0.37, "grad_norm": 1.6466590367571898, "learning_rate": 7.250630453614801e-06, "loss": 0.5785, "step": 5226 }, { "epoch": 0.37, "grad_norm": 1.4485095219676252, "learning_rate": 7.249604228293221e-06, "loss": 0.5328, "step": 5227 }, { "epoch": 0.37, "grad_norm": 3.7628713724490788, "learning_rate": 7.248577884139923e-06, "loss": 0.4874, "step": 5228 }, { "epoch": 0.37, "grad_norm": 1.6872449909904645, "learning_rate": 7.247551421209123e-06, "loss": 0.5238, "step": 5229 }, { "epoch": 0.37, "grad_norm": 1.6199210807446809, "learning_rate": 7.24652483955504e-06, "loss": 0.5388, "step": 5230 }, { "epoch": 0.37, "grad_norm": 1.8309753134943307, "learning_rate": 7.245498139231902e-06, "loss": 0.5703, "step": 5231 }, { "epoch": 0.37, "grad_norm": 2.683508779094761, "learning_rate": 7.244471320293945e-06, "loss": 0.498, "step": 5232 }, { "epoch": 0.37, "grad_norm": 1.9454475205644481, "learning_rate": 7.243444382795407e-06, "loss": 0.6502, "step": 5233 }, { "epoch": 0.37, "grad_norm": 1.3791954575291987, "learning_rate": 7.2424173267905365e-06, "loss": 0.5397, "step": 5234 }, { "epoch": 0.37, "grad_norm": 1.8238408320863007, "learning_rate": 7.241390152333582e-06, "loss": 0.4952, "step": 5235 }, { "epoch": 0.37, "grad_norm": 1.604277423795181, "learning_rate": 7.240362859478807e-06, "loss": 0.6029, "step": 5236 }, { "epoch": 0.37, "grad_norm": 1.664031092516312, "learning_rate": 7.239335448280472e-06, "loss": 0.6022, "step": 5237 }, { "epoch": 0.37, "grad_norm": 1.4206883457301, "learning_rate": 7.2383079187928516e-06, "loss": 0.5197, "step": 5238 }, { "epoch": 0.37, "grad_norm": 1.56805527990652, "learning_rate": 7.237280271070223e-06, "loss": 0.5188, "step": 5239 }, { "epoch": 0.37, "grad_norm": 1.615523817437708, "learning_rate": 7.236252505166869e-06, "loss": 0.5886, "step": 5240 }, { "epoch": 0.37, "grad_norm": 1.6228669953749348, "learning_rate": 7.2352246211370805e-06, "loss": 0.5459, "step": 5241 }, { "epoch": 0.37, "grad_norm": 1.494290237242261, "learning_rate": 7.234196619035153e-06, "loss": 0.5083, "step": 5242 }, { "epoch": 0.37, "grad_norm": 1.6584302627318794, "learning_rate": 7.2331684989153905e-06, "loss": 0.5316, "step": 5243 }, { "epoch": 0.37, "grad_norm": 1.443592095260721, "learning_rate": 7.232140260832101e-06, "loss": 0.5415, "step": 5244 }, { "epoch": 0.37, "grad_norm": 1.6004862772342898, "learning_rate": 7.231111904839599e-06, "loss": 0.5505, "step": 5245 }, { "epoch": 0.37, "grad_norm": 1.6527150745053076, "learning_rate": 7.230083430992205e-06, "loss": 0.5624, "step": 5246 }, { "epoch": 0.37, "grad_norm": 0.8023458865465705, "learning_rate": 7.2290548393442496e-06, "loss": 0.4668, "step": 5247 }, { "epoch": 0.37, "grad_norm": 1.6115487747172093, "learning_rate": 7.228026129950063e-06, "loss": 0.5649, "step": 5248 }, { "epoch": 0.37, "grad_norm": 1.5072914973469387, "learning_rate": 7.226997302863987e-06, "loss": 0.5488, "step": 5249 }, { "epoch": 0.37, "grad_norm": 1.7043892734839095, "learning_rate": 7.225968358140368e-06, "loss": 0.5931, "step": 5250 }, { "epoch": 0.37, "grad_norm": 1.8683975462479476, "learning_rate": 7.224939295833558e-06, "loss": 0.6142, "step": 5251 }, { "epoch": 0.37, "grad_norm": 1.5059315638410742, "learning_rate": 7.223910115997914e-06, "loss": 0.5338, "step": 5252 }, { "epoch": 0.37, "grad_norm": 1.4888797602058315, "learning_rate": 7.222880818687803e-06, "loss": 0.5707, "step": 5253 }, { "epoch": 0.37, "grad_norm": 1.561054412788188, "learning_rate": 7.221851403957594e-06, "loss": 0.6096, "step": 5254 }, { "epoch": 0.37, "grad_norm": 1.6429889666540278, "learning_rate": 7.220821871861666e-06, "loss": 0.5452, "step": 5255 }, { "epoch": 0.37, "grad_norm": 1.555751235588039, "learning_rate": 7.219792222454402e-06, "loss": 0.4979, "step": 5256 }, { "epoch": 0.37, "grad_norm": 2.287739592562299, "learning_rate": 7.21876245579019e-06, "loss": 0.5943, "step": 5257 }, { "epoch": 0.37, "grad_norm": 1.4744739900871262, "learning_rate": 7.217732571923428e-06, "loss": 0.5059, "step": 5258 }, { "epoch": 0.37, "grad_norm": 1.558494791418864, "learning_rate": 7.216702570908516e-06, "loss": 0.5249, "step": 5259 }, { "epoch": 0.37, "grad_norm": 1.7551839196821337, "learning_rate": 7.215672452799862e-06, "loss": 0.558, "step": 5260 }, { "epoch": 0.37, "grad_norm": 3.114042663375599, "learning_rate": 7.214642217651883e-06, "loss": 0.5714, "step": 5261 }, { "epoch": 0.37, "grad_norm": 1.6456383582879193, "learning_rate": 7.213611865518997e-06, "loss": 0.5921, "step": 5262 }, { "epoch": 0.37, "grad_norm": 1.5972934299448482, "learning_rate": 7.212581396455633e-06, "loss": 0.6443, "step": 5263 }, { "epoch": 0.37, "grad_norm": 1.546130372386423, "learning_rate": 7.2115508105162215e-06, "loss": 0.5997, "step": 5264 }, { "epoch": 0.37, "grad_norm": 2.0541408640483616, "learning_rate": 7.210520107755201e-06, "loss": 0.5557, "step": 5265 }, { "epoch": 0.37, "grad_norm": 1.4076787565227642, "learning_rate": 7.20948928822702e-06, "loss": 0.4845, "step": 5266 }, { "epoch": 0.37, "grad_norm": 1.9920923941707174, "learning_rate": 7.2084583519861275e-06, "loss": 0.4902, "step": 5267 }, { "epoch": 0.37, "grad_norm": 1.8755854174384332, "learning_rate": 7.207427299086981e-06, "loss": 0.549, "step": 5268 }, { "epoch": 0.37, "grad_norm": 1.6937304562892301, "learning_rate": 7.2063961295840444e-06, "loss": 0.5564, "step": 5269 }, { "epoch": 0.37, "grad_norm": 1.6614112783645985, "learning_rate": 7.20536484353179e-06, "loss": 0.5986, "step": 5270 }, { "epoch": 0.37, "grad_norm": 0.8137076783053949, "learning_rate": 7.20433344098469e-06, "loss": 0.4631, "step": 5271 }, { "epoch": 0.37, "grad_norm": 1.402315746956907, "learning_rate": 7.20330192199723e-06, "loss": 0.5503, "step": 5272 }, { "epoch": 0.37, "grad_norm": 1.8730868563169927, "learning_rate": 7.2022702866238945e-06, "loss": 0.5921, "step": 5273 }, { "epoch": 0.37, "grad_norm": 1.8005092556400688, "learning_rate": 7.2012385349191815e-06, "loss": 0.5752, "step": 5274 }, { "epoch": 0.37, "grad_norm": 2.5364581223426232, "learning_rate": 7.20020666693759e-06, "loss": 0.5284, "step": 5275 }, { "epoch": 0.37, "grad_norm": 1.6587862283865742, "learning_rate": 7.199174682733626e-06, "loss": 0.5422, "step": 5276 }, { "epoch": 0.37, "grad_norm": 1.8328213600877143, "learning_rate": 7.1981425823618045e-06, "loss": 0.4888, "step": 5277 }, { "epoch": 0.37, "grad_norm": 1.629982461436803, "learning_rate": 7.197110365876644e-06, "loss": 0.4827, "step": 5278 }, { "epoch": 0.37, "grad_norm": 1.4092732155786576, "learning_rate": 7.1960780333326695e-06, "loss": 0.4829, "step": 5279 }, { "epoch": 0.37, "grad_norm": 1.6696243747739277, "learning_rate": 7.19504558478441e-06, "loss": 0.4528, "step": 5280 }, { "epoch": 0.37, "grad_norm": 1.4870829947661377, "learning_rate": 7.194013020286407e-06, "loss": 0.5127, "step": 5281 }, { "epoch": 0.37, "grad_norm": 0.7897434437984284, "learning_rate": 7.192980339893201e-06, "loss": 0.4496, "step": 5282 }, { "epoch": 0.37, "grad_norm": 2.417471151957371, "learning_rate": 7.191947543659341e-06, "loss": 0.5393, "step": 5283 }, { "epoch": 0.37, "grad_norm": 1.9497259971559688, "learning_rate": 7.190914631639385e-06, "loss": 0.5371, "step": 5284 }, { "epoch": 0.38, "grad_norm": 1.6395187052572293, "learning_rate": 7.189881603887897e-06, "loss": 0.5278, "step": 5285 }, { "epoch": 0.38, "grad_norm": 1.9199606064617991, "learning_rate": 7.18884846045944e-06, "loss": 0.5675, "step": 5286 }, { "epoch": 0.38, "grad_norm": 1.6567757570098334, "learning_rate": 7.187815201408592e-06, "loss": 0.5283, "step": 5287 }, { "epoch": 0.38, "grad_norm": 0.68564378957445, "learning_rate": 7.1867818267899305e-06, "loss": 0.4231, "step": 5288 }, { "epoch": 0.38, "grad_norm": 1.755078972203877, "learning_rate": 7.185748336658043e-06, "loss": 0.5283, "step": 5289 }, { "epoch": 0.38, "grad_norm": 1.632939537354433, "learning_rate": 7.184714731067521e-06, "loss": 0.5328, "step": 5290 }, { "epoch": 0.38, "grad_norm": 1.6433772723460291, "learning_rate": 7.183681010072966e-06, "loss": 0.5773, "step": 5291 }, { "epoch": 0.38, "grad_norm": 1.5534244546967315, "learning_rate": 7.182647173728981e-06, "loss": 0.5309, "step": 5292 }, { "epoch": 0.38, "grad_norm": 2.0750139843265685, "learning_rate": 7.181613222090175e-06, "loss": 0.6019, "step": 5293 }, { "epoch": 0.38, "grad_norm": 1.6006060937009494, "learning_rate": 7.180579155211167e-06, "loss": 0.5778, "step": 5294 }, { "epoch": 0.38, "grad_norm": 1.6890174708372767, "learning_rate": 7.179544973146579e-06, "loss": 0.5605, "step": 5295 }, { "epoch": 0.38, "grad_norm": 1.617631553486146, "learning_rate": 7.178510675951042e-06, "loss": 0.5476, "step": 5296 }, { "epoch": 0.38, "grad_norm": 1.896021418610378, "learning_rate": 7.177476263679186e-06, "loss": 0.5737, "step": 5297 }, { "epoch": 0.38, "grad_norm": 1.7065049082230734, "learning_rate": 7.176441736385658e-06, "loss": 0.5002, "step": 5298 }, { "epoch": 0.38, "grad_norm": 1.4385463718118985, "learning_rate": 7.1754070941251005e-06, "loss": 0.5485, "step": 5299 }, { "epoch": 0.38, "grad_norm": 1.5533163986558027, "learning_rate": 7.174372336952171e-06, "loss": 0.5296, "step": 5300 }, { "epoch": 0.38, "grad_norm": 2.077080300784717, "learning_rate": 7.173337464921526e-06, "loss": 0.4963, "step": 5301 }, { "epoch": 0.38, "grad_norm": 1.627443970665686, "learning_rate": 7.172302478087833e-06, "loss": 0.5387, "step": 5302 }, { "epoch": 0.38, "grad_norm": 2.2574212581540856, "learning_rate": 7.171267376505761e-06, "loss": 0.5403, "step": 5303 }, { "epoch": 0.38, "grad_norm": 1.4580101904782823, "learning_rate": 7.170232160229988e-06, "loss": 0.4788, "step": 5304 }, { "epoch": 0.38, "grad_norm": 2.7849583802248183, "learning_rate": 7.169196829315201e-06, "loss": 0.5671, "step": 5305 }, { "epoch": 0.38, "grad_norm": 2.2024137922400366, "learning_rate": 7.168161383816085e-06, "loss": 0.5397, "step": 5306 }, { "epoch": 0.38, "grad_norm": 1.6958604140560845, "learning_rate": 7.16712582378734e-06, "loss": 0.5657, "step": 5307 }, { "epoch": 0.38, "grad_norm": 1.523672667845232, "learning_rate": 7.166090149283664e-06, "loss": 0.4692, "step": 5308 }, { "epoch": 0.38, "grad_norm": 1.575102442172051, "learning_rate": 7.165054360359769e-06, "loss": 0.581, "step": 5309 }, { "epoch": 0.38, "grad_norm": 2.104394982839106, "learning_rate": 7.164018457070364e-06, "loss": 0.5768, "step": 5310 }, { "epoch": 0.38, "grad_norm": 1.4272713097407983, "learning_rate": 7.162982439470172e-06, "loss": 0.5313, "step": 5311 }, { "epoch": 0.38, "grad_norm": 1.5145696630465368, "learning_rate": 7.161946307613918e-06, "loss": 0.537, "step": 5312 }, { "epoch": 0.38, "grad_norm": 1.6311822549636168, "learning_rate": 7.160910061556337e-06, "loss": 0.5235, "step": 5313 }, { "epoch": 0.38, "grad_norm": 1.5648243822087662, "learning_rate": 7.15987370135216e-06, "loss": 0.5182, "step": 5314 }, { "epoch": 0.38, "grad_norm": 2.038558610786504, "learning_rate": 7.158837227056139e-06, "loss": 0.527, "step": 5315 }, { "epoch": 0.38, "grad_norm": 1.6603971358090268, "learning_rate": 7.157800638723019e-06, "loss": 0.5676, "step": 5316 }, { "epoch": 0.38, "grad_norm": 1.7626033177193374, "learning_rate": 7.1567639364075595e-06, "loss": 0.5534, "step": 5317 }, { "epoch": 0.38, "grad_norm": 2.253463666463328, "learning_rate": 7.1557271201645184e-06, "loss": 0.5003, "step": 5318 }, { "epoch": 0.38, "grad_norm": 1.8631878423772479, "learning_rate": 7.154690190048667e-06, "loss": 0.6196, "step": 5319 }, { "epoch": 0.38, "grad_norm": 1.896871517425509, "learning_rate": 7.153653146114779e-06, "loss": 0.5899, "step": 5320 }, { "epoch": 0.38, "grad_norm": 1.6096923029626529, "learning_rate": 7.152615988417634e-06, "loss": 0.5609, "step": 5321 }, { "epoch": 0.38, "grad_norm": 1.4528029308446935, "learning_rate": 7.151578717012018e-06, "loss": 0.5338, "step": 5322 }, { "epoch": 0.38, "grad_norm": 1.776661978842978, "learning_rate": 7.150541331952724e-06, "loss": 0.5296, "step": 5323 }, { "epoch": 0.38, "grad_norm": 1.805414344253933, "learning_rate": 7.1495038332945495e-06, "loss": 0.5296, "step": 5324 }, { "epoch": 0.38, "grad_norm": 0.8610570337158563, "learning_rate": 7.148466221092299e-06, "loss": 0.4437, "step": 5325 }, { "epoch": 0.38, "grad_norm": 1.8706284318661104, "learning_rate": 7.147428495400783e-06, "loss": 0.5656, "step": 5326 }, { "epoch": 0.38, "grad_norm": 1.5388671636992204, "learning_rate": 7.146390656274817e-06, "loss": 0.5722, "step": 5327 }, { "epoch": 0.38, "grad_norm": 1.8166473295465588, "learning_rate": 7.145352703769225e-06, "loss": 0.5237, "step": 5328 }, { "epoch": 0.38, "grad_norm": 1.896273333173229, "learning_rate": 7.144314637938833e-06, "loss": 0.6275, "step": 5329 }, { "epoch": 0.38, "grad_norm": 1.5849504476593916, "learning_rate": 7.143276458838476e-06, "loss": 0.5867, "step": 5330 }, { "epoch": 0.38, "grad_norm": 1.711993399603033, "learning_rate": 7.1422381665229935e-06, "loss": 0.5385, "step": 5331 }, { "epoch": 0.38, "grad_norm": 1.4824626341744926, "learning_rate": 7.141199761047233e-06, "loss": 0.5182, "step": 5332 }, { "epoch": 0.38, "grad_norm": 2.228913694024556, "learning_rate": 7.140161242466045e-06, "loss": 0.5457, "step": 5333 }, { "epoch": 0.38, "grad_norm": 1.6002694766569634, "learning_rate": 7.139122610834288e-06, "loss": 0.5713, "step": 5334 }, { "epoch": 0.38, "grad_norm": 1.627555111726412, "learning_rate": 7.138083866206826e-06, "loss": 0.5696, "step": 5335 }, { "epoch": 0.38, "grad_norm": 1.5352627976621351, "learning_rate": 7.137045008638531e-06, "loss": 0.5302, "step": 5336 }, { "epoch": 0.38, "grad_norm": 1.7056950348548432, "learning_rate": 7.136006038184276e-06, "loss": 0.5493, "step": 5337 }, { "epoch": 0.38, "grad_norm": 1.6514193256884193, "learning_rate": 7.1349669548989455e-06, "loss": 0.6088, "step": 5338 }, { "epoch": 0.38, "grad_norm": 1.471518974111202, "learning_rate": 7.133927758837426e-06, "loss": 0.5069, "step": 5339 }, { "epoch": 0.38, "grad_norm": 1.9341273072838945, "learning_rate": 7.13288845005461e-06, "loss": 0.6304, "step": 5340 }, { "epoch": 0.38, "grad_norm": 1.753497751662623, "learning_rate": 7.1318490286053995e-06, "loss": 0.5613, "step": 5341 }, { "epoch": 0.38, "grad_norm": 1.325593884994315, "learning_rate": 7.130809494544699e-06, "loss": 0.4739, "step": 5342 }, { "epoch": 0.38, "grad_norm": 3.0627626394898004, "learning_rate": 7.1297698479274235e-06, "loss": 0.6236, "step": 5343 }, { "epoch": 0.38, "grad_norm": 1.8143092595692016, "learning_rate": 7.128730088808486e-06, "loss": 0.6166, "step": 5344 }, { "epoch": 0.38, "grad_norm": 1.6107176079528123, "learning_rate": 7.127690217242813e-06, "loss": 0.5647, "step": 5345 }, { "epoch": 0.38, "grad_norm": 1.6231040606478415, "learning_rate": 7.1266502332853325e-06, "loss": 0.568, "step": 5346 }, { "epoch": 0.38, "grad_norm": 1.5726459329411413, "learning_rate": 7.1256101369909815e-06, "loss": 0.4863, "step": 5347 }, { "epoch": 0.38, "grad_norm": 3.953381203546077, "learning_rate": 7.1245699284146986e-06, "loss": 0.5144, "step": 5348 }, { "epoch": 0.38, "grad_norm": 1.6071115307474362, "learning_rate": 7.123529607611433e-06, "loss": 0.4963, "step": 5349 }, { "epoch": 0.38, "grad_norm": 1.5946157149527314, "learning_rate": 7.12248917463614e-06, "loss": 0.57, "step": 5350 }, { "epoch": 0.38, "grad_norm": 1.3796933425491658, "learning_rate": 7.121448629543777e-06, "loss": 0.4516, "step": 5351 }, { "epoch": 0.38, "grad_norm": 1.5265133574894296, "learning_rate": 7.1204079723893075e-06, "loss": 0.5637, "step": 5352 }, { "epoch": 0.38, "grad_norm": 1.730342876531837, "learning_rate": 7.119367203227705e-06, "loss": 0.5832, "step": 5353 }, { "epoch": 0.38, "grad_norm": 2.4765157149336408, "learning_rate": 7.118326322113944e-06, "loss": 0.483, "step": 5354 }, { "epoch": 0.38, "grad_norm": 1.3610711703906517, "learning_rate": 7.1172853291030094e-06, "loss": 0.5203, "step": 5355 }, { "epoch": 0.38, "grad_norm": 1.3594934468477995, "learning_rate": 7.116244224249888e-06, "loss": 0.4989, "step": 5356 }, { "epoch": 0.38, "grad_norm": 1.5634049904091782, "learning_rate": 7.115203007609578e-06, "loss": 0.5895, "step": 5357 }, { "epoch": 0.38, "grad_norm": 1.574872168860554, "learning_rate": 7.114161679237079e-06, "loss": 0.5778, "step": 5358 }, { "epoch": 0.38, "grad_norm": 1.5257506963426022, "learning_rate": 7.113120239187393e-06, "loss": 0.5864, "step": 5359 }, { "epoch": 0.38, "grad_norm": 1.6948834064708003, "learning_rate": 7.112078687515538e-06, "loss": 0.6052, "step": 5360 }, { "epoch": 0.38, "grad_norm": 1.456109205124301, "learning_rate": 7.111037024276529e-06, "loss": 0.5135, "step": 5361 }, { "epoch": 0.38, "grad_norm": 1.5374290697537834, "learning_rate": 7.109995249525392e-06, "loss": 0.5601, "step": 5362 }, { "epoch": 0.38, "grad_norm": 1.4839760360499616, "learning_rate": 7.108953363317155e-06, "loss": 0.576, "step": 5363 }, { "epoch": 0.38, "grad_norm": 0.7187490375057405, "learning_rate": 7.107911365706856e-06, "loss": 0.4648, "step": 5364 }, { "epoch": 0.38, "grad_norm": 1.6902380234377923, "learning_rate": 7.106869256749537e-06, "loss": 0.5627, "step": 5365 }, { "epoch": 0.38, "grad_norm": 1.4739323270343563, "learning_rate": 7.105827036500245e-06, "loss": 0.5366, "step": 5366 }, { "epoch": 0.38, "grad_norm": 0.7250956712873298, "learning_rate": 7.104784705014032e-06, "loss": 0.419, "step": 5367 }, { "epoch": 0.38, "grad_norm": 1.6454346185583595, "learning_rate": 7.103742262345959e-06, "loss": 0.5455, "step": 5368 }, { "epoch": 0.38, "grad_norm": 1.816008481598353, "learning_rate": 7.1026997085510915e-06, "loss": 0.5415, "step": 5369 }, { "epoch": 0.38, "grad_norm": 0.8822938169963456, "learning_rate": 7.1016570436845e-06, "loss": 0.4744, "step": 5370 }, { "epoch": 0.38, "grad_norm": 1.463388817348178, "learning_rate": 7.100614267801262e-06, "loss": 0.5002, "step": 5371 }, { "epoch": 0.38, "grad_norm": 1.7394591674015685, "learning_rate": 7.099571380956462e-06, "loss": 0.5553, "step": 5372 }, { "epoch": 0.38, "grad_norm": 0.7427113286051817, "learning_rate": 7.098528383205186e-06, "loss": 0.4568, "step": 5373 }, { "epoch": 0.38, "grad_norm": 1.7524652848972893, "learning_rate": 7.097485274602531e-06, "loss": 0.5567, "step": 5374 }, { "epoch": 0.38, "grad_norm": 1.7316665761664194, "learning_rate": 7.096442055203596e-06, "loss": 0.4785, "step": 5375 }, { "epoch": 0.38, "grad_norm": 2.1415361911303497, "learning_rate": 7.095398725063488e-06, "loss": 0.6195, "step": 5376 }, { "epoch": 0.38, "grad_norm": 1.73985059540259, "learning_rate": 7.0943552842373185e-06, "loss": 0.4716, "step": 5377 }, { "epoch": 0.38, "grad_norm": 1.8190014522796827, "learning_rate": 7.093311732780207e-06, "loss": 0.626, "step": 5378 }, { "epoch": 0.38, "grad_norm": 0.8100662483649863, "learning_rate": 7.0922680707472765e-06, "loss": 0.4773, "step": 5379 }, { "epoch": 0.38, "grad_norm": 2.3531442652631513, "learning_rate": 7.091224298193658e-06, "loss": 0.5972, "step": 5380 }, { "epoch": 0.38, "grad_norm": 1.6646633879445845, "learning_rate": 7.090180415174485e-06, "loss": 0.5804, "step": 5381 }, { "epoch": 0.38, "grad_norm": 1.7022106885075796, "learning_rate": 7.0891364217449e-06, "loss": 0.5905, "step": 5382 }, { "epoch": 0.38, "grad_norm": 1.6642102763721145, "learning_rate": 7.08809231796005e-06, "loss": 0.5196, "step": 5383 }, { "epoch": 0.38, "grad_norm": 1.6949094772710465, "learning_rate": 7.087048103875088e-06, "loss": 0.5848, "step": 5384 }, { "epoch": 0.38, "grad_norm": 2.607935193900281, "learning_rate": 7.086003779545174e-06, "loss": 0.5532, "step": 5385 }, { "epoch": 0.38, "grad_norm": 1.4794609341247655, "learning_rate": 7.084959345025471e-06, "loss": 0.5438, "step": 5386 }, { "epoch": 0.38, "grad_norm": 1.5502661073314832, "learning_rate": 7.0839148003711524e-06, "loss": 0.5453, "step": 5387 }, { "epoch": 0.38, "grad_norm": 1.8351242533690564, "learning_rate": 7.0828701456373914e-06, "loss": 0.6032, "step": 5388 }, { "epoch": 0.38, "grad_norm": 2.0490818799866295, "learning_rate": 7.0818253808793725e-06, "loss": 0.5657, "step": 5389 }, { "epoch": 0.38, "grad_norm": 2.05914619556099, "learning_rate": 7.080780506152282e-06, "loss": 0.5435, "step": 5390 }, { "epoch": 0.38, "grad_norm": 1.515721749750618, "learning_rate": 7.079735521511315e-06, "loss": 0.6052, "step": 5391 }, { "epoch": 0.38, "grad_norm": 1.8678028849102208, "learning_rate": 7.078690427011669e-06, "loss": 0.5597, "step": 5392 }, { "epoch": 0.38, "grad_norm": 1.52731061484615, "learning_rate": 7.077645222708554e-06, "loss": 0.5816, "step": 5393 }, { "epoch": 0.38, "grad_norm": 0.750972394230337, "learning_rate": 7.076599908657177e-06, "loss": 0.4441, "step": 5394 }, { "epoch": 0.38, "grad_norm": 1.4581909869165495, "learning_rate": 7.075554484912755e-06, "loss": 0.4987, "step": 5395 }, { "epoch": 0.38, "grad_norm": 1.8442822516496875, "learning_rate": 7.074508951530514e-06, "loss": 0.5295, "step": 5396 }, { "epoch": 0.38, "grad_norm": 1.560069065837114, "learning_rate": 7.07346330856568e-06, "loss": 0.5836, "step": 5397 }, { "epoch": 0.38, "grad_norm": 2.1791714401158484, "learning_rate": 7.072417556073489e-06, "loss": 0.5662, "step": 5398 }, { "epoch": 0.38, "grad_norm": 1.5640081949964852, "learning_rate": 7.071371694109179e-06, "loss": 0.5498, "step": 5399 }, { "epoch": 0.38, "grad_norm": 2.1332356840745352, "learning_rate": 7.070325722727998e-06, "loss": 0.5504, "step": 5400 }, { "epoch": 0.38, "grad_norm": 1.3997797829264835, "learning_rate": 7.069279641985197e-06, "loss": 0.5513, "step": 5401 }, { "epoch": 0.38, "grad_norm": 1.368895076210583, "learning_rate": 7.068233451936035e-06, "loss": 0.5325, "step": 5402 }, { "epoch": 0.38, "grad_norm": 2.107812015689986, "learning_rate": 7.067187152635773e-06, "loss": 0.5723, "step": 5403 }, { "epoch": 0.38, "grad_norm": 1.3505017663092647, "learning_rate": 7.06614074413968e-06, "loss": 0.5201, "step": 5404 }, { "epoch": 0.38, "grad_norm": 1.542658168469199, "learning_rate": 7.065094226503033e-06, "loss": 0.5062, "step": 5405 }, { "epoch": 0.38, "grad_norm": 1.480796787026757, "learning_rate": 7.064047599781111e-06, "loss": 0.5287, "step": 5406 }, { "epoch": 0.38, "grad_norm": 1.6250131829381875, "learning_rate": 7.0630008640292e-06, "loss": 0.5999, "step": 5407 }, { "epoch": 0.38, "grad_norm": 1.7787521293516744, "learning_rate": 7.061954019302594e-06, "loss": 0.561, "step": 5408 }, { "epoch": 0.38, "grad_norm": 1.595610059475579, "learning_rate": 7.0609070656565904e-06, "loss": 0.5479, "step": 5409 }, { "epoch": 0.38, "grad_norm": 1.6561758632514592, "learning_rate": 7.05986000314649e-06, "loss": 0.5181, "step": 5410 }, { "epoch": 0.38, "grad_norm": 1.766020191723996, "learning_rate": 7.0588128318276085e-06, "loss": 0.5352, "step": 5411 }, { "epoch": 0.38, "grad_norm": 1.4630035742998104, "learning_rate": 7.057765551755253e-06, "loss": 0.5711, "step": 5412 }, { "epoch": 0.38, "grad_norm": 1.528601663719652, "learning_rate": 7.056718162984751e-06, "loss": 0.5465, "step": 5413 }, { "epoch": 0.38, "grad_norm": 1.5523603250059257, "learning_rate": 7.055670665571426e-06, "loss": 0.5764, "step": 5414 }, { "epoch": 0.38, "grad_norm": 1.8216454465139897, "learning_rate": 7.054623059570609e-06, "loss": 0.5488, "step": 5415 }, { "epoch": 0.38, "grad_norm": 1.9944500196167971, "learning_rate": 7.053575345037641e-06, "loss": 0.5448, "step": 5416 }, { "epoch": 0.38, "grad_norm": 1.9158048313881042, "learning_rate": 7.052527522027865e-06, "loss": 0.5853, "step": 5417 }, { "epoch": 0.38, "grad_norm": 0.8153086497287203, "learning_rate": 7.051479590596631e-06, "loss": 0.4677, "step": 5418 }, { "epoch": 0.38, "grad_norm": 1.5920921623275361, "learning_rate": 7.050431550799292e-06, "loss": 0.4914, "step": 5419 }, { "epoch": 0.38, "grad_norm": 1.7123158238680598, "learning_rate": 7.049383402691215e-06, "loss": 0.564, "step": 5420 }, { "epoch": 0.38, "grad_norm": 2.254198189766179, "learning_rate": 7.048335146327759e-06, "loss": 0.5767, "step": 5421 }, { "epoch": 0.38, "grad_norm": 2.0337509090957266, "learning_rate": 7.047286781764299e-06, "loss": 0.661, "step": 5422 }, { "epoch": 0.38, "grad_norm": 1.4887659328602472, "learning_rate": 7.0462383090562145e-06, "loss": 0.6186, "step": 5423 }, { "epoch": 0.38, "grad_norm": 1.4902483318161455, "learning_rate": 7.04518972825889e-06, "loss": 0.5226, "step": 5424 }, { "epoch": 0.38, "grad_norm": 1.6453592156391383, "learning_rate": 7.044141039427713e-06, "loss": 0.5431, "step": 5425 }, { "epoch": 0.39, "grad_norm": 1.6838534465345063, "learning_rate": 7.043092242618082e-06, "loss": 0.5274, "step": 5426 }, { "epoch": 0.39, "grad_norm": 1.453790897308789, "learning_rate": 7.042043337885394e-06, "loss": 0.537, "step": 5427 }, { "epoch": 0.39, "grad_norm": 1.7630412595206904, "learning_rate": 7.0409943252850576e-06, "loss": 0.5925, "step": 5428 }, { "epoch": 0.39, "grad_norm": 1.8625421966308713, "learning_rate": 7.039945204872484e-06, "loss": 0.5401, "step": 5429 }, { "epoch": 0.39, "grad_norm": 1.8192065037919496, "learning_rate": 7.038895976703094e-06, "loss": 0.5381, "step": 5430 }, { "epoch": 0.39, "grad_norm": 3.2591247971273485, "learning_rate": 7.0378466408323085e-06, "loss": 0.5347, "step": 5431 }, { "epoch": 0.39, "grad_norm": 1.8032833977948368, "learning_rate": 7.036797197315561e-06, "loss": 0.5001, "step": 5432 }, { "epoch": 0.39, "grad_norm": 1.970598019951489, "learning_rate": 7.035747646208282e-06, "loss": 0.5926, "step": 5433 }, { "epoch": 0.39, "grad_norm": 1.5723639009509216, "learning_rate": 7.034697987565915e-06, "loss": 0.5344, "step": 5434 }, { "epoch": 0.39, "grad_norm": 1.6946692704906028, "learning_rate": 7.033648221443905e-06, "loss": 0.5648, "step": 5435 }, { "epoch": 0.39, "grad_norm": 1.6972713563028743, "learning_rate": 7.032598347897704e-06, "loss": 0.5728, "step": 5436 }, { "epoch": 0.39, "grad_norm": 1.6898129461033558, "learning_rate": 7.031548366982772e-06, "loss": 0.5625, "step": 5437 }, { "epoch": 0.39, "grad_norm": 1.853672325246368, "learning_rate": 7.0304982787545714e-06, "loss": 0.5577, "step": 5438 }, { "epoch": 0.39, "grad_norm": 3.6317568198218466, "learning_rate": 7.029448083268572e-06, "loss": 0.6588, "step": 5439 }, { "epoch": 0.39, "grad_norm": 1.4887429680781104, "learning_rate": 7.0283977805802475e-06, "loss": 0.4885, "step": 5440 }, { "epoch": 0.39, "grad_norm": 1.56162197660061, "learning_rate": 7.027347370745079e-06, "loss": 0.5543, "step": 5441 }, { "epoch": 0.39, "grad_norm": 2.1858434992828384, "learning_rate": 7.026296853818553e-06, "loss": 0.5633, "step": 5442 }, { "epoch": 0.39, "grad_norm": 2.0119486941507585, "learning_rate": 7.025246229856162e-06, "loss": 0.5676, "step": 5443 }, { "epoch": 0.39, "grad_norm": 0.7734841292009952, "learning_rate": 7.0241954989134005e-06, "loss": 0.4502, "step": 5444 }, { "epoch": 0.39, "grad_norm": 1.4912591003079245, "learning_rate": 7.023144661045777e-06, "loss": 0.546, "step": 5445 }, { "epoch": 0.39, "grad_norm": 1.6046790966748061, "learning_rate": 7.022093716308794e-06, "loss": 0.5443, "step": 5446 }, { "epoch": 0.39, "grad_norm": 1.5910788472280488, "learning_rate": 7.021042664757971e-06, "loss": 0.5418, "step": 5447 }, { "epoch": 0.39, "grad_norm": 1.6559051280972505, "learning_rate": 7.019991506448826e-06, "loss": 0.5739, "step": 5448 }, { "epoch": 0.39, "grad_norm": 1.8186744270694613, "learning_rate": 7.018940241436885e-06, "loss": 0.5579, "step": 5449 }, { "epoch": 0.39, "grad_norm": 1.4887910428171838, "learning_rate": 7.017888869777678e-06, "loss": 0.5121, "step": 5450 }, { "epoch": 0.39, "grad_norm": 1.7358732418637193, "learning_rate": 7.016837391526745e-06, "loss": 0.5901, "step": 5451 }, { "epoch": 0.39, "grad_norm": 1.7320886334294041, "learning_rate": 7.015785806739626e-06, "loss": 0.5641, "step": 5452 }, { "epoch": 0.39, "grad_norm": 1.652100257570366, "learning_rate": 7.0147341154718695e-06, "loss": 0.5819, "step": 5453 }, { "epoch": 0.39, "grad_norm": 1.7959906429772756, "learning_rate": 7.013682317779033e-06, "loss": 0.5651, "step": 5454 }, { "epoch": 0.39, "grad_norm": 1.6544727742800043, "learning_rate": 7.01263041371667e-06, "loss": 0.5427, "step": 5455 }, { "epoch": 0.39, "grad_norm": 1.7318964442697327, "learning_rate": 7.011578403340351e-06, "loss": 0.5632, "step": 5456 }, { "epoch": 0.39, "grad_norm": 1.7519479590267515, "learning_rate": 7.0105262867056436e-06, "loss": 0.5736, "step": 5457 }, { "epoch": 0.39, "grad_norm": 1.727841791461665, "learning_rate": 7.009474063868126e-06, "loss": 0.5645, "step": 5458 }, { "epoch": 0.39, "grad_norm": 4.614251345014641, "learning_rate": 7.008421734883378e-06, "loss": 0.5001, "step": 5459 }, { "epoch": 0.39, "grad_norm": 1.523678192367452, "learning_rate": 7.00736929980699e-06, "loss": 0.5142, "step": 5460 }, { "epoch": 0.39, "grad_norm": 0.8428398596835543, "learning_rate": 7.006316758694553e-06, "loss": 0.466, "step": 5461 }, { "epoch": 0.39, "grad_norm": 1.6524942586247529, "learning_rate": 7.005264111601667e-06, "loss": 0.5671, "step": 5462 }, { "epoch": 0.39, "grad_norm": 9.54633944524786, "learning_rate": 7.004211358583934e-06, "loss": 0.5291, "step": 5463 }, { "epoch": 0.39, "grad_norm": 1.566651070707878, "learning_rate": 7.003158499696968e-06, "loss": 0.6449, "step": 5464 }, { "epoch": 0.39, "grad_norm": 1.4705532183831205, "learning_rate": 7.002105534996382e-06, "loss": 0.5324, "step": 5465 }, { "epoch": 0.39, "grad_norm": 0.8638671651400454, "learning_rate": 7.001052464537797e-06, "loss": 0.4746, "step": 5466 }, { "epoch": 0.39, "grad_norm": 1.7290888193991074, "learning_rate": 6.99999928837684e-06, "loss": 0.5174, "step": 5467 }, { "epoch": 0.39, "grad_norm": 1.736948815926927, "learning_rate": 6.998946006569143e-06, "loss": 0.6056, "step": 5468 }, { "epoch": 0.39, "grad_norm": 0.8498783917390376, "learning_rate": 6.997892619170347e-06, "loss": 0.4567, "step": 5469 }, { "epoch": 0.39, "grad_norm": 1.843526462150231, "learning_rate": 6.9968391262360904e-06, "loss": 0.5648, "step": 5470 }, { "epoch": 0.39, "grad_norm": 1.7364243167559172, "learning_rate": 6.995785527822027e-06, "loss": 0.5954, "step": 5471 }, { "epoch": 0.39, "grad_norm": 1.6633747887723824, "learning_rate": 6.994731823983809e-06, "loss": 0.5226, "step": 5472 }, { "epoch": 0.39, "grad_norm": 1.7972758615268272, "learning_rate": 6.9936780147770965e-06, "loss": 0.484, "step": 5473 }, { "epoch": 0.39, "grad_norm": 1.5860602798796182, "learning_rate": 6.992624100257554e-06, "loss": 0.5588, "step": 5474 }, { "epoch": 0.39, "grad_norm": 0.7227461699474015, "learning_rate": 6.991570080480858e-06, "loss": 0.4557, "step": 5475 }, { "epoch": 0.39, "grad_norm": 1.8169240097253545, "learning_rate": 6.99051595550268e-06, "loss": 0.5898, "step": 5476 }, { "epoch": 0.39, "grad_norm": 1.7095440930807444, "learning_rate": 6.989461725378706e-06, "loss": 0.5597, "step": 5477 }, { "epoch": 0.39, "grad_norm": 1.686313521872057, "learning_rate": 6.988407390164621e-06, "loss": 0.596, "step": 5478 }, { "epoch": 0.39, "grad_norm": 1.6124814712994346, "learning_rate": 6.987352949916122e-06, "loss": 0.5123, "step": 5479 }, { "epoch": 0.39, "grad_norm": 1.982256340420072, "learning_rate": 6.986298404688904e-06, "loss": 0.6007, "step": 5480 }, { "epoch": 0.39, "grad_norm": 0.9050196396945963, "learning_rate": 6.985243754538673e-06, "loss": 0.4563, "step": 5481 }, { "epoch": 0.39, "grad_norm": 1.8733073725496132, "learning_rate": 6.984188999521141e-06, "loss": 0.6038, "step": 5482 }, { "epoch": 0.39, "grad_norm": 2.166917911105936, "learning_rate": 6.983134139692023e-06, "loss": 0.5826, "step": 5483 }, { "epoch": 0.39, "grad_norm": 5.609614659715804, "learning_rate": 6.98207917510704e-06, "loss": 0.4393, "step": 5484 }, { "epoch": 0.39, "grad_norm": 1.5224036003214179, "learning_rate": 6.981024105821919e-06, "loss": 0.5401, "step": 5485 }, { "epoch": 0.39, "grad_norm": 1.4636484106333545, "learning_rate": 6.9799689318923926e-06, "loss": 0.5496, "step": 5486 }, { "epoch": 0.39, "grad_norm": 1.7596583410894917, "learning_rate": 6.978913653374196e-06, "loss": 0.5328, "step": 5487 }, { "epoch": 0.39, "grad_norm": 1.7885507047466982, "learning_rate": 6.977858270323077e-06, "loss": 0.5423, "step": 5488 }, { "epoch": 0.39, "grad_norm": 1.437732607005903, "learning_rate": 6.97680278279478e-06, "loss": 0.5405, "step": 5489 }, { "epoch": 0.39, "grad_norm": 1.518304009580643, "learning_rate": 6.975747190845064e-06, "loss": 0.5385, "step": 5490 }, { "epoch": 0.39, "grad_norm": 0.7171058273447316, "learning_rate": 6.974691494529686e-06, "loss": 0.453, "step": 5491 }, { "epoch": 0.39, "grad_norm": 1.5387649864551098, "learning_rate": 6.973635693904413e-06, "loss": 0.5624, "step": 5492 }, { "epoch": 0.39, "grad_norm": 1.5425605459738025, "learning_rate": 6.972579789025012e-06, "loss": 0.5281, "step": 5493 }, { "epoch": 0.39, "grad_norm": 1.6020729739585984, "learning_rate": 6.971523779947266e-06, "loss": 0.5268, "step": 5494 }, { "epoch": 0.39, "grad_norm": 1.725835405501458, "learning_rate": 6.97046766672695e-06, "loss": 0.5702, "step": 5495 }, { "epoch": 0.39, "grad_norm": 2.5992457408726932, "learning_rate": 6.969411449419857e-06, "loss": 0.5814, "step": 5496 }, { "epoch": 0.39, "grad_norm": 1.5659065974527402, "learning_rate": 6.968355128081778e-06, "loss": 0.5765, "step": 5497 }, { "epoch": 0.39, "grad_norm": 1.6567854670617717, "learning_rate": 6.967298702768512e-06, "loss": 0.5008, "step": 5498 }, { "epoch": 0.39, "grad_norm": 1.476791477215469, "learning_rate": 6.966242173535861e-06, "loss": 0.4922, "step": 5499 }, { "epoch": 0.39, "grad_norm": 1.5080019724613851, "learning_rate": 6.965185540439636e-06, "loss": 0.5337, "step": 5500 }, { "epoch": 0.39, "grad_norm": 1.6913522053754984, "learning_rate": 6.964128803535654e-06, "loss": 0.5867, "step": 5501 }, { "epoch": 0.39, "grad_norm": 2.205258827279319, "learning_rate": 6.96307196287973e-06, "loss": 0.6313, "step": 5502 }, { "epoch": 0.39, "grad_norm": 1.7385492070742876, "learning_rate": 6.962015018527694e-06, "loss": 0.5833, "step": 5503 }, { "epoch": 0.39, "grad_norm": 1.833472738520152, "learning_rate": 6.960957970535378e-06, "loss": 0.5426, "step": 5504 }, { "epoch": 0.39, "grad_norm": 1.9253211182477978, "learning_rate": 6.959900818958617e-06, "loss": 0.5623, "step": 5505 }, { "epoch": 0.39, "grad_norm": 1.4237409443332223, "learning_rate": 6.958843563853253e-06, "loss": 0.5719, "step": 5506 }, { "epoch": 0.39, "grad_norm": 2.5716754846812706, "learning_rate": 6.957786205275135e-06, "loss": 0.6139, "step": 5507 }, { "epoch": 0.39, "grad_norm": 2.0062908247689517, "learning_rate": 6.956728743280116e-06, "loss": 0.5996, "step": 5508 }, { "epoch": 0.39, "grad_norm": 2.0080184173781803, "learning_rate": 6.955671177924053e-06, "loss": 0.5285, "step": 5509 }, { "epoch": 0.39, "grad_norm": 0.7371978294127515, "learning_rate": 6.954613509262812e-06, "loss": 0.4415, "step": 5510 }, { "epoch": 0.39, "grad_norm": 1.6778661898277907, "learning_rate": 6.9535557373522645e-06, "loss": 0.5302, "step": 5511 }, { "epoch": 0.39, "grad_norm": 1.7695826499461158, "learning_rate": 6.952497862248281e-06, "loss": 0.6118, "step": 5512 }, { "epoch": 0.39, "grad_norm": 1.6469923096326593, "learning_rate": 6.951439884006745e-06, "loss": 0.6109, "step": 5513 }, { "epoch": 0.39, "grad_norm": 2.347956152381896, "learning_rate": 6.950381802683542e-06, "loss": 0.5223, "step": 5514 }, { "epoch": 0.39, "grad_norm": 3.627441774934008, "learning_rate": 6.949323618334563e-06, "loss": 0.5319, "step": 5515 }, { "epoch": 0.39, "grad_norm": 1.619929019935426, "learning_rate": 6.948265331015708e-06, "loss": 0.5797, "step": 5516 }, { "epoch": 0.39, "grad_norm": 1.5233634071970434, "learning_rate": 6.947206940782873e-06, "loss": 0.5739, "step": 5517 }, { "epoch": 0.39, "grad_norm": 1.6489499093897342, "learning_rate": 6.946148447691971e-06, "loss": 0.6164, "step": 5518 }, { "epoch": 0.39, "grad_norm": 2.061217928735542, "learning_rate": 6.9450898517989125e-06, "loss": 0.5913, "step": 5519 }, { "epoch": 0.39, "grad_norm": 1.5630271619513867, "learning_rate": 6.94403115315962e-06, "loss": 0.4685, "step": 5520 }, { "epoch": 0.39, "grad_norm": 1.759024327821406, "learning_rate": 6.9429723518300114e-06, "loss": 0.5919, "step": 5521 }, { "epoch": 0.39, "grad_norm": 1.4861896090036453, "learning_rate": 6.941913447866022e-06, "loss": 0.5001, "step": 5522 }, { "epoch": 0.39, "grad_norm": 1.7021432220560504, "learning_rate": 6.940854441323582e-06, "loss": 0.625, "step": 5523 }, { "epoch": 0.39, "grad_norm": 2.335887201077733, "learning_rate": 6.939795332258635e-06, "loss": 0.5378, "step": 5524 }, { "epoch": 0.39, "grad_norm": 1.89099725927641, "learning_rate": 6.938736120727125e-06, "loss": 0.6076, "step": 5525 }, { "epoch": 0.39, "grad_norm": 1.5652230938459508, "learning_rate": 6.937676806785005e-06, "loss": 0.5307, "step": 5526 }, { "epoch": 0.39, "grad_norm": 1.6515498144714036, "learning_rate": 6.936617390488229e-06, "loss": 0.608, "step": 5527 }, { "epoch": 0.39, "grad_norm": 1.493605197873879, "learning_rate": 6.935557871892762e-06, "loss": 0.5882, "step": 5528 }, { "epoch": 0.39, "grad_norm": 1.8378243332029514, "learning_rate": 6.934498251054569e-06, "loss": 0.6378, "step": 5529 }, { "epoch": 0.39, "grad_norm": 2.024296820190803, "learning_rate": 6.933438528029624e-06, "loss": 0.5648, "step": 5530 }, { "epoch": 0.39, "grad_norm": 1.6553114656221715, "learning_rate": 6.932378702873902e-06, "loss": 0.5656, "step": 5531 }, { "epoch": 0.39, "grad_norm": 1.835926655970203, "learning_rate": 6.931318775643389e-06, "loss": 0.6075, "step": 5532 }, { "epoch": 0.39, "grad_norm": 1.6242146515764637, "learning_rate": 6.930258746394077e-06, "loss": 0.5741, "step": 5533 }, { "epoch": 0.39, "grad_norm": 1.8055460852708354, "learning_rate": 6.929198615181956e-06, "loss": 0.6057, "step": 5534 }, { "epoch": 0.39, "grad_norm": 1.83730243318912, "learning_rate": 6.928138382063028e-06, "loss": 0.564, "step": 5535 }, { "epoch": 0.39, "grad_norm": 1.5367323898398595, "learning_rate": 6.927078047093295e-06, "loss": 0.523, "step": 5536 }, { "epoch": 0.39, "grad_norm": 0.7736651132226489, "learning_rate": 6.926017610328772e-06, "loss": 0.4768, "step": 5537 }, { "epoch": 0.39, "grad_norm": 2.1678610537479277, "learning_rate": 6.924957071825471e-06, "loss": 0.5637, "step": 5538 }, { "epoch": 0.39, "grad_norm": 1.7923049757878826, "learning_rate": 6.923896431639416e-06, "loss": 0.5609, "step": 5539 }, { "epoch": 0.39, "grad_norm": 8.208093182543358, "learning_rate": 6.9228356898266315e-06, "loss": 0.5766, "step": 5540 }, { "epoch": 0.39, "grad_norm": 1.4134743765794109, "learning_rate": 6.921774846443152e-06, "loss": 0.5258, "step": 5541 }, { "epoch": 0.39, "grad_norm": 1.7137945410198139, "learning_rate": 6.920713901545012e-06, "loss": 0.6041, "step": 5542 }, { "epoch": 0.39, "grad_norm": 0.7897668901644281, "learning_rate": 6.919652855188255e-06, "loss": 0.4498, "step": 5543 }, { "epoch": 0.39, "grad_norm": 1.5531551614970929, "learning_rate": 6.918591707428931e-06, "loss": 0.591, "step": 5544 }, { "epoch": 0.39, "grad_norm": 1.6112693402794336, "learning_rate": 6.917530458323092e-06, "loss": 0.5709, "step": 5545 }, { "epoch": 0.39, "grad_norm": 1.4587945730890088, "learning_rate": 6.916469107926795e-06, "loss": 0.5142, "step": 5546 }, { "epoch": 0.39, "grad_norm": 1.7356511428978043, "learning_rate": 6.915407656296105e-06, "loss": 0.5623, "step": 5547 }, { "epoch": 0.39, "grad_norm": 0.7955653494262135, "learning_rate": 6.914346103487095e-06, "loss": 0.4467, "step": 5548 }, { "epoch": 0.39, "grad_norm": 1.5441931072619968, "learning_rate": 6.913284449555835e-06, "loss": 0.5491, "step": 5549 }, { "epoch": 0.39, "grad_norm": 2.1256322733340283, "learning_rate": 6.912222694558409e-06, "loss": 0.5526, "step": 5550 }, { "epoch": 0.39, "grad_norm": 1.8070476473226875, "learning_rate": 6.911160838550901e-06, "loss": 0.5505, "step": 5551 }, { "epoch": 0.39, "grad_norm": 2.1615752537695063, "learning_rate": 6.910098881589401e-06, "loss": 0.5172, "step": 5552 }, { "epoch": 0.39, "grad_norm": 1.490696491603564, "learning_rate": 6.909036823730004e-06, "loss": 0.5499, "step": 5553 }, { "epoch": 0.39, "grad_norm": 1.9077516476450789, "learning_rate": 6.907974665028816e-06, "loss": 0.5238, "step": 5554 }, { "epoch": 0.39, "grad_norm": 1.4640424149452718, "learning_rate": 6.906912405541939e-06, "loss": 0.4942, "step": 5555 }, { "epoch": 0.39, "grad_norm": 1.8308303948325417, "learning_rate": 6.905850045325489e-06, "loss": 0.5354, "step": 5556 }, { "epoch": 0.39, "grad_norm": 1.7732798774369198, "learning_rate": 6.90478758443558e-06, "loss": 0.6156, "step": 5557 }, { "epoch": 0.39, "grad_norm": 1.5818843559724487, "learning_rate": 6.903725022928338e-06, "loss": 0.5484, "step": 5558 }, { "epoch": 0.39, "grad_norm": 1.7172924597250256, "learning_rate": 6.902662360859888e-06, "loss": 0.5618, "step": 5559 }, { "epoch": 0.39, "grad_norm": 1.6709906481632917, "learning_rate": 6.901599598286367e-06, "loss": 0.5384, "step": 5560 }, { "epoch": 0.39, "grad_norm": 1.6938163143135139, "learning_rate": 6.900536735263909e-06, "loss": 0.6494, "step": 5561 }, { "epoch": 0.39, "grad_norm": 1.7156430100369624, "learning_rate": 6.899473771848664e-06, "loss": 0.5727, "step": 5562 }, { "epoch": 0.39, "grad_norm": 1.6170714665816175, "learning_rate": 6.898410708096775e-06, "loss": 0.5883, "step": 5563 }, { "epoch": 0.39, "grad_norm": 1.6378017042914796, "learning_rate": 6.897347544064401e-06, "loss": 0.5488, "step": 5564 }, { "epoch": 0.39, "grad_norm": 1.5170212737110325, "learning_rate": 6.896284279807702e-06, "loss": 0.5679, "step": 5565 }, { "epoch": 0.39, "grad_norm": 1.622743572919213, "learning_rate": 6.89522091538284e-06, "loss": 0.5785, "step": 5566 }, { "epoch": 0.4, "grad_norm": 1.983217406395213, "learning_rate": 6.894157450845988e-06, "loss": 0.4835, "step": 5567 }, { "epoch": 0.4, "grad_norm": 1.6005597958924112, "learning_rate": 6.8930938862533215e-06, "loss": 0.5587, "step": 5568 }, { "epoch": 0.4, "grad_norm": 1.8524016956666391, "learning_rate": 6.892030221661022e-06, "loss": 0.5351, "step": 5569 }, { "epoch": 0.4, "grad_norm": 1.5390459725289387, "learning_rate": 6.890966457125272e-06, "loss": 0.5531, "step": 5570 }, { "epoch": 0.4, "grad_norm": 1.7904894277321897, "learning_rate": 6.889902592702269e-06, "loss": 0.5465, "step": 5571 }, { "epoch": 0.4, "grad_norm": 1.8963662840452666, "learning_rate": 6.888838628448207e-06, "loss": 0.5948, "step": 5572 }, { "epoch": 0.4, "grad_norm": 1.992793126939426, "learning_rate": 6.887774564419288e-06, "loss": 0.4901, "step": 5573 }, { "epoch": 0.4, "grad_norm": 1.3974349691706454, "learning_rate": 6.886710400671719e-06, "loss": 0.4968, "step": 5574 }, { "epoch": 0.4, "grad_norm": 1.7453139173229686, "learning_rate": 6.885646137261714e-06, "loss": 0.6097, "step": 5575 }, { "epoch": 0.4, "grad_norm": 1.8266398962139143, "learning_rate": 6.884581774245492e-06, "loss": 0.5843, "step": 5576 }, { "epoch": 0.4, "grad_norm": 2.156518215294243, "learning_rate": 6.883517311679273e-06, "loss": 0.5277, "step": 5577 }, { "epoch": 0.4, "grad_norm": 1.8474835020724458, "learning_rate": 6.882452749619288e-06, "loss": 0.4614, "step": 5578 }, { "epoch": 0.4, "grad_norm": 2.071261807309986, "learning_rate": 6.88138808812177e-06, "loss": 0.5083, "step": 5579 }, { "epoch": 0.4, "grad_norm": 1.413079121466319, "learning_rate": 6.880323327242959e-06, "loss": 0.5769, "step": 5580 }, { "epoch": 0.4, "grad_norm": 1.578091386227666, "learning_rate": 6.879258467039098e-06, "loss": 0.4993, "step": 5581 }, { "epoch": 0.4, "grad_norm": 1.625801394165498, "learning_rate": 6.878193507566437e-06, "loss": 0.6408, "step": 5582 }, { "epoch": 0.4, "grad_norm": 1.651972448241917, "learning_rate": 6.877128448881231e-06, "loss": 0.6023, "step": 5583 }, { "epoch": 0.4, "grad_norm": 1.5703094719923367, "learning_rate": 6.876063291039739e-06, "loss": 0.5529, "step": 5584 }, { "epoch": 0.4, "grad_norm": 1.572250270925388, "learning_rate": 6.874998034098226e-06, "loss": 0.4998, "step": 5585 }, { "epoch": 0.4, "grad_norm": 2.0839674689804246, "learning_rate": 6.873932678112966e-06, "loss": 0.5403, "step": 5586 }, { "epoch": 0.4, "grad_norm": 1.5214643438316249, "learning_rate": 6.872867223140231e-06, "loss": 0.5098, "step": 5587 }, { "epoch": 0.4, "grad_norm": 1.382129523709888, "learning_rate": 6.871801669236304e-06, "loss": 0.5425, "step": 5588 }, { "epoch": 0.4, "grad_norm": 1.7010059114305394, "learning_rate": 6.8707360164574684e-06, "loss": 0.545, "step": 5589 }, { "epoch": 0.4, "grad_norm": 1.8509185231245286, "learning_rate": 6.869670264860018e-06, "loss": 0.4941, "step": 5590 }, { "epoch": 0.4, "grad_norm": 1.722483900111449, "learning_rate": 6.86860441450025e-06, "loss": 0.5756, "step": 5591 }, { "epoch": 0.4, "grad_norm": 1.7723268456838808, "learning_rate": 6.867538465434464e-06, "loss": 0.5609, "step": 5592 }, { "epoch": 0.4, "grad_norm": 1.396617276758096, "learning_rate": 6.866472417718969e-06, "loss": 0.5182, "step": 5593 }, { "epoch": 0.4, "grad_norm": 1.5708317461495733, "learning_rate": 6.8654062714100765e-06, "loss": 0.5494, "step": 5594 }, { "epoch": 0.4, "grad_norm": 1.4556654353668395, "learning_rate": 6.864340026564103e-06, "loss": 0.493, "step": 5595 }, { "epoch": 0.4, "grad_norm": 1.6370897174494747, "learning_rate": 6.863273683237373e-06, "loss": 0.5378, "step": 5596 }, { "epoch": 0.4, "grad_norm": 3.4664310731447485, "learning_rate": 6.862207241486214e-06, "loss": 0.5539, "step": 5597 }, { "epoch": 0.4, "grad_norm": 1.3978659302217062, "learning_rate": 6.861140701366957e-06, "loss": 0.4836, "step": 5598 }, { "epoch": 0.4, "grad_norm": 1.9541324066520893, "learning_rate": 6.860074062935942e-06, "loss": 0.5382, "step": 5599 }, { "epoch": 0.4, "grad_norm": 1.6012210103020015, "learning_rate": 6.859007326249512e-06, "loss": 0.511, "step": 5600 }, { "epoch": 0.4, "grad_norm": 1.957469413347741, "learning_rate": 6.857940491364017e-06, "loss": 0.5637, "step": 5601 }, { "epoch": 0.4, "grad_norm": 2.1826469063384217, "learning_rate": 6.856873558335808e-06, "loss": 0.5119, "step": 5602 }, { "epoch": 0.4, "grad_norm": 1.6498085167653496, "learning_rate": 6.8558065272212485e-06, "loss": 0.5134, "step": 5603 }, { "epoch": 0.4, "grad_norm": 1.958900426203835, "learning_rate": 6.854739398076698e-06, "loss": 0.6051, "step": 5604 }, { "epoch": 0.4, "grad_norm": 1.5899522703933686, "learning_rate": 6.853672170958526e-06, "loss": 0.5082, "step": 5605 }, { "epoch": 0.4, "grad_norm": 1.8109379055957895, "learning_rate": 6.852604845923111e-06, "loss": 0.5774, "step": 5606 }, { "epoch": 0.4, "grad_norm": 1.3901996958422966, "learning_rate": 6.851537423026831e-06, "loss": 0.5777, "step": 5607 }, { "epoch": 0.4, "grad_norm": 1.710336638828316, "learning_rate": 6.850469902326069e-06, "loss": 0.5847, "step": 5608 }, { "epoch": 0.4, "grad_norm": 1.772652010949035, "learning_rate": 6.849402283877218e-06, "loss": 0.5742, "step": 5609 }, { "epoch": 0.4, "grad_norm": 1.750357669101453, "learning_rate": 6.848334567736671e-06, "loss": 0.5656, "step": 5610 }, { "epoch": 0.4, "grad_norm": 0.8027268503190242, "learning_rate": 6.84726675396083e-06, "loss": 0.4726, "step": 5611 }, { "epoch": 0.4, "grad_norm": 1.9940014535981887, "learning_rate": 6.846198842606097e-06, "loss": 0.5843, "step": 5612 }, { "epoch": 0.4, "grad_norm": 2.211991474169648, "learning_rate": 6.8451308337288865e-06, "loss": 0.4784, "step": 5613 }, { "epoch": 0.4, "grad_norm": 1.4503197477331375, "learning_rate": 6.8440627273856145e-06, "loss": 0.6183, "step": 5614 }, { "epoch": 0.4, "grad_norm": 1.6703423423881074, "learning_rate": 6.8429945236327e-06, "loss": 0.5578, "step": 5615 }, { "epoch": 0.4, "grad_norm": 1.7677882721339337, "learning_rate": 6.84192622252657e-06, "loss": 0.595, "step": 5616 }, { "epoch": 0.4, "grad_norm": 0.7995572265132935, "learning_rate": 6.840857824123655e-06, "loss": 0.4509, "step": 5617 }, { "epoch": 0.4, "grad_norm": 1.4477045485234898, "learning_rate": 6.839789328480394e-06, "loss": 0.5098, "step": 5618 }, { "epoch": 0.4, "grad_norm": 1.4135206808136151, "learning_rate": 6.838720735653225e-06, "loss": 0.4989, "step": 5619 }, { "epoch": 0.4, "grad_norm": 1.5668820329403335, "learning_rate": 6.837652045698595e-06, "loss": 0.4968, "step": 5620 }, { "epoch": 0.4, "grad_norm": 1.6893947263309117, "learning_rate": 6.836583258672958e-06, "loss": 0.5485, "step": 5621 }, { "epoch": 0.4, "grad_norm": 1.69854202876631, "learning_rate": 6.8355143746327724e-06, "loss": 0.4553, "step": 5622 }, { "epoch": 0.4, "grad_norm": 1.6280083274578712, "learning_rate": 6.834445393634495e-06, "loss": 0.4861, "step": 5623 }, { "epoch": 0.4, "grad_norm": 2.024542693167338, "learning_rate": 6.833376315734599e-06, "loss": 0.5782, "step": 5624 }, { "epoch": 0.4, "grad_norm": 1.4717995400132047, "learning_rate": 6.832307140989551e-06, "loss": 0.5364, "step": 5625 }, { "epoch": 0.4, "grad_norm": 1.5976753478418442, "learning_rate": 6.831237869455833e-06, "loss": 0.5397, "step": 5626 }, { "epoch": 0.4, "grad_norm": 1.710974527552355, "learning_rate": 6.830168501189924e-06, "loss": 0.6024, "step": 5627 }, { "epoch": 0.4, "grad_norm": 1.5456147954508466, "learning_rate": 6.829099036248313e-06, "loss": 0.551, "step": 5628 }, { "epoch": 0.4, "grad_norm": 1.6950299355321186, "learning_rate": 6.828029474687494e-06, "loss": 0.5807, "step": 5629 }, { "epoch": 0.4, "grad_norm": 1.6615545251474442, "learning_rate": 6.826959816563964e-06, "loss": 0.5709, "step": 5630 }, { "epoch": 0.4, "grad_norm": 7.472720454767171, "learning_rate": 6.825890061934226e-06, "loss": 0.5079, "step": 5631 }, { "epoch": 0.4, "grad_norm": 1.4868501281574187, "learning_rate": 6.824820210854788e-06, "loss": 0.56, "step": 5632 }, { "epoch": 0.4, "grad_norm": 1.561822855805966, "learning_rate": 6.823750263382164e-06, "loss": 0.5152, "step": 5633 }, { "epoch": 0.4, "grad_norm": 0.834757427001846, "learning_rate": 6.822680219572869e-06, "loss": 0.4673, "step": 5634 }, { "epoch": 0.4, "grad_norm": 2.038292723489558, "learning_rate": 6.82161007948343e-06, "loss": 0.5605, "step": 5635 }, { "epoch": 0.4, "grad_norm": 1.7401537070849864, "learning_rate": 6.820539843170374e-06, "loss": 0.4735, "step": 5636 }, { "epoch": 0.4, "grad_norm": 1.7129843580890953, "learning_rate": 6.8194695106902355e-06, "loss": 0.5399, "step": 5637 }, { "epoch": 0.4, "grad_norm": 2.9274250569026057, "learning_rate": 6.8183990820995514e-06, "loss": 0.6126, "step": 5638 }, { "epoch": 0.4, "grad_norm": 1.5795876338278925, "learning_rate": 6.817328557454868e-06, "loss": 0.5424, "step": 5639 }, { "epoch": 0.4, "grad_norm": 1.727461184021476, "learning_rate": 6.816257936812729e-06, "loss": 0.4931, "step": 5640 }, { "epoch": 0.4, "grad_norm": 1.5132400771075096, "learning_rate": 6.8151872202296935e-06, "loss": 0.5314, "step": 5641 }, { "epoch": 0.4, "grad_norm": 1.5956439730358498, "learning_rate": 6.8141164077623165e-06, "loss": 0.5094, "step": 5642 }, { "epoch": 0.4, "grad_norm": 1.9182576340802884, "learning_rate": 6.813045499467167e-06, "loss": 0.5434, "step": 5643 }, { "epoch": 0.4, "grad_norm": 2.40139825165419, "learning_rate": 6.811974495400808e-06, "loss": 0.6132, "step": 5644 }, { "epoch": 0.4, "grad_norm": 1.5540229804362857, "learning_rate": 6.810903395619816e-06, "loss": 0.5442, "step": 5645 }, { "epoch": 0.4, "grad_norm": 1.6818075802953034, "learning_rate": 6.809832200180774e-06, "loss": 0.5657, "step": 5646 }, { "epoch": 0.4, "grad_norm": 1.89080822850704, "learning_rate": 6.80876090914026e-06, "loss": 0.5161, "step": 5647 }, { "epoch": 0.4, "grad_norm": 1.6960860794238837, "learning_rate": 6.807689522554867e-06, "loss": 0.5928, "step": 5648 }, { "epoch": 0.4, "grad_norm": 2.459358962203065, "learning_rate": 6.806618040481186e-06, "loss": 0.5379, "step": 5649 }, { "epoch": 0.4, "grad_norm": 1.6556605118514494, "learning_rate": 6.805546462975821e-06, "loss": 0.5304, "step": 5650 }, { "epoch": 0.4, "grad_norm": 0.7143538680416827, "learning_rate": 6.804474790095373e-06, "loss": 0.4345, "step": 5651 }, { "epoch": 0.4, "grad_norm": 1.4176980928079508, "learning_rate": 6.803403021896451e-06, "loss": 0.4786, "step": 5652 }, { "epoch": 0.4, "grad_norm": 0.8445155104414335, "learning_rate": 6.802331158435671e-06, "loss": 0.4549, "step": 5653 }, { "epoch": 0.4, "grad_norm": 3.512228846867844, "learning_rate": 6.801259199769654e-06, "loss": 0.5791, "step": 5654 }, { "epoch": 0.4, "grad_norm": 1.4877928966756284, "learning_rate": 6.80018714595502e-06, "loss": 0.572, "step": 5655 }, { "epoch": 0.4, "grad_norm": 1.8087461190308793, "learning_rate": 6.799114997048402e-06, "loss": 0.5811, "step": 5656 }, { "epoch": 0.4, "grad_norm": 1.6387938712991292, "learning_rate": 6.7980427531064334e-06, "loss": 0.502, "step": 5657 }, { "epoch": 0.4, "grad_norm": 0.827290834218492, "learning_rate": 6.796970414185755e-06, "loss": 0.4548, "step": 5658 }, { "epoch": 0.4, "grad_norm": 1.6467961269597464, "learning_rate": 6.795897980343009e-06, "loss": 0.4916, "step": 5659 }, { "epoch": 0.4, "grad_norm": 1.5919611647926077, "learning_rate": 6.794825451634848e-06, "loss": 0.5503, "step": 5660 }, { "epoch": 0.4, "grad_norm": 2.444908573300306, "learning_rate": 6.793752828117924e-06, "loss": 0.5125, "step": 5661 }, { "epoch": 0.4, "grad_norm": 1.628727627656973, "learning_rate": 6.792680109848897e-06, "loss": 0.5138, "step": 5662 }, { "epoch": 0.4, "grad_norm": 0.8305852345271679, "learning_rate": 6.791607296884433e-06, "loss": 0.4642, "step": 5663 }, { "epoch": 0.4, "grad_norm": 1.6246094028930107, "learning_rate": 6.790534389281201e-06, "loss": 0.6007, "step": 5664 }, { "epoch": 0.4, "grad_norm": 1.5118590842739732, "learning_rate": 6.7894613870958746e-06, "loss": 0.5545, "step": 5665 }, { "epoch": 0.4, "grad_norm": 2.0338614362699827, "learning_rate": 6.788388290385135e-06, "loss": 0.5998, "step": 5666 }, { "epoch": 0.4, "grad_norm": 1.555367359286219, "learning_rate": 6.7873150992056656e-06, "loss": 0.5793, "step": 5667 }, { "epoch": 0.4, "grad_norm": 1.8047183803600875, "learning_rate": 6.786241813614156e-06, "loss": 0.4846, "step": 5668 }, { "epoch": 0.4, "grad_norm": 1.8413359563596212, "learning_rate": 6.785168433667302e-06, "loss": 0.5069, "step": 5669 }, { "epoch": 0.4, "grad_norm": 1.550149455290965, "learning_rate": 6.784094959421802e-06, "loss": 0.5626, "step": 5670 }, { "epoch": 0.4, "grad_norm": 1.4433808141021034, "learning_rate": 6.783021390934361e-06, "loss": 0.4468, "step": 5671 }, { "epoch": 0.4, "grad_norm": 0.7575777218678642, "learning_rate": 6.781947728261687e-06, "loss": 0.4616, "step": 5672 }, { "epoch": 0.4, "grad_norm": 1.5836416875934447, "learning_rate": 6.780873971460499e-06, "loss": 0.5076, "step": 5673 }, { "epoch": 0.4, "grad_norm": 1.718955560214106, "learning_rate": 6.779800120587511e-06, "loss": 0.5346, "step": 5674 }, { "epoch": 0.4, "grad_norm": 1.7507806051631802, "learning_rate": 6.778726175699451e-06, "loss": 0.5368, "step": 5675 }, { "epoch": 0.4, "grad_norm": 2.6249175609672846, "learning_rate": 6.7776521368530455e-06, "loss": 0.5365, "step": 5676 }, { "epoch": 0.4, "grad_norm": 1.6974375994840956, "learning_rate": 6.776578004105032e-06, "loss": 0.4653, "step": 5677 }, { "epoch": 0.4, "grad_norm": 1.9163559566381556, "learning_rate": 6.775503777512149e-06, "loss": 0.5867, "step": 5678 }, { "epoch": 0.4, "grad_norm": 1.6165440078703628, "learning_rate": 6.774429457131139e-06, "loss": 0.5397, "step": 5679 }, { "epoch": 0.4, "grad_norm": 3.0008870392777984, "learning_rate": 6.773355043018753e-06, "loss": 0.5501, "step": 5680 }, { "epoch": 0.4, "grad_norm": 1.5696745764839155, "learning_rate": 6.7722805352317446e-06, "loss": 0.5793, "step": 5681 }, { "epoch": 0.4, "grad_norm": 0.7715710307914138, "learning_rate": 6.771205933826874e-06, "loss": 0.4428, "step": 5682 }, { "epoch": 0.4, "grad_norm": 0.8051298144770578, "learning_rate": 6.770131238860903e-06, "loss": 0.4615, "step": 5683 }, { "epoch": 0.4, "grad_norm": 1.573780428461699, "learning_rate": 6.769056450390603e-06, "loss": 0.5455, "step": 5684 }, { "epoch": 0.4, "grad_norm": 0.8328773318504669, "learning_rate": 6.7679815684727455e-06, "loss": 0.4601, "step": 5685 }, { "epoch": 0.4, "grad_norm": 1.5050988930162001, "learning_rate": 6.766906593164111e-06, "loss": 0.5287, "step": 5686 }, { "epoch": 0.4, "grad_norm": 1.6102258322150869, "learning_rate": 6.765831524521484e-06, "loss": 0.5765, "step": 5687 }, { "epoch": 0.4, "grad_norm": 1.691210294969553, "learning_rate": 6.7647563626016525e-06, "loss": 0.4971, "step": 5688 }, { "epoch": 0.4, "grad_norm": 1.4711216302559886, "learning_rate": 6.76368110746141e-06, "loss": 0.5253, "step": 5689 }, { "epoch": 0.4, "grad_norm": 1.7436965116512015, "learning_rate": 6.762605759157555e-06, "loss": 0.537, "step": 5690 }, { "epoch": 0.4, "grad_norm": 1.6577416075556308, "learning_rate": 6.761530317746892e-06, "loss": 0.604, "step": 5691 }, { "epoch": 0.4, "grad_norm": 1.932992900789687, "learning_rate": 6.7604547832862276e-06, "loss": 0.5117, "step": 5692 }, { "epoch": 0.4, "grad_norm": 0.8142297446367894, "learning_rate": 6.759379155832377e-06, "loss": 0.469, "step": 5693 }, { "epoch": 0.4, "grad_norm": 2.145549117706357, "learning_rate": 6.758303435442156e-06, "loss": 0.589, "step": 5694 }, { "epoch": 0.4, "grad_norm": 1.7448035989312431, "learning_rate": 6.757227622172392e-06, "loss": 0.5123, "step": 5695 }, { "epoch": 0.4, "grad_norm": 1.6041367449204575, "learning_rate": 6.7561517160799095e-06, "loss": 0.5434, "step": 5696 }, { "epoch": 0.4, "grad_norm": 1.5159738046616373, "learning_rate": 6.755075717221544e-06, "loss": 0.5296, "step": 5697 }, { "epoch": 0.4, "grad_norm": 1.6545479522505093, "learning_rate": 6.75399962565413e-06, "loss": 0.5924, "step": 5698 }, { "epoch": 0.4, "grad_norm": 1.6165168522602136, "learning_rate": 6.752923441434514e-06, "loss": 0.501, "step": 5699 }, { "epoch": 0.4, "grad_norm": 1.77212819530055, "learning_rate": 6.751847164619543e-06, "loss": 0.5602, "step": 5700 }, { "epoch": 0.4, "grad_norm": 1.697554077463077, "learning_rate": 6.7507707952660665e-06, "loss": 0.5596, "step": 5701 }, { "epoch": 0.4, "grad_norm": 2.2542574726760134, "learning_rate": 6.749694333430944e-06, "loss": 0.4851, "step": 5702 }, { "epoch": 0.4, "grad_norm": 1.6885383612703997, "learning_rate": 6.748617779171041e-06, "loss": 0.5111, "step": 5703 }, { "epoch": 0.4, "grad_norm": 1.574852551048577, "learning_rate": 6.747541132543218e-06, "loss": 0.4878, "step": 5704 }, { "epoch": 0.4, "grad_norm": 1.5277930125887604, "learning_rate": 6.746464393604354e-06, "loss": 0.4939, "step": 5705 }, { "epoch": 0.4, "grad_norm": 2.0976845936789403, "learning_rate": 6.745387562411321e-06, "loss": 0.5536, "step": 5706 }, { "epoch": 0.4, "grad_norm": 1.6130580734348265, "learning_rate": 6.744310639021003e-06, "loss": 0.5271, "step": 5707 }, { "epoch": 0.41, "grad_norm": 2.01339669451809, "learning_rate": 6.743233623490287e-06, "loss": 0.5094, "step": 5708 }, { "epoch": 0.41, "grad_norm": 0.7905027224675524, "learning_rate": 6.742156515876064e-06, "loss": 0.4653, "step": 5709 }, { "epoch": 0.41, "grad_norm": 1.5905269142101468, "learning_rate": 6.741079316235231e-06, "loss": 0.4987, "step": 5710 }, { "epoch": 0.41, "grad_norm": 1.6846557278146237, "learning_rate": 6.740002024624688e-06, "loss": 0.5437, "step": 5711 }, { "epoch": 0.41, "grad_norm": 1.5367251246347433, "learning_rate": 6.738924641101343e-06, "loss": 0.5246, "step": 5712 }, { "epoch": 0.41, "grad_norm": 2.0674745172192943, "learning_rate": 6.737847165722107e-06, "loss": 0.5427, "step": 5713 }, { "epoch": 0.41, "grad_norm": 0.805614223440847, "learning_rate": 6.736769598543894e-06, "loss": 0.4449, "step": 5714 }, { "epoch": 0.41, "grad_norm": 1.9063365055437047, "learning_rate": 6.735691939623626e-06, "loss": 0.5054, "step": 5715 }, { "epoch": 0.41, "grad_norm": 1.7558614145447742, "learning_rate": 6.734614189018227e-06, "loss": 0.5232, "step": 5716 }, { "epoch": 0.41, "grad_norm": 1.5348754071985198, "learning_rate": 6.733536346784631e-06, "loss": 0.5174, "step": 5717 }, { "epoch": 0.41, "grad_norm": 3.144195997605208, "learning_rate": 6.73245841297977e-06, "loss": 0.5206, "step": 5718 }, { "epoch": 0.41, "grad_norm": 2.151952167554706, "learning_rate": 6.7313803876605855e-06, "loss": 0.5762, "step": 5719 }, { "epoch": 0.41, "grad_norm": 1.7179519751478105, "learning_rate": 6.730302270884023e-06, "loss": 0.5357, "step": 5720 }, { "epoch": 0.41, "grad_norm": 3.949699126821526, "learning_rate": 6.7292240627070295e-06, "loss": 0.5702, "step": 5721 }, { "epoch": 0.41, "grad_norm": 1.629099399272092, "learning_rate": 6.7281457631865625e-06, "loss": 0.5637, "step": 5722 }, { "epoch": 0.41, "grad_norm": 1.6794508241025639, "learning_rate": 6.727067372379581e-06, "loss": 0.4718, "step": 5723 }, { "epoch": 0.41, "grad_norm": 1.5956770952264652, "learning_rate": 6.725988890343048e-06, "loss": 0.5324, "step": 5724 }, { "epoch": 0.41, "grad_norm": 0.7235190206520445, "learning_rate": 6.724910317133934e-06, "loss": 0.4269, "step": 5725 }, { "epoch": 0.41, "grad_norm": 1.4676282942084307, "learning_rate": 6.723831652809213e-06, "loss": 0.5089, "step": 5726 }, { "epoch": 0.41, "grad_norm": 1.4863073203782458, "learning_rate": 6.722752897425863e-06, "loss": 0.5181, "step": 5727 }, { "epoch": 0.41, "grad_norm": 1.8540395135905683, "learning_rate": 6.7216740510408655e-06, "loss": 0.6063, "step": 5728 }, { "epoch": 0.41, "grad_norm": 1.7691641916086094, "learning_rate": 6.720595113711212e-06, "loss": 0.5571, "step": 5729 }, { "epoch": 0.41, "grad_norm": 1.6282863440208029, "learning_rate": 6.719516085493894e-06, "loss": 0.6451, "step": 5730 }, { "epoch": 0.41, "grad_norm": 1.8028905669978532, "learning_rate": 6.718436966445911e-06, "loss": 0.6336, "step": 5731 }, { "epoch": 0.41, "grad_norm": 2.1701400717198314, "learning_rate": 6.717357756624263e-06, "loss": 0.6002, "step": 5732 }, { "epoch": 0.41, "grad_norm": 1.6370849216670216, "learning_rate": 6.7162784560859605e-06, "loss": 0.488, "step": 5733 }, { "epoch": 0.41, "grad_norm": 1.618042266572347, "learning_rate": 6.715199064888014e-06, "loss": 0.5585, "step": 5734 }, { "epoch": 0.41, "grad_norm": 1.7973475946349688, "learning_rate": 6.714119583087442e-06, "loss": 0.5748, "step": 5735 }, { "epoch": 0.41, "grad_norm": 1.5830779001178317, "learning_rate": 6.713040010741263e-06, "loss": 0.5482, "step": 5736 }, { "epoch": 0.41, "grad_norm": 0.8175832517242283, "learning_rate": 6.711960347906506e-06, "loss": 0.4546, "step": 5737 }, { "epoch": 0.41, "grad_norm": 1.925210434822727, "learning_rate": 6.7108805946402045e-06, "loss": 0.5221, "step": 5738 }, { "epoch": 0.41, "grad_norm": 2.199953464379688, "learning_rate": 6.709800750999392e-06, "loss": 0.6205, "step": 5739 }, { "epoch": 0.41, "grad_norm": 0.7266441741636346, "learning_rate": 6.70872081704111e-06, "loss": 0.43, "step": 5740 }, { "epoch": 0.41, "grad_norm": 1.98567652805149, "learning_rate": 6.707640792822405e-06, "loss": 0.5494, "step": 5741 }, { "epoch": 0.41, "grad_norm": 1.697844368211131, "learning_rate": 6.706560678400327e-06, "loss": 0.5877, "step": 5742 }, { "epoch": 0.41, "grad_norm": 1.7958680893123848, "learning_rate": 6.705480473831931e-06, "loss": 0.5337, "step": 5743 }, { "epoch": 0.41, "grad_norm": 1.8383336017791638, "learning_rate": 6.704400179174278e-06, "loss": 0.5383, "step": 5744 }, { "epoch": 0.41, "grad_norm": 1.5536424043182566, "learning_rate": 6.703319794484431e-06, "loss": 0.5143, "step": 5745 }, { "epoch": 0.41, "grad_norm": 1.52143293287606, "learning_rate": 6.702239319819462e-06, "loss": 0.5935, "step": 5746 }, { "epoch": 0.41, "grad_norm": 1.6518236420419183, "learning_rate": 6.701158755236443e-06, "loss": 0.5012, "step": 5747 }, { "epoch": 0.41, "grad_norm": 2.0331026921423443, "learning_rate": 6.700078100792456e-06, "loss": 0.5662, "step": 5748 }, { "epoch": 0.41, "grad_norm": 1.592362983700345, "learning_rate": 6.698997356544582e-06, "loss": 0.5768, "step": 5749 }, { "epoch": 0.41, "grad_norm": 1.3722368487055976, "learning_rate": 6.697916522549911e-06, "loss": 0.5506, "step": 5750 }, { "epoch": 0.41, "grad_norm": 1.7940171439446042, "learning_rate": 6.696835598865535e-06, "loss": 0.5959, "step": 5751 }, { "epoch": 0.41, "grad_norm": 1.6868269841295558, "learning_rate": 6.695754585548554e-06, "loss": 0.4601, "step": 5752 }, { "epoch": 0.41, "grad_norm": 1.5186106905035524, "learning_rate": 6.694673482656069e-06, "loss": 0.5474, "step": 5753 }, { "epoch": 0.41, "grad_norm": 1.6519051152536548, "learning_rate": 6.69359229024519e-06, "loss": 0.5828, "step": 5754 }, { "epoch": 0.41, "grad_norm": 17.220139531461516, "learning_rate": 6.692511008373026e-06, "loss": 0.6098, "step": 5755 }, { "epoch": 0.41, "grad_norm": 1.7882914488742574, "learning_rate": 6.6914296370966946e-06, "loss": 0.5359, "step": 5756 }, { "epoch": 0.41, "grad_norm": 1.7018547298903768, "learning_rate": 6.69034817647332e-06, "loss": 0.5607, "step": 5757 }, { "epoch": 0.41, "grad_norm": 1.6300845373513686, "learning_rate": 6.689266626560027e-06, "loss": 0.4767, "step": 5758 }, { "epoch": 0.41, "grad_norm": 1.6593647554783333, "learning_rate": 6.688184987413946e-06, "loss": 0.5007, "step": 5759 }, { "epoch": 0.41, "grad_norm": 1.70715391962162, "learning_rate": 6.687103259092214e-06, "loss": 0.5641, "step": 5760 }, { "epoch": 0.41, "grad_norm": 1.489071048716644, "learning_rate": 6.686021441651972e-06, "loss": 0.499, "step": 5761 }, { "epoch": 0.41, "grad_norm": 0.8570617378494868, "learning_rate": 6.684939535150363e-06, "loss": 0.4424, "step": 5762 }, { "epoch": 0.41, "grad_norm": 1.5602427679855386, "learning_rate": 6.68385753964454e-06, "loss": 0.4831, "step": 5763 }, { "epoch": 0.41, "grad_norm": 0.7482368615049606, "learning_rate": 6.682775455191655e-06, "loss": 0.453, "step": 5764 }, { "epoch": 0.41, "grad_norm": 1.723528573881567, "learning_rate": 6.681693281848869e-06, "loss": 0.5407, "step": 5765 }, { "epoch": 0.41, "grad_norm": 2.6196031432757465, "learning_rate": 6.680611019673344e-06, "loss": 0.5087, "step": 5766 }, { "epoch": 0.41, "grad_norm": 2.306307738471365, "learning_rate": 6.679528668722252e-06, "loss": 0.5362, "step": 5767 }, { "epoch": 0.41, "grad_norm": 1.6948068804369243, "learning_rate": 6.678446229052763e-06, "loss": 0.5271, "step": 5768 }, { "epoch": 0.41, "grad_norm": 2.2205973624194906, "learning_rate": 6.677363700722059e-06, "loss": 0.5506, "step": 5769 }, { "epoch": 0.41, "grad_norm": 1.537629822859046, "learning_rate": 6.67628108378732e-06, "loss": 0.5332, "step": 5770 }, { "epoch": 0.41, "grad_norm": 1.4724493193732056, "learning_rate": 6.675198378305734e-06, "loss": 0.4992, "step": 5771 }, { "epoch": 0.41, "grad_norm": 1.4876039510398744, "learning_rate": 6.674115584334492e-06, "loss": 0.5106, "step": 5772 }, { "epoch": 0.41, "grad_norm": 1.6734011657560028, "learning_rate": 6.673032701930793e-06, "loss": 0.558, "step": 5773 }, { "epoch": 0.41, "grad_norm": 1.595548803551656, "learning_rate": 6.671949731151836e-06, "loss": 0.5845, "step": 5774 }, { "epoch": 0.41, "grad_norm": 2.112351022890642, "learning_rate": 6.670866672054832e-06, "loss": 0.5575, "step": 5775 }, { "epoch": 0.41, "grad_norm": 0.7433940607804329, "learning_rate": 6.669783524696988e-06, "loss": 0.484, "step": 5776 }, { "epoch": 0.41, "grad_norm": 2.0134425174265953, "learning_rate": 6.668700289135519e-06, "loss": 0.5895, "step": 5777 }, { "epoch": 0.41, "grad_norm": 2.0297625476034855, "learning_rate": 6.667616965427648e-06, "loss": 0.4949, "step": 5778 }, { "epoch": 0.41, "grad_norm": 0.7106069278250118, "learning_rate": 6.666533553630596e-06, "loss": 0.4579, "step": 5779 }, { "epoch": 0.41, "grad_norm": 1.8146723135882608, "learning_rate": 6.665450053801596e-06, "loss": 0.5492, "step": 5780 }, { "epoch": 0.41, "grad_norm": 1.8171200575549025, "learning_rate": 6.664366465997881e-06, "loss": 0.5904, "step": 5781 }, { "epoch": 0.41, "grad_norm": 1.5678235917074674, "learning_rate": 6.663282790276689e-06, "loss": 0.4849, "step": 5782 }, { "epoch": 0.41, "grad_norm": 1.5050471351965762, "learning_rate": 6.662199026695264e-06, "loss": 0.4832, "step": 5783 }, { "epoch": 0.41, "grad_norm": 1.6138005637202333, "learning_rate": 6.661115175310856e-06, "loss": 0.5561, "step": 5784 }, { "epoch": 0.41, "grad_norm": 2.327840322474769, "learning_rate": 6.660031236180714e-06, "loss": 0.541, "step": 5785 }, { "epoch": 0.41, "grad_norm": 1.8569371961890748, "learning_rate": 6.658947209362098e-06, "loss": 0.4884, "step": 5786 }, { "epoch": 0.41, "grad_norm": 1.8688859563962748, "learning_rate": 6.657863094912268e-06, "loss": 0.5406, "step": 5787 }, { "epoch": 0.41, "grad_norm": 1.6048391504978559, "learning_rate": 6.656778892888492e-06, "loss": 0.5745, "step": 5788 }, { "epoch": 0.41, "grad_norm": 1.9897227982079615, "learning_rate": 6.655694603348042e-06, "loss": 0.5137, "step": 5789 }, { "epoch": 0.41, "grad_norm": 1.7871049057054125, "learning_rate": 6.65461022634819e-06, "loss": 0.5713, "step": 5790 }, { "epoch": 0.41, "grad_norm": 1.6563575605708936, "learning_rate": 6.6535257619462235e-06, "loss": 0.4688, "step": 5791 }, { "epoch": 0.41, "grad_norm": 2.1114938412394264, "learning_rate": 6.652441210199421e-06, "loss": 0.6042, "step": 5792 }, { "epoch": 0.41, "grad_norm": 1.6606467203691937, "learning_rate": 6.651356571165075e-06, "loss": 0.5664, "step": 5793 }, { "epoch": 0.41, "grad_norm": 1.640992918396469, "learning_rate": 6.650271844900479e-06, "loss": 0.4725, "step": 5794 }, { "epoch": 0.41, "grad_norm": 1.939534503436713, "learning_rate": 6.649187031462932e-06, "loss": 0.5927, "step": 5795 }, { "epoch": 0.41, "grad_norm": 3.027831462441318, "learning_rate": 6.648102130909739e-06, "loss": 0.6497, "step": 5796 }, { "epoch": 0.41, "grad_norm": 1.7887985959466541, "learning_rate": 6.647017143298205e-06, "loss": 0.6044, "step": 5797 }, { "epoch": 0.41, "grad_norm": 2.1472573308679497, "learning_rate": 6.645932068685645e-06, "loss": 0.5699, "step": 5798 }, { "epoch": 0.41, "grad_norm": 2.2014938565627316, "learning_rate": 6.644846907129378e-06, "loss": 0.4672, "step": 5799 }, { "epoch": 0.41, "grad_norm": 1.489588528129715, "learning_rate": 6.643761658686723e-06, "loss": 0.5382, "step": 5800 }, { "epoch": 0.41, "grad_norm": 2.1872491566097336, "learning_rate": 6.642676323415007e-06, "loss": 0.5429, "step": 5801 }, { "epoch": 0.41, "grad_norm": 1.4858081630269468, "learning_rate": 6.64159090137156e-06, "loss": 0.5103, "step": 5802 }, { "epoch": 0.41, "grad_norm": 1.6646291862997982, "learning_rate": 6.6405053926137205e-06, "loss": 0.5529, "step": 5803 }, { "epoch": 0.41, "grad_norm": 2.7234757898647746, "learning_rate": 6.6394197971988275e-06, "loss": 0.5163, "step": 5804 }, { "epoch": 0.41, "grad_norm": 1.7045573649277839, "learning_rate": 6.638334115184225e-06, "loss": 0.5393, "step": 5805 }, { "epoch": 0.41, "grad_norm": 1.59100620795315, "learning_rate": 6.637248346627264e-06, "loss": 0.5761, "step": 5806 }, { "epoch": 0.41, "grad_norm": 1.65334459440667, "learning_rate": 6.636162491585298e-06, "loss": 0.5945, "step": 5807 }, { "epoch": 0.41, "grad_norm": 1.8939127780484482, "learning_rate": 6.6350765501156844e-06, "loss": 0.5611, "step": 5808 }, { "epoch": 0.41, "grad_norm": 1.8084174524423957, "learning_rate": 6.633990522275786e-06, "loss": 0.5581, "step": 5809 }, { "epoch": 0.41, "grad_norm": 3.255443769800256, "learning_rate": 6.6329044081229735e-06, "loss": 0.5735, "step": 5810 }, { "epoch": 0.41, "grad_norm": 1.5527562812405404, "learning_rate": 6.631818207714614e-06, "loss": 0.6108, "step": 5811 }, { "epoch": 0.41, "grad_norm": 1.8891091345518953, "learning_rate": 6.6307319211080915e-06, "loss": 0.524, "step": 5812 }, { "epoch": 0.41, "grad_norm": 0.8597358763544644, "learning_rate": 6.629645548360781e-06, "loss": 0.4474, "step": 5813 }, { "epoch": 0.41, "grad_norm": 1.590928097655433, "learning_rate": 6.628559089530071e-06, "loss": 0.5393, "step": 5814 }, { "epoch": 0.41, "grad_norm": 0.8598403080943261, "learning_rate": 6.62747254467335e-06, "loss": 0.4767, "step": 5815 }, { "epoch": 0.41, "grad_norm": 1.8482831175420178, "learning_rate": 6.626385913848017e-06, "loss": 0.5926, "step": 5816 }, { "epoch": 0.41, "grad_norm": 1.4200643022178265, "learning_rate": 6.625299197111468e-06, "loss": 0.4558, "step": 5817 }, { "epoch": 0.41, "grad_norm": 2.5643562092642345, "learning_rate": 6.624212394521108e-06, "loss": 0.5199, "step": 5818 }, { "epoch": 0.41, "grad_norm": 1.5958436966698553, "learning_rate": 6.6231255061343455e-06, "loss": 0.5348, "step": 5819 }, { "epoch": 0.41, "grad_norm": 1.514106329736402, "learning_rate": 6.622038532008595e-06, "loss": 0.5767, "step": 5820 }, { "epoch": 0.41, "grad_norm": 1.5610319558934846, "learning_rate": 6.620951472201273e-06, "loss": 0.575, "step": 5821 }, { "epoch": 0.41, "grad_norm": 1.837319649847454, "learning_rate": 6.619864326769801e-06, "loss": 0.5196, "step": 5822 }, { "epoch": 0.41, "grad_norm": 1.9556621972093151, "learning_rate": 6.618777095771607e-06, "loss": 0.5694, "step": 5823 }, { "epoch": 0.41, "grad_norm": 1.616852612292827, "learning_rate": 6.617689779264121e-06, "loss": 0.5851, "step": 5824 }, { "epoch": 0.41, "grad_norm": 1.4980949960208676, "learning_rate": 6.6166023773047785e-06, "loss": 0.5159, "step": 5825 }, { "epoch": 0.41, "grad_norm": 6.318601235617634, "learning_rate": 6.615514889951021e-06, "loss": 0.5464, "step": 5826 }, { "epoch": 0.41, "grad_norm": 2.3174452650312207, "learning_rate": 6.6144273172602945e-06, "loss": 0.5894, "step": 5827 }, { "epoch": 0.41, "grad_norm": 2.144094295253536, "learning_rate": 6.613339659290046e-06, "loss": 0.5694, "step": 5828 }, { "epoch": 0.41, "grad_norm": 1.919415252851093, "learning_rate": 6.61225191609773e-06, "loss": 0.5155, "step": 5829 }, { "epoch": 0.41, "grad_norm": 1.594499950881667, "learning_rate": 6.611164087740803e-06, "loss": 0.5214, "step": 5830 }, { "epoch": 0.41, "grad_norm": 1.6892905245560006, "learning_rate": 6.610076174276731e-06, "loss": 0.5474, "step": 5831 }, { "epoch": 0.41, "grad_norm": 1.5805599373524648, "learning_rate": 6.608988175762981e-06, "loss": 0.5949, "step": 5832 }, { "epoch": 0.41, "grad_norm": 1.6184606368302594, "learning_rate": 6.607900092257021e-06, "loss": 0.5287, "step": 5833 }, { "epoch": 0.41, "grad_norm": 1.8338095688541032, "learning_rate": 6.606811923816331e-06, "loss": 0.5692, "step": 5834 }, { "epoch": 0.41, "grad_norm": 1.7563597841212846, "learning_rate": 6.6057236704983915e-06, "loss": 0.5232, "step": 5835 }, { "epoch": 0.41, "grad_norm": 1.3992834644711782, "learning_rate": 6.6046353323606845e-06, "loss": 0.521, "step": 5836 }, { "epoch": 0.41, "grad_norm": 0.7724057314054366, "learning_rate": 6.603546909460704e-06, "loss": 0.4573, "step": 5837 }, { "epoch": 0.41, "grad_norm": 1.9577311329599112, "learning_rate": 6.602458401855942e-06, "loss": 0.5167, "step": 5838 }, { "epoch": 0.41, "grad_norm": 2.0186379592615666, "learning_rate": 6.601369809603897e-06, "loss": 0.5243, "step": 5839 }, { "epoch": 0.41, "grad_norm": 1.4578422557147754, "learning_rate": 6.600281132762072e-06, "loss": 0.5176, "step": 5840 }, { "epoch": 0.41, "grad_norm": 1.6732457955302167, "learning_rate": 6.599192371387975e-06, "loss": 0.531, "step": 5841 }, { "epoch": 0.41, "grad_norm": 1.767581631015389, "learning_rate": 6.598103525539119e-06, "loss": 0.5482, "step": 5842 }, { "epoch": 0.41, "grad_norm": 1.6960501511413577, "learning_rate": 6.597014595273019e-06, "loss": 0.5367, "step": 5843 }, { "epoch": 0.41, "grad_norm": 2.060741112499225, "learning_rate": 6.595925580647197e-06, "loss": 0.6372, "step": 5844 }, { "epoch": 0.41, "grad_norm": 1.4729163491576367, "learning_rate": 6.594836481719179e-06, "loss": 0.5636, "step": 5845 }, { "epoch": 0.41, "grad_norm": 1.6451339460813295, "learning_rate": 6.593747298546493e-06, "loss": 0.5496, "step": 5846 }, { "epoch": 0.41, "grad_norm": 1.5021800487458752, "learning_rate": 6.592658031186675e-06, "loss": 0.5012, "step": 5847 }, { "epoch": 0.41, "grad_norm": 1.498489538791036, "learning_rate": 6.591568679697262e-06, "loss": 0.5169, "step": 5848 }, { "epoch": 0.42, "grad_norm": 1.8280574222427548, "learning_rate": 6.590479244135799e-06, "loss": 0.5591, "step": 5849 }, { "epoch": 0.42, "grad_norm": 1.5006993796623003, "learning_rate": 6.589389724559834e-06, "loss": 0.5256, "step": 5850 }, { "epoch": 0.42, "grad_norm": 1.8584691369104496, "learning_rate": 6.588300121026916e-06, "loss": 0.5904, "step": 5851 }, { "epoch": 0.42, "grad_norm": 1.5129653397107585, "learning_rate": 6.5872104335946055e-06, "loss": 0.4931, "step": 5852 }, { "epoch": 0.42, "grad_norm": 1.6003486391258779, "learning_rate": 6.586120662320461e-06, "loss": 0.4777, "step": 5853 }, { "epoch": 0.42, "grad_norm": 1.90456694502086, "learning_rate": 6.585030807262048e-06, "loss": 0.5859, "step": 5854 }, { "epoch": 0.42, "grad_norm": 1.6676159434966487, "learning_rate": 6.5839408684769365e-06, "loss": 0.5708, "step": 5855 }, { "epoch": 0.42, "grad_norm": 1.5249454621712055, "learning_rate": 6.582850846022703e-06, "loss": 0.5564, "step": 5856 }, { "epoch": 0.42, "grad_norm": 1.721881061115242, "learning_rate": 6.581760739956924e-06, "loss": 0.5733, "step": 5857 }, { "epoch": 0.42, "grad_norm": 1.6009569827800252, "learning_rate": 6.580670550337182e-06, "loss": 0.4962, "step": 5858 }, { "epoch": 0.42, "grad_norm": 1.8335563834269406, "learning_rate": 6.579580277221067e-06, "loss": 0.6091, "step": 5859 }, { "epoch": 0.42, "grad_norm": 1.6165275464339568, "learning_rate": 6.578489920666168e-06, "loss": 0.536, "step": 5860 }, { "epoch": 0.42, "grad_norm": 0.763836441011334, "learning_rate": 6.5773994807300845e-06, "loss": 0.4207, "step": 5861 }, { "epoch": 0.42, "grad_norm": 1.8510295611923626, "learning_rate": 6.576308957470414e-06, "loss": 0.5294, "step": 5862 }, { "epoch": 0.42, "grad_norm": 2.7844298885539325, "learning_rate": 6.575218350944763e-06, "loss": 0.4971, "step": 5863 }, { "epoch": 0.42, "grad_norm": 1.6406743511317718, "learning_rate": 6.574127661210741e-06, "loss": 0.5951, "step": 5864 }, { "epoch": 0.42, "grad_norm": 1.9972307078304927, "learning_rate": 6.573036888325963e-06, "loss": 0.5772, "step": 5865 }, { "epoch": 0.42, "grad_norm": 2.0482434941905243, "learning_rate": 6.571946032348046e-06, "loss": 0.5391, "step": 5866 }, { "epoch": 0.42, "grad_norm": 1.6305100511765362, "learning_rate": 6.570855093334614e-06, "loss": 0.5649, "step": 5867 }, { "epoch": 0.42, "grad_norm": 2.3945410403814242, "learning_rate": 6.5697640713432906e-06, "loss": 0.5004, "step": 5868 }, { "epoch": 0.42, "grad_norm": 1.417007383330024, "learning_rate": 6.568672966431711e-06, "loss": 0.4867, "step": 5869 }, { "epoch": 0.42, "grad_norm": 1.6052677574956207, "learning_rate": 6.56758177865751e-06, "loss": 0.6394, "step": 5870 }, { "epoch": 0.42, "grad_norm": 1.543452467672601, "learning_rate": 6.566490508078328e-06, "loss": 0.531, "step": 5871 }, { "epoch": 0.42, "grad_norm": 1.950585766806838, "learning_rate": 6.56539915475181e-06, "loss": 0.5164, "step": 5872 }, { "epoch": 0.42, "grad_norm": 1.4275478282152232, "learning_rate": 6.564307718735604e-06, "loss": 0.5028, "step": 5873 }, { "epoch": 0.42, "grad_norm": 1.5625520150603835, "learning_rate": 6.563216200087364e-06, "loss": 0.4954, "step": 5874 }, { "epoch": 0.42, "grad_norm": 1.6775833577708843, "learning_rate": 6.562124598864748e-06, "loss": 0.6345, "step": 5875 }, { "epoch": 0.42, "grad_norm": 1.7764478091407114, "learning_rate": 6.561032915125416e-06, "loss": 0.5393, "step": 5876 }, { "epoch": 0.42, "grad_norm": 1.5300578567796672, "learning_rate": 6.559941148927035e-06, "loss": 0.5924, "step": 5877 }, { "epoch": 0.42, "grad_norm": 1.6533778929847478, "learning_rate": 6.5588493003272794e-06, "loss": 0.569, "step": 5878 }, { "epoch": 0.42, "grad_norm": 1.6081161954902436, "learning_rate": 6.557757369383819e-06, "loss": 0.5254, "step": 5879 }, { "epoch": 0.42, "grad_norm": 1.679702874401066, "learning_rate": 6.556665356154338e-06, "loss": 0.5074, "step": 5880 }, { "epoch": 0.42, "grad_norm": 1.7384737418466396, "learning_rate": 6.555573260696517e-06, "loss": 0.5744, "step": 5881 }, { "epoch": 0.42, "grad_norm": 1.6065862374810773, "learning_rate": 6.554481083068047e-06, "loss": 0.5422, "step": 5882 }, { "epoch": 0.42, "grad_norm": 1.6710795018254225, "learning_rate": 6.553388823326617e-06, "loss": 0.6401, "step": 5883 }, { "epoch": 0.42, "grad_norm": 1.684213221474878, "learning_rate": 6.552296481529927e-06, "loss": 0.5538, "step": 5884 }, { "epoch": 0.42, "grad_norm": 8.780768900918462, "learning_rate": 6.551204057735676e-06, "loss": 0.5385, "step": 5885 }, { "epoch": 0.42, "grad_norm": 1.782050817111433, "learning_rate": 6.55011155200157e-06, "loss": 0.5255, "step": 5886 }, { "epoch": 0.42, "grad_norm": 1.5651635346159511, "learning_rate": 6.5490189643853204e-06, "loss": 0.4805, "step": 5887 }, { "epoch": 0.42, "grad_norm": 1.9920381791642292, "learning_rate": 6.547926294944641e-06, "loss": 0.5692, "step": 5888 }, { "epoch": 0.42, "grad_norm": 1.9985165058227028, "learning_rate": 6.546833543737249e-06, "loss": 0.5639, "step": 5889 }, { "epoch": 0.42, "grad_norm": 5.308138930059173, "learning_rate": 6.545740710820867e-06, "loss": 0.4833, "step": 5890 }, { "epoch": 0.42, "grad_norm": 1.6520408835743527, "learning_rate": 6.544647796253223e-06, "loss": 0.5861, "step": 5891 }, { "epoch": 0.42, "grad_norm": 1.5230205505046615, "learning_rate": 6.543554800092049e-06, "loss": 0.526, "step": 5892 }, { "epoch": 0.42, "grad_norm": 1.4137261112697015, "learning_rate": 6.542461722395082e-06, "loss": 0.4702, "step": 5893 }, { "epoch": 0.42, "grad_norm": 1.4328149194260784, "learning_rate": 6.541368563220056e-06, "loss": 0.5226, "step": 5894 }, { "epoch": 0.42, "grad_norm": 1.9595813454904862, "learning_rate": 6.540275322624724e-06, "loss": 0.61, "step": 5895 }, { "epoch": 0.42, "grad_norm": 1.5553627532159, "learning_rate": 6.539182000666828e-06, "loss": 0.5479, "step": 5896 }, { "epoch": 0.42, "grad_norm": 2.073676791399435, "learning_rate": 6.538088597404124e-06, "loss": 0.5596, "step": 5897 }, { "epoch": 0.42, "grad_norm": 2.237645027137355, "learning_rate": 6.536995112894368e-06, "loss": 0.5821, "step": 5898 }, { "epoch": 0.42, "grad_norm": 1.955893069975835, "learning_rate": 6.535901547195322e-06, "loss": 0.5331, "step": 5899 }, { "epoch": 0.42, "grad_norm": 1.8739029115448256, "learning_rate": 6.534807900364751e-06, "loss": 0.6017, "step": 5900 }, { "epoch": 0.42, "grad_norm": 1.6709628465706683, "learning_rate": 6.533714172460427e-06, "loss": 0.5285, "step": 5901 }, { "epoch": 0.42, "grad_norm": 2.209478194424544, "learning_rate": 6.532620363540124e-06, "loss": 0.577, "step": 5902 }, { "epoch": 0.42, "grad_norm": 2.359487017867562, "learning_rate": 6.53152647366162e-06, "loss": 0.5293, "step": 5903 }, { "epoch": 0.42, "grad_norm": 1.5308789261698046, "learning_rate": 6.530432502882699e-06, "loss": 0.4882, "step": 5904 }, { "epoch": 0.42, "grad_norm": 2.397453191396718, "learning_rate": 6.5293384512611455e-06, "loss": 0.5311, "step": 5905 }, { "epoch": 0.42, "grad_norm": 1.869570671392317, "learning_rate": 6.528244318854754e-06, "loss": 0.5529, "step": 5906 }, { "epoch": 0.42, "grad_norm": 1.444901815728329, "learning_rate": 6.5271501057213185e-06, "loss": 0.5014, "step": 5907 }, { "epoch": 0.42, "grad_norm": 1.6260663147828214, "learning_rate": 6.526055811918641e-06, "loss": 0.5441, "step": 5908 }, { "epoch": 0.42, "grad_norm": 0.7028672509522572, "learning_rate": 6.524961437504523e-06, "loss": 0.4617, "step": 5909 }, { "epoch": 0.42, "grad_norm": 1.738166921076007, "learning_rate": 6.523866982536776e-06, "loss": 0.5323, "step": 5910 }, { "epoch": 0.42, "grad_norm": 1.5900624286194096, "learning_rate": 6.52277244707321e-06, "loss": 0.5222, "step": 5911 }, { "epoch": 0.42, "grad_norm": 0.8163872490810836, "learning_rate": 6.521677831171646e-06, "loss": 0.4267, "step": 5912 }, { "epoch": 0.42, "grad_norm": 1.8027939439904435, "learning_rate": 6.5205831348898995e-06, "loss": 0.5573, "step": 5913 }, { "epoch": 0.42, "grad_norm": 1.6593700167849381, "learning_rate": 6.519488358285801e-06, "loss": 0.5363, "step": 5914 }, { "epoch": 0.42, "grad_norm": 1.6383081589122526, "learning_rate": 6.518393501417178e-06, "loss": 0.5566, "step": 5915 }, { "epoch": 0.42, "grad_norm": 1.8303875463301966, "learning_rate": 6.517298564341867e-06, "loss": 0.5522, "step": 5916 }, { "epoch": 0.42, "grad_norm": 1.7389136549573714, "learning_rate": 6.5162035471177035e-06, "loss": 0.5705, "step": 5917 }, { "epoch": 0.42, "grad_norm": 1.855754770762998, "learning_rate": 6.51510844980253e-06, "loss": 0.5392, "step": 5918 }, { "epoch": 0.42, "grad_norm": 2.0480221567265353, "learning_rate": 6.514013272454196e-06, "loss": 0.5825, "step": 5919 }, { "epoch": 0.42, "grad_norm": 1.5080935725586666, "learning_rate": 6.5129180151305495e-06, "loss": 0.6111, "step": 5920 }, { "epoch": 0.42, "grad_norm": 2.293947495216481, "learning_rate": 6.5118226778894465e-06, "loss": 0.5984, "step": 5921 }, { "epoch": 0.42, "grad_norm": 0.8635438813503999, "learning_rate": 6.510727260788747e-06, "loss": 0.4408, "step": 5922 }, { "epoch": 0.42, "grad_norm": 1.645348246440866, "learning_rate": 6.509631763886317e-06, "loss": 0.5985, "step": 5923 }, { "epoch": 0.42, "grad_norm": 1.5713177544585595, "learning_rate": 6.50853618724002e-06, "loss": 0.6187, "step": 5924 }, { "epoch": 0.42, "grad_norm": 1.7817240764026094, "learning_rate": 6.5074405309077305e-06, "loss": 0.5466, "step": 5925 }, { "epoch": 0.42, "grad_norm": 1.5919862255741748, "learning_rate": 6.506344794947324e-06, "loss": 0.5108, "step": 5926 }, { "epoch": 0.42, "grad_norm": 2.512349478548671, "learning_rate": 6.505248979416682e-06, "loss": 0.5816, "step": 5927 }, { "epoch": 0.42, "grad_norm": 1.5184983906760976, "learning_rate": 6.504153084373688e-06, "loss": 0.5013, "step": 5928 }, { "epoch": 0.42, "grad_norm": 1.5715874199727056, "learning_rate": 6.50305710987623e-06, "loss": 0.5045, "step": 5929 }, { "epoch": 0.42, "grad_norm": 2.457520276923852, "learning_rate": 6.501961055982202e-06, "loss": 0.5315, "step": 5930 }, { "epoch": 0.42, "grad_norm": 1.8022393562107695, "learning_rate": 6.500864922749505e-06, "loss": 0.5655, "step": 5931 }, { "epoch": 0.42, "grad_norm": 1.7217025605208927, "learning_rate": 6.499768710236034e-06, "loss": 0.6071, "step": 5932 }, { "epoch": 0.42, "grad_norm": 1.6193105576328, "learning_rate": 6.4986724184996984e-06, "loss": 0.5671, "step": 5933 }, { "epoch": 0.42, "grad_norm": 1.5333290148299747, "learning_rate": 6.49757604759841e-06, "loss": 0.5555, "step": 5934 }, { "epoch": 0.42, "grad_norm": 1.5497840864146137, "learning_rate": 6.496479597590077e-06, "loss": 0.6516, "step": 5935 }, { "epoch": 0.42, "grad_norm": 1.640125888594574, "learning_rate": 6.4953830685326225e-06, "loss": 0.5216, "step": 5936 }, { "epoch": 0.42, "grad_norm": 2.5640860438922024, "learning_rate": 6.494286460483966e-06, "loss": 0.5376, "step": 5937 }, { "epoch": 0.42, "grad_norm": 0.7908045999289842, "learning_rate": 6.493189773502038e-06, "loss": 0.4581, "step": 5938 }, { "epoch": 0.42, "grad_norm": 0.8532859801656744, "learning_rate": 6.492093007644764e-06, "loss": 0.4498, "step": 5939 }, { "epoch": 0.42, "grad_norm": 0.8497514367208415, "learning_rate": 6.490996162970084e-06, "loss": 0.4493, "step": 5940 }, { "epoch": 0.42, "grad_norm": 1.6745980467393213, "learning_rate": 6.489899239535932e-06, "loss": 0.5157, "step": 5941 }, { "epoch": 0.42, "grad_norm": 1.5036305212824783, "learning_rate": 6.488802237400254e-06, "loss": 0.5738, "step": 5942 }, { "epoch": 0.42, "grad_norm": 0.8026634387789531, "learning_rate": 6.487705156620998e-06, "loss": 0.4301, "step": 5943 }, { "epoch": 0.42, "grad_norm": 1.4659958864159832, "learning_rate": 6.486607997256114e-06, "loss": 0.5472, "step": 5944 }, { "epoch": 0.42, "grad_norm": 1.606843741757065, "learning_rate": 6.485510759363558e-06, "loss": 0.6044, "step": 5945 }, { "epoch": 0.42, "grad_norm": 1.6267398090021095, "learning_rate": 6.484413443001292e-06, "loss": 0.4863, "step": 5946 }, { "epoch": 0.42, "grad_norm": 1.454224818949021, "learning_rate": 6.483316048227275e-06, "loss": 0.5441, "step": 5947 }, { "epoch": 0.42, "grad_norm": 1.6579565543114174, "learning_rate": 6.482218575099481e-06, "loss": 0.5053, "step": 5948 }, { "epoch": 0.42, "grad_norm": 0.7525875137360138, "learning_rate": 6.481121023675878e-06, "loss": 0.4485, "step": 5949 }, { "epoch": 0.42, "grad_norm": 1.7427633200258947, "learning_rate": 6.480023394014443e-06, "loss": 0.5541, "step": 5950 }, { "epoch": 0.42, "grad_norm": 1.4199231134223715, "learning_rate": 6.478925686173158e-06, "loss": 0.5146, "step": 5951 }, { "epoch": 0.42, "grad_norm": 1.508635767035549, "learning_rate": 6.4778279002100075e-06, "loss": 0.5548, "step": 5952 }, { "epoch": 0.42, "grad_norm": 1.6757180312735853, "learning_rate": 6.4767300361829814e-06, "loss": 0.4886, "step": 5953 }, { "epoch": 0.42, "grad_norm": 1.5588845703950929, "learning_rate": 6.475632094150068e-06, "loss": 0.5178, "step": 5954 }, { "epoch": 0.42, "grad_norm": 2.516705531946218, "learning_rate": 6.474534074169269e-06, "loss": 0.5253, "step": 5955 }, { "epoch": 0.42, "grad_norm": 1.6518195383974432, "learning_rate": 6.473435976298583e-06, "loss": 0.5285, "step": 5956 }, { "epoch": 0.42, "grad_norm": 0.7626824341594294, "learning_rate": 6.472337800596017e-06, "loss": 0.4354, "step": 5957 }, { "epoch": 0.42, "grad_norm": 1.624042820353851, "learning_rate": 6.471239547119577e-06, "loss": 0.594, "step": 5958 }, { "epoch": 0.42, "grad_norm": 1.4637859970149665, "learning_rate": 6.470141215927281e-06, "loss": 0.5148, "step": 5959 }, { "epoch": 0.42, "grad_norm": 0.7511731471691121, "learning_rate": 6.469042807077144e-06, "loss": 0.4655, "step": 5960 }, { "epoch": 0.42, "grad_norm": 1.4096498393069241, "learning_rate": 6.467944320627188e-06, "loss": 0.4452, "step": 5961 }, { "epoch": 0.42, "grad_norm": 1.4871527081230382, "learning_rate": 6.466845756635439e-06, "loss": 0.5684, "step": 5962 }, { "epoch": 0.42, "grad_norm": 2.2584959584341315, "learning_rate": 6.465747115159927e-06, "loss": 0.6267, "step": 5963 }, { "epoch": 0.42, "grad_norm": 2.035223220833597, "learning_rate": 6.464648396258686e-06, "loss": 0.5675, "step": 5964 }, { "epoch": 0.42, "grad_norm": 1.8035887043566976, "learning_rate": 6.463549599989752e-06, "loss": 0.5917, "step": 5965 }, { "epoch": 0.42, "grad_norm": 1.5035927735922108, "learning_rate": 6.46245072641117e-06, "loss": 0.5117, "step": 5966 }, { "epoch": 0.42, "grad_norm": 1.600201071877732, "learning_rate": 6.461351775580986e-06, "loss": 0.6081, "step": 5967 }, { "epoch": 0.42, "grad_norm": 1.52042786557807, "learning_rate": 6.46025274755725e-06, "loss": 0.5214, "step": 5968 }, { "epoch": 0.42, "grad_norm": 1.8002064114917573, "learning_rate": 6.459153642398014e-06, "loss": 0.5806, "step": 5969 }, { "epoch": 0.42, "grad_norm": 1.863842937185797, "learning_rate": 6.458054460161339e-06, "loss": 0.5093, "step": 5970 }, { "epoch": 0.42, "grad_norm": 1.7742606767883655, "learning_rate": 6.456955200905287e-06, "loss": 0.5648, "step": 5971 }, { "epoch": 0.42, "grad_norm": 2.037778014924978, "learning_rate": 6.455855864687924e-06, "loss": 0.5612, "step": 5972 }, { "epoch": 0.42, "grad_norm": 1.8151945670938043, "learning_rate": 6.454756451567321e-06, "loss": 0.5197, "step": 5973 }, { "epoch": 0.42, "grad_norm": 0.7798079708825238, "learning_rate": 6.453656961601554e-06, "loss": 0.4347, "step": 5974 }, { "epoch": 0.42, "grad_norm": 1.9594605050070328, "learning_rate": 6.4525573948487e-06, "loss": 0.6037, "step": 5975 }, { "epoch": 0.42, "grad_norm": 2.4363698310556376, "learning_rate": 6.451457751366843e-06, "loss": 0.5556, "step": 5976 }, { "epoch": 0.42, "grad_norm": 1.5437835770149808, "learning_rate": 6.450358031214069e-06, "loss": 0.5531, "step": 5977 }, { "epoch": 0.42, "grad_norm": 1.5400375017935015, "learning_rate": 6.44925823444847e-06, "loss": 0.5449, "step": 5978 }, { "epoch": 0.42, "grad_norm": 2.1801814565703608, "learning_rate": 6.448158361128139e-06, "loss": 0.5186, "step": 5979 }, { "epoch": 0.42, "grad_norm": 1.6283196205715558, "learning_rate": 6.447058411311176e-06, "loss": 0.5737, "step": 5980 }, { "epoch": 0.42, "grad_norm": 1.6577426403853723, "learning_rate": 6.445958385055685e-06, "loss": 0.5764, "step": 5981 }, { "epoch": 0.42, "grad_norm": 1.9572003437188972, "learning_rate": 6.4448582824197726e-06, "loss": 0.6231, "step": 5982 }, { "epoch": 0.42, "grad_norm": 2.4984052705815953, "learning_rate": 6.44375810346155e-06, "loss": 0.5428, "step": 5983 }, { "epoch": 0.42, "grad_norm": 1.6047753135809402, "learning_rate": 6.442657848239132e-06, "loss": 0.5813, "step": 5984 }, { "epoch": 0.42, "grad_norm": 1.7338255396487672, "learning_rate": 6.441557516810639e-06, "loss": 0.574, "step": 5985 }, { "epoch": 0.42, "grad_norm": 1.8992292879482646, "learning_rate": 6.4404571092341925e-06, "loss": 0.5254, "step": 5986 }, { "epoch": 0.42, "grad_norm": 1.5875054890267597, "learning_rate": 6.4393566255679206e-06, "loss": 0.475, "step": 5987 }, { "epoch": 0.42, "grad_norm": 2.0799155168446557, "learning_rate": 6.4382560658699545e-06, "loss": 0.5293, "step": 5988 }, { "epoch": 0.42, "grad_norm": 0.8344387982173572, "learning_rate": 6.43715543019843e-06, "loss": 0.4564, "step": 5989 }, { "epoch": 0.43, "grad_norm": 0.796720829663042, "learning_rate": 6.436054718611485e-06, "loss": 0.4631, "step": 5990 }, { "epoch": 0.43, "grad_norm": 1.8938441992952706, "learning_rate": 6.4349539311672645e-06, "loss": 0.5094, "step": 5991 }, { "epoch": 0.43, "grad_norm": 1.9258956375330782, "learning_rate": 6.433853067923915e-06, "loss": 0.6326, "step": 5992 }, { "epoch": 0.43, "grad_norm": 1.7019214240135452, "learning_rate": 6.432752128939589e-06, "loss": 0.5177, "step": 5993 }, { "epoch": 0.43, "grad_norm": 1.6032057522097785, "learning_rate": 6.431651114272439e-06, "loss": 0.5331, "step": 5994 }, { "epoch": 0.43, "grad_norm": 1.4637203150717077, "learning_rate": 6.4305500239806265e-06, "loss": 0.5025, "step": 5995 }, { "epoch": 0.43, "grad_norm": 1.7745714885467025, "learning_rate": 6.429448858122314e-06, "loss": 0.5626, "step": 5996 }, { "epoch": 0.43, "grad_norm": 1.6216255553716963, "learning_rate": 6.428347616755668e-06, "loss": 0.6221, "step": 5997 }, { "epoch": 0.43, "grad_norm": 0.8837107220867592, "learning_rate": 6.4272462999388644e-06, "loss": 0.4428, "step": 5998 }, { "epoch": 0.43, "grad_norm": 1.7605128042352531, "learning_rate": 6.426144907730072e-06, "loss": 0.5209, "step": 5999 }, { "epoch": 0.43, "grad_norm": 1.622835734973294, "learning_rate": 6.425043440187475e-06, "loss": 0.5821, "step": 6000 }, { "epoch": 0.43, "grad_norm": 1.8626463694606845, "learning_rate": 6.423941897369255e-06, "loss": 0.5855, "step": 6001 }, { "epoch": 0.43, "grad_norm": 1.9012019592984086, "learning_rate": 6.422840279333598e-06, "loss": 0.5107, "step": 6002 }, { "epoch": 0.43, "grad_norm": 1.568800336255233, "learning_rate": 6.421738586138695e-06, "loss": 0.4794, "step": 6003 }, { "epoch": 0.43, "grad_norm": 2.9023818605059257, "learning_rate": 6.4206368178427444e-06, "loss": 0.5823, "step": 6004 }, { "epoch": 0.43, "grad_norm": 1.6515305827598143, "learning_rate": 6.419534974503942e-06, "loss": 0.5561, "step": 6005 }, { "epoch": 0.43, "grad_norm": 1.5439018601656729, "learning_rate": 6.418433056180493e-06, "loss": 0.5543, "step": 6006 }, { "epoch": 0.43, "grad_norm": 1.7666254171551659, "learning_rate": 6.417331062930604e-06, "loss": 0.5788, "step": 6007 }, { "epoch": 0.43, "grad_norm": 1.5302926039016347, "learning_rate": 6.4162289948124855e-06, "loss": 0.6269, "step": 6008 }, { "epoch": 0.43, "grad_norm": 1.6372357398534825, "learning_rate": 6.415126851884352e-06, "loss": 0.4776, "step": 6009 }, { "epoch": 0.43, "grad_norm": 1.6298204777041971, "learning_rate": 6.414024634204423e-06, "loss": 0.512, "step": 6010 }, { "epoch": 0.43, "grad_norm": 1.615374376257846, "learning_rate": 6.412922341830922e-06, "loss": 0.5301, "step": 6011 }, { "epoch": 0.43, "grad_norm": 1.667507907030113, "learning_rate": 6.411819974822075e-06, "loss": 0.4957, "step": 6012 }, { "epoch": 0.43, "grad_norm": 1.711810953759433, "learning_rate": 6.410717533236114e-06, "loss": 0.6315, "step": 6013 }, { "epoch": 0.43, "grad_norm": 1.6140636141920406, "learning_rate": 6.409615017131271e-06, "loss": 0.485, "step": 6014 }, { "epoch": 0.43, "grad_norm": 1.6473609664269142, "learning_rate": 6.408512426565788e-06, "loss": 0.5247, "step": 6015 }, { "epoch": 0.43, "grad_norm": 1.6413392568841374, "learning_rate": 6.4074097615979045e-06, "loss": 0.5521, "step": 6016 }, { "epoch": 0.43, "grad_norm": 1.6822529402881832, "learning_rate": 6.406307022285869e-06, "loss": 0.5639, "step": 6017 }, { "epoch": 0.43, "grad_norm": 3.9468393363453824, "learning_rate": 6.4052042086879315e-06, "loss": 0.569, "step": 6018 }, { "epoch": 0.43, "grad_norm": 1.6979536903856498, "learning_rate": 6.404101320862347e-06, "loss": 0.5188, "step": 6019 }, { "epoch": 0.43, "grad_norm": 1.512470851577651, "learning_rate": 6.402998358867372e-06, "loss": 0.52, "step": 6020 }, { "epoch": 0.43, "grad_norm": 1.8475737007233368, "learning_rate": 6.40189532276127e-06, "loss": 0.565, "step": 6021 }, { "epoch": 0.43, "grad_norm": 1.6463262615096825, "learning_rate": 6.400792212602307e-06, "loss": 0.5542, "step": 6022 }, { "epoch": 0.43, "grad_norm": 0.8123712430084031, "learning_rate": 6.3996890284487525e-06, "loss": 0.4516, "step": 6023 }, { "epoch": 0.43, "grad_norm": 1.5442401424985055, "learning_rate": 6.398585770358879e-06, "loss": 0.5287, "step": 6024 }, { "epoch": 0.43, "grad_norm": 1.691081570547479, "learning_rate": 6.3974824383909694e-06, "loss": 0.6219, "step": 6025 }, { "epoch": 0.43, "grad_norm": 1.5194993733210718, "learning_rate": 6.3963790326033e-06, "loss": 0.4854, "step": 6026 }, { "epoch": 0.43, "grad_norm": 1.7719891791811102, "learning_rate": 6.395275553054159e-06, "loss": 0.5187, "step": 6027 }, { "epoch": 0.43, "grad_norm": 1.5484178374074236, "learning_rate": 6.394171999801835e-06, "loss": 0.5929, "step": 6028 }, { "epoch": 0.43, "grad_norm": 1.9746152087259201, "learning_rate": 6.393068372904623e-06, "loss": 0.5316, "step": 6029 }, { "epoch": 0.43, "grad_norm": 1.6275468756754354, "learning_rate": 6.391964672420818e-06, "loss": 0.5647, "step": 6030 }, { "epoch": 0.43, "grad_norm": 1.849935197927109, "learning_rate": 6.3908608984087225e-06, "loss": 0.5343, "step": 6031 }, { "epoch": 0.43, "grad_norm": 2.339010604118414, "learning_rate": 6.389757050926641e-06, "loss": 0.5259, "step": 6032 }, { "epoch": 0.43, "grad_norm": 1.537707850968111, "learning_rate": 6.388653130032885e-06, "loss": 0.5304, "step": 6033 }, { "epoch": 0.43, "grad_norm": 1.6251019891427079, "learning_rate": 6.387549135785765e-06, "loss": 0.552, "step": 6034 }, { "epoch": 0.43, "grad_norm": 1.8893317186668974, "learning_rate": 6.386445068243596e-06, "loss": 0.5396, "step": 6035 }, { "epoch": 0.43, "grad_norm": 1.501769650190139, "learning_rate": 6.385340927464701e-06, "loss": 0.5304, "step": 6036 }, { "epoch": 0.43, "grad_norm": 1.754153773265779, "learning_rate": 6.384236713507404e-06, "loss": 0.5885, "step": 6037 }, { "epoch": 0.43, "grad_norm": 1.5234461500832417, "learning_rate": 6.383132426430034e-06, "loss": 0.5496, "step": 6038 }, { "epoch": 0.43, "grad_norm": 1.8244736229165082, "learning_rate": 6.382028066290923e-06, "loss": 0.5681, "step": 6039 }, { "epoch": 0.43, "grad_norm": 1.6667521962894258, "learning_rate": 6.380923633148406e-06, "loss": 0.4865, "step": 6040 }, { "epoch": 0.43, "grad_norm": 2.096830370144309, "learning_rate": 6.379819127060824e-06, "loss": 0.5722, "step": 6041 }, { "epoch": 0.43, "grad_norm": 2.5334943182876573, "learning_rate": 6.37871454808652e-06, "loss": 0.5752, "step": 6042 }, { "epoch": 0.43, "grad_norm": 1.721926322244316, "learning_rate": 6.377609896283842e-06, "loss": 0.5439, "step": 6043 }, { "epoch": 0.43, "grad_norm": 1.5420627136404597, "learning_rate": 6.376505171711142e-06, "loss": 0.59, "step": 6044 }, { "epoch": 0.43, "grad_norm": 3.0660598311639706, "learning_rate": 6.375400374426772e-06, "loss": 0.5059, "step": 6045 }, { "epoch": 0.43, "grad_norm": 1.7285186140305258, "learning_rate": 6.374295504489095e-06, "loss": 0.5649, "step": 6046 }, { "epoch": 0.43, "grad_norm": 0.6930666236267564, "learning_rate": 6.373190561956472e-06, "loss": 0.4376, "step": 6047 }, { "epoch": 0.43, "grad_norm": 1.4829460529081877, "learning_rate": 6.372085546887272e-06, "loss": 0.5277, "step": 6048 }, { "epoch": 0.43, "grad_norm": 1.8159403140375654, "learning_rate": 6.370980459339865e-06, "loss": 0.5666, "step": 6049 }, { "epoch": 0.43, "grad_norm": 3.2507604358748505, "learning_rate": 6.369875299372623e-06, "loss": 0.5356, "step": 6050 }, { "epoch": 0.43, "grad_norm": 1.6328435461626802, "learning_rate": 6.368770067043927e-06, "loss": 0.5627, "step": 6051 }, { "epoch": 0.43, "grad_norm": 0.8664752452428686, "learning_rate": 6.367664762412157e-06, "loss": 0.4515, "step": 6052 }, { "epoch": 0.43, "grad_norm": 1.5873065021394948, "learning_rate": 6.366559385535699e-06, "loss": 0.569, "step": 6053 }, { "epoch": 0.43, "grad_norm": 1.6050920565344173, "learning_rate": 6.365453936472945e-06, "loss": 0.4822, "step": 6054 }, { "epoch": 0.43, "grad_norm": 2.1566277832956833, "learning_rate": 6.3643484152822875e-06, "loss": 0.5252, "step": 6055 }, { "epoch": 0.43, "grad_norm": 1.6310431103553487, "learning_rate": 6.363242822022123e-06, "loss": 0.5566, "step": 6056 }, { "epoch": 0.43, "grad_norm": 1.667473201511612, "learning_rate": 6.362137156750855e-06, "loss": 0.6133, "step": 6057 }, { "epoch": 0.43, "grad_norm": 0.7424331248176058, "learning_rate": 6.361031419526885e-06, "loss": 0.4492, "step": 6058 }, { "epoch": 0.43, "grad_norm": 1.4496912626604237, "learning_rate": 6.359925610408625e-06, "loss": 0.5402, "step": 6059 }, { "epoch": 0.43, "grad_norm": 0.7774191132663957, "learning_rate": 6.358819729454485e-06, "loss": 0.447, "step": 6060 }, { "epoch": 0.43, "grad_norm": 1.5696001680809417, "learning_rate": 6.357713776722883e-06, "loss": 0.5554, "step": 6061 }, { "epoch": 0.43, "grad_norm": 1.6309799508288485, "learning_rate": 6.356607752272238e-06, "loss": 0.535, "step": 6062 }, { "epoch": 0.43, "grad_norm": 1.5409032780476837, "learning_rate": 6.355501656160975e-06, "loss": 0.5914, "step": 6063 }, { "epoch": 0.43, "grad_norm": 3.998334370519608, "learning_rate": 6.3543954884475225e-06, "loss": 0.5786, "step": 6064 }, { "epoch": 0.43, "grad_norm": 1.7298587965954304, "learning_rate": 6.35328924919031e-06, "loss": 0.5821, "step": 6065 }, { "epoch": 0.43, "grad_norm": 1.930386077934964, "learning_rate": 6.352182938447775e-06, "loss": 0.5359, "step": 6066 }, { "epoch": 0.43, "grad_norm": 1.7061029464254989, "learning_rate": 6.351076556278354e-06, "loss": 0.5433, "step": 6067 }, { "epoch": 0.43, "grad_norm": 1.6486790342231459, "learning_rate": 6.349970102740492e-06, "loss": 0.5306, "step": 6068 }, { "epoch": 0.43, "grad_norm": 1.4883072993822084, "learning_rate": 6.348863577892634e-06, "loss": 0.4812, "step": 6069 }, { "epoch": 0.43, "grad_norm": 1.672612799616705, "learning_rate": 6.347756981793231e-06, "loss": 0.517, "step": 6070 }, { "epoch": 0.43, "grad_norm": 1.6713994942557995, "learning_rate": 6.3466503145007385e-06, "loss": 0.5221, "step": 6071 }, { "epoch": 0.43, "grad_norm": 1.7280025606133818, "learning_rate": 6.345543576073614e-06, "loss": 0.6066, "step": 6072 }, { "epoch": 0.43, "grad_norm": 1.8332322985624276, "learning_rate": 6.344436766570317e-06, "loss": 0.5454, "step": 6073 }, { "epoch": 0.43, "grad_norm": 1.7801530058706834, "learning_rate": 6.343329886049316e-06, "loss": 0.5304, "step": 6074 }, { "epoch": 0.43, "grad_norm": 1.4013317306437543, "learning_rate": 6.342222934569078e-06, "loss": 0.5494, "step": 6075 }, { "epoch": 0.43, "grad_norm": 1.5946136250603649, "learning_rate": 6.341115912188075e-06, "loss": 0.5688, "step": 6076 }, { "epoch": 0.43, "grad_norm": 1.6880700240408795, "learning_rate": 6.340008818964786e-06, "loss": 0.4793, "step": 6077 }, { "epoch": 0.43, "grad_norm": 1.7427498920946018, "learning_rate": 6.33890165495769e-06, "loss": 0.5808, "step": 6078 }, { "epoch": 0.43, "grad_norm": 1.4727616842982063, "learning_rate": 6.337794420225274e-06, "loss": 0.5446, "step": 6079 }, { "epoch": 0.43, "grad_norm": 0.721164221831636, "learning_rate": 6.336687114826021e-06, "loss": 0.4388, "step": 6080 }, { "epoch": 0.43, "grad_norm": 2.8272780387034, "learning_rate": 6.335579738818428e-06, "loss": 0.4593, "step": 6081 }, { "epoch": 0.43, "grad_norm": 1.9699786007736828, "learning_rate": 6.334472292260986e-06, "loss": 0.6211, "step": 6082 }, { "epoch": 0.43, "grad_norm": 1.9846413541329506, "learning_rate": 6.333364775212195e-06, "loss": 0.559, "step": 6083 }, { "epoch": 0.43, "grad_norm": 1.6428691969143958, "learning_rate": 6.332257187730559e-06, "loss": 0.5158, "step": 6084 }, { "epoch": 0.43, "grad_norm": 1.5475940338363856, "learning_rate": 6.331149529874585e-06, "loss": 0.5177, "step": 6085 }, { "epoch": 0.43, "grad_norm": 7.802418063036298, "learning_rate": 6.330041801702782e-06, "loss": 0.4956, "step": 6086 }, { "epoch": 0.43, "grad_norm": 1.474375336430168, "learning_rate": 6.328934003273665e-06, "loss": 0.4963, "step": 6087 }, { "epoch": 0.43, "grad_norm": 1.8115152788835485, "learning_rate": 6.327826134645749e-06, "loss": 0.4461, "step": 6088 }, { "epoch": 0.43, "grad_norm": 1.8019911306165188, "learning_rate": 6.326718195877559e-06, "loss": 0.5545, "step": 6089 }, { "epoch": 0.43, "grad_norm": 1.6922096554144357, "learning_rate": 6.3256101870276165e-06, "loss": 0.5432, "step": 6090 }, { "epoch": 0.43, "grad_norm": 1.730534631411812, "learning_rate": 6.324502108154454e-06, "loss": 0.5926, "step": 6091 }, { "epoch": 0.43, "grad_norm": 1.6763032117537606, "learning_rate": 6.323393959316601e-06, "loss": 0.5834, "step": 6092 }, { "epoch": 0.43, "grad_norm": 1.6212395680993688, "learning_rate": 6.322285740572594e-06, "loss": 0.5665, "step": 6093 }, { "epoch": 0.43, "grad_norm": 1.4977825978565384, "learning_rate": 6.321177451980975e-06, "loss": 0.5561, "step": 6094 }, { "epoch": 0.43, "grad_norm": 1.5585795532454014, "learning_rate": 6.320069093600287e-06, "loss": 0.5748, "step": 6095 }, { "epoch": 0.43, "grad_norm": 1.759789714053568, "learning_rate": 6.318960665489076e-06, "loss": 0.5802, "step": 6096 }, { "epoch": 0.43, "grad_norm": 1.6970687185602316, "learning_rate": 6.317852167705893e-06, "loss": 0.503, "step": 6097 }, { "epoch": 0.43, "grad_norm": 1.7662538973367834, "learning_rate": 6.316743600309292e-06, "loss": 0.5124, "step": 6098 }, { "epoch": 0.43, "grad_norm": 2.0631605394959913, "learning_rate": 6.3156349633578325e-06, "loss": 0.5578, "step": 6099 }, { "epoch": 0.43, "grad_norm": 1.799792036265137, "learning_rate": 6.314526256910077e-06, "loss": 0.5372, "step": 6100 }, { "epoch": 0.43, "grad_norm": 1.5262260514380073, "learning_rate": 6.313417481024591e-06, "loss": 0.4798, "step": 6101 }, { "epoch": 0.43, "grad_norm": 1.6504546546202055, "learning_rate": 6.3123086357599425e-06, "loss": 0.4858, "step": 6102 }, { "epoch": 0.43, "grad_norm": 0.788843214084213, "learning_rate": 6.311199721174706e-06, "loss": 0.4554, "step": 6103 }, { "epoch": 0.43, "grad_norm": 1.9803111198019188, "learning_rate": 6.310090737327454e-06, "loss": 0.5302, "step": 6104 }, { "epoch": 0.43, "grad_norm": 1.5980135391963333, "learning_rate": 6.308981684276774e-06, "loss": 0.5663, "step": 6105 }, { "epoch": 0.43, "grad_norm": 2.05909675895273, "learning_rate": 6.307872562081244e-06, "loss": 0.5117, "step": 6106 }, { "epoch": 0.43, "grad_norm": 3.9094287150764266, "learning_rate": 6.306763370799454e-06, "loss": 0.5302, "step": 6107 }, { "epoch": 0.43, "grad_norm": 0.8014578744429597, "learning_rate": 6.305654110489996e-06, "loss": 0.4535, "step": 6108 }, { "epoch": 0.43, "grad_norm": 0.7821462655626772, "learning_rate": 6.304544781211461e-06, "loss": 0.4428, "step": 6109 }, { "epoch": 0.43, "grad_norm": 1.5627923418959861, "learning_rate": 6.303435383022453e-06, "loss": 0.5245, "step": 6110 }, { "epoch": 0.43, "grad_norm": 1.5778730970804111, "learning_rate": 6.3023259159815705e-06, "loss": 0.4845, "step": 6111 }, { "epoch": 0.43, "grad_norm": 1.7514241813910292, "learning_rate": 6.301216380147419e-06, "loss": 0.5967, "step": 6112 }, { "epoch": 0.43, "grad_norm": 1.7931257183949585, "learning_rate": 6.3001067755786105e-06, "loss": 0.4948, "step": 6113 }, { "epoch": 0.43, "grad_norm": 1.8632497864967363, "learning_rate": 6.298997102333756e-06, "loss": 0.5127, "step": 6114 }, { "epoch": 0.43, "grad_norm": 1.990703323015417, "learning_rate": 6.297887360471474e-06, "loss": 0.4982, "step": 6115 }, { "epoch": 0.43, "grad_norm": 0.7565502975097378, "learning_rate": 6.296777550050384e-06, "loss": 0.4396, "step": 6116 }, { "epoch": 0.43, "grad_norm": 1.696714864096735, "learning_rate": 6.295667671129109e-06, "loss": 0.5551, "step": 6117 }, { "epoch": 0.43, "grad_norm": 1.6355721572430648, "learning_rate": 6.294557723766277e-06, "loss": 0.658, "step": 6118 }, { "epoch": 0.43, "grad_norm": 2.0328367666366334, "learning_rate": 6.293447708020519e-06, "loss": 0.6119, "step": 6119 }, { "epoch": 0.43, "grad_norm": 1.7772455220383725, "learning_rate": 6.292337623950471e-06, "loss": 0.5195, "step": 6120 }, { "epoch": 0.43, "grad_norm": 1.6352842659506563, "learning_rate": 6.291227471614772e-06, "loss": 0.6237, "step": 6121 }, { "epoch": 0.43, "grad_norm": 1.835530446528254, "learning_rate": 6.29011725107206e-06, "loss": 0.5877, "step": 6122 }, { "epoch": 0.43, "grad_norm": 1.7602722289561124, "learning_rate": 6.289006962380986e-06, "loss": 0.5288, "step": 6123 }, { "epoch": 0.43, "grad_norm": 1.5528271240653484, "learning_rate": 6.287896605600195e-06, "loss": 0.5207, "step": 6124 }, { "epoch": 0.43, "grad_norm": 1.6632876956213896, "learning_rate": 6.286786180788341e-06, "loss": 0.5911, "step": 6125 }, { "epoch": 0.43, "grad_norm": 0.7783529778225994, "learning_rate": 6.285675688004081e-06, "loss": 0.4418, "step": 6126 }, { "epoch": 0.43, "grad_norm": 1.4813194355114034, "learning_rate": 6.284565127306075e-06, "loss": 0.5341, "step": 6127 }, { "epoch": 0.43, "grad_norm": 1.669930380544074, "learning_rate": 6.283454498752985e-06, "loss": 0.6283, "step": 6128 }, { "epoch": 0.43, "grad_norm": 2.1252952337405, "learning_rate": 6.28234380240348e-06, "loss": 0.4762, "step": 6129 }, { "epoch": 0.43, "grad_norm": 1.7698785457888835, "learning_rate": 6.281233038316231e-06, "loss": 0.5654, "step": 6130 }, { "epoch": 0.44, "grad_norm": 1.6502491720614618, "learning_rate": 6.280122206549911e-06, "loss": 0.6142, "step": 6131 }, { "epoch": 0.44, "grad_norm": 1.469238816355353, "learning_rate": 6.2790113071631995e-06, "loss": 0.4954, "step": 6132 }, { "epoch": 0.44, "grad_norm": 1.6244157797136503, "learning_rate": 6.277900340214775e-06, "loss": 0.5854, "step": 6133 }, { "epoch": 0.44, "grad_norm": 2.3954772480888566, "learning_rate": 6.276789305763325e-06, "loss": 0.4888, "step": 6134 }, { "epoch": 0.44, "grad_norm": 1.8572885649158046, "learning_rate": 6.275678203867539e-06, "loss": 0.5396, "step": 6135 }, { "epoch": 0.44, "grad_norm": 1.9666503447153676, "learning_rate": 6.274567034586107e-06, "loss": 0.4823, "step": 6136 }, { "epoch": 0.44, "grad_norm": 1.8319228051918792, "learning_rate": 6.273455797977724e-06, "loss": 0.5444, "step": 6137 }, { "epoch": 0.44, "grad_norm": 1.8598215106329359, "learning_rate": 6.2723444941010924e-06, "loss": 0.4726, "step": 6138 }, { "epoch": 0.44, "grad_norm": 1.5259607105043078, "learning_rate": 6.271233123014912e-06, "loss": 0.5149, "step": 6139 }, { "epoch": 0.44, "grad_norm": 0.7648063697617749, "learning_rate": 6.270121684777892e-06, "loss": 0.4062, "step": 6140 }, { "epoch": 0.44, "grad_norm": 2.751524512569989, "learning_rate": 6.2690101794487405e-06, "loss": 0.6027, "step": 6141 }, { "epoch": 0.44, "grad_norm": 1.675003716083765, "learning_rate": 6.267898607086169e-06, "loss": 0.566, "step": 6142 }, { "epoch": 0.44, "grad_norm": 1.5016987172815628, "learning_rate": 6.266786967748899e-06, "loss": 0.551, "step": 6143 }, { "epoch": 0.44, "grad_norm": 1.5423562554786494, "learning_rate": 6.265675261495648e-06, "loss": 0.4909, "step": 6144 }, { "epoch": 0.44, "grad_norm": 1.9171359140426696, "learning_rate": 6.264563488385142e-06, "loss": 0.5061, "step": 6145 }, { "epoch": 0.44, "grad_norm": 4.678936170508005, "learning_rate": 6.263451648476107e-06, "loss": 0.5251, "step": 6146 }, { "epoch": 0.44, "grad_norm": 1.5388787864978313, "learning_rate": 6.262339741827276e-06, "loss": 0.5849, "step": 6147 }, { "epoch": 0.44, "grad_norm": 2.384554247552369, "learning_rate": 6.261227768497381e-06, "loss": 0.6163, "step": 6148 }, { "epoch": 0.44, "grad_norm": 2.224197377473454, "learning_rate": 6.260115728545162e-06, "loss": 0.5727, "step": 6149 }, { "epoch": 0.44, "grad_norm": 2.0989297124904986, "learning_rate": 6.25900362202936e-06, "loss": 0.5506, "step": 6150 }, { "epoch": 0.44, "grad_norm": 0.7624101202259755, "learning_rate": 6.2578914490087215e-06, "loss": 0.4365, "step": 6151 }, { "epoch": 0.44, "grad_norm": 1.782475018928272, "learning_rate": 6.256779209541993e-06, "loss": 0.5393, "step": 6152 }, { "epoch": 0.44, "grad_norm": 1.9247115404036428, "learning_rate": 6.2556669036879305e-06, "loss": 0.5967, "step": 6153 }, { "epoch": 0.44, "grad_norm": 1.7011673386359527, "learning_rate": 6.254554531505286e-06, "loss": 0.5527, "step": 6154 }, { "epoch": 0.44, "grad_norm": 0.7385748487037749, "learning_rate": 6.2534420930528195e-06, "loss": 0.4286, "step": 6155 }, { "epoch": 0.44, "grad_norm": 1.5161974139428882, "learning_rate": 6.252329588389294e-06, "loss": 0.5272, "step": 6156 }, { "epoch": 0.44, "grad_norm": 2.889235873565762, "learning_rate": 6.2512170175734775e-06, "loss": 0.5592, "step": 6157 }, { "epoch": 0.44, "grad_norm": 1.8448791132858846, "learning_rate": 6.250104380664137e-06, "loss": 0.5469, "step": 6158 }, { "epoch": 0.44, "grad_norm": 1.8051013063694148, "learning_rate": 6.248991677720049e-06, "loss": 0.5842, "step": 6159 }, { "epoch": 0.44, "grad_norm": 1.5502540161998797, "learning_rate": 6.247878908799988e-06, "loss": 0.506, "step": 6160 }, { "epoch": 0.44, "grad_norm": 2.0813819952494255, "learning_rate": 6.2467660739627335e-06, "loss": 0.4698, "step": 6161 }, { "epoch": 0.44, "grad_norm": 1.5145498468738243, "learning_rate": 6.2456531732670725e-06, "loss": 0.4923, "step": 6162 }, { "epoch": 0.44, "grad_norm": 2.7066680956440012, "learning_rate": 6.244540206771787e-06, "loss": 0.538, "step": 6163 }, { "epoch": 0.44, "grad_norm": 2.2050823223399405, "learning_rate": 6.243427174535672e-06, "loss": 0.5496, "step": 6164 }, { "epoch": 0.44, "grad_norm": 1.8253386741625213, "learning_rate": 6.24231407661752e-06, "loss": 0.5756, "step": 6165 }, { "epoch": 0.44, "grad_norm": 1.9087996375251355, "learning_rate": 6.241200913076131e-06, "loss": 0.541, "step": 6166 }, { "epoch": 0.44, "grad_norm": 1.5359523046901937, "learning_rate": 6.240087683970302e-06, "loss": 0.5358, "step": 6167 }, { "epoch": 0.44, "grad_norm": 1.9341348886556613, "learning_rate": 6.2389743893588405e-06, "loss": 0.5499, "step": 6168 }, { "epoch": 0.44, "grad_norm": 1.9847867889561752, "learning_rate": 6.2378610293005536e-06, "loss": 0.608, "step": 6169 }, { "epoch": 0.44, "grad_norm": 1.8834368961291679, "learning_rate": 6.236747603854252e-06, "loss": 0.4832, "step": 6170 }, { "epoch": 0.44, "grad_norm": 1.589830020588979, "learning_rate": 6.23563411307875e-06, "loss": 0.5925, "step": 6171 }, { "epoch": 0.44, "grad_norm": 1.9198303737497495, "learning_rate": 6.23452055703287e-06, "loss": 0.5564, "step": 6172 }, { "epoch": 0.44, "grad_norm": 1.6413817779682078, "learning_rate": 6.2334069357754305e-06, "loss": 0.5104, "step": 6173 }, { "epoch": 0.44, "grad_norm": 2.1040736103756315, "learning_rate": 6.232293249365256e-06, "loss": 0.5297, "step": 6174 }, { "epoch": 0.44, "grad_norm": 1.6439080460669913, "learning_rate": 6.231179497861177e-06, "loss": 0.4819, "step": 6175 }, { "epoch": 0.44, "grad_norm": 1.627745819660884, "learning_rate": 6.230065681322025e-06, "loss": 0.5874, "step": 6176 }, { "epoch": 0.44, "grad_norm": 1.6354643101790303, "learning_rate": 6.228951799806636e-06, "loss": 0.5482, "step": 6177 }, { "epoch": 0.44, "grad_norm": 1.7619981749887288, "learning_rate": 6.227837853373849e-06, "loss": 0.5021, "step": 6178 }, { "epoch": 0.44, "grad_norm": 1.8309392706265493, "learning_rate": 6.226723842082505e-06, "loss": 0.5775, "step": 6179 }, { "epoch": 0.44, "grad_norm": 1.6317995520885173, "learning_rate": 6.225609765991452e-06, "loss": 0.5944, "step": 6180 }, { "epoch": 0.44, "grad_norm": 1.7403009692322589, "learning_rate": 6.224495625159538e-06, "loss": 0.4849, "step": 6181 }, { "epoch": 0.44, "grad_norm": 2.809357781127944, "learning_rate": 6.223381419645615e-06, "loss": 0.5581, "step": 6182 }, { "epoch": 0.44, "grad_norm": 1.6411644107832721, "learning_rate": 6.2222671495085415e-06, "loss": 0.6185, "step": 6183 }, { "epoch": 0.44, "grad_norm": 1.6449911107901565, "learning_rate": 6.221152814807176e-06, "loss": 0.49, "step": 6184 }, { "epoch": 0.44, "grad_norm": 1.6068414688377948, "learning_rate": 6.220038415600379e-06, "loss": 0.5417, "step": 6185 }, { "epoch": 0.44, "grad_norm": 1.9969485878249782, "learning_rate": 6.218923951947019e-06, "loss": 0.4713, "step": 6186 }, { "epoch": 0.44, "grad_norm": 1.5844837576922057, "learning_rate": 6.217809423905967e-06, "loss": 0.458, "step": 6187 }, { "epoch": 0.44, "grad_norm": 0.7709199699288237, "learning_rate": 6.2166948315360945e-06, "loss": 0.454, "step": 6188 }, { "epoch": 0.44, "grad_norm": 3.1604570184006198, "learning_rate": 6.2155801748962774e-06, "loss": 0.5671, "step": 6189 }, { "epoch": 0.44, "grad_norm": 1.8646447671552364, "learning_rate": 6.2144654540453965e-06, "loss": 0.5616, "step": 6190 }, { "epoch": 0.44, "grad_norm": 1.5365011377098845, "learning_rate": 6.213350669042335e-06, "loss": 0.5486, "step": 6191 }, { "epoch": 0.44, "grad_norm": 1.5281194323623237, "learning_rate": 6.212235819945982e-06, "loss": 0.5355, "step": 6192 }, { "epoch": 0.44, "grad_norm": 1.536637152413724, "learning_rate": 6.2111209068152226e-06, "loss": 0.5132, "step": 6193 }, { "epoch": 0.44, "grad_norm": 3.2466302964554465, "learning_rate": 6.210005929708954e-06, "loss": 0.5531, "step": 6194 }, { "epoch": 0.44, "grad_norm": 2.4598271805125638, "learning_rate": 6.208890888686072e-06, "loss": 0.6179, "step": 6195 }, { "epoch": 0.44, "grad_norm": 1.5699757222333501, "learning_rate": 6.2077757838054775e-06, "loss": 0.5789, "step": 6196 }, { "epoch": 0.44, "grad_norm": 1.8229522874119948, "learning_rate": 6.206660615126073e-06, "loss": 0.5209, "step": 6197 }, { "epoch": 0.44, "grad_norm": 1.7041064107277306, "learning_rate": 6.205545382706768e-06, "loss": 0.5641, "step": 6198 }, { "epoch": 0.44, "grad_norm": 1.6674827284717544, "learning_rate": 6.204430086606469e-06, "loss": 0.5016, "step": 6199 }, { "epoch": 0.44, "grad_norm": 2.117914363686433, "learning_rate": 6.203314726884091e-06, "loss": 0.5517, "step": 6200 }, { "epoch": 0.44, "grad_norm": 1.5255239957725937, "learning_rate": 6.202199303598553e-06, "loss": 0.6193, "step": 6201 }, { "epoch": 0.44, "grad_norm": 1.623757319562821, "learning_rate": 6.201083816808774e-06, "loss": 0.5583, "step": 6202 }, { "epoch": 0.44, "grad_norm": 1.687568547448088, "learning_rate": 6.199968266573678e-06, "loss": 0.5034, "step": 6203 }, { "epoch": 0.44, "grad_norm": 2.142736276580768, "learning_rate": 6.198852652952193e-06, "loss": 0.5507, "step": 6204 }, { "epoch": 0.44, "grad_norm": 0.7336887062723735, "learning_rate": 6.197736976003247e-06, "loss": 0.4499, "step": 6205 }, { "epoch": 0.44, "grad_norm": 1.4382025396164515, "learning_rate": 6.196621235785776e-06, "loss": 0.4628, "step": 6206 }, { "epoch": 0.44, "grad_norm": 1.5621131880838122, "learning_rate": 6.195505432358717e-06, "loss": 0.53, "step": 6207 }, { "epoch": 0.44, "grad_norm": 1.5569820390980393, "learning_rate": 6.194389565781008e-06, "loss": 0.4909, "step": 6208 }, { "epoch": 0.44, "grad_norm": 1.4800681596521752, "learning_rate": 6.1932736361115984e-06, "loss": 0.4824, "step": 6209 }, { "epoch": 0.44, "grad_norm": 1.6841342147700906, "learning_rate": 6.19215764340943e-06, "loss": 0.5703, "step": 6210 }, { "epoch": 0.44, "grad_norm": 0.834240733707024, "learning_rate": 6.191041587733456e-06, "loss": 0.4237, "step": 6211 }, { "epoch": 0.44, "grad_norm": 1.7158978162060057, "learning_rate": 6.189925469142629e-06, "loss": 0.5383, "step": 6212 }, { "epoch": 0.44, "grad_norm": 1.9712461494873257, "learning_rate": 6.188809287695907e-06, "loss": 0.5065, "step": 6213 }, { "epoch": 0.44, "grad_norm": 1.5990474779773305, "learning_rate": 6.187693043452251e-06, "loss": 0.571, "step": 6214 }, { "epoch": 0.44, "grad_norm": 0.7815430489918583, "learning_rate": 6.186576736470622e-06, "loss": 0.4775, "step": 6215 }, { "epoch": 0.44, "grad_norm": 1.8629920043976602, "learning_rate": 6.1854603668099896e-06, "loss": 0.5209, "step": 6216 }, { "epoch": 0.44, "grad_norm": 1.8963622335438568, "learning_rate": 6.184343934529326e-06, "loss": 0.5297, "step": 6217 }, { "epoch": 0.44, "grad_norm": 1.61221934032901, "learning_rate": 6.1832274396876e-06, "loss": 0.5715, "step": 6218 }, { "epoch": 0.44, "grad_norm": 1.593595627784326, "learning_rate": 6.182110882343794e-06, "loss": 0.5191, "step": 6219 }, { "epoch": 0.44, "grad_norm": 1.4927035826447947, "learning_rate": 6.180994262556883e-06, "loss": 0.5573, "step": 6220 }, { "epoch": 0.44, "grad_norm": 1.6555281521915473, "learning_rate": 6.179877580385854e-06, "loss": 0.5647, "step": 6221 }, { "epoch": 0.44, "grad_norm": 2.0348484358409746, "learning_rate": 6.178760835889693e-06, "loss": 0.5758, "step": 6222 }, { "epoch": 0.44, "grad_norm": 1.6724643936881138, "learning_rate": 6.177644029127391e-06, "loss": 0.5891, "step": 6223 }, { "epoch": 0.44, "grad_norm": 5.829288752461, "learning_rate": 6.1765271601579415e-06, "loss": 0.5534, "step": 6224 }, { "epoch": 0.44, "grad_norm": 1.4815107342645737, "learning_rate": 6.17541022904034e-06, "loss": 0.4882, "step": 6225 }, { "epoch": 0.44, "grad_norm": 2.1661590045651895, "learning_rate": 6.1742932358335885e-06, "loss": 0.5423, "step": 6226 }, { "epoch": 0.44, "grad_norm": 1.4789498097241023, "learning_rate": 6.173176180596687e-06, "loss": 0.5725, "step": 6227 }, { "epoch": 0.44, "grad_norm": 1.8176100947467226, "learning_rate": 6.172059063388647e-06, "loss": 0.5642, "step": 6228 }, { "epoch": 0.44, "grad_norm": 1.7426328800125375, "learning_rate": 6.170941884268474e-06, "loss": 0.5834, "step": 6229 }, { "epoch": 0.44, "grad_norm": 2.1388167962460383, "learning_rate": 6.169824643295184e-06, "loss": 0.5012, "step": 6230 }, { "epoch": 0.44, "grad_norm": 17.986604005793666, "learning_rate": 6.168707340527791e-06, "loss": 0.5548, "step": 6231 }, { "epoch": 0.44, "grad_norm": 1.4494248325167172, "learning_rate": 6.167589976025318e-06, "loss": 0.4875, "step": 6232 }, { "epoch": 0.44, "grad_norm": 1.8625723393003484, "learning_rate": 6.166472549846785e-06, "loss": 0.5776, "step": 6233 }, { "epoch": 0.44, "grad_norm": 1.5983690048936794, "learning_rate": 6.16535506205122e-06, "loss": 0.5794, "step": 6234 }, { "epoch": 0.44, "grad_norm": 2.0109205787572058, "learning_rate": 6.164237512697652e-06, "loss": 0.5494, "step": 6235 }, { "epoch": 0.44, "grad_norm": 0.7836244380548796, "learning_rate": 6.1631199018451115e-06, "loss": 0.422, "step": 6236 }, { "epoch": 0.44, "grad_norm": 1.6712249127803358, "learning_rate": 6.162002229552638e-06, "loss": 0.5797, "step": 6237 }, { "epoch": 0.44, "grad_norm": 2.8955147216207138, "learning_rate": 6.160884495879269e-06, "loss": 0.5483, "step": 6238 }, { "epoch": 0.44, "grad_norm": 1.3617425228454885, "learning_rate": 6.159766700884049e-06, "loss": 0.6039, "step": 6239 }, { "epoch": 0.44, "grad_norm": 1.59313777763064, "learning_rate": 6.15864884462602e-06, "loss": 0.5329, "step": 6240 }, { "epoch": 0.44, "grad_norm": 1.6879669534920296, "learning_rate": 6.157530927164235e-06, "loss": 0.5229, "step": 6241 }, { "epoch": 0.44, "grad_norm": 1.8037394547560757, "learning_rate": 6.156412948557743e-06, "loss": 0.5162, "step": 6242 }, { "epoch": 0.44, "grad_norm": 1.6176573591341008, "learning_rate": 6.1552949088656015e-06, "loss": 0.5572, "step": 6243 }, { "epoch": 0.44, "grad_norm": 1.8427606601954893, "learning_rate": 6.154176808146867e-06, "loss": 0.5365, "step": 6244 }, { "epoch": 0.44, "grad_norm": 1.697066633073979, "learning_rate": 6.153058646460603e-06, "loss": 0.5583, "step": 6245 }, { "epoch": 0.44, "grad_norm": 2.4994415746224634, "learning_rate": 6.151940423865873e-06, "loss": 0.6351, "step": 6246 }, { "epoch": 0.44, "grad_norm": 1.8157814585701075, "learning_rate": 6.150822140421749e-06, "loss": 0.5145, "step": 6247 }, { "epoch": 0.44, "grad_norm": 1.6593396021279347, "learning_rate": 6.149703796187297e-06, "loss": 0.5795, "step": 6248 }, { "epoch": 0.44, "grad_norm": 1.9478953416822407, "learning_rate": 6.148585391221597e-06, "loss": 0.5617, "step": 6249 }, { "epoch": 0.44, "grad_norm": 1.63436514829823, "learning_rate": 6.147466925583725e-06, "loss": 0.5688, "step": 6250 }, { "epoch": 0.44, "grad_norm": 1.645990993281783, "learning_rate": 6.14634839933276e-06, "loss": 0.5658, "step": 6251 }, { "epoch": 0.44, "grad_norm": 1.9717124058818387, "learning_rate": 6.145229812527789e-06, "loss": 0.5166, "step": 6252 }, { "epoch": 0.44, "grad_norm": 1.6890334311952144, "learning_rate": 6.144111165227899e-06, "loss": 0.4972, "step": 6253 }, { "epoch": 0.44, "grad_norm": 1.6392733997739652, "learning_rate": 6.142992457492181e-06, "loss": 0.5617, "step": 6254 }, { "epoch": 0.44, "grad_norm": 1.6143278914940584, "learning_rate": 6.141873689379727e-06, "loss": 0.5735, "step": 6255 }, { "epoch": 0.44, "grad_norm": 1.7310510130857852, "learning_rate": 6.140754860949637e-06, "loss": 0.6626, "step": 6256 }, { "epoch": 0.44, "grad_norm": 1.749418515672551, "learning_rate": 6.139635972261009e-06, "loss": 0.5306, "step": 6257 }, { "epoch": 0.44, "grad_norm": 1.6124828419584367, "learning_rate": 6.138517023372949e-06, "loss": 0.5758, "step": 6258 }, { "epoch": 0.44, "grad_norm": 2.4289655846329805, "learning_rate": 6.137398014344559e-06, "loss": 0.5115, "step": 6259 }, { "epoch": 0.44, "grad_norm": 2.1461138950780114, "learning_rate": 6.136278945234954e-06, "loss": 0.4833, "step": 6260 }, { "epoch": 0.44, "grad_norm": 3.4970710816669546, "learning_rate": 6.135159816103243e-06, "loss": 0.5403, "step": 6261 }, { "epoch": 0.44, "grad_norm": 3.3619107183317567, "learning_rate": 6.134040627008546e-06, "loss": 0.557, "step": 6262 }, { "epoch": 0.44, "grad_norm": 1.563213903190383, "learning_rate": 6.132921378009979e-06, "loss": 0.5326, "step": 6263 }, { "epoch": 0.44, "grad_norm": 1.7780829680616153, "learning_rate": 6.131802069166668e-06, "loss": 0.5137, "step": 6264 }, { "epoch": 0.44, "grad_norm": 1.8972372370268282, "learning_rate": 6.1306827005377346e-06, "loss": 0.5228, "step": 6265 }, { "epoch": 0.44, "grad_norm": 0.8571711819896531, "learning_rate": 6.129563272182311e-06, "loss": 0.4409, "step": 6266 }, { "epoch": 0.44, "grad_norm": 1.6146407453371268, "learning_rate": 6.128443784159526e-06, "loss": 0.5145, "step": 6267 }, { "epoch": 0.44, "grad_norm": 0.9411797893544394, "learning_rate": 6.127324236528519e-06, "loss": 0.4751, "step": 6268 }, { "epoch": 0.44, "grad_norm": 1.7601298166076602, "learning_rate": 6.126204629348425e-06, "loss": 0.4879, "step": 6269 }, { "epoch": 0.44, "grad_norm": 1.6247471615189382, "learning_rate": 6.125084962678385e-06, "loss": 0.4863, "step": 6270 }, { "epoch": 0.45, "grad_norm": 1.7757284189282887, "learning_rate": 6.123965236577547e-06, "loss": 0.5948, "step": 6271 }, { "epoch": 0.45, "grad_norm": 1.7007656689665966, "learning_rate": 6.122845451105055e-06, "loss": 0.5529, "step": 6272 }, { "epoch": 0.45, "grad_norm": 1.761731132253299, "learning_rate": 6.121725606320063e-06, "loss": 0.4897, "step": 6273 }, { "epoch": 0.45, "grad_norm": 2.09351514496744, "learning_rate": 6.120605702281722e-06, "loss": 0.5533, "step": 6274 }, { "epoch": 0.45, "grad_norm": 1.6651256125184892, "learning_rate": 6.119485739049193e-06, "loss": 0.5354, "step": 6275 }, { "epoch": 0.45, "grad_norm": 0.8518558758012634, "learning_rate": 6.1183657166816314e-06, "loss": 0.4636, "step": 6276 }, { "epoch": 0.45, "grad_norm": 1.4462954038825175, "learning_rate": 6.117245635238204e-06, "loss": 0.5522, "step": 6277 }, { "epoch": 0.45, "grad_norm": 2.968619172750841, "learning_rate": 6.116125494778078e-06, "loss": 0.5732, "step": 6278 }, { "epoch": 0.45, "grad_norm": 1.6727639050631524, "learning_rate": 6.11500529536042e-06, "loss": 0.5132, "step": 6279 }, { "epoch": 0.45, "grad_norm": 1.6731763441805236, "learning_rate": 6.113885037044405e-06, "loss": 0.621, "step": 6280 }, { "epoch": 0.45, "grad_norm": 1.4705909323859574, "learning_rate": 6.112764719889206e-06, "loss": 0.4712, "step": 6281 }, { "epoch": 0.45, "grad_norm": 1.6383368203352928, "learning_rate": 6.111644343954005e-06, "loss": 0.5224, "step": 6282 }, { "epoch": 0.45, "grad_norm": 1.6077922123191906, "learning_rate": 6.110523909297984e-06, "loss": 0.6028, "step": 6283 }, { "epoch": 0.45, "grad_norm": 2.490090985109898, "learning_rate": 6.109403415980326e-06, "loss": 0.5605, "step": 6284 }, { "epoch": 0.45, "grad_norm": 2.4572898598120876, "learning_rate": 6.108282864060221e-06, "loss": 0.6022, "step": 6285 }, { "epoch": 0.45, "grad_norm": 1.480338188907449, "learning_rate": 6.107162253596859e-06, "loss": 0.4921, "step": 6286 }, { "epoch": 0.45, "grad_norm": 2.6627672810356384, "learning_rate": 6.106041584649435e-06, "loss": 0.5393, "step": 6287 }, { "epoch": 0.45, "grad_norm": 0.7593494144629737, "learning_rate": 6.104920857277147e-06, "loss": 0.4462, "step": 6288 }, { "epoch": 0.45, "grad_norm": 2.594381554695951, "learning_rate": 6.103800071539196e-06, "loss": 0.4865, "step": 6289 }, { "epoch": 0.45, "grad_norm": 1.6960217135075029, "learning_rate": 6.102679227494785e-06, "loss": 0.6087, "step": 6290 }, { "epoch": 0.45, "grad_norm": 1.6747573741025117, "learning_rate": 6.10155832520312e-06, "loss": 0.5259, "step": 6291 }, { "epoch": 0.45, "grad_norm": 2.205040167932142, "learning_rate": 6.100437364723412e-06, "loss": 0.5446, "step": 6292 }, { "epoch": 0.45, "grad_norm": 1.80616112561425, "learning_rate": 6.099316346114874e-06, "loss": 0.5636, "step": 6293 }, { "epoch": 0.45, "grad_norm": 1.4748848929800007, "learning_rate": 6.098195269436722e-06, "loss": 0.5092, "step": 6294 }, { "epoch": 0.45, "grad_norm": 0.7488479915985337, "learning_rate": 6.097074134748174e-06, "loss": 0.4682, "step": 6295 }, { "epoch": 0.45, "grad_norm": 2.3134720416105514, "learning_rate": 6.095952942108453e-06, "loss": 0.4841, "step": 6296 }, { "epoch": 0.45, "grad_norm": 1.904022322153139, "learning_rate": 6.094831691576783e-06, "loss": 0.5963, "step": 6297 }, { "epoch": 0.45, "grad_norm": 4.296837633753355, "learning_rate": 6.093710383212396e-06, "loss": 0.4625, "step": 6298 }, { "epoch": 0.45, "grad_norm": 1.824140123248322, "learning_rate": 6.092589017074517e-06, "loss": 0.5638, "step": 6299 }, { "epoch": 0.45, "grad_norm": 1.6296849796776893, "learning_rate": 6.0914675932223865e-06, "loss": 0.5354, "step": 6300 }, { "epoch": 0.45, "grad_norm": 1.7669937105130415, "learning_rate": 6.090346111715239e-06, "loss": 0.5216, "step": 6301 }, { "epoch": 0.45, "grad_norm": 2.176589261455902, "learning_rate": 6.089224572612313e-06, "loss": 0.4818, "step": 6302 }, { "epoch": 0.45, "grad_norm": 2.0053015185415046, "learning_rate": 6.088102975972856e-06, "loss": 0.5304, "step": 6303 }, { "epoch": 0.45, "grad_norm": 2.488078859494076, "learning_rate": 6.086981321856112e-06, "loss": 0.5887, "step": 6304 }, { "epoch": 0.45, "grad_norm": 1.840681360537423, "learning_rate": 6.085859610321334e-06, "loss": 0.5456, "step": 6305 }, { "epoch": 0.45, "grad_norm": 1.9257712119036055, "learning_rate": 6.08473784142777e-06, "loss": 0.5211, "step": 6306 }, { "epoch": 0.45, "grad_norm": 1.5997691107744978, "learning_rate": 6.083616015234678e-06, "loss": 0.5761, "step": 6307 }, { "epoch": 0.45, "grad_norm": 1.6740624204383638, "learning_rate": 6.082494131801315e-06, "loss": 0.5651, "step": 6308 }, { "epoch": 0.45, "grad_norm": 1.5847670008909105, "learning_rate": 6.081372191186947e-06, "loss": 0.5836, "step": 6309 }, { "epoch": 0.45, "grad_norm": 1.5698393107218398, "learning_rate": 6.0802501934508325e-06, "loss": 0.5637, "step": 6310 }, { "epoch": 0.45, "grad_norm": 0.7372095157641763, "learning_rate": 6.0791281386522435e-06, "loss": 0.4823, "step": 6311 }, { "epoch": 0.45, "grad_norm": 1.6401589205168723, "learning_rate": 6.07800602685045e-06, "loss": 0.5839, "step": 6312 }, { "epoch": 0.45, "grad_norm": 1.932359912596974, "learning_rate": 6.076883858104725e-06, "loss": 0.6, "step": 6313 }, { "epoch": 0.45, "grad_norm": 1.6469076935301972, "learning_rate": 6.075761632474346e-06, "loss": 0.5111, "step": 6314 }, { "epoch": 0.45, "grad_norm": 1.86445809810398, "learning_rate": 6.074639350018593e-06, "loss": 0.5258, "step": 6315 }, { "epoch": 0.45, "grad_norm": 2.1040131736615444, "learning_rate": 6.073517010796746e-06, "loss": 0.515, "step": 6316 }, { "epoch": 0.45, "grad_norm": 1.791324308720816, "learning_rate": 6.072394614868094e-06, "loss": 0.5708, "step": 6317 }, { "epoch": 0.45, "grad_norm": 1.5879006715263964, "learning_rate": 6.071272162291926e-06, "loss": 0.5034, "step": 6318 }, { "epoch": 0.45, "grad_norm": 1.5722626614320403, "learning_rate": 6.07014965312753e-06, "loss": 0.5331, "step": 6319 }, { "epoch": 0.45, "grad_norm": 1.662804579069011, "learning_rate": 6.069027087434205e-06, "loss": 0.6106, "step": 6320 }, { "epoch": 0.45, "grad_norm": 1.5399928426239344, "learning_rate": 6.067904465271246e-06, "loss": 0.5227, "step": 6321 }, { "epoch": 0.45, "grad_norm": 1.3013792198802103, "learning_rate": 6.066781786697956e-06, "loss": 0.4858, "step": 6322 }, { "epoch": 0.45, "grad_norm": 1.796472118638848, "learning_rate": 6.065659051773636e-06, "loss": 0.4609, "step": 6323 }, { "epoch": 0.45, "grad_norm": 1.9458156447020258, "learning_rate": 6.064536260557595e-06, "loss": 0.4412, "step": 6324 }, { "epoch": 0.45, "grad_norm": 0.6496237951436457, "learning_rate": 6.063413413109141e-06, "loss": 0.4418, "step": 6325 }, { "epoch": 0.45, "grad_norm": 1.8212255812459504, "learning_rate": 6.062290509487586e-06, "loss": 0.4879, "step": 6326 }, { "epoch": 0.45, "grad_norm": 1.6328838466348194, "learning_rate": 6.061167549752247e-06, "loss": 0.5199, "step": 6327 }, { "epoch": 0.45, "grad_norm": 1.5251049366927516, "learning_rate": 6.060044533962444e-06, "loss": 0.5383, "step": 6328 }, { "epoch": 0.45, "grad_norm": 1.7937667289398975, "learning_rate": 6.0589214621774964e-06, "loss": 0.5383, "step": 6329 }, { "epoch": 0.45, "grad_norm": 1.6970897671626783, "learning_rate": 6.0577983344567286e-06, "loss": 0.5787, "step": 6330 }, { "epoch": 0.45, "grad_norm": 2.0208213777286996, "learning_rate": 6.056675150859468e-06, "loss": 0.5895, "step": 6331 }, { "epoch": 0.45, "grad_norm": 1.9150070801513752, "learning_rate": 6.055551911445045e-06, "loss": 0.4711, "step": 6332 }, { "epoch": 0.45, "grad_norm": 1.9756203576750297, "learning_rate": 6.054428616272794e-06, "loss": 0.5576, "step": 6333 }, { "epoch": 0.45, "grad_norm": 1.7700714709535976, "learning_rate": 6.053305265402049e-06, "loss": 0.5589, "step": 6334 }, { "epoch": 0.45, "grad_norm": 1.738138287283359, "learning_rate": 6.052181858892155e-06, "loss": 0.568, "step": 6335 }, { "epoch": 0.45, "grad_norm": 1.4521938960582153, "learning_rate": 6.051058396802446e-06, "loss": 0.4519, "step": 6336 }, { "epoch": 0.45, "grad_norm": 2.167688616087516, "learning_rate": 6.049934879192274e-06, "loss": 0.5401, "step": 6337 }, { "epoch": 0.45, "grad_norm": 2.8992867114857983, "learning_rate": 6.048811306120982e-06, "loss": 0.5117, "step": 6338 }, { "epoch": 0.45, "grad_norm": 1.8519241186657522, "learning_rate": 6.047687677647924e-06, "loss": 0.6114, "step": 6339 }, { "epoch": 0.45, "grad_norm": 0.8043450710659489, "learning_rate": 6.04656399383245e-06, "loss": 0.4283, "step": 6340 }, { "epoch": 0.45, "grad_norm": 1.7820317145615474, "learning_rate": 6.045440254733923e-06, "loss": 0.5933, "step": 6341 }, { "epoch": 0.45, "grad_norm": 1.4899686775862293, "learning_rate": 6.044316460411698e-06, "loss": 0.5426, "step": 6342 }, { "epoch": 0.45, "grad_norm": 1.6887543426235077, "learning_rate": 6.04319261092514e-06, "loss": 0.5731, "step": 6343 }, { "epoch": 0.45, "grad_norm": 1.5139654881037894, "learning_rate": 6.0420687063336115e-06, "loss": 0.5528, "step": 6344 }, { "epoch": 0.45, "grad_norm": 1.45012442475435, "learning_rate": 6.040944746696484e-06, "loss": 0.559, "step": 6345 }, { "epoch": 0.45, "grad_norm": 1.8451289691944497, "learning_rate": 6.039820732073128e-06, "loss": 0.5006, "step": 6346 }, { "epoch": 0.45, "grad_norm": 2.2048477445393, "learning_rate": 6.038696662522917e-06, "loss": 0.5922, "step": 6347 }, { "epoch": 0.45, "grad_norm": 1.5770085826218494, "learning_rate": 6.037572538105228e-06, "loss": 0.5098, "step": 6348 }, { "epoch": 0.45, "grad_norm": 1.7269173318399798, "learning_rate": 6.0364483588794445e-06, "loss": 0.5359, "step": 6349 }, { "epoch": 0.45, "grad_norm": 1.9730618275933753, "learning_rate": 6.035324124904944e-06, "loss": 0.5854, "step": 6350 }, { "epoch": 0.45, "grad_norm": 1.7972258596981647, "learning_rate": 6.034199836241116e-06, "loss": 0.5806, "step": 6351 }, { "epoch": 0.45, "grad_norm": 2.81097451051854, "learning_rate": 6.033075492947349e-06, "loss": 0.519, "step": 6352 }, { "epoch": 0.45, "grad_norm": 1.6040128263423006, "learning_rate": 6.031951095083033e-06, "loss": 0.5337, "step": 6353 }, { "epoch": 0.45, "grad_norm": 1.5692517052292783, "learning_rate": 6.030826642707564e-06, "loss": 0.4745, "step": 6354 }, { "epoch": 0.45, "grad_norm": 1.5214691951475245, "learning_rate": 6.029702135880337e-06, "loss": 0.5143, "step": 6355 }, { "epoch": 0.45, "grad_norm": 0.734691259803567, "learning_rate": 6.028577574660756e-06, "loss": 0.4727, "step": 6356 }, { "epoch": 0.45, "grad_norm": 1.7164708405942686, "learning_rate": 6.027452959108222e-06, "loss": 0.4784, "step": 6357 }, { "epoch": 0.45, "grad_norm": 2.4412936711761835, "learning_rate": 6.026328289282141e-06, "loss": 0.5755, "step": 6358 }, { "epoch": 0.45, "grad_norm": 1.8882605839140438, "learning_rate": 6.025203565241922e-06, "loss": 0.6094, "step": 6359 }, { "epoch": 0.45, "grad_norm": 1.9902520516716906, "learning_rate": 6.024078787046979e-06, "loss": 0.5202, "step": 6360 }, { "epoch": 0.45, "grad_norm": 1.6659585066847027, "learning_rate": 6.022953954756721e-06, "loss": 0.5709, "step": 6361 }, { "epoch": 0.45, "grad_norm": 1.4692642953293527, "learning_rate": 6.021829068430569e-06, "loss": 0.5096, "step": 6362 }, { "epoch": 0.45, "grad_norm": 1.6144990941518313, "learning_rate": 6.020704128127945e-06, "loss": 0.5332, "step": 6363 }, { "epoch": 0.45, "grad_norm": 1.5638504051059585, "learning_rate": 6.0195791339082696e-06, "loss": 0.4656, "step": 6364 }, { "epoch": 0.45, "grad_norm": 1.7115928554717594, "learning_rate": 6.018454085830969e-06, "loss": 0.4672, "step": 6365 }, { "epoch": 0.45, "grad_norm": 6.080648547201388, "learning_rate": 6.017328983955475e-06, "loss": 0.5645, "step": 6366 }, { "epoch": 0.45, "grad_norm": 2.0484814546379595, "learning_rate": 6.016203828341214e-06, "loss": 0.6072, "step": 6367 }, { "epoch": 0.45, "grad_norm": 1.471978090879093, "learning_rate": 6.015078619047625e-06, "loss": 0.459, "step": 6368 }, { "epoch": 0.45, "grad_norm": 2.3633706889560853, "learning_rate": 6.013953356134142e-06, "loss": 0.5459, "step": 6369 }, { "epoch": 0.45, "grad_norm": 2.1774102808482394, "learning_rate": 6.012828039660208e-06, "loss": 0.5975, "step": 6370 }, { "epoch": 0.45, "grad_norm": 1.5005848097500631, "learning_rate": 6.011702669685266e-06, "loss": 0.4591, "step": 6371 }, { "epoch": 0.45, "grad_norm": 1.8356678940819122, "learning_rate": 6.0105772462687605e-06, "loss": 0.5203, "step": 6372 }, { "epoch": 0.45, "grad_norm": 1.581454818676609, "learning_rate": 6.00945176947014e-06, "loss": 0.5355, "step": 6373 }, { "epoch": 0.45, "grad_norm": 1.6336609191507736, "learning_rate": 6.008326239348857e-06, "loss": 0.6264, "step": 6374 }, { "epoch": 0.45, "grad_norm": 1.914035863841177, "learning_rate": 6.007200655964366e-06, "loss": 0.5173, "step": 6375 }, { "epoch": 0.45, "grad_norm": 1.9260479448436225, "learning_rate": 6.006075019376122e-06, "loss": 0.6137, "step": 6376 }, { "epoch": 0.45, "grad_norm": 1.9273434159181793, "learning_rate": 6.004949329643587e-06, "loss": 0.5852, "step": 6377 }, { "epoch": 0.45, "grad_norm": 1.4489679471444523, "learning_rate": 6.003823586826223e-06, "loss": 0.5074, "step": 6378 }, { "epoch": 0.45, "grad_norm": 1.6810635269129035, "learning_rate": 6.002697790983496e-06, "loss": 0.4575, "step": 6379 }, { "epoch": 0.45, "grad_norm": 1.8108569238987724, "learning_rate": 6.0015719421748745e-06, "loss": 0.5, "step": 6380 }, { "epoch": 0.45, "grad_norm": 1.78871080938493, "learning_rate": 6.000446040459828e-06, "loss": 0.5339, "step": 6381 }, { "epoch": 0.45, "grad_norm": 1.6068370487258503, "learning_rate": 5.999320085897833e-06, "loss": 0.5133, "step": 6382 }, { "epoch": 0.45, "grad_norm": 1.8735702384117092, "learning_rate": 5.9981940785483635e-06, "loss": 0.5522, "step": 6383 }, { "epoch": 0.45, "grad_norm": 1.8079265609944464, "learning_rate": 5.997068018470902e-06, "loss": 0.5622, "step": 6384 }, { "epoch": 0.45, "grad_norm": 1.780889254395183, "learning_rate": 5.9959419057249276e-06, "loss": 0.4697, "step": 6385 }, { "epoch": 0.45, "grad_norm": 1.9088008489846524, "learning_rate": 5.9948157403699295e-06, "loss": 0.5964, "step": 6386 }, { "epoch": 0.45, "grad_norm": 1.8044304644326736, "learning_rate": 5.99368952246539e-06, "loss": 0.529, "step": 6387 }, { "epoch": 0.45, "grad_norm": 2.2119266590313384, "learning_rate": 5.992563252070805e-06, "loss": 0.5228, "step": 6388 }, { "epoch": 0.45, "grad_norm": 1.8190107729215494, "learning_rate": 5.991436929245665e-06, "loss": 0.5994, "step": 6389 }, { "epoch": 0.45, "grad_norm": 1.6999338400312642, "learning_rate": 5.990310554049468e-06, "loss": 0.6356, "step": 6390 }, { "epoch": 0.45, "grad_norm": 1.8751631482818127, "learning_rate": 5.98918412654171e-06, "loss": 0.5868, "step": 6391 }, { "epoch": 0.45, "grad_norm": 1.7434120009414837, "learning_rate": 5.988057646781896e-06, "loss": 0.5519, "step": 6392 }, { "epoch": 0.45, "grad_norm": 1.9212804413581503, "learning_rate": 5.986931114829528e-06, "loss": 0.5263, "step": 6393 }, { "epoch": 0.45, "grad_norm": 3.4072162119756433, "learning_rate": 5.9858045307441155e-06, "loss": 0.5127, "step": 6394 }, { "epoch": 0.45, "grad_norm": 1.8318166137598089, "learning_rate": 5.984677894585167e-06, "loss": 0.5263, "step": 6395 }, { "epoch": 0.45, "grad_norm": 0.7935450901203088, "learning_rate": 5.983551206412196e-06, "loss": 0.4322, "step": 6396 }, { "epoch": 0.45, "grad_norm": 2.4268643914625847, "learning_rate": 5.982424466284717e-06, "loss": 0.58, "step": 6397 }, { "epoch": 0.45, "grad_norm": 2.9115997839143635, "learning_rate": 5.981297674262249e-06, "loss": 0.4982, "step": 6398 }, { "epoch": 0.45, "grad_norm": 2.0955492552091592, "learning_rate": 5.980170830404312e-06, "loss": 0.5549, "step": 6399 }, { "epoch": 0.45, "grad_norm": 2.055557865680107, "learning_rate": 5.97904393477043e-06, "loss": 0.6038, "step": 6400 }, { "epoch": 0.45, "grad_norm": 0.715264243646855, "learning_rate": 5.977916987420132e-06, "loss": 0.4559, "step": 6401 }, { "epoch": 0.45, "grad_norm": 4.113281320147073, "learning_rate": 5.976789988412944e-06, "loss": 0.5746, "step": 6402 }, { "epoch": 0.45, "grad_norm": 1.807767601593898, "learning_rate": 5.975662937808399e-06, "loss": 0.5502, "step": 6403 }, { "epoch": 0.45, "grad_norm": 1.7918910403352066, "learning_rate": 5.974535835666031e-06, "loss": 0.5638, "step": 6404 }, { "epoch": 0.45, "grad_norm": 1.9171503401807197, "learning_rate": 5.97340868204538e-06, "loss": 0.5317, "step": 6405 }, { "epoch": 0.45, "grad_norm": 2.0244101954307583, "learning_rate": 5.972281477005981e-06, "loss": 0.5684, "step": 6406 }, { "epoch": 0.45, "grad_norm": 1.9052062480536094, "learning_rate": 5.971154220607381e-06, "loss": 0.5518, "step": 6407 }, { "epoch": 0.45, "grad_norm": 1.5313546492520331, "learning_rate": 5.9700269129091245e-06, "loss": 0.5522, "step": 6408 }, { "epoch": 0.45, "grad_norm": 1.785359937144492, "learning_rate": 5.968899553970762e-06, "loss": 0.4378, "step": 6409 }, { "epoch": 0.45, "grad_norm": 2.383542888183812, "learning_rate": 5.967772143851839e-06, "loss": 0.5821, "step": 6410 }, { "epoch": 0.45, "grad_norm": 0.8168834323566663, "learning_rate": 5.966644682611914e-06, "loss": 0.4291, "step": 6411 }, { "epoch": 0.46, "grad_norm": 1.6599904228026834, "learning_rate": 5.965517170310539e-06, "loss": 0.6025, "step": 6412 }, { "epoch": 0.46, "grad_norm": 1.701475557055519, "learning_rate": 5.964389607007277e-06, "loss": 0.5292, "step": 6413 }, { "epoch": 0.46, "grad_norm": 1.714978858169787, "learning_rate": 5.963261992761687e-06, "loss": 0.4997, "step": 6414 }, { "epoch": 0.46, "grad_norm": 1.522292367449255, "learning_rate": 5.962134327633336e-06, "loss": 0.5099, "step": 6415 }, { "epoch": 0.46, "grad_norm": 1.7324391241501949, "learning_rate": 5.96100661168179e-06, "loss": 0.6069, "step": 6416 }, { "epoch": 0.46, "grad_norm": 1.8151828672976762, "learning_rate": 5.959878844966618e-06, "loss": 0.5318, "step": 6417 }, { "epoch": 0.46, "grad_norm": 1.8376300738454943, "learning_rate": 5.9587510275473924e-06, "loss": 0.5405, "step": 6418 }, { "epoch": 0.46, "grad_norm": 2.1879205383186546, "learning_rate": 5.957623159483689e-06, "loss": 0.5596, "step": 6419 }, { "epoch": 0.46, "grad_norm": 1.6111426076499447, "learning_rate": 5.956495240835085e-06, "loss": 0.4993, "step": 6420 }, { "epoch": 0.46, "grad_norm": 1.7668998612240656, "learning_rate": 5.955367271661161e-06, "loss": 0.5014, "step": 6421 }, { "epoch": 0.46, "grad_norm": 1.5036456377043879, "learning_rate": 5.954239252021502e-06, "loss": 0.4486, "step": 6422 }, { "epoch": 0.46, "grad_norm": 1.894129421554375, "learning_rate": 5.953111181975692e-06, "loss": 0.567, "step": 6423 }, { "epoch": 0.46, "grad_norm": 0.772162357585521, "learning_rate": 5.95198306158332e-06, "loss": 0.449, "step": 6424 }, { "epoch": 0.46, "grad_norm": 3.185759438050489, "learning_rate": 5.950854890903975e-06, "loss": 0.5738, "step": 6425 }, { "epoch": 0.46, "grad_norm": 2.6700256299841554, "learning_rate": 5.949726669997256e-06, "loss": 0.5517, "step": 6426 }, { "epoch": 0.46, "grad_norm": 1.6459719032538305, "learning_rate": 5.948598398922754e-06, "loss": 0.5604, "step": 6427 }, { "epoch": 0.46, "grad_norm": 1.777195069504668, "learning_rate": 5.94747007774007e-06, "loss": 0.5309, "step": 6428 }, { "epoch": 0.46, "grad_norm": 0.7153534840062844, "learning_rate": 5.946341706508805e-06, "loss": 0.4129, "step": 6429 }, { "epoch": 0.46, "grad_norm": 2.227863363642028, "learning_rate": 5.945213285288567e-06, "loss": 0.535, "step": 6430 }, { "epoch": 0.46, "grad_norm": 0.7419371023920691, "learning_rate": 5.944084814138958e-06, "loss": 0.4627, "step": 6431 }, { "epoch": 0.46, "grad_norm": 0.7960311295047298, "learning_rate": 5.942956293119592e-06, "loss": 0.4552, "step": 6432 }, { "epoch": 0.46, "grad_norm": 1.6028805283375644, "learning_rate": 5.94182772229008e-06, "loss": 0.5572, "step": 6433 }, { "epoch": 0.46, "grad_norm": 0.7344999540880032, "learning_rate": 5.9406991017100344e-06, "loss": 0.4678, "step": 6434 }, { "epoch": 0.46, "grad_norm": 1.8242668279025658, "learning_rate": 5.9395704314390755e-06, "loss": 0.6057, "step": 6435 }, { "epoch": 0.46, "grad_norm": 0.7261870789752807, "learning_rate": 5.938441711536822e-06, "loss": 0.4401, "step": 6436 }, { "epoch": 0.46, "grad_norm": 3.8540429723911145, "learning_rate": 5.9373129420628994e-06, "loss": 0.6828, "step": 6437 }, { "epoch": 0.46, "grad_norm": 1.6125222360160034, "learning_rate": 5.936184123076929e-06, "loss": 0.5378, "step": 6438 }, { "epoch": 0.46, "grad_norm": 1.8500386336443424, "learning_rate": 5.935055254638543e-06, "loss": 0.5348, "step": 6439 }, { "epoch": 0.46, "grad_norm": 2.0237995741430703, "learning_rate": 5.933926336807369e-06, "loss": 0.5435, "step": 6440 }, { "epoch": 0.46, "grad_norm": 1.8116085279090233, "learning_rate": 5.932797369643042e-06, "loss": 0.5345, "step": 6441 }, { "epoch": 0.46, "grad_norm": 1.4960293755445644, "learning_rate": 5.931668353205196e-06, "loss": 0.5007, "step": 6442 }, { "epoch": 0.46, "grad_norm": 1.7637556711405256, "learning_rate": 5.930539287553471e-06, "loss": 0.5747, "step": 6443 }, { "epoch": 0.46, "grad_norm": 1.701291157491818, "learning_rate": 5.929410172747507e-06, "loss": 0.6108, "step": 6444 }, { "epoch": 0.46, "grad_norm": 1.9899878206660269, "learning_rate": 5.92828100884695e-06, "loss": 0.5131, "step": 6445 }, { "epoch": 0.46, "grad_norm": 2.0129797001662744, "learning_rate": 5.927151795911444e-06, "loss": 0.5285, "step": 6446 }, { "epoch": 0.46, "grad_norm": 1.5313099691712024, "learning_rate": 5.926022534000638e-06, "loss": 0.4676, "step": 6447 }, { "epoch": 0.46, "grad_norm": 1.7621705638801388, "learning_rate": 5.924893223174185e-06, "loss": 0.5032, "step": 6448 }, { "epoch": 0.46, "grad_norm": 1.6754084396537015, "learning_rate": 5.923763863491737e-06, "loss": 0.5567, "step": 6449 }, { "epoch": 0.46, "grad_norm": 1.7431475508451586, "learning_rate": 5.922634455012952e-06, "loss": 0.5231, "step": 6450 }, { "epoch": 0.46, "grad_norm": 4.223515725670235, "learning_rate": 5.9215049977974885e-06, "loss": 0.5907, "step": 6451 }, { "epoch": 0.46, "grad_norm": 1.6987386506378932, "learning_rate": 5.920375491905009e-06, "loss": 0.5577, "step": 6452 }, { "epoch": 0.46, "grad_norm": 2.408567243086743, "learning_rate": 5.919245937395177e-06, "loss": 0.5847, "step": 6453 }, { "epoch": 0.46, "grad_norm": 1.4083150243281561, "learning_rate": 5.9181163343276615e-06, "loss": 0.4772, "step": 6454 }, { "epoch": 0.46, "grad_norm": 1.6395284349959505, "learning_rate": 5.916986682762128e-06, "loss": 0.5641, "step": 6455 }, { "epoch": 0.46, "grad_norm": 0.7530802695977581, "learning_rate": 5.9158569827582525e-06, "loss": 0.4602, "step": 6456 }, { "epoch": 0.46, "grad_norm": 1.6704504379518457, "learning_rate": 5.9147272343757055e-06, "loss": 0.4974, "step": 6457 }, { "epoch": 0.46, "grad_norm": 2.497360090310237, "learning_rate": 5.9135974376741674e-06, "loss": 0.613, "step": 6458 }, { "epoch": 0.46, "grad_norm": 1.8003183315980518, "learning_rate": 5.912467592713318e-06, "loss": 0.56, "step": 6459 }, { "epoch": 0.46, "grad_norm": 1.791665458662133, "learning_rate": 5.911337699552838e-06, "loss": 0.5403, "step": 6460 }, { "epoch": 0.46, "grad_norm": 1.6732740987392551, "learning_rate": 5.910207758252412e-06, "loss": 0.4911, "step": 6461 }, { "epoch": 0.46, "grad_norm": 1.766311472230727, "learning_rate": 5.90907776887173e-06, "loss": 0.5282, "step": 6462 }, { "epoch": 0.46, "grad_norm": 1.69180903044053, "learning_rate": 5.907947731470477e-06, "loss": 0.5267, "step": 6463 }, { "epoch": 0.46, "grad_norm": 1.671977248328302, "learning_rate": 5.90681764610835e-06, "loss": 0.6076, "step": 6464 }, { "epoch": 0.46, "grad_norm": 2.0030861543779, "learning_rate": 5.905687512845041e-06, "loss": 0.5823, "step": 6465 }, { "epoch": 0.46, "grad_norm": 1.7783420368794396, "learning_rate": 5.904557331740248e-06, "loss": 0.5302, "step": 6466 }, { "epoch": 0.46, "grad_norm": 1.7029723094224982, "learning_rate": 5.903427102853675e-06, "loss": 0.555, "step": 6467 }, { "epoch": 0.46, "grad_norm": 1.9435310067845268, "learning_rate": 5.902296826245019e-06, "loss": 0.5297, "step": 6468 }, { "epoch": 0.46, "grad_norm": 1.8418662921400502, "learning_rate": 5.901166501973989e-06, "loss": 0.5449, "step": 6469 }, { "epoch": 0.46, "grad_norm": 1.7736040801869801, "learning_rate": 5.9000361301002885e-06, "loss": 0.5476, "step": 6470 }, { "epoch": 0.46, "grad_norm": 1.9482762803233913, "learning_rate": 5.898905710683631e-06, "loss": 0.5692, "step": 6471 }, { "epoch": 0.46, "grad_norm": 1.7334269984723598, "learning_rate": 5.897775243783726e-06, "loss": 0.6103, "step": 6472 }, { "epoch": 0.46, "grad_norm": 1.5231131679306649, "learning_rate": 5.896644729460293e-06, "loss": 0.5421, "step": 6473 }, { "epoch": 0.46, "grad_norm": 2.267901586270862, "learning_rate": 5.895514167773046e-06, "loss": 0.5887, "step": 6474 }, { "epoch": 0.46, "grad_norm": 4.95863221142584, "learning_rate": 5.894383558781708e-06, "loss": 0.5376, "step": 6475 }, { "epoch": 0.46, "grad_norm": 0.7462560562836029, "learning_rate": 5.893252902545999e-06, "loss": 0.4779, "step": 6476 }, { "epoch": 0.46, "grad_norm": 1.5844274750047111, "learning_rate": 5.892122199125644e-06, "loss": 0.5039, "step": 6477 }, { "epoch": 0.46, "grad_norm": 1.8888732096707277, "learning_rate": 5.890991448580372e-06, "loss": 0.5154, "step": 6478 }, { "epoch": 0.46, "grad_norm": 1.8066295067264166, "learning_rate": 5.8898606509699115e-06, "loss": 0.6317, "step": 6479 }, { "epoch": 0.46, "grad_norm": 2.0124340132045475, "learning_rate": 5.888729806353996e-06, "loss": 0.5682, "step": 6480 }, { "epoch": 0.46, "grad_norm": 1.9485768589091188, "learning_rate": 5.887598914792363e-06, "loss": 0.5178, "step": 6481 }, { "epoch": 0.46, "grad_norm": 1.6811444904472665, "learning_rate": 5.886467976344748e-06, "loss": 0.5545, "step": 6482 }, { "epoch": 0.46, "grad_norm": 2.9770245019025023, "learning_rate": 5.885336991070888e-06, "loss": 0.5176, "step": 6483 }, { "epoch": 0.46, "grad_norm": 0.7716739271670248, "learning_rate": 5.88420595903053e-06, "loss": 0.445, "step": 6484 }, { "epoch": 0.46, "grad_norm": 1.9323586162619188, "learning_rate": 5.883074880283417e-06, "loss": 0.642, "step": 6485 }, { "epoch": 0.46, "grad_norm": 1.8309837804236608, "learning_rate": 5.881943754889295e-06, "loss": 0.4937, "step": 6486 }, { "epoch": 0.46, "grad_norm": 1.3210912582358003, "learning_rate": 5.880812582907917e-06, "loss": 0.4149, "step": 6487 }, { "epoch": 0.46, "grad_norm": 2.5590120695836163, "learning_rate": 5.879681364399033e-06, "loss": 0.5514, "step": 6488 }, { "epoch": 0.46, "grad_norm": 1.473701090451251, "learning_rate": 5.8785500994223995e-06, "loss": 0.5332, "step": 6489 }, { "epoch": 0.46, "grad_norm": 1.6882501633378515, "learning_rate": 5.877418788037773e-06, "loss": 0.6006, "step": 6490 }, { "epoch": 0.46, "grad_norm": 1.8244230888729185, "learning_rate": 5.876287430304912e-06, "loss": 0.5631, "step": 6491 }, { "epoch": 0.46, "grad_norm": 1.9167636052587769, "learning_rate": 5.87515602628358e-06, "loss": 0.5776, "step": 6492 }, { "epoch": 0.46, "grad_norm": 1.6900279828057316, "learning_rate": 5.874024576033541e-06, "loss": 0.5654, "step": 6493 }, { "epoch": 0.46, "grad_norm": 1.813799726233723, "learning_rate": 5.872893079614563e-06, "loss": 0.5371, "step": 6494 }, { "epoch": 0.46, "grad_norm": 1.802170127111877, "learning_rate": 5.871761537086413e-06, "loss": 0.5439, "step": 6495 }, { "epoch": 0.46, "grad_norm": 2.123946053483945, "learning_rate": 5.870629948508867e-06, "loss": 0.5959, "step": 6496 }, { "epoch": 0.46, "grad_norm": 1.4733286324789803, "learning_rate": 5.869498313941696e-06, "loss": 0.4765, "step": 6497 }, { "epoch": 0.46, "grad_norm": 1.655261132677402, "learning_rate": 5.868366633444678e-06, "loss": 0.5383, "step": 6498 }, { "epoch": 0.46, "grad_norm": 1.504868092426369, "learning_rate": 5.8672349070775925e-06, "loss": 0.5524, "step": 6499 }, { "epoch": 0.46, "grad_norm": 1.3537507523156453, "learning_rate": 5.866103134900219e-06, "loss": 0.4753, "step": 6500 }, { "epoch": 0.46, "grad_norm": 1.7070599027335374, "learning_rate": 5.864971316972344e-06, "loss": 0.5086, "step": 6501 }, { "epoch": 0.46, "grad_norm": 1.577050630505989, "learning_rate": 5.863839453353753e-06, "loss": 0.527, "step": 6502 }, { "epoch": 0.46, "grad_norm": 1.5888107007520351, "learning_rate": 5.862707544104236e-06, "loss": 0.4901, "step": 6503 }, { "epoch": 0.46, "grad_norm": 1.5101230748555103, "learning_rate": 5.861575589283583e-06, "loss": 0.4781, "step": 6504 }, { "epoch": 0.46, "grad_norm": 1.6850334093785295, "learning_rate": 5.860443588951587e-06, "loss": 0.5289, "step": 6505 }, { "epoch": 0.46, "grad_norm": 1.5225714063852112, "learning_rate": 5.8593115431680446e-06, "loss": 0.5485, "step": 6506 }, { "epoch": 0.46, "grad_norm": 1.468185126349229, "learning_rate": 5.858179451992757e-06, "loss": 0.5116, "step": 6507 }, { "epoch": 0.46, "grad_norm": 1.9167290260953755, "learning_rate": 5.857047315485521e-06, "loss": 0.6069, "step": 6508 }, { "epoch": 0.46, "grad_norm": 1.610856044333189, "learning_rate": 5.855915133706142e-06, "loss": 0.5687, "step": 6509 }, { "epoch": 0.46, "grad_norm": 2.569083219951473, "learning_rate": 5.854782906714425e-06, "loss": 0.5437, "step": 6510 }, { "epoch": 0.46, "grad_norm": 4.975678275415909, "learning_rate": 5.853650634570178e-06, "loss": 0.47, "step": 6511 }, { "epoch": 0.46, "grad_norm": 1.550531104282591, "learning_rate": 5.852518317333215e-06, "loss": 0.5513, "step": 6512 }, { "epoch": 0.46, "grad_norm": 1.5076593158282061, "learning_rate": 5.851385955063342e-06, "loss": 0.5174, "step": 6513 }, { "epoch": 0.46, "grad_norm": 2.4261263481099307, "learning_rate": 5.850253547820382e-06, "loss": 0.5786, "step": 6514 }, { "epoch": 0.46, "grad_norm": 2.9391863591740357, "learning_rate": 5.849121095664145e-06, "loss": 0.5039, "step": 6515 }, { "epoch": 0.46, "grad_norm": 1.5052218345379327, "learning_rate": 5.847988598654455e-06, "loss": 0.5338, "step": 6516 }, { "epoch": 0.46, "grad_norm": 5.27682586507779, "learning_rate": 5.846856056851135e-06, "loss": 0.5834, "step": 6517 }, { "epoch": 0.46, "grad_norm": 1.704266513912345, "learning_rate": 5.845723470314008e-06, "loss": 0.5007, "step": 6518 }, { "epoch": 0.46, "grad_norm": 1.6868093071475736, "learning_rate": 5.844590839102901e-06, "loss": 0.5565, "step": 6519 }, { "epoch": 0.46, "grad_norm": 1.5717427382718754, "learning_rate": 5.843458163277646e-06, "loss": 0.5491, "step": 6520 }, { "epoch": 0.46, "grad_norm": 1.814481798794096, "learning_rate": 5.8423254428980715e-06, "loss": 0.598, "step": 6521 }, { "epoch": 0.46, "grad_norm": 1.6822941494722286, "learning_rate": 5.841192678024013e-06, "loss": 0.5239, "step": 6522 }, { "epoch": 0.46, "grad_norm": 1.7091621356880293, "learning_rate": 5.8400598687153065e-06, "loss": 0.5466, "step": 6523 }, { "epoch": 0.46, "grad_norm": 1.8703470191990452, "learning_rate": 5.838927015031792e-06, "loss": 0.532, "step": 6524 }, { "epoch": 0.46, "grad_norm": 1.9000594990125457, "learning_rate": 5.837794117033309e-06, "loss": 0.5833, "step": 6525 }, { "epoch": 0.46, "grad_norm": 1.6636464991010342, "learning_rate": 5.836661174779703e-06, "loss": 0.4796, "step": 6526 }, { "epoch": 0.46, "grad_norm": 1.8181075623051646, "learning_rate": 5.835528188330818e-06, "loss": 0.6016, "step": 6527 }, { "epoch": 0.46, "grad_norm": 1.7286660746278752, "learning_rate": 5.834395157746502e-06, "loss": 0.4815, "step": 6528 }, { "epoch": 0.46, "grad_norm": 2.6311749685805332, "learning_rate": 5.833262083086609e-06, "loss": 0.5701, "step": 6529 }, { "epoch": 0.46, "grad_norm": 1.6225147668060855, "learning_rate": 5.832128964410987e-06, "loss": 0.5573, "step": 6530 }, { "epoch": 0.46, "grad_norm": 1.6162450168338347, "learning_rate": 5.830995801779494e-06, "loss": 0.5186, "step": 6531 }, { "epoch": 0.46, "grad_norm": 1.8084536918066667, "learning_rate": 5.829862595251987e-06, "loss": 0.556, "step": 6532 }, { "epoch": 0.46, "grad_norm": 1.4588511693307329, "learning_rate": 5.828729344888326e-06, "loss": 0.4689, "step": 6533 }, { "epoch": 0.46, "grad_norm": 2.297747658567431, "learning_rate": 5.827596050748373e-06, "loss": 0.5161, "step": 6534 }, { "epoch": 0.46, "grad_norm": 1.6381720660000378, "learning_rate": 5.826462712891993e-06, "loss": 0.4965, "step": 6535 }, { "epoch": 0.46, "grad_norm": 1.7686671079376857, "learning_rate": 5.82532933137905e-06, "loss": 0.5698, "step": 6536 }, { "epoch": 0.46, "grad_norm": 1.6371246449748926, "learning_rate": 5.824195906269418e-06, "loss": 0.5254, "step": 6537 }, { "epoch": 0.46, "grad_norm": 1.5937466057559901, "learning_rate": 5.823062437622962e-06, "loss": 0.523, "step": 6538 }, { "epoch": 0.46, "grad_norm": 1.6819719970143758, "learning_rate": 5.821928925499561e-06, "loss": 0.6332, "step": 6539 }, { "epoch": 0.46, "grad_norm": 1.7938242791162198, "learning_rate": 5.820795369959089e-06, "loss": 0.5317, "step": 6540 }, { "epoch": 0.46, "grad_norm": 1.8836589751167838, "learning_rate": 5.819661771061426e-06, "loss": 0.5485, "step": 6541 }, { "epoch": 0.46, "grad_norm": 0.779515597515293, "learning_rate": 5.8185281288664485e-06, "loss": 0.4339, "step": 6542 }, { "epoch": 0.46, "grad_norm": 1.6693759194105433, "learning_rate": 5.817394443434042e-06, "loss": 0.5793, "step": 6543 }, { "epoch": 0.46, "grad_norm": 1.6917231571672975, "learning_rate": 5.816260714824092e-06, "loss": 0.5468, "step": 6544 }, { "epoch": 0.46, "grad_norm": 1.6269392933831908, "learning_rate": 5.815126943096485e-06, "loss": 0.5336, "step": 6545 }, { "epoch": 0.46, "grad_norm": 1.9857225426171916, "learning_rate": 5.81399312831111e-06, "loss": 0.5888, "step": 6546 }, { "epoch": 0.46, "grad_norm": 1.629520390041545, "learning_rate": 5.8128592705278605e-06, "loss": 0.5203, "step": 6547 }, { "epoch": 0.46, "grad_norm": 1.4673899924905942, "learning_rate": 5.811725369806631e-06, "loss": 0.4293, "step": 6548 }, { "epoch": 0.46, "grad_norm": 0.8356123724223296, "learning_rate": 5.810591426207315e-06, "loss": 0.467, "step": 6549 }, { "epoch": 0.46, "grad_norm": 1.5460995032312055, "learning_rate": 5.809457439789815e-06, "loss": 0.5419, "step": 6550 }, { "epoch": 0.46, "grad_norm": 1.7535564319300292, "learning_rate": 5.808323410614029e-06, "loss": 0.5144, "step": 6551 }, { "epoch": 0.46, "grad_norm": 2.2661333645891237, "learning_rate": 5.807189338739861e-06, "loss": 0.5452, "step": 6552 }, { "epoch": 0.47, "grad_norm": 2.006246899189775, "learning_rate": 5.806055224227219e-06, "loss": 0.5338, "step": 6553 }, { "epoch": 0.47, "grad_norm": 1.5713527799346696, "learning_rate": 5.804921067136007e-06, "loss": 0.6281, "step": 6554 }, { "epoch": 0.47, "grad_norm": 1.6159235048794975, "learning_rate": 5.803786867526138e-06, "loss": 0.5875, "step": 6555 }, { "epoch": 0.47, "grad_norm": 1.9771249425213298, "learning_rate": 5.802652625457522e-06, "loss": 0.5601, "step": 6556 }, { "epoch": 0.47, "grad_norm": 2.1691213570847827, "learning_rate": 5.801518340990075e-06, "loss": 0.5193, "step": 6557 }, { "epoch": 0.47, "grad_norm": 1.7639881774895407, "learning_rate": 5.800384014183714e-06, "loss": 0.5412, "step": 6558 }, { "epoch": 0.47, "grad_norm": 1.9977348245124626, "learning_rate": 5.799249645098357e-06, "loss": 0.5909, "step": 6559 }, { "epoch": 0.47, "grad_norm": 1.6028082111757918, "learning_rate": 5.798115233793924e-06, "loss": 0.6064, "step": 6560 }, { "epoch": 0.47, "grad_norm": 1.7423828058376563, "learning_rate": 5.796980780330341e-06, "loss": 0.5602, "step": 6561 }, { "epoch": 0.47, "grad_norm": 1.7200283719111704, "learning_rate": 5.795846284767532e-06, "loss": 0.5749, "step": 6562 }, { "epoch": 0.47, "grad_norm": 2.4851109977189454, "learning_rate": 5.7947117471654265e-06, "loss": 0.5202, "step": 6563 }, { "epoch": 0.47, "grad_norm": 1.6845304299926007, "learning_rate": 5.793577167583954e-06, "loss": 0.4929, "step": 6564 }, { "epoch": 0.47, "grad_norm": 0.7607268729754489, "learning_rate": 5.792442546083047e-06, "loss": 0.4464, "step": 6565 }, { "epoch": 0.47, "grad_norm": 1.4754679864135323, "learning_rate": 5.791307882722638e-06, "loss": 0.5132, "step": 6566 }, { "epoch": 0.47, "grad_norm": 1.9879661466868437, "learning_rate": 5.790173177562666e-06, "loss": 0.5596, "step": 6567 }, { "epoch": 0.47, "grad_norm": 1.7965359323545496, "learning_rate": 5.789038430663067e-06, "loss": 0.516, "step": 6568 }, { "epoch": 0.47, "grad_norm": 1.6295512429957864, "learning_rate": 5.787903642083789e-06, "loss": 0.5197, "step": 6569 }, { "epoch": 0.47, "grad_norm": 2.100171580133209, "learning_rate": 5.786768811884767e-06, "loss": 0.5673, "step": 6570 }, { "epoch": 0.47, "grad_norm": 1.8698466540805052, "learning_rate": 5.785633940125953e-06, "loss": 0.5722, "step": 6571 }, { "epoch": 0.47, "grad_norm": 3.3053257114793593, "learning_rate": 5.7844990268672905e-06, "loss": 0.5337, "step": 6572 }, { "epoch": 0.47, "grad_norm": 2.4963701055644405, "learning_rate": 5.783364072168732e-06, "loss": 0.6052, "step": 6573 }, { "epoch": 0.47, "grad_norm": 1.5596048107626008, "learning_rate": 5.782229076090229e-06, "loss": 0.5655, "step": 6574 }, { "epoch": 0.47, "grad_norm": 1.8048397410522108, "learning_rate": 5.781094038691735e-06, "loss": 0.4942, "step": 6575 }, { "epoch": 0.47, "grad_norm": 1.9452005700958668, "learning_rate": 5.779958960033206e-06, "loss": 0.5232, "step": 6576 }, { "epoch": 0.47, "grad_norm": 2.066144954196438, "learning_rate": 5.778823840174604e-06, "loss": 0.5086, "step": 6577 }, { "epoch": 0.47, "grad_norm": 2.1290465068300346, "learning_rate": 5.777688679175887e-06, "loss": 0.5219, "step": 6578 }, { "epoch": 0.47, "grad_norm": 1.6174604035963174, "learning_rate": 5.776553477097019e-06, "loss": 0.5501, "step": 6579 }, { "epoch": 0.47, "grad_norm": 2.265118118215325, "learning_rate": 5.775418233997965e-06, "loss": 0.5377, "step": 6580 }, { "epoch": 0.47, "grad_norm": 0.7921459288201813, "learning_rate": 5.774282949938692e-06, "loss": 0.4668, "step": 6581 }, { "epoch": 0.47, "grad_norm": 1.7498145047295934, "learning_rate": 5.77314762497917e-06, "loss": 0.4956, "step": 6582 }, { "epoch": 0.47, "grad_norm": 1.7969535894835564, "learning_rate": 5.772012259179371e-06, "loss": 0.5483, "step": 6583 }, { "epoch": 0.47, "grad_norm": 0.7415886293354063, "learning_rate": 5.770876852599268e-06, "loss": 0.4276, "step": 6584 }, { "epoch": 0.47, "grad_norm": 2.004676155991473, "learning_rate": 5.769741405298838e-06, "loss": 0.5775, "step": 6585 }, { "epoch": 0.47, "grad_norm": 1.7890772872768979, "learning_rate": 5.76860591733806e-06, "loss": 0.5407, "step": 6586 }, { "epoch": 0.47, "grad_norm": 1.869542100299995, "learning_rate": 5.7674703887769105e-06, "loss": 0.553, "step": 6587 }, { "epoch": 0.47, "grad_norm": 1.6012212973383677, "learning_rate": 5.766334819675376e-06, "loss": 0.5932, "step": 6588 }, { "epoch": 0.47, "grad_norm": 9.575654640004853, "learning_rate": 5.765199210093439e-06, "loss": 0.5435, "step": 6589 }, { "epoch": 0.47, "grad_norm": 1.66334889412972, "learning_rate": 5.764063560091087e-06, "loss": 0.5184, "step": 6590 }, { "epoch": 0.47, "grad_norm": 0.8187527659932782, "learning_rate": 5.762927869728308e-06, "loss": 0.4767, "step": 6591 }, { "epoch": 0.47, "grad_norm": 2.0357744904535315, "learning_rate": 5.7617921390650945e-06, "loss": 0.53, "step": 6592 }, { "epoch": 0.47, "grad_norm": 1.9753597136221202, "learning_rate": 5.76065636816144e-06, "loss": 0.5282, "step": 6593 }, { "epoch": 0.47, "grad_norm": 2.3260688541496064, "learning_rate": 5.759520557077337e-06, "loss": 0.5609, "step": 6594 }, { "epoch": 0.47, "grad_norm": 1.494799226710269, "learning_rate": 5.758384705872786e-06, "loss": 0.497, "step": 6595 }, { "epoch": 0.47, "grad_norm": 1.5495709125023163, "learning_rate": 5.757248814607784e-06, "loss": 0.5311, "step": 6596 }, { "epoch": 0.47, "grad_norm": 1.5708119918853996, "learning_rate": 5.756112883342334e-06, "loss": 0.5853, "step": 6597 }, { "epoch": 0.47, "grad_norm": 2.2078653977089955, "learning_rate": 5.754976912136439e-06, "loss": 0.5919, "step": 6598 }, { "epoch": 0.47, "grad_norm": 1.5936759250261905, "learning_rate": 5.753840901050107e-06, "loss": 0.4539, "step": 6599 }, { "epoch": 0.47, "grad_norm": 1.7786963315152928, "learning_rate": 5.752704850143342e-06, "loss": 0.533, "step": 6600 }, { "epoch": 0.47, "grad_norm": 1.5168067489811323, "learning_rate": 5.7515687594761596e-06, "loss": 0.4708, "step": 6601 }, { "epoch": 0.47, "grad_norm": 1.5622053000885727, "learning_rate": 5.750432629108566e-06, "loss": 0.5198, "step": 6602 }, { "epoch": 0.47, "grad_norm": 1.745616218048588, "learning_rate": 5.749296459100579e-06, "loss": 0.5908, "step": 6603 }, { "epoch": 0.47, "grad_norm": 0.8534249230972306, "learning_rate": 5.748160249512212e-06, "loss": 0.4548, "step": 6604 }, { "epoch": 0.47, "grad_norm": 1.6624628453546941, "learning_rate": 5.747024000403488e-06, "loss": 0.528, "step": 6605 }, { "epoch": 0.47, "grad_norm": 2.13237973219662, "learning_rate": 5.7458877118344235e-06, "loss": 0.5474, "step": 6606 }, { "epoch": 0.47, "grad_norm": 1.544974271944382, "learning_rate": 5.744751383865043e-06, "loss": 0.5578, "step": 6607 }, { "epoch": 0.47, "grad_norm": 3.155690954607798, "learning_rate": 5.743615016555373e-06, "loss": 0.507, "step": 6608 }, { "epoch": 0.47, "grad_norm": 1.6088632694038116, "learning_rate": 5.742478609965435e-06, "loss": 0.549, "step": 6609 }, { "epoch": 0.47, "grad_norm": 1.612726715281011, "learning_rate": 5.741342164155263e-06, "loss": 0.4767, "step": 6610 }, { "epoch": 0.47, "grad_norm": 2.288136149941527, "learning_rate": 5.740205679184885e-06, "loss": 0.5241, "step": 6611 }, { "epoch": 0.47, "grad_norm": 1.7465710402057408, "learning_rate": 5.739069155114335e-06, "loss": 0.5036, "step": 6612 }, { "epoch": 0.47, "grad_norm": 2.958629777481797, "learning_rate": 5.7379325920036475e-06, "loss": 0.5413, "step": 6613 }, { "epoch": 0.47, "grad_norm": 1.6609878250657621, "learning_rate": 5.736795989912861e-06, "loss": 0.5238, "step": 6614 }, { "epoch": 0.47, "grad_norm": 2.0560669417258426, "learning_rate": 5.735659348902013e-06, "loss": 0.5869, "step": 6615 }, { "epoch": 0.47, "grad_norm": 1.532075424044709, "learning_rate": 5.734522669031146e-06, "loss": 0.4669, "step": 6616 }, { "epoch": 0.47, "grad_norm": 1.781752735091126, "learning_rate": 5.733385950360302e-06, "loss": 0.5607, "step": 6617 }, { "epoch": 0.47, "grad_norm": 1.6572238765680563, "learning_rate": 5.732249192949527e-06, "loss": 0.5169, "step": 6618 }, { "epoch": 0.47, "grad_norm": 2.1189460427642324, "learning_rate": 5.731112396858869e-06, "loss": 0.6012, "step": 6619 }, { "epoch": 0.47, "grad_norm": 1.7317856924250314, "learning_rate": 5.7299755621483776e-06, "loss": 0.564, "step": 6620 }, { "epoch": 0.47, "grad_norm": 1.9411395948074164, "learning_rate": 5.728838688878102e-06, "loss": 0.5716, "step": 6621 }, { "epoch": 0.47, "grad_norm": 1.6536454088791743, "learning_rate": 5.727701777108098e-06, "loss": 0.5846, "step": 6622 }, { "epoch": 0.47, "grad_norm": 2.0199823398020422, "learning_rate": 5.7265648268984195e-06, "loss": 0.5287, "step": 6623 }, { "epoch": 0.47, "grad_norm": 1.875344510428827, "learning_rate": 5.725427838309125e-06, "loss": 0.6027, "step": 6624 }, { "epoch": 0.47, "grad_norm": 1.8531987267296437, "learning_rate": 5.724290811400276e-06, "loss": 0.5658, "step": 6625 }, { "epoch": 0.47, "grad_norm": 1.6631067710312177, "learning_rate": 5.7231537462319306e-06, "loss": 0.5973, "step": 6626 }, { "epoch": 0.47, "grad_norm": 1.4982126379101766, "learning_rate": 5.722016642864154e-06, "loss": 0.5478, "step": 6627 }, { "epoch": 0.47, "grad_norm": 1.5298938964956168, "learning_rate": 5.720879501357011e-06, "loss": 0.5098, "step": 6628 }, { "epoch": 0.47, "grad_norm": 1.5479555871060233, "learning_rate": 5.7197423217705714e-06, "loss": 0.6086, "step": 6629 }, { "epoch": 0.47, "grad_norm": 1.745614172295436, "learning_rate": 5.7186051041649026e-06, "loss": 0.5309, "step": 6630 }, { "epoch": 0.47, "grad_norm": 1.5024643555518982, "learning_rate": 5.717467848600078e-06, "loss": 0.5176, "step": 6631 }, { "epoch": 0.47, "grad_norm": 1.6087249159044514, "learning_rate": 5.7163305551361705e-06, "loss": 0.556, "step": 6632 }, { "epoch": 0.47, "grad_norm": 1.578641439905476, "learning_rate": 5.7151932238332556e-06, "loss": 0.5562, "step": 6633 }, { "epoch": 0.47, "grad_norm": 2.28683546376332, "learning_rate": 5.71405585475141e-06, "loss": 0.5538, "step": 6634 }, { "epoch": 0.47, "grad_norm": 1.7231499120955505, "learning_rate": 5.712918447950716e-06, "loss": 0.5842, "step": 6635 }, { "epoch": 0.47, "grad_norm": 1.4754039808961341, "learning_rate": 5.711781003491254e-06, "loss": 0.4663, "step": 6636 }, { "epoch": 0.47, "grad_norm": 1.9479050511366132, "learning_rate": 5.710643521433107e-06, "loss": 0.4729, "step": 6637 }, { "epoch": 0.47, "grad_norm": 1.735796109039984, "learning_rate": 5.709506001836361e-06, "loss": 0.5188, "step": 6638 }, { "epoch": 0.47, "grad_norm": 1.7540777761182584, "learning_rate": 5.708368444761105e-06, "loss": 0.6254, "step": 6639 }, { "epoch": 0.47, "grad_norm": 0.7520759847319023, "learning_rate": 5.707230850267427e-06, "loss": 0.4399, "step": 6640 }, { "epoch": 0.47, "grad_norm": 1.5699159567040197, "learning_rate": 5.706093218415418e-06, "loss": 0.5632, "step": 6641 }, { "epoch": 0.47, "grad_norm": 1.5737620587970302, "learning_rate": 5.704955549265173e-06, "loss": 0.5212, "step": 6642 }, { "epoch": 0.47, "grad_norm": 1.5102354240979676, "learning_rate": 5.703817842876786e-06, "loss": 0.5176, "step": 6643 }, { "epoch": 0.47, "grad_norm": 1.550836481535434, "learning_rate": 5.702680099310359e-06, "loss": 0.531, "step": 6644 }, { "epoch": 0.47, "grad_norm": 1.797260942151853, "learning_rate": 5.701542318625985e-06, "loss": 0.5102, "step": 6645 }, { "epoch": 0.47, "grad_norm": 1.7630394726064567, "learning_rate": 5.700404500883772e-06, "loss": 0.548, "step": 6646 }, { "epoch": 0.47, "grad_norm": 2.5566499576517447, "learning_rate": 5.699266646143817e-06, "loss": 0.53, "step": 6647 }, { "epoch": 0.47, "grad_norm": 1.476067404257678, "learning_rate": 5.6981287544662286e-06, "loss": 0.5643, "step": 6648 }, { "epoch": 0.47, "grad_norm": 3.980127810229455, "learning_rate": 5.696990825911114e-06, "loss": 0.5678, "step": 6649 }, { "epoch": 0.47, "grad_norm": 1.6917379651067455, "learning_rate": 5.695852860538585e-06, "loss": 0.5347, "step": 6650 }, { "epoch": 0.47, "grad_norm": 1.8662038937587992, "learning_rate": 5.694714858408746e-06, "loss": 0.5441, "step": 6651 }, { "epoch": 0.47, "grad_norm": 1.7485397760104502, "learning_rate": 5.693576819581717e-06, "loss": 0.4951, "step": 6652 }, { "epoch": 0.47, "grad_norm": 1.5040995838627809, "learning_rate": 5.692438744117609e-06, "loss": 0.526, "step": 6653 }, { "epoch": 0.47, "grad_norm": 1.6959696372402153, "learning_rate": 5.691300632076541e-06, "loss": 0.5168, "step": 6654 }, { "epoch": 0.47, "grad_norm": 0.7247403248877953, "learning_rate": 5.6901624835186295e-06, "loss": 0.4757, "step": 6655 }, { "epoch": 0.47, "grad_norm": 1.548243095282225, "learning_rate": 5.689024298503996e-06, "loss": 0.5224, "step": 6656 }, { "epoch": 0.47, "grad_norm": 1.700848735867785, "learning_rate": 5.687886077092768e-06, "loss": 0.5178, "step": 6657 }, { "epoch": 0.47, "grad_norm": 0.7672379756173603, "learning_rate": 5.686747819345064e-06, "loss": 0.4344, "step": 6658 }, { "epoch": 0.47, "grad_norm": 1.9156117373252597, "learning_rate": 5.685609525321015e-06, "loss": 0.4879, "step": 6659 }, { "epoch": 0.47, "grad_norm": 2.1940886734271228, "learning_rate": 5.684471195080746e-06, "loss": 0.5469, "step": 6660 }, { "epoch": 0.47, "grad_norm": 1.7734389924979084, "learning_rate": 5.683332828684391e-06, "loss": 0.5569, "step": 6661 }, { "epoch": 0.47, "grad_norm": 1.8535064391867826, "learning_rate": 5.6821944261920794e-06, "loss": 0.5555, "step": 6662 }, { "epoch": 0.47, "grad_norm": 2.054237461112791, "learning_rate": 5.681055987663946e-06, "loss": 0.5164, "step": 6663 }, { "epoch": 0.47, "grad_norm": 1.6101718658838793, "learning_rate": 5.679917513160128e-06, "loss": 0.563, "step": 6664 }, { "epoch": 0.47, "grad_norm": 0.7428838528857571, "learning_rate": 5.678779002740764e-06, "loss": 0.4424, "step": 6665 }, { "epoch": 0.47, "grad_norm": 2.000729852809355, "learning_rate": 5.677640456465992e-06, "loss": 0.5954, "step": 6666 }, { "epoch": 0.47, "grad_norm": 1.6723443851362405, "learning_rate": 5.676501874395956e-06, "loss": 0.4885, "step": 6667 }, { "epoch": 0.47, "grad_norm": 1.8813192666603997, "learning_rate": 5.675363256590798e-06, "loss": 0.5199, "step": 6668 }, { "epoch": 0.47, "grad_norm": 1.5842149677278634, "learning_rate": 5.674224603110665e-06, "loss": 0.4859, "step": 6669 }, { "epoch": 0.47, "grad_norm": 2.1258460391581875, "learning_rate": 5.673085914015701e-06, "loss": 0.579, "step": 6670 }, { "epoch": 0.47, "grad_norm": 2.071915758261809, "learning_rate": 5.671947189366061e-06, "loss": 0.5326, "step": 6671 }, { "epoch": 0.47, "grad_norm": 1.7858896533764734, "learning_rate": 5.670808429221894e-06, "loss": 0.5025, "step": 6672 }, { "epoch": 0.47, "grad_norm": 1.630768100685377, "learning_rate": 5.669669633643351e-06, "loss": 0.5326, "step": 6673 }, { "epoch": 0.47, "grad_norm": 0.8313405851425825, "learning_rate": 5.668530802690591e-06, "loss": 0.4628, "step": 6674 }, { "epoch": 0.47, "grad_norm": 1.8188713082885533, "learning_rate": 5.667391936423767e-06, "loss": 0.6095, "step": 6675 }, { "epoch": 0.47, "grad_norm": 0.7410904978841278, "learning_rate": 5.6662530349030396e-06, "loss": 0.4404, "step": 6676 }, { "epoch": 0.47, "grad_norm": 0.6797022392886681, "learning_rate": 5.66511409818857e-06, "loss": 0.4485, "step": 6677 }, { "epoch": 0.47, "grad_norm": 1.6991112340208845, "learning_rate": 5.663975126340519e-06, "loss": 0.5114, "step": 6678 }, { "epoch": 0.47, "grad_norm": 1.4949186231886575, "learning_rate": 5.662836119419053e-06, "loss": 0.5263, "step": 6679 }, { "epoch": 0.47, "grad_norm": 1.5968698878481151, "learning_rate": 5.6616970774843375e-06, "loss": 0.4368, "step": 6680 }, { "epoch": 0.47, "grad_norm": 1.8765070184513248, "learning_rate": 5.660558000596539e-06, "loss": 0.5802, "step": 6681 }, { "epoch": 0.47, "grad_norm": 1.598715098987565, "learning_rate": 5.659418888815829e-06, "loss": 0.5267, "step": 6682 }, { "epoch": 0.47, "grad_norm": 1.7994963055067141, "learning_rate": 5.658279742202379e-06, "loss": 0.5439, "step": 6683 }, { "epoch": 0.47, "grad_norm": 0.8434310508210648, "learning_rate": 5.657140560816362e-06, "loss": 0.4575, "step": 6684 }, { "epoch": 0.47, "grad_norm": 2.471254865073264, "learning_rate": 5.656001344717954e-06, "loss": 0.6009, "step": 6685 }, { "epoch": 0.47, "grad_norm": 1.7711996708237348, "learning_rate": 5.654862093967334e-06, "loss": 0.5492, "step": 6686 }, { "epoch": 0.47, "grad_norm": 1.71597050170929, "learning_rate": 5.653722808624678e-06, "loss": 0.6096, "step": 6687 }, { "epoch": 0.47, "grad_norm": 2.5364878131373056, "learning_rate": 5.652583488750166e-06, "loss": 0.5212, "step": 6688 }, { "epoch": 0.47, "grad_norm": 1.8745468182164633, "learning_rate": 5.6514441344039864e-06, "loss": 0.5863, "step": 6689 }, { "epoch": 0.47, "grad_norm": 2.8602776126487477, "learning_rate": 5.650304745646318e-06, "loss": 0.6187, "step": 6690 }, { "epoch": 0.47, "grad_norm": 1.3739225366926313, "learning_rate": 5.649165322537351e-06, "loss": 0.5463, "step": 6691 }, { "epoch": 0.47, "grad_norm": 1.8024861603560671, "learning_rate": 5.64802586513727e-06, "loss": 0.5755, "step": 6692 }, { "epoch": 0.47, "grad_norm": 1.5211624614073798, "learning_rate": 5.646886373506267e-06, "loss": 0.5807, "step": 6693 }, { "epoch": 0.48, "grad_norm": 1.4775770558716181, "learning_rate": 5.645746847704535e-06, "loss": 0.4892, "step": 6694 }, { "epoch": 0.48, "grad_norm": 1.948368889162901, "learning_rate": 5.644607287792267e-06, "loss": 0.5447, "step": 6695 }, { "epoch": 0.48, "grad_norm": 1.451366694437685, "learning_rate": 5.643467693829656e-06, "loss": 0.5134, "step": 6696 }, { "epoch": 0.48, "grad_norm": 1.562089884567449, "learning_rate": 5.642328065876903e-06, "loss": 0.5572, "step": 6697 }, { "epoch": 0.48, "grad_norm": 1.643358385293917, "learning_rate": 5.641188403994205e-06, "loss": 0.5395, "step": 6698 }, { "epoch": 0.48, "grad_norm": 1.526710648041879, "learning_rate": 5.640048708241761e-06, "loss": 0.5506, "step": 6699 }, { "epoch": 0.48, "grad_norm": 1.4821416483061174, "learning_rate": 5.638908978679776e-06, "loss": 0.5356, "step": 6700 }, { "epoch": 0.48, "grad_norm": 1.7516470699889475, "learning_rate": 5.6377692153684545e-06, "loss": 0.5504, "step": 6701 }, { "epoch": 0.48, "grad_norm": 1.661343665489507, "learning_rate": 5.636629418368001e-06, "loss": 0.5706, "step": 6702 }, { "epoch": 0.48, "grad_norm": 1.81514120744629, "learning_rate": 5.635489587738626e-06, "loss": 0.5577, "step": 6703 }, { "epoch": 0.48, "grad_norm": 0.7722265479031758, "learning_rate": 5.634349723540536e-06, "loss": 0.4694, "step": 6704 }, { "epoch": 0.48, "grad_norm": 1.7800201203746095, "learning_rate": 5.6332098258339455e-06, "loss": 0.5434, "step": 6705 }, { "epoch": 0.48, "grad_norm": 3.495232116267956, "learning_rate": 5.632069894679066e-06, "loss": 0.5621, "step": 6706 }, { "epoch": 0.48, "grad_norm": 1.5695256463797158, "learning_rate": 5.630929930136113e-06, "loss": 0.5224, "step": 6707 }, { "epoch": 0.48, "grad_norm": 2.003190247778446, "learning_rate": 5.629789932265303e-06, "loss": 0.5658, "step": 6708 }, { "epoch": 0.48, "grad_norm": 1.4172815957643492, "learning_rate": 5.628649901126854e-06, "loss": 0.5272, "step": 6709 }, { "epoch": 0.48, "grad_norm": 1.5351502237291679, "learning_rate": 5.627509836780988e-06, "loss": 0.5586, "step": 6710 }, { "epoch": 0.48, "grad_norm": 1.9028130184144265, "learning_rate": 5.626369739287926e-06, "loss": 0.5328, "step": 6711 }, { "epoch": 0.48, "grad_norm": 1.6026336373450234, "learning_rate": 5.6252296087078915e-06, "loss": 0.5241, "step": 6712 }, { "epoch": 0.48, "grad_norm": 1.4796314558332737, "learning_rate": 5.624089445101111e-06, "loss": 0.4669, "step": 6713 }, { "epoch": 0.48, "grad_norm": 1.634657431186811, "learning_rate": 5.6229492485278115e-06, "loss": 0.6167, "step": 6714 }, { "epoch": 0.48, "grad_norm": 1.9248752977447796, "learning_rate": 5.6218090190482215e-06, "loss": 0.554, "step": 6715 }, { "epoch": 0.48, "grad_norm": 1.5093216633969484, "learning_rate": 5.620668756722572e-06, "loss": 0.6172, "step": 6716 }, { "epoch": 0.48, "grad_norm": 1.7937289011555313, "learning_rate": 5.619528461611095e-06, "loss": 0.5091, "step": 6717 }, { "epoch": 0.48, "grad_norm": 1.9766599509201979, "learning_rate": 5.6183881337740275e-06, "loss": 0.5379, "step": 6718 }, { "epoch": 0.48, "grad_norm": 1.7876569669256506, "learning_rate": 5.617247773271602e-06, "loss": 0.5427, "step": 6719 }, { "epoch": 0.48, "grad_norm": 1.7822541035915287, "learning_rate": 5.616107380164056e-06, "loss": 0.5211, "step": 6720 }, { "epoch": 0.48, "grad_norm": 2.226603301310994, "learning_rate": 5.614966954511634e-06, "loss": 0.6159, "step": 6721 }, { "epoch": 0.48, "grad_norm": 1.6247178116912122, "learning_rate": 5.613826496374571e-06, "loss": 0.5106, "step": 6722 }, { "epoch": 0.48, "grad_norm": 1.5369539080308863, "learning_rate": 5.612686005813113e-06, "loss": 0.5685, "step": 6723 }, { "epoch": 0.48, "grad_norm": 0.8021381772109202, "learning_rate": 5.611545482887504e-06, "loss": 0.4405, "step": 6724 }, { "epoch": 0.48, "grad_norm": 2.1938146792378173, "learning_rate": 5.610404927657992e-06, "loss": 0.5226, "step": 6725 }, { "epoch": 0.48, "grad_norm": 1.9030832343107824, "learning_rate": 5.609264340184822e-06, "loss": 0.5453, "step": 6726 }, { "epoch": 0.48, "grad_norm": 1.7832316370095276, "learning_rate": 5.6081237205282455e-06, "loss": 0.5148, "step": 6727 }, { "epoch": 0.48, "grad_norm": 1.902862943683918, "learning_rate": 5.6069830687485135e-06, "loss": 0.5383, "step": 6728 }, { "epoch": 0.48, "grad_norm": 1.6308617272819366, "learning_rate": 5.605842384905878e-06, "loss": 0.5673, "step": 6729 }, { "epoch": 0.48, "grad_norm": 1.8887258072034738, "learning_rate": 5.604701669060596e-06, "loss": 0.5082, "step": 6730 }, { "epoch": 0.48, "grad_norm": 1.6925602926813255, "learning_rate": 5.603560921272923e-06, "loss": 0.5271, "step": 6731 }, { "epoch": 0.48, "grad_norm": 2.233038556947561, "learning_rate": 5.602420141603116e-06, "loss": 0.6031, "step": 6732 }, { "epoch": 0.48, "grad_norm": 1.7498356445548902, "learning_rate": 5.601279330111438e-06, "loss": 0.547, "step": 6733 }, { "epoch": 0.48, "grad_norm": 1.5183404627740058, "learning_rate": 5.600138486858147e-06, "loss": 0.5039, "step": 6734 }, { "epoch": 0.48, "grad_norm": 1.8864256386492217, "learning_rate": 5.598997611903507e-06, "loss": 0.6072, "step": 6735 }, { "epoch": 0.48, "grad_norm": 1.7563920701860751, "learning_rate": 5.597856705307782e-06, "loss": 0.5296, "step": 6736 }, { "epoch": 0.48, "grad_norm": 4.9910982831742405, "learning_rate": 5.596715767131242e-06, "loss": 0.5293, "step": 6737 }, { "epoch": 0.48, "grad_norm": 1.6921088304563325, "learning_rate": 5.595574797434154e-06, "loss": 0.6174, "step": 6738 }, { "epoch": 0.48, "grad_norm": 1.6876155591700581, "learning_rate": 5.594433796276786e-06, "loss": 0.4984, "step": 6739 }, { "epoch": 0.48, "grad_norm": 2.343848982639149, "learning_rate": 5.593292763719411e-06, "loss": 0.4824, "step": 6740 }, { "epoch": 0.48, "grad_norm": 1.6408076631830786, "learning_rate": 5.592151699822301e-06, "loss": 0.5104, "step": 6741 }, { "epoch": 0.48, "grad_norm": 1.7727616538646864, "learning_rate": 5.591010604645733e-06, "loss": 0.5472, "step": 6742 }, { "epoch": 0.48, "grad_norm": 1.6048311605043697, "learning_rate": 5.589869478249983e-06, "loss": 0.5722, "step": 6743 }, { "epoch": 0.48, "grad_norm": 1.7017395453743611, "learning_rate": 5.588728320695326e-06, "loss": 0.6008, "step": 6744 }, { "epoch": 0.48, "grad_norm": 1.6883454991776168, "learning_rate": 5.587587132042045e-06, "loss": 0.5387, "step": 6745 }, { "epoch": 0.48, "grad_norm": 0.7678308716911999, "learning_rate": 5.586445912350422e-06, "loss": 0.4691, "step": 6746 }, { "epoch": 0.48, "grad_norm": 1.834197528848065, "learning_rate": 5.585304661680737e-06, "loss": 0.5729, "step": 6747 }, { "epoch": 0.48, "grad_norm": 1.5884467802179805, "learning_rate": 5.584163380093278e-06, "loss": 0.4832, "step": 6748 }, { "epoch": 0.48, "grad_norm": 0.7336702916786736, "learning_rate": 5.5830220676483295e-06, "loss": 0.4558, "step": 6749 }, { "epoch": 0.48, "grad_norm": 1.595138903239873, "learning_rate": 5.581880724406179e-06, "loss": 0.5894, "step": 6750 }, { "epoch": 0.48, "grad_norm": 1.7617260242785975, "learning_rate": 5.580739350427117e-06, "loss": 0.5848, "step": 6751 }, { "epoch": 0.48, "grad_norm": 1.62754895196623, "learning_rate": 5.579597945771435e-06, "loss": 0.5621, "step": 6752 }, { "epoch": 0.48, "grad_norm": 1.6425514376228831, "learning_rate": 5.578456510499426e-06, "loss": 0.5657, "step": 6753 }, { "epoch": 0.48, "grad_norm": 1.8205366874940738, "learning_rate": 5.577315044671383e-06, "loss": 0.4811, "step": 6754 }, { "epoch": 0.48, "grad_norm": 1.796036794777471, "learning_rate": 5.576173548347604e-06, "loss": 0.5634, "step": 6755 }, { "epoch": 0.48, "grad_norm": 4.822329382621104, "learning_rate": 5.5750320215883855e-06, "loss": 0.5198, "step": 6756 }, { "epoch": 0.48, "grad_norm": 1.6373306573904345, "learning_rate": 5.5738904644540285e-06, "loss": 0.6103, "step": 6757 }, { "epoch": 0.48, "grad_norm": 1.416933995385322, "learning_rate": 5.572748877004831e-06, "loss": 0.557, "step": 6758 }, { "epoch": 0.48, "grad_norm": 1.722279640683044, "learning_rate": 5.571607259301097e-06, "loss": 0.5793, "step": 6759 }, { "epoch": 0.48, "grad_norm": 1.4293868683625621, "learning_rate": 5.570465611403131e-06, "loss": 0.5161, "step": 6760 }, { "epoch": 0.48, "grad_norm": 1.7016478609618106, "learning_rate": 5.569323933371239e-06, "loss": 0.5731, "step": 6761 }, { "epoch": 0.48, "grad_norm": 1.869296411801194, "learning_rate": 5.568182225265727e-06, "loss": 0.549, "step": 6762 }, { "epoch": 0.48, "grad_norm": 0.707212949528794, "learning_rate": 5.567040487146905e-06, "loss": 0.4528, "step": 6763 }, { "epoch": 0.48, "grad_norm": 2.04319955771742, "learning_rate": 5.565898719075083e-06, "loss": 0.5866, "step": 6764 }, { "epoch": 0.48, "grad_norm": 1.5474852511178936, "learning_rate": 5.564756921110572e-06, "loss": 0.528, "step": 6765 }, { "epoch": 0.48, "grad_norm": 1.665021536094839, "learning_rate": 5.563615093313688e-06, "loss": 0.5528, "step": 6766 }, { "epoch": 0.48, "grad_norm": 1.6935142618410886, "learning_rate": 5.562473235744745e-06, "loss": 0.5782, "step": 6767 }, { "epoch": 0.48, "grad_norm": 1.8740807746791552, "learning_rate": 5.56133134846406e-06, "loss": 0.5431, "step": 6768 }, { "epoch": 0.48, "grad_norm": 1.5943006582090593, "learning_rate": 5.56018943153195e-06, "loss": 0.5007, "step": 6769 }, { "epoch": 0.48, "grad_norm": 1.4988623488863015, "learning_rate": 5.559047485008737e-06, "loss": 0.5419, "step": 6770 }, { "epoch": 0.48, "grad_norm": 1.5693618060041399, "learning_rate": 5.5579055089547415e-06, "loss": 0.581, "step": 6771 }, { "epoch": 0.48, "grad_norm": 1.6730799642614054, "learning_rate": 5.556763503430287e-06, "loss": 0.5033, "step": 6772 }, { "epoch": 0.48, "grad_norm": 1.498046471523589, "learning_rate": 5.5556214684956966e-06, "loss": 0.4576, "step": 6773 }, { "epoch": 0.48, "grad_norm": 1.9530842819853285, "learning_rate": 5.5544794042112985e-06, "loss": 0.5607, "step": 6774 }, { "epoch": 0.48, "grad_norm": 2.292845483940485, "learning_rate": 5.5533373106374176e-06, "loss": 0.5994, "step": 6775 }, { "epoch": 0.48, "grad_norm": 1.8969872555806475, "learning_rate": 5.552195187834387e-06, "loss": 0.5815, "step": 6776 }, { "epoch": 0.48, "grad_norm": 1.6811661008158028, "learning_rate": 5.551053035862535e-06, "loss": 0.5071, "step": 6777 }, { "epoch": 0.48, "grad_norm": 1.8156736446530535, "learning_rate": 5.549910854782195e-06, "loss": 0.5898, "step": 6778 }, { "epoch": 0.48, "grad_norm": 1.7075229013231694, "learning_rate": 5.548768644653699e-06, "loss": 0.5374, "step": 6779 }, { "epoch": 0.48, "grad_norm": 1.6251682128346123, "learning_rate": 5.547626405537384e-06, "loss": 0.5579, "step": 6780 }, { "epoch": 0.48, "grad_norm": 1.5516764058741341, "learning_rate": 5.546484137493586e-06, "loss": 0.5812, "step": 6781 }, { "epoch": 0.48, "grad_norm": 2.1734460812534584, "learning_rate": 5.545341840582645e-06, "loss": 0.5461, "step": 6782 }, { "epoch": 0.48, "grad_norm": 1.887477114593371, "learning_rate": 5.544199514864901e-06, "loss": 0.4825, "step": 6783 }, { "epoch": 0.48, "grad_norm": 2.203025909311198, "learning_rate": 5.543057160400693e-06, "loss": 0.53, "step": 6784 }, { "epoch": 0.48, "grad_norm": 1.6514827601645161, "learning_rate": 5.541914777250367e-06, "loss": 0.5202, "step": 6785 }, { "epoch": 0.48, "grad_norm": 0.8650873848282368, "learning_rate": 5.540772365474265e-06, "loss": 0.4424, "step": 6786 }, { "epoch": 0.48, "grad_norm": 1.5945055039627944, "learning_rate": 5.5396299251327355e-06, "loss": 0.5389, "step": 6787 }, { "epoch": 0.48, "grad_norm": 1.7700858582581704, "learning_rate": 5.538487456286123e-06, "loss": 0.5853, "step": 6788 }, { "epoch": 0.48, "grad_norm": 1.7301410694556476, "learning_rate": 5.53734495899478e-06, "loss": 0.614, "step": 6789 }, { "epoch": 0.48, "grad_norm": 1.9595533560651899, "learning_rate": 5.536202433319055e-06, "loss": 0.6245, "step": 6790 }, { "epoch": 0.48, "grad_norm": 1.935518135846989, "learning_rate": 5.535059879319301e-06, "loss": 0.5453, "step": 6791 }, { "epoch": 0.48, "grad_norm": 1.5655703112249182, "learning_rate": 5.533917297055871e-06, "loss": 0.5285, "step": 6792 }, { "epoch": 0.48, "grad_norm": 1.9802205881281012, "learning_rate": 5.53277468658912e-06, "loss": 0.5937, "step": 6793 }, { "epoch": 0.48, "grad_norm": 1.8127084526071489, "learning_rate": 5.531632047979405e-06, "loss": 0.5657, "step": 6794 }, { "epoch": 0.48, "grad_norm": 2.4063874677030097, "learning_rate": 5.530489381287083e-06, "loss": 0.5621, "step": 6795 }, { "epoch": 0.48, "grad_norm": 1.6494133433579787, "learning_rate": 5.5293466865725145e-06, "loss": 0.5147, "step": 6796 }, { "epoch": 0.48, "grad_norm": 1.56398788795143, "learning_rate": 5.528203963896062e-06, "loss": 0.4594, "step": 6797 }, { "epoch": 0.48, "grad_norm": 1.6814529419823439, "learning_rate": 5.527061213318084e-06, "loss": 0.561, "step": 6798 }, { "epoch": 0.48, "grad_norm": 1.404081593739727, "learning_rate": 5.525918434898949e-06, "loss": 0.4557, "step": 6799 }, { "epoch": 0.48, "grad_norm": 1.7510986808564242, "learning_rate": 5.524775628699018e-06, "loss": 0.5587, "step": 6800 }, { "epoch": 0.48, "grad_norm": 1.675060987465966, "learning_rate": 5.523632794778661e-06, "loss": 0.5344, "step": 6801 }, { "epoch": 0.48, "grad_norm": 2.5897658033206574, "learning_rate": 5.522489933198246e-06, "loss": 0.6444, "step": 6802 }, { "epoch": 0.48, "grad_norm": 1.6794618340678447, "learning_rate": 5.521347044018142e-06, "loss": 0.5263, "step": 6803 }, { "epoch": 0.48, "grad_norm": 0.7831654878341199, "learning_rate": 5.520204127298721e-06, "loss": 0.4393, "step": 6804 }, { "epoch": 0.48, "grad_norm": 1.7286817537015349, "learning_rate": 5.519061183100354e-06, "loss": 0.4908, "step": 6805 }, { "epoch": 0.48, "grad_norm": 1.7763016868135992, "learning_rate": 5.517918211483418e-06, "loss": 0.5308, "step": 6806 }, { "epoch": 0.48, "grad_norm": 1.6141177023558428, "learning_rate": 5.516775212508286e-06, "loss": 0.5285, "step": 6807 }, { "epoch": 0.48, "grad_norm": 1.9861679514805537, "learning_rate": 5.515632186235338e-06, "loss": 0.5492, "step": 6808 }, { "epoch": 0.48, "grad_norm": 1.5193170720818678, "learning_rate": 5.514489132724949e-06, "loss": 0.4893, "step": 6809 }, { "epoch": 0.48, "grad_norm": 1.5584436058190756, "learning_rate": 5.513346052037501e-06, "loss": 0.5561, "step": 6810 }, { "epoch": 0.48, "grad_norm": 1.6889309523545417, "learning_rate": 5.512202944233374e-06, "loss": 0.5435, "step": 6811 }, { "epoch": 0.48, "grad_norm": 2.642698019431285, "learning_rate": 5.5110598093729535e-06, "loss": 0.5387, "step": 6812 }, { "epoch": 0.48, "grad_norm": 1.5483372768789536, "learning_rate": 5.509916647516622e-06, "loss": 0.517, "step": 6813 }, { "epoch": 0.48, "grad_norm": 1.5550859322787332, "learning_rate": 5.508773458724765e-06, "loss": 0.5804, "step": 6814 }, { "epoch": 0.48, "grad_norm": 1.926270174379352, "learning_rate": 5.50763024305777e-06, "loss": 0.5633, "step": 6815 }, { "epoch": 0.48, "grad_norm": 1.9076136517790978, "learning_rate": 5.506487000576025e-06, "loss": 0.4798, "step": 6816 }, { "epoch": 0.48, "grad_norm": 1.7241485741579365, "learning_rate": 5.50534373133992e-06, "loss": 0.4936, "step": 6817 }, { "epoch": 0.48, "grad_norm": 0.7574308787698483, "learning_rate": 5.504200435409848e-06, "loss": 0.4542, "step": 6818 }, { "epoch": 0.48, "grad_norm": 2.1523603404380585, "learning_rate": 5.5030571128462005e-06, "loss": 0.5958, "step": 6819 }, { "epoch": 0.48, "grad_norm": 1.66661416996218, "learning_rate": 5.501913763709371e-06, "loss": 0.487, "step": 6820 }, { "epoch": 0.48, "grad_norm": 1.9886661330289361, "learning_rate": 5.500770388059757e-06, "loss": 0.5591, "step": 6821 }, { "epoch": 0.48, "grad_norm": 0.7248487044352029, "learning_rate": 5.4996269859577535e-06, "loss": 0.436, "step": 6822 }, { "epoch": 0.48, "grad_norm": 1.727058386164912, "learning_rate": 5.498483557463761e-06, "loss": 0.5108, "step": 6823 }, { "epoch": 0.48, "grad_norm": 2.164327040374841, "learning_rate": 5.497340102638176e-06, "loss": 0.5012, "step": 6824 }, { "epoch": 0.48, "grad_norm": 1.8176847394409723, "learning_rate": 5.496196621541401e-06, "loss": 0.5828, "step": 6825 }, { "epoch": 0.48, "grad_norm": 1.7208381364463068, "learning_rate": 5.4950531142338405e-06, "loss": 0.545, "step": 6826 }, { "epoch": 0.48, "grad_norm": 2.0650721285234614, "learning_rate": 5.493909580775897e-06, "loss": 0.5334, "step": 6827 }, { "epoch": 0.48, "grad_norm": 0.7366063123848869, "learning_rate": 5.492766021227975e-06, "loss": 0.4569, "step": 6828 }, { "epoch": 0.48, "grad_norm": 1.6014310421100373, "learning_rate": 5.4916224356504834e-06, "loss": 0.5205, "step": 6829 }, { "epoch": 0.48, "grad_norm": 1.6981644786476937, "learning_rate": 5.490478824103827e-06, "loss": 0.5679, "step": 6830 }, { "epoch": 0.48, "grad_norm": 1.528255920609519, "learning_rate": 5.489335186648419e-06, "loss": 0.511, "step": 6831 }, { "epoch": 0.48, "grad_norm": 0.8342447215443964, "learning_rate": 5.488191523344667e-06, "loss": 0.4472, "step": 6832 }, { "epoch": 0.48, "grad_norm": 1.6505220972720764, "learning_rate": 5.487047834252984e-06, "loss": 0.5732, "step": 6833 }, { "epoch": 0.48, "grad_norm": 1.659439068002309, "learning_rate": 5.4859041194337856e-06, "loss": 0.4995, "step": 6834 }, { "epoch": 0.49, "grad_norm": 1.6339104751416502, "learning_rate": 5.484760378947485e-06, "loss": 0.5697, "step": 6835 }, { "epoch": 0.49, "grad_norm": 2.5667646386805973, "learning_rate": 5.483616612854499e-06, "loss": 0.6024, "step": 6836 }, { "epoch": 0.49, "grad_norm": 1.7215080676345558, "learning_rate": 5.482472821215244e-06, "loss": 0.5603, "step": 6837 }, { "epoch": 0.49, "grad_norm": 5.462486626274449, "learning_rate": 5.4813290040901405e-06, "loss": 0.6044, "step": 6838 }, { "epoch": 0.49, "grad_norm": 1.6376550408436865, "learning_rate": 5.480185161539606e-06, "loss": 0.5236, "step": 6839 }, { "epoch": 0.49, "grad_norm": 1.821669736021595, "learning_rate": 5.479041293624065e-06, "loss": 0.5524, "step": 6840 }, { "epoch": 0.49, "grad_norm": 1.5989907233963154, "learning_rate": 5.477897400403941e-06, "loss": 0.582, "step": 6841 }, { "epoch": 0.49, "grad_norm": 1.6768940150292562, "learning_rate": 5.476753481939656e-06, "loss": 0.5528, "step": 6842 }, { "epoch": 0.49, "grad_norm": 1.6500073827380295, "learning_rate": 5.475609538291637e-06, "loss": 0.5017, "step": 6843 }, { "epoch": 0.49, "grad_norm": 1.715070039505323, "learning_rate": 5.474465569520311e-06, "loss": 0.5993, "step": 6844 }, { "epoch": 0.49, "grad_norm": 1.8973153109474683, "learning_rate": 5.473321575686105e-06, "loss": 0.5758, "step": 6845 }, { "epoch": 0.49, "grad_norm": 2.042830409711636, "learning_rate": 5.47217755684945e-06, "loss": 0.5395, "step": 6846 }, { "epoch": 0.49, "grad_norm": 1.6009687100485974, "learning_rate": 5.471033513070776e-06, "loss": 0.5476, "step": 6847 }, { "epoch": 0.49, "grad_norm": 1.830977712380756, "learning_rate": 5.4698894444105155e-06, "loss": 0.4873, "step": 6848 }, { "epoch": 0.49, "grad_norm": 1.8412166568024568, "learning_rate": 5.468745350929103e-06, "loss": 0.634, "step": 6849 }, { "epoch": 0.49, "grad_norm": 1.7589456382081645, "learning_rate": 5.467601232686972e-06, "loss": 0.5713, "step": 6850 }, { "epoch": 0.49, "grad_norm": 1.447611218930729, "learning_rate": 5.466457089744561e-06, "loss": 0.5307, "step": 6851 }, { "epoch": 0.49, "grad_norm": 1.7536273806914817, "learning_rate": 5.465312922162304e-06, "loss": 0.524, "step": 6852 }, { "epoch": 0.49, "grad_norm": 1.9519312569900091, "learning_rate": 5.4641687300006454e-06, "loss": 0.5234, "step": 6853 }, { "epoch": 0.49, "grad_norm": 1.7592312729176955, "learning_rate": 5.463024513320018e-06, "loss": 0.6085, "step": 6854 }, { "epoch": 0.49, "grad_norm": 1.5173810980812892, "learning_rate": 5.4618802721808676e-06, "loss": 0.5343, "step": 6855 }, { "epoch": 0.49, "grad_norm": 4.6571551068423975, "learning_rate": 5.460736006643636e-06, "loss": 0.5115, "step": 6856 }, { "epoch": 0.49, "grad_norm": 2.457131983281772, "learning_rate": 5.459591716768768e-06, "loss": 0.5166, "step": 6857 }, { "epoch": 0.49, "grad_norm": 1.6561383961904648, "learning_rate": 5.4584474026167085e-06, "loss": 0.6072, "step": 6858 }, { "epoch": 0.49, "grad_norm": 0.7650884481059345, "learning_rate": 5.457303064247904e-06, "loss": 0.4494, "step": 6859 }, { "epoch": 0.49, "grad_norm": 1.947627716336281, "learning_rate": 5.4561587017228016e-06, "loss": 0.5563, "step": 6860 }, { "epoch": 0.49, "grad_norm": 1.3721809392583832, "learning_rate": 5.45501431510185e-06, "loss": 0.5063, "step": 6861 }, { "epoch": 0.49, "grad_norm": 1.7471197481097551, "learning_rate": 5.4538699044455e-06, "loss": 0.5318, "step": 6862 }, { "epoch": 0.49, "grad_norm": 1.6566591645074071, "learning_rate": 5.4527254698142065e-06, "loss": 0.5491, "step": 6863 }, { "epoch": 0.49, "grad_norm": 3.0620813763872676, "learning_rate": 5.451581011268417e-06, "loss": 0.5232, "step": 6864 }, { "epoch": 0.49, "grad_norm": 1.8305095614138014, "learning_rate": 5.450436528868589e-06, "loss": 0.5455, "step": 6865 }, { "epoch": 0.49, "grad_norm": 2.5076124820560914, "learning_rate": 5.449292022675179e-06, "loss": 0.5508, "step": 6866 }, { "epoch": 0.49, "grad_norm": 2.162939706461156, "learning_rate": 5.44814749274864e-06, "loss": 0.5263, "step": 6867 }, { "epoch": 0.49, "grad_norm": 1.6493360779913957, "learning_rate": 5.447002939149433e-06, "loss": 0.5416, "step": 6868 }, { "epoch": 0.49, "grad_norm": 1.6487864870071969, "learning_rate": 5.445858361938014e-06, "loss": 0.5987, "step": 6869 }, { "epoch": 0.49, "grad_norm": 1.6697345747372552, "learning_rate": 5.444713761174848e-06, "loss": 0.5225, "step": 6870 }, { "epoch": 0.49, "grad_norm": 1.592110884496427, "learning_rate": 5.443569136920393e-06, "loss": 0.583, "step": 6871 }, { "epoch": 0.49, "grad_norm": 1.767971354260779, "learning_rate": 5.442424489235114e-06, "loss": 0.5202, "step": 6872 }, { "epoch": 0.49, "grad_norm": 1.7578434505895417, "learning_rate": 5.441279818179474e-06, "loss": 0.5255, "step": 6873 }, { "epoch": 0.49, "grad_norm": 1.9573114752347067, "learning_rate": 5.440135123813939e-06, "loss": 0.5475, "step": 6874 }, { "epoch": 0.49, "grad_norm": 1.8926520693847737, "learning_rate": 5.438990406198975e-06, "loss": 0.5728, "step": 6875 }, { "epoch": 0.49, "grad_norm": 1.5583370220109374, "learning_rate": 5.437845665395049e-06, "loss": 0.5592, "step": 6876 }, { "epoch": 0.49, "grad_norm": 2.0122615193463664, "learning_rate": 5.436700901462633e-06, "loss": 0.5273, "step": 6877 }, { "epoch": 0.49, "grad_norm": 2.758672493056485, "learning_rate": 5.435556114462196e-06, "loss": 0.5399, "step": 6878 }, { "epoch": 0.49, "grad_norm": 1.6195408737847485, "learning_rate": 5.434411304454208e-06, "loss": 0.5331, "step": 6879 }, { "epoch": 0.49, "grad_norm": 2.0871695865625672, "learning_rate": 5.433266471499143e-06, "loss": 0.536, "step": 6880 }, { "epoch": 0.49, "grad_norm": 1.5431312928196177, "learning_rate": 5.432121615657475e-06, "loss": 0.5628, "step": 6881 }, { "epoch": 0.49, "grad_norm": 1.5801245171687117, "learning_rate": 5.43097673698968e-06, "loss": 0.5787, "step": 6882 }, { "epoch": 0.49, "grad_norm": 1.5992236585895854, "learning_rate": 5.429831835556232e-06, "loss": 0.516, "step": 6883 }, { "epoch": 0.49, "grad_norm": 2.648962780394367, "learning_rate": 5.42868691141761e-06, "loss": 0.4792, "step": 6884 }, { "epoch": 0.49, "grad_norm": 1.9197147505800205, "learning_rate": 5.427541964634294e-06, "loss": 0.6095, "step": 6885 }, { "epoch": 0.49, "grad_norm": 0.8053990026672082, "learning_rate": 5.426396995266762e-06, "loss": 0.4491, "step": 6886 }, { "epoch": 0.49, "grad_norm": 1.7859580506920931, "learning_rate": 5.4252520033754965e-06, "loss": 0.5691, "step": 6887 }, { "epoch": 0.49, "grad_norm": 1.611286187787717, "learning_rate": 5.424106989020979e-06, "loss": 0.5508, "step": 6888 }, { "epoch": 0.49, "grad_norm": 1.7646495895167915, "learning_rate": 5.422961952263692e-06, "loss": 0.5216, "step": 6889 }, { "epoch": 0.49, "grad_norm": 1.7498355138132562, "learning_rate": 5.4218168931641225e-06, "loss": 0.603, "step": 6890 }, { "epoch": 0.49, "grad_norm": 2.7666032776133855, "learning_rate": 5.420671811782755e-06, "loss": 0.5474, "step": 6891 }, { "epoch": 0.49, "grad_norm": 1.600096874996729, "learning_rate": 5.419526708180077e-06, "loss": 0.4791, "step": 6892 }, { "epoch": 0.49, "grad_norm": 1.7157826762501347, "learning_rate": 5.4183815824165776e-06, "loss": 0.4517, "step": 6893 }, { "epoch": 0.49, "grad_norm": 2.0094733372185503, "learning_rate": 5.417236434552745e-06, "loss": 0.5307, "step": 6894 }, { "epoch": 0.49, "grad_norm": 1.5304113876381422, "learning_rate": 5.4160912646490705e-06, "loss": 0.5932, "step": 6895 }, { "epoch": 0.49, "grad_norm": 0.6840037188705317, "learning_rate": 5.414946072766044e-06, "loss": 0.4664, "step": 6896 }, { "epoch": 0.49, "grad_norm": 1.5814451875383069, "learning_rate": 5.413800858964161e-06, "loss": 0.5096, "step": 6897 }, { "epoch": 0.49, "grad_norm": 1.4589791692976493, "learning_rate": 5.412655623303914e-06, "loss": 0.5069, "step": 6898 }, { "epoch": 0.49, "grad_norm": 2.083948739648997, "learning_rate": 5.411510365845798e-06, "loss": 0.4932, "step": 6899 }, { "epoch": 0.49, "grad_norm": 1.9653425779741514, "learning_rate": 5.410365086650312e-06, "loss": 0.4712, "step": 6900 }, { "epoch": 0.49, "grad_norm": 1.575567873552415, "learning_rate": 5.4092197857779505e-06, "loss": 0.5109, "step": 6901 }, { "epoch": 0.49, "grad_norm": 1.6798076731078717, "learning_rate": 5.4080744632892145e-06, "loss": 0.4904, "step": 6902 }, { "epoch": 0.49, "grad_norm": 1.448890322719478, "learning_rate": 5.4069291192446e-06, "loss": 0.5176, "step": 6903 }, { "epoch": 0.49, "grad_norm": 3.0668341957247187, "learning_rate": 5.4057837537046135e-06, "loss": 0.5119, "step": 6904 }, { "epoch": 0.49, "grad_norm": 1.7415097783732243, "learning_rate": 5.404638366729753e-06, "loss": 0.5778, "step": 6905 }, { "epoch": 0.49, "grad_norm": 1.5247562201584641, "learning_rate": 5.403492958380522e-06, "loss": 0.5043, "step": 6906 }, { "epoch": 0.49, "grad_norm": 2.073529504951222, "learning_rate": 5.4023475287174254e-06, "loss": 0.5341, "step": 6907 }, { "epoch": 0.49, "grad_norm": 0.8416959864542389, "learning_rate": 5.401202077800971e-06, "loss": 0.4315, "step": 6908 }, { "epoch": 0.49, "grad_norm": 1.9058821719808015, "learning_rate": 5.400056605691663e-06, "loss": 0.6106, "step": 6909 }, { "epoch": 0.49, "grad_norm": 1.5611663609160082, "learning_rate": 5.398911112450008e-06, "loss": 0.5063, "step": 6910 }, { "epoch": 0.49, "grad_norm": 1.730549211380824, "learning_rate": 5.397765598136517e-06, "loss": 0.4681, "step": 6911 }, { "epoch": 0.49, "grad_norm": 1.6984645208182654, "learning_rate": 5.396620062811699e-06, "loss": 0.4907, "step": 6912 }, { "epoch": 0.49, "grad_norm": 1.5431452525835356, "learning_rate": 5.395474506536066e-06, "loss": 0.539, "step": 6913 }, { "epoch": 0.49, "grad_norm": 1.5398478459980431, "learning_rate": 5.394328929370129e-06, "loss": 0.5618, "step": 6914 }, { "epoch": 0.49, "grad_norm": 1.8377864968904398, "learning_rate": 5.393183331374403e-06, "loss": 0.5359, "step": 6915 }, { "epoch": 0.49, "grad_norm": 2.238976006890208, "learning_rate": 5.3920377126094e-06, "loss": 0.4916, "step": 6916 }, { "epoch": 0.49, "grad_norm": 1.5977911626279486, "learning_rate": 5.390892073135637e-06, "loss": 0.5355, "step": 6917 }, { "epoch": 0.49, "grad_norm": 1.938305735845428, "learning_rate": 5.38974641301363e-06, "loss": 0.5208, "step": 6918 }, { "epoch": 0.49, "grad_norm": 1.6591006554501064, "learning_rate": 5.388600732303898e-06, "loss": 0.5971, "step": 6919 }, { "epoch": 0.49, "grad_norm": 1.6328976094888825, "learning_rate": 5.387455031066957e-06, "loss": 0.536, "step": 6920 }, { "epoch": 0.49, "grad_norm": 1.7192728937822588, "learning_rate": 5.386309309363329e-06, "loss": 0.5582, "step": 6921 }, { "epoch": 0.49, "grad_norm": 1.8797868115752492, "learning_rate": 5.385163567253533e-06, "loss": 0.6046, "step": 6922 }, { "epoch": 0.49, "grad_norm": 3.578515140377028, "learning_rate": 5.384017804798094e-06, "loss": 0.5202, "step": 6923 }, { "epoch": 0.49, "grad_norm": 0.7601036073124097, "learning_rate": 5.382872022057532e-06, "loss": 0.4189, "step": 6924 }, { "epoch": 0.49, "grad_norm": 2.115908610753962, "learning_rate": 5.381726219092375e-06, "loss": 0.4946, "step": 6925 }, { "epoch": 0.49, "grad_norm": 2.5270353344066674, "learning_rate": 5.380580395963143e-06, "loss": 0.5686, "step": 6926 }, { "epoch": 0.49, "grad_norm": 1.7546215933493545, "learning_rate": 5.379434552730365e-06, "loss": 0.5448, "step": 6927 }, { "epoch": 0.49, "grad_norm": 1.8447637322224328, "learning_rate": 5.378288689454569e-06, "loss": 0.4894, "step": 6928 }, { "epoch": 0.49, "grad_norm": 1.9147275564700281, "learning_rate": 5.377142806196282e-06, "loss": 0.5486, "step": 6929 }, { "epoch": 0.49, "grad_norm": 1.62302999259758, "learning_rate": 5.375996903016035e-06, "loss": 0.5257, "step": 6930 }, { "epoch": 0.49, "grad_norm": 1.9055137815076353, "learning_rate": 5.374850979974357e-06, "loss": 0.5413, "step": 6931 }, { "epoch": 0.49, "grad_norm": 0.7897735523760647, "learning_rate": 5.373705037131781e-06, "loss": 0.4247, "step": 6932 }, { "epoch": 0.49, "grad_norm": 1.7704118802747413, "learning_rate": 5.372559074548838e-06, "loss": 0.5164, "step": 6933 }, { "epoch": 0.49, "grad_norm": 1.5458396244025223, "learning_rate": 5.371413092286062e-06, "loss": 0.5391, "step": 6934 }, { "epoch": 0.49, "grad_norm": 1.9643875914658357, "learning_rate": 5.370267090403986e-06, "loss": 0.5396, "step": 6935 }, { "epoch": 0.49, "grad_norm": 1.4523912930073641, "learning_rate": 5.369121068963152e-06, "loss": 0.5678, "step": 6936 }, { "epoch": 0.49, "grad_norm": 0.7369652054910906, "learning_rate": 5.367975028024089e-06, "loss": 0.471, "step": 6937 }, { "epoch": 0.49, "grad_norm": 1.7829216213222345, "learning_rate": 5.3668289676473404e-06, "loss": 0.5707, "step": 6938 }, { "epoch": 0.49, "grad_norm": 1.72150076223606, "learning_rate": 5.365682887893441e-06, "loss": 0.5586, "step": 6939 }, { "epoch": 0.49, "grad_norm": 1.506106403016577, "learning_rate": 5.364536788822934e-06, "loss": 0.5807, "step": 6940 }, { "epoch": 0.49, "grad_norm": 1.5810194048234243, "learning_rate": 5.363390670496357e-06, "loss": 0.4996, "step": 6941 }, { "epoch": 0.49, "grad_norm": 1.6262793391049462, "learning_rate": 5.362244532974253e-06, "loss": 0.5879, "step": 6942 }, { "epoch": 0.49, "grad_norm": 1.6624331046162002, "learning_rate": 5.361098376317167e-06, "loss": 0.5577, "step": 6943 }, { "epoch": 0.49, "grad_norm": 1.6700052862033892, "learning_rate": 5.359952200585641e-06, "loss": 0.5008, "step": 6944 }, { "epoch": 0.49, "grad_norm": 1.5843493764891567, "learning_rate": 5.358806005840219e-06, "loss": 0.4716, "step": 6945 }, { "epoch": 0.49, "grad_norm": 1.9403387458442525, "learning_rate": 5.357659792141447e-06, "loss": 0.473, "step": 6946 }, { "epoch": 0.49, "grad_norm": 1.6359257822521884, "learning_rate": 5.356513559549876e-06, "loss": 0.5593, "step": 6947 }, { "epoch": 0.49, "grad_norm": 0.7310183427999907, "learning_rate": 5.3553673081260495e-06, "loss": 0.4857, "step": 6948 }, { "epoch": 0.49, "grad_norm": 0.6825380568381457, "learning_rate": 5.354221037930516e-06, "loss": 0.4409, "step": 6949 }, { "epoch": 0.49, "grad_norm": 1.8221171035252843, "learning_rate": 5.353074749023829e-06, "loss": 0.5326, "step": 6950 }, { "epoch": 0.49, "grad_norm": 0.8211126857606454, "learning_rate": 5.351928441466537e-06, "loss": 0.4511, "step": 6951 }, { "epoch": 0.49, "grad_norm": 1.846743687017633, "learning_rate": 5.350782115319192e-06, "loss": 0.5125, "step": 6952 }, { "epoch": 0.49, "grad_norm": 1.4963822913203957, "learning_rate": 5.349635770642348e-06, "loss": 0.6567, "step": 6953 }, { "epoch": 0.49, "grad_norm": 1.658417654526787, "learning_rate": 5.348489407496555e-06, "loss": 0.5823, "step": 6954 }, { "epoch": 0.49, "grad_norm": 1.8715326468117708, "learning_rate": 5.347343025942374e-06, "loss": 0.5757, "step": 6955 }, { "epoch": 0.49, "grad_norm": 1.8798641197126325, "learning_rate": 5.346196626040355e-06, "loss": 0.5657, "step": 6956 }, { "epoch": 0.49, "grad_norm": 1.807147006784223, "learning_rate": 5.345050207851058e-06, "loss": 0.5331, "step": 6957 }, { "epoch": 0.49, "grad_norm": 2.5836215387830497, "learning_rate": 5.3439037714350395e-06, "loss": 0.513, "step": 6958 }, { "epoch": 0.49, "grad_norm": 1.795980080542423, "learning_rate": 5.34275731685286e-06, "loss": 0.5658, "step": 6959 }, { "epoch": 0.49, "grad_norm": 1.5645278913858518, "learning_rate": 5.341610844165076e-06, "loss": 0.5101, "step": 6960 }, { "epoch": 0.49, "grad_norm": 1.7721525608869442, "learning_rate": 5.34046435343225e-06, "loss": 0.6163, "step": 6961 }, { "epoch": 0.49, "grad_norm": 1.608341325962311, "learning_rate": 5.339317844714944e-06, "loss": 0.5233, "step": 6962 }, { "epoch": 0.49, "grad_norm": 1.8005270857735067, "learning_rate": 5.3381713180737185e-06, "loss": 0.4769, "step": 6963 }, { "epoch": 0.49, "grad_norm": 1.7246935860918604, "learning_rate": 5.337024773569138e-06, "loss": 0.5856, "step": 6964 }, { "epoch": 0.49, "grad_norm": 1.523920939325314, "learning_rate": 5.335878211261767e-06, "loss": 0.5633, "step": 6965 }, { "epoch": 0.49, "grad_norm": 1.9275749132768, "learning_rate": 5.3347316312121724e-06, "loss": 0.5678, "step": 6966 }, { "epoch": 0.49, "grad_norm": 2.099006225437616, "learning_rate": 5.333585033480917e-06, "loss": 0.5606, "step": 6967 }, { "epoch": 0.49, "grad_norm": 1.5692240072987138, "learning_rate": 5.332438418128571e-06, "loss": 0.5046, "step": 6968 }, { "epoch": 0.49, "grad_norm": 1.5059448418874546, "learning_rate": 5.331291785215701e-06, "loss": 0.5257, "step": 6969 }, { "epoch": 0.49, "grad_norm": 1.4844021252382882, "learning_rate": 5.330145134802877e-06, "loss": 0.5641, "step": 6970 }, { "epoch": 0.49, "grad_norm": 1.377126571367187, "learning_rate": 5.328998466950667e-06, "loss": 0.4511, "step": 6971 }, { "epoch": 0.49, "grad_norm": 2.182705548652811, "learning_rate": 5.327851781719643e-06, "loss": 0.5305, "step": 6972 }, { "epoch": 0.49, "grad_norm": 2.016017308696118, "learning_rate": 5.326705079170378e-06, "loss": 0.5421, "step": 6973 }, { "epoch": 0.49, "grad_norm": 1.6127786779369107, "learning_rate": 5.325558359363444e-06, "loss": 0.6353, "step": 6974 }, { "epoch": 0.49, "grad_norm": 1.7838234113694753, "learning_rate": 5.324411622359413e-06, "loss": 0.6014, "step": 6975 }, { "epoch": 0.5, "grad_norm": 1.781212263683923, "learning_rate": 5.323264868218863e-06, "loss": 0.5583, "step": 6976 }, { "epoch": 0.5, "grad_norm": 1.535634304680371, "learning_rate": 5.322118097002364e-06, "loss": 0.5883, "step": 6977 }, { "epoch": 0.5, "grad_norm": 1.7589008068769565, "learning_rate": 5.320971308770498e-06, "loss": 0.5629, "step": 6978 }, { "epoch": 0.5, "grad_norm": 2.2869880359850314, "learning_rate": 5.319824503583839e-06, "loss": 0.5843, "step": 6979 }, { "epoch": 0.5, "grad_norm": 1.4618384765261996, "learning_rate": 5.318677681502965e-06, "loss": 0.4972, "step": 6980 }, { "epoch": 0.5, "grad_norm": 0.8511289184310895, "learning_rate": 5.3175308425884585e-06, "loss": 0.4465, "step": 6981 }, { "epoch": 0.5, "grad_norm": 1.9978626439835512, "learning_rate": 5.316383986900896e-06, "loss": 0.5343, "step": 6982 }, { "epoch": 0.5, "grad_norm": 1.9544685068137313, "learning_rate": 5.31523711450086e-06, "loss": 0.5538, "step": 6983 }, { "epoch": 0.5, "grad_norm": 1.4230179283454532, "learning_rate": 5.314090225448929e-06, "loss": 0.5127, "step": 6984 }, { "epoch": 0.5, "grad_norm": 1.8984074403965865, "learning_rate": 5.31294331980569e-06, "loss": 0.5408, "step": 6985 }, { "epoch": 0.5, "grad_norm": 1.4301540354980715, "learning_rate": 5.311796397631724e-06, "loss": 0.5447, "step": 6986 }, { "epoch": 0.5, "grad_norm": 1.88641514203611, "learning_rate": 5.310649458987616e-06, "loss": 0.56, "step": 6987 }, { "epoch": 0.5, "grad_norm": 1.7501132083980377, "learning_rate": 5.3095025039339496e-06, "loss": 0.5821, "step": 6988 }, { "epoch": 0.5, "grad_norm": 3.4952199376762434, "learning_rate": 5.308355532531314e-06, "loss": 0.5672, "step": 6989 }, { "epoch": 0.5, "grad_norm": 2.151607462627416, "learning_rate": 5.307208544840293e-06, "loss": 0.5207, "step": 6990 }, { "epoch": 0.5, "grad_norm": 2.60827662563997, "learning_rate": 5.3060615409214776e-06, "loss": 0.5742, "step": 6991 }, { "epoch": 0.5, "grad_norm": 1.7863217674113678, "learning_rate": 5.304914520835452e-06, "loss": 0.4835, "step": 6992 }, { "epoch": 0.5, "grad_norm": 1.5563435078014416, "learning_rate": 5.30376748464281e-06, "loss": 0.4976, "step": 6993 }, { "epoch": 0.5, "grad_norm": 1.5117521089523513, "learning_rate": 5.302620432404138e-06, "loss": 0.5841, "step": 6994 }, { "epoch": 0.5, "grad_norm": 2.0067057900232492, "learning_rate": 5.301473364180032e-06, "loss": 0.6124, "step": 6995 }, { "epoch": 0.5, "grad_norm": 1.8426391738329484, "learning_rate": 5.3003262800310805e-06, "loss": 0.4833, "step": 6996 }, { "epoch": 0.5, "grad_norm": 1.715258371915016, "learning_rate": 5.299179180017877e-06, "loss": 0.5172, "step": 6997 }, { "epoch": 0.5, "grad_norm": 1.9591099775091267, "learning_rate": 5.298032064201016e-06, "loss": 0.4771, "step": 6998 }, { "epoch": 0.5, "grad_norm": 1.836050398876122, "learning_rate": 5.296884932641091e-06, "loss": 0.6412, "step": 6999 }, { "epoch": 0.5, "grad_norm": 3.042380486167342, "learning_rate": 5.295737785398698e-06, "loss": 0.543, "step": 7000 }, { "epoch": 0.5, "grad_norm": 2.2117069758666794, "learning_rate": 5.294590622534431e-06, "loss": 0.5831, "step": 7001 }, { "epoch": 0.5, "grad_norm": 1.8353090998488797, "learning_rate": 5.2934434441088925e-06, "loss": 0.5486, "step": 7002 }, { "epoch": 0.5, "grad_norm": 1.5554162267991847, "learning_rate": 5.292296250182676e-06, "loss": 0.5056, "step": 7003 }, { "epoch": 0.5, "grad_norm": 0.7936267314306734, "learning_rate": 5.29114904081638e-06, "loss": 0.4493, "step": 7004 }, { "epoch": 0.5, "grad_norm": 1.9437582346297118, "learning_rate": 5.290001816070606e-06, "loss": 0.6191, "step": 7005 }, { "epoch": 0.5, "grad_norm": 1.7426267450638366, "learning_rate": 5.288854576005954e-06, "loss": 0.4912, "step": 7006 }, { "epoch": 0.5, "grad_norm": 2.5441189136279694, "learning_rate": 5.287707320683023e-06, "loss": 0.4929, "step": 7007 }, { "epoch": 0.5, "grad_norm": 1.6649730677336312, "learning_rate": 5.286560050162417e-06, "loss": 0.4943, "step": 7008 }, { "epoch": 0.5, "grad_norm": 1.5173375610006026, "learning_rate": 5.285412764504738e-06, "loss": 0.5004, "step": 7009 }, { "epoch": 0.5, "grad_norm": 1.6472109180947638, "learning_rate": 5.284265463770589e-06, "loss": 0.5581, "step": 7010 }, { "epoch": 0.5, "grad_norm": 1.8848417152411876, "learning_rate": 5.2831181480205774e-06, "loss": 0.5279, "step": 7011 }, { "epoch": 0.5, "grad_norm": 1.4680674468460801, "learning_rate": 5.281970817315304e-06, "loss": 0.5379, "step": 7012 }, { "epoch": 0.5, "grad_norm": 1.6745404478908734, "learning_rate": 5.280823471715377e-06, "loss": 0.5391, "step": 7013 }, { "epoch": 0.5, "grad_norm": 1.587726940534131, "learning_rate": 5.279676111281403e-06, "loss": 0.5376, "step": 7014 }, { "epoch": 0.5, "grad_norm": 1.7918703189754146, "learning_rate": 5.278528736073989e-06, "loss": 0.5453, "step": 7015 }, { "epoch": 0.5, "grad_norm": 1.561443119290921, "learning_rate": 5.277381346153743e-06, "loss": 0.578, "step": 7016 }, { "epoch": 0.5, "grad_norm": 1.600014820375347, "learning_rate": 5.276233941581274e-06, "loss": 0.5423, "step": 7017 }, { "epoch": 0.5, "grad_norm": 2.3037477893429505, "learning_rate": 5.275086522417193e-06, "loss": 0.565, "step": 7018 }, { "epoch": 0.5, "grad_norm": 0.781481554719757, "learning_rate": 5.27393908872211e-06, "loss": 0.4315, "step": 7019 }, { "epoch": 0.5, "grad_norm": 1.7953016018705, "learning_rate": 5.272791640556636e-06, "loss": 0.5108, "step": 7020 }, { "epoch": 0.5, "grad_norm": 0.7581132920306195, "learning_rate": 5.2716441779813835e-06, "loss": 0.4193, "step": 7021 }, { "epoch": 0.5, "grad_norm": 1.665117172115333, "learning_rate": 5.270496701056964e-06, "loss": 0.5374, "step": 7022 }, { "epoch": 0.5, "grad_norm": 1.6103185218458027, "learning_rate": 5.269349209843993e-06, "loss": 0.5712, "step": 7023 }, { "epoch": 0.5, "grad_norm": 2.181553038881357, "learning_rate": 5.268201704403082e-06, "loss": 0.594, "step": 7024 }, { "epoch": 0.5, "grad_norm": 1.899422161505861, "learning_rate": 5.2670541847948495e-06, "loss": 0.6153, "step": 7025 }, { "epoch": 0.5, "grad_norm": 1.6008747066951738, "learning_rate": 5.265906651079912e-06, "loss": 0.5625, "step": 7026 }, { "epoch": 0.5, "grad_norm": 1.767450567383687, "learning_rate": 5.264759103318882e-06, "loss": 0.4915, "step": 7027 }, { "epoch": 0.5, "grad_norm": 2.2237631575939423, "learning_rate": 5.26361154157238e-06, "loss": 0.5472, "step": 7028 }, { "epoch": 0.5, "grad_norm": 4.17307929353586, "learning_rate": 5.2624639659010225e-06, "loss": 0.5209, "step": 7029 }, { "epoch": 0.5, "grad_norm": 1.4534541420189495, "learning_rate": 5.261316376365429e-06, "loss": 0.5337, "step": 7030 }, { "epoch": 0.5, "grad_norm": 1.7932631131380319, "learning_rate": 5.260168773026218e-06, "loss": 0.5628, "step": 7031 }, { "epoch": 0.5, "grad_norm": 2.5482969468349235, "learning_rate": 5.259021155944013e-06, "loss": 0.5937, "step": 7032 }, { "epoch": 0.5, "grad_norm": 1.7890710027161159, "learning_rate": 5.257873525179433e-06, "loss": 0.5143, "step": 7033 }, { "epoch": 0.5, "grad_norm": 1.7563291540552648, "learning_rate": 5.256725880793098e-06, "loss": 0.5432, "step": 7034 }, { "epoch": 0.5, "grad_norm": 1.8428281776517292, "learning_rate": 5.255578222845632e-06, "loss": 0.6, "step": 7035 }, { "epoch": 0.5, "grad_norm": 1.6614926125640286, "learning_rate": 5.25443055139766e-06, "loss": 0.5366, "step": 7036 }, { "epoch": 0.5, "grad_norm": 1.80539304474754, "learning_rate": 5.2532828665098025e-06, "loss": 0.5498, "step": 7037 }, { "epoch": 0.5, "grad_norm": 1.7329691396716165, "learning_rate": 5.252135168242686e-06, "loss": 0.5369, "step": 7038 }, { "epoch": 0.5, "grad_norm": 0.7121229173875054, "learning_rate": 5.2509874566569355e-06, "loss": 0.4482, "step": 7039 }, { "epoch": 0.5, "grad_norm": 0.7755123416221916, "learning_rate": 5.249839731813177e-06, "loss": 0.4245, "step": 7040 }, { "epoch": 0.5, "grad_norm": 1.6875027963204883, "learning_rate": 5.248691993772038e-06, "loss": 0.5662, "step": 7041 }, { "epoch": 0.5, "grad_norm": 2.863930373213115, "learning_rate": 5.247544242594145e-06, "loss": 0.5467, "step": 7042 }, { "epoch": 0.5, "grad_norm": 1.691302569286533, "learning_rate": 5.2463964783401265e-06, "loss": 0.5497, "step": 7043 }, { "epoch": 0.5, "grad_norm": 1.6491347490759714, "learning_rate": 5.245248701070611e-06, "loss": 0.5737, "step": 7044 }, { "epoch": 0.5, "grad_norm": 1.7061477169836194, "learning_rate": 5.244100910846227e-06, "loss": 0.487, "step": 7045 }, { "epoch": 0.5, "grad_norm": 1.7896817768077522, "learning_rate": 5.242953107727606e-06, "loss": 0.5474, "step": 7046 }, { "epoch": 0.5, "grad_norm": 1.7925734906997224, "learning_rate": 5.24180529177538e-06, "loss": 0.5845, "step": 7047 }, { "epoch": 0.5, "grad_norm": 1.7593264382668896, "learning_rate": 5.240657463050179e-06, "loss": 0.5176, "step": 7048 }, { "epoch": 0.5, "grad_norm": 1.6207228830169353, "learning_rate": 5.239509621612635e-06, "loss": 0.557, "step": 7049 }, { "epoch": 0.5, "grad_norm": 1.4434230321433479, "learning_rate": 5.238361767523381e-06, "loss": 0.5074, "step": 7050 }, { "epoch": 0.5, "grad_norm": 1.662484952456173, "learning_rate": 5.2372139008430515e-06, "loss": 0.5512, "step": 7051 }, { "epoch": 0.5, "grad_norm": 1.7697828381047866, "learning_rate": 5.236066021632279e-06, "loss": 0.5605, "step": 7052 }, { "epoch": 0.5, "grad_norm": 1.7238588829708457, "learning_rate": 5.2349181299517e-06, "loss": 0.5587, "step": 7053 }, { "epoch": 0.5, "grad_norm": 1.7134398280710335, "learning_rate": 5.233770225861947e-06, "loss": 0.4846, "step": 7054 }, { "epoch": 0.5, "grad_norm": 2.186337835633182, "learning_rate": 5.232622309423661e-06, "loss": 0.5014, "step": 7055 }, { "epoch": 0.5, "grad_norm": 1.6732215935618153, "learning_rate": 5.231474380697476e-06, "loss": 0.5656, "step": 7056 }, { "epoch": 0.5, "grad_norm": 1.8241033033759355, "learning_rate": 5.23032643974403e-06, "loss": 0.5256, "step": 7057 }, { "epoch": 0.5, "grad_norm": 1.6781383724471364, "learning_rate": 5.229178486623962e-06, "loss": 0.4735, "step": 7058 }, { "epoch": 0.5, "grad_norm": 1.6439613492732295, "learning_rate": 5.228030521397908e-06, "loss": 0.5308, "step": 7059 }, { "epoch": 0.5, "grad_norm": 1.570317505145271, "learning_rate": 5.226882544126508e-06, "loss": 0.4488, "step": 7060 }, { "epoch": 0.5, "grad_norm": 1.6983211137879015, "learning_rate": 5.2257345548704055e-06, "loss": 0.479, "step": 7061 }, { "epoch": 0.5, "grad_norm": 2.043963367654638, "learning_rate": 5.224586553690238e-06, "loss": 0.5632, "step": 7062 }, { "epoch": 0.5, "grad_norm": 1.7285121275782998, "learning_rate": 5.223438540646648e-06, "loss": 0.573, "step": 7063 }, { "epoch": 0.5, "grad_norm": 2.2996037055143477, "learning_rate": 5.222290515800279e-06, "loss": 0.6057, "step": 7064 }, { "epoch": 0.5, "grad_norm": 1.5247382689469942, "learning_rate": 5.22114247921177e-06, "loss": 0.4863, "step": 7065 }, { "epoch": 0.5, "grad_norm": 2.1986877183367146, "learning_rate": 5.219994430941766e-06, "loss": 0.5418, "step": 7066 }, { "epoch": 0.5, "grad_norm": 1.7643448686306238, "learning_rate": 5.218846371050909e-06, "loss": 0.5751, "step": 7067 }, { "epoch": 0.5, "grad_norm": 1.6281119541807476, "learning_rate": 5.2176982995998475e-06, "loss": 0.5506, "step": 7068 }, { "epoch": 0.5, "grad_norm": 1.5186360542138146, "learning_rate": 5.216550216649224e-06, "loss": 0.57, "step": 7069 }, { "epoch": 0.5, "grad_norm": 1.5945754296133121, "learning_rate": 5.215402122259683e-06, "loss": 0.6253, "step": 7070 }, { "epoch": 0.5, "grad_norm": 1.8557963536463613, "learning_rate": 5.214254016491872e-06, "loss": 0.524, "step": 7071 }, { "epoch": 0.5, "grad_norm": 1.9320802794681051, "learning_rate": 5.21310589940644e-06, "loss": 0.5034, "step": 7072 }, { "epoch": 0.5, "grad_norm": 1.560260248752983, "learning_rate": 5.21195777106403e-06, "loss": 0.4977, "step": 7073 }, { "epoch": 0.5, "grad_norm": 1.7071366758187072, "learning_rate": 5.2108096315252935e-06, "loss": 0.5265, "step": 7074 }, { "epoch": 0.5, "grad_norm": 2.1059907136718805, "learning_rate": 5.209661480850877e-06, "loss": 0.5011, "step": 7075 }, { "epoch": 0.5, "grad_norm": 1.642852960823384, "learning_rate": 5.20851331910143e-06, "loss": 0.5834, "step": 7076 }, { "epoch": 0.5, "grad_norm": 0.8343008879961659, "learning_rate": 5.207365146337605e-06, "loss": 0.4298, "step": 7077 }, { "epoch": 0.5, "grad_norm": 1.7746126792704355, "learning_rate": 5.20621696262005e-06, "loss": 0.5902, "step": 7078 }, { "epoch": 0.5, "grad_norm": 1.6031878891625848, "learning_rate": 5.205068768009416e-06, "loss": 0.5794, "step": 7079 }, { "epoch": 0.5, "grad_norm": 0.8030260679962491, "learning_rate": 5.203920562566354e-06, "loss": 0.4611, "step": 7080 }, { "epoch": 0.5, "grad_norm": 1.5772106680268816, "learning_rate": 5.202772346351518e-06, "loss": 0.4488, "step": 7081 }, { "epoch": 0.5, "grad_norm": 1.699718614123332, "learning_rate": 5.201624119425559e-06, "loss": 0.4872, "step": 7082 }, { "epoch": 0.5, "grad_norm": 0.6660990822422064, "learning_rate": 5.200475881849131e-06, "loss": 0.4194, "step": 7083 }, { "epoch": 0.5, "grad_norm": 2.421458284083621, "learning_rate": 5.1993276336828865e-06, "loss": 0.5867, "step": 7084 }, { "epoch": 0.5, "grad_norm": 1.6057217818039338, "learning_rate": 5.198179374987483e-06, "loss": 0.4803, "step": 7085 }, { "epoch": 0.5, "grad_norm": 3.789475905772603, "learning_rate": 5.197031105823572e-06, "loss": 0.5598, "step": 7086 }, { "epoch": 0.5, "grad_norm": 1.6214953556258676, "learning_rate": 5.1958828262518126e-06, "loss": 0.4811, "step": 7087 }, { "epoch": 0.5, "grad_norm": 1.5564539833113114, "learning_rate": 5.194734536332856e-06, "loss": 0.4883, "step": 7088 }, { "epoch": 0.5, "grad_norm": 1.6105748242494609, "learning_rate": 5.193586236127362e-06, "loss": 0.5338, "step": 7089 }, { "epoch": 0.5, "grad_norm": 1.6914237757829251, "learning_rate": 5.1924379256959865e-06, "loss": 0.4805, "step": 7090 }, { "epoch": 0.5, "grad_norm": 1.3561994336142162, "learning_rate": 5.1912896050993875e-06, "loss": 0.5041, "step": 7091 }, { "epoch": 0.5, "grad_norm": 2.42275735902914, "learning_rate": 5.190141274398225e-06, "loss": 0.4571, "step": 7092 }, { "epoch": 0.5, "grad_norm": 1.5627965092120926, "learning_rate": 5.188992933653155e-06, "loss": 0.545, "step": 7093 }, { "epoch": 0.5, "grad_norm": 2.324944348807185, "learning_rate": 5.1878445829248395e-06, "loss": 0.5163, "step": 7094 }, { "epoch": 0.5, "grad_norm": 1.6378099612781631, "learning_rate": 5.1866962222739346e-06, "loss": 0.5363, "step": 7095 }, { "epoch": 0.5, "grad_norm": 1.605883906034545, "learning_rate": 5.185547851761102e-06, "loss": 0.5504, "step": 7096 }, { "epoch": 0.5, "grad_norm": 0.8669754574836585, "learning_rate": 5.184399471447005e-06, "loss": 0.4331, "step": 7097 }, { "epoch": 0.5, "grad_norm": 1.6836662874908435, "learning_rate": 5.183251081392303e-06, "loss": 0.6004, "step": 7098 }, { "epoch": 0.5, "grad_norm": 2.2619939821475747, "learning_rate": 5.182102681657657e-06, "loss": 0.5065, "step": 7099 }, { "epoch": 0.5, "grad_norm": 1.6678149143204306, "learning_rate": 5.180954272303732e-06, "loss": 0.4787, "step": 7100 }, { "epoch": 0.5, "grad_norm": 1.8957925922309402, "learning_rate": 5.179805853391187e-06, "loss": 0.6099, "step": 7101 }, { "epoch": 0.5, "grad_norm": 1.4585668255867563, "learning_rate": 5.1786574249806895e-06, "loss": 0.504, "step": 7102 }, { "epoch": 0.5, "grad_norm": 1.8331054119238788, "learning_rate": 5.1775089871329e-06, "loss": 0.5226, "step": 7103 }, { "epoch": 0.5, "grad_norm": 1.7182541145377153, "learning_rate": 5.1763605399084846e-06, "loss": 0.5484, "step": 7104 }, { "epoch": 0.5, "grad_norm": 3.2294353557351494, "learning_rate": 5.175212083368108e-06, "loss": 0.5779, "step": 7105 }, { "epoch": 0.5, "grad_norm": 1.5731470206466514, "learning_rate": 5.174063617572435e-06, "loss": 0.5827, "step": 7106 }, { "epoch": 0.5, "grad_norm": 1.558400605808407, "learning_rate": 5.172915142582132e-06, "loss": 0.529, "step": 7107 }, { "epoch": 0.5, "grad_norm": 0.7873223166390495, "learning_rate": 5.171766658457866e-06, "loss": 0.4315, "step": 7108 }, { "epoch": 0.5, "grad_norm": 1.8004574508128999, "learning_rate": 5.170618165260304e-06, "loss": 0.513, "step": 7109 }, { "epoch": 0.5, "grad_norm": 1.4179682013113315, "learning_rate": 5.1694696630501104e-06, "loss": 0.5089, "step": 7110 }, { "epoch": 0.5, "grad_norm": 2.0751890925869887, "learning_rate": 5.168321151887955e-06, "loss": 0.5386, "step": 7111 }, { "epoch": 0.5, "grad_norm": 1.6926506016262834, "learning_rate": 5.167172631834506e-06, "loss": 0.5428, "step": 7112 }, { "epoch": 0.5, "grad_norm": 1.4481960942188872, "learning_rate": 5.166024102950434e-06, "loss": 0.4843, "step": 7113 }, { "epoch": 0.5, "grad_norm": 0.7325949117503594, "learning_rate": 5.1648755652964044e-06, "loss": 0.462, "step": 7114 }, { "epoch": 0.5, "grad_norm": 1.8990580287602454, "learning_rate": 5.163727018933091e-06, "loss": 0.4894, "step": 7115 }, { "epoch": 0.5, "grad_norm": 2.1818327692016455, "learning_rate": 5.162578463921159e-06, "loss": 0.5333, "step": 7116 }, { "epoch": 0.51, "grad_norm": 1.512931452219238, "learning_rate": 5.1614299003212846e-06, "loss": 0.5845, "step": 7117 }, { "epoch": 0.51, "grad_norm": 1.7527905185203418, "learning_rate": 5.160281328194134e-06, "loss": 0.4174, "step": 7118 }, { "epoch": 0.51, "grad_norm": 1.5378745881261426, "learning_rate": 5.159132747600379e-06, "loss": 0.5251, "step": 7119 }, { "epoch": 0.51, "grad_norm": 1.6899676709529583, "learning_rate": 5.157984158600695e-06, "loss": 0.5712, "step": 7120 }, { "epoch": 0.51, "grad_norm": 0.7542740258759251, "learning_rate": 5.156835561255752e-06, "loss": 0.4498, "step": 7121 }, { "epoch": 0.51, "grad_norm": 1.6807544819822395, "learning_rate": 5.155686955626223e-06, "loss": 0.5263, "step": 7122 }, { "epoch": 0.51, "grad_norm": 2.686875036578096, "learning_rate": 5.1545383417727825e-06, "loss": 0.5701, "step": 7123 }, { "epoch": 0.51, "grad_norm": 1.620194273283092, "learning_rate": 5.153389719756102e-06, "loss": 0.4982, "step": 7124 }, { "epoch": 0.51, "grad_norm": 2.138943393903928, "learning_rate": 5.152241089636858e-06, "loss": 0.5592, "step": 7125 }, { "epoch": 0.51, "grad_norm": 2.1955589872455876, "learning_rate": 5.151092451475722e-06, "loss": 0.5327, "step": 7126 }, { "epoch": 0.51, "grad_norm": 1.5091118869387572, "learning_rate": 5.149943805333372e-06, "loss": 0.4988, "step": 7127 }, { "epoch": 0.51, "grad_norm": 1.7666169329152845, "learning_rate": 5.148795151270483e-06, "loss": 0.5643, "step": 7128 }, { "epoch": 0.51, "grad_norm": 2.189725432687955, "learning_rate": 5.147646489347727e-06, "loss": 0.5813, "step": 7129 }, { "epoch": 0.51, "grad_norm": 1.5298783823772737, "learning_rate": 5.146497819625787e-06, "loss": 0.5618, "step": 7130 }, { "epoch": 0.51, "grad_norm": 1.8082665095520953, "learning_rate": 5.145349142165333e-06, "loss": 0.5304, "step": 7131 }, { "epoch": 0.51, "grad_norm": 2.3450208913873527, "learning_rate": 5.144200457027045e-06, "loss": 0.5225, "step": 7132 }, { "epoch": 0.51, "grad_norm": 0.6968448027459868, "learning_rate": 5.143051764271599e-06, "loss": 0.4126, "step": 7133 }, { "epoch": 0.51, "grad_norm": 1.961580709313409, "learning_rate": 5.141903063959677e-06, "loss": 0.5133, "step": 7134 }, { "epoch": 0.51, "grad_norm": 0.7471798602529724, "learning_rate": 5.1407543561519535e-06, "loss": 0.4694, "step": 7135 }, { "epoch": 0.51, "grad_norm": 4.038518251440893, "learning_rate": 5.139605640909108e-06, "loss": 0.5118, "step": 7136 }, { "epoch": 0.51, "grad_norm": 1.7778208913877238, "learning_rate": 5.138456918291819e-06, "loss": 0.5357, "step": 7137 }, { "epoch": 0.51, "grad_norm": 1.4885933662221587, "learning_rate": 5.137308188360765e-06, "loss": 0.5141, "step": 7138 }, { "epoch": 0.51, "grad_norm": 1.9774818655028785, "learning_rate": 5.13615945117663e-06, "loss": 0.4445, "step": 7139 }, { "epoch": 0.51, "grad_norm": 1.8716662307838772, "learning_rate": 5.13501070680009e-06, "loss": 0.5681, "step": 7140 }, { "epoch": 0.51, "grad_norm": 1.810768876060837, "learning_rate": 5.133861955291827e-06, "loss": 0.5241, "step": 7141 }, { "epoch": 0.51, "grad_norm": 1.5727414963843227, "learning_rate": 5.132713196712521e-06, "loss": 0.5365, "step": 7142 }, { "epoch": 0.51, "grad_norm": 2.918848231131799, "learning_rate": 5.131564431122857e-06, "loss": 0.5212, "step": 7143 }, { "epoch": 0.51, "grad_norm": 0.6825396948884368, "learning_rate": 5.1304156585835116e-06, "loss": 0.4365, "step": 7144 }, { "epoch": 0.51, "grad_norm": 1.8334080262118002, "learning_rate": 5.1292668791551705e-06, "loss": 0.5033, "step": 7145 }, { "epoch": 0.51, "grad_norm": 1.854349327819781, "learning_rate": 5.128118092898514e-06, "loss": 0.5805, "step": 7146 }, { "epoch": 0.51, "grad_norm": 2.127937879591351, "learning_rate": 5.126969299874225e-06, "loss": 0.5655, "step": 7147 }, { "epoch": 0.51, "grad_norm": 1.863725899590818, "learning_rate": 5.125820500142988e-06, "loss": 0.5844, "step": 7148 }, { "epoch": 0.51, "grad_norm": 2.813888497787206, "learning_rate": 5.124671693765487e-06, "loss": 0.4989, "step": 7149 }, { "epoch": 0.51, "grad_norm": 1.6518913501196617, "learning_rate": 5.123522880802403e-06, "loss": 0.5096, "step": 7150 }, { "epoch": 0.51, "grad_norm": 2.7558537853024045, "learning_rate": 5.122374061314424e-06, "loss": 0.5331, "step": 7151 }, { "epoch": 0.51, "grad_norm": 1.9676623770108665, "learning_rate": 5.121225235362231e-06, "loss": 0.5326, "step": 7152 }, { "epoch": 0.51, "grad_norm": 2.2094081635692677, "learning_rate": 5.120076403006511e-06, "loss": 0.4963, "step": 7153 }, { "epoch": 0.51, "grad_norm": 0.6927926488967263, "learning_rate": 5.118927564307947e-06, "loss": 0.4714, "step": 7154 }, { "epoch": 0.51, "grad_norm": 1.6738943270583186, "learning_rate": 5.1177787193272265e-06, "loss": 0.5575, "step": 7155 }, { "epoch": 0.51, "grad_norm": 1.6185888292506552, "learning_rate": 5.116629868125036e-06, "loss": 0.5773, "step": 7156 }, { "epoch": 0.51, "grad_norm": 1.6562085052544793, "learning_rate": 5.1154810107620585e-06, "loss": 0.5734, "step": 7157 }, { "epoch": 0.51, "grad_norm": 1.811707302117279, "learning_rate": 5.114332147298986e-06, "loss": 0.5661, "step": 7158 }, { "epoch": 0.51, "grad_norm": 2.114680323180185, "learning_rate": 5.1131832777965e-06, "loss": 0.5703, "step": 7159 }, { "epoch": 0.51, "grad_norm": 0.7267628351405168, "learning_rate": 5.11203440231529e-06, "loss": 0.4345, "step": 7160 }, { "epoch": 0.51, "grad_norm": 1.6916179885567026, "learning_rate": 5.110885520916044e-06, "loss": 0.5199, "step": 7161 }, { "epoch": 0.51, "grad_norm": 1.850937205413982, "learning_rate": 5.109736633659448e-06, "loss": 0.5967, "step": 7162 }, { "epoch": 0.51, "grad_norm": 3.1097579920131526, "learning_rate": 5.1085877406061915e-06, "loss": 0.5991, "step": 7163 }, { "epoch": 0.51, "grad_norm": 0.793779886180151, "learning_rate": 5.107438841816963e-06, "loss": 0.4652, "step": 7164 }, { "epoch": 0.51, "grad_norm": 2.0311785186349547, "learning_rate": 5.1062899373524495e-06, "loss": 0.5286, "step": 7165 }, { "epoch": 0.51, "grad_norm": 1.7501828275635192, "learning_rate": 5.105141027273344e-06, "loss": 0.4693, "step": 7166 }, { "epoch": 0.51, "grad_norm": 1.839859039851572, "learning_rate": 5.103992111640331e-06, "loss": 0.6239, "step": 7167 }, { "epoch": 0.51, "grad_norm": 1.5347476768920956, "learning_rate": 5.102843190514104e-06, "loss": 0.532, "step": 7168 }, { "epoch": 0.51, "grad_norm": 1.4445105375159235, "learning_rate": 5.101694263955349e-06, "loss": 0.5094, "step": 7169 }, { "epoch": 0.51, "grad_norm": 1.5152072381311248, "learning_rate": 5.100545332024759e-06, "loss": 0.5185, "step": 7170 }, { "epoch": 0.51, "grad_norm": 1.7470586644169832, "learning_rate": 5.099396394783024e-06, "loss": 0.4763, "step": 7171 }, { "epoch": 0.51, "grad_norm": 1.589972242158325, "learning_rate": 5.098247452290834e-06, "loss": 0.5458, "step": 7172 }, { "epoch": 0.51, "grad_norm": 1.5267982809836074, "learning_rate": 5.097098504608882e-06, "loss": 0.5617, "step": 7173 }, { "epoch": 0.51, "grad_norm": 1.7781590189794554, "learning_rate": 5.095949551797859e-06, "loss": 0.5255, "step": 7174 }, { "epoch": 0.51, "grad_norm": 1.7461475656270056, "learning_rate": 5.094800593918454e-06, "loss": 0.4842, "step": 7175 }, { "epoch": 0.51, "grad_norm": 1.7348068432501793, "learning_rate": 5.09365163103136e-06, "loss": 0.4883, "step": 7176 }, { "epoch": 0.51, "grad_norm": 1.6683263207217836, "learning_rate": 5.09250266319727e-06, "loss": 0.5545, "step": 7177 }, { "epoch": 0.51, "grad_norm": 1.5183367623448456, "learning_rate": 5.091353690476876e-06, "loss": 0.5222, "step": 7178 }, { "epoch": 0.51, "grad_norm": 2.199029254173726, "learning_rate": 5.090204712930871e-06, "loss": 0.598, "step": 7179 }, { "epoch": 0.51, "grad_norm": 1.7310512453946842, "learning_rate": 5.089055730619946e-06, "loss": 0.5276, "step": 7180 }, { "epoch": 0.51, "grad_norm": 0.8464871528648609, "learning_rate": 5.087906743604797e-06, "loss": 0.4356, "step": 7181 }, { "epoch": 0.51, "grad_norm": 1.8354474276969215, "learning_rate": 5.086757751946115e-06, "loss": 0.5648, "step": 7182 }, { "epoch": 0.51, "grad_norm": 1.6984935815932878, "learning_rate": 5.0856087557045965e-06, "loss": 0.4851, "step": 7183 }, { "epoch": 0.51, "grad_norm": 1.722918994163249, "learning_rate": 5.084459754940931e-06, "loss": 0.5475, "step": 7184 }, { "epoch": 0.51, "grad_norm": 1.9591364963612192, "learning_rate": 5.083310749715815e-06, "loss": 0.5988, "step": 7185 }, { "epoch": 0.51, "grad_norm": 2.0784154677264985, "learning_rate": 5.082161740089944e-06, "loss": 0.5634, "step": 7186 }, { "epoch": 0.51, "grad_norm": 1.496673241833887, "learning_rate": 5.081012726124011e-06, "loss": 0.5965, "step": 7187 }, { "epoch": 0.51, "grad_norm": 2.2261573418980114, "learning_rate": 5.079863707878712e-06, "loss": 0.5102, "step": 7188 }, { "epoch": 0.51, "grad_norm": 2.1496305653936076, "learning_rate": 5.07871468541474e-06, "loss": 0.5347, "step": 7189 }, { "epoch": 0.51, "grad_norm": 0.7681969251651329, "learning_rate": 5.077565658792793e-06, "loss": 0.4736, "step": 7190 }, { "epoch": 0.51, "grad_norm": 2.158904767833304, "learning_rate": 5.076416628073565e-06, "loss": 0.5321, "step": 7191 }, { "epoch": 0.51, "grad_norm": 1.6847662975240523, "learning_rate": 5.075267593317751e-06, "loss": 0.5188, "step": 7192 }, { "epoch": 0.51, "grad_norm": 1.3552674680186045, "learning_rate": 5.074118554586047e-06, "loss": 0.4671, "step": 7193 }, { "epoch": 0.51, "grad_norm": 2.151352171650516, "learning_rate": 5.072969511939152e-06, "loss": 0.5051, "step": 7194 }, { "epoch": 0.51, "grad_norm": 1.954993407580758, "learning_rate": 5.071820465437759e-06, "loss": 0.5974, "step": 7195 }, { "epoch": 0.51, "grad_norm": 1.621000747261092, "learning_rate": 5.070671415142566e-06, "loss": 0.5398, "step": 7196 }, { "epoch": 0.51, "grad_norm": 1.5758683918705338, "learning_rate": 5.069522361114269e-06, "loss": 0.5637, "step": 7197 }, { "epoch": 0.51, "grad_norm": 1.5603712280776678, "learning_rate": 5.068373303413566e-06, "loss": 0.4902, "step": 7198 }, { "epoch": 0.51, "grad_norm": 1.7591020585422592, "learning_rate": 5.067224242101151e-06, "loss": 0.4751, "step": 7199 }, { "epoch": 0.51, "grad_norm": 1.644581644456421, "learning_rate": 5.066075177237726e-06, "loss": 0.5653, "step": 7200 }, { "epoch": 0.51, "grad_norm": 2.0052449603081564, "learning_rate": 5.0649261088839854e-06, "loss": 0.5612, "step": 7201 }, { "epoch": 0.51, "grad_norm": 1.6038822523909237, "learning_rate": 5.063777037100628e-06, "loss": 0.4771, "step": 7202 }, { "epoch": 0.51, "grad_norm": 1.79919749248377, "learning_rate": 5.062627961948352e-06, "loss": 0.5935, "step": 7203 }, { "epoch": 0.51, "grad_norm": 1.9810872651517457, "learning_rate": 5.061478883487854e-06, "loss": 0.4846, "step": 7204 }, { "epoch": 0.51, "grad_norm": 1.6822731765585872, "learning_rate": 5.060329801779834e-06, "loss": 0.5035, "step": 7205 }, { "epoch": 0.51, "grad_norm": 1.906475803232001, "learning_rate": 5.059180716884988e-06, "loss": 0.5763, "step": 7206 }, { "epoch": 0.51, "grad_norm": 1.631273332509671, "learning_rate": 5.058031628864017e-06, "loss": 0.5189, "step": 7207 }, { "epoch": 0.51, "grad_norm": 1.5515088499171164, "learning_rate": 5.056882537777619e-06, "loss": 0.5259, "step": 7208 }, { "epoch": 0.51, "grad_norm": 1.557540216053646, "learning_rate": 5.055733443686492e-06, "loss": 0.5263, "step": 7209 }, { "epoch": 0.51, "grad_norm": 1.5962054424753847, "learning_rate": 5.054584346651336e-06, "loss": 0.587, "step": 7210 }, { "epoch": 0.51, "grad_norm": 1.7644523494004751, "learning_rate": 5.0534352467328494e-06, "loss": 0.5303, "step": 7211 }, { "epoch": 0.51, "grad_norm": 1.6920811478208428, "learning_rate": 5.0522861439917326e-06, "loss": 0.5787, "step": 7212 }, { "epoch": 0.51, "grad_norm": 1.5514690733654695, "learning_rate": 5.0511370384886835e-06, "loss": 0.5209, "step": 7213 }, { "epoch": 0.51, "grad_norm": 1.6473977802266926, "learning_rate": 5.049987930284403e-06, "loss": 0.5153, "step": 7214 }, { "epoch": 0.51, "grad_norm": 1.7444379297982122, "learning_rate": 5.0488388194395925e-06, "loss": 0.5596, "step": 7215 }, { "epoch": 0.51, "grad_norm": 1.587468208291168, "learning_rate": 5.047689706014947e-06, "loss": 0.5033, "step": 7216 }, { "epoch": 0.51, "grad_norm": 1.7695429034238872, "learning_rate": 5.046540590071173e-06, "loss": 0.6132, "step": 7217 }, { "epoch": 0.51, "grad_norm": 1.843576083449908, "learning_rate": 5.045391471668965e-06, "loss": 0.5495, "step": 7218 }, { "epoch": 0.51, "grad_norm": 0.705654737869598, "learning_rate": 5.044242350869025e-06, "loss": 0.4269, "step": 7219 }, { "epoch": 0.51, "grad_norm": 1.7192155706036898, "learning_rate": 5.043093227732057e-06, "loss": 0.5318, "step": 7220 }, { "epoch": 0.51, "grad_norm": 3.930354376564211, "learning_rate": 5.041944102318756e-06, "loss": 0.593, "step": 7221 }, { "epoch": 0.51, "grad_norm": 1.7292720597457205, "learning_rate": 5.040794974689827e-06, "loss": 0.548, "step": 7222 }, { "epoch": 0.51, "grad_norm": 1.5109477056916976, "learning_rate": 5.039645844905967e-06, "loss": 0.5379, "step": 7223 }, { "epoch": 0.51, "grad_norm": 0.8638070213950221, "learning_rate": 5.038496713027881e-06, "loss": 0.4608, "step": 7224 }, { "epoch": 0.51, "grad_norm": 1.749235122769125, "learning_rate": 5.037347579116269e-06, "loss": 0.5315, "step": 7225 }, { "epoch": 0.51, "grad_norm": 0.7663640692608794, "learning_rate": 5.036198443231831e-06, "loss": 0.4755, "step": 7226 }, { "epoch": 0.51, "grad_norm": 1.806818927661662, "learning_rate": 5.035049305435267e-06, "loss": 0.6063, "step": 7227 }, { "epoch": 0.51, "grad_norm": 0.7573698531441196, "learning_rate": 5.033900165787281e-06, "loss": 0.4373, "step": 7228 }, { "epoch": 0.51, "grad_norm": 1.8455329651365062, "learning_rate": 5.032751024348572e-06, "loss": 0.5774, "step": 7229 }, { "epoch": 0.51, "grad_norm": 1.7822989499076083, "learning_rate": 5.031601881179845e-06, "loss": 0.5019, "step": 7230 }, { "epoch": 0.51, "grad_norm": 1.5448972505936522, "learning_rate": 5.030452736341799e-06, "loss": 0.5072, "step": 7231 }, { "epoch": 0.51, "grad_norm": 1.6595951684377732, "learning_rate": 5.029303589895136e-06, "loss": 0.5372, "step": 7232 }, { "epoch": 0.51, "grad_norm": 2.152031855143544, "learning_rate": 5.028154441900558e-06, "loss": 0.5201, "step": 7233 }, { "epoch": 0.51, "grad_norm": 3.1936899109439905, "learning_rate": 5.027005292418766e-06, "loss": 0.5086, "step": 7234 }, { "epoch": 0.51, "grad_norm": 1.563384468087144, "learning_rate": 5.025856141510465e-06, "loss": 0.5033, "step": 7235 }, { "epoch": 0.51, "grad_norm": 2.04675847653485, "learning_rate": 5.024706989236353e-06, "loss": 0.5715, "step": 7236 }, { "epoch": 0.51, "grad_norm": 1.8693857345891904, "learning_rate": 5.023557835657134e-06, "loss": 0.5849, "step": 7237 }, { "epoch": 0.51, "grad_norm": 1.7593542749232354, "learning_rate": 5.0224086808335115e-06, "loss": 0.5149, "step": 7238 }, { "epoch": 0.51, "grad_norm": 0.75551147779967, "learning_rate": 5.021259524826187e-06, "loss": 0.4101, "step": 7239 }, { "epoch": 0.51, "grad_norm": 1.7572922825046093, "learning_rate": 5.020110367695862e-06, "loss": 0.5475, "step": 7240 }, { "epoch": 0.51, "grad_norm": 1.6726415173378884, "learning_rate": 5.018961209503239e-06, "loss": 0.4617, "step": 7241 }, { "epoch": 0.51, "grad_norm": 1.603224859491169, "learning_rate": 5.017812050309021e-06, "loss": 0.5432, "step": 7242 }, { "epoch": 0.51, "grad_norm": 1.5848769110086018, "learning_rate": 5.016662890173909e-06, "loss": 0.5071, "step": 7243 }, { "epoch": 0.51, "grad_norm": 1.7082490889526456, "learning_rate": 5.015513729158608e-06, "loss": 0.5359, "step": 7244 }, { "epoch": 0.51, "grad_norm": 2.483721540952498, "learning_rate": 5.014364567323819e-06, "loss": 0.5698, "step": 7245 }, { "epoch": 0.51, "grad_norm": 2.22394592421172, "learning_rate": 5.013215404730244e-06, "loss": 0.5088, "step": 7246 }, { "epoch": 0.51, "grad_norm": 2.159977920143412, "learning_rate": 5.012066241438588e-06, "loss": 0.4958, "step": 7247 }, { "epoch": 0.51, "grad_norm": 3.2717837002259853, "learning_rate": 5.01091707750955e-06, "loss": 0.4975, "step": 7248 }, { "epoch": 0.51, "grad_norm": 0.8625731917922311, "learning_rate": 5.009767913003838e-06, "loss": 0.4315, "step": 7249 }, { "epoch": 0.51, "grad_norm": 1.8296157089392737, "learning_rate": 5.008618747982149e-06, "loss": 0.5825, "step": 7250 }, { "epoch": 0.51, "grad_norm": 0.6978983175685524, "learning_rate": 5.007469582505188e-06, "loss": 0.4323, "step": 7251 }, { "epoch": 0.51, "grad_norm": 2.28314763289816, "learning_rate": 5.006320416633661e-06, "loss": 0.4997, "step": 7252 }, { "epoch": 0.51, "grad_norm": 1.9671083190596543, "learning_rate": 5.005171250428267e-06, "loss": 0.5827, "step": 7253 }, { "epoch": 0.51, "grad_norm": 0.7045128805807999, "learning_rate": 5.004022083949711e-06, "loss": 0.4379, "step": 7254 }, { "epoch": 0.51, "grad_norm": 1.7271037491130197, "learning_rate": 5.002872917258693e-06, "loss": 0.5398, "step": 7255 }, { "epoch": 0.51, "grad_norm": 1.641112828116606, "learning_rate": 5.00172375041592e-06, "loss": 0.5336, "step": 7256 }, { "epoch": 0.51, "grad_norm": 2.784381814528962, "learning_rate": 5.00057458348209e-06, "loss": 0.5788, "step": 7257 }, { "epoch": 0.52, "grad_norm": 1.5072402623407863, "learning_rate": 4.9994254165179105e-06, "loss": 0.56, "step": 7258 }, { "epoch": 0.52, "grad_norm": 1.5348676725488957, "learning_rate": 4.998276249584082e-06, "loss": 0.6639, "step": 7259 }, { "epoch": 0.52, "grad_norm": 1.7916397577932328, "learning_rate": 4.997127082741307e-06, "loss": 0.4294, "step": 7260 }, { "epoch": 0.52, "grad_norm": 1.5645671543729323, "learning_rate": 4.995977916050291e-06, "loss": 0.484, "step": 7261 }, { "epoch": 0.52, "grad_norm": 1.7220891046244426, "learning_rate": 4.9948287495717345e-06, "loss": 0.4791, "step": 7262 }, { "epoch": 0.52, "grad_norm": 1.499614914639197, "learning_rate": 4.993679583366341e-06, "loss": 0.5255, "step": 7263 }, { "epoch": 0.52, "grad_norm": 1.7632920332517494, "learning_rate": 4.992530417494812e-06, "loss": 0.4827, "step": 7264 }, { "epoch": 0.52, "grad_norm": 2.055270365578761, "learning_rate": 4.9913812520178524e-06, "loss": 0.5338, "step": 7265 }, { "epoch": 0.52, "grad_norm": 1.9181685268858486, "learning_rate": 4.990232086996165e-06, "loss": 0.5925, "step": 7266 }, { "epoch": 0.52, "grad_norm": 2.052652317873827, "learning_rate": 4.98908292249045e-06, "loss": 0.5841, "step": 7267 }, { "epoch": 0.52, "grad_norm": 1.5402671363346916, "learning_rate": 4.987933758561414e-06, "loss": 0.5503, "step": 7268 }, { "epoch": 0.52, "grad_norm": 1.959775331612989, "learning_rate": 4.986784595269758e-06, "loss": 0.5187, "step": 7269 }, { "epoch": 0.52, "grad_norm": 1.735648065561153, "learning_rate": 4.985635432676184e-06, "loss": 0.5624, "step": 7270 }, { "epoch": 0.52, "grad_norm": 1.87712740782785, "learning_rate": 4.984486270841393e-06, "loss": 0.4823, "step": 7271 }, { "epoch": 0.52, "grad_norm": 0.7293366666424574, "learning_rate": 4.983337109826093e-06, "loss": 0.4563, "step": 7272 }, { "epoch": 0.52, "grad_norm": 0.7133623007553, "learning_rate": 4.982187949690981e-06, "loss": 0.4277, "step": 7273 }, { "epoch": 0.52, "grad_norm": 1.7270411304845135, "learning_rate": 4.981038790496763e-06, "loss": 0.5562, "step": 7274 }, { "epoch": 0.52, "grad_norm": 1.764351877262511, "learning_rate": 4.979889632304138e-06, "loss": 0.5697, "step": 7275 }, { "epoch": 0.52, "grad_norm": 1.4504232648468505, "learning_rate": 4.9787404751738135e-06, "loss": 0.4977, "step": 7276 }, { "epoch": 0.52, "grad_norm": 0.8048429391730955, "learning_rate": 4.97759131916649e-06, "loss": 0.4318, "step": 7277 }, { "epoch": 0.52, "grad_norm": 1.842523802109378, "learning_rate": 4.9764421643428675e-06, "loss": 0.5527, "step": 7278 }, { "epoch": 0.52, "grad_norm": 1.5425789839625967, "learning_rate": 4.975293010763648e-06, "loss": 0.5084, "step": 7279 }, { "epoch": 0.52, "grad_norm": 1.8472122702634888, "learning_rate": 4.974143858489537e-06, "loss": 0.5103, "step": 7280 }, { "epoch": 0.52, "grad_norm": 0.7591169982563802, "learning_rate": 4.972994707581235e-06, "loss": 0.445, "step": 7281 }, { "epoch": 0.52, "grad_norm": 1.819434752435671, "learning_rate": 4.971845558099443e-06, "loss": 0.6028, "step": 7282 }, { "epoch": 0.52, "grad_norm": 1.6563495002598083, "learning_rate": 4.970696410104865e-06, "loss": 0.569, "step": 7283 }, { "epoch": 0.52, "grad_norm": 1.9422865757037184, "learning_rate": 4.9695472636582035e-06, "loss": 0.5756, "step": 7284 }, { "epoch": 0.52, "grad_norm": 1.7093473061284465, "learning_rate": 4.968398118820157e-06, "loss": 0.5193, "step": 7285 }, { "epoch": 0.52, "grad_norm": 1.7519770391294718, "learning_rate": 4.9672489756514284e-06, "loss": 0.6103, "step": 7286 }, { "epoch": 0.52, "grad_norm": 1.5457193395840516, "learning_rate": 4.966099834212721e-06, "loss": 0.5481, "step": 7287 }, { "epoch": 0.52, "grad_norm": 1.8762245569098595, "learning_rate": 4.964950694564734e-06, "loss": 0.555, "step": 7288 }, { "epoch": 0.52, "grad_norm": 1.4737133075298772, "learning_rate": 4.963801556768172e-06, "loss": 0.5457, "step": 7289 }, { "epoch": 0.52, "grad_norm": 1.6450680366116832, "learning_rate": 4.962652420883732e-06, "loss": 0.5311, "step": 7290 }, { "epoch": 0.52, "grad_norm": 1.5037485486985844, "learning_rate": 4.9615032869721195e-06, "loss": 0.5117, "step": 7291 }, { "epoch": 0.52, "grad_norm": 1.903523040455951, "learning_rate": 4.960354155094034e-06, "loss": 0.5751, "step": 7292 }, { "epoch": 0.52, "grad_norm": 1.8904026770030742, "learning_rate": 4.959205025310175e-06, "loss": 0.4847, "step": 7293 }, { "epoch": 0.52, "grad_norm": 2.80369450954564, "learning_rate": 4.9580558976812445e-06, "loss": 0.5236, "step": 7294 }, { "epoch": 0.52, "grad_norm": 1.5086448198287172, "learning_rate": 4.956906772267945e-06, "loss": 0.496, "step": 7295 }, { "epoch": 0.52, "grad_norm": 0.7517681557055905, "learning_rate": 4.955757649130976e-06, "loss": 0.4372, "step": 7296 }, { "epoch": 0.52, "grad_norm": 0.6965478616827675, "learning_rate": 4.954608528331038e-06, "loss": 0.4687, "step": 7297 }, { "epoch": 0.52, "grad_norm": 1.486501053378748, "learning_rate": 4.953459409928829e-06, "loss": 0.5254, "step": 7298 }, { "epoch": 0.52, "grad_norm": 0.7619300355084733, "learning_rate": 4.9523102939850535e-06, "loss": 0.4197, "step": 7299 }, { "epoch": 0.52, "grad_norm": 2.4703459123713984, "learning_rate": 4.951161180560411e-06, "loss": 0.539, "step": 7300 }, { "epoch": 0.52, "grad_norm": 1.5990817683088132, "learning_rate": 4.950012069715598e-06, "loss": 0.5942, "step": 7301 }, { "epoch": 0.52, "grad_norm": 1.4823258718291015, "learning_rate": 4.948862961511318e-06, "loss": 0.586, "step": 7302 }, { "epoch": 0.52, "grad_norm": 1.6477803011147398, "learning_rate": 4.947713856008269e-06, "loss": 0.5404, "step": 7303 }, { "epoch": 0.52, "grad_norm": 0.7582317307299441, "learning_rate": 4.946564753267153e-06, "loss": 0.4442, "step": 7304 }, { "epoch": 0.52, "grad_norm": 2.056629385867051, "learning_rate": 4.945415653348665e-06, "loss": 0.6541, "step": 7305 }, { "epoch": 0.52, "grad_norm": 1.4548594594819615, "learning_rate": 4.944266556313509e-06, "loss": 0.4713, "step": 7306 }, { "epoch": 0.52, "grad_norm": 3.1494274175085297, "learning_rate": 4.943117462222384e-06, "loss": 0.5059, "step": 7307 }, { "epoch": 0.52, "grad_norm": 1.796287612559202, "learning_rate": 4.941968371135984e-06, "loss": 0.4994, "step": 7308 }, { "epoch": 0.52, "grad_norm": 1.861721318817861, "learning_rate": 4.940819283115013e-06, "loss": 0.5518, "step": 7309 }, { "epoch": 0.52, "grad_norm": 1.6850851880783693, "learning_rate": 4.939670198220168e-06, "loss": 0.5342, "step": 7310 }, { "epoch": 0.52, "grad_norm": 1.5351681440168856, "learning_rate": 4.938521116512147e-06, "loss": 0.5481, "step": 7311 }, { "epoch": 0.52, "grad_norm": 0.7683130362519132, "learning_rate": 4.93737203805165e-06, "loss": 0.4502, "step": 7312 }, { "epoch": 0.52, "grad_norm": 1.7135111792916076, "learning_rate": 4.936222962899372e-06, "loss": 0.4866, "step": 7313 }, { "epoch": 0.52, "grad_norm": 1.5765238638206085, "learning_rate": 4.935073891116015e-06, "loss": 0.5528, "step": 7314 }, { "epoch": 0.52, "grad_norm": 1.3960977494701152, "learning_rate": 4.933924822762276e-06, "loss": 0.5427, "step": 7315 }, { "epoch": 0.52, "grad_norm": 2.0418287895522167, "learning_rate": 4.93277575789885e-06, "loss": 0.5587, "step": 7316 }, { "epoch": 0.52, "grad_norm": 1.5025091497905536, "learning_rate": 4.931626696586435e-06, "loss": 0.492, "step": 7317 }, { "epoch": 0.52, "grad_norm": 2.0128210261794472, "learning_rate": 4.930477638885733e-06, "loss": 0.5476, "step": 7318 }, { "epoch": 0.52, "grad_norm": 0.7240440208197867, "learning_rate": 4.929328584857436e-06, "loss": 0.4442, "step": 7319 }, { "epoch": 0.52, "grad_norm": 0.7699847052167357, "learning_rate": 4.928179534562241e-06, "loss": 0.4316, "step": 7320 }, { "epoch": 0.52, "grad_norm": 1.6586849891371507, "learning_rate": 4.927030488060849e-06, "loss": 0.5568, "step": 7321 }, { "epoch": 0.52, "grad_norm": 2.170569533833282, "learning_rate": 4.9258814454139535e-06, "loss": 0.5194, "step": 7322 }, { "epoch": 0.52, "grad_norm": 1.8493214802532791, "learning_rate": 4.924732406682251e-06, "loss": 0.468, "step": 7323 }, { "epoch": 0.52, "grad_norm": 0.7951036683305411, "learning_rate": 4.923583371926436e-06, "loss": 0.4565, "step": 7324 }, { "epoch": 0.52, "grad_norm": 1.6236967319641786, "learning_rate": 4.922434341207208e-06, "loss": 0.5372, "step": 7325 }, { "epoch": 0.52, "grad_norm": 1.5171929123922037, "learning_rate": 4.9212853145852605e-06, "loss": 0.4953, "step": 7326 }, { "epoch": 0.52, "grad_norm": 2.430018558229765, "learning_rate": 4.9201362921212905e-06, "loss": 0.5332, "step": 7327 }, { "epoch": 0.52, "grad_norm": 1.7219510683218358, "learning_rate": 4.918987273875989e-06, "loss": 0.5116, "step": 7328 }, { "epoch": 0.52, "grad_norm": 1.673881951175707, "learning_rate": 4.917838259910058e-06, "loss": 0.5809, "step": 7329 }, { "epoch": 0.52, "grad_norm": 1.8155002702907637, "learning_rate": 4.916689250284188e-06, "loss": 0.5358, "step": 7330 }, { "epoch": 0.52, "grad_norm": 1.932130852182361, "learning_rate": 4.91554024505907e-06, "loss": 0.5895, "step": 7331 }, { "epoch": 0.52, "grad_norm": 1.54986950392895, "learning_rate": 4.914391244295405e-06, "loss": 0.5334, "step": 7332 }, { "epoch": 0.52, "grad_norm": 1.475992079844505, "learning_rate": 4.913242248053886e-06, "loss": 0.5487, "step": 7333 }, { "epoch": 0.52, "grad_norm": 1.8019433301398566, "learning_rate": 4.912093256395205e-06, "loss": 0.5334, "step": 7334 }, { "epoch": 0.52, "grad_norm": 0.757988423785613, "learning_rate": 4.910944269380054e-06, "loss": 0.4296, "step": 7335 }, { "epoch": 0.52, "grad_norm": 1.5404619600443263, "learning_rate": 4.909795287069131e-06, "loss": 0.5086, "step": 7336 }, { "epoch": 0.52, "grad_norm": 1.7294740967188893, "learning_rate": 4.908646309523126e-06, "loss": 0.5123, "step": 7337 }, { "epoch": 0.52, "grad_norm": 1.6191698637764513, "learning_rate": 4.907497336802732e-06, "loss": 0.5597, "step": 7338 }, { "epoch": 0.52, "grad_norm": 1.5788484188302085, "learning_rate": 4.9063483689686415e-06, "loss": 0.5193, "step": 7339 }, { "epoch": 0.52, "grad_norm": 2.5271686612925537, "learning_rate": 4.905199406081547e-06, "loss": 0.533, "step": 7340 }, { "epoch": 0.52, "grad_norm": 2.073671004993185, "learning_rate": 4.904050448202144e-06, "loss": 0.5145, "step": 7341 }, { "epoch": 0.52, "grad_norm": 1.8285991045877787, "learning_rate": 4.902901495391119e-06, "loss": 0.5013, "step": 7342 }, { "epoch": 0.52, "grad_norm": 1.5901907612462758, "learning_rate": 4.901752547709166e-06, "loss": 0.5395, "step": 7343 }, { "epoch": 0.52, "grad_norm": 1.7126069626901925, "learning_rate": 4.900603605216977e-06, "loss": 0.4922, "step": 7344 }, { "epoch": 0.52, "grad_norm": 1.507891969992394, "learning_rate": 4.899454667975242e-06, "loss": 0.5554, "step": 7345 }, { "epoch": 0.52, "grad_norm": 2.40863267000915, "learning_rate": 4.898305736044652e-06, "loss": 0.5683, "step": 7346 }, { "epoch": 0.52, "grad_norm": 1.9209637769275547, "learning_rate": 4.897156809485898e-06, "loss": 0.5584, "step": 7347 }, { "epoch": 0.52, "grad_norm": 1.7279904297580817, "learning_rate": 4.8960078883596706e-06, "loss": 0.5495, "step": 7348 }, { "epoch": 0.52, "grad_norm": 1.7577852968405348, "learning_rate": 4.894858972726659e-06, "loss": 0.5297, "step": 7349 }, { "epoch": 0.52, "grad_norm": 0.7589329591726663, "learning_rate": 4.8937100626475505e-06, "loss": 0.4363, "step": 7350 }, { "epoch": 0.52, "grad_norm": 2.0177208520344454, "learning_rate": 4.892561158183038e-06, "loss": 0.4878, "step": 7351 }, { "epoch": 0.52, "grad_norm": 2.591169402828059, "learning_rate": 4.891412259393811e-06, "loss": 0.5092, "step": 7352 }, { "epoch": 0.52, "grad_norm": 2.126151293028521, "learning_rate": 4.890263366340554e-06, "loss": 0.5393, "step": 7353 }, { "epoch": 0.52, "grad_norm": 2.014463959281932, "learning_rate": 4.889114479083958e-06, "loss": 0.5862, "step": 7354 }, { "epoch": 0.52, "grad_norm": 1.6088368368993453, "learning_rate": 4.887965597684711e-06, "loss": 0.5609, "step": 7355 }, { "epoch": 0.52, "grad_norm": 1.468983047816732, "learning_rate": 4.886816722203502e-06, "loss": 0.5169, "step": 7356 }, { "epoch": 0.52, "grad_norm": 4.683650072407551, "learning_rate": 4.885667852701017e-06, "loss": 0.531, "step": 7357 }, { "epoch": 0.52, "grad_norm": 1.7333239597381842, "learning_rate": 4.8845189892379415e-06, "loss": 0.5231, "step": 7358 }, { "epoch": 0.52, "grad_norm": 0.8062943636873092, "learning_rate": 4.883370131874966e-06, "loss": 0.4582, "step": 7359 }, { "epoch": 0.52, "grad_norm": 1.7797894737046922, "learning_rate": 4.882221280672775e-06, "loss": 0.5604, "step": 7360 }, { "epoch": 0.52, "grad_norm": 2.063033854882861, "learning_rate": 4.881072435692055e-06, "loss": 0.4771, "step": 7361 }, { "epoch": 0.52, "grad_norm": 2.2966781479327145, "learning_rate": 4.87992359699349e-06, "loss": 0.4907, "step": 7362 }, { "epoch": 0.52, "grad_norm": 2.0132215808843585, "learning_rate": 4.878774764637771e-06, "loss": 0.5399, "step": 7363 }, { "epoch": 0.52, "grad_norm": 1.703541505438867, "learning_rate": 4.877625938685579e-06, "loss": 0.5569, "step": 7364 }, { "epoch": 0.52, "grad_norm": 1.6747568189926068, "learning_rate": 4.876477119197597e-06, "loss": 0.5393, "step": 7365 }, { "epoch": 0.52, "grad_norm": 1.8621462905954436, "learning_rate": 4.875328306234514e-06, "loss": 0.5603, "step": 7366 }, { "epoch": 0.52, "grad_norm": 1.65100075481453, "learning_rate": 4.874179499857014e-06, "loss": 0.486, "step": 7367 }, { "epoch": 0.52, "grad_norm": 1.483617828046408, "learning_rate": 4.873030700125776e-06, "loss": 0.5323, "step": 7368 }, { "epoch": 0.52, "grad_norm": 1.5947419698944014, "learning_rate": 4.871881907101487e-06, "loss": 0.5383, "step": 7369 }, { "epoch": 0.52, "grad_norm": 1.7882505084752232, "learning_rate": 4.870733120844831e-06, "loss": 0.4783, "step": 7370 }, { "epoch": 0.52, "grad_norm": 2.034000441614421, "learning_rate": 4.86958434141649e-06, "loss": 0.5397, "step": 7371 }, { "epoch": 0.52, "grad_norm": 1.6923893103618055, "learning_rate": 4.868435568877146e-06, "loss": 0.5924, "step": 7372 }, { "epoch": 0.52, "grad_norm": 1.8831985494351888, "learning_rate": 4.867286803287478e-06, "loss": 0.5419, "step": 7373 }, { "epoch": 0.52, "grad_norm": 1.9059238241951892, "learning_rate": 4.866138044708175e-06, "loss": 0.4841, "step": 7374 }, { "epoch": 0.52, "grad_norm": 1.5834790823932463, "learning_rate": 4.864989293199912e-06, "loss": 0.4753, "step": 7375 }, { "epoch": 0.52, "grad_norm": 1.6879136176173732, "learning_rate": 4.863840548823373e-06, "loss": 0.5229, "step": 7376 }, { "epoch": 0.52, "grad_norm": 1.6147500189709878, "learning_rate": 4.862691811639235e-06, "loss": 0.5571, "step": 7377 }, { "epoch": 0.52, "grad_norm": 1.8373899915317233, "learning_rate": 4.861543081708183e-06, "loss": 0.6408, "step": 7378 }, { "epoch": 0.52, "grad_norm": 1.6412387802745478, "learning_rate": 4.860394359090895e-06, "loss": 0.5729, "step": 7379 }, { "epoch": 0.52, "grad_norm": 1.7730638218199797, "learning_rate": 4.859245643848047e-06, "loss": 0.6109, "step": 7380 }, { "epoch": 0.52, "grad_norm": 1.7258574952874157, "learning_rate": 4.858096936040324e-06, "loss": 0.492, "step": 7381 }, { "epoch": 0.52, "grad_norm": 1.916118385761965, "learning_rate": 4.8569482357284015e-06, "loss": 0.5452, "step": 7382 }, { "epoch": 0.52, "grad_norm": 1.6661544942603368, "learning_rate": 4.855799542972957e-06, "loss": 0.5665, "step": 7383 }, { "epoch": 0.52, "grad_norm": 1.7872691468342043, "learning_rate": 4.854650857834668e-06, "loss": 0.5779, "step": 7384 }, { "epoch": 0.52, "grad_norm": 1.7113523873740464, "learning_rate": 4.853502180374216e-06, "loss": 0.5283, "step": 7385 }, { "epoch": 0.52, "grad_norm": 1.4446689144822593, "learning_rate": 4.852353510652274e-06, "loss": 0.4979, "step": 7386 }, { "epoch": 0.52, "grad_norm": 1.5842041800155444, "learning_rate": 4.851204848729521e-06, "loss": 0.5187, "step": 7387 }, { "epoch": 0.52, "grad_norm": 1.7794692525222693, "learning_rate": 4.850056194666629e-06, "loss": 0.5082, "step": 7388 }, { "epoch": 0.52, "grad_norm": 1.4942439989136183, "learning_rate": 4.84890754852428e-06, "loss": 0.51, "step": 7389 }, { "epoch": 0.52, "grad_norm": 1.761109989153833, "learning_rate": 4.847758910363144e-06, "loss": 0.5862, "step": 7390 }, { "epoch": 0.52, "grad_norm": 6.173619657625336, "learning_rate": 4.8466102802439e-06, "loss": 0.512, "step": 7391 }, { "epoch": 0.52, "grad_norm": 1.5809107699010336, "learning_rate": 4.845461658227218e-06, "loss": 0.4672, "step": 7392 }, { "epoch": 0.52, "grad_norm": 1.555205269954731, "learning_rate": 4.844313044373778e-06, "loss": 0.5021, "step": 7393 }, { "epoch": 0.52, "grad_norm": 1.4719651548176411, "learning_rate": 4.84316443874425e-06, "loss": 0.4514, "step": 7394 }, { "epoch": 0.52, "grad_norm": 1.7005320090957707, "learning_rate": 4.8420158413993055e-06, "loss": 0.5527, "step": 7395 }, { "epoch": 0.52, "grad_norm": 0.7559765146035974, "learning_rate": 4.8408672523996216e-06, "loss": 0.4489, "step": 7396 }, { "epoch": 0.52, "grad_norm": 1.5339518301199495, "learning_rate": 4.839718671805868e-06, "loss": 0.4766, "step": 7397 }, { "epoch": 0.52, "grad_norm": 1.5129962411528446, "learning_rate": 4.838570099678718e-06, "loss": 0.4901, "step": 7398 }, { "epoch": 0.53, "grad_norm": 1.902755591832796, "learning_rate": 4.837421536078841e-06, "loss": 0.6113, "step": 7399 }, { "epoch": 0.53, "grad_norm": 1.9715098572942094, "learning_rate": 4.83627298106691e-06, "loss": 0.5479, "step": 7400 }, { "epoch": 0.53, "grad_norm": 1.5855626553528672, "learning_rate": 4.835124434703596e-06, "loss": 0.6019, "step": 7401 }, { "epoch": 0.53, "grad_norm": 1.5386253832460466, "learning_rate": 4.833975897049568e-06, "loss": 0.5372, "step": 7402 }, { "epoch": 0.53, "grad_norm": 1.6774804459341957, "learning_rate": 4.832827368165493e-06, "loss": 0.5077, "step": 7403 }, { "epoch": 0.53, "grad_norm": 1.7557684726530616, "learning_rate": 4.8316788481120466e-06, "loss": 0.5546, "step": 7404 }, { "epoch": 0.53, "grad_norm": 1.9292104967606223, "learning_rate": 4.830530336949891e-06, "loss": 0.5484, "step": 7405 }, { "epoch": 0.53, "grad_norm": 1.7686655525169592, "learning_rate": 4.829381834739699e-06, "loss": 0.5792, "step": 7406 }, { "epoch": 0.53, "grad_norm": 1.732875795650788, "learning_rate": 4.8282333415421345e-06, "loss": 0.5275, "step": 7407 }, { "epoch": 0.53, "grad_norm": 1.6610760187817324, "learning_rate": 4.827084857417869e-06, "loss": 0.4856, "step": 7408 }, { "epoch": 0.53, "grad_norm": 0.678595457152506, "learning_rate": 4.825936382427567e-06, "loss": 0.4306, "step": 7409 }, { "epoch": 0.53, "grad_norm": 1.9136617321001381, "learning_rate": 4.824787916631895e-06, "loss": 0.5392, "step": 7410 }, { "epoch": 0.53, "grad_norm": 2.3945844545734047, "learning_rate": 4.823639460091517e-06, "loss": 0.5581, "step": 7411 }, { "epoch": 0.53, "grad_norm": 1.7590313162382252, "learning_rate": 4.822491012867102e-06, "loss": 0.607, "step": 7412 }, { "epoch": 0.53, "grad_norm": 1.7297025618267894, "learning_rate": 4.821342575019313e-06, "loss": 0.4909, "step": 7413 }, { "epoch": 0.53, "grad_norm": 1.4372980742143602, "learning_rate": 4.820194146608813e-06, "loss": 0.5717, "step": 7414 }, { "epoch": 0.53, "grad_norm": 1.7093201199429693, "learning_rate": 4.81904572769627e-06, "loss": 0.5318, "step": 7415 }, { "epoch": 0.53, "grad_norm": 1.475697102776539, "learning_rate": 4.817897318342344e-06, "loss": 0.5002, "step": 7416 }, { "epoch": 0.53, "grad_norm": 1.855981730009265, "learning_rate": 4.8167489186077e-06, "loss": 0.5901, "step": 7417 }, { "epoch": 0.53, "grad_norm": 1.56069675030441, "learning_rate": 4.815600528552996e-06, "loss": 0.5132, "step": 7418 }, { "epoch": 0.53, "grad_norm": 1.8140602140772257, "learning_rate": 4.814452148238899e-06, "loss": 0.5395, "step": 7419 }, { "epoch": 0.53, "grad_norm": 1.671530695698421, "learning_rate": 4.813303777726067e-06, "loss": 0.5604, "step": 7420 }, { "epoch": 0.53, "grad_norm": 1.767389393392174, "learning_rate": 4.812155417075164e-06, "loss": 0.5639, "step": 7421 }, { "epoch": 0.53, "grad_norm": 2.081037361349603, "learning_rate": 4.811007066346846e-06, "loss": 0.5818, "step": 7422 }, { "epoch": 0.53, "grad_norm": 1.6410819600787845, "learning_rate": 4.809858725601777e-06, "loss": 0.4758, "step": 7423 }, { "epoch": 0.53, "grad_norm": 1.7808656731878307, "learning_rate": 4.808710394900613e-06, "loss": 0.4939, "step": 7424 }, { "epoch": 0.53, "grad_norm": 1.9010587241272825, "learning_rate": 4.807562074304015e-06, "loss": 0.5374, "step": 7425 }, { "epoch": 0.53, "grad_norm": 1.8307344161550274, "learning_rate": 4.80641376387264e-06, "loss": 0.5994, "step": 7426 }, { "epoch": 0.53, "grad_norm": 1.5752414441390228, "learning_rate": 4.805265463667146e-06, "loss": 0.5539, "step": 7427 }, { "epoch": 0.53, "grad_norm": 1.8435502554702665, "learning_rate": 4.804117173748191e-06, "loss": 0.512, "step": 7428 }, { "epoch": 0.53, "grad_norm": 1.836044495113223, "learning_rate": 4.802968894176428e-06, "loss": 0.5246, "step": 7429 }, { "epoch": 0.53, "grad_norm": 5.808261776435894, "learning_rate": 4.8018206250125175e-06, "loss": 0.5485, "step": 7430 }, { "epoch": 0.53, "grad_norm": 1.5316092311971667, "learning_rate": 4.800672366317114e-06, "loss": 0.4924, "step": 7431 }, { "epoch": 0.53, "grad_norm": 3.935278511444463, "learning_rate": 4.799524118150871e-06, "loss": 0.5741, "step": 7432 }, { "epoch": 0.53, "grad_norm": 1.5135101065081291, "learning_rate": 4.798375880574442e-06, "loss": 0.4955, "step": 7433 }, { "epoch": 0.53, "grad_norm": 1.9201351532437316, "learning_rate": 4.797227653648483e-06, "loss": 0.558, "step": 7434 }, { "epoch": 0.53, "grad_norm": 1.5240903669096573, "learning_rate": 4.7960794374336465e-06, "loss": 0.5736, "step": 7435 }, { "epoch": 0.53, "grad_norm": 1.5963124220227844, "learning_rate": 4.794931231990586e-06, "loss": 0.5217, "step": 7436 }, { "epoch": 0.53, "grad_norm": 1.7073144660270758, "learning_rate": 4.79378303737995e-06, "loss": 0.5832, "step": 7437 }, { "epoch": 0.53, "grad_norm": 1.7683480343901385, "learning_rate": 4.792634853662396e-06, "loss": 0.4593, "step": 7438 }, { "epoch": 0.53, "grad_norm": 1.500929338342082, "learning_rate": 4.79148668089857e-06, "loss": 0.5354, "step": 7439 }, { "epoch": 0.53, "grad_norm": 1.8788801129713897, "learning_rate": 4.7903385191491246e-06, "loss": 0.5449, "step": 7440 }, { "epoch": 0.53, "grad_norm": 1.7817568346780526, "learning_rate": 4.789190368474708e-06, "loss": 0.537, "step": 7441 }, { "epoch": 0.53, "grad_norm": 2.0801183752217605, "learning_rate": 4.7880422289359714e-06, "loss": 0.5422, "step": 7442 }, { "epoch": 0.53, "grad_norm": 2.2179929388035533, "learning_rate": 4.786894100593563e-06, "loss": 0.5841, "step": 7443 }, { "epoch": 0.53, "grad_norm": 1.7070374580430774, "learning_rate": 4.785745983508128e-06, "loss": 0.5329, "step": 7444 }, { "epoch": 0.53, "grad_norm": 1.6441414531436904, "learning_rate": 4.7845978777403175e-06, "loss": 0.5323, "step": 7445 }, { "epoch": 0.53, "grad_norm": 1.731431387725152, "learning_rate": 4.783449783350779e-06, "loss": 0.5236, "step": 7446 }, { "epoch": 0.53, "grad_norm": 2.5239439755761306, "learning_rate": 4.782301700400155e-06, "loss": 0.4864, "step": 7447 }, { "epoch": 0.53, "grad_norm": 2.851349324133474, "learning_rate": 4.781153628949092e-06, "loss": 0.5619, "step": 7448 }, { "epoch": 0.53, "grad_norm": 1.8905461710401283, "learning_rate": 4.780005569058236e-06, "loss": 0.5012, "step": 7449 }, { "epoch": 0.53, "grad_norm": 1.747240491128981, "learning_rate": 4.778857520788233e-06, "loss": 0.4931, "step": 7450 }, { "epoch": 0.53, "grad_norm": 1.6218377585856663, "learning_rate": 4.777709484199724e-06, "loss": 0.5544, "step": 7451 }, { "epoch": 0.53, "grad_norm": 1.528074819777602, "learning_rate": 4.776561459353352e-06, "loss": 0.5253, "step": 7452 }, { "epoch": 0.53, "grad_norm": 0.8394076133505175, "learning_rate": 4.775413446309763e-06, "loss": 0.4647, "step": 7453 }, { "epoch": 0.53, "grad_norm": 1.8208987385730326, "learning_rate": 4.774265445129596e-06, "loss": 0.6609, "step": 7454 }, { "epoch": 0.53, "grad_norm": 1.5537511415173335, "learning_rate": 4.773117455873493e-06, "loss": 0.4675, "step": 7455 }, { "epoch": 0.53, "grad_norm": 1.6393589097206174, "learning_rate": 4.7719694786020946e-06, "loss": 0.529, "step": 7456 }, { "epoch": 0.53, "grad_norm": 1.7787301197939274, "learning_rate": 4.770821513376041e-06, "loss": 0.5398, "step": 7457 }, { "epoch": 0.53, "grad_norm": 2.0877136113180232, "learning_rate": 4.769673560255972e-06, "loss": 0.5801, "step": 7458 }, { "epoch": 0.53, "grad_norm": 1.7021447527225175, "learning_rate": 4.768525619302525e-06, "loss": 0.5355, "step": 7459 }, { "epoch": 0.53, "grad_norm": 2.015943286957345, "learning_rate": 4.7673776905763395e-06, "loss": 0.5009, "step": 7460 }, { "epoch": 0.53, "grad_norm": 1.6458412097726336, "learning_rate": 4.766229774138054e-06, "loss": 0.5099, "step": 7461 }, { "epoch": 0.53, "grad_norm": 1.585458747173728, "learning_rate": 4.765081870048304e-06, "loss": 0.5225, "step": 7462 }, { "epoch": 0.53, "grad_norm": 1.4320428462954977, "learning_rate": 4.763933978367722e-06, "loss": 0.543, "step": 7463 }, { "epoch": 0.53, "grad_norm": 1.9244651984257728, "learning_rate": 4.762786099156949e-06, "loss": 0.5454, "step": 7464 }, { "epoch": 0.53, "grad_norm": 1.8985567502855376, "learning_rate": 4.76163823247662e-06, "loss": 0.5563, "step": 7465 }, { "epoch": 0.53, "grad_norm": 2.410341750380147, "learning_rate": 4.760490378387368e-06, "loss": 0.5446, "step": 7466 }, { "epoch": 0.53, "grad_norm": 2.175554390380109, "learning_rate": 4.759342536949822e-06, "loss": 0.5732, "step": 7467 }, { "epoch": 0.53, "grad_norm": 1.6401908700567378, "learning_rate": 4.758194708224621e-06, "loss": 0.5312, "step": 7468 }, { "epoch": 0.53, "grad_norm": 0.7128127440696862, "learning_rate": 4.7570468922723946e-06, "loss": 0.4326, "step": 7469 }, { "epoch": 0.53, "grad_norm": 2.3036649742093136, "learning_rate": 4.755899089153774e-06, "loss": 0.5559, "step": 7470 }, { "epoch": 0.53, "grad_norm": 1.7487258241775487, "learning_rate": 4.754751298929391e-06, "loss": 0.5336, "step": 7471 }, { "epoch": 0.53, "grad_norm": 1.9231597434583017, "learning_rate": 4.753603521659874e-06, "loss": 0.5421, "step": 7472 }, { "epoch": 0.53, "grad_norm": 1.6840225014811399, "learning_rate": 4.752455757405857e-06, "loss": 0.5594, "step": 7473 }, { "epoch": 0.53, "grad_norm": 1.6712448042547312, "learning_rate": 4.751308006227965e-06, "loss": 0.4901, "step": 7474 }, { "epoch": 0.53, "grad_norm": 2.5204317961760636, "learning_rate": 4.7501602681868234e-06, "loss": 0.5434, "step": 7475 }, { "epoch": 0.53, "grad_norm": 1.6245395466828847, "learning_rate": 4.749012543343066e-06, "loss": 0.5367, "step": 7476 }, { "epoch": 0.53, "grad_norm": 2.1041562578934156, "learning_rate": 4.747864831757316e-06, "loss": 0.6058, "step": 7477 }, { "epoch": 0.53, "grad_norm": 2.404924992002359, "learning_rate": 4.746717133490199e-06, "loss": 0.5595, "step": 7478 }, { "epoch": 0.53, "grad_norm": 1.5152583021831507, "learning_rate": 4.745569448602341e-06, "loss": 0.5743, "step": 7479 }, { "epoch": 0.53, "grad_norm": 0.7220905969730478, "learning_rate": 4.74442177715437e-06, "loss": 0.4477, "step": 7480 }, { "epoch": 0.53, "grad_norm": 1.8326847278405758, "learning_rate": 4.7432741192069045e-06, "loss": 0.5264, "step": 7481 }, { "epoch": 0.53, "grad_norm": 1.5997393604720236, "learning_rate": 4.742126474820568e-06, "loss": 0.4928, "step": 7482 }, { "epoch": 0.53, "grad_norm": 1.3972868113779104, "learning_rate": 4.740978844055989e-06, "loss": 0.5565, "step": 7483 }, { "epoch": 0.53, "grad_norm": 2.197027331142435, "learning_rate": 4.7398312269737824e-06, "loss": 0.5564, "step": 7484 }, { "epoch": 0.53, "grad_norm": 1.944802644093845, "learning_rate": 4.738683623634573e-06, "loss": 0.5541, "step": 7485 }, { "epoch": 0.53, "grad_norm": 1.6551526735568334, "learning_rate": 4.737536034098979e-06, "loss": 0.5048, "step": 7486 }, { "epoch": 0.53, "grad_norm": 2.31167449680483, "learning_rate": 4.7363884584276216e-06, "loss": 0.4427, "step": 7487 }, { "epoch": 0.53, "grad_norm": 1.8542841897334028, "learning_rate": 4.73524089668112e-06, "loss": 0.5451, "step": 7488 }, { "epoch": 0.53, "grad_norm": 2.3575642419174008, "learning_rate": 4.734093348920091e-06, "loss": 0.5581, "step": 7489 }, { "epoch": 0.53, "grad_norm": 1.6910039425539165, "learning_rate": 4.7329458152051504e-06, "loss": 0.4889, "step": 7490 }, { "epoch": 0.53, "grad_norm": 2.2840380467395156, "learning_rate": 4.73179829559692e-06, "loss": 0.5723, "step": 7491 }, { "epoch": 0.53, "grad_norm": 1.6401781185173763, "learning_rate": 4.73065079015601e-06, "loss": 0.4466, "step": 7492 }, { "epoch": 0.53, "grad_norm": 1.58735663124372, "learning_rate": 4.7295032989430375e-06, "loss": 0.5282, "step": 7493 }, { "epoch": 0.53, "grad_norm": 3.063171216389885, "learning_rate": 4.728355822018618e-06, "loss": 0.5496, "step": 7494 }, { "epoch": 0.53, "grad_norm": 1.8310121786480038, "learning_rate": 4.727208359443366e-06, "loss": 0.4686, "step": 7495 }, { "epoch": 0.53, "grad_norm": 1.7136942947762779, "learning_rate": 4.726060911277892e-06, "loss": 0.5378, "step": 7496 }, { "epoch": 0.53, "grad_norm": 2.656754583939008, "learning_rate": 4.724913477582808e-06, "loss": 0.4888, "step": 7497 }, { "epoch": 0.53, "grad_norm": 0.6924432510906878, "learning_rate": 4.723766058418726e-06, "loss": 0.4523, "step": 7498 }, { "epoch": 0.53, "grad_norm": 0.6532410295022965, "learning_rate": 4.722618653846259e-06, "loss": 0.4179, "step": 7499 }, { "epoch": 0.53, "grad_norm": 1.6144933320568173, "learning_rate": 4.721471263926013e-06, "loss": 0.5016, "step": 7500 }, { "epoch": 0.53, "grad_norm": 2.005213481341487, "learning_rate": 4.720323888718598e-06, "loss": 0.5759, "step": 7501 }, { "epoch": 0.53, "grad_norm": 2.262045600899499, "learning_rate": 4.7191765282846234e-06, "loss": 0.5684, "step": 7502 }, { "epoch": 0.53, "grad_norm": 1.5910710544108388, "learning_rate": 4.718029182684697e-06, "loss": 0.5753, "step": 7503 }, { "epoch": 0.53, "grad_norm": 1.7124610139064993, "learning_rate": 4.716881851979425e-06, "loss": 0.5978, "step": 7504 }, { "epoch": 0.53, "grad_norm": 3.1515524398778387, "learning_rate": 4.715734536229411e-06, "loss": 0.5422, "step": 7505 }, { "epoch": 0.53, "grad_norm": 1.712425424114244, "learning_rate": 4.714587235495263e-06, "loss": 0.5096, "step": 7506 }, { "epoch": 0.53, "grad_norm": 0.7656689492936505, "learning_rate": 4.713439949837585e-06, "loss": 0.4267, "step": 7507 }, { "epoch": 0.53, "grad_norm": 1.59352688301269, "learning_rate": 4.712292679316978e-06, "loss": 0.5595, "step": 7508 }, { "epoch": 0.53, "grad_norm": 1.5586540046966229, "learning_rate": 4.711145423994047e-06, "loss": 0.4992, "step": 7509 }, { "epoch": 0.53, "grad_norm": 1.5852895726635283, "learning_rate": 4.7099981839293956e-06, "loss": 0.5227, "step": 7510 }, { "epoch": 0.53, "grad_norm": 9.304911006753334, "learning_rate": 4.708850959183622e-06, "loss": 0.5499, "step": 7511 }, { "epoch": 0.53, "grad_norm": 2.167496624122782, "learning_rate": 4.707703749817325e-06, "loss": 0.5393, "step": 7512 }, { "epoch": 0.53, "grad_norm": 1.5680350233906049, "learning_rate": 4.706556555891108e-06, "loss": 0.5709, "step": 7513 }, { "epoch": 0.53, "grad_norm": 2.201014033283124, "learning_rate": 4.7054093774655694e-06, "loss": 0.524, "step": 7514 }, { "epoch": 0.53, "grad_norm": 1.5191105516273415, "learning_rate": 4.704262214601305e-06, "loss": 0.4511, "step": 7515 }, { "epoch": 0.53, "grad_norm": 1.6372813688524974, "learning_rate": 4.7031150673589095e-06, "loss": 0.5218, "step": 7516 }, { "epoch": 0.53, "grad_norm": 1.7736092171866171, "learning_rate": 4.701967935798985e-06, "loss": 0.5531, "step": 7517 }, { "epoch": 0.53, "grad_norm": 1.543645108684181, "learning_rate": 4.700820819982125e-06, "loss": 0.5111, "step": 7518 }, { "epoch": 0.53, "grad_norm": 2.4953844619178405, "learning_rate": 4.699673719968922e-06, "loss": 0.5124, "step": 7519 }, { "epoch": 0.53, "grad_norm": 1.5927424257296117, "learning_rate": 4.698526635819969e-06, "loss": 0.5607, "step": 7520 }, { "epoch": 0.53, "grad_norm": 1.9449488885871693, "learning_rate": 4.6973795675958625e-06, "loss": 0.5952, "step": 7521 }, { "epoch": 0.53, "grad_norm": 1.9984581668462433, "learning_rate": 4.696232515357192e-06, "loss": 0.5515, "step": 7522 }, { "epoch": 0.53, "grad_norm": 1.6852881570264506, "learning_rate": 4.6950854791645485e-06, "loss": 0.5158, "step": 7523 }, { "epoch": 0.53, "grad_norm": 2.1324273564783023, "learning_rate": 4.693938459078524e-06, "loss": 0.5058, "step": 7524 }, { "epoch": 0.53, "grad_norm": 1.9104510018384073, "learning_rate": 4.692791455159708e-06, "loss": 0.4644, "step": 7525 }, { "epoch": 0.53, "grad_norm": 1.9731952149685696, "learning_rate": 4.6916444674686885e-06, "loss": 0.5055, "step": 7526 }, { "epoch": 0.53, "grad_norm": 1.9198955078713131, "learning_rate": 4.69049749606605e-06, "loss": 0.4897, "step": 7527 }, { "epoch": 0.53, "grad_norm": 1.5513023343118293, "learning_rate": 4.689350541012386e-06, "loss": 0.5074, "step": 7528 }, { "epoch": 0.53, "grad_norm": 1.6341022434994197, "learning_rate": 4.688203602368277e-06, "loss": 0.4657, "step": 7529 }, { "epoch": 0.53, "grad_norm": 1.7720462701558881, "learning_rate": 4.687056680194312e-06, "loss": 0.5403, "step": 7530 }, { "epoch": 0.53, "grad_norm": 1.7849408711916237, "learning_rate": 4.685909774551071e-06, "loss": 0.5098, "step": 7531 }, { "epoch": 0.53, "grad_norm": 2.332887165781788, "learning_rate": 4.6847628854991425e-06, "loss": 0.6053, "step": 7532 }, { "epoch": 0.53, "grad_norm": 1.7139707142646405, "learning_rate": 4.683616013099106e-06, "loss": 0.6241, "step": 7533 }, { "epoch": 0.53, "grad_norm": 1.665656818969037, "learning_rate": 4.682469157411544e-06, "loss": 0.5302, "step": 7534 }, { "epoch": 0.53, "grad_norm": 1.5444400732674446, "learning_rate": 4.681322318497035e-06, "loss": 0.5197, "step": 7535 }, { "epoch": 0.53, "grad_norm": 1.7295750464442567, "learning_rate": 4.680175496416163e-06, "loss": 0.5574, "step": 7536 }, { "epoch": 0.53, "grad_norm": 1.6138834141619336, "learning_rate": 4.679028691229503e-06, "loss": 0.4755, "step": 7537 }, { "epoch": 0.53, "grad_norm": 5.08560198395392, "learning_rate": 4.677881902997638e-06, "loss": 0.4584, "step": 7538 }, { "epoch": 0.53, "grad_norm": 1.7858440384474503, "learning_rate": 4.676735131781139e-06, "loss": 0.5737, "step": 7539 }, { "epoch": 0.54, "grad_norm": 1.6706114807199945, "learning_rate": 4.675588377640588e-06, "loss": 0.5603, "step": 7540 }, { "epoch": 0.54, "grad_norm": 1.9456859921286078, "learning_rate": 4.674441640636559e-06, "loss": 0.5443, "step": 7541 }, { "epoch": 0.54, "grad_norm": 2.078625484326005, "learning_rate": 4.673294920829623e-06, "loss": 0.5181, "step": 7542 }, { "epoch": 0.54, "grad_norm": 1.675332270080036, "learning_rate": 4.672148218280358e-06, "loss": 0.5375, "step": 7543 }, { "epoch": 0.54, "grad_norm": 1.7271741595536443, "learning_rate": 4.671001533049334e-06, "loss": 0.5509, "step": 7544 }, { "epoch": 0.54, "grad_norm": 1.67701327800366, "learning_rate": 4.669854865197126e-06, "loss": 0.5517, "step": 7545 }, { "epoch": 0.54, "grad_norm": 1.5625369440612968, "learning_rate": 4.668708214784299e-06, "loss": 0.4588, "step": 7546 }, { "epoch": 0.54, "grad_norm": 1.4726044404341252, "learning_rate": 4.6675615818714295e-06, "loss": 0.4374, "step": 7547 }, { "epoch": 0.54, "grad_norm": 1.687053892557023, "learning_rate": 4.666414966519084e-06, "loss": 0.6007, "step": 7548 }, { "epoch": 0.54, "grad_norm": 1.7850736355110004, "learning_rate": 4.66526836878783e-06, "loss": 0.5397, "step": 7549 }, { "epoch": 0.54, "grad_norm": 1.6126145146068027, "learning_rate": 4.664121788738233e-06, "loss": 0.506, "step": 7550 }, { "epoch": 0.54, "grad_norm": 1.8616579008807657, "learning_rate": 4.662975226430863e-06, "loss": 0.5476, "step": 7551 }, { "epoch": 0.54, "grad_norm": 1.9274266295411446, "learning_rate": 4.661828681926283e-06, "loss": 0.5581, "step": 7552 }, { "epoch": 0.54, "grad_norm": 2.338469162586638, "learning_rate": 4.660682155285059e-06, "loss": 0.5623, "step": 7553 }, { "epoch": 0.54, "grad_norm": 0.7120003277831042, "learning_rate": 4.659535646567751e-06, "loss": 0.4514, "step": 7554 }, { "epoch": 0.54, "grad_norm": 2.235764161155742, "learning_rate": 4.658389155834926e-06, "loss": 0.656, "step": 7555 }, { "epoch": 0.54, "grad_norm": 1.764842734061612, "learning_rate": 4.657242683147143e-06, "loss": 0.6434, "step": 7556 }, { "epoch": 0.54, "grad_norm": 1.7623744207581058, "learning_rate": 4.6560962285649605e-06, "loss": 0.5433, "step": 7557 }, { "epoch": 0.54, "grad_norm": 1.860569038786662, "learning_rate": 4.6549497921489434e-06, "loss": 0.5242, "step": 7558 }, { "epoch": 0.54, "grad_norm": 1.910056511685819, "learning_rate": 4.6538033739596465e-06, "loss": 0.5789, "step": 7559 }, { "epoch": 0.54, "grad_norm": 2.0076845348305072, "learning_rate": 4.652656974057629e-06, "loss": 0.5233, "step": 7560 }, { "epoch": 0.54, "grad_norm": 1.7039585328243971, "learning_rate": 4.651510592503445e-06, "loss": 0.5459, "step": 7561 }, { "epoch": 0.54, "grad_norm": 1.7961498695469726, "learning_rate": 4.650364229357655e-06, "loss": 0.5605, "step": 7562 }, { "epoch": 0.54, "grad_norm": 2.052907591493439, "learning_rate": 4.64921788468081e-06, "loss": 0.5585, "step": 7563 }, { "epoch": 0.54, "grad_norm": 1.6352697779451293, "learning_rate": 4.6480715585334656e-06, "loss": 0.4648, "step": 7564 }, { "epoch": 0.54, "grad_norm": 3.5300353010117553, "learning_rate": 4.646925250976172e-06, "loss": 0.5343, "step": 7565 }, { "epoch": 0.54, "grad_norm": 1.7139736828437666, "learning_rate": 4.645778962069485e-06, "loss": 0.5419, "step": 7566 }, { "epoch": 0.54, "grad_norm": 1.8073851151583031, "learning_rate": 4.644632691873953e-06, "loss": 0.5287, "step": 7567 }, { "epoch": 0.54, "grad_norm": 1.9769091516347552, "learning_rate": 4.6434864404501265e-06, "loss": 0.5874, "step": 7568 }, { "epoch": 0.54, "grad_norm": 1.4654161779858512, "learning_rate": 4.642340207858552e-06, "loss": 0.5115, "step": 7569 }, { "epoch": 0.54, "grad_norm": 2.196747630514247, "learning_rate": 4.641193994159783e-06, "loss": 0.5338, "step": 7570 }, { "epoch": 0.54, "grad_norm": 1.7920066476461174, "learning_rate": 4.640047799414362e-06, "loss": 0.5317, "step": 7571 }, { "epoch": 0.54, "grad_norm": 0.7390100588519941, "learning_rate": 4.638901623682834e-06, "loss": 0.4587, "step": 7572 }, { "epoch": 0.54, "grad_norm": 1.6195240535306425, "learning_rate": 4.637755467025748e-06, "loss": 0.4743, "step": 7573 }, { "epoch": 0.54, "grad_norm": 1.8720107526740195, "learning_rate": 4.636609329503645e-06, "loss": 0.5491, "step": 7574 }, { "epoch": 0.54, "grad_norm": 2.4409551971810557, "learning_rate": 4.635463211177068e-06, "loss": 0.5046, "step": 7575 }, { "epoch": 0.54, "grad_norm": 2.1450486629150407, "learning_rate": 4.634317112106559e-06, "loss": 0.5481, "step": 7576 }, { "epoch": 0.54, "grad_norm": 1.5210384682958107, "learning_rate": 4.633171032352661e-06, "loss": 0.552, "step": 7577 }, { "epoch": 0.54, "grad_norm": 1.6018971079252604, "learning_rate": 4.632024971975912e-06, "loss": 0.5033, "step": 7578 }, { "epoch": 0.54, "grad_norm": 1.5788064522351564, "learning_rate": 4.630878931036851e-06, "loss": 0.5311, "step": 7579 }, { "epoch": 0.54, "grad_norm": 1.8884491861340373, "learning_rate": 4.629732909596013e-06, "loss": 0.558, "step": 7580 }, { "epoch": 0.54, "grad_norm": 0.6871614424468498, "learning_rate": 4.628586907713939e-06, "loss": 0.4484, "step": 7581 }, { "epoch": 0.54, "grad_norm": 1.453696925806882, "learning_rate": 4.627440925451164e-06, "loss": 0.5224, "step": 7582 }, { "epoch": 0.54, "grad_norm": 1.822978640784443, "learning_rate": 4.626294962868222e-06, "loss": 0.5722, "step": 7583 }, { "epoch": 0.54, "grad_norm": 0.7509044937362566, "learning_rate": 4.6251490200256434e-06, "loss": 0.4414, "step": 7584 }, { "epoch": 0.54, "grad_norm": 1.7214892707522254, "learning_rate": 4.624003096983966e-06, "loss": 0.5416, "step": 7585 }, { "epoch": 0.54, "grad_norm": 1.8441740002425562, "learning_rate": 4.622857193803719e-06, "loss": 0.5133, "step": 7586 }, { "epoch": 0.54, "grad_norm": 2.068651370457747, "learning_rate": 4.6217113105454315e-06, "loss": 0.4887, "step": 7587 }, { "epoch": 0.54, "grad_norm": 1.7297711968698046, "learning_rate": 4.620565447269636e-06, "loss": 0.551, "step": 7588 }, { "epoch": 0.54, "grad_norm": 2.3613889553371146, "learning_rate": 4.619419604036858e-06, "loss": 0.4952, "step": 7589 }, { "epoch": 0.54, "grad_norm": 1.7976198310358638, "learning_rate": 4.618273780907628e-06, "loss": 0.5747, "step": 7590 }, { "epoch": 0.54, "grad_norm": 1.869796665322123, "learning_rate": 4.617127977942468e-06, "loss": 0.5506, "step": 7591 }, { "epoch": 0.54, "grad_norm": 1.5941806056331238, "learning_rate": 4.615982195201907e-06, "loss": 0.5724, "step": 7592 }, { "epoch": 0.54, "grad_norm": 1.8405161065728752, "learning_rate": 4.614836432746468e-06, "loss": 0.5445, "step": 7593 }, { "epoch": 0.54, "grad_norm": 1.7760005902782898, "learning_rate": 4.613690690636674e-06, "loss": 0.5305, "step": 7594 }, { "epoch": 0.54, "grad_norm": 3.37875202238478, "learning_rate": 4.612544968933044e-06, "loss": 0.5059, "step": 7595 }, { "epoch": 0.54, "grad_norm": 1.709770939993201, "learning_rate": 4.611399267696103e-06, "loss": 0.5428, "step": 7596 }, { "epoch": 0.54, "grad_norm": 1.8408382395978682, "learning_rate": 4.610253586986371e-06, "loss": 0.6028, "step": 7597 }, { "epoch": 0.54, "grad_norm": 0.8089010793998264, "learning_rate": 4.609107926864365e-06, "loss": 0.4255, "step": 7598 }, { "epoch": 0.54, "grad_norm": 2.0895219430232768, "learning_rate": 4.607962287390601e-06, "loss": 0.5383, "step": 7599 }, { "epoch": 0.54, "grad_norm": 1.9638562906134536, "learning_rate": 4.606816668625599e-06, "loss": 0.5189, "step": 7600 }, { "epoch": 0.54, "grad_norm": 1.6023775818054875, "learning_rate": 4.6056710706298724e-06, "loss": 0.5142, "step": 7601 }, { "epoch": 0.54, "grad_norm": 4.806969880771486, "learning_rate": 4.604525493463936e-06, "loss": 0.5302, "step": 7602 }, { "epoch": 0.54, "grad_norm": 2.3056568312731347, "learning_rate": 4.603379937188302e-06, "loss": 0.4936, "step": 7603 }, { "epoch": 0.54, "grad_norm": 1.6195062941307052, "learning_rate": 4.602234401863484e-06, "loss": 0.5233, "step": 7604 }, { "epoch": 0.54, "grad_norm": 1.5169836155437795, "learning_rate": 4.601088887549994e-06, "loss": 0.4863, "step": 7605 }, { "epoch": 0.54, "grad_norm": 1.6096347028939986, "learning_rate": 4.599943394308338e-06, "loss": 0.5699, "step": 7606 }, { "epoch": 0.54, "grad_norm": 1.4642822096499069, "learning_rate": 4.5987979221990305e-06, "loss": 0.5282, "step": 7607 }, { "epoch": 0.54, "grad_norm": 1.793618157036969, "learning_rate": 4.597652471282575e-06, "loss": 0.5897, "step": 7608 }, { "epoch": 0.54, "grad_norm": 1.6536252474768351, "learning_rate": 4.59650704161948e-06, "loss": 0.5637, "step": 7609 }, { "epoch": 0.54, "grad_norm": 2.728008210731531, "learning_rate": 4.5953616332702485e-06, "loss": 0.543, "step": 7610 }, { "epoch": 0.54, "grad_norm": 2.215771238026647, "learning_rate": 4.594216246295387e-06, "loss": 0.5265, "step": 7611 }, { "epoch": 0.54, "grad_norm": 1.9059299452969218, "learning_rate": 4.5930708807554004e-06, "loss": 0.581, "step": 7612 }, { "epoch": 0.54, "grad_norm": 1.5196534652805553, "learning_rate": 4.591925536710788e-06, "loss": 0.44, "step": 7613 }, { "epoch": 0.54, "grad_norm": 1.678691175535686, "learning_rate": 4.5907802142220494e-06, "loss": 0.53, "step": 7614 }, { "epoch": 0.54, "grad_norm": 1.600846341865788, "learning_rate": 4.589634913349689e-06, "loss": 0.5403, "step": 7615 }, { "epoch": 0.54, "grad_norm": 3.38717387211115, "learning_rate": 4.588489634154203e-06, "loss": 0.5351, "step": 7616 }, { "epoch": 0.54, "grad_norm": 1.617404804983233, "learning_rate": 4.587344376696088e-06, "loss": 0.5559, "step": 7617 }, { "epoch": 0.54, "grad_norm": 2.3817129085341744, "learning_rate": 4.586199141035841e-06, "loss": 0.618, "step": 7618 }, { "epoch": 0.54, "grad_norm": 1.5826513125942436, "learning_rate": 4.585053927233957e-06, "loss": 0.5442, "step": 7619 }, { "epoch": 0.54, "grad_norm": 1.5619490953647914, "learning_rate": 4.583908735350932e-06, "loss": 0.548, "step": 7620 }, { "epoch": 0.54, "grad_norm": 2.2240305781925787, "learning_rate": 4.582763565447256e-06, "loss": 0.5641, "step": 7621 }, { "epoch": 0.54, "grad_norm": 3.940643336011773, "learning_rate": 4.581618417583423e-06, "loss": 0.4832, "step": 7622 }, { "epoch": 0.54, "grad_norm": 2.0879022804492866, "learning_rate": 4.580473291819924e-06, "loss": 0.5123, "step": 7623 }, { "epoch": 0.54, "grad_norm": 1.8394673957766323, "learning_rate": 4.579328188217247e-06, "loss": 0.5405, "step": 7624 }, { "epoch": 0.54, "grad_norm": 1.9240235134683301, "learning_rate": 4.578183106835878e-06, "loss": 0.5288, "step": 7625 }, { "epoch": 0.54, "grad_norm": 2.3315921907607766, "learning_rate": 4.577038047736309e-06, "loss": 0.4827, "step": 7626 }, { "epoch": 0.54, "grad_norm": 3.187533379601238, "learning_rate": 4.575893010979023e-06, "loss": 0.5296, "step": 7627 }, { "epoch": 0.54, "grad_norm": 1.6058530730610339, "learning_rate": 4.574747996624506e-06, "loss": 0.4814, "step": 7628 }, { "epoch": 0.54, "grad_norm": 2.020645400653702, "learning_rate": 4.573603004733238e-06, "loss": 0.5523, "step": 7629 }, { "epoch": 0.54, "grad_norm": 1.689399053340462, "learning_rate": 4.5724580353657075e-06, "loss": 0.5511, "step": 7630 }, { "epoch": 0.54, "grad_norm": 2.2100330188767803, "learning_rate": 4.571313088582392e-06, "loss": 0.4462, "step": 7631 }, { "epoch": 0.54, "grad_norm": 1.8245184809011534, "learning_rate": 4.57016816444377e-06, "loss": 0.6503, "step": 7632 }, { "epoch": 0.54, "grad_norm": 2.045863856493016, "learning_rate": 4.569023263010321e-06, "loss": 0.5093, "step": 7633 }, { "epoch": 0.54, "grad_norm": 1.6799133979958254, "learning_rate": 4.567878384342526e-06, "loss": 0.5503, "step": 7634 }, { "epoch": 0.54, "grad_norm": 1.9077899021463323, "learning_rate": 4.5667335285008585e-06, "loss": 0.5683, "step": 7635 }, { "epoch": 0.54, "grad_norm": 1.8564125806851104, "learning_rate": 4.565588695545793e-06, "loss": 0.5852, "step": 7636 }, { "epoch": 0.54, "grad_norm": 1.612345130815199, "learning_rate": 4.5644438855378055e-06, "loss": 0.522, "step": 7637 }, { "epoch": 0.54, "grad_norm": 1.7748344477176585, "learning_rate": 4.563299098537368e-06, "loss": 0.5378, "step": 7638 }, { "epoch": 0.54, "grad_norm": 2.0720019144953468, "learning_rate": 4.562154334604952e-06, "loss": 0.5841, "step": 7639 }, { "epoch": 0.54, "grad_norm": 1.7675219058203713, "learning_rate": 4.561009593801027e-06, "loss": 0.5636, "step": 7640 }, { "epoch": 0.54, "grad_norm": 1.9478367315395415, "learning_rate": 4.559864876186062e-06, "loss": 0.5205, "step": 7641 }, { "epoch": 0.54, "grad_norm": 1.5797460886108745, "learning_rate": 4.558720181820527e-06, "loss": 0.5207, "step": 7642 }, { "epoch": 0.54, "grad_norm": 1.9834704958813896, "learning_rate": 4.557575510764888e-06, "loss": 0.6434, "step": 7643 }, { "epoch": 0.54, "grad_norm": 1.7054655715297211, "learning_rate": 4.5564308630796075e-06, "loss": 0.5256, "step": 7644 }, { "epoch": 0.54, "grad_norm": 1.847900584734915, "learning_rate": 4.555286238825154e-06, "loss": 0.5561, "step": 7645 }, { "epoch": 0.54, "grad_norm": 1.9479305743045578, "learning_rate": 4.554141638061987e-06, "loss": 0.5458, "step": 7646 }, { "epoch": 0.54, "grad_norm": 1.631118807804268, "learning_rate": 4.5529970608505695e-06, "loss": 0.4943, "step": 7647 }, { "epoch": 0.54, "grad_norm": 2.0575653056875907, "learning_rate": 4.551852507251361e-06, "loss": 0.54, "step": 7648 }, { "epoch": 0.54, "grad_norm": 2.136427946294261, "learning_rate": 4.550707977324823e-06, "loss": 0.4625, "step": 7649 }, { "epoch": 0.54, "grad_norm": 1.939139534726499, "learning_rate": 4.549563471131412e-06, "loss": 0.5649, "step": 7650 }, { "epoch": 0.54, "grad_norm": 1.800416644486573, "learning_rate": 4.548418988731585e-06, "loss": 0.5431, "step": 7651 }, { "epoch": 0.54, "grad_norm": 2.10889282232532, "learning_rate": 4.547274530185795e-06, "loss": 0.4946, "step": 7652 }, { "epoch": 0.54, "grad_norm": 1.6960690553671018, "learning_rate": 4.546130095554501e-06, "loss": 0.5471, "step": 7653 }, { "epoch": 0.54, "grad_norm": 2.064963995183518, "learning_rate": 4.544985684898151e-06, "loss": 0.4973, "step": 7654 }, { "epoch": 0.54, "grad_norm": 2.1727105502548274, "learning_rate": 4.543841298277201e-06, "loss": 0.5078, "step": 7655 }, { "epoch": 0.54, "grad_norm": 1.6977705730668737, "learning_rate": 4.542696935752098e-06, "loss": 0.5315, "step": 7656 }, { "epoch": 0.54, "grad_norm": 0.7393213659233616, "learning_rate": 4.541552597383293e-06, "loss": 0.4467, "step": 7657 }, { "epoch": 0.54, "grad_norm": 1.5177484012035631, "learning_rate": 4.540408283231234e-06, "loss": 0.4986, "step": 7658 }, { "epoch": 0.54, "grad_norm": 1.6127223065519982, "learning_rate": 4.539263993356365e-06, "loss": 0.5877, "step": 7659 }, { "epoch": 0.54, "grad_norm": 1.9509470657316377, "learning_rate": 4.538119727819133e-06, "loss": 0.494, "step": 7660 }, { "epoch": 0.54, "grad_norm": 1.8727293336229307, "learning_rate": 4.536975486679983e-06, "loss": 0.4461, "step": 7661 }, { "epoch": 0.54, "grad_norm": 1.6301176566985953, "learning_rate": 4.535831269999358e-06, "loss": 0.5677, "step": 7662 }, { "epoch": 0.54, "grad_norm": 1.692768504940511, "learning_rate": 4.534687077837695e-06, "loss": 0.539, "step": 7663 }, { "epoch": 0.54, "grad_norm": 1.4946616473925367, "learning_rate": 4.5335429102554405e-06, "loss": 0.5315, "step": 7664 }, { "epoch": 0.54, "grad_norm": 1.4675399284294024, "learning_rate": 4.532398767313029e-06, "loss": 0.4846, "step": 7665 }, { "epoch": 0.54, "grad_norm": 0.7127809761190446, "learning_rate": 4.5312546490708995e-06, "loss": 0.4382, "step": 7666 }, { "epoch": 0.54, "grad_norm": 2.2180101341416854, "learning_rate": 4.530110555589485e-06, "loss": 0.5019, "step": 7667 }, { "epoch": 0.54, "grad_norm": 1.8957032705876284, "learning_rate": 4.5289664869292264e-06, "loss": 0.4855, "step": 7668 }, { "epoch": 0.54, "grad_norm": 2.1571479308015373, "learning_rate": 4.527822443150552e-06, "loss": 0.5095, "step": 7669 }, { "epoch": 0.54, "grad_norm": 0.6686081120272963, "learning_rate": 4.526678424313896e-06, "loss": 0.4399, "step": 7670 }, { "epoch": 0.54, "grad_norm": 2.0223543972476343, "learning_rate": 4.525534430479691e-06, "loss": 0.5347, "step": 7671 }, { "epoch": 0.54, "grad_norm": 1.791821063008179, "learning_rate": 4.524390461708365e-06, "loss": 0.5368, "step": 7672 }, { "epoch": 0.54, "grad_norm": 1.79738386544397, "learning_rate": 4.523246518060346e-06, "loss": 0.5381, "step": 7673 }, { "epoch": 0.54, "grad_norm": 1.6233618135651613, "learning_rate": 4.52210259959606e-06, "loss": 0.5987, "step": 7674 }, { "epoch": 0.54, "grad_norm": 1.5249505168901318, "learning_rate": 4.520958706375936e-06, "loss": 0.5532, "step": 7675 }, { "epoch": 0.54, "grad_norm": 1.9569769009766622, "learning_rate": 4.519814838460395e-06, "loss": 0.562, "step": 7676 }, { "epoch": 0.54, "grad_norm": 1.8764533455404855, "learning_rate": 4.518670995909862e-06, "loss": 0.546, "step": 7677 }, { "epoch": 0.54, "grad_norm": 1.6636347110932128, "learning_rate": 4.517527178784756e-06, "loss": 0.543, "step": 7678 }, { "epoch": 0.54, "grad_norm": 2.6867445464250483, "learning_rate": 4.516383387145503e-06, "loss": 0.5423, "step": 7679 }, { "epoch": 0.54, "grad_norm": 0.7648375856884928, "learning_rate": 4.515239621052516e-06, "loss": 0.4537, "step": 7680 }, { "epoch": 0.55, "grad_norm": 1.8285481330077449, "learning_rate": 4.514095880566216e-06, "loss": 0.4842, "step": 7681 }, { "epoch": 0.55, "grad_norm": 1.7989338050923456, "learning_rate": 4.512952165747016e-06, "loss": 0.5517, "step": 7682 }, { "epoch": 0.55, "grad_norm": 1.7144875513927733, "learning_rate": 4.5118084766553346e-06, "loss": 0.5214, "step": 7683 }, { "epoch": 0.55, "grad_norm": 1.8973409318100385, "learning_rate": 4.510664813351583e-06, "loss": 0.5293, "step": 7684 }, { "epoch": 0.55, "grad_norm": 1.6250332793448201, "learning_rate": 4.509521175896173e-06, "loss": 0.5387, "step": 7685 }, { "epoch": 0.55, "grad_norm": 1.7591710891050234, "learning_rate": 4.508377564349518e-06, "loss": 0.5731, "step": 7686 }, { "epoch": 0.55, "grad_norm": 2.052363344664565, "learning_rate": 4.5072339787720265e-06, "loss": 0.5895, "step": 7687 }, { "epoch": 0.55, "grad_norm": 1.6699829017765178, "learning_rate": 4.5060904192241055e-06, "loss": 0.5004, "step": 7688 }, { "epoch": 0.55, "grad_norm": 2.2388971716545374, "learning_rate": 4.50494688576616e-06, "loss": 0.4857, "step": 7689 }, { "epoch": 0.55, "grad_norm": 1.7273187411995266, "learning_rate": 4.503803378458601e-06, "loss": 0.4742, "step": 7690 }, { "epoch": 0.55, "grad_norm": 1.6367435543000965, "learning_rate": 4.502659897361826e-06, "loss": 0.5364, "step": 7691 }, { "epoch": 0.55, "grad_norm": 1.473924780610634, "learning_rate": 4.501516442536242e-06, "loss": 0.5576, "step": 7692 }, { "epoch": 0.55, "grad_norm": 2.1117095342295813, "learning_rate": 4.500373014042247e-06, "loss": 0.5642, "step": 7693 }, { "epoch": 0.55, "grad_norm": 1.688114138860154, "learning_rate": 4.499229611940244e-06, "loss": 0.5667, "step": 7694 }, { "epoch": 0.55, "grad_norm": 2.164235412003158, "learning_rate": 4.4980862362906295e-06, "loss": 0.5057, "step": 7695 }, { "epoch": 0.55, "grad_norm": 1.792499307726391, "learning_rate": 4.496942887153802e-06, "loss": 0.5301, "step": 7696 }, { "epoch": 0.55, "grad_norm": 1.637483835497282, "learning_rate": 4.495799564590152e-06, "loss": 0.518, "step": 7697 }, { "epoch": 0.55, "grad_norm": 0.7759101118563858, "learning_rate": 4.494656268660081e-06, "loss": 0.4614, "step": 7698 }, { "epoch": 0.55, "grad_norm": 1.5928222189726307, "learning_rate": 4.493512999423976e-06, "loss": 0.527, "step": 7699 }, { "epoch": 0.55, "grad_norm": 1.5662509683234036, "learning_rate": 4.4923697569422305e-06, "loss": 0.5529, "step": 7700 }, { "epoch": 0.55, "grad_norm": 1.8520892995384461, "learning_rate": 4.491226541275236e-06, "loss": 0.5561, "step": 7701 }, { "epoch": 0.55, "grad_norm": 2.0865850017145178, "learning_rate": 4.49008335248338e-06, "loss": 0.5837, "step": 7702 }, { "epoch": 0.55, "grad_norm": 1.4921219983983958, "learning_rate": 4.488940190627049e-06, "loss": 0.5278, "step": 7703 }, { "epoch": 0.55, "grad_norm": 1.8852311466559046, "learning_rate": 4.487797055766626e-06, "loss": 0.5773, "step": 7704 }, { "epoch": 0.55, "grad_norm": 1.5774348349540475, "learning_rate": 4.486653947962501e-06, "loss": 0.4948, "step": 7705 }, { "epoch": 0.55, "grad_norm": 2.174435118082163, "learning_rate": 4.485510867275052e-06, "loss": 0.5067, "step": 7706 }, { "epoch": 0.55, "grad_norm": 1.9398375378860961, "learning_rate": 4.484367813764665e-06, "loss": 0.5453, "step": 7707 }, { "epoch": 0.55, "grad_norm": 2.2664044771741256, "learning_rate": 4.4832247874917136e-06, "loss": 0.4743, "step": 7708 }, { "epoch": 0.55, "grad_norm": 1.8430589181201231, "learning_rate": 4.482081788516584e-06, "loss": 0.5341, "step": 7709 }, { "epoch": 0.55, "grad_norm": 1.5072951943065, "learning_rate": 4.480938816899647e-06, "loss": 0.4804, "step": 7710 }, { "epoch": 0.55, "grad_norm": 1.6013337324441983, "learning_rate": 4.479795872701282e-06, "loss": 0.4927, "step": 7711 }, { "epoch": 0.55, "grad_norm": 1.635300689955287, "learning_rate": 4.478652955981858e-06, "loss": 0.5702, "step": 7712 }, { "epoch": 0.55, "grad_norm": 1.9098950651326774, "learning_rate": 4.4775100668017554e-06, "loss": 0.5955, "step": 7713 }, { "epoch": 0.55, "grad_norm": 1.5541605174661794, "learning_rate": 4.4763672052213394e-06, "loss": 0.5604, "step": 7714 }, { "epoch": 0.55, "grad_norm": 1.9236072208088693, "learning_rate": 4.475224371300983e-06, "loss": 0.5446, "step": 7715 }, { "epoch": 0.55, "grad_norm": 1.7139521246929879, "learning_rate": 4.474081565101053e-06, "loss": 0.4822, "step": 7716 }, { "epoch": 0.55, "grad_norm": 1.7102231859800985, "learning_rate": 4.4729387866819164e-06, "loss": 0.5394, "step": 7717 }, { "epoch": 0.55, "grad_norm": 5.959213103141516, "learning_rate": 4.471796036103941e-06, "loss": 0.5575, "step": 7718 }, { "epoch": 0.55, "grad_norm": 1.648044139865756, "learning_rate": 4.4706533134274854e-06, "loss": 0.4833, "step": 7719 }, { "epoch": 0.55, "grad_norm": 1.7298676704751736, "learning_rate": 4.469510618712918e-06, "loss": 0.5621, "step": 7720 }, { "epoch": 0.55, "grad_norm": 1.770033057170539, "learning_rate": 4.4683679520205965e-06, "loss": 0.5338, "step": 7721 }, { "epoch": 0.55, "grad_norm": 1.7800747116793412, "learning_rate": 4.467225313410882e-06, "loss": 0.5369, "step": 7722 }, { "epoch": 0.55, "grad_norm": 2.2214112758991145, "learning_rate": 4.46608270294413e-06, "loss": 0.4733, "step": 7723 }, { "epoch": 0.55, "grad_norm": 1.8503249784974822, "learning_rate": 4.4649401206807e-06, "loss": 0.5322, "step": 7724 }, { "epoch": 0.55, "grad_norm": 1.762466857644363, "learning_rate": 4.463797566680946e-06, "loss": 0.5468, "step": 7725 }, { "epoch": 0.55, "grad_norm": 2.0072220912952816, "learning_rate": 4.462655041005222e-06, "loss": 0.6015, "step": 7726 }, { "epoch": 0.55, "grad_norm": 1.75281044662948, "learning_rate": 4.4615125437138775e-06, "loss": 0.5198, "step": 7727 }, { "epoch": 0.55, "grad_norm": 3.0596949222714342, "learning_rate": 4.460370074867265e-06, "loss": 0.5279, "step": 7728 }, { "epoch": 0.55, "grad_norm": 2.0054057441784052, "learning_rate": 4.459227634525736e-06, "loss": 0.5499, "step": 7729 }, { "epoch": 0.55, "grad_norm": 1.839333235657605, "learning_rate": 4.458085222749636e-06, "loss": 0.5667, "step": 7730 }, { "epoch": 0.55, "grad_norm": 2.318173791014407, "learning_rate": 4.456942839599308e-06, "loss": 0.5696, "step": 7731 }, { "epoch": 0.55, "grad_norm": 2.2521098044487142, "learning_rate": 4.4558004851351015e-06, "loss": 0.4785, "step": 7732 }, { "epoch": 0.55, "grad_norm": 1.7663475032848635, "learning_rate": 4.454658159417356e-06, "loss": 0.5022, "step": 7733 }, { "epoch": 0.55, "grad_norm": 1.7338690966588735, "learning_rate": 4.453515862506414e-06, "loss": 0.5425, "step": 7734 }, { "epoch": 0.55, "grad_norm": 1.9360545917811034, "learning_rate": 4.452373594462618e-06, "loss": 0.6101, "step": 7735 }, { "epoch": 0.55, "grad_norm": 2.0464884826813248, "learning_rate": 4.451231355346303e-06, "loss": 0.5643, "step": 7736 }, { "epoch": 0.55, "grad_norm": 1.5005504207468288, "learning_rate": 4.450089145217808e-06, "loss": 0.5102, "step": 7737 }, { "epoch": 0.55, "grad_norm": 1.727017729026395, "learning_rate": 4.448946964137465e-06, "loss": 0.5181, "step": 7738 }, { "epoch": 0.55, "grad_norm": 1.6387893905382804, "learning_rate": 4.4478048121656145e-06, "loss": 0.6241, "step": 7739 }, { "epoch": 0.55, "grad_norm": 2.48777931300927, "learning_rate": 4.446662689362583e-06, "loss": 0.5425, "step": 7740 }, { "epoch": 0.55, "grad_norm": 2.4402385396404056, "learning_rate": 4.445520595788705e-06, "loss": 0.5177, "step": 7741 }, { "epoch": 0.55, "grad_norm": 1.9916151747940631, "learning_rate": 4.444378531504304e-06, "loss": 0.5302, "step": 7742 }, { "epoch": 0.55, "grad_norm": 0.802666306985771, "learning_rate": 4.443236496569714e-06, "loss": 0.4505, "step": 7743 }, { "epoch": 0.55, "grad_norm": 0.7244211009866435, "learning_rate": 4.442094491045259e-06, "loss": 0.4403, "step": 7744 }, { "epoch": 0.55, "grad_norm": 3.726832690020085, "learning_rate": 4.440952514991265e-06, "loss": 0.5703, "step": 7745 }, { "epoch": 0.55, "grad_norm": 1.6716270583887567, "learning_rate": 4.43981056846805e-06, "loss": 0.5646, "step": 7746 }, { "epoch": 0.55, "grad_norm": 2.3255347577093994, "learning_rate": 4.438668651535941e-06, "loss": 0.5496, "step": 7747 }, { "epoch": 0.55, "grad_norm": 3.846344863679638, "learning_rate": 4.437526764255256e-06, "loss": 0.5129, "step": 7748 }, { "epoch": 0.55, "grad_norm": 1.7090142254369698, "learning_rate": 4.4363849066863115e-06, "loss": 0.5313, "step": 7749 }, { "epoch": 0.55, "grad_norm": 2.232062103018914, "learning_rate": 4.435243078889429e-06, "loss": 0.4953, "step": 7750 }, { "epoch": 0.55, "grad_norm": 2.3528487444536808, "learning_rate": 4.4341012809249185e-06, "loss": 0.6096, "step": 7751 }, { "epoch": 0.55, "grad_norm": 1.5811698978533302, "learning_rate": 4.432959512853096e-06, "loss": 0.5278, "step": 7752 }, { "epoch": 0.55, "grad_norm": 1.5394815631865082, "learning_rate": 4.431817774734273e-06, "loss": 0.5717, "step": 7753 }, { "epoch": 0.55, "grad_norm": 0.7537684392176923, "learning_rate": 4.430676066628763e-06, "loss": 0.4563, "step": 7754 }, { "epoch": 0.55, "grad_norm": 1.6357750179241812, "learning_rate": 4.4295343885968706e-06, "loss": 0.5029, "step": 7755 }, { "epoch": 0.55, "grad_norm": 1.5995602443279022, "learning_rate": 4.428392740698905e-06, "loss": 0.4934, "step": 7756 }, { "epoch": 0.55, "grad_norm": 1.6795486234694066, "learning_rate": 4.42725112299517e-06, "loss": 0.5059, "step": 7757 }, { "epoch": 0.55, "grad_norm": 1.7857132576727037, "learning_rate": 4.426109535545972e-06, "loss": 0.4905, "step": 7758 }, { "epoch": 0.55, "grad_norm": 1.4770187521344542, "learning_rate": 4.424967978411615e-06, "loss": 0.5503, "step": 7759 }, { "epoch": 0.55, "grad_norm": 1.8805423314932037, "learning_rate": 4.423826451652398e-06, "loss": 0.5298, "step": 7760 }, { "epoch": 0.55, "grad_norm": 2.1024821805331517, "learning_rate": 4.422684955328617e-06, "loss": 0.5793, "step": 7761 }, { "epoch": 0.55, "grad_norm": 1.895229857708383, "learning_rate": 4.421543489500576e-06, "loss": 0.5528, "step": 7762 }, { "epoch": 0.55, "grad_norm": 1.8558857100994492, "learning_rate": 4.420402054228567e-06, "loss": 0.6087, "step": 7763 }, { "epoch": 0.55, "grad_norm": 6.069955483937339, "learning_rate": 4.419260649572883e-06, "loss": 0.5354, "step": 7764 }, { "epoch": 0.55, "grad_norm": 1.9683629409502692, "learning_rate": 4.418119275593823e-06, "loss": 0.5686, "step": 7765 }, { "epoch": 0.55, "grad_norm": 1.8786774145762464, "learning_rate": 4.416977932351672e-06, "loss": 0.5603, "step": 7766 }, { "epoch": 0.55, "grad_norm": 1.77384946148783, "learning_rate": 4.415836619906724e-06, "loss": 0.602, "step": 7767 }, { "epoch": 0.55, "grad_norm": 1.844282904169627, "learning_rate": 4.414695338319263e-06, "loss": 0.613, "step": 7768 }, { "epoch": 0.55, "grad_norm": 1.9171261247811033, "learning_rate": 4.41355408764958e-06, "loss": 0.5833, "step": 7769 }, { "epoch": 0.55, "grad_norm": 1.7451110984460656, "learning_rate": 4.412412867957957e-06, "loss": 0.5413, "step": 7770 }, { "epoch": 0.55, "grad_norm": 1.473266560572293, "learning_rate": 4.411271679304675e-06, "loss": 0.5089, "step": 7771 }, { "epoch": 0.55, "grad_norm": 2.007570773228279, "learning_rate": 4.410130521750019e-06, "loss": 0.5103, "step": 7772 }, { "epoch": 0.55, "grad_norm": 1.8712021694148837, "learning_rate": 4.4089893953542675e-06, "loss": 0.5544, "step": 7773 }, { "epoch": 0.55, "grad_norm": 1.5133033506015046, "learning_rate": 4.4078483001777e-06, "loss": 0.5159, "step": 7774 }, { "epoch": 0.55, "grad_norm": 0.7197047394891494, "learning_rate": 4.406707236280592e-06, "loss": 0.4108, "step": 7775 }, { "epoch": 0.55, "grad_norm": 1.6294675883751288, "learning_rate": 4.405566203723215e-06, "loss": 0.4921, "step": 7776 }, { "epoch": 0.55, "grad_norm": 1.6664767756236918, "learning_rate": 4.404425202565847e-06, "loss": 0.5673, "step": 7777 }, { "epoch": 0.55, "grad_norm": 2.388514057804789, "learning_rate": 4.40328423286876e-06, "loss": 0.539, "step": 7778 }, { "epoch": 0.55, "grad_norm": 2.9914413452668716, "learning_rate": 4.4021432946922185e-06, "loss": 0.5772, "step": 7779 }, { "epoch": 0.55, "grad_norm": 2.195722334000968, "learning_rate": 4.4010023880964945e-06, "loss": 0.5544, "step": 7780 }, { "epoch": 0.55, "grad_norm": 0.730006847796988, "learning_rate": 4.399861513141856e-06, "loss": 0.4259, "step": 7781 }, { "epoch": 0.55, "grad_norm": 1.6588769059618975, "learning_rate": 4.398720669888565e-06, "loss": 0.572, "step": 7782 }, { "epoch": 0.55, "grad_norm": 0.8216655668975045, "learning_rate": 4.397579858396884e-06, "loss": 0.463, "step": 7783 }, { "epoch": 0.55, "grad_norm": 1.7072117039453567, "learning_rate": 4.396439078727078e-06, "loss": 0.5346, "step": 7784 }, { "epoch": 0.55, "grad_norm": 1.9697290102440417, "learning_rate": 4.395298330939405e-06, "loss": 0.5368, "step": 7785 }, { "epoch": 0.55, "grad_norm": 2.1178385113403535, "learning_rate": 4.394157615094123e-06, "loss": 0.5385, "step": 7786 }, { "epoch": 0.55, "grad_norm": 1.9039151595289425, "learning_rate": 4.393016931251488e-06, "loss": 0.6318, "step": 7787 }, { "epoch": 0.55, "grad_norm": 1.718421903359631, "learning_rate": 4.391876279471755e-06, "loss": 0.5053, "step": 7788 }, { "epoch": 0.55, "grad_norm": 0.6728876867902522, "learning_rate": 4.3907356598151794e-06, "loss": 0.4178, "step": 7789 }, { "epoch": 0.55, "grad_norm": 1.8231236125945454, "learning_rate": 4.389595072342011e-06, "loss": 0.5557, "step": 7790 }, { "epoch": 0.55, "grad_norm": 1.7420458927058629, "learning_rate": 4.388454517112496e-06, "loss": 0.5215, "step": 7791 }, { "epoch": 0.55, "grad_norm": 1.8631518385484427, "learning_rate": 4.387313994186888e-06, "loss": 0.5711, "step": 7792 }, { "epoch": 0.55, "grad_norm": 1.879147073204414, "learning_rate": 4.386173503625432e-06, "loss": 0.5617, "step": 7793 }, { "epoch": 0.55, "grad_norm": 1.84861051716151, "learning_rate": 4.3850330454883686e-06, "loss": 0.4952, "step": 7794 }, { "epoch": 0.55, "grad_norm": 1.7934780855399672, "learning_rate": 4.383892619835944e-06, "loss": 0.5179, "step": 7795 }, { "epoch": 0.55, "grad_norm": 1.776057060050692, "learning_rate": 4.3827522267284e-06, "loss": 0.5742, "step": 7796 }, { "epoch": 0.55, "grad_norm": 2.763400440336179, "learning_rate": 4.381611866225976e-06, "loss": 0.5537, "step": 7797 }, { "epoch": 0.55, "grad_norm": 1.6807216223020902, "learning_rate": 4.3804715383889056e-06, "loss": 0.5037, "step": 7798 }, { "epoch": 0.55, "grad_norm": 1.8115420558704574, "learning_rate": 4.379331243277429e-06, "loss": 0.5453, "step": 7799 }, { "epoch": 0.55, "grad_norm": 1.9751866537783156, "learning_rate": 4.378190980951781e-06, "loss": 0.5412, "step": 7800 }, { "epoch": 0.55, "grad_norm": 1.7440279714047158, "learning_rate": 4.37705075147219e-06, "loss": 0.5438, "step": 7801 }, { "epoch": 0.55, "grad_norm": 1.638571472114985, "learning_rate": 4.37591055489889e-06, "loss": 0.4844, "step": 7802 }, { "epoch": 0.55, "grad_norm": 1.5784936289648661, "learning_rate": 4.374770391292109e-06, "loss": 0.5324, "step": 7803 }, { "epoch": 0.55, "grad_norm": 1.9484976154087845, "learning_rate": 4.373630260712076e-06, "loss": 0.5702, "step": 7804 }, { "epoch": 0.55, "grad_norm": 1.6275976624791946, "learning_rate": 4.372490163219014e-06, "loss": 0.5105, "step": 7805 }, { "epoch": 0.55, "grad_norm": 1.7507898090464409, "learning_rate": 4.371350098873146e-06, "loss": 0.5245, "step": 7806 }, { "epoch": 0.55, "grad_norm": 0.7084981358941057, "learning_rate": 4.370210067734699e-06, "loss": 0.4516, "step": 7807 }, { "epoch": 0.55, "grad_norm": 1.6497529147543804, "learning_rate": 4.369070069863889e-06, "loss": 0.5618, "step": 7808 }, { "epoch": 0.55, "grad_norm": 1.95357466004288, "learning_rate": 4.367930105320936e-06, "loss": 0.5119, "step": 7809 }, { "epoch": 0.55, "grad_norm": 1.989629686565725, "learning_rate": 4.366790174166055e-06, "loss": 0.5912, "step": 7810 }, { "epoch": 0.55, "grad_norm": 1.7211167585283818, "learning_rate": 4.3656502764594646e-06, "loss": 0.5332, "step": 7811 }, { "epoch": 0.55, "grad_norm": 1.8431106689495764, "learning_rate": 4.364510412261376e-06, "loss": 0.5741, "step": 7812 }, { "epoch": 0.55, "grad_norm": 0.6840502659819915, "learning_rate": 4.363370581631999e-06, "loss": 0.4175, "step": 7813 }, { "epoch": 0.55, "grad_norm": 2.017976327659427, "learning_rate": 4.362230784631547e-06, "loss": 0.5395, "step": 7814 }, { "epoch": 0.55, "grad_norm": 1.5165424164803367, "learning_rate": 4.361091021320226e-06, "loss": 0.4728, "step": 7815 }, { "epoch": 0.55, "grad_norm": 1.4773008720638456, "learning_rate": 4.359951291758241e-06, "loss": 0.555, "step": 7816 }, { "epoch": 0.55, "grad_norm": 1.6323077521353688, "learning_rate": 4.358811596005798e-06, "loss": 0.6192, "step": 7817 }, { "epoch": 0.55, "grad_norm": 1.7302528178566496, "learning_rate": 4.357671934123098e-06, "loss": 0.6084, "step": 7818 }, { "epoch": 0.55, "grad_norm": 1.5936046359106553, "learning_rate": 4.356532306170345e-06, "loss": 0.5527, "step": 7819 }, { "epoch": 0.55, "grad_norm": 2.2671784289550514, "learning_rate": 4.355392712207735e-06, "loss": 0.5223, "step": 7820 }, { "epoch": 0.55, "grad_norm": 1.4254670052092133, "learning_rate": 4.354253152295466e-06, "loss": 0.4774, "step": 7821 }, { "epoch": 0.56, "grad_norm": 1.7957302616474857, "learning_rate": 4.353113626493734e-06, "loss": 0.5295, "step": 7822 }, { "epoch": 0.56, "grad_norm": 1.5684951681519816, "learning_rate": 4.351974134862731e-06, "loss": 0.5719, "step": 7823 }, { "epoch": 0.56, "grad_norm": 0.7705018806038914, "learning_rate": 4.350834677462652e-06, "loss": 0.3857, "step": 7824 }, { "epoch": 0.56, "grad_norm": 1.6763944921733034, "learning_rate": 4.349695254353683e-06, "loss": 0.5023, "step": 7825 }, { "epoch": 0.56, "grad_norm": 3.2335198450429417, "learning_rate": 4.348555865596015e-06, "loss": 0.5707, "step": 7826 }, { "epoch": 0.56, "grad_norm": 1.5283477817297213, "learning_rate": 4.347416511249835e-06, "loss": 0.5191, "step": 7827 }, { "epoch": 0.56, "grad_norm": 1.7194627146723842, "learning_rate": 4.346277191375323e-06, "loss": 0.5596, "step": 7828 }, { "epoch": 0.56, "grad_norm": 2.1607157690734593, "learning_rate": 4.345137906032668e-06, "loss": 0.4495, "step": 7829 }, { "epoch": 0.56, "grad_norm": 2.29182295438363, "learning_rate": 4.343998655282047e-06, "loss": 0.5135, "step": 7830 }, { "epoch": 0.56, "grad_norm": 1.5223211769287408, "learning_rate": 4.342859439183639e-06, "loss": 0.5077, "step": 7831 }, { "epoch": 0.56, "grad_norm": 1.7192749796162712, "learning_rate": 4.341720257797621e-06, "loss": 0.4346, "step": 7832 }, { "epoch": 0.56, "grad_norm": 2.2826772103730417, "learning_rate": 4.340581111184171e-06, "loss": 0.5066, "step": 7833 }, { "epoch": 0.56, "grad_norm": 1.6415873014866662, "learning_rate": 4.339441999403463e-06, "loss": 0.5159, "step": 7834 }, { "epoch": 0.56, "grad_norm": 1.5038585188988054, "learning_rate": 4.338302922515665e-06, "loss": 0.5117, "step": 7835 }, { "epoch": 0.56, "grad_norm": 1.7216132531026234, "learning_rate": 4.337163880580948e-06, "loss": 0.5675, "step": 7836 }, { "epoch": 0.56, "grad_norm": 1.8625720491645377, "learning_rate": 4.3360248736594825e-06, "loss": 0.4673, "step": 7837 }, { "epoch": 0.56, "grad_norm": 1.5799705660445293, "learning_rate": 4.3348859018114315e-06, "loss": 0.4858, "step": 7838 }, { "epoch": 0.56, "grad_norm": 1.726511685390362, "learning_rate": 4.333746965096962e-06, "loss": 0.5636, "step": 7839 }, { "epoch": 0.56, "grad_norm": 1.7745429996191497, "learning_rate": 4.332608063576234e-06, "loss": 0.5607, "step": 7840 }, { "epoch": 0.56, "grad_norm": 1.7141629717751148, "learning_rate": 4.331469197309411e-06, "loss": 0.6035, "step": 7841 }, { "epoch": 0.56, "grad_norm": 0.7360623857920902, "learning_rate": 4.33033036635665e-06, "loss": 0.4467, "step": 7842 }, { "epoch": 0.56, "grad_norm": 1.8722853889551492, "learning_rate": 4.329191570778109e-06, "loss": 0.5184, "step": 7843 }, { "epoch": 0.56, "grad_norm": 1.7789853774744986, "learning_rate": 4.32805281063394e-06, "loss": 0.553, "step": 7844 }, { "epoch": 0.56, "grad_norm": 1.7319756416649699, "learning_rate": 4.3269140859843e-06, "loss": 0.5626, "step": 7845 }, { "epoch": 0.56, "grad_norm": 1.5813526385949301, "learning_rate": 4.325775396889338e-06, "loss": 0.5181, "step": 7846 }, { "epoch": 0.56, "grad_norm": 1.800074308528196, "learning_rate": 4.3246367434092025e-06, "loss": 0.5501, "step": 7847 }, { "epoch": 0.56, "grad_norm": 1.898057185864801, "learning_rate": 4.3234981256040455e-06, "loss": 0.5615, "step": 7848 }, { "epoch": 0.56, "grad_norm": 0.670489997161316, "learning_rate": 4.322359543534009e-06, "loss": 0.4228, "step": 7849 }, { "epoch": 0.56, "grad_norm": 2.1542292162637353, "learning_rate": 4.321220997259239e-06, "loss": 0.5841, "step": 7850 }, { "epoch": 0.56, "grad_norm": 0.6851057746991106, "learning_rate": 4.320082486839873e-06, "loss": 0.4341, "step": 7851 }, { "epoch": 0.56, "grad_norm": 1.5947123377717245, "learning_rate": 4.3189440123360555e-06, "loss": 0.5204, "step": 7852 }, { "epoch": 0.56, "grad_norm": 2.449833099147218, "learning_rate": 4.317805573807922e-06, "loss": 0.4984, "step": 7853 }, { "epoch": 0.56, "grad_norm": 2.0017016770278757, "learning_rate": 4.316667171315611e-06, "loss": 0.5367, "step": 7854 }, { "epoch": 0.56, "grad_norm": 2.2407312684829734, "learning_rate": 4.315528804919254e-06, "loss": 0.5199, "step": 7855 }, { "epoch": 0.56, "grad_norm": 1.7422178030798168, "learning_rate": 4.314390474678986e-06, "loss": 0.5856, "step": 7856 }, { "epoch": 0.56, "grad_norm": 2.1312897948271763, "learning_rate": 4.313252180654937e-06, "loss": 0.4976, "step": 7857 }, { "epoch": 0.56, "grad_norm": 1.503900236580758, "learning_rate": 4.312113922907235e-06, "loss": 0.4753, "step": 7858 }, { "epoch": 0.56, "grad_norm": 1.5829677292329811, "learning_rate": 4.310975701496004e-06, "loss": 0.5471, "step": 7859 }, { "epoch": 0.56, "grad_norm": 1.7182784106414322, "learning_rate": 4.309837516481372e-06, "loss": 0.5062, "step": 7860 }, { "epoch": 0.56, "grad_norm": 1.7495510359338038, "learning_rate": 4.308699367923462e-06, "loss": 0.6341, "step": 7861 }, { "epoch": 0.56, "grad_norm": 1.6048786578238332, "learning_rate": 4.307561255882392e-06, "loss": 0.5499, "step": 7862 }, { "epoch": 0.56, "grad_norm": 1.5862666620284023, "learning_rate": 4.306423180418285e-06, "loss": 0.558, "step": 7863 }, { "epoch": 0.56, "grad_norm": 0.7297096470208068, "learning_rate": 4.3052851415912556e-06, "loss": 0.4233, "step": 7864 }, { "epoch": 0.56, "grad_norm": 0.8147348307588197, "learning_rate": 4.304147139461419e-06, "loss": 0.4562, "step": 7865 }, { "epoch": 0.56, "grad_norm": 1.9887050092621918, "learning_rate": 4.303009174088886e-06, "loss": 0.5422, "step": 7866 }, { "epoch": 0.56, "grad_norm": 2.15571059569158, "learning_rate": 4.301871245533772e-06, "loss": 0.5847, "step": 7867 }, { "epoch": 0.56, "grad_norm": 1.7251748094925652, "learning_rate": 4.300733353856185e-06, "loss": 0.5531, "step": 7868 }, { "epoch": 0.56, "grad_norm": 1.8642798732561452, "learning_rate": 4.299595499116231e-06, "loss": 0.4715, "step": 7869 }, { "epoch": 0.56, "grad_norm": 1.7568857492937195, "learning_rate": 4.298457681374014e-06, "loss": 0.5195, "step": 7870 }, { "epoch": 0.56, "grad_norm": 1.8848322800106194, "learning_rate": 4.2973199006896425e-06, "loss": 0.5321, "step": 7871 }, { "epoch": 0.56, "grad_norm": 1.6183151548191805, "learning_rate": 4.2961821571232145e-06, "loss": 0.5381, "step": 7872 }, { "epoch": 0.56, "grad_norm": 1.5826147380663012, "learning_rate": 4.2950444507348295e-06, "loss": 0.5422, "step": 7873 }, { "epoch": 0.56, "grad_norm": 1.6345221352652015, "learning_rate": 4.293906781584584e-06, "loss": 0.5882, "step": 7874 }, { "epoch": 0.56, "grad_norm": 1.709657323813796, "learning_rate": 4.2927691497325755e-06, "loss": 0.5887, "step": 7875 }, { "epoch": 0.56, "grad_norm": 1.572199562853259, "learning_rate": 4.291631555238898e-06, "loss": 0.5192, "step": 7876 }, { "epoch": 0.56, "grad_norm": 2.0713121371920975, "learning_rate": 4.290493998163639e-06, "loss": 0.5504, "step": 7877 }, { "epoch": 0.56, "grad_norm": 2.0418677259373217, "learning_rate": 4.289356478566894e-06, "loss": 0.5403, "step": 7878 }, { "epoch": 0.56, "grad_norm": 1.8187259021838331, "learning_rate": 4.288218996508748e-06, "loss": 0.5242, "step": 7879 }, { "epoch": 0.56, "grad_norm": 1.9829637019030217, "learning_rate": 4.287081552049286e-06, "loss": 0.5139, "step": 7880 }, { "epoch": 0.56, "grad_norm": 2.034555080093708, "learning_rate": 4.285944145248591e-06, "loss": 0.5698, "step": 7881 }, { "epoch": 0.56, "grad_norm": 1.5082810321087645, "learning_rate": 4.284806776166747e-06, "loss": 0.5278, "step": 7882 }, { "epoch": 0.56, "grad_norm": 1.8052116370629787, "learning_rate": 4.283669444863832e-06, "loss": 0.5902, "step": 7883 }, { "epoch": 0.56, "grad_norm": 2.0653800095688974, "learning_rate": 4.282532151399924e-06, "loss": 0.4874, "step": 7884 }, { "epoch": 0.56, "grad_norm": 1.9350174734699683, "learning_rate": 4.281394895835097e-06, "loss": 0.5182, "step": 7885 }, { "epoch": 0.56, "grad_norm": 2.012087286743686, "learning_rate": 4.28025767822943e-06, "loss": 0.5556, "step": 7886 }, { "epoch": 0.56, "grad_norm": 1.7079958813586946, "learning_rate": 4.279120498642991e-06, "loss": 0.5055, "step": 7887 }, { "epoch": 0.56, "grad_norm": 1.5495749273209722, "learning_rate": 4.277983357135848e-06, "loss": 0.4524, "step": 7888 }, { "epoch": 0.56, "grad_norm": 1.8789911970380075, "learning_rate": 4.276846253768071e-06, "loss": 0.4541, "step": 7889 }, { "epoch": 0.56, "grad_norm": 1.9401203949716959, "learning_rate": 4.275709188599725e-06, "loss": 0.5731, "step": 7890 }, { "epoch": 0.56, "grad_norm": 1.6858162112640054, "learning_rate": 4.2745721616908755e-06, "loss": 0.5133, "step": 7891 }, { "epoch": 0.56, "grad_norm": 1.6664719477173153, "learning_rate": 4.27343517310158e-06, "loss": 0.4646, "step": 7892 }, { "epoch": 0.56, "grad_norm": 1.4847764571137774, "learning_rate": 4.272298222891903e-06, "loss": 0.5165, "step": 7893 }, { "epoch": 0.56, "grad_norm": 1.6908283360539595, "learning_rate": 4.2711613111219e-06, "loss": 0.507, "step": 7894 }, { "epoch": 0.56, "grad_norm": 1.8648621546752273, "learning_rate": 4.270024437851625e-06, "loss": 0.5458, "step": 7895 }, { "epoch": 0.56, "grad_norm": 2.381721035780789, "learning_rate": 4.268887603141132e-06, "loss": 0.5277, "step": 7896 }, { "epoch": 0.56, "grad_norm": 1.6291358947509669, "learning_rate": 4.267750807050474e-06, "loss": 0.4859, "step": 7897 }, { "epoch": 0.56, "grad_norm": 1.844710515487976, "learning_rate": 4.266614049639699e-06, "loss": 0.4802, "step": 7898 }, { "epoch": 0.56, "grad_norm": 1.6810307856729383, "learning_rate": 4.265477330968856e-06, "loss": 0.4925, "step": 7899 }, { "epoch": 0.56, "grad_norm": 0.7327929701757846, "learning_rate": 4.2643406510979875e-06, "loss": 0.4241, "step": 7900 }, { "epoch": 0.56, "grad_norm": 1.8650091570812555, "learning_rate": 4.26320401008714e-06, "loss": 0.5104, "step": 7901 }, { "epoch": 0.56, "grad_norm": 1.5659548275582515, "learning_rate": 4.262067407996354e-06, "loss": 0.5232, "step": 7902 }, { "epoch": 0.56, "grad_norm": 1.9610255563873142, "learning_rate": 4.260930844885667e-06, "loss": 0.5726, "step": 7903 }, { "epoch": 0.56, "grad_norm": 1.836026513931319, "learning_rate": 4.259794320815116e-06, "loss": 0.5546, "step": 7904 }, { "epoch": 0.56, "grad_norm": 1.7088365120442235, "learning_rate": 4.258657835844738e-06, "loss": 0.602, "step": 7905 }, { "epoch": 0.56, "grad_norm": 1.5490907655980246, "learning_rate": 4.257521390034566e-06, "loss": 0.5131, "step": 7906 }, { "epoch": 0.56, "grad_norm": 1.6868213567133836, "learning_rate": 4.25638498344463e-06, "loss": 0.5433, "step": 7907 }, { "epoch": 0.56, "grad_norm": 1.7632771925777602, "learning_rate": 4.2552486161349574e-06, "loss": 0.4895, "step": 7908 }, { "epoch": 0.56, "grad_norm": 2.4268625220883977, "learning_rate": 4.254112288165577e-06, "loss": 0.5024, "step": 7909 }, { "epoch": 0.56, "grad_norm": 1.6081552856642392, "learning_rate": 4.252975999596515e-06, "loss": 0.5287, "step": 7910 }, { "epoch": 0.56, "grad_norm": 1.718699150329832, "learning_rate": 4.2518397504877886e-06, "loss": 0.5448, "step": 7911 }, { "epoch": 0.56, "grad_norm": 0.7931079933879331, "learning_rate": 4.250703540899422e-06, "loss": 0.4571, "step": 7912 }, { "epoch": 0.56, "grad_norm": 1.7880836426803675, "learning_rate": 4.249567370891436e-06, "loss": 0.5828, "step": 7913 }, { "epoch": 0.56, "grad_norm": 2.5375097964571953, "learning_rate": 4.248431240523844e-06, "loss": 0.4731, "step": 7914 }, { "epoch": 0.56, "grad_norm": 1.9239466228041664, "learning_rate": 4.247295149856657e-06, "loss": 0.5909, "step": 7915 }, { "epoch": 0.56, "grad_norm": 0.7507895330693232, "learning_rate": 4.246159098949894e-06, "loss": 0.4261, "step": 7916 }, { "epoch": 0.56, "grad_norm": 2.0079859976777383, "learning_rate": 4.2450230878635615e-06, "loss": 0.5267, "step": 7917 }, { "epoch": 0.56, "grad_norm": 1.8712595030219465, "learning_rate": 4.2438871166576664e-06, "loss": 0.5821, "step": 7918 }, { "epoch": 0.56, "grad_norm": 0.776474569363202, "learning_rate": 4.242751185392217e-06, "loss": 0.4327, "step": 7919 }, { "epoch": 0.56, "grad_norm": 2.813042830125963, "learning_rate": 4.241615294127215e-06, "loss": 0.5088, "step": 7920 }, { "epoch": 0.56, "grad_norm": 3.3139998201569933, "learning_rate": 4.240479442922664e-06, "loss": 0.5034, "step": 7921 }, { "epoch": 0.56, "grad_norm": 1.5420114540584131, "learning_rate": 4.239343631838562e-06, "loss": 0.4644, "step": 7922 }, { "epoch": 0.56, "grad_norm": 2.1138470591377088, "learning_rate": 4.2382078609349055e-06, "loss": 0.5675, "step": 7923 }, { "epoch": 0.56, "grad_norm": 1.8387897317479684, "learning_rate": 4.237072130271693e-06, "loss": 0.5485, "step": 7924 }, { "epoch": 0.56, "grad_norm": 0.7804033673626266, "learning_rate": 4.235936439908916e-06, "loss": 0.4341, "step": 7925 }, { "epoch": 0.56, "grad_norm": 1.9451340294345458, "learning_rate": 4.234800789906562e-06, "loss": 0.4799, "step": 7926 }, { "epoch": 0.56, "grad_norm": 2.1622604872659377, "learning_rate": 4.2336651803246255e-06, "loss": 0.5296, "step": 7927 }, { "epoch": 0.56, "grad_norm": 0.7152613330917906, "learning_rate": 4.232529611223091e-06, "loss": 0.4302, "step": 7928 }, { "epoch": 0.56, "grad_norm": 2.5897003932096645, "learning_rate": 4.231394082661944e-06, "loss": 0.544, "step": 7929 }, { "epoch": 0.56, "grad_norm": 0.733995343658287, "learning_rate": 4.230258594701163e-06, "loss": 0.4417, "step": 7930 }, { "epoch": 0.56, "grad_norm": 0.7596755239488437, "learning_rate": 4.229123147400733e-06, "loss": 0.464, "step": 7931 }, { "epoch": 0.56, "grad_norm": 1.9273885459274698, "learning_rate": 4.227987740820632e-06, "loss": 0.5636, "step": 7932 }, { "epoch": 0.56, "grad_norm": 2.44603570176126, "learning_rate": 4.226852375020832e-06, "loss": 0.5237, "step": 7933 }, { "epoch": 0.56, "grad_norm": 1.9176826456844964, "learning_rate": 4.22571705006131e-06, "loss": 0.4632, "step": 7934 }, { "epoch": 0.56, "grad_norm": 1.6687059668255804, "learning_rate": 4.224581766002037e-06, "loss": 0.5287, "step": 7935 }, { "epoch": 0.56, "grad_norm": 2.156555576850282, "learning_rate": 4.223446522902983e-06, "loss": 0.5378, "step": 7936 }, { "epoch": 0.56, "grad_norm": 1.537319769974889, "learning_rate": 4.222311320824115e-06, "loss": 0.4414, "step": 7937 }, { "epoch": 0.56, "grad_norm": 1.7643441773039121, "learning_rate": 4.221176159825397e-06, "loss": 0.5129, "step": 7938 }, { "epoch": 0.56, "grad_norm": 1.8243142275927837, "learning_rate": 4.220041039966795e-06, "loss": 0.4683, "step": 7939 }, { "epoch": 0.56, "grad_norm": 1.7793540689110432, "learning_rate": 4.218905961308267e-06, "loss": 0.5224, "step": 7940 }, { "epoch": 0.56, "grad_norm": 1.7560304497029875, "learning_rate": 4.217770923909773e-06, "loss": 0.5196, "step": 7941 }, { "epoch": 0.56, "grad_norm": 4.186740782278691, "learning_rate": 4.216635927831269e-06, "loss": 0.5998, "step": 7942 }, { "epoch": 0.56, "grad_norm": 1.6752875696067164, "learning_rate": 4.21550097313271e-06, "loss": 0.5755, "step": 7943 }, { "epoch": 0.56, "grad_norm": 3.1482501821445728, "learning_rate": 4.2143660598740495e-06, "loss": 0.5075, "step": 7944 }, { "epoch": 0.56, "grad_norm": 1.8028988200359461, "learning_rate": 4.213231188115233e-06, "loss": 0.4926, "step": 7945 }, { "epoch": 0.56, "grad_norm": 1.9928190440472924, "learning_rate": 4.212096357916214e-06, "loss": 0.4864, "step": 7946 }, { "epoch": 0.56, "grad_norm": 0.7393294256279669, "learning_rate": 4.2109615693369334e-06, "loss": 0.4417, "step": 7947 }, { "epoch": 0.56, "grad_norm": 2.0996513973852684, "learning_rate": 4.209826822437336e-06, "loss": 0.4831, "step": 7948 }, { "epoch": 0.56, "grad_norm": 1.8397284368099365, "learning_rate": 4.208692117277364e-06, "loss": 0.5379, "step": 7949 }, { "epoch": 0.56, "grad_norm": 1.7118306845783688, "learning_rate": 4.207557453916955e-06, "loss": 0.5019, "step": 7950 }, { "epoch": 0.56, "grad_norm": 1.7469222819825267, "learning_rate": 4.206422832416047e-06, "loss": 0.524, "step": 7951 }, { "epoch": 0.56, "grad_norm": 1.723656114814808, "learning_rate": 4.205288252834575e-06, "loss": 0.509, "step": 7952 }, { "epoch": 0.56, "grad_norm": 0.7137311439987881, "learning_rate": 4.204153715232468e-06, "loss": 0.4604, "step": 7953 }, { "epoch": 0.56, "grad_norm": 1.555600133726678, "learning_rate": 4.203019219669661e-06, "loss": 0.4846, "step": 7954 }, { "epoch": 0.56, "grad_norm": 3.2887793864405723, "learning_rate": 4.201884766206077e-06, "loss": 0.5567, "step": 7955 }, { "epoch": 0.56, "grad_norm": 1.7969233020844753, "learning_rate": 4.200750354901646e-06, "loss": 0.4847, "step": 7956 }, { "epoch": 0.56, "grad_norm": 1.470034068912422, "learning_rate": 4.199615985816287e-06, "loss": 0.574, "step": 7957 }, { "epoch": 0.56, "grad_norm": 1.9058082149355742, "learning_rate": 4.1984816590099266e-06, "loss": 0.5176, "step": 7958 }, { "epoch": 0.56, "grad_norm": 1.5804943393077662, "learning_rate": 4.1973473745424794e-06, "loss": 0.5054, "step": 7959 }, { "epoch": 0.56, "grad_norm": 1.708471450295443, "learning_rate": 4.196213132473863e-06, "loss": 0.5219, "step": 7960 }, { "epoch": 0.56, "grad_norm": 1.8273864161510986, "learning_rate": 4.195078932863994e-06, "loss": 0.6238, "step": 7961 }, { "epoch": 0.57, "grad_norm": 2.286954896990274, "learning_rate": 4.193944775772784e-06, "loss": 0.5754, "step": 7962 }, { "epoch": 0.57, "grad_norm": 1.4548170237210307, "learning_rate": 4.19281066126014e-06, "loss": 0.5, "step": 7963 }, { "epoch": 0.57, "grad_norm": 3.122786965417151, "learning_rate": 4.1916765893859714e-06, "loss": 0.5815, "step": 7964 }, { "epoch": 0.57, "grad_norm": 1.776458668293524, "learning_rate": 4.190542560210186e-06, "loss": 0.5829, "step": 7965 }, { "epoch": 0.57, "grad_norm": 2.094441016793542, "learning_rate": 4.189408573792686e-06, "loss": 0.5427, "step": 7966 }, { "epoch": 0.57, "grad_norm": 1.8383510419596714, "learning_rate": 4.1882746301933716e-06, "loss": 0.5685, "step": 7967 }, { "epoch": 0.57, "grad_norm": 1.637614878405265, "learning_rate": 4.18714072947214e-06, "loss": 0.6017, "step": 7968 }, { "epoch": 0.57, "grad_norm": 2.0490487669548823, "learning_rate": 4.186006871688891e-06, "loss": 0.5403, "step": 7969 }, { "epoch": 0.57, "grad_norm": 2.0274930705338816, "learning_rate": 4.1848730569035165e-06, "loss": 0.6037, "step": 7970 }, { "epoch": 0.57, "grad_norm": 1.8784803379050128, "learning_rate": 4.1837392851759105e-06, "loss": 0.5784, "step": 7971 }, { "epoch": 0.57, "grad_norm": 1.6962147571285617, "learning_rate": 4.1826055565659584e-06, "loss": 0.48, "step": 7972 }, { "epoch": 0.57, "grad_norm": 3.2980489937619515, "learning_rate": 4.181471871133553e-06, "loss": 0.5322, "step": 7973 }, { "epoch": 0.57, "grad_norm": 1.8811892886830837, "learning_rate": 4.180338228938577e-06, "loss": 0.5258, "step": 7974 }, { "epoch": 0.57, "grad_norm": 1.7426850107668241, "learning_rate": 4.179204630040911e-06, "loss": 0.5583, "step": 7975 }, { "epoch": 0.57, "grad_norm": 1.6743121050914795, "learning_rate": 4.1780710745004395e-06, "loss": 0.5772, "step": 7976 }, { "epoch": 0.57, "grad_norm": 1.850553457561551, "learning_rate": 4.176937562377039e-06, "loss": 0.5179, "step": 7977 }, { "epoch": 0.57, "grad_norm": 1.9005551691812823, "learning_rate": 4.175804093730585e-06, "loss": 0.6253, "step": 7978 }, { "epoch": 0.57, "grad_norm": 2.56592342327115, "learning_rate": 4.17467066862095e-06, "loss": 0.5343, "step": 7979 }, { "epoch": 0.57, "grad_norm": 1.818969020688428, "learning_rate": 4.173537287108009e-06, "loss": 0.5486, "step": 7980 }, { "epoch": 0.57, "grad_norm": 1.9088400935929546, "learning_rate": 4.172403949251628e-06, "loss": 0.5128, "step": 7981 }, { "epoch": 0.57, "grad_norm": 2.535497094078783, "learning_rate": 4.171270655111676e-06, "loss": 0.4881, "step": 7982 }, { "epoch": 0.57, "grad_norm": 1.975489106894407, "learning_rate": 4.170137404748014e-06, "loss": 0.5666, "step": 7983 }, { "epoch": 0.57, "grad_norm": 1.9279343615891371, "learning_rate": 4.169004198220508e-06, "loss": 0.5747, "step": 7984 }, { "epoch": 0.57, "grad_norm": 1.9345573574549626, "learning_rate": 4.1678710355890144e-06, "loss": 0.5049, "step": 7985 }, { "epoch": 0.57, "grad_norm": 2.04644699968564, "learning_rate": 4.166737916913394e-06, "loss": 0.5025, "step": 7986 }, { "epoch": 0.57, "grad_norm": 1.7086665061868136, "learning_rate": 4.1656048422534974e-06, "loss": 0.5446, "step": 7987 }, { "epoch": 0.57, "grad_norm": 1.514371385976562, "learning_rate": 4.164471811669184e-06, "loss": 0.4818, "step": 7988 }, { "epoch": 0.57, "grad_norm": 1.8564785920317513, "learning_rate": 4.163338825220299e-06, "loss": 0.5236, "step": 7989 }, { "epoch": 0.57, "grad_norm": 1.6143895565314013, "learning_rate": 4.162205882966691e-06, "loss": 0.5093, "step": 7990 }, { "epoch": 0.57, "grad_norm": 1.6227306990244474, "learning_rate": 4.16107298496821e-06, "loss": 0.5946, "step": 7991 }, { "epoch": 0.57, "grad_norm": 0.7005184064585938, "learning_rate": 4.159940131284694e-06, "loss": 0.4474, "step": 7992 }, { "epoch": 0.57, "grad_norm": 1.8705391471378023, "learning_rate": 4.158807321975989e-06, "loss": 0.5109, "step": 7993 }, { "epoch": 0.57, "grad_norm": 1.6055716688852015, "learning_rate": 4.1576745571019285e-06, "loss": 0.4941, "step": 7994 }, { "epoch": 0.57, "grad_norm": 1.833693387225302, "learning_rate": 4.156541836722355e-06, "loss": 0.5381, "step": 7995 }, { "epoch": 0.57, "grad_norm": 1.6914610967245645, "learning_rate": 4.1554091608971e-06, "loss": 0.567, "step": 7996 }, { "epoch": 0.57, "grad_norm": 2.7149015319014764, "learning_rate": 4.154276529685994e-06, "loss": 0.5381, "step": 7997 }, { "epoch": 0.57, "grad_norm": 1.503200814227334, "learning_rate": 4.153143943148866e-06, "loss": 0.5392, "step": 7998 }, { "epoch": 0.57, "grad_norm": 0.6789859940771124, "learning_rate": 4.152011401345546e-06, "loss": 0.4247, "step": 7999 }, { "epoch": 0.57, "grad_norm": 1.6692305988981593, "learning_rate": 4.150878904335856e-06, "loss": 0.5505, "step": 8000 }, { "epoch": 0.57, "grad_norm": 1.8223993776015004, "learning_rate": 4.149746452179622e-06, "loss": 0.5021, "step": 8001 }, { "epoch": 0.57, "grad_norm": 2.33822253734269, "learning_rate": 4.148614044936658e-06, "loss": 0.6324, "step": 8002 }, { "epoch": 0.57, "grad_norm": 1.8518221347380979, "learning_rate": 4.147481682666787e-06, "loss": 0.5121, "step": 8003 }, { "epoch": 0.57, "grad_norm": 1.6939870043935747, "learning_rate": 4.1463493654298225e-06, "loss": 0.5259, "step": 8004 }, { "epoch": 0.57, "grad_norm": 2.5748630821533043, "learning_rate": 4.145217093285575e-06, "loss": 0.5436, "step": 8005 }, { "epoch": 0.57, "grad_norm": 4.458857386133799, "learning_rate": 4.144084866293859e-06, "loss": 0.5443, "step": 8006 }, { "epoch": 0.57, "grad_norm": 1.7540567043355355, "learning_rate": 4.142952684514481e-06, "loss": 0.6157, "step": 8007 }, { "epoch": 0.57, "grad_norm": 1.7308415320151724, "learning_rate": 4.141820548007246e-06, "loss": 0.5891, "step": 8008 }, { "epoch": 0.57, "grad_norm": 2.7291068312469795, "learning_rate": 4.1406884568319546e-06, "loss": 0.5552, "step": 8009 }, { "epoch": 0.57, "grad_norm": 1.9027620443646127, "learning_rate": 4.139556411048414e-06, "loss": 0.6054, "step": 8010 }, { "epoch": 0.57, "grad_norm": 1.8201480121362499, "learning_rate": 4.13842441071642e-06, "loss": 0.5291, "step": 8011 }, { "epoch": 0.57, "grad_norm": 1.729437817952002, "learning_rate": 4.137292455895767e-06, "loss": 0.5899, "step": 8012 }, { "epoch": 0.57, "grad_norm": 1.8041904725409132, "learning_rate": 4.136160546646247e-06, "loss": 0.5601, "step": 8013 }, { "epoch": 0.57, "grad_norm": 1.8064754537516172, "learning_rate": 4.1350286830276575e-06, "loss": 0.6171, "step": 8014 }, { "epoch": 0.57, "grad_norm": 1.6763890074164498, "learning_rate": 4.1338968650997825e-06, "loss": 0.5193, "step": 8015 }, { "epoch": 0.57, "grad_norm": 1.698283484502184, "learning_rate": 4.132765092922411e-06, "loss": 0.5487, "step": 8016 }, { "epoch": 0.57, "grad_norm": 1.6544110751633898, "learning_rate": 4.131633366555323e-06, "loss": 0.5287, "step": 8017 }, { "epoch": 0.57, "grad_norm": 1.667581887816826, "learning_rate": 4.130501686058306e-06, "loss": 0.5493, "step": 8018 }, { "epoch": 0.57, "grad_norm": 1.8048246333346525, "learning_rate": 4.129370051491135e-06, "loss": 0.5198, "step": 8019 }, { "epoch": 0.57, "grad_norm": 1.6511643902526734, "learning_rate": 4.128238462913587e-06, "loss": 0.5971, "step": 8020 }, { "epoch": 0.57, "grad_norm": 1.880776327722236, "learning_rate": 4.1271069203854385e-06, "loss": 0.5324, "step": 8021 }, { "epoch": 0.57, "grad_norm": 1.7772654767339537, "learning_rate": 4.12597542396646e-06, "loss": 0.4887, "step": 8022 }, { "epoch": 0.57, "grad_norm": 1.601374787997852, "learning_rate": 4.124843973716421e-06, "loss": 0.5045, "step": 8023 }, { "epoch": 0.57, "grad_norm": 1.5981204700232814, "learning_rate": 4.123712569695089e-06, "loss": 0.5738, "step": 8024 }, { "epoch": 0.57, "grad_norm": 2.3673123814582278, "learning_rate": 4.1225812119622284e-06, "loss": 0.5314, "step": 8025 }, { "epoch": 0.57, "grad_norm": 2.0268922290080598, "learning_rate": 4.121449900577602e-06, "loss": 0.5485, "step": 8026 }, { "epoch": 0.57, "grad_norm": 1.794710067991761, "learning_rate": 4.120318635600968e-06, "loss": 0.5033, "step": 8027 }, { "epoch": 0.57, "grad_norm": 1.9127477861692541, "learning_rate": 4.119187417092083e-06, "loss": 0.5726, "step": 8028 }, { "epoch": 0.57, "grad_norm": 1.8824760470201243, "learning_rate": 4.1180562451107055e-06, "loss": 0.4956, "step": 8029 }, { "epoch": 0.57, "grad_norm": 1.8198457579585146, "learning_rate": 4.116925119716585e-06, "loss": 0.5819, "step": 8030 }, { "epoch": 0.57, "grad_norm": 0.7391725527955739, "learning_rate": 4.1157940409694724e-06, "loss": 0.4431, "step": 8031 }, { "epoch": 0.57, "grad_norm": 2.2096519678938256, "learning_rate": 4.114663008929112e-06, "loss": 0.6086, "step": 8032 }, { "epoch": 0.57, "grad_norm": 1.6394877536500403, "learning_rate": 4.113532023655255e-06, "loss": 0.4744, "step": 8033 }, { "epoch": 0.57, "grad_norm": 2.363291213460963, "learning_rate": 4.112401085207639e-06, "loss": 0.5859, "step": 8034 }, { "epoch": 0.57, "grad_norm": 1.8423611324651257, "learning_rate": 4.111270193646005e-06, "loss": 0.5216, "step": 8035 }, { "epoch": 0.57, "grad_norm": 1.5363841341377968, "learning_rate": 4.11013934903009e-06, "loss": 0.5055, "step": 8036 }, { "epoch": 0.57, "grad_norm": 1.6009461665178966, "learning_rate": 4.10900855141963e-06, "loss": 0.5989, "step": 8037 }, { "epoch": 0.57, "grad_norm": 2.4231981006339036, "learning_rate": 4.107877800874358e-06, "loss": 0.4829, "step": 8038 }, { "epoch": 0.57, "grad_norm": 1.5113720019239123, "learning_rate": 4.106747097454002e-06, "loss": 0.4563, "step": 8039 }, { "epoch": 0.57, "grad_norm": 1.4819132766217822, "learning_rate": 4.105616441218294e-06, "loss": 0.4998, "step": 8040 }, { "epoch": 0.57, "grad_norm": 2.3483951111166497, "learning_rate": 4.104485832226955e-06, "loss": 0.5791, "step": 8041 }, { "epoch": 0.57, "grad_norm": 2.393313078206014, "learning_rate": 4.103355270539709e-06, "loss": 0.5722, "step": 8042 }, { "epoch": 0.57, "grad_norm": 1.6954600755388396, "learning_rate": 4.1022247562162745e-06, "loss": 0.5307, "step": 8043 }, { "epoch": 0.57, "grad_norm": 0.7638012077510357, "learning_rate": 4.1010942893163706e-06, "loss": 0.4664, "step": 8044 }, { "epoch": 0.57, "grad_norm": 2.157643875769233, "learning_rate": 4.099963869899713e-06, "loss": 0.5184, "step": 8045 }, { "epoch": 0.57, "grad_norm": 1.6707527628570489, "learning_rate": 4.098833498026015e-06, "loss": 0.5773, "step": 8046 }, { "epoch": 0.57, "grad_norm": 0.6734425731387184, "learning_rate": 4.097703173754982e-06, "loss": 0.4188, "step": 8047 }, { "epoch": 0.57, "grad_norm": 1.796338518494986, "learning_rate": 4.096572897146327e-06, "loss": 0.5777, "step": 8048 }, { "epoch": 0.57, "grad_norm": 6.461908555341252, "learning_rate": 4.095442668259753e-06, "loss": 0.5406, "step": 8049 }, { "epoch": 0.57, "grad_norm": 1.6134720463989238, "learning_rate": 4.0943124871549604e-06, "loss": 0.5082, "step": 8050 }, { "epoch": 0.57, "grad_norm": 1.8931649146033012, "learning_rate": 4.093182353891652e-06, "loss": 0.5537, "step": 8051 }, { "epoch": 0.57, "grad_norm": 1.4214439085980473, "learning_rate": 4.092052268529525e-06, "loss": 0.5165, "step": 8052 }, { "epoch": 0.57, "grad_norm": 1.7005718643240582, "learning_rate": 4.090922231128274e-06, "loss": 0.5365, "step": 8053 }, { "epoch": 0.57, "grad_norm": 0.6762643748941881, "learning_rate": 4.089792241747588e-06, "loss": 0.4468, "step": 8054 }, { "epoch": 0.57, "grad_norm": 2.257922344305473, "learning_rate": 4.0886623004471635e-06, "loss": 0.5116, "step": 8055 }, { "epoch": 0.57, "grad_norm": 1.8853299942945974, "learning_rate": 4.087532407286684e-06, "loss": 0.576, "step": 8056 }, { "epoch": 0.57, "grad_norm": 1.649115283835289, "learning_rate": 4.086402562325834e-06, "loss": 0.5085, "step": 8057 }, { "epoch": 0.57, "grad_norm": 1.5154626800660163, "learning_rate": 4.085272765624295e-06, "loss": 0.4427, "step": 8058 }, { "epoch": 0.57, "grad_norm": 1.4809354568421493, "learning_rate": 4.084143017241749e-06, "loss": 0.4704, "step": 8059 }, { "epoch": 0.57, "grad_norm": 1.5944575227252882, "learning_rate": 4.083013317237874e-06, "loss": 0.5561, "step": 8060 }, { "epoch": 0.57, "grad_norm": 1.7195858766709167, "learning_rate": 4.081883665672342e-06, "loss": 0.6262, "step": 8061 }, { "epoch": 0.57, "grad_norm": 2.4424484928984533, "learning_rate": 4.080754062604823e-06, "loss": 0.4557, "step": 8062 }, { "epoch": 0.57, "grad_norm": 2.909367134289205, "learning_rate": 4.079624508094992e-06, "loss": 0.4716, "step": 8063 }, { "epoch": 0.57, "grad_norm": 1.9352521457830154, "learning_rate": 4.078495002202512e-06, "loss": 0.5363, "step": 8064 }, { "epoch": 0.57, "grad_norm": 1.5098524782882923, "learning_rate": 4.077365544987049e-06, "loss": 0.4904, "step": 8065 }, { "epoch": 0.57, "grad_norm": 1.7647283251349108, "learning_rate": 4.076236136508264e-06, "loss": 0.5635, "step": 8066 }, { "epoch": 0.57, "grad_norm": 1.7058665116336174, "learning_rate": 4.075106776825817e-06, "loss": 0.549, "step": 8067 }, { "epoch": 0.57, "grad_norm": 2.264659861014106, "learning_rate": 4.0739774659993635e-06, "loss": 0.5458, "step": 8068 }, { "epoch": 0.57, "grad_norm": 1.5899234927582957, "learning_rate": 4.072848204088557e-06, "loss": 0.5528, "step": 8069 }, { "epoch": 0.57, "grad_norm": 2.0866235883243163, "learning_rate": 4.071718991153051e-06, "loss": 0.5503, "step": 8070 }, { "epoch": 0.57, "grad_norm": 2.668717194017669, "learning_rate": 4.070589827252494e-06, "loss": 0.6699, "step": 8071 }, { "epoch": 0.57, "grad_norm": 1.700772535668077, "learning_rate": 4.069460712446531e-06, "loss": 0.4754, "step": 8072 }, { "epoch": 0.57, "grad_norm": 2.2490471321063645, "learning_rate": 4.068331646794805e-06, "loss": 0.6182, "step": 8073 }, { "epoch": 0.57, "grad_norm": 1.5728330563970998, "learning_rate": 4.067202630356959e-06, "loss": 0.5103, "step": 8074 }, { "epoch": 0.57, "grad_norm": 1.5537796211387174, "learning_rate": 4.066073663192633e-06, "loss": 0.5098, "step": 8075 }, { "epoch": 0.57, "grad_norm": 1.877821791552813, "learning_rate": 4.064944745361459e-06, "loss": 0.523, "step": 8076 }, { "epoch": 0.57, "grad_norm": 2.3091120202092426, "learning_rate": 4.063815876923071e-06, "loss": 0.5843, "step": 8077 }, { "epoch": 0.57, "grad_norm": 1.5654243189348296, "learning_rate": 4.062687057937102e-06, "loss": 0.5315, "step": 8078 }, { "epoch": 0.57, "grad_norm": 0.7455138461808073, "learning_rate": 4.061558288463179e-06, "loss": 0.4157, "step": 8079 }, { "epoch": 0.57, "grad_norm": 2.3270851778288706, "learning_rate": 4.060429568560926e-06, "loss": 0.5341, "step": 8080 }, { "epoch": 0.57, "grad_norm": 3.4657379783177267, "learning_rate": 4.059300898289966e-06, "loss": 0.5093, "step": 8081 }, { "epoch": 0.57, "grad_norm": 1.6436844312155177, "learning_rate": 4.058172277709922e-06, "loss": 0.5495, "step": 8082 }, { "epoch": 0.57, "grad_norm": 3.1101261714968746, "learning_rate": 4.057043706880409e-06, "loss": 0.5013, "step": 8083 }, { "epoch": 0.57, "grad_norm": 2.085097678879066, "learning_rate": 4.055915185861044e-06, "loss": 0.5216, "step": 8084 }, { "epoch": 0.57, "grad_norm": 1.6786776439011553, "learning_rate": 4.054786714711434e-06, "loss": 0.4531, "step": 8085 }, { "epoch": 0.57, "grad_norm": 1.7166592418849398, "learning_rate": 4.053658293491196e-06, "loss": 0.5318, "step": 8086 }, { "epoch": 0.57, "grad_norm": 1.830603445106854, "learning_rate": 4.052529922259932e-06, "loss": 0.5251, "step": 8087 }, { "epoch": 0.57, "grad_norm": 1.9615083413640595, "learning_rate": 4.051401601077249e-06, "loss": 0.5166, "step": 8088 }, { "epoch": 0.57, "grad_norm": 1.903908591338246, "learning_rate": 4.0502733300027465e-06, "loss": 0.4891, "step": 8089 }, { "epoch": 0.57, "grad_norm": 1.8007305972977772, "learning_rate": 4.049145109096026e-06, "loss": 0.4775, "step": 8090 }, { "epoch": 0.57, "grad_norm": 4.906094688916082, "learning_rate": 4.048016938416683e-06, "loss": 0.5598, "step": 8091 }, { "epoch": 0.57, "grad_norm": 1.7933271907649881, "learning_rate": 4.046888818024309e-06, "loss": 0.5456, "step": 8092 }, { "epoch": 0.57, "grad_norm": 2.301499846123311, "learning_rate": 4.045760747978499e-06, "loss": 0.5433, "step": 8093 }, { "epoch": 0.57, "grad_norm": 2.3762873451550433, "learning_rate": 4.04463272833884e-06, "loss": 0.4861, "step": 8094 }, { "epoch": 0.57, "grad_norm": 2.090876187819023, "learning_rate": 4.043504759164917e-06, "loss": 0.5752, "step": 8095 }, { "epoch": 0.57, "grad_norm": 1.7707101818185091, "learning_rate": 4.042376840516312e-06, "loss": 0.5451, "step": 8096 }, { "epoch": 0.57, "grad_norm": 1.7324969627030422, "learning_rate": 4.041248972452609e-06, "loss": 0.5182, "step": 8097 }, { "epoch": 0.57, "grad_norm": 1.9723449710012062, "learning_rate": 4.040121155033384e-06, "loss": 0.6016, "step": 8098 }, { "epoch": 0.57, "grad_norm": 1.6496725918523965, "learning_rate": 4.0389933883182124e-06, "loss": 0.4949, "step": 8099 }, { "epoch": 0.57, "grad_norm": 1.6785997983042253, "learning_rate": 4.037865672366664e-06, "loss": 0.553, "step": 8100 }, { "epoch": 0.57, "grad_norm": 0.7051041385239664, "learning_rate": 4.036738007238314e-06, "loss": 0.4251, "step": 8101 }, { "epoch": 0.57, "grad_norm": 1.6138575894300191, "learning_rate": 4.035610392992725e-06, "loss": 0.5814, "step": 8102 }, { "epoch": 0.58, "grad_norm": 1.643788208719384, "learning_rate": 4.0344828296894615e-06, "loss": 0.487, "step": 8103 }, { "epoch": 0.58, "grad_norm": 1.6718021266672007, "learning_rate": 4.033355317388088e-06, "loss": 0.5356, "step": 8104 }, { "epoch": 0.58, "grad_norm": 1.7762244068186792, "learning_rate": 4.0322278561481625e-06, "loss": 0.474, "step": 8105 }, { "epoch": 0.58, "grad_norm": 2.1539545801851334, "learning_rate": 4.031100446029242e-06, "loss": 0.5352, "step": 8106 }, { "epoch": 0.58, "grad_norm": 1.9318287547687025, "learning_rate": 4.029973087090875e-06, "loss": 0.5186, "step": 8107 }, { "epoch": 0.58, "grad_norm": 2.519152239462843, "learning_rate": 4.02884577939262e-06, "loss": 0.5295, "step": 8108 }, { "epoch": 0.58, "grad_norm": 2.036230396688792, "learning_rate": 4.027718522994021e-06, "loss": 0.5519, "step": 8109 }, { "epoch": 0.58, "grad_norm": 1.775029136828087, "learning_rate": 4.026591317954623e-06, "loss": 0.6353, "step": 8110 }, { "epoch": 0.58, "grad_norm": 0.7274629567635523, "learning_rate": 4.025464164333969e-06, "loss": 0.4375, "step": 8111 }, { "epoch": 0.58, "grad_norm": 1.772577413057217, "learning_rate": 4.024337062191602e-06, "loss": 0.5479, "step": 8112 }, { "epoch": 0.58, "grad_norm": 1.6638612791963472, "learning_rate": 4.023210011587058e-06, "loss": 0.5023, "step": 8113 }, { "epoch": 0.58, "grad_norm": 1.7557000247303571, "learning_rate": 4.022083012579871e-06, "loss": 0.4865, "step": 8114 }, { "epoch": 0.58, "grad_norm": 2.137306999101701, "learning_rate": 4.020956065229571e-06, "loss": 0.5777, "step": 8115 }, { "epoch": 0.58, "grad_norm": 2.468007829714127, "learning_rate": 4.019829169595691e-06, "loss": 0.5192, "step": 8116 }, { "epoch": 0.58, "grad_norm": 1.4068318882338702, "learning_rate": 4.018702325737753e-06, "loss": 0.4926, "step": 8117 }, { "epoch": 0.58, "grad_norm": 0.791820152633847, "learning_rate": 4.017575533715284e-06, "loss": 0.4129, "step": 8118 }, { "epoch": 0.58, "grad_norm": 1.9208662027326253, "learning_rate": 4.016448793587806e-06, "loss": 0.5429, "step": 8119 }, { "epoch": 0.58, "grad_norm": 1.6546036421765657, "learning_rate": 4.015322105414835e-06, "loss": 0.605, "step": 8120 }, { "epoch": 0.58, "grad_norm": 1.6696042219875784, "learning_rate": 4.014195469255886e-06, "loss": 0.5285, "step": 8121 }, { "epoch": 0.58, "grad_norm": 1.6481139999575902, "learning_rate": 4.0130688851704724e-06, "loss": 0.4958, "step": 8122 }, { "epoch": 0.58, "grad_norm": 1.5972058152175985, "learning_rate": 4.011942353218105e-06, "loss": 0.5184, "step": 8123 }, { "epoch": 0.58, "grad_norm": 1.806337656381638, "learning_rate": 4.010815873458291e-06, "loss": 0.5061, "step": 8124 }, { "epoch": 0.58, "grad_norm": 8.512126463333523, "learning_rate": 4.009689445950534e-06, "loss": 0.5566, "step": 8125 }, { "epoch": 0.58, "grad_norm": 3.7163315869538893, "learning_rate": 4.008563070754335e-06, "loss": 0.5338, "step": 8126 }, { "epoch": 0.58, "grad_norm": 1.916434813731539, "learning_rate": 4.0074367479291955e-06, "loss": 0.5501, "step": 8127 }, { "epoch": 0.58, "grad_norm": 1.7272368455457, "learning_rate": 4.0063104775346115e-06, "loss": 0.5026, "step": 8128 }, { "epoch": 0.58, "grad_norm": 1.9982195733885972, "learning_rate": 4.005184259630074e-06, "loss": 0.5044, "step": 8129 }, { "epoch": 0.58, "grad_norm": 1.6921723569830935, "learning_rate": 4.004058094275073e-06, "loss": 0.5272, "step": 8130 }, { "epoch": 0.58, "grad_norm": 1.8178123433534907, "learning_rate": 4.0029319815291e-06, "loss": 0.5288, "step": 8131 }, { "epoch": 0.58, "grad_norm": 1.6949549207219674, "learning_rate": 4.001805921451637e-06, "loss": 0.6009, "step": 8132 }, { "epoch": 0.58, "grad_norm": 1.522985643926616, "learning_rate": 4.000679914102168e-06, "loss": 0.4889, "step": 8133 }, { "epoch": 0.58, "grad_norm": 1.9320794635849636, "learning_rate": 3.9995539595401726e-06, "loss": 0.547, "step": 8134 }, { "epoch": 0.58, "grad_norm": 2.7974657650001435, "learning_rate": 3.998428057825127e-06, "loss": 0.5689, "step": 8135 }, { "epoch": 0.58, "grad_norm": 2.3467777740634563, "learning_rate": 3.997302209016506e-06, "loss": 0.5261, "step": 8136 }, { "epoch": 0.58, "grad_norm": 1.7439366874576783, "learning_rate": 3.996176413173777e-06, "loss": 0.544, "step": 8137 }, { "epoch": 0.58, "grad_norm": 1.63155754188709, "learning_rate": 3.995050670356415e-06, "loss": 0.5223, "step": 8138 }, { "epoch": 0.58, "grad_norm": 1.8198659613526509, "learning_rate": 3.993924980623879e-06, "loss": 0.5197, "step": 8139 }, { "epoch": 0.58, "grad_norm": 1.8403387856611204, "learning_rate": 3.992799344035637e-06, "loss": 0.4866, "step": 8140 }, { "epoch": 0.58, "grad_norm": 1.776172869877236, "learning_rate": 3.991673760651144e-06, "loss": 0.5303, "step": 8141 }, { "epoch": 0.58, "grad_norm": 1.6745295431735645, "learning_rate": 3.990548230529861e-06, "loss": 0.5725, "step": 8142 }, { "epoch": 0.58, "grad_norm": 2.1786269000598724, "learning_rate": 3.989422753731241e-06, "loss": 0.5042, "step": 8143 }, { "epoch": 0.58, "grad_norm": 1.7414140998266472, "learning_rate": 3.988297330314736e-06, "loss": 0.5051, "step": 8144 }, { "epoch": 0.58, "grad_norm": 1.6939850196508066, "learning_rate": 3.987171960339792e-06, "loss": 0.4828, "step": 8145 }, { "epoch": 0.58, "grad_norm": 1.674751757443478, "learning_rate": 3.9860466438658595e-06, "loss": 0.5064, "step": 8146 }, { "epoch": 0.58, "grad_norm": 2.4831467691195357, "learning_rate": 3.9849213809523765e-06, "loss": 0.5542, "step": 8147 }, { "epoch": 0.58, "grad_norm": 0.7430506226696686, "learning_rate": 3.983796171658788e-06, "loss": 0.4285, "step": 8148 }, { "epoch": 0.58, "grad_norm": 1.8957236935076944, "learning_rate": 3.982671016044527e-06, "loss": 0.4519, "step": 8149 }, { "epoch": 0.58, "grad_norm": 3.2653349339332935, "learning_rate": 3.981545914169032e-06, "loss": 0.522, "step": 8150 }, { "epoch": 0.58, "grad_norm": 2.028449451471558, "learning_rate": 3.980420866091733e-06, "loss": 0.5664, "step": 8151 }, { "epoch": 0.58, "grad_norm": 1.854547175774253, "learning_rate": 3.979295871872055e-06, "loss": 0.5812, "step": 8152 }, { "epoch": 0.58, "grad_norm": 1.7052195584115089, "learning_rate": 3.9781709315694314e-06, "loss": 0.5095, "step": 8153 }, { "epoch": 0.58, "grad_norm": 0.7206811074093877, "learning_rate": 3.97704604524328e-06, "loss": 0.4632, "step": 8154 }, { "epoch": 0.58, "grad_norm": 2.0629584482682106, "learning_rate": 3.9759212129530245e-06, "loss": 0.5383, "step": 8155 }, { "epoch": 0.58, "grad_norm": 1.8683143696109235, "learning_rate": 3.974796434758078e-06, "loss": 0.4816, "step": 8156 }, { "epoch": 0.58, "grad_norm": 2.165844621628089, "learning_rate": 3.973671710717859e-06, "loss": 0.5194, "step": 8157 }, { "epoch": 0.58, "grad_norm": 2.0767247692516224, "learning_rate": 3.972547040891779e-06, "loss": 0.5272, "step": 8158 }, { "epoch": 0.58, "grad_norm": 2.001089386988167, "learning_rate": 3.971422425339246e-06, "loss": 0.5295, "step": 8159 }, { "epoch": 0.58, "grad_norm": 1.668943805513631, "learning_rate": 3.9702978641196635e-06, "loss": 0.5817, "step": 8160 }, { "epoch": 0.58, "grad_norm": 19.973158136073252, "learning_rate": 3.969173357292439e-06, "loss": 0.5272, "step": 8161 }, { "epoch": 0.58, "grad_norm": 1.8432594953999082, "learning_rate": 3.9680489049169685e-06, "loss": 0.5652, "step": 8162 }, { "epoch": 0.58, "grad_norm": 1.7451871406071668, "learning_rate": 3.966924507052653e-06, "loss": 0.5328, "step": 8163 }, { "epoch": 0.58, "grad_norm": 1.7504238234083846, "learning_rate": 3.965800163758885e-06, "loss": 0.5325, "step": 8164 }, { "epoch": 0.58, "grad_norm": 2.1619975966320832, "learning_rate": 3.964675875095057e-06, "loss": 0.5021, "step": 8165 }, { "epoch": 0.58, "grad_norm": 1.5300654296760539, "learning_rate": 3.963551641120558e-06, "loss": 0.5181, "step": 8166 }, { "epoch": 0.58, "grad_norm": 1.950595505467225, "learning_rate": 3.962427461894772e-06, "loss": 0.5532, "step": 8167 }, { "epoch": 0.58, "grad_norm": 1.6765478622562457, "learning_rate": 3.961303337477084e-06, "loss": 0.4991, "step": 8168 }, { "epoch": 0.58, "grad_norm": 1.5569721997780555, "learning_rate": 3.960179267926873e-06, "loss": 0.5752, "step": 8169 }, { "epoch": 0.58, "grad_norm": 1.6397940331506433, "learning_rate": 3.9590552533035174e-06, "loss": 0.5541, "step": 8170 }, { "epoch": 0.58, "grad_norm": 2.017072248336799, "learning_rate": 3.9579312936663885e-06, "loss": 0.5682, "step": 8171 }, { "epoch": 0.58, "grad_norm": 1.828911044934909, "learning_rate": 3.956807389074861e-06, "loss": 0.5684, "step": 8172 }, { "epoch": 0.58, "grad_norm": 1.717498185538073, "learning_rate": 3.955683539588304e-06, "loss": 0.5439, "step": 8173 }, { "epoch": 0.58, "grad_norm": 2.0828741983352073, "learning_rate": 3.9545597452660795e-06, "loss": 0.541, "step": 8174 }, { "epoch": 0.58, "grad_norm": 1.9655673268437812, "learning_rate": 3.9534360061675496e-06, "loss": 0.5525, "step": 8175 }, { "epoch": 0.58, "grad_norm": 1.869580758657847, "learning_rate": 3.9523123223520776e-06, "loss": 0.5362, "step": 8176 }, { "epoch": 0.58, "grad_norm": 1.5617350279032653, "learning_rate": 3.95118869387902e-06, "loss": 0.5245, "step": 8177 }, { "epoch": 0.58, "grad_norm": 1.6893161331631614, "learning_rate": 3.950065120807729e-06, "loss": 0.4954, "step": 8178 }, { "epoch": 0.58, "grad_norm": 2.0611459020755665, "learning_rate": 3.948941603197553e-06, "loss": 0.5281, "step": 8179 }, { "epoch": 0.58, "grad_norm": 1.9700537578602089, "learning_rate": 3.947818141107847e-06, "loss": 0.5482, "step": 8180 }, { "epoch": 0.58, "grad_norm": 1.6940654205306918, "learning_rate": 3.9466947345979515e-06, "loss": 0.5552, "step": 8181 }, { "epoch": 0.58, "grad_norm": 1.7607093986381808, "learning_rate": 3.945571383727207e-06, "loss": 0.5315, "step": 8182 }, { "epoch": 0.58, "grad_norm": 1.887031920927936, "learning_rate": 3.944448088554956e-06, "loss": 0.5623, "step": 8183 }, { "epoch": 0.58, "grad_norm": 3.416858777557016, "learning_rate": 3.943324849140534e-06, "loss": 0.5101, "step": 8184 }, { "epoch": 0.58, "grad_norm": 2.0095193645522076, "learning_rate": 3.942201665543274e-06, "loss": 0.5346, "step": 8185 }, { "epoch": 0.58, "grad_norm": 1.9876052903800276, "learning_rate": 3.941078537822504e-06, "loss": 0.53, "step": 8186 }, { "epoch": 0.58, "grad_norm": 1.5162414856273319, "learning_rate": 3.939955466037557e-06, "loss": 0.511, "step": 8187 }, { "epoch": 0.58, "grad_norm": 1.853874385873753, "learning_rate": 3.938832450247754e-06, "loss": 0.5056, "step": 8188 }, { "epoch": 0.58, "grad_norm": 1.9190621700564683, "learning_rate": 3.937709490512417e-06, "loss": 0.4809, "step": 8189 }, { "epoch": 0.58, "grad_norm": 2.394459428968313, "learning_rate": 3.936586586890861e-06, "loss": 0.5567, "step": 8190 }, { "epoch": 0.58, "grad_norm": 1.8494934540255525, "learning_rate": 3.935463739442406e-06, "loss": 0.5695, "step": 8191 }, { "epoch": 0.58, "grad_norm": 1.8509905966360063, "learning_rate": 3.934340948226365e-06, "loss": 0.5252, "step": 8192 }, { "epoch": 0.58, "grad_norm": 1.976052163462318, "learning_rate": 3.933218213302047e-06, "loss": 0.6213, "step": 8193 }, { "epoch": 0.58, "grad_norm": 2.01953877815824, "learning_rate": 3.932095534728753e-06, "loss": 0.5588, "step": 8194 }, { "epoch": 0.58, "grad_norm": 1.8316037971404404, "learning_rate": 3.930972912565796e-06, "loss": 0.5666, "step": 8195 }, { "epoch": 0.58, "grad_norm": 2.0456936604664615, "learning_rate": 3.929850346872471e-06, "loss": 0.513, "step": 8196 }, { "epoch": 0.58, "grad_norm": 2.06454160468138, "learning_rate": 3.928727837708077e-06, "loss": 0.5466, "step": 8197 }, { "epoch": 0.58, "grad_norm": 3.154364225608142, "learning_rate": 3.927605385131907e-06, "loss": 0.4866, "step": 8198 }, { "epoch": 0.58, "grad_norm": 1.9420290084749225, "learning_rate": 3.9264829892032545e-06, "loss": 0.5752, "step": 8199 }, { "epoch": 0.58, "grad_norm": 1.870452377406962, "learning_rate": 3.925360649981409e-06, "loss": 0.5503, "step": 8200 }, { "epoch": 0.58, "grad_norm": 2.458021597221019, "learning_rate": 3.924238367525655e-06, "loss": 0.5456, "step": 8201 }, { "epoch": 0.58, "grad_norm": 2.1538717962533833, "learning_rate": 3.9231161418952755e-06, "loss": 0.5109, "step": 8202 }, { "epoch": 0.58, "grad_norm": 1.8092056277977968, "learning_rate": 3.921993973149551e-06, "loss": 0.5592, "step": 8203 }, { "epoch": 0.58, "grad_norm": 1.9580695054749024, "learning_rate": 3.920871861347757e-06, "loss": 0.5331, "step": 8204 }, { "epoch": 0.58, "grad_norm": 2.786130902803287, "learning_rate": 3.919749806549168e-06, "loss": 0.5377, "step": 8205 }, { "epoch": 0.58, "grad_norm": 2.07062162550016, "learning_rate": 3.918627808813054e-06, "loss": 0.5918, "step": 8206 }, { "epoch": 0.58, "grad_norm": 1.95724028687109, "learning_rate": 3.917505868198686e-06, "loss": 0.5238, "step": 8207 }, { "epoch": 0.58, "grad_norm": 1.5567314298832478, "learning_rate": 3.916383984765324e-06, "loss": 0.4847, "step": 8208 }, { "epoch": 0.58, "grad_norm": 2.9481423916673912, "learning_rate": 3.915262158572231e-06, "loss": 0.57, "step": 8209 }, { "epoch": 0.58, "grad_norm": 0.7676161373272865, "learning_rate": 3.914140389678668e-06, "loss": 0.4419, "step": 8210 }, { "epoch": 0.58, "grad_norm": 2.1262314672643328, "learning_rate": 3.913018678143889e-06, "loss": 0.5097, "step": 8211 }, { "epoch": 0.58, "grad_norm": 2.5121900912802557, "learning_rate": 3.911897024027145e-06, "loss": 0.4887, "step": 8212 }, { "epoch": 0.58, "grad_norm": 3.275970860385759, "learning_rate": 3.910775427387688e-06, "loss": 0.5552, "step": 8213 }, { "epoch": 0.58, "grad_norm": 2.1275808481834746, "learning_rate": 3.909653888284763e-06, "loss": 0.5601, "step": 8214 }, { "epoch": 0.58, "grad_norm": 1.6880901128399293, "learning_rate": 3.908532406777615e-06, "loss": 0.5209, "step": 8215 }, { "epoch": 0.58, "grad_norm": 1.6858984368705028, "learning_rate": 3.907410982925483e-06, "loss": 0.5146, "step": 8216 }, { "epoch": 0.58, "grad_norm": 1.5987749931612856, "learning_rate": 3.906289616787607e-06, "loss": 0.5245, "step": 8217 }, { "epoch": 0.58, "grad_norm": 1.7412792848397713, "learning_rate": 3.9051683084232184e-06, "loss": 0.5087, "step": 8218 }, { "epoch": 0.58, "grad_norm": 1.661788750501398, "learning_rate": 3.904047057891548e-06, "loss": 0.5032, "step": 8219 }, { "epoch": 0.58, "grad_norm": 1.6137403853968162, "learning_rate": 3.902925865251827e-06, "loss": 0.5081, "step": 8220 }, { "epoch": 0.58, "grad_norm": 1.7224014946367976, "learning_rate": 3.901804730563279e-06, "loss": 0.5126, "step": 8221 }, { "epoch": 0.58, "grad_norm": 0.7067455959940929, "learning_rate": 3.900683653885127e-06, "loss": 0.4342, "step": 8222 }, { "epoch": 0.58, "grad_norm": 2.1977703535211193, "learning_rate": 3.899562635276589e-06, "loss": 0.5098, "step": 8223 }, { "epoch": 0.58, "grad_norm": 2.694149771663255, "learning_rate": 3.89844167479688e-06, "loss": 0.5225, "step": 8224 }, { "epoch": 0.58, "grad_norm": 1.6586112243199327, "learning_rate": 3.8973207725052165e-06, "loss": 0.5217, "step": 8225 }, { "epoch": 0.58, "grad_norm": 1.958244641641531, "learning_rate": 3.896199928460806e-06, "loss": 0.4828, "step": 8226 }, { "epoch": 0.58, "grad_norm": 2.1756423610665876, "learning_rate": 3.895079142722854e-06, "loss": 0.524, "step": 8227 }, { "epoch": 0.58, "grad_norm": 1.8397552889569622, "learning_rate": 3.893958415350565e-06, "loss": 0.5664, "step": 8228 }, { "epoch": 0.58, "grad_norm": 2.477548116569571, "learning_rate": 3.8928377464031425e-06, "loss": 0.5446, "step": 8229 }, { "epoch": 0.58, "grad_norm": 1.5011808080842939, "learning_rate": 3.891717135939782e-06, "loss": 0.4866, "step": 8230 }, { "epoch": 0.58, "grad_norm": 1.7289705967649056, "learning_rate": 3.890596584019675e-06, "loss": 0.4996, "step": 8231 }, { "epoch": 0.58, "grad_norm": 2.1823279267558355, "learning_rate": 3.889476090702018e-06, "loss": 0.607, "step": 8232 }, { "epoch": 0.58, "grad_norm": 1.6457441889339104, "learning_rate": 3.888355656045996e-06, "loss": 0.5286, "step": 8233 }, { "epoch": 0.58, "grad_norm": 2.071570565043589, "learning_rate": 3.887235280110795e-06, "loss": 0.5699, "step": 8234 }, { "epoch": 0.58, "grad_norm": 1.934879707710038, "learning_rate": 3.886114962955597e-06, "loss": 0.4871, "step": 8235 }, { "epoch": 0.58, "grad_norm": 2.235639303660988, "learning_rate": 3.8849947046395805e-06, "loss": 0.5304, "step": 8236 }, { "epoch": 0.58, "grad_norm": 1.506300190173808, "learning_rate": 3.883874505221924e-06, "loss": 0.5742, "step": 8237 }, { "epoch": 0.58, "grad_norm": 1.9653894861772, "learning_rate": 3.882754364761797e-06, "loss": 0.5572, "step": 8238 }, { "epoch": 0.58, "grad_norm": 0.6594160132622261, "learning_rate": 3.8816342833183685e-06, "loss": 0.4012, "step": 8239 }, { "epoch": 0.58, "grad_norm": 1.6913808797206589, "learning_rate": 3.880514260950809e-06, "loss": 0.5427, "step": 8240 }, { "epoch": 0.58, "grad_norm": 1.5088933318397555, "learning_rate": 3.879394297718279e-06, "loss": 0.5497, "step": 8241 }, { "epoch": 0.58, "grad_norm": 1.6910703261078577, "learning_rate": 3.878274393679939e-06, "loss": 0.4862, "step": 8242 }, { "epoch": 0.58, "grad_norm": 1.5936672063997535, "learning_rate": 3.877154548894944e-06, "loss": 0.5143, "step": 8243 }, { "epoch": 0.59, "grad_norm": 2.1814327725729994, "learning_rate": 3.876034763422455e-06, "loss": 0.4788, "step": 8244 }, { "epoch": 0.59, "grad_norm": 2.787002400771939, "learning_rate": 3.874915037321617e-06, "loss": 0.4721, "step": 8245 }, { "epoch": 0.59, "grad_norm": 2.4381711661193552, "learning_rate": 3.873795370651576e-06, "loss": 0.4593, "step": 8246 }, { "epoch": 0.59, "grad_norm": 1.6609968205895653, "learning_rate": 3.872675763471483e-06, "loss": 0.5128, "step": 8247 }, { "epoch": 0.59, "grad_norm": 1.7655872912203199, "learning_rate": 3.871556215840475e-06, "loss": 0.4876, "step": 8248 }, { "epoch": 0.59, "grad_norm": 1.7096255553307074, "learning_rate": 3.870436727817691e-06, "loss": 0.5302, "step": 8249 }, { "epoch": 0.59, "grad_norm": 1.6639111687193957, "learning_rate": 3.869317299462267e-06, "loss": 0.5435, "step": 8250 }, { "epoch": 0.59, "grad_norm": 1.6776739576699145, "learning_rate": 3.868197930833334e-06, "loss": 0.52, "step": 8251 }, { "epoch": 0.59, "grad_norm": 2.0742309930945333, "learning_rate": 3.867078621990022e-06, "loss": 0.5005, "step": 8252 }, { "epoch": 0.59, "grad_norm": 1.643471147806347, "learning_rate": 3.865959372991457e-06, "loss": 0.5101, "step": 8253 }, { "epoch": 0.59, "grad_norm": 0.695603715354903, "learning_rate": 3.864840183896758e-06, "loss": 0.4364, "step": 8254 }, { "epoch": 0.59, "grad_norm": 2.0435943324578845, "learning_rate": 3.863721054765048e-06, "loss": 0.5438, "step": 8255 }, { "epoch": 0.59, "grad_norm": 1.482878781930355, "learning_rate": 3.862601985655442e-06, "loss": 0.4802, "step": 8256 }, { "epoch": 0.59, "grad_norm": 1.604015070653445, "learning_rate": 3.8614829766270545e-06, "loss": 0.5162, "step": 8257 }, { "epoch": 0.59, "grad_norm": 1.734114959345256, "learning_rate": 3.860364027738992e-06, "loss": 0.5097, "step": 8258 }, { "epoch": 0.59, "grad_norm": 0.7437754843269132, "learning_rate": 3.859245139050364e-06, "loss": 0.4449, "step": 8259 }, { "epoch": 0.59, "grad_norm": 1.770670914863751, "learning_rate": 3.858126310620274e-06, "loss": 0.5197, "step": 8260 }, { "epoch": 0.59, "grad_norm": 1.661363983153688, "learning_rate": 3.857007542507822e-06, "loss": 0.5458, "step": 8261 }, { "epoch": 0.59, "grad_norm": 1.6369956665577876, "learning_rate": 3.855888834772102e-06, "loss": 0.5036, "step": 8262 }, { "epoch": 0.59, "grad_norm": 1.836689503021629, "learning_rate": 3.854770187472213e-06, "loss": 0.53, "step": 8263 }, { "epoch": 0.59, "grad_norm": 1.5140100630273194, "learning_rate": 3.8536516006672415e-06, "loss": 0.539, "step": 8264 }, { "epoch": 0.59, "grad_norm": 1.6029930742422214, "learning_rate": 3.852533074416277e-06, "loss": 0.5154, "step": 8265 }, { "epoch": 0.59, "grad_norm": 0.7061318499354393, "learning_rate": 3.8514146087784035e-06, "loss": 0.4316, "step": 8266 }, { "epoch": 0.59, "grad_norm": 1.5168484657272634, "learning_rate": 3.850296203812704e-06, "loss": 0.5231, "step": 8267 }, { "epoch": 0.59, "grad_norm": 2.001980604059304, "learning_rate": 3.849177859578254e-06, "loss": 0.5258, "step": 8268 }, { "epoch": 0.59, "grad_norm": 1.8790081532355276, "learning_rate": 3.848059576134128e-06, "loss": 0.6054, "step": 8269 }, { "epoch": 0.59, "grad_norm": 2.0142043803420604, "learning_rate": 3.8469413535394e-06, "loss": 0.5118, "step": 8270 }, { "epoch": 0.59, "grad_norm": 2.496804308002027, "learning_rate": 3.8458231918531354e-06, "loss": 0.4693, "step": 8271 }, { "epoch": 0.59, "grad_norm": 1.5535138433768152, "learning_rate": 3.844705091134401e-06, "loss": 0.5256, "step": 8272 }, { "epoch": 0.59, "grad_norm": 1.9083851334242, "learning_rate": 3.8435870514422576e-06, "loss": 0.5789, "step": 8273 }, { "epoch": 0.59, "grad_norm": 2.513027750974792, "learning_rate": 3.842469072835766e-06, "loss": 0.559, "step": 8274 }, { "epoch": 0.59, "grad_norm": 1.811181677163189, "learning_rate": 3.841351155373981e-06, "loss": 0.5381, "step": 8275 }, { "epoch": 0.59, "grad_norm": 2.094791418287964, "learning_rate": 3.840233299115954e-06, "loss": 0.5158, "step": 8276 }, { "epoch": 0.59, "grad_norm": 1.6680064740146152, "learning_rate": 3.839115504120731e-06, "loss": 0.5369, "step": 8277 }, { "epoch": 0.59, "grad_norm": 1.5629202866297183, "learning_rate": 3.8379977704473635e-06, "loss": 0.4823, "step": 8278 }, { "epoch": 0.59, "grad_norm": 1.5497150034102116, "learning_rate": 3.836880098154889e-06, "loss": 0.4512, "step": 8279 }, { "epoch": 0.59, "grad_norm": 1.8742229183471364, "learning_rate": 3.835762487302349e-06, "loss": 0.5399, "step": 8280 }, { "epoch": 0.59, "grad_norm": 1.4218655138175456, "learning_rate": 3.8346449379487815e-06, "loss": 0.5031, "step": 8281 }, { "epoch": 0.59, "grad_norm": 1.6887971374776853, "learning_rate": 3.8335274501532165e-06, "loss": 0.5506, "step": 8282 }, { "epoch": 0.59, "grad_norm": 1.6947331197886903, "learning_rate": 3.832410023974685e-06, "loss": 0.5136, "step": 8283 }, { "epoch": 0.59, "grad_norm": 2.319672675951931, "learning_rate": 3.831292659472209e-06, "loss": 0.5602, "step": 8284 }, { "epoch": 0.59, "grad_norm": 2.5012915003054204, "learning_rate": 3.830175356704818e-06, "loss": 0.5159, "step": 8285 }, { "epoch": 0.59, "grad_norm": 1.451820205677297, "learning_rate": 3.829058115731527e-06, "loss": 0.5224, "step": 8286 }, { "epoch": 0.59, "grad_norm": 1.515511510614305, "learning_rate": 3.827940936611355e-06, "loss": 0.5352, "step": 8287 }, { "epoch": 0.59, "grad_norm": 1.9916843541890212, "learning_rate": 3.826823819403313e-06, "loss": 0.5184, "step": 8288 }, { "epoch": 0.59, "grad_norm": 3.0479672820660455, "learning_rate": 3.825706764166413e-06, "loss": 0.5348, "step": 8289 }, { "epoch": 0.59, "grad_norm": 2.006308031447736, "learning_rate": 3.824589770959661e-06, "loss": 0.5571, "step": 8290 }, { "epoch": 0.59, "grad_norm": 1.7815859679696167, "learning_rate": 3.823472839842061e-06, "loss": 0.5478, "step": 8291 }, { "epoch": 0.59, "grad_norm": 2.019881105360346, "learning_rate": 3.82235597087261e-06, "loss": 0.6519, "step": 8292 }, { "epoch": 0.59, "grad_norm": 1.7803367682114086, "learning_rate": 3.821239164110308e-06, "loss": 0.5178, "step": 8293 }, { "epoch": 0.59, "grad_norm": 2.343508459924527, "learning_rate": 3.820122419614147e-06, "loss": 0.5985, "step": 8294 }, { "epoch": 0.59, "grad_norm": 1.4659168781332659, "learning_rate": 3.819005737443117e-06, "loss": 0.4786, "step": 8295 }, { "epoch": 0.59, "grad_norm": 1.9765844948470523, "learning_rate": 3.817889117656208e-06, "loss": 0.5904, "step": 8296 }, { "epoch": 0.59, "grad_norm": 2.0761835756220006, "learning_rate": 3.816772560312401e-06, "loss": 0.5514, "step": 8297 }, { "epoch": 0.59, "grad_norm": 2.0785028048004603, "learning_rate": 3.815656065470677e-06, "loss": 0.4667, "step": 8298 }, { "epoch": 0.59, "grad_norm": 1.8197232023299037, "learning_rate": 3.8145396331900104e-06, "loss": 0.5123, "step": 8299 }, { "epoch": 0.59, "grad_norm": 1.8381055884609208, "learning_rate": 3.813423263529379e-06, "loss": 0.5883, "step": 8300 }, { "epoch": 0.59, "grad_norm": 1.6552915090780767, "learning_rate": 3.812306956547751e-06, "loss": 0.5016, "step": 8301 }, { "epoch": 0.59, "grad_norm": 1.4162336012327497, "learning_rate": 3.811190712304095e-06, "loss": 0.502, "step": 8302 }, { "epoch": 0.59, "grad_norm": 0.697693525212653, "learning_rate": 3.8100745308573713e-06, "loss": 0.4542, "step": 8303 }, { "epoch": 0.59, "grad_norm": 1.7718962937211127, "learning_rate": 3.8089584122665454e-06, "loss": 0.5464, "step": 8304 }, { "epoch": 0.59, "grad_norm": 1.5841583450388192, "learning_rate": 3.807842356590572e-06, "loss": 0.5071, "step": 8305 }, { "epoch": 0.59, "grad_norm": 1.9668260452289006, "learning_rate": 3.8067263638884045e-06, "loss": 0.5248, "step": 8306 }, { "epoch": 0.59, "grad_norm": 1.7406083781196189, "learning_rate": 3.805610434218992e-06, "loss": 0.5199, "step": 8307 }, { "epoch": 0.59, "grad_norm": 1.6849128273347758, "learning_rate": 3.804494567641284e-06, "loss": 0.5718, "step": 8308 }, { "epoch": 0.59, "grad_norm": 1.9790809425837808, "learning_rate": 3.8033787642142256e-06, "loss": 0.5441, "step": 8309 }, { "epoch": 0.59, "grad_norm": 1.5270833507466557, "learning_rate": 3.8022630239967534e-06, "loss": 0.5503, "step": 8310 }, { "epoch": 0.59, "grad_norm": 0.7942538612346829, "learning_rate": 3.801147347047809e-06, "loss": 0.4397, "step": 8311 }, { "epoch": 0.59, "grad_norm": 0.7047998047507246, "learning_rate": 3.800031733426324e-06, "loss": 0.432, "step": 8312 }, { "epoch": 0.59, "grad_norm": 0.7353598170369795, "learning_rate": 3.798916183191228e-06, "loss": 0.4342, "step": 8313 }, { "epoch": 0.59, "grad_norm": 1.8495971007829446, "learning_rate": 3.797800696401448e-06, "loss": 0.6421, "step": 8314 }, { "epoch": 0.59, "grad_norm": 1.7221839557091267, "learning_rate": 3.79668527311591e-06, "loss": 0.5598, "step": 8315 }, { "epoch": 0.59, "grad_norm": 0.7253606851292389, "learning_rate": 3.795569913393533e-06, "loss": 0.4491, "step": 8316 }, { "epoch": 0.59, "grad_norm": 1.9582679962975282, "learning_rate": 3.794454617293235e-06, "loss": 0.5722, "step": 8317 }, { "epoch": 0.59, "grad_norm": 1.6890616922825243, "learning_rate": 3.793339384873927e-06, "loss": 0.5849, "step": 8318 }, { "epoch": 0.59, "grad_norm": 1.641969971442546, "learning_rate": 3.7922242161945233e-06, "loss": 0.5221, "step": 8319 }, { "epoch": 0.59, "grad_norm": 1.6668779998482117, "learning_rate": 3.7911091113139296e-06, "loss": 0.5677, "step": 8320 }, { "epoch": 0.59, "grad_norm": 1.578741142094878, "learning_rate": 3.7899940702910486e-06, "loss": 0.4379, "step": 8321 }, { "epoch": 0.59, "grad_norm": 2.041813190927234, "learning_rate": 3.7888790931847787e-06, "loss": 0.4725, "step": 8322 }, { "epoch": 0.59, "grad_norm": 1.449652314254948, "learning_rate": 3.78776418005402e-06, "loss": 0.4552, "step": 8323 }, { "epoch": 0.59, "grad_norm": 1.7819662784169477, "learning_rate": 3.786649330957666e-06, "loss": 0.5563, "step": 8324 }, { "epoch": 0.59, "grad_norm": 0.7161089929904882, "learning_rate": 3.7855345459546056e-06, "loss": 0.4256, "step": 8325 }, { "epoch": 0.59, "grad_norm": 1.8054122418360026, "learning_rate": 3.784419825103724e-06, "loss": 0.4618, "step": 8326 }, { "epoch": 0.59, "grad_norm": 1.6116894756376345, "learning_rate": 3.7833051684639076e-06, "loss": 0.5044, "step": 8327 }, { "epoch": 0.59, "grad_norm": 1.702375692153408, "learning_rate": 3.782190576094036e-06, "loss": 0.5599, "step": 8328 }, { "epoch": 0.59, "grad_norm": 1.6805549429488584, "learning_rate": 3.781076048052981e-06, "loss": 0.5632, "step": 8329 }, { "epoch": 0.59, "grad_norm": 3.144650573310913, "learning_rate": 3.7799615843996227e-06, "loss": 0.5985, "step": 8330 }, { "epoch": 0.59, "grad_norm": 1.6181933139682563, "learning_rate": 3.7788471851928267e-06, "loss": 0.4989, "step": 8331 }, { "epoch": 0.59, "grad_norm": 2.433780442993537, "learning_rate": 3.77773285049146e-06, "loss": 0.544, "step": 8332 }, { "epoch": 0.59, "grad_norm": 1.7597129717842288, "learning_rate": 3.7766185803543846e-06, "loss": 0.5181, "step": 8333 }, { "epoch": 0.59, "grad_norm": 1.8865596169888008, "learning_rate": 3.7755043748404634e-06, "loss": 0.5611, "step": 8334 }, { "epoch": 0.59, "grad_norm": 2.0768283685765687, "learning_rate": 3.7743902340085497e-06, "loss": 0.5493, "step": 8335 }, { "epoch": 0.59, "grad_norm": 1.737426112080627, "learning_rate": 3.773276157917496e-06, "loss": 0.5497, "step": 8336 }, { "epoch": 0.59, "grad_norm": 1.695639236947354, "learning_rate": 3.7721621466261528e-06, "loss": 0.4545, "step": 8337 }, { "epoch": 0.59, "grad_norm": 2.215743902734911, "learning_rate": 3.7710482001933644e-06, "loss": 0.5225, "step": 8338 }, { "epoch": 0.59, "grad_norm": 1.8474655630518426, "learning_rate": 3.7699343186779765e-06, "loss": 0.4775, "step": 8339 }, { "epoch": 0.59, "grad_norm": 1.9957001625415416, "learning_rate": 3.768820502138825e-06, "loss": 0.532, "step": 8340 }, { "epoch": 0.59, "grad_norm": 1.5092663717156172, "learning_rate": 3.767706750634745e-06, "loss": 0.5464, "step": 8341 }, { "epoch": 0.59, "grad_norm": 1.7909071812188677, "learning_rate": 3.7665930642245716e-06, "loss": 0.5721, "step": 8342 }, { "epoch": 0.59, "grad_norm": 1.848160340738212, "learning_rate": 3.7654794429671324e-06, "loss": 0.5534, "step": 8343 }, { "epoch": 0.59, "grad_norm": 1.696298428278636, "learning_rate": 3.7643658869212496e-06, "loss": 0.528, "step": 8344 }, { "epoch": 0.59, "grad_norm": 1.7882039768489122, "learning_rate": 3.76325239614575e-06, "loss": 0.4966, "step": 8345 }, { "epoch": 0.59, "grad_norm": 2.676168192679192, "learning_rate": 3.762138970699448e-06, "loss": 0.5945, "step": 8346 }, { "epoch": 0.59, "grad_norm": 1.5581946306006955, "learning_rate": 3.761025610641161e-06, "loss": 0.5474, "step": 8347 }, { "epoch": 0.59, "grad_norm": 1.7175856013286779, "learning_rate": 3.759912316029698e-06, "loss": 0.6154, "step": 8348 }, { "epoch": 0.59, "grad_norm": 1.6065692917111996, "learning_rate": 3.7587990869238705e-06, "loss": 0.496, "step": 8349 }, { "epoch": 0.59, "grad_norm": 3.3997262808894453, "learning_rate": 3.757685923382481e-06, "loss": 0.5626, "step": 8350 }, { "epoch": 0.59, "grad_norm": 1.6401445028857886, "learning_rate": 3.7565728254643292e-06, "loss": 0.5989, "step": 8351 }, { "epoch": 0.59, "grad_norm": 0.7164778854079493, "learning_rate": 3.755459793228214e-06, "loss": 0.4465, "step": 8352 }, { "epoch": 0.59, "grad_norm": 1.7868719421654695, "learning_rate": 3.7543468267329296e-06, "loss": 0.5282, "step": 8353 }, { "epoch": 0.59, "grad_norm": 1.8730120549597624, "learning_rate": 3.753233926037268e-06, "loss": 0.5141, "step": 8354 }, { "epoch": 0.59, "grad_norm": 1.7329324824598293, "learning_rate": 3.752121091200015e-06, "loss": 0.5291, "step": 8355 }, { "epoch": 0.59, "grad_norm": 1.7116749710912391, "learning_rate": 3.751008322279952e-06, "loss": 0.5094, "step": 8356 }, { "epoch": 0.59, "grad_norm": 1.8907945600145537, "learning_rate": 3.7498956193358636e-06, "loss": 0.5119, "step": 8357 }, { "epoch": 0.59, "grad_norm": 1.8949801498358252, "learning_rate": 3.7487829824265255e-06, "loss": 0.5254, "step": 8358 }, { "epoch": 0.59, "grad_norm": 1.8236537947525284, "learning_rate": 3.7476704116107066e-06, "loss": 0.5509, "step": 8359 }, { "epoch": 0.59, "grad_norm": 2.024309841428457, "learning_rate": 3.7465579069471813e-06, "loss": 0.5624, "step": 8360 }, { "epoch": 0.59, "grad_norm": 1.8894171708903045, "learning_rate": 3.7454454684947163e-06, "loss": 0.6126, "step": 8361 }, { "epoch": 0.59, "grad_norm": 1.8874540845282881, "learning_rate": 3.744333096312072e-06, "loss": 0.4951, "step": 8362 }, { "epoch": 0.59, "grad_norm": 1.9397125909755997, "learning_rate": 3.7432207904580066e-06, "loss": 0.5501, "step": 8363 }, { "epoch": 0.59, "grad_norm": 2.028917642071207, "learning_rate": 3.7421085509912797e-06, "loss": 0.5372, "step": 8364 }, { "epoch": 0.59, "grad_norm": 1.5973939438244427, "learning_rate": 3.7409963779706415e-06, "loss": 0.4823, "step": 8365 }, { "epoch": 0.59, "grad_norm": 1.6936539483684092, "learning_rate": 3.7398842714548395e-06, "loss": 0.5207, "step": 8366 }, { "epoch": 0.59, "grad_norm": 1.5193164999190856, "learning_rate": 3.73877223150262e-06, "loss": 0.5846, "step": 8367 }, { "epoch": 0.59, "grad_norm": 1.6545796634243624, "learning_rate": 3.737660258172725e-06, "loss": 0.5766, "step": 8368 }, { "epoch": 0.59, "grad_norm": 0.7056881652354713, "learning_rate": 3.7365483515238944e-06, "loss": 0.4164, "step": 8369 }, { "epoch": 0.59, "grad_norm": 1.9885831089063595, "learning_rate": 3.7354365116148594e-06, "loss": 0.5702, "step": 8370 }, { "epoch": 0.59, "grad_norm": 0.7920317266708707, "learning_rate": 3.734324738504352e-06, "loss": 0.4556, "step": 8371 }, { "epoch": 0.59, "grad_norm": 1.4477074450110912, "learning_rate": 3.7332130322511016e-06, "loss": 0.533, "step": 8372 }, { "epoch": 0.59, "grad_norm": 1.4846544533535329, "learning_rate": 3.7321013929138327e-06, "loss": 0.56, "step": 8373 }, { "epoch": 0.59, "grad_norm": 1.6913307592120124, "learning_rate": 3.7309898205512616e-06, "loss": 0.5697, "step": 8374 }, { "epoch": 0.59, "grad_norm": 1.6753812224936273, "learning_rate": 3.7298783152221093e-06, "loss": 0.5388, "step": 8375 }, { "epoch": 0.59, "grad_norm": 1.9052895916415808, "learning_rate": 3.7287668769850886e-06, "loss": 0.5212, "step": 8376 }, { "epoch": 0.59, "grad_norm": 1.8127285585669985, "learning_rate": 3.7276555058989097e-06, "loss": 0.5912, "step": 8377 }, { "epoch": 0.59, "grad_norm": 1.7066600702084245, "learning_rate": 3.7265442020222764e-06, "loss": 0.5529, "step": 8378 }, { "epoch": 0.59, "grad_norm": 1.5739581009317187, "learning_rate": 3.725432965413895e-06, "loss": 0.4921, "step": 8379 }, { "epoch": 0.59, "grad_norm": 1.705446443523998, "learning_rate": 3.7243217961324628e-06, "loss": 0.5053, "step": 8380 }, { "epoch": 0.59, "grad_norm": 1.6674140281736634, "learning_rate": 3.7232106942366752e-06, "loss": 0.5113, "step": 8381 }, { "epoch": 0.59, "grad_norm": 1.6917089778598349, "learning_rate": 3.7220996597852254e-06, "loss": 0.5534, "step": 8382 }, { "epoch": 0.59, "grad_norm": 1.7539293530247697, "learning_rate": 3.7209886928368017e-06, "loss": 0.5664, "step": 8383 }, { "epoch": 0.59, "grad_norm": 1.422622081894532, "learning_rate": 3.71987779345009e-06, "loss": 0.501, "step": 8384 }, { "epoch": 0.6, "grad_norm": 2.087627391855198, "learning_rate": 3.7187669616837705e-06, "loss": 0.5134, "step": 8385 }, { "epoch": 0.6, "grad_norm": 1.88797064324849, "learning_rate": 3.7176561975965202e-06, "loss": 0.4962, "step": 8386 }, { "epoch": 0.6, "grad_norm": 2.1788565232948103, "learning_rate": 3.7165455012470165e-06, "loss": 0.5312, "step": 8387 }, { "epoch": 0.6, "grad_norm": 2.058239975680285, "learning_rate": 3.715434872693927e-06, "loss": 0.5183, "step": 8388 }, { "epoch": 0.6, "grad_norm": 1.7366744121824125, "learning_rate": 3.7143243119959214e-06, "loss": 0.4598, "step": 8389 }, { "epoch": 0.6, "grad_norm": 2.419152513630357, "learning_rate": 3.71321381921166e-06, "loss": 0.499, "step": 8390 }, { "epoch": 0.6, "grad_norm": 1.639980460932389, "learning_rate": 3.7121033943998074e-06, "loss": 0.4867, "step": 8391 }, { "epoch": 0.6, "grad_norm": 1.5698582986150216, "learning_rate": 3.7109930376190174e-06, "loss": 0.4946, "step": 8392 }, { "epoch": 0.6, "grad_norm": 2.119069914497332, "learning_rate": 3.7098827489279406e-06, "loss": 0.4961, "step": 8393 }, { "epoch": 0.6, "grad_norm": 2.039394087216505, "learning_rate": 3.7087725283852304e-06, "loss": 0.5588, "step": 8394 }, { "epoch": 0.6, "grad_norm": 1.3881358199205418, "learning_rate": 3.7076623760495307e-06, "loss": 0.4585, "step": 8395 }, { "epoch": 0.6, "grad_norm": 1.9353655851044937, "learning_rate": 3.7065522919794823e-06, "loss": 0.5521, "step": 8396 }, { "epoch": 0.6, "grad_norm": 2.6054465556511817, "learning_rate": 3.705442276233725e-06, "loss": 0.5329, "step": 8397 }, { "epoch": 0.6, "grad_norm": 1.7647525978053709, "learning_rate": 3.704332328870892e-06, "loss": 0.5223, "step": 8398 }, { "epoch": 0.6, "grad_norm": 1.9539717477466036, "learning_rate": 3.7032224499496184e-06, "loss": 0.5607, "step": 8399 }, { "epoch": 0.6, "grad_norm": 0.6742280056343347, "learning_rate": 3.7021126395285277e-06, "loss": 0.4327, "step": 8400 }, { "epoch": 0.6, "grad_norm": 0.7448518225024098, "learning_rate": 3.701002897666244e-06, "loss": 0.4272, "step": 8401 }, { "epoch": 0.6, "grad_norm": 0.7267983605003951, "learning_rate": 3.6998932244213908e-06, "loss": 0.4098, "step": 8402 }, { "epoch": 0.6, "grad_norm": 2.08111215081883, "learning_rate": 3.698783619852582e-06, "loss": 0.5795, "step": 8403 }, { "epoch": 0.6, "grad_norm": 0.716560104457119, "learning_rate": 3.697674084018432e-06, "loss": 0.4383, "step": 8404 }, { "epoch": 0.6, "grad_norm": 1.891634625591287, "learning_rate": 3.6965646169775483e-06, "loss": 0.5401, "step": 8405 }, { "epoch": 0.6, "grad_norm": 1.9008643322901941, "learning_rate": 3.6954552187885394e-06, "loss": 0.5309, "step": 8406 }, { "epoch": 0.6, "grad_norm": 1.6939699912376316, "learning_rate": 3.694345889510007e-06, "loss": 0.5307, "step": 8407 }, { "epoch": 0.6, "grad_norm": 1.5917040169994547, "learning_rate": 3.6932366292005463e-06, "loss": 0.53, "step": 8408 }, { "epoch": 0.6, "grad_norm": 0.6799297108008673, "learning_rate": 3.6921274379187572e-06, "loss": 0.4215, "step": 8409 }, { "epoch": 0.6, "grad_norm": 1.6363206927568872, "learning_rate": 3.691018315723228e-06, "loss": 0.5149, "step": 8410 }, { "epoch": 0.6, "grad_norm": 2.761710309927765, "learning_rate": 3.6899092626725462e-06, "loss": 0.6145, "step": 8411 }, { "epoch": 0.6, "grad_norm": 2.3064860362171906, "learning_rate": 3.6888002788252952e-06, "loss": 0.5153, "step": 8412 }, { "epoch": 0.6, "grad_norm": 1.6062453140056026, "learning_rate": 3.687691364240059e-06, "loss": 0.477, "step": 8413 }, { "epoch": 0.6, "grad_norm": 2.3072979958512025, "learning_rate": 3.6865825189754113e-06, "loss": 0.6136, "step": 8414 }, { "epoch": 0.6, "grad_norm": 1.384993033451164, "learning_rate": 3.685473743089925e-06, "loss": 0.5798, "step": 8415 }, { "epoch": 0.6, "grad_norm": 1.7314687943944427, "learning_rate": 3.684365036642168e-06, "loss": 0.5646, "step": 8416 }, { "epoch": 0.6, "grad_norm": 1.6076965723492733, "learning_rate": 3.68325639969071e-06, "loss": 0.4692, "step": 8417 }, { "epoch": 0.6, "grad_norm": 2.7114175178816224, "learning_rate": 3.6821478322941095e-06, "loss": 0.5557, "step": 8418 }, { "epoch": 0.6, "grad_norm": 1.6995126318004241, "learning_rate": 3.681039334510927e-06, "loss": 0.5203, "step": 8419 }, { "epoch": 0.6, "grad_norm": 0.7066413086902869, "learning_rate": 3.6799309063997142e-06, "loss": 0.4268, "step": 8420 }, { "epoch": 0.6, "grad_norm": 0.7493155069207863, "learning_rate": 3.6788225480190255e-06, "loss": 0.4721, "step": 8421 }, { "epoch": 0.6, "grad_norm": 1.5404377534119515, "learning_rate": 3.6777142594274074e-06, "loss": 0.5342, "step": 8422 }, { "epoch": 0.6, "grad_norm": 3.1484231457794656, "learning_rate": 3.6766060406833997e-06, "loss": 0.5329, "step": 8423 }, { "epoch": 0.6, "grad_norm": 2.2211614934160675, "learning_rate": 3.6754978918455475e-06, "loss": 0.5766, "step": 8424 }, { "epoch": 0.6, "grad_norm": 1.7827922027899583, "learning_rate": 3.6743898129723856e-06, "loss": 0.5292, "step": 8425 }, { "epoch": 0.6, "grad_norm": 1.9551897769164317, "learning_rate": 3.673281804122444e-06, "loss": 0.5716, "step": 8426 }, { "epoch": 0.6, "grad_norm": 1.640424045645493, "learning_rate": 3.6721738653542518e-06, "loss": 0.5679, "step": 8427 }, { "epoch": 0.6, "grad_norm": 1.5303126687199309, "learning_rate": 3.671065996726337e-06, "loss": 0.5136, "step": 8428 }, { "epoch": 0.6, "grad_norm": 1.7623890696198723, "learning_rate": 3.66995819829722e-06, "loss": 0.5366, "step": 8429 }, { "epoch": 0.6, "grad_norm": 1.4763085237360196, "learning_rate": 3.668850470125417e-06, "loss": 0.4984, "step": 8430 }, { "epoch": 0.6, "grad_norm": 1.7007394984924344, "learning_rate": 3.6677428122694415e-06, "loss": 0.5736, "step": 8431 }, { "epoch": 0.6, "grad_norm": 1.7325666740469503, "learning_rate": 3.6666352247878063e-06, "loss": 0.5147, "step": 8432 }, { "epoch": 0.6, "grad_norm": 1.7311144202354813, "learning_rate": 3.665527707739016e-06, "loss": 0.5637, "step": 8433 }, { "epoch": 0.6, "grad_norm": 1.8076587874058818, "learning_rate": 3.6644202611815748e-06, "loss": 0.5383, "step": 8434 }, { "epoch": 0.6, "grad_norm": 1.6214910120211192, "learning_rate": 3.6633128851739786e-06, "loss": 0.5175, "step": 8435 }, { "epoch": 0.6, "grad_norm": 1.6736829618330555, "learning_rate": 3.662205579774728e-06, "loss": 0.5186, "step": 8436 }, { "epoch": 0.6, "grad_norm": 0.6418234745547892, "learning_rate": 3.661098345042311e-06, "loss": 0.4493, "step": 8437 }, { "epoch": 0.6, "grad_norm": 1.9413842531911274, "learning_rate": 3.659991181035217e-06, "loss": 0.4986, "step": 8438 }, { "epoch": 0.6, "grad_norm": 1.725408384594458, "learning_rate": 3.658884087811927e-06, "loss": 0.5232, "step": 8439 }, { "epoch": 0.6, "grad_norm": 1.9657218231433107, "learning_rate": 3.6577770654309243e-06, "loss": 0.5422, "step": 8440 }, { "epoch": 0.6, "grad_norm": 1.767906565628731, "learning_rate": 3.6566701139506865e-06, "loss": 0.539, "step": 8441 }, { "epoch": 0.6, "grad_norm": 2.0079875035507406, "learning_rate": 3.655563233429683e-06, "loss": 0.5316, "step": 8442 }, { "epoch": 0.6, "grad_norm": 1.539904462720375, "learning_rate": 3.6544564239263867e-06, "loss": 0.5269, "step": 8443 }, { "epoch": 0.6, "grad_norm": 1.55200659282388, "learning_rate": 3.6533496854992623e-06, "loss": 0.5628, "step": 8444 }, { "epoch": 0.6, "grad_norm": 1.6830297008552193, "learning_rate": 3.6522430182067704e-06, "loss": 0.49, "step": 8445 }, { "epoch": 0.6, "grad_norm": 1.8372938741710332, "learning_rate": 3.6511364221073668e-06, "loss": 0.52, "step": 8446 }, { "epoch": 0.6, "grad_norm": 1.6127534925097204, "learning_rate": 3.6500298972595107e-06, "loss": 0.4968, "step": 8447 }, { "epoch": 0.6, "grad_norm": 0.6861653952254354, "learning_rate": 3.6489234437216474e-06, "loss": 0.4254, "step": 8448 }, { "epoch": 0.6, "grad_norm": 1.609156694371853, "learning_rate": 3.6478170615522278e-06, "loss": 0.5012, "step": 8449 }, { "epoch": 0.6, "grad_norm": 1.8850152859968103, "learning_rate": 3.6467107508096906e-06, "loss": 0.5555, "step": 8450 }, { "epoch": 0.6, "grad_norm": 2.551089139330252, "learning_rate": 3.645604511552479e-06, "loss": 0.5049, "step": 8451 }, { "epoch": 0.6, "grad_norm": 2.479701489216724, "learning_rate": 3.644498343839027e-06, "loss": 0.4981, "step": 8452 }, { "epoch": 0.6, "grad_norm": 1.7036964393343232, "learning_rate": 3.643392247727765e-06, "loss": 0.5241, "step": 8453 }, { "epoch": 0.6, "grad_norm": 1.5354638264525533, "learning_rate": 3.642286223277119e-06, "loss": 0.4589, "step": 8454 }, { "epoch": 0.6, "grad_norm": 1.6714423346925884, "learning_rate": 3.641180270545517e-06, "loss": 0.4794, "step": 8455 }, { "epoch": 0.6, "grad_norm": 1.9452121077743023, "learning_rate": 3.6400743895913775e-06, "loss": 0.5469, "step": 8456 }, { "epoch": 0.6, "grad_norm": 1.7468475271660806, "learning_rate": 3.6389685804731155e-06, "loss": 0.5132, "step": 8457 }, { "epoch": 0.6, "grad_norm": 1.878718162103846, "learning_rate": 3.637862843249147e-06, "loss": 0.5026, "step": 8458 }, { "epoch": 0.6, "grad_norm": 1.9539154861311303, "learning_rate": 3.636757177977878e-06, "loss": 0.5254, "step": 8459 }, { "epoch": 0.6, "grad_norm": 1.5859930834860356, "learning_rate": 3.635651584717715e-06, "loss": 0.5189, "step": 8460 }, { "epoch": 0.6, "grad_norm": 1.9967485788384574, "learning_rate": 3.6345460635270557e-06, "loss": 0.5016, "step": 8461 }, { "epoch": 0.6, "grad_norm": 0.7220680379633276, "learning_rate": 3.633440614464302e-06, "loss": 0.4159, "step": 8462 }, { "epoch": 0.6, "grad_norm": 2.164131922923717, "learning_rate": 3.6323352375878453e-06, "loss": 0.5565, "step": 8463 }, { "epoch": 0.6, "grad_norm": 1.6124963219654278, "learning_rate": 3.6312299329560764e-06, "loss": 0.4642, "step": 8464 }, { "epoch": 0.6, "grad_norm": 2.091709286107164, "learning_rate": 3.630124700627378e-06, "loss": 0.5019, "step": 8465 }, { "epoch": 0.6, "grad_norm": 1.6241556638302501, "learning_rate": 3.629019540660137e-06, "loss": 0.5338, "step": 8466 }, { "epoch": 0.6, "grad_norm": 1.5007975167723544, "learning_rate": 3.6279144531127293e-06, "loss": 0.5408, "step": 8467 }, { "epoch": 0.6, "grad_norm": 1.754604726653088, "learning_rate": 3.626809438043528e-06, "loss": 0.5354, "step": 8468 }, { "epoch": 0.6, "grad_norm": 1.816691222621599, "learning_rate": 3.6257044955109055e-06, "loss": 0.5661, "step": 8469 }, { "epoch": 0.6, "grad_norm": 1.706938793068767, "learning_rate": 3.6245996255732285e-06, "loss": 0.5337, "step": 8470 }, { "epoch": 0.6, "grad_norm": 0.7720404020705618, "learning_rate": 3.6234948282888615e-06, "loss": 0.4589, "step": 8471 }, { "epoch": 0.6, "grad_norm": 1.685261718786503, "learning_rate": 3.622390103716159e-06, "loss": 0.5634, "step": 8472 }, { "epoch": 0.6, "grad_norm": 1.7895304556248426, "learning_rate": 3.6212854519134812e-06, "loss": 0.531, "step": 8473 }, { "epoch": 0.6, "grad_norm": 1.8358878784059525, "learning_rate": 3.6201808729391776e-06, "loss": 0.514, "step": 8474 }, { "epoch": 0.6, "grad_norm": 1.7046440689926985, "learning_rate": 3.619076366851596e-06, "loss": 0.499, "step": 8475 }, { "epoch": 0.6, "grad_norm": 2.0540247521066135, "learning_rate": 3.617971933709078e-06, "loss": 0.5762, "step": 8476 }, { "epoch": 0.6, "grad_norm": 2.097317807870029, "learning_rate": 3.6168675735699664e-06, "loss": 0.543, "step": 8477 }, { "epoch": 0.6, "grad_norm": 1.5410546606354638, "learning_rate": 3.615763286492596e-06, "loss": 0.476, "step": 8478 }, { "epoch": 0.6, "grad_norm": 0.6841170620291218, "learning_rate": 3.6146590725353003e-06, "loss": 0.438, "step": 8479 }, { "epoch": 0.6, "grad_norm": 1.9975002288326282, "learning_rate": 3.613554931756405e-06, "loss": 0.5533, "step": 8480 }, { "epoch": 0.6, "grad_norm": 1.7175342312444326, "learning_rate": 3.6124508642142377e-06, "loss": 0.4976, "step": 8481 }, { "epoch": 0.6, "grad_norm": 2.1019451448738455, "learning_rate": 3.6113468699671174e-06, "loss": 0.5386, "step": 8482 }, { "epoch": 0.6, "grad_norm": 1.8112712426137274, "learning_rate": 3.61024294907336e-06, "loss": 0.5583, "step": 8483 }, { "epoch": 0.6, "grad_norm": 1.6233361508356663, "learning_rate": 3.6091391015912787e-06, "loss": 0.4899, "step": 8484 }, { "epoch": 0.6, "grad_norm": 1.7835638076941047, "learning_rate": 3.6080353275791837e-06, "loss": 0.4973, "step": 8485 }, { "epoch": 0.6, "grad_norm": 1.8464443473124268, "learning_rate": 3.606931627095379e-06, "loss": 0.5419, "step": 8486 }, { "epoch": 0.6, "grad_norm": 1.6703401583103825, "learning_rate": 3.605828000198165e-06, "loss": 0.5133, "step": 8487 }, { "epoch": 0.6, "grad_norm": 1.7300195235556997, "learning_rate": 3.604724446945842e-06, "loss": 0.5835, "step": 8488 }, { "epoch": 0.6, "grad_norm": 1.7206951598385563, "learning_rate": 3.6036209673967016e-06, "loss": 0.478, "step": 8489 }, { "epoch": 0.6, "grad_norm": 1.798518877342212, "learning_rate": 3.6025175616090335e-06, "loss": 0.4517, "step": 8490 }, { "epoch": 0.6, "grad_norm": 1.7483194208158377, "learning_rate": 3.6014142296411207e-06, "loss": 0.5648, "step": 8491 }, { "epoch": 0.6, "grad_norm": 1.523055153038924, "learning_rate": 3.6003109715512484e-06, "loss": 0.461, "step": 8492 }, { "epoch": 0.6, "grad_norm": 1.6793477648911694, "learning_rate": 3.599207787397695e-06, "loss": 0.5214, "step": 8493 }, { "epoch": 0.6, "grad_norm": 1.6538908914919823, "learning_rate": 3.5981046772387322e-06, "loss": 0.5179, "step": 8494 }, { "epoch": 0.6, "grad_norm": 2.519327032008648, "learning_rate": 3.5970016411326292e-06, "loss": 0.4738, "step": 8495 }, { "epoch": 0.6, "grad_norm": 0.7130394859550871, "learning_rate": 3.595898679137655e-06, "loss": 0.4313, "step": 8496 }, { "epoch": 0.6, "grad_norm": 1.5260350057255188, "learning_rate": 3.59479579131207e-06, "loss": 0.5527, "step": 8497 }, { "epoch": 0.6, "grad_norm": 1.6664911053351654, "learning_rate": 3.593692977714133e-06, "loss": 0.5272, "step": 8498 }, { "epoch": 0.6, "grad_norm": 1.496888040088009, "learning_rate": 3.5925902384020968e-06, "loss": 0.4815, "step": 8499 }, { "epoch": 0.6, "grad_norm": 1.7383545468342498, "learning_rate": 3.5914875734342136e-06, "loss": 0.4434, "step": 8500 }, { "epoch": 0.6, "grad_norm": 5.489685136666059, "learning_rate": 3.5903849828687305e-06, "loss": 0.4999, "step": 8501 }, { "epoch": 0.6, "grad_norm": 1.764116540152303, "learning_rate": 3.5892824667638893e-06, "loss": 0.5001, "step": 8502 }, { "epoch": 0.6, "grad_norm": 1.8918923410379185, "learning_rate": 3.5881800251779257e-06, "loss": 0.5346, "step": 8503 }, { "epoch": 0.6, "grad_norm": 1.957370527399626, "learning_rate": 3.5870776581690796e-06, "loss": 0.5316, "step": 8504 }, { "epoch": 0.6, "grad_norm": 2.5442222836100274, "learning_rate": 3.5859753657955795e-06, "loss": 0.4745, "step": 8505 }, { "epoch": 0.6, "grad_norm": 1.6413320543355392, "learning_rate": 3.5848731481156494e-06, "loss": 0.5524, "step": 8506 }, { "epoch": 0.6, "grad_norm": 1.807254487198008, "learning_rate": 3.583771005187516e-06, "loss": 0.5704, "step": 8507 }, { "epoch": 0.6, "grad_norm": 2.2078292157651527, "learning_rate": 3.5826689370693977e-06, "loss": 0.5403, "step": 8508 }, { "epoch": 0.6, "grad_norm": 1.9673840083137735, "learning_rate": 3.581566943819509e-06, "loss": 0.5564, "step": 8509 }, { "epoch": 0.6, "grad_norm": 1.9633379145802055, "learning_rate": 3.5804650254960584e-06, "loss": 0.5163, "step": 8510 }, { "epoch": 0.6, "grad_norm": 1.8161809746126951, "learning_rate": 3.5793631821572572e-06, "loss": 0.4922, "step": 8511 }, { "epoch": 0.6, "grad_norm": 1.7542027044361765, "learning_rate": 3.5782614138613065e-06, "loss": 0.5456, "step": 8512 }, { "epoch": 0.6, "grad_norm": 1.7504002409644621, "learning_rate": 3.5771597206664043e-06, "loss": 0.5623, "step": 8513 }, { "epoch": 0.6, "grad_norm": 1.7733740718895443, "learning_rate": 3.576058102630747e-06, "loss": 0.5053, "step": 8514 }, { "epoch": 0.6, "grad_norm": 1.6836940624096886, "learning_rate": 3.574956559812526e-06, "loss": 0.497, "step": 8515 }, { "epoch": 0.6, "grad_norm": 1.672679931234567, "learning_rate": 3.573855092269929e-06, "loss": 0.5656, "step": 8516 }, { "epoch": 0.6, "grad_norm": 1.5097036679768139, "learning_rate": 3.572753700061139e-06, "loss": 0.4998, "step": 8517 }, { "epoch": 0.6, "grad_norm": 1.835212990995145, "learning_rate": 3.5716523832443315e-06, "loss": 0.5445, "step": 8518 }, { "epoch": 0.6, "grad_norm": 2.2632335643866788, "learning_rate": 3.570551141877688e-06, "loss": 0.5128, "step": 8519 }, { "epoch": 0.6, "grad_norm": 2.077325256037344, "learning_rate": 3.5694499760193756e-06, "loss": 0.5678, "step": 8520 }, { "epoch": 0.6, "grad_norm": 1.9563976654204593, "learning_rate": 3.5683488857275627e-06, "loss": 0.6069, "step": 8521 }, { "epoch": 0.6, "grad_norm": 1.656952269379425, "learning_rate": 3.567247871060413e-06, "loss": 0.4867, "step": 8522 }, { "epoch": 0.6, "grad_norm": 1.5234003657520876, "learning_rate": 3.5661469320760865e-06, "loss": 0.5149, "step": 8523 }, { "epoch": 0.6, "grad_norm": 1.6767181931926602, "learning_rate": 3.565046068832737e-06, "loss": 0.4567, "step": 8524 }, { "epoch": 0.6, "grad_norm": 1.7659694529669896, "learning_rate": 3.5639452813885155e-06, "loss": 0.5, "step": 8525 }, { "epoch": 0.61, "grad_norm": 2.448629918223684, "learning_rate": 3.562844569801571e-06, "loss": 0.5178, "step": 8526 }, { "epoch": 0.61, "grad_norm": 1.9984636895239503, "learning_rate": 3.5617439341300476e-06, "loss": 0.5152, "step": 8527 }, { "epoch": 0.61, "grad_norm": 1.9593409164796303, "learning_rate": 3.560643374432081e-06, "loss": 0.5846, "step": 8528 }, { "epoch": 0.61, "grad_norm": 1.5876671974302838, "learning_rate": 3.559542890765809e-06, "loss": 0.5277, "step": 8529 }, { "epoch": 0.61, "grad_norm": 1.8568271110683356, "learning_rate": 3.558442483189362e-06, "loss": 0.5602, "step": 8530 }, { "epoch": 0.61, "grad_norm": 1.5624427529907663, "learning_rate": 3.5573421517608693e-06, "loss": 0.5021, "step": 8531 }, { "epoch": 0.61, "grad_norm": 0.7292797069158602, "learning_rate": 3.556241896538452e-06, "loss": 0.4195, "step": 8532 }, { "epoch": 0.61, "grad_norm": 1.5714879555592605, "learning_rate": 3.5551417175802282e-06, "loss": 0.5259, "step": 8533 }, { "epoch": 0.61, "grad_norm": 1.6502885950638757, "learning_rate": 3.554041614944316e-06, "loss": 0.5091, "step": 8534 }, { "epoch": 0.61, "grad_norm": 1.7478440126676482, "learning_rate": 3.5529415886888254e-06, "loss": 0.5289, "step": 8535 }, { "epoch": 0.61, "grad_norm": 1.919660465395001, "learning_rate": 3.5518416388718625e-06, "loss": 0.532, "step": 8536 }, { "epoch": 0.61, "grad_norm": 1.9914490422011197, "learning_rate": 3.550741765551532e-06, "loss": 0.4698, "step": 8537 }, { "epoch": 0.61, "grad_norm": 1.5637901791602202, "learning_rate": 3.549641968785933e-06, "loss": 0.5351, "step": 8538 }, { "epoch": 0.61, "grad_norm": 0.7056925718841278, "learning_rate": 3.5485422486331588e-06, "loss": 0.4532, "step": 8539 }, { "epoch": 0.61, "grad_norm": 1.8681022230562094, "learning_rate": 3.5474426051513e-06, "loss": 0.5751, "step": 8540 }, { "epoch": 0.61, "grad_norm": 1.6096352872820463, "learning_rate": 3.5463430383984467e-06, "loss": 0.5749, "step": 8541 }, { "epoch": 0.61, "grad_norm": 1.7693014157040068, "learning_rate": 3.5452435484326796e-06, "loss": 0.5057, "step": 8542 }, { "epoch": 0.61, "grad_norm": 1.763612851440349, "learning_rate": 3.544144135312077e-06, "loss": 0.4819, "step": 8543 }, { "epoch": 0.61, "grad_norm": 2.0328594430343587, "learning_rate": 3.5430447990947133e-06, "loss": 0.5218, "step": 8544 }, { "epoch": 0.61, "grad_norm": 1.792840819943376, "learning_rate": 3.5419455398386613e-06, "loss": 0.6028, "step": 8545 }, { "epoch": 0.61, "grad_norm": 1.6136447860490117, "learning_rate": 3.540846357601988e-06, "loss": 0.5395, "step": 8546 }, { "epoch": 0.61, "grad_norm": 1.6895752755192506, "learning_rate": 3.5397472524427535e-06, "loss": 0.5606, "step": 8547 }, { "epoch": 0.61, "grad_norm": 0.7907280370390577, "learning_rate": 3.5386482244190144e-06, "loss": 0.4288, "step": 8548 }, { "epoch": 0.61, "grad_norm": 1.669234409083218, "learning_rate": 3.5375492735888305e-06, "loss": 0.5348, "step": 8549 }, { "epoch": 0.61, "grad_norm": 2.1692310480009707, "learning_rate": 3.5364504000102483e-06, "loss": 0.4577, "step": 8550 }, { "epoch": 0.61, "grad_norm": 1.8768609492461321, "learning_rate": 3.535351603741316e-06, "loss": 0.4937, "step": 8551 }, { "epoch": 0.61, "grad_norm": 1.8979769644130036, "learning_rate": 3.5342528848400737e-06, "loss": 0.5822, "step": 8552 }, { "epoch": 0.61, "grad_norm": 1.632010467502985, "learning_rate": 3.5331542433645626e-06, "loss": 0.4744, "step": 8553 }, { "epoch": 0.61, "grad_norm": 2.090037254427941, "learning_rate": 3.5320556793728146e-06, "loss": 0.5301, "step": 8554 }, { "epoch": 0.61, "grad_norm": 1.5425418024801691, "learning_rate": 3.530957192922857e-06, "loss": 0.5233, "step": 8555 }, { "epoch": 0.61, "grad_norm": 1.7091554760972223, "learning_rate": 3.5298587840727206e-06, "loss": 0.5807, "step": 8556 }, { "epoch": 0.61, "grad_norm": 2.375482573237626, "learning_rate": 3.5287604528804248e-06, "loss": 0.5263, "step": 8557 }, { "epoch": 0.61, "grad_norm": 1.6296371668817176, "learning_rate": 3.5276621994039863e-06, "loss": 0.5681, "step": 8558 }, { "epoch": 0.61, "grad_norm": 1.740079077811567, "learning_rate": 3.5265640237014177e-06, "loss": 0.4979, "step": 8559 }, { "epoch": 0.61, "grad_norm": 1.7523085195136194, "learning_rate": 3.525465925830732e-06, "loss": 0.4819, "step": 8560 }, { "epoch": 0.61, "grad_norm": 1.8531724375930958, "learning_rate": 3.5243679058499336e-06, "loss": 0.5834, "step": 8561 }, { "epoch": 0.61, "grad_norm": 1.661884359110663, "learning_rate": 3.523269963817022e-06, "loss": 0.571, "step": 8562 }, { "epoch": 0.61, "grad_norm": 1.6013581741907958, "learning_rate": 3.522172099789993e-06, "loss": 0.472, "step": 8563 }, { "epoch": 0.61, "grad_norm": 1.736612761809169, "learning_rate": 3.5210743138268426e-06, "loss": 0.4776, "step": 8564 }, { "epoch": 0.61, "grad_norm": 1.9600178962443644, "learning_rate": 3.519976605985558e-06, "loss": 0.5294, "step": 8565 }, { "epoch": 0.61, "grad_norm": 1.8678277675344102, "learning_rate": 3.518878976324124e-06, "loss": 0.5202, "step": 8566 }, { "epoch": 0.61, "grad_norm": 2.0254992822402764, "learning_rate": 3.5177814249005205e-06, "loss": 0.5309, "step": 8567 }, { "epoch": 0.61, "grad_norm": 2.1136746347464395, "learning_rate": 3.5166839517727258e-06, "loss": 0.4887, "step": 8568 }, { "epoch": 0.61, "grad_norm": 1.6892594463048125, "learning_rate": 3.5155865569987113e-06, "loss": 0.488, "step": 8569 }, { "epoch": 0.61, "grad_norm": 1.5930608923913296, "learning_rate": 3.5144892406364423e-06, "loss": 0.5857, "step": 8570 }, { "epoch": 0.61, "grad_norm": 1.4945056491956892, "learning_rate": 3.5133920027438873e-06, "loss": 0.4749, "step": 8571 }, { "epoch": 0.61, "grad_norm": 1.941993021952384, "learning_rate": 3.5122948433790035e-06, "loss": 0.5301, "step": 8572 }, { "epoch": 0.61, "grad_norm": 2.98198260853281, "learning_rate": 3.5111977625997473e-06, "loss": 0.5872, "step": 8573 }, { "epoch": 0.61, "grad_norm": 2.218499233712483, "learning_rate": 3.5101007604640685e-06, "loss": 0.5514, "step": 8574 }, { "epoch": 0.61, "grad_norm": 6.061663801386839, "learning_rate": 3.5090038370299185e-06, "loss": 0.5662, "step": 8575 }, { "epoch": 0.61, "grad_norm": 1.5927614261550973, "learning_rate": 3.5079069923552374e-06, "loss": 0.5155, "step": 8576 }, { "epoch": 0.61, "grad_norm": 1.796613495643159, "learning_rate": 3.506810226497965e-06, "loss": 0.582, "step": 8577 }, { "epoch": 0.61, "grad_norm": 1.699352637295036, "learning_rate": 3.5057135395160335e-06, "loss": 0.4833, "step": 8578 }, { "epoch": 0.61, "grad_norm": 1.465252888199619, "learning_rate": 3.5046169314673788e-06, "loss": 0.5163, "step": 8579 }, { "epoch": 0.61, "grad_norm": 1.8199480413031062, "learning_rate": 3.503520402409924e-06, "loss": 0.5283, "step": 8580 }, { "epoch": 0.61, "grad_norm": 1.9394787376893605, "learning_rate": 3.502423952401593e-06, "loss": 0.5201, "step": 8581 }, { "epoch": 0.61, "grad_norm": 3.2842731981350988, "learning_rate": 3.501327581500301e-06, "loss": 0.5299, "step": 8582 }, { "epoch": 0.61, "grad_norm": 1.8416611124778581, "learning_rate": 3.500231289763967e-06, "loss": 0.5023, "step": 8583 }, { "epoch": 0.61, "grad_norm": 1.709810411818584, "learning_rate": 3.499135077250498e-06, "loss": 0.5614, "step": 8584 }, { "epoch": 0.61, "grad_norm": 1.7739171010628096, "learning_rate": 3.4980389440177975e-06, "loss": 0.5679, "step": 8585 }, { "epoch": 0.61, "grad_norm": 1.6873529955194646, "learning_rate": 3.4969428901237717e-06, "loss": 0.5345, "step": 8586 }, { "epoch": 0.61, "grad_norm": 1.6090567598692989, "learning_rate": 3.495846915626314e-06, "loss": 0.5732, "step": 8587 }, { "epoch": 0.61, "grad_norm": 1.7043479201062248, "learning_rate": 3.494751020583321e-06, "loss": 0.489, "step": 8588 }, { "epoch": 0.61, "grad_norm": 1.683191299785852, "learning_rate": 3.4936552050526763e-06, "loss": 0.5725, "step": 8589 }, { "epoch": 0.61, "grad_norm": 2.0456061144005218, "learning_rate": 3.4925594690922703e-06, "loss": 0.4688, "step": 8590 }, { "epoch": 0.61, "grad_norm": 1.7431043392552188, "learning_rate": 3.4914638127599816e-06, "loss": 0.4768, "step": 8591 }, { "epoch": 0.61, "grad_norm": 2.218100764236499, "learning_rate": 3.490368236113686e-06, "loss": 0.5787, "step": 8592 }, { "epoch": 0.61, "grad_norm": 1.9616652585769936, "learning_rate": 3.4892727392112522e-06, "loss": 0.588, "step": 8593 }, { "epoch": 0.61, "grad_norm": 2.5495594797245347, "learning_rate": 3.4881773221105543e-06, "loss": 0.5726, "step": 8594 }, { "epoch": 0.61, "grad_norm": 2.0960319080208016, "learning_rate": 3.487081984869452e-06, "loss": 0.5695, "step": 8595 }, { "epoch": 0.61, "grad_norm": 1.5890544908066384, "learning_rate": 3.485986727545807e-06, "loss": 0.6049, "step": 8596 }, { "epoch": 0.61, "grad_norm": 1.7871155643250827, "learning_rate": 3.4848915501974704e-06, "loss": 0.4913, "step": 8597 }, { "epoch": 0.61, "grad_norm": 1.8860304806161405, "learning_rate": 3.4837964528822986e-06, "loss": 0.533, "step": 8598 }, { "epoch": 0.61, "grad_norm": 1.995525144606837, "learning_rate": 3.4827014356581356e-06, "loss": 0.5443, "step": 8599 }, { "epoch": 0.61, "grad_norm": 1.7909766442646002, "learning_rate": 3.4816064985828225e-06, "loss": 0.4908, "step": 8600 }, { "epoch": 0.61, "grad_norm": 0.6517337367138254, "learning_rate": 3.4805116417142006e-06, "loss": 0.4267, "step": 8601 }, { "epoch": 0.61, "grad_norm": 2.398158352388218, "learning_rate": 3.4794168651101013e-06, "loss": 0.545, "step": 8602 }, { "epoch": 0.61, "grad_norm": 1.7285264595841063, "learning_rate": 3.4783221688283573e-06, "loss": 0.575, "step": 8603 }, { "epoch": 0.61, "grad_norm": 1.7758076016397153, "learning_rate": 3.4772275529267895e-06, "loss": 0.4789, "step": 8604 }, { "epoch": 0.61, "grad_norm": 1.797003567026006, "learning_rate": 3.4761330174632257e-06, "loss": 0.5128, "step": 8605 }, { "epoch": 0.61, "grad_norm": 1.5512389643162565, "learning_rate": 3.4750385624954784e-06, "loss": 0.5028, "step": 8606 }, { "epoch": 0.61, "grad_norm": 1.599609294590339, "learning_rate": 3.473944188081362e-06, "loss": 0.4465, "step": 8607 }, { "epoch": 0.61, "grad_norm": 2.4101185672505796, "learning_rate": 3.472849894278682e-06, "loss": 0.5498, "step": 8608 }, { "epoch": 0.61, "grad_norm": 1.8038988271282983, "learning_rate": 3.471755681145248e-06, "loss": 0.5416, "step": 8609 }, { "epoch": 0.61, "grad_norm": 1.7638431731433624, "learning_rate": 3.4706615487388558e-06, "loss": 0.5209, "step": 8610 }, { "epoch": 0.61, "grad_norm": 1.925043360975848, "learning_rate": 3.469567497117304e-06, "loss": 0.5223, "step": 8611 }, { "epoch": 0.61, "grad_norm": 1.8156497932930622, "learning_rate": 3.4684735263383806e-06, "loss": 0.5206, "step": 8612 }, { "epoch": 0.61, "grad_norm": 1.813503821841933, "learning_rate": 3.4673796364598765e-06, "loss": 0.6143, "step": 8613 }, { "epoch": 0.61, "grad_norm": 1.5192525632239866, "learning_rate": 3.466285827539574e-06, "loss": 0.4692, "step": 8614 }, { "epoch": 0.61, "grad_norm": 1.8270467887401127, "learning_rate": 3.465192099635249e-06, "loss": 0.5583, "step": 8615 }, { "epoch": 0.61, "grad_norm": 1.9076050656503434, "learning_rate": 3.4640984528046795e-06, "loss": 0.5507, "step": 8616 }, { "epoch": 0.61, "grad_norm": 1.6868215725175648, "learning_rate": 3.4630048871056337e-06, "loss": 0.4731, "step": 8617 }, { "epoch": 0.61, "grad_norm": 2.7849614511369456, "learning_rate": 3.4619114025958787e-06, "loss": 0.5375, "step": 8618 }, { "epoch": 0.61, "grad_norm": 1.5316625511303092, "learning_rate": 3.4608179993331725e-06, "loss": 0.5198, "step": 8619 }, { "epoch": 0.61, "grad_norm": 1.706739027702324, "learning_rate": 3.459724677375278e-06, "loss": 0.5681, "step": 8620 }, { "epoch": 0.61, "grad_norm": 1.6227669831218552, "learning_rate": 3.4586314367799445e-06, "loss": 0.4637, "step": 8621 }, { "epoch": 0.61, "grad_norm": 1.9206403426201597, "learning_rate": 3.4575382776049215e-06, "loss": 0.5003, "step": 8622 }, { "epoch": 0.61, "grad_norm": 1.5388343385147887, "learning_rate": 3.4564451999079514e-06, "loss": 0.4844, "step": 8623 }, { "epoch": 0.61, "grad_norm": 1.9585093878232358, "learning_rate": 3.455352203746778e-06, "loss": 0.5458, "step": 8624 }, { "epoch": 0.61, "grad_norm": 1.647005637683755, "learning_rate": 3.4542592891791336e-06, "loss": 0.4669, "step": 8625 }, { "epoch": 0.61, "grad_norm": 1.8003118173691572, "learning_rate": 3.453166456262753e-06, "loss": 0.5275, "step": 8626 }, { "epoch": 0.61, "grad_norm": 1.6796122718452906, "learning_rate": 3.4520737050553597e-06, "loss": 0.5815, "step": 8627 }, { "epoch": 0.61, "grad_norm": 1.8511951503229302, "learning_rate": 3.45098103561468e-06, "loss": 0.4375, "step": 8628 }, { "epoch": 0.61, "grad_norm": 2.0407871421057666, "learning_rate": 3.449888447998431e-06, "loss": 0.5277, "step": 8629 }, { "epoch": 0.61, "grad_norm": 1.682204350976052, "learning_rate": 3.4487959422643258e-06, "loss": 0.5119, "step": 8630 }, { "epoch": 0.61, "grad_norm": 1.6878932183954773, "learning_rate": 3.447703518470075e-06, "loss": 0.5613, "step": 8631 }, { "epoch": 0.61, "grad_norm": 2.042871195740435, "learning_rate": 3.446611176673384e-06, "loss": 0.515, "step": 8632 }, { "epoch": 0.61, "grad_norm": 1.8148633910102985, "learning_rate": 3.445518916931955e-06, "loss": 0.43, "step": 8633 }, { "epoch": 0.61, "grad_norm": 1.4018971984286506, "learning_rate": 3.4444267393034823e-06, "loss": 0.5058, "step": 8634 }, { "epoch": 0.61, "grad_norm": 2.034117699742884, "learning_rate": 3.4433346438456626e-06, "loss": 0.5595, "step": 8635 }, { "epoch": 0.61, "grad_norm": 1.8971252949494033, "learning_rate": 3.4422426306161823e-06, "loss": 0.5885, "step": 8636 }, { "epoch": 0.61, "grad_norm": 1.9524806164744102, "learning_rate": 3.441150699672724e-06, "loss": 0.5934, "step": 8637 }, { "epoch": 0.61, "grad_norm": 2.060682299676636, "learning_rate": 3.4400588510729653e-06, "loss": 0.6305, "step": 8638 }, { "epoch": 0.61, "grad_norm": 1.613404989710232, "learning_rate": 3.4389670848745853e-06, "loss": 0.5218, "step": 8639 }, { "epoch": 0.61, "grad_norm": 1.6738325295174015, "learning_rate": 3.4378754011352544e-06, "loss": 0.5372, "step": 8640 }, { "epoch": 0.61, "grad_norm": 1.7116897543054015, "learning_rate": 3.4367837999126387e-06, "loss": 0.512, "step": 8641 }, { "epoch": 0.61, "grad_norm": 1.609462177648127, "learning_rate": 3.4356922812643965e-06, "loss": 0.5032, "step": 8642 }, { "epoch": 0.61, "grad_norm": 1.784940138181244, "learning_rate": 3.4346008452481905e-06, "loss": 0.5168, "step": 8643 }, { "epoch": 0.61, "grad_norm": 1.994258288383099, "learning_rate": 3.4335094919216727e-06, "loss": 0.5121, "step": 8644 }, { "epoch": 0.61, "grad_norm": 2.205454381757803, "learning_rate": 3.4324182213424904e-06, "loss": 0.5259, "step": 8645 }, { "epoch": 0.61, "grad_norm": 1.6637040572898558, "learning_rate": 3.4313270335682903e-06, "loss": 0.5143, "step": 8646 }, { "epoch": 0.61, "grad_norm": 1.9105361910812084, "learning_rate": 3.4302359286567107e-06, "loss": 0.4649, "step": 8647 }, { "epoch": 0.61, "grad_norm": 1.7796593830722696, "learning_rate": 3.4291449066653892e-06, "loss": 0.5134, "step": 8648 }, { "epoch": 0.61, "grad_norm": 3.518521295497625, "learning_rate": 3.428053967651955e-06, "loss": 0.4941, "step": 8649 }, { "epoch": 0.61, "grad_norm": 1.8614138367925748, "learning_rate": 3.426963111674039e-06, "loss": 0.5849, "step": 8650 }, { "epoch": 0.61, "grad_norm": 0.698779158847941, "learning_rate": 3.4258723387892606e-06, "loss": 0.3946, "step": 8651 }, { "epoch": 0.61, "grad_norm": 1.6699290795618462, "learning_rate": 3.4247816490552397e-06, "loss": 0.4839, "step": 8652 }, { "epoch": 0.61, "grad_norm": 5.150985150635657, "learning_rate": 3.4236910425295877e-06, "loss": 0.547, "step": 8653 }, { "epoch": 0.61, "grad_norm": 4.153213924331854, "learning_rate": 3.4226005192699176e-06, "loss": 0.5473, "step": 8654 }, { "epoch": 0.61, "grad_norm": 1.932604065261497, "learning_rate": 3.421510079333833e-06, "loss": 0.6506, "step": 8655 }, { "epoch": 0.61, "grad_norm": 1.9940902301764873, "learning_rate": 3.4204197227789354e-06, "loss": 0.5697, "step": 8656 }, { "epoch": 0.61, "grad_norm": 2.4088539330779293, "learning_rate": 3.419329449662818e-06, "loss": 0.4976, "step": 8657 }, { "epoch": 0.61, "grad_norm": 1.582088661610243, "learning_rate": 3.4182392600430774e-06, "loss": 0.4251, "step": 8658 }, { "epoch": 0.61, "grad_norm": 1.6874182691325592, "learning_rate": 3.4171491539772987e-06, "loss": 0.5435, "step": 8659 }, { "epoch": 0.61, "grad_norm": 2.112689960500304, "learning_rate": 3.416059131523064e-06, "loss": 0.4929, "step": 8660 }, { "epoch": 0.61, "grad_norm": 1.8404314423311119, "learning_rate": 3.4149691927379537e-06, "loss": 0.5348, "step": 8661 }, { "epoch": 0.61, "grad_norm": 0.7351351634574725, "learning_rate": 3.413879337679541e-06, "loss": 0.421, "step": 8662 }, { "epoch": 0.61, "grad_norm": 1.881815962240398, "learning_rate": 3.4127895664053965e-06, "loss": 0.5826, "step": 8663 }, { "epoch": 0.61, "grad_norm": 2.8455034811340165, "learning_rate": 3.4116998789730842e-06, "loss": 0.5378, "step": 8664 }, { "epoch": 0.61, "grad_norm": 2.3794682897095276, "learning_rate": 3.4106102754401684e-06, "loss": 0.5115, "step": 8665 }, { "epoch": 0.61, "grad_norm": 1.811074004947556, "learning_rate": 3.409520755864203e-06, "loss": 0.5958, "step": 8666 }, { "epoch": 0.62, "grad_norm": 0.7093287021883683, "learning_rate": 3.4084313203027397e-06, "loss": 0.4473, "step": 8667 }, { "epoch": 0.62, "grad_norm": 1.648522436314926, "learning_rate": 3.4073419688133267e-06, "loss": 0.551, "step": 8668 }, { "epoch": 0.62, "grad_norm": 0.64521368998871, "learning_rate": 3.4062527014535075e-06, "loss": 0.4293, "step": 8669 }, { "epoch": 0.62, "grad_norm": 1.792444529117091, "learning_rate": 3.4051635182808224e-06, "loss": 0.5477, "step": 8670 }, { "epoch": 0.62, "grad_norm": 0.7402741843603029, "learning_rate": 3.4040744193528043e-06, "loss": 0.4381, "step": 8671 }, { "epoch": 0.62, "grad_norm": 1.725039163527142, "learning_rate": 3.4029854047269807e-06, "loss": 0.5215, "step": 8672 }, { "epoch": 0.62, "grad_norm": 2.2198063120849194, "learning_rate": 3.4018964744608818e-06, "loss": 0.5937, "step": 8673 }, { "epoch": 0.62, "grad_norm": 1.900822413372096, "learning_rate": 3.400807628612026e-06, "loss": 0.5652, "step": 8674 }, { "epoch": 0.62, "grad_norm": 1.8193036803983371, "learning_rate": 3.3997188672379288e-06, "loss": 0.5772, "step": 8675 }, { "epoch": 0.62, "grad_norm": 1.9118938818426392, "learning_rate": 3.3986301903961044e-06, "loss": 0.5529, "step": 8676 }, { "epoch": 0.62, "grad_norm": 1.6266251939683571, "learning_rate": 3.397541598144059e-06, "loss": 0.5689, "step": 8677 }, { "epoch": 0.62, "grad_norm": 1.9911971014897505, "learning_rate": 3.3964530905392973e-06, "loss": 0.5332, "step": 8678 }, { "epoch": 0.62, "grad_norm": 1.782663298535882, "learning_rate": 3.395364667639317e-06, "loss": 0.537, "step": 8679 }, { "epoch": 0.62, "grad_norm": 1.5432822311263883, "learning_rate": 3.3942763295016102e-06, "loss": 0.5644, "step": 8680 }, { "epoch": 0.62, "grad_norm": 2.1036298410776286, "learning_rate": 3.39318807618367e-06, "loss": 0.5114, "step": 8681 }, { "epoch": 0.62, "grad_norm": 1.7247365991192332, "learning_rate": 3.39209990774298e-06, "loss": 0.5419, "step": 8682 }, { "epoch": 0.62, "grad_norm": 1.8635104929423267, "learning_rate": 3.391011824237021e-06, "loss": 0.4947, "step": 8683 }, { "epoch": 0.62, "grad_norm": 0.7630188709424032, "learning_rate": 3.389923825723269e-06, "loss": 0.4662, "step": 8684 }, { "epoch": 0.62, "grad_norm": 1.6432060861959497, "learning_rate": 3.388835912259198e-06, "loss": 0.5169, "step": 8685 }, { "epoch": 0.62, "grad_norm": 0.7436378893697764, "learning_rate": 3.387748083902273e-06, "loss": 0.44, "step": 8686 }, { "epoch": 0.62, "grad_norm": 1.6815626134445634, "learning_rate": 3.3866603407099553e-06, "loss": 0.5345, "step": 8687 }, { "epoch": 0.62, "grad_norm": 1.6078976835107763, "learning_rate": 3.385572682739707e-06, "loss": 0.5142, "step": 8688 }, { "epoch": 0.62, "grad_norm": 1.4752894209064205, "learning_rate": 3.3844851100489805e-06, "loss": 0.481, "step": 8689 }, { "epoch": 0.62, "grad_norm": 1.5824417373169573, "learning_rate": 3.3833976226952236e-06, "loss": 0.5216, "step": 8690 }, { "epoch": 0.62, "grad_norm": 1.8663350529602494, "learning_rate": 3.3823102207358806e-06, "loss": 0.514, "step": 8691 }, { "epoch": 0.62, "grad_norm": 1.7372084891678465, "learning_rate": 3.3812229042283952e-06, "loss": 0.5772, "step": 8692 }, { "epoch": 0.62, "grad_norm": 1.6541724232330821, "learning_rate": 3.3801356732302017e-06, "loss": 0.5317, "step": 8693 }, { "epoch": 0.62, "grad_norm": 1.8360050281720424, "learning_rate": 3.37904852779873e-06, "loss": 0.5287, "step": 8694 }, { "epoch": 0.62, "grad_norm": 1.528874961048527, "learning_rate": 3.3779614679914065e-06, "loss": 0.5326, "step": 8695 }, { "epoch": 0.62, "grad_norm": 1.5873512777811323, "learning_rate": 3.3768744938656557e-06, "loss": 0.5498, "step": 8696 }, { "epoch": 0.62, "grad_norm": 3.2058477143882076, "learning_rate": 3.375787605478893e-06, "loss": 0.591, "step": 8697 }, { "epoch": 0.62, "grad_norm": 0.7104820141122891, "learning_rate": 3.374700802888533e-06, "loss": 0.4612, "step": 8698 }, { "epoch": 0.62, "grad_norm": 1.5145771750535428, "learning_rate": 3.373614086151984e-06, "loss": 0.4466, "step": 8699 }, { "epoch": 0.62, "grad_norm": 1.5531167678437505, "learning_rate": 3.3725274553266507e-06, "loss": 0.5019, "step": 8700 }, { "epoch": 0.62, "grad_norm": 1.5329267700508526, "learning_rate": 3.3714409104699317e-06, "loss": 0.5157, "step": 8701 }, { "epoch": 0.62, "grad_norm": 1.9184291329123824, "learning_rate": 3.3703544516392206e-06, "loss": 0.4969, "step": 8702 }, { "epoch": 0.62, "grad_norm": 2.302020837304368, "learning_rate": 3.3692680788919106e-06, "loss": 0.5808, "step": 8703 }, { "epoch": 0.62, "grad_norm": 2.192828168145422, "learning_rate": 3.3681817922853864e-06, "loss": 0.5397, "step": 8704 }, { "epoch": 0.62, "grad_norm": 1.7544172058140637, "learning_rate": 3.3670955918770286e-06, "loss": 0.4681, "step": 8705 }, { "epoch": 0.62, "grad_norm": 1.7326022392870213, "learning_rate": 3.366009477724214e-06, "loss": 0.5634, "step": 8706 }, { "epoch": 0.62, "grad_norm": 1.6442635530563818, "learning_rate": 3.3649234498843176e-06, "loss": 0.5235, "step": 8707 }, { "epoch": 0.62, "grad_norm": 1.7724380689672161, "learning_rate": 3.3638375084147048e-06, "loss": 0.5165, "step": 8708 }, { "epoch": 0.62, "grad_norm": 1.9819695917615792, "learning_rate": 3.362751653372738e-06, "loss": 0.4727, "step": 8709 }, { "epoch": 0.62, "grad_norm": 1.7370736255479442, "learning_rate": 3.3616658848157756e-06, "loss": 0.5223, "step": 8710 }, { "epoch": 0.62, "grad_norm": 0.6821214642713133, "learning_rate": 3.360580202801174e-06, "loss": 0.4152, "step": 8711 }, { "epoch": 0.62, "grad_norm": 1.4619705666751812, "learning_rate": 3.359494607386281e-06, "loss": 0.4724, "step": 8712 }, { "epoch": 0.62, "grad_norm": 2.195350949697797, "learning_rate": 3.3584090986284413e-06, "loss": 0.605, "step": 8713 }, { "epoch": 0.62, "grad_norm": 1.5245448970656623, "learning_rate": 3.3573236765849948e-06, "loss": 0.5084, "step": 8714 }, { "epoch": 0.62, "grad_norm": 2.6616741485570996, "learning_rate": 3.356238341313279e-06, "loss": 0.546, "step": 8715 }, { "epoch": 0.62, "grad_norm": 1.7317094974200704, "learning_rate": 3.3551530928706243e-06, "loss": 0.5504, "step": 8716 }, { "epoch": 0.62, "grad_norm": 1.6197301181188668, "learning_rate": 3.3540679313143547e-06, "loss": 0.5106, "step": 8717 }, { "epoch": 0.62, "grad_norm": 1.606982083849189, "learning_rate": 3.3529828567017964e-06, "loss": 0.563, "step": 8718 }, { "epoch": 0.62, "grad_norm": 1.9752174382659495, "learning_rate": 3.3518978690902626e-06, "loss": 0.5582, "step": 8719 }, { "epoch": 0.62, "grad_norm": 1.8474482585100718, "learning_rate": 3.3508129685370695e-06, "loss": 0.5758, "step": 8720 }, { "epoch": 0.62, "grad_norm": 1.6622527822775381, "learning_rate": 3.349728155099522e-06, "loss": 0.489, "step": 8721 }, { "epoch": 0.62, "grad_norm": 1.709067352864673, "learning_rate": 3.3486434288349267e-06, "loss": 0.5109, "step": 8722 }, { "epoch": 0.62, "grad_norm": 1.6588613551054197, "learning_rate": 3.347558789800581e-06, "loss": 0.5557, "step": 8723 }, { "epoch": 0.62, "grad_norm": 1.6021685648710173, "learning_rate": 3.3464742380537794e-06, "loss": 0.4991, "step": 8724 }, { "epoch": 0.62, "grad_norm": 2.0258177447801304, "learning_rate": 3.3453897736518095e-06, "loss": 0.5238, "step": 8725 }, { "epoch": 0.62, "grad_norm": 2.072276249437479, "learning_rate": 3.34430539665196e-06, "loss": 0.5489, "step": 8726 }, { "epoch": 0.62, "grad_norm": 1.8725454293532717, "learning_rate": 3.3432211071115094e-06, "loss": 0.5501, "step": 8727 }, { "epoch": 0.62, "grad_norm": 1.3853614999313302, "learning_rate": 3.342136905087733e-06, "loss": 0.5114, "step": 8728 }, { "epoch": 0.62, "grad_norm": 1.9139110569609044, "learning_rate": 3.341052790637903e-06, "loss": 0.4895, "step": 8729 }, { "epoch": 0.62, "grad_norm": 1.9736040968468074, "learning_rate": 3.339968763819288e-06, "loss": 0.5661, "step": 8730 }, { "epoch": 0.62, "grad_norm": 2.947135807016129, "learning_rate": 3.338884824689147e-06, "loss": 0.4838, "step": 8731 }, { "epoch": 0.62, "grad_norm": 0.7306704436808996, "learning_rate": 3.3378009733047354e-06, "loss": 0.4083, "step": 8732 }, { "epoch": 0.62, "grad_norm": 2.0070891191150984, "learning_rate": 3.3367172097233117e-06, "loss": 0.5441, "step": 8733 }, { "epoch": 0.62, "grad_norm": 1.5771871603849519, "learning_rate": 3.33563353400212e-06, "loss": 0.5367, "step": 8734 }, { "epoch": 0.62, "grad_norm": 1.9145345593286793, "learning_rate": 3.3345499461984053e-06, "loss": 0.5791, "step": 8735 }, { "epoch": 0.62, "grad_norm": 1.8357422563510664, "learning_rate": 3.3334664463694038e-06, "loss": 0.5064, "step": 8736 }, { "epoch": 0.62, "grad_norm": 1.7345692670567234, "learning_rate": 3.332383034572354e-06, "loss": 0.5641, "step": 8737 }, { "epoch": 0.62, "grad_norm": 1.4792210902259442, "learning_rate": 3.331299710864483e-06, "loss": 0.4185, "step": 8738 }, { "epoch": 0.62, "grad_norm": 0.6983032508015882, "learning_rate": 3.330216475303015e-06, "loss": 0.4305, "step": 8739 }, { "epoch": 0.62, "grad_norm": 2.014625610080385, "learning_rate": 3.329133327945169e-06, "loss": 0.4754, "step": 8740 }, { "epoch": 0.62, "grad_norm": 1.7267475796402738, "learning_rate": 3.328050268848164e-06, "loss": 0.5315, "step": 8741 }, { "epoch": 0.62, "grad_norm": 0.7331320059676318, "learning_rate": 3.3269672980692088e-06, "loss": 0.4193, "step": 8742 }, { "epoch": 0.62, "grad_norm": 1.8977198556168882, "learning_rate": 3.32588441566551e-06, "loss": 0.6017, "step": 8743 }, { "epoch": 0.62, "grad_norm": 2.028536633135678, "learning_rate": 3.324801621694268e-06, "loss": 0.5636, "step": 8744 }, { "epoch": 0.62, "grad_norm": 1.8173604061545672, "learning_rate": 3.3237189162126824e-06, "loss": 0.5427, "step": 8745 }, { "epoch": 0.62, "grad_norm": 1.6062393326220585, "learning_rate": 3.322636299277944e-06, "loss": 0.5804, "step": 8746 }, { "epoch": 0.62, "grad_norm": 1.8896105544901673, "learning_rate": 3.321553770947237e-06, "loss": 0.6159, "step": 8747 }, { "epoch": 0.62, "grad_norm": 1.570061667820974, "learning_rate": 3.32047133127775e-06, "loss": 0.5648, "step": 8748 }, { "epoch": 0.62, "grad_norm": 1.8521180071137868, "learning_rate": 3.3193889803266568e-06, "loss": 0.5031, "step": 8749 }, { "epoch": 0.62, "grad_norm": 1.8867655262847396, "learning_rate": 3.3183067181511336e-06, "loss": 0.59, "step": 8750 }, { "epoch": 0.62, "grad_norm": 1.3955502056782982, "learning_rate": 3.317224544808346e-06, "loss": 0.4875, "step": 8751 }, { "epoch": 0.62, "grad_norm": 1.503660982162972, "learning_rate": 3.3161424603554614e-06, "loss": 0.5118, "step": 8752 }, { "epoch": 0.62, "grad_norm": 2.777343821614377, "learning_rate": 3.315060464849639e-06, "loss": 0.4984, "step": 8753 }, { "epoch": 0.62, "grad_norm": 0.6430606160987455, "learning_rate": 3.313978558348031e-06, "loss": 0.4232, "step": 8754 }, { "epoch": 0.62, "grad_norm": 1.7355417784496763, "learning_rate": 3.3128967409077865e-06, "loss": 0.5279, "step": 8755 }, { "epoch": 0.62, "grad_norm": 1.6780742067650607, "learning_rate": 3.3118150125860554e-06, "loss": 0.4869, "step": 8756 }, { "epoch": 0.62, "grad_norm": 1.7744423075096543, "learning_rate": 3.3107333734399745e-06, "loss": 0.4757, "step": 8757 }, { "epoch": 0.62, "grad_norm": 1.5184488750126515, "learning_rate": 3.309651823526682e-06, "loss": 0.5324, "step": 8758 }, { "epoch": 0.62, "grad_norm": 0.7175830555028696, "learning_rate": 3.3085703629033054e-06, "loss": 0.4299, "step": 8759 }, { "epoch": 0.62, "grad_norm": 1.7502343041082078, "learning_rate": 3.307488991626976e-06, "loss": 0.527, "step": 8760 }, { "epoch": 0.62, "grad_norm": 1.7803509969246603, "learning_rate": 3.3064077097548132e-06, "loss": 0.5284, "step": 8761 }, { "epoch": 0.62, "grad_norm": 1.5667037330781803, "learning_rate": 3.3053265173439313e-06, "loss": 0.5422, "step": 8762 }, { "epoch": 0.62, "grad_norm": 1.7612782928933564, "learning_rate": 3.304245414451448e-06, "loss": 0.5255, "step": 8763 }, { "epoch": 0.62, "grad_norm": 1.6949691001918408, "learning_rate": 3.3031644011344656e-06, "loss": 0.5351, "step": 8764 }, { "epoch": 0.62, "grad_norm": 1.8582294466256635, "learning_rate": 3.302083477450091e-06, "loss": 0.5148, "step": 8765 }, { "epoch": 0.62, "grad_norm": 1.6699717830493317, "learning_rate": 3.301002643455419e-06, "loss": 0.5067, "step": 8766 }, { "epoch": 0.62, "grad_norm": 3.1653379266711354, "learning_rate": 3.2999218992075454e-06, "loss": 0.4735, "step": 8767 }, { "epoch": 0.62, "grad_norm": 1.6440443825412594, "learning_rate": 3.2988412447635576e-06, "loss": 0.5807, "step": 8768 }, { "epoch": 0.62, "grad_norm": 1.9731577030414527, "learning_rate": 3.2977606801805405e-06, "loss": 0.5098, "step": 8769 }, { "epoch": 0.62, "grad_norm": 0.7586272671470367, "learning_rate": 3.29668020551557e-06, "loss": 0.4173, "step": 8770 }, { "epoch": 0.62, "grad_norm": 2.1153836239405117, "learning_rate": 3.2955998208257233e-06, "loss": 0.5362, "step": 8771 }, { "epoch": 0.62, "grad_norm": 4.124606752774784, "learning_rate": 3.2945195261680707e-06, "loss": 0.5079, "step": 8772 }, { "epoch": 0.62, "grad_norm": 1.8262355093624791, "learning_rate": 3.293439321599675e-06, "loss": 0.4998, "step": 8773 }, { "epoch": 0.62, "grad_norm": 1.613514762223597, "learning_rate": 3.2923592071775957e-06, "loss": 0.5172, "step": 8774 }, { "epoch": 0.62, "grad_norm": 1.8108967659185795, "learning_rate": 3.2912791829588907e-06, "loss": 0.5209, "step": 8775 }, { "epoch": 0.62, "grad_norm": 2.144065452721013, "learning_rate": 3.2901992490006094e-06, "loss": 0.5653, "step": 8776 }, { "epoch": 0.62, "grad_norm": 1.4240217852935155, "learning_rate": 3.2891194053597963e-06, "loss": 0.5072, "step": 8777 }, { "epoch": 0.62, "grad_norm": 2.0040727027960568, "learning_rate": 3.288039652093494e-06, "loss": 0.5338, "step": 8778 }, { "epoch": 0.62, "grad_norm": 1.581623467424679, "learning_rate": 3.2869599892587386e-06, "loss": 0.5379, "step": 8779 }, { "epoch": 0.62, "grad_norm": 2.450278753159194, "learning_rate": 3.2858804169125614e-06, "loss": 0.5458, "step": 8780 }, { "epoch": 0.62, "grad_norm": 1.962922809515889, "learning_rate": 3.284800935111987e-06, "loss": 0.6345, "step": 8781 }, { "epoch": 0.62, "grad_norm": 2.9471520646468976, "learning_rate": 3.2837215439140408e-06, "loss": 0.4892, "step": 8782 }, { "epoch": 0.62, "grad_norm": 1.7706037224192066, "learning_rate": 3.282642243375738e-06, "loss": 0.569, "step": 8783 }, { "epoch": 0.62, "grad_norm": 1.6486145798614509, "learning_rate": 3.2815630335540914e-06, "loss": 0.5987, "step": 8784 }, { "epoch": 0.62, "grad_norm": 1.6464281226010138, "learning_rate": 3.280483914506106e-06, "loss": 0.5133, "step": 8785 }, { "epoch": 0.62, "grad_norm": 1.7564117321545594, "learning_rate": 3.2794048862887883e-06, "loss": 0.4696, "step": 8786 }, { "epoch": 0.62, "grad_norm": 1.651781603556541, "learning_rate": 3.2783259489591358e-06, "loss": 0.4591, "step": 8787 }, { "epoch": 0.62, "grad_norm": 2.2172293826649137, "learning_rate": 3.2772471025741402e-06, "loss": 0.5185, "step": 8788 }, { "epoch": 0.62, "grad_norm": 1.7221375168178839, "learning_rate": 3.276168347190788e-06, "loss": 0.5016, "step": 8789 }, { "epoch": 0.62, "grad_norm": 2.481062365562745, "learning_rate": 3.2750896828660665e-06, "loss": 0.5287, "step": 8790 }, { "epoch": 0.62, "grad_norm": 1.6989048115097212, "learning_rate": 3.274011109656954e-06, "loss": 0.589, "step": 8791 }, { "epoch": 0.62, "grad_norm": 1.6073771656780032, "learning_rate": 3.2729326276204198e-06, "loss": 0.4542, "step": 8792 }, { "epoch": 0.62, "grad_norm": 4.428510629422196, "learning_rate": 3.2718542368134387e-06, "loss": 0.4972, "step": 8793 }, { "epoch": 0.62, "grad_norm": 2.1827445015129916, "learning_rate": 3.2707759372929717e-06, "loss": 0.4738, "step": 8794 }, { "epoch": 0.62, "grad_norm": 1.6728501731270036, "learning_rate": 3.26969772911598e-06, "loss": 0.5558, "step": 8795 }, { "epoch": 0.62, "grad_norm": 1.5106552761137235, "learning_rate": 3.2686196123394153e-06, "loss": 0.4885, "step": 8796 }, { "epoch": 0.62, "grad_norm": 1.7491874422972191, "learning_rate": 3.2675415870202316e-06, "loss": 0.4892, "step": 8797 }, { "epoch": 0.62, "grad_norm": 2.39076749973051, "learning_rate": 3.2664636532153714e-06, "loss": 0.5415, "step": 8798 }, { "epoch": 0.62, "grad_norm": 1.8904385402759394, "learning_rate": 3.265385810981774e-06, "loss": 0.5233, "step": 8799 }, { "epoch": 0.62, "grad_norm": 1.9259245446769644, "learning_rate": 3.264308060376376e-06, "loss": 0.4875, "step": 8800 }, { "epoch": 0.62, "grad_norm": 1.5332293840163687, "learning_rate": 3.2632304014561078e-06, "loss": 0.5277, "step": 8801 }, { "epoch": 0.62, "grad_norm": 1.6325759527138293, "learning_rate": 3.262152834277895e-06, "loss": 0.5003, "step": 8802 }, { "epoch": 0.62, "grad_norm": 1.6709570828979896, "learning_rate": 3.261075358898659e-06, "loss": 0.4917, "step": 8803 }, { "epoch": 0.62, "grad_norm": 1.9396535043472216, "learning_rate": 3.259997975375312e-06, "loss": 0.5664, "step": 8804 }, { "epoch": 0.62, "grad_norm": 1.7899186275209775, "learning_rate": 3.2589206837647704e-06, "loss": 0.5012, "step": 8805 }, { "epoch": 0.62, "grad_norm": 0.7058695582727664, "learning_rate": 3.2578434841239376e-06, "loss": 0.4368, "step": 8806 }, { "epoch": 0.62, "grad_norm": 1.7069594216857484, "learning_rate": 3.2567663765097147e-06, "loss": 0.4748, "step": 8807 }, { "epoch": 0.63, "grad_norm": 1.7140813372191206, "learning_rate": 3.255689360978998e-06, "loss": 0.5009, "step": 8808 }, { "epoch": 0.63, "grad_norm": 1.963009395356615, "learning_rate": 3.25461243758868e-06, "loss": 0.5472, "step": 8809 }, { "epoch": 0.63, "grad_norm": 1.6029690176009699, "learning_rate": 3.2535356063956487e-06, "loss": 0.513, "step": 8810 }, { "epoch": 0.63, "grad_norm": 2.200908604342597, "learning_rate": 3.252458867456782e-06, "loss": 0.5643, "step": 8811 }, { "epoch": 0.63, "grad_norm": 1.5890209256493737, "learning_rate": 3.2513822208289613e-06, "loss": 0.5055, "step": 8812 }, { "epoch": 0.63, "grad_norm": 1.723558186386323, "learning_rate": 3.2503056665690567e-06, "loss": 0.5412, "step": 8813 }, { "epoch": 0.63, "grad_norm": 1.6174319172625327, "learning_rate": 3.2492292047339352e-06, "loss": 0.5176, "step": 8814 }, { "epoch": 0.63, "grad_norm": 1.5533850945307162, "learning_rate": 3.248152835380459e-06, "loss": 0.4897, "step": 8815 }, { "epoch": 0.63, "grad_norm": 1.692046650517022, "learning_rate": 3.247076558565487e-06, "loss": 0.5218, "step": 8816 }, { "epoch": 0.63, "grad_norm": 2.126519466295127, "learning_rate": 3.2460003743458713e-06, "loss": 0.4877, "step": 8817 }, { "epoch": 0.63, "grad_norm": 1.6737946007985132, "learning_rate": 3.2449242827784587e-06, "loss": 0.4668, "step": 8818 }, { "epoch": 0.63, "grad_norm": 1.9136544766049641, "learning_rate": 3.2438482839200913e-06, "loss": 0.5802, "step": 8819 }, { "epoch": 0.63, "grad_norm": 1.7763654441412375, "learning_rate": 3.24277237782761e-06, "loss": 0.5285, "step": 8820 }, { "epoch": 0.63, "grad_norm": 1.6172640032642618, "learning_rate": 3.2416965645578454e-06, "loss": 0.5531, "step": 8821 }, { "epoch": 0.63, "grad_norm": 1.5639062136345954, "learning_rate": 3.240620844167626e-06, "loss": 0.5114, "step": 8822 }, { "epoch": 0.63, "grad_norm": 1.735754373471776, "learning_rate": 3.2395452167137733e-06, "loss": 0.5243, "step": 8823 }, { "epoch": 0.63, "grad_norm": 1.5071975487230058, "learning_rate": 3.2384696822531103e-06, "loss": 0.5269, "step": 8824 }, { "epoch": 0.63, "grad_norm": 1.8411605995400482, "learning_rate": 3.237394240842447e-06, "loss": 0.5435, "step": 8825 }, { "epoch": 0.63, "grad_norm": 1.8098605838646344, "learning_rate": 3.2363188925385907e-06, "loss": 0.5289, "step": 8826 }, { "epoch": 0.63, "grad_norm": 1.5447150297580017, "learning_rate": 3.2352436373983488e-06, "loss": 0.5708, "step": 8827 }, { "epoch": 0.63, "grad_norm": 1.5727054311043593, "learning_rate": 3.2341684754785173e-06, "loss": 0.5299, "step": 8828 }, { "epoch": 0.63, "grad_norm": 2.371638342278786, "learning_rate": 3.23309340683589e-06, "loss": 0.5328, "step": 8829 }, { "epoch": 0.63, "grad_norm": 1.698520171207794, "learning_rate": 3.2320184315272558e-06, "loss": 0.5227, "step": 8830 }, { "epoch": 0.63, "grad_norm": 1.6490462139394768, "learning_rate": 3.230943549609399e-06, "loss": 0.5486, "step": 8831 }, { "epoch": 0.63, "grad_norm": 2.6099302354869924, "learning_rate": 3.2298687611390988e-06, "loss": 0.5014, "step": 8832 }, { "epoch": 0.63, "grad_norm": 1.6012021741636442, "learning_rate": 3.2287940661731286e-06, "loss": 0.4731, "step": 8833 }, { "epoch": 0.63, "grad_norm": 3.7883477216278996, "learning_rate": 3.227719464768256e-06, "loss": 0.4982, "step": 8834 }, { "epoch": 0.63, "grad_norm": 1.6168369271411065, "learning_rate": 3.2266449569812485e-06, "loss": 0.5363, "step": 8835 }, { "epoch": 0.63, "grad_norm": 1.7955985782876087, "learning_rate": 3.2255705428688633e-06, "loss": 0.5187, "step": 8836 }, { "epoch": 0.63, "grad_norm": 1.667124828339718, "learning_rate": 3.2244962224878527e-06, "loss": 0.4947, "step": 8837 }, { "epoch": 0.63, "grad_norm": 1.7950764707758937, "learning_rate": 3.223421995894968e-06, "loss": 0.497, "step": 8838 }, { "epoch": 0.63, "grad_norm": 1.7778013002698017, "learning_rate": 3.2223478631469553e-06, "loss": 0.4968, "step": 8839 }, { "epoch": 0.63, "grad_norm": 2.223399310624077, "learning_rate": 3.2212738243005513e-06, "loss": 0.5274, "step": 8840 }, { "epoch": 0.63, "grad_norm": 2.143800184782339, "learning_rate": 3.22019987941249e-06, "loss": 0.4535, "step": 8841 }, { "epoch": 0.63, "grad_norm": 2.224501186883041, "learning_rate": 3.219126028539503e-06, "loss": 0.5213, "step": 8842 }, { "epoch": 0.63, "grad_norm": 2.236901032845963, "learning_rate": 3.218052271738314e-06, "loss": 0.5035, "step": 8843 }, { "epoch": 0.63, "grad_norm": 1.9849777726078448, "learning_rate": 3.216978609065641e-06, "loss": 0.5525, "step": 8844 }, { "epoch": 0.63, "grad_norm": 1.72619192613113, "learning_rate": 3.2159050405781996e-06, "loss": 0.5312, "step": 8845 }, { "epoch": 0.63, "grad_norm": 2.428814182269306, "learning_rate": 3.214831566332699e-06, "loss": 0.5307, "step": 8846 }, { "epoch": 0.63, "grad_norm": 2.1267841643936136, "learning_rate": 3.2137581863858453e-06, "loss": 0.4888, "step": 8847 }, { "epoch": 0.63, "grad_norm": 2.7612964070781665, "learning_rate": 3.2126849007943365e-06, "loss": 0.5022, "step": 8848 }, { "epoch": 0.63, "grad_norm": 1.7402150835335857, "learning_rate": 3.2116117096148658e-06, "loss": 0.4739, "step": 8849 }, { "epoch": 0.63, "grad_norm": 1.8809436669449568, "learning_rate": 3.2105386129041267e-06, "loss": 0.5313, "step": 8850 }, { "epoch": 0.63, "grad_norm": 1.9217937425837572, "learning_rate": 3.2094656107188005e-06, "loss": 0.4891, "step": 8851 }, { "epoch": 0.63, "grad_norm": 1.9173151380642097, "learning_rate": 3.2083927031155683e-06, "loss": 0.56, "step": 8852 }, { "epoch": 0.63, "grad_norm": 1.7041845533055937, "learning_rate": 3.2073198901511028e-06, "loss": 0.4731, "step": 8853 }, { "epoch": 0.63, "grad_norm": 1.519594752880686, "learning_rate": 3.206247171882077e-06, "loss": 0.5479, "step": 8854 }, { "epoch": 0.63, "grad_norm": 1.5649561305130497, "learning_rate": 3.205174548365154e-06, "loss": 0.517, "step": 8855 }, { "epoch": 0.63, "grad_norm": 3.7613930744223207, "learning_rate": 3.2041020196569907e-06, "loss": 0.4536, "step": 8856 }, { "epoch": 0.63, "grad_norm": 1.8289240718468172, "learning_rate": 3.2030295858142457e-06, "loss": 0.5744, "step": 8857 }, { "epoch": 0.63, "grad_norm": 1.8977605637114223, "learning_rate": 3.201957246893568e-06, "loss": 0.5428, "step": 8858 }, { "epoch": 0.63, "grad_norm": 1.6572344599573299, "learning_rate": 3.2008850029515993e-06, "loss": 0.503, "step": 8859 }, { "epoch": 0.63, "grad_norm": 0.703639281063879, "learning_rate": 3.1998128540449814e-06, "loss": 0.4451, "step": 8860 }, { "epoch": 0.63, "grad_norm": 2.048165093412023, "learning_rate": 3.198740800230348e-06, "loss": 0.54, "step": 8861 }, { "epoch": 0.63, "grad_norm": 4.119196361087267, "learning_rate": 3.1976688415643296e-06, "loss": 0.6413, "step": 8862 }, { "epoch": 0.63, "grad_norm": 2.069504954444917, "learning_rate": 3.196596978103551e-06, "loss": 0.4928, "step": 8863 }, { "epoch": 0.63, "grad_norm": 1.6724020509815019, "learning_rate": 3.1955252099046287e-06, "loss": 0.4887, "step": 8864 }, { "epoch": 0.63, "grad_norm": 2.3532014702392527, "learning_rate": 3.1944535370241815e-06, "loss": 0.5333, "step": 8865 }, { "epoch": 0.63, "grad_norm": 1.6817071004016269, "learning_rate": 3.193381959518814e-06, "loss": 0.5162, "step": 8866 }, { "epoch": 0.63, "grad_norm": 1.8671215652671114, "learning_rate": 3.192310477445136e-06, "loss": 0.5097, "step": 8867 }, { "epoch": 0.63, "grad_norm": 1.9615373145463573, "learning_rate": 3.1912390908597407e-06, "loss": 0.4868, "step": 8868 }, { "epoch": 0.63, "grad_norm": 2.560988701924626, "learning_rate": 3.1901677998192276e-06, "loss": 0.5307, "step": 8869 }, { "epoch": 0.63, "grad_norm": 1.6477806114724451, "learning_rate": 3.189096604380184e-06, "loss": 0.5499, "step": 8870 }, { "epoch": 0.63, "grad_norm": 1.6299894975318883, "learning_rate": 3.188025504599194e-06, "loss": 0.5225, "step": 8871 }, { "epoch": 0.63, "grad_norm": 1.7495043542644595, "learning_rate": 3.1869545005328352e-06, "loss": 0.4393, "step": 8872 }, { "epoch": 0.63, "grad_norm": 2.334187180576593, "learning_rate": 3.1858835922376843e-06, "loss": 0.5552, "step": 8873 }, { "epoch": 0.63, "grad_norm": 1.88676101440517, "learning_rate": 3.184812779770308e-06, "loss": 0.5329, "step": 8874 }, { "epoch": 0.63, "grad_norm": 1.843975804988695, "learning_rate": 3.1837420631872704e-06, "loss": 0.5103, "step": 8875 }, { "epoch": 0.63, "grad_norm": 1.538061725992317, "learning_rate": 3.1826714425451343e-06, "loss": 0.4815, "step": 8876 }, { "epoch": 0.63, "grad_norm": 2.0041269282416097, "learning_rate": 3.1816009179004502e-06, "loss": 0.563, "step": 8877 }, { "epoch": 0.63, "grad_norm": 1.638735922471186, "learning_rate": 3.1805304893097666e-06, "loss": 0.5191, "step": 8878 }, { "epoch": 0.63, "grad_norm": 0.6791779937912032, "learning_rate": 3.179460156829626e-06, "loss": 0.4364, "step": 8879 }, { "epoch": 0.63, "grad_norm": 1.9983680865835123, "learning_rate": 3.1783899205165713e-06, "loss": 0.5043, "step": 8880 }, { "epoch": 0.63, "grad_norm": 1.9992011112974084, "learning_rate": 3.1773197804271314e-06, "loss": 0.5856, "step": 8881 }, { "epoch": 0.63, "grad_norm": 1.6819113025305745, "learning_rate": 3.176249736617839e-06, "loss": 0.5095, "step": 8882 }, { "epoch": 0.63, "grad_norm": 1.665053879346903, "learning_rate": 3.1751797891452123e-06, "loss": 0.518, "step": 8883 }, { "epoch": 0.63, "grad_norm": 3.2013840883739455, "learning_rate": 3.1741099380657747e-06, "loss": 0.5234, "step": 8884 }, { "epoch": 0.63, "grad_norm": 1.620405058088708, "learning_rate": 3.1730401834360374e-06, "loss": 0.4847, "step": 8885 }, { "epoch": 0.63, "grad_norm": 0.7094216013713207, "learning_rate": 3.1719705253125077e-06, "loss": 0.4432, "step": 8886 }, { "epoch": 0.63, "grad_norm": 1.951322121397613, "learning_rate": 3.1709009637516873e-06, "loss": 0.5012, "step": 8887 }, { "epoch": 0.63, "grad_norm": 1.9668804443441663, "learning_rate": 3.169831498810079e-06, "loss": 0.5914, "step": 8888 }, { "epoch": 0.63, "grad_norm": 2.266091473315016, "learning_rate": 3.1687621305441697e-06, "loss": 0.5139, "step": 8889 }, { "epoch": 0.63, "grad_norm": 1.550962411101854, "learning_rate": 3.1676928590104496e-06, "loss": 0.5016, "step": 8890 }, { "epoch": 0.63, "grad_norm": 1.6751889401452973, "learning_rate": 3.166623684265403e-06, "loss": 0.5255, "step": 8891 }, { "epoch": 0.63, "grad_norm": 1.5534663213507676, "learning_rate": 3.165554606365506e-06, "loss": 0.4674, "step": 8892 }, { "epoch": 0.63, "grad_norm": 1.842189164651779, "learning_rate": 3.16448562536723e-06, "loss": 0.5953, "step": 8893 }, { "epoch": 0.63, "grad_norm": 1.9416804793643982, "learning_rate": 3.1634167413270412e-06, "loss": 0.5485, "step": 8894 }, { "epoch": 0.63, "grad_norm": 1.7330105137932992, "learning_rate": 3.1623479543014055e-06, "loss": 0.4611, "step": 8895 }, { "epoch": 0.63, "grad_norm": 1.5590603585473128, "learning_rate": 3.1612792643467773e-06, "loss": 0.5889, "step": 8896 }, { "epoch": 0.63, "grad_norm": 0.7665545622398862, "learning_rate": 3.160210671519609e-06, "loss": 0.4403, "step": 8897 }, { "epoch": 0.63, "grad_norm": 1.500678656679247, "learning_rate": 3.159142175876345e-06, "loss": 0.4917, "step": 8898 }, { "epoch": 0.63, "grad_norm": 1.6802750171088427, "learning_rate": 3.1580737774734312e-06, "loss": 0.529, "step": 8899 }, { "epoch": 0.63, "grad_norm": 1.8622255711741527, "learning_rate": 3.1570054763673014e-06, "loss": 0.4951, "step": 8900 }, { "epoch": 0.63, "grad_norm": 1.7471254122887967, "learning_rate": 3.155937272614388e-06, "loss": 0.4983, "step": 8901 }, { "epoch": 0.63, "grad_norm": 1.769529339941457, "learning_rate": 3.154869166271114e-06, "loss": 0.5864, "step": 8902 }, { "epoch": 0.63, "grad_norm": 1.7349614858583104, "learning_rate": 3.153801157393903e-06, "loss": 0.5808, "step": 8903 }, { "epoch": 0.63, "grad_norm": 2.0771345307694147, "learning_rate": 3.1527332460391723e-06, "loss": 0.5127, "step": 8904 }, { "epoch": 0.63, "grad_norm": 1.6641947466980234, "learning_rate": 3.1516654322633296e-06, "loss": 0.5366, "step": 8905 }, { "epoch": 0.63, "grad_norm": 1.806735590087179, "learning_rate": 3.150597716122783e-06, "loss": 0.442, "step": 8906 }, { "epoch": 0.63, "grad_norm": 1.527521586032973, "learning_rate": 3.149530097673932e-06, "loss": 0.4992, "step": 8907 }, { "epoch": 0.63, "grad_norm": 2.318084612396937, "learning_rate": 3.148462576973171e-06, "loss": 0.6277, "step": 8908 }, { "epoch": 0.63, "grad_norm": 1.7658537467646718, "learning_rate": 3.147395154076889e-06, "loss": 0.5158, "step": 8909 }, { "epoch": 0.63, "grad_norm": 1.5660830161559902, "learning_rate": 3.1463278290414744e-06, "loss": 0.5913, "step": 8910 }, { "epoch": 0.63, "grad_norm": 1.7433083625846246, "learning_rate": 3.1452606019233043e-06, "loss": 0.5596, "step": 8911 }, { "epoch": 0.63, "grad_norm": 1.6464210485947475, "learning_rate": 3.144193472778755e-06, "loss": 0.5145, "step": 8912 }, { "epoch": 0.63, "grad_norm": 1.7672273031569048, "learning_rate": 3.1431264416641917e-06, "loss": 0.5041, "step": 8913 }, { "epoch": 0.63, "grad_norm": 1.927528887344882, "learning_rate": 3.1420595086359846e-06, "loss": 0.5102, "step": 8914 }, { "epoch": 0.63, "grad_norm": 1.8530578782897877, "learning_rate": 3.140992673750489e-06, "loss": 0.5448, "step": 8915 }, { "epoch": 0.63, "grad_norm": 2.44020890865194, "learning_rate": 3.1399259370640613e-06, "loss": 0.5214, "step": 8916 }, { "epoch": 0.63, "grad_norm": 2.3836434513752036, "learning_rate": 3.1388592986330446e-06, "loss": 0.5054, "step": 8917 }, { "epoch": 0.63, "grad_norm": 1.579434860162173, "learning_rate": 3.1377927585137878e-06, "loss": 0.4978, "step": 8918 }, { "epoch": 0.63, "grad_norm": 1.690735797584824, "learning_rate": 3.1367263167626284e-06, "loss": 0.5, "step": 8919 }, { "epoch": 0.63, "grad_norm": 0.8695682063603708, "learning_rate": 3.1356599734358966e-06, "loss": 0.4106, "step": 8920 }, { "epoch": 0.63, "grad_norm": 0.722244603056578, "learning_rate": 3.1345937285899243e-06, "loss": 0.4486, "step": 8921 }, { "epoch": 0.63, "grad_norm": 1.9619560491423496, "learning_rate": 3.1335275822810324e-06, "loss": 0.6416, "step": 8922 }, { "epoch": 0.63, "grad_norm": 1.710842740818859, "learning_rate": 3.1324615345655373e-06, "loss": 0.5248, "step": 8923 }, { "epoch": 0.63, "grad_norm": 1.6252746356188683, "learning_rate": 3.1313955854997513e-06, "loss": 0.5121, "step": 8924 }, { "epoch": 0.63, "grad_norm": 1.687793358503084, "learning_rate": 3.130329735139983e-06, "loss": 0.5735, "step": 8925 }, { "epoch": 0.63, "grad_norm": 1.7429534324367386, "learning_rate": 3.1292639835425332e-06, "loss": 0.5528, "step": 8926 }, { "epoch": 0.63, "grad_norm": 0.7702870025998363, "learning_rate": 3.128198330763699e-06, "loss": 0.4394, "step": 8927 }, { "epoch": 0.63, "grad_norm": 0.6812226705363837, "learning_rate": 3.1271327768597698e-06, "loss": 0.426, "step": 8928 }, { "epoch": 0.63, "grad_norm": 2.2456042845526794, "learning_rate": 3.1260673218870353e-06, "loss": 0.6127, "step": 8929 }, { "epoch": 0.63, "grad_norm": 0.7303013344599943, "learning_rate": 3.125001965901775e-06, "loss": 0.4396, "step": 8930 }, { "epoch": 0.63, "grad_norm": 1.7340934159895796, "learning_rate": 3.123936708960263e-06, "loss": 0.5415, "step": 8931 }, { "epoch": 0.63, "grad_norm": 0.662558498377235, "learning_rate": 3.1228715511187714e-06, "loss": 0.4002, "step": 8932 }, { "epoch": 0.63, "grad_norm": 1.5364259541301655, "learning_rate": 3.1218064924335646e-06, "loss": 0.5262, "step": 8933 }, { "epoch": 0.63, "grad_norm": 1.8361651620544046, "learning_rate": 3.1207415329609037e-06, "loss": 0.5593, "step": 8934 }, { "epoch": 0.63, "grad_norm": 1.7229413234691433, "learning_rate": 3.1196766727570434e-06, "loss": 0.5017, "step": 8935 }, { "epoch": 0.63, "grad_norm": 1.643490426496726, "learning_rate": 3.1186119118782302e-06, "loss": 0.5033, "step": 8936 }, { "epoch": 0.63, "grad_norm": 1.8508625392548246, "learning_rate": 3.1175472503807137e-06, "loss": 0.4603, "step": 8937 }, { "epoch": 0.63, "grad_norm": 0.7552256825313363, "learning_rate": 3.116482688320729e-06, "loss": 0.4485, "step": 8938 }, { "epoch": 0.63, "grad_norm": 2.4254989796420166, "learning_rate": 3.1154182257545096e-06, "loss": 0.5346, "step": 8939 }, { "epoch": 0.63, "grad_norm": 2.0161091641160183, "learning_rate": 3.1143538627382874e-06, "loss": 0.4816, "step": 8940 }, { "epoch": 0.63, "grad_norm": 1.6689575397794592, "learning_rate": 3.113289599328282e-06, "loss": 0.5671, "step": 8941 }, { "epoch": 0.63, "grad_norm": 1.4895506104009335, "learning_rate": 3.112225435580715e-06, "loss": 0.4922, "step": 8942 }, { "epoch": 0.63, "grad_norm": 2.187744079640872, "learning_rate": 3.1111613715517942e-06, "loss": 0.538, "step": 8943 }, { "epoch": 0.63, "grad_norm": 2.5462907169642666, "learning_rate": 3.1100974072977318e-06, "loss": 0.5156, "step": 8944 }, { "epoch": 0.63, "grad_norm": 1.845530126571093, "learning_rate": 3.1090335428747285e-06, "loss": 0.6331, "step": 8945 }, { "epoch": 0.63, "grad_norm": 1.4531816171061094, "learning_rate": 3.1079697783389805e-06, "loss": 0.5085, "step": 8946 }, { "epoch": 0.63, "grad_norm": 1.7627754213614966, "learning_rate": 3.1069061137466793e-06, "loss": 0.523, "step": 8947 }, { "epoch": 0.63, "grad_norm": 1.6940541628125119, "learning_rate": 3.1058425491540122e-06, "loss": 0.601, "step": 8948 }, { "epoch": 0.64, "grad_norm": 1.6252235166048519, "learning_rate": 3.104779084617161e-06, "loss": 0.5619, "step": 8949 }, { "epoch": 0.64, "grad_norm": 1.4251883678560178, "learning_rate": 3.103715720192301e-06, "loss": 0.5169, "step": 8950 }, { "epoch": 0.64, "grad_norm": 1.7181893959908747, "learning_rate": 3.1026524559355987e-06, "loss": 0.5178, "step": 8951 }, { "epoch": 0.64, "grad_norm": 1.9390467742937576, "learning_rate": 3.1015892919032254e-06, "loss": 0.4664, "step": 8952 }, { "epoch": 0.64, "grad_norm": 1.9509674180105803, "learning_rate": 3.1005262281513386e-06, "loss": 0.5182, "step": 8953 }, { "epoch": 0.64, "grad_norm": 0.7384615622694864, "learning_rate": 3.099463264736091e-06, "loss": 0.4156, "step": 8954 }, { "epoch": 0.64, "grad_norm": 0.7070443343052686, "learning_rate": 3.0984004017136337e-06, "loss": 0.4349, "step": 8955 }, { "epoch": 0.64, "grad_norm": 1.8771750034289651, "learning_rate": 3.0973376391401126e-06, "loss": 0.541, "step": 8956 }, { "epoch": 0.64, "grad_norm": 2.57841370124129, "learning_rate": 3.096274977071664e-06, "loss": 0.5679, "step": 8957 }, { "epoch": 0.64, "grad_norm": 1.6884188962354933, "learning_rate": 3.0952124155644203e-06, "loss": 0.5035, "step": 8958 }, { "epoch": 0.64, "grad_norm": 1.6587398979679187, "learning_rate": 3.0941499546745126e-06, "loss": 0.5841, "step": 8959 }, { "epoch": 0.64, "grad_norm": 1.7775281231798017, "learning_rate": 3.0930875944580625e-06, "loss": 0.535, "step": 8960 }, { "epoch": 0.64, "grad_norm": 1.7811868407511644, "learning_rate": 3.092025334971186e-06, "loss": 0.5115, "step": 8961 }, { "epoch": 0.64, "grad_norm": 1.653756080545026, "learning_rate": 3.0909631762699964e-06, "loss": 0.5201, "step": 8962 }, { "epoch": 0.64, "grad_norm": 3.2755082440010823, "learning_rate": 3.089901118410601e-06, "loss": 0.5391, "step": 8963 }, { "epoch": 0.64, "grad_norm": 2.2276009967103927, "learning_rate": 3.0888391614491007e-06, "loss": 0.4306, "step": 8964 }, { "epoch": 0.64, "grad_norm": 2.3844859022038385, "learning_rate": 3.0877773054415926e-06, "loss": 0.5468, "step": 8965 }, { "epoch": 0.64, "grad_norm": 1.7211002851153443, "learning_rate": 3.086715550444165e-06, "loss": 0.5459, "step": 8966 }, { "epoch": 0.64, "grad_norm": 1.7512123749301147, "learning_rate": 3.085653896512907e-06, "loss": 0.5151, "step": 8967 }, { "epoch": 0.64, "grad_norm": 1.9759594116036368, "learning_rate": 3.084592343703896e-06, "loss": 0.474, "step": 8968 }, { "epoch": 0.64, "grad_norm": 2.6295821319593657, "learning_rate": 3.0835308920732066e-06, "loss": 0.5635, "step": 8969 }, { "epoch": 0.64, "grad_norm": 1.5607213238082143, "learning_rate": 3.0824695416769103e-06, "loss": 0.5169, "step": 8970 }, { "epoch": 0.64, "grad_norm": 1.9165661191766092, "learning_rate": 3.0814082925710707e-06, "loss": 0.5446, "step": 8971 }, { "epoch": 0.64, "grad_norm": 1.5078281834408467, "learning_rate": 3.0803471448117463e-06, "loss": 0.4601, "step": 8972 }, { "epoch": 0.64, "grad_norm": 1.5565400254731439, "learning_rate": 3.079286098454989e-06, "loss": 0.5055, "step": 8973 }, { "epoch": 0.64, "grad_norm": 1.8914347168093333, "learning_rate": 3.0782251535568497e-06, "loss": 0.5559, "step": 8974 }, { "epoch": 0.64, "grad_norm": 2.6175778328971906, "learning_rate": 3.0771643101733693e-06, "loss": 0.4821, "step": 8975 }, { "epoch": 0.64, "grad_norm": 1.9366468399465062, "learning_rate": 3.0761035683605856e-06, "loss": 0.5257, "step": 8976 }, { "epoch": 0.64, "grad_norm": 1.5093825999652826, "learning_rate": 3.07504292817453e-06, "loss": 0.4941, "step": 8977 }, { "epoch": 0.64, "grad_norm": 2.0646063815978413, "learning_rate": 3.073982389671229e-06, "loss": 0.5383, "step": 8978 }, { "epoch": 0.64, "grad_norm": 1.6709794191958434, "learning_rate": 3.0729219529067054e-06, "loss": 0.5181, "step": 8979 }, { "epoch": 0.64, "grad_norm": 2.204822145819431, "learning_rate": 3.0718616179369753e-06, "loss": 0.5259, "step": 8980 }, { "epoch": 0.64, "grad_norm": 1.718157047057334, "learning_rate": 3.070801384818045e-06, "loss": 0.6182, "step": 8981 }, { "epoch": 0.64, "grad_norm": 1.9887835947764605, "learning_rate": 3.0697412536059247e-06, "loss": 0.5476, "step": 8982 }, { "epoch": 0.64, "grad_norm": 2.070915319817309, "learning_rate": 3.068681224356611e-06, "loss": 0.5209, "step": 8983 }, { "epoch": 0.64, "grad_norm": 1.7181723813366638, "learning_rate": 3.0676212971261005e-06, "loss": 0.5447, "step": 8984 }, { "epoch": 0.64, "grad_norm": 1.6827030494131703, "learning_rate": 3.0665614719703784e-06, "loss": 0.5518, "step": 8985 }, { "epoch": 0.64, "grad_norm": 1.831661068486478, "learning_rate": 3.065501748945433e-06, "loss": 0.51, "step": 8986 }, { "epoch": 0.64, "grad_norm": 1.6337148593472135, "learning_rate": 3.06444212810724e-06, "loss": 0.5386, "step": 8987 }, { "epoch": 0.64, "grad_norm": 1.5813227706191602, "learning_rate": 3.0633826095117706e-06, "loss": 0.5406, "step": 8988 }, { "epoch": 0.64, "grad_norm": 2.1932359688386516, "learning_rate": 3.0623231932149965e-06, "loss": 0.5644, "step": 8989 }, { "epoch": 0.64, "grad_norm": 1.7428902645358257, "learning_rate": 3.061263879272876e-06, "loss": 0.5291, "step": 8990 }, { "epoch": 0.64, "grad_norm": 1.7700574596017273, "learning_rate": 3.0602046677413665e-06, "loss": 0.565, "step": 8991 }, { "epoch": 0.64, "grad_norm": 1.7164610893366252, "learning_rate": 3.0591455586764184e-06, "loss": 0.5253, "step": 8992 }, { "epoch": 0.64, "grad_norm": 1.5969970085297904, "learning_rate": 3.0580865521339798e-06, "loss": 0.5189, "step": 8993 }, { "epoch": 0.64, "grad_norm": 1.7642723986050934, "learning_rate": 3.0570276481699894e-06, "loss": 0.4651, "step": 8994 }, { "epoch": 0.64, "grad_norm": 1.8216632960188424, "learning_rate": 3.0559688468403832e-06, "loss": 0.5742, "step": 8995 }, { "epoch": 0.64, "grad_norm": 1.7593466046499908, "learning_rate": 3.0549101482010875e-06, "loss": 0.4593, "step": 8996 }, { "epoch": 0.64, "grad_norm": 2.334780926670625, "learning_rate": 3.0538515523080304e-06, "loss": 0.5869, "step": 8997 }, { "epoch": 0.64, "grad_norm": 1.7412048491676766, "learning_rate": 3.0527930592171273e-06, "loss": 0.52, "step": 8998 }, { "epoch": 0.64, "grad_norm": 3.360786745051587, "learning_rate": 3.0517346689842954e-06, "loss": 0.5212, "step": 8999 }, { "epoch": 0.64, "grad_norm": 1.9498373447263968, "learning_rate": 3.050676381665436e-06, "loss": 0.5335, "step": 9000 }, { "epoch": 0.64, "grad_norm": 3.0231406272000036, "learning_rate": 3.0496181973164585e-06, "loss": 0.5132, "step": 9001 }, { "epoch": 0.64, "grad_norm": 1.6223038221992754, "learning_rate": 3.0485601159932564e-06, "loss": 0.4632, "step": 9002 }, { "epoch": 0.64, "grad_norm": 2.0774166598415933, "learning_rate": 3.0475021377517194e-06, "loss": 0.5308, "step": 9003 }, { "epoch": 0.64, "grad_norm": 1.804274296587741, "learning_rate": 3.0464442626477375e-06, "loss": 0.5249, "step": 9004 }, { "epoch": 0.64, "grad_norm": 5.224036488826843, "learning_rate": 3.045386490737189e-06, "loss": 0.5614, "step": 9005 }, { "epoch": 0.64, "grad_norm": 1.7330671546615168, "learning_rate": 3.0443288220759483e-06, "loss": 0.5106, "step": 9006 }, { "epoch": 0.64, "grad_norm": 1.3608814447332673, "learning_rate": 3.0432712567198853e-06, "loss": 0.5254, "step": 9007 }, { "epoch": 0.64, "grad_norm": 1.739900962941544, "learning_rate": 3.0422137947248664e-06, "loss": 0.5083, "step": 9008 }, { "epoch": 0.64, "grad_norm": 1.6103585829932647, "learning_rate": 3.041156436146748e-06, "loss": 0.5576, "step": 9009 }, { "epoch": 0.64, "grad_norm": 1.9489929267542763, "learning_rate": 3.0400991810413854e-06, "loss": 0.5016, "step": 9010 }, { "epoch": 0.64, "grad_norm": 1.9113859502231836, "learning_rate": 3.039042029464623e-06, "loss": 0.6077, "step": 9011 }, { "epoch": 0.64, "grad_norm": 1.9074311765185656, "learning_rate": 3.037984981472306e-06, "loss": 0.6075, "step": 9012 }, { "epoch": 0.64, "grad_norm": 1.9576645624315303, "learning_rate": 3.0369280371202703e-06, "loss": 0.5731, "step": 9013 }, { "epoch": 0.64, "grad_norm": 1.7547931859123291, "learning_rate": 3.035871196464349e-06, "loss": 0.511, "step": 9014 }, { "epoch": 0.64, "grad_norm": 1.8598804493695826, "learning_rate": 3.0348144595603633e-06, "loss": 0.4743, "step": 9015 }, { "epoch": 0.64, "grad_norm": 2.076158254351888, "learning_rate": 3.03375782646414e-06, "loss": 0.5624, "step": 9016 }, { "epoch": 0.64, "grad_norm": 2.226056441228961, "learning_rate": 3.032701297231491e-06, "loss": 0.5487, "step": 9017 }, { "epoch": 0.64, "grad_norm": 11.457833471430083, "learning_rate": 3.0316448719182224e-06, "loss": 0.4868, "step": 9018 }, { "epoch": 0.64, "grad_norm": 0.6385500546649843, "learning_rate": 3.0305885505801435e-06, "loss": 0.4492, "step": 9019 }, { "epoch": 0.64, "grad_norm": 1.868736026433916, "learning_rate": 3.0295323332730515e-06, "loss": 0.4521, "step": 9020 }, { "epoch": 0.64, "grad_norm": 1.8023401071464142, "learning_rate": 3.0284762200527367e-06, "loss": 0.5003, "step": 9021 }, { "epoch": 0.64, "grad_norm": 1.567993440844698, "learning_rate": 3.0274202109749877e-06, "loss": 0.5843, "step": 9022 }, { "epoch": 0.64, "grad_norm": 2.806802473816881, "learning_rate": 3.026364306095589e-06, "loss": 0.566, "step": 9023 }, { "epoch": 0.64, "grad_norm": 2.00575768960431, "learning_rate": 3.025308505470316e-06, "loss": 0.5335, "step": 9024 }, { "epoch": 0.64, "grad_norm": 1.7151981093860453, "learning_rate": 3.0242528091549382e-06, "loss": 0.5807, "step": 9025 }, { "epoch": 0.64, "grad_norm": 2.089195224507916, "learning_rate": 3.0231972172052197e-06, "loss": 0.5204, "step": 9026 }, { "epoch": 0.64, "grad_norm": 1.8700469760304583, "learning_rate": 3.022141729676925e-06, "loss": 0.5099, "step": 9027 }, { "epoch": 0.64, "grad_norm": 1.8665526656034335, "learning_rate": 3.0210863466258044e-06, "loss": 0.5261, "step": 9028 }, { "epoch": 0.64, "grad_norm": 3.716833263355458, "learning_rate": 3.02003106810761e-06, "loss": 0.4974, "step": 9029 }, { "epoch": 0.64, "grad_norm": 1.7682277602689471, "learning_rate": 3.0189758941780817e-06, "loss": 0.5134, "step": 9030 }, { "epoch": 0.64, "grad_norm": 1.6311762848280693, "learning_rate": 3.017920824892961e-06, "loss": 0.5123, "step": 9031 }, { "epoch": 0.64, "grad_norm": 1.7266939899941267, "learning_rate": 3.0168658603079783e-06, "loss": 0.5335, "step": 9032 }, { "epoch": 0.64, "grad_norm": 1.7577503912681252, "learning_rate": 3.0158110004788587e-06, "loss": 0.5517, "step": 9033 }, { "epoch": 0.64, "grad_norm": 1.6407520330276324, "learning_rate": 3.014756245461328e-06, "loss": 0.4596, "step": 9034 }, { "epoch": 0.64, "grad_norm": 1.4538438690941864, "learning_rate": 3.0137015953110983e-06, "loss": 0.4635, "step": 9035 }, { "epoch": 0.64, "grad_norm": 1.8951192232699503, "learning_rate": 3.012647050083881e-06, "loss": 0.5148, "step": 9036 }, { "epoch": 0.64, "grad_norm": 1.8117284127773448, "learning_rate": 3.0115926098353793e-06, "loss": 0.5801, "step": 9037 }, { "epoch": 0.64, "grad_norm": 1.4575865000496393, "learning_rate": 3.0105382746212952e-06, "loss": 0.4873, "step": 9038 }, { "epoch": 0.64, "grad_norm": 0.7553313428722288, "learning_rate": 3.0094840444973204e-06, "loss": 0.4104, "step": 9039 }, { "epoch": 0.64, "grad_norm": 1.8000695540517964, "learning_rate": 3.008429919519144e-06, "loss": 0.5552, "step": 9040 }, { "epoch": 0.64, "grad_norm": 1.895191584639015, "learning_rate": 3.0073758997424453e-06, "loss": 0.5635, "step": 9041 }, { "epoch": 0.64, "grad_norm": 1.8818604761723605, "learning_rate": 3.006321985222905e-06, "loss": 0.6052, "step": 9042 }, { "epoch": 0.64, "grad_norm": 1.8496861388750736, "learning_rate": 3.0052681760161927e-06, "loss": 0.5825, "step": 9043 }, { "epoch": 0.64, "grad_norm": 1.5870882257801573, "learning_rate": 3.0042144721779755e-06, "loss": 0.5418, "step": 9044 }, { "epoch": 0.64, "grad_norm": 2.0263551709210055, "learning_rate": 3.0031608737639095e-06, "loss": 0.5948, "step": 9045 }, { "epoch": 0.64, "grad_norm": 2.039966107566047, "learning_rate": 3.0021073808296553e-06, "loss": 0.5343, "step": 9046 }, { "epoch": 0.64, "grad_norm": 1.7071790207949384, "learning_rate": 3.001053993430858e-06, "loss": 0.5026, "step": 9047 }, { "epoch": 0.64, "grad_norm": 1.7980572366895888, "learning_rate": 3.000000711623163e-06, "loss": 0.5443, "step": 9048 }, { "epoch": 0.64, "grad_norm": 1.6923195763210657, "learning_rate": 2.9989475354622055e-06, "loss": 0.4672, "step": 9049 }, { "epoch": 0.64, "grad_norm": 1.5492367449603235, "learning_rate": 2.99789446500362e-06, "loss": 0.5636, "step": 9050 }, { "epoch": 0.64, "grad_norm": 2.0782130955323086, "learning_rate": 2.9968415003030337e-06, "loss": 0.4544, "step": 9051 }, { "epoch": 0.64, "grad_norm": 1.6544206657704241, "learning_rate": 2.995788641416066e-06, "loss": 0.5569, "step": 9052 }, { "epoch": 0.64, "grad_norm": 1.629938069184111, "learning_rate": 2.9947358883983347e-06, "loss": 0.6035, "step": 9053 }, { "epoch": 0.64, "grad_norm": 1.5900035672666304, "learning_rate": 2.993683241305449e-06, "loss": 0.4943, "step": 9054 }, { "epoch": 0.64, "grad_norm": 1.8533380503704442, "learning_rate": 2.992630700193013e-06, "loss": 0.5193, "step": 9055 }, { "epoch": 0.64, "grad_norm": 1.558344642984947, "learning_rate": 2.9915782651166226e-06, "loss": 0.5678, "step": 9056 }, { "epoch": 0.64, "grad_norm": 1.7541849406246062, "learning_rate": 2.9905259361318763e-06, "loss": 0.527, "step": 9057 }, { "epoch": 0.64, "grad_norm": 3.1358498860846757, "learning_rate": 2.9894737132943573e-06, "loss": 0.5396, "step": 9058 }, { "epoch": 0.64, "grad_norm": 1.8263316312287419, "learning_rate": 2.9884215966596507e-06, "loss": 0.4962, "step": 9059 }, { "epoch": 0.64, "grad_norm": 1.8879636877729697, "learning_rate": 2.9873695862833295e-06, "loss": 0.5866, "step": 9060 }, { "epoch": 0.64, "grad_norm": 1.847644580086368, "learning_rate": 2.9863176822209694e-06, "loss": 0.4814, "step": 9061 }, { "epoch": 0.64, "grad_norm": 1.8482484330841524, "learning_rate": 2.9852658845281313e-06, "loss": 0.5301, "step": 9062 }, { "epoch": 0.64, "grad_norm": 1.9578317251517725, "learning_rate": 2.9842141932603757e-06, "loss": 0.5788, "step": 9063 }, { "epoch": 0.64, "grad_norm": 1.569155942427024, "learning_rate": 2.9831626084732568e-06, "loss": 0.5508, "step": 9064 }, { "epoch": 0.64, "grad_norm": 1.8271862358497726, "learning_rate": 2.9821111302223226e-06, "loss": 0.5029, "step": 9065 }, { "epoch": 0.64, "grad_norm": 1.5600804808284479, "learning_rate": 2.9810597585631173e-06, "loss": 0.5151, "step": 9066 }, { "epoch": 0.64, "grad_norm": 0.6756596923441516, "learning_rate": 2.9800084935511746e-06, "loss": 0.4326, "step": 9067 }, { "epoch": 0.64, "grad_norm": 2.5780330666130133, "learning_rate": 2.9789573352420296e-06, "loss": 0.5105, "step": 9068 }, { "epoch": 0.64, "grad_norm": 0.8320275511792663, "learning_rate": 2.9779062836912076e-06, "loss": 0.4016, "step": 9069 }, { "epoch": 0.64, "grad_norm": 2.0167229087392036, "learning_rate": 2.9768553389542265e-06, "loss": 0.5684, "step": 9070 }, { "epoch": 0.64, "grad_norm": 1.541893262516551, "learning_rate": 2.9758045010866e-06, "loss": 0.483, "step": 9071 }, { "epoch": 0.64, "grad_norm": 1.7421225648676588, "learning_rate": 2.97475377014384e-06, "loss": 0.5071, "step": 9072 }, { "epoch": 0.64, "grad_norm": 1.8991158542986202, "learning_rate": 2.973703146181448e-06, "loss": 0.4399, "step": 9073 }, { "epoch": 0.64, "grad_norm": 2.144857953910054, "learning_rate": 2.972652629254923e-06, "loss": 0.5633, "step": 9074 }, { "epoch": 0.64, "grad_norm": 1.7474322907095547, "learning_rate": 2.971602219419753e-06, "loss": 0.5155, "step": 9075 }, { "epoch": 0.64, "grad_norm": 1.7751174624702097, "learning_rate": 2.9705519167314295e-06, "loss": 0.5042, "step": 9076 }, { "epoch": 0.64, "grad_norm": 2.5255934537175073, "learning_rate": 2.9695017212454302e-06, "loss": 0.5445, "step": 9077 }, { "epoch": 0.64, "grad_norm": 2.0735033758027996, "learning_rate": 2.9684516330172297e-06, "loss": 0.5243, "step": 9078 }, { "epoch": 0.64, "grad_norm": 0.690645719944104, "learning_rate": 2.9674016521022977e-06, "loss": 0.4294, "step": 9079 }, { "epoch": 0.64, "grad_norm": 1.649333469110579, "learning_rate": 2.966351778556097e-06, "loss": 0.5599, "step": 9080 }, { "epoch": 0.64, "grad_norm": 2.3392982829962583, "learning_rate": 2.965302012434088e-06, "loss": 0.5871, "step": 9081 }, { "epoch": 0.64, "grad_norm": 1.6322904357154613, "learning_rate": 2.964252353791719e-06, "loss": 0.5243, "step": 9082 }, { "epoch": 0.64, "grad_norm": 1.7649422780164459, "learning_rate": 2.963202802684441e-06, "loss": 0.5239, "step": 9083 }, { "epoch": 0.64, "grad_norm": 1.5702511444579155, "learning_rate": 2.9621533591676923e-06, "loss": 0.4729, "step": 9084 }, { "epoch": 0.64, "grad_norm": 1.7699795599402075, "learning_rate": 2.961104023296908e-06, "loss": 0.5228, "step": 9085 }, { "epoch": 0.64, "grad_norm": 2.328969849088684, "learning_rate": 2.960054795127516e-06, "loss": 0.5985, "step": 9086 }, { "epoch": 0.64, "grad_norm": 5.435355368118025, "learning_rate": 2.9590056747149437e-06, "loss": 0.4739, "step": 9087 }, { "epoch": 0.64, "grad_norm": 2.04534983820134, "learning_rate": 2.9579566621146082e-06, "loss": 0.5339, "step": 9088 }, { "epoch": 0.64, "grad_norm": 1.6358878890054194, "learning_rate": 2.9569077573819205e-06, "loss": 0.4516, "step": 9089 }, { "epoch": 0.65, "grad_norm": 1.559303470986315, "learning_rate": 2.955858960572287e-06, "loss": 0.5035, "step": 9090 }, { "epoch": 0.65, "grad_norm": 1.525706501303829, "learning_rate": 2.954810271741111e-06, "loss": 0.5241, "step": 9091 }, { "epoch": 0.65, "grad_norm": 1.595887534947597, "learning_rate": 2.953761690943787e-06, "loss": 0.4874, "step": 9092 }, { "epoch": 0.65, "grad_norm": 1.84480894344978, "learning_rate": 2.9527132182357034e-06, "loss": 0.6127, "step": 9093 }, { "epoch": 0.65, "grad_norm": 0.724954841055202, "learning_rate": 2.951664853672244e-06, "loss": 0.4192, "step": 9094 }, { "epoch": 0.65, "grad_norm": 1.5072061096539031, "learning_rate": 2.9506165973087873e-06, "loss": 0.5501, "step": 9095 }, { "epoch": 0.65, "grad_norm": 1.6408571725687078, "learning_rate": 2.949568449200708e-06, "loss": 0.5552, "step": 9096 }, { "epoch": 0.65, "grad_norm": 1.7545752499715042, "learning_rate": 2.9485204094033694e-06, "loss": 0.4826, "step": 9097 }, { "epoch": 0.65, "grad_norm": 0.750689301902596, "learning_rate": 2.9474724779721352e-06, "loss": 0.4283, "step": 9098 }, { "epoch": 0.65, "grad_norm": 1.7694538005454794, "learning_rate": 2.94642465496236e-06, "loss": 0.603, "step": 9099 }, { "epoch": 0.65, "grad_norm": 1.4383253830582385, "learning_rate": 2.945376940429393e-06, "loss": 0.5595, "step": 9100 }, { "epoch": 0.65, "grad_norm": 7.653869215502521, "learning_rate": 2.944329334428576e-06, "loss": 0.5508, "step": 9101 }, { "epoch": 0.65, "grad_norm": 1.431629562731777, "learning_rate": 2.9432818370152503e-06, "loss": 0.4798, "step": 9102 }, { "epoch": 0.65, "grad_norm": 1.8359598722880233, "learning_rate": 2.942234448244748e-06, "loss": 0.5177, "step": 9103 }, { "epoch": 0.65, "grad_norm": 1.6642943830379398, "learning_rate": 2.941187168172395e-06, "loss": 0.5697, "step": 9104 }, { "epoch": 0.65, "grad_norm": 0.681232889719987, "learning_rate": 2.940139996853509e-06, "loss": 0.4369, "step": 9105 }, { "epoch": 0.65, "grad_norm": 1.572636934299552, "learning_rate": 2.9390929343434117e-06, "loss": 0.5915, "step": 9106 }, { "epoch": 0.65, "grad_norm": 1.7111222481926083, "learning_rate": 2.9380459806974075e-06, "loss": 0.5807, "step": 9107 }, { "epoch": 0.65, "grad_norm": 1.7582873850297525, "learning_rate": 2.936999135970801e-06, "loss": 0.5603, "step": 9108 }, { "epoch": 0.65, "grad_norm": 1.6930541640976577, "learning_rate": 2.935952400218891e-06, "loss": 0.5686, "step": 9109 }, { "epoch": 0.65, "grad_norm": 1.3512936299481089, "learning_rate": 2.9349057734969688e-06, "loss": 0.4582, "step": 9110 }, { "epoch": 0.65, "grad_norm": 1.3979439085817935, "learning_rate": 2.9338592558603217e-06, "loss": 0.5344, "step": 9111 }, { "epoch": 0.65, "grad_norm": 1.7966918616772511, "learning_rate": 2.9328128473642303e-06, "loss": 0.5925, "step": 9112 }, { "epoch": 0.65, "grad_norm": 2.018210215146075, "learning_rate": 2.931766548063967e-06, "loss": 0.4942, "step": 9113 }, { "epoch": 0.65, "grad_norm": 1.433107988509724, "learning_rate": 2.9307203580148037e-06, "loss": 0.4576, "step": 9114 }, { "epoch": 0.65, "grad_norm": 1.8698281153158276, "learning_rate": 2.929674277272003e-06, "loss": 0.5283, "step": 9115 }, { "epoch": 0.65, "grad_norm": 1.4812923824188158, "learning_rate": 2.9286283058908215e-06, "loss": 0.5165, "step": 9116 }, { "epoch": 0.65, "grad_norm": 1.798572885710306, "learning_rate": 2.9275824439265123e-06, "loss": 0.5586, "step": 9117 }, { "epoch": 0.65, "grad_norm": 1.663242449610458, "learning_rate": 2.926536691434321e-06, "loss": 0.5867, "step": 9118 }, { "epoch": 0.65, "grad_norm": 0.7797944645711377, "learning_rate": 2.925491048469488e-06, "loss": 0.4359, "step": 9119 }, { "epoch": 0.65, "grad_norm": 1.8509222277392656, "learning_rate": 2.9244455150872448e-06, "loss": 0.5094, "step": 9120 }, { "epoch": 0.65, "grad_norm": 5.298498877267779, "learning_rate": 2.9234000913428246e-06, "loss": 0.4629, "step": 9121 }, { "epoch": 0.65, "grad_norm": 1.889587237086541, "learning_rate": 2.9223547772914483e-06, "loss": 0.5122, "step": 9122 }, { "epoch": 0.65, "grad_norm": 2.6626645448575164, "learning_rate": 2.921309572988331e-06, "loss": 0.5966, "step": 9123 }, { "epoch": 0.65, "grad_norm": 0.6936885797313508, "learning_rate": 2.9202644784886863e-06, "loss": 0.4191, "step": 9124 }, { "epoch": 0.65, "grad_norm": 1.7698025310630925, "learning_rate": 2.9192194938477197e-06, "loss": 0.561, "step": 9125 }, { "epoch": 0.65, "grad_norm": 1.742444055050693, "learning_rate": 2.9181746191206296e-06, "loss": 0.5386, "step": 9126 }, { "epoch": 0.65, "grad_norm": 1.635750401286632, "learning_rate": 2.9171298543626094e-06, "loss": 0.5009, "step": 9127 }, { "epoch": 0.65, "grad_norm": 2.184473278677268, "learning_rate": 2.916085199628849e-06, "loss": 0.5437, "step": 9128 }, { "epoch": 0.65, "grad_norm": 0.6767015162089856, "learning_rate": 2.915040654974529e-06, "loss": 0.4251, "step": 9129 }, { "epoch": 0.65, "grad_norm": 1.830246567476984, "learning_rate": 2.913996220454828e-06, "loss": 0.5229, "step": 9130 }, { "epoch": 0.65, "grad_norm": 1.9968279851940836, "learning_rate": 2.9129518961249114e-06, "loss": 0.5292, "step": 9131 }, { "epoch": 0.65, "grad_norm": 1.7084140894745679, "learning_rate": 2.9119076820399517e-06, "loss": 0.5249, "step": 9132 }, { "epoch": 0.65, "grad_norm": 1.511815267251824, "learning_rate": 2.910863578255102e-06, "loss": 0.5401, "step": 9133 }, { "epoch": 0.65, "grad_norm": 4.356168419984041, "learning_rate": 2.909819584825516e-06, "loss": 0.5326, "step": 9134 }, { "epoch": 0.65, "grad_norm": 2.0771701803836335, "learning_rate": 2.9087757018063434e-06, "loss": 0.5129, "step": 9135 }, { "epoch": 0.65, "grad_norm": 2.183058695978997, "learning_rate": 2.9077319292527235e-06, "loss": 0.5835, "step": 9136 }, { "epoch": 0.65, "grad_norm": 2.245600146769842, "learning_rate": 2.9066882672197944e-06, "loss": 0.5677, "step": 9137 }, { "epoch": 0.65, "grad_norm": 1.5172333535787021, "learning_rate": 2.9056447157626823e-06, "loss": 0.4495, "step": 9138 }, { "epoch": 0.65, "grad_norm": 1.8752268297469012, "learning_rate": 2.9046012749365134e-06, "loss": 0.606, "step": 9139 }, { "epoch": 0.65, "grad_norm": 1.8421887234326304, "learning_rate": 2.9035579447964045e-06, "loss": 0.514, "step": 9140 }, { "epoch": 0.65, "grad_norm": 2.5955653759184454, "learning_rate": 2.9025147253974695e-06, "loss": 0.4884, "step": 9141 }, { "epoch": 0.65, "grad_norm": 1.8218979729146112, "learning_rate": 2.9014716167948155e-06, "loss": 0.522, "step": 9142 }, { "epoch": 0.65, "grad_norm": 2.008799245319336, "learning_rate": 2.9004286190435383e-06, "loss": 0.5012, "step": 9143 }, { "epoch": 0.65, "grad_norm": 1.5598532865449968, "learning_rate": 2.8993857321987385e-06, "loss": 0.4545, "step": 9144 }, { "epoch": 0.65, "grad_norm": 2.1187891226444515, "learning_rate": 2.898342956315501e-06, "loss": 0.528, "step": 9145 }, { "epoch": 0.65, "grad_norm": 1.920936293937471, "learning_rate": 2.8973002914489098e-06, "loss": 0.5367, "step": 9146 }, { "epoch": 0.65, "grad_norm": 1.7215885793665529, "learning_rate": 2.896257737654042e-06, "loss": 0.5563, "step": 9147 }, { "epoch": 0.65, "grad_norm": 1.9995339251368327, "learning_rate": 2.8952152949859714e-06, "loss": 0.5626, "step": 9148 }, { "epoch": 0.65, "grad_norm": 1.6887871743280758, "learning_rate": 2.8941729634997584e-06, "loss": 0.5606, "step": 9149 }, { "epoch": 0.65, "grad_norm": 1.688499528366517, "learning_rate": 2.893130743250465e-06, "loss": 0.5573, "step": 9150 }, { "epoch": 0.65, "grad_norm": 1.8053165038463148, "learning_rate": 2.8920886342931444e-06, "loss": 0.4619, "step": 9151 }, { "epoch": 0.65, "grad_norm": 2.3806881003346683, "learning_rate": 2.891046636682845e-06, "loss": 0.5589, "step": 9152 }, { "epoch": 0.65, "grad_norm": 1.902122019322406, "learning_rate": 2.890004750474611e-06, "loss": 0.4429, "step": 9153 }, { "epoch": 0.65, "grad_norm": 1.898635296583468, "learning_rate": 2.888962975723471e-06, "loss": 0.5956, "step": 9154 }, { "epoch": 0.65, "grad_norm": 1.7721280205285528, "learning_rate": 2.887921312484464e-06, "loss": 0.5188, "step": 9155 }, { "epoch": 0.65, "grad_norm": 1.6007764780313938, "learning_rate": 2.886879760812608e-06, "loss": 0.4761, "step": 9156 }, { "epoch": 0.65, "grad_norm": 0.7995164646770508, "learning_rate": 2.885838320762924e-06, "loss": 0.4519, "step": 9157 }, { "epoch": 0.65, "grad_norm": 1.901669090604936, "learning_rate": 2.8847969923904225e-06, "loss": 0.5013, "step": 9158 }, { "epoch": 0.65, "grad_norm": 1.9099657929034635, "learning_rate": 2.883755775750111e-06, "loss": 0.5669, "step": 9159 }, { "epoch": 0.65, "grad_norm": 1.9110806431936591, "learning_rate": 2.8827146708969935e-06, "loss": 0.5458, "step": 9160 }, { "epoch": 0.65, "grad_norm": 1.9182891986418684, "learning_rate": 2.8816736778860566e-06, "loss": 0.4935, "step": 9161 }, { "epoch": 0.65, "grad_norm": 1.5203921105583955, "learning_rate": 2.880632796772298e-06, "loss": 0.5041, "step": 9162 }, { "epoch": 0.65, "grad_norm": 1.599742054897293, "learning_rate": 2.879592027610695e-06, "loss": 0.5305, "step": 9163 }, { "epoch": 0.65, "grad_norm": 1.9902824940569694, "learning_rate": 2.878551370456225e-06, "loss": 0.5162, "step": 9164 }, { "epoch": 0.65, "grad_norm": 1.5565396103595257, "learning_rate": 2.8775108253638605e-06, "loss": 0.5403, "step": 9165 }, { "epoch": 0.65, "grad_norm": 2.0790674560027878, "learning_rate": 2.8764703923885663e-06, "loss": 0.5624, "step": 9166 }, { "epoch": 0.65, "grad_norm": 1.8666482826241881, "learning_rate": 2.8754300715853035e-06, "loss": 0.5956, "step": 9167 }, { "epoch": 0.65, "grad_norm": 1.6450497955015766, "learning_rate": 2.874389863009022e-06, "loss": 0.5377, "step": 9168 }, { "epoch": 0.65, "grad_norm": 3.649811266278659, "learning_rate": 2.873349766714669e-06, "loss": 0.5647, "step": 9169 }, { "epoch": 0.65, "grad_norm": 1.7870294154573634, "learning_rate": 2.8723097827571887e-06, "loss": 0.5458, "step": 9170 }, { "epoch": 0.65, "grad_norm": 1.6636373270478908, "learning_rate": 2.8712699111915143e-06, "loss": 0.5217, "step": 9171 }, { "epoch": 0.65, "grad_norm": 0.7341497383219123, "learning_rate": 2.870230152072579e-06, "loss": 0.4261, "step": 9172 }, { "epoch": 0.65, "grad_norm": 4.0114796908380335, "learning_rate": 2.8691905054552994e-06, "loss": 0.4799, "step": 9173 }, { "epoch": 0.65, "grad_norm": 1.810177702667246, "learning_rate": 2.8681509713946014e-06, "loss": 0.5328, "step": 9174 }, { "epoch": 0.65, "grad_norm": 1.6226085653214999, "learning_rate": 2.867111549945391e-06, "loss": 0.4702, "step": 9175 }, { "epoch": 0.65, "grad_norm": 2.4067844495067865, "learning_rate": 2.8660722411625778e-06, "loss": 0.6179, "step": 9176 }, { "epoch": 0.65, "grad_norm": 0.7355554226232269, "learning_rate": 2.8650330451010557e-06, "loss": 0.427, "step": 9177 }, { "epoch": 0.65, "grad_norm": 1.7938501530899296, "learning_rate": 2.863993961815726e-06, "loss": 0.538, "step": 9178 }, { "epoch": 0.65, "grad_norm": 1.6962142045092483, "learning_rate": 2.862954991361471e-06, "loss": 0.49, "step": 9179 }, { "epoch": 0.65, "grad_norm": 2.309038951121496, "learning_rate": 2.861916133793173e-06, "loss": 0.5223, "step": 9180 }, { "epoch": 0.65, "grad_norm": 1.9297972013347495, "learning_rate": 2.860877389165714e-06, "loss": 0.5564, "step": 9181 }, { "epoch": 0.65, "grad_norm": 1.7934832808001466, "learning_rate": 2.8598387575339564e-06, "loss": 0.5292, "step": 9182 }, { "epoch": 0.65, "grad_norm": 1.7300594203134563, "learning_rate": 2.85880023895277e-06, "loss": 0.4593, "step": 9183 }, { "epoch": 0.65, "grad_norm": 1.8250566388660086, "learning_rate": 2.857761833477007e-06, "loss": 0.4788, "step": 9184 }, { "epoch": 0.65, "grad_norm": 2.587926903641795, "learning_rate": 2.8567235411615265e-06, "loss": 0.5377, "step": 9185 }, { "epoch": 0.65, "grad_norm": 2.076744444427647, "learning_rate": 2.8556853620611686e-06, "loss": 0.4886, "step": 9186 }, { "epoch": 0.65, "grad_norm": 1.8902720984402064, "learning_rate": 2.854647296230776e-06, "loss": 0.5257, "step": 9187 }, { "epoch": 0.65, "grad_norm": 1.7051974394668763, "learning_rate": 2.853609343725183e-06, "loss": 0.4931, "step": 9188 }, { "epoch": 0.65, "grad_norm": 2.4742301361047168, "learning_rate": 2.852571504599217e-06, "loss": 0.5173, "step": 9189 }, { "epoch": 0.65, "grad_norm": 1.6856630772661272, "learning_rate": 2.8515337789077026e-06, "loss": 0.5099, "step": 9190 }, { "epoch": 0.65, "grad_norm": 5.268095978659321, "learning_rate": 2.850496166705452e-06, "loss": 0.4601, "step": 9191 }, { "epoch": 0.65, "grad_norm": 3.41932900636466, "learning_rate": 2.849458668047278e-06, "loss": 0.5099, "step": 9192 }, { "epoch": 0.65, "grad_norm": 1.8355156684447664, "learning_rate": 2.848421282987983e-06, "loss": 0.4698, "step": 9193 }, { "epoch": 0.65, "grad_norm": 1.7160351651580157, "learning_rate": 2.8473840115823675e-06, "loss": 0.5697, "step": 9194 }, { "epoch": 0.65, "grad_norm": 1.69471406297245, "learning_rate": 2.8463468538852225e-06, "loss": 0.5311, "step": 9195 }, { "epoch": 0.65, "grad_norm": 1.9812985813261297, "learning_rate": 2.845309809951333e-06, "loss": 0.5336, "step": 9196 }, { "epoch": 0.65, "grad_norm": 1.7245171239765553, "learning_rate": 2.8442728798354837e-06, "loss": 0.5125, "step": 9197 }, { "epoch": 0.65, "grad_norm": 2.171097880203168, "learning_rate": 2.8432360635924434e-06, "loss": 0.5737, "step": 9198 }, { "epoch": 0.65, "grad_norm": 1.9059421697353383, "learning_rate": 2.8421993612769817e-06, "loss": 0.4865, "step": 9199 }, { "epoch": 0.65, "grad_norm": 1.775668768457437, "learning_rate": 2.8411627729438616e-06, "loss": 0.4951, "step": 9200 }, { "epoch": 0.65, "grad_norm": 1.6478315936015098, "learning_rate": 2.8401262986478396e-06, "loss": 0.4924, "step": 9201 }, { "epoch": 0.65, "grad_norm": 1.846348492591966, "learning_rate": 2.8390899384436667e-06, "loss": 0.5, "step": 9202 }, { "epoch": 0.65, "grad_norm": 1.752893292083521, "learning_rate": 2.838053692386081e-06, "loss": 0.4914, "step": 9203 }, { "epoch": 0.65, "grad_norm": 1.6927451075127622, "learning_rate": 2.8370175605298296e-06, "loss": 0.5579, "step": 9204 }, { "epoch": 0.65, "grad_norm": 1.7841354076107514, "learning_rate": 2.835981542929638e-06, "loss": 0.5686, "step": 9205 }, { "epoch": 0.65, "grad_norm": 1.7380021488273631, "learning_rate": 2.834945639640235e-06, "loss": 0.5407, "step": 9206 }, { "epoch": 0.65, "grad_norm": 2.3258914948572307, "learning_rate": 2.833909850716336e-06, "loss": 0.4976, "step": 9207 }, { "epoch": 0.65, "grad_norm": 1.8730161499635014, "learning_rate": 2.8328741762126607e-06, "loss": 0.5397, "step": 9208 }, { "epoch": 0.65, "grad_norm": 1.7299205883915294, "learning_rate": 2.8318386161839163e-06, "loss": 0.5241, "step": 9209 }, { "epoch": 0.65, "grad_norm": 1.6845094122112925, "learning_rate": 2.8308031706847994e-06, "loss": 0.4841, "step": 9210 }, { "epoch": 0.65, "grad_norm": 1.5731515300190162, "learning_rate": 2.829767839770012e-06, "loss": 0.5358, "step": 9211 }, { "epoch": 0.65, "grad_norm": 1.5947759187815131, "learning_rate": 2.8287326234942403e-06, "loss": 0.5638, "step": 9212 }, { "epoch": 0.65, "grad_norm": 0.8152013611848146, "learning_rate": 2.8276975219121705e-06, "loss": 0.4498, "step": 9213 }, { "epoch": 0.65, "grad_norm": 1.633436745150401, "learning_rate": 2.826662535078474e-06, "loss": 0.5167, "step": 9214 }, { "epoch": 0.65, "grad_norm": 3.0306683179296225, "learning_rate": 2.825627663047831e-06, "loss": 0.5616, "step": 9215 }, { "epoch": 0.65, "grad_norm": 0.6581517902793548, "learning_rate": 2.8245929058749004e-06, "loss": 0.4238, "step": 9216 }, { "epoch": 0.65, "grad_norm": 2.3908531170067024, "learning_rate": 2.823558263614344e-06, "loss": 0.5086, "step": 9217 }, { "epoch": 0.65, "grad_norm": 1.7372537977656668, "learning_rate": 2.8225237363208148e-06, "loss": 0.5054, "step": 9218 }, { "epoch": 0.65, "grad_norm": 1.6977616383481866, "learning_rate": 2.82148932404896e-06, "loss": 0.5042, "step": 9219 }, { "epoch": 0.65, "grad_norm": 1.9320238236481364, "learning_rate": 2.820455026853423e-06, "loss": 0.6162, "step": 9220 }, { "epoch": 0.65, "grad_norm": 1.6694785564525654, "learning_rate": 2.819420844788835e-06, "loss": 0.5954, "step": 9221 }, { "epoch": 0.65, "grad_norm": 1.7541670210174858, "learning_rate": 2.8183867779098262e-06, "loss": 0.515, "step": 9222 }, { "epoch": 0.65, "grad_norm": 1.734812944385469, "learning_rate": 2.817352826271021e-06, "loss": 0.493, "step": 9223 }, { "epoch": 0.65, "grad_norm": 1.9359575841068561, "learning_rate": 2.816318989927035e-06, "loss": 0.4903, "step": 9224 }, { "epoch": 0.65, "grad_norm": 1.7424749149611731, "learning_rate": 2.8152852689324807e-06, "loss": 0.4928, "step": 9225 }, { "epoch": 0.65, "grad_norm": 1.9194836941090903, "learning_rate": 2.814251663341958e-06, "loss": 0.4957, "step": 9226 }, { "epoch": 0.65, "grad_norm": 1.8364464075597358, "learning_rate": 2.813218173210072e-06, "loss": 0.5366, "step": 9227 }, { "epoch": 0.65, "grad_norm": 2.170775762020465, "learning_rate": 2.8121847985914108e-06, "loss": 0.6025, "step": 9228 }, { "epoch": 0.65, "grad_norm": 1.7052730735353285, "learning_rate": 2.8111515395405615e-06, "loss": 0.5543, "step": 9229 }, { "epoch": 0.65, "grad_norm": 1.792400223004841, "learning_rate": 2.8101183961121046e-06, "loss": 0.5497, "step": 9230 }, { "epoch": 0.66, "grad_norm": 1.4631512126909236, "learning_rate": 2.8090853683606143e-06, "loss": 0.5033, "step": 9231 }, { "epoch": 0.66, "grad_norm": 1.9356325420576315, "learning_rate": 2.8080524563406607e-06, "loss": 0.5572, "step": 9232 }, { "epoch": 0.66, "grad_norm": 1.7417291538556863, "learning_rate": 2.8070196601068e-06, "loss": 0.4954, "step": 9233 }, { "epoch": 0.66, "grad_norm": 1.826998556068702, "learning_rate": 2.805986979713596e-06, "loss": 0.6107, "step": 9234 }, { "epoch": 0.66, "grad_norm": 1.7365963405401088, "learning_rate": 2.8049544152155912e-06, "loss": 0.4931, "step": 9235 }, { "epoch": 0.66, "grad_norm": 1.7476107889090398, "learning_rate": 2.803921966667332e-06, "loss": 0.5037, "step": 9236 }, { "epoch": 0.66, "grad_norm": 1.7902239574645038, "learning_rate": 2.802889634123357e-06, "loss": 0.5386, "step": 9237 }, { "epoch": 0.66, "grad_norm": 1.8428493787010312, "learning_rate": 2.801857417638195e-06, "loss": 0.4752, "step": 9238 }, { "epoch": 0.66, "grad_norm": 1.4974175025634846, "learning_rate": 2.800825317266376e-06, "loss": 0.4931, "step": 9239 }, { "epoch": 0.66, "grad_norm": 1.9900314212309922, "learning_rate": 2.7997933330624116e-06, "loss": 0.5013, "step": 9240 }, { "epoch": 0.66, "grad_norm": 1.832737043728728, "learning_rate": 2.7987614650808206e-06, "loss": 0.5972, "step": 9241 }, { "epoch": 0.66, "grad_norm": 1.6351588349335633, "learning_rate": 2.797729713376106e-06, "loss": 0.514, "step": 9242 }, { "epoch": 0.66, "grad_norm": 1.6632863927196342, "learning_rate": 2.7966980780027735e-06, "loss": 0.4879, "step": 9243 }, { "epoch": 0.66, "grad_norm": 1.5743530225730007, "learning_rate": 2.7956665590153102e-06, "loss": 0.4898, "step": 9244 }, { "epoch": 0.66, "grad_norm": 1.7619056937780793, "learning_rate": 2.7946351564682105e-06, "loss": 0.4859, "step": 9245 }, { "epoch": 0.66, "grad_norm": 1.9704870364714537, "learning_rate": 2.7936038704159564e-06, "loss": 0.5519, "step": 9246 }, { "epoch": 0.66, "grad_norm": 1.8182461740118228, "learning_rate": 2.7925727009130204e-06, "loss": 0.4593, "step": 9247 }, { "epoch": 0.66, "grad_norm": 1.8414737536759742, "learning_rate": 2.791541648013874e-06, "loss": 0.4474, "step": 9248 }, { "epoch": 0.66, "grad_norm": 0.6468683569543854, "learning_rate": 2.7905107117729813e-06, "loss": 0.418, "step": 9249 }, { "epoch": 0.66, "grad_norm": 1.5676782973967742, "learning_rate": 2.7894798922448006e-06, "loss": 0.5688, "step": 9250 }, { "epoch": 0.66, "grad_norm": 1.8083141886729648, "learning_rate": 2.788449189483782e-06, "loss": 0.5697, "step": 9251 }, { "epoch": 0.66, "grad_norm": 2.1331555849335153, "learning_rate": 2.7874186035443695e-06, "loss": 0.5697, "step": 9252 }, { "epoch": 0.66, "grad_norm": 1.4770902294896449, "learning_rate": 2.786388134481004e-06, "loss": 0.4777, "step": 9253 }, { "epoch": 0.66, "grad_norm": 0.711291720105949, "learning_rate": 2.7853577823481183e-06, "loss": 0.4181, "step": 9254 }, { "epoch": 0.66, "grad_norm": 1.9038756772823697, "learning_rate": 2.78432754720014e-06, "loss": 0.6013, "step": 9255 }, { "epoch": 0.66, "grad_norm": 0.6938828438640092, "learning_rate": 2.783297429091485e-06, "loss": 0.4236, "step": 9256 }, { "epoch": 0.66, "grad_norm": 1.7582971766812825, "learning_rate": 2.7822674280765748e-06, "loss": 0.4507, "step": 9257 }, { "epoch": 0.66, "grad_norm": 1.572472733266068, "learning_rate": 2.7812375442098117e-06, "loss": 0.435, "step": 9258 }, { "epoch": 0.66, "grad_norm": 2.115866685954038, "learning_rate": 2.7802077775456e-06, "loss": 0.5994, "step": 9259 }, { "epoch": 0.66, "grad_norm": 0.7813877281360558, "learning_rate": 2.779178128138335e-06, "loss": 0.4074, "step": 9260 }, { "epoch": 0.66, "grad_norm": 1.8277438162168547, "learning_rate": 2.7781485960424065e-06, "loss": 0.4952, "step": 9261 }, { "epoch": 0.66, "grad_norm": 1.713533068768173, "learning_rate": 2.7771191813122e-06, "loss": 0.5953, "step": 9262 }, { "epoch": 0.66, "grad_norm": 1.642190276930909, "learning_rate": 2.776089884002086e-06, "loss": 0.5313, "step": 9263 }, { "epoch": 0.66, "grad_norm": 1.6950645395268544, "learning_rate": 2.7750607041664446e-06, "loss": 0.5733, "step": 9264 }, { "epoch": 0.66, "grad_norm": 1.8139080282852396, "learning_rate": 2.7740316418596338e-06, "loss": 0.5558, "step": 9265 }, { "epoch": 0.66, "grad_norm": 1.6721247076595647, "learning_rate": 2.7730026971360135e-06, "loss": 0.5896, "step": 9266 }, { "epoch": 0.66, "grad_norm": 1.6511017127903675, "learning_rate": 2.771973870049938e-06, "loss": 0.481, "step": 9267 }, { "epoch": 0.66, "grad_norm": 1.9303518769716375, "learning_rate": 2.7709451606557512e-06, "loss": 0.5597, "step": 9268 }, { "epoch": 0.66, "grad_norm": 1.8906158271664704, "learning_rate": 2.7699165690077967e-06, "loss": 0.4665, "step": 9269 }, { "epoch": 0.66, "grad_norm": 1.4990544713190823, "learning_rate": 2.768888095160403e-06, "loss": 0.5209, "step": 9270 }, { "epoch": 0.66, "grad_norm": 1.8522669652814832, "learning_rate": 2.7678597391679005e-06, "loss": 0.4965, "step": 9271 }, { "epoch": 0.66, "grad_norm": 2.2846770621647994, "learning_rate": 2.7668315010846103e-06, "loss": 0.5321, "step": 9272 }, { "epoch": 0.66, "grad_norm": 2.0553969485821004, "learning_rate": 2.765803380964847e-06, "loss": 0.51, "step": 9273 }, { "epoch": 0.66, "grad_norm": 2.260029541259063, "learning_rate": 2.7647753788629195e-06, "loss": 0.5643, "step": 9274 }, { "epoch": 0.66, "grad_norm": 1.723609956085677, "learning_rate": 2.763747494833131e-06, "loss": 0.5034, "step": 9275 }, { "epoch": 0.66, "grad_norm": 1.9499304426733872, "learning_rate": 2.762719728929779e-06, "loss": 0.5285, "step": 9276 }, { "epoch": 0.66, "grad_norm": 1.8133685210442814, "learning_rate": 2.7616920812071497e-06, "loss": 0.5507, "step": 9277 }, { "epoch": 0.66, "grad_norm": 1.5803550019868808, "learning_rate": 2.760664551719529e-06, "loss": 0.5437, "step": 9278 }, { "epoch": 0.66, "grad_norm": 2.1062895399459682, "learning_rate": 2.7596371405211953e-06, "loss": 0.4909, "step": 9279 }, { "epoch": 0.66, "grad_norm": 2.212459526923453, "learning_rate": 2.75860984766642e-06, "loss": 0.5346, "step": 9280 }, { "epoch": 0.66, "grad_norm": 1.6612916414737877, "learning_rate": 2.7575826732094664e-06, "loss": 0.5078, "step": 9281 }, { "epoch": 0.66, "grad_norm": 1.7995843503628777, "learning_rate": 2.756555617204594e-06, "loss": 0.5555, "step": 9282 }, { "epoch": 0.66, "grad_norm": 2.109575929476647, "learning_rate": 2.7555286797060554e-06, "loss": 0.5772, "step": 9283 }, { "epoch": 0.66, "grad_norm": 0.7994388654430824, "learning_rate": 2.754501860768098e-06, "loss": 0.4452, "step": 9284 }, { "epoch": 0.66, "grad_norm": 2.075919769946844, "learning_rate": 2.7534751604449626e-06, "loss": 0.5676, "step": 9285 }, { "epoch": 0.66, "grad_norm": 1.756488185789471, "learning_rate": 2.752448578790878e-06, "loss": 0.5328, "step": 9286 }, { "epoch": 0.66, "grad_norm": 1.944593874313746, "learning_rate": 2.7514221158600784e-06, "loss": 0.5183, "step": 9287 }, { "epoch": 0.66, "grad_norm": 1.525239231754289, "learning_rate": 2.7503957717067808e-06, "loss": 0.4971, "step": 9288 }, { "epoch": 0.66, "grad_norm": 2.417064782487489, "learning_rate": 2.7493695463852007e-06, "loss": 0.4959, "step": 9289 }, { "epoch": 0.66, "grad_norm": 1.686435627184048, "learning_rate": 2.748343439949547e-06, "loss": 0.5723, "step": 9290 }, { "epoch": 0.66, "grad_norm": 1.6502132597590549, "learning_rate": 2.747317452454023e-06, "loss": 0.506, "step": 9291 }, { "epoch": 0.66, "grad_norm": 2.7640331740878117, "learning_rate": 2.746291583952826e-06, "loss": 0.5243, "step": 9292 }, { "epoch": 0.66, "grad_norm": 1.6205110126490547, "learning_rate": 2.7452658345001405e-06, "loss": 0.5256, "step": 9293 }, { "epoch": 0.66, "grad_norm": 1.4636302116446764, "learning_rate": 2.744240204150157e-06, "loss": 0.5714, "step": 9294 }, { "epoch": 0.66, "grad_norm": 1.7797221685885345, "learning_rate": 2.743214692957048e-06, "loss": 0.6032, "step": 9295 }, { "epoch": 0.66, "grad_norm": 1.4560745076320603, "learning_rate": 2.742189300974986e-06, "loss": 0.5685, "step": 9296 }, { "epoch": 0.66, "grad_norm": 1.789475038177261, "learning_rate": 2.7411640282581355e-06, "loss": 0.5125, "step": 9297 }, { "epoch": 0.66, "grad_norm": 1.7508253438300656, "learning_rate": 2.740138874860655e-06, "loss": 0.5704, "step": 9298 }, { "epoch": 0.66, "grad_norm": 1.8482259702939492, "learning_rate": 2.739113840836699e-06, "loss": 0.5827, "step": 9299 }, { "epoch": 0.66, "grad_norm": 1.5229068750500316, "learning_rate": 2.7380889262404082e-06, "loss": 0.5205, "step": 9300 }, { "epoch": 0.66, "grad_norm": 1.874173951215821, "learning_rate": 2.737064131125925e-06, "loss": 0.5666, "step": 9301 }, { "epoch": 0.66, "grad_norm": 2.0662724707522493, "learning_rate": 2.7360394555473826e-06, "loss": 0.5832, "step": 9302 }, { "epoch": 0.66, "grad_norm": 2.067854630452239, "learning_rate": 2.7350148995589067e-06, "loss": 0.5867, "step": 9303 }, { "epoch": 0.66, "grad_norm": 1.911388843056198, "learning_rate": 2.7339904632146206e-06, "loss": 0.5259, "step": 9304 }, { "epoch": 0.66, "grad_norm": 2.0834183385996417, "learning_rate": 2.7329661465686337e-06, "loss": 0.4833, "step": 9305 }, { "epoch": 0.66, "grad_norm": 2.0845536322063714, "learning_rate": 2.73194194967506e-06, "loss": 0.4826, "step": 9306 }, { "epoch": 0.66, "grad_norm": 1.8636226624950012, "learning_rate": 2.730917872587996e-06, "loss": 0.5509, "step": 9307 }, { "epoch": 0.66, "grad_norm": 0.6800080943890687, "learning_rate": 2.7298939153615378e-06, "loss": 0.4178, "step": 9308 }, { "epoch": 0.66, "grad_norm": 1.8532234980719897, "learning_rate": 2.728870078049776e-06, "loss": 0.5124, "step": 9309 }, { "epoch": 0.66, "grad_norm": 1.96553799181879, "learning_rate": 2.727846360706794e-06, "loss": 0.5035, "step": 9310 }, { "epoch": 0.66, "grad_norm": 1.8353900574617938, "learning_rate": 2.726822763386664e-06, "loss": 0.5928, "step": 9311 }, { "epoch": 0.66, "grad_norm": 1.8941871909253338, "learning_rate": 2.725799286143457e-06, "loss": 0.569, "step": 9312 }, { "epoch": 0.66, "grad_norm": 1.83019971316929, "learning_rate": 2.7247759290312415e-06, "loss": 0.5392, "step": 9313 }, { "epoch": 0.66, "grad_norm": 1.5965189125401191, "learning_rate": 2.7237526921040686e-06, "loss": 0.5529, "step": 9314 }, { "epoch": 0.66, "grad_norm": 2.0681856101171383, "learning_rate": 2.722729575415994e-06, "loss": 0.5251, "step": 9315 }, { "epoch": 0.66, "grad_norm": 1.625842698559353, "learning_rate": 2.721706579021055e-06, "loss": 0.5025, "step": 9316 }, { "epoch": 0.66, "grad_norm": 1.9939530437945896, "learning_rate": 2.720683702973299e-06, "loss": 0.5313, "step": 9317 }, { "epoch": 0.66, "grad_norm": 1.5258914555045733, "learning_rate": 2.719660947326751e-06, "loss": 0.5107, "step": 9318 }, { "epoch": 0.66, "grad_norm": 1.5693644908111686, "learning_rate": 2.7186383121354386e-06, "loss": 0.4954, "step": 9319 }, { "epoch": 0.66, "grad_norm": 2.1519957647444548, "learning_rate": 2.717615797453381e-06, "loss": 0.5348, "step": 9320 }, { "epoch": 0.66, "grad_norm": 1.7871406512818238, "learning_rate": 2.71659340333459e-06, "loss": 0.5397, "step": 9321 }, { "epoch": 0.66, "grad_norm": 1.6815042267837494, "learning_rate": 2.7155711298330756e-06, "loss": 0.5023, "step": 9322 }, { "epoch": 0.66, "grad_norm": 1.6826848128405918, "learning_rate": 2.7145489770028305e-06, "loss": 0.5285, "step": 9323 }, { "epoch": 0.66, "grad_norm": 1.7264827528897124, "learning_rate": 2.7135269448978565e-06, "loss": 0.4265, "step": 9324 }, { "epoch": 0.66, "grad_norm": 2.6764825799656937, "learning_rate": 2.712505033572135e-06, "loss": 0.4423, "step": 9325 }, { "epoch": 0.66, "grad_norm": 0.832978465528351, "learning_rate": 2.711483243079649e-06, "loss": 0.4564, "step": 9326 }, { "epoch": 0.66, "grad_norm": 1.8611549034013508, "learning_rate": 2.7104615734743723e-06, "loss": 0.5406, "step": 9327 }, { "epoch": 0.66, "grad_norm": 1.7117965995619921, "learning_rate": 2.709440024810273e-06, "loss": 0.5313, "step": 9328 }, { "epoch": 0.66, "grad_norm": 1.694823182443523, "learning_rate": 2.708418597141316e-06, "loss": 0.4875, "step": 9329 }, { "epoch": 0.66, "grad_norm": 1.722797912156979, "learning_rate": 2.7073972905214507e-06, "loss": 0.573, "step": 9330 }, { "epoch": 0.66, "grad_norm": 1.5285703612318093, "learning_rate": 2.7063761050046287e-06, "loss": 0.4905, "step": 9331 }, { "epoch": 0.66, "grad_norm": 0.7965520956129154, "learning_rate": 2.7053550406447936e-06, "loss": 0.4244, "step": 9332 }, { "epoch": 0.66, "grad_norm": 1.8732292708925156, "learning_rate": 2.70433409749588e-06, "loss": 0.5485, "step": 9333 }, { "epoch": 0.66, "grad_norm": 1.5521628933093334, "learning_rate": 2.703313275611821e-06, "loss": 0.5007, "step": 9334 }, { "epoch": 0.66, "grad_norm": 1.471029445740381, "learning_rate": 2.7022925750465324e-06, "loss": 0.5308, "step": 9335 }, { "epoch": 0.66, "grad_norm": 1.580520502223484, "learning_rate": 2.7012719958539397e-06, "loss": 0.5548, "step": 9336 }, { "epoch": 0.66, "grad_norm": 1.7484873422133842, "learning_rate": 2.700251538087947e-06, "loss": 0.5385, "step": 9337 }, { "epoch": 0.66, "grad_norm": 1.5348742822199164, "learning_rate": 2.699231201802461e-06, "loss": 0.4917, "step": 9338 }, { "epoch": 0.66, "grad_norm": 1.7893554788990624, "learning_rate": 2.698210987051379e-06, "loss": 0.5363, "step": 9339 }, { "epoch": 0.66, "grad_norm": 1.8525944779731645, "learning_rate": 2.6971908938885927e-06, "loss": 0.4514, "step": 9340 }, { "epoch": 0.66, "grad_norm": 1.8819884842467036, "learning_rate": 2.696170922367988e-06, "loss": 0.5048, "step": 9341 }, { "epoch": 0.66, "grad_norm": 1.6370273642950401, "learning_rate": 2.6951510725434373e-06, "loss": 0.5129, "step": 9342 }, { "epoch": 0.66, "grad_norm": 2.310070122054087, "learning_rate": 2.6941313444688204e-06, "loss": 0.5006, "step": 9343 }, { "epoch": 0.66, "grad_norm": 1.6524659510356103, "learning_rate": 2.6931117381979975e-06, "loss": 0.4788, "step": 9344 }, { "epoch": 0.66, "grad_norm": 2.0474619403926475, "learning_rate": 2.692092253784832e-06, "loss": 0.5272, "step": 9345 }, { "epoch": 0.66, "grad_norm": 1.6255214084003808, "learning_rate": 2.6910728912831694e-06, "loss": 0.5014, "step": 9346 }, { "epoch": 0.66, "grad_norm": 1.7867970489126048, "learning_rate": 2.690053650746865e-06, "loss": 0.5607, "step": 9347 }, { "epoch": 0.66, "grad_norm": 1.9125258318695169, "learning_rate": 2.6890345322297517e-06, "loss": 0.5444, "step": 9348 }, { "epoch": 0.66, "grad_norm": 1.7523316153532298, "learning_rate": 2.688015535785665e-06, "loss": 0.6388, "step": 9349 }, { "epoch": 0.66, "grad_norm": 2.0060448983972936, "learning_rate": 2.6869966614684316e-06, "loss": 0.4551, "step": 9350 }, { "epoch": 0.66, "grad_norm": 2.2568151691213374, "learning_rate": 2.685977909331872e-06, "loss": 0.5394, "step": 9351 }, { "epoch": 0.66, "grad_norm": 1.6814779094344485, "learning_rate": 2.684959279429802e-06, "loss": 0.5983, "step": 9352 }, { "epoch": 0.66, "grad_norm": 1.5371419736291012, "learning_rate": 2.683940771816026e-06, "loss": 0.5674, "step": 9353 }, { "epoch": 0.66, "grad_norm": 2.2540789291162486, "learning_rate": 2.682922386544346e-06, "loss": 0.5012, "step": 9354 }, { "epoch": 0.66, "grad_norm": 1.7525923367612886, "learning_rate": 2.6819041236685567e-06, "loss": 0.5251, "step": 9355 }, { "epoch": 0.66, "grad_norm": 1.7750315002687527, "learning_rate": 2.6808859832424468e-06, "loss": 0.5368, "step": 9356 }, { "epoch": 0.66, "grad_norm": 2.0989793739943554, "learning_rate": 2.6798679653197966e-06, "loss": 0.5852, "step": 9357 }, { "epoch": 0.66, "grad_norm": 0.6843911610005086, "learning_rate": 2.678850069954383e-06, "loss": 0.4249, "step": 9358 }, { "epoch": 0.66, "grad_norm": 1.9690874698783156, "learning_rate": 2.6778322971999758e-06, "loss": 0.5316, "step": 9359 }, { "epoch": 0.66, "grad_norm": 1.6356944238602544, "learning_rate": 2.676814647110333e-06, "loss": 0.5014, "step": 9360 }, { "epoch": 0.66, "grad_norm": 1.5107126430462452, "learning_rate": 2.6757971197392125e-06, "loss": 0.4957, "step": 9361 }, { "epoch": 0.66, "grad_norm": 1.5837786806537586, "learning_rate": 2.6747797151403635e-06, "loss": 0.5875, "step": 9362 }, { "epoch": 0.66, "grad_norm": 1.936127848414354, "learning_rate": 2.6737624333675294e-06, "loss": 0.5112, "step": 9363 }, { "epoch": 0.66, "grad_norm": 1.6101981232695142, "learning_rate": 2.6727452744744477e-06, "loss": 0.4812, "step": 9364 }, { "epoch": 0.66, "grad_norm": 2.567099088883363, "learning_rate": 2.671728238514842e-06, "loss": 0.5142, "step": 9365 }, { "epoch": 0.66, "grad_norm": 1.8245813232009847, "learning_rate": 2.6707113255424446e-06, "loss": 0.5393, "step": 9366 }, { "epoch": 0.66, "grad_norm": 1.8684000690400189, "learning_rate": 2.669694535610965e-06, "loss": 0.5721, "step": 9367 }, { "epoch": 0.66, "grad_norm": 1.7885652942674504, "learning_rate": 2.6686778687741167e-06, "loss": 0.5651, "step": 9368 }, { "epoch": 0.66, "grad_norm": 1.648879552695787, "learning_rate": 2.6676613250856034e-06, "loss": 0.4819, "step": 9369 }, { "epoch": 0.66, "grad_norm": 6.763955154289071, "learning_rate": 2.6666449045991207e-06, "loss": 0.4791, "step": 9370 }, { "epoch": 0.66, "grad_norm": 2.152722224370005, "learning_rate": 2.6656286073683637e-06, "loss": 0.439, "step": 9371 }, { "epoch": 0.67, "grad_norm": 1.9422984690133138, "learning_rate": 2.6646124334470093e-06, "loss": 0.5454, "step": 9372 }, { "epoch": 0.67, "grad_norm": 2.9126048547479577, "learning_rate": 2.6635963828887434e-06, "loss": 0.4539, "step": 9373 }, { "epoch": 0.67, "grad_norm": 1.4969257087155272, "learning_rate": 2.6625804557472314e-06, "loss": 0.5012, "step": 9374 }, { "epoch": 0.67, "grad_norm": 1.8691573805423851, "learning_rate": 2.6615646520761416e-06, "loss": 0.5924, "step": 9375 }, { "epoch": 0.67, "grad_norm": 1.707156075484976, "learning_rate": 2.660548971929127e-06, "loss": 0.5168, "step": 9376 }, { "epoch": 0.67, "grad_norm": 1.7506782915511374, "learning_rate": 2.659533415359845e-06, "loss": 0.5233, "step": 9377 }, { "epoch": 0.67, "grad_norm": 3.6306477722123436, "learning_rate": 2.6585179824219404e-06, "loss": 0.5731, "step": 9378 }, { "epoch": 0.67, "grad_norm": 1.5320841232970568, "learning_rate": 2.6575026731690477e-06, "loss": 0.5387, "step": 9379 }, { "epoch": 0.67, "grad_norm": 1.9250511403363815, "learning_rate": 2.6564874876548017e-06, "loss": 0.5901, "step": 9380 }, { "epoch": 0.67, "grad_norm": 2.0367193087793094, "learning_rate": 2.655472425932828e-06, "loss": 0.5915, "step": 9381 }, { "epoch": 0.67, "grad_norm": 1.7628790226768085, "learning_rate": 2.6544574880567462e-06, "loss": 0.5419, "step": 9382 }, { "epoch": 0.67, "grad_norm": 1.7755247051803598, "learning_rate": 2.6534426740801656e-06, "loss": 0.5662, "step": 9383 }, { "epoch": 0.67, "grad_norm": 1.9728866560893619, "learning_rate": 2.6524279840566945e-06, "loss": 0.5328, "step": 9384 }, { "epoch": 0.67, "grad_norm": 1.6309847498070371, "learning_rate": 2.651413418039932e-06, "loss": 0.4825, "step": 9385 }, { "epoch": 0.67, "grad_norm": 1.7754392779482857, "learning_rate": 2.6503989760834715e-06, "loss": 0.4626, "step": 9386 }, { "epoch": 0.67, "grad_norm": 1.5216796274296036, "learning_rate": 2.649384658240898e-06, "loss": 0.4962, "step": 9387 }, { "epoch": 0.67, "grad_norm": 2.483726980438515, "learning_rate": 2.6483704645657917e-06, "loss": 0.4461, "step": 9388 }, { "epoch": 0.67, "grad_norm": 1.972131420989064, "learning_rate": 2.6473563951117276e-06, "loss": 0.5476, "step": 9389 }, { "epoch": 0.67, "grad_norm": 1.6762656868956984, "learning_rate": 2.6463424499322687e-06, "loss": 0.5107, "step": 9390 }, { "epoch": 0.67, "grad_norm": 0.708621822418427, "learning_rate": 2.645328629080977e-06, "loss": 0.4317, "step": 9391 }, { "epoch": 0.67, "grad_norm": 2.165464534176344, "learning_rate": 2.644314932611406e-06, "loss": 0.4934, "step": 9392 }, { "epoch": 0.67, "grad_norm": 0.7542945465405825, "learning_rate": 2.643301360577102e-06, "loss": 0.4435, "step": 9393 }, { "epoch": 0.67, "grad_norm": 1.8020095306104078, "learning_rate": 2.642287913031608e-06, "loss": 0.4854, "step": 9394 }, { "epoch": 0.67, "grad_norm": 1.667704427705805, "learning_rate": 2.641274590028452e-06, "loss": 0.4727, "step": 9395 }, { "epoch": 0.67, "grad_norm": 1.5017919037242102, "learning_rate": 2.6402613916211684e-06, "loss": 0.5165, "step": 9396 }, { "epoch": 0.67, "grad_norm": 1.619905483132893, "learning_rate": 2.639248317863271e-06, "loss": 0.6024, "step": 9397 }, { "epoch": 0.67, "grad_norm": 2.0077043337989315, "learning_rate": 2.6382353688082778e-06, "loss": 0.4749, "step": 9398 }, { "epoch": 0.67, "grad_norm": 0.6955360646684536, "learning_rate": 2.6372225445096944e-06, "loss": 0.4215, "step": 9399 }, { "epoch": 0.67, "grad_norm": 1.9761185471217073, "learning_rate": 2.6362098450210232e-06, "loss": 0.5906, "step": 9400 }, { "epoch": 0.67, "grad_norm": 1.8402587661000904, "learning_rate": 2.635197270395759e-06, "loss": 0.5531, "step": 9401 }, { "epoch": 0.67, "grad_norm": 3.133315185894663, "learning_rate": 2.6341848206873843e-06, "loss": 0.5569, "step": 9402 }, { "epoch": 0.67, "grad_norm": 2.322999908109515, "learning_rate": 2.6331724959493875e-06, "loss": 0.5171, "step": 9403 }, { "epoch": 0.67, "grad_norm": 2.3725694964344233, "learning_rate": 2.632160296235238e-06, "loss": 0.5691, "step": 9404 }, { "epoch": 0.67, "grad_norm": 1.720015607072126, "learning_rate": 2.6311482215984046e-06, "loss": 0.5959, "step": 9405 }, { "epoch": 0.67, "grad_norm": 1.7551855073394211, "learning_rate": 2.630136272092349e-06, "loss": 0.5191, "step": 9406 }, { "epoch": 0.67, "grad_norm": 1.9205755823336972, "learning_rate": 2.6291244477705258e-06, "loss": 0.5108, "step": 9407 }, { "epoch": 0.67, "grad_norm": 1.6106247792856978, "learning_rate": 2.6281127486863846e-06, "loss": 0.5056, "step": 9408 }, { "epoch": 0.67, "grad_norm": 1.6019931154317864, "learning_rate": 2.6271011748933627e-06, "loss": 0.5184, "step": 9409 }, { "epoch": 0.67, "grad_norm": 2.52329924799111, "learning_rate": 2.626089726444898e-06, "loss": 0.5931, "step": 9410 }, { "epoch": 0.67, "grad_norm": 1.8922648967243674, "learning_rate": 2.6250784033944177e-06, "loss": 0.5644, "step": 9411 }, { "epoch": 0.67, "grad_norm": 2.766223840924474, "learning_rate": 2.6240672057953452e-06, "loss": 0.5199, "step": 9412 }, { "epoch": 0.67, "grad_norm": 1.6536856463364034, "learning_rate": 2.6230561337010916e-06, "loss": 0.4583, "step": 9413 }, { "epoch": 0.67, "grad_norm": 2.581675097498057, "learning_rate": 2.6220451871650674e-06, "loss": 0.5486, "step": 9414 }, { "epoch": 0.67, "grad_norm": 2.7255320494273776, "learning_rate": 2.6210343662406746e-06, "loss": 0.4982, "step": 9415 }, { "epoch": 0.67, "grad_norm": 1.5450549838378127, "learning_rate": 2.6200236709813063e-06, "loss": 0.47, "step": 9416 }, { "epoch": 0.67, "grad_norm": 1.5719209700293362, "learning_rate": 2.6190131014403553e-06, "loss": 0.4797, "step": 9417 }, { "epoch": 0.67, "grad_norm": 1.8393266579014138, "learning_rate": 2.618002657671196e-06, "loss": 0.5697, "step": 9418 }, { "epoch": 0.67, "grad_norm": 4.137594834059182, "learning_rate": 2.616992339727211e-06, "loss": 0.547, "step": 9419 }, { "epoch": 0.67, "grad_norm": 2.280693140997432, "learning_rate": 2.6159821476617637e-06, "loss": 0.4904, "step": 9420 }, { "epoch": 0.67, "grad_norm": 1.8123526476827017, "learning_rate": 2.6149720815282176e-06, "loss": 0.595, "step": 9421 }, { "epoch": 0.67, "grad_norm": 1.8847424908189572, "learning_rate": 2.613962141379928e-06, "loss": 0.5233, "step": 9422 }, { "epoch": 0.67, "grad_norm": 0.7367662600057733, "learning_rate": 2.6129523272702422e-06, "loss": 0.4309, "step": 9423 }, { "epoch": 0.67, "grad_norm": 1.680444664528618, "learning_rate": 2.6119426392525053e-06, "loss": 0.5823, "step": 9424 }, { "epoch": 0.67, "grad_norm": 1.6029475928022763, "learning_rate": 2.6109330773800466e-06, "loss": 0.5891, "step": 9425 }, { "epoch": 0.67, "grad_norm": 2.081789554583414, "learning_rate": 2.6099236417062013e-06, "loss": 0.5082, "step": 9426 }, { "epoch": 0.67, "grad_norm": 4.907833550623655, "learning_rate": 2.6089143322842863e-06, "loss": 0.579, "step": 9427 }, { "epoch": 0.67, "grad_norm": 1.6286104842076286, "learning_rate": 2.607905149167619e-06, "loss": 0.5293, "step": 9428 }, { "epoch": 0.67, "grad_norm": 1.7738339715338716, "learning_rate": 2.606896092409506e-06, "loss": 0.5268, "step": 9429 }, { "epoch": 0.67, "grad_norm": 1.9732824688826989, "learning_rate": 2.605887162063251e-06, "loss": 0.5628, "step": 9430 }, { "epoch": 0.67, "grad_norm": 1.8938000553833283, "learning_rate": 2.60487835818215e-06, "loss": 0.4734, "step": 9431 }, { "epoch": 0.67, "grad_norm": 1.7642685339128816, "learning_rate": 2.6038696808194886e-06, "loss": 0.5328, "step": 9432 }, { "epoch": 0.67, "grad_norm": 2.020182967608077, "learning_rate": 2.6028611300285496e-06, "loss": 0.5759, "step": 9433 }, { "epoch": 0.67, "grad_norm": 3.5154605846524563, "learning_rate": 2.601852705862608e-06, "loss": 0.5749, "step": 9434 }, { "epoch": 0.67, "grad_norm": 1.998371151999566, "learning_rate": 2.600844408374933e-06, "loss": 0.5418, "step": 9435 }, { "epoch": 0.67, "grad_norm": 1.8043891956131017, "learning_rate": 2.5998362376187852e-06, "loss": 0.4581, "step": 9436 }, { "epoch": 0.67, "grad_norm": 1.845530234977172, "learning_rate": 2.598828193647421e-06, "loss": 0.5612, "step": 9437 }, { "epoch": 0.67, "grad_norm": 1.6788507557295131, "learning_rate": 2.597820276514089e-06, "loss": 0.4858, "step": 9438 }, { "epoch": 0.67, "grad_norm": 1.9792954891265269, "learning_rate": 2.596812486272028e-06, "loss": 0.53, "step": 9439 }, { "epoch": 0.67, "grad_norm": 1.7373057541308596, "learning_rate": 2.5958048229744747e-06, "loss": 0.5404, "step": 9440 }, { "epoch": 0.67, "grad_norm": 2.339298134494633, "learning_rate": 2.5947972866746573e-06, "loss": 0.5593, "step": 9441 }, { "epoch": 0.67, "grad_norm": 1.6172184720769516, "learning_rate": 2.5937898774257993e-06, "loss": 0.5508, "step": 9442 }, { "epoch": 0.67, "grad_norm": 1.6498094604726037, "learning_rate": 2.592782595281111e-06, "loss": 0.5134, "step": 9443 }, { "epoch": 0.67, "grad_norm": 1.7213098754311706, "learning_rate": 2.591775440293801e-06, "loss": 0.5075, "step": 9444 }, { "epoch": 0.67, "grad_norm": 1.6207791465240433, "learning_rate": 2.5907684125170764e-06, "loss": 0.5044, "step": 9445 }, { "epoch": 0.67, "grad_norm": 0.6736754203347862, "learning_rate": 2.5897615120041254e-06, "loss": 0.4163, "step": 9446 }, { "epoch": 0.67, "grad_norm": 1.624793886944733, "learning_rate": 2.58875473880814e-06, "loss": 0.5421, "step": 9447 }, { "epoch": 0.67, "grad_norm": 0.7945752530063672, "learning_rate": 2.5877480929822975e-06, "loss": 0.4583, "step": 9448 }, { "epoch": 0.67, "grad_norm": 1.9211719659216349, "learning_rate": 2.586741574579777e-06, "loss": 0.4714, "step": 9449 }, { "epoch": 0.67, "grad_norm": 1.7740340453493817, "learning_rate": 2.585735183653742e-06, "loss": 0.5802, "step": 9450 }, { "epoch": 0.67, "grad_norm": 1.6438096084255551, "learning_rate": 2.5847289202573556e-06, "loss": 0.4885, "step": 9451 }, { "epoch": 0.67, "grad_norm": 2.3279498691081817, "learning_rate": 2.583722784443772e-06, "loss": 0.568, "step": 9452 }, { "epoch": 0.67, "grad_norm": 0.6967730817955221, "learning_rate": 2.5827167762661375e-06, "loss": 0.4427, "step": 9453 }, { "epoch": 0.67, "grad_norm": 1.6935689450566607, "learning_rate": 2.581710895777596e-06, "loss": 0.5223, "step": 9454 }, { "epoch": 0.67, "grad_norm": 1.7229871431083046, "learning_rate": 2.5807051430312757e-06, "loss": 0.5635, "step": 9455 }, { "epoch": 0.67, "grad_norm": 0.7324760060440557, "learning_rate": 2.579699518080311e-06, "loss": 0.427, "step": 9456 }, { "epoch": 0.67, "grad_norm": 0.7059640199939262, "learning_rate": 2.578694020977817e-06, "loss": 0.445, "step": 9457 }, { "epoch": 0.67, "grad_norm": 2.4020426232563015, "learning_rate": 2.5776886517769096e-06, "loss": 0.6046, "step": 9458 }, { "epoch": 0.67, "grad_norm": 1.9229153322093986, "learning_rate": 2.576683410530695e-06, "loss": 0.4881, "step": 9459 }, { "epoch": 0.67, "grad_norm": 1.8051396010963872, "learning_rate": 2.5756782972922743e-06, "loss": 0.5431, "step": 9460 }, { "epoch": 0.67, "grad_norm": 1.7327483046434844, "learning_rate": 2.5746733121147427e-06, "loss": 0.5619, "step": 9461 }, { "epoch": 0.67, "grad_norm": 1.52122955396837, "learning_rate": 2.5736684550511824e-06, "loss": 0.4914, "step": 9462 }, { "epoch": 0.67, "grad_norm": 1.4510721707049927, "learning_rate": 2.572663726154676e-06, "loss": 0.4537, "step": 9463 }, { "epoch": 0.67, "grad_norm": 1.483829631160677, "learning_rate": 2.5716591254782976e-06, "loss": 0.53, "step": 9464 }, { "epoch": 0.67, "grad_norm": 1.4531050308737308, "learning_rate": 2.570654653075111e-06, "loss": 0.5145, "step": 9465 }, { "epoch": 0.67, "grad_norm": 1.6527268485830084, "learning_rate": 2.56965030899818e-06, "loss": 0.4879, "step": 9466 }, { "epoch": 0.67, "grad_norm": 1.6253567652311904, "learning_rate": 2.5686460933005507e-06, "loss": 0.5276, "step": 9467 }, { "epoch": 0.67, "grad_norm": 0.7727776089497317, "learning_rate": 2.567642006035278e-06, "loss": 0.4687, "step": 9468 }, { "epoch": 0.67, "grad_norm": 1.7084362372291313, "learning_rate": 2.5666380472553937e-06, "loss": 0.603, "step": 9469 }, { "epoch": 0.67, "grad_norm": 1.8033910548066474, "learning_rate": 2.5656342170139337e-06, "loss": 0.5501, "step": 9470 }, { "epoch": 0.67, "grad_norm": 2.088318484360144, "learning_rate": 2.564630515363923e-06, "loss": 0.553, "step": 9471 }, { "epoch": 0.67, "grad_norm": 1.4394327090730423, "learning_rate": 2.5636269423583797e-06, "loss": 0.498, "step": 9472 }, { "epoch": 0.67, "grad_norm": 1.8393807634961048, "learning_rate": 2.5626234980503194e-06, "loss": 0.545, "step": 9473 }, { "epoch": 0.67, "grad_norm": 2.0113717395339137, "learning_rate": 2.561620182492741e-06, "loss": 0.5932, "step": 9474 }, { "epoch": 0.67, "grad_norm": 1.9480433883782802, "learning_rate": 2.5606169957386517e-06, "loss": 0.5094, "step": 9475 }, { "epoch": 0.67, "grad_norm": 1.7760046490703643, "learning_rate": 2.5596139378410356e-06, "loss": 0.5514, "step": 9476 }, { "epoch": 0.67, "grad_norm": 1.6384559507421317, "learning_rate": 2.5586110088528826e-06, "loss": 0.5594, "step": 9477 }, { "epoch": 0.67, "grad_norm": 1.8881217252930413, "learning_rate": 2.557608208827165e-06, "loss": 0.5698, "step": 9478 }, { "epoch": 0.67, "grad_norm": 3.0398247252978576, "learning_rate": 2.5566055378168607e-06, "loss": 0.5178, "step": 9479 }, { "epoch": 0.67, "grad_norm": 1.7891940371001271, "learning_rate": 2.55560299587493e-06, "loss": 0.5425, "step": 9480 }, { "epoch": 0.67, "grad_norm": 2.0900382668957946, "learning_rate": 2.554600583054332e-06, "loss": 0.5507, "step": 9481 }, { "epoch": 0.67, "grad_norm": 1.9520235527169394, "learning_rate": 2.5535982994080173e-06, "loss": 0.4997, "step": 9482 }, { "epoch": 0.67, "grad_norm": 1.550274041685687, "learning_rate": 2.552596144988929e-06, "loss": 0.5131, "step": 9483 }, { "epoch": 0.67, "grad_norm": 1.6737042034419627, "learning_rate": 2.5515941198500084e-06, "loss": 0.4993, "step": 9484 }, { "epoch": 0.67, "grad_norm": 1.680866337687511, "learning_rate": 2.5505922240441782e-06, "loss": 0.5265, "step": 9485 }, { "epoch": 0.67, "grad_norm": 1.5727894325858913, "learning_rate": 2.5495904576243714e-06, "loss": 0.5632, "step": 9486 }, { "epoch": 0.67, "grad_norm": 1.6767497017004442, "learning_rate": 2.548588820643496e-06, "loss": 0.5377, "step": 9487 }, { "epoch": 0.67, "grad_norm": 1.6200520076198297, "learning_rate": 2.547587313154467e-06, "loss": 0.5873, "step": 9488 }, { "epoch": 0.67, "grad_norm": 1.8266796533073957, "learning_rate": 2.5465859352101855e-06, "loss": 0.5338, "step": 9489 }, { "epoch": 0.67, "grad_norm": 1.8158208529541568, "learning_rate": 2.545584686863548e-06, "loss": 0.5787, "step": 9490 }, { "epoch": 0.67, "grad_norm": 1.6145414603972545, "learning_rate": 2.544583568167447e-06, "loss": 0.4998, "step": 9491 }, { "epoch": 0.67, "grad_norm": 1.687926448148595, "learning_rate": 2.543582579174759e-06, "loss": 0.47, "step": 9492 }, { "epoch": 0.67, "grad_norm": 1.6457384371363428, "learning_rate": 2.542581719938363e-06, "loss": 0.5549, "step": 9493 }, { "epoch": 0.67, "grad_norm": 0.7023248187836227, "learning_rate": 2.541580990511128e-06, "loss": 0.4519, "step": 9494 }, { "epoch": 0.67, "grad_norm": 1.610444745996479, "learning_rate": 2.5405803909459144e-06, "loss": 0.5982, "step": 9495 }, { "epoch": 0.67, "grad_norm": 0.674970176760348, "learning_rate": 2.5395799212955807e-06, "loss": 0.4403, "step": 9496 }, { "epoch": 0.67, "grad_norm": 0.7358487555138075, "learning_rate": 2.538579581612968e-06, "loss": 0.401, "step": 9497 }, { "epoch": 0.67, "grad_norm": 1.9786864200332772, "learning_rate": 2.537579371950927e-06, "loss": 0.4738, "step": 9498 }, { "epoch": 0.67, "grad_norm": 1.8772472975570638, "learning_rate": 2.536579292362285e-06, "loss": 0.5423, "step": 9499 }, { "epoch": 0.67, "grad_norm": 1.6514954900327883, "learning_rate": 2.535579342899872e-06, "loss": 0.5458, "step": 9500 }, { "epoch": 0.67, "grad_norm": 1.747457513047944, "learning_rate": 2.534579523616509e-06, "loss": 0.5434, "step": 9501 }, { "epoch": 0.67, "grad_norm": 1.9813805648455791, "learning_rate": 2.533579834565009e-06, "loss": 0.5011, "step": 9502 }, { "epoch": 0.67, "grad_norm": 1.783658793666206, "learning_rate": 2.5325802757981823e-06, "loss": 0.5515, "step": 9503 }, { "epoch": 0.67, "grad_norm": 1.8777267092642822, "learning_rate": 2.531580847368822e-06, "loss": 0.5845, "step": 9504 }, { "epoch": 0.67, "grad_norm": 2.017657819652052, "learning_rate": 2.5305815493297286e-06, "loss": 0.5056, "step": 9505 }, { "epoch": 0.67, "grad_norm": 0.7296420022061271, "learning_rate": 2.529582381733684e-06, "loss": 0.4256, "step": 9506 }, { "epoch": 0.67, "grad_norm": 1.6619169096277253, "learning_rate": 2.5285833446334705e-06, "loss": 0.5277, "step": 9507 }, { "epoch": 0.67, "grad_norm": 1.6814349946899256, "learning_rate": 2.527584438081856e-06, "loss": 0.4953, "step": 9508 }, { "epoch": 0.67, "grad_norm": 1.592925707335148, "learning_rate": 2.5265856621316102e-06, "loss": 0.5021, "step": 9509 }, { "epoch": 0.67, "grad_norm": 0.753231769233868, "learning_rate": 2.5255870168354933e-06, "loss": 0.4419, "step": 9510 }, { "epoch": 0.67, "grad_norm": 2.053864038502355, "learning_rate": 2.5245885022462523e-06, "loss": 0.5488, "step": 9511 }, { "epoch": 0.67, "grad_norm": 1.659879164129113, "learning_rate": 2.5235901184166346e-06, "loss": 0.5053, "step": 9512 }, { "epoch": 0.68, "grad_norm": 1.9932880817028733, "learning_rate": 2.522591865399378e-06, "loss": 0.5655, "step": 9513 }, { "epoch": 0.68, "grad_norm": 2.023091592823052, "learning_rate": 2.521593743247216e-06, "loss": 0.5084, "step": 9514 }, { "epoch": 0.68, "grad_norm": 1.7180139281185067, "learning_rate": 2.520595752012866e-06, "loss": 0.554, "step": 9515 }, { "epoch": 0.68, "grad_norm": 0.7609612794144697, "learning_rate": 2.5195978917490537e-06, "loss": 0.4346, "step": 9516 }, { "epoch": 0.68, "grad_norm": 1.641154249846229, "learning_rate": 2.5186001625084843e-06, "loss": 0.5238, "step": 9517 }, { "epoch": 0.68, "grad_norm": 1.375778202185351, "learning_rate": 2.5176025643438617e-06, "loss": 0.4477, "step": 9518 }, { "epoch": 0.68, "grad_norm": 0.7336396538417131, "learning_rate": 2.5166050973078837e-06, "loss": 0.4299, "step": 9519 }, { "epoch": 0.68, "grad_norm": 3.287552738310857, "learning_rate": 2.5156077614532386e-06, "loss": 0.5369, "step": 9520 }, { "epoch": 0.68, "grad_norm": 0.7713769540425621, "learning_rate": 2.5146105568326124e-06, "loss": 0.42, "step": 9521 }, { "epoch": 0.68, "grad_norm": 1.7049407892302118, "learning_rate": 2.5136134834986757e-06, "loss": 0.517, "step": 9522 }, { "epoch": 0.68, "grad_norm": 1.9442216339101994, "learning_rate": 2.5126165415041003e-06, "loss": 0.4659, "step": 9523 }, { "epoch": 0.68, "grad_norm": 3.2863669705743144, "learning_rate": 2.5116197309015476e-06, "loss": 0.5223, "step": 9524 }, { "epoch": 0.68, "grad_norm": 1.9373991814528804, "learning_rate": 2.5106230517436725e-06, "loss": 0.5539, "step": 9525 }, { "epoch": 0.68, "grad_norm": 1.9943180010617376, "learning_rate": 2.5096265040831245e-06, "loss": 0.488, "step": 9526 }, { "epoch": 0.68, "grad_norm": 1.7709808569931884, "learning_rate": 2.50863008797254e-06, "loss": 0.5166, "step": 9527 }, { "epoch": 0.68, "grad_norm": 1.928882090532752, "learning_rate": 2.5076338034645597e-06, "loss": 0.5381, "step": 9528 }, { "epoch": 0.68, "grad_norm": 1.6230732440384907, "learning_rate": 2.506637650611805e-06, "loss": 0.5475, "step": 9529 }, { "epoch": 0.68, "grad_norm": 1.6709327335313024, "learning_rate": 2.5056416294668984e-06, "loss": 0.5254, "step": 9530 }, { "epoch": 0.68, "grad_norm": 1.6257774698593597, "learning_rate": 2.504645740082453e-06, "loss": 0.5147, "step": 9531 }, { "epoch": 0.68, "grad_norm": 0.8388249876566426, "learning_rate": 2.503649982511075e-06, "loss": 0.4499, "step": 9532 }, { "epoch": 0.68, "grad_norm": 1.80968184389095, "learning_rate": 2.502654356805365e-06, "loss": 0.4526, "step": 9533 }, { "epoch": 0.68, "grad_norm": 1.7729032330510688, "learning_rate": 2.501658863017911e-06, "loss": 0.5156, "step": 9534 }, { "epoch": 0.68, "grad_norm": 0.7352275364190343, "learning_rate": 2.5006635012013047e-06, "loss": 0.4162, "step": 9535 }, { "epoch": 0.68, "grad_norm": 1.9480841117748162, "learning_rate": 2.49966827140812e-06, "loss": 0.5479, "step": 9536 }, { "epoch": 0.68, "grad_norm": 1.8445906108264996, "learning_rate": 2.4986731736909293e-06, "loss": 0.5168, "step": 9537 }, { "epoch": 0.68, "grad_norm": 1.9230596095204575, "learning_rate": 2.4976782081022966e-06, "loss": 0.5416, "step": 9538 }, { "epoch": 0.68, "grad_norm": 1.4914599300101168, "learning_rate": 2.4966833746947807e-06, "loss": 0.5196, "step": 9539 }, { "epoch": 0.68, "grad_norm": 1.7613411500829173, "learning_rate": 2.495688673520933e-06, "loss": 0.5552, "step": 9540 }, { "epoch": 0.68, "grad_norm": 1.8615093903965423, "learning_rate": 2.494694104633293e-06, "loss": 0.5429, "step": 9541 }, { "epoch": 0.68, "grad_norm": 1.8241569533587358, "learning_rate": 2.4936996680844e-06, "loss": 0.5459, "step": 9542 }, { "epoch": 0.68, "grad_norm": 1.558975224550322, "learning_rate": 2.4927053639267827e-06, "loss": 0.516, "step": 9543 }, { "epoch": 0.68, "grad_norm": 2.4980271780878778, "learning_rate": 2.4917111922129663e-06, "loss": 0.4988, "step": 9544 }, { "epoch": 0.68, "grad_norm": 1.6932431685756684, "learning_rate": 2.4907171529954616e-06, "loss": 0.4999, "step": 9545 }, { "epoch": 0.68, "grad_norm": 1.5745391868989644, "learning_rate": 2.4897232463267797e-06, "loss": 0.4743, "step": 9546 }, { "epoch": 0.68, "grad_norm": 1.9878366173094526, "learning_rate": 2.488729472259422e-06, "loss": 0.5301, "step": 9547 }, { "epoch": 0.68, "grad_norm": 1.8631650287995503, "learning_rate": 2.4877358308458828e-06, "loss": 0.5347, "step": 9548 }, { "epoch": 0.68, "grad_norm": 1.852351578240347, "learning_rate": 2.48674232213865e-06, "loss": 0.5053, "step": 9549 }, { "epoch": 0.68, "grad_norm": 1.6330634572438074, "learning_rate": 2.4857489461902034e-06, "loss": 0.5432, "step": 9550 }, { "epoch": 0.68, "grad_norm": 1.7360097987857852, "learning_rate": 2.484755703053019e-06, "loss": 0.436, "step": 9551 }, { "epoch": 0.68, "grad_norm": 1.6325831783489244, "learning_rate": 2.4837625927795592e-06, "loss": 0.503, "step": 9552 }, { "epoch": 0.68, "grad_norm": 1.9333221507410914, "learning_rate": 2.482769615422286e-06, "loss": 0.5557, "step": 9553 }, { "epoch": 0.68, "grad_norm": 1.9565121364111384, "learning_rate": 2.4817767710336503e-06, "loss": 0.5189, "step": 9554 }, { "epoch": 0.68, "grad_norm": 2.6553194290600133, "learning_rate": 2.4807840596660986e-06, "loss": 0.5087, "step": 9555 }, { "epoch": 0.68, "grad_norm": 1.952742800569021, "learning_rate": 2.479791481372072e-06, "loss": 0.5122, "step": 9556 }, { "epoch": 0.68, "grad_norm": 1.8264825487052163, "learning_rate": 2.478799036203994e-06, "loss": 0.5805, "step": 9557 }, { "epoch": 0.68, "grad_norm": 1.6943459428099492, "learning_rate": 2.4778067242142985e-06, "loss": 0.5011, "step": 9558 }, { "epoch": 0.68, "grad_norm": 1.853499828518338, "learning_rate": 2.476814545455396e-06, "loss": 0.5415, "step": 9559 }, { "epoch": 0.68, "grad_norm": 1.551611367214949, "learning_rate": 2.4758224999796987e-06, "loss": 0.4644, "step": 9560 }, { "epoch": 0.68, "grad_norm": 1.852126994506452, "learning_rate": 2.4748305878396105e-06, "loss": 0.5151, "step": 9561 }, { "epoch": 0.68, "grad_norm": 0.8082480467119703, "learning_rate": 2.473838809087527e-06, "loss": 0.4092, "step": 9562 }, { "epoch": 0.68, "grad_norm": 1.6972044468724394, "learning_rate": 2.472847163775839e-06, "loss": 0.5004, "step": 9563 }, { "epoch": 0.68, "grad_norm": 1.8414117624802269, "learning_rate": 2.4718556519569238e-06, "loss": 0.5388, "step": 9564 }, { "epoch": 0.68, "grad_norm": 1.7411187096555942, "learning_rate": 2.4708642736831624e-06, "loss": 0.4674, "step": 9565 }, { "epoch": 0.68, "grad_norm": 1.6947042490477509, "learning_rate": 2.469873029006919e-06, "loss": 0.5267, "step": 9566 }, { "epoch": 0.68, "grad_norm": 1.8947207283291747, "learning_rate": 2.4688819179805547e-06, "loss": 0.496, "step": 9567 }, { "epoch": 0.68, "grad_norm": 1.680949200879843, "learning_rate": 2.4678909406564246e-06, "loss": 0.4976, "step": 9568 }, { "epoch": 0.68, "grad_norm": 1.766009164129411, "learning_rate": 2.4669000970868745e-06, "loss": 0.5155, "step": 9569 }, { "epoch": 0.68, "grad_norm": 10.445280831561234, "learning_rate": 2.4659093873242466e-06, "loss": 0.5396, "step": 9570 }, { "epoch": 0.68, "grad_norm": 3.2145917133301234, "learning_rate": 2.46491881142087e-06, "loss": 0.5587, "step": 9571 }, { "epoch": 0.68, "grad_norm": 1.6820701632272255, "learning_rate": 2.4639283694290714e-06, "loss": 0.5074, "step": 9572 }, { "epoch": 0.68, "grad_norm": 1.8844335329400594, "learning_rate": 2.4629380614011705e-06, "loss": 0.5811, "step": 9573 }, { "epoch": 0.68, "grad_norm": 1.9398746387485344, "learning_rate": 2.461947887389479e-06, "loss": 0.5666, "step": 9574 }, { "epoch": 0.68, "grad_norm": 1.9609301187943184, "learning_rate": 2.4609578474462986e-06, "loss": 0.5499, "step": 9575 }, { "epoch": 0.68, "grad_norm": 1.7183875863020177, "learning_rate": 2.4599679416239264e-06, "loss": 0.5626, "step": 9576 }, { "epoch": 0.68, "grad_norm": 2.9360086989093914, "learning_rate": 2.458978169974658e-06, "loss": 0.4993, "step": 9577 }, { "epoch": 0.68, "grad_norm": 1.711903623150239, "learning_rate": 2.457988532550771e-06, "loss": 0.4473, "step": 9578 }, { "epoch": 0.68, "grad_norm": 1.7350133731129185, "learning_rate": 2.4569990294045426e-06, "loss": 0.5146, "step": 9579 }, { "epoch": 0.68, "grad_norm": 1.8130451758861044, "learning_rate": 2.456009660588243e-06, "loss": 0.5549, "step": 9580 }, { "epoch": 0.68, "grad_norm": 1.8784648522692742, "learning_rate": 2.455020426154135e-06, "loss": 0.5001, "step": 9581 }, { "epoch": 0.68, "grad_norm": 1.7029316562199608, "learning_rate": 2.4540313261544696e-06, "loss": 0.4918, "step": 9582 }, { "epoch": 0.68, "grad_norm": 2.2115179537516596, "learning_rate": 2.4530423606414966e-06, "loss": 0.5586, "step": 9583 }, { "epoch": 0.68, "grad_norm": 2.298389140851446, "learning_rate": 2.4520535296674565e-06, "loss": 0.5391, "step": 9584 }, { "epoch": 0.68, "grad_norm": 0.6715579160679879, "learning_rate": 2.4510648332845827e-06, "loss": 0.3968, "step": 9585 }, { "epoch": 0.68, "grad_norm": 1.6706786945390941, "learning_rate": 2.450076271545103e-06, "loss": 0.5066, "step": 9586 }, { "epoch": 0.68, "grad_norm": 1.631643744810339, "learning_rate": 2.4490878445012313e-06, "loss": 0.4488, "step": 9587 }, { "epoch": 0.68, "grad_norm": 1.8576095885645028, "learning_rate": 2.448099552205187e-06, "loss": 0.5679, "step": 9588 }, { "epoch": 0.68, "grad_norm": 2.0684579814895114, "learning_rate": 2.4471113947091697e-06, "loss": 0.5251, "step": 9589 }, { "epoch": 0.68, "grad_norm": 1.9953171470408428, "learning_rate": 2.446123372065379e-06, "loss": 0.5786, "step": 9590 }, { "epoch": 0.68, "grad_norm": 0.6636786446021233, "learning_rate": 2.445135484326005e-06, "loss": 0.4484, "step": 9591 }, { "epoch": 0.68, "grad_norm": 1.7907418374842137, "learning_rate": 2.4441477315432324e-06, "loss": 0.4782, "step": 9592 }, { "epoch": 0.68, "grad_norm": 1.9537334149203662, "learning_rate": 2.4431601137692383e-06, "loss": 0.5533, "step": 9593 }, { "epoch": 0.68, "grad_norm": 1.7966067205624559, "learning_rate": 2.442172631056189e-06, "loss": 0.5041, "step": 9594 }, { "epoch": 0.68, "grad_norm": 1.6090092832165857, "learning_rate": 2.441185283456249e-06, "loss": 0.4934, "step": 9595 }, { "epoch": 0.68, "grad_norm": 2.4183084889583575, "learning_rate": 2.4401980710215716e-06, "loss": 0.5696, "step": 9596 }, { "epoch": 0.68, "grad_norm": 1.8852750049293814, "learning_rate": 2.4392109938043067e-06, "loss": 0.5186, "step": 9597 }, { "epoch": 0.68, "grad_norm": 0.7135088439101847, "learning_rate": 2.438224051856594e-06, "loss": 0.4103, "step": 9598 }, { "epoch": 0.68, "grad_norm": 1.683944205881511, "learning_rate": 2.4372372452305683e-06, "loss": 0.4589, "step": 9599 }, { "epoch": 0.68, "grad_norm": 1.8582910428671553, "learning_rate": 2.436250573978356e-06, "loss": 0.6219, "step": 9600 }, { "epoch": 0.68, "grad_norm": 2.1222296661880096, "learning_rate": 2.4352640381520737e-06, "loss": 0.5995, "step": 9601 }, { "epoch": 0.68, "grad_norm": 2.3580470030994203, "learning_rate": 2.4342776378038358e-06, "loss": 0.5823, "step": 9602 }, { "epoch": 0.68, "grad_norm": 2.0128325241315514, "learning_rate": 2.4332913729857466e-06, "loss": 0.5588, "step": 9603 }, { "epoch": 0.68, "grad_norm": 2.3822140829201848, "learning_rate": 2.4323052437499046e-06, "loss": 0.4959, "step": 9604 }, { "epoch": 0.68, "grad_norm": 1.7112563955961178, "learning_rate": 2.431319250148402e-06, "loss": 0.5412, "step": 9605 }, { "epoch": 0.68, "grad_norm": 1.664078167254722, "learning_rate": 2.430333392233316e-06, "loss": 0.5848, "step": 9606 }, { "epoch": 0.68, "grad_norm": 3.668304497860216, "learning_rate": 2.429347670056732e-06, "loss": 0.4826, "step": 9607 }, { "epoch": 0.68, "grad_norm": 1.7745119659289073, "learning_rate": 2.428362083670713e-06, "loss": 0.5834, "step": 9608 }, { "epoch": 0.68, "grad_norm": 1.719230827065138, "learning_rate": 2.4273766331273235e-06, "loss": 0.5154, "step": 9609 }, { "epoch": 0.68, "grad_norm": 8.039030953657164, "learning_rate": 2.426391318478614e-06, "loss": 0.5418, "step": 9610 }, { "epoch": 0.68, "grad_norm": 1.7871671378822676, "learning_rate": 2.4254061397766403e-06, "loss": 0.5371, "step": 9611 }, { "epoch": 0.68, "grad_norm": 4.156800602127164, "learning_rate": 2.4244210970734354e-06, "loss": 0.488, "step": 9612 }, { "epoch": 0.68, "grad_norm": 1.9159866945345476, "learning_rate": 2.4234361904210357e-06, "loss": 0.5179, "step": 9613 }, { "epoch": 0.68, "grad_norm": 1.9667429349145797, "learning_rate": 2.422451419871467e-06, "loss": 0.5536, "step": 9614 }, { "epoch": 0.68, "grad_norm": 2.008466148218622, "learning_rate": 2.4214667854767484e-06, "loss": 0.4948, "step": 9615 }, { "epoch": 0.68, "grad_norm": 2.5618663358603255, "learning_rate": 2.4204822872888935e-06, "loss": 0.5105, "step": 9616 }, { "epoch": 0.68, "grad_norm": 2.145010496636656, "learning_rate": 2.4194979253599004e-06, "loss": 0.5428, "step": 9617 }, { "epoch": 0.68, "grad_norm": 1.9775325740704937, "learning_rate": 2.4185136997417748e-06, "loss": 0.5038, "step": 9618 }, { "epoch": 0.68, "grad_norm": 2.08691380280909, "learning_rate": 2.4175296104865008e-06, "loss": 0.5288, "step": 9619 }, { "epoch": 0.68, "grad_norm": 1.7767166240308947, "learning_rate": 2.4165456576460638e-06, "loss": 0.5089, "step": 9620 }, { "epoch": 0.68, "grad_norm": 1.6896755097495912, "learning_rate": 2.415561841272439e-06, "loss": 0.5599, "step": 9621 }, { "epoch": 0.68, "grad_norm": 3.09077774601849, "learning_rate": 2.414578161417595e-06, "loss": 0.5608, "step": 9622 }, { "epoch": 0.68, "grad_norm": 2.0115685681436695, "learning_rate": 2.413594618133495e-06, "loss": 0.5644, "step": 9623 }, { "epoch": 0.68, "grad_norm": 1.6285159494116115, "learning_rate": 2.412611211472089e-06, "loss": 0.447, "step": 9624 }, { "epoch": 0.68, "grad_norm": 1.498081385007708, "learning_rate": 2.4116279414853265e-06, "loss": 0.5396, "step": 9625 }, { "epoch": 0.68, "grad_norm": 1.7421109857184551, "learning_rate": 2.4106448082251466e-06, "loss": 0.5695, "step": 9626 }, { "epoch": 0.68, "grad_norm": 1.8268009152160873, "learning_rate": 2.4096618117434815e-06, "loss": 0.5122, "step": 9627 }, { "epoch": 0.68, "grad_norm": 1.3805049180469506, "learning_rate": 2.4086789520922578e-06, "loss": 0.4929, "step": 9628 }, { "epoch": 0.68, "grad_norm": 1.8648818199509813, "learning_rate": 2.4076962293233913e-06, "loss": 0.5435, "step": 9629 }, { "epoch": 0.68, "grad_norm": 1.6630724784711406, "learning_rate": 2.4067136434887967e-06, "loss": 0.5596, "step": 9630 }, { "epoch": 0.68, "grad_norm": 1.8679785844554826, "learning_rate": 2.405731194640372e-06, "loss": 0.4329, "step": 9631 }, { "epoch": 0.68, "grad_norm": 1.6490938436951075, "learning_rate": 2.4047488828300166e-06, "loss": 0.4649, "step": 9632 }, { "epoch": 0.68, "grad_norm": 1.8972396161508762, "learning_rate": 2.403766708109619e-06, "loss": 0.5438, "step": 9633 }, { "epoch": 0.68, "grad_norm": 2.0082860902695945, "learning_rate": 2.4027846705310615e-06, "loss": 0.539, "step": 9634 }, { "epoch": 0.68, "grad_norm": 2.1723936954406247, "learning_rate": 2.401802770146219e-06, "loss": 0.6375, "step": 9635 }, { "epoch": 0.68, "grad_norm": 1.553225181520766, "learning_rate": 2.4008210070069554e-06, "loss": 0.481, "step": 9636 }, { "epoch": 0.68, "grad_norm": 1.891443991007578, "learning_rate": 2.399839381165137e-06, "loss": 0.5151, "step": 9637 }, { "epoch": 0.68, "grad_norm": 1.88999372008184, "learning_rate": 2.3988578926726112e-06, "loss": 0.5776, "step": 9638 }, { "epoch": 0.68, "grad_norm": 1.7107626454275147, "learning_rate": 2.397876541581227e-06, "loss": 0.4857, "step": 9639 }, { "epoch": 0.68, "grad_norm": 2.066592409323531, "learning_rate": 2.3968953279428175e-06, "loss": 0.5646, "step": 9640 }, { "epoch": 0.68, "grad_norm": 1.456923225283102, "learning_rate": 2.395914251809221e-06, "loss": 0.5291, "step": 9641 }, { "epoch": 0.68, "grad_norm": 1.6434393381699206, "learning_rate": 2.394933313232256e-06, "loss": 0.5262, "step": 9642 }, { "epoch": 0.68, "grad_norm": 0.7547059262723421, "learning_rate": 2.393952512263738e-06, "loss": 0.4354, "step": 9643 }, { "epoch": 0.68, "grad_norm": 1.6322853463247602, "learning_rate": 2.392971848955483e-06, "loss": 0.5006, "step": 9644 }, { "epoch": 0.68, "grad_norm": 1.5049163449136036, "learning_rate": 2.391991323359287e-06, "loss": 0.5343, "step": 9645 }, { "epoch": 0.68, "grad_norm": 1.6780925953778096, "learning_rate": 2.3910109355269485e-06, "loss": 0.5331, "step": 9646 }, { "epoch": 0.68, "grad_norm": 1.8391275800045892, "learning_rate": 2.3900306855102482e-06, "loss": 0.5945, "step": 9647 }, { "epoch": 0.68, "grad_norm": 1.9052184544425523, "learning_rate": 2.3890505733609752e-06, "loss": 0.5728, "step": 9648 }, { "epoch": 0.68, "grad_norm": 1.891118447636134, "learning_rate": 2.388070599130896e-06, "loss": 0.516, "step": 9649 }, { "epoch": 0.68, "grad_norm": 1.6619637124295878, "learning_rate": 2.3870907628717787e-06, "loss": 0.506, "step": 9650 }, { "epoch": 0.68, "grad_norm": 1.714740117786903, "learning_rate": 2.3861110646353804e-06, "loss": 0.4947, "step": 9651 }, { "epoch": 0.68, "grad_norm": 1.8795911532916356, "learning_rate": 2.385131504473453e-06, "loss": 0.5654, "step": 9652 }, { "epoch": 0.68, "grad_norm": 1.7687032138593557, "learning_rate": 2.384152082437742e-06, "loss": 0.4982, "step": 9653 }, { "epoch": 0.69, "grad_norm": 1.7318848519220131, "learning_rate": 2.3831727985799792e-06, "loss": 0.5384, "step": 9654 }, { "epoch": 0.69, "grad_norm": 1.4707849783710212, "learning_rate": 2.382193652951897e-06, "loss": 0.5167, "step": 9655 }, { "epoch": 0.69, "grad_norm": 2.1945608387239024, "learning_rate": 2.3812146456052155e-06, "loss": 0.5822, "step": 9656 }, { "epoch": 0.69, "grad_norm": 1.7122781316756488, "learning_rate": 2.3802357765916507e-06, "loss": 0.4975, "step": 9657 }, { "epoch": 0.69, "grad_norm": 1.6951044331552443, "learning_rate": 2.379257045962911e-06, "loss": 0.523, "step": 9658 }, { "epoch": 0.69, "grad_norm": 1.8054299410815489, "learning_rate": 2.3782784537706905e-06, "loss": 0.5988, "step": 9659 }, { "epoch": 0.69, "grad_norm": 1.696654150313717, "learning_rate": 2.3773000000666895e-06, "loss": 0.5485, "step": 9660 }, { "epoch": 0.69, "grad_norm": 1.8463808570501872, "learning_rate": 2.376321684902587e-06, "loss": 0.549, "step": 9661 }, { "epoch": 0.69, "grad_norm": 0.7268450219389964, "learning_rate": 2.375343508330064e-06, "loss": 0.4001, "step": 9662 }, { "epoch": 0.69, "grad_norm": 1.7267263724348494, "learning_rate": 2.3743654704007906e-06, "loss": 0.5181, "step": 9663 }, { "epoch": 0.69, "grad_norm": 2.0690023829139963, "learning_rate": 2.3733875711664296e-06, "loss": 0.5146, "step": 9664 }, { "epoch": 0.69, "grad_norm": 1.6031077975368517, "learning_rate": 2.372409810678639e-06, "loss": 0.5357, "step": 9665 }, { "epoch": 0.69, "grad_norm": 1.7533683237193083, "learning_rate": 2.3714321889890623e-06, "loss": 0.5013, "step": 9666 }, { "epoch": 0.69, "grad_norm": 1.5910701971635117, "learning_rate": 2.370454706149348e-06, "loss": 0.4991, "step": 9667 }, { "epoch": 0.69, "grad_norm": 1.8899799969970354, "learning_rate": 2.369477362211124e-06, "loss": 0.621, "step": 9668 }, { "epoch": 0.69, "grad_norm": 1.639762493839967, "learning_rate": 2.368500157226021e-06, "loss": 0.5253, "step": 9669 }, { "epoch": 0.69, "grad_norm": 1.547093097682518, "learning_rate": 2.3675230912456525e-06, "loss": 0.4248, "step": 9670 }, { "epoch": 0.69, "grad_norm": 1.9338899450303266, "learning_rate": 2.366546164321637e-06, "loss": 0.5769, "step": 9671 }, { "epoch": 0.69, "grad_norm": 0.766105732873643, "learning_rate": 2.3655693765055776e-06, "loss": 0.4406, "step": 9672 }, { "epoch": 0.69, "grad_norm": 2.2886378304249533, "learning_rate": 2.364592727849069e-06, "loss": 0.4774, "step": 9673 }, { "epoch": 0.69, "grad_norm": 1.905989960256344, "learning_rate": 2.363616218403702e-06, "loss": 0.5309, "step": 9674 }, { "epoch": 0.69, "grad_norm": 1.5939169102625215, "learning_rate": 2.36263984822106e-06, "loss": 0.5068, "step": 9675 }, { "epoch": 0.69, "grad_norm": 1.8241803203480123, "learning_rate": 2.3616636173527196e-06, "loss": 0.5913, "step": 9676 }, { "epoch": 0.69, "grad_norm": 0.7546650492108021, "learning_rate": 2.3606875258502433e-06, "loss": 0.4318, "step": 9677 }, { "epoch": 0.69, "grad_norm": 1.5515550406588925, "learning_rate": 2.359711573765199e-06, "loss": 0.4407, "step": 9678 }, { "epoch": 0.69, "grad_norm": 1.877280232280693, "learning_rate": 2.3587357611491337e-06, "loss": 0.5732, "step": 9679 }, { "epoch": 0.69, "grad_norm": 1.5867770438057536, "learning_rate": 2.357760088053595e-06, "loss": 0.5453, "step": 9680 }, { "epoch": 0.69, "grad_norm": 1.7095753277278445, "learning_rate": 2.3567845545301216e-06, "loss": 0.5209, "step": 9681 }, { "epoch": 0.69, "grad_norm": 1.7294618572672786, "learning_rate": 2.3558091606302448e-06, "loss": 0.4707, "step": 9682 }, { "epoch": 0.69, "grad_norm": 1.9447321110997529, "learning_rate": 2.3548339064054898e-06, "loss": 0.5479, "step": 9683 }, { "epoch": 0.69, "grad_norm": 1.7927290691912816, "learning_rate": 2.353858791907369e-06, "loss": 0.5491, "step": 9684 }, { "epoch": 0.69, "grad_norm": 1.8901077533736064, "learning_rate": 2.352883817187393e-06, "loss": 0.6015, "step": 9685 }, { "epoch": 0.69, "grad_norm": 1.8229733548640452, "learning_rate": 2.351908982297064e-06, "loss": 0.5444, "step": 9686 }, { "epoch": 0.69, "grad_norm": 1.8486539378617293, "learning_rate": 2.3509342872878745e-06, "loss": 0.4934, "step": 9687 }, { "epoch": 0.69, "grad_norm": 1.599023473474026, "learning_rate": 2.349959732211315e-06, "loss": 0.5803, "step": 9688 }, { "epoch": 0.69, "grad_norm": 2.002305253530704, "learning_rate": 2.3489853171188576e-06, "loss": 0.5482, "step": 9689 }, { "epoch": 0.69, "grad_norm": 1.7607307991302097, "learning_rate": 2.3480110420619827e-06, "loss": 0.5318, "step": 9690 }, { "epoch": 0.69, "grad_norm": 0.6783820516278984, "learning_rate": 2.347036907092149e-06, "loss": 0.4311, "step": 9691 }, { "epoch": 0.69, "grad_norm": 1.5355355565630149, "learning_rate": 2.3460629122608146e-06, "loss": 0.4408, "step": 9692 }, { "epoch": 0.69, "grad_norm": 0.6624803588869154, "learning_rate": 2.3450890576194308e-06, "loss": 0.413, "step": 9693 }, { "epoch": 0.69, "grad_norm": 2.0909935875789767, "learning_rate": 2.3441153432194387e-06, "loss": 0.4857, "step": 9694 }, { "epoch": 0.69, "grad_norm": 1.6915842598027797, "learning_rate": 2.3431417691122753e-06, "loss": 0.4846, "step": 9695 }, { "epoch": 0.69, "grad_norm": 2.3084265208131844, "learning_rate": 2.3421683353493623e-06, "loss": 0.5561, "step": 9696 }, { "epoch": 0.69, "grad_norm": 1.4748566703904935, "learning_rate": 2.341195041982128e-06, "loss": 0.5278, "step": 9697 }, { "epoch": 0.69, "grad_norm": 1.741744206412284, "learning_rate": 2.3402218890619793e-06, "loss": 0.4235, "step": 9698 }, { "epoch": 0.69, "grad_norm": 1.8158697207660703, "learning_rate": 2.3392488766403224e-06, "loss": 0.587, "step": 9699 }, { "epoch": 0.69, "grad_norm": 1.8145529251033394, "learning_rate": 2.338276004768556e-06, "loss": 0.5014, "step": 9700 }, { "epoch": 0.69, "grad_norm": 1.4662875126056782, "learning_rate": 2.33730327349807e-06, "loss": 0.5235, "step": 9701 }, { "epoch": 0.69, "grad_norm": 1.5810085902491504, "learning_rate": 2.3363306828802494e-06, "loss": 0.5093, "step": 9702 }, { "epoch": 0.69, "grad_norm": 2.0887149660566395, "learning_rate": 2.335358232966466e-06, "loss": 0.5433, "step": 9703 }, { "epoch": 0.69, "grad_norm": 1.6452600578084005, "learning_rate": 2.3343859238080897e-06, "loss": 0.5643, "step": 9704 }, { "epoch": 0.69, "grad_norm": 1.5950349589435142, "learning_rate": 2.3334137554564817e-06, "loss": 0.5125, "step": 9705 }, { "epoch": 0.69, "grad_norm": 1.743533157515716, "learning_rate": 2.332441727962996e-06, "loss": 0.5411, "step": 9706 }, { "epoch": 0.69, "grad_norm": 1.6597038470694456, "learning_rate": 2.3314698413789738e-06, "loss": 0.5331, "step": 9707 }, { "epoch": 0.69, "grad_norm": 1.7294436241495723, "learning_rate": 2.3304980957557587e-06, "loss": 0.544, "step": 9708 }, { "epoch": 0.69, "grad_norm": 2.0543890153487485, "learning_rate": 2.3295264911446812e-06, "loss": 0.471, "step": 9709 }, { "epoch": 0.69, "grad_norm": 1.5164334231859424, "learning_rate": 2.328555027597062e-06, "loss": 0.5113, "step": 9710 }, { "epoch": 0.69, "grad_norm": 1.7338193581547061, "learning_rate": 2.3275837051642177e-06, "loss": 0.532, "step": 9711 }, { "epoch": 0.69, "grad_norm": 1.7028900201934263, "learning_rate": 2.3266125238974573e-06, "loss": 0.5447, "step": 9712 }, { "epoch": 0.69, "grad_norm": 1.8203394447615429, "learning_rate": 2.3256414838480846e-06, "loss": 0.4893, "step": 9713 }, { "epoch": 0.69, "grad_norm": 1.9374825560531639, "learning_rate": 2.3246705850673885e-06, "loss": 0.5841, "step": 9714 }, { "epoch": 0.69, "grad_norm": 1.9646893317680507, "learning_rate": 2.3236998276066575e-06, "loss": 0.5691, "step": 9715 }, { "epoch": 0.69, "grad_norm": 0.6739759458106583, "learning_rate": 2.32272921151717e-06, "loss": 0.4167, "step": 9716 }, { "epoch": 0.69, "grad_norm": 1.7887761796192139, "learning_rate": 2.3217587368501976e-06, "loss": 0.5451, "step": 9717 }, { "epoch": 0.69, "grad_norm": 1.8281315117448913, "learning_rate": 2.3207884036570056e-06, "loss": 0.5607, "step": 9718 }, { "epoch": 0.69, "grad_norm": 1.4891015531184466, "learning_rate": 2.319818211988845e-06, "loss": 0.4557, "step": 9719 }, { "epoch": 0.69, "grad_norm": 1.7791805765281783, "learning_rate": 2.318848161896971e-06, "loss": 0.5609, "step": 9720 }, { "epoch": 0.69, "grad_norm": 1.8461719845254208, "learning_rate": 2.317878253432621e-06, "loss": 0.4859, "step": 9721 }, { "epoch": 0.69, "grad_norm": 1.4456484988475986, "learning_rate": 2.3169084866470297e-06, "loss": 0.5115, "step": 9722 }, { "epoch": 0.69, "grad_norm": 1.9647630411142032, "learning_rate": 2.3159388615914234e-06, "loss": 0.5696, "step": 9723 }, { "epoch": 0.69, "grad_norm": 1.70415832038415, "learning_rate": 2.314969378317022e-06, "loss": 0.4634, "step": 9724 }, { "epoch": 0.69, "grad_norm": 1.5825333841364404, "learning_rate": 2.3140000368750375e-06, "loss": 0.5044, "step": 9725 }, { "epoch": 0.69, "grad_norm": 1.9036931200954714, "learning_rate": 2.313030837316669e-06, "loss": 0.5502, "step": 9726 }, { "epoch": 0.69, "grad_norm": 1.720146302408287, "learning_rate": 2.31206177969312e-06, "loss": 0.49, "step": 9727 }, { "epoch": 0.69, "grad_norm": 1.6401267101372148, "learning_rate": 2.3110928640555736e-06, "loss": 0.5504, "step": 9728 }, { "epoch": 0.69, "grad_norm": 1.6918370911647205, "learning_rate": 2.3101240904552138e-06, "loss": 0.5712, "step": 9729 }, { "epoch": 0.69, "grad_norm": 1.818275127413543, "learning_rate": 2.3091554589432142e-06, "loss": 0.5987, "step": 9730 }, { "epoch": 0.69, "grad_norm": 1.580037050067111, "learning_rate": 2.3081869695707404e-06, "loss": 0.5052, "step": 9731 }, { "epoch": 0.69, "grad_norm": 1.7342208824027834, "learning_rate": 2.3072186223889543e-06, "loss": 0.5272, "step": 9732 }, { "epoch": 0.69, "grad_norm": 1.7154978353023667, "learning_rate": 2.306250417449003e-06, "loss": 0.4807, "step": 9733 }, { "epoch": 0.69, "grad_norm": 1.6551836637353523, "learning_rate": 2.3052823548020325e-06, "loss": 0.5358, "step": 9734 }, { "epoch": 0.69, "grad_norm": 1.774074635914195, "learning_rate": 2.304314434499179e-06, "loss": 0.5279, "step": 9735 }, { "epoch": 0.69, "grad_norm": 2.159175676488079, "learning_rate": 2.3033466565915707e-06, "loss": 0.4797, "step": 9736 }, { "epoch": 0.69, "grad_norm": 1.8312616941870463, "learning_rate": 2.302379021130332e-06, "loss": 0.5331, "step": 9737 }, { "epoch": 0.69, "grad_norm": 1.9948508015697015, "learning_rate": 2.3014115281665707e-06, "loss": 0.6009, "step": 9738 }, { "epoch": 0.69, "grad_norm": 1.4409636546117957, "learning_rate": 2.3004441777513996e-06, "loss": 0.4791, "step": 9739 }, { "epoch": 0.69, "grad_norm": 2.0089803309236247, "learning_rate": 2.2994769699359125e-06, "loss": 0.6072, "step": 9740 }, { "epoch": 0.69, "grad_norm": 1.8773971502124038, "learning_rate": 2.2985099047712028e-06, "loss": 0.5379, "step": 9741 }, { "epoch": 0.69, "grad_norm": 1.9934135424013362, "learning_rate": 2.2975429823083538e-06, "loss": 0.4856, "step": 9742 }, { "epoch": 0.69, "grad_norm": 1.4576319873617547, "learning_rate": 2.296576202598443e-06, "loss": 0.4992, "step": 9743 }, { "epoch": 0.69, "grad_norm": 1.8986928570965453, "learning_rate": 2.2956095656925366e-06, "loss": 0.5171, "step": 9744 }, { "epoch": 0.69, "grad_norm": 2.207293399251415, "learning_rate": 2.294643071641696e-06, "loss": 0.5541, "step": 9745 }, { "epoch": 0.69, "grad_norm": 1.5925524492841927, "learning_rate": 2.2936767204969762e-06, "loss": 0.4533, "step": 9746 }, { "epoch": 0.69, "grad_norm": 1.5678707339411364, "learning_rate": 2.2927105123094217e-06, "loss": 0.5601, "step": 9747 }, { "epoch": 0.69, "grad_norm": 1.7588709897517392, "learning_rate": 2.2917444471300733e-06, "loss": 0.5221, "step": 9748 }, { "epoch": 0.69, "grad_norm": 1.7218656456911963, "learning_rate": 2.290778525009956e-06, "loss": 0.5752, "step": 9749 }, { "epoch": 0.69, "grad_norm": 1.6990540002271146, "learning_rate": 2.289812746000102e-06, "loss": 0.5806, "step": 9750 }, { "epoch": 0.69, "grad_norm": 1.8200136531976747, "learning_rate": 2.2888471101515194e-06, "loss": 0.5022, "step": 9751 }, { "epoch": 0.69, "grad_norm": 1.4780039394443454, "learning_rate": 2.287881617515219e-06, "loss": 0.5472, "step": 9752 }, { "epoch": 0.69, "grad_norm": 1.9384819649968479, "learning_rate": 2.2869162681422014e-06, "loss": 0.5768, "step": 9753 }, { "epoch": 0.69, "grad_norm": 1.5890684536589057, "learning_rate": 2.2859510620834602e-06, "loss": 0.4874, "step": 9754 }, { "epoch": 0.69, "grad_norm": 1.838674774942663, "learning_rate": 2.284985999389982e-06, "loss": 0.5224, "step": 9755 }, { "epoch": 0.69, "grad_norm": 1.9250016326617472, "learning_rate": 2.2840210801127395e-06, "loss": 0.5336, "step": 9756 }, { "epoch": 0.69, "grad_norm": 1.5551144897928308, "learning_rate": 2.2830563043027098e-06, "loss": 0.5324, "step": 9757 }, { "epoch": 0.69, "grad_norm": 1.61680023463662, "learning_rate": 2.2820916720108505e-06, "loss": 0.5218, "step": 9758 }, { "epoch": 0.69, "grad_norm": 1.5204825376988291, "learning_rate": 2.2811271832881188e-06, "loss": 0.4943, "step": 9759 }, { "epoch": 0.69, "grad_norm": 1.7471273883077791, "learning_rate": 2.2801628381854624e-06, "loss": 0.5246, "step": 9760 }, { "epoch": 0.69, "grad_norm": 1.5317506001946553, "learning_rate": 2.2791986367538206e-06, "loss": 0.4605, "step": 9761 }, { "epoch": 0.69, "grad_norm": 1.583891594547221, "learning_rate": 2.278234579044128e-06, "loss": 0.5387, "step": 9762 }, { "epoch": 0.69, "grad_norm": 2.265264957433339, "learning_rate": 2.2772706651073055e-06, "loss": 0.5393, "step": 9763 }, { "epoch": 0.69, "grad_norm": 1.8385113371723907, "learning_rate": 2.276306894994273e-06, "loss": 0.5051, "step": 9764 }, { "epoch": 0.69, "grad_norm": 2.070098999658853, "learning_rate": 2.275343268755939e-06, "loss": 0.5193, "step": 9765 }, { "epoch": 0.69, "grad_norm": 2.0134649446140824, "learning_rate": 2.274379786443206e-06, "loss": 0.5298, "step": 9766 }, { "epoch": 0.69, "grad_norm": 1.6702191200538403, "learning_rate": 2.27341644810697e-06, "loss": 0.5681, "step": 9767 }, { "epoch": 0.69, "grad_norm": 1.5769896075984415, "learning_rate": 2.2724532537981126e-06, "loss": 0.4636, "step": 9768 }, { "epoch": 0.69, "grad_norm": 3.6695226923222353, "learning_rate": 2.27149020356752e-06, "loss": 0.5577, "step": 9769 }, { "epoch": 0.69, "grad_norm": 1.8551965210392765, "learning_rate": 2.270527297466059e-06, "loss": 0.5056, "step": 9770 }, { "epoch": 0.69, "grad_norm": 1.8481204115572398, "learning_rate": 2.2695645355445965e-06, "loss": 0.4663, "step": 9771 }, { "epoch": 0.69, "grad_norm": 2.1514694618880834, "learning_rate": 2.2686019178539835e-06, "loss": 0.5646, "step": 9772 }, { "epoch": 0.69, "grad_norm": 1.4693111647424204, "learning_rate": 2.267639444445076e-06, "loss": 0.4502, "step": 9773 }, { "epoch": 0.69, "grad_norm": 2.0458428035768925, "learning_rate": 2.2666771153687096e-06, "loss": 0.4903, "step": 9774 }, { "epoch": 0.69, "grad_norm": 1.5148752318899077, "learning_rate": 2.2657149306757183e-06, "loss": 0.4965, "step": 9775 }, { "epoch": 0.69, "grad_norm": 1.6849122728551587, "learning_rate": 2.2647528904169324e-06, "loss": 0.5275, "step": 9776 }, { "epoch": 0.69, "grad_norm": 1.7016994350821346, "learning_rate": 2.2637909946431656e-06, "loss": 0.5023, "step": 9777 }, { "epoch": 0.69, "grad_norm": 3.9669850263693625, "learning_rate": 2.2628292434052325e-06, "loss": 0.4635, "step": 9778 }, { "epoch": 0.69, "grad_norm": 1.8858119320989581, "learning_rate": 2.261867636753929e-06, "loss": 0.5326, "step": 9779 }, { "epoch": 0.69, "grad_norm": 0.7157565542437598, "learning_rate": 2.260906174740059e-06, "loss": 0.437, "step": 9780 }, { "epoch": 0.69, "grad_norm": 0.7202378924459611, "learning_rate": 2.2599448574144047e-06, "loss": 0.4231, "step": 9781 }, { "epoch": 0.69, "grad_norm": 1.7952437531304084, "learning_rate": 2.258983684827747e-06, "loss": 0.4797, "step": 9782 }, { "epoch": 0.69, "grad_norm": 1.7791493568955592, "learning_rate": 2.258022657030859e-06, "loss": 0.5464, "step": 9783 }, { "epoch": 0.69, "grad_norm": 1.7566530186058351, "learning_rate": 2.2570617740745054e-06, "loss": 0.4491, "step": 9784 }, { "epoch": 0.69, "grad_norm": 1.6333048071871863, "learning_rate": 2.2561010360094447e-06, "loss": 0.559, "step": 9785 }, { "epoch": 0.69, "grad_norm": 1.5174857859245723, "learning_rate": 2.2551404428864236e-06, "loss": 0.4917, "step": 9786 }, { "epoch": 0.69, "grad_norm": 2.1389021893128843, "learning_rate": 2.2541799947561853e-06, "loss": 0.4918, "step": 9787 }, { "epoch": 0.69, "grad_norm": 1.686580714592363, "learning_rate": 2.2532196916694633e-06, "loss": 0.513, "step": 9788 }, { "epoch": 0.69, "grad_norm": 1.6450877583967884, "learning_rate": 2.252259533676985e-06, "loss": 0.5612, "step": 9789 }, { "epoch": 0.69, "grad_norm": 1.628652444908251, "learning_rate": 2.2512995208294688e-06, "loss": 0.4981, "step": 9790 }, { "epoch": 0.69, "grad_norm": 1.8355199298065652, "learning_rate": 2.250339653177625e-06, "loss": 0.5068, "step": 9791 }, { "epoch": 0.69, "grad_norm": 1.5534055191360703, "learning_rate": 2.24937993077216e-06, "loss": 0.5592, "step": 9792 }, { "epoch": 0.69, "grad_norm": 1.7546093388778625, "learning_rate": 2.248420353663766e-06, "loss": 0.5508, "step": 9793 }, { "epoch": 0.7, "grad_norm": 1.9418276595862947, "learning_rate": 2.2474609219031325e-06, "loss": 0.5614, "step": 9794 }, { "epoch": 0.7, "grad_norm": 1.7659724092061488, "learning_rate": 2.2465016355409395e-06, "loss": 0.5244, "step": 9795 }, { "epoch": 0.7, "grad_norm": 1.9078676817209506, "learning_rate": 2.2455424946278603e-06, "loss": 0.5138, "step": 9796 }, { "epoch": 0.7, "grad_norm": 2.2238629544739674, "learning_rate": 2.244583499214562e-06, "loss": 0.5531, "step": 9797 }, { "epoch": 0.7, "grad_norm": 1.6850235283148265, "learning_rate": 2.243624649351696e-06, "loss": 0.5203, "step": 9798 }, { "epoch": 0.7, "grad_norm": 2.0103901501721877, "learning_rate": 2.2426659450899203e-06, "loss": 0.5142, "step": 9799 }, { "epoch": 0.7, "grad_norm": 2.2203015186902033, "learning_rate": 2.24170738647987e-06, "loss": 0.526, "step": 9800 }, { "epoch": 0.7, "grad_norm": 0.6935535403925185, "learning_rate": 2.240748973572184e-06, "loss": 0.4276, "step": 9801 }, { "epoch": 0.7, "grad_norm": 2.20103154952227, "learning_rate": 2.2397907064174827e-06, "loss": 0.5519, "step": 9802 }, { "epoch": 0.7, "grad_norm": 1.648189533844457, "learning_rate": 2.2388325850663913e-06, "loss": 0.5064, "step": 9803 }, { "epoch": 0.7, "grad_norm": 1.9211950522476275, "learning_rate": 2.2378746095695207e-06, "loss": 0.5047, "step": 9804 }, { "epoch": 0.7, "grad_norm": 1.7064761260042274, "learning_rate": 2.236916779977469e-06, "loss": 0.5075, "step": 9805 }, { "epoch": 0.7, "grad_norm": 0.8344109204186879, "learning_rate": 2.235959096340839e-06, "loss": 0.4202, "step": 9806 }, { "epoch": 0.7, "grad_norm": 1.5960044185409226, "learning_rate": 2.2350015587102135e-06, "loss": 0.5482, "step": 9807 }, { "epoch": 0.7, "grad_norm": 1.7898930094525705, "learning_rate": 2.2340441671361774e-06, "loss": 0.6038, "step": 9808 }, { "epoch": 0.7, "grad_norm": 0.7142109056622868, "learning_rate": 2.2330869216692964e-06, "loss": 0.4412, "step": 9809 }, { "epoch": 0.7, "grad_norm": 1.663516948502248, "learning_rate": 2.2321298223601433e-06, "loss": 0.5167, "step": 9810 }, { "epoch": 0.7, "grad_norm": 1.6814355294355856, "learning_rate": 2.2311728692592705e-06, "loss": 0.5294, "step": 9811 }, { "epoch": 0.7, "grad_norm": 1.6692833489155774, "learning_rate": 2.230216062417228e-06, "loss": 0.5143, "step": 9812 }, { "epoch": 0.7, "grad_norm": 2.548215115919644, "learning_rate": 2.229259401884559e-06, "loss": 0.5354, "step": 9813 }, { "epoch": 0.7, "grad_norm": 0.8479091089072636, "learning_rate": 2.228302887711797e-06, "loss": 0.4266, "step": 9814 }, { "epoch": 0.7, "grad_norm": 1.6494211725633305, "learning_rate": 2.22734651994947e-06, "loss": 0.4707, "step": 9815 }, { "epoch": 0.7, "grad_norm": 1.6046435591145247, "learning_rate": 2.226390298648093e-06, "loss": 0.4934, "step": 9816 }, { "epoch": 0.7, "grad_norm": 2.1102304856213485, "learning_rate": 2.2254342238581787e-06, "loss": 0.5227, "step": 9817 }, { "epoch": 0.7, "grad_norm": 1.5254649715565505, "learning_rate": 2.2244782956302304e-06, "loss": 0.4821, "step": 9818 }, { "epoch": 0.7, "grad_norm": 2.5853601028571447, "learning_rate": 2.2235225140147427e-06, "loss": 0.5365, "step": 9819 }, { "epoch": 0.7, "grad_norm": 1.5959536400136145, "learning_rate": 2.222566879062204e-06, "loss": 0.5328, "step": 9820 }, { "epoch": 0.7, "grad_norm": 1.9075411941456688, "learning_rate": 2.221611390823094e-06, "loss": 0.4509, "step": 9821 }, { "epoch": 0.7, "grad_norm": 1.8467325386419042, "learning_rate": 2.2206560493478873e-06, "loss": 0.5421, "step": 9822 }, { "epoch": 0.7, "grad_norm": 1.6682965002879802, "learning_rate": 2.219700854687043e-06, "loss": 0.5491, "step": 9823 }, { "epoch": 0.7, "grad_norm": 0.69361826370397, "learning_rate": 2.218745806891021e-06, "loss": 0.43, "step": 9824 }, { "epoch": 0.7, "grad_norm": 1.7758083775761881, "learning_rate": 2.21779090601027e-06, "loss": 0.5965, "step": 9825 }, { "epoch": 0.7, "grad_norm": 1.5691503985185293, "learning_rate": 2.2168361520952298e-06, "loss": 0.5176, "step": 9826 }, { "epoch": 0.7, "grad_norm": 1.605465088313817, "learning_rate": 2.2158815451963373e-06, "loss": 0.6051, "step": 9827 }, { "epoch": 0.7, "grad_norm": 0.6824569229793276, "learning_rate": 2.2149270853640116e-06, "loss": 0.4446, "step": 9828 }, { "epoch": 0.7, "grad_norm": 2.3279249262316486, "learning_rate": 2.2139727726486775e-06, "loss": 0.5632, "step": 9829 }, { "epoch": 0.7, "grad_norm": 1.6630246621711562, "learning_rate": 2.213018607100741e-06, "loss": 0.5393, "step": 9830 }, { "epoch": 0.7, "grad_norm": 2.153134548734179, "learning_rate": 2.2120645887706046e-06, "loss": 0.4906, "step": 9831 }, { "epoch": 0.7, "grad_norm": 1.9403335042450074, "learning_rate": 2.211110717708664e-06, "loss": 0.5568, "step": 9832 }, { "epoch": 0.7, "grad_norm": 1.6619787011954972, "learning_rate": 2.2101569939653044e-06, "loss": 0.4992, "step": 9833 }, { "epoch": 0.7, "grad_norm": 1.9323816123535564, "learning_rate": 2.2092034175909084e-06, "loss": 0.5104, "step": 9834 }, { "epoch": 0.7, "grad_norm": 0.7402635796299208, "learning_rate": 2.208249988635842e-06, "loss": 0.4316, "step": 9835 }, { "epoch": 0.7, "grad_norm": 1.4628329571468286, "learning_rate": 2.2072967071504712e-06, "loss": 0.5113, "step": 9836 }, { "epoch": 0.7, "grad_norm": 1.717266774038048, "learning_rate": 2.206343573185151e-06, "loss": 0.5407, "step": 9837 }, { "epoch": 0.7, "grad_norm": 2.0788995618959807, "learning_rate": 2.205390586790232e-06, "loss": 0.5242, "step": 9838 }, { "epoch": 0.7, "grad_norm": 2.111756982890185, "learning_rate": 2.204437748016047e-06, "loss": 0.5956, "step": 9839 }, { "epoch": 0.7, "grad_norm": 1.5192303423506381, "learning_rate": 2.2034850569129357e-06, "loss": 0.5618, "step": 9840 }, { "epoch": 0.7, "grad_norm": 1.6338007437450441, "learning_rate": 2.2025325135312203e-06, "loss": 0.5567, "step": 9841 }, { "epoch": 0.7, "grad_norm": 1.5130919754535974, "learning_rate": 2.2015801179212152e-06, "loss": 0.5032, "step": 9842 }, { "epoch": 0.7, "grad_norm": 1.6096667806755216, "learning_rate": 2.2006278701332313e-06, "loss": 0.5827, "step": 9843 }, { "epoch": 0.7, "grad_norm": 1.6273355838135086, "learning_rate": 2.199675770217568e-06, "loss": 0.5956, "step": 9844 }, { "epoch": 0.7, "grad_norm": 1.7929622625107424, "learning_rate": 2.1987238182245218e-06, "loss": 0.5004, "step": 9845 }, { "epoch": 0.7, "grad_norm": 1.6298876602057937, "learning_rate": 2.1977720142043737e-06, "loss": 0.5019, "step": 9846 }, { "epoch": 0.7, "grad_norm": 1.7730182950509759, "learning_rate": 2.1968203582074026e-06, "loss": 0.5146, "step": 9847 }, { "epoch": 0.7, "grad_norm": 2.2336815756351047, "learning_rate": 2.1958688502838787e-06, "loss": 0.5444, "step": 9848 }, { "epoch": 0.7, "grad_norm": 1.896585654591367, "learning_rate": 2.194917490484064e-06, "loss": 0.4952, "step": 9849 }, { "epoch": 0.7, "grad_norm": 1.692158092086213, "learning_rate": 2.1939662788582137e-06, "loss": 0.5939, "step": 9850 }, { "epoch": 0.7, "grad_norm": 1.7814729944899672, "learning_rate": 2.1930152154565696e-06, "loss": 0.5934, "step": 9851 }, { "epoch": 0.7, "grad_norm": 1.6697437849235102, "learning_rate": 2.1920643003293766e-06, "loss": 0.466, "step": 9852 }, { "epoch": 0.7, "grad_norm": 1.9675541085977006, "learning_rate": 2.1911135335268608e-06, "loss": 0.527, "step": 9853 }, { "epoch": 0.7, "grad_norm": 1.7795176499679832, "learning_rate": 2.190162915099245e-06, "loss": 0.4953, "step": 9854 }, { "epoch": 0.7, "grad_norm": 1.92656390973599, "learning_rate": 2.189212445096745e-06, "loss": 0.4927, "step": 9855 }, { "epoch": 0.7, "grad_norm": 1.6510505905988975, "learning_rate": 2.1882621235695685e-06, "loss": 0.6125, "step": 9856 }, { "epoch": 0.7, "grad_norm": 1.6943968810018013, "learning_rate": 2.187311950567916e-06, "loss": 0.49, "step": 9857 }, { "epoch": 0.7, "grad_norm": 2.0429419409319003, "learning_rate": 2.1863619261419726e-06, "loss": 0.5593, "step": 9858 }, { "epoch": 0.7, "grad_norm": 1.651692684158317, "learning_rate": 2.18541205034193e-06, "loss": 0.4789, "step": 9859 }, { "epoch": 0.7, "grad_norm": 1.8091459818880926, "learning_rate": 2.1844623232179583e-06, "loss": 0.555, "step": 9860 }, { "epoch": 0.7, "grad_norm": 1.9393682605731715, "learning_rate": 2.1835127448202277e-06, "loss": 0.5727, "step": 9861 }, { "epoch": 0.7, "grad_norm": 1.8993530512545764, "learning_rate": 2.1825633151988966e-06, "loss": 0.5089, "step": 9862 }, { "epoch": 0.7, "grad_norm": 0.6913309208285455, "learning_rate": 2.181614034404118e-06, "loss": 0.4378, "step": 9863 }, { "epoch": 0.7, "grad_norm": 1.7115490938281233, "learning_rate": 2.1806649024860386e-06, "loss": 0.6026, "step": 9864 }, { "epoch": 0.7, "grad_norm": 1.5739686785651636, "learning_rate": 2.17971591949479e-06, "loss": 0.5545, "step": 9865 }, { "epoch": 0.7, "grad_norm": 0.796164169898495, "learning_rate": 2.1787670854805033e-06, "loss": 0.4479, "step": 9866 }, { "epoch": 0.7, "grad_norm": 1.9650883356401943, "learning_rate": 2.1778184004932984e-06, "loss": 0.5535, "step": 9867 }, { "epoch": 0.7, "grad_norm": 1.7500808946078343, "learning_rate": 2.1768698645832883e-06, "loss": 0.5131, "step": 9868 }, { "epoch": 0.7, "grad_norm": 0.6794179121459604, "learning_rate": 2.1759214778005784e-06, "loss": 0.4105, "step": 9869 }, { "epoch": 0.7, "grad_norm": 1.5967481894013247, "learning_rate": 2.174973240195265e-06, "loss": 0.5455, "step": 9870 }, { "epoch": 0.7, "grad_norm": 2.2166597354677346, "learning_rate": 2.1740251518174394e-06, "loss": 0.5629, "step": 9871 }, { "epoch": 0.7, "grad_norm": 1.7964576448836764, "learning_rate": 2.1730772127171793e-06, "loss": 0.4714, "step": 9872 }, { "epoch": 0.7, "grad_norm": 1.784357296207718, "learning_rate": 2.17212942294456e-06, "loss": 0.5189, "step": 9873 }, { "epoch": 0.7, "grad_norm": 1.7685191721911813, "learning_rate": 2.1711817825496463e-06, "loss": 0.6218, "step": 9874 }, { "epoch": 0.7, "grad_norm": 1.7071092811119726, "learning_rate": 2.170234291582498e-06, "loss": 0.5367, "step": 9875 }, { "epoch": 0.7, "grad_norm": 1.5919135967654028, "learning_rate": 2.1692869500931618e-06, "loss": 0.5247, "step": 9876 }, { "epoch": 0.7, "grad_norm": 2.1371890611389315, "learning_rate": 2.1683397581316802e-06, "loss": 0.5324, "step": 9877 }, { "epoch": 0.7, "grad_norm": 1.6230525319150255, "learning_rate": 2.1673927157480874e-06, "loss": 0.4736, "step": 9878 }, { "epoch": 0.7, "grad_norm": 1.862684696291668, "learning_rate": 2.1664458229924098e-06, "loss": 0.5326, "step": 9879 }, { "epoch": 0.7, "grad_norm": 1.4485560987420139, "learning_rate": 2.165499079914667e-06, "loss": 0.5291, "step": 9880 }, { "epoch": 0.7, "grad_norm": 1.5512433332026951, "learning_rate": 2.164552486564863e-06, "loss": 0.4999, "step": 9881 }, { "epoch": 0.7, "grad_norm": 1.7892125553676461, "learning_rate": 2.1636060429930094e-06, "loss": 0.581, "step": 9882 }, { "epoch": 0.7, "grad_norm": 1.8407015730095222, "learning_rate": 2.162659749249093e-06, "loss": 0.5399, "step": 9883 }, { "epoch": 0.7, "grad_norm": 2.0044684170019775, "learning_rate": 2.1617136053831034e-06, "loss": 0.5412, "step": 9884 }, { "epoch": 0.7, "grad_norm": 1.5040287879269847, "learning_rate": 2.1607676114450176e-06, "loss": 0.5692, "step": 9885 }, { "epoch": 0.7, "grad_norm": 1.8771342001352436, "learning_rate": 2.159821767484808e-06, "loss": 0.5771, "step": 9886 }, { "epoch": 0.7, "grad_norm": 2.4342795875356713, "learning_rate": 2.1588760735524384e-06, "loss": 0.5541, "step": 9887 }, { "epoch": 0.7, "grad_norm": 1.6926798274257615, "learning_rate": 2.1579305296978575e-06, "loss": 0.4983, "step": 9888 }, { "epoch": 0.7, "grad_norm": 1.783006088232884, "learning_rate": 2.15698513597102e-06, "loss": 0.5558, "step": 9889 }, { "epoch": 0.7, "grad_norm": 1.7616768180301434, "learning_rate": 2.1560398924218593e-06, "loss": 0.549, "step": 9890 }, { "epoch": 0.7, "grad_norm": 1.5778620453987877, "learning_rate": 2.1550947991003084e-06, "loss": 0.5463, "step": 9891 }, { "epoch": 0.7, "grad_norm": 1.7088188421028374, "learning_rate": 2.1541498560562903e-06, "loss": 0.4576, "step": 9892 }, { "epoch": 0.7, "grad_norm": 1.7307619740476605, "learning_rate": 2.1532050633397193e-06, "loss": 0.5187, "step": 9893 }, { "epoch": 0.7, "grad_norm": 1.530945891324916, "learning_rate": 2.152260421000505e-06, "loss": 0.4625, "step": 9894 }, { "epoch": 0.7, "grad_norm": 1.7336063750900397, "learning_rate": 2.1513159290885435e-06, "loss": 0.5067, "step": 9895 }, { "epoch": 0.7, "grad_norm": 1.8473967266720934, "learning_rate": 2.150371587653726e-06, "loss": 0.4936, "step": 9896 }, { "epoch": 0.7, "grad_norm": 1.5198557951448186, "learning_rate": 2.1494273967459383e-06, "loss": 0.5426, "step": 9897 }, { "epoch": 0.7, "grad_norm": 1.6220751413338879, "learning_rate": 2.1484833564150542e-06, "loss": 0.5206, "step": 9898 }, { "epoch": 0.7, "grad_norm": 1.5808624273864196, "learning_rate": 2.1475394667109435e-06, "loss": 0.5054, "step": 9899 }, { "epoch": 0.7, "grad_norm": 1.7050956412769964, "learning_rate": 2.1465957276834592e-06, "loss": 0.6037, "step": 9900 }, { "epoch": 0.7, "grad_norm": 1.791247256625663, "learning_rate": 2.1456521393824614e-06, "loss": 0.4627, "step": 9901 }, { "epoch": 0.7, "grad_norm": 4.210346089398442, "learning_rate": 2.144708701857788e-06, "loss": 0.521, "step": 9902 }, { "epoch": 0.7, "grad_norm": 1.9144515467075263, "learning_rate": 2.1437654151592754e-06, "loss": 0.5148, "step": 9903 }, { "epoch": 0.7, "grad_norm": 1.531238761544277, "learning_rate": 2.142822279336752e-06, "loss": 0.5856, "step": 9904 }, { "epoch": 0.7, "grad_norm": 1.6302470641626148, "learning_rate": 2.141879294440039e-06, "loss": 0.522, "step": 9905 }, { "epoch": 0.7, "grad_norm": 1.7149840605409237, "learning_rate": 2.1409364605189447e-06, "loss": 0.5395, "step": 9906 }, { "epoch": 0.7, "grad_norm": 1.8506683524660588, "learning_rate": 2.1399937776232727e-06, "loss": 0.5091, "step": 9907 }, { "epoch": 0.7, "grad_norm": 1.604209151273876, "learning_rate": 2.1390512458028234e-06, "loss": 0.5509, "step": 9908 }, { "epoch": 0.7, "grad_norm": 1.549677829306699, "learning_rate": 2.13810886510738e-06, "loss": 0.4925, "step": 9909 }, { "epoch": 0.7, "grad_norm": 0.6993336529375667, "learning_rate": 2.137166635586726e-06, "loss": 0.4504, "step": 9910 }, { "epoch": 0.7, "grad_norm": 1.7255831385632994, "learning_rate": 2.1362245572906263e-06, "loss": 0.5201, "step": 9911 }, { "epoch": 0.7, "grad_norm": 1.6915292071317485, "learning_rate": 2.1352826302688538e-06, "loss": 0.4856, "step": 9912 }, { "epoch": 0.7, "grad_norm": 1.6425612857906209, "learning_rate": 2.134340854571158e-06, "loss": 0.5187, "step": 9913 }, { "epoch": 0.7, "grad_norm": 2.983444695348457, "learning_rate": 2.1333992302472876e-06, "loss": 0.5638, "step": 9914 }, { "epoch": 0.7, "grad_norm": 1.9646144756486192, "learning_rate": 2.132457757346984e-06, "loss": 0.5812, "step": 9915 }, { "epoch": 0.7, "grad_norm": 1.9553779286653636, "learning_rate": 2.131516435919978e-06, "loss": 0.4756, "step": 9916 }, { "epoch": 0.7, "grad_norm": 1.4336064972879234, "learning_rate": 2.1305752660159956e-06, "loss": 0.4705, "step": 9917 }, { "epoch": 0.7, "grad_norm": 1.7200076168128053, "learning_rate": 2.1296342476847467e-06, "loss": 0.5299, "step": 9918 }, { "epoch": 0.7, "grad_norm": 1.6441341656632302, "learning_rate": 2.1286933809759465e-06, "loss": 0.5767, "step": 9919 }, { "epoch": 0.7, "grad_norm": 1.796907382138991, "learning_rate": 2.1277526659392896e-06, "loss": 0.5108, "step": 9920 }, { "epoch": 0.7, "grad_norm": 1.6966933440186764, "learning_rate": 2.1268121026244693e-06, "loss": 0.5634, "step": 9921 }, { "epoch": 0.7, "grad_norm": 1.8589131947080262, "learning_rate": 2.1258716910811692e-06, "loss": 0.5918, "step": 9922 }, { "epoch": 0.7, "grad_norm": 1.941100492529861, "learning_rate": 2.1249314313590657e-06, "loss": 0.5784, "step": 9923 }, { "epoch": 0.7, "grad_norm": 1.8299830228926899, "learning_rate": 2.1239913235078275e-06, "loss": 0.5999, "step": 9924 }, { "epoch": 0.7, "grad_norm": 1.5845347392542977, "learning_rate": 2.1230513675771114e-06, "loss": 0.4894, "step": 9925 }, { "epoch": 0.7, "grad_norm": 1.5827159938835709, "learning_rate": 2.12211156361657e-06, "loss": 0.5231, "step": 9926 }, { "epoch": 0.7, "grad_norm": 1.743811289047712, "learning_rate": 2.121171911675848e-06, "loss": 0.5761, "step": 9927 }, { "epoch": 0.7, "grad_norm": 1.8250149600021641, "learning_rate": 2.1202324118045805e-06, "loss": 0.531, "step": 9928 }, { "epoch": 0.7, "grad_norm": 1.6651945715853556, "learning_rate": 2.119293064052396e-06, "loss": 0.5318, "step": 9929 }, { "epoch": 0.7, "grad_norm": 1.5509718645062733, "learning_rate": 2.1183538684689097e-06, "loss": 0.5924, "step": 9930 }, { "epoch": 0.7, "grad_norm": 0.6900881964781389, "learning_rate": 2.117414825103741e-06, "loss": 0.4445, "step": 9931 }, { "epoch": 0.7, "grad_norm": 0.7442753839838445, "learning_rate": 2.1164759340064854e-06, "loss": 0.4433, "step": 9932 }, { "epoch": 0.7, "grad_norm": 1.6131919110575181, "learning_rate": 2.115537195226743e-06, "loss": 0.5549, "step": 9933 }, { "epoch": 0.7, "grad_norm": 1.394578529127927, "learning_rate": 2.114598608814099e-06, "loss": 0.4713, "step": 9934 }, { "epoch": 0.71, "grad_norm": 1.901474818385091, "learning_rate": 2.113660174818134e-06, "loss": 0.5175, "step": 9935 }, { "epoch": 0.71, "grad_norm": 1.7659107212231282, "learning_rate": 2.1127218932884205e-06, "loss": 0.5792, "step": 9936 }, { "epoch": 0.71, "grad_norm": 1.8245121512516913, "learning_rate": 2.1117837642745164e-06, "loss": 0.5466, "step": 9937 }, { "epoch": 0.71, "grad_norm": 1.8890652163218158, "learning_rate": 2.110845787825984e-06, "loss": 0.4439, "step": 9938 }, { "epoch": 0.71, "grad_norm": 1.730373075654687, "learning_rate": 2.1099079639923653e-06, "loss": 0.547, "step": 9939 }, { "epoch": 0.71, "grad_norm": 2.190909436660858, "learning_rate": 2.1089702928232024e-06, "loss": 0.4652, "step": 9940 }, { "epoch": 0.71, "grad_norm": 1.5059507893783808, "learning_rate": 2.108032774368021e-06, "loss": 0.4661, "step": 9941 }, { "epoch": 0.71, "grad_norm": 1.732801463097491, "learning_rate": 2.1070954086763516e-06, "loss": 0.5961, "step": 9942 }, { "epoch": 0.71, "grad_norm": 0.7000885972452211, "learning_rate": 2.1061581957977038e-06, "loss": 0.4225, "step": 9943 }, { "epoch": 0.71, "grad_norm": 18.11123632557978, "learning_rate": 2.1052211357815856e-06, "loss": 0.4908, "step": 9944 }, { "epoch": 0.71, "grad_norm": 2.5322755475069454, "learning_rate": 2.1042842286774958e-06, "loss": 0.5598, "step": 9945 }, { "epoch": 0.71, "grad_norm": 2.0519288271184726, "learning_rate": 2.1033474745349253e-06, "loss": 0.5944, "step": 9946 }, { "epoch": 0.71, "grad_norm": 1.9316259934194702, "learning_rate": 2.102410873403358e-06, "loss": 0.5276, "step": 9947 }, { "epoch": 0.71, "grad_norm": 1.8346813140060125, "learning_rate": 2.1014744253322626e-06, "loss": 0.5001, "step": 9948 }, { "epoch": 0.71, "grad_norm": 1.9105347175477196, "learning_rate": 2.1005381303711136e-06, "loss": 0.6416, "step": 9949 }, { "epoch": 0.71, "grad_norm": 1.9403395751476167, "learning_rate": 2.0996019885693635e-06, "loss": 0.4989, "step": 9950 }, { "epoch": 0.71, "grad_norm": 1.5981535783815422, "learning_rate": 2.0986659999764646e-06, "loss": 0.4915, "step": 9951 }, { "epoch": 0.71, "grad_norm": 2.942300733214206, "learning_rate": 2.0977301646418587e-06, "loss": 0.528, "step": 9952 }, { "epoch": 0.71, "grad_norm": 1.969779062066473, "learning_rate": 2.096794482614979e-06, "loss": 0.4856, "step": 9953 }, { "epoch": 0.71, "grad_norm": 1.687118497364785, "learning_rate": 2.0958589539452547e-06, "loss": 0.5738, "step": 9954 }, { "epoch": 0.71, "grad_norm": 1.490783632983565, "learning_rate": 2.094923578682098e-06, "loss": 0.428, "step": 9955 }, { "epoch": 0.71, "grad_norm": 2.0600156675526136, "learning_rate": 2.093988356874923e-06, "loss": 0.5557, "step": 9956 }, { "epoch": 0.71, "grad_norm": 1.7654964953611865, "learning_rate": 2.093053288573129e-06, "loss": 0.5096, "step": 9957 }, { "epoch": 0.71, "grad_norm": 1.7636164408026669, "learning_rate": 2.0921183738261107e-06, "loss": 0.5153, "step": 9958 }, { "epoch": 0.71, "grad_norm": 1.6941360392035278, "learning_rate": 2.091183612683255e-06, "loss": 0.5498, "step": 9959 }, { "epoch": 0.71, "grad_norm": 1.6616591593769634, "learning_rate": 2.090249005193934e-06, "loss": 0.5101, "step": 9960 }, { "epoch": 0.71, "grad_norm": 1.6161933862388798, "learning_rate": 2.0893145514075232e-06, "loss": 0.4937, "step": 9961 }, { "epoch": 0.71, "grad_norm": 0.6336901559186278, "learning_rate": 2.088380251373379e-06, "loss": 0.4138, "step": 9962 }, { "epoch": 0.71, "grad_norm": 1.5739663939060524, "learning_rate": 2.0874461051408556e-06, "loss": 0.5507, "step": 9963 }, { "epoch": 0.71, "grad_norm": 1.9702078571105015, "learning_rate": 2.0865121127592987e-06, "loss": 0.5827, "step": 9964 }, { "epoch": 0.71, "grad_norm": 1.5380812570872537, "learning_rate": 2.0855782742780434e-06, "loss": 0.4696, "step": 9965 }, { "epoch": 0.71, "grad_norm": 1.6077312661243806, "learning_rate": 2.0846445897464213e-06, "loss": 0.5261, "step": 9966 }, { "epoch": 0.71, "grad_norm": 1.8894270389324255, "learning_rate": 2.0837110592137467e-06, "loss": 0.4776, "step": 9967 }, { "epoch": 0.71, "grad_norm": 1.705222215828887, "learning_rate": 2.0827776827293393e-06, "loss": 0.4751, "step": 9968 }, { "epoch": 0.71, "grad_norm": 1.6187646536575793, "learning_rate": 2.0818444603424982e-06, "loss": 0.4934, "step": 9969 }, { "epoch": 0.71, "grad_norm": 1.6638889343405148, "learning_rate": 2.080911392102522e-06, "loss": 0.5269, "step": 9970 }, { "epoch": 0.71, "grad_norm": 3.339380621470723, "learning_rate": 2.079978478058694e-06, "loss": 0.4774, "step": 9971 }, { "epoch": 0.71, "grad_norm": 1.6601567588991704, "learning_rate": 2.079045718260299e-06, "loss": 0.5537, "step": 9972 }, { "epoch": 0.71, "grad_norm": 1.5730684604889715, "learning_rate": 2.0781131127566086e-06, "loss": 0.4779, "step": 9973 }, { "epoch": 0.71, "grad_norm": 1.859461940431205, "learning_rate": 2.077180661596882e-06, "loss": 0.4691, "step": 9974 }, { "epoch": 0.71, "grad_norm": 2.614613212115766, "learning_rate": 2.076248364830377e-06, "loss": 0.4727, "step": 9975 }, { "epoch": 0.71, "grad_norm": 1.5513138709835115, "learning_rate": 2.0753162225063407e-06, "loss": 0.5157, "step": 9976 }, { "epoch": 0.71, "grad_norm": 1.9260043149397121, "learning_rate": 2.0743842346740135e-06, "loss": 0.5422, "step": 9977 }, { "epoch": 0.71, "grad_norm": 1.7233955526042861, "learning_rate": 2.073452401382622e-06, "loss": 0.5393, "step": 9978 }, { "epoch": 0.71, "grad_norm": 1.7340521529462616, "learning_rate": 2.0725207226813915e-06, "loss": 0.4698, "step": 9979 }, { "epoch": 0.71, "grad_norm": 1.742981085043424, "learning_rate": 2.0715891986195358e-06, "loss": 0.5865, "step": 9980 }, { "epoch": 0.71, "grad_norm": 0.6950640925626478, "learning_rate": 2.070657829246262e-06, "loss": 0.415, "step": 9981 }, { "epoch": 0.71, "grad_norm": 1.7664743906897409, "learning_rate": 2.069726614610767e-06, "loss": 0.5493, "step": 9982 }, { "epoch": 0.71, "grad_norm": 0.720725083725896, "learning_rate": 2.0687955547622417e-06, "loss": 0.4261, "step": 9983 }, { "epoch": 0.71, "grad_norm": 0.6738589870009388, "learning_rate": 2.0678646497498695e-06, "loss": 0.4066, "step": 9984 }, { "epoch": 0.71, "grad_norm": 1.6682323426659522, "learning_rate": 2.06693389962282e-06, "loss": 0.5078, "step": 9985 }, { "epoch": 0.71, "grad_norm": 1.4430006479831332, "learning_rate": 2.06600330443026e-06, "loss": 0.4743, "step": 9986 }, { "epoch": 0.71, "grad_norm": 1.6343820224980672, "learning_rate": 2.0650728642213472e-06, "loss": 0.5544, "step": 9987 }, { "epoch": 0.71, "grad_norm": 1.695608677489692, "learning_rate": 2.0641425790452314e-06, "loss": 0.5506, "step": 9988 }, { "epoch": 0.71, "grad_norm": 2.1445091623919015, "learning_rate": 2.063212448951054e-06, "loss": 0.4959, "step": 9989 }, { "epoch": 0.71, "grad_norm": 1.6914704891935786, "learning_rate": 2.062282473987943e-06, "loss": 0.4977, "step": 9990 }, { "epoch": 0.71, "grad_norm": 1.5501811386175672, "learning_rate": 2.061352654205029e-06, "loss": 0.4878, "step": 9991 }, { "epoch": 0.71, "grad_norm": 1.9027966683377582, "learning_rate": 2.060422989651424e-06, "loss": 0.4886, "step": 9992 }, { "epoch": 0.71, "grad_norm": 1.6721796303840772, "learning_rate": 2.0594934803762368e-06, "loss": 0.4935, "step": 9993 }, { "epoch": 0.71, "grad_norm": 3.7335766028855946, "learning_rate": 2.0585641264285684e-06, "loss": 0.5438, "step": 9994 }, { "epoch": 0.71, "grad_norm": 1.8491462149443747, "learning_rate": 2.0576349278575092e-06, "loss": 0.5706, "step": 9995 }, { "epoch": 0.71, "grad_norm": 0.7512657107728453, "learning_rate": 2.056705884712145e-06, "loss": 0.4362, "step": 9996 }, { "epoch": 0.71, "grad_norm": 1.6982321154788775, "learning_rate": 2.0557769970415463e-06, "loss": 0.4093, "step": 9997 }, { "epoch": 0.71, "grad_norm": 1.6049880358389776, "learning_rate": 2.054848264894786e-06, "loss": 0.506, "step": 9998 }, { "epoch": 0.71, "grad_norm": 1.6503151460510972, "learning_rate": 2.0539196883209185e-06, "loss": 0.5495, "step": 9999 }, { "epoch": 0.71, "grad_norm": 0.755921830268919, "learning_rate": 2.0529912673689958e-06, "loss": 0.4594, "step": 10000 }, { "epoch": 0.71, "grad_norm": 0.7185551331740226, "learning_rate": 2.0520630020880593e-06, "loss": 0.4763, "step": 10001 }, { "epoch": 0.71, "grad_norm": 2.332056365899894, "learning_rate": 2.0511348925271447e-06, "loss": 0.4357, "step": 10002 }, { "epoch": 0.71, "grad_norm": 1.9177014971361952, "learning_rate": 2.050206938735279e-06, "loss": 0.556, "step": 10003 }, { "epoch": 0.71, "grad_norm": 1.6691278121121338, "learning_rate": 2.0492791407614764e-06, "loss": 0.5201, "step": 10004 }, { "epoch": 0.71, "grad_norm": 1.764658783727552, "learning_rate": 2.0483514986547478e-06, "loss": 0.5243, "step": 10005 }, { "epoch": 0.71, "grad_norm": 1.823389461878058, "learning_rate": 2.0474240124640944e-06, "loss": 0.5354, "step": 10006 }, { "epoch": 0.71, "grad_norm": 1.4845856701293219, "learning_rate": 2.046496682238511e-06, "loss": 0.551, "step": 10007 }, { "epoch": 0.71, "grad_norm": 1.691274862136385, "learning_rate": 2.0455695080269796e-06, "loss": 0.5133, "step": 10008 }, { "epoch": 0.71, "grad_norm": 0.7206217222903164, "learning_rate": 2.044642489878477e-06, "loss": 0.44, "step": 10009 }, { "epoch": 0.71, "grad_norm": 1.890543042895478, "learning_rate": 2.043715627841973e-06, "loss": 0.5277, "step": 10010 }, { "epoch": 0.71, "grad_norm": 0.7391971754713759, "learning_rate": 2.042788921966426e-06, "loss": 0.4477, "step": 10011 }, { "epoch": 0.71, "grad_norm": 1.4667832561720997, "learning_rate": 2.041862372300791e-06, "loss": 0.4574, "step": 10012 }, { "epoch": 0.71, "grad_norm": 1.720710947376018, "learning_rate": 2.040935978894005e-06, "loss": 0.5395, "step": 10013 }, { "epoch": 0.71, "grad_norm": 1.697987115620822, "learning_rate": 2.040009741795011e-06, "loss": 0.5375, "step": 10014 }, { "epoch": 0.71, "grad_norm": 1.6094444439066218, "learning_rate": 2.0390836610527304e-06, "loss": 0.5655, "step": 10015 }, { "epoch": 0.71, "grad_norm": 1.6892549418071816, "learning_rate": 2.0381577367160836e-06, "loss": 0.5061, "step": 10016 }, { "epoch": 0.71, "grad_norm": 1.6895879876227233, "learning_rate": 2.037231968833982e-06, "loss": 0.5359, "step": 10017 }, { "epoch": 0.71, "grad_norm": 2.5068357294279298, "learning_rate": 2.036306357455326e-06, "loss": 0.5589, "step": 10018 }, { "epoch": 0.71, "grad_norm": 2.030089620057589, "learning_rate": 2.0353809026290134e-06, "loss": 0.5161, "step": 10019 }, { "epoch": 0.71, "grad_norm": 1.9121564902961647, "learning_rate": 2.0344556044039225e-06, "loss": 0.4851, "step": 10020 }, { "epoch": 0.71, "grad_norm": 1.7450171086894917, "learning_rate": 2.0335304628289393e-06, "loss": 0.5286, "step": 10021 }, { "epoch": 0.71, "grad_norm": 1.8591463052626689, "learning_rate": 2.032605477952927e-06, "loss": 0.5616, "step": 10022 }, { "epoch": 0.71, "grad_norm": 1.733764481955262, "learning_rate": 2.0316806498247483e-06, "loss": 0.4767, "step": 10023 }, { "epoch": 0.71, "grad_norm": 0.7409600811666619, "learning_rate": 2.0307559784932555e-06, "loss": 0.4413, "step": 10024 }, { "epoch": 0.71, "grad_norm": 1.6508008317646463, "learning_rate": 2.029831464007293e-06, "loss": 0.4937, "step": 10025 }, { "epoch": 0.71, "grad_norm": 1.6260427651307787, "learning_rate": 2.028907106415699e-06, "loss": 0.5697, "step": 10026 }, { "epoch": 0.71, "grad_norm": 1.5445609987650661, "learning_rate": 2.027982905767297e-06, "loss": 0.4651, "step": 10027 }, { "epoch": 0.71, "grad_norm": 1.5468760798195007, "learning_rate": 2.027058862110909e-06, "loss": 0.5172, "step": 10028 }, { "epoch": 0.71, "grad_norm": 1.7196611156189003, "learning_rate": 2.026134975495345e-06, "loss": 0.499, "step": 10029 }, { "epoch": 0.71, "grad_norm": 1.6681550892510362, "learning_rate": 2.0252112459694086e-06, "loss": 0.4596, "step": 10030 }, { "epoch": 0.71, "grad_norm": 1.7160117676270987, "learning_rate": 2.0242876735818946e-06, "loss": 0.5368, "step": 10031 }, { "epoch": 0.71, "grad_norm": 1.9305106955863673, "learning_rate": 2.023364258381588e-06, "loss": 0.4537, "step": 10032 }, { "epoch": 0.71, "grad_norm": 1.658694782263401, "learning_rate": 2.0224410004172696e-06, "loss": 0.5356, "step": 10033 }, { "epoch": 0.71, "grad_norm": 0.7106424205017708, "learning_rate": 2.0215178997377054e-06, "loss": 0.4337, "step": 10034 }, { "epoch": 0.71, "grad_norm": 1.707212101921754, "learning_rate": 2.0205949563916575e-06, "loss": 0.5301, "step": 10035 }, { "epoch": 0.71, "grad_norm": 1.4546355569917444, "learning_rate": 2.0196721704278804e-06, "loss": 0.4561, "step": 10036 }, { "epoch": 0.71, "grad_norm": 1.56270525769816, "learning_rate": 2.0187495418951197e-06, "loss": 0.4868, "step": 10037 }, { "epoch": 0.71, "grad_norm": 1.7915481983474573, "learning_rate": 2.0178270708421076e-06, "loss": 0.5415, "step": 10038 }, { "epoch": 0.71, "grad_norm": 2.2579044375118738, "learning_rate": 2.0169047573175733e-06, "loss": 0.5625, "step": 10039 }, { "epoch": 0.71, "grad_norm": 0.6889215472110888, "learning_rate": 2.0159826013702416e-06, "loss": 0.4206, "step": 10040 }, { "epoch": 0.71, "grad_norm": 2.513439337019751, "learning_rate": 2.015060603048818e-06, "loss": 0.5577, "step": 10041 }, { "epoch": 0.71, "grad_norm": 1.7193556687057865, "learning_rate": 2.0141387624020096e-06, "loss": 0.4562, "step": 10042 }, { "epoch": 0.71, "grad_norm": 3.0039910407030352, "learning_rate": 2.0132170794785057e-06, "loss": 0.5261, "step": 10043 }, { "epoch": 0.71, "grad_norm": 2.0860928941872263, "learning_rate": 2.0122955543269996e-06, "loss": 0.5415, "step": 10044 }, { "epoch": 0.71, "grad_norm": 2.519010682111012, "learning_rate": 2.0113741869961646e-06, "loss": 0.5504, "step": 10045 }, { "epoch": 0.71, "grad_norm": 1.7348591207037212, "learning_rate": 2.0104529775346714e-06, "loss": 0.5456, "step": 10046 }, { "epoch": 0.71, "grad_norm": 0.680252512283667, "learning_rate": 2.0095319259911824e-06, "loss": 0.4059, "step": 10047 }, { "epoch": 0.71, "grad_norm": 2.1906381331267064, "learning_rate": 2.00861103241435e-06, "loss": 0.5123, "step": 10048 }, { "epoch": 0.71, "grad_norm": 1.6990731157674086, "learning_rate": 2.007690296852821e-06, "loss": 0.5451, "step": 10049 }, { "epoch": 0.71, "grad_norm": 2.329274581098137, "learning_rate": 2.006769719355226e-06, "loss": 0.5085, "step": 10050 }, { "epoch": 0.71, "grad_norm": 2.145496350510256, "learning_rate": 2.0058492999702006e-06, "loss": 0.5865, "step": 10051 }, { "epoch": 0.71, "grad_norm": 9.610698936107974, "learning_rate": 2.004929038746359e-06, "loss": 0.5431, "step": 10052 }, { "epoch": 0.71, "grad_norm": 1.643859131999563, "learning_rate": 2.004008935732314e-06, "loss": 0.5535, "step": 10053 }, { "epoch": 0.71, "grad_norm": 1.7835010202990726, "learning_rate": 2.0030889909766692e-06, "loss": 0.4887, "step": 10054 }, { "epoch": 0.71, "grad_norm": 1.4438750642282292, "learning_rate": 2.0021692045280185e-06, "loss": 0.4685, "step": 10055 }, { "epoch": 0.71, "grad_norm": 0.6856412906292524, "learning_rate": 2.0012495764349504e-06, "loss": 0.4246, "step": 10056 }, { "epoch": 0.71, "grad_norm": 1.708480894805625, "learning_rate": 2.000330106746039e-06, "loss": 0.5282, "step": 10057 }, { "epoch": 0.71, "grad_norm": 1.597876649783731, "learning_rate": 1.9994107955098556e-06, "loss": 0.489, "step": 10058 }, { "epoch": 0.71, "grad_norm": 1.637834250722319, "learning_rate": 1.9984916427749614e-06, "loss": 0.5034, "step": 10059 }, { "epoch": 0.71, "grad_norm": 0.7198341267726184, "learning_rate": 1.997572648589909e-06, "loss": 0.4348, "step": 10060 }, { "epoch": 0.71, "grad_norm": 1.6196059487939933, "learning_rate": 1.9966538130032432e-06, "loss": 0.5687, "step": 10061 }, { "epoch": 0.71, "grad_norm": 1.8433459073082177, "learning_rate": 1.9957351360634997e-06, "loss": 0.4837, "step": 10062 }, { "epoch": 0.71, "grad_norm": 1.5571865193298435, "learning_rate": 1.9948166178192075e-06, "loss": 0.4866, "step": 10063 }, { "epoch": 0.71, "grad_norm": 1.6026021881759482, "learning_rate": 1.9938982583188832e-06, "loss": 0.4646, "step": 10064 }, { "epoch": 0.71, "grad_norm": 1.8695350842122758, "learning_rate": 1.9929800576110388e-06, "loss": 0.5297, "step": 10065 }, { "epoch": 0.71, "grad_norm": 1.8618913718011791, "learning_rate": 1.992062015744177e-06, "loss": 0.5668, "step": 10066 }, { "epoch": 0.71, "grad_norm": 0.662078744700633, "learning_rate": 1.9911441327667913e-06, "loss": 0.3905, "step": 10067 }, { "epoch": 0.71, "grad_norm": 1.5209710953154347, "learning_rate": 1.9902264087273706e-06, "loss": 0.517, "step": 10068 }, { "epoch": 0.71, "grad_norm": 1.579016550775007, "learning_rate": 1.9893088436743853e-06, "loss": 0.5323, "step": 10069 }, { "epoch": 0.71, "grad_norm": 2.396054010838562, "learning_rate": 1.9883914376563117e-06, "loss": 0.5014, "step": 10070 }, { "epoch": 0.71, "grad_norm": 1.6429848932711697, "learning_rate": 1.9874741907216062e-06, "loss": 0.4736, "step": 10071 }, { "epoch": 0.71, "grad_norm": 1.6153335841768872, "learning_rate": 1.986557102918723e-06, "loss": 0.5303, "step": 10072 }, { "epoch": 0.71, "grad_norm": 1.5355684566033818, "learning_rate": 1.985640174296101e-06, "loss": 0.4801, "step": 10073 }, { "epoch": 0.71, "grad_norm": 1.6312232181855317, "learning_rate": 1.984723404902183e-06, "loss": 0.5701, "step": 10074 }, { "epoch": 0.71, "grad_norm": 1.8490795624029488, "learning_rate": 1.9838067947853895e-06, "loss": 0.4548, "step": 10075 }, { "epoch": 0.72, "grad_norm": 1.731555757474522, "learning_rate": 1.982890343994142e-06, "loss": 0.4979, "step": 10076 }, { "epoch": 0.72, "grad_norm": 1.4902044317430612, "learning_rate": 1.9819740525768495e-06, "loss": 0.5137, "step": 10077 }, { "epoch": 0.72, "grad_norm": 2.2470842006939598, "learning_rate": 1.981057920581914e-06, "loss": 0.5374, "step": 10078 }, { "epoch": 0.72, "grad_norm": 1.6561644236104762, "learning_rate": 1.9801419480577312e-06, "loss": 0.5384, "step": 10079 }, { "epoch": 0.72, "grad_norm": 2.116363150450398, "learning_rate": 1.9792261350526788e-06, "loss": 0.5394, "step": 10080 }, { "epoch": 0.72, "grad_norm": 1.6473487288245503, "learning_rate": 1.9783104816151416e-06, "loss": 0.5394, "step": 10081 }, { "epoch": 0.72, "grad_norm": 1.6328414660031356, "learning_rate": 1.9773949877934823e-06, "loss": 0.5535, "step": 10082 }, { "epoch": 0.72, "grad_norm": 1.513138516992385, "learning_rate": 1.9764796536360614e-06, "loss": 0.5734, "step": 10083 }, { "epoch": 0.72, "grad_norm": 0.6887886949931826, "learning_rate": 1.9755644791912306e-06, "loss": 0.4091, "step": 10084 }, { "epoch": 0.72, "grad_norm": 1.7335834319728325, "learning_rate": 1.9746494645073316e-06, "loss": 0.5637, "step": 10085 }, { "epoch": 0.72, "grad_norm": 1.74170071733523, "learning_rate": 1.973734609632701e-06, "loss": 0.5192, "step": 10086 }, { "epoch": 0.72, "grad_norm": 2.4174786624232327, "learning_rate": 1.9728199146156613e-06, "loss": 0.4703, "step": 10087 }, { "epoch": 0.72, "grad_norm": 1.8114084093920053, "learning_rate": 1.971905379504531e-06, "loss": 0.5336, "step": 10088 }, { "epoch": 0.72, "grad_norm": 1.671948551175786, "learning_rate": 1.970991004347619e-06, "loss": 0.452, "step": 10089 }, { "epoch": 0.72, "grad_norm": 1.6322878195638608, "learning_rate": 1.9700767891932264e-06, "loss": 0.4871, "step": 10090 }, { "epoch": 0.72, "grad_norm": 2.1992140648607155, "learning_rate": 1.9691627340896456e-06, "loss": 0.4972, "step": 10091 }, { "epoch": 0.72, "grad_norm": 1.6719171978075562, "learning_rate": 1.9682488390851563e-06, "loss": 0.4982, "step": 10092 }, { "epoch": 0.72, "grad_norm": 1.91694735454238, "learning_rate": 1.967335104228039e-06, "loss": 0.5145, "step": 10093 }, { "epoch": 0.72, "grad_norm": 1.5409815435994731, "learning_rate": 1.9664215295665566e-06, "loss": 0.5419, "step": 10094 }, { "epoch": 0.72, "grad_norm": 1.6420082185492388, "learning_rate": 1.965508115148968e-06, "loss": 0.5938, "step": 10095 }, { "epoch": 0.72, "grad_norm": 1.7435130421164953, "learning_rate": 1.9645948610235226e-06, "loss": 0.4849, "step": 10096 }, { "epoch": 0.72, "grad_norm": 1.7898826957947112, "learning_rate": 1.963681767238462e-06, "loss": 0.4675, "step": 10097 }, { "epoch": 0.72, "grad_norm": 4.326302976313969, "learning_rate": 1.9627688338420215e-06, "loss": 0.5551, "step": 10098 }, { "epoch": 0.72, "grad_norm": 1.7631136985024314, "learning_rate": 1.9618560608824182e-06, "loss": 0.5291, "step": 10099 }, { "epoch": 0.72, "grad_norm": 0.7686811039016653, "learning_rate": 1.9609434484078766e-06, "loss": 0.423, "step": 10100 }, { "epoch": 0.72, "grad_norm": 2.1277402714003295, "learning_rate": 1.9600309964665975e-06, "loss": 0.5928, "step": 10101 }, { "epoch": 0.72, "grad_norm": 1.8327210176143773, "learning_rate": 1.9591187051067837e-06, "loss": 0.5669, "step": 10102 }, { "epoch": 0.72, "grad_norm": 1.6169882192816916, "learning_rate": 1.958206574376621e-06, "loss": 0.5169, "step": 10103 }, { "epoch": 0.72, "grad_norm": 1.5658756958339162, "learning_rate": 1.9572946043242952e-06, "loss": 0.4979, "step": 10104 }, { "epoch": 0.72, "grad_norm": 0.7378599888194799, "learning_rate": 1.9563827949979806e-06, "loss": 0.4229, "step": 10105 }, { "epoch": 0.72, "grad_norm": 1.8160774046571133, "learning_rate": 1.9554711464458382e-06, "loss": 0.486, "step": 10106 }, { "epoch": 0.72, "grad_norm": 0.7182979131187212, "learning_rate": 1.954559658716026e-06, "loss": 0.4202, "step": 10107 }, { "epoch": 0.72, "grad_norm": 1.9079709123471837, "learning_rate": 1.953648331856692e-06, "loss": 0.5314, "step": 10108 }, { "epoch": 0.72, "grad_norm": 1.7943206597427712, "learning_rate": 1.952737165915977e-06, "loss": 0.5522, "step": 10109 }, { "epoch": 0.72, "grad_norm": 1.662380167528331, "learning_rate": 1.9518261609420075e-06, "loss": 0.5353, "step": 10110 }, { "epoch": 0.72, "grad_norm": 1.7030795007923176, "learning_rate": 1.950915316982912e-06, "loss": 0.5456, "step": 10111 }, { "epoch": 0.72, "grad_norm": 1.770626312492131, "learning_rate": 1.950004634086799e-06, "loss": 0.5915, "step": 10112 }, { "epoch": 0.72, "grad_norm": 1.771650854103482, "learning_rate": 1.9490941123017766e-06, "loss": 0.5233, "step": 10113 }, { "epoch": 0.72, "grad_norm": 1.958356019497405, "learning_rate": 1.948183751675941e-06, "loss": 0.5672, "step": 10114 }, { "epoch": 0.72, "grad_norm": 1.8554772181892192, "learning_rate": 1.9472735522573805e-06, "loss": 0.5028, "step": 10115 }, { "epoch": 0.72, "grad_norm": 2.216694865231501, "learning_rate": 1.9463635140941766e-06, "loss": 0.5235, "step": 10116 }, { "epoch": 0.72, "grad_norm": 0.7005744679959377, "learning_rate": 1.9454536372343974e-06, "loss": 0.4374, "step": 10117 }, { "epoch": 0.72, "grad_norm": 1.7787420612367, "learning_rate": 1.9445439217261073e-06, "loss": 0.4732, "step": 10118 }, { "epoch": 0.72, "grad_norm": 2.5364020820231348, "learning_rate": 1.943634367617361e-06, "loss": 0.5545, "step": 10119 }, { "epoch": 0.72, "grad_norm": 1.8722586791997047, "learning_rate": 1.9427249749562034e-06, "loss": 0.5495, "step": 10120 }, { "epoch": 0.72, "grad_norm": 1.742561440776197, "learning_rate": 1.9418157437906737e-06, "loss": 0.5315, "step": 10121 }, { "epoch": 0.72, "grad_norm": 1.7082117290891496, "learning_rate": 1.9409066741687952e-06, "loss": 0.5498, "step": 10122 }, { "epoch": 0.72, "grad_norm": 2.0873933150457162, "learning_rate": 1.939997766138596e-06, "loss": 0.5089, "step": 10123 }, { "epoch": 0.72, "grad_norm": 0.751027603485962, "learning_rate": 1.939089019748081e-06, "loss": 0.4586, "step": 10124 }, { "epoch": 0.72, "grad_norm": 1.7596588651751137, "learning_rate": 1.9381804350452568e-06, "loss": 0.51, "step": 10125 }, { "epoch": 0.72, "grad_norm": 1.7916081893083429, "learning_rate": 1.9372720120781157e-06, "loss": 0.5196, "step": 10126 }, { "epoch": 0.72, "grad_norm": 1.9077171813890403, "learning_rate": 1.9363637508946457e-06, "loss": 0.5213, "step": 10127 }, { "epoch": 0.72, "grad_norm": 1.7867421371152294, "learning_rate": 1.9354556515428246e-06, "loss": 0.5583, "step": 10128 }, { "epoch": 0.72, "grad_norm": 1.9534052686920502, "learning_rate": 1.934547714070617e-06, "loss": 0.5441, "step": 10129 }, { "epoch": 0.72, "grad_norm": 1.7497997552070477, "learning_rate": 1.9336399385259895e-06, "loss": 0.5401, "step": 10130 }, { "epoch": 0.72, "grad_norm": 2.1942277021391328, "learning_rate": 1.932732324956889e-06, "loss": 0.4843, "step": 10131 }, { "epoch": 0.72, "grad_norm": 1.5942516172839618, "learning_rate": 1.931824873411261e-06, "loss": 0.5039, "step": 10132 }, { "epoch": 0.72, "grad_norm": 1.5365465662896785, "learning_rate": 1.9309175839370386e-06, "loss": 0.5523, "step": 10133 }, { "epoch": 0.72, "grad_norm": 0.7323923824634078, "learning_rate": 1.9300104565821496e-06, "loss": 0.449, "step": 10134 }, { "epoch": 0.72, "grad_norm": 2.3253882496820055, "learning_rate": 1.9291034913945123e-06, "loss": 0.5176, "step": 10135 }, { "epoch": 0.72, "grad_norm": 1.9088634584458906, "learning_rate": 1.9281966884220328e-06, "loss": 0.5509, "step": 10136 }, { "epoch": 0.72, "grad_norm": 1.7222852678813565, "learning_rate": 1.9272900477126124e-06, "loss": 0.5778, "step": 10137 }, { "epoch": 0.72, "grad_norm": 1.7752371309653752, "learning_rate": 1.9263835693141437e-06, "loss": 0.4685, "step": 10138 }, { "epoch": 0.72, "grad_norm": 1.4778273443597498, "learning_rate": 1.9254772532745115e-06, "loss": 0.503, "step": 10139 }, { "epoch": 0.72, "grad_norm": 2.5236091299627534, "learning_rate": 1.924571099641587e-06, "loss": 0.5249, "step": 10140 }, { "epoch": 0.72, "grad_norm": 1.708836024408149, "learning_rate": 1.923665108463237e-06, "loss": 0.5083, "step": 10141 }, { "epoch": 0.72, "grad_norm": 1.9080344861443752, "learning_rate": 1.922759279787321e-06, "loss": 0.5881, "step": 10142 }, { "epoch": 0.72, "grad_norm": 1.9134671306375153, "learning_rate": 1.9218536136616873e-06, "loss": 0.5182, "step": 10143 }, { "epoch": 0.72, "grad_norm": 2.4752790991383797, "learning_rate": 1.9209481101341755e-06, "loss": 0.5606, "step": 10144 }, { "epoch": 0.72, "grad_norm": 1.4938728223762303, "learning_rate": 1.9200427692526173e-06, "loss": 0.4832, "step": 10145 }, { "epoch": 0.72, "grad_norm": 1.6072015107777136, "learning_rate": 1.9191375910648387e-06, "loss": 0.5295, "step": 10146 }, { "epoch": 0.72, "grad_norm": 1.6036691883646164, "learning_rate": 1.91823257561865e-06, "loss": 0.5412, "step": 10147 }, { "epoch": 0.72, "grad_norm": 1.828480673427384, "learning_rate": 1.917327722961859e-06, "loss": 0.4797, "step": 10148 }, { "epoch": 0.72, "grad_norm": 0.6518286401008918, "learning_rate": 1.9164230331422634e-06, "loss": 0.3946, "step": 10149 }, { "epoch": 0.72, "grad_norm": 1.7657883900451388, "learning_rate": 1.9155185062076515e-06, "loss": 0.4525, "step": 10150 }, { "epoch": 0.72, "grad_norm": 1.6791301265887801, "learning_rate": 1.914614142205806e-06, "loss": 0.5009, "step": 10151 }, { "epoch": 0.72, "grad_norm": 1.6524123705823408, "learning_rate": 1.913709941184492e-06, "loss": 0.5452, "step": 10152 }, { "epoch": 0.72, "grad_norm": 1.7018087073829087, "learning_rate": 1.9128059031914807e-06, "loss": 0.5461, "step": 10153 }, { "epoch": 0.72, "grad_norm": 1.6637716799976487, "learning_rate": 1.9119020282745204e-06, "loss": 0.5214, "step": 10154 }, { "epoch": 0.72, "grad_norm": 1.6196061284180758, "learning_rate": 1.910998316481359e-06, "loss": 0.5278, "step": 10155 }, { "epoch": 0.72, "grad_norm": 0.7913205202441055, "learning_rate": 1.9100947678597337e-06, "loss": 0.46, "step": 10156 }, { "epoch": 0.72, "grad_norm": 1.637925521715112, "learning_rate": 1.9091913824573725e-06, "loss": 0.5267, "step": 10157 }, { "epoch": 0.72, "grad_norm": 1.959301486403192, "learning_rate": 1.9082881603219973e-06, "loss": 0.558, "step": 10158 }, { "epoch": 0.72, "grad_norm": 2.058616101165977, "learning_rate": 1.9073851015013145e-06, "loss": 0.5742, "step": 10159 }, { "epoch": 0.72, "grad_norm": 1.7680816420441705, "learning_rate": 1.9064822060430328e-06, "loss": 0.4987, "step": 10160 }, { "epoch": 0.72, "grad_norm": 1.9060046144832865, "learning_rate": 1.9055794739948419e-06, "loss": 0.5272, "step": 10161 }, { "epoch": 0.72, "grad_norm": 1.4930931863712387, "learning_rate": 1.9046769054044283e-06, "loss": 0.4159, "step": 10162 }, { "epoch": 0.72, "grad_norm": 0.659868167027125, "learning_rate": 1.903774500319469e-06, "loss": 0.437, "step": 10163 }, { "epoch": 0.72, "grad_norm": 1.9145983095290537, "learning_rate": 1.902872258787632e-06, "loss": 0.5116, "step": 10164 }, { "epoch": 0.72, "grad_norm": 1.878534051335053, "learning_rate": 1.9019701808565788e-06, "loss": 0.5627, "step": 10165 }, { "epoch": 0.72, "grad_norm": 1.6790690727868454, "learning_rate": 1.901068266573956e-06, "loss": 0.4999, "step": 10166 }, { "epoch": 0.72, "grad_norm": 1.4415355729968105, "learning_rate": 1.9001665159874083e-06, "loss": 0.4524, "step": 10167 }, { "epoch": 0.72, "grad_norm": 1.4724404168120528, "learning_rate": 1.8992649291445692e-06, "loss": 0.5071, "step": 10168 }, { "epoch": 0.72, "grad_norm": 1.745829776376896, "learning_rate": 1.8983635060930644e-06, "loss": 0.5316, "step": 10169 }, { "epoch": 0.72, "grad_norm": 1.9979843200522793, "learning_rate": 1.8974622468805076e-06, "loss": 0.4738, "step": 10170 }, { "epoch": 0.72, "grad_norm": 1.525709734670125, "learning_rate": 1.8965611515545056e-06, "loss": 0.5006, "step": 10171 }, { "epoch": 0.72, "grad_norm": 2.179159897753943, "learning_rate": 1.8956602201626634e-06, "loss": 0.5604, "step": 10172 }, { "epoch": 0.72, "grad_norm": 1.6703632166160867, "learning_rate": 1.8947594527525654e-06, "loss": 0.4268, "step": 10173 }, { "epoch": 0.72, "grad_norm": 1.772341724813742, "learning_rate": 1.8938588493717953e-06, "loss": 0.5079, "step": 10174 }, { "epoch": 0.72, "grad_norm": 1.7660365394608772, "learning_rate": 1.8929584100679255e-06, "loss": 0.5613, "step": 10175 }, { "epoch": 0.72, "grad_norm": 1.5183539052199289, "learning_rate": 1.8920581348885226e-06, "loss": 0.5226, "step": 10176 }, { "epoch": 0.72, "grad_norm": 1.6906848002335533, "learning_rate": 1.8911580238811378e-06, "loss": 0.4669, "step": 10177 }, { "epoch": 0.72, "grad_norm": 2.0860454977897884, "learning_rate": 1.890258077093321e-06, "loss": 0.496, "step": 10178 }, { "epoch": 0.72, "grad_norm": 3.753939221084808, "learning_rate": 1.8893582945726097e-06, "loss": 0.4884, "step": 10179 }, { "epoch": 0.72, "grad_norm": 1.5966760956055974, "learning_rate": 1.8884586763665336e-06, "loss": 0.4948, "step": 10180 }, { "epoch": 0.72, "grad_norm": 1.8830847208798305, "learning_rate": 1.8875592225226153e-06, "loss": 0.5438, "step": 10181 }, { "epoch": 0.72, "grad_norm": 1.7636523561812776, "learning_rate": 1.8866599330883617e-06, "loss": 0.4776, "step": 10182 }, { "epoch": 0.72, "grad_norm": 0.6923793345269317, "learning_rate": 1.8857608081112833e-06, "loss": 0.4424, "step": 10183 }, { "epoch": 0.72, "grad_norm": 2.7571738390679097, "learning_rate": 1.8848618476388697e-06, "loss": 0.5278, "step": 10184 }, { "epoch": 0.72, "grad_norm": 1.7600253174851084, "learning_rate": 1.8839630517186086e-06, "loss": 0.4623, "step": 10185 }, { "epoch": 0.72, "grad_norm": 1.5173448138694987, "learning_rate": 1.8830644203979781e-06, "loss": 0.4749, "step": 10186 }, { "epoch": 0.72, "grad_norm": 1.6719521399746966, "learning_rate": 1.882165953724447e-06, "loss": 0.4986, "step": 10187 }, { "epoch": 0.72, "grad_norm": 1.6321891085482587, "learning_rate": 1.8812676517454758e-06, "loss": 0.5266, "step": 10188 }, { "epoch": 0.72, "grad_norm": 1.5852823077815796, "learning_rate": 1.8803695145085116e-06, "loss": 0.5022, "step": 10189 }, { "epoch": 0.72, "grad_norm": 0.7039969038602375, "learning_rate": 1.8794715420610038e-06, "loss": 0.4409, "step": 10190 }, { "epoch": 0.72, "grad_norm": 1.8735150954911302, "learning_rate": 1.8785737344503817e-06, "loss": 0.5337, "step": 10191 }, { "epoch": 0.72, "grad_norm": 2.1824007723051673, "learning_rate": 1.8776760917240715e-06, "loss": 0.5303, "step": 10192 }, { "epoch": 0.72, "grad_norm": 1.7375535966139435, "learning_rate": 1.8767786139294903e-06, "loss": 0.5049, "step": 10193 }, { "epoch": 0.72, "grad_norm": 1.928853201034043, "learning_rate": 1.8758813011140447e-06, "loss": 0.4255, "step": 10194 }, { "epoch": 0.72, "grad_norm": 1.7028719099117662, "learning_rate": 1.8749841533251373e-06, "loss": 0.5261, "step": 10195 }, { "epoch": 0.72, "grad_norm": 1.7970616310074443, "learning_rate": 1.8740871706101543e-06, "loss": 0.5076, "step": 10196 }, { "epoch": 0.72, "grad_norm": 1.6478847649353774, "learning_rate": 1.8731903530164786e-06, "loss": 0.4643, "step": 10197 }, { "epoch": 0.72, "grad_norm": 0.7087381307321095, "learning_rate": 1.8722937005914838e-06, "loss": 0.4155, "step": 10198 }, { "epoch": 0.72, "grad_norm": 1.8160531458745552, "learning_rate": 1.8713972133825331e-06, "loss": 0.5576, "step": 10199 }, { "epoch": 0.72, "grad_norm": 1.530259897680395, "learning_rate": 1.8705008914369854e-06, "loss": 0.5198, "step": 10200 }, { "epoch": 0.72, "grad_norm": 0.7535356684844365, "learning_rate": 1.8696047348021807e-06, "loss": 0.438, "step": 10201 }, { "epoch": 0.72, "grad_norm": 1.782971284607118, "learning_rate": 1.8687087435254652e-06, "loss": 0.4662, "step": 10202 }, { "epoch": 0.72, "grad_norm": 1.918259259524247, "learning_rate": 1.8678129176541622e-06, "loss": 0.4645, "step": 10203 }, { "epoch": 0.72, "grad_norm": 2.647501647075869, "learning_rate": 1.8669172572355953e-06, "loss": 0.4723, "step": 10204 }, { "epoch": 0.72, "grad_norm": 1.6544055298911489, "learning_rate": 1.8660217623170723e-06, "loss": 0.5002, "step": 10205 }, { "epoch": 0.72, "grad_norm": 2.5756080001696935, "learning_rate": 1.865126432945903e-06, "loss": 0.6528, "step": 10206 }, { "epoch": 0.72, "grad_norm": 1.8215094474652893, "learning_rate": 1.8642312691693754e-06, "loss": 0.5518, "step": 10207 }, { "epoch": 0.72, "grad_norm": 2.0092817267570338, "learning_rate": 1.8633362710347764e-06, "loss": 0.543, "step": 10208 }, { "epoch": 0.72, "grad_norm": 1.6965626794015187, "learning_rate": 1.8624414385893875e-06, "loss": 0.4888, "step": 10209 }, { "epoch": 0.72, "grad_norm": 1.6911713222135047, "learning_rate": 1.8615467718804713e-06, "loss": 0.4897, "step": 10210 }, { "epoch": 0.72, "grad_norm": 1.7279048872893696, "learning_rate": 1.860652270955291e-06, "loss": 0.477, "step": 10211 }, { "epoch": 0.72, "grad_norm": 1.6957088777858205, "learning_rate": 1.8597579358610917e-06, "loss": 0.4674, "step": 10212 }, { "epoch": 0.72, "grad_norm": 2.0400924923405483, "learning_rate": 1.8588637666451225e-06, "loss": 0.4666, "step": 10213 }, { "epoch": 0.72, "grad_norm": 1.6701781750279638, "learning_rate": 1.8579697633546107e-06, "loss": 0.5524, "step": 10214 }, { "epoch": 0.72, "grad_norm": 1.6782171160790955, "learning_rate": 1.8570759260367831e-06, "loss": 0.4846, "step": 10215 }, { "epoch": 0.72, "grad_norm": 1.641324253578939, "learning_rate": 1.8561822547388547e-06, "loss": 0.5355, "step": 10216 }, { "epoch": 0.73, "grad_norm": 1.6780355917771068, "learning_rate": 1.855288749508032e-06, "loss": 0.4886, "step": 10217 }, { "epoch": 0.73, "grad_norm": 1.9223914458998919, "learning_rate": 1.8543954103915152e-06, "loss": 0.5163, "step": 10218 }, { "epoch": 0.73, "grad_norm": 0.7662077303941629, "learning_rate": 1.8535022374364897e-06, "loss": 0.4522, "step": 10219 }, { "epoch": 0.73, "grad_norm": 1.632915480745482, "learning_rate": 1.8526092306901384e-06, "loss": 0.5324, "step": 10220 }, { "epoch": 0.73, "grad_norm": 1.5904080470879762, "learning_rate": 1.8517163901996316e-06, "loss": 0.5291, "step": 10221 }, { "epoch": 0.73, "grad_norm": 1.505433322125966, "learning_rate": 1.8508237160121333e-06, "loss": 0.5818, "step": 10222 }, { "epoch": 0.73, "grad_norm": 1.9509554935377402, "learning_rate": 1.8499312081747973e-06, "loss": 0.5398, "step": 10223 }, { "epoch": 0.73, "grad_norm": 3.1725972677284298, "learning_rate": 1.8490388667347686e-06, "loss": 0.5469, "step": 10224 }, { "epoch": 0.73, "grad_norm": 2.145164727864108, "learning_rate": 1.8481466917391855e-06, "loss": 0.5475, "step": 10225 }, { "epoch": 0.73, "grad_norm": 1.6688874584695186, "learning_rate": 1.8472546832351723e-06, "loss": 0.5413, "step": 10226 }, { "epoch": 0.73, "grad_norm": 2.1532841920741874, "learning_rate": 1.8463628412698497e-06, "loss": 0.5709, "step": 10227 }, { "epoch": 0.73, "grad_norm": 1.614321563182535, "learning_rate": 1.8454711658903278e-06, "loss": 0.5051, "step": 10228 }, { "epoch": 0.73, "grad_norm": 2.8066266816649494, "learning_rate": 1.844579657143708e-06, "loss": 0.555, "step": 10229 }, { "epoch": 0.73, "grad_norm": 1.711868293931153, "learning_rate": 1.8436883150770845e-06, "loss": 0.5426, "step": 10230 }, { "epoch": 0.73, "grad_norm": 1.7455495395677578, "learning_rate": 1.842797139737536e-06, "loss": 0.5539, "step": 10231 }, { "epoch": 0.73, "grad_norm": 1.7737050188503225, "learning_rate": 1.8419061311721442e-06, "loss": 0.5084, "step": 10232 }, { "epoch": 0.73, "grad_norm": 1.4853996692594096, "learning_rate": 1.8410152894279692e-06, "loss": 0.4844, "step": 10233 }, { "epoch": 0.73, "grad_norm": 1.8765078572698706, "learning_rate": 1.8401246145520729e-06, "loss": 0.5744, "step": 10234 }, { "epoch": 0.73, "grad_norm": 1.673044804276567, "learning_rate": 1.839234106591498e-06, "loss": 0.5118, "step": 10235 }, { "epoch": 0.73, "grad_norm": 1.636426400643627, "learning_rate": 1.8383437655932895e-06, "loss": 0.4893, "step": 10236 }, { "epoch": 0.73, "grad_norm": 1.6853707709969132, "learning_rate": 1.8374535916044784e-06, "loss": 0.5223, "step": 10237 }, { "epoch": 0.73, "grad_norm": 1.7179470221751045, "learning_rate": 1.8365635846720814e-06, "loss": 0.5523, "step": 10238 }, { "epoch": 0.73, "grad_norm": 1.7728143434078685, "learning_rate": 1.8356737448431179e-06, "loss": 0.555, "step": 10239 }, { "epoch": 0.73, "grad_norm": 1.5278036823091092, "learning_rate": 1.8347840721645883e-06, "loss": 0.5272, "step": 10240 }, { "epoch": 0.73, "grad_norm": 2.5251237394598176, "learning_rate": 1.83389456668349e-06, "loss": 0.4629, "step": 10241 }, { "epoch": 0.73, "grad_norm": 1.5318292832259057, "learning_rate": 1.8330052284468065e-06, "loss": 0.5225, "step": 10242 }, { "epoch": 0.73, "grad_norm": 0.7146175948064835, "learning_rate": 1.8321160575015211e-06, "loss": 0.4322, "step": 10243 }, { "epoch": 0.73, "grad_norm": 1.7050205935910363, "learning_rate": 1.831227053894598e-06, "loss": 0.5365, "step": 10244 }, { "epoch": 0.73, "grad_norm": 2.225599415910144, "learning_rate": 1.8303382176729996e-06, "loss": 0.5255, "step": 10245 }, { "epoch": 0.73, "grad_norm": 1.54064879292326, "learning_rate": 1.8294495488836762e-06, "loss": 0.4579, "step": 10246 }, { "epoch": 0.73, "grad_norm": 2.187580777844201, "learning_rate": 1.828561047573571e-06, "loss": 0.4447, "step": 10247 }, { "epoch": 0.73, "grad_norm": 2.0429011825702035, "learning_rate": 1.8276727137896199e-06, "loss": 0.575, "step": 10248 }, { "epoch": 0.73, "grad_norm": 1.6149149611328548, "learning_rate": 1.8267845475787433e-06, "loss": 0.4826, "step": 10249 }, { "epoch": 0.73, "grad_norm": 1.6001103620602708, "learning_rate": 1.8258965489878589e-06, "loss": 0.5286, "step": 10250 }, { "epoch": 0.73, "grad_norm": 1.8266797780773252, "learning_rate": 1.8250087180638743e-06, "loss": 0.4873, "step": 10251 }, { "epoch": 0.73, "grad_norm": 1.8035756083423333, "learning_rate": 1.824121054853688e-06, "loss": 0.5375, "step": 10252 }, { "epoch": 0.73, "grad_norm": 3.6608137565991514, "learning_rate": 1.82323355940419e-06, "loss": 0.5946, "step": 10253 }, { "epoch": 0.73, "grad_norm": 1.6381398334487136, "learning_rate": 1.822346231762257e-06, "loss": 0.5343, "step": 10254 }, { "epoch": 0.73, "grad_norm": 1.6484521308403561, "learning_rate": 1.821459071974766e-06, "loss": 0.4966, "step": 10255 }, { "epoch": 0.73, "grad_norm": 2.0326007990692947, "learning_rate": 1.8205720800885763e-06, "loss": 0.5029, "step": 10256 }, { "epoch": 0.73, "grad_norm": 1.4437674345309566, "learning_rate": 1.819685256150542e-06, "loss": 0.5124, "step": 10257 }, { "epoch": 0.73, "grad_norm": 1.6517247547535743, "learning_rate": 1.8187986002075092e-06, "loss": 0.4898, "step": 10258 }, { "epoch": 0.73, "grad_norm": 0.7204775791678775, "learning_rate": 1.8179121123063137e-06, "loss": 0.4524, "step": 10259 }, { "epoch": 0.73, "grad_norm": 1.8476266816181046, "learning_rate": 1.8170257924937845e-06, "loss": 0.4862, "step": 10260 }, { "epoch": 0.73, "grad_norm": 2.001977459882083, "learning_rate": 1.8161396408167349e-06, "loss": 0.5897, "step": 10261 }, { "epoch": 0.73, "grad_norm": 1.8715315799430345, "learning_rate": 1.8152536573219815e-06, "loss": 0.6164, "step": 10262 }, { "epoch": 0.73, "grad_norm": 2.8713777847710897, "learning_rate": 1.8143678420563194e-06, "loss": 0.5278, "step": 10263 }, { "epoch": 0.73, "grad_norm": 1.5791041144065054, "learning_rate": 1.813482195066542e-06, "loss": 0.5177, "step": 10264 }, { "epoch": 0.73, "grad_norm": 2.4436716648595866, "learning_rate": 1.812596716399433e-06, "loss": 0.5287, "step": 10265 }, { "epoch": 0.73, "grad_norm": 2.1032978296268943, "learning_rate": 1.8117114061017655e-06, "loss": 0.5081, "step": 10266 }, { "epoch": 0.73, "grad_norm": 2.331970753087138, "learning_rate": 1.8108262642203068e-06, "loss": 0.5158, "step": 10267 }, { "epoch": 0.73, "grad_norm": 2.4750113229756296, "learning_rate": 1.8099412908018093e-06, "loss": 0.5869, "step": 10268 }, { "epoch": 0.73, "grad_norm": 4.53333963819753, "learning_rate": 1.8090564858930222e-06, "loss": 0.5431, "step": 10269 }, { "epoch": 0.73, "grad_norm": 1.7181905402845163, "learning_rate": 1.8081718495406842e-06, "loss": 0.5055, "step": 10270 }, { "epoch": 0.73, "grad_norm": 1.5873132535531636, "learning_rate": 1.807287381791526e-06, "loss": 0.4958, "step": 10271 }, { "epoch": 0.73, "grad_norm": 1.514184129184166, "learning_rate": 1.806403082692263e-06, "loss": 0.427, "step": 10272 }, { "epoch": 0.73, "grad_norm": 1.7632200982812913, "learning_rate": 1.8055189522896144e-06, "loss": 0.5692, "step": 10273 }, { "epoch": 0.73, "grad_norm": 0.6859453577184376, "learning_rate": 1.8046349906302774e-06, "loss": 0.4266, "step": 10274 }, { "epoch": 0.73, "grad_norm": 2.5753104256284596, "learning_rate": 1.8037511977609474e-06, "loss": 0.5675, "step": 10275 }, { "epoch": 0.73, "grad_norm": 2.0032313867336136, "learning_rate": 1.8028675737283098e-06, "loss": 0.5419, "step": 10276 }, { "epoch": 0.73, "grad_norm": 0.6744645711328259, "learning_rate": 1.8019841185790398e-06, "loss": 0.4324, "step": 10277 }, { "epoch": 0.73, "grad_norm": 0.6676238218487873, "learning_rate": 1.8011008323598067e-06, "loss": 0.4053, "step": 10278 }, { "epoch": 0.73, "grad_norm": 2.423500139257181, "learning_rate": 1.8002177151172657e-06, "loss": 0.4651, "step": 10279 }, { "epoch": 0.73, "grad_norm": 0.6858536381620776, "learning_rate": 1.799334766898067e-06, "loss": 0.3959, "step": 10280 }, { "epoch": 0.73, "grad_norm": 5.038373956546306, "learning_rate": 1.7984519877488515e-06, "loss": 0.473, "step": 10281 }, { "epoch": 0.73, "grad_norm": 1.687170789860624, "learning_rate": 1.7975693777162506e-06, "loss": 0.4883, "step": 10282 }, { "epoch": 0.73, "grad_norm": 1.5408224808190714, "learning_rate": 1.7966869368468876e-06, "loss": 0.5773, "step": 10283 }, { "epoch": 0.73, "grad_norm": 1.5943652518338445, "learning_rate": 1.7958046651873716e-06, "loss": 0.54, "step": 10284 }, { "epoch": 0.73, "grad_norm": 1.5365052918813296, "learning_rate": 1.7949225627843142e-06, "loss": 0.406, "step": 10285 }, { "epoch": 0.73, "grad_norm": 2.2252852534713883, "learning_rate": 1.7940406296843054e-06, "loss": 0.6191, "step": 10286 }, { "epoch": 0.73, "grad_norm": 2.10556325734974, "learning_rate": 1.7931588659339339e-06, "loss": 0.5534, "step": 10287 }, { "epoch": 0.73, "grad_norm": 1.5842475643812837, "learning_rate": 1.7922772715797775e-06, "loss": 0.4965, "step": 10288 }, { "epoch": 0.73, "grad_norm": 2.3075002023145132, "learning_rate": 1.7913958466684046e-06, "loss": 0.4635, "step": 10289 }, { "epoch": 0.73, "grad_norm": 1.5653405101513966, "learning_rate": 1.7905145912463773e-06, "loss": 0.5327, "step": 10290 }, { "epoch": 0.73, "grad_norm": 1.5237925113232005, "learning_rate": 1.7896335053602409e-06, "loss": 0.4682, "step": 10291 }, { "epoch": 0.73, "grad_norm": 1.8318671220515856, "learning_rate": 1.7887525890565443e-06, "loss": 0.543, "step": 10292 }, { "epoch": 0.73, "grad_norm": 1.9930256077007082, "learning_rate": 1.787871842381816e-06, "loss": 0.5533, "step": 10293 }, { "epoch": 0.73, "grad_norm": 1.7596420698745947, "learning_rate": 1.7869912653825804e-06, "loss": 0.5685, "step": 10294 }, { "epoch": 0.73, "grad_norm": 2.11990274752441, "learning_rate": 1.7861108581053533e-06, "loss": 0.5275, "step": 10295 }, { "epoch": 0.73, "grad_norm": 1.421362756674344, "learning_rate": 1.7852306205966413e-06, "loss": 0.4029, "step": 10296 }, { "epoch": 0.73, "grad_norm": 3.4398351761731654, "learning_rate": 1.7843505529029426e-06, "loss": 0.5464, "step": 10297 }, { "epoch": 0.73, "grad_norm": 1.5991478915852824, "learning_rate": 1.783470655070742e-06, "loss": 0.5091, "step": 10298 }, { "epoch": 0.73, "grad_norm": 1.6640874573350437, "learning_rate": 1.782590927146521e-06, "loss": 0.5076, "step": 10299 }, { "epoch": 0.73, "grad_norm": 0.6896046778372105, "learning_rate": 1.7817113691767485e-06, "loss": 0.4404, "step": 10300 }, { "epoch": 0.73, "grad_norm": 1.9633955958098148, "learning_rate": 1.7808319812078884e-06, "loss": 0.524, "step": 10301 }, { "epoch": 0.73, "grad_norm": 2.1184092690481817, "learning_rate": 1.7799527632863877e-06, "loss": 0.5026, "step": 10302 }, { "epoch": 0.73, "grad_norm": 1.6350990899119886, "learning_rate": 1.7790737154586951e-06, "loss": 0.4898, "step": 10303 }, { "epoch": 0.73, "grad_norm": 1.471295395073119, "learning_rate": 1.7781948377712444e-06, "loss": 0.5702, "step": 10304 }, { "epoch": 0.73, "grad_norm": 1.6354896351665964, "learning_rate": 1.7773161302704578e-06, "loss": 0.5923, "step": 10305 }, { "epoch": 0.73, "grad_norm": 2.0062452698505227, "learning_rate": 1.7764375930027528e-06, "loss": 0.5645, "step": 10306 }, { "epoch": 0.73, "grad_norm": 1.5824567447183244, "learning_rate": 1.775559226014537e-06, "loss": 0.5498, "step": 10307 }, { "epoch": 0.73, "grad_norm": 1.8110817153523036, "learning_rate": 1.774681029352211e-06, "loss": 0.4755, "step": 10308 }, { "epoch": 0.73, "grad_norm": 0.6593025218220048, "learning_rate": 1.77380300306216e-06, "loss": 0.4126, "step": 10309 }, { "epoch": 0.73, "grad_norm": 2.0152794191454837, "learning_rate": 1.772925147190766e-06, "loss": 0.5973, "step": 10310 }, { "epoch": 0.73, "grad_norm": 2.597243670649812, "learning_rate": 1.7720474617844008e-06, "loss": 0.4862, "step": 10311 }, { "epoch": 0.73, "grad_norm": 2.5748548100654904, "learning_rate": 1.7711699468894262e-06, "loss": 0.5564, "step": 10312 }, { "epoch": 0.73, "grad_norm": 1.8729983966265478, "learning_rate": 1.7702926025521972e-06, "loss": 0.5151, "step": 10313 }, { "epoch": 0.73, "grad_norm": 1.6947000122007196, "learning_rate": 1.769415428819054e-06, "loss": 0.4847, "step": 10314 }, { "epoch": 0.73, "grad_norm": 1.6161940049854904, "learning_rate": 1.7685384257363374e-06, "loss": 0.4457, "step": 10315 }, { "epoch": 0.73, "grad_norm": 1.5841886546252433, "learning_rate": 1.7676615933503694e-06, "loss": 0.5959, "step": 10316 }, { "epoch": 0.73, "grad_norm": 1.6243431532036179, "learning_rate": 1.7667849317074686e-06, "loss": 0.488, "step": 10317 }, { "epoch": 0.73, "grad_norm": 1.7400271755768821, "learning_rate": 1.7659084408539435e-06, "loss": 0.4951, "step": 10318 }, { "epoch": 0.73, "grad_norm": 1.6498859482199957, "learning_rate": 1.7650321208360932e-06, "loss": 0.5478, "step": 10319 }, { "epoch": 0.73, "grad_norm": 1.5066752768022127, "learning_rate": 1.7641559717002094e-06, "loss": 0.5101, "step": 10320 }, { "epoch": 0.73, "grad_norm": 2.397551992860907, "learning_rate": 1.7632799934925682e-06, "loss": 0.498, "step": 10321 }, { "epoch": 0.73, "grad_norm": 2.065950663636038, "learning_rate": 1.7624041862594487e-06, "loss": 0.5418, "step": 10322 }, { "epoch": 0.73, "grad_norm": 1.5808678758814272, "learning_rate": 1.761528550047109e-06, "loss": 0.5192, "step": 10323 }, { "epoch": 0.73, "grad_norm": 1.641899930610851, "learning_rate": 1.7606530849018044e-06, "loss": 0.5385, "step": 10324 }, { "epoch": 0.73, "grad_norm": 1.821812471995573, "learning_rate": 1.7597777908697804e-06, "loss": 0.5493, "step": 10325 }, { "epoch": 0.73, "grad_norm": 1.7093218858287975, "learning_rate": 1.7589026679972731e-06, "loss": 0.4929, "step": 10326 }, { "epoch": 0.73, "grad_norm": 1.476475243649168, "learning_rate": 1.7580277163305109e-06, "loss": 0.4392, "step": 10327 }, { "epoch": 0.73, "grad_norm": 1.5308765696377187, "learning_rate": 1.757152935915708e-06, "loss": 0.4851, "step": 10328 }, { "epoch": 0.73, "grad_norm": 1.5575976191337766, "learning_rate": 1.7562783267990758e-06, "loss": 0.5679, "step": 10329 }, { "epoch": 0.73, "grad_norm": 1.7047632152777175, "learning_rate": 1.7554038890268132e-06, "loss": 0.5259, "step": 10330 }, { "epoch": 0.73, "grad_norm": 1.6062119872948746, "learning_rate": 1.7545296226451115e-06, "loss": 0.5229, "step": 10331 }, { "epoch": 0.73, "grad_norm": 0.687552023179318, "learning_rate": 1.7536555277001538e-06, "loss": 0.4, "step": 10332 }, { "epoch": 0.73, "grad_norm": 1.5527546140474207, "learning_rate": 1.752781604238108e-06, "loss": 0.4962, "step": 10333 }, { "epoch": 0.73, "grad_norm": 1.878967841047379, "learning_rate": 1.751907852305144e-06, "loss": 0.5168, "step": 10334 }, { "epoch": 0.73, "grad_norm": 1.6729464416993773, "learning_rate": 1.7510342719474122e-06, "loss": 0.5289, "step": 10335 }, { "epoch": 0.73, "grad_norm": 1.6134296145143128, "learning_rate": 1.750160863211059e-06, "loss": 0.4738, "step": 10336 }, { "epoch": 0.73, "grad_norm": 1.533127672403869, "learning_rate": 1.749287626142221e-06, "loss": 0.5538, "step": 10337 }, { "epoch": 0.73, "grad_norm": 1.6399703884410466, "learning_rate": 1.7484145607870267e-06, "loss": 0.4824, "step": 10338 }, { "epoch": 0.73, "grad_norm": 1.5784383598902836, "learning_rate": 1.7475416671915917e-06, "loss": 0.4932, "step": 10339 }, { "epoch": 0.73, "grad_norm": 1.4921738258238617, "learning_rate": 1.7466689454020252e-06, "loss": 0.5707, "step": 10340 }, { "epoch": 0.73, "grad_norm": 1.7844735175746989, "learning_rate": 1.7457963954644324e-06, "loss": 0.5097, "step": 10341 }, { "epoch": 0.73, "grad_norm": 1.9697191460906485, "learning_rate": 1.7449240174248988e-06, "loss": 0.4883, "step": 10342 }, { "epoch": 0.73, "grad_norm": 1.9585912796546394, "learning_rate": 1.7440518113295095e-06, "loss": 0.518, "step": 10343 }, { "epoch": 0.73, "grad_norm": 2.1492245220407944, "learning_rate": 1.7431797772243336e-06, "loss": 0.5547, "step": 10344 }, { "epoch": 0.73, "grad_norm": 1.8535980764213418, "learning_rate": 1.7423079151554401e-06, "loss": 0.5817, "step": 10345 }, { "epoch": 0.73, "grad_norm": 1.5944258898918746, "learning_rate": 1.74143622516888e-06, "loss": 0.5733, "step": 10346 }, { "epoch": 0.73, "grad_norm": 1.6640121644785961, "learning_rate": 1.7405647073106996e-06, "loss": 0.5212, "step": 10347 }, { "epoch": 0.73, "grad_norm": 1.8652546485379602, "learning_rate": 1.7396933616269363e-06, "loss": 0.4983, "step": 10348 }, { "epoch": 0.73, "grad_norm": 1.6540322731858663, "learning_rate": 1.7388221881636163e-06, "loss": 0.5958, "step": 10349 }, { "epoch": 0.73, "grad_norm": 1.6088697712529059, "learning_rate": 1.7379511869667616e-06, "loss": 0.5079, "step": 10350 }, { "epoch": 0.73, "grad_norm": 0.6548238733295465, "learning_rate": 1.7370803580823741e-06, "loss": 0.4236, "step": 10351 }, { "epoch": 0.73, "grad_norm": 1.9759411167808332, "learning_rate": 1.7362097015564622e-06, "loss": 0.4925, "step": 10352 }, { "epoch": 0.73, "grad_norm": 3.3134523705439682, "learning_rate": 1.735339217435011e-06, "loss": 0.5737, "step": 10353 }, { "epoch": 0.73, "grad_norm": 0.7263337916184869, "learning_rate": 1.7344689057640047e-06, "loss": 0.4055, "step": 10354 }, { "epoch": 0.73, "grad_norm": 1.6632690589477614, "learning_rate": 1.7335987665894161e-06, "loss": 0.4731, "step": 10355 }, { "epoch": 0.73, "grad_norm": 1.8483688818726582, "learning_rate": 1.732728799957209e-06, "loss": 0.5321, "step": 10356 }, { "epoch": 0.73, "grad_norm": 1.873937351044728, "learning_rate": 1.7318590059133388e-06, "loss": 0.4842, "step": 10357 }, { "epoch": 0.74, "grad_norm": 1.7600011802961772, "learning_rate": 1.7309893845037483e-06, "loss": 0.5457, "step": 10358 }, { "epoch": 0.74, "grad_norm": 2.173819979530444, "learning_rate": 1.730119935774376e-06, "loss": 0.6189, "step": 10359 }, { "epoch": 0.74, "grad_norm": 1.6560097824555855, "learning_rate": 1.7292506597711479e-06, "loss": 0.4676, "step": 10360 }, { "epoch": 0.74, "grad_norm": 1.4438926081279408, "learning_rate": 1.7283815565399831e-06, "loss": 0.4985, "step": 10361 }, { "epoch": 0.74, "grad_norm": 1.634944471107545, "learning_rate": 1.7275126261267916e-06, "loss": 0.4585, "step": 10362 }, { "epoch": 0.74, "grad_norm": 1.672401088751102, "learning_rate": 1.726643868577469e-06, "loss": 0.4938, "step": 10363 }, { "epoch": 0.74, "grad_norm": 1.7312622590562106, "learning_rate": 1.7257752839379116e-06, "loss": 0.5491, "step": 10364 }, { "epoch": 0.74, "grad_norm": 3.573520631994281, "learning_rate": 1.7249068722539974e-06, "loss": 0.5296, "step": 10365 }, { "epoch": 0.74, "grad_norm": 1.6206713183616401, "learning_rate": 1.7240386335715992e-06, "loss": 0.5113, "step": 10366 }, { "epoch": 0.74, "grad_norm": 1.6744626421913837, "learning_rate": 1.7231705679365812e-06, "loss": 0.5189, "step": 10367 }, { "epoch": 0.74, "grad_norm": 1.6804936812649143, "learning_rate": 1.7223026753947968e-06, "loss": 0.5277, "step": 10368 }, { "epoch": 0.74, "grad_norm": 1.6682704988388106, "learning_rate": 1.7214349559920935e-06, "loss": 0.5739, "step": 10369 }, { "epoch": 0.74, "grad_norm": 1.8590285278297647, "learning_rate": 1.7205674097743013e-06, "loss": 0.4646, "step": 10370 }, { "epoch": 0.74, "grad_norm": 1.7654059720660669, "learning_rate": 1.7197000367872546e-06, "loss": 0.4876, "step": 10371 }, { "epoch": 0.74, "grad_norm": 1.803284097360823, "learning_rate": 1.7188328370767655e-06, "loss": 0.57, "step": 10372 }, { "epoch": 0.74, "grad_norm": 0.8036707048224655, "learning_rate": 1.7179658106886454e-06, "loss": 0.4269, "step": 10373 }, { "epoch": 0.74, "grad_norm": 1.8502739283553713, "learning_rate": 1.7170989576686886e-06, "loss": 0.475, "step": 10374 }, { "epoch": 0.74, "grad_norm": 1.7159013696702918, "learning_rate": 1.7162322780626927e-06, "loss": 0.4873, "step": 10375 }, { "epoch": 0.74, "grad_norm": 1.6739567271397748, "learning_rate": 1.7153657719164328e-06, "loss": 0.5367, "step": 10376 }, { "epoch": 0.74, "grad_norm": 1.864675244696054, "learning_rate": 1.714499439275683e-06, "loss": 0.472, "step": 10377 }, { "epoch": 0.74, "grad_norm": 2.6730129367537763, "learning_rate": 1.7136332801862054e-06, "loss": 0.5418, "step": 10378 }, { "epoch": 0.74, "grad_norm": 1.985802702866608, "learning_rate": 1.712767294693754e-06, "loss": 0.574, "step": 10379 }, { "epoch": 0.74, "grad_norm": 1.6182077241835342, "learning_rate": 1.7119014828440738e-06, "loss": 0.4954, "step": 10380 }, { "epoch": 0.74, "grad_norm": 1.742991375490611, "learning_rate": 1.7110358446828979e-06, "loss": 0.5448, "step": 10381 }, { "epoch": 0.74, "grad_norm": 1.7058014552596428, "learning_rate": 1.7101703802559527e-06, "loss": 0.4955, "step": 10382 }, { "epoch": 0.74, "grad_norm": 0.658482061262983, "learning_rate": 1.7093050896089557e-06, "loss": 0.43, "step": 10383 }, { "epoch": 0.74, "grad_norm": 1.6178108292214874, "learning_rate": 1.7084399727876144e-06, "loss": 0.5457, "step": 10384 }, { "epoch": 0.74, "grad_norm": 2.1970929159463317, "learning_rate": 1.707575029837627e-06, "loss": 0.5455, "step": 10385 }, { "epoch": 0.74, "grad_norm": 1.809179981089818, "learning_rate": 1.7067102608046826e-06, "loss": 0.572, "step": 10386 }, { "epoch": 0.74, "grad_norm": 1.7271939999098316, "learning_rate": 1.7058456657344629e-06, "loss": 0.4448, "step": 10387 }, { "epoch": 0.74, "grad_norm": 2.1632508641204566, "learning_rate": 1.704981244672636e-06, "loss": 0.4833, "step": 10388 }, { "epoch": 0.74, "grad_norm": 1.8450837209214086, "learning_rate": 1.7041169976648648e-06, "loss": 0.598, "step": 10389 }, { "epoch": 0.74, "grad_norm": 1.8983380378202808, "learning_rate": 1.7032529247568019e-06, "loss": 0.48, "step": 10390 }, { "epoch": 0.74, "grad_norm": 1.8499653379713006, "learning_rate": 1.70238902599409e-06, "loss": 0.5128, "step": 10391 }, { "epoch": 0.74, "grad_norm": 1.670900701875766, "learning_rate": 1.7015253014223655e-06, "loss": 0.5478, "step": 10392 }, { "epoch": 0.74, "grad_norm": 1.8133398193051051, "learning_rate": 1.7006617510872475e-06, "loss": 0.4835, "step": 10393 }, { "epoch": 0.74, "grad_norm": 1.6032307954882428, "learning_rate": 1.6997983750343594e-06, "loss": 0.5424, "step": 10394 }, { "epoch": 0.74, "grad_norm": 1.6120602642781467, "learning_rate": 1.6989351733093019e-06, "loss": 0.5196, "step": 10395 }, { "epoch": 0.74, "grad_norm": 1.932607485570893, "learning_rate": 1.6980721459576743e-06, "loss": 0.6054, "step": 10396 }, { "epoch": 0.74, "grad_norm": 1.593932553661682, "learning_rate": 1.6972092930250639e-06, "loss": 0.543, "step": 10397 }, { "epoch": 0.74, "grad_norm": 0.8039147055590061, "learning_rate": 1.6963466145570507e-06, "loss": 0.4438, "step": 10398 }, { "epoch": 0.74, "grad_norm": 0.6793703957727107, "learning_rate": 1.6954841105992047e-06, "loss": 0.4191, "step": 10399 }, { "epoch": 0.74, "grad_norm": 2.081196371902629, "learning_rate": 1.6946217811970823e-06, "loss": 0.5318, "step": 10400 }, { "epoch": 0.74, "grad_norm": 2.2658102642237985, "learning_rate": 1.6937596263962402e-06, "loss": 0.4996, "step": 10401 }, { "epoch": 0.74, "grad_norm": 1.4400183125041217, "learning_rate": 1.6928976462422163e-06, "loss": 0.4304, "step": 10402 }, { "epoch": 0.74, "grad_norm": 2.1113126024773203, "learning_rate": 1.692035840780547e-06, "loss": 0.5005, "step": 10403 }, { "epoch": 0.74, "grad_norm": 1.5698146616707072, "learning_rate": 1.6911742100567496e-06, "loss": 0.5054, "step": 10404 }, { "epoch": 0.74, "grad_norm": 1.770830893977512, "learning_rate": 1.6903127541163462e-06, "loss": 0.5966, "step": 10405 }, { "epoch": 0.74, "grad_norm": 1.5257558337666013, "learning_rate": 1.6894514730048356e-06, "loss": 0.5549, "step": 10406 }, { "epoch": 0.74, "grad_norm": 0.7212878409751853, "learning_rate": 1.6885903667677155e-06, "loss": 0.4301, "step": 10407 }, { "epoch": 0.74, "grad_norm": 2.2237780482882656, "learning_rate": 1.6877294354504736e-06, "loss": 0.5865, "step": 10408 }, { "epoch": 0.74, "grad_norm": 2.671086678063606, "learning_rate": 1.686868679098586e-06, "loss": 0.5875, "step": 10409 }, { "epoch": 0.74, "grad_norm": 1.5840092294401948, "learning_rate": 1.6860080977575232e-06, "loss": 0.4682, "step": 10410 }, { "epoch": 0.74, "grad_norm": 1.4905424363420188, "learning_rate": 1.6851476914727406e-06, "loss": 0.4941, "step": 10411 }, { "epoch": 0.74, "grad_norm": 1.584074625439617, "learning_rate": 1.6842874602896886e-06, "loss": 0.5169, "step": 10412 }, { "epoch": 0.74, "grad_norm": 1.7611079199144852, "learning_rate": 1.6834274042538079e-06, "loss": 0.5325, "step": 10413 }, { "epoch": 0.74, "grad_norm": 1.6668419782344575, "learning_rate": 1.6825675234105304e-06, "loss": 0.5171, "step": 10414 }, { "epoch": 0.74, "grad_norm": 1.8164629091306044, "learning_rate": 1.681707817805277e-06, "loss": 0.4815, "step": 10415 }, { "epoch": 0.74, "grad_norm": 1.6564290836698472, "learning_rate": 1.6808482874834608e-06, "loss": 0.5237, "step": 10416 }, { "epoch": 0.74, "grad_norm": 1.6441686709304315, "learning_rate": 1.6799889324904862e-06, "loss": 0.566, "step": 10417 }, { "epoch": 0.74, "grad_norm": 5.024789097625792, "learning_rate": 1.6791297528717444e-06, "loss": 0.5072, "step": 10418 }, { "epoch": 0.74, "grad_norm": 1.6239723318605777, "learning_rate": 1.678270748672622e-06, "loss": 0.552, "step": 10419 }, { "epoch": 0.74, "grad_norm": 1.528074809745828, "learning_rate": 1.6774119199384936e-06, "loss": 0.4728, "step": 10420 }, { "epoch": 0.74, "grad_norm": 2.113207746251324, "learning_rate": 1.6765532667147267e-06, "loss": 0.5054, "step": 10421 }, { "epoch": 0.74, "grad_norm": 2.0096637856811537, "learning_rate": 1.675694789046679e-06, "loss": 0.5712, "step": 10422 }, { "epoch": 0.74, "grad_norm": 1.7768621547958665, "learning_rate": 1.6748364869796934e-06, "loss": 0.5728, "step": 10423 }, { "epoch": 0.74, "grad_norm": 1.778457500294935, "learning_rate": 1.6739783605591153e-06, "loss": 0.5682, "step": 10424 }, { "epoch": 0.74, "grad_norm": 1.6821392099697976, "learning_rate": 1.6731204098302684e-06, "loss": 0.4984, "step": 10425 }, { "epoch": 0.74, "grad_norm": 1.673424745104213, "learning_rate": 1.6722626348384746e-06, "loss": 0.4734, "step": 10426 }, { "epoch": 0.74, "grad_norm": 1.6486004789863549, "learning_rate": 1.6714050356290445e-06, "loss": 0.4768, "step": 10427 }, { "epoch": 0.74, "grad_norm": 2.0346234412877657, "learning_rate": 1.6705476122472786e-06, "loss": 0.5796, "step": 10428 }, { "epoch": 0.74, "grad_norm": 2.034375902730215, "learning_rate": 1.6696903647384722e-06, "loss": 0.4636, "step": 10429 }, { "epoch": 0.74, "grad_norm": 2.077013018006939, "learning_rate": 1.6688332931479012e-06, "loss": 0.4784, "step": 10430 }, { "epoch": 0.74, "grad_norm": 1.9325640624759457, "learning_rate": 1.667976397520847e-06, "loss": 0.4728, "step": 10431 }, { "epoch": 0.74, "grad_norm": 1.6127831067676852, "learning_rate": 1.6671196779025678e-06, "loss": 0.4802, "step": 10432 }, { "epoch": 0.74, "grad_norm": 1.538673351490178, "learning_rate": 1.6662631343383222e-06, "loss": 0.4803, "step": 10433 }, { "epoch": 0.74, "grad_norm": 3.1941630998708828, "learning_rate": 1.6654067668733515e-06, "loss": 0.5381, "step": 10434 }, { "epoch": 0.74, "grad_norm": 4.764459039099109, "learning_rate": 1.6645505755528958e-06, "loss": 0.5677, "step": 10435 }, { "epoch": 0.74, "grad_norm": 1.644119138447471, "learning_rate": 1.6636945604221822e-06, "loss": 0.4869, "step": 10436 }, { "epoch": 0.74, "grad_norm": 1.8814813450933507, "learning_rate": 1.6628387215264252e-06, "loss": 0.5823, "step": 10437 }, { "epoch": 0.74, "grad_norm": 1.9149876593201713, "learning_rate": 1.661983058910835e-06, "loss": 0.5007, "step": 10438 }, { "epoch": 0.74, "grad_norm": 2.4141258959374676, "learning_rate": 1.66112757262061e-06, "loss": 0.4768, "step": 10439 }, { "epoch": 0.74, "grad_norm": 1.7520994297315047, "learning_rate": 1.6602722627009426e-06, "loss": 0.5605, "step": 10440 }, { "epoch": 0.74, "grad_norm": 1.614663997261603, "learning_rate": 1.6594171291970086e-06, "loss": 0.5073, "step": 10441 }, { "epoch": 0.74, "grad_norm": 1.845411779764572, "learning_rate": 1.6585621721539814e-06, "loss": 0.546, "step": 10442 }, { "epoch": 0.74, "grad_norm": 0.8303856505958676, "learning_rate": 1.6577073916170227e-06, "loss": 0.4277, "step": 10443 }, { "epoch": 0.74, "grad_norm": 1.6999103029327187, "learning_rate": 1.6568527876312845e-06, "loss": 0.5893, "step": 10444 }, { "epoch": 0.74, "grad_norm": 2.089428611848198, "learning_rate": 1.6559983602419123e-06, "loss": 0.5572, "step": 10445 }, { "epoch": 0.74, "grad_norm": 1.796832616236273, "learning_rate": 1.655144109494034e-06, "loss": 0.5026, "step": 10446 }, { "epoch": 0.74, "grad_norm": 2.0366481534010057, "learning_rate": 1.6542900354327813e-06, "loss": 0.5486, "step": 10447 }, { "epoch": 0.74, "grad_norm": 0.7069394734044736, "learning_rate": 1.6534361381032643e-06, "loss": 0.428, "step": 10448 }, { "epoch": 0.74, "grad_norm": 1.9225174078373273, "learning_rate": 1.65258241755059e-06, "loss": 0.4959, "step": 10449 }, { "epoch": 0.74, "grad_norm": 2.2663531745010843, "learning_rate": 1.651728873819855e-06, "loss": 0.576, "step": 10450 }, { "epoch": 0.74, "grad_norm": 1.4848480139106952, "learning_rate": 1.650875506956146e-06, "loss": 0.496, "step": 10451 }, { "epoch": 0.74, "grad_norm": 1.6867404433647457, "learning_rate": 1.650022317004543e-06, "loss": 0.5314, "step": 10452 }, { "epoch": 0.74, "grad_norm": 2.010497067852514, "learning_rate": 1.6491693040101098e-06, "loss": 0.5685, "step": 10453 }, { "epoch": 0.74, "grad_norm": 1.731167480927865, "learning_rate": 1.648316468017911e-06, "loss": 0.5521, "step": 10454 }, { "epoch": 0.74, "grad_norm": 1.7001376783878632, "learning_rate": 1.6474638090729916e-06, "loss": 0.4898, "step": 10455 }, { "epoch": 0.74, "grad_norm": 1.7116000092731083, "learning_rate": 1.6466113272203937e-06, "loss": 0.5335, "step": 10456 }, { "epoch": 0.74, "grad_norm": 0.7295693826471109, "learning_rate": 1.6457590225051484e-06, "loss": 0.4403, "step": 10457 }, { "epoch": 0.74, "grad_norm": 1.712322168164006, "learning_rate": 1.6449068949722775e-06, "loss": 0.554, "step": 10458 }, { "epoch": 0.74, "grad_norm": 1.6914116024619779, "learning_rate": 1.6440549446667942e-06, "loss": 0.5551, "step": 10459 }, { "epoch": 0.74, "grad_norm": 1.581935491863476, "learning_rate": 1.6432031716336988e-06, "loss": 0.5244, "step": 10460 }, { "epoch": 0.74, "grad_norm": 1.61089403713094, "learning_rate": 1.6423515759179865e-06, "loss": 0.504, "step": 10461 }, { "epoch": 0.74, "grad_norm": 3.9893262656655604, "learning_rate": 1.6415001575646412e-06, "loss": 0.5341, "step": 10462 }, { "epoch": 0.74, "grad_norm": 1.8189755653690287, "learning_rate": 1.6406489166186373e-06, "loss": 0.5838, "step": 10463 }, { "epoch": 0.74, "grad_norm": 1.8043261206860068, "learning_rate": 1.6397978531249409e-06, "loss": 0.5363, "step": 10464 }, { "epoch": 0.74, "grad_norm": 2.2655373867020736, "learning_rate": 1.638946967128508e-06, "loss": 0.5217, "step": 10465 }, { "epoch": 0.74, "grad_norm": 1.505133999515219, "learning_rate": 1.6380962586742865e-06, "loss": 0.517, "step": 10466 }, { "epoch": 0.74, "grad_norm": 1.4141230442163966, "learning_rate": 1.6372457278072107e-06, "loss": 0.4839, "step": 10467 }, { "epoch": 0.74, "grad_norm": 6.582674375931129, "learning_rate": 1.63639537457221e-06, "loss": 0.5071, "step": 10468 }, { "epoch": 0.74, "grad_norm": 1.8354463835693777, "learning_rate": 1.6355451990142028e-06, "loss": 0.5668, "step": 10469 }, { "epoch": 0.74, "grad_norm": 1.5745620154106472, "learning_rate": 1.6346952011781008e-06, "loss": 0.5474, "step": 10470 }, { "epoch": 0.74, "grad_norm": 1.7788333448717903, "learning_rate": 1.6338453811087996e-06, "loss": 0.5219, "step": 10471 }, { "epoch": 0.74, "grad_norm": 2.4823211679482955, "learning_rate": 1.632995738851192e-06, "loss": 0.5308, "step": 10472 }, { "epoch": 0.74, "grad_norm": 1.7478644535050798, "learning_rate": 1.6321462744501575e-06, "loss": 0.5174, "step": 10473 }, { "epoch": 0.74, "grad_norm": 1.6357623793709244, "learning_rate": 1.631296987950569e-06, "loss": 0.5449, "step": 10474 }, { "epoch": 0.74, "grad_norm": 1.9629081222425508, "learning_rate": 1.63044787939729e-06, "loss": 0.5921, "step": 10475 }, { "epoch": 0.74, "grad_norm": 1.9732968490301797, "learning_rate": 1.6295989488351682e-06, "loss": 0.5026, "step": 10476 }, { "epoch": 0.74, "grad_norm": 1.7829393018748605, "learning_rate": 1.628750196309054e-06, "loss": 0.5283, "step": 10477 }, { "epoch": 0.74, "grad_norm": 0.7256328329260381, "learning_rate": 1.627901621863776e-06, "loss": 0.4463, "step": 10478 }, { "epoch": 0.74, "grad_norm": 1.8012742636433268, "learning_rate": 1.6270532255441608e-06, "loss": 0.5886, "step": 10479 }, { "epoch": 0.74, "grad_norm": 1.5604780476144748, "learning_rate": 1.6262050073950225e-06, "loss": 0.5277, "step": 10480 }, { "epoch": 0.74, "grad_norm": 1.760169445239215, "learning_rate": 1.6253569674611686e-06, "loss": 0.5424, "step": 10481 }, { "epoch": 0.74, "grad_norm": 1.7360568362647566, "learning_rate": 1.6245091057873956e-06, "loss": 0.4367, "step": 10482 }, { "epoch": 0.74, "grad_norm": 1.7388162130083156, "learning_rate": 1.6236614224184866e-06, "loss": 0.5544, "step": 10483 }, { "epoch": 0.74, "grad_norm": 1.4971832304283674, "learning_rate": 1.6228139173992248e-06, "loss": 0.4999, "step": 10484 }, { "epoch": 0.74, "grad_norm": 1.5734170549309998, "learning_rate": 1.6219665907743736e-06, "loss": 0.525, "step": 10485 }, { "epoch": 0.74, "grad_norm": 1.885442262812269, "learning_rate": 1.6211194425886934e-06, "loss": 0.5022, "step": 10486 }, { "epoch": 0.74, "grad_norm": 1.6523328012455403, "learning_rate": 1.6202724728869336e-06, "loss": 0.518, "step": 10487 }, { "epoch": 0.74, "grad_norm": 1.8878318726837369, "learning_rate": 1.6194256817138338e-06, "loss": 0.5685, "step": 10488 }, { "epoch": 0.74, "grad_norm": 1.8269859147306058, "learning_rate": 1.6185790691141263e-06, "loss": 0.4993, "step": 10489 }, { "epoch": 0.74, "grad_norm": 1.7083580867608161, "learning_rate": 1.6177326351325284e-06, "loss": 0.5287, "step": 10490 }, { "epoch": 0.74, "grad_norm": 1.683238402067469, "learning_rate": 1.6168863798137536e-06, "loss": 0.4902, "step": 10491 }, { "epoch": 0.74, "grad_norm": 2.1131166105537824, "learning_rate": 1.6160403032025034e-06, "loss": 0.5479, "step": 10492 }, { "epoch": 0.74, "grad_norm": 1.7882708144846682, "learning_rate": 1.615194405343471e-06, "loss": 0.5669, "step": 10493 }, { "epoch": 0.74, "grad_norm": 1.5872525220638656, "learning_rate": 1.6143486862813407e-06, "loss": 0.4869, "step": 10494 }, { "epoch": 0.74, "grad_norm": 5.346798934728638, "learning_rate": 1.6135031460607819e-06, "loss": 0.5017, "step": 10495 }, { "epoch": 0.74, "grad_norm": 1.6014576561987575, "learning_rate": 1.6126577847264656e-06, "loss": 0.4556, "step": 10496 }, { "epoch": 0.74, "grad_norm": 1.8333216252496483, "learning_rate": 1.6118126023230414e-06, "loss": 0.4925, "step": 10497 }, { "epoch": 0.74, "grad_norm": 1.9077865484524617, "learning_rate": 1.6109675988951555e-06, "loss": 0.5476, "step": 10498 }, { "epoch": 0.75, "grad_norm": 2.390198977760358, "learning_rate": 1.6101227744874453e-06, "loss": 0.5696, "step": 10499 }, { "epoch": 0.75, "grad_norm": 0.7029415668056194, "learning_rate": 1.609278129144538e-06, "loss": 0.4164, "step": 10500 }, { "epoch": 0.75, "grad_norm": 2.109878660666533, "learning_rate": 1.6084336629110475e-06, "loss": 0.5355, "step": 10501 }, { "epoch": 0.75, "grad_norm": 1.7004587511779912, "learning_rate": 1.6075893758315813e-06, "loss": 0.4839, "step": 10502 }, { "epoch": 0.75, "grad_norm": 2.097992435180401, "learning_rate": 1.6067452679507434e-06, "loss": 0.5764, "step": 10503 }, { "epoch": 0.75, "grad_norm": 1.5943383940552824, "learning_rate": 1.6059013393131161e-06, "loss": 0.4627, "step": 10504 }, { "epoch": 0.75, "grad_norm": 1.6665759002047902, "learning_rate": 1.6050575899632826e-06, "loss": 0.4274, "step": 10505 }, { "epoch": 0.75, "grad_norm": 1.6095919574384423, "learning_rate": 1.6042140199458077e-06, "loss": 0.559, "step": 10506 }, { "epoch": 0.75, "grad_norm": 1.6151377466704804, "learning_rate": 1.603370629305258e-06, "loss": 0.4201, "step": 10507 }, { "epoch": 0.75, "grad_norm": 0.6876774323582775, "learning_rate": 1.6025274180861793e-06, "loss": 0.4234, "step": 10508 }, { "epoch": 0.75, "grad_norm": 1.7057814339569395, "learning_rate": 1.6016843863331145e-06, "loss": 0.5365, "step": 10509 }, { "epoch": 0.75, "grad_norm": 1.7113847373611897, "learning_rate": 1.6008415340905953e-06, "loss": 0.5012, "step": 10510 }, { "epoch": 0.75, "grad_norm": 2.075098300799037, "learning_rate": 1.5999988614031448e-06, "loss": 0.4973, "step": 10511 }, { "epoch": 0.75, "grad_norm": 1.5291551092016695, "learning_rate": 1.599156368315276e-06, "loss": 0.4439, "step": 10512 }, { "epoch": 0.75, "grad_norm": 2.092371357322078, "learning_rate": 1.5983140548714893e-06, "loss": 0.4321, "step": 10513 }, { "epoch": 0.75, "grad_norm": 2.475977271335705, "learning_rate": 1.597471921116283e-06, "loss": 0.5681, "step": 10514 }, { "epoch": 0.75, "grad_norm": 1.5367767102513537, "learning_rate": 1.5966299670941382e-06, "loss": 0.4591, "step": 10515 }, { "epoch": 0.75, "grad_norm": 1.4515114442294532, "learning_rate": 1.59578819284953e-06, "loss": 0.4992, "step": 10516 }, { "epoch": 0.75, "grad_norm": 1.5597355951708003, "learning_rate": 1.5949465984269252e-06, "loss": 0.5063, "step": 10517 }, { "epoch": 0.75, "grad_norm": 1.7233974701441424, "learning_rate": 1.5941051838707788e-06, "loss": 0.5908, "step": 10518 }, { "epoch": 0.75, "grad_norm": 1.7795115205809668, "learning_rate": 1.5932639492255387e-06, "loss": 0.5009, "step": 10519 }, { "epoch": 0.75, "grad_norm": 1.7023634159671173, "learning_rate": 1.592422894535639e-06, "loss": 0.5274, "step": 10520 }, { "epoch": 0.75, "grad_norm": 2.2900454921527733, "learning_rate": 1.5915820198455083e-06, "loss": 0.506, "step": 10521 }, { "epoch": 0.75, "grad_norm": 1.5852153479305722, "learning_rate": 1.5907413251995645e-06, "loss": 0.4587, "step": 10522 }, { "epoch": 0.75, "grad_norm": 1.6076262723829147, "learning_rate": 1.5899008106422166e-06, "loss": 0.5701, "step": 10523 }, { "epoch": 0.75, "grad_norm": 1.6681457132968678, "learning_rate": 1.589060476217864e-06, "loss": 0.5479, "step": 10524 }, { "epoch": 0.75, "grad_norm": 0.6910785175580504, "learning_rate": 1.5882203219708918e-06, "loss": 0.4107, "step": 10525 }, { "epoch": 0.75, "grad_norm": 1.5515541352812257, "learning_rate": 1.5873803479456862e-06, "loss": 0.5159, "step": 10526 }, { "epoch": 0.75, "grad_norm": 2.4533807687539966, "learning_rate": 1.5865405541866125e-06, "loss": 0.6025, "step": 10527 }, { "epoch": 0.75, "grad_norm": 1.6343628503124479, "learning_rate": 1.585700940738033e-06, "loss": 0.4717, "step": 10528 }, { "epoch": 0.75, "grad_norm": 0.7476675367887125, "learning_rate": 1.584861507644299e-06, "loss": 0.4455, "step": 10529 }, { "epoch": 0.75, "grad_norm": 1.5418299112602618, "learning_rate": 1.5840222549497525e-06, "loss": 0.5234, "step": 10530 }, { "epoch": 0.75, "grad_norm": 2.496478664328739, "learning_rate": 1.5831831826987265e-06, "loss": 0.6286, "step": 10531 }, { "epoch": 0.75, "grad_norm": 1.5757495961424142, "learning_rate": 1.5823442909355396e-06, "loss": 0.4491, "step": 10532 }, { "epoch": 0.75, "grad_norm": 1.9372873790348948, "learning_rate": 1.5815055797045114e-06, "loss": 0.5036, "step": 10533 }, { "epoch": 0.75, "grad_norm": 0.7707684437897937, "learning_rate": 1.5806670490499398e-06, "loss": 0.4355, "step": 10534 }, { "epoch": 0.75, "grad_norm": 2.070519191230848, "learning_rate": 1.5798286990161228e-06, "loss": 0.5803, "step": 10535 }, { "epoch": 0.75, "grad_norm": 1.9614394164162958, "learning_rate": 1.5789905296473402e-06, "loss": 0.5418, "step": 10536 }, { "epoch": 0.75, "grad_norm": 1.7917863946270411, "learning_rate": 1.5781525409878723e-06, "loss": 0.5227, "step": 10537 }, { "epoch": 0.75, "grad_norm": 1.8560592386632566, "learning_rate": 1.577314733081981e-06, "loss": 0.5506, "step": 10538 }, { "epoch": 0.75, "grad_norm": 1.6879649386929427, "learning_rate": 1.5764771059739226e-06, "loss": 0.5161, "step": 10539 }, { "epoch": 0.75, "grad_norm": 1.980745341681599, "learning_rate": 1.5756396597079444e-06, "loss": 0.5308, "step": 10540 }, { "epoch": 0.75, "grad_norm": 1.70962004496072, "learning_rate": 1.5748023943282826e-06, "loss": 0.5317, "step": 10541 }, { "epoch": 0.75, "grad_norm": 1.6834825958134105, "learning_rate": 1.5739653098791657e-06, "loss": 0.4819, "step": 10542 }, { "epoch": 0.75, "grad_norm": 1.868616147458286, "learning_rate": 1.573128406404807e-06, "loss": 0.5222, "step": 10543 }, { "epoch": 0.75, "grad_norm": 1.60210951861304, "learning_rate": 1.5722916839494212e-06, "loss": 0.4685, "step": 10544 }, { "epoch": 0.75, "grad_norm": 2.1592221601056694, "learning_rate": 1.5714551425572016e-06, "loss": 0.5486, "step": 10545 }, { "epoch": 0.75, "grad_norm": 0.6927347568386585, "learning_rate": 1.570618782272339e-06, "loss": 0.4226, "step": 10546 }, { "epoch": 0.75, "grad_norm": 1.712883565939955, "learning_rate": 1.5697826031390124e-06, "loss": 0.5945, "step": 10547 }, { "epoch": 0.75, "grad_norm": 1.6405186910954446, "learning_rate": 1.568946605201392e-06, "loss": 0.5348, "step": 10548 }, { "epoch": 0.75, "grad_norm": 1.5340521854019662, "learning_rate": 1.5681107885036395e-06, "loss": 0.5331, "step": 10549 }, { "epoch": 0.75, "grad_norm": 3.706074225740816, "learning_rate": 1.5672751530899022e-06, "loss": 0.4716, "step": 10550 }, { "epoch": 0.75, "grad_norm": 2.5059568131667644, "learning_rate": 1.5664396990043235e-06, "loss": 0.4394, "step": 10551 }, { "epoch": 0.75, "grad_norm": 1.8163896571578324, "learning_rate": 1.5656044262910336e-06, "loss": 0.5711, "step": 10552 }, { "epoch": 0.75, "grad_norm": 3.206830705180215, "learning_rate": 1.5647693349941556e-06, "loss": 0.5363, "step": 10553 }, { "epoch": 0.75, "grad_norm": 1.4996482187963185, "learning_rate": 1.5639344251578037e-06, "loss": 0.5065, "step": 10554 }, { "epoch": 0.75, "grad_norm": 2.72390835082974, "learning_rate": 1.5630996968260743e-06, "loss": 0.5694, "step": 10555 }, { "epoch": 0.75, "grad_norm": 1.6030333660602594, "learning_rate": 1.5622651500430685e-06, "loss": 0.5327, "step": 10556 }, { "epoch": 0.75, "grad_norm": 1.7613705442948675, "learning_rate": 1.5614307848528642e-06, "loss": 0.4916, "step": 10557 }, { "epoch": 0.75, "grad_norm": 2.4522919803883747, "learning_rate": 1.560596601299537e-06, "loss": 0.5297, "step": 10558 }, { "epoch": 0.75, "grad_norm": 1.7244155153954548, "learning_rate": 1.5597625994271526e-06, "loss": 0.5303, "step": 10559 }, { "epoch": 0.75, "grad_norm": 1.631481795986207, "learning_rate": 1.558928779279764e-06, "loss": 0.4773, "step": 10560 }, { "epoch": 0.75, "grad_norm": 2.03245063943376, "learning_rate": 1.558095140901419e-06, "loss": 0.5751, "step": 10561 }, { "epoch": 0.75, "grad_norm": 1.974918995766225, "learning_rate": 1.5572616843361482e-06, "loss": 0.4778, "step": 10562 }, { "epoch": 0.75, "grad_norm": 1.5285489382674438, "learning_rate": 1.5564284096279842e-06, "loss": 0.4815, "step": 10563 }, { "epoch": 0.75, "grad_norm": 1.8486274103134617, "learning_rate": 1.5555953168209388e-06, "loss": 0.511, "step": 10564 }, { "epoch": 0.75, "grad_norm": 0.6815884474658551, "learning_rate": 1.5547624059590205e-06, "loss": 0.4185, "step": 10565 }, { "epoch": 0.75, "grad_norm": 1.6976080859535605, "learning_rate": 1.553929677086224e-06, "loss": 0.5592, "step": 10566 }, { "epoch": 0.75, "grad_norm": 1.8389683258075102, "learning_rate": 1.5530971302465393e-06, "loss": 0.571, "step": 10567 }, { "epoch": 0.75, "grad_norm": 2.2126127615792184, "learning_rate": 1.5522647654839463e-06, "loss": 0.5754, "step": 10568 }, { "epoch": 0.75, "grad_norm": 2.1721316387773975, "learning_rate": 1.5514325828424087e-06, "loss": 0.5489, "step": 10569 }, { "epoch": 0.75, "grad_norm": 0.6486859796974606, "learning_rate": 1.5506005823658882e-06, "loss": 0.4519, "step": 10570 }, { "epoch": 0.75, "grad_norm": 2.022804116013147, "learning_rate": 1.5497687640983322e-06, "loss": 0.4846, "step": 10571 }, { "epoch": 0.75, "grad_norm": 1.6858171529752886, "learning_rate": 1.5489371280836834e-06, "loss": 0.541, "step": 10572 }, { "epoch": 0.75, "grad_norm": 1.9118269638462828, "learning_rate": 1.5481056743658673e-06, "loss": 0.4646, "step": 10573 }, { "epoch": 0.75, "grad_norm": 1.808211036897223, "learning_rate": 1.547274402988806e-06, "loss": 0.4882, "step": 10574 }, { "epoch": 0.75, "grad_norm": 1.7831761138572344, "learning_rate": 1.5464433139964102e-06, "loss": 0.5383, "step": 10575 }, { "epoch": 0.75, "grad_norm": 0.8271366007488715, "learning_rate": 1.5456124074325813e-06, "loss": 0.4213, "step": 10576 }, { "epoch": 0.75, "grad_norm": 1.9366548898578648, "learning_rate": 1.54478168334121e-06, "loss": 0.5378, "step": 10577 }, { "epoch": 0.75, "grad_norm": 0.680826441158982, "learning_rate": 1.5439511417661774e-06, "loss": 0.4388, "step": 10578 }, { "epoch": 0.75, "grad_norm": 1.7966196531816798, "learning_rate": 1.5431207827513583e-06, "loss": 0.5142, "step": 10579 }, { "epoch": 0.75, "grad_norm": 1.7021357834542508, "learning_rate": 1.5422906063406113e-06, "loss": 0.5408, "step": 10580 }, { "epoch": 0.75, "grad_norm": 1.7826831807680679, "learning_rate": 1.5414606125777909e-06, "loss": 0.5259, "step": 10581 }, { "epoch": 0.75, "grad_norm": 2.0601909195649655, "learning_rate": 1.5406308015067395e-06, "loss": 0.4754, "step": 10582 }, { "epoch": 0.75, "grad_norm": 1.6626932677704283, "learning_rate": 1.539801173171292e-06, "loss": 0.5245, "step": 10583 }, { "epoch": 0.75, "grad_norm": 1.5636834247415081, "learning_rate": 1.538971727615272e-06, "loss": 0.5477, "step": 10584 }, { "epoch": 0.75, "grad_norm": 1.5277738934296092, "learning_rate": 1.5381424648824906e-06, "loss": 0.5121, "step": 10585 }, { "epoch": 0.75, "grad_norm": 1.9488143339892243, "learning_rate": 1.5373133850167571e-06, "loss": 0.5093, "step": 10586 }, { "epoch": 0.75, "grad_norm": 1.779370984788307, "learning_rate": 1.5364844880618623e-06, "loss": 0.492, "step": 10587 }, { "epoch": 0.75, "grad_norm": 0.7089197687603893, "learning_rate": 1.535655774061593e-06, "loss": 0.4474, "step": 10588 }, { "epoch": 0.75, "grad_norm": 1.7385138278494736, "learning_rate": 1.5348272430597244e-06, "loss": 0.5323, "step": 10589 }, { "epoch": 0.75, "grad_norm": 1.7124121394045315, "learning_rate": 1.5339988951000223e-06, "loss": 0.5023, "step": 10590 }, { "epoch": 0.75, "grad_norm": 1.675266686172874, "learning_rate": 1.533170730226245e-06, "loss": 0.4993, "step": 10591 }, { "epoch": 0.75, "grad_norm": 1.9899787294938223, "learning_rate": 1.5323427484821335e-06, "loss": 0.4556, "step": 10592 }, { "epoch": 0.75, "grad_norm": 1.643389436258584, "learning_rate": 1.5315149499114312e-06, "loss": 0.5631, "step": 10593 }, { "epoch": 0.75, "grad_norm": 1.9029607579165586, "learning_rate": 1.5306873345578604e-06, "loss": 0.487, "step": 10594 }, { "epoch": 0.75, "grad_norm": 1.601861228143719, "learning_rate": 1.5298599024651405e-06, "loss": 0.5172, "step": 10595 }, { "epoch": 0.75, "grad_norm": 1.6417143393739395, "learning_rate": 1.5290326536769789e-06, "loss": 0.5443, "step": 10596 }, { "epoch": 0.75, "grad_norm": 1.459931469995595, "learning_rate": 1.528205588237074e-06, "loss": 0.4863, "step": 10597 }, { "epoch": 0.75, "grad_norm": 1.7914135208721658, "learning_rate": 1.527378706189115e-06, "loss": 0.5246, "step": 10598 }, { "epoch": 0.75, "grad_norm": 1.9367377870755567, "learning_rate": 1.5265520075767782e-06, "loss": 0.5154, "step": 10599 }, { "epoch": 0.75, "grad_norm": 1.8034822817888396, "learning_rate": 1.5257254924437343e-06, "loss": 0.5165, "step": 10600 }, { "epoch": 0.75, "grad_norm": 1.4107578533780403, "learning_rate": 1.5248991608336421e-06, "loss": 0.5249, "step": 10601 }, { "epoch": 0.75, "grad_norm": 2.0529019119313725, "learning_rate": 1.524073012790153e-06, "loss": 0.5143, "step": 10602 }, { "epoch": 0.75, "grad_norm": 1.8190727879715343, "learning_rate": 1.523247048356904e-06, "loss": 0.4724, "step": 10603 }, { "epoch": 0.75, "grad_norm": 1.6113187561466273, "learning_rate": 1.5224212675775267e-06, "loss": 0.5672, "step": 10604 }, { "epoch": 0.75, "grad_norm": 2.16565101494338, "learning_rate": 1.5215956704956415e-06, "loss": 0.541, "step": 10605 }, { "epoch": 0.75, "grad_norm": 1.9207470862072435, "learning_rate": 1.5207702571548593e-06, "loss": 0.5665, "step": 10606 }, { "epoch": 0.75, "grad_norm": 1.7415394523381789, "learning_rate": 1.519945027598782e-06, "loss": 0.4826, "step": 10607 }, { "epoch": 0.75, "grad_norm": 1.752841273068188, "learning_rate": 1.5191199818710001e-06, "loss": 0.5866, "step": 10608 }, { "epoch": 0.75, "grad_norm": 1.8213531127537046, "learning_rate": 1.518295120015097e-06, "loss": 0.4931, "step": 10609 }, { "epoch": 0.75, "grad_norm": 1.647190556206935, "learning_rate": 1.5174704420746416e-06, "loss": 0.5573, "step": 10610 }, { "epoch": 0.75, "grad_norm": 1.6092606191379788, "learning_rate": 1.5166459480931982e-06, "loss": 0.5418, "step": 10611 }, { "epoch": 0.75, "grad_norm": 1.7350699219259809, "learning_rate": 1.5158216381143193e-06, "loss": 0.486, "step": 10612 }, { "epoch": 0.75, "grad_norm": 0.7459738584848288, "learning_rate": 1.5149975121815469e-06, "loss": 0.4113, "step": 10613 }, { "epoch": 0.75, "grad_norm": 0.641178616146587, "learning_rate": 1.5141735703384164e-06, "loss": 0.3935, "step": 10614 }, { "epoch": 0.75, "grad_norm": 1.7813011003941057, "learning_rate": 1.5133498126284462e-06, "loss": 0.4974, "step": 10615 }, { "epoch": 0.75, "grad_norm": 1.7115000460670287, "learning_rate": 1.5125262390951567e-06, "loss": 0.5307, "step": 10616 }, { "epoch": 0.75, "grad_norm": 1.8442863263768914, "learning_rate": 1.5117028497820468e-06, "loss": 0.5694, "step": 10617 }, { "epoch": 0.75, "grad_norm": 1.7398189795154542, "learning_rate": 1.5108796447326124e-06, "loss": 0.489, "step": 10618 }, { "epoch": 0.75, "grad_norm": 2.309662876245267, "learning_rate": 1.510056623990338e-06, "loss": 0.5085, "step": 10619 }, { "epoch": 0.75, "grad_norm": 1.889164999736863, "learning_rate": 1.5092337875986979e-06, "loss": 0.5696, "step": 10620 }, { "epoch": 0.75, "grad_norm": 2.8164331110931573, "learning_rate": 1.5084111356011593e-06, "loss": 0.5543, "step": 10621 }, { "epoch": 0.75, "grad_norm": 3.0112979218009968, "learning_rate": 1.5075886680411744e-06, "loss": 0.4711, "step": 10622 }, { "epoch": 0.75, "grad_norm": 1.71695761659884, "learning_rate": 1.5067663849621894e-06, "loss": 0.5502, "step": 10623 }, { "epoch": 0.75, "grad_norm": 1.5982064913156648, "learning_rate": 1.5059442864076407e-06, "loss": 0.4946, "step": 10624 }, { "epoch": 0.75, "grad_norm": 0.6241593659583228, "learning_rate": 1.5051223724209547e-06, "loss": 0.4315, "step": 10625 }, { "epoch": 0.75, "grad_norm": 1.6089190494309604, "learning_rate": 1.504300643045547e-06, "loss": 0.5031, "step": 10626 }, { "epoch": 0.75, "grad_norm": 1.5581866746379132, "learning_rate": 1.5034790983248243e-06, "loss": 0.4679, "step": 10627 }, { "epoch": 0.75, "grad_norm": 1.5239901666291509, "learning_rate": 1.5026577383021852e-06, "loss": 0.5125, "step": 10628 }, { "epoch": 0.75, "grad_norm": 1.5901763065985381, "learning_rate": 1.501836563021013e-06, "loss": 0.4958, "step": 10629 }, { "epoch": 0.75, "grad_norm": 1.5760596647339957, "learning_rate": 1.5010155725246872e-06, "loss": 0.5044, "step": 10630 }, { "epoch": 0.75, "grad_norm": 2.5948959757516312, "learning_rate": 1.5001947668565753e-06, "loss": 0.5266, "step": 10631 }, { "epoch": 0.75, "grad_norm": 1.6151573997010014, "learning_rate": 1.4993741460600358e-06, "loss": 0.4562, "step": 10632 }, { "epoch": 0.75, "grad_norm": 1.5757647653396676, "learning_rate": 1.4985537101784143e-06, "loss": 0.5316, "step": 10633 }, { "epoch": 0.75, "grad_norm": 1.7078402249890485, "learning_rate": 1.497733459255049e-06, "loss": 0.5506, "step": 10634 }, { "epoch": 0.75, "grad_norm": 1.5952281306193585, "learning_rate": 1.4969133933332725e-06, "loss": 0.5569, "step": 10635 }, { "epoch": 0.75, "grad_norm": 1.805018857226252, "learning_rate": 1.496093512456399e-06, "loss": 0.56, "step": 10636 }, { "epoch": 0.75, "grad_norm": 1.9116389656346477, "learning_rate": 1.495273816667741e-06, "loss": 0.509, "step": 10637 }, { "epoch": 0.75, "grad_norm": 1.6188495359660995, "learning_rate": 1.4944543060105927e-06, "loss": 0.5524, "step": 10638 }, { "epoch": 0.75, "grad_norm": 2.078024210505921, "learning_rate": 1.4936349805282492e-06, "loss": 0.5228, "step": 10639 }, { "epoch": 0.76, "grad_norm": 1.575664277284591, "learning_rate": 1.4928158402639852e-06, "loss": 0.4744, "step": 10640 }, { "epoch": 0.76, "grad_norm": 0.6060302950906317, "learning_rate": 1.4919968852610728e-06, "loss": 0.4375, "step": 10641 }, { "epoch": 0.76, "grad_norm": 1.8948057755284016, "learning_rate": 1.491178115562772e-06, "loss": 0.5376, "step": 10642 }, { "epoch": 0.76, "grad_norm": 1.526267861905416, "learning_rate": 1.4903595312123321e-06, "loss": 0.4941, "step": 10643 }, { "epoch": 0.76, "grad_norm": 1.8339647936393995, "learning_rate": 1.4895411322529956e-06, "loss": 0.5098, "step": 10644 }, { "epoch": 0.76, "grad_norm": 2.138151412327116, "learning_rate": 1.488722918727989e-06, "loss": 0.4921, "step": 10645 }, { "epoch": 0.76, "grad_norm": 3.9081826770571655, "learning_rate": 1.4879048906805388e-06, "loss": 0.4607, "step": 10646 }, { "epoch": 0.76, "grad_norm": 1.563515562211448, "learning_rate": 1.4870870481538508e-06, "loss": 0.4824, "step": 10647 }, { "epoch": 0.76, "grad_norm": 1.899819854147182, "learning_rate": 1.4862693911911286e-06, "loss": 0.4856, "step": 10648 }, { "epoch": 0.76, "grad_norm": 1.782140482090628, "learning_rate": 1.4854519198355633e-06, "loss": 0.5292, "step": 10649 }, { "epoch": 0.76, "grad_norm": 1.8402595544170228, "learning_rate": 1.4846346341303363e-06, "loss": 0.5989, "step": 10650 }, { "epoch": 0.76, "grad_norm": 1.6722973764078668, "learning_rate": 1.4838175341186212e-06, "loss": 0.5475, "step": 10651 }, { "epoch": 0.76, "grad_norm": 1.9788795017011398, "learning_rate": 1.4830006198435771e-06, "loss": 0.4595, "step": 10652 }, { "epoch": 0.76, "grad_norm": 0.7128330190419396, "learning_rate": 1.4821838913483572e-06, "loss": 0.4148, "step": 10653 }, { "epoch": 0.76, "grad_norm": 1.8108972455284293, "learning_rate": 1.4813673486761039e-06, "loss": 0.5129, "step": 10654 }, { "epoch": 0.76, "grad_norm": 1.6221522997641011, "learning_rate": 1.4805509918699507e-06, "loss": 0.5082, "step": 10655 }, { "epoch": 0.76, "grad_norm": 1.7417044562230422, "learning_rate": 1.4797348209730188e-06, "loss": 0.5672, "step": 10656 }, { "epoch": 0.76, "grad_norm": 1.5801951396392688, "learning_rate": 1.4789188360284223e-06, "loss": 0.5204, "step": 10657 }, { "epoch": 0.76, "grad_norm": 1.4968218956063872, "learning_rate": 1.478103037079266e-06, "loss": 0.4329, "step": 10658 }, { "epoch": 0.76, "grad_norm": 1.4940954964448796, "learning_rate": 1.4772874241686391e-06, "loss": 0.491, "step": 10659 }, { "epoch": 0.76, "grad_norm": 1.5655389373557391, "learning_rate": 1.4764719973396269e-06, "loss": 0.4908, "step": 10660 }, { "epoch": 0.76, "grad_norm": 1.649701363073898, "learning_rate": 1.4756567566353037e-06, "loss": 0.4976, "step": 10661 }, { "epoch": 0.76, "grad_norm": 1.8619229021719754, "learning_rate": 1.4748417020987322e-06, "loss": 0.4679, "step": 10662 }, { "epoch": 0.76, "grad_norm": 3.0286197317647723, "learning_rate": 1.4740268337729684e-06, "loss": 0.4459, "step": 10663 }, { "epoch": 0.76, "grad_norm": 1.4768557174194714, "learning_rate": 1.4732121517010518e-06, "loss": 0.5001, "step": 10664 }, { "epoch": 0.76, "grad_norm": 1.7256660922611382, "learning_rate": 1.4723976559260234e-06, "loss": 0.5622, "step": 10665 }, { "epoch": 0.76, "grad_norm": 1.7359346787799372, "learning_rate": 1.471583346490902e-06, "loss": 0.5457, "step": 10666 }, { "epoch": 0.76, "grad_norm": 1.529220143912608, "learning_rate": 1.4707692234387055e-06, "loss": 0.483, "step": 10667 }, { "epoch": 0.76, "grad_norm": 0.6626058888944303, "learning_rate": 1.4699552868124339e-06, "loss": 0.4269, "step": 10668 }, { "epoch": 0.76, "grad_norm": 1.4868942135751833, "learning_rate": 1.4691415366550893e-06, "loss": 0.5155, "step": 10669 }, { "epoch": 0.76, "grad_norm": 1.5112318244480527, "learning_rate": 1.4683279730096512e-06, "loss": 0.4939, "step": 10670 }, { "epoch": 0.76, "grad_norm": 1.8057538409751797, "learning_rate": 1.4675145959190945e-06, "loss": 0.4625, "step": 10671 }, { "epoch": 0.76, "grad_norm": 1.6114983304995802, "learning_rate": 1.46670140542639e-06, "loss": 0.5652, "step": 10672 }, { "epoch": 0.76, "grad_norm": 1.67434590180966, "learning_rate": 1.4658884015744884e-06, "loss": 0.5353, "step": 10673 }, { "epoch": 0.76, "grad_norm": 4.65392258370727, "learning_rate": 1.465075584406338e-06, "loss": 0.5359, "step": 10674 }, { "epoch": 0.76, "grad_norm": 1.514195136264466, "learning_rate": 1.4642629539648701e-06, "loss": 0.5118, "step": 10675 }, { "epoch": 0.76, "grad_norm": 1.4945172739297936, "learning_rate": 1.4634505102930163e-06, "loss": 0.5151, "step": 10676 }, { "epoch": 0.76, "grad_norm": 1.5406396533403712, "learning_rate": 1.462638253433689e-06, "loss": 0.4996, "step": 10677 }, { "epoch": 0.76, "grad_norm": 1.787443911491598, "learning_rate": 1.4618261834297952e-06, "loss": 0.5226, "step": 10678 }, { "epoch": 0.76, "grad_norm": 1.8455987551687656, "learning_rate": 1.4610143003242316e-06, "loss": 0.5735, "step": 10679 }, { "epoch": 0.76, "grad_norm": 1.766502041386081, "learning_rate": 1.4602026041598838e-06, "loss": 0.4929, "step": 10680 }, { "epoch": 0.76, "grad_norm": 1.944532504643116, "learning_rate": 1.459391094979631e-06, "loss": 0.5726, "step": 10681 }, { "epoch": 0.76, "grad_norm": 1.5302360011048066, "learning_rate": 1.4585797728263363e-06, "loss": 0.5214, "step": 10682 }, { "epoch": 0.76, "grad_norm": 1.7718192933221877, "learning_rate": 1.4577686377428573e-06, "loss": 0.5415, "step": 10683 }, { "epoch": 0.76, "grad_norm": 2.0937012353019915, "learning_rate": 1.4569576897720423e-06, "loss": 0.5282, "step": 10684 }, { "epoch": 0.76, "grad_norm": 1.7610264414283165, "learning_rate": 1.4561469289567276e-06, "loss": 0.5357, "step": 10685 }, { "epoch": 0.76, "grad_norm": 1.999607693380123, "learning_rate": 1.4553363553397414e-06, "loss": 0.5139, "step": 10686 }, { "epoch": 0.76, "grad_norm": 0.7319133603518555, "learning_rate": 1.4545259689638968e-06, "loss": 0.4205, "step": 10687 }, { "epoch": 0.76, "grad_norm": 1.6458117214361008, "learning_rate": 1.4537157698720078e-06, "loss": 0.4931, "step": 10688 }, { "epoch": 0.76, "grad_norm": 4.273140218189718, "learning_rate": 1.4529057581068661e-06, "loss": 0.5863, "step": 10689 }, { "epoch": 0.76, "grad_norm": 1.6473670781037948, "learning_rate": 1.4520959337112616e-06, "loss": 0.5247, "step": 10690 }, { "epoch": 0.76, "grad_norm": 3.2844654535239934, "learning_rate": 1.4512862967279723e-06, "loss": 0.5588, "step": 10691 }, { "epoch": 0.76, "grad_norm": 4.002149913091146, "learning_rate": 1.450476847199765e-06, "loss": 0.5292, "step": 10692 }, { "epoch": 0.76, "grad_norm": 1.8126966015498267, "learning_rate": 1.4496675851693997e-06, "loss": 0.4759, "step": 10693 }, { "epoch": 0.76, "grad_norm": 1.730106746541734, "learning_rate": 1.44885851067962e-06, "loss": 0.574, "step": 10694 }, { "epoch": 0.76, "grad_norm": 1.7648808117702592, "learning_rate": 1.4480496237731694e-06, "loss": 0.4735, "step": 10695 }, { "epoch": 0.76, "grad_norm": 1.5552632130396524, "learning_rate": 1.4472409244927721e-06, "loss": 0.4498, "step": 10696 }, { "epoch": 0.76, "grad_norm": 0.7195631372753983, "learning_rate": 1.4464324128811486e-06, "loss": 0.4074, "step": 10697 }, { "epoch": 0.76, "grad_norm": 1.8913628342970745, "learning_rate": 1.4456240889810036e-06, "loss": 0.5087, "step": 10698 }, { "epoch": 0.76, "grad_norm": 1.4838989943157586, "learning_rate": 1.4448159528350391e-06, "loss": 0.5257, "step": 10699 }, { "epoch": 0.76, "grad_norm": 1.8783356377653806, "learning_rate": 1.4440080044859444e-06, "loss": 0.5549, "step": 10700 }, { "epoch": 0.76, "grad_norm": 1.5347467908333357, "learning_rate": 1.4432002439763947e-06, "loss": 0.4645, "step": 10701 }, { "epoch": 0.76, "grad_norm": 1.5897542757523386, "learning_rate": 1.4423926713490605e-06, "loss": 0.4767, "step": 10702 }, { "epoch": 0.76, "grad_norm": 2.01805779961288, "learning_rate": 1.4415852866465996e-06, "loss": 0.4746, "step": 10703 }, { "epoch": 0.76, "grad_norm": 1.8168347799452822, "learning_rate": 1.4407780899116635e-06, "loss": 0.5455, "step": 10704 }, { "epoch": 0.76, "grad_norm": 1.7020611176928275, "learning_rate": 1.4399710811868855e-06, "loss": 0.576, "step": 10705 }, { "epoch": 0.76, "grad_norm": 1.7232590412084658, "learning_rate": 1.4391642605149015e-06, "loss": 0.5067, "step": 10706 }, { "epoch": 0.76, "grad_norm": 1.8275071233720077, "learning_rate": 1.4383576279383249e-06, "loss": 0.4832, "step": 10707 }, { "epoch": 0.76, "grad_norm": 3.2919671787304994, "learning_rate": 1.4375511834997675e-06, "loss": 0.4615, "step": 10708 }, { "epoch": 0.76, "grad_norm": 1.5733899964307254, "learning_rate": 1.4367449272418277e-06, "loss": 0.5101, "step": 10709 }, { "epoch": 0.76, "grad_norm": 1.697898425723707, "learning_rate": 1.4359388592070944e-06, "loss": 0.4929, "step": 10710 }, { "epoch": 0.76, "grad_norm": 1.8787909848683368, "learning_rate": 1.435132979438149e-06, "loss": 0.5485, "step": 10711 }, { "epoch": 0.76, "grad_norm": 0.613462879827104, "learning_rate": 1.4343272879775577e-06, "loss": 0.408, "step": 10712 }, { "epoch": 0.76, "grad_norm": 1.6619895628544024, "learning_rate": 1.4335217848678811e-06, "loss": 0.4491, "step": 10713 }, { "epoch": 0.76, "grad_norm": 1.6100993244451636, "learning_rate": 1.4327164701516683e-06, "loss": 0.4796, "step": 10714 }, { "epoch": 0.76, "grad_norm": 1.896784020065865, "learning_rate": 1.4319113438714599e-06, "loss": 0.5217, "step": 10715 }, { "epoch": 0.76, "grad_norm": 1.6669638644168543, "learning_rate": 1.4311064060697855e-06, "loss": 0.5415, "step": 10716 }, { "epoch": 0.76, "grad_norm": 1.7126172593838513, "learning_rate": 1.4303016567891609e-06, "loss": 0.5096, "step": 10717 }, { "epoch": 0.76, "grad_norm": 1.707240071979739, "learning_rate": 1.4294970960721023e-06, "loss": 0.5036, "step": 10718 }, { "epoch": 0.76, "grad_norm": 2.294482582179599, "learning_rate": 1.4286927239611032e-06, "loss": 0.5228, "step": 10719 }, { "epoch": 0.76, "grad_norm": 1.548492764423679, "learning_rate": 1.427888540498656e-06, "loss": 0.5669, "step": 10720 }, { "epoch": 0.76, "grad_norm": 1.489976931028792, "learning_rate": 1.4270845457272398e-06, "loss": 0.4885, "step": 10721 }, { "epoch": 0.76, "grad_norm": 2.145834950694346, "learning_rate": 1.4262807396893253e-06, "loss": 0.5075, "step": 10722 }, { "epoch": 0.76, "grad_norm": 1.6544547435093089, "learning_rate": 1.4254771224273723e-06, "loss": 0.5448, "step": 10723 }, { "epoch": 0.76, "grad_norm": 1.6651615091922791, "learning_rate": 1.4246736939838267e-06, "loss": 0.4897, "step": 10724 }, { "epoch": 0.76, "grad_norm": 1.4593585620603162, "learning_rate": 1.4238704544011351e-06, "loss": 0.5003, "step": 10725 }, { "epoch": 0.76, "grad_norm": 1.7245978761357388, "learning_rate": 1.4230674037217218e-06, "loss": 0.4534, "step": 10726 }, { "epoch": 0.76, "grad_norm": 2.5848753525251906, "learning_rate": 1.4222645419880088e-06, "loss": 0.5061, "step": 10727 }, { "epoch": 0.76, "grad_norm": 1.6296942640530503, "learning_rate": 1.421461869242406e-06, "loss": 0.4651, "step": 10728 }, { "epoch": 0.76, "grad_norm": 1.6256920746644146, "learning_rate": 1.420659385527312e-06, "loss": 0.5225, "step": 10729 }, { "epoch": 0.76, "grad_norm": 1.8940850447915853, "learning_rate": 1.41985709088512e-06, "loss": 0.4523, "step": 10730 }, { "epoch": 0.76, "grad_norm": 1.6349970355833847, "learning_rate": 1.4190549853582058e-06, "loss": 0.4886, "step": 10731 }, { "epoch": 0.76, "grad_norm": 1.9063994496970478, "learning_rate": 1.4182530689889417e-06, "loss": 0.4775, "step": 10732 }, { "epoch": 0.76, "grad_norm": 1.7125627742039657, "learning_rate": 1.4174513418196867e-06, "loss": 0.4586, "step": 10733 }, { "epoch": 0.76, "grad_norm": 1.7510900369627114, "learning_rate": 1.4166498038927923e-06, "loss": 0.5567, "step": 10734 }, { "epoch": 0.76, "grad_norm": 1.7634913914957377, "learning_rate": 1.4158484552505947e-06, "loss": 0.5058, "step": 10735 }, { "epoch": 0.76, "grad_norm": 1.5729081779198033, "learning_rate": 1.4150472959354295e-06, "loss": 0.4976, "step": 10736 }, { "epoch": 0.76, "grad_norm": 1.8632113637130792, "learning_rate": 1.4142463259896123e-06, "loss": 0.5066, "step": 10737 }, { "epoch": 0.76, "grad_norm": 1.6688570314621418, "learning_rate": 1.413445545455454e-06, "loss": 0.4868, "step": 10738 }, { "epoch": 0.76, "grad_norm": 0.7023971128056485, "learning_rate": 1.4126449543752551e-06, "loss": 0.4413, "step": 10739 }, { "epoch": 0.76, "grad_norm": 1.6909637471205259, "learning_rate": 1.411844552791305e-06, "loss": 0.5009, "step": 10740 }, { "epoch": 0.76, "grad_norm": 2.1619395615344295, "learning_rate": 1.4110443407458856e-06, "loss": 0.5483, "step": 10741 }, { "epoch": 0.76, "grad_norm": 1.6149214819350879, "learning_rate": 1.4102443182812642e-06, "loss": 0.4382, "step": 10742 }, { "epoch": 0.76, "grad_norm": 1.8694029230294038, "learning_rate": 1.409444485439701e-06, "loss": 0.5435, "step": 10743 }, { "epoch": 0.76, "grad_norm": 1.6490457447757367, "learning_rate": 1.408644842263447e-06, "loss": 0.5389, "step": 10744 }, { "epoch": 0.76, "grad_norm": 1.6000138393444086, "learning_rate": 1.4078453887947407e-06, "loss": 0.5064, "step": 10745 }, { "epoch": 0.76, "grad_norm": 1.717760476677669, "learning_rate": 1.4070461250758156e-06, "loss": 0.4971, "step": 10746 }, { "epoch": 0.76, "grad_norm": 1.5467693725305895, "learning_rate": 1.4062470511488845e-06, "loss": 0.4635, "step": 10747 }, { "epoch": 0.76, "grad_norm": 1.7248685137234698, "learning_rate": 1.4054481670561653e-06, "loss": 0.5078, "step": 10748 }, { "epoch": 0.76, "grad_norm": 1.6763223195559056, "learning_rate": 1.4046494728398524e-06, "loss": 0.5552, "step": 10749 }, { "epoch": 0.76, "grad_norm": 1.5971850813479491, "learning_rate": 1.403850968542137e-06, "loss": 0.4917, "step": 10750 }, { "epoch": 0.76, "grad_norm": 2.1171878568620532, "learning_rate": 1.4030526542051985e-06, "loss": 0.4949, "step": 10751 }, { "epoch": 0.76, "grad_norm": 1.8266235665584014, "learning_rate": 1.402254529871207e-06, "loss": 0.5276, "step": 10752 }, { "epoch": 0.76, "grad_norm": 1.537600336842719, "learning_rate": 1.401456595582324e-06, "loss": 0.5503, "step": 10753 }, { "epoch": 0.76, "grad_norm": 1.5732412234281759, "learning_rate": 1.400658851380694e-06, "loss": 0.4895, "step": 10754 }, { "epoch": 0.76, "grad_norm": 2.5202157508929215, "learning_rate": 1.3998612973084623e-06, "loss": 0.5683, "step": 10755 }, { "epoch": 0.76, "grad_norm": 0.7205810359616004, "learning_rate": 1.3990639334077549e-06, "loss": 0.4228, "step": 10756 }, { "epoch": 0.76, "grad_norm": 1.8458442072661652, "learning_rate": 1.3982667597206917e-06, "loss": 0.495, "step": 10757 }, { "epoch": 0.76, "grad_norm": 0.7493755654774225, "learning_rate": 1.3974697762893829e-06, "loss": 0.4334, "step": 10758 }, { "epoch": 0.76, "grad_norm": 2.0170758999995666, "learning_rate": 1.3966729831559272e-06, "loss": 0.5544, "step": 10759 }, { "epoch": 0.76, "grad_norm": 1.5806114980049066, "learning_rate": 1.3958763803624159e-06, "loss": 0.539, "step": 10760 }, { "epoch": 0.76, "grad_norm": 1.7386390559295708, "learning_rate": 1.3950799679509252e-06, "loss": 0.5126, "step": 10761 }, { "epoch": 0.76, "grad_norm": 1.780698069041306, "learning_rate": 1.3942837459635262e-06, "loss": 0.4859, "step": 10762 }, { "epoch": 0.76, "grad_norm": 1.5833203439517853, "learning_rate": 1.393487714442277e-06, "loss": 0.55, "step": 10763 }, { "epoch": 0.76, "grad_norm": 1.7703132117391545, "learning_rate": 1.392691873429229e-06, "loss": 0.5311, "step": 10764 }, { "epoch": 0.76, "grad_norm": 1.7591659013846739, "learning_rate": 1.3918962229664174e-06, "loss": 0.4928, "step": 10765 }, { "epoch": 0.76, "grad_norm": 2.3766931020872804, "learning_rate": 1.3911007630958723e-06, "loss": 0.5052, "step": 10766 }, { "epoch": 0.76, "grad_norm": 1.4238150525240583, "learning_rate": 1.390305493859616e-06, "loss": 0.4587, "step": 10767 }, { "epoch": 0.76, "grad_norm": 1.486732409296287, "learning_rate": 1.3895104152996542e-06, "loss": 0.4477, "step": 10768 }, { "epoch": 0.76, "grad_norm": 4.470421234028526, "learning_rate": 1.3887155274579855e-06, "loss": 0.5735, "step": 10769 }, { "epoch": 0.76, "grad_norm": 1.9718124236404266, "learning_rate": 1.3879208303765996e-06, "loss": 0.508, "step": 10770 }, { "epoch": 0.76, "grad_norm": 1.6352607679428355, "learning_rate": 1.3871263240974764e-06, "loss": 0.5242, "step": 10771 }, { "epoch": 0.76, "grad_norm": 6.64926080617895, "learning_rate": 1.3863320086625814e-06, "loss": 0.5503, "step": 10772 }, { "epoch": 0.76, "grad_norm": 1.9877700749720486, "learning_rate": 1.3855378841138744e-06, "loss": 0.5142, "step": 10773 }, { "epoch": 0.76, "grad_norm": 1.6574324579675601, "learning_rate": 1.3847439504933036e-06, "loss": 0.5299, "step": 10774 }, { "epoch": 0.76, "grad_norm": 1.673189786737734, "learning_rate": 1.383950207842808e-06, "loss": 0.5598, "step": 10775 }, { "epoch": 0.76, "grad_norm": 1.6265973083249583, "learning_rate": 1.3831566562043165e-06, "loss": 0.5392, "step": 10776 }, { "epoch": 0.76, "grad_norm": 0.7985231841081898, "learning_rate": 1.3823632956197431e-06, "loss": 0.4412, "step": 10777 }, { "epoch": 0.76, "grad_norm": 1.7135545892219708, "learning_rate": 1.3815701261310011e-06, "loss": 0.6281, "step": 10778 }, { "epoch": 0.76, "grad_norm": 1.618624885081931, "learning_rate": 1.3807771477799848e-06, "loss": 0.4682, "step": 10779 }, { "epoch": 0.76, "grad_norm": 1.7250185364305821, "learning_rate": 1.3799843606085834e-06, "loss": 0.5448, "step": 10780 }, { "epoch": 0.77, "grad_norm": 1.896743668037169, "learning_rate": 1.3791917646586738e-06, "loss": 0.4985, "step": 10781 }, { "epoch": 0.77, "grad_norm": 1.6575098190862174, "learning_rate": 1.3783993599721246e-06, "loss": 0.4842, "step": 10782 }, { "epoch": 0.77, "grad_norm": 0.6646799959977178, "learning_rate": 1.3776071465907947e-06, "loss": 0.4199, "step": 10783 }, { "epoch": 0.77, "grad_norm": 1.6072581843289775, "learning_rate": 1.376815124556526e-06, "loss": 0.5164, "step": 10784 }, { "epoch": 0.77, "grad_norm": 1.6995449140290093, "learning_rate": 1.3760232939111628e-06, "loss": 0.5051, "step": 10785 }, { "epoch": 0.77, "grad_norm": 1.9830614836395266, "learning_rate": 1.3752316546965273e-06, "loss": 0.5345, "step": 10786 }, { "epoch": 0.77, "grad_norm": 1.9445254194837112, "learning_rate": 1.3744402069544383e-06, "loss": 0.5365, "step": 10787 }, { "epoch": 0.77, "grad_norm": 2.1075986754908067, "learning_rate": 1.3736489507267026e-06, "loss": 0.5288, "step": 10788 }, { "epoch": 0.77, "grad_norm": 1.6220517537654202, "learning_rate": 1.3728578860551168e-06, "loss": 0.5854, "step": 10789 }, { "epoch": 0.77, "grad_norm": 1.8437658547550106, "learning_rate": 1.3720670129814695e-06, "loss": 0.5561, "step": 10790 }, { "epoch": 0.77, "grad_norm": 1.5569581175004117, "learning_rate": 1.3712763315475347e-06, "loss": 0.5243, "step": 10791 }, { "epoch": 0.77, "grad_norm": 1.4876635475661717, "learning_rate": 1.3704858417950796e-06, "loss": 0.5057, "step": 10792 }, { "epoch": 0.77, "grad_norm": 1.7237070729382227, "learning_rate": 1.3696955437658605e-06, "loss": 0.5354, "step": 10793 }, { "epoch": 0.77, "grad_norm": 1.6329757577962747, "learning_rate": 1.3689054375016236e-06, "loss": 0.5325, "step": 10794 }, { "epoch": 0.77, "grad_norm": 1.8207530199229034, "learning_rate": 1.368115523044108e-06, "loss": 0.5187, "step": 10795 }, { "epoch": 0.77, "grad_norm": 1.96717407545796, "learning_rate": 1.3673258004350332e-06, "loss": 0.485, "step": 10796 }, { "epoch": 0.77, "grad_norm": 4.820840660173064, "learning_rate": 1.3665362697161216e-06, "loss": 0.5432, "step": 10797 }, { "epoch": 0.77, "grad_norm": 1.706725214050547, "learning_rate": 1.365746930929075e-06, "loss": 0.4918, "step": 10798 }, { "epoch": 0.77, "grad_norm": 1.5185241550654174, "learning_rate": 1.3649577841155914e-06, "loss": 0.5179, "step": 10799 }, { "epoch": 0.77, "grad_norm": 1.358417799735522, "learning_rate": 1.3641688293173516e-06, "loss": 0.4618, "step": 10800 }, { "epoch": 0.77, "grad_norm": 1.5536970917126065, "learning_rate": 1.3633800665760377e-06, "loss": 0.5222, "step": 10801 }, { "epoch": 0.77, "grad_norm": 0.7303642229592839, "learning_rate": 1.3625914959333092e-06, "loss": 0.4291, "step": 10802 }, { "epoch": 0.77, "grad_norm": 1.7863602360718673, "learning_rate": 1.3618031174308221e-06, "loss": 0.5382, "step": 10803 }, { "epoch": 0.77, "grad_norm": 1.5937928157830576, "learning_rate": 1.361014931110225e-06, "loss": 0.5474, "step": 10804 }, { "epoch": 0.77, "grad_norm": 1.824677566969117, "learning_rate": 1.3602269370131482e-06, "loss": 0.4961, "step": 10805 }, { "epoch": 0.77, "grad_norm": 2.004135958829725, "learning_rate": 1.3594391351812191e-06, "loss": 0.4895, "step": 10806 }, { "epoch": 0.77, "grad_norm": 3.5644815147600806, "learning_rate": 1.3586515256560484e-06, "loss": 0.5761, "step": 10807 }, { "epoch": 0.77, "grad_norm": 1.7672489785111498, "learning_rate": 1.3578641084792455e-06, "loss": 0.5223, "step": 10808 }, { "epoch": 0.77, "grad_norm": 2.396672796929587, "learning_rate": 1.3570768836923997e-06, "loss": 0.4696, "step": 10809 }, { "epoch": 0.77, "grad_norm": 1.8328926536843815, "learning_rate": 1.356289851337097e-06, "loss": 0.5479, "step": 10810 }, { "epoch": 0.77, "grad_norm": 1.9344470374285925, "learning_rate": 1.3555030114549111e-06, "loss": 0.5148, "step": 10811 }, { "epoch": 0.77, "grad_norm": 1.898740355845096, "learning_rate": 1.3547163640874055e-06, "loss": 0.54, "step": 10812 }, { "epoch": 0.77, "grad_norm": 1.592347259048833, "learning_rate": 1.353929909276135e-06, "loss": 0.5496, "step": 10813 }, { "epoch": 0.77, "grad_norm": 1.7115295963858392, "learning_rate": 1.3531436470626392e-06, "loss": 0.4501, "step": 10814 }, { "epoch": 0.77, "grad_norm": 1.982638465216036, "learning_rate": 1.3523575774884533e-06, "loss": 0.5668, "step": 10815 }, { "epoch": 0.77, "grad_norm": 1.7137491018533926, "learning_rate": 1.3515717005951007e-06, "loss": 0.5923, "step": 10816 }, { "epoch": 0.77, "grad_norm": 1.4560434586256394, "learning_rate": 1.3507860164240926e-06, "loss": 0.4622, "step": 10817 }, { "epoch": 0.77, "grad_norm": 1.6763341786919048, "learning_rate": 1.350000525016933e-06, "loss": 0.5224, "step": 10818 }, { "epoch": 0.77, "grad_norm": 1.5820061868755284, "learning_rate": 1.3492152264151132e-06, "loss": 0.4645, "step": 10819 }, { "epoch": 0.77, "grad_norm": 1.5955575530072472, "learning_rate": 1.3484301206601174e-06, "loss": 0.5529, "step": 10820 }, { "epoch": 0.77, "grad_norm": 1.6539871815222473, "learning_rate": 1.3476452077934143e-06, "loss": 0.509, "step": 10821 }, { "epoch": 0.77, "grad_norm": 1.7803501078801294, "learning_rate": 1.3468604878564667e-06, "loss": 0.5686, "step": 10822 }, { "epoch": 0.77, "grad_norm": 1.5839262309868443, "learning_rate": 1.3460759608907275e-06, "loss": 0.4696, "step": 10823 }, { "epoch": 0.77, "grad_norm": 1.638000104485249, "learning_rate": 1.3452916269376364e-06, "loss": 0.5251, "step": 10824 }, { "epoch": 0.77, "grad_norm": 1.5937625982921233, "learning_rate": 1.3445074860386275e-06, "loss": 0.4964, "step": 10825 }, { "epoch": 0.77, "grad_norm": 1.4614454784739406, "learning_rate": 1.3437235382351166e-06, "loss": 0.4622, "step": 10826 }, { "epoch": 0.77, "grad_norm": 1.472918352339485, "learning_rate": 1.3429397835685203e-06, "loss": 0.5298, "step": 10827 }, { "epoch": 0.77, "grad_norm": 1.6966648336472956, "learning_rate": 1.3421562220802353e-06, "loss": 0.5282, "step": 10828 }, { "epoch": 0.77, "grad_norm": 1.4272625988031236, "learning_rate": 1.3413728538116544e-06, "loss": 0.477, "step": 10829 }, { "epoch": 0.77, "grad_norm": 0.6821881276400853, "learning_rate": 1.3405896788041533e-06, "loss": 0.4108, "step": 10830 }, { "epoch": 0.77, "grad_norm": 1.8282967541759858, "learning_rate": 1.339806697099107e-06, "loss": 0.4943, "step": 10831 }, { "epoch": 0.77, "grad_norm": 2.250535763471322, "learning_rate": 1.3390239087378749e-06, "loss": 0.4816, "step": 10832 }, { "epoch": 0.77, "grad_norm": 1.6265469597184998, "learning_rate": 1.3382413137618017e-06, "loss": 0.5432, "step": 10833 }, { "epoch": 0.77, "grad_norm": 0.6748725763302437, "learning_rate": 1.3374589122122334e-06, "loss": 0.4237, "step": 10834 }, { "epoch": 0.77, "grad_norm": 1.6431619625285614, "learning_rate": 1.336676704130494e-06, "loss": 0.525, "step": 10835 }, { "epoch": 0.77, "grad_norm": 1.7352099551862556, "learning_rate": 1.3358946895579056e-06, "loss": 0.559, "step": 10836 }, { "epoch": 0.77, "grad_norm": 2.260550393493645, "learning_rate": 1.3351128685357723e-06, "loss": 0.5052, "step": 10837 }, { "epoch": 0.77, "grad_norm": 1.4441147204245448, "learning_rate": 1.3343312411053989e-06, "loss": 0.4801, "step": 10838 }, { "epoch": 0.77, "grad_norm": 0.7896090140454196, "learning_rate": 1.3335498073080688e-06, "loss": 0.4415, "step": 10839 }, { "epoch": 0.77, "grad_norm": 1.8723307585971487, "learning_rate": 1.332768567185061e-06, "loss": 0.557, "step": 10840 }, { "epoch": 0.77, "grad_norm": 2.2690086799152573, "learning_rate": 1.3319875207776444e-06, "loss": 0.4677, "step": 10841 }, { "epoch": 0.77, "grad_norm": 1.7306168696864754, "learning_rate": 1.3312066681270758e-06, "loss": 0.5228, "step": 10842 }, { "epoch": 0.77, "grad_norm": 2.2791162568489716, "learning_rate": 1.3304260092746034e-06, "loss": 0.4687, "step": 10843 }, { "epoch": 0.77, "grad_norm": 0.6640722135241682, "learning_rate": 1.3296455442614625e-06, "loss": 0.4259, "step": 10844 }, { "epoch": 0.77, "grad_norm": 1.5376700139075838, "learning_rate": 1.328865273128881e-06, "loss": 0.4699, "step": 10845 }, { "epoch": 0.77, "grad_norm": 1.9620033433990103, "learning_rate": 1.3280851959180747e-06, "loss": 0.5042, "step": 10846 }, { "epoch": 0.77, "grad_norm": 1.6584580160662243, "learning_rate": 1.3273053126702512e-06, "loss": 0.5036, "step": 10847 }, { "epoch": 0.77, "grad_norm": 0.7170176569345356, "learning_rate": 1.326525623426605e-06, "loss": 0.4238, "step": 10848 }, { "epoch": 0.77, "grad_norm": 1.9149581596712926, "learning_rate": 1.3257461282283236e-06, "loss": 0.4951, "step": 10849 }, { "epoch": 0.77, "grad_norm": 1.7172657234205344, "learning_rate": 1.3249668271165827e-06, "loss": 0.4922, "step": 10850 }, { "epoch": 0.77, "grad_norm": 2.1579950177673957, "learning_rate": 1.3241877201325453e-06, "loss": 0.4799, "step": 10851 }, { "epoch": 0.77, "grad_norm": 1.9550172552017977, "learning_rate": 1.3234088073173684e-06, "loss": 0.6122, "step": 10852 }, { "epoch": 0.77, "grad_norm": 1.5836425870918611, "learning_rate": 1.3226300887121956e-06, "loss": 0.5034, "step": 10853 }, { "epoch": 0.77, "grad_norm": 1.7411442061465618, "learning_rate": 1.3218515643581624e-06, "loss": 0.4813, "step": 10854 }, { "epoch": 0.77, "grad_norm": 1.7273748559302142, "learning_rate": 1.3210732342963944e-06, "loss": 0.6032, "step": 10855 }, { "epoch": 0.77, "grad_norm": 1.8359622808204898, "learning_rate": 1.3202950985680013e-06, "loss": 0.5453, "step": 10856 }, { "epoch": 0.77, "grad_norm": 1.8134492969100848, "learning_rate": 1.3195171572140926e-06, "loss": 0.5803, "step": 10857 }, { "epoch": 0.77, "grad_norm": 1.873054262760071, "learning_rate": 1.3187394102757567e-06, "loss": 0.5315, "step": 10858 }, { "epoch": 0.77, "grad_norm": 3.608522399650705, "learning_rate": 1.3179618577940796e-06, "loss": 0.4858, "step": 10859 }, { "epoch": 0.77, "grad_norm": 0.7345848305050079, "learning_rate": 1.3171844998101329e-06, "loss": 0.4109, "step": 10860 }, { "epoch": 0.77, "grad_norm": 1.9721112041195157, "learning_rate": 1.316407336364981e-06, "loss": 0.4802, "step": 10861 }, { "epoch": 0.77, "grad_norm": 1.6165099301235486, "learning_rate": 1.3156303674996763e-06, "loss": 0.5166, "step": 10862 }, { "epoch": 0.77, "grad_norm": 1.7498451984947772, "learning_rate": 1.314853593255258e-06, "loss": 0.5243, "step": 10863 }, { "epoch": 0.77, "grad_norm": 1.759533510534774, "learning_rate": 1.3140770136727604e-06, "loss": 0.4907, "step": 10864 }, { "epoch": 0.77, "grad_norm": 1.76117665340654, "learning_rate": 1.3133006287932048e-06, "loss": 0.5611, "step": 10865 }, { "epoch": 0.77, "grad_norm": 2.6206652410470435, "learning_rate": 1.312524438657603e-06, "loss": 0.4935, "step": 10866 }, { "epoch": 0.77, "grad_norm": 1.6213199008454326, "learning_rate": 1.3117484433069528e-06, "loss": 0.5158, "step": 10867 }, { "epoch": 0.77, "grad_norm": 1.6494584252806501, "learning_rate": 1.31097264278225e-06, "loss": 0.5687, "step": 10868 }, { "epoch": 0.77, "grad_norm": 1.545792484611017, "learning_rate": 1.310197037124471e-06, "loss": 0.515, "step": 10869 }, { "epoch": 0.77, "grad_norm": 1.5150031808353726, "learning_rate": 1.3094216263745874e-06, "loss": 0.4934, "step": 10870 }, { "epoch": 0.77, "grad_norm": 1.6938887840230332, "learning_rate": 1.3086464105735585e-06, "loss": 0.5022, "step": 10871 }, { "epoch": 0.77, "grad_norm": 1.6672224122830244, "learning_rate": 1.3078713897623346e-06, "loss": 0.533, "step": 10872 }, { "epoch": 0.77, "grad_norm": 1.5924839949429324, "learning_rate": 1.307096563981856e-06, "loss": 0.5183, "step": 10873 }, { "epoch": 0.77, "grad_norm": 1.6076138539633134, "learning_rate": 1.3063219332730486e-06, "loss": 0.5678, "step": 10874 }, { "epoch": 0.77, "grad_norm": 1.6844381393107593, "learning_rate": 1.3055474976768323e-06, "loss": 0.5204, "step": 10875 }, { "epoch": 0.77, "grad_norm": 1.8786530261159826, "learning_rate": 1.304773257234116e-06, "loss": 0.5142, "step": 10876 }, { "epoch": 0.77, "grad_norm": 1.780473282005095, "learning_rate": 1.3039992119857974e-06, "loss": 0.5269, "step": 10877 }, { "epoch": 0.77, "grad_norm": 1.628889769970003, "learning_rate": 1.3032253619727653e-06, "loss": 0.5852, "step": 10878 }, { "epoch": 0.77, "grad_norm": 0.6635681414229859, "learning_rate": 1.3024517072358932e-06, "loss": 0.4132, "step": 10879 }, { "epoch": 0.77, "grad_norm": 1.8671274388850376, "learning_rate": 1.3016782478160546e-06, "loss": 0.5643, "step": 10880 }, { "epoch": 0.77, "grad_norm": 1.4677589370330062, "learning_rate": 1.3009049837541e-06, "loss": 0.4643, "step": 10881 }, { "epoch": 0.77, "grad_norm": 1.6096491344110326, "learning_rate": 1.3001319150908787e-06, "loss": 0.4846, "step": 10882 }, { "epoch": 0.77, "grad_norm": 1.5987129552508992, "learning_rate": 1.299359041867227e-06, "loss": 0.5089, "step": 10883 }, { "epoch": 0.77, "grad_norm": 1.4806032435840462, "learning_rate": 1.2985863641239699e-06, "loss": 0.4941, "step": 10884 }, { "epoch": 0.77, "grad_norm": 2.733318059857817, "learning_rate": 1.2978138819019242e-06, "loss": 0.5224, "step": 10885 }, { "epoch": 0.77, "grad_norm": 1.589178650142607, "learning_rate": 1.2970415952418913e-06, "loss": 0.5277, "step": 10886 }, { "epoch": 0.77, "grad_norm": 1.5974596521741362, "learning_rate": 1.2962695041846717e-06, "loss": 0.4663, "step": 10887 }, { "epoch": 0.77, "grad_norm": 1.5829995520229314, "learning_rate": 1.2954976087710458e-06, "loss": 0.5461, "step": 10888 }, { "epoch": 0.77, "grad_norm": 1.6826849810476183, "learning_rate": 1.2947259090417879e-06, "loss": 0.4897, "step": 10889 }, { "epoch": 0.77, "grad_norm": 1.5241364430488633, "learning_rate": 1.2939544050376634e-06, "loss": 0.4947, "step": 10890 }, { "epoch": 0.77, "grad_norm": 1.6331117431285345, "learning_rate": 1.2931830967994247e-06, "loss": 0.5327, "step": 10891 }, { "epoch": 0.77, "grad_norm": 2.213586049037629, "learning_rate": 1.2924119843678168e-06, "loss": 0.503, "step": 10892 }, { "epoch": 0.77, "grad_norm": 1.6095104738278714, "learning_rate": 1.2916410677835695e-06, "loss": 0.5569, "step": 10893 }, { "epoch": 0.77, "grad_norm": 1.764558426687655, "learning_rate": 1.2908703470874063e-06, "loss": 0.5549, "step": 10894 }, { "epoch": 0.77, "grad_norm": 1.7652998150375065, "learning_rate": 1.29009982232004e-06, "loss": 0.5218, "step": 10895 }, { "epoch": 0.77, "grad_norm": 1.9896737676938796, "learning_rate": 1.2893294935221734e-06, "loss": 0.4888, "step": 10896 }, { "epoch": 0.77, "grad_norm": 1.73947015601203, "learning_rate": 1.2885593607344932e-06, "loss": 0.4929, "step": 10897 }, { "epoch": 0.77, "grad_norm": 2.1017269643989707, "learning_rate": 1.2877894239976852e-06, "loss": 0.5537, "step": 10898 }, { "epoch": 0.77, "grad_norm": 1.9744171509484898, "learning_rate": 1.2870196833524202e-06, "loss": 0.5063, "step": 10899 }, { "epoch": 0.77, "grad_norm": 1.6034252855920001, "learning_rate": 1.286250138839355e-06, "loss": 0.5488, "step": 10900 }, { "epoch": 0.77, "grad_norm": 1.4709446157678105, "learning_rate": 1.2854807904991413e-06, "loss": 0.4567, "step": 10901 }, { "epoch": 0.77, "grad_norm": 1.9340872316312068, "learning_rate": 1.284711638372419e-06, "loss": 0.5135, "step": 10902 }, { "epoch": 0.77, "grad_norm": 1.4863720974327204, "learning_rate": 1.2839426824998186e-06, "loss": 0.5075, "step": 10903 }, { "epoch": 0.77, "grad_norm": 1.661969280381403, "learning_rate": 1.2831739229219559e-06, "loss": 0.5403, "step": 10904 }, { "epoch": 0.77, "grad_norm": 0.7470434211525666, "learning_rate": 1.2824053596794412e-06, "loss": 0.4078, "step": 10905 }, { "epoch": 0.77, "grad_norm": 1.7095089273979542, "learning_rate": 1.281636992812872e-06, "loss": 0.551, "step": 10906 }, { "epoch": 0.77, "grad_norm": 1.8277512043405852, "learning_rate": 1.2808688223628362e-06, "loss": 0.5072, "step": 10907 }, { "epoch": 0.77, "grad_norm": 1.5660011362295392, "learning_rate": 1.280100848369913e-06, "loss": 0.4696, "step": 10908 }, { "epoch": 0.77, "grad_norm": 1.7260660750113028, "learning_rate": 1.279333070874665e-06, "loss": 0.5206, "step": 10909 }, { "epoch": 0.77, "grad_norm": 1.8580107217008046, "learning_rate": 1.278565489917654e-06, "loss": 0.5062, "step": 10910 }, { "epoch": 0.77, "grad_norm": 2.090537444035423, "learning_rate": 1.2777981055394228e-06, "loss": 0.5537, "step": 10911 }, { "epoch": 0.77, "grad_norm": 1.8390883058838106, "learning_rate": 1.2770309177805079e-06, "loss": 0.4766, "step": 10912 }, { "epoch": 0.77, "grad_norm": 1.5258573207786446, "learning_rate": 1.2762639266814352e-06, "loss": 0.4744, "step": 10913 }, { "epoch": 0.77, "grad_norm": 2.118609343832253, "learning_rate": 1.2754971322827198e-06, "loss": 0.5708, "step": 10914 }, { "epoch": 0.77, "grad_norm": 1.637177550612563, "learning_rate": 1.2747305346248673e-06, "loss": 0.5297, "step": 10915 }, { "epoch": 0.77, "grad_norm": 1.565659377173418, "learning_rate": 1.2739641337483682e-06, "loss": 0.4914, "step": 10916 }, { "epoch": 0.77, "grad_norm": 0.7300313374075204, "learning_rate": 1.2731979296937119e-06, "loss": 0.4321, "step": 10917 }, { "epoch": 0.77, "grad_norm": 1.7013292738967674, "learning_rate": 1.2724319225013675e-06, "loss": 0.5365, "step": 10918 }, { "epoch": 0.77, "grad_norm": 1.6586119382020115, "learning_rate": 1.2716661122118e-06, "loss": 0.53, "step": 10919 }, { "epoch": 0.77, "grad_norm": 0.720656146042525, "learning_rate": 1.2709004988654617e-06, "loss": 0.4217, "step": 10920 }, { "epoch": 0.77, "grad_norm": 1.9288341502543607, "learning_rate": 1.2701350825027946e-06, "loss": 0.4858, "step": 10921 }, { "epoch": 0.78, "grad_norm": 1.9361374663790252, "learning_rate": 1.2693698631642333e-06, "loss": 0.634, "step": 10922 }, { "epoch": 0.78, "grad_norm": 2.3860815941462477, "learning_rate": 1.2686048408901946e-06, "loss": 0.5376, "step": 10923 }, { "epoch": 0.78, "grad_norm": 1.6787751125163501, "learning_rate": 1.2678400157210924e-06, "loss": 0.517, "step": 10924 }, { "epoch": 0.78, "grad_norm": 1.7674574770848104, "learning_rate": 1.2670753876973263e-06, "loss": 0.5089, "step": 10925 }, { "epoch": 0.78, "grad_norm": 2.2337662604144652, "learning_rate": 1.2663109568592874e-06, "loss": 0.4933, "step": 10926 }, { "epoch": 0.78, "grad_norm": 1.6985228572491242, "learning_rate": 1.2655467232473567e-06, "loss": 0.4558, "step": 10927 }, { "epoch": 0.78, "grad_norm": 1.5703144330172456, "learning_rate": 1.2647826869018991e-06, "loss": 0.4567, "step": 10928 }, { "epoch": 0.78, "grad_norm": 1.8103053629105468, "learning_rate": 1.2640188478632798e-06, "loss": 0.5373, "step": 10929 }, { "epoch": 0.78, "grad_norm": 1.5867033004971172, "learning_rate": 1.2632552061718428e-06, "loss": 0.5389, "step": 10930 }, { "epoch": 0.78, "grad_norm": 1.670952411694668, "learning_rate": 1.2624917618679272e-06, "loss": 0.5978, "step": 10931 }, { "epoch": 0.78, "grad_norm": 1.7404344452819018, "learning_rate": 1.2617285149918612e-06, "loss": 0.5095, "step": 10932 }, { "epoch": 0.78, "grad_norm": 2.0390939570436766, "learning_rate": 1.2609654655839636e-06, "loss": 0.5067, "step": 10933 }, { "epoch": 0.78, "grad_norm": 1.7684965824315106, "learning_rate": 1.2602026136845386e-06, "loss": 0.5585, "step": 10934 }, { "epoch": 0.78, "grad_norm": 1.7626715308845304, "learning_rate": 1.259439959333882e-06, "loss": 0.4445, "step": 10935 }, { "epoch": 0.78, "grad_norm": 1.5751333808817984, "learning_rate": 1.258677502572284e-06, "loss": 0.4912, "step": 10936 }, { "epoch": 0.78, "grad_norm": 0.7485465937847902, "learning_rate": 1.2579152434400167e-06, "loss": 0.4224, "step": 10937 }, { "epoch": 0.78, "grad_norm": 1.5862343480983825, "learning_rate": 1.2571531819773474e-06, "loss": 0.4975, "step": 10938 }, { "epoch": 0.78, "grad_norm": 1.8662947066915405, "learning_rate": 1.2563913182245268e-06, "loss": 0.4962, "step": 10939 }, { "epoch": 0.78, "grad_norm": 1.6056648932982254, "learning_rate": 1.2556296522218048e-06, "loss": 0.5353, "step": 10940 }, { "epoch": 0.78, "grad_norm": 1.957946890623682, "learning_rate": 1.2548681840094102e-06, "loss": 0.5486, "step": 10941 }, { "epoch": 0.78, "grad_norm": 1.778359055854306, "learning_rate": 1.254106913627569e-06, "loss": 0.4731, "step": 10942 }, { "epoch": 0.78, "grad_norm": 1.6516141999403011, "learning_rate": 1.2533458411164927e-06, "loss": 0.484, "step": 10943 }, { "epoch": 0.78, "grad_norm": 2.385155025537929, "learning_rate": 1.2525849665163853e-06, "loss": 0.5328, "step": 10944 }, { "epoch": 0.78, "grad_norm": 1.5913787534575845, "learning_rate": 1.2518242898674388e-06, "loss": 0.5638, "step": 10945 }, { "epoch": 0.78, "grad_norm": 1.6377100515917407, "learning_rate": 1.2510638112098318e-06, "loss": 0.4728, "step": 10946 }, { "epoch": 0.78, "grad_norm": 1.8594719833180613, "learning_rate": 1.2503035305837391e-06, "loss": 0.5721, "step": 10947 }, { "epoch": 0.78, "grad_norm": 1.817810557259487, "learning_rate": 1.249543448029319e-06, "loss": 0.5381, "step": 10948 }, { "epoch": 0.78, "grad_norm": 1.7363955972245342, "learning_rate": 1.2487835635867224e-06, "loss": 0.4544, "step": 10949 }, { "epoch": 0.78, "grad_norm": 1.4928895333967576, "learning_rate": 1.2480238772960885e-06, "loss": 0.4756, "step": 10950 }, { "epoch": 0.78, "grad_norm": 1.8761309484910431, "learning_rate": 1.2472643891975467e-06, "loss": 0.5422, "step": 10951 }, { "epoch": 0.78, "grad_norm": 1.6560713003289882, "learning_rate": 1.2465050993312172e-06, "loss": 0.5015, "step": 10952 }, { "epoch": 0.78, "grad_norm": 2.264307497425222, "learning_rate": 1.2457460077372057e-06, "loss": 0.561, "step": 10953 }, { "epoch": 0.78, "grad_norm": 1.639747596662213, "learning_rate": 1.2449871144556114e-06, "loss": 0.4399, "step": 10954 }, { "epoch": 0.78, "grad_norm": 1.964402197714277, "learning_rate": 1.2442284195265213e-06, "loss": 0.5528, "step": 10955 }, { "epoch": 0.78, "grad_norm": 1.9106746159107661, "learning_rate": 1.243469922990012e-06, "loss": 0.5506, "step": 10956 }, { "epoch": 0.78, "grad_norm": 1.860330424706358, "learning_rate": 1.2427116248861521e-06, "loss": 0.5317, "step": 10957 }, { "epoch": 0.78, "grad_norm": 1.6511085444033526, "learning_rate": 1.2419535252549925e-06, "loss": 0.5474, "step": 10958 }, { "epoch": 0.78, "grad_norm": 1.6171166731318076, "learning_rate": 1.2411956241365846e-06, "loss": 0.4332, "step": 10959 }, { "epoch": 0.78, "grad_norm": 1.9527818379880324, "learning_rate": 1.2404379215709595e-06, "loss": 0.5211, "step": 10960 }, { "epoch": 0.78, "grad_norm": 1.7909853473487907, "learning_rate": 1.239680417598142e-06, "loss": 0.5122, "step": 10961 }, { "epoch": 0.78, "grad_norm": 1.6705765684652034, "learning_rate": 1.238923112258147e-06, "loss": 0.5577, "step": 10962 }, { "epoch": 0.78, "grad_norm": 1.59827989968891, "learning_rate": 1.2381660055909767e-06, "loss": 0.5347, "step": 10963 }, { "epoch": 0.78, "grad_norm": 1.6932388352388064, "learning_rate": 1.2374090976366264e-06, "loss": 0.4934, "step": 10964 }, { "epoch": 0.78, "grad_norm": 1.5892702090688726, "learning_rate": 1.2366523884350745e-06, "loss": 0.5346, "step": 10965 }, { "epoch": 0.78, "grad_norm": 1.6500639183074481, "learning_rate": 1.235895878026298e-06, "loss": 0.5109, "step": 10966 }, { "epoch": 0.78, "grad_norm": 1.7288294292301885, "learning_rate": 1.2351395664502548e-06, "loss": 0.4571, "step": 10967 }, { "epoch": 0.78, "grad_norm": 0.6763036019456693, "learning_rate": 1.234383453746898e-06, "loss": 0.416, "step": 10968 }, { "epoch": 0.78, "grad_norm": 1.6328431926850808, "learning_rate": 1.2336275399561636e-06, "loss": 0.5477, "step": 10969 }, { "epoch": 0.78, "grad_norm": 1.9745137245006725, "learning_rate": 1.2328718251179877e-06, "loss": 0.5475, "step": 10970 }, { "epoch": 0.78, "grad_norm": 2.472301979203551, "learning_rate": 1.2321163092722855e-06, "loss": 0.5307, "step": 10971 }, { "epoch": 0.78, "grad_norm": 1.644615842700015, "learning_rate": 1.2313609924589675e-06, "loss": 0.4648, "step": 10972 }, { "epoch": 0.78, "grad_norm": 1.807021630265791, "learning_rate": 1.2306058747179312e-06, "loss": 0.4956, "step": 10973 }, { "epoch": 0.78, "grad_norm": 1.7424712562772797, "learning_rate": 1.2298509560890653e-06, "loss": 0.468, "step": 10974 }, { "epoch": 0.78, "grad_norm": 1.8247576444375855, "learning_rate": 1.2290962366122482e-06, "loss": 0.5743, "step": 10975 }, { "epoch": 0.78, "grad_norm": 1.6459636194229441, "learning_rate": 1.2283417163273425e-06, "loss": 0.5103, "step": 10976 }, { "epoch": 0.78, "grad_norm": 1.6763732738088475, "learning_rate": 1.2275873952742106e-06, "loss": 0.5284, "step": 10977 }, { "epoch": 0.78, "grad_norm": 1.9541266323491837, "learning_rate": 1.226833273492694e-06, "loss": 0.5853, "step": 10978 }, { "epoch": 0.78, "grad_norm": 2.071068654143779, "learning_rate": 1.2260793510226289e-06, "loss": 0.6004, "step": 10979 }, { "epoch": 0.78, "grad_norm": 1.9772278175674907, "learning_rate": 1.22532562790384e-06, "loss": 0.4984, "step": 10980 }, { "epoch": 0.78, "grad_norm": 1.604690224230873, "learning_rate": 1.2245721041761417e-06, "loss": 0.5191, "step": 10981 }, { "epoch": 0.78, "grad_norm": 1.9135204332283975, "learning_rate": 1.2238187798793393e-06, "loss": 0.4895, "step": 10982 }, { "epoch": 0.78, "grad_norm": 1.5643918593884518, "learning_rate": 1.2230656550532233e-06, "loss": 0.5268, "step": 10983 }, { "epoch": 0.78, "grad_norm": 2.3586472881216167, "learning_rate": 1.222312729737577e-06, "loss": 0.5818, "step": 10984 }, { "epoch": 0.78, "grad_norm": 2.018657263132681, "learning_rate": 1.2215600039721731e-06, "loss": 0.5103, "step": 10985 }, { "epoch": 0.78, "grad_norm": 2.3797141593182243, "learning_rate": 1.2208074777967731e-06, "loss": 0.5932, "step": 10986 }, { "epoch": 0.78, "grad_norm": 4.498677460649487, "learning_rate": 1.2200551512511288e-06, "loss": 0.4832, "step": 10987 }, { "epoch": 0.78, "grad_norm": 1.708014649105612, "learning_rate": 1.2193030243749771e-06, "loss": 0.4929, "step": 10988 }, { "epoch": 0.78, "grad_norm": 2.2853829723086583, "learning_rate": 1.2185510972080527e-06, "loss": 0.5301, "step": 10989 }, { "epoch": 0.78, "grad_norm": 1.6260146990716302, "learning_rate": 1.217799369790072e-06, "loss": 0.5284, "step": 10990 }, { "epoch": 0.78, "grad_norm": 0.7216031665940201, "learning_rate": 1.217047842160744e-06, "loss": 0.4344, "step": 10991 }, { "epoch": 0.78, "grad_norm": 1.8084276467837974, "learning_rate": 1.216296514359767e-06, "loss": 0.5077, "step": 10992 }, { "epoch": 0.78, "grad_norm": 1.7280216784861346, "learning_rate": 1.2155453864268296e-06, "loss": 0.5521, "step": 10993 }, { "epoch": 0.78, "grad_norm": 1.7761296895490348, "learning_rate": 1.2147944584016097e-06, "loss": 0.5619, "step": 10994 }, { "epoch": 0.78, "grad_norm": 1.6934806052539673, "learning_rate": 1.2140437303237696e-06, "loss": 0.5786, "step": 10995 }, { "epoch": 0.78, "grad_norm": 0.7103871468257379, "learning_rate": 1.2132932022329707e-06, "loss": 0.4186, "step": 10996 }, { "epoch": 0.78, "grad_norm": 1.8452055767860907, "learning_rate": 1.2125428741688549e-06, "loss": 0.5208, "step": 10997 }, { "epoch": 0.78, "grad_norm": 1.921722611938055, "learning_rate": 1.21179274617106e-06, "loss": 0.4652, "step": 10998 }, { "epoch": 0.78, "grad_norm": 1.9065178563011886, "learning_rate": 1.211042818279205e-06, "loss": 0.5283, "step": 10999 }, { "epoch": 0.78, "grad_norm": 1.8959396608879782, "learning_rate": 1.2102930905329101e-06, "loss": 0.5924, "step": 11000 }, { "epoch": 0.78, "grad_norm": 1.701487420256794, "learning_rate": 1.2095435629717739e-06, "loss": 0.5692, "step": 11001 }, { "epoch": 0.78, "grad_norm": 1.5104060329745876, "learning_rate": 1.208794235635391e-06, "loss": 0.4782, "step": 11002 }, { "epoch": 0.78, "grad_norm": 1.4737803083244958, "learning_rate": 1.2080451085633426e-06, "loss": 0.4538, "step": 11003 }, { "epoch": 0.78, "grad_norm": 1.7539798377374956, "learning_rate": 1.2072961817952005e-06, "loss": 0.4488, "step": 11004 }, { "epoch": 0.78, "grad_norm": 1.9284126154254553, "learning_rate": 1.206547455370527e-06, "loss": 0.5156, "step": 11005 }, { "epoch": 0.78, "grad_norm": 1.8075635304328046, "learning_rate": 1.205798929328869e-06, "loss": 0.5144, "step": 11006 }, { "epoch": 0.78, "grad_norm": 1.824095700367255, "learning_rate": 1.2050506037097687e-06, "loss": 0.5491, "step": 11007 }, { "epoch": 0.78, "grad_norm": 1.953721775757005, "learning_rate": 1.2043024785527542e-06, "loss": 0.5383, "step": 11008 }, { "epoch": 0.78, "grad_norm": 0.6471240243110402, "learning_rate": 1.2035545538973442e-06, "loss": 0.4208, "step": 11009 }, { "epoch": 0.78, "grad_norm": 1.4729591148435044, "learning_rate": 1.202806829783047e-06, "loss": 0.447, "step": 11010 }, { "epoch": 0.78, "grad_norm": 1.8555601159808282, "learning_rate": 1.2020593062493596e-06, "loss": 0.5291, "step": 11011 }, { "epoch": 0.78, "grad_norm": 2.1363934261708106, "learning_rate": 1.2013119833357706e-06, "loss": 0.4998, "step": 11012 }, { "epoch": 0.78, "grad_norm": 0.7025609295265299, "learning_rate": 1.2005648610817528e-06, "loss": 0.4191, "step": 11013 }, { "epoch": 0.78, "grad_norm": 1.527027904313943, "learning_rate": 1.1998179395267729e-06, "loss": 0.481, "step": 11014 }, { "epoch": 0.78, "grad_norm": 1.6594806284918397, "learning_rate": 1.1990712187102865e-06, "loss": 0.48, "step": 11015 }, { "epoch": 0.78, "grad_norm": 1.793571414082666, "learning_rate": 1.1983246986717378e-06, "loss": 0.4762, "step": 11016 }, { "epoch": 0.78, "grad_norm": 2.3243059541804163, "learning_rate": 1.1975783794505614e-06, "loss": 0.4944, "step": 11017 }, { "epoch": 0.78, "grad_norm": 1.4591267966253827, "learning_rate": 1.1968322610861767e-06, "loss": 0.5156, "step": 11018 }, { "epoch": 0.78, "grad_norm": 1.793487527401563, "learning_rate": 1.1960863436180016e-06, "loss": 0.5755, "step": 11019 }, { "epoch": 0.78, "grad_norm": 0.7824704432162839, "learning_rate": 1.195340627085434e-06, "loss": 0.4163, "step": 11020 }, { "epoch": 0.78, "grad_norm": 2.282166863818371, "learning_rate": 1.1945951115278664e-06, "loss": 0.5762, "step": 11021 }, { "epoch": 0.78, "grad_norm": 1.690058224417182, "learning_rate": 1.1938497969846795e-06, "loss": 0.5664, "step": 11022 }, { "epoch": 0.78, "grad_norm": 1.6081645291220275, "learning_rate": 1.193104683495243e-06, "loss": 0.5357, "step": 11023 }, { "epoch": 0.78, "grad_norm": 1.7060383066416038, "learning_rate": 1.1923597710989183e-06, "loss": 0.547, "step": 11024 }, { "epoch": 0.78, "grad_norm": 2.5144036571345776, "learning_rate": 1.1916150598350496e-06, "loss": 0.5269, "step": 11025 }, { "epoch": 0.78, "grad_norm": 1.6275865287745257, "learning_rate": 1.1908705497429812e-06, "loss": 0.4949, "step": 11026 }, { "epoch": 0.78, "grad_norm": 1.5939634210379698, "learning_rate": 1.1901262408620357e-06, "loss": 0.4834, "step": 11027 }, { "epoch": 0.78, "grad_norm": 1.6486298453755994, "learning_rate": 1.1893821332315336e-06, "loss": 0.5233, "step": 11028 }, { "epoch": 0.78, "grad_norm": 1.6391315217765428, "learning_rate": 1.188638226890776e-06, "loss": 0.5493, "step": 11029 }, { "epoch": 0.78, "grad_norm": 0.6883404143738218, "learning_rate": 1.1878945218790633e-06, "loss": 0.4311, "step": 11030 }, { "epoch": 0.78, "grad_norm": 1.6802829877219045, "learning_rate": 1.187151018235681e-06, "loss": 0.5959, "step": 11031 }, { "epoch": 0.78, "grad_norm": 1.899265892332404, "learning_rate": 1.1864077159998999e-06, "loss": 0.5883, "step": 11032 }, { "epoch": 0.78, "grad_norm": 1.5415573676291532, "learning_rate": 1.1856646152109857e-06, "loss": 0.5274, "step": 11033 }, { "epoch": 0.78, "grad_norm": 1.6883672803281489, "learning_rate": 1.184921715908191e-06, "loss": 0.484, "step": 11034 }, { "epoch": 0.78, "grad_norm": 1.8394521423042018, "learning_rate": 1.18417901813076e-06, "loss": 0.5057, "step": 11035 }, { "epoch": 0.78, "grad_norm": 2.026076084241232, "learning_rate": 1.1834365219179218e-06, "loss": 0.5591, "step": 11036 }, { "epoch": 0.78, "grad_norm": 1.5282277016145205, "learning_rate": 1.1826942273088981e-06, "loss": 0.4942, "step": 11037 }, { "epoch": 0.78, "grad_norm": 1.8278155242431713, "learning_rate": 1.1819521343429008e-06, "loss": 0.4886, "step": 11038 }, { "epoch": 0.78, "grad_norm": 1.5119151294561946, "learning_rate": 1.1812102430591288e-06, "loss": 0.5456, "step": 11039 }, { "epoch": 0.78, "grad_norm": 1.6578488826590867, "learning_rate": 1.1804685534967735e-06, "loss": 0.5059, "step": 11040 }, { "epoch": 0.78, "grad_norm": 0.6851913239115897, "learning_rate": 1.1797270656950083e-06, "loss": 0.3928, "step": 11041 }, { "epoch": 0.78, "grad_norm": 1.7771646111658195, "learning_rate": 1.178985779693007e-06, "loss": 0.4892, "step": 11042 }, { "epoch": 0.78, "grad_norm": 2.277006845806584, "learning_rate": 1.1782446955299231e-06, "loss": 0.5242, "step": 11043 }, { "epoch": 0.78, "grad_norm": 2.228147404969636, "learning_rate": 1.177503813244904e-06, "loss": 0.5196, "step": 11044 }, { "epoch": 0.78, "grad_norm": 4.294776651948943, "learning_rate": 1.1767631328770863e-06, "loss": 0.5653, "step": 11045 }, { "epoch": 0.78, "grad_norm": 0.7817257516735312, "learning_rate": 1.176022654465594e-06, "loss": 0.4043, "step": 11046 }, { "epoch": 0.78, "grad_norm": 1.7191469025325101, "learning_rate": 1.1752823780495449e-06, "loss": 0.4797, "step": 11047 }, { "epoch": 0.78, "grad_norm": 1.482447481265252, "learning_rate": 1.174542303668037e-06, "loss": 0.4362, "step": 11048 }, { "epoch": 0.78, "grad_norm": 1.629180682672729, "learning_rate": 1.1738024313601698e-06, "loss": 0.4799, "step": 11049 }, { "epoch": 0.78, "grad_norm": 1.6457025525595588, "learning_rate": 1.1730627611650219e-06, "loss": 0.5115, "step": 11050 }, { "epoch": 0.78, "grad_norm": 7.156821153458225, "learning_rate": 1.1723232931216666e-06, "loss": 0.5335, "step": 11051 }, { "epoch": 0.78, "grad_norm": 1.647359602191678, "learning_rate": 1.1715840272691647e-06, "loss": 0.5026, "step": 11052 }, { "epoch": 0.78, "grad_norm": 1.6032113622712225, "learning_rate": 1.170844963646567e-06, "loss": 0.4857, "step": 11053 }, { "epoch": 0.78, "grad_norm": 1.854832090505108, "learning_rate": 1.1701061022929145e-06, "loss": 0.5212, "step": 11054 }, { "epoch": 0.78, "grad_norm": 1.8580915641460534, "learning_rate": 1.169367443247234e-06, "loss": 0.4424, "step": 11055 }, { "epoch": 0.78, "grad_norm": 0.7374975733355197, "learning_rate": 1.1686289865485451e-06, "loss": 0.4272, "step": 11056 }, { "epoch": 0.78, "grad_norm": 1.849392665004187, "learning_rate": 1.1678907322358552e-06, "loss": 0.543, "step": 11057 }, { "epoch": 0.78, "grad_norm": 1.6697567788169236, "learning_rate": 1.167152680348162e-06, "loss": 0.5214, "step": 11058 }, { "epoch": 0.78, "grad_norm": 1.5385731450986397, "learning_rate": 1.1664148309244515e-06, "loss": 0.4641, "step": 11059 }, { "epoch": 0.78, "grad_norm": 1.8216632432992292, "learning_rate": 1.1656771840037002e-06, "loss": 0.5538, "step": 11060 }, { "epoch": 0.78, "grad_norm": 1.6809404848911063, "learning_rate": 1.1649397396248735e-06, "loss": 0.5709, "step": 11061 }, { "epoch": 0.78, "grad_norm": 1.864258149103781, "learning_rate": 1.1642024978269234e-06, "loss": 0.5259, "step": 11062 }, { "epoch": 0.79, "grad_norm": 2.424047123756508, "learning_rate": 1.1634654586487949e-06, "loss": 0.5322, "step": 11063 }, { "epoch": 0.79, "grad_norm": 1.5703400590444618, "learning_rate": 1.1627286221294205e-06, "loss": 0.5264, "step": 11064 }, { "epoch": 0.79, "grad_norm": 1.7229052354129164, "learning_rate": 1.1619919883077241e-06, "loss": 0.5342, "step": 11065 }, { "epoch": 0.79, "grad_norm": 0.6771223248527087, "learning_rate": 1.1612555572226147e-06, "loss": 0.4179, "step": 11066 }, { "epoch": 0.79, "grad_norm": 0.7724598926354093, "learning_rate": 1.1605193289129928e-06, "loss": 0.4029, "step": 11067 }, { "epoch": 0.79, "grad_norm": 1.818533303875245, "learning_rate": 1.1597833034177524e-06, "loss": 0.5794, "step": 11068 }, { "epoch": 0.79, "grad_norm": 1.7394223814700076, "learning_rate": 1.159047480775769e-06, "loss": 0.512, "step": 11069 }, { "epoch": 0.79, "grad_norm": 0.6692522679870658, "learning_rate": 1.1583118610259143e-06, "loss": 0.4036, "step": 11070 }, { "epoch": 0.79, "grad_norm": 1.7233215265793131, "learning_rate": 1.1575764442070414e-06, "loss": 0.4969, "step": 11071 }, { "epoch": 0.79, "grad_norm": 1.8570213267342284, "learning_rate": 1.156841230358004e-06, "loss": 0.5437, "step": 11072 }, { "epoch": 0.79, "grad_norm": 1.6494940728613363, "learning_rate": 1.156106219517633e-06, "loss": 0.487, "step": 11073 }, { "epoch": 0.79, "grad_norm": 0.7159297859726088, "learning_rate": 1.1553714117247571e-06, "loss": 0.432, "step": 11074 }, { "epoch": 0.79, "grad_norm": 1.5863115733199185, "learning_rate": 1.1546368070181902e-06, "loss": 0.5138, "step": 11075 }, { "epoch": 0.79, "grad_norm": 2.9631153218443793, "learning_rate": 1.153902405436737e-06, "loss": 0.5429, "step": 11076 }, { "epoch": 0.79, "grad_norm": 0.6628920954687847, "learning_rate": 1.1531682070191925e-06, "loss": 0.4462, "step": 11077 }, { "epoch": 0.79, "grad_norm": 1.7115840402996398, "learning_rate": 1.1524342118043358e-06, "loss": 0.4864, "step": 11078 }, { "epoch": 0.79, "grad_norm": 2.814729795146018, "learning_rate": 1.1517004198309434e-06, "loss": 0.5358, "step": 11079 }, { "epoch": 0.79, "grad_norm": 1.8379887977149012, "learning_rate": 1.1509668311377736e-06, "loss": 0.4652, "step": 11080 }, { "epoch": 0.79, "grad_norm": 1.8053041270071903, "learning_rate": 1.1502334457635783e-06, "loss": 0.5184, "step": 11081 }, { "epoch": 0.79, "grad_norm": 1.6243944752636452, "learning_rate": 1.149500263747097e-06, "loss": 0.5039, "step": 11082 }, { "epoch": 0.79, "grad_norm": 1.8840037575938509, "learning_rate": 1.1487672851270593e-06, "loss": 0.4768, "step": 11083 }, { "epoch": 0.79, "grad_norm": 1.612842232359914, "learning_rate": 1.1480345099421846e-06, "loss": 0.5254, "step": 11084 }, { "epoch": 0.79, "grad_norm": 1.8533041491280224, "learning_rate": 1.1473019382311778e-06, "loss": 0.4788, "step": 11085 }, { "epoch": 0.79, "grad_norm": 1.8123912644057865, "learning_rate": 1.1465695700327373e-06, "loss": 0.497, "step": 11086 }, { "epoch": 0.79, "grad_norm": 1.7963399967020914, "learning_rate": 1.1458374053855492e-06, "loss": 0.5186, "step": 11087 }, { "epoch": 0.79, "grad_norm": 1.7671215129009903, "learning_rate": 1.1451054443282893e-06, "loss": 0.5183, "step": 11088 }, { "epoch": 0.79, "grad_norm": 2.933085174237955, "learning_rate": 1.1443736868996219e-06, "loss": 0.5014, "step": 11089 }, { "epoch": 0.79, "grad_norm": 1.614274346166808, "learning_rate": 1.1436421331382015e-06, "loss": 0.5139, "step": 11090 }, { "epoch": 0.79, "grad_norm": 2.0392198378519173, "learning_rate": 1.1429107830826714e-06, "loss": 0.5965, "step": 11091 }, { "epoch": 0.79, "grad_norm": 1.6412227802051131, "learning_rate": 1.142179636771662e-06, "loss": 0.5207, "step": 11092 }, { "epoch": 0.79, "grad_norm": 0.666422957985759, "learning_rate": 1.1414486942437969e-06, "loss": 0.3986, "step": 11093 }, { "epoch": 0.79, "grad_norm": 1.690088482907427, "learning_rate": 1.1407179555376857e-06, "loss": 0.5033, "step": 11094 }, { "epoch": 0.79, "grad_norm": 1.8692701515284387, "learning_rate": 1.1399874206919293e-06, "loss": 0.5058, "step": 11095 }, { "epoch": 0.79, "grad_norm": 1.6785784592570467, "learning_rate": 1.1392570897451183e-06, "loss": 0.5233, "step": 11096 }, { "epoch": 0.79, "grad_norm": 1.694438462699581, "learning_rate": 1.138526962735827e-06, "loss": 0.5351, "step": 11097 }, { "epoch": 0.79, "grad_norm": 1.7479075398014026, "learning_rate": 1.137797039702629e-06, "loss": 0.4985, "step": 11098 }, { "epoch": 0.79, "grad_norm": 1.6139429629316708, "learning_rate": 1.1370673206840766e-06, "loss": 0.5422, "step": 11099 }, { "epoch": 0.79, "grad_norm": 1.9534962277113515, "learning_rate": 1.1363378057187192e-06, "loss": 0.5443, "step": 11100 }, { "epoch": 0.79, "grad_norm": 1.4813818950120183, "learning_rate": 1.135608494845088e-06, "loss": 0.5215, "step": 11101 }, { "epoch": 0.79, "grad_norm": 0.7134652909176317, "learning_rate": 1.1348793881017133e-06, "loss": 0.4475, "step": 11102 }, { "epoch": 0.79, "grad_norm": 1.5668747641577059, "learning_rate": 1.1341504855271045e-06, "loss": 0.4797, "step": 11103 }, { "epoch": 0.79, "grad_norm": 1.6709158855891055, "learning_rate": 1.1334217871597663e-06, "loss": 0.5399, "step": 11104 }, { "epoch": 0.79, "grad_norm": 1.6339004610879886, "learning_rate": 1.1326932930381918e-06, "loss": 0.4981, "step": 11105 }, { "epoch": 0.79, "grad_norm": 1.7922285646965694, "learning_rate": 1.131965003200861e-06, "loss": 0.5597, "step": 11106 }, { "epoch": 0.79, "grad_norm": 0.7017141293375915, "learning_rate": 1.131236917686247e-06, "loss": 0.3977, "step": 11107 }, { "epoch": 0.79, "grad_norm": 1.72860437953875, "learning_rate": 1.1305090365328053e-06, "loss": 0.5778, "step": 11108 }, { "epoch": 0.79, "grad_norm": 1.6632253233964331, "learning_rate": 1.1297813597789908e-06, "loss": 0.4844, "step": 11109 }, { "epoch": 0.79, "grad_norm": 1.7154549907553032, "learning_rate": 1.129053887463238e-06, "loss": 0.4994, "step": 11110 }, { "epoch": 0.79, "grad_norm": 4.46975914053688, "learning_rate": 1.128326619623975e-06, "loss": 0.4811, "step": 11111 }, { "epoch": 0.79, "grad_norm": 4.7999360228035135, "learning_rate": 1.127599556299619e-06, "loss": 0.4672, "step": 11112 }, { "epoch": 0.79, "grad_norm": 2.555470185326032, "learning_rate": 1.126872697528576e-06, "loss": 0.5114, "step": 11113 }, { "epoch": 0.79, "grad_norm": 1.858505766639672, "learning_rate": 1.1261460433492422e-06, "loss": 0.5446, "step": 11114 }, { "epoch": 0.79, "grad_norm": 0.7864236081883107, "learning_rate": 1.1254195937999996e-06, "loss": 0.4373, "step": 11115 }, { "epoch": 0.79, "grad_norm": 1.54696010889377, "learning_rate": 1.124693348919223e-06, "loss": 0.5077, "step": 11116 }, { "epoch": 0.79, "grad_norm": 0.7089584157336273, "learning_rate": 1.1239673087452752e-06, "loss": 0.4444, "step": 11117 }, { "epoch": 0.79, "grad_norm": 1.5338215593657034, "learning_rate": 1.1232414733165075e-06, "loss": 0.4368, "step": 11118 }, { "epoch": 0.79, "grad_norm": 1.858944271086086, "learning_rate": 1.122515842671263e-06, "loss": 0.5414, "step": 11119 }, { "epoch": 0.79, "grad_norm": 1.637832291844311, "learning_rate": 1.1217904168478677e-06, "loss": 0.4885, "step": 11120 }, { "epoch": 0.79, "grad_norm": 1.9301701873818475, "learning_rate": 1.1210651958846463e-06, "loss": 0.5327, "step": 11121 }, { "epoch": 0.79, "grad_norm": 0.8157902687369112, "learning_rate": 1.1203401798199038e-06, "loss": 0.4227, "step": 11122 }, { "epoch": 0.79, "grad_norm": 1.8289121508508992, "learning_rate": 1.1196153686919386e-06, "loss": 0.5462, "step": 11123 }, { "epoch": 0.79, "grad_norm": 1.5691796235958002, "learning_rate": 1.1188907625390388e-06, "loss": 0.4511, "step": 11124 }, { "epoch": 0.79, "grad_norm": 1.7414822917977066, "learning_rate": 1.1181663613994798e-06, "loss": 0.6033, "step": 11125 }, { "epoch": 0.79, "grad_norm": 0.6427160612525361, "learning_rate": 1.117442165311528e-06, "loss": 0.3947, "step": 11126 }, { "epoch": 0.79, "grad_norm": 0.7096478750145321, "learning_rate": 1.1167181743134347e-06, "loss": 0.4336, "step": 11127 }, { "epoch": 0.79, "grad_norm": 1.8109523665306329, "learning_rate": 1.1159943884434482e-06, "loss": 0.5539, "step": 11128 }, { "epoch": 0.79, "grad_norm": 2.034902072683474, "learning_rate": 1.1152708077397973e-06, "loss": 0.5384, "step": 11129 }, { "epoch": 0.79, "grad_norm": 1.8784719444462108, "learning_rate": 1.1145474322407075e-06, "loss": 0.5559, "step": 11130 }, { "epoch": 0.79, "grad_norm": 1.622403309252361, "learning_rate": 1.1138242619843847e-06, "loss": 0.5457, "step": 11131 }, { "epoch": 0.79, "grad_norm": 1.6999004716709418, "learning_rate": 1.113101297009036e-06, "loss": 0.4983, "step": 11132 }, { "epoch": 0.79, "grad_norm": 2.1081469057694524, "learning_rate": 1.1123785373528457e-06, "loss": 0.5144, "step": 11133 }, { "epoch": 0.79, "grad_norm": 1.6511637177414893, "learning_rate": 1.111655983053994e-06, "loss": 0.5262, "step": 11134 }, { "epoch": 0.79, "grad_norm": 1.5776658079743169, "learning_rate": 1.110933634150649e-06, "loss": 0.4773, "step": 11135 }, { "epoch": 0.79, "grad_norm": 1.930519945106859, "learning_rate": 1.110211490680967e-06, "loss": 0.5384, "step": 11136 }, { "epoch": 0.79, "grad_norm": 1.7659565725303588, "learning_rate": 1.1094895526830962e-06, "loss": 0.545, "step": 11137 }, { "epoch": 0.79, "grad_norm": 1.770996257828368, "learning_rate": 1.1087678201951674e-06, "loss": 0.4837, "step": 11138 }, { "epoch": 0.79, "grad_norm": 2.6687343167217743, "learning_rate": 1.1080462932553098e-06, "loss": 0.569, "step": 11139 }, { "epoch": 0.79, "grad_norm": 1.685860891824896, "learning_rate": 1.1073249719016344e-06, "loss": 0.4963, "step": 11140 }, { "epoch": 0.79, "grad_norm": 2.2301315632440697, "learning_rate": 1.106603856172243e-06, "loss": 0.6189, "step": 11141 }, { "epoch": 0.79, "grad_norm": 1.5866212007628233, "learning_rate": 1.1058829461052295e-06, "loss": 0.4692, "step": 11142 }, { "epoch": 0.79, "grad_norm": 2.5825050382108365, "learning_rate": 1.1051622417386732e-06, "loss": 0.5291, "step": 11143 }, { "epoch": 0.79, "grad_norm": 1.5796595338982982, "learning_rate": 1.1044417431106463e-06, "loss": 0.5407, "step": 11144 }, { "epoch": 0.79, "grad_norm": 1.686755937557189, "learning_rate": 1.1037214502592052e-06, "loss": 0.5172, "step": 11145 }, { "epoch": 0.79, "grad_norm": 0.6929221323398291, "learning_rate": 1.1030013632223995e-06, "loss": 0.4323, "step": 11146 }, { "epoch": 0.79, "grad_norm": 1.8879598052186182, "learning_rate": 1.1022814820382672e-06, "loss": 0.4793, "step": 11147 }, { "epoch": 0.79, "grad_norm": 1.8852614481421912, "learning_rate": 1.1015618067448337e-06, "loss": 0.4987, "step": 11148 }, { "epoch": 0.79, "grad_norm": 1.651180518198633, "learning_rate": 1.1008423373801163e-06, "loss": 0.4793, "step": 11149 }, { "epoch": 0.79, "grad_norm": 2.003576310707084, "learning_rate": 1.1001230739821167e-06, "loss": 0.5189, "step": 11150 }, { "epoch": 0.79, "grad_norm": 1.7213631488257894, "learning_rate": 1.0994040165888331e-06, "loss": 0.481, "step": 11151 }, { "epoch": 0.79, "grad_norm": 1.7779710528983186, "learning_rate": 1.0986851652382452e-06, "loss": 0.5921, "step": 11152 }, { "epoch": 0.79, "grad_norm": 2.0432025431254726, "learning_rate": 1.0979665199683265e-06, "loss": 0.6009, "step": 11153 }, { "epoch": 0.79, "grad_norm": 1.5565712984954172, "learning_rate": 1.0972480808170382e-06, "loss": 0.4917, "step": 11154 }, { "epoch": 0.79, "grad_norm": 1.6240446883184885, "learning_rate": 1.0965298478223303e-06, "loss": 0.5798, "step": 11155 }, { "epoch": 0.79, "grad_norm": 1.7404132740207419, "learning_rate": 1.0958118210221441e-06, "loss": 0.4449, "step": 11156 }, { "epoch": 0.79, "grad_norm": 1.8027968115520983, "learning_rate": 1.0950940004544041e-06, "loss": 0.4873, "step": 11157 }, { "epoch": 0.79, "grad_norm": 2.1639365633793495, "learning_rate": 1.0943763861570333e-06, "loss": 0.5365, "step": 11158 }, { "epoch": 0.79, "grad_norm": 1.5422627125746204, "learning_rate": 1.0936589781679347e-06, "loss": 0.4431, "step": 11159 }, { "epoch": 0.79, "grad_norm": 1.8785255004952675, "learning_rate": 1.0929417765250061e-06, "loss": 0.4852, "step": 11160 }, { "epoch": 0.79, "grad_norm": 1.7836357943258823, "learning_rate": 1.0922247812661296e-06, "loss": 0.4949, "step": 11161 }, { "epoch": 0.79, "grad_norm": 1.7236054805561156, "learning_rate": 1.0915079924291827e-06, "loss": 0.5224, "step": 11162 }, { "epoch": 0.79, "grad_norm": 1.898786434369378, "learning_rate": 1.0907914100520283e-06, "loss": 0.5341, "step": 11163 }, { "epoch": 0.79, "grad_norm": 2.089475040774961, "learning_rate": 1.090075034172517e-06, "loss": 0.5897, "step": 11164 }, { "epoch": 0.79, "grad_norm": 2.1059460987511107, "learning_rate": 1.089358864828491e-06, "loss": 0.4773, "step": 11165 }, { "epoch": 0.79, "grad_norm": 1.53142149457687, "learning_rate": 1.088642902057781e-06, "loss": 0.5275, "step": 11166 }, { "epoch": 0.79, "grad_norm": 0.7895355250373021, "learning_rate": 1.0879271458982072e-06, "loss": 0.4343, "step": 11167 }, { "epoch": 0.79, "grad_norm": 1.5713675236029516, "learning_rate": 1.0872115963875767e-06, "loss": 0.4532, "step": 11168 }, { "epoch": 0.79, "grad_norm": 1.5292880607179657, "learning_rate": 1.0864962535636875e-06, "loss": 0.4822, "step": 11169 }, { "epoch": 0.79, "grad_norm": 1.63805691128887, "learning_rate": 1.0857811174643274e-06, "loss": 0.4554, "step": 11170 }, { "epoch": 0.79, "grad_norm": 1.6793385899930928, "learning_rate": 1.0850661881272717e-06, "loss": 0.4505, "step": 11171 }, { "epoch": 0.79, "grad_norm": 1.7980603408807818, "learning_rate": 1.0843514655902854e-06, "loss": 0.4966, "step": 11172 }, { "epoch": 0.79, "grad_norm": 1.8506529302035666, "learning_rate": 1.0836369498911225e-06, "loss": 0.5019, "step": 11173 }, { "epoch": 0.79, "grad_norm": 1.9037669051111683, "learning_rate": 1.0829226410675281e-06, "loss": 0.5774, "step": 11174 }, { "epoch": 0.79, "grad_norm": 1.6595953089528037, "learning_rate": 1.0822085391572313e-06, "loss": 0.4871, "step": 11175 }, { "epoch": 0.79, "grad_norm": 0.6284332914057973, "learning_rate": 1.0814946441979546e-06, "loss": 0.4346, "step": 11176 }, { "epoch": 0.79, "grad_norm": 1.6835647852809437, "learning_rate": 1.0807809562274091e-06, "loss": 0.4895, "step": 11177 }, { "epoch": 0.79, "grad_norm": 0.7538026581725579, "learning_rate": 1.0800674752832928e-06, "loss": 0.4179, "step": 11178 }, { "epoch": 0.79, "grad_norm": 1.5169441400085024, "learning_rate": 1.0793542014032965e-06, "loss": 0.4859, "step": 11179 }, { "epoch": 0.79, "grad_norm": 1.657919998980935, "learning_rate": 1.078641134625094e-06, "loss": 0.4786, "step": 11180 }, { "epoch": 0.79, "grad_norm": 1.526419642531314, "learning_rate": 1.0779282749863563e-06, "loss": 0.4743, "step": 11181 }, { "epoch": 0.79, "grad_norm": 0.6704425381233902, "learning_rate": 1.077215622524736e-06, "loss": 0.4093, "step": 11182 }, { "epoch": 0.79, "grad_norm": 1.7837322254114216, "learning_rate": 1.0765031772778784e-06, "loss": 0.4889, "step": 11183 }, { "epoch": 0.79, "grad_norm": 1.4711503117869231, "learning_rate": 1.0757909392834176e-06, "loss": 0.4897, "step": 11184 }, { "epoch": 0.79, "grad_norm": 0.6452190795256263, "learning_rate": 1.0750789085789771e-06, "loss": 0.4117, "step": 11185 }, { "epoch": 0.79, "grad_norm": 1.6978878996497897, "learning_rate": 1.0743670852021687e-06, "loss": 0.5163, "step": 11186 }, { "epoch": 0.79, "grad_norm": 1.8243618986529708, "learning_rate": 1.0736554691905897e-06, "loss": 0.52, "step": 11187 }, { "epoch": 0.79, "grad_norm": 1.778538328936432, "learning_rate": 1.072944060581837e-06, "loss": 0.4929, "step": 11188 }, { "epoch": 0.79, "grad_norm": 1.8396999355508834, "learning_rate": 1.0722328594134833e-06, "loss": 0.4882, "step": 11189 }, { "epoch": 0.79, "grad_norm": 1.505306199692012, "learning_rate": 1.0715218657230998e-06, "loss": 0.5134, "step": 11190 }, { "epoch": 0.79, "grad_norm": 1.6111833344627358, "learning_rate": 1.0708110795482423e-06, "loss": 0.4444, "step": 11191 }, { "epoch": 0.79, "grad_norm": 2.150753717465292, "learning_rate": 1.0701005009264576e-06, "loss": 0.5289, "step": 11192 }, { "epoch": 0.79, "grad_norm": 1.701918741162257, "learning_rate": 1.0693901298952818e-06, "loss": 0.5305, "step": 11193 }, { "epoch": 0.79, "grad_norm": 1.6043926277244096, "learning_rate": 1.0686799664922375e-06, "loss": 0.5008, "step": 11194 }, { "epoch": 0.79, "grad_norm": 1.9687120105738858, "learning_rate": 1.067970010754838e-06, "loss": 0.5187, "step": 11195 }, { "epoch": 0.79, "grad_norm": 1.6980819377996579, "learning_rate": 1.0672602627205864e-06, "loss": 0.5412, "step": 11196 }, { "epoch": 0.79, "grad_norm": 1.8345757018648285, "learning_rate": 1.0665507224269745e-06, "loss": 0.4761, "step": 11197 }, { "epoch": 0.79, "grad_norm": 2.2230268807799183, "learning_rate": 1.0658413899114806e-06, "loss": 0.5454, "step": 11198 }, { "epoch": 0.79, "grad_norm": 1.471367017086696, "learning_rate": 1.0651322652115742e-06, "loss": 0.4927, "step": 11199 }, { "epoch": 0.79, "grad_norm": 1.6936977406183227, "learning_rate": 1.064423348364717e-06, "loss": 0.4543, "step": 11200 }, { "epoch": 0.79, "grad_norm": 1.6959324852011637, "learning_rate": 1.063714639408353e-06, "loss": 0.5691, "step": 11201 }, { "epoch": 0.79, "grad_norm": 1.9975095222568415, "learning_rate": 1.0630061383799195e-06, "loss": 0.5076, "step": 11202 }, { "epoch": 0.79, "grad_norm": 1.4476778242794197, "learning_rate": 1.0622978453168425e-06, "loss": 0.4835, "step": 11203 }, { "epoch": 0.8, "grad_norm": 1.7950793374307685, "learning_rate": 1.0615897602565373e-06, "loss": 0.4854, "step": 11204 }, { "epoch": 0.8, "grad_norm": 1.9230525268436058, "learning_rate": 1.0608818832364043e-06, "loss": 0.5456, "step": 11205 }, { "epoch": 0.8, "grad_norm": 3.16668070932607, "learning_rate": 1.0601742142938381e-06, "loss": 0.5425, "step": 11206 }, { "epoch": 0.8, "grad_norm": 1.8876980579309854, "learning_rate": 1.05946675346622e-06, "loss": 0.5461, "step": 11207 }, { "epoch": 0.8, "grad_norm": 1.9063616649930932, "learning_rate": 1.0587595007909202e-06, "loss": 0.5094, "step": 11208 }, { "epoch": 0.8, "grad_norm": 1.538386309880416, "learning_rate": 1.0580524563052997e-06, "loss": 0.4627, "step": 11209 }, { "epoch": 0.8, "grad_norm": 1.8340928317081577, "learning_rate": 1.057345620046703e-06, "loss": 0.5918, "step": 11210 }, { "epoch": 0.8, "grad_norm": 1.7316772843205683, "learning_rate": 1.0566389920524728e-06, "loss": 0.5084, "step": 11211 }, { "epoch": 0.8, "grad_norm": 2.725939466735819, "learning_rate": 1.055932572359931e-06, "loss": 0.5355, "step": 11212 }, { "epoch": 0.8, "grad_norm": 1.555657920225572, "learning_rate": 1.0552263610063963e-06, "loss": 0.4998, "step": 11213 }, { "epoch": 0.8, "grad_norm": 1.691753626229931, "learning_rate": 1.0545203580291707e-06, "loss": 0.522, "step": 11214 }, { "epoch": 0.8, "grad_norm": 1.9533704718835754, "learning_rate": 1.05381456346555e-06, "loss": 0.5343, "step": 11215 }, { "epoch": 0.8, "grad_norm": 0.794312838566112, "learning_rate": 1.0531089773528163e-06, "loss": 0.4295, "step": 11216 }, { "epoch": 0.8, "grad_norm": 1.6352570806213012, "learning_rate": 1.0524035997282377e-06, "loss": 0.5502, "step": 11217 }, { "epoch": 0.8, "grad_norm": 2.1227235809166394, "learning_rate": 1.0516984306290796e-06, "loss": 0.481, "step": 11218 }, { "epoch": 0.8, "grad_norm": 1.5844463187904028, "learning_rate": 1.0509934700925883e-06, "loss": 0.4922, "step": 11219 }, { "epoch": 0.8, "grad_norm": 0.6832139725834395, "learning_rate": 1.0502887181560028e-06, "loss": 0.4489, "step": 11220 }, { "epoch": 0.8, "grad_norm": 1.6741842556122202, "learning_rate": 1.0495841748565505e-06, "loss": 0.52, "step": 11221 }, { "epoch": 0.8, "grad_norm": 0.7018547386492621, "learning_rate": 1.0488798402314477e-06, "loss": 0.421, "step": 11222 }, { "epoch": 0.8, "grad_norm": 1.560095275313976, "learning_rate": 1.0481757143179016e-06, "loss": 0.5381, "step": 11223 }, { "epoch": 0.8, "grad_norm": 1.6096803022182287, "learning_rate": 1.0474717971531035e-06, "loss": 0.4593, "step": 11224 }, { "epoch": 0.8, "grad_norm": 1.6016734969988498, "learning_rate": 1.0467680887742382e-06, "loss": 0.5217, "step": 11225 }, { "epoch": 0.8, "grad_norm": 6.358567249553165, "learning_rate": 1.046064589218478e-06, "loss": 0.5472, "step": 11226 }, { "epoch": 0.8, "grad_norm": 1.5832135878785962, "learning_rate": 1.0453612985229833e-06, "loss": 0.4746, "step": 11227 }, { "epoch": 0.8, "grad_norm": 1.7769036252275616, "learning_rate": 1.044658216724907e-06, "loss": 0.5785, "step": 11228 }, { "epoch": 0.8, "grad_norm": 1.9118738864322207, "learning_rate": 1.0439553438613831e-06, "loss": 0.5794, "step": 11229 }, { "epoch": 0.8, "grad_norm": 1.744364280319165, "learning_rate": 1.0432526799695459e-06, "loss": 0.4855, "step": 11230 }, { "epoch": 0.8, "grad_norm": 0.6757044129565623, "learning_rate": 1.0425502250865076e-06, "loss": 0.4146, "step": 11231 }, { "epoch": 0.8, "grad_norm": 1.859169124561317, "learning_rate": 1.0418479792493775e-06, "loss": 0.4806, "step": 11232 }, { "epoch": 0.8, "grad_norm": 1.9385427776601147, "learning_rate": 1.041145942495247e-06, "loss": 0.5307, "step": 11233 }, { "epoch": 0.8, "grad_norm": 1.9568555702806791, "learning_rate": 1.0404441148612044e-06, "loss": 0.4871, "step": 11234 }, { "epoch": 0.8, "grad_norm": 1.6910389124449499, "learning_rate": 1.0397424963843194e-06, "loss": 0.552, "step": 11235 }, { "epoch": 0.8, "grad_norm": 1.5800851664359956, "learning_rate": 1.039041087101655e-06, "loss": 0.5311, "step": 11236 }, { "epoch": 0.8, "grad_norm": 1.6571277078178699, "learning_rate": 1.0383398870502615e-06, "loss": 0.5369, "step": 11237 }, { "epoch": 0.8, "grad_norm": 1.56530741732832, "learning_rate": 1.0376388962671797e-06, "loss": 0.5082, "step": 11238 }, { "epoch": 0.8, "grad_norm": 1.419038265671975, "learning_rate": 1.0369381147894387e-06, "loss": 0.4641, "step": 11239 }, { "epoch": 0.8, "grad_norm": 1.958854165287303, "learning_rate": 1.036237542654052e-06, "loss": 0.4854, "step": 11240 }, { "epoch": 0.8, "grad_norm": 1.974030312244267, "learning_rate": 1.035537179898033e-06, "loss": 0.5412, "step": 11241 }, { "epoch": 0.8, "grad_norm": 1.3635790482005044, "learning_rate": 1.0348370265583718e-06, "loss": 0.4591, "step": 11242 }, { "epoch": 0.8, "grad_norm": 0.74069473441299, "learning_rate": 1.0341370826720542e-06, "loss": 0.414, "step": 11243 }, { "epoch": 0.8, "grad_norm": 0.7017157193597974, "learning_rate": 1.0334373482760545e-06, "loss": 0.4277, "step": 11244 }, { "epoch": 0.8, "grad_norm": 1.3531103955907227, "learning_rate": 1.032737823407335e-06, "loss": 0.4472, "step": 11245 }, { "epoch": 0.8, "grad_norm": 2.047917649106481, "learning_rate": 1.0320385081028478e-06, "loss": 0.5373, "step": 11246 }, { "epoch": 0.8, "grad_norm": 1.9841614516203951, "learning_rate": 1.03133940239953e-06, "loss": 0.5538, "step": 11247 }, { "epoch": 0.8, "grad_norm": 0.7271225706796668, "learning_rate": 1.0306405063343128e-06, "loss": 0.4412, "step": 11248 }, { "epoch": 0.8, "grad_norm": 2.0359063886262168, "learning_rate": 1.0299418199441146e-06, "loss": 0.5393, "step": 11249 }, { "epoch": 0.8, "grad_norm": 1.8229399706705152, "learning_rate": 1.0292433432658415e-06, "loss": 0.5174, "step": 11250 }, { "epoch": 0.8, "grad_norm": 1.6247049215096885, "learning_rate": 1.0285450763363896e-06, "loss": 0.5333, "step": 11251 }, { "epoch": 0.8, "grad_norm": 1.74968438512329, "learning_rate": 1.0278470191926442e-06, "loss": 0.4747, "step": 11252 }, { "epoch": 0.8, "grad_norm": 1.6868810242861803, "learning_rate": 1.0271491718714798e-06, "loss": 0.5454, "step": 11253 }, { "epoch": 0.8, "grad_norm": 1.6430458720890107, "learning_rate": 1.026451534409757e-06, "loss": 0.4874, "step": 11254 }, { "epoch": 0.8, "grad_norm": 1.8258672822223552, "learning_rate": 1.0257541068443277e-06, "loss": 0.5261, "step": 11255 }, { "epoch": 0.8, "grad_norm": 1.643039303350932, "learning_rate": 1.0250568892120339e-06, "loss": 0.5017, "step": 11256 }, { "epoch": 0.8, "grad_norm": 1.602833255793951, "learning_rate": 1.0243598815497036e-06, "loss": 0.548, "step": 11257 }, { "epoch": 0.8, "grad_norm": 1.5959711964924643, "learning_rate": 1.0236630838941575e-06, "loss": 0.4978, "step": 11258 }, { "epoch": 0.8, "grad_norm": 1.6276797577956414, "learning_rate": 1.0229664962821977e-06, "loss": 0.4541, "step": 11259 }, { "epoch": 0.8, "grad_norm": 1.662179601231129, "learning_rate": 1.0222701187506268e-06, "loss": 0.4803, "step": 11260 }, { "epoch": 0.8, "grad_norm": 1.6539954849931506, "learning_rate": 1.021573951336225e-06, "loss": 0.4778, "step": 11261 }, { "epoch": 0.8, "grad_norm": 1.4391245952132268, "learning_rate": 1.020877994075769e-06, "loss": 0.4554, "step": 11262 }, { "epoch": 0.8, "grad_norm": 1.4764173203615507, "learning_rate": 1.020182247006018e-06, "loss": 0.4864, "step": 11263 }, { "epoch": 0.8, "grad_norm": 1.5895719211089743, "learning_rate": 1.019486710163729e-06, "loss": 0.5934, "step": 11264 }, { "epoch": 0.8, "grad_norm": 1.7690808229285628, "learning_rate": 1.0187913835856384e-06, "loss": 0.4776, "step": 11265 }, { "epoch": 0.8, "grad_norm": 1.8741920771391927, "learning_rate": 1.0180962673084754e-06, "loss": 0.5576, "step": 11266 }, { "epoch": 0.8, "grad_norm": 2.027334830807492, "learning_rate": 1.0174013613689633e-06, "loss": 0.5765, "step": 11267 }, { "epoch": 0.8, "grad_norm": 2.1233094933883145, "learning_rate": 1.0167066658038045e-06, "loss": 0.5678, "step": 11268 }, { "epoch": 0.8, "grad_norm": 1.6240466848459032, "learning_rate": 1.016012180649698e-06, "loss": 0.5786, "step": 11269 }, { "epoch": 0.8, "grad_norm": 1.7662460861256604, "learning_rate": 1.0153179059433254e-06, "loss": 0.5129, "step": 11270 }, { "epoch": 0.8, "grad_norm": 1.8739949639578033, "learning_rate": 1.0146238417213654e-06, "loss": 0.5851, "step": 11271 }, { "epoch": 0.8, "grad_norm": 1.6660511290325462, "learning_rate": 1.0139299880204773e-06, "loss": 0.5519, "step": 11272 }, { "epoch": 0.8, "grad_norm": 2.0197271678444064, "learning_rate": 1.0132363448773135e-06, "loss": 0.5066, "step": 11273 }, { "epoch": 0.8, "grad_norm": 4.372904521366764, "learning_rate": 1.0125429123285152e-06, "loss": 0.5562, "step": 11274 }, { "epoch": 0.8, "grad_norm": 1.7676138329453863, "learning_rate": 1.0118496904107117e-06, "loss": 0.5189, "step": 11275 }, { "epoch": 0.8, "grad_norm": 2.742781051032969, "learning_rate": 1.0111566791605227e-06, "loss": 0.4771, "step": 11276 }, { "epoch": 0.8, "grad_norm": 1.6866914910595707, "learning_rate": 1.0104638786145526e-06, "loss": 0.4683, "step": 11277 }, { "epoch": 0.8, "grad_norm": 1.5221461906571683, "learning_rate": 1.0097712888093985e-06, "loss": 0.5388, "step": 11278 }, { "epoch": 0.8, "grad_norm": 1.7501840965823392, "learning_rate": 1.0090789097816456e-06, "loss": 0.4673, "step": 11279 }, { "epoch": 0.8, "grad_norm": 1.5845013277869782, "learning_rate": 1.0083867415678683e-06, "loss": 0.48, "step": 11280 }, { "epoch": 0.8, "grad_norm": 1.8373876279578896, "learning_rate": 1.0076947842046296e-06, "loss": 0.4181, "step": 11281 }, { "epoch": 0.8, "grad_norm": 2.030640035398701, "learning_rate": 1.007003037728478e-06, "loss": 0.4926, "step": 11282 }, { "epoch": 0.8, "grad_norm": 1.5456396040907112, "learning_rate": 1.0063115021759584e-06, "loss": 0.5245, "step": 11283 }, { "epoch": 0.8, "grad_norm": 1.5809970510631979, "learning_rate": 1.0056201775835962e-06, "loss": 0.5285, "step": 11284 }, { "epoch": 0.8, "grad_norm": 1.7012260696415753, "learning_rate": 1.004929063987911e-06, "loss": 0.524, "step": 11285 }, { "epoch": 0.8, "grad_norm": 2.01427925484465, "learning_rate": 1.0042381614254103e-06, "loss": 0.5569, "step": 11286 }, { "epoch": 0.8, "grad_norm": 1.7973437892148814, "learning_rate": 1.0035474699325891e-06, "loss": 0.5205, "step": 11287 }, { "epoch": 0.8, "grad_norm": 1.8344983678319808, "learning_rate": 1.0028569895459334e-06, "loss": 0.4621, "step": 11288 }, { "epoch": 0.8, "grad_norm": 1.8884431348681736, "learning_rate": 1.0021667203019136e-06, "loss": 0.5545, "step": 11289 }, { "epoch": 0.8, "grad_norm": 1.997099536414051, "learning_rate": 1.0014766622369965e-06, "loss": 0.5648, "step": 11290 }, { "epoch": 0.8, "grad_norm": 1.7455785530881238, "learning_rate": 1.00078681538763e-06, "loss": 0.4992, "step": 11291 }, { "epoch": 0.8, "grad_norm": 1.925086172712342, "learning_rate": 1.000097179790256e-06, "loss": 0.5748, "step": 11292 }, { "epoch": 0.8, "grad_norm": 1.7296727085013825, "learning_rate": 9.994077554812998e-07, "loss": 0.4993, "step": 11293 }, { "epoch": 0.8, "grad_norm": 1.747736662318826, "learning_rate": 9.98718542497183e-07, "loss": 0.5242, "step": 11294 }, { "epoch": 0.8, "grad_norm": 1.5412689579217458, "learning_rate": 9.980295408743123e-07, "loss": 0.4753, "step": 11295 }, { "epoch": 0.8, "grad_norm": 3.202185540638962, "learning_rate": 9.973407506490811e-07, "loss": 0.5154, "step": 11296 }, { "epoch": 0.8, "grad_norm": 1.7907739538212493, "learning_rate": 9.966521718578737e-07, "loss": 0.4838, "step": 11297 }, { "epoch": 0.8, "grad_norm": 1.6349700752509877, "learning_rate": 9.95963804537064e-07, "loss": 0.5035, "step": 11298 }, { "epoch": 0.8, "grad_norm": 1.7189847878783926, "learning_rate": 9.952756487230152e-07, "loss": 0.5169, "step": 11299 }, { "epoch": 0.8, "grad_norm": 0.6993075710889468, "learning_rate": 9.945877044520736e-07, "loss": 0.4093, "step": 11300 }, { "epoch": 0.8, "grad_norm": 1.8269426114870027, "learning_rate": 9.93899971760584e-07, "loss": 0.5438, "step": 11301 }, { "epoch": 0.8, "grad_norm": 2.209143675968071, "learning_rate": 9.932124506848718e-07, "loss": 0.4694, "step": 11302 }, { "epoch": 0.8, "grad_norm": 0.737565124124016, "learning_rate": 9.92525141261254e-07, "loss": 0.406, "step": 11303 }, { "epoch": 0.8, "grad_norm": 1.897537849224304, "learning_rate": 9.918380435260384e-07, "loss": 0.4946, "step": 11304 }, { "epoch": 0.8, "grad_norm": 1.8188239591604525, "learning_rate": 9.911511575155181e-07, "loss": 0.5989, "step": 11305 }, { "epoch": 0.8, "grad_norm": 0.7624925439489431, "learning_rate": 9.904644832659788e-07, "loss": 0.4063, "step": 11306 }, { "epoch": 0.8, "grad_norm": 1.9721404466234673, "learning_rate": 9.897780208136903e-07, "loss": 0.5354, "step": 11307 }, { "epoch": 0.8, "grad_norm": 1.7555857344038668, "learning_rate": 9.890917701949154e-07, "loss": 0.5253, "step": 11308 }, { "epoch": 0.8, "grad_norm": 1.7994873118273602, "learning_rate": 9.884057314459045e-07, "loss": 0.492, "step": 11309 }, { "epoch": 0.8, "grad_norm": 2.83234422689632, "learning_rate": 9.87719904602895e-07, "loss": 0.5526, "step": 11310 }, { "epoch": 0.8, "grad_norm": 0.7017765208363271, "learning_rate": 9.87034289702118e-07, "loss": 0.4316, "step": 11311 }, { "epoch": 0.8, "grad_norm": 1.9780567669422324, "learning_rate": 9.863488867797843e-07, "loss": 0.5005, "step": 11312 }, { "epoch": 0.8, "grad_norm": 1.8921110164547692, "learning_rate": 9.856636958721056e-07, "loss": 0.482, "step": 11313 }, { "epoch": 0.8, "grad_norm": 1.6612827718152423, "learning_rate": 9.849787170152708e-07, "loss": 0.4947, "step": 11314 }, { "epoch": 0.8, "grad_norm": 1.6305622843380931, "learning_rate": 9.842939502454656e-07, "loss": 0.5562, "step": 11315 }, { "epoch": 0.8, "grad_norm": 1.7890266840807443, "learning_rate": 9.836093955988606e-07, "loss": 0.5068, "step": 11316 }, { "epoch": 0.8, "grad_norm": 0.7002631125001765, "learning_rate": 9.82925053111617e-07, "loss": 0.4337, "step": 11317 }, { "epoch": 0.8, "grad_norm": 2.172150856521467, "learning_rate": 9.822409228198854e-07, "loss": 0.5918, "step": 11318 }, { "epoch": 0.8, "grad_norm": 1.6736999570847173, "learning_rate": 9.815570047597989e-07, "loss": 0.55, "step": 11319 }, { "epoch": 0.8, "grad_norm": 7.592727368431582, "learning_rate": 9.808732989674907e-07, "loss": 0.5709, "step": 11320 }, { "epoch": 0.8, "grad_norm": 1.9580933607882423, "learning_rate": 9.801898054790726e-07, "loss": 0.5697, "step": 11321 }, { "epoch": 0.8, "grad_norm": 1.621275298163921, "learning_rate": 9.795065243306495e-07, "loss": 0.5348, "step": 11322 }, { "epoch": 0.8, "grad_norm": 1.5381814835425238, "learning_rate": 9.788234555583153e-07, "loss": 0.4978, "step": 11323 }, { "epoch": 0.8, "grad_norm": 1.5912911217912171, "learning_rate": 9.78140599198152e-07, "loss": 0.479, "step": 11324 }, { "epoch": 0.8, "grad_norm": 1.5246572283666426, "learning_rate": 9.774579552862307e-07, "loss": 0.4498, "step": 11325 }, { "epoch": 0.8, "grad_norm": 0.6995513582959493, "learning_rate": 9.767755238586097e-07, "loss": 0.4089, "step": 11326 }, { "epoch": 0.8, "grad_norm": 1.7483635073494488, "learning_rate": 9.760933049513378e-07, "loss": 0.5563, "step": 11327 }, { "epoch": 0.8, "grad_norm": 1.775802423221744, "learning_rate": 9.754112986004527e-07, "loss": 0.5285, "step": 11328 }, { "epoch": 0.8, "grad_norm": 0.7602698235954728, "learning_rate": 9.747295048419813e-07, "loss": 0.4414, "step": 11329 }, { "epoch": 0.8, "grad_norm": 1.616324833828178, "learning_rate": 9.740479237119337e-07, "loss": 0.5141, "step": 11330 }, { "epoch": 0.8, "grad_norm": 1.9453668205876433, "learning_rate": 9.733665552463184e-07, "loss": 0.5071, "step": 11331 }, { "epoch": 0.8, "grad_norm": 1.9143893603852753, "learning_rate": 9.726853994811269e-07, "loss": 0.5855, "step": 11332 }, { "epoch": 0.8, "grad_norm": 0.6031974543925593, "learning_rate": 9.720044564523379e-07, "loss": 0.4255, "step": 11333 }, { "epoch": 0.8, "grad_norm": 1.7169815780706554, "learning_rate": 9.713237261959223e-07, "loss": 0.5109, "step": 11334 }, { "epoch": 0.8, "grad_norm": 1.810057457122053, "learning_rate": 9.706432087478385e-07, "loss": 0.577, "step": 11335 }, { "epoch": 0.8, "grad_norm": 1.8168647326606424, "learning_rate": 9.699629041440345e-07, "loss": 0.4996, "step": 11336 }, { "epoch": 0.8, "grad_norm": 1.7455573947035439, "learning_rate": 9.692828124204446e-07, "loss": 0.524, "step": 11337 }, { "epoch": 0.8, "grad_norm": 1.6733443172691176, "learning_rate": 9.686029336129942e-07, "loss": 0.5422, "step": 11338 }, { "epoch": 0.8, "grad_norm": 1.7017681759225547, "learning_rate": 9.679232677575978e-07, "loss": 0.535, "step": 11339 }, { "epoch": 0.8, "grad_norm": 1.547736157076401, "learning_rate": 9.672438148901564e-07, "loss": 0.5196, "step": 11340 }, { "epoch": 0.8, "grad_norm": 1.4400199766700286, "learning_rate": 9.66564575046563e-07, "loss": 0.4838, "step": 11341 }, { "epoch": 0.8, "grad_norm": 1.9306752176824982, "learning_rate": 9.658855482626933e-07, "loss": 0.5093, "step": 11342 }, { "epoch": 0.8, "grad_norm": 3.5808182739425645, "learning_rate": 9.652067345744204e-07, "loss": 0.4372, "step": 11343 }, { "epoch": 0.8, "grad_norm": 1.8101458378227766, "learning_rate": 9.645281340175988e-07, "loss": 0.4719, "step": 11344 }, { "epoch": 0.81, "grad_norm": 2.065174261335442, "learning_rate": 9.638497466280756e-07, "loss": 0.5161, "step": 11345 }, { "epoch": 0.81, "grad_norm": 1.7513698973138914, "learning_rate": 9.631715724416846e-07, "loss": 0.5699, "step": 11346 }, { "epoch": 0.81, "grad_norm": 1.872464287888122, "learning_rate": 9.624936114942496e-07, "loss": 0.5042, "step": 11347 }, { "epoch": 0.81, "grad_norm": 0.6966883748993468, "learning_rate": 9.618158638215846e-07, "loss": 0.4628, "step": 11348 }, { "epoch": 0.81, "grad_norm": 1.8932863605899417, "learning_rate": 9.611383294594862e-07, "loss": 0.5753, "step": 11349 }, { "epoch": 0.81, "grad_norm": 0.664643126447566, "learning_rate": 9.604610084437493e-07, "loss": 0.4352, "step": 11350 }, { "epoch": 0.81, "grad_norm": 2.1056715216520465, "learning_rate": 9.597839008101484e-07, "loss": 0.4958, "step": 11351 }, { "epoch": 0.81, "grad_norm": 2.20856386000501, "learning_rate": 9.591070065944524e-07, "loss": 0.4884, "step": 11352 }, { "epoch": 0.81, "grad_norm": 1.5760012699770996, "learning_rate": 9.584303258324163e-07, "loss": 0.5716, "step": 11353 }, { "epoch": 0.81, "grad_norm": 1.578295699784479, "learning_rate": 9.57753858559785e-07, "loss": 0.5213, "step": 11354 }, { "epoch": 0.81, "grad_norm": 1.7353295777821152, "learning_rate": 9.570776048122938e-07, "loss": 0.4161, "step": 11355 }, { "epoch": 0.81, "grad_norm": 1.562285898224663, "learning_rate": 9.56401564625661e-07, "loss": 0.5287, "step": 11356 }, { "epoch": 0.81, "grad_norm": 2.1628738499128253, "learning_rate": 9.557257380355995e-07, "loss": 0.5046, "step": 11357 }, { "epoch": 0.81, "grad_norm": 2.1189800258548654, "learning_rate": 9.550501250778082e-07, "loss": 0.5342, "step": 11358 }, { "epoch": 0.81, "grad_norm": 1.7436801491295326, "learning_rate": 9.543747257879755e-07, "loss": 0.5165, "step": 11359 }, { "epoch": 0.81, "grad_norm": 1.5189451747586926, "learning_rate": 9.53699540201779e-07, "loss": 0.5193, "step": 11360 }, { "epoch": 0.81, "grad_norm": 1.8186777529850144, "learning_rate": 9.530245683548817e-07, "loss": 0.4892, "step": 11361 }, { "epoch": 0.81, "grad_norm": 1.8772213200819938, "learning_rate": 9.523498102829421e-07, "loss": 0.5029, "step": 11362 }, { "epoch": 0.81, "grad_norm": 1.6698644250995676, "learning_rate": 9.516752660215994e-07, "loss": 0.5269, "step": 11363 }, { "epoch": 0.81, "grad_norm": 2.6980975903759923, "learning_rate": 9.510009356064865e-07, "loss": 0.4955, "step": 11364 }, { "epoch": 0.81, "grad_norm": 1.7044455079214986, "learning_rate": 9.503268190732245e-07, "loss": 0.5094, "step": 11365 }, { "epoch": 0.81, "grad_norm": 2.9954361961568905, "learning_rate": 9.496529164574231e-07, "loss": 0.4444, "step": 11366 }, { "epoch": 0.81, "grad_norm": 1.7040010860621926, "learning_rate": 9.48979227794678e-07, "loss": 0.5502, "step": 11367 }, { "epoch": 0.81, "grad_norm": 1.7736057140736934, "learning_rate": 9.483057531205769e-07, "loss": 0.5338, "step": 11368 }, { "epoch": 0.81, "grad_norm": 1.5809655816442083, "learning_rate": 9.476324924706948e-07, "loss": 0.5268, "step": 11369 }, { "epoch": 0.81, "grad_norm": 0.6978031736891618, "learning_rate": 9.469594458805958e-07, "loss": 0.448, "step": 11370 }, { "epoch": 0.81, "grad_norm": 1.7972289124689291, "learning_rate": 9.462866133858333e-07, "loss": 0.5054, "step": 11371 }, { "epoch": 0.81, "grad_norm": 2.147941262717982, "learning_rate": 9.456139950219456e-07, "loss": 0.5136, "step": 11372 }, { "epoch": 0.81, "grad_norm": 2.1650067067819485, "learning_rate": 9.449415908244675e-07, "loss": 0.5471, "step": 11373 }, { "epoch": 0.81, "grad_norm": 5.150840192628067, "learning_rate": 9.442694008289133e-07, "loss": 0.507, "step": 11374 }, { "epoch": 0.81, "grad_norm": 1.6273699952619212, "learning_rate": 9.435974250707924e-07, "loss": 0.5032, "step": 11375 }, { "epoch": 0.81, "grad_norm": 1.7169425884171914, "learning_rate": 9.429256635856005e-07, "loss": 0.4931, "step": 11376 }, { "epoch": 0.81, "grad_norm": 1.6799521087795601, "learning_rate": 9.422541164088222e-07, "loss": 0.56, "step": 11377 }, { "epoch": 0.81, "grad_norm": 1.9169903707200466, "learning_rate": 9.415827835759322e-07, "loss": 0.5349, "step": 11378 }, { "epoch": 0.81, "grad_norm": 1.8556671747937616, "learning_rate": 9.409116651223888e-07, "loss": 0.3749, "step": 11379 }, { "epoch": 0.81, "grad_norm": 0.6800788418201351, "learning_rate": 9.402407610836479e-07, "loss": 0.4004, "step": 11380 }, { "epoch": 0.81, "grad_norm": 1.5604774961518988, "learning_rate": 9.39570071495145e-07, "loss": 0.5555, "step": 11381 }, { "epoch": 0.81, "grad_norm": 1.7881496019767595, "learning_rate": 9.388995963923092e-07, "loss": 0.5113, "step": 11382 }, { "epoch": 0.81, "grad_norm": 1.7873421602199242, "learning_rate": 9.382293358105582e-07, "loss": 0.5244, "step": 11383 }, { "epoch": 0.81, "grad_norm": 1.6869319487609227, "learning_rate": 9.375592897852965e-07, "loss": 0.5323, "step": 11384 }, { "epoch": 0.81, "grad_norm": 1.608282551078421, "learning_rate": 9.3688945835192e-07, "loss": 0.5089, "step": 11385 }, { "epoch": 0.81, "grad_norm": 0.7169645751096076, "learning_rate": 9.36219841545809e-07, "loss": 0.4172, "step": 11386 }, { "epoch": 0.81, "grad_norm": 1.8531746311695307, "learning_rate": 9.355504394023357e-07, "loss": 0.508, "step": 11387 }, { "epoch": 0.81, "grad_norm": 1.855111064806148, "learning_rate": 9.348812519568601e-07, "loss": 0.5412, "step": 11388 }, { "epoch": 0.81, "grad_norm": 1.699777286853456, "learning_rate": 9.342122792447317e-07, "loss": 0.5072, "step": 11389 }, { "epoch": 0.81, "grad_norm": 1.750406852364639, "learning_rate": 9.335435213012883e-07, "loss": 0.5282, "step": 11390 }, { "epoch": 0.81, "grad_norm": 1.7411926031218778, "learning_rate": 9.328749781618529e-07, "loss": 0.505, "step": 11391 }, { "epoch": 0.81, "grad_norm": 1.547791745381671, "learning_rate": 9.322066498617449e-07, "loss": 0.5181, "step": 11392 }, { "epoch": 0.81, "grad_norm": 1.6132651709581736, "learning_rate": 9.315385364362639e-07, "loss": 0.5349, "step": 11393 }, { "epoch": 0.81, "grad_norm": 1.8259910066071368, "learning_rate": 9.308706379207033e-07, "loss": 0.495, "step": 11394 }, { "epoch": 0.81, "grad_norm": 1.5430699434370803, "learning_rate": 9.302029543503438e-07, "loss": 0.5106, "step": 11395 }, { "epoch": 0.81, "grad_norm": 1.4908708370028025, "learning_rate": 9.29535485760456e-07, "loss": 0.5016, "step": 11396 }, { "epoch": 0.81, "grad_norm": 1.7606120089248032, "learning_rate": 9.288682321862952e-07, "loss": 0.5118, "step": 11397 }, { "epoch": 0.81, "grad_norm": 1.8492275677524488, "learning_rate": 9.282011936631074e-07, "loss": 0.5412, "step": 11398 }, { "epoch": 0.81, "grad_norm": 1.9763270715124037, "learning_rate": 9.27534370226133e-07, "loss": 0.416, "step": 11399 }, { "epoch": 0.81, "grad_norm": 2.7847722348253394, "learning_rate": 9.268677619105909e-07, "loss": 0.5598, "step": 11400 }, { "epoch": 0.81, "grad_norm": 2.0868305956048934, "learning_rate": 9.262013687516963e-07, "loss": 0.4431, "step": 11401 }, { "epoch": 0.81, "grad_norm": 1.8060922511848032, "learning_rate": 9.255351907846471e-07, "loss": 0.5341, "step": 11402 }, { "epoch": 0.81, "grad_norm": 1.7090484923204894, "learning_rate": 9.248692280446375e-07, "loss": 0.5538, "step": 11403 }, { "epoch": 0.81, "grad_norm": 0.7228495982581072, "learning_rate": 9.242034805668431e-07, "loss": 0.4267, "step": 11404 }, { "epoch": 0.81, "grad_norm": 1.611887104930997, "learning_rate": 9.23537948386431e-07, "loss": 0.4945, "step": 11405 }, { "epoch": 0.81, "grad_norm": 1.7023374711029844, "learning_rate": 9.228726315385578e-07, "loss": 0.4779, "step": 11406 }, { "epoch": 0.81, "grad_norm": 1.5861331149525635, "learning_rate": 9.222075300583666e-07, "loss": 0.5165, "step": 11407 }, { "epoch": 0.81, "grad_norm": 1.563240728054472, "learning_rate": 9.215426439809932e-07, "loss": 0.4644, "step": 11408 }, { "epoch": 0.81, "grad_norm": 2.2715797945667107, "learning_rate": 9.208779733415557e-07, "loss": 0.4654, "step": 11409 }, { "epoch": 0.81, "grad_norm": 1.9585246257289508, "learning_rate": 9.202135181751654e-07, "loss": 0.557, "step": 11410 }, { "epoch": 0.81, "grad_norm": 1.5293403027352221, "learning_rate": 9.195492785169208e-07, "loss": 0.4981, "step": 11411 }, { "epoch": 0.81, "grad_norm": 1.7787224817853278, "learning_rate": 9.188852544019105e-07, "loss": 0.5752, "step": 11412 }, { "epoch": 0.81, "grad_norm": 1.7716955244654948, "learning_rate": 9.182214458652095e-07, "loss": 0.5403, "step": 11413 }, { "epoch": 0.81, "grad_norm": 1.8253558377806711, "learning_rate": 9.175578529418828e-07, "loss": 0.5443, "step": 11414 }, { "epoch": 0.81, "grad_norm": 1.7286096604667267, "learning_rate": 9.168944756669845e-07, "loss": 0.5297, "step": 11415 }, { "epoch": 0.81, "grad_norm": 0.6843084230179324, "learning_rate": 9.162313140755541e-07, "loss": 0.4091, "step": 11416 }, { "epoch": 0.81, "grad_norm": 0.658878938244442, "learning_rate": 9.155683682026239e-07, "loss": 0.4212, "step": 11417 }, { "epoch": 0.81, "grad_norm": 1.700426736796978, "learning_rate": 9.149056380832122e-07, "loss": 0.5235, "step": 11418 }, { "epoch": 0.81, "grad_norm": 1.5615117119993294, "learning_rate": 9.142431237523269e-07, "loss": 0.4906, "step": 11419 }, { "epoch": 0.81, "grad_norm": 1.7946811734534318, "learning_rate": 9.135808252449651e-07, "loss": 0.4807, "step": 11420 }, { "epoch": 0.81, "grad_norm": 2.1244915772021558, "learning_rate": 9.12918742596109e-07, "loss": 0.5546, "step": 11421 }, { "epoch": 0.81, "grad_norm": 3.092374141488237, "learning_rate": 9.122568758407358e-07, "loss": 0.5012, "step": 11422 }, { "epoch": 0.81, "grad_norm": 2.105735506070524, "learning_rate": 9.115952250138043e-07, "loss": 0.5561, "step": 11423 }, { "epoch": 0.81, "grad_norm": 1.6100361746014882, "learning_rate": 9.10933790150268e-07, "loss": 0.4793, "step": 11424 }, { "epoch": 0.81, "grad_norm": 2.116538221484533, "learning_rate": 9.102725712850619e-07, "loss": 0.5695, "step": 11425 }, { "epoch": 0.81, "grad_norm": 1.9736229655512154, "learning_rate": 9.096115684531176e-07, "loss": 0.4861, "step": 11426 }, { "epoch": 0.81, "grad_norm": 5.439199921776878, "learning_rate": 9.089507816893517e-07, "loss": 0.5289, "step": 11427 }, { "epoch": 0.81, "grad_norm": 1.5160212969197129, "learning_rate": 9.082902110286656e-07, "loss": 0.4525, "step": 11428 }, { "epoch": 0.81, "grad_norm": 2.5851088918676406, "learning_rate": 9.076298565059576e-07, "loss": 0.4999, "step": 11429 }, { "epoch": 0.81, "grad_norm": 1.8531147122537543, "learning_rate": 9.069697181561061e-07, "loss": 0.5475, "step": 11430 }, { "epoch": 0.81, "grad_norm": 2.050832444769667, "learning_rate": 9.063097960139844e-07, "loss": 0.6065, "step": 11431 }, { "epoch": 0.81, "grad_norm": 2.803483367721511, "learning_rate": 9.056500901144477e-07, "loss": 0.5714, "step": 11432 }, { "epoch": 0.81, "grad_norm": 1.9697589718580937, "learning_rate": 9.049906004923498e-07, "loss": 0.5377, "step": 11433 }, { "epoch": 0.81, "grad_norm": 1.8302198825783438, "learning_rate": 9.043313271825227e-07, "loss": 0.5651, "step": 11434 }, { "epoch": 0.81, "grad_norm": 1.5939394625571612, "learning_rate": 9.036722702197931e-07, "loss": 0.476, "step": 11435 }, { "epoch": 0.81, "grad_norm": 1.6545358083012638, "learning_rate": 9.030134296389747e-07, "loss": 0.5205, "step": 11436 }, { "epoch": 0.81, "grad_norm": 1.9416592647029627, "learning_rate": 9.023548054748688e-07, "loss": 0.4587, "step": 11437 }, { "epoch": 0.81, "grad_norm": 1.6534329514064374, "learning_rate": 9.016963977622684e-07, "loss": 0.5218, "step": 11438 }, { "epoch": 0.81, "grad_norm": 1.6850412836553035, "learning_rate": 9.010382065359496e-07, "loss": 0.5707, "step": 11439 }, { "epoch": 0.81, "grad_norm": 1.7236297411331254, "learning_rate": 9.003802318306825e-07, "loss": 0.5618, "step": 11440 }, { "epoch": 0.81, "grad_norm": 1.7391241322889712, "learning_rate": 8.997224736812227e-07, "loss": 0.5143, "step": 11441 }, { "epoch": 0.81, "grad_norm": 5.665125675988946, "learning_rate": 8.990649321223155e-07, "loss": 0.4675, "step": 11442 }, { "epoch": 0.81, "grad_norm": 1.8963080395212901, "learning_rate": 8.984076071886944e-07, "loss": 0.4834, "step": 11443 }, { "epoch": 0.81, "grad_norm": 1.7768184235166375, "learning_rate": 8.977504989150815e-07, "loss": 0.5415, "step": 11444 }, { "epoch": 0.81, "grad_norm": 1.6207616297188878, "learning_rate": 8.970936073361891e-07, "loss": 0.5057, "step": 11445 }, { "epoch": 0.81, "grad_norm": 2.4941088358110255, "learning_rate": 8.964369324867133e-07, "loss": 0.444, "step": 11446 }, { "epoch": 0.81, "grad_norm": 2.206323691941326, "learning_rate": 8.957804744013438e-07, "loss": 0.4645, "step": 11447 }, { "epoch": 0.81, "grad_norm": 1.9917211199659994, "learning_rate": 8.951242331147564e-07, "loss": 0.5202, "step": 11448 }, { "epoch": 0.81, "grad_norm": 1.6160296092094137, "learning_rate": 8.944682086616163e-07, "loss": 0.5354, "step": 11449 }, { "epoch": 0.81, "grad_norm": 1.6780494878376804, "learning_rate": 8.93812401076578e-07, "loss": 0.53, "step": 11450 }, { "epoch": 0.81, "grad_norm": 2.31782958415964, "learning_rate": 8.931568103942801e-07, "loss": 0.5317, "step": 11451 }, { "epoch": 0.81, "grad_norm": 1.9703059611813696, "learning_rate": 8.925014366493573e-07, "loss": 0.5052, "step": 11452 }, { "epoch": 0.81, "grad_norm": 1.6563945593461646, "learning_rate": 8.91846279876426e-07, "loss": 0.5841, "step": 11453 }, { "epoch": 0.81, "grad_norm": 1.7951701273680511, "learning_rate": 8.911913401100947e-07, "loss": 0.4866, "step": 11454 }, { "epoch": 0.81, "grad_norm": 1.5532410587993264, "learning_rate": 8.905366173849589e-07, "loss": 0.4605, "step": 11455 }, { "epoch": 0.81, "grad_norm": 1.6650844940587148, "learning_rate": 8.898821117356038e-07, "loss": 0.4999, "step": 11456 }, { "epoch": 0.81, "grad_norm": 1.656960214228648, "learning_rate": 8.89227823196604e-07, "loss": 0.4846, "step": 11457 }, { "epoch": 0.81, "grad_norm": 3.992822390116767, "learning_rate": 8.885737518025167e-07, "loss": 0.4796, "step": 11458 }, { "epoch": 0.81, "grad_norm": 1.7239979414563649, "learning_rate": 8.879198975878983e-07, "loss": 0.5789, "step": 11459 }, { "epoch": 0.81, "grad_norm": 1.7357423401790106, "learning_rate": 8.872662605872834e-07, "loss": 0.5026, "step": 11460 }, { "epoch": 0.81, "grad_norm": 1.707551336459569, "learning_rate": 8.866128408352015e-07, "loss": 0.51, "step": 11461 }, { "epoch": 0.81, "grad_norm": 1.5934933300180776, "learning_rate": 8.85959638366165e-07, "loss": 0.4601, "step": 11462 }, { "epoch": 0.81, "grad_norm": 1.659670916654916, "learning_rate": 8.853066532146832e-07, "loss": 0.5845, "step": 11463 }, { "epoch": 0.81, "grad_norm": 1.5474338109099939, "learning_rate": 8.846538854152448e-07, "loss": 0.4624, "step": 11464 }, { "epoch": 0.81, "grad_norm": 1.6079674831858195, "learning_rate": 8.840013350023329e-07, "loss": 0.5002, "step": 11465 }, { "epoch": 0.81, "grad_norm": 1.8889093872796265, "learning_rate": 8.833490020104179e-07, "loss": 0.5344, "step": 11466 }, { "epoch": 0.81, "grad_norm": 0.7429156056737085, "learning_rate": 8.826968864739577e-07, "loss": 0.4208, "step": 11467 }, { "epoch": 0.81, "grad_norm": 1.7621530148233677, "learning_rate": 8.820449884274001e-07, "loss": 0.5372, "step": 11468 }, { "epoch": 0.81, "grad_norm": 0.7679718145561856, "learning_rate": 8.813933079051789e-07, "loss": 0.43, "step": 11469 }, { "epoch": 0.81, "grad_norm": 1.8348658213916487, "learning_rate": 8.807418449417188e-07, "loss": 0.4999, "step": 11470 }, { "epoch": 0.81, "grad_norm": 2.01681997159082, "learning_rate": 8.800905995714321e-07, "loss": 0.6066, "step": 11471 }, { "epoch": 0.81, "grad_norm": 1.9510206445409704, "learning_rate": 8.794395718287202e-07, "loss": 0.5249, "step": 11472 }, { "epoch": 0.81, "grad_norm": 2.3534530468026187, "learning_rate": 8.78788761747974e-07, "loss": 0.5211, "step": 11473 }, { "epoch": 0.81, "grad_norm": 1.7750022594290558, "learning_rate": 8.781381693635671e-07, "loss": 0.5252, "step": 11474 }, { "epoch": 0.81, "grad_norm": 2.416616478384309, "learning_rate": 8.77487794709872e-07, "loss": 0.517, "step": 11475 }, { "epoch": 0.81, "grad_norm": 0.6853277537672671, "learning_rate": 8.768376378212384e-07, "loss": 0.4125, "step": 11476 }, { "epoch": 0.81, "grad_norm": 1.5516992524116755, "learning_rate": 8.761876987320128e-07, "loss": 0.4203, "step": 11477 }, { "epoch": 0.81, "grad_norm": 0.723343585241408, "learning_rate": 8.755379774765261e-07, "loss": 0.418, "step": 11478 }, { "epoch": 0.81, "grad_norm": 1.8529870511940283, "learning_rate": 8.74888474089099e-07, "loss": 0.5322, "step": 11479 }, { "epoch": 0.81, "grad_norm": 1.5170711750233299, "learning_rate": 8.742391886040413e-07, "loss": 0.5327, "step": 11480 }, { "epoch": 0.81, "grad_norm": 1.6600922148936235, "learning_rate": 8.735901210556475e-07, "loss": 0.4903, "step": 11481 }, { "epoch": 0.81, "grad_norm": 2.1990118040736797, "learning_rate": 8.729412714782082e-07, "loss": 0.5902, "step": 11482 }, { "epoch": 0.81, "grad_norm": 1.9267919002565872, "learning_rate": 8.722926399059939e-07, "loss": 0.4993, "step": 11483 }, { "epoch": 0.81, "grad_norm": 1.813239365500518, "learning_rate": 8.716442263732694e-07, "loss": 0.5088, "step": 11484 }, { "epoch": 0.82, "grad_norm": 1.7922215975103903, "learning_rate": 8.70996030914285e-07, "loss": 0.5502, "step": 11485 }, { "epoch": 0.82, "grad_norm": 1.5544469128362726, "learning_rate": 8.703480535632819e-07, "loss": 0.5113, "step": 11486 }, { "epoch": 0.82, "grad_norm": 1.6524990041165688, "learning_rate": 8.697002943544886e-07, "loss": 0.5278, "step": 11487 }, { "epoch": 0.82, "grad_norm": 1.9136300929806342, "learning_rate": 8.6905275332212e-07, "loss": 0.5749, "step": 11488 }, { "epoch": 0.82, "grad_norm": 1.6001608388819686, "learning_rate": 8.684054305003825e-07, "loss": 0.523, "step": 11489 }, { "epoch": 0.82, "grad_norm": 3.5135796435306514, "learning_rate": 8.677583259234695e-07, "loss": 0.5798, "step": 11490 }, { "epoch": 0.82, "grad_norm": 1.7531176205882582, "learning_rate": 8.671114396255653e-07, "loss": 0.4659, "step": 11491 }, { "epoch": 0.82, "grad_norm": 2.4634712993660233, "learning_rate": 8.66464771640837e-07, "loss": 0.4793, "step": 11492 }, { "epoch": 0.82, "grad_norm": 1.5767256450655862, "learning_rate": 8.658183220034461e-07, "loss": 0.4956, "step": 11493 }, { "epoch": 0.82, "grad_norm": 1.6394903394199316, "learning_rate": 8.651720907475419e-07, "loss": 0.505, "step": 11494 }, { "epoch": 0.82, "grad_norm": 1.687410782779506, "learning_rate": 8.645260779072573e-07, "loss": 0.5582, "step": 11495 }, { "epoch": 0.82, "grad_norm": 2.173868809061579, "learning_rate": 8.638802835167176e-07, "loss": 0.5596, "step": 11496 }, { "epoch": 0.82, "grad_norm": 1.707366935398623, "learning_rate": 8.632347076100367e-07, "loss": 0.5684, "step": 11497 }, { "epoch": 0.82, "grad_norm": 1.5586250427886035, "learning_rate": 8.625893502213173e-07, "loss": 0.5454, "step": 11498 }, { "epoch": 0.82, "grad_norm": 1.6356103383267382, "learning_rate": 8.619442113846466e-07, "loss": 0.5388, "step": 11499 }, { "epoch": 0.82, "grad_norm": 2.311119373313283, "learning_rate": 8.612992911341039e-07, "loss": 0.5511, "step": 11500 }, { "epoch": 0.82, "grad_norm": 0.7040610376391158, "learning_rate": 8.606545895037565e-07, "loss": 0.459, "step": 11501 }, { "epoch": 0.82, "grad_norm": 3.082658154526252, "learning_rate": 8.600101065276601e-07, "loss": 0.519, "step": 11502 }, { "epoch": 0.82, "grad_norm": 1.985940852463969, "learning_rate": 8.593658422398594e-07, "loss": 0.5403, "step": 11503 }, { "epoch": 0.82, "grad_norm": 1.7944157440093698, "learning_rate": 8.587217966743828e-07, "loss": 0.4617, "step": 11504 }, { "epoch": 0.82, "grad_norm": 2.306862298628854, "learning_rate": 8.580779698652553e-07, "loss": 0.5163, "step": 11505 }, { "epoch": 0.82, "grad_norm": 1.820859079821516, "learning_rate": 8.574343618464836e-07, "loss": 0.5818, "step": 11506 }, { "epoch": 0.82, "grad_norm": 1.8192055686064592, "learning_rate": 8.567909726520657e-07, "loss": 0.5013, "step": 11507 }, { "epoch": 0.82, "grad_norm": 1.7073168096070146, "learning_rate": 8.561478023159875e-07, "loss": 0.5267, "step": 11508 }, { "epoch": 0.82, "grad_norm": 1.750956375047228, "learning_rate": 8.555048508722235e-07, "loss": 0.4484, "step": 11509 }, { "epoch": 0.82, "grad_norm": 1.6304771863865426, "learning_rate": 8.548621183547384e-07, "loss": 0.5345, "step": 11510 }, { "epoch": 0.82, "grad_norm": 1.7308141324665218, "learning_rate": 8.542196047974794e-07, "loss": 0.4844, "step": 11511 }, { "epoch": 0.82, "grad_norm": 1.5977599623795842, "learning_rate": 8.535773102343909e-07, "loss": 0.4633, "step": 11512 }, { "epoch": 0.82, "grad_norm": 1.8062968555588848, "learning_rate": 8.52935234699398e-07, "loss": 0.5405, "step": 11513 }, { "epoch": 0.82, "grad_norm": 1.5317284958786475, "learning_rate": 8.522933782264175e-07, "loss": 0.4734, "step": 11514 }, { "epoch": 0.82, "grad_norm": 0.689423372575403, "learning_rate": 8.516517408493557e-07, "loss": 0.456, "step": 11515 }, { "epoch": 0.82, "grad_norm": 1.461893448043425, "learning_rate": 8.510103226021055e-07, "loss": 0.4708, "step": 11516 }, { "epoch": 0.82, "grad_norm": 1.422726554155183, "learning_rate": 8.503691235185496e-07, "loss": 0.498, "step": 11517 }, { "epoch": 0.82, "grad_norm": 1.9341917794590062, "learning_rate": 8.497281436325566e-07, "loss": 0.4984, "step": 11518 }, { "epoch": 0.82, "grad_norm": 1.9418836736921243, "learning_rate": 8.490873829779855e-07, "loss": 0.5049, "step": 11519 }, { "epoch": 0.82, "grad_norm": 1.9655926574334055, "learning_rate": 8.484468415886848e-07, "loss": 0.5143, "step": 11520 }, { "epoch": 0.82, "grad_norm": 1.629822552308648, "learning_rate": 8.478065194984892e-07, "loss": 0.5212, "step": 11521 }, { "epoch": 0.82, "grad_norm": 2.4831620859711565, "learning_rate": 8.471664167412236e-07, "loss": 0.5015, "step": 11522 }, { "epoch": 0.82, "grad_norm": 1.6858985873627381, "learning_rate": 8.465265333506972e-07, "loss": 0.5041, "step": 11523 }, { "epoch": 0.82, "grad_norm": 2.3919290090708385, "learning_rate": 8.458868693607158e-07, "loss": 0.5583, "step": 11524 }, { "epoch": 0.82, "grad_norm": 1.5324542921633564, "learning_rate": 8.452474248050646e-07, "loss": 0.4981, "step": 11525 }, { "epoch": 0.82, "grad_norm": 1.9272217873365456, "learning_rate": 8.446081997175232e-07, "loss": 0.5556, "step": 11526 }, { "epoch": 0.82, "grad_norm": 1.7172861798504175, "learning_rate": 8.439691941318567e-07, "loss": 0.4744, "step": 11527 }, { "epoch": 0.82, "grad_norm": 1.9780276446129526, "learning_rate": 8.433304080818211e-07, "loss": 0.5384, "step": 11528 }, { "epoch": 0.82, "grad_norm": 2.0104438520906216, "learning_rate": 8.42691841601157e-07, "loss": 0.4952, "step": 11529 }, { "epoch": 0.82, "grad_norm": 1.7761761015789197, "learning_rate": 8.420534947235953e-07, "loss": 0.5429, "step": 11530 }, { "epoch": 0.82, "grad_norm": 1.759222397283599, "learning_rate": 8.414153674828596e-07, "loss": 0.474, "step": 11531 }, { "epoch": 0.82, "grad_norm": 2.2875978147326923, "learning_rate": 8.407774599126539e-07, "loss": 0.4985, "step": 11532 }, { "epoch": 0.82, "grad_norm": 1.7878639170486843, "learning_rate": 8.401397720466775e-07, "loss": 0.4295, "step": 11533 }, { "epoch": 0.82, "grad_norm": 0.7807324431314969, "learning_rate": 8.395023039186117e-07, "loss": 0.4311, "step": 11534 }, { "epoch": 0.82, "grad_norm": 1.4920564002064747, "learning_rate": 8.388650555621342e-07, "loss": 0.4513, "step": 11535 }, { "epoch": 0.82, "grad_norm": 10.182404370683514, "learning_rate": 8.382280270109028e-07, "loss": 0.5202, "step": 11536 }, { "epoch": 0.82, "grad_norm": 1.5848791832093747, "learning_rate": 8.375912182985696e-07, "loss": 0.514, "step": 11537 }, { "epoch": 0.82, "grad_norm": 2.494739828001238, "learning_rate": 8.369546294587722e-07, "loss": 0.5374, "step": 11538 }, { "epoch": 0.82, "grad_norm": 2.151964869414437, "learning_rate": 8.363182605251375e-07, "loss": 0.5471, "step": 11539 }, { "epoch": 0.82, "grad_norm": 1.706957449465364, "learning_rate": 8.356821115312818e-07, "loss": 0.4943, "step": 11540 }, { "epoch": 0.82, "grad_norm": 1.8028750027748284, "learning_rate": 8.350461825108053e-07, "loss": 0.5558, "step": 11541 }, { "epoch": 0.82, "grad_norm": 1.8784423025197514, "learning_rate": 8.344104734973047e-07, "loss": 0.5587, "step": 11542 }, { "epoch": 0.82, "grad_norm": 2.216868332789322, "learning_rate": 8.337749845243564e-07, "loss": 0.509, "step": 11543 }, { "epoch": 0.82, "grad_norm": 2.5583507085985717, "learning_rate": 8.331397156255311e-07, "loss": 0.4449, "step": 11544 }, { "epoch": 0.82, "grad_norm": 1.6668821286616307, "learning_rate": 8.32504666834385e-07, "loss": 0.5473, "step": 11545 }, { "epoch": 0.82, "grad_norm": 1.5236560093457887, "learning_rate": 8.318698381844637e-07, "loss": 0.5622, "step": 11546 }, { "epoch": 0.82, "grad_norm": 1.9576424583108563, "learning_rate": 8.312352297093024e-07, "loss": 0.5772, "step": 11547 }, { "epoch": 0.82, "grad_norm": 1.7300432879184895, "learning_rate": 8.306008414424205e-07, "loss": 0.5249, "step": 11548 }, { "epoch": 0.82, "grad_norm": 1.9168756159085125, "learning_rate": 8.299666734173301e-07, "loss": 0.521, "step": 11549 }, { "epoch": 0.82, "grad_norm": 1.5301044041229603, "learning_rate": 8.293327256675299e-07, "loss": 0.4391, "step": 11550 }, { "epoch": 0.82, "grad_norm": 2.092994392897207, "learning_rate": 8.286989982265076e-07, "loss": 0.5878, "step": 11551 }, { "epoch": 0.82, "grad_norm": 1.9989058805350945, "learning_rate": 8.280654911277397e-07, "loss": 0.5343, "step": 11552 }, { "epoch": 0.82, "grad_norm": 1.7247896784974281, "learning_rate": 8.274322044046868e-07, "loss": 0.5484, "step": 11553 }, { "epoch": 0.82, "grad_norm": 1.6367108020107815, "learning_rate": 8.267991380908052e-07, "loss": 0.5043, "step": 11554 }, { "epoch": 0.82, "grad_norm": 1.8115965964767373, "learning_rate": 8.261662922195329e-07, "loss": 0.528, "step": 11555 }, { "epoch": 0.82, "grad_norm": 2.040919127651128, "learning_rate": 8.255336668243003e-07, "loss": 0.5321, "step": 11556 }, { "epoch": 0.82, "grad_norm": 1.6971381779736305, "learning_rate": 8.249012619385238e-07, "loss": 0.5059, "step": 11557 }, { "epoch": 0.82, "grad_norm": 1.9282287982360757, "learning_rate": 8.242690775956102e-07, "loss": 0.5388, "step": 11558 }, { "epoch": 0.82, "grad_norm": 1.8168832923298792, "learning_rate": 8.236371138289545e-07, "loss": 0.5172, "step": 11559 }, { "epoch": 0.82, "grad_norm": 1.6344013476910677, "learning_rate": 8.230053706719355e-07, "loss": 0.512, "step": 11560 }, { "epoch": 0.82, "grad_norm": 1.8039543126779392, "learning_rate": 8.223738481579285e-07, "loss": 0.5693, "step": 11561 }, { "epoch": 0.82, "grad_norm": 1.5692278929487165, "learning_rate": 8.217425463202899e-07, "loss": 0.5551, "step": 11562 }, { "epoch": 0.82, "grad_norm": 1.5922385459637651, "learning_rate": 8.211114651923685e-07, "loss": 0.4918, "step": 11563 }, { "epoch": 0.82, "grad_norm": 1.5218168202771971, "learning_rate": 8.204806048074975e-07, "loss": 0.4982, "step": 11564 }, { "epoch": 0.82, "grad_norm": 1.823193271348865, "learning_rate": 8.198499651990055e-07, "loss": 0.5585, "step": 11565 }, { "epoch": 0.82, "grad_norm": 0.633560106202617, "learning_rate": 8.192195464002011e-07, "loss": 0.4236, "step": 11566 }, { "epoch": 0.82, "grad_norm": 1.8244940176775135, "learning_rate": 8.185893484443868e-07, "loss": 0.5012, "step": 11567 }, { "epoch": 0.82, "grad_norm": 2.0991864557002105, "learning_rate": 8.179593713648521e-07, "loss": 0.571, "step": 11568 }, { "epoch": 0.82, "grad_norm": 1.6806775986254028, "learning_rate": 8.173296151948734e-07, "loss": 0.5234, "step": 11569 }, { "epoch": 0.82, "grad_norm": 1.5381687093239398, "learning_rate": 8.167000799677189e-07, "loss": 0.4998, "step": 11570 }, { "epoch": 0.82, "grad_norm": 1.9017646926837435, "learning_rate": 8.160707657166389e-07, "loss": 0.4939, "step": 11571 }, { "epoch": 0.82, "grad_norm": 1.447012124211361, "learning_rate": 8.154416724748798e-07, "loss": 0.4716, "step": 11572 }, { "epoch": 0.82, "grad_norm": 2.036569453031018, "learning_rate": 8.148128002756705e-07, "loss": 0.5719, "step": 11573 }, { "epoch": 0.82, "grad_norm": 1.5365458957759495, "learning_rate": 8.141841491522296e-07, "loss": 0.5018, "step": 11574 }, { "epoch": 0.82, "grad_norm": 1.5244753900794081, "learning_rate": 8.135557191377658e-07, "loss": 0.5149, "step": 11575 }, { "epoch": 0.82, "grad_norm": 1.638326131750024, "learning_rate": 8.129275102654749e-07, "loss": 0.4747, "step": 11576 }, { "epoch": 0.82, "grad_norm": 1.558480860457365, "learning_rate": 8.122995225685415e-07, "loss": 0.5375, "step": 11577 }, { "epoch": 0.82, "grad_norm": 0.6920347985933656, "learning_rate": 8.116717560801357e-07, "loss": 0.4196, "step": 11578 }, { "epoch": 0.82, "grad_norm": 1.7716018719761366, "learning_rate": 8.110442108334199e-07, "loss": 0.5003, "step": 11579 }, { "epoch": 0.82, "grad_norm": 1.7713580366394237, "learning_rate": 8.104168868615431e-07, "loss": 0.526, "step": 11580 }, { "epoch": 0.82, "grad_norm": 1.7611812733122423, "learning_rate": 8.097897841976426e-07, "loss": 0.5163, "step": 11581 }, { "epoch": 0.82, "grad_norm": 1.4564373158717063, "learning_rate": 8.091629028748454e-07, "loss": 0.4693, "step": 11582 }, { "epoch": 0.82, "grad_norm": 1.9603695796111504, "learning_rate": 8.085362429262617e-07, "loss": 0.5753, "step": 11583 }, { "epoch": 0.82, "grad_norm": 0.7165583835320107, "learning_rate": 8.079098043849981e-07, "loss": 0.429, "step": 11584 }, { "epoch": 0.82, "grad_norm": 1.5963338897454797, "learning_rate": 8.072835872841428e-07, "loss": 0.5743, "step": 11585 }, { "epoch": 0.82, "grad_norm": 1.5930881542000306, "learning_rate": 8.066575916567754e-07, "loss": 0.4815, "step": 11586 }, { "epoch": 0.82, "grad_norm": 1.762880942878839, "learning_rate": 8.060318175359627e-07, "loss": 0.4622, "step": 11587 }, { "epoch": 0.82, "grad_norm": 1.6626947187831724, "learning_rate": 8.054062649547606e-07, "loss": 0.5603, "step": 11588 }, { "epoch": 0.82, "grad_norm": 1.7182211689187945, "learning_rate": 8.047809339462137e-07, "loss": 0.5796, "step": 11589 }, { "epoch": 0.82, "grad_norm": 1.5245330356016016, "learning_rate": 8.041558245433512e-07, "loss": 0.5426, "step": 11590 }, { "epoch": 0.82, "grad_norm": 1.6512528106432813, "learning_rate": 8.035309367791977e-07, "loss": 0.5161, "step": 11591 }, { "epoch": 0.82, "grad_norm": 1.548290467019442, "learning_rate": 8.029062706867585e-07, "loss": 0.5379, "step": 11592 }, { "epoch": 0.82, "grad_norm": 1.6895400350456444, "learning_rate": 8.022818262990329e-07, "loss": 0.5261, "step": 11593 }, { "epoch": 0.82, "grad_norm": 1.540972705674565, "learning_rate": 8.016576036490026e-07, "loss": 0.4474, "step": 11594 }, { "epoch": 0.82, "grad_norm": 1.5222165027930799, "learning_rate": 8.010336027696464e-07, "loss": 0.5043, "step": 11595 }, { "epoch": 0.82, "grad_norm": 1.9340380376426691, "learning_rate": 8.004098236939217e-07, "loss": 0.4998, "step": 11596 }, { "epoch": 0.82, "grad_norm": 1.5827584122072866, "learning_rate": 7.997862664547807e-07, "loss": 0.4416, "step": 11597 }, { "epoch": 0.82, "grad_norm": 1.533233801278073, "learning_rate": 7.991629310851612e-07, "loss": 0.5443, "step": 11598 }, { "epoch": 0.82, "grad_norm": 1.9405392941711468, "learning_rate": 7.985398176179898e-07, "loss": 0.5162, "step": 11599 }, { "epoch": 0.82, "grad_norm": 1.6719826690154074, "learning_rate": 7.979169260861835e-07, "loss": 0.531, "step": 11600 }, { "epoch": 0.82, "grad_norm": 0.6516388496123298, "learning_rate": 7.972942565226422e-07, "loss": 0.4382, "step": 11601 }, { "epoch": 0.82, "grad_norm": 6.206022581514963, "learning_rate": 7.966718089602587e-07, "loss": 0.5562, "step": 11602 }, { "epoch": 0.82, "grad_norm": 0.6926899604236465, "learning_rate": 7.960495834319132e-07, "loss": 0.432, "step": 11603 }, { "epoch": 0.82, "grad_norm": 1.6910516028015816, "learning_rate": 7.954275799704742e-07, "loss": 0.551, "step": 11604 }, { "epoch": 0.82, "grad_norm": 1.4081090889849808, "learning_rate": 7.948057986087971e-07, "loss": 0.4868, "step": 11605 }, { "epoch": 0.82, "grad_norm": 1.821448545448793, "learning_rate": 7.941842393797266e-07, "loss": 0.5431, "step": 11606 }, { "epoch": 0.82, "grad_norm": 1.5977710457350438, "learning_rate": 7.935629023160974e-07, "loss": 0.5103, "step": 11607 }, { "epoch": 0.82, "grad_norm": 2.3195857929261297, "learning_rate": 7.929417874507272e-07, "loss": 0.4837, "step": 11608 }, { "epoch": 0.82, "grad_norm": 1.7172181770928112, "learning_rate": 7.92320894816428e-07, "loss": 0.5664, "step": 11609 }, { "epoch": 0.82, "grad_norm": 1.8121893801856221, "learning_rate": 7.917002244459965e-07, "loss": 0.5004, "step": 11610 }, { "epoch": 0.82, "grad_norm": 1.8091082745591047, "learning_rate": 7.910797763722189e-07, "loss": 0.4846, "step": 11611 }, { "epoch": 0.82, "grad_norm": 0.6831671339817541, "learning_rate": 7.904595506278701e-07, "loss": 0.3926, "step": 11612 }, { "epoch": 0.82, "grad_norm": 1.5865678877004648, "learning_rate": 7.898395472457094e-07, "loss": 0.5601, "step": 11613 }, { "epoch": 0.82, "grad_norm": 7.083476568796669, "learning_rate": 7.892197662584921e-07, "loss": 0.4876, "step": 11614 }, { "epoch": 0.82, "grad_norm": 3.065362923699742, "learning_rate": 7.886002076989541e-07, "loss": 0.4968, "step": 11615 }, { "epoch": 0.82, "grad_norm": 2.134446674871776, "learning_rate": 7.879808715998233e-07, "loss": 0.5222, "step": 11616 }, { "epoch": 0.82, "grad_norm": 1.7318602253573585, "learning_rate": 7.873617579938148e-07, "loss": 0.5781, "step": 11617 }, { "epoch": 0.82, "grad_norm": 2.2824437068092203, "learning_rate": 7.867428669136324e-07, "loss": 0.554, "step": 11618 }, { "epoch": 0.82, "grad_norm": 3.76347032225722, "learning_rate": 7.861241983919699e-07, "loss": 0.4886, "step": 11619 }, { "epoch": 0.82, "grad_norm": 0.768536978835744, "learning_rate": 7.855057524615029e-07, "loss": 0.4298, "step": 11620 }, { "epoch": 0.82, "grad_norm": 1.5351437618232844, "learning_rate": 7.848875291549058e-07, "loss": 0.5141, "step": 11621 }, { "epoch": 0.82, "grad_norm": 1.9998997757821406, "learning_rate": 7.842695285048302e-07, "loss": 0.5117, "step": 11622 }, { "epoch": 0.82, "grad_norm": 1.5340231765478325, "learning_rate": 7.836517505439245e-07, "loss": 0.5177, "step": 11623 }, { "epoch": 0.82, "grad_norm": 1.5633604037210223, "learning_rate": 7.830341953048182e-07, "loss": 0.5362, "step": 11624 }, { "epoch": 0.82, "grad_norm": 1.7255333076395678, "learning_rate": 7.824168628201356e-07, "loss": 0.5331, "step": 11625 }, { "epoch": 0.83, "grad_norm": 1.730343933641218, "learning_rate": 7.817997531224864e-07, "loss": 0.5212, "step": 11626 }, { "epoch": 0.83, "grad_norm": 2.0319644153570278, "learning_rate": 7.811828662444664e-07, "loss": 0.5393, "step": 11627 }, { "epoch": 0.83, "grad_norm": 2.3741106493630455, "learning_rate": 7.805662022186633e-07, "loss": 0.5455, "step": 11628 }, { "epoch": 0.83, "grad_norm": 0.6876200671960324, "learning_rate": 7.799497610776502e-07, "loss": 0.4272, "step": 11629 }, { "epoch": 0.83, "grad_norm": 1.5761733020522175, "learning_rate": 7.793335428539917e-07, "loss": 0.5094, "step": 11630 }, { "epoch": 0.83, "grad_norm": 1.5652783913939403, "learning_rate": 7.787175475802355e-07, "loss": 0.4975, "step": 11631 }, { "epoch": 0.83, "grad_norm": 1.75350994889242, "learning_rate": 7.78101775288922e-07, "loss": 0.4947, "step": 11632 }, { "epoch": 0.83, "grad_norm": 1.911594606584196, "learning_rate": 7.774862260125787e-07, "loss": 0.5336, "step": 11633 }, { "epoch": 0.83, "grad_norm": 2.4189484961837233, "learning_rate": 7.76870899783721e-07, "loss": 0.4802, "step": 11634 }, { "epoch": 0.83, "grad_norm": 1.7074099623664252, "learning_rate": 7.762557966348522e-07, "loss": 0.5423, "step": 11635 }, { "epoch": 0.83, "grad_norm": 2.9068816906762174, "learning_rate": 7.75640916598464e-07, "loss": 0.4737, "step": 11636 }, { "epoch": 0.83, "grad_norm": 1.7653720106601511, "learning_rate": 7.750262597070379e-07, "loss": 0.5738, "step": 11637 }, { "epoch": 0.83, "grad_norm": 1.599249729705426, "learning_rate": 7.7441182599304e-07, "loss": 0.4909, "step": 11638 }, { "epoch": 0.83, "grad_norm": 1.712011193510669, "learning_rate": 7.73797615488927e-07, "loss": 0.5507, "step": 11639 }, { "epoch": 0.83, "grad_norm": 1.9836283556759475, "learning_rate": 7.731836282271449e-07, "loss": 0.6292, "step": 11640 }, { "epoch": 0.83, "grad_norm": 0.6742657937245757, "learning_rate": 7.725698642401258e-07, "loss": 0.4303, "step": 11641 }, { "epoch": 0.83, "grad_norm": 1.8265637047221108, "learning_rate": 7.719563235602922e-07, "loss": 0.5418, "step": 11642 }, { "epoch": 0.83, "grad_norm": 1.9144035754325404, "learning_rate": 7.7134300622005e-07, "loss": 0.5406, "step": 11643 }, { "epoch": 0.83, "grad_norm": 2.396229602480046, "learning_rate": 7.707299122518008e-07, "loss": 0.5448, "step": 11644 }, { "epoch": 0.83, "grad_norm": 2.203802831481613, "learning_rate": 7.701170416879273e-07, "loss": 0.4956, "step": 11645 }, { "epoch": 0.83, "grad_norm": 1.7071202008390076, "learning_rate": 7.695043945608049e-07, "loss": 0.4935, "step": 11646 }, { "epoch": 0.83, "grad_norm": 1.579683455180068, "learning_rate": 7.688919709027948e-07, "loss": 0.5138, "step": 11647 }, { "epoch": 0.83, "grad_norm": 2.2847832749340427, "learning_rate": 7.682797707462475e-07, "loss": 0.5756, "step": 11648 }, { "epoch": 0.83, "grad_norm": 1.6810715770467088, "learning_rate": 7.676677941235034e-07, "loss": 0.4561, "step": 11649 }, { "epoch": 0.83, "grad_norm": 1.646865140092308, "learning_rate": 7.670560410668865e-07, "loss": 0.5434, "step": 11650 }, { "epoch": 0.83, "grad_norm": 2.040736753589956, "learning_rate": 7.664445116087122e-07, "loss": 0.5095, "step": 11651 }, { "epoch": 0.83, "grad_norm": 1.7544003330955769, "learning_rate": 7.658332057812839e-07, "loss": 0.4945, "step": 11652 }, { "epoch": 0.83, "grad_norm": 1.5459964850226318, "learning_rate": 7.652221236168933e-07, "loss": 0.4803, "step": 11653 }, { "epoch": 0.83, "grad_norm": 1.866088022518965, "learning_rate": 7.646112651478199e-07, "loss": 0.5038, "step": 11654 }, { "epoch": 0.83, "grad_norm": 3.7668379754432606, "learning_rate": 7.640006304063302e-07, "loss": 0.5736, "step": 11655 }, { "epoch": 0.83, "grad_norm": 1.4672347136553276, "learning_rate": 7.633902194246823e-07, "loss": 0.5311, "step": 11656 }, { "epoch": 0.83, "grad_norm": 1.7503436442404559, "learning_rate": 7.627800322351175e-07, "loss": 0.5341, "step": 11657 }, { "epoch": 0.83, "grad_norm": 1.46854657094346, "learning_rate": 7.621700688698691e-07, "loss": 0.4446, "step": 11658 }, { "epoch": 0.83, "grad_norm": 1.6209562288034756, "learning_rate": 7.615603293611568e-07, "loss": 0.5375, "step": 11659 }, { "epoch": 0.83, "grad_norm": 1.6191683804996029, "learning_rate": 7.609508137411914e-07, "loss": 0.4721, "step": 11660 }, { "epoch": 0.83, "grad_norm": 2.3084684252873737, "learning_rate": 7.603415220421667e-07, "loss": 0.5152, "step": 11661 }, { "epoch": 0.83, "grad_norm": 1.688203648679798, "learning_rate": 7.597324542962676e-07, "loss": 0.4817, "step": 11662 }, { "epoch": 0.83, "grad_norm": 1.6534277400131838, "learning_rate": 7.591236105356703e-07, "loss": 0.5285, "step": 11663 }, { "epoch": 0.83, "grad_norm": 2.1415265862650403, "learning_rate": 7.585149907925327e-07, "loss": 0.5078, "step": 11664 }, { "epoch": 0.83, "grad_norm": 0.6438326529136835, "learning_rate": 7.579065950990072e-07, "loss": 0.3849, "step": 11665 }, { "epoch": 0.83, "grad_norm": 1.8160514703506496, "learning_rate": 7.572984234872266e-07, "loss": 0.4987, "step": 11666 }, { "epoch": 0.83, "grad_norm": 1.9876701666238215, "learning_rate": 7.566904759893217e-07, "loss": 0.574, "step": 11667 }, { "epoch": 0.83, "grad_norm": 1.5309174226186526, "learning_rate": 7.560827526374037e-07, "loss": 0.5258, "step": 11668 }, { "epoch": 0.83, "grad_norm": 1.5382654283677692, "learning_rate": 7.554752534635745e-07, "loss": 0.4683, "step": 11669 }, { "epoch": 0.83, "grad_norm": 0.7456717723821226, "learning_rate": 7.548679784999253e-07, "loss": 0.4317, "step": 11670 }, { "epoch": 0.83, "grad_norm": 1.85388900257951, "learning_rate": 7.542609277785335e-07, "loss": 0.563, "step": 11671 }, { "epoch": 0.83, "grad_norm": 0.6245303467254607, "learning_rate": 7.536541013314669e-07, "loss": 0.4372, "step": 11672 }, { "epoch": 0.83, "grad_norm": 1.5765350339689714, "learning_rate": 7.530474991907777e-07, "loss": 0.5391, "step": 11673 }, { "epoch": 0.83, "grad_norm": 1.5800575851806558, "learning_rate": 7.524411213885119e-07, "loss": 0.5124, "step": 11674 }, { "epoch": 0.83, "grad_norm": 1.5159862773659922, "learning_rate": 7.518349679566977e-07, "loss": 0.5266, "step": 11675 }, { "epoch": 0.83, "grad_norm": 1.7538921323799161, "learning_rate": 7.512290389273558e-07, "loss": 0.5369, "step": 11676 }, { "epoch": 0.83, "grad_norm": 1.7768138885011255, "learning_rate": 7.506233343324925e-07, "loss": 0.5111, "step": 11677 }, { "epoch": 0.83, "grad_norm": 1.7931886403188386, "learning_rate": 7.500178542041037e-07, "loss": 0.526, "step": 11678 }, { "epoch": 0.83, "grad_norm": 2.4218894899436174, "learning_rate": 7.494125985741735e-07, "loss": 0.513, "step": 11679 }, { "epoch": 0.83, "grad_norm": 1.5639437351817747, "learning_rate": 7.48807567474672e-07, "loss": 0.5349, "step": 11680 }, { "epoch": 0.83, "grad_norm": 2.3311878373056234, "learning_rate": 7.482027609375597e-07, "loss": 0.5169, "step": 11681 }, { "epoch": 0.83, "grad_norm": 1.6852592687408217, "learning_rate": 7.475981789947845e-07, "loss": 0.5484, "step": 11682 }, { "epoch": 0.83, "grad_norm": 2.1266636588728067, "learning_rate": 7.46993821678283e-07, "loss": 0.5882, "step": 11683 }, { "epoch": 0.83, "grad_norm": 2.5396118765331153, "learning_rate": 7.463896890199785e-07, "loss": 0.5134, "step": 11684 }, { "epoch": 0.83, "grad_norm": 1.4796675396505603, "learning_rate": 7.457857810517838e-07, "loss": 0.4769, "step": 11685 }, { "epoch": 0.83, "grad_norm": 0.6278306783613304, "learning_rate": 7.451820978056007e-07, "loss": 0.4323, "step": 11686 }, { "epoch": 0.83, "grad_norm": 1.9784204780731713, "learning_rate": 7.445786393133153e-07, "loss": 0.5458, "step": 11687 }, { "epoch": 0.83, "grad_norm": 1.7141498533420882, "learning_rate": 7.439754056068054e-07, "loss": 0.5525, "step": 11688 }, { "epoch": 0.83, "grad_norm": 1.582235601773808, "learning_rate": 7.433723967179363e-07, "loss": 0.4731, "step": 11689 }, { "epoch": 0.83, "grad_norm": 1.2928042213722004, "learning_rate": 7.427696126785605e-07, "loss": 0.4159, "step": 11690 }, { "epoch": 0.83, "grad_norm": 1.8678097834504697, "learning_rate": 7.421670535205206e-07, "loss": 0.5181, "step": 11691 }, { "epoch": 0.83, "grad_norm": 0.7249687656500796, "learning_rate": 7.41564719275642e-07, "loss": 0.4289, "step": 11692 }, { "epoch": 0.83, "grad_norm": 1.748509234238804, "learning_rate": 7.409626099757467e-07, "loss": 0.5795, "step": 11693 }, { "epoch": 0.83, "grad_norm": 1.7393461391644058, "learning_rate": 7.403607256526363e-07, "loss": 0.5287, "step": 11694 }, { "epoch": 0.83, "grad_norm": 0.7861816181915153, "learning_rate": 7.397590663381077e-07, "loss": 0.4058, "step": 11695 }, { "epoch": 0.83, "grad_norm": 1.7305382316717044, "learning_rate": 7.391576320639388e-07, "loss": 0.5865, "step": 11696 }, { "epoch": 0.83, "grad_norm": 1.805045647217426, "learning_rate": 7.385564228619029e-07, "loss": 0.5594, "step": 11697 }, { "epoch": 0.83, "grad_norm": 1.6263114811584123, "learning_rate": 7.37955438763756e-07, "loss": 0.5061, "step": 11698 }, { "epoch": 0.83, "grad_norm": 1.8189516494093805, "learning_rate": 7.373546798012448e-07, "loss": 0.5522, "step": 11699 }, { "epoch": 0.83, "grad_norm": 1.7546626891983064, "learning_rate": 7.367541460061029e-07, "loss": 0.5443, "step": 11700 }, { "epoch": 0.83, "grad_norm": 1.648723629318573, "learning_rate": 7.36153837410053e-07, "loss": 0.529, "step": 11701 }, { "epoch": 0.83, "grad_norm": 1.7434630171949477, "learning_rate": 7.35553754044806e-07, "loss": 0.4755, "step": 11702 }, { "epoch": 0.83, "grad_norm": 1.6310135978226974, "learning_rate": 7.349538959420577e-07, "loss": 0.5216, "step": 11703 }, { "epoch": 0.83, "grad_norm": 1.6369144801150737, "learning_rate": 7.343542631334988e-07, "loss": 0.5214, "step": 11704 }, { "epoch": 0.83, "grad_norm": 1.9427740604092802, "learning_rate": 7.337548556508006e-07, "loss": 0.5765, "step": 11705 }, { "epoch": 0.83, "grad_norm": 2.263104623753018, "learning_rate": 7.331556735256268e-07, "loss": 0.4668, "step": 11706 }, { "epoch": 0.83, "grad_norm": 1.5766192705065845, "learning_rate": 7.325567167896286e-07, "loss": 0.5113, "step": 11707 }, { "epoch": 0.83, "grad_norm": 1.6619438482655509, "learning_rate": 7.319579854744446e-07, "loss": 0.5174, "step": 11708 }, { "epoch": 0.83, "grad_norm": 1.6401420964110671, "learning_rate": 7.313594796117029e-07, "loss": 0.5086, "step": 11709 }, { "epoch": 0.83, "grad_norm": 1.5666994335476736, "learning_rate": 7.307611992330166e-07, "loss": 0.53, "step": 11710 }, { "epoch": 0.83, "grad_norm": 2.1466156573936104, "learning_rate": 7.301631443699896e-07, "loss": 0.6009, "step": 11711 }, { "epoch": 0.83, "grad_norm": 0.6810794632928244, "learning_rate": 7.295653150542143e-07, "loss": 0.396, "step": 11712 }, { "epoch": 0.83, "grad_norm": 3.8436543582411202, "learning_rate": 7.289677113172683e-07, "loss": 0.5026, "step": 11713 }, { "epoch": 0.83, "grad_norm": 2.0161864306361528, "learning_rate": 7.283703331907216e-07, "loss": 0.5017, "step": 11714 }, { "epoch": 0.83, "grad_norm": 0.63188129273209, "learning_rate": 7.277731807061261e-07, "loss": 0.3829, "step": 11715 }, { "epoch": 0.83, "grad_norm": 1.7288999046786706, "learning_rate": 7.271762538950295e-07, "loss": 0.5219, "step": 11716 }, { "epoch": 0.83, "grad_norm": 1.7465654423386443, "learning_rate": 7.265795527889602e-07, "loss": 0.5216, "step": 11717 }, { "epoch": 0.83, "grad_norm": 1.7742502570340188, "learning_rate": 7.259830774194398e-07, "loss": 0.5093, "step": 11718 }, { "epoch": 0.83, "grad_norm": 0.7567646762093673, "learning_rate": 7.253868278179749e-07, "loss": 0.4389, "step": 11719 }, { "epoch": 0.83, "grad_norm": 1.5092947792239753, "learning_rate": 7.247908040160628e-07, "loss": 0.4857, "step": 11720 }, { "epoch": 0.83, "grad_norm": 1.7884496890540516, "learning_rate": 7.241950060451875e-07, "loss": 0.5178, "step": 11721 }, { "epoch": 0.83, "grad_norm": 2.2801036048891934, "learning_rate": 7.23599433936818e-07, "loss": 0.6031, "step": 11722 }, { "epoch": 0.83, "grad_norm": 2.0850986836143712, "learning_rate": 7.230040877224193e-07, "loss": 0.508, "step": 11723 }, { "epoch": 0.83, "grad_norm": 1.6045627405522787, "learning_rate": 7.224089674334362e-07, "loss": 0.5198, "step": 11724 }, { "epoch": 0.83, "grad_norm": 2.2923550449683745, "learning_rate": 7.218140731013068e-07, "loss": 0.5166, "step": 11725 }, { "epoch": 0.83, "grad_norm": 1.4327126725620658, "learning_rate": 7.212194047574522e-07, "loss": 0.4933, "step": 11726 }, { "epoch": 0.83, "grad_norm": 1.6047979555657583, "learning_rate": 7.206249624332895e-07, "loss": 0.4968, "step": 11727 }, { "epoch": 0.83, "grad_norm": 1.9583177220634218, "learning_rate": 7.200307461602158e-07, "loss": 0.5262, "step": 11728 }, { "epoch": 0.83, "grad_norm": 1.9930174277230923, "learning_rate": 7.194367559696203e-07, "loss": 0.4913, "step": 11729 }, { "epoch": 0.83, "grad_norm": 1.8196360526875268, "learning_rate": 7.188429918928802e-07, "loss": 0.6062, "step": 11730 }, { "epoch": 0.83, "grad_norm": 1.5371334674007648, "learning_rate": 7.182494539613594e-07, "loss": 0.5098, "step": 11731 }, { "epoch": 0.83, "grad_norm": 1.6414438411408285, "learning_rate": 7.176561422064127e-07, "loss": 0.5391, "step": 11732 }, { "epoch": 0.83, "grad_norm": 2.0270038142229927, "learning_rate": 7.170630566593762e-07, "loss": 0.598, "step": 11733 }, { "epoch": 0.83, "grad_norm": 2.048513675185958, "learning_rate": 7.164701973515841e-07, "loss": 0.468, "step": 11734 }, { "epoch": 0.83, "grad_norm": 1.887991490611884, "learning_rate": 7.158775643143501e-07, "loss": 0.5108, "step": 11735 }, { "epoch": 0.83, "grad_norm": 1.6540049498822724, "learning_rate": 7.152851575789793e-07, "loss": 0.5183, "step": 11736 }, { "epoch": 0.83, "grad_norm": 0.7176338322690999, "learning_rate": 7.146929771767647e-07, "loss": 0.4188, "step": 11737 }, { "epoch": 0.83, "grad_norm": 1.6813935219342995, "learning_rate": 7.141010231389883e-07, "loss": 0.5576, "step": 11738 }, { "epoch": 0.83, "grad_norm": 1.769808455240415, "learning_rate": 7.135092954969192e-07, "loss": 0.5327, "step": 11739 }, { "epoch": 0.83, "grad_norm": 1.5875555614296115, "learning_rate": 7.129177942818128e-07, "loss": 0.5113, "step": 11740 }, { "epoch": 0.83, "grad_norm": 1.6989571675350958, "learning_rate": 7.123265195249152e-07, "loss": 0.5461, "step": 11741 }, { "epoch": 0.83, "grad_norm": 2.0254744464058625, "learning_rate": 7.117354712574592e-07, "loss": 0.5423, "step": 11742 }, { "epoch": 0.83, "grad_norm": 5.411387811545026, "learning_rate": 7.111446495106667e-07, "loss": 0.4646, "step": 11743 }, { "epoch": 0.83, "grad_norm": 1.5942877032876244, "learning_rate": 7.105540543157474e-07, "loss": 0.5043, "step": 11744 }, { "epoch": 0.83, "grad_norm": 1.942628897669263, "learning_rate": 7.099636857038949e-07, "loss": 0.5779, "step": 11745 }, { "epoch": 0.83, "grad_norm": 1.7365427836767406, "learning_rate": 7.093735437062999e-07, "loss": 0.5151, "step": 11746 }, { "epoch": 0.83, "grad_norm": 1.8247248391417072, "learning_rate": 7.087836283541317e-07, "loss": 0.5827, "step": 11747 }, { "epoch": 0.83, "grad_norm": 1.7062673841761788, "learning_rate": 7.081939396785526e-07, "loss": 0.4796, "step": 11748 }, { "epoch": 0.83, "grad_norm": 1.5678610202714978, "learning_rate": 7.076044777107122e-07, "loss": 0.5373, "step": 11749 }, { "epoch": 0.83, "grad_norm": 0.7739580235023502, "learning_rate": 7.070152424817484e-07, "loss": 0.4308, "step": 11750 }, { "epoch": 0.83, "grad_norm": 1.625979876412168, "learning_rate": 7.064262340227868e-07, "loss": 0.538, "step": 11751 }, { "epoch": 0.83, "grad_norm": 1.8792085023529423, "learning_rate": 7.058374523649381e-07, "loss": 0.544, "step": 11752 }, { "epoch": 0.83, "grad_norm": 1.6038628638541297, "learning_rate": 7.05248897539308e-07, "loss": 0.4823, "step": 11753 }, { "epoch": 0.83, "grad_norm": 1.9262273277634299, "learning_rate": 7.046605695769825e-07, "loss": 0.5916, "step": 11754 }, { "epoch": 0.83, "grad_norm": 2.1283190651172297, "learning_rate": 7.040724685090416e-07, "loss": 0.5249, "step": 11755 }, { "epoch": 0.83, "grad_norm": 0.7559554985882286, "learning_rate": 7.034845943665469e-07, "loss": 0.4194, "step": 11756 }, { "epoch": 0.83, "grad_norm": 1.892010964689599, "learning_rate": 7.028969471805563e-07, "loss": 0.5329, "step": 11757 }, { "epoch": 0.83, "grad_norm": 5.097428510997845, "learning_rate": 7.023095269821101e-07, "loss": 0.4895, "step": 11758 }, { "epoch": 0.83, "grad_norm": 4.320470950834997, "learning_rate": 7.017223338022361e-07, "loss": 0.5046, "step": 11759 }, { "epoch": 0.83, "grad_norm": 1.9057888539607624, "learning_rate": 7.011353676719535e-07, "loss": 0.5013, "step": 11760 }, { "epoch": 0.83, "grad_norm": 1.831383180857901, "learning_rate": 7.005486286222668e-07, "loss": 0.5137, "step": 11761 }, { "epoch": 0.83, "grad_norm": 1.6757386208362623, "learning_rate": 6.999621166841714e-07, "loss": 0.5172, "step": 11762 }, { "epoch": 0.83, "grad_norm": 1.7101491615035325, "learning_rate": 6.993758318886452e-07, "loss": 0.5197, "step": 11763 }, { "epoch": 0.83, "grad_norm": 2.106896453289844, "learning_rate": 6.987897742666621e-07, "loss": 0.5175, "step": 11764 }, { "epoch": 0.83, "grad_norm": 1.8713826789143162, "learning_rate": 6.982039438491766e-07, "loss": 0.536, "step": 11765 }, { "epoch": 0.83, "grad_norm": 1.401630796686222, "learning_rate": 6.976183406671355e-07, "loss": 0.4492, "step": 11766 }, { "epoch": 0.84, "grad_norm": 2.1680493064548454, "learning_rate": 6.970329647514723e-07, "loss": 0.5305, "step": 11767 }, { "epoch": 0.84, "grad_norm": 1.6551899334210425, "learning_rate": 6.964478161331079e-07, "loss": 0.4877, "step": 11768 }, { "epoch": 0.84, "grad_norm": 2.6988076549411204, "learning_rate": 6.958628948429536e-07, "loss": 0.5393, "step": 11769 }, { "epoch": 0.84, "grad_norm": 2.095467595918868, "learning_rate": 6.952782009119041e-07, "loss": 0.5247, "step": 11770 }, { "epoch": 0.84, "grad_norm": 1.5297646366140598, "learning_rate": 6.94693734370847e-07, "loss": 0.4644, "step": 11771 }, { "epoch": 0.84, "grad_norm": 1.819367966899226, "learning_rate": 6.941094952506549e-07, "loss": 0.5606, "step": 11772 }, { "epoch": 0.84, "grad_norm": 1.7373888065032759, "learning_rate": 6.935254835821897e-07, "loss": 0.4914, "step": 11773 }, { "epoch": 0.84, "grad_norm": 1.763793275290126, "learning_rate": 6.929416993963018e-07, "loss": 0.4764, "step": 11774 }, { "epoch": 0.84, "grad_norm": 1.7552752126809743, "learning_rate": 6.923581427238258e-07, "loss": 0.5493, "step": 11775 }, { "epoch": 0.84, "grad_norm": 1.4279978045574078, "learning_rate": 6.917748135955904e-07, "loss": 0.5151, "step": 11776 }, { "epoch": 0.84, "grad_norm": 1.6198639900622387, "learning_rate": 6.911917120424072e-07, "loss": 0.5165, "step": 11777 }, { "epoch": 0.84, "grad_norm": 3.8366133144857177, "learning_rate": 6.906088380950776e-07, "loss": 0.477, "step": 11778 }, { "epoch": 0.84, "grad_norm": 1.7082065925953187, "learning_rate": 6.900261917843915e-07, "loss": 0.5018, "step": 11779 }, { "epoch": 0.84, "grad_norm": 1.943017859534004, "learning_rate": 6.894437731411263e-07, "loss": 0.5038, "step": 11780 }, { "epoch": 0.84, "grad_norm": 2.475951821202235, "learning_rate": 6.888615821960482e-07, "loss": 0.5211, "step": 11781 }, { "epoch": 0.84, "grad_norm": 1.7970312721669137, "learning_rate": 6.882796189799073e-07, "loss": 0.5562, "step": 11782 }, { "epoch": 0.84, "grad_norm": 1.5486480404484708, "learning_rate": 6.876978835234494e-07, "loss": 0.4806, "step": 11783 }, { "epoch": 0.84, "grad_norm": 1.9950435751345448, "learning_rate": 6.871163758573995e-07, "loss": 0.4857, "step": 11784 }, { "epoch": 0.84, "grad_norm": 0.7082899741949696, "learning_rate": 6.865350960124773e-07, "loss": 0.4111, "step": 11785 }, { "epoch": 0.84, "grad_norm": 1.6036864199778271, "learning_rate": 6.859540440193873e-07, "loss": 0.4915, "step": 11786 }, { "epoch": 0.84, "grad_norm": 1.6786206978896119, "learning_rate": 6.853732199088226e-07, "loss": 0.541, "step": 11787 }, { "epoch": 0.84, "grad_norm": 1.4242563925928984, "learning_rate": 6.847926237114654e-07, "loss": 0.5074, "step": 11788 }, { "epoch": 0.84, "grad_norm": 1.8207592999978541, "learning_rate": 6.842122554579827e-07, "loss": 0.4054, "step": 11789 }, { "epoch": 0.84, "grad_norm": 2.3605272984539556, "learning_rate": 6.836321151790326e-07, "loss": 0.5551, "step": 11790 }, { "epoch": 0.84, "grad_norm": 0.7493659316116227, "learning_rate": 6.8305220290526e-07, "loss": 0.4089, "step": 11791 }, { "epoch": 0.84, "grad_norm": 0.6533537816039451, "learning_rate": 6.824725186672992e-07, "loss": 0.4014, "step": 11792 }, { "epoch": 0.84, "grad_norm": 1.769666273987326, "learning_rate": 6.818930624957682e-07, "loss": 0.532, "step": 11793 }, { "epoch": 0.84, "grad_norm": 2.063951855360864, "learning_rate": 6.813138344212766e-07, "loss": 0.5001, "step": 11794 }, { "epoch": 0.84, "grad_norm": 2.720824439969221, "learning_rate": 6.807348344744242e-07, "loss": 0.5943, "step": 11795 }, { "epoch": 0.84, "grad_norm": 1.531909646954632, "learning_rate": 6.801560626857922e-07, "loss": 0.4388, "step": 11796 }, { "epoch": 0.84, "grad_norm": 1.544663812329272, "learning_rate": 6.795775190859544e-07, "loss": 0.5118, "step": 11797 }, { "epoch": 0.84, "grad_norm": 1.7999083217658174, "learning_rate": 6.789992037054721e-07, "loss": 0.5675, "step": 11798 }, { "epoch": 0.84, "grad_norm": 1.5653751865554992, "learning_rate": 6.784211165748944e-07, "loss": 0.4742, "step": 11799 }, { "epoch": 0.84, "grad_norm": 1.6853735124624345, "learning_rate": 6.778432577247557e-07, "loss": 0.547, "step": 11800 }, { "epoch": 0.84, "grad_norm": 1.8679263583925094, "learning_rate": 6.772656271855815e-07, "loss": 0.483, "step": 11801 }, { "epoch": 0.84, "grad_norm": 0.7413442933179937, "learning_rate": 6.766882249878842e-07, "loss": 0.4582, "step": 11802 }, { "epoch": 0.84, "grad_norm": 1.5829422641563322, "learning_rate": 6.761110511621644e-07, "loss": 0.5382, "step": 11803 }, { "epoch": 0.84, "grad_norm": 1.7411922036799057, "learning_rate": 6.755341057389109e-07, "loss": 0.5112, "step": 11804 }, { "epoch": 0.84, "grad_norm": 1.8442264120935636, "learning_rate": 6.749573887485978e-07, "loss": 0.5143, "step": 11805 }, { "epoch": 0.84, "grad_norm": 1.5937919382873207, "learning_rate": 6.743809002216923e-07, "loss": 0.5729, "step": 11806 }, { "epoch": 0.84, "grad_norm": 1.9332930127727608, "learning_rate": 6.738046401886439e-07, "loss": 0.5167, "step": 11807 }, { "epoch": 0.84, "grad_norm": 2.0476774449399744, "learning_rate": 6.732286086798934e-07, "loss": 0.5583, "step": 11808 }, { "epoch": 0.84, "grad_norm": 4.657767145257553, "learning_rate": 6.726528057258691e-07, "loss": 0.5648, "step": 11809 }, { "epoch": 0.84, "grad_norm": 1.4810884138377158, "learning_rate": 6.720772313569868e-07, "loss": 0.4509, "step": 11810 }, { "epoch": 0.84, "grad_norm": 1.8765133058832177, "learning_rate": 6.71501885603651e-07, "loss": 0.4816, "step": 11811 }, { "epoch": 0.84, "grad_norm": 1.8296442511351512, "learning_rate": 6.709267684962506e-07, "loss": 0.5304, "step": 11812 }, { "epoch": 0.84, "grad_norm": 1.7863413591773318, "learning_rate": 6.703518800651692e-07, "loss": 0.5904, "step": 11813 }, { "epoch": 0.84, "grad_norm": 1.7746467775639743, "learning_rate": 6.69777220340771e-07, "loss": 0.5076, "step": 11814 }, { "epoch": 0.84, "grad_norm": 2.536070109182169, "learning_rate": 6.692027893534131e-07, "loss": 0.5117, "step": 11815 }, { "epoch": 0.84, "grad_norm": 1.7995584359711236, "learning_rate": 6.686285871334386e-07, "loss": 0.5692, "step": 11816 }, { "epoch": 0.84, "grad_norm": 0.68664583102823, "learning_rate": 6.680546137111787e-07, "loss": 0.4137, "step": 11817 }, { "epoch": 0.84, "grad_norm": 1.6183396077534355, "learning_rate": 6.674808691169543e-07, "loss": 0.5601, "step": 11818 }, { "epoch": 0.84, "grad_norm": 1.9088050736116609, "learning_rate": 6.669073533810693e-07, "loss": 0.5101, "step": 11819 }, { "epoch": 0.84, "grad_norm": 1.6324730051985417, "learning_rate": 6.663340665338208e-07, "loss": 0.4597, "step": 11820 }, { "epoch": 0.84, "grad_norm": 1.8164582767815105, "learning_rate": 6.657610086054911e-07, "loss": 0.5746, "step": 11821 }, { "epoch": 0.84, "grad_norm": 0.6580095526866813, "learning_rate": 6.651881796263515e-07, "loss": 0.3958, "step": 11822 }, { "epoch": 0.84, "grad_norm": 1.7101971635891393, "learning_rate": 6.646155796266618e-07, "loss": 0.5541, "step": 11823 }, { "epoch": 0.84, "grad_norm": 0.730223468332458, "learning_rate": 6.64043208636665e-07, "loss": 0.4331, "step": 11824 }, { "epoch": 0.84, "grad_norm": 1.5920837223212028, "learning_rate": 6.634710666866001e-07, "loss": 0.5502, "step": 11825 }, { "epoch": 0.84, "grad_norm": 2.0847140154454378, "learning_rate": 6.62899153806687e-07, "loss": 0.4955, "step": 11826 }, { "epoch": 0.84, "grad_norm": 1.8709589804553248, "learning_rate": 6.623274700271376e-07, "loss": 0.4782, "step": 11827 }, { "epoch": 0.84, "grad_norm": 1.5351706184914884, "learning_rate": 6.617560153781471e-07, "loss": 0.5164, "step": 11828 }, { "epoch": 0.84, "grad_norm": 1.8726129105132676, "learning_rate": 6.611847898899059e-07, "loss": 0.5047, "step": 11829 }, { "epoch": 0.84, "grad_norm": 2.8529257548143003, "learning_rate": 6.606137935925854e-07, "loss": 0.5403, "step": 11830 }, { "epoch": 0.84, "grad_norm": 1.7395416207803651, "learning_rate": 6.600430265163476e-07, "loss": 0.505, "step": 11831 }, { "epoch": 0.84, "grad_norm": 1.7575780476355187, "learning_rate": 6.594724886913434e-07, "loss": 0.5632, "step": 11832 }, { "epoch": 0.84, "grad_norm": 1.6329065116169648, "learning_rate": 6.589021801477097e-07, "loss": 0.5447, "step": 11833 }, { "epoch": 0.84, "grad_norm": 1.7818750994843486, "learning_rate": 6.583321009155741e-07, "loss": 0.5088, "step": 11834 }, { "epoch": 0.84, "grad_norm": 1.6584674767098533, "learning_rate": 6.577622510250459e-07, "loss": 0.5509, "step": 11835 }, { "epoch": 0.84, "grad_norm": 1.604826325853938, "learning_rate": 6.571926305062314e-07, "loss": 0.5138, "step": 11836 }, { "epoch": 0.84, "grad_norm": 1.5207163780762065, "learning_rate": 6.566232393892163e-07, "loss": 0.481, "step": 11837 }, { "epoch": 0.84, "grad_norm": 0.6034200038016776, "learning_rate": 6.560540777040791e-07, "loss": 0.4081, "step": 11838 }, { "epoch": 0.84, "grad_norm": 1.9661035325886336, "learning_rate": 6.554851454808853e-07, "loss": 0.505, "step": 11839 }, { "epoch": 0.84, "grad_norm": 1.598767838178624, "learning_rate": 6.549164427496868e-07, "loss": 0.452, "step": 11840 }, { "epoch": 0.84, "grad_norm": 1.6332809514699482, "learning_rate": 6.543479695405264e-07, "loss": 0.512, "step": 11841 }, { "epoch": 0.84, "grad_norm": 1.8424738837900463, "learning_rate": 6.5377972588343e-07, "loss": 0.5637, "step": 11842 }, { "epoch": 0.84, "grad_norm": 2.1820938830766266, "learning_rate": 6.532117118084152e-07, "loss": 0.4679, "step": 11843 }, { "epoch": 0.84, "grad_norm": 2.0999802046232223, "learning_rate": 6.526439273454871e-07, "loss": 0.4478, "step": 11844 }, { "epoch": 0.84, "grad_norm": 2.3500281042325892, "learning_rate": 6.520763725246376e-07, "loss": 0.5401, "step": 11845 }, { "epoch": 0.84, "grad_norm": 0.6966234289956624, "learning_rate": 6.515090473758468e-07, "loss": 0.4264, "step": 11846 }, { "epoch": 0.84, "grad_norm": 2.1480606844986663, "learning_rate": 6.509419519290832e-07, "loss": 0.5295, "step": 11847 }, { "epoch": 0.84, "grad_norm": 1.4980252847211433, "learning_rate": 6.503750862143027e-07, "loss": 0.5197, "step": 11848 }, { "epoch": 0.84, "grad_norm": 1.8123147851797627, "learning_rate": 6.498084502614482e-07, "loss": 0.4911, "step": 11849 }, { "epoch": 0.84, "grad_norm": 1.5384462825794913, "learning_rate": 6.492420441004516e-07, "loss": 0.4769, "step": 11850 }, { "epoch": 0.84, "grad_norm": 1.6117337797167215, "learning_rate": 6.486758677612326e-07, "loss": 0.4278, "step": 11851 }, { "epoch": 0.84, "grad_norm": 1.6767912263132492, "learning_rate": 6.481099212736986e-07, "loss": 0.4665, "step": 11852 }, { "epoch": 0.84, "grad_norm": 1.6964713794386312, "learning_rate": 6.475442046677455e-07, "loss": 0.5916, "step": 11853 }, { "epoch": 0.84, "grad_norm": 0.7185875338736285, "learning_rate": 6.46978717973254e-07, "loss": 0.4387, "step": 11854 }, { "epoch": 0.84, "grad_norm": 1.4792906721573698, "learning_rate": 6.464134612200984e-07, "loss": 0.4902, "step": 11855 }, { "epoch": 0.84, "grad_norm": 1.8401960606002405, "learning_rate": 6.45848434438135e-07, "loss": 0.5083, "step": 11856 }, { "epoch": 0.84, "grad_norm": 1.786115361437105, "learning_rate": 6.452836376572125e-07, "loss": 0.5482, "step": 11857 }, { "epoch": 0.84, "grad_norm": 2.8057932812776394, "learning_rate": 6.44719070907161e-07, "loss": 0.5535, "step": 11858 }, { "epoch": 0.84, "grad_norm": 1.5058202220882033, "learning_rate": 6.441547342178089e-07, "loss": 0.5292, "step": 11859 }, { "epoch": 0.84, "grad_norm": 1.5140274210519358, "learning_rate": 6.435906276189619e-07, "loss": 0.4174, "step": 11860 }, { "epoch": 0.84, "grad_norm": 1.9255255357109917, "learning_rate": 6.430267511404187e-07, "loss": 0.4611, "step": 11861 }, { "epoch": 0.84, "grad_norm": 1.6891356915749074, "learning_rate": 6.424631048119679e-07, "loss": 0.4916, "step": 11862 }, { "epoch": 0.84, "grad_norm": 1.7677709698256114, "learning_rate": 6.418996886633799e-07, "loss": 0.5147, "step": 11863 }, { "epoch": 0.84, "grad_norm": 1.5412107486755593, "learning_rate": 6.41336502724419e-07, "loss": 0.4995, "step": 11864 }, { "epoch": 0.84, "grad_norm": 2.722550127633212, "learning_rate": 6.407735470248305e-07, "loss": 0.5626, "step": 11865 }, { "epoch": 0.84, "grad_norm": 1.857630425647706, "learning_rate": 6.40210821594357e-07, "loss": 0.5029, "step": 11866 }, { "epoch": 0.84, "grad_norm": 1.758628626374555, "learning_rate": 6.396483264627202e-07, "loss": 0.527, "step": 11867 }, { "epoch": 0.84, "grad_norm": 1.8544266035428565, "learning_rate": 6.390860616596328e-07, "loss": 0.4962, "step": 11868 }, { "epoch": 0.84, "grad_norm": 1.5501200208790278, "learning_rate": 6.385240272147969e-07, "loss": 0.5528, "step": 11869 }, { "epoch": 0.84, "grad_norm": 5.362444374529221, "learning_rate": 6.379622231579008e-07, "loss": 0.5197, "step": 11870 }, { "epoch": 0.84, "grad_norm": 2.0678746217594144, "learning_rate": 6.374006495186214e-07, "loss": 0.5624, "step": 11871 }, { "epoch": 0.84, "grad_norm": 2.0727872957261932, "learning_rate": 6.368393063266209e-07, "loss": 0.5548, "step": 11872 }, { "epoch": 0.84, "grad_norm": 0.6897554930141901, "learning_rate": 6.362781936115531e-07, "loss": 0.4188, "step": 11873 }, { "epoch": 0.84, "grad_norm": 1.6950268731589517, "learning_rate": 6.357173114030574e-07, "loss": 0.508, "step": 11874 }, { "epoch": 0.84, "grad_norm": 1.4575225304125996, "learning_rate": 6.351566597307612e-07, "loss": 0.5274, "step": 11875 }, { "epoch": 0.84, "grad_norm": 1.570234943046175, "learning_rate": 6.345962386242805e-07, "loss": 0.5218, "step": 11876 }, { "epoch": 0.84, "grad_norm": 2.1954854015289182, "learning_rate": 6.340360481132185e-07, "loss": 0.5483, "step": 11877 }, { "epoch": 0.84, "grad_norm": 1.668535474698413, "learning_rate": 6.334760882271673e-07, "loss": 0.5079, "step": 11878 }, { "epoch": 0.84, "grad_norm": 1.7881519330911249, "learning_rate": 6.329163589957038e-07, "loss": 0.5385, "step": 11879 }, { "epoch": 0.84, "grad_norm": 2.69226515510854, "learning_rate": 6.323568604483966e-07, "loss": 0.4764, "step": 11880 }, { "epoch": 0.84, "grad_norm": 2.866313735154939, "learning_rate": 6.31797592614799e-07, "loss": 0.51, "step": 11881 }, { "epoch": 0.84, "grad_norm": 2.6848172023794215, "learning_rate": 6.312385555244538e-07, "loss": 0.5167, "step": 11882 }, { "epoch": 0.84, "grad_norm": 1.6434595154611973, "learning_rate": 6.306797492068934e-07, "loss": 0.4714, "step": 11883 }, { "epoch": 0.84, "grad_norm": 1.74888765048661, "learning_rate": 6.301211736916307e-07, "loss": 0.4888, "step": 11884 }, { "epoch": 0.84, "grad_norm": 1.7380916045639057, "learning_rate": 6.295628290081779e-07, "loss": 0.534, "step": 11885 }, { "epoch": 0.84, "grad_norm": 0.6536593219521794, "learning_rate": 6.29004715186024e-07, "loss": 0.3836, "step": 11886 }, { "epoch": 0.84, "grad_norm": 1.6035103266869262, "learning_rate": 6.284468322546533e-07, "loss": 0.4853, "step": 11887 }, { "epoch": 0.84, "grad_norm": 1.604382490559528, "learning_rate": 6.278891802435311e-07, "loss": 0.5318, "step": 11888 }, { "epoch": 0.84, "grad_norm": 1.6459072424118653, "learning_rate": 6.273317591821188e-07, "loss": 0.4513, "step": 11889 }, { "epoch": 0.84, "grad_norm": 1.5905258953822918, "learning_rate": 6.267745690998606e-07, "loss": 0.5586, "step": 11890 }, { "epoch": 0.84, "grad_norm": 1.6832761236489961, "learning_rate": 6.262176100261868e-07, "loss": 0.5091, "step": 11891 }, { "epoch": 0.84, "grad_norm": 1.4947962450590664, "learning_rate": 6.256608819905191e-07, "loss": 0.5339, "step": 11892 }, { "epoch": 0.84, "grad_norm": 1.7357471644104834, "learning_rate": 6.251043850222666e-07, "loss": 0.4761, "step": 11893 }, { "epoch": 0.84, "grad_norm": 2.0866853312912004, "learning_rate": 6.245481191508251e-07, "loss": 0.5048, "step": 11894 }, { "epoch": 0.84, "grad_norm": 1.709380097923953, "learning_rate": 6.239920844055757e-07, "loss": 0.5699, "step": 11895 }, { "epoch": 0.84, "grad_norm": 1.6665780839320476, "learning_rate": 6.23436280815895e-07, "loss": 0.5438, "step": 11896 }, { "epoch": 0.84, "grad_norm": 1.6160611268580256, "learning_rate": 6.228807084111388e-07, "loss": 0.4958, "step": 11897 }, { "epoch": 0.84, "grad_norm": 1.8223511085625619, "learning_rate": 6.223253672206547e-07, "loss": 0.5339, "step": 11898 }, { "epoch": 0.84, "grad_norm": 1.5225139495983389, "learning_rate": 6.217702572737789e-07, "loss": 0.477, "step": 11899 }, { "epoch": 0.84, "grad_norm": 1.734033256675826, "learning_rate": 6.212153785998331e-07, "loss": 0.586, "step": 11900 }, { "epoch": 0.84, "grad_norm": 1.9344618827641535, "learning_rate": 6.206607312281299e-07, "loss": 0.5075, "step": 11901 }, { "epoch": 0.84, "grad_norm": 1.8026786266052433, "learning_rate": 6.201063151879644e-07, "loss": 0.5429, "step": 11902 }, { "epoch": 0.84, "grad_norm": 1.5226500493783643, "learning_rate": 6.195521305086249e-07, "loss": 0.5268, "step": 11903 }, { "epoch": 0.84, "grad_norm": 1.854557169301373, "learning_rate": 6.189981772193848e-07, "loss": 0.5006, "step": 11904 }, { "epoch": 0.84, "grad_norm": 1.8874139656557904, "learning_rate": 6.184444553495056e-07, "loss": 0.5288, "step": 11905 }, { "epoch": 0.84, "grad_norm": 2.236671700749139, "learning_rate": 6.178909649282383e-07, "loss": 0.4426, "step": 11906 }, { "epoch": 0.84, "grad_norm": 1.575524762018905, "learning_rate": 6.173377059848168e-07, "loss": 0.4998, "step": 11907 }, { "epoch": 0.85, "grad_norm": 1.627295515810354, "learning_rate": 6.167846785484699e-07, "loss": 0.5116, "step": 11908 }, { "epoch": 0.85, "grad_norm": 1.9582523217877705, "learning_rate": 6.162318826484081e-07, "loss": 0.6021, "step": 11909 }, { "epoch": 0.85, "grad_norm": 2.263550606160613, "learning_rate": 6.156793183138321e-07, "loss": 0.4504, "step": 11910 }, { "epoch": 0.85, "grad_norm": 1.8356778064239916, "learning_rate": 6.151269855739306e-07, "loss": 0.5238, "step": 11911 }, { "epoch": 0.85, "grad_norm": 1.6623013888868055, "learning_rate": 6.145748844578797e-07, "loss": 0.495, "step": 11912 }, { "epoch": 0.85, "grad_norm": 1.7036969898522185, "learning_rate": 6.140230149948446e-07, "loss": 0.5298, "step": 11913 }, { "epoch": 0.85, "grad_norm": 1.8218746057944, "learning_rate": 6.134713772139733e-07, "loss": 0.5096, "step": 11914 }, { "epoch": 0.85, "grad_norm": 1.7135750983967926, "learning_rate": 6.129199711444095e-07, "loss": 0.5314, "step": 11915 }, { "epoch": 0.85, "grad_norm": 0.6584490172985701, "learning_rate": 6.123687968152775e-07, "loss": 0.409, "step": 11916 }, { "epoch": 0.85, "grad_norm": 1.7154526498373748, "learning_rate": 6.118178542556924e-07, "loss": 0.5073, "step": 11917 }, { "epoch": 0.85, "grad_norm": 0.6440051895600025, "learning_rate": 6.112671434947581e-07, "loss": 0.4247, "step": 11918 }, { "epoch": 0.85, "grad_norm": 1.709970979397856, "learning_rate": 6.107166645615636e-07, "loss": 0.5508, "step": 11919 }, { "epoch": 0.85, "grad_norm": 1.6024044902431125, "learning_rate": 6.101664174851895e-07, "loss": 0.5251, "step": 11920 }, { "epoch": 0.85, "grad_norm": 2.2482844914840583, "learning_rate": 6.096164022946988e-07, "loss": 0.4883, "step": 11921 }, { "epoch": 0.85, "grad_norm": 1.7715679695075872, "learning_rate": 6.090666190191463e-07, "loss": 0.5123, "step": 11922 }, { "epoch": 0.85, "grad_norm": 1.7937842322718651, "learning_rate": 6.085170676875734e-07, "loss": 0.5812, "step": 11923 }, { "epoch": 0.85, "grad_norm": 2.247826295659756, "learning_rate": 6.079677483290103e-07, "loss": 0.5691, "step": 11924 }, { "epoch": 0.85, "grad_norm": 1.8899982859448592, "learning_rate": 6.074186609724714e-07, "loss": 0.5168, "step": 11925 }, { "epoch": 0.85, "grad_norm": 1.7898727534449974, "learning_rate": 6.068698056469636e-07, "loss": 0.5182, "step": 11926 }, { "epoch": 0.85, "grad_norm": 2.253929494355518, "learning_rate": 6.063211823814796e-07, "loss": 0.4655, "step": 11927 }, { "epoch": 0.85, "grad_norm": 0.7134080675395075, "learning_rate": 6.057727912049976e-07, "loss": 0.4271, "step": 11928 }, { "epoch": 0.85, "grad_norm": 1.6857481343498593, "learning_rate": 6.052246321464867e-07, "loss": 0.5506, "step": 11929 }, { "epoch": 0.85, "grad_norm": 2.297299773356219, "learning_rate": 6.046767052349017e-07, "loss": 0.5586, "step": 11930 }, { "epoch": 0.85, "grad_norm": 2.2570031003038404, "learning_rate": 6.041290104991881e-07, "loss": 0.5294, "step": 11931 }, { "epoch": 0.85, "grad_norm": 1.5042563658450654, "learning_rate": 6.035815479682744e-07, "loss": 0.4538, "step": 11932 }, { "epoch": 0.85, "grad_norm": 1.7206443971645515, "learning_rate": 6.030343176710801e-07, "loss": 0.4968, "step": 11933 }, { "epoch": 0.85, "grad_norm": 1.7074437828615119, "learning_rate": 6.024873196365122e-07, "loss": 0.4911, "step": 11934 }, { "epoch": 0.85, "grad_norm": 1.7055227661598962, "learning_rate": 6.019405538934647e-07, "loss": 0.5449, "step": 11935 }, { "epoch": 0.85, "grad_norm": 1.5237698533417183, "learning_rate": 6.013940204708213e-07, "loss": 0.4931, "step": 11936 }, { "epoch": 0.85, "grad_norm": 1.7399586950096302, "learning_rate": 6.008477193974477e-07, "loss": 0.5654, "step": 11937 }, { "epoch": 0.85, "grad_norm": 0.6858261680749975, "learning_rate": 6.003016507022064e-07, "loss": 0.4318, "step": 11938 }, { "epoch": 0.85, "grad_norm": 1.7514194207823826, "learning_rate": 5.997558144139387e-07, "loss": 0.5156, "step": 11939 }, { "epoch": 0.85, "grad_norm": 1.683721248888722, "learning_rate": 5.992102105614794e-07, "loss": 0.449, "step": 11940 }, { "epoch": 0.85, "grad_norm": 2.0868539272656466, "learning_rate": 5.986648391736482e-07, "loss": 0.4817, "step": 11941 }, { "epoch": 0.85, "grad_norm": 2.0683465755366797, "learning_rate": 5.981197002792544e-07, "loss": 0.5539, "step": 11942 }, { "epoch": 0.85, "grad_norm": 1.6826689293171189, "learning_rate": 5.975747939070942e-07, "loss": 0.5521, "step": 11943 }, { "epoch": 0.85, "grad_norm": 1.6440222620061942, "learning_rate": 5.97030120085949e-07, "loss": 0.5017, "step": 11944 }, { "epoch": 0.85, "grad_norm": 1.6198271648457239, "learning_rate": 5.964856788445944e-07, "loss": 0.4461, "step": 11945 }, { "epoch": 0.85, "grad_norm": 1.4337266230037986, "learning_rate": 5.95941470211786e-07, "loss": 0.4649, "step": 11946 }, { "epoch": 0.85, "grad_norm": 0.7296666224437548, "learning_rate": 5.953974942162721e-07, "loss": 0.4145, "step": 11947 }, { "epoch": 0.85, "grad_norm": 1.7718148746362548, "learning_rate": 5.94853750886788e-07, "loss": 0.5065, "step": 11948 }, { "epoch": 0.85, "grad_norm": 1.6373547570398883, "learning_rate": 5.94310240252055e-07, "loss": 0.503, "step": 11949 }, { "epoch": 0.85, "grad_norm": 2.7284074339036697, "learning_rate": 5.937669623407849e-07, "loss": 0.5455, "step": 11950 }, { "epoch": 0.85, "grad_norm": 1.6260004312325043, "learning_rate": 5.932239171816728e-07, "loss": 0.4523, "step": 11951 }, { "epoch": 0.85, "grad_norm": 3.713113714208407, "learning_rate": 5.926811048034059e-07, "loss": 0.4417, "step": 11952 }, { "epoch": 0.85, "grad_norm": 1.6179873437298362, "learning_rate": 5.921385252346567e-07, "loss": 0.5384, "step": 11953 }, { "epoch": 0.85, "grad_norm": 1.7066919621697534, "learning_rate": 5.915961785040869e-07, "loss": 0.5036, "step": 11954 }, { "epoch": 0.85, "grad_norm": 0.6712154905853225, "learning_rate": 5.910540646403456e-07, "loss": 0.4154, "step": 11955 }, { "epoch": 0.85, "grad_norm": 1.765034413512885, "learning_rate": 5.905121836720656e-07, "loss": 0.4636, "step": 11956 }, { "epoch": 0.85, "grad_norm": 1.628945460935493, "learning_rate": 5.899705356278762e-07, "loss": 0.5094, "step": 11957 }, { "epoch": 0.85, "grad_norm": 1.7282870711030984, "learning_rate": 5.89429120536385e-07, "loss": 0.5906, "step": 11958 }, { "epoch": 0.85, "grad_norm": 1.9878310463226856, "learning_rate": 5.888879384261925e-07, "loss": 0.4686, "step": 11959 }, { "epoch": 0.85, "grad_norm": 1.729480354191174, "learning_rate": 5.883469893258864e-07, "loss": 0.4723, "step": 11960 }, { "epoch": 0.85, "grad_norm": 3.187639425292521, "learning_rate": 5.878062732640422e-07, "loss": 0.4884, "step": 11961 }, { "epoch": 0.85, "grad_norm": 1.6403075558053284, "learning_rate": 5.872657902692198e-07, "loss": 0.5035, "step": 11962 }, { "epoch": 0.85, "grad_norm": 1.7411309215424438, "learning_rate": 5.867255403699707e-07, "loss": 0.5306, "step": 11963 }, { "epoch": 0.85, "grad_norm": 1.903703156139573, "learning_rate": 5.861855235948327e-07, "loss": 0.5621, "step": 11964 }, { "epoch": 0.85, "grad_norm": 1.8823160819244178, "learning_rate": 5.856457399723314e-07, "loss": 0.5593, "step": 11965 }, { "epoch": 0.85, "grad_norm": 1.598334705149845, "learning_rate": 5.851061895309812e-07, "loss": 0.4806, "step": 11966 }, { "epoch": 0.85, "grad_norm": 1.5842231052615492, "learning_rate": 5.845668722992798e-07, "loss": 0.5065, "step": 11967 }, { "epoch": 0.85, "grad_norm": 1.6598353412845184, "learning_rate": 5.8402778830572e-07, "loss": 0.4621, "step": 11968 }, { "epoch": 0.85, "grad_norm": 1.6793044472313043, "learning_rate": 5.834889375787739e-07, "loss": 0.5745, "step": 11969 }, { "epoch": 0.85, "grad_norm": 1.5660511724563229, "learning_rate": 5.829503201469083e-07, "loss": 0.4452, "step": 11970 }, { "epoch": 0.85, "grad_norm": 3.9328728721672395, "learning_rate": 5.824119360385733e-07, "loss": 0.5443, "step": 11971 }, { "epoch": 0.85, "grad_norm": 1.741335355452342, "learning_rate": 5.818737852822087e-07, "loss": 0.5712, "step": 11972 }, { "epoch": 0.85, "grad_norm": 1.796779552844296, "learning_rate": 5.81335867906242e-07, "loss": 0.5041, "step": 11973 }, { "epoch": 0.85, "grad_norm": 2.1516935312292262, "learning_rate": 5.807981839390858e-07, "loss": 0.5378, "step": 11974 }, { "epoch": 0.85, "grad_norm": 1.653289173553318, "learning_rate": 5.802607334091459e-07, "loss": 0.4443, "step": 11975 }, { "epoch": 0.85, "grad_norm": 1.447520176777563, "learning_rate": 5.797235163448089e-07, "loss": 0.4965, "step": 11976 }, { "epoch": 0.85, "grad_norm": 2.1663290283082044, "learning_rate": 5.791865327744534e-07, "loss": 0.5501, "step": 11977 }, { "epoch": 0.85, "grad_norm": 2.4800003915366915, "learning_rate": 5.786497827264453e-07, "loss": 0.6063, "step": 11978 }, { "epoch": 0.85, "grad_norm": 1.631201711900396, "learning_rate": 5.781132662291377e-07, "loss": 0.5514, "step": 11979 }, { "epoch": 0.85, "grad_norm": 1.6529748439031402, "learning_rate": 5.775769833108713e-07, "loss": 0.531, "step": 11980 }, { "epoch": 0.85, "grad_norm": 1.7593629281339247, "learning_rate": 5.770409339999728e-07, "loss": 0.577, "step": 11981 }, { "epoch": 0.85, "grad_norm": 1.6046544442584851, "learning_rate": 5.765051183247595e-07, "loss": 0.5123, "step": 11982 }, { "epoch": 0.85, "grad_norm": 1.781066901579231, "learning_rate": 5.759695363135342e-07, "loss": 0.4909, "step": 11983 }, { "epoch": 0.85, "grad_norm": 1.7314072889320744, "learning_rate": 5.75434187994589e-07, "loss": 0.5058, "step": 11984 }, { "epoch": 0.85, "grad_norm": 1.7385292732205877, "learning_rate": 5.74899073396204e-07, "loss": 0.4949, "step": 11985 }, { "epoch": 0.85, "grad_norm": 2.173556611117297, "learning_rate": 5.743641925466415e-07, "loss": 0.5381, "step": 11986 }, { "epoch": 0.85, "grad_norm": 1.7658006890739704, "learning_rate": 5.738295454741605e-07, "loss": 0.5457, "step": 11987 }, { "epoch": 0.85, "grad_norm": 2.160098429811828, "learning_rate": 5.732951322070001e-07, "loss": 0.5662, "step": 11988 }, { "epoch": 0.85, "grad_norm": 1.763675154752843, "learning_rate": 5.727609527733902e-07, "loss": 0.511, "step": 11989 }, { "epoch": 0.85, "grad_norm": 2.046754691274286, "learning_rate": 5.722270072015485e-07, "loss": 0.5356, "step": 11990 }, { "epoch": 0.85, "grad_norm": 2.6312526699710177, "learning_rate": 5.716932955196808e-07, "loss": 0.5303, "step": 11991 }, { "epoch": 0.85, "grad_norm": 1.5787063483059043, "learning_rate": 5.711598177559774e-07, "loss": 0.5564, "step": 11992 }, { "epoch": 0.85, "grad_norm": 1.5721386062010998, "learning_rate": 5.70626573938618e-07, "loss": 0.5092, "step": 11993 }, { "epoch": 0.85, "grad_norm": 1.6827894632927107, "learning_rate": 5.700935640957739e-07, "loss": 0.5648, "step": 11994 }, { "epoch": 0.85, "grad_norm": 1.6602957237472147, "learning_rate": 5.695607882555976e-07, "loss": 0.5289, "step": 11995 }, { "epoch": 0.85, "grad_norm": 1.5933598806148763, "learning_rate": 5.690282464462332e-07, "loss": 0.4985, "step": 11996 }, { "epoch": 0.85, "grad_norm": 1.5048600581733191, "learning_rate": 5.684959386958094e-07, "loss": 0.4492, "step": 11997 }, { "epoch": 0.85, "grad_norm": 1.881140348018367, "learning_rate": 5.679638650324481e-07, "loss": 0.5342, "step": 11998 }, { "epoch": 0.85, "grad_norm": 1.8520708976491924, "learning_rate": 5.674320254842524e-07, "loss": 0.606, "step": 11999 }, { "epoch": 0.85, "grad_norm": 1.766322833678862, "learning_rate": 5.669004200793166e-07, "loss": 0.4552, "step": 12000 }, { "epoch": 0.85, "grad_norm": 1.6966724404555678, "learning_rate": 5.663690488457219e-07, "loss": 0.4836, "step": 12001 }, { "epoch": 0.85, "grad_norm": 1.5793062973316243, "learning_rate": 5.65837911811537e-07, "loss": 0.5246, "step": 12002 }, { "epoch": 0.85, "grad_norm": 3.476583609600949, "learning_rate": 5.653070090048202e-07, "loss": 0.5093, "step": 12003 }, { "epoch": 0.85, "grad_norm": 0.6700526545156688, "learning_rate": 5.647763404536117e-07, "loss": 0.4346, "step": 12004 }, { "epoch": 0.85, "grad_norm": 1.5862861157103307, "learning_rate": 5.642459061859474e-07, "loss": 0.5377, "step": 12005 }, { "epoch": 0.85, "grad_norm": 2.2773895841279717, "learning_rate": 5.637157062298438e-07, "loss": 0.4951, "step": 12006 }, { "epoch": 0.85, "grad_norm": 1.7402309532473723, "learning_rate": 5.631857406133084e-07, "loss": 0.5687, "step": 12007 }, { "epoch": 0.85, "grad_norm": 1.5248248062781713, "learning_rate": 5.626560093643368e-07, "loss": 0.5123, "step": 12008 }, { "epoch": 0.85, "grad_norm": 1.7192937453511143, "learning_rate": 5.621265125109099e-07, "loss": 0.5385, "step": 12009 }, { "epoch": 0.85, "grad_norm": 1.7860101084030808, "learning_rate": 5.615972500809997e-07, "loss": 0.4765, "step": 12010 }, { "epoch": 0.85, "grad_norm": 1.9940093827387109, "learning_rate": 5.610682221025609e-07, "loss": 0.4845, "step": 12011 }, { "epoch": 0.85, "grad_norm": 1.920110984690365, "learning_rate": 5.605394286035398e-07, "loss": 0.5063, "step": 12012 }, { "epoch": 0.85, "grad_norm": 2.136361564563695, "learning_rate": 5.600108696118689e-07, "loss": 0.4839, "step": 12013 }, { "epoch": 0.85, "grad_norm": 1.680940328562352, "learning_rate": 5.594825451554687e-07, "loss": 0.5766, "step": 12014 }, { "epoch": 0.85, "grad_norm": 0.6913431326788476, "learning_rate": 5.589544552622483e-07, "loss": 0.4231, "step": 12015 }, { "epoch": 0.85, "grad_norm": 1.6388068964632823, "learning_rate": 5.584265999600996e-07, "loss": 0.5978, "step": 12016 }, { "epoch": 0.85, "grad_norm": 2.148902186931401, "learning_rate": 5.578989792769102e-07, "loss": 0.5397, "step": 12017 }, { "epoch": 0.85, "grad_norm": 1.7496795071141151, "learning_rate": 5.573715932405471e-07, "loss": 0.5614, "step": 12018 }, { "epoch": 0.85, "grad_norm": 2.6402507635266943, "learning_rate": 5.568444418788715e-07, "loss": 0.4835, "step": 12019 }, { "epoch": 0.85, "grad_norm": 1.558014625293106, "learning_rate": 5.563175252197256e-07, "loss": 0.5111, "step": 12020 }, { "epoch": 0.85, "grad_norm": 1.575170404466334, "learning_rate": 5.55790843290947e-07, "loss": 0.4755, "step": 12021 }, { "epoch": 0.85, "grad_norm": 0.6657532218483908, "learning_rate": 5.552643961203553e-07, "loss": 0.4237, "step": 12022 }, { "epoch": 0.85, "grad_norm": 1.5179121504360753, "learning_rate": 5.547381837357574e-07, "loss": 0.4417, "step": 12023 }, { "epoch": 0.85, "grad_norm": 2.6178987988636546, "learning_rate": 5.542122061649536e-07, "loss": 0.5814, "step": 12024 }, { "epoch": 0.85, "grad_norm": 1.7654513317867686, "learning_rate": 5.536864634357236e-07, "loss": 0.6093, "step": 12025 }, { "epoch": 0.85, "grad_norm": 1.5524783630614913, "learning_rate": 5.531609555758422e-07, "loss": 0.5169, "step": 12026 }, { "epoch": 0.85, "grad_norm": 1.909191525556932, "learning_rate": 5.526356826130653e-07, "loss": 0.4935, "step": 12027 }, { "epoch": 0.85, "grad_norm": 2.0994209094052705, "learning_rate": 5.521106445751434e-07, "loss": 0.5474, "step": 12028 }, { "epoch": 0.85, "grad_norm": 0.6983958537719402, "learning_rate": 5.515858414898073e-07, "loss": 0.412, "step": 12029 }, { "epoch": 0.85, "grad_norm": 0.7928440357945047, "learning_rate": 5.510612733847803e-07, "loss": 0.4348, "step": 12030 }, { "epoch": 0.85, "grad_norm": 1.4664762331090024, "learning_rate": 5.505369402877725e-07, "loss": 0.5151, "step": 12031 }, { "epoch": 0.85, "grad_norm": 1.6898242897804663, "learning_rate": 5.5001284222648e-07, "loss": 0.5296, "step": 12032 }, { "epoch": 0.85, "grad_norm": 1.5250434961707768, "learning_rate": 5.49488979228589e-07, "loss": 0.4573, "step": 12033 }, { "epoch": 0.85, "grad_norm": 2.103412785083146, "learning_rate": 5.489653513217691e-07, "loss": 0.5195, "step": 12034 }, { "epoch": 0.85, "grad_norm": 1.641010230967123, "learning_rate": 5.484419585336814e-07, "loss": 0.5095, "step": 12035 }, { "epoch": 0.85, "grad_norm": 1.7552337280410197, "learning_rate": 5.47918800891974e-07, "loss": 0.4962, "step": 12036 }, { "epoch": 0.85, "grad_norm": 1.7317893223176797, "learning_rate": 5.473958784242812e-07, "loss": 0.4386, "step": 12037 }, { "epoch": 0.85, "grad_norm": 1.4480586389367875, "learning_rate": 5.468731911582254e-07, "loss": 0.5062, "step": 12038 }, { "epoch": 0.85, "grad_norm": 0.6754492727268356, "learning_rate": 5.463507391214168e-07, "loss": 0.4174, "step": 12039 }, { "epoch": 0.85, "grad_norm": 1.8190524211184038, "learning_rate": 5.458285223414539e-07, "loss": 0.5299, "step": 12040 }, { "epoch": 0.85, "grad_norm": 1.4856174208518556, "learning_rate": 5.453065408459207e-07, "loss": 0.4258, "step": 12041 }, { "epoch": 0.85, "grad_norm": 1.784321253695816, "learning_rate": 5.447847946623902e-07, "loss": 0.5175, "step": 12042 }, { "epoch": 0.85, "grad_norm": 1.9916579404944152, "learning_rate": 5.442632838184236e-07, "loss": 0.5598, "step": 12043 }, { "epoch": 0.85, "grad_norm": 1.8033688602047737, "learning_rate": 5.437420083415685e-07, "loss": 0.5502, "step": 12044 }, { "epoch": 0.85, "grad_norm": 1.556711842674976, "learning_rate": 5.432209682593608e-07, "loss": 0.4807, "step": 12045 }, { "epoch": 0.85, "grad_norm": 1.6746330320669653, "learning_rate": 5.427001635993217e-07, "loss": 0.4666, "step": 12046 }, { "epoch": 0.85, "grad_norm": 1.6629748677795093, "learning_rate": 5.421795943889652e-07, "loss": 0.4699, "step": 12047 }, { "epoch": 0.85, "grad_norm": 1.5964575465911919, "learning_rate": 5.416592606557863e-07, "loss": 0.4887, "step": 12048 }, { "epoch": 0.86, "grad_norm": 1.8567422656838277, "learning_rate": 5.411391624272727e-07, "loss": 0.5255, "step": 12049 }, { "epoch": 0.86, "grad_norm": 1.8014161062785252, "learning_rate": 5.406192997308973e-07, "loss": 0.5168, "step": 12050 }, { "epoch": 0.86, "grad_norm": 1.6256615920801798, "learning_rate": 5.400996725941205e-07, "loss": 0.512, "step": 12051 }, { "epoch": 0.86, "grad_norm": 1.7403596764645173, "learning_rate": 5.395802810443923e-07, "loss": 0.5603, "step": 12052 }, { "epoch": 0.86, "grad_norm": 1.6584507749108262, "learning_rate": 5.39061125109146e-07, "loss": 0.5452, "step": 12053 }, { "epoch": 0.86, "grad_norm": 1.823114153572426, "learning_rate": 5.385422048158085e-07, "loss": 0.6018, "step": 12054 }, { "epoch": 0.86, "grad_norm": 2.4286725902888806, "learning_rate": 5.380235201917882e-07, "loss": 0.511, "step": 12055 }, { "epoch": 0.86, "grad_norm": 2.3663443547860146, "learning_rate": 5.375050712644858e-07, "loss": 0.5802, "step": 12056 }, { "epoch": 0.86, "grad_norm": 1.7460093394806389, "learning_rate": 5.369868580612842e-07, "loss": 0.558, "step": 12057 }, { "epoch": 0.86, "grad_norm": 0.6998336888058949, "learning_rate": 5.364688806095609e-07, "loss": 0.4179, "step": 12058 }, { "epoch": 0.86, "grad_norm": 2.1630715981600535, "learning_rate": 5.359511389366767e-07, "loss": 0.4901, "step": 12059 }, { "epoch": 0.86, "grad_norm": 1.6441422860491586, "learning_rate": 5.354336330699788e-07, "loss": 0.5737, "step": 12060 }, { "epoch": 0.86, "grad_norm": 1.8662961592034863, "learning_rate": 5.349163630368042e-07, "loss": 0.5537, "step": 12061 }, { "epoch": 0.86, "grad_norm": 2.0962281472131985, "learning_rate": 5.343993288644777e-07, "loss": 0.4396, "step": 12062 }, { "epoch": 0.86, "grad_norm": 1.61177359644906, "learning_rate": 5.338825305803102e-07, "loss": 0.468, "step": 12063 }, { "epoch": 0.86, "grad_norm": 2.0672792530757085, "learning_rate": 5.333659682116004e-07, "loss": 0.5515, "step": 12064 }, { "epoch": 0.86, "grad_norm": 1.9013734072368138, "learning_rate": 5.328496417856349e-07, "loss": 0.5187, "step": 12065 }, { "epoch": 0.86, "grad_norm": 1.5294908486908725, "learning_rate": 5.323335513296884e-07, "loss": 0.4935, "step": 12066 }, { "epoch": 0.86, "grad_norm": 1.9246820219887248, "learning_rate": 5.31817696871022e-07, "loss": 0.5141, "step": 12067 }, { "epoch": 0.86, "grad_norm": 0.7259973959079472, "learning_rate": 5.313020784368861e-07, "loss": 0.461, "step": 12068 }, { "epoch": 0.86, "grad_norm": 1.6236963469958938, "learning_rate": 5.307866960545144e-07, "loss": 0.5257, "step": 12069 }, { "epoch": 0.86, "grad_norm": 0.7099428569771515, "learning_rate": 5.302715497511351e-07, "loss": 0.3998, "step": 12070 }, { "epoch": 0.86, "grad_norm": 1.9952794355632641, "learning_rate": 5.297566395539571e-07, "loss": 0.5733, "step": 12071 }, { "epoch": 0.86, "grad_norm": 1.6044162405945104, "learning_rate": 5.29241965490181e-07, "loss": 0.476, "step": 12072 }, { "epoch": 0.86, "grad_norm": 1.5679247108505296, "learning_rate": 5.28727527586993e-07, "loss": 0.5269, "step": 12073 }, { "epoch": 0.86, "grad_norm": 1.9704135839261498, "learning_rate": 5.282133258715677e-07, "loss": 0.5683, "step": 12074 }, { "epoch": 0.86, "grad_norm": 2.309242749017236, "learning_rate": 5.276993603710678e-07, "loss": 0.5363, "step": 12075 }, { "epoch": 0.86, "grad_norm": 1.7193020503878274, "learning_rate": 5.2718563111264e-07, "loss": 0.532, "step": 12076 }, { "epoch": 0.86, "grad_norm": 1.7178981824215593, "learning_rate": 5.266721381234257e-07, "loss": 0.5285, "step": 12077 }, { "epoch": 0.86, "grad_norm": 0.629481930654937, "learning_rate": 5.261588814305452e-07, "loss": 0.4033, "step": 12078 }, { "epoch": 0.86, "grad_norm": 2.1712144146576677, "learning_rate": 5.256458610611121e-07, "loss": 0.5291, "step": 12079 }, { "epoch": 0.86, "grad_norm": 2.427262181699948, "learning_rate": 5.251330770422258e-07, "loss": 0.589, "step": 12080 }, { "epoch": 0.86, "grad_norm": 1.8820242009404307, "learning_rate": 5.246205294009732e-07, "loss": 0.5146, "step": 12081 }, { "epoch": 0.86, "grad_norm": 1.9702102334884573, "learning_rate": 5.241082181644302e-07, "loss": 0.4971, "step": 12082 }, { "epoch": 0.86, "grad_norm": 1.7184838730810563, "learning_rate": 5.235961433596565e-07, "loss": 0.5211, "step": 12083 }, { "epoch": 0.86, "grad_norm": 1.6681081441983008, "learning_rate": 5.230843050137024e-07, "loss": 0.5036, "step": 12084 }, { "epoch": 0.86, "grad_norm": 1.5312797730920813, "learning_rate": 5.225727031536055e-07, "loss": 0.4887, "step": 12085 }, { "epoch": 0.86, "grad_norm": 1.7656844943859566, "learning_rate": 5.220613378063893e-07, "loss": 0.5046, "step": 12086 }, { "epoch": 0.86, "grad_norm": 1.655327749347915, "learning_rate": 5.21550208999067e-07, "loss": 0.4994, "step": 12087 }, { "epoch": 0.86, "grad_norm": 7.020194081515909, "learning_rate": 5.21039316758638e-07, "loss": 0.5245, "step": 12088 }, { "epoch": 0.86, "grad_norm": 1.709507317574151, "learning_rate": 5.205286611120897e-07, "loss": 0.4874, "step": 12089 }, { "epoch": 0.86, "grad_norm": 1.747767822812459, "learning_rate": 5.200182420863952e-07, "loss": 0.4893, "step": 12090 }, { "epoch": 0.86, "grad_norm": 1.5865999789820204, "learning_rate": 5.195080597085178e-07, "loss": 0.4697, "step": 12091 }, { "epoch": 0.86, "grad_norm": 1.83209616480102, "learning_rate": 5.189981140054063e-07, "loss": 0.5203, "step": 12092 }, { "epoch": 0.86, "grad_norm": 2.4771489892692724, "learning_rate": 5.18488405003999e-07, "loss": 0.5805, "step": 12093 }, { "epoch": 0.86, "grad_norm": 1.7656458112427895, "learning_rate": 5.179789327312184e-07, "loss": 0.5586, "step": 12094 }, { "epoch": 0.86, "grad_norm": 2.0143316911300655, "learning_rate": 5.174696972139781e-07, "loss": 0.5097, "step": 12095 }, { "epoch": 0.86, "grad_norm": 1.7383149672425786, "learning_rate": 5.16960698479177e-07, "loss": 0.5509, "step": 12096 }, { "epoch": 0.86, "grad_norm": 1.5306551092838805, "learning_rate": 5.164519365537025e-07, "loss": 0.5233, "step": 12097 }, { "epoch": 0.86, "grad_norm": 1.675045905313407, "learning_rate": 5.159434114644302e-07, "loss": 0.5852, "step": 12098 }, { "epoch": 0.86, "grad_norm": 1.6799394272277186, "learning_rate": 5.154351232382188e-07, "loss": 0.5636, "step": 12099 }, { "epoch": 0.86, "grad_norm": 1.7960446852641612, "learning_rate": 5.149270719019223e-07, "loss": 0.6451, "step": 12100 }, { "epoch": 0.86, "grad_norm": 1.714993917872733, "learning_rate": 5.144192574823742e-07, "loss": 0.5519, "step": 12101 }, { "epoch": 0.86, "grad_norm": 1.6761722902531901, "learning_rate": 5.139116800063998e-07, "loss": 0.4248, "step": 12102 }, { "epoch": 0.86, "grad_norm": 1.7825219133398151, "learning_rate": 5.13404339500812e-07, "loss": 0.5394, "step": 12103 }, { "epoch": 0.86, "grad_norm": 1.5716209153934202, "learning_rate": 5.128972359924089e-07, "loss": 0.481, "step": 12104 }, { "epoch": 0.86, "grad_norm": 1.971697522478389, "learning_rate": 5.123903695079796e-07, "loss": 0.5165, "step": 12105 }, { "epoch": 0.86, "grad_norm": 1.8548097920168471, "learning_rate": 5.118837400742954e-07, "loss": 0.5681, "step": 12106 }, { "epoch": 0.86, "grad_norm": 1.7539230019943948, "learning_rate": 5.113773477181216e-07, "loss": 0.4872, "step": 12107 }, { "epoch": 0.86, "grad_norm": 1.5255602134477608, "learning_rate": 5.108711924662047e-07, "loss": 0.4749, "step": 12108 }, { "epoch": 0.86, "grad_norm": 1.5014163171506343, "learning_rate": 5.103652743452824e-07, "loss": 0.5196, "step": 12109 }, { "epoch": 0.86, "grad_norm": 1.6893249124076233, "learning_rate": 5.098595933820799e-07, "loss": 0.4927, "step": 12110 }, { "epoch": 0.86, "grad_norm": 2.487606515304919, "learning_rate": 5.093541496033077e-07, "loss": 0.517, "step": 12111 }, { "epoch": 0.86, "grad_norm": 3.450134089478849, "learning_rate": 5.088489430356675e-07, "loss": 0.5933, "step": 12112 }, { "epoch": 0.86, "grad_norm": 1.720673500365488, "learning_rate": 5.083439737058426e-07, "loss": 0.5951, "step": 12113 }, { "epoch": 0.86, "grad_norm": 2.9764661887884247, "learning_rate": 5.078392416405092e-07, "loss": 0.5001, "step": 12114 }, { "epoch": 0.86, "grad_norm": 0.7084252367065852, "learning_rate": 5.073347468663281e-07, "loss": 0.432, "step": 12115 }, { "epoch": 0.86, "grad_norm": 1.9012352673683215, "learning_rate": 5.06830489409949e-07, "loss": 0.4794, "step": 12116 }, { "epoch": 0.86, "grad_norm": 1.934534961171081, "learning_rate": 5.063264692980085e-07, "loss": 0.4781, "step": 12117 }, { "epoch": 0.86, "grad_norm": 1.5942180535648045, "learning_rate": 5.058226865571309e-07, "loss": 0.5353, "step": 12118 }, { "epoch": 0.86, "grad_norm": 1.547990741499169, "learning_rate": 5.053191412139279e-07, "loss": 0.4611, "step": 12119 }, { "epoch": 0.86, "grad_norm": 1.4751136971484011, "learning_rate": 5.048158332949971e-07, "loss": 0.5189, "step": 12120 }, { "epoch": 0.86, "grad_norm": 2.0279100116624313, "learning_rate": 5.043127628269257e-07, "loss": 0.4304, "step": 12121 }, { "epoch": 0.86, "grad_norm": 0.717671125696523, "learning_rate": 5.038099298362876e-07, "loss": 0.4499, "step": 12122 }, { "epoch": 0.86, "grad_norm": 1.8596612724278592, "learning_rate": 5.033073343496453e-07, "loss": 0.505, "step": 12123 }, { "epoch": 0.86, "grad_norm": 1.5724992404653202, "learning_rate": 5.028049763935455e-07, "loss": 0.534, "step": 12124 }, { "epoch": 0.86, "grad_norm": 2.579980142088563, "learning_rate": 5.023028559945242e-07, "loss": 0.5759, "step": 12125 }, { "epoch": 0.86, "grad_norm": 1.8369410938202544, "learning_rate": 5.018009731791084e-07, "loss": 0.5286, "step": 12126 }, { "epoch": 0.86, "grad_norm": 1.53515209216614, "learning_rate": 5.01299327973806e-07, "loss": 0.4386, "step": 12127 }, { "epoch": 0.86, "grad_norm": 1.5529780041185468, "learning_rate": 5.007979204051183e-07, "loss": 0.4763, "step": 12128 }, { "epoch": 0.86, "grad_norm": 3.1611284036240366, "learning_rate": 5.002967504995271e-07, "loss": 0.4622, "step": 12129 }, { "epoch": 0.86, "grad_norm": 1.589932854497379, "learning_rate": 4.997958182835111e-07, "loss": 0.5161, "step": 12130 }, { "epoch": 0.86, "grad_norm": 1.6825136586889002, "learning_rate": 4.99295123783528e-07, "loss": 0.4651, "step": 12131 }, { "epoch": 0.86, "grad_norm": 1.7666139925499125, "learning_rate": 4.987946670260263e-07, "loss": 0.5054, "step": 12132 }, { "epoch": 0.86, "grad_norm": 1.8985189461266954, "learning_rate": 4.98294448037443e-07, "loss": 0.5659, "step": 12133 }, { "epoch": 0.86, "grad_norm": 1.697755180128862, "learning_rate": 4.977944668442003e-07, "loss": 0.4916, "step": 12134 }, { "epoch": 0.86, "grad_norm": 1.657827573498729, "learning_rate": 4.97294723472711e-07, "loss": 0.5343, "step": 12135 }, { "epoch": 0.86, "grad_norm": 2.4648167673068264, "learning_rate": 4.96795217949369e-07, "loss": 0.5635, "step": 12136 }, { "epoch": 0.86, "grad_norm": 1.5133949060819472, "learning_rate": 4.962959503005649e-07, "loss": 0.4406, "step": 12137 }, { "epoch": 0.86, "grad_norm": 1.7895249187527886, "learning_rate": 4.957969205526686e-07, "loss": 0.5649, "step": 12138 }, { "epoch": 0.86, "grad_norm": 1.5133005982878787, "learning_rate": 4.952981287320407e-07, "loss": 0.418, "step": 12139 }, { "epoch": 0.86, "grad_norm": 1.635699707122845, "learning_rate": 4.947995748650302e-07, "loss": 0.5516, "step": 12140 }, { "epoch": 0.86, "grad_norm": 1.9715144815710928, "learning_rate": 4.943012589779716e-07, "loss": 0.5206, "step": 12141 }, { "epoch": 0.86, "grad_norm": 1.598388057457011, "learning_rate": 4.938031810971888e-07, "loss": 0.4724, "step": 12142 }, { "epoch": 0.86, "grad_norm": 1.8273366052974582, "learning_rate": 4.933053412489902e-07, "loss": 0.5691, "step": 12143 }, { "epoch": 0.86, "grad_norm": 1.5825041862283393, "learning_rate": 4.928077394596742e-07, "loss": 0.4957, "step": 12144 }, { "epoch": 0.86, "grad_norm": 1.8447191952001432, "learning_rate": 4.923103757555258e-07, "loss": 0.5366, "step": 12145 }, { "epoch": 0.86, "grad_norm": 2.060233920792918, "learning_rate": 4.918132501628175e-07, "loss": 0.5439, "step": 12146 }, { "epoch": 0.86, "grad_norm": 1.6660802609296108, "learning_rate": 4.913163627078099e-07, "loss": 0.5432, "step": 12147 }, { "epoch": 0.86, "grad_norm": 1.5470866122301923, "learning_rate": 4.90819713416748e-07, "loss": 0.4659, "step": 12148 }, { "epoch": 0.86, "grad_norm": 0.7241297530832049, "learning_rate": 4.903233023158698e-07, "loss": 0.4087, "step": 12149 }, { "epoch": 0.86, "grad_norm": 1.685307021917687, "learning_rate": 4.898271294313944e-07, "loss": 0.5857, "step": 12150 }, { "epoch": 0.86, "grad_norm": 0.6678768821126596, "learning_rate": 4.89331194789533e-07, "loss": 0.4349, "step": 12151 }, { "epoch": 0.86, "grad_norm": 0.6772124036922401, "learning_rate": 4.888354984164823e-07, "loss": 0.4568, "step": 12152 }, { "epoch": 0.86, "grad_norm": 1.576651885600271, "learning_rate": 4.883400403384259e-07, "loss": 0.5131, "step": 12153 }, { "epoch": 0.86, "grad_norm": 2.2211948321094903, "learning_rate": 4.878448205815372e-07, "loss": 0.4455, "step": 12154 }, { "epoch": 0.86, "grad_norm": 1.5587162238148649, "learning_rate": 4.873498391719728e-07, "loss": 0.5154, "step": 12155 }, { "epoch": 0.86, "grad_norm": 1.6161597679007669, "learning_rate": 4.868550961358825e-07, "loss": 0.5306, "step": 12156 }, { "epoch": 0.86, "grad_norm": 3.3085015321875897, "learning_rate": 4.863605914993979e-07, "loss": 0.5806, "step": 12157 }, { "epoch": 0.86, "grad_norm": 2.6144151940945757, "learning_rate": 4.858663252886419e-07, "loss": 0.556, "step": 12158 }, { "epoch": 0.86, "grad_norm": 1.647844550256474, "learning_rate": 4.853722975297209e-07, "loss": 0.5075, "step": 12159 }, { "epoch": 0.86, "grad_norm": 1.725145035576416, "learning_rate": 4.848785082487345e-07, "loss": 0.4851, "step": 12160 }, { "epoch": 0.86, "grad_norm": 2.0257575725206105, "learning_rate": 4.843849574717635e-07, "loss": 0.5087, "step": 12161 }, { "epoch": 0.86, "grad_norm": 1.4482759857354528, "learning_rate": 4.838916452248804e-07, "loss": 0.5182, "step": 12162 }, { "epoch": 0.86, "grad_norm": 3.8689034669336997, "learning_rate": 4.833985715341427e-07, "loss": 0.5332, "step": 12163 }, { "epoch": 0.86, "grad_norm": 1.43619520340393, "learning_rate": 4.829057364255973e-07, "loss": 0.5224, "step": 12164 }, { "epoch": 0.86, "grad_norm": 2.2270340663767403, "learning_rate": 4.82413139925278e-07, "loss": 0.5107, "step": 12165 }, { "epoch": 0.86, "grad_norm": 1.716543905864648, "learning_rate": 4.81920782059202e-07, "loss": 0.4671, "step": 12166 }, { "epoch": 0.86, "grad_norm": 1.503242822941123, "learning_rate": 4.814286628533815e-07, "loss": 0.5388, "step": 12167 }, { "epoch": 0.86, "grad_norm": 2.4207722056783836, "learning_rate": 4.809367823338096e-07, "loss": 0.5674, "step": 12168 }, { "epoch": 0.86, "grad_norm": 1.7530497405773005, "learning_rate": 4.804451405264692e-07, "loss": 0.4925, "step": 12169 }, { "epoch": 0.86, "grad_norm": 2.140599674070669, "learning_rate": 4.799537374573304e-07, "loss": 0.5268, "step": 12170 }, { "epoch": 0.86, "grad_norm": 1.569107162677777, "learning_rate": 4.794625731523517e-07, "loss": 0.5185, "step": 12171 }, { "epoch": 0.86, "grad_norm": 1.620896084807001, "learning_rate": 4.789716476374784e-07, "loss": 0.5122, "step": 12172 }, { "epoch": 0.86, "grad_norm": 1.5041944784747527, "learning_rate": 4.784809609386415e-07, "loss": 0.4732, "step": 12173 }, { "epoch": 0.86, "grad_norm": 1.5013790616513638, "learning_rate": 4.779905130817608e-07, "loss": 0.5438, "step": 12174 }, { "epoch": 0.86, "grad_norm": 1.5352456299231325, "learning_rate": 4.77500304092744e-07, "loss": 0.4751, "step": 12175 }, { "epoch": 0.86, "grad_norm": 1.5387240448379724, "learning_rate": 4.770103339974852e-07, "loss": 0.4961, "step": 12176 }, { "epoch": 0.86, "grad_norm": 1.591140801312342, "learning_rate": 4.7652060282186775e-07, "loss": 0.5523, "step": 12177 }, { "epoch": 0.86, "grad_norm": 1.6424000696599224, "learning_rate": 4.7603111059175756e-07, "loss": 0.4376, "step": 12178 }, { "epoch": 0.86, "grad_norm": 1.9327883253462246, "learning_rate": 4.755418573330151e-07, "loss": 0.5095, "step": 12179 }, { "epoch": 0.86, "grad_norm": 1.8885801303110779, "learning_rate": 4.750528430714824e-07, "loss": 0.5017, "step": 12180 }, { "epoch": 0.86, "grad_norm": 2.256980254772972, "learning_rate": 4.745640678329905e-07, "loss": 0.5392, "step": 12181 }, { "epoch": 0.86, "grad_norm": 1.8145944241286276, "learning_rate": 4.740755316433593e-07, "loss": 0.553, "step": 12182 }, { "epoch": 0.86, "grad_norm": 1.8397057471017302, "learning_rate": 4.7358723452839426e-07, "loss": 0.439, "step": 12183 }, { "epoch": 0.86, "grad_norm": 1.9499841666007993, "learning_rate": 4.730991765138898e-07, "loss": 0.506, "step": 12184 }, { "epoch": 0.86, "grad_norm": 1.5753479520584128, "learning_rate": 4.726113576256247e-07, "loss": 0.5335, "step": 12185 }, { "epoch": 0.86, "grad_norm": 2.1829950717224986, "learning_rate": 4.7212377788937005e-07, "loss": 0.559, "step": 12186 }, { "epoch": 0.86, "grad_norm": 1.571909014710363, "learning_rate": 4.7163643733087907e-07, "loss": 0.568, "step": 12187 }, { "epoch": 0.86, "grad_norm": 1.6727449077048773, "learning_rate": 4.7114933597589673e-07, "loss": 0.5323, "step": 12188 }, { "epoch": 0.86, "grad_norm": 0.7738311036565972, "learning_rate": 4.7066247385015027e-07, "loss": 0.4164, "step": 12189 }, { "epoch": 0.87, "grad_norm": 1.738045667402, "learning_rate": 4.701758509793608e-07, "loss": 0.56, "step": 12190 }, { "epoch": 0.87, "grad_norm": 1.7779965836390155, "learning_rate": 4.696894673892327e-07, "loss": 0.504, "step": 12191 }, { "epoch": 0.87, "grad_norm": 1.8310565899920823, "learning_rate": 4.692033231054566e-07, "loss": 0.5157, "step": 12192 }, { "epoch": 0.87, "grad_norm": 2.140515636280888, "learning_rate": 4.6871741815371354e-07, "loss": 0.5489, "step": 12193 }, { "epoch": 0.87, "grad_norm": 1.6909652336897303, "learning_rate": 4.6823175255967025e-07, "loss": 0.5552, "step": 12194 }, { "epoch": 0.87, "grad_norm": 1.6428281737923989, "learning_rate": 4.6774632634898287e-07, "loss": 0.504, "step": 12195 }, { "epoch": 0.87, "grad_norm": 5.233064281697578, "learning_rate": 4.6726113954729093e-07, "loss": 0.4851, "step": 12196 }, { "epoch": 0.87, "grad_norm": 2.4278839710414037, "learning_rate": 4.667761921802244e-07, "loss": 0.5335, "step": 12197 }, { "epoch": 0.87, "grad_norm": 1.822449788922429, "learning_rate": 4.662914842734001e-07, "loss": 0.4602, "step": 12198 }, { "epoch": 0.87, "grad_norm": 1.437880637952307, "learning_rate": 4.658070158524219e-07, "loss": 0.5095, "step": 12199 }, { "epoch": 0.87, "grad_norm": 1.730726776520087, "learning_rate": 4.653227869428817e-07, "loss": 0.5485, "step": 12200 }, { "epoch": 0.87, "grad_norm": 1.705346105941609, "learning_rate": 4.648387975703567e-07, "loss": 0.5518, "step": 12201 }, { "epoch": 0.87, "grad_norm": 1.6647249562117743, "learning_rate": 4.643550477604147e-07, "loss": 0.5642, "step": 12202 }, { "epoch": 0.87, "grad_norm": 1.69807256236346, "learning_rate": 4.6387153753860713e-07, "loss": 0.5527, "step": 12203 }, { "epoch": 0.87, "grad_norm": 1.4948495098515497, "learning_rate": 4.633882669304757e-07, "loss": 0.4906, "step": 12204 }, { "epoch": 0.87, "grad_norm": 1.9042973386029913, "learning_rate": 4.629052359615477e-07, "loss": 0.5882, "step": 12205 }, { "epoch": 0.87, "grad_norm": 1.9993910482253405, "learning_rate": 4.6242244465733887e-07, "loss": 0.4814, "step": 12206 }, { "epoch": 0.87, "grad_norm": 1.8624835543717628, "learning_rate": 4.619398930433533e-07, "loss": 0.4928, "step": 12207 }, { "epoch": 0.87, "grad_norm": 6.807461723652015, "learning_rate": 4.614575811450772e-07, "loss": 0.5246, "step": 12208 }, { "epoch": 0.87, "grad_norm": 1.721552280749168, "learning_rate": 4.6097550898799246e-07, "loss": 0.4994, "step": 12209 }, { "epoch": 0.87, "grad_norm": 1.8041626559165236, "learning_rate": 4.6049367659756095e-07, "loss": 0.4952, "step": 12210 }, { "epoch": 0.87, "grad_norm": 1.597638959299611, "learning_rate": 4.600120839992356e-07, "loss": 0.5036, "step": 12211 }, { "epoch": 0.87, "grad_norm": 2.0856481033150116, "learning_rate": 4.595307312184549e-07, "loss": 0.4946, "step": 12212 }, { "epoch": 0.87, "grad_norm": 1.934238513870674, "learning_rate": 4.5904961828064644e-07, "loss": 0.5673, "step": 12213 }, { "epoch": 0.87, "grad_norm": 0.6861848302017461, "learning_rate": 4.585687452112253e-07, "loss": 0.4311, "step": 12214 }, { "epoch": 0.87, "grad_norm": 1.602783972828645, "learning_rate": 4.58088112035589e-07, "loss": 0.5511, "step": 12215 }, { "epoch": 0.87, "grad_norm": 1.8163133925421504, "learning_rate": 4.5760771877913115e-07, "loss": 0.4696, "step": 12216 }, { "epoch": 0.87, "grad_norm": 1.5549107908748099, "learning_rate": 4.571275654672236e-07, "loss": 0.5314, "step": 12217 }, { "epoch": 0.87, "grad_norm": 1.4682993704480658, "learning_rate": 4.566476521252322e-07, "loss": 0.5016, "step": 12218 }, { "epoch": 0.87, "grad_norm": 1.5632442109196556, "learning_rate": 4.561679787785067e-07, "loss": 0.5414, "step": 12219 }, { "epoch": 0.87, "grad_norm": 1.6368322537500342, "learning_rate": 4.556885454523846e-07, "loss": 0.5111, "step": 12220 }, { "epoch": 0.87, "grad_norm": 1.7212020670434314, "learning_rate": 4.552093521721934e-07, "loss": 0.5288, "step": 12221 }, { "epoch": 0.87, "grad_norm": 4.585935044380919, "learning_rate": 4.5473039896324233e-07, "loss": 0.5186, "step": 12222 }, { "epoch": 0.87, "grad_norm": 1.8217472155595527, "learning_rate": 4.542516858508339e-07, "loss": 0.4889, "step": 12223 }, { "epoch": 0.87, "grad_norm": 1.6975788259832736, "learning_rate": 4.53773212860254e-07, "loss": 0.5154, "step": 12224 }, { "epoch": 0.87, "grad_norm": 1.7222590318789708, "learning_rate": 4.5329498001677905e-07, "loss": 0.5464, "step": 12225 }, { "epoch": 0.87, "grad_norm": 1.8697217533368162, "learning_rate": 4.5281698734566835e-07, "loss": 0.535, "step": 12226 }, { "epoch": 0.87, "grad_norm": 2.48609239735506, "learning_rate": 4.5233923487217226e-07, "loss": 0.5536, "step": 12227 }, { "epoch": 0.87, "grad_norm": 1.5826131514665254, "learning_rate": 4.518617226215277e-07, "loss": 0.4956, "step": 12228 }, { "epoch": 0.87, "grad_norm": 9.687318013996034, "learning_rate": 4.5138445061895795e-07, "loss": 0.5498, "step": 12229 }, { "epoch": 0.87, "grad_norm": 1.8386186094074422, "learning_rate": 4.509074188896739e-07, "loss": 0.5727, "step": 12230 }, { "epoch": 0.87, "grad_norm": 1.490911653614934, "learning_rate": 4.504306274588749e-07, "loss": 0.5204, "step": 12231 }, { "epoch": 0.87, "grad_norm": 0.7023216089004408, "learning_rate": 4.4995407635174736e-07, "loss": 0.4229, "step": 12232 }, { "epoch": 0.87, "grad_norm": 1.674072967666397, "learning_rate": 4.494777655934618e-07, "loss": 0.4756, "step": 12233 }, { "epoch": 0.87, "grad_norm": 2.0797090444859876, "learning_rate": 4.4900169520918026e-07, "loss": 0.5856, "step": 12234 }, { "epoch": 0.87, "grad_norm": 2.6762990884206603, "learning_rate": 4.4852586522404984e-07, "loss": 0.5065, "step": 12235 }, { "epoch": 0.87, "grad_norm": 1.5668661230471939, "learning_rate": 4.4805027566320545e-07, "loss": 0.4956, "step": 12236 }, { "epoch": 0.87, "grad_norm": 2.866681017111776, "learning_rate": 4.475749265517709e-07, "loss": 0.5285, "step": 12237 }, { "epoch": 0.87, "grad_norm": 1.575889174278197, "learning_rate": 4.470998179148528e-07, "loss": 0.4711, "step": 12238 }, { "epoch": 0.87, "grad_norm": 2.1998939538644966, "learning_rate": 4.46624949777551e-07, "loss": 0.622, "step": 12239 }, { "epoch": 0.87, "grad_norm": 1.7718647567704433, "learning_rate": 4.461503221649477e-07, "loss": 0.5372, "step": 12240 }, { "epoch": 0.87, "grad_norm": 1.5768677510647495, "learning_rate": 4.4567593510211504e-07, "loss": 0.5327, "step": 12241 }, { "epoch": 0.87, "grad_norm": 1.5869406877975802, "learning_rate": 4.4520178861411134e-07, "loss": 0.4802, "step": 12242 }, { "epoch": 0.87, "grad_norm": 1.8484366133985686, "learning_rate": 4.4472788272598324e-07, "loss": 0.5441, "step": 12243 }, { "epoch": 0.87, "grad_norm": 2.2211458665442376, "learning_rate": 4.4425421746276456e-07, "loss": 0.4832, "step": 12244 }, { "epoch": 0.87, "grad_norm": 1.886772703376908, "learning_rate": 4.4378079284947307e-07, "loss": 0.5145, "step": 12245 }, { "epoch": 0.87, "grad_norm": 0.6417831384396389, "learning_rate": 4.4330760891112044e-07, "loss": 0.4342, "step": 12246 }, { "epoch": 0.87, "grad_norm": 1.692950823141219, "learning_rate": 4.428346656726995e-07, "loss": 0.4681, "step": 12247 }, { "epoch": 0.87, "grad_norm": 2.079898153823917, "learning_rate": 4.42361963159193e-07, "loss": 0.5173, "step": 12248 }, { "epoch": 0.87, "grad_norm": 1.9526160834612902, "learning_rate": 4.41889501395571e-07, "loss": 0.5396, "step": 12249 }, { "epoch": 0.87, "grad_norm": 1.8923973714122537, "learning_rate": 4.4141728040679067e-07, "loss": 0.4905, "step": 12250 }, { "epoch": 0.87, "grad_norm": 1.9011127460172248, "learning_rate": 4.4094530021779714e-07, "loss": 0.5351, "step": 12251 }, { "epoch": 0.87, "grad_norm": 1.5921535621640754, "learning_rate": 4.4047356085352045e-07, "loss": 0.5411, "step": 12252 }, { "epoch": 0.87, "grad_norm": 1.689882143868564, "learning_rate": 4.4000206233887955e-07, "loss": 0.5313, "step": 12253 }, { "epoch": 0.87, "grad_norm": 2.1397025032804633, "learning_rate": 4.395308046987812e-07, "loss": 0.5364, "step": 12254 }, { "epoch": 0.87, "grad_norm": 1.63535928825643, "learning_rate": 4.3905978795811934e-07, "loss": 0.5691, "step": 12255 }, { "epoch": 0.87, "grad_norm": 1.6835969788884702, "learning_rate": 4.3858901214177354e-07, "loss": 0.5039, "step": 12256 }, { "epoch": 0.87, "grad_norm": 1.842927103005912, "learning_rate": 4.3811847727461167e-07, "loss": 0.5241, "step": 12257 }, { "epoch": 0.87, "grad_norm": 1.6631032621437394, "learning_rate": 4.3764818338149107e-07, "loss": 0.5627, "step": 12258 }, { "epoch": 0.87, "grad_norm": 1.6591317516484885, "learning_rate": 4.371781304872513e-07, "loss": 0.5228, "step": 12259 }, { "epoch": 0.87, "grad_norm": 1.9465438148591414, "learning_rate": 4.3670831861672536e-07, "loss": 0.5361, "step": 12260 }, { "epoch": 0.87, "grad_norm": 1.5162096590093637, "learning_rate": 4.3623874779472665e-07, "loss": 0.498, "step": 12261 }, { "epoch": 0.87, "grad_norm": 1.6739803453075528, "learning_rate": 4.357694180460631e-07, "loss": 0.557, "step": 12262 }, { "epoch": 0.87, "grad_norm": 1.5927258345142263, "learning_rate": 4.3530032939552327e-07, "loss": 0.4653, "step": 12263 }, { "epoch": 0.87, "grad_norm": 0.6465400099571806, "learning_rate": 4.348314818678878e-07, "loss": 0.4095, "step": 12264 }, { "epoch": 0.87, "grad_norm": 1.7275165048921362, "learning_rate": 4.3436287548792256e-07, "loss": 0.4957, "step": 12265 }, { "epoch": 0.87, "grad_norm": 1.906791659489699, "learning_rate": 4.3389451028038043e-07, "loss": 0.5369, "step": 12266 }, { "epoch": 0.87, "grad_norm": 2.0287484465906287, "learning_rate": 4.3342638627000335e-07, "loss": 0.553, "step": 12267 }, { "epoch": 0.87, "grad_norm": 1.7400319804281201, "learning_rate": 4.329585034815165e-07, "loss": 0.5367, "step": 12268 }, { "epoch": 0.87, "grad_norm": 1.6177641632109991, "learning_rate": 4.324908619396384e-07, "loss": 0.45, "step": 12269 }, { "epoch": 0.87, "grad_norm": 1.4682015978453395, "learning_rate": 4.320234616690688e-07, "loss": 0.486, "step": 12270 }, { "epoch": 0.87, "grad_norm": 1.7116653087007963, "learning_rate": 4.315563026944985e-07, "loss": 0.5244, "step": 12271 }, { "epoch": 0.87, "grad_norm": 1.6287654375233782, "learning_rate": 4.310893850406039e-07, "loss": 0.5579, "step": 12272 }, { "epoch": 0.87, "grad_norm": 1.8989764135222662, "learning_rate": 4.3062270873205024e-07, "loss": 0.4612, "step": 12273 }, { "epoch": 0.87, "grad_norm": 1.5911339482131992, "learning_rate": 4.301562737934889e-07, "loss": 0.4887, "step": 12274 }, { "epoch": 0.87, "grad_norm": 0.6612568152977363, "learning_rate": 4.296900802495568e-07, "loss": 0.4159, "step": 12275 }, { "epoch": 0.87, "grad_norm": 1.7623336448041877, "learning_rate": 4.29224128124881e-07, "loss": 0.5346, "step": 12276 }, { "epoch": 0.87, "grad_norm": 2.3374360666364997, "learning_rate": 4.287584174440751e-07, "loss": 0.5649, "step": 12277 }, { "epoch": 0.87, "grad_norm": 1.9758723873980755, "learning_rate": 4.282929482317388e-07, "loss": 0.5562, "step": 12278 }, { "epoch": 0.87, "grad_norm": 2.087429678777001, "learning_rate": 4.278277205124598e-07, "loss": 0.542, "step": 12279 }, { "epoch": 0.87, "grad_norm": 1.785891089761743, "learning_rate": 4.2736273431081387e-07, "loss": 0.5064, "step": 12280 }, { "epoch": 0.87, "grad_norm": 0.7248866277052052, "learning_rate": 4.268979896513631e-07, "loss": 0.4105, "step": 12281 }, { "epoch": 0.87, "grad_norm": 1.6706680838494201, "learning_rate": 4.26433486558655e-07, "loss": 0.5291, "step": 12282 }, { "epoch": 0.87, "grad_norm": 1.5806722391571353, "learning_rate": 4.259692250572278e-07, "loss": 0.4914, "step": 12283 }, { "epoch": 0.87, "grad_norm": 1.9580104408239736, "learning_rate": 4.255052051716052e-07, "loss": 0.5083, "step": 12284 }, { "epoch": 0.87, "grad_norm": 1.9737839122823915, "learning_rate": 4.250414269262981e-07, "loss": 0.6239, "step": 12285 }, { "epoch": 0.87, "grad_norm": 2.207022811008975, "learning_rate": 4.245778903458059e-07, "loss": 0.4542, "step": 12286 }, { "epoch": 0.87, "grad_norm": 1.6910806069778506, "learning_rate": 4.241145954546111e-07, "loss": 0.4907, "step": 12287 }, { "epoch": 0.87, "grad_norm": 1.8890750790770736, "learning_rate": 4.236515422771903e-07, "loss": 0.496, "step": 12288 }, { "epoch": 0.87, "grad_norm": 1.4520931225977063, "learning_rate": 4.2318873083800116e-07, "loss": 0.4742, "step": 12289 }, { "epoch": 0.87, "grad_norm": 1.8167224705270189, "learning_rate": 4.2272616116149247e-07, "loss": 0.4729, "step": 12290 }, { "epoch": 0.87, "grad_norm": 1.8709470370839907, "learning_rate": 4.222638332720963e-07, "loss": 0.5184, "step": 12291 }, { "epoch": 0.87, "grad_norm": 1.8938117463681103, "learning_rate": 4.218017471942376e-07, "loss": 0.5748, "step": 12292 }, { "epoch": 0.87, "grad_norm": 1.6356292894508706, "learning_rate": 4.213399029523224e-07, "loss": 0.5168, "step": 12293 }, { "epoch": 0.87, "grad_norm": 1.8054411212679735, "learning_rate": 4.208783005707484e-07, "loss": 0.5293, "step": 12294 }, { "epoch": 0.87, "grad_norm": 1.8252076467676241, "learning_rate": 4.204169400738989e-07, "loss": 0.5596, "step": 12295 }, { "epoch": 0.87, "grad_norm": 2.3584863405776257, "learning_rate": 4.1995582148614433e-07, "loss": 0.5066, "step": 12296 }, { "epoch": 0.87, "grad_norm": 2.2935675552810766, "learning_rate": 4.1949494483184363e-07, "loss": 0.4997, "step": 12297 }, { "epoch": 0.87, "grad_norm": 1.9805818345796364, "learning_rate": 4.190343101353389e-07, "loss": 0.586, "step": 12298 }, { "epoch": 0.87, "grad_norm": 1.7307053773846368, "learning_rate": 4.185739174209658e-07, "loss": 0.5004, "step": 12299 }, { "epoch": 0.87, "grad_norm": 1.7025069588976744, "learning_rate": 4.1811376671304195e-07, "loss": 0.5105, "step": 12300 }, { "epoch": 0.87, "grad_norm": 0.6912325697982562, "learning_rate": 4.176538580358741e-07, "loss": 0.4124, "step": 12301 }, { "epoch": 0.87, "grad_norm": 2.2755802706622754, "learning_rate": 4.171941914137567e-07, "loss": 0.5517, "step": 12302 }, { "epoch": 0.87, "grad_norm": 2.1237433499663783, "learning_rate": 4.167347668709709e-07, "loss": 0.4921, "step": 12303 }, { "epoch": 0.87, "grad_norm": 2.1514041190081286, "learning_rate": 4.162755844317856e-07, "loss": 0.5254, "step": 12304 }, { "epoch": 0.87, "grad_norm": 2.0086974201144505, "learning_rate": 4.1581664412045473e-07, "loss": 0.4822, "step": 12305 }, { "epoch": 0.87, "grad_norm": 4.464369031659622, "learning_rate": 4.1535794596122224e-07, "loss": 0.4892, "step": 12306 }, { "epoch": 0.87, "grad_norm": 1.6954258560785411, "learning_rate": 4.1489948997831817e-07, "loss": 0.4815, "step": 12307 }, { "epoch": 0.87, "grad_norm": 2.3283447605092906, "learning_rate": 4.144412761959593e-07, "loss": 0.5468, "step": 12308 }, { "epoch": 0.87, "grad_norm": 1.7100903620642396, "learning_rate": 4.139833046383512e-07, "loss": 0.4748, "step": 12309 }, { "epoch": 0.87, "grad_norm": 0.6934287379856986, "learning_rate": 4.135255753296824e-07, "loss": 0.4439, "step": 12310 }, { "epoch": 0.87, "grad_norm": 1.6186559646451069, "learning_rate": 4.130680882941357e-07, "loss": 0.5427, "step": 12311 }, { "epoch": 0.87, "grad_norm": 2.2235112302383553, "learning_rate": 4.1261084355587456e-07, "loss": 0.5495, "step": 12312 }, { "epoch": 0.87, "grad_norm": 1.8935765637379058, "learning_rate": 4.1215384113905243e-07, "loss": 0.4948, "step": 12313 }, { "epoch": 0.87, "grad_norm": 1.6105856625539303, "learning_rate": 4.116970810678106e-07, "loss": 0.5256, "step": 12314 }, { "epoch": 0.87, "grad_norm": 1.7277794367597874, "learning_rate": 4.1124056336627637e-07, "loss": 0.4926, "step": 12315 }, { "epoch": 0.87, "grad_norm": 1.5711753052031339, "learning_rate": 4.1078428805856494e-07, "loss": 0.5224, "step": 12316 }, { "epoch": 0.87, "grad_norm": 1.8779856758083189, "learning_rate": 4.1032825516877594e-07, "loss": 0.4823, "step": 12317 }, { "epoch": 0.87, "grad_norm": 1.462142314539904, "learning_rate": 4.098724647210023e-07, "loss": 0.444, "step": 12318 }, { "epoch": 0.87, "grad_norm": 1.5439116639856993, "learning_rate": 4.0941691673931805e-07, "loss": 0.5235, "step": 12319 }, { "epoch": 0.87, "grad_norm": 1.7193245307132088, "learning_rate": 4.089616112477873e-07, "loss": 0.5443, "step": 12320 }, { "epoch": 0.87, "grad_norm": 2.4919994933615004, "learning_rate": 4.0850654827045976e-07, "loss": 0.4858, "step": 12321 }, { "epoch": 0.87, "grad_norm": 1.750670013964222, "learning_rate": 4.0805172783137615e-07, "loss": 0.548, "step": 12322 }, { "epoch": 0.87, "grad_norm": 1.6649224801048146, "learning_rate": 4.075971499545589e-07, "loss": 0.4979, "step": 12323 }, { "epoch": 0.87, "grad_norm": 1.5990137799288404, "learning_rate": 4.071428146640216e-07, "loss": 0.5414, "step": 12324 }, { "epoch": 0.87, "grad_norm": 1.5455490444534017, "learning_rate": 4.066887219837629e-07, "loss": 0.4763, "step": 12325 }, { "epoch": 0.87, "grad_norm": 1.658673562007029, "learning_rate": 4.0623487193777066e-07, "loss": 0.4881, "step": 12326 }, { "epoch": 0.87, "grad_norm": 1.6995465491355064, "learning_rate": 4.057812645500192e-07, "loss": 0.518, "step": 12327 }, { "epoch": 0.87, "grad_norm": 0.6560348724599131, "learning_rate": 4.0532789984446707e-07, "loss": 0.4032, "step": 12328 }, { "epoch": 0.87, "grad_norm": 1.877197611851411, "learning_rate": 4.048747778450657e-07, "loss": 0.4528, "step": 12329 }, { "epoch": 0.87, "grad_norm": 2.1616460892038902, "learning_rate": 4.044218985757481e-07, "loss": 0.5265, "step": 12330 }, { "epoch": 0.88, "grad_norm": 1.682650648601404, "learning_rate": 4.039692620604374e-07, "loss": 0.5066, "step": 12331 }, { "epoch": 0.88, "grad_norm": 1.9878494950659022, "learning_rate": 4.0351686832304447e-07, "loss": 0.5256, "step": 12332 }, { "epoch": 0.88, "grad_norm": 2.04621093817879, "learning_rate": 4.0306471738746466e-07, "loss": 0.4901, "step": 12333 }, { "epoch": 0.88, "grad_norm": 1.7178782599220441, "learning_rate": 4.0261280927758493e-07, "loss": 0.4667, "step": 12334 }, { "epoch": 0.88, "grad_norm": 1.5487204373965842, "learning_rate": 4.021611440172729e-07, "loss": 0.5116, "step": 12335 }, { "epoch": 0.88, "grad_norm": 1.8929941708512685, "learning_rate": 4.0170972163038946e-07, "loss": 0.5034, "step": 12336 }, { "epoch": 0.88, "grad_norm": 0.6810709474104447, "learning_rate": 4.012585421407794e-07, "loss": 0.4273, "step": 12337 }, { "epoch": 0.88, "grad_norm": 1.7406537935740145, "learning_rate": 4.0080760557227594e-07, "loss": 0.5419, "step": 12338 }, { "epoch": 0.88, "grad_norm": 1.5179733566998834, "learning_rate": 4.0035691194869944e-07, "loss": 0.4991, "step": 12339 }, { "epoch": 0.88, "grad_norm": 2.0735403896018023, "learning_rate": 3.999064612938552e-07, "loss": 0.5039, "step": 12340 }, { "epoch": 0.88, "grad_norm": 1.5607821889608993, "learning_rate": 3.994562536315405e-07, "loss": 0.5268, "step": 12341 }, { "epoch": 0.88, "grad_norm": 1.4607944741470478, "learning_rate": 3.990062889855345e-07, "loss": 0.5141, "step": 12342 }, { "epoch": 0.88, "grad_norm": 0.6663512421304779, "learning_rate": 3.985565673796066e-07, "loss": 0.4145, "step": 12343 }, { "epoch": 0.88, "grad_norm": 1.677430124330887, "learning_rate": 3.981070888375127e-07, "loss": 0.5154, "step": 12344 }, { "epoch": 0.88, "grad_norm": 2.450724520146816, "learning_rate": 3.9765785338299555e-07, "loss": 0.4993, "step": 12345 }, { "epoch": 0.88, "grad_norm": 2.034613627351141, "learning_rate": 3.972088610397867e-07, "loss": 0.4773, "step": 12346 }, { "epoch": 0.88, "grad_norm": 1.5410983761597516, "learning_rate": 3.967601118316e-07, "loss": 0.5031, "step": 12347 }, { "epoch": 0.88, "grad_norm": 1.797640787687162, "learning_rate": 3.963116057821437e-07, "loss": 0.5219, "step": 12348 }, { "epoch": 0.88, "grad_norm": 1.878816099283627, "learning_rate": 3.9586334291510766e-07, "loss": 0.5001, "step": 12349 }, { "epoch": 0.88, "grad_norm": 1.7339217593321463, "learning_rate": 3.9541532325417077e-07, "loss": 0.5753, "step": 12350 }, { "epoch": 0.88, "grad_norm": 1.676553841615516, "learning_rate": 3.94967546822998e-07, "loss": 0.4804, "step": 12351 }, { "epoch": 0.88, "grad_norm": 2.2676220256648456, "learning_rate": 3.945200136452437e-07, "loss": 0.516, "step": 12352 }, { "epoch": 0.88, "grad_norm": 1.6343944176420924, "learning_rate": 3.9407272374454907e-07, "loss": 0.4806, "step": 12353 }, { "epoch": 0.88, "grad_norm": 1.79163437445112, "learning_rate": 3.93625677144539e-07, "loss": 0.4491, "step": 12354 }, { "epoch": 0.88, "grad_norm": 2.454971829469113, "learning_rate": 3.9317887386882914e-07, "loss": 0.5025, "step": 12355 }, { "epoch": 0.88, "grad_norm": 6.7381801972971225, "learning_rate": 3.9273231394102165e-07, "loss": 0.5194, "step": 12356 }, { "epoch": 0.88, "grad_norm": 0.6835995843702123, "learning_rate": 3.922859973847054e-07, "loss": 0.4325, "step": 12357 }, { "epoch": 0.88, "grad_norm": 1.8191071197350734, "learning_rate": 3.918399242234544e-07, "loss": 0.4665, "step": 12358 }, { "epoch": 0.88, "grad_norm": 9.739378104672744, "learning_rate": 3.9139409448083423e-07, "loss": 0.5659, "step": 12359 }, { "epoch": 0.88, "grad_norm": 1.9453520806401217, "learning_rate": 3.9094850818039375e-07, "loss": 0.5744, "step": 12360 }, { "epoch": 0.88, "grad_norm": 1.8102387251044818, "learning_rate": 3.905031653456709e-07, "loss": 0.4996, "step": 12361 }, { "epoch": 0.88, "grad_norm": 1.7246932910855062, "learning_rate": 3.900580660001896e-07, "loss": 0.4998, "step": 12362 }, { "epoch": 0.88, "grad_norm": 1.761760579505079, "learning_rate": 3.8961321016746154e-07, "loss": 0.5111, "step": 12363 }, { "epoch": 0.88, "grad_norm": 1.871385728970166, "learning_rate": 3.891685978709875e-07, "loss": 0.4967, "step": 12364 }, { "epoch": 0.88, "grad_norm": 1.7244918727396341, "learning_rate": 3.887242291342502e-07, "loss": 0.5244, "step": 12365 }, { "epoch": 0.88, "grad_norm": 1.853364956467938, "learning_rate": 3.882801039807249e-07, "loss": 0.4808, "step": 12366 }, { "epoch": 0.88, "grad_norm": 1.5377428896429148, "learning_rate": 3.878362224338705e-07, "loss": 0.4386, "step": 12367 }, { "epoch": 0.88, "grad_norm": 2.020381619343402, "learning_rate": 3.8739258451713503e-07, "loss": 0.5171, "step": 12368 }, { "epoch": 0.88, "grad_norm": 1.5360068628012573, "learning_rate": 3.8694919025395406e-07, "loss": 0.5517, "step": 12369 }, { "epoch": 0.88, "grad_norm": 1.8456188830840825, "learning_rate": 3.8650603966774616e-07, "loss": 0.4967, "step": 12370 }, { "epoch": 0.88, "grad_norm": 1.6692377635423776, "learning_rate": 3.860631327819236e-07, "loss": 0.5351, "step": 12371 }, { "epoch": 0.88, "grad_norm": 1.5362116013174432, "learning_rate": 3.856204696198801e-07, "loss": 0.5051, "step": 12372 }, { "epoch": 0.88, "grad_norm": 1.5957596664982405, "learning_rate": 3.851780502049984e-07, "loss": 0.509, "step": 12373 }, { "epoch": 0.88, "grad_norm": 0.6793994194976533, "learning_rate": 3.847358745606494e-07, "loss": 0.4167, "step": 12374 }, { "epoch": 0.88, "grad_norm": 1.8869545861724444, "learning_rate": 3.842939427101905e-07, "loss": 0.5673, "step": 12375 }, { "epoch": 0.88, "grad_norm": 1.8232148487435595, "learning_rate": 3.838522546769663e-07, "loss": 0.5173, "step": 12376 }, { "epoch": 0.88, "grad_norm": 2.4242606864126355, "learning_rate": 3.8341081048430595e-07, "loss": 0.4875, "step": 12377 }, { "epoch": 0.88, "grad_norm": 1.6861368596826092, "learning_rate": 3.8296961015553135e-07, "loss": 0.5315, "step": 12378 }, { "epoch": 0.88, "grad_norm": 1.685632591992498, "learning_rate": 3.825286537139461e-07, "loss": 0.4667, "step": 12379 }, { "epoch": 0.88, "grad_norm": 0.7371131282537533, "learning_rate": 3.820879411828432e-07, "loss": 0.4462, "step": 12380 }, { "epoch": 0.88, "grad_norm": 0.7428880403741701, "learning_rate": 3.816474725855029e-07, "loss": 0.4357, "step": 12381 }, { "epoch": 0.88, "grad_norm": 1.7594881645476148, "learning_rate": 3.812072479451917e-07, "loss": 0.5943, "step": 12382 }, { "epoch": 0.88, "grad_norm": 0.694971112377807, "learning_rate": 3.807672672851659e-07, "loss": 0.4031, "step": 12383 }, { "epoch": 0.88, "grad_norm": 0.6452036034650921, "learning_rate": 3.803275306286641e-07, "loss": 0.4143, "step": 12384 }, { "epoch": 0.88, "grad_norm": 1.6953471493992103, "learning_rate": 3.798880379989156e-07, "loss": 0.5851, "step": 12385 }, { "epoch": 0.88, "grad_norm": 1.8357923925893973, "learning_rate": 3.7944878941913565e-07, "loss": 0.5174, "step": 12386 }, { "epoch": 0.88, "grad_norm": 1.8087467135469366, "learning_rate": 3.790097849125285e-07, "loss": 0.5639, "step": 12387 }, { "epoch": 0.88, "grad_norm": 1.8112305733299847, "learning_rate": 3.785710245022817e-07, "loss": 0.4674, "step": 12388 }, { "epoch": 0.88, "grad_norm": 1.6657893430709256, "learning_rate": 3.781325082115722e-07, "loss": 0.4961, "step": 12389 }, { "epoch": 0.88, "grad_norm": 2.200450011566859, "learning_rate": 3.77694236063566e-07, "loss": 0.4959, "step": 12390 }, { "epoch": 0.88, "grad_norm": 1.5768621263312423, "learning_rate": 3.772562080814124e-07, "loss": 0.4773, "step": 12391 }, { "epoch": 0.88, "grad_norm": 2.1351162564871666, "learning_rate": 3.7681842428824945e-07, "loss": 0.4741, "step": 12392 }, { "epoch": 0.88, "grad_norm": 0.6545433999582903, "learning_rate": 3.763808847072026e-07, "loss": 0.4378, "step": 12393 }, { "epoch": 0.88, "grad_norm": 1.4586527030671088, "learning_rate": 3.759435893613855e-07, "loss": 0.4448, "step": 12394 }, { "epoch": 0.88, "grad_norm": 1.6488007271663436, "learning_rate": 3.7550653827389593e-07, "loss": 0.5407, "step": 12395 }, { "epoch": 0.88, "grad_norm": 1.78614659047331, "learning_rate": 3.7506973146782033e-07, "loss": 0.5174, "step": 12396 }, { "epoch": 0.88, "grad_norm": 1.5869179742485857, "learning_rate": 3.746331689662336e-07, "loss": 0.4909, "step": 12397 }, { "epoch": 0.88, "grad_norm": 0.7167899871569551, "learning_rate": 3.7419685079219504e-07, "loss": 0.4332, "step": 12398 }, { "epoch": 0.88, "grad_norm": 1.7325139187670855, "learning_rate": 3.737607769687546e-07, "loss": 0.5119, "step": 12399 }, { "epoch": 0.88, "grad_norm": 1.518656376330653, "learning_rate": 3.7332494751894374e-07, "loss": 0.4998, "step": 12400 }, { "epoch": 0.88, "grad_norm": 4.5839432087451755, "learning_rate": 3.728893624657881e-07, "loss": 0.5029, "step": 12401 }, { "epoch": 0.88, "grad_norm": 1.7491849170824432, "learning_rate": 3.7245402183229474e-07, "loss": 0.5084, "step": 12402 }, { "epoch": 0.88, "grad_norm": 1.6118194449607304, "learning_rate": 3.720189256414597e-07, "loss": 0.4714, "step": 12403 }, { "epoch": 0.88, "grad_norm": 2.012701586294835, "learning_rate": 3.7158407391626683e-07, "loss": 0.4573, "step": 12404 }, { "epoch": 0.88, "grad_norm": 1.8274193145480169, "learning_rate": 3.7114946667968667e-07, "loss": 0.5245, "step": 12405 }, { "epoch": 0.88, "grad_norm": 2.3052606094070516, "learning_rate": 3.707151039546775e-07, "loss": 0.5085, "step": 12406 }, { "epoch": 0.88, "grad_norm": 4.339859599357677, "learning_rate": 3.7028098576418034e-07, "loss": 0.5651, "step": 12407 }, { "epoch": 0.88, "grad_norm": 2.0773581227143127, "learning_rate": 3.698471121311309e-07, "loss": 0.4958, "step": 12408 }, { "epoch": 0.88, "grad_norm": 1.6926047370164214, "learning_rate": 3.694134830784457e-07, "loss": 0.5011, "step": 12409 }, { "epoch": 0.88, "grad_norm": 1.8423370970370172, "learning_rate": 3.689800986290304e-07, "loss": 0.5254, "step": 12410 }, { "epoch": 0.88, "grad_norm": 1.7719328847206304, "learning_rate": 3.685469588057783e-07, "loss": 0.5067, "step": 12411 }, { "epoch": 0.88, "grad_norm": 2.6132454858073855, "learning_rate": 3.6811406363157e-07, "loss": 0.4909, "step": 12412 }, { "epoch": 0.88, "grad_norm": 1.497438205349686, "learning_rate": 3.6768141312927217e-07, "loss": 0.5268, "step": 12413 }, { "epoch": 0.88, "grad_norm": 1.6179937478298771, "learning_rate": 3.6724900732173774e-07, "loss": 0.5764, "step": 12414 }, { "epoch": 0.88, "grad_norm": 2.27646366200564, "learning_rate": 3.6681684623180893e-07, "loss": 0.4827, "step": 12415 }, { "epoch": 0.88, "grad_norm": 1.6314592158727765, "learning_rate": 3.66384929882313e-07, "loss": 0.599, "step": 12416 }, { "epoch": 0.88, "grad_norm": 1.5833870067193596, "learning_rate": 3.659532582960662e-07, "loss": 0.4617, "step": 12417 }, { "epoch": 0.88, "grad_norm": 1.5874451825825013, "learning_rate": 3.6552183149587196e-07, "loss": 0.556, "step": 12418 }, { "epoch": 0.88, "grad_norm": 1.5596956806720983, "learning_rate": 3.6509064950451587e-07, "loss": 0.5315, "step": 12419 }, { "epoch": 0.88, "grad_norm": 1.5213866620027063, "learning_rate": 3.6465971234477925e-07, "loss": 0.5538, "step": 12420 }, { "epoch": 0.88, "grad_norm": 2.9797191332486643, "learning_rate": 3.642290200394222e-07, "loss": 0.5342, "step": 12421 }, { "epoch": 0.88, "grad_norm": 0.7346952333267825, "learning_rate": 3.6379857261119645e-07, "loss": 0.4166, "step": 12422 }, { "epoch": 0.88, "grad_norm": 1.4814648298685746, "learning_rate": 3.633683700828394e-07, "loss": 0.5038, "step": 12423 }, { "epoch": 0.88, "grad_norm": 1.668279687834836, "learning_rate": 3.629384124770774e-07, "loss": 0.5282, "step": 12424 }, { "epoch": 0.88, "grad_norm": 1.5451790286500853, "learning_rate": 3.6250869981661994e-07, "loss": 0.4987, "step": 12425 }, { "epoch": 0.88, "grad_norm": 1.6978142515404004, "learning_rate": 3.620792321241667e-07, "loss": 0.5147, "step": 12426 }, { "epoch": 0.88, "grad_norm": 1.723486978535732, "learning_rate": 3.6165000942240405e-07, "loss": 0.5125, "step": 12427 }, { "epoch": 0.88, "grad_norm": 1.8489426918227718, "learning_rate": 3.612210317340048e-07, "loss": 0.5249, "step": 12428 }, { "epoch": 0.88, "grad_norm": 1.696464781440644, "learning_rate": 3.607922990816298e-07, "loss": 0.5282, "step": 12429 }, { "epoch": 0.88, "grad_norm": 2.34639285998573, "learning_rate": 3.603638114879238e-07, "loss": 0.4983, "step": 12430 }, { "epoch": 0.88, "grad_norm": 0.6785922916666683, "learning_rate": 3.599355689755241e-07, "loss": 0.4287, "step": 12431 }, { "epoch": 0.88, "grad_norm": 2.0757421781106697, "learning_rate": 3.595075715670493e-07, "loss": 0.5116, "step": 12432 }, { "epoch": 0.88, "grad_norm": 0.6019605382837037, "learning_rate": 3.590798192851086e-07, "loss": 0.4026, "step": 12433 }, { "epoch": 0.88, "grad_norm": 1.7879294373156671, "learning_rate": 3.5865231215229776e-07, "loss": 0.4965, "step": 12434 }, { "epoch": 0.88, "grad_norm": 1.7388662738236278, "learning_rate": 3.582250501911988e-07, "loss": 0.5693, "step": 12435 }, { "epoch": 0.88, "grad_norm": 2.0380334737055747, "learning_rate": 3.5779803342438193e-07, "loss": 0.4732, "step": 12436 }, { "epoch": 0.88, "grad_norm": 1.6636485703583506, "learning_rate": 3.5737126187440185e-07, "loss": 0.4671, "step": 12437 }, { "epoch": 0.88, "grad_norm": 2.3679527225201427, "learning_rate": 3.569447355638034e-07, "loss": 0.4821, "step": 12438 }, { "epoch": 0.88, "grad_norm": 1.7588955687651104, "learning_rate": 3.565184545151168e-07, "loss": 0.5487, "step": 12439 }, { "epoch": 0.88, "grad_norm": 1.5323530818434774, "learning_rate": 3.5609241875085965e-07, "loss": 0.4879, "step": 12440 }, { "epoch": 0.88, "grad_norm": 1.6351755055533477, "learning_rate": 3.5566662829353615e-07, "loss": 0.487, "step": 12441 }, { "epoch": 0.88, "grad_norm": 2.0784736969493514, "learning_rate": 3.552410831656394e-07, "loss": 0.5075, "step": 12442 }, { "epoch": 0.88, "grad_norm": 2.0589603643386694, "learning_rate": 3.548157833896476e-07, "loss": 0.4581, "step": 12443 }, { "epoch": 0.88, "grad_norm": 2.139761427440226, "learning_rate": 3.5439072898802495e-07, "loss": 0.5532, "step": 12444 }, { "epoch": 0.88, "grad_norm": 1.7699920421652853, "learning_rate": 3.5396591998322574e-07, "loss": 0.5323, "step": 12445 }, { "epoch": 0.88, "grad_norm": 1.8319637874433377, "learning_rate": 3.535413563976897e-07, "loss": 0.5366, "step": 12446 }, { "epoch": 0.88, "grad_norm": 1.752406238242355, "learning_rate": 3.5311703825384347e-07, "loss": 0.5967, "step": 12447 }, { "epoch": 0.88, "grad_norm": 2.6152648887778565, "learning_rate": 3.526929655741024e-07, "loss": 0.4809, "step": 12448 }, { "epoch": 0.88, "grad_norm": 2.126123427340597, "learning_rate": 3.522691383808635e-07, "loss": 0.4712, "step": 12449 }, { "epoch": 0.88, "grad_norm": 1.8321299088960519, "learning_rate": 3.518455566965195e-07, "loss": 0.5394, "step": 12450 }, { "epoch": 0.88, "grad_norm": 1.7009862437164112, "learning_rate": 3.5142222054344253e-07, "loss": 0.4966, "step": 12451 }, { "epoch": 0.88, "grad_norm": 0.6863300546085954, "learning_rate": 3.509991299439963e-07, "loss": 0.4145, "step": 12452 }, { "epoch": 0.88, "grad_norm": 2.587300818150453, "learning_rate": 3.5057628492052743e-07, "loss": 0.5777, "step": 12453 }, { "epoch": 0.88, "grad_norm": 1.548722795555203, "learning_rate": 3.501536854953752e-07, "loss": 0.468, "step": 12454 }, { "epoch": 0.88, "grad_norm": 2.726629852185792, "learning_rate": 3.4973133169086014e-07, "loss": 0.511, "step": 12455 }, { "epoch": 0.88, "grad_norm": 1.6378154859726575, "learning_rate": 3.493092235292922e-07, "loss": 0.5101, "step": 12456 }, { "epoch": 0.88, "grad_norm": 0.6626688404897362, "learning_rate": 3.488873610329718e-07, "loss": 0.4165, "step": 12457 }, { "epoch": 0.88, "grad_norm": 1.6618381522316965, "learning_rate": 3.484657442241807e-07, "loss": 0.5403, "step": 12458 }, { "epoch": 0.88, "grad_norm": 1.7796960568769282, "learning_rate": 3.480443731251909e-07, "loss": 0.5159, "step": 12459 }, { "epoch": 0.88, "grad_norm": 1.755099758067953, "learning_rate": 3.476232477582586e-07, "loss": 0.5671, "step": 12460 }, { "epoch": 0.88, "grad_norm": 1.5727271177203757, "learning_rate": 3.472023681456321e-07, "loss": 0.5142, "step": 12461 }, { "epoch": 0.88, "grad_norm": 1.4914597753875545, "learning_rate": 3.4678173430954197e-07, "loss": 0.4437, "step": 12462 }, { "epoch": 0.88, "grad_norm": 1.7877285138940051, "learning_rate": 3.4636134627220817e-07, "loss": 0.5335, "step": 12463 }, { "epoch": 0.88, "grad_norm": 1.7412223351900527, "learning_rate": 3.459412040558363e-07, "loss": 0.5258, "step": 12464 }, { "epoch": 0.88, "grad_norm": 1.7676650122398736, "learning_rate": 3.4552130768262027e-07, "loss": 0.5663, "step": 12465 }, { "epoch": 0.88, "grad_norm": 2.5247326306134914, "learning_rate": 3.451016571747412e-07, "loss": 0.4842, "step": 12466 }, { "epoch": 0.88, "grad_norm": 1.6785787388521347, "learning_rate": 3.446822525543647e-07, "loss": 0.4747, "step": 12467 }, { "epoch": 0.88, "grad_norm": 1.8362940892368056, "learning_rate": 3.4426309384364586e-07, "loss": 0.5535, "step": 12468 }, { "epoch": 0.88, "grad_norm": 1.6958188642803864, "learning_rate": 3.438441810647258e-07, "loss": 0.5281, "step": 12469 }, { "epoch": 0.88, "grad_norm": 1.8095019597230562, "learning_rate": 3.434255142397341e-07, "loss": 0.5509, "step": 12470 }, { "epoch": 0.88, "grad_norm": 1.7017073577840283, "learning_rate": 3.4300709339078473e-07, "loss": 0.5614, "step": 12471 }, { "epoch": 0.89, "grad_norm": 1.8362911467089231, "learning_rate": 3.425889185399811e-07, "loss": 0.5584, "step": 12472 }, { "epoch": 0.89, "grad_norm": 1.8493286477648103, "learning_rate": 3.4217098970941274e-07, "loss": 0.5363, "step": 12473 }, { "epoch": 0.89, "grad_norm": 1.8083878695632642, "learning_rate": 3.417533069211554e-07, "loss": 0.5438, "step": 12474 }, { "epoch": 0.89, "grad_norm": 1.8030305332635188, "learning_rate": 3.4133587019727245e-07, "loss": 0.4956, "step": 12475 }, { "epoch": 0.89, "grad_norm": 1.5551236268686666, "learning_rate": 3.4091867955981406e-07, "loss": 0.4202, "step": 12476 }, { "epoch": 0.89, "grad_norm": 2.3524658390371815, "learning_rate": 3.405017350308187e-07, "loss": 0.5679, "step": 12477 }, { "epoch": 0.89, "grad_norm": 0.6183407373668105, "learning_rate": 3.4008503663231053e-07, "loss": 0.4152, "step": 12478 }, { "epoch": 0.89, "grad_norm": 2.121337402537972, "learning_rate": 3.3966858438629956e-07, "loss": 0.4785, "step": 12479 }, { "epoch": 0.89, "grad_norm": 1.7733972128270377, "learning_rate": 3.3925237831478663e-07, "loss": 0.4241, "step": 12480 }, { "epoch": 0.89, "grad_norm": 0.7057561401574279, "learning_rate": 3.388364184397552e-07, "loss": 0.4483, "step": 12481 }, { "epoch": 0.89, "grad_norm": 1.5171669463785225, "learning_rate": 3.384207047831789e-07, "loss": 0.4581, "step": 12482 }, { "epoch": 0.89, "grad_norm": 1.8045629364767846, "learning_rate": 3.3800523736701506e-07, "loss": 0.483, "step": 12483 }, { "epoch": 0.89, "grad_norm": 1.8337528399994962, "learning_rate": 3.3759001621321233e-07, "loss": 0.5042, "step": 12484 }, { "epoch": 0.89, "grad_norm": 0.7194773878095185, "learning_rate": 3.371750413437042e-07, "loss": 0.4325, "step": 12485 }, { "epoch": 0.89, "grad_norm": 2.002345823706235, "learning_rate": 3.3676031278040864e-07, "loss": 0.5186, "step": 12486 }, { "epoch": 0.89, "grad_norm": 1.8767107949229984, "learning_rate": 3.3634583054523604e-07, "loss": 0.4861, "step": 12487 }, { "epoch": 0.89, "grad_norm": 1.7511657216123926, "learning_rate": 3.3593159466007883e-07, "loss": 0.5702, "step": 12488 }, { "epoch": 0.89, "grad_norm": 1.3964660245304514, "learning_rate": 3.355176051468195e-07, "loss": 0.4047, "step": 12489 }, { "epoch": 0.89, "grad_norm": 1.719904778481833, "learning_rate": 3.351038620273239e-07, "loss": 0.5393, "step": 12490 }, { "epoch": 0.89, "grad_norm": 1.904003857101435, "learning_rate": 3.346903653234507e-07, "loss": 0.4925, "step": 12491 }, { "epoch": 0.89, "grad_norm": 1.6732667012662514, "learning_rate": 3.3427711505704006e-07, "loss": 0.4879, "step": 12492 }, { "epoch": 0.89, "grad_norm": 1.9238664209323715, "learning_rate": 3.338641112499219e-07, "loss": 0.4819, "step": 12493 }, { "epoch": 0.89, "grad_norm": 2.2108035981828085, "learning_rate": 3.3345135392391204e-07, "loss": 0.5202, "step": 12494 }, { "epoch": 0.89, "grad_norm": 1.7747833940273126, "learning_rate": 3.3303884310081413e-07, "loss": 0.5157, "step": 12495 }, { "epoch": 0.89, "grad_norm": 1.93839944723024, "learning_rate": 3.326265788024197e-07, "loss": 0.4953, "step": 12496 }, { "epoch": 0.89, "grad_norm": 1.794718864034495, "learning_rate": 3.3221456105050287e-07, "loss": 0.4592, "step": 12497 }, { "epoch": 0.89, "grad_norm": 1.647197066179309, "learning_rate": 3.318027898668302e-07, "loss": 0.5437, "step": 12498 }, { "epoch": 0.89, "grad_norm": 2.0725523273814197, "learning_rate": 3.313912652731521e-07, "loss": 0.5501, "step": 12499 }, { "epoch": 0.89, "grad_norm": 1.5942693681007043, "learning_rate": 3.309799872912067e-07, "loss": 0.4966, "step": 12500 }, { "epoch": 0.89, "grad_norm": 0.7047569986918572, "learning_rate": 3.3056895594272e-07, "loss": 0.3991, "step": 12501 }, { "epoch": 0.89, "grad_norm": 1.9848195050935615, "learning_rate": 3.3015817124940173e-07, "loss": 0.4841, "step": 12502 }, { "epoch": 0.89, "grad_norm": 2.100284456771557, "learning_rate": 3.297476332329541e-07, "loss": 0.4913, "step": 12503 }, { "epoch": 0.89, "grad_norm": 1.63446412365419, "learning_rate": 3.293373419150603e-07, "loss": 0.5858, "step": 12504 }, { "epoch": 0.89, "grad_norm": 0.7033022336517203, "learning_rate": 3.289272973173946e-07, "loss": 0.3955, "step": 12505 }, { "epoch": 0.89, "grad_norm": 1.4432488644418342, "learning_rate": 3.2851749946161693e-07, "loss": 0.5152, "step": 12506 }, { "epoch": 0.89, "grad_norm": 1.8373940527895882, "learning_rate": 3.281079483693739e-07, "loss": 0.5192, "step": 12507 }, { "epoch": 0.89, "grad_norm": 2.3338120097010107, "learning_rate": 3.2769864406230034e-07, "loss": 0.5112, "step": 12508 }, { "epoch": 0.89, "grad_norm": 1.743270895832969, "learning_rate": 3.2728958656201514e-07, "loss": 0.5017, "step": 12509 }, { "epoch": 0.89, "grad_norm": 1.7438214920199944, "learning_rate": 3.268807758901288e-07, "loss": 0.5397, "step": 12510 }, { "epoch": 0.89, "grad_norm": 1.6064161706536735, "learning_rate": 3.2647221206823344e-07, "loss": 0.5289, "step": 12511 }, { "epoch": 0.89, "grad_norm": 1.8111676925005222, "learning_rate": 3.260638951179118e-07, "loss": 0.5583, "step": 12512 }, { "epoch": 0.89, "grad_norm": 1.4370685474768852, "learning_rate": 3.256558250607328e-07, "loss": 0.4416, "step": 12513 }, { "epoch": 0.89, "grad_norm": 2.336523808942497, "learning_rate": 3.2524800191825246e-07, "loss": 0.4698, "step": 12514 }, { "epoch": 0.89, "grad_norm": 2.3897024487036687, "learning_rate": 3.2484042571201303e-07, "loss": 0.4804, "step": 12515 }, { "epoch": 0.89, "grad_norm": 1.6478874253185325, "learning_rate": 3.244330964635434e-07, "loss": 0.503, "step": 12516 }, { "epoch": 0.89, "grad_norm": 1.6339097201040622, "learning_rate": 3.240260141943607e-07, "loss": 0.5014, "step": 12517 }, { "epoch": 0.89, "grad_norm": 1.7549066710919277, "learning_rate": 3.2361917892596797e-07, "loss": 0.5667, "step": 12518 }, { "epoch": 0.89, "grad_norm": 1.763013971566291, "learning_rate": 3.232125906798572e-07, "loss": 0.4682, "step": 12519 }, { "epoch": 0.89, "grad_norm": 0.6894127007137747, "learning_rate": 3.228062494775025e-07, "loss": 0.4368, "step": 12520 }, { "epoch": 0.89, "grad_norm": 1.6539527381569308, "learning_rate": 3.224001553403716e-07, "loss": 0.5517, "step": 12521 }, { "epoch": 0.89, "grad_norm": 2.1498594872265713, "learning_rate": 3.2199430828991465e-07, "loss": 0.5285, "step": 12522 }, { "epoch": 0.89, "grad_norm": 1.6025105582973271, "learning_rate": 3.2158870834756883e-07, "loss": 0.5227, "step": 12523 }, { "epoch": 0.89, "grad_norm": 0.7261506604621331, "learning_rate": 3.2118335553475987e-07, "loss": 0.4329, "step": 12524 }, { "epoch": 0.89, "grad_norm": 1.8892685018211115, "learning_rate": 3.2077824987290064e-07, "loss": 0.553, "step": 12525 }, { "epoch": 0.89, "grad_norm": 1.8131354960596509, "learning_rate": 3.2037339138338953e-07, "loss": 0.5207, "step": 12526 }, { "epoch": 0.89, "grad_norm": 1.658046862623057, "learning_rate": 3.199687800876128e-07, "loss": 0.5233, "step": 12527 }, { "epoch": 0.89, "grad_norm": 1.762672849967191, "learning_rate": 3.195644160069428e-07, "loss": 0.5317, "step": 12528 }, { "epoch": 0.89, "grad_norm": 3.5410074322697236, "learning_rate": 3.191602991627396e-07, "loss": 0.5455, "step": 12529 }, { "epoch": 0.89, "grad_norm": 0.6556822653568161, "learning_rate": 3.1875642957635065e-07, "loss": 0.3988, "step": 12530 }, { "epoch": 0.89, "grad_norm": 1.8380604320401743, "learning_rate": 3.1835280726911e-07, "loss": 0.5576, "step": 12531 }, { "epoch": 0.89, "grad_norm": 2.266337012110237, "learning_rate": 3.1794943226233554e-07, "loss": 0.4626, "step": 12532 }, { "epoch": 0.89, "grad_norm": 1.4843844187881166, "learning_rate": 3.1754630457733916e-07, "loss": 0.4907, "step": 12533 }, { "epoch": 0.89, "grad_norm": 1.8946230576440088, "learning_rate": 3.1714342423541266e-07, "loss": 0.5548, "step": 12534 }, { "epoch": 0.89, "grad_norm": 1.8054242669318, "learning_rate": 3.167407912578374e-07, "loss": 0.4952, "step": 12535 }, { "epoch": 0.89, "grad_norm": 1.879568913579606, "learning_rate": 3.163384056658836e-07, "loss": 0.508, "step": 12536 }, { "epoch": 0.89, "grad_norm": 1.5317103805395762, "learning_rate": 3.1593626748080477e-07, "loss": 0.4839, "step": 12537 }, { "epoch": 0.89, "grad_norm": 0.7229205522756116, "learning_rate": 3.1553437672384557e-07, "loss": 0.4472, "step": 12538 }, { "epoch": 0.89, "grad_norm": 1.5554374743947448, "learning_rate": 3.151327334162313e-07, "loss": 0.4902, "step": 12539 }, { "epoch": 0.89, "grad_norm": 1.6745436246203391, "learning_rate": 3.1473133757918275e-07, "loss": 0.4812, "step": 12540 }, { "epoch": 0.89, "grad_norm": 1.7771640619395916, "learning_rate": 3.1433018923390013e-07, "loss": 0.5542, "step": 12541 }, { "epoch": 0.89, "grad_norm": 1.6762317733017926, "learning_rate": 3.139292884015738e-07, "loss": 0.4388, "step": 12542 }, { "epoch": 0.89, "grad_norm": 1.5690028516826968, "learning_rate": 3.1352863510338117e-07, "loss": 0.496, "step": 12543 }, { "epoch": 0.89, "grad_norm": 0.7647092928342583, "learning_rate": 3.131282293604859e-07, "loss": 0.4433, "step": 12544 }, { "epoch": 0.89, "grad_norm": 1.5617413529898307, "learning_rate": 3.127280711940395e-07, "loss": 0.5045, "step": 12545 }, { "epoch": 0.89, "grad_norm": 0.7350365501430058, "learning_rate": 3.123281606251782e-07, "loss": 0.4554, "step": 12546 }, { "epoch": 0.89, "grad_norm": 1.9042160581511811, "learning_rate": 3.119284976750281e-07, "loss": 0.5326, "step": 12547 }, { "epoch": 0.89, "grad_norm": 1.6322990040816168, "learning_rate": 3.1152908236469934e-07, "loss": 0.5239, "step": 12548 }, { "epoch": 0.89, "grad_norm": 1.5894684704674582, "learning_rate": 3.111299147152913e-07, "loss": 0.5038, "step": 12549 }, { "epoch": 0.89, "grad_norm": 1.7244832665357233, "learning_rate": 3.1073099474789037e-07, "loss": 0.5489, "step": 12550 }, { "epoch": 0.89, "grad_norm": 2.031632484963072, "learning_rate": 3.103323224835658e-07, "loss": 0.5945, "step": 12551 }, { "epoch": 0.89, "grad_norm": 1.8206248078004774, "learning_rate": 3.0993389794338027e-07, "loss": 0.5517, "step": 12552 }, { "epoch": 0.89, "grad_norm": 0.639279852441662, "learning_rate": 3.0953572114837793e-07, "loss": 0.4262, "step": 12553 }, { "epoch": 0.89, "grad_norm": 1.5839584887986864, "learning_rate": 3.0913779211959206e-07, "loss": 0.4558, "step": 12554 }, { "epoch": 0.89, "grad_norm": 1.905332512413174, "learning_rate": 3.08740110878043e-07, "loss": 0.4659, "step": 12555 }, { "epoch": 0.89, "grad_norm": 1.6202315624452055, "learning_rate": 3.0834267744473787e-07, "loss": 0.4866, "step": 12556 }, { "epoch": 0.89, "grad_norm": 1.6106166218712408, "learning_rate": 3.0794549184066935e-07, "loss": 0.5382, "step": 12557 }, { "epoch": 0.89, "grad_norm": 1.626317132528925, "learning_rate": 3.0754855408681894e-07, "loss": 0.5479, "step": 12558 }, { "epoch": 0.89, "grad_norm": 1.701276506268353, "learning_rate": 3.0715186420415435e-07, "loss": 0.5153, "step": 12559 }, { "epoch": 0.89, "grad_norm": 1.8908114580824256, "learning_rate": 3.067554222136293e-07, "loss": 0.515, "step": 12560 }, { "epoch": 0.89, "grad_norm": 1.8444150181119605, "learning_rate": 3.063592281361866e-07, "loss": 0.5675, "step": 12561 }, { "epoch": 0.89, "grad_norm": 1.805427172360426, "learning_rate": 3.0596328199275217e-07, "loss": 0.5481, "step": 12562 }, { "epoch": 0.89, "grad_norm": 1.8880506073447187, "learning_rate": 3.0556758380424436e-07, "loss": 0.5078, "step": 12563 }, { "epoch": 0.89, "grad_norm": 1.6469998422406427, "learning_rate": 3.051721335915631e-07, "loss": 0.464, "step": 12564 }, { "epoch": 0.89, "grad_norm": 1.6793441758707297, "learning_rate": 3.047769313755977e-07, "loss": 0.4899, "step": 12565 }, { "epoch": 0.89, "grad_norm": 1.6333958925848486, "learning_rate": 3.0438197717722497e-07, "loss": 0.5495, "step": 12566 }, { "epoch": 0.89, "grad_norm": 1.6768849836675201, "learning_rate": 3.0398727101730643e-07, "loss": 0.5177, "step": 12567 }, { "epoch": 0.89, "grad_norm": 1.6107014437582887, "learning_rate": 3.0359281291669375e-07, "loss": 0.4841, "step": 12568 }, { "epoch": 0.89, "grad_norm": 1.9567287909895485, "learning_rate": 3.031986028962203e-07, "loss": 0.5453, "step": 12569 }, { "epoch": 0.89, "grad_norm": 1.9784228551109233, "learning_rate": 3.0280464097671325e-07, "loss": 0.492, "step": 12570 }, { "epoch": 0.89, "grad_norm": 2.823760220653909, "learning_rate": 3.0241092717898044e-07, "loss": 0.4787, "step": 12571 }, { "epoch": 0.89, "grad_norm": 1.5937144051798962, "learning_rate": 3.0201746152382026e-07, "loss": 0.5337, "step": 12572 }, { "epoch": 0.89, "grad_norm": 1.6381979623448044, "learning_rate": 3.0162424403201717e-07, "loss": 0.5279, "step": 12573 }, { "epoch": 0.89, "grad_norm": 1.659974824679718, "learning_rate": 3.012312747243412e-07, "loss": 0.5062, "step": 12574 }, { "epoch": 0.89, "grad_norm": 1.902696925242269, "learning_rate": 3.008385536215519e-07, "loss": 0.5264, "step": 12575 }, { "epoch": 0.89, "grad_norm": 1.9388114875813645, "learning_rate": 3.004460807443921e-07, "loss": 0.52, "step": 12576 }, { "epoch": 0.89, "grad_norm": 1.9188063683712924, "learning_rate": 3.000538561135946e-07, "loss": 0.5571, "step": 12577 }, { "epoch": 0.89, "grad_norm": 1.5599176770682812, "learning_rate": 2.996618797498785e-07, "loss": 0.5477, "step": 12578 }, { "epoch": 0.89, "grad_norm": 1.6788869500385653, "learning_rate": 2.9927015167394825e-07, "loss": 0.5145, "step": 12579 }, { "epoch": 0.89, "grad_norm": 1.7033205231160007, "learning_rate": 2.9887867190649787e-07, "loss": 0.5058, "step": 12580 }, { "epoch": 0.89, "grad_norm": 1.702883391627166, "learning_rate": 2.9848744046820365e-07, "loss": 0.5591, "step": 12581 }, { "epoch": 0.89, "grad_norm": 2.1593497753580824, "learning_rate": 2.980964573797357e-07, "loss": 0.5563, "step": 12582 }, { "epoch": 0.89, "grad_norm": 1.7830795849894678, "learning_rate": 2.977057226617447e-07, "loss": 0.507, "step": 12583 }, { "epoch": 0.89, "grad_norm": 1.7816462389028185, "learning_rate": 2.9731523633487024e-07, "loss": 0.4639, "step": 12584 }, { "epoch": 0.89, "grad_norm": 1.7888232007306213, "learning_rate": 2.969249984197403e-07, "loss": 0.5313, "step": 12585 }, { "epoch": 0.89, "grad_norm": 1.7156642066744452, "learning_rate": 2.96535008936969e-07, "loss": 0.5192, "step": 12586 }, { "epoch": 0.89, "grad_norm": 1.9278711141420761, "learning_rate": 2.961452679071547e-07, "loss": 0.5825, "step": 12587 }, { "epoch": 0.89, "grad_norm": 2.5511461940170626, "learning_rate": 2.9575577535088607e-07, "loss": 0.59, "step": 12588 }, { "epoch": 0.89, "grad_norm": 1.6025922948800877, "learning_rate": 2.953665312887388e-07, "loss": 0.5198, "step": 12589 }, { "epoch": 0.89, "grad_norm": 1.786652456663681, "learning_rate": 2.9497753574127153e-07, "loss": 0.5778, "step": 12590 }, { "epoch": 0.89, "grad_norm": 1.6847330373715332, "learning_rate": 2.9458878872903495e-07, "loss": 0.4711, "step": 12591 }, { "epoch": 0.89, "grad_norm": 2.428077130260277, "learning_rate": 2.9420029027256103e-07, "loss": 0.5304, "step": 12592 }, { "epoch": 0.89, "grad_norm": 0.9019794540373057, "learning_rate": 2.93812040392375e-07, "loss": 0.4432, "step": 12593 }, { "epoch": 0.89, "grad_norm": 0.7006085962224345, "learning_rate": 2.934240391089826e-07, "loss": 0.4138, "step": 12594 }, { "epoch": 0.89, "grad_norm": 1.6378776511210356, "learning_rate": 2.93036286442881e-07, "loss": 0.4965, "step": 12595 }, { "epoch": 0.89, "grad_norm": 1.8771780799177442, "learning_rate": 2.926487824145513e-07, "loss": 0.5017, "step": 12596 }, { "epoch": 0.89, "grad_norm": 0.7068106309235032, "learning_rate": 2.9226152704446455e-07, "loss": 0.4148, "step": 12597 }, { "epoch": 0.89, "grad_norm": 1.7191428484984292, "learning_rate": 2.9187452035307596e-07, "loss": 0.5169, "step": 12598 }, { "epoch": 0.89, "grad_norm": 1.7077416030716195, "learning_rate": 2.91487762360827e-07, "loss": 0.5233, "step": 12599 }, { "epoch": 0.89, "grad_norm": 1.6323401988794095, "learning_rate": 2.911012530881507e-07, "loss": 0.5946, "step": 12600 }, { "epoch": 0.89, "grad_norm": 1.5653659064062897, "learning_rate": 2.9071499255546196e-07, "loss": 0.5506, "step": 12601 }, { "epoch": 0.89, "grad_norm": 1.7244412780214051, "learning_rate": 2.9032898078316384e-07, "loss": 0.5394, "step": 12602 }, { "epoch": 0.89, "grad_norm": 1.8543204034393266, "learning_rate": 2.899432177916478e-07, "loss": 0.5415, "step": 12603 }, { "epoch": 0.89, "grad_norm": 1.9592909607014217, "learning_rate": 2.895577036012909e-07, "loss": 0.5133, "step": 12604 }, { "epoch": 0.89, "grad_norm": 1.8897191564023803, "learning_rate": 2.8917243823245786e-07, "loss": 0.5757, "step": 12605 }, { "epoch": 0.89, "grad_norm": 1.452172533661388, "learning_rate": 2.8878742170549813e-07, "loss": 0.4811, "step": 12606 }, { "epoch": 0.89, "grad_norm": 2.0902452452399762, "learning_rate": 2.8840265404075087e-07, "loss": 0.5497, "step": 12607 }, { "epoch": 0.89, "grad_norm": 1.825188805121277, "learning_rate": 2.8801813525854036e-07, "loss": 0.5859, "step": 12608 }, { "epoch": 0.89, "grad_norm": 2.179325531989453, "learning_rate": 2.8763386537917825e-07, "loss": 0.5027, "step": 12609 }, { "epoch": 0.89, "grad_norm": 1.714759902893055, "learning_rate": 2.872498444229643e-07, "loss": 0.5399, "step": 12610 }, { "epoch": 0.89, "grad_norm": 1.7998267889267174, "learning_rate": 2.868660724101807e-07, "loss": 0.5383, "step": 12611 }, { "epoch": 0.89, "grad_norm": 1.8301061314544274, "learning_rate": 2.864825493611029e-07, "loss": 0.4884, "step": 12612 }, { "epoch": 0.9, "grad_norm": 1.712235098295122, "learning_rate": 2.8609927529598737e-07, "loss": 0.5014, "step": 12613 }, { "epoch": 0.9, "grad_norm": 1.791156572768819, "learning_rate": 2.8571625023508245e-07, "loss": 0.5461, "step": 12614 }, { "epoch": 0.9, "grad_norm": 1.659704413571172, "learning_rate": 2.8533347419861745e-07, "loss": 0.5316, "step": 12615 }, { "epoch": 0.9, "grad_norm": 2.357412303161694, "learning_rate": 2.849509472068146e-07, "loss": 0.4891, "step": 12616 }, { "epoch": 0.9, "grad_norm": 1.8635510415628678, "learning_rate": 2.845686692798799e-07, "loss": 0.5491, "step": 12617 }, { "epoch": 0.9, "grad_norm": 0.6213281005722057, "learning_rate": 2.84186640438005e-07, "loss": 0.3846, "step": 12618 }, { "epoch": 0.9, "grad_norm": 1.5512755701445857, "learning_rate": 2.838048607013727e-07, "loss": 0.532, "step": 12619 }, { "epoch": 0.9, "grad_norm": 1.5270898148623333, "learning_rate": 2.834233300901473e-07, "loss": 0.5225, "step": 12620 }, { "epoch": 0.9, "grad_norm": 1.900257539282859, "learning_rate": 2.8304204862448445e-07, "loss": 0.4717, "step": 12621 }, { "epoch": 0.9, "grad_norm": 1.7455862971956742, "learning_rate": 2.8266101632452246e-07, "loss": 0.5642, "step": 12622 }, { "epoch": 0.9, "grad_norm": 1.9468392384157973, "learning_rate": 2.8228023321039135e-07, "loss": 0.4935, "step": 12623 }, { "epoch": 0.9, "grad_norm": 1.5536161381465765, "learning_rate": 2.8189969930220327e-07, "loss": 0.4681, "step": 12624 }, { "epoch": 0.9, "grad_norm": 2.0191128188154113, "learning_rate": 2.815194146200606e-07, "loss": 0.5484, "step": 12625 }, { "epoch": 0.9, "grad_norm": 0.7082325441488533, "learning_rate": 2.8113937918405053e-07, "loss": 0.4175, "step": 12626 }, { "epoch": 0.9, "grad_norm": 1.841608082630723, "learning_rate": 2.8075959301424814e-07, "loss": 0.482, "step": 12627 }, { "epoch": 0.9, "grad_norm": 2.301984529597994, "learning_rate": 2.8038005613071626e-07, "loss": 0.446, "step": 12628 }, { "epoch": 0.9, "grad_norm": 2.304615487218864, "learning_rate": 2.8000076855350056e-07, "loss": 0.506, "step": 12629 }, { "epoch": 0.9, "grad_norm": 1.8631016760100112, "learning_rate": 2.796217303026383e-07, "loss": 0.5567, "step": 12630 }, { "epoch": 0.9, "grad_norm": 1.8419418107893775, "learning_rate": 2.792429413981512e-07, "loss": 0.5328, "step": 12631 }, { "epoch": 0.9, "grad_norm": 1.3863056347575036, "learning_rate": 2.788644018600478e-07, "loss": 0.4372, "step": 12632 }, { "epoch": 0.9, "grad_norm": 2.6542321668746274, "learning_rate": 2.7848611170832376e-07, "loss": 0.5249, "step": 12633 }, { "epoch": 0.9, "grad_norm": 1.7860389743462115, "learning_rate": 2.7810807096296246e-07, "loss": 0.553, "step": 12634 }, { "epoch": 0.9, "grad_norm": 1.6754011345895023, "learning_rate": 2.777302796439335e-07, "loss": 0.5174, "step": 12635 }, { "epoch": 0.9, "grad_norm": 3.3393878176164904, "learning_rate": 2.7735273777119153e-07, "loss": 0.4955, "step": 12636 }, { "epoch": 0.9, "grad_norm": 1.846853399911836, "learning_rate": 2.769754453646806e-07, "loss": 0.5158, "step": 12637 }, { "epoch": 0.9, "grad_norm": 2.5353014385334527, "learning_rate": 2.7659840244433025e-07, "loss": 0.4891, "step": 12638 }, { "epoch": 0.9, "grad_norm": 3.730672181262175, "learning_rate": 2.7622160903005737e-07, "loss": 0.4797, "step": 12639 }, { "epoch": 0.9, "grad_norm": 2.1405968772232105, "learning_rate": 2.75845065141766e-07, "loss": 0.5258, "step": 12640 }, { "epoch": 0.9, "grad_norm": 1.9748259792141227, "learning_rate": 2.7546877079934473e-07, "loss": 0.4872, "step": 12641 }, { "epoch": 0.9, "grad_norm": 1.5172793288264743, "learning_rate": 2.750927260226732e-07, "loss": 0.4737, "step": 12642 }, { "epoch": 0.9, "grad_norm": 1.6214503134802014, "learning_rate": 2.7471693083161334e-07, "loss": 0.4927, "step": 12643 }, { "epoch": 0.9, "grad_norm": 1.6889251605129691, "learning_rate": 2.7434138524601696e-07, "loss": 0.4959, "step": 12644 }, { "epoch": 0.9, "grad_norm": 2.108090182953655, "learning_rate": 2.73966089285721e-07, "loss": 0.4218, "step": 12645 }, { "epoch": 0.9, "grad_norm": 1.768757353925345, "learning_rate": 2.735910429705502e-07, "loss": 0.4457, "step": 12646 }, { "epoch": 0.9, "grad_norm": 1.6422180326770917, "learning_rate": 2.7321624632031694e-07, "loss": 0.5204, "step": 12647 }, { "epoch": 0.9, "grad_norm": 1.604038047791915, "learning_rate": 2.728416993548161e-07, "loss": 0.513, "step": 12648 }, { "epoch": 0.9, "grad_norm": 0.7361590662842092, "learning_rate": 2.7246740209383617e-07, "loss": 0.4134, "step": 12649 }, { "epoch": 0.9, "grad_norm": 1.603988079819678, "learning_rate": 2.7209335455714634e-07, "loss": 0.5278, "step": 12650 }, { "epoch": 0.9, "grad_norm": 1.60668062937063, "learning_rate": 2.7171955676450645e-07, "loss": 0.5254, "step": 12651 }, { "epoch": 0.9, "grad_norm": 2.0539533623653017, "learning_rate": 2.713460087356601e-07, "loss": 0.4757, "step": 12652 }, { "epoch": 0.9, "grad_norm": 1.6324746298878663, "learning_rate": 2.709727104903409e-07, "loss": 0.4641, "step": 12653 }, { "epoch": 0.9, "grad_norm": 2.1978328787332275, "learning_rate": 2.705996620482676e-07, "loss": 0.5111, "step": 12654 }, { "epoch": 0.9, "grad_norm": 1.8185328264679268, "learning_rate": 2.702268634291455e-07, "loss": 0.5951, "step": 12655 }, { "epoch": 0.9, "grad_norm": 1.8206380725852163, "learning_rate": 2.6985431465266666e-07, "loss": 0.4388, "step": 12656 }, { "epoch": 0.9, "grad_norm": 0.6721611403124952, "learning_rate": 2.694820157385114e-07, "loss": 0.4328, "step": 12657 }, { "epoch": 0.9, "grad_norm": 1.6225581686512236, "learning_rate": 2.6910996670634516e-07, "loss": 0.5122, "step": 12658 }, { "epoch": 0.9, "grad_norm": 1.8278943835333348, "learning_rate": 2.687381675758211e-07, "loss": 0.4824, "step": 12659 }, { "epoch": 0.9, "grad_norm": 1.6864328605930328, "learning_rate": 2.6836661836657783e-07, "loss": 0.4953, "step": 12660 }, { "epoch": 0.9, "grad_norm": 1.9774623394007422, "learning_rate": 2.6799531909824315e-07, "loss": 0.5883, "step": 12661 }, { "epoch": 0.9, "grad_norm": 1.6623897926028357, "learning_rate": 2.676242697904302e-07, "loss": 0.6034, "step": 12662 }, { "epoch": 0.9, "grad_norm": 1.8348595342446348, "learning_rate": 2.6725347046273887e-07, "loss": 0.4652, "step": 12663 }, { "epoch": 0.9, "grad_norm": 1.8603397793981393, "learning_rate": 2.6688292113475566e-07, "loss": 0.5075, "step": 12664 }, { "epoch": 0.9, "grad_norm": 1.6639125522391218, "learning_rate": 2.6651262182605496e-07, "loss": 0.4824, "step": 12665 }, { "epoch": 0.9, "grad_norm": 0.7451529656300367, "learning_rate": 2.661425725561967e-07, "loss": 0.4178, "step": 12666 }, { "epoch": 0.9, "grad_norm": 0.6879709862534997, "learning_rate": 2.6577277334472793e-07, "loss": 0.4113, "step": 12667 }, { "epoch": 0.9, "grad_norm": 1.767935061910562, "learning_rate": 2.6540322421118313e-07, "loss": 0.588, "step": 12668 }, { "epoch": 0.9, "grad_norm": 1.5908417034501903, "learning_rate": 2.6503392517508275e-07, "loss": 0.4362, "step": 12669 }, { "epoch": 0.9, "grad_norm": 1.7741862559239043, "learning_rate": 2.646648762559356e-07, "loss": 0.4872, "step": 12670 }, { "epoch": 0.9, "grad_norm": 0.6931247775428896, "learning_rate": 2.6429607747323393e-07, "loss": 0.4193, "step": 12671 }, { "epoch": 0.9, "grad_norm": 1.8038879722615504, "learning_rate": 2.6392752884646156e-07, "loss": 0.5324, "step": 12672 }, { "epoch": 0.9, "grad_norm": 1.6694481054008183, "learning_rate": 2.6355923039508404e-07, "loss": 0.5525, "step": 12673 }, { "epoch": 0.9, "grad_norm": 1.658243421095066, "learning_rate": 2.6319118213855745e-07, "loss": 0.504, "step": 12674 }, { "epoch": 0.9, "grad_norm": 1.4774394591598534, "learning_rate": 2.628233840963235e-07, "loss": 0.4234, "step": 12675 }, { "epoch": 0.9, "grad_norm": 2.670672959642424, "learning_rate": 2.624558362878099e-07, "loss": 0.5109, "step": 12676 }, { "epoch": 0.9, "grad_norm": 1.6106937230458371, "learning_rate": 2.6208853873243233e-07, "loss": 0.5176, "step": 12677 }, { "epoch": 0.9, "grad_norm": 1.8188661357949891, "learning_rate": 2.617214914495925e-07, "loss": 0.6206, "step": 12678 }, { "epoch": 0.9, "grad_norm": 1.8444630700234739, "learning_rate": 2.613546944586781e-07, "loss": 0.5398, "step": 12679 }, { "epoch": 0.9, "grad_norm": 1.7965362265636515, "learning_rate": 2.6098814777906603e-07, "loss": 0.5131, "step": 12680 }, { "epoch": 0.9, "grad_norm": 1.803197513668809, "learning_rate": 2.606218514301179e-07, "loss": 0.5984, "step": 12681 }, { "epoch": 0.9, "grad_norm": 2.3723385823146947, "learning_rate": 2.6025580543118266e-07, "loss": 0.4581, "step": 12682 }, { "epoch": 0.9, "grad_norm": 1.7404512966050116, "learning_rate": 2.5989000980159605e-07, "loss": 0.4451, "step": 12683 }, { "epoch": 0.9, "grad_norm": 4.716802982532928, "learning_rate": 2.5952446456068147e-07, "loss": 0.5504, "step": 12684 }, { "epoch": 0.9, "grad_norm": 1.7032712041715503, "learning_rate": 2.5915916972774736e-07, "loss": 0.5848, "step": 12685 }, { "epoch": 0.9, "grad_norm": 1.897051450745246, "learning_rate": 2.5879412532208993e-07, "loss": 0.597, "step": 12686 }, { "epoch": 0.9, "grad_norm": 0.746706573128253, "learning_rate": 2.584293313629921e-07, "loss": 0.4234, "step": 12687 }, { "epoch": 0.9, "grad_norm": 1.9181149309020304, "learning_rate": 2.58064787869724e-07, "loss": 0.4781, "step": 12688 }, { "epoch": 0.9, "grad_norm": 1.7920063547865288, "learning_rate": 2.577004948615414e-07, "loss": 0.5512, "step": 12689 }, { "epoch": 0.9, "grad_norm": 0.7225141325912885, "learning_rate": 2.5733645235768766e-07, "loss": 0.4302, "step": 12690 }, { "epoch": 0.9, "grad_norm": 1.494289814419068, "learning_rate": 2.5697266037739253e-07, "loss": 0.475, "step": 12691 }, { "epoch": 0.9, "grad_norm": 2.3327465466111534, "learning_rate": 2.5660911893987337e-07, "loss": 0.559, "step": 12692 }, { "epoch": 0.9, "grad_norm": 1.880594408493992, "learning_rate": 2.562458280643343e-07, "loss": 0.5588, "step": 12693 }, { "epoch": 0.9, "grad_norm": 1.7360575729267824, "learning_rate": 2.558827877699627e-07, "loss": 0.4604, "step": 12694 }, { "epoch": 0.9, "grad_norm": 1.5753545760948986, "learning_rate": 2.555199980759393e-07, "loss": 0.5202, "step": 12695 }, { "epoch": 0.9, "grad_norm": 1.5839456085661119, "learning_rate": 2.55157459001425e-07, "loss": 0.4664, "step": 12696 }, { "epoch": 0.9, "grad_norm": 1.657120155041787, "learning_rate": 2.5479517056557214e-07, "loss": 0.5251, "step": 12697 }, { "epoch": 0.9, "grad_norm": 1.7245698023094969, "learning_rate": 2.5443313278751656e-07, "loss": 0.4897, "step": 12698 }, { "epoch": 0.9, "grad_norm": 1.7103033677898396, "learning_rate": 2.5407134568638357e-07, "loss": 0.5476, "step": 12699 }, { "epoch": 0.9, "grad_norm": 1.6237081031423706, "learning_rate": 2.53709809281284e-07, "loss": 0.5127, "step": 12700 }, { "epoch": 0.9, "grad_norm": 1.8289877525626415, "learning_rate": 2.5334852359131356e-07, "loss": 0.539, "step": 12701 }, { "epoch": 0.9, "grad_norm": 1.8459814438274638, "learning_rate": 2.5298748863555935e-07, "loss": 0.54, "step": 12702 }, { "epoch": 0.9, "grad_norm": 1.3871047095490303, "learning_rate": 2.5262670443309045e-07, "loss": 0.4235, "step": 12703 }, { "epoch": 0.9, "grad_norm": 2.3502516099606585, "learning_rate": 2.5226617100296504e-07, "loss": 0.4722, "step": 12704 }, { "epoch": 0.9, "grad_norm": 0.652451729796664, "learning_rate": 2.5190588836422835e-07, "loss": 0.4163, "step": 12705 }, { "epoch": 0.9, "grad_norm": 2.10700019892435, "learning_rate": 2.515458565359108e-07, "loss": 0.5467, "step": 12706 }, { "epoch": 0.9, "grad_norm": 1.5844130841816766, "learning_rate": 2.5118607553703214e-07, "loss": 0.5514, "step": 12707 }, { "epoch": 0.9, "grad_norm": 1.7769491683970342, "learning_rate": 2.5082654538659546e-07, "loss": 0.4792, "step": 12708 }, { "epoch": 0.9, "grad_norm": 1.7423771811779871, "learning_rate": 2.504672661035934e-07, "loss": 0.5378, "step": 12709 }, { "epoch": 0.9, "grad_norm": 1.5805357750008233, "learning_rate": 2.5010823770700354e-07, "loss": 0.4955, "step": 12710 }, { "epoch": 0.9, "grad_norm": 0.7128316440233982, "learning_rate": 2.4974946021579125e-07, "loss": 0.442, "step": 12711 }, { "epoch": 0.9, "grad_norm": 1.8006057306944947, "learning_rate": 2.493909336489092e-07, "loss": 0.5045, "step": 12712 }, { "epoch": 0.9, "grad_norm": 1.568745433139465, "learning_rate": 2.490326580252944e-07, "loss": 0.4581, "step": 12713 }, { "epoch": 0.9, "grad_norm": 1.909795599980966, "learning_rate": 2.4867463336387454e-07, "loss": 0.5457, "step": 12714 }, { "epoch": 0.9, "grad_norm": 1.823375250464417, "learning_rate": 2.483168596835589e-07, "loss": 0.5163, "step": 12715 }, { "epoch": 0.9, "grad_norm": 1.6976184050426828, "learning_rate": 2.4795933700324846e-07, "loss": 0.5288, "step": 12716 }, { "epoch": 0.9, "grad_norm": 2.866068915370391, "learning_rate": 2.476020653418271e-07, "loss": 0.4138, "step": 12717 }, { "epoch": 0.9, "grad_norm": 1.8702720226616825, "learning_rate": 2.47245044718169e-07, "loss": 0.5093, "step": 12718 }, { "epoch": 0.9, "grad_norm": 2.4102874474745914, "learning_rate": 2.4688827515113143e-07, "loss": 0.6072, "step": 12719 }, { "epoch": 0.9, "grad_norm": 0.7765533453112328, "learning_rate": 2.465317566595604e-07, "loss": 0.4422, "step": 12720 }, { "epoch": 0.9, "grad_norm": 1.7631187701441917, "learning_rate": 2.461754892622903e-07, "loss": 0.5153, "step": 12721 }, { "epoch": 0.9, "grad_norm": 1.8474875714521757, "learning_rate": 2.4581947297813825e-07, "loss": 0.5064, "step": 12722 }, { "epoch": 0.9, "grad_norm": 1.5823223189861697, "learning_rate": 2.4546370782591147e-07, "loss": 0.4803, "step": 12723 }, { "epoch": 0.9, "grad_norm": 1.8557883592668791, "learning_rate": 2.45108193824401e-07, "loss": 0.504, "step": 12724 }, { "epoch": 0.9, "grad_norm": 1.695964157990849, "learning_rate": 2.4475293099238905e-07, "loss": 0.5327, "step": 12725 }, { "epoch": 0.9, "grad_norm": 1.6248062663050375, "learning_rate": 2.4439791934863957e-07, "loss": 0.5034, "step": 12726 }, { "epoch": 0.9, "grad_norm": 1.6775142440015796, "learning_rate": 2.440431589119058e-07, "loss": 0.5394, "step": 12727 }, { "epoch": 0.9, "grad_norm": 1.5742147916582065, "learning_rate": 2.4368864970092776e-07, "loss": 0.492, "step": 12728 }, { "epoch": 0.9, "grad_norm": 1.4891188143072773, "learning_rate": 2.433343917344322e-07, "loss": 0.5095, "step": 12729 }, { "epoch": 0.9, "grad_norm": 1.8458558350587122, "learning_rate": 2.429803850311324e-07, "loss": 0.6044, "step": 12730 }, { "epoch": 0.9, "grad_norm": 1.4149480400706864, "learning_rate": 2.426266296097263e-07, "loss": 0.5193, "step": 12731 }, { "epoch": 0.9, "grad_norm": 0.668582836047589, "learning_rate": 2.4227312548890325e-07, "loss": 0.417, "step": 12732 }, { "epoch": 0.9, "grad_norm": 1.696232849546226, "learning_rate": 2.41919872687334e-07, "loss": 0.4873, "step": 12733 }, { "epoch": 0.9, "grad_norm": 1.9994363840717644, "learning_rate": 2.4156687122368016e-07, "loss": 0.4863, "step": 12734 }, { "epoch": 0.9, "grad_norm": 1.6550694503793202, "learning_rate": 2.4121412111658746e-07, "loss": 0.4913, "step": 12735 }, { "epoch": 0.9, "grad_norm": 2.9173395497030055, "learning_rate": 2.4086162238469045e-07, "loss": 0.5509, "step": 12736 }, { "epoch": 0.9, "grad_norm": 1.6190433159642212, "learning_rate": 2.4050937504660866e-07, "loss": 0.5305, "step": 12737 }, { "epoch": 0.9, "grad_norm": 1.6013913139188896, "learning_rate": 2.401573791209488e-07, "loss": 0.4359, "step": 12738 }, { "epoch": 0.9, "grad_norm": 1.7469842535438236, "learning_rate": 2.39805634626305e-07, "loss": 0.5356, "step": 12739 }, { "epoch": 0.9, "grad_norm": 1.6316081275779228, "learning_rate": 2.394541415812568e-07, "loss": 0.5112, "step": 12740 }, { "epoch": 0.9, "grad_norm": 1.6196806625098836, "learning_rate": 2.3910290000437207e-07, "loss": 0.5357, "step": 12741 }, { "epoch": 0.9, "grad_norm": 5.549438197176269, "learning_rate": 2.387519099142049e-07, "loss": 0.4662, "step": 12742 }, { "epoch": 0.9, "grad_norm": 1.5884896490620557, "learning_rate": 2.384011713292933e-07, "loss": 0.461, "step": 12743 }, { "epoch": 0.9, "grad_norm": 1.5861294807642932, "learning_rate": 2.3805068426816847e-07, "loss": 0.5202, "step": 12744 }, { "epoch": 0.9, "grad_norm": 7.800853640963795, "learning_rate": 2.3770044874934117e-07, "loss": 0.5534, "step": 12745 }, { "epoch": 0.9, "grad_norm": 0.7327610298248551, "learning_rate": 2.3735046479131274e-07, "loss": 0.4465, "step": 12746 }, { "epoch": 0.9, "grad_norm": 1.78794757616712, "learning_rate": 2.3700073241257117e-07, "loss": 0.461, "step": 12747 }, { "epoch": 0.9, "grad_norm": 1.6992487112074224, "learning_rate": 2.3665125163159052e-07, "loss": 0.5507, "step": 12748 }, { "epoch": 0.9, "grad_norm": 1.7969576077098495, "learning_rate": 2.3630202246683109e-07, "loss": 0.5212, "step": 12749 }, { "epoch": 0.9, "grad_norm": 1.8448275415665345, "learning_rate": 2.3595304493673977e-07, "loss": 0.523, "step": 12750 }, { "epoch": 0.9, "grad_norm": 1.9400387095410139, "learning_rate": 2.3560431905975234e-07, "loss": 0.5918, "step": 12751 }, { "epoch": 0.9, "grad_norm": 2.4842316301573715, "learning_rate": 2.3525584485428799e-07, "loss": 0.4888, "step": 12752 }, { "epoch": 0.9, "grad_norm": 2.094220179435843, "learning_rate": 2.349076223387564e-07, "loss": 0.506, "step": 12753 }, { "epoch": 0.91, "grad_norm": 1.5490564310415358, "learning_rate": 2.3455965153154847e-07, "loss": 0.5303, "step": 12754 }, { "epoch": 0.91, "grad_norm": 1.613553412728933, "learning_rate": 2.342119324510489e-07, "loss": 0.5622, "step": 12755 }, { "epoch": 0.91, "grad_norm": 1.876349133523773, "learning_rate": 2.33864465115623e-07, "loss": 0.5674, "step": 12756 }, { "epoch": 0.91, "grad_norm": 1.4581458478697864, "learning_rate": 2.3351724954362608e-07, "loss": 0.5018, "step": 12757 }, { "epoch": 0.91, "grad_norm": 1.532832900762323, "learning_rate": 2.3317028575339906e-07, "loss": 0.5139, "step": 12758 }, { "epoch": 0.91, "grad_norm": 0.7173337432027899, "learning_rate": 2.3282357376326947e-07, "loss": 0.4211, "step": 12759 }, { "epoch": 0.91, "grad_norm": 1.6956049801149211, "learning_rate": 2.3247711359155324e-07, "loss": 0.5289, "step": 12760 }, { "epoch": 0.91, "grad_norm": 2.1449348835594253, "learning_rate": 2.3213090525654902e-07, "loss": 0.5615, "step": 12761 }, { "epoch": 0.91, "grad_norm": 1.6784218486321325, "learning_rate": 2.3178494877654724e-07, "loss": 0.4773, "step": 12762 }, { "epoch": 0.91, "grad_norm": 1.888948000713616, "learning_rate": 2.3143924416982044e-07, "loss": 0.5595, "step": 12763 }, { "epoch": 0.91, "grad_norm": 0.7191948867790153, "learning_rate": 2.3109379145463128e-07, "loss": 0.4302, "step": 12764 }, { "epoch": 0.91, "grad_norm": 2.529244734370472, "learning_rate": 2.307485906492274e-07, "loss": 0.4695, "step": 12765 }, { "epoch": 0.91, "grad_norm": 1.6502624377433872, "learning_rate": 2.3040364177184305e-07, "loss": 0.5345, "step": 12766 }, { "epoch": 0.91, "grad_norm": 1.870126874704981, "learning_rate": 2.3005894484070092e-07, "loss": 0.5905, "step": 12767 }, { "epoch": 0.91, "grad_norm": 1.8569725551205007, "learning_rate": 2.297144998740075e-07, "loss": 0.5435, "step": 12768 }, { "epoch": 0.91, "grad_norm": 1.8362850571045017, "learning_rate": 2.2937030688995776e-07, "loss": 0.5202, "step": 12769 }, { "epoch": 0.91, "grad_norm": 2.2351174278379373, "learning_rate": 2.2902636590673376e-07, "loss": 0.5478, "step": 12770 }, { "epoch": 0.91, "grad_norm": 1.6411435966691348, "learning_rate": 2.2868267694250322e-07, "loss": 0.5211, "step": 12771 }, { "epoch": 0.91, "grad_norm": 1.7264879993726723, "learning_rate": 2.2833924001542163e-07, "loss": 0.5053, "step": 12772 }, { "epoch": 0.91, "grad_norm": 0.665588816249034, "learning_rate": 2.2799605514362888e-07, "loss": 0.4269, "step": 12773 }, { "epoch": 0.91, "grad_norm": 1.713833009086183, "learning_rate": 2.276531223452555e-07, "loss": 0.5101, "step": 12774 }, { "epoch": 0.91, "grad_norm": 1.4470838559102182, "learning_rate": 2.2731044163841477e-07, "loss": 0.4818, "step": 12775 }, { "epoch": 0.91, "grad_norm": 2.115648654273564, "learning_rate": 2.269680130412083e-07, "loss": 0.5013, "step": 12776 }, { "epoch": 0.91, "grad_norm": 1.506165745728885, "learning_rate": 2.2662583657172444e-07, "loss": 0.4877, "step": 12777 }, { "epoch": 0.91, "grad_norm": 1.7364834039667563, "learning_rate": 2.2628391224803868e-07, "loss": 0.4255, "step": 12778 }, { "epoch": 0.91, "grad_norm": 2.3298774433007754, "learning_rate": 2.2594224008821276e-07, "loss": 0.4997, "step": 12779 }, { "epoch": 0.91, "grad_norm": 2.676868802700795, "learning_rate": 2.2560082011029326e-07, "loss": 0.5058, "step": 12780 }, { "epoch": 0.91, "grad_norm": 0.682350249259026, "learning_rate": 2.252596523323175e-07, "loss": 0.4304, "step": 12781 }, { "epoch": 0.91, "grad_norm": 1.702551003384806, "learning_rate": 2.2491873677230492e-07, "loss": 0.4959, "step": 12782 }, { "epoch": 0.91, "grad_norm": 1.5473745289080922, "learning_rate": 2.245780734482661e-07, "loss": 0.5503, "step": 12783 }, { "epoch": 0.91, "grad_norm": 2.2596058206607212, "learning_rate": 2.2423766237819332e-07, "loss": 0.4786, "step": 12784 }, { "epoch": 0.91, "grad_norm": 0.6857643862198614, "learning_rate": 2.2389750358007e-07, "loss": 0.4314, "step": 12785 }, { "epoch": 0.91, "grad_norm": 1.752986504938983, "learning_rate": 2.235575970718651e-07, "loss": 0.4954, "step": 12786 }, { "epoch": 0.91, "grad_norm": 2.6651851325089138, "learning_rate": 2.2321794287153198e-07, "loss": 0.486, "step": 12787 }, { "epoch": 0.91, "grad_norm": 1.8999645346459915, "learning_rate": 2.2287854099701301e-07, "loss": 0.4675, "step": 12788 }, { "epoch": 0.91, "grad_norm": 1.7010455351632758, "learning_rate": 2.225393914662366e-07, "loss": 0.516, "step": 12789 }, { "epoch": 0.91, "grad_norm": 1.6378901022642982, "learning_rate": 2.22200494297119e-07, "loss": 0.484, "step": 12790 }, { "epoch": 0.91, "grad_norm": 1.9202971033130536, "learning_rate": 2.2186184950755862e-07, "loss": 0.5384, "step": 12791 }, { "epoch": 0.91, "grad_norm": 1.6454756158899664, "learning_rate": 2.2152345711544732e-07, "loss": 0.5059, "step": 12792 }, { "epoch": 0.91, "grad_norm": 1.8749689371584566, "learning_rate": 2.2118531713865854e-07, "loss": 0.5171, "step": 12793 }, { "epoch": 0.91, "grad_norm": 1.7172298075393906, "learning_rate": 2.2084742959505358e-07, "loss": 0.5601, "step": 12794 }, { "epoch": 0.91, "grad_norm": 2.884477347158867, "learning_rate": 2.20509794502482e-07, "loss": 0.5585, "step": 12795 }, { "epoch": 0.91, "grad_norm": 1.6126760046040245, "learning_rate": 2.2017241187877736e-07, "loss": 0.5126, "step": 12796 }, { "epoch": 0.91, "grad_norm": 1.9760093410394666, "learning_rate": 2.1983528174176372e-07, "loss": 0.5094, "step": 12797 }, { "epoch": 0.91, "grad_norm": 1.8418336596828138, "learning_rate": 2.1949840410924682e-07, "loss": 0.467, "step": 12798 }, { "epoch": 0.91, "grad_norm": 0.6182225135299495, "learning_rate": 2.1916177899902248e-07, "loss": 0.3742, "step": 12799 }, { "epoch": 0.91, "grad_norm": 1.7412256932121002, "learning_rate": 2.1882540642887308e-07, "loss": 0.5352, "step": 12800 }, { "epoch": 0.91, "grad_norm": 1.6314739736421984, "learning_rate": 2.1848928641656664e-07, "loss": 0.468, "step": 12801 }, { "epoch": 0.91, "grad_norm": 1.7001496155469862, "learning_rate": 2.1815341897985842e-07, "loss": 0.4898, "step": 12802 }, { "epoch": 0.91, "grad_norm": 2.3657625861140903, "learning_rate": 2.178178041364881e-07, "loss": 0.4784, "step": 12803 }, { "epoch": 0.91, "grad_norm": 1.8984403873119962, "learning_rate": 2.1748244190418766e-07, "loss": 0.5652, "step": 12804 }, { "epoch": 0.91, "grad_norm": 1.6452831027597885, "learning_rate": 2.1714733230066897e-07, "loss": 0.5233, "step": 12805 }, { "epoch": 0.91, "grad_norm": 2.1753818657430046, "learning_rate": 2.168124753436346e-07, "loss": 0.5023, "step": 12806 }, { "epoch": 0.91, "grad_norm": 1.4615127118840312, "learning_rate": 2.164778710507731e-07, "loss": 0.4927, "step": 12807 }, { "epoch": 0.91, "grad_norm": 0.6904294455096526, "learning_rate": 2.1614351943975932e-07, "loss": 0.3938, "step": 12808 }, { "epoch": 0.91, "grad_norm": 1.444772360065789, "learning_rate": 2.1580942052825515e-07, "loss": 0.4256, "step": 12809 }, { "epoch": 0.91, "grad_norm": 2.085776857037535, "learning_rate": 2.1547557433390765e-07, "loss": 0.494, "step": 12810 }, { "epoch": 0.91, "grad_norm": 1.7325796695439903, "learning_rate": 2.1514198087435322e-07, "loss": 0.4897, "step": 12811 }, { "epoch": 0.91, "grad_norm": 1.546304275013161, "learning_rate": 2.1480864016721226e-07, "loss": 0.4879, "step": 12812 }, { "epoch": 0.91, "grad_norm": 2.4075575403422236, "learning_rate": 2.1447555223009341e-07, "loss": 0.571, "step": 12813 }, { "epoch": 0.91, "grad_norm": 2.3589662735500236, "learning_rate": 2.1414271708059153e-07, "loss": 0.4887, "step": 12814 }, { "epoch": 0.91, "grad_norm": 1.6775011186105564, "learning_rate": 2.1381013473628754e-07, "loss": 0.5393, "step": 12815 }, { "epoch": 0.91, "grad_norm": 3.1084847057922835, "learning_rate": 2.1347780521475126e-07, "loss": 0.5254, "step": 12816 }, { "epoch": 0.91, "grad_norm": 2.344582001195526, "learning_rate": 2.1314572853353532e-07, "loss": 0.5255, "step": 12817 }, { "epoch": 0.91, "grad_norm": 1.8860609995611886, "learning_rate": 2.1281390471018237e-07, "loss": 0.4888, "step": 12818 }, { "epoch": 0.91, "grad_norm": 1.6237938495543278, "learning_rate": 2.1248233376222004e-07, "loss": 0.5642, "step": 12819 }, { "epoch": 0.91, "grad_norm": 1.4989483710994105, "learning_rate": 2.1215101570716378e-07, "loss": 0.4663, "step": 12820 }, { "epoch": 0.91, "grad_norm": 1.8839064457118004, "learning_rate": 2.1181995056251347e-07, "loss": 0.4967, "step": 12821 }, { "epoch": 0.91, "grad_norm": 2.9827257600123684, "learning_rate": 2.1148913834575792e-07, "loss": 0.5444, "step": 12822 }, { "epoch": 0.91, "grad_norm": 0.699994246513223, "learning_rate": 2.1115857907437198e-07, "loss": 0.4297, "step": 12823 }, { "epoch": 0.91, "grad_norm": 1.6446379236036026, "learning_rate": 2.1082827276581675e-07, "loss": 0.5753, "step": 12824 }, { "epoch": 0.91, "grad_norm": 2.0595384597828805, "learning_rate": 2.1049821943754046e-07, "loss": 0.5815, "step": 12825 }, { "epoch": 0.91, "grad_norm": 1.3846848506717584, "learning_rate": 2.101684191069764e-07, "loss": 0.4734, "step": 12826 }, { "epoch": 0.91, "grad_norm": 1.8086867435699205, "learning_rate": 2.0983887179154783e-07, "loss": 0.5251, "step": 12827 }, { "epoch": 0.91, "grad_norm": 0.7125358048893002, "learning_rate": 2.095095775086603e-07, "loss": 0.4274, "step": 12828 }, { "epoch": 0.91, "grad_norm": 1.8489682335404216, "learning_rate": 2.091805362757099e-07, "loss": 0.5189, "step": 12829 }, { "epoch": 0.91, "grad_norm": 2.6101176511957074, "learning_rate": 2.0885174811007657e-07, "loss": 0.556, "step": 12830 }, { "epoch": 0.91, "grad_norm": 2.0717716811709175, "learning_rate": 2.085232130291287e-07, "loss": 0.6026, "step": 12831 }, { "epoch": 0.91, "grad_norm": 2.2821167853749027, "learning_rate": 2.0819493105022125e-07, "loss": 0.5138, "step": 12832 }, { "epoch": 0.91, "grad_norm": 1.8749203596707527, "learning_rate": 2.078669021906926e-07, "loss": 0.4689, "step": 12833 }, { "epoch": 0.91, "grad_norm": 1.8277718936205098, "learning_rate": 2.075391264678739e-07, "loss": 0.5867, "step": 12834 }, { "epoch": 0.91, "grad_norm": 1.8844913121038056, "learning_rate": 2.072116038990768e-07, "loss": 0.5441, "step": 12835 }, { "epoch": 0.91, "grad_norm": 2.550975307952143, "learning_rate": 2.0688433450160305e-07, "loss": 0.5453, "step": 12836 }, { "epoch": 0.91, "grad_norm": 2.023836812982778, "learning_rate": 2.0655731829273994e-07, "loss": 0.5032, "step": 12837 }, { "epoch": 0.91, "grad_norm": 1.783704573568617, "learning_rate": 2.06230555289762e-07, "loss": 0.593, "step": 12838 }, { "epoch": 0.91, "grad_norm": 2.114186099743231, "learning_rate": 2.0590404550992982e-07, "loss": 0.5509, "step": 12839 }, { "epoch": 0.91, "grad_norm": 2.5462700982895363, "learning_rate": 2.0557778897048963e-07, "loss": 0.5157, "step": 12840 }, { "epoch": 0.91, "grad_norm": 2.2016694914443002, "learning_rate": 2.052517856886771e-07, "loss": 0.4619, "step": 12841 }, { "epoch": 0.91, "grad_norm": 1.8559301982643677, "learning_rate": 2.0492603568171177e-07, "loss": 0.5437, "step": 12842 }, { "epoch": 0.91, "grad_norm": 2.081394745303622, "learning_rate": 2.0460053896680154e-07, "loss": 0.5655, "step": 12843 }, { "epoch": 0.91, "grad_norm": 0.6221447411911857, "learning_rate": 2.0427529556113935e-07, "loss": 0.3869, "step": 12844 }, { "epoch": 0.91, "grad_norm": 1.662511679133034, "learning_rate": 2.0395030548190642e-07, "loss": 0.4644, "step": 12845 }, { "epoch": 0.91, "grad_norm": 1.7591217520153677, "learning_rate": 2.0362556874627014e-07, "loss": 0.5076, "step": 12846 }, { "epoch": 0.91, "grad_norm": 2.260989049018104, "learning_rate": 2.0330108537138294e-07, "loss": 0.4696, "step": 12847 }, { "epoch": 0.91, "grad_norm": 0.7195526476533084, "learning_rate": 2.0297685537438606e-07, "loss": 0.4231, "step": 12848 }, { "epoch": 0.91, "grad_norm": 1.9764874279863391, "learning_rate": 2.0265287877240581e-07, "loss": 0.5531, "step": 12849 }, { "epoch": 0.91, "grad_norm": 1.4763259200621193, "learning_rate": 2.0232915558255738e-07, "loss": 0.4975, "step": 12850 }, { "epoch": 0.91, "grad_norm": 0.7424916912727368, "learning_rate": 2.0200568582193881e-07, "loss": 0.4222, "step": 12851 }, { "epoch": 0.91, "grad_norm": 2.688235911427795, "learning_rate": 2.0168246950763693e-07, "loss": 0.5065, "step": 12852 }, { "epoch": 0.91, "grad_norm": 0.6188613677531778, "learning_rate": 2.01359506656727e-07, "loss": 0.4055, "step": 12853 }, { "epoch": 0.91, "grad_norm": 1.5553440897948918, "learning_rate": 2.0103679728626758e-07, "loss": 0.4857, "step": 12854 }, { "epoch": 0.91, "grad_norm": 1.377726817849253, "learning_rate": 2.007143414133067e-07, "loss": 0.4942, "step": 12855 }, { "epoch": 0.91, "grad_norm": 1.6666616184780845, "learning_rate": 2.0039213905487465e-07, "loss": 0.4693, "step": 12856 }, { "epoch": 0.91, "grad_norm": 1.7864468025010933, "learning_rate": 2.0007019022799445e-07, "loss": 0.5379, "step": 12857 }, { "epoch": 0.91, "grad_norm": 2.0977212474888742, "learning_rate": 1.9974849494967086e-07, "loss": 0.4979, "step": 12858 }, { "epoch": 0.91, "grad_norm": 2.030358419235614, "learning_rate": 1.9942705323689694e-07, "loss": 0.4893, "step": 12859 }, { "epoch": 0.91, "grad_norm": 1.7011193368483453, "learning_rate": 1.9910586510665299e-07, "loss": 0.4738, "step": 12860 }, { "epoch": 0.91, "grad_norm": 1.9230292016443153, "learning_rate": 1.9878493057590432e-07, "loss": 0.5181, "step": 12861 }, { "epoch": 0.91, "grad_norm": 1.7884005325590633, "learning_rate": 1.9846424966160515e-07, "loss": 0.4777, "step": 12862 }, { "epoch": 0.91, "grad_norm": 1.7575842579734136, "learning_rate": 1.9814382238069308e-07, "loss": 0.5276, "step": 12863 }, { "epoch": 0.91, "grad_norm": 2.2006834522212837, "learning_rate": 1.9782364875009673e-07, "loss": 0.4576, "step": 12864 }, { "epoch": 0.91, "grad_norm": 1.626273087926516, "learning_rate": 1.9750372878672596e-07, "loss": 0.5338, "step": 12865 }, { "epoch": 0.91, "grad_norm": 1.5913212737346931, "learning_rate": 1.9718406250748167e-07, "loss": 0.5507, "step": 12866 }, { "epoch": 0.91, "grad_norm": 1.495878469826059, "learning_rate": 1.9686464992924925e-07, "loss": 0.4775, "step": 12867 }, { "epoch": 0.91, "grad_norm": 1.9906105379181938, "learning_rate": 1.965454910689013e-07, "loss": 0.4879, "step": 12868 }, { "epoch": 0.91, "grad_norm": 1.7350232831013752, "learning_rate": 1.962265859432977e-07, "loss": 0.5511, "step": 12869 }, { "epoch": 0.91, "grad_norm": 7.556782631794976, "learning_rate": 1.959079345692827e-07, "loss": 0.516, "step": 12870 }, { "epoch": 0.91, "grad_norm": 1.9255250889411737, "learning_rate": 1.9558953696368842e-07, "loss": 0.4848, "step": 12871 }, { "epoch": 0.91, "grad_norm": 1.825290333706671, "learning_rate": 1.9527139314333475e-07, "loss": 0.4924, "step": 12872 }, { "epoch": 0.91, "grad_norm": 2.2073369212312355, "learning_rate": 1.9495350312502716e-07, "loss": 0.4991, "step": 12873 }, { "epoch": 0.91, "grad_norm": 1.4814136243599614, "learning_rate": 1.946358669255566e-07, "loss": 0.4935, "step": 12874 }, { "epoch": 0.91, "grad_norm": 1.5403339762887331, "learning_rate": 1.9431848456170242e-07, "loss": 0.5147, "step": 12875 }, { "epoch": 0.91, "grad_norm": 1.8254228369622763, "learning_rate": 1.9400135605023073e-07, "loss": 0.502, "step": 12876 }, { "epoch": 0.91, "grad_norm": 0.7370825183249379, "learning_rate": 1.9368448140789142e-07, "loss": 0.4184, "step": 12877 }, { "epoch": 0.91, "grad_norm": 1.7317346011737067, "learning_rate": 1.9336786065142388e-07, "loss": 0.528, "step": 12878 }, { "epoch": 0.91, "grad_norm": 1.7917666993201937, "learning_rate": 1.930514937975536e-07, "loss": 0.5485, "step": 12879 }, { "epoch": 0.91, "grad_norm": 1.8987064590859657, "learning_rate": 1.9273538086299114e-07, "loss": 0.5288, "step": 12880 }, { "epoch": 0.91, "grad_norm": 2.309789541342419, "learning_rate": 1.9241952186443535e-07, "loss": 0.5374, "step": 12881 }, { "epoch": 0.91, "grad_norm": 1.492034210007121, "learning_rate": 1.921039168185701e-07, "loss": 0.4767, "step": 12882 }, { "epoch": 0.91, "grad_norm": 1.947001735988435, "learning_rate": 1.9178856574206816e-07, "loss": 0.4908, "step": 12883 }, { "epoch": 0.91, "grad_norm": 2.2440130057016443, "learning_rate": 1.9147346865158622e-07, "loss": 0.5688, "step": 12884 }, { "epoch": 0.91, "grad_norm": 2.321784957433587, "learning_rate": 1.911586255637693e-07, "loss": 0.5688, "step": 12885 }, { "epoch": 0.91, "grad_norm": 1.8446689471314754, "learning_rate": 1.9084403649524797e-07, "loss": 0.5051, "step": 12886 }, { "epoch": 0.91, "grad_norm": 1.5891090255506783, "learning_rate": 1.905297014626406e-07, "loss": 0.5222, "step": 12887 }, { "epoch": 0.91, "grad_norm": 1.6392731513830539, "learning_rate": 1.9021562048255116e-07, "loss": 0.5712, "step": 12888 }, { "epoch": 0.91, "grad_norm": 1.742516955596712, "learning_rate": 1.8990179357156967e-07, "loss": 0.5235, "step": 12889 }, { "epoch": 0.91, "grad_norm": 2.126402558134192, "learning_rate": 1.8958822074627514e-07, "loss": 0.5827, "step": 12890 }, { "epoch": 0.91, "grad_norm": 1.8140401783010942, "learning_rate": 1.892749020232304e-07, "loss": 0.5612, "step": 12891 }, { "epoch": 0.91, "grad_norm": 1.886235217543352, "learning_rate": 1.8896183741898722e-07, "loss": 0.5316, "step": 12892 }, { "epoch": 0.91, "grad_norm": 1.426671418592252, "learning_rate": 1.8864902695008014e-07, "loss": 0.4736, "step": 12893 }, { "epoch": 0.91, "grad_norm": 1.5476836972174197, "learning_rate": 1.8833647063303596e-07, "loss": 0.4952, "step": 12894 }, { "epoch": 0.92, "grad_norm": 1.8523745506733946, "learning_rate": 1.8802416848436255e-07, "loss": 0.5011, "step": 12895 }, { "epoch": 0.92, "grad_norm": 2.143212631942806, "learning_rate": 1.8771212052055844e-07, "loss": 0.5122, "step": 12896 }, { "epoch": 0.92, "grad_norm": 1.6903537676124054, "learning_rate": 1.8740032675810594e-07, "loss": 0.5643, "step": 12897 }, { "epoch": 0.92, "grad_norm": 1.5930889272638418, "learning_rate": 1.8708878721347524e-07, "loss": 0.5283, "step": 12898 }, { "epoch": 0.92, "grad_norm": 2.0722536460301124, "learning_rate": 1.8677750190312426e-07, "loss": 0.5421, "step": 12899 }, { "epoch": 0.92, "grad_norm": 3.7060014090443762, "learning_rate": 1.8646647084349434e-07, "loss": 0.552, "step": 12900 }, { "epoch": 0.92, "grad_norm": 1.6882200546782162, "learning_rate": 1.8615569405101562e-07, "loss": 0.5223, "step": 12901 }, { "epoch": 0.92, "grad_norm": 1.969404544030653, "learning_rate": 1.8584517154210502e-07, "loss": 0.5157, "step": 12902 }, { "epoch": 0.92, "grad_norm": 2.5291444453738237, "learning_rate": 1.8553490333316492e-07, "loss": 0.5181, "step": 12903 }, { "epoch": 0.92, "grad_norm": 0.7739207126553875, "learning_rate": 1.8522488944058502e-07, "loss": 0.4127, "step": 12904 }, { "epoch": 0.92, "grad_norm": 1.6170715476806818, "learning_rate": 1.8491512988074057e-07, "loss": 0.4686, "step": 12905 }, { "epoch": 0.92, "grad_norm": 1.6434258864581324, "learning_rate": 1.846056246699962e-07, "loss": 0.4472, "step": 12906 }, { "epoch": 0.92, "grad_norm": 1.7900514790010746, "learning_rate": 1.8429637382469833e-07, "loss": 0.4399, "step": 12907 }, { "epoch": 0.92, "grad_norm": 1.9938809040544891, "learning_rate": 1.8398737736118388e-07, "loss": 0.5303, "step": 12908 }, { "epoch": 0.92, "grad_norm": 4.719382279773229, "learning_rate": 1.8367863529577479e-07, "loss": 0.501, "step": 12909 }, { "epoch": 0.92, "grad_norm": 1.524916660947038, "learning_rate": 1.8337014764478079e-07, "loss": 0.5738, "step": 12910 }, { "epoch": 0.92, "grad_norm": 1.6391613195997803, "learning_rate": 1.830619144244966e-07, "loss": 0.505, "step": 12911 }, { "epoch": 0.92, "grad_norm": 1.955297617854535, "learning_rate": 1.8275393565120314e-07, "loss": 0.5179, "step": 12912 }, { "epoch": 0.92, "grad_norm": 0.7258159023083258, "learning_rate": 1.824462113411707e-07, "loss": 0.4318, "step": 12913 }, { "epoch": 0.92, "grad_norm": 1.7253783274081733, "learning_rate": 1.8213874151065348e-07, "loss": 0.4779, "step": 12914 }, { "epoch": 0.92, "grad_norm": 2.1495900239906627, "learning_rate": 1.8183152617589294e-07, "loss": 0.5049, "step": 12915 }, { "epoch": 0.92, "grad_norm": 0.7018904727112434, "learning_rate": 1.815245653531167e-07, "loss": 0.4137, "step": 12916 }, { "epoch": 0.92, "grad_norm": 1.593477688342379, "learning_rate": 1.812178590585406e-07, "loss": 0.6069, "step": 12917 }, { "epoch": 0.92, "grad_norm": 1.5142734110805667, "learning_rate": 1.809114073083662e-07, "loss": 0.4079, "step": 12918 }, { "epoch": 0.92, "grad_norm": 8.17440356412819, "learning_rate": 1.8060521011877995e-07, "loss": 0.5545, "step": 12919 }, { "epoch": 0.92, "grad_norm": 1.8616406108413817, "learning_rate": 1.8029926750595672e-07, "loss": 0.5456, "step": 12920 }, { "epoch": 0.92, "grad_norm": 1.5320606552668217, "learning_rate": 1.7999357948605744e-07, "loss": 0.5358, "step": 12921 }, { "epoch": 0.92, "grad_norm": 1.7104067710413295, "learning_rate": 1.7968814607523033e-07, "loss": 0.5268, "step": 12922 }, { "epoch": 0.92, "grad_norm": 1.8634205651369784, "learning_rate": 1.7938296728960803e-07, "loss": 0.5282, "step": 12923 }, { "epoch": 0.92, "grad_norm": 1.7831057782822592, "learning_rate": 1.7907804314531264e-07, "loss": 0.5218, "step": 12924 }, { "epoch": 0.92, "grad_norm": 1.4761489014103117, "learning_rate": 1.7877337365845015e-07, "loss": 0.5288, "step": 12925 }, { "epoch": 0.92, "grad_norm": 1.5177750692583039, "learning_rate": 1.784689588451144e-07, "loss": 0.5133, "step": 12926 }, { "epoch": 0.92, "grad_norm": 0.7070074477479356, "learning_rate": 1.7816479872138582e-07, "loss": 0.4217, "step": 12927 }, { "epoch": 0.92, "grad_norm": 1.859807336449304, "learning_rate": 1.77860893303331e-07, "loss": 0.5878, "step": 12928 }, { "epoch": 0.92, "grad_norm": 1.60485492447602, "learning_rate": 1.7755724260700436e-07, "loss": 0.4781, "step": 12929 }, { "epoch": 0.92, "grad_norm": 1.7057507442705977, "learning_rate": 1.7725384664844414e-07, "loss": 0.5018, "step": 12930 }, { "epoch": 0.92, "grad_norm": 0.7617218157424617, "learning_rate": 1.7695070544367755e-07, "loss": 0.4355, "step": 12931 }, { "epoch": 0.92, "grad_norm": 1.9795185853486468, "learning_rate": 1.766478190087173e-07, "loss": 0.5105, "step": 12932 }, { "epoch": 0.92, "grad_norm": 2.0044915010283266, "learning_rate": 1.7634518735956342e-07, "loss": 0.5381, "step": 12933 }, { "epoch": 0.92, "grad_norm": 2.380680709333662, "learning_rate": 1.760428105122014e-07, "loss": 0.5169, "step": 12934 }, { "epoch": 0.92, "grad_norm": 0.7450506881748511, "learning_rate": 1.7574068848260294e-07, "loss": 0.4219, "step": 12935 }, { "epoch": 0.92, "grad_norm": 1.6672872785091464, "learning_rate": 1.7543882128672973e-07, "loss": 0.5387, "step": 12936 }, { "epoch": 0.92, "grad_norm": 2.72782501872599, "learning_rate": 1.751372089405251e-07, "loss": 0.5131, "step": 12937 }, { "epoch": 0.92, "grad_norm": 2.1108828035044804, "learning_rate": 1.7483585145992132e-07, "loss": 0.5521, "step": 12938 }, { "epoch": 0.92, "grad_norm": 2.0062488267929224, "learning_rate": 1.7453474886083843e-07, "loss": 0.5304, "step": 12939 }, { "epoch": 0.92, "grad_norm": 2.053886032685585, "learning_rate": 1.7423390115918092e-07, "loss": 0.5057, "step": 12940 }, { "epoch": 0.92, "grad_norm": 1.7928015381275821, "learning_rate": 1.7393330837084111e-07, "loss": 0.5631, "step": 12941 }, { "epoch": 0.92, "grad_norm": 0.783353010626, "learning_rate": 1.736329705116957e-07, "loss": 0.4148, "step": 12942 }, { "epoch": 0.92, "grad_norm": 1.6023737831798517, "learning_rate": 1.7333288759761202e-07, "loss": 0.5371, "step": 12943 }, { "epoch": 0.92, "grad_norm": 0.6379437929166991, "learning_rate": 1.7303305964443962e-07, "loss": 0.3711, "step": 12944 }, { "epoch": 0.92, "grad_norm": 2.5684705795978933, "learning_rate": 1.7273348666801693e-07, "loss": 0.5627, "step": 12945 }, { "epoch": 0.92, "grad_norm": 3.0498306854532005, "learning_rate": 1.7243416868416852e-07, "loss": 0.5087, "step": 12946 }, { "epoch": 0.92, "grad_norm": 2.9834239020676803, "learning_rate": 1.7213510570870562e-07, "loss": 0.5206, "step": 12947 }, { "epoch": 0.92, "grad_norm": 1.7965586393635196, "learning_rate": 1.7183629775742562e-07, "loss": 0.5905, "step": 12948 }, { "epoch": 0.92, "grad_norm": 1.6256688813520603, "learning_rate": 1.7153774484611197e-07, "loss": 0.4501, "step": 12949 }, { "epoch": 0.92, "grad_norm": 1.6445550447691175, "learning_rate": 1.7123944699053596e-07, "loss": 0.4924, "step": 12950 }, { "epoch": 0.92, "grad_norm": 2.174917378145778, "learning_rate": 1.7094140420645444e-07, "loss": 0.5538, "step": 12951 }, { "epoch": 0.92, "grad_norm": 1.872499079068012, "learning_rate": 1.7064361650961093e-07, "loss": 0.5719, "step": 12952 }, { "epoch": 0.92, "grad_norm": 1.908903810145752, "learning_rate": 1.7034608391573504e-07, "loss": 0.5265, "step": 12953 }, { "epoch": 0.92, "grad_norm": 1.802029100765159, "learning_rate": 1.7004880644054533e-07, "loss": 0.5364, "step": 12954 }, { "epoch": 0.92, "grad_norm": 1.6602819759523981, "learning_rate": 1.6975178409974314e-07, "loss": 0.5229, "step": 12955 }, { "epoch": 0.92, "grad_norm": 1.7913793450454798, "learning_rate": 1.6945501690901867e-07, "loss": 0.5196, "step": 12956 }, { "epoch": 0.92, "grad_norm": 1.5727910401852632, "learning_rate": 1.6915850488404883e-07, "loss": 0.4425, "step": 12957 }, { "epoch": 0.92, "grad_norm": 0.6914953083213943, "learning_rate": 1.68862248040495e-07, "loss": 0.4182, "step": 12958 }, { "epoch": 0.92, "grad_norm": 1.6919504137537773, "learning_rate": 1.6856624639400854e-07, "loss": 0.5266, "step": 12959 }, { "epoch": 0.92, "grad_norm": 1.8457808651664118, "learning_rate": 1.682704999602236e-07, "loss": 0.5596, "step": 12960 }, { "epoch": 0.92, "grad_norm": 1.5418337502244523, "learning_rate": 1.6797500875476268e-07, "loss": 0.5586, "step": 12961 }, { "epoch": 0.92, "grad_norm": 1.6667032756442683, "learning_rate": 1.67679772793235e-07, "loss": 0.5379, "step": 12962 }, { "epoch": 0.92, "grad_norm": 1.7763069559378224, "learning_rate": 1.6738479209123582e-07, "loss": 0.5005, "step": 12963 }, { "epoch": 0.92, "grad_norm": 1.8017231985404945, "learning_rate": 1.6709006666434768e-07, "loss": 0.5237, "step": 12964 }, { "epoch": 0.92, "grad_norm": 0.688322929159905, "learning_rate": 1.6679559652813759e-07, "loss": 0.4285, "step": 12965 }, { "epoch": 0.92, "grad_norm": 1.6054994581745439, "learning_rate": 1.6650138169816198e-07, "loss": 0.4827, "step": 12966 }, { "epoch": 0.92, "grad_norm": 2.6461681980305376, "learning_rate": 1.6620742218996066e-07, "loss": 0.5176, "step": 12967 }, { "epoch": 0.92, "grad_norm": 1.7298947221846763, "learning_rate": 1.6591371801906287e-07, "loss": 0.492, "step": 12968 }, { "epoch": 0.92, "grad_norm": 1.9482126102008412, "learning_rate": 1.6562026920098228e-07, "loss": 0.4801, "step": 12969 }, { "epoch": 0.92, "grad_norm": 0.6744132220614073, "learning_rate": 1.6532707575122043e-07, "loss": 0.4325, "step": 12970 }, { "epoch": 0.92, "grad_norm": 1.6152341087048243, "learning_rate": 1.650341376852649e-07, "loss": 0.5397, "step": 12971 }, { "epoch": 0.92, "grad_norm": 0.6641516706584203, "learning_rate": 1.6474145501858884e-07, "loss": 0.4264, "step": 12972 }, { "epoch": 0.92, "grad_norm": 1.7693987954550874, "learning_rate": 1.6444902776665385e-07, "loss": 0.5231, "step": 12973 }, { "epoch": 0.92, "grad_norm": 1.8103811375228294, "learning_rate": 1.6415685594490526e-07, "loss": 0.5786, "step": 12974 }, { "epoch": 0.92, "grad_norm": 1.7824358921651722, "learning_rate": 1.6386493956877858e-07, "loss": 0.477, "step": 12975 }, { "epoch": 0.92, "grad_norm": 1.7230891729480233, "learning_rate": 1.63573278653692e-07, "loss": 0.5816, "step": 12976 }, { "epoch": 0.92, "grad_norm": 2.0074024522536655, "learning_rate": 1.6328187321505318e-07, "loss": 0.554, "step": 12977 }, { "epoch": 0.92, "grad_norm": 2.0822021630381795, "learning_rate": 1.6299072326825592e-07, "loss": 0.6178, "step": 12978 }, { "epoch": 0.92, "grad_norm": 1.5767552778138463, "learning_rate": 1.626998288286774e-07, "loss": 0.5159, "step": 12979 }, { "epoch": 0.92, "grad_norm": 2.5488718119615865, "learning_rate": 1.624091899116853e-07, "loss": 0.5089, "step": 12980 }, { "epoch": 0.92, "grad_norm": 1.5226089081896337, "learning_rate": 1.621188065326318e-07, "loss": 0.5701, "step": 12981 }, { "epoch": 0.92, "grad_norm": 1.8386362128201983, "learning_rate": 1.6182867870685626e-07, "loss": 0.5297, "step": 12982 }, { "epoch": 0.92, "grad_norm": 1.5817653038690747, "learning_rate": 1.6153880644968366e-07, "loss": 0.4617, "step": 12983 }, { "epoch": 0.92, "grad_norm": 1.7132260715716927, "learning_rate": 1.6124918977642512e-07, "loss": 0.5314, "step": 12984 }, { "epoch": 0.92, "grad_norm": 1.5375403619073302, "learning_rate": 1.6095982870238168e-07, "loss": 0.4922, "step": 12985 }, { "epoch": 0.92, "grad_norm": 1.5629476524544272, "learning_rate": 1.6067072324283672e-07, "loss": 0.4685, "step": 12986 }, { "epoch": 0.92, "grad_norm": 1.7658817587709719, "learning_rate": 1.603818734130619e-07, "loss": 0.5544, "step": 12987 }, { "epoch": 0.92, "grad_norm": 1.7313781386637763, "learning_rate": 1.6009327922831552e-07, "loss": 0.5026, "step": 12988 }, { "epoch": 0.92, "grad_norm": 1.7609404349592683, "learning_rate": 1.5980494070384212e-07, "loss": 0.5699, "step": 12989 }, { "epoch": 0.92, "grad_norm": 1.7286487279570024, "learning_rate": 1.5951685785487226e-07, "loss": 0.5565, "step": 12990 }, { "epoch": 0.92, "grad_norm": 1.7080096126514979, "learning_rate": 1.5922903069662378e-07, "loss": 0.5682, "step": 12991 }, { "epoch": 0.92, "grad_norm": 2.383334416297703, "learning_rate": 1.589414592443006e-07, "loss": 0.4827, "step": 12992 }, { "epoch": 0.92, "grad_norm": 0.7022119367114747, "learning_rate": 1.5865414351309339e-07, "loss": 0.4336, "step": 12993 }, { "epoch": 0.92, "grad_norm": 1.6622722116674689, "learning_rate": 1.583670835181794e-07, "loss": 0.4744, "step": 12994 }, { "epoch": 0.92, "grad_norm": 2.0537273027663328, "learning_rate": 1.5808027927472103e-07, "loss": 0.5824, "step": 12995 }, { "epoch": 0.92, "grad_norm": 1.4674304776984295, "learning_rate": 1.5779373079786998e-07, "loss": 0.477, "step": 12996 }, { "epoch": 0.92, "grad_norm": 1.8495907972420185, "learning_rate": 1.575074381027608e-07, "loss": 0.5643, "step": 12997 }, { "epoch": 0.92, "grad_norm": 1.7132091901153577, "learning_rate": 1.5722140120451756e-07, "loss": 0.4618, "step": 12998 }, { "epoch": 0.92, "grad_norm": 1.865561935712061, "learning_rate": 1.5693562011824926e-07, "loss": 0.5545, "step": 12999 }, { "epoch": 0.92, "grad_norm": 1.6767901040331972, "learning_rate": 1.5665009485905215e-07, "loss": 0.4746, "step": 13000 }, { "epoch": 0.92, "grad_norm": 1.6671333648425857, "learning_rate": 1.5636482544200915e-07, "loss": 0.475, "step": 13001 }, { "epoch": 0.92, "grad_norm": 1.6393118670633124, "learning_rate": 1.5607981188218768e-07, "loss": 0.5236, "step": 13002 }, { "epoch": 0.92, "grad_norm": 1.9246373500097729, "learning_rate": 1.5579505419464514e-07, "loss": 0.5057, "step": 13003 }, { "epoch": 0.92, "grad_norm": 1.7020835671781742, "learning_rate": 1.5551055239442114e-07, "loss": 0.4826, "step": 13004 }, { "epoch": 0.92, "grad_norm": 1.4714402837846494, "learning_rate": 1.5522630649654537e-07, "loss": 0.4459, "step": 13005 }, { "epoch": 0.92, "grad_norm": 1.6765312525052818, "learning_rate": 1.5494231651603242e-07, "loss": 0.4729, "step": 13006 }, { "epoch": 0.92, "grad_norm": 2.3494689403228066, "learning_rate": 1.546585824678837e-07, "loss": 0.5193, "step": 13007 }, { "epoch": 0.92, "grad_norm": 1.4856063160945567, "learning_rate": 1.5437510436708768e-07, "loss": 0.4676, "step": 13008 }, { "epoch": 0.92, "grad_norm": 2.023093565145505, "learning_rate": 1.5409188222861694e-07, "loss": 0.5906, "step": 13009 }, { "epoch": 0.92, "grad_norm": 2.2131889042579553, "learning_rate": 1.538089160674333e-07, "loss": 0.5314, "step": 13010 }, { "epoch": 0.92, "grad_norm": 0.6343516224210075, "learning_rate": 1.5352620589848376e-07, "loss": 0.3964, "step": 13011 }, { "epoch": 0.92, "grad_norm": 1.7721008873206334, "learning_rate": 1.5324375173670247e-07, "loss": 0.5245, "step": 13012 }, { "epoch": 0.92, "grad_norm": 1.845964985329556, "learning_rate": 1.5296155359700972e-07, "loss": 0.5608, "step": 13013 }, { "epoch": 0.92, "grad_norm": 1.6836798460190794, "learning_rate": 1.526796114943102e-07, "loss": 0.4429, "step": 13014 }, { "epoch": 0.92, "grad_norm": 1.7116603300412874, "learning_rate": 1.5239792544349984e-07, "loss": 0.5798, "step": 13015 }, { "epoch": 0.92, "grad_norm": 1.7006079890203314, "learning_rate": 1.5211649545945672e-07, "loss": 0.5197, "step": 13016 }, { "epoch": 0.92, "grad_norm": 1.7326668589559964, "learning_rate": 1.5183532155704673e-07, "loss": 0.4816, "step": 13017 }, { "epoch": 0.92, "grad_norm": 1.8070746475993482, "learning_rate": 1.5155440375112297e-07, "loss": 0.4856, "step": 13018 }, { "epoch": 0.92, "grad_norm": 2.0171448276874058, "learning_rate": 1.5127374205652523e-07, "loss": 0.5531, "step": 13019 }, { "epoch": 0.92, "grad_norm": 1.9334040531277459, "learning_rate": 1.5099333648807723e-07, "loss": 0.5282, "step": 13020 }, { "epoch": 0.92, "grad_norm": 1.5708952461251648, "learning_rate": 1.5071318706059157e-07, "loss": 0.5379, "step": 13021 }, { "epoch": 0.92, "grad_norm": 1.7870386557785074, "learning_rate": 1.5043329378886805e-07, "loss": 0.4584, "step": 13022 }, { "epoch": 0.92, "grad_norm": 1.8744683596755989, "learning_rate": 1.5015365668769044e-07, "loss": 0.5765, "step": 13023 }, { "epoch": 0.92, "grad_norm": 1.6658297434180285, "learning_rate": 1.498742757718302e-07, "loss": 0.5002, "step": 13024 }, { "epoch": 0.92, "grad_norm": 2.337214101702783, "learning_rate": 1.4959515105604394e-07, "loss": 0.5541, "step": 13025 }, { "epoch": 0.92, "grad_norm": 1.718442721079696, "learning_rate": 1.493162825550787e-07, "loss": 0.519, "step": 13026 }, { "epoch": 0.92, "grad_norm": 1.7212674390774225, "learning_rate": 1.4903767028366322e-07, "loss": 0.5096, "step": 13027 }, { "epoch": 0.92, "grad_norm": 1.6180712900647878, "learning_rate": 1.4875931425651524e-07, "loss": 0.5082, "step": 13028 }, { "epoch": 0.92, "grad_norm": 1.649319677550335, "learning_rate": 1.4848121448833853e-07, "loss": 0.5507, "step": 13029 }, { "epoch": 0.92, "grad_norm": 1.8671984204542902, "learning_rate": 1.4820337099382298e-07, "loss": 0.5955, "step": 13030 }, { "epoch": 0.92, "grad_norm": 1.6008257395326015, "learning_rate": 1.4792578378764633e-07, "loss": 0.4505, "step": 13031 }, { "epoch": 0.92, "grad_norm": 1.8350178688823395, "learning_rate": 1.476484528844696e-07, "loss": 0.5242, "step": 13032 }, { "epoch": 0.92, "grad_norm": 1.733489744176243, "learning_rate": 1.4737137829894498e-07, "loss": 0.5331, "step": 13033 }, { "epoch": 0.92, "grad_norm": 1.6559441700619066, "learning_rate": 1.4709456004570632e-07, "loss": 0.5728, "step": 13034 }, { "epoch": 0.92, "grad_norm": 1.737022618526114, "learning_rate": 1.4681799813937692e-07, "loss": 0.5684, "step": 13035 }, { "epoch": 0.93, "grad_norm": 1.5818804199599092, "learning_rate": 1.4654169259456563e-07, "loss": 0.5209, "step": 13036 }, { "epoch": 0.93, "grad_norm": 1.6304304279246429, "learning_rate": 1.4626564342586802e-07, "loss": 0.4715, "step": 13037 }, { "epoch": 0.93, "grad_norm": 1.7198812345857497, "learning_rate": 1.4598985064786632e-07, "loss": 0.5238, "step": 13038 }, { "epoch": 0.93, "grad_norm": 1.5568735909496947, "learning_rate": 1.4571431427512771e-07, "loss": 0.5053, "step": 13039 }, { "epoch": 0.93, "grad_norm": 2.129023849986105, "learning_rate": 1.4543903432220784e-07, "loss": 0.5162, "step": 13040 }, { "epoch": 0.93, "grad_norm": 1.7686448822993477, "learning_rate": 1.4516401080364728e-07, "loss": 0.5531, "step": 13041 }, { "epoch": 0.93, "grad_norm": 1.6959646897344067, "learning_rate": 1.448892437339744e-07, "loss": 0.518, "step": 13042 }, { "epoch": 0.93, "grad_norm": 1.8945615544536747, "learning_rate": 1.4461473312770368e-07, "loss": 0.5222, "step": 13043 }, { "epoch": 0.93, "grad_norm": 1.584443791967715, "learning_rate": 1.4434047899933357e-07, "loss": 0.484, "step": 13044 }, { "epoch": 0.93, "grad_norm": 1.6515760411101494, "learning_rate": 1.4406648136335412e-07, "loss": 0.4753, "step": 13045 }, { "epoch": 0.93, "grad_norm": 1.5330660454752978, "learning_rate": 1.437927402342365e-07, "loss": 0.479, "step": 13046 }, { "epoch": 0.93, "grad_norm": 1.6520645678223127, "learning_rate": 1.4351925562644143e-07, "loss": 0.5549, "step": 13047 }, { "epoch": 0.93, "grad_norm": 1.6327099860829077, "learning_rate": 1.4324602755441507e-07, "loss": 0.505, "step": 13048 }, { "epoch": 0.93, "grad_norm": 1.6430968811586766, "learning_rate": 1.4297305603259037e-07, "loss": 0.5296, "step": 13049 }, { "epoch": 0.93, "grad_norm": 2.0257047347751134, "learning_rate": 1.4270034107538743e-07, "loss": 0.6322, "step": 13050 }, { "epoch": 0.93, "grad_norm": 2.013108784179735, "learning_rate": 1.424278826972103e-07, "loss": 0.5249, "step": 13051 }, { "epoch": 0.93, "grad_norm": 1.8433861695789293, "learning_rate": 1.4215568091245359e-07, "loss": 0.5822, "step": 13052 }, { "epoch": 0.93, "grad_norm": 1.5594359918863692, "learning_rate": 1.4188373573549297e-07, "loss": 0.4951, "step": 13053 }, { "epoch": 0.93, "grad_norm": 2.0655393455338342, "learning_rate": 1.4161204718069642e-07, "loss": 0.5276, "step": 13054 }, { "epoch": 0.93, "grad_norm": 1.727087456292135, "learning_rate": 1.413406152624125e-07, "loss": 0.5056, "step": 13055 }, { "epoch": 0.93, "grad_norm": 1.9409173488756806, "learning_rate": 1.4106943999498134e-07, "loss": 0.5038, "step": 13056 }, { "epoch": 0.93, "grad_norm": 2.0830462774232683, "learning_rate": 1.4079852139272653e-07, "loss": 0.5438, "step": 13057 }, { "epoch": 0.93, "grad_norm": 1.720752273696097, "learning_rate": 1.405278594699594e-07, "loss": 0.5014, "step": 13058 }, { "epoch": 0.93, "grad_norm": 1.6848732508131365, "learning_rate": 1.4025745424097626e-07, "loss": 0.559, "step": 13059 }, { "epoch": 0.93, "grad_norm": 2.118371998408966, "learning_rate": 1.399873057200618e-07, "loss": 0.4875, "step": 13060 }, { "epoch": 0.93, "grad_norm": 3.489592294769544, "learning_rate": 1.397174139214863e-07, "loss": 0.5375, "step": 13061 }, { "epoch": 0.93, "grad_norm": 1.6580948634680197, "learning_rate": 1.39447778859505e-07, "loss": 0.5761, "step": 13062 }, { "epoch": 0.93, "grad_norm": 1.7335255440823734, "learning_rate": 1.3917840054836262e-07, "loss": 0.5452, "step": 13063 }, { "epoch": 0.93, "grad_norm": 1.7933710022876321, "learning_rate": 1.3890927900228722e-07, "loss": 0.6088, "step": 13064 }, { "epoch": 0.93, "grad_norm": 1.711963779422511, "learning_rate": 1.3864041423549525e-07, "loss": 0.5512, "step": 13065 }, { "epoch": 0.93, "grad_norm": 1.7395271176421412, "learning_rate": 1.3837180626218915e-07, "loss": 0.5794, "step": 13066 }, { "epoch": 0.93, "grad_norm": 1.643545145278159, "learning_rate": 1.381034550965582e-07, "loss": 0.5621, "step": 13067 }, { "epoch": 0.93, "grad_norm": 1.6572539590163193, "learning_rate": 1.3783536075277716e-07, "loss": 0.4477, "step": 13068 }, { "epoch": 0.93, "grad_norm": 1.5479326231006578, "learning_rate": 1.3756752324500745e-07, "loss": 0.4436, "step": 13069 }, { "epoch": 0.93, "grad_norm": 1.5025543120957576, "learning_rate": 1.372999425873972e-07, "loss": 0.5328, "step": 13070 }, { "epoch": 0.93, "grad_norm": 1.6814224765009957, "learning_rate": 1.370326187940807e-07, "loss": 0.5517, "step": 13071 }, { "epoch": 0.93, "grad_norm": 2.817323364321787, "learning_rate": 1.367655518791794e-07, "loss": 0.4577, "step": 13072 }, { "epoch": 0.93, "grad_norm": 1.4750838518576463, "learning_rate": 1.3649874185680146e-07, "loss": 0.4706, "step": 13073 }, { "epoch": 0.93, "grad_norm": 1.540199066560695, "learning_rate": 1.3623218874103839e-07, "loss": 0.5007, "step": 13074 }, { "epoch": 0.93, "grad_norm": 0.6457275121594575, "learning_rate": 1.3596589254597281e-07, "loss": 0.3917, "step": 13075 }, { "epoch": 0.93, "grad_norm": 1.818209867825898, "learning_rate": 1.3569985328567015e-07, "loss": 0.5297, "step": 13076 }, { "epoch": 0.93, "grad_norm": 1.5166010493626751, "learning_rate": 1.354340709741836e-07, "loss": 0.4727, "step": 13077 }, { "epoch": 0.93, "grad_norm": 1.7221167440258964, "learning_rate": 1.351685456255525e-07, "loss": 0.5726, "step": 13078 }, { "epoch": 0.93, "grad_norm": 1.531053084822976, "learning_rate": 1.3490327725380336e-07, "loss": 0.4864, "step": 13079 }, { "epoch": 0.93, "grad_norm": 1.7294173900272511, "learning_rate": 1.3463826587294893e-07, "loss": 0.5096, "step": 13080 }, { "epoch": 0.93, "grad_norm": 1.737765610236196, "learning_rate": 1.343735114969863e-07, "loss": 0.5358, "step": 13081 }, { "epoch": 0.93, "grad_norm": 1.553726035162499, "learning_rate": 1.3410901413990319e-07, "loss": 0.4798, "step": 13082 }, { "epoch": 0.93, "grad_norm": 1.7022280947040254, "learning_rate": 1.3384477381566895e-07, "loss": 0.53, "step": 13083 }, { "epoch": 0.93, "grad_norm": 1.7551485095536432, "learning_rate": 1.3358079053824303e-07, "loss": 0.5022, "step": 13084 }, { "epoch": 0.93, "grad_norm": 1.8535672509979602, "learning_rate": 1.3331706432156811e-07, "loss": 0.5231, "step": 13085 }, { "epoch": 0.93, "grad_norm": 1.5336442029713055, "learning_rate": 1.3305359517957806e-07, "loss": 0.4335, "step": 13086 }, { "epoch": 0.93, "grad_norm": 1.598157428207112, "learning_rate": 1.3279038312618787e-07, "loss": 0.4749, "step": 13087 }, { "epoch": 0.93, "grad_norm": 1.8120364155108648, "learning_rate": 1.325274281753014e-07, "loss": 0.5252, "step": 13088 }, { "epoch": 0.93, "grad_norm": 1.8427128825035377, "learning_rate": 1.3226473034081032e-07, "loss": 0.4892, "step": 13089 }, { "epoch": 0.93, "grad_norm": 1.8975669572540133, "learning_rate": 1.3200228963658969e-07, "loss": 0.561, "step": 13090 }, { "epoch": 0.93, "grad_norm": 6.304275851382534, "learning_rate": 1.3174010607650388e-07, "loss": 0.5146, "step": 13091 }, { "epoch": 0.93, "grad_norm": 1.6853084049805058, "learning_rate": 1.3147817967440135e-07, "loss": 0.5573, "step": 13092 }, { "epoch": 0.93, "grad_norm": 1.6348634174268335, "learning_rate": 1.3121651044411766e-07, "loss": 0.5386, "step": 13093 }, { "epoch": 0.93, "grad_norm": 1.6982856343962642, "learning_rate": 1.3095509839947618e-07, "loss": 0.6194, "step": 13094 }, { "epoch": 0.93, "grad_norm": 1.8169435184716034, "learning_rate": 1.3069394355428422e-07, "loss": 0.4989, "step": 13095 }, { "epoch": 0.93, "grad_norm": 1.6181642891386452, "learning_rate": 1.304330459223385e-07, "loss": 0.5606, "step": 13096 }, { "epoch": 0.93, "grad_norm": 1.8013133583327057, "learning_rate": 1.3017240551741918e-07, "loss": 0.531, "step": 13097 }, { "epoch": 0.93, "grad_norm": 1.8718321757105212, "learning_rate": 1.2991202235329515e-07, "loss": 0.5286, "step": 13098 }, { "epoch": 0.93, "grad_norm": 1.5258082363787606, "learning_rate": 1.2965189644371934e-07, "loss": 0.4587, "step": 13099 }, { "epoch": 0.93, "grad_norm": 2.575738718281483, "learning_rate": 1.2939202780243353e-07, "loss": 0.5333, "step": 13100 }, { "epoch": 0.93, "grad_norm": 1.4574833079588099, "learning_rate": 1.291324164431651e-07, "loss": 0.469, "step": 13101 }, { "epoch": 0.93, "grad_norm": 1.8599377560820922, "learning_rate": 1.2887306237962694e-07, "loss": 0.5451, "step": 13102 }, { "epoch": 0.93, "grad_norm": 1.7914601733164435, "learning_rate": 1.2861396562551974e-07, "loss": 0.5749, "step": 13103 }, { "epoch": 0.93, "grad_norm": 2.0777094060284287, "learning_rate": 1.2835512619452873e-07, "loss": 0.5475, "step": 13104 }, { "epoch": 0.93, "grad_norm": 1.696790443675144, "learning_rate": 1.280965441003279e-07, "loss": 0.4663, "step": 13105 }, { "epoch": 0.93, "grad_norm": 1.6115377858631736, "learning_rate": 1.2783821935657526e-07, "loss": 0.4983, "step": 13106 }, { "epoch": 0.93, "grad_norm": 1.6822834303369707, "learning_rate": 1.2758015197691709e-07, "loss": 0.518, "step": 13107 }, { "epoch": 0.93, "grad_norm": 1.792006926387145, "learning_rate": 1.2732234197498582e-07, "loss": 0.479, "step": 13108 }, { "epoch": 0.93, "grad_norm": 1.5893516342800516, "learning_rate": 1.2706478936439893e-07, "loss": 0.5412, "step": 13109 }, { "epoch": 0.93, "grad_norm": 1.426800813490248, "learning_rate": 1.2680749415876214e-07, "loss": 0.4469, "step": 13110 }, { "epoch": 0.93, "grad_norm": 1.8734684237609165, "learning_rate": 1.2655045637166574e-07, "loss": 0.5302, "step": 13111 }, { "epoch": 0.93, "grad_norm": 0.6738272762311658, "learning_rate": 1.2629367601668774e-07, "loss": 0.4148, "step": 13112 }, { "epoch": 0.93, "grad_norm": 1.7062137750487198, "learning_rate": 1.2603715310739174e-07, "loss": 0.5036, "step": 13113 }, { "epoch": 0.93, "grad_norm": 1.7881646548596797, "learning_rate": 1.2578088765732964e-07, "loss": 0.488, "step": 13114 }, { "epoch": 0.93, "grad_norm": 1.6797741373226827, "learning_rate": 1.2552487968003568e-07, "loss": 0.4819, "step": 13115 }, { "epoch": 0.93, "grad_norm": 1.6894196033551132, "learning_rate": 1.2526912918903512e-07, "loss": 0.5467, "step": 13116 }, { "epoch": 0.93, "grad_norm": 1.9169292136333569, "learning_rate": 1.250136361978377e-07, "loss": 0.5154, "step": 13117 }, { "epoch": 0.93, "grad_norm": 1.61196562998539, "learning_rate": 1.2475840071993817e-07, "loss": 0.472, "step": 13118 }, { "epoch": 0.93, "grad_norm": 2.01850817310306, "learning_rate": 1.2450342276881965e-07, "loss": 0.5408, "step": 13119 }, { "epoch": 0.93, "grad_norm": 1.6473311978392116, "learning_rate": 1.2424870235795027e-07, "loss": 0.5007, "step": 13120 }, { "epoch": 0.93, "grad_norm": 1.534949153447226, "learning_rate": 1.2399423950078704e-07, "loss": 0.5446, "step": 13121 }, { "epoch": 0.93, "grad_norm": 1.6405486443696216, "learning_rate": 1.2374003421076918e-07, "loss": 0.4591, "step": 13122 }, { "epoch": 0.93, "grad_norm": 1.6499520361411846, "learning_rate": 1.23486086501326e-07, "loss": 0.5014, "step": 13123 }, { "epoch": 0.93, "grad_norm": 1.7023345175831042, "learning_rate": 1.2323239638587114e-07, "loss": 0.5725, "step": 13124 }, { "epoch": 0.93, "grad_norm": 0.6342419726149009, "learning_rate": 1.2297896387780616e-07, "loss": 0.3731, "step": 13125 }, { "epoch": 0.93, "grad_norm": 1.7031899165624982, "learning_rate": 1.2272578899051867e-07, "loss": 0.4905, "step": 13126 }, { "epoch": 0.93, "grad_norm": 1.6239537717422563, "learning_rate": 1.2247287173738021e-07, "loss": 0.4779, "step": 13127 }, { "epoch": 0.93, "grad_norm": 1.565949404250858, "learning_rate": 1.2222021213175284e-07, "loss": 0.4494, "step": 13128 }, { "epoch": 0.93, "grad_norm": 1.8263637750978845, "learning_rate": 1.2196781018698146e-07, "loss": 0.5697, "step": 13129 }, { "epoch": 0.93, "grad_norm": 1.4128947497612738, "learning_rate": 1.2171566591639982e-07, "loss": 0.4575, "step": 13130 }, { "epoch": 0.93, "grad_norm": 1.8939840990048236, "learning_rate": 1.214637793333262e-07, "loss": 0.5038, "step": 13131 }, { "epoch": 0.93, "grad_norm": 1.857407273090847, "learning_rate": 1.212121504510666e-07, "loss": 0.5351, "step": 13132 }, { "epoch": 0.93, "grad_norm": 1.5820624596837314, "learning_rate": 1.2096077928291317e-07, "loss": 0.5282, "step": 13133 }, { "epoch": 0.93, "grad_norm": 1.6572627484708098, "learning_rate": 1.2070966584214306e-07, "loss": 0.555, "step": 13134 }, { "epoch": 0.93, "grad_norm": 4.279583270568435, "learning_rate": 1.2045881014202287e-07, "loss": 0.5086, "step": 13135 }, { "epoch": 0.93, "grad_norm": 1.7585715390152703, "learning_rate": 1.2020821219580147e-07, "loss": 0.4939, "step": 13136 }, { "epoch": 0.93, "grad_norm": 1.6013342920723699, "learning_rate": 1.199578720167177e-07, "loss": 0.4879, "step": 13137 }, { "epoch": 0.93, "grad_norm": 1.7368354117302693, "learning_rate": 1.197077896179949e-07, "loss": 0.6018, "step": 13138 }, { "epoch": 0.93, "grad_norm": 1.955171683593444, "learning_rate": 1.1945796501284358e-07, "loss": 0.542, "step": 13139 }, { "epoch": 0.93, "grad_norm": 1.9561572229997517, "learning_rate": 1.1920839821445984e-07, "loss": 0.4853, "step": 13140 }, { "epoch": 0.93, "grad_norm": 1.6026857307758637, "learning_rate": 1.1895908923602706e-07, "loss": 0.5634, "step": 13141 }, { "epoch": 0.93, "grad_norm": 1.5758273573583133, "learning_rate": 1.1871003809071413e-07, "loss": 0.5518, "step": 13142 }, { "epoch": 0.93, "grad_norm": 1.6775533556240612, "learning_rate": 1.184612447916772e-07, "loss": 0.5188, "step": 13143 }, { "epoch": 0.93, "grad_norm": 1.7694038347531653, "learning_rate": 1.1821270935205797e-07, "loss": 0.4856, "step": 13144 }, { "epoch": 0.93, "grad_norm": 1.6027565909070323, "learning_rate": 1.179644317849854e-07, "loss": 0.5485, "step": 13145 }, { "epoch": 0.93, "grad_norm": 1.5285317167668322, "learning_rate": 1.1771641210357399e-07, "loss": 0.4865, "step": 13146 }, { "epoch": 0.93, "grad_norm": 1.488570737291413, "learning_rate": 1.1746865032092548e-07, "loss": 0.5158, "step": 13147 }, { "epoch": 0.93, "grad_norm": 2.332779008516804, "learning_rate": 1.1722114645012717e-07, "loss": 0.5638, "step": 13148 }, { "epoch": 0.93, "grad_norm": 2.523946852915129, "learning_rate": 1.169739005042525e-07, "loss": 0.5311, "step": 13149 }, { "epoch": 0.93, "grad_norm": 3.5829279558453524, "learning_rate": 1.1672691249636269e-07, "loss": 0.5673, "step": 13150 }, { "epoch": 0.93, "grad_norm": 1.9563332832021643, "learning_rate": 1.1648018243950454e-07, "loss": 0.5122, "step": 13151 }, { "epoch": 0.93, "grad_norm": 1.8385638756270517, "learning_rate": 1.1623371034671039e-07, "loss": 0.5454, "step": 13152 }, { "epoch": 0.93, "grad_norm": 1.7432544276344193, "learning_rate": 1.1598749623099926e-07, "loss": 0.5029, "step": 13153 }, { "epoch": 0.93, "grad_norm": 1.7981484131174983, "learning_rate": 1.1574154010537908e-07, "loss": 0.5406, "step": 13154 }, { "epoch": 0.93, "grad_norm": 1.6362569938572553, "learning_rate": 1.1549584198284058e-07, "loss": 0.501, "step": 13155 }, { "epoch": 0.93, "grad_norm": 2.1954736501952956, "learning_rate": 1.1525040187636283e-07, "loss": 0.5487, "step": 13156 }, { "epoch": 0.93, "grad_norm": 1.920972302088347, "learning_rate": 1.1500521979890989e-07, "loss": 0.5725, "step": 13157 }, { "epoch": 0.93, "grad_norm": 2.041531314431294, "learning_rate": 1.1476029576343473e-07, "loss": 0.4694, "step": 13158 }, { "epoch": 0.93, "grad_norm": 1.9773427262837928, "learning_rate": 1.1451562978287368e-07, "loss": 0.4899, "step": 13159 }, { "epoch": 0.93, "grad_norm": 2.0246123715490754, "learning_rate": 1.1427122187015194e-07, "loss": 0.533, "step": 13160 }, { "epoch": 0.93, "grad_norm": 1.8532688588907387, "learning_rate": 1.1402707203817865e-07, "loss": 0.5102, "step": 13161 }, { "epoch": 0.93, "grad_norm": 1.6367604121184889, "learning_rate": 1.1378318029985125e-07, "loss": 0.564, "step": 13162 }, { "epoch": 0.93, "grad_norm": 5.446211565019482, "learning_rate": 1.135395466680539e-07, "loss": 0.4454, "step": 13163 }, { "epoch": 0.93, "grad_norm": 2.217608694537276, "learning_rate": 1.1329617115565461e-07, "loss": 0.5507, "step": 13164 }, { "epoch": 0.93, "grad_norm": 1.8305696431866008, "learning_rate": 1.1305305377551035e-07, "loss": 0.502, "step": 13165 }, { "epoch": 0.93, "grad_norm": 1.7683184842654862, "learning_rate": 1.1281019454046249e-07, "loss": 0.5331, "step": 13166 }, { "epoch": 0.93, "grad_norm": 1.8013851487819206, "learning_rate": 1.1256759346334078e-07, "loss": 0.5089, "step": 13167 }, { "epoch": 0.93, "grad_norm": 1.7448758768439352, "learning_rate": 1.1232525055695942e-07, "loss": 0.5539, "step": 13168 }, { "epoch": 0.93, "grad_norm": 2.7688480595484135, "learning_rate": 1.1208316583411983e-07, "loss": 0.4582, "step": 13169 }, { "epoch": 0.93, "grad_norm": 2.0185065368087485, "learning_rate": 1.1184133930761066e-07, "loss": 0.5218, "step": 13170 }, { "epoch": 0.93, "grad_norm": 3.8818501127252585, "learning_rate": 1.1159977099020447e-07, "loss": 0.445, "step": 13171 }, { "epoch": 0.93, "grad_norm": 1.851163219283377, "learning_rate": 1.1135846089466273e-07, "loss": 0.6096, "step": 13172 }, { "epoch": 0.93, "grad_norm": 0.7119038612668248, "learning_rate": 1.1111740903373247e-07, "loss": 0.4026, "step": 13173 }, { "epoch": 0.93, "grad_norm": 2.026616984199528, "learning_rate": 1.1087661542014572e-07, "loss": 0.5242, "step": 13174 }, { "epoch": 0.93, "grad_norm": 1.6312344066019504, "learning_rate": 1.1063608006662396e-07, "loss": 0.506, "step": 13175 }, { "epoch": 0.93, "grad_norm": 1.6071782358727515, "learning_rate": 1.1039580298587038e-07, "loss": 0.5356, "step": 13176 }, { "epoch": 0.94, "grad_norm": 2.3896970562175013, "learning_rate": 1.1015578419057981e-07, "loss": 0.5647, "step": 13177 }, { "epoch": 0.94, "grad_norm": 1.7586881291761984, "learning_rate": 1.0991602369342935e-07, "loss": 0.489, "step": 13178 }, { "epoch": 0.94, "grad_norm": 1.552357694599565, "learning_rate": 1.096765215070844e-07, "loss": 0.5342, "step": 13179 }, { "epoch": 0.94, "grad_norm": 1.4736974076713947, "learning_rate": 1.0943727764419654e-07, "loss": 0.4739, "step": 13180 }, { "epoch": 0.94, "grad_norm": 1.5245124469287474, "learning_rate": 1.0919829211740285e-07, "loss": 0.4836, "step": 13181 }, { "epoch": 0.94, "grad_norm": 1.650646123254438, "learning_rate": 1.0895956493932825e-07, "loss": 0.5209, "step": 13182 }, { "epoch": 0.94, "grad_norm": 1.825913493686763, "learning_rate": 1.0872109612258152e-07, "loss": 0.5655, "step": 13183 }, { "epoch": 0.94, "grad_norm": 1.6885782997955416, "learning_rate": 1.0848288567976095e-07, "loss": 0.5073, "step": 13184 }, { "epoch": 0.94, "grad_norm": 1.6179252823491033, "learning_rate": 1.0824493362344924e-07, "loss": 0.4825, "step": 13185 }, { "epoch": 0.94, "grad_norm": 1.7054783136312042, "learning_rate": 1.0800723996621577e-07, "loss": 0.56, "step": 13186 }, { "epoch": 0.94, "grad_norm": 1.6334818530026622, "learning_rate": 1.0776980472061549e-07, "loss": 0.536, "step": 13187 }, { "epoch": 0.94, "grad_norm": 1.6222781495632796, "learning_rate": 1.0753262789919228e-07, "loss": 0.5189, "step": 13188 }, { "epoch": 0.94, "grad_norm": 1.681180530064635, "learning_rate": 1.0729570951447276e-07, "loss": 0.4409, "step": 13189 }, { "epoch": 0.94, "grad_norm": 1.672149976906714, "learning_rate": 1.0705904957897306e-07, "loss": 0.4817, "step": 13190 }, { "epoch": 0.94, "grad_norm": 1.8774081396217348, "learning_rate": 1.0682264810519372e-07, "loss": 0.4976, "step": 13191 }, { "epoch": 0.94, "grad_norm": 2.4376795941643055, "learning_rate": 1.0658650510562251e-07, "loss": 0.5875, "step": 13192 }, { "epoch": 0.94, "grad_norm": 2.045113512317086, "learning_rate": 1.0635062059273338e-07, "loss": 0.5676, "step": 13193 }, { "epoch": 0.94, "grad_norm": 1.6933141906350917, "learning_rate": 1.0611499457898577e-07, "loss": 0.471, "step": 13194 }, { "epoch": 0.94, "grad_norm": 1.6882715042453085, "learning_rate": 1.0587962707682753e-07, "loss": 0.4686, "step": 13195 }, { "epoch": 0.94, "grad_norm": 3.0869017847599993, "learning_rate": 1.0564451809869092e-07, "loss": 0.5442, "step": 13196 }, { "epoch": 0.94, "grad_norm": 1.685210001726867, "learning_rate": 1.054096676569949e-07, "loss": 0.5483, "step": 13197 }, { "epoch": 0.94, "grad_norm": 0.7211416235464321, "learning_rate": 1.0517507576414565e-07, "loss": 0.3983, "step": 13198 }, { "epoch": 0.94, "grad_norm": 1.5315092226876486, "learning_rate": 1.0494074243253439e-07, "loss": 0.5165, "step": 13199 }, { "epoch": 0.94, "grad_norm": 1.5225681055774467, "learning_rate": 1.0470666767454063e-07, "loss": 0.4643, "step": 13200 }, { "epoch": 0.94, "grad_norm": 1.825181770706918, "learning_rate": 1.0447285150252784e-07, "loss": 0.595, "step": 13201 }, { "epoch": 0.94, "grad_norm": 1.6141646241871468, "learning_rate": 1.0423929392884724e-07, "loss": 0.5808, "step": 13202 }, { "epoch": 0.94, "grad_norm": 1.7522258146657708, "learning_rate": 1.0400599496583619e-07, "loss": 0.4871, "step": 13203 }, { "epoch": 0.94, "grad_norm": 2.5998027009521794, "learning_rate": 1.0377295462581816e-07, "loss": 0.5454, "step": 13204 }, { "epoch": 0.94, "grad_norm": 1.674251663777696, "learning_rate": 1.0354017292110385e-07, "loss": 0.5317, "step": 13205 }, { "epoch": 0.94, "grad_norm": 1.695856066472541, "learning_rate": 1.0330764986398844e-07, "loss": 0.5764, "step": 13206 }, { "epoch": 0.94, "grad_norm": 1.7799558121192665, "learning_rate": 1.0307538546675599e-07, "loss": 0.5404, "step": 13207 }, { "epoch": 0.94, "grad_norm": 2.7856162461635967, "learning_rate": 1.0284337974167446e-07, "loss": 0.4375, "step": 13208 }, { "epoch": 0.94, "grad_norm": 1.873481232213926, "learning_rate": 1.0261163270099905e-07, "loss": 0.4645, "step": 13209 }, { "epoch": 0.94, "grad_norm": 0.6883294051777229, "learning_rate": 1.0238014435697219e-07, "loss": 0.432, "step": 13210 }, { "epoch": 0.94, "grad_norm": 0.6905730275908555, "learning_rate": 1.0214891472182131e-07, "loss": 0.3976, "step": 13211 }, { "epoch": 0.94, "grad_norm": 1.689132259113071, "learning_rate": 1.0191794380776166e-07, "loss": 0.5878, "step": 13212 }, { "epoch": 0.94, "grad_norm": 0.7159264484856204, "learning_rate": 1.016872316269918e-07, "loss": 0.4227, "step": 13213 }, { "epoch": 0.94, "grad_norm": 1.4854025721230046, "learning_rate": 1.014567781917014e-07, "loss": 0.558, "step": 13214 }, { "epoch": 0.94, "grad_norm": 1.5644879375662322, "learning_rate": 1.012265835140619e-07, "loss": 0.4481, "step": 13215 }, { "epoch": 0.94, "grad_norm": 1.8360012626429543, "learning_rate": 1.0099664760623407e-07, "loss": 0.5144, "step": 13216 }, { "epoch": 0.94, "grad_norm": 2.1959262740740444, "learning_rate": 1.0076697048036266e-07, "loss": 0.5215, "step": 13217 }, { "epoch": 0.94, "grad_norm": 1.9547117499634035, "learning_rate": 1.0053755214858129e-07, "loss": 0.5408, "step": 13218 }, { "epoch": 0.94, "grad_norm": 2.156349269935813, "learning_rate": 1.0030839262300807e-07, "loss": 0.5218, "step": 13219 }, { "epoch": 0.94, "grad_norm": 1.882461647230272, "learning_rate": 1.0007949191574717e-07, "loss": 0.5249, "step": 13220 }, { "epoch": 0.94, "grad_norm": 1.698588282893965, "learning_rate": 9.985085003889173e-08, "loss": 0.5448, "step": 13221 }, { "epoch": 0.94, "grad_norm": 0.6635177838975698, "learning_rate": 9.962246700451761e-08, "loss": 0.392, "step": 13222 }, { "epoch": 0.94, "grad_norm": 1.7308755506393345, "learning_rate": 9.939434282469018e-08, "loss": 0.5048, "step": 13223 }, { "epoch": 0.94, "grad_norm": 1.6001245004397182, "learning_rate": 9.916647751145869e-08, "loss": 0.528, "step": 13224 }, { "epoch": 0.94, "grad_norm": 2.6753705312184866, "learning_rate": 9.893887107686017e-08, "loss": 0.539, "step": 13225 }, { "epoch": 0.94, "grad_norm": 1.6515335350470768, "learning_rate": 9.871152353291724e-08, "loss": 0.5072, "step": 13226 }, { "epoch": 0.94, "grad_norm": 2.2142488468767705, "learning_rate": 9.848443489163972e-08, "loss": 0.4658, "step": 13227 }, { "epoch": 0.94, "grad_norm": 1.5323670454336231, "learning_rate": 9.825760516502302e-08, "loss": 0.4948, "step": 13228 }, { "epoch": 0.94, "grad_norm": 1.6623706467915622, "learning_rate": 9.803103436504923e-08, "loss": 0.5911, "step": 13229 }, { "epoch": 0.94, "grad_norm": 1.6423071178462147, "learning_rate": 9.780472250368656e-08, "loss": 0.5101, "step": 13230 }, { "epoch": 0.94, "grad_norm": 1.8516455389639768, "learning_rate": 9.757866959288875e-08, "loss": 0.5603, "step": 13231 }, { "epoch": 0.94, "grad_norm": 1.8004678887841719, "learning_rate": 9.735287564459739e-08, "loss": 0.5066, "step": 13232 }, { "epoch": 0.94, "grad_norm": 1.7875690978134984, "learning_rate": 9.712734067073959e-08, "loss": 0.5498, "step": 13233 }, { "epoch": 0.94, "grad_norm": 1.522951121094993, "learning_rate": 9.69020646832286e-08, "loss": 0.4695, "step": 13234 }, { "epoch": 0.94, "grad_norm": 1.5479729106162157, "learning_rate": 9.667704769396546e-08, "loss": 0.3966, "step": 13235 }, { "epoch": 0.94, "grad_norm": 2.1753118609488222, "learning_rate": 9.6452289714834e-08, "loss": 0.4988, "step": 13236 }, { "epoch": 0.94, "grad_norm": 1.7439013878297207, "learning_rate": 9.622779075770917e-08, "loss": 0.5231, "step": 13237 }, { "epoch": 0.94, "grad_norm": 1.9007875054003631, "learning_rate": 9.600355083444813e-08, "loss": 0.503, "step": 13238 }, { "epoch": 0.94, "grad_norm": 2.231269560907182, "learning_rate": 9.577956995689697e-08, "loss": 0.52, "step": 13239 }, { "epoch": 0.94, "grad_norm": 1.9305540414562146, "learning_rate": 9.555584813688623e-08, "loss": 0.4918, "step": 13240 }, { "epoch": 0.94, "grad_norm": 0.6860089648071429, "learning_rate": 9.533238538623424e-08, "loss": 0.4148, "step": 13241 }, { "epoch": 0.94, "grad_norm": 2.0493181056811838, "learning_rate": 9.510918171674544e-08, "loss": 0.5698, "step": 13242 }, { "epoch": 0.94, "grad_norm": 1.8118398591915739, "learning_rate": 9.488623714020872e-08, "loss": 0.5558, "step": 13243 }, { "epoch": 0.94, "grad_norm": 2.0265985300652667, "learning_rate": 9.466355166840302e-08, "loss": 0.4574, "step": 13244 }, { "epoch": 0.94, "grad_norm": 1.9180999210340808, "learning_rate": 9.444112531308892e-08, "loss": 0.4909, "step": 13245 }, { "epoch": 0.94, "grad_norm": 1.9008720103183583, "learning_rate": 9.421895808601811e-08, "loss": 0.5194, "step": 13246 }, { "epoch": 0.94, "grad_norm": 2.126353036520229, "learning_rate": 9.399704999892345e-08, "loss": 0.572, "step": 13247 }, { "epoch": 0.94, "grad_norm": 2.3202856101685114, "learning_rate": 9.377540106352945e-08, "loss": 0.5352, "step": 13248 }, { "epoch": 0.94, "grad_norm": 1.6303168472489464, "learning_rate": 9.35540112915434e-08, "loss": 0.4783, "step": 13249 }, { "epoch": 0.94, "grad_norm": 1.8721498355682304, "learning_rate": 9.333288069465928e-08, "loss": 0.5247, "step": 13250 }, { "epoch": 0.94, "grad_norm": 1.591092591852355, "learning_rate": 9.311200928455832e-08, "loss": 0.5354, "step": 13251 }, { "epoch": 0.94, "grad_norm": 2.155419284456468, "learning_rate": 9.289139707290839e-08, "loss": 0.5709, "step": 13252 }, { "epoch": 0.94, "grad_norm": 1.7047973816410467, "learning_rate": 9.26710440713624e-08, "loss": 0.4983, "step": 13253 }, { "epoch": 0.94, "grad_norm": 1.5643003050366895, "learning_rate": 9.245095029155993e-08, "loss": 0.4884, "step": 13254 }, { "epoch": 0.94, "grad_norm": 1.8580252611109072, "learning_rate": 9.223111574512721e-08, "loss": 0.5542, "step": 13255 }, { "epoch": 0.94, "grad_norm": 1.7797127195306517, "learning_rate": 9.201154044367667e-08, "loss": 0.5802, "step": 13256 }, { "epoch": 0.94, "grad_norm": 0.7324805747955572, "learning_rate": 9.17922243988073e-08, "loss": 0.4248, "step": 13257 }, { "epoch": 0.94, "grad_norm": 1.6851642828882265, "learning_rate": 9.157316762210378e-08, "loss": 0.5124, "step": 13258 }, { "epoch": 0.94, "grad_norm": 1.6566246197704426, "learning_rate": 9.135437012513737e-08, "loss": 0.4761, "step": 13259 }, { "epoch": 0.94, "grad_norm": 1.514201655924791, "learning_rate": 9.113583191946662e-08, "loss": 0.5236, "step": 13260 }, { "epoch": 0.94, "grad_norm": 1.634956392473887, "learning_rate": 9.091755301663452e-08, "loss": 0.4814, "step": 13261 }, { "epoch": 0.94, "grad_norm": 1.8323985454617717, "learning_rate": 9.069953342817129e-08, "loss": 0.5451, "step": 13262 }, { "epoch": 0.94, "grad_norm": 1.9242919702295125, "learning_rate": 9.048177316559326e-08, "loss": 0.5202, "step": 13263 }, { "epoch": 0.94, "grad_norm": 1.8898142409798948, "learning_rate": 9.026427224040402e-08, "loss": 0.5051, "step": 13264 }, { "epoch": 0.94, "grad_norm": 1.9425505713913152, "learning_rate": 9.004703066409326e-08, "loss": 0.5424, "step": 13265 }, { "epoch": 0.94, "grad_norm": 1.8531722529918413, "learning_rate": 8.983004844813404e-08, "loss": 0.6175, "step": 13266 }, { "epoch": 0.94, "grad_norm": 1.5267709103770997, "learning_rate": 8.96133256039905e-08, "loss": 0.516, "step": 13267 }, { "epoch": 0.94, "grad_norm": 1.5733922955172732, "learning_rate": 8.939686214310961e-08, "loss": 0.54, "step": 13268 }, { "epoch": 0.94, "grad_norm": 1.5930758632501985, "learning_rate": 8.91806580769261e-08, "loss": 0.5711, "step": 13269 }, { "epoch": 0.94, "grad_norm": 1.660508156651888, "learning_rate": 8.896471341685975e-08, "loss": 0.5352, "step": 13270 }, { "epoch": 0.94, "grad_norm": 2.149956409546665, "learning_rate": 8.874902817431863e-08, "loss": 0.5016, "step": 13271 }, { "epoch": 0.94, "grad_norm": 1.7553119807356274, "learning_rate": 8.85336023606953e-08, "loss": 0.5235, "step": 13272 }, { "epoch": 0.94, "grad_norm": 0.7267233401169656, "learning_rate": 8.8318435987369e-08, "loss": 0.443, "step": 13273 }, { "epoch": 0.94, "grad_norm": 0.61640499114548, "learning_rate": 8.810352906570673e-08, "loss": 0.4089, "step": 13274 }, { "epoch": 0.94, "grad_norm": 1.7988610144250905, "learning_rate": 8.788888160705888e-08, "loss": 0.4907, "step": 13275 }, { "epoch": 0.94, "grad_norm": 1.589982635070214, "learning_rate": 8.767449362276526e-08, "loss": 0.5411, "step": 13276 }, { "epoch": 0.94, "grad_norm": 1.7601502470007693, "learning_rate": 8.746036512415013e-08, "loss": 0.5573, "step": 13277 }, { "epoch": 0.94, "grad_norm": 2.3563650243590137, "learning_rate": 8.724649612252445e-08, "loss": 0.5222, "step": 13278 }, { "epoch": 0.94, "grad_norm": 1.8995182289689183, "learning_rate": 8.70328866291864e-08, "loss": 0.563, "step": 13279 }, { "epoch": 0.94, "grad_norm": 1.8196757895037838, "learning_rate": 8.681953665541754e-08, "loss": 0.5509, "step": 13280 }, { "epoch": 0.94, "grad_norm": 1.8578971754819096, "learning_rate": 8.660644621248938e-08, "loss": 0.5021, "step": 13281 }, { "epoch": 0.94, "grad_norm": 2.4547226703414844, "learning_rate": 8.639361531165735e-08, "loss": 0.4671, "step": 13282 }, { "epoch": 0.94, "grad_norm": 3.8213172429616193, "learning_rate": 8.618104396416416e-08, "loss": 0.4731, "step": 13283 }, { "epoch": 0.94, "grad_norm": 1.7966337444902523, "learning_rate": 8.596873218123858e-08, "loss": 0.5094, "step": 13284 }, { "epoch": 0.94, "grad_norm": 1.628631105505193, "learning_rate": 8.575667997409553e-08, "loss": 0.5169, "step": 13285 }, { "epoch": 0.94, "grad_norm": 1.8026571227831583, "learning_rate": 8.554488735393662e-08, "loss": 0.5227, "step": 13286 }, { "epoch": 0.94, "grad_norm": 1.7220391125107832, "learning_rate": 8.5333354331949e-08, "loss": 0.4843, "step": 13287 }, { "epoch": 0.94, "grad_norm": 0.7306319138885214, "learning_rate": 8.512208091930706e-08, "loss": 0.3905, "step": 13288 }, { "epoch": 0.94, "grad_norm": 1.5796255258153578, "learning_rate": 8.491106712716968e-08, "loss": 0.5234, "step": 13289 }, { "epoch": 0.94, "grad_norm": 1.511806742799927, "learning_rate": 8.470031296668569e-08, "loss": 0.4512, "step": 13290 }, { "epoch": 0.94, "grad_norm": 2.1707546612581257, "learning_rate": 8.448981844898563e-08, "loss": 0.4832, "step": 13291 }, { "epoch": 0.94, "grad_norm": 2.17274749425256, "learning_rate": 8.42795835851895e-08, "loss": 0.5702, "step": 13292 }, { "epoch": 0.94, "grad_norm": 1.644371463017494, "learning_rate": 8.406960838640233e-08, "loss": 0.5507, "step": 13293 }, { "epoch": 0.94, "grad_norm": 1.6578637416713278, "learning_rate": 8.385989286371632e-08, "loss": 0.5613, "step": 13294 }, { "epoch": 0.94, "grad_norm": 1.8034548225494922, "learning_rate": 8.365043702820875e-08, "loss": 0.6174, "step": 13295 }, { "epoch": 0.94, "grad_norm": 1.5929181072933678, "learning_rate": 8.344124089094352e-08, "loss": 0.5135, "step": 13296 }, { "epoch": 0.94, "grad_norm": 1.5657749806087617, "learning_rate": 8.323230446297237e-08, "loss": 0.5416, "step": 13297 }, { "epoch": 0.94, "grad_norm": 3.0966734878638795, "learning_rate": 8.302362775533091e-08, "loss": 0.5028, "step": 13298 }, { "epoch": 0.94, "grad_norm": 1.527821019626074, "learning_rate": 8.281521077904198e-08, "loss": 0.4321, "step": 13299 }, { "epoch": 0.94, "grad_norm": 1.721230922689349, "learning_rate": 8.260705354511566e-08, "loss": 0.5929, "step": 13300 }, { "epoch": 0.94, "grad_norm": 1.7585005840800618, "learning_rate": 8.239915606454707e-08, "loss": 0.497, "step": 13301 }, { "epoch": 0.94, "grad_norm": 1.629287753706686, "learning_rate": 8.219151834831851e-08, "loss": 0.5084, "step": 13302 }, { "epoch": 0.94, "grad_norm": 1.5541421447740387, "learning_rate": 8.198414040739732e-08, "loss": 0.5265, "step": 13303 }, { "epoch": 0.94, "grad_norm": 1.6267837962100555, "learning_rate": 8.177702225273865e-08, "loss": 0.5118, "step": 13304 }, { "epoch": 0.94, "grad_norm": 2.268169319512834, "learning_rate": 8.157016389528261e-08, "loss": 0.6038, "step": 13305 }, { "epoch": 0.94, "grad_norm": 2.1061410647800214, "learning_rate": 8.136356534595602e-08, "loss": 0.5375, "step": 13306 }, { "epoch": 0.94, "grad_norm": 1.7152081647130295, "learning_rate": 8.115722661567348e-08, "loss": 0.5579, "step": 13307 }, { "epoch": 0.94, "grad_norm": 1.8035117071798605, "learning_rate": 8.095114771533297e-08, "loss": 0.4863, "step": 13308 }, { "epoch": 0.94, "grad_norm": 1.663476190277438, "learning_rate": 8.074532865582129e-08, "loss": 0.5144, "step": 13309 }, { "epoch": 0.94, "grad_norm": 1.6734809985375327, "learning_rate": 8.053976944800978e-08, "loss": 0.5084, "step": 13310 }, { "epoch": 0.94, "grad_norm": 1.5999668721413445, "learning_rate": 8.033447010275696e-08, "loss": 0.4809, "step": 13311 }, { "epoch": 0.94, "grad_norm": 1.5818141862270003, "learning_rate": 8.01294306309075e-08, "loss": 0.5079, "step": 13312 }, { "epoch": 0.94, "grad_norm": 4.794585032305057, "learning_rate": 7.992465104329273e-08, "loss": 0.4826, "step": 13313 }, { "epoch": 0.94, "grad_norm": 0.7124379836122415, "learning_rate": 7.972013135072897e-08, "loss": 0.4385, "step": 13314 }, { "epoch": 0.94, "grad_norm": 1.8364607206223444, "learning_rate": 7.95158715640193e-08, "loss": 0.4907, "step": 13315 }, { "epoch": 0.94, "grad_norm": 2.1914095169768424, "learning_rate": 7.93118716939556e-08, "loss": 0.5612, "step": 13316 }, { "epoch": 0.95, "grad_norm": 1.8631439012568103, "learning_rate": 7.910813175131205e-08, "loss": 0.517, "step": 13317 }, { "epoch": 0.95, "grad_norm": 2.0793786942444163, "learning_rate": 7.890465174685114e-08, "loss": 0.5409, "step": 13318 }, { "epoch": 0.95, "grad_norm": 1.81298250185738, "learning_rate": 7.870143169132093e-08, "loss": 0.5103, "step": 13319 }, { "epoch": 0.95, "grad_norm": 1.4850967765852918, "learning_rate": 7.849847159545731e-08, "loss": 0.4893, "step": 13320 }, { "epoch": 0.95, "grad_norm": 1.5450078992982785, "learning_rate": 7.829577146998058e-08, "loss": 0.5476, "step": 13321 }, { "epoch": 0.95, "grad_norm": 1.9048665646178986, "learning_rate": 7.809333132559827e-08, "loss": 0.5156, "step": 13322 }, { "epoch": 0.95, "grad_norm": 1.769447589377848, "learning_rate": 7.78911511730035e-08, "loss": 0.4928, "step": 13323 }, { "epoch": 0.95, "grad_norm": 1.9334912059502467, "learning_rate": 7.768923102287717e-08, "loss": 0.4762, "step": 13324 }, { "epoch": 0.95, "grad_norm": 0.7260619027981845, "learning_rate": 7.748757088588466e-08, "loss": 0.4014, "step": 13325 }, { "epoch": 0.95, "grad_norm": 1.673369725576811, "learning_rate": 7.728617077267742e-08, "loss": 0.5468, "step": 13326 }, { "epoch": 0.95, "grad_norm": 1.781258213419989, "learning_rate": 7.708503069389639e-08, "loss": 0.5403, "step": 13327 }, { "epoch": 0.95, "grad_norm": 1.748714564549957, "learning_rate": 7.68841506601642e-08, "loss": 0.4827, "step": 13328 }, { "epoch": 0.95, "grad_norm": 2.059987221184396, "learning_rate": 7.668353068209344e-08, "loss": 0.4804, "step": 13329 }, { "epoch": 0.95, "grad_norm": 1.7375412250997837, "learning_rate": 7.648317077028066e-08, "loss": 0.5425, "step": 13330 }, { "epoch": 0.95, "grad_norm": 1.6834722007441973, "learning_rate": 7.628307093531018e-08, "loss": 0.5485, "step": 13331 }, { "epoch": 0.95, "grad_norm": 1.700299148201846, "learning_rate": 7.608323118775241e-08, "loss": 0.4972, "step": 13332 }, { "epoch": 0.95, "grad_norm": 2.278503819402102, "learning_rate": 7.588365153816224e-08, "loss": 0.5319, "step": 13333 }, { "epoch": 0.95, "grad_norm": 1.677861048642186, "learning_rate": 7.568433199708236e-08, "loss": 0.5585, "step": 13334 }, { "epoch": 0.95, "grad_norm": 1.6517932075943318, "learning_rate": 7.548527257504267e-08, "loss": 0.5152, "step": 13335 }, { "epoch": 0.95, "grad_norm": 1.7805255814493153, "learning_rate": 7.528647328255701e-08, "loss": 0.5705, "step": 13336 }, { "epoch": 0.95, "grad_norm": 1.7030129469383202, "learning_rate": 7.508793413012749e-08, "loss": 0.5036, "step": 13337 }, { "epoch": 0.95, "grad_norm": 1.7415983330134428, "learning_rate": 7.48896551282402e-08, "loss": 0.5223, "step": 13338 }, { "epoch": 0.95, "grad_norm": 1.7230438882791808, "learning_rate": 7.469163628737064e-08, "loss": 0.5659, "step": 13339 }, { "epoch": 0.95, "grad_norm": 1.5236686472499619, "learning_rate": 7.449387761797766e-08, "loss": 0.4994, "step": 13340 }, { "epoch": 0.95, "grad_norm": 1.6632839415721188, "learning_rate": 7.429637913050847e-08, "loss": 0.5259, "step": 13341 }, { "epoch": 0.95, "grad_norm": 1.9709579829148869, "learning_rate": 7.409914083539471e-08, "loss": 0.578, "step": 13342 }, { "epoch": 0.95, "grad_norm": 1.6354446564805123, "learning_rate": 7.390216274305584e-08, "loss": 0.5655, "step": 13343 }, { "epoch": 0.95, "grad_norm": 1.879110780828671, "learning_rate": 7.370544486389686e-08, "loss": 0.5933, "step": 13344 }, { "epoch": 0.95, "grad_norm": 1.8048491971300358, "learning_rate": 7.350898720830779e-08, "loss": 0.5085, "step": 13345 }, { "epoch": 0.95, "grad_norm": 1.492622374601976, "learning_rate": 7.331278978666812e-08, "loss": 0.4563, "step": 13346 }, { "epoch": 0.95, "grad_norm": 1.6756374909703124, "learning_rate": 7.311685260934065e-08, "loss": 0.4758, "step": 13347 }, { "epoch": 0.95, "grad_norm": 1.6452728306211448, "learning_rate": 7.2921175686676e-08, "loss": 0.535, "step": 13348 }, { "epoch": 0.95, "grad_norm": 0.7059315716079407, "learning_rate": 7.272575902900925e-08, "loss": 0.4298, "step": 13349 }, { "epoch": 0.95, "grad_norm": 1.6180626788816594, "learning_rate": 7.253060264666434e-08, "loss": 0.4945, "step": 13350 }, { "epoch": 0.95, "grad_norm": 0.6678214505068178, "learning_rate": 7.233570654994915e-08, "loss": 0.4226, "step": 13351 }, { "epoch": 0.95, "grad_norm": 1.7883200628945006, "learning_rate": 7.214107074915932e-08, "loss": 0.5283, "step": 13352 }, { "epoch": 0.95, "grad_norm": 1.6163726106285747, "learning_rate": 7.194669525457609e-08, "loss": 0.4689, "step": 13353 }, { "epoch": 0.95, "grad_norm": 1.6622326364157907, "learning_rate": 7.175258007646679e-08, "loss": 0.4785, "step": 13354 }, { "epoch": 0.95, "grad_norm": 0.6653429803520305, "learning_rate": 7.155872522508545e-08, "loss": 0.4165, "step": 13355 }, { "epoch": 0.95, "grad_norm": 1.914487779258154, "learning_rate": 7.136513071067164e-08, "loss": 0.5314, "step": 13356 }, { "epoch": 0.95, "grad_norm": 1.5303492511056946, "learning_rate": 7.117179654345329e-08, "loss": 0.4964, "step": 13357 }, { "epoch": 0.95, "grad_norm": 3.120055217182968, "learning_rate": 7.09787227336406e-08, "loss": 0.5872, "step": 13358 }, { "epoch": 0.95, "grad_norm": 1.8761769429594013, "learning_rate": 7.078590929143426e-08, "loss": 0.5529, "step": 13359 }, { "epoch": 0.95, "grad_norm": 1.9294302636094869, "learning_rate": 7.059335622701835e-08, "loss": 0.4854, "step": 13360 }, { "epoch": 0.95, "grad_norm": 1.6161278765833476, "learning_rate": 7.040106355056476e-08, "loss": 0.4661, "step": 13361 }, { "epoch": 0.95, "grad_norm": 1.67341798805015, "learning_rate": 7.02090312722309e-08, "loss": 0.5799, "step": 13362 }, { "epoch": 0.95, "grad_norm": 1.875048625533406, "learning_rate": 7.001725940215975e-08, "loss": 0.6032, "step": 13363 }, { "epoch": 0.95, "grad_norm": 2.0831020079408535, "learning_rate": 6.982574795048214e-08, "loss": 0.5019, "step": 13364 }, { "epoch": 0.95, "grad_norm": 1.553063035689426, "learning_rate": 6.963449692731439e-08, "loss": 0.537, "step": 13365 }, { "epoch": 0.95, "grad_norm": 1.4975976741123553, "learning_rate": 6.944350634275898e-08, "loss": 0.5005, "step": 13366 }, { "epoch": 0.95, "grad_norm": 2.3666998139686903, "learning_rate": 6.925277620690507e-08, "loss": 0.49, "step": 13367 }, { "epoch": 0.95, "grad_norm": 2.5694537282616436, "learning_rate": 6.906230652982627e-08, "loss": 0.5114, "step": 13368 }, { "epoch": 0.95, "grad_norm": 1.6180963383890599, "learning_rate": 6.887209732158506e-08, "loss": 0.4953, "step": 13369 }, { "epoch": 0.95, "grad_norm": 2.144715485930929, "learning_rate": 6.868214859222899e-08, "loss": 0.5491, "step": 13370 }, { "epoch": 0.95, "grad_norm": 1.5294086217624001, "learning_rate": 6.849246035179057e-08, "loss": 0.476, "step": 13371 }, { "epoch": 0.95, "grad_norm": 1.5904972834732505, "learning_rate": 6.830303261029126e-08, "loss": 0.4757, "step": 13372 }, { "epoch": 0.95, "grad_norm": 1.8288579947357921, "learning_rate": 6.811386537773634e-08, "loss": 0.575, "step": 13373 }, { "epoch": 0.95, "grad_norm": 1.795680611119649, "learning_rate": 6.792495866411896e-08, "loss": 0.4896, "step": 13374 }, { "epoch": 0.95, "grad_norm": 1.660074754243511, "learning_rate": 6.77363124794167e-08, "loss": 0.5293, "step": 13375 }, { "epoch": 0.95, "grad_norm": 2.368690918676919, "learning_rate": 6.754792683359601e-08, "loss": 0.5387, "step": 13376 }, { "epoch": 0.95, "grad_norm": 1.8366273086462805, "learning_rate": 6.735980173660728e-08, "loss": 0.4936, "step": 13377 }, { "epoch": 0.95, "grad_norm": 1.6567820853331534, "learning_rate": 6.717193719838755e-08, "loss": 0.483, "step": 13378 }, { "epoch": 0.95, "grad_norm": 2.0485381324512937, "learning_rate": 6.698433322886055e-08, "loss": 0.6365, "step": 13379 }, { "epoch": 0.95, "grad_norm": 1.6830510169028807, "learning_rate": 6.67969898379367e-08, "loss": 0.4756, "step": 13380 }, { "epoch": 0.95, "grad_norm": 0.6656386080466188, "learning_rate": 6.660990703551195e-08, "loss": 0.4217, "step": 13381 }, { "epoch": 0.95, "grad_norm": 1.7928496475202342, "learning_rate": 6.642308483146842e-08, "loss": 0.5253, "step": 13382 }, { "epoch": 0.95, "grad_norm": 1.7833409016331307, "learning_rate": 6.623652323567431e-08, "loss": 0.4773, "step": 13383 }, { "epoch": 0.95, "grad_norm": 1.754424857803028, "learning_rate": 6.605022225798563e-08, "loss": 0.5039, "step": 13384 }, { "epoch": 0.95, "grad_norm": 1.5797217711390255, "learning_rate": 6.586418190824228e-08, "loss": 0.5636, "step": 13385 }, { "epoch": 0.95, "grad_norm": 1.752777120473431, "learning_rate": 6.567840219627198e-08, "loss": 0.4835, "step": 13386 }, { "epoch": 0.95, "grad_norm": 1.6743565230509634, "learning_rate": 6.549288313188851e-08, "loss": 0.5561, "step": 13387 }, { "epoch": 0.95, "grad_norm": 1.654180398810232, "learning_rate": 6.530762472489128e-08, "loss": 0.5148, "step": 13388 }, { "epoch": 0.95, "grad_norm": 1.6400643340806267, "learning_rate": 6.512262698506632e-08, "loss": 0.5367, "step": 13389 }, { "epoch": 0.95, "grad_norm": 1.5844814019226747, "learning_rate": 6.49378899221853e-08, "loss": 0.4425, "step": 13390 }, { "epoch": 0.95, "grad_norm": 1.6315284585406573, "learning_rate": 6.475341354600762e-08, "loss": 0.4719, "step": 13391 }, { "epoch": 0.95, "grad_norm": 2.3865546067146606, "learning_rate": 6.456919786627824e-08, "loss": 0.5154, "step": 13392 }, { "epoch": 0.95, "grad_norm": 1.8408464314796802, "learning_rate": 6.438524289272662e-08, "loss": 0.4753, "step": 13393 }, { "epoch": 0.95, "grad_norm": 1.6925066164838034, "learning_rate": 6.420154863507056e-08, "loss": 0.5268, "step": 13394 }, { "epoch": 0.95, "grad_norm": 2.004397655373775, "learning_rate": 6.401811510301337e-08, "loss": 0.5238, "step": 13395 }, { "epoch": 0.95, "grad_norm": 1.7962022178319446, "learning_rate": 6.38349423062451e-08, "loss": 0.6012, "step": 13396 }, { "epoch": 0.95, "grad_norm": 2.0907976447317504, "learning_rate": 6.365203025444133e-08, "loss": 0.5654, "step": 13397 }, { "epoch": 0.95, "grad_norm": 1.7298887900558526, "learning_rate": 6.346937895726324e-08, "loss": 0.5206, "step": 13398 }, { "epoch": 0.95, "grad_norm": 1.648360626517202, "learning_rate": 6.328698842436032e-08, "loss": 0.5509, "step": 13399 }, { "epoch": 0.95, "grad_norm": 2.1081303306850607, "learning_rate": 6.310485866536653e-08, "loss": 0.576, "step": 13400 }, { "epoch": 0.95, "grad_norm": 1.595806411843447, "learning_rate": 6.292298968990251e-08, "loss": 0.5213, "step": 13401 }, { "epoch": 0.95, "grad_norm": 1.5675183188381925, "learning_rate": 6.274138150757503e-08, "loss": 0.5337, "step": 13402 }, { "epoch": 0.95, "grad_norm": 1.9137576573693766, "learning_rate": 6.256003412797807e-08, "loss": 0.462, "step": 13403 }, { "epoch": 0.95, "grad_norm": 0.6812983168941994, "learning_rate": 6.237894756069007e-08, "loss": 0.391, "step": 13404 }, { "epoch": 0.95, "grad_norm": 1.6429217492421295, "learning_rate": 6.219812181527673e-08, "loss": 0.5293, "step": 13405 }, { "epoch": 0.95, "grad_norm": 1.5710539652249362, "learning_rate": 6.201755690129096e-08, "loss": 0.4542, "step": 13406 }, { "epoch": 0.95, "grad_norm": 1.8411509097008711, "learning_rate": 6.183725282826958e-08, "loss": 0.5353, "step": 13407 }, { "epoch": 0.95, "grad_norm": 1.596627721615267, "learning_rate": 6.165720960573662e-08, "loss": 0.4664, "step": 13408 }, { "epoch": 0.95, "grad_norm": 1.6771937366903975, "learning_rate": 6.147742724320394e-08, "loss": 0.5177, "step": 13409 }, { "epoch": 0.95, "grad_norm": 2.094216082932814, "learning_rate": 6.129790575016725e-08, "loss": 0.5265, "step": 13410 }, { "epoch": 0.95, "grad_norm": 1.6540755149447988, "learning_rate": 6.111864513611065e-08, "loss": 0.5314, "step": 13411 }, { "epoch": 0.95, "grad_norm": 1.7861375265426342, "learning_rate": 6.093964541050157e-08, "loss": 0.5685, "step": 13412 }, { "epoch": 0.95, "grad_norm": 1.5748671680450401, "learning_rate": 6.076090658279632e-08, "loss": 0.5102, "step": 13413 }, { "epoch": 0.95, "grad_norm": 2.556161371158732, "learning_rate": 6.05824286624368e-08, "loss": 0.5259, "step": 13414 }, { "epoch": 0.95, "grad_norm": 1.8344635685275616, "learning_rate": 6.040421165884991e-08, "loss": 0.5275, "step": 13415 }, { "epoch": 0.95, "grad_norm": 1.5229663424963504, "learning_rate": 6.02262555814509e-08, "loss": 0.4712, "step": 13416 }, { "epoch": 0.95, "grad_norm": 1.4376129889489422, "learning_rate": 6.004856043963836e-08, "loss": 0.4563, "step": 13417 }, { "epoch": 0.95, "grad_norm": 1.7876184649939328, "learning_rate": 5.987112624280034e-08, "loss": 0.4921, "step": 13418 }, { "epoch": 0.95, "grad_norm": 1.6340068293474668, "learning_rate": 5.969395300030878e-08, "loss": 0.441, "step": 13419 }, { "epoch": 0.95, "grad_norm": 1.7506523443137987, "learning_rate": 5.9517040721522314e-08, "loss": 0.4997, "step": 13420 }, { "epoch": 0.95, "grad_norm": 1.8746323175760728, "learning_rate": 5.934038941578679e-08, "loss": 0.5562, "step": 13421 }, { "epoch": 0.95, "grad_norm": 1.9351534805137758, "learning_rate": 5.916399909243309e-08, "loss": 0.5324, "step": 13422 }, { "epoch": 0.95, "grad_norm": 2.0505259913374143, "learning_rate": 5.898786976077875e-08, "loss": 0.5291, "step": 13423 }, { "epoch": 0.95, "grad_norm": 2.0178405573273235, "learning_rate": 5.881200143012744e-08, "loss": 0.5263, "step": 13424 }, { "epoch": 0.95, "grad_norm": 2.009706464231653, "learning_rate": 5.863639410976951e-08, "loss": 0.5819, "step": 13425 }, { "epoch": 0.95, "grad_norm": 3.319199156660441, "learning_rate": 5.846104780898032e-08, "loss": 0.5691, "step": 13426 }, { "epoch": 0.95, "grad_norm": 1.8535511951251624, "learning_rate": 5.8285962537023565e-08, "loss": 0.5028, "step": 13427 }, { "epoch": 0.95, "grad_norm": 1.9534541453852678, "learning_rate": 5.81111383031463e-08, "loss": 0.6048, "step": 13428 }, { "epoch": 0.95, "grad_norm": 1.5827648951870454, "learning_rate": 5.793657511658502e-08, "loss": 0.4509, "step": 13429 }, { "epoch": 0.95, "grad_norm": 1.5980687985662734, "learning_rate": 5.776227298655901e-08, "loss": 0.4672, "step": 13430 }, { "epoch": 0.95, "grad_norm": 12.582911141647005, "learning_rate": 5.7588231922277025e-08, "loss": 0.4788, "step": 13431 }, { "epoch": 0.95, "grad_norm": 2.0168425799177743, "learning_rate": 5.7414451932931156e-08, "loss": 0.5328, "step": 13432 }, { "epoch": 0.95, "grad_norm": 1.7272323532949372, "learning_rate": 5.724093302770184e-08, "loss": 0.4978, "step": 13433 }, { "epoch": 0.95, "grad_norm": 1.6801779973860833, "learning_rate": 5.706767521575563e-08, "loss": 0.4898, "step": 13434 }, { "epoch": 0.95, "grad_norm": 4.354322021760506, "learning_rate": 5.689467850624242e-08, "loss": 0.5061, "step": 13435 }, { "epoch": 0.95, "grad_norm": 1.6484029108375429, "learning_rate": 5.672194290830268e-08, "loss": 0.5643, "step": 13436 }, { "epoch": 0.95, "grad_norm": 0.6449602396775459, "learning_rate": 5.654946843105968e-08, "loss": 0.4276, "step": 13437 }, { "epoch": 0.95, "grad_norm": 1.8335991159532974, "learning_rate": 5.637725508362446e-08, "loss": 0.5048, "step": 13438 }, { "epoch": 0.95, "grad_norm": 0.6823278509396153, "learning_rate": 5.620530287509418e-08, "loss": 0.4437, "step": 13439 }, { "epoch": 0.95, "grad_norm": 1.5729144392492262, "learning_rate": 5.603361181455158e-08, "loss": 0.5043, "step": 13440 }, { "epoch": 0.95, "grad_norm": 0.7092050047082739, "learning_rate": 5.586218191106607e-08, "loss": 0.4196, "step": 13441 }, { "epoch": 0.95, "grad_norm": 1.7931022657314613, "learning_rate": 5.5691013173693185e-08, "loss": 0.4904, "step": 13442 }, { "epoch": 0.95, "grad_norm": 1.6665382183279451, "learning_rate": 5.552010561147458e-08, "loss": 0.4718, "step": 13443 }, { "epoch": 0.95, "grad_norm": 0.6819431982322116, "learning_rate": 5.5349459233438044e-08, "loss": 0.4318, "step": 13444 }, { "epoch": 0.95, "grad_norm": 1.7922030320275668, "learning_rate": 5.517907404859802e-08, "loss": 0.4759, "step": 13445 }, { "epoch": 0.95, "grad_norm": 2.0445390749198236, "learning_rate": 5.500895006595452e-08, "loss": 0.5378, "step": 13446 }, { "epoch": 0.95, "grad_norm": 1.513944227125897, "learning_rate": 5.483908729449372e-08, "loss": 0.4641, "step": 13447 }, { "epoch": 0.95, "grad_norm": 1.7558670197483133, "learning_rate": 5.466948574318953e-08, "loss": 0.4362, "step": 13448 }, { "epoch": 0.95, "grad_norm": 1.447877989305085, "learning_rate": 5.4500145420999795e-08, "loss": 0.4366, "step": 13449 }, { "epoch": 0.95, "grad_norm": 1.592575130963031, "learning_rate": 5.433106633687013e-08, "loss": 0.5419, "step": 13450 }, { "epoch": 0.95, "grad_norm": 1.6628726277697317, "learning_rate": 5.416224849973173e-08, "loss": 0.5037, "step": 13451 }, { "epoch": 0.95, "grad_norm": 1.6641599200784583, "learning_rate": 5.3993691918502455e-08, "loss": 0.4721, "step": 13452 }, { "epoch": 0.95, "grad_norm": 2.7159631757985503, "learning_rate": 5.382539660208519e-08, "loss": 0.5327, "step": 13453 }, { "epoch": 0.95, "grad_norm": 0.7041378142271149, "learning_rate": 5.36573625593706e-08, "loss": 0.4223, "step": 13454 }, { "epoch": 0.95, "grad_norm": 1.9161707904837102, "learning_rate": 5.348958979923491e-08, "loss": 0.5372, "step": 13455 }, { "epoch": 0.95, "grad_norm": 1.765784321385749, "learning_rate": 5.332207833053937e-08, "loss": 0.534, "step": 13456 }, { "epoch": 0.95, "grad_norm": 2.5480151394848547, "learning_rate": 5.315482816213413e-08, "loss": 0.5627, "step": 13457 }, { "epoch": 0.96, "grad_norm": 1.8943917553211822, "learning_rate": 5.2987839302852676e-08, "loss": 0.4621, "step": 13458 }, { "epoch": 0.96, "grad_norm": 2.3490621921712256, "learning_rate": 5.282111176151683e-08, "loss": 0.4683, "step": 13459 }, { "epoch": 0.96, "grad_norm": 1.8083271366746292, "learning_rate": 5.265464554693234e-08, "loss": 0.5542, "step": 13460 }, { "epoch": 0.96, "grad_norm": 1.811398994035874, "learning_rate": 5.2488440667893825e-08, "loss": 0.5767, "step": 13461 }, { "epoch": 0.96, "grad_norm": 1.8280836424957276, "learning_rate": 5.2322497133180386e-08, "loss": 0.5106, "step": 13462 }, { "epoch": 0.96, "grad_norm": 1.5580861217613675, "learning_rate": 5.2156814951557225e-08, "loss": 0.4954, "step": 13463 }, { "epoch": 0.96, "grad_norm": 1.7099717565933956, "learning_rate": 5.199139413177734e-08, "loss": 0.5573, "step": 13464 }, { "epoch": 0.96, "grad_norm": 2.075248734992447, "learning_rate": 5.182623468257819e-08, "loss": 0.4672, "step": 13465 }, { "epoch": 0.96, "grad_norm": 1.6280732456691183, "learning_rate": 5.166133661268335e-08, "loss": 0.5696, "step": 13466 }, { "epoch": 0.96, "grad_norm": 0.7361206626530917, "learning_rate": 5.1496699930804196e-08, "loss": 0.4428, "step": 13467 }, { "epoch": 0.96, "grad_norm": 1.7535188431145043, "learning_rate": 5.1332324645637646e-08, "loss": 0.5916, "step": 13468 }, { "epoch": 0.96, "grad_norm": 1.677611782393833, "learning_rate": 5.1168210765865644e-08, "loss": 0.4919, "step": 13469 }, { "epoch": 0.96, "grad_norm": 1.5847152796425827, "learning_rate": 5.1004358300157374e-08, "loss": 0.5073, "step": 13470 }, { "epoch": 0.96, "grad_norm": 1.7953184313424153, "learning_rate": 5.084076725716924e-08, "loss": 0.4391, "step": 13471 }, { "epoch": 0.96, "grad_norm": 1.5746857107565237, "learning_rate": 5.0677437645541e-08, "loss": 0.5457, "step": 13472 }, { "epoch": 0.96, "grad_norm": 1.7436937206615564, "learning_rate": 5.0514369473901315e-08, "loss": 0.4695, "step": 13473 }, { "epoch": 0.96, "grad_norm": 1.627958593764295, "learning_rate": 5.035156275086439e-08, "loss": 0.4771, "step": 13474 }, { "epoch": 0.96, "grad_norm": 1.7675633944848665, "learning_rate": 5.0189017485028915e-08, "loss": 0.458, "step": 13475 }, { "epoch": 0.96, "grad_norm": 1.5939533651406348, "learning_rate": 5.0026733684982455e-08, "loss": 0.5386, "step": 13476 }, { "epoch": 0.96, "grad_norm": 1.6893117489822334, "learning_rate": 4.986471135929538e-08, "loss": 0.5065, "step": 13477 }, { "epoch": 0.96, "grad_norm": 0.6784606065507232, "learning_rate": 4.970295051652918e-08, "loss": 0.4282, "step": 13478 }, { "epoch": 0.96, "grad_norm": 0.7473703701368204, "learning_rate": 4.954145116522591e-08, "loss": 0.42, "step": 13479 }, { "epoch": 0.96, "grad_norm": 1.887855512881831, "learning_rate": 4.9380213313918736e-08, "loss": 0.5222, "step": 13480 }, { "epoch": 0.96, "grad_norm": 1.986767390150602, "learning_rate": 4.921923697112252e-08, "loss": 0.5383, "step": 13481 }, { "epoch": 0.96, "grad_norm": 2.1592848554386364, "learning_rate": 4.905852214534268e-08, "loss": 0.5288, "step": 13482 }, { "epoch": 0.96, "grad_norm": 1.53979915954014, "learning_rate": 4.889806884506687e-08, "loss": 0.4108, "step": 13483 }, { "epoch": 0.96, "grad_norm": 1.8262072779890686, "learning_rate": 4.87378770787722e-08, "loss": 0.554, "step": 13484 }, { "epoch": 0.96, "grad_norm": 2.07797290670762, "learning_rate": 4.857794685492023e-08, "loss": 0.5784, "step": 13485 }, { "epoch": 0.96, "grad_norm": 2.0143849669217406, "learning_rate": 4.84182781819581e-08, "loss": 0.5607, "step": 13486 }, { "epoch": 0.96, "grad_norm": 1.673227367902069, "learning_rate": 4.8258871068321834e-08, "loss": 0.4984, "step": 13487 }, { "epoch": 0.96, "grad_norm": 1.943486479082497, "learning_rate": 4.8099725522429695e-08, "loss": 0.5479, "step": 13488 }, { "epoch": 0.96, "grad_norm": 2.8726823283365865, "learning_rate": 4.794084155268996e-08, "loss": 0.5234, "step": 13489 }, { "epoch": 0.96, "grad_norm": 1.616155205355414, "learning_rate": 4.7782219167494814e-08, "loss": 0.4513, "step": 13490 }, { "epoch": 0.96, "grad_norm": 1.7493781605692384, "learning_rate": 4.76238583752231e-08, "loss": 0.447, "step": 13491 }, { "epoch": 0.96, "grad_norm": 1.6206661781984562, "learning_rate": 4.746575918424034e-08, "loss": 0.4895, "step": 13492 }, { "epoch": 0.96, "grad_norm": 2.9850481665196864, "learning_rate": 4.730792160289766e-08, "loss": 0.4919, "step": 13493 }, { "epoch": 0.96, "grad_norm": 1.7267101071897075, "learning_rate": 4.7150345639532245e-08, "loss": 0.5008, "step": 13494 }, { "epoch": 0.96, "grad_norm": 2.0914627693709575, "learning_rate": 4.699303130246857e-08, "loss": 0.5206, "step": 13495 }, { "epoch": 0.96, "grad_norm": 1.8705177864367013, "learning_rate": 4.6835978600015544e-08, "loss": 0.5429, "step": 13496 }, { "epoch": 0.96, "grad_norm": 1.6849582389661173, "learning_rate": 4.66791875404704e-08, "loss": 0.5294, "step": 13497 }, { "epoch": 0.96, "grad_norm": 1.7192704174323055, "learning_rate": 4.652265813211376e-08, "loss": 0.5216, "step": 13498 }, { "epoch": 0.96, "grad_norm": 1.8281659337905243, "learning_rate": 4.6366390383215663e-08, "loss": 0.5214, "step": 13499 }, { "epoch": 0.96, "grad_norm": 1.7284313841117678, "learning_rate": 4.621038430203006e-08, "loss": 0.5356, "step": 13500 }, { "epoch": 0.96, "grad_norm": 1.5790346510915527, "learning_rate": 4.6054639896797594e-08, "loss": 0.4722, "step": 13501 }, { "epoch": 0.96, "grad_norm": 1.7221845124017083, "learning_rate": 4.589915717574556e-08, "loss": 0.4753, "step": 13502 }, { "epoch": 0.96, "grad_norm": 2.1566720490876325, "learning_rate": 4.5743936147086274e-08, "loss": 0.5799, "step": 13503 }, { "epoch": 0.96, "grad_norm": 1.913353239577548, "learning_rate": 4.558897681901986e-08, "loss": 0.4962, "step": 13504 }, { "epoch": 0.96, "grad_norm": 1.6636466457459353, "learning_rate": 4.543427919973142e-08, "loss": 0.5215, "step": 13505 }, { "epoch": 0.96, "grad_norm": 1.666692080266022, "learning_rate": 4.527984329739277e-08, "loss": 0.5016, "step": 13506 }, { "epoch": 0.96, "grad_norm": 1.7494176440550904, "learning_rate": 4.512566912016181e-08, "loss": 0.5272, "step": 13507 }, { "epoch": 0.96, "grad_norm": 1.710727193725113, "learning_rate": 4.4971756676182586e-08, "loss": 0.5358, "step": 13508 }, { "epoch": 0.96, "grad_norm": 1.71237987896558, "learning_rate": 4.4818105973584714e-08, "loss": 0.4912, "step": 13509 }, { "epoch": 0.96, "grad_norm": 1.982262899944758, "learning_rate": 4.4664717020485024e-08, "loss": 0.4882, "step": 13510 }, { "epoch": 0.96, "grad_norm": 1.5248669573717897, "learning_rate": 4.451158982498594e-08, "loss": 0.542, "step": 13511 }, { "epoch": 0.96, "grad_norm": 1.5829819300019203, "learning_rate": 4.435872439517597e-08, "loss": 0.5098, "step": 13512 }, { "epoch": 0.96, "grad_norm": 1.741175371108839, "learning_rate": 4.4206120739130887e-08, "loss": 0.515, "step": 13513 }, { "epoch": 0.96, "grad_norm": 1.5544050713235698, "learning_rate": 4.40537788649098e-08, "loss": 0.4409, "step": 13514 }, { "epoch": 0.96, "grad_norm": 1.6391655996414793, "learning_rate": 4.390169878056238e-08, "loss": 0.527, "step": 13515 }, { "epoch": 0.96, "grad_norm": 0.7420280671547909, "learning_rate": 4.374988049411999e-08, "loss": 0.4156, "step": 13516 }, { "epoch": 0.96, "grad_norm": 0.6583594213476687, "learning_rate": 4.359832401360398e-08, "loss": 0.4109, "step": 13517 }, { "epoch": 0.96, "grad_norm": 1.4812647286569571, "learning_rate": 4.344702934701794e-08, "loss": 0.4315, "step": 13518 }, { "epoch": 0.96, "grad_norm": 1.7634519624310165, "learning_rate": 4.32959965023555e-08, "loss": 0.5262, "step": 13519 }, { "epoch": 0.96, "grad_norm": 0.670074050413529, "learning_rate": 4.314522548759414e-08, "loss": 0.4321, "step": 13520 }, { "epoch": 0.96, "grad_norm": 1.5084187762874215, "learning_rate": 4.299471631069751e-08, "loss": 0.5465, "step": 13521 }, { "epoch": 0.96, "grad_norm": 0.6256008576612377, "learning_rate": 4.284446897961703e-08, "loss": 0.4006, "step": 13522 }, { "epoch": 0.96, "grad_norm": 1.841570993112766, "learning_rate": 4.2694483502289105e-08, "loss": 0.5575, "step": 13523 }, { "epoch": 0.96, "grad_norm": 1.7727537525765926, "learning_rate": 4.254475988663631e-08, "loss": 0.5703, "step": 13524 }, { "epoch": 0.96, "grad_norm": 1.692731591299952, "learning_rate": 4.239529814056675e-08, "loss": 0.5342, "step": 13525 }, { "epoch": 0.96, "grad_norm": 1.5533537830820223, "learning_rate": 4.224609827197634e-08, "loss": 0.4791, "step": 13526 }, { "epoch": 0.96, "grad_norm": 1.724535382062079, "learning_rate": 4.209716028874655e-08, "loss": 0.5199, "step": 13527 }, { "epoch": 0.96, "grad_norm": 2.0221821169698915, "learning_rate": 4.194848419874387e-08, "loss": 0.4749, "step": 13528 }, { "epoch": 0.96, "grad_norm": 1.7715978565470583, "learning_rate": 4.1800070009823113e-08, "loss": 0.5601, "step": 13529 }, { "epoch": 0.96, "grad_norm": 1.7254247664983484, "learning_rate": 4.165191772982302e-08, "loss": 0.5195, "step": 13530 }, { "epoch": 0.96, "grad_norm": 1.586477851604231, "learning_rate": 4.150402736656955e-08, "loss": 0.5096, "step": 13531 }, { "epoch": 0.96, "grad_norm": 2.231661655399869, "learning_rate": 4.135639892787535e-08, "loss": 0.5716, "step": 13532 }, { "epoch": 0.96, "grad_norm": 1.7281970538905138, "learning_rate": 4.120903242153862e-08, "loss": 0.503, "step": 13533 }, { "epoch": 0.96, "grad_norm": 1.6331855019557588, "learning_rate": 4.106192785534258e-08, "loss": 0.4809, "step": 13534 }, { "epoch": 0.96, "grad_norm": 1.5966585075269586, "learning_rate": 4.091508523705934e-08, "loss": 0.5459, "step": 13535 }, { "epoch": 0.96, "grad_norm": 0.7406712939253222, "learning_rate": 4.076850457444492e-08, "loss": 0.4389, "step": 13536 }, { "epoch": 0.96, "grad_norm": 1.943912872534278, "learning_rate": 4.0622185875242024e-08, "loss": 0.4836, "step": 13537 }, { "epoch": 0.96, "grad_norm": 1.8733216805567499, "learning_rate": 4.0476129147180573e-08, "loss": 0.5541, "step": 13538 }, { "epoch": 0.96, "grad_norm": 1.5284225675624992, "learning_rate": 4.0330334397974405e-08, "loss": 0.4994, "step": 13539 }, { "epoch": 0.96, "grad_norm": 1.7926471502953936, "learning_rate": 4.0184801635325676e-08, "loss": 0.4934, "step": 13540 }, { "epoch": 0.96, "grad_norm": 1.6630940973155597, "learning_rate": 4.003953086692214e-08, "loss": 0.4577, "step": 13541 }, { "epoch": 0.96, "grad_norm": 1.5876445041765532, "learning_rate": 3.989452210043709e-08, "loss": 0.5004, "step": 13542 }, { "epoch": 0.96, "grad_norm": 1.901179571493885, "learning_rate": 3.9749775343531086e-08, "loss": 0.5237, "step": 13543 }, { "epoch": 0.96, "grad_norm": 1.9568972197303869, "learning_rate": 3.96052906038491e-08, "loss": 0.6032, "step": 13544 }, { "epoch": 0.96, "grad_norm": 1.918394152973589, "learning_rate": 3.946106788902337e-08, "loss": 0.5536, "step": 13545 }, { "epoch": 0.96, "grad_norm": 2.3690753836684393, "learning_rate": 3.9317107206673364e-08, "loss": 0.5226, "step": 13546 }, { "epoch": 0.96, "grad_norm": 1.8967361825089084, "learning_rate": 3.917340856440244e-08, "loss": 0.4477, "step": 13547 }, { "epoch": 0.96, "grad_norm": 1.8808881553752759, "learning_rate": 3.902997196980174e-08, "loss": 0.5154, "step": 13548 }, { "epoch": 0.96, "grad_norm": 1.816953585530986, "learning_rate": 3.888679743044799e-08, "loss": 0.5508, "step": 13549 }, { "epoch": 0.96, "grad_norm": 4.7182652723672165, "learning_rate": 3.874388495390402e-08, "loss": 0.5254, "step": 13550 }, { "epoch": 0.96, "grad_norm": 1.8420155675126932, "learning_rate": 3.8601234547719354e-08, "loss": 0.5601, "step": 13551 }, { "epoch": 0.96, "grad_norm": 4.589802632176212, "learning_rate": 3.845884621942853e-08, "loss": 0.4836, "step": 13552 }, { "epoch": 0.96, "grad_norm": 0.7107620243714934, "learning_rate": 3.831671997655384e-08, "loss": 0.4326, "step": 13553 }, { "epoch": 0.96, "grad_norm": 1.6019172693516748, "learning_rate": 3.817485582660263e-08, "loss": 0.4805, "step": 13554 }, { "epoch": 0.96, "grad_norm": 1.6741964932657278, "learning_rate": 3.803325377706779e-08, "loss": 0.4638, "step": 13555 }, { "epoch": 0.96, "grad_norm": 1.6907528556823936, "learning_rate": 3.789191383543056e-08, "loss": 0.5005, "step": 13556 }, { "epoch": 0.96, "grad_norm": 1.6998320895461911, "learning_rate": 3.7750836009156074e-08, "loss": 0.4391, "step": 13557 }, { "epoch": 0.96, "grad_norm": 1.9953528276207508, "learning_rate": 3.7610020305696716e-08, "loss": 0.5274, "step": 13558 }, { "epoch": 0.96, "grad_norm": 2.0121406373910395, "learning_rate": 3.746946673249152e-08, "loss": 0.4483, "step": 13559 }, { "epoch": 0.96, "grad_norm": 4.155136028532395, "learning_rate": 3.7329175296964e-08, "loss": 0.5726, "step": 13560 }, { "epoch": 0.96, "grad_norm": 1.6198016887616635, "learning_rate": 3.718914600652546e-08, "loss": 0.4939, "step": 13561 }, { "epoch": 0.96, "grad_norm": 0.6494925106838684, "learning_rate": 3.7049378868572186e-08, "loss": 0.4296, "step": 13562 }, { "epoch": 0.96, "grad_norm": 1.6778122309291308, "learning_rate": 3.690987389048772e-08, "loss": 0.5128, "step": 13563 }, { "epoch": 0.96, "grad_norm": 1.7340801444347587, "learning_rate": 3.6770631079641185e-08, "loss": 0.5367, "step": 13564 }, { "epoch": 0.96, "grad_norm": 1.6983451067182276, "learning_rate": 3.663165044338723e-08, "loss": 0.4874, "step": 13565 }, { "epoch": 0.96, "grad_norm": 1.7862432746652919, "learning_rate": 3.649293198906778e-08, "loss": 0.5221, "step": 13566 }, { "epoch": 0.96, "grad_norm": 1.9162733189678123, "learning_rate": 3.63544757240103e-08, "loss": 0.5296, "step": 13567 }, { "epoch": 0.96, "grad_norm": 0.6692983118349588, "learning_rate": 3.621628165552893e-08, "loss": 0.439, "step": 13568 }, { "epoch": 0.96, "grad_norm": 1.7864579706975108, "learning_rate": 3.607834979092284e-08, "loss": 0.5658, "step": 13569 }, { "epoch": 0.96, "grad_norm": 1.8307996415544872, "learning_rate": 3.5940680137478425e-08, "loss": 0.579, "step": 13570 }, { "epoch": 0.96, "grad_norm": 1.5774309034230973, "learning_rate": 3.580327270246764e-08, "loss": 0.4579, "step": 13571 }, { "epoch": 0.96, "grad_norm": 2.1152719534357693, "learning_rate": 3.566612749314913e-08, "loss": 0.5989, "step": 13572 }, { "epoch": 0.96, "grad_norm": 2.041686939417134, "learning_rate": 3.552924451676709e-08, "loss": 0.5388, "step": 13573 }, { "epoch": 0.96, "grad_norm": 1.504916420521562, "learning_rate": 3.5392623780552415e-08, "loss": 0.535, "step": 13574 }, { "epoch": 0.96, "grad_norm": 1.5756269279706359, "learning_rate": 3.52562652917221e-08, "loss": 0.5568, "step": 13575 }, { "epoch": 0.96, "grad_norm": 1.5653795656307867, "learning_rate": 3.512016905747817e-08, "loss": 0.4839, "step": 13576 }, { "epoch": 0.96, "grad_norm": 1.6555537849264002, "learning_rate": 3.4984335085010425e-08, "loss": 0.5365, "step": 13577 }, { "epoch": 0.96, "grad_norm": 2.0066329634111195, "learning_rate": 3.484876338149368e-08, "loss": 0.5032, "step": 13578 }, { "epoch": 0.96, "grad_norm": 2.3571161874995776, "learning_rate": 3.4713453954089425e-08, "loss": 0.4739, "step": 13579 }, { "epoch": 0.96, "grad_norm": 2.0814980573350117, "learning_rate": 3.457840680994584e-08, "loss": 0.5208, "step": 13580 }, { "epoch": 0.96, "grad_norm": 1.9235533573339703, "learning_rate": 3.444362195619555e-08, "loss": 0.5544, "step": 13581 }, { "epoch": 0.96, "grad_norm": 1.7370474985713849, "learning_rate": 3.430909939995841e-08, "loss": 0.4533, "step": 13582 }, { "epoch": 0.96, "grad_norm": 1.629337189711682, "learning_rate": 3.4174839148340964e-08, "loss": 0.5149, "step": 13583 }, { "epoch": 0.96, "grad_norm": 1.8643303570224463, "learning_rate": 3.404084120843531e-08, "loss": 0.5306, "step": 13584 }, { "epoch": 0.96, "grad_norm": 1.8757311992125263, "learning_rate": 3.390710558731913e-08, "loss": 0.5138, "step": 13585 }, { "epoch": 0.96, "grad_norm": 2.217709941504913, "learning_rate": 3.377363229205732e-08, "loss": 0.4901, "step": 13586 }, { "epoch": 0.96, "grad_norm": 0.7214443133277657, "learning_rate": 3.36404213296998e-08, "loss": 0.4204, "step": 13587 }, { "epoch": 0.96, "grad_norm": 1.561784423002264, "learning_rate": 3.3507472707283716e-08, "loss": 0.54, "step": 13588 }, { "epoch": 0.96, "grad_norm": 2.3812184634857934, "learning_rate": 3.337478643183179e-08, "loss": 0.4746, "step": 13589 }, { "epoch": 0.96, "grad_norm": 1.9477716493002033, "learning_rate": 3.324236251035229e-08, "loss": 0.5404, "step": 13590 }, { "epoch": 0.96, "grad_norm": 1.6596289823523223, "learning_rate": 3.31102009498413e-08, "loss": 0.5118, "step": 13591 }, { "epoch": 0.96, "grad_norm": 1.8617291662402755, "learning_rate": 3.297830175727989e-08, "loss": 0.5048, "step": 13592 }, { "epoch": 0.96, "grad_norm": 0.666490444276437, "learning_rate": 3.284666493963473e-08, "loss": 0.4211, "step": 13593 }, { "epoch": 0.96, "grad_norm": 2.0035348308753553, "learning_rate": 3.2715290503859685e-08, "loss": 0.5814, "step": 13594 }, { "epoch": 0.96, "grad_norm": 2.6817175860089, "learning_rate": 3.2584178456894766e-08, "loss": 0.5119, "step": 13595 }, { "epoch": 0.96, "grad_norm": 1.5583792205216291, "learning_rate": 3.2453328805665544e-08, "loss": 0.4477, "step": 13596 }, { "epoch": 0.96, "grad_norm": 1.6164753386814517, "learning_rate": 3.232274155708315e-08, "loss": 0.5388, "step": 13597 }, { "epoch": 0.96, "grad_norm": 0.6388501041058647, "learning_rate": 3.219241671804707e-08, "loss": 0.4284, "step": 13598 }, { "epoch": 0.97, "grad_norm": 1.9173507737396176, "learning_rate": 3.206235429544069e-08, "loss": 0.5276, "step": 13599 }, { "epoch": 0.97, "grad_norm": 1.9950124470494812, "learning_rate": 3.193255429613407e-08, "loss": 0.5455, "step": 13600 }, { "epoch": 0.97, "grad_norm": 2.6266415874671907, "learning_rate": 3.1803016726983936e-08, "loss": 0.556, "step": 13601 }, { "epoch": 0.97, "grad_norm": 0.6719188961522228, "learning_rate": 3.1673741594833165e-08, "loss": 0.4158, "step": 13602 }, { "epoch": 0.97, "grad_norm": 0.7311484360621182, "learning_rate": 3.154472890651072e-08, "loss": 0.4124, "step": 13603 }, { "epoch": 0.97, "grad_norm": 1.599198582280624, "learning_rate": 3.141597866883117e-08, "loss": 0.4779, "step": 13604 }, { "epoch": 0.97, "grad_norm": 1.6740225150508983, "learning_rate": 3.128749088859517e-08, "loss": 0.4813, "step": 13605 }, { "epoch": 0.97, "grad_norm": 1.6566542992114166, "learning_rate": 3.115926557259008e-08, "loss": 0.4857, "step": 13606 }, { "epoch": 0.97, "grad_norm": 1.570729639738782, "learning_rate": 3.103130272758936e-08, "loss": 0.5054, "step": 13607 }, { "epoch": 0.97, "grad_norm": 1.6772865886331187, "learning_rate": 3.0903602360352613e-08, "loss": 0.4752, "step": 13608 }, { "epoch": 0.97, "grad_norm": 1.7484575967352922, "learning_rate": 3.0776164477624994e-08, "loss": 0.4824, "step": 13609 }, { "epoch": 0.97, "grad_norm": 1.6022771260224067, "learning_rate": 3.064898908613834e-08, "loss": 0.4974, "step": 13610 }, { "epoch": 0.97, "grad_norm": 1.5108438787217773, "learning_rate": 3.0522076192610605e-08, "loss": 0.4791, "step": 13611 }, { "epoch": 0.97, "grad_norm": 0.6605534377592105, "learning_rate": 3.039542580374588e-08, "loss": 0.4043, "step": 13612 }, { "epoch": 0.97, "grad_norm": 0.7212945818309867, "learning_rate": 3.026903792623381e-08, "loss": 0.4343, "step": 13613 }, { "epoch": 0.97, "grad_norm": 1.6912877547802512, "learning_rate": 3.014291256675128e-08, "loss": 0.5168, "step": 13614 }, { "epoch": 0.97, "grad_norm": 1.8620807891718332, "learning_rate": 3.001704973196018e-08, "loss": 0.5582, "step": 13615 }, { "epoch": 0.97, "grad_norm": 1.7050529085162809, "learning_rate": 2.989144942850852e-08, "loss": 0.5593, "step": 13616 }, { "epoch": 0.97, "grad_norm": 4.322949221548623, "learning_rate": 2.9766111663032115e-08, "loss": 0.4778, "step": 13617 }, { "epoch": 0.97, "grad_norm": 1.8020855254567174, "learning_rate": 2.9641036442151217e-08, "loss": 0.5149, "step": 13618 }, { "epoch": 0.97, "grad_norm": 1.5942868039249611, "learning_rate": 2.9516223772472206e-08, "loss": 0.5546, "step": 13619 }, { "epoch": 0.97, "grad_norm": 1.6312543665024264, "learning_rate": 2.9391673660588705e-08, "loss": 0.5414, "step": 13620 }, { "epoch": 0.97, "grad_norm": 2.478712385208612, "learning_rate": 2.9267386113079888e-08, "loss": 0.5105, "step": 13621 }, { "epoch": 0.97, "grad_norm": 1.8503865983330183, "learning_rate": 2.914336113651106e-08, "loss": 0.4945, "step": 13622 }, { "epoch": 0.97, "grad_norm": 2.0201830435343378, "learning_rate": 2.9019598737433096e-08, "loss": 0.5202, "step": 13623 }, { "epoch": 0.97, "grad_norm": 1.9387429773907985, "learning_rate": 2.8896098922384096e-08, "loss": 0.5493, "step": 13624 }, { "epoch": 0.97, "grad_norm": 1.9344569066301873, "learning_rate": 2.877286169788718e-08, "loss": 0.5093, "step": 13625 }, { "epoch": 0.97, "grad_norm": 1.4473988234474753, "learning_rate": 2.864988707045324e-08, "loss": 0.4894, "step": 13626 }, { "epoch": 0.97, "grad_norm": 1.988243036038166, "learning_rate": 2.852717504657654e-08, "loss": 0.5689, "step": 13627 }, { "epoch": 0.97, "grad_norm": 1.7367526270529918, "learning_rate": 2.8404725632740772e-08, "loss": 0.4662, "step": 13628 }, { "epoch": 0.97, "grad_norm": 1.73945726750156, "learning_rate": 2.8282538835413542e-08, "loss": 0.5079, "step": 13629 }, { "epoch": 0.97, "grad_norm": 1.572319806330478, "learning_rate": 2.816061466104858e-08, "loss": 0.4485, "step": 13630 }, { "epoch": 0.97, "grad_norm": 1.622408719602149, "learning_rate": 2.8038953116087396e-08, "loss": 0.5009, "step": 13631 }, { "epoch": 0.97, "grad_norm": 1.5254061167801063, "learning_rate": 2.791755420695541e-08, "loss": 0.5748, "step": 13632 }, { "epoch": 0.97, "grad_norm": 1.653291167903297, "learning_rate": 2.7796417940066378e-08, "loss": 0.5786, "step": 13633 }, { "epoch": 0.97, "grad_norm": 1.4548676973965442, "learning_rate": 2.7675544321818514e-08, "loss": 0.4788, "step": 13634 }, { "epoch": 0.97, "grad_norm": 1.5287913983949086, "learning_rate": 2.7554933358597267e-08, "loss": 0.5556, "step": 13635 }, { "epoch": 0.97, "grad_norm": 1.806706186799095, "learning_rate": 2.74345850567731e-08, "loss": 0.562, "step": 13636 }, { "epoch": 0.97, "grad_norm": 1.6652886890732341, "learning_rate": 2.7314499422703146e-08, "loss": 0.4705, "step": 13637 }, { "epoch": 0.97, "grad_norm": 2.7546842854479214, "learning_rate": 2.7194676462731772e-08, "loss": 0.4549, "step": 13638 }, { "epoch": 0.97, "grad_norm": 1.6199775439149924, "learning_rate": 2.7075116183187256e-08, "loss": 0.5554, "step": 13639 }, { "epoch": 0.97, "grad_norm": 1.6946665625084318, "learning_rate": 2.6955818590385652e-08, "loss": 0.5218, "step": 13640 }, { "epoch": 0.97, "grad_norm": 1.5588065002615186, "learning_rate": 2.6836783690629142e-08, "loss": 0.5397, "step": 13641 }, { "epoch": 0.97, "grad_norm": 2.713855278728243, "learning_rate": 2.671801149020492e-08, "loss": 0.4988, "step": 13642 }, { "epoch": 0.97, "grad_norm": 1.8770952322963037, "learning_rate": 2.6599501995386857e-08, "loss": 0.4687, "step": 13643 }, { "epoch": 0.97, "grad_norm": 1.748130952268625, "learning_rate": 2.64812552124355e-08, "loss": 0.5558, "step": 13644 }, { "epoch": 0.97, "grad_norm": 1.6037063936991014, "learning_rate": 2.636327114759696e-08, "loss": 0.4643, "step": 13645 }, { "epoch": 0.97, "grad_norm": 1.85709122912522, "learning_rate": 2.624554980710292e-08, "loss": 0.5483, "step": 13646 }, { "epoch": 0.97, "grad_norm": 2.2597091627055925, "learning_rate": 2.6128091197172856e-08, "loss": 0.476, "step": 13647 }, { "epoch": 0.97, "grad_norm": 1.7227582884415344, "learning_rate": 2.6010895324010133e-08, "loss": 0.4833, "step": 13648 }, { "epoch": 0.97, "grad_norm": 1.4727964472970885, "learning_rate": 2.589396219380702e-08, "loss": 0.4928, "step": 13649 }, { "epoch": 0.97, "grad_norm": 1.9138748010218598, "learning_rate": 2.577729181273858e-08, "loss": 0.5406, "step": 13650 }, { "epoch": 0.97, "grad_norm": 1.7419112843146773, "learning_rate": 2.5660884186968772e-08, "loss": 0.5681, "step": 13651 }, { "epoch": 0.97, "grad_norm": 1.5708899626292299, "learning_rate": 2.554473932264656e-08, "loss": 0.5018, "step": 13652 }, { "epoch": 0.97, "grad_norm": 1.6817664049731726, "learning_rate": 2.5428857225907045e-08, "loss": 0.5086, "step": 13653 }, { "epoch": 0.97, "grad_norm": 1.7819320293186123, "learning_rate": 2.5313237902871436e-08, "loss": 0.5608, "step": 13654 }, { "epoch": 0.97, "grad_norm": 2.1301992738620137, "learning_rate": 2.5197881359646514e-08, "loss": 0.4853, "step": 13655 }, { "epoch": 0.97, "grad_norm": 0.6575275659428215, "learning_rate": 2.5082787602327407e-08, "loss": 0.4274, "step": 13656 }, { "epoch": 0.97, "grad_norm": 1.6153340831873553, "learning_rate": 2.496795663699203e-08, "loss": 0.5706, "step": 13657 }, { "epoch": 0.97, "grad_norm": 1.8417143745977065, "learning_rate": 2.4853388469707197e-08, "loss": 0.4033, "step": 13658 }, { "epoch": 0.97, "grad_norm": 1.6756737049895802, "learning_rate": 2.473908310652473e-08, "loss": 0.5476, "step": 13659 }, { "epoch": 0.97, "grad_norm": 0.6580460181718044, "learning_rate": 2.4625040553482026e-08, "loss": 0.4198, "step": 13660 }, { "epoch": 0.97, "grad_norm": 1.5232498454151047, "learning_rate": 2.451126081660371e-08, "loss": 0.4603, "step": 13661 }, { "epoch": 0.97, "grad_norm": 1.6833046990712928, "learning_rate": 2.439774390189942e-08, "loss": 0.5219, "step": 13662 }, { "epoch": 0.97, "grad_norm": 1.6865603854468418, "learning_rate": 2.4284489815366575e-08, "loss": 0.4891, "step": 13663 }, { "epoch": 0.97, "grad_norm": 1.706168157501822, "learning_rate": 2.4171498562986505e-08, "loss": 0.4987, "step": 13664 }, { "epoch": 0.97, "grad_norm": 1.5970857477745983, "learning_rate": 2.4058770150728884e-08, "loss": 0.4845, "step": 13665 }, { "epoch": 0.97, "grad_norm": 1.6907244955922287, "learning_rate": 2.3946304584547276e-08, "loss": 0.5631, "step": 13666 }, { "epoch": 0.97, "grad_norm": 1.7663907458663923, "learning_rate": 2.383410187038304e-08, "loss": 0.4883, "step": 13667 }, { "epoch": 0.97, "grad_norm": 1.9361286545402132, "learning_rate": 2.3722162014163662e-08, "loss": 0.49, "step": 13668 }, { "epoch": 0.97, "grad_norm": 2.2112166733851497, "learning_rate": 2.3610485021800524e-08, "loss": 0.5578, "step": 13669 }, { "epoch": 0.97, "grad_norm": 1.5619156316025222, "learning_rate": 2.349907089919501e-08, "loss": 0.4851, "step": 13670 }, { "epoch": 0.97, "grad_norm": 1.7106082931088564, "learning_rate": 2.338791965223075e-08, "loss": 0.5019, "step": 13671 }, { "epoch": 0.97, "grad_norm": 1.751928551104829, "learning_rate": 2.3277031286779717e-08, "loss": 0.5528, "step": 13672 }, { "epoch": 0.97, "grad_norm": 1.709930774002848, "learning_rate": 2.3166405808699443e-08, "loss": 0.5254, "step": 13673 }, { "epoch": 0.97, "grad_norm": 1.6800445667081099, "learning_rate": 2.3056043223833034e-08, "loss": 0.5276, "step": 13674 }, { "epoch": 0.97, "grad_norm": 1.5502393428718613, "learning_rate": 2.2945943538010828e-08, "loss": 0.4938, "step": 13675 }, { "epoch": 0.97, "grad_norm": 1.9354817129188846, "learning_rate": 2.283610675704817e-08, "loss": 0.5225, "step": 13676 }, { "epoch": 0.97, "grad_norm": 1.9833620713634694, "learning_rate": 2.2726532886748197e-08, "loss": 0.6167, "step": 13677 }, { "epoch": 0.97, "grad_norm": 1.5442226917866675, "learning_rate": 2.2617221932897393e-08, "loss": 0.5096, "step": 13678 }, { "epoch": 0.97, "grad_norm": 0.7277016520126909, "learning_rate": 2.250817390127058e-08, "loss": 0.4107, "step": 13679 }, { "epoch": 0.97, "grad_norm": 2.109429676350748, "learning_rate": 2.239938879762815e-08, "loss": 0.4945, "step": 13680 }, { "epoch": 0.97, "grad_norm": 1.8235380575146225, "learning_rate": 2.2290866627716623e-08, "loss": 0.5078, "step": 13681 }, { "epoch": 0.97, "grad_norm": 1.5900978282548284, "learning_rate": 2.2182607397268075e-08, "loss": 0.455, "step": 13682 }, { "epoch": 0.97, "grad_norm": 0.6757511222176741, "learning_rate": 2.2074611112001267e-08, "loss": 0.4236, "step": 13683 }, { "epoch": 0.97, "grad_norm": 1.6008895925771716, "learning_rate": 2.1966877777621077e-08, "loss": 0.5155, "step": 13684 }, { "epoch": 0.97, "grad_norm": 2.7898676850002455, "learning_rate": 2.185940739981851e-08, "loss": 0.4895, "step": 13685 }, { "epoch": 0.97, "grad_norm": 1.4206818297431292, "learning_rate": 2.1752199984270138e-08, "loss": 0.4627, "step": 13686 }, { "epoch": 0.97, "grad_norm": 1.7906074333581887, "learning_rate": 2.164525553663921e-08, "loss": 0.515, "step": 13687 }, { "epoch": 0.97, "grad_norm": 1.670554487378473, "learning_rate": 2.1538574062574534e-08, "loss": 0.5979, "step": 13688 }, { "epoch": 0.97, "grad_norm": 1.972688988628833, "learning_rate": 2.1432155567712166e-08, "loss": 0.4947, "step": 13689 }, { "epoch": 0.97, "grad_norm": 1.7769306894580654, "learning_rate": 2.1326000057672602e-08, "loss": 0.5451, "step": 13690 }, { "epoch": 0.97, "grad_norm": 1.7685589014128131, "learning_rate": 2.122010753806414e-08, "loss": 0.5544, "step": 13691 }, { "epoch": 0.97, "grad_norm": 1.7475246776912268, "learning_rate": 2.1114478014479522e-08, "loss": 0.5995, "step": 13692 }, { "epoch": 0.97, "grad_norm": 1.8923773704215427, "learning_rate": 2.1009111492499845e-08, "loss": 0.5089, "step": 13693 }, { "epoch": 0.97, "grad_norm": 1.983006889062014, "learning_rate": 2.090400797768899e-08, "loss": 0.5343, "step": 13694 }, { "epoch": 0.97, "grad_norm": 1.537101402374633, "learning_rate": 2.079916747560029e-08, "loss": 0.505, "step": 13695 }, { "epoch": 0.97, "grad_norm": 1.6560284380741874, "learning_rate": 2.069458999177154e-08, "loss": 0.5013, "step": 13696 }, { "epoch": 0.97, "grad_norm": 2.1106066738763447, "learning_rate": 2.0590275531726656e-08, "loss": 0.5415, "step": 13697 }, { "epoch": 0.97, "grad_norm": 2.1157674101543216, "learning_rate": 2.0486224100976228e-08, "loss": 0.504, "step": 13698 }, { "epoch": 0.97, "grad_norm": 1.7423651479687947, "learning_rate": 2.0382435705015856e-08, "loss": 0.5322, "step": 13699 }, { "epoch": 0.97, "grad_norm": 1.6354017143892279, "learning_rate": 2.027891034932894e-08, "loss": 0.513, "step": 13700 }, { "epoch": 0.97, "grad_norm": 1.7121944222048529, "learning_rate": 2.017564803938332e-08, "loss": 0.47, "step": 13701 }, { "epoch": 0.97, "grad_norm": 1.6820944982391355, "learning_rate": 2.0072648780634085e-08, "loss": 0.5193, "step": 13702 }, { "epoch": 0.97, "grad_norm": 2.2057795555035042, "learning_rate": 1.9969912578521324e-08, "loss": 0.5199, "step": 13703 }, { "epoch": 0.97, "grad_norm": 1.81671289194695, "learning_rate": 1.9867439438472914e-08, "loss": 0.5321, "step": 13704 }, { "epoch": 0.97, "grad_norm": 1.765450916375056, "learning_rate": 1.9765229365901195e-08, "loss": 0.5128, "step": 13705 }, { "epoch": 0.97, "grad_norm": 1.571612705940576, "learning_rate": 1.9663282366205737e-08, "loss": 0.5008, "step": 13706 }, { "epoch": 0.97, "grad_norm": 2.1366950233804265, "learning_rate": 1.9561598444771125e-08, "loss": 0.5923, "step": 13707 }, { "epoch": 0.97, "grad_norm": 2.447865014207708, "learning_rate": 1.946017760696861e-08, "loss": 0.5672, "step": 13708 }, { "epoch": 0.97, "grad_norm": 2.032117564033243, "learning_rate": 1.935901985815669e-08, "loss": 0.5649, "step": 13709 }, { "epoch": 0.97, "grad_norm": 1.6241708649943962, "learning_rate": 1.92581252036772e-08, "loss": 0.4436, "step": 13710 }, { "epoch": 0.97, "grad_norm": 0.6684570481476297, "learning_rate": 1.915749364886088e-08, "loss": 0.4359, "step": 13711 }, { "epoch": 0.97, "grad_norm": 1.6267207264791732, "learning_rate": 1.9057125199023474e-08, "loss": 0.4982, "step": 13712 }, { "epoch": 0.97, "grad_norm": 1.8452156479085255, "learning_rate": 1.895701985946574e-08, "loss": 0.5314, "step": 13713 }, { "epoch": 0.97, "grad_norm": 1.5888422535011206, "learning_rate": 1.8857177635476786e-08, "loss": 0.5659, "step": 13714 }, { "epoch": 0.97, "grad_norm": 1.8590122147672654, "learning_rate": 1.8757598532330167e-08, "loss": 0.5783, "step": 13715 }, { "epoch": 0.97, "grad_norm": 1.6007233057969275, "learning_rate": 1.865828255528612e-08, "loss": 0.5324, "step": 13716 }, { "epoch": 0.97, "grad_norm": 1.6651651929023643, "learning_rate": 1.8559229709589898e-08, "loss": 0.5501, "step": 13717 }, { "epoch": 0.97, "grad_norm": 2.374049367068231, "learning_rate": 1.846044000047509e-08, "loss": 0.5583, "step": 13718 }, { "epoch": 0.97, "grad_norm": 1.4363236054512196, "learning_rate": 1.836191343315974e-08, "loss": 0.4595, "step": 13719 }, { "epoch": 0.97, "grad_norm": 0.7756100392365685, "learning_rate": 1.8263650012848022e-08, "loss": 0.413, "step": 13720 }, { "epoch": 0.97, "grad_norm": 2.2459010448598997, "learning_rate": 1.8165649744730785e-08, "loss": 0.5276, "step": 13721 }, { "epoch": 0.97, "grad_norm": 1.620424034689471, "learning_rate": 1.8067912633984443e-08, "loss": 0.5051, "step": 13722 }, { "epoch": 0.97, "grad_norm": 1.7791096773686077, "learning_rate": 1.797043868577264e-08, "loss": 0.5145, "step": 13723 }, { "epoch": 0.97, "grad_norm": 2.1915913009589794, "learning_rate": 1.7873227905243483e-08, "loss": 0.5551, "step": 13724 }, { "epoch": 0.97, "grad_norm": 2.0244974723429805, "learning_rate": 1.7776280297531757e-08, "loss": 0.5235, "step": 13725 }, { "epoch": 0.97, "grad_norm": 1.7637481779396387, "learning_rate": 1.767959586775947e-08, "loss": 0.5693, "step": 13726 }, { "epoch": 0.97, "grad_norm": 1.9058907731768433, "learning_rate": 1.7583174621033094e-08, "loss": 0.5645, "step": 13727 }, { "epoch": 0.97, "grad_norm": 1.7514230814994545, "learning_rate": 1.7487016562446336e-08, "loss": 0.5098, "step": 13728 }, { "epoch": 0.97, "grad_norm": 1.7959362357364308, "learning_rate": 1.7391121697077906e-08, "loss": 0.5861, "step": 13729 }, { "epoch": 0.97, "grad_norm": 1.8874699203887477, "learning_rate": 1.729549002999431e-08, "loss": 0.5855, "step": 13730 }, { "epoch": 0.97, "grad_norm": 1.7585724877207944, "learning_rate": 1.720012156624651e-08, "loss": 0.5312, "step": 13731 }, { "epoch": 0.97, "grad_norm": 0.6764485153053917, "learning_rate": 1.7105016310872135e-08, "loss": 0.3926, "step": 13732 }, { "epoch": 0.97, "grad_norm": 1.740025805559989, "learning_rate": 1.7010174268895507e-08, "loss": 0.4328, "step": 13733 }, { "epoch": 0.97, "grad_norm": 1.6003553315143626, "learning_rate": 1.6915595445325948e-08, "loss": 0.5345, "step": 13734 }, { "epoch": 0.97, "grad_norm": 1.5939040396003548, "learning_rate": 1.682127984516002e-08, "loss": 0.5639, "step": 13735 }, { "epoch": 0.97, "grad_norm": 1.5684646424583175, "learning_rate": 1.6727227473378737e-08, "loss": 0.5371, "step": 13736 }, { "epoch": 0.97, "grad_norm": 1.612242701939781, "learning_rate": 1.6633438334951458e-08, "loss": 0.5254, "step": 13737 }, { "epoch": 0.97, "grad_norm": 1.6283067535066, "learning_rate": 1.653991243483144e-08, "loss": 0.5396, "step": 13738 }, { "epoch": 0.97, "grad_norm": 1.6102057212694685, "learning_rate": 1.6446649777959732e-08, "loss": 0.4473, "step": 13739 }, { "epoch": 0.98, "grad_norm": 1.6069983102856222, "learning_rate": 1.635365036926295e-08, "loss": 0.5575, "step": 13740 }, { "epoch": 0.98, "grad_norm": 1.6081285848299098, "learning_rate": 1.6260914213652723e-08, "loss": 0.4573, "step": 13741 }, { "epoch": 0.98, "grad_norm": 2.239849892582331, "learning_rate": 1.616844131602846e-08, "loss": 0.4946, "step": 13742 }, { "epoch": 0.98, "grad_norm": 0.6316326425812798, "learning_rate": 1.607623168127459e-08, "loss": 0.4129, "step": 13743 }, { "epoch": 0.98, "grad_norm": 1.6160584751180578, "learning_rate": 1.5984285314262214e-08, "loss": 0.547, "step": 13744 }, { "epoch": 0.98, "grad_norm": 1.7355194362348485, "learning_rate": 1.5892602219847452e-08, "loss": 0.4856, "step": 13745 }, { "epoch": 0.98, "grad_norm": 1.696070800839325, "learning_rate": 1.580118240287476e-08, "loss": 0.5461, "step": 13746 }, { "epoch": 0.98, "grad_norm": 0.6889008900892428, "learning_rate": 1.571002586817194e-08, "loss": 0.4076, "step": 13747 }, { "epoch": 0.98, "grad_norm": 1.6531855458470746, "learning_rate": 1.5619132620554034e-08, "loss": 0.5022, "step": 13748 }, { "epoch": 0.98, "grad_norm": 3.028031633821661, "learning_rate": 1.5528502664823865e-08, "loss": 0.5417, "step": 13749 }, { "epoch": 0.98, "grad_norm": 2.241664044279924, "learning_rate": 1.5438136005767602e-08, "loss": 0.5637, "step": 13750 }, { "epoch": 0.98, "grad_norm": 1.5859081770093417, "learning_rate": 1.5348032648159206e-08, "loss": 0.5727, "step": 13751 }, { "epoch": 0.98, "grad_norm": 2.142684694829177, "learning_rate": 1.5258192596757093e-08, "loss": 0.5505, "step": 13752 }, { "epoch": 0.98, "grad_norm": 1.7521103017933197, "learning_rate": 1.516861585630913e-08, "loss": 0.5308, "step": 13753 }, { "epoch": 0.98, "grad_norm": 1.7025776063386364, "learning_rate": 1.5079302431544873e-08, "loss": 0.5051, "step": 13754 }, { "epoch": 0.98, "grad_norm": 1.6441056454535075, "learning_rate": 1.499025232718332e-08, "loss": 0.4834, "step": 13755 }, { "epoch": 0.98, "grad_norm": 1.556282959281427, "learning_rate": 1.4901465547928483e-08, "loss": 0.4515, "step": 13756 }, { "epoch": 0.98, "grad_norm": 2.063678524679631, "learning_rate": 1.4812942098469396e-08, "loss": 0.5409, "step": 13757 }, { "epoch": 0.98, "grad_norm": 1.6124437472435278, "learning_rate": 1.4724681983483424e-08, "loss": 0.5618, "step": 13758 }, { "epoch": 0.98, "grad_norm": 1.5112333205867117, "learning_rate": 1.463668520763184e-08, "loss": 0.5119, "step": 13759 }, { "epoch": 0.98, "grad_norm": 1.8516740082344818, "learning_rate": 1.4548951775563703e-08, "loss": 0.5983, "step": 13760 }, { "epoch": 0.98, "grad_norm": 1.8769304485977745, "learning_rate": 1.4461481691912527e-08, "loss": 0.5311, "step": 13761 }, { "epoch": 0.98, "grad_norm": 1.8802250547184778, "learning_rate": 1.4374274961299062e-08, "loss": 0.5204, "step": 13762 }, { "epoch": 0.98, "grad_norm": 2.1710117584091067, "learning_rate": 1.4287331588330178e-08, "loss": 0.5149, "step": 13763 }, { "epoch": 0.98, "grad_norm": 2.091972570829777, "learning_rate": 1.4200651577598312e-08, "loss": 0.4941, "step": 13764 }, { "epoch": 0.98, "grad_norm": 1.6682376636246679, "learning_rate": 1.411423493368258e-08, "loss": 0.6043, "step": 13765 }, { "epoch": 0.98, "grad_norm": 2.067810273698281, "learning_rate": 1.4028081661147108e-08, "loss": 0.522, "step": 13766 }, { "epoch": 0.98, "grad_norm": 2.236525920051452, "learning_rate": 1.3942191764543255e-08, "loss": 0.5641, "step": 13767 }, { "epoch": 0.98, "grad_norm": 1.6859805640907688, "learning_rate": 1.385656524840795e-08, "loss": 0.5591, "step": 13768 }, { "epoch": 0.98, "grad_norm": 1.7206930308220043, "learning_rate": 1.3771202117264237e-08, "loss": 0.4978, "step": 13769 }, { "epoch": 0.98, "grad_norm": 1.6164930876480414, "learning_rate": 1.368610237562129e-08, "loss": 0.4979, "step": 13770 }, { "epoch": 0.98, "grad_norm": 1.6941596001084085, "learning_rate": 1.3601266027973848e-08, "loss": 0.5456, "step": 13771 }, { "epoch": 0.98, "grad_norm": 1.8742478864970795, "learning_rate": 1.3516693078804432e-08, "loss": 0.5161, "step": 13772 }, { "epoch": 0.98, "grad_norm": 1.6533949546834341, "learning_rate": 1.3432383532579474e-08, "loss": 0.5342, "step": 13773 }, { "epoch": 0.98, "grad_norm": 1.697619091249961, "learning_rate": 1.334833739375263e-08, "loss": 0.5169, "step": 13774 }, { "epoch": 0.98, "grad_norm": 1.6500453690624288, "learning_rate": 1.3264554666763685e-08, "loss": 0.5578, "step": 13775 }, { "epoch": 0.98, "grad_norm": 1.6164268654748593, "learning_rate": 1.318103535603854e-08, "loss": 0.5356, "step": 13776 }, { "epoch": 0.98, "grad_norm": 1.693258800491067, "learning_rate": 1.3097779465989225e-08, "loss": 0.5789, "step": 13777 }, { "epoch": 0.98, "grad_norm": 0.7027221266563234, "learning_rate": 1.3014787001012219e-08, "loss": 0.4478, "step": 13778 }, { "epoch": 0.98, "grad_norm": 1.871834528035164, "learning_rate": 1.2932057965492905e-08, "loss": 0.605, "step": 13779 }, { "epoch": 0.98, "grad_norm": 1.567817924211742, "learning_rate": 1.2849592363801122e-08, "loss": 0.516, "step": 13780 }, { "epoch": 0.98, "grad_norm": 1.8229754701452192, "learning_rate": 1.2767390200292274e-08, "loss": 0.4755, "step": 13781 }, { "epoch": 0.98, "grad_norm": 0.7349779189445561, "learning_rate": 1.2685451479308442e-08, "loss": 0.4146, "step": 13782 }, { "epoch": 0.98, "grad_norm": 1.6602550782095997, "learning_rate": 1.2603776205178941e-08, "loss": 0.5004, "step": 13783 }, { "epoch": 0.98, "grad_norm": 1.6212497271851576, "learning_rate": 1.2522364382217544e-08, "loss": 0.4948, "step": 13784 }, { "epoch": 0.98, "grad_norm": 2.3892271486532097, "learning_rate": 1.24412160147247e-08, "loss": 0.5719, "step": 13785 }, { "epoch": 0.98, "grad_norm": 1.7540980975702138, "learning_rate": 1.2360331106986979e-08, "loss": 0.5109, "step": 13786 }, { "epoch": 0.98, "grad_norm": 1.740212610434419, "learning_rate": 1.2279709663277073e-08, "loss": 0.4735, "step": 13787 }, { "epoch": 0.98, "grad_norm": 1.7820749849831707, "learning_rate": 1.21993516878538e-08, "loss": 0.4826, "step": 13788 }, { "epoch": 0.98, "grad_norm": 0.7766033991733574, "learning_rate": 1.2119257184960986e-08, "loss": 0.4345, "step": 13789 }, { "epoch": 0.98, "grad_norm": 2.737395370239475, "learning_rate": 1.2039426158830802e-08, "loss": 0.4403, "step": 13790 }, { "epoch": 0.98, "grad_norm": 1.8670576133552697, "learning_rate": 1.1959858613679875e-08, "loss": 0.5055, "step": 13791 }, { "epoch": 0.98, "grad_norm": 1.6188737783961151, "learning_rate": 1.18805545537104e-08, "loss": 0.5188, "step": 13792 }, { "epoch": 0.98, "grad_norm": 2.022036712738585, "learning_rate": 1.1801513983112356e-08, "loss": 0.5604, "step": 13793 }, { "epoch": 0.98, "grad_norm": 1.5197650742101925, "learning_rate": 1.1722736906060738e-08, "loss": 0.4555, "step": 13794 }, { "epoch": 0.98, "grad_norm": 1.821502823667854, "learning_rate": 1.1644223326716663e-08, "loss": 0.4335, "step": 13795 }, { "epoch": 0.98, "grad_norm": 1.9506144089178716, "learning_rate": 1.1565973249227924e-08, "loss": 0.4837, "step": 13796 }, { "epoch": 0.98, "grad_norm": 2.0832846143893833, "learning_rate": 1.1487986677727326e-08, "loss": 0.4801, "step": 13797 }, { "epoch": 0.98, "grad_norm": 1.9942276950610516, "learning_rate": 1.141026361633435e-08, "loss": 0.5418, "step": 13798 }, { "epoch": 0.98, "grad_norm": 2.219015473775589, "learning_rate": 1.1332804069155156e-08, "loss": 0.5032, "step": 13799 }, { "epoch": 0.98, "grad_norm": 1.7275231383627365, "learning_rate": 1.1255608040281473e-08, "loss": 0.5419, "step": 13800 }, { "epoch": 0.98, "grad_norm": 1.7300486822606524, "learning_rate": 1.117867553379004e-08, "loss": 0.5255, "step": 13801 }, { "epoch": 0.98, "grad_norm": 1.6940142992592748, "learning_rate": 1.1102006553745936e-08, "loss": 0.5652, "step": 13802 }, { "epoch": 0.98, "grad_norm": 1.8182172858358334, "learning_rate": 1.1025601104198702e-08, "loss": 0.5749, "step": 13803 }, { "epoch": 0.98, "grad_norm": 1.7914581734291233, "learning_rate": 1.0949459189183442e-08, "loss": 0.452, "step": 13804 }, { "epoch": 0.98, "grad_norm": 1.9040933566529674, "learning_rate": 1.0873580812723605e-08, "loss": 0.5357, "step": 13805 }, { "epoch": 0.98, "grad_norm": 1.7187671095964874, "learning_rate": 1.0797965978826541e-08, "loss": 0.5223, "step": 13806 }, { "epoch": 0.98, "grad_norm": 0.6170376708699634, "learning_rate": 1.0722614691486832e-08, "loss": 0.4004, "step": 13807 }, { "epoch": 0.98, "grad_norm": 1.7387855870329176, "learning_rate": 1.0647526954684073e-08, "loss": 0.5659, "step": 13808 }, { "epoch": 0.98, "grad_norm": 1.8794910765745756, "learning_rate": 1.0572702772385645e-08, "loss": 0.5344, "step": 13809 }, { "epoch": 0.98, "grad_norm": 2.446154487956777, "learning_rate": 1.0498142148543388e-08, "loss": 0.5256, "step": 13810 }, { "epoch": 0.98, "grad_norm": 1.4398546129368928, "learning_rate": 1.0423845087095818e-08, "loss": 0.4269, "step": 13811 }, { "epoch": 0.98, "grad_norm": 1.7924797899392848, "learning_rate": 1.0349811591967573e-08, "loss": 0.5177, "step": 13812 }, { "epoch": 0.98, "grad_norm": 0.7202270786782777, "learning_rate": 1.0276041667069968e-08, "loss": 0.4169, "step": 13813 }, { "epoch": 0.98, "grad_norm": 1.708925492199874, "learning_rate": 1.0202535316299334e-08, "loss": 0.481, "step": 13814 }, { "epoch": 0.98, "grad_norm": 1.5330440619883268, "learning_rate": 1.0129292543538115e-08, "loss": 0.5065, "step": 13815 }, { "epoch": 0.98, "grad_norm": 1.7823191972984158, "learning_rate": 1.0056313352656e-08, "loss": 0.4949, "step": 13816 }, { "epoch": 0.98, "grad_norm": 2.6583394898306665, "learning_rate": 9.983597747507679e-09, "loss": 0.4923, "step": 13817 }, { "epoch": 0.98, "grad_norm": 1.609245772614628, "learning_rate": 9.911145731934524e-09, "loss": 0.521, "step": 13818 }, { "epoch": 0.98, "grad_norm": 1.665619734142658, "learning_rate": 9.838957309762365e-09, "loss": 0.4996, "step": 13819 }, { "epoch": 0.98, "grad_norm": 1.8601877603949746, "learning_rate": 9.767032484806482e-09, "loss": 0.5017, "step": 13820 }, { "epoch": 0.98, "grad_norm": 1.9462619207328242, "learning_rate": 9.695371260864394e-09, "loss": 0.5226, "step": 13821 }, { "epoch": 0.98, "grad_norm": 1.5327915404748742, "learning_rate": 9.623973641723072e-09, "loss": 0.4599, "step": 13822 }, { "epoch": 0.98, "grad_norm": 1.640183305775688, "learning_rate": 9.552839631152277e-09, "loss": 0.4661, "step": 13823 }, { "epoch": 0.98, "grad_norm": 1.546458784191896, "learning_rate": 9.481969232911226e-09, "loss": 0.5599, "step": 13824 }, { "epoch": 0.98, "grad_norm": 1.7515434366211393, "learning_rate": 9.411362450742479e-09, "loss": 0.4771, "step": 13825 }, { "epoch": 0.98, "grad_norm": 1.6868595488614635, "learning_rate": 9.34101928837583e-09, "loss": 0.485, "step": 13826 }, { "epoch": 0.98, "grad_norm": 2.033065196525684, "learning_rate": 9.270939749527197e-09, "loss": 0.5659, "step": 13827 }, { "epoch": 0.98, "grad_norm": 1.7708692838640576, "learning_rate": 9.201123837898063e-09, "loss": 0.478, "step": 13828 }, { "epoch": 0.98, "grad_norm": 3.1758656021872973, "learning_rate": 9.131571557177144e-09, "loss": 0.5817, "step": 13829 }, { "epoch": 0.98, "grad_norm": 0.7473160868376661, "learning_rate": 9.062282911038167e-09, "loss": 0.444, "step": 13830 }, { "epoch": 0.98, "grad_norm": 1.4330398745562019, "learning_rate": 8.993257903140984e-09, "loss": 0.4856, "step": 13831 }, { "epoch": 0.98, "grad_norm": 1.6292561653557966, "learning_rate": 8.924496537131566e-09, "loss": 0.5085, "step": 13832 }, { "epoch": 0.98, "grad_norm": 1.8082917433884704, "learning_rate": 8.855998816642008e-09, "loss": 0.4301, "step": 13833 }, { "epoch": 0.98, "grad_norm": 2.002856632619029, "learning_rate": 8.787764745291638e-09, "loss": 0.4838, "step": 13834 }, { "epoch": 0.98, "grad_norm": 0.6759347153878912, "learning_rate": 8.719794326683128e-09, "loss": 0.4177, "step": 13835 }, { "epoch": 0.98, "grad_norm": 2.2721507454970955, "learning_rate": 8.652087564408607e-09, "loss": 0.4889, "step": 13836 }, { "epoch": 0.98, "grad_norm": 1.5929412683874062, "learning_rate": 8.584644462043545e-09, "loss": 0.4582, "step": 13837 }, { "epoch": 0.98, "grad_norm": 1.7897137741009035, "learning_rate": 8.51746502315065e-09, "loss": 0.581, "step": 13838 }, { "epoch": 0.98, "grad_norm": 1.7996306555655746, "learning_rate": 8.450549251279306e-09, "loss": 0.5695, "step": 13839 }, { "epoch": 0.98, "grad_norm": 1.5273339002060116, "learning_rate": 8.383897149962794e-09, "loss": 0.4999, "step": 13840 }, { "epoch": 0.98, "grad_norm": 1.5750512914346113, "learning_rate": 8.3175087227233e-09, "loss": 0.4606, "step": 13841 }, { "epoch": 0.98, "grad_norm": 1.7219578697266564, "learning_rate": 8.251383973066907e-09, "loss": 0.5405, "step": 13842 }, { "epoch": 0.98, "grad_norm": 2.1452607296480357, "learning_rate": 8.185522904486932e-09, "loss": 0.4852, "step": 13843 }, { "epoch": 0.98, "grad_norm": 1.8451043425648626, "learning_rate": 8.119925520462257e-09, "loss": 0.5452, "step": 13844 }, { "epoch": 0.98, "grad_norm": 1.6680890690164312, "learning_rate": 8.054591824457892e-09, "loss": 0.5474, "step": 13845 }, { "epoch": 0.98, "grad_norm": 1.4726365788283373, "learning_rate": 7.989521819924406e-09, "loss": 0.4801, "step": 13846 }, { "epoch": 0.98, "grad_norm": 1.6871755353559854, "learning_rate": 7.924715510300162e-09, "loss": 0.5784, "step": 13847 }, { "epoch": 0.98, "grad_norm": 1.677132360115063, "learning_rate": 7.860172899007978e-09, "loss": 0.5126, "step": 13848 }, { "epoch": 0.98, "grad_norm": 2.3084543466001453, "learning_rate": 7.795893989456792e-09, "loss": 0.5274, "step": 13849 }, { "epoch": 0.98, "grad_norm": 1.6855330125619785, "learning_rate": 7.731878785042778e-09, "loss": 0.5167, "step": 13850 }, { "epoch": 0.98, "grad_norm": 0.6613322886448719, "learning_rate": 7.668127289147121e-09, "loss": 0.4073, "step": 13851 }, { "epoch": 0.98, "grad_norm": 1.917431623101421, "learning_rate": 7.604639505136568e-09, "loss": 0.4905, "step": 13852 }, { "epoch": 0.98, "grad_norm": 1.855896296893096, "learning_rate": 7.541415436366218e-09, "loss": 0.5275, "step": 13853 }, { "epoch": 0.98, "grad_norm": 1.6817356830093526, "learning_rate": 7.478455086174507e-09, "loss": 0.4919, "step": 13854 }, { "epoch": 0.98, "grad_norm": 1.8443558426653477, "learning_rate": 7.4157584578882226e-09, "loss": 0.482, "step": 13855 }, { "epoch": 0.98, "grad_norm": 1.9091181551897176, "learning_rate": 7.353325554818603e-09, "loss": 0.4888, "step": 13856 }, { "epoch": 0.98, "grad_norm": 0.6500583303331111, "learning_rate": 7.291156380264119e-09, "loss": 0.4362, "step": 13857 }, { "epoch": 0.98, "grad_norm": 1.835000603367823, "learning_rate": 7.229250937507704e-09, "loss": 0.5278, "step": 13858 }, { "epoch": 0.98, "grad_norm": 1.6899015645233655, "learning_rate": 7.167609229820627e-09, "loss": 0.5693, "step": 13859 }, { "epoch": 0.98, "grad_norm": 0.6499629607362998, "learning_rate": 7.106231260458063e-09, "loss": 0.4197, "step": 13860 }, { "epoch": 0.98, "grad_norm": 2.4721431021543365, "learning_rate": 7.045117032662419e-09, "loss": 0.4928, "step": 13861 }, { "epoch": 0.98, "grad_norm": 5.33074914229362, "learning_rate": 6.984266549662777e-09, "loss": 0.506, "step": 13862 }, { "epoch": 0.98, "grad_norm": 1.9376528020824142, "learning_rate": 6.923679814672124e-09, "loss": 0.5119, "step": 13863 }, { "epoch": 0.98, "grad_norm": 1.6870020684056182, "learning_rate": 6.8633568308917875e-09, "loss": 0.4815, "step": 13864 }, { "epoch": 0.98, "grad_norm": 1.5241698549444123, "learning_rate": 6.803297601508108e-09, "loss": 0.5783, "step": 13865 }, { "epoch": 0.98, "grad_norm": 1.6623960127782504, "learning_rate": 6.7435021296935464e-09, "loss": 0.4282, "step": 13866 }, { "epoch": 0.98, "grad_norm": 2.280188502268781, "learning_rate": 6.6839704186066885e-09, "loss": 0.5094, "step": 13867 }, { "epoch": 0.98, "grad_norm": 1.8100413830189652, "learning_rate": 6.6247024713922415e-09, "loss": 0.5275, "step": 13868 }, { "epoch": 0.98, "grad_norm": 1.6687045949892414, "learning_rate": 6.5656982911810326e-09, "loss": 0.4763, "step": 13869 }, { "epoch": 0.98, "grad_norm": 0.6463604907448521, "learning_rate": 6.50695788108946e-09, "loss": 0.4314, "step": 13870 }, { "epoch": 0.98, "grad_norm": 2.2368923096007935, "learning_rate": 6.4484812442205946e-09, "loss": 0.6394, "step": 13871 }, { "epoch": 0.98, "grad_norm": 2.217003591847839, "learning_rate": 6.390268383663079e-09, "loss": 0.508, "step": 13872 }, { "epoch": 0.98, "grad_norm": 2.1030559816349905, "learning_rate": 6.332319302492784e-09, "loss": 0.4828, "step": 13873 }, { "epoch": 0.98, "grad_norm": 2.708979611688709, "learning_rate": 6.274634003770042e-09, "loss": 0.5032, "step": 13874 }, { "epoch": 0.98, "grad_norm": 1.5336497619764464, "learning_rate": 6.2172124905418575e-09, "loss": 0.5191, "step": 13875 }, { "epoch": 0.98, "grad_norm": 1.6554497310084306, "learning_rate": 6.160054765842471e-09, "loss": 0.5257, "step": 13876 }, { "epoch": 0.98, "grad_norm": 2.506916971894586, "learning_rate": 6.103160832690025e-09, "loss": 0.5493, "step": 13877 }, { "epoch": 0.98, "grad_norm": 1.9838467567041875, "learning_rate": 6.046530694090447e-09, "loss": 0.5102, "step": 13878 }, { "epoch": 0.98, "grad_norm": 2.342547441041773, "learning_rate": 5.990164353034678e-09, "loss": 0.5465, "step": 13879 }, { "epoch": 0.98, "grad_norm": 1.7426838837865224, "learning_rate": 5.9340618125003355e-09, "loss": 0.51, "step": 13880 }, { "epoch": 0.99, "grad_norm": 1.7736237171131768, "learning_rate": 5.878223075451162e-09, "loss": 0.4988, "step": 13881 }, { "epoch": 0.99, "grad_norm": 1.5247088289740156, "learning_rate": 5.822648144837018e-09, "loss": 0.4897, "step": 13882 }, { "epoch": 0.99, "grad_norm": 1.5616947752876682, "learning_rate": 5.767337023592778e-09, "loss": 0.4589, "step": 13883 }, { "epoch": 0.99, "grad_norm": 1.5685325850546339, "learning_rate": 5.712289714640551e-09, "loss": 0.5455, "step": 13884 }, { "epoch": 0.99, "grad_norm": 2.9541701217059058, "learning_rate": 5.657506220888564e-09, "loss": 0.461, "step": 13885 }, { "epoch": 0.99, "grad_norm": 2.6722364587580145, "learning_rate": 5.602986545229505e-09, "loss": 0.4726, "step": 13886 }, { "epoch": 0.99, "grad_norm": 1.7392507931891652, "learning_rate": 5.5487306905444016e-09, "loss": 0.5541, "step": 13887 }, { "epoch": 0.99, "grad_norm": 0.652687566116949, "learning_rate": 5.494738659699295e-09, "loss": 0.4252, "step": 13888 }, { "epoch": 0.99, "grad_norm": 1.8258839186846314, "learning_rate": 5.441010455545237e-09, "loss": 0.5279, "step": 13889 }, { "epoch": 0.99, "grad_norm": 2.0291273625170705, "learning_rate": 5.3875460809210685e-09, "loss": 0.5974, "step": 13890 }, { "epoch": 0.99, "grad_norm": 2.2429992029608097, "learning_rate": 5.334345538650643e-09, "loss": 0.4241, "step": 13891 }, { "epoch": 0.99, "grad_norm": 1.672379074001293, "learning_rate": 5.281408831544488e-09, "loss": 0.5423, "step": 13892 }, { "epoch": 0.99, "grad_norm": 1.5476875423475707, "learning_rate": 5.2287359623987014e-09, "loss": 0.4868, "step": 13893 }, { "epoch": 0.99, "grad_norm": 1.6087314362824645, "learning_rate": 5.176326933995501e-09, "loss": 0.4923, "step": 13894 }, { "epoch": 0.99, "grad_norm": 1.6457920747713095, "learning_rate": 5.124181749103785e-09, "loss": 0.4647, "step": 13895 }, { "epoch": 0.99, "grad_norm": 2.071348628878063, "learning_rate": 5.07230041047746e-09, "loss": 0.5072, "step": 13896 }, { "epoch": 0.99, "grad_norm": 1.5088108342926303, "learning_rate": 5.020682920857667e-09, "loss": 0.4582, "step": 13897 }, { "epoch": 0.99, "grad_norm": 1.9823024993310459, "learning_rate": 4.9693292829705585e-09, "loss": 0.535, "step": 13898 }, { "epoch": 0.99, "grad_norm": 4.984130235873916, "learning_rate": 4.918239499528965e-09, "loss": 0.5306, "step": 13899 }, { "epoch": 0.99, "grad_norm": 1.6703747466500334, "learning_rate": 4.867413573231283e-09, "loss": 0.5435, "step": 13900 }, { "epoch": 0.99, "grad_norm": 1.9087075778644362, "learning_rate": 4.816851506763143e-09, "loss": 0.5171, "step": 13901 }, { "epoch": 0.99, "grad_norm": 1.5545800476244913, "learning_rate": 4.76655330279463e-09, "loss": 0.5583, "step": 13902 }, { "epoch": 0.99, "grad_norm": 1.7249920327573967, "learning_rate": 4.716518963983063e-09, "loss": 0.52, "step": 13903 }, { "epoch": 0.99, "grad_norm": 1.5068479306904612, "learning_rate": 4.6667484929713295e-09, "loss": 0.5283, "step": 13904 }, { "epoch": 0.99, "grad_norm": 1.6898277712666616, "learning_rate": 4.617241892387881e-09, "loss": 0.5209, "step": 13905 }, { "epoch": 0.99, "grad_norm": 2.8169903292066514, "learning_rate": 4.567999164848957e-09, "loss": 0.5395, "step": 13906 }, { "epoch": 0.99, "grad_norm": 1.6725218175966918, "learning_rate": 4.519020312955258e-09, "loss": 0.5558, "step": 13907 }, { "epoch": 0.99, "grad_norm": 2.2013663746533934, "learning_rate": 4.470305339293601e-09, "loss": 0.5089, "step": 13908 }, { "epoch": 0.99, "grad_norm": 1.7488237631330972, "learning_rate": 4.421854246437485e-09, "loss": 0.4673, "step": 13909 }, { "epoch": 0.99, "grad_norm": 2.1380639737179044, "learning_rate": 4.373667036946527e-09, "loss": 0.5407, "step": 13910 }, { "epoch": 0.99, "grad_norm": 1.9801680822509038, "learning_rate": 4.3257437133659156e-09, "loss": 0.5244, "step": 13911 }, { "epoch": 0.99, "grad_norm": 1.696714752220602, "learning_rate": 4.278084278227513e-09, "loss": 0.5448, "step": 13912 }, { "epoch": 0.99, "grad_norm": 1.8332027599914975, "learning_rate": 4.230688734048194e-09, "loss": 0.5161, "step": 13913 }, { "epoch": 0.99, "grad_norm": 1.6788906232196072, "learning_rate": 4.183557083331513e-09, "loss": 0.4956, "step": 13914 }, { "epoch": 0.99, "grad_norm": 2.665141528603961, "learning_rate": 4.136689328568255e-09, "loss": 0.5346, "step": 13915 }, { "epoch": 0.99, "grad_norm": 2.06264423543389, "learning_rate": 4.090085472232552e-09, "loss": 0.4728, "step": 13916 }, { "epoch": 0.99, "grad_norm": 1.7265083359904163, "learning_rate": 4.043745516787434e-09, "loss": 0.5551, "step": 13917 }, { "epoch": 0.99, "grad_norm": 3.7093988881454796, "learning_rate": 3.997669464680387e-09, "loss": 0.5144, "step": 13918 }, { "epoch": 0.99, "grad_norm": 9.420997339837395, "learning_rate": 3.95185731834502e-09, "loss": 0.4495, "step": 13919 }, { "epoch": 0.99, "grad_norm": 2.1353348231993943, "learning_rate": 3.90630908020162e-09, "loss": 0.517, "step": 13920 }, { "epoch": 0.99, "grad_norm": 1.8497608698838504, "learning_rate": 3.86102475265604e-09, "loss": 0.5592, "step": 13921 }, { "epoch": 0.99, "grad_norm": 0.7697526741814533, "learning_rate": 3.816004338100254e-09, "loss": 0.4347, "step": 13922 }, { "epoch": 0.99, "grad_norm": 1.9174488541550874, "learning_rate": 3.771247838912362e-09, "loss": 0.5354, "step": 13923 }, { "epoch": 0.99, "grad_norm": 1.741001757147235, "learning_rate": 3.726755257457137e-09, "loss": 0.6207, "step": 13924 }, { "epoch": 0.99, "grad_norm": 2.20594735805295, "learning_rate": 3.682526596083813e-09, "loss": 0.5806, "step": 13925 }, { "epoch": 0.99, "grad_norm": 2.6758119495167874, "learning_rate": 3.6385618571294077e-09, "loss": 0.5309, "step": 13926 }, { "epoch": 0.99, "grad_norm": 1.7117890791853878, "learning_rate": 3.5948610429165088e-09, "loss": 0.581, "step": 13927 }, { "epoch": 0.99, "grad_norm": 1.7659112498408214, "learning_rate": 3.5514241557532693e-09, "loss": 0.502, "step": 13928 }, { "epoch": 0.99, "grad_norm": 1.7278242861119757, "learning_rate": 3.50825119793341e-09, "loss": 0.5354, "step": 13929 }, { "epoch": 0.99, "grad_norm": 1.69051871516047, "learning_rate": 3.4653421717384395e-09, "loss": 0.5052, "step": 13930 }, { "epoch": 0.99, "grad_norm": 1.895585066326675, "learning_rate": 3.422697079434878e-09, "loss": 0.45, "step": 13931 }, { "epoch": 0.99, "grad_norm": 5.990333237591098, "learning_rate": 3.380315923275368e-09, "loss": 0.5424, "step": 13932 }, { "epoch": 0.99, "grad_norm": 1.4989741606332263, "learning_rate": 3.338198705498119e-09, "loss": 0.4121, "step": 13933 }, { "epoch": 0.99, "grad_norm": 1.7664706485841073, "learning_rate": 3.2963454283280184e-09, "loss": 0.5436, "step": 13934 }, { "epoch": 0.99, "grad_norm": 1.7137145571715726, "learning_rate": 3.2547560939760746e-09, "loss": 0.4697, "step": 13935 }, { "epoch": 0.99, "grad_norm": 1.5980731602798788, "learning_rate": 3.21343070463942e-09, "loss": 0.5161, "step": 13936 }, { "epoch": 0.99, "grad_norm": 1.8148072184784318, "learning_rate": 3.1723692625007518e-09, "loss": 0.4773, "step": 13937 }, { "epoch": 0.99, "grad_norm": 0.7024169774295151, "learning_rate": 3.1315717697294466e-09, "loss": 0.402, "step": 13938 }, { "epoch": 0.99, "grad_norm": 1.7203502808674962, "learning_rate": 3.091038228479892e-09, "loss": 0.5037, "step": 13939 }, { "epoch": 0.99, "grad_norm": 1.6770002337895986, "learning_rate": 3.0507686408931536e-09, "loss": 0.5337, "step": 13940 }, { "epoch": 0.99, "grad_norm": 1.5845993816901094, "learning_rate": 3.010763009097528e-09, "loss": 0.5347, "step": 13941 }, { "epoch": 0.99, "grad_norm": 1.6534109644248134, "learning_rate": 2.9710213352052154e-09, "loss": 0.481, "step": 13942 }, { "epoch": 0.99, "grad_norm": 1.9495968329948787, "learning_rate": 2.931543621315647e-09, "loss": 0.5444, "step": 13943 }, { "epoch": 0.99, "grad_norm": 4.010129273437819, "learning_rate": 2.8923298695143764e-09, "loss": 0.5241, "step": 13944 }, { "epoch": 0.99, "grad_norm": 1.788717360800131, "learning_rate": 2.8533800818730805e-09, "loss": 0.4817, "step": 13945 }, { "epoch": 0.99, "grad_norm": 1.551542736081716, "learning_rate": 2.814694260448447e-09, "loss": 0.4421, "step": 13946 }, { "epoch": 0.99, "grad_norm": 2.1062601593786265, "learning_rate": 2.7762724072843972e-09, "loss": 0.4576, "step": 13947 }, { "epoch": 0.99, "grad_norm": 0.7015696800274323, "learning_rate": 2.738114524410973e-09, "loss": 0.4453, "step": 13948 }, { "epoch": 0.99, "grad_norm": 1.965126031220617, "learning_rate": 2.7002206138432296e-09, "loss": 0.5834, "step": 13949 }, { "epoch": 0.99, "grad_norm": 1.8977511939332101, "learning_rate": 2.662590677582899e-09, "loss": 0.5022, "step": 13950 }, { "epoch": 0.99, "grad_norm": 1.5666078402058337, "learning_rate": 2.6252247176172807e-09, "loss": 0.4664, "step": 13951 }, { "epoch": 0.99, "grad_norm": 2.4062609051236463, "learning_rate": 2.5881227359214612e-09, "loss": 0.4835, "step": 13952 }, { "epoch": 0.99, "grad_norm": 2.4216141545247125, "learning_rate": 2.551284734454429e-09, "loss": 0.4991, "step": 13953 }, { "epoch": 0.99, "grad_norm": 1.71318315457788, "learning_rate": 2.514710715162405e-09, "loss": 0.5112, "step": 13954 }, { "epoch": 0.99, "grad_norm": 1.8297195138566402, "learning_rate": 2.4784006799766224e-09, "loss": 0.5685, "step": 13955 }, { "epoch": 0.99, "grad_norm": 1.4968817884785155, "learning_rate": 2.442354630816102e-09, "loss": 0.4958, "step": 13956 }, { "epoch": 0.99, "grad_norm": 1.60279085261076, "learning_rate": 2.4065725695837647e-09, "loss": 0.5448, "step": 13957 }, { "epoch": 0.99, "grad_norm": 1.6974740872588794, "learning_rate": 2.3710544981708772e-09, "loss": 0.4998, "step": 13958 }, { "epoch": 0.99, "grad_norm": 1.832489573617952, "learning_rate": 2.3358004184531602e-09, "loss": 0.5252, "step": 13959 }, { "epoch": 0.99, "grad_norm": 1.666269563885567, "learning_rate": 2.300810332293013e-09, "loss": 0.5564, "step": 13960 }, { "epoch": 0.99, "grad_norm": 1.6095229705332432, "learning_rate": 2.266084241538402e-09, "loss": 0.4968, "step": 13961 }, { "epoch": 0.99, "grad_norm": 1.7083493239825653, "learning_rate": 2.2316221480239706e-09, "loss": 0.5291, "step": 13962 }, { "epoch": 0.99, "grad_norm": 2.1015288880355527, "learning_rate": 2.1974240535699296e-09, "loss": 0.5109, "step": 13963 }, { "epoch": 0.99, "grad_norm": 1.8501412192309126, "learning_rate": 2.163489959982612e-09, "loss": 0.5253, "step": 13964 }, { "epoch": 0.99, "grad_norm": 1.5778749058636505, "learning_rate": 2.1298198690550277e-09, "loss": 0.5046, "step": 13965 }, { "epoch": 0.99, "grad_norm": 1.819524662966565, "learning_rate": 2.096413782565754e-09, "loss": 0.5485, "step": 13966 }, { "epoch": 0.99, "grad_norm": 2.2237160586068887, "learning_rate": 2.06327170227838e-09, "loss": 0.5106, "step": 13967 }, { "epoch": 0.99, "grad_norm": 1.8314724983876818, "learning_rate": 2.030393629944838e-09, "loss": 0.5859, "step": 13968 }, { "epoch": 0.99, "grad_norm": 4.15877215545157, "learning_rate": 1.9977795673009614e-09, "loss": 0.5743, "step": 13969 }, { "epoch": 0.99, "grad_norm": 1.7485228537003954, "learning_rate": 1.9654295160703716e-09, "loss": 0.5464, "step": 13970 }, { "epoch": 0.99, "grad_norm": 1.7011822626352269, "learning_rate": 1.933343477961147e-09, "loss": 0.547, "step": 13971 }, { "epoch": 0.99, "grad_norm": 1.7645272523558688, "learning_rate": 1.9015214546685978e-09, "loss": 0.5819, "step": 13972 }, { "epoch": 0.99, "grad_norm": 1.6697872382637506, "learning_rate": 1.869963447873602e-09, "loss": 0.5491, "step": 13973 }, { "epoch": 0.99, "grad_norm": 1.4313479062996486, "learning_rate": 1.8386694592426035e-09, "loss": 0.4906, "step": 13974 }, { "epoch": 0.99, "grad_norm": 0.6871352763517582, "learning_rate": 1.8076394904298355e-09, "loss": 0.4467, "step": 13975 }, { "epoch": 0.99, "grad_norm": 2.038175941409779, "learning_rate": 1.7768735430734319e-09, "loss": 0.5831, "step": 13976 }, { "epoch": 0.99, "grad_norm": 1.982093277656842, "learning_rate": 1.746371618798759e-09, "loss": 0.5693, "step": 13977 }, { "epoch": 0.99, "grad_norm": 1.460749766586072, "learning_rate": 1.7161337192173055e-09, "loss": 0.4956, "step": 13978 }, { "epoch": 0.99, "grad_norm": 1.8070354015241525, "learning_rate": 1.6861598459261275e-09, "loss": 0.4441, "step": 13979 }, { "epoch": 0.99, "grad_norm": 1.6922967097137218, "learning_rate": 1.6564500005084028e-09, "loss": 0.5516, "step": 13980 }, { "epoch": 0.99, "grad_norm": 1.7263917473992119, "learning_rate": 1.6270041845339867e-09, "loss": 0.4878, "step": 13981 }, { "epoch": 0.99, "grad_norm": 1.7660840459601508, "learning_rate": 1.597822399557747e-09, "loss": 0.5831, "step": 13982 }, { "epoch": 0.99, "grad_norm": 2.095193440996543, "learning_rate": 1.5689046471217827e-09, "loss": 0.5569, "step": 13983 }, { "epoch": 0.99, "grad_norm": 1.7220821767688, "learning_rate": 1.5402509287532063e-09, "loss": 0.5406, "step": 13984 }, { "epoch": 0.99, "grad_norm": 1.812011646755183, "learning_rate": 1.5118612459652516e-09, "loss": 0.5099, "step": 13985 }, { "epoch": 0.99, "grad_norm": 1.6141820431343143, "learning_rate": 1.4837356002583847e-09, "loss": 0.4825, "step": 13986 }, { "epoch": 0.99, "grad_norm": 0.6708826398290908, "learning_rate": 1.4558739931175292e-09, "loss": 0.4345, "step": 13987 }, { "epoch": 0.99, "grad_norm": 0.7701728061624483, "learning_rate": 1.4282764260148407e-09, "loss": 0.4017, "step": 13988 }, { "epoch": 0.99, "grad_norm": 1.772943082822712, "learning_rate": 1.4009429004085973e-09, "loss": 0.4635, "step": 13989 }, { "epoch": 0.99, "grad_norm": 1.9465925656932277, "learning_rate": 1.3738734177415335e-09, "loss": 0.4903, "step": 13990 }, { "epoch": 0.99, "grad_norm": 8.354655856995636, "learning_rate": 1.3470679794441721e-09, "loss": 0.4462, "step": 13991 }, { "epoch": 0.99, "grad_norm": 1.4804488290996796, "learning_rate": 1.3205265869326022e-09, "loss": 0.5656, "step": 13992 }, { "epoch": 0.99, "grad_norm": 1.6857169140227601, "learning_rate": 1.2942492416090357e-09, "loss": 0.4849, "step": 13993 }, { "epoch": 0.99, "grad_norm": 1.5167864934891746, "learning_rate": 1.2682359448606962e-09, "loss": 0.5682, "step": 13994 }, { "epoch": 0.99, "grad_norm": 2.570461044097694, "learning_rate": 1.2424866980620398e-09, "loss": 0.5096, "step": 13995 }, { "epoch": 0.99, "grad_norm": 1.9851574796917155, "learning_rate": 1.2170015025736448e-09, "loss": 0.5452, "step": 13996 }, { "epoch": 0.99, "grad_norm": 1.8432076841773941, "learning_rate": 1.1917803597411015e-09, "loss": 0.4942, "step": 13997 }, { "epoch": 0.99, "grad_norm": 1.8307737376337636, "learning_rate": 1.1668232708972327e-09, "loss": 0.55, "step": 13998 }, { "epoch": 0.99, "grad_norm": 1.626935979482044, "learning_rate": 1.142130237360428e-09, "loss": 0.5679, "step": 13999 }, { "epoch": 0.99, "grad_norm": 2.0746799200345234, "learning_rate": 1.1177012604340897e-09, "loss": 0.5214, "step": 14000 }, { "epoch": 0.99, "grad_norm": 1.5044714612318666, "learning_rate": 1.093536341409407e-09, "loss": 0.4765, "step": 14001 }, { "epoch": 0.99, "grad_norm": 1.8191842593159564, "learning_rate": 1.069635481563136e-09, "loss": 0.4962, "step": 14002 }, { "epoch": 0.99, "grad_norm": 2.3438972914403795, "learning_rate": 1.0459986821570455e-09, "loss": 0.5304, "step": 14003 }, { "epoch": 0.99, "grad_norm": 2.079169482822846, "learning_rate": 1.0226259444401365e-09, "loss": 0.4888, "step": 14004 }, { "epoch": 0.99, "grad_norm": 2.0814805982754025, "learning_rate": 9.995172696475318e-10, "loss": 0.5042, "step": 14005 }, { "epoch": 0.99, "grad_norm": 1.5929471243458846, "learning_rate": 9.766726589988118e-10, "loss": 0.5144, "step": 14006 }, { "epoch": 0.99, "grad_norm": 1.7345748920472417, "learning_rate": 9.540921137013438e-10, "loss": 0.5688, "step": 14007 }, { "epoch": 0.99, "grad_norm": 1.6687922514862206, "learning_rate": 9.31775634947507e-10, "loss": 0.5232, "step": 14008 }, { "epoch": 0.99, "grad_norm": 1.7899400163578012, "learning_rate": 9.09723223916914e-10, "loss": 0.5053, "step": 14009 }, { "epoch": 0.99, "grad_norm": 1.6676304753767186, "learning_rate": 8.879348817736333e-10, "loss": 0.5607, "step": 14010 }, { "epoch": 0.99, "grad_norm": 1.5481319237365292, "learning_rate": 8.664106096689662e-10, "loss": 0.5251, "step": 14011 }, { "epoch": 0.99, "grad_norm": 1.7660253000527921, "learning_rate": 8.451504087403362e-10, "loss": 0.4758, "step": 14012 }, { "epoch": 0.99, "grad_norm": 1.4472550218715898, "learning_rate": 8.241542801096236e-10, "loss": 0.4407, "step": 14013 }, { "epoch": 0.99, "grad_norm": 0.6484917153509575, "learning_rate": 8.034222248870516e-10, "loss": 0.3993, "step": 14014 }, { "epoch": 0.99, "grad_norm": 1.801084926177681, "learning_rate": 7.829542441672999e-10, "loss": 0.4762, "step": 14015 }, { "epoch": 0.99, "grad_norm": 1.5949156531313347, "learning_rate": 7.627503390311707e-10, "loss": 0.5015, "step": 14016 }, { "epoch": 0.99, "grad_norm": 1.9214136663714037, "learning_rate": 7.428105105466987e-10, "loss": 0.5853, "step": 14017 }, { "epoch": 0.99, "grad_norm": 2.11614414038328, "learning_rate": 7.231347597669303e-10, "loss": 0.5556, "step": 14018 }, { "epoch": 0.99, "grad_norm": 1.9136872693995517, "learning_rate": 7.037230877304791e-10, "loss": 0.483, "step": 14019 }, { "epoch": 0.99, "grad_norm": 1.6428634765830783, "learning_rate": 6.845754954637462e-10, "loss": 0.5477, "step": 14020 }, { "epoch": 0.99, "grad_norm": 1.603031871162165, "learning_rate": 6.656919839775899e-10, "loss": 0.4921, "step": 14021 }, { "epoch": 1.0, "grad_norm": 0.7836345747264397, "learning_rate": 6.470725542695455e-10, "loss": 0.4219, "step": 14022 }, { "epoch": 1.0, "grad_norm": 1.6719921562129527, "learning_rate": 6.287172073232706e-10, "loss": 0.5283, "step": 14023 }, { "epoch": 1.0, "grad_norm": 1.9625170399960061, "learning_rate": 6.106259441085449e-10, "loss": 0.5028, "step": 14024 }, { "epoch": 1.0, "grad_norm": 1.780084880351303, "learning_rate": 5.927987655801603e-10, "loss": 0.5443, "step": 14025 }, { "epoch": 1.0, "grad_norm": 1.944805460023378, "learning_rate": 5.752356726812514e-10, "loss": 0.6018, "step": 14026 }, { "epoch": 1.0, "grad_norm": 1.8659361151212914, "learning_rate": 5.57936666338299e-10, "loss": 0.557, "step": 14027 }, { "epoch": 1.0, "grad_norm": 1.6311114993047828, "learning_rate": 5.40901747465572e-10, "loss": 0.5586, "step": 14028 }, { "epoch": 1.0, "grad_norm": 1.5908724331481414, "learning_rate": 5.241309169634612e-10, "loss": 0.4566, "step": 14029 }, { "epoch": 1.0, "grad_norm": 1.6432804514463457, "learning_rate": 5.076241757168143e-10, "loss": 0.517, "step": 14030 }, { "epoch": 1.0, "grad_norm": 2.113234072828806, "learning_rate": 4.913815245977117e-10, "loss": 0.547, "step": 14031 }, { "epoch": 1.0, "grad_norm": 2.083481131375769, "learning_rate": 4.754029644649105e-10, "loss": 0.5233, "step": 14032 }, { "epoch": 1.0, "grad_norm": 1.5728150560074747, "learning_rate": 4.596884961621806e-10, "loss": 0.5207, "step": 14033 }, { "epoch": 1.0, "grad_norm": 1.5666707792536556, "learning_rate": 4.442381205188584e-10, "loss": 0.542, "step": 14034 }, { "epoch": 1.0, "grad_norm": 2.391347631703284, "learning_rate": 4.290518383520681e-10, "loss": 0.4411, "step": 14035 }, { "epoch": 1.0, "grad_norm": 3.8580915530930837, "learning_rate": 4.1412965046394584e-10, "loss": 0.5392, "step": 14036 }, { "epoch": 1.0, "grad_norm": 0.6764842922152625, "learning_rate": 3.994715576421948e-10, "loss": 0.4546, "step": 14037 }, { "epoch": 1.0, "grad_norm": 1.829444286521338, "learning_rate": 3.850775606611956e-10, "loss": 0.5479, "step": 14038 }, { "epoch": 1.0, "grad_norm": 1.600367140098887, "learning_rate": 3.70947660281451e-10, "loss": 0.4736, "step": 14039 }, { "epoch": 1.0, "grad_norm": 1.6036436643465142, "learning_rate": 3.570818572490309e-10, "loss": 0.4995, "step": 14040 }, { "epoch": 1.0, "grad_norm": 1.6785783624893744, "learning_rate": 3.434801522966824e-10, "loss": 0.526, "step": 14041 }, { "epoch": 1.0, "grad_norm": 2.538593086652002, "learning_rate": 3.30142546143275e-10, "loss": 0.52, "step": 14042 }, { "epoch": 1.0, "grad_norm": 2.0055212667516527, "learning_rate": 3.1706903949269006e-10, "loss": 0.4835, "step": 14043 }, { "epoch": 1.0, "grad_norm": 1.5828870526811387, "learning_rate": 3.0425963303604143e-10, "loss": 0.4987, "step": 14044 }, { "epoch": 1.0, "grad_norm": 1.6482968313308326, "learning_rate": 2.9171432744945494e-10, "loss": 0.4801, "step": 14045 }, { "epoch": 1.0, "grad_norm": 1.6819540731450964, "learning_rate": 2.794331233957337e-10, "loss": 0.4334, "step": 14046 }, { "epoch": 1.0, "grad_norm": 1.7548608087235462, "learning_rate": 2.6741602152380307e-10, "loss": 0.4917, "step": 14047 }, { "epoch": 1.0, "grad_norm": 1.4693252108480692, "learning_rate": 2.5566302246815556e-10, "loss": 0.4757, "step": 14048 }, { "epoch": 1.0, "grad_norm": 1.6482174010241917, "learning_rate": 2.4417412684996087e-10, "loss": 0.5317, "step": 14049 }, { "epoch": 1.0, "grad_norm": 2.1973443884683697, "learning_rate": 2.32949335275956e-10, "loss": 0.5958, "step": 14050 }, { "epoch": 1.0, "grad_norm": 1.7171854302664875, "learning_rate": 2.2198864833955503e-10, "loss": 0.568, "step": 14051 }, { "epoch": 1.0, "grad_norm": 1.6311906211724032, "learning_rate": 2.1129206661862911e-10, "loss": 0.4749, "step": 14052 }, { "epoch": 1.0, "grad_norm": 1.7532043405981916, "learning_rate": 2.0085959067939198e-10, "loss": 0.5268, "step": 14053 }, { "epoch": 1.0, "grad_norm": 1.5394554284054034, "learning_rate": 1.9069122107195915e-10, "loss": 0.5074, "step": 14054 }, { "epoch": 1.0, "grad_norm": 1.7525939719136379, "learning_rate": 1.8078695833423364e-10, "loss": 0.5618, "step": 14055 }, { "epoch": 1.0, "grad_norm": 3.142876594191645, "learning_rate": 1.7114680298857544e-10, "loss": 0.4394, "step": 14056 }, { "epoch": 1.0, "grad_norm": 1.8426101628565785, "learning_rate": 1.61770755545132e-10, "loss": 0.5479, "step": 14057 }, { "epoch": 1.0, "grad_norm": 1.958393493658163, "learning_rate": 1.5265881649850767e-10, "loss": 0.5332, "step": 14058 }, { "epoch": 1.0, "grad_norm": 1.5706728300954715, "learning_rate": 1.4381098633053926e-10, "loss": 0.5291, "step": 14059 }, { "epoch": 1.0, "grad_norm": 1.8135497633598439, "learning_rate": 1.352272655075204e-10, "loss": 0.5576, "step": 14060 }, { "epoch": 1.0, "grad_norm": 1.5976135754572138, "learning_rate": 1.269076544846426e-10, "loss": 0.5368, "step": 14061 }, { "epoch": 1.0, "grad_norm": 1.89458883691368, "learning_rate": 1.188521536998888e-10, "loss": 0.5673, "step": 14062 }, { "epoch": 1.0, "grad_norm": 1.6235713373143799, "learning_rate": 1.110607635790295e-10, "loss": 0.5327, "step": 14063 }, { "epoch": 1.0, "grad_norm": 1.7708324529793058, "learning_rate": 1.035334845339575e-10, "loss": 0.5186, "step": 14064 }, { "epoch": 1.0, "grad_norm": 1.653001111268248, "learning_rate": 9.627031696268773e-11, "loss": 0.5313, "step": 14065 }, { "epoch": 1.0, "grad_norm": 2.210710247897176, "learning_rate": 8.927126124824714e-11, "loss": 0.4716, "step": 14066 }, { "epoch": 1.0, "grad_norm": 0.6571520616949684, "learning_rate": 8.253631776033999e-11, "loss": 0.4173, "step": 14067 }, { "epoch": 1.0, "grad_norm": 1.6636133675495723, "learning_rate": 7.606548685479276e-11, "loss": 0.534, "step": 14068 }, { "epoch": 1.0, "grad_norm": 1.5453879746003851, "learning_rate": 6.985876887355413e-11, "loss": 0.4572, "step": 14069 }, { "epoch": 1.0, "grad_norm": 2.547377051744731, "learning_rate": 6.391616414469504e-11, "loss": 0.5853, "step": 14070 }, { "epoch": 1.0, "grad_norm": 1.7580175269455187, "learning_rate": 5.823767298185346e-11, "loss": 0.5022, "step": 14071 }, { "epoch": 1.0, "grad_norm": 1.7052096978189406, "learning_rate": 5.282329568478961e-11, "loss": 0.5011, "step": 14072 }, { "epoch": 1.0, "grad_norm": 1.7261080841364274, "learning_rate": 4.767303253994105e-11, "loss": 0.5588, "step": 14073 }, { "epoch": 1.0, "grad_norm": 0.6907828754992204, "learning_rate": 4.278688381875728e-11, "loss": 0.4066, "step": 14074 }, { "epoch": 1.0, "grad_norm": 2.7724862846500646, "learning_rate": 3.816484978047541e-11, "loss": 0.5217, "step": 14075 }, { "epoch": 1.0, "grad_norm": 0.6620951611880568, "learning_rate": 3.380693066767915e-11, "loss": 0.4241, "step": 14076 }, { "epoch": 1.0, "grad_norm": 1.5700137204719178, "learning_rate": 2.971312671185001e-11, "loss": 0.4925, "step": 14077 }, { "epoch": 1.0, "grad_norm": 1.6862164612767983, "learning_rate": 2.5883438128926354e-11, "loss": 0.5114, "step": 14078 }, { "epoch": 1.0, "grad_norm": 1.5462663339023022, "learning_rate": 2.2317865120968785e-11, "loss": 0.5178, "step": 14079 }, { "epoch": 1.0, "grad_norm": 1.7378250059874554, "learning_rate": 1.9016407876160102e-11, "loss": 0.4925, "step": 14080 }, { "epoch": 1.0, "grad_norm": 0.691874180294237, "learning_rate": 1.5979066569360435e-11, "loss": 0.4342, "step": 14081 }, { "epoch": 1.0, "grad_norm": 2.08339024644167, "learning_rate": 1.3205841360441896e-11, "loss": 0.4438, "step": 14082 }, { "epoch": 1.0, "grad_norm": 2.06953941839617, "learning_rate": 1.0696732396509036e-11, "loss": 0.5204, "step": 14083 }, { "epoch": 1.0, "grad_norm": 1.6668875675980066, "learning_rate": 8.451739810233505e-12, "loss": 0.5328, "step": 14084 }, { "epoch": 1.0, "grad_norm": 2.3720684453129035, "learning_rate": 6.4708637192989475e-12, "loss": 0.5081, "step": 14085 }, { "epoch": 1.0, "grad_norm": 3.1296512908248424, "learning_rate": 4.7541042291765485e-12, "loss": 0.5259, "step": 14086 }, { "epoch": 1.0, "grad_norm": 1.572778221995818, "learning_rate": 3.301461430349484e-12, "loss": 0.458, "step": 14087 }, { "epoch": 1.0, "grad_norm": 1.8555207340843716, "learning_rate": 2.1129353988680323e-12, "loss": 0.4669, "step": 14088 }, { "epoch": 1.0, "grad_norm": 1.7386741818807192, "learning_rate": 1.188526198570017e-12, "loss": 0.5212, "step": 14089 }, { "epoch": 1.0, "grad_norm": 2.7436704099250657, "learning_rate": 5.282338777501394e-13, "loss": 0.5915, "step": 14090 }, { "epoch": 1.0, "grad_norm": 1.9236723627453942, "learning_rate": 1.3205847138042515e-13, "loss": 0.525, "step": 14091 }, { "epoch": 1.0, "grad_norm": 1.2388009305235081, "learning_rate": 0.0, "loss": 0.4138, "step": 14092 }, { "epoch": 1.0, "step": 14092, "total_flos": 4090201459113984.0, "train_loss": 0.5389912225498484, "train_runtime": 197525.1262, "train_samples_per_second": 13.698, "train_steps_per_second": 0.071 } ], "logging_steps": 1.0, "max_steps": 14092, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "total_flos": 4090201459113984.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }