{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.999878419452887, "eval_steps": 514, "global_step": 10280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00048632218844984804, "grad_norm": 0.31342887076158477, "learning_rate": 5.000000000000001e-07, "loss": 0.7161, "step": 1 }, { "epoch": 0.00048632218844984804, "eval_loss": 0.7662228345870972, "eval_runtime": 104.4915, "eval_samples_per_second": 290.483, "eval_steps_per_second": 36.319, "step": 1 }, { "epoch": 0.0009726443768996961, "grad_norm": 0.334158515730528, "learning_rate": 1.0000000000000002e-06, "loss": 0.7407, "step": 2 }, { "epoch": 0.001458966565349544, "grad_norm": 0.31676767269315387, "learning_rate": 1.5e-06, "loss": 0.7369, "step": 3 }, { "epoch": 0.0019452887537993921, "grad_norm": 0.326261473546936, "learning_rate": 2.0000000000000003e-06, "loss": 0.7904, "step": 4 }, { "epoch": 0.0024316109422492403, "grad_norm": 0.3264110392881449, "learning_rate": 2.5e-06, "loss": 0.8081, "step": 5 }, { "epoch": 0.002917933130699088, "grad_norm": 0.29499153993503074, "learning_rate": 3e-06, "loss": 0.7963, "step": 6 }, { "epoch": 0.003404255319148936, "grad_norm": 0.3391905332281365, "learning_rate": 3.5e-06, "loss": 0.7669, "step": 7 }, { "epoch": 0.0038905775075987843, "grad_norm": 0.29078806642007365, "learning_rate": 4.000000000000001e-06, "loss": 0.7713, "step": 8 }, { "epoch": 0.004376899696048632, "grad_norm": 0.28565706294072274, "learning_rate": 4.5e-06, "loss": 0.7614, "step": 9 }, { "epoch": 0.004863221884498481, "grad_norm": 0.1905001372976004, "learning_rate": 5e-06, "loss": 0.7764, "step": 10 }, { "epoch": 0.005349544072948328, "grad_norm": 0.19436409340543703, "learning_rate": 5.500000000000001e-06, "loss": 0.7218, "step": 11 }, { "epoch": 0.005835866261398176, "grad_norm": 0.18034219716480748, "learning_rate": 6e-06, "loss": 0.6978, "step": 12 }, { "epoch": 0.006322188449848025, "grad_norm": 0.16176764190446236, "learning_rate": 6.5000000000000004e-06, "loss": 0.7673, "step": 13 }, { "epoch": 0.006808510638297872, "grad_norm": 0.1840359774691012, "learning_rate": 7e-06, "loss": 0.7289, "step": 14 }, { "epoch": 0.00729483282674772, "grad_norm": 0.19452307183227896, "learning_rate": 7.500000000000001e-06, "loss": 0.6935, "step": 15 }, { "epoch": 0.007781155015197569, "grad_norm": 0.18147032815544542, "learning_rate": 8.000000000000001e-06, "loss": 0.7184, "step": 16 }, { "epoch": 0.008267477203647417, "grad_norm": 0.14921988529574418, "learning_rate": 8.5e-06, "loss": 0.7208, "step": 17 }, { "epoch": 0.008753799392097264, "grad_norm": 0.13370296313161623, "learning_rate": 9e-06, "loss": 0.6778, "step": 18 }, { "epoch": 0.009240121580547113, "grad_norm": 0.14257941486401873, "learning_rate": 9.5e-06, "loss": 0.6935, "step": 19 }, { "epoch": 0.009726443768996961, "grad_norm": 0.13681987045190647, "learning_rate": 1e-05, "loss": 0.641, "step": 20 }, { "epoch": 0.010212765957446808, "grad_norm": 0.14011042403475713, "learning_rate": 9.99999963340339e-06, "loss": 0.6986, "step": 21 }, { "epoch": 0.010699088145896657, "grad_norm": 0.12723168311084196, "learning_rate": 9.999998533613611e-06, "loss": 0.6693, "step": 22 }, { "epoch": 0.011185410334346505, "grad_norm": 0.13487668537848563, "learning_rate": 9.999996700630827e-06, "loss": 0.7116, "step": 23 }, { "epoch": 0.011671732522796352, "grad_norm": 0.14091846193535895, "learning_rate": 9.999994134455306e-06, "loss": 0.7058, "step": 24 }, { "epoch": 0.0121580547112462, "grad_norm": 0.14196587593318416, "learning_rate": 9.999990835087423e-06, "loss": 0.6628, "step": 25 }, { "epoch": 0.01264437689969605, "grad_norm": 0.1303053296835844, "learning_rate": 9.999986802527664e-06, "loss": 0.654, "step": 26 }, { "epoch": 0.013130699088145896, "grad_norm": 0.12532311300629753, "learning_rate": 9.999982036776617e-06, "loss": 0.6745, "step": 27 }, { "epoch": 0.013617021276595745, "grad_norm": 0.12361891832951762, "learning_rate": 9.999976537834983e-06, "loss": 0.7084, "step": 28 }, { "epoch": 0.014103343465045593, "grad_norm": 0.12688676168891824, "learning_rate": 9.99997030570357e-06, "loss": 0.7141, "step": 29 }, { "epoch": 0.01458966565349544, "grad_norm": 0.1225998310166146, "learning_rate": 9.999963340383288e-06, "loss": 0.6663, "step": 30 }, { "epoch": 0.015075987841945289, "grad_norm": 0.12170075854441824, "learning_rate": 9.999955641875162e-06, "loss": 0.6965, "step": 31 }, { "epoch": 0.015562310030395137, "grad_norm": 0.1221017771612701, "learning_rate": 9.999947210180319e-06, "loss": 0.7278, "step": 32 }, { "epoch": 0.016048632218844984, "grad_norm": 0.1235116174205995, "learning_rate": 9.999938045299996e-06, "loss": 0.7131, "step": 33 }, { "epoch": 0.016534954407294834, "grad_norm": 0.12123547248834239, "learning_rate": 9.999928147235536e-06, "loss": 0.6628, "step": 34 }, { "epoch": 0.01702127659574468, "grad_norm": 0.1209956452505472, "learning_rate": 9.99991751598839e-06, "loss": 0.6924, "step": 35 }, { "epoch": 0.017507598784194528, "grad_norm": 0.12174816723148756, "learning_rate": 9.999906151560122e-06, "loss": 0.6386, "step": 36 }, { "epoch": 0.01799392097264438, "grad_norm": 0.12194164456847331, "learning_rate": 9.999894053952391e-06, "loss": 0.6598, "step": 37 }, { "epoch": 0.018480243161094225, "grad_norm": 0.12109765845671047, "learning_rate": 9.999881223166976e-06, "loss": 0.6792, "step": 38 }, { "epoch": 0.018966565349544072, "grad_norm": 0.11665211787806205, "learning_rate": 9.999867659205758e-06, "loss": 0.7378, "step": 39 }, { "epoch": 0.019452887537993922, "grad_norm": 0.12184656948093012, "learning_rate": 9.999853362070724e-06, "loss": 0.7125, "step": 40 }, { "epoch": 0.01993920972644377, "grad_norm": 0.1234263613036688, "learning_rate": 9.99983833176397e-06, "loss": 0.6814, "step": 41 }, { "epoch": 0.020425531914893616, "grad_norm": 0.11468002752463782, "learning_rate": 9.999822568287703e-06, "loss": 0.6636, "step": 42 }, { "epoch": 0.020911854103343466, "grad_norm": 0.11842038191755005, "learning_rate": 9.999806071644234e-06, "loss": 0.6381, "step": 43 }, { "epoch": 0.021398176291793313, "grad_norm": 0.1144864666549832, "learning_rate": 9.999788841835981e-06, "loss": 0.6846, "step": 44 }, { "epoch": 0.02188449848024316, "grad_norm": 0.11904181909619377, "learning_rate": 9.999770878865469e-06, "loss": 0.6435, "step": 45 }, { "epoch": 0.02237082066869301, "grad_norm": 0.12234067261973151, "learning_rate": 9.999752182735335e-06, "loss": 0.7094, "step": 46 }, { "epoch": 0.022857142857142857, "grad_norm": 0.11794427564135766, "learning_rate": 9.999732753448318e-06, "loss": 0.6986, "step": 47 }, { "epoch": 0.023343465045592704, "grad_norm": 0.12072167177150002, "learning_rate": 9.99971259100727e-06, "loss": 0.6604, "step": 48 }, { "epoch": 0.023829787234042554, "grad_norm": 0.11996908580390374, "learning_rate": 9.999691695415146e-06, "loss": 0.6842, "step": 49 }, { "epoch": 0.0243161094224924, "grad_norm": 0.12186788928863322, "learning_rate": 9.99967006667501e-06, "loss": 0.6848, "step": 50 }, { "epoch": 0.024802431610942248, "grad_norm": 0.1179648173315135, "learning_rate": 9.999647704790032e-06, "loss": 0.6662, "step": 51 }, { "epoch": 0.0252887537993921, "grad_norm": 0.12178429099697184, "learning_rate": 9.999624609763495e-06, "loss": 0.7101, "step": 52 }, { "epoch": 0.025775075987841945, "grad_norm": 0.12246757168040684, "learning_rate": 9.999600781598783e-06, "loss": 0.7063, "step": 53 }, { "epoch": 0.026261398176291792, "grad_norm": 0.1229181946656624, "learning_rate": 9.99957622029939e-06, "loss": 0.6585, "step": 54 }, { "epoch": 0.026747720364741642, "grad_norm": 0.12636155475725114, "learning_rate": 9.999550925868919e-06, "loss": 0.6953, "step": 55 }, { "epoch": 0.02723404255319149, "grad_norm": 0.12015041575765832, "learning_rate": 9.999524898311077e-06, "loss": 0.6971, "step": 56 }, { "epoch": 0.027720364741641336, "grad_norm": 0.11512943579786336, "learning_rate": 9.999498137629684e-06, "loss": 0.6552, "step": 57 }, { "epoch": 0.028206686930091186, "grad_norm": 0.1213401086498426, "learning_rate": 9.999470643828662e-06, "loss": 0.6479, "step": 58 }, { "epoch": 0.028693009118541033, "grad_norm": 0.12091003596412728, "learning_rate": 9.99944241691204e-06, "loss": 0.6516, "step": 59 }, { "epoch": 0.02917933130699088, "grad_norm": 0.12373118381179596, "learning_rate": 9.999413456883963e-06, "loss": 0.6657, "step": 60 }, { "epoch": 0.02966565349544073, "grad_norm": 0.12080391465640891, "learning_rate": 9.999383763748673e-06, "loss": 0.6375, "step": 61 }, { "epoch": 0.030151975683890577, "grad_norm": 0.11849737888049058, "learning_rate": 9.999353337510526e-06, "loss": 0.6786, "step": 62 }, { "epoch": 0.030638297872340424, "grad_norm": 0.11963326720084462, "learning_rate": 9.999322178173985e-06, "loss": 0.6483, "step": 63 }, { "epoch": 0.031124620060790274, "grad_norm": 0.11698726914446085, "learning_rate": 9.999290285743617e-06, "loss": 0.6765, "step": 64 }, { "epoch": 0.031610942249240125, "grad_norm": 0.11522309731346195, "learning_rate": 9.999257660224098e-06, "loss": 0.6514, "step": 65 }, { "epoch": 0.03209726443768997, "grad_norm": 0.12142778040603465, "learning_rate": 9.999224301620214e-06, "loss": 0.6607, "step": 66 }, { "epoch": 0.03258358662613982, "grad_norm": 0.11833572736427499, "learning_rate": 9.999190209936857e-06, "loss": 0.659, "step": 67 }, { "epoch": 0.03306990881458967, "grad_norm": 0.12175472926875927, "learning_rate": 9.999155385179025e-06, "loss": 0.6543, "step": 68 }, { "epoch": 0.03355623100303951, "grad_norm": 0.11724310675083763, "learning_rate": 9.999119827351824e-06, "loss": 0.69, "step": 69 }, { "epoch": 0.03404255319148936, "grad_norm": 0.11899996574595607, "learning_rate": 9.99908353646047e-06, "loss": 0.6891, "step": 70 }, { "epoch": 0.03452887537993921, "grad_norm": 0.11978458478743302, "learning_rate": 9.999046512510284e-06, "loss": 0.6669, "step": 71 }, { "epoch": 0.035015197568389056, "grad_norm": 0.12777701402284913, "learning_rate": 9.999008755506694e-06, "loss": 0.6619, "step": 72 }, { "epoch": 0.035501519756838906, "grad_norm": 0.12196178571987971, "learning_rate": 9.998970265455238e-06, "loss": 0.645, "step": 73 }, { "epoch": 0.03598784194528876, "grad_norm": 0.11813820328725896, "learning_rate": 9.99893104236156e-06, "loss": 0.6387, "step": 74 }, { "epoch": 0.0364741641337386, "grad_norm": 0.12029825188155972, "learning_rate": 9.99889108623141e-06, "loss": 0.7012, "step": 75 }, { "epoch": 0.03696048632218845, "grad_norm": 0.11227692399158924, "learning_rate": 9.99885039707065e-06, "loss": 0.6643, "step": 76 }, { "epoch": 0.0374468085106383, "grad_norm": 0.11308607110289605, "learning_rate": 9.998808974885244e-06, "loss": 0.6366, "step": 77 }, { "epoch": 0.037933130699088144, "grad_norm": 0.11722293546518353, "learning_rate": 9.998766819681268e-06, "loss": 0.6785, "step": 78 }, { "epoch": 0.038419452887537994, "grad_norm": 0.11778874372386479, "learning_rate": 9.9987239314649e-06, "loss": 0.6631, "step": 79 }, { "epoch": 0.038905775075987845, "grad_norm": 0.11393109271304705, "learning_rate": 9.998680310242434e-06, "loss": 0.6447, "step": 80 }, { "epoch": 0.03939209726443769, "grad_norm": 0.11352105585660828, "learning_rate": 9.998635956020263e-06, "loss": 0.6953, "step": 81 }, { "epoch": 0.03987841945288754, "grad_norm": 0.11492422904092232, "learning_rate": 9.998590868804895e-06, "loss": 0.6497, "step": 82 }, { "epoch": 0.04036474164133739, "grad_norm": 0.11216849249930264, "learning_rate": 9.998545048602938e-06, "loss": 0.5993, "step": 83 }, { "epoch": 0.04085106382978723, "grad_norm": 0.11208333477392295, "learning_rate": 9.99849849542111e-06, "loss": 0.6423, "step": 84 }, { "epoch": 0.04133738601823708, "grad_norm": 0.11004452394150985, "learning_rate": 9.99845120926624e-06, "loss": 0.6814, "step": 85 }, { "epoch": 0.04182370820668693, "grad_norm": 0.1093933685979544, "learning_rate": 9.99840319014526e-06, "loss": 0.6342, "step": 86 }, { "epoch": 0.042310030395136776, "grad_norm": 0.10475010595542357, "learning_rate": 9.998354438065215e-06, "loss": 0.6252, "step": 87 }, { "epoch": 0.042796352583586626, "grad_norm": 0.10849079308705853, "learning_rate": 9.99830495303325e-06, "loss": 0.6482, "step": 88 }, { "epoch": 0.04328267477203648, "grad_norm": 0.1109051843507213, "learning_rate": 9.998254735056624e-06, "loss": 0.6181, "step": 89 }, { "epoch": 0.04376899696048632, "grad_norm": 0.10347472557390267, "learning_rate": 9.998203784142701e-06, "loss": 0.6496, "step": 90 }, { "epoch": 0.04425531914893617, "grad_norm": 0.1023494893362925, "learning_rate": 9.998152100298952e-06, "loss": 0.6889, "step": 91 }, { "epoch": 0.04474164133738602, "grad_norm": 0.10128090895450541, "learning_rate": 9.998099683532953e-06, "loss": 0.6375, "step": 92 }, { "epoch": 0.045227963525835864, "grad_norm": 0.1010149046358367, "learning_rate": 9.998046533852395e-06, "loss": 0.666, "step": 93 }, { "epoch": 0.045714285714285714, "grad_norm": 0.09733547430095113, "learning_rate": 9.997992651265067e-06, "loss": 0.6296, "step": 94 }, { "epoch": 0.046200607902735565, "grad_norm": 0.09621884123455722, "learning_rate": 9.997938035778874e-06, "loss": 0.638, "step": 95 }, { "epoch": 0.04668693009118541, "grad_norm": 0.09502640263983121, "learning_rate": 9.997882687401823e-06, "loss": 0.6465, "step": 96 }, { "epoch": 0.04717325227963526, "grad_norm": 0.09792477904150157, "learning_rate": 9.997826606142031e-06, "loss": 0.6196, "step": 97 }, { "epoch": 0.04765957446808511, "grad_norm": 0.09964761338523928, "learning_rate": 9.997769792007721e-06, "loss": 0.6903, "step": 98 }, { "epoch": 0.04814589665653495, "grad_norm": 0.09468446698771453, "learning_rate": 9.997712245007225e-06, "loss": 0.6601, "step": 99 }, { "epoch": 0.0486322188449848, "grad_norm": 0.09135140026417463, "learning_rate": 9.997653965148978e-06, "loss": 0.6785, "step": 100 }, { "epoch": 0.04911854103343465, "grad_norm": 0.09305247116557831, "learning_rate": 9.997594952441533e-06, "loss": 0.6497, "step": 101 }, { "epoch": 0.049604863221884496, "grad_norm": 0.08954055344921939, "learning_rate": 9.997535206893538e-06, "loss": 0.6684, "step": 102 }, { "epoch": 0.050091185410334346, "grad_norm": 0.09406151851787839, "learning_rate": 9.997474728513757e-06, "loss": 0.6902, "step": 103 }, { "epoch": 0.0505775075987842, "grad_norm": 0.08570960033414718, "learning_rate": 9.997413517311055e-06, "loss": 0.6376, "step": 104 }, { "epoch": 0.05106382978723404, "grad_norm": 0.0838484001210435, "learning_rate": 9.997351573294412e-06, "loss": 0.631, "step": 105 }, { "epoch": 0.05155015197568389, "grad_norm": 0.0842468989312265, "learning_rate": 9.997288896472907e-06, "loss": 0.619, "step": 106 }, { "epoch": 0.05203647416413374, "grad_norm": 0.09010940006399906, "learning_rate": 9.997225486855735e-06, "loss": 0.6707, "step": 107 }, { "epoch": 0.052522796352583584, "grad_norm": 0.0808816638089893, "learning_rate": 9.997161344452194e-06, "loss": 0.648, "step": 108 }, { "epoch": 0.053009118541033434, "grad_norm": 0.08337249166573397, "learning_rate": 9.997096469271686e-06, "loss": 0.6594, "step": 109 }, { "epoch": 0.053495440729483285, "grad_norm": 0.08747506870175674, "learning_rate": 9.997030861323728e-06, "loss": 0.6706, "step": 110 }, { "epoch": 0.05398176291793313, "grad_norm": 0.0798971427458053, "learning_rate": 9.996964520617938e-06, "loss": 0.6515, "step": 111 }, { "epoch": 0.05446808510638298, "grad_norm": 0.09064111340606204, "learning_rate": 9.996897447164047e-06, "loss": 0.6413, "step": 112 }, { "epoch": 0.05495440729483283, "grad_norm": 0.08853314144771497, "learning_rate": 9.996829640971888e-06, "loss": 0.6524, "step": 113 }, { "epoch": 0.05544072948328267, "grad_norm": 0.07849763044568041, "learning_rate": 9.996761102051404e-06, "loss": 0.6399, "step": 114 }, { "epoch": 0.05592705167173252, "grad_norm": 0.07796705560689386, "learning_rate": 9.996691830412649e-06, "loss": 0.6382, "step": 115 }, { "epoch": 0.05641337386018237, "grad_norm": 0.0798110601596513, "learning_rate": 9.996621826065776e-06, "loss": 0.6282, "step": 116 }, { "epoch": 0.056899696048632216, "grad_norm": 0.08267540446214865, "learning_rate": 9.996551089021051e-06, "loss": 0.6714, "step": 117 }, { "epoch": 0.057386018237082066, "grad_norm": 0.07829609873617312, "learning_rate": 9.996479619288853e-06, "loss": 0.6554, "step": 118 }, { "epoch": 0.05787234042553192, "grad_norm": 0.08075387885281121, "learning_rate": 9.996407416879654e-06, "loss": 0.6747, "step": 119 }, { "epoch": 0.05835866261398176, "grad_norm": 0.08397011690681794, "learning_rate": 9.996334481804047e-06, "loss": 0.6902, "step": 120 }, { "epoch": 0.05884498480243161, "grad_norm": 0.08116164351180236, "learning_rate": 9.996260814072725e-06, "loss": 0.6744, "step": 121 }, { "epoch": 0.05933130699088146, "grad_norm": 0.07428881827032656, "learning_rate": 9.99618641369649e-06, "loss": 0.6376, "step": 122 }, { "epoch": 0.059817629179331304, "grad_norm": 0.07660023165080271, "learning_rate": 9.996111280686254e-06, "loss": 0.651, "step": 123 }, { "epoch": 0.060303951367781154, "grad_norm": 0.07500605550465393, "learning_rate": 9.996035415053032e-06, "loss": 0.6416, "step": 124 }, { "epoch": 0.060790273556231005, "grad_norm": 0.07986814893372031, "learning_rate": 9.995958816807951e-06, "loss": 0.6687, "step": 125 }, { "epoch": 0.06127659574468085, "grad_norm": 0.0804707259260744, "learning_rate": 9.995881485962243e-06, "loss": 0.6622, "step": 126 }, { "epoch": 0.0617629179331307, "grad_norm": 0.07626838879851239, "learning_rate": 9.995803422527246e-06, "loss": 0.6189, "step": 127 }, { "epoch": 0.06224924012158055, "grad_norm": 0.07676272450717668, "learning_rate": 9.99572462651441e-06, "loss": 0.6209, "step": 128 }, { "epoch": 0.0627355623100304, "grad_norm": 0.0795701961265009, "learning_rate": 9.995645097935285e-06, "loss": 0.6701, "step": 129 }, { "epoch": 0.06322188449848025, "grad_norm": 0.07338760681445435, "learning_rate": 9.995564836801538e-06, "loss": 0.6359, "step": 130 }, { "epoch": 0.06370820668693009, "grad_norm": 0.07615608704119091, "learning_rate": 9.995483843124933e-06, "loss": 0.6762, "step": 131 }, { "epoch": 0.06419452887537994, "grad_norm": 0.0774115676139232, "learning_rate": 9.995402116917353e-06, "loss": 0.6565, "step": 132 }, { "epoch": 0.06468085106382979, "grad_norm": 0.0728271064207136, "learning_rate": 9.995319658190778e-06, "loss": 0.6381, "step": 133 }, { "epoch": 0.06516717325227964, "grad_norm": 0.07473810238831956, "learning_rate": 9.995236466957301e-06, "loss": 0.6685, "step": 134 }, { "epoch": 0.06565349544072949, "grad_norm": 0.07778393318867559, "learning_rate": 9.995152543229122e-06, "loss": 0.7078, "step": 135 }, { "epoch": 0.06613981762917934, "grad_norm": 0.07654598128919217, "learning_rate": 9.995067887018544e-06, "loss": 0.6695, "step": 136 }, { "epoch": 0.06662613981762917, "grad_norm": 0.07368534573201836, "learning_rate": 9.994982498337985e-06, "loss": 0.6264, "step": 137 }, { "epoch": 0.06711246200607902, "grad_norm": 0.07755422239785308, "learning_rate": 9.994896377199962e-06, "loss": 0.6673, "step": 138 }, { "epoch": 0.06759878419452887, "grad_norm": 0.0739152449458426, "learning_rate": 9.994809523617109e-06, "loss": 0.617, "step": 139 }, { "epoch": 0.06808510638297872, "grad_norm": 0.07061573569583886, "learning_rate": 9.994721937602157e-06, "loss": 0.6225, "step": 140 }, { "epoch": 0.06857142857142857, "grad_norm": 0.07457515709097928, "learning_rate": 9.994633619167953e-06, "loss": 0.6056, "step": 141 }, { "epoch": 0.06905775075987843, "grad_norm": 0.07792013158490566, "learning_rate": 9.994544568327445e-06, "loss": 0.649, "step": 142 }, { "epoch": 0.06954407294832826, "grad_norm": 0.07351487856534614, "learning_rate": 9.994454785093695e-06, "loss": 0.6279, "step": 143 }, { "epoch": 0.07003039513677811, "grad_norm": 0.07577260988839267, "learning_rate": 9.994364269479863e-06, "loss": 0.6703, "step": 144 }, { "epoch": 0.07051671732522796, "grad_norm": 0.08086276565885057, "learning_rate": 9.99427302149923e-06, "loss": 0.6939, "step": 145 }, { "epoch": 0.07100303951367781, "grad_norm": 0.07932628326597181, "learning_rate": 9.994181041165169e-06, "loss": 0.6415, "step": 146 }, { "epoch": 0.07148936170212766, "grad_norm": 0.0800659998687647, "learning_rate": 9.994088328491173e-06, "loss": 0.6587, "step": 147 }, { "epoch": 0.07197568389057751, "grad_norm": 0.07676164938191389, "learning_rate": 9.993994883490834e-06, "loss": 0.699, "step": 148 }, { "epoch": 0.07246200607902735, "grad_norm": 0.07354122008105174, "learning_rate": 9.993900706177857e-06, "loss": 0.6276, "step": 149 }, { "epoch": 0.0729483282674772, "grad_norm": 0.07430454020811418, "learning_rate": 9.99380579656605e-06, "loss": 0.6437, "step": 150 }, { "epoch": 0.07343465045592705, "grad_norm": 0.07562772046850341, "learning_rate": 9.993710154669332e-06, "loss": 0.6438, "step": 151 }, { "epoch": 0.0739209726443769, "grad_norm": 0.07644640073684972, "learning_rate": 9.993613780501727e-06, "loss": 0.6343, "step": 152 }, { "epoch": 0.07440729483282675, "grad_norm": 0.07181629428203033, "learning_rate": 9.993516674077367e-06, "loss": 0.6513, "step": 153 }, { "epoch": 0.0748936170212766, "grad_norm": 0.07509996315055295, "learning_rate": 9.99341883541049e-06, "loss": 0.6626, "step": 154 }, { "epoch": 0.07537993920972644, "grad_norm": 0.08578615670234167, "learning_rate": 9.993320264515448e-06, "loss": 0.6339, "step": 155 }, { "epoch": 0.07586626139817629, "grad_norm": 0.06955973492988606, "learning_rate": 9.99322096140669e-06, "loss": 0.6177, "step": 156 }, { "epoch": 0.07635258358662614, "grad_norm": 0.07267545309129311, "learning_rate": 9.993120926098781e-06, "loss": 0.6342, "step": 157 }, { "epoch": 0.07683890577507599, "grad_norm": 0.07832105979852015, "learning_rate": 9.99302015860639e-06, "loss": 0.6736, "step": 158 }, { "epoch": 0.07732522796352584, "grad_norm": 0.07777012583112805, "learning_rate": 9.99291865894429e-06, "loss": 0.6419, "step": 159 }, { "epoch": 0.07781155015197569, "grad_norm": 0.0765011036786609, "learning_rate": 9.992816427127367e-06, "loss": 0.6309, "step": 160 }, { "epoch": 0.07829787234042553, "grad_norm": 0.07604964078473068, "learning_rate": 9.992713463170613e-06, "loss": 0.6362, "step": 161 }, { "epoch": 0.07878419452887538, "grad_norm": 0.07603084698259067, "learning_rate": 9.992609767089127e-06, "loss": 0.6715, "step": 162 }, { "epoch": 0.07927051671732523, "grad_norm": 0.07222366232326545, "learning_rate": 9.992505338898113e-06, "loss": 0.6433, "step": 163 }, { "epoch": 0.07975683890577508, "grad_norm": 0.07294061717851048, "learning_rate": 9.992400178612882e-06, "loss": 0.6329, "step": 164 }, { "epoch": 0.08024316109422493, "grad_norm": 0.07461766568616675, "learning_rate": 9.99229428624886e-06, "loss": 0.6437, "step": 165 }, { "epoch": 0.08072948328267478, "grad_norm": 0.07066059778730192, "learning_rate": 9.99218766182157e-06, "loss": 0.6429, "step": 166 }, { "epoch": 0.08121580547112461, "grad_norm": 0.07932064552397436, "learning_rate": 9.992080305346652e-06, "loss": 0.6924, "step": 167 }, { "epoch": 0.08170212765957446, "grad_norm": 0.07387041791715146, "learning_rate": 9.991972216839845e-06, "loss": 0.6281, "step": 168 }, { "epoch": 0.08218844984802431, "grad_norm": 0.07464034222960271, "learning_rate": 9.991863396317e-06, "loss": 0.6719, "step": 169 }, { "epoch": 0.08267477203647416, "grad_norm": 0.07629524145920062, "learning_rate": 9.991753843794072e-06, "loss": 0.6822, "step": 170 }, { "epoch": 0.08316109422492401, "grad_norm": 0.0717343177682337, "learning_rate": 9.991643559287131e-06, "loss": 0.6253, "step": 171 }, { "epoch": 0.08364741641337387, "grad_norm": 0.074456963337064, "learning_rate": 9.991532542812345e-06, "loss": 0.6728, "step": 172 }, { "epoch": 0.0841337386018237, "grad_norm": 0.08032867027111115, "learning_rate": 9.991420794385994e-06, "loss": 0.6283, "step": 173 }, { "epoch": 0.08462006079027355, "grad_norm": 0.07605022839286113, "learning_rate": 9.991308314024466e-06, "loss": 0.6265, "step": 174 }, { "epoch": 0.0851063829787234, "grad_norm": 0.07803743225470429, "learning_rate": 9.99119510174425e-06, "loss": 0.6516, "step": 175 }, { "epoch": 0.08559270516717325, "grad_norm": 0.07566298812696846, "learning_rate": 9.991081157561955e-06, "loss": 0.6559, "step": 176 }, { "epoch": 0.0860790273556231, "grad_norm": 0.07953039318586384, "learning_rate": 9.990966481494285e-06, "loss": 0.6199, "step": 177 }, { "epoch": 0.08656534954407295, "grad_norm": 0.0798743101352713, "learning_rate": 9.990851073558056e-06, "loss": 0.6279, "step": 178 }, { "epoch": 0.08705167173252279, "grad_norm": 0.07549964902318917, "learning_rate": 9.990734933770192e-06, "loss": 0.6357, "step": 179 }, { "epoch": 0.08753799392097264, "grad_norm": 0.08140632061403627, "learning_rate": 9.990618062147724e-06, "loss": 0.6924, "step": 180 }, { "epoch": 0.08802431610942249, "grad_norm": 0.07373636899236395, "learning_rate": 9.99050045870779e-06, "loss": 0.638, "step": 181 }, { "epoch": 0.08851063829787234, "grad_norm": 0.0738017611641229, "learning_rate": 9.990382123467633e-06, "loss": 0.6613, "step": 182 }, { "epoch": 0.08899696048632219, "grad_norm": 0.071911012138807, "learning_rate": 9.990263056444607e-06, "loss": 0.6337, "step": 183 }, { "epoch": 0.08948328267477204, "grad_norm": 0.07865641677176818, "learning_rate": 9.990143257656173e-06, "loss": 0.6321, "step": 184 }, { "epoch": 0.08996960486322189, "grad_norm": 0.07330689404202993, "learning_rate": 9.990022727119897e-06, "loss": 0.6563, "step": 185 }, { "epoch": 0.09045592705167173, "grad_norm": 0.07041441409005485, "learning_rate": 9.989901464853454e-06, "loss": 0.6084, "step": 186 }, { "epoch": 0.09094224924012158, "grad_norm": 0.07330065136647741, "learning_rate": 9.989779470874626e-06, "loss": 0.6136, "step": 187 }, { "epoch": 0.09142857142857143, "grad_norm": 0.0759547086678673, "learning_rate": 9.9896567452013e-06, "loss": 0.6564, "step": 188 }, { "epoch": 0.09191489361702128, "grad_norm": 0.07164189294623767, "learning_rate": 9.989533287851472e-06, "loss": 0.6468, "step": 189 }, { "epoch": 0.09240121580547113, "grad_norm": 0.07629127905525687, "learning_rate": 9.989409098843249e-06, "loss": 0.6488, "step": 190 }, { "epoch": 0.09288753799392098, "grad_norm": 0.07471202058897447, "learning_rate": 9.98928417819484e-06, "loss": 0.6249, "step": 191 }, { "epoch": 0.09337386018237082, "grad_norm": 0.07296776823910131, "learning_rate": 9.989158525924562e-06, "loss": 0.6313, "step": 192 }, { "epoch": 0.09386018237082067, "grad_norm": 0.07251456227592262, "learning_rate": 9.989032142050845e-06, "loss": 0.6364, "step": 193 }, { "epoch": 0.09434650455927052, "grad_norm": 0.07930573859948695, "learning_rate": 9.988905026592217e-06, "loss": 0.6566, "step": 194 }, { "epoch": 0.09483282674772037, "grad_norm": 0.0726289212935777, "learning_rate": 9.98877717956732e-06, "loss": 0.6428, "step": 195 }, { "epoch": 0.09531914893617022, "grad_norm": 0.07236931893551049, "learning_rate": 9.988648600994898e-06, "loss": 0.6315, "step": 196 }, { "epoch": 0.09580547112462007, "grad_norm": 0.07250340091652571, "learning_rate": 9.988519290893813e-06, "loss": 0.6322, "step": 197 }, { "epoch": 0.0962917933130699, "grad_norm": 0.07269441037382905, "learning_rate": 9.988389249283019e-06, "loss": 0.6359, "step": 198 }, { "epoch": 0.09677811550151975, "grad_norm": 0.07297038121770004, "learning_rate": 9.98825847618159e-06, "loss": 0.6189, "step": 199 }, { "epoch": 0.0972644376899696, "grad_norm": 0.07552803910007803, "learning_rate": 9.9881269716087e-06, "loss": 0.6287, "step": 200 }, { "epoch": 0.09775075987841945, "grad_norm": 0.074487954110049, "learning_rate": 9.987994735583635e-06, "loss": 0.6409, "step": 201 }, { "epoch": 0.0982370820668693, "grad_norm": 0.07236572318666377, "learning_rate": 9.987861768125783e-06, "loss": 0.6327, "step": 202 }, { "epoch": 0.09872340425531916, "grad_norm": 0.07322032012079707, "learning_rate": 9.987728069254645e-06, "loss": 0.6251, "step": 203 }, { "epoch": 0.09920972644376899, "grad_norm": 0.07840698977706183, "learning_rate": 9.987593638989824e-06, "loss": 0.6162, "step": 204 }, { "epoch": 0.09969604863221884, "grad_norm": 0.0703988397218052, "learning_rate": 9.987458477351034e-06, "loss": 0.5898, "step": 205 }, { "epoch": 0.10018237082066869, "grad_norm": 0.07562550442182135, "learning_rate": 9.987322584358095e-06, "loss": 0.6618, "step": 206 }, { "epoch": 0.10066869300911854, "grad_norm": 0.07466455915688629, "learning_rate": 9.987185960030933e-06, "loss": 0.6193, "step": 207 }, { "epoch": 0.1011550151975684, "grad_norm": 0.07223918433037317, "learning_rate": 9.987048604389584e-06, "loss": 0.6221, "step": 208 }, { "epoch": 0.10164133738601824, "grad_norm": 0.08110884305727797, "learning_rate": 9.986910517454188e-06, "loss": 0.6141, "step": 209 }, { "epoch": 0.10212765957446808, "grad_norm": 0.07312053516960584, "learning_rate": 9.986771699244995e-06, "loss": 0.6015, "step": 210 }, { "epoch": 0.10261398176291793, "grad_norm": 0.07426585754655243, "learning_rate": 9.986632149782362e-06, "loss": 0.6064, "step": 211 }, { "epoch": 0.10310030395136778, "grad_norm": 0.07524094590731174, "learning_rate": 9.98649186908675e-06, "loss": 0.6475, "step": 212 }, { "epoch": 0.10358662613981763, "grad_norm": 0.07189953499669959, "learning_rate": 9.98635085717873e-06, "loss": 0.6157, "step": 213 }, { "epoch": 0.10407294832826748, "grad_norm": 0.07289407133976893, "learning_rate": 9.986209114078982e-06, "loss": 0.6148, "step": 214 }, { "epoch": 0.10455927051671733, "grad_norm": 0.08151355057726764, "learning_rate": 9.98606663980829e-06, "loss": 0.6475, "step": 215 }, { "epoch": 0.10504559270516717, "grad_norm": 0.07638129409651782, "learning_rate": 9.985923434387545e-06, "loss": 0.6439, "step": 216 }, { "epoch": 0.10553191489361702, "grad_norm": 0.07633020661462125, "learning_rate": 9.985779497837748e-06, "loss": 0.67, "step": 217 }, { "epoch": 0.10601823708206687, "grad_norm": 0.07429493171857507, "learning_rate": 9.985634830180005e-06, "loss": 0.6345, "step": 218 }, { "epoch": 0.10650455927051672, "grad_norm": 0.07294904642499844, "learning_rate": 9.985489431435528e-06, "loss": 0.6303, "step": 219 }, { "epoch": 0.10699088145896657, "grad_norm": 0.07922924783596189, "learning_rate": 9.98534330162564e-06, "loss": 0.6402, "step": 220 }, { "epoch": 0.10747720364741642, "grad_norm": 0.0792066395615164, "learning_rate": 9.985196440771771e-06, "loss": 0.6673, "step": 221 }, { "epoch": 0.10796352583586626, "grad_norm": 0.07417640799704432, "learning_rate": 9.985048848895454e-06, "loss": 0.6355, "step": 222 }, { "epoch": 0.1084498480243161, "grad_norm": 0.07534901438232722, "learning_rate": 9.984900526018331e-06, "loss": 0.6339, "step": 223 }, { "epoch": 0.10893617021276596, "grad_norm": 0.07525590620754326, "learning_rate": 9.984751472162154e-06, "loss": 0.6754, "step": 224 }, { "epoch": 0.1094224924012158, "grad_norm": 0.07585142840936479, "learning_rate": 9.98460168734878e-06, "loss": 0.6423, "step": 225 }, { "epoch": 0.10990881458966566, "grad_norm": 0.07509447600300756, "learning_rate": 9.984451171600171e-06, "loss": 0.6108, "step": 226 }, { "epoch": 0.11039513677811551, "grad_norm": 0.07379294315072249, "learning_rate": 9.9842999249384e-06, "loss": 0.649, "step": 227 }, { "epoch": 0.11088145896656534, "grad_norm": 0.08109799647676486, "learning_rate": 9.984147947385647e-06, "loss": 0.6824, "step": 228 }, { "epoch": 0.1113677811550152, "grad_norm": 0.08385890406611128, "learning_rate": 9.983995238964194e-06, "loss": 0.6624, "step": 229 }, { "epoch": 0.11185410334346504, "grad_norm": 0.07371528502111811, "learning_rate": 9.98384179969644e-06, "loss": 0.6019, "step": 230 }, { "epoch": 0.1123404255319149, "grad_norm": 0.07326480823488515, "learning_rate": 9.983687629604879e-06, "loss": 0.6314, "step": 231 }, { "epoch": 0.11282674772036475, "grad_norm": 0.07698464930942882, "learning_rate": 9.98353272871212e-06, "loss": 0.6957, "step": 232 }, { "epoch": 0.1133130699088146, "grad_norm": 0.08105359686436309, "learning_rate": 9.983377097040879e-06, "loss": 0.6538, "step": 233 }, { "epoch": 0.11379939209726443, "grad_norm": 0.07594073018857872, "learning_rate": 9.983220734613975e-06, "loss": 0.6422, "step": 234 }, { "epoch": 0.11428571428571428, "grad_norm": 0.07639549994262636, "learning_rate": 9.98306364145434e-06, "loss": 0.6732, "step": 235 }, { "epoch": 0.11477203647416413, "grad_norm": 0.07361286876813115, "learning_rate": 9.98290581758501e-06, "loss": 0.6194, "step": 236 }, { "epoch": 0.11525835866261398, "grad_norm": 0.0772227742611177, "learning_rate": 9.982747263029123e-06, "loss": 0.6436, "step": 237 }, { "epoch": 0.11574468085106383, "grad_norm": 0.07973545295674096, "learning_rate": 9.982587977809934e-06, "loss": 0.6783, "step": 238 }, { "epoch": 0.11623100303951368, "grad_norm": 0.07299524102693468, "learning_rate": 9.9824279619508e-06, "loss": 0.608, "step": 239 }, { "epoch": 0.11671732522796352, "grad_norm": 0.07447952526538501, "learning_rate": 9.982267215475186e-06, "loss": 0.6467, "step": 240 }, { "epoch": 0.11720364741641337, "grad_norm": 0.07367224054974836, "learning_rate": 9.98210573840666e-06, "loss": 0.6259, "step": 241 }, { "epoch": 0.11768996960486322, "grad_norm": 0.08029766455887737, "learning_rate": 9.981943530768903e-06, "loss": 0.6468, "step": 242 }, { "epoch": 0.11817629179331307, "grad_norm": 0.08044818330872944, "learning_rate": 9.981780592585702e-06, "loss": 0.6432, "step": 243 }, { "epoch": 0.11866261398176292, "grad_norm": 0.0778126874748524, "learning_rate": 9.981616923880948e-06, "loss": 0.6783, "step": 244 }, { "epoch": 0.11914893617021277, "grad_norm": 0.07142908435344075, "learning_rate": 9.981452524678641e-06, "loss": 0.6235, "step": 245 }, { "epoch": 0.11963525835866261, "grad_norm": 0.07267064743613215, "learning_rate": 9.981287395002892e-06, "loss": 0.5984, "step": 246 }, { "epoch": 0.12012158054711246, "grad_norm": 0.07577047506107071, "learning_rate": 9.981121534877912e-06, "loss": 0.656, "step": 247 }, { "epoch": 0.12060790273556231, "grad_norm": 0.07448927533837468, "learning_rate": 9.980954944328023e-06, "loss": 0.614, "step": 248 }, { "epoch": 0.12109422492401216, "grad_norm": 0.0739277611575981, "learning_rate": 9.980787623377654e-06, "loss": 0.6265, "step": 249 }, { "epoch": 0.12158054711246201, "grad_norm": 0.07265875812751259, "learning_rate": 9.98061957205134e-06, "loss": 0.6118, "step": 250 }, { "epoch": 0.12206686930091186, "grad_norm": 0.07429152601952896, "learning_rate": 9.980450790373724e-06, "loss": 0.645, "step": 251 }, { "epoch": 0.1225531914893617, "grad_norm": 0.07254255502966724, "learning_rate": 9.980281278369558e-06, "loss": 0.6448, "step": 252 }, { "epoch": 0.12303951367781155, "grad_norm": 0.0752170871851315, "learning_rate": 9.980111036063696e-06, "loss": 0.6514, "step": 253 }, { "epoch": 0.1235258358662614, "grad_norm": 0.07374713989231758, "learning_rate": 9.979940063481105e-06, "loss": 0.6283, "step": 254 }, { "epoch": 0.12401215805471125, "grad_norm": 0.07797996972352954, "learning_rate": 9.979768360646854e-06, "loss": 0.687, "step": 255 }, { "epoch": 0.1244984802431611, "grad_norm": 0.07532383141165228, "learning_rate": 9.97959592758612e-06, "loss": 0.6211, "step": 256 }, { "epoch": 0.12498480243161095, "grad_norm": 0.07408087865565535, "learning_rate": 9.979422764324193e-06, "loss": 0.635, "step": 257 }, { "epoch": 0.1254711246200608, "grad_norm": 0.0751721437373112, "learning_rate": 9.979248870886463e-06, "loss": 0.6539, "step": 258 }, { "epoch": 0.12595744680851065, "grad_norm": 0.0764488732522613, "learning_rate": 9.979074247298428e-06, "loss": 0.6647, "step": 259 }, { "epoch": 0.1264437689969605, "grad_norm": 0.07531640440287267, "learning_rate": 9.978898893585695e-06, "loss": 0.6734, "step": 260 }, { "epoch": 0.12693009118541032, "grad_norm": 0.08489543512024499, "learning_rate": 9.978722809773979e-06, "loss": 0.6832, "step": 261 }, { "epoch": 0.12741641337386017, "grad_norm": 0.07450921950140398, "learning_rate": 9.9785459958891e-06, "loss": 0.5894, "step": 262 }, { "epoch": 0.12790273556231002, "grad_norm": 0.07672978323296226, "learning_rate": 9.978368451956986e-06, "loss": 0.6666, "step": 263 }, { "epoch": 0.12838905775075987, "grad_norm": 0.07009524571111454, "learning_rate": 9.978190178003672e-06, "loss": 0.613, "step": 264 }, { "epoch": 0.12887537993920972, "grad_norm": 0.07762736841300429, "learning_rate": 9.9780111740553e-06, "loss": 0.6976, "step": 265 }, { "epoch": 0.12936170212765957, "grad_norm": 0.07306396494022326, "learning_rate": 9.977831440138117e-06, "loss": 0.6047, "step": 266 }, { "epoch": 0.12984802431610942, "grad_norm": 0.07401168336824537, "learning_rate": 9.97765097627848e-06, "loss": 0.6101, "step": 267 }, { "epoch": 0.13033434650455927, "grad_norm": 0.071910194215463, "learning_rate": 9.977469782502853e-06, "loss": 0.5705, "step": 268 }, { "epoch": 0.13082066869300912, "grad_norm": 0.07846985374167931, "learning_rate": 9.977287858837804e-06, "loss": 0.6543, "step": 269 }, { "epoch": 0.13130699088145897, "grad_norm": 0.07686257706733314, "learning_rate": 9.977105205310016e-06, "loss": 0.644, "step": 270 }, { "epoch": 0.13179331306990882, "grad_norm": 0.07512913995712098, "learning_rate": 9.976921821946264e-06, "loss": 0.6017, "step": 271 }, { "epoch": 0.13227963525835867, "grad_norm": 0.07289863724485444, "learning_rate": 9.976737708773445e-06, "loss": 0.6178, "step": 272 }, { "epoch": 0.1327659574468085, "grad_norm": 0.07965974283291472, "learning_rate": 9.976552865818555e-06, "loss": 0.6484, "step": 273 }, { "epoch": 0.13325227963525835, "grad_norm": 0.07738280827994552, "learning_rate": 9.9763672931087e-06, "loss": 0.6514, "step": 274 }, { "epoch": 0.1337386018237082, "grad_norm": 0.0730594725117199, "learning_rate": 9.976180990671092e-06, "loss": 0.6142, "step": 275 }, { "epoch": 0.13422492401215805, "grad_norm": 0.07474972138959583, "learning_rate": 9.97599395853305e-06, "loss": 0.6216, "step": 276 }, { "epoch": 0.1347112462006079, "grad_norm": 0.0778031292425587, "learning_rate": 9.975806196722e-06, "loss": 0.625, "step": 277 }, { "epoch": 0.13519756838905775, "grad_norm": 0.07245982646641651, "learning_rate": 9.975617705265475e-06, "loss": 0.6103, "step": 278 }, { "epoch": 0.1356838905775076, "grad_norm": 0.07575059026747623, "learning_rate": 9.975428484191117e-06, "loss": 0.6164, "step": 279 }, { "epoch": 0.13617021276595745, "grad_norm": 0.07356708561545493, "learning_rate": 9.97523853352667e-06, "loss": 0.6361, "step": 280 }, { "epoch": 0.1366565349544073, "grad_norm": 0.07593902953055177, "learning_rate": 9.97504785329999e-06, "loss": 0.6294, "step": 281 }, { "epoch": 0.13714285714285715, "grad_norm": 0.07033779192369982, "learning_rate": 9.974856443539036e-06, "loss": 0.5876, "step": 282 }, { "epoch": 0.137629179331307, "grad_norm": 0.07453039463703805, "learning_rate": 9.974664304271881e-06, "loss": 0.6153, "step": 283 }, { "epoch": 0.13811550151975685, "grad_norm": 0.07486083974906238, "learning_rate": 9.974471435526694e-06, "loss": 0.6429, "step": 284 }, { "epoch": 0.1386018237082067, "grad_norm": 0.07200964246522952, "learning_rate": 9.974277837331761e-06, "loss": 0.6183, "step": 285 }, { "epoch": 0.13908814589665652, "grad_norm": 0.07562642361751189, "learning_rate": 9.974083509715471e-06, "loss": 0.6278, "step": 286 }, { "epoch": 0.13957446808510637, "grad_norm": 0.07867559069806047, "learning_rate": 9.973888452706317e-06, "loss": 0.6866, "step": 287 }, { "epoch": 0.14006079027355622, "grad_norm": 0.07827332015368069, "learning_rate": 9.973692666332905e-06, "loss": 0.6592, "step": 288 }, { "epoch": 0.14054711246200607, "grad_norm": 0.07484953929573433, "learning_rate": 9.973496150623943e-06, "loss": 0.6283, "step": 289 }, { "epoch": 0.14103343465045592, "grad_norm": 0.07330948170161683, "learning_rate": 9.973298905608248e-06, "loss": 0.6256, "step": 290 }, { "epoch": 0.14151975683890577, "grad_norm": 0.0883851588244503, "learning_rate": 9.973100931314743e-06, "loss": 0.665, "step": 291 }, { "epoch": 0.14200607902735563, "grad_norm": 0.0729883437724811, "learning_rate": 9.972902227772461e-06, "loss": 0.6588, "step": 292 }, { "epoch": 0.14249240121580548, "grad_norm": 0.07442876412386121, "learning_rate": 9.972702795010539e-06, "loss": 0.6616, "step": 293 }, { "epoch": 0.14297872340425533, "grad_norm": 0.07251376051202359, "learning_rate": 9.97250263305822e-06, "loss": 0.5975, "step": 294 }, { "epoch": 0.14346504559270518, "grad_norm": 0.07431377251626112, "learning_rate": 9.972301741944856e-06, "loss": 0.6019, "step": 295 }, { "epoch": 0.14395136778115503, "grad_norm": 0.07618992064548367, "learning_rate": 9.972100121699907e-06, "loss": 0.6404, "step": 296 }, { "epoch": 0.14443768996960488, "grad_norm": 0.07338435809055148, "learning_rate": 9.971897772352936e-06, "loss": 0.64, "step": 297 }, { "epoch": 0.1449240121580547, "grad_norm": 0.0777590300690969, "learning_rate": 9.971694693933617e-06, "loss": 0.6863, "step": 298 }, { "epoch": 0.14541033434650455, "grad_norm": 0.07564757932934363, "learning_rate": 9.971490886471728e-06, "loss": 0.6282, "step": 299 }, { "epoch": 0.1458966565349544, "grad_norm": 0.07584579832665737, "learning_rate": 9.971286349997155e-06, "loss": 0.6513, "step": 300 }, { "epoch": 0.14638297872340425, "grad_norm": 0.07165551524960252, "learning_rate": 9.971081084539893e-06, "loss": 0.6231, "step": 301 }, { "epoch": 0.1468693009118541, "grad_norm": 0.07759548163032844, "learning_rate": 9.97087509013004e-06, "loss": 0.6762, "step": 302 }, { "epoch": 0.14735562310030395, "grad_norm": 0.07583941154315121, "learning_rate": 9.970668366797802e-06, "loss": 0.6395, "step": 303 }, { "epoch": 0.1478419452887538, "grad_norm": 0.07250091980070991, "learning_rate": 9.970460914573494e-06, "loss": 0.6364, "step": 304 }, { "epoch": 0.14832826747720365, "grad_norm": 0.07318863031884103, "learning_rate": 9.970252733487537e-06, "loss": 0.5936, "step": 305 }, { "epoch": 0.1488145896656535, "grad_norm": 0.07446466389801451, "learning_rate": 9.970043823570457e-06, "loss": 0.5781, "step": 306 }, { "epoch": 0.14930091185410335, "grad_norm": 0.07172664341644777, "learning_rate": 9.96983418485289e-06, "loss": 0.5729, "step": 307 }, { "epoch": 0.1497872340425532, "grad_norm": 0.0736030378888872, "learning_rate": 9.969623817365574e-06, "loss": 0.6508, "step": 308 }, { "epoch": 0.15027355623100305, "grad_norm": 0.07465281130369202, "learning_rate": 9.96941272113936e-06, "loss": 0.6472, "step": 309 }, { "epoch": 0.15075987841945288, "grad_norm": 0.07306605030090156, "learning_rate": 9.969200896205201e-06, "loss": 0.6191, "step": 310 }, { "epoch": 0.15124620060790273, "grad_norm": 0.08723717562871942, "learning_rate": 9.96898834259416e-06, "loss": 0.6658, "step": 311 }, { "epoch": 0.15173252279635258, "grad_norm": 0.0791621089206097, "learning_rate": 9.968775060337406e-06, "loss": 0.6556, "step": 312 }, { "epoch": 0.15221884498480243, "grad_norm": 0.07902589971775417, "learning_rate": 9.968561049466214e-06, "loss": 0.6413, "step": 313 }, { "epoch": 0.15270516717325228, "grad_norm": 0.07978880316551544, "learning_rate": 9.968346310011965e-06, "loss": 0.6541, "step": 314 }, { "epoch": 0.15319148936170213, "grad_norm": 0.07676081352777822, "learning_rate": 9.968130842006148e-06, "loss": 0.6567, "step": 315 }, { "epoch": 0.15367781155015198, "grad_norm": 0.07876049793989714, "learning_rate": 9.967914645480361e-06, "loss": 0.6701, "step": 316 }, { "epoch": 0.15416413373860183, "grad_norm": 0.07272295966553359, "learning_rate": 9.967697720466306e-06, "loss": 0.6166, "step": 317 }, { "epoch": 0.15465045592705168, "grad_norm": 0.07734962766032809, "learning_rate": 9.967480066995792e-06, "loss": 0.6468, "step": 318 }, { "epoch": 0.15513677811550153, "grad_norm": 0.07380252667197663, "learning_rate": 9.967261685100736e-06, "loss": 0.6317, "step": 319 }, { "epoch": 0.15562310030395138, "grad_norm": 0.07379445209764204, "learning_rate": 9.96704257481316e-06, "loss": 0.634, "step": 320 }, { "epoch": 0.15610942249240123, "grad_norm": 0.07178504237687208, "learning_rate": 9.966822736165194e-06, "loss": 0.6452, "step": 321 }, { "epoch": 0.15659574468085105, "grad_norm": 0.0752094396672398, "learning_rate": 9.966602169189077e-06, "loss": 0.6317, "step": 322 }, { "epoch": 0.1570820668693009, "grad_norm": 0.07704411452781729, "learning_rate": 9.966380873917152e-06, "loss": 0.6329, "step": 323 }, { "epoch": 0.15756838905775075, "grad_norm": 0.07331666896990816, "learning_rate": 9.966158850381868e-06, "loss": 0.6213, "step": 324 }, { "epoch": 0.1580547112462006, "grad_norm": 0.07323834901564434, "learning_rate": 9.965936098615783e-06, "loss": 0.6463, "step": 325 }, { "epoch": 0.15854103343465045, "grad_norm": 0.07473530428529629, "learning_rate": 9.965712618651561e-06, "loss": 0.6077, "step": 326 }, { "epoch": 0.1590273556231003, "grad_norm": 0.07017372721610983, "learning_rate": 9.965488410521974e-06, "loss": 0.5694, "step": 327 }, { "epoch": 0.15951367781155015, "grad_norm": 0.07326794013189401, "learning_rate": 9.965263474259896e-06, "loss": 0.6197, "step": 328 }, { "epoch": 0.16, "grad_norm": 0.07924480238840713, "learning_rate": 9.965037809898316e-06, "loss": 0.6612, "step": 329 }, { "epoch": 0.16048632218844985, "grad_norm": 0.0724181569720144, "learning_rate": 9.964811417470322e-06, "loss": 0.6315, "step": 330 }, { "epoch": 0.1609726443768997, "grad_norm": 0.07592111290791996, "learning_rate": 9.964584297009112e-06, "loss": 0.6799, "step": 331 }, { "epoch": 0.16145896656534955, "grad_norm": 0.07594391968708164, "learning_rate": 9.964356448547993e-06, "loss": 0.6398, "step": 332 }, { "epoch": 0.1619452887537994, "grad_norm": 0.07364414977888648, "learning_rate": 9.964127872120375e-06, "loss": 0.6375, "step": 333 }, { "epoch": 0.16243161094224923, "grad_norm": 0.07421290629411958, "learning_rate": 9.963898567759775e-06, "loss": 0.6112, "step": 334 }, { "epoch": 0.16291793313069908, "grad_norm": 0.07357497539724447, "learning_rate": 9.96366853549982e-06, "loss": 0.5918, "step": 335 }, { "epoch": 0.16340425531914893, "grad_norm": 0.07243767024457398, "learning_rate": 9.96343777537424e-06, "loss": 0.622, "step": 336 }, { "epoch": 0.16389057750759878, "grad_norm": 0.07453095932083066, "learning_rate": 9.963206287416873e-06, "loss": 0.6059, "step": 337 }, { "epoch": 0.16437689969604863, "grad_norm": 0.07504766468119894, "learning_rate": 9.962974071661664e-06, "loss": 0.6742, "step": 338 }, { "epoch": 0.16486322188449848, "grad_norm": 0.07040383572997289, "learning_rate": 9.962741128142667e-06, "loss": 0.6272, "step": 339 }, { "epoch": 0.16534954407294833, "grad_norm": 0.07413113117809093, "learning_rate": 9.96250745689404e-06, "loss": 0.6242, "step": 340 }, { "epoch": 0.16583586626139818, "grad_norm": 0.07334220952792778, "learning_rate": 9.962273057950048e-06, "loss": 0.6274, "step": 341 }, { "epoch": 0.16632218844984803, "grad_norm": 0.0764970593500182, "learning_rate": 9.962037931345058e-06, "loss": 0.6349, "step": 342 }, { "epoch": 0.16680851063829788, "grad_norm": 0.07483289748593701, "learning_rate": 9.961802077113558e-06, "loss": 0.6278, "step": 343 }, { "epoch": 0.16729483282674773, "grad_norm": 0.0740205554768208, "learning_rate": 9.961565495290126e-06, "loss": 0.6234, "step": 344 }, { "epoch": 0.16778115501519758, "grad_norm": 0.07378161086081035, "learning_rate": 9.961328185909457e-06, "loss": 0.6375, "step": 345 }, { "epoch": 0.1682674772036474, "grad_norm": 0.07309496134712835, "learning_rate": 9.96109014900635e-06, "loss": 0.6371, "step": 346 }, { "epoch": 0.16875379939209725, "grad_norm": 0.07340947211478033, "learning_rate": 9.960851384615709e-06, "loss": 0.6111, "step": 347 }, { "epoch": 0.1692401215805471, "grad_norm": 0.07139157389599032, "learning_rate": 9.960611892772544e-06, "loss": 0.5984, "step": 348 }, { "epoch": 0.16972644376899695, "grad_norm": 0.07516349399841989, "learning_rate": 9.96037167351198e-06, "loss": 0.5988, "step": 349 }, { "epoch": 0.1702127659574468, "grad_norm": 0.07508077902165791, "learning_rate": 9.960130726869237e-06, "loss": 0.6306, "step": 350 }, { "epoch": 0.17069908814589665, "grad_norm": 0.07321463915401381, "learning_rate": 9.95988905287965e-06, "loss": 0.6211, "step": 351 }, { "epoch": 0.1711854103343465, "grad_norm": 0.07091922946685138, "learning_rate": 9.959646651578656e-06, "loss": 0.586, "step": 352 }, { "epoch": 0.17167173252279636, "grad_norm": 0.07584891592007169, "learning_rate": 9.959403523001801e-06, "loss": 0.6152, "step": 353 }, { "epoch": 0.1721580547112462, "grad_norm": 0.07788514013608414, "learning_rate": 9.959159667184736e-06, "loss": 0.6885, "step": 354 }, { "epoch": 0.17264437689969606, "grad_norm": 0.08209778358702638, "learning_rate": 9.958915084163223e-06, "loss": 0.653, "step": 355 }, { "epoch": 0.1731306990881459, "grad_norm": 0.07362893687536431, "learning_rate": 9.958669773973124e-06, "loss": 0.6366, "step": 356 }, { "epoch": 0.17361702127659576, "grad_norm": 0.07938618815926102, "learning_rate": 9.958423736650413e-06, "loss": 0.6793, "step": 357 }, { "epoch": 0.17410334346504558, "grad_norm": 0.07580898641050113, "learning_rate": 9.958176972231166e-06, "loss": 0.6328, "step": 358 }, { "epoch": 0.17458966565349543, "grad_norm": 0.08175037924395943, "learning_rate": 9.957929480751572e-06, "loss": 0.6227, "step": 359 }, { "epoch": 0.17507598784194528, "grad_norm": 0.0745173085056933, "learning_rate": 9.957681262247918e-06, "loss": 0.6038, "step": 360 }, { "epoch": 0.17556231003039513, "grad_norm": 0.07749405203334765, "learning_rate": 9.957432316756608e-06, "loss": 0.6364, "step": 361 }, { "epoch": 0.17604863221884498, "grad_norm": 0.0752943992721238, "learning_rate": 9.957182644314144e-06, "loss": 0.6234, "step": 362 }, { "epoch": 0.17653495440729483, "grad_norm": 0.07489723293994147, "learning_rate": 9.956932244957135e-06, "loss": 0.6365, "step": 363 }, { "epoch": 0.17702127659574468, "grad_norm": 0.07175150599048309, "learning_rate": 9.956681118722302e-06, "loss": 0.5957, "step": 364 }, { "epoch": 0.17750759878419453, "grad_norm": 0.0832717686492948, "learning_rate": 9.956429265646472e-06, "loss": 0.6561, "step": 365 }, { "epoch": 0.17799392097264438, "grad_norm": 0.07631890515577754, "learning_rate": 9.956176685766574e-06, "loss": 0.6732, "step": 366 }, { "epoch": 0.17848024316109423, "grad_norm": 0.07452110620795337, "learning_rate": 9.955923379119645e-06, "loss": 0.6213, "step": 367 }, { "epoch": 0.17896656534954408, "grad_norm": 0.080836270562088, "learning_rate": 9.95566934574283e-06, "loss": 0.6714, "step": 368 }, { "epoch": 0.17945288753799393, "grad_norm": 0.07765305202486368, "learning_rate": 9.955414585673384e-06, "loss": 0.6251, "step": 369 }, { "epoch": 0.17993920972644378, "grad_norm": 0.07681719880195695, "learning_rate": 9.95515909894866e-06, "loss": 0.6231, "step": 370 }, { "epoch": 0.1804255319148936, "grad_norm": 0.07595508436229585, "learning_rate": 9.954902885606122e-06, "loss": 0.5958, "step": 371 }, { "epoch": 0.18091185410334346, "grad_norm": 0.07572195314562842, "learning_rate": 9.954645945683343e-06, "loss": 0.6324, "step": 372 }, { "epoch": 0.1813981762917933, "grad_norm": 0.07564765453982031, "learning_rate": 9.954388279218002e-06, "loss": 0.6233, "step": 373 }, { "epoch": 0.18188449848024316, "grad_norm": 0.07647251819232564, "learning_rate": 9.954129886247879e-06, "loss": 0.6288, "step": 374 }, { "epoch": 0.182370820668693, "grad_norm": 0.07815265755808583, "learning_rate": 9.953870766810864e-06, "loss": 0.6542, "step": 375 }, { "epoch": 0.18285714285714286, "grad_norm": 0.07550430163230877, "learning_rate": 9.953610920944959e-06, "loss": 0.5907, "step": 376 }, { "epoch": 0.1833434650455927, "grad_norm": 0.07752316334718226, "learning_rate": 9.953350348688264e-06, "loss": 0.6129, "step": 377 }, { "epoch": 0.18382978723404256, "grad_norm": 0.08002741566262664, "learning_rate": 9.953089050078988e-06, "loss": 0.6672, "step": 378 }, { "epoch": 0.1843161094224924, "grad_norm": 0.08118664207341818, "learning_rate": 9.95282702515545e-06, "loss": 0.6599, "step": 379 }, { "epoch": 0.18480243161094226, "grad_norm": 0.07268067047244009, "learning_rate": 9.952564273956071e-06, "loss": 0.5924, "step": 380 }, { "epoch": 0.1852887537993921, "grad_norm": 0.07525568137843315, "learning_rate": 9.952300796519383e-06, "loss": 0.6691, "step": 381 }, { "epoch": 0.18577507598784196, "grad_norm": 0.07736204276080864, "learning_rate": 9.952036592884019e-06, "loss": 0.6351, "step": 382 }, { "epoch": 0.18626139817629178, "grad_norm": 0.07469426185229484, "learning_rate": 9.951771663088724e-06, "loss": 0.6133, "step": 383 }, { "epoch": 0.18674772036474163, "grad_norm": 0.07725666077306236, "learning_rate": 9.951506007172344e-06, "loss": 0.6022, "step": 384 }, { "epoch": 0.18723404255319148, "grad_norm": 0.07980306064310377, "learning_rate": 9.951239625173836e-06, "loss": 0.6085, "step": 385 }, { "epoch": 0.18772036474164133, "grad_norm": 0.07177225906938704, "learning_rate": 9.950972517132263e-06, "loss": 0.5814, "step": 386 }, { "epoch": 0.18820668693009118, "grad_norm": 0.07503775177335276, "learning_rate": 9.950704683086793e-06, "loss": 0.6442, "step": 387 }, { "epoch": 0.18869300911854103, "grad_norm": 0.07822645307237987, "learning_rate": 9.950436123076698e-06, "loss": 0.6291, "step": 388 }, { "epoch": 0.18917933130699088, "grad_norm": 0.07701312265398665, "learning_rate": 9.950166837141365e-06, "loss": 0.6184, "step": 389 }, { "epoch": 0.18966565349544073, "grad_norm": 0.07381968422799055, "learning_rate": 9.949896825320276e-06, "loss": 0.5961, "step": 390 }, { "epoch": 0.19015197568389058, "grad_norm": 0.0774285794317031, "learning_rate": 9.949626087653026e-06, "loss": 0.6119, "step": 391 }, { "epoch": 0.19063829787234043, "grad_norm": 0.07533357959360332, "learning_rate": 9.94935462417932e-06, "loss": 0.6065, "step": 392 }, { "epoch": 0.19112462006079028, "grad_norm": 0.07592544655627194, "learning_rate": 9.949082434938959e-06, "loss": 0.6157, "step": 393 }, { "epoch": 0.19161094224924013, "grad_norm": 0.0773325236029274, "learning_rate": 9.948809519971861e-06, "loss": 0.6163, "step": 394 }, { "epoch": 0.19209726443768996, "grad_norm": 0.07457669854173453, "learning_rate": 9.948535879318044e-06, "loss": 0.619, "step": 395 }, { "epoch": 0.1925835866261398, "grad_norm": 0.07455267687335572, "learning_rate": 9.948261513017637e-06, "loss": 0.6436, "step": 396 }, { "epoch": 0.19306990881458966, "grad_norm": 0.07720189485645515, "learning_rate": 9.947986421110867e-06, "loss": 0.619, "step": 397 }, { "epoch": 0.1935562310030395, "grad_norm": 0.07787967933710195, "learning_rate": 9.947710603638078e-06, "loss": 0.6344, "step": 398 }, { "epoch": 0.19404255319148936, "grad_norm": 0.07666831965447489, "learning_rate": 9.947434060639714e-06, "loss": 0.6248, "step": 399 }, { "epoch": 0.1945288753799392, "grad_norm": 0.07360172238376061, "learning_rate": 9.947156792156325e-06, "loss": 0.5773, "step": 400 }, { "epoch": 0.19501519756838906, "grad_norm": 0.07847475862029958, "learning_rate": 9.946878798228573e-06, "loss": 0.6313, "step": 401 }, { "epoch": 0.1955015197568389, "grad_norm": 0.07431338152085773, "learning_rate": 9.94660007889722e-06, "loss": 0.6185, "step": 402 }, { "epoch": 0.19598784194528876, "grad_norm": 0.07909828376252435, "learning_rate": 9.946320634203139e-06, "loss": 0.6773, "step": 403 }, { "epoch": 0.1964741641337386, "grad_norm": 0.07695983804208197, "learning_rate": 9.946040464187305e-06, "loss": 0.5862, "step": 404 }, { "epoch": 0.19696048632218846, "grad_norm": 0.07552860793015936, "learning_rate": 9.945759568890804e-06, "loss": 0.6257, "step": 405 }, { "epoch": 0.1974468085106383, "grad_norm": 0.07263370651060398, "learning_rate": 9.945477948354825e-06, "loss": 0.5863, "step": 406 }, { "epoch": 0.19793313069908813, "grad_norm": 0.07281159174481305, "learning_rate": 9.945195602620663e-06, "loss": 0.6087, "step": 407 }, { "epoch": 0.19841945288753798, "grad_norm": 0.0736120933522763, "learning_rate": 9.944912531729723e-06, "loss": 0.6471, "step": 408 }, { "epoch": 0.19890577507598783, "grad_norm": 0.07740214011476758, "learning_rate": 9.944628735723514e-06, "loss": 0.6542, "step": 409 }, { "epoch": 0.19939209726443768, "grad_norm": 0.10666995775093237, "learning_rate": 9.94434421464365e-06, "loss": 0.6314, "step": 410 }, { "epoch": 0.19987841945288753, "grad_norm": 0.0772072886483773, "learning_rate": 9.944058968531855e-06, "loss": 0.6079, "step": 411 }, { "epoch": 0.20036474164133738, "grad_norm": 0.07130391118148917, "learning_rate": 9.943772997429955e-06, "loss": 0.5873, "step": 412 }, { "epoch": 0.20085106382978724, "grad_norm": 0.07450831706219276, "learning_rate": 9.943486301379885e-06, "loss": 0.6337, "step": 413 }, { "epoch": 0.20133738601823709, "grad_norm": 0.07702137581975181, "learning_rate": 9.943198880423685e-06, "loss": 0.6352, "step": 414 }, { "epoch": 0.20182370820668694, "grad_norm": 0.0758668538802447, "learning_rate": 9.942910734603505e-06, "loss": 0.6167, "step": 415 }, { "epoch": 0.2023100303951368, "grad_norm": 0.0756601791447037, "learning_rate": 9.942621863961595e-06, "loss": 0.642, "step": 416 }, { "epoch": 0.20279635258358664, "grad_norm": 0.0754402938917895, "learning_rate": 9.942332268540316e-06, "loss": 0.6528, "step": 417 }, { "epoch": 0.2032826747720365, "grad_norm": 0.07689886002839368, "learning_rate": 9.942041948382133e-06, "loss": 0.6201, "step": 418 }, { "epoch": 0.2037689969604863, "grad_norm": 0.07646160106063425, "learning_rate": 9.94175090352962e-06, "loss": 0.6412, "step": 419 }, { "epoch": 0.20425531914893616, "grad_norm": 0.07369601649884883, "learning_rate": 9.941459134025455e-06, "loss": 0.6255, "step": 420 }, { "epoch": 0.204741641337386, "grad_norm": 0.0849247857802085, "learning_rate": 9.94116663991242e-06, "loss": 0.5797, "step": 421 }, { "epoch": 0.20522796352583586, "grad_norm": 0.07169202414610462, "learning_rate": 9.94087342123341e-06, "loss": 0.5798, "step": 422 }, { "epoch": 0.2057142857142857, "grad_norm": 0.07568098279764568, "learning_rate": 9.940579478031418e-06, "loss": 0.6395, "step": 423 }, { "epoch": 0.20620060790273556, "grad_norm": 0.07518919387235251, "learning_rate": 9.94028481034955e-06, "loss": 0.594, "step": 424 }, { "epoch": 0.2066869300911854, "grad_norm": 0.07397919814811302, "learning_rate": 9.939989418231015e-06, "loss": 0.6176, "step": 425 }, { "epoch": 0.20717325227963526, "grad_norm": 0.07639873863361582, "learning_rate": 9.939693301719131e-06, "loss": 0.6406, "step": 426 }, { "epoch": 0.2076595744680851, "grad_norm": 0.0782517394883419, "learning_rate": 9.939396460857317e-06, "loss": 0.6164, "step": 427 }, { "epoch": 0.20814589665653496, "grad_norm": 0.07326346144393303, "learning_rate": 9.939098895689104e-06, "loss": 0.615, "step": 428 }, { "epoch": 0.2086322188449848, "grad_norm": 0.07073338269850502, "learning_rate": 9.938800606258122e-06, "loss": 0.5874, "step": 429 }, { "epoch": 0.20911854103343466, "grad_norm": 0.07994201273381059, "learning_rate": 9.938501592608117e-06, "loss": 0.6279, "step": 430 }, { "epoch": 0.20960486322188449, "grad_norm": 0.07591241484669652, "learning_rate": 9.938201854782935e-06, "loss": 0.6592, "step": 431 }, { "epoch": 0.21009118541033434, "grad_norm": 0.07391522831462577, "learning_rate": 9.937901392826525e-06, "loss": 0.6484, "step": 432 }, { "epoch": 0.21057750759878419, "grad_norm": 0.07396763630117018, "learning_rate": 9.937600206782951e-06, "loss": 0.5993, "step": 433 }, { "epoch": 0.21106382978723404, "grad_norm": 0.07971136014753553, "learning_rate": 9.937298296696377e-06, "loss": 0.6065, "step": 434 }, { "epoch": 0.2115501519756839, "grad_norm": 0.07469907654962667, "learning_rate": 9.936995662611074e-06, "loss": 0.6189, "step": 435 }, { "epoch": 0.21203647416413374, "grad_norm": 0.07345585801523219, "learning_rate": 9.93669230457142e-06, "loss": 0.5837, "step": 436 }, { "epoch": 0.2125227963525836, "grad_norm": 0.07446872719945817, "learning_rate": 9.9363882226219e-06, "loss": 0.6417, "step": 437 }, { "epoch": 0.21300911854103344, "grad_norm": 0.07651414373055855, "learning_rate": 9.936083416807103e-06, "loss": 0.6093, "step": 438 }, { "epoch": 0.2134954407294833, "grad_norm": 0.0756813213374065, "learning_rate": 9.935777887171727e-06, "loss": 0.6256, "step": 439 }, { "epoch": 0.21398176291793314, "grad_norm": 0.07227490250178426, "learning_rate": 9.935471633760572e-06, "loss": 0.631, "step": 440 }, { "epoch": 0.214468085106383, "grad_norm": 0.07389776343485273, "learning_rate": 9.93516465661855e-06, "loss": 0.631, "step": 441 }, { "epoch": 0.21495440729483284, "grad_norm": 0.07734100943928443, "learning_rate": 9.934856955790672e-06, "loss": 0.6372, "step": 442 }, { "epoch": 0.21544072948328266, "grad_norm": 0.07752921853682646, "learning_rate": 9.934548531322061e-06, "loss": 0.5906, "step": 443 }, { "epoch": 0.2159270516717325, "grad_norm": 0.07817168205190837, "learning_rate": 9.934239383257942e-06, "loss": 0.6299, "step": 444 }, { "epoch": 0.21641337386018236, "grad_norm": 0.07389085118763335, "learning_rate": 9.933929511643651e-06, "loss": 0.5723, "step": 445 }, { "epoch": 0.2168996960486322, "grad_norm": 0.08895902667132614, "learning_rate": 9.933618916524625e-06, "loss": 0.6295, "step": 446 }, { "epoch": 0.21738601823708206, "grad_norm": 0.07838298322165756, "learning_rate": 9.93330759794641e-06, "loss": 0.6487, "step": 447 }, { "epoch": 0.2178723404255319, "grad_norm": 0.07474744779378172, "learning_rate": 9.932995555954657e-06, "loss": 0.6241, "step": 448 }, { "epoch": 0.21835866261398176, "grad_norm": 0.07604580436929075, "learning_rate": 9.932682790595123e-06, "loss": 0.6357, "step": 449 }, { "epoch": 0.2188449848024316, "grad_norm": 0.0733846681325185, "learning_rate": 9.932369301913673e-06, "loss": 0.6372, "step": 450 }, { "epoch": 0.21933130699088146, "grad_norm": 0.07248628370045519, "learning_rate": 9.932055089956276e-06, "loss": 0.6373, "step": 451 }, { "epoch": 0.21981762917933131, "grad_norm": 0.07143990946931077, "learning_rate": 9.931740154769008e-06, "loss": 0.6073, "step": 452 }, { "epoch": 0.22030395136778116, "grad_norm": 0.07684842487595032, "learning_rate": 9.931424496398048e-06, "loss": 0.6538, "step": 453 }, { "epoch": 0.22079027355623101, "grad_norm": 0.07721539704589185, "learning_rate": 9.931108114889685e-06, "loss": 0.649, "step": 454 }, { "epoch": 0.22127659574468084, "grad_norm": 0.07074242481837646, "learning_rate": 9.930791010290316e-06, "loss": 0.5966, "step": 455 }, { "epoch": 0.2217629179331307, "grad_norm": 0.07372620377435624, "learning_rate": 9.930473182646436e-06, "loss": 0.6437, "step": 456 }, { "epoch": 0.22224924012158054, "grad_norm": 0.0743048136038808, "learning_rate": 9.930154632004654e-06, "loss": 0.6296, "step": 457 }, { "epoch": 0.2227355623100304, "grad_norm": 0.07792617987197321, "learning_rate": 9.929835358411682e-06, "loss": 0.6355, "step": 458 }, { "epoch": 0.22322188449848024, "grad_norm": 0.0708828431490397, "learning_rate": 9.929515361914335e-06, "loss": 0.6078, "step": 459 }, { "epoch": 0.2237082066869301, "grad_norm": 0.07737049787688959, "learning_rate": 9.929194642559538e-06, "loss": 0.6528, "step": 460 }, { "epoch": 0.22419452887537994, "grad_norm": 0.07349583464643533, "learning_rate": 9.928873200394323e-06, "loss": 0.6118, "step": 461 }, { "epoch": 0.2246808510638298, "grad_norm": 0.11868280258548222, "learning_rate": 9.928551035465823e-06, "loss": 0.6588, "step": 462 }, { "epoch": 0.22516717325227964, "grad_norm": 0.07721198493465202, "learning_rate": 9.928228147821282e-06, "loss": 0.6343, "step": 463 }, { "epoch": 0.2256534954407295, "grad_norm": 0.07615577084672769, "learning_rate": 9.927904537508046e-06, "loss": 0.6219, "step": 464 }, { "epoch": 0.22613981762917934, "grad_norm": 0.071620193767772, "learning_rate": 9.927580204573571e-06, "loss": 0.5889, "step": 465 }, { "epoch": 0.2266261398176292, "grad_norm": 0.07235223427194966, "learning_rate": 9.927255149065413e-06, "loss": 0.6038, "step": 466 }, { "epoch": 0.22711246200607904, "grad_norm": 0.07742966226846906, "learning_rate": 9.926929371031242e-06, "loss": 0.6021, "step": 467 }, { "epoch": 0.22759878419452886, "grad_norm": 0.08084185006551577, "learning_rate": 9.926602870518826e-06, "loss": 0.6356, "step": 468 }, { "epoch": 0.22808510638297871, "grad_norm": 0.07687582330497636, "learning_rate": 9.926275647576046e-06, "loss": 0.6036, "step": 469 }, { "epoch": 0.22857142857142856, "grad_norm": 0.07280657764875939, "learning_rate": 9.925947702250884e-06, "loss": 0.5918, "step": 470 }, { "epoch": 0.22905775075987841, "grad_norm": 0.07516592158582709, "learning_rate": 9.925619034591429e-06, "loss": 0.6178, "step": 471 }, { "epoch": 0.22954407294832826, "grad_norm": 0.07421193750711921, "learning_rate": 9.925289644645876e-06, "loss": 0.6076, "step": 472 }, { "epoch": 0.23003039513677812, "grad_norm": 0.07901459575606913, "learning_rate": 9.924959532462527e-06, "loss": 0.6179, "step": 473 }, { "epoch": 0.23051671732522797, "grad_norm": 0.07984168188458464, "learning_rate": 9.92462869808979e-06, "loss": 0.6374, "step": 474 }, { "epoch": 0.23100303951367782, "grad_norm": 0.07129044878012378, "learning_rate": 9.924297141576176e-06, "loss": 0.5536, "step": 475 }, { "epoch": 0.23148936170212767, "grad_norm": 0.07598139936236821, "learning_rate": 9.923964862970306e-06, "loss": 0.5784, "step": 476 }, { "epoch": 0.23197568389057752, "grad_norm": 0.07613946279554822, "learning_rate": 9.923631862320907e-06, "loss": 0.5868, "step": 477 }, { "epoch": 0.23246200607902737, "grad_norm": 0.07115161589929749, "learning_rate": 9.923298139676802e-06, "loss": 0.6012, "step": 478 }, { "epoch": 0.23294832826747722, "grad_norm": 0.0791874289284463, "learning_rate": 9.922963695086936e-06, "loss": 0.6136, "step": 479 }, { "epoch": 0.23343465045592704, "grad_norm": 0.07507642632721559, "learning_rate": 9.922628528600347e-06, "loss": 0.6279, "step": 480 }, { "epoch": 0.2339209726443769, "grad_norm": 0.07748937754994555, "learning_rate": 9.922292640266184e-06, "loss": 0.6444, "step": 481 }, { "epoch": 0.23440729483282674, "grad_norm": 0.07759085664135958, "learning_rate": 9.9219560301337e-06, "loss": 0.6458, "step": 482 }, { "epoch": 0.2348936170212766, "grad_norm": 0.07377761390411647, "learning_rate": 9.92161869825226e-06, "loss": 0.6138, "step": 483 }, { "epoch": 0.23537993920972644, "grad_norm": 0.07420455297957712, "learning_rate": 9.921280644671324e-06, "loss": 0.6211, "step": 484 }, { "epoch": 0.2358662613981763, "grad_norm": 0.07991178658536623, "learning_rate": 9.92094186944047e-06, "loss": 0.6124, "step": 485 }, { "epoch": 0.23635258358662614, "grad_norm": 0.07470449885186195, "learning_rate": 9.92060237260937e-06, "loss": 0.6157, "step": 486 }, { "epoch": 0.236838905775076, "grad_norm": 0.07626616445129225, "learning_rate": 9.920262154227806e-06, "loss": 0.5957, "step": 487 }, { "epoch": 0.23732522796352584, "grad_norm": 0.07678478362923138, "learning_rate": 9.919921214345674e-06, "loss": 0.587, "step": 488 }, { "epoch": 0.2378115501519757, "grad_norm": 0.07462321190667279, "learning_rate": 9.919579553012964e-06, "loss": 0.6327, "step": 489 }, { "epoch": 0.23829787234042554, "grad_norm": 0.07309292679627498, "learning_rate": 9.919237170279778e-06, "loss": 0.5835, "step": 490 }, { "epoch": 0.2387841945288754, "grad_norm": 0.07457806463730678, "learning_rate": 9.918894066196322e-06, "loss": 0.611, "step": 491 }, { "epoch": 0.23927051671732522, "grad_norm": 0.07576722141103294, "learning_rate": 9.918550240812912e-06, "loss": 0.6072, "step": 492 }, { "epoch": 0.23975683890577507, "grad_norm": 0.07661148430761854, "learning_rate": 9.918205694179961e-06, "loss": 0.6413, "step": 493 }, { "epoch": 0.24024316109422492, "grad_norm": 0.07283782084317245, "learning_rate": 9.917860426347994e-06, "loss": 0.6072, "step": 494 }, { "epoch": 0.24072948328267477, "grad_norm": 0.07429240232374179, "learning_rate": 9.917514437367644e-06, "loss": 0.641, "step": 495 }, { "epoch": 0.24121580547112462, "grad_norm": 0.07120995408021623, "learning_rate": 9.917167727289641e-06, "loss": 0.5972, "step": 496 }, { "epoch": 0.24170212765957447, "grad_norm": 0.07517749617028277, "learning_rate": 9.91682029616483e-06, "loss": 0.6137, "step": 497 }, { "epoch": 0.24218844984802432, "grad_norm": 0.07923127533130168, "learning_rate": 9.916472144044157e-06, "loss": 0.644, "step": 498 }, { "epoch": 0.24267477203647417, "grad_norm": 0.07387849925847506, "learning_rate": 9.916123270978673e-06, "loss": 0.6565, "step": 499 }, { "epoch": 0.24316109422492402, "grad_norm": 0.07292179430812065, "learning_rate": 9.91577367701954e-06, "loss": 0.611, "step": 500 }, { "epoch": 0.24364741641337387, "grad_norm": 0.0772507722278035, "learning_rate": 9.915423362218017e-06, "loss": 0.6446, "step": 501 }, { "epoch": 0.24413373860182372, "grad_norm": 0.07710857117115859, "learning_rate": 9.915072326625479e-06, "loss": 0.6379, "step": 502 }, { "epoch": 0.24462006079027357, "grad_norm": 0.07285698280642044, "learning_rate": 9.914720570293397e-06, "loss": 0.6026, "step": 503 }, { "epoch": 0.2451063829787234, "grad_norm": 0.0733862563471539, "learning_rate": 9.914368093273354e-06, "loss": 0.6331, "step": 504 }, { "epoch": 0.24559270516717324, "grad_norm": 0.07420637988829044, "learning_rate": 9.914014895617036e-06, "loss": 0.5941, "step": 505 }, { "epoch": 0.2460790273556231, "grad_norm": 0.07923092784020519, "learning_rate": 9.913660977376236e-06, "loss": 0.6114, "step": 506 }, { "epoch": 0.24656534954407294, "grad_norm": 0.07548113044308191, "learning_rate": 9.913306338602852e-06, "loss": 0.6332, "step": 507 }, { "epoch": 0.2470516717325228, "grad_norm": 0.07568186968690875, "learning_rate": 9.912950979348889e-06, "loss": 0.6095, "step": 508 }, { "epoch": 0.24753799392097264, "grad_norm": 0.073329676390455, "learning_rate": 9.912594899666454e-06, "loss": 0.5964, "step": 509 }, { "epoch": 0.2480243161094225, "grad_norm": 0.07530844381021684, "learning_rate": 9.912238099607763e-06, "loss": 0.6348, "step": 510 }, { "epoch": 0.24851063829787234, "grad_norm": 0.0735368360110718, "learning_rate": 9.911880579225137e-06, "loss": 0.6123, "step": 511 }, { "epoch": 0.2489969604863222, "grad_norm": 0.07401497675772126, "learning_rate": 9.911522338571002e-06, "loss": 0.6319, "step": 512 }, { "epoch": 0.24948328267477204, "grad_norm": 0.07598525902317663, "learning_rate": 9.911163377697891e-06, "loss": 0.5908, "step": 513 }, { "epoch": 0.2499696048632219, "grad_norm": 0.07988537951661231, "learning_rate": 9.91080369665844e-06, "loss": 0.6227, "step": 514 }, { "epoch": 0.2499696048632219, "eval_loss": 0.6214485168457031, "eval_runtime": 105.2182, "eval_samples_per_second": 288.477, "eval_steps_per_second": 36.068, "step": 514 }, { "epoch": 0.25045592705167175, "grad_norm": 0.07589060902724236, "learning_rate": 9.910443295505392e-06, "loss": 0.6145, "step": 515 }, { "epoch": 0.2509422492401216, "grad_norm": 0.07288991412090616, "learning_rate": 9.910082174291597e-06, "loss": 0.6072, "step": 516 }, { "epoch": 0.25142857142857145, "grad_norm": 0.07414715332393881, "learning_rate": 9.90972033307001e-06, "loss": 0.6459, "step": 517 }, { "epoch": 0.2519148936170213, "grad_norm": 0.0761473577886796, "learning_rate": 9.909357771893689e-06, "loss": 0.6707, "step": 518 }, { "epoch": 0.25240121580547115, "grad_norm": 0.08455929630962569, "learning_rate": 9.908994490815799e-06, "loss": 0.6258, "step": 519 }, { "epoch": 0.252887537993921, "grad_norm": 0.07407118772256081, "learning_rate": 9.908630489889615e-06, "loss": 0.5906, "step": 520 }, { "epoch": 0.25337386018237085, "grad_norm": 0.07521990216742436, "learning_rate": 9.908265769168507e-06, "loss": 0.5908, "step": 521 }, { "epoch": 0.25386018237082064, "grad_norm": 0.07679672666813263, "learning_rate": 9.907900328705965e-06, "loss": 0.5779, "step": 522 }, { "epoch": 0.2543465045592705, "grad_norm": 0.07671000448785356, "learning_rate": 9.90753416855557e-06, "loss": 0.6177, "step": 523 }, { "epoch": 0.25483282674772034, "grad_norm": 0.07626573478005942, "learning_rate": 9.90716728877102e-06, "loss": 0.6124, "step": 524 }, { "epoch": 0.2553191489361702, "grad_norm": 0.07958550152849972, "learning_rate": 9.90679968940611e-06, "loss": 0.6455, "step": 525 }, { "epoch": 0.25580547112462004, "grad_norm": 0.07869148910618602, "learning_rate": 9.906431370514746e-06, "loss": 0.6503, "step": 526 }, { "epoch": 0.2562917933130699, "grad_norm": 0.07442998191091328, "learning_rate": 9.906062332150939e-06, "loss": 0.6244, "step": 527 }, { "epoch": 0.25677811550151974, "grad_norm": 0.07322069741610936, "learning_rate": 9.905692574368802e-06, "loss": 0.6143, "step": 528 }, { "epoch": 0.2572644376899696, "grad_norm": 0.07243230323425069, "learning_rate": 9.905322097222557e-06, "loss": 0.577, "step": 529 }, { "epoch": 0.25775075987841944, "grad_norm": 0.07392644886548709, "learning_rate": 9.90495090076653e-06, "loss": 0.6173, "step": 530 }, { "epoch": 0.2582370820668693, "grad_norm": 0.07326043331226169, "learning_rate": 9.904578985055151e-06, "loss": 0.593, "step": 531 }, { "epoch": 0.25872340425531914, "grad_norm": 0.0761866844421115, "learning_rate": 9.904206350142962e-06, "loss": 0.6146, "step": 532 }, { "epoch": 0.259209726443769, "grad_norm": 0.07466453648142758, "learning_rate": 9.9038329960846e-06, "loss": 0.615, "step": 533 }, { "epoch": 0.25969604863221885, "grad_norm": 0.07758069788648553, "learning_rate": 9.903458922934819e-06, "loss": 0.6165, "step": 534 }, { "epoch": 0.2601823708206687, "grad_norm": 0.08130805265534356, "learning_rate": 9.903084130748468e-06, "loss": 0.6514, "step": 535 }, { "epoch": 0.26066869300911855, "grad_norm": 0.07513827656136587, "learning_rate": 9.902708619580507e-06, "loss": 0.6419, "step": 536 }, { "epoch": 0.2611550151975684, "grad_norm": 0.07554053453656473, "learning_rate": 9.902332389486001e-06, "loss": 0.5757, "step": 537 }, { "epoch": 0.26164133738601825, "grad_norm": 0.07682878866688235, "learning_rate": 9.901955440520121e-06, "loss": 0.6296, "step": 538 }, { "epoch": 0.2621276595744681, "grad_norm": 0.08162414706107891, "learning_rate": 9.90157777273814e-06, "loss": 0.6256, "step": 539 }, { "epoch": 0.26261398176291795, "grad_norm": 0.07775975078261581, "learning_rate": 9.90119938619544e-06, "loss": 0.61, "step": 540 }, { "epoch": 0.2631003039513678, "grad_norm": 0.07824306209995144, "learning_rate": 9.900820280947505e-06, "loss": 0.5721, "step": 541 }, { "epoch": 0.26358662613981765, "grad_norm": 0.07368803248818379, "learning_rate": 9.90044045704993e-06, "loss": 0.6295, "step": 542 }, { "epoch": 0.2640729483282675, "grad_norm": 0.07725198361916966, "learning_rate": 9.90005991455841e-06, "loss": 0.6373, "step": 543 }, { "epoch": 0.26455927051671735, "grad_norm": 0.07847348883834761, "learning_rate": 9.899678653528747e-06, "loss": 0.6027, "step": 544 }, { "epoch": 0.2650455927051672, "grad_norm": 0.07229624668472394, "learning_rate": 9.89929667401685e-06, "loss": 0.611, "step": 545 }, { "epoch": 0.265531914893617, "grad_norm": 0.08032354201405056, "learning_rate": 9.89891397607873e-06, "loss": 0.6233, "step": 546 }, { "epoch": 0.26601823708206684, "grad_norm": 0.07699928199935681, "learning_rate": 9.898530559770508e-06, "loss": 0.6367, "step": 547 }, { "epoch": 0.2665045592705167, "grad_norm": 0.07244592165765863, "learning_rate": 9.898146425148403e-06, "loss": 0.6087, "step": 548 }, { "epoch": 0.26699088145896654, "grad_norm": 0.07303833129817308, "learning_rate": 9.897761572268748e-06, "loss": 0.6248, "step": 549 }, { "epoch": 0.2674772036474164, "grad_norm": 0.07648930853409155, "learning_rate": 9.897376001187978e-06, "loss": 0.6348, "step": 550 }, { "epoch": 0.26796352583586625, "grad_norm": 0.07250421486915064, "learning_rate": 9.896989711962627e-06, "loss": 0.5822, "step": 551 }, { "epoch": 0.2684498480243161, "grad_norm": 0.07669573361463654, "learning_rate": 9.896602704649348e-06, "loss": 0.6578, "step": 552 }, { "epoch": 0.26893617021276595, "grad_norm": 0.0803366526048726, "learning_rate": 9.896214979304884e-06, "loss": 0.6492, "step": 553 }, { "epoch": 0.2694224924012158, "grad_norm": 0.07700448398210802, "learning_rate": 9.895826535986095e-06, "loss": 0.607, "step": 554 }, { "epoch": 0.26990881458966565, "grad_norm": 0.0776266991713559, "learning_rate": 9.89543737474994e-06, "loss": 0.5945, "step": 555 }, { "epoch": 0.2703951367781155, "grad_norm": 0.07522103980996976, "learning_rate": 9.895047495653485e-06, "loss": 0.6153, "step": 556 }, { "epoch": 0.27088145896656535, "grad_norm": 0.07743620531989164, "learning_rate": 9.894656898753902e-06, "loss": 0.574, "step": 557 }, { "epoch": 0.2713677811550152, "grad_norm": 0.07522427661033675, "learning_rate": 9.894265584108466e-06, "loss": 0.616, "step": 558 }, { "epoch": 0.27185410334346505, "grad_norm": 0.13632008039363205, "learning_rate": 9.893873551774561e-06, "loss": 0.6109, "step": 559 }, { "epoch": 0.2723404255319149, "grad_norm": 0.08010155018404212, "learning_rate": 9.893480801809675e-06, "loss": 0.6318, "step": 560 }, { "epoch": 0.27282674772036475, "grad_norm": 0.0749363948780222, "learning_rate": 9.893087334271398e-06, "loss": 0.5767, "step": 561 }, { "epoch": 0.2733130699088146, "grad_norm": 0.07482152043920853, "learning_rate": 9.892693149217427e-06, "loss": 0.6156, "step": 562 }, { "epoch": 0.27379939209726445, "grad_norm": 0.07919591176340275, "learning_rate": 9.892298246705566e-06, "loss": 0.6151, "step": 563 }, { "epoch": 0.2742857142857143, "grad_norm": 0.07795060723844054, "learning_rate": 9.891902626793723e-06, "loss": 0.609, "step": 564 }, { "epoch": 0.27477203647416415, "grad_norm": 0.07210354659172886, "learning_rate": 9.891506289539912e-06, "loss": 0.6128, "step": 565 }, { "epoch": 0.275258358662614, "grad_norm": 0.07715300121517131, "learning_rate": 9.891109235002248e-06, "loss": 0.646, "step": 566 }, { "epoch": 0.27574468085106385, "grad_norm": 0.07794141151981211, "learning_rate": 9.89071146323896e-06, "loss": 0.6232, "step": 567 }, { "epoch": 0.2762310030395137, "grad_norm": 0.081645702794136, "learning_rate": 9.89031297430837e-06, "loss": 0.6232, "step": 568 }, { "epoch": 0.27671732522796355, "grad_norm": 0.08178305842758182, "learning_rate": 9.889913768268918e-06, "loss": 0.6722, "step": 569 }, { "epoch": 0.2772036474164134, "grad_norm": 0.07605924407860792, "learning_rate": 9.88951384517914e-06, "loss": 0.6155, "step": 570 }, { "epoch": 0.2776899696048632, "grad_norm": 0.07480987856310256, "learning_rate": 9.889113205097682e-06, "loss": 0.5984, "step": 571 }, { "epoch": 0.27817629179331305, "grad_norm": 0.07844896251101492, "learning_rate": 9.88871184808329e-06, "loss": 0.5923, "step": 572 }, { "epoch": 0.2786626139817629, "grad_norm": 0.0763243386187881, "learning_rate": 9.888309774194822e-06, "loss": 0.6339, "step": 573 }, { "epoch": 0.27914893617021275, "grad_norm": 0.07146612633011887, "learning_rate": 9.887906983491236e-06, "loss": 0.5898, "step": 574 }, { "epoch": 0.2796352583586626, "grad_norm": 0.07463219701665877, "learning_rate": 9.887503476031594e-06, "loss": 0.6014, "step": 575 }, { "epoch": 0.28012158054711245, "grad_norm": 0.0774337690825949, "learning_rate": 9.887099251875072e-06, "loss": 0.6336, "step": 576 }, { "epoch": 0.2806079027355623, "grad_norm": 0.07622175835329276, "learning_rate": 9.88669431108094e-06, "loss": 0.5896, "step": 577 }, { "epoch": 0.28109422492401215, "grad_norm": 0.07417235299022149, "learning_rate": 9.886288653708578e-06, "loss": 0.5898, "step": 578 }, { "epoch": 0.281580547112462, "grad_norm": 0.0750556864122582, "learning_rate": 9.885882279817473e-06, "loss": 0.5994, "step": 579 }, { "epoch": 0.28206686930091185, "grad_norm": 0.07259439228436972, "learning_rate": 9.885475189467217e-06, "loss": 0.6015, "step": 580 }, { "epoch": 0.2825531914893617, "grad_norm": 0.08096810389461838, "learning_rate": 9.885067382717501e-06, "loss": 0.6303, "step": 581 }, { "epoch": 0.28303951367781155, "grad_norm": 0.07656747826915329, "learning_rate": 9.884658859628126e-06, "loss": 0.6235, "step": 582 }, { "epoch": 0.2835258358662614, "grad_norm": 0.07407093804321452, "learning_rate": 9.884249620259e-06, "loss": 0.6306, "step": 583 }, { "epoch": 0.28401215805471125, "grad_norm": 0.07678458194671911, "learning_rate": 9.88383966467013e-06, "loss": 0.6341, "step": 584 }, { "epoch": 0.2844984802431611, "grad_norm": 0.07517273995768671, "learning_rate": 9.883428992921634e-06, "loss": 0.6475, "step": 585 }, { "epoch": 0.28498480243161095, "grad_norm": 0.07516108205639849, "learning_rate": 9.88301760507373e-06, "loss": 0.6351, "step": 586 }, { "epoch": 0.2854711246200608, "grad_norm": 0.0741107173160987, "learning_rate": 9.882605501186747e-06, "loss": 0.6257, "step": 587 }, { "epoch": 0.28595744680851065, "grad_norm": 0.07873028174145927, "learning_rate": 9.88219268132111e-06, "loss": 0.5696, "step": 588 }, { "epoch": 0.2864437689969605, "grad_norm": 0.07537660815824522, "learning_rate": 9.881779145537359e-06, "loss": 0.5996, "step": 589 }, { "epoch": 0.28693009118541035, "grad_norm": 0.07513490085084712, "learning_rate": 9.88136489389613e-06, "loss": 0.6275, "step": 590 }, { "epoch": 0.2874164133738602, "grad_norm": 0.08066569161946668, "learning_rate": 9.880949926458174e-06, "loss": 0.6684, "step": 591 }, { "epoch": 0.28790273556231005, "grad_norm": 0.07709164494735121, "learning_rate": 9.880534243284338e-06, "loss": 0.6149, "step": 592 }, { "epoch": 0.2883890577507599, "grad_norm": 0.07594899490206673, "learning_rate": 9.880117844435575e-06, "loss": 0.6177, "step": 593 }, { "epoch": 0.28887537993920975, "grad_norm": 0.07299231382190172, "learning_rate": 9.87970072997295e-06, "loss": 0.6134, "step": 594 }, { "epoch": 0.28936170212765955, "grad_norm": 0.07763823662793731, "learning_rate": 9.879282899957625e-06, "loss": 0.5842, "step": 595 }, { "epoch": 0.2898480243161094, "grad_norm": 0.0766311164247948, "learning_rate": 9.87886435445087e-06, "loss": 0.6314, "step": 596 }, { "epoch": 0.29033434650455925, "grad_norm": 0.07984107461223952, "learning_rate": 9.87844509351406e-06, "loss": 0.6881, "step": 597 }, { "epoch": 0.2908206686930091, "grad_norm": 0.07558194427065122, "learning_rate": 9.878025117208676e-06, "loss": 0.6352, "step": 598 }, { "epoch": 0.29130699088145895, "grad_norm": 0.07728297115017416, "learning_rate": 9.877604425596303e-06, "loss": 0.6013, "step": 599 }, { "epoch": 0.2917933130699088, "grad_norm": 0.07840027810999112, "learning_rate": 9.87718301873863e-06, "loss": 0.5728, "step": 600 }, { "epoch": 0.29227963525835865, "grad_norm": 0.07722122568644645, "learning_rate": 9.87676089669745e-06, "loss": 0.6368, "step": 601 }, { "epoch": 0.2927659574468085, "grad_norm": 0.07632554633533033, "learning_rate": 9.876338059534664e-06, "loss": 0.607, "step": 602 }, { "epoch": 0.29325227963525835, "grad_norm": 0.07529935937597487, "learning_rate": 9.875914507312277e-06, "loss": 0.6576, "step": 603 }, { "epoch": 0.2937386018237082, "grad_norm": 0.07460223062666418, "learning_rate": 9.875490240092397e-06, "loss": 0.5874, "step": 604 }, { "epoch": 0.29422492401215805, "grad_norm": 0.07879593078852981, "learning_rate": 9.875065257937237e-06, "loss": 0.5934, "step": 605 }, { "epoch": 0.2947112462006079, "grad_norm": 0.0715437718862713, "learning_rate": 9.874639560909118e-06, "loss": 0.5779, "step": 606 }, { "epoch": 0.29519756838905775, "grad_norm": 0.07273058756189188, "learning_rate": 9.874213149070463e-06, "loss": 0.6027, "step": 607 }, { "epoch": 0.2956838905775076, "grad_norm": 0.07662064449090256, "learning_rate": 9.8737860224838e-06, "loss": 0.593, "step": 608 }, { "epoch": 0.29617021276595745, "grad_norm": 0.07741820641354102, "learning_rate": 9.873358181211762e-06, "loss": 0.6126, "step": 609 }, { "epoch": 0.2966565349544073, "grad_norm": 0.07765359047523991, "learning_rate": 9.872929625317087e-06, "loss": 0.6162, "step": 610 }, { "epoch": 0.29714285714285715, "grad_norm": 0.0833328990770357, "learning_rate": 9.872500354862618e-06, "loss": 0.6631, "step": 611 }, { "epoch": 0.297629179331307, "grad_norm": 0.07808821812865127, "learning_rate": 9.872070369911304e-06, "loss": 0.6232, "step": 612 }, { "epoch": 0.29811550151975685, "grad_norm": 0.07697593909903068, "learning_rate": 9.871639670526194e-06, "loss": 0.6565, "step": 613 }, { "epoch": 0.2986018237082067, "grad_norm": 0.07370264019066085, "learning_rate": 9.87120825677045e-06, "loss": 0.6321, "step": 614 }, { "epoch": 0.29908814589665655, "grad_norm": 0.0727886652645723, "learning_rate": 9.87077612870733e-06, "loss": 0.6015, "step": 615 }, { "epoch": 0.2995744680851064, "grad_norm": 0.07542069863314463, "learning_rate": 9.870343286400202e-06, "loss": 0.6391, "step": 616 }, { "epoch": 0.30006079027355625, "grad_norm": 0.07684140526385554, "learning_rate": 9.86990972991254e-06, "loss": 0.6068, "step": 617 }, { "epoch": 0.3005471124620061, "grad_norm": 0.07621891194931185, "learning_rate": 9.869475459307913e-06, "loss": 0.5929, "step": 618 }, { "epoch": 0.3010334346504559, "grad_norm": 0.07179766415423608, "learning_rate": 9.86904047465001e-06, "loss": 0.573, "step": 619 }, { "epoch": 0.30151975683890575, "grad_norm": 0.0792872907161523, "learning_rate": 9.868604776002612e-06, "loss": 0.6523, "step": 620 }, { "epoch": 0.3020060790273556, "grad_norm": 0.07495051908455809, "learning_rate": 9.86816836342961e-06, "loss": 0.6315, "step": 621 }, { "epoch": 0.30249240121580545, "grad_norm": 0.07820825446959971, "learning_rate": 9.867731236995e-06, "loss": 0.5941, "step": 622 }, { "epoch": 0.3029787234042553, "grad_norm": 0.07796673011558322, "learning_rate": 9.86729339676288e-06, "loss": 0.6144, "step": 623 }, { "epoch": 0.30346504559270515, "grad_norm": 0.07361026745986436, "learning_rate": 9.866854842797455e-06, "loss": 0.5673, "step": 624 }, { "epoch": 0.303951367781155, "grad_norm": 0.08289100643421991, "learning_rate": 9.866415575163036e-06, "loss": 0.6789, "step": 625 }, { "epoch": 0.30443768996960485, "grad_norm": 0.07471444334785458, "learning_rate": 9.865975593924032e-06, "loss": 0.6125, "step": 626 }, { "epoch": 0.3049240121580547, "grad_norm": 0.07592829255837753, "learning_rate": 9.865534899144966e-06, "loss": 0.5848, "step": 627 }, { "epoch": 0.30541033434650455, "grad_norm": 0.07500904571242861, "learning_rate": 9.865093490890457e-06, "loss": 0.5963, "step": 628 }, { "epoch": 0.3058966565349544, "grad_norm": 0.0789002146024155, "learning_rate": 9.864651369225236e-06, "loss": 0.6528, "step": 629 }, { "epoch": 0.30638297872340425, "grad_norm": 0.07392833124910866, "learning_rate": 9.864208534214132e-06, "loss": 0.6176, "step": 630 }, { "epoch": 0.3068693009118541, "grad_norm": 0.0766961339559058, "learning_rate": 9.863764985922083e-06, "loss": 0.6003, "step": 631 }, { "epoch": 0.30735562310030395, "grad_norm": 0.07549114504172938, "learning_rate": 9.863320724414134e-06, "loss": 0.5885, "step": 632 }, { "epoch": 0.3078419452887538, "grad_norm": 0.0771367870397442, "learning_rate": 9.862875749755425e-06, "loss": 0.629, "step": 633 }, { "epoch": 0.30832826747720365, "grad_norm": 0.07757058569579986, "learning_rate": 9.862430062011209e-06, "loss": 0.6253, "step": 634 }, { "epoch": 0.3088145896656535, "grad_norm": 0.07791342697788971, "learning_rate": 9.861983661246841e-06, "loss": 0.6214, "step": 635 }, { "epoch": 0.30930091185410336, "grad_norm": 0.07927633000808611, "learning_rate": 9.86153654752778e-06, "loss": 0.6678, "step": 636 }, { "epoch": 0.3097872340425532, "grad_norm": 0.07715047905975243, "learning_rate": 9.861088720919592e-06, "loss": 0.6275, "step": 637 }, { "epoch": 0.31027355623100306, "grad_norm": 0.07598115195008412, "learning_rate": 9.860640181487942e-06, "loss": 0.6028, "step": 638 }, { "epoch": 0.3107598784194529, "grad_norm": 0.07706302997100106, "learning_rate": 9.860190929298607e-06, "loss": 0.6185, "step": 639 }, { "epoch": 0.31124620060790276, "grad_norm": 0.07784783744635577, "learning_rate": 9.859740964417464e-06, "loss": 0.6024, "step": 640 }, { "epoch": 0.3117325227963526, "grad_norm": 0.07591354484928346, "learning_rate": 9.859290286910495e-06, "loss": 0.6042, "step": 641 }, { "epoch": 0.31221884498480246, "grad_norm": 0.07446271844355028, "learning_rate": 9.858838896843785e-06, "loss": 0.5915, "step": 642 }, { "epoch": 0.31270516717325225, "grad_norm": 0.07481824884425073, "learning_rate": 9.858386794283527e-06, "loss": 0.6225, "step": 643 }, { "epoch": 0.3131914893617021, "grad_norm": 0.07856629947318088, "learning_rate": 9.857933979296017e-06, "loss": 0.6073, "step": 644 }, { "epoch": 0.31367781155015195, "grad_norm": 0.07753303860073824, "learning_rate": 9.857480451947653e-06, "loss": 0.65, "step": 645 }, { "epoch": 0.3141641337386018, "grad_norm": 0.07819017808697808, "learning_rate": 9.857026212304942e-06, "loss": 0.6123, "step": 646 }, { "epoch": 0.31465045592705165, "grad_norm": 0.07495690460091109, "learning_rate": 9.856571260434492e-06, "loss": 0.6027, "step": 647 }, { "epoch": 0.3151367781155015, "grad_norm": 0.07460249544332014, "learning_rate": 9.856115596403016e-06, "loss": 0.586, "step": 648 }, { "epoch": 0.31562310030395135, "grad_norm": 0.07784276518518962, "learning_rate": 9.855659220277334e-06, "loss": 0.5718, "step": 649 }, { "epoch": 0.3161094224924012, "grad_norm": 0.07485943767475445, "learning_rate": 9.855202132124367e-06, "loss": 0.6109, "step": 650 }, { "epoch": 0.31659574468085105, "grad_norm": 0.08020793741394526, "learning_rate": 9.85474433201114e-06, "loss": 0.6094, "step": 651 }, { "epoch": 0.3170820668693009, "grad_norm": 0.0774710633397956, "learning_rate": 9.854285820004787e-06, "loss": 0.5885, "step": 652 }, { "epoch": 0.31756838905775076, "grad_norm": 0.08517051225467598, "learning_rate": 9.853826596172542e-06, "loss": 0.6504, "step": 653 }, { "epoch": 0.3180547112462006, "grad_norm": 0.07496279642075002, "learning_rate": 9.853366660581747e-06, "loss": 0.629, "step": 654 }, { "epoch": 0.31854103343465046, "grad_norm": 0.07733450380865818, "learning_rate": 9.852906013299844e-06, "loss": 0.6442, "step": 655 }, { "epoch": 0.3190273556231003, "grad_norm": 0.07960430891285124, "learning_rate": 9.852444654394381e-06, "loss": 0.5794, "step": 656 }, { "epoch": 0.31951367781155016, "grad_norm": 0.08247442292541365, "learning_rate": 9.851982583933015e-06, "loss": 0.6755, "step": 657 }, { "epoch": 0.32, "grad_norm": 0.07906928316752099, "learning_rate": 9.8515198019835e-06, "loss": 0.6016, "step": 658 }, { "epoch": 0.32048632218844986, "grad_norm": 0.07279415448761366, "learning_rate": 9.851056308613699e-06, "loss": 0.6201, "step": 659 }, { "epoch": 0.3209726443768997, "grad_norm": 0.07821441013586179, "learning_rate": 9.850592103891578e-06, "loss": 0.6593, "step": 660 }, { "epoch": 0.32145896656534956, "grad_norm": 0.07651420351781575, "learning_rate": 9.850127187885206e-06, "loss": 0.6042, "step": 661 }, { "epoch": 0.3219452887537994, "grad_norm": 0.07898612474588815, "learning_rate": 9.84966156066276e-06, "loss": 0.6313, "step": 662 }, { "epoch": 0.32243161094224926, "grad_norm": 0.07681441100755516, "learning_rate": 9.849195222292516e-06, "loss": 0.6382, "step": 663 }, { "epoch": 0.3229179331306991, "grad_norm": 0.08195586140848041, "learning_rate": 9.84872817284286e-06, "loss": 0.6158, "step": 664 }, { "epoch": 0.32340425531914896, "grad_norm": 0.07864083344934501, "learning_rate": 9.848260412382279e-06, "loss": 0.6507, "step": 665 }, { "epoch": 0.3238905775075988, "grad_norm": 0.07889338788677215, "learning_rate": 9.847791940979363e-06, "loss": 0.6016, "step": 666 }, { "epoch": 0.32437689969604866, "grad_norm": 0.0796539823708227, "learning_rate": 9.847322758702812e-06, "loss": 0.6211, "step": 667 }, { "epoch": 0.32486322188449845, "grad_norm": 0.07486164930445696, "learning_rate": 9.846852865621418e-06, "loss": 0.6195, "step": 668 }, { "epoch": 0.3253495440729483, "grad_norm": 0.07344230603374521, "learning_rate": 9.846382261804095e-06, "loss": 0.5837, "step": 669 }, { "epoch": 0.32583586626139815, "grad_norm": 0.0761890307252086, "learning_rate": 9.845910947319848e-06, "loss": 0.5642, "step": 670 }, { "epoch": 0.326322188449848, "grad_norm": 0.07368529390910186, "learning_rate": 9.845438922237787e-06, "loss": 0.6246, "step": 671 }, { "epoch": 0.32680851063829786, "grad_norm": 0.07474690893701941, "learning_rate": 9.844966186627134e-06, "loss": 0.6071, "step": 672 }, { "epoch": 0.3272948328267477, "grad_norm": 0.07488169201570247, "learning_rate": 9.844492740557206e-06, "loss": 0.5959, "step": 673 }, { "epoch": 0.32778115501519756, "grad_norm": 0.07734039064837983, "learning_rate": 9.84401858409743e-06, "loss": 0.6404, "step": 674 }, { "epoch": 0.3282674772036474, "grad_norm": 0.07457934442579399, "learning_rate": 9.843543717317338e-06, "loss": 0.571, "step": 675 }, { "epoch": 0.32875379939209726, "grad_norm": 0.07382180274743218, "learning_rate": 9.843068140286562e-06, "loss": 0.5834, "step": 676 }, { "epoch": 0.3292401215805471, "grad_norm": 0.07242396945111616, "learning_rate": 9.842591853074838e-06, "loss": 0.597, "step": 677 }, { "epoch": 0.32972644376899696, "grad_norm": 0.07702264661601613, "learning_rate": 9.842114855752013e-06, "loss": 0.6258, "step": 678 }, { "epoch": 0.3302127659574468, "grad_norm": 0.08038052018472745, "learning_rate": 9.841637148388028e-06, "loss": 0.6348, "step": 679 }, { "epoch": 0.33069908814589666, "grad_norm": 0.07527820360896588, "learning_rate": 9.841158731052937e-06, "loss": 0.6303, "step": 680 }, { "epoch": 0.3311854103343465, "grad_norm": 0.07944841316419674, "learning_rate": 9.840679603816892e-06, "loss": 0.6924, "step": 681 }, { "epoch": 0.33167173252279636, "grad_norm": 0.07541956585873169, "learning_rate": 9.840199766750153e-06, "loss": 0.6139, "step": 682 }, { "epoch": 0.3321580547112462, "grad_norm": 0.08391678283781674, "learning_rate": 9.839719219923082e-06, "loss": 0.6003, "step": 683 }, { "epoch": 0.33264437689969606, "grad_norm": 0.0759802269594239, "learning_rate": 9.839237963406147e-06, "loss": 0.6052, "step": 684 }, { "epoch": 0.3331306990881459, "grad_norm": 0.07769340011449526, "learning_rate": 9.838755997269917e-06, "loss": 0.6029, "step": 685 }, { "epoch": 0.33361702127659576, "grad_norm": 0.07699460805415395, "learning_rate": 9.838273321585067e-06, "loss": 0.6133, "step": 686 }, { "epoch": 0.3341033434650456, "grad_norm": 0.08324795617970843, "learning_rate": 9.837789936422378e-06, "loss": 0.6769, "step": 687 }, { "epoch": 0.33458966565349546, "grad_norm": 0.08194637481266749, "learning_rate": 9.837305841852731e-06, "loss": 0.5749, "step": 688 }, { "epoch": 0.3350759878419453, "grad_norm": 0.07811188647141626, "learning_rate": 9.836821037947113e-06, "loss": 0.6044, "step": 689 }, { "epoch": 0.33556231003039516, "grad_norm": 0.08997965828782913, "learning_rate": 9.836335524776616e-06, "loss": 0.7448, "step": 690 }, { "epoch": 0.336048632218845, "grad_norm": 0.08292868046021122, "learning_rate": 9.835849302412435e-06, "loss": 0.5858, "step": 691 }, { "epoch": 0.3365349544072948, "grad_norm": 0.08069143975469402, "learning_rate": 9.835362370925868e-06, "loss": 0.6057, "step": 692 }, { "epoch": 0.33702127659574466, "grad_norm": 0.07584607394712571, "learning_rate": 9.83487473038832e-06, "loss": 0.557, "step": 693 }, { "epoch": 0.3375075987841945, "grad_norm": 0.07567055955041796, "learning_rate": 9.834386380871294e-06, "loss": 0.5572, "step": 694 }, { "epoch": 0.33799392097264436, "grad_norm": 0.07297629979505112, "learning_rate": 9.833897322446404e-06, "loss": 0.6044, "step": 695 }, { "epoch": 0.3384802431610942, "grad_norm": 0.07891151794248634, "learning_rate": 9.833407555185366e-06, "loss": 0.6468, "step": 696 }, { "epoch": 0.33896656534954406, "grad_norm": 0.07795895679859437, "learning_rate": 9.832917079159994e-06, "loss": 0.6162, "step": 697 }, { "epoch": 0.3394528875379939, "grad_norm": 0.07137876301000494, "learning_rate": 9.832425894442217e-06, "loss": 0.5521, "step": 698 }, { "epoch": 0.33993920972644376, "grad_norm": 0.07294508322977147, "learning_rate": 9.831934001104056e-06, "loss": 0.577, "step": 699 }, { "epoch": 0.3404255319148936, "grad_norm": 0.07696648748694457, "learning_rate": 9.831441399217645e-06, "loss": 0.5845, "step": 700 }, { "epoch": 0.34091185410334346, "grad_norm": 0.07619005166154548, "learning_rate": 9.830948088855217e-06, "loss": 0.6073, "step": 701 }, { "epoch": 0.3413981762917933, "grad_norm": 0.07745192730719962, "learning_rate": 9.830454070089111e-06, "loss": 0.6034, "step": 702 }, { "epoch": 0.34188449848024316, "grad_norm": 0.07605660388505125, "learning_rate": 9.829959342991769e-06, "loss": 0.6019, "step": 703 }, { "epoch": 0.342370820668693, "grad_norm": 0.08351355346840139, "learning_rate": 9.829463907635737e-06, "loss": 0.6215, "step": 704 }, { "epoch": 0.34285714285714286, "grad_norm": 0.07559345496782574, "learning_rate": 9.828967764093666e-06, "loss": 0.6155, "step": 705 }, { "epoch": 0.3433434650455927, "grad_norm": 0.0746566912957668, "learning_rate": 9.828470912438308e-06, "loss": 0.6551, "step": 706 }, { "epoch": 0.34382978723404256, "grad_norm": 0.0879140204566672, "learning_rate": 9.827973352742523e-06, "loss": 0.5951, "step": 707 }, { "epoch": 0.3443161094224924, "grad_norm": 0.08011827301576804, "learning_rate": 9.82747508507927e-06, "loss": 0.63, "step": 708 }, { "epoch": 0.34480243161094226, "grad_norm": 0.08785356758693239, "learning_rate": 9.826976109521616e-06, "loss": 0.6128, "step": 709 }, { "epoch": 0.3452887537993921, "grad_norm": 0.07691999715793084, "learning_rate": 9.826476426142729e-06, "loss": 0.6105, "step": 710 }, { "epoch": 0.34577507598784196, "grad_norm": 0.07464313027315647, "learning_rate": 9.825976035015881e-06, "loss": 0.5893, "step": 711 }, { "epoch": 0.3462613981762918, "grad_norm": 0.08167798190056258, "learning_rate": 9.825474936214453e-06, "loss": 0.6157, "step": 712 }, { "epoch": 0.34674772036474166, "grad_norm": 0.0799450167567303, "learning_rate": 9.824973129811919e-06, "loss": 0.5729, "step": 713 }, { "epoch": 0.3472340425531915, "grad_norm": 0.07631565362495049, "learning_rate": 9.82447061588187e-06, "loss": 0.6165, "step": 714 }, { "epoch": 0.34772036474164136, "grad_norm": 0.07452953158131076, "learning_rate": 9.823967394497988e-06, "loss": 0.594, "step": 715 }, { "epoch": 0.34820668693009116, "grad_norm": 0.07602715809828217, "learning_rate": 9.823463465734068e-06, "loss": 0.6164, "step": 716 }, { "epoch": 0.348693009118541, "grad_norm": 0.07579853378576255, "learning_rate": 9.822958829664007e-06, "loss": 0.5924, "step": 717 }, { "epoch": 0.34917933130699086, "grad_norm": 0.07337284322477278, "learning_rate": 9.822453486361801e-06, "loss": 0.5654, "step": 718 }, { "epoch": 0.3496656534954407, "grad_norm": 0.0780096594422364, "learning_rate": 9.821947435901552e-06, "loss": 0.5959, "step": 719 }, { "epoch": 0.35015197568389056, "grad_norm": 0.07657189795860213, "learning_rate": 9.82144067835747e-06, "loss": 0.5908, "step": 720 }, { "epoch": 0.3506382978723404, "grad_norm": 0.07451495597472056, "learning_rate": 9.820933213803863e-06, "loss": 0.6152, "step": 721 }, { "epoch": 0.35112462006079026, "grad_norm": 0.07596477483710605, "learning_rate": 9.820425042315145e-06, "loss": 0.6124, "step": 722 }, { "epoch": 0.3516109422492401, "grad_norm": 0.0756182429112107, "learning_rate": 9.819916163965835e-06, "loss": 0.5901, "step": 723 }, { "epoch": 0.35209726443768996, "grad_norm": 0.0783777772990784, "learning_rate": 9.819406578830553e-06, "loss": 0.5966, "step": 724 }, { "epoch": 0.3525835866261398, "grad_norm": 0.08137822089637715, "learning_rate": 9.818896286984025e-06, "loss": 0.6366, "step": 725 }, { "epoch": 0.35306990881458966, "grad_norm": 0.07975805026259344, "learning_rate": 9.818385288501078e-06, "loss": 0.6224, "step": 726 }, { "epoch": 0.3535562310030395, "grad_norm": 0.08130133299735791, "learning_rate": 9.817873583456646e-06, "loss": 0.661, "step": 727 }, { "epoch": 0.35404255319148936, "grad_norm": 0.07622972893811689, "learning_rate": 9.81736117192576e-06, "loss": 0.6402, "step": 728 }, { "epoch": 0.3545288753799392, "grad_norm": 0.07570450408498365, "learning_rate": 9.816848053983568e-06, "loss": 0.6268, "step": 729 }, { "epoch": 0.35501519756838906, "grad_norm": 0.07833182563314217, "learning_rate": 9.816334229705304e-06, "loss": 0.5937, "step": 730 }, { "epoch": 0.3555015197568389, "grad_norm": 0.07597390230572401, "learning_rate": 9.81581969916632e-06, "loss": 0.609, "step": 731 }, { "epoch": 0.35598784194528876, "grad_norm": 0.07329152255471012, "learning_rate": 9.815304462442064e-06, "loss": 0.5949, "step": 732 }, { "epoch": 0.3564741641337386, "grad_norm": 0.07240498323302307, "learning_rate": 9.81478851960809e-06, "loss": 0.5638, "step": 733 }, { "epoch": 0.35696048632218846, "grad_norm": 0.07369913230703103, "learning_rate": 9.814271870740054e-06, "loss": 0.6111, "step": 734 }, { "epoch": 0.3574468085106383, "grad_norm": 0.07674314521953614, "learning_rate": 9.81375451591372e-06, "loss": 0.5791, "step": 735 }, { "epoch": 0.35793313069908816, "grad_norm": 0.07945307747873935, "learning_rate": 9.813236455204948e-06, "loss": 0.6099, "step": 736 }, { "epoch": 0.358419452887538, "grad_norm": 0.09091806479730845, "learning_rate": 9.81271768868971e-06, "loss": 0.6528, "step": 737 }, { "epoch": 0.35890577507598787, "grad_norm": 0.07151096378399975, "learning_rate": 9.812198216444072e-06, "loss": 0.5806, "step": 738 }, { "epoch": 0.3593920972644377, "grad_norm": 0.07746056417723758, "learning_rate": 9.811678038544215e-06, "loss": 0.639, "step": 739 }, { "epoch": 0.35987841945288757, "grad_norm": 0.07804611191692483, "learning_rate": 9.81115715506641e-06, "loss": 0.6532, "step": 740 }, { "epoch": 0.36036474164133736, "grad_norm": 0.07394008459322403, "learning_rate": 9.810635566087046e-06, "loss": 0.6404, "step": 741 }, { "epoch": 0.3608510638297872, "grad_norm": 0.07744506355928404, "learning_rate": 9.810113271682603e-06, "loss": 0.6231, "step": 742 }, { "epoch": 0.36133738601823706, "grad_norm": 0.07564954071125862, "learning_rate": 9.809590271929673e-06, "loss": 0.6113, "step": 743 }, { "epoch": 0.3618237082066869, "grad_norm": 0.07453273918898405, "learning_rate": 9.809066566904943e-06, "loss": 0.5955, "step": 744 }, { "epoch": 0.36231003039513676, "grad_norm": 0.07479149946864226, "learning_rate": 9.808542156685214e-06, "loss": 0.5769, "step": 745 }, { "epoch": 0.3627963525835866, "grad_norm": 0.082402355714264, "learning_rate": 9.808017041347381e-06, "loss": 0.6218, "step": 746 }, { "epoch": 0.36328267477203646, "grad_norm": 0.07873225068391046, "learning_rate": 9.807491220968449e-06, "loss": 0.6112, "step": 747 }, { "epoch": 0.3637689969604863, "grad_norm": 0.07455467176287123, "learning_rate": 9.806964695625521e-06, "loss": 0.5794, "step": 748 }, { "epoch": 0.36425531914893616, "grad_norm": 0.07669381174528357, "learning_rate": 9.806437465395806e-06, "loss": 0.5928, "step": 749 }, { "epoch": 0.364741641337386, "grad_norm": 0.07708746212665712, "learning_rate": 9.805909530356619e-06, "loss": 0.5792, "step": 750 }, { "epoch": 0.36522796352583586, "grad_norm": 0.0782021814908094, "learning_rate": 9.805380890585374e-06, "loss": 0.5986, "step": 751 }, { "epoch": 0.3657142857142857, "grad_norm": 0.07580465311028298, "learning_rate": 9.804851546159591e-06, "loss": 0.6016, "step": 752 }, { "epoch": 0.36620060790273556, "grad_norm": 0.07425982247576306, "learning_rate": 9.804321497156889e-06, "loss": 0.5865, "step": 753 }, { "epoch": 0.3666869300911854, "grad_norm": 0.07343054880813348, "learning_rate": 9.803790743654997e-06, "loss": 0.5846, "step": 754 }, { "epoch": 0.36717325227963526, "grad_norm": 0.07602241035777223, "learning_rate": 9.803259285731744e-06, "loss": 0.6245, "step": 755 }, { "epoch": 0.3676595744680851, "grad_norm": 0.07560893349405994, "learning_rate": 9.802727123465061e-06, "loss": 0.5705, "step": 756 }, { "epoch": 0.36814589665653497, "grad_norm": 0.0781592045474348, "learning_rate": 9.802194256932985e-06, "loss": 0.6035, "step": 757 }, { "epoch": 0.3686322188449848, "grad_norm": 0.5479549618834126, "learning_rate": 9.801660686213653e-06, "loss": 0.6121, "step": 758 }, { "epoch": 0.36911854103343467, "grad_norm": 0.07972933601606225, "learning_rate": 9.801126411385306e-06, "loss": 0.5695, "step": 759 }, { "epoch": 0.3696048632218845, "grad_norm": 0.08009475079810593, "learning_rate": 9.800591432526291e-06, "loss": 0.6005, "step": 760 }, { "epoch": 0.37009118541033437, "grad_norm": 0.07582135814627514, "learning_rate": 9.80005574971506e-06, "loss": 0.551, "step": 761 }, { "epoch": 0.3705775075987842, "grad_norm": 0.07473388860056311, "learning_rate": 9.79951936303016e-06, "loss": 0.5768, "step": 762 }, { "epoch": 0.37106382978723407, "grad_norm": 0.08367807189980862, "learning_rate": 9.798982272550248e-06, "loss": 0.6329, "step": 763 }, { "epoch": 0.3715501519756839, "grad_norm": 0.07874208648761716, "learning_rate": 9.79844447835408e-06, "loss": 0.6011, "step": 764 }, { "epoch": 0.3720364741641337, "grad_norm": 0.07706266515535616, "learning_rate": 9.797905980520522e-06, "loss": 0.6013, "step": 765 }, { "epoch": 0.37252279635258356, "grad_norm": 0.0958171568155027, "learning_rate": 9.797366779128532e-06, "loss": 0.6558, "step": 766 }, { "epoch": 0.3730091185410334, "grad_norm": 0.07697272843657556, "learning_rate": 9.796826874257186e-06, "loss": 0.6125, "step": 767 }, { "epoch": 0.37349544072948326, "grad_norm": 0.08186349415790875, "learning_rate": 9.796286265985648e-06, "loss": 0.6011, "step": 768 }, { "epoch": 0.3739817629179331, "grad_norm": 0.0883592718059818, "learning_rate": 9.795744954393193e-06, "loss": 0.5679, "step": 769 }, { "epoch": 0.37446808510638296, "grad_norm": 0.08167812442101358, "learning_rate": 9.795202939559202e-06, "loss": 0.6299, "step": 770 }, { "epoch": 0.3749544072948328, "grad_norm": 0.0792685871068706, "learning_rate": 9.794660221563153e-06, "loss": 0.5999, "step": 771 }, { "epoch": 0.37544072948328266, "grad_norm": 0.08532744694283427, "learning_rate": 9.79411680048463e-06, "loss": 0.6254, "step": 772 }, { "epoch": 0.3759270516717325, "grad_norm": 0.07806256718078687, "learning_rate": 9.793572676403317e-06, "loss": 0.619, "step": 773 }, { "epoch": 0.37641337386018237, "grad_norm": 0.0793179847038666, "learning_rate": 9.793027849399007e-06, "loss": 0.6606, "step": 774 }, { "epoch": 0.3768996960486322, "grad_norm": 0.07888310670534646, "learning_rate": 9.792482319551591e-06, "loss": 0.6185, "step": 775 }, { "epoch": 0.37738601823708207, "grad_norm": 0.07647686653808983, "learning_rate": 9.791936086941065e-06, "loss": 0.6145, "step": 776 }, { "epoch": 0.3778723404255319, "grad_norm": 0.07602720666265521, "learning_rate": 9.791389151647528e-06, "loss": 0.5883, "step": 777 }, { "epoch": 0.37835866261398177, "grad_norm": 0.0801984661142592, "learning_rate": 9.790841513751183e-06, "loss": 0.5713, "step": 778 }, { "epoch": 0.3788449848024316, "grad_norm": 0.07716436222336652, "learning_rate": 9.790293173332332e-06, "loss": 0.6222, "step": 779 }, { "epoch": 0.37933130699088147, "grad_norm": 0.0756425996143619, "learning_rate": 9.789744130471384e-06, "loss": 0.5851, "step": 780 }, { "epoch": 0.3798176291793313, "grad_norm": 0.1257844678410659, "learning_rate": 9.789194385248853e-06, "loss": 0.6561, "step": 781 }, { "epoch": 0.38030395136778117, "grad_norm": 0.13855605654022526, "learning_rate": 9.788643937745349e-06, "loss": 0.6287, "step": 782 }, { "epoch": 0.380790273556231, "grad_norm": 0.07487811345641193, "learning_rate": 9.788092788041589e-06, "loss": 0.5895, "step": 783 }, { "epoch": 0.38127659574468087, "grad_norm": 0.07377100047352117, "learning_rate": 9.787540936218393e-06, "loss": 0.5368, "step": 784 }, { "epoch": 0.3817629179331307, "grad_norm": 0.07584459135647661, "learning_rate": 9.786988382356688e-06, "loss": 0.5947, "step": 785 }, { "epoch": 0.38224924012158057, "grad_norm": 0.07479041400561619, "learning_rate": 9.786435126537494e-06, "loss": 0.5972, "step": 786 }, { "epoch": 0.3827355623100304, "grad_norm": 0.07404379534937033, "learning_rate": 9.785881168841944e-06, "loss": 0.614, "step": 787 }, { "epoch": 0.38322188449848027, "grad_norm": 0.07399166012541243, "learning_rate": 9.785326509351268e-06, "loss": 0.6004, "step": 788 }, { "epoch": 0.38370820668693006, "grad_norm": 0.08121249684545276, "learning_rate": 9.7847711481468e-06, "loss": 0.6078, "step": 789 }, { "epoch": 0.3841945288753799, "grad_norm": 0.07987169556666426, "learning_rate": 9.784215085309977e-06, "loss": 0.5872, "step": 790 }, { "epoch": 0.38468085106382977, "grad_norm": 0.07695724165931447, "learning_rate": 9.783658320922341e-06, "loss": 0.5858, "step": 791 }, { "epoch": 0.3851671732522796, "grad_norm": 0.08018413963144698, "learning_rate": 9.783100855065533e-06, "loss": 0.601, "step": 792 }, { "epoch": 0.38565349544072947, "grad_norm": 0.0762652933877143, "learning_rate": 9.782542687821302e-06, "loss": 0.6222, "step": 793 }, { "epoch": 0.3861398176291793, "grad_norm": 0.07851378702374802, "learning_rate": 9.781983819271494e-06, "loss": 0.5988, "step": 794 }, { "epoch": 0.38662613981762917, "grad_norm": 0.07160612568255702, "learning_rate": 9.781424249498064e-06, "loss": 0.5586, "step": 795 }, { "epoch": 0.387112462006079, "grad_norm": 0.07633586461842592, "learning_rate": 9.780863978583061e-06, "loss": 0.622, "step": 796 }, { "epoch": 0.38759878419452887, "grad_norm": 0.08553591241183264, "learning_rate": 9.78030300660865e-06, "loss": 0.6428, "step": 797 }, { "epoch": 0.3880851063829787, "grad_norm": 0.07603119624398466, "learning_rate": 9.779741333657084e-06, "loss": 0.596, "step": 798 }, { "epoch": 0.38857142857142857, "grad_norm": 0.07346753616473085, "learning_rate": 9.779178959810728e-06, "loss": 0.5701, "step": 799 }, { "epoch": 0.3890577507598784, "grad_norm": 0.07552954801862151, "learning_rate": 9.778615885152052e-06, "loss": 0.6303, "step": 800 }, { "epoch": 0.38954407294832827, "grad_norm": 0.07781764530797763, "learning_rate": 9.778052109763619e-06, "loss": 0.5965, "step": 801 }, { "epoch": 0.3900303951367781, "grad_norm": 0.081946823974092, "learning_rate": 9.777487633728103e-06, "loss": 0.5732, "step": 802 }, { "epoch": 0.39051671732522797, "grad_norm": 0.07662313874272146, "learning_rate": 9.776922457128277e-06, "loss": 0.6052, "step": 803 }, { "epoch": 0.3910030395136778, "grad_norm": 0.07587320821089638, "learning_rate": 9.77635658004702e-06, "loss": 0.6246, "step": 804 }, { "epoch": 0.39148936170212767, "grad_norm": 0.07369205805954429, "learning_rate": 9.77579000256731e-06, "loss": 0.602, "step": 805 }, { "epoch": 0.3919756838905775, "grad_norm": 0.07457476463638249, "learning_rate": 9.775222724772226e-06, "loss": 0.5692, "step": 806 }, { "epoch": 0.39246200607902737, "grad_norm": 0.07812562033668508, "learning_rate": 9.774654746744957e-06, "loss": 0.6353, "step": 807 }, { "epoch": 0.3929483282674772, "grad_norm": 0.07605717651897465, "learning_rate": 9.77408606856879e-06, "loss": 0.5628, "step": 808 }, { "epoch": 0.39343465045592707, "grad_norm": 0.07584249979144048, "learning_rate": 9.773516690327111e-06, "loss": 0.5825, "step": 809 }, { "epoch": 0.3939209726443769, "grad_norm": 0.07921338181632771, "learning_rate": 9.77294661210342e-06, "loss": 0.6414, "step": 810 }, { "epoch": 0.39440729483282677, "grad_norm": 0.07717726232374068, "learning_rate": 9.772375833981306e-06, "loss": 0.602, "step": 811 }, { "epoch": 0.3948936170212766, "grad_norm": 0.07342179279640387, "learning_rate": 9.771804356044473e-06, "loss": 0.5587, "step": 812 }, { "epoch": 0.3953799392097264, "grad_norm": 0.0752072699002813, "learning_rate": 9.771232178376717e-06, "loss": 0.6419, "step": 813 }, { "epoch": 0.39586626139817627, "grad_norm": 0.07353312399126578, "learning_rate": 9.770659301061943e-06, "loss": 0.5743, "step": 814 }, { "epoch": 0.3963525835866261, "grad_norm": 0.07518407178014146, "learning_rate": 9.770085724184158e-06, "loss": 0.5839, "step": 815 }, { "epoch": 0.39683890577507597, "grad_norm": 0.07512845684968103, "learning_rate": 9.769511447827466e-06, "loss": 0.5983, "step": 816 }, { "epoch": 0.3973252279635258, "grad_norm": 0.07483337228430045, "learning_rate": 9.768936472076086e-06, "loss": 0.5643, "step": 817 }, { "epoch": 0.39781155015197567, "grad_norm": 0.07826536390308092, "learning_rate": 9.768360797014325e-06, "loss": 0.5902, "step": 818 }, { "epoch": 0.3982978723404255, "grad_norm": 0.07823860074212571, "learning_rate": 9.767784422726601e-06, "loss": 0.6034, "step": 819 }, { "epoch": 0.39878419452887537, "grad_norm": 0.07928236098172678, "learning_rate": 9.767207349297434e-06, "loss": 0.6056, "step": 820 }, { "epoch": 0.3992705167173252, "grad_norm": 0.07502602439184818, "learning_rate": 9.766629576811444e-06, "loss": 0.5634, "step": 821 }, { "epoch": 0.39975683890577507, "grad_norm": 0.07440247125134211, "learning_rate": 9.766051105353355e-06, "loss": 0.5997, "step": 822 }, { "epoch": 0.4002431610942249, "grad_norm": 0.07432716463247864, "learning_rate": 9.765471935007995e-06, "loss": 0.647, "step": 823 }, { "epoch": 0.40072948328267477, "grad_norm": 0.0732308644469105, "learning_rate": 9.76489206586029e-06, "loss": 0.578, "step": 824 }, { "epoch": 0.4012158054711246, "grad_norm": 0.07893899133784575, "learning_rate": 9.764311497995272e-06, "loss": 0.6225, "step": 825 }, { "epoch": 0.40170212765957447, "grad_norm": 0.07296501503582166, "learning_rate": 9.763730231498077e-06, "loss": 0.5958, "step": 826 }, { "epoch": 0.4021884498480243, "grad_norm": 0.0764385710082842, "learning_rate": 9.763148266453937e-06, "loss": 0.6243, "step": 827 }, { "epoch": 0.40267477203647417, "grad_norm": 0.07583379733812651, "learning_rate": 9.762565602948194e-06, "loss": 0.6402, "step": 828 }, { "epoch": 0.403161094224924, "grad_norm": 0.07634036599173366, "learning_rate": 9.761982241066288e-06, "loss": 0.6329, "step": 829 }, { "epoch": 0.40364741641337387, "grad_norm": 0.08103214345275933, "learning_rate": 9.761398180893761e-06, "loss": 0.585, "step": 830 }, { "epoch": 0.4041337386018237, "grad_norm": 0.07620272595765024, "learning_rate": 9.760813422516262e-06, "loss": 0.6248, "step": 831 }, { "epoch": 0.4046200607902736, "grad_norm": 0.0796826241754394, "learning_rate": 9.760227966019537e-06, "loss": 0.6052, "step": 832 }, { "epoch": 0.4051063829787234, "grad_norm": 0.07632403049958285, "learning_rate": 9.759641811489435e-06, "loss": 0.5757, "step": 833 }, { "epoch": 0.4055927051671733, "grad_norm": 0.07616229687040604, "learning_rate": 9.759054959011913e-06, "loss": 0.6068, "step": 834 }, { "epoch": 0.4060790273556231, "grad_norm": 0.07226407706388134, "learning_rate": 9.758467408673022e-06, "loss": 0.5968, "step": 835 }, { "epoch": 0.406565349544073, "grad_norm": 0.0808888319207876, "learning_rate": 9.757879160558923e-06, "loss": 0.6091, "step": 836 }, { "epoch": 0.4070516717325228, "grad_norm": 0.07666755480788576, "learning_rate": 9.757290214755873e-06, "loss": 0.5954, "step": 837 }, { "epoch": 0.4075379939209726, "grad_norm": 0.0785550634517315, "learning_rate": 9.756700571350234e-06, "loss": 0.5964, "step": 838 }, { "epoch": 0.40802431610942247, "grad_norm": 0.0756310976979388, "learning_rate": 9.756110230428476e-06, "loss": 0.6078, "step": 839 }, { "epoch": 0.4085106382978723, "grad_norm": 0.07712320689679607, "learning_rate": 9.75551919207716e-06, "loss": 0.5821, "step": 840 }, { "epoch": 0.40899696048632217, "grad_norm": 0.07383297779896966, "learning_rate": 9.754927456382957e-06, "loss": 0.5553, "step": 841 }, { "epoch": 0.409483282674772, "grad_norm": 0.07829601350018571, "learning_rate": 9.75433502343264e-06, "loss": 0.5716, "step": 842 }, { "epoch": 0.40996960486322187, "grad_norm": 0.08032894608305195, "learning_rate": 9.753741893313077e-06, "loss": 0.6025, "step": 843 }, { "epoch": 0.4104559270516717, "grad_norm": 0.07736144033521516, "learning_rate": 9.753148066111251e-06, "loss": 0.6401, "step": 844 }, { "epoch": 0.41094224924012157, "grad_norm": 0.07223111267633972, "learning_rate": 9.752553541914236e-06, "loss": 0.6098, "step": 845 }, { "epoch": 0.4114285714285714, "grad_norm": 0.08321710030729894, "learning_rate": 9.751958320809213e-06, "loss": 0.5994, "step": 846 }, { "epoch": 0.41191489361702127, "grad_norm": 0.08000958815494474, "learning_rate": 9.751362402883465e-06, "loss": 0.6199, "step": 847 }, { "epoch": 0.4124012158054711, "grad_norm": 0.07387542505957467, "learning_rate": 9.750765788224374e-06, "loss": 0.5773, "step": 848 }, { "epoch": 0.41288753799392097, "grad_norm": 0.08077551136540044, "learning_rate": 9.750168476919429e-06, "loss": 0.608, "step": 849 }, { "epoch": 0.4133738601823708, "grad_norm": 0.07915987309003285, "learning_rate": 9.74957046905622e-06, "loss": 0.601, "step": 850 }, { "epoch": 0.4138601823708207, "grad_norm": 0.07569127118214747, "learning_rate": 9.748971764722434e-06, "loss": 0.6116, "step": 851 }, { "epoch": 0.4143465045592705, "grad_norm": 0.07674797213372184, "learning_rate": 9.74837236400587e-06, "loss": 0.5906, "step": 852 }, { "epoch": 0.4148328267477204, "grad_norm": 0.07856468005675736, "learning_rate": 9.747772266994418e-06, "loss": 0.6059, "step": 853 }, { "epoch": 0.4153191489361702, "grad_norm": 0.07738863175280847, "learning_rate": 9.747171473776078e-06, "loss": 0.5887, "step": 854 }, { "epoch": 0.4158054711246201, "grad_norm": 0.07652290342538677, "learning_rate": 9.74656998443895e-06, "loss": 0.6342, "step": 855 }, { "epoch": 0.4162917933130699, "grad_norm": 0.0786624751612501, "learning_rate": 9.745967799071234e-06, "loss": 0.65, "step": 856 }, { "epoch": 0.4167781155015198, "grad_norm": 0.07803366601690188, "learning_rate": 9.745364917761235e-06, "loss": 0.564, "step": 857 }, { "epoch": 0.4172644376899696, "grad_norm": 0.0785850851671536, "learning_rate": 9.744761340597356e-06, "loss": 0.6004, "step": 858 }, { "epoch": 0.4177507598784195, "grad_norm": 0.07700850294747096, "learning_rate": 9.744157067668108e-06, "loss": 0.5712, "step": 859 }, { "epoch": 0.4182370820668693, "grad_norm": 0.07553743520602545, "learning_rate": 9.7435520990621e-06, "loss": 0.596, "step": 860 }, { "epoch": 0.4187234042553192, "grad_norm": 0.07547797004118767, "learning_rate": 9.742946434868044e-06, "loss": 0.6543, "step": 861 }, { "epoch": 0.41920972644376897, "grad_norm": 0.07754776062169427, "learning_rate": 9.742340075174751e-06, "loss": 0.6027, "step": 862 }, { "epoch": 0.4196960486322188, "grad_norm": 0.0766152608824993, "learning_rate": 9.74173302007114e-06, "loss": 0.5901, "step": 863 }, { "epoch": 0.42018237082066867, "grad_norm": 0.1914583263147464, "learning_rate": 9.741125269646228e-06, "loss": 0.6266, "step": 864 }, { "epoch": 0.4206686930091185, "grad_norm": 0.07538623756665357, "learning_rate": 9.740516823989133e-06, "loss": 0.5612, "step": 865 }, { "epoch": 0.42115501519756837, "grad_norm": 0.07417571500644635, "learning_rate": 9.739907683189078e-06, "loss": 0.5562, "step": 866 }, { "epoch": 0.4216413373860182, "grad_norm": 0.07442235181744117, "learning_rate": 9.739297847335387e-06, "loss": 0.619, "step": 867 }, { "epoch": 0.4221276595744681, "grad_norm": 0.07910113288806607, "learning_rate": 9.738687316517486e-06, "loss": 0.6059, "step": 868 }, { "epoch": 0.4226139817629179, "grad_norm": 0.08559545367039657, "learning_rate": 9.7380760908249e-06, "loss": 0.6919, "step": 869 }, { "epoch": 0.4231003039513678, "grad_norm": 0.08695687320206436, "learning_rate": 9.73746417034726e-06, "loss": 0.6031, "step": 870 }, { "epoch": 0.4235866261398176, "grad_norm": 0.07725936774072596, "learning_rate": 9.736851555174295e-06, "loss": 0.5728, "step": 871 }, { "epoch": 0.4240729483282675, "grad_norm": 0.0778958283361194, "learning_rate": 9.736238245395842e-06, "loss": 0.6, "step": 872 }, { "epoch": 0.4245592705167173, "grad_norm": 0.07442993196817138, "learning_rate": 9.735624241101836e-06, "loss": 0.5682, "step": 873 }, { "epoch": 0.4250455927051672, "grad_norm": 0.07601852794136615, "learning_rate": 9.735009542382308e-06, "loss": 0.5736, "step": 874 }, { "epoch": 0.425531914893617, "grad_norm": 0.07273129906169155, "learning_rate": 9.734394149327402e-06, "loss": 0.5669, "step": 875 }, { "epoch": 0.4260182370820669, "grad_norm": 0.08654044254014771, "learning_rate": 9.733778062027355e-06, "loss": 0.6199, "step": 876 }, { "epoch": 0.4265045592705167, "grad_norm": 0.07827821768608124, "learning_rate": 9.733161280572512e-06, "loss": 0.6123, "step": 877 }, { "epoch": 0.4269908814589666, "grad_norm": 0.07420143017419538, "learning_rate": 9.732543805053316e-06, "loss": 0.5998, "step": 878 }, { "epoch": 0.4274772036474164, "grad_norm": 0.07849074664698075, "learning_rate": 9.731925635560314e-06, "loss": 0.5939, "step": 879 }, { "epoch": 0.4279635258358663, "grad_norm": 0.07323072801587062, "learning_rate": 9.73130677218415e-06, "loss": 0.5935, "step": 880 }, { "epoch": 0.4284498480243161, "grad_norm": 0.07321945312865086, "learning_rate": 9.730687215015576e-06, "loss": 0.5851, "step": 881 }, { "epoch": 0.428936170212766, "grad_norm": 0.075591448859421, "learning_rate": 9.730066964145441e-06, "loss": 0.5823, "step": 882 }, { "epoch": 0.4294224924012158, "grad_norm": 0.07441155830176882, "learning_rate": 9.729446019664701e-06, "loss": 0.6085, "step": 883 }, { "epoch": 0.4299088145896657, "grad_norm": 0.07271166988434982, "learning_rate": 9.728824381664408e-06, "loss": 0.575, "step": 884 }, { "epoch": 0.43039513677811553, "grad_norm": 0.07346486482966536, "learning_rate": 9.728202050235718e-06, "loss": 0.5881, "step": 885 }, { "epoch": 0.4308814589665653, "grad_norm": 0.07489452556645379, "learning_rate": 9.72757902546989e-06, "loss": 0.6044, "step": 886 }, { "epoch": 0.4313677811550152, "grad_norm": 0.07632507019752027, "learning_rate": 9.726955307458286e-06, "loss": 0.6231, "step": 887 }, { "epoch": 0.431854103343465, "grad_norm": 0.08578252253728172, "learning_rate": 9.72633089629236e-06, "loss": 0.6285, "step": 888 }, { "epoch": 0.4323404255319149, "grad_norm": 0.07284350237287862, "learning_rate": 9.725705792063681e-06, "loss": 0.5657, "step": 889 }, { "epoch": 0.4328267477203647, "grad_norm": 0.07759959988539591, "learning_rate": 9.725079994863914e-06, "loss": 0.6165, "step": 890 }, { "epoch": 0.4333130699088146, "grad_norm": 0.07381930553688103, "learning_rate": 9.724453504784819e-06, "loss": 0.5513, "step": 891 }, { "epoch": 0.4337993920972644, "grad_norm": 0.07617855274750135, "learning_rate": 9.723826321918268e-06, "loss": 0.5861, "step": 892 }, { "epoch": 0.4342857142857143, "grad_norm": 0.07127956105247536, "learning_rate": 9.72319844635623e-06, "loss": 0.5439, "step": 893 }, { "epoch": 0.4347720364741641, "grad_norm": 0.07448495855434166, "learning_rate": 9.722569878190776e-06, "loss": 0.6072, "step": 894 }, { "epoch": 0.435258358662614, "grad_norm": 0.07440876596547552, "learning_rate": 9.721940617514076e-06, "loss": 0.5854, "step": 895 }, { "epoch": 0.4357446808510638, "grad_norm": 0.0744417889991682, "learning_rate": 9.721310664418406e-06, "loss": 0.6157, "step": 896 }, { "epoch": 0.4362310030395137, "grad_norm": 0.15080017407035806, "learning_rate": 9.720680018996142e-06, "loss": 0.6143, "step": 897 }, { "epoch": 0.4367173252279635, "grad_norm": 0.07117656694073243, "learning_rate": 9.72004868133976e-06, "loss": 0.5783, "step": 898 }, { "epoch": 0.4372036474164134, "grad_norm": 0.08124580916688322, "learning_rate": 9.719416651541839e-06, "loss": 0.6025, "step": 899 }, { "epoch": 0.4376899696048632, "grad_norm": 0.07972345776023025, "learning_rate": 9.718783929695056e-06, "loss": 0.5895, "step": 900 }, { "epoch": 0.4381762917933131, "grad_norm": 0.07641182226859868, "learning_rate": 9.718150515892199e-06, "loss": 0.5851, "step": 901 }, { "epoch": 0.4386626139817629, "grad_norm": 0.07953936083977978, "learning_rate": 9.717516410226144e-06, "loss": 0.62, "step": 902 }, { "epoch": 0.4391489361702128, "grad_norm": 0.08455624384394385, "learning_rate": 9.716881612789878e-06, "loss": 0.6444, "step": 903 }, { "epoch": 0.43963525835866263, "grad_norm": 0.07756115270524952, "learning_rate": 9.716246123676491e-06, "loss": 0.6123, "step": 904 }, { "epoch": 0.4401215805471125, "grad_norm": 0.07732160915673524, "learning_rate": 9.715609942979163e-06, "loss": 0.603, "step": 905 }, { "epoch": 0.44060790273556233, "grad_norm": 0.07766174992231115, "learning_rate": 9.714973070791187e-06, "loss": 0.6185, "step": 906 }, { "epoch": 0.4410942249240122, "grad_norm": 0.07937360530387118, "learning_rate": 9.714335507205953e-06, "loss": 0.5601, "step": 907 }, { "epoch": 0.44158054711246203, "grad_norm": 0.07686143431141876, "learning_rate": 9.713697252316951e-06, "loss": 0.6079, "step": 908 }, { "epoch": 0.4420668693009119, "grad_norm": 0.07288172634716461, "learning_rate": 9.713058306217776e-06, "loss": 0.5616, "step": 909 }, { "epoch": 0.4425531914893617, "grad_norm": 0.0840387537815393, "learning_rate": 9.712418669002119e-06, "loss": 0.6173, "step": 910 }, { "epoch": 0.4430395136778115, "grad_norm": 0.0791516823690533, "learning_rate": 9.711778340763778e-06, "loss": 0.6151, "step": 911 }, { "epoch": 0.4435258358662614, "grad_norm": 0.081325706302791, "learning_rate": 9.711137321596649e-06, "loss": 0.6754, "step": 912 }, { "epoch": 0.4440121580547112, "grad_norm": 0.07313916628395718, "learning_rate": 9.71049561159473e-06, "loss": 0.5868, "step": 913 }, { "epoch": 0.4444984802431611, "grad_norm": 0.07855701100463838, "learning_rate": 9.70985321085212e-06, "loss": 0.5954, "step": 914 }, { "epoch": 0.4449848024316109, "grad_norm": 0.08335727397651717, "learning_rate": 9.709210119463022e-06, "loss": 0.6362, "step": 915 }, { "epoch": 0.4454711246200608, "grad_norm": 0.07602292881303029, "learning_rate": 9.708566337521736e-06, "loss": 0.6011, "step": 916 }, { "epoch": 0.4459574468085106, "grad_norm": 0.07595063819769632, "learning_rate": 9.707921865122665e-06, "loss": 0.6069, "step": 917 }, { "epoch": 0.4464437689969605, "grad_norm": 0.07746393755940535, "learning_rate": 9.707276702360315e-06, "loss": 0.6204, "step": 918 }, { "epoch": 0.4469300911854103, "grad_norm": 0.07765430935306938, "learning_rate": 9.706630849329292e-06, "loss": 0.6032, "step": 919 }, { "epoch": 0.4474164133738602, "grad_norm": 0.13254051685910764, "learning_rate": 9.705984306124302e-06, "loss": 0.5959, "step": 920 }, { "epoch": 0.44790273556231003, "grad_norm": 0.0736482439739256, "learning_rate": 9.705337072840152e-06, "loss": 0.5957, "step": 921 }, { "epoch": 0.4483890577507599, "grad_norm": 0.07161912283251916, "learning_rate": 9.704689149571755e-06, "loss": 0.6029, "step": 922 }, { "epoch": 0.44887537993920973, "grad_norm": 0.07632947843340931, "learning_rate": 9.70404053641412e-06, "loss": 0.6094, "step": 923 }, { "epoch": 0.4493617021276596, "grad_norm": 0.07187594235372388, "learning_rate": 9.703391233462356e-06, "loss": 0.5928, "step": 924 }, { "epoch": 0.44984802431610943, "grad_norm": 0.07522064767838982, "learning_rate": 9.70274124081168e-06, "loss": 0.6075, "step": 925 }, { "epoch": 0.4503343465045593, "grad_norm": 0.08512216752391312, "learning_rate": 9.702090558557404e-06, "loss": 0.6582, "step": 926 }, { "epoch": 0.45082066869300913, "grad_norm": 0.0768733313224146, "learning_rate": 9.701439186794943e-06, "loss": 0.5934, "step": 927 }, { "epoch": 0.451306990881459, "grad_norm": 0.07544172707600616, "learning_rate": 9.700787125619812e-06, "loss": 0.5961, "step": 928 }, { "epoch": 0.45179331306990883, "grad_norm": 0.08177673987106983, "learning_rate": 9.700134375127633e-06, "loss": 0.6159, "step": 929 }, { "epoch": 0.4522796352583587, "grad_norm": 0.07107017092833946, "learning_rate": 9.69948093541412e-06, "loss": 0.561, "step": 930 }, { "epoch": 0.45276595744680853, "grad_norm": 0.08007185519302115, "learning_rate": 9.698826806575093e-06, "loss": 0.6439, "step": 931 }, { "epoch": 0.4532522796352584, "grad_norm": 0.07487054762146209, "learning_rate": 9.698171988706476e-06, "loss": 0.574, "step": 932 }, { "epoch": 0.45373860182370823, "grad_norm": 0.07978953515184978, "learning_rate": 9.697516481904286e-06, "loss": 0.5847, "step": 933 }, { "epoch": 0.4542249240121581, "grad_norm": 0.08028804220821169, "learning_rate": 9.696860286264648e-06, "loss": 0.6054, "step": 934 }, { "epoch": 0.4547112462006079, "grad_norm": 0.07803227031905113, "learning_rate": 9.696203401883786e-06, "loss": 0.6331, "step": 935 }, { "epoch": 0.4551975683890577, "grad_norm": 0.07670678948077339, "learning_rate": 9.695545828858024e-06, "loss": 0.6871, "step": 936 }, { "epoch": 0.4556838905775076, "grad_norm": 0.07743577914984455, "learning_rate": 9.694887567283786e-06, "loss": 0.6515, "step": 937 }, { "epoch": 0.45617021276595743, "grad_norm": 0.07861986358754945, "learning_rate": 9.694228617257602e-06, "loss": 0.5849, "step": 938 }, { "epoch": 0.4566565349544073, "grad_norm": 0.07455616986614651, "learning_rate": 9.693568978876098e-06, "loss": 0.6069, "step": 939 }, { "epoch": 0.45714285714285713, "grad_norm": 0.07513334112457971, "learning_rate": 9.692908652236002e-06, "loss": 0.5738, "step": 940 }, { "epoch": 0.457629179331307, "grad_norm": 0.08441089807460127, "learning_rate": 9.692247637434142e-06, "loss": 0.6307, "step": 941 }, { "epoch": 0.45811550151975683, "grad_norm": 0.07570139872137768, "learning_rate": 9.691585934567452e-06, "loss": 0.5746, "step": 942 }, { "epoch": 0.4586018237082067, "grad_norm": 0.07390655051148683, "learning_rate": 9.690923543732962e-06, "loss": 0.6427, "step": 943 }, { "epoch": 0.45908814589665653, "grad_norm": 0.07322564550559024, "learning_rate": 9.690260465027802e-06, "loss": 0.5742, "step": 944 }, { "epoch": 0.4595744680851064, "grad_norm": 0.07980290492363408, "learning_rate": 9.689596698549203e-06, "loss": 0.6152, "step": 945 }, { "epoch": 0.46006079027355623, "grad_norm": 0.07472707478190149, "learning_rate": 9.688932244394507e-06, "loss": 0.581, "step": 946 }, { "epoch": 0.4605471124620061, "grad_norm": 0.07570147061772793, "learning_rate": 9.688267102661142e-06, "loss": 0.6201, "step": 947 }, { "epoch": 0.46103343465045593, "grad_norm": 0.0777773138389416, "learning_rate": 9.687601273446645e-06, "loss": 0.6202, "step": 948 }, { "epoch": 0.4615197568389058, "grad_norm": 0.08220264826870818, "learning_rate": 9.686934756848651e-06, "loss": 0.5952, "step": 949 }, { "epoch": 0.46200607902735563, "grad_norm": 0.07929919144080227, "learning_rate": 9.686267552964901e-06, "loss": 0.636, "step": 950 }, { "epoch": 0.4624924012158055, "grad_norm": 0.07685286867352173, "learning_rate": 9.68559966189323e-06, "loss": 0.6212, "step": 951 }, { "epoch": 0.46297872340425533, "grad_norm": 0.07679017248783176, "learning_rate": 9.684931083731578e-06, "loss": 0.6039, "step": 952 }, { "epoch": 0.4634650455927052, "grad_norm": 0.07042817668451752, "learning_rate": 9.68426181857798e-06, "loss": 0.5599, "step": 953 }, { "epoch": 0.46395136778115503, "grad_norm": 0.07582605049687227, "learning_rate": 9.683591866530582e-06, "loss": 0.577, "step": 954 }, { "epoch": 0.4644376899696049, "grad_norm": 0.07607125580240615, "learning_rate": 9.682921227687622e-06, "loss": 0.6175, "step": 955 }, { "epoch": 0.46492401215805473, "grad_norm": 0.07679547238371429, "learning_rate": 9.682249902147442e-06, "loss": 0.6199, "step": 956 }, { "epoch": 0.4654103343465046, "grad_norm": 0.0696181884733738, "learning_rate": 9.681577890008485e-06, "loss": 0.5577, "step": 957 }, { "epoch": 0.46589665653495443, "grad_norm": 0.07824392162082655, "learning_rate": 9.680905191369293e-06, "loss": 0.6027, "step": 958 }, { "epoch": 0.46638297872340423, "grad_norm": 0.07504234092024205, "learning_rate": 9.680231806328509e-06, "loss": 0.5968, "step": 959 }, { "epoch": 0.4668693009118541, "grad_norm": 0.07245260973260609, "learning_rate": 9.67955773498488e-06, "loss": 0.5817, "step": 960 }, { "epoch": 0.46735562310030393, "grad_norm": 0.07389774613836207, "learning_rate": 9.678882977437248e-06, "loss": 0.5845, "step": 961 }, { "epoch": 0.4678419452887538, "grad_norm": 0.07849571359931412, "learning_rate": 9.678207533784558e-06, "loss": 0.6428, "step": 962 }, { "epoch": 0.46832826747720363, "grad_norm": 0.07368316570430054, "learning_rate": 9.67753140412586e-06, "loss": 0.6082, "step": 963 }, { "epoch": 0.4688145896656535, "grad_norm": 0.07588381016100772, "learning_rate": 9.676854588560298e-06, "loss": 0.6272, "step": 964 }, { "epoch": 0.46930091185410333, "grad_norm": 0.0750664224771575, "learning_rate": 9.67617708718712e-06, "loss": 0.6318, "step": 965 }, { "epoch": 0.4697872340425532, "grad_norm": 0.07174390421484508, "learning_rate": 9.675498900105674e-06, "loss": 0.5834, "step": 966 }, { "epoch": 0.47027355623100303, "grad_norm": 0.07457658652906511, "learning_rate": 9.674820027415406e-06, "loss": 0.5752, "step": 967 }, { "epoch": 0.4707598784194529, "grad_norm": 0.07643389304674403, "learning_rate": 9.674140469215868e-06, "loss": 0.5857, "step": 968 }, { "epoch": 0.47124620060790273, "grad_norm": 0.07647902583321699, "learning_rate": 9.673460225606711e-06, "loss": 0.623, "step": 969 }, { "epoch": 0.4717325227963526, "grad_norm": 0.07451087076680062, "learning_rate": 9.672779296687678e-06, "loss": 0.5752, "step": 970 }, { "epoch": 0.47221884498480243, "grad_norm": 0.07683147292688484, "learning_rate": 9.672097682558628e-06, "loss": 0.6324, "step": 971 }, { "epoch": 0.4727051671732523, "grad_norm": 0.07772949335564111, "learning_rate": 9.671415383319507e-06, "loss": 0.5916, "step": 972 }, { "epoch": 0.47319148936170213, "grad_norm": 0.07308603579574793, "learning_rate": 9.670732399070365e-06, "loss": 0.6207, "step": 973 }, { "epoch": 0.473677811550152, "grad_norm": 0.07629877966214045, "learning_rate": 9.67004872991136e-06, "loss": 0.5829, "step": 974 }, { "epoch": 0.47416413373860183, "grad_norm": 0.07183852049069808, "learning_rate": 9.669364375942739e-06, "loss": 0.5644, "step": 975 }, { "epoch": 0.4746504559270517, "grad_norm": 0.07019255651184401, "learning_rate": 9.668679337264857e-06, "loss": 0.5468, "step": 976 }, { "epoch": 0.47513677811550153, "grad_norm": 0.07488267975270439, "learning_rate": 9.667993613978166e-06, "loss": 0.6012, "step": 977 }, { "epoch": 0.4756231003039514, "grad_norm": 0.07423245735285204, "learning_rate": 9.66730720618322e-06, "loss": 0.5769, "step": 978 }, { "epoch": 0.47610942249240124, "grad_norm": 0.07970041513583959, "learning_rate": 9.666620113980673e-06, "loss": 0.6254, "step": 979 }, { "epoch": 0.4765957446808511, "grad_norm": 0.07116566520277325, "learning_rate": 9.66593233747128e-06, "loss": 0.5661, "step": 980 }, { "epoch": 0.47708206686930094, "grad_norm": 0.07815806025417396, "learning_rate": 9.665243876755894e-06, "loss": 0.6078, "step": 981 }, { "epoch": 0.4775683890577508, "grad_norm": 0.08098149359036909, "learning_rate": 9.66455473193547e-06, "loss": 0.6158, "step": 982 }, { "epoch": 0.4780547112462006, "grad_norm": 0.07332806249391385, "learning_rate": 9.663864903111066e-06, "loss": 0.5837, "step": 983 }, { "epoch": 0.47854103343465043, "grad_norm": 0.07488025281445045, "learning_rate": 9.663174390383836e-06, "loss": 0.606, "step": 984 }, { "epoch": 0.4790273556231003, "grad_norm": 0.07572123285091018, "learning_rate": 9.662483193855035e-06, "loss": 0.6334, "step": 985 }, { "epoch": 0.47951367781155013, "grad_norm": 0.07754112262676686, "learning_rate": 9.661791313626019e-06, "loss": 0.6101, "step": 986 }, { "epoch": 0.48, "grad_norm": 0.07368653207471317, "learning_rate": 9.661098749798243e-06, "loss": 0.5957, "step": 987 }, { "epoch": 0.48048632218844983, "grad_norm": 0.07534261258971515, "learning_rate": 9.660405502473268e-06, "loss": 0.6143, "step": 988 }, { "epoch": 0.4809726443768997, "grad_norm": 0.07416568150076407, "learning_rate": 9.659711571752749e-06, "loss": 0.5545, "step": 989 }, { "epoch": 0.48145896656534953, "grad_norm": 0.08096641649729179, "learning_rate": 9.659016957738441e-06, "loss": 0.5826, "step": 990 }, { "epoch": 0.4819452887537994, "grad_norm": 0.07448803186437745, "learning_rate": 9.658321660532204e-06, "loss": 0.637, "step": 991 }, { "epoch": 0.48243161094224923, "grad_norm": 0.07693468224925519, "learning_rate": 9.657625680235994e-06, "loss": 0.6109, "step": 992 }, { "epoch": 0.4829179331306991, "grad_norm": 0.07642236003341642, "learning_rate": 9.656929016951869e-06, "loss": 0.5642, "step": 993 }, { "epoch": 0.48340425531914893, "grad_norm": 0.08302559808727765, "learning_rate": 9.656231670781987e-06, "loss": 0.6498, "step": 994 }, { "epoch": 0.4838905775075988, "grad_norm": 0.09619351084023765, "learning_rate": 9.655533641828602e-06, "loss": 0.6415, "step": 995 }, { "epoch": 0.48437689969604864, "grad_norm": 0.08491930525223579, "learning_rate": 9.654834930194079e-06, "loss": 0.7193, "step": 996 }, { "epoch": 0.4848632218844985, "grad_norm": 0.07929948822646465, "learning_rate": 9.654135535980874e-06, "loss": 0.602, "step": 997 }, { "epoch": 0.48534954407294834, "grad_norm": 0.0738595479696469, "learning_rate": 9.653435459291541e-06, "loss": 0.5978, "step": 998 }, { "epoch": 0.4858358662613982, "grad_norm": 0.07368254032340832, "learning_rate": 9.65273470022874e-06, "loss": 0.6041, "step": 999 }, { "epoch": 0.48632218844984804, "grad_norm": 0.07542858510769612, "learning_rate": 9.652033258895233e-06, "loss": 0.5893, "step": 1000 }, { "epoch": 0.4868085106382979, "grad_norm": 0.07435466428592213, "learning_rate": 9.651331135393875e-06, "loss": 0.5937, "step": 1001 }, { "epoch": 0.48729483282674774, "grad_norm": 0.08410787611001522, "learning_rate": 9.650628329827627e-06, "loss": 0.589, "step": 1002 }, { "epoch": 0.4877811550151976, "grad_norm": 0.07650370376747663, "learning_rate": 9.649924842299544e-06, "loss": 0.6184, "step": 1003 }, { "epoch": 0.48826747720364744, "grad_norm": 0.07582375275769716, "learning_rate": 9.649220672912788e-06, "loss": 0.5942, "step": 1004 }, { "epoch": 0.4887537993920973, "grad_norm": 0.07783386547488437, "learning_rate": 9.648515821770612e-06, "loss": 0.6128, "step": 1005 }, { "epoch": 0.48924012158054714, "grad_norm": 0.07848464060947229, "learning_rate": 9.647810288976381e-06, "loss": 0.613, "step": 1006 }, { "epoch": 0.48972644376899693, "grad_norm": 0.07508332012991602, "learning_rate": 9.64710407463355e-06, "loss": 0.6154, "step": 1007 }, { "epoch": 0.4902127659574468, "grad_norm": 0.07382064657896553, "learning_rate": 9.646397178845679e-06, "loss": 0.5961, "step": 1008 }, { "epoch": 0.49069908814589663, "grad_norm": 0.0830034984028087, "learning_rate": 9.645689601716424e-06, "loss": 0.5727, "step": 1009 }, { "epoch": 0.4911854103343465, "grad_norm": 0.07249779314047998, "learning_rate": 9.644981343349545e-06, "loss": 0.6099, "step": 1010 }, { "epoch": 0.49167173252279633, "grad_norm": 0.08514650264082513, "learning_rate": 9.644272403848897e-06, "loss": 0.6283, "step": 1011 }, { "epoch": 0.4921580547112462, "grad_norm": 0.08050869620416247, "learning_rate": 9.64356278331844e-06, "loss": 0.6077, "step": 1012 }, { "epoch": 0.49264437689969603, "grad_norm": 0.07509472703104218, "learning_rate": 9.642852481862235e-06, "loss": 0.5929, "step": 1013 }, { "epoch": 0.4931306990881459, "grad_norm": 0.07324237738835222, "learning_rate": 9.642141499584436e-06, "loss": 0.5893, "step": 1014 }, { "epoch": 0.49361702127659574, "grad_norm": 0.07673034617567834, "learning_rate": 9.6414298365893e-06, "loss": 0.634, "step": 1015 }, { "epoch": 0.4941033434650456, "grad_norm": 0.0726904256785006, "learning_rate": 9.640717492981185e-06, "loss": 0.5807, "step": 1016 }, { "epoch": 0.49458966565349544, "grad_norm": 0.07479533320906948, "learning_rate": 9.64000446886455e-06, "loss": 0.6042, "step": 1017 }, { "epoch": 0.4950759878419453, "grad_norm": 0.07960652206021393, "learning_rate": 9.63929076434395e-06, "loss": 0.6331, "step": 1018 }, { "epoch": 0.49556231003039514, "grad_norm": 0.07046297273903293, "learning_rate": 9.638576379524041e-06, "loss": 0.546, "step": 1019 }, { "epoch": 0.496048632218845, "grad_norm": 0.0801776950275796, "learning_rate": 9.63786131450958e-06, "loss": 0.6363, "step": 1020 }, { "epoch": 0.49653495440729484, "grad_norm": 0.07445033127789338, "learning_rate": 9.637145569405426e-06, "loss": 0.5629, "step": 1021 }, { "epoch": 0.4970212765957447, "grad_norm": 0.07526862940422334, "learning_rate": 9.63642914431653e-06, "loss": 0.6108, "step": 1022 }, { "epoch": 0.49750759878419454, "grad_norm": 0.07254346330912441, "learning_rate": 9.635712039347953e-06, "loss": 0.594, "step": 1023 }, { "epoch": 0.4979939209726444, "grad_norm": 0.0780571845691102, "learning_rate": 9.634994254604845e-06, "loss": 0.595, "step": 1024 }, { "epoch": 0.49848024316109424, "grad_norm": 0.0783126025407137, "learning_rate": 9.634275790192464e-06, "loss": 0.615, "step": 1025 }, { "epoch": 0.4989665653495441, "grad_norm": 0.07302505638797673, "learning_rate": 9.633556646216164e-06, "loss": 0.5855, "step": 1026 }, { "epoch": 0.49945288753799394, "grad_norm": 0.07082429330689427, "learning_rate": 9.6328368227814e-06, "loss": 0.5242, "step": 1027 }, { "epoch": 0.4999392097264438, "grad_norm": 0.07407361719321554, "learning_rate": 9.632116319993726e-06, "loss": 0.5895, "step": 1028 }, { "epoch": 0.4999392097264438, "eval_loss": 0.6022451519966125, "eval_runtime": 105.1713, "eval_samples_per_second": 288.605, "eval_steps_per_second": 36.084, "step": 1028 }, { "epoch": 0.5004255319148936, "grad_norm": 0.07491363414159202, "learning_rate": 9.631395137958792e-06, "loss": 0.6184, "step": 1029 }, { "epoch": 0.5009118541033435, "grad_norm": 0.07497897535023801, "learning_rate": 9.630673276782356e-06, "loss": 0.6243, "step": 1030 }, { "epoch": 0.5013981762917933, "grad_norm": 0.07384236285385469, "learning_rate": 9.629950736570268e-06, "loss": 0.6156, "step": 1031 }, { "epoch": 0.5018844984802432, "grad_norm": 0.07873756699993263, "learning_rate": 9.629227517428482e-06, "loss": 0.5716, "step": 1032 }, { "epoch": 0.502370820668693, "grad_norm": 0.07576613933547476, "learning_rate": 9.628503619463049e-06, "loss": 0.6326, "step": 1033 }, { "epoch": 0.5028571428571429, "grad_norm": 0.07500439174705274, "learning_rate": 9.62777904278012e-06, "loss": 0.6272, "step": 1034 }, { "epoch": 0.5033434650455927, "grad_norm": 0.07537447974397528, "learning_rate": 9.627053787485944e-06, "loss": 0.5918, "step": 1035 }, { "epoch": 0.5038297872340426, "grad_norm": 0.074503245299955, "learning_rate": 9.626327853686877e-06, "loss": 0.5583, "step": 1036 }, { "epoch": 0.5043161094224924, "grad_norm": 0.075726082212813, "learning_rate": 9.625601241489365e-06, "loss": 0.6056, "step": 1037 }, { "epoch": 0.5048024316109423, "grad_norm": 0.07059161411427989, "learning_rate": 9.624873950999958e-06, "loss": 0.5654, "step": 1038 }, { "epoch": 0.5052887537993921, "grad_norm": 0.07767977093951244, "learning_rate": 9.624145982325303e-06, "loss": 0.6159, "step": 1039 }, { "epoch": 0.505775075987842, "grad_norm": 0.07830933778804226, "learning_rate": 9.623417335572155e-06, "loss": 0.6052, "step": 1040 }, { "epoch": 0.5062613981762918, "grad_norm": 0.07804381159534474, "learning_rate": 9.622688010847352e-06, "loss": 0.5782, "step": 1041 }, { "epoch": 0.5067477203647417, "grad_norm": 0.07235693447421539, "learning_rate": 9.621958008257848e-06, "loss": 0.5685, "step": 1042 }, { "epoch": 0.5072340425531915, "grad_norm": 0.07672947712687692, "learning_rate": 9.62122732791069e-06, "loss": 0.6142, "step": 1043 }, { "epoch": 0.5077203647416413, "grad_norm": 0.07598179177937309, "learning_rate": 9.62049596991302e-06, "loss": 0.601, "step": 1044 }, { "epoch": 0.5082066869300912, "grad_norm": 0.07957812258745778, "learning_rate": 9.619763934372084e-06, "loss": 0.6012, "step": 1045 }, { "epoch": 0.508693009118541, "grad_norm": 0.07179777474901587, "learning_rate": 9.61903122139523e-06, "loss": 0.5676, "step": 1046 }, { "epoch": 0.5091793313069909, "grad_norm": 0.07119213189596682, "learning_rate": 9.6182978310899e-06, "loss": 0.5481, "step": 1047 }, { "epoch": 0.5096656534954407, "grad_norm": 0.07484075027205032, "learning_rate": 9.617563763563635e-06, "loss": 0.6257, "step": 1048 }, { "epoch": 0.5101519756838906, "grad_norm": 0.07484625229774869, "learning_rate": 9.616829018924083e-06, "loss": 0.599, "step": 1049 }, { "epoch": 0.5106382978723404, "grad_norm": 0.07479653876160268, "learning_rate": 9.616093597278981e-06, "loss": 0.5815, "step": 1050 }, { "epoch": 0.5111246200607903, "grad_norm": 0.07920682715652025, "learning_rate": 9.615357498736172e-06, "loss": 0.6282, "step": 1051 }, { "epoch": 0.5116109422492401, "grad_norm": 0.079108272703273, "learning_rate": 9.614620723403599e-06, "loss": 0.6223, "step": 1052 }, { "epoch": 0.51209726443769, "grad_norm": 0.07666487314988865, "learning_rate": 9.613883271389297e-06, "loss": 0.5949, "step": 1053 }, { "epoch": 0.5125835866261398, "grad_norm": 0.07796212649776288, "learning_rate": 9.613145142801407e-06, "loss": 0.6169, "step": 1054 }, { "epoch": 0.5130699088145897, "grad_norm": 0.07727794160332548, "learning_rate": 9.61240633774817e-06, "loss": 0.5608, "step": 1055 }, { "epoch": 0.5135562310030395, "grad_norm": 0.23440049337137667, "learning_rate": 9.61166685633792e-06, "loss": 0.6003, "step": 1056 }, { "epoch": 0.5140425531914894, "grad_norm": 0.07302507192319002, "learning_rate": 9.610926698679093e-06, "loss": 0.5901, "step": 1057 }, { "epoch": 0.5145288753799392, "grad_norm": 0.0793574387041743, "learning_rate": 9.610185864880228e-06, "loss": 0.6337, "step": 1058 }, { "epoch": 0.5150151975683891, "grad_norm": 0.07760275379212223, "learning_rate": 9.609444355049957e-06, "loss": 0.6091, "step": 1059 }, { "epoch": 0.5155015197568389, "grad_norm": 0.07644658434262741, "learning_rate": 9.608702169297014e-06, "loss": 0.6041, "step": 1060 }, { "epoch": 0.5159878419452888, "grad_norm": 0.07579633137358423, "learning_rate": 9.607959307730237e-06, "loss": 0.6002, "step": 1061 }, { "epoch": 0.5164741641337386, "grad_norm": 0.07601248234149546, "learning_rate": 9.607215770458551e-06, "loss": 0.6262, "step": 1062 }, { "epoch": 0.5169604863221885, "grad_norm": 0.07614345620960081, "learning_rate": 9.606471557590992e-06, "loss": 0.5828, "step": 1063 }, { "epoch": 0.5174468085106383, "grad_norm": 0.0771995257204074, "learning_rate": 9.605726669236688e-06, "loss": 0.6175, "step": 1064 }, { "epoch": 0.5179331306990882, "grad_norm": 0.07387346768464613, "learning_rate": 9.60498110550487e-06, "loss": 0.6017, "step": 1065 }, { "epoch": 0.518419452887538, "grad_norm": 0.08075466368475279, "learning_rate": 9.604234866504868e-06, "loss": 0.6301, "step": 1066 }, { "epoch": 0.5189057750759879, "grad_norm": 0.07266668357878729, "learning_rate": 9.603487952346104e-06, "loss": 0.5699, "step": 1067 }, { "epoch": 0.5193920972644377, "grad_norm": 0.07294684771036698, "learning_rate": 9.602740363138108e-06, "loss": 0.5854, "step": 1068 }, { "epoch": 0.5198784194528875, "grad_norm": 0.07431612814625059, "learning_rate": 9.601992098990506e-06, "loss": 0.615, "step": 1069 }, { "epoch": 0.5203647416413374, "grad_norm": 0.07444677404238248, "learning_rate": 9.601243160013023e-06, "loss": 0.5647, "step": 1070 }, { "epoch": 0.5208510638297872, "grad_norm": 0.0785028987815514, "learning_rate": 9.600493546315482e-06, "loss": 0.5966, "step": 1071 }, { "epoch": 0.5213373860182371, "grad_norm": 0.07459638381862556, "learning_rate": 9.599743258007803e-06, "loss": 0.5662, "step": 1072 }, { "epoch": 0.5218237082066869, "grad_norm": 0.07701982569012482, "learning_rate": 9.598992295200007e-06, "loss": 0.6135, "step": 1073 }, { "epoch": 0.5223100303951368, "grad_norm": 0.07516017019795646, "learning_rate": 9.598240658002217e-06, "loss": 0.5883, "step": 1074 }, { "epoch": 0.5227963525835866, "grad_norm": 0.07210430860226744, "learning_rate": 9.597488346524653e-06, "loss": 0.57, "step": 1075 }, { "epoch": 0.5232826747720365, "grad_norm": 0.07697882335037791, "learning_rate": 9.59673536087763e-06, "loss": 0.6177, "step": 1076 }, { "epoch": 0.5237689969604863, "grad_norm": 0.08038171405169131, "learning_rate": 9.595981701171564e-06, "loss": 0.6211, "step": 1077 }, { "epoch": 0.5242553191489362, "grad_norm": 0.07202005407284515, "learning_rate": 9.595227367516974e-06, "loss": 0.5517, "step": 1078 }, { "epoch": 0.524741641337386, "grad_norm": 0.07598168784271613, "learning_rate": 9.594472360024472e-06, "loss": 0.6156, "step": 1079 }, { "epoch": 0.5252279635258359, "grad_norm": 0.07380647747687255, "learning_rate": 9.593716678804772e-06, "loss": 0.5999, "step": 1080 }, { "epoch": 0.5257142857142857, "grad_norm": 0.07510898117414629, "learning_rate": 9.592960323968688e-06, "loss": 0.6014, "step": 1081 }, { "epoch": 0.5262006079027356, "grad_norm": 0.07877687247884968, "learning_rate": 9.592203295627127e-06, "loss": 0.6071, "step": 1082 }, { "epoch": 0.5266869300911854, "grad_norm": 0.07301529882321994, "learning_rate": 9.591445593891102e-06, "loss": 0.5824, "step": 1083 }, { "epoch": 0.5271732522796353, "grad_norm": 0.073996777203989, "learning_rate": 9.59068721887172e-06, "loss": 0.5965, "step": 1084 }, { "epoch": 0.5276595744680851, "grad_norm": 0.07614222372100486, "learning_rate": 9.589928170680186e-06, "loss": 0.6096, "step": 1085 }, { "epoch": 0.528145896656535, "grad_norm": 0.07907477045381418, "learning_rate": 9.58916844942781e-06, "loss": 0.5949, "step": 1086 }, { "epoch": 0.5286322188449848, "grad_norm": 0.0742931640338888, "learning_rate": 9.588408055225992e-06, "loss": 0.6046, "step": 1087 }, { "epoch": 0.5291185410334347, "grad_norm": 0.07792562293019752, "learning_rate": 9.58764698818624e-06, "loss": 0.5945, "step": 1088 }, { "epoch": 0.5296048632218845, "grad_norm": 0.07301008726052358, "learning_rate": 9.586885248420152e-06, "loss": 0.5662, "step": 1089 }, { "epoch": 0.5300911854103344, "grad_norm": 0.0755314524155536, "learning_rate": 9.586122836039432e-06, "loss": 0.5783, "step": 1090 }, { "epoch": 0.5305775075987842, "grad_norm": 0.07434203408018285, "learning_rate": 9.585359751155874e-06, "loss": 0.5477, "step": 1091 }, { "epoch": 0.531063829787234, "grad_norm": 0.07538682718890699, "learning_rate": 9.58459599388138e-06, "loss": 0.6004, "step": 1092 }, { "epoch": 0.5315501519756839, "grad_norm": 0.07136586709394917, "learning_rate": 9.583831564327945e-06, "loss": 0.5912, "step": 1093 }, { "epoch": 0.5320364741641337, "grad_norm": 0.0767772025676226, "learning_rate": 9.583066462607664e-06, "loss": 0.6308, "step": 1094 }, { "epoch": 0.5325227963525836, "grad_norm": 0.0726085661295553, "learning_rate": 9.58230068883273e-06, "loss": 0.6073, "step": 1095 }, { "epoch": 0.5330091185410334, "grad_norm": 0.07517222027021538, "learning_rate": 9.581534243115437e-06, "loss": 0.5522, "step": 1096 }, { "epoch": 0.5334954407294833, "grad_norm": 0.07273686570952088, "learning_rate": 9.580767125568172e-06, "loss": 0.6008, "step": 1097 }, { "epoch": 0.5339817629179331, "grad_norm": 0.07447422341141843, "learning_rate": 9.579999336303427e-06, "loss": 0.6049, "step": 1098 }, { "epoch": 0.534468085106383, "grad_norm": 0.07004702097814727, "learning_rate": 9.579230875433788e-06, "loss": 0.5823, "step": 1099 }, { "epoch": 0.5349544072948328, "grad_norm": 0.07856976194826039, "learning_rate": 9.578461743071943e-06, "loss": 0.6126, "step": 1100 }, { "epoch": 0.5354407294832827, "grad_norm": 0.07457931409633366, "learning_rate": 9.577691939330675e-06, "loss": 0.5633, "step": 1101 }, { "epoch": 0.5359270516717325, "grad_norm": 0.07180677894012881, "learning_rate": 9.576921464322866e-06, "loss": 0.577, "step": 1102 }, { "epoch": 0.5364133738601824, "grad_norm": 0.07720956030452346, "learning_rate": 9.576150318161499e-06, "loss": 0.587, "step": 1103 }, { "epoch": 0.5368996960486322, "grad_norm": 0.07543257359756422, "learning_rate": 9.575378500959654e-06, "loss": 0.6025, "step": 1104 }, { "epoch": 0.5373860182370821, "grad_norm": 0.07639144543849306, "learning_rate": 9.574606012830509e-06, "loss": 0.5813, "step": 1105 }, { "epoch": 0.5378723404255319, "grad_norm": 0.07846367381718024, "learning_rate": 9.57383285388734e-06, "loss": 0.6197, "step": 1106 }, { "epoch": 0.5383586626139818, "grad_norm": 0.07551930929835175, "learning_rate": 9.573059024243522e-06, "loss": 0.6154, "step": 1107 }, { "epoch": 0.5388449848024316, "grad_norm": 0.074725189306958, "learning_rate": 9.57228452401253e-06, "loss": 0.5637, "step": 1108 }, { "epoch": 0.5393313069908815, "grad_norm": 0.07981735973049686, "learning_rate": 9.571509353307933e-06, "loss": 0.6231, "step": 1109 }, { "epoch": 0.5398176291793313, "grad_norm": 0.08027613534457816, "learning_rate": 9.570733512243402e-06, "loss": 0.6377, "step": 1110 }, { "epoch": 0.5403039513677812, "grad_norm": 0.07335214025548115, "learning_rate": 9.569957000932706e-06, "loss": 0.5974, "step": 1111 }, { "epoch": 0.540790273556231, "grad_norm": 0.07366471459190997, "learning_rate": 9.569179819489712e-06, "loss": 0.5717, "step": 1112 }, { "epoch": 0.5412765957446809, "grad_norm": 0.07460379681865435, "learning_rate": 9.568401968028382e-06, "loss": 0.6122, "step": 1113 }, { "epoch": 0.5417629179331307, "grad_norm": 0.07035806810923242, "learning_rate": 9.567623446662781e-06, "loss": 0.6119, "step": 1114 }, { "epoch": 0.5422492401215806, "grad_norm": 0.07634389129306696, "learning_rate": 9.566844255507073e-06, "loss": 0.6078, "step": 1115 }, { "epoch": 0.5427355623100304, "grad_norm": 0.07066396119916037, "learning_rate": 9.566064394675511e-06, "loss": 0.5897, "step": 1116 }, { "epoch": 0.5432218844984802, "grad_norm": 0.07917244664151035, "learning_rate": 9.56528386428246e-06, "loss": 0.5941, "step": 1117 }, { "epoch": 0.5437082066869301, "grad_norm": 0.07560950730130118, "learning_rate": 9.564502664442371e-06, "loss": 0.5868, "step": 1118 }, { "epoch": 0.5441945288753799, "grad_norm": 0.07493054510390476, "learning_rate": 9.563720795269801e-06, "loss": 0.6076, "step": 1119 }, { "epoch": 0.5446808510638298, "grad_norm": 0.07613734596151789, "learning_rate": 9.5629382568794e-06, "loss": 0.5915, "step": 1120 }, { "epoch": 0.5451671732522796, "grad_norm": 0.0727097956776757, "learning_rate": 9.562155049385919e-06, "loss": 0.5966, "step": 1121 }, { "epoch": 0.5456534954407295, "grad_norm": 0.07573881937442128, "learning_rate": 9.561371172904207e-06, "loss": 0.6126, "step": 1122 }, { "epoch": 0.5461398176291793, "grad_norm": 0.0763737905532064, "learning_rate": 9.56058662754921e-06, "loss": 0.6469, "step": 1123 }, { "epoch": 0.5466261398176292, "grad_norm": 0.07512554259787733, "learning_rate": 9.559801413435972e-06, "loss": 0.6035, "step": 1124 }, { "epoch": 0.547112462006079, "grad_norm": 0.07171943476778324, "learning_rate": 9.559015530679639e-06, "loss": 0.577, "step": 1125 }, { "epoch": 0.5475987841945289, "grad_norm": 0.07556762598612062, "learning_rate": 9.558228979395448e-06, "loss": 0.5912, "step": 1126 }, { "epoch": 0.5480851063829787, "grad_norm": 0.07845005559698841, "learning_rate": 9.557441759698741e-06, "loss": 0.6058, "step": 1127 }, { "epoch": 0.5485714285714286, "grad_norm": 0.07541322310557426, "learning_rate": 9.556653871704951e-06, "loss": 0.62, "step": 1128 }, { "epoch": 0.5490577507598784, "grad_norm": 0.07964467770841274, "learning_rate": 9.555865315529616e-06, "loss": 0.5998, "step": 1129 }, { "epoch": 0.5495440729483283, "grad_norm": 0.07708081006027927, "learning_rate": 9.555076091288366e-06, "loss": 0.6094, "step": 1130 }, { "epoch": 0.5500303951367781, "grad_norm": 0.07388688075123805, "learning_rate": 9.554286199096937e-06, "loss": 0.6088, "step": 1131 }, { "epoch": 0.550516717325228, "grad_norm": 0.07820699280148219, "learning_rate": 9.553495639071152e-06, "loss": 0.6175, "step": 1132 }, { "epoch": 0.5510030395136778, "grad_norm": 0.07895001899338633, "learning_rate": 9.552704411326938e-06, "loss": 0.5891, "step": 1133 }, { "epoch": 0.5514893617021277, "grad_norm": 0.07382050601093598, "learning_rate": 9.551912515980323e-06, "loss": 0.5627, "step": 1134 }, { "epoch": 0.5519756838905775, "grad_norm": 0.07499578199643642, "learning_rate": 9.55111995314743e-06, "loss": 0.5776, "step": 1135 }, { "epoch": 0.5524620060790274, "grad_norm": 0.07965034804950924, "learning_rate": 9.550326722944476e-06, "loss": 0.6498, "step": 1136 }, { "epoch": 0.5529483282674772, "grad_norm": 0.07527774360450135, "learning_rate": 9.54953282548778e-06, "loss": 0.6389, "step": 1137 }, { "epoch": 0.5534346504559271, "grad_norm": 0.07606955485113887, "learning_rate": 9.548738260893759e-06, "loss": 0.6579, "step": 1138 }, { "epoch": 0.5539209726443769, "grad_norm": 0.07382363701618233, "learning_rate": 9.547943029278925e-06, "loss": 0.5796, "step": 1139 }, { "epoch": 0.5544072948328268, "grad_norm": 0.0694551534818775, "learning_rate": 9.547147130759894e-06, "loss": 0.5739, "step": 1140 }, { "epoch": 0.5548936170212766, "grad_norm": 0.07373619111569436, "learning_rate": 9.546350565453368e-06, "loss": 0.5837, "step": 1141 }, { "epoch": 0.5553799392097264, "grad_norm": 0.07491697143764887, "learning_rate": 9.545553333476164e-06, "loss": 0.6072, "step": 1142 }, { "epoch": 0.5558662613981763, "grad_norm": 0.07300765249514717, "learning_rate": 9.544755434945178e-06, "loss": 0.5794, "step": 1143 }, { "epoch": 0.5563525835866261, "grad_norm": 0.07219044733634956, "learning_rate": 9.543956869977418e-06, "loss": 0.5789, "step": 1144 }, { "epoch": 0.556838905775076, "grad_norm": 0.07708903089414154, "learning_rate": 9.543157638689982e-06, "loss": 0.6116, "step": 1145 }, { "epoch": 0.5573252279635258, "grad_norm": 0.07814561564178836, "learning_rate": 9.542357741200071e-06, "loss": 0.6113, "step": 1146 }, { "epoch": 0.5578115501519757, "grad_norm": 0.07727051772150526, "learning_rate": 9.541557177624978e-06, "loss": 0.5911, "step": 1147 }, { "epoch": 0.5582978723404255, "grad_norm": 0.10617057932196375, "learning_rate": 9.5407559480821e-06, "loss": 0.5659, "step": 1148 }, { "epoch": 0.5587841945288754, "grad_norm": 0.0751891554302743, "learning_rate": 9.539954052688921e-06, "loss": 0.5825, "step": 1149 }, { "epoch": 0.5592705167173252, "grad_norm": 0.0774385166597363, "learning_rate": 9.53915149156304e-06, "loss": 0.5873, "step": 1150 }, { "epoch": 0.5597568389057751, "grad_norm": 0.0692018140462834, "learning_rate": 9.538348264822135e-06, "loss": 0.5958, "step": 1151 }, { "epoch": 0.5602431610942249, "grad_norm": 0.0728596158882538, "learning_rate": 9.537544372583996e-06, "loss": 0.5913, "step": 1152 }, { "epoch": 0.5607294832826748, "grad_norm": 0.0716324183449931, "learning_rate": 9.536739814966499e-06, "loss": 0.567, "step": 1153 }, { "epoch": 0.5612158054711246, "grad_norm": 0.07715957661852073, "learning_rate": 9.535934592087627e-06, "loss": 0.6288, "step": 1154 }, { "epoch": 0.5617021276595745, "grad_norm": 0.07297922165750517, "learning_rate": 9.535128704065457e-06, "loss": 0.643, "step": 1155 }, { "epoch": 0.5621884498480243, "grad_norm": 0.09017847485980017, "learning_rate": 9.534322151018163e-06, "loss": 0.6377, "step": 1156 }, { "epoch": 0.5626747720364742, "grad_norm": 0.07478377162164523, "learning_rate": 9.533514933064015e-06, "loss": 0.5626, "step": 1157 }, { "epoch": 0.563161094224924, "grad_norm": 0.07239324297731042, "learning_rate": 9.532707050321384e-06, "loss": 0.5809, "step": 1158 }, { "epoch": 0.5636474164133739, "grad_norm": 0.0746734820682129, "learning_rate": 9.531898502908735e-06, "loss": 0.6449, "step": 1159 }, { "epoch": 0.5641337386018237, "grad_norm": 0.08138913592038577, "learning_rate": 9.531089290944636e-06, "loss": 0.5791, "step": 1160 }, { "epoch": 0.5646200607902736, "grad_norm": 0.07793773324359105, "learning_rate": 9.530279414547743e-06, "loss": 0.6197, "step": 1161 }, { "epoch": 0.5651063829787234, "grad_norm": 0.07178282678689916, "learning_rate": 9.529468873836822e-06, "loss": 0.5766, "step": 1162 }, { "epoch": 0.5655927051671733, "grad_norm": 0.07242130866068215, "learning_rate": 9.528657668930724e-06, "loss": 0.5622, "step": 1163 }, { "epoch": 0.5660790273556231, "grad_norm": 0.07590187583915445, "learning_rate": 9.527845799948407e-06, "loss": 0.5872, "step": 1164 }, { "epoch": 0.5665653495440729, "grad_norm": 0.07302200706948855, "learning_rate": 9.52703326700892e-06, "loss": 0.6151, "step": 1165 }, { "epoch": 0.5670516717325228, "grad_norm": 0.07078885700845401, "learning_rate": 9.526220070231412e-06, "loss": 0.5586, "step": 1166 }, { "epoch": 0.5675379939209726, "grad_norm": 0.07663254444990945, "learning_rate": 9.52540620973513e-06, "loss": 0.5818, "step": 1167 }, { "epoch": 0.5680243161094225, "grad_norm": 0.07387885600875368, "learning_rate": 9.524591685639414e-06, "loss": 0.5652, "step": 1168 }, { "epoch": 0.5685106382978723, "grad_norm": 0.07383673904850574, "learning_rate": 9.523776498063709e-06, "loss": 0.599, "step": 1169 }, { "epoch": 0.5689969604863222, "grad_norm": 0.07660471356355565, "learning_rate": 9.522960647127553e-06, "loss": 0.5769, "step": 1170 }, { "epoch": 0.569483282674772, "grad_norm": 0.07693800724854444, "learning_rate": 9.522144132950576e-06, "loss": 0.6037, "step": 1171 }, { "epoch": 0.5699696048632219, "grad_norm": 0.07342133761920683, "learning_rate": 9.52132695565252e-06, "loss": 0.5784, "step": 1172 }, { "epoch": 0.5704559270516717, "grad_norm": 0.07239702949908601, "learning_rate": 9.520509115353205e-06, "loss": 0.594, "step": 1173 }, { "epoch": 0.5709422492401216, "grad_norm": 0.07318922399266402, "learning_rate": 9.519690612172563e-06, "loss": 0.5676, "step": 1174 }, { "epoch": 0.5714285714285714, "grad_norm": 0.08462636739837555, "learning_rate": 9.518871446230616e-06, "loss": 0.6666, "step": 1175 }, { "epoch": 0.5719148936170213, "grad_norm": 0.07147104118893316, "learning_rate": 9.518051617647488e-06, "loss": 0.5904, "step": 1176 }, { "epoch": 0.5724012158054711, "grad_norm": 0.07543685467803073, "learning_rate": 9.517231126543396e-06, "loss": 0.6038, "step": 1177 }, { "epoch": 0.572887537993921, "grad_norm": 0.07442965675655525, "learning_rate": 9.516409973038655e-06, "loss": 0.5654, "step": 1178 }, { "epoch": 0.5733738601823708, "grad_norm": 0.07422810108869447, "learning_rate": 9.515588157253679e-06, "loss": 0.6028, "step": 1179 }, { "epoch": 0.5738601823708207, "grad_norm": 0.07281447082147334, "learning_rate": 9.514765679308979e-06, "loss": 0.5872, "step": 1180 }, { "epoch": 0.5743465045592705, "grad_norm": 0.07343713574963837, "learning_rate": 9.513942539325158e-06, "loss": 0.5569, "step": 1181 }, { "epoch": 0.5748328267477204, "grad_norm": 0.07524771751860296, "learning_rate": 9.513118737422926e-06, "loss": 0.6022, "step": 1182 }, { "epoch": 0.5753191489361702, "grad_norm": 0.0759502988036261, "learning_rate": 9.51229427372308e-06, "loss": 0.5824, "step": 1183 }, { "epoch": 0.5758054711246201, "grad_norm": 0.07450311723224759, "learning_rate": 9.511469148346517e-06, "loss": 0.5869, "step": 1184 }, { "epoch": 0.5762917933130699, "grad_norm": 0.07372768985360179, "learning_rate": 9.510643361414236e-06, "loss": 0.6092, "step": 1185 }, { "epoch": 0.5767781155015198, "grad_norm": 0.07135395518528469, "learning_rate": 9.50981691304733e-06, "loss": 0.5766, "step": 1186 }, { "epoch": 0.5772644376899696, "grad_norm": 0.07703190611762453, "learning_rate": 9.508989803366984e-06, "loss": 0.5964, "step": 1187 }, { "epoch": 0.5777507598784195, "grad_norm": 0.07712962057222558, "learning_rate": 9.508162032494485e-06, "loss": 0.6346, "step": 1188 }, { "epoch": 0.5782370820668693, "grad_norm": 0.07449114456144172, "learning_rate": 9.50733360055122e-06, "loss": 0.6099, "step": 1189 }, { "epoch": 0.5787234042553191, "grad_norm": 0.07904267706379085, "learning_rate": 9.506504507658665e-06, "loss": 0.6109, "step": 1190 }, { "epoch": 0.579209726443769, "grad_norm": 0.07811448853572688, "learning_rate": 9.5056747539384e-06, "loss": 0.6281, "step": 1191 }, { "epoch": 0.5796960486322188, "grad_norm": 0.07482062202862808, "learning_rate": 9.504844339512096e-06, "loss": 0.564, "step": 1192 }, { "epoch": 0.5801823708206687, "grad_norm": 0.07577843012573345, "learning_rate": 9.504013264501526e-06, "loss": 0.6126, "step": 1193 }, { "epoch": 0.5806686930091185, "grad_norm": 0.07522558537462215, "learning_rate": 9.503181529028558e-06, "loss": 0.5979, "step": 1194 }, { "epoch": 0.5811550151975684, "grad_norm": 0.08146507705850126, "learning_rate": 9.502349133215156e-06, "loss": 0.6421, "step": 1195 }, { "epoch": 0.5816413373860182, "grad_norm": 0.07371125397635524, "learning_rate": 9.501516077183381e-06, "loss": 0.5843, "step": 1196 }, { "epoch": 0.5821276595744681, "grad_norm": 0.07504681158171868, "learning_rate": 9.500682361055391e-06, "loss": 0.6335, "step": 1197 }, { "epoch": 0.5826139817629179, "grad_norm": 0.07397798706558126, "learning_rate": 9.49984798495344e-06, "loss": 0.5437, "step": 1198 }, { "epoch": 0.5831003039513678, "grad_norm": 0.07154544956037139, "learning_rate": 9.499012948999884e-06, "loss": 0.536, "step": 1199 }, { "epoch": 0.5835866261398176, "grad_norm": 0.07402888598281882, "learning_rate": 9.498177253317167e-06, "loss": 0.619, "step": 1200 }, { "epoch": 0.5840729483282675, "grad_norm": 0.07332466029099505, "learning_rate": 9.497340898027836e-06, "loss": 0.6025, "step": 1201 }, { "epoch": 0.5845592705167173, "grad_norm": 0.07512862470619239, "learning_rate": 9.496503883254534e-06, "loss": 0.6272, "step": 1202 }, { "epoch": 0.5850455927051672, "grad_norm": 0.07337634551222842, "learning_rate": 9.495666209119998e-06, "loss": 0.5668, "step": 1203 }, { "epoch": 0.585531914893617, "grad_norm": 0.07505241723685735, "learning_rate": 9.494827875747064e-06, "loss": 0.5754, "step": 1204 }, { "epoch": 0.5860182370820669, "grad_norm": 0.0730663492813903, "learning_rate": 9.493988883258664e-06, "loss": 0.5829, "step": 1205 }, { "epoch": 0.5865045592705167, "grad_norm": 0.07621175142706714, "learning_rate": 9.493149231777828e-06, "loss": 0.5577, "step": 1206 }, { "epoch": 0.5869908814589666, "grad_norm": 0.07830307485344025, "learning_rate": 9.492308921427677e-06, "loss": 0.6024, "step": 1207 }, { "epoch": 0.5874772036474164, "grad_norm": 0.07586187829377121, "learning_rate": 9.49146795233144e-06, "loss": 0.6169, "step": 1208 }, { "epoch": 0.5879635258358663, "grad_norm": 0.07554861577529298, "learning_rate": 9.49062632461243e-06, "loss": 0.5898, "step": 1209 }, { "epoch": 0.5884498480243161, "grad_norm": 0.0759674005850759, "learning_rate": 9.489784038394065e-06, "loss": 0.5928, "step": 1210 }, { "epoch": 0.588936170212766, "grad_norm": 0.07780786350709513, "learning_rate": 9.488941093799855e-06, "loss": 0.5946, "step": 1211 }, { "epoch": 0.5894224924012158, "grad_norm": 0.07361167720487613, "learning_rate": 9.488097490953408e-06, "loss": 0.5771, "step": 1212 }, { "epoch": 0.5899088145896657, "grad_norm": 0.07261713939570166, "learning_rate": 9.48725322997843e-06, "loss": 0.5811, "step": 1213 }, { "epoch": 0.5903951367781155, "grad_norm": 0.07469408824707235, "learning_rate": 9.486408310998724e-06, "loss": 0.5911, "step": 1214 }, { "epoch": 0.5908814589665653, "grad_norm": 0.07547460066996871, "learning_rate": 9.485562734138184e-06, "loss": 0.5779, "step": 1215 }, { "epoch": 0.5913677811550152, "grad_norm": 0.07492236184990318, "learning_rate": 9.484716499520806e-06, "loss": 0.5993, "step": 1216 }, { "epoch": 0.591854103343465, "grad_norm": 0.07598081017418767, "learning_rate": 9.48386960727068e-06, "loss": 0.5993, "step": 1217 }, { "epoch": 0.5923404255319149, "grad_norm": 0.07257975256839314, "learning_rate": 9.483022057511996e-06, "loss": 0.6299, "step": 1218 }, { "epoch": 0.5928267477203647, "grad_norm": 0.0707006246123199, "learning_rate": 9.482173850369034e-06, "loss": 0.5813, "step": 1219 }, { "epoch": 0.5933130699088146, "grad_norm": 0.07754682435623171, "learning_rate": 9.481324985966175e-06, "loss": 0.6043, "step": 1220 }, { "epoch": 0.5937993920972644, "grad_norm": 0.07430417411565539, "learning_rate": 9.480475464427896e-06, "loss": 0.6132, "step": 1221 }, { "epoch": 0.5942857142857143, "grad_norm": 0.07287647783414317, "learning_rate": 9.47962528587877e-06, "loss": 0.5796, "step": 1222 }, { "epoch": 0.5947720364741641, "grad_norm": 0.07541448093323741, "learning_rate": 9.478774450443465e-06, "loss": 0.5969, "step": 1223 }, { "epoch": 0.595258358662614, "grad_norm": 0.07667670668646333, "learning_rate": 9.477922958246747e-06, "loss": 0.5899, "step": 1224 }, { "epoch": 0.5957446808510638, "grad_norm": 0.07234539868105472, "learning_rate": 9.477070809413475e-06, "loss": 0.569, "step": 1225 }, { "epoch": 0.5962310030395137, "grad_norm": 0.07578751071641912, "learning_rate": 9.476218004068611e-06, "loss": 0.5988, "step": 1226 }, { "epoch": 0.5967173252279635, "grad_norm": 0.07478184066147164, "learning_rate": 9.475364542337207e-06, "loss": 0.5893, "step": 1227 }, { "epoch": 0.5972036474164134, "grad_norm": 0.07387041018992169, "learning_rate": 9.474510424344416e-06, "loss": 0.6116, "step": 1228 }, { "epoch": 0.5976899696048632, "grad_norm": 0.07592645788685992, "learning_rate": 9.473655650215481e-06, "loss": 0.598, "step": 1229 }, { "epoch": 0.5981762917933131, "grad_norm": 0.0778384138669128, "learning_rate": 9.472800220075746e-06, "loss": 0.632, "step": 1230 }, { "epoch": 0.5986626139817629, "grad_norm": 0.08315915323835953, "learning_rate": 9.471944134050652e-06, "loss": 0.6338, "step": 1231 }, { "epoch": 0.5991489361702128, "grad_norm": 0.07406289139040127, "learning_rate": 9.471087392265733e-06, "loss": 0.5984, "step": 1232 }, { "epoch": 0.5996352583586626, "grad_norm": 0.07485174259119663, "learning_rate": 9.470229994846621e-06, "loss": 0.5914, "step": 1233 }, { "epoch": 0.6001215805471125, "grad_norm": 0.0695961091240149, "learning_rate": 9.469371941919042e-06, "loss": 0.6005, "step": 1234 }, { "epoch": 0.6006079027355623, "grad_norm": 0.07190793080085171, "learning_rate": 9.46851323360882e-06, "loss": 0.5654, "step": 1235 }, { "epoch": 0.6010942249240122, "grad_norm": 0.08626371378581066, "learning_rate": 9.467653870041876e-06, "loss": 0.6234, "step": 1236 }, { "epoch": 0.601580547112462, "grad_norm": 0.07753810543470328, "learning_rate": 9.466793851344228e-06, "loss": 0.5937, "step": 1237 }, { "epoch": 0.6020668693009118, "grad_norm": 0.07275612325989092, "learning_rate": 9.465933177641981e-06, "loss": 0.6063, "step": 1238 }, { "epoch": 0.6025531914893617, "grad_norm": 0.07439257696987374, "learning_rate": 9.465071849061352e-06, "loss": 0.6177, "step": 1239 }, { "epoch": 0.6030395136778115, "grad_norm": 0.08869141664038398, "learning_rate": 9.464209865728638e-06, "loss": 0.6026, "step": 1240 }, { "epoch": 0.6035258358662614, "grad_norm": 0.07918603182980609, "learning_rate": 9.463347227770243e-06, "loss": 0.5977, "step": 1241 }, { "epoch": 0.6040121580547112, "grad_norm": 0.07729756677106756, "learning_rate": 9.46248393531266e-06, "loss": 0.5935, "step": 1242 }, { "epoch": 0.6044984802431611, "grad_norm": 0.08014931140903206, "learning_rate": 9.461619988482484e-06, "loss": 0.6237, "step": 1243 }, { "epoch": 0.6049848024316109, "grad_norm": 0.07227259063371626, "learning_rate": 9.460755387406402e-06, "loss": 0.5402, "step": 1244 }, { "epoch": 0.6054711246200608, "grad_norm": 0.07554439089112634, "learning_rate": 9.459890132211198e-06, "loss": 0.6012, "step": 1245 }, { "epoch": 0.6059574468085106, "grad_norm": 0.07168730785396009, "learning_rate": 9.45902422302375e-06, "loss": 0.5548, "step": 1246 }, { "epoch": 0.6064437689969605, "grad_norm": 0.07622369288726513, "learning_rate": 9.458157659971036e-06, "loss": 0.6531, "step": 1247 }, { "epoch": 0.6069300911854103, "grad_norm": 0.07599697389522288, "learning_rate": 9.457290443180128e-06, "loss": 0.6062, "step": 1248 }, { "epoch": 0.6074164133738602, "grad_norm": 0.07858094420082529, "learning_rate": 9.45642257277819e-06, "loss": 0.6099, "step": 1249 }, { "epoch": 0.60790273556231, "grad_norm": 0.07785243173717052, "learning_rate": 9.45555404889249e-06, "loss": 0.6732, "step": 1250 }, { "epoch": 0.6083890577507599, "grad_norm": 0.07574158313953933, "learning_rate": 9.454684871650383e-06, "loss": 0.574, "step": 1251 }, { "epoch": 0.6088753799392097, "grad_norm": 0.07679608406517179, "learning_rate": 9.453815041179329e-06, "loss": 0.5931, "step": 1252 }, { "epoch": 0.6093617021276596, "grad_norm": 0.0766464199097489, "learning_rate": 9.452944557606872e-06, "loss": 0.5984, "step": 1253 }, { "epoch": 0.6098480243161094, "grad_norm": 0.07530702461792167, "learning_rate": 9.452073421060664e-06, "loss": 0.576, "step": 1254 }, { "epoch": 0.6103343465045593, "grad_norm": 0.07159417467375702, "learning_rate": 9.451201631668445e-06, "loss": 0.5732, "step": 1255 }, { "epoch": 0.6108206686930091, "grad_norm": 0.08246673631446423, "learning_rate": 9.450329189558055e-06, "loss": 0.5734, "step": 1256 }, { "epoch": 0.611306990881459, "grad_norm": 0.0788529325647114, "learning_rate": 9.449456094857424e-06, "loss": 0.5859, "step": 1257 }, { "epoch": 0.6117933130699088, "grad_norm": 0.07147612723333374, "learning_rate": 9.448582347694584e-06, "loss": 0.5908, "step": 1258 }, { "epoch": 0.6122796352583587, "grad_norm": 0.08668808967495738, "learning_rate": 9.44770794819766e-06, "loss": 0.6401, "step": 1259 }, { "epoch": 0.6127659574468085, "grad_norm": 0.07864084941176788, "learning_rate": 9.446832896494874e-06, "loss": 0.6307, "step": 1260 }, { "epoch": 0.6132522796352584, "grad_norm": 0.07369508141475373, "learning_rate": 9.445957192714539e-06, "loss": 0.5811, "step": 1261 }, { "epoch": 0.6137386018237082, "grad_norm": 0.07836061362948436, "learning_rate": 9.445080836985067e-06, "loss": 0.6167, "step": 1262 }, { "epoch": 0.614224924012158, "grad_norm": 0.0749987054036263, "learning_rate": 9.444203829434972e-06, "loss": 0.5782, "step": 1263 }, { "epoch": 0.6147112462006079, "grad_norm": 0.0732620064153045, "learning_rate": 9.44332617019285e-06, "loss": 0.5954, "step": 1264 }, { "epoch": 0.6151975683890577, "grad_norm": 0.07711692559992857, "learning_rate": 9.442447859387402e-06, "loss": 0.5656, "step": 1265 }, { "epoch": 0.6156838905775076, "grad_norm": 0.07112162984434953, "learning_rate": 9.441568897147423e-06, "loss": 0.5458, "step": 1266 }, { "epoch": 0.6161702127659574, "grad_norm": 0.07518981577083829, "learning_rate": 9.440689283601805e-06, "loss": 0.6413, "step": 1267 }, { "epoch": 0.6166565349544073, "grad_norm": 0.07618361826932424, "learning_rate": 9.43980901887953e-06, "loss": 0.6183, "step": 1268 }, { "epoch": 0.6171428571428571, "grad_norm": 0.07970576693265521, "learning_rate": 9.438928103109678e-06, "loss": 0.5605, "step": 1269 }, { "epoch": 0.617629179331307, "grad_norm": 0.07340073066761459, "learning_rate": 9.438046536421428e-06, "loss": 0.6091, "step": 1270 }, { "epoch": 0.6181155015197568, "grad_norm": 0.07694405716043108, "learning_rate": 9.43716431894405e-06, "loss": 0.6068, "step": 1271 }, { "epoch": 0.6186018237082067, "grad_norm": 0.0839716058733985, "learning_rate": 9.436281450806914e-06, "loss": 0.6246, "step": 1272 }, { "epoch": 0.6190881458966565, "grad_norm": 0.07374800482169956, "learning_rate": 9.435397932139478e-06, "loss": 0.5798, "step": 1273 }, { "epoch": 0.6195744680851064, "grad_norm": 0.07463121031358598, "learning_rate": 9.434513763071304e-06, "loss": 0.5859, "step": 1274 }, { "epoch": 0.6200607902735562, "grad_norm": 0.07200095190924559, "learning_rate": 9.433628943732045e-06, "loss": 0.5342, "step": 1275 }, { "epoch": 0.6205471124620061, "grad_norm": 0.07536715163612707, "learning_rate": 9.432743474251446e-06, "loss": 0.5978, "step": 1276 }, { "epoch": 0.6210334346504559, "grad_norm": 0.0753631077103014, "learning_rate": 9.431857354759354e-06, "loss": 0.5737, "step": 1277 }, { "epoch": 0.6215197568389058, "grad_norm": 0.07951888275567472, "learning_rate": 9.43097058538571e-06, "loss": 0.5864, "step": 1278 }, { "epoch": 0.6220060790273556, "grad_norm": 0.07552921428479899, "learning_rate": 9.430083166260546e-06, "loss": 0.5717, "step": 1279 }, { "epoch": 0.6224924012158055, "grad_norm": 0.07930337438500187, "learning_rate": 9.429195097513993e-06, "loss": 0.5976, "step": 1280 }, { "epoch": 0.6229787234042553, "grad_norm": 0.07354276103923192, "learning_rate": 9.428306379276275e-06, "loss": 0.5533, "step": 1281 }, { "epoch": 0.6234650455927052, "grad_norm": 0.07786090673902678, "learning_rate": 9.427417011677713e-06, "loss": 0.6011, "step": 1282 }, { "epoch": 0.623951367781155, "grad_norm": 0.07781255651216264, "learning_rate": 9.426526994848724e-06, "loss": 0.5869, "step": 1283 }, { "epoch": 0.6244376899696049, "grad_norm": 0.07724423362135842, "learning_rate": 9.425636328919816e-06, "loss": 0.6124, "step": 1284 }, { "epoch": 0.6249240121580547, "grad_norm": 0.0768883950631787, "learning_rate": 9.424745014021598e-06, "loss": 0.599, "step": 1285 }, { "epoch": 0.6254103343465045, "grad_norm": 0.07255756431144399, "learning_rate": 9.423853050284771e-06, "loss": 0.5507, "step": 1286 }, { "epoch": 0.6258966565349544, "grad_norm": 0.07963698391002398, "learning_rate": 9.422960437840128e-06, "loss": 0.6186, "step": 1287 }, { "epoch": 0.6263829787234042, "grad_norm": 0.0752600077048511, "learning_rate": 9.422067176818564e-06, "loss": 0.596, "step": 1288 }, { "epoch": 0.6268693009118541, "grad_norm": 0.07138938654122967, "learning_rate": 9.421173267351064e-06, "loss": 0.5658, "step": 1289 }, { "epoch": 0.6273556231003039, "grad_norm": 0.07739640773278825, "learning_rate": 9.42027870956871e-06, "loss": 0.6144, "step": 1290 }, { "epoch": 0.6278419452887538, "grad_norm": 0.07617241888870808, "learning_rate": 9.41938350360268e-06, "loss": 0.591, "step": 1291 }, { "epoch": 0.6283282674772036, "grad_norm": 0.0755635925663801, "learning_rate": 9.418487649584242e-06, "loss": 0.5898, "step": 1292 }, { "epoch": 0.6288145896656535, "grad_norm": 0.07579957617527425, "learning_rate": 9.41759114764477e-06, "loss": 0.6098, "step": 1293 }, { "epoch": 0.6293009118541033, "grad_norm": 0.07751294289185374, "learning_rate": 9.416693997915717e-06, "loss": 0.6006, "step": 1294 }, { "epoch": 0.6297872340425532, "grad_norm": 0.07595048115012434, "learning_rate": 9.415796200528646e-06, "loss": 0.6268, "step": 1295 }, { "epoch": 0.630273556231003, "grad_norm": 0.07616064150377345, "learning_rate": 9.414897755615206e-06, "loss": 0.6164, "step": 1296 }, { "epoch": 0.6307598784194529, "grad_norm": 0.07424701903459012, "learning_rate": 9.413998663307145e-06, "loss": 0.5807, "step": 1297 }, { "epoch": 0.6312462006079027, "grad_norm": 0.08021849567048744, "learning_rate": 9.413098923736305e-06, "loss": 0.5615, "step": 1298 }, { "epoch": 0.6317325227963526, "grad_norm": 0.07796190104640652, "learning_rate": 9.412198537034622e-06, "loss": 0.6282, "step": 1299 }, { "epoch": 0.6322188449848024, "grad_norm": 0.0713647537014503, "learning_rate": 9.411297503334126e-06, "loss": 0.572, "step": 1300 }, { "epoch": 0.6327051671732523, "grad_norm": 0.07691961588478133, "learning_rate": 9.410395822766946e-06, "loss": 0.6033, "step": 1301 }, { "epoch": 0.6331914893617021, "grad_norm": 0.07181511775233088, "learning_rate": 9.4094934954653e-06, "loss": 0.5846, "step": 1302 }, { "epoch": 0.633677811550152, "grad_norm": 0.07280482498453499, "learning_rate": 9.408590521561509e-06, "loss": 0.5776, "step": 1303 }, { "epoch": 0.6341641337386018, "grad_norm": 0.07547264112790869, "learning_rate": 9.407686901187978e-06, "loss": 0.6287, "step": 1304 }, { "epoch": 0.6346504559270517, "grad_norm": 0.07467375211345316, "learning_rate": 9.406782634477219e-06, "loss": 0.6164, "step": 1305 }, { "epoch": 0.6351367781155015, "grad_norm": 0.07298493095458604, "learning_rate": 9.405877721561826e-06, "loss": 0.5519, "step": 1306 }, { "epoch": 0.6356231003039514, "grad_norm": 0.07369224380228381, "learning_rate": 9.404972162574497e-06, "loss": 0.5951, "step": 1307 }, { "epoch": 0.6361094224924012, "grad_norm": 0.07455567363611043, "learning_rate": 9.404065957648023e-06, "loss": 0.6038, "step": 1308 }, { "epoch": 0.6365957446808511, "grad_norm": 0.06949987297984836, "learning_rate": 9.40315910691529e-06, "loss": 0.5354, "step": 1309 }, { "epoch": 0.6370820668693009, "grad_norm": 0.07576107588905442, "learning_rate": 9.402251610509272e-06, "loss": 0.6082, "step": 1310 }, { "epoch": 0.6375683890577507, "grad_norm": 0.07499662795318578, "learning_rate": 9.401343468563046e-06, "loss": 0.5933, "step": 1311 }, { "epoch": 0.6380547112462006, "grad_norm": 0.07949361276484732, "learning_rate": 9.400434681209782e-06, "loss": 0.5956, "step": 1312 }, { "epoch": 0.6385410334346504, "grad_norm": 0.07140019093868136, "learning_rate": 9.399525248582744e-06, "loss": 0.5921, "step": 1313 }, { "epoch": 0.6390273556231003, "grad_norm": 0.07156033904262038, "learning_rate": 9.398615170815286e-06, "loss": 0.579, "step": 1314 }, { "epoch": 0.6395136778115501, "grad_norm": 0.07320530784039472, "learning_rate": 9.397704448040865e-06, "loss": 0.5678, "step": 1315 }, { "epoch": 0.64, "grad_norm": 0.0802553142080068, "learning_rate": 9.396793080393022e-06, "loss": 0.6226, "step": 1316 }, { "epoch": 0.6404863221884498, "grad_norm": 0.0759220028641158, "learning_rate": 9.395881068005406e-06, "loss": 0.6251, "step": 1317 }, { "epoch": 0.6409726443768997, "grad_norm": 0.07403911679268774, "learning_rate": 9.39496841101175e-06, "loss": 0.5755, "step": 1318 }, { "epoch": 0.6414589665653495, "grad_norm": 0.07542447324190077, "learning_rate": 9.394055109545884e-06, "loss": 0.587, "step": 1319 }, { "epoch": 0.6419452887537994, "grad_norm": 0.07147402975334692, "learning_rate": 9.393141163741732e-06, "loss": 0.5944, "step": 1320 }, { "epoch": 0.6424316109422492, "grad_norm": 0.07154490670142041, "learning_rate": 9.392226573733319e-06, "loss": 0.5299, "step": 1321 }, { "epoch": 0.6429179331306991, "grad_norm": 0.07441657808752647, "learning_rate": 9.391311339654755e-06, "loss": 0.5667, "step": 1322 }, { "epoch": 0.6434042553191489, "grad_norm": 0.0746968318126394, "learning_rate": 9.390395461640246e-06, "loss": 0.6213, "step": 1323 }, { "epoch": 0.6438905775075988, "grad_norm": 0.07366961550448232, "learning_rate": 9.389478939824104e-06, "loss": 0.583, "step": 1324 }, { "epoch": 0.6443768996960486, "grad_norm": 0.07473921421602571, "learning_rate": 9.388561774340719e-06, "loss": 0.5706, "step": 1325 }, { "epoch": 0.6448632218844985, "grad_norm": 0.0704060647745574, "learning_rate": 9.387643965324584e-06, "loss": 0.5679, "step": 1326 }, { "epoch": 0.6453495440729483, "grad_norm": 0.07896972142778609, "learning_rate": 9.386725512910289e-06, "loss": 0.6135, "step": 1327 }, { "epoch": 0.6458358662613982, "grad_norm": 0.07380151563569691, "learning_rate": 9.385806417232511e-06, "loss": 0.6039, "step": 1328 }, { "epoch": 0.646322188449848, "grad_norm": 0.07790959362018812, "learning_rate": 9.384886678426027e-06, "loss": 0.6603, "step": 1329 }, { "epoch": 0.6468085106382979, "grad_norm": 0.06950009595065189, "learning_rate": 9.383966296625704e-06, "loss": 0.5441, "step": 1330 }, { "epoch": 0.6472948328267477, "grad_norm": 0.07004069179969032, "learning_rate": 9.383045271966507e-06, "loss": 0.5518, "step": 1331 }, { "epoch": 0.6477811550151976, "grad_norm": 0.07078421296750745, "learning_rate": 9.382123604583492e-06, "loss": 0.6194, "step": 1332 }, { "epoch": 0.6482674772036474, "grad_norm": 0.07680435549364414, "learning_rate": 9.381201294611815e-06, "loss": 0.5827, "step": 1333 }, { "epoch": 0.6487537993920973, "grad_norm": 0.07073906693618659, "learning_rate": 9.38027834218672e-06, "loss": 0.5849, "step": 1334 }, { "epoch": 0.6492401215805471, "grad_norm": 0.07342404060838689, "learning_rate": 9.379354747443548e-06, "loss": 0.601, "step": 1335 }, { "epoch": 0.6497264437689969, "grad_norm": 0.08563051196626331, "learning_rate": 9.378430510517732e-06, "loss": 0.6084, "step": 1336 }, { "epoch": 0.6502127659574468, "grad_norm": 0.07317972487372107, "learning_rate": 9.3775056315448e-06, "loss": 0.5682, "step": 1337 }, { "epoch": 0.6506990881458966, "grad_norm": 0.0719539746980849, "learning_rate": 9.37658011066038e-06, "loss": 0.5699, "step": 1338 }, { "epoch": 0.6511854103343465, "grad_norm": 0.07566972402368674, "learning_rate": 9.375653948000186e-06, "loss": 0.6213, "step": 1339 }, { "epoch": 0.6516717325227963, "grad_norm": 0.07380417213702649, "learning_rate": 9.374727143700028e-06, "loss": 0.5904, "step": 1340 }, { "epoch": 0.6521580547112462, "grad_norm": 0.07271264780998443, "learning_rate": 9.373799697895813e-06, "loss": 0.6051, "step": 1341 }, { "epoch": 0.652644376899696, "grad_norm": 0.07838171715161318, "learning_rate": 9.372871610723542e-06, "loss": 0.6013, "step": 1342 }, { "epoch": 0.6531306990881459, "grad_norm": 0.14165576564958637, "learning_rate": 9.371942882319306e-06, "loss": 0.6204, "step": 1343 }, { "epoch": 0.6536170212765957, "grad_norm": 0.07348756709956493, "learning_rate": 9.37101351281929e-06, "loss": 0.614, "step": 1344 }, { "epoch": 0.6541033434650456, "grad_norm": 0.07401053713580107, "learning_rate": 9.370083502359781e-06, "loss": 0.5747, "step": 1345 }, { "epoch": 0.6545896656534954, "grad_norm": 0.08453390034625481, "learning_rate": 9.36915285107715e-06, "loss": 0.587, "step": 1346 }, { "epoch": 0.6550759878419453, "grad_norm": 0.07227759634646228, "learning_rate": 9.368221559107872e-06, "loss": 0.5649, "step": 1347 }, { "epoch": 0.6555623100303951, "grad_norm": 0.0728266159654345, "learning_rate": 9.367289626588504e-06, "loss": 0.5729, "step": 1348 }, { "epoch": 0.656048632218845, "grad_norm": 0.07427873409535371, "learning_rate": 9.366357053655707e-06, "loss": 0.6213, "step": 1349 }, { "epoch": 0.6565349544072948, "grad_norm": 0.072299499982864, "learning_rate": 9.36542384044623e-06, "loss": 0.5836, "step": 1350 }, { "epoch": 0.6570212765957447, "grad_norm": 0.07801311903207866, "learning_rate": 9.364489987096921e-06, "loss": 0.5937, "step": 1351 }, { "epoch": 0.6575075987841945, "grad_norm": 0.07462615537520143, "learning_rate": 9.363555493744719e-06, "loss": 0.5673, "step": 1352 }, { "epoch": 0.6579939209726444, "grad_norm": 0.07760785514022325, "learning_rate": 9.362620360526652e-06, "loss": 0.618, "step": 1353 }, { "epoch": 0.6584802431610942, "grad_norm": 0.08192318385525697, "learning_rate": 9.36168458757985e-06, "loss": 0.6401, "step": 1354 }, { "epoch": 0.6589665653495441, "grad_norm": 0.07440302843810591, "learning_rate": 9.360748175041537e-06, "loss": 0.5455, "step": 1355 }, { "epoch": 0.6594528875379939, "grad_norm": 0.07687441229260948, "learning_rate": 9.359811123049022e-06, "loss": 0.592, "step": 1356 }, { "epoch": 0.6599392097264438, "grad_norm": 0.07370265794485174, "learning_rate": 9.358873431739712e-06, "loss": 0.5588, "step": 1357 }, { "epoch": 0.6604255319148936, "grad_norm": 0.0721400445080261, "learning_rate": 9.357935101251115e-06, "loss": 0.5903, "step": 1358 }, { "epoch": 0.6609118541033434, "grad_norm": 0.07182714890651236, "learning_rate": 9.35699613172082e-06, "loss": 0.5748, "step": 1359 }, { "epoch": 0.6613981762917933, "grad_norm": 0.07518118467550815, "learning_rate": 9.356056523286522e-06, "loss": 0.6496, "step": 1360 }, { "epoch": 0.6618844984802431, "grad_norm": 0.07468012041776521, "learning_rate": 9.355116276086e-06, "loss": 0.6238, "step": 1361 }, { "epoch": 0.662370820668693, "grad_norm": 0.07325718409085749, "learning_rate": 9.354175390257131e-06, "loss": 0.5733, "step": 1362 }, { "epoch": 0.6628571428571428, "grad_norm": 0.07171755391535022, "learning_rate": 9.353233865937888e-06, "loss": 0.565, "step": 1363 }, { "epoch": 0.6633434650455927, "grad_norm": 0.0731184986212707, "learning_rate": 9.352291703266332e-06, "loss": 0.5839, "step": 1364 }, { "epoch": 0.6638297872340425, "grad_norm": 0.07396741175939628, "learning_rate": 9.351348902380622e-06, "loss": 0.5738, "step": 1365 }, { "epoch": 0.6643161094224924, "grad_norm": 0.07343427635834941, "learning_rate": 9.350405463419006e-06, "loss": 0.5635, "step": 1366 }, { "epoch": 0.6648024316109422, "grad_norm": 0.07598390137699437, "learning_rate": 9.349461386519832e-06, "loss": 0.5626, "step": 1367 }, { "epoch": 0.6652887537993921, "grad_norm": 0.07539746214781103, "learning_rate": 9.348516671821537e-06, "loss": 0.6235, "step": 1368 }, { "epoch": 0.6657750759878419, "grad_norm": 0.07471727742270907, "learning_rate": 9.347571319462654e-06, "loss": 0.5894, "step": 1369 }, { "epoch": 0.6662613981762918, "grad_norm": 0.06957919467162495, "learning_rate": 9.346625329581805e-06, "loss": 0.5693, "step": 1370 }, { "epoch": 0.6667477203647416, "grad_norm": 0.07199540687343707, "learning_rate": 9.345678702317711e-06, "loss": 0.5708, "step": 1371 }, { "epoch": 0.6672340425531915, "grad_norm": 0.07639686103443452, "learning_rate": 9.344731437809184e-06, "loss": 0.6139, "step": 1372 }, { "epoch": 0.6677203647416413, "grad_norm": 0.07024923997307037, "learning_rate": 9.34378353619513e-06, "loss": 0.5854, "step": 1373 }, { "epoch": 0.6682066869300912, "grad_norm": 0.07617353295964674, "learning_rate": 9.342834997614547e-06, "loss": 0.6467, "step": 1374 }, { "epoch": 0.668693009118541, "grad_norm": 0.07461421852183255, "learning_rate": 9.341885822206529e-06, "loss": 0.5927, "step": 1375 }, { "epoch": 0.6691793313069909, "grad_norm": 0.07416874639241813, "learning_rate": 9.340936010110259e-06, "loss": 0.5669, "step": 1376 }, { "epoch": 0.6696656534954407, "grad_norm": 0.07430935160163336, "learning_rate": 9.339985561465018e-06, "loss": 0.5926, "step": 1377 }, { "epoch": 0.6701519756838906, "grad_norm": 0.07428555762948726, "learning_rate": 9.339034476410177e-06, "loss": 0.5855, "step": 1378 }, { "epoch": 0.6706382978723404, "grad_norm": 0.07725419552262293, "learning_rate": 9.338082755085205e-06, "loss": 0.6111, "step": 1379 }, { "epoch": 0.6711246200607903, "grad_norm": 0.07045166755952159, "learning_rate": 9.337130397629659e-06, "loss": 0.5337, "step": 1380 }, { "epoch": 0.6716109422492401, "grad_norm": 0.0741447422026241, "learning_rate": 9.336177404183191e-06, "loss": 0.5781, "step": 1381 }, { "epoch": 0.67209726443769, "grad_norm": 0.08752395837855446, "learning_rate": 9.335223774885547e-06, "loss": 0.6158, "step": 1382 }, { "epoch": 0.6725835866261398, "grad_norm": 0.07797832166761781, "learning_rate": 9.334269509876566e-06, "loss": 0.6512, "step": 1383 }, { "epoch": 0.6730699088145896, "grad_norm": 0.07329881022345053, "learning_rate": 9.333314609296182e-06, "loss": 0.5876, "step": 1384 }, { "epoch": 0.6735562310030395, "grad_norm": 0.07543180645199737, "learning_rate": 9.332359073284417e-06, "loss": 0.6284, "step": 1385 }, { "epoch": 0.6740425531914893, "grad_norm": 0.07872529346912259, "learning_rate": 9.33140290198139e-06, "loss": 0.5843, "step": 1386 }, { "epoch": 0.6745288753799392, "grad_norm": 0.07220715364940554, "learning_rate": 9.330446095527316e-06, "loss": 0.5695, "step": 1387 }, { "epoch": 0.675015197568389, "grad_norm": 0.07165501643998996, "learning_rate": 9.329488654062496e-06, "loss": 0.5928, "step": 1388 }, { "epoch": 0.6755015197568389, "grad_norm": 0.07621881974718972, "learning_rate": 9.32853057772733e-06, "loss": 0.5872, "step": 1389 }, { "epoch": 0.6759878419452887, "grad_norm": 0.07428394665142012, "learning_rate": 9.32757186666231e-06, "loss": 0.5706, "step": 1390 }, { "epoch": 0.6764741641337386, "grad_norm": 0.07395745268521768, "learning_rate": 9.326612521008015e-06, "loss": 0.5883, "step": 1391 }, { "epoch": 0.6769604863221884, "grad_norm": 0.07074106010201726, "learning_rate": 9.32565254090513e-06, "loss": 0.5713, "step": 1392 }, { "epoch": 0.6774468085106383, "grad_norm": 0.0727276700834868, "learning_rate": 9.324691926494419e-06, "loss": 0.5982, "step": 1393 }, { "epoch": 0.6779331306990881, "grad_norm": 0.07364483108630815, "learning_rate": 9.323730677916747e-06, "loss": 0.5986, "step": 1394 }, { "epoch": 0.678419452887538, "grad_norm": 0.07459201813263207, "learning_rate": 9.32276879531307e-06, "loss": 0.5743, "step": 1395 }, { "epoch": 0.6789057750759878, "grad_norm": 0.0759748185806464, "learning_rate": 9.321806278824436e-06, "loss": 0.615, "step": 1396 }, { "epoch": 0.6793920972644377, "grad_norm": 0.07002473464917006, "learning_rate": 9.320843128591992e-06, "loss": 0.535, "step": 1397 }, { "epoch": 0.6798784194528875, "grad_norm": 0.0731356078991561, "learning_rate": 9.319879344756968e-06, "loss": 0.582, "step": 1398 }, { "epoch": 0.6803647416413374, "grad_norm": 0.07723671011700303, "learning_rate": 9.318914927460694e-06, "loss": 0.5624, "step": 1399 }, { "epoch": 0.6808510638297872, "grad_norm": 0.07724769484034885, "learning_rate": 9.31794987684459e-06, "loss": 0.6218, "step": 1400 }, { "epoch": 0.6813373860182371, "grad_norm": 0.07400788891102064, "learning_rate": 9.31698419305017e-06, "loss": 0.5958, "step": 1401 }, { "epoch": 0.6818237082066869, "grad_norm": 0.07757563601577189, "learning_rate": 9.31601787621904e-06, "loss": 0.6182, "step": 1402 }, { "epoch": 0.6823100303951368, "grad_norm": 0.07511121562407494, "learning_rate": 9.315050926492901e-06, "loss": 0.6065, "step": 1403 }, { "epoch": 0.6827963525835866, "grad_norm": 0.07931631630368464, "learning_rate": 9.314083344013544e-06, "loss": 0.5592, "step": 1404 }, { "epoch": 0.6832826747720365, "grad_norm": 0.0741642610955171, "learning_rate": 9.313115128922853e-06, "loss": 0.6155, "step": 1405 }, { "epoch": 0.6837689969604863, "grad_norm": 0.08036336873400678, "learning_rate": 9.312146281362811e-06, "loss": 0.6576, "step": 1406 }, { "epoch": 0.6842553191489362, "grad_norm": 0.07491409078338893, "learning_rate": 9.311176801475481e-06, "loss": 0.5878, "step": 1407 }, { "epoch": 0.684741641337386, "grad_norm": 0.07208127938249424, "learning_rate": 9.31020668940303e-06, "loss": 0.5876, "step": 1408 }, { "epoch": 0.6852279635258358, "grad_norm": 0.07351932513541455, "learning_rate": 9.309235945287715e-06, "loss": 0.5753, "step": 1409 }, { "epoch": 0.6857142857142857, "grad_norm": 0.07886115028555457, "learning_rate": 9.308264569271882e-06, "loss": 0.6074, "step": 1410 }, { "epoch": 0.6862006079027355, "grad_norm": 0.07488771684118997, "learning_rate": 9.307292561497974e-06, "loss": 0.6166, "step": 1411 }, { "epoch": 0.6866869300911854, "grad_norm": 0.07445563565056222, "learning_rate": 9.306319922108525e-06, "loss": 0.5779, "step": 1412 }, { "epoch": 0.6871732522796352, "grad_norm": 0.07895209662723902, "learning_rate": 9.30534665124616e-06, "loss": 0.6297, "step": 1413 }, { "epoch": 0.6876595744680851, "grad_norm": 0.0717508744330046, "learning_rate": 9.304372749053599e-06, "loss": 0.5608, "step": 1414 }, { "epoch": 0.6881458966565349, "grad_norm": 0.10111506966422479, "learning_rate": 9.303398215673654e-06, "loss": 0.5969, "step": 1415 }, { "epoch": 0.6886322188449848, "grad_norm": 0.07264927316471913, "learning_rate": 9.30242305124923e-06, "loss": 0.5566, "step": 1416 }, { "epoch": 0.6891185410334346, "grad_norm": 0.08338970011909767, "learning_rate": 9.301447255923321e-06, "loss": 0.6405, "step": 1417 }, { "epoch": 0.6896048632218845, "grad_norm": 0.07554742175539556, "learning_rate": 9.300470829839018e-06, "loss": 0.6081, "step": 1418 }, { "epoch": 0.6900911854103343, "grad_norm": 0.07231073949415669, "learning_rate": 9.299493773139504e-06, "loss": 0.5545, "step": 1419 }, { "epoch": 0.6905775075987842, "grad_norm": 0.0740590781974643, "learning_rate": 9.298516085968052e-06, "loss": 0.6112, "step": 1420 }, { "epoch": 0.691063829787234, "grad_norm": 0.07493620812179583, "learning_rate": 9.29753776846803e-06, "loss": 0.5885, "step": 1421 }, { "epoch": 0.6915501519756839, "grad_norm": 0.07029894830845575, "learning_rate": 9.296558820782895e-06, "loss": 0.5488, "step": 1422 }, { "epoch": 0.6920364741641337, "grad_norm": 0.07530266395754613, "learning_rate": 9.2955792430562e-06, "loss": 0.6113, "step": 1423 }, { "epoch": 0.6925227963525836, "grad_norm": 0.07160850113556726, "learning_rate": 9.294599035431588e-06, "loss": 0.5801, "step": 1424 }, { "epoch": 0.6930091185410334, "grad_norm": 0.07401711312717284, "learning_rate": 9.293618198052796e-06, "loss": 0.5944, "step": 1425 }, { "epoch": 0.6934954407294833, "grad_norm": 0.07612252518364429, "learning_rate": 9.29263673106365e-06, "loss": 0.5902, "step": 1426 }, { "epoch": 0.6939817629179331, "grad_norm": 0.07427845038687236, "learning_rate": 9.291654634608079e-06, "loss": 0.6033, "step": 1427 }, { "epoch": 0.694468085106383, "grad_norm": 0.07323589939350156, "learning_rate": 9.290671908830087e-06, "loss": 0.5827, "step": 1428 }, { "epoch": 0.6949544072948328, "grad_norm": 0.0712966875300514, "learning_rate": 9.289688553873783e-06, "loss": 0.5741, "step": 1429 }, { "epoch": 0.6954407294832827, "grad_norm": 0.07130510436210435, "learning_rate": 9.288704569883366e-06, "loss": 0.5688, "step": 1430 }, { "epoch": 0.6959270516717325, "grad_norm": 0.07660663484948632, "learning_rate": 9.287719957003128e-06, "loss": 0.6295, "step": 1431 }, { "epoch": 0.6964133738601823, "grad_norm": 0.07277229698979291, "learning_rate": 9.286734715377446e-06, "loss": 0.5997, "step": 1432 }, { "epoch": 0.6968996960486322, "grad_norm": 0.07280402845683927, "learning_rate": 9.285748845150797e-06, "loss": 0.5987, "step": 1433 }, { "epoch": 0.697386018237082, "grad_norm": 0.07711021088789537, "learning_rate": 9.284762346467749e-06, "loss": 0.5807, "step": 1434 }, { "epoch": 0.6978723404255319, "grad_norm": 0.07800455939095077, "learning_rate": 9.283775219472958e-06, "loss": 0.601, "step": 1435 }, { "epoch": 0.6983586626139817, "grad_norm": 0.07280568878207604, "learning_rate": 9.282787464311176e-06, "loss": 0.5458, "step": 1436 }, { "epoch": 0.6988449848024316, "grad_norm": 0.07529816212765439, "learning_rate": 9.281799081127249e-06, "loss": 0.6013, "step": 1437 }, { "epoch": 0.6993313069908814, "grad_norm": 0.0770675973094629, "learning_rate": 9.280810070066108e-06, "loss": 0.6156, "step": 1438 }, { "epoch": 0.6998176291793313, "grad_norm": 0.0777478441442148, "learning_rate": 9.279820431272783e-06, "loss": 0.6144, "step": 1439 }, { "epoch": 0.7003039513677811, "grad_norm": 0.07343110528732574, "learning_rate": 9.278830164892392e-06, "loss": 0.558, "step": 1440 }, { "epoch": 0.700790273556231, "grad_norm": 0.0739284962130104, "learning_rate": 9.277839271070146e-06, "loss": 0.6098, "step": 1441 }, { "epoch": 0.7012765957446808, "grad_norm": 0.06990607854779657, "learning_rate": 9.27684774995135e-06, "loss": 0.559, "step": 1442 }, { "epoch": 0.7017629179331307, "grad_norm": 0.07621166065463421, "learning_rate": 9.275855601681398e-06, "loss": 0.5934, "step": 1443 }, { "epoch": 0.7022492401215805, "grad_norm": 0.07445005797019247, "learning_rate": 9.274862826405777e-06, "loss": 0.6009, "step": 1444 }, { "epoch": 0.7027355623100304, "grad_norm": 0.0761006018967215, "learning_rate": 9.273869424270068e-06, "loss": 0.5847, "step": 1445 }, { "epoch": 0.7032218844984802, "grad_norm": 0.08103538466046839, "learning_rate": 9.27287539541994e-06, "loss": 0.5937, "step": 1446 }, { "epoch": 0.7037082066869301, "grad_norm": 0.07678231053956183, "learning_rate": 9.271880740001158e-06, "loss": 0.597, "step": 1447 }, { "epoch": 0.7041945288753799, "grad_norm": 0.07488548883099819, "learning_rate": 9.270885458159576e-06, "loss": 0.5999, "step": 1448 }, { "epoch": 0.7046808510638298, "grad_norm": 0.07303580166136743, "learning_rate": 9.269889550041138e-06, "loss": 0.5674, "step": 1449 }, { "epoch": 0.7051671732522796, "grad_norm": 0.07183746472331608, "learning_rate": 9.268893015791889e-06, "loss": 0.5702, "step": 1450 }, { "epoch": 0.7056534954407295, "grad_norm": 0.07349428775246662, "learning_rate": 9.267895855557954e-06, "loss": 0.5828, "step": 1451 }, { "epoch": 0.7061398176291793, "grad_norm": 0.0774485123484772, "learning_rate": 9.266898069485556e-06, "loss": 0.5873, "step": 1452 }, { "epoch": 0.7066261398176292, "grad_norm": 0.07686840919494571, "learning_rate": 9.26589965772101e-06, "loss": 0.6104, "step": 1453 }, { "epoch": 0.707112462006079, "grad_norm": 0.07506950804808296, "learning_rate": 9.264900620410722e-06, "loss": 0.6014, "step": 1454 }, { "epoch": 0.7075987841945289, "grad_norm": 0.07259189951541917, "learning_rate": 9.263900957701191e-06, "loss": 0.5499, "step": 1455 }, { "epoch": 0.7080851063829787, "grad_norm": 0.07660514904475572, "learning_rate": 9.262900669739003e-06, "loss": 0.5951, "step": 1456 }, { "epoch": 0.7085714285714285, "grad_norm": 0.07388875540870823, "learning_rate": 9.26189975667084e-06, "loss": 0.5852, "step": 1457 }, { "epoch": 0.7090577507598784, "grad_norm": 0.07142243387914578, "learning_rate": 9.260898218643475e-06, "loss": 0.5663, "step": 1458 }, { "epoch": 0.7095440729483282, "grad_norm": 0.0765504082170502, "learning_rate": 9.259896055803772e-06, "loss": 0.5872, "step": 1459 }, { "epoch": 0.7100303951367781, "grad_norm": 0.07653204930742795, "learning_rate": 9.258893268298685e-06, "loss": 0.6205, "step": 1460 }, { "epoch": 0.7105167173252279, "grad_norm": 0.06978368133165619, "learning_rate": 9.257889856275266e-06, "loss": 0.5601, "step": 1461 }, { "epoch": 0.7110030395136778, "grad_norm": 0.07527087752028105, "learning_rate": 9.25688581988065e-06, "loss": 0.5855, "step": 1462 }, { "epoch": 0.7114893617021276, "grad_norm": 0.07230819844467026, "learning_rate": 9.255881159262067e-06, "loss": 0.6071, "step": 1463 }, { "epoch": 0.7119756838905775, "grad_norm": 0.07905349253913133, "learning_rate": 9.254875874566844e-06, "loss": 0.6097, "step": 1464 }, { "epoch": 0.7124620060790273, "grad_norm": 0.07981563554797644, "learning_rate": 9.25386996594239e-06, "loss": 0.5821, "step": 1465 }, { "epoch": 0.7129483282674772, "grad_norm": 0.08896425541342512, "learning_rate": 9.25286343353621e-06, "loss": 0.5818, "step": 1466 }, { "epoch": 0.713434650455927, "grad_norm": 0.07943597993980359, "learning_rate": 9.251856277495903e-06, "loss": 0.5834, "step": 1467 }, { "epoch": 0.7139209726443769, "grad_norm": 0.07754984722940073, "learning_rate": 9.250848497969156e-06, "loss": 0.6082, "step": 1468 }, { "epoch": 0.7144072948328267, "grad_norm": 0.07254018381941152, "learning_rate": 9.249840095103748e-06, "loss": 0.603, "step": 1469 }, { "epoch": 0.7148936170212766, "grad_norm": 0.0733075420079743, "learning_rate": 9.248831069047551e-06, "loss": 0.559, "step": 1470 }, { "epoch": 0.7153799392097264, "grad_norm": 0.07266444657951242, "learning_rate": 9.247821419948526e-06, "loss": 0.5907, "step": 1471 }, { "epoch": 0.7158662613981763, "grad_norm": 0.07705924353116533, "learning_rate": 9.246811147954726e-06, "loss": 0.5999, "step": 1472 }, { "epoch": 0.7163525835866261, "grad_norm": 0.07533047665866212, "learning_rate": 9.245800253214298e-06, "loss": 0.5869, "step": 1473 }, { "epoch": 0.716838905775076, "grad_norm": 0.0744974944358589, "learning_rate": 9.244788735875477e-06, "loss": 0.5708, "step": 1474 }, { "epoch": 0.7173252279635258, "grad_norm": 0.07178464311652623, "learning_rate": 9.243776596086591e-06, "loss": 0.5975, "step": 1475 }, { "epoch": 0.7178115501519757, "grad_norm": 0.07236143152879067, "learning_rate": 9.242763833996058e-06, "loss": 0.5904, "step": 1476 }, { "epoch": 0.7182978723404255, "grad_norm": 0.07337170812927998, "learning_rate": 9.241750449752388e-06, "loss": 0.5879, "step": 1477 }, { "epoch": 0.7187841945288754, "grad_norm": 0.07635888197606563, "learning_rate": 9.240736443504184e-06, "loss": 0.6021, "step": 1478 }, { "epoch": 0.7192705167173252, "grad_norm": 0.07378321260219771, "learning_rate": 9.239721815400136e-06, "loss": 0.5678, "step": 1479 }, { "epoch": 0.7197568389057751, "grad_norm": 0.07611041717504945, "learning_rate": 9.238706565589029e-06, "loss": 0.6214, "step": 1480 }, { "epoch": 0.7202431610942249, "grad_norm": 0.0759542049940672, "learning_rate": 9.237690694219739e-06, "loss": 0.6274, "step": 1481 }, { "epoch": 0.7207294832826747, "grad_norm": 0.08270812030201027, "learning_rate": 9.23667420144123e-06, "loss": 0.5587, "step": 1482 }, { "epoch": 0.7212158054711246, "grad_norm": 0.07548350171759112, "learning_rate": 9.235657087402561e-06, "loss": 0.5403, "step": 1483 }, { "epoch": 0.7217021276595744, "grad_norm": 0.07307205409205421, "learning_rate": 9.234639352252878e-06, "loss": 0.5763, "step": 1484 }, { "epoch": 0.7221884498480243, "grad_norm": 0.07899770694583005, "learning_rate": 9.233620996141421e-06, "loss": 0.5796, "step": 1485 }, { "epoch": 0.7226747720364741, "grad_norm": 0.0722503208501567, "learning_rate": 9.232602019217523e-06, "loss": 0.555, "step": 1486 }, { "epoch": 0.723161094224924, "grad_norm": 0.0777770475318072, "learning_rate": 9.231582421630601e-06, "loss": 0.6055, "step": 1487 }, { "epoch": 0.7236474164133738, "grad_norm": 0.07009950208335403, "learning_rate": 9.230562203530171e-06, "loss": 0.5407, "step": 1488 }, { "epoch": 0.7241337386018237, "grad_norm": 0.07477081720472192, "learning_rate": 9.229541365065834e-06, "loss": 0.5421, "step": 1489 }, { "epoch": 0.7246200607902735, "grad_norm": 0.0759906895921592, "learning_rate": 9.228519906387287e-06, "loss": 0.6108, "step": 1490 }, { "epoch": 0.7251063829787234, "grad_norm": 0.0733455529361756, "learning_rate": 9.227497827644313e-06, "loss": 0.5619, "step": 1491 }, { "epoch": 0.7255927051671732, "grad_norm": 0.08263305822583293, "learning_rate": 9.22647512898679e-06, "loss": 0.5921, "step": 1492 }, { "epoch": 0.7260790273556231, "grad_norm": 0.07574909281930131, "learning_rate": 9.225451810564683e-06, "loss": 0.5839, "step": 1493 }, { "epoch": 0.7265653495440729, "grad_norm": 0.07165287942370305, "learning_rate": 9.224427872528051e-06, "loss": 0.5818, "step": 1494 }, { "epoch": 0.7270516717325228, "grad_norm": 0.07230920519373132, "learning_rate": 9.223403315027044e-06, "loss": 0.5493, "step": 1495 }, { "epoch": 0.7275379939209726, "grad_norm": 0.07825297185910812, "learning_rate": 9.2223781382119e-06, "loss": 0.5845, "step": 1496 }, { "epoch": 0.7280243161094225, "grad_norm": 0.10248125073170866, "learning_rate": 9.22135234223295e-06, "loss": 0.5989, "step": 1497 }, { "epoch": 0.7285106382978723, "grad_norm": 0.07484499954746404, "learning_rate": 9.220325927240617e-06, "loss": 0.5904, "step": 1498 }, { "epoch": 0.7289969604863222, "grad_norm": 0.07437512859206304, "learning_rate": 9.21929889338541e-06, "loss": 0.5816, "step": 1499 }, { "epoch": 0.729483282674772, "grad_norm": 0.0732815561616446, "learning_rate": 9.218271240817935e-06, "loss": 0.5893, "step": 1500 }, { "epoch": 0.7299696048632219, "grad_norm": 0.08304059906781426, "learning_rate": 9.217242969688883e-06, "loss": 0.5904, "step": 1501 }, { "epoch": 0.7304559270516717, "grad_norm": 0.07563609614571871, "learning_rate": 9.216214080149039e-06, "loss": 0.5929, "step": 1502 }, { "epoch": 0.7309422492401216, "grad_norm": 0.0674900711119595, "learning_rate": 9.21518457234928e-06, "loss": 0.5246, "step": 1503 }, { "epoch": 0.7314285714285714, "grad_norm": 0.17283452029261448, "learning_rate": 9.214154446440571e-06, "loss": 0.616, "step": 1504 }, { "epoch": 0.7319148936170212, "grad_norm": 0.07309989389637253, "learning_rate": 9.213123702573964e-06, "loss": 0.5547, "step": 1505 }, { "epoch": 0.7324012158054711, "grad_norm": 0.07344874038511445, "learning_rate": 9.212092340900613e-06, "loss": 0.5959, "step": 1506 }, { "epoch": 0.7328875379939209, "grad_norm": 0.07147587530509987, "learning_rate": 9.21106036157175e-06, "loss": 0.5607, "step": 1507 }, { "epoch": 0.7333738601823708, "grad_norm": 0.07980585878521716, "learning_rate": 9.210027764738704e-06, "loss": 0.6112, "step": 1508 }, { "epoch": 0.7338601823708206, "grad_norm": 0.07498692429958936, "learning_rate": 9.208994550552894e-06, "loss": 0.5861, "step": 1509 }, { "epoch": 0.7343465045592705, "grad_norm": 0.07347339263737054, "learning_rate": 9.207960719165832e-06, "loss": 0.5493, "step": 1510 }, { "epoch": 0.7348328267477203, "grad_norm": 0.0759092211741073, "learning_rate": 9.206926270729112e-06, "loss": 0.6228, "step": 1511 }, { "epoch": 0.7353191489361702, "grad_norm": 0.07368383507571005, "learning_rate": 9.205891205394429e-06, "loss": 0.5753, "step": 1512 }, { "epoch": 0.73580547112462, "grad_norm": 0.10975233474738065, "learning_rate": 9.204855523313561e-06, "loss": 0.6039, "step": 1513 }, { "epoch": 0.7362917933130699, "grad_norm": 0.07828013689371244, "learning_rate": 9.203819224638381e-06, "loss": 0.6182, "step": 1514 }, { "epoch": 0.7367781155015197, "grad_norm": 0.07398729106921355, "learning_rate": 9.202782309520848e-06, "loss": 0.6034, "step": 1515 }, { "epoch": 0.7372644376899696, "grad_norm": 0.07464730833887104, "learning_rate": 9.201744778113016e-06, "loss": 0.5877, "step": 1516 }, { "epoch": 0.7377507598784194, "grad_norm": 0.0759031077190833, "learning_rate": 9.200706630567026e-06, "loss": 0.6258, "step": 1517 }, { "epoch": 0.7382370820668693, "grad_norm": 0.07669451336585603, "learning_rate": 9.199667867035111e-06, "loss": 0.6061, "step": 1518 }, { "epoch": 0.7387234042553191, "grad_norm": 0.07328423972939507, "learning_rate": 9.198628487669592e-06, "loss": 0.5765, "step": 1519 }, { "epoch": 0.739209726443769, "grad_norm": 0.07677780278507479, "learning_rate": 9.197588492622887e-06, "loss": 0.5501, "step": 1520 }, { "epoch": 0.7396960486322188, "grad_norm": 0.07562906841082154, "learning_rate": 9.196547882047493e-06, "loss": 0.6453, "step": 1521 }, { "epoch": 0.7401823708206687, "grad_norm": 0.07719961962985727, "learning_rate": 9.195506656096009e-06, "loss": 0.5944, "step": 1522 }, { "epoch": 0.7406686930091185, "grad_norm": 0.07365637597557352, "learning_rate": 9.194464814921116e-06, "loss": 0.5952, "step": 1523 }, { "epoch": 0.7411550151975684, "grad_norm": 0.07537542482795369, "learning_rate": 9.19342235867559e-06, "loss": 0.6007, "step": 1524 }, { "epoch": 0.7416413373860182, "grad_norm": 0.07442924149828235, "learning_rate": 9.192379287512294e-06, "loss": 0.6462, "step": 1525 }, { "epoch": 0.7421276595744681, "grad_norm": 0.07243963193138703, "learning_rate": 9.191335601584184e-06, "loss": 0.5503, "step": 1526 }, { "epoch": 0.7426139817629179, "grad_norm": 0.07705782469231785, "learning_rate": 9.190291301044303e-06, "loss": 0.603, "step": 1527 }, { "epoch": 0.7431003039513678, "grad_norm": 0.07215074604849654, "learning_rate": 9.189246386045787e-06, "loss": 0.5577, "step": 1528 }, { "epoch": 0.7435866261398176, "grad_norm": 0.08000326095818983, "learning_rate": 9.18820085674186e-06, "loss": 0.5661, "step": 1529 }, { "epoch": 0.7440729483282674, "grad_norm": 0.0749021802919331, "learning_rate": 9.187154713285838e-06, "loss": 0.5809, "step": 1530 }, { "epoch": 0.7445592705167173, "grad_norm": 0.0736784956044312, "learning_rate": 9.186107955831127e-06, "loss": 0.5751, "step": 1531 }, { "epoch": 0.7450455927051671, "grad_norm": 0.07678452495549294, "learning_rate": 9.185060584531218e-06, "loss": 0.6519, "step": 1532 }, { "epoch": 0.745531914893617, "grad_norm": 0.07297220390267127, "learning_rate": 9.1840125995397e-06, "loss": 0.5475, "step": 1533 }, { "epoch": 0.7460182370820668, "grad_norm": 0.07344202552366175, "learning_rate": 9.182964001010248e-06, "loss": 0.5823, "step": 1534 }, { "epoch": 0.7465045592705167, "grad_norm": 0.07550904525865863, "learning_rate": 9.181914789096625e-06, "loss": 0.563, "step": 1535 }, { "epoch": 0.7469908814589665, "grad_norm": 0.0761919034104001, "learning_rate": 9.180864963952686e-06, "loss": 0.6179, "step": 1536 }, { "epoch": 0.7474772036474164, "grad_norm": 0.0707945206580333, "learning_rate": 9.179814525732378e-06, "loss": 0.5708, "step": 1537 }, { "epoch": 0.7479635258358662, "grad_norm": 0.07331153646422277, "learning_rate": 9.178763474589734e-06, "loss": 0.6065, "step": 1538 }, { "epoch": 0.7484498480243161, "grad_norm": 0.07577450992313728, "learning_rate": 9.17771181067888e-06, "loss": 0.6273, "step": 1539 }, { "epoch": 0.7489361702127659, "grad_norm": 0.07710105110365151, "learning_rate": 9.17665953415403e-06, "loss": 0.5965, "step": 1540 }, { "epoch": 0.7494224924012158, "grad_norm": 0.07191974951275368, "learning_rate": 9.175606645169489e-06, "loss": 0.5571, "step": 1541 }, { "epoch": 0.7499088145896656, "grad_norm": 0.06956148797674722, "learning_rate": 9.174553143879649e-06, "loss": 0.5568, "step": 1542 }, { "epoch": 0.7499088145896656, "eval_loss": 0.592545211315155, "eval_runtime": 105.2769, "eval_samples_per_second": 288.316, "eval_steps_per_second": 36.048, "step": 1542 }, { "epoch": 0.7503951367781155, "grad_norm": 0.07118021983808168, "learning_rate": 9.173499030438996e-06, "loss": 0.5947, "step": 1543 }, { "epoch": 0.7508814589665653, "grad_norm": 0.07394858052720103, "learning_rate": 9.172444305002105e-06, "loss": 0.5583, "step": 1544 }, { "epoch": 0.7513677811550152, "grad_norm": 0.07357842008611473, "learning_rate": 9.171388967723638e-06, "loss": 0.605, "step": 1545 }, { "epoch": 0.751854103343465, "grad_norm": 0.07210513399010314, "learning_rate": 9.170333018758345e-06, "loss": 0.5773, "step": 1546 }, { "epoch": 0.7523404255319149, "grad_norm": 0.07282937669966806, "learning_rate": 9.169276458261075e-06, "loss": 0.5857, "step": 1547 }, { "epoch": 0.7528267477203647, "grad_norm": 0.07604656523395356, "learning_rate": 9.168219286386757e-06, "loss": 0.5669, "step": 1548 }, { "epoch": 0.7533130699088146, "grad_norm": 0.07091346756396157, "learning_rate": 9.167161503290414e-06, "loss": 0.5809, "step": 1549 }, { "epoch": 0.7537993920972644, "grad_norm": 0.0708038357037518, "learning_rate": 9.166103109127158e-06, "loss": 0.5677, "step": 1550 }, { "epoch": 0.7542857142857143, "grad_norm": 0.07396265084406707, "learning_rate": 9.16504410405219e-06, "loss": 0.6011, "step": 1551 }, { "epoch": 0.7547720364741641, "grad_norm": 0.07753680322894918, "learning_rate": 9.1639844882208e-06, "loss": 0.5704, "step": 1552 }, { "epoch": 0.7552583586626139, "grad_norm": 0.07402147944062856, "learning_rate": 9.162924261788372e-06, "loss": 0.5799, "step": 1553 }, { "epoch": 0.7557446808510638, "grad_norm": 0.0726343876310961, "learning_rate": 9.161863424910373e-06, "loss": 0.5552, "step": 1554 }, { "epoch": 0.7562310030395136, "grad_norm": 0.07709517455087571, "learning_rate": 9.160801977742364e-06, "loss": 0.6229, "step": 1555 }, { "epoch": 0.7567173252279635, "grad_norm": 0.07321368609324581, "learning_rate": 9.159739920439994e-06, "loss": 0.5615, "step": 1556 }, { "epoch": 0.7572036474164133, "grad_norm": 0.07403176107096356, "learning_rate": 9.158677253159003e-06, "loss": 0.5952, "step": 1557 }, { "epoch": 0.7576899696048632, "grad_norm": 0.07811411439216485, "learning_rate": 9.157613976055216e-06, "loss": 0.5982, "step": 1558 }, { "epoch": 0.758176291793313, "grad_norm": 0.07055719503424134, "learning_rate": 9.156550089284553e-06, "loss": 0.5715, "step": 1559 }, { "epoch": 0.7586626139817629, "grad_norm": 0.0713130750447445, "learning_rate": 9.15548559300302e-06, "loss": 0.5458, "step": 1560 }, { "epoch": 0.7591489361702127, "grad_norm": 0.0735660614070684, "learning_rate": 9.154420487366713e-06, "loss": 0.5719, "step": 1561 }, { "epoch": 0.7596352583586626, "grad_norm": 0.07046788817877819, "learning_rate": 9.153354772531819e-06, "loss": 0.5639, "step": 1562 }, { "epoch": 0.7601215805471124, "grad_norm": 0.07052317679863633, "learning_rate": 9.152288448654612e-06, "loss": 0.5895, "step": 1563 }, { "epoch": 0.7606079027355623, "grad_norm": 0.07196556264515612, "learning_rate": 9.151221515891455e-06, "loss": 0.6189, "step": 1564 }, { "epoch": 0.7610942249240121, "grad_norm": 0.07100226559825275, "learning_rate": 9.150153974398804e-06, "loss": 0.573, "step": 1565 }, { "epoch": 0.761580547112462, "grad_norm": 0.07656318474867924, "learning_rate": 9.1490858243332e-06, "loss": 0.5786, "step": 1566 }, { "epoch": 0.7620668693009118, "grad_norm": 0.07185257662473404, "learning_rate": 9.148017065851276e-06, "loss": 0.5807, "step": 1567 }, { "epoch": 0.7625531914893617, "grad_norm": 0.07661574876741803, "learning_rate": 9.146947699109753e-06, "loss": 0.6625, "step": 1568 }, { "epoch": 0.7630395136778115, "grad_norm": 0.0706785318579316, "learning_rate": 9.145877724265444e-06, "loss": 0.5808, "step": 1569 }, { "epoch": 0.7635258358662614, "grad_norm": 0.07030773844311922, "learning_rate": 9.144807141475244e-06, "loss": 0.5835, "step": 1570 }, { "epoch": 0.7640121580547112, "grad_norm": 0.07597128889382836, "learning_rate": 9.143735950896143e-06, "loss": 0.5653, "step": 1571 }, { "epoch": 0.7644984802431611, "grad_norm": 0.07157429771445756, "learning_rate": 9.142664152685224e-06, "loss": 0.5802, "step": 1572 }, { "epoch": 0.7649848024316109, "grad_norm": 0.07562191397123438, "learning_rate": 9.141591746999648e-06, "loss": 0.6153, "step": 1573 }, { "epoch": 0.7654711246200608, "grad_norm": 0.06858514426249837, "learning_rate": 9.140518733996672e-06, "loss": 0.5499, "step": 1574 }, { "epoch": 0.7659574468085106, "grad_norm": 0.07216713056338284, "learning_rate": 9.139445113833644e-06, "loss": 0.5926, "step": 1575 }, { "epoch": 0.7664437689969605, "grad_norm": 0.07359980233813823, "learning_rate": 9.138370886667996e-06, "loss": 0.6309, "step": 1576 }, { "epoch": 0.7669300911854103, "grad_norm": 0.07164340392473688, "learning_rate": 9.137296052657252e-06, "loss": 0.5884, "step": 1577 }, { "epoch": 0.7674164133738601, "grad_norm": 0.07284243924280691, "learning_rate": 9.136220611959023e-06, "loss": 0.5624, "step": 1578 }, { "epoch": 0.76790273556231, "grad_norm": 0.07335630182395572, "learning_rate": 9.135144564731012e-06, "loss": 0.5797, "step": 1579 }, { "epoch": 0.7683890577507598, "grad_norm": 0.07427718215377827, "learning_rate": 9.134067911131008e-06, "loss": 0.5773, "step": 1580 }, { "epoch": 0.7688753799392097, "grad_norm": 0.07631953773449411, "learning_rate": 9.13299065131689e-06, "loss": 0.5912, "step": 1581 }, { "epoch": 0.7693617021276595, "grad_norm": 0.07475723621154033, "learning_rate": 9.131912785446628e-06, "loss": 0.5787, "step": 1582 }, { "epoch": 0.7698480243161094, "grad_norm": 0.06871487386066841, "learning_rate": 9.130834313678275e-06, "loss": 0.572, "step": 1583 }, { "epoch": 0.7703343465045592, "grad_norm": 0.07118428741912614, "learning_rate": 9.12975523616998e-06, "loss": 0.5493, "step": 1584 }, { "epoch": 0.7708206686930091, "grad_norm": 0.08795264332600707, "learning_rate": 9.128675553079974e-06, "loss": 0.592, "step": 1585 }, { "epoch": 0.7713069908814589, "grad_norm": 0.07580482493684017, "learning_rate": 9.127595264566584e-06, "loss": 0.5726, "step": 1586 }, { "epoch": 0.7717933130699088, "grad_norm": 0.07397115078670591, "learning_rate": 9.12651437078822e-06, "loss": 0.5885, "step": 1587 }, { "epoch": 0.7722796352583586, "grad_norm": 0.0736965848383469, "learning_rate": 9.125432871903383e-06, "loss": 0.5564, "step": 1588 }, { "epoch": 0.7727659574468085, "grad_norm": 0.07556312629489341, "learning_rate": 9.124350768070664e-06, "loss": 0.5867, "step": 1589 }, { "epoch": 0.7732522796352583, "grad_norm": 0.0723278569344617, "learning_rate": 9.123268059448738e-06, "loss": 0.5773, "step": 1590 }, { "epoch": 0.7737386018237082, "grad_norm": 0.07111236027265931, "learning_rate": 9.122184746196375e-06, "loss": 0.5606, "step": 1591 }, { "epoch": 0.774224924012158, "grad_norm": 0.07314998368055502, "learning_rate": 9.12110082847243e-06, "loss": 0.5652, "step": 1592 }, { "epoch": 0.7747112462006079, "grad_norm": 0.08077715198771841, "learning_rate": 9.120016306435845e-06, "loss": 0.5869, "step": 1593 }, { "epoch": 0.7751975683890577, "grad_norm": 0.07637817839652904, "learning_rate": 9.118931180245657e-06, "loss": 0.584, "step": 1594 }, { "epoch": 0.7756838905775076, "grad_norm": 0.09806405502936687, "learning_rate": 9.117845450060983e-06, "loss": 0.6143, "step": 1595 }, { "epoch": 0.7761702127659574, "grad_norm": 0.0762660796563828, "learning_rate": 9.116759116041037e-06, "loss": 0.6288, "step": 1596 }, { "epoch": 0.7766565349544073, "grad_norm": 0.07261781541959192, "learning_rate": 9.115672178345111e-06, "loss": 0.5465, "step": 1597 }, { "epoch": 0.7771428571428571, "grad_norm": 0.0775499486740439, "learning_rate": 9.114584637132601e-06, "loss": 0.5818, "step": 1598 }, { "epoch": 0.777629179331307, "grad_norm": 0.07576487990215618, "learning_rate": 9.113496492562977e-06, "loss": 0.5936, "step": 1599 }, { "epoch": 0.7781155015197568, "grad_norm": 0.07689785922442594, "learning_rate": 9.112407744795803e-06, "loss": 0.6061, "step": 1600 }, { "epoch": 0.7786018237082067, "grad_norm": 0.07581730234825507, "learning_rate": 9.111318393990736e-06, "loss": 0.5767, "step": 1601 }, { "epoch": 0.7790881458966565, "grad_norm": 0.07666301558140577, "learning_rate": 9.11022844030751e-06, "loss": 0.5961, "step": 1602 }, { "epoch": 0.7795744680851063, "grad_norm": 0.07670102786083799, "learning_rate": 9.10913788390596e-06, "loss": 0.6211, "step": 1603 }, { "epoch": 0.7800607902735562, "grad_norm": 0.07367474284677104, "learning_rate": 9.108046724946e-06, "loss": 0.5916, "step": 1604 }, { "epoch": 0.780547112462006, "grad_norm": 0.07180226798472734, "learning_rate": 9.10695496358764e-06, "loss": 0.5394, "step": 1605 }, { "epoch": 0.7810334346504559, "grad_norm": 0.07318518812704777, "learning_rate": 9.105862599990972e-06, "loss": 0.5515, "step": 1606 }, { "epoch": 0.7815197568389057, "grad_norm": 0.07806489299053469, "learning_rate": 9.104769634316177e-06, "loss": 0.6249, "step": 1607 }, { "epoch": 0.7820060790273556, "grad_norm": 0.07190729959180565, "learning_rate": 9.103676066723528e-06, "loss": 0.5847, "step": 1608 }, { "epoch": 0.7824924012158054, "grad_norm": 0.07142981952775516, "learning_rate": 9.102581897373385e-06, "loss": 0.5603, "step": 1609 }, { "epoch": 0.7829787234042553, "grad_norm": 0.07191075091806294, "learning_rate": 9.101487126426193e-06, "loss": 0.5567, "step": 1610 }, { "epoch": 0.7834650455927051, "grad_norm": 0.07345364503929963, "learning_rate": 9.100391754042493e-06, "loss": 0.5812, "step": 1611 }, { "epoch": 0.783951367781155, "grad_norm": 0.07743056775911689, "learning_rate": 9.099295780382904e-06, "loss": 0.6485, "step": 1612 }, { "epoch": 0.7844376899696048, "grad_norm": 0.07973017596861867, "learning_rate": 9.098199205608138e-06, "loss": 0.6669, "step": 1613 }, { "epoch": 0.7849240121580547, "grad_norm": 0.0738459429073468, "learning_rate": 9.097102029878998e-06, "loss": 0.5714, "step": 1614 }, { "epoch": 0.7854103343465045, "grad_norm": 0.07355050608545767, "learning_rate": 9.096004253356369e-06, "loss": 0.6277, "step": 1615 }, { "epoch": 0.7858966565349544, "grad_norm": 0.07575738283900403, "learning_rate": 9.09490587620123e-06, "loss": 0.5462, "step": 1616 }, { "epoch": 0.7863829787234042, "grad_norm": 0.07472663205739084, "learning_rate": 9.093806898574647e-06, "loss": 0.5594, "step": 1617 }, { "epoch": 0.7868693009118541, "grad_norm": 0.07565495488852164, "learning_rate": 9.092707320637769e-06, "loss": 0.6028, "step": 1618 }, { "epoch": 0.7873556231003039, "grad_norm": 0.07374015368399801, "learning_rate": 9.091607142551839e-06, "loss": 0.5813, "step": 1619 }, { "epoch": 0.7878419452887538, "grad_norm": 0.07705942762753146, "learning_rate": 9.090506364478183e-06, "loss": 0.5623, "step": 1620 }, { "epoch": 0.7883282674772036, "grad_norm": 0.07275002427389189, "learning_rate": 9.089404986578221e-06, "loss": 0.5622, "step": 1621 }, { "epoch": 0.7888145896656535, "grad_norm": 0.07581372267510471, "learning_rate": 9.088303009013454e-06, "loss": 0.6029, "step": 1622 }, { "epoch": 0.7893009118541033, "grad_norm": 0.073760722989789, "learning_rate": 9.08720043194548e-06, "loss": 0.5783, "step": 1623 }, { "epoch": 0.7897872340425532, "grad_norm": 0.07138247123984222, "learning_rate": 9.086097255535974e-06, "loss": 0.5845, "step": 1624 }, { "epoch": 0.790273556231003, "grad_norm": 0.07239373294843275, "learning_rate": 9.084993479946706e-06, "loss": 0.6144, "step": 1625 }, { "epoch": 0.7907598784194528, "grad_norm": 0.0734432758313814, "learning_rate": 9.083889105339532e-06, "loss": 0.5636, "step": 1626 }, { "epoch": 0.7912462006079027, "grad_norm": 0.07684721680070418, "learning_rate": 9.082784131876398e-06, "loss": 0.5955, "step": 1627 }, { "epoch": 0.7917325227963525, "grad_norm": 0.0734344558953088, "learning_rate": 9.081678559719334e-06, "loss": 0.5764, "step": 1628 }, { "epoch": 0.7922188449848024, "grad_norm": 0.07336420163099079, "learning_rate": 9.080572389030458e-06, "loss": 0.5714, "step": 1629 }, { "epoch": 0.7927051671732522, "grad_norm": 0.07518446872847741, "learning_rate": 9.079465619971979e-06, "loss": 0.5727, "step": 1630 }, { "epoch": 0.7931914893617021, "grad_norm": 0.07557529988370244, "learning_rate": 9.078358252706194e-06, "loss": 0.5866, "step": 1631 }, { "epoch": 0.7936778115501519, "grad_norm": 0.07924937651398163, "learning_rate": 9.077250287395482e-06, "loss": 0.5831, "step": 1632 }, { "epoch": 0.7941641337386018, "grad_norm": 0.07111948359091794, "learning_rate": 9.07614172420232e-06, "loss": 0.5594, "step": 1633 }, { "epoch": 0.7946504559270516, "grad_norm": 0.07511669959189178, "learning_rate": 9.075032563289256e-06, "loss": 0.5751, "step": 1634 }, { "epoch": 0.7951367781155015, "grad_norm": 0.07439036106895336, "learning_rate": 9.073922804818944e-06, "loss": 0.5805, "step": 1635 }, { "epoch": 0.7956231003039513, "grad_norm": 0.07181786016281136, "learning_rate": 9.072812448954117e-06, "loss": 0.5591, "step": 1636 }, { "epoch": 0.7961094224924012, "grad_norm": 0.07878442540853178, "learning_rate": 9.071701495857593e-06, "loss": 0.5561, "step": 1637 }, { "epoch": 0.796595744680851, "grad_norm": 0.07369341255058924, "learning_rate": 9.070589945692281e-06, "loss": 0.5723, "step": 1638 }, { "epoch": 0.7970820668693009, "grad_norm": 0.07196748777154602, "learning_rate": 9.069477798621178e-06, "loss": 0.5796, "step": 1639 }, { "epoch": 0.7975683890577507, "grad_norm": 0.07863094262229525, "learning_rate": 9.068365054807369e-06, "loss": 0.6029, "step": 1640 }, { "epoch": 0.7980547112462006, "grad_norm": 0.07236803516073441, "learning_rate": 9.067251714414023e-06, "loss": 0.6115, "step": 1641 }, { "epoch": 0.7985410334346504, "grad_norm": 0.07651305023013237, "learning_rate": 9.0661377776044e-06, "loss": 0.6132, "step": 1642 }, { "epoch": 0.7990273556231003, "grad_norm": 0.07015096359229805, "learning_rate": 9.065023244541846e-06, "loss": 0.5745, "step": 1643 }, { "epoch": 0.7995136778115501, "grad_norm": 0.07287519876258483, "learning_rate": 9.063908115389794e-06, "loss": 0.5588, "step": 1644 }, { "epoch": 0.8, "grad_norm": 0.07328509906921751, "learning_rate": 9.062792390311768e-06, "loss": 0.5725, "step": 1645 }, { "epoch": 0.8004863221884498, "grad_norm": 0.07523232831435116, "learning_rate": 9.061676069471372e-06, "loss": 0.6283, "step": 1646 }, { "epoch": 0.8009726443768997, "grad_norm": 0.07248796754762984, "learning_rate": 9.060559153032305e-06, "loss": 0.5724, "step": 1647 }, { "epoch": 0.8014589665653495, "grad_norm": 0.07389920446623921, "learning_rate": 9.059441641158348e-06, "loss": 0.5958, "step": 1648 }, { "epoch": 0.8019452887537994, "grad_norm": 0.0691834517848783, "learning_rate": 9.05832353401337e-06, "loss": 0.5635, "step": 1649 }, { "epoch": 0.8024316109422492, "grad_norm": 0.087411497760545, "learning_rate": 9.057204831761334e-06, "loss": 0.5707, "step": 1650 }, { "epoch": 0.802917933130699, "grad_norm": 0.07525238297349253, "learning_rate": 9.056085534566283e-06, "loss": 0.5705, "step": 1651 }, { "epoch": 0.8034042553191489, "grad_norm": 0.07262995236326107, "learning_rate": 9.054965642592346e-06, "loss": 0.5645, "step": 1652 }, { "epoch": 0.8038905775075987, "grad_norm": 0.07417723618128876, "learning_rate": 9.053845156003746e-06, "loss": 0.5725, "step": 1653 }, { "epoch": 0.8043768996960486, "grad_norm": 0.07106478714913289, "learning_rate": 9.052724074964789e-06, "loss": 0.5464, "step": 1654 }, { "epoch": 0.8048632218844984, "grad_norm": 0.07773557497453094, "learning_rate": 9.051602399639867e-06, "loss": 0.613, "step": 1655 }, { "epoch": 0.8053495440729483, "grad_norm": 0.07166948375181545, "learning_rate": 9.050480130193461e-06, "loss": 0.5715, "step": 1656 }, { "epoch": 0.8058358662613981, "grad_norm": 0.07029399058062628, "learning_rate": 9.049357266790143e-06, "loss": 0.5864, "step": 1657 }, { "epoch": 0.806322188449848, "grad_norm": 0.07442190646574257, "learning_rate": 9.048233809594561e-06, "loss": 0.6446, "step": 1658 }, { "epoch": 0.8068085106382978, "grad_norm": 0.07440493159559092, "learning_rate": 9.047109758771467e-06, "loss": 0.5952, "step": 1659 }, { "epoch": 0.8072948328267477, "grad_norm": 0.07314544206796571, "learning_rate": 9.04598511448568e-06, "loss": 0.5859, "step": 1660 }, { "epoch": 0.8077811550151975, "grad_norm": 0.06861782040904005, "learning_rate": 9.044859876902124e-06, "loss": 0.5673, "step": 1661 }, { "epoch": 0.8082674772036474, "grad_norm": 0.07227060176254103, "learning_rate": 9.043734046185799e-06, "loss": 0.5728, "step": 1662 }, { "epoch": 0.8087537993920972, "grad_norm": 0.07117328740849665, "learning_rate": 9.042607622501794e-06, "loss": 0.5513, "step": 1663 }, { "epoch": 0.8092401215805471, "grad_norm": 0.07466451582311796, "learning_rate": 9.04148060601529e-06, "loss": 0.5771, "step": 1664 }, { "epoch": 0.8097264437689969, "grad_norm": 0.07409395764568921, "learning_rate": 9.040352996891549e-06, "loss": 0.5923, "step": 1665 }, { "epoch": 0.8102127659574468, "grad_norm": 0.0727421232905243, "learning_rate": 9.039224795295923e-06, "loss": 0.5967, "step": 1666 }, { "epoch": 0.8106990881458966, "grad_norm": 0.07328631324290474, "learning_rate": 9.038096001393847e-06, "loss": 0.6252, "step": 1667 }, { "epoch": 0.8111854103343465, "grad_norm": 0.07561124672077546, "learning_rate": 9.036966615350848e-06, "loss": 0.5784, "step": 1668 }, { "epoch": 0.8116717325227963, "grad_norm": 0.07346062434985712, "learning_rate": 9.03583663733254e-06, "loss": 0.584, "step": 1669 }, { "epoch": 0.8121580547112462, "grad_norm": 0.07940070889841651, "learning_rate": 9.034706067504618e-06, "loss": 0.5788, "step": 1670 }, { "epoch": 0.812644376899696, "grad_norm": 0.07187568885659784, "learning_rate": 9.033574906032866e-06, "loss": 0.5919, "step": 1671 }, { "epoch": 0.813130699088146, "grad_norm": 0.06908340343975836, "learning_rate": 9.032443153083163e-06, "loss": 0.5803, "step": 1672 }, { "epoch": 0.8136170212765957, "grad_norm": 0.07355584516555978, "learning_rate": 9.03131080882146e-06, "loss": 0.583, "step": 1673 }, { "epoch": 0.8141033434650456, "grad_norm": 0.07423856338194974, "learning_rate": 9.030177873413806e-06, "loss": 0.5371, "step": 1674 }, { "epoch": 0.8145896656534954, "grad_norm": 0.0731960037760341, "learning_rate": 9.029044347026332e-06, "loss": 0.5673, "step": 1675 }, { "epoch": 0.8150759878419452, "grad_norm": 0.08038420495259692, "learning_rate": 9.02791022982526e-06, "loss": 0.5931, "step": 1676 }, { "epoch": 0.8155623100303951, "grad_norm": 0.07341009886182812, "learning_rate": 9.02677552197689e-06, "loss": 0.6173, "step": 1677 }, { "epoch": 0.8160486322188449, "grad_norm": 0.07212165853233529, "learning_rate": 9.025640223647616e-06, "loss": 0.5804, "step": 1678 }, { "epoch": 0.8165349544072948, "grad_norm": 0.07438445712530184, "learning_rate": 9.024504335003918e-06, "loss": 0.568, "step": 1679 }, { "epoch": 0.8170212765957446, "grad_norm": 0.07745216994949357, "learning_rate": 9.023367856212362e-06, "loss": 0.5893, "step": 1680 }, { "epoch": 0.8175075987841945, "grad_norm": 0.07782732242653724, "learning_rate": 9.022230787439597e-06, "loss": 0.5706, "step": 1681 }, { "epoch": 0.8179939209726443, "grad_norm": 0.07399645125005298, "learning_rate": 9.021093128852363e-06, "loss": 0.5641, "step": 1682 }, { "epoch": 0.8184802431610942, "grad_norm": 0.07774936464533533, "learning_rate": 9.019954880617486e-06, "loss": 0.5965, "step": 1683 }, { "epoch": 0.818966565349544, "grad_norm": 0.07350171443380829, "learning_rate": 9.018816042901873e-06, "loss": 0.5888, "step": 1684 }, { "epoch": 0.819452887537994, "grad_norm": 0.07296282203618629, "learning_rate": 9.017676615872524e-06, "loss": 0.578, "step": 1685 }, { "epoch": 0.8199392097264437, "grad_norm": 0.07458383558568267, "learning_rate": 9.016536599696524e-06, "loss": 0.5937, "step": 1686 }, { "epoch": 0.8204255319148936, "grad_norm": 0.07516328495212585, "learning_rate": 9.015395994541041e-06, "loss": 0.6027, "step": 1687 }, { "epoch": 0.8209118541033434, "grad_norm": 0.07760485343942312, "learning_rate": 9.014254800573334e-06, "loss": 0.5547, "step": 1688 }, { "epoch": 0.8213981762917933, "grad_norm": 0.07485184169756832, "learning_rate": 9.013113017960747e-06, "loss": 0.5717, "step": 1689 }, { "epoch": 0.8218844984802431, "grad_norm": 0.07375602770450967, "learning_rate": 9.011970646870706e-06, "loss": 0.5789, "step": 1690 }, { "epoch": 0.822370820668693, "grad_norm": 0.07592702782230341, "learning_rate": 9.01082768747073e-06, "loss": 0.5876, "step": 1691 }, { "epoch": 0.8228571428571428, "grad_norm": 0.07709204242085249, "learning_rate": 9.009684139928419e-06, "loss": 0.5919, "step": 1692 }, { "epoch": 0.8233434650455927, "grad_norm": 0.07344620360493821, "learning_rate": 9.00854000441146e-06, "loss": 0.5738, "step": 1693 }, { "epoch": 0.8238297872340425, "grad_norm": 0.07839384699549824, "learning_rate": 9.007395281087632e-06, "loss": 0.5888, "step": 1694 }, { "epoch": 0.8243161094224924, "grad_norm": 0.07553135863620744, "learning_rate": 9.006249970124793e-06, "loss": 0.6127, "step": 1695 }, { "epoch": 0.8248024316109422, "grad_norm": 0.07187484456694787, "learning_rate": 9.005104071690887e-06, "loss": 0.5475, "step": 1696 }, { "epoch": 0.8252887537993921, "grad_norm": 0.07390033022275086, "learning_rate": 9.00395758595395e-06, "loss": 0.5847, "step": 1697 }, { "epoch": 0.8257750759878419, "grad_norm": 0.07247525563929644, "learning_rate": 9.002810513082104e-06, "loss": 0.5889, "step": 1698 }, { "epoch": 0.8262613981762917, "grad_norm": 0.0772085413640557, "learning_rate": 9.00166285324355e-06, "loss": 0.6176, "step": 1699 }, { "epoch": 0.8267477203647416, "grad_norm": 0.08567194868303393, "learning_rate": 9.00051460660658e-06, "loss": 0.6075, "step": 1700 }, { "epoch": 0.8272340425531914, "grad_norm": 0.07746542664192362, "learning_rate": 8.999365773339573e-06, "loss": 0.5719, "step": 1701 }, { "epoch": 0.8277203647416413, "grad_norm": 0.07266215749314871, "learning_rate": 8.998216353610989e-06, "loss": 0.5798, "step": 1702 }, { "epoch": 0.8282066869300911, "grad_norm": 0.07253835215721129, "learning_rate": 8.99706634758938e-06, "loss": 0.5878, "step": 1703 }, { "epoch": 0.828693009118541, "grad_norm": 0.07824890088111018, "learning_rate": 8.995915755443382e-06, "loss": 0.6092, "step": 1704 }, { "epoch": 0.8291793313069908, "grad_norm": 0.0760099245256956, "learning_rate": 8.994764577341715e-06, "loss": 0.6274, "step": 1705 }, { "epoch": 0.8296656534954407, "grad_norm": 0.07046188661348363, "learning_rate": 8.993612813453186e-06, "loss": 0.5615, "step": 1706 }, { "epoch": 0.8301519756838905, "grad_norm": 0.07280733043950458, "learning_rate": 8.992460463946689e-06, "loss": 0.617, "step": 1707 }, { "epoch": 0.8306382978723404, "grad_norm": 0.07386435769644766, "learning_rate": 8.9913075289912e-06, "loss": 0.5753, "step": 1708 }, { "epoch": 0.8311246200607902, "grad_norm": 0.07236056195150549, "learning_rate": 8.99015400875579e-06, "loss": 0.5633, "step": 1709 }, { "epoch": 0.8316109422492401, "grad_norm": 0.07542819798329134, "learning_rate": 8.988999903409604e-06, "loss": 0.5836, "step": 1710 }, { "epoch": 0.8320972644376899, "grad_norm": 0.07747168818735184, "learning_rate": 8.987845213121879e-06, "loss": 0.6039, "step": 1711 }, { "epoch": 0.8325835866261398, "grad_norm": 0.077752119336714, "learning_rate": 8.986689938061938e-06, "loss": 0.6165, "step": 1712 }, { "epoch": 0.8330699088145896, "grad_norm": 0.07806058042030259, "learning_rate": 8.985534078399191e-06, "loss": 0.6548, "step": 1713 }, { "epoch": 0.8335562310030395, "grad_norm": 0.07553643279218694, "learning_rate": 8.98437763430313e-06, "loss": 0.6244, "step": 1714 }, { "epoch": 0.8340425531914893, "grad_norm": 0.07671067908323834, "learning_rate": 8.983220605943335e-06, "loss": 0.6183, "step": 1715 }, { "epoch": 0.8345288753799393, "grad_norm": 0.07741727677689454, "learning_rate": 8.98206299348947e-06, "loss": 0.6228, "step": 1716 }, { "epoch": 0.835015197568389, "grad_norm": 0.07216911966672036, "learning_rate": 8.980904797111287e-06, "loss": 0.5604, "step": 1717 }, { "epoch": 0.835501519756839, "grad_norm": 0.07440058651270015, "learning_rate": 8.97974601697862e-06, "loss": 0.5811, "step": 1718 }, { "epoch": 0.8359878419452887, "grad_norm": 0.0746064900492746, "learning_rate": 8.978586653261395e-06, "loss": 0.589, "step": 1719 }, { "epoch": 0.8364741641337387, "grad_norm": 0.07235270763835797, "learning_rate": 8.977426706129615e-06, "loss": 0.5899, "step": 1720 }, { "epoch": 0.8369604863221884, "grad_norm": 0.0752546348466607, "learning_rate": 8.976266175753376e-06, "loss": 0.6022, "step": 1721 }, { "epoch": 0.8374468085106384, "grad_norm": 0.0748912661462555, "learning_rate": 8.975105062302856e-06, "loss": 0.6094, "step": 1722 }, { "epoch": 0.8379331306990881, "grad_norm": 0.0733073753146529, "learning_rate": 8.973943365948318e-06, "loss": 0.5615, "step": 1723 }, { "epoch": 0.8384194528875379, "grad_norm": 0.0734476230232227, "learning_rate": 8.972781086860115e-06, "loss": 0.5753, "step": 1724 }, { "epoch": 0.8389057750759878, "grad_norm": 0.07633658867437207, "learning_rate": 8.971618225208678e-06, "loss": 0.6667, "step": 1725 }, { "epoch": 0.8393920972644376, "grad_norm": 0.07541740130586612, "learning_rate": 8.970454781164529e-06, "loss": 0.5845, "step": 1726 }, { "epoch": 0.8398784194528875, "grad_norm": 0.07414936120986003, "learning_rate": 8.969290754898272e-06, "loss": 0.5696, "step": 1727 }, { "epoch": 0.8403647416413373, "grad_norm": 0.07226168570528396, "learning_rate": 8.968126146580602e-06, "loss": 0.5913, "step": 1728 }, { "epoch": 0.8408510638297872, "grad_norm": 0.07042620815515654, "learning_rate": 8.966960956382293e-06, "loss": 0.5856, "step": 1729 }, { "epoch": 0.841337386018237, "grad_norm": 0.07296803356505525, "learning_rate": 8.965795184474209e-06, "loss": 0.6134, "step": 1730 }, { "epoch": 0.841823708206687, "grad_norm": 0.07427100045337967, "learning_rate": 8.964628831027296e-06, "loss": 0.5921, "step": 1731 }, { "epoch": 0.8423100303951367, "grad_norm": 0.07245783357819416, "learning_rate": 8.963461896212585e-06, "loss": 0.5937, "step": 1732 }, { "epoch": 0.8427963525835866, "grad_norm": 0.0761264904307955, "learning_rate": 8.962294380201195e-06, "loss": 0.5871, "step": 1733 }, { "epoch": 0.8432826747720364, "grad_norm": 0.07544877652570996, "learning_rate": 8.961126283164328e-06, "loss": 0.5959, "step": 1734 }, { "epoch": 0.8437689969604864, "grad_norm": 0.07599806425823115, "learning_rate": 8.959957605273274e-06, "loss": 0.5895, "step": 1735 }, { "epoch": 0.8442553191489361, "grad_norm": 0.0770339060887551, "learning_rate": 8.958788346699405e-06, "loss": 0.5711, "step": 1736 }, { "epoch": 0.844741641337386, "grad_norm": 0.07250882644105865, "learning_rate": 8.957618507614182e-06, "loss": 0.5773, "step": 1737 }, { "epoch": 0.8452279635258358, "grad_norm": 0.07002626608661978, "learning_rate": 8.956448088189144e-06, "loss": 0.5506, "step": 1738 }, { "epoch": 0.8457142857142858, "grad_norm": 0.07628888894734462, "learning_rate": 8.955277088595924e-06, "loss": 0.605, "step": 1739 }, { "epoch": 0.8462006079027355, "grad_norm": 0.07702051631590402, "learning_rate": 8.954105509006235e-06, "loss": 0.6031, "step": 1740 }, { "epoch": 0.8466869300911855, "grad_norm": 0.07260300281865754, "learning_rate": 8.952933349591872e-06, "loss": 0.5501, "step": 1741 }, { "epoch": 0.8471732522796352, "grad_norm": 0.07389498738809527, "learning_rate": 8.951760610524725e-06, "loss": 0.6181, "step": 1742 }, { "epoch": 0.8476595744680852, "grad_norm": 0.07062573382571884, "learning_rate": 8.950587291976758e-06, "loss": 0.5402, "step": 1743 }, { "epoch": 0.848145896656535, "grad_norm": 0.07350470206207332, "learning_rate": 8.949413394120026e-06, "loss": 0.5595, "step": 1744 }, { "epoch": 0.8486322188449849, "grad_norm": 0.07699941052844284, "learning_rate": 8.94823891712667e-06, "loss": 0.5917, "step": 1745 }, { "epoch": 0.8491185410334346, "grad_norm": 0.0741770418789494, "learning_rate": 8.94706386116891e-06, "loss": 0.5941, "step": 1746 }, { "epoch": 0.8496048632218844, "grad_norm": 0.0759476218643951, "learning_rate": 8.945888226419056e-06, "loss": 0.6261, "step": 1747 }, { "epoch": 0.8500911854103343, "grad_norm": 0.07298008462158603, "learning_rate": 8.944712013049505e-06, "loss": 0.5632, "step": 1748 }, { "epoch": 0.8505775075987841, "grad_norm": 0.07334761932590854, "learning_rate": 8.943535221232731e-06, "loss": 0.5938, "step": 1749 }, { "epoch": 0.851063829787234, "grad_norm": 0.08062886925564591, "learning_rate": 8.9423578511413e-06, "loss": 0.6011, "step": 1750 }, { "epoch": 0.8515501519756838, "grad_norm": 0.07387128105610562, "learning_rate": 8.941179902947856e-06, "loss": 0.5856, "step": 1751 }, { "epoch": 0.8520364741641338, "grad_norm": 0.07063102885367174, "learning_rate": 8.940001376825136e-06, "loss": 0.567, "step": 1752 }, { "epoch": 0.8525227963525835, "grad_norm": 0.07809396720464015, "learning_rate": 8.938822272945956e-06, "loss": 0.647, "step": 1753 }, { "epoch": 0.8530091185410335, "grad_norm": 0.07265612064070369, "learning_rate": 8.937642591483218e-06, "loss": 0.5822, "step": 1754 }, { "epoch": 0.8534954407294832, "grad_norm": 0.07511471917960437, "learning_rate": 8.936462332609907e-06, "loss": 0.6235, "step": 1755 }, { "epoch": 0.8539817629179332, "grad_norm": 0.07179712358551885, "learning_rate": 8.935281496499098e-06, "loss": 0.6184, "step": 1756 }, { "epoch": 0.854468085106383, "grad_norm": 0.07384253094153809, "learning_rate": 8.934100083323945e-06, "loss": 0.6146, "step": 1757 }, { "epoch": 0.8549544072948329, "grad_norm": 0.07414507661436152, "learning_rate": 8.93291809325769e-06, "loss": 0.6134, "step": 1758 }, { "epoch": 0.8554407294832826, "grad_norm": 0.07291158653408171, "learning_rate": 8.931735526473657e-06, "loss": 0.6224, "step": 1759 }, { "epoch": 0.8559270516717326, "grad_norm": 0.07800586892031393, "learning_rate": 8.93055238314526e-06, "loss": 0.577, "step": 1760 }, { "epoch": 0.8564133738601823, "grad_norm": 0.07381345121072054, "learning_rate": 8.929368663445985e-06, "loss": 0.597, "step": 1761 }, { "epoch": 0.8568996960486323, "grad_norm": 0.07313500176625996, "learning_rate": 8.92818436754942e-06, "loss": 0.5374, "step": 1762 }, { "epoch": 0.857386018237082, "grad_norm": 0.07754940049868893, "learning_rate": 8.926999495629225e-06, "loss": 0.5767, "step": 1763 }, { "epoch": 0.857872340425532, "grad_norm": 0.07685640896181924, "learning_rate": 8.925814047859147e-06, "loss": 0.6029, "step": 1764 }, { "epoch": 0.8583586626139817, "grad_norm": 0.08123464359078096, "learning_rate": 8.92462802441302e-06, "loss": 0.615, "step": 1765 }, { "epoch": 0.8588449848024317, "grad_norm": 0.07521549285135744, "learning_rate": 8.92344142546476e-06, "loss": 0.5603, "step": 1766 }, { "epoch": 0.8593313069908814, "grad_norm": 0.07407625619133144, "learning_rate": 8.92225425118837e-06, "loss": 0.5616, "step": 1767 }, { "epoch": 0.8598176291793314, "grad_norm": 0.07479406319718562, "learning_rate": 8.92106650175793e-06, "loss": 0.5815, "step": 1768 }, { "epoch": 0.8603039513677812, "grad_norm": 0.0775809123259529, "learning_rate": 8.919878177347619e-06, "loss": 0.5831, "step": 1769 }, { "epoch": 0.8607902735562311, "grad_norm": 0.07290792413533646, "learning_rate": 8.918689278131684e-06, "loss": 0.5787, "step": 1770 }, { "epoch": 0.8612765957446809, "grad_norm": 0.07430533803119772, "learning_rate": 8.917499804284466e-06, "loss": 0.607, "step": 1771 }, { "epoch": 0.8617629179331306, "grad_norm": 0.07137457198774201, "learning_rate": 8.91630975598039e-06, "loss": 0.616, "step": 1772 }, { "epoch": 0.8622492401215806, "grad_norm": 0.0732224722333857, "learning_rate": 8.91511913339396e-06, "loss": 0.5953, "step": 1773 }, { "epoch": 0.8627355623100303, "grad_norm": 0.07713203812817963, "learning_rate": 8.913927936699765e-06, "loss": 0.5919, "step": 1774 }, { "epoch": 0.8632218844984803, "grad_norm": 0.07125787615846159, "learning_rate": 8.912736166072487e-06, "loss": 0.5611, "step": 1775 }, { "epoch": 0.86370820668693, "grad_norm": 0.0790858738192093, "learning_rate": 8.91154382168688e-06, "loss": 0.6164, "step": 1776 }, { "epoch": 0.86419452887538, "grad_norm": 0.07457881168675248, "learning_rate": 8.910350903717793e-06, "loss": 0.5762, "step": 1777 }, { "epoch": 0.8646808510638297, "grad_norm": 0.07382250428158418, "learning_rate": 8.90915741234015e-06, "loss": 0.5919, "step": 1778 }, { "epoch": 0.8651671732522797, "grad_norm": 0.07145584394044313, "learning_rate": 8.907963347728964e-06, "loss": 0.5835, "step": 1779 }, { "epoch": 0.8656534954407294, "grad_norm": 0.07250899102574372, "learning_rate": 8.90676871005933e-06, "loss": 0.5662, "step": 1780 }, { "epoch": 0.8661398176291794, "grad_norm": 0.07389342039749239, "learning_rate": 8.90557349950643e-06, "loss": 0.5811, "step": 1781 }, { "epoch": 0.8666261398176291, "grad_norm": 0.0755531278875473, "learning_rate": 8.904377716245525e-06, "loss": 0.5741, "step": 1782 }, { "epoch": 0.867112462006079, "grad_norm": 0.07300211455686316, "learning_rate": 8.903181360451966e-06, "loss": 0.5679, "step": 1783 }, { "epoch": 0.8675987841945288, "grad_norm": 0.07375435548275153, "learning_rate": 8.901984432301185e-06, "loss": 0.5664, "step": 1784 }, { "epoch": 0.8680851063829788, "grad_norm": 0.07192587016348345, "learning_rate": 8.900786931968696e-06, "loss": 0.594, "step": 1785 }, { "epoch": 0.8685714285714285, "grad_norm": 0.07245213201990867, "learning_rate": 8.899588859630102e-06, "loss": 0.553, "step": 1786 }, { "epoch": 0.8690577507598785, "grad_norm": 0.0794118055625682, "learning_rate": 8.89839021546108e-06, "loss": 0.5478, "step": 1787 }, { "epoch": 0.8695440729483283, "grad_norm": 0.07104283997778915, "learning_rate": 8.897190999637406e-06, "loss": 0.5407, "step": 1788 }, { "epoch": 0.8700303951367782, "grad_norm": 0.07308588559191222, "learning_rate": 8.895991212334927e-06, "loss": 0.594, "step": 1789 }, { "epoch": 0.870516717325228, "grad_norm": 0.07813308134028296, "learning_rate": 8.894790853729577e-06, "loss": 0.555, "step": 1790 }, { "epoch": 0.8710030395136779, "grad_norm": 0.07810939367623708, "learning_rate": 8.893589923997379e-06, "loss": 0.5732, "step": 1791 }, { "epoch": 0.8714893617021277, "grad_norm": 0.07322235690887234, "learning_rate": 8.892388423314431e-06, "loss": 0.5763, "step": 1792 }, { "epoch": 0.8719756838905776, "grad_norm": 0.07304298460722047, "learning_rate": 8.891186351856923e-06, "loss": 0.57, "step": 1793 }, { "epoch": 0.8724620060790274, "grad_norm": 0.07747438261036639, "learning_rate": 8.889983709801123e-06, "loss": 0.6369, "step": 1794 }, { "epoch": 0.8729483282674773, "grad_norm": 0.07512815424485461, "learning_rate": 8.888780497323386e-06, "loss": 0.57, "step": 1795 }, { "epoch": 0.873434650455927, "grad_norm": 0.07214961704738479, "learning_rate": 8.88757671460015e-06, "loss": 0.5652, "step": 1796 }, { "epoch": 0.8739209726443768, "grad_norm": 0.07157004904071518, "learning_rate": 8.886372361807933e-06, "loss": 0.5685, "step": 1797 }, { "epoch": 0.8744072948328268, "grad_norm": 0.07291528381801188, "learning_rate": 8.885167439123343e-06, "loss": 0.5735, "step": 1798 }, { "epoch": 0.8748936170212765, "grad_norm": 0.07064590011520856, "learning_rate": 8.883961946723067e-06, "loss": 0.5771, "step": 1799 }, { "epoch": 0.8753799392097265, "grad_norm": 0.07454442816844008, "learning_rate": 8.882755884783877e-06, "loss": 0.6097, "step": 1800 }, { "epoch": 0.8758662613981762, "grad_norm": 0.08336435374338261, "learning_rate": 8.88154925348263e-06, "loss": 0.6007, "step": 1801 }, { "epoch": 0.8763525835866262, "grad_norm": 0.07194424430808544, "learning_rate": 8.88034205299626e-06, "loss": 0.5881, "step": 1802 }, { "epoch": 0.876838905775076, "grad_norm": 0.07217333243591798, "learning_rate": 8.879134283501791e-06, "loss": 0.5849, "step": 1803 }, { "epoch": 0.8773252279635259, "grad_norm": 0.07407238318024906, "learning_rate": 8.877925945176333e-06, "loss": 0.5794, "step": 1804 }, { "epoch": 0.8778115501519757, "grad_norm": 0.07417279552340213, "learning_rate": 8.876717038197072e-06, "loss": 0.5495, "step": 1805 }, { "epoch": 0.8782978723404256, "grad_norm": 0.07373951036618491, "learning_rate": 8.875507562741278e-06, "loss": 0.6046, "step": 1806 }, { "epoch": 0.8787841945288754, "grad_norm": 0.07475959022208804, "learning_rate": 8.87429751898631e-06, "loss": 0.604, "step": 1807 }, { "epoch": 0.8792705167173253, "grad_norm": 0.07086238419697516, "learning_rate": 8.873086907109608e-06, "loss": 0.5601, "step": 1808 }, { "epoch": 0.879756838905775, "grad_norm": 0.07290959201460409, "learning_rate": 8.87187572728869e-06, "loss": 0.588, "step": 1809 }, { "epoch": 0.880243161094225, "grad_norm": 0.07330452794101329, "learning_rate": 8.870663979701167e-06, "loss": 0.6058, "step": 1810 }, { "epoch": 0.8807294832826748, "grad_norm": 0.0734147200310214, "learning_rate": 8.869451664524725e-06, "loss": 0.5935, "step": 1811 }, { "epoch": 0.8812158054711247, "grad_norm": 0.07043841596878565, "learning_rate": 8.868238781937137e-06, "loss": 0.5311, "step": 1812 }, { "epoch": 0.8817021276595745, "grad_norm": 0.07232977526210412, "learning_rate": 8.867025332116259e-06, "loss": 0.5639, "step": 1813 }, { "epoch": 0.8821884498480244, "grad_norm": 0.07112890099021922, "learning_rate": 8.865811315240027e-06, "loss": 0.5751, "step": 1814 }, { "epoch": 0.8826747720364742, "grad_norm": 0.07345759793344649, "learning_rate": 8.864596731486466e-06, "loss": 0.5852, "step": 1815 }, { "epoch": 0.8831610942249241, "grad_norm": 0.07661329800703762, "learning_rate": 8.86338158103368e-06, "loss": 0.6206, "step": 1816 }, { "epoch": 0.8836474164133739, "grad_norm": 0.08308756505901159, "learning_rate": 8.862165864059857e-06, "loss": 0.6262, "step": 1817 }, { "epoch": 0.8841337386018238, "grad_norm": 0.07392341845122752, "learning_rate": 8.860949580743267e-06, "loss": 0.588, "step": 1818 }, { "epoch": 0.8846200607902736, "grad_norm": 0.07144143023133331, "learning_rate": 8.859732731262268e-06, "loss": 0.5795, "step": 1819 }, { "epoch": 0.8851063829787233, "grad_norm": 0.07396038022740108, "learning_rate": 8.85851531579529e-06, "loss": 0.5772, "step": 1820 }, { "epoch": 0.8855927051671733, "grad_norm": 0.07688288033822276, "learning_rate": 8.857297334520859e-06, "loss": 0.5916, "step": 1821 }, { "epoch": 0.886079027355623, "grad_norm": 0.078087355099266, "learning_rate": 8.856078787617577e-06, "loss": 0.6095, "step": 1822 }, { "epoch": 0.886565349544073, "grad_norm": 0.07155526097783196, "learning_rate": 8.854859675264129e-06, "loss": 0.6092, "step": 1823 }, { "epoch": 0.8870516717325228, "grad_norm": 0.07213763377542855, "learning_rate": 8.853639997639282e-06, "loss": 0.5855, "step": 1824 }, { "epoch": 0.8875379939209727, "grad_norm": 0.07371499925541963, "learning_rate": 8.852419754921894e-06, "loss": 0.5883, "step": 1825 }, { "epoch": 0.8880243161094225, "grad_norm": 0.0741751389661036, "learning_rate": 8.851198947290895e-06, "loss": 0.5931, "step": 1826 }, { "epoch": 0.8885106382978724, "grad_norm": 0.07535910264684668, "learning_rate": 8.849977574925302e-06, "loss": 0.5968, "step": 1827 }, { "epoch": 0.8889969604863222, "grad_norm": 0.07431349323625044, "learning_rate": 8.848755638004217e-06, "loss": 0.6184, "step": 1828 }, { "epoch": 0.8894832826747721, "grad_norm": 0.07177952381151019, "learning_rate": 8.847533136706826e-06, "loss": 0.5686, "step": 1829 }, { "epoch": 0.8899696048632219, "grad_norm": 0.072367975354459, "learning_rate": 8.846310071212392e-06, "loss": 0.6207, "step": 1830 }, { "epoch": 0.8904559270516718, "grad_norm": 0.07935903680649259, "learning_rate": 8.845086441700261e-06, "loss": 0.5528, "step": 1831 }, { "epoch": 0.8909422492401216, "grad_norm": 0.07482355604954846, "learning_rate": 8.843862248349868e-06, "loss": 0.5717, "step": 1832 }, { "epoch": 0.8914285714285715, "grad_norm": 0.07275913321947461, "learning_rate": 8.842637491340728e-06, "loss": 0.5623, "step": 1833 }, { "epoch": 0.8919148936170213, "grad_norm": 0.0757380783011119, "learning_rate": 8.841412170852435e-06, "loss": 0.6116, "step": 1834 }, { "epoch": 0.8924012158054712, "grad_norm": 0.07625820826312853, "learning_rate": 8.840186287064669e-06, "loss": 0.6419, "step": 1835 }, { "epoch": 0.892887537993921, "grad_norm": 0.07272042793767222, "learning_rate": 8.838959840157192e-06, "loss": 0.5481, "step": 1836 }, { "epoch": 0.8933738601823709, "grad_norm": 0.08169984702086527, "learning_rate": 8.837732830309848e-06, "loss": 0.6204, "step": 1837 }, { "epoch": 0.8938601823708207, "grad_norm": 0.07427025854834735, "learning_rate": 8.836505257702565e-06, "loss": 0.6099, "step": 1838 }, { "epoch": 0.8943465045592706, "grad_norm": 0.07550707716624953, "learning_rate": 8.835277122515354e-06, "loss": 0.6247, "step": 1839 }, { "epoch": 0.8948328267477204, "grad_norm": 0.07315107499038113, "learning_rate": 8.834048424928305e-06, "loss": 0.6008, "step": 1840 }, { "epoch": 0.8953191489361703, "grad_norm": 0.07233291013245507, "learning_rate": 8.832819165121594e-06, "loss": 0.5487, "step": 1841 }, { "epoch": 0.8958054711246201, "grad_norm": 0.07163137146747516, "learning_rate": 8.831589343275474e-06, "loss": 0.5546, "step": 1842 }, { "epoch": 0.89629179331307, "grad_norm": 0.07416546849099498, "learning_rate": 8.83035895957029e-06, "loss": 0.5731, "step": 1843 }, { "epoch": 0.8967781155015198, "grad_norm": 0.07687206384413069, "learning_rate": 8.829128014186458e-06, "loss": 0.6126, "step": 1844 }, { "epoch": 0.8972644376899696, "grad_norm": 0.07482452758160005, "learning_rate": 8.827896507304488e-06, "loss": 0.6352, "step": 1845 }, { "epoch": 0.8977507598784195, "grad_norm": 0.075647536473654, "learning_rate": 8.826664439104964e-06, "loss": 0.5759, "step": 1846 }, { "epoch": 0.8982370820668693, "grad_norm": 0.07240884129541364, "learning_rate": 8.825431809768554e-06, "loss": 0.5748, "step": 1847 }, { "epoch": 0.8987234042553192, "grad_norm": 0.07389696423901282, "learning_rate": 8.82419861947601e-06, "loss": 0.5713, "step": 1848 }, { "epoch": 0.899209726443769, "grad_norm": 0.07428749474448496, "learning_rate": 8.822964868408164e-06, "loss": 0.611, "step": 1849 }, { "epoch": 0.8996960486322189, "grad_norm": 0.07690624039589869, "learning_rate": 8.821730556745933e-06, "loss": 0.5923, "step": 1850 }, { "epoch": 0.9001823708206687, "grad_norm": 0.07270364688051707, "learning_rate": 8.820495684670315e-06, "loss": 0.5916, "step": 1851 }, { "epoch": 0.9006686930091186, "grad_norm": 0.07312127487499655, "learning_rate": 8.81926025236239e-06, "loss": 0.6308, "step": 1852 }, { "epoch": 0.9011550151975684, "grad_norm": 0.07146416378388792, "learning_rate": 8.818024260003319e-06, "loss": 0.5711, "step": 1853 }, { "epoch": 0.9016413373860183, "grad_norm": 0.07130166608385094, "learning_rate": 8.816787707774347e-06, "loss": 0.5655, "step": 1854 }, { "epoch": 0.902127659574468, "grad_norm": 0.07316680537191218, "learning_rate": 8.8155505958568e-06, "loss": 0.5887, "step": 1855 }, { "epoch": 0.902613981762918, "grad_norm": 0.07480706657946969, "learning_rate": 8.814312924432086e-06, "loss": 0.5341, "step": 1856 }, { "epoch": 0.9031003039513678, "grad_norm": 0.07755230340098143, "learning_rate": 8.813074693681697e-06, "loss": 0.6117, "step": 1857 }, { "epoch": 0.9035866261398177, "grad_norm": 0.07541342156250899, "learning_rate": 8.811835903787204e-06, "loss": 0.604, "step": 1858 }, { "epoch": 0.9040729483282675, "grad_norm": 0.07906610477805168, "learning_rate": 8.810596554930262e-06, "loss": 0.6164, "step": 1859 }, { "epoch": 0.9045592705167174, "grad_norm": 0.07350433657886936, "learning_rate": 8.809356647292609e-06, "loss": 0.6049, "step": 1860 }, { "epoch": 0.9050455927051672, "grad_norm": 0.07512495990679414, "learning_rate": 8.808116181056059e-06, "loss": 0.6135, "step": 1861 }, { "epoch": 0.9055319148936171, "grad_norm": 0.07308124586475026, "learning_rate": 8.806875156402516e-06, "loss": 0.6053, "step": 1862 }, { "epoch": 0.9060182370820669, "grad_norm": 0.07131909029016202, "learning_rate": 8.805633573513962e-06, "loss": 0.5879, "step": 1863 }, { "epoch": 0.9065045592705168, "grad_norm": 0.07422709663903333, "learning_rate": 8.804391432572459e-06, "loss": 0.5604, "step": 1864 }, { "epoch": 0.9069908814589666, "grad_norm": 0.08205489241017076, "learning_rate": 8.803148733760155e-06, "loss": 0.6801, "step": 1865 }, { "epoch": 0.9074772036474165, "grad_norm": 0.07391652391123428, "learning_rate": 8.801905477259276e-06, "loss": 0.6153, "step": 1866 }, { "epoch": 0.9079635258358663, "grad_norm": 0.07077010357824341, "learning_rate": 8.800661663252134e-06, "loss": 0.553, "step": 1867 }, { "epoch": 0.9084498480243162, "grad_norm": 0.08035645758146163, "learning_rate": 8.799417291921117e-06, "loss": 0.5557, "step": 1868 }, { "epoch": 0.908936170212766, "grad_norm": 0.06992597126663248, "learning_rate": 8.7981723634487e-06, "loss": 0.5294, "step": 1869 }, { "epoch": 0.9094224924012158, "grad_norm": 0.07058614228000314, "learning_rate": 8.796926878017438e-06, "loss": 0.5879, "step": 1870 }, { "epoch": 0.9099088145896657, "grad_norm": 0.07192381102857433, "learning_rate": 8.795680835809964e-06, "loss": 0.5711, "step": 1871 }, { "epoch": 0.9103951367781155, "grad_norm": 0.07045862120870869, "learning_rate": 8.794434237009e-06, "loss": 0.6138, "step": 1872 }, { "epoch": 0.9108814589665654, "grad_norm": 0.0736511925101877, "learning_rate": 8.793187081797343e-06, "loss": 0.5503, "step": 1873 }, { "epoch": 0.9113677811550152, "grad_norm": 0.07689011225703497, "learning_rate": 8.791939370357876e-06, "loss": 0.5961, "step": 1874 }, { "epoch": 0.9118541033434651, "grad_norm": 0.07242971371312248, "learning_rate": 8.790691102873558e-06, "loss": 0.5748, "step": 1875 }, { "epoch": 0.9123404255319149, "grad_norm": 0.07310194234505171, "learning_rate": 8.789442279527438e-06, "loss": 0.5921, "step": 1876 }, { "epoch": 0.9128267477203648, "grad_norm": 0.07194543576831185, "learning_rate": 8.78819290050264e-06, "loss": 0.5741, "step": 1877 }, { "epoch": 0.9133130699088146, "grad_norm": 0.07332708608921876, "learning_rate": 8.78694296598237e-06, "loss": 0.6284, "step": 1878 }, { "epoch": 0.9137993920972645, "grad_norm": 0.08254463870559857, "learning_rate": 8.785692476149918e-06, "loss": 0.6006, "step": 1879 }, { "epoch": 0.9142857142857143, "grad_norm": 0.07031572348799323, "learning_rate": 8.784441431188653e-06, "loss": 0.5916, "step": 1880 }, { "epoch": 0.9147720364741642, "grad_norm": 0.06844342078713918, "learning_rate": 8.783189831282028e-06, "loss": 0.5525, "step": 1881 }, { "epoch": 0.915258358662614, "grad_norm": 0.07299935616393388, "learning_rate": 8.781937676613577e-06, "loss": 0.5618, "step": 1882 }, { "epoch": 0.9157446808510639, "grad_norm": 0.07430506819954745, "learning_rate": 8.78068496736691e-06, "loss": 0.6192, "step": 1883 }, { "epoch": 0.9162310030395137, "grad_norm": 0.07721377115803853, "learning_rate": 8.779431703725726e-06, "loss": 0.6015, "step": 1884 }, { "epoch": 0.9167173252279636, "grad_norm": 0.07548748738965665, "learning_rate": 8.7781778858738e-06, "loss": 0.5937, "step": 1885 }, { "epoch": 0.9172036474164134, "grad_norm": 0.07204255191539881, "learning_rate": 8.776923513994993e-06, "loss": 0.5508, "step": 1886 }, { "epoch": 0.9176899696048633, "grad_norm": 0.08560770198995425, "learning_rate": 8.77566858827324e-06, "loss": 0.6626, "step": 1887 }, { "epoch": 0.9181762917933131, "grad_norm": 0.07371661573832375, "learning_rate": 8.774413108892566e-06, "loss": 0.5572, "step": 1888 }, { "epoch": 0.918662613981763, "grad_norm": 0.07416826120355921, "learning_rate": 8.77315707603707e-06, "loss": 0.5573, "step": 1889 }, { "epoch": 0.9191489361702128, "grad_norm": 0.07105055457362772, "learning_rate": 8.771900489890936e-06, "loss": 0.5506, "step": 1890 }, { "epoch": 0.9196352583586627, "grad_norm": 0.08067359575114463, "learning_rate": 8.770643350638428e-06, "loss": 0.5687, "step": 1891 }, { "epoch": 0.9201215805471125, "grad_norm": 0.07377436823297279, "learning_rate": 8.76938565846389e-06, "loss": 0.6047, "step": 1892 }, { "epoch": 0.9206079027355623, "grad_norm": 0.0806401297058479, "learning_rate": 8.768127413551753e-06, "loss": 0.6379, "step": 1893 }, { "epoch": 0.9210942249240122, "grad_norm": 0.0738382309456317, "learning_rate": 8.766868616086517e-06, "loss": 0.599, "step": 1894 }, { "epoch": 0.921580547112462, "grad_norm": 0.07237935229265387, "learning_rate": 8.765609266252775e-06, "loss": 0.5709, "step": 1895 }, { "epoch": 0.9220668693009119, "grad_norm": 0.07745783967681805, "learning_rate": 8.764349364235197e-06, "loss": 0.5959, "step": 1896 }, { "epoch": 0.9225531914893617, "grad_norm": 0.07219069912436764, "learning_rate": 8.763088910218528e-06, "loss": 0.6246, "step": 1897 }, { "epoch": 0.9230395136778116, "grad_norm": 0.07165332905833564, "learning_rate": 8.761827904387608e-06, "loss": 0.5744, "step": 1898 }, { "epoch": 0.9235258358662614, "grad_norm": 0.07390910334111164, "learning_rate": 8.76056634692734e-06, "loss": 0.5627, "step": 1899 }, { "epoch": 0.9240121580547113, "grad_norm": 0.07575407455272574, "learning_rate": 8.759304238022723e-06, "loss": 0.5885, "step": 1900 }, { "epoch": 0.9244984802431611, "grad_norm": 0.07666566543189379, "learning_rate": 8.75804157785883e-06, "loss": 0.5774, "step": 1901 }, { "epoch": 0.924984802431611, "grad_norm": 0.07653634702640007, "learning_rate": 8.756778366620814e-06, "loss": 0.6036, "step": 1902 }, { "epoch": 0.9254711246200608, "grad_norm": 0.07172263865615873, "learning_rate": 8.755514604493912e-06, "loss": 0.5956, "step": 1903 }, { "epoch": 0.9259574468085107, "grad_norm": 0.0837717654353363, "learning_rate": 8.754250291663439e-06, "loss": 0.5936, "step": 1904 }, { "epoch": 0.9264437689969605, "grad_norm": 0.0787432784764435, "learning_rate": 8.752985428314795e-06, "loss": 0.5944, "step": 1905 }, { "epoch": 0.9269300911854104, "grad_norm": 0.0733548556385432, "learning_rate": 8.751720014633454e-06, "loss": 0.598, "step": 1906 }, { "epoch": 0.9274164133738602, "grad_norm": 0.0739171245461002, "learning_rate": 8.750454050804978e-06, "loss": 0.6018, "step": 1907 }, { "epoch": 0.9279027355623101, "grad_norm": 0.07877341419849072, "learning_rate": 8.749187537015003e-06, "loss": 0.552, "step": 1908 }, { "epoch": 0.9283890577507599, "grad_norm": 0.07783256628409974, "learning_rate": 8.747920473449252e-06, "loss": 0.5797, "step": 1909 }, { "epoch": 0.9288753799392098, "grad_norm": 0.07360927254570707, "learning_rate": 8.746652860293523e-06, "loss": 0.5827, "step": 1910 }, { "epoch": 0.9293617021276596, "grad_norm": 0.07427300754756623, "learning_rate": 8.745384697733699e-06, "loss": 0.5693, "step": 1911 }, { "epoch": 0.9298480243161095, "grad_norm": 0.0769733477434556, "learning_rate": 8.744115985955738e-06, "loss": 0.6133, "step": 1912 }, { "epoch": 0.9303343465045593, "grad_norm": 0.07271782174438762, "learning_rate": 8.74284672514569e-06, "loss": 0.5804, "step": 1913 }, { "epoch": 0.9308206686930092, "grad_norm": 0.07846413106062895, "learning_rate": 8.74157691548967e-06, "loss": 0.6565, "step": 1914 }, { "epoch": 0.931306990881459, "grad_norm": 0.07377507142002467, "learning_rate": 8.740306557173881e-06, "loss": 0.5529, "step": 1915 }, { "epoch": 0.9317933130699089, "grad_norm": 0.0742597999791996, "learning_rate": 8.739035650384612e-06, "loss": 0.5608, "step": 1916 }, { "epoch": 0.9322796352583587, "grad_norm": 0.0751723870957562, "learning_rate": 8.737764195308226e-06, "loss": 0.5805, "step": 1917 }, { "epoch": 0.9327659574468085, "grad_norm": 0.0790512521332783, "learning_rate": 8.736492192131164e-06, "loss": 0.6095, "step": 1918 }, { "epoch": 0.9332522796352584, "grad_norm": 0.07406014315950632, "learning_rate": 8.735219641039953e-06, "loss": 0.5523, "step": 1919 }, { "epoch": 0.9337386018237082, "grad_norm": 0.07226407226743525, "learning_rate": 8.733946542221198e-06, "loss": 0.5418, "step": 1920 }, { "epoch": 0.9342249240121581, "grad_norm": 0.07279557859791015, "learning_rate": 8.732672895861585e-06, "loss": 0.5927, "step": 1921 }, { "epoch": 0.9347112462006079, "grad_norm": 0.07253148743378941, "learning_rate": 8.731398702147877e-06, "loss": 0.5738, "step": 1922 }, { "epoch": 0.9351975683890578, "grad_norm": 0.07304467265066063, "learning_rate": 8.730123961266923e-06, "loss": 0.5803, "step": 1923 }, { "epoch": 0.9356838905775076, "grad_norm": 0.07322965206078319, "learning_rate": 8.72884867340565e-06, "loss": 0.5611, "step": 1924 }, { "epoch": 0.9361702127659575, "grad_norm": 0.07608127855139726, "learning_rate": 8.727572838751062e-06, "loss": 0.5797, "step": 1925 }, { "epoch": 0.9366565349544073, "grad_norm": 0.07303585133740899, "learning_rate": 8.726296457490246e-06, "loss": 0.6106, "step": 1926 }, { "epoch": 0.9371428571428572, "grad_norm": 0.07045183741301447, "learning_rate": 8.72501952981037e-06, "loss": 0.5349, "step": 1927 }, { "epoch": 0.937629179331307, "grad_norm": 0.07306198098723882, "learning_rate": 8.723742055898681e-06, "loss": 0.5947, "step": 1928 }, { "epoch": 0.9381155015197569, "grad_norm": 0.0740225934736366, "learning_rate": 8.722464035942505e-06, "loss": 0.5521, "step": 1929 }, { "epoch": 0.9386018237082067, "grad_norm": 0.07067908994654054, "learning_rate": 8.721185470129248e-06, "loss": 0.5606, "step": 1930 }, { "epoch": 0.9390881458966566, "grad_norm": 0.07385786456444564, "learning_rate": 8.7199063586464e-06, "loss": 0.5872, "step": 1931 }, { "epoch": 0.9395744680851064, "grad_norm": 0.0842922501458967, "learning_rate": 8.718626701681527e-06, "loss": 0.5742, "step": 1932 }, { "epoch": 0.9400607902735563, "grad_norm": 0.06953351399551343, "learning_rate": 8.717346499422275e-06, "loss": 0.5383, "step": 1933 }, { "epoch": 0.9405471124620061, "grad_norm": 0.074435973311784, "learning_rate": 8.716065752056373e-06, "loss": 0.5868, "step": 1934 }, { "epoch": 0.941033434650456, "grad_norm": 0.07414631919745308, "learning_rate": 8.714784459771626e-06, "loss": 0.6038, "step": 1935 }, { "epoch": 0.9415197568389058, "grad_norm": 0.07477347706541111, "learning_rate": 8.713502622755924e-06, "loss": 0.6034, "step": 1936 }, { "epoch": 0.9420060790273557, "grad_norm": 0.0767264112357078, "learning_rate": 8.712220241197232e-06, "loss": 0.5866, "step": 1937 }, { "epoch": 0.9424924012158055, "grad_norm": 0.07238657065157811, "learning_rate": 8.710937315283594e-06, "loss": 0.5953, "step": 1938 }, { "epoch": 0.9429787234042554, "grad_norm": 0.07567420200802548, "learning_rate": 8.709653845203141e-06, "loss": 0.5919, "step": 1939 }, { "epoch": 0.9434650455927052, "grad_norm": 0.07368377062051222, "learning_rate": 8.708369831144078e-06, "loss": 0.6119, "step": 1940 }, { "epoch": 0.9439513677811551, "grad_norm": 0.07455304205447773, "learning_rate": 8.707085273294692e-06, "loss": 0.5768, "step": 1941 }, { "epoch": 0.9444376899696049, "grad_norm": 0.07557124587294992, "learning_rate": 8.705800171843345e-06, "loss": 0.6057, "step": 1942 }, { "epoch": 0.9449240121580547, "grad_norm": 0.07335490326331268, "learning_rate": 8.704514526978485e-06, "loss": 0.594, "step": 1943 }, { "epoch": 0.9454103343465046, "grad_norm": 0.07603034862894496, "learning_rate": 8.703228338888636e-06, "loss": 0.5993, "step": 1944 }, { "epoch": 0.9458966565349544, "grad_norm": 0.07707660781225507, "learning_rate": 8.701941607762407e-06, "loss": 0.6245, "step": 1945 }, { "epoch": 0.9463829787234043, "grad_norm": 0.07372636881111767, "learning_rate": 8.700654333788478e-06, "loss": 0.5867, "step": 1946 }, { "epoch": 0.9468693009118541, "grad_norm": 0.07414267455408871, "learning_rate": 8.699366517155614e-06, "loss": 0.5551, "step": 1947 }, { "epoch": 0.947355623100304, "grad_norm": 0.07513833224044704, "learning_rate": 8.69807815805266e-06, "loss": 0.566, "step": 1948 }, { "epoch": 0.9478419452887538, "grad_norm": 0.07186551438153543, "learning_rate": 8.696789256668538e-06, "loss": 0.567, "step": 1949 }, { "epoch": 0.9483282674772037, "grad_norm": 0.07553769945065232, "learning_rate": 8.695499813192254e-06, "loss": 0.6565, "step": 1950 }, { "epoch": 0.9488145896656535, "grad_norm": 0.07654798759273028, "learning_rate": 8.694209827812886e-06, "loss": 0.6238, "step": 1951 }, { "epoch": 0.9493009118541034, "grad_norm": 0.07265853801793293, "learning_rate": 8.692919300719596e-06, "loss": 0.5808, "step": 1952 }, { "epoch": 0.9497872340425532, "grad_norm": 0.07065949865374788, "learning_rate": 8.691628232101627e-06, "loss": 0.5573, "step": 1953 }, { "epoch": 0.9502735562310031, "grad_norm": 0.07273958322712094, "learning_rate": 8.690336622148299e-06, "loss": 0.6058, "step": 1954 }, { "epoch": 0.9507598784194529, "grad_norm": 0.07281979887883913, "learning_rate": 8.689044471049013e-06, "loss": 0.6115, "step": 1955 }, { "epoch": 0.9512462006079028, "grad_norm": 0.06989490203333762, "learning_rate": 8.687751778993246e-06, "loss": 0.5197, "step": 1956 }, { "epoch": 0.9517325227963526, "grad_norm": 0.07206230968129262, "learning_rate": 8.686458546170558e-06, "loss": 0.6135, "step": 1957 }, { "epoch": 0.9522188449848025, "grad_norm": 0.07388518202418963, "learning_rate": 8.685164772770588e-06, "loss": 0.6241, "step": 1958 }, { "epoch": 0.9527051671732523, "grad_norm": 0.07680439843553713, "learning_rate": 8.68387045898305e-06, "loss": 0.5814, "step": 1959 }, { "epoch": 0.9531914893617022, "grad_norm": 0.07100583350362148, "learning_rate": 8.682575604997744e-06, "loss": 0.5718, "step": 1960 }, { "epoch": 0.953677811550152, "grad_norm": 0.07817881497642581, "learning_rate": 8.681280211004543e-06, "loss": 0.5757, "step": 1961 }, { "epoch": 0.9541641337386019, "grad_norm": 0.07330469912104214, "learning_rate": 8.679984277193403e-06, "loss": 0.6138, "step": 1962 }, { "epoch": 0.9546504559270517, "grad_norm": 0.07277157621205767, "learning_rate": 8.678687803754358e-06, "loss": 0.5792, "step": 1963 }, { "epoch": 0.9551367781155016, "grad_norm": 0.07152158692855926, "learning_rate": 8.67739079087752e-06, "loss": 0.5612, "step": 1964 }, { "epoch": 0.9556231003039514, "grad_norm": 0.07144118075126639, "learning_rate": 8.676093238753083e-06, "loss": 0.5777, "step": 1965 }, { "epoch": 0.9561094224924012, "grad_norm": 0.07219566881100747, "learning_rate": 8.674795147571318e-06, "loss": 0.6013, "step": 1966 }, { "epoch": 0.9565957446808511, "grad_norm": 0.07484808961086889, "learning_rate": 8.673496517522572e-06, "loss": 0.591, "step": 1967 }, { "epoch": 0.9570820668693009, "grad_norm": 0.07256855041644918, "learning_rate": 8.672197348797278e-06, "loss": 0.6032, "step": 1968 }, { "epoch": 0.9575683890577508, "grad_norm": 0.06871632178820912, "learning_rate": 8.670897641585945e-06, "loss": 0.5813, "step": 1969 }, { "epoch": 0.9580547112462006, "grad_norm": 0.07448842817312101, "learning_rate": 8.669597396079156e-06, "loss": 0.6148, "step": 1970 }, { "epoch": 0.9585410334346505, "grad_norm": 0.0760953890895604, "learning_rate": 8.668296612467583e-06, "loss": 0.6205, "step": 1971 }, { "epoch": 0.9590273556231003, "grad_norm": 0.072864671447709, "learning_rate": 8.666995290941967e-06, "loss": 0.5785, "step": 1972 }, { "epoch": 0.9595136778115502, "grad_norm": 0.07181234183052794, "learning_rate": 8.665693431693132e-06, "loss": 0.5795, "step": 1973 }, { "epoch": 0.96, "grad_norm": 0.07046195108688545, "learning_rate": 8.664391034911982e-06, "loss": 0.5857, "step": 1974 }, { "epoch": 0.9604863221884499, "grad_norm": 0.07627922595473915, "learning_rate": 8.663088100789501e-06, "loss": 0.6225, "step": 1975 }, { "epoch": 0.9609726443768997, "grad_norm": 0.07411225875795115, "learning_rate": 8.661784629516745e-06, "loss": 0.6005, "step": 1976 }, { "epoch": 0.9614589665653496, "grad_norm": 0.07187964684316119, "learning_rate": 8.660480621284855e-06, "loss": 0.5628, "step": 1977 }, { "epoch": 0.9619452887537994, "grad_norm": 0.07711000051669091, "learning_rate": 8.65917607628505e-06, "loss": 0.563, "step": 1978 }, { "epoch": 0.9624316109422493, "grad_norm": 0.074930074772771, "learning_rate": 8.657870994708627e-06, "loss": 0.6273, "step": 1979 }, { "epoch": 0.9629179331306991, "grad_norm": 0.0745730780384544, "learning_rate": 8.656565376746959e-06, "loss": 0.5784, "step": 1980 }, { "epoch": 0.963404255319149, "grad_norm": 0.06961842260503341, "learning_rate": 8.655259222591503e-06, "loss": 0.5631, "step": 1981 }, { "epoch": 0.9638905775075988, "grad_norm": 0.07463124924712256, "learning_rate": 8.65395253243379e-06, "loss": 0.5494, "step": 1982 }, { "epoch": 0.9643768996960487, "grad_norm": 0.07289374732622579, "learning_rate": 8.65264530646543e-06, "loss": 0.5931, "step": 1983 }, { "epoch": 0.9648632218844985, "grad_norm": 0.0695810224455607, "learning_rate": 8.651337544878115e-06, "loss": 0.5695, "step": 1984 }, { "epoch": 0.9653495440729484, "grad_norm": 0.0718534767257931, "learning_rate": 8.650029247863615e-06, "loss": 0.5545, "step": 1985 }, { "epoch": 0.9658358662613982, "grad_norm": 0.07294577914846184, "learning_rate": 8.648720415613774e-06, "loss": 0.5699, "step": 1986 }, { "epoch": 0.9663221884498481, "grad_norm": 0.07270140052961915, "learning_rate": 8.647411048320515e-06, "loss": 0.5562, "step": 1987 }, { "epoch": 0.9668085106382979, "grad_norm": 0.07238928658794108, "learning_rate": 8.646101146175846e-06, "loss": 0.5668, "step": 1988 }, { "epoch": 0.9672948328267478, "grad_norm": 0.07164837616909217, "learning_rate": 8.64479070937185e-06, "loss": 0.6128, "step": 1989 }, { "epoch": 0.9677811550151976, "grad_norm": 0.07367725008368022, "learning_rate": 8.643479738100684e-06, "loss": 0.5774, "step": 1990 }, { "epoch": 0.9682674772036474, "grad_norm": 0.07848211598679297, "learning_rate": 8.642168232554589e-06, "loss": 0.595, "step": 1991 }, { "epoch": 0.9687537993920973, "grad_norm": 0.06763999496876924, "learning_rate": 8.640856192925884e-06, "loss": 0.5556, "step": 1992 }, { "epoch": 0.9692401215805471, "grad_norm": 0.07027828631001344, "learning_rate": 8.639543619406961e-06, "loss": 0.5289, "step": 1993 }, { "epoch": 0.969726443768997, "grad_norm": 0.07367010261794604, "learning_rate": 8.638230512190298e-06, "loss": 0.5922, "step": 1994 }, { "epoch": 0.9702127659574468, "grad_norm": 0.07014210837887246, "learning_rate": 8.636916871468442e-06, "loss": 0.5229, "step": 1995 }, { "epoch": 0.9706990881458967, "grad_norm": 0.07195483549925343, "learning_rate": 8.63560269743403e-06, "loss": 0.5868, "step": 1996 }, { "epoch": 0.9711854103343465, "grad_norm": 0.08163082582222239, "learning_rate": 8.634287990279767e-06, "loss": 0.5573, "step": 1997 }, { "epoch": 0.9716717325227964, "grad_norm": 0.07638161736844712, "learning_rate": 8.632972750198438e-06, "loss": 0.5735, "step": 1998 }, { "epoch": 0.9721580547112462, "grad_norm": 0.07276505191514222, "learning_rate": 8.631656977382912e-06, "loss": 0.5893, "step": 1999 }, { "epoch": 0.9726443768996961, "grad_norm": 0.07002002728625449, "learning_rate": 8.630340672026129e-06, "loss": 0.5758, "step": 2000 }, { "epoch": 0.9731306990881459, "grad_norm": 0.07129980243856032, "learning_rate": 8.629023834321113e-06, "loss": 0.5733, "step": 2001 }, { "epoch": 0.9736170212765958, "grad_norm": 0.07649137111336735, "learning_rate": 8.627706464460964e-06, "loss": 0.6199, "step": 2002 }, { "epoch": 0.9741033434650456, "grad_norm": 0.07561432784319275, "learning_rate": 8.626388562638853e-06, "loss": 0.5458, "step": 2003 }, { "epoch": 0.9745896656534955, "grad_norm": 0.07550392686570884, "learning_rate": 8.625070129048042e-06, "loss": 0.5969, "step": 2004 }, { "epoch": 0.9750759878419453, "grad_norm": 0.07632023476484963, "learning_rate": 8.623751163881862e-06, "loss": 0.5831, "step": 2005 }, { "epoch": 0.9755623100303952, "grad_norm": 0.0772423447099558, "learning_rate": 8.622431667333724e-06, "loss": 0.592, "step": 2006 }, { "epoch": 0.976048632218845, "grad_norm": 0.07227388275743091, "learning_rate": 8.621111639597117e-06, "loss": 0.5945, "step": 2007 }, { "epoch": 0.9765349544072949, "grad_norm": 0.07175435857817608, "learning_rate": 8.619791080865609e-06, "loss": 0.5616, "step": 2008 }, { "epoch": 0.9770212765957447, "grad_norm": 0.09577836305853851, "learning_rate": 8.618469991332846e-06, "loss": 0.5759, "step": 2009 }, { "epoch": 0.9775075987841946, "grad_norm": 0.07421635889520364, "learning_rate": 8.617148371192547e-06, "loss": 0.5556, "step": 2010 }, { "epoch": 0.9779939209726444, "grad_norm": 0.07543784169961848, "learning_rate": 8.615826220638514e-06, "loss": 0.5539, "step": 2011 }, { "epoch": 0.9784802431610943, "grad_norm": 0.07404003153084862, "learning_rate": 8.61450353986463e-06, "loss": 0.5864, "step": 2012 }, { "epoch": 0.9789665653495441, "grad_norm": 0.07343042903390791, "learning_rate": 8.613180329064844e-06, "loss": 0.5616, "step": 2013 }, { "epoch": 0.9794528875379939, "grad_norm": 0.072255986061141, "learning_rate": 8.611856588433193e-06, "loss": 0.5804, "step": 2014 }, { "epoch": 0.9799392097264438, "grad_norm": 0.07213135134031219, "learning_rate": 8.61053231816379e-06, "loss": 0.5844, "step": 2015 }, { "epoch": 0.9804255319148936, "grad_norm": 0.0708814169189653, "learning_rate": 8.609207518450823e-06, "loss": 0.573, "step": 2016 }, { "epoch": 0.9809118541033435, "grad_norm": 0.07768338090487058, "learning_rate": 8.607882189488558e-06, "loss": 0.6023, "step": 2017 }, { "epoch": 0.9813981762917933, "grad_norm": 0.07552085324272174, "learning_rate": 8.60655633147134e-06, "loss": 0.5846, "step": 2018 }, { "epoch": 0.9818844984802432, "grad_norm": 0.07810155964317222, "learning_rate": 8.605229944593592e-06, "loss": 0.5678, "step": 2019 }, { "epoch": 0.982370820668693, "grad_norm": 0.0736400652851956, "learning_rate": 8.603903029049812e-06, "loss": 0.5972, "step": 2020 }, { "epoch": 0.9828571428571429, "grad_norm": 0.0706211241308663, "learning_rate": 8.602575585034579e-06, "loss": 0.5698, "step": 2021 }, { "epoch": 0.9833434650455927, "grad_norm": 0.07145541857657702, "learning_rate": 8.601247612742545e-06, "loss": 0.5962, "step": 2022 }, { "epoch": 0.9838297872340426, "grad_norm": 0.07440499802663506, "learning_rate": 8.599919112368444e-06, "loss": 0.5867, "step": 2023 }, { "epoch": 0.9843161094224924, "grad_norm": 0.07111096838989284, "learning_rate": 8.598590084107085e-06, "loss": 0.5728, "step": 2024 }, { "epoch": 0.9848024316109423, "grad_norm": 0.0737273788900959, "learning_rate": 8.597260528153354e-06, "loss": 0.5776, "step": 2025 }, { "epoch": 0.9852887537993921, "grad_norm": 0.07220304504764649, "learning_rate": 8.595930444702217e-06, "loss": 0.5851, "step": 2026 }, { "epoch": 0.985775075987842, "grad_norm": 0.07540167851680561, "learning_rate": 8.594599833948715e-06, "loss": 0.6414, "step": 2027 }, { "epoch": 0.9862613981762918, "grad_norm": 0.07264709450883156, "learning_rate": 8.593268696087966e-06, "loss": 0.5927, "step": 2028 }, { "epoch": 0.9867477203647417, "grad_norm": 0.07135532712612644, "learning_rate": 8.591937031315167e-06, "loss": 0.5568, "step": 2029 }, { "epoch": 0.9872340425531915, "grad_norm": 0.07044681021365043, "learning_rate": 8.590604839825593e-06, "loss": 0.5769, "step": 2030 }, { "epoch": 0.9877203647416414, "grad_norm": 0.07524956835825793, "learning_rate": 8.58927212181459e-06, "loss": 0.557, "step": 2031 }, { "epoch": 0.9882066869300912, "grad_norm": 0.07272223393951906, "learning_rate": 8.587938877477593e-06, "loss": 0.5941, "step": 2032 }, { "epoch": 0.9886930091185411, "grad_norm": 0.0749076817410383, "learning_rate": 8.586605107010103e-06, "loss": 0.5769, "step": 2033 }, { "epoch": 0.9891793313069909, "grad_norm": 0.09352000564960601, "learning_rate": 8.5852708106077e-06, "loss": 0.6562, "step": 2034 }, { "epoch": 0.9896656534954408, "grad_norm": 0.07210073897981624, "learning_rate": 8.583935988466048e-06, "loss": 0.6037, "step": 2035 }, { "epoch": 0.9901519756838906, "grad_norm": 0.07592592637947772, "learning_rate": 8.58260064078088e-06, "loss": 0.5658, "step": 2036 }, { "epoch": 0.9906382978723405, "grad_norm": 0.07686089700617904, "learning_rate": 8.581264767748012e-06, "loss": 0.5712, "step": 2037 }, { "epoch": 0.9911246200607903, "grad_norm": 0.07355065392333741, "learning_rate": 8.579928369563335e-06, "loss": 0.6046, "step": 2038 }, { "epoch": 0.9916109422492401, "grad_norm": 0.07260917104148394, "learning_rate": 8.578591446422814e-06, "loss": 0.6062, "step": 2039 }, { "epoch": 0.99209726443769, "grad_norm": 0.0763273623411675, "learning_rate": 8.577253998522496e-06, "loss": 0.5613, "step": 2040 }, { "epoch": 0.9925835866261398, "grad_norm": 0.0728063601092115, "learning_rate": 8.5759160260585e-06, "loss": 0.5608, "step": 2041 }, { "epoch": 0.9930699088145897, "grad_norm": 0.07310299316281559, "learning_rate": 8.574577529227027e-06, "loss": 0.5772, "step": 2042 }, { "epoch": 0.9935562310030395, "grad_norm": 0.07394776255131645, "learning_rate": 8.573238508224351e-06, "loss": 0.5998, "step": 2043 }, { "epoch": 0.9940425531914894, "grad_norm": 0.07777617226060557, "learning_rate": 8.571898963246826e-06, "loss": 0.5949, "step": 2044 }, { "epoch": 0.9945288753799392, "grad_norm": 0.0687563166434955, "learning_rate": 8.570558894490878e-06, "loss": 0.5315, "step": 2045 }, { "epoch": 0.9950151975683891, "grad_norm": 0.07260669177965728, "learning_rate": 8.569218302153015e-06, "loss": 0.5727, "step": 2046 }, { "epoch": 0.9955015197568389, "grad_norm": 0.07082971629937435, "learning_rate": 8.567877186429819e-06, "loss": 0.5615, "step": 2047 }, { "epoch": 0.9959878419452888, "grad_norm": 0.0763588646851888, "learning_rate": 8.566535547517949e-06, "loss": 0.6399, "step": 2048 }, { "epoch": 0.9964741641337386, "grad_norm": 0.07574455989652469, "learning_rate": 8.565193385614143e-06, "loss": 0.5699, "step": 2049 }, { "epoch": 0.9969604863221885, "grad_norm": 0.07630762610502481, "learning_rate": 8.563850700915211e-06, "loss": 0.5885, "step": 2050 }, { "epoch": 0.9974468085106383, "grad_norm": 0.07295687274868297, "learning_rate": 8.562507493618046e-06, "loss": 0.5879, "step": 2051 }, { "epoch": 0.9979331306990882, "grad_norm": 0.07396831755057373, "learning_rate": 8.56116376391961e-06, "loss": 0.5413, "step": 2052 }, { "epoch": 0.998419452887538, "grad_norm": 0.07391435511617277, "learning_rate": 8.559819512016949e-06, "loss": 0.5988, "step": 2053 }, { "epoch": 0.9989057750759879, "grad_norm": 0.07007833288228817, "learning_rate": 8.55847473810718e-06, "loss": 0.5526, "step": 2054 }, { "epoch": 0.9993920972644377, "grad_norm": 0.07568621827235816, "learning_rate": 8.5571294423875e-06, "loss": 0.6245, "step": 2055 }, { "epoch": 0.9998784194528876, "grad_norm": 0.08584046270551529, "learning_rate": 8.55578362505518e-06, "loss": 0.6564, "step": 2056 }, { "epoch": 0.9998784194528876, "eval_loss": 0.585534930229187, "eval_runtime": 105.1103, "eval_samples_per_second": 288.773, "eval_steps_per_second": 36.105, "step": 2056 }, { "epoch": 1.0, "grad_norm": 0.08584046270551529, "learning_rate": 8.554437286307573e-06, "loss": 0.1579, "step": 2057 }, { "epoch": 1.0003647416413375, "grad_norm": 0.07623802936811282, "learning_rate": 8.553090426342098e-06, "loss": 0.448, "step": 2058 }, { "epoch": 1.00048632218845, "grad_norm": 0.08080827269924548, "learning_rate": 8.551743045356262e-06, "loss": 0.5614, "step": 2059 }, { "epoch": 1.0009726443768996, "grad_norm": 0.08034094867182404, "learning_rate": 8.550395143547641e-06, "loss": 0.5627, "step": 2060 }, { "epoch": 1.0014589665653495, "grad_norm": 0.07109104639314932, "learning_rate": 8.54904672111389e-06, "loss": 0.5245, "step": 2061 }, { "epoch": 1.0019452887537994, "grad_norm": 0.07510940006349107, "learning_rate": 8.54769777825274e-06, "loss": 0.5409, "step": 2062 }, { "epoch": 1.0024316109422493, "grad_norm": 0.07989040607054634, "learning_rate": 8.546348315161994e-06, "loss": 0.583, "step": 2063 }, { "epoch": 1.002917933130699, "grad_norm": 0.07398209704917016, "learning_rate": 8.544998332039543e-06, "loss": 0.5252, "step": 2064 }, { "epoch": 1.003404255319149, "grad_norm": 0.07474911859311478, "learning_rate": 8.54364782908334e-06, "loss": 0.5578, "step": 2065 }, { "epoch": 1.0038905775075988, "grad_norm": 0.08226936885030092, "learning_rate": 8.542296806491426e-06, "loss": 0.564, "step": 2066 }, { "epoch": 1.0043768996960487, "grad_norm": 0.08250439967948725, "learning_rate": 8.540945264461909e-06, "loss": 0.6038, "step": 2067 }, { "epoch": 1.0048632218844984, "grad_norm": 0.07329354696050824, "learning_rate": 8.53959320319298e-06, "loss": 0.5521, "step": 2068 }, { "epoch": 1.0053495440729483, "grad_norm": 0.07334265848797816, "learning_rate": 8.538240622882901e-06, "loss": 0.5549, "step": 2069 }, { "epoch": 1.0058358662613982, "grad_norm": 0.0756716614044278, "learning_rate": 8.536887523730015e-06, "loss": 0.551, "step": 2070 }, { "epoch": 1.006322188449848, "grad_norm": 0.07275129250413012, "learning_rate": 8.535533905932739e-06, "loss": 0.54, "step": 2071 }, { "epoch": 1.0068085106382978, "grad_norm": 0.07406226979470239, "learning_rate": 8.534179769689562e-06, "loss": 0.5369, "step": 2072 }, { "epoch": 1.0072948328267477, "grad_norm": 0.07280953912415816, "learning_rate": 8.532825115199057e-06, "loss": 0.5748, "step": 2073 }, { "epoch": 1.0077811550151976, "grad_norm": 0.07186372545295325, "learning_rate": 8.531469942659867e-06, "loss": 0.537, "step": 2074 }, { "epoch": 1.0082674772036475, "grad_norm": 0.07580037131962247, "learning_rate": 8.53011425227071e-06, "loss": 0.5776, "step": 2075 }, { "epoch": 1.0087537993920972, "grad_norm": 0.0745244875313004, "learning_rate": 8.528758044230386e-06, "loss": 0.5592, "step": 2076 }, { "epoch": 1.009240121580547, "grad_norm": 0.07577338322201924, "learning_rate": 8.527401318737766e-06, "loss": 0.5617, "step": 2077 }, { "epoch": 1.009726443768997, "grad_norm": 0.07154629608876042, "learning_rate": 8.526044075991801e-06, "loss": 0.5374, "step": 2078 }, { "epoch": 1.010212765957447, "grad_norm": 0.07541158295455186, "learning_rate": 8.524686316191512e-06, "loss": 0.5806, "step": 2079 }, { "epoch": 1.0106990881458966, "grad_norm": 0.07306716776333762, "learning_rate": 8.523328039536002e-06, "loss": 0.5312, "step": 2080 }, { "epoch": 1.0111854103343465, "grad_norm": 0.0777828615718599, "learning_rate": 8.521969246224442e-06, "loss": 0.5678, "step": 2081 }, { "epoch": 1.0116717325227964, "grad_norm": 0.07118144851681076, "learning_rate": 8.520609936456088e-06, "loss": 0.5409, "step": 2082 }, { "epoch": 1.012158054711246, "grad_norm": 0.08142507329757849, "learning_rate": 8.519250110430265e-06, "loss": 0.5735, "step": 2083 }, { "epoch": 1.012644376899696, "grad_norm": 0.07249102398365048, "learning_rate": 8.517889768346378e-06, "loss": 0.581, "step": 2084 }, { "epoch": 1.013130699088146, "grad_norm": 0.07310912041905886, "learning_rate": 8.516528910403906e-06, "loss": 0.5436, "step": 2085 }, { "epoch": 1.0136170212765958, "grad_norm": 0.07055727155659802, "learning_rate": 8.5151675368024e-06, "loss": 0.5759, "step": 2086 }, { "epoch": 1.0141033434650455, "grad_norm": 0.07485275721702012, "learning_rate": 8.51380564774149e-06, "loss": 0.5401, "step": 2087 }, { "epoch": 1.0145896656534954, "grad_norm": 0.07354969772707608, "learning_rate": 8.512443243420888e-06, "loss": 0.5208, "step": 2088 }, { "epoch": 1.0150759878419453, "grad_norm": 0.07059929263115745, "learning_rate": 8.511080324040371e-06, "loss": 0.5206, "step": 2089 }, { "epoch": 1.0155623100303952, "grad_norm": 0.07432317457166472, "learning_rate": 8.509716889799793e-06, "loss": 0.5244, "step": 2090 }, { "epoch": 1.016048632218845, "grad_norm": 0.06894732267377408, "learning_rate": 8.508352940899089e-06, "loss": 0.5322, "step": 2091 }, { "epoch": 1.0165349544072948, "grad_norm": 0.08592273618989658, "learning_rate": 8.506988477538267e-06, "loss": 0.5739, "step": 2092 }, { "epoch": 1.0170212765957447, "grad_norm": 0.07160035061418382, "learning_rate": 8.505623499917409e-06, "loss": 0.5209, "step": 2093 }, { "epoch": 1.0175075987841946, "grad_norm": 0.08393077406675499, "learning_rate": 8.504258008236671e-06, "loss": 0.5321, "step": 2094 }, { "epoch": 1.0179939209726443, "grad_norm": 0.07397506669026443, "learning_rate": 8.502892002696293e-06, "loss": 0.566, "step": 2095 }, { "epoch": 1.0184802431610942, "grad_norm": 0.07513818241637829, "learning_rate": 8.50152548349658e-06, "loss": 0.5579, "step": 2096 }, { "epoch": 1.018966565349544, "grad_norm": 0.07300586844794889, "learning_rate": 8.500158450837918e-06, "loss": 0.5654, "step": 2097 }, { "epoch": 1.019452887537994, "grad_norm": 0.07187787986600684, "learning_rate": 8.498790904920765e-06, "loss": 0.5697, "step": 2098 }, { "epoch": 1.0199392097264437, "grad_norm": 0.0752395730314187, "learning_rate": 8.497422845945658e-06, "loss": 0.5969, "step": 2099 }, { "epoch": 1.0204255319148936, "grad_norm": 0.07678577926050456, "learning_rate": 8.496054274113205e-06, "loss": 0.5428, "step": 2100 }, { "epoch": 1.0209118541033435, "grad_norm": 0.07508445266084898, "learning_rate": 8.494685189624094e-06, "loss": 0.5585, "step": 2101 }, { "epoch": 1.0213981762917934, "grad_norm": 0.07275078821546245, "learning_rate": 8.493315592679085e-06, "loss": 0.5519, "step": 2102 }, { "epoch": 1.021884498480243, "grad_norm": 0.07103263076957497, "learning_rate": 8.491945483479014e-06, "loss": 0.5366, "step": 2103 }, { "epoch": 1.022370820668693, "grad_norm": 0.07400272214735697, "learning_rate": 8.49057486222479e-06, "loss": 0.5263, "step": 2104 }, { "epoch": 1.022857142857143, "grad_norm": 0.07447294390255364, "learning_rate": 8.4892037291174e-06, "loss": 0.5371, "step": 2105 }, { "epoch": 1.0233434650455928, "grad_norm": 0.07703843168113399, "learning_rate": 8.487832084357908e-06, "loss": 0.5806, "step": 2106 }, { "epoch": 1.0238297872340425, "grad_norm": 0.07112136477701764, "learning_rate": 8.486459928147448e-06, "loss": 0.5482, "step": 2107 }, { "epoch": 1.0243161094224924, "grad_norm": 0.07402568629395213, "learning_rate": 8.485087260687231e-06, "loss": 0.6188, "step": 2108 }, { "epoch": 1.0248024316109423, "grad_norm": 0.0711352325681102, "learning_rate": 8.48371408217854e-06, "loss": 0.5478, "step": 2109 }, { "epoch": 1.025288753799392, "grad_norm": 0.07342794180455385, "learning_rate": 8.482340392822742e-06, "loss": 0.5695, "step": 2110 }, { "epoch": 1.025775075987842, "grad_norm": 0.07383221788704233, "learning_rate": 8.480966192821268e-06, "loss": 0.5609, "step": 2111 }, { "epoch": 1.0262613981762918, "grad_norm": 0.0712678743489981, "learning_rate": 8.47959148237563e-06, "loss": 0.532, "step": 2112 }, { "epoch": 1.0267477203647417, "grad_norm": 0.07281036787112662, "learning_rate": 8.478216261687417e-06, "loss": 0.5355, "step": 2113 }, { "epoch": 1.0272340425531914, "grad_norm": 0.07642695711546861, "learning_rate": 8.476840530958286e-06, "loss": 0.5571, "step": 2114 }, { "epoch": 1.0277203647416413, "grad_norm": 0.07681613148925025, "learning_rate": 8.475464290389974e-06, "loss": 0.5727, "step": 2115 }, { "epoch": 1.0282066869300912, "grad_norm": 0.07484996838733651, "learning_rate": 8.47408754018429e-06, "loss": 0.5595, "step": 2116 }, { "epoch": 1.0286930091185411, "grad_norm": 0.07543542146274154, "learning_rate": 8.472710280543118e-06, "loss": 0.5603, "step": 2117 }, { "epoch": 1.0291793313069908, "grad_norm": 0.07285779612769512, "learning_rate": 8.47133251166842e-06, "loss": 0.5596, "step": 2118 }, { "epoch": 1.0296656534954407, "grad_norm": 0.09095318745851422, "learning_rate": 8.469954233762228e-06, "loss": 0.5624, "step": 2119 }, { "epoch": 1.0301519756838906, "grad_norm": 0.07142018944954683, "learning_rate": 8.468575447026653e-06, "loss": 0.5547, "step": 2120 }, { "epoch": 1.0306382978723405, "grad_norm": 0.07484497432204437, "learning_rate": 8.467196151663873e-06, "loss": 0.6306, "step": 2121 }, { "epoch": 1.0311246200607902, "grad_norm": 0.07861051691327721, "learning_rate": 8.465816347876154e-06, "loss": 0.5509, "step": 2122 }, { "epoch": 1.03161094224924, "grad_norm": 0.07127396654233782, "learning_rate": 8.464436035865823e-06, "loss": 0.5271, "step": 2123 }, { "epoch": 1.03209726443769, "grad_norm": 0.07442358549394897, "learning_rate": 8.463055215835288e-06, "loss": 0.5632, "step": 2124 }, { "epoch": 1.03258358662614, "grad_norm": 0.07635231185619852, "learning_rate": 8.461673887987033e-06, "loss": 0.5719, "step": 2125 }, { "epoch": 1.0330699088145896, "grad_norm": 0.07785198446309144, "learning_rate": 8.460292052523611e-06, "loss": 0.5438, "step": 2126 }, { "epoch": 1.0335562310030395, "grad_norm": 0.07380077539325311, "learning_rate": 8.458909709647653e-06, "loss": 0.5651, "step": 2127 }, { "epoch": 1.0340425531914894, "grad_norm": 0.07545329165626526, "learning_rate": 8.457526859561867e-06, "loss": 0.5802, "step": 2128 }, { "epoch": 1.0345288753799393, "grad_norm": 0.08091225237505012, "learning_rate": 8.456143502469027e-06, "loss": 0.6121, "step": 2129 }, { "epoch": 1.035015197568389, "grad_norm": 0.07516124663988459, "learning_rate": 8.454759638571991e-06, "loss": 0.5382, "step": 2130 }, { "epoch": 1.035501519756839, "grad_norm": 0.0715082909148488, "learning_rate": 8.453375268073686e-06, "loss": 0.534, "step": 2131 }, { "epoch": 1.0359878419452888, "grad_norm": 0.07411000226810031, "learning_rate": 8.451990391177112e-06, "loss": 0.5945, "step": 2132 }, { "epoch": 1.0364741641337385, "grad_norm": 0.07672861486745983, "learning_rate": 8.450605008085348e-06, "loss": 0.5598, "step": 2133 }, { "epoch": 1.0369604863221884, "grad_norm": 0.07747130495077176, "learning_rate": 8.449219119001543e-06, "loss": 0.5802, "step": 2134 }, { "epoch": 1.0374468085106383, "grad_norm": 0.07564503208515647, "learning_rate": 8.447832724128926e-06, "loss": 0.5796, "step": 2135 }, { "epoch": 1.0379331306990882, "grad_norm": 0.07258007971835202, "learning_rate": 8.44644582367079e-06, "loss": 0.5461, "step": 2136 }, { "epoch": 1.038419452887538, "grad_norm": 0.07401389323829763, "learning_rate": 8.44505841783051e-06, "loss": 0.5335, "step": 2137 }, { "epoch": 1.0389057750759878, "grad_norm": 0.07667259321025276, "learning_rate": 8.443670506811537e-06, "loss": 0.5991, "step": 2138 }, { "epoch": 1.0393920972644377, "grad_norm": 0.06974108948664576, "learning_rate": 8.442282090817388e-06, "loss": 0.5229, "step": 2139 }, { "epoch": 1.0398784194528876, "grad_norm": 0.07294166682055703, "learning_rate": 8.440893170051658e-06, "loss": 0.5171, "step": 2140 }, { "epoch": 1.0403647416413373, "grad_norm": 0.07493317720976654, "learning_rate": 8.43950374471802e-06, "loss": 0.5671, "step": 2141 }, { "epoch": 1.0408510638297872, "grad_norm": 0.07229591864002279, "learning_rate": 8.43811381502022e-06, "loss": 0.5619, "step": 2142 }, { "epoch": 1.041337386018237, "grad_norm": 0.07110021039536353, "learning_rate": 8.436723381162066e-06, "loss": 0.5596, "step": 2143 }, { "epoch": 1.041823708206687, "grad_norm": 0.07186193090485854, "learning_rate": 8.435332443347458e-06, "loss": 0.5365, "step": 2144 }, { "epoch": 1.0423100303951367, "grad_norm": 0.07477354732914351, "learning_rate": 8.433941001780356e-06, "loss": 0.5409, "step": 2145 }, { "epoch": 1.0427963525835866, "grad_norm": 0.07244688674350228, "learning_rate": 8.432549056664802e-06, "loss": 0.5658, "step": 2146 }, { "epoch": 1.0432826747720365, "grad_norm": 0.07313689703662463, "learning_rate": 8.431156608204907e-06, "loss": 0.5382, "step": 2147 }, { "epoch": 1.0437689969604864, "grad_norm": 0.07367009641217478, "learning_rate": 8.42976365660486e-06, "loss": 0.6123, "step": 2148 }, { "epoch": 1.044255319148936, "grad_norm": 0.07780235394359179, "learning_rate": 8.42837020206892e-06, "loss": 0.5969, "step": 2149 }, { "epoch": 1.044741641337386, "grad_norm": 0.07472651715480962, "learning_rate": 8.42697624480142e-06, "loss": 0.5605, "step": 2150 }, { "epoch": 1.045227963525836, "grad_norm": 0.07354993535807795, "learning_rate": 8.425581785006773e-06, "loss": 0.5178, "step": 2151 }, { "epoch": 1.0457142857142858, "grad_norm": 0.07233547389029334, "learning_rate": 8.424186822889455e-06, "loss": 0.5358, "step": 2152 }, { "epoch": 1.0462006079027355, "grad_norm": 0.07433317637025985, "learning_rate": 8.422791358654023e-06, "loss": 0.5811, "step": 2153 }, { "epoch": 1.0466869300911854, "grad_norm": 0.07421772465514842, "learning_rate": 8.42139539250511e-06, "loss": 0.5462, "step": 2154 }, { "epoch": 1.0471732522796353, "grad_norm": 0.07434454678202498, "learning_rate": 8.419998924647412e-06, "loss": 0.5775, "step": 2155 }, { "epoch": 1.047659574468085, "grad_norm": 0.07469403535236148, "learning_rate": 8.418601955285708e-06, "loss": 0.577, "step": 2156 }, { "epoch": 1.048145896656535, "grad_norm": 0.07892199011396608, "learning_rate": 8.41720448462485e-06, "loss": 0.5913, "step": 2157 }, { "epoch": 1.0486322188449848, "grad_norm": 0.07357476284509384, "learning_rate": 8.415806512869759e-06, "loss": 0.5882, "step": 2158 }, { "epoch": 1.0491185410334347, "grad_norm": 0.07513409151134295, "learning_rate": 8.41440804022543e-06, "loss": 0.5598, "step": 2159 }, { "epoch": 1.0496048632218844, "grad_norm": 0.07461118076178293, "learning_rate": 8.413009066896938e-06, "loss": 0.5285, "step": 2160 }, { "epoch": 1.0500911854103343, "grad_norm": 0.07570819271043552, "learning_rate": 8.411609593089423e-06, "loss": 0.5576, "step": 2161 }, { "epoch": 1.0505775075987842, "grad_norm": 0.07453660945407443, "learning_rate": 8.4102096190081e-06, "loss": 0.5546, "step": 2162 }, { "epoch": 1.0510638297872341, "grad_norm": 0.07257821944566752, "learning_rate": 8.408809144858265e-06, "loss": 0.5455, "step": 2163 }, { "epoch": 1.0515501519756838, "grad_norm": 0.07159868333635545, "learning_rate": 8.407408170845277e-06, "loss": 0.552, "step": 2164 }, { "epoch": 1.0520364741641337, "grad_norm": 0.07289110625977005, "learning_rate": 8.406006697174574e-06, "loss": 0.56, "step": 2165 }, { "epoch": 1.0525227963525836, "grad_norm": 0.07220522122717925, "learning_rate": 8.404604724051668e-06, "loss": 0.5176, "step": 2166 }, { "epoch": 1.0530091185410335, "grad_norm": 0.07469138263726466, "learning_rate": 8.403202251682139e-06, "loss": 0.6139, "step": 2167 }, { "epoch": 1.0534954407294832, "grad_norm": 0.07256256065081844, "learning_rate": 8.401799280271647e-06, "loss": 0.557, "step": 2168 }, { "epoch": 1.053981762917933, "grad_norm": 0.07267374607151465, "learning_rate": 8.400395810025922e-06, "loss": 0.5475, "step": 2169 }, { "epoch": 1.054468085106383, "grad_norm": 0.07561963705071116, "learning_rate": 8.398991841150763e-06, "loss": 0.5433, "step": 2170 }, { "epoch": 1.054954407294833, "grad_norm": 0.07282401954668276, "learning_rate": 8.39758737385205e-06, "loss": 0.5537, "step": 2171 }, { "epoch": 1.0554407294832826, "grad_norm": 0.07353765326332937, "learning_rate": 8.396182408335729e-06, "loss": 0.5431, "step": 2172 }, { "epoch": 1.0559270516717325, "grad_norm": 0.0732847949635867, "learning_rate": 8.394776944807826e-06, "loss": 0.554, "step": 2173 }, { "epoch": 1.0564133738601824, "grad_norm": 0.07565713616358952, "learning_rate": 8.393370983474434e-06, "loss": 0.5731, "step": 2174 }, { "epoch": 1.0568996960486323, "grad_norm": 0.07234871606732343, "learning_rate": 8.39196452454172e-06, "loss": 0.5524, "step": 2175 }, { "epoch": 1.057386018237082, "grad_norm": 0.07668876339335134, "learning_rate": 8.39055756821593e-06, "loss": 0.5747, "step": 2176 }, { "epoch": 1.057872340425532, "grad_norm": 0.07517489810268597, "learning_rate": 8.389150114703373e-06, "loss": 0.587, "step": 2177 }, { "epoch": 1.0583586626139818, "grad_norm": 0.07301766554156319, "learning_rate": 8.387742164210438e-06, "loss": 0.566, "step": 2178 }, { "epoch": 1.0588449848024317, "grad_norm": 0.07843038786552695, "learning_rate": 8.386333716943584e-06, "loss": 0.5849, "step": 2179 }, { "epoch": 1.0593313069908814, "grad_norm": 0.07232294751002794, "learning_rate": 8.384924773109347e-06, "loss": 0.5413, "step": 2180 }, { "epoch": 1.0598176291793313, "grad_norm": 0.07343113044675709, "learning_rate": 8.38351533291433e-06, "loss": 0.5651, "step": 2181 }, { "epoch": 1.0603039513677812, "grad_norm": 0.07535488548512996, "learning_rate": 8.38210539656521e-06, "loss": 0.5435, "step": 2182 }, { "epoch": 1.060790273556231, "grad_norm": 0.07228023912142174, "learning_rate": 8.38069496426874e-06, "loss": 0.5426, "step": 2183 }, { "epoch": 1.0612765957446808, "grad_norm": 0.07474386443343015, "learning_rate": 8.379284036231745e-06, "loss": 0.5128, "step": 2184 }, { "epoch": 1.0617629179331307, "grad_norm": 0.07440189406953827, "learning_rate": 8.37787261266112e-06, "loss": 0.5551, "step": 2185 }, { "epoch": 1.0622492401215806, "grad_norm": 0.07466431745253764, "learning_rate": 8.376460693763835e-06, "loss": 0.5608, "step": 2186 }, { "epoch": 1.0627355623100303, "grad_norm": 0.07525622176944022, "learning_rate": 8.375048279746932e-06, "loss": 0.5623, "step": 2187 }, { "epoch": 1.0632218844984802, "grad_norm": 0.07562724741081166, "learning_rate": 8.373635370817524e-06, "loss": 0.5992, "step": 2188 }, { "epoch": 1.0637082066869301, "grad_norm": 0.07497321672440468, "learning_rate": 8.372221967182799e-06, "loss": 0.5698, "step": 2189 }, { "epoch": 1.06419452887538, "grad_norm": 0.0744499252787531, "learning_rate": 8.370808069050016e-06, "loss": 0.527, "step": 2190 }, { "epoch": 1.0646808510638297, "grad_norm": 0.07570711282991258, "learning_rate": 8.369393676626509e-06, "loss": 0.5876, "step": 2191 }, { "epoch": 1.0651671732522796, "grad_norm": 0.0757940071069943, "learning_rate": 8.367978790119682e-06, "loss": 0.5664, "step": 2192 }, { "epoch": 1.0656534954407295, "grad_norm": 0.07737614745719217, "learning_rate": 8.36656340973701e-06, "loss": 0.5793, "step": 2193 }, { "epoch": 1.0661398176291794, "grad_norm": 0.07437499874331091, "learning_rate": 8.365147535686044e-06, "loss": 0.5586, "step": 2194 }, { "epoch": 1.066626139817629, "grad_norm": 0.07097712926586076, "learning_rate": 8.363731168174406e-06, "loss": 0.5383, "step": 2195 }, { "epoch": 1.067112462006079, "grad_norm": 0.07620958529288846, "learning_rate": 8.36231430740979e-06, "loss": 0.593, "step": 2196 }, { "epoch": 1.067598784194529, "grad_norm": 0.07654302835048336, "learning_rate": 8.360896953599962e-06, "loss": 0.5657, "step": 2197 }, { "epoch": 1.0680851063829788, "grad_norm": 0.07313120306302988, "learning_rate": 8.359479106952761e-06, "loss": 0.5526, "step": 2198 }, { "epoch": 1.0685714285714285, "grad_norm": 0.07309142908696792, "learning_rate": 8.3580607676761e-06, "loss": 0.5856, "step": 2199 }, { "epoch": 1.0690577507598784, "grad_norm": 0.07030504162244383, "learning_rate": 8.356641935977959e-06, "loss": 0.5146, "step": 2200 }, { "epoch": 1.0695440729483283, "grad_norm": 0.07316451380685493, "learning_rate": 8.355222612066397e-06, "loss": 0.5773, "step": 2201 }, { "epoch": 1.070030395136778, "grad_norm": 0.07308678809183129, "learning_rate": 8.353802796149537e-06, "loss": 0.5366, "step": 2202 }, { "epoch": 1.070516717325228, "grad_norm": 0.07459788396881113, "learning_rate": 8.352382488435585e-06, "loss": 0.5592, "step": 2203 }, { "epoch": 1.0710030395136778, "grad_norm": 0.0726918593860298, "learning_rate": 8.350961689132808e-06, "loss": 0.5355, "step": 2204 }, { "epoch": 1.0714893617021277, "grad_norm": 0.07542305808351824, "learning_rate": 8.349540398449551e-06, "loss": 0.5486, "step": 2205 }, { "epoch": 1.0719756838905776, "grad_norm": 0.07345811923448287, "learning_rate": 8.348118616594234e-06, "loss": 0.5632, "step": 2206 }, { "epoch": 1.0724620060790273, "grad_norm": 0.07618050686783405, "learning_rate": 8.346696343775342e-06, "loss": 0.5728, "step": 2207 }, { "epoch": 1.0729483282674772, "grad_norm": 0.07286824931810211, "learning_rate": 8.345273580201434e-06, "loss": 0.509, "step": 2208 }, { "epoch": 1.0734346504559271, "grad_norm": 0.07321153833555988, "learning_rate": 8.343850326081144e-06, "loss": 0.5399, "step": 2209 }, { "epoch": 1.0739209726443768, "grad_norm": 0.07178865134780009, "learning_rate": 8.342426581623175e-06, "loss": 0.5618, "step": 2210 }, { "epoch": 1.0744072948328267, "grad_norm": 0.08005915401669642, "learning_rate": 8.341002347036304e-06, "loss": 0.6187, "step": 2211 }, { "epoch": 1.0748936170212766, "grad_norm": 0.07106382847881387, "learning_rate": 8.33957762252938e-06, "loss": 0.5496, "step": 2212 }, { "epoch": 1.0753799392097265, "grad_norm": 0.07067027929768914, "learning_rate": 8.338152408311319e-06, "loss": 0.553, "step": 2213 }, { "epoch": 1.0758662613981762, "grad_norm": 0.0734579768255136, "learning_rate": 8.336726704591115e-06, "loss": 0.5899, "step": 2214 }, { "epoch": 1.076352583586626, "grad_norm": 0.07765415774627402, "learning_rate": 8.33530051157783e-06, "loss": 0.5516, "step": 2215 }, { "epoch": 1.076838905775076, "grad_norm": 0.07842763607023047, "learning_rate": 8.333873829480603e-06, "loss": 0.5858, "step": 2216 }, { "epoch": 1.077325227963526, "grad_norm": 0.07280806452633432, "learning_rate": 8.332446658508635e-06, "loss": 0.5336, "step": 2217 }, { "epoch": 1.0778115501519756, "grad_norm": 0.07328412399923136, "learning_rate": 8.331018998871207e-06, "loss": 0.5515, "step": 2218 }, { "epoch": 1.0782978723404255, "grad_norm": 0.07004925866365012, "learning_rate": 8.32959085077767e-06, "loss": 0.5217, "step": 2219 }, { "epoch": 1.0787841945288754, "grad_norm": 0.07724885337452692, "learning_rate": 8.328162214437445e-06, "loss": 0.5509, "step": 2220 }, { "epoch": 1.0792705167173253, "grad_norm": 0.07403290060850776, "learning_rate": 8.326733090060022e-06, "loss": 0.5188, "step": 2221 }, { "epoch": 1.079756838905775, "grad_norm": 0.07350000907247445, "learning_rate": 8.325303477854972e-06, "loss": 0.5425, "step": 2222 }, { "epoch": 1.080243161094225, "grad_norm": 0.0724690874642837, "learning_rate": 8.323873378031929e-06, "loss": 0.5446, "step": 2223 }, { "epoch": 1.0807294832826748, "grad_norm": 0.07519148328232632, "learning_rate": 8.322442790800597e-06, "loss": 0.5325, "step": 2224 }, { "epoch": 1.0812158054711247, "grad_norm": 0.07463718551271542, "learning_rate": 8.32101171637076e-06, "loss": 0.5579, "step": 2225 }, { "epoch": 1.0817021276595744, "grad_norm": 0.07443179556519772, "learning_rate": 8.319580154952266e-06, "loss": 0.5528, "step": 2226 }, { "epoch": 1.0821884498480243, "grad_norm": 0.07014470338470903, "learning_rate": 8.318148106755042e-06, "loss": 0.5706, "step": 2227 }, { "epoch": 1.0826747720364742, "grad_norm": 0.07177509490248288, "learning_rate": 8.316715571989076e-06, "loss": 0.5604, "step": 2228 }, { "epoch": 1.083161094224924, "grad_norm": 0.07217672580997961, "learning_rate": 8.315282550864437e-06, "loss": 0.5295, "step": 2229 }, { "epoch": 1.0836474164133738, "grad_norm": 0.07944460307309677, "learning_rate": 8.313849043591257e-06, "loss": 0.6133, "step": 2230 }, { "epoch": 1.0841337386018237, "grad_norm": 0.07032441954062435, "learning_rate": 8.312415050379747e-06, "loss": 0.5335, "step": 2231 }, { "epoch": 1.0846200607902736, "grad_norm": 0.07353808812680972, "learning_rate": 8.310980571440184e-06, "loss": 0.5421, "step": 2232 }, { "epoch": 1.0851063829787233, "grad_norm": 0.06888454283672282, "learning_rate": 8.309545606982921e-06, "loss": 0.5053, "step": 2233 }, { "epoch": 1.0855927051671732, "grad_norm": 0.07420964223007172, "learning_rate": 8.308110157218375e-06, "loss": 0.553, "step": 2234 }, { "epoch": 1.0860790273556231, "grad_norm": 0.0740488375265095, "learning_rate": 8.306674222357042e-06, "loss": 0.5475, "step": 2235 }, { "epoch": 1.086565349544073, "grad_norm": 0.07710966058515396, "learning_rate": 8.305237802609482e-06, "loss": 0.6266, "step": 2236 }, { "epoch": 1.0870516717325227, "grad_norm": 0.0713823660595672, "learning_rate": 8.303800898186334e-06, "loss": 0.5446, "step": 2237 }, { "epoch": 1.0875379939209726, "grad_norm": 0.07380580777069042, "learning_rate": 8.302363509298301e-06, "loss": 0.5906, "step": 2238 }, { "epoch": 1.0880243161094225, "grad_norm": 0.0729766720411248, "learning_rate": 8.300925636156159e-06, "loss": 0.5316, "step": 2239 }, { "epoch": 1.0885106382978724, "grad_norm": 0.07237347169294653, "learning_rate": 8.299487278970759e-06, "loss": 0.5441, "step": 2240 }, { "epoch": 1.088996960486322, "grad_norm": 0.07251047475207656, "learning_rate": 8.298048437953016e-06, "loss": 0.5296, "step": 2241 }, { "epoch": 1.089483282674772, "grad_norm": 0.06924194952532757, "learning_rate": 8.296609113313922e-06, "loss": 0.5259, "step": 2242 }, { "epoch": 1.089969604863222, "grad_norm": 0.07397881628415039, "learning_rate": 8.295169305264537e-06, "loss": 0.5775, "step": 2243 }, { "epoch": 1.0904559270516718, "grad_norm": 0.07407185351497901, "learning_rate": 8.293729014015992e-06, "loss": 0.566, "step": 2244 }, { "epoch": 1.0909422492401215, "grad_norm": 0.07206987544427718, "learning_rate": 8.292288239779488e-06, "loss": 0.5652, "step": 2245 }, { "epoch": 1.0914285714285714, "grad_norm": 0.07477198142835094, "learning_rate": 8.290846982766305e-06, "loss": 0.5506, "step": 2246 }, { "epoch": 1.0919148936170213, "grad_norm": 0.07233292901579166, "learning_rate": 8.289405243187778e-06, "loss": 0.5605, "step": 2247 }, { "epoch": 1.0924012158054712, "grad_norm": 0.07297230216106336, "learning_rate": 8.287963021255328e-06, "loss": 0.5256, "step": 2248 }, { "epoch": 1.092887537993921, "grad_norm": 0.07555470857502421, "learning_rate": 8.286520317180436e-06, "loss": 0.5665, "step": 2249 }, { "epoch": 1.0933738601823708, "grad_norm": 0.07414197887063483, "learning_rate": 8.285077131174661e-06, "loss": 0.5274, "step": 2250 }, { "epoch": 1.0938601823708207, "grad_norm": 0.0708868582550793, "learning_rate": 8.283633463449632e-06, "loss": 0.5679, "step": 2251 }, { "epoch": 1.0943465045592706, "grad_norm": 0.07733633582481737, "learning_rate": 8.282189314217041e-06, "loss": 0.5828, "step": 2252 }, { "epoch": 1.0948328267477203, "grad_norm": 0.07256848277708165, "learning_rate": 8.28074468368866e-06, "loss": 0.5363, "step": 2253 }, { "epoch": 1.0953191489361702, "grad_norm": 0.08374072509978993, "learning_rate": 8.279299572076325e-06, "loss": 0.598, "step": 2254 }, { "epoch": 1.0958054711246201, "grad_norm": 0.07690293949297158, "learning_rate": 8.277853979591947e-06, "loss": 0.5707, "step": 2255 }, { "epoch": 1.0962917933130698, "grad_norm": 0.07592035379760226, "learning_rate": 8.276407906447506e-06, "loss": 0.5752, "step": 2256 }, { "epoch": 1.0967781155015197, "grad_norm": 0.0738589124225445, "learning_rate": 8.274961352855052e-06, "loss": 0.5505, "step": 2257 }, { "epoch": 1.0972644376899696, "grad_norm": 0.07401731823831176, "learning_rate": 8.273514319026704e-06, "loss": 0.5775, "step": 2258 }, { "epoch": 1.0977507598784195, "grad_norm": 0.07492825265528798, "learning_rate": 8.272066805174656e-06, "loss": 0.5767, "step": 2259 }, { "epoch": 1.0982370820668692, "grad_norm": 0.07443394569593802, "learning_rate": 8.270618811511166e-06, "loss": 0.5761, "step": 2260 }, { "epoch": 1.0987234042553191, "grad_norm": 0.0747289140034974, "learning_rate": 8.269170338248569e-06, "loss": 0.5767, "step": 2261 }, { "epoch": 1.099209726443769, "grad_norm": 0.07274394752087299, "learning_rate": 8.267721385599265e-06, "loss": 0.5221, "step": 2262 }, { "epoch": 1.099696048632219, "grad_norm": 0.07658406691689554, "learning_rate": 8.266271953775729e-06, "loss": 0.5909, "step": 2263 }, { "epoch": 1.1001823708206686, "grad_norm": 0.07421433969434833, "learning_rate": 8.2648220429905e-06, "loss": 0.5568, "step": 2264 }, { "epoch": 1.1006686930091185, "grad_norm": 0.072874222443183, "learning_rate": 8.263371653456193e-06, "loss": 0.5493, "step": 2265 }, { "epoch": 1.1011550151975684, "grad_norm": 0.06875869349577292, "learning_rate": 8.26192078538549e-06, "loss": 0.5013, "step": 2266 }, { "epoch": 1.1016413373860183, "grad_norm": 0.07111814410994231, "learning_rate": 8.260469438991147e-06, "loss": 0.5305, "step": 2267 }, { "epoch": 1.102127659574468, "grad_norm": 0.07018516477532395, "learning_rate": 8.259017614485987e-06, "loss": 0.5533, "step": 2268 }, { "epoch": 1.102613981762918, "grad_norm": 0.07599691791673922, "learning_rate": 8.2575653120829e-06, "loss": 0.5559, "step": 2269 }, { "epoch": 1.1031003039513678, "grad_norm": 0.07667748011043014, "learning_rate": 8.256112531994855e-06, "loss": 0.5499, "step": 2270 }, { "epoch": 1.1035866261398177, "grad_norm": 0.07102692839201065, "learning_rate": 8.25465927443488e-06, "loss": 0.5686, "step": 2271 }, { "epoch": 1.1040729483282674, "grad_norm": 0.0713289531214979, "learning_rate": 8.253205539616083e-06, "loss": 0.5698, "step": 2272 }, { "epoch": 1.1045592705167173, "grad_norm": 0.07311010381473355, "learning_rate": 8.251751327751636e-06, "loss": 0.5533, "step": 2273 }, { "epoch": 1.1050455927051672, "grad_norm": 0.07378183356170921, "learning_rate": 8.250296639054782e-06, "loss": 0.5738, "step": 2274 }, { "epoch": 1.105531914893617, "grad_norm": 0.06968460024663366, "learning_rate": 8.248841473738836e-06, "loss": 0.54, "step": 2275 }, { "epoch": 1.1060182370820668, "grad_norm": 0.07682691308786616, "learning_rate": 8.247385832017182e-06, "loss": 0.5986, "step": 2276 }, { "epoch": 1.1065045592705167, "grad_norm": 0.0707884963401753, "learning_rate": 8.24592971410327e-06, "loss": 0.5275, "step": 2277 }, { "epoch": 1.1069908814589666, "grad_norm": 0.07164579476861906, "learning_rate": 8.244473120210628e-06, "loss": 0.5513, "step": 2278 }, { "epoch": 1.1074772036474165, "grad_norm": 0.07290309222367687, "learning_rate": 8.243016050552843e-06, "loss": 0.5723, "step": 2279 }, { "epoch": 1.1079635258358662, "grad_norm": 0.07797427227668575, "learning_rate": 8.241558505343584e-06, "loss": 0.6045, "step": 2280 }, { "epoch": 1.1084498480243161, "grad_norm": 0.07626168056439447, "learning_rate": 8.240100484796581e-06, "loss": 0.5946, "step": 2281 }, { "epoch": 1.108936170212766, "grad_norm": 0.07078858084019193, "learning_rate": 8.238641989125633e-06, "loss": 0.5175, "step": 2282 }, { "epoch": 1.1094224924012157, "grad_norm": 0.07316825569301089, "learning_rate": 8.237183018544617e-06, "loss": 0.5412, "step": 2283 }, { "epoch": 1.1099088145896656, "grad_norm": 0.0735351716808662, "learning_rate": 8.23572357326747e-06, "loss": 0.5534, "step": 2284 }, { "epoch": 1.1103951367781155, "grad_norm": 0.07391375176435087, "learning_rate": 8.234263653508205e-06, "loss": 0.566, "step": 2285 }, { "epoch": 1.1108814589665654, "grad_norm": 0.07628396721234228, "learning_rate": 8.232803259480903e-06, "loss": 0.5795, "step": 2286 }, { "epoch": 1.111367781155015, "grad_norm": 0.07346587884926059, "learning_rate": 8.231342391399715e-06, "loss": 0.5421, "step": 2287 }, { "epoch": 1.111854103343465, "grad_norm": 0.07084422266219942, "learning_rate": 8.229881049478859e-06, "loss": 0.5736, "step": 2288 }, { "epoch": 1.112340425531915, "grad_norm": 0.07085885676595888, "learning_rate": 8.228419233932625e-06, "loss": 0.557, "step": 2289 }, { "epoch": 1.1128267477203648, "grad_norm": 0.07310875931804299, "learning_rate": 8.226956944975371e-06, "loss": 0.5325, "step": 2290 }, { "epoch": 1.1133130699088145, "grad_norm": 0.0713775887788025, "learning_rate": 8.225494182821526e-06, "loss": 0.5739, "step": 2291 }, { "epoch": 1.1137993920972644, "grad_norm": 0.07465749052223648, "learning_rate": 8.224030947685588e-06, "loss": 0.5519, "step": 2292 }, { "epoch": 1.1142857142857143, "grad_norm": 0.07363670927009504, "learning_rate": 8.222567239782122e-06, "loss": 0.5485, "step": 2293 }, { "epoch": 1.1147720364741642, "grad_norm": 0.07347708471026415, "learning_rate": 8.221103059325764e-06, "loss": 0.572, "step": 2294 }, { "epoch": 1.115258358662614, "grad_norm": 0.07382179111154043, "learning_rate": 8.21963840653122e-06, "loss": 0.5408, "step": 2295 }, { "epoch": 1.1157446808510638, "grad_norm": 0.0725446340206082, "learning_rate": 8.218173281613266e-06, "loss": 0.5648, "step": 2296 }, { "epoch": 1.1162310030395137, "grad_norm": 0.07297901421589112, "learning_rate": 8.216707684786747e-06, "loss": 0.5075, "step": 2297 }, { "epoch": 1.1167173252279636, "grad_norm": 0.07355275080077124, "learning_rate": 8.215241616266572e-06, "loss": 0.5676, "step": 2298 }, { "epoch": 1.1172036474164133, "grad_norm": 0.0780838701470014, "learning_rate": 8.213775076267725e-06, "loss": 0.5872, "step": 2299 }, { "epoch": 1.1176899696048632, "grad_norm": 0.07437635816784668, "learning_rate": 8.212308065005258e-06, "loss": 0.5528, "step": 2300 }, { "epoch": 1.1181762917933131, "grad_norm": 0.07255265887762759, "learning_rate": 8.210840582694292e-06, "loss": 0.5425, "step": 2301 }, { "epoch": 1.1186626139817628, "grad_norm": 0.07222145230841452, "learning_rate": 8.209372629550018e-06, "loss": 0.5325, "step": 2302 }, { "epoch": 1.1191489361702127, "grad_norm": 0.0756938581315832, "learning_rate": 8.20790420578769e-06, "loss": 0.5757, "step": 2303 }, { "epoch": 1.1196352583586626, "grad_norm": 0.07315454133058819, "learning_rate": 8.206435311622641e-06, "loss": 0.5478, "step": 2304 }, { "epoch": 1.1201215805471125, "grad_norm": 0.07108540784682003, "learning_rate": 8.204965947270263e-06, "loss": 0.5295, "step": 2305 }, { "epoch": 1.1206079027355622, "grad_norm": 0.07104173942728903, "learning_rate": 8.203496112946024e-06, "loss": 0.5281, "step": 2306 }, { "epoch": 1.1210942249240121, "grad_norm": 0.07256337848384752, "learning_rate": 8.202025808865457e-06, "loss": 0.5636, "step": 2307 }, { "epoch": 1.121580547112462, "grad_norm": 0.07464557496901748, "learning_rate": 8.20055503524417e-06, "loss": 0.5596, "step": 2308 }, { "epoch": 1.122066869300912, "grad_norm": 0.06911534907101134, "learning_rate": 8.199083792297828e-06, "loss": 0.5198, "step": 2309 }, { "epoch": 1.1225531914893616, "grad_norm": 0.07364494487904293, "learning_rate": 8.197612080242176e-06, "loss": 0.5522, "step": 2310 }, { "epoch": 1.1230395136778115, "grad_norm": 0.0776368366618566, "learning_rate": 8.196139899293026e-06, "loss": 0.605, "step": 2311 }, { "epoch": 1.1235258358662614, "grad_norm": 0.07438549464229299, "learning_rate": 8.194667249666252e-06, "loss": 0.5465, "step": 2312 }, { "epoch": 1.1240121580547113, "grad_norm": 0.07548215385303386, "learning_rate": 8.193194131577807e-06, "loss": 0.5587, "step": 2313 }, { "epoch": 1.124498480243161, "grad_norm": 0.09125662242222138, "learning_rate": 8.191720545243702e-06, "loss": 0.5984, "step": 2314 }, { "epoch": 1.124984802431611, "grad_norm": 0.07428471493036362, "learning_rate": 8.190246490880022e-06, "loss": 0.5864, "step": 2315 }, { "epoch": 1.1254711246200608, "grad_norm": 0.07233519563341763, "learning_rate": 8.188771968702924e-06, "loss": 0.5337, "step": 2316 }, { "epoch": 1.1259574468085107, "grad_norm": 0.07468656713441423, "learning_rate": 8.187296978928626e-06, "loss": 0.5479, "step": 2317 }, { "epoch": 1.1264437689969604, "grad_norm": 0.07203886426181973, "learning_rate": 8.18582152177342e-06, "loss": 0.5825, "step": 2318 }, { "epoch": 1.1269300911854103, "grad_norm": 0.07114598694440448, "learning_rate": 8.184345597453668e-06, "loss": 0.5387, "step": 2319 }, { "epoch": 1.1274164133738602, "grad_norm": 0.07372910166719628, "learning_rate": 8.182869206185793e-06, "loss": 0.5398, "step": 2320 }, { "epoch": 1.12790273556231, "grad_norm": 0.07590526077415068, "learning_rate": 8.181392348186292e-06, "loss": 0.5646, "step": 2321 }, { "epoch": 1.1283890577507598, "grad_norm": 0.07348618862183959, "learning_rate": 8.17991502367173e-06, "loss": 0.5337, "step": 2322 }, { "epoch": 1.1288753799392097, "grad_norm": 0.07230980408532811, "learning_rate": 8.178437232858743e-06, "loss": 0.5361, "step": 2323 }, { "epoch": 1.1293617021276596, "grad_norm": 0.07326929881544383, "learning_rate": 8.176958975964027e-06, "loss": 0.5531, "step": 2324 }, { "epoch": 1.1298480243161095, "grad_norm": 0.07568903508588357, "learning_rate": 8.175480253204354e-06, "loss": 0.5344, "step": 2325 }, { "epoch": 1.1303343465045592, "grad_norm": 0.07817467765739233, "learning_rate": 8.174001064796561e-06, "loss": 0.6008, "step": 2326 }, { "epoch": 1.1308206686930091, "grad_norm": 0.07577221864945845, "learning_rate": 8.172521410957556e-06, "loss": 0.5538, "step": 2327 }, { "epoch": 1.131306990881459, "grad_norm": 0.08501073684948966, "learning_rate": 8.171041291904314e-06, "loss": 0.5968, "step": 2328 }, { "epoch": 1.1317933130699087, "grad_norm": 0.07583377031660356, "learning_rate": 8.169560707853875e-06, "loss": 0.5612, "step": 2329 }, { "epoch": 1.1322796352583586, "grad_norm": 0.07264208084143413, "learning_rate": 8.168079659023349e-06, "loss": 0.5479, "step": 2330 }, { "epoch": 1.1327659574468085, "grad_norm": 0.07409268626914883, "learning_rate": 8.16659814562992e-06, "loss": 0.547, "step": 2331 }, { "epoch": 1.1332522796352584, "grad_norm": 0.07250890432028849, "learning_rate": 8.16511616789083e-06, "loss": 0.5613, "step": 2332 }, { "epoch": 1.1337386018237081, "grad_norm": 0.07470000985304677, "learning_rate": 8.163633726023397e-06, "loss": 0.5722, "step": 2333 }, { "epoch": 1.134224924012158, "grad_norm": 0.07379213284018293, "learning_rate": 8.162150820245005e-06, "loss": 0.5662, "step": 2334 }, { "epoch": 1.134711246200608, "grad_norm": 0.07258929104067394, "learning_rate": 8.1606674507731e-06, "loss": 0.5402, "step": 2335 }, { "epoch": 1.1351975683890578, "grad_norm": 0.07584517661540127, "learning_rate": 8.159183617825208e-06, "loss": 0.5801, "step": 2336 }, { "epoch": 1.1356838905775075, "grad_norm": 0.07251319222361043, "learning_rate": 8.157699321618912e-06, "loss": 0.5039, "step": 2337 }, { "epoch": 1.1361702127659574, "grad_norm": 0.07237410467070883, "learning_rate": 8.156214562371872e-06, "loss": 0.5528, "step": 2338 }, { "epoch": 1.1366565349544073, "grad_norm": 0.07391677045271353, "learning_rate": 8.154729340301803e-06, "loss": 0.5409, "step": 2339 }, { "epoch": 1.1371428571428572, "grad_norm": 0.07314975802655224, "learning_rate": 8.153243655626501e-06, "loss": 0.5556, "step": 2340 }, { "epoch": 1.137629179331307, "grad_norm": 0.07315068503315438, "learning_rate": 8.151757508563828e-06, "loss": 0.5752, "step": 2341 }, { "epoch": 1.1381155015197568, "grad_norm": 0.07444581458029567, "learning_rate": 8.150270899331704e-06, "loss": 0.5481, "step": 2342 }, { "epoch": 1.1386018237082067, "grad_norm": 0.08269614720845281, "learning_rate": 8.148783828148127e-06, "loss": 0.5925, "step": 2343 }, { "epoch": 1.1390881458966566, "grad_norm": 0.07279040538769634, "learning_rate": 8.147296295231158e-06, "loss": 0.6029, "step": 2344 }, { "epoch": 1.1395744680851063, "grad_norm": 0.07399682151655745, "learning_rate": 8.145808300798929e-06, "loss": 0.5589, "step": 2345 }, { "epoch": 1.1400607902735562, "grad_norm": 0.07046144666284118, "learning_rate": 8.144319845069635e-06, "loss": 0.5361, "step": 2346 }, { "epoch": 1.1405471124620061, "grad_norm": 0.07476559736693897, "learning_rate": 8.14283092826154e-06, "loss": 0.5619, "step": 2347 }, { "epoch": 1.1410334346504558, "grad_norm": 0.07259863378385552, "learning_rate": 8.14134155059298e-06, "loss": 0.5454, "step": 2348 }, { "epoch": 1.1415197568389057, "grad_norm": 0.07290388885427557, "learning_rate": 8.139851712282354e-06, "loss": 0.5437, "step": 2349 }, { "epoch": 1.1420060790273556, "grad_norm": 0.07247692548828968, "learning_rate": 8.138361413548129e-06, "loss": 0.5889, "step": 2350 }, { "epoch": 1.1424924012158055, "grad_norm": 0.07299489110850316, "learning_rate": 8.136870654608842e-06, "loss": 0.5922, "step": 2351 }, { "epoch": 1.1429787234042554, "grad_norm": 0.07369963670994291, "learning_rate": 8.135379435683093e-06, "loss": 0.583, "step": 2352 }, { "epoch": 1.1434650455927051, "grad_norm": 0.07816873752694214, "learning_rate": 8.133887756989558e-06, "loss": 0.5877, "step": 2353 }, { "epoch": 1.143951367781155, "grad_norm": 0.07520663338779614, "learning_rate": 8.132395618746968e-06, "loss": 0.5627, "step": 2354 }, { "epoch": 1.144437689969605, "grad_norm": 0.07272354248253457, "learning_rate": 8.130903021174133e-06, "loss": 0.5721, "step": 2355 }, { "epoch": 1.1449240121580546, "grad_norm": 0.0721093954128523, "learning_rate": 8.129409964489922e-06, "loss": 0.5434, "step": 2356 }, { "epoch": 1.1454103343465045, "grad_norm": 0.07081119583169738, "learning_rate": 8.127916448913279e-06, "loss": 0.5178, "step": 2357 }, { "epoch": 1.1458966565349544, "grad_norm": 0.07449602768115966, "learning_rate": 8.126422474663205e-06, "loss": 0.5669, "step": 2358 }, { "epoch": 1.1463829787234043, "grad_norm": 0.07772632051068967, "learning_rate": 8.124928041958782e-06, "loss": 0.622, "step": 2359 }, { "epoch": 1.146869300911854, "grad_norm": 0.07740272001670627, "learning_rate": 8.123433151019145e-06, "loss": 0.5525, "step": 2360 }, { "epoch": 1.147355623100304, "grad_norm": 0.07279554553202595, "learning_rate": 8.121937802063506e-06, "loss": 0.5496, "step": 2361 }, { "epoch": 1.1478419452887538, "grad_norm": 0.0743936126173316, "learning_rate": 8.120441995311142e-06, "loss": 0.5526, "step": 2362 }, { "epoch": 1.1483282674772037, "grad_norm": 0.07477752989667144, "learning_rate": 8.118945730981391e-06, "loss": 0.5545, "step": 2363 }, { "epoch": 1.1488145896656534, "grad_norm": 0.07226408996123988, "learning_rate": 8.117449009293668e-06, "loss": 0.5303, "step": 2364 }, { "epoch": 1.1493009118541033, "grad_norm": 0.0744115269384535, "learning_rate": 8.11595183046745e-06, "loss": 0.5725, "step": 2365 }, { "epoch": 1.1497872340425532, "grad_norm": 0.07881001847565439, "learning_rate": 8.114454194722277e-06, "loss": 0.558, "step": 2366 }, { "epoch": 1.1502735562310031, "grad_norm": 0.07023724203490371, "learning_rate": 8.112956102277768e-06, "loss": 0.5424, "step": 2367 }, { "epoch": 1.1507598784194528, "grad_norm": 0.07304456908262345, "learning_rate": 8.111457553353593e-06, "loss": 0.5441, "step": 2368 }, { "epoch": 1.1512462006079027, "grad_norm": 0.07323779317396077, "learning_rate": 8.109958548169502e-06, "loss": 0.538, "step": 2369 }, { "epoch": 1.1517325227963526, "grad_norm": 0.07918191480121578, "learning_rate": 8.108459086945304e-06, "loss": 0.5225, "step": 2370 }, { "epoch": 1.1522188449848025, "grad_norm": 0.07288681865481134, "learning_rate": 8.10695916990088e-06, "loss": 0.5817, "step": 2371 }, { "epoch": 1.1527051671732522, "grad_norm": 0.07480566253435578, "learning_rate": 8.105458797256178e-06, "loss": 0.5153, "step": 2372 }, { "epoch": 1.1531914893617021, "grad_norm": 0.07325030843769754, "learning_rate": 8.103957969231209e-06, "loss": 0.5772, "step": 2373 }, { "epoch": 1.153677811550152, "grad_norm": 0.07258208535140075, "learning_rate": 8.102456686046049e-06, "loss": 0.576, "step": 2374 }, { "epoch": 1.1541641337386017, "grad_norm": 0.07551636128247924, "learning_rate": 8.100954947920848e-06, "loss": 0.5704, "step": 2375 }, { "epoch": 1.1546504559270516, "grad_norm": 0.07388579042972913, "learning_rate": 8.099452755075816e-06, "loss": 0.5568, "step": 2376 }, { "epoch": 1.1551367781155015, "grad_norm": 0.07007502527740003, "learning_rate": 8.097950107731233e-06, "loss": 0.5261, "step": 2377 }, { "epoch": 1.1556231003039514, "grad_norm": 0.0710994571298559, "learning_rate": 8.09644700610745e-06, "loss": 0.5547, "step": 2378 }, { "epoch": 1.1561094224924013, "grad_norm": 0.0725557276806488, "learning_rate": 8.094943450424874e-06, "loss": 0.5456, "step": 2379 }, { "epoch": 1.156595744680851, "grad_norm": 0.07050151057099496, "learning_rate": 8.093439440903988e-06, "loss": 0.5467, "step": 2380 }, { "epoch": 1.157082066869301, "grad_norm": 0.07348609928936829, "learning_rate": 8.091934977765335e-06, "loss": 0.5249, "step": 2381 }, { "epoch": 1.1575683890577508, "grad_norm": 0.07275185521050123, "learning_rate": 8.090430061229528e-06, "loss": 0.5669, "step": 2382 }, { "epoch": 1.1580547112462005, "grad_norm": 0.07142429747405558, "learning_rate": 8.088924691517246e-06, "loss": 0.5243, "step": 2383 }, { "epoch": 1.1585410334346504, "grad_norm": 0.0694540819435593, "learning_rate": 8.087418868849239e-06, "loss": 0.5458, "step": 2384 }, { "epoch": 1.1590273556231003, "grad_norm": 0.07172422515760193, "learning_rate": 8.08591259344631e-06, "loss": 0.5192, "step": 2385 }, { "epoch": 1.1595136778115502, "grad_norm": 0.07140406545204077, "learning_rate": 8.084405865529345e-06, "loss": 0.5989, "step": 2386 }, { "epoch": 1.16, "grad_norm": 0.06955197600711828, "learning_rate": 8.082898685319285e-06, "loss": 0.5427, "step": 2387 }, { "epoch": 1.1604863221884498, "grad_norm": 0.07217385558971261, "learning_rate": 8.081391053037141e-06, "loss": 0.5604, "step": 2388 }, { "epoch": 1.1609726443768997, "grad_norm": 0.0727474916000304, "learning_rate": 8.079882968903991e-06, "loss": 0.5709, "step": 2389 }, { "epoch": 1.1614589665653496, "grad_norm": 0.07219831244740385, "learning_rate": 8.078374433140978e-06, "loss": 0.5509, "step": 2390 }, { "epoch": 1.1619452887537993, "grad_norm": 0.07554260473766503, "learning_rate": 8.076865445969313e-06, "loss": 0.5894, "step": 2391 }, { "epoch": 1.1624316109422492, "grad_norm": 0.07242202238091798, "learning_rate": 8.07535600761027e-06, "loss": 0.5359, "step": 2392 }, { "epoch": 1.1629179331306991, "grad_norm": 0.0713802542785267, "learning_rate": 8.07384611828519e-06, "loss": 0.5151, "step": 2393 }, { "epoch": 1.1634042553191488, "grad_norm": 0.07036634028632871, "learning_rate": 8.072335778215482e-06, "loss": 0.5202, "step": 2394 }, { "epoch": 1.1638905775075987, "grad_norm": 0.07384883135264404, "learning_rate": 8.070824987622622e-06, "loss": 0.5426, "step": 2395 }, { "epoch": 1.1643768996960486, "grad_norm": 0.07494430281618236, "learning_rate": 8.069313746728149e-06, "loss": 0.537, "step": 2396 }, { "epoch": 1.1648632218844985, "grad_norm": 0.0708101239525627, "learning_rate": 8.067802055753668e-06, "loss": 0.5805, "step": 2397 }, { "epoch": 1.1653495440729484, "grad_norm": 0.07304393953637715, "learning_rate": 8.066289914920855e-06, "loss": 0.5805, "step": 2398 }, { "epoch": 1.1658358662613981, "grad_norm": 0.07248427654023368, "learning_rate": 8.064777324451445e-06, "loss": 0.5456, "step": 2399 }, { "epoch": 1.166322188449848, "grad_norm": 0.07168278920822603, "learning_rate": 8.063264284567245e-06, "loss": 0.5325, "step": 2400 }, { "epoch": 1.166808510638298, "grad_norm": 0.07260304773273424, "learning_rate": 8.061750795490121e-06, "loss": 0.5421, "step": 2401 }, { "epoch": 1.1672948328267476, "grad_norm": 0.07073240793528326, "learning_rate": 8.060236857442013e-06, "loss": 0.5444, "step": 2402 }, { "epoch": 1.1677811550151975, "grad_norm": 0.07124751379094929, "learning_rate": 8.058722470644919e-06, "loss": 0.5538, "step": 2403 }, { "epoch": 1.1682674772036474, "grad_norm": 0.07255706674488437, "learning_rate": 8.05720763532091e-06, "loss": 0.5956, "step": 2404 }, { "epoch": 1.1687537993920973, "grad_norm": 0.07514028642949143, "learning_rate": 8.055692351692118e-06, "loss": 0.5598, "step": 2405 }, { "epoch": 1.169240121580547, "grad_norm": 0.07435480683259586, "learning_rate": 8.054176619980742e-06, "loss": 0.575, "step": 2406 }, { "epoch": 1.169726443768997, "grad_norm": 0.06917363009187513, "learning_rate": 8.052660440409049e-06, "loss": 0.5447, "step": 2407 }, { "epoch": 1.1702127659574468, "grad_norm": 0.07660782953715432, "learning_rate": 8.051143813199366e-06, "loss": 0.5436, "step": 2408 }, { "epoch": 1.1706990881458967, "grad_norm": 0.07096661355467314, "learning_rate": 8.049626738574091e-06, "loss": 0.5357, "step": 2409 }, { "epoch": 1.1711854103343464, "grad_norm": 0.0723918015905883, "learning_rate": 8.048109216755687e-06, "loss": 0.564, "step": 2410 }, { "epoch": 1.1716717325227963, "grad_norm": 0.07240156024247478, "learning_rate": 8.046591247966677e-06, "loss": 0.5579, "step": 2411 }, { "epoch": 1.1721580547112462, "grad_norm": 0.07499967279942138, "learning_rate": 8.045072832429659e-06, "loss": 0.5298, "step": 2412 }, { "epoch": 1.1726443768996961, "grad_norm": 0.07206703218637224, "learning_rate": 8.043553970367289e-06, "loss": 0.5558, "step": 2413 }, { "epoch": 1.1731306990881458, "grad_norm": 0.07382353090754513, "learning_rate": 8.042034662002291e-06, "loss": 0.559, "step": 2414 }, { "epoch": 1.1736170212765957, "grad_norm": 0.07221139146654051, "learning_rate": 8.040514907557453e-06, "loss": 0.5544, "step": 2415 }, { "epoch": 1.1741033434650456, "grad_norm": 0.07118992274955663, "learning_rate": 8.038994707255634e-06, "loss": 0.5592, "step": 2416 }, { "epoch": 1.1745896656534955, "grad_norm": 0.07545508631132494, "learning_rate": 8.037474061319749e-06, "loss": 0.598, "step": 2417 }, { "epoch": 1.1750759878419452, "grad_norm": 0.07437658853441415, "learning_rate": 8.035952969972787e-06, "loss": 0.5671, "step": 2418 }, { "epoch": 1.1755623100303951, "grad_norm": 0.07134353693043526, "learning_rate": 8.034431433437796e-06, "loss": 0.5057, "step": 2419 }, { "epoch": 1.176048632218845, "grad_norm": 0.07567010649198576, "learning_rate": 8.032909451937894e-06, "loss": 0.5648, "step": 2420 }, { "epoch": 1.1765349544072947, "grad_norm": 0.07445581833328138, "learning_rate": 8.031387025696262e-06, "loss": 0.5437, "step": 2421 }, { "epoch": 1.1770212765957446, "grad_norm": 0.0724348871635926, "learning_rate": 8.029864154936147e-06, "loss": 0.5698, "step": 2422 }, { "epoch": 1.1775075987841945, "grad_norm": 0.07251554067984517, "learning_rate": 8.028340839880859e-06, "loss": 0.5499, "step": 2423 }, { "epoch": 1.1779939209726444, "grad_norm": 0.07270364518484779, "learning_rate": 8.026817080753777e-06, "loss": 0.523, "step": 2424 }, { "epoch": 1.1784802431610943, "grad_norm": 0.07547479780283077, "learning_rate": 8.025292877778341e-06, "loss": 0.6004, "step": 2425 }, { "epoch": 1.178966565349544, "grad_norm": 0.07493385206939647, "learning_rate": 8.02376823117806e-06, "loss": 0.6001, "step": 2426 }, { "epoch": 1.179452887537994, "grad_norm": 0.07283915499977123, "learning_rate": 8.022243141176504e-06, "loss": 0.5441, "step": 2427 }, { "epoch": 1.1799392097264438, "grad_norm": 0.07832562541333396, "learning_rate": 8.020717607997311e-06, "loss": 0.5782, "step": 2428 }, { "epoch": 1.1804255319148935, "grad_norm": 0.07315487304790864, "learning_rate": 8.019191631864185e-06, "loss": 0.5598, "step": 2429 }, { "epoch": 1.1809118541033434, "grad_norm": 0.07260609245093201, "learning_rate": 8.017665213000889e-06, "loss": 0.5425, "step": 2430 }, { "epoch": 1.1813981762917933, "grad_norm": 0.07519989191703304, "learning_rate": 8.016138351631259e-06, "loss": 0.5805, "step": 2431 }, { "epoch": 1.1818844984802432, "grad_norm": 0.07355054047266626, "learning_rate": 8.01461104797919e-06, "loss": 0.5557, "step": 2432 }, { "epoch": 1.182370820668693, "grad_norm": 0.07191380812111182, "learning_rate": 8.013083302268645e-06, "loss": 0.5486, "step": 2433 }, { "epoch": 1.1828571428571428, "grad_norm": 0.07376908579355418, "learning_rate": 8.011555114723648e-06, "loss": 0.567, "step": 2434 }, { "epoch": 1.1833434650455927, "grad_norm": 0.07125255458262628, "learning_rate": 8.010026485568292e-06, "loss": 0.5566, "step": 2435 }, { "epoch": 1.1838297872340426, "grad_norm": 0.07301185433898116, "learning_rate": 8.008497415026733e-06, "loss": 0.5267, "step": 2436 }, { "epoch": 1.1843161094224923, "grad_norm": 0.07171045719796212, "learning_rate": 8.006967903323192e-06, "loss": 0.5473, "step": 2437 }, { "epoch": 1.1848024316109422, "grad_norm": 0.07321562721300487, "learning_rate": 8.005437950681956e-06, "loss": 0.5404, "step": 2438 }, { "epoch": 1.1852887537993921, "grad_norm": 0.07683406798361234, "learning_rate": 8.003907557327371e-06, "loss": 0.5453, "step": 2439 }, { "epoch": 1.185775075987842, "grad_norm": 0.07503300807536474, "learning_rate": 8.002376723483855e-06, "loss": 0.5278, "step": 2440 }, { "epoch": 1.1862613981762917, "grad_norm": 0.07652952388338584, "learning_rate": 8.000845449375888e-06, "loss": 0.557, "step": 2441 }, { "epoch": 1.1867477203647416, "grad_norm": 0.07450488448116986, "learning_rate": 7.999313735228012e-06, "loss": 0.5365, "step": 2442 }, { "epoch": 1.1872340425531915, "grad_norm": 0.07159190223343588, "learning_rate": 7.997781581264837e-06, "loss": 0.5294, "step": 2443 }, { "epoch": 1.1877203647416414, "grad_norm": 0.06893698833420311, "learning_rate": 7.996248987711033e-06, "loss": 0.5199, "step": 2444 }, { "epoch": 1.1882066869300911, "grad_norm": 0.07420866832536976, "learning_rate": 7.994715954791341e-06, "loss": 0.5658, "step": 2445 }, { "epoch": 1.188693009118541, "grad_norm": 0.07652733654565654, "learning_rate": 7.993182482730562e-06, "loss": 0.5338, "step": 2446 }, { "epoch": 1.189179331306991, "grad_norm": 0.07666678117168962, "learning_rate": 7.991648571753561e-06, "loss": 0.5366, "step": 2447 }, { "epoch": 1.1896656534954406, "grad_norm": 0.07298374530670172, "learning_rate": 7.99011422208527e-06, "loss": 0.534, "step": 2448 }, { "epoch": 1.1901519756838905, "grad_norm": 0.07050531308264864, "learning_rate": 7.988579433950682e-06, "loss": 0.5688, "step": 2449 }, { "epoch": 1.1906382978723404, "grad_norm": 0.07497532154511899, "learning_rate": 7.987044207574858e-06, "loss": 0.5545, "step": 2450 }, { "epoch": 1.1911246200607903, "grad_norm": 0.07384194932322664, "learning_rate": 7.985508543182922e-06, "loss": 0.5231, "step": 2451 }, { "epoch": 1.1916109422492402, "grad_norm": 0.0727618724432849, "learning_rate": 7.98397244100006e-06, "loss": 0.5358, "step": 2452 }, { "epoch": 1.19209726443769, "grad_norm": 0.075666823132178, "learning_rate": 7.982435901251527e-06, "loss": 0.5779, "step": 2453 }, { "epoch": 1.1925835866261398, "grad_norm": 0.07296200200723844, "learning_rate": 7.980898924162634e-06, "loss": 0.5466, "step": 2454 }, { "epoch": 1.1930699088145897, "grad_norm": 0.07121593121910853, "learning_rate": 7.979361509958764e-06, "loss": 0.5544, "step": 2455 }, { "epoch": 1.1935562310030394, "grad_norm": 0.07400329717840662, "learning_rate": 7.977823658865364e-06, "loss": 0.541, "step": 2456 }, { "epoch": 1.1940425531914893, "grad_norm": 0.0724387865765923, "learning_rate": 7.976285371107937e-06, "loss": 0.569, "step": 2457 }, { "epoch": 1.1945288753799392, "grad_norm": 0.07045253743964261, "learning_rate": 7.97474664691206e-06, "loss": 0.5806, "step": 2458 }, { "epoch": 1.1950151975683891, "grad_norm": 0.07472049799434143, "learning_rate": 7.973207486503368e-06, "loss": 0.5542, "step": 2459 }, { "epoch": 1.1955015197568388, "grad_norm": 0.07230265384847155, "learning_rate": 7.971667890107561e-06, "loss": 0.5308, "step": 2460 }, { "epoch": 1.1959878419452887, "grad_norm": 0.07165969078477409, "learning_rate": 7.970127857950403e-06, "loss": 0.5442, "step": 2461 }, { "epoch": 1.1964741641337386, "grad_norm": 0.07141666260760997, "learning_rate": 7.968587390257723e-06, "loss": 0.5392, "step": 2462 }, { "epoch": 1.1969604863221885, "grad_norm": 0.073726461087926, "learning_rate": 7.967046487255412e-06, "loss": 0.5591, "step": 2463 }, { "epoch": 1.1974468085106382, "grad_norm": 0.07365654959312602, "learning_rate": 7.965505149169428e-06, "loss": 0.5534, "step": 2464 }, { "epoch": 1.1979331306990881, "grad_norm": 0.07256725216797784, "learning_rate": 7.963963376225788e-06, "loss": 0.5184, "step": 2465 }, { "epoch": 1.198419452887538, "grad_norm": 0.0722980832197576, "learning_rate": 7.962421168650576e-06, "loss": 0.5258, "step": 2466 }, { "epoch": 1.1989057750759877, "grad_norm": 0.07002800580779552, "learning_rate": 7.960878526669942e-06, "loss": 0.533, "step": 2467 }, { "epoch": 1.1993920972644376, "grad_norm": 0.07528203964508505, "learning_rate": 7.959335450510095e-06, "loss": 0.5561, "step": 2468 }, { "epoch": 1.1998784194528875, "grad_norm": 0.07609456104563551, "learning_rate": 7.957791940397309e-06, "loss": 0.5808, "step": 2469 }, { "epoch": 1.2003647416413374, "grad_norm": 0.06919684922827463, "learning_rate": 7.956247996557924e-06, "loss": 0.5209, "step": 2470 }, { "epoch": 1.2008510638297873, "grad_norm": 0.0732215846563845, "learning_rate": 7.95470361921834e-06, "loss": 0.5601, "step": 2471 }, { "epoch": 1.201337386018237, "grad_norm": 0.07671494773527357, "learning_rate": 7.953158808605023e-06, "loss": 0.6011, "step": 2472 }, { "epoch": 1.201823708206687, "grad_norm": 0.07531487021854413, "learning_rate": 7.951613564944502e-06, "loss": 0.599, "step": 2473 }, { "epoch": 1.2023100303951368, "grad_norm": 0.0718495963689583, "learning_rate": 7.95006788846337e-06, "loss": 0.5438, "step": 2474 }, { "epoch": 1.2027963525835865, "grad_norm": 0.0705459930335682, "learning_rate": 7.94852177938828e-06, "loss": 0.5155, "step": 2475 }, { "epoch": 1.2032826747720364, "grad_norm": 0.07212110106373414, "learning_rate": 7.946975237945958e-06, "loss": 0.5541, "step": 2476 }, { "epoch": 1.2037689969604863, "grad_norm": 0.07152013637987481, "learning_rate": 7.94542826436318e-06, "loss": 0.5142, "step": 2477 }, { "epoch": 1.2042553191489362, "grad_norm": 0.07505027355903858, "learning_rate": 7.943880858866794e-06, "loss": 0.5644, "step": 2478 }, { "epoch": 1.204741641337386, "grad_norm": 0.07345249967963459, "learning_rate": 7.942333021683712e-06, "loss": 0.5288, "step": 2479 }, { "epoch": 1.2052279635258358, "grad_norm": 0.0710178381330521, "learning_rate": 7.940784753040903e-06, "loss": 0.5588, "step": 2480 }, { "epoch": 1.2057142857142857, "grad_norm": 0.07389132534313793, "learning_rate": 7.939236053165404e-06, "loss": 0.5495, "step": 2481 }, { "epoch": 1.2062006079027356, "grad_norm": 0.07367613047760858, "learning_rate": 7.937686922284319e-06, "loss": 0.528, "step": 2482 }, { "epoch": 1.2066869300911853, "grad_norm": 0.0704449840835992, "learning_rate": 7.936137360624802e-06, "loss": 0.536, "step": 2483 }, { "epoch": 1.2071732522796352, "grad_norm": 0.07098458523649463, "learning_rate": 7.934587368414085e-06, "loss": 0.5548, "step": 2484 }, { "epoch": 1.2076595744680851, "grad_norm": 0.07141953262171628, "learning_rate": 7.933036945879455e-06, "loss": 0.5372, "step": 2485 }, { "epoch": 1.208145896656535, "grad_norm": 0.07441054373557604, "learning_rate": 7.931486093248263e-06, "loss": 0.5808, "step": 2486 }, { "epoch": 1.2086322188449847, "grad_norm": 0.07615638168102483, "learning_rate": 7.929934810747926e-06, "loss": 0.5867, "step": 2487 }, { "epoch": 1.2091185410334346, "grad_norm": 0.07317579164594856, "learning_rate": 7.928383098605921e-06, "loss": 0.5749, "step": 2488 }, { "epoch": 1.2096048632218845, "grad_norm": 0.07023029276349807, "learning_rate": 7.926830957049787e-06, "loss": 0.5434, "step": 2489 }, { "epoch": 1.2100911854103344, "grad_norm": 0.07346701164137073, "learning_rate": 7.92527838630713e-06, "loss": 0.588, "step": 2490 }, { "epoch": 1.2105775075987841, "grad_norm": 0.0720930157034673, "learning_rate": 7.923725386605617e-06, "loss": 0.5407, "step": 2491 }, { "epoch": 1.211063829787234, "grad_norm": 0.07560890263630829, "learning_rate": 7.922171958172976e-06, "loss": 0.5613, "step": 2492 }, { "epoch": 1.211550151975684, "grad_norm": 0.07103108081036084, "learning_rate": 7.920618101237001e-06, "loss": 0.5719, "step": 2493 }, { "epoch": 1.2120364741641336, "grad_norm": 0.07036796985177463, "learning_rate": 7.919063816025547e-06, "loss": 0.5442, "step": 2494 }, { "epoch": 1.2125227963525835, "grad_norm": 0.07682791215301267, "learning_rate": 7.917509102766535e-06, "loss": 0.6024, "step": 2495 }, { "epoch": 1.2130091185410334, "grad_norm": 0.0698638951979365, "learning_rate": 7.915953961687942e-06, "loss": 0.5412, "step": 2496 }, { "epoch": 1.2134954407294833, "grad_norm": 0.07205491035653111, "learning_rate": 7.914398393017812e-06, "loss": 0.5612, "step": 2497 }, { "epoch": 1.2139817629179332, "grad_norm": 0.07381386928626457, "learning_rate": 7.912842396984256e-06, "loss": 0.568, "step": 2498 }, { "epoch": 1.214468085106383, "grad_norm": 0.07378946035796669, "learning_rate": 7.911285973815437e-06, "loss": 0.5599, "step": 2499 }, { "epoch": 1.2149544072948328, "grad_norm": 0.07209503485870092, "learning_rate": 7.90972912373959e-06, "loss": 0.567, "step": 2500 }, { "epoch": 1.2154407294832827, "grad_norm": 0.07304474231827386, "learning_rate": 7.90817184698501e-06, "loss": 0.5433, "step": 2501 }, { "epoch": 1.2159270516717324, "grad_norm": 0.0737088694280214, "learning_rate": 7.906614143780053e-06, "loss": 0.5726, "step": 2502 }, { "epoch": 1.2164133738601823, "grad_norm": 0.0726806891844952, "learning_rate": 7.905056014353139e-06, "loss": 0.5425, "step": 2503 }, { "epoch": 1.2168996960486322, "grad_norm": 0.07055461388876123, "learning_rate": 7.903497458932749e-06, "loss": 0.5309, "step": 2504 }, { "epoch": 1.2173860182370821, "grad_norm": 0.07089727983976464, "learning_rate": 7.901938477747428e-06, "loss": 0.541, "step": 2505 }, { "epoch": 1.2178723404255318, "grad_norm": 0.07313922186526196, "learning_rate": 7.900379071025783e-06, "loss": 0.5774, "step": 2506 }, { "epoch": 1.2183586626139817, "grad_norm": 0.07319577197309318, "learning_rate": 7.898819238996484e-06, "loss": 0.5606, "step": 2507 }, { "epoch": 1.2188449848024316, "grad_norm": 0.07321982527566595, "learning_rate": 7.897258981888261e-06, "loss": 0.5421, "step": 2508 }, { "epoch": 1.2193313069908815, "grad_norm": 0.07783215847254792, "learning_rate": 7.895698299929909e-06, "loss": 0.5763, "step": 2509 }, { "epoch": 1.2198176291793312, "grad_norm": 0.07221243246103402, "learning_rate": 7.894137193350284e-06, "loss": 0.5462, "step": 2510 }, { "epoch": 1.2203039513677811, "grad_norm": 0.07006969259138601, "learning_rate": 7.892575662378306e-06, "loss": 0.5438, "step": 2511 }, { "epoch": 1.220790273556231, "grad_norm": 0.07150545694044028, "learning_rate": 7.891013707242953e-06, "loss": 0.517, "step": 2512 }, { "epoch": 1.2212765957446807, "grad_norm": 0.07218882606700625, "learning_rate": 7.88945132817327e-06, "loss": 0.5365, "step": 2513 }, { "epoch": 1.2217629179331306, "grad_norm": 0.07476785491518514, "learning_rate": 7.887888525398362e-06, "loss": 0.5429, "step": 2514 }, { "epoch": 1.2222492401215805, "grad_norm": 0.07353948350864925, "learning_rate": 7.886325299147394e-06, "loss": 0.5549, "step": 2515 }, { "epoch": 1.2227355623100304, "grad_norm": 0.07417135655598107, "learning_rate": 7.8847616496496e-06, "loss": 0.5999, "step": 2516 }, { "epoch": 1.2232218844984803, "grad_norm": 0.07233066218840994, "learning_rate": 7.883197577134267e-06, "loss": 0.5698, "step": 2517 }, { "epoch": 1.22370820668693, "grad_norm": 0.07085273845507489, "learning_rate": 7.881633081830751e-06, "loss": 0.5273, "step": 2518 }, { "epoch": 1.22419452887538, "grad_norm": 0.07097689357999455, "learning_rate": 7.880068163968467e-06, "loss": 0.5643, "step": 2519 }, { "epoch": 1.2246808510638298, "grad_norm": 0.07339634432025, "learning_rate": 7.878502823776892e-06, "loss": 0.5712, "step": 2520 }, { "epoch": 1.2251671732522795, "grad_norm": 0.07539123662166866, "learning_rate": 7.876937061485563e-06, "loss": 0.5967, "step": 2521 }, { "epoch": 1.2256534954407294, "grad_norm": 0.07139334714072847, "learning_rate": 7.875370877324086e-06, "loss": 0.5605, "step": 2522 }, { "epoch": 1.2261398176291793, "grad_norm": 0.07813376555309574, "learning_rate": 7.873804271522122e-06, "loss": 0.5481, "step": 2523 }, { "epoch": 1.2266261398176292, "grad_norm": 0.07152899008994364, "learning_rate": 7.872237244309395e-06, "loss": 0.568, "step": 2524 }, { "epoch": 1.2271124620060792, "grad_norm": 0.07419754228040835, "learning_rate": 7.870669795915692e-06, "loss": 0.5456, "step": 2525 }, { "epoch": 1.2275987841945288, "grad_norm": 0.07102799209861953, "learning_rate": 7.869101926570864e-06, "loss": 0.5572, "step": 2526 }, { "epoch": 1.2280851063829787, "grad_norm": 0.0719424005075752, "learning_rate": 7.867533636504818e-06, "loss": 0.5326, "step": 2527 }, { "epoch": 1.2285714285714286, "grad_norm": 0.07174277234768475, "learning_rate": 7.865964925947526e-06, "loss": 0.5407, "step": 2528 }, { "epoch": 1.2290577507598783, "grad_norm": 0.07079182926445766, "learning_rate": 7.864395795129025e-06, "loss": 0.5221, "step": 2529 }, { "epoch": 1.2295440729483282, "grad_norm": 0.07329877496339461, "learning_rate": 7.862826244279406e-06, "loss": 0.5573, "step": 2530 }, { "epoch": 1.2300303951367781, "grad_norm": 0.07312771116259933, "learning_rate": 7.86125627362883e-06, "loss": 0.5633, "step": 2531 }, { "epoch": 1.230516717325228, "grad_norm": 0.07131109345688764, "learning_rate": 7.859685883407513e-06, "loss": 0.5306, "step": 2532 }, { "epoch": 1.2310030395136777, "grad_norm": 0.07531301797415688, "learning_rate": 7.858115073845733e-06, "loss": 0.5573, "step": 2533 }, { "epoch": 1.2314893617021276, "grad_norm": 0.07141081234675177, "learning_rate": 7.856543845173836e-06, "loss": 0.5641, "step": 2534 }, { "epoch": 1.2319756838905775, "grad_norm": 0.07323072910414083, "learning_rate": 7.854972197622221e-06, "loss": 0.5543, "step": 2535 }, { "epoch": 1.2324620060790275, "grad_norm": 0.06860959496601561, "learning_rate": 7.853400131421353e-06, "loss": 0.5372, "step": 2536 }, { "epoch": 1.2329483282674771, "grad_norm": 0.07285360241160728, "learning_rate": 7.85182764680176e-06, "loss": 0.5862, "step": 2537 }, { "epoch": 1.233434650455927, "grad_norm": 0.07160352449303746, "learning_rate": 7.850254743994026e-06, "loss": 0.5564, "step": 2538 }, { "epoch": 1.233920972644377, "grad_norm": 0.07488460756657045, "learning_rate": 7.848681423228799e-06, "loss": 0.5826, "step": 2539 }, { "epoch": 1.2344072948328266, "grad_norm": 0.07029839485982914, "learning_rate": 7.847107684736792e-06, "loss": 0.5283, "step": 2540 }, { "epoch": 1.2348936170212765, "grad_norm": 0.07618170457925849, "learning_rate": 7.845533528748774e-06, "loss": 0.545, "step": 2541 }, { "epoch": 1.2353799392097264, "grad_norm": 0.07119841948715123, "learning_rate": 7.843958955495579e-06, "loss": 0.5221, "step": 2542 }, { "epoch": 1.2358662613981763, "grad_norm": 0.07457691116419964, "learning_rate": 7.842383965208095e-06, "loss": 0.5499, "step": 2543 }, { "epoch": 1.2363525835866263, "grad_norm": 0.07475554035386363, "learning_rate": 7.840808558117281e-06, "loss": 0.5988, "step": 2544 }, { "epoch": 1.236838905775076, "grad_norm": 0.07600818264050219, "learning_rate": 7.839232734454154e-06, "loss": 0.623, "step": 2545 }, { "epoch": 1.2373252279635258, "grad_norm": 0.07066056329461261, "learning_rate": 7.837656494449785e-06, "loss": 0.5365, "step": 2546 }, { "epoch": 1.2378115501519757, "grad_norm": 0.06929162205657466, "learning_rate": 7.836079838335317e-06, "loss": 0.5378, "step": 2547 }, { "epoch": 1.2382978723404254, "grad_norm": 0.07160701142561782, "learning_rate": 7.834502766341944e-06, "loss": 0.5392, "step": 2548 }, { "epoch": 1.2387841945288753, "grad_norm": 0.07760038183075246, "learning_rate": 7.83292527870093e-06, "loss": 0.5454, "step": 2549 }, { "epoch": 1.2392705167173252, "grad_norm": 0.07262611121677474, "learning_rate": 7.831347375643594e-06, "loss": 0.5338, "step": 2550 }, { "epoch": 1.2397568389057751, "grad_norm": 0.07438800345414766, "learning_rate": 7.829769057401316e-06, "loss": 0.5276, "step": 2551 }, { "epoch": 1.2402431610942248, "grad_norm": 0.07997838302733981, "learning_rate": 7.828190324205542e-06, "loss": 0.6197, "step": 2552 }, { "epoch": 1.2407294832826747, "grad_norm": 0.09832363003269558, "learning_rate": 7.826611176287772e-06, "loss": 0.5797, "step": 2553 }, { "epoch": 1.2412158054711246, "grad_norm": 0.07171182557266415, "learning_rate": 7.825031613879572e-06, "loss": 0.5561, "step": 2554 }, { "epoch": 1.2417021276595746, "grad_norm": 0.0738645441554879, "learning_rate": 7.823451637212564e-06, "loss": 0.5824, "step": 2555 }, { "epoch": 1.2421884498480242, "grad_norm": 0.0725554935230709, "learning_rate": 7.821871246518437e-06, "loss": 0.5284, "step": 2556 }, { "epoch": 1.2426747720364741, "grad_norm": 0.07570249684648245, "learning_rate": 7.820290442028937e-06, "loss": 0.5723, "step": 2557 }, { "epoch": 1.243161094224924, "grad_norm": 0.07630549253280923, "learning_rate": 7.81870922397587e-06, "loss": 0.5837, "step": 2558 }, { "epoch": 1.243647416413374, "grad_norm": 0.08104742521616841, "learning_rate": 7.817127592591105e-06, "loss": 0.6158, "step": 2559 }, { "epoch": 1.2441337386018236, "grad_norm": 0.07669906480136184, "learning_rate": 7.815545548106567e-06, "loss": 0.573, "step": 2560 }, { "epoch": 1.2446200607902735, "grad_norm": 0.07180462704135149, "learning_rate": 7.813963090754248e-06, "loss": 0.5387, "step": 2561 }, { "epoch": 1.2451063829787234, "grad_norm": 0.07095743048486752, "learning_rate": 7.812380220766195e-06, "loss": 0.5467, "step": 2562 }, { "epoch": 1.2455927051671734, "grad_norm": 0.06935717459609982, "learning_rate": 7.810796938374521e-06, "loss": 0.5322, "step": 2563 }, { "epoch": 1.246079027355623, "grad_norm": 0.07418734013166545, "learning_rate": 7.809213243811394e-06, "loss": 0.5531, "step": 2564 }, { "epoch": 1.246565349544073, "grad_norm": 0.07526756646521683, "learning_rate": 7.807629137309046e-06, "loss": 0.5553, "step": 2565 }, { "epoch": 1.2470516717325228, "grad_norm": 0.07588947083105696, "learning_rate": 7.806044619099767e-06, "loss": 0.5537, "step": 2566 }, { "epoch": 1.2475379939209725, "grad_norm": 0.0776090176055357, "learning_rate": 7.80445968941591e-06, "loss": 0.5816, "step": 2567 }, { "epoch": 1.2480243161094224, "grad_norm": 0.07796578492508335, "learning_rate": 7.802874348489887e-06, "loss": 0.5485, "step": 2568 }, { "epoch": 1.2485106382978723, "grad_norm": 0.07499229875326857, "learning_rate": 7.801288596554168e-06, "loss": 0.5727, "step": 2569 }, { "epoch": 1.2489969604863222, "grad_norm": 0.0703022327767209, "learning_rate": 7.799702433841288e-06, "loss": 0.5224, "step": 2570 }, { "epoch": 1.2489969604863222, "eval_loss": 0.5824215412139893, "eval_runtime": 105.1433, "eval_samples_per_second": 288.682, "eval_steps_per_second": 36.094, "step": 2570 }, { "epoch": 1.2494832826747722, "grad_norm": 0.07358938619390225, "learning_rate": 7.79811586058384e-06, "loss": 0.5753, "step": 2571 }, { "epoch": 1.2499696048632218, "grad_norm": 0.07051388390271865, "learning_rate": 7.796528877014474e-06, "loss": 0.4974, "step": 2572 }, { "epoch": 1.2504559270516717, "grad_norm": 0.07452573103011828, "learning_rate": 7.794941483365903e-06, "loss": 0.5566, "step": 2573 }, { "epoch": 1.2509422492401217, "grad_norm": 0.07810445928003949, "learning_rate": 7.793353679870906e-06, "loss": 0.5483, "step": 2574 }, { "epoch": 1.2514285714285713, "grad_norm": 0.07174413784254638, "learning_rate": 7.791765466762308e-06, "loss": 0.5169, "step": 2575 }, { "epoch": 1.2519148936170212, "grad_norm": 0.07524117931119266, "learning_rate": 7.79017684427301e-06, "loss": 0.5762, "step": 2576 }, { "epoch": 1.2524012158054711, "grad_norm": 0.07356261552811819, "learning_rate": 7.788587812635964e-06, "loss": 0.5639, "step": 2577 }, { "epoch": 1.252887537993921, "grad_norm": 0.07277492668062818, "learning_rate": 7.786998372084179e-06, "loss": 0.5437, "step": 2578 }, { "epoch": 1.253373860182371, "grad_norm": 0.07728796632913545, "learning_rate": 7.785408522850733e-06, "loss": 0.5447, "step": 2579 }, { "epoch": 1.2538601823708206, "grad_norm": 0.07466761671733702, "learning_rate": 7.783818265168756e-06, "loss": 0.562, "step": 2580 }, { "epoch": 1.2543465045592705, "grad_norm": 0.07274026823554142, "learning_rate": 7.782227599271443e-06, "loss": 0.5604, "step": 2581 }, { "epoch": 1.2548328267477205, "grad_norm": 0.07830210020324926, "learning_rate": 7.780636525392047e-06, "loss": 0.6249, "step": 2582 }, { "epoch": 1.2553191489361701, "grad_norm": 0.08748456334804489, "learning_rate": 7.779045043763883e-06, "loss": 0.5472, "step": 2583 }, { "epoch": 1.25580547112462, "grad_norm": 0.07567169382500258, "learning_rate": 7.777453154620318e-06, "loss": 0.5863, "step": 2584 }, { "epoch": 1.25629179331307, "grad_norm": 0.07086785292878028, "learning_rate": 7.775860858194788e-06, "loss": 0.5273, "step": 2585 }, { "epoch": 1.2567781155015196, "grad_norm": 0.07470599770095356, "learning_rate": 7.774268154720788e-06, "loss": 0.5204, "step": 2586 }, { "epoch": 1.2572644376899695, "grad_norm": 0.07509453124864622, "learning_rate": 7.772675044431865e-06, "loss": 0.5657, "step": 2587 }, { "epoch": 1.2577507598784194, "grad_norm": 0.07293588515574081, "learning_rate": 7.771081527561632e-06, "loss": 0.5569, "step": 2588 }, { "epoch": 1.2582370820668694, "grad_norm": 0.0743274443805202, "learning_rate": 7.769487604343761e-06, "loss": 0.5694, "step": 2589 }, { "epoch": 1.2587234042553193, "grad_norm": 0.07370388435728399, "learning_rate": 7.767893275011986e-06, "loss": 0.5552, "step": 2590 }, { "epoch": 1.259209726443769, "grad_norm": 0.0717242308642081, "learning_rate": 7.76629853980009e-06, "loss": 0.5369, "step": 2591 }, { "epoch": 1.2596960486322188, "grad_norm": 0.07726035737295897, "learning_rate": 7.764703398941927e-06, "loss": 0.6139, "step": 2592 }, { "epoch": 1.2601823708206688, "grad_norm": 0.073874805303104, "learning_rate": 7.763107852671406e-06, "loss": 0.5318, "step": 2593 }, { "epoch": 1.2606686930091184, "grad_norm": 0.07517378400057814, "learning_rate": 7.761511901222495e-06, "loss": 0.5554, "step": 2594 }, { "epoch": 1.2611550151975683, "grad_norm": 0.0734981507620573, "learning_rate": 7.759915544829225e-06, "loss": 0.5767, "step": 2595 }, { "epoch": 1.2616413373860182, "grad_norm": 0.07223905718186543, "learning_rate": 7.758318783725678e-06, "loss": 0.5605, "step": 2596 }, { "epoch": 1.2621276595744682, "grad_norm": 0.07318658729171917, "learning_rate": 7.756721618146007e-06, "loss": 0.534, "step": 2597 }, { "epoch": 1.262613981762918, "grad_norm": 0.07123303100648597, "learning_rate": 7.755124048324416e-06, "loss": 0.5681, "step": 2598 }, { "epoch": 1.2631003039513677, "grad_norm": 0.07476412223009882, "learning_rate": 7.753526074495168e-06, "loss": 0.5959, "step": 2599 }, { "epoch": 1.2635866261398176, "grad_norm": 0.07797201064958541, "learning_rate": 7.75192769689259e-06, "loss": 0.6223, "step": 2600 }, { "epoch": 1.2640729483282676, "grad_norm": 0.07484586517357113, "learning_rate": 7.750328915751064e-06, "loss": 0.5424, "step": 2601 }, { "epoch": 1.2645592705167172, "grad_norm": 0.07171490308238153, "learning_rate": 7.748729731305036e-06, "loss": 0.5351, "step": 2602 }, { "epoch": 1.2650455927051671, "grad_norm": 0.07388897417089899, "learning_rate": 7.747130143789006e-06, "loss": 0.5539, "step": 2603 }, { "epoch": 1.265531914893617, "grad_norm": 0.0727703100841655, "learning_rate": 7.745530153437538e-06, "loss": 0.5779, "step": 2604 }, { "epoch": 1.2660182370820667, "grad_norm": 0.0725721800207721, "learning_rate": 7.743929760485248e-06, "loss": 0.5572, "step": 2605 }, { "epoch": 1.2665045592705166, "grad_norm": 0.07560644933635846, "learning_rate": 7.742328965166818e-06, "loss": 0.5774, "step": 2606 }, { "epoch": 1.2669908814589665, "grad_norm": 0.07170362081087035, "learning_rate": 7.74072776771699e-06, "loss": 0.5197, "step": 2607 }, { "epoch": 1.2674772036474165, "grad_norm": 0.07697544950202906, "learning_rate": 7.739126168370554e-06, "loss": 0.551, "step": 2608 }, { "epoch": 1.2679635258358664, "grad_norm": 0.07172692702726284, "learning_rate": 7.737524167362373e-06, "loss": 0.5228, "step": 2609 }, { "epoch": 1.268449848024316, "grad_norm": 0.07120482949202118, "learning_rate": 7.73592176492736e-06, "loss": 0.5699, "step": 2610 }, { "epoch": 1.268936170212766, "grad_norm": 0.07344462909185348, "learning_rate": 7.734318961300484e-06, "loss": 0.531, "step": 2611 }, { "epoch": 1.2694224924012159, "grad_norm": 0.07326625012114829, "learning_rate": 7.732715756716786e-06, "loss": 0.539, "step": 2612 }, { "epoch": 1.2699088145896655, "grad_norm": 0.07256093941774487, "learning_rate": 7.731112151411355e-06, "loss": 0.537, "step": 2613 }, { "epoch": 1.2703951367781154, "grad_norm": 0.0774795879946063, "learning_rate": 7.729508145619339e-06, "loss": 0.5101, "step": 2614 }, { "epoch": 1.2708814589665653, "grad_norm": 0.07533263430429454, "learning_rate": 7.72790373957595e-06, "loss": 0.6075, "step": 2615 }, { "epoch": 1.2713677811550153, "grad_norm": 0.07272391406574882, "learning_rate": 7.726298933516453e-06, "loss": 0.5445, "step": 2616 }, { "epoch": 1.2718541033434652, "grad_norm": 0.07405978547986122, "learning_rate": 7.724693727676181e-06, "loss": 0.5636, "step": 2617 }, { "epoch": 1.2723404255319148, "grad_norm": 0.07398454840100682, "learning_rate": 7.72308812229051e-06, "loss": 0.573, "step": 2618 }, { "epoch": 1.2728267477203647, "grad_norm": 0.07407253394234505, "learning_rate": 7.721482117594891e-06, "loss": 0.5389, "step": 2619 }, { "epoch": 1.2733130699088147, "grad_norm": 0.07386724668611457, "learning_rate": 7.719875713824824e-06, "loss": 0.5318, "step": 2620 }, { "epoch": 1.2737993920972643, "grad_norm": 0.07387320747550771, "learning_rate": 7.718268911215869e-06, "loss": 0.5367, "step": 2621 }, { "epoch": 1.2742857142857142, "grad_norm": 0.06914403426406225, "learning_rate": 7.716661710003647e-06, "loss": 0.5415, "step": 2622 }, { "epoch": 1.2747720364741641, "grad_norm": 0.07069904237227867, "learning_rate": 7.715054110423834e-06, "loss": 0.5448, "step": 2623 }, { "epoch": 1.275258358662614, "grad_norm": 0.0798494750730378, "learning_rate": 7.71344611271217e-06, "loss": 0.5648, "step": 2624 }, { "epoch": 1.275744680851064, "grad_norm": 0.07324639351911368, "learning_rate": 7.711837717104442e-06, "loss": 0.5543, "step": 2625 }, { "epoch": 1.2762310030395136, "grad_norm": 0.07240246960603337, "learning_rate": 7.71022892383651e-06, "loss": 0.5852, "step": 2626 }, { "epoch": 1.2767173252279636, "grad_norm": 0.07512491596878752, "learning_rate": 7.708619733144285e-06, "loss": 0.5796, "step": 2627 }, { "epoch": 1.2772036474164135, "grad_norm": 0.07294552336685806, "learning_rate": 7.707010145263733e-06, "loss": 0.5607, "step": 2628 }, { "epoch": 1.2776899696048631, "grad_norm": 0.07344589203686383, "learning_rate": 7.705400160430882e-06, "loss": 0.567, "step": 2629 }, { "epoch": 1.278176291793313, "grad_norm": 0.07631165533278293, "learning_rate": 7.70378977888182e-06, "loss": 0.5903, "step": 2630 }, { "epoch": 1.278662613981763, "grad_norm": 0.0701379290634204, "learning_rate": 7.702179000852693e-06, "loss": 0.5155, "step": 2631 }, { "epoch": 1.2791489361702126, "grad_norm": 0.07329540652056298, "learning_rate": 7.700567826579697e-06, "loss": 0.5636, "step": 2632 }, { "epoch": 1.2796352583586625, "grad_norm": 0.07321077775536616, "learning_rate": 7.698956256299098e-06, "loss": 0.5407, "step": 2633 }, { "epoch": 1.2801215805471124, "grad_norm": 0.07325217104749837, "learning_rate": 7.697344290247214e-06, "loss": 0.5537, "step": 2634 }, { "epoch": 1.2806079027355624, "grad_norm": 0.07416569074271362, "learning_rate": 7.69573192866042e-06, "loss": 0.5929, "step": 2635 }, { "epoch": 1.2810942249240123, "grad_norm": 0.07557838205853717, "learning_rate": 7.694119171775148e-06, "loss": 0.5947, "step": 2636 }, { "epoch": 1.281580547112462, "grad_norm": 0.07584617624367863, "learning_rate": 7.692506019827894e-06, "loss": 0.5836, "step": 2637 }, { "epoch": 1.2820668693009118, "grad_norm": 0.07428891374469863, "learning_rate": 7.69089247305521e-06, "loss": 0.5725, "step": 2638 }, { "epoch": 1.2825531914893618, "grad_norm": 0.06932411279762006, "learning_rate": 7.689278531693698e-06, "loss": 0.5279, "step": 2639 }, { "epoch": 1.2830395136778114, "grad_norm": 0.07207574358356089, "learning_rate": 7.687664195980031e-06, "loss": 0.5331, "step": 2640 }, { "epoch": 1.2835258358662613, "grad_norm": 0.07266605096940389, "learning_rate": 7.686049466150931e-06, "loss": 0.5453, "step": 2641 }, { "epoch": 1.2840121580547113, "grad_norm": 0.07152417217234301, "learning_rate": 7.684434342443176e-06, "loss": 0.5281, "step": 2642 }, { "epoch": 1.2844984802431612, "grad_norm": 0.07088887399366368, "learning_rate": 7.682818825093613e-06, "loss": 0.5648, "step": 2643 }, { "epoch": 1.284984802431611, "grad_norm": 0.07492941461371523, "learning_rate": 7.68120291433913e-06, "loss": 0.586, "step": 2644 }, { "epoch": 1.2854711246200607, "grad_norm": 0.07400338823111081, "learning_rate": 7.679586610416689e-06, "loss": 0.5428, "step": 2645 }, { "epoch": 1.2859574468085107, "grad_norm": 0.07067097572968692, "learning_rate": 7.6779699135633e-06, "loss": 0.5302, "step": 2646 }, { "epoch": 1.2864437689969606, "grad_norm": 0.07073207345566752, "learning_rate": 7.676352824016032e-06, "loss": 0.5587, "step": 2647 }, { "epoch": 1.2869300911854102, "grad_norm": 0.06920525666732837, "learning_rate": 7.674735342012014e-06, "loss": 0.512, "step": 2648 }, { "epoch": 1.2874164133738601, "grad_norm": 0.07382544918151622, "learning_rate": 7.673117467788435e-06, "loss": 0.553, "step": 2649 }, { "epoch": 1.28790273556231, "grad_norm": 0.07316121970231027, "learning_rate": 7.671499201582533e-06, "loss": 0.5777, "step": 2650 }, { "epoch": 1.28838905775076, "grad_norm": 0.07180229251100594, "learning_rate": 7.66988054363161e-06, "loss": 0.5309, "step": 2651 }, { "epoch": 1.2888753799392099, "grad_norm": 0.07453064513266427, "learning_rate": 7.668261494173024e-06, "loss": 0.5544, "step": 2652 }, { "epoch": 1.2893617021276595, "grad_norm": 0.07314819667687368, "learning_rate": 7.66664205344419e-06, "loss": 0.5471, "step": 2653 }, { "epoch": 1.2898480243161095, "grad_norm": 0.06843732806395814, "learning_rate": 7.665022221682578e-06, "loss": 0.52, "step": 2654 }, { "epoch": 1.2903343465045594, "grad_norm": 0.07496065238433991, "learning_rate": 7.663401999125724e-06, "loss": 0.5642, "step": 2655 }, { "epoch": 1.290820668693009, "grad_norm": 0.07207211095392176, "learning_rate": 7.661781386011211e-06, "loss": 0.5649, "step": 2656 }, { "epoch": 1.291306990881459, "grad_norm": 0.07444921741384546, "learning_rate": 7.660160382576683e-06, "loss": 0.5417, "step": 2657 }, { "epoch": 1.2917933130699089, "grad_norm": 0.07403637105177087, "learning_rate": 7.658538989059846e-06, "loss": 0.5698, "step": 2658 }, { "epoch": 1.2922796352583585, "grad_norm": 0.07318493121602063, "learning_rate": 7.656917205698452e-06, "loss": 0.5437, "step": 2659 }, { "epoch": 1.2927659574468084, "grad_norm": 0.0726427717659996, "learning_rate": 7.655295032730323e-06, "loss": 0.6179, "step": 2660 }, { "epoch": 1.2932522796352584, "grad_norm": 0.07307210350273614, "learning_rate": 7.65367247039333e-06, "loss": 0.5628, "step": 2661 }, { "epoch": 1.2937386018237083, "grad_norm": 0.073894520521864, "learning_rate": 7.652049518925404e-06, "loss": 0.5646, "step": 2662 }, { "epoch": 1.2942249240121582, "grad_norm": 0.07086921025174431, "learning_rate": 7.650426178564532e-06, "loss": 0.5149, "step": 2663 }, { "epoch": 1.2947112462006078, "grad_norm": 0.06973234170335904, "learning_rate": 7.648802449548758e-06, "loss": 0.5202, "step": 2664 }, { "epoch": 1.2951975683890578, "grad_norm": 0.07339648183617803, "learning_rate": 7.647178332116186e-06, "loss": 0.5464, "step": 2665 }, { "epoch": 1.2956838905775077, "grad_norm": 0.074870667285401, "learning_rate": 7.64555382650497e-06, "loss": 0.5813, "step": 2666 }, { "epoch": 1.2961702127659573, "grad_norm": 0.07267891042846718, "learning_rate": 7.643928932953328e-06, "loss": 0.5467, "step": 2667 }, { "epoch": 1.2966565349544072, "grad_norm": 0.07040101068996828, "learning_rate": 7.642303651699533e-06, "loss": 0.5588, "step": 2668 }, { "epoch": 1.2971428571428572, "grad_norm": 0.07369027735012644, "learning_rate": 7.64067798298191e-06, "loss": 0.5715, "step": 2669 }, { "epoch": 1.297629179331307, "grad_norm": 0.07648028830088888, "learning_rate": 7.63905192703885e-06, "loss": 0.575, "step": 2670 }, { "epoch": 1.298115501519757, "grad_norm": 0.07169219565123473, "learning_rate": 7.637425484108793e-06, "loss": 0.563, "step": 2671 }, { "epoch": 1.2986018237082066, "grad_norm": 0.07685159389997796, "learning_rate": 7.635798654430237e-06, "loss": 0.5745, "step": 2672 }, { "epoch": 1.2990881458966566, "grad_norm": 0.07328231737117846, "learning_rate": 7.634171438241745e-06, "loss": 0.5446, "step": 2673 }, { "epoch": 1.2995744680851065, "grad_norm": 0.07154640234028622, "learning_rate": 7.63254383578192e-06, "loss": 0.5483, "step": 2674 }, { "epoch": 1.3000607902735561, "grad_norm": 0.0744325457954457, "learning_rate": 7.630915847289435e-06, "loss": 0.5324, "step": 2675 }, { "epoch": 1.300547112462006, "grad_norm": 0.0738984084935473, "learning_rate": 7.629287473003019e-06, "loss": 0.5716, "step": 2676 }, { "epoch": 1.301033434650456, "grad_norm": 0.06985192839078329, "learning_rate": 7.627658713161453e-06, "loss": 0.5419, "step": 2677 }, { "epoch": 1.3015197568389056, "grad_norm": 0.07328820265870761, "learning_rate": 7.626029568003575e-06, "loss": 0.5348, "step": 2678 }, { "epoch": 1.3020060790273555, "grad_norm": 0.07498915909201755, "learning_rate": 7.624400037768283e-06, "loss": 0.5123, "step": 2679 }, { "epoch": 1.3024924012158055, "grad_norm": 0.07561418122831266, "learning_rate": 7.622770122694526e-06, "loss": 0.5862, "step": 2680 }, { "epoch": 1.3029787234042554, "grad_norm": 0.07017352174027704, "learning_rate": 7.6211398230213155e-06, "loss": 0.5585, "step": 2681 }, { "epoch": 1.3034650455927053, "grad_norm": 0.07066548657233461, "learning_rate": 7.619509138987713e-06, "loss": 0.5586, "step": 2682 }, { "epoch": 1.303951367781155, "grad_norm": 0.07537297276629902, "learning_rate": 7.617878070832842e-06, "loss": 0.5355, "step": 2683 }, { "epoch": 1.3044376899696049, "grad_norm": 0.07105726117785882, "learning_rate": 7.616246618795879e-06, "loss": 0.5416, "step": 2684 }, { "epoch": 1.3049240121580548, "grad_norm": 0.06872990101322848, "learning_rate": 7.614614783116061e-06, "loss": 0.5456, "step": 2685 }, { "epoch": 1.3054103343465044, "grad_norm": 0.0729629812521218, "learning_rate": 7.612982564032675e-06, "loss": 0.5789, "step": 2686 }, { "epoch": 1.3058966565349543, "grad_norm": 0.07083687337857913, "learning_rate": 7.61134996178507e-06, "loss": 0.5395, "step": 2687 }, { "epoch": 1.3063829787234043, "grad_norm": 0.0720236302006423, "learning_rate": 7.6097169766126445e-06, "loss": 0.5708, "step": 2688 }, { "epoch": 1.3068693009118542, "grad_norm": 0.074979362657179, "learning_rate": 7.608083608754861e-06, "loss": 0.5485, "step": 2689 }, { "epoch": 1.307355623100304, "grad_norm": 0.07442099562708793, "learning_rate": 7.606449858451232e-06, "loss": 0.5539, "step": 2690 }, { "epoch": 1.3078419452887537, "grad_norm": 0.0730207405677716, "learning_rate": 7.60481572594133e-06, "loss": 0.5502, "step": 2691 }, { "epoch": 1.3083282674772037, "grad_norm": 0.07272502430633573, "learning_rate": 7.603181211464783e-06, "loss": 0.5454, "step": 2692 }, { "epoch": 1.3088145896656536, "grad_norm": 0.07197664749840792, "learning_rate": 7.60154631526127e-06, "loss": 0.5387, "step": 2693 }, { "epoch": 1.3093009118541032, "grad_norm": 0.07432887890012968, "learning_rate": 7.599911037570533e-06, "loss": 0.5526, "step": 2694 }, { "epoch": 1.3097872340425532, "grad_norm": 0.07169348023536669, "learning_rate": 7.598275378632367e-06, "loss": 0.5329, "step": 2695 }, { "epoch": 1.310273556231003, "grad_norm": 0.07174243799509789, "learning_rate": 7.596639338686622e-06, "loss": 0.5537, "step": 2696 }, { "epoch": 1.310759878419453, "grad_norm": 0.07353367460780375, "learning_rate": 7.595002917973204e-06, "loss": 0.5635, "step": 2697 }, { "epoch": 1.3112462006079029, "grad_norm": 0.07512562945371454, "learning_rate": 7.593366116732077e-06, "loss": 0.5717, "step": 2698 }, { "epoch": 1.3117325227963526, "grad_norm": 0.07244172271725031, "learning_rate": 7.59172893520326e-06, "loss": 0.5296, "step": 2699 }, { "epoch": 1.3122188449848025, "grad_norm": 0.06860621533762223, "learning_rate": 7.590091373626823e-06, "loss": 0.5024, "step": 2700 }, { "epoch": 1.3127051671732524, "grad_norm": 0.0754041820927164, "learning_rate": 7.588453432242899e-06, "loss": 0.5705, "step": 2701 }, { "epoch": 1.313191489361702, "grad_norm": 0.07584182472745714, "learning_rate": 7.586815111291674e-06, "loss": 0.5532, "step": 2702 }, { "epoch": 1.313677811550152, "grad_norm": 0.07811875447392391, "learning_rate": 7.585176411013389e-06, "loss": 0.5636, "step": 2703 }, { "epoch": 1.3141641337386019, "grad_norm": 0.07434695285341286, "learning_rate": 7.583537331648339e-06, "loss": 0.5621, "step": 2704 }, { "epoch": 1.3146504559270515, "grad_norm": 0.07161432299560656, "learning_rate": 7.581897873436876e-06, "loss": 0.5546, "step": 2705 }, { "epoch": 1.3151367781155014, "grad_norm": 0.07083763721926453, "learning_rate": 7.58025803661941e-06, "loss": 0.5352, "step": 2706 }, { "epoch": 1.3156231003039514, "grad_norm": 0.07263295184757475, "learning_rate": 7.578617821436405e-06, "loss": 0.5769, "step": 2707 }, { "epoch": 1.3161094224924013, "grad_norm": 0.07402261218645703, "learning_rate": 7.576977228128377e-06, "loss": 0.5368, "step": 2708 }, { "epoch": 1.3165957446808512, "grad_norm": 0.07496631978430902, "learning_rate": 7.575336256935902e-06, "loss": 0.5596, "step": 2709 }, { "epoch": 1.3170820668693008, "grad_norm": 0.07125406556075664, "learning_rate": 7.573694908099612e-06, "loss": 0.5617, "step": 2710 }, { "epoch": 1.3175683890577508, "grad_norm": 0.07081462316054976, "learning_rate": 7.5720531818601876e-06, "loss": 0.5797, "step": 2711 }, { "epoch": 1.3180547112462007, "grad_norm": 0.07141198018795063, "learning_rate": 7.570411078458373e-06, "loss": 0.5678, "step": 2712 }, { "epoch": 1.3185410334346503, "grad_norm": 0.07134062044168017, "learning_rate": 7.568768598134961e-06, "loss": 0.5382, "step": 2713 }, { "epoch": 1.3190273556231003, "grad_norm": 0.07744804283935018, "learning_rate": 7.567125741130806e-06, "loss": 0.5657, "step": 2714 }, { "epoch": 1.3195136778115502, "grad_norm": 0.07560759785152554, "learning_rate": 7.5654825076868124e-06, "loss": 0.575, "step": 2715 }, { "epoch": 1.32, "grad_norm": 0.0723428554303582, "learning_rate": 7.563838898043942e-06, "loss": 0.5621, "step": 2716 }, { "epoch": 1.32048632218845, "grad_norm": 0.07282727632474047, "learning_rate": 7.56219491244321e-06, "loss": 0.5428, "step": 2717 }, { "epoch": 1.3209726443768997, "grad_norm": 0.0766516296440535, "learning_rate": 7.560550551125691e-06, "loss": 0.5596, "step": 2718 }, { "epoch": 1.3214589665653496, "grad_norm": 0.07223833061979915, "learning_rate": 7.558905814332514e-06, "loss": 0.5348, "step": 2719 }, { "epoch": 1.3219452887537995, "grad_norm": 0.07229167302652503, "learning_rate": 7.557260702304853e-06, "loss": 0.5507, "step": 2720 }, { "epoch": 1.3224316109422491, "grad_norm": 0.07201860164448105, "learning_rate": 7.555615215283952e-06, "loss": 0.5723, "step": 2721 }, { "epoch": 1.322917933130699, "grad_norm": 0.07294388551249625, "learning_rate": 7.553969353511099e-06, "loss": 0.5507, "step": 2722 }, { "epoch": 1.323404255319149, "grad_norm": 0.0748079880187394, "learning_rate": 7.552323117227642e-06, "loss": 0.5641, "step": 2723 }, { "epoch": 1.3238905775075989, "grad_norm": 0.07573816637056172, "learning_rate": 7.550676506674986e-06, "loss": 0.5767, "step": 2724 }, { "epoch": 1.3243768996960488, "grad_norm": 0.07506100403603923, "learning_rate": 7.549029522094583e-06, "loss": 0.5746, "step": 2725 }, { "epoch": 1.3248632218844985, "grad_norm": 0.07100833882128814, "learning_rate": 7.547382163727949e-06, "loss": 0.53, "step": 2726 }, { "epoch": 1.3253495440729484, "grad_norm": 0.071307822896735, "learning_rate": 7.545734431816647e-06, "loss": 0.5378, "step": 2727 }, { "epoch": 1.3258358662613983, "grad_norm": 0.07369445843652978, "learning_rate": 7.544086326602298e-06, "loss": 0.5447, "step": 2728 }, { "epoch": 1.326322188449848, "grad_norm": 0.06875727544382673, "learning_rate": 7.5424378483265795e-06, "loss": 0.5262, "step": 2729 }, { "epoch": 1.3268085106382979, "grad_norm": 0.06877588729134668, "learning_rate": 7.5407889972312236e-06, "loss": 0.5268, "step": 2730 }, { "epoch": 1.3272948328267478, "grad_norm": 0.07596685692231431, "learning_rate": 7.5391397735580115e-06, "loss": 0.5296, "step": 2731 }, { "epoch": 1.3277811550151974, "grad_norm": 0.07100340791035951, "learning_rate": 7.537490177548787e-06, "loss": 0.5425, "step": 2732 }, { "epoch": 1.3282674772036474, "grad_norm": 0.07347219706544224, "learning_rate": 7.535840209445444e-06, "loss": 0.5817, "step": 2733 }, { "epoch": 1.3287537993920973, "grad_norm": 0.07269969932284565, "learning_rate": 7.53418986948993e-06, "loss": 0.5657, "step": 2734 }, { "epoch": 1.3292401215805472, "grad_norm": 0.07108917353295441, "learning_rate": 7.5325391579242476e-06, "loss": 0.5809, "step": 2735 }, { "epoch": 1.329726443768997, "grad_norm": 0.0756803482113215, "learning_rate": 7.5308880749904576e-06, "loss": 0.5183, "step": 2736 }, { "epoch": 1.3302127659574468, "grad_norm": 0.0713285105015546, "learning_rate": 7.529236620930671e-06, "loss": 0.5425, "step": 2737 }, { "epoch": 1.3306990881458967, "grad_norm": 0.07373404240829051, "learning_rate": 7.527584795987057e-06, "loss": 0.5569, "step": 2738 }, { "epoch": 1.3311854103343466, "grad_norm": 0.0767723705422561, "learning_rate": 7.525932600401833e-06, "loss": 0.5716, "step": 2739 }, { "epoch": 1.3316717325227962, "grad_norm": 0.07091844335228077, "learning_rate": 7.524280034417278e-06, "loss": 0.5115, "step": 2740 }, { "epoch": 1.3321580547112462, "grad_norm": 0.07273466135943404, "learning_rate": 7.522627098275723e-06, "loss": 0.5515, "step": 2741 }, { "epoch": 1.332644376899696, "grad_norm": 0.07499768447130718, "learning_rate": 7.520973792219548e-06, "loss": 0.5206, "step": 2742 }, { "epoch": 1.333130699088146, "grad_norm": 0.07515786948003962, "learning_rate": 7.519320116491195e-06, "loss": 0.5816, "step": 2743 }, { "epoch": 1.3336170212765959, "grad_norm": 0.07270801398997014, "learning_rate": 7.517666071333155e-06, "loss": 0.5637, "step": 2744 }, { "epoch": 1.3341033434650456, "grad_norm": 0.07474638177335434, "learning_rate": 7.516011656987976e-06, "loss": 0.5458, "step": 2745 }, { "epoch": 1.3345896656534955, "grad_norm": 0.07479027885327674, "learning_rate": 7.5143568736982585e-06, "loss": 0.5724, "step": 2746 }, { "epoch": 1.3350759878419454, "grad_norm": 0.07024389883300447, "learning_rate": 7.512701721706659e-06, "loss": 0.5381, "step": 2747 }, { "epoch": 1.335562310030395, "grad_norm": 0.07414204692333647, "learning_rate": 7.5110462012558835e-06, "loss": 0.5745, "step": 2748 }, { "epoch": 1.336048632218845, "grad_norm": 0.07695660208068462, "learning_rate": 7.509390312588699e-06, "loss": 0.5749, "step": 2749 }, { "epoch": 1.3365349544072949, "grad_norm": 0.0732252332053761, "learning_rate": 7.50773405594792e-06, "loss": 0.5229, "step": 2750 }, { "epoch": 1.3370212765957445, "grad_norm": 0.07068423805726977, "learning_rate": 7.5060774315764195e-06, "loss": 0.5703, "step": 2751 }, { "epoch": 1.3375075987841945, "grad_norm": 0.07380370967009903, "learning_rate": 7.5044204397171225e-06, "loss": 0.5284, "step": 2752 }, { "epoch": 1.3379939209726444, "grad_norm": 0.07702725696772723, "learning_rate": 7.502763080613008e-06, "loss": 0.6185, "step": 2753 }, { "epoch": 1.3384802431610943, "grad_norm": 0.07265422851999226, "learning_rate": 7.501105354507107e-06, "loss": 0.5503, "step": 2754 }, { "epoch": 1.3389665653495442, "grad_norm": 0.07361567032933555, "learning_rate": 7.499447261642509e-06, "loss": 0.5178, "step": 2755 }, { "epoch": 1.3394528875379939, "grad_norm": 0.07129462515465361, "learning_rate": 7.497788802262353e-06, "loss": 0.5452, "step": 2756 }, { "epoch": 1.3399392097264438, "grad_norm": 0.07064482130612999, "learning_rate": 7.496129976609833e-06, "loss": 0.5532, "step": 2757 }, { "epoch": 1.3404255319148937, "grad_norm": 0.07292646659664338, "learning_rate": 7.494470784928197e-06, "loss": 0.5813, "step": 2758 }, { "epoch": 1.3409118541033433, "grad_norm": 0.0733806862580877, "learning_rate": 7.492811227460748e-06, "loss": 0.5639, "step": 2759 }, { "epoch": 1.3413981762917933, "grad_norm": 0.07500971335263869, "learning_rate": 7.491151304450839e-06, "loss": 0.5391, "step": 2760 }, { "epoch": 1.3418844984802432, "grad_norm": 0.07343777062528506, "learning_rate": 7.489491016141881e-06, "loss": 0.5908, "step": 2761 }, { "epoch": 1.342370820668693, "grad_norm": 0.07680005433648988, "learning_rate": 7.487830362777335e-06, "loss": 0.5437, "step": 2762 }, { "epoch": 1.342857142857143, "grad_norm": 0.07642924520386632, "learning_rate": 7.486169344600718e-06, "loss": 0.5691, "step": 2763 }, { "epoch": 1.3433434650455927, "grad_norm": 0.07235966130217936, "learning_rate": 7.484507961855599e-06, "loss": 0.5683, "step": 2764 }, { "epoch": 1.3438297872340426, "grad_norm": 0.07154928798615308, "learning_rate": 7.482846214785602e-06, "loss": 0.5549, "step": 2765 }, { "epoch": 1.3443161094224925, "grad_norm": 0.0746256748865418, "learning_rate": 7.481184103634399e-06, "loss": 0.5874, "step": 2766 }, { "epoch": 1.3448024316109422, "grad_norm": 0.07636614904951292, "learning_rate": 7.479521628645725e-06, "loss": 0.5746, "step": 2767 }, { "epoch": 1.345288753799392, "grad_norm": 0.06935711963650168, "learning_rate": 7.47785879006336e-06, "loss": 0.5186, "step": 2768 }, { "epoch": 1.345775075987842, "grad_norm": 0.06918929230542553, "learning_rate": 7.476195588131142e-06, "loss": 0.5128, "step": 2769 }, { "epoch": 1.3462613981762919, "grad_norm": 0.07343596859446479, "learning_rate": 7.474532023092961e-06, "loss": 0.5576, "step": 2770 }, { "epoch": 1.3467477203647418, "grad_norm": 0.07621140587989529, "learning_rate": 7.472868095192758e-06, "loss": 0.566, "step": 2771 }, { "epoch": 1.3472340425531915, "grad_norm": 0.07480282113210489, "learning_rate": 7.471203804674531e-06, "loss": 0.5404, "step": 2772 }, { "epoch": 1.3477203647416414, "grad_norm": 0.0767269627779973, "learning_rate": 7.469539151782328e-06, "loss": 0.5625, "step": 2773 }, { "epoch": 1.3482066869300913, "grad_norm": 0.07135737087821863, "learning_rate": 7.467874136760251e-06, "loss": 0.5096, "step": 2774 }, { "epoch": 1.348693009118541, "grad_norm": 0.07067221291446979, "learning_rate": 7.4662087598524555e-06, "loss": 0.5166, "step": 2775 }, { "epoch": 1.3491793313069909, "grad_norm": 0.07194599554732935, "learning_rate": 7.464543021303153e-06, "loss": 0.5348, "step": 2776 }, { "epoch": 1.3496656534954408, "grad_norm": 0.11722754992133265, "learning_rate": 7.462876921356602e-06, "loss": 0.5661, "step": 2777 }, { "epoch": 1.3501519756838904, "grad_norm": 0.07924318696350909, "learning_rate": 7.46121046025712e-06, "loss": 0.5812, "step": 2778 }, { "epoch": 1.3506382978723404, "grad_norm": 0.072894839745393, "learning_rate": 7.459543638249071e-06, "loss": 0.5802, "step": 2779 }, { "epoch": 1.3511246200607903, "grad_norm": 0.07079268796791323, "learning_rate": 7.457876455576879e-06, "loss": 0.5204, "step": 2780 }, { "epoch": 1.3516109422492402, "grad_norm": 0.06954499029261359, "learning_rate": 7.456208912485015e-06, "loss": 0.5533, "step": 2781 }, { "epoch": 1.35209726443769, "grad_norm": 0.07180234178730577, "learning_rate": 7.454541009218006e-06, "loss": 0.5444, "step": 2782 }, { "epoch": 1.3525835866261398, "grad_norm": 0.0750973886651206, "learning_rate": 7.4528727460204316e-06, "loss": 0.5503, "step": 2783 }, { "epoch": 1.3530699088145897, "grad_norm": 0.07052402629173818, "learning_rate": 7.451204123136923e-06, "loss": 0.5269, "step": 2784 }, { "epoch": 1.3535562310030396, "grad_norm": 0.07312009233445739, "learning_rate": 7.449535140812164e-06, "loss": 0.5661, "step": 2785 }, { "epoch": 1.3540425531914893, "grad_norm": 0.07308544634827979, "learning_rate": 7.447865799290894e-06, "loss": 0.5615, "step": 2786 }, { "epoch": 1.3545288753799392, "grad_norm": 0.07383957112236195, "learning_rate": 7.446196098817903e-06, "loss": 0.5618, "step": 2787 }, { "epoch": 1.355015197568389, "grad_norm": 0.06910668368195767, "learning_rate": 7.4445260396380315e-06, "loss": 0.5271, "step": 2788 }, { "epoch": 1.355501519756839, "grad_norm": 0.08520558447022207, "learning_rate": 7.4428556219961745e-06, "loss": 0.5478, "step": 2789 }, { "epoch": 1.3559878419452889, "grad_norm": 0.0728728692581348, "learning_rate": 7.441184846137282e-06, "loss": 0.5286, "step": 2790 }, { "epoch": 1.3564741641337386, "grad_norm": 0.0746774473855491, "learning_rate": 7.4395137123063535e-06, "loss": 0.5528, "step": 2791 }, { "epoch": 1.3569604863221885, "grad_norm": 0.07441602427893382, "learning_rate": 7.437842220748441e-06, "loss": 0.5376, "step": 2792 }, { "epoch": 1.3574468085106384, "grad_norm": 0.07789502817645118, "learning_rate": 7.43617037170865e-06, "loss": 0.5438, "step": 2793 }, { "epoch": 1.357933130699088, "grad_norm": 0.07085350568100886, "learning_rate": 7.43449816543214e-06, "loss": 0.5563, "step": 2794 }, { "epoch": 1.358419452887538, "grad_norm": 0.07124389214173889, "learning_rate": 7.43282560216412e-06, "loss": 0.5573, "step": 2795 }, { "epoch": 1.3589057750759879, "grad_norm": 0.07284524070744001, "learning_rate": 7.4311526821498505e-06, "loss": 0.5443, "step": 2796 }, { "epoch": 1.3593920972644378, "grad_norm": 0.07601028754945906, "learning_rate": 7.429479405634647e-06, "loss": 0.5762, "step": 2797 }, { "epoch": 1.3598784194528877, "grad_norm": 0.07748444578281123, "learning_rate": 7.427805772863878e-06, "loss": 0.5623, "step": 2798 }, { "epoch": 1.3603647416413374, "grad_norm": 0.07310537205843276, "learning_rate": 7.4261317840829635e-06, "loss": 0.5716, "step": 2799 }, { "epoch": 1.3608510638297873, "grad_norm": 0.07168809957318174, "learning_rate": 7.424457439537371e-06, "loss": 0.5427, "step": 2800 }, { "epoch": 1.3613373860182372, "grad_norm": 0.07202764836618479, "learning_rate": 7.42278273947263e-06, "loss": 0.5379, "step": 2801 }, { "epoch": 1.3618237082066869, "grad_norm": 0.07386640909667373, "learning_rate": 7.42110768413431e-06, "loss": 0.5526, "step": 2802 }, { "epoch": 1.3623100303951368, "grad_norm": 0.0736861769756912, "learning_rate": 7.419432273768041e-06, "loss": 0.5527, "step": 2803 }, { "epoch": 1.3627963525835867, "grad_norm": 0.07183365272393766, "learning_rate": 7.417756508619504e-06, "loss": 0.54, "step": 2804 }, { "epoch": 1.3632826747720364, "grad_norm": 0.07437558078885173, "learning_rate": 7.416080388934433e-06, "loss": 0.5467, "step": 2805 }, { "epoch": 1.3637689969604863, "grad_norm": 0.07475835623248893, "learning_rate": 7.414403914958607e-06, "loss": 0.5718, "step": 2806 }, { "epoch": 1.3642553191489362, "grad_norm": 0.07149494868687824, "learning_rate": 7.412727086937864e-06, "loss": 0.556, "step": 2807 }, { "epoch": 1.364741641337386, "grad_norm": 0.07148101038637412, "learning_rate": 7.411049905118093e-06, "loss": 0.5192, "step": 2808 }, { "epoch": 1.365227963525836, "grad_norm": 0.07241062392752333, "learning_rate": 7.409372369745232e-06, "loss": 0.5675, "step": 2809 }, { "epoch": 1.3657142857142857, "grad_norm": 0.07631166170340024, "learning_rate": 7.407694481065274e-06, "loss": 0.5724, "step": 2810 }, { "epoch": 1.3662006079027356, "grad_norm": 0.06952784825593136, "learning_rate": 7.406016239324262e-06, "loss": 0.5257, "step": 2811 }, { "epoch": 1.3666869300911855, "grad_norm": 0.07355501824184059, "learning_rate": 7.404337644768289e-06, "loss": 0.5273, "step": 2812 }, { "epoch": 1.3671732522796352, "grad_norm": 0.07154587221104756, "learning_rate": 7.402658697643504e-06, "loss": 0.5826, "step": 2813 }, { "epoch": 1.367659574468085, "grad_norm": 0.07236149323762646, "learning_rate": 7.400979398196107e-06, "loss": 0.5461, "step": 2814 }, { "epoch": 1.368145896656535, "grad_norm": 0.07317092435020892, "learning_rate": 7.399299746672344e-06, "loss": 0.5572, "step": 2815 }, { "epoch": 1.3686322188449849, "grad_norm": 0.08558772311693255, "learning_rate": 7.397619743318519e-06, "loss": 0.5861, "step": 2816 }, { "epoch": 1.3691185410334348, "grad_norm": 0.07169593551956592, "learning_rate": 7.395939388380986e-06, "loss": 0.5531, "step": 2817 }, { "epoch": 1.3696048632218845, "grad_norm": 0.07048911954452661, "learning_rate": 7.3942586821061505e-06, "loss": 0.5456, "step": 2818 }, { "epoch": 1.3700911854103344, "grad_norm": 0.07470367769238317, "learning_rate": 7.392577624740467e-06, "loss": 0.5842, "step": 2819 }, { "epoch": 1.3705775075987843, "grad_norm": 0.18957076990199326, "learning_rate": 7.390896216530442e-06, "loss": 0.6094, "step": 2820 }, { "epoch": 1.371063829787234, "grad_norm": 0.07181529660807395, "learning_rate": 7.38921445772264e-06, "loss": 0.5357, "step": 2821 }, { "epoch": 1.3715501519756839, "grad_norm": 0.07517854521329402, "learning_rate": 7.387532348563668e-06, "loss": 0.5977, "step": 2822 }, { "epoch": 1.3720364741641338, "grad_norm": 0.07339891946059386, "learning_rate": 7.38584988930019e-06, "loss": 0.5354, "step": 2823 }, { "epoch": 1.3725227963525835, "grad_norm": 0.0749372211065316, "learning_rate": 7.3841670801789175e-06, "loss": 0.5832, "step": 2824 }, { "epoch": 1.3730091185410334, "grad_norm": 0.07333183161587552, "learning_rate": 7.382483921446619e-06, "loss": 0.5865, "step": 2825 }, { "epoch": 1.3734954407294833, "grad_norm": 0.07131610559246881, "learning_rate": 7.380800413350108e-06, "loss": 0.5596, "step": 2826 }, { "epoch": 1.3739817629179332, "grad_norm": 0.07585132827891361, "learning_rate": 7.379116556136251e-06, "loss": 0.5634, "step": 2827 }, { "epoch": 1.374468085106383, "grad_norm": 0.07086433646522601, "learning_rate": 7.377432350051968e-06, "loss": 0.5701, "step": 2828 }, { "epoch": 1.3749544072948328, "grad_norm": 0.07815009712652388, "learning_rate": 7.375747795344227e-06, "loss": 0.5596, "step": 2829 }, { "epoch": 1.3754407294832827, "grad_norm": 0.07533885199256991, "learning_rate": 7.374062892260052e-06, "loss": 0.5458, "step": 2830 }, { "epoch": 1.3759270516717326, "grad_norm": 0.07409574900520231, "learning_rate": 7.372377641046512e-06, "loss": 0.504, "step": 2831 }, { "epoch": 1.3764133738601823, "grad_norm": 0.07587331618184268, "learning_rate": 7.3706920419507325e-06, "loss": 0.534, "step": 2832 }, { "epoch": 1.3768996960486322, "grad_norm": 0.0720185972198476, "learning_rate": 7.369006095219886e-06, "loss": 0.5191, "step": 2833 }, { "epoch": 1.377386018237082, "grad_norm": 0.07082787061550921, "learning_rate": 7.367319801101196e-06, "loss": 0.5503, "step": 2834 }, { "epoch": 1.377872340425532, "grad_norm": 0.07465233848387376, "learning_rate": 7.3656331598419405e-06, "loss": 0.5214, "step": 2835 }, { "epoch": 1.3783586626139819, "grad_norm": 0.07384824657892668, "learning_rate": 7.3639461716894465e-06, "loss": 0.547, "step": 2836 }, { "epoch": 1.3788449848024316, "grad_norm": 0.07098669004602412, "learning_rate": 7.36225883689109e-06, "loss": 0.5197, "step": 2837 }, { "epoch": 1.3793313069908815, "grad_norm": 0.06927078737252892, "learning_rate": 7.360571155694299e-06, "loss": 0.5398, "step": 2838 }, { "epoch": 1.3798176291793314, "grad_norm": 0.07129588884877884, "learning_rate": 7.358883128346556e-06, "loss": 0.5557, "step": 2839 }, { "epoch": 1.380303951367781, "grad_norm": 0.07586852058302193, "learning_rate": 7.35719475509539e-06, "loss": 0.5662, "step": 2840 }, { "epoch": 1.380790273556231, "grad_norm": 0.0718329178142727, "learning_rate": 7.355506036188379e-06, "loss": 0.54, "step": 2841 }, { "epoch": 1.3812765957446809, "grad_norm": 0.07233453430135549, "learning_rate": 7.353816971873157e-06, "loss": 0.5711, "step": 2842 }, { "epoch": 1.3817629179331308, "grad_norm": 0.07483219855572677, "learning_rate": 7.352127562397405e-06, "loss": 0.5425, "step": 2843 }, { "epoch": 1.3822492401215807, "grad_norm": 0.07378567330802537, "learning_rate": 7.3504378080088565e-06, "loss": 0.5661, "step": 2844 }, { "epoch": 1.3827355623100304, "grad_norm": 0.0788120437038464, "learning_rate": 7.348747708955295e-06, "loss": 0.5166, "step": 2845 }, { "epoch": 1.3832218844984803, "grad_norm": 0.07173608627648453, "learning_rate": 7.347057265484553e-06, "loss": 0.5479, "step": 2846 }, { "epoch": 1.3837082066869302, "grad_norm": 0.07248099415059925, "learning_rate": 7.345366477844516e-06, "loss": 0.6016, "step": 2847 }, { "epoch": 1.3841945288753799, "grad_norm": 0.07054823618129706, "learning_rate": 7.343675346283118e-06, "loss": 0.5623, "step": 2848 }, { "epoch": 1.3846808510638298, "grad_norm": 0.07109370612873508, "learning_rate": 7.341983871048343e-06, "loss": 0.5275, "step": 2849 }, { "epoch": 1.3851671732522797, "grad_norm": 0.07323870167369544, "learning_rate": 7.340292052388232e-06, "loss": 0.5659, "step": 2850 }, { "epoch": 1.3856534954407294, "grad_norm": 0.07004095477558245, "learning_rate": 7.338599890550865e-06, "loss": 0.5122, "step": 2851 }, { "epoch": 1.3861398176291793, "grad_norm": 0.07112006983727202, "learning_rate": 7.3369073857843805e-06, "loss": 0.5138, "step": 2852 }, { "epoch": 1.3866261398176292, "grad_norm": 0.07785461494481812, "learning_rate": 7.3352145383369655e-06, "loss": 0.6049, "step": 2853 }, { "epoch": 1.387112462006079, "grad_norm": 0.07382196200333008, "learning_rate": 7.333521348456858e-06, "loss": 0.5865, "step": 2854 }, { "epoch": 1.387598784194529, "grad_norm": 0.07256705503860443, "learning_rate": 7.331827816392341e-06, "loss": 0.5387, "step": 2855 }, { "epoch": 1.3880851063829787, "grad_norm": 0.07219826758052535, "learning_rate": 7.330133942391757e-06, "loss": 0.5449, "step": 2856 }, { "epoch": 1.3885714285714286, "grad_norm": 0.0733839603950585, "learning_rate": 7.328439726703489e-06, "loss": 0.5903, "step": 2857 }, { "epoch": 1.3890577507598785, "grad_norm": 0.07241773622070892, "learning_rate": 7.326745169575978e-06, "loss": 0.5543, "step": 2858 }, { "epoch": 1.3895440729483282, "grad_norm": 0.07097945461680506, "learning_rate": 7.325050271257707e-06, "loss": 0.5159, "step": 2859 }, { "epoch": 1.390030395136778, "grad_norm": 0.07060664798701335, "learning_rate": 7.323355031997219e-06, "loss": 0.5167, "step": 2860 }, { "epoch": 1.390516717325228, "grad_norm": 0.06969922407444792, "learning_rate": 7.321659452043098e-06, "loss": 0.5243, "step": 2861 }, { "epoch": 1.3910030395136779, "grad_norm": 0.07144180799115653, "learning_rate": 7.319963531643983e-06, "loss": 0.5387, "step": 2862 }, { "epoch": 1.3914893617021278, "grad_norm": 0.07215642533462507, "learning_rate": 7.318267271048561e-06, "loss": 0.5542, "step": 2863 }, { "epoch": 1.3919756838905775, "grad_norm": 0.07275601306170419, "learning_rate": 7.3165706705055695e-06, "loss": 0.5487, "step": 2864 }, { "epoch": 1.3924620060790274, "grad_norm": 0.07508419904523045, "learning_rate": 7.314873730263795e-06, "loss": 0.5754, "step": 2865 }, { "epoch": 1.3929483282674773, "grad_norm": 0.07151443273915334, "learning_rate": 7.313176450572075e-06, "loss": 0.5342, "step": 2866 }, { "epoch": 1.393434650455927, "grad_norm": 0.07278666922289213, "learning_rate": 7.311478831679296e-06, "loss": 0.5581, "step": 2867 }, { "epoch": 1.3939209726443769, "grad_norm": 0.07160797595436258, "learning_rate": 7.3097808738343955e-06, "loss": 0.531, "step": 2868 }, { "epoch": 1.3944072948328268, "grad_norm": 0.07474752287177243, "learning_rate": 7.308082577286359e-06, "loss": 0.5968, "step": 2869 }, { "epoch": 1.3948936170212767, "grad_norm": 0.07071587047456368, "learning_rate": 7.306383942284223e-06, "loss": 0.5377, "step": 2870 }, { "epoch": 1.3953799392097264, "grad_norm": 0.07273908938660241, "learning_rate": 7.304684969077074e-06, "loss": 0.5186, "step": 2871 }, { "epoch": 1.3958662613981763, "grad_norm": 0.07411038431648295, "learning_rate": 7.302985657914044e-06, "loss": 0.5527, "step": 2872 }, { "epoch": 1.3963525835866262, "grad_norm": 0.07646412926021802, "learning_rate": 7.3012860090443196e-06, "loss": 0.5736, "step": 2873 }, { "epoch": 1.396838905775076, "grad_norm": 0.07393976364464108, "learning_rate": 7.299586022717134e-06, "loss": 0.5997, "step": 2874 }, { "epoch": 1.3973252279635258, "grad_norm": 0.07415618502150105, "learning_rate": 7.2978856991817715e-06, "loss": 0.564, "step": 2875 }, { "epoch": 1.3978115501519757, "grad_norm": 0.07532202269238364, "learning_rate": 7.296185038687566e-06, "loss": 0.5509, "step": 2876 }, { "epoch": 1.3982978723404256, "grad_norm": 0.06939612130541992, "learning_rate": 7.2944840414839e-06, "loss": 0.5397, "step": 2877 }, { "epoch": 1.3987841945288753, "grad_norm": 0.0740348888212191, "learning_rate": 7.292782707820205e-06, "loss": 0.5591, "step": 2878 }, { "epoch": 1.3992705167173252, "grad_norm": 0.07390150825340479, "learning_rate": 7.291081037945963e-06, "loss": 0.5575, "step": 2879 }, { "epoch": 1.399756838905775, "grad_norm": 0.07231549687431432, "learning_rate": 7.2893790321107015e-06, "loss": 0.5584, "step": 2880 }, { "epoch": 1.400243161094225, "grad_norm": 0.07733413463169406, "learning_rate": 7.287676690564005e-06, "loss": 0.5865, "step": 2881 }, { "epoch": 1.4007294832826749, "grad_norm": 0.0724220270588826, "learning_rate": 7.285974013555498e-06, "loss": 0.57, "step": 2882 }, { "epoch": 1.4012158054711246, "grad_norm": 0.07338037031835948, "learning_rate": 7.284271001334862e-06, "loss": 0.543, "step": 2883 }, { "epoch": 1.4017021276595745, "grad_norm": 0.07560763262282227, "learning_rate": 7.282567654151822e-06, "loss": 0.5828, "step": 2884 }, { "epoch": 1.4021884498480244, "grad_norm": 0.07615789911052342, "learning_rate": 7.280863972256156e-06, "loss": 0.601, "step": 2885 }, { "epoch": 1.402674772036474, "grad_norm": 0.07020233800855825, "learning_rate": 7.2791599558976925e-06, "loss": 0.522, "step": 2886 }, { "epoch": 1.403161094224924, "grad_norm": 0.07156443938040494, "learning_rate": 7.2774556053263e-06, "loss": 0.5396, "step": 2887 }, { "epoch": 1.4036474164133739, "grad_norm": 0.07303153564149846, "learning_rate": 7.275750920791905e-06, "loss": 0.5696, "step": 2888 }, { "epoch": 1.4041337386018238, "grad_norm": 0.076449654503809, "learning_rate": 7.274045902544482e-06, "loss": 0.5677, "step": 2889 }, { "epoch": 1.4046200607902737, "grad_norm": 0.07254520642196749, "learning_rate": 7.272340550834049e-06, "loss": 0.5781, "step": 2890 }, { "epoch": 1.4051063829787234, "grad_norm": 0.07090249053401353, "learning_rate": 7.27063486591068e-06, "loss": 0.5416, "step": 2891 }, { "epoch": 1.4055927051671733, "grad_norm": 0.07068832992042283, "learning_rate": 7.268928848024492e-06, "loss": 0.5502, "step": 2892 }, { "epoch": 1.4060790273556232, "grad_norm": 0.07102742964508688, "learning_rate": 7.267222497425653e-06, "loss": 0.522, "step": 2893 }, { "epoch": 1.4065653495440729, "grad_norm": 0.07072343476739507, "learning_rate": 7.2655158143643835e-06, "loss": 0.5644, "step": 2894 }, { "epoch": 1.4070516717325228, "grad_norm": 0.07407702907327725, "learning_rate": 7.263808799090944e-06, "loss": 0.5737, "step": 2895 }, { "epoch": 1.4075379939209727, "grad_norm": 0.0739752075966807, "learning_rate": 7.262101451855652e-06, "loss": 0.5802, "step": 2896 }, { "epoch": 1.4080243161094224, "grad_norm": 0.07034633406691732, "learning_rate": 7.26039377290887e-06, "loss": 0.5063, "step": 2897 }, { "epoch": 1.4085106382978723, "grad_norm": 0.07258208292619515, "learning_rate": 7.25868576250101e-06, "loss": 0.5289, "step": 2898 }, { "epoch": 1.4089969604863222, "grad_norm": 0.07395196085382696, "learning_rate": 7.256977420882532e-06, "loss": 0.5664, "step": 2899 }, { "epoch": 1.409483282674772, "grad_norm": 0.07466260790183507, "learning_rate": 7.255268748303944e-06, "loss": 0.5548, "step": 2900 }, { "epoch": 1.409969604863222, "grad_norm": 0.07091725641249104, "learning_rate": 7.253559745015804e-06, "loss": 0.5174, "step": 2901 }, { "epoch": 1.4104559270516717, "grad_norm": 0.07343794089575462, "learning_rate": 7.25185041126872e-06, "loss": 0.5619, "step": 2902 }, { "epoch": 1.4109422492401216, "grad_norm": 0.07304034701522473, "learning_rate": 7.250140747313344e-06, "loss": 0.5555, "step": 2903 }, { "epoch": 1.4114285714285715, "grad_norm": 0.07043003883584882, "learning_rate": 7.24843075340038e-06, "loss": 0.536, "step": 2904 }, { "epoch": 1.4119148936170212, "grad_norm": 0.0731204391784189, "learning_rate": 7.246720429780577e-06, "loss": 0.5358, "step": 2905 }, { "epoch": 1.412401215805471, "grad_norm": 0.07553089619187109, "learning_rate": 7.2450097767047365e-06, "loss": 0.5554, "step": 2906 }, { "epoch": 1.412887537993921, "grad_norm": 0.07178286341781658, "learning_rate": 7.243298794423707e-06, "loss": 0.5219, "step": 2907 }, { "epoch": 1.4133738601823709, "grad_norm": 0.075458150666953, "learning_rate": 7.241587483188383e-06, "loss": 0.5843, "step": 2908 }, { "epoch": 1.4138601823708208, "grad_norm": 0.07264898605466318, "learning_rate": 7.239875843249711e-06, "loss": 0.56, "step": 2909 }, { "epoch": 1.4143465045592705, "grad_norm": 0.07324078665502846, "learning_rate": 7.238163874858681e-06, "loss": 0.5523, "step": 2910 }, { "epoch": 1.4148328267477204, "grad_norm": 0.07304737618554122, "learning_rate": 7.236451578266334e-06, "loss": 0.524, "step": 2911 }, { "epoch": 1.4153191489361703, "grad_norm": 0.07681772766413196, "learning_rate": 7.23473895372376e-06, "loss": 0.5623, "step": 2912 }, { "epoch": 1.41580547112462, "grad_norm": 0.09232250440068646, "learning_rate": 7.233026001482095e-06, "loss": 0.5442, "step": 2913 }, { "epoch": 1.4162917933130699, "grad_norm": 0.07158200143802625, "learning_rate": 7.231312721792526e-06, "loss": 0.5142, "step": 2914 }, { "epoch": 1.4167781155015198, "grad_norm": 0.0752229809809883, "learning_rate": 7.229599114906284e-06, "loss": 0.546, "step": 2915 }, { "epoch": 1.4172644376899697, "grad_norm": 0.07332506419653727, "learning_rate": 7.227885181074651e-06, "loss": 0.5495, "step": 2916 }, { "epoch": 1.4177507598784196, "grad_norm": 0.07655510596221113, "learning_rate": 7.226170920548955e-06, "loss": 0.5632, "step": 2917 }, { "epoch": 1.4182370820668693, "grad_norm": 0.07161543018038535, "learning_rate": 7.224456333580574e-06, "loss": 0.5718, "step": 2918 }, { "epoch": 1.4187234042553192, "grad_norm": 0.07220675071247758, "learning_rate": 7.2227414204209316e-06, "loss": 0.5363, "step": 2919 }, { "epoch": 1.419209726443769, "grad_norm": 0.07253803096202562, "learning_rate": 7.2210261813215e-06, "loss": 0.5485, "step": 2920 }, { "epoch": 1.4196960486322188, "grad_norm": 0.0692362395534776, "learning_rate": 7.2193106165338e-06, "loss": 0.5363, "step": 2921 }, { "epoch": 1.4201823708206687, "grad_norm": 0.07172650959798435, "learning_rate": 7.2175947263094015e-06, "loss": 0.5429, "step": 2922 }, { "epoch": 1.4206686930091186, "grad_norm": 0.07552111125767548, "learning_rate": 7.215878510899918e-06, "loss": 0.5865, "step": 2923 }, { "epoch": 1.4211550151975683, "grad_norm": 0.07690821478542738, "learning_rate": 7.214161970557014e-06, "loss": 0.5676, "step": 2924 }, { "epoch": 1.4216413373860182, "grad_norm": 0.07494485129939144, "learning_rate": 7.212445105532402e-06, "loss": 0.5698, "step": 2925 }, { "epoch": 1.422127659574468, "grad_norm": 0.07281961081163435, "learning_rate": 7.2107279160778376e-06, "loss": 0.563, "step": 2926 }, { "epoch": 1.422613981762918, "grad_norm": 0.07153299855121928, "learning_rate": 7.209010402445129e-06, "loss": 0.5389, "step": 2927 }, { "epoch": 1.4231003039513679, "grad_norm": 0.07205024151133409, "learning_rate": 7.2072925648861304e-06, "loss": 0.5416, "step": 2928 }, { "epoch": 1.4235866261398176, "grad_norm": 0.0723651105714062, "learning_rate": 7.205574403652742e-06, "loss": 0.5149, "step": 2929 }, { "epoch": 1.4240729483282675, "grad_norm": 0.06961775243006621, "learning_rate": 7.203855918996912e-06, "loss": 0.5352, "step": 2930 }, { "epoch": 1.4245592705167174, "grad_norm": 0.07295568722111719, "learning_rate": 7.20213711117064e-06, "loss": 0.5607, "step": 2931 }, { "epoch": 1.425045592705167, "grad_norm": 0.07233304147311813, "learning_rate": 7.200417980425969e-06, "loss": 0.5364, "step": 2932 }, { "epoch": 1.425531914893617, "grad_norm": 0.07244381251575628, "learning_rate": 7.198698527014985e-06, "loss": 0.5534, "step": 2933 }, { "epoch": 1.4260182370820669, "grad_norm": 0.07184447123285677, "learning_rate": 7.1969787511898315e-06, "loss": 0.5388, "step": 2934 }, { "epoch": 1.4265045592705168, "grad_norm": 0.07207958771349697, "learning_rate": 7.195258653202693e-06, "loss": 0.5269, "step": 2935 }, { "epoch": 1.4269908814589667, "grad_norm": 0.07235465963887146, "learning_rate": 7.193538233305801e-06, "loss": 0.569, "step": 2936 }, { "epoch": 1.4274772036474164, "grad_norm": 0.07523043195217095, "learning_rate": 7.191817491751437e-06, "loss": 0.5643, "step": 2937 }, { "epoch": 1.4279635258358663, "grad_norm": 0.06973791267895596, "learning_rate": 7.190096428791926e-06, "loss": 0.5504, "step": 2938 }, { "epoch": 1.4284498480243162, "grad_norm": 0.07336296054204193, "learning_rate": 7.188375044679645e-06, "loss": 0.572, "step": 2939 }, { "epoch": 1.4289361702127659, "grad_norm": 0.07416592836274838, "learning_rate": 7.186653339667016e-06, "loss": 0.5374, "step": 2940 }, { "epoch": 1.4294224924012158, "grad_norm": 0.07167443545562771, "learning_rate": 7.184931314006504e-06, "loss": 0.5398, "step": 2941 }, { "epoch": 1.4299088145896657, "grad_norm": 0.07193340663506219, "learning_rate": 7.183208967950627e-06, "loss": 0.5519, "step": 2942 }, { "epoch": 1.4303951367781156, "grad_norm": 0.07637659783285151, "learning_rate": 7.181486301751945e-06, "loss": 0.5423, "step": 2943 }, { "epoch": 1.4308814589665653, "grad_norm": 0.07460956940701864, "learning_rate": 7.179763315663071e-06, "loss": 0.5791, "step": 2944 }, { "epoch": 1.4313677811550152, "grad_norm": 0.07178759314396749, "learning_rate": 7.1780400099366595e-06, "loss": 0.5472, "step": 2945 }, { "epoch": 1.431854103343465, "grad_norm": 0.0732634138785685, "learning_rate": 7.176316384825414e-06, "loss": 0.5664, "step": 2946 }, { "epoch": 1.432340425531915, "grad_norm": 0.07101649384704461, "learning_rate": 7.174592440582084e-06, "loss": 0.5468, "step": 2947 }, { "epoch": 1.4328267477203647, "grad_norm": 0.07311283871274517, "learning_rate": 7.172868177459467e-06, "loss": 0.551, "step": 2948 }, { "epoch": 1.4333130699088146, "grad_norm": 0.0748691707711707, "learning_rate": 7.171143595710406e-06, "loss": 0.5256, "step": 2949 }, { "epoch": 1.4337993920972645, "grad_norm": 0.07156771620031142, "learning_rate": 7.169418695587791e-06, "loss": 0.5065, "step": 2950 }, { "epoch": 1.4342857142857142, "grad_norm": 0.07525715626809713, "learning_rate": 7.16769347734456e-06, "loss": 0.5747, "step": 2951 }, { "epoch": 1.434772036474164, "grad_norm": 0.07142270349168325, "learning_rate": 7.165967941233698e-06, "loss": 0.5199, "step": 2952 }, { "epoch": 1.435258358662614, "grad_norm": 0.07509813779295409, "learning_rate": 7.164242087508232e-06, "loss": 0.5773, "step": 2953 }, { "epoch": 1.4357446808510639, "grad_norm": 0.07195585471837591, "learning_rate": 7.162515916421241e-06, "loss": 0.511, "step": 2954 }, { "epoch": 1.4362310030395138, "grad_norm": 0.07118912309486448, "learning_rate": 7.160789428225847e-06, "loss": 0.5375, "step": 2955 }, { "epoch": 1.4367173252279635, "grad_norm": 0.07240982057746294, "learning_rate": 7.159062623175222e-06, "loss": 0.5504, "step": 2956 }, { "epoch": 1.4372036474164134, "grad_norm": 0.07412775758703977, "learning_rate": 7.1573355015225795e-06, "loss": 0.5524, "step": 2957 }, { "epoch": 1.4376899696048633, "grad_norm": 0.07026734906933059, "learning_rate": 7.155608063521185e-06, "loss": 0.5429, "step": 2958 }, { "epoch": 1.438176291793313, "grad_norm": 0.07533367397354152, "learning_rate": 7.153880309424347e-06, "loss": 0.5765, "step": 2959 }, { "epoch": 1.4386626139817629, "grad_norm": 0.07183972165978872, "learning_rate": 7.152152239485419e-06, "loss": 0.5655, "step": 2960 }, { "epoch": 1.4391489361702128, "grad_norm": 0.07248919534916828, "learning_rate": 7.1504238539578064e-06, "loss": 0.5757, "step": 2961 }, { "epoch": 1.4396352583586627, "grad_norm": 0.0746259132503898, "learning_rate": 7.148695153094954e-06, "loss": 0.5314, "step": 2962 }, { "epoch": 1.4401215805471126, "grad_norm": 0.07048273194028765, "learning_rate": 7.1469661371503575e-06, "loss": 0.5302, "step": 2963 }, { "epoch": 1.4406079027355623, "grad_norm": 0.07455320830588442, "learning_rate": 7.145236806377559e-06, "loss": 0.5487, "step": 2964 }, { "epoch": 1.4410942249240122, "grad_norm": 0.0751360432228851, "learning_rate": 7.143507161030141e-06, "loss": 0.5633, "step": 2965 }, { "epoch": 1.441580547112462, "grad_norm": 0.07267197128379294, "learning_rate": 7.14177720136174e-06, "loss": 0.5162, "step": 2966 }, { "epoch": 1.4420668693009118, "grad_norm": 0.07125145902664135, "learning_rate": 7.140046927626034e-06, "loss": 0.5289, "step": 2967 }, { "epoch": 1.4425531914893617, "grad_norm": 0.07310039768164478, "learning_rate": 7.138316340076748e-06, "loss": 0.5461, "step": 2968 }, { "epoch": 1.4430395136778116, "grad_norm": 0.07306132053552425, "learning_rate": 7.136585438967653e-06, "loss": 0.5814, "step": 2969 }, { "epoch": 1.4435258358662613, "grad_norm": 0.07221209602036564, "learning_rate": 7.134854224552565e-06, "loss": 0.5439, "step": 2970 }, { "epoch": 1.4440121580547112, "grad_norm": 0.07284288447008033, "learning_rate": 7.1331226970853504e-06, "loss": 0.5808, "step": 2971 }, { "epoch": 1.444498480243161, "grad_norm": 0.07558224654508197, "learning_rate": 7.131390856819914e-06, "loss": 0.6128, "step": 2972 }, { "epoch": 1.444984802431611, "grad_norm": 0.07048089675894902, "learning_rate": 7.129658704010212e-06, "loss": 0.5206, "step": 2973 }, { "epoch": 1.4454711246200609, "grad_norm": 0.07082341456388493, "learning_rate": 7.127926238910243e-06, "loss": 0.5691, "step": 2974 }, { "epoch": 1.4459574468085106, "grad_norm": 0.07522646736361067, "learning_rate": 7.126193461774058e-06, "loss": 0.6011, "step": 2975 }, { "epoch": 1.4464437689969605, "grad_norm": 0.07120922249737295, "learning_rate": 7.124460372855745e-06, "loss": 0.5659, "step": 2976 }, { "epoch": 1.4469300911854104, "grad_norm": 0.07259620255231417, "learning_rate": 7.122726972409443e-06, "loss": 0.5896, "step": 2977 }, { "epoch": 1.44741641337386, "grad_norm": 0.07359223836184006, "learning_rate": 7.120993260689337e-06, "loss": 0.5377, "step": 2978 }, { "epoch": 1.44790273556231, "grad_norm": 0.0718845196665278, "learning_rate": 7.1192592379496535e-06, "loss": 0.513, "step": 2979 }, { "epoch": 1.4483890577507599, "grad_norm": 0.07138327331800925, "learning_rate": 7.11752490444467e-06, "loss": 0.5782, "step": 2980 }, { "epoch": 1.4488753799392098, "grad_norm": 0.07205155524477207, "learning_rate": 7.115790260428704e-06, "loss": 0.5259, "step": 2981 }, { "epoch": 1.4493617021276597, "grad_norm": 0.07407600788318844, "learning_rate": 7.114055306156122e-06, "loss": 0.5693, "step": 2982 }, { "epoch": 1.4498480243161094, "grad_norm": 0.07065327395196716, "learning_rate": 7.112320041881338e-06, "loss": 0.5548, "step": 2983 }, { "epoch": 1.4503343465045593, "grad_norm": 0.07492596056103878, "learning_rate": 7.110584467858806e-06, "loss": 0.5734, "step": 2984 }, { "epoch": 1.4508206686930092, "grad_norm": 0.07248858280579723, "learning_rate": 7.108848584343028e-06, "loss": 0.5511, "step": 2985 }, { "epoch": 1.4513069908814589, "grad_norm": 0.07332417704501748, "learning_rate": 7.107112391588554e-06, "loss": 0.5697, "step": 2986 }, { "epoch": 1.4517933130699088, "grad_norm": 0.07079207263051158, "learning_rate": 7.105375889849976e-06, "loss": 0.5444, "step": 2987 }, { "epoch": 1.4522796352583587, "grad_norm": 0.07527411800683574, "learning_rate": 7.103639079381931e-06, "loss": 0.5795, "step": 2988 }, { "epoch": 1.4527659574468086, "grad_norm": 0.0730518598455749, "learning_rate": 7.101901960439104e-06, "loss": 0.5271, "step": 2989 }, { "epoch": 1.4532522796352585, "grad_norm": 0.07515882259900461, "learning_rate": 7.100164533276223e-06, "loss": 0.5462, "step": 2990 }, { "epoch": 1.4537386018237082, "grad_norm": 0.0719848490921908, "learning_rate": 7.098426798148061e-06, "loss": 0.563, "step": 2991 }, { "epoch": 1.454224924012158, "grad_norm": 0.07067161697045983, "learning_rate": 7.09668875530944e-06, "loss": 0.5432, "step": 2992 }, { "epoch": 1.454711246200608, "grad_norm": 0.07217341124494471, "learning_rate": 7.0949504050152206e-06, "loss": 0.5497, "step": 2993 }, { "epoch": 1.4551975683890577, "grad_norm": 0.07401443780624582, "learning_rate": 7.093211747520316e-06, "loss": 0.554, "step": 2994 }, { "epoch": 1.4556838905775076, "grad_norm": 0.07282930442079073, "learning_rate": 7.091472783079677e-06, "loss": 0.5539, "step": 2995 }, { "epoch": 1.4561702127659575, "grad_norm": 0.07355281223700758, "learning_rate": 7.089733511948306e-06, "loss": 0.586, "step": 2996 }, { "epoch": 1.4566565349544072, "grad_norm": 0.07266678283458142, "learning_rate": 7.087993934381245e-06, "loss": 0.5955, "step": 2997 }, { "epoch": 1.457142857142857, "grad_norm": 0.07243351259384075, "learning_rate": 7.086254050633584e-06, "loss": 0.5573, "step": 2998 }, { "epoch": 1.457629179331307, "grad_norm": 0.07447622621153978, "learning_rate": 7.084513860960458e-06, "loss": 0.6065, "step": 2999 }, { "epoch": 1.4581155015197569, "grad_norm": 0.07315971291215784, "learning_rate": 7.082773365617046e-06, "loss": 0.5669, "step": 3000 }, { "epoch": 1.4586018237082068, "grad_norm": 0.07684035492440253, "learning_rate": 7.081032564858571e-06, "loss": 0.5571, "step": 3001 }, { "epoch": 1.4590881458966565, "grad_norm": 0.072572804482754, "learning_rate": 7.079291458940302e-06, "loss": 0.5736, "step": 3002 }, { "epoch": 1.4595744680851064, "grad_norm": 0.07099953911953658, "learning_rate": 7.077550048117552e-06, "loss": 0.5406, "step": 3003 }, { "epoch": 1.4600607902735563, "grad_norm": 0.07435277917034713, "learning_rate": 7.075808332645681e-06, "loss": 0.5647, "step": 3004 }, { "epoch": 1.460547112462006, "grad_norm": 0.07113457425602679, "learning_rate": 7.074066312780088e-06, "loss": 0.5502, "step": 3005 }, { "epoch": 1.4610334346504559, "grad_norm": 0.07374900945887536, "learning_rate": 7.0723239887762255e-06, "loss": 0.5808, "step": 3006 }, { "epoch": 1.4615197568389058, "grad_norm": 0.07469148464397955, "learning_rate": 7.070581360889581e-06, "loss": 0.5763, "step": 3007 }, { "epoch": 1.4620060790273557, "grad_norm": 0.0726295731428625, "learning_rate": 7.0688384293756925e-06, "loss": 0.5658, "step": 3008 }, { "epoch": 1.4624924012158056, "grad_norm": 0.07129793794780138, "learning_rate": 7.067095194490143e-06, "loss": 0.5284, "step": 3009 }, { "epoch": 1.4629787234042553, "grad_norm": 0.07859046644428544, "learning_rate": 7.065351656488557e-06, "loss": 0.6022, "step": 3010 }, { "epoch": 1.4634650455927052, "grad_norm": 0.07128031613683354, "learning_rate": 7.063607815626603e-06, "loss": 0.5362, "step": 3011 }, { "epoch": 1.463951367781155, "grad_norm": 0.07167413239697759, "learning_rate": 7.0618636721599965e-06, "loss": 0.527, "step": 3012 }, { "epoch": 1.4644376899696048, "grad_norm": 0.0721553867960776, "learning_rate": 7.060119226344497e-06, "loss": 0.5565, "step": 3013 }, { "epoch": 1.4649240121580547, "grad_norm": 0.07124441621907927, "learning_rate": 7.058374478435908e-06, "loss": 0.5437, "step": 3014 }, { "epoch": 1.4654103343465046, "grad_norm": 0.07407227290221569, "learning_rate": 7.056629428690075e-06, "loss": 0.5578, "step": 3015 }, { "epoch": 1.4658966565349545, "grad_norm": 0.0743308615949895, "learning_rate": 7.0548840773628915e-06, "loss": 0.578, "step": 3016 }, { "epoch": 1.4663829787234042, "grad_norm": 0.07107825327066132, "learning_rate": 7.053138424710293e-06, "loss": 0.5542, "step": 3017 }, { "epoch": 1.466869300911854, "grad_norm": 0.07125440918150784, "learning_rate": 7.0513924709882595e-06, "loss": 0.5175, "step": 3018 }, { "epoch": 1.467355623100304, "grad_norm": 0.07055319986563977, "learning_rate": 7.049646216452815e-06, "loss": 0.5405, "step": 3019 }, { "epoch": 1.467841945288754, "grad_norm": 0.07018330260522715, "learning_rate": 7.047899661360027e-06, "loss": 0.5087, "step": 3020 }, { "epoch": 1.4683282674772036, "grad_norm": 0.07550792592405127, "learning_rate": 7.046152805966009e-06, "loss": 0.5935, "step": 3021 }, { "epoch": 1.4688145896656535, "grad_norm": 0.0751320774378785, "learning_rate": 7.044405650526919e-06, "loss": 0.5371, "step": 3022 }, { "epoch": 1.4693009118541034, "grad_norm": 0.07543667946173886, "learning_rate": 7.042658195298956e-06, "loss": 0.5808, "step": 3023 }, { "epoch": 1.469787234042553, "grad_norm": 0.07168438950940491, "learning_rate": 7.040910440538364e-06, "loss": 0.5445, "step": 3024 }, { "epoch": 1.470273556231003, "grad_norm": 0.0719175441601274, "learning_rate": 7.0391623865014325e-06, "loss": 0.5433, "step": 3025 }, { "epoch": 1.4707598784194529, "grad_norm": 0.0722164607459636, "learning_rate": 7.037414033444494e-06, "loss": 0.5631, "step": 3026 }, { "epoch": 1.4712462006079028, "grad_norm": 0.07523894214520926, "learning_rate": 7.035665381623922e-06, "loss": 0.5354, "step": 3027 }, { "epoch": 1.4717325227963527, "grad_norm": 0.073140997379711, "learning_rate": 7.033916431296139e-06, "loss": 0.5675, "step": 3028 }, { "epoch": 1.4722188449848024, "grad_norm": 0.07318593829045134, "learning_rate": 7.032167182717607e-06, "loss": 0.5542, "step": 3029 }, { "epoch": 1.4727051671732523, "grad_norm": 0.07014501567334279, "learning_rate": 7.030417636144836e-06, "loss": 0.5187, "step": 3030 }, { "epoch": 1.4731914893617022, "grad_norm": 0.07165359940136974, "learning_rate": 7.028667791834375e-06, "loss": 0.5526, "step": 3031 }, { "epoch": 1.4736778115501519, "grad_norm": 0.07833469378227913, "learning_rate": 7.026917650042821e-06, "loss": 0.6136, "step": 3032 }, { "epoch": 1.4741641337386018, "grad_norm": 0.0736553271457589, "learning_rate": 7.0251672110268084e-06, "loss": 0.5759, "step": 3033 }, { "epoch": 1.4746504559270517, "grad_norm": 0.07188472691743732, "learning_rate": 7.0234164750430235e-06, "loss": 0.5304, "step": 3034 }, { "epoch": 1.4751367781155016, "grad_norm": 0.07790420205160582, "learning_rate": 7.021665442348189e-06, "loss": 0.5581, "step": 3035 }, { "epoch": 1.4756231003039515, "grad_norm": 0.07197532584910898, "learning_rate": 7.019914113199074e-06, "loss": 0.5473, "step": 3036 }, { "epoch": 1.4761094224924012, "grad_norm": 0.07208350701203245, "learning_rate": 7.018162487852494e-06, "loss": 0.5618, "step": 3037 }, { "epoch": 1.476595744680851, "grad_norm": 0.07799693452450422, "learning_rate": 7.0164105665652995e-06, "loss": 0.5553, "step": 3038 }, { "epoch": 1.477082066869301, "grad_norm": 0.07053946525652079, "learning_rate": 7.014658349594396e-06, "loss": 0.5156, "step": 3039 }, { "epoch": 1.4775683890577507, "grad_norm": 0.06900659441096428, "learning_rate": 7.012905837196724e-06, "loss": 0.5323, "step": 3040 }, { "epoch": 1.4780547112462006, "grad_norm": 0.08067139962297624, "learning_rate": 7.011153029629267e-06, "loss": 0.5531, "step": 3041 }, { "epoch": 1.4785410334346505, "grad_norm": 0.07078492767699299, "learning_rate": 7.009399927149059e-06, "loss": 0.5659, "step": 3042 }, { "epoch": 1.4790273556231002, "grad_norm": 0.07305158196589566, "learning_rate": 7.007646530013168e-06, "loss": 0.5633, "step": 3043 }, { "epoch": 1.47951367781155, "grad_norm": 0.07546726610475094, "learning_rate": 7.0058928384787115e-06, "loss": 0.5584, "step": 3044 }, { "epoch": 1.48, "grad_norm": 0.06983494205533083, "learning_rate": 7.004138852802849e-06, "loss": 0.5551, "step": 3045 }, { "epoch": 1.4804863221884499, "grad_norm": 0.07294696941571022, "learning_rate": 7.002384573242782e-06, "loss": 0.5516, "step": 3046 }, { "epoch": 1.4809726443768998, "grad_norm": 0.07063355929854213, "learning_rate": 7.000630000055757e-06, "loss": 0.5211, "step": 3047 }, { "epoch": 1.4814589665653495, "grad_norm": 0.07126431143715, "learning_rate": 6.99887513349906e-06, "loss": 0.5431, "step": 3048 }, { "epoch": 1.4819452887537994, "grad_norm": 0.07428791534868628, "learning_rate": 6.997119973830024e-06, "loss": 0.5686, "step": 3049 }, { "epoch": 1.4824316109422493, "grad_norm": 0.07309933810283628, "learning_rate": 6.995364521306023e-06, "loss": 0.5474, "step": 3050 }, { "epoch": 1.482917933130699, "grad_norm": 0.07212724445739932, "learning_rate": 6.993608776184473e-06, "loss": 0.5148, "step": 3051 }, { "epoch": 1.4834042553191489, "grad_norm": 0.07318674268465943, "learning_rate": 6.991852738722835e-06, "loss": 0.5631, "step": 3052 }, { "epoch": 1.4838905775075988, "grad_norm": 0.07380906062330862, "learning_rate": 6.990096409178612e-06, "loss": 0.5633, "step": 3053 }, { "epoch": 1.4843768996960487, "grad_norm": 0.07175019954692677, "learning_rate": 6.98833978780935e-06, "loss": 0.5695, "step": 3054 }, { "epoch": 1.4848632218844986, "grad_norm": 0.07514840922383961, "learning_rate": 6.9865828748726376e-06, "loss": 0.5776, "step": 3055 }, { "epoch": 1.4853495440729483, "grad_norm": 0.07259754236959098, "learning_rate": 6.984825670626105e-06, "loss": 0.5442, "step": 3056 }, { "epoch": 1.4858358662613982, "grad_norm": 0.07477696437719421, "learning_rate": 6.983068175327427e-06, "loss": 0.554, "step": 3057 }, { "epoch": 1.486322188449848, "grad_norm": 0.07136033655996425, "learning_rate": 6.9813103892343205e-06, "loss": 0.5594, "step": 3058 }, { "epoch": 1.4868085106382978, "grad_norm": 0.0695046548188177, "learning_rate": 6.979552312604545e-06, "loss": 0.5404, "step": 3059 }, { "epoch": 1.4872948328267477, "grad_norm": 0.07550839218908577, "learning_rate": 6.977793945695901e-06, "loss": 0.5704, "step": 3060 }, { "epoch": 1.4877811550151976, "grad_norm": 0.07104429896611426, "learning_rate": 6.976035288766235e-06, "loss": 0.5369, "step": 3061 }, { "epoch": 1.4882674772036475, "grad_norm": 0.07619924556653611, "learning_rate": 6.974276342073434e-06, "loss": 0.5708, "step": 3062 }, { "epoch": 1.4887537993920974, "grad_norm": 0.07110498030569501, "learning_rate": 6.9725171058754275e-06, "loss": 0.5352, "step": 3063 }, { "epoch": 1.489240121580547, "grad_norm": 0.07329850463671592, "learning_rate": 6.970757580430184e-06, "loss": 0.5583, "step": 3064 }, { "epoch": 1.489726443768997, "grad_norm": 0.07063919179366497, "learning_rate": 6.968997765995722e-06, "loss": 0.5486, "step": 3065 }, { "epoch": 1.490212765957447, "grad_norm": 0.07503426553670628, "learning_rate": 6.967237662830096e-06, "loss": 0.564, "step": 3066 }, { "epoch": 1.4906990881458966, "grad_norm": 0.07148177717612322, "learning_rate": 6.965477271191407e-06, "loss": 0.5537, "step": 3067 }, { "epoch": 1.4911854103343465, "grad_norm": 0.07348425497748244, "learning_rate": 6.963716591337797e-06, "loss": 0.5736, "step": 3068 }, { "epoch": 1.4916717325227964, "grad_norm": 0.07497767912472064, "learning_rate": 6.9619556235274475e-06, "loss": 0.636, "step": 3069 }, { "epoch": 1.492158054711246, "grad_norm": 0.07411834084612781, "learning_rate": 6.960194368018587e-06, "loss": 0.5776, "step": 3070 }, { "epoch": 1.492644376899696, "grad_norm": 0.07324794197358897, "learning_rate": 6.95843282506948e-06, "loss": 0.562, "step": 3071 }, { "epoch": 1.4931306990881459, "grad_norm": 0.0727415948537489, "learning_rate": 6.956670994938438e-06, "loss": 0.5885, "step": 3072 }, { "epoch": 1.4936170212765958, "grad_norm": 0.07151424124766156, "learning_rate": 6.9549088778838145e-06, "loss": 0.531, "step": 3073 }, { "epoch": 1.4941033434650457, "grad_norm": 0.07411308962781803, "learning_rate": 6.953146474164003e-06, "loss": 0.5346, "step": 3074 }, { "epoch": 1.4945896656534954, "grad_norm": 0.07235227352132019, "learning_rate": 6.951383784037442e-06, "loss": 0.5425, "step": 3075 }, { "epoch": 1.4950759878419453, "grad_norm": 0.07495502558692727, "learning_rate": 6.9496208077626084e-06, "loss": 0.5905, "step": 3076 }, { "epoch": 1.4955623100303952, "grad_norm": 0.0703968726060793, "learning_rate": 6.947857545598023e-06, "loss": 0.5071, "step": 3077 }, { "epoch": 1.4960486322188449, "grad_norm": 0.0720641865876843, "learning_rate": 6.946093997802248e-06, "loss": 0.5505, "step": 3078 }, { "epoch": 1.4965349544072948, "grad_norm": 0.07325115038844345, "learning_rate": 6.944330164633886e-06, "loss": 0.5397, "step": 3079 }, { "epoch": 1.4970212765957447, "grad_norm": 0.07412819439542268, "learning_rate": 6.942566046351586e-06, "loss": 0.5561, "step": 3080 }, { "epoch": 1.4975075987841946, "grad_norm": 0.0726852683677845, "learning_rate": 6.940801643214033e-06, "loss": 0.5404, "step": 3081 }, { "epoch": 1.4979939209726445, "grad_norm": 0.07318285267210774, "learning_rate": 6.93903695547996e-06, "loss": 0.5568, "step": 3082 }, { "epoch": 1.4984802431610942, "grad_norm": 0.07479248574935365, "learning_rate": 6.9372719834081345e-06, "loss": 0.5628, "step": 3083 }, { "epoch": 1.498966565349544, "grad_norm": 0.07041983721972382, "learning_rate": 6.935506727257374e-06, "loss": 0.5459, "step": 3084 }, { "epoch": 1.498966565349544, "eval_loss": 0.5784030556678772, "eval_runtime": 105.0371, "eval_samples_per_second": 288.974, "eval_steps_per_second": 36.13, "step": 3084 }, { "epoch": 1.499452887537994, "grad_norm": 0.0724537077619785, "learning_rate": 6.9337411872865316e-06, "loss": 0.5684, "step": 3085 }, { "epoch": 1.4999392097264437, "grad_norm": 0.0727640790978791, "learning_rate": 6.931975363754502e-06, "loss": 0.5349, "step": 3086 }, { "epoch": 1.5004255319148936, "grad_norm": 0.06949866650222498, "learning_rate": 6.930209256920224e-06, "loss": 0.5249, "step": 3087 }, { "epoch": 1.5009118541033435, "grad_norm": 0.08516437144935896, "learning_rate": 6.928442867042679e-06, "loss": 0.5693, "step": 3088 }, { "epoch": 1.5013981762917932, "grad_norm": 0.09787544489121727, "learning_rate": 6.926676194380884e-06, "loss": 0.5678, "step": 3089 }, { "epoch": 1.5018844984802433, "grad_norm": 0.0739586349387754, "learning_rate": 6.924909239193905e-06, "loss": 0.5712, "step": 3090 }, { "epoch": 1.502370820668693, "grad_norm": 0.07316698456215946, "learning_rate": 6.9231420017408456e-06, "loss": 0.5736, "step": 3091 }, { "epoch": 1.502857142857143, "grad_norm": 0.07256762125561368, "learning_rate": 6.921374482280851e-06, "loss": 0.5512, "step": 3092 }, { "epoch": 1.5033434650455928, "grad_norm": 0.07538508659684552, "learning_rate": 6.9196066810731055e-06, "loss": 0.5792, "step": 3093 }, { "epoch": 1.5038297872340425, "grad_norm": 0.07278086333803305, "learning_rate": 6.9178385983768396e-06, "loss": 0.556, "step": 3094 }, { "epoch": 1.5043161094224924, "grad_norm": 0.07541265418038101, "learning_rate": 6.916070234451321e-06, "loss": 0.5657, "step": 3095 }, { "epoch": 1.5048024316109423, "grad_norm": 0.06879095829094561, "learning_rate": 6.914301589555862e-06, "loss": 0.5386, "step": 3096 }, { "epoch": 1.505288753799392, "grad_norm": 0.0713171273585956, "learning_rate": 6.912532663949813e-06, "loss": 0.5435, "step": 3097 }, { "epoch": 1.505775075987842, "grad_norm": 0.07375715098468083, "learning_rate": 6.910763457892567e-06, "loss": 0.5394, "step": 3098 }, { "epoch": 1.5062613981762918, "grad_norm": 0.0734911066106034, "learning_rate": 6.9089939716435575e-06, "loss": 0.5979, "step": 3099 }, { "epoch": 1.5067477203647417, "grad_norm": 0.0715276213150515, "learning_rate": 6.90722420546226e-06, "loss": 0.5367, "step": 3100 }, { "epoch": 1.5072340425531916, "grad_norm": 0.07347299469010009, "learning_rate": 6.905454159608191e-06, "loss": 0.5526, "step": 3101 }, { "epoch": 1.5077203647416413, "grad_norm": 0.07181239218242261, "learning_rate": 6.903683834340909e-06, "loss": 0.52, "step": 3102 }, { "epoch": 1.5082066869300912, "grad_norm": 0.07594578773348291, "learning_rate": 6.901913229920008e-06, "loss": 0.5461, "step": 3103 }, { "epoch": 1.508693009118541, "grad_norm": 0.07081633161367618, "learning_rate": 6.90014234660513e-06, "loss": 0.5589, "step": 3104 }, { "epoch": 1.5091793313069908, "grad_norm": 0.07092879956122423, "learning_rate": 6.898371184655955e-06, "loss": 0.5416, "step": 3105 }, { "epoch": 1.5096656534954407, "grad_norm": 0.07389180320722963, "learning_rate": 6.896599744332204e-06, "loss": 0.5288, "step": 3106 }, { "epoch": 1.5101519756838906, "grad_norm": 0.07153559585772981, "learning_rate": 6.894828025893636e-06, "loss": 0.5365, "step": 3107 }, { "epoch": 1.5106382978723403, "grad_norm": 0.07476698559294594, "learning_rate": 6.893056029600056e-06, "loss": 0.5884, "step": 3108 }, { "epoch": 1.5111246200607904, "grad_norm": 0.07561354566118965, "learning_rate": 6.891283755711309e-06, "loss": 0.5616, "step": 3109 }, { "epoch": 1.51161094224924, "grad_norm": 0.07696656598183628, "learning_rate": 6.889511204487273e-06, "loss": 0.5609, "step": 3110 }, { "epoch": 1.51209726443769, "grad_norm": 0.07709441751978834, "learning_rate": 6.887738376187876e-06, "loss": 0.5664, "step": 3111 }, { "epoch": 1.51258358662614, "grad_norm": 0.07560704909968341, "learning_rate": 6.8859652710730826e-06, "loss": 0.5808, "step": 3112 }, { "epoch": 1.5130699088145896, "grad_norm": 0.07165194474765134, "learning_rate": 6.8841918894028995e-06, "loss": 0.5459, "step": 3113 }, { "epoch": 1.5135562310030395, "grad_norm": 0.07183554804472318, "learning_rate": 6.882418231437371e-06, "loss": 0.526, "step": 3114 }, { "epoch": 1.5140425531914894, "grad_norm": 0.0707048188502243, "learning_rate": 6.880644297436587e-06, "loss": 0.5478, "step": 3115 }, { "epoch": 1.514528875379939, "grad_norm": 0.07352916573160088, "learning_rate": 6.878870087660673e-06, "loss": 0.5512, "step": 3116 }, { "epoch": 1.5150151975683892, "grad_norm": 0.07437583341490736, "learning_rate": 6.877095602369796e-06, "loss": 0.5584, "step": 3117 }, { "epoch": 1.5155015197568389, "grad_norm": 0.07493668972429506, "learning_rate": 6.8753208418241645e-06, "loss": 0.5848, "step": 3118 }, { "epoch": 1.5159878419452888, "grad_norm": 0.0743196163226166, "learning_rate": 6.873545806284027e-06, "loss": 0.5635, "step": 3119 }, { "epoch": 1.5164741641337387, "grad_norm": 0.07128069207444965, "learning_rate": 6.871770496009671e-06, "loss": 0.5334, "step": 3120 }, { "epoch": 1.5169604863221884, "grad_norm": 0.07079144509858189, "learning_rate": 6.869994911261429e-06, "loss": 0.5389, "step": 3121 }, { "epoch": 1.5174468085106383, "grad_norm": 0.06928529093219288, "learning_rate": 6.868219052299669e-06, "loss": 0.5367, "step": 3122 }, { "epoch": 1.5179331306990882, "grad_norm": 0.07305972708173213, "learning_rate": 6.866442919384799e-06, "loss": 0.5635, "step": 3123 }, { "epoch": 1.5184194528875379, "grad_norm": 0.07548168694245962, "learning_rate": 6.8646665127772715e-06, "loss": 0.5544, "step": 3124 }, { "epoch": 1.518905775075988, "grad_norm": 0.07208763548114153, "learning_rate": 6.862889832737573e-06, "loss": 0.5153, "step": 3125 }, { "epoch": 1.5193920972644377, "grad_norm": 0.07318620215535641, "learning_rate": 6.8611128795262345e-06, "loss": 0.563, "step": 3126 }, { "epoch": 1.5198784194528874, "grad_norm": 0.07170851941567723, "learning_rate": 6.859335653403828e-06, "loss": 0.5543, "step": 3127 }, { "epoch": 1.5203647416413375, "grad_norm": 0.07152631565596852, "learning_rate": 6.8575581546309614e-06, "loss": 0.5534, "step": 3128 }, { "epoch": 1.5208510638297872, "grad_norm": 0.07341010178694772, "learning_rate": 6.855780383468285e-06, "loss": 0.548, "step": 3129 }, { "epoch": 1.521337386018237, "grad_norm": 0.06967523281033652, "learning_rate": 6.854002340176489e-06, "loss": 0.5118, "step": 3130 }, { "epoch": 1.521823708206687, "grad_norm": 0.07071033144460957, "learning_rate": 6.852224025016304e-06, "loss": 0.5324, "step": 3131 }, { "epoch": 1.5223100303951367, "grad_norm": 0.07468146926454766, "learning_rate": 6.8504454382484995e-06, "loss": 0.5541, "step": 3132 }, { "epoch": 1.5227963525835866, "grad_norm": 0.07320351831902266, "learning_rate": 6.848666580133885e-06, "loss": 0.5815, "step": 3133 }, { "epoch": 1.5232826747720365, "grad_norm": 0.06970478739345275, "learning_rate": 6.846887450933308e-06, "loss": 0.5386, "step": 3134 }, { "epoch": 1.5237689969604862, "grad_norm": 0.07484435534881968, "learning_rate": 6.8451080509076594e-06, "loss": 0.5607, "step": 3135 }, { "epoch": 1.5242553191489363, "grad_norm": 0.07175184017360012, "learning_rate": 6.843328380317869e-06, "loss": 0.5144, "step": 3136 }, { "epoch": 1.524741641337386, "grad_norm": 0.07500237812607986, "learning_rate": 6.841548439424904e-06, "loss": 0.5487, "step": 3137 }, { "epoch": 1.525227963525836, "grad_norm": 0.0710474445588129, "learning_rate": 6.83976822848977e-06, "loss": 0.5205, "step": 3138 }, { "epoch": 1.5257142857142858, "grad_norm": 0.07277617539552242, "learning_rate": 6.83798774777352e-06, "loss": 0.5417, "step": 3139 }, { "epoch": 1.5262006079027355, "grad_norm": 0.0719445412108135, "learning_rate": 6.836206997537237e-06, "loss": 0.5196, "step": 3140 }, { "epoch": 1.5266869300911854, "grad_norm": 0.07054146362597816, "learning_rate": 6.834425978042049e-06, "loss": 0.5617, "step": 3141 }, { "epoch": 1.5271732522796353, "grad_norm": 0.0725585821349836, "learning_rate": 6.832644689549124e-06, "loss": 0.5749, "step": 3142 }, { "epoch": 1.527659574468085, "grad_norm": 0.07509877595405921, "learning_rate": 6.830863132319666e-06, "loss": 0.5929, "step": 3143 }, { "epoch": 1.528145896656535, "grad_norm": 0.07183585737634014, "learning_rate": 6.82908130661492e-06, "loss": 0.5405, "step": 3144 }, { "epoch": 1.5286322188449848, "grad_norm": 0.07344294571363798, "learning_rate": 6.827299212696171e-06, "loss": 0.4908, "step": 3145 }, { "epoch": 1.5291185410334347, "grad_norm": 0.07495323196219186, "learning_rate": 6.8255168508247425e-06, "loss": 0.5787, "step": 3146 }, { "epoch": 1.5296048632218846, "grad_norm": 0.07376405243380828, "learning_rate": 6.823734221261999e-06, "loss": 0.5659, "step": 3147 }, { "epoch": 1.5300911854103343, "grad_norm": 0.07002837051559817, "learning_rate": 6.821951324269341e-06, "loss": 0.513, "step": 3148 }, { "epoch": 1.5305775075987842, "grad_norm": 0.07194280447730006, "learning_rate": 6.820168160108211e-06, "loss": 0.5421, "step": 3149 }, { "epoch": 1.531063829787234, "grad_norm": 0.07343311766494003, "learning_rate": 6.818384729040091e-06, "loss": 0.5432, "step": 3150 }, { "epoch": 1.5315501519756838, "grad_norm": 0.07414031098421327, "learning_rate": 6.816601031326498e-06, "loss": 0.5741, "step": 3151 }, { "epoch": 1.5320364741641337, "grad_norm": 0.07439420780960881, "learning_rate": 6.814817067228993e-06, "loss": 0.5485, "step": 3152 }, { "epoch": 1.5325227963525836, "grad_norm": 0.07390182381954416, "learning_rate": 6.8130328370091745e-06, "loss": 0.5258, "step": 3153 }, { "epoch": 1.5330091185410333, "grad_norm": 0.0718458946892044, "learning_rate": 6.811248340928678e-06, "loss": 0.5466, "step": 3154 }, { "epoch": 1.5334954407294834, "grad_norm": 0.07234075836847709, "learning_rate": 6.809463579249182e-06, "loss": 0.5166, "step": 3155 }, { "epoch": 1.533981762917933, "grad_norm": 0.07406731431262802, "learning_rate": 6.807678552232397e-06, "loss": 0.5703, "step": 3156 }, { "epoch": 1.534468085106383, "grad_norm": 0.07266559850895499, "learning_rate": 6.8058932601400815e-06, "loss": 0.5746, "step": 3157 }, { "epoch": 1.534954407294833, "grad_norm": 0.07229886526051414, "learning_rate": 6.804107703234026e-06, "loss": 0.5408, "step": 3158 }, { "epoch": 1.5354407294832826, "grad_norm": 0.06722723172174702, "learning_rate": 6.802321881776064e-06, "loss": 0.4872, "step": 3159 }, { "epoch": 1.5359270516717325, "grad_norm": 0.07166558743849152, "learning_rate": 6.800535796028064e-06, "loss": 0.4936, "step": 3160 }, { "epoch": 1.5364133738601824, "grad_norm": 0.07055004239131063, "learning_rate": 6.798749446251935e-06, "loss": 0.5706, "step": 3161 }, { "epoch": 1.536899696048632, "grad_norm": 0.07395142364101165, "learning_rate": 6.796962832709628e-06, "loss": 0.5743, "step": 3162 }, { "epoch": 1.5373860182370822, "grad_norm": 0.07579883200079188, "learning_rate": 6.795175955663127e-06, "loss": 0.5919, "step": 3163 }, { "epoch": 1.537872340425532, "grad_norm": 0.07104115584521561, "learning_rate": 6.793388815374458e-06, "loss": 0.5344, "step": 3164 }, { "epoch": 1.5383586626139818, "grad_norm": 0.07004090427406262, "learning_rate": 6.791601412105682e-06, "loss": 0.5046, "step": 3165 }, { "epoch": 1.5388449848024317, "grad_norm": 0.07711177709162328, "learning_rate": 6.789813746118905e-06, "loss": 0.5985, "step": 3166 }, { "epoch": 1.5393313069908814, "grad_norm": 0.07618121127762313, "learning_rate": 6.788025817676267e-06, "loss": 0.5577, "step": 3167 }, { "epoch": 1.5398176291793313, "grad_norm": 0.07833061904585184, "learning_rate": 6.7862376270399475e-06, "loss": 0.5759, "step": 3168 }, { "epoch": 1.5403039513677812, "grad_norm": 0.07148994344193592, "learning_rate": 6.784449174472164e-06, "loss": 0.5399, "step": 3169 }, { "epoch": 1.5407902735562309, "grad_norm": 0.07220829942206815, "learning_rate": 6.782660460235174e-06, "loss": 0.5419, "step": 3170 }, { "epoch": 1.541276595744681, "grad_norm": 0.07271922423562557, "learning_rate": 6.78087148459127e-06, "loss": 0.5735, "step": 3171 }, { "epoch": 1.5417629179331307, "grad_norm": 0.0723293628145058, "learning_rate": 6.779082247802785e-06, "loss": 0.5329, "step": 3172 }, { "epoch": 1.5422492401215806, "grad_norm": 0.07432241856381423, "learning_rate": 6.777292750132092e-06, "loss": 0.5704, "step": 3173 }, { "epoch": 1.5427355623100305, "grad_norm": 0.07702546353732548, "learning_rate": 6.775502991841599e-06, "loss": 0.5194, "step": 3174 }, { "epoch": 1.5432218844984802, "grad_norm": 0.07564757115329124, "learning_rate": 6.773712973193756e-06, "loss": 0.5905, "step": 3175 }, { "epoch": 1.54370820668693, "grad_norm": 0.07058217865105539, "learning_rate": 6.771922694451045e-06, "loss": 0.5488, "step": 3176 }, { "epoch": 1.54419452887538, "grad_norm": 0.07177721713578614, "learning_rate": 6.770132155875994e-06, "loss": 0.5229, "step": 3177 }, { "epoch": 1.5446808510638297, "grad_norm": 0.07540765684642103, "learning_rate": 6.768341357731164e-06, "loss": 0.5512, "step": 3178 }, { "epoch": 1.5451671732522796, "grad_norm": 0.07355782345854359, "learning_rate": 6.766550300279154e-06, "loss": 0.5418, "step": 3179 }, { "epoch": 1.5456534954407295, "grad_norm": 0.07247766030644205, "learning_rate": 6.764758983782603e-06, "loss": 0.564, "step": 3180 }, { "epoch": 1.5461398176291792, "grad_norm": 0.07150159358993446, "learning_rate": 6.762967408504188e-06, "loss": 0.5636, "step": 3181 }, { "epoch": 1.5466261398176293, "grad_norm": 0.07301610043205627, "learning_rate": 6.761175574706621e-06, "loss": 0.5299, "step": 3182 }, { "epoch": 1.547112462006079, "grad_norm": 0.07866225329389155, "learning_rate": 6.759383482652655e-06, "loss": 0.6068, "step": 3183 }, { "epoch": 1.547598784194529, "grad_norm": 0.07474911744879517, "learning_rate": 6.757591132605082e-06, "loss": 0.5613, "step": 3184 }, { "epoch": 1.5480851063829788, "grad_norm": 0.07609140762383507, "learning_rate": 6.755798524826728e-06, "loss": 0.5458, "step": 3185 }, { "epoch": 1.5485714285714285, "grad_norm": 0.0711891370539839, "learning_rate": 6.7540056595804585e-06, "loss": 0.5663, "step": 3186 }, { "epoch": 1.5490577507598784, "grad_norm": 0.07343599011323172, "learning_rate": 6.752212537129177e-06, "loss": 0.5389, "step": 3187 }, { "epoch": 1.5495440729483283, "grad_norm": 0.07158832676612963, "learning_rate": 6.750419157735823e-06, "loss": 0.5403, "step": 3188 }, { "epoch": 1.550030395136778, "grad_norm": 0.07117522223174699, "learning_rate": 6.748625521663379e-06, "loss": 0.5468, "step": 3189 }, { "epoch": 1.550516717325228, "grad_norm": 0.07567464696082797, "learning_rate": 6.7468316291748596e-06, "loss": 0.6244, "step": 3190 }, { "epoch": 1.5510030395136778, "grad_norm": 0.07163995518174511, "learning_rate": 6.745037480533316e-06, "loss": 0.5111, "step": 3191 }, { "epoch": 1.5514893617021277, "grad_norm": 0.07346488860301928, "learning_rate": 6.743243076001844e-06, "loss": 0.583, "step": 3192 }, { "epoch": 1.5519756838905776, "grad_norm": 0.07203699435665942, "learning_rate": 6.74144841584357e-06, "loss": 0.5643, "step": 3193 }, { "epoch": 1.5524620060790273, "grad_norm": 0.07025097264787714, "learning_rate": 6.739653500321661e-06, "loss": 0.5587, "step": 3194 }, { "epoch": 1.5529483282674772, "grad_norm": 0.0732974312736898, "learning_rate": 6.737858329699322e-06, "loss": 0.5742, "step": 3195 }, { "epoch": 1.553434650455927, "grad_norm": 0.07328227920673683, "learning_rate": 6.736062904239793e-06, "loss": 0.5443, "step": 3196 }, { "epoch": 1.5539209726443768, "grad_norm": 0.07330600466366281, "learning_rate": 6.734267224206355e-06, "loss": 0.53, "step": 3197 }, { "epoch": 1.554407294832827, "grad_norm": 0.08022963931480914, "learning_rate": 6.73247128986232e-06, "loss": 0.5316, "step": 3198 }, { "epoch": 1.5548936170212766, "grad_norm": 0.07497996039847053, "learning_rate": 6.730675101471044e-06, "loss": 0.5856, "step": 3199 }, { "epoch": 1.5553799392097263, "grad_norm": 0.07142146001295316, "learning_rate": 6.72887865929592e-06, "loss": 0.5703, "step": 3200 }, { "epoch": 1.5558662613981764, "grad_norm": 0.0727213804568006, "learning_rate": 6.727081963600371e-06, "loss": 0.5601, "step": 3201 }, { "epoch": 1.556352583586626, "grad_norm": 0.07280318451316951, "learning_rate": 6.725285014647866e-06, "loss": 0.5552, "step": 3202 }, { "epoch": 1.556838905775076, "grad_norm": 0.07444714144621148, "learning_rate": 6.723487812701904e-06, "loss": 0.5708, "step": 3203 }, { "epoch": 1.557325227963526, "grad_norm": 0.07232298868451444, "learning_rate": 6.721690358026027e-06, "loss": 0.5745, "step": 3204 }, { "epoch": 1.5578115501519756, "grad_norm": 0.07116376868026475, "learning_rate": 6.7198926508838095e-06, "loss": 0.5391, "step": 3205 }, { "epoch": 1.5582978723404255, "grad_norm": 0.07234229019563575, "learning_rate": 6.718094691538866e-06, "loss": 0.532, "step": 3206 }, { "epoch": 1.5587841945288754, "grad_norm": 0.07016951902865325, "learning_rate": 6.716296480254845e-06, "loss": 0.5298, "step": 3207 }, { "epoch": 1.559270516717325, "grad_norm": 0.07235586125803008, "learning_rate": 6.714498017295436e-06, "loss": 0.5425, "step": 3208 }, { "epoch": 1.5597568389057752, "grad_norm": 0.31434103642050903, "learning_rate": 6.712699302924362e-06, "loss": 0.5588, "step": 3209 }, { "epoch": 1.560243161094225, "grad_norm": 0.07314316819547519, "learning_rate": 6.7109003374053834e-06, "loss": 0.5258, "step": 3210 }, { "epoch": 1.5607294832826748, "grad_norm": 0.07171419228159252, "learning_rate": 6.7091011210023e-06, "loss": 0.5983, "step": 3211 }, { "epoch": 1.5612158054711247, "grad_norm": 0.07730277799525664, "learning_rate": 6.707301653978945e-06, "loss": 0.5976, "step": 3212 }, { "epoch": 1.5617021276595744, "grad_norm": 0.07173323902425, "learning_rate": 6.70550193659919e-06, "loss": 0.5391, "step": 3213 }, { "epoch": 1.5621884498480243, "grad_norm": 0.07333917368499077, "learning_rate": 6.703701969126944e-06, "loss": 0.5664, "step": 3214 }, { "epoch": 1.5626747720364742, "grad_norm": 0.07576083610894023, "learning_rate": 6.70190175182615e-06, "loss": 0.5993, "step": 3215 }, { "epoch": 1.5631610942249239, "grad_norm": 0.07348457237728495, "learning_rate": 6.700101284960792e-06, "loss": 0.5528, "step": 3216 }, { "epoch": 1.563647416413374, "grad_norm": 0.06827242260955661, "learning_rate": 6.698300568794884e-06, "loss": 0.502, "step": 3217 }, { "epoch": 1.5641337386018237, "grad_norm": 0.07933834881970789, "learning_rate": 6.696499603592486e-06, "loss": 0.5412, "step": 3218 }, { "epoch": 1.5646200607902736, "grad_norm": 0.07488163723208957, "learning_rate": 6.694698389617684e-06, "loss": 0.5373, "step": 3219 }, { "epoch": 1.5651063829787235, "grad_norm": 0.0717208267990933, "learning_rate": 6.6928969271346065e-06, "loss": 0.5076, "step": 3220 }, { "epoch": 1.5655927051671732, "grad_norm": 0.07476304552491932, "learning_rate": 6.691095216407422e-06, "loss": 0.5445, "step": 3221 }, { "epoch": 1.566079027355623, "grad_norm": 0.0719274754530095, "learning_rate": 6.689293257700325e-06, "loss": 0.5937, "step": 3222 }, { "epoch": 1.566565349544073, "grad_norm": 0.07090253390507247, "learning_rate": 6.687491051277557e-06, "loss": 0.5417, "step": 3223 }, { "epoch": 1.5670516717325227, "grad_norm": 0.07085954313742286, "learning_rate": 6.6856885974033895e-06, "loss": 0.5494, "step": 3224 }, { "epoch": 1.5675379939209726, "grad_norm": 0.07442683356199686, "learning_rate": 6.6838858963421295e-06, "loss": 0.5335, "step": 3225 }, { "epoch": 1.5680243161094225, "grad_norm": 0.07174191055132459, "learning_rate": 6.682082948358125e-06, "loss": 0.5689, "step": 3226 }, { "epoch": 1.5685106382978722, "grad_norm": 0.07060495703511777, "learning_rate": 6.680279753715758e-06, "loss": 0.5277, "step": 3227 }, { "epoch": 1.5689969604863223, "grad_norm": 0.076415025813712, "learning_rate": 6.678476312679446e-06, "loss": 0.5711, "step": 3228 }, { "epoch": 1.569483282674772, "grad_norm": 0.0714782210933602, "learning_rate": 6.676672625513642e-06, "loss": 0.5532, "step": 3229 }, { "epoch": 1.569969604863222, "grad_norm": 0.07130813186999481, "learning_rate": 6.674868692482839e-06, "loss": 0.5495, "step": 3230 }, { "epoch": 1.5704559270516718, "grad_norm": 0.0706254640829068, "learning_rate": 6.67306451385156e-06, "loss": 0.5257, "step": 3231 }, { "epoch": 1.5709422492401215, "grad_norm": 0.07553240429440361, "learning_rate": 6.6712600898843705e-06, "loss": 0.5776, "step": 3232 }, { "epoch": 1.5714285714285714, "grad_norm": 0.07146520155715774, "learning_rate": 6.6694554208458665e-06, "loss": 0.5509, "step": 3233 }, { "epoch": 1.5719148936170213, "grad_norm": 0.07410855691473235, "learning_rate": 6.6676505070006826e-06, "loss": 0.5337, "step": 3234 }, { "epoch": 1.572401215805471, "grad_norm": 0.07384514727552748, "learning_rate": 6.6658453486134885e-06, "loss": 0.5374, "step": 3235 }, { "epoch": 1.5728875379939211, "grad_norm": 0.070967292667365, "learning_rate": 6.6640399459489924e-06, "loss": 0.5598, "step": 3236 }, { "epoch": 1.5733738601823708, "grad_norm": 0.07181216459562484, "learning_rate": 6.662234299271934e-06, "loss": 0.531, "step": 3237 }, { "epoch": 1.5738601823708207, "grad_norm": 0.07029494882217778, "learning_rate": 6.660428408847093e-06, "loss": 0.553, "step": 3238 }, { "epoch": 1.5743465045592706, "grad_norm": 0.07012420579724925, "learning_rate": 6.658622274939279e-06, "loss": 0.5685, "step": 3239 }, { "epoch": 1.5748328267477203, "grad_norm": 0.07660643213081383, "learning_rate": 6.6568158978133455e-06, "loss": 0.5467, "step": 3240 }, { "epoch": 1.5753191489361702, "grad_norm": 0.07149805914337362, "learning_rate": 6.655009277734174e-06, "loss": 0.5662, "step": 3241 }, { "epoch": 1.57580547112462, "grad_norm": 0.07336476678608676, "learning_rate": 6.653202414966685e-06, "loss": 0.5254, "step": 3242 }, { "epoch": 1.5762917933130698, "grad_norm": 0.07460358075613517, "learning_rate": 6.651395309775837e-06, "loss": 0.5563, "step": 3243 }, { "epoch": 1.57677811550152, "grad_norm": 0.0729305948739106, "learning_rate": 6.649587962426618e-06, "loss": 0.5279, "step": 3244 }, { "epoch": 1.5772644376899696, "grad_norm": 0.07018140427685124, "learning_rate": 6.647780373184056e-06, "loss": 0.5273, "step": 3245 }, { "epoch": 1.5777507598784195, "grad_norm": 0.07181111863298231, "learning_rate": 6.645972542313216e-06, "loss": 0.5563, "step": 3246 }, { "epoch": 1.5782370820668694, "grad_norm": 0.07327155633461549, "learning_rate": 6.644164470079193e-06, "loss": 0.5683, "step": 3247 }, { "epoch": 1.578723404255319, "grad_norm": 0.07065978850029275, "learning_rate": 6.642356156747122e-06, "loss": 0.5258, "step": 3248 }, { "epoch": 1.579209726443769, "grad_norm": 0.07304613705470726, "learning_rate": 6.64054760258217e-06, "loss": 0.5811, "step": 3249 }, { "epoch": 1.579696048632219, "grad_norm": 0.0724926883521955, "learning_rate": 6.6387388078495405e-06, "loss": 0.5512, "step": 3250 }, { "epoch": 1.5801823708206686, "grad_norm": 0.07417419833082137, "learning_rate": 6.636929772814476e-06, "loss": 0.5658, "step": 3251 }, { "epoch": 1.5806686930091185, "grad_norm": 0.0774684680523497, "learning_rate": 6.635120497742249e-06, "loss": 0.5548, "step": 3252 }, { "epoch": 1.5811550151975684, "grad_norm": 0.07144822690310161, "learning_rate": 6.633310982898168e-06, "loss": 0.5408, "step": 3253 }, { "epoch": 1.581641337386018, "grad_norm": 0.07423440047920843, "learning_rate": 6.63150122854758e-06, "loss": 0.5372, "step": 3254 }, { "epoch": 1.5821276595744682, "grad_norm": 0.07335562427886222, "learning_rate": 6.629691234955863e-06, "loss": 0.5467, "step": 3255 }, { "epoch": 1.582613981762918, "grad_norm": 0.07156578625257565, "learning_rate": 6.627881002388431e-06, "loss": 0.5274, "step": 3256 }, { "epoch": 1.5831003039513678, "grad_norm": 0.07217948467383893, "learning_rate": 6.626070531110738e-06, "loss": 0.5512, "step": 3257 }, { "epoch": 1.5835866261398177, "grad_norm": 0.07127305873656875, "learning_rate": 6.624259821388266e-06, "loss": 0.5648, "step": 3258 }, { "epoch": 1.5840729483282674, "grad_norm": 0.07814092976868031, "learning_rate": 6.622448873486536e-06, "loss": 0.5462, "step": 3259 }, { "epoch": 1.5845592705167173, "grad_norm": 0.081895010949251, "learning_rate": 6.620637687671103e-06, "loss": 0.609, "step": 3260 }, { "epoch": 1.5850455927051672, "grad_norm": 0.07393576618543267, "learning_rate": 6.6188262642075566e-06, "loss": 0.5778, "step": 3261 }, { "epoch": 1.585531914893617, "grad_norm": 0.07093869832903571, "learning_rate": 6.617014603361522e-06, "loss": 0.5517, "step": 3262 }, { "epoch": 1.586018237082067, "grad_norm": 0.07267524584484943, "learning_rate": 6.6152027053986575e-06, "loss": 0.5894, "step": 3263 }, { "epoch": 1.5865045592705167, "grad_norm": 0.07247215403176348, "learning_rate": 6.613390570584659e-06, "loss": 0.5243, "step": 3264 }, { "epoch": 1.5869908814589666, "grad_norm": 0.07539930335319005, "learning_rate": 6.6115781991852535e-06, "loss": 0.5645, "step": 3265 }, { "epoch": 1.5874772036474165, "grad_norm": 0.07676592524900043, "learning_rate": 6.609765591466206e-06, "loss": 0.5319, "step": 3266 }, { "epoch": 1.5879635258358662, "grad_norm": 0.07200588528709602, "learning_rate": 6.607952747693315e-06, "loss": 0.5022, "step": 3267 }, { "epoch": 1.588449848024316, "grad_norm": 0.07289569684017909, "learning_rate": 6.606139668132412e-06, "loss": 0.5377, "step": 3268 }, { "epoch": 1.588936170212766, "grad_norm": 0.0737518333531295, "learning_rate": 6.604326353049368e-06, "loss": 0.5526, "step": 3269 }, { "epoch": 1.5894224924012157, "grad_norm": 0.07384929929083588, "learning_rate": 6.602512802710082e-06, "loss": 0.5914, "step": 3270 }, { "epoch": 1.5899088145896658, "grad_norm": 0.07307112002147667, "learning_rate": 6.60069901738049e-06, "loss": 0.5229, "step": 3271 }, { "epoch": 1.5903951367781155, "grad_norm": 0.07293252687403938, "learning_rate": 6.598884997326564e-06, "loss": 0.546, "step": 3272 }, { "epoch": 1.5908814589665652, "grad_norm": 0.07904077204692532, "learning_rate": 6.597070742814311e-06, "loss": 0.6021, "step": 3273 }, { "epoch": 1.5913677811550153, "grad_norm": 0.07341496526115474, "learning_rate": 6.595256254109768e-06, "loss": 0.5608, "step": 3274 }, { "epoch": 1.591854103343465, "grad_norm": 0.07015346872022103, "learning_rate": 6.593441531479011e-06, "loss": 0.5338, "step": 3275 }, { "epoch": 1.592340425531915, "grad_norm": 0.0765460423403433, "learning_rate": 6.591626575188149e-06, "loss": 0.571, "step": 3276 }, { "epoch": 1.5928267477203648, "grad_norm": 0.07489506422806602, "learning_rate": 6.589811385503324e-06, "loss": 0.5581, "step": 3277 }, { "epoch": 1.5933130699088145, "grad_norm": 0.07201722047185763, "learning_rate": 6.587995962690712e-06, "loss": 0.5585, "step": 3278 }, { "epoch": 1.5937993920972644, "grad_norm": 0.07405049988261514, "learning_rate": 6.586180307016525e-06, "loss": 0.5652, "step": 3279 }, { "epoch": 1.5942857142857143, "grad_norm": 0.07134103679162339, "learning_rate": 6.584364418747009e-06, "loss": 0.5796, "step": 3280 }, { "epoch": 1.594772036474164, "grad_norm": 0.07238724185584482, "learning_rate": 6.582548298148442e-06, "loss": 0.5033, "step": 3281 }, { "epoch": 1.5952583586626141, "grad_norm": 0.07164794837473736, "learning_rate": 6.5807319454871385e-06, "loss": 0.5508, "step": 3282 }, { "epoch": 1.5957446808510638, "grad_norm": 0.07293965281400856, "learning_rate": 6.5789153610294445e-06, "loss": 0.5572, "step": 3283 }, { "epoch": 1.5962310030395137, "grad_norm": 0.0727976292071456, "learning_rate": 6.5770985450417445e-06, "loss": 0.5664, "step": 3284 }, { "epoch": 1.5967173252279636, "grad_norm": 0.07186216370513868, "learning_rate": 6.575281497790451e-06, "loss": 0.5192, "step": 3285 }, { "epoch": 1.5972036474164133, "grad_norm": 0.07395938092943025, "learning_rate": 6.5734642195420136e-06, "loss": 0.5772, "step": 3286 }, { "epoch": 1.5976899696048632, "grad_norm": 0.0699872818752595, "learning_rate": 6.571646710562918e-06, "loss": 0.537, "step": 3287 }, { "epoch": 1.598176291793313, "grad_norm": 0.0739518065036882, "learning_rate": 6.5698289711196785e-06, "loss": 0.5919, "step": 3288 }, { "epoch": 1.5986626139817628, "grad_norm": 0.07191872355669547, "learning_rate": 6.568011001478846e-06, "loss": 0.5667, "step": 3289 }, { "epoch": 1.599148936170213, "grad_norm": 0.07340443237635351, "learning_rate": 6.5661928019070075e-06, "loss": 0.5136, "step": 3290 }, { "epoch": 1.5996352583586626, "grad_norm": 0.07368969597025045, "learning_rate": 6.56437437267078e-06, "loss": 0.5676, "step": 3291 }, { "epoch": 1.6001215805471125, "grad_norm": 0.07088516554493693, "learning_rate": 6.562555714036814e-06, "loss": 0.5558, "step": 3292 }, { "epoch": 1.6006079027355624, "grad_norm": 0.07121955410425318, "learning_rate": 6.560736826271799e-06, "loss": 0.546, "step": 3293 }, { "epoch": 1.601094224924012, "grad_norm": 0.07146220070247321, "learning_rate": 6.55891770964245e-06, "loss": 0.5479, "step": 3294 }, { "epoch": 1.601580547112462, "grad_norm": 0.07163782715059633, "learning_rate": 6.55709836441552e-06, "loss": 0.5306, "step": 3295 }, { "epoch": 1.602066869300912, "grad_norm": 0.07112124390682836, "learning_rate": 6.5552787908578e-06, "loss": 0.5402, "step": 3296 }, { "epoch": 1.6025531914893616, "grad_norm": 0.0729935016764105, "learning_rate": 6.553458989236105e-06, "loss": 0.5344, "step": 3297 }, { "epoch": 1.6030395136778115, "grad_norm": 0.07308374443027824, "learning_rate": 6.55163895981729e-06, "loss": 0.5714, "step": 3298 }, { "epoch": 1.6035258358662614, "grad_norm": 0.07555733970186158, "learning_rate": 6.5498187028682425e-06, "loss": 0.5486, "step": 3299 }, { "epoch": 1.604012158054711, "grad_norm": 0.07199022580017078, "learning_rate": 6.547998218655881e-06, "loss": 0.5852, "step": 3300 }, { "epoch": 1.6044984802431612, "grad_norm": 0.07227522564501239, "learning_rate": 6.546177507447158e-06, "loss": 0.5227, "step": 3301 }, { "epoch": 1.604984802431611, "grad_norm": 0.07219391640105048, "learning_rate": 6.5443565695090624e-06, "loss": 0.5713, "step": 3302 }, { "epoch": 1.6054711246200608, "grad_norm": 0.06953391874443515, "learning_rate": 6.542535405108614e-06, "loss": 0.5447, "step": 3303 }, { "epoch": 1.6059574468085107, "grad_norm": 0.07107518834888034, "learning_rate": 6.540714014512866e-06, "loss": 0.5448, "step": 3304 }, { "epoch": 1.6064437689969604, "grad_norm": 0.07101284290220428, "learning_rate": 6.538892397988902e-06, "loss": 0.5342, "step": 3305 }, { "epoch": 1.6069300911854103, "grad_norm": 0.07226955782389821, "learning_rate": 6.537070555803844e-06, "loss": 0.5584, "step": 3306 }, { "epoch": 1.6074164133738602, "grad_norm": 0.07274351835337117, "learning_rate": 6.535248488224843e-06, "loss": 0.5744, "step": 3307 }, { "epoch": 1.60790273556231, "grad_norm": 0.07459823992393674, "learning_rate": 6.533426195519086e-06, "loss": 0.553, "step": 3308 }, { "epoch": 1.60838905775076, "grad_norm": 0.07456248119729599, "learning_rate": 6.5316036779537896e-06, "loss": 0.5932, "step": 3309 }, { "epoch": 1.6088753799392097, "grad_norm": 0.07316986771263397, "learning_rate": 6.5297809357962064e-06, "loss": 0.5552, "step": 3310 }, { "epoch": 1.6093617021276596, "grad_norm": 0.0700021588345707, "learning_rate": 6.527957969313621e-06, "loss": 0.5063, "step": 3311 }, { "epoch": 1.6098480243161095, "grad_norm": 0.07229185641738506, "learning_rate": 6.526134778773349e-06, "loss": 0.5271, "step": 3312 }, { "epoch": 1.6103343465045592, "grad_norm": 0.07716759762977039, "learning_rate": 6.524311364442745e-06, "loss": 0.5506, "step": 3313 }, { "epoch": 1.610820668693009, "grad_norm": 0.0742000874502674, "learning_rate": 6.522487726589187e-06, "loss": 0.5379, "step": 3314 }, { "epoch": 1.611306990881459, "grad_norm": 0.072679282896985, "learning_rate": 6.520663865480095e-06, "loss": 0.5514, "step": 3315 }, { "epoch": 1.6117933130699087, "grad_norm": 0.0723466796728807, "learning_rate": 6.518839781382914e-06, "loss": 0.5115, "step": 3316 }, { "epoch": 1.6122796352583588, "grad_norm": 0.07482409978841498, "learning_rate": 6.517015474565127e-06, "loss": 0.5782, "step": 3317 }, { "epoch": 1.6127659574468085, "grad_norm": 0.07091564284890334, "learning_rate": 6.515190945294248e-06, "loss": 0.5348, "step": 3318 }, { "epoch": 1.6132522796352584, "grad_norm": 0.07133014992093543, "learning_rate": 6.5133661938378205e-06, "loss": 0.584, "step": 3319 }, { "epoch": 1.6137386018237083, "grad_norm": 0.0749693946980126, "learning_rate": 6.511541220463427e-06, "loss": 0.5569, "step": 3320 }, { "epoch": 1.614224924012158, "grad_norm": 0.07179411847546166, "learning_rate": 6.509716025438679e-06, "loss": 0.5585, "step": 3321 }, { "epoch": 1.614711246200608, "grad_norm": 0.07185434387601454, "learning_rate": 6.50789060903122e-06, "loss": 0.5275, "step": 3322 }, { "epoch": 1.6151975683890578, "grad_norm": 0.07813640536644906, "learning_rate": 6.5060649715087275e-06, "loss": 0.5945, "step": 3323 }, { "epoch": 1.6156838905775075, "grad_norm": 0.07333886719594797, "learning_rate": 6.5042391131389086e-06, "loss": 0.5438, "step": 3324 }, { "epoch": 1.6161702127659574, "grad_norm": 0.07202840727115539, "learning_rate": 6.502413034189505e-06, "loss": 0.538, "step": 3325 }, { "epoch": 1.6166565349544073, "grad_norm": 0.0714070481420102, "learning_rate": 6.500586734928292e-06, "loss": 0.547, "step": 3326 }, { "epoch": 1.617142857142857, "grad_norm": 0.1107496293830457, "learning_rate": 6.498760215623072e-06, "loss": 0.559, "step": 3327 }, { "epoch": 1.6176291793313071, "grad_norm": 0.07610325435589102, "learning_rate": 6.496933476541687e-06, "loss": 0.6172, "step": 3328 }, { "epoch": 1.6181155015197568, "grad_norm": 0.07218988589616845, "learning_rate": 6.495106517952007e-06, "loss": 0.5442, "step": 3329 }, { "epoch": 1.6186018237082067, "grad_norm": 0.07306265130124329, "learning_rate": 6.493279340121935e-06, "loss": 0.5241, "step": 3330 }, { "epoch": 1.6190881458966566, "grad_norm": 0.0726919210048757, "learning_rate": 6.4914519433194046e-06, "loss": 0.611, "step": 3331 }, { "epoch": 1.6195744680851063, "grad_norm": 0.06984768128625651, "learning_rate": 6.489624327812383e-06, "loss": 0.5292, "step": 3332 }, { "epoch": 1.6200607902735562, "grad_norm": 0.07176070271103654, "learning_rate": 6.48779649386887e-06, "loss": 0.5788, "step": 3333 }, { "epoch": 1.6205471124620061, "grad_norm": 0.07370358775992361, "learning_rate": 6.4859684417568955e-06, "loss": 0.5739, "step": 3334 }, { "epoch": 1.6210334346504558, "grad_norm": 0.07463639342421202, "learning_rate": 6.484140171744524e-06, "loss": 0.5594, "step": 3335 }, { "epoch": 1.621519756838906, "grad_norm": 0.07521656953566101, "learning_rate": 6.482311684099849e-06, "loss": 0.5647, "step": 3336 }, { "epoch": 1.6220060790273556, "grad_norm": 0.07673724562491101, "learning_rate": 6.480482979090999e-06, "loss": 0.5682, "step": 3337 }, { "epoch": 1.6224924012158055, "grad_norm": 0.06892917284174166, "learning_rate": 6.4786540569861315e-06, "loss": 0.545, "step": 3338 }, { "epoch": 1.6229787234042554, "grad_norm": 0.07140027295216198, "learning_rate": 6.476824918053438e-06, "loss": 0.5113, "step": 3339 }, { "epoch": 1.623465045592705, "grad_norm": 0.07659421781724557, "learning_rate": 6.474995562561142e-06, "loss": 0.5756, "step": 3340 }, { "epoch": 1.623951367781155, "grad_norm": 0.07423572415022596, "learning_rate": 6.473165990777495e-06, "loss": 0.5542, "step": 3341 }, { "epoch": 1.624437689969605, "grad_norm": 0.07091832573032526, "learning_rate": 6.471336202970784e-06, "loss": 0.5079, "step": 3342 }, { "epoch": 1.6249240121580546, "grad_norm": 0.0744546931374889, "learning_rate": 6.469506199409328e-06, "loss": 0.5657, "step": 3343 }, { "epoch": 1.6254103343465045, "grad_norm": 0.07119571369192718, "learning_rate": 6.467675980361474e-06, "loss": 0.5083, "step": 3344 }, { "epoch": 1.6258966565349544, "grad_norm": 0.07028604422875337, "learning_rate": 6.465845546095605e-06, "loss": 0.5312, "step": 3345 }, { "epoch": 1.626382978723404, "grad_norm": 0.07479678802136304, "learning_rate": 6.464014896880133e-06, "loss": 0.565, "step": 3346 }, { "epoch": 1.6268693009118542, "grad_norm": 0.06915116777199172, "learning_rate": 6.4621840329835e-06, "loss": 0.5135, "step": 3347 }, { "epoch": 1.627355623100304, "grad_norm": 0.0718886127527123, "learning_rate": 6.460352954674184e-06, "loss": 0.5106, "step": 3348 }, { "epoch": 1.6278419452887538, "grad_norm": 0.07222227018942742, "learning_rate": 6.4585216622206895e-06, "loss": 0.5363, "step": 3349 }, { "epoch": 1.6283282674772037, "grad_norm": 0.07359034532628995, "learning_rate": 6.456690155891556e-06, "loss": 0.5765, "step": 3350 }, { "epoch": 1.6288145896656534, "grad_norm": 0.07365975369712202, "learning_rate": 6.454858435955353e-06, "loss": 0.5617, "step": 3351 }, { "epoch": 1.6293009118541033, "grad_norm": 0.07425469323802146, "learning_rate": 6.453026502680683e-06, "loss": 0.5529, "step": 3352 }, { "epoch": 1.6297872340425532, "grad_norm": 0.07585176691977917, "learning_rate": 6.451194356336174e-06, "loss": 0.5573, "step": 3353 }, { "epoch": 1.630273556231003, "grad_norm": 0.07361855253982075, "learning_rate": 6.449361997190495e-06, "loss": 0.5441, "step": 3354 }, { "epoch": 1.630759878419453, "grad_norm": 0.07415644033566286, "learning_rate": 6.4475294255123355e-06, "loss": 0.5631, "step": 3355 }, { "epoch": 1.6312462006079027, "grad_norm": 0.06964319946642412, "learning_rate": 6.445696641570423e-06, "loss": 0.53, "step": 3356 }, { "epoch": 1.6317325227963526, "grad_norm": 0.07106091866374166, "learning_rate": 6.443863645633517e-06, "loss": 0.5401, "step": 3357 }, { "epoch": 1.6322188449848025, "grad_norm": 0.07555702494703029, "learning_rate": 6.442030437970402e-06, "loss": 0.5874, "step": 3358 }, { "epoch": 1.6327051671732522, "grad_norm": 0.07664921378519898, "learning_rate": 6.4401970188499e-06, "loss": 0.5929, "step": 3359 }, { "epoch": 1.633191489361702, "grad_norm": 0.06963611351982496, "learning_rate": 6.438363388540858e-06, "loss": 0.5413, "step": 3360 }, { "epoch": 1.633677811550152, "grad_norm": 0.07366895035817969, "learning_rate": 6.436529547312161e-06, "loss": 0.5711, "step": 3361 }, { "epoch": 1.6341641337386017, "grad_norm": 0.0724475753197608, "learning_rate": 6.434695495432718e-06, "loss": 0.57, "step": 3362 }, { "epoch": 1.6346504559270518, "grad_norm": 0.06817296255819978, "learning_rate": 6.432861233171473e-06, "loss": 0.4896, "step": 3363 }, { "epoch": 1.6351367781155015, "grad_norm": 0.07131217234375203, "learning_rate": 6.431026760797397e-06, "loss": 0.5627, "step": 3364 }, { "epoch": 1.6356231003039514, "grad_norm": 0.06995061696203149, "learning_rate": 6.429192078579498e-06, "loss": 0.5267, "step": 3365 }, { "epoch": 1.6361094224924013, "grad_norm": 0.07077290492741074, "learning_rate": 6.42735718678681e-06, "loss": 0.5589, "step": 3366 }, { "epoch": 1.636595744680851, "grad_norm": 0.07154034910375451, "learning_rate": 6.425522085688401e-06, "loss": 0.5791, "step": 3367 }, { "epoch": 1.637082066869301, "grad_norm": 0.0731832489640663, "learning_rate": 6.423686775553364e-06, "loss": 0.5694, "step": 3368 }, { "epoch": 1.6375683890577508, "grad_norm": 0.07459119368468636, "learning_rate": 6.421851256650831e-06, "loss": 0.543, "step": 3369 }, { "epoch": 1.6380547112462005, "grad_norm": 0.07276161805255993, "learning_rate": 6.420015529249955e-06, "loss": 0.5469, "step": 3370 }, { "epoch": 1.6385410334346504, "grad_norm": 0.0687617035605552, "learning_rate": 6.418179593619928e-06, "loss": 0.5522, "step": 3371 }, { "epoch": 1.6390273556231003, "grad_norm": 0.07023826343869467, "learning_rate": 6.416343450029967e-06, "loss": 0.53, "step": 3372 }, { "epoch": 1.63951367781155, "grad_norm": 0.07153498347034304, "learning_rate": 6.414507098749324e-06, "loss": 0.5448, "step": 3373 }, { "epoch": 1.6400000000000001, "grad_norm": 0.07031049952680485, "learning_rate": 6.412670540047275e-06, "loss": 0.5372, "step": 3374 }, { "epoch": 1.6404863221884498, "grad_norm": 0.07642733811048066, "learning_rate": 6.410833774193137e-06, "loss": 0.5967, "step": 3375 }, { "epoch": 1.6409726443768997, "grad_norm": 0.06967841381479503, "learning_rate": 6.408996801456246e-06, "loss": 0.516, "step": 3376 }, { "epoch": 1.6414589665653496, "grad_norm": 0.07275926964865533, "learning_rate": 6.407159622105974e-06, "loss": 0.5476, "step": 3377 }, { "epoch": 1.6419452887537993, "grad_norm": 0.06911037198168787, "learning_rate": 6.405322236411722e-06, "loss": 0.5076, "step": 3378 }, { "epoch": 1.6424316109422492, "grad_norm": 0.06993404724053431, "learning_rate": 6.403484644642923e-06, "loss": 0.5197, "step": 3379 }, { "epoch": 1.6429179331306991, "grad_norm": 0.07286448383096875, "learning_rate": 6.401646847069038e-06, "loss": 0.57, "step": 3380 }, { "epoch": 1.6434042553191488, "grad_norm": 0.07435546696321146, "learning_rate": 6.3998088439595605e-06, "loss": 0.521, "step": 3381 }, { "epoch": 1.643890577507599, "grad_norm": 0.06869542579443716, "learning_rate": 6.397970635584012e-06, "loss": 0.5233, "step": 3382 }, { "epoch": 1.6443768996960486, "grad_norm": 0.07623847245338596, "learning_rate": 6.396132222211945e-06, "loss": 0.5842, "step": 3383 }, { "epoch": 1.6448632218844985, "grad_norm": 0.069444079369739, "learning_rate": 6.394293604112941e-06, "loss": 0.5584, "step": 3384 }, { "epoch": 1.6453495440729484, "grad_norm": 0.06920549963577673, "learning_rate": 6.392454781556614e-06, "loss": 0.527, "step": 3385 }, { "epoch": 1.645835866261398, "grad_norm": 0.07150253604511128, "learning_rate": 6.390615754812605e-06, "loss": 0.5422, "step": 3386 }, { "epoch": 1.646322188449848, "grad_norm": 0.07380647396593863, "learning_rate": 6.388776524150586e-06, "loss": 0.6001, "step": 3387 }, { "epoch": 1.646808510638298, "grad_norm": 0.07534998231994375, "learning_rate": 6.386937089840262e-06, "loss": 0.5726, "step": 3388 }, { "epoch": 1.6472948328267476, "grad_norm": 0.0703812333326199, "learning_rate": 6.385097452151363e-06, "loss": 0.559, "step": 3389 }, { "epoch": 1.6477811550151977, "grad_norm": 0.07579382270670242, "learning_rate": 6.3832576113536515e-06, "loss": 0.5931, "step": 3390 }, { "epoch": 1.6482674772036474, "grad_norm": 0.0730380978934539, "learning_rate": 6.381417567716919e-06, "loss": 0.5484, "step": 3391 }, { "epoch": 1.6487537993920973, "grad_norm": 0.080873039534468, "learning_rate": 6.379577321510988e-06, "loss": 0.5263, "step": 3392 }, { "epoch": 1.6492401215805472, "grad_norm": 0.07442847785577929, "learning_rate": 6.3777368730057075e-06, "loss": 0.557, "step": 3393 }, { "epoch": 1.649726443768997, "grad_norm": 0.06712855488192049, "learning_rate": 6.375896222470961e-06, "loss": 0.4888, "step": 3394 }, { "epoch": 1.6502127659574468, "grad_norm": 0.0743225166886425, "learning_rate": 6.374055370176657e-06, "loss": 0.5373, "step": 3395 }, { "epoch": 1.6506990881458967, "grad_norm": 0.07154325162816429, "learning_rate": 6.372214316392737e-06, "loss": 0.5552, "step": 3396 }, { "epoch": 1.6511854103343464, "grad_norm": 0.07191603652286814, "learning_rate": 6.37037306138917e-06, "loss": 0.5282, "step": 3397 }, { "epoch": 1.6516717325227963, "grad_norm": 0.06939362956965285, "learning_rate": 6.368531605435955e-06, "loss": 0.5465, "step": 3398 }, { "epoch": 1.6521580547112462, "grad_norm": 0.07437388186012737, "learning_rate": 6.366689948803121e-06, "loss": 0.625, "step": 3399 }, { "epoch": 1.652644376899696, "grad_norm": 0.07492458554891947, "learning_rate": 6.364848091760727e-06, "loss": 0.5401, "step": 3400 }, { "epoch": 1.653130699088146, "grad_norm": 0.07604797113839827, "learning_rate": 6.363006034578856e-06, "loss": 0.5564, "step": 3401 }, { "epoch": 1.6536170212765957, "grad_norm": 0.07419551766677225, "learning_rate": 6.36116377752763e-06, "loss": 0.5271, "step": 3402 }, { "epoch": 1.6541033434650456, "grad_norm": 0.07504553771179602, "learning_rate": 6.359321320877193e-06, "loss": 0.5532, "step": 3403 }, { "epoch": 1.6545896656534955, "grad_norm": 0.0725165387268915, "learning_rate": 6.3574786648977205e-06, "loss": 0.5956, "step": 3404 }, { "epoch": 1.6550759878419452, "grad_norm": 0.0700954308787515, "learning_rate": 6.355635809859416e-06, "loss": 0.557, "step": 3405 }, { "epoch": 1.6555623100303951, "grad_norm": 0.07022157789133485, "learning_rate": 6.3537927560325155e-06, "loss": 0.5624, "step": 3406 }, { "epoch": 1.656048632218845, "grad_norm": 0.07220842322534013, "learning_rate": 6.3519495036872815e-06, "loss": 0.5464, "step": 3407 }, { "epoch": 1.6565349544072947, "grad_norm": 0.07020232744591141, "learning_rate": 6.350106053094004e-06, "loss": 0.5616, "step": 3408 }, { "epoch": 1.6570212765957448, "grad_norm": 0.07342677126572464, "learning_rate": 6.348262404523005e-06, "loss": 0.5553, "step": 3409 }, { "epoch": 1.6575075987841945, "grad_norm": 0.0735559234379739, "learning_rate": 6.346418558244634e-06, "loss": 0.5617, "step": 3410 }, { "epoch": 1.6579939209726444, "grad_norm": 0.07410723554599913, "learning_rate": 6.344574514529272e-06, "loss": 0.5457, "step": 3411 }, { "epoch": 1.6584802431610943, "grad_norm": 0.06764480738125728, "learning_rate": 6.342730273647327e-06, "loss": 0.5073, "step": 3412 }, { "epoch": 1.658966565349544, "grad_norm": 0.07155817038410059, "learning_rate": 6.340885835869233e-06, "loss": 0.5644, "step": 3413 }, { "epoch": 1.659452887537994, "grad_norm": 0.0695935896652981, "learning_rate": 6.339041201465459e-06, "loss": 0.5168, "step": 3414 }, { "epoch": 1.6599392097264438, "grad_norm": 0.07077491789727446, "learning_rate": 6.3371963707065e-06, "loss": 0.5543, "step": 3415 }, { "epoch": 1.6604255319148935, "grad_norm": 0.08015293257927245, "learning_rate": 6.3353513438628764e-06, "loss": 0.5585, "step": 3416 }, { "epoch": 1.6609118541033434, "grad_norm": 0.07357720324580157, "learning_rate": 6.333506121205144e-06, "loss": 0.6082, "step": 3417 }, { "epoch": 1.6613981762917933, "grad_norm": 0.07326962524988391, "learning_rate": 6.33166070300388e-06, "loss": 0.5378, "step": 3418 }, { "epoch": 1.661884498480243, "grad_norm": 0.06750085921534843, "learning_rate": 6.329815089529696e-06, "loss": 0.5132, "step": 3419 }, { "epoch": 1.6623708206686931, "grad_norm": 0.07327890231180548, "learning_rate": 6.32796928105323e-06, "loss": 0.6039, "step": 3420 }, { "epoch": 1.6628571428571428, "grad_norm": 0.07301937848254036, "learning_rate": 6.32612327784515e-06, "loss": 0.5667, "step": 3421 }, { "epoch": 1.6633434650455927, "grad_norm": 0.06924845171060541, "learning_rate": 6.324277080176151e-06, "loss": 0.494, "step": 3422 }, { "epoch": 1.6638297872340426, "grad_norm": 0.07149583581069985, "learning_rate": 6.3224306883169565e-06, "loss": 0.535, "step": 3423 }, { "epoch": 1.6643161094224923, "grad_norm": 0.0737453113581752, "learning_rate": 6.320584102538316e-06, "loss": 0.5391, "step": 3424 }, { "epoch": 1.6648024316109422, "grad_norm": 0.07237495506610687, "learning_rate": 6.318737323111015e-06, "loss": 0.5289, "step": 3425 }, { "epoch": 1.6652887537993921, "grad_norm": 0.07308418536483928, "learning_rate": 6.316890350305861e-06, "loss": 0.582, "step": 3426 }, { "epoch": 1.6657750759878418, "grad_norm": 0.07039007862105334, "learning_rate": 6.315043184393691e-06, "loss": 0.5181, "step": 3427 }, { "epoch": 1.666261398176292, "grad_norm": 0.07294068981271111, "learning_rate": 6.313195825645371e-06, "loss": 0.5558, "step": 3428 }, { "epoch": 1.6667477203647416, "grad_norm": 0.07440584718609539, "learning_rate": 6.311348274331797e-06, "loss": 0.5299, "step": 3429 }, { "epoch": 1.6672340425531915, "grad_norm": 0.07220354336316229, "learning_rate": 6.309500530723889e-06, "loss": 0.552, "step": 3430 }, { "epoch": 1.6677203647416414, "grad_norm": 0.07408564378247474, "learning_rate": 6.3076525950925975e-06, "loss": 0.5546, "step": 3431 }, { "epoch": 1.668206686930091, "grad_norm": 0.07199997484962213, "learning_rate": 6.305804467708902e-06, "loss": 0.5346, "step": 3432 }, { "epoch": 1.668693009118541, "grad_norm": 0.07044592574990392, "learning_rate": 6.3039561488438115e-06, "loss": 0.5364, "step": 3433 }, { "epoch": 1.669179331306991, "grad_norm": 0.07016152799974806, "learning_rate": 6.302107638768359e-06, "loss": 0.5316, "step": 3434 }, { "epoch": 1.6696656534954406, "grad_norm": 0.07206425324719826, "learning_rate": 6.300258937753607e-06, "loss": 0.55, "step": 3435 }, { "epoch": 1.6701519756838907, "grad_norm": 0.07031404784080043, "learning_rate": 6.2984100460706476e-06, "loss": 0.5162, "step": 3436 }, { "epoch": 1.6706382978723404, "grad_norm": 0.07132698925175493, "learning_rate": 6.296560963990599e-06, "loss": 0.5278, "step": 3437 }, { "epoch": 1.6711246200607903, "grad_norm": 0.07315548152365, "learning_rate": 6.2947116917846085e-06, "loss": 0.5506, "step": 3438 }, { "epoch": 1.6716109422492402, "grad_norm": 0.07320352950730759, "learning_rate": 6.29286222972385e-06, "loss": 0.5472, "step": 3439 }, { "epoch": 1.67209726443769, "grad_norm": 0.07584456827317385, "learning_rate": 6.291012578079528e-06, "loss": 0.5883, "step": 3440 }, { "epoch": 1.6725835866261398, "grad_norm": 0.07060915074816591, "learning_rate": 6.289162737122873e-06, "loss": 0.5338, "step": 3441 }, { "epoch": 1.6730699088145897, "grad_norm": 0.0700103844965376, "learning_rate": 6.287312707125139e-06, "loss": 0.549, "step": 3442 }, { "epoch": 1.6735562310030394, "grad_norm": 0.07987773004371675, "learning_rate": 6.285462488357618e-06, "loss": 0.5463, "step": 3443 }, { "epoch": 1.6740425531914893, "grad_norm": 0.07065521169091803, "learning_rate": 6.283612081091619e-06, "loss": 0.5256, "step": 3444 }, { "epoch": 1.6745288753799392, "grad_norm": 0.07185225363803119, "learning_rate": 6.281761485598484e-06, "loss": 0.5422, "step": 3445 }, { "epoch": 1.675015197568389, "grad_norm": 0.07331266687140567, "learning_rate": 6.279910702149584e-06, "loss": 0.5876, "step": 3446 }, { "epoch": 1.675501519756839, "grad_norm": 0.07461911269210564, "learning_rate": 6.278059731016313e-06, "loss": 0.5459, "step": 3447 }, { "epoch": 1.6759878419452887, "grad_norm": 0.06956593015645747, "learning_rate": 6.276208572470096e-06, "loss": 0.4949, "step": 3448 }, { "epoch": 1.6764741641337386, "grad_norm": 0.0702961430246337, "learning_rate": 6.274357226782384e-06, "loss": 0.5523, "step": 3449 }, { "epoch": 1.6769604863221885, "grad_norm": 0.07147229026179, "learning_rate": 6.272505694224655e-06, "loss": 0.5496, "step": 3450 }, { "epoch": 1.6774468085106382, "grad_norm": 0.07587812402997444, "learning_rate": 6.270653975068418e-06, "loss": 0.5539, "step": 3451 }, { "epoch": 1.6779331306990881, "grad_norm": 0.0734741665495876, "learning_rate": 6.268802069585205e-06, "loss": 0.56, "step": 3452 }, { "epoch": 1.678419452887538, "grad_norm": 0.07506979605329234, "learning_rate": 6.266949978046576e-06, "loss": 0.5889, "step": 3453 }, { "epoch": 1.6789057750759877, "grad_norm": 0.07300090763970113, "learning_rate": 6.26509770072412e-06, "loss": 0.5637, "step": 3454 }, { "epoch": 1.6793920972644378, "grad_norm": 0.06914753026846533, "learning_rate": 6.263245237889451e-06, "loss": 0.5544, "step": 3455 }, { "epoch": 1.6798784194528875, "grad_norm": 0.0722890714909905, "learning_rate": 6.261392589814214e-06, "loss": 0.563, "step": 3456 }, { "epoch": 1.6803647416413374, "grad_norm": 0.07269298755097635, "learning_rate": 6.259539756770078e-06, "loss": 0.5573, "step": 3457 }, { "epoch": 1.6808510638297873, "grad_norm": 0.07055887355195972, "learning_rate": 6.257686739028739e-06, "loss": 0.5486, "step": 3458 }, { "epoch": 1.681337386018237, "grad_norm": 0.07042043459620466, "learning_rate": 6.255833536861921e-06, "loss": 0.5337, "step": 3459 }, { "epoch": 1.681823708206687, "grad_norm": 0.0729035793138229, "learning_rate": 6.253980150541378e-06, "loss": 0.5639, "step": 3460 }, { "epoch": 1.6823100303951368, "grad_norm": 0.07131871959759666, "learning_rate": 6.252126580338885e-06, "loss": 0.5166, "step": 3461 }, { "epoch": 1.6827963525835865, "grad_norm": 0.07104127141305687, "learning_rate": 6.250272826526248e-06, "loss": 0.545, "step": 3462 }, { "epoch": 1.6832826747720366, "grad_norm": 0.07028033697890941, "learning_rate": 6.248418889375299e-06, "loss": 0.5334, "step": 3463 }, { "epoch": 1.6837689969604863, "grad_norm": 0.06858869063379364, "learning_rate": 6.246564769157895e-06, "loss": 0.5299, "step": 3464 }, { "epoch": 1.6842553191489362, "grad_norm": 0.07466509908792074, "learning_rate": 6.244710466145924e-06, "loss": 0.5858, "step": 3465 }, { "epoch": 1.6847416413373861, "grad_norm": 0.07497535752935497, "learning_rate": 6.242855980611298e-06, "loss": 0.6044, "step": 3466 }, { "epoch": 1.6852279635258358, "grad_norm": 0.07746186650363425, "learning_rate": 6.241001312825955e-06, "loss": 0.5756, "step": 3467 }, { "epoch": 1.6857142857142857, "grad_norm": 0.07220985737960124, "learning_rate": 6.239146463061864e-06, "loss": 0.5848, "step": 3468 }, { "epoch": 1.6862006079027356, "grad_norm": 0.07390719815103404, "learning_rate": 6.237291431591015e-06, "loss": 0.5594, "step": 3469 }, { "epoch": 1.6866869300911853, "grad_norm": 0.07291685913457813, "learning_rate": 6.235436218685427e-06, "loss": 0.566, "step": 3470 }, { "epoch": 1.6871732522796352, "grad_norm": 0.0722062240015657, "learning_rate": 6.233580824617147e-06, "loss": 0.5494, "step": 3471 }, { "epoch": 1.6876595744680851, "grad_norm": 0.07355731543139962, "learning_rate": 6.231725249658248e-06, "loss": 0.5471, "step": 3472 }, { "epoch": 1.6881458966565348, "grad_norm": 0.07015747437220496, "learning_rate": 6.229869494080828e-06, "loss": 0.5166, "step": 3473 }, { "epoch": 1.688632218844985, "grad_norm": 0.07267653450607674, "learning_rate": 6.228013558157011e-06, "loss": 0.5676, "step": 3474 }, { "epoch": 1.6891185410334346, "grad_norm": 0.07061020483229881, "learning_rate": 6.226157442158954e-06, "loss": 0.5297, "step": 3475 }, { "epoch": 1.6896048632218845, "grad_norm": 0.07740873367140404, "learning_rate": 6.224301146358831e-06, "loss": 0.5487, "step": 3476 }, { "epoch": 1.6900911854103344, "grad_norm": 0.07262189988070133, "learning_rate": 6.222444671028846e-06, "loss": 0.5494, "step": 3477 }, { "epoch": 1.6905775075987841, "grad_norm": 0.06967127517489614, "learning_rate": 6.220588016441234e-06, "loss": 0.5145, "step": 3478 }, { "epoch": 1.691063829787234, "grad_norm": 0.06966666670930907, "learning_rate": 6.218731182868249e-06, "loss": 0.5221, "step": 3479 }, { "epoch": 1.691550151975684, "grad_norm": 0.06860028726467582, "learning_rate": 6.216874170582176e-06, "loss": 0.5237, "step": 3480 }, { "epoch": 1.6920364741641336, "grad_norm": 0.07334406390886508, "learning_rate": 6.215016979855324e-06, "loss": 0.5751, "step": 3481 }, { "epoch": 1.6925227963525837, "grad_norm": 0.07015295794360239, "learning_rate": 6.213159610960029e-06, "loss": 0.5388, "step": 3482 }, { "epoch": 1.6930091185410334, "grad_norm": 0.07038569302208326, "learning_rate": 6.211302064168654e-06, "loss": 0.5262, "step": 3483 }, { "epoch": 1.6934954407294833, "grad_norm": 0.0687806760408564, "learning_rate": 6.209444339753587e-06, "loss": 0.5344, "step": 3484 }, { "epoch": 1.6939817629179332, "grad_norm": 0.07150568590958441, "learning_rate": 6.207586437987241e-06, "loss": 0.5303, "step": 3485 }, { "epoch": 1.694468085106383, "grad_norm": 0.07267683292631394, "learning_rate": 6.205728359142056e-06, "loss": 0.5769, "step": 3486 }, { "epoch": 1.6949544072948328, "grad_norm": 0.07025379501761682, "learning_rate": 6.2038701034905e-06, "loss": 0.5618, "step": 3487 }, { "epoch": 1.6954407294832827, "grad_norm": 0.07193721091828442, "learning_rate": 6.202011671305065e-06, "loss": 0.5203, "step": 3488 }, { "epoch": 1.6959270516717324, "grad_norm": 0.07103752414158168, "learning_rate": 6.200153062858268e-06, "loss": 0.5454, "step": 3489 }, { "epoch": 1.6964133738601823, "grad_norm": 0.07274057275608531, "learning_rate": 6.198294278422652e-06, "loss": 0.5702, "step": 3490 }, { "epoch": 1.6968996960486322, "grad_norm": 0.07127870652316795, "learning_rate": 6.196435318270788e-06, "loss": 0.5242, "step": 3491 }, { "epoch": 1.697386018237082, "grad_norm": 0.07270868914238528, "learning_rate": 6.19457618267527e-06, "loss": 0.5578, "step": 3492 }, { "epoch": 1.697872340425532, "grad_norm": 0.0703077530354761, "learning_rate": 6.192716871908721e-06, "loss": 0.5365, "step": 3493 }, { "epoch": 1.6983586626139817, "grad_norm": 0.07204047106001092, "learning_rate": 6.1908573862437885e-06, "loss": 0.5797, "step": 3494 }, { "epoch": 1.6988449848024316, "grad_norm": 0.07504411096514044, "learning_rate": 6.188997725953141e-06, "loss": 0.585, "step": 3495 }, { "epoch": 1.6993313069908815, "grad_norm": 0.07112888675894341, "learning_rate": 6.18713789130948e-06, "loss": 0.5444, "step": 3496 }, { "epoch": 1.6998176291793312, "grad_norm": 0.07132450544712032, "learning_rate": 6.185277882585528e-06, "loss": 0.5232, "step": 3497 }, { "epoch": 1.7003039513677811, "grad_norm": 0.07291371092849813, "learning_rate": 6.183417700054035e-06, "loss": 0.5565, "step": 3498 }, { "epoch": 1.700790273556231, "grad_norm": 0.07314998467558513, "learning_rate": 6.181557343987775e-06, "loss": 0.5541, "step": 3499 }, { "epoch": 1.7012765957446807, "grad_norm": 0.07155905366858664, "learning_rate": 6.179696814659547e-06, "loss": 0.5322, "step": 3500 }, { "epoch": 1.7017629179331308, "grad_norm": 0.07287449463409627, "learning_rate": 6.177836112342176e-06, "loss": 0.5887, "step": 3501 }, { "epoch": 1.7022492401215805, "grad_norm": 0.07261494504931845, "learning_rate": 6.175975237308516e-06, "loss": 0.5319, "step": 3502 }, { "epoch": 1.7027355623100304, "grad_norm": 0.073142233700149, "learning_rate": 6.174114189831441e-06, "loss": 0.5568, "step": 3503 }, { "epoch": 1.7032218844984803, "grad_norm": 0.07275484890670135, "learning_rate": 6.172252970183854e-06, "loss": 0.6009, "step": 3504 }, { "epoch": 1.70370820668693, "grad_norm": 0.07237350334886138, "learning_rate": 6.170391578638681e-06, "loss": 0.5483, "step": 3505 }, { "epoch": 1.70419452887538, "grad_norm": 0.07517900788602927, "learning_rate": 6.168530015468872e-06, "loss": 0.5733, "step": 3506 }, { "epoch": 1.7046808510638298, "grad_norm": 0.07352048336663716, "learning_rate": 6.166668280947408e-06, "loss": 0.5409, "step": 3507 }, { "epoch": 1.7051671732522795, "grad_norm": 0.0725547704029076, "learning_rate": 6.1648063753472875e-06, "loss": 0.5552, "step": 3508 }, { "epoch": 1.7056534954407296, "grad_norm": 0.07220301147051507, "learning_rate": 6.16294429894154e-06, "loss": 0.5464, "step": 3509 }, { "epoch": 1.7061398176291793, "grad_norm": 0.0710666461142256, "learning_rate": 6.161082052003215e-06, "loss": 0.5346, "step": 3510 }, { "epoch": 1.7066261398176292, "grad_norm": 0.07565430366698148, "learning_rate": 6.159219634805394e-06, "loss": 0.587, "step": 3511 }, { "epoch": 1.7071124620060791, "grad_norm": 0.07128599994758103, "learning_rate": 6.157357047621176e-06, "loss": 0.5748, "step": 3512 }, { "epoch": 1.7075987841945288, "grad_norm": 0.07297765244833161, "learning_rate": 6.155494290723691e-06, "loss": 0.5375, "step": 3513 }, { "epoch": 1.7080851063829787, "grad_norm": 0.0792437135387949, "learning_rate": 6.153631364386091e-06, "loss": 0.5373, "step": 3514 }, { "epoch": 1.7085714285714286, "grad_norm": 0.07240105274594913, "learning_rate": 6.15176826888155e-06, "loss": 0.5485, "step": 3515 }, { "epoch": 1.7090577507598783, "grad_norm": 0.07393454324383125, "learning_rate": 6.149905004483272e-06, "loss": 0.5716, "step": 3516 }, { "epoch": 1.7095440729483282, "grad_norm": 0.07475190267630555, "learning_rate": 6.148041571464483e-06, "loss": 0.5601, "step": 3517 }, { "epoch": 1.7100303951367781, "grad_norm": 0.07326956142275377, "learning_rate": 6.146177970098434e-06, "loss": 0.5234, "step": 3518 }, { "epoch": 1.7105167173252278, "grad_norm": 0.07439871870321202, "learning_rate": 6.144314200658401e-06, "loss": 0.5407, "step": 3519 }, { "epoch": 1.711003039513678, "grad_norm": 0.07607020859262323, "learning_rate": 6.142450263417685e-06, "loss": 0.5172, "step": 3520 }, { "epoch": 1.7114893617021276, "grad_norm": 0.07121399993155196, "learning_rate": 6.1405861586496125e-06, "loss": 0.517, "step": 3521 }, { "epoch": 1.7119756838905775, "grad_norm": 0.07059184271014422, "learning_rate": 6.138721886627532e-06, "loss": 0.5648, "step": 3522 }, { "epoch": 1.7124620060790274, "grad_norm": 0.07140527318071622, "learning_rate": 6.136857447624818e-06, "loss": 0.5272, "step": 3523 }, { "epoch": 1.7129483282674771, "grad_norm": 0.0730752244195604, "learning_rate": 6.134992841914869e-06, "loss": 0.5668, "step": 3524 }, { "epoch": 1.713434650455927, "grad_norm": 0.07565606813572627, "learning_rate": 6.133128069771107e-06, "loss": 0.5858, "step": 3525 }, { "epoch": 1.713920972644377, "grad_norm": 0.07203715162008086, "learning_rate": 6.131263131466982e-06, "loss": 0.5507, "step": 3526 }, { "epoch": 1.7144072948328266, "grad_norm": 0.0775887937031711, "learning_rate": 6.129398027275966e-06, "loss": 0.6, "step": 3527 }, { "epoch": 1.7148936170212767, "grad_norm": 0.07064216132886232, "learning_rate": 6.127532757471553e-06, "loss": 0.5539, "step": 3528 }, { "epoch": 1.7153799392097264, "grad_norm": 0.0726866206458179, "learning_rate": 6.125667322327266e-06, "loss": 0.5664, "step": 3529 }, { "epoch": 1.7158662613981763, "grad_norm": 0.06967467383360446, "learning_rate": 6.123801722116649e-06, "loss": 0.525, "step": 3530 }, { "epoch": 1.7163525835866262, "grad_norm": 0.07081201980771898, "learning_rate": 6.121935957113271e-06, "loss": 0.5242, "step": 3531 }, { "epoch": 1.716838905775076, "grad_norm": 0.07351651568219589, "learning_rate": 6.120070027590724e-06, "loss": 0.5426, "step": 3532 }, { "epoch": 1.7173252279635258, "grad_norm": 0.07222617797151484, "learning_rate": 6.118203933822628e-06, "loss": 0.5381, "step": 3533 }, { "epoch": 1.7178115501519757, "grad_norm": 0.06917247862721279, "learning_rate": 6.116337676082623e-06, "loss": 0.511, "step": 3534 }, { "epoch": 1.7182978723404254, "grad_norm": 0.0754651715338366, "learning_rate": 6.114471254644375e-06, "loss": 0.6034, "step": 3535 }, { "epoch": 1.7187841945288755, "grad_norm": 0.07205487316612337, "learning_rate": 6.112604669781572e-06, "loss": 0.5721, "step": 3536 }, { "epoch": 1.7192705167173252, "grad_norm": 0.07856583507993582, "learning_rate": 6.110737921767931e-06, "loss": 0.535, "step": 3537 }, { "epoch": 1.7197568389057751, "grad_norm": 0.07115444337024836, "learning_rate": 6.1088710108771845e-06, "loss": 0.5448, "step": 3538 }, { "epoch": 1.720243161094225, "grad_norm": 0.07305108996799066, "learning_rate": 6.107003937383098e-06, "loss": 0.5533, "step": 3539 }, { "epoch": 1.7207294832826747, "grad_norm": 0.07550895883040964, "learning_rate": 6.105136701559453e-06, "loss": 0.5831, "step": 3540 }, { "epoch": 1.7212158054711246, "grad_norm": 0.07507836898136312, "learning_rate": 6.103269303680063e-06, "loss": 0.5661, "step": 3541 }, { "epoch": 1.7217021276595745, "grad_norm": 0.07008914370481566, "learning_rate": 6.101401744018756e-06, "loss": 0.5029, "step": 3542 }, { "epoch": 1.7221884498480242, "grad_norm": 0.07651101941723358, "learning_rate": 6.099534022849392e-06, "loss": 0.5167, "step": 3543 }, { "epoch": 1.7226747720364741, "grad_norm": 0.07278891169391895, "learning_rate": 6.097666140445848e-06, "loss": 0.574, "step": 3544 }, { "epoch": 1.723161094224924, "grad_norm": 0.07419667475638432, "learning_rate": 6.09579809708203e-06, "loss": 0.5675, "step": 3545 }, { "epoch": 1.7236474164133737, "grad_norm": 0.07132183658485121, "learning_rate": 6.093929893031865e-06, "loss": 0.5284, "step": 3546 }, { "epoch": 1.7241337386018238, "grad_norm": 0.07247309940970907, "learning_rate": 6.092061528569303e-06, "loss": 0.5155, "step": 3547 }, { "epoch": 1.7246200607902735, "grad_norm": 0.0705966682854827, "learning_rate": 6.090193003968319e-06, "loss": 0.5375, "step": 3548 }, { "epoch": 1.7251063829787234, "grad_norm": 0.07441859204161902, "learning_rate": 6.088324319502912e-06, "loss": 0.5765, "step": 3549 }, { "epoch": 1.7255927051671733, "grad_norm": 0.0733133846506137, "learning_rate": 6.086455475447102e-06, "loss": 0.555, "step": 3550 }, { "epoch": 1.726079027355623, "grad_norm": 0.07415090356658685, "learning_rate": 6.084586472074933e-06, "loss": 0.5407, "step": 3551 }, { "epoch": 1.726565349544073, "grad_norm": 0.07198872510394184, "learning_rate": 6.082717309660474e-06, "loss": 0.5274, "step": 3552 }, { "epoch": 1.7270516717325228, "grad_norm": 0.07286615459382179, "learning_rate": 6.080847988477819e-06, "loss": 0.5242, "step": 3553 }, { "epoch": 1.7275379939209725, "grad_norm": 0.07538009981745537, "learning_rate": 6.078978508801079e-06, "loss": 0.5684, "step": 3554 }, { "epoch": 1.7280243161094226, "grad_norm": 0.07185413538904672, "learning_rate": 6.0771088709043915e-06, "loss": 0.5303, "step": 3555 }, { "epoch": 1.7285106382978723, "grad_norm": 0.07264790164048276, "learning_rate": 6.075239075061921e-06, "loss": 0.5706, "step": 3556 }, { "epoch": 1.7289969604863222, "grad_norm": 0.07266353472118756, "learning_rate": 6.073369121547851e-06, "loss": 0.56, "step": 3557 }, { "epoch": 1.7294832826747721, "grad_norm": 0.07423555281045208, "learning_rate": 6.071499010636387e-06, "loss": 0.5666, "step": 3558 }, { "epoch": 1.7299696048632218, "grad_norm": 0.07525381662140035, "learning_rate": 6.069628742601761e-06, "loss": 0.5676, "step": 3559 }, { "epoch": 1.7304559270516717, "grad_norm": 0.07443860364609997, "learning_rate": 6.067758317718227e-06, "loss": 0.5661, "step": 3560 }, { "epoch": 1.7309422492401216, "grad_norm": 0.06969686395927308, "learning_rate": 6.065887736260061e-06, "loss": 0.5405, "step": 3561 }, { "epoch": 1.7314285714285713, "grad_norm": 0.0718479392508208, "learning_rate": 6.064016998501563e-06, "loss": 0.5646, "step": 3562 }, { "epoch": 1.7319148936170212, "grad_norm": 0.07514505651563702, "learning_rate": 6.062146104717053e-06, "loss": 0.5582, "step": 3563 }, { "epoch": 1.7324012158054711, "grad_norm": 0.07103669937924188, "learning_rate": 6.060275055180877e-06, "loss": 0.5687, "step": 3564 }, { "epoch": 1.7328875379939208, "grad_norm": 0.0710078358345454, "learning_rate": 6.058403850167407e-06, "loss": 0.559, "step": 3565 }, { "epoch": 1.733373860182371, "grad_norm": 0.06997667824929588, "learning_rate": 6.056532489951032e-06, "loss": 0.536, "step": 3566 }, { "epoch": 1.7338601823708206, "grad_norm": 0.0706188721861992, "learning_rate": 6.054660974806164e-06, "loss": 0.5742, "step": 3567 }, { "epoch": 1.7343465045592705, "grad_norm": 0.07204773947379925, "learning_rate": 6.052789305007241e-06, "loss": 0.5917, "step": 3568 }, { "epoch": 1.7348328267477204, "grad_norm": 0.06936993031005802, "learning_rate": 6.050917480828721e-06, "loss": 0.5496, "step": 3569 }, { "epoch": 1.7353191489361701, "grad_norm": 0.07148965795942203, "learning_rate": 6.049045502545085e-06, "loss": 0.5003, "step": 3570 }, { "epoch": 1.73580547112462, "grad_norm": 0.0737125659556872, "learning_rate": 6.047173370430841e-06, "loss": 0.5539, "step": 3571 }, { "epoch": 1.73629179331307, "grad_norm": 0.07185820600545809, "learning_rate": 6.045301084760513e-06, "loss": 0.5477, "step": 3572 }, { "epoch": 1.7367781155015196, "grad_norm": 0.07671154228843335, "learning_rate": 6.04342864580865e-06, "loss": 0.5778, "step": 3573 }, { "epoch": 1.7372644376899697, "grad_norm": 0.07876861276744633, "learning_rate": 6.041556053849825e-06, "loss": 0.5683, "step": 3574 }, { "epoch": 1.7377507598784194, "grad_norm": 0.07621898739787598, "learning_rate": 6.039683309158635e-06, "loss": 0.5414, "step": 3575 }, { "epoch": 1.7382370820668693, "grad_norm": 0.07062539025763707, "learning_rate": 6.037810412009693e-06, "loss": 0.5307, "step": 3576 }, { "epoch": 1.7387234042553192, "grad_norm": 0.08218588023469914, "learning_rate": 6.035937362677637e-06, "loss": 0.6059, "step": 3577 }, { "epoch": 1.739209726443769, "grad_norm": 0.07219396593997263, "learning_rate": 6.034064161437133e-06, "loss": 0.5285, "step": 3578 }, { "epoch": 1.7396960486322188, "grad_norm": 0.073513164585894, "learning_rate": 6.032190808562861e-06, "loss": 0.5773, "step": 3579 }, { "epoch": 1.7401823708206687, "grad_norm": 0.07320664527212553, "learning_rate": 6.0303173043295295e-06, "loss": 0.5578, "step": 3580 }, { "epoch": 1.7406686930091184, "grad_norm": 0.07710056816365062, "learning_rate": 6.028443649011864e-06, "loss": 0.5649, "step": 3581 }, { "epoch": 1.7411550151975685, "grad_norm": 0.07326470935035503, "learning_rate": 6.026569842884617e-06, "loss": 0.5534, "step": 3582 }, { "epoch": 1.7416413373860182, "grad_norm": 0.07197865271632267, "learning_rate": 6.02469588622256e-06, "loss": 0.5579, "step": 3583 }, { "epoch": 1.7421276595744681, "grad_norm": 0.07416629554633317, "learning_rate": 6.022821779300487e-06, "loss": 0.5405, "step": 3584 }, { "epoch": 1.742613981762918, "grad_norm": 0.0739177085627786, "learning_rate": 6.020947522393214e-06, "loss": 0.5448, "step": 3585 }, { "epoch": 1.7431003039513677, "grad_norm": 0.07383647293083587, "learning_rate": 6.019073115775582e-06, "loss": 0.5629, "step": 3586 }, { "epoch": 1.7435866261398176, "grad_norm": 0.07400571979424464, "learning_rate": 6.017198559722451e-06, "loss": 0.5545, "step": 3587 }, { "epoch": 1.7440729483282675, "grad_norm": 0.07582019354540598, "learning_rate": 6.0153238545087e-06, "loss": 0.5519, "step": 3588 }, { "epoch": 1.7445592705167172, "grad_norm": 0.07241105769853494, "learning_rate": 6.013449000409236e-06, "loss": 0.5362, "step": 3589 }, { "epoch": 1.7450455927051671, "grad_norm": 0.07854879356795515, "learning_rate": 6.011573997698985e-06, "loss": 0.5927, "step": 3590 }, { "epoch": 1.745531914893617, "grad_norm": 0.07245268974875702, "learning_rate": 6.009698846652896e-06, "loss": 0.5163, "step": 3591 }, { "epoch": 1.7460182370820667, "grad_norm": 0.07329472333519695, "learning_rate": 6.007823547545933e-06, "loss": 0.5712, "step": 3592 }, { "epoch": 1.7465045592705168, "grad_norm": 0.07178601716777852, "learning_rate": 6.005948100653094e-06, "loss": 0.5265, "step": 3593 }, { "epoch": 1.7469908814589665, "grad_norm": 0.08049572723234834, "learning_rate": 6.00407250624939e-06, "loss": 0.5437, "step": 3594 }, { "epoch": 1.7474772036474164, "grad_norm": 0.07338112673150728, "learning_rate": 6.002196764609853e-06, "loss": 0.5679, "step": 3595 }, { "epoch": 1.7479635258358663, "grad_norm": 0.07032839623261679, "learning_rate": 6.0003208760095426e-06, "loss": 0.5332, "step": 3596 }, { "epoch": 1.748449848024316, "grad_norm": 0.0755904426093377, "learning_rate": 5.998444840723534e-06, "loss": 0.5629, "step": 3597 }, { "epoch": 1.748936170212766, "grad_norm": 0.06932283426829247, "learning_rate": 5.996568659026929e-06, "loss": 0.535, "step": 3598 }, { "epoch": 1.748936170212766, "eval_loss": 0.5749828219413757, "eval_runtime": 105.195, "eval_samples_per_second": 288.54, "eval_steps_per_second": 36.076, "step": 3598 }, { "epoch": 1.7494224924012158, "grad_norm": 0.07192976175379051, "learning_rate": 5.994692331194847e-06, "loss": 0.5592, "step": 3599 }, { "epoch": 1.7499088145896655, "grad_norm": 0.073295720203715, "learning_rate": 5.99281585750243e-06, "loss": 0.544, "step": 3600 }, { "epoch": 1.7503951367781156, "grad_norm": 0.07101517052015995, "learning_rate": 5.99093923822484e-06, "loss": 0.5377, "step": 3601 }, { "epoch": 1.7508814589665653, "grad_norm": 0.07510091411794076, "learning_rate": 5.989062473637264e-06, "loss": 0.5365, "step": 3602 }, { "epoch": 1.7513677811550152, "grad_norm": 0.07245106002976794, "learning_rate": 5.9871855640149075e-06, "loss": 0.5404, "step": 3603 }, { "epoch": 1.7518541033434651, "grad_norm": 0.07367707966079796, "learning_rate": 5.985308509633e-06, "loss": 0.5853, "step": 3604 }, { "epoch": 1.7523404255319148, "grad_norm": 0.06952865724495874, "learning_rate": 5.983431310766787e-06, "loss": 0.5034, "step": 3605 }, { "epoch": 1.7528267477203647, "grad_norm": 0.07349873704457437, "learning_rate": 5.981553967691542e-06, "loss": 0.5555, "step": 3606 }, { "epoch": 1.7533130699088146, "grad_norm": 0.07096240195801282, "learning_rate": 5.979676480682553e-06, "loss": 0.5272, "step": 3607 }, { "epoch": 1.7537993920972643, "grad_norm": 0.07111505280060815, "learning_rate": 5.977798850015132e-06, "loss": 0.5303, "step": 3608 }, { "epoch": 1.7542857142857144, "grad_norm": 0.07267172831798531, "learning_rate": 5.975921075964614e-06, "loss": 0.5414, "step": 3609 }, { "epoch": 1.7547720364741641, "grad_norm": 0.07375392374034453, "learning_rate": 5.974043158806351e-06, "loss": 0.5707, "step": 3610 }, { "epoch": 1.7552583586626138, "grad_norm": 0.06945078244517708, "learning_rate": 5.972165098815721e-06, "loss": 0.5453, "step": 3611 }, { "epoch": 1.755744680851064, "grad_norm": 0.07373462664172362, "learning_rate": 5.970286896268118e-06, "loss": 0.5334, "step": 3612 }, { "epoch": 1.7562310030395136, "grad_norm": 0.07532147086303878, "learning_rate": 5.968408551438963e-06, "loss": 0.5516, "step": 3613 }, { "epoch": 1.7567173252279635, "grad_norm": 0.07462807868060534, "learning_rate": 5.966530064603688e-06, "loss": 0.5612, "step": 3614 }, { "epoch": 1.7572036474164134, "grad_norm": 0.07291028674466045, "learning_rate": 5.964651436037756e-06, "loss": 0.5494, "step": 3615 }, { "epoch": 1.7576899696048631, "grad_norm": 0.07612560089523582, "learning_rate": 5.9627726660166455e-06, "loss": 0.5431, "step": 3616 }, { "epoch": 1.758176291793313, "grad_norm": 0.06951500235641493, "learning_rate": 5.960893754815855e-06, "loss": 0.5219, "step": 3617 }, { "epoch": 1.758662613981763, "grad_norm": 0.07313582547903942, "learning_rate": 5.959014702710908e-06, "loss": 0.5576, "step": 3618 }, { "epoch": 1.7591489361702126, "grad_norm": 0.08916398621923113, "learning_rate": 5.957135509977344e-06, "loss": 0.5697, "step": 3619 }, { "epoch": 1.7596352583586627, "grad_norm": 0.074255810644125, "learning_rate": 5.955256176890728e-06, "loss": 0.5021, "step": 3620 }, { "epoch": 1.7601215805471124, "grad_norm": 0.07999992375481285, "learning_rate": 5.953376703726642e-06, "loss": 0.5713, "step": 3621 }, { "epoch": 1.7606079027355623, "grad_norm": 0.07732857159098362, "learning_rate": 5.951497090760687e-06, "loss": 0.5928, "step": 3622 }, { "epoch": 1.7610942249240122, "grad_norm": 0.0752080245612592, "learning_rate": 5.94961733826849e-06, "loss": 0.5581, "step": 3623 }, { "epoch": 1.761580547112462, "grad_norm": 0.07118654165041885, "learning_rate": 5.9477374465256936e-06, "loss": 0.524, "step": 3624 }, { "epoch": 1.7620668693009118, "grad_norm": 0.07787560729031419, "learning_rate": 5.945857415807962e-06, "loss": 0.5601, "step": 3625 }, { "epoch": 1.7625531914893617, "grad_norm": 0.07311346398756595, "learning_rate": 5.943977246390982e-06, "loss": 0.5851, "step": 3626 }, { "epoch": 1.7630395136778114, "grad_norm": 0.07133042578063493, "learning_rate": 5.942096938550458e-06, "loss": 0.5328, "step": 3627 }, { "epoch": 1.7635258358662615, "grad_norm": 0.07896292882967725, "learning_rate": 5.940216492562116e-06, "loss": 0.6395, "step": 3628 }, { "epoch": 1.7640121580547112, "grad_norm": 0.06927315267834584, "learning_rate": 5.938335908701702e-06, "loss": 0.5235, "step": 3629 }, { "epoch": 1.7644984802431611, "grad_norm": 0.07628064400730479, "learning_rate": 5.936455187244984e-06, "loss": 0.5718, "step": 3630 }, { "epoch": 1.764984802431611, "grad_norm": 0.07036770818404987, "learning_rate": 5.934574328467746e-06, "loss": 0.5473, "step": 3631 }, { "epoch": 1.7654711246200607, "grad_norm": 0.07267823799718665, "learning_rate": 5.932693332645796e-06, "loss": 0.5633, "step": 3632 }, { "epoch": 1.7659574468085106, "grad_norm": 0.0711508112093232, "learning_rate": 5.930812200054959e-06, "loss": 0.5407, "step": 3633 }, { "epoch": 1.7664437689969605, "grad_norm": 0.08003617253679567, "learning_rate": 5.928930930971084e-06, "loss": 0.4988, "step": 3634 }, { "epoch": 1.7669300911854102, "grad_norm": 0.07557166099442586, "learning_rate": 5.927049525670036e-06, "loss": 0.5529, "step": 3635 }, { "epoch": 1.7674164133738601, "grad_norm": 0.07102324358656804, "learning_rate": 5.925167984427703e-06, "loss": 0.5742, "step": 3636 }, { "epoch": 1.76790273556231, "grad_norm": 0.06986729522281891, "learning_rate": 5.923286307519991e-06, "loss": 0.5393, "step": 3637 }, { "epoch": 1.7683890577507597, "grad_norm": 0.07094448030699813, "learning_rate": 5.921404495222827e-06, "loss": 0.5697, "step": 3638 }, { "epoch": 1.7688753799392098, "grad_norm": 0.08123346878547236, "learning_rate": 5.919522547812155e-06, "loss": 0.5935, "step": 3639 }, { "epoch": 1.7693617021276595, "grad_norm": 0.07338052107632764, "learning_rate": 5.917640465563945e-06, "loss": 0.5718, "step": 3640 }, { "epoch": 1.7698480243161094, "grad_norm": 0.07217313260325932, "learning_rate": 5.915758248754181e-06, "loss": 0.5666, "step": 3641 }, { "epoch": 1.7703343465045593, "grad_norm": 0.06905138734972223, "learning_rate": 5.913875897658869e-06, "loss": 0.5498, "step": 3642 }, { "epoch": 1.770820668693009, "grad_norm": 0.06966606160607425, "learning_rate": 5.911993412554035e-06, "loss": 0.5329, "step": 3643 }, { "epoch": 1.771306990881459, "grad_norm": 0.07182203077965714, "learning_rate": 5.910110793715722e-06, "loss": 0.5792, "step": 3644 }, { "epoch": 1.7717933130699088, "grad_norm": 0.07238359817385392, "learning_rate": 5.908228041419998e-06, "loss": 0.554, "step": 3645 }, { "epoch": 1.7722796352583585, "grad_norm": 0.0699897897107677, "learning_rate": 5.906345155942943e-06, "loss": 0.5559, "step": 3646 }, { "epoch": 1.7727659574468086, "grad_norm": 0.0731887618484756, "learning_rate": 5.904462137560664e-06, "loss": 0.5555, "step": 3647 }, { "epoch": 1.7732522796352583, "grad_norm": 0.07163249058896368, "learning_rate": 5.902578986549283e-06, "loss": 0.5366, "step": 3648 }, { "epoch": 1.7737386018237082, "grad_norm": 0.0761682820692465, "learning_rate": 5.900695703184944e-06, "loss": 0.5729, "step": 3649 }, { "epoch": 1.7742249240121581, "grad_norm": 0.07793529449567257, "learning_rate": 5.898812287743808e-06, "loss": 0.6167, "step": 3650 }, { "epoch": 1.7747112462006078, "grad_norm": 0.07441850369094558, "learning_rate": 5.896928740502057e-06, "loss": 0.5681, "step": 3651 }, { "epoch": 1.7751975683890577, "grad_norm": 0.07578780790005148, "learning_rate": 5.895045061735891e-06, "loss": 0.5767, "step": 3652 }, { "epoch": 1.7756838905775076, "grad_norm": 0.06891701086798034, "learning_rate": 5.8931612517215305e-06, "loss": 0.5176, "step": 3653 }, { "epoch": 1.7761702127659573, "grad_norm": 0.07473015452158757, "learning_rate": 5.891277310735216e-06, "loss": 0.5415, "step": 3654 }, { "epoch": 1.7766565349544075, "grad_norm": 0.07022847829312584, "learning_rate": 5.889393239053203e-06, "loss": 0.5188, "step": 3655 }, { "epoch": 1.7771428571428571, "grad_norm": 0.07053240306281798, "learning_rate": 5.887509036951773e-06, "loss": 0.545, "step": 3656 }, { "epoch": 1.777629179331307, "grad_norm": 0.07320632766375113, "learning_rate": 5.88562470470722e-06, "loss": 0.5638, "step": 3657 }, { "epoch": 1.778115501519757, "grad_norm": 0.07024946691265871, "learning_rate": 5.883740242595862e-06, "loss": 0.5695, "step": 3658 }, { "epoch": 1.7786018237082066, "grad_norm": 0.07346540996701748, "learning_rate": 5.8818556508940325e-06, "loss": 0.5649, "step": 3659 }, { "epoch": 1.7790881458966565, "grad_norm": 0.07255370698813807, "learning_rate": 5.879970929878086e-06, "loss": 0.5398, "step": 3660 }, { "epoch": 1.7795744680851064, "grad_norm": 0.07249017333713408, "learning_rate": 5.878086079824394e-06, "loss": 0.5519, "step": 3661 }, { "epoch": 1.7800607902735561, "grad_norm": 0.07320838547526598, "learning_rate": 5.876201101009352e-06, "loss": 0.5336, "step": 3662 }, { "epoch": 1.780547112462006, "grad_norm": 0.07665163618450926, "learning_rate": 5.874315993709368e-06, "loss": 0.5972, "step": 3663 }, { "epoch": 1.781033434650456, "grad_norm": 0.07098204771026999, "learning_rate": 5.872430758200869e-06, "loss": 0.5502, "step": 3664 }, { "epoch": 1.7815197568389056, "grad_norm": 0.07232170640321002, "learning_rate": 5.8705453947603096e-06, "loss": 0.5335, "step": 3665 }, { "epoch": 1.7820060790273557, "grad_norm": 0.07407242812484605, "learning_rate": 5.868659903664152e-06, "loss": 0.5461, "step": 3666 }, { "epoch": 1.7824924012158054, "grad_norm": 0.07081051683619383, "learning_rate": 5.866774285188887e-06, "loss": 0.5322, "step": 3667 }, { "epoch": 1.7829787234042553, "grad_norm": 0.07256050589283215, "learning_rate": 5.8648885396110136e-06, "loss": 0.5751, "step": 3668 }, { "epoch": 1.7834650455927052, "grad_norm": 0.07345485356488476, "learning_rate": 5.863002667207057e-06, "loss": 0.5334, "step": 3669 }, { "epoch": 1.783951367781155, "grad_norm": 0.07311652224392305, "learning_rate": 5.861116668253559e-06, "loss": 0.5545, "step": 3670 }, { "epoch": 1.7844376899696048, "grad_norm": 0.07035308188288608, "learning_rate": 5.8592305430270814e-06, "loss": 0.5326, "step": 3671 }, { "epoch": 1.7849240121580547, "grad_norm": 0.07337421208906444, "learning_rate": 5.8573442918042015e-06, "loss": 0.5399, "step": 3672 }, { "epoch": 1.7854103343465044, "grad_norm": 0.07339199974139411, "learning_rate": 5.855457914861515e-06, "loss": 0.5672, "step": 3673 }, { "epoch": 1.7858966565349546, "grad_norm": 0.07263076362552391, "learning_rate": 5.853571412475644e-06, "loss": 0.5894, "step": 3674 }, { "epoch": 1.7863829787234042, "grad_norm": 0.06975837140108927, "learning_rate": 5.851684784923215e-06, "loss": 0.5456, "step": 3675 }, { "epoch": 1.7868693009118541, "grad_norm": 0.07510960014348426, "learning_rate": 5.849798032480886e-06, "loss": 0.5819, "step": 3676 }, { "epoch": 1.787355623100304, "grad_norm": 0.07087019549529482, "learning_rate": 5.8479111554253235e-06, "loss": 0.5189, "step": 3677 }, { "epoch": 1.7878419452887537, "grad_norm": 0.07108053470474533, "learning_rate": 5.8460241540332195e-06, "loss": 0.541, "step": 3678 }, { "epoch": 1.7883282674772036, "grad_norm": 0.06875864765708137, "learning_rate": 5.84413702858128e-06, "loss": 0.5463, "step": 3679 }, { "epoch": 1.7888145896656535, "grad_norm": 0.07062519581399442, "learning_rate": 5.8422497793462315e-06, "loss": 0.5519, "step": 3680 }, { "epoch": 1.7893009118541032, "grad_norm": 0.07193395736085764, "learning_rate": 5.840362406604818e-06, "loss": 0.5102, "step": 3681 }, { "epoch": 1.7897872340425534, "grad_norm": 0.06948915889974423, "learning_rate": 5.8384749106338e-06, "loss": 0.542, "step": 3682 }, { "epoch": 1.790273556231003, "grad_norm": 0.07321434746943749, "learning_rate": 5.836587291709958e-06, "loss": 0.5682, "step": 3683 }, { "epoch": 1.7907598784194527, "grad_norm": 0.07430036328949104, "learning_rate": 5.83469955011009e-06, "loss": 0.5942, "step": 3684 }, { "epoch": 1.7912462006079028, "grad_norm": 0.07059708000115801, "learning_rate": 5.832811686111011e-06, "loss": 0.5472, "step": 3685 }, { "epoch": 1.7917325227963525, "grad_norm": 0.07171486128807007, "learning_rate": 5.830923699989556e-06, "loss": 0.5416, "step": 3686 }, { "epoch": 1.7922188449848024, "grad_norm": 0.07159611497207326, "learning_rate": 5.829035592022575e-06, "loss": 0.564, "step": 3687 }, { "epoch": 1.7927051671732523, "grad_norm": 0.07368568359696098, "learning_rate": 5.82714736248694e-06, "loss": 0.5643, "step": 3688 }, { "epoch": 1.793191489361702, "grad_norm": 0.07437127960393947, "learning_rate": 5.825259011659537e-06, "loss": 0.5557, "step": 3689 }, { "epoch": 1.793677811550152, "grad_norm": 0.07771526757703247, "learning_rate": 5.82337053981727e-06, "loss": 0.5891, "step": 3690 }, { "epoch": 1.7941641337386018, "grad_norm": 0.07346763115777125, "learning_rate": 5.821481947237066e-06, "loss": 0.6043, "step": 3691 }, { "epoch": 1.7946504559270515, "grad_norm": 0.07097919982578436, "learning_rate": 5.81959323419586e-06, "loss": 0.5244, "step": 3692 }, { "epoch": 1.7951367781155017, "grad_norm": 0.07744988259151567, "learning_rate": 5.817704400970615e-06, "loss": 0.5422, "step": 3693 }, { "epoch": 1.7956231003039513, "grad_norm": 0.07188470449476515, "learning_rate": 5.815815447838304e-06, "loss": 0.567, "step": 3694 }, { "epoch": 1.7961094224924012, "grad_norm": 0.07121923239208007, "learning_rate": 5.813926375075924e-06, "loss": 0.5311, "step": 3695 }, { "epoch": 1.7965957446808511, "grad_norm": 0.07422271611608093, "learning_rate": 5.812037182960483e-06, "loss": 0.595, "step": 3696 }, { "epoch": 1.7970820668693008, "grad_norm": 0.06910502049231106, "learning_rate": 5.8101478717690095e-06, "loss": 0.5113, "step": 3697 }, { "epoch": 1.7975683890577507, "grad_norm": 0.07607890949252384, "learning_rate": 5.8082584417785515e-06, "loss": 0.5644, "step": 3698 }, { "epoch": 1.7980547112462006, "grad_norm": 0.07612833556606277, "learning_rate": 5.806368893266171e-06, "loss": 0.5559, "step": 3699 }, { "epoch": 1.7985410334346503, "grad_norm": 0.07302375718789052, "learning_rate": 5.804479226508949e-06, "loss": 0.5568, "step": 3700 }, { "epoch": 1.7990273556231005, "grad_norm": 0.07367495043226692, "learning_rate": 5.8025894417839835e-06, "loss": 0.5936, "step": 3701 }, { "epoch": 1.7995136778115501, "grad_norm": 0.06990224779710322, "learning_rate": 5.800699539368391e-06, "loss": 0.5375, "step": 3702 }, { "epoch": 1.8, "grad_norm": 0.06947119655247352, "learning_rate": 5.798809519539302e-06, "loss": 0.5797, "step": 3703 }, { "epoch": 1.80048632218845, "grad_norm": 0.07224670458000126, "learning_rate": 5.7969193825738705e-06, "loss": 0.5622, "step": 3704 }, { "epoch": 1.8009726443768996, "grad_norm": 0.0718322950102316, "learning_rate": 5.795029128749261e-06, "loss": 0.5381, "step": 3705 }, { "epoch": 1.8014589665653495, "grad_norm": 0.07103691465981724, "learning_rate": 5.793138758342657e-06, "loss": 0.5354, "step": 3706 }, { "epoch": 1.8019452887537994, "grad_norm": 0.07085149059648509, "learning_rate": 5.79124827163126e-06, "loss": 0.5507, "step": 3707 }, { "epoch": 1.8024316109422491, "grad_norm": 0.0699389836394544, "learning_rate": 5.78935766889229e-06, "loss": 0.5547, "step": 3708 }, { "epoch": 1.802917933130699, "grad_norm": 0.07145536821188114, "learning_rate": 5.7874669504029825e-06, "loss": 0.555, "step": 3709 }, { "epoch": 1.803404255319149, "grad_norm": 0.0733717470584871, "learning_rate": 5.785576116440586e-06, "loss": 0.5703, "step": 3710 }, { "epoch": 1.8038905775075986, "grad_norm": 0.06988065584928621, "learning_rate": 5.783685167282376e-06, "loss": 0.5628, "step": 3711 }, { "epoch": 1.8043768996960488, "grad_norm": 0.07332804898486768, "learning_rate": 5.781794103205633e-06, "loss": 0.5527, "step": 3712 }, { "epoch": 1.8048632218844984, "grad_norm": 0.07534552990260916, "learning_rate": 5.779902924487666e-06, "loss": 0.572, "step": 3713 }, { "epoch": 1.8053495440729483, "grad_norm": 0.1380323192966416, "learning_rate": 5.77801163140579e-06, "loss": 0.5342, "step": 3714 }, { "epoch": 1.8058358662613982, "grad_norm": 0.07387432136816943, "learning_rate": 5.776120224237343e-06, "loss": 0.584, "step": 3715 }, { "epoch": 1.806322188449848, "grad_norm": 0.07457195188960578, "learning_rate": 5.774228703259678e-06, "loss": 0.5578, "step": 3716 }, { "epoch": 1.8068085106382978, "grad_norm": 0.07214637870674079, "learning_rate": 5.772337068750165e-06, "loss": 0.5613, "step": 3717 }, { "epoch": 1.8072948328267477, "grad_norm": 0.07317412182570897, "learning_rate": 5.770445320986194e-06, "loss": 0.5901, "step": 3718 }, { "epoch": 1.8077811550151974, "grad_norm": 0.07140099143671426, "learning_rate": 5.768553460245162e-06, "loss": 0.5413, "step": 3719 }, { "epoch": 1.8082674772036476, "grad_norm": 0.07249968798442306, "learning_rate": 5.766661486804495e-06, "loss": 0.5444, "step": 3720 }, { "epoch": 1.8087537993920972, "grad_norm": 0.07363267939840673, "learning_rate": 5.7647694009416264e-06, "loss": 0.5377, "step": 3721 }, { "epoch": 1.8092401215805471, "grad_norm": 0.07498413465427893, "learning_rate": 5.762877202934011e-06, "loss": 0.5548, "step": 3722 }, { "epoch": 1.809726443768997, "grad_norm": 0.06991658062181326, "learning_rate": 5.760984893059115e-06, "loss": 0.5399, "step": 3723 }, { "epoch": 1.8102127659574467, "grad_norm": 0.06972887924205033, "learning_rate": 5.7590924715944265e-06, "loss": 0.5588, "step": 3724 }, { "epoch": 1.8106990881458966, "grad_norm": 0.0740470483094124, "learning_rate": 5.757199938817447e-06, "loss": 0.5509, "step": 3725 }, { "epoch": 1.8111854103343465, "grad_norm": 0.07400547939704079, "learning_rate": 5.755307295005695e-06, "loss": 0.5589, "step": 3726 }, { "epoch": 1.8116717325227962, "grad_norm": 0.07134953028119086, "learning_rate": 5.753414540436706e-06, "loss": 0.5381, "step": 3727 }, { "epoch": 1.8121580547112464, "grad_norm": 0.07369445984870336, "learning_rate": 5.75152167538803e-06, "loss": 0.5627, "step": 3728 }, { "epoch": 1.812644376899696, "grad_norm": 0.07171313402278903, "learning_rate": 5.749628700137234e-06, "loss": 0.5558, "step": 3729 }, { "epoch": 1.813130699088146, "grad_norm": 0.06949271634278949, "learning_rate": 5.747735614961902e-06, "loss": 0.5295, "step": 3730 }, { "epoch": 1.8136170212765959, "grad_norm": 0.07149059892565086, "learning_rate": 5.745842420139632e-06, "loss": 0.5508, "step": 3731 }, { "epoch": 1.8141033434650455, "grad_norm": 0.0726401694256946, "learning_rate": 5.743949115948042e-06, "loss": 0.579, "step": 3732 }, { "epoch": 1.8145896656534954, "grad_norm": 0.07788523135051495, "learning_rate": 5.7420557026647625e-06, "loss": 0.6107, "step": 3733 }, { "epoch": 1.8150759878419453, "grad_norm": 0.07157641394166475, "learning_rate": 5.74016218056744e-06, "loss": 0.5161, "step": 3734 }, { "epoch": 1.815562310030395, "grad_norm": 0.06893289553292144, "learning_rate": 5.7382685499337385e-06, "loss": 0.5069, "step": 3735 }, { "epoch": 1.816048632218845, "grad_norm": 0.07109600112504161, "learning_rate": 5.736374811041339e-06, "loss": 0.5006, "step": 3736 }, { "epoch": 1.8165349544072948, "grad_norm": 0.07124248629780326, "learning_rate": 5.734480964167935e-06, "loss": 0.5433, "step": 3737 }, { "epoch": 1.8170212765957445, "grad_norm": 0.07473931891488424, "learning_rate": 5.732587009591238e-06, "loss": 0.5227, "step": 3738 }, { "epoch": 1.8175075987841947, "grad_norm": 0.07204180645130044, "learning_rate": 5.730692947588975e-06, "loss": 0.5328, "step": 3739 }, { "epoch": 1.8179939209726443, "grad_norm": 0.07376308697607588, "learning_rate": 5.728798778438889e-06, "loss": 0.5457, "step": 3740 }, { "epoch": 1.8184802431610942, "grad_norm": 0.07001674613205891, "learning_rate": 5.726904502418739e-06, "loss": 0.5567, "step": 3741 }, { "epoch": 1.8189665653495442, "grad_norm": 0.07234234515741456, "learning_rate": 5.725010119806297e-06, "loss": 0.5477, "step": 3742 }, { "epoch": 1.8194528875379938, "grad_norm": 0.07624441123895309, "learning_rate": 5.7231156308793545e-06, "loss": 0.5701, "step": 3743 }, { "epoch": 1.8199392097264437, "grad_norm": 0.07352657802771531, "learning_rate": 5.721221035915717e-06, "loss": 0.5587, "step": 3744 }, { "epoch": 1.8204255319148936, "grad_norm": 0.07124527335189594, "learning_rate": 5.719326335193204e-06, "loss": 0.5657, "step": 3745 }, { "epoch": 1.8209118541033433, "grad_norm": 0.07074566160162261, "learning_rate": 5.717431528989651e-06, "loss": 0.5451, "step": 3746 }, { "epoch": 1.8213981762917935, "grad_norm": 0.07446677937679787, "learning_rate": 5.715536617582913e-06, "loss": 0.594, "step": 3747 }, { "epoch": 1.8218844984802431, "grad_norm": 0.07478715029140917, "learning_rate": 5.713641601250854e-06, "loss": 0.5097, "step": 3748 }, { "epoch": 1.822370820668693, "grad_norm": 0.06976310538342256, "learning_rate": 5.71174648027136e-06, "loss": 0.4994, "step": 3749 }, { "epoch": 1.822857142857143, "grad_norm": 0.07248474608070116, "learning_rate": 5.709851254922326e-06, "loss": 0.536, "step": 3750 }, { "epoch": 1.8233434650455926, "grad_norm": 0.07085216066460785, "learning_rate": 5.7079559254816665e-06, "loss": 0.5359, "step": 3751 }, { "epoch": 1.8238297872340425, "grad_norm": 0.07000847837824252, "learning_rate": 5.706060492227311e-06, "loss": 0.5324, "step": 3752 }, { "epoch": 1.8243161094224924, "grad_norm": 0.07080978614683664, "learning_rate": 5.7041649554372015e-06, "loss": 0.5622, "step": 3753 }, { "epoch": 1.8248024316109421, "grad_norm": 0.07212048374871464, "learning_rate": 5.702269315389296e-06, "loss": 0.5169, "step": 3754 }, { "epoch": 1.8252887537993923, "grad_norm": 0.07374951233787296, "learning_rate": 5.70037357236157e-06, "loss": 0.5767, "step": 3755 }, { "epoch": 1.825775075987842, "grad_norm": 0.06737529434869072, "learning_rate": 5.698477726632015e-06, "loss": 0.5066, "step": 3756 }, { "epoch": 1.8262613981762916, "grad_norm": 0.06982984660697597, "learning_rate": 5.6965817784786325e-06, "loss": 0.5388, "step": 3757 }, { "epoch": 1.8267477203647418, "grad_norm": 0.0712839524633681, "learning_rate": 5.694685728179442e-06, "loss": 0.5457, "step": 3758 }, { "epoch": 1.8272340425531914, "grad_norm": 0.07475472581807995, "learning_rate": 5.69278957601248e-06, "loss": 0.557, "step": 3759 }, { "epoch": 1.8277203647416413, "grad_norm": 0.07384993449774267, "learning_rate": 5.690893322255791e-06, "loss": 0.5712, "step": 3760 }, { "epoch": 1.8282066869300913, "grad_norm": 0.07239955692087521, "learning_rate": 5.688996967187445e-06, "loss": 0.5363, "step": 3761 }, { "epoch": 1.828693009118541, "grad_norm": 0.06774496828657123, "learning_rate": 5.687100511085515e-06, "loss": 0.488, "step": 3762 }, { "epoch": 1.8291793313069908, "grad_norm": 0.0725479493409849, "learning_rate": 5.685203954228099e-06, "loss": 0.5591, "step": 3763 }, { "epoch": 1.8296656534954407, "grad_norm": 0.07129933692150062, "learning_rate": 5.683307296893303e-06, "loss": 0.525, "step": 3764 }, { "epoch": 1.8301519756838904, "grad_norm": 0.0740819613134729, "learning_rate": 5.681410539359251e-06, "loss": 0.5974, "step": 3765 }, { "epoch": 1.8306382978723406, "grad_norm": 0.06792062074025114, "learning_rate": 5.679513681904084e-06, "loss": 0.5468, "step": 3766 }, { "epoch": 1.8311246200607902, "grad_norm": 0.07084457779575046, "learning_rate": 5.67761672480595e-06, "loss": 0.5432, "step": 3767 }, { "epoch": 1.8316109422492401, "grad_norm": 0.07157840213834797, "learning_rate": 5.675719668343019e-06, "loss": 0.5447, "step": 3768 }, { "epoch": 1.83209726443769, "grad_norm": 0.07330750788051905, "learning_rate": 5.673822512793471e-06, "loss": 0.5874, "step": 3769 }, { "epoch": 1.8325835866261397, "grad_norm": 0.07112420469894742, "learning_rate": 5.671925258435504e-06, "loss": 0.4942, "step": 3770 }, { "epoch": 1.8330699088145896, "grad_norm": 0.06948498376476693, "learning_rate": 5.670027905547329e-06, "loss": 0.5788, "step": 3771 }, { "epoch": 1.8335562310030395, "grad_norm": 0.0717781120787052, "learning_rate": 5.668130454407168e-06, "loss": 0.5593, "step": 3772 }, { "epoch": 1.8340425531914892, "grad_norm": 0.07215707676776603, "learning_rate": 5.666232905293263e-06, "loss": 0.5677, "step": 3773 }, { "epoch": 1.8345288753799394, "grad_norm": 0.06992043349783852, "learning_rate": 5.664335258483871e-06, "loss": 0.5693, "step": 3774 }, { "epoch": 1.835015197568389, "grad_norm": 0.06998043413851437, "learning_rate": 5.6624375142572555e-06, "loss": 0.5422, "step": 3775 }, { "epoch": 1.835501519756839, "grad_norm": 0.07438282662720953, "learning_rate": 5.6605396728917006e-06, "loss": 0.5562, "step": 3776 }, { "epoch": 1.8359878419452889, "grad_norm": 0.06952341201837078, "learning_rate": 5.658641734665503e-06, "loss": 0.5517, "step": 3777 }, { "epoch": 1.8364741641337385, "grad_norm": 0.07031362325325231, "learning_rate": 5.656743699856976e-06, "loss": 0.5401, "step": 3778 }, { "epoch": 1.8369604863221884, "grad_norm": 0.06846355226164075, "learning_rate": 5.654845568744443e-06, "loss": 0.5158, "step": 3779 }, { "epoch": 1.8374468085106384, "grad_norm": 0.069317791282025, "learning_rate": 5.652947341606243e-06, "loss": 0.5426, "step": 3780 }, { "epoch": 1.837933130699088, "grad_norm": 0.07102378330650222, "learning_rate": 5.65104901872073e-06, "loss": 0.5823, "step": 3781 }, { "epoch": 1.838419452887538, "grad_norm": 0.06903938789067822, "learning_rate": 5.649150600366272e-06, "loss": 0.5286, "step": 3782 }, { "epoch": 1.8389057750759878, "grad_norm": 0.07252347180331538, "learning_rate": 5.64725208682125e-06, "loss": 0.5455, "step": 3783 }, { "epoch": 1.8393920972644375, "grad_norm": 0.07207477062229378, "learning_rate": 5.645353478364059e-06, "loss": 0.5556, "step": 3784 }, { "epoch": 1.8398784194528877, "grad_norm": 0.0710066071493234, "learning_rate": 5.64345477527311e-06, "loss": 0.5733, "step": 3785 }, { "epoch": 1.8403647416413373, "grad_norm": 0.07240032700807762, "learning_rate": 5.641555977826824e-06, "loss": 0.5895, "step": 3786 }, { "epoch": 1.8408510638297872, "grad_norm": 0.07330265381642612, "learning_rate": 5.639657086303639e-06, "loss": 0.5504, "step": 3787 }, { "epoch": 1.8413373860182372, "grad_norm": 0.07176866909404722, "learning_rate": 5.637758100982007e-06, "loss": 0.5532, "step": 3788 }, { "epoch": 1.8418237082066868, "grad_norm": 0.07343934482235806, "learning_rate": 5.635859022140391e-06, "loss": 0.55, "step": 3789 }, { "epoch": 1.8423100303951367, "grad_norm": 0.07001562238906066, "learning_rate": 5.633959850057271e-06, "loss": 0.5451, "step": 3790 }, { "epoch": 1.8427963525835866, "grad_norm": 0.07117624050553056, "learning_rate": 5.632060585011138e-06, "loss": 0.5419, "step": 3791 }, { "epoch": 1.8432826747720363, "grad_norm": 0.07420003163647222, "learning_rate": 5.630161227280496e-06, "loss": 0.6141, "step": 3792 }, { "epoch": 1.8437689969604865, "grad_norm": 0.07046916735273435, "learning_rate": 5.628261777143867e-06, "loss": 0.5335, "step": 3793 }, { "epoch": 1.8442553191489361, "grad_norm": 0.06949081978361181, "learning_rate": 5.626362234879783e-06, "loss": 0.5251, "step": 3794 }, { "epoch": 1.844741641337386, "grad_norm": 0.06894200071003406, "learning_rate": 5.62446260076679e-06, "loss": 0.5451, "step": 3795 }, { "epoch": 1.845227963525836, "grad_norm": 0.07145977770879011, "learning_rate": 5.622562875083448e-06, "loss": 0.5753, "step": 3796 }, { "epoch": 1.8457142857142856, "grad_norm": 0.07112706181136323, "learning_rate": 5.620663058108331e-06, "loss": 0.5631, "step": 3797 }, { "epoch": 1.8462006079027355, "grad_norm": 0.06995478302103418, "learning_rate": 5.618763150120024e-06, "loss": 0.5471, "step": 3798 }, { "epoch": 1.8466869300911855, "grad_norm": 0.06914015464339193, "learning_rate": 5.616863151397127e-06, "loss": 0.5406, "step": 3799 }, { "epoch": 1.8471732522796351, "grad_norm": 0.06943456983708265, "learning_rate": 5.614963062218253e-06, "loss": 0.5267, "step": 3800 }, { "epoch": 1.8476595744680853, "grad_norm": 0.07191017173048178, "learning_rate": 5.61306288286203e-06, "loss": 0.5529, "step": 3801 }, { "epoch": 1.848145896656535, "grad_norm": 0.07494122014165715, "learning_rate": 5.611162613607098e-06, "loss": 0.5526, "step": 3802 }, { "epoch": 1.8486322188449849, "grad_norm": 0.07197727781320475, "learning_rate": 5.609262254732107e-06, "loss": 0.5362, "step": 3803 }, { "epoch": 1.8491185410334348, "grad_norm": 0.07479025691542991, "learning_rate": 5.607361806515727e-06, "loss": 0.5332, "step": 3804 }, { "epoch": 1.8496048632218844, "grad_norm": 0.07023181417435177, "learning_rate": 5.605461269236635e-06, "loss": 0.5834, "step": 3805 }, { "epoch": 1.8500911854103343, "grad_norm": 0.06995680724069142, "learning_rate": 5.603560643173522e-06, "loss": 0.538, "step": 3806 }, { "epoch": 1.8505775075987843, "grad_norm": 0.07053583060796019, "learning_rate": 5.601659928605095e-06, "loss": 0.52, "step": 3807 }, { "epoch": 1.851063829787234, "grad_norm": 0.07227349280471497, "learning_rate": 5.599759125810073e-06, "loss": 0.5759, "step": 3808 }, { "epoch": 1.8515501519756838, "grad_norm": 0.07273850793386738, "learning_rate": 5.597858235067184e-06, "loss": 0.5327, "step": 3809 }, { "epoch": 1.8520364741641338, "grad_norm": 0.07037922265494392, "learning_rate": 5.595957256655174e-06, "loss": 0.5488, "step": 3810 }, { "epoch": 1.8525227963525834, "grad_norm": 0.06965936993655833, "learning_rate": 5.594056190852801e-06, "loss": 0.5164, "step": 3811 }, { "epoch": 1.8530091185410336, "grad_norm": 0.07030335031873611, "learning_rate": 5.592155037938834e-06, "loss": 0.5114, "step": 3812 }, { "epoch": 1.8534954407294832, "grad_norm": 0.07190786810614103, "learning_rate": 5.5902537981920545e-06, "loss": 0.5821, "step": 3813 }, { "epoch": 1.8539817629179332, "grad_norm": 0.07535660040517028, "learning_rate": 5.588352471891259e-06, "loss": 0.613, "step": 3814 }, { "epoch": 1.854468085106383, "grad_norm": 0.07413350038775467, "learning_rate": 5.586451059315253e-06, "loss": 0.5684, "step": 3815 }, { "epoch": 1.8549544072948327, "grad_norm": 0.07270456231481043, "learning_rate": 5.584549560742859e-06, "loss": 0.5974, "step": 3816 }, { "epoch": 1.8554407294832826, "grad_norm": 0.07168856657316058, "learning_rate": 5.58264797645291e-06, "loss": 0.54, "step": 3817 }, { "epoch": 1.8559270516717326, "grad_norm": 0.06948602461810775, "learning_rate": 5.580746306724252e-06, "loss": 0.5237, "step": 3818 }, { "epoch": 1.8564133738601822, "grad_norm": 0.0697832717925343, "learning_rate": 5.578844551835742e-06, "loss": 0.5509, "step": 3819 }, { "epoch": 1.8568996960486324, "grad_norm": 0.0711256778625363, "learning_rate": 5.576942712066255e-06, "loss": 0.5429, "step": 3820 }, { "epoch": 1.857386018237082, "grad_norm": 0.07462287816350456, "learning_rate": 5.575040787694668e-06, "loss": 0.5684, "step": 3821 }, { "epoch": 1.857872340425532, "grad_norm": 0.07207751331406778, "learning_rate": 5.57313877899988e-06, "loss": 0.5492, "step": 3822 }, { "epoch": 1.8583586626139819, "grad_norm": 0.07470260184550374, "learning_rate": 5.571236686260798e-06, "loss": 0.5506, "step": 3823 }, { "epoch": 1.8588449848024315, "grad_norm": 0.07352014608781392, "learning_rate": 5.569334509756344e-06, "loss": 0.5639, "step": 3824 }, { "epoch": 1.8593313069908814, "grad_norm": 0.0706346220437522, "learning_rate": 5.567432249765449e-06, "loss": 0.5165, "step": 3825 }, { "epoch": 1.8598176291793314, "grad_norm": 0.07151119129191524, "learning_rate": 5.565529906567057e-06, "loss": 0.5969, "step": 3826 }, { "epoch": 1.860303951367781, "grad_norm": 0.06985574004158678, "learning_rate": 5.563627480440127e-06, "loss": 0.5326, "step": 3827 }, { "epoch": 1.8607902735562312, "grad_norm": 0.07580449354122563, "learning_rate": 5.561724971663628e-06, "loss": 0.5668, "step": 3828 }, { "epoch": 1.8612765957446809, "grad_norm": 0.07302002817660541, "learning_rate": 5.559822380516539e-06, "loss": 0.5452, "step": 3829 }, { "epoch": 1.8617629179331305, "grad_norm": 0.0723112034166102, "learning_rate": 5.557919707277857e-06, "loss": 0.554, "step": 3830 }, { "epoch": 1.8622492401215807, "grad_norm": 0.07250965581229067, "learning_rate": 5.556016952226585e-06, "loss": 0.5478, "step": 3831 }, { "epoch": 1.8627355623100303, "grad_norm": 0.0712328543900115, "learning_rate": 5.554114115641741e-06, "loss": 0.5669, "step": 3832 }, { "epoch": 1.8632218844984803, "grad_norm": 0.07334487483474895, "learning_rate": 5.552211197802354e-06, "loss": 0.5403, "step": 3833 }, { "epoch": 1.8637082066869302, "grad_norm": 0.07367820153807855, "learning_rate": 5.550308198987466e-06, "loss": 0.5349, "step": 3834 }, { "epoch": 1.8641945288753798, "grad_norm": 0.0703755107906746, "learning_rate": 5.548405119476129e-06, "loss": 0.5579, "step": 3835 }, { "epoch": 1.8646808510638297, "grad_norm": 0.07094734124585628, "learning_rate": 5.546501959547411e-06, "loss": 0.5341, "step": 3836 }, { "epoch": 1.8651671732522797, "grad_norm": 0.07282036499949628, "learning_rate": 5.544598719480383e-06, "loss": 0.5382, "step": 3837 }, { "epoch": 1.8656534954407293, "grad_norm": 0.07447166786091301, "learning_rate": 5.54269539955414e-06, "loss": 0.5417, "step": 3838 }, { "epoch": 1.8661398176291795, "grad_norm": 0.0751788995888351, "learning_rate": 5.540792000047778e-06, "loss": 0.5502, "step": 3839 }, { "epoch": 1.8666261398176291, "grad_norm": 0.06961499050080971, "learning_rate": 5.538888521240411e-06, "loss": 0.5282, "step": 3840 }, { "epoch": 1.867112462006079, "grad_norm": 0.073412480596751, "learning_rate": 5.53698496341116e-06, "loss": 0.5475, "step": 3841 }, { "epoch": 1.867598784194529, "grad_norm": 0.06985806140808806, "learning_rate": 5.535081326839165e-06, "loss": 0.527, "step": 3842 }, { "epoch": 1.8680851063829786, "grad_norm": 0.07483264512773195, "learning_rate": 5.5331776118035675e-06, "loss": 0.5569, "step": 3843 }, { "epoch": 1.8685714285714285, "grad_norm": 0.06884638658368249, "learning_rate": 5.53127381858353e-06, "loss": 0.5407, "step": 3844 }, { "epoch": 1.8690577507598785, "grad_norm": 0.07308863350731348, "learning_rate": 5.529369947458219e-06, "loss": 0.5305, "step": 3845 }, { "epoch": 1.8695440729483281, "grad_norm": 0.07345115465989771, "learning_rate": 5.527465998706815e-06, "loss": 0.527, "step": 3846 }, { "epoch": 1.8700303951367783, "grad_norm": 0.07044998523833643, "learning_rate": 5.525561972608513e-06, "loss": 0.5322, "step": 3847 }, { "epoch": 1.870516717325228, "grad_norm": 0.07408426800749078, "learning_rate": 5.523657869442516e-06, "loss": 0.5883, "step": 3848 }, { "epoch": 1.8710030395136779, "grad_norm": 0.07356293128805845, "learning_rate": 5.521753689488039e-06, "loss": 0.5475, "step": 3849 }, { "epoch": 1.8714893617021278, "grad_norm": 0.08080229590294455, "learning_rate": 5.519849433024308e-06, "loss": 0.584, "step": 3850 }, { "epoch": 1.8719756838905774, "grad_norm": 0.07829341159558895, "learning_rate": 5.517945100330563e-06, "loss": 0.5597, "step": 3851 }, { "epoch": 1.8724620060790274, "grad_norm": 0.07113114856750816, "learning_rate": 5.516040691686049e-06, "loss": 0.5604, "step": 3852 }, { "epoch": 1.8729483282674773, "grad_norm": 0.07140712557824944, "learning_rate": 5.514136207370026e-06, "loss": 0.5548, "step": 3853 }, { "epoch": 1.873434650455927, "grad_norm": 0.06989501222149999, "learning_rate": 5.512231647661769e-06, "loss": 0.5362, "step": 3854 }, { "epoch": 1.8739209726443768, "grad_norm": 0.07006989477905902, "learning_rate": 5.510327012840556e-06, "loss": 0.5465, "step": 3855 }, { "epoch": 1.8744072948328268, "grad_norm": 0.07101153343172435, "learning_rate": 5.508422303185682e-06, "loss": 0.5601, "step": 3856 }, { "epoch": 1.8748936170212764, "grad_norm": 0.07072906848524038, "learning_rate": 5.506517518976452e-06, "loss": 0.5176, "step": 3857 }, { "epoch": 1.8753799392097266, "grad_norm": 0.07042125266705732, "learning_rate": 5.50461266049218e-06, "loss": 0.56, "step": 3858 }, { "epoch": 1.8758662613981762, "grad_norm": 0.0791804688202137, "learning_rate": 5.502707728012191e-06, "loss": 0.5806, "step": 3859 }, { "epoch": 1.8763525835866262, "grad_norm": 0.0739118879721059, "learning_rate": 5.500802721815821e-06, "loss": 0.5812, "step": 3860 }, { "epoch": 1.876838905775076, "grad_norm": 0.07219821284102616, "learning_rate": 5.49889764218242e-06, "loss": 0.5675, "step": 3861 }, { "epoch": 1.8773252279635257, "grad_norm": 0.07220254355889123, "learning_rate": 5.496992489391345e-06, "loss": 0.5562, "step": 3862 }, { "epoch": 1.8778115501519757, "grad_norm": 0.07156920075813895, "learning_rate": 5.495087263721965e-06, "loss": 0.5627, "step": 3863 }, { "epoch": 1.8782978723404256, "grad_norm": 0.07315547986518335, "learning_rate": 5.493181965453659e-06, "loss": 0.5747, "step": 3864 }, { "epoch": 1.8787841945288752, "grad_norm": 0.07303839541450867, "learning_rate": 5.491276594865818e-06, "loss": 0.5846, "step": 3865 }, { "epoch": 1.8792705167173254, "grad_norm": 0.07185358493666952, "learning_rate": 5.489371152237847e-06, "loss": 0.5485, "step": 3866 }, { "epoch": 1.879756838905775, "grad_norm": 0.07236494540245907, "learning_rate": 5.487465637849151e-06, "loss": 0.5835, "step": 3867 }, { "epoch": 1.880243161094225, "grad_norm": 0.07536297807903382, "learning_rate": 5.4855600519791545e-06, "loss": 0.5499, "step": 3868 }, { "epoch": 1.8807294832826749, "grad_norm": 0.07329774304125254, "learning_rate": 5.483654394907291e-06, "loss": 0.5462, "step": 3869 }, { "epoch": 1.8812158054711245, "grad_norm": 0.06997923564090663, "learning_rate": 5.481748666913001e-06, "loss": 0.5306, "step": 3870 }, { "epoch": 1.8817021276595745, "grad_norm": 0.07844534972760336, "learning_rate": 5.479842868275742e-06, "loss": 0.5799, "step": 3871 }, { "epoch": 1.8821884498480244, "grad_norm": 0.07270346625623395, "learning_rate": 5.477936999274975e-06, "loss": 0.5848, "step": 3872 }, { "epoch": 1.882674772036474, "grad_norm": 0.07006362676995413, "learning_rate": 5.476031060190173e-06, "loss": 0.5212, "step": 3873 }, { "epoch": 1.8831610942249242, "grad_norm": 0.07100012373998063, "learning_rate": 5.474125051300821e-06, "loss": 0.5357, "step": 3874 }, { "epoch": 1.8836474164133739, "grad_norm": 0.07592269995110867, "learning_rate": 5.472218972886416e-06, "loss": 0.5273, "step": 3875 }, { "epoch": 1.8841337386018238, "grad_norm": 0.06928587250916836, "learning_rate": 5.470312825226461e-06, "loss": 0.559, "step": 3876 }, { "epoch": 1.8846200607902737, "grad_norm": 0.07456925756944746, "learning_rate": 5.46840660860047e-06, "loss": 0.5562, "step": 3877 }, { "epoch": 1.8851063829787233, "grad_norm": 0.07021559134295732, "learning_rate": 5.46650032328797e-06, "loss": 0.5351, "step": 3878 }, { "epoch": 1.8855927051671733, "grad_norm": 0.07018384447802946, "learning_rate": 5.464593969568494e-06, "loss": 0.5189, "step": 3879 }, { "epoch": 1.8860790273556232, "grad_norm": 0.06838130255211024, "learning_rate": 5.46268754772159e-06, "loss": 0.5484, "step": 3880 }, { "epoch": 1.8865653495440728, "grad_norm": 0.07267231207550441, "learning_rate": 5.4607810580268094e-06, "loss": 0.5723, "step": 3881 }, { "epoch": 1.8870516717325228, "grad_norm": 0.07126094801636974, "learning_rate": 5.45887450076372e-06, "loss": 0.568, "step": 3882 }, { "epoch": 1.8875379939209727, "grad_norm": 0.0704742186439123, "learning_rate": 5.456967876211896e-06, "loss": 0.5557, "step": 3883 }, { "epoch": 1.8880243161094223, "grad_norm": 0.07126629149481976, "learning_rate": 5.455061184650921e-06, "loss": 0.5805, "step": 3884 }, { "epoch": 1.8885106382978725, "grad_norm": 0.07299989314675018, "learning_rate": 5.453154426360393e-06, "loss": 0.5858, "step": 3885 }, { "epoch": 1.8889969604863222, "grad_norm": 0.07049872375213509, "learning_rate": 5.451247601619913e-06, "loss": 0.5403, "step": 3886 }, { "epoch": 1.889483282674772, "grad_norm": 0.07591691887747315, "learning_rate": 5.449340710709097e-06, "loss": 0.5481, "step": 3887 }, { "epoch": 1.889969604863222, "grad_norm": 0.07091248843228551, "learning_rate": 5.4474337539075675e-06, "loss": 0.5561, "step": 3888 }, { "epoch": 1.8904559270516716, "grad_norm": 0.07358419989721389, "learning_rate": 5.445526731494959e-06, "loss": 0.5815, "step": 3889 }, { "epoch": 1.8909422492401216, "grad_norm": 0.07069980987976046, "learning_rate": 5.443619643750916e-06, "loss": 0.5442, "step": 3890 }, { "epoch": 1.8914285714285715, "grad_norm": 0.07208274573910803, "learning_rate": 5.441712490955088e-06, "loss": 0.593, "step": 3891 }, { "epoch": 1.8919148936170211, "grad_norm": 0.06985344054806343, "learning_rate": 5.43980527338714e-06, "loss": 0.5956, "step": 3892 }, { "epoch": 1.8924012158054713, "grad_norm": 0.07245029698248719, "learning_rate": 5.437897991326743e-06, "loss": 0.5517, "step": 3893 }, { "epoch": 1.892887537993921, "grad_norm": 0.07317693205661047, "learning_rate": 5.435990645053578e-06, "loss": 0.5419, "step": 3894 }, { "epoch": 1.8933738601823709, "grad_norm": 0.07464253102269856, "learning_rate": 5.434083234847336e-06, "loss": 0.5417, "step": 3895 }, { "epoch": 1.8938601823708208, "grad_norm": 0.07301445302319524, "learning_rate": 5.432175760987717e-06, "loss": 0.5015, "step": 3896 }, { "epoch": 1.8943465045592704, "grad_norm": 0.07070000351914066, "learning_rate": 5.430268223754431e-06, "loss": 0.5716, "step": 3897 }, { "epoch": 1.8948328267477204, "grad_norm": 0.07216905273639297, "learning_rate": 5.4283606234271955e-06, "loss": 0.542, "step": 3898 }, { "epoch": 1.8953191489361703, "grad_norm": 0.07173351593003321, "learning_rate": 5.42645296028574e-06, "loss": 0.5351, "step": 3899 }, { "epoch": 1.89580547112462, "grad_norm": 0.07149455964894896, "learning_rate": 5.424545234609798e-06, "loss": 0.5778, "step": 3900 }, { "epoch": 1.89629179331307, "grad_norm": 0.07425694531189812, "learning_rate": 5.42263744667912e-06, "loss": 0.5402, "step": 3901 }, { "epoch": 1.8967781155015198, "grad_norm": 0.06991460034781667, "learning_rate": 5.4207295967734595e-06, "loss": 0.5071, "step": 3902 }, { "epoch": 1.8972644376899694, "grad_norm": 0.06679630885145077, "learning_rate": 5.418821685172582e-06, "loss": 0.5101, "step": 3903 }, { "epoch": 1.8977507598784196, "grad_norm": 0.07648029531820426, "learning_rate": 5.41691371215626e-06, "loss": 0.5527, "step": 3904 }, { "epoch": 1.8982370820668693, "grad_norm": 0.07441453165877433, "learning_rate": 5.415005678004277e-06, "loss": 0.5512, "step": 3905 }, { "epoch": 1.8987234042553192, "grad_norm": 0.07054987103941525, "learning_rate": 5.413097582996423e-06, "loss": 0.5491, "step": 3906 }, { "epoch": 1.899209726443769, "grad_norm": 0.07307538508563054, "learning_rate": 5.4111894274125e-06, "loss": 0.5502, "step": 3907 }, { "epoch": 1.8996960486322187, "grad_norm": 0.07268277319538835, "learning_rate": 5.409281211532317e-06, "loss": 0.5476, "step": 3908 }, { "epoch": 1.9001823708206687, "grad_norm": 0.07254551181776563, "learning_rate": 5.40737293563569e-06, "loss": 0.5432, "step": 3909 }, { "epoch": 1.9006686930091186, "grad_norm": 0.0697224203216681, "learning_rate": 5.40546460000245e-06, "loss": 0.5245, "step": 3910 }, { "epoch": 1.9011550151975682, "grad_norm": 0.07385901022020659, "learning_rate": 5.40355620491243e-06, "loss": 0.5308, "step": 3911 }, { "epoch": 1.9016413373860184, "grad_norm": 0.0705972347693607, "learning_rate": 5.401647750645477e-06, "loss": 0.5738, "step": 3912 }, { "epoch": 1.902127659574468, "grad_norm": 0.07139404096088262, "learning_rate": 5.399739237481441e-06, "loss": 0.5444, "step": 3913 }, { "epoch": 1.902613981762918, "grad_norm": 0.07443937105047072, "learning_rate": 5.397830665700185e-06, "loss": 0.5382, "step": 3914 }, { "epoch": 1.9031003039513679, "grad_norm": 0.07133547255546843, "learning_rate": 5.39592203558158e-06, "loss": 0.5643, "step": 3915 }, { "epoch": 1.9035866261398176, "grad_norm": 0.07117385062756147, "learning_rate": 5.394013347405505e-06, "loss": 0.5329, "step": 3916 }, { "epoch": 1.9040729483282675, "grad_norm": 0.07247909116453287, "learning_rate": 5.392104601451845e-06, "loss": 0.4999, "step": 3917 }, { "epoch": 1.9045592705167174, "grad_norm": 0.06985442759353504, "learning_rate": 5.390195798000498e-06, "loss": 0.5547, "step": 3918 }, { "epoch": 1.905045592705167, "grad_norm": 0.06996615553375128, "learning_rate": 5.38828693733137e-06, "loss": 0.5353, "step": 3919 }, { "epoch": 1.9055319148936172, "grad_norm": 0.06883564717525122, "learning_rate": 5.386378019724372e-06, "loss": 0.5374, "step": 3920 }, { "epoch": 1.9060182370820669, "grad_norm": 0.07193159873531106, "learning_rate": 5.384469045459424e-06, "loss": 0.5637, "step": 3921 }, { "epoch": 1.9065045592705168, "grad_norm": 0.07196204445970461, "learning_rate": 5.382560014816457e-06, "loss": 0.5492, "step": 3922 }, { "epoch": 1.9069908814589667, "grad_norm": 0.07020407782598055, "learning_rate": 5.380650928075407e-06, "loss": 0.5276, "step": 3923 }, { "epoch": 1.9074772036474164, "grad_norm": 0.07268836847250665, "learning_rate": 5.378741785516222e-06, "loss": 0.5335, "step": 3924 }, { "epoch": 1.9079635258358663, "grad_norm": 0.0728378345883877, "learning_rate": 5.376832587418854e-06, "loss": 0.5742, "step": 3925 }, { "epoch": 1.9084498480243162, "grad_norm": 0.07528133437836587, "learning_rate": 5.3749233340632676e-06, "loss": 0.5641, "step": 3926 }, { "epoch": 1.9089361702127658, "grad_norm": 0.06986530945962423, "learning_rate": 5.373014025729431e-06, "loss": 0.5189, "step": 3927 }, { "epoch": 1.9094224924012158, "grad_norm": 0.07137578923183152, "learning_rate": 5.371104662697324e-06, "loss": 0.5762, "step": 3928 }, { "epoch": 1.9099088145896657, "grad_norm": 0.07180724242060038, "learning_rate": 5.369195245246932e-06, "loss": 0.5611, "step": 3929 }, { "epoch": 1.9103951367781153, "grad_norm": 0.07406527169169644, "learning_rate": 5.36728577365825e-06, "loss": 0.5107, "step": 3930 }, { "epoch": 1.9108814589665655, "grad_norm": 0.07820647798443633, "learning_rate": 5.365376248211279e-06, "loss": 0.5442, "step": 3931 }, { "epoch": 1.9113677811550152, "grad_norm": 0.07074490016606895, "learning_rate": 5.363466669186032e-06, "loss": 0.5376, "step": 3932 }, { "epoch": 1.911854103343465, "grad_norm": 0.07314800802056005, "learning_rate": 5.3615570368625235e-06, "loss": 0.5575, "step": 3933 }, { "epoch": 1.912340425531915, "grad_norm": 0.07102998973258125, "learning_rate": 5.359647351520783e-06, "loss": 0.5461, "step": 3934 }, { "epoch": 1.9128267477203647, "grad_norm": 0.07318178917179745, "learning_rate": 5.357737613440842e-06, "loss": 0.5822, "step": 3935 }, { "epoch": 1.9133130699088146, "grad_norm": 0.07373341681033826, "learning_rate": 5.355827822902741e-06, "loss": 0.5479, "step": 3936 }, { "epoch": 1.9137993920972645, "grad_norm": 0.07285353873902074, "learning_rate": 5.353917980186533e-06, "loss": 0.5967, "step": 3937 }, { "epoch": 1.9142857142857141, "grad_norm": 0.06897671793099461, "learning_rate": 5.35200808557227e-06, "loss": 0.5096, "step": 3938 }, { "epoch": 1.9147720364741643, "grad_norm": 0.07331452707960859, "learning_rate": 5.35009813934002e-06, "loss": 0.5756, "step": 3939 }, { "epoch": 1.915258358662614, "grad_norm": 0.07424482210725596, "learning_rate": 5.348188141769852e-06, "loss": 0.5494, "step": 3940 }, { "epoch": 1.9157446808510639, "grad_norm": 0.0695504987734764, "learning_rate": 5.3462780931418475e-06, "loss": 0.5287, "step": 3941 }, { "epoch": 1.9162310030395138, "grad_norm": 0.07204821742874425, "learning_rate": 5.344367993736094e-06, "loss": 0.5336, "step": 3942 }, { "epoch": 1.9167173252279635, "grad_norm": 0.07224236062750922, "learning_rate": 5.342457843832686e-06, "loss": 0.5313, "step": 3943 }, { "epoch": 1.9172036474164134, "grad_norm": 0.07063997584455262, "learning_rate": 5.340547643711721e-06, "loss": 0.5206, "step": 3944 }, { "epoch": 1.9176899696048633, "grad_norm": 0.06907095390317514, "learning_rate": 5.338637393653313e-06, "loss": 0.5443, "step": 3945 }, { "epoch": 1.918176291793313, "grad_norm": 0.0714142566364829, "learning_rate": 5.336727093937575e-06, "loss": 0.5391, "step": 3946 }, { "epoch": 1.918662613981763, "grad_norm": 0.0709911708720824, "learning_rate": 5.334816744844633e-06, "loss": 0.5596, "step": 3947 }, { "epoch": 1.9191489361702128, "grad_norm": 0.07182097343162858, "learning_rate": 5.3329063466546186e-06, "loss": 0.56, "step": 3948 }, { "epoch": 1.9196352583586627, "grad_norm": 0.0723170487931, "learning_rate": 5.3309958996476676e-06, "loss": 0.5539, "step": 3949 }, { "epoch": 1.9201215805471126, "grad_norm": 0.07111847281377336, "learning_rate": 5.329085404103929e-06, "loss": 0.5907, "step": 3950 }, { "epoch": 1.9206079027355623, "grad_norm": 0.0710246141206249, "learning_rate": 5.32717486030355e-06, "loss": 0.5571, "step": 3951 }, { "epoch": 1.9210942249240122, "grad_norm": 0.07033797682194054, "learning_rate": 5.3252642685266945e-06, "loss": 0.5434, "step": 3952 }, { "epoch": 1.921580547112462, "grad_norm": 0.07385120540880619, "learning_rate": 5.323353629053527e-06, "loss": 0.5858, "step": 3953 }, { "epoch": 1.9220668693009118, "grad_norm": 0.0701213705216543, "learning_rate": 5.3214429421642224e-06, "loss": 0.5506, "step": 3954 }, { "epoch": 1.9225531914893617, "grad_norm": 0.07527342395918077, "learning_rate": 5.319532208138959e-06, "loss": 0.5708, "step": 3955 }, { "epoch": 1.9230395136778116, "grad_norm": 0.07239596068120352, "learning_rate": 5.317621427257927e-06, "loss": 0.5227, "step": 3956 }, { "epoch": 1.9235258358662612, "grad_norm": 0.06778220679439734, "learning_rate": 5.31571059980132e-06, "loss": 0.5293, "step": 3957 }, { "epoch": 1.9240121580547114, "grad_norm": 0.07155222314869827, "learning_rate": 5.313799726049339e-06, "loss": 0.5439, "step": 3958 }, { "epoch": 1.924498480243161, "grad_norm": 0.07235966322343111, "learning_rate": 5.311888806282191e-06, "loss": 0.5325, "step": 3959 }, { "epoch": 1.924984802431611, "grad_norm": 0.0715066938139115, "learning_rate": 5.30997784078009e-06, "loss": 0.5599, "step": 3960 }, { "epoch": 1.9254711246200609, "grad_norm": 0.07530726114029725, "learning_rate": 5.308066829823261e-06, "loss": 0.572, "step": 3961 }, { "epoch": 1.9259574468085106, "grad_norm": 0.07401814104456632, "learning_rate": 5.306155773691928e-06, "loss": 0.5813, "step": 3962 }, { "epoch": 1.9264437689969605, "grad_norm": 0.07332258443827892, "learning_rate": 5.304244672666328e-06, "loss": 0.5431, "step": 3963 }, { "epoch": 1.9269300911854104, "grad_norm": 0.07460145163668583, "learning_rate": 5.3023335270267e-06, "loss": 0.6378, "step": 3964 }, { "epoch": 1.92741641337386, "grad_norm": 0.07069127075668984, "learning_rate": 5.300422337053297e-06, "loss": 0.564, "step": 3965 }, { "epoch": 1.9279027355623102, "grad_norm": 0.07069580705921646, "learning_rate": 5.2985111030263685e-06, "loss": 0.524, "step": 3966 }, { "epoch": 1.9283890577507599, "grad_norm": 0.0727467711525965, "learning_rate": 5.2965998252261755e-06, "loss": 0.5416, "step": 3967 }, { "epoch": 1.9288753799392098, "grad_norm": 0.0730161945732158, "learning_rate": 5.294688503932986e-06, "loss": 0.5358, "step": 3968 }, { "epoch": 1.9293617021276597, "grad_norm": 0.06934197918501088, "learning_rate": 5.2927771394270754e-06, "loss": 0.5642, "step": 3969 }, { "epoch": 1.9298480243161094, "grad_norm": 0.07728581534430642, "learning_rate": 5.290865731988721e-06, "loss": 0.5871, "step": 3970 }, { "epoch": 1.9303343465045593, "grad_norm": 0.07420400809000982, "learning_rate": 5.28895428189821e-06, "loss": 0.5354, "step": 3971 }, { "epoch": 1.9308206686930092, "grad_norm": 0.07210480311510799, "learning_rate": 5.2870427894358345e-06, "loss": 0.5481, "step": 3972 }, { "epoch": 1.9313069908814589, "grad_norm": 0.069850947800103, "learning_rate": 5.285131254881895e-06, "loss": 0.5385, "step": 3973 }, { "epoch": 1.931793313069909, "grad_norm": 0.06939765769104236, "learning_rate": 5.283219678516694e-06, "loss": 0.5252, "step": 3974 }, { "epoch": 1.9322796352583587, "grad_norm": 0.0725840974144373, "learning_rate": 5.281308060620543e-06, "loss": 0.4922, "step": 3975 }, { "epoch": 1.9327659574468083, "grad_norm": 0.0756057412616692, "learning_rate": 5.279396401473759e-06, "loss": 0.5511, "step": 3976 }, { "epoch": 1.9332522796352585, "grad_norm": 0.07052894851960988, "learning_rate": 5.277484701356665e-06, "loss": 0.5241, "step": 3977 }, { "epoch": 1.9337386018237082, "grad_norm": 0.07145290958592206, "learning_rate": 5.275572960549592e-06, "loss": 0.5484, "step": 3978 }, { "epoch": 1.934224924012158, "grad_norm": 0.06914131431117099, "learning_rate": 5.273661179332874e-06, "loss": 0.5068, "step": 3979 }, { "epoch": 1.934711246200608, "grad_norm": 0.072621996981385, "learning_rate": 5.27174935798685e-06, "loss": 0.5381, "step": 3980 }, { "epoch": 1.9351975683890577, "grad_norm": 0.07168212583920859, "learning_rate": 5.269837496791871e-06, "loss": 0.5487, "step": 3981 }, { "epoch": 1.9356838905775076, "grad_norm": 0.07226193494444172, "learning_rate": 5.267925596028285e-06, "loss": 0.5668, "step": 3982 }, { "epoch": 1.9361702127659575, "grad_norm": 0.06999206951575934, "learning_rate": 5.266013655976454e-06, "loss": 0.527, "step": 3983 }, { "epoch": 1.9366565349544071, "grad_norm": 0.06888583022490859, "learning_rate": 5.264101676916741e-06, "loss": 0.5054, "step": 3984 }, { "epoch": 1.9371428571428573, "grad_norm": 0.06930221184078372, "learning_rate": 5.262189659129515e-06, "loss": 0.5458, "step": 3985 }, { "epoch": 1.937629179331307, "grad_norm": 0.07149689576266506, "learning_rate": 5.260277602895154e-06, "loss": 0.5364, "step": 3986 }, { "epoch": 1.9381155015197569, "grad_norm": 0.0723772926861595, "learning_rate": 5.258365508494039e-06, "loss": 0.5719, "step": 3987 }, { "epoch": 1.9386018237082068, "grad_norm": 0.07061068362122885, "learning_rate": 5.256453376206555e-06, "loss": 0.5411, "step": 3988 }, { "epoch": 1.9390881458966565, "grad_norm": 0.07096073363336144, "learning_rate": 5.2545412063130964e-06, "loss": 0.5095, "step": 3989 }, { "epoch": 1.9395744680851064, "grad_norm": 0.07126984711713157, "learning_rate": 5.252628999094059e-06, "loss": 0.5326, "step": 3990 }, { "epoch": 1.9400607902735563, "grad_norm": 0.07002943984588834, "learning_rate": 5.2507167548298475e-06, "loss": 0.5303, "step": 3991 }, { "epoch": 1.940547112462006, "grad_norm": 0.07303616960770704, "learning_rate": 5.248804473800872e-06, "loss": 0.5388, "step": 3992 }, { "epoch": 1.941033434650456, "grad_norm": 0.06987933879758945, "learning_rate": 5.246892156287546e-06, "loss": 0.5644, "step": 3993 }, { "epoch": 1.9415197568389058, "grad_norm": 0.07092382913678416, "learning_rate": 5.244979802570288e-06, "loss": 0.4876, "step": 3994 }, { "epoch": 1.9420060790273557, "grad_norm": 0.07183594237566124, "learning_rate": 5.243067412929524e-06, "loss": 0.5376, "step": 3995 }, { "epoch": 1.9424924012158056, "grad_norm": 0.07218317665616278, "learning_rate": 5.241154987645687e-06, "loss": 0.5494, "step": 3996 }, { "epoch": 1.9429787234042553, "grad_norm": 0.0738576927670373, "learning_rate": 5.239242526999207e-06, "loss": 0.5898, "step": 3997 }, { "epoch": 1.9434650455927052, "grad_norm": 0.07209239934543615, "learning_rate": 5.237330031270526e-06, "loss": 0.5806, "step": 3998 }, { "epoch": 1.943951367781155, "grad_norm": 0.07188300097353648, "learning_rate": 5.235417500740093e-06, "loss": 0.5655, "step": 3999 }, { "epoch": 1.9444376899696048, "grad_norm": 0.07541195031417097, "learning_rate": 5.233504935688355e-06, "loss": 0.6531, "step": 4000 }, { "epoch": 1.9449240121580547, "grad_norm": 0.07168950322346442, "learning_rate": 5.231592336395771e-06, "loss": 0.5679, "step": 4001 }, { "epoch": 1.9454103343465046, "grad_norm": 0.07131777859880024, "learning_rate": 5.229679703142801e-06, "loss": 0.5313, "step": 4002 }, { "epoch": 1.9458966565349542, "grad_norm": 0.0725064508896351, "learning_rate": 5.227767036209911e-06, "loss": 0.5589, "step": 4003 }, { "epoch": 1.9463829787234044, "grad_norm": 0.07317081821397187, "learning_rate": 5.225854335877571e-06, "loss": 0.5578, "step": 4004 }, { "epoch": 1.946869300911854, "grad_norm": 0.06909282131971625, "learning_rate": 5.223941602426258e-06, "loss": 0.536, "step": 4005 }, { "epoch": 1.947355623100304, "grad_norm": 0.07251713147657754, "learning_rate": 5.222028836136451e-06, "loss": 0.5372, "step": 4006 }, { "epoch": 1.9478419452887539, "grad_norm": 0.07342534467884852, "learning_rate": 5.220116037288637e-06, "loss": 0.534, "step": 4007 }, { "epoch": 1.9483282674772036, "grad_norm": 0.07086290939044369, "learning_rate": 5.218203206163306e-06, "loss": 0.5362, "step": 4008 }, { "epoch": 1.9488145896656535, "grad_norm": 0.0729895180234574, "learning_rate": 5.216290343040952e-06, "loss": 0.5345, "step": 4009 }, { "epoch": 1.9493009118541034, "grad_norm": 0.07390850352288804, "learning_rate": 5.214377448202075e-06, "loss": 0.5469, "step": 4010 }, { "epoch": 1.949787234042553, "grad_norm": 0.07469818163452135, "learning_rate": 5.212464521927182e-06, "loss": 0.5923, "step": 4011 }, { "epoch": 1.9502735562310032, "grad_norm": 0.07385365805873335, "learning_rate": 5.210551564496778e-06, "loss": 0.5726, "step": 4012 }, { "epoch": 1.9507598784194529, "grad_norm": 0.06869536584519903, "learning_rate": 5.2086385761913775e-06, "loss": 0.5292, "step": 4013 }, { "epoch": 1.9512462006079028, "grad_norm": 0.07268517784103212, "learning_rate": 5.2067255572914995e-06, "loss": 0.585, "step": 4014 }, { "epoch": 1.9517325227963527, "grad_norm": 0.07237858910815546, "learning_rate": 5.204812508077666e-06, "loss": 0.5477, "step": 4015 }, { "epoch": 1.9522188449848024, "grad_norm": 0.07232510702682564, "learning_rate": 5.202899428830404e-06, "loss": 0.5708, "step": 4016 }, { "epoch": 1.9527051671732523, "grad_norm": 0.07168769172266656, "learning_rate": 5.200986319830245e-06, "loss": 0.5362, "step": 4017 }, { "epoch": 1.9531914893617022, "grad_norm": 0.06986399501839158, "learning_rate": 5.199073181357725e-06, "loss": 0.5196, "step": 4018 }, { "epoch": 1.9536778115501519, "grad_norm": 0.0721377619109054, "learning_rate": 5.197160013693382e-06, "loss": 0.5711, "step": 4019 }, { "epoch": 1.954164133738602, "grad_norm": 0.07280342068128169, "learning_rate": 5.195246817117763e-06, "loss": 0.5483, "step": 4020 }, { "epoch": 1.9546504559270517, "grad_norm": 0.07057144849216664, "learning_rate": 5.193333591911416e-06, "loss": 0.5296, "step": 4021 }, { "epoch": 1.9551367781155016, "grad_norm": 0.06970470710427557, "learning_rate": 5.191420338354892e-06, "loss": 0.5063, "step": 4022 }, { "epoch": 1.9556231003039515, "grad_norm": 0.07083213803572287, "learning_rate": 5.18950705672875e-06, "loss": 0.5294, "step": 4023 }, { "epoch": 1.9561094224924012, "grad_norm": 0.07133845440081246, "learning_rate": 5.18759374731355e-06, "loss": 0.5076, "step": 4024 }, { "epoch": 1.956595744680851, "grad_norm": 0.06995335914854336, "learning_rate": 5.185680410389856e-06, "loss": 0.5165, "step": 4025 }, { "epoch": 1.957082066869301, "grad_norm": 0.07226385668066908, "learning_rate": 5.183767046238239e-06, "loss": 0.5179, "step": 4026 }, { "epoch": 1.9575683890577507, "grad_norm": 0.07405507194945109, "learning_rate": 5.181853655139272e-06, "loss": 0.5317, "step": 4027 }, { "epoch": 1.9580547112462006, "grad_norm": 0.07216105571906228, "learning_rate": 5.179940237373532e-06, "loss": 0.5537, "step": 4028 }, { "epoch": 1.9585410334346505, "grad_norm": 0.07353059038918697, "learning_rate": 5.1780267932215985e-06, "loss": 0.5453, "step": 4029 }, { "epoch": 1.9590273556231002, "grad_norm": 0.0737631617555941, "learning_rate": 5.176113322964058e-06, "loss": 0.5927, "step": 4030 }, { "epoch": 1.9595136778115503, "grad_norm": 0.0715725098077897, "learning_rate": 5.174199826881498e-06, "loss": 0.5455, "step": 4031 }, { "epoch": 1.96, "grad_norm": 0.07020638664063605, "learning_rate": 5.1722863052545124e-06, "loss": 0.53, "step": 4032 }, { "epoch": 1.9604863221884499, "grad_norm": 0.0726420771490652, "learning_rate": 5.170372758363695e-06, "loss": 0.5275, "step": 4033 }, { "epoch": 1.9609726443768998, "grad_norm": 0.07075944796673647, "learning_rate": 5.168459186489649e-06, "loss": 0.5603, "step": 4034 }, { "epoch": 1.9614589665653495, "grad_norm": 0.07192249888606185, "learning_rate": 5.166545589912977e-06, "loss": 0.5395, "step": 4035 }, { "epoch": 1.9619452887537994, "grad_norm": 0.06991776406339933, "learning_rate": 5.1646319689142835e-06, "loss": 0.5219, "step": 4036 }, { "epoch": 1.9624316109422493, "grad_norm": 0.07066816999990898, "learning_rate": 5.1627183237741816e-06, "loss": 0.543, "step": 4037 }, { "epoch": 1.962917933130699, "grad_norm": 0.07379540972212562, "learning_rate": 5.160804654773286e-06, "loss": 0.5481, "step": 4038 }, { "epoch": 1.963404255319149, "grad_norm": 0.07201823454747135, "learning_rate": 5.158890962192214e-06, "loss": 0.5395, "step": 4039 }, { "epoch": 1.9638905775075988, "grad_norm": 0.07317804299750949, "learning_rate": 5.156977246311585e-06, "loss": 0.5846, "step": 4040 }, { "epoch": 1.9643768996960487, "grad_norm": 0.07088796227807322, "learning_rate": 5.155063507412027e-06, "loss": 0.5358, "step": 4041 }, { "epoch": 1.9648632218844986, "grad_norm": 0.07095566646975195, "learning_rate": 5.153149745774167e-06, "loss": 0.5322, "step": 4042 }, { "epoch": 1.9653495440729483, "grad_norm": 0.07415211984680001, "learning_rate": 5.151235961678635e-06, "loss": 0.5465, "step": 4043 }, { "epoch": 1.9658358662613982, "grad_norm": 0.07032419654654286, "learning_rate": 5.149322155406067e-06, "loss": 0.5598, "step": 4044 }, { "epoch": 1.966322188449848, "grad_norm": 0.0721532815547123, "learning_rate": 5.147408327237099e-06, "loss": 0.5493, "step": 4045 }, { "epoch": 1.9668085106382978, "grad_norm": 0.06997445146897438, "learning_rate": 5.145494477452375e-06, "loss": 0.5347, "step": 4046 }, { "epoch": 1.9672948328267479, "grad_norm": 0.0744699556037541, "learning_rate": 5.143580606332539e-06, "loss": 0.5726, "step": 4047 }, { "epoch": 1.9677811550151976, "grad_norm": 0.07257275928752864, "learning_rate": 5.1416667141582355e-06, "loss": 0.5473, "step": 4048 }, { "epoch": 1.9682674772036473, "grad_norm": 0.07353724562471495, "learning_rate": 5.139752801210118e-06, "loss": 0.5621, "step": 4049 }, { "epoch": 1.9687537993920974, "grad_norm": 0.07708911764038119, "learning_rate": 5.1378388677688415e-06, "loss": 0.5778, "step": 4050 }, { "epoch": 1.969240121580547, "grad_norm": 0.07422356627523158, "learning_rate": 5.135924914115058e-06, "loss": 0.553, "step": 4051 }, { "epoch": 1.969726443768997, "grad_norm": 0.07368977492635634, "learning_rate": 5.134010940529429e-06, "loss": 0.5409, "step": 4052 }, { "epoch": 1.9702127659574469, "grad_norm": 0.07218959240008989, "learning_rate": 5.132096947292618e-06, "loss": 0.5343, "step": 4053 }, { "epoch": 1.9706990881458966, "grad_norm": 0.06817959793877926, "learning_rate": 5.130182934685289e-06, "loss": 0.533, "step": 4054 }, { "epoch": 1.9711854103343465, "grad_norm": 0.07050214275020256, "learning_rate": 5.128268902988112e-06, "loss": 0.5542, "step": 4055 }, { "epoch": 1.9716717325227964, "grad_norm": 0.06909868526207856, "learning_rate": 5.126354852481757e-06, "loss": 0.5244, "step": 4056 }, { "epoch": 1.972158054711246, "grad_norm": 0.07509301931401295, "learning_rate": 5.124440783446898e-06, "loss": 0.5497, "step": 4057 }, { "epoch": 1.9726443768996962, "grad_norm": 0.07196289454389032, "learning_rate": 5.122526696164211e-06, "loss": 0.5522, "step": 4058 }, { "epoch": 1.9731306990881459, "grad_norm": 0.07429985104790021, "learning_rate": 5.1206125909143745e-06, "loss": 0.5358, "step": 4059 }, { "epoch": 1.9736170212765958, "grad_norm": 0.07345988567106172, "learning_rate": 5.118698467978072e-06, "loss": 0.5175, "step": 4060 }, { "epoch": 1.9741033434650457, "grad_norm": 0.07507778545792528, "learning_rate": 5.1167843276359865e-06, "loss": 0.5669, "step": 4061 }, { "epoch": 1.9745896656534954, "grad_norm": 0.06873489290267816, "learning_rate": 5.114870170168806e-06, "loss": 0.525, "step": 4062 }, { "epoch": 1.9750759878419453, "grad_norm": 0.07326567348593864, "learning_rate": 5.112955995857219e-06, "loss": 0.5597, "step": 4063 }, { "epoch": 1.9755623100303952, "grad_norm": 0.07402028313087448, "learning_rate": 5.111041804981919e-06, "loss": 0.5443, "step": 4064 }, { "epoch": 1.9760486322188449, "grad_norm": 0.07002293728443178, "learning_rate": 5.109127597823598e-06, "loss": 0.5243, "step": 4065 }, { "epoch": 1.976534954407295, "grad_norm": 0.07333863778960509, "learning_rate": 5.107213374662954e-06, "loss": 0.5506, "step": 4066 }, { "epoch": 1.9770212765957447, "grad_norm": 0.07135685685852484, "learning_rate": 5.1052991357806865e-06, "loss": 0.5594, "step": 4067 }, { "epoch": 1.9775075987841946, "grad_norm": 0.07330575880332194, "learning_rate": 5.103384881457497e-06, "loss": 0.5445, "step": 4068 }, { "epoch": 1.9779939209726445, "grad_norm": 0.06983365004135339, "learning_rate": 5.1014706119740875e-06, "loss": 0.5399, "step": 4069 }, { "epoch": 1.9784802431610942, "grad_norm": 0.07358784483555246, "learning_rate": 5.0995563276111655e-06, "loss": 0.5571, "step": 4070 }, { "epoch": 1.978966565349544, "grad_norm": 0.0714844601554554, "learning_rate": 5.09764202864944e-06, "loss": 0.5307, "step": 4071 }, { "epoch": 1.979452887537994, "grad_norm": 0.0711046234990911, "learning_rate": 5.095727715369618e-06, "loss": 0.5671, "step": 4072 }, { "epoch": 1.9799392097264437, "grad_norm": 0.07429043037525987, "learning_rate": 5.0938133880524145e-06, "loss": 0.5926, "step": 4073 }, { "epoch": 1.9804255319148936, "grad_norm": 0.06911949445868028, "learning_rate": 5.091899046978542e-06, "loss": 0.5193, "step": 4074 }, { "epoch": 1.9809118541033435, "grad_norm": 0.07112322230377795, "learning_rate": 5.0899846924287184e-06, "loss": 0.5365, "step": 4075 }, { "epoch": 1.9813981762917932, "grad_norm": 0.0719120839126588, "learning_rate": 5.0880703246836614e-06, "loss": 0.5501, "step": 4076 }, { "epoch": 1.9818844984802433, "grad_norm": 0.07339119495313026, "learning_rate": 5.086155944024093e-06, "loss": 0.5627, "step": 4077 }, { "epoch": 1.982370820668693, "grad_norm": 0.07503823576953976, "learning_rate": 5.084241550730732e-06, "loss": 0.5843, "step": 4078 }, { "epoch": 1.9828571428571429, "grad_norm": 0.07195738578023082, "learning_rate": 5.0823271450843045e-06, "loss": 0.5629, "step": 4079 }, { "epoch": 1.9833434650455928, "grad_norm": 0.07506331827880443, "learning_rate": 5.080412727365536e-06, "loss": 0.5742, "step": 4080 }, { "epoch": 1.9838297872340425, "grad_norm": 0.07080692046771095, "learning_rate": 5.078498297855156e-06, "loss": 0.556, "step": 4081 }, { "epoch": 1.9843161094224924, "grad_norm": 0.07462004660722861, "learning_rate": 5.076583856833888e-06, "loss": 0.5687, "step": 4082 }, { "epoch": 1.9848024316109423, "grad_norm": 0.07714499905054355, "learning_rate": 5.074669404582469e-06, "loss": 0.5561, "step": 4083 }, { "epoch": 1.985288753799392, "grad_norm": 0.06959446373368061, "learning_rate": 5.072754941381631e-06, "loss": 0.5263, "step": 4084 }, { "epoch": 1.985775075987842, "grad_norm": 0.07378311642764318, "learning_rate": 5.070840467512106e-06, "loss": 0.5458, "step": 4085 }, { "epoch": 1.9862613981762918, "grad_norm": 0.06885446916429626, "learning_rate": 5.0689259832546314e-06, "loss": 0.5346, "step": 4086 }, { "epoch": 1.9867477203647417, "grad_norm": 0.0705163584020101, "learning_rate": 5.067011488889944e-06, "loss": 0.5493, "step": 4087 }, { "epoch": 1.9872340425531916, "grad_norm": 0.07303617589070245, "learning_rate": 5.065096984698783e-06, "loss": 0.5583, "step": 4088 }, { "epoch": 1.9877203647416413, "grad_norm": 0.07515882598679031, "learning_rate": 5.063182470961888e-06, "loss": 0.5944, "step": 4089 }, { "epoch": 1.9882066869300912, "grad_norm": 0.07209010037220351, "learning_rate": 5.061267947960001e-06, "loss": 0.5619, "step": 4090 }, { "epoch": 1.988693009118541, "grad_norm": 0.07138280513586763, "learning_rate": 5.059353415973865e-06, "loss": 0.5216, "step": 4091 }, { "epoch": 1.9891793313069908, "grad_norm": 0.07235689246934927, "learning_rate": 5.057438875284224e-06, "loss": 0.5539, "step": 4092 }, { "epoch": 1.989665653495441, "grad_norm": 0.07043325640792983, "learning_rate": 5.0555243261718245e-06, "loss": 0.522, "step": 4093 }, { "epoch": 1.9901519756838906, "grad_norm": 0.06843926096249822, "learning_rate": 5.053609768917414e-06, "loss": 0.5254, "step": 4094 }, { "epoch": 1.9906382978723405, "grad_norm": 0.07374768776751374, "learning_rate": 5.051695203801739e-06, "loss": 0.5654, "step": 4095 }, { "epoch": 1.9911246200607904, "grad_norm": 0.07364024588806227, "learning_rate": 5.0497806311055505e-06, "loss": 0.5459, "step": 4096 }, { "epoch": 1.99161094224924, "grad_norm": 0.06944235861576799, "learning_rate": 5.047866051109597e-06, "loss": 0.5113, "step": 4097 }, { "epoch": 1.99209726443769, "grad_norm": 0.0733226774324953, "learning_rate": 5.04595146409463e-06, "loss": 0.5662, "step": 4098 }, { "epoch": 1.9925835866261399, "grad_norm": 0.06968827268978985, "learning_rate": 5.044036870341403e-06, "loss": 0.5288, "step": 4099 }, { "epoch": 1.9930699088145896, "grad_norm": 0.07132226729205574, "learning_rate": 5.0421222701306685e-06, "loss": 0.5377, "step": 4100 }, { "epoch": 1.9935562310030395, "grad_norm": 0.07136921612932069, "learning_rate": 5.040207663743182e-06, "loss": 0.5458, "step": 4101 }, { "epoch": 1.9940425531914894, "grad_norm": 0.07261022128192629, "learning_rate": 5.038293051459698e-06, "loss": 0.5601, "step": 4102 }, { "epoch": 1.994528875379939, "grad_norm": 0.07029858773182517, "learning_rate": 5.0363784335609744e-06, "loss": 0.5371, "step": 4103 }, { "epoch": 1.9950151975683892, "grad_norm": 0.07160378990780952, "learning_rate": 5.034463810327766e-06, "loss": 0.5643, "step": 4104 }, { "epoch": 1.9955015197568389, "grad_norm": 0.07237286811587941, "learning_rate": 5.03254918204083e-06, "loss": 0.5254, "step": 4105 }, { "epoch": 1.9959878419452888, "grad_norm": 0.06890026400947108, "learning_rate": 5.030634548980926e-06, "loss": 0.5074, "step": 4106 }, { "epoch": 1.9964741641337387, "grad_norm": 0.07340888261976052, "learning_rate": 5.028719911428814e-06, "loss": 0.5684, "step": 4107 }, { "epoch": 1.9969604863221884, "grad_norm": 0.07154652513498307, "learning_rate": 5.026805269665254e-06, "loss": 0.5613, "step": 4108 }, { "epoch": 1.9974468085106383, "grad_norm": 0.06863125066672594, "learning_rate": 5.0248906239710025e-06, "loss": 0.5495, "step": 4109 }, { "epoch": 1.9979331306990882, "grad_norm": 0.06959777212822249, "learning_rate": 5.022975974626827e-06, "loss": 0.5575, "step": 4110 }, { "epoch": 1.9984194528875379, "grad_norm": 0.07228218266351222, "learning_rate": 5.021061321913484e-06, "loss": 0.5278, "step": 4111 }, { "epoch": 1.998905775075988, "grad_norm": 0.07318956743749344, "learning_rate": 5.0191466661117385e-06, "loss": 0.576, "step": 4112 }, { "epoch": 1.998905775075988, "eval_loss": 0.5719841718673706, "eval_runtime": 105.2602, "eval_samples_per_second": 288.362, "eval_steps_per_second": 36.054, "step": 4112 }, { "epoch": 1.9993920972644377, "grad_norm": 0.06899319062796848, "learning_rate": 5.0172320075023504e-06, "loss": 0.521, "step": 4113 }, { "epoch": 1.9998784194528876, "grad_norm": 0.0724600612638861, "learning_rate": 5.015317346366085e-06, "loss": 0.544, "step": 4114 }, { "epoch": 2.0, "grad_norm": 0.0724600612638861, "learning_rate": 5.013402682983705e-06, "loss": 0.1305, "step": 4115 }, { "epoch": 2.0003647416413375, "grad_norm": 0.07090566340907718, "learning_rate": 5.011488017635973e-06, "loss": 0.3857, "step": 4116 }, { "epoch": 2.0004863221884497, "grad_norm": 0.07560565895633789, "learning_rate": 5.009573350603654e-06, "loss": 0.5416, "step": 4117 }, { "epoch": 2.0009726443769, "grad_norm": 0.07463160319833827, "learning_rate": 5.007658682167511e-06, "loss": 0.5216, "step": 4118 }, { "epoch": 2.0014589665653495, "grad_norm": 0.0704286164735406, "learning_rate": 5.0057440126083105e-06, "loss": 0.5226, "step": 4119 }, { "epoch": 2.001945288753799, "grad_norm": 0.0756680374343442, "learning_rate": 5.003829342206815e-06, "loss": 0.513, "step": 4120 }, { "epoch": 2.0024316109422493, "grad_norm": 0.07200504172525779, "learning_rate": 5.00191467124379e-06, "loss": 0.5403, "step": 4121 }, { "epoch": 2.002917933130699, "grad_norm": 0.07970405331927172, "learning_rate": 5e-06, "loss": 0.5563, "step": 4122 }, { "epoch": 2.003404255319149, "grad_norm": 0.07336267398523405, "learning_rate": 4.998085328756211e-06, "loss": 0.5194, "step": 4123 }, { "epoch": 2.003890577507599, "grad_norm": 0.07732463909571638, "learning_rate": 4.9961706577931865e-06, "loss": 0.5164, "step": 4124 }, { "epoch": 2.0043768996960485, "grad_norm": 0.07538394478171248, "learning_rate": 4.99425598739169e-06, "loss": 0.5233, "step": 4125 }, { "epoch": 2.0048632218844986, "grad_norm": 0.07178068742791732, "learning_rate": 4.99234131783249e-06, "loss": 0.4967, "step": 4126 }, { "epoch": 2.0053495440729483, "grad_norm": 0.07280447920895801, "learning_rate": 4.990426649396349e-06, "loss": 0.5426, "step": 4127 }, { "epoch": 2.005835866261398, "grad_norm": 0.07895528535780773, "learning_rate": 4.98851198236403e-06, "loss": 0.5518, "step": 4128 }, { "epoch": 2.006322188449848, "grad_norm": 0.07581014746674569, "learning_rate": 4.986597317016298e-06, "loss": 0.5256, "step": 4129 }, { "epoch": 2.006808510638298, "grad_norm": 0.07120238586983466, "learning_rate": 4.984682653633917e-06, "loss": 0.5177, "step": 4130 }, { "epoch": 2.007294832826748, "grad_norm": 0.07119804097902376, "learning_rate": 4.982767992497652e-06, "loss": 0.5203, "step": 4131 }, { "epoch": 2.0077811550151976, "grad_norm": 0.07559586109173598, "learning_rate": 4.980853333888262e-06, "loss": 0.5398, "step": 4132 }, { "epoch": 2.0082674772036473, "grad_norm": 0.07350223373534821, "learning_rate": 4.978938678086517e-06, "loss": 0.5405, "step": 4133 }, { "epoch": 2.0087537993920974, "grad_norm": 0.07208264639169884, "learning_rate": 4.977024025373174e-06, "loss": 0.5432, "step": 4134 }, { "epoch": 2.009240121580547, "grad_norm": 0.07131172276718777, "learning_rate": 4.9751093760289975e-06, "loss": 0.5057, "step": 4135 }, { "epoch": 2.009726443768997, "grad_norm": 0.07025931825106611, "learning_rate": 4.9731947303347485e-06, "loss": 0.5219, "step": 4136 }, { "epoch": 2.010212765957447, "grad_norm": 0.07234591131423174, "learning_rate": 4.971280088571187e-06, "loss": 0.5022, "step": 4137 }, { "epoch": 2.0106990881458966, "grad_norm": 0.07184852077216447, "learning_rate": 4.969365451019075e-06, "loss": 0.5143, "step": 4138 }, { "epoch": 2.0111854103343467, "grad_norm": 0.07427629112549243, "learning_rate": 4.967450817959171e-06, "loss": 0.5432, "step": 4139 }, { "epoch": 2.0116717325227964, "grad_norm": 0.07088431735887683, "learning_rate": 4.965536189672236e-06, "loss": 0.5, "step": 4140 }, { "epoch": 2.012158054711246, "grad_norm": 0.07118584943300031, "learning_rate": 4.963621566439027e-06, "loss": 0.5232, "step": 4141 }, { "epoch": 2.012644376899696, "grad_norm": 0.07206765143102957, "learning_rate": 4.961706948540303e-06, "loss": 0.5285, "step": 4142 }, { "epoch": 2.013130699088146, "grad_norm": 0.07256591203748938, "learning_rate": 4.959792336256819e-06, "loss": 0.5482, "step": 4143 }, { "epoch": 2.0136170212765956, "grad_norm": 0.06995835914694061, "learning_rate": 4.957877729869332e-06, "loss": 0.5209, "step": 4144 }, { "epoch": 2.0141033434650457, "grad_norm": 0.0740994737041949, "learning_rate": 4.955963129658599e-06, "loss": 0.5261, "step": 4145 }, { "epoch": 2.0145896656534954, "grad_norm": 0.07456378202106874, "learning_rate": 4.954048535905372e-06, "loss": 0.5189, "step": 4146 }, { "epoch": 2.015075987841945, "grad_norm": 0.07511338240986537, "learning_rate": 4.952133948890406e-06, "loss": 0.5562, "step": 4147 }, { "epoch": 2.015562310030395, "grad_norm": 0.0742620935041202, "learning_rate": 4.950219368894452e-06, "loss": 0.5299, "step": 4148 }, { "epoch": 2.016048632218845, "grad_norm": 0.07290041090130435, "learning_rate": 4.948304796198262e-06, "loss": 0.4918, "step": 4149 }, { "epoch": 2.016534954407295, "grad_norm": 0.07032882448577277, "learning_rate": 4.946390231082586e-06, "loss": 0.4971, "step": 4150 }, { "epoch": 2.0170212765957447, "grad_norm": 0.07241552735232347, "learning_rate": 4.9444756738281755e-06, "loss": 0.5386, "step": 4151 }, { "epoch": 2.0175075987841944, "grad_norm": 0.07000695319231048, "learning_rate": 4.942561124715776e-06, "loss": 0.5048, "step": 4152 }, { "epoch": 2.0179939209726445, "grad_norm": 0.07320006631497436, "learning_rate": 4.940646584026136e-06, "loss": 0.5178, "step": 4153 }, { "epoch": 2.018480243161094, "grad_norm": 0.07268784042118799, "learning_rate": 4.93873205204e-06, "loss": 0.5521, "step": 4154 }, { "epoch": 2.018966565349544, "grad_norm": 0.07309063892742822, "learning_rate": 4.936817529038113e-06, "loss": 0.5432, "step": 4155 }, { "epoch": 2.019452887537994, "grad_norm": 0.0751944181988103, "learning_rate": 4.934903015301218e-06, "loss": 0.5415, "step": 4156 }, { "epoch": 2.0199392097264437, "grad_norm": 0.0717854491309982, "learning_rate": 4.932988511110058e-06, "loss": 0.5705, "step": 4157 }, { "epoch": 2.020425531914894, "grad_norm": 0.06762626397281103, "learning_rate": 4.93107401674537e-06, "loss": 0.4698, "step": 4158 }, { "epoch": 2.0209118541033435, "grad_norm": 0.07382780195075954, "learning_rate": 4.929159532487895e-06, "loss": 0.5526, "step": 4159 }, { "epoch": 2.021398176291793, "grad_norm": 0.07380518136830846, "learning_rate": 4.92724505861837e-06, "loss": 0.5456, "step": 4160 }, { "epoch": 2.0218844984802433, "grad_norm": 0.07498551761636121, "learning_rate": 4.9253305954175316e-06, "loss": 0.5339, "step": 4161 }, { "epoch": 2.022370820668693, "grad_norm": 0.07476218196616753, "learning_rate": 4.9234161431661124e-06, "loss": 0.5407, "step": 4162 }, { "epoch": 2.0228571428571427, "grad_norm": 0.07188806504372429, "learning_rate": 4.9215017021448476e-06, "loss": 0.5189, "step": 4163 }, { "epoch": 2.023343465045593, "grad_norm": 0.07642579388800144, "learning_rate": 4.919587272634466e-06, "loss": 0.5595, "step": 4164 }, { "epoch": 2.0238297872340425, "grad_norm": 0.07056888290520946, "learning_rate": 4.917672854915697e-06, "loss": 0.5149, "step": 4165 }, { "epoch": 2.024316109422492, "grad_norm": 0.07207158328439678, "learning_rate": 4.915758449269271e-06, "loss": 0.5433, "step": 4166 }, { "epoch": 2.0248024316109423, "grad_norm": 0.0712268953352973, "learning_rate": 4.91384405597591e-06, "loss": 0.5552, "step": 4167 }, { "epoch": 2.025288753799392, "grad_norm": 0.07160884103680992, "learning_rate": 4.9119296753163385e-06, "loss": 0.552, "step": 4168 }, { "epoch": 2.025775075987842, "grad_norm": 0.0705533289785172, "learning_rate": 4.9100153075712815e-06, "loss": 0.5297, "step": 4169 }, { "epoch": 2.026261398176292, "grad_norm": 0.07214722507916452, "learning_rate": 4.908100953021458e-06, "loss": 0.5241, "step": 4170 }, { "epoch": 2.0267477203647415, "grad_norm": 0.07379176885362691, "learning_rate": 4.906186611947587e-06, "loss": 0.5793, "step": 4171 }, { "epoch": 2.0272340425531916, "grad_norm": 0.0704623777822778, "learning_rate": 4.9042722846303836e-06, "loss": 0.4898, "step": 4172 }, { "epoch": 2.0277203647416413, "grad_norm": 0.07360585873779849, "learning_rate": 4.902357971350562e-06, "loss": 0.5257, "step": 4173 }, { "epoch": 2.028206686930091, "grad_norm": 0.07277010233759118, "learning_rate": 4.900443672388835e-06, "loss": 0.5212, "step": 4174 }, { "epoch": 2.028693009118541, "grad_norm": 0.0721973073846585, "learning_rate": 4.898529388025913e-06, "loss": 0.5276, "step": 4175 }, { "epoch": 2.029179331306991, "grad_norm": 0.07716581794527941, "learning_rate": 4.896615118542505e-06, "loss": 0.5153, "step": 4176 }, { "epoch": 2.029665653495441, "grad_norm": 0.07735463118096297, "learning_rate": 4.894700864219314e-06, "loss": 0.5267, "step": 4177 }, { "epoch": 2.0301519756838906, "grad_norm": 0.07002988041194366, "learning_rate": 4.892786625337047e-06, "loss": 0.5047, "step": 4178 }, { "epoch": 2.0306382978723403, "grad_norm": 0.06980995310400111, "learning_rate": 4.890872402176404e-06, "loss": 0.507, "step": 4179 }, { "epoch": 2.0311246200607904, "grad_norm": 0.07303792142582727, "learning_rate": 4.8889581950180835e-06, "loss": 0.5325, "step": 4180 }, { "epoch": 2.03161094224924, "grad_norm": 0.0703341485201467, "learning_rate": 4.887044004142783e-06, "loss": 0.529, "step": 4181 }, { "epoch": 2.03209726443769, "grad_norm": 0.07221863594394819, "learning_rate": 4.8851298298311965e-06, "loss": 0.5263, "step": 4182 }, { "epoch": 2.03258358662614, "grad_norm": 0.07138545399327458, "learning_rate": 4.883215672364016e-06, "loss": 0.5425, "step": 4183 }, { "epoch": 2.0330699088145896, "grad_norm": 0.06970260205901674, "learning_rate": 4.881301532021931e-06, "loss": 0.4924, "step": 4184 }, { "epoch": 2.0335562310030397, "grad_norm": 0.06925962166465548, "learning_rate": 4.879387409085628e-06, "loss": 0.5089, "step": 4185 }, { "epoch": 2.0340425531914894, "grad_norm": 0.07320339747292594, "learning_rate": 4.877473303835791e-06, "loss": 0.555, "step": 4186 }, { "epoch": 2.034528875379939, "grad_norm": 0.07052211739307121, "learning_rate": 4.875559216553104e-06, "loss": 0.5212, "step": 4187 }, { "epoch": 2.0350151975683892, "grad_norm": 0.07173699493673336, "learning_rate": 4.873645147518244e-06, "loss": 0.5494, "step": 4188 }, { "epoch": 2.035501519756839, "grad_norm": 0.07123179946314978, "learning_rate": 4.871731097011889e-06, "loss": 0.5288, "step": 4189 }, { "epoch": 2.0359878419452886, "grad_norm": 0.07295432467900945, "learning_rate": 4.869817065314711e-06, "loss": 0.5469, "step": 4190 }, { "epoch": 2.0364741641337387, "grad_norm": 0.06949617993069249, "learning_rate": 4.867903052707383e-06, "loss": 0.4878, "step": 4191 }, { "epoch": 2.0369604863221884, "grad_norm": 0.07072975679994874, "learning_rate": 4.865989059470572e-06, "loss": 0.5129, "step": 4192 }, { "epoch": 2.037446808510638, "grad_norm": 0.07139665836685158, "learning_rate": 4.8640750858849435e-06, "loss": 0.5437, "step": 4193 }, { "epoch": 2.037933130699088, "grad_norm": 0.07180968210847434, "learning_rate": 4.86216113223116e-06, "loss": 0.5027, "step": 4194 }, { "epoch": 2.038419452887538, "grad_norm": 0.07397630404840642, "learning_rate": 4.860247198789883e-06, "loss": 0.5335, "step": 4195 }, { "epoch": 2.038905775075988, "grad_norm": 0.07140117973666019, "learning_rate": 4.858333285841765e-06, "loss": 0.526, "step": 4196 }, { "epoch": 2.0393920972644377, "grad_norm": 0.07070515223426743, "learning_rate": 4.856419393667463e-06, "loss": 0.511, "step": 4197 }, { "epoch": 2.0398784194528874, "grad_norm": 0.07291706561477407, "learning_rate": 4.8545055225476265e-06, "loss": 0.5319, "step": 4198 }, { "epoch": 2.0403647416413375, "grad_norm": 0.07030076437250854, "learning_rate": 4.8525916727629025e-06, "loss": 0.5169, "step": 4199 }, { "epoch": 2.040851063829787, "grad_norm": 0.07393845704355517, "learning_rate": 4.850677844593936e-06, "loss": 0.5252, "step": 4200 }, { "epoch": 2.041337386018237, "grad_norm": 0.07027881943979325, "learning_rate": 4.848764038321367e-06, "loss": 0.5152, "step": 4201 }, { "epoch": 2.041823708206687, "grad_norm": 0.0730431578843535, "learning_rate": 4.846850254225835e-06, "loss": 0.5429, "step": 4202 }, { "epoch": 2.0423100303951367, "grad_norm": 0.06987754199598122, "learning_rate": 4.8449364925879745e-06, "loss": 0.5667, "step": 4203 }, { "epoch": 2.042796352583587, "grad_norm": 0.07165875716009988, "learning_rate": 4.843022753688415e-06, "loss": 0.5004, "step": 4204 }, { "epoch": 2.0432826747720365, "grad_norm": 0.07216895464315917, "learning_rate": 4.841109037807787e-06, "loss": 0.5568, "step": 4205 }, { "epoch": 2.043768996960486, "grad_norm": 0.0714623513986144, "learning_rate": 4.839195345226715e-06, "loss": 0.5071, "step": 4206 }, { "epoch": 2.0442553191489363, "grad_norm": 0.07240143533521515, "learning_rate": 4.837281676225819e-06, "loss": 0.5349, "step": 4207 }, { "epoch": 2.044741641337386, "grad_norm": 0.07244361076855506, "learning_rate": 4.835368031085717e-06, "loss": 0.4984, "step": 4208 }, { "epoch": 2.0452279635258357, "grad_norm": 0.07202071236904925, "learning_rate": 4.833454410087024e-06, "loss": 0.4868, "step": 4209 }, { "epoch": 2.045714285714286, "grad_norm": 0.06925764718537741, "learning_rate": 4.831540813510352e-06, "loss": 0.5199, "step": 4210 }, { "epoch": 2.0462006079027355, "grad_norm": 0.07533749803028636, "learning_rate": 4.829627241636306e-06, "loss": 0.5519, "step": 4211 }, { "epoch": 2.0466869300911856, "grad_norm": 0.07465566970295337, "learning_rate": 4.827713694745489e-06, "loss": 0.5335, "step": 4212 }, { "epoch": 2.0471732522796353, "grad_norm": 0.07170669847746954, "learning_rate": 4.825800173118503e-06, "loss": 0.5285, "step": 4213 }, { "epoch": 2.047659574468085, "grad_norm": 0.07260824290566995, "learning_rate": 4.823886677035944e-06, "loss": 0.5343, "step": 4214 }, { "epoch": 2.048145896656535, "grad_norm": 0.07965887746294288, "learning_rate": 4.821973206778403e-06, "loss": 0.4994, "step": 4215 }, { "epoch": 2.048632218844985, "grad_norm": 0.07177784973470001, "learning_rate": 4.82005976262647e-06, "loss": 0.507, "step": 4216 }, { "epoch": 2.0491185410334345, "grad_norm": 0.08499717899919779, "learning_rate": 4.818146344860729e-06, "loss": 0.5485, "step": 4217 }, { "epoch": 2.0496048632218846, "grad_norm": 0.07176714252214604, "learning_rate": 4.816232953761762e-06, "loss": 0.5384, "step": 4218 }, { "epoch": 2.0500911854103343, "grad_norm": 0.07289023465593142, "learning_rate": 4.814319589610146e-06, "loss": 0.5575, "step": 4219 }, { "epoch": 2.050577507598784, "grad_norm": 0.07162841827502749, "learning_rate": 4.812406252686453e-06, "loss": 0.5087, "step": 4220 }, { "epoch": 2.051063829787234, "grad_norm": 0.06999808794476338, "learning_rate": 4.810492943271253e-06, "loss": 0.5381, "step": 4221 }, { "epoch": 2.051550151975684, "grad_norm": 0.07362939865519282, "learning_rate": 4.8085796616451086e-06, "loss": 0.5467, "step": 4222 }, { "epoch": 2.052036474164134, "grad_norm": 0.07235852498278311, "learning_rate": 4.806666408088585e-06, "loss": 0.5181, "step": 4223 }, { "epoch": 2.0525227963525836, "grad_norm": 0.0717383388555045, "learning_rate": 4.804753182882237e-06, "loss": 0.548, "step": 4224 }, { "epoch": 2.0530091185410333, "grad_norm": 0.07089317780872104, "learning_rate": 4.802839986306619e-06, "loss": 0.5145, "step": 4225 }, { "epoch": 2.0534954407294834, "grad_norm": 0.06934855459471415, "learning_rate": 4.800926818642278e-06, "loss": 0.4913, "step": 4226 }, { "epoch": 2.053981762917933, "grad_norm": 0.07144099789625005, "learning_rate": 4.799013680169757e-06, "loss": 0.5542, "step": 4227 }, { "epoch": 2.054468085106383, "grad_norm": 0.07252433831201918, "learning_rate": 4.797100571169597e-06, "loss": 0.5007, "step": 4228 }, { "epoch": 2.054954407294833, "grad_norm": 0.0749424627498245, "learning_rate": 4.795187491922336e-06, "loss": 0.5374, "step": 4229 }, { "epoch": 2.0554407294832826, "grad_norm": 0.07256734461702409, "learning_rate": 4.793274442708502e-06, "loss": 0.5005, "step": 4230 }, { "epoch": 2.0559270516717327, "grad_norm": 0.07206862839449416, "learning_rate": 4.791361423808623e-06, "loss": 0.555, "step": 4231 }, { "epoch": 2.0564133738601824, "grad_norm": 0.07178024549561958, "learning_rate": 4.789448435503224e-06, "loss": 0.5347, "step": 4232 }, { "epoch": 2.056899696048632, "grad_norm": 0.07063748081507971, "learning_rate": 4.78753547807282e-06, "loss": 0.5148, "step": 4233 }, { "epoch": 2.0573860182370822, "grad_norm": 0.06988911857142277, "learning_rate": 4.785622551797926e-06, "loss": 0.4973, "step": 4234 }, { "epoch": 2.057872340425532, "grad_norm": 0.07065133985763723, "learning_rate": 4.78370965695905e-06, "loss": 0.5148, "step": 4235 }, { "epoch": 2.0583586626139816, "grad_norm": 0.0732950012079987, "learning_rate": 4.781796793836696e-06, "loss": 0.4937, "step": 4236 }, { "epoch": 2.0588449848024317, "grad_norm": 0.0694929566050334, "learning_rate": 4.779883962711364e-06, "loss": 0.5067, "step": 4237 }, { "epoch": 2.0593313069908814, "grad_norm": 0.0716832955681354, "learning_rate": 4.7779711638635504e-06, "loss": 0.5417, "step": 4238 }, { "epoch": 2.059817629179331, "grad_norm": 0.07233257038104912, "learning_rate": 4.776058397573744e-06, "loss": 0.5001, "step": 4239 }, { "epoch": 2.060303951367781, "grad_norm": 0.07224215030014126, "learning_rate": 4.7741456641224295e-06, "loss": 0.5291, "step": 4240 }, { "epoch": 2.060790273556231, "grad_norm": 0.07081977650099833, "learning_rate": 4.7722329637900895e-06, "loss": 0.488, "step": 4241 }, { "epoch": 2.061276595744681, "grad_norm": 0.07122991027392214, "learning_rate": 4.7703202968572e-06, "loss": 0.5148, "step": 4242 }, { "epoch": 2.0617629179331307, "grad_norm": 0.0715727784395469, "learning_rate": 4.768407663604229e-06, "loss": 0.5322, "step": 4243 }, { "epoch": 2.0622492401215804, "grad_norm": 0.07028995464871736, "learning_rate": 4.7664950643116445e-06, "loss": 0.5064, "step": 4244 }, { "epoch": 2.0627355623100305, "grad_norm": 0.07034899044321367, "learning_rate": 4.764582499259908e-06, "loss": 0.5243, "step": 4245 }, { "epoch": 2.06322188449848, "grad_norm": 0.07235862275941812, "learning_rate": 4.7626699687294746e-06, "loss": 0.5326, "step": 4246 }, { "epoch": 2.06370820668693, "grad_norm": 0.07509184317130671, "learning_rate": 4.760757473000794e-06, "loss": 0.5324, "step": 4247 }, { "epoch": 2.06419452887538, "grad_norm": 0.07254159181567657, "learning_rate": 4.758845012354314e-06, "loss": 0.5137, "step": 4248 }, { "epoch": 2.0646808510638297, "grad_norm": 0.07267173493401578, "learning_rate": 4.756932587070476e-06, "loss": 0.5284, "step": 4249 }, { "epoch": 2.06516717325228, "grad_norm": 0.07311315318936584, "learning_rate": 4.755020197429713e-06, "loss": 0.543, "step": 4250 }, { "epoch": 2.0656534954407295, "grad_norm": 0.0734409520378827, "learning_rate": 4.7531078437124555e-06, "loss": 0.5171, "step": 4251 }, { "epoch": 2.066139817629179, "grad_norm": 0.07187278949456045, "learning_rate": 4.751195526199129e-06, "loss": 0.5286, "step": 4252 }, { "epoch": 2.0666261398176293, "grad_norm": 0.07164257355329291, "learning_rate": 4.749283245170153e-06, "loss": 0.5113, "step": 4253 }, { "epoch": 2.067112462006079, "grad_norm": 0.07227591181386367, "learning_rate": 4.747371000905943e-06, "loss": 0.5251, "step": 4254 }, { "epoch": 2.0675987841945287, "grad_norm": 0.07252537478858759, "learning_rate": 4.745458793686906e-06, "loss": 0.5247, "step": 4255 }, { "epoch": 2.068085106382979, "grad_norm": 0.07474147359887902, "learning_rate": 4.743546623793447e-06, "loss": 0.5475, "step": 4256 }, { "epoch": 2.0685714285714285, "grad_norm": 0.06968974901182258, "learning_rate": 4.741634491505963e-06, "loss": 0.482, "step": 4257 }, { "epoch": 2.0690577507598786, "grad_norm": 0.07364892356782594, "learning_rate": 4.739722397104849e-06, "loss": 0.5244, "step": 4258 }, { "epoch": 2.0695440729483283, "grad_norm": 0.07361437024359314, "learning_rate": 4.737810340870484e-06, "loss": 0.549, "step": 4259 }, { "epoch": 2.070030395136778, "grad_norm": 0.07175054061207829, "learning_rate": 4.73589832308326e-06, "loss": 0.5396, "step": 4260 }, { "epoch": 2.070516717325228, "grad_norm": 0.07522201111796856, "learning_rate": 4.733986344023547e-06, "loss": 0.5461, "step": 4261 }, { "epoch": 2.071003039513678, "grad_norm": 0.07293163098546677, "learning_rate": 4.732074403971716e-06, "loss": 0.5522, "step": 4262 }, { "epoch": 2.0714893617021275, "grad_norm": 0.07235982880906476, "learning_rate": 4.730162503208131e-06, "loss": 0.5387, "step": 4263 }, { "epoch": 2.0719756838905776, "grad_norm": 0.07058410497227291, "learning_rate": 4.728250642013151e-06, "loss": 0.5388, "step": 4264 }, { "epoch": 2.0724620060790273, "grad_norm": 0.0727881467734768, "learning_rate": 4.726338820667128e-06, "loss": 0.5213, "step": 4265 }, { "epoch": 2.072948328267477, "grad_norm": 0.0711038161087961, "learning_rate": 4.7244270394504085e-06, "loss": 0.554, "step": 4266 }, { "epoch": 2.073434650455927, "grad_norm": 0.07323877583410632, "learning_rate": 4.722515298643335e-06, "loss": 0.5151, "step": 4267 }, { "epoch": 2.073920972644377, "grad_norm": 0.0736735373071005, "learning_rate": 4.720603598526243e-06, "loss": 0.527, "step": 4268 }, { "epoch": 2.074407294832827, "grad_norm": 0.07490647231505362, "learning_rate": 4.718691939379459e-06, "loss": 0.5319, "step": 4269 }, { "epoch": 2.0748936170212766, "grad_norm": 0.0726703140165146, "learning_rate": 4.716780321483308e-06, "loss": 0.5215, "step": 4270 }, { "epoch": 2.0753799392097263, "grad_norm": 0.07039672869273815, "learning_rate": 4.714868745118107e-06, "loss": 0.5142, "step": 4271 }, { "epoch": 2.0758662613981764, "grad_norm": 0.07150052442021539, "learning_rate": 4.712957210564166e-06, "loss": 0.5239, "step": 4272 }, { "epoch": 2.076352583586626, "grad_norm": 0.07266818345949104, "learning_rate": 4.7110457181017925e-06, "loss": 0.5631, "step": 4273 }, { "epoch": 2.076838905775076, "grad_norm": 0.07405293793860726, "learning_rate": 4.709134268011281e-06, "loss": 0.5305, "step": 4274 }, { "epoch": 2.077325227963526, "grad_norm": 0.07175009658076159, "learning_rate": 4.707222860572928e-06, "loss": 0.5136, "step": 4275 }, { "epoch": 2.0778115501519756, "grad_norm": 0.07114418055672524, "learning_rate": 4.705311496067016e-06, "loss": 0.5141, "step": 4276 }, { "epoch": 2.0782978723404257, "grad_norm": 0.07065371945223853, "learning_rate": 4.703400174773825e-06, "loss": 0.5085, "step": 4277 }, { "epoch": 2.0787841945288754, "grad_norm": 0.07275091785706649, "learning_rate": 4.701488896973633e-06, "loss": 0.5033, "step": 4278 }, { "epoch": 2.079270516717325, "grad_norm": 0.07149974274334023, "learning_rate": 4.6995776629467045e-06, "loss": 0.5219, "step": 4279 }, { "epoch": 2.0797568389057752, "grad_norm": 0.06835347572736793, "learning_rate": 4.6976664729733e-06, "loss": 0.5029, "step": 4280 }, { "epoch": 2.080243161094225, "grad_norm": 0.06915904097928988, "learning_rate": 4.695755327333673e-06, "loss": 0.5139, "step": 4281 }, { "epoch": 2.0807294832826746, "grad_norm": 0.07463886689914738, "learning_rate": 4.693844226308073e-06, "loss": 0.5522, "step": 4282 }, { "epoch": 2.0812158054711247, "grad_norm": 0.0722963202740917, "learning_rate": 4.691933170176741e-06, "loss": 0.5187, "step": 4283 }, { "epoch": 2.0817021276595744, "grad_norm": 0.07214746268838576, "learning_rate": 4.6900221592199105e-06, "loss": 0.5343, "step": 4284 }, { "epoch": 2.082188449848024, "grad_norm": 0.07260814640148158, "learning_rate": 4.68811119371781e-06, "loss": 0.5274, "step": 4285 }, { "epoch": 2.082674772036474, "grad_norm": 0.07157456990337037, "learning_rate": 4.686200273950662e-06, "loss": 0.5252, "step": 4286 }, { "epoch": 2.083161094224924, "grad_norm": 0.07055184941556789, "learning_rate": 4.684289400198682e-06, "loss": 0.5179, "step": 4287 }, { "epoch": 2.083647416413374, "grad_norm": 0.07009853751425962, "learning_rate": 4.682378572742074e-06, "loss": 0.5215, "step": 4288 }, { "epoch": 2.0841337386018237, "grad_norm": 0.07473808557838861, "learning_rate": 4.680467791861042e-06, "loss": 0.5105, "step": 4289 }, { "epoch": 2.0846200607902734, "grad_norm": 0.07159394012230107, "learning_rate": 4.67855705783578e-06, "loss": 0.5113, "step": 4290 }, { "epoch": 2.0851063829787235, "grad_norm": 0.07003680550608568, "learning_rate": 4.676646370946475e-06, "loss": 0.4907, "step": 4291 }, { "epoch": 2.085592705167173, "grad_norm": 0.07090815229239068, "learning_rate": 4.674735731473308e-06, "loss": 0.5267, "step": 4292 }, { "epoch": 2.086079027355623, "grad_norm": 0.07244282893226998, "learning_rate": 4.672825139696452e-06, "loss": 0.514, "step": 4293 }, { "epoch": 2.086565349544073, "grad_norm": 0.07012379184722464, "learning_rate": 4.670914595896075e-06, "loss": 0.5109, "step": 4294 }, { "epoch": 2.0870516717325227, "grad_norm": 0.07360448994801068, "learning_rate": 4.669004100352333e-06, "loss": 0.4967, "step": 4295 }, { "epoch": 2.087537993920973, "grad_norm": 0.07345894847607636, "learning_rate": 4.667093653345382e-06, "loss": 0.568, "step": 4296 }, { "epoch": 2.0880243161094225, "grad_norm": 0.0719953055546164, "learning_rate": 4.665183255155367e-06, "loss": 0.5517, "step": 4297 }, { "epoch": 2.088510638297872, "grad_norm": 0.07145027950921647, "learning_rate": 4.663272906062426e-06, "loss": 0.519, "step": 4298 }, { "epoch": 2.0889969604863223, "grad_norm": 0.07292007262090149, "learning_rate": 4.661362606346689e-06, "loss": 0.5412, "step": 4299 }, { "epoch": 2.089483282674772, "grad_norm": 0.06976454334601297, "learning_rate": 4.65945235628828e-06, "loss": 0.4879, "step": 4300 }, { "epoch": 2.0899696048632217, "grad_norm": 0.0708872059691606, "learning_rate": 4.657542156167316e-06, "loss": 0.5036, "step": 4301 }, { "epoch": 2.090455927051672, "grad_norm": 0.07277940726493234, "learning_rate": 4.655632006263907e-06, "loss": 0.5109, "step": 4302 }, { "epoch": 2.0909422492401215, "grad_norm": 0.07262977668497943, "learning_rate": 4.653721906858153e-06, "loss": 0.5727, "step": 4303 }, { "epoch": 2.0914285714285716, "grad_norm": 0.07464151898875335, "learning_rate": 4.651811858230149e-06, "loss": 0.5068, "step": 4304 }, { "epoch": 2.0919148936170213, "grad_norm": 0.07239360841205382, "learning_rate": 4.6499018606599815e-06, "loss": 0.547, "step": 4305 }, { "epoch": 2.092401215805471, "grad_norm": 0.07078991567607104, "learning_rate": 4.647991914427732e-06, "loss": 0.5422, "step": 4306 }, { "epoch": 2.092887537993921, "grad_norm": 0.07056129806041661, "learning_rate": 4.64608201981347e-06, "loss": 0.498, "step": 4307 }, { "epoch": 2.093373860182371, "grad_norm": 0.06966870092423305, "learning_rate": 4.644172177097259e-06, "loss": 0.5165, "step": 4308 }, { "epoch": 2.0938601823708205, "grad_norm": 0.07103894963685853, "learning_rate": 4.64226238655916e-06, "loss": 0.4946, "step": 4309 }, { "epoch": 2.0943465045592706, "grad_norm": 0.07365719921672839, "learning_rate": 4.640352648479219e-06, "loss": 0.564, "step": 4310 }, { "epoch": 2.0948328267477203, "grad_norm": 0.07441239044739215, "learning_rate": 4.638442963137478e-06, "loss": 0.5748, "step": 4311 }, { "epoch": 2.09531914893617, "grad_norm": 0.07244824047157294, "learning_rate": 4.636533330813971e-06, "loss": 0.5222, "step": 4312 }, { "epoch": 2.09580547112462, "grad_norm": 0.07166380134159551, "learning_rate": 4.6346237517887214e-06, "loss": 0.5098, "step": 4313 }, { "epoch": 2.09629179331307, "grad_norm": 0.07032576929905841, "learning_rate": 4.632714226341751e-06, "loss": 0.4956, "step": 4314 }, { "epoch": 2.09677811550152, "grad_norm": 0.07244751402274383, "learning_rate": 4.630804754753069e-06, "loss": 0.5207, "step": 4315 }, { "epoch": 2.0972644376899696, "grad_norm": 0.06921197183750415, "learning_rate": 4.628895337302676e-06, "loss": 0.4966, "step": 4316 }, { "epoch": 2.0977507598784193, "grad_norm": 0.07455758201185998, "learning_rate": 4.62698597427057e-06, "loss": 0.5158, "step": 4317 }, { "epoch": 2.0982370820668694, "grad_norm": 0.07075546064268393, "learning_rate": 4.625076665936733e-06, "loss": 0.4856, "step": 4318 }, { "epoch": 2.098723404255319, "grad_norm": 0.07475995164046163, "learning_rate": 4.623167412581147e-06, "loss": 0.5166, "step": 4319 }, { "epoch": 2.099209726443769, "grad_norm": 0.07230168705872128, "learning_rate": 4.621258214483779e-06, "loss": 0.5416, "step": 4320 }, { "epoch": 2.099696048632219, "grad_norm": 0.07280495227536604, "learning_rate": 4.619349071924594e-06, "loss": 0.5269, "step": 4321 }, { "epoch": 2.1001823708206686, "grad_norm": 0.07272142886384839, "learning_rate": 4.617439985183545e-06, "loss": 0.5566, "step": 4322 }, { "epoch": 2.1006686930091187, "grad_norm": 0.06986298285529687, "learning_rate": 4.615530954540578e-06, "loss": 0.515, "step": 4323 }, { "epoch": 2.1011550151975684, "grad_norm": 0.06904839372569721, "learning_rate": 4.6136219802756295e-06, "loss": 0.483, "step": 4324 }, { "epoch": 2.101641337386018, "grad_norm": 0.07023845723050942, "learning_rate": 4.6117130626686304e-06, "loss": 0.493, "step": 4325 }, { "epoch": 2.1021276595744682, "grad_norm": 0.07063847117540045, "learning_rate": 4.609804201999503e-06, "loss": 0.522, "step": 4326 }, { "epoch": 2.102613981762918, "grad_norm": 0.0714599395002521, "learning_rate": 4.6078953985481565e-06, "loss": 0.5031, "step": 4327 }, { "epoch": 2.1031003039513676, "grad_norm": 0.07290365972396484, "learning_rate": 4.6059866525944984e-06, "loss": 0.5264, "step": 4328 }, { "epoch": 2.1035866261398177, "grad_norm": 0.07368246307381719, "learning_rate": 4.604077964418422e-06, "loss": 0.5907, "step": 4329 }, { "epoch": 2.1040729483282674, "grad_norm": 0.06990178817338745, "learning_rate": 4.602169334299817e-06, "loss": 0.5231, "step": 4330 }, { "epoch": 2.1045592705167175, "grad_norm": 0.07145163700036854, "learning_rate": 4.60026076251856e-06, "loss": 0.5304, "step": 4331 }, { "epoch": 2.1050455927051672, "grad_norm": 0.07204756646226827, "learning_rate": 4.5983522493545246e-06, "loss": 0.5552, "step": 4332 }, { "epoch": 2.105531914893617, "grad_norm": 0.0705789472994367, "learning_rate": 4.59644379508757e-06, "loss": 0.5288, "step": 4333 }, { "epoch": 2.106018237082067, "grad_norm": 0.07242253113802473, "learning_rate": 4.594535399997551e-06, "loss": 0.5434, "step": 4334 }, { "epoch": 2.1065045592705167, "grad_norm": 0.07047178928556341, "learning_rate": 4.59262706436431e-06, "loss": 0.5093, "step": 4335 }, { "epoch": 2.1069908814589664, "grad_norm": 0.07459318432108243, "learning_rate": 4.590718788467685e-06, "loss": 0.5662, "step": 4336 }, { "epoch": 2.1074772036474165, "grad_norm": 0.07259322741165329, "learning_rate": 4.588810572587502e-06, "loss": 0.5233, "step": 4337 }, { "epoch": 2.107963525835866, "grad_norm": 0.07034984759649421, "learning_rate": 4.5869024170035786e-06, "loss": 0.523, "step": 4338 }, { "epoch": 2.108449848024316, "grad_norm": 0.0723900388291076, "learning_rate": 4.584994321995725e-06, "loss": 0.5572, "step": 4339 }, { "epoch": 2.108936170212766, "grad_norm": 0.07247297420638485, "learning_rate": 4.583086287843741e-06, "loss": 0.5627, "step": 4340 }, { "epoch": 2.1094224924012157, "grad_norm": 0.07349196046813768, "learning_rate": 4.58117831482742e-06, "loss": 0.5491, "step": 4341 }, { "epoch": 2.109908814589666, "grad_norm": 0.07250925681538714, "learning_rate": 4.579270403226542e-06, "loss": 0.521, "step": 4342 }, { "epoch": 2.1103951367781155, "grad_norm": 0.06974951948719156, "learning_rate": 4.577362553320882e-06, "loss": 0.482, "step": 4343 }, { "epoch": 2.110881458966565, "grad_norm": 0.07289419016946141, "learning_rate": 4.575454765390204e-06, "loss": 0.545, "step": 4344 }, { "epoch": 2.1113677811550153, "grad_norm": 0.07283152144900658, "learning_rate": 4.573547039714263e-06, "loss": 0.4963, "step": 4345 }, { "epoch": 2.111854103343465, "grad_norm": 0.07459158269856914, "learning_rate": 4.571639376572806e-06, "loss": 0.5551, "step": 4346 }, { "epoch": 2.1123404255319147, "grad_norm": 0.07127804473628366, "learning_rate": 4.569731776245571e-06, "loss": 0.5118, "step": 4347 }, { "epoch": 2.112826747720365, "grad_norm": 0.07176982435222327, "learning_rate": 4.567824239012284e-06, "loss": 0.4822, "step": 4348 }, { "epoch": 2.1133130699088145, "grad_norm": 0.07094660000979022, "learning_rate": 4.5659167651526645e-06, "loss": 0.5492, "step": 4349 }, { "epoch": 2.1137993920972646, "grad_norm": 0.07468500504931763, "learning_rate": 4.564009354946422e-06, "loss": 0.4999, "step": 4350 }, { "epoch": 2.1142857142857143, "grad_norm": 0.07096007843599139, "learning_rate": 4.562102008673258e-06, "loss": 0.4932, "step": 4351 }, { "epoch": 2.114772036474164, "grad_norm": 0.07472469352671478, "learning_rate": 4.56019472661286e-06, "loss": 0.5505, "step": 4352 }, { "epoch": 2.115258358662614, "grad_norm": 0.07229753615196546, "learning_rate": 4.558287509044913e-06, "loss": 0.5381, "step": 4353 }, { "epoch": 2.115744680851064, "grad_norm": 0.07050239474417577, "learning_rate": 4.556380356249086e-06, "loss": 0.5205, "step": 4354 }, { "epoch": 2.1162310030395135, "grad_norm": 0.07117464513021124, "learning_rate": 4.554473268505043e-06, "loss": 0.5029, "step": 4355 }, { "epoch": 2.1167173252279636, "grad_norm": 0.07075203879334808, "learning_rate": 4.552566246092434e-06, "loss": 0.5271, "step": 4356 }, { "epoch": 2.1172036474164133, "grad_norm": 0.07555866623262578, "learning_rate": 4.550659289290905e-06, "loss": 0.5174, "step": 4357 }, { "epoch": 2.1176899696048634, "grad_norm": 0.0723577624300876, "learning_rate": 4.548752398380088e-06, "loss": 0.5277, "step": 4358 }, { "epoch": 2.118176291793313, "grad_norm": 0.07390188067949988, "learning_rate": 4.546845573639609e-06, "loss": 0.5274, "step": 4359 }, { "epoch": 2.118662613981763, "grad_norm": 0.07405879124155919, "learning_rate": 4.544938815349079e-06, "loss": 0.5452, "step": 4360 }, { "epoch": 2.119148936170213, "grad_norm": 0.07467607547896579, "learning_rate": 4.543032123788105e-06, "loss": 0.5272, "step": 4361 }, { "epoch": 2.1196352583586626, "grad_norm": 0.073900252927743, "learning_rate": 4.541125499236281e-06, "loss": 0.5637, "step": 4362 }, { "epoch": 2.1201215805471123, "grad_norm": 0.07222066525962098, "learning_rate": 4.539218941973191e-06, "loss": 0.5379, "step": 4363 }, { "epoch": 2.1206079027355624, "grad_norm": 0.0714327005797944, "learning_rate": 4.537312452278412e-06, "loss": 0.523, "step": 4364 }, { "epoch": 2.121094224924012, "grad_norm": 0.07518860546404318, "learning_rate": 4.535406030431507e-06, "loss": 0.5221, "step": 4365 }, { "epoch": 2.121580547112462, "grad_norm": 0.07293100530194213, "learning_rate": 4.533499676712032e-06, "loss": 0.5008, "step": 4366 }, { "epoch": 2.122066869300912, "grad_norm": 0.07103974494309062, "learning_rate": 4.531593391399532e-06, "loss": 0.4809, "step": 4367 }, { "epoch": 2.1225531914893616, "grad_norm": 0.07299123816732794, "learning_rate": 4.5296871747735396e-06, "loss": 0.5408, "step": 4368 }, { "epoch": 2.1230395136778117, "grad_norm": 0.07312515277720558, "learning_rate": 4.527781027113584e-06, "loss": 0.5308, "step": 4369 }, { "epoch": 2.1235258358662614, "grad_norm": 0.07131281916667226, "learning_rate": 4.5258749486991794e-06, "loss": 0.5264, "step": 4370 }, { "epoch": 2.124012158054711, "grad_norm": 0.07351564221942003, "learning_rate": 4.523968939809829e-06, "loss": 0.562, "step": 4371 }, { "epoch": 2.1244984802431612, "grad_norm": 0.0711517874828819, "learning_rate": 4.522063000725028e-06, "loss": 0.4869, "step": 4372 }, { "epoch": 2.124984802431611, "grad_norm": 0.07363116551111525, "learning_rate": 4.52015713172426e-06, "loss": 0.5528, "step": 4373 }, { "epoch": 2.1254711246200606, "grad_norm": 0.07261106618957541, "learning_rate": 4.5182513330869996e-06, "loss": 0.582, "step": 4374 }, { "epoch": 2.1259574468085107, "grad_norm": 0.07012232020803523, "learning_rate": 4.516345605092712e-06, "loss": 0.503, "step": 4375 }, { "epoch": 2.1264437689969604, "grad_norm": 0.07423309762826079, "learning_rate": 4.514439948020847e-06, "loss": 0.5412, "step": 4376 }, { "epoch": 2.12693009118541, "grad_norm": 0.06996702153876222, "learning_rate": 4.512534362150851e-06, "loss": 0.5273, "step": 4377 }, { "epoch": 2.1274164133738602, "grad_norm": 0.07004293397103051, "learning_rate": 4.510628847762155e-06, "loss": 0.4796, "step": 4378 }, { "epoch": 2.12790273556231, "grad_norm": 0.10364937852536732, "learning_rate": 4.5087234051341825e-06, "loss": 0.5239, "step": 4379 }, { "epoch": 2.12838905775076, "grad_norm": 0.07153957074665195, "learning_rate": 4.506818034546343e-06, "loss": 0.5366, "step": 4380 }, { "epoch": 2.1288753799392097, "grad_norm": 0.07267839149239437, "learning_rate": 4.504912736278038e-06, "loss": 0.5012, "step": 4381 }, { "epoch": 2.1293617021276594, "grad_norm": 0.07203663993755269, "learning_rate": 4.503007510608657e-06, "loss": 0.5335, "step": 4382 }, { "epoch": 2.1298480243161095, "grad_norm": 0.0715465433743639, "learning_rate": 4.501102357817582e-06, "loss": 0.5115, "step": 4383 }, { "epoch": 2.130334346504559, "grad_norm": 0.07195440043584993, "learning_rate": 4.499197278184181e-06, "loss": 0.5112, "step": 4384 }, { "epoch": 2.1308206686930093, "grad_norm": 0.07513808040327515, "learning_rate": 4.497292271987812e-06, "loss": 0.505, "step": 4385 }, { "epoch": 2.131306990881459, "grad_norm": 0.07168007140474772, "learning_rate": 4.495387339507822e-06, "loss": 0.5376, "step": 4386 }, { "epoch": 2.1317933130699087, "grad_norm": 0.07358464449910782, "learning_rate": 4.493482481023549e-06, "loss": 0.5986, "step": 4387 }, { "epoch": 2.132279635258359, "grad_norm": 0.07045364757161546, "learning_rate": 4.491577696814318e-06, "loss": 0.5577, "step": 4388 }, { "epoch": 2.1327659574468085, "grad_norm": 0.07397463312377561, "learning_rate": 4.4896729871594446e-06, "loss": 0.5283, "step": 4389 }, { "epoch": 2.133252279635258, "grad_norm": 0.07356866875885495, "learning_rate": 4.487768352338232e-06, "loss": 0.5568, "step": 4390 }, { "epoch": 2.1337386018237083, "grad_norm": 0.0732072980689226, "learning_rate": 4.4858637926299745e-06, "loss": 0.5648, "step": 4391 }, { "epoch": 2.134224924012158, "grad_norm": 0.07065829230946877, "learning_rate": 4.4839593083139536e-06, "loss": 0.4997, "step": 4392 }, { "epoch": 2.1347112462006077, "grad_norm": 0.0705261761238876, "learning_rate": 4.482054899669439e-06, "loss": 0.5296, "step": 4393 }, { "epoch": 2.135197568389058, "grad_norm": 0.07140251558208224, "learning_rate": 4.480150566975693e-06, "loss": 0.536, "step": 4394 }, { "epoch": 2.1356838905775075, "grad_norm": 0.07393775448589415, "learning_rate": 4.478246310511963e-06, "loss": 0.5339, "step": 4395 }, { "epoch": 2.1361702127659576, "grad_norm": 0.07167531424173072, "learning_rate": 4.476342130557486e-06, "loss": 0.5686, "step": 4396 }, { "epoch": 2.1366565349544073, "grad_norm": 0.07158235561773914, "learning_rate": 4.474438027391489e-06, "loss": 0.5183, "step": 4397 }, { "epoch": 2.137142857142857, "grad_norm": 0.0706203738598817, "learning_rate": 4.472534001293187e-06, "loss": 0.5242, "step": 4398 }, { "epoch": 2.137629179331307, "grad_norm": 0.07585642372597831, "learning_rate": 4.4706300525417845e-06, "loss": 0.5496, "step": 4399 }, { "epoch": 2.138115501519757, "grad_norm": 0.07214989660051936, "learning_rate": 4.468726181416473e-06, "loss": 0.5296, "step": 4400 }, { "epoch": 2.1386018237082065, "grad_norm": 0.07326627443925492, "learning_rate": 4.466822388196434e-06, "loss": 0.5733, "step": 4401 }, { "epoch": 2.1390881458966566, "grad_norm": 0.07068439489348208, "learning_rate": 4.464918673160837e-06, "loss": 0.5364, "step": 4402 }, { "epoch": 2.1395744680851063, "grad_norm": 0.07027562850145501, "learning_rate": 4.463015036588841e-06, "loss": 0.522, "step": 4403 }, { "epoch": 2.140060790273556, "grad_norm": 0.07164970636775356, "learning_rate": 4.46111147875959e-06, "loss": 0.5092, "step": 4404 }, { "epoch": 2.140547112462006, "grad_norm": 0.07040889695873687, "learning_rate": 4.459207999952223e-06, "loss": 0.4772, "step": 4405 }, { "epoch": 2.141033434650456, "grad_norm": 0.07402437125040097, "learning_rate": 4.457304600445861e-06, "loss": 0.5818, "step": 4406 }, { "epoch": 2.141519756838906, "grad_norm": 0.07055123213084917, "learning_rate": 4.455401280519617e-06, "loss": 0.5225, "step": 4407 }, { "epoch": 2.1420060790273556, "grad_norm": 0.07274622696245434, "learning_rate": 4.45349804045259e-06, "loss": 0.5227, "step": 4408 }, { "epoch": 2.1424924012158053, "grad_norm": 0.07233454570922326, "learning_rate": 4.451594880523872e-06, "loss": 0.5482, "step": 4409 }, { "epoch": 2.1429787234042554, "grad_norm": 0.07079664235469232, "learning_rate": 4.449691801012535e-06, "loss": 0.487, "step": 4410 }, { "epoch": 2.143465045592705, "grad_norm": 0.07145382426144663, "learning_rate": 4.447788802197647e-06, "loss": 0.5194, "step": 4411 }, { "epoch": 2.1439513677811552, "grad_norm": 0.070013756924666, "learning_rate": 4.44588588435826e-06, "loss": 0.5288, "step": 4412 }, { "epoch": 2.144437689969605, "grad_norm": 0.07459799994464224, "learning_rate": 4.443983047773417e-06, "loss": 0.5999, "step": 4413 }, { "epoch": 2.1449240121580546, "grad_norm": 0.07096886542848622, "learning_rate": 4.442080292722144e-06, "loss": 0.5415, "step": 4414 }, { "epoch": 2.1454103343465047, "grad_norm": 0.07258522929978131, "learning_rate": 4.4401776194834615e-06, "loss": 0.5443, "step": 4415 }, { "epoch": 2.1458966565349544, "grad_norm": 0.07216420978459806, "learning_rate": 4.438275028336374e-06, "loss": 0.5068, "step": 4416 }, { "epoch": 2.146382978723404, "grad_norm": 0.07258998264618445, "learning_rate": 4.436372519559874e-06, "loss": 0.5189, "step": 4417 }, { "epoch": 2.1468693009118542, "grad_norm": 0.07184244385273168, "learning_rate": 4.434470093432945e-06, "loss": 0.5464, "step": 4418 }, { "epoch": 2.147355623100304, "grad_norm": 0.07152660167371257, "learning_rate": 4.432567750234554e-06, "loss": 0.5316, "step": 4419 }, { "epoch": 2.1478419452887536, "grad_norm": 0.07649460446870177, "learning_rate": 4.430665490243659e-06, "loss": 0.5276, "step": 4420 }, { "epoch": 2.1483282674772037, "grad_norm": 0.07127005041148703, "learning_rate": 4.428763313739204e-06, "loss": 0.5354, "step": 4421 }, { "epoch": 2.1488145896656534, "grad_norm": 0.07296418973582286, "learning_rate": 4.426861221000121e-06, "loss": 0.5471, "step": 4422 }, { "epoch": 2.1493009118541035, "grad_norm": 0.07097311914711622, "learning_rate": 4.424959212305334e-06, "loss": 0.4923, "step": 4423 }, { "epoch": 2.1497872340425532, "grad_norm": 0.07275208094053433, "learning_rate": 4.423057287933748e-06, "loss": 0.5256, "step": 4424 }, { "epoch": 2.150273556231003, "grad_norm": 0.07289688301809912, "learning_rate": 4.421155448164258e-06, "loss": 0.5334, "step": 4425 }, { "epoch": 2.150759878419453, "grad_norm": 0.07219912078647304, "learning_rate": 4.419253693275749e-06, "loss": 0.5093, "step": 4426 }, { "epoch": 2.1512462006079027, "grad_norm": 0.0725635644775168, "learning_rate": 4.4173520235470905e-06, "loss": 0.5058, "step": 4427 }, { "epoch": 2.1517325227963524, "grad_norm": 0.0753176353347214, "learning_rate": 4.415450439257142e-06, "loss": 0.5717, "step": 4428 }, { "epoch": 2.1522188449848025, "grad_norm": 0.06971112714686117, "learning_rate": 4.4135489406847485e-06, "loss": 0.505, "step": 4429 }, { "epoch": 2.152705167173252, "grad_norm": 0.07338960859625769, "learning_rate": 4.411647528108744e-06, "loss": 0.5157, "step": 4430 }, { "epoch": 2.153191489361702, "grad_norm": 0.07650207286913285, "learning_rate": 4.409746201807947e-06, "loss": 0.5286, "step": 4431 }, { "epoch": 2.153677811550152, "grad_norm": 0.06991520097871859, "learning_rate": 4.4078449620611674e-06, "loss": 0.5185, "step": 4432 }, { "epoch": 2.1541641337386017, "grad_norm": 0.07123275782083067, "learning_rate": 4.4059438091472e-06, "loss": 0.5328, "step": 4433 }, { "epoch": 2.154650455927052, "grad_norm": 0.07502707815545193, "learning_rate": 4.404042743344827e-06, "loss": 0.5355, "step": 4434 }, { "epoch": 2.1551367781155015, "grad_norm": 0.0717210084395386, "learning_rate": 4.402141764932818e-06, "loss": 0.5377, "step": 4435 }, { "epoch": 2.155623100303951, "grad_norm": 0.0699130628287047, "learning_rate": 4.40024087418993e-06, "loss": 0.4834, "step": 4436 }, { "epoch": 2.1561094224924013, "grad_norm": 0.07032858642481528, "learning_rate": 4.398340071394906e-06, "loss": 0.4989, "step": 4437 }, { "epoch": 2.156595744680851, "grad_norm": 0.07335052042640616, "learning_rate": 4.39643935682648e-06, "loss": 0.5305, "step": 4438 }, { "epoch": 2.1570820668693007, "grad_norm": 0.07612970735097073, "learning_rate": 4.394538730763368e-06, "loss": 0.5405, "step": 4439 }, { "epoch": 2.157568389057751, "grad_norm": 0.07165852630690416, "learning_rate": 4.392638193484274e-06, "loss": 0.5501, "step": 4440 }, { "epoch": 2.1580547112462005, "grad_norm": 0.07224930588412967, "learning_rate": 4.390737745267893e-06, "loss": 0.5234, "step": 4441 }, { "epoch": 2.1585410334346506, "grad_norm": 0.07496450242407453, "learning_rate": 4.388837386392903e-06, "loss": 0.5161, "step": 4442 }, { "epoch": 2.1590273556231003, "grad_norm": 0.07262170312440885, "learning_rate": 4.38693711713797e-06, "loss": 0.5339, "step": 4443 }, { "epoch": 2.15951367781155, "grad_norm": 0.07470624822842568, "learning_rate": 4.385036937781747e-06, "loss": 0.5471, "step": 4444 }, { "epoch": 2.16, "grad_norm": 0.0737252417065575, "learning_rate": 4.383136848602874e-06, "loss": 0.5548, "step": 4445 }, { "epoch": 2.16048632218845, "grad_norm": 0.07493011443252882, "learning_rate": 4.381236849879977e-06, "loss": 0.5608, "step": 4446 }, { "epoch": 2.1609726443768995, "grad_norm": 0.07173763277960096, "learning_rate": 4.3793369418916705e-06, "loss": 0.5073, "step": 4447 }, { "epoch": 2.1614589665653496, "grad_norm": 0.0725532176251541, "learning_rate": 4.3774371249165525e-06, "loss": 0.5494, "step": 4448 }, { "epoch": 2.1619452887537993, "grad_norm": 0.06817887560450904, "learning_rate": 4.375537399233211e-06, "loss": 0.5148, "step": 4449 }, { "epoch": 2.1624316109422494, "grad_norm": 0.07149176284578583, "learning_rate": 4.373637765120218e-06, "loss": 0.538, "step": 4450 }, { "epoch": 2.162917933130699, "grad_norm": 0.06933298478705206, "learning_rate": 4.371738222856134e-06, "loss": 0.4891, "step": 4451 }, { "epoch": 2.163404255319149, "grad_norm": 0.07537965341211422, "learning_rate": 4.369838772719505e-06, "loss": 0.5396, "step": 4452 }, { "epoch": 2.163890577507599, "grad_norm": 0.07489509692036718, "learning_rate": 4.3679394149888646e-06, "loss": 0.5409, "step": 4453 }, { "epoch": 2.1643768996960486, "grad_norm": 0.07225314716427281, "learning_rate": 4.366040149942731e-06, "loss": 0.5256, "step": 4454 }, { "epoch": 2.1648632218844983, "grad_norm": 0.06921989961719661, "learning_rate": 4.36414097785961e-06, "loss": 0.5053, "step": 4455 }, { "epoch": 2.1653495440729484, "grad_norm": 0.06957765254772537, "learning_rate": 4.362241899017995e-06, "loss": 0.5076, "step": 4456 }, { "epoch": 2.165835866261398, "grad_norm": 0.07317471277420289, "learning_rate": 4.360342913696363e-06, "loss": 0.5389, "step": 4457 }, { "epoch": 2.166322188449848, "grad_norm": 0.07111617999675747, "learning_rate": 4.358444022173177e-06, "loss": 0.5108, "step": 4458 }, { "epoch": 2.166808510638298, "grad_norm": 0.0705933684205117, "learning_rate": 4.356545224726891e-06, "loss": 0.5002, "step": 4459 }, { "epoch": 2.1672948328267476, "grad_norm": 0.07283998229835192, "learning_rate": 4.354646521635942e-06, "loss": 0.5008, "step": 4460 }, { "epoch": 2.1677811550151977, "grad_norm": 0.07281584095044312, "learning_rate": 4.3527479131787505e-06, "loss": 0.5423, "step": 4461 }, { "epoch": 2.1682674772036474, "grad_norm": 0.07098384695177379, "learning_rate": 4.35084939963373e-06, "loss": 0.5157, "step": 4462 }, { "epoch": 2.168753799392097, "grad_norm": 0.07229381706846445, "learning_rate": 4.348950981279271e-06, "loss": 0.5205, "step": 4463 }, { "epoch": 2.1692401215805472, "grad_norm": 0.0727448953839507, "learning_rate": 4.347052658393759e-06, "loss": 0.5266, "step": 4464 }, { "epoch": 2.169726443768997, "grad_norm": 0.0747763508564579, "learning_rate": 4.345154431255559e-06, "loss": 0.5466, "step": 4465 }, { "epoch": 2.1702127659574466, "grad_norm": 0.06954406846660843, "learning_rate": 4.343256300143026e-06, "loss": 0.5201, "step": 4466 }, { "epoch": 2.1706990881458967, "grad_norm": 0.0756097044811774, "learning_rate": 4.341358265334498e-06, "loss": 0.5266, "step": 4467 }, { "epoch": 2.1711854103343464, "grad_norm": 0.0712351094386963, "learning_rate": 4.339460327108301e-06, "loss": 0.5066, "step": 4468 }, { "epoch": 2.1716717325227965, "grad_norm": 0.07048703296532557, "learning_rate": 4.337562485742747e-06, "loss": 0.5175, "step": 4469 }, { "epoch": 2.1721580547112462, "grad_norm": 0.07233271147362716, "learning_rate": 4.335664741516132e-06, "loss": 0.5179, "step": 4470 }, { "epoch": 2.172644376899696, "grad_norm": 0.07672380531710715, "learning_rate": 4.333767094706738e-06, "loss": 0.5466, "step": 4471 }, { "epoch": 2.173130699088146, "grad_norm": 0.07852345841998083, "learning_rate": 4.331869545592834e-06, "loss": 0.5534, "step": 4472 }, { "epoch": 2.1736170212765957, "grad_norm": 0.0722184845292258, "learning_rate": 4.3299720944526746e-06, "loss": 0.5549, "step": 4473 }, { "epoch": 2.1741033434650454, "grad_norm": 0.07130650521018668, "learning_rate": 4.328074741564498e-06, "loss": 0.51, "step": 4474 }, { "epoch": 2.1745896656534955, "grad_norm": 0.07387767633058749, "learning_rate": 4.326177487206531e-06, "loss": 0.5224, "step": 4475 }, { "epoch": 2.1750759878419452, "grad_norm": 0.07595354066387881, "learning_rate": 4.324280331656982e-06, "loss": 0.5559, "step": 4476 }, { "epoch": 2.1755623100303954, "grad_norm": 0.0732706271764856, "learning_rate": 4.322383275194051e-06, "loss": 0.5161, "step": 4477 }, { "epoch": 2.176048632218845, "grad_norm": 0.07536805327615512, "learning_rate": 4.320486318095917e-06, "loss": 0.5464, "step": 4478 }, { "epoch": 2.1765349544072947, "grad_norm": 0.06948896974377608, "learning_rate": 4.318589460640748e-06, "loss": 0.4822, "step": 4479 }, { "epoch": 2.177021276595745, "grad_norm": 0.0736711355302051, "learning_rate": 4.316692703106698e-06, "loss": 0.5455, "step": 4480 }, { "epoch": 2.1775075987841945, "grad_norm": 0.07227677761420534, "learning_rate": 4.3147960457719025e-06, "loss": 0.5263, "step": 4481 }, { "epoch": 2.177993920972644, "grad_norm": 0.0676000024639692, "learning_rate": 4.312899488914486e-06, "loss": 0.4812, "step": 4482 }, { "epoch": 2.1784802431610943, "grad_norm": 0.07171891389812027, "learning_rate": 4.311003032812558e-06, "loss": 0.5489, "step": 4483 }, { "epoch": 2.178966565349544, "grad_norm": 0.07074825308105669, "learning_rate": 4.3091066777442094e-06, "loss": 0.5247, "step": 4484 }, { "epoch": 2.1794528875379937, "grad_norm": 0.07216165209108397, "learning_rate": 4.307210423987522e-06, "loss": 0.5038, "step": 4485 }, { "epoch": 2.179939209726444, "grad_norm": 0.07405024135841418, "learning_rate": 4.30531427182056e-06, "loss": 0.5374, "step": 4486 }, { "epoch": 2.1804255319148935, "grad_norm": 0.07175393728509953, "learning_rate": 4.303418221521369e-06, "loss": 0.5396, "step": 4487 }, { "epoch": 2.1809118541033437, "grad_norm": 0.0746652818866144, "learning_rate": 4.301522273367986e-06, "loss": 0.558, "step": 4488 }, { "epoch": 2.1813981762917933, "grad_norm": 0.0730468957005626, "learning_rate": 4.2996264276384305e-06, "loss": 0.5148, "step": 4489 }, { "epoch": 2.181884498480243, "grad_norm": 0.07543224255611654, "learning_rate": 4.297730684610706e-06, "loss": 0.532, "step": 4490 }, { "epoch": 2.182370820668693, "grad_norm": 0.06991979796767724, "learning_rate": 4.295835044562802e-06, "loss": 0.5055, "step": 4491 }, { "epoch": 2.182857142857143, "grad_norm": 0.07445756278160635, "learning_rate": 4.293939507772692e-06, "loss": 0.5532, "step": 4492 }, { "epoch": 2.1833434650455925, "grad_norm": 0.07412206940131677, "learning_rate": 4.292044074518335e-06, "loss": 0.5067, "step": 4493 }, { "epoch": 2.1838297872340426, "grad_norm": 0.07235603166218797, "learning_rate": 4.290148745077675e-06, "loss": 0.524, "step": 4494 }, { "epoch": 2.1843161094224923, "grad_norm": 0.07092968594100339, "learning_rate": 4.28825351972864e-06, "loss": 0.5041, "step": 4495 }, { "epoch": 2.1848024316109425, "grad_norm": 0.0749341346831391, "learning_rate": 4.286358398749146e-06, "loss": 0.5407, "step": 4496 }, { "epoch": 2.185288753799392, "grad_norm": 0.07214373683180679, "learning_rate": 4.284463382417088e-06, "loss": 0.5546, "step": 4497 }, { "epoch": 2.185775075987842, "grad_norm": 0.07444150224827252, "learning_rate": 4.282568471010349e-06, "loss": 0.5702, "step": 4498 }, { "epoch": 2.186261398176292, "grad_norm": 0.07066189503164857, "learning_rate": 4.280673664806798e-06, "loss": 0.4866, "step": 4499 }, { "epoch": 2.1867477203647416, "grad_norm": 0.07028448033069667, "learning_rate": 4.278778964084284e-06, "loss": 0.5137, "step": 4500 }, { "epoch": 2.1872340425531913, "grad_norm": 0.06927337705382537, "learning_rate": 4.276884369120647e-06, "loss": 0.4922, "step": 4501 }, { "epoch": 2.1877203647416414, "grad_norm": 0.07389348298428064, "learning_rate": 4.274989880193705e-06, "loss": 0.5232, "step": 4502 }, { "epoch": 2.188206686930091, "grad_norm": 0.07310086878402257, "learning_rate": 4.273095497581263e-06, "loss": 0.519, "step": 4503 }, { "epoch": 2.1886930091185413, "grad_norm": 0.07009569188352663, "learning_rate": 4.271201221561112e-06, "loss": 0.5057, "step": 4504 }, { "epoch": 2.189179331306991, "grad_norm": 0.07508730736469998, "learning_rate": 4.269307052411026e-06, "loss": 0.5522, "step": 4505 }, { "epoch": 2.1896656534954406, "grad_norm": 0.0725965306677151, "learning_rate": 4.267412990408764e-06, "loss": 0.526, "step": 4506 }, { "epoch": 2.1901519756838908, "grad_norm": 0.06951893872984645, "learning_rate": 4.2655190358320665e-06, "loss": 0.5245, "step": 4507 }, { "epoch": 2.1906382978723404, "grad_norm": 0.0700318474514735, "learning_rate": 4.263625188958662e-06, "loss": 0.538, "step": 4508 }, { "epoch": 2.19112462006079, "grad_norm": 0.0712692185200809, "learning_rate": 4.261731450066262e-06, "loss": 0.5149, "step": 4509 }, { "epoch": 2.1916109422492402, "grad_norm": 0.07205495500666896, "learning_rate": 4.259837819432562e-06, "loss": 0.5125, "step": 4510 }, { "epoch": 2.19209726443769, "grad_norm": 0.07417334975079336, "learning_rate": 4.25794429733524e-06, "loss": 0.5368, "step": 4511 }, { "epoch": 2.1925835866261396, "grad_norm": 0.07284175709859952, "learning_rate": 4.2560508840519595e-06, "loss": 0.5086, "step": 4512 }, { "epoch": 2.1930699088145897, "grad_norm": 0.07272175185268577, "learning_rate": 4.254157579860367e-06, "loss": 0.5743, "step": 4513 }, { "epoch": 2.1935562310030394, "grad_norm": 0.07210804493097787, "learning_rate": 4.2522643850380985e-06, "loss": 0.5117, "step": 4514 }, { "epoch": 2.1940425531914896, "grad_norm": 0.07198219782177015, "learning_rate": 4.250371299862768e-06, "loss": 0.5131, "step": 4515 }, { "epoch": 2.1945288753799392, "grad_norm": 0.07306219573299305, "learning_rate": 4.248478324611972e-06, "loss": 0.5521, "step": 4516 }, { "epoch": 2.195015197568389, "grad_norm": 0.07190504043703015, "learning_rate": 4.2465854595632956e-06, "loss": 0.5295, "step": 4517 }, { "epoch": 2.195501519756839, "grad_norm": 0.06900639720632466, "learning_rate": 4.244692704994306e-06, "loss": 0.5215, "step": 4518 }, { "epoch": 2.1959878419452887, "grad_norm": 0.07320857810557652, "learning_rate": 4.242800061182555e-06, "loss": 0.5362, "step": 4519 }, { "epoch": 2.1964741641337384, "grad_norm": 0.07590987873967169, "learning_rate": 4.240907528405574e-06, "loss": 0.5649, "step": 4520 }, { "epoch": 2.1969604863221885, "grad_norm": 0.07043953599485209, "learning_rate": 4.239015106940887e-06, "loss": 0.5162, "step": 4521 }, { "epoch": 2.1974468085106382, "grad_norm": 0.07603876241019514, "learning_rate": 4.2371227970659916e-06, "loss": 0.5217, "step": 4522 }, { "epoch": 2.197933130699088, "grad_norm": 0.07226567672526107, "learning_rate": 4.235230599058374e-06, "loss": 0.5135, "step": 4523 }, { "epoch": 2.198419452887538, "grad_norm": 0.07378502313590074, "learning_rate": 4.233338513195505e-06, "loss": 0.5559, "step": 4524 }, { "epoch": 2.1989057750759877, "grad_norm": 0.07001480243661323, "learning_rate": 4.2314465397548395e-06, "loss": 0.5454, "step": 4525 }, { "epoch": 2.199392097264438, "grad_norm": 0.07261870597075992, "learning_rate": 4.229554679013809e-06, "loss": 0.5103, "step": 4526 }, { "epoch": 2.1998784194528875, "grad_norm": 0.07381774804999802, "learning_rate": 4.227662931249837e-06, "loss": 0.5452, "step": 4527 }, { "epoch": 2.200364741641337, "grad_norm": 0.07242801873466624, "learning_rate": 4.225771296740325e-06, "loss": 0.5081, "step": 4528 }, { "epoch": 2.2008510638297873, "grad_norm": 0.07200057722862975, "learning_rate": 4.2238797757626595e-06, "loss": 0.531, "step": 4529 }, { "epoch": 2.201337386018237, "grad_norm": 0.07131311868598234, "learning_rate": 4.221988368594213e-06, "loss": 0.5305, "step": 4530 }, { "epoch": 2.201823708206687, "grad_norm": 0.07014780105050691, "learning_rate": 4.220097075512335e-06, "loss": 0.525, "step": 4531 }, { "epoch": 2.202310030395137, "grad_norm": 0.07362033174690961, "learning_rate": 4.218205896794366e-06, "loss": 0.5449, "step": 4532 }, { "epoch": 2.2027963525835865, "grad_norm": 0.077389770039334, "learning_rate": 4.216314832717625e-06, "loss": 0.5447, "step": 4533 }, { "epoch": 2.2032826747720367, "grad_norm": 0.07485067164681249, "learning_rate": 4.214423883559414e-06, "loss": 0.5689, "step": 4534 }, { "epoch": 2.2037689969604863, "grad_norm": 0.06938596053126789, "learning_rate": 4.21253304959702e-06, "loss": 0.5205, "step": 4535 }, { "epoch": 2.204255319148936, "grad_norm": 0.0725739562019719, "learning_rate": 4.210642331107711e-06, "loss": 0.5353, "step": 4536 }, { "epoch": 2.204741641337386, "grad_norm": 0.07115611396586252, "learning_rate": 4.208751728368741e-06, "loss": 0.4929, "step": 4537 }, { "epoch": 2.205227963525836, "grad_norm": 0.0735740345041347, "learning_rate": 4.206861241657345e-06, "loss": 0.5124, "step": 4538 }, { "epoch": 2.2057142857142855, "grad_norm": 0.07352300880891982, "learning_rate": 4.204970871250741e-06, "loss": 0.5252, "step": 4539 }, { "epoch": 2.2062006079027356, "grad_norm": 0.07403477627573103, "learning_rate": 4.203080617426131e-06, "loss": 0.5639, "step": 4540 }, { "epoch": 2.2066869300911853, "grad_norm": 0.07080002310921088, "learning_rate": 4.201190480460699e-06, "loss": 0.5282, "step": 4541 }, { "epoch": 2.2071732522796355, "grad_norm": 0.07274715488876342, "learning_rate": 4.1993004606316114e-06, "loss": 0.5215, "step": 4542 }, { "epoch": 2.207659574468085, "grad_norm": 0.07349389107660528, "learning_rate": 4.197410558216018e-06, "loss": 0.5517, "step": 4543 }, { "epoch": 2.208145896656535, "grad_norm": 0.07512923526697164, "learning_rate": 4.1955207734910536e-06, "loss": 0.532, "step": 4544 }, { "epoch": 2.208632218844985, "grad_norm": 0.07047559321781026, "learning_rate": 4.193631106733831e-06, "loss": 0.4959, "step": 4545 }, { "epoch": 2.2091185410334346, "grad_norm": 0.0716291272792849, "learning_rate": 4.191741558221451e-06, "loss": 0.5242, "step": 4546 }, { "epoch": 2.2096048632218843, "grad_norm": 0.0749361291341572, "learning_rate": 4.189852128230992e-06, "loss": 0.5773, "step": 4547 }, { "epoch": 2.2100911854103344, "grad_norm": 0.07238070330465997, "learning_rate": 4.187962817039519e-06, "loss": 0.5127, "step": 4548 }, { "epoch": 2.210577507598784, "grad_norm": 0.07097999202201388, "learning_rate": 4.186073624924077e-06, "loss": 0.56, "step": 4549 }, { "epoch": 2.211063829787234, "grad_norm": 0.07368278475409375, "learning_rate": 4.184184552161696e-06, "loss": 0.5271, "step": 4550 }, { "epoch": 2.211550151975684, "grad_norm": 0.07285182586110941, "learning_rate": 4.182295599029386e-06, "loss": 0.545, "step": 4551 }, { "epoch": 2.2120364741641336, "grad_norm": 0.07230305585935967, "learning_rate": 4.180406765804141e-06, "loss": 0.5506, "step": 4552 }, { "epoch": 2.2125227963525838, "grad_norm": 0.07287189587695644, "learning_rate": 4.178518052762935e-06, "loss": 0.5409, "step": 4553 }, { "epoch": 2.2130091185410334, "grad_norm": 0.07359170055027614, "learning_rate": 4.176629460182731e-06, "loss": 0.5092, "step": 4554 }, { "epoch": 2.213495440729483, "grad_norm": 0.0786894117608461, "learning_rate": 4.174740988340465e-06, "loss": 0.522, "step": 4555 }, { "epoch": 2.2139817629179332, "grad_norm": 0.07303618465465561, "learning_rate": 4.172852637513062e-06, "loss": 0.5318, "step": 4556 }, { "epoch": 2.214468085106383, "grad_norm": 0.0712247643033999, "learning_rate": 4.170964407977426e-06, "loss": 0.5217, "step": 4557 }, { "epoch": 2.214954407294833, "grad_norm": 0.06723044216202748, "learning_rate": 4.169076300010446e-06, "loss": 0.4926, "step": 4558 }, { "epoch": 2.2154407294832827, "grad_norm": 0.0711516408446843, "learning_rate": 4.167188313888991e-06, "loss": 0.4711, "step": 4559 }, { "epoch": 2.2159270516717324, "grad_norm": 0.07169142713748168, "learning_rate": 4.165300449889912e-06, "loss": 0.5289, "step": 4560 }, { "epoch": 2.2164133738601826, "grad_norm": 0.07209325629063798, "learning_rate": 4.163412708290043e-06, "loss": 0.5193, "step": 4561 }, { "epoch": 2.2168996960486322, "grad_norm": 0.07691351297728855, "learning_rate": 4.161525089366201e-06, "loss": 0.5346, "step": 4562 }, { "epoch": 2.217386018237082, "grad_norm": 0.07129482418946885, "learning_rate": 4.1596375933951835e-06, "loss": 0.5065, "step": 4563 }, { "epoch": 2.217872340425532, "grad_norm": 0.07133699367081238, "learning_rate": 4.15775022065377e-06, "loss": 0.5284, "step": 4564 }, { "epoch": 2.2183586626139817, "grad_norm": 0.07915723191085236, "learning_rate": 4.155862971418721e-06, "loss": 0.5387, "step": 4565 }, { "epoch": 2.2188449848024314, "grad_norm": 0.07392118929781695, "learning_rate": 4.153975845966783e-06, "loss": 0.5827, "step": 4566 }, { "epoch": 2.2193313069908815, "grad_norm": 0.07292366089529126, "learning_rate": 4.1520888445746765e-06, "loss": 0.5229, "step": 4567 }, { "epoch": 2.2198176291793312, "grad_norm": 0.07253272769929474, "learning_rate": 4.150201967519115e-06, "loss": 0.533, "step": 4568 }, { "epoch": 2.2203039513677814, "grad_norm": 0.07275933252685185, "learning_rate": 4.148315215076786e-06, "loss": 0.5241, "step": 4569 }, { "epoch": 2.220790273556231, "grad_norm": 0.07183404921358959, "learning_rate": 4.146428587524358e-06, "loss": 0.5422, "step": 4570 }, { "epoch": 2.2212765957446807, "grad_norm": 0.07065214255551733, "learning_rate": 4.144542085138484e-06, "loss": 0.5158, "step": 4571 }, { "epoch": 2.221762917933131, "grad_norm": 0.07374647611575025, "learning_rate": 4.142655708195799e-06, "loss": 0.5002, "step": 4572 }, { "epoch": 2.2222492401215805, "grad_norm": 0.07055733762349352, "learning_rate": 4.140769456972919e-06, "loss": 0.5021, "step": 4573 }, { "epoch": 2.22273556231003, "grad_norm": 0.07314155481825418, "learning_rate": 4.138883331746442e-06, "loss": 0.5453, "step": 4574 }, { "epoch": 2.2232218844984803, "grad_norm": 0.06835525333484847, "learning_rate": 4.136997332792944e-06, "loss": 0.4826, "step": 4575 }, { "epoch": 2.22370820668693, "grad_norm": 0.07444367651981872, "learning_rate": 4.135111460388989e-06, "loss": 0.5235, "step": 4576 }, { "epoch": 2.2241945288753797, "grad_norm": 0.07637947583636884, "learning_rate": 4.133225714811115e-06, "loss": 0.534, "step": 4577 }, { "epoch": 2.22468085106383, "grad_norm": 0.07035935307584323, "learning_rate": 4.131340096335849e-06, "loss": 0.5161, "step": 4578 }, { "epoch": 2.2251671732522795, "grad_norm": 0.06844843561417313, "learning_rate": 4.129454605239692e-06, "loss": 0.4676, "step": 4579 }, { "epoch": 2.2256534954407297, "grad_norm": 0.07005001194486436, "learning_rate": 4.127569241799132e-06, "loss": 0.4827, "step": 4580 }, { "epoch": 2.2261398176291793, "grad_norm": 0.07256398001827741, "learning_rate": 4.125684006290636e-06, "loss": 0.5268, "step": 4581 }, { "epoch": 2.226626139817629, "grad_norm": 0.07429866901508127, "learning_rate": 4.123798898990651e-06, "loss": 0.5419, "step": 4582 }, { "epoch": 2.227112462006079, "grad_norm": 0.07200029499266601, "learning_rate": 4.121913920175608e-06, "loss": 0.5304, "step": 4583 }, { "epoch": 2.227598784194529, "grad_norm": 0.07255472672805988, "learning_rate": 4.120029070121917e-06, "loss": 0.5179, "step": 4584 }, { "epoch": 2.2280851063829785, "grad_norm": 0.0732353870750939, "learning_rate": 4.118144349105969e-06, "loss": 0.5733, "step": 4585 }, { "epoch": 2.2285714285714286, "grad_norm": 0.07421249353976915, "learning_rate": 4.116259757404139e-06, "loss": 0.5467, "step": 4586 }, { "epoch": 2.2290577507598783, "grad_norm": 0.07077620611601619, "learning_rate": 4.114375295292781e-06, "loss": 0.5197, "step": 4587 }, { "epoch": 2.2295440729483285, "grad_norm": 0.07129469226114649, "learning_rate": 4.112490963048228e-06, "loss": 0.5269, "step": 4588 }, { "epoch": 2.230030395136778, "grad_norm": 0.07378638969970018, "learning_rate": 4.110606760946797e-06, "loss": 0.5615, "step": 4589 }, { "epoch": 2.230516717325228, "grad_norm": 0.07015004010154713, "learning_rate": 4.108722689264786e-06, "loss": 0.5233, "step": 4590 }, { "epoch": 2.231003039513678, "grad_norm": 0.07102246082714694, "learning_rate": 4.10683874827847e-06, "loss": 0.5367, "step": 4591 }, { "epoch": 2.2314893617021276, "grad_norm": 0.06864479112331835, "learning_rate": 4.104954938264109e-06, "loss": 0.5003, "step": 4592 }, { "epoch": 2.2319756838905773, "grad_norm": 0.0723011955543065, "learning_rate": 4.103071259497945e-06, "loss": 0.5284, "step": 4593 }, { "epoch": 2.2324620060790275, "grad_norm": 0.07224698434066429, "learning_rate": 4.101187712256193e-06, "loss": 0.5304, "step": 4594 }, { "epoch": 2.232948328267477, "grad_norm": 0.07046994643372083, "learning_rate": 4.099304296815058e-06, "loss": 0.5196, "step": 4595 }, { "epoch": 2.2334346504559273, "grad_norm": 0.07476715069820349, "learning_rate": 4.097421013450718e-06, "loss": 0.5415, "step": 4596 }, { "epoch": 2.233920972644377, "grad_norm": 0.07031622644758091, "learning_rate": 4.095537862439338e-06, "loss": 0.5028, "step": 4597 }, { "epoch": 2.2344072948328266, "grad_norm": 0.07243301251094518, "learning_rate": 4.093654844057059e-06, "loss": 0.5382, "step": 4598 }, { "epoch": 2.2348936170212768, "grad_norm": 0.07357768193996367, "learning_rate": 4.091771958580005e-06, "loss": 0.5525, "step": 4599 }, { "epoch": 2.2353799392097264, "grad_norm": 0.06907282011657624, "learning_rate": 4.089889206284279e-06, "loss": 0.4794, "step": 4600 }, { "epoch": 2.235866261398176, "grad_norm": 0.0713803728663108, "learning_rate": 4.088006587445967e-06, "loss": 0.5079, "step": 4601 }, { "epoch": 2.2363525835866263, "grad_norm": 0.07066356129219667, "learning_rate": 4.086124102341133e-06, "loss": 0.5417, "step": 4602 }, { "epoch": 2.236838905775076, "grad_norm": 0.07160730071846899, "learning_rate": 4.0842417512458184e-06, "loss": 0.5287, "step": 4603 }, { "epoch": 2.2373252279635256, "grad_norm": 0.07067886150922982, "learning_rate": 4.082359534436055e-06, "loss": 0.5363, "step": 4604 }, { "epoch": 2.2378115501519757, "grad_norm": 0.07179142221792519, "learning_rate": 4.080477452187845e-06, "loss": 0.5161, "step": 4605 }, { "epoch": 2.2382978723404254, "grad_norm": 0.0743988081950859, "learning_rate": 4.078595504777174e-06, "loss": 0.5364, "step": 4606 }, { "epoch": 2.2387841945288756, "grad_norm": 0.07147876117880697, "learning_rate": 4.07671369248001e-06, "loss": 0.4912, "step": 4607 }, { "epoch": 2.2392705167173252, "grad_norm": 0.07683126158721, "learning_rate": 4.074832015572299e-06, "loss": 0.5517, "step": 4608 }, { "epoch": 2.239756838905775, "grad_norm": 0.07643915789894339, "learning_rate": 4.072950474329965e-06, "loss": 0.5218, "step": 4609 }, { "epoch": 2.240243161094225, "grad_norm": 0.07204330812356448, "learning_rate": 4.071069069028918e-06, "loss": 0.5159, "step": 4610 }, { "epoch": 2.2407294832826747, "grad_norm": 0.07480233157113961, "learning_rate": 4.0691877999450425e-06, "loss": 0.534, "step": 4611 }, { "epoch": 2.2412158054711244, "grad_norm": 0.06969247871258243, "learning_rate": 4.067306667354206e-06, "loss": 0.4986, "step": 4612 }, { "epoch": 2.2417021276595746, "grad_norm": 0.0729209483183523, "learning_rate": 4.065425671532256e-06, "loss": 0.5654, "step": 4613 }, { "epoch": 2.2421884498480242, "grad_norm": 0.07287429914765206, "learning_rate": 4.063544812755018e-06, "loss": 0.5296, "step": 4614 }, { "epoch": 2.2426747720364744, "grad_norm": 0.06970692610968177, "learning_rate": 4.061664091298299e-06, "loss": 0.47, "step": 4615 }, { "epoch": 2.243161094224924, "grad_norm": 0.07151609676590803, "learning_rate": 4.059783507437886e-06, "loss": 0.5249, "step": 4616 }, { "epoch": 2.2436474164133737, "grad_norm": 0.06959640989329831, "learning_rate": 4.057903061449545e-06, "loss": 0.5031, "step": 4617 }, { "epoch": 2.244133738601824, "grad_norm": 0.07062961902988361, "learning_rate": 4.0560227536090206e-06, "loss": 0.5109, "step": 4618 }, { "epoch": 2.2446200607902735, "grad_norm": 0.0763826527208737, "learning_rate": 4.05414258419204e-06, "loss": 0.5526, "step": 4619 }, { "epoch": 2.2451063829787232, "grad_norm": 0.07449247268750961, "learning_rate": 4.05226255347431e-06, "loss": 0.5297, "step": 4620 }, { "epoch": 2.2455927051671734, "grad_norm": 0.07481854868251386, "learning_rate": 4.050382661731513e-06, "loss": 0.5562, "step": 4621 }, { "epoch": 2.246079027355623, "grad_norm": 0.07600959564524962, "learning_rate": 4.048502909239314e-06, "loss": 0.5612, "step": 4622 }, { "epoch": 2.246565349544073, "grad_norm": 0.07154286450044224, "learning_rate": 4.046623296273359e-06, "loss": 0.5441, "step": 4623 }, { "epoch": 2.247051671732523, "grad_norm": 0.0732323366918422, "learning_rate": 4.044743823109272e-06, "loss": 0.5358, "step": 4624 }, { "epoch": 2.2475379939209725, "grad_norm": 0.07030235463227916, "learning_rate": 4.042864490022656e-06, "loss": 0.5035, "step": 4625 }, { "epoch": 2.2480243161094227, "grad_norm": 0.07219052717636693, "learning_rate": 4.040985297289093e-06, "loss": 0.5231, "step": 4626 }, { "epoch": 2.2480243161094227, "eval_loss": 0.5724824666976929, "eval_runtime": 105.2478, "eval_samples_per_second": 288.396, "eval_steps_per_second": 36.058, "step": 4626 }, { "epoch": 2.2485106382978723, "grad_norm": 0.06894684561606178, "learning_rate": 4.0391062451841455e-06, "loss": 0.5131, "step": 4627 }, { "epoch": 2.248996960486322, "grad_norm": 0.0726377000345425, "learning_rate": 4.037227333983356e-06, "loss": 0.5379, "step": 4628 }, { "epoch": 2.249483282674772, "grad_norm": 0.07495720921777778, "learning_rate": 4.035348563962245e-06, "loss": 0.5513, "step": 4629 }, { "epoch": 2.249969604863222, "grad_norm": 0.07434418403216476, "learning_rate": 4.033469935396313e-06, "loss": 0.5665, "step": 4630 }, { "epoch": 2.2504559270516715, "grad_norm": 0.0721622807479631, "learning_rate": 4.031591448561038e-06, "loss": 0.5036, "step": 4631 }, { "epoch": 2.2509422492401217, "grad_norm": 0.07101519806430857, "learning_rate": 4.0297131037318826e-06, "loss": 0.5292, "step": 4632 }, { "epoch": 2.2514285714285713, "grad_norm": 0.07130386691688971, "learning_rate": 4.0278349011842806e-06, "loss": 0.5513, "step": 4633 }, { "epoch": 2.2519148936170215, "grad_norm": 0.07154600826096935, "learning_rate": 4.025956841193651e-06, "loss": 0.546, "step": 4634 }, { "epoch": 2.252401215805471, "grad_norm": 0.06985275787801024, "learning_rate": 4.0240789240353885e-06, "loss": 0.5144, "step": 4635 }, { "epoch": 2.252887537993921, "grad_norm": 0.06890198681421501, "learning_rate": 4.022201149984871e-06, "loss": 0.4974, "step": 4636 }, { "epoch": 2.253373860182371, "grad_norm": 0.07104841733003331, "learning_rate": 4.02032351931745e-06, "loss": 0.5111, "step": 4637 }, { "epoch": 2.2538601823708206, "grad_norm": 0.07476571695331928, "learning_rate": 4.01844603230846e-06, "loss": 0.5467, "step": 4638 }, { "epoch": 2.2543465045592703, "grad_norm": 0.07383025791394142, "learning_rate": 4.016568689233214e-06, "loss": 0.5372, "step": 4639 }, { "epoch": 2.2548328267477205, "grad_norm": 0.0714222417481429, "learning_rate": 4.014691490367e-06, "loss": 0.5211, "step": 4640 }, { "epoch": 2.25531914893617, "grad_norm": 0.07231688856038394, "learning_rate": 4.012814435985092e-06, "loss": 0.4955, "step": 4641 }, { "epoch": 2.25580547112462, "grad_norm": 0.07432097278521449, "learning_rate": 4.010937526362737e-06, "loss": 0.5577, "step": 4642 }, { "epoch": 2.25629179331307, "grad_norm": 0.07361659734610657, "learning_rate": 4.009060761775161e-06, "loss": 0.5448, "step": 4643 }, { "epoch": 2.2567781155015196, "grad_norm": 0.07263906957251715, "learning_rate": 4.007184142497572e-06, "loss": 0.5581, "step": 4644 }, { "epoch": 2.2572644376899698, "grad_norm": 0.07150350702796775, "learning_rate": 4.005307668805154e-06, "loss": 0.5385, "step": 4645 }, { "epoch": 2.2577507598784194, "grad_norm": 0.07248082133327993, "learning_rate": 4.0034313409730726e-06, "loss": 0.5385, "step": 4646 }, { "epoch": 2.258237082066869, "grad_norm": 0.07637048032760417, "learning_rate": 4.001555159276467e-06, "loss": 0.58, "step": 4647 }, { "epoch": 2.2587234042553193, "grad_norm": 0.0709570721204946, "learning_rate": 3.999679123990458e-06, "loss": 0.5205, "step": 4648 }, { "epoch": 2.259209726443769, "grad_norm": 0.07119445196361225, "learning_rate": 3.997803235390148e-06, "loss": 0.5056, "step": 4649 }, { "epoch": 2.259696048632219, "grad_norm": 0.07226177551198266, "learning_rate": 3.9959274937506125e-06, "loss": 0.5164, "step": 4650 }, { "epoch": 2.2601823708206688, "grad_norm": 0.07259049106799932, "learning_rate": 3.994051899346907e-06, "loss": 0.505, "step": 4651 }, { "epoch": 2.2606686930091184, "grad_norm": 0.07165842963067946, "learning_rate": 3.9921764524540675e-06, "loss": 0.5155, "step": 4652 }, { "epoch": 2.2611550151975686, "grad_norm": 0.07275979455306059, "learning_rate": 3.990301153347107e-06, "loss": 0.5387, "step": 4653 }, { "epoch": 2.2616413373860182, "grad_norm": 0.07375990794048601, "learning_rate": 3.988426002301016e-06, "loss": 0.5488, "step": 4654 }, { "epoch": 2.262127659574468, "grad_norm": 0.07060908895057215, "learning_rate": 3.9865509995907656e-06, "loss": 0.5137, "step": 4655 }, { "epoch": 2.262613981762918, "grad_norm": 0.07263896287183237, "learning_rate": 3.984676145491302e-06, "loss": 0.5424, "step": 4656 }, { "epoch": 2.2631003039513677, "grad_norm": 0.0704289138414631, "learning_rate": 3.982801440277552e-06, "loss": 0.5284, "step": 4657 }, { "epoch": 2.2635866261398174, "grad_norm": 0.07567836995960589, "learning_rate": 3.980926884224417e-06, "loss": 0.5314, "step": 4658 }, { "epoch": 2.2640729483282676, "grad_norm": 0.07207726148750707, "learning_rate": 3.979052477606785e-06, "loss": 0.5102, "step": 4659 }, { "epoch": 2.2645592705167172, "grad_norm": 0.06963018684727802, "learning_rate": 3.977178220699514e-06, "loss": 0.5642, "step": 4660 }, { "epoch": 2.2650455927051674, "grad_norm": 0.06972161012251381, "learning_rate": 3.9753041137774414e-06, "loss": 0.4987, "step": 4661 }, { "epoch": 2.265531914893617, "grad_norm": 0.07375655082562882, "learning_rate": 3.9734301571153845e-06, "loss": 0.5676, "step": 4662 }, { "epoch": 2.2660182370820667, "grad_norm": 0.07115099880046266, "learning_rate": 3.971556350988137e-06, "loss": 0.5307, "step": 4663 }, { "epoch": 2.266504559270517, "grad_norm": 0.07041156696485094, "learning_rate": 3.969682695670472e-06, "loss": 0.493, "step": 4664 }, { "epoch": 2.2669908814589665, "grad_norm": 0.07670636771867809, "learning_rate": 3.96780919143714e-06, "loss": 0.5489, "step": 4665 }, { "epoch": 2.2674772036474162, "grad_norm": 0.07414283445717815, "learning_rate": 3.965935838562868e-06, "loss": 0.5069, "step": 4666 }, { "epoch": 2.2679635258358664, "grad_norm": 0.07353006904596122, "learning_rate": 3.9640626373223636e-06, "loss": 0.524, "step": 4667 }, { "epoch": 2.268449848024316, "grad_norm": 0.07001901504172404, "learning_rate": 3.96218958799031e-06, "loss": 0.4888, "step": 4668 }, { "epoch": 2.2689361702127657, "grad_norm": 0.07387519149202325, "learning_rate": 3.9603166908413665e-06, "loss": 0.5397, "step": 4669 }, { "epoch": 2.269422492401216, "grad_norm": 0.07462909382918813, "learning_rate": 3.958443946150176e-06, "loss": 0.5267, "step": 4670 }, { "epoch": 2.2699088145896655, "grad_norm": 0.07581939008792042, "learning_rate": 3.956571354191352e-06, "loss": 0.5518, "step": 4671 }, { "epoch": 2.2703951367781157, "grad_norm": 0.07367720647418047, "learning_rate": 3.95469891523949e-06, "loss": 0.5449, "step": 4672 }, { "epoch": 2.2708814589665653, "grad_norm": 0.06898445831740564, "learning_rate": 3.952826629569162e-06, "loss": 0.5034, "step": 4673 }, { "epoch": 2.271367781155015, "grad_norm": 0.07124674172456269, "learning_rate": 3.950954497454916e-06, "loss": 0.5213, "step": 4674 }, { "epoch": 2.271854103343465, "grad_norm": 0.07317665317565439, "learning_rate": 3.949082519171282e-06, "loss": 0.527, "step": 4675 }, { "epoch": 2.272340425531915, "grad_norm": 0.07177760821923058, "learning_rate": 3.947210694992761e-06, "loss": 0.5182, "step": 4676 }, { "epoch": 2.272826747720365, "grad_norm": 0.07299501756526486, "learning_rate": 3.945339025193837e-06, "loss": 0.5334, "step": 4677 }, { "epoch": 2.2733130699088147, "grad_norm": 0.07265700949450907, "learning_rate": 3.943467510048969e-06, "loss": 0.499, "step": 4678 }, { "epoch": 2.2737993920972643, "grad_norm": 0.07297855579993766, "learning_rate": 3.941596149832593e-06, "loss": 0.5396, "step": 4679 }, { "epoch": 2.2742857142857145, "grad_norm": 0.07111614061774357, "learning_rate": 3.939724944819122e-06, "loss": 0.521, "step": 4680 }, { "epoch": 2.274772036474164, "grad_norm": 0.07058081484806333, "learning_rate": 3.937853895282948e-06, "loss": 0.514, "step": 4681 }, { "epoch": 2.275258358662614, "grad_norm": 0.07133281626498338, "learning_rate": 3.935983001498439e-06, "loss": 0.5256, "step": 4682 }, { "epoch": 2.275744680851064, "grad_norm": 0.079577019991552, "learning_rate": 3.9341122637399395e-06, "loss": 0.5137, "step": 4683 }, { "epoch": 2.2762310030395136, "grad_norm": 0.07245763195009111, "learning_rate": 3.932241682281774e-06, "loss": 0.5395, "step": 4684 }, { "epoch": 2.2767173252279633, "grad_norm": 0.0711027919037749, "learning_rate": 3.93037125739824e-06, "loss": 0.5041, "step": 4685 }, { "epoch": 2.2772036474164135, "grad_norm": 0.07090235344559005, "learning_rate": 3.928500989363614e-06, "loss": 0.535, "step": 4686 }, { "epoch": 2.277689969604863, "grad_norm": 0.07326328738846202, "learning_rate": 3.9266308784521515e-06, "loss": 0.5145, "step": 4687 }, { "epoch": 2.2781762917933133, "grad_norm": 0.07415270979860497, "learning_rate": 3.92476092493808e-06, "loss": 0.5587, "step": 4688 }, { "epoch": 2.278662613981763, "grad_norm": 0.07531879257606712, "learning_rate": 3.922891129095609e-06, "loss": 0.5633, "step": 4689 }, { "epoch": 2.2791489361702126, "grad_norm": 0.07452808453864476, "learning_rate": 3.9210214911989235e-06, "loss": 0.5794, "step": 4690 }, { "epoch": 2.2796352583586628, "grad_norm": 0.06826857272640785, "learning_rate": 3.919152011522183e-06, "loss": 0.4823, "step": 4691 }, { "epoch": 2.2801215805471124, "grad_norm": 0.07137685960301225, "learning_rate": 3.917282690339527e-06, "loss": 0.4975, "step": 4692 }, { "epoch": 2.280607902735562, "grad_norm": 0.07439128760967693, "learning_rate": 3.915413527925069e-06, "loss": 0.5278, "step": 4693 }, { "epoch": 2.2810942249240123, "grad_norm": 0.07618049696879843, "learning_rate": 3.913544524552899e-06, "loss": 0.5567, "step": 4694 }, { "epoch": 2.281580547112462, "grad_norm": 0.07384424073008243, "learning_rate": 3.911675680497089e-06, "loss": 0.5265, "step": 4695 }, { "epoch": 2.2820668693009116, "grad_norm": 0.07291462394707528, "learning_rate": 3.9098069960316805e-06, "loss": 0.5036, "step": 4696 }, { "epoch": 2.2825531914893618, "grad_norm": 0.06924792038609237, "learning_rate": 3.907938471430697e-06, "loss": 0.4844, "step": 4697 }, { "epoch": 2.2830395136778114, "grad_norm": 0.07073242890386755, "learning_rate": 3.906070106968135e-06, "loss": 0.527, "step": 4698 }, { "epoch": 2.2835258358662616, "grad_norm": 0.07398613169879711, "learning_rate": 3.90420190291797e-06, "loss": 0.5402, "step": 4699 }, { "epoch": 2.2840121580547113, "grad_norm": 0.07156168035173004, "learning_rate": 3.9023338595541535e-06, "loss": 0.5294, "step": 4700 }, { "epoch": 2.284498480243161, "grad_norm": 0.06993466863618257, "learning_rate": 3.90046597715061e-06, "loss": 0.545, "step": 4701 }, { "epoch": 2.284984802431611, "grad_norm": 0.07086382449912602, "learning_rate": 3.898598255981245e-06, "loss": 0.5151, "step": 4702 }, { "epoch": 2.2854711246200607, "grad_norm": 0.07101506174003709, "learning_rate": 3.8967306963199394e-06, "loss": 0.4801, "step": 4703 }, { "epoch": 2.285957446808511, "grad_norm": 0.07147039245482908, "learning_rate": 3.894863298440548e-06, "loss": 0.5235, "step": 4704 }, { "epoch": 2.2864437689969606, "grad_norm": 0.06988310483689339, "learning_rate": 3.8929960626169036e-06, "loss": 0.5272, "step": 4705 }, { "epoch": 2.2869300911854102, "grad_norm": 0.07048340393795582, "learning_rate": 3.891128989122816e-06, "loss": 0.5355, "step": 4706 }, { "epoch": 2.2874164133738604, "grad_norm": 0.0743465997284842, "learning_rate": 3.889262078232071e-06, "loss": 0.5662, "step": 4707 }, { "epoch": 2.28790273556231, "grad_norm": 0.07107155213536039, "learning_rate": 3.887395330218429e-06, "loss": 0.5308, "step": 4708 }, { "epoch": 2.2883890577507597, "grad_norm": 0.07308034590818346, "learning_rate": 3.8855287453556275e-06, "loss": 0.5128, "step": 4709 }, { "epoch": 2.28887537993921, "grad_norm": 0.07297240475753164, "learning_rate": 3.8836623239173794e-06, "loss": 0.5137, "step": 4710 }, { "epoch": 2.2893617021276595, "grad_norm": 0.07148740551001319, "learning_rate": 3.881796066177374e-06, "loss": 0.4975, "step": 4711 }, { "epoch": 2.2898480243161092, "grad_norm": 0.07199130545353712, "learning_rate": 3.879929972409276e-06, "loss": 0.5352, "step": 4712 }, { "epoch": 2.2903343465045594, "grad_norm": 0.07472586487009215, "learning_rate": 3.87806404288673e-06, "loss": 0.5494, "step": 4713 }, { "epoch": 2.290820668693009, "grad_norm": 0.07351698648123663, "learning_rate": 3.876198277883353e-06, "loss": 0.5177, "step": 4714 }, { "epoch": 2.291306990881459, "grad_norm": 0.07444764856092664, "learning_rate": 3.874332677672735e-06, "loss": 0.5549, "step": 4715 }, { "epoch": 2.291793313069909, "grad_norm": 0.07391139936003299, "learning_rate": 3.872467242528448e-06, "loss": 0.5604, "step": 4716 }, { "epoch": 2.2922796352583585, "grad_norm": 0.06864485214114208, "learning_rate": 3.870601972724036e-06, "loss": 0.5119, "step": 4717 }, { "epoch": 2.2927659574468087, "grad_norm": 0.0727952036298192, "learning_rate": 3.868736868533019e-06, "loss": 0.5045, "step": 4718 }, { "epoch": 2.2932522796352584, "grad_norm": 0.07105980123026319, "learning_rate": 3.866871930228894e-06, "loss": 0.5121, "step": 4719 }, { "epoch": 2.293738601823708, "grad_norm": 0.07278632325963608, "learning_rate": 3.865007158085134e-06, "loss": 0.5251, "step": 4720 }, { "epoch": 2.294224924012158, "grad_norm": 0.07036386733435124, "learning_rate": 3.863142552375184e-06, "loss": 0.5125, "step": 4721 }, { "epoch": 2.294711246200608, "grad_norm": 0.07184550273936362, "learning_rate": 3.8612781133724695e-06, "loss": 0.5339, "step": 4722 }, { "epoch": 2.2951975683890575, "grad_norm": 0.07296058221154193, "learning_rate": 3.859413841350388e-06, "loss": 0.5394, "step": 4723 }, { "epoch": 2.2956838905775077, "grad_norm": 0.07301576172435108, "learning_rate": 3.8575497365823164e-06, "loss": 0.547, "step": 4724 }, { "epoch": 2.2961702127659573, "grad_norm": 0.07148596667984473, "learning_rate": 3.855685799341601e-06, "loss": 0.5085, "step": 4725 }, { "epoch": 2.2966565349544075, "grad_norm": 0.07141941471796145, "learning_rate": 3.853822029901568e-06, "loss": 0.5258, "step": 4726 }, { "epoch": 2.297142857142857, "grad_norm": 0.07187324843725157, "learning_rate": 3.85195842853552e-06, "loss": 0.5476, "step": 4727 }, { "epoch": 2.297629179331307, "grad_norm": 0.07151277663867313, "learning_rate": 3.85009499551673e-06, "loss": 0.5243, "step": 4728 }, { "epoch": 2.298115501519757, "grad_norm": 0.06959874803827659, "learning_rate": 3.848231731118452e-06, "loss": 0.479, "step": 4729 }, { "epoch": 2.2986018237082066, "grad_norm": 0.07133049646175173, "learning_rate": 3.846368635613912e-06, "loss": 0.5044, "step": 4730 }, { "epoch": 2.2990881458966568, "grad_norm": 0.07249016020552276, "learning_rate": 3.8445057092763086e-06, "loss": 0.5206, "step": 4731 }, { "epoch": 2.2995744680851065, "grad_norm": 0.07414595690934293, "learning_rate": 3.842642952378823e-06, "loss": 0.4939, "step": 4732 }, { "epoch": 2.300060790273556, "grad_norm": 0.0732216936531094, "learning_rate": 3.840780365194606e-06, "loss": 0.5117, "step": 4733 }, { "epoch": 2.3005471124620063, "grad_norm": 0.07005468712117852, "learning_rate": 3.838917947996786e-06, "loss": 0.5215, "step": 4734 }, { "epoch": 2.301033434650456, "grad_norm": 0.07158626863991081, "learning_rate": 3.837055701058462e-06, "loss": 0.5067, "step": 4735 }, { "epoch": 2.3015197568389056, "grad_norm": 0.07049661636196115, "learning_rate": 3.835193624652714e-06, "loss": 0.4754, "step": 4736 }, { "epoch": 2.3020060790273558, "grad_norm": 0.06993799380420346, "learning_rate": 3.833331719052593e-06, "loss": 0.5046, "step": 4737 }, { "epoch": 2.3024924012158055, "grad_norm": 0.07070982916552473, "learning_rate": 3.8314699845311295e-06, "loss": 0.5014, "step": 4738 }, { "epoch": 2.302978723404255, "grad_norm": 0.07131370952569205, "learning_rate": 3.829608421361321e-06, "loss": 0.5261, "step": 4739 }, { "epoch": 2.3034650455927053, "grad_norm": 0.07281328662297258, "learning_rate": 3.827747029816148e-06, "loss": 0.5169, "step": 4740 }, { "epoch": 2.303951367781155, "grad_norm": 0.0730018194450882, "learning_rate": 3.82588581016856e-06, "loss": 0.5345, "step": 4741 }, { "epoch": 2.304437689969605, "grad_norm": 0.07176971230555247, "learning_rate": 3.824024762691485e-06, "loss": 0.5591, "step": 4742 }, { "epoch": 2.3049240121580548, "grad_norm": 0.0730575520211184, "learning_rate": 3.822163887657825e-06, "loss": 0.5016, "step": 4743 }, { "epoch": 2.3054103343465044, "grad_norm": 0.07111386060619199, "learning_rate": 3.820303185340456e-06, "loss": 0.5157, "step": 4744 }, { "epoch": 2.3058966565349546, "grad_norm": 0.0719805286930231, "learning_rate": 3.818442656012228e-06, "loss": 0.5297, "step": 4745 }, { "epoch": 2.3063829787234043, "grad_norm": 0.07345378781834982, "learning_rate": 3.816582299945967e-06, "loss": 0.5341, "step": 4746 }, { "epoch": 2.306869300911854, "grad_norm": 0.07203398851803429, "learning_rate": 3.814722117414473e-06, "loss": 0.4971, "step": 4747 }, { "epoch": 2.307355623100304, "grad_norm": 0.07108983146392801, "learning_rate": 3.812862108690522e-06, "loss": 0.5231, "step": 4748 }, { "epoch": 2.3078419452887537, "grad_norm": 0.07077642296207613, "learning_rate": 3.8110022740468587e-06, "loss": 0.5099, "step": 4749 }, { "epoch": 2.3083282674772034, "grad_norm": 0.0718048590818655, "learning_rate": 3.8091426137562128e-06, "loss": 0.5031, "step": 4750 }, { "epoch": 2.3088145896656536, "grad_norm": 0.07155037591587134, "learning_rate": 3.8072831280912785e-06, "loss": 0.5534, "step": 4751 }, { "epoch": 2.3093009118541032, "grad_norm": 0.07114749677425357, "learning_rate": 3.8054238173247295e-06, "loss": 0.5054, "step": 4752 }, { "epoch": 2.3097872340425534, "grad_norm": 0.0701140022782335, "learning_rate": 3.8035646817292136e-06, "loss": 0.5116, "step": 4753 }, { "epoch": 2.310273556231003, "grad_norm": 0.07063718472295341, "learning_rate": 3.8017057215773502e-06, "loss": 0.5194, "step": 4754 }, { "epoch": 2.3107598784194527, "grad_norm": 0.07179254125621902, "learning_rate": 3.799846937141734e-06, "loss": 0.5287, "step": 4755 }, { "epoch": 2.311246200607903, "grad_norm": 0.07255922385885435, "learning_rate": 3.7979883286949366e-06, "loss": 0.5177, "step": 4756 }, { "epoch": 2.3117325227963526, "grad_norm": 0.06930819228017146, "learning_rate": 3.7961298965095005e-06, "loss": 0.5051, "step": 4757 }, { "epoch": 2.3122188449848027, "grad_norm": 0.07497506082260916, "learning_rate": 3.794271640857945e-06, "loss": 0.5585, "step": 4758 }, { "epoch": 2.3127051671732524, "grad_norm": 0.0699955603041079, "learning_rate": 3.792413562012761e-06, "loss": 0.5191, "step": 4759 }, { "epoch": 2.313191489361702, "grad_norm": 0.07171338950978387, "learning_rate": 3.790555660246415e-06, "loss": 0.5253, "step": 4760 }, { "epoch": 2.3136778115501517, "grad_norm": 0.07228107672574575, "learning_rate": 3.7886979358313477e-06, "loss": 0.5367, "step": 4761 }, { "epoch": 2.314164133738602, "grad_norm": 0.07395584018772629, "learning_rate": 3.7868403890399734e-06, "loss": 0.5715, "step": 4762 }, { "epoch": 2.3146504559270515, "grad_norm": 0.07755690422327892, "learning_rate": 3.784983020144679e-06, "loss": 0.5908, "step": 4763 }, { "epoch": 2.3151367781155017, "grad_norm": 0.07157445152422266, "learning_rate": 3.7831258294178268e-06, "loss": 0.5094, "step": 4764 }, { "epoch": 2.3156231003039514, "grad_norm": 0.06955610533387216, "learning_rate": 3.7812688171317534e-06, "loss": 0.4898, "step": 4765 }, { "epoch": 2.316109422492401, "grad_norm": 0.07203231578730333, "learning_rate": 3.7794119835587687e-06, "loss": 0.5197, "step": 4766 }, { "epoch": 2.316595744680851, "grad_norm": 0.07098383689277571, "learning_rate": 3.7775553289711536e-06, "loss": 0.4942, "step": 4767 }, { "epoch": 2.317082066869301, "grad_norm": 0.06945193951382612, "learning_rate": 3.775698853641171e-06, "loss": 0.5191, "step": 4768 }, { "epoch": 2.317568389057751, "grad_norm": 0.0724118194764258, "learning_rate": 3.7738425578410477e-06, "loss": 0.5253, "step": 4769 }, { "epoch": 2.3180547112462007, "grad_norm": 0.07415442115060311, "learning_rate": 3.7719864418429887e-06, "loss": 0.5425, "step": 4770 }, { "epoch": 2.3185410334346503, "grad_norm": 0.07288948164699163, "learning_rate": 3.7701305059191736e-06, "loss": 0.5358, "step": 4771 }, { "epoch": 2.3190273556231005, "grad_norm": 0.07314320607063393, "learning_rate": 3.7682747503417537e-06, "loss": 0.5181, "step": 4772 }, { "epoch": 2.31951367781155, "grad_norm": 0.07141040186456821, "learning_rate": 3.7664191753828536e-06, "loss": 0.5355, "step": 4773 }, { "epoch": 2.32, "grad_norm": 0.07141149119131202, "learning_rate": 3.764563781314574e-06, "loss": 0.5388, "step": 4774 }, { "epoch": 2.32048632218845, "grad_norm": 0.07050257665061001, "learning_rate": 3.762708568408987e-06, "loss": 0.5055, "step": 4775 }, { "epoch": 2.3209726443768997, "grad_norm": 0.07036539776286459, "learning_rate": 3.760853536938137e-06, "loss": 0.5025, "step": 4776 }, { "epoch": 2.3214589665653493, "grad_norm": 0.07005550035762793, "learning_rate": 3.7589986871740466e-06, "loss": 0.4919, "step": 4777 }, { "epoch": 2.3219452887537995, "grad_norm": 0.07613550958473317, "learning_rate": 3.7571440193887044e-06, "loss": 0.5536, "step": 4778 }, { "epoch": 2.322431610942249, "grad_norm": 0.07806552850604012, "learning_rate": 3.7552895338540785e-06, "loss": 0.5183, "step": 4779 }, { "epoch": 2.3229179331306993, "grad_norm": 0.07288733633932881, "learning_rate": 3.7534352308421075e-06, "loss": 0.5026, "step": 4780 }, { "epoch": 2.323404255319149, "grad_norm": 0.07202825418931809, "learning_rate": 3.7515811106247047e-06, "loss": 0.5293, "step": 4781 }, { "epoch": 2.3238905775075986, "grad_norm": 0.0708108492366933, "learning_rate": 3.7497271734737545e-06, "loss": 0.5065, "step": 4782 }, { "epoch": 2.3243768996960488, "grad_norm": 0.0723607877647087, "learning_rate": 3.7478734196611172e-06, "loss": 0.5546, "step": 4783 }, { "epoch": 2.3248632218844985, "grad_norm": 0.07195535147468135, "learning_rate": 3.7460198494586236e-06, "loss": 0.5342, "step": 4784 }, { "epoch": 2.325349544072948, "grad_norm": 0.06962206754001322, "learning_rate": 3.7441664631380787e-06, "loss": 0.5238, "step": 4785 }, { "epoch": 2.3258358662613983, "grad_norm": 0.06909603792505634, "learning_rate": 3.7423132609712613e-06, "loss": 0.512, "step": 4786 }, { "epoch": 2.326322188449848, "grad_norm": 0.07044052794358648, "learning_rate": 3.740460243229923e-06, "loss": 0.5187, "step": 4787 }, { "epoch": 2.3268085106382976, "grad_norm": 0.07491907293544683, "learning_rate": 3.7386074101857866e-06, "loss": 0.5827, "step": 4788 }, { "epoch": 2.3272948328267478, "grad_norm": 0.07142568653057813, "learning_rate": 3.736754762110549e-06, "loss": 0.5395, "step": 4789 }, { "epoch": 2.3277811550151974, "grad_norm": 0.07071150138241877, "learning_rate": 3.7349022992758816e-06, "loss": 0.5343, "step": 4790 }, { "epoch": 2.3282674772036476, "grad_norm": 0.07158552480920521, "learning_rate": 3.733050021953425e-06, "loss": 0.5355, "step": 4791 }, { "epoch": 2.3287537993920973, "grad_norm": 0.07190579571713522, "learning_rate": 3.731197930414797e-06, "loss": 0.509, "step": 4792 }, { "epoch": 2.329240121580547, "grad_norm": 0.0719205145585961, "learning_rate": 3.7293460249315826e-06, "loss": 0.5204, "step": 4793 }, { "epoch": 2.329726443768997, "grad_norm": 0.07390191912726275, "learning_rate": 3.7274943057753455e-06, "loss": 0.5365, "step": 4794 }, { "epoch": 2.3302127659574468, "grad_norm": 0.07421952266945099, "learning_rate": 3.725642773217617e-06, "loss": 0.5496, "step": 4795 }, { "epoch": 2.330699088145897, "grad_norm": 0.07368880630778797, "learning_rate": 3.7237914275299057e-06, "loss": 0.543, "step": 4796 }, { "epoch": 2.3311854103343466, "grad_norm": 0.07089745500331597, "learning_rate": 3.721940268983688e-06, "loss": 0.5415, "step": 4797 }, { "epoch": 2.3316717325227962, "grad_norm": 0.06981533793513522, "learning_rate": 3.720089297850418e-06, "loss": 0.506, "step": 4798 }, { "epoch": 2.3321580547112464, "grad_norm": 0.07037159633706239, "learning_rate": 3.7182385144015165e-06, "loss": 0.5255, "step": 4799 }, { "epoch": 2.332644376899696, "grad_norm": 0.06824072749249188, "learning_rate": 3.716387918908383e-06, "loss": 0.511, "step": 4800 }, { "epoch": 2.3331306990881457, "grad_norm": 0.07148729152702869, "learning_rate": 3.7145375116423847e-06, "loss": 0.5298, "step": 4801 }, { "epoch": 2.333617021276596, "grad_norm": 0.07303649687938313, "learning_rate": 3.7126872928748623e-06, "loss": 0.5027, "step": 4802 }, { "epoch": 2.3341033434650456, "grad_norm": 0.071207249943337, "learning_rate": 3.7108372628771284e-06, "loss": 0.5055, "step": 4803 }, { "epoch": 2.3345896656534952, "grad_norm": 0.07197906235010862, "learning_rate": 3.7089874219204715e-06, "loss": 0.5383, "step": 4804 }, { "epoch": 2.3350759878419454, "grad_norm": 0.07362050908452387, "learning_rate": 3.707137770276149e-06, "loss": 0.5428, "step": 4805 }, { "epoch": 2.335562310030395, "grad_norm": 0.07671725407732392, "learning_rate": 3.7052883082153927e-06, "loss": 0.5407, "step": 4806 }, { "epoch": 2.336048632218845, "grad_norm": 0.07427949562763843, "learning_rate": 3.7034390360094026e-06, "loss": 0.5472, "step": 4807 }, { "epoch": 2.336534954407295, "grad_norm": 0.07145509561176622, "learning_rate": 3.701589953929354e-06, "loss": 0.5186, "step": 4808 }, { "epoch": 2.3370212765957445, "grad_norm": 0.07342895170374958, "learning_rate": 3.6997410622463947e-06, "loss": 0.554, "step": 4809 }, { "epoch": 2.3375075987841947, "grad_norm": 0.07333972793510114, "learning_rate": 3.6978923612316427e-06, "loss": 0.5575, "step": 4810 }, { "epoch": 2.3379939209726444, "grad_norm": 0.07116944443519507, "learning_rate": 3.6960438511561897e-06, "loss": 0.5343, "step": 4811 }, { "epoch": 2.338480243161094, "grad_norm": 0.07030477272928837, "learning_rate": 3.694195532291098e-06, "loss": 0.5166, "step": 4812 }, { "epoch": 2.338966565349544, "grad_norm": 0.07121734054318392, "learning_rate": 3.6923474049074037e-06, "loss": 0.491, "step": 4813 }, { "epoch": 2.339452887537994, "grad_norm": 0.07407484792987686, "learning_rate": 3.690499469276113e-06, "loss": 0.5565, "step": 4814 }, { "epoch": 2.3399392097264435, "grad_norm": 0.07365003602827443, "learning_rate": 3.6886517256682053e-06, "loss": 0.5295, "step": 4815 }, { "epoch": 2.3404255319148937, "grad_norm": 0.07009545778284128, "learning_rate": 3.686804174354631e-06, "loss": 0.5008, "step": 4816 }, { "epoch": 2.3409118541033433, "grad_norm": 0.07093665843271561, "learning_rate": 3.684956815606311e-06, "loss": 0.5033, "step": 4817 }, { "epoch": 2.3413981762917935, "grad_norm": 0.07405490483646458, "learning_rate": 3.683109649694141e-06, "loss": 0.5465, "step": 4818 }, { "epoch": 2.341884498480243, "grad_norm": 0.0702377861505547, "learning_rate": 3.681262676888987e-06, "loss": 0.5185, "step": 4819 }, { "epoch": 2.342370820668693, "grad_norm": 0.07231639145354879, "learning_rate": 3.6794158974616857e-06, "loss": 0.5201, "step": 4820 }, { "epoch": 2.342857142857143, "grad_norm": 0.07176915036529734, "learning_rate": 3.6775693116830456e-06, "loss": 0.5498, "step": 4821 }, { "epoch": 2.3433434650455927, "grad_norm": 0.07051901451212278, "learning_rate": 3.67572291982385e-06, "loss": 0.4991, "step": 4822 }, { "epoch": 2.343829787234043, "grad_norm": 0.06992144811152087, "learning_rate": 3.6738767221548505e-06, "loss": 0.4726, "step": 4823 }, { "epoch": 2.3443161094224925, "grad_norm": 0.07274848353394854, "learning_rate": 3.6720307189467702e-06, "loss": 0.5203, "step": 4824 }, { "epoch": 2.344802431610942, "grad_norm": 0.06974851133236662, "learning_rate": 3.6701849104703046e-06, "loss": 0.5117, "step": 4825 }, { "epoch": 2.3452887537993923, "grad_norm": 0.07503067916975177, "learning_rate": 3.6683392969961213e-06, "loss": 0.5434, "step": 4826 }, { "epoch": 2.345775075987842, "grad_norm": 0.07194162855671007, "learning_rate": 3.666493878794858e-06, "loss": 0.5428, "step": 4827 }, { "epoch": 2.3462613981762916, "grad_norm": 0.07654877556882764, "learning_rate": 3.664648656137124e-06, "loss": 0.5215, "step": 4828 }, { "epoch": 2.3467477203647418, "grad_norm": 0.06877015547464911, "learning_rate": 3.662803629293501e-06, "loss": 0.4833, "step": 4829 }, { "epoch": 2.3472340425531915, "grad_norm": 0.07081742857510276, "learning_rate": 3.6609587985345418e-06, "loss": 0.4967, "step": 4830 }, { "epoch": 2.347720364741641, "grad_norm": 0.07099546314230694, "learning_rate": 3.6591141641307683e-06, "loss": 0.526, "step": 4831 }, { "epoch": 2.3482066869300913, "grad_norm": 0.07009258986916114, "learning_rate": 3.657269726352676e-06, "loss": 0.5141, "step": 4832 }, { "epoch": 2.348693009118541, "grad_norm": 0.07150192698685824, "learning_rate": 3.6554254854707294e-06, "loss": 0.5088, "step": 4833 }, { "epoch": 2.349179331306991, "grad_norm": 0.07117969510673795, "learning_rate": 3.6535814417553674e-06, "loss": 0.5034, "step": 4834 }, { "epoch": 2.3496656534954408, "grad_norm": 0.07634931192122975, "learning_rate": 3.6517375954769975e-06, "loss": 0.5341, "step": 4835 }, { "epoch": 2.3501519756838904, "grad_norm": 0.07494425914526873, "learning_rate": 3.649893946905999e-06, "loss": 0.5745, "step": 4836 }, { "epoch": 2.3506382978723406, "grad_norm": 0.07001160047298115, "learning_rate": 3.648050496312721e-06, "loss": 0.487, "step": 4837 }, { "epoch": 2.3511246200607903, "grad_norm": 0.06904929264094169, "learning_rate": 3.6462072439674857e-06, "loss": 0.484, "step": 4838 }, { "epoch": 2.35161094224924, "grad_norm": 0.07184589864888712, "learning_rate": 3.6443641901405834e-06, "loss": 0.5626, "step": 4839 }, { "epoch": 2.35209726443769, "grad_norm": 0.07413227159000095, "learning_rate": 3.6425213351022803e-06, "loss": 0.5473, "step": 4840 }, { "epoch": 2.3525835866261398, "grad_norm": 0.07070517806821587, "learning_rate": 3.640678679122808e-06, "loss": 0.5197, "step": 4841 }, { "epoch": 2.3530699088145894, "grad_norm": 0.0739435464504075, "learning_rate": 3.6388362224723705e-06, "loss": 0.5512, "step": 4842 }, { "epoch": 2.3535562310030396, "grad_norm": 0.06968604104799363, "learning_rate": 3.636993965421144e-06, "loss": 0.514, "step": 4843 }, { "epoch": 2.3540425531914893, "grad_norm": 0.07414563041898994, "learning_rate": 3.635151908239275e-06, "loss": 0.5619, "step": 4844 }, { "epoch": 2.3545288753799394, "grad_norm": 0.0763558038856102, "learning_rate": 3.6333100511968807e-06, "loss": 0.5319, "step": 4845 }, { "epoch": 2.355015197568389, "grad_norm": 0.0713713703377589, "learning_rate": 3.6314683945640462e-06, "loss": 0.4953, "step": 4846 }, { "epoch": 2.3555015197568387, "grad_norm": 0.07386739311345307, "learning_rate": 3.629626938610831e-06, "loss": 0.5098, "step": 4847 }, { "epoch": 2.355987841945289, "grad_norm": 0.07077049155481384, "learning_rate": 3.6277856836072647e-06, "loss": 0.5093, "step": 4848 }, { "epoch": 2.3564741641337386, "grad_norm": 0.07648380639768826, "learning_rate": 3.6259446298233434e-06, "loss": 0.5799, "step": 4849 }, { "epoch": 2.3569604863221887, "grad_norm": 0.07352679893735549, "learning_rate": 3.62410377752904e-06, "loss": 0.5052, "step": 4850 }, { "epoch": 2.3574468085106384, "grad_norm": 0.07355841291565811, "learning_rate": 3.6222631269942933e-06, "loss": 0.5203, "step": 4851 }, { "epoch": 2.357933130699088, "grad_norm": 0.07470438722217965, "learning_rate": 3.620422678489014e-06, "loss": 0.5285, "step": 4852 }, { "epoch": 2.358419452887538, "grad_norm": 0.07189338853898246, "learning_rate": 3.618582432283082e-06, "loss": 0.5432, "step": 4853 }, { "epoch": 2.358905775075988, "grad_norm": 0.07311963763751952, "learning_rate": 3.616742388646351e-06, "loss": 0.5386, "step": 4854 }, { "epoch": 2.3593920972644375, "grad_norm": 0.07099606579083245, "learning_rate": 3.6149025478486393e-06, "loss": 0.4954, "step": 4855 }, { "epoch": 2.3598784194528877, "grad_norm": 0.0750258337291273, "learning_rate": 3.6130629101597404e-06, "loss": 0.5387, "step": 4856 }, { "epoch": 2.3603647416413374, "grad_norm": 0.07569689187905691, "learning_rate": 3.6112234758494156e-06, "loss": 0.5514, "step": 4857 }, { "epoch": 2.360851063829787, "grad_norm": 0.07319743555953466, "learning_rate": 3.6093842451873955e-06, "loss": 0.5027, "step": 4858 }, { "epoch": 2.361337386018237, "grad_norm": 0.07370897480139857, "learning_rate": 3.6075452184433867e-06, "loss": 0.5318, "step": 4859 }, { "epoch": 2.361823708206687, "grad_norm": 0.07117204860689534, "learning_rate": 3.6057063958870604e-06, "loss": 0.5186, "step": 4860 }, { "epoch": 2.362310030395137, "grad_norm": 0.07099939836852176, "learning_rate": 3.6038677777880564e-06, "loss": 0.5298, "step": 4861 }, { "epoch": 2.3627963525835867, "grad_norm": 0.07109218678986207, "learning_rate": 3.6020293644159887e-06, "loss": 0.5181, "step": 4862 }, { "epoch": 2.3632826747720364, "grad_norm": 0.0728788035204336, "learning_rate": 3.6001911560404403e-06, "loss": 0.5518, "step": 4863 }, { "epoch": 2.3637689969604865, "grad_norm": 0.06970638306718911, "learning_rate": 3.5983531529309625e-06, "loss": 0.5256, "step": 4864 }, { "epoch": 2.364255319148936, "grad_norm": 0.0708632735719275, "learning_rate": 3.5965153553570774e-06, "loss": 0.5184, "step": 4865 }, { "epoch": 2.364741641337386, "grad_norm": 0.07267390571201542, "learning_rate": 3.594677763588279e-06, "loss": 0.5231, "step": 4866 }, { "epoch": 2.365227963525836, "grad_norm": 0.07136855787566969, "learning_rate": 3.592840377894028e-06, "loss": 0.5176, "step": 4867 }, { "epoch": 2.3657142857142857, "grad_norm": 0.07032806839767311, "learning_rate": 3.5910031985437553e-06, "loss": 0.5312, "step": 4868 }, { "epoch": 2.3662006079027353, "grad_norm": 0.07234474630538439, "learning_rate": 3.589166225806865e-06, "loss": 0.5298, "step": 4869 }, { "epoch": 2.3666869300911855, "grad_norm": 0.07240200475705828, "learning_rate": 3.5873294599527255e-06, "loss": 0.5472, "step": 4870 }, { "epoch": 2.367173252279635, "grad_norm": 0.07406665008836129, "learning_rate": 3.5854929012506788e-06, "loss": 0.5362, "step": 4871 }, { "epoch": 2.3676595744680853, "grad_norm": 0.06955507412384188, "learning_rate": 3.5836565499700348e-06, "loss": 0.494, "step": 4872 }, { "epoch": 2.368145896656535, "grad_norm": 0.07437143351671988, "learning_rate": 3.581820406380075e-06, "loss": 0.5387, "step": 4873 }, { "epoch": 2.3686322188449846, "grad_norm": 0.07197375685166531, "learning_rate": 3.5799844707500475e-06, "loss": 0.5355, "step": 4874 }, { "epoch": 2.3691185410334348, "grad_norm": 0.07343538810466457, "learning_rate": 3.5781487433491724e-06, "loss": 0.501, "step": 4875 }, { "epoch": 2.3696048632218845, "grad_norm": 0.07493330334459582, "learning_rate": 3.5763132244466363e-06, "loss": 0.5111, "step": 4876 }, { "epoch": 2.3700911854103346, "grad_norm": 0.07184650306558257, "learning_rate": 3.5744779143116005e-06, "loss": 0.5146, "step": 4877 }, { "epoch": 2.3705775075987843, "grad_norm": 0.07148059921025327, "learning_rate": 3.5726428132131902e-06, "loss": 0.5367, "step": 4878 }, { "epoch": 2.371063829787234, "grad_norm": 0.07157251056385625, "learning_rate": 3.5708079214205027e-06, "loss": 0.5399, "step": 4879 }, { "epoch": 2.371550151975684, "grad_norm": 0.07086216464861735, "learning_rate": 3.5689732392026044e-06, "loss": 0.5176, "step": 4880 }, { "epoch": 2.3720364741641338, "grad_norm": 0.07354005094734528, "learning_rate": 3.5671387668285294e-06, "loss": 0.5609, "step": 4881 }, { "epoch": 2.3725227963525835, "grad_norm": 0.07361620052596375, "learning_rate": 3.565304504567284e-06, "loss": 0.5239, "step": 4882 }, { "epoch": 2.3730091185410336, "grad_norm": 0.07340444972490866, "learning_rate": 3.5634704526878405e-06, "loss": 0.5837, "step": 4883 }, { "epoch": 2.3734954407294833, "grad_norm": 0.071420407144292, "learning_rate": 3.561636611459143e-06, "loss": 0.5227, "step": 4884 }, { "epoch": 2.373981762917933, "grad_norm": 0.07335219183256791, "learning_rate": 3.559802981150102e-06, "loss": 0.5083, "step": 4885 }, { "epoch": 2.374468085106383, "grad_norm": 0.07195634767795334, "learning_rate": 3.557969562029599e-06, "loss": 0.5199, "step": 4886 }, { "epoch": 2.3749544072948328, "grad_norm": 0.07223007961779847, "learning_rate": 3.5561363543664846e-06, "loss": 0.5431, "step": 4887 }, { "epoch": 2.375440729483283, "grad_norm": 0.07330084517468019, "learning_rate": 3.5543033584295775e-06, "loss": 0.5466, "step": 4888 }, { "epoch": 2.3759270516717326, "grad_norm": 0.0713798829929337, "learning_rate": 3.5524705744876666e-06, "loss": 0.5222, "step": 4889 }, { "epoch": 2.3764133738601823, "grad_norm": 0.07173041515811288, "learning_rate": 3.550638002809507e-06, "loss": 0.5019, "step": 4890 }, { "epoch": 2.3768996960486324, "grad_norm": 0.070172140030036, "learning_rate": 3.548805643663826e-06, "loss": 0.4846, "step": 4891 }, { "epoch": 2.377386018237082, "grad_norm": 0.0737277339039337, "learning_rate": 3.546973497319319e-06, "loss": 0.5242, "step": 4892 }, { "epoch": 2.3778723404255317, "grad_norm": 0.07381191572837867, "learning_rate": 3.5451415640446485e-06, "loss": 0.5796, "step": 4893 }, { "epoch": 2.378358662613982, "grad_norm": 0.07046316987745402, "learning_rate": 3.543309844108444e-06, "loss": 0.5197, "step": 4894 }, { "epoch": 2.3788449848024316, "grad_norm": 0.07114823390414793, "learning_rate": 3.5414783377793105e-06, "loss": 0.5134, "step": 4895 }, { "epoch": 2.3793313069908812, "grad_norm": 0.07696036127901158, "learning_rate": 3.539647045325817e-06, "loss": 0.5797, "step": 4896 }, { "epoch": 2.3798176291793314, "grad_norm": 0.07314560125293358, "learning_rate": 3.5378159670165e-06, "loss": 0.5219, "step": 4897 }, { "epoch": 2.380303951367781, "grad_norm": 0.072809667254612, "learning_rate": 3.5359851031198687e-06, "loss": 0.5237, "step": 4898 }, { "epoch": 2.380790273556231, "grad_norm": 0.0718829561423349, "learning_rate": 3.534154453904396e-06, "loss": 0.5233, "step": 4899 }, { "epoch": 2.381276595744681, "grad_norm": 0.0725935395427643, "learning_rate": 3.5323240196385265e-06, "loss": 0.5416, "step": 4900 }, { "epoch": 2.3817629179331306, "grad_norm": 0.0703349844653874, "learning_rate": 3.530493800590674e-06, "loss": 0.5255, "step": 4901 }, { "epoch": 2.3822492401215807, "grad_norm": 0.07275114629535887, "learning_rate": 3.5286637970292176e-06, "loss": 0.5193, "step": 4902 }, { "epoch": 2.3827355623100304, "grad_norm": 0.07205386414748965, "learning_rate": 3.5268340092225074e-06, "loss": 0.5229, "step": 4903 }, { "epoch": 2.3832218844984805, "grad_norm": 0.07009430414343244, "learning_rate": 3.5250044374388605e-06, "loss": 0.4847, "step": 4904 }, { "epoch": 2.38370820668693, "grad_norm": 0.0718443944175079, "learning_rate": 3.5231750819465633e-06, "loss": 0.4914, "step": 4905 }, { "epoch": 2.38419452887538, "grad_norm": 0.07108323576989391, "learning_rate": 3.5213459430138697e-06, "loss": 0.523, "step": 4906 }, { "epoch": 2.3846808510638295, "grad_norm": 0.07208729365718768, "learning_rate": 3.5195170209090026e-06, "loss": 0.5257, "step": 4907 }, { "epoch": 2.3851671732522797, "grad_norm": 0.07269320932049202, "learning_rate": 3.5176883159001536e-06, "loss": 0.5287, "step": 4908 }, { "epoch": 2.3856534954407294, "grad_norm": 0.07244691989592308, "learning_rate": 3.515859828255479e-06, "loss": 0.5737, "step": 4909 }, { "epoch": 2.3861398176291795, "grad_norm": 0.07276605958379954, "learning_rate": 3.5140315582431074e-06, "loss": 0.4934, "step": 4910 }, { "epoch": 2.386626139817629, "grad_norm": 0.06961523820488281, "learning_rate": 3.512203506131133e-06, "loss": 0.5142, "step": 4911 }, { "epoch": 2.387112462006079, "grad_norm": 0.07414438917471289, "learning_rate": 3.510375672187617e-06, "loss": 0.5415, "step": 4912 }, { "epoch": 2.387598784194529, "grad_norm": 0.07097672058797641, "learning_rate": 3.5085480566805963e-06, "loss": 0.5366, "step": 4913 }, { "epoch": 2.3880851063829787, "grad_norm": 0.07018587035995108, "learning_rate": 3.5067206598780656e-06, "loss": 0.4792, "step": 4914 }, { "epoch": 2.388571428571429, "grad_norm": 0.07188431897401468, "learning_rate": 3.504893482047993e-06, "loss": 0.5199, "step": 4915 }, { "epoch": 2.3890577507598785, "grad_norm": 0.07393089822267285, "learning_rate": 3.503066523458313e-06, "loss": 0.5482, "step": 4916 }, { "epoch": 2.389544072948328, "grad_norm": 0.0788267512243813, "learning_rate": 3.5012397843769287e-06, "loss": 0.5528, "step": 4917 }, { "epoch": 2.3900303951367783, "grad_norm": 0.07172982882729341, "learning_rate": 3.4994132650717107e-06, "loss": 0.52, "step": 4918 }, { "epoch": 2.390516717325228, "grad_norm": 0.07119375132893335, "learning_rate": 3.4975869658104964e-06, "loss": 0.5413, "step": 4919 }, { "epoch": 2.3910030395136777, "grad_norm": 0.07489557955345133, "learning_rate": 3.495760886861093e-06, "loss": 0.5593, "step": 4920 }, { "epoch": 2.391489361702128, "grad_norm": 0.07352003216737586, "learning_rate": 3.4939350284912737e-06, "loss": 0.5026, "step": 4921 }, { "epoch": 2.3919756838905775, "grad_norm": 0.07409139572085124, "learning_rate": 3.4921093909687808e-06, "loss": 0.5302, "step": 4922 }, { "epoch": 2.392462006079027, "grad_norm": 0.07052214217858034, "learning_rate": 3.490283974561322e-06, "loss": 0.5108, "step": 4923 }, { "epoch": 2.3929483282674773, "grad_norm": 0.08132424318787977, "learning_rate": 3.4884587795365744e-06, "loss": 0.5927, "step": 4924 }, { "epoch": 2.393434650455927, "grad_norm": 0.07116737077954373, "learning_rate": 3.486633806162181e-06, "loss": 0.4948, "step": 4925 }, { "epoch": 2.393920972644377, "grad_norm": 0.07142369209504221, "learning_rate": 3.4848090547057556e-06, "loss": 0.5379, "step": 4926 }, { "epoch": 2.3944072948328268, "grad_norm": 0.0719390472744797, "learning_rate": 3.482984525434876e-06, "loss": 0.4995, "step": 4927 }, { "epoch": 2.3948936170212765, "grad_norm": 0.07081053458427171, "learning_rate": 3.4811602186170886e-06, "loss": 0.5295, "step": 4928 }, { "epoch": 2.3953799392097266, "grad_norm": 0.07064728634122945, "learning_rate": 3.4793361345199074e-06, "loss": 0.518, "step": 4929 }, { "epoch": 2.3958662613981763, "grad_norm": 0.06928785824615319, "learning_rate": 3.4775122734108128e-06, "loss": 0.5024, "step": 4930 }, { "epoch": 2.396352583586626, "grad_norm": 0.07401725184601242, "learning_rate": 3.475688635557256e-06, "loss": 0.5738, "step": 4931 }, { "epoch": 2.396838905775076, "grad_norm": 0.07304477812217713, "learning_rate": 3.4738652212266506e-06, "loss": 0.5237, "step": 4932 }, { "epoch": 2.3973252279635258, "grad_norm": 0.06998979315027283, "learning_rate": 3.47204203068638e-06, "loss": 0.5012, "step": 4933 }, { "epoch": 2.3978115501519754, "grad_norm": 0.07150652298146956, "learning_rate": 3.470219064203795e-06, "loss": 0.5106, "step": 4934 }, { "epoch": 2.3982978723404256, "grad_norm": 0.07192059209215543, "learning_rate": 3.4683963220462113e-06, "loss": 0.524, "step": 4935 }, { "epoch": 2.3987841945288753, "grad_norm": 0.07068771005942975, "learning_rate": 3.4665738044809155e-06, "loss": 0.5188, "step": 4936 }, { "epoch": 2.3992705167173254, "grad_norm": 0.07204779497860687, "learning_rate": 3.4647515117751586e-06, "loss": 0.556, "step": 4937 }, { "epoch": 2.399756838905775, "grad_norm": 0.06956834440228282, "learning_rate": 3.462929444196158e-06, "loss": 0.5242, "step": 4938 }, { "epoch": 2.4002431610942248, "grad_norm": 0.07463770657647964, "learning_rate": 3.4611076020110996e-06, "loss": 0.5506, "step": 4939 }, { "epoch": 2.400729483282675, "grad_norm": 0.07148630675309851, "learning_rate": 3.4592859854871362e-06, "loss": 0.5102, "step": 4940 }, { "epoch": 2.4012158054711246, "grad_norm": 0.07150810813784506, "learning_rate": 3.4574645948913866e-06, "loss": 0.5262, "step": 4941 }, { "epoch": 2.4017021276595747, "grad_norm": 0.07196031898359004, "learning_rate": 3.455643430490938e-06, "loss": 0.5232, "step": 4942 }, { "epoch": 2.4021884498480244, "grad_norm": 0.07150618340785458, "learning_rate": 3.453822492552843e-06, "loss": 0.5271, "step": 4943 }, { "epoch": 2.402674772036474, "grad_norm": 0.06959454167437171, "learning_rate": 3.452001781344121e-06, "loss": 0.5274, "step": 4944 }, { "epoch": 2.403161094224924, "grad_norm": 0.07749890209793345, "learning_rate": 3.4501812971317596e-06, "loss": 0.5579, "step": 4945 }, { "epoch": 2.403647416413374, "grad_norm": 0.06986649260752789, "learning_rate": 3.448361040182712e-06, "loss": 0.5274, "step": 4946 }, { "epoch": 2.4041337386018236, "grad_norm": 0.07553775535189526, "learning_rate": 3.4465410107638974e-06, "loss": 0.5021, "step": 4947 }, { "epoch": 2.4046200607902737, "grad_norm": 0.07298953312805401, "learning_rate": 3.444721209142201e-06, "loss": 0.5056, "step": 4948 }, { "epoch": 2.4051063829787234, "grad_norm": 0.07184657536314888, "learning_rate": 3.442901635584479e-06, "loss": 0.4956, "step": 4949 }, { "epoch": 2.405592705167173, "grad_norm": 0.0704641027560034, "learning_rate": 3.4410822903575516e-06, "loss": 0.4979, "step": 4950 }, { "epoch": 2.406079027355623, "grad_norm": 0.07006671870344676, "learning_rate": 3.4392631737282022e-06, "loss": 0.4962, "step": 4951 }, { "epoch": 2.406565349544073, "grad_norm": 0.07273848194328271, "learning_rate": 3.437444285963187e-06, "loss": 0.5265, "step": 4952 }, { "epoch": 2.407051671732523, "grad_norm": 0.07130091640710873, "learning_rate": 3.4356256273292215e-06, "loss": 0.5506, "step": 4953 }, { "epoch": 2.4075379939209727, "grad_norm": 0.07309498937330876, "learning_rate": 3.4338071980929933e-06, "loss": 0.5436, "step": 4954 }, { "epoch": 2.4080243161094224, "grad_norm": 0.07631879586998074, "learning_rate": 3.431988998521155e-06, "loss": 0.5264, "step": 4955 }, { "epoch": 2.4085106382978725, "grad_norm": 0.07013709319346881, "learning_rate": 3.430171028880323e-06, "loss": 0.4985, "step": 4956 }, { "epoch": 2.408996960486322, "grad_norm": 0.06959931044153714, "learning_rate": 3.428353289437084e-06, "loss": 0.5184, "step": 4957 }, { "epoch": 2.409483282674772, "grad_norm": 0.0719716497037636, "learning_rate": 3.426535780457987e-06, "loss": 0.5339, "step": 4958 }, { "epoch": 2.409969604863222, "grad_norm": 0.0695659600629915, "learning_rate": 3.424718502209551e-06, "loss": 0.4859, "step": 4959 }, { "epoch": 2.4104559270516717, "grad_norm": 0.07241069862462253, "learning_rate": 3.4229014549582567e-06, "loss": 0.5625, "step": 4960 }, { "epoch": 2.4109422492401213, "grad_norm": 0.07440275495385786, "learning_rate": 3.4210846389705567e-06, "loss": 0.5551, "step": 4961 }, { "epoch": 2.4114285714285715, "grad_norm": 0.07512386055939603, "learning_rate": 3.4192680545128636e-06, "loss": 0.5225, "step": 4962 }, { "epoch": 2.411914893617021, "grad_norm": 0.07366556776583193, "learning_rate": 3.4174517018515603e-06, "loss": 0.523, "step": 4963 }, { "epoch": 2.4124012158054713, "grad_norm": 0.07229718128615438, "learning_rate": 3.415635581252993e-06, "loss": 0.5142, "step": 4964 }, { "epoch": 2.412887537993921, "grad_norm": 0.07078196288655379, "learning_rate": 3.4138196929834765e-06, "loss": 0.5106, "step": 4965 }, { "epoch": 2.4133738601823707, "grad_norm": 0.07281030570025167, "learning_rate": 3.4120040373092876e-06, "loss": 0.5323, "step": 4966 }, { "epoch": 2.413860182370821, "grad_norm": 0.0727869988770298, "learning_rate": 3.4101886144966772e-06, "loss": 0.5096, "step": 4967 }, { "epoch": 2.4143465045592705, "grad_norm": 0.07109123264772012, "learning_rate": 3.4083734248118514e-06, "loss": 0.5496, "step": 4968 }, { "epoch": 2.4148328267477206, "grad_norm": 0.07372158280934651, "learning_rate": 3.4065584685209895e-06, "loss": 0.5315, "step": 4969 }, { "epoch": 2.4153191489361703, "grad_norm": 0.07052926961665368, "learning_rate": 3.4047437458902333e-06, "loss": 0.5259, "step": 4970 }, { "epoch": 2.41580547112462, "grad_norm": 0.07303967664621815, "learning_rate": 3.402929257185691e-06, "loss": 0.5662, "step": 4971 }, { "epoch": 2.41629179331307, "grad_norm": 0.0705263464176145, "learning_rate": 3.4011150026734373e-06, "loss": 0.5378, "step": 4972 }, { "epoch": 2.4167781155015198, "grad_norm": 0.07115516616977438, "learning_rate": 3.3993009826195116e-06, "loss": 0.5499, "step": 4973 }, { "epoch": 2.4172644376899695, "grad_norm": 0.073584321003851, "learning_rate": 3.3974871972899204e-06, "loss": 0.5768, "step": 4974 }, { "epoch": 2.4177507598784196, "grad_norm": 0.07267100038340704, "learning_rate": 3.3956736469506334e-06, "loss": 0.5467, "step": 4975 }, { "epoch": 2.4182370820668693, "grad_norm": 0.06841099298058952, "learning_rate": 3.3938603318675888e-06, "loss": 0.4699, "step": 4976 }, { "epoch": 2.418723404255319, "grad_norm": 0.07093807119191846, "learning_rate": 3.392047252306687e-06, "loss": 0.5135, "step": 4977 }, { "epoch": 2.419209726443769, "grad_norm": 0.07208415886449852, "learning_rate": 3.3902344085337956e-06, "loss": 0.5242, "step": 4978 }, { "epoch": 2.4196960486322188, "grad_norm": 0.07117623067278844, "learning_rate": 3.3884218008147486e-06, "loss": 0.5066, "step": 4979 }, { "epoch": 2.420182370820669, "grad_norm": 0.07269038638145053, "learning_rate": 3.3866094294153436e-06, "loss": 0.5248, "step": 4980 }, { "epoch": 2.4206686930091186, "grad_norm": 0.07631746466793166, "learning_rate": 3.384797294601344e-06, "loss": 0.5348, "step": 4981 }, { "epoch": 2.4211550151975683, "grad_norm": 0.07027250038154988, "learning_rate": 3.3829853966384803e-06, "loss": 0.5232, "step": 4982 }, { "epoch": 2.4216413373860184, "grad_norm": 0.06996966005597284, "learning_rate": 3.381173735792445e-06, "loss": 0.5241, "step": 4983 }, { "epoch": 2.422127659574468, "grad_norm": 0.07012278863799731, "learning_rate": 3.379362312328899e-06, "loss": 0.5093, "step": 4984 }, { "epoch": 2.4226139817629178, "grad_norm": 0.06935591945197173, "learning_rate": 3.3775511265134646e-06, "loss": 0.4929, "step": 4985 }, { "epoch": 2.423100303951368, "grad_norm": 0.07399464231282409, "learning_rate": 3.375740178611735e-06, "loss": 0.5702, "step": 4986 }, { "epoch": 2.4235866261398176, "grad_norm": 0.07498828428271959, "learning_rate": 3.3739294688892632e-06, "loss": 0.5202, "step": 4987 }, { "epoch": 2.4240729483282673, "grad_norm": 0.072819413162072, "learning_rate": 3.3721189976115693e-06, "loss": 0.5192, "step": 4988 }, { "epoch": 2.4245592705167174, "grad_norm": 0.07150380805813271, "learning_rate": 3.370308765044139e-06, "loss": 0.5245, "step": 4989 }, { "epoch": 2.425045592705167, "grad_norm": 0.07105783789227622, "learning_rate": 3.368498771452422e-06, "loss": 0.5448, "step": 4990 }, { "epoch": 2.425531914893617, "grad_norm": 0.07133550615854353, "learning_rate": 3.366689017101834e-06, "loss": 0.5342, "step": 4991 }, { "epoch": 2.426018237082067, "grad_norm": 0.07275842003869201, "learning_rate": 3.364879502257753e-06, "loss": 0.5638, "step": 4992 }, { "epoch": 2.4265045592705166, "grad_norm": 0.072649123807271, "learning_rate": 3.3630702271855253e-06, "loss": 0.5288, "step": 4993 }, { "epoch": 2.4269908814589667, "grad_norm": 0.07027449399870422, "learning_rate": 3.36126119215046e-06, "loss": 0.5316, "step": 4994 }, { "epoch": 2.4274772036474164, "grad_norm": 0.07371908724534923, "learning_rate": 3.359452397417832e-06, "loss": 0.525, "step": 4995 }, { "epoch": 2.4279635258358665, "grad_norm": 0.07297046365843625, "learning_rate": 3.35764384325288e-06, "loss": 0.5412, "step": 4996 }, { "epoch": 2.428449848024316, "grad_norm": 0.07071125744170881, "learning_rate": 3.355835529920808e-06, "loss": 0.5394, "step": 4997 }, { "epoch": 2.428936170212766, "grad_norm": 0.07198964939283464, "learning_rate": 3.3540274576867853e-06, "loss": 0.5462, "step": 4998 }, { "epoch": 2.429422492401216, "grad_norm": 0.07044944977639393, "learning_rate": 3.3522196268159444e-06, "loss": 0.5286, "step": 4999 }, { "epoch": 2.4299088145896657, "grad_norm": 0.07203293701028296, "learning_rate": 3.350412037573385e-06, "loss": 0.5473, "step": 5000 }, { "epoch": 2.4303951367781154, "grad_norm": 0.07302044938806027, "learning_rate": 3.3486046902241663e-06, "loss": 0.5592, "step": 5001 }, { "epoch": 2.4308814589665655, "grad_norm": 0.07182251391475623, "learning_rate": 3.3467975850333167e-06, "loss": 0.5367, "step": 5002 }, { "epoch": 2.431367781155015, "grad_norm": 0.07281607973738147, "learning_rate": 3.3449907222658266e-06, "loss": 0.5404, "step": 5003 }, { "epoch": 2.431854103343465, "grad_norm": 0.07243085781205838, "learning_rate": 3.3431841021866553e-06, "loss": 0.5515, "step": 5004 }, { "epoch": 2.432340425531915, "grad_norm": 0.07008303138680833, "learning_rate": 3.3413777250607215e-06, "loss": 0.5287, "step": 5005 }, { "epoch": 2.4328267477203647, "grad_norm": 0.07354579843332798, "learning_rate": 3.3395715911529087e-06, "loss": 0.5625, "step": 5006 }, { "epoch": 2.433313069908815, "grad_norm": 0.07262924032431435, "learning_rate": 3.337765700728066e-06, "loss": 0.5351, "step": 5007 }, { "epoch": 2.4337993920972645, "grad_norm": 0.0706884573910302, "learning_rate": 3.3359600540510084e-06, "loss": 0.5038, "step": 5008 }, { "epoch": 2.434285714285714, "grad_norm": 0.07335802698683956, "learning_rate": 3.334154651386512e-06, "loss": 0.5349, "step": 5009 }, { "epoch": 2.4347720364741643, "grad_norm": 0.0736114455498874, "learning_rate": 3.3323494929993187e-06, "loss": 0.5226, "step": 5010 }, { "epoch": 2.435258358662614, "grad_norm": 0.07106527596001265, "learning_rate": 3.330544579154135e-06, "loss": 0.5295, "step": 5011 }, { "epoch": 2.4357446808510637, "grad_norm": 0.07007541160356795, "learning_rate": 3.3287399101156316e-06, "loss": 0.5093, "step": 5012 }, { "epoch": 2.436231003039514, "grad_norm": 0.071585998365469, "learning_rate": 3.326935486148441e-06, "loss": 0.5345, "step": 5013 }, { "epoch": 2.4367173252279635, "grad_norm": 0.07211180696172437, "learning_rate": 3.325131307517163e-06, "loss": 0.4913, "step": 5014 }, { "epoch": 2.437203647416413, "grad_norm": 0.07239889854364302, "learning_rate": 3.3233273744863604e-06, "loss": 0.5672, "step": 5015 }, { "epoch": 2.4376899696048633, "grad_norm": 0.0743708532124169, "learning_rate": 3.321523687320557e-06, "loss": 0.5348, "step": 5016 }, { "epoch": 2.438176291793313, "grad_norm": 0.06834174755171417, "learning_rate": 3.319720246284245e-06, "loss": 0.5057, "step": 5017 }, { "epoch": 2.438662613981763, "grad_norm": 0.07168753302618132, "learning_rate": 3.3179170516418766e-06, "loss": 0.5023, "step": 5018 }, { "epoch": 2.439148936170213, "grad_norm": 0.07021648883197688, "learning_rate": 3.316114103657873e-06, "loss": 0.5042, "step": 5019 }, { "epoch": 2.4396352583586625, "grad_norm": 0.07298538780128841, "learning_rate": 3.314311402596614e-06, "loss": 0.5258, "step": 5020 }, { "epoch": 2.4401215805471126, "grad_norm": 0.07083119018124148, "learning_rate": 3.3125089487224436e-06, "loss": 0.4902, "step": 5021 }, { "epoch": 2.4406079027355623, "grad_norm": 0.073575205324048, "learning_rate": 3.310706742299675e-06, "loss": 0.5171, "step": 5022 }, { "epoch": 2.4410942249240124, "grad_norm": 0.07271595891577688, "learning_rate": 3.308904783592579e-06, "loss": 0.5509, "step": 5023 }, { "epoch": 2.441580547112462, "grad_norm": 0.07343510789440008, "learning_rate": 3.307103072865393e-06, "loss": 0.5367, "step": 5024 }, { "epoch": 2.4420668693009118, "grad_norm": 0.06847101097932697, "learning_rate": 3.3053016103823177e-06, "loss": 0.5095, "step": 5025 }, { "epoch": 2.4425531914893615, "grad_norm": 0.0714349311736113, "learning_rate": 3.3035003964075164e-06, "loss": 0.5518, "step": 5026 }, { "epoch": 2.4430395136778116, "grad_norm": 0.07292549049261239, "learning_rate": 3.3016994312051165e-06, "loss": 0.5505, "step": 5027 }, { "epoch": 2.4435258358662613, "grad_norm": 0.0715769313065459, "learning_rate": 3.2998987150392105e-06, "loss": 0.5145, "step": 5028 }, { "epoch": 2.4440121580547114, "grad_norm": 0.07106215780346531, "learning_rate": 3.298098248173852e-06, "loss": 0.4977, "step": 5029 }, { "epoch": 2.444498480243161, "grad_norm": 0.0719648408348036, "learning_rate": 3.2962980308730584e-06, "loss": 0.5354, "step": 5030 }, { "epoch": 2.4449848024316108, "grad_norm": 0.07383354532637519, "learning_rate": 3.2944980634008116e-06, "loss": 0.5278, "step": 5031 }, { "epoch": 2.445471124620061, "grad_norm": 0.0681992386879797, "learning_rate": 3.2926983460210564e-06, "loss": 0.4934, "step": 5032 }, { "epoch": 2.4459574468085106, "grad_norm": 0.07072753816425954, "learning_rate": 3.2908988789977015e-06, "loss": 0.4894, "step": 5033 }, { "epoch": 2.4464437689969607, "grad_norm": 0.07487692419696054, "learning_rate": 3.2890996625946182e-06, "loss": 0.5401, "step": 5034 }, { "epoch": 2.4469300911854104, "grad_norm": 0.07705752697122643, "learning_rate": 3.2873006970756398e-06, "loss": 0.5424, "step": 5035 }, { "epoch": 2.44741641337386, "grad_norm": 0.07552299366795687, "learning_rate": 3.2855019827045657e-06, "loss": 0.5661, "step": 5036 }, { "epoch": 2.44790273556231, "grad_norm": 0.07102483254120338, "learning_rate": 3.2837035197451562e-06, "loss": 0.5162, "step": 5037 }, { "epoch": 2.44838905775076, "grad_norm": 0.07024705567037107, "learning_rate": 3.2819053084611362e-06, "loss": 0.5253, "step": 5038 }, { "epoch": 2.4488753799392096, "grad_norm": 0.07306052835846437, "learning_rate": 3.280107349116191e-06, "loss": 0.5271, "step": 5039 }, { "epoch": 2.4493617021276597, "grad_norm": 0.07307510661801636, "learning_rate": 3.2783096419739737e-06, "loss": 0.5195, "step": 5040 }, { "epoch": 2.4498480243161094, "grad_norm": 0.0752984637950309, "learning_rate": 3.2765121872980965e-06, "loss": 0.568, "step": 5041 }, { "epoch": 2.450334346504559, "grad_norm": 0.07346277918700476, "learning_rate": 3.2747149853521347e-06, "loss": 0.5533, "step": 5042 }, { "epoch": 2.450820668693009, "grad_norm": 0.07267036618745515, "learning_rate": 3.2729180363996295e-06, "loss": 0.5318, "step": 5043 }, { "epoch": 2.451306990881459, "grad_norm": 0.0710627941839871, "learning_rate": 3.271121340704082e-06, "loss": 0.515, "step": 5044 }, { "epoch": 2.451793313069909, "grad_norm": 0.07276987587388119, "learning_rate": 3.269324898528956e-06, "loss": 0.552, "step": 5045 }, { "epoch": 2.4522796352583587, "grad_norm": 0.06922275148956468, "learning_rate": 3.2675287101376816e-06, "loss": 0.5152, "step": 5046 }, { "epoch": 2.4527659574468084, "grad_norm": 0.07359596710512181, "learning_rate": 3.2657327757936473e-06, "loss": 0.5498, "step": 5047 }, { "epoch": 2.4532522796352585, "grad_norm": 0.07195999137370987, "learning_rate": 3.263937095760208e-06, "loss": 0.565, "step": 5048 }, { "epoch": 2.453738601823708, "grad_norm": 0.07642688500893463, "learning_rate": 3.262141670300679e-06, "loss": 0.5306, "step": 5049 }, { "epoch": 2.4542249240121583, "grad_norm": 0.0726630718418881, "learning_rate": 3.26034649967834e-06, "loss": 0.5133, "step": 5050 }, { "epoch": 2.454711246200608, "grad_norm": 0.07331577238739899, "learning_rate": 3.258551584156432e-06, "loss": 0.5264, "step": 5051 }, { "epoch": 2.4551975683890577, "grad_norm": 0.06953850791873235, "learning_rate": 3.2567569239981576e-06, "loss": 0.5223, "step": 5052 }, { "epoch": 2.4556838905775074, "grad_norm": 0.06972153226455745, "learning_rate": 3.254962519466686e-06, "loss": 0.5019, "step": 5053 }, { "epoch": 2.4561702127659575, "grad_norm": 0.0724929681390814, "learning_rate": 3.2531683708251438e-06, "loss": 0.5096, "step": 5054 }, { "epoch": 2.456656534954407, "grad_norm": 0.07122869359278075, "learning_rate": 3.251374478336623e-06, "loss": 0.5232, "step": 5055 }, { "epoch": 2.4571428571428573, "grad_norm": 0.0729856920840314, "learning_rate": 3.2495808422641785e-06, "loss": 0.5525, "step": 5056 }, { "epoch": 2.457629179331307, "grad_norm": 0.07464012492086165, "learning_rate": 3.247787462870824e-06, "loss": 0.494, "step": 5057 }, { "epoch": 2.4581155015197567, "grad_norm": 0.07161393532190671, "learning_rate": 3.2459943404195428e-06, "loss": 0.5134, "step": 5058 }, { "epoch": 2.458601823708207, "grad_norm": 0.07764407501003323, "learning_rate": 3.2442014751732735e-06, "loss": 0.6107, "step": 5059 }, { "epoch": 2.4590881458966565, "grad_norm": 0.07226747955452442, "learning_rate": 3.2424088673949195e-06, "loss": 0.529, "step": 5060 }, { "epoch": 2.4595744680851066, "grad_norm": 0.07234355836589856, "learning_rate": 3.240616517347346e-06, "loss": 0.5185, "step": 5061 }, { "epoch": 2.4600607902735563, "grad_norm": 0.07122928197953035, "learning_rate": 3.2388244252933802e-06, "loss": 0.5174, "step": 5062 }, { "epoch": 2.460547112462006, "grad_norm": 0.07105231715013642, "learning_rate": 3.237032591495814e-06, "loss": 0.5169, "step": 5063 }, { "epoch": 2.461033434650456, "grad_norm": 0.07324041315179579, "learning_rate": 3.235241016217398e-06, "loss": 0.536, "step": 5064 }, { "epoch": 2.461519756838906, "grad_norm": 0.07262187301769511, "learning_rate": 3.233449699720847e-06, "loss": 0.5509, "step": 5065 }, { "epoch": 2.4620060790273555, "grad_norm": 0.0718986128087145, "learning_rate": 3.231658642268837e-06, "loss": 0.5369, "step": 5066 }, { "epoch": 2.4624924012158056, "grad_norm": 0.07194898261899282, "learning_rate": 3.229867844124006e-06, "loss": 0.546, "step": 5067 }, { "epoch": 2.4629787234042553, "grad_norm": 0.0714987692882125, "learning_rate": 3.2280773055489563e-06, "loss": 0.5336, "step": 5068 }, { "epoch": 2.463465045592705, "grad_norm": 0.07049700257498381, "learning_rate": 3.2262870268062463e-06, "loss": 0.4797, "step": 5069 }, { "epoch": 2.463951367781155, "grad_norm": 0.07366350993223539, "learning_rate": 3.2244970081584027e-06, "loss": 0.5218, "step": 5070 }, { "epoch": 2.4644376899696048, "grad_norm": 0.07430582458980749, "learning_rate": 3.22270724986791e-06, "loss": 0.5368, "step": 5071 }, { "epoch": 2.464924012158055, "grad_norm": 0.07194807492022116, "learning_rate": 3.2209177521972168e-06, "loss": 0.528, "step": 5072 }, { "epoch": 2.4654103343465046, "grad_norm": 0.07572255353920243, "learning_rate": 3.219128515408733e-06, "loss": 0.5637, "step": 5073 }, { "epoch": 2.4658966565349543, "grad_norm": 0.07160160451857686, "learning_rate": 3.217339539764829e-06, "loss": 0.5682, "step": 5074 }, { "epoch": 2.4663829787234044, "grad_norm": 0.07560677368806716, "learning_rate": 3.215550825527836e-06, "loss": 0.5623, "step": 5075 }, { "epoch": 2.466869300911854, "grad_norm": 0.0695743038537317, "learning_rate": 3.2137623729600533e-06, "loss": 0.5156, "step": 5076 }, { "epoch": 2.4673556231003038, "grad_norm": 0.07234252817302601, "learning_rate": 3.211974182323733e-06, "loss": 0.5134, "step": 5077 }, { "epoch": 2.467841945288754, "grad_norm": 0.0698089297650444, "learning_rate": 3.2101862538810957e-06, "loss": 0.4844, "step": 5078 }, { "epoch": 2.4683282674772036, "grad_norm": 0.07153891775507754, "learning_rate": 3.208398587894319e-06, "loss": 0.5109, "step": 5079 }, { "epoch": 2.4688145896656533, "grad_norm": 0.07403346441706771, "learning_rate": 3.2066111846255443e-06, "loss": 0.5449, "step": 5080 }, { "epoch": 2.4693009118541034, "grad_norm": 0.07548148533572237, "learning_rate": 3.2048240443368745e-06, "loss": 0.5715, "step": 5081 }, { "epoch": 2.469787234042553, "grad_norm": 0.07185172430860484, "learning_rate": 3.2030371672903725e-06, "loss": 0.5198, "step": 5082 }, { "epoch": 2.470273556231003, "grad_norm": 0.07409698546697403, "learning_rate": 3.2012505537480655e-06, "loss": 0.5346, "step": 5083 }, { "epoch": 2.470759878419453, "grad_norm": 0.07106574003718143, "learning_rate": 3.199464203971938e-06, "loss": 0.5071, "step": 5084 }, { "epoch": 2.4712462006079026, "grad_norm": 0.07129559196503259, "learning_rate": 3.197678118223938e-06, "loss": 0.5567, "step": 5085 }, { "epoch": 2.4717325227963527, "grad_norm": 0.07257593060183752, "learning_rate": 3.1958922967659755e-06, "loss": 0.5357, "step": 5086 }, { "epoch": 2.4722188449848024, "grad_norm": 0.0709990155023218, "learning_rate": 3.19410673985992e-06, "loss": 0.5234, "step": 5087 }, { "epoch": 2.4727051671732525, "grad_norm": 0.07290586660681272, "learning_rate": 3.1923214477676044e-06, "loss": 0.5155, "step": 5088 }, { "epoch": 2.473191489361702, "grad_norm": 0.06971042862182966, "learning_rate": 3.190536420750821e-06, "loss": 0.4969, "step": 5089 }, { "epoch": 2.473677811550152, "grad_norm": 0.07188004896681896, "learning_rate": 3.1887516590713235e-06, "loss": 0.5263, "step": 5090 }, { "epoch": 2.474164133738602, "grad_norm": 0.07224013653999033, "learning_rate": 3.186967162990827e-06, "loss": 0.5217, "step": 5091 }, { "epoch": 2.4746504559270517, "grad_norm": 0.07357100791819154, "learning_rate": 3.185182932771009e-06, "loss": 0.5399, "step": 5092 }, { "epoch": 2.4751367781155014, "grad_norm": 0.06976210807632803, "learning_rate": 3.1833989686735046e-06, "loss": 0.5169, "step": 5093 }, { "epoch": 2.4756231003039515, "grad_norm": 0.07201308193067005, "learning_rate": 3.1816152709599097e-06, "loss": 0.5583, "step": 5094 }, { "epoch": 2.476109422492401, "grad_norm": 0.07089292305447724, "learning_rate": 3.179831839891788e-06, "loss": 0.5247, "step": 5095 }, { "epoch": 2.476595744680851, "grad_norm": 0.07262174947901902, "learning_rate": 3.178048675730659e-06, "loss": 0.5586, "step": 5096 }, { "epoch": 2.477082066869301, "grad_norm": 0.07037491773073425, "learning_rate": 3.1762657787380026e-06, "loss": 0.4807, "step": 5097 }, { "epoch": 2.4775683890577507, "grad_norm": 0.0678355792009076, "learning_rate": 3.1744831491752583e-06, "loss": 0.4794, "step": 5098 }, { "epoch": 2.478054711246201, "grad_norm": 0.0697889555240722, "learning_rate": 3.17270078730383e-06, "loss": 0.4906, "step": 5099 }, { "epoch": 2.4785410334346505, "grad_norm": 0.07174063352807762, "learning_rate": 3.170918693385081e-06, "loss": 0.5244, "step": 5100 }, { "epoch": 2.4790273556231, "grad_norm": 0.0751389700518392, "learning_rate": 3.169136867680336e-06, "loss": 0.5608, "step": 5101 }, { "epoch": 2.4795136778115503, "grad_norm": 0.07063891123769281, "learning_rate": 3.167355310450877e-06, "loss": 0.5075, "step": 5102 }, { "epoch": 2.48, "grad_norm": 0.07095625005602661, "learning_rate": 3.165574021957952e-06, "loss": 0.5066, "step": 5103 }, { "epoch": 2.4804863221884497, "grad_norm": 0.07270491841607786, "learning_rate": 3.1637930024627645e-06, "loss": 0.549, "step": 5104 }, { "epoch": 2.4809726443769, "grad_norm": 0.07046670502839882, "learning_rate": 3.1620122522264817e-06, "loss": 0.5219, "step": 5105 }, { "epoch": 2.4814589665653495, "grad_norm": 0.07050616523016418, "learning_rate": 3.160231771510231e-06, "loss": 0.5171, "step": 5106 }, { "epoch": 2.481945288753799, "grad_norm": 0.0775774417380877, "learning_rate": 3.1584515605750998e-06, "loss": 0.529, "step": 5107 }, { "epoch": 2.4824316109422493, "grad_norm": 0.07238201009234521, "learning_rate": 3.1566716196821333e-06, "loss": 0.5343, "step": 5108 }, { "epoch": 2.482917933130699, "grad_norm": 0.07084878986794341, "learning_rate": 3.1548919490923422e-06, "loss": 0.5585, "step": 5109 }, { "epoch": 2.483404255319149, "grad_norm": 0.07032651813525949, "learning_rate": 3.1531125490666946e-06, "loss": 0.5074, "step": 5110 }, { "epoch": 2.483890577507599, "grad_norm": 0.07263629183764513, "learning_rate": 3.1513334198661183e-06, "loss": 0.5074, "step": 5111 }, { "epoch": 2.4843768996960485, "grad_norm": 0.07250651862525727, "learning_rate": 3.149554561751502e-06, "loss": 0.5153, "step": 5112 }, { "epoch": 2.4848632218844986, "grad_norm": 0.07009165594868517, "learning_rate": 3.1477759749836967e-06, "loss": 0.5261, "step": 5113 }, { "epoch": 2.4853495440729483, "grad_norm": 0.07289543070295175, "learning_rate": 3.145997659823512e-06, "loss": 0.5382, "step": 5114 }, { "epoch": 2.4858358662613984, "grad_norm": 0.07196283219115769, "learning_rate": 3.1442196165317164e-06, "loss": 0.5255, "step": 5115 }, { "epoch": 2.486322188449848, "grad_norm": 0.07583836895376056, "learning_rate": 3.1424418453690402e-06, "loss": 0.5672, "step": 5116 }, { "epoch": 2.4868085106382978, "grad_norm": 0.07547283790207375, "learning_rate": 3.140664346596174e-06, "loss": 0.571, "step": 5117 }, { "epoch": 2.487294832826748, "grad_norm": 0.07158727011845846, "learning_rate": 3.1388871204737663e-06, "loss": 0.5162, "step": 5118 }, { "epoch": 2.4877811550151976, "grad_norm": 0.07313306887019055, "learning_rate": 3.1371101672624283e-06, "loss": 0.5417, "step": 5119 }, { "epoch": 2.4882674772036473, "grad_norm": 0.07831223238599944, "learning_rate": 3.13533348722273e-06, "loss": 0.5313, "step": 5120 }, { "epoch": 2.4887537993920974, "grad_norm": 0.07192867093833415, "learning_rate": 3.1335570806152027e-06, "loss": 0.4988, "step": 5121 }, { "epoch": 2.489240121580547, "grad_norm": 0.0705930535697104, "learning_rate": 3.1317809477003326e-06, "loss": 0.531, "step": 5122 }, { "epoch": 2.4897264437689968, "grad_norm": 0.07185750539686564, "learning_rate": 3.130005088738572e-06, "loss": 0.5175, "step": 5123 }, { "epoch": 2.490212765957447, "grad_norm": 0.0711568250872684, "learning_rate": 3.1282295039903297e-06, "loss": 0.5242, "step": 5124 }, { "epoch": 2.4906990881458966, "grad_norm": 0.07012317643905235, "learning_rate": 3.126454193715975e-06, "loss": 0.5317, "step": 5125 }, { "epoch": 2.4911854103343467, "grad_norm": 0.07077478128219651, "learning_rate": 3.1246791581758384e-06, "loss": 0.5095, "step": 5126 }, { "epoch": 2.4916717325227964, "grad_norm": 0.07340973295579808, "learning_rate": 3.1229043976302064e-06, "loss": 0.5363, "step": 5127 }, { "epoch": 2.492158054711246, "grad_norm": 0.07394294213601862, "learning_rate": 3.1211299123393296e-06, "loss": 0.5164, "step": 5128 }, { "epoch": 2.492644376899696, "grad_norm": 0.07337476372527484, "learning_rate": 3.1193557025634147e-06, "loss": 0.554, "step": 5129 }, { "epoch": 2.493130699088146, "grad_norm": 0.23507972355249732, "learning_rate": 3.1175817685626285e-06, "loss": 0.5848, "step": 5130 }, { "epoch": 2.4936170212765956, "grad_norm": 0.07376429044732093, "learning_rate": 3.1158081105971018e-06, "loss": 0.5432, "step": 5131 }, { "epoch": 2.4941033434650457, "grad_norm": 0.0699875429290197, "learning_rate": 3.114034728926918e-06, "loss": 0.5211, "step": 5132 }, { "epoch": 2.4945896656534954, "grad_norm": 0.06980976826865469, "learning_rate": 3.112261623812125e-06, "loss": 0.5296, "step": 5133 }, { "epoch": 2.495075987841945, "grad_norm": 0.07405956990481313, "learning_rate": 3.1104887955127283e-06, "loss": 0.5402, "step": 5134 }, { "epoch": 2.495562310030395, "grad_norm": 0.07112345591580038, "learning_rate": 3.108716244288693e-06, "loss": 0.5428, "step": 5135 }, { "epoch": 2.496048632218845, "grad_norm": 0.07000413692993583, "learning_rate": 3.1069439703999447e-06, "loss": 0.5325, "step": 5136 }, { "epoch": 2.496534954407295, "grad_norm": 0.0736007009386916, "learning_rate": 3.1051719741063646e-06, "loss": 0.5433, "step": 5137 }, { "epoch": 2.4970212765957447, "grad_norm": 0.07117726808041258, "learning_rate": 3.103400255667798e-06, "loss": 0.5111, "step": 5138 }, { "epoch": 2.4975075987841944, "grad_norm": 0.07191517886552011, "learning_rate": 3.101628815344046e-06, "loss": 0.5238, "step": 5139 }, { "epoch": 2.4979939209726445, "grad_norm": 0.07020578696988963, "learning_rate": 3.099857653394871e-06, "loss": 0.4881, "step": 5140 }, { "epoch": 2.4979939209726445, "eval_loss": 0.5709736347198486, "eval_runtime": 104.9642, "eval_samples_per_second": 289.175, "eval_steps_per_second": 36.155, "step": 5140 }, { "epoch": 2.498480243161094, "grad_norm": 0.07165086577672265, "learning_rate": 3.098086770079993e-06, "loss": 0.5572, "step": 5141 }, { "epoch": 2.4989665653495443, "grad_norm": 0.07483984142746496, "learning_rate": 3.0963161656590933e-06, "loss": 0.5512, "step": 5142 }, { "epoch": 2.499452887537994, "grad_norm": 0.07300335321382202, "learning_rate": 3.0945458403918104e-06, "loss": 0.5444, "step": 5143 }, { "epoch": 2.4999392097264437, "grad_norm": 0.07364864241562977, "learning_rate": 3.0927757945377413e-06, "loss": 0.5603, "step": 5144 }, { "epoch": 2.5004255319148934, "grad_norm": 0.06895337202780867, "learning_rate": 3.0910060283564454e-06, "loss": 0.5116, "step": 5145 }, { "epoch": 2.5009118541033435, "grad_norm": 0.07236063519921059, "learning_rate": 3.0892365421074366e-06, "loss": 0.5501, "step": 5146 }, { "epoch": 2.501398176291793, "grad_norm": 0.07020073702375772, "learning_rate": 3.08746733605019e-06, "loss": 0.5065, "step": 5147 }, { "epoch": 2.5018844984802433, "grad_norm": 0.06964144865939198, "learning_rate": 3.085698410444139e-06, "loss": 0.514, "step": 5148 }, { "epoch": 2.502370820668693, "grad_norm": 0.07336464063880291, "learning_rate": 3.083929765548679e-06, "loss": 0.5401, "step": 5149 }, { "epoch": 2.5028571428571427, "grad_norm": 0.0711731885608878, "learning_rate": 3.0821614016231617e-06, "loss": 0.5333, "step": 5150 }, { "epoch": 2.503343465045593, "grad_norm": 0.07567293270713694, "learning_rate": 3.0803933189268966e-06, "loss": 0.5526, "step": 5151 }, { "epoch": 2.5038297872340425, "grad_norm": 0.07151002183351457, "learning_rate": 3.0786255177191515e-06, "loss": 0.5313, "step": 5152 }, { "epoch": 2.5043161094224926, "grad_norm": 0.0723448603993983, "learning_rate": 3.0768579982591557e-06, "loss": 0.5107, "step": 5153 }, { "epoch": 2.5048024316109423, "grad_norm": 0.07565431318969268, "learning_rate": 3.0750907608060954e-06, "loss": 0.5619, "step": 5154 }, { "epoch": 2.505288753799392, "grad_norm": 0.0691905260316451, "learning_rate": 3.0733238056191173e-06, "loss": 0.5236, "step": 5155 }, { "epoch": 2.505775075987842, "grad_norm": 0.07368013747856297, "learning_rate": 3.0715571329573233e-06, "loss": 0.5408, "step": 5156 }, { "epoch": 2.506261398176292, "grad_norm": 0.07311793676604153, "learning_rate": 3.0697907430797767e-06, "loss": 0.5229, "step": 5157 }, { "epoch": 2.506747720364742, "grad_norm": 0.0714406976803571, "learning_rate": 3.068024636245499e-06, "loss": 0.5179, "step": 5158 }, { "epoch": 2.5072340425531916, "grad_norm": 0.0741667379421227, "learning_rate": 3.0662588127134697e-06, "loss": 0.5521, "step": 5159 }, { "epoch": 2.5077203647416413, "grad_norm": 0.07061477567556528, "learning_rate": 3.0644932727426275e-06, "loss": 0.5081, "step": 5160 }, { "epoch": 2.508206686930091, "grad_norm": 0.0676538916570248, "learning_rate": 3.062728016591866e-06, "loss": 0.4511, "step": 5161 }, { "epoch": 2.508693009118541, "grad_norm": 0.06965509800177648, "learning_rate": 3.0609630445200424e-06, "loss": 0.5185, "step": 5162 }, { "epoch": 2.509179331306991, "grad_norm": 0.0736500319452885, "learning_rate": 3.0591983567859685e-06, "loss": 0.5337, "step": 5163 }, { "epoch": 2.509665653495441, "grad_norm": 0.073219446833232, "learning_rate": 3.0574339536484164e-06, "loss": 0.5016, "step": 5164 }, { "epoch": 2.5101519756838906, "grad_norm": 0.07152702686461844, "learning_rate": 3.055669835366116e-06, "loss": 0.5276, "step": 5165 }, { "epoch": 2.5106382978723403, "grad_norm": 0.07110473566386229, "learning_rate": 3.053906002197754e-06, "loss": 0.5638, "step": 5166 }, { "epoch": 2.5111246200607904, "grad_norm": 0.0762557165318456, "learning_rate": 3.0521424544019786e-06, "loss": 0.544, "step": 5167 }, { "epoch": 2.51161094224924, "grad_norm": 0.07305990167429371, "learning_rate": 3.050379192237393e-06, "loss": 0.5127, "step": 5168 }, { "epoch": 2.51209726443769, "grad_norm": 0.07305388570305144, "learning_rate": 3.048616215962558e-06, "loss": 0.5057, "step": 5169 }, { "epoch": 2.51258358662614, "grad_norm": 0.07425831814350997, "learning_rate": 3.0468535258359964e-06, "loss": 0.5756, "step": 5170 }, { "epoch": 2.5130699088145896, "grad_norm": 0.07036315899760799, "learning_rate": 3.045091122116186e-06, "loss": 0.5124, "step": 5171 }, { "epoch": 2.5135562310030393, "grad_norm": 0.07222052317843929, "learning_rate": 3.0433290050615626e-06, "loss": 0.5526, "step": 5172 }, { "epoch": 2.5140425531914894, "grad_norm": 0.07202971612489384, "learning_rate": 3.041567174930522e-06, "loss": 0.5125, "step": 5173 }, { "epoch": 2.514528875379939, "grad_norm": 0.07181682251503373, "learning_rate": 3.039805631981415e-06, "loss": 0.5183, "step": 5174 }, { "epoch": 2.515015197568389, "grad_norm": 0.0720441762663447, "learning_rate": 3.0380443764725538e-06, "loss": 0.5261, "step": 5175 }, { "epoch": 2.515501519756839, "grad_norm": 0.0717310784091408, "learning_rate": 3.036283408662204e-06, "loss": 0.5286, "step": 5176 }, { "epoch": 2.5159878419452886, "grad_norm": 0.0701282753128611, "learning_rate": 3.034522728808593e-06, "loss": 0.4995, "step": 5177 }, { "epoch": 2.5164741641337387, "grad_norm": 0.07066424607797053, "learning_rate": 3.0327623371699043e-06, "loss": 0.4916, "step": 5178 }, { "epoch": 2.5169604863221884, "grad_norm": 0.06821429406072016, "learning_rate": 3.0310022340042798e-06, "loss": 0.4884, "step": 5179 }, { "epoch": 2.5174468085106385, "grad_norm": 0.07183190513578791, "learning_rate": 3.0292424195698177e-06, "loss": 0.5251, "step": 5180 }, { "epoch": 2.517933130699088, "grad_norm": 0.07047535260710414, "learning_rate": 3.027482894124576e-06, "loss": 0.5182, "step": 5181 }, { "epoch": 2.518419452887538, "grad_norm": 0.07231648296593661, "learning_rate": 3.025723657926568e-06, "loss": 0.5106, "step": 5182 }, { "epoch": 2.518905775075988, "grad_norm": 0.07022519935915474, "learning_rate": 3.023964711233767e-06, "loss": 0.5275, "step": 5183 }, { "epoch": 2.5193920972644377, "grad_norm": 0.07116710643697498, "learning_rate": 3.0222060543040994e-06, "loss": 0.5058, "step": 5184 }, { "epoch": 2.5198784194528874, "grad_norm": 0.07033377823009221, "learning_rate": 3.0204476873954558e-06, "loss": 0.5085, "step": 5185 }, { "epoch": 2.5203647416413375, "grad_norm": 0.07219927495954807, "learning_rate": 3.0186896107656803e-06, "loss": 0.5413, "step": 5186 }, { "epoch": 2.520851063829787, "grad_norm": 0.06940063883794943, "learning_rate": 3.016931824672573e-06, "loss": 0.486, "step": 5187 }, { "epoch": 2.521337386018237, "grad_norm": 0.06906707175649249, "learning_rate": 3.0151743293738955e-06, "loss": 0.4855, "step": 5188 }, { "epoch": 2.521823708206687, "grad_norm": 0.07206245306632703, "learning_rate": 3.013417125127364e-06, "loss": 0.5128, "step": 5189 }, { "epoch": 2.5223100303951367, "grad_norm": 0.06889094833978955, "learning_rate": 3.0116602121906514e-06, "loss": 0.4675, "step": 5190 }, { "epoch": 2.522796352583587, "grad_norm": 0.07087814856013125, "learning_rate": 3.0099035908213893e-06, "loss": 0.5221, "step": 5191 }, { "epoch": 2.5232826747720365, "grad_norm": 0.07008707338481325, "learning_rate": 3.0081472612771656e-06, "loss": 0.51, "step": 5192 }, { "epoch": 2.523768996960486, "grad_norm": 0.07174946418666132, "learning_rate": 3.006391223815528e-06, "loss": 0.5337, "step": 5193 }, { "epoch": 2.5242553191489363, "grad_norm": 0.07031243831566994, "learning_rate": 3.0046354786939785e-06, "loss": 0.4829, "step": 5194 }, { "epoch": 2.524741641337386, "grad_norm": 0.0729406523193348, "learning_rate": 3.002880026169977e-06, "loss": 0.5347, "step": 5195 }, { "epoch": 2.525227963525836, "grad_norm": 0.07059223581186867, "learning_rate": 3.0011248665009405e-06, "loss": 0.532, "step": 5196 }, { "epoch": 2.525714285714286, "grad_norm": 0.07048365431127444, "learning_rate": 2.9993699999442445e-06, "loss": 0.5502, "step": 5197 }, { "epoch": 2.5262006079027355, "grad_norm": 0.06910190222350247, "learning_rate": 2.997615426757219e-06, "loss": 0.4988, "step": 5198 }, { "epoch": 2.526686930091185, "grad_norm": 0.0699236297396845, "learning_rate": 2.9958611471971534e-06, "loss": 0.5095, "step": 5199 }, { "epoch": 2.5271732522796353, "grad_norm": 0.0739257683961832, "learning_rate": 2.9941071615212906e-06, "loss": 0.5694, "step": 5200 }, { "epoch": 2.527659574468085, "grad_norm": 0.07325283319655512, "learning_rate": 2.992353469986835e-06, "loss": 0.5028, "step": 5201 }, { "epoch": 2.528145896656535, "grad_norm": 0.07148874722633088, "learning_rate": 2.990600072850942e-06, "loss": 0.5436, "step": 5202 }, { "epoch": 2.528632218844985, "grad_norm": 0.07341045758166169, "learning_rate": 2.9888469703707323e-06, "loss": 0.52, "step": 5203 }, { "epoch": 2.5291185410334345, "grad_norm": 0.07119609016675617, "learning_rate": 2.9870941628032777e-06, "loss": 0.5314, "step": 5204 }, { "epoch": 2.5296048632218846, "grad_norm": 0.07177544344758144, "learning_rate": 2.9853416504056044e-06, "loss": 0.5239, "step": 5205 }, { "epoch": 2.5300911854103343, "grad_norm": 0.06994400548437793, "learning_rate": 2.9835894334347005e-06, "loss": 0.4928, "step": 5206 }, { "epoch": 2.5305775075987844, "grad_norm": 0.06966879737529871, "learning_rate": 2.9818375121475084e-06, "loss": 0.4945, "step": 5207 }, { "epoch": 2.531063829787234, "grad_norm": 0.07279910295191842, "learning_rate": 2.9800858868009276e-06, "loss": 0.53, "step": 5208 }, { "epoch": 2.531550151975684, "grad_norm": 0.06929579028085348, "learning_rate": 2.978334557651813e-06, "loss": 0.484, "step": 5209 }, { "epoch": 2.5320364741641335, "grad_norm": 0.06713729823254085, "learning_rate": 2.9765835249569786e-06, "loss": 0.462, "step": 5210 }, { "epoch": 2.5325227963525836, "grad_norm": 0.07271017018802592, "learning_rate": 2.974832788973193e-06, "loss": 0.5683, "step": 5211 }, { "epoch": 2.5330091185410333, "grad_norm": 0.0731602019961289, "learning_rate": 2.973082349957181e-06, "loss": 0.5321, "step": 5212 }, { "epoch": 2.5334954407294834, "grad_norm": 0.07075667268472696, "learning_rate": 2.971332208165626e-06, "loss": 0.5406, "step": 5213 }, { "epoch": 2.533981762917933, "grad_norm": 0.06889726630509321, "learning_rate": 2.9695823638551657e-06, "loss": 0.4914, "step": 5214 }, { "epoch": 2.5344680851063828, "grad_norm": 0.0691887393959937, "learning_rate": 2.9678328172823937e-06, "loss": 0.4851, "step": 5215 }, { "epoch": 2.534954407294833, "grad_norm": 0.07046926930632237, "learning_rate": 2.966083568703863e-06, "loss": 0.519, "step": 5216 }, { "epoch": 2.5354407294832826, "grad_norm": 0.075046384901123, "learning_rate": 2.9643346183760802e-06, "loss": 0.5183, "step": 5217 }, { "epoch": 2.5359270516717327, "grad_norm": 0.0720481248942936, "learning_rate": 2.962585966555509e-06, "loss": 0.5493, "step": 5218 }, { "epoch": 2.5364133738601824, "grad_norm": 0.07397849605279953, "learning_rate": 2.9608376134985696e-06, "loss": 0.5349, "step": 5219 }, { "epoch": 2.536899696048632, "grad_norm": 0.07083021017899752, "learning_rate": 2.9590895594616377e-06, "loss": 0.5215, "step": 5220 }, { "epoch": 2.537386018237082, "grad_norm": 0.07066373777118727, "learning_rate": 2.9573418047010448e-06, "loss": 0.5084, "step": 5221 }, { "epoch": 2.537872340425532, "grad_norm": 0.07405362335264465, "learning_rate": 2.9555943494730817e-06, "loss": 0.5297, "step": 5222 }, { "epoch": 2.538358662613982, "grad_norm": 0.0723235318908062, "learning_rate": 2.953847194033991e-06, "loss": 0.5335, "step": 5223 }, { "epoch": 2.5388449848024317, "grad_norm": 0.07300873760833729, "learning_rate": 2.952100338639974e-06, "loss": 0.5366, "step": 5224 }, { "epoch": 2.5393313069908814, "grad_norm": 0.07227121490875922, "learning_rate": 2.950353783547187e-06, "loss": 0.5174, "step": 5225 }, { "epoch": 2.539817629179331, "grad_norm": 0.07166463649943024, "learning_rate": 2.948607529011742e-06, "loss": 0.5534, "step": 5226 }, { "epoch": 2.540303951367781, "grad_norm": 0.07041677367153859, "learning_rate": 2.946861575289708e-06, "loss": 0.5483, "step": 5227 }, { "epoch": 2.540790273556231, "grad_norm": 0.07382337984952471, "learning_rate": 2.9451159226371097e-06, "loss": 0.5383, "step": 5228 }, { "epoch": 2.541276595744681, "grad_norm": 0.07095447433106677, "learning_rate": 2.9433705713099257e-06, "loss": 0.4841, "step": 5229 }, { "epoch": 2.5417629179331307, "grad_norm": 0.070941641566057, "learning_rate": 2.941625521564093e-06, "loss": 0.516, "step": 5230 }, { "epoch": 2.5422492401215804, "grad_norm": 0.07234703568990339, "learning_rate": 2.9398807736555036e-06, "loss": 0.5462, "step": 5231 }, { "epoch": 2.5427355623100305, "grad_norm": 0.0718035031427715, "learning_rate": 2.9381363278400043e-06, "loss": 0.5041, "step": 5232 }, { "epoch": 2.54322188449848, "grad_norm": 0.07209680344069905, "learning_rate": 2.9363921843733984e-06, "loss": 0.5167, "step": 5233 }, { "epoch": 2.5437082066869303, "grad_norm": 0.07222340455173297, "learning_rate": 2.934648343511445e-06, "loss": 0.5595, "step": 5234 }, { "epoch": 2.54419452887538, "grad_norm": 0.06949006995013941, "learning_rate": 2.9329048055098582e-06, "loss": 0.4957, "step": 5235 }, { "epoch": 2.5446808510638297, "grad_norm": 0.07077812057498424, "learning_rate": 2.931161570624308e-06, "loss": 0.5455, "step": 5236 }, { "epoch": 2.5451671732522794, "grad_norm": 0.07297797733435306, "learning_rate": 2.929418639110422e-06, "loss": 0.5055, "step": 5237 }, { "epoch": 2.5456534954407295, "grad_norm": 0.07235705936738485, "learning_rate": 2.927676011223778e-06, "loss": 0.5184, "step": 5238 }, { "epoch": 2.546139817629179, "grad_norm": 0.07115646941972145, "learning_rate": 2.925933687219912e-06, "loss": 0.5192, "step": 5239 }, { "epoch": 2.5466261398176293, "grad_norm": 0.07222373189759254, "learning_rate": 2.92419166735432e-06, "loss": 0.5253, "step": 5240 }, { "epoch": 2.547112462006079, "grad_norm": 0.06844920517682934, "learning_rate": 2.922449951882448e-06, "loss": 0.4976, "step": 5241 }, { "epoch": 2.5475987841945287, "grad_norm": 0.07150832419015081, "learning_rate": 2.9207085410596987e-06, "loss": 0.5224, "step": 5242 }, { "epoch": 2.548085106382979, "grad_norm": 0.07065039976242223, "learning_rate": 2.91896743514143e-06, "loss": 0.5242, "step": 5243 }, { "epoch": 2.5485714285714285, "grad_norm": 0.07036292963992544, "learning_rate": 2.9172266343829547e-06, "loss": 0.4951, "step": 5244 }, { "epoch": 2.5490577507598786, "grad_norm": 0.07280445217666232, "learning_rate": 2.9154861390395417e-06, "loss": 0.5499, "step": 5245 }, { "epoch": 2.5495440729483283, "grad_norm": 0.0697516359103554, "learning_rate": 2.913745949366416e-06, "loss": 0.516, "step": 5246 }, { "epoch": 2.550030395136778, "grad_norm": 0.07252689662567713, "learning_rate": 2.9120060656187577e-06, "loss": 0.5202, "step": 5247 }, { "epoch": 2.550516717325228, "grad_norm": 0.0739677980914118, "learning_rate": 2.9102664880516973e-06, "loss": 0.5857, "step": 5248 }, { "epoch": 2.551003039513678, "grad_norm": 0.07195073968025892, "learning_rate": 2.908527216920325e-06, "loss": 0.5605, "step": 5249 }, { "epoch": 2.551489361702128, "grad_norm": 0.0709461915170774, "learning_rate": 2.906788252479687e-06, "loss": 0.5137, "step": 5250 }, { "epoch": 2.5519756838905776, "grad_norm": 0.07016900503196073, "learning_rate": 2.905049594984781e-06, "loss": 0.4832, "step": 5251 }, { "epoch": 2.5524620060790273, "grad_norm": 0.07461585214894023, "learning_rate": 2.903311244690563e-06, "loss": 0.5603, "step": 5252 }, { "epoch": 2.552948328267477, "grad_norm": 0.07006371556845854, "learning_rate": 2.9015732018519415e-06, "loss": 0.5042, "step": 5253 }, { "epoch": 2.553434650455927, "grad_norm": 0.07304062182928074, "learning_rate": 2.8998354667237806e-06, "loss": 0.5161, "step": 5254 }, { "epoch": 2.553920972644377, "grad_norm": 0.07115914863402885, "learning_rate": 2.898098039560899e-06, "loss": 0.508, "step": 5255 }, { "epoch": 2.554407294832827, "grad_norm": 0.07209164351203949, "learning_rate": 2.8963609206180715e-06, "loss": 0.4868, "step": 5256 }, { "epoch": 2.5548936170212766, "grad_norm": 0.07273737158722844, "learning_rate": 2.8946241101500246e-06, "loss": 0.5673, "step": 5257 }, { "epoch": 2.5553799392097263, "grad_norm": 0.07217837398100864, "learning_rate": 2.8928876084114456e-06, "loss": 0.5278, "step": 5258 }, { "epoch": 2.5558662613981764, "grad_norm": 0.07300754126510027, "learning_rate": 2.8911514156569715e-06, "loss": 0.5299, "step": 5259 }, { "epoch": 2.556352583586626, "grad_norm": 0.0729385912459572, "learning_rate": 2.8894155321411943e-06, "loss": 0.5378, "step": 5260 }, { "epoch": 2.556838905775076, "grad_norm": 0.06840634153916052, "learning_rate": 2.887679958118662e-06, "loss": 0.4847, "step": 5261 }, { "epoch": 2.557325227963526, "grad_norm": 0.0706326013417714, "learning_rate": 2.8859446938438794e-06, "loss": 0.5105, "step": 5262 }, { "epoch": 2.5578115501519756, "grad_norm": 0.07392191466607143, "learning_rate": 2.884209739571299e-06, "loss": 0.5421, "step": 5263 }, { "epoch": 2.5582978723404253, "grad_norm": 0.07040961638293448, "learning_rate": 2.8824750955553325e-06, "loss": 0.5472, "step": 5264 }, { "epoch": 2.5587841945288754, "grad_norm": 0.06971496028015046, "learning_rate": 2.880740762050348e-06, "loss": 0.5005, "step": 5265 }, { "epoch": 2.559270516717325, "grad_norm": 0.07072607532764644, "learning_rate": 2.8790067393106653e-06, "loss": 0.515, "step": 5266 }, { "epoch": 2.559756838905775, "grad_norm": 0.06917005729041993, "learning_rate": 2.877273027590558e-06, "loss": 0.5145, "step": 5267 }, { "epoch": 2.560243161094225, "grad_norm": 0.07141443837313326, "learning_rate": 2.875539627144257e-06, "loss": 0.5204, "step": 5268 }, { "epoch": 2.5607294832826746, "grad_norm": 0.06899865083882975, "learning_rate": 2.873806538225944e-06, "loss": 0.4919, "step": 5269 }, { "epoch": 2.5612158054711247, "grad_norm": 0.06975767819440655, "learning_rate": 2.8720737610897575e-06, "loss": 0.5132, "step": 5270 }, { "epoch": 2.5617021276595744, "grad_norm": 0.07172518728964794, "learning_rate": 2.8703412959897904e-06, "loss": 0.5255, "step": 5271 }, { "epoch": 2.5621884498480245, "grad_norm": 0.07076484580877153, "learning_rate": 2.8686091431800883e-06, "loss": 0.538, "step": 5272 }, { "epoch": 2.562674772036474, "grad_norm": 0.07069483007438665, "learning_rate": 2.8668773029146517e-06, "loss": 0.487, "step": 5273 }, { "epoch": 2.563161094224924, "grad_norm": 0.07152830371593032, "learning_rate": 2.8651457754474354e-06, "loss": 0.5172, "step": 5274 }, { "epoch": 2.563647416413374, "grad_norm": 0.07231745130793858, "learning_rate": 2.8634145610323462e-06, "loss": 0.5349, "step": 5275 }, { "epoch": 2.5641337386018237, "grad_norm": 0.06942002369005398, "learning_rate": 2.8616836599232513e-06, "loss": 0.5, "step": 5276 }, { "epoch": 2.564620060790274, "grad_norm": 0.07133104590296707, "learning_rate": 2.8599530723739673e-06, "loss": 0.5166, "step": 5277 }, { "epoch": 2.5651063829787235, "grad_norm": 0.07220081410227014, "learning_rate": 2.8582227986382617e-06, "loss": 0.5149, "step": 5278 }, { "epoch": 2.565592705167173, "grad_norm": 0.06859177438327475, "learning_rate": 2.8564928389698605e-06, "loss": 0.4893, "step": 5279 }, { "epoch": 2.566079027355623, "grad_norm": 0.06935779233292914, "learning_rate": 2.854763193622444e-06, "loss": 0.5109, "step": 5280 }, { "epoch": 2.566565349544073, "grad_norm": 0.07230111515732772, "learning_rate": 2.8530338628496433e-06, "loss": 0.5445, "step": 5281 }, { "epoch": 2.5670516717325227, "grad_norm": 0.07597840175779053, "learning_rate": 2.8513048469050476e-06, "loss": 0.5588, "step": 5282 }, { "epoch": 2.567537993920973, "grad_norm": 0.07151983171688847, "learning_rate": 2.8495761460421957e-06, "loss": 0.5148, "step": 5283 }, { "epoch": 2.5680243161094225, "grad_norm": 0.07337071758599584, "learning_rate": 2.8478477605145815e-06, "loss": 0.5389, "step": 5284 }, { "epoch": 2.568510638297872, "grad_norm": 0.07144312497699838, "learning_rate": 2.8461196905756544e-06, "loss": 0.5352, "step": 5285 }, { "epoch": 2.5689969604863223, "grad_norm": 0.07082174842796415, "learning_rate": 2.8443919364788157e-06, "loss": 0.4933, "step": 5286 }, { "epoch": 2.569483282674772, "grad_norm": 0.07244737093567413, "learning_rate": 2.842664498477421e-06, "loss": 0.5257, "step": 5287 }, { "epoch": 2.569969604863222, "grad_norm": 0.07415387531548687, "learning_rate": 2.8409373768247795e-06, "loss": 0.5698, "step": 5288 }, { "epoch": 2.570455927051672, "grad_norm": 0.07388906751027818, "learning_rate": 2.839210571774154e-06, "loss": 0.5524, "step": 5289 }, { "epoch": 2.5709422492401215, "grad_norm": 0.07262094743517403, "learning_rate": 2.837484083578761e-06, "loss": 0.5499, "step": 5290 }, { "epoch": 2.571428571428571, "grad_norm": 0.06943069103189393, "learning_rate": 2.8357579124917694e-06, "loss": 0.5061, "step": 5291 }, { "epoch": 2.5719148936170213, "grad_norm": 0.07203734129483275, "learning_rate": 2.834032058766304e-06, "loss": 0.5393, "step": 5292 }, { "epoch": 2.572401215805471, "grad_norm": 0.07310077793872213, "learning_rate": 2.83230652265544e-06, "loss": 0.5474, "step": 5293 }, { "epoch": 2.572887537993921, "grad_norm": 0.08540313442429173, "learning_rate": 2.83058130441221e-06, "loss": 0.5856, "step": 5294 }, { "epoch": 2.573373860182371, "grad_norm": 0.06925801772830338, "learning_rate": 2.828856404289596e-06, "loss": 0.4806, "step": 5295 }, { "epoch": 2.5738601823708205, "grad_norm": 0.07285313916417645, "learning_rate": 2.827131822540535e-06, "loss": 0.5301, "step": 5296 }, { "epoch": 2.5743465045592706, "grad_norm": 0.07005282769407138, "learning_rate": 2.8254075594179177e-06, "loss": 0.5214, "step": 5297 }, { "epoch": 2.5748328267477203, "grad_norm": 0.06760403731737047, "learning_rate": 2.823683615174587e-06, "loss": 0.4743, "step": 5298 }, { "epoch": 2.5753191489361704, "grad_norm": 0.07155123614917548, "learning_rate": 2.8219599900633417e-06, "loss": 0.5253, "step": 5299 }, { "epoch": 2.57580547112462, "grad_norm": 0.07164208280364782, "learning_rate": 2.82023668433693e-06, "loss": 0.5091, "step": 5300 }, { "epoch": 2.57629179331307, "grad_norm": 0.07173328473577424, "learning_rate": 2.8185136982480554e-06, "loss": 0.5339, "step": 5301 }, { "epoch": 2.57677811550152, "grad_norm": 0.07232906126540413, "learning_rate": 2.816791032049375e-06, "loss": 0.5495, "step": 5302 }, { "epoch": 2.5772644376899696, "grad_norm": 0.07271566293105103, "learning_rate": 2.8150686859934974e-06, "loss": 0.5271, "step": 5303 }, { "epoch": 2.5777507598784197, "grad_norm": 0.07032940175152132, "learning_rate": 2.813346660332986e-06, "loss": 0.5094, "step": 5304 }, { "epoch": 2.5782370820668694, "grad_norm": 0.0699621605588178, "learning_rate": 2.811624955320356e-06, "loss": 0.5269, "step": 5305 }, { "epoch": 2.578723404255319, "grad_norm": 0.07205969727227979, "learning_rate": 2.809903571208075e-06, "loss": 0.5302, "step": 5306 }, { "epoch": 2.579209726443769, "grad_norm": 0.07002055560445936, "learning_rate": 2.808182508248565e-06, "loss": 0.503, "step": 5307 }, { "epoch": 2.579696048632219, "grad_norm": 0.0685787232898974, "learning_rate": 2.8064617666942e-06, "loss": 0.4965, "step": 5308 }, { "epoch": 2.5801823708206686, "grad_norm": 0.07603117430086737, "learning_rate": 2.804741346797308e-06, "loss": 0.5432, "step": 5309 }, { "epoch": 2.5806686930091187, "grad_norm": 0.07207282273647803, "learning_rate": 2.8030212488101714e-06, "loss": 0.5292, "step": 5310 }, { "epoch": 2.5811550151975684, "grad_norm": 0.07230275450271861, "learning_rate": 2.801301472985016e-06, "loss": 0.5378, "step": 5311 }, { "epoch": 2.581641337386018, "grad_norm": 0.07234208356600152, "learning_rate": 2.799582019574033e-06, "loss": 0.5063, "step": 5312 }, { "epoch": 2.582127659574468, "grad_norm": 0.07248370684931718, "learning_rate": 2.79786288882936e-06, "loss": 0.5065, "step": 5313 }, { "epoch": 2.582613981762918, "grad_norm": 0.07022688737732607, "learning_rate": 2.7961440810030878e-06, "loss": 0.5139, "step": 5314 }, { "epoch": 2.583100303951368, "grad_norm": 0.07024845533111988, "learning_rate": 2.794425596347259e-06, "loss": 0.503, "step": 5315 }, { "epoch": 2.5835866261398177, "grad_norm": 0.07150716858810083, "learning_rate": 2.7927074351138704e-06, "loss": 0.5109, "step": 5316 }, { "epoch": 2.5840729483282674, "grad_norm": 0.06849105936526505, "learning_rate": 2.7909895975548717e-06, "loss": 0.4921, "step": 5317 }, { "epoch": 2.584559270516717, "grad_norm": 0.07044883715059527, "learning_rate": 2.7892720839221633e-06, "loss": 0.5272, "step": 5318 }, { "epoch": 2.585045592705167, "grad_norm": 0.07394074859092659, "learning_rate": 2.787554894467599e-06, "loss": 0.5485, "step": 5319 }, { "epoch": 2.585531914893617, "grad_norm": 0.07346051836976804, "learning_rate": 2.785838029442986e-06, "loss": 0.5562, "step": 5320 }, { "epoch": 2.586018237082067, "grad_norm": 0.07017259779383905, "learning_rate": 2.784121489100082e-06, "loss": 0.5067, "step": 5321 }, { "epoch": 2.5865045592705167, "grad_norm": 0.07326868226652182, "learning_rate": 2.7824052736905993e-06, "loss": 0.5386, "step": 5322 }, { "epoch": 2.5869908814589664, "grad_norm": 0.07263318556183915, "learning_rate": 2.7806893834661998e-06, "loss": 0.5389, "step": 5323 }, { "epoch": 2.5874772036474165, "grad_norm": 0.07280897133010984, "learning_rate": 2.778973818678501e-06, "loss": 0.5296, "step": 5324 }, { "epoch": 2.587963525835866, "grad_norm": 0.06900944653569056, "learning_rate": 2.777258579579072e-06, "loss": 0.4905, "step": 5325 }, { "epoch": 2.5884498480243163, "grad_norm": 0.07420931195821912, "learning_rate": 2.7755436664194293e-06, "loss": 0.5208, "step": 5326 }, { "epoch": 2.588936170212766, "grad_norm": 0.07046102140257733, "learning_rate": 2.773829079451048e-06, "loss": 0.4951, "step": 5327 }, { "epoch": 2.5894224924012157, "grad_norm": 0.07500427753109747, "learning_rate": 2.772114818925352e-06, "loss": 0.5778, "step": 5328 }, { "epoch": 2.589908814589666, "grad_norm": 0.07207267607008341, "learning_rate": 2.770400885093718e-06, "loss": 0.5379, "step": 5329 }, { "epoch": 2.5903951367781155, "grad_norm": 0.0708115651005762, "learning_rate": 2.768687278207475e-06, "loss": 0.4859, "step": 5330 }, { "epoch": 2.590881458966565, "grad_norm": 0.07504072594663244, "learning_rate": 2.7669739985179046e-06, "loss": 0.5626, "step": 5331 }, { "epoch": 2.5913677811550153, "grad_norm": 0.07052117077560555, "learning_rate": 2.7652610462762407e-06, "loss": 0.5192, "step": 5332 }, { "epoch": 2.591854103343465, "grad_norm": 0.0719427334073481, "learning_rate": 2.7635484217336666e-06, "loss": 0.5228, "step": 5333 }, { "epoch": 2.5923404255319147, "grad_norm": 0.07087242421254078, "learning_rate": 2.7618361251413207e-06, "loss": 0.5131, "step": 5334 }, { "epoch": 2.592826747720365, "grad_norm": 0.07081540882432405, "learning_rate": 2.76012415675029e-06, "loss": 0.5211, "step": 5335 }, { "epoch": 2.5933130699088145, "grad_norm": 0.07282737956103896, "learning_rate": 2.758412516811617e-06, "loss": 0.5384, "step": 5336 }, { "epoch": 2.5937993920972646, "grad_norm": 0.07116256741985431, "learning_rate": 2.756701205576293e-06, "loss": 0.5026, "step": 5337 }, { "epoch": 2.5942857142857143, "grad_norm": 0.07056525705508711, "learning_rate": 2.754990223295263e-06, "loss": 0.4989, "step": 5338 }, { "epoch": 2.594772036474164, "grad_norm": 0.07131474025342094, "learning_rate": 2.7532795702194253e-06, "loss": 0.5218, "step": 5339 }, { "epoch": 2.595258358662614, "grad_norm": 0.07077640561004091, "learning_rate": 2.7515692465996236e-06, "loss": 0.54, "step": 5340 }, { "epoch": 2.595744680851064, "grad_norm": 0.0717768894162335, "learning_rate": 2.7498592526866584e-06, "loss": 0.5061, "step": 5341 }, { "epoch": 2.596231003039514, "grad_norm": 0.06968033830886546, "learning_rate": 2.7481495887312824e-06, "loss": 0.4864, "step": 5342 }, { "epoch": 2.5967173252279636, "grad_norm": 0.07050832828051792, "learning_rate": 2.7464402549841974e-06, "loss": 0.5076, "step": 5343 }, { "epoch": 2.5972036474164133, "grad_norm": 0.07476535279675123, "learning_rate": 2.7447312516960584e-06, "loss": 0.5386, "step": 5344 }, { "epoch": 2.597689969604863, "grad_norm": 0.0739501455654807, "learning_rate": 2.743022579117471e-06, "loss": 0.5491, "step": 5345 }, { "epoch": 2.598176291793313, "grad_norm": 0.07169171462833135, "learning_rate": 2.741314237498993e-06, "loss": 0.509, "step": 5346 }, { "epoch": 2.598662613981763, "grad_norm": 0.07345756835469414, "learning_rate": 2.739606227091132e-06, "loss": 0.5157, "step": 5347 }, { "epoch": 2.599148936170213, "grad_norm": 0.07140180546973116, "learning_rate": 2.7378985481443483e-06, "loss": 0.5127, "step": 5348 }, { "epoch": 2.5996352583586626, "grad_norm": 0.07365691975968618, "learning_rate": 2.7361912009090565e-06, "loss": 0.5287, "step": 5349 }, { "epoch": 2.6001215805471123, "grad_norm": 0.0707045686004956, "learning_rate": 2.7344841856356173e-06, "loss": 0.5332, "step": 5350 }, { "epoch": 2.6006079027355624, "grad_norm": 0.0728315060365903, "learning_rate": 2.732777502574346e-06, "loss": 0.5419, "step": 5351 }, { "epoch": 2.601094224924012, "grad_norm": 0.07228432394782106, "learning_rate": 2.7310711519755084e-06, "loss": 0.5265, "step": 5352 }, { "epoch": 2.6015805471124622, "grad_norm": 0.07560565480418585, "learning_rate": 2.72936513408932e-06, "loss": 0.5176, "step": 5353 }, { "epoch": 2.602066869300912, "grad_norm": 0.073950056714824, "learning_rate": 2.7276594491659523e-06, "loss": 0.5267, "step": 5354 }, { "epoch": 2.6025531914893616, "grad_norm": 0.0747150400588746, "learning_rate": 2.725954097455521e-06, "loss": 0.5384, "step": 5355 }, { "epoch": 2.6030395136778113, "grad_norm": 0.0719801009949029, "learning_rate": 2.7242490792080965e-06, "loss": 0.5219, "step": 5356 }, { "epoch": 2.6035258358662614, "grad_norm": 0.07548736066206169, "learning_rate": 2.722544394673703e-06, "loss": 0.5249, "step": 5357 }, { "epoch": 2.604012158054711, "grad_norm": 0.07212949782593735, "learning_rate": 2.720840044102311e-06, "loss": 0.5429, "step": 5358 }, { "epoch": 2.604498480243161, "grad_norm": 0.07220165538862498, "learning_rate": 2.719136027743845e-06, "loss": 0.5178, "step": 5359 }, { "epoch": 2.604984802431611, "grad_norm": 0.07076923339172092, "learning_rate": 2.7174323458481798e-06, "loss": 0.5234, "step": 5360 }, { "epoch": 2.6054711246200606, "grad_norm": 0.07207414681785498, "learning_rate": 2.7157289986651403e-06, "loss": 0.4976, "step": 5361 }, { "epoch": 2.6059574468085107, "grad_norm": 0.07534019862275049, "learning_rate": 2.714025986444504e-06, "loss": 0.525, "step": 5362 }, { "epoch": 2.6064437689969604, "grad_norm": 0.07104113436064638, "learning_rate": 2.712323309435998e-06, "loss": 0.5063, "step": 5363 }, { "epoch": 2.6069300911854105, "grad_norm": 0.07338672309848925, "learning_rate": 2.7106209678893e-06, "loss": 0.5371, "step": 5364 }, { "epoch": 2.60741641337386, "grad_norm": 0.06958449635019943, "learning_rate": 2.7089189620540394e-06, "loss": 0.4697, "step": 5365 }, { "epoch": 2.60790273556231, "grad_norm": 0.07152435385673764, "learning_rate": 2.7072172921797947e-06, "loss": 0.5601, "step": 5366 }, { "epoch": 2.60838905775076, "grad_norm": 0.06982943350477738, "learning_rate": 2.7055159585160996e-06, "loss": 0.4824, "step": 5367 }, { "epoch": 2.6088753799392097, "grad_norm": 0.0704847983076069, "learning_rate": 2.703814961312433e-06, "loss": 0.539, "step": 5368 }, { "epoch": 2.60936170212766, "grad_norm": 0.07357250143935838, "learning_rate": 2.7021143008182297e-06, "loss": 0.4924, "step": 5369 }, { "epoch": 2.6098480243161095, "grad_norm": 0.0695244855160726, "learning_rate": 2.700413977282868e-06, "loss": 0.4915, "step": 5370 }, { "epoch": 2.610334346504559, "grad_norm": 0.07046223635539373, "learning_rate": 2.698713990955683e-06, "loss": 0.5105, "step": 5371 }, { "epoch": 2.610820668693009, "grad_norm": 0.06989228866744805, "learning_rate": 2.6970143420859585e-06, "loss": 0.5168, "step": 5372 }, { "epoch": 2.611306990881459, "grad_norm": 0.07379507159177955, "learning_rate": 2.6953150309229287e-06, "loss": 0.5174, "step": 5373 }, { "epoch": 2.6117933130699087, "grad_norm": 0.07042036716669867, "learning_rate": 2.6936160577157776e-06, "loss": 0.5239, "step": 5374 }, { "epoch": 2.612279635258359, "grad_norm": 0.0686762000938755, "learning_rate": 2.6919174227136417e-06, "loss": 0.499, "step": 5375 }, { "epoch": 2.6127659574468085, "grad_norm": 0.07253511335109725, "learning_rate": 2.6902191261656053e-06, "loss": 0.5363, "step": 5376 }, { "epoch": 2.613252279635258, "grad_norm": 0.07026774500298956, "learning_rate": 2.6885211683207048e-06, "loss": 0.4961, "step": 5377 }, { "epoch": 2.6137386018237083, "grad_norm": 0.0732181670954931, "learning_rate": 2.6868235494279266e-06, "loss": 0.5504, "step": 5378 }, { "epoch": 2.614224924012158, "grad_norm": 0.07052222124938673, "learning_rate": 2.685126269736207e-06, "loss": 0.5052, "step": 5379 }, { "epoch": 2.614711246200608, "grad_norm": 0.07320576478941102, "learning_rate": 2.6834293294944326e-06, "loss": 0.5112, "step": 5380 }, { "epoch": 2.615197568389058, "grad_norm": 0.07155115078112458, "learning_rate": 2.6817327289514406e-06, "loss": 0.5095, "step": 5381 }, { "epoch": 2.6156838905775075, "grad_norm": 0.06935843349081532, "learning_rate": 2.680036468356018e-06, "loss": 0.4904, "step": 5382 }, { "epoch": 2.616170212765957, "grad_norm": 0.06882477273475883, "learning_rate": 2.678340547956903e-06, "loss": 0.4941, "step": 5383 }, { "epoch": 2.6166565349544073, "grad_norm": 0.07176851914682074, "learning_rate": 2.6766449680027816e-06, "loss": 0.5149, "step": 5384 }, { "epoch": 2.617142857142857, "grad_norm": 0.07072959811037098, "learning_rate": 2.674949728742293e-06, "loss": 0.5183, "step": 5385 }, { "epoch": 2.617629179331307, "grad_norm": 0.07152883595682842, "learning_rate": 2.673254830424024e-06, "loss": 0.4906, "step": 5386 }, { "epoch": 2.618115501519757, "grad_norm": 0.07173668116143077, "learning_rate": 2.6715602732965117e-06, "loss": 0.5207, "step": 5387 }, { "epoch": 2.6186018237082065, "grad_norm": 0.07203431301024624, "learning_rate": 2.6698660576082447e-06, "loss": 0.5014, "step": 5388 }, { "epoch": 2.6190881458966566, "grad_norm": 0.07291059057218043, "learning_rate": 2.668172183607659e-06, "loss": 0.5261, "step": 5389 }, { "epoch": 2.6195744680851063, "grad_norm": 0.07336530900987731, "learning_rate": 2.666478651543144e-06, "loss": 0.535, "step": 5390 }, { "epoch": 2.6200607902735564, "grad_norm": 0.07047428040808709, "learning_rate": 2.6647854616630353e-06, "loss": 0.546, "step": 5391 }, { "epoch": 2.620547112462006, "grad_norm": 0.06957885197784568, "learning_rate": 2.6630926142156203e-06, "loss": 0.5222, "step": 5392 }, { "epoch": 2.621033434650456, "grad_norm": 0.06941410059153282, "learning_rate": 2.6614001094491366e-06, "loss": 0.4757, "step": 5393 }, { "epoch": 2.621519756838906, "grad_norm": 0.07354842673337593, "learning_rate": 2.65970794761177e-06, "loss": 0.5625, "step": 5394 }, { "epoch": 2.6220060790273556, "grad_norm": 0.0719906916771628, "learning_rate": 2.658016128951657e-06, "loss": 0.5172, "step": 5395 }, { "epoch": 2.6224924012158057, "grad_norm": 0.06972531460506548, "learning_rate": 2.656324653716884e-06, "loss": 0.5204, "step": 5396 }, { "epoch": 2.6229787234042554, "grad_norm": 0.07046115123659691, "learning_rate": 2.6546335221554863e-06, "loss": 0.5086, "step": 5397 }, { "epoch": 2.623465045592705, "grad_norm": 0.07077336423198709, "learning_rate": 2.652942734515449e-06, "loss": 0.4948, "step": 5398 }, { "epoch": 2.623951367781155, "grad_norm": 0.07262509628601738, "learning_rate": 2.651252291044707e-06, "loss": 0.5186, "step": 5399 }, { "epoch": 2.624437689969605, "grad_norm": 0.07364182001915393, "learning_rate": 2.649562191991145e-06, "loss": 0.55, "step": 5400 }, { "epoch": 2.6249240121580546, "grad_norm": 0.07237908006245841, "learning_rate": 2.6478724376025966e-06, "loss": 0.5364, "step": 5401 }, { "epoch": 2.6254103343465047, "grad_norm": 0.06829752929436642, "learning_rate": 2.646183028126844e-06, "loss": 0.5237, "step": 5402 }, { "epoch": 2.6258966565349544, "grad_norm": 0.07043563725870507, "learning_rate": 2.6444939638116224e-06, "loss": 0.5129, "step": 5403 }, { "epoch": 2.626382978723404, "grad_norm": 0.06991877719724306, "learning_rate": 2.6428052449046116e-06, "loss": 0.4868, "step": 5404 }, { "epoch": 2.626869300911854, "grad_norm": 0.07086121907189452, "learning_rate": 2.641116871653444e-06, "loss": 0.5315, "step": 5405 }, { "epoch": 2.627355623100304, "grad_norm": 0.07255566790931886, "learning_rate": 2.639428844305701e-06, "loss": 0.5185, "step": 5406 }, { "epoch": 2.627841945288754, "grad_norm": 0.07277298799470891, "learning_rate": 2.637741163108911e-06, "loss": 0.5282, "step": 5407 }, { "epoch": 2.6283282674772037, "grad_norm": 0.071298737147626, "learning_rate": 2.636053828310555e-06, "loss": 0.5438, "step": 5408 }, { "epoch": 2.6288145896656534, "grad_norm": 0.07078792644565439, "learning_rate": 2.6343668401580603e-06, "loss": 0.5082, "step": 5409 }, { "epoch": 2.629300911854103, "grad_norm": 0.07208895976637411, "learning_rate": 2.632680198898805e-06, "loss": 0.5299, "step": 5410 }, { "epoch": 2.629787234042553, "grad_norm": 0.07064011601316954, "learning_rate": 2.630993904780116e-06, "loss": 0.5073, "step": 5411 }, { "epoch": 2.630273556231003, "grad_norm": 0.0696893706421984, "learning_rate": 2.6293079580492688e-06, "loss": 0.4821, "step": 5412 }, { "epoch": 2.630759878419453, "grad_norm": 0.07064719280396976, "learning_rate": 2.6276223589534877e-06, "loss": 0.5085, "step": 5413 }, { "epoch": 2.6312462006079027, "grad_norm": 0.06985828675677272, "learning_rate": 2.6259371077399487e-06, "loss": 0.5073, "step": 5414 }, { "epoch": 2.6317325227963524, "grad_norm": 0.07440549595560729, "learning_rate": 2.624252204655773e-06, "loss": 0.5545, "step": 5415 }, { "epoch": 2.6322188449848025, "grad_norm": 0.0727442963447437, "learning_rate": 2.6225676499480335e-06, "loss": 0.5462, "step": 5416 }, { "epoch": 2.632705167173252, "grad_norm": 0.07367430195550313, "learning_rate": 2.6208834438637525e-06, "loss": 0.5359, "step": 5417 }, { "epoch": 2.6331914893617023, "grad_norm": 0.07179279324843299, "learning_rate": 2.619199586649895e-06, "loss": 0.509, "step": 5418 }, { "epoch": 2.633677811550152, "grad_norm": 0.07323904450174652, "learning_rate": 2.6175160785533836e-06, "loss": 0.5517, "step": 5419 }, { "epoch": 2.6341641337386017, "grad_norm": 0.07185192751608897, "learning_rate": 2.615832919821082e-06, "loss": 0.5458, "step": 5420 }, { "epoch": 2.634650455927052, "grad_norm": 0.07039799585439428, "learning_rate": 2.6141501106998105e-06, "loss": 0.5026, "step": 5421 }, { "epoch": 2.6351367781155015, "grad_norm": 0.07083730090579023, "learning_rate": 2.612467651436332e-06, "loss": 0.5256, "step": 5422 }, { "epoch": 2.6356231003039516, "grad_norm": 0.06911830742166492, "learning_rate": 2.610785542277361e-06, "loss": 0.4997, "step": 5423 }, { "epoch": 2.6361094224924013, "grad_norm": 0.07146195336802952, "learning_rate": 2.6091037834695582e-06, "loss": 0.5253, "step": 5424 }, { "epoch": 2.636595744680851, "grad_norm": 0.07177669466974806, "learning_rate": 2.6074223752595353e-06, "loss": 0.5089, "step": 5425 }, { "epoch": 2.6370820668693007, "grad_norm": 0.07514201973940192, "learning_rate": 2.605741317893851e-06, "loss": 0.5304, "step": 5426 }, { "epoch": 2.637568389057751, "grad_norm": 0.07522626997007498, "learning_rate": 2.6040606116190148e-06, "loss": 0.5677, "step": 5427 }, { "epoch": 2.6380547112462005, "grad_norm": 0.07182090106266803, "learning_rate": 2.6023802566814814e-06, "loss": 0.5287, "step": 5428 }, { "epoch": 2.6385410334346506, "grad_norm": 0.07139477720137276, "learning_rate": 2.6007002533276572e-06, "loss": 0.4881, "step": 5429 }, { "epoch": 2.6390273556231003, "grad_norm": 0.07050693859886928, "learning_rate": 2.5990206018038945e-06, "loss": 0.5054, "step": 5430 }, { "epoch": 2.63951367781155, "grad_norm": 0.0760384804349611, "learning_rate": 2.597341302356495e-06, "loss": 0.5592, "step": 5431 }, { "epoch": 2.64, "grad_norm": 0.07123783936348688, "learning_rate": 2.595662355231713e-06, "loss": 0.5193, "step": 5432 }, { "epoch": 2.64048632218845, "grad_norm": 0.07185462538174979, "learning_rate": 2.5939837606757413e-06, "loss": 0.517, "step": 5433 }, { "epoch": 2.6409726443769, "grad_norm": 0.07559142914504793, "learning_rate": 2.592305518934728e-06, "loss": 0.5368, "step": 5434 }, { "epoch": 2.6414589665653496, "grad_norm": 0.07019601347029748, "learning_rate": 2.5906276302547696e-06, "loss": 0.4925, "step": 5435 }, { "epoch": 2.6419452887537993, "grad_norm": 0.07127606451702115, "learning_rate": 2.5889500948819092e-06, "loss": 0.5101, "step": 5436 }, { "epoch": 2.642431610942249, "grad_norm": 0.07141823128198688, "learning_rate": 2.5872729130621376e-06, "loss": 0.5397, "step": 5437 }, { "epoch": 2.642917933130699, "grad_norm": 0.072262958458875, "learning_rate": 2.5855960850413936e-06, "loss": 0.5041, "step": 5438 }, { "epoch": 2.643404255319149, "grad_norm": 0.07105915717983775, "learning_rate": 2.5839196110655684e-06, "loss": 0.5182, "step": 5439 }, { "epoch": 2.643890577507599, "grad_norm": 0.07256900934576364, "learning_rate": 2.582243491380495e-06, "loss": 0.4918, "step": 5440 }, { "epoch": 2.6443768996960486, "grad_norm": 0.07011692996937904, "learning_rate": 2.580567726231959e-06, "loss": 0.5059, "step": 5441 }, { "epoch": 2.6448632218844983, "grad_norm": 0.06970504967531409, "learning_rate": 2.5788923158656907e-06, "loss": 0.5266, "step": 5442 }, { "epoch": 2.6453495440729484, "grad_norm": 0.06860414306069924, "learning_rate": 2.5772172605273716e-06, "loss": 0.4819, "step": 5443 }, { "epoch": 2.645835866261398, "grad_norm": 0.07261839124610621, "learning_rate": 2.575542560462628e-06, "loss": 0.538, "step": 5444 }, { "epoch": 2.6463221884498482, "grad_norm": 0.07030170337939613, "learning_rate": 2.573868215917037e-06, "loss": 0.4965, "step": 5445 }, { "epoch": 2.646808510638298, "grad_norm": 0.07472859611553342, "learning_rate": 2.5721942271361233e-06, "loss": 0.5321, "step": 5446 }, { "epoch": 2.6472948328267476, "grad_norm": 0.0702225524867287, "learning_rate": 2.5705205943653543e-06, "loss": 0.5105, "step": 5447 }, { "epoch": 2.6477811550151977, "grad_norm": 0.06931388998874215, "learning_rate": 2.568847317850152e-06, "loss": 0.503, "step": 5448 }, { "epoch": 2.6482674772036474, "grad_norm": 0.07238755783628201, "learning_rate": 2.567174397835883e-06, "loss": 0.5181, "step": 5449 }, { "epoch": 2.6487537993920975, "grad_norm": 0.07221063094098708, "learning_rate": 2.565501834567862e-06, "loss": 0.5097, "step": 5450 }, { "epoch": 2.6492401215805472, "grad_norm": 0.07146203195646807, "learning_rate": 2.563829628291351e-06, "loss": 0.5292, "step": 5451 }, { "epoch": 2.649726443768997, "grad_norm": 0.07107832846243915, "learning_rate": 2.562157779251561e-06, "loss": 0.5083, "step": 5452 }, { "epoch": 2.6502127659574466, "grad_norm": 0.0730459562903449, "learning_rate": 2.5604862876936486e-06, "loss": 0.5319, "step": 5453 }, { "epoch": 2.6506990881458967, "grad_norm": 0.07067950687746566, "learning_rate": 2.55881515386272e-06, "loss": 0.5211, "step": 5454 }, { "epoch": 2.6511854103343464, "grad_norm": 0.07456650374475696, "learning_rate": 2.5571443780038276e-06, "loss": 0.5193, "step": 5455 }, { "epoch": 2.6516717325227965, "grad_norm": 0.07264119265392369, "learning_rate": 2.5554739603619714e-06, "loss": 0.5082, "step": 5456 }, { "epoch": 2.652158054711246, "grad_norm": 0.07209841873963313, "learning_rate": 2.553803901182098e-06, "loss": 0.4962, "step": 5457 }, { "epoch": 2.652644376899696, "grad_norm": 0.07421098993375227, "learning_rate": 2.5521342007091056e-06, "loss": 0.5106, "step": 5458 }, { "epoch": 2.653130699088146, "grad_norm": 0.07232980647533674, "learning_rate": 2.5504648591878356e-06, "loss": 0.4968, "step": 5459 }, { "epoch": 2.6536170212765957, "grad_norm": 0.07091403602817326, "learning_rate": 2.5487958768630774e-06, "loss": 0.5476, "step": 5460 }, { "epoch": 2.654103343465046, "grad_norm": 0.07293520639187848, "learning_rate": 2.5471272539795705e-06, "loss": 0.5236, "step": 5461 }, { "epoch": 2.6545896656534955, "grad_norm": 0.06984339439743953, "learning_rate": 2.545458990781996e-06, "loss": 0.5164, "step": 5462 }, { "epoch": 2.655075987841945, "grad_norm": 0.0712103953582825, "learning_rate": 2.5437910875149868e-06, "loss": 0.5378, "step": 5463 }, { "epoch": 2.655562310030395, "grad_norm": 0.07147882450311213, "learning_rate": 2.542123544423123e-06, "loss": 0.5337, "step": 5464 }, { "epoch": 2.656048632218845, "grad_norm": 0.07240067887458716, "learning_rate": 2.5404563617509303e-06, "loss": 0.5353, "step": 5465 }, { "epoch": 2.6565349544072947, "grad_norm": 0.07154365135057479, "learning_rate": 2.5387895397428818e-06, "loss": 0.5513, "step": 5466 }, { "epoch": 2.657021276595745, "grad_norm": 0.07014841861279734, "learning_rate": 2.5371230786433985e-06, "loss": 0.4875, "step": 5467 }, { "epoch": 2.6575075987841945, "grad_norm": 0.0724819852173919, "learning_rate": 2.5354569786968486e-06, "loss": 0.5523, "step": 5468 }, { "epoch": 2.657993920972644, "grad_norm": 0.07177628213291166, "learning_rate": 2.5337912401475453e-06, "loss": 0.5144, "step": 5469 }, { "epoch": 2.6584802431610943, "grad_norm": 0.06993684346586565, "learning_rate": 2.5321258632397516e-06, "loss": 0.5211, "step": 5470 }, { "epoch": 2.658966565349544, "grad_norm": 0.07071627977417842, "learning_rate": 2.530460848217675e-06, "loss": 0.5609, "step": 5471 }, { "epoch": 2.659452887537994, "grad_norm": 0.07050137630629093, "learning_rate": 2.5287961953254712e-06, "loss": 0.4928, "step": 5472 }, { "epoch": 2.659939209726444, "grad_norm": 0.07293696855713029, "learning_rate": 2.527131904807244e-06, "loss": 0.5288, "step": 5473 }, { "epoch": 2.6604255319148935, "grad_norm": 0.07419547537809282, "learning_rate": 2.525467976907041e-06, "loss": 0.5529, "step": 5474 }, { "epoch": 2.660911854103343, "grad_norm": 0.07102323721474527, "learning_rate": 2.523804411868857e-06, "loss": 0.5315, "step": 5475 }, { "epoch": 2.6613981762917933, "grad_norm": 0.07163897353856712, "learning_rate": 2.522141209936641e-06, "loss": 0.5191, "step": 5476 }, { "epoch": 2.661884498480243, "grad_norm": 0.06956818849518469, "learning_rate": 2.520478371354277e-06, "loss": 0.5173, "step": 5477 }, { "epoch": 2.662370820668693, "grad_norm": 0.07268633543083317, "learning_rate": 2.5188158963656023e-06, "loss": 0.514, "step": 5478 }, { "epoch": 2.662857142857143, "grad_norm": 0.07384958300308643, "learning_rate": 2.517153785214401e-06, "loss": 0.5431, "step": 5479 }, { "epoch": 2.6633434650455925, "grad_norm": 0.07235268669890155, "learning_rate": 2.5154920381444026e-06, "loss": 0.5252, "step": 5480 }, { "epoch": 2.6638297872340426, "grad_norm": 0.07043298014929625, "learning_rate": 2.513830655399283e-06, "loss": 0.5168, "step": 5481 }, { "epoch": 2.6643161094224923, "grad_norm": 0.07134391552701407, "learning_rate": 2.512169637222666e-06, "loss": 0.5306, "step": 5482 }, { "epoch": 2.6648024316109424, "grad_norm": 0.0717307847963953, "learning_rate": 2.51050898385812e-06, "loss": 0.4981, "step": 5483 }, { "epoch": 2.665288753799392, "grad_norm": 0.07252979963221534, "learning_rate": 2.508848695549162e-06, "loss": 0.5212, "step": 5484 }, { "epoch": 2.665775075987842, "grad_norm": 0.07355436401453107, "learning_rate": 2.507188772539254e-06, "loss": 0.5423, "step": 5485 }, { "epoch": 2.666261398176292, "grad_norm": 0.07526361711365165, "learning_rate": 2.505529215071804e-06, "loss": 0.5479, "step": 5486 }, { "epoch": 2.6667477203647416, "grad_norm": 0.07165395763290587, "learning_rate": 2.5038700233901684e-06, "loss": 0.5363, "step": 5487 }, { "epoch": 2.6672340425531917, "grad_norm": 0.07059084631046932, "learning_rate": 2.5022111977376486e-06, "loss": 0.5209, "step": 5488 }, { "epoch": 2.6677203647416414, "grad_norm": 0.07234672794170492, "learning_rate": 2.5005527383574925e-06, "loss": 0.5256, "step": 5489 }, { "epoch": 2.668206686930091, "grad_norm": 0.06837843636370457, "learning_rate": 2.4988946454928934e-06, "loss": 0.4836, "step": 5490 }, { "epoch": 2.668693009118541, "grad_norm": 0.07390564917350201, "learning_rate": 2.4972369193869935e-06, "loss": 0.5437, "step": 5491 }, { "epoch": 2.669179331306991, "grad_norm": 0.07348253210685227, "learning_rate": 2.495579560282878e-06, "loss": 0.5324, "step": 5492 }, { "epoch": 2.6696656534954406, "grad_norm": 0.07141996806576167, "learning_rate": 2.4939225684235814e-06, "loss": 0.5332, "step": 5493 }, { "epoch": 2.6701519756838907, "grad_norm": 0.0723361192948505, "learning_rate": 2.4922659440520806e-06, "loss": 0.5355, "step": 5494 }, { "epoch": 2.6706382978723404, "grad_norm": 0.07345653446246092, "learning_rate": 2.4906096874113023e-06, "loss": 0.5086, "step": 5495 }, { "epoch": 2.67112462006079, "grad_norm": 0.07135224528207111, "learning_rate": 2.4889537987441177e-06, "loss": 0.5265, "step": 5496 }, { "epoch": 2.6716109422492402, "grad_norm": 0.07182902592095845, "learning_rate": 2.487298278293343e-06, "loss": 0.5255, "step": 5497 }, { "epoch": 2.67209726443769, "grad_norm": 0.07494158870178803, "learning_rate": 2.4856431263017427e-06, "loss": 0.5333, "step": 5498 }, { "epoch": 2.67258358662614, "grad_norm": 0.06840428343481617, "learning_rate": 2.4839883430120253e-06, "loss": 0.4895, "step": 5499 }, { "epoch": 2.6730699088145897, "grad_norm": 0.06960352607855569, "learning_rate": 2.4823339286668464e-06, "loss": 0.5479, "step": 5500 }, { "epoch": 2.6735562310030394, "grad_norm": 0.07030391637082783, "learning_rate": 2.4806798835088066e-06, "loss": 0.4812, "step": 5501 }, { "epoch": 2.674042553191489, "grad_norm": 0.07171035050184746, "learning_rate": 2.4790262077804534e-06, "loss": 0.4921, "step": 5502 }, { "epoch": 2.674528875379939, "grad_norm": 0.07211281686168014, "learning_rate": 2.477372901724279e-06, "loss": 0.5282, "step": 5503 }, { "epoch": 2.675015197568389, "grad_norm": 0.07268259051770798, "learning_rate": 2.475719965582722e-06, "loss": 0.5542, "step": 5504 }, { "epoch": 2.675501519756839, "grad_norm": 0.08048652171992439, "learning_rate": 2.4740673995981672e-06, "loss": 0.5848, "step": 5505 }, { "epoch": 2.6759878419452887, "grad_norm": 0.07379128152650583, "learning_rate": 2.4724152040129447e-06, "loss": 0.5815, "step": 5506 }, { "epoch": 2.6764741641337384, "grad_norm": 0.07148522054284791, "learning_rate": 2.4707633790693296e-06, "loss": 0.5676, "step": 5507 }, { "epoch": 2.6769604863221885, "grad_norm": 0.07224678269640722, "learning_rate": 2.4691119250095437e-06, "loss": 0.4953, "step": 5508 }, { "epoch": 2.677446808510638, "grad_norm": 0.07148156681707943, "learning_rate": 2.467460842075756e-06, "loss": 0.5086, "step": 5509 }, { "epoch": 2.6779331306990883, "grad_norm": 0.07002738198610287, "learning_rate": 2.4658101305100746e-06, "loss": 0.5315, "step": 5510 }, { "epoch": 2.678419452887538, "grad_norm": 0.07274020135726435, "learning_rate": 2.4641597905545576e-06, "loss": 0.5205, "step": 5511 }, { "epoch": 2.6789057750759877, "grad_norm": 0.07094755233200126, "learning_rate": 2.4625098224512136e-06, "loss": 0.5457, "step": 5512 }, { "epoch": 2.679392097264438, "grad_norm": 0.07162468971909133, "learning_rate": 2.460860226441989e-06, "loss": 0.5342, "step": 5513 }, { "epoch": 2.6798784194528875, "grad_norm": 0.06958401149156632, "learning_rate": 2.4592110027687777e-06, "loss": 0.498, "step": 5514 }, { "epoch": 2.6803647416413376, "grad_norm": 0.07136401504593012, "learning_rate": 2.457562151673421e-06, "loss": 0.519, "step": 5515 }, { "epoch": 2.6808510638297873, "grad_norm": 0.07196541431784219, "learning_rate": 2.4559136733977027e-06, "loss": 0.5058, "step": 5516 }, { "epoch": 2.681337386018237, "grad_norm": 0.07040794256120098, "learning_rate": 2.454265568183355e-06, "loss": 0.5307, "step": 5517 }, { "epoch": 2.6818237082066867, "grad_norm": 0.0709257024571426, "learning_rate": 2.4526178362720525e-06, "loss": 0.5352, "step": 5518 }, { "epoch": 2.682310030395137, "grad_norm": 0.07101514949193398, "learning_rate": 2.450970477905417e-06, "loss": 0.5268, "step": 5519 }, { "epoch": 2.6827963525835865, "grad_norm": 0.07108833033756663, "learning_rate": 2.449323493325015e-06, "loss": 0.5531, "step": 5520 }, { "epoch": 2.6832826747720366, "grad_norm": 0.06936878392460284, "learning_rate": 2.4476768827723578e-06, "loss": 0.5178, "step": 5521 }, { "epoch": 2.6837689969604863, "grad_norm": 0.0716604086673739, "learning_rate": 2.4460306464889023e-06, "loss": 0.532, "step": 5522 }, { "epoch": 2.684255319148936, "grad_norm": 0.07463544926494957, "learning_rate": 2.4443847847160496e-06, "loss": 0.5441, "step": 5523 }, { "epoch": 2.684741641337386, "grad_norm": 0.07198906494769135, "learning_rate": 2.44273929769515e-06, "loss": 0.5186, "step": 5524 }, { "epoch": 2.685227963525836, "grad_norm": 0.07025053527235166, "learning_rate": 2.44109418566749e-06, "loss": 0.5071, "step": 5525 }, { "epoch": 2.685714285714286, "grad_norm": 0.07293297745366367, "learning_rate": 2.4394494488743096e-06, "loss": 0.5158, "step": 5526 }, { "epoch": 2.6862006079027356, "grad_norm": 0.0691133111848275, "learning_rate": 2.437805087556791e-06, "loss": 0.4969, "step": 5527 }, { "epoch": 2.6866869300911853, "grad_norm": 0.07390547638546005, "learning_rate": 2.4361611019560604e-06, "loss": 0.5474, "step": 5528 }, { "epoch": 2.687173252279635, "grad_norm": 0.07174916433543181, "learning_rate": 2.434517492313188e-06, "loss": 0.5097, "step": 5529 }, { "epoch": 2.687659574468085, "grad_norm": 0.07005905846385797, "learning_rate": 2.4328742588691943e-06, "loss": 0.5319, "step": 5530 }, { "epoch": 2.688145896656535, "grad_norm": 0.07161187006550968, "learning_rate": 2.431231401865039e-06, "loss": 0.5531, "step": 5531 }, { "epoch": 2.688632218844985, "grad_norm": 0.07093107406094733, "learning_rate": 2.429588921541628e-06, "loss": 0.5302, "step": 5532 }, { "epoch": 2.6891185410334346, "grad_norm": 0.07327274548228252, "learning_rate": 2.427946818139813e-06, "loss": 0.5181, "step": 5533 }, { "epoch": 2.6896048632218843, "grad_norm": 0.07110966723406568, "learning_rate": 2.4263050919003896e-06, "loss": 0.5293, "step": 5534 }, { "epoch": 2.6900911854103344, "grad_norm": 0.0736781505637428, "learning_rate": 2.424663743064098e-06, "loss": 0.5439, "step": 5535 }, { "epoch": 2.690577507598784, "grad_norm": 0.07541627188970834, "learning_rate": 2.4230227718716236e-06, "loss": 0.5403, "step": 5536 }, { "epoch": 2.6910638297872342, "grad_norm": 0.06986709238970748, "learning_rate": 2.421382178563596e-06, "loss": 0.5453, "step": 5537 }, { "epoch": 2.691550151975684, "grad_norm": 0.07320275588309096, "learning_rate": 2.419741963380592e-06, "loss": 0.5262, "step": 5538 }, { "epoch": 2.6920364741641336, "grad_norm": 0.06986324858812706, "learning_rate": 2.4181021265631266e-06, "loss": 0.5027, "step": 5539 }, { "epoch": 2.6925227963525837, "grad_norm": 0.07304783069393933, "learning_rate": 2.4164626683516645e-06, "loss": 0.5143, "step": 5540 }, { "epoch": 2.6930091185410334, "grad_norm": 0.07433487188953387, "learning_rate": 2.414823588986614e-06, "loss": 0.5423, "step": 5541 }, { "epoch": 2.6934954407294835, "grad_norm": 0.06984550945744551, "learning_rate": 2.413184888708328e-06, "loss": 0.5209, "step": 5542 }, { "epoch": 2.6939817629179332, "grad_norm": 0.07082889998695244, "learning_rate": 2.4115465677571028e-06, "loss": 0.4847, "step": 5543 }, { "epoch": 2.694468085106383, "grad_norm": 0.07075816786007702, "learning_rate": 2.409908626373179e-06, "loss": 0.5254, "step": 5544 }, { "epoch": 2.6949544072948326, "grad_norm": 0.06965501234774592, "learning_rate": 2.4082710647967433e-06, "loss": 0.5289, "step": 5545 }, { "epoch": 2.6954407294832827, "grad_norm": 0.06882764416383183, "learning_rate": 2.4066338832679247e-06, "loss": 0.4929, "step": 5546 }, { "epoch": 2.6959270516717324, "grad_norm": 0.06989379449315315, "learning_rate": 2.4049970820267955e-06, "loss": 0.5305, "step": 5547 }, { "epoch": 2.6964133738601825, "grad_norm": 0.07065726726124981, "learning_rate": 2.403360661313378e-06, "loss": 0.5114, "step": 5548 }, { "epoch": 2.6968996960486322, "grad_norm": 0.07201786486434365, "learning_rate": 2.4017246213676327e-06, "loss": 0.5113, "step": 5549 }, { "epoch": 2.697386018237082, "grad_norm": 0.07164575621587971, "learning_rate": 2.4000889624294665e-06, "loss": 0.5153, "step": 5550 }, { "epoch": 2.697872340425532, "grad_norm": 0.07729564282718766, "learning_rate": 2.3984536847387297e-06, "loss": 0.5508, "step": 5551 }, { "epoch": 2.6983586626139817, "grad_norm": 0.07260408882671729, "learning_rate": 2.3968187885352177e-06, "loss": 0.5447, "step": 5552 }, { "epoch": 2.698844984802432, "grad_norm": 0.06809081848991765, "learning_rate": 2.3951842740586713e-06, "loss": 0.4852, "step": 5553 }, { "epoch": 2.6993313069908815, "grad_norm": 0.07183952533858455, "learning_rate": 2.3935501415487695e-06, "loss": 0.5079, "step": 5554 }, { "epoch": 2.699817629179331, "grad_norm": 0.07128041658867738, "learning_rate": 2.391916391245141e-06, "loss": 0.537, "step": 5555 }, { "epoch": 2.700303951367781, "grad_norm": 0.07102924837917349, "learning_rate": 2.3902830233873576e-06, "loss": 0.5233, "step": 5556 }, { "epoch": 2.700790273556231, "grad_norm": 0.07198285070901485, "learning_rate": 2.388650038214933e-06, "loss": 0.5465, "step": 5557 }, { "epoch": 2.7012765957446807, "grad_norm": 0.07059293976643284, "learning_rate": 2.3870174359673265e-06, "loss": 0.5278, "step": 5558 }, { "epoch": 2.701762917933131, "grad_norm": 0.0704532376320318, "learning_rate": 2.3853852168839405e-06, "loss": 0.5377, "step": 5559 }, { "epoch": 2.7022492401215805, "grad_norm": 0.07405614468932602, "learning_rate": 2.3837533812041215e-06, "loss": 0.5281, "step": 5560 }, { "epoch": 2.70273556231003, "grad_norm": 0.06904732661446518, "learning_rate": 2.38212192916716e-06, "loss": 0.5085, "step": 5561 }, { "epoch": 2.7032218844984803, "grad_norm": 0.07158806723325084, "learning_rate": 2.3804908610122897e-06, "loss": 0.494, "step": 5562 }, { "epoch": 2.70370820668693, "grad_norm": 0.07248825127993813, "learning_rate": 2.378860176978688e-06, "loss": 0.5232, "step": 5563 }, { "epoch": 2.70419452887538, "grad_norm": 0.07019742536519977, "learning_rate": 2.377229877305476e-06, "loss": 0.4804, "step": 5564 }, { "epoch": 2.70468085106383, "grad_norm": 0.0743684617059333, "learning_rate": 2.375599962231717e-06, "loss": 0.5729, "step": 5565 }, { "epoch": 2.7051671732522795, "grad_norm": 0.07020128103353578, "learning_rate": 2.373970431996424e-06, "loss": 0.5437, "step": 5566 }, { "epoch": 2.7056534954407296, "grad_norm": 0.06930938199853524, "learning_rate": 2.3723412868385463e-06, "loss": 0.5031, "step": 5567 }, { "epoch": 2.7061398176291793, "grad_norm": 0.07223579879131732, "learning_rate": 2.3707125269969814e-06, "loss": 0.5095, "step": 5568 }, { "epoch": 2.7066261398176295, "grad_norm": 0.0719653038090186, "learning_rate": 2.3690841527105658e-06, "loss": 0.5364, "step": 5569 }, { "epoch": 2.707112462006079, "grad_norm": 0.06755525438650527, "learning_rate": 2.3674561642180826e-06, "loss": 0.4909, "step": 5570 }, { "epoch": 2.707598784194529, "grad_norm": 0.07029513463711251, "learning_rate": 2.365828561758259e-06, "loss": 0.5249, "step": 5571 }, { "epoch": 2.7080851063829785, "grad_norm": 0.07209432010500018, "learning_rate": 2.3642013455697633e-06, "loss": 0.5425, "step": 5572 }, { "epoch": 2.7085714285714286, "grad_norm": 0.07448050151918781, "learning_rate": 2.3625745158912083e-06, "loss": 0.5562, "step": 5573 }, { "epoch": 2.7090577507598783, "grad_norm": 0.07373010631886454, "learning_rate": 2.360948072961151e-06, "loss": 0.5313, "step": 5574 }, { "epoch": 2.7095440729483284, "grad_norm": 0.06972371224639265, "learning_rate": 2.3593220170180907e-06, "loss": 0.5047, "step": 5575 }, { "epoch": 2.710030395136778, "grad_norm": 0.0723323149363478, "learning_rate": 2.3576963483004695e-06, "loss": 0.5209, "step": 5576 }, { "epoch": 2.710516717325228, "grad_norm": 0.07224878616671035, "learning_rate": 2.3560710670466736e-06, "loss": 0.5091, "step": 5577 }, { "epoch": 2.711003039513678, "grad_norm": 0.06951460608906329, "learning_rate": 2.354446173495032e-06, "loss": 0.4877, "step": 5578 }, { "epoch": 2.7114893617021276, "grad_norm": 0.06902839979461814, "learning_rate": 2.3528216678838167e-06, "loss": 0.5088, "step": 5579 }, { "epoch": 2.7119756838905777, "grad_norm": 0.07326065318949865, "learning_rate": 2.351197550451243e-06, "loss": 0.5157, "step": 5580 }, { "epoch": 2.7124620060790274, "grad_norm": 0.07070154790644714, "learning_rate": 2.349573821435469e-06, "loss": 0.5108, "step": 5581 }, { "epoch": 2.712948328267477, "grad_norm": 0.0716585021239851, "learning_rate": 2.3479504810745974e-06, "loss": 0.5149, "step": 5582 }, { "epoch": 2.713434650455927, "grad_norm": 0.07281089782751654, "learning_rate": 2.3463275296066714e-06, "loss": 0.5311, "step": 5583 }, { "epoch": 2.713920972644377, "grad_norm": 0.06873241475266974, "learning_rate": 2.344704967269678e-06, "loss": 0.5035, "step": 5584 }, { "epoch": 2.7144072948328266, "grad_norm": 0.07011966025948382, "learning_rate": 2.3430827943015494e-06, "loss": 0.5286, "step": 5585 }, { "epoch": 2.7148936170212767, "grad_norm": 0.07632790413093576, "learning_rate": 2.341461010940157e-06, "loss": 0.5298, "step": 5586 }, { "epoch": 2.7153799392097264, "grad_norm": 0.07078491865452298, "learning_rate": 2.339839617423318e-06, "loss": 0.5321, "step": 5587 }, { "epoch": 2.715866261398176, "grad_norm": 0.06985087531831614, "learning_rate": 2.3382186139887907e-06, "loss": 0.5166, "step": 5588 }, { "epoch": 2.7163525835866262, "grad_norm": 0.07268927934417703, "learning_rate": 2.336598000874277e-06, "loss": 0.5353, "step": 5589 }, { "epoch": 2.716838905775076, "grad_norm": 0.06954367265126185, "learning_rate": 2.3349777783174215e-06, "loss": 0.5081, "step": 5590 }, { "epoch": 2.717325227963526, "grad_norm": 0.07109153459356275, "learning_rate": 2.333357946555812e-06, "loss": 0.5051, "step": 5591 }, { "epoch": 2.7178115501519757, "grad_norm": 0.07053609850390538, "learning_rate": 2.3317385058269776e-06, "loss": 0.5118, "step": 5592 }, { "epoch": 2.7182978723404254, "grad_norm": 0.07361033838033207, "learning_rate": 2.3301194563683914e-06, "loss": 0.568, "step": 5593 }, { "epoch": 2.7187841945288755, "grad_norm": 0.07481171847106032, "learning_rate": 2.3285007984174686e-06, "loss": 0.5372, "step": 5594 }, { "epoch": 2.7192705167173252, "grad_norm": 0.07365341846223933, "learning_rate": 2.3268825322115662e-06, "loss": 0.5754, "step": 5595 }, { "epoch": 2.7197568389057754, "grad_norm": 0.07022134671005106, "learning_rate": 2.3252646579879856e-06, "loss": 0.5142, "step": 5596 }, { "epoch": 2.720243161094225, "grad_norm": 0.07003090411619539, "learning_rate": 2.323647175983969e-06, "loss": 0.5103, "step": 5597 }, { "epoch": 2.7207294832826747, "grad_norm": 0.07080753856763596, "learning_rate": 2.3220300864367023e-06, "loss": 0.5007, "step": 5598 }, { "epoch": 2.7212158054711244, "grad_norm": 0.07017405179048752, "learning_rate": 2.320413389583313e-06, "loss": 0.5339, "step": 5599 }, { "epoch": 2.7217021276595745, "grad_norm": 0.070600487568738, "learning_rate": 2.318797085660871e-06, "loss": 0.5135, "step": 5600 }, { "epoch": 2.722188449848024, "grad_norm": 0.07090042371628778, "learning_rate": 2.3171811749063915e-06, "loss": 0.5456, "step": 5601 }, { "epoch": 2.7226747720364743, "grad_norm": 0.06950781471709777, "learning_rate": 2.3155656575568235e-06, "loss": 0.4862, "step": 5602 }, { "epoch": 2.723161094224924, "grad_norm": 0.07287528728892682, "learning_rate": 2.3139505338490703e-06, "loss": 0.5338, "step": 5603 }, { "epoch": 2.7236474164133737, "grad_norm": 0.06975011693382906, "learning_rate": 2.312335804019969e-06, "loss": 0.4974, "step": 5604 }, { "epoch": 2.724133738601824, "grad_norm": 0.0713308710116638, "learning_rate": 2.3107214683063016e-06, "loss": 0.5381, "step": 5605 }, { "epoch": 2.7246200607902735, "grad_norm": 0.07097525191371563, "learning_rate": 2.309107526944792e-06, "loss": 0.5391, "step": 5606 }, { "epoch": 2.7251063829787237, "grad_norm": 0.07264474672484772, "learning_rate": 2.307493980172106e-06, "loss": 0.5286, "step": 5607 }, { "epoch": 2.7255927051671733, "grad_norm": 0.07195173921207297, "learning_rate": 2.305880828224853e-06, "loss": 0.523, "step": 5608 }, { "epoch": 2.726079027355623, "grad_norm": 0.07126396038957458, "learning_rate": 2.3042680713395827e-06, "loss": 0.5382, "step": 5609 }, { "epoch": 2.7265653495440727, "grad_norm": 0.06793950166223107, "learning_rate": 2.3026557097527876e-06, "loss": 0.4953, "step": 5610 }, { "epoch": 2.727051671732523, "grad_norm": 0.06919454588343514, "learning_rate": 2.3010437437009024e-06, "loss": 0.5049, "step": 5611 }, { "epoch": 2.7275379939209725, "grad_norm": 0.07001554335212819, "learning_rate": 2.2994321734203033e-06, "loss": 0.527, "step": 5612 }, { "epoch": 2.7280243161094226, "grad_norm": 0.06837297417544108, "learning_rate": 2.2978209991473087e-06, "loss": 0.4849, "step": 5613 }, { "epoch": 2.7285106382978723, "grad_norm": 0.07098178413468569, "learning_rate": 2.2962102211181804e-06, "loss": 0.5339, "step": 5614 }, { "epoch": 2.728996960486322, "grad_norm": 0.07066720128928897, "learning_rate": 2.2945998395691184e-06, "loss": 0.5244, "step": 5615 }, { "epoch": 2.729483282674772, "grad_norm": 0.07113858815212676, "learning_rate": 2.2929898547362704e-06, "loss": 0.5362, "step": 5616 }, { "epoch": 2.729969604863222, "grad_norm": 0.07209782526530836, "learning_rate": 2.2913802668557184e-06, "loss": 0.5336, "step": 5617 }, { "epoch": 2.730455927051672, "grad_norm": 0.07330370384198436, "learning_rate": 2.2897710761634915e-06, "loss": 0.5103, "step": 5618 }, { "epoch": 2.7309422492401216, "grad_norm": 0.07166970053568655, "learning_rate": 2.2881622828955596e-06, "loss": 0.5067, "step": 5619 }, { "epoch": 2.7314285714285713, "grad_norm": 0.07989330839393682, "learning_rate": 2.2865538872878323e-06, "loss": 0.5246, "step": 5620 }, { "epoch": 2.731914893617021, "grad_norm": 0.06971311989823643, "learning_rate": 2.284945889576166e-06, "loss": 0.5022, "step": 5621 }, { "epoch": 2.732401215805471, "grad_norm": 0.07238271284131334, "learning_rate": 2.2833382899963535e-06, "loss": 0.5173, "step": 5622 }, { "epoch": 2.732887537993921, "grad_norm": 0.07277622360808055, "learning_rate": 2.2817310887841317e-06, "loss": 0.5278, "step": 5623 }, { "epoch": 2.733373860182371, "grad_norm": 0.07306060202304211, "learning_rate": 2.2801242861751764e-06, "loss": 0.5362, "step": 5624 }, { "epoch": 2.7338601823708206, "grad_norm": 0.07142415733875977, "learning_rate": 2.278517882405109e-06, "loss": 0.5305, "step": 5625 }, { "epoch": 2.7343465045592703, "grad_norm": 0.07339428420585087, "learning_rate": 2.27691187770949e-06, "loss": 0.5098, "step": 5626 }, { "epoch": 2.7348328267477204, "grad_norm": 0.07122928974435357, "learning_rate": 2.275306272323821e-06, "loss": 0.554, "step": 5627 }, { "epoch": 2.73531914893617, "grad_norm": 0.07150665122920959, "learning_rate": 2.2737010664835463e-06, "loss": 0.5146, "step": 5628 }, { "epoch": 2.7358054711246202, "grad_norm": 0.08413665437670209, "learning_rate": 2.2720962604240507e-06, "loss": 0.5171, "step": 5629 }, { "epoch": 2.73629179331307, "grad_norm": 0.0695353502480389, "learning_rate": 2.270491854380664e-06, "loss": 0.4989, "step": 5630 }, { "epoch": 2.7367781155015196, "grad_norm": 0.07134401407134096, "learning_rate": 2.2688878485886485e-06, "loss": 0.5136, "step": 5631 }, { "epoch": 2.7372644376899697, "grad_norm": 0.07457788954707786, "learning_rate": 2.267284243283216e-06, "loss": 0.5565, "step": 5632 }, { "epoch": 2.7377507598784194, "grad_norm": 0.07236904847217807, "learning_rate": 2.2656810386995177e-06, "loss": 0.5319, "step": 5633 }, { "epoch": 2.7382370820668696, "grad_norm": 0.07060571139008473, "learning_rate": 2.264078235072645e-06, "loss": 0.5054, "step": 5634 }, { "epoch": 2.7387234042553192, "grad_norm": 0.07177170374485324, "learning_rate": 2.2624758326376302e-06, "loss": 0.5148, "step": 5635 }, { "epoch": 2.739209726443769, "grad_norm": 0.07045058052696496, "learning_rate": 2.260873831629448e-06, "loss": 0.4928, "step": 5636 }, { "epoch": 2.7396960486322186, "grad_norm": 0.0712138974195529, "learning_rate": 2.2592722322830134e-06, "loss": 0.5102, "step": 5637 }, { "epoch": 2.7401823708206687, "grad_norm": 0.07170392369198372, "learning_rate": 2.257671034833181e-06, "loss": 0.5146, "step": 5638 }, { "epoch": 2.7406686930091184, "grad_norm": 0.07018899215736679, "learning_rate": 2.2560702395147525e-06, "loss": 0.5195, "step": 5639 }, { "epoch": 2.7411550151975685, "grad_norm": 0.07570360447055016, "learning_rate": 2.2544698465624636e-06, "loss": 0.5309, "step": 5640 }, { "epoch": 2.7416413373860182, "grad_norm": 0.07191305688364757, "learning_rate": 2.252869856210994e-06, "loss": 0.5303, "step": 5641 }, { "epoch": 2.742127659574468, "grad_norm": 0.07138258218868866, "learning_rate": 2.251270268694965e-06, "loss": 0.5163, "step": 5642 }, { "epoch": 2.742613981762918, "grad_norm": 0.07240392461843162, "learning_rate": 2.2496710842489366e-06, "loss": 0.5344, "step": 5643 }, { "epoch": 2.7431003039513677, "grad_norm": 0.0726514924909096, "learning_rate": 2.2480723031074115e-06, "loss": 0.5244, "step": 5644 }, { "epoch": 2.743586626139818, "grad_norm": 0.07269021595662892, "learning_rate": 2.246473925504835e-06, "loss": 0.5656, "step": 5645 }, { "epoch": 2.7440729483282675, "grad_norm": 0.07214932848795096, "learning_rate": 2.2448759516755875e-06, "loss": 0.5682, "step": 5646 }, { "epoch": 2.744559270516717, "grad_norm": 0.07191148846468028, "learning_rate": 2.2432783818539943e-06, "loss": 0.5371, "step": 5647 }, { "epoch": 2.745045592705167, "grad_norm": 0.07381359296925709, "learning_rate": 2.2416812162743223e-06, "loss": 0.5602, "step": 5648 }, { "epoch": 2.745531914893617, "grad_norm": 0.07175112283130808, "learning_rate": 2.2400844551707775e-06, "loss": 0.5234, "step": 5649 }, { "epoch": 2.7460182370820667, "grad_norm": 0.07346820404330406, "learning_rate": 2.238488098777506e-06, "loss": 0.5373, "step": 5650 }, { "epoch": 2.746504559270517, "grad_norm": 0.07105938320022662, "learning_rate": 2.236892147328596e-06, "loss": 0.5204, "step": 5651 }, { "epoch": 2.7469908814589665, "grad_norm": 0.07105792234366043, "learning_rate": 2.235296601058075e-06, "loss": 0.4975, "step": 5652 }, { "epoch": 2.747477203647416, "grad_norm": 0.06846726381934992, "learning_rate": 2.2337014601999126e-06, "loss": 0.489, "step": 5653 }, { "epoch": 2.7479635258358663, "grad_norm": 0.07091550834247812, "learning_rate": 2.2321067249880174e-06, "loss": 0.5415, "step": 5654 }, { "epoch": 2.7479635258358663, "eval_loss": 0.5694078803062439, "eval_runtime": 105.092, "eval_samples_per_second": 288.823, "eval_steps_per_second": 36.111, "step": 5654 }, { "epoch": 2.748449848024316, "grad_norm": 0.07141290534589537, "learning_rate": 2.23051239565624e-06, "loss": 0.513, "step": 5655 }, { "epoch": 2.748936170212766, "grad_norm": 0.07638192578767015, "learning_rate": 2.228918472438367e-06, "loss": 0.5621, "step": 5656 }, { "epoch": 2.749422492401216, "grad_norm": 0.07102926799877714, "learning_rate": 2.2273249555681353e-06, "loss": 0.5069, "step": 5657 }, { "epoch": 2.7499088145896655, "grad_norm": 0.07203318290741409, "learning_rate": 2.2257318452792125e-06, "loss": 0.5403, "step": 5658 }, { "epoch": 2.7503951367781156, "grad_norm": 0.0723852810533683, "learning_rate": 2.224139141805211e-06, "loss": 0.5342, "step": 5659 }, { "epoch": 2.7508814589665653, "grad_norm": 0.07043355093162192, "learning_rate": 2.2225468453796845e-06, "loss": 0.523, "step": 5660 }, { "epoch": 2.7513677811550155, "grad_norm": 0.0722684863195179, "learning_rate": 2.220954956236121e-06, "loss": 0.5052, "step": 5661 }, { "epoch": 2.751854103343465, "grad_norm": 0.06666211191068558, "learning_rate": 2.2193634746079547e-06, "loss": 0.4631, "step": 5662 }, { "epoch": 2.752340425531915, "grad_norm": 0.06917323312668554, "learning_rate": 2.217772400728559e-06, "loss": 0.4783, "step": 5663 }, { "epoch": 2.7528267477203645, "grad_norm": 0.07073418099752314, "learning_rate": 2.216181734831246e-06, "loss": 0.5492, "step": 5664 }, { "epoch": 2.7533130699088146, "grad_norm": 0.07056968473260043, "learning_rate": 2.2145914771492695e-06, "loss": 0.5046, "step": 5665 }, { "epoch": 2.7537993920972643, "grad_norm": 0.06975736567621126, "learning_rate": 2.213001627915823e-06, "loss": 0.4997, "step": 5666 }, { "epoch": 2.7542857142857144, "grad_norm": 0.07185033001928948, "learning_rate": 2.211412187364038e-06, "loss": 0.5482, "step": 5667 }, { "epoch": 2.754772036474164, "grad_norm": 0.07018843701639142, "learning_rate": 2.2098231557269904e-06, "loss": 0.5228, "step": 5668 }, { "epoch": 2.755258358662614, "grad_norm": 0.07489600556079372, "learning_rate": 2.208234533237692e-06, "loss": 0.5334, "step": 5669 }, { "epoch": 2.755744680851064, "grad_norm": 0.07143307120942906, "learning_rate": 2.206646320129097e-06, "loss": 0.5555, "step": 5670 }, { "epoch": 2.7562310030395136, "grad_norm": 0.07121134469861118, "learning_rate": 2.2050585166340983e-06, "loss": 0.5005, "step": 5671 }, { "epoch": 2.7567173252279638, "grad_norm": 0.06980461310639734, "learning_rate": 2.2034711229855294e-06, "loss": 0.5037, "step": 5672 }, { "epoch": 2.7572036474164134, "grad_norm": 0.07513261524121989, "learning_rate": 2.201884139416163e-06, "loss": 0.5456, "step": 5673 }, { "epoch": 2.757689969604863, "grad_norm": 0.06958029805555384, "learning_rate": 2.200297566158714e-06, "loss": 0.4997, "step": 5674 }, { "epoch": 2.758176291793313, "grad_norm": 0.07100101880619507, "learning_rate": 2.1987114034458334e-06, "loss": 0.5096, "step": 5675 }, { "epoch": 2.758662613981763, "grad_norm": 0.07142993279008687, "learning_rate": 2.197125651510115e-06, "loss": 0.5402, "step": 5676 }, { "epoch": 2.7591489361702126, "grad_norm": 0.07355139146509079, "learning_rate": 2.195540310584091e-06, "loss": 0.5544, "step": 5677 }, { "epoch": 2.7596352583586627, "grad_norm": 0.07604200028932977, "learning_rate": 2.193955380900234e-06, "loss": 0.5734, "step": 5678 }, { "epoch": 2.7601215805471124, "grad_norm": 0.07162264768455495, "learning_rate": 2.1923708626909556e-06, "loss": 0.5379, "step": 5679 }, { "epoch": 2.760607902735562, "grad_norm": 0.07280155808718544, "learning_rate": 2.1907867561886072e-06, "loss": 0.488, "step": 5680 }, { "epoch": 2.7610942249240122, "grad_norm": 0.07179248641820297, "learning_rate": 2.1892030616254806e-06, "loss": 0.5219, "step": 5681 }, { "epoch": 2.761580547112462, "grad_norm": 0.07289671255835094, "learning_rate": 2.187619779233806e-06, "loss": 0.5288, "step": 5682 }, { "epoch": 2.762066869300912, "grad_norm": 0.07128508152861099, "learning_rate": 2.1860369092457538e-06, "loss": 0.5193, "step": 5683 }, { "epoch": 2.7625531914893617, "grad_norm": 0.07621602651666413, "learning_rate": 2.1844544518934347e-06, "loss": 0.5894, "step": 5684 }, { "epoch": 2.7630395136778114, "grad_norm": 0.07324615745984062, "learning_rate": 2.1828724074088974e-06, "loss": 0.5413, "step": 5685 }, { "epoch": 2.7635258358662615, "grad_norm": 0.0687093950885111, "learning_rate": 2.181290776024131e-06, "loss": 0.4925, "step": 5686 }, { "epoch": 2.7640121580547112, "grad_norm": 0.07157969474799021, "learning_rate": 2.1797095579710635e-06, "loss": 0.5287, "step": 5687 }, { "epoch": 2.7644984802431614, "grad_norm": 0.0709592813701329, "learning_rate": 2.178128753481563e-06, "loss": 0.5144, "step": 5688 }, { "epoch": 2.764984802431611, "grad_norm": 0.07059597497068189, "learning_rate": 2.1765483627874367e-06, "loss": 0.5217, "step": 5689 }, { "epoch": 2.7654711246200607, "grad_norm": 0.06934498745626758, "learning_rate": 2.17496838612043e-06, "loss": 0.5057, "step": 5690 }, { "epoch": 2.7659574468085104, "grad_norm": 0.07260755429842425, "learning_rate": 2.17338882371223e-06, "loss": 0.542, "step": 5691 }, { "epoch": 2.7664437689969605, "grad_norm": 0.07227348678611166, "learning_rate": 2.1718096757944595e-06, "loss": 0.5525, "step": 5692 }, { "epoch": 2.7669300911854102, "grad_norm": 0.07359647639091586, "learning_rate": 2.1702309425986844e-06, "loss": 0.5761, "step": 5693 }, { "epoch": 2.7674164133738604, "grad_norm": 0.06963198455221663, "learning_rate": 2.168652624356407e-06, "loss": 0.5067, "step": 5694 }, { "epoch": 2.76790273556231, "grad_norm": 0.07089314811044034, "learning_rate": 2.1670747212990713e-06, "loss": 0.5509, "step": 5695 }, { "epoch": 2.7683890577507597, "grad_norm": 0.07457690545924978, "learning_rate": 2.1654972336580564e-06, "loss": 0.592, "step": 5696 }, { "epoch": 2.76887537993921, "grad_norm": 0.07034505457662908, "learning_rate": 2.163920161664685e-06, "loss": 0.4911, "step": 5697 }, { "epoch": 2.7693617021276595, "grad_norm": 0.07317829353500106, "learning_rate": 2.162343505550216e-06, "loss": 0.5376, "step": 5698 }, { "epoch": 2.7698480243161097, "grad_norm": 0.07272812315437173, "learning_rate": 2.160767265545848e-06, "loss": 0.5767, "step": 5699 }, { "epoch": 2.7703343465045593, "grad_norm": 0.07097499352682997, "learning_rate": 2.1591914418827186e-06, "loss": 0.5181, "step": 5700 }, { "epoch": 2.770820668693009, "grad_norm": 0.07338895054972039, "learning_rate": 2.1576160347919057e-06, "loss": 0.5079, "step": 5701 }, { "epoch": 2.7713069908814587, "grad_norm": 0.07340714535159648, "learning_rate": 2.156041044504423e-06, "loss": 0.5499, "step": 5702 }, { "epoch": 2.771793313069909, "grad_norm": 0.07228642072474788, "learning_rate": 2.154466471251226e-06, "loss": 0.5442, "step": 5703 }, { "epoch": 2.7722796352583585, "grad_norm": 0.07261309153864731, "learning_rate": 2.1528923152632082e-06, "loss": 0.5504, "step": 5704 }, { "epoch": 2.7727659574468086, "grad_norm": 0.07080196355380952, "learning_rate": 2.1513185767712007e-06, "loss": 0.4914, "step": 5705 }, { "epoch": 2.7732522796352583, "grad_norm": 0.07004859884970265, "learning_rate": 2.1497452560059756e-06, "loss": 0.5034, "step": 5706 }, { "epoch": 2.773738601823708, "grad_norm": 0.07091009165202827, "learning_rate": 2.1481723531982417e-06, "loss": 0.549, "step": 5707 }, { "epoch": 2.774224924012158, "grad_norm": 0.07295709553380648, "learning_rate": 2.146599868578649e-06, "loss": 0.5187, "step": 5708 }, { "epoch": 2.774711246200608, "grad_norm": 0.0709791895901247, "learning_rate": 2.1450278023777823e-06, "loss": 0.5067, "step": 5709 }, { "epoch": 2.775197568389058, "grad_norm": 0.0714091929739096, "learning_rate": 2.1434561548261666e-06, "loss": 0.5226, "step": 5710 }, { "epoch": 2.7756838905775076, "grad_norm": 0.06989281216328769, "learning_rate": 2.1418849261542667e-06, "loss": 0.5234, "step": 5711 }, { "epoch": 2.7761702127659573, "grad_norm": 0.06963062486462816, "learning_rate": 2.1403141165924877e-06, "loss": 0.5403, "step": 5712 }, { "epoch": 2.7766565349544075, "grad_norm": 0.07047935292102145, "learning_rate": 2.1387437263711702e-06, "loss": 0.5158, "step": 5713 }, { "epoch": 2.777142857142857, "grad_norm": 0.07346388177391716, "learning_rate": 2.1371737557205928e-06, "loss": 0.5828, "step": 5714 }, { "epoch": 2.7776291793313073, "grad_norm": 0.06995173229130977, "learning_rate": 2.135604204870975e-06, "loss": 0.5251, "step": 5715 }, { "epoch": 2.778115501519757, "grad_norm": 0.07026410882101986, "learning_rate": 2.1340350740524735e-06, "loss": 0.5198, "step": 5716 }, { "epoch": 2.7786018237082066, "grad_norm": 0.07176639094885762, "learning_rate": 2.1324663634951826e-06, "loss": 0.5547, "step": 5717 }, { "epoch": 2.7790881458966563, "grad_norm": 0.0701402835214612, "learning_rate": 2.130898073429137e-06, "loss": 0.5136, "step": 5718 }, { "epoch": 2.7795744680851064, "grad_norm": 0.07055392906838966, "learning_rate": 2.1293302040843073e-06, "loss": 0.5223, "step": 5719 }, { "epoch": 2.780060790273556, "grad_norm": 0.07153922747116245, "learning_rate": 2.1277627556906057e-06, "loss": 0.5371, "step": 5720 }, { "epoch": 2.7805471124620063, "grad_norm": 0.07265716916318489, "learning_rate": 2.1261957284778784e-06, "loss": 0.5192, "step": 5721 }, { "epoch": 2.781033434650456, "grad_norm": 0.07098168504086935, "learning_rate": 2.1246291226759157e-06, "loss": 0.5142, "step": 5722 }, { "epoch": 2.7815197568389056, "grad_norm": 0.07136917271299496, "learning_rate": 2.1230629385144388e-06, "loss": 0.5155, "step": 5723 }, { "epoch": 2.7820060790273557, "grad_norm": 0.07084188270269456, "learning_rate": 2.1214971762231113e-06, "loss": 0.5048, "step": 5724 }, { "epoch": 2.7824924012158054, "grad_norm": 0.07054557304479277, "learning_rate": 2.1199318360315356e-06, "loss": 0.516, "step": 5725 }, { "epoch": 2.7829787234042556, "grad_norm": 0.07242674971400906, "learning_rate": 2.118366918169251e-06, "loss": 0.5614, "step": 5726 }, { "epoch": 2.7834650455927052, "grad_norm": 0.07352127403953698, "learning_rate": 2.1168024228657345e-06, "loss": 0.5716, "step": 5727 }, { "epoch": 2.783951367781155, "grad_norm": 0.06993833048638261, "learning_rate": 2.115238350350402e-06, "loss": 0.5034, "step": 5728 }, { "epoch": 2.7844376899696046, "grad_norm": 0.07044432038973897, "learning_rate": 2.1136747008526055e-06, "loss": 0.5012, "step": 5729 }, { "epoch": 2.7849240121580547, "grad_norm": 0.07217061610508575, "learning_rate": 2.1121114746016386e-06, "loss": 0.4989, "step": 5730 }, { "epoch": 2.7854103343465044, "grad_norm": 0.07452163843632588, "learning_rate": 2.1105486718267304e-06, "loss": 0.5715, "step": 5731 }, { "epoch": 2.7858966565349546, "grad_norm": 0.0694893833016534, "learning_rate": 2.1089862927570474e-06, "loss": 0.5043, "step": 5732 }, { "epoch": 2.7863829787234042, "grad_norm": 0.07303068781298948, "learning_rate": 2.1074243376216947e-06, "loss": 0.5159, "step": 5733 }, { "epoch": 2.786869300911854, "grad_norm": 0.06736055148091624, "learning_rate": 2.105862806649716e-06, "loss": 0.4786, "step": 5734 }, { "epoch": 2.787355623100304, "grad_norm": 0.07166611537333546, "learning_rate": 2.104301700070091e-06, "loss": 0.5217, "step": 5735 }, { "epoch": 2.7878419452887537, "grad_norm": 0.06934871248787595, "learning_rate": 2.102741018111739e-06, "loss": 0.5031, "step": 5736 }, { "epoch": 2.788328267477204, "grad_norm": 0.07132465895570106, "learning_rate": 2.1011807610035184e-06, "loss": 0.5303, "step": 5737 }, { "epoch": 2.7888145896656535, "grad_norm": 0.0726711361821812, "learning_rate": 2.099620928974219e-06, "loss": 0.5462, "step": 5738 }, { "epoch": 2.7893009118541032, "grad_norm": 0.07451730972315569, "learning_rate": 2.098061522252574e-06, "loss": 0.5347, "step": 5739 }, { "epoch": 2.7897872340425534, "grad_norm": 0.07305846242527281, "learning_rate": 2.0965025410672535e-06, "loss": 0.5397, "step": 5740 }, { "epoch": 2.790273556231003, "grad_norm": 0.07096620474136943, "learning_rate": 2.094943985646864e-06, "loss": 0.5057, "step": 5741 }, { "epoch": 2.7907598784194527, "grad_norm": 0.07197848039927461, "learning_rate": 2.0933858562199496e-06, "loss": 0.515, "step": 5742 }, { "epoch": 2.791246200607903, "grad_norm": 0.07162385688008034, "learning_rate": 2.0918281530149925e-06, "loss": 0.5598, "step": 5743 }, { "epoch": 2.7917325227963525, "grad_norm": 0.07225857096500149, "learning_rate": 2.090270876260412e-06, "loss": 0.5538, "step": 5744 }, { "epoch": 2.792218844984802, "grad_norm": 0.0710079335104998, "learning_rate": 2.0887140261845662e-06, "loss": 0.5129, "step": 5745 }, { "epoch": 2.7927051671732523, "grad_norm": 0.06897347717466744, "learning_rate": 2.087157603015748e-06, "loss": 0.4972, "step": 5746 }, { "epoch": 2.793191489361702, "grad_norm": 0.07158877477721493, "learning_rate": 2.085601606982188e-06, "loss": 0.5473, "step": 5747 }, { "epoch": 2.793677811550152, "grad_norm": 0.07226705053894875, "learning_rate": 2.084046038312059e-06, "loss": 0.5454, "step": 5748 }, { "epoch": 2.794164133738602, "grad_norm": 0.06884637258851432, "learning_rate": 2.0824908972334663e-06, "loss": 0.4886, "step": 5749 }, { "epoch": 2.7946504559270515, "grad_norm": 0.06711383641550824, "learning_rate": 2.0809361839744525e-06, "loss": 0.4852, "step": 5750 }, { "epoch": 2.7951367781155017, "grad_norm": 0.06991482797358153, "learning_rate": 2.079381898762999e-06, "loss": 0.5301, "step": 5751 }, { "epoch": 2.7956231003039513, "grad_norm": 0.07046779476426093, "learning_rate": 2.077828041827026e-06, "loss": 0.4931, "step": 5752 }, { "epoch": 2.7961094224924015, "grad_norm": 0.07044017005746363, "learning_rate": 2.076274613394386e-06, "loss": 0.5224, "step": 5753 }, { "epoch": 2.796595744680851, "grad_norm": 0.06963419833518798, "learning_rate": 2.0747216136928723e-06, "loss": 0.5132, "step": 5754 }, { "epoch": 2.797082066869301, "grad_norm": 0.07118776107136607, "learning_rate": 2.0731690429502147e-06, "loss": 0.5121, "step": 5755 }, { "epoch": 2.7975683890577505, "grad_norm": 0.0724347149903344, "learning_rate": 2.0716169013940812e-06, "loss": 0.5385, "step": 5756 }, { "epoch": 2.7980547112462006, "grad_norm": 0.06874723162493693, "learning_rate": 2.070065189252075e-06, "loss": 0.489, "step": 5757 }, { "epoch": 2.7985410334346503, "grad_norm": 0.07044914805777469, "learning_rate": 2.068513906751738e-06, "loss": 0.5126, "step": 5758 }, { "epoch": 2.7990273556231005, "grad_norm": 0.06807560767207664, "learning_rate": 2.0669630541205466e-06, "loss": 0.4912, "step": 5759 }, { "epoch": 2.79951367781155, "grad_norm": 0.07073039019185584, "learning_rate": 2.0654126315859163e-06, "loss": 0.5083, "step": 5760 }, { "epoch": 2.8, "grad_norm": 0.06932443322958762, "learning_rate": 2.063862639375199e-06, "loss": 0.5178, "step": 5761 }, { "epoch": 2.80048632218845, "grad_norm": 0.07414322991678517, "learning_rate": 2.062313077715684e-06, "loss": 0.5711, "step": 5762 }, { "epoch": 2.8009726443768996, "grad_norm": 0.07084694860474966, "learning_rate": 2.0607639468345965e-06, "loss": 0.5, "step": 5763 }, { "epoch": 2.8014589665653498, "grad_norm": 0.07101310025760442, "learning_rate": 2.0592152469590994e-06, "loss": 0.494, "step": 5764 }, { "epoch": 2.8019452887537994, "grad_norm": 0.0723072157347182, "learning_rate": 2.057666978316289e-06, "loss": 0.5021, "step": 5765 }, { "epoch": 2.802431610942249, "grad_norm": 0.07195987893412906, "learning_rate": 2.0561191411332052e-06, "loss": 0.5051, "step": 5766 }, { "epoch": 2.802917933130699, "grad_norm": 0.07177955323971101, "learning_rate": 2.054571735636822e-06, "loss": 0.5323, "step": 5767 }, { "epoch": 2.803404255319149, "grad_norm": 0.07045972107988707, "learning_rate": 2.0530247620540444e-06, "loss": 0.5117, "step": 5768 }, { "epoch": 2.8038905775075986, "grad_norm": 0.0743218681275549, "learning_rate": 2.05147822061172e-06, "loss": 0.5097, "step": 5769 }, { "epoch": 2.8043768996960488, "grad_norm": 0.07315073997702788, "learning_rate": 2.049932111536632e-06, "loss": 0.5018, "step": 5770 }, { "epoch": 2.8048632218844984, "grad_norm": 0.0721786973598316, "learning_rate": 2.0483864350555e-06, "loss": 0.5167, "step": 5771 }, { "epoch": 2.805349544072948, "grad_norm": 0.07163341001088543, "learning_rate": 2.0468411913949787e-06, "loss": 0.5106, "step": 5772 }, { "epoch": 2.8058358662613982, "grad_norm": 0.07239868251111778, "learning_rate": 2.0452963807816616e-06, "loss": 0.5396, "step": 5773 }, { "epoch": 2.806322188449848, "grad_norm": 0.0693450570944712, "learning_rate": 2.043752003442078e-06, "loss": 0.5128, "step": 5774 }, { "epoch": 2.806808510638298, "grad_norm": 0.07379031693412846, "learning_rate": 2.042208059602692e-06, "loss": 0.5533, "step": 5775 }, { "epoch": 2.8072948328267477, "grad_norm": 0.07184461643654642, "learning_rate": 2.0406645494899063e-06, "loss": 0.5332, "step": 5776 }, { "epoch": 2.8077811550151974, "grad_norm": 0.07341888226834573, "learning_rate": 2.039121473330059e-06, "loss": 0.5326, "step": 5777 }, { "epoch": 2.8082674772036476, "grad_norm": 0.0701332332262917, "learning_rate": 2.0375788313494245e-06, "loss": 0.5216, "step": 5778 }, { "epoch": 2.8087537993920972, "grad_norm": 0.07137725859230615, "learning_rate": 2.036036623774214e-06, "loss": 0.5662, "step": 5779 }, { "epoch": 2.8092401215805474, "grad_norm": 0.07060620140196677, "learning_rate": 2.0344948508305746e-06, "loss": 0.496, "step": 5780 }, { "epoch": 2.809726443768997, "grad_norm": 0.07221948308646506, "learning_rate": 2.03295351274459e-06, "loss": 0.5372, "step": 5781 }, { "epoch": 2.8102127659574467, "grad_norm": 0.070956310554444, "learning_rate": 2.031412609742279e-06, "loss": 0.5431, "step": 5782 }, { "epoch": 2.8106990881458964, "grad_norm": 0.06935941415008046, "learning_rate": 2.0298721420495986e-06, "loss": 0.5152, "step": 5783 }, { "epoch": 2.8111854103343465, "grad_norm": 0.07089598014718915, "learning_rate": 2.0283321098924407e-06, "loss": 0.5154, "step": 5784 }, { "epoch": 2.8116717325227962, "grad_norm": 0.07065291169604153, "learning_rate": 2.0267925134966333e-06, "loss": 0.5222, "step": 5785 }, { "epoch": 2.8121580547112464, "grad_norm": 0.07248031597923572, "learning_rate": 2.025253353087941e-06, "loss": 0.5289, "step": 5786 }, { "epoch": 2.812644376899696, "grad_norm": 0.07020067258837767, "learning_rate": 2.0237146288920632e-06, "loss": 0.4923, "step": 5787 }, { "epoch": 2.8131306990881457, "grad_norm": 0.07117474561102627, "learning_rate": 2.022176341134638e-06, "loss": 0.5274, "step": 5788 }, { "epoch": 2.813617021276596, "grad_norm": 0.07560309408683605, "learning_rate": 2.0206384900412364e-06, "loss": 0.5157, "step": 5789 }, { "epoch": 2.8141033434650455, "grad_norm": 0.0687342718106197, "learning_rate": 2.0191010758373675e-06, "loss": 0.5428, "step": 5790 }, { "epoch": 2.8145896656534957, "grad_norm": 0.07198071207499912, "learning_rate": 2.0175640987484755e-06, "loss": 0.538, "step": 5791 }, { "epoch": 2.8150759878419453, "grad_norm": 0.07066103065228692, "learning_rate": 2.0160275589999407e-06, "loss": 0.5595, "step": 5792 }, { "epoch": 2.815562310030395, "grad_norm": 0.07295026560041401, "learning_rate": 2.014491456817079e-06, "loss": 0.5116, "step": 5793 }, { "epoch": 2.8160486322188447, "grad_norm": 0.06917307368191179, "learning_rate": 2.0129557924251425e-06, "loss": 0.5249, "step": 5794 }, { "epoch": 2.816534954407295, "grad_norm": 0.06905962678506213, "learning_rate": 2.011420566049319e-06, "loss": 0.5124, "step": 5795 }, { "epoch": 2.8170212765957445, "grad_norm": 0.07397970018237573, "learning_rate": 2.0098857779147316e-06, "loss": 0.5273, "step": 5796 }, { "epoch": 2.8175075987841947, "grad_norm": 0.07157959264698292, "learning_rate": 2.00835142824644e-06, "loss": 0.5224, "step": 5797 }, { "epoch": 2.8179939209726443, "grad_norm": 0.07038787458285181, "learning_rate": 2.0068175172694394e-06, "loss": 0.5032, "step": 5798 }, { "epoch": 2.818480243161094, "grad_norm": 0.07188025256279675, "learning_rate": 2.0052840452086595e-06, "loss": 0.5242, "step": 5799 }, { "epoch": 2.818966565349544, "grad_norm": 0.07096406154939713, "learning_rate": 2.003751012288969e-06, "loss": 0.5073, "step": 5800 }, { "epoch": 2.819452887537994, "grad_norm": 0.07115547759235426, "learning_rate": 2.002218418735165e-06, "loss": 0.5042, "step": 5801 }, { "epoch": 2.819939209726444, "grad_norm": 0.07002606997167922, "learning_rate": 2.0006862647719887e-06, "loss": 0.527, "step": 5802 }, { "epoch": 2.8204255319148936, "grad_norm": 0.0718461093582595, "learning_rate": 1.999154550624113e-06, "loss": 0.5505, "step": 5803 }, { "epoch": 2.8209118541033433, "grad_norm": 0.06956317750838951, "learning_rate": 1.9976232765161453e-06, "loss": 0.523, "step": 5804 }, { "epoch": 2.8213981762917935, "grad_norm": 0.07007071552060323, "learning_rate": 1.99609244267263e-06, "loss": 0.5179, "step": 5805 }, { "epoch": 2.821884498480243, "grad_norm": 0.0719422450559224, "learning_rate": 1.994562049318046e-06, "loss": 0.5147, "step": 5806 }, { "epoch": 2.8223708206686933, "grad_norm": 0.07038272683119702, "learning_rate": 1.993032096676808e-06, "loss": 0.5769, "step": 5807 }, { "epoch": 2.822857142857143, "grad_norm": 0.07260561605391949, "learning_rate": 1.991502584973267e-06, "loss": 0.5128, "step": 5808 }, { "epoch": 2.8233434650455926, "grad_norm": 0.07200563112090586, "learning_rate": 1.989973514431709e-06, "loss": 0.5681, "step": 5809 }, { "epoch": 2.8238297872340423, "grad_norm": 0.070468852376796, "learning_rate": 1.9884448852763534e-06, "loss": 0.5061, "step": 5810 }, { "epoch": 2.8243161094224924, "grad_norm": 0.07277574241372085, "learning_rate": 1.9869166977313565e-06, "loss": 0.5544, "step": 5811 }, { "epoch": 2.824802431610942, "grad_norm": 0.07275952137937516, "learning_rate": 1.98538895202081e-06, "loss": 0.524, "step": 5812 }, { "epoch": 2.8252887537993923, "grad_norm": 0.07128718743871466, "learning_rate": 1.9838616483687414e-06, "loss": 0.4881, "step": 5813 }, { "epoch": 2.825775075987842, "grad_norm": 0.06921129214403458, "learning_rate": 1.982334786999111e-06, "loss": 0.4786, "step": 5814 }, { "epoch": 2.8262613981762916, "grad_norm": 0.07151066581385927, "learning_rate": 1.980808368135818e-06, "loss": 0.5232, "step": 5815 }, { "epoch": 2.8267477203647418, "grad_norm": 0.07207762732264693, "learning_rate": 1.979282392002691e-06, "loss": 0.5467, "step": 5816 }, { "epoch": 2.8272340425531914, "grad_norm": 0.07098923965832625, "learning_rate": 1.9777568588234985e-06, "loss": 0.5535, "step": 5817 }, { "epoch": 2.8277203647416416, "grad_norm": 0.07013401460573276, "learning_rate": 1.976231768821943e-06, "loss": 0.5142, "step": 5818 }, { "epoch": 2.8282066869300913, "grad_norm": 0.0732227452899353, "learning_rate": 1.9747071222216614e-06, "loss": 0.5528, "step": 5819 }, { "epoch": 2.828693009118541, "grad_norm": 0.0697618981827064, "learning_rate": 1.9731829192462236e-06, "loss": 0.5272, "step": 5820 }, { "epoch": 2.8291793313069906, "grad_norm": 0.07169420302433707, "learning_rate": 1.9716591601191413e-06, "loss": 0.4844, "step": 5821 }, { "epoch": 2.8296656534954407, "grad_norm": 0.07174393539631475, "learning_rate": 1.9701358450638543e-06, "loss": 0.5355, "step": 5822 }, { "epoch": 2.8301519756838904, "grad_norm": 0.07114986162707497, "learning_rate": 1.9686129743037387e-06, "loss": 0.4957, "step": 5823 }, { "epoch": 2.8306382978723406, "grad_norm": 0.07038983638335994, "learning_rate": 1.9670905480621068e-06, "loss": 0.5284, "step": 5824 }, { "epoch": 2.8311246200607902, "grad_norm": 0.07220166255257733, "learning_rate": 1.965568566562205e-06, "loss": 0.5183, "step": 5825 }, { "epoch": 2.83161094224924, "grad_norm": 0.068669207063441, "learning_rate": 1.9640470300272146e-06, "loss": 0.48, "step": 5826 }, { "epoch": 2.83209726443769, "grad_norm": 0.07061723105411294, "learning_rate": 1.962525938680252e-06, "loss": 0.5204, "step": 5827 }, { "epoch": 2.8325835866261397, "grad_norm": 0.07275773790562813, "learning_rate": 1.961005292744368e-06, "loss": 0.5324, "step": 5828 }, { "epoch": 2.83306990881459, "grad_norm": 0.07148353604041843, "learning_rate": 1.9594850924425486e-06, "loss": 0.5146, "step": 5829 }, { "epoch": 2.8335562310030395, "grad_norm": 0.07102619578790878, "learning_rate": 1.957965337997712e-06, "loss": 0.5195, "step": 5830 }, { "epoch": 2.8340425531914892, "grad_norm": 0.07194909091768985, "learning_rate": 1.9564460296327137e-06, "loss": 0.5421, "step": 5831 }, { "epoch": 2.8345288753799394, "grad_norm": 0.07034046436333947, "learning_rate": 1.9549271675703434e-06, "loss": 0.5525, "step": 5832 }, { "epoch": 2.835015197568389, "grad_norm": 0.072395113559526, "learning_rate": 1.953408752033325e-06, "loss": 0.5291, "step": 5833 }, { "epoch": 2.835501519756839, "grad_norm": 0.07123389602895357, "learning_rate": 1.951890783244316e-06, "loss": 0.5132, "step": 5834 }, { "epoch": 2.835987841945289, "grad_norm": 0.06929022928358465, "learning_rate": 1.9503732614259113e-06, "loss": 0.4982, "step": 5835 }, { "epoch": 2.8364741641337385, "grad_norm": 0.0715648512994881, "learning_rate": 1.948856186800636e-06, "loss": 0.5598, "step": 5836 }, { "epoch": 2.8369604863221882, "grad_norm": 0.07182233045283128, "learning_rate": 1.9473395595909533e-06, "loss": 0.5135, "step": 5837 }, { "epoch": 2.8374468085106384, "grad_norm": 0.07378443404345784, "learning_rate": 1.945823380019257e-06, "loss": 0.5535, "step": 5838 }, { "epoch": 2.837933130699088, "grad_norm": 0.0699727216242389, "learning_rate": 1.944307648307882e-06, "loss": 0.5181, "step": 5839 }, { "epoch": 2.838419452887538, "grad_norm": 0.07384742178422674, "learning_rate": 1.94279236467909e-06, "loss": 0.5804, "step": 5840 }, { "epoch": 2.838905775075988, "grad_norm": 0.06967814521185865, "learning_rate": 1.9412775293550814e-06, "loss": 0.4913, "step": 5841 }, { "epoch": 2.8393920972644375, "grad_norm": 0.07668445566534125, "learning_rate": 1.9397631425579884e-06, "loss": 0.5283, "step": 5842 }, { "epoch": 2.8398784194528877, "grad_norm": 0.06996898714384027, "learning_rate": 1.9382492045098792e-06, "loss": 0.5263, "step": 5843 }, { "epoch": 2.8403647416413373, "grad_norm": 0.07105381155641805, "learning_rate": 1.9367357154327577e-06, "loss": 0.4945, "step": 5844 }, { "epoch": 2.8408510638297875, "grad_norm": 0.07109255895461253, "learning_rate": 1.935222675548556e-06, "loss": 0.5154, "step": 5845 }, { "epoch": 2.841337386018237, "grad_norm": 0.07181578825833233, "learning_rate": 1.933710085079146e-06, "loss": 0.5159, "step": 5846 }, { "epoch": 2.841823708206687, "grad_norm": 0.07241450417865103, "learning_rate": 1.9321979442463325e-06, "loss": 0.5174, "step": 5847 }, { "epoch": 2.8423100303951365, "grad_norm": 0.07379668212785787, "learning_rate": 1.9306862532718527e-06, "loss": 0.5296, "step": 5848 }, { "epoch": 2.8427963525835866, "grad_norm": 0.07232859649421171, "learning_rate": 1.92917501237738e-06, "loss": 0.5534, "step": 5849 }, { "epoch": 2.8432826747720363, "grad_norm": 0.07007512173931056, "learning_rate": 1.9276642217845197e-06, "loss": 0.5322, "step": 5850 }, { "epoch": 2.8437689969604865, "grad_norm": 0.0722997015260819, "learning_rate": 1.926153881714813e-06, "loss": 0.5801, "step": 5851 }, { "epoch": 2.844255319148936, "grad_norm": 0.06906461006082809, "learning_rate": 1.9246439923897335e-06, "loss": 0.4783, "step": 5852 }, { "epoch": 2.844741641337386, "grad_norm": 0.07137444718583476, "learning_rate": 1.9231345540306893e-06, "loss": 0.4999, "step": 5853 }, { "epoch": 2.845227963525836, "grad_norm": 0.06893563741233799, "learning_rate": 1.9216255668590233e-06, "loss": 0.5044, "step": 5854 }, { "epoch": 2.8457142857142856, "grad_norm": 0.07055570255494763, "learning_rate": 1.92011703109601e-06, "loss": 0.5061, "step": 5855 }, { "epoch": 2.8462006079027358, "grad_norm": 0.07170500706941783, "learning_rate": 1.918608946962858e-06, "loss": 0.5391, "step": 5856 }, { "epoch": 2.8466869300911855, "grad_norm": 0.0736865699433173, "learning_rate": 1.9171013146807148e-06, "loss": 0.5374, "step": 5857 }, { "epoch": 2.847173252279635, "grad_norm": 0.06915405862816253, "learning_rate": 1.9155941344706547e-06, "loss": 0.4745, "step": 5858 }, { "epoch": 2.8476595744680853, "grad_norm": 0.07017117745507662, "learning_rate": 1.914087406553691e-06, "loss": 0.5254, "step": 5859 }, { "epoch": 2.848145896656535, "grad_norm": 0.07153193780324923, "learning_rate": 1.912581131150764e-06, "loss": 0.496, "step": 5860 }, { "epoch": 2.848632218844985, "grad_norm": 0.06907821202483959, "learning_rate": 1.911075308482754e-06, "loss": 0.4957, "step": 5861 }, { "epoch": 2.8491185410334348, "grad_norm": 0.07038768803173205, "learning_rate": 1.909569938770474e-06, "loss": 0.5387, "step": 5862 }, { "epoch": 2.8496048632218844, "grad_norm": 0.0713273212298614, "learning_rate": 1.908065022234668e-06, "loss": 0.5229, "step": 5863 }, { "epoch": 2.850091185410334, "grad_norm": 0.07059827157015541, "learning_rate": 1.9065605590960146e-06, "loss": 0.5057, "step": 5864 }, { "epoch": 2.8505775075987843, "grad_norm": 0.07140515753841477, "learning_rate": 1.9050565495751271e-06, "loss": 0.5229, "step": 5865 }, { "epoch": 2.851063829787234, "grad_norm": 0.07347178816517402, "learning_rate": 1.9035529938925518e-06, "loss": 0.5131, "step": 5866 }, { "epoch": 2.851550151975684, "grad_norm": 0.07189753191769561, "learning_rate": 1.9020498922687668e-06, "loss": 0.5217, "step": 5867 }, { "epoch": 2.8520364741641338, "grad_norm": 0.07624390139991714, "learning_rate": 1.9005472449241857e-06, "loss": 0.5327, "step": 5868 }, { "epoch": 2.8525227963525834, "grad_norm": 0.06986334710139973, "learning_rate": 1.8990450520791547e-06, "loss": 0.4916, "step": 5869 }, { "epoch": 2.8530091185410336, "grad_norm": 0.0720841881751233, "learning_rate": 1.8975433139539534e-06, "loss": 0.5041, "step": 5870 }, { "epoch": 2.8534954407294832, "grad_norm": 0.07233949366611366, "learning_rate": 1.8960420307687937e-06, "loss": 0.5452, "step": 5871 }, { "epoch": 2.8539817629179334, "grad_norm": 0.0741325589165113, "learning_rate": 1.8945412027438226e-06, "loss": 0.5202, "step": 5872 }, { "epoch": 2.854468085106383, "grad_norm": 0.06993626746771713, "learning_rate": 1.8930408300991194e-06, "loss": 0.5067, "step": 5873 }, { "epoch": 2.8549544072948327, "grad_norm": 0.06993304776029446, "learning_rate": 1.8915409130546968e-06, "loss": 0.5039, "step": 5874 }, { "epoch": 2.8554407294832824, "grad_norm": 0.07288562071089878, "learning_rate": 1.8900414518305004e-06, "loss": 0.5344, "step": 5875 }, { "epoch": 2.8559270516717326, "grad_norm": 0.0720526868085092, "learning_rate": 1.8885424466464086e-06, "loss": 0.5334, "step": 5876 }, { "epoch": 2.8564133738601822, "grad_norm": 0.07360343886864168, "learning_rate": 1.8870438977222345e-06, "loss": 0.5405, "step": 5877 }, { "epoch": 2.8568996960486324, "grad_norm": 0.07383537183725086, "learning_rate": 1.885545805277723e-06, "loss": 0.5839, "step": 5878 }, { "epoch": 2.857386018237082, "grad_norm": 0.06836280121655228, "learning_rate": 1.8840481695325519e-06, "loss": 0.504, "step": 5879 }, { "epoch": 2.8578723404255317, "grad_norm": 0.07503690367359185, "learning_rate": 1.8825509907063328e-06, "loss": 0.5568, "step": 5880 }, { "epoch": 2.858358662613982, "grad_norm": 0.07101128036324922, "learning_rate": 1.88105426901861e-06, "loss": 0.5388, "step": 5881 }, { "epoch": 2.8588449848024315, "grad_norm": 0.07018221159276668, "learning_rate": 1.8795580046888607e-06, "loss": 0.4987, "step": 5882 }, { "epoch": 2.8593313069908817, "grad_norm": 0.07351941044608798, "learning_rate": 1.878062197936495e-06, "loss": 0.548, "step": 5883 }, { "epoch": 2.8598176291793314, "grad_norm": 0.0713989646309417, "learning_rate": 1.8765668489808559e-06, "loss": 0.5045, "step": 5884 }, { "epoch": 2.860303951367781, "grad_norm": 0.07032523093089568, "learning_rate": 1.8750719580412196e-06, "loss": 0.5002, "step": 5885 }, { "epoch": 2.860790273556231, "grad_norm": 0.07441826580913863, "learning_rate": 1.873577525336795e-06, "loss": 0.5564, "step": 5886 }, { "epoch": 2.861276595744681, "grad_norm": 0.07111767656647187, "learning_rate": 1.872083551086723e-06, "loss": 0.5343, "step": 5887 }, { "epoch": 2.8617629179331305, "grad_norm": 0.06989844336756533, "learning_rate": 1.8705900355100787e-06, "loss": 0.5444, "step": 5888 }, { "epoch": 2.8622492401215807, "grad_norm": 0.07148322306161893, "learning_rate": 1.8690969788258684e-06, "loss": 0.5458, "step": 5889 }, { "epoch": 2.8627355623100303, "grad_norm": 0.07115519202624379, "learning_rate": 1.8676043812530325e-06, "loss": 0.5322, "step": 5890 }, { "epoch": 2.86322188449848, "grad_norm": 0.07196279675006961, "learning_rate": 1.866112243010444e-06, "loss": 0.5227, "step": 5891 }, { "epoch": 2.86370820668693, "grad_norm": 0.07119518886513132, "learning_rate": 1.864620564316907e-06, "loss": 0.5339, "step": 5892 }, { "epoch": 2.86419452887538, "grad_norm": 0.07051931878186678, "learning_rate": 1.8631293453911596e-06, "loss": 0.5114, "step": 5893 }, { "epoch": 2.86468085106383, "grad_norm": 0.07135782068445463, "learning_rate": 1.861638586451872e-06, "loss": 0.5159, "step": 5894 }, { "epoch": 2.8651671732522797, "grad_norm": 0.06988752907224728, "learning_rate": 1.8601482877176475e-06, "loss": 0.5226, "step": 5895 }, { "epoch": 2.8656534954407293, "grad_norm": 0.07216267415702485, "learning_rate": 1.8586584494070214e-06, "loss": 0.5414, "step": 5896 }, { "epoch": 2.8661398176291795, "grad_norm": 0.0707290624063378, "learning_rate": 1.857169071738461e-06, "loss": 0.5349, "step": 5897 }, { "epoch": 2.866626139817629, "grad_norm": 0.0720293644487387, "learning_rate": 1.855680154930367e-06, "loss": 0.4974, "step": 5898 }, { "epoch": 2.8671124620060793, "grad_norm": 0.07282997624292858, "learning_rate": 1.8541916992010727e-06, "loss": 0.5331, "step": 5899 }, { "epoch": 2.867598784194529, "grad_norm": 0.06872523260553294, "learning_rate": 1.8527037047688422e-06, "loss": 0.5416, "step": 5900 }, { "epoch": 2.8680851063829786, "grad_norm": 0.06985026635125152, "learning_rate": 1.851216171851874e-06, "loss": 0.5221, "step": 5901 }, { "epoch": 2.8685714285714283, "grad_norm": 0.06916399231211036, "learning_rate": 1.8497291006682967e-06, "loss": 0.4832, "step": 5902 }, { "epoch": 2.8690577507598785, "grad_norm": 0.06914632836635576, "learning_rate": 1.8482424914361735e-06, "loss": 0.4996, "step": 5903 }, { "epoch": 2.869544072948328, "grad_norm": 0.07194505543985094, "learning_rate": 1.8467563443734982e-06, "loss": 0.5488, "step": 5904 }, { "epoch": 2.8700303951367783, "grad_norm": 0.07193927603522775, "learning_rate": 1.845270659698198e-06, "loss": 0.5038, "step": 5905 }, { "epoch": 2.870516717325228, "grad_norm": 0.06977363500973308, "learning_rate": 1.8437854376281307e-06, "loss": 0.5064, "step": 5906 }, { "epoch": 2.8710030395136776, "grad_norm": 0.07336662309007039, "learning_rate": 1.8423006783810893e-06, "loss": 0.5445, "step": 5907 }, { "epoch": 2.8714893617021278, "grad_norm": 0.07349780066206979, "learning_rate": 1.8408163821747943e-06, "loss": 0.525, "step": 5908 }, { "epoch": 2.8719756838905774, "grad_norm": 0.07358272136557588, "learning_rate": 1.8393325492269016e-06, "loss": 0.527, "step": 5909 }, { "epoch": 2.8724620060790276, "grad_norm": 0.0705264759202915, "learning_rate": 1.8378491797549969e-06, "loss": 0.4959, "step": 5910 }, { "epoch": 2.8729483282674773, "grad_norm": 0.07045440984823527, "learning_rate": 1.8363662739766036e-06, "loss": 0.5175, "step": 5911 }, { "epoch": 2.873434650455927, "grad_norm": 0.07027576530858748, "learning_rate": 1.8348838321091705e-06, "loss": 0.5218, "step": 5912 }, { "epoch": 2.8739209726443766, "grad_norm": 0.07077195358920799, "learning_rate": 1.833401854370081e-06, "loss": 0.5094, "step": 5913 }, { "epoch": 2.8744072948328268, "grad_norm": 0.06933642971308226, "learning_rate": 1.8319203409766507e-06, "loss": 0.4972, "step": 5914 }, { "epoch": 2.8748936170212764, "grad_norm": 0.06977561700168815, "learning_rate": 1.8304392921461262e-06, "loss": 0.4994, "step": 5915 }, { "epoch": 2.8753799392097266, "grad_norm": 0.0727938020413016, "learning_rate": 1.8289587080956873e-06, "loss": 0.5443, "step": 5916 }, { "epoch": 2.8758662613981762, "grad_norm": 0.06737963729317442, "learning_rate": 1.8274785890424434e-06, "loss": 0.4869, "step": 5917 }, { "epoch": 2.876352583586626, "grad_norm": 0.06835079165428672, "learning_rate": 1.8259989352034385e-06, "loss": 0.4793, "step": 5918 }, { "epoch": 2.876838905775076, "grad_norm": 0.07107406207500278, "learning_rate": 1.8245197467956472e-06, "loss": 0.5054, "step": 5919 }, { "epoch": 2.8773252279635257, "grad_norm": 0.07368407128404685, "learning_rate": 1.8230410240359742e-06, "loss": 0.5451, "step": 5920 }, { "epoch": 2.877811550151976, "grad_norm": 0.06893833745367822, "learning_rate": 1.8215627671412605e-06, "loss": 0.5022, "step": 5921 }, { "epoch": 2.8782978723404256, "grad_norm": 0.07037184250553943, "learning_rate": 1.8200849763282713e-06, "loss": 0.5251, "step": 5922 }, { "epoch": 2.8787841945288752, "grad_norm": 0.07365039573898581, "learning_rate": 1.8186076518137102e-06, "loss": 0.5596, "step": 5923 }, { "epoch": 2.8792705167173254, "grad_norm": 0.07277497393431025, "learning_rate": 1.8171307938142101e-06, "loss": 0.5396, "step": 5924 }, { "epoch": 2.879756838905775, "grad_norm": 0.06994217079860676, "learning_rate": 1.8156544025463346e-06, "loss": 0.5085, "step": 5925 }, { "epoch": 2.880243161094225, "grad_norm": 0.07100016575212124, "learning_rate": 1.8141784782265809e-06, "loss": 0.5039, "step": 5926 }, { "epoch": 2.880729483282675, "grad_norm": 0.07197386046214273, "learning_rate": 1.812703021071376e-06, "loss": 0.5636, "step": 5927 }, { "epoch": 2.8812158054711245, "grad_norm": 0.06906040139240434, "learning_rate": 1.811228031297077e-06, "loss": 0.5171, "step": 5928 }, { "epoch": 2.8817021276595742, "grad_norm": 0.07248768415258881, "learning_rate": 1.809753509119978e-06, "loss": 0.5218, "step": 5929 }, { "epoch": 2.8821884498480244, "grad_norm": 0.06940242852647203, "learning_rate": 1.8082794547562993e-06, "loss": 0.5116, "step": 5930 }, { "epoch": 2.882674772036474, "grad_norm": 0.070882906781668, "learning_rate": 1.806805868422194e-06, "loss": 0.4932, "step": 5931 }, { "epoch": 2.883161094224924, "grad_norm": 0.07297468126613556, "learning_rate": 1.805332750333747e-06, "loss": 0.5619, "step": 5932 }, { "epoch": 2.883647416413374, "grad_norm": 0.06884612396778514, "learning_rate": 1.8038601007069745e-06, "loss": 0.4903, "step": 5933 }, { "epoch": 2.8841337386018235, "grad_norm": 0.07274918156943112, "learning_rate": 1.8023879197578237e-06, "loss": 0.5818, "step": 5934 }, { "epoch": 2.8846200607902737, "grad_norm": 0.07041133158857056, "learning_rate": 1.800916207702173e-06, "loss": 0.499, "step": 5935 }, { "epoch": 2.8851063829787233, "grad_norm": 0.07027261923576446, "learning_rate": 1.7994449647558337e-06, "loss": 0.5209, "step": 5936 }, { "epoch": 2.8855927051671735, "grad_norm": 0.07249666418503105, "learning_rate": 1.7979741911345445e-06, "loss": 0.5424, "step": 5937 }, { "epoch": 2.886079027355623, "grad_norm": 0.06978072084012901, "learning_rate": 1.7965038870539785e-06, "loss": 0.4935, "step": 5938 }, { "epoch": 2.886565349544073, "grad_norm": 0.07842199313111962, "learning_rate": 1.7950340527297399e-06, "loss": 0.55, "step": 5939 }, { "epoch": 2.8870516717325225, "grad_norm": 0.07213701620607908, "learning_rate": 1.7935646883773622e-06, "loss": 0.5458, "step": 5940 }, { "epoch": 2.8875379939209727, "grad_norm": 0.07050509495685729, "learning_rate": 1.7920957942123113e-06, "loss": 0.5278, "step": 5941 }, { "epoch": 2.8880243161094223, "grad_norm": 0.07227954260715562, "learning_rate": 1.7906273704499844e-06, "loss": 0.5136, "step": 5942 }, { "epoch": 2.8885106382978725, "grad_norm": 0.07280864929648496, "learning_rate": 1.7891594173057086e-06, "loss": 0.5305, "step": 5943 }, { "epoch": 2.888996960486322, "grad_norm": 0.07080388388532134, "learning_rate": 1.787691934994743e-06, "loss": 0.5224, "step": 5944 }, { "epoch": 2.889483282674772, "grad_norm": 0.07191046458895495, "learning_rate": 1.7862249237322765e-06, "loss": 0.4826, "step": 5945 }, { "epoch": 2.889969604863222, "grad_norm": 0.07374816464037266, "learning_rate": 1.7847583837334303e-06, "loss": 0.5477, "step": 5946 }, { "epoch": 2.8904559270516716, "grad_norm": 0.07200066641878645, "learning_rate": 1.7832923152132542e-06, "loss": 0.5323, "step": 5947 }, { "epoch": 2.8909422492401218, "grad_norm": 0.07304184845500321, "learning_rate": 1.7818267183867332e-06, "loss": 0.5604, "step": 5948 }, { "epoch": 2.8914285714285715, "grad_norm": 0.07037607372791684, "learning_rate": 1.7803615934687796e-06, "loss": 0.5137, "step": 5949 }, { "epoch": 2.891914893617021, "grad_norm": 0.07425937143966992, "learning_rate": 1.7788969406742363e-06, "loss": 0.5419, "step": 5950 }, { "epoch": 2.8924012158054713, "grad_norm": 0.07848712966988176, "learning_rate": 1.777432760217881e-06, "loss": 0.5634, "step": 5951 }, { "epoch": 2.892887537993921, "grad_norm": 0.07547995739528522, "learning_rate": 1.7759690523144146e-06, "loss": 0.6216, "step": 5952 }, { "epoch": 2.893373860182371, "grad_norm": 0.07167419191835152, "learning_rate": 1.774505817178475e-06, "loss": 0.5389, "step": 5953 }, { "epoch": 2.8938601823708208, "grad_norm": 0.06763457554316366, "learning_rate": 1.7730430550246303e-06, "loss": 0.469, "step": 5954 }, { "epoch": 2.8943465045592704, "grad_norm": 0.07020301150716754, "learning_rate": 1.7715807660673768e-06, "loss": 0.5174, "step": 5955 }, { "epoch": 2.89483282674772, "grad_norm": 0.07313243217168361, "learning_rate": 1.7701189505211424e-06, "loss": 0.5341, "step": 5956 }, { "epoch": 2.8953191489361703, "grad_norm": 0.07191171511544767, "learning_rate": 1.7686576086002866e-06, "loss": 0.5042, "step": 5957 }, { "epoch": 2.89580547112462, "grad_norm": 0.0701450628664206, "learning_rate": 1.7671967405190976e-06, "loss": 0.5239, "step": 5958 }, { "epoch": 2.89629179331307, "grad_norm": 0.07212256973710782, "learning_rate": 1.7657363464917964e-06, "loss": 0.5539, "step": 5959 }, { "epoch": 2.8967781155015198, "grad_norm": 0.07146378804551277, "learning_rate": 1.7642764267325323e-06, "loss": 0.537, "step": 5960 }, { "epoch": 2.8972644376899694, "grad_norm": 0.07155844731220108, "learning_rate": 1.7628169814553858e-06, "loss": 0.5222, "step": 5961 }, { "epoch": 2.8977507598784196, "grad_norm": 0.07022504830497889, "learning_rate": 1.761358010874369e-06, "loss": 0.4966, "step": 5962 }, { "epoch": 2.8982370820668693, "grad_norm": 0.0697817587268933, "learning_rate": 1.759899515203422e-06, "loss": 0.4913, "step": 5963 }, { "epoch": 2.8987234042553194, "grad_norm": 0.06827936377073991, "learning_rate": 1.7584414946564176e-06, "loss": 0.5013, "step": 5964 }, { "epoch": 2.899209726443769, "grad_norm": 0.07096588902736498, "learning_rate": 1.7569839494471574e-06, "loss": 0.5001, "step": 5965 }, { "epoch": 2.8996960486322187, "grad_norm": 0.07422903737339483, "learning_rate": 1.7555268797893743e-06, "loss": 0.5337, "step": 5966 }, { "epoch": 2.9001823708206684, "grad_norm": 0.07100258758927531, "learning_rate": 1.7540702858967313e-06, "loss": 0.5107, "step": 5967 }, { "epoch": 2.9006686930091186, "grad_norm": 0.07322721442600164, "learning_rate": 1.7526141679828202e-06, "loss": 0.5296, "step": 5968 }, { "epoch": 2.9011550151975682, "grad_norm": 0.07087597321835215, "learning_rate": 1.7511585262611652e-06, "loss": 0.4965, "step": 5969 }, { "epoch": 2.9016413373860184, "grad_norm": 0.07189283734367957, "learning_rate": 1.7497033609452192e-06, "loss": 0.5437, "step": 5970 }, { "epoch": 2.902127659574468, "grad_norm": 0.07343283569382482, "learning_rate": 1.748248672248366e-06, "loss": 0.5144, "step": 5971 }, { "epoch": 2.9026139817629177, "grad_norm": 0.06887918035627322, "learning_rate": 1.7467944603839187e-06, "loss": 0.5165, "step": 5972 }, { "epoch": 2.903100303951368, "grad_norm": 0.06991157318745035, "learning_rate": 1.7453407255651212e-06, "loss": 0.5389, "step": 5973 }, { "epoch": 2.9035866261398176, "grad_norm": 0.07250723012097712, "learning_rate": 1.743887468005147e-06, "loss": 0.5291, "step": 5974 }, { "epoch": 2.9040729483282677, "grad_norm": 0.06847955452770746, "learning_rate": 1.7424346879171001e-06, "loss": 0.482, "step": 5975 }, { "epoch": 2.9045592705167174, "grad_norm": 0.07117190239765178, "learning_rate": 1.7409823855140146e-06, "loss": 0.5143, "step": 5976 }, { "epoch": 2.905045592705167, "grad_norm": 0.07397924116146913, "learning_rate": 1.739530561008853e-06, "loss": 0.5334, "step": 5977 }, { "epoch": 2.905531914893617, "grad_norm": 0.06902003861202617, "learning_rate": 1.7380792146145098e-06, "loss": 0.5171, "step": 5978 }, { "epoch": 2.906018237082067, "grad_norm": 0.06956050885778228, "learning_rate": 1.7366283465438082e-06, "loss": 0.5218, "step": 5979 }, { "epoch": 2.906504559270517, "grad_norm": 0.07180694213311371, "learning_rate": 1.7351779570095017e-06, "loss": 0.5215, "step": 5980 }, { "epoch": 2.9069908814589667, "grad_norm": 0.0703823518152641, "learning_rate": 1.7337280462242735e-06, "loss": 0.5107, "step": 5981 }, { "epoch": 2.9074772036474164, "grad_norm": 0.0690398187429159, "learning_rate": 1.7322786144007358e-06, "loss": 0.5093, "step": 5982 }, { "epoch": 2.907963525835866, "grad_norm": 0.07071541522557791, "learning_rate": 1.7308296617514319e-06, "loss": 0.4974, "step": 5983 }, { "epoch": 2.908449848024316, "grad_norm": 0.07062205160482402, "learning_rate": 1.7293811884888344e-06, "loss": 0.4917, "step": 5984 }, { "epoch": 2.908936170212766, "grad_norm": 0.07498167025936278, "learning_rate": 1.7279331948253452e-06, "loss": 0.5366, "step": 5985 }, { "epoch": 2.909422492401216, "grad_norm": 0.07200212818994361, "learning_rate": 1.7264856809732966e-06, "loss": 0.5256, "step": 5986 }, { "epoch": 2.9099088145896657, "grad_norm": 0.07481312959203508, "learning_rate": 1.7250386471449493e-06, "loss": 0.5388, "step": 5987 }, { "epoch": 2.9103951367781153, "grad_norm": 0.07477589243709247, "learning_rate": 1.7235920935524947e-06, "loss": 0.567, "step": 5988 }, { "epoch": 2.9108814589665655, "grad_norm": 0.06825899619487304, "learning_rate": 1.7221460204080537e-06, "loss": 0.4513, "step": 5989 }, { "epoch": 2.911367781155015, "grad_norm": 0.07225644321129099, "learning_rate": 1.7207004279236762e-06, "loss": 0.5246, "step": 5990 }, { "epoch": 2.9118541033434653, "grad_norm": 0.07484221681855527, "learning_rate": 1.719255316311342e-06, "loss": 0.5591, "step": 5991 }, { "epoch": 2.912340425531915, "grad_norm": 0.06939055296710232, "learning_rate": 1.7178106857829602e-06, "loss": 0.5016, "step": 5992 }, { "epoch": 2.9128267477203647, "grad_norm": 0.06974699141258886, "learning_rate": 1.7163665365503702e-06, "loss": 0.5067, "step": 5993 }, { "epoch": 2.9133130699088143, "grad_norm": 0.07488770736024253, "learning_rate": 1.7149228688253388e-06, "loss": 0.5461, "step": 5994 }, { "epoch": 2.9137993920972645, "grad_norm": 0.07055136412381614, "learning_rate": 1.7134796828195643e-06, "loss": 0.5156, "step": 5995 }, { "epoch": 2.914285714285714, "grad_norm": 0.07104380273782636, "learning_rate": 1.7120369787446734e-06, "loss": 0.519, "step": 5996 }, { "epoch": 2.9147720364741643, "grad_norm": 0.07327946385579932, "learning_rate": 1.7105947568122227e-06, "loss": 0.5518, "step": 5997 }, { "epoch": 2.915258358662614, "grad_norm": 0.07168190690447519, "learning_rate": 1.7091530172336968e-06, "loss": 0.4871, "step": 5998 }, { "epoch": 2.9157446808510636, "grad_norm": 0.0676031914797481, "learning_rate": 1.7077117602205128e-06, "loss": 0.5023, "step": 5999 }, { "epoch": 2.9162310030395138, "grad_norm": 0.07008615685094173, "learning_rate": 1.706270985984011e-06, "loss": 0.5088, "step": 6000 }, { "epoch": 2.9167173252279635, "grad_norm": 0.06867686135284184, "learning_rate": 1.7048306947354642e-06, "loss": 0.4736, "step": 6001 }, { "epoch": 2.9172036474164136, "grad_norm": 0.07005083320905237, "learning_rate": 1.7033908866860794e-06, "loss": 0.5128, "step": 6002 }, { "epoch": 2.9176899696048633, "grad_norm": 0.06933621532800117, "learning_rate": 1.7019515620469851e-06, "loss": 0.5362, "step": 6003 }, { "epoch": 2.918176291793313, "grad_norm": 0.0703131563761392, "learning_rate": 1.700512721029242e-06, "loss": 0.5366, "step": 6004 }, { "epoch": 2.918662613981763, "grad_norm": 0.0717712736315408, "learning_rate": 1.6990743638438411e-06, "loss": 0.5527, "step": 6005 }, { "epoch": 2.9191489361702128, "grad_norm": 0.07361356277841921, "learning_rate": 1.6976364907016995e-06, "loss": 0.5074, "step": 6006 }, { "epoch": 2.919635258358663, "grad_norm": 0.0676281383765581, "learning_rate": 1.6961991018136664e-06, "loss": 0.4929, "step": 6007 }, { "epoch": 2.9201215805471126, "grad_norm": 0.07227412116950283, "learning_rate": 1.6947621973905176e-06, "loss": 0.5763, "step": 6008 }, { "epoch": 2.9206079027355623, "grad_norm": 0.06893859678991712, "learning_rate": 1.693325777642959e-06, "loss": 0.5147, "step": 6009 }, { "epoch": 2.921094224924012, "grad_norm": 0.0714765583781806, "learning_rate": 1.6918898427816255e-06, "loss": 0.5168, "step": 6010 }, { "epoch": 2.921580547112462, "grad_norm": 0.06789973577550529, "learning_rate": 1.6904543930170802e-06, "loss": 0.504, "step": 6011 }, { "epoch": 2.9220668693009118, "grad_norm": 0.0722466904533531, "learning_rate": 1.689019428559816e-06, "loss": 0.5232, "step": 6012 }, { "epoch": 2.922553191489362, "grad_norm": 0.06838086464874564, "learning_rate": 1.687584949620255e-06, "loss": 0.5147, "step": 6013 }, { "epoch": 2.9230395136778116, "grad_norm": 0.07365026698934893, "learning_rate": 1.6861509564087453e-06, "loss": 0.5334, "step": 6014 }, { "epoch": 2.9235258358662612, "grad_norm": 0.07015160335133704, "learning_rate": 1.6847174491355662e-06, "loss": 0.5365, "step": 6015 }, { "epoch": 2.9240121580547114, "grad_norm": 0.07146543331738837, "learning_rate": 1.6832844280109256e-06, "loss": 0.5278, "step": 6016 }, { "epoch": 2.924498480243161, "grad_norm": 0.06789744125428117, "learning_rate": 1.68185189324496e-06, "loss": 0.4657, "step": 6017 }, { "epoch": 2.924984802431611, "grad_norm": 0.07122907863059538, "learning_rate": 1.6804198450477345e-06, "loss": 0.5122, "step": 6018 }, { "epoch": 2.925471124620061, "grad_norm": 0.07112499999109342, "learning_rate": 1.6789882836292403e-06, "loss": 0.5488, "step": 6019 }, { "epoch": 2.9259574468085106, "grad_norm": 0.07130916145644813, "learning_rate": 1.6775572091994036e-06, "loss": 0.5318, "step": 6020 }, { "epoch": 2.9264437689969602, "grad_norm": 0.07092088569827337, "learning_rate": 1.6761266219680734e-06, "loss": 0.5332, "step": 6021 }, { "epoch": 2.9269300911854104, "grad_norm": 0.07078000135641391, "learning_rate": 1.6746965221450285e-06, "loss": 0.5419, "step": 6022 }, { "epoch": 2.92741641337386, "grad_norm": 0.06816657452952198, "learning_rate": 1.673266909939978e-06, "loss": 0.5108, "step": 6023 }, { "epoch": 2.92790273556231, "grad_norm": 0.07281214106359819, "learning_rate": 1.6718377855625567e-06, "loss": 0.5309, "step": 6024 }, { "epoch": 2.92838905775076, "grad_norm": 0.07141867442896573, "learning_rate": 1.6704091492223313e-06, "loss": 0.5222, "step": 6025 }, { "epoch": 2.9288753799392095, "grad_norm": 0.0718099258009478, "learning_rate": 1.6689810011287933e-06, "loss": 0.5436, "step": 6026 }, { "epoch": 2.9293617021276597, "grad_norm": 0.07223062760344223, "learning_rate": 1.667553341491366e-06, "loss": 0.5431, "step": 6027 }, { "epoch": 2.9298480243161094, "grad_norm": 0.07097535127157406, "learning_rate": 1.6661261705193998e-06, "loss": 0.5103, "step": 6028 }, { "epoch": 2.9303343465045595, "grad_norm": 0.07280884368679566, "learning_rate": 1.6646994884221707e-06, "loss": 0.5239, "step": 6029 }, { "epoch": 2.930820668693009, "grad_norm": 0.07156559806022524, "learning_rate": 1.663273295408887e-06, "loss": 0.5515, "step": 6030 }, { "epoch": 2.931306990881459, "grad_norm": 0.07046549374071892, "learning_rate": 1.6618475916886834e-06, "loss": 0.5105, "step": 6031 }, { "epoch": 2.931793313069909, "grad_norm": 0.07100912615035251, "learning_rate": 1.660422377470623e-06, "loss": 0.5061, "step": 6032 }, { "epoch": 2.9322796352583587, "grad_norm": 0.07177730183676781, "learning_rate": 1.6589976529636976e-06, "loss": 0.5296, "step": 6033 }, { "epoch": 2.9327659574468083, "grad_norm": 0.06996226536673274, "learning_rate": 1.6575734183768267e-06, "loss": 0.4991, "step": 6034 }, { "epoch": 2.9332522796352585, "grad_norm": 0.07065636754306481, "learning_rate": 1.6561496739188582e-06, "loss": 0.5621, "step": 6035 }, { "epoch": 2.933738601823708, "grad_norm": 0.06868773838186035, "learning_rate": 1.6547264197985685e-06, "loss": 0.5006, "step": 6036 }, { "epoch": 2.934224924012158, "grad_norm": 0.06856724646626858, "learning_rate": 1.653303656224659e-06, "loss": 0.4835, "step": 6037 }, { "epoch": 2.934711246200608, "grad_norm": 0.06977618962396141, "learning_rate": 1.6518813834057662e-06, "loss": 0.5222, "step": 6038 }, { "epoch": 2.9351975683890577, "grad_norm": 0.073265559420687, "learning_rate": 1.6504596015504482e-06, "loss": 0.5312, "step": 6039 }, { "epoch": 2.935683890577508, "grad_norm": 0.0719482414605524, "learning_rate": 1.6490383108671926e-06, "loss": 0.5398, "step": 6040 }, { "epoch": 2.9361702127659575, "grad_norm": 0.07281183254750259, "learning_rate": 1.6476175115644162e-06, "loss": 0.5675, "step": 6041 }, { "epoch": 2.936656534954407, "grad_norm": 0.07049732432187852, "learning_rate": 1.6461972038504631e-06, "loss": 0.4935, "step": 6042 }, { "epoch": 2.9371428571428573, "grad_norm": 0.07162588331611189, "learning_rate": 1.6447773879336064e-06, "loss": 0.5386, "step": 6043 }, { "epoch": 2.937629179331307, "grad_norm": 0.07056773183133058, "learning_rate": 1.6433580640220431e-06, "loss": 0.5036, "step": 6044 }, { "epoch": 2.938115501519757, "grad_norm": 0.07132677753981184, "learning_rate": 1.6419392323239026e-06, "loss": 0.5334, "step": 6045 }, { "epoch": 2.9386018237082068, "grad_norm": 0.07468836253526807, "learning_rate": 1.6405208930472404e-06, "loss": 0.5505, "step": 6046 }, { "epoch": 2.9390881458966565, "grad_norm": 0.07214054683430728, "learning_rate": 1.63910304640004e-06, "loss": 0.4762, "step": 6047 }, { "epoch": 2.939574468085106, "grad_norm": 0.07475753543519399, "learning_rate": 1.6376856925902123e-06, "loss": 0.5363, "step": 6048 }, { "epoch": 2.9400607902735563, "grad_norm": 0.07106541720468036, "learning_rate": 1.6362688318255958e-06, "loss": 0.526, "step": 6049 }, { "epoch": 2.940547112462006, "grad_norm": 0.06920072059423771, "learning_rate": 1.634852464313958e-06, "loss": 0.5095, "step": 6050 }, { "epoch": 2.941033434650456, "grad_norm": 0.0704480594708247, "learning_rate": 1.6334365902629917e-06, "loss": 0.5274, "step": 6051 }, { "epoch": 2.9415197568389058, "grad_norm": 0.07097498997412845, "learning_rate": 1.63202120988032e-06, "loss": 0.4945, "step": 6052 }, { "epoch": 2.9420060790273554, "grad_norm": 0.0738334596683305, "learning_rate": 1.630606323373492e-06, "loss": 0.516, "step": 6053 }, { "epoch": 2.9424924012158056, "grad_norm": 0.07141998040096903, "learning_rate": 1.6291919309499849e-06, "loss": 0.5254, "step": 6054 }, { "epoch": 2.9429787234042553, "grad_norm": 0.07119822669664502, "learning_rate": 1.6277780328172026e-06, "loss": 0.5118, "step": 6055 }, { "epoch": 2.9434650455927054, "grad_norm": 0.07230331754380472, "learning_rate": 1.6263646291824764e-06, "loss": 0.5261, "step": 6056 }, { "epoch": 2.943951367781155, "grad_norm": 0.07406292921022503, "learning_rate": 1.6249517202530707e-06, "loss": 0.5199, "step": 6057 }, { "epoch": 2.9444376899696048, "grad_norm": 0.07392152844374228, "learning_rate": 1.6235393062361666e-06, "loss": 0.5795, "step": 6058 }, { "epoch": 2.9449240121580544, "grad_norm": 0.07398911777580285, "learning_rate": 1.6221273873388816e-06, "loss": 0.5473, "step": 6059 }, { "epoch": 2.9454103343465046, "grad_norm": 0.07002314362490732, "learning_rate": 1.6207159637682568e-06, "loss": 0.5028, "step": 6060 }, { "epoch": 2.9458966565349542, "grad_norm": 0.06939334846963183, "learning_rate": 1.6193050357312612e-06, "loss": 0.5189, "step": 6061 }, { "epoch": 2.9463829787234044, "grad_norm": 0.06948157078915279, "learning_rate": 1.617894603434792e-06, "loss": 0.4878, "step": 6062 }, { "epoch": 2.946869300911854, "grad_norm": 0.07042747510794506, "learning_rate": 1.6164846670856732e-06, "loss": 0.5241, "step": 6063 }, { "epoch": 2.9473556231003037, "grad_norm": 0.07468897432673499, "learning_rate": 1.6150752268906555e-06, "loss": 0.5496, "step": 6064 }, { "epoch": 2.947841945288754, "grad_norm": 0.07318349839112621, "learning_rate": 1.613666283056417e-06, "loss": 0.5105, "step": 6065 }, { "epoch": 2.9483282674772036, "grad_norm": 0.06990394364557215, "learning_rate": 1.6122578357895641e-06, "loss": 0.5099, "step": 6066 }, { "epoch": 2.9488145896656537, "grad_norm": 0.07407482681614101, "learning_rate": 1.6108498852966291e-06, "loss": 0.5797, "step": 6067 }, { "epoch": 2.9493009118541034, "grad_norm": 0.07191676067587008, "learning_rate": 1.6094424317840724e-06, "loss": 0.5391, "step": 6068 }, { "epoch": 2.949787234042553, "grad_norm": 0.07010958232081264, "learning_rate": 1.60803547545828e-06, "loss": 0.5339, "step": 6069 }, { "epoch": 2.950273556231003, "grad_norm": 0.0699173064606949, "learning_rate": 1.6066290165255676e-06, "loss": 0.5147, "step": 6070 }, { "epoch": 2.950759878419453, "grad_norm": 0.06962652414736897, "learning_rate": 1.6052230551921748e-06, "loss": 0.5221, "step": 6071 }, { "epoch": 2.951246200607903, "grad_norm": 0.06985566938253028, "learning_rate": 1.6038175916642718e-06, "loss": 0.4967, "step": 6072 }, { "epoch": 2.9517325227963527, "grad_norm": 0.07258963703840368, "learning_rate": 1.6024126261479516e-06, "loss": 0.5263, "step": 6073 }, { "epoch": 2.9522188449848024, "grad_norm": 0.07314877709463435, "learning_rate": 1.6010081588492381e-06, "loss": 0.5115, "step": 6074 }, { "epoch": 2.952705167173252, "grad_norm": 0.0748136834455607, "learning_rate": 1.5996041899740804e-06, "loss": 0.5298, "step": 6075 }, { "epoch": 2.953191489361702, "grad_norm": 0.07469751670967531, "learning_rate": 1.5982007197283539e-06, "loss": 0.5805, "step": 6076 }, { "epoch": 2.953677811550152, "grad_norm": 0.07206073194033645, "learning_rate": 1.596797748317862e-06, "loss": 0.5387, "step": 6077 }, { "epoch": 2.954164133738602, "grad_norm": 0.07002819675982361, "learning_rate": 1.5953952759483344e-06, "loss": 0.5165, "step": 6078 }, { "epoch": 2.9546504559270517, "grad_norm": 0.07158275399443301, "learning_rate": 1.5939933028254272e-06, "loss": 0.5121, "step": 6079 }, { "epoch": 2.9551367781155014, "grad_norm": 0.07258915114443422, "learning_rate": 1.5925918291547249e-06, "loss": 0.5081, "step": 6080 }, { "epoch": 2.9556231003039515, "grad_norm": 0.07123377533054698, "learning_rate": 1.591190855141737e-06, "loss": 0.5352, "step": 6081 }, { "epoch": 2.956109422492401, "grad_norm": 0.07193813824280655, "learning_rate": 1.5897903809919008e-06, "loss": 0.5376, "step": 6082 }, { "epoch": 2.9565957446808513, "grad_norm": 0.0729967474531232, "learning_rate": 1.5883904069105793e-06, "loss": 0.5722, "step": 6083 }, { "epoch": 2.957082066869301, "grad_norm": 0.07019001229182958, "learning_rate": 1.5869909331030636e-06, "loss": 0.5009, "step": 6084 }, { "epoch": 2.9575683890577507, "grad_norm": 0.07088399060126647, "learning_rate": 1.58559195977457e-06, "loss": 0.5249, "step": 6085 }, { "epoch": 2.9580547112462003, "grad_norm": 0.07176912479884007, "learning_rate": 1.5841934871302423e-06, "loss": 0.5416, "step": 6086 }, { "epoch": 2.9585410334346505, "grad_norm": 0.07280805550982031, "learning_rate": 1.5827955153751507e-06, "loss": 0.5323, "step": 6087 }, { "epoch": 2.9590273556231, "grad_norm": 0.07106150728197004, "learning_rate": 1.5813980447142924e-06, "loss": 0.5062, "step": 6088 }, { "epoch": 2.9595136778115503, "grad_norm": 0.07253740645729541, "learning_rate": 1.5800010753525896e-06, "loss": 0.542, "step": 6089 }, { "epoch": 2.96, "grad_norm": 0.07217280197606152, "learning_rate": 1.5786046074948924e-06, "loss": 0.5513, "step": 6090 }, { "epoch": 2.9604863221884496, "grad_norm": 0.06928289448280522, "learning_rate": 1.5772086413459787e-06, "loss": 0.5073, "step": 6091 }, { "epoch": 2.9609726443768998, "grad_norm": 0.06960362723218959, "learning_rate": 1.5758131771105457e-06, "loss": 0.5012, "step": 6092 }, { "epoch": 2.9614589665653495, "grad_norm": 0.06825358917997426, "learning_rate": 1.574418214993228e-06, "loss": 0.4727, "step": 6093 }, { "epoch": 2.9619452887537996, "grad_norm": 0.06889632566465823, "learning_rate": 1.5730237551985794e-06, "loss": 0.5153, "step": 6094 }, { "epoch": 2.9624316109422493, "grad_norm": 0.07279289320975993, "learning_rate": 1.5716297979310807e-06, "loss": 0.5228, "step": 6095 }, { "epoch": 2.962917933130699, "grad_norm": 0.07129552296789354, "learning_rate": 1.5702363433951407e-06, "loss": 0.5254, "step": 6096 }, { "epoch": 2.963404255319149, "grad_norm": 0.07213254444269546, "learning_rate": 1.5688433917950934e-06, "loss": 0.5276, "step": 6097 }, { "epoch": 2.9638905775075988, "grad_norm": 0.07307685030409322, "learning_rate": 1.5674509433351992e-06, "loss": 0.5613, "step": 6098 }, { "epoch": 2.964376899696049, "grad_norm": 0.07021795372797525, "learning_rate": 1.566058998219645e-06, "loss": 0.5225, "step": 6099 }, { "epoch": 2.9648632218844986, "grad_norm": 0.07051656671820612, "learning_rate": 1.5646675566525437e-06, "loss": 0.5046, "step": 6100 }, { "epoch": 2.9653495440729483, "grad_norm": 0.07163921484084286, "learning_rate": 1.5632766188379346e-06, "loss": 0.5243, "step": 6101 }, { "epoch": 2.965835866261398, "grad_norm": 0.06946789441899145, "learning_rate": 1.5618861849797824e-06, "loss": 0.5206, "step": 6102 }, { "epoch": 2.966322188449848, "grad_norm": 0.06940574585310512, "learning_rate": 1.5604962552819792e-06, "loss": 0.4944, "step": 6103 }, { "epoch": 2.9668085106382978, "grad_norm": 0.06913005264797906, "learning_rate": 1.559106829948342e-06, "loss": 0.4807, "step": 6104 }, { "epoch": 2.967294832826748, "grad_norm": 0.07269691611788394, "learning_rate": 1.5577179091826156e-06, "loss": 0.5571, "step": 6105 }, { "epoch": 2.9677811550151976, "grad_norm": 0.07254529560632424, "learning_rate": 1.5563294931884665e-06, "loss": 0.549, "step": 6106 }, { "epoch": 2.9682674772036473, "grad_norm": 0.07168650248203734, "learning_rate": 1.554941582169492e-06, "loss": 0.5294, "step": 6107 }, { "epoch": 2.9687537993920974, "grad_norm": 0.07075477280000363, "learning_rate": 1.5535541763292127e-06, "loss": 0.5072, "step": 6108 }, { "epoch": 2.969240121580547, "grad_norm": 0.07165454533166851, "learning_rate": 1.5521672758710772e-06, "loss": 0.5289, "step": 6109 }, { "epoch": 2.969726443768997, "grad_norm": 0.07219331729512117, "learning_rate": 1.550780880998456e-06, "loss": 0.5215, "step": 6110 }, { "epoch": 2.970212765957447, "grad_norm": 0.07119116092956648, "learning_rate": 1.5493949919146517e-06, "loss": 0.5284, "step": 6111 }, { "epoch": 2.9706990881458966, "grad_norm": 0.0700695460048129, "learning_rate": 1.5480096088228874e-06, "loss": 0.5112, "step": 6112 }, { "epoch": 2.9711854103343462, "grad_norm": 0.0716373132061361, "learning_rate": 1.5466247319263144e-06, "loss": 0.5306, "step": 6113 }, { "epoch": 2.9716717325227964, "grad_norm": 0.07144441769307608, "learning_rate": 1.5452403614280087e-06, "loss": 0.5128, "step": 6114 }, { "epoch": 2.972158054711246, "grad_norm": 0.07155792877241401, "learning_rate": 1.5438564975309728e-06, "loss": 0.5122, "step": 6115 }, { "epoch": 2.972644376899696, "grad_norm": 0.07361865969549194, "learning_rate": 1.5424731404381344e-06, "loss": 0.5554, "step": 6116 }, { "epoch": 2.973130699088146, "grad_norm": 0.07128556580327253, "learning_rate": 1.5410902903523467e-06, "loss": 0.528, "step": 6117 }, { "epoch": 2.9736170212765956, "grad_norm": 0.07478992479503081, "learning_rate": 1.53970794747639e-06, "loss": 0.5402, "step": 6118 }, { "epoch": 2.9741033434650457, "grad_norm": 0.07097175238564427, "learning_rate": 1.5383261120129679e-06, "loss": 0.5117, "step": 6119 }, { "epoch": 2.9745896656534954, "grad_norm": 0.07217017031113146, "learning_rate": 1.5369447841647133e-06, "loss": 0.5174, "step": 6120 }, { "epoch": 2.9750759878419455, "grad_norm": 0.07153177765734692, "learning_rate": 1.535563964134179e-06, "loss": 0.5089, "step": 6121 }, { "epoch": 2.975562310030395, "grad_norm": 0.07278554468685437, "learning_rate": 1.5341836521238486e-06, "loss": 0.5647, "step": 6122 }, { "epoch": 2.976048632218845, "grad_norm": 0.07018225675099196, "learning_rate": 1.532803848336128e-06, "loss": 0.4829, "step": 6123 }, { "epoch": 2.976534954407295, "grad_norm": 0.07046158785296074, "learning_rate": 1.5314245529733507e-06, "loss": 0.5269, "step": 6124 }, { "epoch": 2.9770212765957447, "grad_norm": 0.07155523015072264, "learning_rate": 1.5300457662377744e-06, "loss": 0.5183, "step": 6125 }, { "epoch": 2.977507598784195, "grad_norm": 0.07030725503171567, "learning_rate": 1.5286674883315828e-06, "loss": 0.4965, "step": 6126 }, { "epoch": 2.9779939209726445, "grad_norm": 0.07015393569927068, "learning_rate": 1.5272897194568837e-06, "loss": 0.5502, "step": 6127 }, { "epoch": 2.978480243161094, "grad_norm": 0.06920298317638908, "learning_rate": 1.525912459815711e-06, "loss": 0.5304, "step": 6128 }, { "epoch": 2.978966565349544, "grad_norm": 0.06948782463006692, "learning_rate": 1.5245357096100266e-06, "loss": 0.4959, "step": 6129 }, { "epoch": 2.979452887537994, "grad_norm": 0.069537593099965, "learning_rate": 1.523159469041714e-06, "loss": 0.5258, "step": 6130 }, { "epoch": 2.9799392097264437, "grad_norm": 0.07111963157495997, "learning_rate": 1.5217837383125828e-06, "loss": 0.5206, "step": 6131 }, { "epoch": 2.980425531914894, "grad_norm": 0.07089125224482085, "learning_rate": 1.520408517624369e-06, "loss": 0.4981, "step": 6132 }, { "epoch": 2.9809118541033435, "grad_norm": 0.07104372785165855, "learning_rate": 1.5190338071787325e-06, "loss": 0.5695, "step": 6133 }, { "epoch": 2.981398176291793, "grad_norm": 0.07222586036350302, "learning_rate": 1.5176596071772592e-06, "loss": 0.5036, "step": 6134 }, { "epoch": 2.9818844984802433, "grad_norm": 0.07251231875788276, "learning_rate": 1.5162859178214617e-06, "loss": 0.5456, "step": 6135 }, { "epoch": 2.982370820668693, "grad_norm": 0.0684742697918209, "learning_rate": 1.5149127393127727e-06, "loss": 0.4976, "step": 6136 }, { "epoch": 2.982857142857143, "grad_norm": 0.07190973330983985, "learning_rate": 1.5135400718525545e-06, "loss": 0.5695, "step": 6137 }, { "epoch": 2.983343465045593, "grad_norm": 0.07176590213796266, "learning_rate": 1.5121679156420932e-06, "loss": 0.5241, "step": 6138 }, { "epoch": 2.9838297872340425, "grad_norm": 0.07181166336761126, "learning_rate": 1.5107962708826e-06, "loss": 0.5402, "step": 6139 }, { "epoch": 2.984316109422492, "grad_norm": 0.06958430694648206, "learning_rate": 1.5094251377752112e-06, "loss": 0.5181, "step": 6140 }, { "epoch": 2.9848024316109423, "grad_norm": 0.07000975881574777, "learning_rate": 1.5080545165209881e-06, "loss": 0.5105, "step": 6141 }, { "epoch": 2.985288753799392, "grad_norm": 0.06921138290213592, "learning_rate": 1.5066844073209164e-06, "loss": 0.5157, "step": 6142 }, { "epoch": 2.985775075987842, "grad_norm": 0.07036673448578677, "learning_rate": 1.5053148103759075e-06, "loss": 0.5278, "step": 6143 }, { "epoch": 2.9862613981762918, "grad_norm": 0.07128475918845432, "learning_rate": 1.5039457258867961e-06, "loss": 0.5205, "step": 6144 }, { "epoch": 2.9867477203647415, "grad_norm": 0.07272772419222057, "learning_rate": 1.5025771540543443e-06, "loss": 0.5113, "step": 6145 }, { "epoch": 2.9872340425531916, "grad_norm": 0.07086674942672036, "learning_rate": 1.5012090950792353e-06, "loss": 0.5338, "step": 6146 }, { "epoch": 2.9877203647416413, "grad_norm": 0.06983750488366851, "learning_rate": 1.4998415491620822e-06, "loss": 0.4985, "step": 6147 }, { "epoch": 2.9882066869300914, "grad_norm": 0.07163101506992368, "learning_rate": 1.4984745165034192e-06, "loss": 0.5401, "step": 6148 }, { "epoch": 2.988693009118541, "grad_norm": 0.07263895277449675, "learning_rate": 1.4971079973037078e-06, "loss": 0.5349, "step": 6149 }, { "epoch": 2.9891793313069908, "grad_norm": 0.07714082810051419, "learning_rate": 1.4957419917633293e-06, "loss": 0.4933, "step": 6150 }, { "epoch": 2.989665653495441, "grad_norm": 0.07236696119733044, "learning_rate": 1.4943765000825933e-06, "loss": 0.5105, "step": 6151 }, { "epoch": 2.9901519756838906, "grad_norm": 0.07102042208541053, "learning_rate": 1.4930115224617353e-06, "loss": 0.5238, "step": 6152 }, { "epoch": 2.9906382978723407, "grad_norm": 0.07049020098172762, "learning_rate": 1.491647059100913e-06, "loss": 0.5236, "step": 6153 }, { "epoch": 2.9911246200607904, "grad_norm": 0.06999746809100563, "learning_rate": 1.490283110200209e-06, "loss": 0.4926, "step": 6154 }, { "epoch": 2.99161094224924, "grad_norm": 0.0732270009364142, "learning_rate": 1.488919675959632e-06, "loss": 0.5493, "step": 6155 }, { "epoch": 2.9920972644376898, "grad_norm": 0.07085013259654209, "learning_rate": 1.4875567565791132e-06, "loss": 0.5481, "step": 6156 }, { "epoch": 2.99258358662614, "grad_norm": 0.06999918428777495, "learning_rate": 1.4861943522585093e-06, "loss": 0.505, "step": 6157 }, { "epoch": 2.9930699088145896, "grad_norm": 0.06963902933179202, "learning_rate": 1.4848324631976025e-06, "loss": 0.5259, "step": 6158 }, { "epoch": 2.9935562310030397, "grad_norm": 0.07001521704660149, "learning_rate": 1.4834710895960968e-06, "loss": 0.4737, "step": 6159 }, { "epoch": 2.9940425531914894, "grad_norm": 0.07287657562554004, "learning_rate": 1.4821102316536235e-06, "loss": 0.5212, "step": 6160 }, { "epoch": 2.994528875379939, "grad_norm": 0.07061187642821795, "learning_rate": 1.4807498895697365e-06, "loss": 0.507, "step": 6161 }, { "epoch": 2.995015197568389, "grad_norm": 0.07173662217190524, "learning_rate": 1.479390063543914e-06, "loss": 0.5391, "step": 6162 }, { "epoch": 2.995501519756839, "grad_norm": 0.06943580885088292, "learning_rate": 1.47803075377556e-06, "loss": 0.4807, "step": 6163 }, { "epoch": 2.995987841945289, "grad_norm": 0.07244212099807654, "learning_rate": 1.4766719604640012e-06, "loss": 0.5509, "step": 6164 }, { "epoch": 2.9964741641337387, "grad_norm": 0.07057960905286166, "learning_rate": 1.4753136838084892e-06, "loss": 0.523, "step": 6165 }, { "epoch": 2.9969604863221884, "grad_norm": 0.06959391140760514, "learning_rate": 1.4739559240082001e-06, "loss": 0.5007, "step": 6166 }, { "epoch": 2.997446808510638, "grad_norm": 0.07490875829482416, "learning_rate": 1.4725986812622339e-06, "loss": 0.5632, "step": 6167 }, { "epoch": 2.997933130699088, "grad_norm": 0.06918319147220206, "learning_rate": 1.471241955769615e-06, "loss": 0.4932, "step": 6168 }, { "epoch": 2.997933130699088, "eval_loss": 0.5682429075241089, "eval_runtime": 105.2179, "eval_samples_per_second": 288.478, "eval_steps_per_second": 36.068, "step": 6168 }, { "epoch": 2.998419452887538, "grad_norm": 0.07322987594972527, "learning_rate": 1.469885747729291e-06, "loss": 0.5123, "step": 6169 }, { "epoch": 2.998905775075988, "grad_norm": 0.06951847275323363, "learning_rate": 1.4685300573401357e-06, "loss": 0.4923, "step": 6170 }, { "epoch": 2.9993920972644377, "grad_norm": 0.07291889866397412, "learning_rate": 1.4671748848009443e-06, "loss": 0.5287, "step": 6171 }, { "epoch": 2.9998784194528874, "grad_norm": 0.07112669098830986, "learning_rate": 1.4658202303104385e-06, "loss": 0.529, "step": 6172 }, { "epoch": 3.0, "grad_norm": 0.07112669098830986, "learning_rate": 1.4644660940672628e-06, "loss": 0.1511, "step": 6173 }, { "epoch": 3.0003647416413375, "grad_norm": 0.06955238737644102, "learning_rate": 1.4631124762699856e-06, "loss": 0.3596, "step": 6174 }, { "epoch": 3.0004863221884497, "grad_norm": 0.07051223448095613, "learning_rate": 1.4617593771170996e-06, "loss": 0.4762, "step": 6175 }, { "epoch": 3.0009726443769, "grad_norm": 0.07204726616358026, "learning_rate": 1.4604067968070218e-06, "loss": 0.5139, "step": 6176 }, { "epoch": 3.0014589665653495, "grad_norm": 0.0721694540504832, "learning_rate": 1.4590547355380925e-06, "loss": 0.477, "step": 6177 }, { "epoch": 3.001945288753799, "grad_norm": 0.07155953490441903, "learning_rate": 1.4577031935085762e-06, "loss": 0.5355, "step": 6178 }, { "epoch": 3.0024316109422493, "grad_norm": 0.06948066977378234, "learning_rate": 1.4563521709166606e-06, "loss": 0.4806, "step": 6179 }, { "epoch": 3.002917933130699, "grad_norm": 0.07219113710944954, "learning_rate": 1.455001667960459e-06, "loss": 0.5102, "step": 6180 }, { "epoch": 3.003404255319149, "grad_norm": 0.07069258204358841, "learning_rate": 1.4536516848380061e-06, "loss": 0.4911, "step": 6181 }, { "epoch": 3.003890577507599, "grad_norm": 0.07250456230563963, "learning_rate": 1.4523022217472626e-06, "loss": 0.4937, "step": 6182 }, { "epoch": 3.0043768996960485, "grad_norm": 0.07210395667743953, "learning_rate": 1.4509532788861113e-06, "loss": 0.5389, "step": 6183 }, { "epoch": 3.0048632218844986, "grad_norm": 0.07200277108613336, "learning_rate": 1.4496048564523595e-06, "loss": 0.5277, "step": 6184 }, { "epoch": 3.0053495440729483, "grad_norm": 0.06992059476819104, "learning_rate": 1.4482569546437386e-06, "loss": 0.4959, "step": 6185 }, { "epoch": 3.005835866261398, "grad_norm": 0.07223332263166926, "learning_rate": 1.4469095736579019e-06, "loss": 0.5138, "step": 6186 }, { "epoch": 3.006322188449848, "grad_norm": 0.0706874370686038, "learning_rate": 1.4455627136924282e-06, "loss": 0.5124, "step": 6187 }, { "epoch": 3.006808510638298, "grad_norm": 0.07267794332800777, "learning_rate": 1.4442163749448201e-06, "loss": 0.5732, "step": 6188 }, { "epoch": 3.007294832826748, "grad_norm": 0.0727296703807697, "learning_rate": 1.4428705576125012e-06, "loss": 0.5033, "step": 6189 }, { "epoch": 3.0077811550151976, "grad_norm": 0.071193769441196, "learning_rate": 1.4415252618928216e-06, "loss": 0.5097, "step": 6190 }, { "epoch": 3.0082674772036473, "grad_norm": 0.07004230354354819, "learning_rate": 1.4401804879830527e-06, "loss": 0.4983, "step": 6191 }, { "epoch": 3.0087537993920974, "grad_norm": 0.07075477419294558, "learning_rate": 1.4388362360803909e-06, "loss": 0.5062, "step": 6192 }, { "epoch": 3.009240121580547, "grad_norm": 0.07435942836111588, "learning_rate": 1.4374925063819557e-06, "loss": 0.5568, "step": 6193 }, { "epoch": 3.009726443768997, "grad_norm": 0.074419386899034, "learning_rate": 1.4361492990847892e-06, "loss": 0.5102, "step": 6194 }, { "epoch": 3.010212765957447, "grad_norm": 0.07130657221589082, "learning_rate": 1.434806614385858e-06, "loss": 0.5209, "step": 6195 }, { "epoch": 3.0106990881458966, "grad_norm": 0.07139139930266573, "learning_rate": 1.4334644524820512e-06, "loss": 0.494, "step": 6196 }, { "epoch": 3.0111854103343467, "grad_norm": 0.0701504719476038, "learning_rate": 1.432122813570182e-06, "loss": 0.5285, "step": 6197 }, { "epoch": 3.0116717325227964, "grad_norm": 0.0690511543570714, "learning_rate": 1.430781697846988e-06, "loss": 0.4964, "step": 6198 }, { "epoch": 3.012158054711246, "grad_norm": 0.07207671846249566, "learning_rate": 1.4294411055091246e-06, "loss": 0.5094, "step": 6199 }, { "epoch": 3.012644376899696, "grad_norm": 0.06929879249846607, "learning_rate": 1.4281010367531773e-06, "loss": 0.5136, "step": 6200 }, { "epoch": 3.013130699088146, "grad_norm": 0.06924668780601602, "learning_rate": 1.4267614917756495e-06, "loss": 0.519, "step": 6201 }, { "epoch": 3.0136170212765956, "grad_norm": 0.07050709081625034, "learning_rate": 1.4254224707729736e-06, "loss": 0.4733, "step": 6202 }, { "epoch": 3.0141033434650457, "grad_norm": 0.072794380308219, "learning_rate": 1.4240839739415002e-06, "loss": 0.5298, "step": 6203 }, { "epoch": 3.0145896656534954, "grad_norm": 0.0697028306395541, "learning_rate": 1.4227460014775051e-06, "loss": 0.4784, "step": 6204 }, { "epoch": 3.015075987841945, "grad_norm": 0.07291993555025729, "learning_rate": 1.4214085535771865e-06, "loss": 0.5438, "step": 6205 }, { "epoch": 3.015562310030395, "grad_norm": 0.06960749927708733, "learning_rate": 1.4200716304366658e-06, "loss": 0.4893, "step": 6206 }, { "epoch": 3.016048632218845, "grad_norm": 0.07162484393079868, "learning_rate": 1.4187352322519876e-06, "loss": 0.5018, "step": 6207 }, { "epoch": 3.016534954407295, "grad_norm": 0.07149464072525191, "learning_rate": 1.4173993592191199e-06, "loss": 0.5174, "step": 6208 }, { "epoch": 3.0170212765957447, "grad_norm": 0.0716207880463414, "learning_rate": 1.416064011533953e-06, "loss": 0.5189, "step": 6209 }, { "epoch": 3.0175075987841944, "grad_norm": 0.06972230036932861, "learning_rate": 1.4147291893923004e-06, "loss": 0.5191, "step": 6210 }, { "epoch": 3.0179939209726445, "grad_norm": 0.07318914066741655, "learning_rate": 1.4133948929898988e-06, "loss": 0.5074, "step": 6211 }, { "epoch": 3.018480243161094, "grad_norm": 0.07063476281997885, "learning_rate": 1.412061122522409e-06, "loss": 0.5547, "step": 6212 }, { "epoch": 3.018966565349544, "grad_norm": 0.07304224120202682, "learning_rate": 1.4107278781854107e-06, "loss": 0.534, "step": 6213 }, { "epoch": 3.019452887537994, "grad_norm": 0.06895765283435634, "learning_rate": 1.4093951601744098e-06, "loss": 0.4958, "step": 6214 }, { "epoch": 3.0199392097264437, "grad_norm": 0.07217781718503266, "learning_rate": 1.4080629686848347e-06, "loss": 0.5055, "step": 6215 }, { "epoch": 3.020425531914894, "grad_norm": 0.07276528482364031, "learning_rate": 1.4067313039120361e-06, "loss": 0.5214, "step": 6216 }, { "epoch": 3.0209118541033435, "grad_norm": 0.07160486140565049, "learning_rate": 1.4054001660512873e-06, "loss": 0.4875, "step": 6217 }, { "epoch": 3.021398176291793, "grad_norm": 0.06866248613764944, "learning_rate": 1.404069555297785e-06, "loss": 0.4975, "step": 6218 }, { "epoch": 3.0218844984802433, "grad_norm": 0.07032880112944945, "learning_rate": 1.4027394718466463e-06, "loss": 0.5038, "step": 6219 }, { "epoch": 3.022370820668693, "grad_norm": 0.07077522392031475, "learning_rate": 1.4014099158929162e-06, "loss": 0.5105, "step": 6220 }, { "epoch": 3.0228571428571427, "grad_norm": 0.06927426904119653, "learning_rate": 1.4000808876315568e-06, "loss": 0.519, "step": 6221 }, { "epoch": 3.023343465045593, "grad_norm": 0.07363887692539962, "learning_rate": 1.398752387257456e-06, "loss": 0.5779, "step": 6222 }, { "epoch": 3.0238297872340425, "grad_norm": 0.06902544366548685, "learning_rate": 1.3974244149654221e-06, "loss": 0.4847, "step": 6223 }, { "epoch": 3.024316109422492, "grad_norm": 0.07075728502671924, "learning_rate": 1.396096970950188e-06, "loss": 0.4873, "step": 6224 }, { "epoch": 3.0248024316109423, "grad_norm": 0.07237784655488048, "learning_rate": 1.3947700554064086e-06, "loss": 0.5298, "step": 6225 }, { "epoch": 3.025288753799392, "grad_norm": 0.06925651254264355, "learning_rate": 1.39344366852866e-06, "loss": 0.489, "step": 6226 }, { "epoch": 3.025775075987842, "grad_norm": 0.06945003347846412, "learning_rate": 1.3921178105114436e-06, "loss": 0.5039, "step": 6227 }, { "epoch": 3.026261398176292, "grad_norm": 0.07107567334362128, "learning_rate": 1.3907924815491791e-06, "loss": 0.5178, "step": 6228 }, { "epoch": 3.0267477203647415, "grad_norm": 0.07470872355468923, "learning_rate": 1.3894676818362112e-06, "loss": 0.5208, "step": 6229 }, { "epoch": 3.0272340425531916, "grad_norm": 0.0689356325817015, "learning_rate": 1.388143411566808e-06, "loss": 0.4893, "step": 6230 }, { "epoch": 3.0277203647416413, "grad_norm": 0.07016919821332085, "learning_rate": 1.3868196709351582e-06, "loss": 0.505, "step": 6231 }, { "epoch": 3.028206686930091, "grad_norm": 0.07241493526076244, "learning_rate": 1.3854964601353732e-06, "loss": 0.5002, "step": 6232 }, { "epoch": 3.028693009118541, "grad_norm": 0.07239961017815634, "learning_rate": 1.3841737793614869e-06, "loss": 0.5332, "step": 6233 }, { "epoch": 3.029179331306991, "grad_norm": 0.0741605478453966, "learning_rate": 1.3828516288074551e-06, "loss": 0.5113, "step": 6234 }, { "epoch": 3.029665653495441, "grad_norm": 0.0699171037833093, "learning_rate": 1.3815300086671569e-06, "loss": 0.5013, "step": 6235 }, { "epoch": 3.0301519756838906, "grad_norm": 0.06880322183982368, "learning_rate": 1.380208919134392e-06, "loss": 0.4955, "step": 6236 }, { "epoch": 3.0306382978723403, "grad_norm": 0.07159963920616899, "learning_rate": 1.3788883604028825e-06, "loss": 0.4988, "step": 6237 }, { "epoch": 3.0311246200607904, "grad_norm": 0.07051842277578378, "learning_rate": 1.377568332666276e-06, "loss": 0.504, "step": 6238 }, { "epoch": 3.03161094224924, "grad_norm": 0.06947650910451554, "learning_rate": 1.3762488361181382e-06, "loss": 0.4606, "step": 6239 }, { "epoch": 3.03209726443769, "grad_norm": 0.07284046010469804, "learning_rate": 1.3749298709519576e-06, "loss": 0.5367, "step": 6240 }, { "epoch": 3.03258358662614, "grad_norm": 0.06930677666814176, "learning_rate": 1.3736114373611464e-06, "loss": 0.493, "step": 6241 }, { "epoch": 3.0330699088145896, "grad_norm": 0.07178891453088389, "learning_rate": 1.3722935355390394e-06, "loss": 0.5118, "step": 6242 }, { "epoch": 3.0335562310030397, "grad_norm": 0.07255145843100853, "learning_rate": 1.3709761656788884e-06, "loss": 0.5364, "step": 6243 }, { "epoch": 3.0340425531914894, "grad_norm": 0.06934723784036224, "learning_rate": 1.3696593279738718e-06, "loss": 0.5046, "step": 6244 }, { "epoch": 3.034528875379939, "grad_norm": 0.07427312441588829, "learning_rate": 1.3683430226170903e-06, "loss": 0.5037, "step": 6245 }, { "epoch": 3.0350151975683892, "grad_norm": 0.06928410477212978, "learning_rate": 1.3670272498015636e-06, "loss": 0.4838, "step": 6246 }, { "epoch": 3.035501519756839, "grad_norm": 0.07128968017872989, "learning_rate": 1.3657120097202359e-06, "loss": 0.4791, "step": 6247 }, { "epoch": 3.0359878419452886, "grad_norm": 0.07173212178439788, "learning_rate": 1.3643973025659723e-06, "loss": 0.5385, "step": 6248 }, { "epoch": 3.0364741641337387, "grad_norm": 0.07035323168537334, "learning_rate": 1.3630831285315588e-06, "loss": 0.5042, "step": 6249 }, { "epoch": 3.0369604863221884, "grad_norm": 0.07251501452065133, "learning_rate": 1.3617694878097048e-06, "loss": 0.5322, "step": 6250 }, { "epoch": 3.037446808510638, "grad_norm": 0.07251463708211572, "learning_rate": 1.3604563805930405e-06, "loss": 0.4886, "step": 6251 }, { "epoch": 3.037933130699088, "grad_norm": 0.06907004330044605, "learning_rate": 1.3591438070741182e-06, "loss": 0.5016, "step": 6252 }, { "epoch": 3.038419452887538, "grad_norm": 0.07170928985389605, "learning_rate": 1.3578317674454117e-06, "loss": 0.501, "step": 6253 }, { "epoch": 3.038905775075988, "grad_norm": 0.0714334508847959, "learning_rate": 1.3565202618993173e-06, "loss": 0.498, "step": 6254 }, { "epoch": 3.0393920972644377, "grad_norm": 0.07113356755831104, "learning_rate": 1.3552092906281505e-06, "loss": 0.4976, "step": 6255 }, { "epoch": 3.0398784194528874, "grad_norm": 0.07338302872459229, "learning_rate": 1.3538988538241548e-06, "loss": 0.4945, "step": 6256 }, { "epoch": 3.0403647416413375, "grad_norm": 0.07321136354776898, "learning_rate": 1.3525889516794865e-06, "loss": 0.518, "step": 6257 }, { "epoch": 3.040851063829787, "grad_norm": 0.07159510679921081, "learning_rate": 1.3512795843862292e-06, "loss": 0.509, "step": 6258 }, { "epoch": 3.041337386018237, "grad_norm": 0.07433397630829636, "learning_rate": 1.349970752136387e-06, "loss": 0.5349, "step": 6259 }, { "epoch": 3.041823708206687, "grad_norm": 0.06958759396400542, "learning_rate": 1.3486624551218853e-06, "loss": 0.502, "step": 6260 }, { "epoch": 3.0423100303951367, "grad_norm": 0.0704650932052657, "learning_rate": 1.3473546935345704e-06, "loss": 0.5116, "step": 6261 }, { "epoch": 3.042796352583587, "grad_norm": 0.07027851137018615, "learning_rate": 1.3460474675662117e-06, "loss": 0.4975, "step": 6262 }, { "epoch": 3.0432826747720365, "grad_norm": 0.06795607521033717, "learning_rate": 1.344740777408498e-06, "loss": 0.5008, "step": 6263 }, { "epoch": 3.043768996960486, "grad_norm": 0.07057546231753216, "learning_rate": 1.3434346232530416e-06, "loss": 0.4678, "step": 6264 }, { "epoch": 3.0442553191489363, "grad_norm": 0.07178224314994901, "learning_rate": 1.3421290052913744e-06, "loss": 0.4973, "step": 6265 }, { "epoch": 3.044741641337386, "grad_norm": 0.06819376763651104, "learning_rate": 1.3408239237149507e-06, "loss": 0.4837, "step": 6266 }, { "epoch": 3.0452279635258357, "grad_norm": 0.07052840370706334, "learning_rate": 1.3395193787151455e-06, "loss": 0.4882, "step": 6267 }, { "epoch": 3.045714285714286, "grad_norm": 0.07029072818881148, "learning_rate": 1.3382153704832569e-06, "loss": 0.4989, "step": 6268 }, { "epoch": 3.0462006079027355, "grad_norm": 0.07275798787716026, "learning_rate": 1.3369118992105012e-06, "loss": 0.55, "step": 6269 }, { "epoch": 3.0466869300911856, "grad_norm": 0.07083812317306203, "learning_rate": 1.3356089650880184e-06, "loss": 0.5089, "step": 6270 }, { "epoch": 3.0471732522796353, "grad_norm": 0.07249749636383808, "learning_rate": 1.334306568306869e-06, "loss": 0.5494, "step": 6271 }, { "epoch": 3.047659574468085, "grad_norm": 0.07215260095398096, "learning_rate": 1.3330047090580345e-06, "loss": 0.488, "step": 6272 }, { "epoch": 3.048145896656535, "grad_norm": 0.07110178094004686, "learning_rate": 1.3317033875324182e-06, "loss": 0.5076, "step": 6273 }, { "epoch": 3.048632218844985, "grad_norm": 0.07175936217987068, "learning_rate": 1.3304026039208434e-06, "loss": 0.5021, "step": 6274 }, { "epoch": 3.0491185410334345, "grad_norm": 0.06935499105944194, "learning_rate": 1.3291023584140562e-06, "loss": 0.5146, "step": 6275 }, { "epoch": 3.0496048632218846, "grad_norm": 0.07102140988657896, "learning_rate": 1.327802651202722e-06, "loss": 0.5101, "step": 6276 }, { "epoch": 3.0500911854103343, "grad_norm": 0.0709765143690515, "learning_rate": 1.3265034824774287e-06, "loss": 0.529, "step": 6277 }, { "epoch": 3.050577507598784, "grad_norm": 0.0735611020926711, "learning_rate": 1.3252048524286843e-06, "loss": 0.5232, "step": 6278 }, { "epoch": 3.051063829787234, "grad_norm": 0.06836202171404077, "learning_rate": 1.3239067612469182e-06, "loss": 0.4472, "step": 6279 }, { "epoch": 3.051550151975684, "grad_norm": 0.07090143864412517, "learning_rate": 1.3226092091224806e-06, "loss": 0.4929, "step": 6280 }, { "epoch": 3.052036474164134, "grad_norm": 0.06984041427868111, "learning_rate": 1.3213121962456433e-06, "loss": 0.4953, "step": 6281 }, { "epoch": 3.0525227963525836, "grad_norm": 0.0730628285266339, "learning_rate": 1.320015722806598e-06, "loss": 0.5035, "step": 6282 }, { "epoch": 3.0530091185410333, "grad_norm": 0.07417655743507183, "learning_rate": 1.3187197889954579e-06, "loss": 0.5184, "step": 6283 }, { "epoch": 3.0534954407294834, "grad_norm": 0.07164698341009024, "learning_rate": 1.3174243950022569e-06, "loss": 0.505, "step": 6284 }, { "epoch": 3.053981762917933, "grad_norm": 0.06742437210564374, "learning_rate": 1.31612954101695e-06, "loss": 0.4722, "step": 6285 }, { "epoch": 3.054468085106383, "grad_norm": 0.07048552927687309, "learning_rate": 1.3148352272294128e-06, "loss": 0.4846, "step": 6286 }, { "epoch": 3.054954407294833, "grad_norm": 0.07223294823135329, "learning_rate": 1.3135414538294421e-06, "loss": 0.5241, "step": 6287 }, { "epoch": 3.0554407294832826, "grad_norm": 0.06816249635962288, "learning_rate": 1.3122482210067545e-06, "loss": 0.4932, "step": 6288 }, { "epoch": 3.0559270516717327, "grad_norm": 0.07257120033790468, "learning_rate": 1.3109555289509879e-06, "loss": 0.5173, "step": 6289 }, { "epoch": 3.0564133738601824, "grad_norm": 0.07446069467480337, "learning_rate": 1.3096633778517026e-06, "loss": 0.5135, "step": 6290 }, { "epoch": 3.056899696048632, "grad_norm": 0.07154583301404861, "learning_rate": 1.3083717678983737e-06, "loss": 0.5186, "step": 6291 }, { "epoch": 3.0573860182370822, "grad_norm": 0.07168192956656737, "learning_rate": 1.3070806992804047e-06, "loss": 0.5379, "step": 6292 }, { "epoch": 3.057872340425532, "grad_norm": 0.06962513931621467, "learning_rate": 1.3057901721871157e-06, "loss": 0.4855, "step": 6293 }, { "epoch": 3.0583586626139816, "grad_norm": 0.06947283885309945, "learning_rate": 1.3045001868077478e-06, "loss": 0.4867, "step": 6294 }, { "epoch": 3.0588449848024317, "grad_norm": 0.07353521692109127, "learning_rate": 1.3032107433314618e-06, "loss": 0.499, "step": 6295 }, { "epoch": 3.0593313069908814, "grad_norm": 0.07148062990889317, "learning_rate": 1.3019218419473406e-06, "loss": 0.4968, "step": 6296 }, { "epoch": 3.059817629179331, "grad_norm": 0.07256044760721751, "learning_rate": 1.3006334828443868e-06, "loss": 0.5405, "step": 6297 }, { "epoch": 3.060303951367781, "grad_norm": 0.07041057907092613, "learning_rate": 1.2993456662115234e-06, "loss": 0.5224, "step": 6298 }, { "epoch": 3.060790273556231, "grad_norm": 0.07219259347943267, "learning_rate": 1.298058392237595e-06, "loss": 0.529, "step": 6299 }, { "epoch": 3.061276595744681, "grad_norm": 0.07055471295668753, "learning_rate": 1.2967716611113645e-06, "loss": 0.4971, "step": 6300 }, { "epoch": 3.0617629179331307, "grad_norm": 0.07227026389293165, "learning_rate": 1.2954854730215172e-06, "loss": 0.5034, "step": 6301 }, { "epoch": 3.0622492401215804, "grad_norm": 0.07076437334651081, "learning_rate": 1.2941998281566575e-06, "loss": 0.5088, "step": 6302 }, { "epoch": 3.0627355623100305, "grad_norm": 0.07099447501307093, "learning_rate": 1.292914726705311e-06, "loss": 0.5127, "step": 6303 }, { "epoch": 3.06322188449848, "grad_norm": 0.07276744147582066, "learning_rate": 1.291630168855924e-06, "loss": 0.5458, "step": 6304 }, { "epoch": 3.06370820668693, "grad_norm": 0.06956717760551456, "learning_rate": 1.290346154796861e-06, "loss": 0.4726, "step": 6305 }, { "epoch": 3.06419452887538, "grad_norm": 0.07276407136808401, "learning_rate": 1.2890626847164078e-06, "loss": 0.5148, "step": 6306 }, { "epoch": 3.0646808510638297, "grad_norm": 0.07206947415467035, "learning_rate": 1.2877797588027713e-06, "loss": 0.5399, "step": 6307 }, { "epoch": 3.06516717325228, "grad_norm": 0.07449496778010822, "learning_rate": 1.2864973772440787e-06, "loss": 0.5116, "step": 6308 }, { "epoch": 3.0656534954407295, "grad_norm": 0.06991173475730521, "learning_rate": 1.2852155402283756e-06, "loss": 0.4741, "step": 6309 }, { "epoch": 3.066139817629179, "grad_norm": 0.0691589602206517, "learning_rate": 1.2839342479436279e-06, "loss": 0.5114, "step": 6310 }, { "epoch": 3.0666261398176293, "grad_norm": 0.07486175626612472, "learning_rate": 1.2826535005777257e-06, "loss": 0.5432, "step": 6311 }, { "epoch": 3.067112462006079, "grad_norm": 0.06931029321733267, "learning_rate": 1.2813732983184745e-06, "loss": 0.4911, "step": 6312 }, { "epoch": 3.0675987841945287, "grad_norm": 0.07156429182458042, "learning_rate": 1.2800936413536008e-06, "loss": 0.5327, "step": 6313 }, { "epoch": 3.068085106382979, "grad_norm": 0.07182725231193507, "learning_rate": 1.2788145298707526e-06, "loss": 0.5379, "step": 6314 }, { "epoch": 3.0685714285714285, "grad_norm": 0.07123078441726857, "learning_rate": 1.2775359640574969e-06, "loss": 0.5117, "step": 6315 }, { "epoch": 3.0690577507598786, "grad_norm": 0.07521351926623729, "learning_rate": 1.2762579441013207e-06, "loss": 0.5241, "step": 6316 }, { "epoch": 3.0695440729483283, "grad_norm": 0.07345094914579875, "learning_rate": 1.2749804701896307e-06, "loss": 0.5321, "step": 6317 }, { "epoch": 3.070030395136778, "grad_norm": 0.07080752055646776, "learning_rate": 1.2737035425097543e-06, "loss": 0.5428, "step": 6318 }, { "epoch": 3.070516717325228, "grad_norm": 0.07054922317713008, "learning_rate": 1.2724271612489403e-06, "loss": 0.4962, "step": 6319 }, { "epoch": 3.071003039513678, "grad_norm": 0.0696894246487351, "learning_rate": 1.271151326594352e-06, "loss": 0.5079, "step": 6320 }, { "epoch": 3.0714893617021275, "grad_norm": 0.06858804681381224, "learning_rate": 1.2698760387330782e-06, "loss": 0.5081, "step": 6321 }, { "epoch": 3.0719756838905776, "grad_norm": 0.07292821371054758, "learning_rate": 1.2686012978521244e-06, "loss": 0.5062, "step": 6322 }, { "epoch": 3.0724620060790273, "grad_norm": 0.06872869023772299, "learning_rate": 1.2673271041384177e-06, "loss": 0.487, "step": 6323 }, { "epoch": 3.072948328267477, "grad_norm": 0.07140688053844178, "learning_rate": 1.266053457778804e-06, "loss": 0.5078, "step": 6324 }, { "epoch": 3.073434650455927, "grad_norm": 0.07061360252060708, "learning_rate": 1.2647803589600488e-06, "loss": 0.5158, "step": 6325 }, { "epoch": 3.073920972644377, "grad_norm": 0.07179162073953198, "learning_rate": 1.2635078078688378e-06, "loss": 0.5075, "step": 6326 }, { "epoch": 3.074407294832827, "grad_norm": 0.07045132338503757, "learning_rate": 1.262235804691776e-06, "loss": 0.5068, "step": 6327 }, { "epoch": 3.0748936170212766, "grad_norm": 0.07118685947785651, "learning_rate": 1.2609643496153866e-06, "loss": 0.5073, "step": 6328 }, { "epoch": 3.0753799392097263, "grad_norm": 0.07184116769388826, "learning_rate": 1.2596934428261181e-06, "loss": 0.5118, "step": 6329 }, { "epoch": 3.0758662613981764, "grad_norm": 0.07113752423084277, "learning_rate": 1.2584230845103312e-06, "loss": 0.5219, "step": 6330 }, { "epoch": 3.076352583586626, "grad_norm": 0.07097254703792906, "learning_rate": 1.2571532748543114e-06, "loss": 0.5001, "step": 6331 }, { "epoch": 3.076838905775076, "grad_norm": 0.06938086836569492, "learning_rate": 1.2558840140442602e-06, "loss": 0.5206, "step": 6332 }, { "epoch": 3.077325227963526, "grad_norm": 0.0717412607583114, "learning_rate": 1.2546153022663015e-06, "loss": 0.5232, "step": 6333 }, { "epoch": 3.0778115501519756, "grad_norm": 0.07134253539864335, "learning_rate": 1.2533471397064783e-06, "loss": 0.5096, "step": 6334 }, { "epoch": 3.0782978723404257, "grad_norm": 0.06845991587156157, "learning_rate": 1.2520795265507502e-06, "loss": 0.4801, "step": 6335 }, { "epoch": 3.0787841945288754, "grad_norm": 0.07210549772526677, "learning_rate": 1.2508124629849981e-06, "loss": 0.5239, "step": 6336 }, { "epoch": 3.079270516717325, "grad_norm": 0.06973865953789864, "learning_rate": 1.249545949195024e-06, "loss": 0.4897, "step": 6337 }, { "epoch": 3.0797568389057752, "grad_norm": 0.0722478556980012, "learning_rate": 1.2482799853665473e-06, "loss": 0.5239, "step": 6338 }, { "epoch": 3.080243161094225, "grad_norm": 0.07359308056235384, "learning_rate": 1.2470145716852072e-06, "loss": 0.5164, "step": 6339 }, { "epoch": 3.0807294832826746, "grad_norm": 0.07131621061382196, "learning_rate": 1.245749708336562e-06, "loss": 0.4762, "step": 6340 }, { "epoch": 3.0812158054711247, "grad_norm": 0.07263226576185644, "learning_rate": 1.2444853955060899e-06, "loss": 0.5286, "step": 6341 }, { "epoch": 3.0817021276595744, "grad_norm": 0.07299253584561631, "learning_rate": 1.2432216333791875e-06, "loss": 0.5177, "step": 6342 }, { "epoch": 3.082188449848024, "grad_norm": 0.07091960840101029, "learning_rate": 1.2419584221411719e-06, "loss": 0.4905, "step": 6343 }, { "epoch": 3.082674772036474, "grad_norm": 0.07007660780025235, "learning_rate": 1.240695761977278e-06, "loss": 0.511, "step": 6344 }, { "epoch": 3.083161094224924, "grad_norm": 0.07013552941360761, "learning_rate": 1.2394336530726608e-06, "loss": 0.4929, "step": 6345 }, { "epoch": 3.083647416413374, "grad_norm": 0.06951857278051657, "learning_rate": 1.2381720956123933e-06, "loss": 0.5273, "step": 6346 }, { "epoch": 3.0841337386018237, "grad_norm": 0.07373984506989155, "learning_rate": 1.2369110897814708e-06, "loss": 0.5187, "step": 6347 }, { "epoch": 3.0846200607902734, "grad_norm": 0.06876828815974513, "learning_rate": 1.2356506357648058e-06, "loss": 0.474, "step": 6348 }, { "epoch": 3.0851063829787235, "grad_norm": 0.07113609268148895, "learning_rate": 1.2343907337472261e-06, "loss": 0.5393, "step": 6349 }, { "epoch": 3.085592705167173, "grad_norm": 0.07008265151912244, "learning_rate": 1.2331313839134845e-06, "loss": 0.4674, "step": 6350 }, { "epoch": 3.086079027355623, "grad_norm": 0.07362148196322874, "learning_rate": 1.23187258644825e-06, "loss": 0.5221, "step": 6351 }, { "epoch": 3.086565349544073, "grad_norm": 0.07217852200413702, "learning_rate": 1.2306143415361104e-06, "loss": 0.5286, "step": 6352 }, { "epoch": 3.0870516717325227, "grad_norm": 0.07084309184955308, "learning_rate": 1.2293566493615734e-06, "loss": 0.51, "step": 6353 }, { "epoch": 3.087537993920973, "grad_norm": 0.07106327145762124, "learning_rate": 1.2280995101090653e-06, "loss": 0.5211, "step": 6354 }, { "epoch": 3.0880243161094225, "grad_norm": 0.07100153619531661, "learning_rate": 1.2268429239629314e-06, "loss": 0.4956, "step": 6355 }, { "epoch": 3.088510638297872, "grad_norm": 0.06737100715668831, "learning_rate": 1.225586891107436e-06, "loss": 0.459, "step": 6356 }, { "epoch": 3.0889969604863223, "grad_norm": 0.06917788612588545, "learning_rate": 1.2243314117267608e-06, "loss": 0.483, "step": 6357 }, { "epoch": 3.089483282674772, "grad_norm": 0.07090212462023035, "learning_rate": 1.2230764860050094e-06, "loss": 0.5065, "step": 6358 }, { "epoch": 3.0899696048632217, "grad_norm": 0.07255885515473502, "learning_rate": 1.221822114126201e-06, "loss": 0.5186, "step": 6359 }, { "epoch": 3.090455927051672, "grad_norm": 0.0716228877422812, "learning_rate": 1.2205682962742754e-06, "loss": 0.5048, "step": 6360 }, { "epoch": 3.0909422492401215, "grad_norm": 0.06983538010363304, "learning_rate": 1.2193150326330915e-06, "loss": 0.4788, "step": 6361 }, { "epoch": 3.0914285714285716, "grad_norm": 0.06962692046284083, "learning_rate": 1.2180623233864254e-06, "loss": 0.5057, "step": 6362 }, { "epoch": 3.0919148936170213, "grad_norm": 0.06861106518135233, "learning_rate": 1.2168101687179722e-06, "loss": 0.4685, "step": 6363 }, { "epoch": 3.092401215805471, "grad_norm": 0.07138466663715992, "learning_rate": 1.2155585688113476e-06, "loss": 0.4958, "step": 6364 }, { "epoch": 3.092887537993921, "grad_norm": 0.06940887925919648, "learning_rate": 1.214307523850083e-06, "loss": 0.5019, "step": 6365 }, { "epoch": 3.093373860182371, "grad_norm": 0.07142201589147261, "learning_rate": 1.2130570340176306e-06, "loss": 0.5035, "step": 6366 }, { "epoch": 3.0938601823708205, "grad_norm": 0.07149166942094624, "learning_rate": 1.2118070994973612e-06, "loss": 0.5472, "step": 6367 }, { "epoch": 3.0943465045592706, "grad_norm": 0.07370689619712235, "learning_rate": 1.2105577204725627e-06, "loss": 0.5077, "step": 6368 }, { "epoch": 3.0948328267477203, "grad_norm": 0.06954060524989816, "learning_rate": 1.209308897126442e-06, "loss": 0.5229, "step": 6369 }, { "epoch": 3.09531914893617, "grad_norm": 0.07152600209309178, "learning_rate": 1.208060629642126e-06, "loss": 0.4883, "step": 6370 }, { "epoch": 3.09580547112462, "grad_norm": 0.07024148787034443, "learning_rate": 1.2068129182026582e-06, "loss": 0.5164, "step": 6371 }, { "epoch": 3.09629179331307, "grad_norm": 0.06939886621298567, "learning_rate": 1.205565762991001e-06, "loss": 0.4999, "step": 6372 }, { "epoch": 3.09677811550152, "grad_norm": 0.07192795308272598, "learning_rate": 1.204319164190037e-06, "loss": 0.5114, "step": 6373 }, { "epoch": 3.0972644376899696, "grad_norm": 0.07122076974685827, "learning_rate": 1.2030731219825637e-06, "loss": 0.4736, "step": 6374 }, { "epoch": 3.0977507598784193, "grad_norm": 0.06931748807231478, "learning_rate": 1.2018276365513009e-06, "loss": 0.4853, "step": 6375 }, { "epoch": 3.0982370820668694, "grad_norm": 0.07190532623733153, "learning_rate": 1.2005827080788835e-06, "loss": 0.5382, "step": 6376 }, { "epoch": 3.098723404255319, "grad_norm": 0.07557272310804244, "learning_rate": 1.1993383367478672e-06, "loss": 0.5493, "step": 6377 }, { "epoch": 3.099209726443769, "grad_norm": 0.07028894199463619, "learning_rate": 1.1980945227407242e-06, "loss": 0.5331, "step": 6378 }, { "epoch": 3.099696048632219, "grad_norm": 0.06953908103001373, "learning_rate": 1.1968512662398458e-06, "loss": 0.5069, "step": 6379 }, { "epoch": 3.1001823708206686, "grad_norm": 0.07070552459581703, "learning_rate": 1.1956085674275419e-06, "loss": 0.5164, "step": 6380 }, { "epoch": 3.1006686930091187, "grad_norm": 0.07278151402145511, "learning_rate": 1.1943664264860395e-06, "loss": 0.5325, "step": 6381 }, { "epoch": 3.1011550151975684, "grad_norm": 0.07048772064653154, "learning_rate": 1.193124843597485e-06, "loss": 0.4709, "step": 6382 }, { "epoch": 3.101641337386018, "grad_norm": 0.07323368836055087, "learning_rate": 1.1918838189439426e-06, "loss": 0.5399, "step": 6383 }, { "epoch": 3.1021276595744682, "grad_norm": 0.06980025203650327, "learning_rate": 1.1906433527073934e-06, "loss": 0.5133, "step": 6384 }, { "epoch": 3.102613981762918, "grad_norm": 0.07171990081161943, "learning_rate": 1.1894034450697389e-06, "loss": 0.5008, "step": 6385 }, { "epoch": 3.1031003039513676, "grad_norm": 0.0773046940284695, "learning_rate": 1.1881640962127972e-06, "loss": 0.543, "step": 6386 }, { "epoch": 3.1035866261398177, "grad_norm": 0.07622947195450273, "learning_rate": 1.1869253063183039e-06, "loss": 0.5906, "step": 6387 }, { "epoch": 3.1040729483282674, "grad_norm": 0.07202062857229782, "learning_rate": 1.1856870755679146e-06, "loss": 0.5243, "step": 6388 }, { "epoch": 3.1045592705167175, "grad_norm": 0.07124144193174368, "learning_rate": 1.1844494041432008e-06, "loss": 0.5145, "step": 6389 }, { "epoch": 3.1050455927051672, "grad_norm": 0.07050302024638118, "learning_rate": 1.1832122922256539e-06, "loss": 0.5067, "step": 6390 }, { "epoch": 3.105531914893617, "grad_norm": 0.07209536677435852, "learning_rate": 1.181975739996682e-06, "loss": 0.5537, "step": 6391 }, { "epoch": 3.106018237082067, "grad_norm": 0.06929175378286714, "learning_rate": 1.1807397476376109e-06, "loss": 0.4975, "step": 6392 }, { "epoch": 3.1065045592705167, "grad_norm": 0.0696040713117208, "learning_rate": 1.1795043153296849e-06, "loss": 0.4853, "step": 6393 }, { "epoch": 3.1069908814589664, "grad_norm": 0.07267699530695315, "learning_rate": 1.178269443254067e-06, "loss": 0.5128, "step": 6394 }, { "epoch": 3.1074772036474165, "grad_norm": 0.06980701990610522, "learning_rate": 1.1770351315918365e-06, "loss": 0.4856, "step": 6395 }, { "epoch": 3.107963525835866, "grad_norm": 0.07085465994523285, "learning_rate": 1.1758013805239925e-06, "loss": 0.4977, "step": 6396 }, { "epoch": 3.108449848024316, "grad_norm": 0.07118085670496584, "learning_rate": 1.1745681902314481e-06, "loss": 0.4935, "step": 6397 }, { "epoch": 3.108936170212766, "grad_norm": 0.07054901413909052, "learning_rate": 1.173335560895038e-06, "loss": 0.4983, "step": 6398 }, { "epoch": 3.1094224924012157, "grad_norm": 0.07073906424636892, "learning_rate": 1.172103492695514e-06, "loss": 0.5189, "step": 6399 }, { "epoch": 3.109908814589666, "grad_norm": 0.07306412765885605, "learning_rate": 1.1708719858135415e-06, "loss": 0.5508, "step": 6400 }, { "epoch": 3.1103951367781155, "grad_norm": 0.07092420869708216, "learning_rate": 1.1696410404297115e-06, "loss": 0.484, "step": 6401 }, { "epoch": 3.110881458966565, "grad_norm": 0.07087973722298993, "learning_rate": 1.1684106567245268e-06, "loss": 0.5114, "step": 6402 }, { "epoch": 3.1113677811550153, "grad_norm": 0.0694453810190983, "learning_rate": 1.167180834878408e-06, "loss": 0.4701, "step": 6403 }, { "epoch": 3.111854103343465, "grad_norm": 0.07005915801860707, "learning_rate": 1.1659515750716953e-06, "loss": 0.5013, "step": 6404 }, { "epoch": 3.1123404255319147, "grad_norm": 0.0743821835995421, "learning_rate": 1.164722877484646e-06, "loss": 0.5161, "step": 6405 }, { "epoch": 3.112826747720365, "grad_norm": 0.07115091590642147, "learning_rate": 1.163494742297434e-06, "loss": 0.512, "step": 6406 }, { "epoch": 3.1133130699088145, "grad_norm": 0.07017697233296112, "learning_rate": 1.1622671696901515e-06, "loss": 0.5243, "step": 6407 }, { "epoch": 3.1137993920972646, "grad_norm": 0.06874474726754566, "learning_rate": 1.1610401598428089e-06, "loss": 0.4991, "step": 6408 }, { "epoch": 3.1142857142857143, "grad_norm": 0.0698505587286747, "learning_rate": 1.159813712935332e-06, "loss": 0.5015, "step": 6409 }, { "epoch": 3.114772036474164, "grad_norm": 0.07130561064962943, "learning_rate": 1.158587829147566e-06, "loss": 0.5212, "step": 6410 }, { "epoch": 3.115258358662614, "grad_norm": 0.0696803114674483, "learning_rate": 1.1573625086592744e-06, "loss": 0.4951, "step": 6411 }, { "epoch": 3.115744680851064, "grad_norm": 0.06952126592570107, "learning_rate": 1.1561377516501332e-06, "loss": 0.4865, "step": 6412 }, { "epoch": 3.1162310030395135, "grad_norm": 0.07310200272286681, "learning_rate": 1.1549135582997406e-06, "loss": 0.5278, "step": 6413 }, { "epoch": 3.1167173252279636, "grad_norm": 0.06995403451589693, "learning_rate": 1.1536899287876108e-06, "loss": 0.5073, "step": 6414 }, { "epoch": 3.1172036474164133, "grad_norm": 0.06980246919365295, "learning_rate": 1.1524668632931756e-06, "loss": 0.4972, "step": 6415 }, { "epoch": 3.1176899696048634, "grad_norm": 0.07089394701964344, "learning_rate": 1.1512443619957831e-06, "loss": 0.5101, "step": 6416 }, { "epoch": 3.118176291793313, "grad_norm": 0.06896366804844528, "learning_rate": 1.1500224250746993e-06, "loss": 0.4978, "step": 6417 }, { "epoch": 3.118662613981763, "grad_norm": 0.07044518015495504, "learning_rate": 1.1488010527091075e-06, "loss": 0.5143, "step": 6418 }, { "epoch": 3.119148936170213, "grad_norm": 0.0694168420707088, "learning_rate": 1.1475802450781064e-06, "loss": 0.4627, "step": 6419 }, { "epoch": 3.1196352583586626, "grad_norm": 0.07108325359298967, "learning_rate": 1.1463600023607174e-06, "loss": 0.5264, "step": 6420 }, { "epoch": 3.1201215805471123, "grad_norm": 0.0741012993303335, "learning_rate": 1.1451403247358728e-06, "loss": 0.5305, "step": 6421 }, { "epoch": 3.1206079027355624, "grad_norm": 0.0730793398426267, "learning_rate": 1.1439212123824244e-06, "loss": 0.5305, "step": 6422 }, { "epoch": 3.121094224924012, "grad_norm": 0.07158082043287103, "learning_rate": 1.1427026654791417e-06, "loss": 0.5116, "step": 6423 }, { "epoch": 3.121580547112462, "grad_norm": 0.07215578268810226, "learning_rate": 1.1414846842047106e-06, "loss": 0.5093, "step": 6424 }, { "epoch": 3.122066869300912, "grad_norm": 0.07130340174874215, "learning_rate": 1.1402672687377341e-06, "loss": 0.5163, "step": 6425 }, { "epoch": 3.1225531914893616, "grad_norm": 0.07147530505630274, "learning_rate": 1.1390504192567336e-06, "loss": 0.5125, "step": 6426 }, { "epoch": 3.1230395136778117, "grad_norm": 0.07205014957433295, "learning_rate": 1.1378341359401445e-06, "loss": 0.535, "step": 6427 }, { "epoch": 3.1235258358662614, "grad_norm": 0.07057800732615672, "learning_rate": 1.136618418966321e-06, "loss": 0.4911, "step": 6428 }, { "epoch": 3.124012158054711, "grad_norm": 0.07182188469510198, "learning_rate": 1.1354032685135346e-06, "loss": 0.5079, "step": 6429 }, { "epoch": 3.1244984802431612, "grad_norm": 0.07093446407995187, "learning_rate": 1.1341886847599742e-06, "loss": 0.5225, "step": 6430 }, { "epoch": 3.124984802431611, "grad_norm": 0.06971455401202456, "learning_rate": 1.1329746678837433e-06, "loss": 0.4892, "step": 6431 }, { "epoch": 3.1254711246200606, "grad_norm": 0.0724929505064268, "learning_rate": 1.1317612180628645e-06, "loss": 0.5121, "step": 6432 }, { "epoch": 3.1259574468085107, "grad_norm": 0.07267449665125537, "learning_rate": 1.1305483354752767e-06, "loss": 0.5109, "step": 6433 }, { "epoch": 3.1264437689969604, "grad_norm": 0.07050355890136843, "learning_rate": 1.1293360202988346e-06, "loss": 0.5272, "step": 6434 }, { "epoch": 3.12693009118541, "grad_norm": 0.07160387234521386, "learning_rate": 1.1281242727113112e-06, "loss": 0.4799, "step": 6435 }, { "epoch": 3.1274164133738602, "grad_norm": 0.07253525937791627, "learning_rate": 1.126913092890395e-06, "loss": 0.5211, "step": 6436 }, { "epoch": 3.12790273556231, "grad_norm": 0.07453943214394856, "learning_rate": 1.1257024810136903e-06, "loss": 0.5386, "step": 6437 }, { "epoch": 3.12838905775076, "grad_norm": 0.07111230448726456, "learning_rate": 1.1244924372587224e-06, "loss": 0.5057, "step": 6438 }, { "epoch": 3.1288753799392097, "grad_norm": 0.06883369956196185, "learning_rate": 1.1232829618029295e-06, "loss": 0.4554, "step": 6439 }, { "epoch": 3.1293617021276594, "grad_norm": 0.06990761360628253, "learning_rate": 1.1220740548236685e-06, "loss": 0.501, "step": 6440 }, { "epoch": 3.1298480243161095, "grad_norm": 0.07248503063531504, "learning_rate": 1.1208657164982096e-06, "loss": 0.4931, "step": 6441 }, { "epoch": 3.130334346504559, "grad_norm": 0.07135472037332573, "learning_rate": 1.1196579470037427e-06, "loss": 0.5106, "step": 6442 }, { "epoch": 3.1308206686930093, "grad_norm": 0.07307392616254636, "learning_rate": 1.1184507465173732e-06, "loss": 0.5238, "step": 6443 }, { "epoch": 3.131306990881459, "grad_norm": 0.06803548474807124, "learning_rate": 1.1172441152161246e-06, "loss": 0.4867, "step": 6444 }, { "epoch": 3.1317933130699087, "grad_norm": 0.07050409202969812, "learning_rate": 1.1160380532769343e-06, "loss": 0.5006, "step": 6445 }, { "epoch": 3.132279635258359, "grad_norm": 0.0701212453951125, "learning_rate": 1.1148325608766586e-06, "loss": 0.4865, "step": 6446 }, { "epoch": 3.1327659574468085, "grad_norm": 0.0728671539359153, "learning_rate": 1.1136276381920684e-06, "loss": 0.5396, "step": 6447 }, { "epoch": 3.133252279635258, "grad_norm": 0.07154492601195753, "learning_rate": 1.112423285399853e-06, "loss": 0.516, "step": 6448 }, { "epoch": 3.1337386018237083, "grad_norm": 0.07028965389300273, "learning_rate": 1.111219502676616e-06, "loss": 0.4756, "step": 6449 }, { "epoch": 3.134224924012158, "grad_norm": 0.07010990940901243, "learning_rate": 1.1100162901988786e-06, "loss": 0.5071, "step": 6450 }, { "epoch": 3.1347112462006077, "grad_norm": 0.07263811393972959, "learning_rate": 1.108813648143079e-06, "loss": 0.5358, "step": 6451 }, { "epoch": 3.135197568389058, "grad_norm": 0.0697698055740256, "learning_rate": 1.1076115766855705e-06, "loss": 0.4968, "step": 6452 }, { "epoch": 3.1356838905775075, "grad_norm": 0.07328748919823568, "learning_rate": 1.106410076002623e-06, "loss": 0.5, "step": 6453 }, { "epoch": 3.1361702127659576, "grad_norm": 0.07295765849631576, "learning_rate": 1.1052091462704235e-06, "loss": 0.5289, "step": 6454 }, { "epoch": 3.1366565349544073, "grad_norm": 0.0721159537936726, "learning_rate": 1.1040087876650745e-06, "loss": 0.5185, "step": 6455 }, { "epoch": 3.137142857142857, "grad_norm": 0.07252959834112245, "learning_rate": 1.1028090003625946e-06, "loss": 0.5123, "step": 6456 }, { "epoch": 3.137629179331307, "grad_norm": 0.07400730843125519, "learning_rate": 1.1016097845389195e-06, "loss": 0.5023, "step": 6457 }, { "epoch": 3.138115501519757, "grad_norm": 0.07037077636612832, "learning_rate": 1.1004111403699002e-06, "loss": 0.515, "step": 6458 }, { "epoch": 3.1386018237082065, "grad_norm": 0.06949047008004194, "learning_rate": 1.0992130680313046e-06, "loss": 0.5014, "step": 6459 }, { "epoch": 3.1390881458966566, "grad_norm": 0.0723124746550498, "learning_rate": 1.0980155676988159e-06, "loss": 0.4957, "step": 6460 }, { "epoch": 3.1395744680851063, "grad_norm": 0.0733040070150769, "learning_rate": 1.0968186395480345e-06, "loss": 0.5137, "step": 6461 }, { "epoch": 3.140060790273556, "grad_norm": 0.0693618606067608, "learning_rate": 1.0956222837544762e-06, "loss": 0.5084, "step": 6462 }, { "epoch": 3.140547112462006, "grad_norm": 0.07147189848760056, "learning_rate": 1.0944265004935723e-06, "loss": 0.5543, "step": 6463 }, { "epoch": 3.141033434650456, "grad_norm": 0.07092690296158626, "learning_rate": 1.0932312899406717e-06, "loss": 0.5092, "step": 6464 }, { "epoch": 3.141519756838906, "grad_norm": 0.06904957285530479, "learning_rate": 1.092036652271038e-06, "loss": 0.4779, "step": 6465 }, { "epoch": 3.1420060790273556, "grad_norm": 0.07027316271985638, "learning_rate": 1.0908425876598512e-06, "loss": 0.5164, "step": 6466 }, { "epoch": 3.1424924012158053, "grad_norm": 0.07462036631002521, "learning_rate": 1.0896490962822082e-06, "loss": 0.5364, "step": 6467 }, { "epoch": 3.1429787234042554, "grad_norm": 0.07178073401915917, "learning_rate": 1.0884561783131192e-06, "loss": 0.4985, "step": 6468 }, { "epoch": 3.143465045592705, "grad_norm": 0.07040681264010286, "learning_rate": 1.0872638339275137e-06, "loss": 0.5043, "step": 6469 }, { "epoch": 3.1439513677811552, "grad_norm": 0.06799596736398962, "learning_rate": 1.0860720633002353e-06, "loss": 0.455, "step": 6470 }, { "epoch": 3.144437689969605, "grad_norm": 0.07266262274751878, "learning_rate": 1.0848808666060428e-06, "loss": 0.4814, "step": 6471 }, { "epoch": 3.1449240121580546, "grad_norm": 0.0710889334460567, "learning_rate": 1.0836902440196123e-06, "loss": 0.4888, "step": 6472 }, { "epoch": 3.1454103343465047, "grad_norm": 0.06953159349693616, "learning_rate": 1.0825001957155344e-06, "loss": 0.5311, "step": 6473 }, { "epoch": 3.1458966565349544, "grad_norm": 0.07087006109263393, "learning_rate": 1.0813107218683171e-06, "loss": 0.5157, "step": 6474 }, { "epoch": 3.146382978723404, "grad_norm": 0.0682967817638768, "learning_rate": 1.0801218226523825e-06, "loss": 0.4834, "step": 6475 }, { "epoch": 3.1468693009118542, "grad_norm": 0.07416131855160117, "learning_rate": 1.0789334982420697e-06, "loss": 0.5521, "step": 6476 }, { "epoch": 3.147355623100304, "grad_norm": 0.07184293517622208, "learning_rate": 1.0777457488116323e-06, "loss": 0.513, "step": 6477 }, { "epoch": 3.1478419452887536, "grad_norm": 0.07113617237868496, "learning_rate": 1.0765585745352408e-06, "loss": 0.4873, "step": 6478 }, { "epoch": 3.1483282674772037, "grad_norm": 0.06857685330472764, "learning_rate": 1.0753719755869813e-06, "loss": 0.487, "step": 6479 }, { "epoch": 3.1488145896656534, "grad_norm": 0.07131933428519391, "learning_rate": 1.0741859521408538e-06, "loss": 0.5288, "step": 6480 }, { "epoch": 3.1493009118541035, "grad_norm": 0.0692138770368766, "learning_rate": 1.0730005043707765e-06, "loss": 0.504, "step": 6481 }, { "epoch": 3.1497872340425532, "grad_norm": 0.07141054056291997, "learning_rate": 1.0718156324505802e-06, "loss": 0.4879, "step": 6482 }, { "epoch": 3.150273556231003, "grad_norm": 0.07220328983530064, "learning_rate": 1.070631336554015e-06, "loss": 0.513, "step": 6483 }, { "epoch": 3.150759878419453, "grad_norm": 0.07037762286764798, "learning_rate": 1.0694476168547424e-06, "loss": 0.4942, "step": 6484 }, { "epoch": 3.1512462006079027, "grad_norm": 0.07149601343198705, "learning_rate": 1.068264473526343e-06, "loss": 0.5213, "step": 6485 }, { "epoch": 3.1517325227963524, "grad_norm": 0.0689444757096791, "learning_rate": 1.0670819067423106e-06, "loss": 0.4762, "step": 6486 }, { "epoch": 3.1522188449848025, "grad_norm": 0.07197825902523278, "learning_rate": 1.0658999166760553e-06, "loss": 0.5037, "step": 6487 }, { "epoch": 3.152705167173252, "grad_norm": 0.07145908179824197, "learning_rate": 1.064718503500904e-06, "loss": 0.5184, "step": 6488 }, { "epoch": 3.153191489361702, "grad_norm": 0.07296960258045442, "learning_rate": 1.063537667390095e-06, "loss": 0.5449, "step": 6489 }, { "epoch": 3.153677811550152, "grad_norm": 0.07053824556853927, "learning_rate": 1.0623574085167848e-06, "loss": 0.5033, "step": 6490 }, { "epoch": 3.1541641337386017, "grad_norm": 0.06925212017012625, "learning_rate": 1.0611777270540452e-06, "loss": 0.475, "step": 6491 }, { "epoch": 3.154650455927052, "grad_norm": 0.06900113915807202, "learning_rate": 1.0599986231748644e-06, "loss": 0.524, "step": 6492 }, { "epoch": 3.1551367781155015, "grad_norm": 0.07098961588124957, "learning_rate": 1.0588200970521439e-06, "loss": 0.5085, "step": 6493 }, { "epoch": 3.155623100303951, "grad_norm": 0.07255999292924366, "learning_rate": 1.0576421488587013e-06, "loss": 0.5089, "step": 6494 }, { "epoch": 3.1561094224924013, "grad_norm": 0.07195155002584919, "learning_rate": 1.0564647787672694e-06, "loss": 0.5297, "step": 6495 }, { "epoch": 3.156595744680851, "grad_norm": 0.07095972315666883, "learning_rate": 1.0552879869504956e-06, "loss": 0.5321, "step": 6496 }, { "epoch": 3.1570820668693007, "grad_norm": 0.07080137678492887, "learning_rate": 1.054111773580943e-06, "loss": 0.5102, "step": 6497 }, { "epoch": 3.157568389057751, "grad_norm": 0.07347241061884391, "learning_rate": 1.052936138831091e-06, "loss": 0.538, "step": 6498 }, { "epoch": 3.1580547112462005, "grad_norm": 0.0698909188455296, "learning_rate": 1.0517610828733322e-06, "loss": 0.4934, "step": 6499 }, { "epoch": 3.1585410334346506, "grad_norm": 0.06991964254084676, "learning_rate": 1.0505866058799746e-06, "loss": 0.461, "step": 6500 }, { "epoch": 3.1590273556231003, "grad_norm": 0.06971650722292501, "learning_rate": 1.0494127080232436e-06, "loss": 0.4767, "step": 6501 }, { "epoch": 3.15951367781155, "grad_norm": 0.07066489578198713, "learning_rate": 1.0482393894752764e-06, "loss": 0.5191, "step": 6502 }, { "epoch": 3.16, "grad_norm": 0.07247830060940354, "learning_rate": 1.0470666504081295e-06, "loss": 0.5605, "step": 6503 }, { "epoch": 3.16048632218845, "grad_norm": 0.0721443024330125, "learning_rate": 1.045894490993768e-06, "loss": 0.4867, "step": 6504 }, { "epoch": 3.1609726443768995, "grad_norm": 0.07147873334806096, "learning_rate": 1.0447229114040774e-06, "loss": 0.5182, "step": 6505 }, { "epoch": 3.1614589665653496, "grad_norm": 0.07250603851923879, "learning_rate": 1.0435519118108572e-06, "loss": 0.5238, "step": 6506 }, { "epoch": 3.1619452887537993, "grad_norm": 0.06886656634411971, "learning_rate": 1.0423814923858205e-06, "loss": 0.4658, "step": 6507 }, { "epoch": 3.1624316109422494, "grad_norm": 0.07004182950749595, "learning_rate": 1.0412116533005962e-06, "loss": 0.4726, "step": 6508 }, { "epoch": 3.162917933130699, "grad_norm": 0.07118430137392404, "learning_rate": 1.0400423947267264e-06, "loss": 0.5102, "step": 6509 }, { "epoch": 3.163404255319149, "grad_norm": 0.07179710744773422, "learning_rate": 1.0388737168356728e-06, "loss": 0.522, "step": 6510 }, { "epoch": 3.163890577507599, "grad_norm": 0.07372243185053347, "learning_rate": 1.0377056197988067e-06, "loss": 0.5422, "step": 6511 }, { "epoch": 3.1643768996960486, "grad_norm": 0.07238017122553848, "learning_rate": 1.0365381037874166e-06, "loss": 0.531, "step": 6512 }, { "epoch": 3.1648632218844983, "grad_norm": 0.07021007864470094, "learning_rate": 1.0353711689727058e-06, "loss": 0.4992, "step": 6513 }, { "epoch": 3.1653495440729484, "grad_norm": 0.06887908772773807, "learning_rate": 1.0342048155257917e-06, "loss": 0.4782, "step": 6514 }, { "epoch": 3.165835866261398, "grad_norm": 0.07463052992192676, "learning_rate": 1.0330390436177061e-06, "loss": 0.5396, "step": 6515 }, { "epoch": 3.166322188449848, "grad_norm": 0.07009162139754778, "learning_rate": 1.031873853419398e-06, "loss": 0.5119, "step": 6516 }, { "epoch": 3.166808510638298, "grad_norm": 0.07224230827980828, "learning_rate": 1.0307092451017275e-06, "loss": 0.5168, "step": 6517 }, { "epoch": 3.1672948328267476, "grad_norm": 0.07334670292074728, "learning_rate": 1.0295452188354737e-06, "loss": 0.5223, "step": 6518 }, { "epoch": 3.1677811550151977, "grad_norm": 0.0695372476547809, "learning_rate": 1.0283817747913244e-06, "loss": 0.5007, "step": 6519 }, { "epoch": 3.1682674772036474, "grad_norm": 0.0722402283297588, "learning_rate": 1.0272189131398875e-06, "loss": 0.5007, "step": 6520 }, { "epoch": 3.168753799392097, "grad_norm": 0.07066322122298833, "learning_rate": 1.0260566340516826e-06, "loss": 0.4933, "step": 6521 }, { "epoch": 3.1692401215805472, "grad_norm": 0.07467043023409528, "learning_rate": 1.0248949376971457e-06, "loss": 0.5552, "step": 6522 }, { "epoch": 3.169726443768997, "grad_norm": 0.06981953434443888, "learning_rate": 1.0237338242466254e-06, "loss": 0.5153, "step": 6523 }, { "epoch": 3.1702127659574466, "grad_norm": 0.0744979639933302, "learning_rate": 1.0225732938703865e-06, "loss": 0.5124, "step": 6524 }, { "epoch": 3.1706990881458967, "grad_norm": 0.07143937671122483, "learning_rate": 1.0214133467386072e-06, "loss": 0.5245, "step": 6525 }, { "epoch": 3.1711854103343464, "grad_norm": 0.07518448974694031, "learning_rate": 1.0202539830213808e-06, "loss": 0.5922, "step": 6526 }, { "epoch": 3.1716717325227965, "grad_norm": 0.0694974279600695, "learning_rate": 1.0190952028887136e-06, "loss": 0.5026, "step": 6527 }, { "epoch": 3.1721580547112462, "grad_norm": 0.07345074555256155, "learning_rate": 1.0179370065105299e-06, "loss": 0.564, "step": 6528 }, { "epoch": 3.172644376899696, "grad_norm": 0.0726248518607165, "learning_rate": 1.016779394056665e-06, "loss": 0.5175, "step": 6529 }, { "epoch": 3.173130699088146, "grad_norm": 0.07237804506656892, "learning_rate": 1.0156223656968695e-06, "loss": 0.5524, "step": 6530 }, { "epoch": 3.1736170212765957, "grad_norm": 0.06939657595428865, "learning_rate": 1.0144659216008084e-06, "loss": 0.4636, "step": 6531 }, { "epoch": 3.1741033434650454, "grad_norm": 0.07038133202224696, "learning_rate": 1.0133100619380626e-06, "loss": 0.5054, "step": 6532 }, { "epoch": 3.1745896656534955, "grad_norm": 0.07139104462041464, "learning_rate": 1.0121547868781228e-06, "loss": 0.493, "step": 6533 }, { "epoch": 3.1750759878419452, "grad_norm": 0.07018597721813158, "learning_rate": 1.0110000965903988e-06, "loss": 0.5018, "step": 6534 }, { "epoch": 3.1755623100303954, "grad_norm": 0.07173715312699312, "learning_rate": 1.0098459912442126e-06, "loss": 0.5059, "step": 6535 }, { "epoch": 3.176048632218845, "grad_norm": 0.07079841394075786, "learning_rate": 1.0086924710088003e-06, "loss": 0.5078, "step": 6536 }, { "epoch": 3.1765349544072947, "grad_norm": 0.07284862135037284, "learning_rate": 1.007539536053313e-06, "loss": 0.5438, "step": 6537 }, { "epoch": 3.177021276595745, "grad_norm": 0.07044494982880632, "learning_rate": 1.0063871865468156e-06, "loss": 0.4973, "step": 6538 }, { "epoch": 3.1775075987841945, "grad_norm": 0.07106102566423396, "learning_rate": 1.0052354226582861e-06, "loss": 0.4717, "step": 6539 }, { "epoch": 3.177993920972644, "grad_norm": 0.07188334847371916, "learning_rate": 1.004084244556619e-06, "loss": 0.5053, "step": 6540 }, { "epoch": 3.1784802431610943, "grad_norm": 0.07053686110847719, "learning_rate": 1.0029336524106202e-06, "loss": 0.473, "step": 6541 }, { "epoch": 3.178966565349544, "grad_norm": 0.07175167836259107, "learning_rate": 1.0017836463890118e-06, "loss": 0.4972, "step": 6542 }, { "epoch": 3.1794528875379937, "grad_norm": 0.07044035821433776, "learning_rate": 1.0006342266604291e-06, "loss": 0.4512, "step": 6543 }, { "epoch": 3.179939209726444, "grad_norm": 0.07167506835808471, "learning_rate": 9.994853933934212e-07, "loss": 0.4959, "step": 6544 }, { "epoch": 3.1804255319148935, "grad_norm": 0.07288030164798386, "learning_rate": 9.983371467564511e-07, "loss": 0.5233, "step": 6545 }, { "epoch": 3.1809118541033437, "grad_norm": 0.07296513728946342, "learning_rate": 9.97189486917896e-07, "loss": 0.4849, "step": 6546 }, { "epoch": 3.1813981762917933, "grad_norm": 0.07375066155458689, "learning_rate": 9.960424140460496e-07, "loss": 0.5481, "step": 6547 }, { "epoch": 3.181884498480243, "grad_norm": 0.07188220544116591, "learning_rate": 9.948959283091141e-07, "loss": 0.5636, "step": 6548 }, { "epoch": 3.182370820668693, "grad_norm": 0.07252764198511936, "learning_rate": 9.937500298752101e-07, "loss": 0.5214, "step": 6549 }, { "epoch": 3.182857142857143, "grad_norm": 0.07061568805737316, "learning_rate": 9.926047189123699e-07, "loss": 0.4787, "step": 6550 }, { "epoch": 3.1833434650455925, "grad_norm": 0.07278077678552378, "learning_rate": 9.914599955885407e-07, "loss": 0.5207, "step": 6551 }, { "epoch": 3.1838297872340426, "grad_norm": 0.07033369968208668, "learning_rate": 9.903158600715834e-07, "loss": 0.4972, "step": 6552 }, { "epoch": 3.1843161094224923, "grad_norm": 0.07234286522060195, "learning_rate": 9.891723125292723e-07, "loss": 0.5256, "step": 6553 }, { "epoch": 3.1848024316109425, "grad_norm": 0.07102207940232765, "learning_rate": 9.88029353129295e-07, "loss": 0.5016, "step": 6554 }, { "epoch": 3.185288753799392, "grad_norm": 0.07316197025954028, "learning_rate": 9.868869820392545e-07, "loss": 0.5188, "step": 6555 }, { "epoch": 3.185775075987842, "grad_norm": 0.06800174953747572, "learning_rate": 9.857451994266665e-07, "loss": 0.4631, "step": 6556 }, { "epoch": 3.186261398176292, "grad_norm": 0.07114460446832876, "learning_rate": 9.846040054589596e-07, "loss": 0.5113, "step": 6557 }, { "epoch": 3.1867477203647416, "grad_norm": 0.07016854668068076, "learning_rate": 9.834634003034777e-07, "loss": 0.4835, "step": 6558 }, { "epoch": 3.1872340425531913, "grad_norm": 0.07204058729789382, "learning_rate": 9.82323384127477e-07, "loss": 0.5333, "step": 6559 }, { "epoch": 3.1877203647416414, "grad_norm": 0.07370539109717562, "learning_rate": 9.811839570981291e-07, "loss": 0.5435, "step": 6560 }, { "epoch": 3.188206686930091, "grad_norm": 0.07242073036108321, "learning_rate": 9.800451193825167e-07, "loss": 0.5241, "step": 6561 }, { "epoch": 3.1886930091185413, "grad_norm": 0.07101656429489084, "learning_rate": 9.78906871147638e-07, "loss": 0.4929, "step": 6562 }, { "epoch": 3.189179331306991, "grad_norm": 0.07616065146271954, "learning_rate": 9.777692125604039e-07, "loss": 0.5322, "step": 6563 }, { "epoch": 3.1896656534954406, "grad_norm": 0.07323442124348449, "learning_rate": 9.766321437876391e-07, "loss": 0.5198, "step": 6564 }, { "epoch": 3.1901519756838908, "grad_norm": 0.07132179297828939, "learning_rate": 9.754956649960823e-07, "loss": 0.5516, "step": 6565 }, { "epoch": 3.1906382978723404, "grad_norm": 0.07057315349992284, "learning_rate": 9.743597763523855e-07, "loss": 0.485, "step": 6566 }, { "epoch": 3.19112462006079, "grad_norm": 0.0712612554963249, "learning_rate": 9.732244780231127e-07, "loss": 0.5193, "step": 6567 }, { "epoch": 3.1916109422492402, "grad_norm": 0.07187691360138726, "learning_rate": 9.720897701747435e-07, "loss": 0.5336, "step": 6568 }, { "epoch": 3.19209726443769, "grad_norm": 0.06805895008877791, "learning_rate": 9.709556529736692e-07, "loss": 0.4727, "step": 6569 }, { "epoch": 3.1925835866261396, "grad_norm": 0.07177273244968645, "learning_rate": 9.698221265861957e-07, "loss": 0.5302, "step": 6570 }, { "epoch": 3.1930699088145897, "grad_norm": 0.07254163064575793, "learning_rate": 9.686891911785418e-07, "loss": 0.5386, "step": 6571 }, { "epoch": 3.1935562310030394, "grad_norm": 0.07035196610090834, "learning_rate": 9.675568469168388e-07, "loss": 0.4986, "step": 6572 }, { "epoch": 3.1940425531914896, "grad_norm": 0.0695053897796987, "learning_rate": 9.664250939671332e-07, "loss": 0.489, "step": 6573 }, { "epoch": 3.1945288753799392, "grad_norm": 0.08581518037914929, "learning_rate": 9.652939324953835e-07, "loss": 0.5726, "step": 6574 }, { "epoch": 3.195015197568389, "grad_norm": 0.06990004453786167, "learning_rate": 9.641633626674612e-07, "loss": 0.4919, "step": 6575 }, { "epoch": 3.195501519756839, "grad_norm": 0.06954543980025382, "learning_rate": 9.630333846491518e-07, "loss": 0.4882, "step": 6576 }, { "epoch": 3.1959878419452887, "grad_norm": 0.07087106464588994, "learning_rate": 9.61903998606154e-07, "loss": 0.542, "step": 6577 }, { "epoch": 3.1964741641337384, "grad_norm": 0.07319970567781901, "learning_rate": 9.607752047040792e-07, "loss": 0.5162, "step": 6578 }, { "epoch": 3.1969604863221885, "grad_norm": 0.07132779625985851, "learning_rate": 9.59647003108452e-07, "loss": 0.5181, "step": 6579 }, { "epoch": 3.1974468085106382, "grad_norm": 0.07110947027204667, "learning_rate": 9.58519393984712e-07, "loss": 0.5163, "step": 6580 }, { "epoch": 3.197933130699088, "grad_norm": 0.07313446791100611, "learning_rate": 9.573923774982075e-07, "loss": 0.5642, "step": 6581 }, { "epoch": 3.198419452887538, "grad_norm": 0.06999437368133128, "learning_rate": 9.562659538142027e-07, "loss": 0.5109, "step": 6582 }, { "epoch": 3.1989057750759877, "grad_norm": 0.07058124518391623, "learning_rate": 9.551401230978773e-07, "loss": 0.5212, "step": 6583 }, { "epoch": 3.199392097264438, "grad_norm": 0.07144869186374213, "learning_rate": 9.540148855143205e-07, "loss": 0.5023, "step": 6584 }, { "epoch": 3.1998784194528875, "grad_norm": 0.0710627424540113, "learning_rate": 9.528902412285351e-07, "loss": 0.5053, "step": 6585 }, { "epoch": 3.200364741641337, "grad_norm": 0.07064290630221878, "learning_rate": 9.517661904054387e-07, "loss": 0.5165, "step": 6586 }, { "epoch": 3.2008510638297873, "grad_norm": 0.07243029054189776, "learning_rate": 9.506427332098589e-07, "loss": 0.5128, "step": 6587 }, { "epoch": 3.201337386018237, "grad_norm": 0.07127970828142006, "learning_rate": 9.495198698065394e-07, "loss": 0.5428, "step": 6588 }, { "epoch": 3.201823708206687, "grad_norm": 0.07300225237254729, "learning_rate": 9.483976003601341e-07, "loss": 0.5024, "step": 6589 }, { "epoch": 3.202310030395137, "grad_norm": 0.07096281524340389, "learning_rate": 9.472759250352126e-07, "loss": 0.507, "step": 6590 }, { "epoch": 3.2027963525835865, "grad_norm": 0.07075964114599965, "learning_rate": 9.461548439962542e-07, "loss": 0.555, "step": 6591 }, { "epoch": 3.2032826747720367, "grad_norm": 0.07560427051894147, "learning_rate": 9.450343574076537e-07, "loss": 0.5466, "step": 6592 }, { "epoch": 3.2037689969604863, "grad_norm": 0.07379002330362418, "learning_rate": 9.439144654337179e-07, "loss": 0.5292, "step": 6593 }, { "epoch": 3.204255319148936, "grad_norm": 0.0715001658634348, "learning_rate": 9.427951682386654e-07, "loss": 0.514, "step": 6594 }, { "epoch": 3.204741641337386, "grad_norm": 0.06931942506780431, "learning_rate": 9.416764659866301e-07, "loss": 0.4881, "step": 6595 }, { "epoch": 3.205227963525836, "grad_norm": 0.07138063141402301, "learning_rate": 9.405583588416545e-07, "loss": 0.5011, "step": 6596 }, { "epoch": 3.2057142857142855, "grad_norm": 0.07312667271218534, "learning_rate": 9.394408469676974e-07, "loss": 0.5232, "step": 6597 }, { "epoch": 3.2062006079027356, "grad_norm": 0.07307923454377412, "learning_rate": 9.383239305286302e-07, "loss": 0.5227, "step": 6598 }, { "epoch": 3.2066869300911853, "grad_norm": 0.0710776103712388, "learning_rate": 9.372076096882344e-07, "loss": 0.5221, "step": 6599 }, { "epoch": 3.2071732522796355, "grad_norm": 0.07509157716515864, "learning_rate": 9.360918846102057e-07, "loss": 0.5274, "step": 6600 }, { "epoch": 3.207659574468085, "grad_norm": 0.06899005816163993, "learning_rate": 9.34976755458154e-07, "loss": 0.4544, "step": 6601 }, { "epoch": 3.208145896656535, "grad_norm": 0.0708896240031363, "learning_rate": 9.338622223956006e-07, "loss": 0.5183, "step": 6602 }, { "epoch": 3.208632218844985, "grad_norm": 0.06917118232357904, "learning_rate": 9.327482855859776e-07, "loss": 0.4722, "step": 6603 }, { "epoch": 3.2091185410334346, "grad_norm": 0.07060890582885317, "learning_rate": 9.31634945192632e-07, "loss": 0.5056, "step": 6604 }, { "epoch": 3.2096048632218843, "grad_norm": 0.07285005633966471, "learning_rate": 9.305222013788223e-07, "loss": 0.535, "step": 6605 }, { "epoch": 3.2100911854103344, "grad_norm": 0.07419051531572524, "learning_rate": 9.294100543077201e-07, "loss": 0.5671, "step": 6606 }, { "epoch": 3.210577507598784, "grad_norm": 0.07082598871396202, "learning_rate": 9.282985041424086e-07, "loss": 0.4965, "step": 6607 }, { "epoch": 3.211063829787234, "grad_norm": 0.07188920613846218, "learning_rate": 9.271875510458845e-07, "loss": 0.5251, "step": 6608 }, { "epoch": 3.211550151975684, "grad_norm": 0.06979772983714502, "learning_rate": 9.26077195181056e-07, "loss": 0.4803, "step": 6609 }, { "epoch": 3.2120364741641336, "grad_norm": 0.0714712665373541, "learning_rate": 9.249674367107453e-07, "loss": 0.5041, "step": 6610 }, { "epoch": 3.2125227963525838, "grad_norm": 0.07338503338950801, "learning_rate": 9.238582757976839e-07, "loss": 0.5635, "step": 6611 }, { "epoch": 3.2130091185410334, "grad_norm": 0.06895964928351896, "learning_rate": 9.227497126045187e-07, "loss": 0.4963, "step": 6612 }, { "epoch": 3.213495440729483, "grad_norm": 0.07286438208122141, "learning_rate": 9.216417472938083e-07, "loss": 0.4933, "step": 6613 }, { "epoch": 3.2139817629179332, "grad_norm": 0.07292777456855977, "learning_rate": 9.20534380028022e-07, "loss": 0.5485, "step": 6614 }, { "epoch": 3.214468085106383, "grad_norm": 0.06952089188154065, "learning_rate": 9.194276109695443e-07, "loss": 0.4985, "step": 6615 }, { "epoch": 3.214954407294833, "grad_norm": 0.07202678853959618, "learning_rate": 9.183214402806689e-07, "loss": 0.5055, "step": 6616 }, { "epoch": 3.2154407294832827, "grad_norm": 0.07176545814135184, "learning_rate": 9.172158681236043e-07, "loss": 0.5412, "step": 6617 }, { "epoch": 3.2159270516717324, "grad_norm": 0.07091850643730259, "learning_rate": 9.161108946604674e-07, "loss": 0.5167, "step": 6618 }, { "epoch": 3.2164133738601826, "grad_norm": 0.07242664674208017, "learning_rate": 9.150065200532942e-07, "loss": 0.4939, "step": 6619 }, { "epoch": 3.2168996960486322, "grad_norm": 0.07159815503445581, "learning_rate": 9.139027444640264e-07, "loss": 0.5114, "step": 6620 }, { "epoch": 3.217386018237082, "grad_norm": 0.07181949478413042, "learning_rate": 9.127995680545204e-07, "loss": 0.5444, "step": 6621 }, { "epoch": 3.217872340425532, "grad_norm": 0.07241375375847589, "learning_rate": 9.116969909865448e-07, "loss": 0.5096, "step": 6622 }, { "epoch": 3.2183586626139817, "grad_norm": 0.07027026120224286, "learning_rate": 9.105950134217795e-07, "loss": 0.5005, "step": 6623 }, { "epoch": 3.2188449848024314, "grad_norm": 0.0705956631807819, "learning_rate": 9.09493635521817e-07, "loss": 0.5228, "step": 6624 }, { "epoch": 3.2193313069908815, "grad_norm": 0.07202787207458479, "learning_rate": 9.083928574481637e-07, "loss": 0.5103, "step": 6625 }, { "epoch": 3.2198176291793312, "grad_norm": 0.07143516564887394, "learning_rate": 9.072926793622333e-07, "loss": 0.5213, "step": 6626 }, { "epoch": 3.2203039513677814, "grad_norm": 0.07354103714973544, "learning_rate": 9.061931014253556e-07, "loss": 0.5354, "step": 6627 }, { "epoch": 3.220790273556231, "grad_norm": 0.07298242203084525, "learning_rate": 9.050941237987709e-07, "loss": 0.5837, "step": 6628 }, { "epoch": 3.2212765957446807, "grad_norm": 0.07068375541369609, "learning_rate": 9.039957466436328e-07, "loss": 0.4972, "step": 6629 }, { "epoch": 3.221762917933131, "grad_norm": 0.07128749994619307, "learning_rate": 9.02897970121005e-07, "loss": 0.4883, "step": 6630 }, { "epoch": 3.2222492401215805, "grad_norm": 0.07135012464820764, "learning_rate": 9.018007943918645e-07, "loss": 0.5203, "step": 6631 }, { "epoch": 3.22273556231003, "grad_norm": 0.0710371230491351, "learning_rate": 9.007042196170989e-07, "loss": 0.508, "step": 6632 }, { "epoch": 3.2232218844984803, "grad_norm": 0.07261249038470223, "learning_rate": 8.99608245957509e-07, "loss": 0.5074, "step": 6633 }, { "epoch": 3.22370820668693, "grad_norm": 0.0685338730412036, "learning_rate": 8.985128735738069e-07, "loss": 0.4747, "step": 6634 }, { "epoch": 3.2241945288753797, "grad_norm": 0.07353307586944226, "learning_rate": 8.974181026266165e-07, "loss": 0.4988, "step": 6635 }, { "epoch": 3.22468085106383, "grad_norm": 0.06928889568150072, "learning_rate": 8.963239332764718e-07, "loss": 0.4996, "step": 6636 }, { "epoch": 3.2251671732522795, "grad_norm": 0.07699051618424035, "learning_rate": 8.952303656838235e-07, "loss": 0.5494, "step": 6637 }, { "epoch": 3.2256534954407297, "grad_norm": 0.07118625366267374, "learning_rate": 8.941374000090297e-07, "loss": 0.5159, "step": 6638 }, { "epoch": 3.2261398176291793, "grad_norm": 0.07148611563861293, "learning_rate": 8.930450364123616e-07, "loss": 0.5383, "step": 6639 }, { "epoch": 3.226626139817629, "grad_norm": 0.0703852031534301, "learning_rate": 8.919532750540006e-07, "loss": 0.5076, "step": 6640 }, { "epoch": 3.227112462006079, "grad_norm": 0.07182033856521591, "learning_rate": 8.908621160940418e-07, "loss": 0.4783, "step": 6641 }, { "epoch": 3.227598784194529, "grad_norm": 0.07021041457609291, "learning_rate": 8.89771559692491e-07, "loss": 0.5109, "step": 6642 }, { "epoch": 3.2280851063829785, "grad_norm": 0.07201072462342475, "learning_rate": 8.886816060092663e-07, "loss": 0.513, "step": 6643 }, { "epoch": 3.2285714285714286, "grad_norm": 0.06837650667282466, "learning_rate": 8.875922552041971e-07, "loss": 0.4593, "step": 6644 }, { "epoch": 3.2290577507598783, "grad_norm": 0.07115129558286391, "learning_rate": 8.865035074370243e-07, "loss": 0.4773, "step": 6645 }, { "epoch": 3.2295440729483285, "grad_norm": 0.06993739552887139, "learning_rate": 8.854153628674e-07, "loss": 0.5203, "step": 6646 }, { "epoch": 3.230030395136778, "grad_norm": 0.07052411287303395, "learning_rate": 8.84327821654889e-07, "loss": 0.4717, "step": 6647 }, { "epoch": 3.230516717325228, "grad_norm": 0.07182067644441656, "learning_rate": 8.832408839589656e-07, "loss": 0.5043, "step": 6648 }, { "epoch": 3.231003039513678, "grad_norm": 0.07136228080162373, "learning_rate": 8.821545499390183e-07, "loss": 0.536, "step": 6649 }, { "epoch": 3.2314893617021276, "grad_norm": 0.07022756578578787, "learning_rate": 8.810688197543449e-07, "loss": 0.5142, "step": 6650 }, { "epoch": 3.2319756838905773, "grad_norm": 0.07025163375485158, "learning_rate": 8.799836935641559e-07, "loss": 0.5254, "step": 6651 }, { "epoch": 3.2324620060790275, "grad_norm": 0.07096770568648499, "learning_rate": 8.788991715275718e-07, "loss": 0.5187, "step": 6652 }, { "epoch": 3.232948328267477, "grad_norm": 0.07241760452396179, "learning_rate": 8.77815253803626e-07, "loss": 0.515, "step": 6653 }, { "epoch": 3.2334346504559273, "grad_norm": 0.07219397152470888, "learning_rate": 8.767319405512631e-07, "loss": 0.522, "step": 6654 }, { "epoch": 3.233920972644377, "grad_norm": 0.07157240958150009, "learning_rate": 8.756492319293381e-07, "loss": 0.5269, "step": 6655 }, { "epoch": 3.2344072948328266, "grad_norm": 0.06960412720172376, "learning_rate": 8.745671280966178e-07, "loss": 0.4881, "step": 6656 }, { "epoch": 3.2348936170212768, "grad_norm": 0.07017221096742848, "learning_rate": 8.73485629211781e-07, "loss": 0.4671, "step": 6657 }, { "epoch": 3.2353799392097264, "grad_norm": 0.07045450849325116, "learning_rate": 8.724047354334169e-07, "loss": 0.5082, "step": 6658 }, { "epoch": 3.235866261398176, "grad_norm": 0.07106206450127038, "learning_rate": 8.713244469200272e-07, "loss": 0.4787, "step": 6659 }, { "epoch": 3.2363525835866263, "grad_norm": 0.06890903072031643, "learning_rate": 8.702447638300221e-07, "loss": 0.4938, "step": 6660 }, { "epoch": 3.236838905775076, "grad_norm": 0.0704515274818252, "learning_rate": 8.691656863217263e-07, "loss": 0.4992, "step": 6661 }, { "epoch": 3.2373252279635256, "grad_norm": 0.07346651287263162, "learning_rate": 8.680872145533742e-07, "loss": 0.5394, "step": 6662 }, { "epoch": 3.2378115501519757, "grad_norm": 0.07127118651222632, "learning_rate": 8.670093486831105e-07, "loss": 0.4934, "step": 6663 }, { "epoch": 3.2382978723404254, "grad_norm": 0.07268534199866672, "learning_rate": 8.659320888689932e-07, "loss": 0.5511, "step": 6664 }, { "epoch": 3.2387841945288756, "grad_norm": 0.07068047988483725, "learning_rate": 8.648554352689892e-07, "loss": 0.5291, "step": 6665 }, { "epoch": 3.2392705167173252, "grad_norm": 0.07293907280902406, "learning_rate": 8.637793880409778e-07, "loss": 0.5238, "step": 6666 }, { "epoch": 3.239756838905775, "grad_norm": 0.07019097943184698, "learning_rate": 8.627039473427495e-07, "loss": 0.4628, "step": 6667 }, { "epoch": 3.240243161094225, "grad_norm": 0.06937920215943859, "learning_rate": 8.616291133320053e-07, "loss": 0.4743, "step": 6668 }, { "epoch": 3.2407294832826747, "grad_norm": 0.07198544230257038, "learning_rate": 8.605548861663571e-07, "loss": 0.5234, "step": 6669 }, { "epoch": 3.2412158054711244, "grad_norm": 0.07081791779059504, "learning_rate": 8.594812660033286e-07, "loss": 0.4964, "step": 6670 }, { "epoch": 3.2417021276595746, "grad_norm": 0.07122570568972306, "learning_rate": 8.584082530003535e-07, "loss": 0.5076, "step": 6671 }, { "epoch": 3.2421884498480242, "grad_norm": 0.07109610901748635, "learning_rate": 8.573358473147775e-07, "loss": 0.5098, "step": 6672 }, { "epoch": 3.2426747720364744, "grad_norm": 0.07052747008516987, "learning_rate": 8.56264049103856e-07, "loss": 0.5418, "step": 6673 }, { "epoch": 3.243161094224924, "grad_norm": 0.07097981311497588, "learning_rate": 8.551928585247565e-07, "loss": 0.4875, "step": 6674 }, { "epoch": 3.2436474164133737, "grad_norm": 0.07077987147567684, "learning_rate": 8.541222757345574e-07, "loss": 0.4915, "step": 6675 }, { "epoch": 3.244133738601824, "grad_norm": 0.07039884421441911, "learning_rate": 8.530523008902464e-07, "loss": 0.5176, "step": 6676 }, { "epoch": 3.2446200607902735, "grad_norm": 0.07089924937189282, "learning_rate": 8.51982934148724e-07, "loss": 0.4903, "step": 6677 }, { "epoch": 3.2451063829787232, "grad_norm": 0.07264228774406058, "learning_rate": 8.50914175666801e-07, "loss": 0.5673, "step": 6678 }, { "epoch": 3.2455927051671734, "grad_norm": 0.07108938563226617, "learning_rate": 8.498460256011976e-07, "loss": 0.499, "step": 6679 }, { "epoch": 3.246079027355623, "grad_norm": 0.06956281785561204, "learning_rate": 8.487784841085461e-07, "loss": 0.5014, "step": 6680 }, { "epoch": 3.246565349544073, "grad_norm": 0.07188876324488672, "learning_rate": 8.477115513453904e-07, "loss": 0.5286, "step": 6681 }, { "epoch": 3.247051671732523, "grad_norm": 0.07232988044828668, "learning_rate": 8.466452274681825e-07, "loss": 0.5353, "step": 6682 }, { "epoch": 3.247051671732523, "eval_loss": 0.5697982907295227, "eval_runtime": 105.1823, "eval_samples_per_second": 288.575, "eval_steps_per_second": 36.08, "step": 6682 }, { "epoch": 3.2475379939209725, "grad_norm": 0.07050943371541134, "learning_rate": 8.455795126332883e-07, "loss": 0.5272, "step": 6683 }, { "epoch": 3.2480243161094227, "grad_norm": 0.06906538143042283, "learning_rate": 8.445144069969813e-07, "loss": 0.4706, "step": 6684 }, { "epoch": 3.2485106382978723, "grad_norm": 0.06959869590037358, "learning_rate": 8.434499107154486e-07, "loss": 0.5078, "step": 6685 }, { "epoch": 3.248996960486322, "grad_norm": 0.06833386951533407, "learning_rate": 8.423860239447851e-07, "loss": 0.4784, "step": 6686 }, { "epoch": 3.249483282674772, "grad_norm": 0.06957653617225527, "learning_rate": 8.413227468410001e-07, "loss": 0.5003, "step": 6687 }, { "epoch": 3.249969604863222, "grad_norm": 0.07190594943453445, "learning_rate": 8.40260079560008e-07, "loss": 0.5167, "step": 6688 }, { "epoch": 3.2504559270516715, "grad_norm": 0.07120868946868153, "learning_rate": 8.39198022257638e-07, "loss": 0.5215, "step": 6689 }, { "epoch": 3.2509422492401217, "grad_norm": 0.07338432145314944, "learning_rate": 8.381365750896292e-07, "loss": 0.5233, "step": 6690 }, { "epoch": 3.2514285714285713, "grad_norm": 0.06982103780120874, "learning_rate": 8.37075738211629e-07, "loss": 0.4729, "step": 6691 }, { "epoch": 3.2519148936170215, "grad_norm": 0.07012801317526553, "learning_rate": 8.360155117792002e-07, "loss": 0.4961, "step": 6692 }, { "epoch": 3.252401215805471, "grad_norm": 0.07458212688986891, "learning_rate": 8.349558959478116e-07, "loss": 0.5428, "step": 6693 }, { "epoch": 3.252887537993921, "grad_norm": 0.07126116694589527, "learning_rate": 8.338968908728434e-07, "loss": 0.5119, "step": 6694 }, { "epoch": 3.253373860182371, "grad_norm": 0.07004356300358536, "learning_rate": 8.32838496709587e-07, "loss": 0.5136, "step": 6695 }, { "epoch": 3.2538601823708206, "grad_norm": 0.07200875228844517, "learning_rate": 8.317807136132439e-07, "loss": 0.5508, "step": 6696 }, { "epoch": 3.2543465045592703, "grad_norm": 0.06993379002200782, "learning_rate": 8.307235417389253e-07, "loss": 0.4862, "step": 6697 }, { "epoch": 3.2548328267477205, "grad_norm": 0.07357284131512169, "learning_rate": 8.296669812416546e-07, "loss": 0.5091, "step": 6698 }, { "epoch": 3.25531914893617, "grad_norm": 0.06981934265510063, "learning_rate": 8.286110322763635e-07, "loss": 0.4885, "step": 6699 }, { "epoch": 3.25580547112462, "grad_norm": 0.06853191052680513, "learning_rate": 8.275556949978958e-07, "loss": 0.4661, "step": 6700 }, { "epoch": 3.25629179331307, "grad_norm": 0.07160968738055536, "learning_rate": 8.265009695610038e-07, "loss": 0.5139, "step": 6701 }, { "epoch": 3.2567781155015196, "grad_norm": 0.06970425937040438, "learning_rate": 8.254468561203527e-07, "loss": 0.5184, "step": 6702 }, { "epoch": 3.2572644376899698, "grad_norm": 0.06988621532912487, "learning_rate": 8.243933548305133e-07, "loss": 0.5002, "step": 6703 }, { "epoch": 3.2577507598784194, "grad_norm": 0.07045706268925343, "learning_rate": 8.233404658459721e-07, "loss": 0.4973, "step": 6704 }, { "epoch": 3.258237082066869, "grad_norm": 0.07087213933843038, "learning_rate": 8.222881893211221e-07, "loss": 0.514, "step": 6705 }, { "epoch": 3.2587234042553193, "grad_norm": 0.07146602335709688, "learning_rate": 8.212365254102677e-07, "loss": 0.5292, "step": 6706 }, { "epoch": 3.259209726443769, "grad_norm": 0.07260648628114531, "learning_rate": 8.201854742676241e-07, "loss": 0.5212, "step": 6707 }, { "epoch": 3.259696048632219, "grad_norm": 0.07302207479121936, "learning_rate": 8.191350360473161e-07, "loss": 0.5495, "step": 6708 }, { "epoch": 3.2601823708206688, "grad_norm": 0.07044224517567534, "learning_rate": 8.180852109033766e-07, "loss": 0.529, "step": 6709 }, { "epoch": 3.2606686930091184, "grad_norm": 0.06983207440788895, "learning_rate": 8.17035998989753e-07, "loss": 0.4896, "step": 6710 }, { "epoch": 3.2611550151975686, "grad_norm": 0.07134130972511271, "learning_rate": 8.159874004603002e-07, "loss": 0.537, "step": 6711 }, { "epoch": 3.2616413373860182, "grad_norm": 0.0729634608031717, "learning_rate": 8.149394154687823e-07, "loss": 0.5375, "step": 6712 }, { "epoch": 3.262127659574468, "grad_norm": 0.0702147847350036, "learning_rate": 8.138920441688741e-07, "loss": 0.474, "step": 6713 }, { "epoch": 3.262613981762918, "grad_norm": 0.07555019420391294, "learning_rate": 8.128452867141618e-07, "loss": 0.5241, "step": 6714 }, { "epoch": 3.2631003039513677, "grad_norm": 0.07237785593610427, "learning_rate": 8.117991432581396e-07, "loss": 0.5357, "step": 6715 }, { "epoch": 3.2635866261398174, "grad_norm": 0.06965607494193911, "learning_rate": 8.107536139542132e-07, "loss": 0.5194, "step": 6716 }, { "epoch": 3.2640729483282676, "grad_norm": 0.07128960605514549, "learning_rate": 8.097086989556979e-07, "loss": 0.5139, "step": 6717 }, { "epoch": 3.2645592705167172, "grad_norm": 0.07038015499999553, "learning_rate": 8.086643984158177e-07, "loss": 0.5469, "step": 6718 }, { "epoch": 3.2650455927051674, "grad_norm": 0.0720534862310441, "learning_rate": 8.076207124877067e-07, "loss": 0.5016, "step": 6719 }, { "epoch": 3.265531914893617, "grad_norm": 0.07420249347257879, "learning_rate": 8.065776413244114e-07, "loss": 0.5269, "step": 6720 }, { "epoch": 3.2660182370820667, "grad_norm": 0.07270066923272671, "learning_rate": 8.05535185078885e-07, "loss": 0.5323, "step": 6721 }, { "epoch": 3.266504559270517, "grad_norm": 0.07420155667146282, "learning_rate": 8.044933439039926e-07, "loss": 0.5333, "step": 6722 }, { "epoch": 3.2669908814589665, "grad_norm": 0.06909639448934451, "learning_rate": 8.034521179525079e-07, "loss": 0.5057, "step": 6723 }, { "epoch": 3.2674772036474162, "grad_norm": 0.07106245916394575, "learning_rate": 8.024115073771154e-07, "loss": 0.5204, "step": 6724 }, { "epoch": 3.2679635258358664, "grad_norm": 0.06897116055661569, "learning_rate": 8.013715123304089e-07, "loss": 0.5234, "step": 6725 }, { "epoch": 3.268449848024316, "grad_norm": 0.07031939633422496, "learning_rate": 8.003321329648911e-07, "loss": 0.5019, "step": 6726 }, { "epoch": 3.2689361702127657, "grad_norm": 0.07288176899274526, "learning_rate": 7.992933694329747e-07, "loss": 0.5566, "step": 6727 }, { "epoch": 3.269422492401216, "grad_norm": 0.07443121690921613, "learning_rate": 7.982552218869843e-07, "loss": 0.5381, "step": 6728 }, { "epoch": 3.2699088145896655, "grad_norm": 0.06991596429238804, "learning_rate": 7.972176904791518e-07, "loss": 0.494, "step": 6729 }, { "epoch": 3.2703951367781157, "grad_norm": 0.07274997577093707, "learning_rate": 7.96180775361619e-07, "loss": 0.5255, "step": 6730 }, { "epoch": 3.2708814589665653, "grad_norm": 0.07267445539400239, "learning_rate": 7.951444766864397e-07, "loss": 0.5084, "step": 6731 }, { "epoch": 3.271367781155015, "grad_norm": 0.06945763463521465, "learning_rate": 7.94108794605572e-07, "loss": 0.4999, "step": 6732 }, { "epoch": 3.271854103343465, "grad_norm": 0.07044988524813009, "learning_rate": 7.930737292708889e-07, "loss": 0.4983, "step": 6733 }, { "epoch": 3.272340425531915, "grad_norm": 0.07277831958200251, "learning_rate": 7.920392808341704e-07, "loss": 0.5308, "step": 6734 }, { "epoch": 3.272826747720365, "grad_norm": 0.07243967490419853, "learning_rate": 7.910054494471064e-07, "loss": 0.5136, "step": 6735 }, { "epoch": 3.2733130699088147, "grad_norm": 0.07103700203490818, "learning_rate": 7.899722352612976e-07, "loss": 0.5209, "step": 6736 }, { "epoch": 3.2737993920972643, "grad_norm": 0.07173752856364275, "learning_rate": 7.889396384282522e-07, "loss": 0.4937, "step": 6737 }, { "epoch": 3.2742857142857145, "grad_norm": 0.07223241974934827, "learning_rate": 7.879076590993889e-07, "loss": 0.5064, "step": 6738 }, { "epoch": 3.274772036474164, "grad_norm": 0.07187526073243612, "learning_rate": 7.868762974260358e-07, "loss": 0.5364, "step": 6739 }, { "epoch": 3.275258358662614, "grad_norm": 0.07355205971680061, "learning_rate": 7.858455535594306e-07, "loss": 0.5042, "step": 6740 }, { "epoch": 3.275744680851064, "grad_norm": 0.07302633220465582, "learning_rate": 7.848154276507203e-07, "loss": 0.5266, "step": 6741 }, { "epoch": 3.2762310030395136, "grad_norm": 0.07185108454555761, "learning_rate": 7.837859198509612e-07, "loss": 0.5352, "step": 6742 }, { "epoch": 3.2767173252279633, "grad_norm": 0.06970707735731069, "learning_rate": 7.827570303111182e-07, "loss": 0.5098, "step": 6743 }, { "epoch": 3.2772036474164135, "grad_norm": 0.06983103064121515, "learning_rate": 7.817287591820666e-07, "loss": 0.4939, "step": 6744 }, { "epoch": 3.277689969604863, "grad_norm": 0.07063865905214017, "learning_rate": 7.807011066145897e-07, "loss": 0.5167, "step": 6745 }, { "epoch": 3.2781762917933133, "grad_norm": 0.07052672816456025, "learning_rate": 7.796740727593849e-07, "loss": 0.5206, "step": 6746 }, { "epoch": 3.278662613981763, "grad_norm": 0.0710467610618152, "learning_rate": 7.786476577670509e-07, "loss": 0.4939, "step": 6747 }, { "epoch": 3.2791489361702126, "grad_norm": 0.0724447999760171, "learning_rate": 7.776218617881016e-07, "loss": 0.5164, "step": 6748 }, { "epoch": 3.2796352583586628, "grad_norm": 0.07445658665825779, "learning_rate": 7.765966849729578e-07, "loss": 0.5472, "step": 6749 }, { "epoch": 3.2801215805471124, "grad_norm": 0.06952869171566352, "learning_rate": 7.755721274719502e-07, "loss": 0.4791, "step": 6750 }, { "epoch": 3.280607902735562, "grad_norm": 0.0712442624772608, "learning_rate": 7.745481894353186e-07, "loss": 0.4885, "step": 6751 }, { "epoch": 3.2810942249240123, "grad_norm": 0.06954541584441366, "learning_rate": 7.735248710132115e-07, "loss": 0.4988, "step": 6752 }, { "epoch": 3.281580547112462, "grad_norm": 0.07137156468614886, "learning_rate": 7.725021723556875e-07, "loss": 0.4884, "step": 6753 }, { "epoch": 3.2820668693009116, "grad_norm": 0.07121825188777446, "learning_rate": 7.714800936127137e-07, "loss": 0.5203, "step": 6754 }, { "epoch": 3.2825531914893618, "grad_norm": 0.07259237781028875, "learning_rate": 7.704586349341658e-07, "loss": 0.528, "step": 6755 }, { "epoch": 3.2830395136778114, "grad_norm": 0.07181558435771633, "learning_rate": 7.694377964698297e-07, "loss": 0.493, "step": 6756 }, { "epoch": 3.2835258358662616, "grad_norm": 0.06973374411306754, "learning_rate": 7.684175783693998e-07, "loss": 0.4952, "step": 6757 }, { "epoch": 3.2840121580547113, "grad_norm": 0.07179561183796015, "learning_rate": 7.673979807824788e-07, "loss": 0.5216, "step": 6758 }, { "epoch": 3.284498480243161, "grad_norm": 0.06984942550101632, "learning_rate": 7.663790038585794e-07, "loss": 0.4961, "step": 6759 }, { "epoch": 3.284984802431611, "grad_norm": 0.07264213214800169, "learning_rate": 7.653606477471237e-07, "loss": 0.4816, "step": 6760 }, { "epoch": 3.2854711246200607, "grad_norm": 0.07155529148916669, "learning_rate": 7.643429125974411e-07, "loss": 0.5261, "step": 6761 }, { "epoch": 3.285957446808511, "grad_norm": 0.0704857524744365, "learning_rate": 7.633257985587711e-07, "loss": 0.4903, "step": 6762 }, { "epoch": 3.2864437689969606, "grad_norm": 0.06888892461398638, "learning_rate": 7.623093057802622e-07, "loss": 0.4863, "step": 6763 }, { "epoch": 3.2869300911854102, "grad_norm": 0.06930161900158482, "learning_rate": 7.612934344109718e-07, "loss": 0.488, "step": 6764 }, { "epoch": 3.2874164133738604, "grad_norm": 0.06957023485748803, "learning_rate": 7.602781845998652e-07, "loss": 0.4884, "step": 6765 }, { "epoch": 3.28790273556231, "grad_norm": 0.07232513784655341, "learning_rate": 7.592635564958178e-07, "loss": 0.5248, "step": 6766 }, { "epoch": 3.2883890577507597, "grad_norm": 0.07253776676777018, "learning_rate": 7.582495502476134e-07, "loss": 0.54, "step": 6767 }, { "epoch": 3.28887537993921, "grad_norm": 0.07055165233972628, "learning_rate": 7.572361660039434e-07, "loss": 0.4807, "step": 6768 }, { "epoch": 3.2893617021276595, "grad_norm": 0.07430399192847663, "learning_rate": 7.562234039134103e-07, "loss": 0.5165, "step": 6769 }, { "epoch": 3.2898480243161092, "grad_norm": 0.07031466168770636, "learning_rate": 7.552112641245241e-07, "loss": 0.503, "step": 6770 }, { "epoch": 3.2903343465045594, "grad_norm": 0.07115246543581653, "learning_rate": 7.541997467857026e-07, "loss": 0.5163, "step": 6771 }, { "epoch": 3.290820668693009, "grad_norm": 0.07174269848071549, "learning_rate": 7.531888520452746e-07, "loss": 0.5042, "step": 6772 }, { "epoch": 3.291306990881459, "grad_norm": 0.07064398846981618, "learning_rate": 7.521785800514752e-07, "loss": 0.5128, "step": 6773 }, { "epoch": 3.291793313069909, "grad_norm": 0.07069848162737531, "learning_rate": 7.511689309524501e-07, "loss": 0.5075, "step": 6774 }, { "epoch": 3.2922796352583585, "grad_norm": 0.07167712052467, "learning_rate": 7.501599048962527e-07, "loss": 0.5276, "step": 6775 }, { "epoch": 3.2927659574468087, "grad_norm": 0.07100369101453206, "learning_rate": 7.491515020308448e-07, "loss": 0.5, "step": 6776 }, { "epoch": 3.2932522796352584, "grad_norm": 0.07238004051221887, "learning_rate": 7.481437225040978e-07, "loss": 0.5408, "step": 6777 }, { "epoch": 3.293738601823708, "grad_norm": 0.07119703583328676, "learning_rate": 7.471365664637903e-07, "loss": 0.5271, "step": 6778 }, { "epoch": 3.294224924012158, "grad_norm": 0.0733725231937714, "learning_rate": 7.461300340576128e-07, "loss": 0.5519, "step": 6779 }, { "epoch": 3.294711246200608, "grad_norm": 0.07132850809023118, "learning_rate": 7.451241254331582e-07, "loss": 0.5221, "step": 6780 }, { "epoch": 3.2951975683890575, "grad_norm": 0.06837321276176714, "learning_rate": 7.441188407379335e-07, "loss": 0.4874, "step": 6781 }, { "epoch": 3.2956838905775077, "grad_norm": 0.07240454055555363, "learning_rate": 7.431141801193509e-07, "loss": 0.5198, "step": 6782 }, { "epoch": 3.2961702127659573, "grad_norm": 0.07196043095564707, "learning_rate": 7.421101437247346e-07, "loss": 0.5332, "step": 6783 }, { "epoch": 3.2966565349544075, "grad_norm": 0.07009129817913741, "learning_rate": 7.411067317013148e-07, "loss": 0.4809, "step": 6784 }, { "epoch": 3.297142857142857, "grad_norm": 0.07073461793953208, "learning_rate": 7.401039441962293e-07, "loss": 0.5092, "step": 6785 }, { "epoch": 3.297629179331307, "grad_norm": 0.06732821441053059, "learning_rate": 7.39101781356526e-07, "loss": 0.4807, "step": 6786 }, { "epoch": 3.298115501519757, "grad_norm": 0.07038959061073322, "learning_rate": 7.381002433291612e-07, "loss": 0.5157, "step": 6787 }, { "epoch": 3.2986018237082066, "grad_norm": 0.07188046113026367, "learning_rate": 7.370993302609986e-07, "loss": 0.5143, "step": 6788 }, { "epoch": 3.2990881458966568, "grad_norm": 0.0715737240485823, "learning_rate": 7.360990422988101e-07, "loss": 0.5059, "step": 6789 }, { "epoch": 3.2995744680851065, "grad_norm": 0.07322841119648478, "learning_rate": 7.35099379589278e-07, "loss": 0.5228, "step": 6790 }, { "epoch": 3.300060790273556, "grad_norm": 0.07221420468839182, "learning_rate": 7.341003422789905e-07, "loss": 0.4891, "step": 6791 }, { "epoch": 3.3005471124620063, "grad_norm": 0.07151167475645931, "learning_rate": 7.331019305144455e-07, "loss": 0.5068, "step": 6792 }, { "epoch": 3.301033434650456, "grad_norm": 0.07080602881298616, "learning_rate": 7.321041444420479e-07, "loss": 0.479, "step": 6793 }, { "epoch": 3.3015197568389056, "grad_norm": 0.07479582354571418, "learning_rate": 7.311069842081142e-07, "loss": 0.5757, "step": 6794 }, { "epoch": 3.3020060790273558, "grad_norm": 0.0716130319413144, "learning_rate": 7.301104499588629e-07, "loss": 0.5307, "step": 6795 }, { "epoch": 3.3024924012158055, "grad_norm": 0.07347438582822559, "learning_rate": 7.291145418404272e-07, "loss": 0.525, "step": 6796 }, { "epoch": 3.302978723404255, "grad_norm": 0.07230789927798162, "learning_rate": 7.281192599988441e-07, "loss": 0.5216, "step": 6797 }, { "epoch": 3.3034650455927053, "grad_norm": 0.07220062514410652, "learning_rate": 7.271246045800612e-07, "loss": 0.5264, "step": 6798 }, { "epoch": 3.303951367781155, "grad_norm": 0.07206019733268727, "learning_rate": 7.261305757299336e-07, "loss": 0.5165, "step": 6799 }, { "epoch": 3.304437689969605, "grad_norm": 0.06917531688270125, "learning_rate": 7.251371735942231e-07, "loss": 0.5255, "step": 6800 }, { "epoch": 3.3049240121580548, "grad_norm": 0.0702745406461957, "learning_rate": 7.241443983186025e-07, "loss": 0.5088, "step": 6801 }, { "epoch": 3.3054103343465044, "grad_norm": 0.07011622230132807, "learning_rate": 7.231522500486504e-07, "loss": 0.5155, "step": 6802 }, { "epoch": 3.3058966565349546, "grad_norm": 0.07116590907538135, "learning_rate": 7.221607289298538e-07, "loss": 0.5107, "step": 6803 }, { "epoch": 3.3063829787234043, "grad_norm": 0.07221529877806088, "learning_rate": 7.211698351076085e-07, "loss": 0.5513, "step": 6804 }, { "epoch": 3.306869300911854, "grad_norm": 0.07107072814013866, "learning_rate": 7.201795687272178e-07, "loss": 0.5244, "step": 6805 }, { "epoch": 3.307355623100304, "grad_norm": 0.07233933774627571, "learning_rate": 7.191899299338923e-07, "loss": 0.4831, "step": 6806 }, { "epoch": 3.3078419452887537, "grad_norm": 0.07173336054889858, "learning_rate": 7.182009188727524e-07, "loss": 0.4924, "step": 6807 }, { "epoch": 3.3083282674772034, "grad_norm": 0.07273530116143037, "learning_rate": 7.172125356888237e-07, "loss": 0.5248, "step": 6808 }, { "epoch": 3.3088145896656536, "grad_norm": 0.07074318123582363, "learning_rate": 7.162247805270445e-07, "loss": 0.5402, "step": 6809 }, { "epoch": 3.3093009118541032, "grad_norm": 0.07047086884404011, "learning_rate": 7.152376535322542e-07, "loss": 0.5084, "step": 6810 }, { "epoch": 3.3097872340425534, "grad_norm": 0.07055593819345828, "learning_rate": 7.142511548492054e-07, "loss": 0.5086, "step": 6811 }, { "epoch": 3.310273556231003, "grad_norm": 0.07049889519653926, "learning_rate": 7.132652846225563e-07, "loss": 0.52, "step": 6812 }, { "epoch": 3.3107598784194527, "grad_norm": 0.07112052853074773, "learning_rate": 7.122800429968746e-07, "loss": 0.4907, "step": 6813 }, { "epoch": 3.311246200607903, "grad_norm": 0.07011189736779255, "learning_rate": 7.112954301166341e-07, "loss": 0.518, "step": 6814 }, { "epoch": 3.3117325227963526, "grad_norm": 0.06903750164058604, "learning_rate": 7.103114461262179e-07, "loss": 0.4889, "step": 6815 }, { "epoch": 3.3122188449848027, "grad_norm": 0.06806022432356529, "learning_rate": 7.093280911699147e-07, "loss": 0.4577, "step": 6816 }, { "epoch": 3.3127051671732524, "grad_norm": 0.07044795974662547, "learning_rate": 7.083453653919237e-07, "loss": 0.5079, "step": 6817 }, { "epoch": 3.313191489361702, "grad_norm": 0.07063160311300756, "learning_rate": 7.073632689363485e-07, "loss": 0.5069, "step": 6818 }, { "epoch": 3.3136778115501517, "grad_norm": 0.0695024603128343, "learning_rate": 7.063818019472046e-07, "loss": 0.4834, "step": 6819 }, { "epoch": 3.314164133738602, "grad_norm": 0.07205120122841974, "learning_rate": 7.054009645684128e-07, "loss": 0.5065, "step": 6820 }, { "epoch": 3.3146504559270515, "grad_norm": 0.07092022520080976, "learning_rate": 7.044207569438011e-07, "loss": 0.5137, "step": 6821 }, { "epoch": 3.3151367781155017, "grad_norm": 0.0684122819517937, "learning_rate": 7.034411792171053e-07, "loss": 0.4988, "step": 6822 }, { "epoch": 3.3156231003039514, "grad_norm": 0.07116750965108525, "learning_rate": 7.024622315319713e-07, "loss": 0.5461, "step": 6823 }, { "epoch": 3.316109422492401, "grad_norm": 0.0721253908927146, "learning_rate": 7.014839140319485e-07, "loss": 0.5113, "step": 6824 }, { "epoch": 3.316595744680851, "grad_norm": 0.07034657572765021, "learning_rate": 7.005062268604962e-07, "loss": 0.4876, "step": 6825 }, { "epoch": 3.317082066869301, "grad_norm": 0.07213556525075031, "learning_rate": 6.995291701609824e-07, "loss": 0.5047, "step": 6826 }, { "epoch": 3.317568389057751, "grad_norm": 0.07033754126346546, "learning_rate": 6.985527440766804e-07, "loss": 0.5073, "step": 6827 }, { "epoch": 3.3180547112462007, "grad_norm": 0.07055221284024986, "learning_rate": 6.975769487507722e-07, "loss": 0.5127, "step": 6828 }, { "epoch": 3.3185410334346503, "grad_norm": 0.07329364576154067, "learning_rate": 6.966017843263473e-07, "loss": 0.479, "step": 6829 }, { "epoch": 3.3190273556231005, "grad_norm": 0.07287549121511681, "learning_rate": 6.956272509464024e-07, "loss": 0.4868, "step": 6830 }, { "epoch": 3.31951367781155, "grad_norm": 0.07079689226198173, "learning_rate": 6.946533487538415e-07, "loss": 0.4973, "step": 6831 }, { "epoch": 3.32, "grad_norm": 0.07105703135005628, "learning_rate": 6.93680077891477e-07, "loss": 0.5193, "step": 6832 }, { "epoch": 3.32048632218845, "grad_norm": 0.07442941352069911, "learning_rate": 6.927074385020271e-07, "loss": 0.5471, "step": 6833 }, { "epoch": 3.3209726443768997, "grad_norm": 0.07201007580230237, "learning_rate": 6.917354307281193e-07, "loss": 0.5261, "step": 6834 }, { "epoch": 3.3214589665653493, "grad_norm": 0.07306767546324147, "learning_rate": 6.907640547122868e-07, "loss": 0.5106, "step": 6835 }, { "epoch": 3.3219452887537995, "grad_norm": 0.07297965829683148, "learning_rate": 6.897933105969701e-07, "loss": 0.5401, "step": 6836 }, { "epoch": 3.322431610942249, "grad_norm": 0.0703237212977566, "learning_rate": 6.888231985245197e-07, "loss": 0.5119, "step": 6837 }, { "epoch": 3.3229179331306993, "grad_norm": 0.06833607035733347, "learning_rate": 6.878537186371914e-07, "loss": 0.4886, "step": 6838 }, { "epoch": 3.323404255319149, "grad_norm": 0.06941673468252307, "learning_rate": 6.868848710771469e-07, "loss": 0.5058, "step": 6839 }, { "epoch": 3.3238905775075986, "grad_norm": 0.06976322127009543, "learning_rate": 6.859166559864571e-07, "loss": 0.516, "step": 6840 }, { "epoch": 3.3243768996960488, "grad_norm": 0.07247747356576587, "learning_rate": 6.849490735071008e-07, "loss": 0.5437, "step": 6841 }, { "epoch": 3.3248632218844985, "grad_norm": 0.07020835253166453, "learning_rate": 6.839821237809613e-07, "loss": 0.5111, "step": 6842 }, { "epoch": 3.325349544072948, "grad_norm": 0.0727483312302522, "learning_rate": 6.830158069498322e-07, "loss": 0.538, "step": 6843 }, { "epoch": 3.3258358662613983, "grad_norm": 0.07098719836907831, "learning_rate": 6.820501231554121e-07, "loss": 0.5023, "step": 6844 }, { "epoch": 3.326322188449848, "grad_norm": 0.07215865030941816, "learning_rate": 6.810850725393081e-07, "loss": 0.5043, "step": 6845 }, { "epoch": 3.3268085106382976, "grad_norm": 0.07129774083883823, "learning_rate": 6.801206552430334e-07, "loss": 0.4993, "step": 6846 }, { "epoch": 3.3272948328267478, "grad_norm": 0.07098179715158444, "learning_rate": 6.791568714080093e-07, "loss": 0.5025, "step": 6847 }, { "epoch": 3.3277811550151974, "grad_norm": 0.07039916869682948, "learning_rate": 6.78193721175564e-07, "loss": 0.4878, "step": 6848 }, { "epoch": 3.3282674772036476, "grad_norm": 0.07004807825599091, "learning_rate": 6.772312046869317e-07, "loss": 0.5012, "step": 6849 }, { "epoch": 3.3287537993920973, "grad_norm": 0.07385155510415911, "learning_rate": 6.762693220832551e-07, "loss": 0.5397, "step": 6850 }, { "epoch": 3.329240121580547, "grad_norm": 0.06900376933849205, "learning_rate": 6.753080735055828e-07, "loss": 0.524, "step": 6851 }, { "epoch": 3.329726443768997, "grad_norm": 0.07223504908694708, "learning_rate": 6.743474590948718e-07, "loss": 0.5044, "step": 6852 }, { "epoch": 3.3302127659574468, "grad_norm": 0.0725330101992085, "learning_rate": 6.733874789919847e-07, "loss": 0.5302, "step": 6853 }, { "epoch": 3.330699088145897, "grad_norm": 0.07151248169613576, "learning_rate": 6.724281333376919e-07, "loss": 0.4921, "step": 6854 }, { "epoch": 3.3311854103343466, "grad_norm": 0.07296942855063826, "learning_rate": 6.714694222726703e-07, "loss": 0.526, "step": 6855 }, { "epoch": 3.3316717325227962, "grad_norm": 0.06901844245211027, "learning_rate": 6.705113459375046e-07, "loss": 0.4962, "step": 6856 }, { "epoch": 3.3321580547112464, "grad_norm": 0.07019486686507174, "learning_rate": 6.695539044726851e-07, "loss": 0.4893, "step": 6857 }, { "epoch": 3.332644376899696, "grad_norm": 0.06924531756890973, "learning_rate": 6.685970980186107e-07, "loss": 0.4823, "step": 6858 }, { "epoch": 3.3331306990881457, "grad_norm": 0.07405629083660108, "learning_rate": 6.676409267155847e-07, "loss": 0.5141, "step": 6859 }, { "epoch": 3.333617021276596, "grad_norm": 0.07135188527271055, "learning_rate": 6.666853907038201e-07, "loss": 0.5186, "step": 6860 }, { "epoch": 3.3341033434650456, "grad_norm": 0.07553585945008054, "learning_rate": 6.657304901234346e-07, "loss": 0.5827, "step": 6861 }, { "epoch": 3.3345896656534952, "grad_norm": 0.07176905115831651, "learning_rate": 6.647762251144541e-07, "loss": 0.5327, "step": 6862 }, { "epoch": 3.3350759878419454, "grad_norm": 0.07161461562606672, "learning_rate": 6.638225958168104e-07, "loss": 0.501, "step": 6863 }, { "epoch": 3.335562310030395, "grad_norm": 0.0679051608513659, "learning_rate": 6.628696023703424e-07, "loss": 0.471, "step": 6864 }, { "epoch": 3.336048632218845, "grad_norm": 0.07047009248565354, "learning_rate": 6.619172449147953e-07, "loss": 0.5208, "step": 6865 }, { "epoch": 3.336534954407295, "grad_norm": 0.07325616084016405, "learning_rate": 6.609655235898227e-07, "loss": 0.5145, "step": 6866 }, { "epoch": 3.3370212765957445, "grad_norm": 0.07306992308508012, "learning_rate": 6.600144385349833e-07, "loss": 0.5265, "step": 6867 }, { "epoch": 3.3375075987841947, "grad_norm": 0.06890257583881561, "learning_rate": 6.590639898897421e-07, "loss": 0.5007, "step": 6868 }, { "epoch": 3.3379939209726444, "grad_norm": 0.07052569019779599, "learning_rate": 6.581141777934724e-07, "loss": 0.5223, "step": 6869 }, { "epoch": 3.338480243161094, "grad_norm": 0.07156611360513149, "learning_rate": 6.571650023854531e-07, "loss": 0.5173, "step": 6870 }, { "epoch": 3.338966565349544, "grad_norm": 0.07390972790522173, "learning_rate": 6.562164638048712e-07, "loss": 0.5271, "step": 6871 }, { "epoch": 3.339452887537994, "grad_norm": 0.07022824836649157, "learning_rate": 6.552685621908155e-07, "loss": 0.4915, "step": 6872 }, { "epoch": 3.3399392097264435, "grad_norm": 0.06812410937204921, "learning_rate": 6.543212976822894e-07, "loss": 0.4856, "step": 6873 }, { "epoch": 3.3404255319148937, "grad_norm": 0.06763439434837915, "learning_rate": 6.533746704181959e-07, "loss": 0.4865, "step": 6874 }, { "epoch": 3.3409118541033433, "grad_norm": 0.06985526175797392, "learning_rate": 6.524286805373475e-07, "loss": 0.5106, "step": 6875 }, { "epoch": 3.3413981762917935, "grad_norm": 0.07301500226376727, "learning_rate": 6.514833281784638e-07, "loss": 0.5249, "step": 6876 }, { "epoch": 3.341884498480243, "grad_norm": 0.07312317989562328, "learning_rate": 6.505386134801688e-07, "loss": 0.4944, "step": 6877 }, { "epoch": 3.342370820668693, "grad_norm": 0.06991911870995818, "learning_rate": 6.495945365809947e-07, "loss": 0.4948, "step": 6878 }, { "epoch": 3.342857142857143, "grad_norm": 0.07354675339985596, "learning_rate": 6.486510976193799e-07, "loss": 0.5039, "step": 6879 }, { "epoch": 3.3433434650455927, "grad_norm": 0.07229525841049106, "learning_rate": 6.47708296733669e-07, "loss": 0.5039, "step": 6880 }, { "epoch": 3.343829787234043, "grad_norm": 0.07199884472693327, "learning_rate": 6.467661340621129e-07, "loss": 0.5033, "step": 6881 }, { "epoch": 3.3443161094224925, "grad_norm": 0.07170618104931153, "learning_rate": 6.458246097428689e-07, "loss": 0.5162, "step": 6882 }, { "epoch": 3.344802431610942, "grad_norm": 0.07172488898901912, "learning_rate": 6.448837239140004e-07, "loss": 0.5381, "step": 6883 }, { "epoch": 3.3452887537993923, "grad_norm": 0.07340970993169409, "learning_rate": 6.439434767134789e-07, "loss": 0.4978, "step": 6884 }, { "epoch": 3.345775075987842, "grad_norm": 0.07489428138614118, "learning_rate": 6.430038682791795e-07, "loss": 0.536, "step": 6885 }, { "epoch": 3.3462613981762916, "grad_norm": 0.06890991108619181, "learning_rate": 6.420648987488876e-07, "loss": 0.469, "step": 6886 }, { "epoch": 3.3467477203647418, "grad_norm": 0.07159045268877802, "learning_rate": 6.411265682602891e-07, "loss": 0.5299, "step": 6887 }, { "epoch": 3.3472340425531915, "grad_norm": 0.06987965818335329, "learning_rate": 6.401888769509812e-07, "loss": 0.4956, "step": 6888 }, { "epoch": 3.347720364741641, "grad_norm": 0.07122874921424016, "learning_rate": 6.392518249584656e-07, "loss": 0.4741, "step": 6889 }, { "epoch": 3.3482066869300913, "grad_norm": 0.07095585158552611, "learning_rate": 6.383154124201496e-07, "loss": 0.5256, "step": 6890 }, { "epoch": 3.348693009118541, "grad_norm": 0.07465843087894065, "learning_rate": 6.373796394733489e-07, "loss": 0.5357, "step": 6891 }, { "epoch": 3.349179331306991, "grad_norm": 0.07424499235580542, "learning_rate": 6.364445062552832e-07, "loss": 0.5394, "step": 6892 }, { "epoch": 3.3496656534954408, "grad_norm": 0.07305714454200418, "learning_rate": 6.355100129030794e-07, "loss": 0.5015, "step": 6893 }, { "epoch": 3.3501519756838904, "grad_norm": 0.07193012438733509, "learning_rate": 6.345761595537698e-07, "loss": 0.5187, "step": 6894 }, { "epoch": 3.3506382978723406, "grad_norm": 0.0696762064376855, "learning_rate": 6.336429463442939e-07, "loss": 0.4847, "step": 6895 }, { "epoch": 3.3511246200607903, "grad_norm": 0.06980664385411242, "learning_rate": 6.327103734114965e-07, "loss": 0.5123, "step": 6896 }, { "epoch": 3.35161094224924, "grad_norm": 0.06987357360157116, "learning_rate": 6.31778440892129e-07, "loss": 0.4915, "step": 6897 }, { "epoch": 3.35209726443769, "grad_norm": 0.07077963651500906, "learning_rate": 6.308471489228491e-07, "loss": 0.5145, "step": 6898 }, { "epoch": 3.3525835866261398, "grad_norm": 0.06990134387597335, "learning_rate": 6.299164976402195e-07, "loss": 0.5102, "step": 6899 }, { "epoch": 3.3530699088145894, "grad_norm": 0.07007511425795404, "learning_rate": 6.2898648718071e-07, "loss": 0.4967, "step": 6900 }, { "epoch": 3.3535562310030396, "grad_norm": 0.07039322230102411, "learning_rate": 6.280571176806971e-07, "loss": 0.5179, "step": 6901 }, { "epoch": 3.3540425531914893, "grad_norm": 0.07088766077186194, "learning_rate": 6.271283892764602e-07, "loss": 0.5125, "step": 6902 }, { "epoch": 3.3545288753799394, "grad_norm": 0.07012555991426875, "learning_rate": 6.262003021041873e-07, "loss": 0.5011, "step": 6903 }, { "epoch": 3.355015197568389, "grad_norm": 0.07259479134508723, "learning_rate": 6.252728562999727e-07, "loss": 0.5299, "step": 6904 }, { "epoch": 3.3555015197568387, "grad_norm": 0.07233588008061326, "learning_rate": 6.243460519998156e-07, "loss": 0.5288, "step": 6905 }, { "epoch": 3.355987841945289, "grad_norm": 0.07197398806366222, "learning_rate": 6.234198893396209e-07, "loss": 0.5156, "step": 6906 }, { "epoch": 3.3564741641337386, "grad_norm": 0.07352762733286107, "learning_rate": 6.224943684551998e-07, "loss": 0.5266, "step": 6907 }, { "epoch": 3.3569604863221887, "grad_norm": 0.07493186335683674, "learning_rate": 6.215694894822699e-07, "loss": 0.523, "step": 6908 }, { "epoch": 3.3574468085106384, "grad_norm": 0.07445561573799665, "learning_rate": 6.206452525564533e-07, "loss": 0.4994, "step": 6909 }, { "epoch": 3.357933130699088, "grad_norm": 0.07052407039126818, "learning_rate": 6.197216578132803e-07, "loss": 0.498, "step": 6910 }, { "epoch": 3.358419452887538, "grad_norm": 0.07012363060295893, "learning_rate": 6.187987053881845e-07, "loss": 0.4722, "step": 6911 }, { "epoch": 3.358905775075988, "grad_norm": 0.07261266834039559, "learning_rate": 6.178763954165068e-07, "loss": 0.5213, "step": 6912 }, { "epoch": 3.3593920972644375, "grad_norm": 0.07232269041700885, "learning_rate": 6.169547280334937e-07, "loss": 0.5275, "step": 6913 }, { "epoch": 3.3598784194528877, "grad_norm": 0.07164236571973215, "learning_rate": 6.16033703374297e-07, "loss": 0.5135, "step": 6914 }, { "epoch": 3.3603647416413374, "grad_norm": 0.07025702522424446, "learning_rate": 6.151133215739752e-07, "loss": 0.5064, "step": 6915 }, { "epoch": 3.360851063829787, "grad_norm": 0.0749076829503178, "learning_rate": 6.141935827674905e-07, "loss": 0.5305, "step": 6916 }, { "epoch": 3.361337386018237, "grad_norm": 0.07331282874334093, "learning_rate": 6.132744870897122e-07, "loss": 0.546, "step": 6917 }, { "epoch": 3.361823708206687, "grad_norm": 0.07315131009487047, "learning_rate": 6.123560346754165e-07, "loss": 0.5186, "step": 6918 }, { "epoch": 3.362310030395137, "grad_norm": 0.07163492403134779, "learning_rate": 6.114382256592826e-07, "loss": 0.4884, "step": 6919 }, { "epoch": 3.3627963525835867, "grad_norm": 0.07188716509215164, "learning_rate": 6.105210601758982e-07, "loss": 0.5243, "step": 6920 }, { "epoch": 3.3632826747720364, "grad_norm": 0.07093292399676515, "learning_rate": 6.096045383597537e-07, "loss": 0.4748, "step": 6921 }, { "epoch": 3.3637689969604865, "grad_norm": 0.07095474269585418, "learning_rate": 6.08688660345248e-07, "loss": 0.5043, "step": 6922 }, { "epoch": 3.364255319148936, "grad_norm": 0.07485676589207554, "learning_rate": 6.077734262666834e-07, "loss": 0.5328, "step": 6923 }, { "epoch": 3.364741641337386, "grad_norm": 0.06989839092264896, "learning_rate": 6.06858836258269e-07, "loss": 0.4727, "step": 6924 }, { "epoch": 3.365227963525836, "grad_norm": 0.07345294628496163, "learning_rate": 6.059448904541182e-07, "loss": 0.5133, "step": 6925 }, { "epoch": 3.3657142857142857, "grad_norm": 0.07248750660668703, "learning_rate": 6.050315889882519e-07, "loss": 0.509, "step": 6926 }, { "epoch": 3.3662006079027353, "grad_norm": 0.07255466582114804, "learning_rate": 6.04118931994594e-07, "loss": 0.4967, "step": 6927 }, { "epoch": 3.3666869300911855, "grad_norm": 0.06982519439786458, "learning_rate": 6.032069196069773e-07, "loss": 0.5053, "step": 6928 }, { "epoch": 3.367173252279635, "grad_norm": 0.07180227655168682, "learning_rate": 6.022955519591367e-07, "loss": 0.4845, "step": 6929 }, { "epoch": 3.3676595744680853, "grad_norm": 0.06945925797893547, "learning_rate": 6.013848291847152e-07, "loss": 0.505, "step": 6930 }, { "epoch": 3.368145896656535, "grad_norm": 0.07040127217085423, "learning_rate": 6.004747514172576e-07, "loss": 0.5031, "step": 6931 }, { "epoch": 3.3686322188449846, "grad_norm": 0.0716278952251427, "learning_rate": 5.995653187902178e-07, "loss": 0.5159, "step": 6932 }, { "epoch": 3.3691185410334348, "grad_norm": 0.07078209420303555, "learning_rate": 5.986565314369541e-07, "loss": 0.5199, "step": 6933 }, { "epoch": 3.3696048632218845, "grad_norm": 0.07047849202980369, "learning_rate": 5.977483894907294e-07, "loss": 0.5216, "step": 6934 }, { "epoch": 3.3700911854103346, "grad_norm": 0.07026331066333459, "learning_rate": 5.968408930847125e-07, "loss": 0.4904, "step": 6935 }, { "epoch": 3.3705775075987843, "grad_norm": 0.07584883007700144, "learning_rate": 5.959340423519777e-07, "loss": 0.5275, "step": 6936 }, { "epoch": 3.371063829787234, "grad_norm": 0.07145854236577773, "learning_rate": 5.950278374255036e-07, "loss": 0.5122, "step": 6937 }, { "epoch": 3.371550151975684, "grad_norm": 0.06994631048614783, "learning_rate": 5.941222784381756e-07, "loss": 0.4609, "step": 6938 }, { "epoch": 3.3720364741641338, "grad_norm": 0.07289834743011642, "learning_rate": 5.932173655227835e-07, "loss": 0.5201, "step": 6939 }, { "epoch": 3.3725227963525835, "grad_norm": 0.07009972446600306, "learning_rate": 5.923130988120223e-07, "loss": 0.5027, "step": 6940 }, { "epoch": 3.3730091185410336, "grad_norm": 0.07182732171826438, "learning_rate": 5.914094784384927e-07, "loss": 0.5036, "step": 6941 }, { "epoch": 3.3734954407294833, "grad_norm": 0.06978282640318427, "learning_rate": 5.905065045347002e-07, "loss": 0.4857, "step": 6942 }, { "epoch": 3.373981762917933, "grad_norm": 0.07292716115234207, "learning_rate": 5.896041772330558e-07, "loss": 0.5062, "step": 6943 }, { "epoch": 3.374468085106383, "grad_norm": 0.0694177295780197, "learning_rate": 5.88702496665875e-07, "loss": 0.5034, "step": 6944 }, { "epoch": 3.3749544072948328, "grad_norm": 0.07184645671495853, "learning_rate": 5.8780146296538e-07, "loss": 0.5274, "step": 6945 }, { "epoch": 3.375440729483283, "grad_norm": 0.07204979439826342, "learning_rate": 5.869010762636962e-07, "loss": 0.502, "step": 6946 }, { "epoch": 3.3759270516717326, "grad_norm": 0.07137765799661595, "learning_rate": 5.860013366928558e-07, "loss": 0.4835, "step": 6947 }, { "epoch": 3.3764133738601823, "grad_norm": 0.07113765494398433, "learning_rate": 5.851022443847948e-07, "loss": 0.4734, "step": 6948 }, { "epoch": 3.3768996960486324, "grad_norm": 0.07076144058410078, "learning_rate": 5.842037994713551e-07, "loss": 0.5181, "step": 6949 }, { "epoch": 3.377386018237082, "grad_norm": 0.07213018338053362, "learning_rate": 5.83306002084284e-07, "loss": 0.4971, "step": 6950 }, { "epoch": 3.3778723404255317, "grad_norm": 0.06849450134607875, "learning_rate": 5.824088523552323e-07, "loss": 0.4902, "step": 6951 }, { "epoch": 3.378358662613982, "grad_norm": 0.07235153708470951, "learning_rate": 5.815123504157577e-07, "loss": 0.5262, "step": 6952 }, { "epoch": 3.3788449848024316, "grad_norm": 0.06997504362258379, "learning_rate": 5.806164963973216e-07, "loss": 0.5187, "step": 6953 }, { "epoch": 3.3793313069908812, "grad_norm": 0.07116715910091094, "learning_rate": 5.79721290431291e-07, "loss": 0.5121, "step": 6954 }, { "epoch": 3.3798176291793314, "grad_norm": 0.0693753037815743, "learning_rate": 5.788267326489372e-07, "loss": 0.4834, "step": 6955 }, { "epoch": 3.380303951367781, "grad_norm": 0.07463094711201416, "learning_rate": 5.779328231814374e-07, "loss": 0.5291, "step": 6956 }, { "epoch": 3.380790273556231, "grad_norm": 0.07289396407672954, "learning_rate": 5.770395621598734e-07, "loss": 0.526, "step": 6957 }, { "epoch": 3.381276595744681, "grad_norm": 0.07195755756508643, "learning_rate": 5.761469497152317e-07, "loss": 0.5137, "step": 6958 }, { "epoch": 3.3817629179331306, "grad_norm": 0.071978917504858, "learning_rate": 5.752549859784034e-07, "loss": 0.5335, "step": 6959 }, { "epoch": 3.3822492401215807, "grad_norm": 0.0703328971028879, "learning_rate": 5.743636710801848e-07, "loss": 0.5032, "step": 6960 }, { "epoch": 3.3827355623100304, "grad_norm": 0.06820713659481933, "learning_rate": 5.734730051512777e-07, "loss": 0.4686, "step": 6961 }, { "epoch": 3.3832218844984805, "grad_norm": 0.06944420384106066, "learning_rate": 5.725829883222877e-07, "loss": 0.4972, "step": 6962 }, { "epoch": 3.38370820668693, "grad_norm": 0.0707031295099363, "learning_rate": 5.716936207237261e-07, "loss": 0.5148, "step": 6963 }, { "epoch": 3.38419452887538, "grad_norm": 0.0699244552159447, "learning_rate": 5.708049024860085e-07, "loss": 0.4831, "step": 6964 }, { "epoch": 3.3846808510638295, "grad_norm": 0.0723919186975563, "learning_rate": 5.699168337394545e-07, "loss": 0.5136, "step": 6965 }, { "epoch": 3.3851671732522797, "grad_norm": 0.07104080541784422, "learning_rate": 5.690294146142899e-07, "loss": 0.4973, "step": 6966 }, { "epoch": 3.3856534954407294, "grad_norm": 0.07396129154565106, "learning_rate": 5.681426452406453e-07, "loss": 0.534, "step": 6967 }, { "epoch": 3.3861398176291795, "grad_norm": 0.07233664955810466, "learning_rate": 5.67256525748554e-07, "loss": 0.4943, "step": 6968 }, { "epoch": 3.386626139817629, "grad_norm": 0.0709888910446805, "learning_rate": 5.663710562679564e-07, "loss": 0.5103, "step": 6969 }, { "epoch": 3.387112462006079, "grad_norm": 0.07344730493274089, "learning_rate": 5.654862369286962e-07, "loss": 0.5157, "step": 6970 }, { "epoch": 3.387598784194529, "grad_norm": 0.07036358846668035, "learning_rate": 5.646020678605219e-07, "loss": 0.5054, "step": 6971 }, { "epoch": 3.3880851063829787, "grad_norm": 0.07557695558944763, "learning_rate": 5.637185491930875e-07, "loss": 0.5619, "step": 6972 }, { "epoch": 3.388571428571429, "grad_norm": 0.07301760348337281, "learning_rate": 5.628356810559499e-07, "loss": 0.5083, "step": 6973 }, { "epoch": 3.3890577507598785, "grad_norm": 0.06970113006259425, "learning_rate": 5.619534635785729e-07, "loss": 0.4926, "step": 6974 }, { "epoch": 3.389544072948328, "grad_norm": 0.07082295446846913, "learning_rate": 5.610718968903228e-07, "loss": 0.5258, "step": 6975 }, { "epoch": 3.3900303951367783, "grad_norm": 0.07610425384451482, "learning_rate": 5.60190981120472e-07, "loss": 0.5703, "step": 6976 }, { "epoch": 3.390516717325228, "grad_norm": 0.06964598441979405, "learning_rate": 5.593107163981959e-07, "loss": 0.509, "step": 6977 }, { "epoch": 3.3910030395136777, "grad_norm": 0.07538918402249042, "learning_rate": 5.584311028525774e-07, "loss": 0.5478, "step": 6978 }, { "epoch": 3.391489361702128, "grad_norm": 0.07284115855131425, "learning_rate": 5.575521406125989e-07, "loss": 0.5361, "step": 6979 }, { "epoch": 3.3919756838905775, "grad_norm": 0.07071726931141209, "learning_rate": 5.566738298071522e-07, "loss": 0.4765, "step": 6980 }, { "epoch": 3.392462006079027, "grad_norm": 0.0725478087966237, "learning_rate": 5.557961705650294e-07, "loss": 0.5194, "step": 6981 }, { "epoch": 3.3929483282674773, "grad_norm": 0.07129352237661303, "learning_rate": 5.549191630149326e-07, "loss": 0.4878, "step": 6982 }, { "epoch": 3.393434650455927, "grad_norm": 0.07253398573983671, "learning_rate": 5.540428072854626e-07, "loss": 0.5306, "step": 6983 }, { "epoch": 3.393920972644377, "grad_norm": 0.07224592433579474, "learning_rate": 5.531671035051278e-07, "loss": 0.5263, "step": 6984 }, { "epoch": 3.3944072948328268, "grad_norm": 0.07048070394112615, "learning_rate": 5.522920518023406e-07, "loss": 0.5177, "step": 6985 }, { "epoch": 3.3948936170212765, "grad_norm": 0.06932192918130829, "learning_rate": 5.514176523054166e-07, "loss": 0.4835, "step": 6986 }, { "epoch": 3.3953799392097266, "grad_norm": 0.07168886718069899, "learning_rate": 5.50543905142577e-07, "loss": 0.5108, "step": 6987 }, { "epoch": 3.3958662613981763, "grad_norm": 0.07121659000177742, "learning_rate": 5.496708104419468e-07, "loss": 0.5295, "step": 6988 }, { "epoch": 3.396352583586626, "grad_norm": 0.06974675909586693, "learning_rate": 5.487983683315556e-07, "loss": 0.4906, "step": 6989 }, { "epoch": 3.396838905775076, "grad_norm": 0.07027974009588528, "learning_rate": 5.479265789393368e-07, "loss": 0.4915, "step": 6990 }, { "epoch": 3.3973252279635258, "grad_norm": 0.07050969042971653, "learning_rate": 5.470554423931285e-07, "loss": 0.505, "step": 6991 }, { "epoch": 3.3978115501519754, "grad_norm": 0.07264894599366457, "learning_rate": 5.461849588206725e-07, "loss": 0.5111, "step": 6992 }, { "epoch": 3.3982978723404256, "grad_norm": 0.07288463132939543, "learning_rate": 5.453151283496177e-07, "loss": 0.537, "step": 6993 }, { "epoch": 3.3987841945288753, "grad_norm": 0.07090308068330071, "learning_rate": 5.444459511075117e-07, "loss": 0.5371, "step": 6994 }, { "epoch": 3.3992705167173254, "grad_norm": 0.07039122724116818, "learning_rate": 5.435774272218109e-07, "loss": 0.4694, "step": 6995 }, { "epoch": 3.399756838905775, "grad_norm": 0.07111096784485435, "learning_rate": 5.427095568198743e-07, "loss": 0.5052, "step": 6996 }, { "epoch": 3.4002431610942248, "grad_norm": 0.07019330839702344, "learning_rate": 5.418423400289651e-07, "loss": 0.4978, "step": 6997 }, { "epoch": 3.400729483282675, "grad_norm": 0.07133190991310967, "learning_rate": 5.409757769762514e-07, "loss": 0.5128, "step": 6998 }, { "epoch": 3.4012158054711246, "grad_norm": 0.07080585057474179, "learning_rate": 5.401098677888029e-07, "loss": 0.5158, "step": 6999 }, { "epoch": 3.4017021276595747, "grad_norm": 0.07491017341842574, "learning_rate": 5.392446125935985e-07, "loss": 0.5764, "step": 7000 }, { "epoch": 3.4021884498480244, "grad_norm": 0.0709200181091196, "learning_rate": 5.383800115175159e-07, "loss": 0.5142, "step": 7001 }, { "epoch": 3.402674772036474, "grad_norm": 0.07251055782223502, "learning_rate": 5.375160646873395e-07, "loss": 0.5414, "step": 7002 }, { "epoch": 3.403161094224924, "grad_norm": 0.06818878883827324, "learning_rate": 5.366527722297577e-07, "loss": 0.4578, "step": 7003 }, { "epoch": 3.403647416413374, "grad_norm": 0.07359690497362702, "learning_rate": 5.357901342713623e-07, "loss": 0.5461, "step": 7004 }, { "epoch": 3.4041337386018236, "grad_norm": 0.06994053916105168, "learning_rate": 5.349281509386489e-07, "loss": 0.4954, "step": 7005 }, { "epoch": 3.4046200607902737, "grad_norm": 0.0761199202333254, "learning_rate": 5.340668223580181e-07, "loss": 0.5306, "step": 7006 }, { "epoch": 3.4051063829787234, "grad_norm": 0.06982552015233777, "learning_rate": 5.332061486557738e-07, "loss": 0.4884, "step": 7007 }, { "epoch": 3.405592705167173, "grad_norm": 0.07176320925514097, "learning_rate": 5.32346129958125e-07, "loss": 0.5174, "step": 7008 }, { "epoch": 3.406079027355623, "grad_norm": 0.07107531987896348, "learning_rate": 5.314867663911816e-07, "loss": 0.5325, "step": 7009 }, { "epoch": 3.406565349544073, "grad_norm": 0.07292283770107981, "learning_rate": 5.306280580809609e-07, "loss": 0.5568, "step": 7010 }, { "epoch": 3.407051671732523, "grad_norm": 0.0712032282326734, "learning_rate": 5.297700051533816e-07, "loss": 0.5369, "step": 7011 }, { "epoch": 3.4075379939209727, "grad_norm": 0.07087962842437506, "learning_rate": 5.289126077342687e-07, "loss": 0.4855, "step": 7012 }, { "epoch": 3.4080243161094224, "grad_norm": 0.07078355715841247, "learning_rate": 5.280558659493495e-07, "loss": 0.4856, "step": 7013 }, { "epoch": 3.4085106382978725, "grad_norm": 0.07036367780252621, "learning_rate": 5.271997799242551e-07, "loss": 0.4898, "step": 7014 }, { "epoch": 3.408996960486322, "grad_norm": 0.07348879404029077, "learning_rate": 5.263443497845211e-07, "loss": 0.5336, "step": 7015 }, { "epoch": 3.409483282674772, "grad_norm": 0.07236918066119626, "learning_rate": 5.254895756555861e-07, "loss": 0.5347, "step": 7016 }, { "epoch": 3.409969604863222, "grad_norm": 0.07193884345986701, "learning_rate": 5.246354576627927e-07, "loss": 0.5326, "step": 7017 }, { "epoch": 3.4104559270516717, "grad_norm": 0.0710068962169839, "learning_rate": 5.237819959313895e-07, "loss": 0.4693, "step": 7018 }, { "epoch": 3.4109422492401213, "grad_norm": 0.07259084261867696, "learning_rate": 5.229291905865252e-07, "loss": 0.5452, "step": 7019 }, { "epoch": 3.4114285714285715, "grad_norm": 0.07168642138661631, "learning_rate": 5.220770417532551e-07, "loss": 0.5031, "step": 7020 }, { "epoch": 3.411914893617021, "grad_norm": 0.07363813709197267, "learning_rate": 5.21225549556536e-07, "loss": 0.5068, "step": 7021 }, { "epoch": 3.4124012158054713, "grad_norm": 0.07271955634676451, "learning_rate": 5.203747141212318e-07, "loss": 0.5212, "step": 7022 }, { "epoch": 3.412887537993921, "grad_norm": 0.07234699811117531, "learning_rate": 5.195245355721051e-07, "loss": 0.5188, "step": 7023 }, { "epoch": 3.4133738601823707, "grad_norm": 0.07044012315922774, "learning_rate": 5.186750140338265e-07, "loss": 0.5, "step": 7024 }, { "epoch": 3.413860182370821, "grad_norm": 0.06927884975579712, "learning_rate": 5.178261496309678e-07, "loss": 0.491, "step": 7025 }, { "epoch": 3.4143465045592705, "grad_norm": 0.07385042443044354, "learning_rate": 5.169779424880056e-07, "loss": 0.5075, "step": 7026 }, { "epoch": 3.4148328267477206, "grad_norm": 0.06872762242420284, "learning_rate": 5.161303927293204e-07, "loss": 0.4499, "step": 7027 }, { "epoch": 3.4153191489361703, "grad_norm": 0.07087068488454867, "learning_rate": 5.152835004791951e-07, "loss": 0.4861, "step": 7028 }, { "epoch": 3.41580547112462, "grad_norm": 0.07211812711599211, "learning_rate": 5.144372658618175e-07, "loss": 0.5175, "step": 7029 }, { "epoch": 3.41629179331307, "grad_norm": 0.07242957922778535, "learning_rate": 5.135916890012776e-07, "loss": 0.5278, "step": 7030 }, { "epoch": 3.4167781155015198, "grad_norm": 0.0681669124635261, "learning_rate": 5.127467700215705e-07, "loss": 0.5018, "step": 7031 }, { "epoch": 3.4172644376899695, "grad_norm": 0.07093977691217154, "learning_rate": 5.119025090465929e-07, "loss": 0.5003, "step": 7032 }, { "epoch": 3.4177507598784196, "grad_norm": 0.07215714001791727, "learning_rate": 5.110589062001464e-07, "loss": 0.5069, "step": 7033 }, { "epoch": 3.4182370820668693, "grad_norm": 0.07218972844316805, "learning_rate": 5.102159616059365e-07, "loss": 0.5189, "step": 7034 }, { "epoch": 3.418723404255319, "grad_norm": 0.06916201744900763, "learning_rate": 5.093736753875711e-07, "loss": 0.5108, "step": 7035 }, { "epoch": 3.419209726443769, "grad_norm": 0.06901520081784702, "learning_rate": 5.085320476685601e-07, "loss": 0.506, "step": 7036 }, { "epoch": 3.4196960486322188, "grad_norm": 0.07030731603597559, "learning_rate": 5.076910785723226e-07, "loss": 0.4928, "step": 7037 }, { "epoch": 3.420182370820669, "grad_norm": 0.0678502030957812, "learning_rate": 5.068507682221741e-07, "loss": 0.4508, "step": 7038 }, { "epoch": 3.4206686930091186, "grad_norm": 0.07121966076896366, "learning_rate": 5.060111167413373e-07, "loss": 0.5355, "step": 7039 }, { "epoch": 3.4211550151975683, "grad_norm": 0.07092152492180169, "learning_rate": 5.051721242529378e-07, "loss": 0.5018, "step": 7040 }, { "epoch": 3.4216413373860184, "grad_norm": 0.07301453923717974, "learning_rate": 5.043337908800039e-07, "loss": 0.5236, "step": 7041 }, { "epoch": 3.422127659574468, "grad_norm": 0.07220762767794195, "learning_rate": 5.034961167454677e-07, "loss": 0.4998, "step": 7042 }, { "epoch": 3.4226139817629178, "grad_norm": 0.07027668722284915, "learning_rate": 5.02659101972165e-07, "loss": 0.4808, "step": 7043 }, { "epoch": 3.423100303951368, "grad_norm": 0.07123364200597594, "learning_rate": 5.018227466828341e-07, "loss": 0.5286, "step": 7044 }, { "epoch": 3.4235866261398176, "grad_norm": 0.06898968630479926, "learning_rate": 5.009870510001175e-07, "loss": 0.4512, "step": 7045 }, { "epoch": 3.4240729483282673, "grad_norm": 0.07309872312298674, "learning_rate": 5.0015201504656e-07, "loss": 0.5126, "step": 7046 }, { "epoch": 3.4245592705167174, "grad_norm": 0.06961150293663415, "learning_rate": 4.993176389446103e-07, "loss": 0.4945, "step": 7047 }, { "epoch": 3.425045592705167, "grad_norm": 0.0718692549643475, "learning_rate": 4.984839228166205e-07, "loss": 0.4987, "step": 7048 }, { "epoch": 3.425531914893617, "grad_norm": 0.06981846342767056, "learning_rate": 4.97650866784845e-07, "loss": 0.5103, "step": 7049 }, { "epoch": 3.426018237082067, "grad_norm": 0.071170215354394, "learning_rate": 4.968184709714424e-07, "loss": 0.5005, "step": 7050 }, { "epoch": 3.4265045592705166, "grad_norm": 0.07067289717994367, "learning_rate": 4.959867354984743e-07, "loss": 0.5275, "step": 7051 }, { "epoch": 3.4269908814589667, "grad_norm": 0.07218932808030425, "learning_rate": 4.951556604879049e-07, "loss": 0.5139, "step": 7052 }, { "epoch": 3.4274772036474164, "grad_norm": 0.07135808243745731, "learning_rate": 4.943252460616016e-07, "loss": 0.5249, "step": 7053 }, { "epoch": 3.4279635258358665, "grad_norm": 0.06967460094210229, "learning_rate": 4.934954923413359e-07, "loss": 0.5056, "step": 7054 }, { "epoch": 3.428449848024316, "grad_norm": 0.07398164664889825, "learning_rate": 4.926663994487813e-07, "loss": 0.5343, "step": 7055 }, { "epoch": 3.428936170212766, "grad_norm": 0.07061688764260694, "learning_rate": 4.918379675055152e-07, "loss": 0.5019, "step": 7056 }, { "epoch": 3.429422492401216, "grad_norm": 0.0693050127553063, "learning_rate": 4.910101966330178e-07, "loss": 0.4885, "step": 7057 }, { "epoch": 3.4299088145896657, "grad_norm": 0.07164036328868739, "learning_rate": 4.90183086952672e-07, "loss": 0.5474, "step": 7058 }, { "epoch": 3.4303951367781154, "grad_norm": 0.07326086395011282, "learning_rate": 4.89356638585764e-07, "loss": 0.5103, "step": 7059 }, { "epoch": 3.4308814589665655, "grad_norm": 0.06937296923055017, "learning_rate": 4.885308516534831e-07, "loss": 0.5121, "step": 7060 }, { "epoch": 3.431367781155015, "grad_norm": 0.07241584210722997, "learning_rate": 4.877057262769219e-07, "loss": 0.5309, "step": 7061 }, { "epoch": 3.431854103343465, "grad_norm": 0.07102410199391818, "learning_rate": 4.868812625770752e-07, "loss": 0.5148, "step": 7062 }, { "epoch": 3.432340425531915, "grad_norm": 0.07120101665824168, "learning_rate": 4.860574606748419e-07, "loss": 0.5209, "step": 7063 }, { "epoch": 3.4328267477203647, "grad_norm": 0.07250009451665422, "learning_rate": 4.852343206910226e-07, "loss": 0.5154, "step": 7064 }, { "epoch": 3.433313069908815, "grad_norm": 0.0693803939931791, "learning_rate": 4.844118427463212e-07, "loss": 0.4883, "step": 7065 }, { "epoch": 3.4337993920972645, "grad_norm": 0.07548868797724302, "learning_rate": 4.835900269613458e-07, "loss": 0.5632, "step": 7066 }, { "epoch": 3.434285714285714, "grad_norm": 0.07123149948943107, "learning_rate": 4.827688734566055e-07, "loss": 0.5183, "step": 7067 }, { "epoch": 3.4347720364741643, "grad_norm": 0.07185169163908411, "learning_rate": 4.819483823525128e-07, "loss": 0.5058, "step": 7068 }, { "epoch": 3.435258358662614, "grad_norm": 0.07160810322752237, "learning_rate": 4.81128553769385e-07, "loss": 0.5022, "step": 7069 }, { "epoch": 3.4357446808510637, "grad_norm": 0.07056010585649415, "learning_rate": 4.803093878274395e-07, "loss": 0.4992, "step": 7070 }, { "epoch": 3.436231003039514, "grad_norm": 0.07026237044656844, "learning_rate": 4.794908846467977e-07, "loss": 0.526, "step": 7071 }, { "epoch": 3.4367173252279635, "grad_norm": 0.07056044928922275, "learning_rate": 4.786730443474824e-07, "loss": 0.5158, "step": 7072 }, { "epoch": 3.437203647416413, "grad_norm": 0.07304663594739744, "learning_rate": 4.778558670494232e-07, "loss": 0.536, "step": 7073 }, { "epoch": 3.4376899696048633, "grad_norm": 0.06901423513347338, "learning_rate": 4.770393528724488e-07, "loss": 0.4869, "step": 7074 }, { "epoch": 3.438176291793313, "grad_norm": 0.07032568562380762, "learning_rate": 4.7622350193629154e-07, "loss": 0.4967, "step": 7075 }, { "epoch": 3.438662613981763, "grad_norm": 0.06976932633573946, "learning_rate": 4.7540831436058697e-07, "loss": 0.4925, "step": 7076 }, { "epoch": 3.439148936170213, "grad_norm": 0.06943396048677875, "learning_rate": 4.7459379026487287e-07, "loss": 0.4759, "step": 7077 }, { "epoch": 3.4396352583586625, "grad_norm": 0.07187575923267354, "learning_rate": 4.7377992976858965e-07, "loss": 0.5189, "step": 7078 }, { "epoch": 3.4401215805471126, "grad_norm": 0.07243133155628674, "learning_rate": 4.72966732991082e-07, "loss": 0.5248, "step": 7079 }, { "epoch": 3.4406079027355623, "grad_norm": 0.06917242258743568, "learning_rate": 4.721542000515944e-07, "loss": 0.4812, "step": 7080 }, { "epoch": 3.4410942249240124, "grad_norm": 0.07165330712505101, "learning_rate": 4.713423310692761e-07, "loss": 0.5365, "step": 7081 }, { "epoch": 3.441580547112462, "grad_norm": 0.07300047992753031, "learning_rate": 4.7053112616317897e-07, "loss": 0.551, "step": 7082 }, { "epoch": 3.4420668693009118, "grad_norm": 0.06996620557840325, "learning_rate": 4.6972058545225684e-07, "loss": 0.5222, "step": 7083 }, { "epoch": 3.4425531914893615, "grad_norm": 0.06926246347018855, "learning_rate": 4.6891070905536574e-07, "loss": 0.5037, "step": 7084 }, { "epoch": 3.4430395136778116, "grad_norm": 0.07139518563847702, "learning_rate": 4.6810149709126673e-07, "loss": 0.5067, "step": 7085 }, { "epoch": 3.4435258358662613, "grad_norm": 0.07307768421863547, "learning_rate": 4.672929496786188e-07, "loss": 0.5431, "step": 7086 }, { "epoch": 3.4440121580547114, "grad_norm": 0.07133626947826453, "learning_rate": 4.6648506693598717e-07, "loss": 0.5053, "step": 7087 }, { "epoch": 3.444498480243161, "grad_norm": 0.07266562294426104, "learning_rate": 4.656778489818392e-07, "loss": 0.5209, "step": 7088 }, { "epoch": 3.4449848024316108, "grad_norm": 0.0717737756173884, "learning_rate": 4.6487129593454415e-07, "loss": 0.5372, "step": 7089 }, { "epoch": 3.445471124620061, "grad_norm": 0.06998815530908782, "learning_rate": 4.64065407912373e-07, "loss": 0.5212, "step": 7090 }, { "epoch": 3.4459574468085106, "grad_norm": 0.06968397885693899, "learning_rate": 4.6326018503350165e-07, "loss": 0.5005, "step": 7091 }, { "epoch": 3.4464437689969607, "grad_norm": 0.07104505938033598, "learning_rate": 4.624556274160058e-07, "loss": 0.5275, "step": 7092 }, { "epoch": 3.4469300911854104, "grad_norm": 0.0712905520734446, "learning_rate": 4.6165173517786543e-07, "loss": 0.5355, "step": 7093 }, { "epoch": 3.44741641337386, "grad_norm": 0.07301836762255351, "learning_rate": 4.6084850843696126e-07, "loss": 0.5305, "step": 7094 }, { "epoch": 3.44790273556231, "grad_norm": 0.07631065334930298, "learning_rate": 4.60045947311078e-07, "loss": 0.5296, "step": 7095 }, { "epoch": 3.44838905775076, "grad_norm": 0.07242896729776276, "learning_rate": 4.59244051917902e-07, "loss": 0.5438, "step": 7096 }, { "epoch": 3.4488753799392096, "grad_norm": 0.07160705541580994, "learning_rate": 4.58442822375022e-07, "loss": 0.5025, "step": 7097 }, { "epoch": 3.4493617021276597, "grad_norm": 0.072060190992709, "learning_rate": 4.576422587999296e-07, "loss": 0.5598, "step": 7098 }, { "epoch": 3.4498480243161094, "grad_norm": 0.07223336011269477, "learning_rate": 4.568423613100176e-07, "loss": 0.5504, "step": 7099 }, { "epoch": 3.450334346504559, "grad_norm": 0.07056766944609026, "learning_rate": 4.560431300225837e-07, "loss": 0.4883, "step": 7100 }, { "epoch": 3.450820668693009, "grad_norm": 0.07168714970698431, "learning_rate": 4.552445650548237e-07, "loss": 0.5216, "step": 7101 }, { "epoch": 3.451306990881459, "grad_norm": 0.07278388720838608, "learning_rate": 4.54446666523839e-07, "loss": 0.5016, "step": 7102 }, { "epoch": 3.451793313069909, "grad_norm": 0.07287780960020424, "learning_rate": 4.5364943454663245e-07, "loss": 0.4878, "step": 7103 }, { "epoch": 3.4522796352583587, "grad_norm": 0.06834876349142859, "learning_rate": 4.528528692401091e-07, "loss": 0.459, "step": 7104 }, { "epoch": 3.4527659574468084, "grad_norm": 0.0699751033364438, "learning_rate": 4.5205697072107645e-07, "loss": 0.516, "step": 7105 }, { "epoch": 3.4532522796352585, "grad_norm": 0.0718489100081899, "learning_rate": 4.512617391062435e-07, "loss": 0.5034, "step": 7106 }, { "epoch": 3.453738601823708, "grad_norm": 0.07224462156306567, "learning_rate": 4.504671745122219e-07, "loss": 0.5149, "step": 7107 }, { "epoch": 3.4542249240121583, "grad_norm": 0.07302976300880235, "learning_rate": 4.496732770555251e-07, "loss": 0.524, "step": 7108 }, { "epoch": 3.454711246200608, "grad_norm": 0.07022343940747477, "learning_rate": 4.4888004685257115e-07, "loss": 0.4994, "step": 7109 }, { "epoch": 3.4551975683890577, "grad_norm": 0.07179982711260838, "learning_rate": 4.480874840196764e-07, "loss": 0.5137, "step": 7110 }, { "epoch": 3.4556838905775074, "grad_norm": 0.0714773289119303, "learning_rate": 4.472955886730618e-07, "loss": 0.51, "step": 7111 }, { "epoch": 3.4561702127659575, "grad_norm": 0.07253153438209857, "learning_rate": 4.4650436092884995e-07, "loss": 0.5076, "step": 7112 }, { "epoch": 3.456656534954407, "grad_norm": 0.07163446422150273, "learning_rate": 4.457138009030654e-07, "loss": 0.5384, "step": 7113 }, { "epoch": 3.4571428571428573, "grad_norm": 0.0732425096355872, "learning_rate": 4.449239087116353e-07, "loss": 0.5602, "step": 7114 }, { "epoch": 3.457629179331307, "grad_norm": 0.070089514892091, "learning_rate": 4.4413468447038645e-07, "loss": 0.4952, "step": 7115 }, { "epoch": 3.4581155015197567, "grad_norm": 0.06946903703312084, "learning_rate": 4.433461282950513e-07, "loss": 0.4779, "step": 7116 }, { "epoch": 3.458601823708207, "grad_norm": 0.07162757805626836, "learning_rate": 4.425582403012618e-07, "loss": 0.4773, "step": 7117 }, { "epoch": 3.4590881458966565, "grad_norm": 0.07459478433037206, "learning_rate": 4.4177102060455337e-07, "loss": 0.5424, "step": 7118 }, { "epoch": 3.4595744680851066, "grad_norm": 0.07047360814388473, "learning_rate": 4.4098446932036245e-07, "loss": 0.5208, "step": 7119 }, { "epoch": 3.4600607902735563, "grad_norm": 0.0715903661063747, "learning_rate": 4.401985865640285e-07, "loss": 0.5153, "step": 7120 }, { "epoch": 3.460547112462006, "grad_norm": 0.0725549678723102, "learning_rate": 4.394133724507915e-07, "loss": 0.5404, "step": 7121 }, { "epoch": 3.461033434650456, "grad_norm": 0.07160869326361206, "learning_rate": 4.386288270957945e-07, "loss": 0.5229, "step": 7122 }, { "epoch": 3.461519756838906, "grad_norm": 0.07246517266938815, "learning_rate": 4.378449506140825e-07, "loss": 0.5311, "step": 7123 }, { "epoch": 3.4620060790273555, "grad_norm": 0.07048331845904107, "learning_rate": 4.3706174312060144e-07, "loss": 0.4768, "step": 7124 }, { "epoch": 3.4624924012158056, "grad_norm": 0.07085176001446238, "learning_rate": 4.362792047302006e-07, "loss": 0.5045, "step": 7125 }, { "epoch": 3.4629787234042553, "grad_norm": 0.06929619781996207, "learning_rate": 4.3549733555762865e-07, "loss": 0.4709, "step": 7126 }, { "epoch": 3.463465045592705, "grad_norm": 0.06929348444624528, "learning_rate": 4.3471613571754e-07, "loss": 0.5107, "step": 7127 }, { "epoch": 3.463951367781155, "grad_norm": 0.07065988592529936, "learning_rate": 4.339356053244881e-07, "loss": 0.5002, "step": 7128 }, { "epoch": 3.4644376899696048, "grad_norm": 0.0730719880299531, "learning_rate": 4.331557444929291e-07, "loss": 0.5204, "step": 7129 }, { "epoch": 3.464924012158055, "grad_norm": 0.07100835518529446, "learning_rate": 4.323765533372193e-07, "loss": 0.502, "step": 7130 }, { "epoch": 3.4654103343465046, "grad_norm": 0.071465428028525, "learning_rate": 4.3159803197161956e-07, "loss": 0.4954, "step": 7131 }, { "epoch": 3.4658966565349543, "grad_norm": 0.07015204160106535, "learning_rate": 4.308201805102907e-07, "loss": 0.5164, "step": 7132 }, { "epoch": 3.4663829787234044, "grad_norm": 0.07125852329743032, "learning_rate": 4.3004299906729553e-07, "loss": 0.5345, "step": 7133 }, { "epoch": 3.466869300911854, "grad_norm": 0.07147334100257322, "learning_rate": 4.292664877565994e-07, "loss": 0.5302, "step": 7134 }, { "epoch": 3.4673556231003038, "grad_norm": 0.06994256639555531, "learning_rate": 4.2849064669206907e-07, "loss": 0.5232, "step": 7135 }, { "epoch": 3.467841945288754, "grad_norm": 0.0690320813027386, "learning_rate": 4.277154759874719e-07, "loss": 0.5092, "step": 7136 }, { "epoch": 3.4683282674772036, "grad_norm": 0.07234131221540303, "learning_rate": 4.2694097575647906e-07, "loss": 0.5211, "step": 7137 }, { "epoch": 3.4688145896656533, "grad_norm": 0.06711167917497439, "learning_rate": 4.26167146112661e-07, "loss": 0.4939, "step": 7138 }, { "epoch": 3.4693009118541034, "grad_norm": 0.07143250277257142, "learning_rate": 4.2539398716949233e-07, "loss": 0.5179, "step": 7139 }, { "epoch": 3.469787234042553, "grad_norm": 0.07176964242052265, "learning_rate": 4.2462149904034686e-07, "loss": 0.4986, "step": 7140 }, { "epoch": 3.470273556231003, "grad_norm": 0.06903382031338814, "learning_rate": 4.238496818385018e-07, "loss": 0.495, "step": 7141 }, { "epoch": 3.470759878419453, "grad_norm": 0.07223861402373928, "learning_rate": 4.2307853567713495e-07, "loss": 0.5513, "step": 7142 }, { "epoch": 3.4712462006079026, "grad_norm": 0.07446774725994262, "learning_rate": 4.2230806066932695e-07, "loss": 0.5224, "step": 7143 }, { "epoch": 3.4717325227963527, "grad_norm": 0.07076187304073288, "learning_rate": 4.215382569280585e-07, "loss": 0.4988, "step": 7144 }, { "epoch": 3.4722188449848024, "grad_norm": 0.0723513833382034, "learning_rate": 4.2076912456621265e-07, "loss": 0.5322, "step": 7145 }, { "epoch": 3.4727051671732525, "grad_norm": 0.06886845431872449, "learning_rate": 4.200006636965742e-07, "loss": 0.4924, "step": 7146 }, { "epoch": 3.473191489361702, "grad_norm": 0.0729523350576455, "learning_rate": 4.192328744318291e-07, "loss": 0.4722, "step": 7147 }, { "epoch": 3.473677811550152, "grad_norm": 0.06954424549529944, "learning_rate": 4.1846575688456516e-07, "loss": 0.5167, "step": 7148 }, { "epoch": 3.474164133738602, "grad_norm": 0.0713360878554759, "learning_rate": 4.1769931116727114e-07, "loss": 0.5204, "step": 7149 }, { "epoch": 3.4746504559270517, "grad_norm": 0.07207305888129763, "learning_rate": 4.169335373923372e-07, "loss": 0.505, "step": 7150 }, { "epoch": 3.4751367781155014, "grad_norm": 0.06919764667063984, "learning_rate": 4.1616843567205636e-07, "loss": 0.4661, "step": 7151 }, { "epoch": 3.4756231003039515, "grad_norm": 0.07080383094334816, "learning_rate": 4.154040061186215e-07, "loss": 0.5224, "step": 7152 }, { "epoch": 3.476109422492401, "grad_norm": 0.0684666964684904, "learning_rate": 4.14640248844127e-07, "loss": 0.4823, "step": 7153 }, { "epoch": 3.476595744680851, "grad_norm": 0.0710639276185835, "learning_rate": 4.1387716396057044e-07, "loss": 0.5157, "step": 7154 }, { "epoch": 3.477082066869301, "grad_norm": 0.07250362419442305, "learning_rate": 4.1311475157984895e-07, "loss": 0.4864, "step": 7155 }, { "epoch": 3.4775683890577507, "grad_norm": 0.07525166273514451, "learning_rate": 4.123530118137609e-07, "loss": 0.5422, "step": 7156 }, { "epoch": 3.478054711246201, "grad_norm": 0.06877918690737896, "learning_rate": 4.1159194477400797e-07, "loss": 0.5068, "step": 7157 }, { "epoch": 3.4785410334346505, "grad_norm": 0.06844393142806099, "learning_rate": 4.108315505721916e-07, "loss": 0.4806, "step": 7158 }, { "epoch": 3.4790273556231, "grad_norm": 0.07071825519917485, "learning_rate": 4.100718293198147e-07, "loss": 0.5156, "step": 7159 }, { "epoch": 3.4795136778115503, "grad_norm": 0.06990493781272573, "learning_rate": 4.093127811282821e-07, "loss": 0.4915, "step": 7160 }, { "epoch": 3.48, "grad_norm": 0.07077455058971847, "learning_rate": 4.085544061088992e-07, "loss": 0.4807, "step": 7161 }, { "epoch": 3.4804863221884497, "grad_norm": 0.07137638945796826, "learning_rate": 4.0779670437287475e-07, "loss": 0.5388, "step": 7162 }, { "epoch": 3.4809726443769, "grad_norm": 0.07077230534039745, "learning_rate": 4.0703967603131334e-07, "loss": 0.497, "step": 7163 }, { "epoch": 3.4814589665653495, "grad_norm": 0.07088931557799548, "learning_rate": 4.0628332119522827e-07, "loss": 0.5151, "step": 7164 }, { "epoch": 3.481945288753799, "grad_norm": 0.07069425624386806, "learning_rate": 4.055276399755287e-07, "loss": 0.5009, "step": 7165 }, { "epoch": 3.4824316109422493, "grad_norm": 0.07440464444834212, "learning_rate": 4.04772632483027e-07, "loss": 0.4936, "step": 7166 }, { "epoch": 3.482917933130699, "grad_norm": 0.07064961563693023, "learning_rate": 4.0401829882843635e-07, "loss": 0.517, "step": 7167 }, { "epoch": 3.483404255319149, "grad_norm": 0.07173284264035261, "learning_rate": 4.0326463912237156e-07, "loss": 0.5181, "step": 7168 }, { "epoch": 3.483890577507599, "grad_norm": 0.07045352367077301, "learning_rate": 4.0251165347534815e-07, "loss": 0.48, "step": 7169 }, { "epoch": 3.4843768996960485, "grad_norm": 0.07216283147051752, "learning_rate": 4.0175934199778275e-07, "loss": 0.4871, "step": 7170 }, { "epoch": 3.4848632218844986, "grad_norm": 0.06990082659203009, "learning_rate": 4.010077047999933e-07, "loss": 0.5175, "step": 7171 }, { "epoch": 3.4853495440729483, "grad_norm": 0.07302629595603717, "learning_rate": 4.0025674199219877e-07, "loss": 0.5255, "step": 7172 }, { "epoch": 3.4858358662613984, "grad_norm": 0.06939565664702761, "learning_rate": 3.9950645368452e-07, "loss": 0.5, "step": 7173 }, { "epoch": 3.486322188449848, "grad_norm": 0.07109383901805268, "learning_rate": 3.987568399869773e-07, "loss": 0.4967, "step": 7174 }, { "epoch": 3.4868085106382978, "grad_norm": 0.07230052445640836, "learning_rate": 3.980079010094934e-07, "loss": 0.5249, "step": 7175 }, { "epoch": 3.487294832826748, "grad_norm": 0.07087001627414656, "learning_rate": 3.9725963686189197e-07, "loss": 0.5186, "step": 7176 }, { "epoch": 3.4877811550151976, "grad_norm": 0.07214567955124625, "learning_rate": 3.9651204765389806e-07, "loss": 0.5153, "step": 7177 }, { "epoch": 3.4882674772036473, "grad_norm": 0.06926102858760724, "learning_rate": 3.957651334951357e-07, "loss": 0.4896, "step": 7178 }, { "epoch": 3.4887537993920974, "grad_norm": 0.07232712025398089, "learning_rate": 3.950188944951311e-07, "loss": 0.501, "step": 7179 }, { "epoch": 3.489240121580547, "grad_norm": 0.06916223835487106, "learning_rate": 3.9427333076331343e-07, "loss": 0.4751, "step": 7180 }, { "epoch": 3.4897264437689968, "grad_norm": 0.07116770262447458, "learning_rate": 3.935284424090091e-07, "loss": 0.4947, "step": 7181 }, { "epoch": 3.490212765957447, "grad_norm": 0.06987756140168783, "learning_rate": 3.9278422954144965e-07, "loss": 0.4973, "step": 7182 }, { "epoch": 3.4906990881458966, "grad_norm": 0.06947174725678804, "learning_rate": 3.920406922697645e-07, "loss": 0.4817, "step": 7183 }, { "epoch": 3.4911854103343467, "grad_norm": 0.07122649105872585, "learning_rate": 3.9129783070298523e-07, "loss": 0.4957, "step": 7184 }, { "epoch": 3.4916717325227964, "grad_norm": 0.07352646944774852, "learning_rate": 3.9055564495004306e-07, "loss": 0.499, "step": 7185 }, { "epoch": 3.492158054711246, "grad_norm": 0.06801638145018832, "learning_rate": 3.898141351197726e-07, "loss": 0.4684, "step": 7186 }, { "epoch": 3.492644376899696, "grad_norm": 0.06917360868985718, "learning_rate": 3.8907330132090694e-07, "loss": 0.4778, "step": 7187 }, { "epoch": 3.493130699088146, "grad_norm": 0.06850481430670212, "learning_rate": 3.8833314366208077e-07, "loss": 0.5036, "step": 7188 }, { "epoch": 3.4936170212765956, "grad_norm": 0.07086381384672834, "learning_rate": 3.875936622518306e-07, "loss": 0.5007, "step": 7189 }, { "epoch": 3.4941033434650457, "grad_norm": 0.07178627609022711, "learning_rate": 3.8685485719859253e-07, "loss": 0.5087, "step": 7190 }, { "epoch": 3.4945896656534954, "grad_norm": 0.07445536827016301, "learning_rate": 3.861167286107037e-07, "loss": 0.5219, "step": 7191 }, { "epoch": 3.495075987841945, "grad_norm": 0.06917347367775835, "learning_rate": 3.853792765964032e-07, "loss": 0.5007, "step": 7192 }, { "epoch": 3.495562310030395, "grad_norm": 0.07141310309123573, "learning_rate": 3.846425012638283e-07, "loss": 0.5113, "step": 7193 }, { "epoch": 3.496048632218845, "grad_norm": 0.07118210431123688, "learning_rate": 3.839064027210204e-07, "loss": 0.5089, "step": 7194 }, { "epoch": 3.496534954407295, "grad_norm": 0.06976991006173139, "learning_rate": 3.831709810759188e-07, "loss": 0.4918, "step": 7195 }, { "epoch": 3.4970212765957447, "grad_norm": 0.0695214866054242, "learning_rate": 3.824362364363654e-07, "loss": 0.5112, "step": 7196 }, { "epoch": 3.4970212765957447, "eval_loss": 0.5695327520370483, "eval_runtime": 105.0873, "eval_samples_per_second": 288.836, "eval_steps_per_second": 36.113, "step": 7196 }, { "epoch": 3.4975075987841944, "grad_norm": 0.07127541092472973, "learning_rate": 3.817021689101019e-07, "loss": 0.4985, "step": 7197 }, { "epoch": 3.4979939209726445, "grad_norm": 0.06933232372142334, "learning_rate": 3.809687786047711e-07, "loss": 0.4956, "step": 7198 }, { "epoch": 3.498480243161094, "grad_norm": 0.06976287854341931, "learning_rate": 3.8023606562791584e-07, "loss": 0.507, "step": 7199 }, { "epoch": 3.4989665653495443, "grad_norm": 0.07417999133780669, "learning_rate": 3.795040300869812e-07, "loss": 0.5355, "step": 7200 }, { "epoch": 3.499452887537994, "grad_norm": 0.06957336907711105, "learning_rate": 3.7877267208931147e-07, "loss": 0.4784, "step": 7201 }, { "epoch": 3.4999392097264437, "grad_norm": 0.06993007321014524, "learning_rate": 3.7804199174215183e-07, "loss": 0.4783, "step": 7202 }, { "epoch": 3.5004255319148934, "grad_norm": 0.07071303000219951, "learning_rate": 3.773119891526483e-07, "loss": 0.5265, "step": 7203 }, { "epoch": 3.5009118541033435, "grad_norm": 0.0700252671593182, "learning_rate": 3.7658266442784754e-07, "loss": 0.4978, "step": 7204 }, { "epoch": 3.501398176291793, "grad_norm": 0.07275378205179954, "learning_rate": 3.758540176746961e-07, "loss": 0.4984, "step": 7205 }, { "epoch": 3.5018844984802433, "grad_norm": 0.07103846666310974, "learning_rate": 3.751260490000436e-07, "loss": 0.4924, "step": 7206 }, { "epoch": 3.502370820668693, "grad_norm": 0.06964549269570205, "learning_rate": 3.743987585106362e-07, "loss": 0.4747, "step": 7207 }, { "epoch": 3.5028571428571427, "grad_norm": 0.07325844307206314, "learning_rate": 3.7367214631312377e-07, "loss": 0.5368, "step": 7208 }, { "epoch": 3.503343465045593, "grad_norm": 0.07428724785785978, "learning_rate": 3.729462125140559e-07, "loss": 0.5176, "step": 7209 }, { "epoch": 3.5038297872340425, "grad_norm": 0.07274666171203868, "learning_rate": 3.7222095721988204e-07, "loss": 0.5359, "step": 7210 }, { "epoch": 3.5043161094224926, "grad_norm": 0.07000711141871847, "learning_rate": 3.7149638053695256e-07, "loss": 0.4958, "step": 7211 }, { "epoch": 3.5048024316109423, "grad_norm": 0.06953932306519946, "learning_rate": 3.707724825715192e-07, "loss": 0.5076, "step": 7212 }, { "epoch": 3.505288753799392, "grad_norm": 0.06981113823480962, "learning_rate": 3.7004926342973257e-07, "loss": 0.5096, "step": 7213 }, { "epoch": 3.505775075987842, "grad_norm": 0.07100583897763792, "learning_rate": 3.6932672321764507e-07, "loss": 0.5064, "step": 7214 }, { "epoch": 3.506261398176292, "grad_norm": 0.07209787004705707, "learning_rate": 3.686048620412086e-07, "loss": 0.5264, "step": 7215 }, { "epoch": 3.506747720364742, "grad_norm": 0.06983077455674916, "learning_rate": 3.678836800062763e-07, "loss": 0.5243, "step": 7216 }, { "epoch": 3.5072340425531916, "grad_norm": 0.07071790008837835, "learning_rate": 3.671631772186007e-07, "loss": 0.5154, "step": 7217 }, { "epoch": 3.5077203647416413, "grad_norm": 0.07099082095666753, "learning_rate": 3.664433537838363e-07, "loss": 0.5013, "step": 7218 }, { "epoch": 3.508206686930091, "grad_norm": 0.06786772715700722, "learning_rate": 3.6572420980753643e-07, "loss": 0.4771, "step": 7219 }, { "epoch": 3.508693009118541, "grad_norm": 0.07187268531739176, "learning_rate": 3.6500574539515557e-07, "loss": 0.4585, "step": 7220 }, { "epoch": 3.509179331306991, "grad_norm": 0.07140916415400446, "learning_rate": 3.642879606520494e-07, "loss": 0.5282, "step": 7221 }, { "epoch": 3.509665653495441, "grad_norm": 0.06872218671094972, "learning_rate": 3.635708556834705e-07, "loss": 0.5035, "step": 7222 }, { "epoch": 3.5101519756838906, "grad_norm": 0.07046853781545757, "learning_rate": 3.628544305945758e-07, "loss": 0.4945, "step": 7223 }, { "epoch": 3.5106382978723403, "grad_norm": 0.07031160272234435, "learning_rate": 3.6213868549042073e-07, "loss": 0.504, "step": 7224 }, { "epoch": 3.5111246200607904, "grad_norm": 0.06806264170788083, "learning_rate": 3.614236204759608e-07, "loss": 0.4876, "step": 7225 }, { "epoch": 3.51161094224924, "grad_norm": 0.06855526913994629, "learning_rate": 3.607092356560521e-07, "loss": 0.4654, "step": 7226 }, { "epoch": 3.51209726443769, "grad_norm": 0.0744999005612875, "learning_rate": 3.5999553113545193e-07, "loss": 0.5299, "step": 7227 }, { "epoch": 3.51258358662614, "grad_norm": 0.07141921773306568, "learning_rate": 3.5928250701881606e-07, "loss": 0.5109, "step": 7228 }, { "epoch": 3.5130699088145896, "grad_norm": 0.07180466879930848, "learning_rate": 3.5857016341070136e-07, "loss": 0.5072, "step": 7229 }, { "epoch": 3.5135562310030393, "grad_norm": 0.07056873086957717, "learning_rate": 3.57858500415566e-07, "loss": 0.5182, "step": 7230 }, { "epoch": 3.5140425531914894, "grad_norm": 0.06932979467409334, "learning_rate": 3.5714751813776593e-07, "loss": 0.4707, "step": 7231 }, { "epoch": 3.514528875379939, "grad_norm": 0.07354808645815163, "learning_rate": 3.564372166815594e-07, "loss": 0.5364, "step": 7232 }, { "epoch": 3.515015197568389, "grad_norm": 0.07024151848785647, "learning_rate": 3.557275961511042e-07, "loss": 0.5036, "step": 7233 }, { "epoch": 3.515501519756839, "grad_norm": 0.06963388563562779, "learning_rate": 3.550186566504576e-07, "loss": 0.4909, "step": 7234 }, { "epoch": 3.5159878419452886, "grad_norm": 0.07524825802159825, "learning_rate": 3.5431039828357717e-07, "loss": 0.5306, "step": 7235 }, { "epoch": 3.5164741641337387, "grad_norm": 0.07174939743415036, "learning_rate": 3.53602821154323e-07, "loss": 0.5209, "step": 7236 }, { "epoch": 3.5169604863221884, "grad_norm": 0.07235563433780665, "learning_rate": 3.5289592536645047e-07, "loss": 0.5002, "step": 7237 }, { "epoch": 3.5174468085106385, "grad_norm": 0.07295740900489246, "learning_rate": 3.5218971102361945e-07, "loss": 0.5156, "step": 7238 }, { "epoch": 3.517933130699088, "grad_norm": 0.07051145344528857, "learning_rate": 3.514841782293882e-07, "loss": 0.5348, "step": 7239 }, { "epoch": 3.518419452887538, "grad_norm": 0.06978549215038118, "learning_rate": 3.507793270872145e-07, "loss": 0.4936, "step": 7240 }, { "epoch": 3.518905775075988, "grad_norm": 0.0725950057773154, "learning_rate": 3.500751577004574e-07, "loss": 0.5079, "step": 7241 }, { "epoch": 3.5193920972644377, "grad_norm": 0.07063254131635323, "learning_rate": 3.4937167017237484e-07, "loss": 0.5006, "step": 7242 }, { "epoch": 3.5198784194528874, "grad_norm": 0.07208035373266007, "learning_rate": 3.4866886460612536e-07, "loss": 0.537, "step": 7243 }, { "epoch": 3.5203647416413375, "grad_norm": 0.07053910416583992, "learning_rate": 3.479667411047677e-07, "loss": 0.5043, "step": 7244 }, { "epoch": 3.520851063829787, "grad_norm": 0.07020601152892847, "learning_rate": 3.4726529977126e-07, "loss": 0.483, "step": 7245 }, { "epoch": 3.521337386018237, "grad_norm": 0.07195262181511256, "learning_rate": 3.465645407084611e-07, "loss": 0.5521, "step": 7246 }, { "epoch": 3.521823708206687, "grad_norm": 0.07078835399859162, "learning_rate": 3.4586446401912833e-07, "loss": 0.4946, "step": 7247 }, { "epoch": 3.5223100303951367, "grad_norm": 0.07257120723260403, "learning_rate": 3.451650698059211e-07, "loss": 0.5104, "step": 7248 }, { "epoch": 3.522796352583587, "grad_norm": 0.07289859046708881, "learning_rate": 3.444663581713975e-07, "loss": 0.5166, "step": 7249 }, { "epoch": 3.5232826747720365, "grad_norm": 0.07032179912809126, "learning_rate": 3.4376832921801494e-07, "loss": 0.4812, "step": 7250 }, { "epoch": 3.523768996960486, "grad_norm": 0.06983409897908303, "learning_rate": 3.4307098304813215e-07, "loss": 0.4849, "step": 7251 }, { "epoch": 3.5242553191489363, "grad_norm": 0.0700615456848194, "learning_rate": 3.423743197640067e-07, "loss": 0.5053, "step": 7252 }, { "epoch": 3.524741641337386, "grad_norm": 0.06886085254109796, "learning_rate": 3.4167833946779696e-07, "loss": 0.4834, "step": 7253 }, { "epoch": 3.525227963525836, "grad_norm": 0.06909140227938304, "learning_rate": 3.40983042261559e-07, "loss": 0.5098, "step": 7254 }, { "epoch": 3.525714285714286, "grad_norm": 0.07250910437189453, "learning_rate": 3.4028842824725183e-07, "loss": 0.5175, "step": 7255 }, { "epoch": 3.5262006079027355, "grad_norm": 0.07269852744018593, "learning_rate": 3.3959449752673235e-07, "loss": 0.5163, "step": 7256 }, { "epoch": 3.526686930091185, "grad_norm": 0.07032829211257909, "learning_rate": 3.3890125020175693e-07, "loss": 0.5105, "step": 7257 }, { "epoch": 3.5271732522796353, "grad_norm": 0.07452891215519673, "learning_rate": 3.3820868637398305e-07, "loss": 0.5469, "step": 7258 }, { "epoch": 3.527659574468085, "grad_norm": 0.07154076504998388, "learning_rate": 3.3751680614496686e-07, "loss": 0.5103, "step": 7259 }, { "epoch": 3.528145896656535, "grad_norm": 0.06965758296055197, "learning_rate": 3.3682560961616537e-07, "loss": 0.5097, "step": 7260 }, { "epoch": 3.528632218844985, "grad_norm": 0.06982160264205557, "learning_rate": 3.3613509688893433e-07, "loss": 0.4888, "step": 7261 }, { "epoch": 3.5291185410334345, "grad_norm": 0.06985765804356635, "learning_rate": 3.354452680645298e-07, "loss": 0.5221, "step": 7262 }, { "epoch": 3.5296048632218846, "grad_norm": 0.0702127760826987, "learning_rate": 3.3475612324410656e-07, "loss": 0.5015, "step": 7263 }, { "epoch": 3.5300911854103343, "grad_norm": 0.07270437601568784, "learning_rate": 3.340676625287209e-07, "loss": 0.5217, "step": 7264 }, { "epoch": 3.5305775075987844, "grad_norm": 0.07272582472340099, "learning_rate": 3.333798860193277e-07, "loss": 0.5135, "step": 7265 }, { "epoch": 3.531063829787234, "grad_norm": 0.07086140237375115, "learning_rate": 3.3269279381678065e-07, "loss": 0.5148, "step": 7266 }, { "epoch": 3.531550151975684, "grad_norm": 0.07013359650019135, "learning_rate": 3.3200638602183533e-07, "loss": 0.5103, "step": 7267 }, { "epoch": 3.5320364741641335, "grad_norm": 0.06846799013128262, "learning_rate": 3.3132066273514397e-07, "loss": 0.474, "step": 7268 }, { "epoch": 3.5325227963525836, "grad_norm": 0.07381697955492518, "learning_rate": 3.3063562405726277e-07, "loss": 0.5211, "step": 7269 }, { "epoch": 3.5330091185410333, "grad_norm": 0.07164947525743896, "learning_rate": 3.299512700886415e-07, "loss": 0.5184, "step": 7270 }, { "epoch": 3.5334954407294834, "grad_norm": 0.07025197422814637, "learning_rate": 3.292676009296353e-07, "loss": 0.5163, "step": 7271 }, { "epoch": 3.533981762917933, "grad_norm": 0.07518078518688896, "learning_rate": 3.285846166804946e-07, "loss": 0.5232, "step": 7272 }, { "epoch": 3.5344680851063828, "grad_norm": 0.07443116926836839, "learning_rate": 3.2790231744137315e-07, "loss": 0.5277, "step": 7273 }, { "epoch": 3.534954407294833, "grad_norm": 0.07140071642719877, "learning_rate": 3.27220703312322e-07, "loss": 0.5168, "step": 7274 }, { "epoch": 3.5354407294832826, "grad_norm": 0.06914373463769209, "learning_rate": 3.265397743932913e-07, "loss": 0.5159, "step": 7275 }, { "epoch": 3.5359270516717327, "grad_norm": 0.07184274483734404, "learning_rate": 3.2585953078413225e-07, "loss": 0.5113, "step": 7276 }, { "epoch": 3.5364133738601824, "grad_norm": 0.06965237554484427, "learning_rate": 3.25179972584595e-07, "loss": 0.4758, "step": 7277 }, { "epoch": 3.536899696048632, "grad_norm": 0.07108640543151509, "learning_rate": 3.245010998943282e-07, "loss": 0.4964, "step": 7278 }, { "epoch": 3.537386018237082, "grad_norm": 0.07048754731021302, "learning_rate": 3.2382291281288113e-07, "loss": 0.5292, "step": 7279 }, { "epoch": 3.537872340425532, "grad_norm": 0.07127731989257474, "learning_rate": 3.231454114397026e-07, "loss": 0.4912, "step": 7280 }, { "epoch": 3.538358662613982, "grad_norm": 0.07204628855333067, "learning_rate": 3.224685958741408e-07, "loss": 0.5294, "step": 7281 }, { "epoch": 3.5388449848024317, "grad_norm": 0.07064881150887918, "learning_rate": 3.21792466215442e-07, "loss": 0.5127, "step": 7282 }, { "epoch": 3.5393313069908814, "grad_norm": 0.07036704136607158, "learning_rate": 3.2111702256275355e-07, "loss": 0.5008, "step": 7283 }, { "epoch": 3.539817629179331, "grad_norm": 0.07207000377795311, "learning_rate": 3.2044226501512233e-07, "loss": 0.5365, "step": 7284 }, { "epoch": 3.540303951367781, "grad_norm": 0.0704674492089507, "learning_rate": 3.197681936714919e-07, "loss": 0.5217, "step": 7285 }, { "epoch": 3.540790273556231, "grad_norm": 0.07244311802596472, "learning_rate": 3.1909480863070884e-07, "loss": 0.5238, "step": 7286 }, { "epoch": 3.541276595744681, "grad_norm": 0.0730980193009039, "learning_rate": 3.184221099915163e-07, "loss": 0.5163, "step": 7287 }, { "epoch": 3.5417629179331307, "grad_norm": 0.07489262923338012, "learning_rate": 3.177500978525594e-07, "loss": 0.5678, "step": 7288 }, { "epoch": 3.5422492401215804, "grad_norm": 0.07119119841417239, "learning_rate": 3.1707877231237916e-07, "loss": 0.5199, "step": 7289 }, { "epoch": 3.5427355623100305, "grad_norm": 0.07298154464650503, "learning_rate": 3.164081334694186e-07, "loss": 0.5816, "step": 7290 }, { "epoch": 3.54322188449848, "grad_norm": 0.07122694185732, "learning_rate": 3.157381814220206e-07, "loss": 0.5424, "step": 7291 }, { "epoch": 3.5437082066869303, "grad_norm": 0.07261416258715955, "learning_rate": 3.150689162684245e-07, "loss": 0.5518, "step": 7292 }, { "epoch": 3.54419452887538, "grad_norm": 0.06949925293479649, "learning_rate": 3.1440033810677117e-07, "loss": 0.5205, "step": 7293 }, { "epoch": 3.5446808510638297, "grad_norm": 0.0696420813181653, "learning_rate": 3.1373244703509996e-07, "loss": 0.497, "step": 7294 }, { "epoch": 3.5451671732522794, "grad_norm": 0.07148950250450589, "learning_rate": 3.130652431513487e-07, "loss": 0.5304, "step": 7295 }, { "epoch": 3.5456534954407295, "grad_norm": 0.07056600888232763, "learning_rate": 3.1239872655335625e-07, "loss": 0.4983, "step": 7296 }, { "epoch": 3.546139817629179, "grad_norm": 0.07080671410213533, "learning_rate": 3.117328973388595e-07, "loss": 0.501, "step": 7297 }, { "epoch": 3.5466261398176293, "grad_norm": 0.06958819278284702, "learning_rate": 3.1106775560549473e-07, "loss": 0.4902, "step": 7298 }, { "epoch": 3.547112462006079, "grad_norm": 0.07069892602944006, "learning_rate": 3.104033014507968e-07, "loss": 0.495, "step": 7299 }, { "epoch": 3.5475987841945287, "grad_norm": 0.07165124410756647, "learning_rate": 3.09739534972201e-07, "loss": 0.5044, "step": 7300 }, { "epoch": 3.548085106382979, "grad_norm": 0.07255700556377429, "learning_rate": 3.0907645626704066e-07, "loss": 0.5294, "step": 7301 }, { "epoch": 3.5485714285714285, "grad_norm": 0.07089350357252436, "learning_rate": 3.0841406543254904e-07, "loss": 0.535, "step": 7302 }, { "epoch": 3.5490577507598786, "grad_norm": 0.07056033227481667, "learning_rate": 3.077523625658585e-07, "loss": 0.4949, "step": 7303 }, { "epoch": 3.5495440729483283, "grad_norm": 0.06987344071988595, "learning_rate": 3.0709134776399973e-07, "loss": 0.4992, "step": 7304 }, { "epoch": 3.550030395136778, "grad_norm": 0.07139241586942209, "learning_rate": 3.064310211239035e-07, "loss": 0.5283, "step": 7305 }, { "epoch": 3.550516717325228, "grad_norm": 0.06995996907834645, "learning_rate": 3.0577138274239913e-07, "loss": 0.5302, "step": 7306 }, { "epoch": 3.551003039513678, "grad_norm": 0.07210397608334039, "learning_rate": 3.0511243271621474e-07, "loss": 0.498, "step": 7307 }, { "epoch": 3.551489361702128, "grad_norm": 0.07764076830224335, "learning_rate": 3.044541711419774e-07, "loss": 0.5631, "step": 7308 }, { "epoch": 3.5519756838905776, "grad_norm": 0.07048568394689735, "learning_rate": 3.03796598116215e-07, "loss": 0.4868, "step": 7309 }, { "epoch": 3.5524620060790273, "grad_norm": 0.07073990252842037, "learning_rate": 3.0313971373535257e-07, "loss": 0.5133, "step": 7310 }, { "epoch": 3.552948328267477, "grad_norm": 0.06957175542958917, "learning_rate": 3.024835180957153e-07, "loss": 0.4561, "step": 7311 }, { "epoch": 3.553434650455927, "grad_norm": 0.07158292685283037, "learning_rate": 3.018280112935257e-07, "loss": 0.5156, "step": 7312 }, { "epoch": 3.553920972644377, "grad_norm": 0.07408736551579814, "learning_rate": 3.011731934249079e-07, "loss": 0.5567, "step": 7313 }, { "epoch": 3.554407294832827, "grad_norm": 0.07355718618666156, "learning_rate": 3.005190645858819e-07, "loss": 0.5476, "step": 7314 }, { "epoch": 3.5548936170212766, "grad_norm": 0.06999316579318449, "learning_rate": 2.998656248723686e-07, "loss": 0.4811, "step": 7315 }, { "epoch": 3.5553799392097263, "grad_norm": 0.07356577301068572, "learning_rate": 2.992128743801881e-07, "loss": 0.5477, "step": 7316 }, { "epoch": 3.5558662613981764, "grad_norm": 0.0709066460044624, "learning_rate": 2.985608132050588e-07, "loss": 0.4957, "step": 7317 }, { "epoch": 3.556352583586626, "grad_norm": 0.0737971307981053, "learning_rate": 2.9790944144259757e-07, "loss": 0.538, "step": 7318 }, { "epoch": 3.556838905775076, "grad_norm": 0.0704499061960886, "learning_rate": 2.9725875918832084e-07, "loss": 0.5193, "step": 7319 }, { "epoch": 3.557325227963526, "grad_norm": 0.07020270123634124, "learning_rate": 2.9660876653764435e-07, "loss": 0.4957, "step": 7320 }, { "epoch": 3.5578115501519756, "grad_norm": 0.07162127256981395, "learning_rate": 2.9595946358588144e-07, "loss": 0.5485, "step": 7321 }, { "epoch": 3.5582978723404253, "grad_norm": 0.07157321804593564, "learning_rate": 2.953108504282454e-07, "loss": 0.5151, "step": 7322 }, { "epoch": 3.5587841945288754, "grad_norm": 0.07160379601788354, "learning_rate": 2.9466292715984724e-07, "loss": 0.5226, "step": 7323 }, { "epoch": 3.559270516717325, "grad_norm": 0.06904822263240454, "learning_rate": 2.9401569387569885e-07, "loss": 0.4852, "step": 7324 }, { "epoch": 3.559756838905775, "grad_norm": 0.07243466734547406, "learning_rate": 2.933691506707087e-07, "loss": 0.531, "step": 7325 }, { "epoch": 3.560243161094225, "grad_norm": 0.06975049768054856, "learning_rate": 2.927232976396849e-07, "loss": 0.4899, "step": 7326 }, { "epoch": 3.5607294832826746, "grad_norm": 0.0716366338504042, "learning_rate": 2.9207813487733493e-07, "loss": 0.5255, "step": 7327 }, { "epoch": 3.5612158054711247, "grad_norm": 0.07091388921273582, "learning_rate": 2.91433662478266e-07, "loss": 0.4954, "step": 7328 }, { "epoch": 3.5617021276595744, "grad_norm": 0.07352913450742303, "learning_rate": 2.907898805369797e-07, "loss": 0.521, "step": 7329 }, { "epoch": 3.5621884498480245, "grad_norm": 0.07051134063364362, "learning_rate": 2.901467891478815e-07, "loss": 0.4929, "step": 7330 }, { "epoch": 3.562674772036474, "grad_norm": 0.06977906695245811, "learning_rate": 2.895043884052723e-07, "loss": 0.5167, "step": 7331 }, { "epoch": 3.563161094224924, "grad_norm": 0.07052924754278243, "learning_rate": 2.8886267840335326e-07, "loss": 0.4674, "step": 7332 }, { "epoch": 3.563647416413374, "grad_norm": 0.07141125812518652, "learning_rate": 2.8822165923622415e-07, "loss": 0.5193, "step": 7333 }, { "epoch": 3.5641337386018237, "grad_norm": 0.07141514804061745, "learning_rate": 2.8758133099788257e-07, "loss": 0.4861, "step": 7334 }, { "epoch": 3.564620060790274, "grad_norm": 0.0707225607741812, "learning_rate": 2.8694169378222614e-07, "loss": 0.5111, "step": 7335 }, { "epoch": 3.5651063829787235, "grad_norm": 0.07074623550470321, "learning_rate": 2.863027476830499e-07, "loss": 0.5257, "step": 7336 }, { "epoch": 3.565592705167173, "grad_norm": 0.07049540091389571, "learning_rate": 2.856644927940477e-07, "loss": 0.5098, "step": 7337 }, { "epoch": 3.566079027355623, "grad_norm": 0.07357890612930548, "learning_rate": 2.8502692920881314e-07, "loss": 0.5293, "step": 7338 }, { "epoch": 3.566565349544073, "grad_norm": 0.06851401088382048, "learning_rate": 2.8439005702083745e-07, "loss": 0.4753, "step": 7339 }, { "epoch": 3.5670516717325227, "grad_norm": 0.06934347719824543, "learning_rate": 2.837538763235104e-07, "loss": 0.504, "step": 7340 }, { "epoch": 3.567537993920973, "grad_norm": 0.07084414331060598, "learning_rate": 2.8311838721012117e-07, "loss": 0.5049, "step": 7341 }, { "epoch": 3.5680243161094225, "grad_norm": 0.07223718426749741, "learning_rate": 2.8248358977385647e-07, "loss": 0.4937, "step": 7342 }, { "epoch": 3.568510638297872, "grad_norm": 0.07148240198208974, "learning_rate": 2.8184948410780234e-07, "loss": 0.5267, "step": 7343 }, { "epoch": 3.5689969604863223, "grad_norm": 0.0686633788215377, "learning_rate": 2.8121607030494325e-07, "loss": 0.4858, "step": 7344 }, { "epoch": 3.569483282674772, "grad_norm": 0.07098816534537977, "learning_rate": 2.8058334845816214e-07, "loss": 0.5174, "step": 7345 }, { "epoch": 3.569969604863222, "grad_norm": 0.07012065138151906, "learning_rate": 2.7995131866024093e-07, "loss": 0.5128, "step": 7346 }, { "epoch": 3.570455927051672, "grad_norm": 0.07181431304103206, "learning_rate": 2.7931998100385826e-07, "loss": 0.5493, "step": 7347 }, { "epoch": 3.5709422492401215, "grad_norm": 0.07000204189012846, "learning_rate": 2.7868933558159393e-07, "loss": 0.5226, "step": 7348 }, { "epoch": 3.571428571428571, "grad_norm": 0.071025096261643, "learning_rate": 2.7805938248592456e-07, "loss": 0.542, "step": 7349 }, { "epoch": 3.5719148936170213, "grad_norm": 0.07075337073244381, "learning_rate": 2.7743012180922566e-07, "loss": 0.5179, "step": 7350 }, { "epoch": 3.572401215805471, "grad_norm": 0.07008650815964618, "learning_rate": 2.7680155364377073e-07, "loss": 0.499, "step": 7351 }, { "epoch": 3.572887537993921, "grad_norm": 0.0718683174311907, "learning_rate": 2.7617367808173256e-07, "loss": 0.5355, "step": 7352 }, { "epoch": 3.573373860182371, "grad_norm": 0.0706689430312276, "learning_rate": 2.7554649521518204e-07, "loss": 0.5189, "step": 7353 }, { "epoch": 3.5738601823708205, "grad_norm": 0.0702389835659637, "learning_rate": 2.749200051360884e-07, "loss": 0.5022, "step": 7354 }, { "epoch": 3.5743465045592706, "grad_norm": 0.07344524893557296, "learning_rate": 2.7429420793631924e-07, "loss": 0.5148, "step": 7355 }, { "epoch": 3.5748328267477203, "grad_norm": 0.07093299060198022, "learning_rate": 2.7366910370764e-07, "loss": 0.5154, "step": 7356 }, { "epoch": 3.5753191489361704, "grad_norm": 0.06930701045478772, "learning_rate": 2.7304469254171626e-07, "loss": 0.5013, "step": 7357 }, { "epoch": 3.57580547112462, "grad_norm": 0.07499797266616692, "learning_rate": 2.7242097453010984e-07, "loss": 0.5077, "step": 7358 }, { "epoch": 3.57629179331307, "grad_norm": 0.07034100300012767, "learning_rate": 2.7179794976428197e-07, "loss": 0.5023, "step": 7359 }, { "epoch": 3.57677811550152, "grad_norm": 0.0704858758123458, "learning_rate": 2.7117561833559293e-07, "loss": 0.4778, "step": 7360 }, { "epoch": 3.5772644376899696, "grad_norm": 0.0694951792810299, "learning_rate": 2.705539803353008e-07, "loss": 0.4869, "step": 7361 }, { "epoch": 3.5777507598784197, "grad_norm": 0.0700097501186522, "learning_rate": 2.699330358545599e-07, "loss": 0.4688, "step": 7362 }, { "epoch": 3.5782370820668694, "grad_norm": 0.07445755451914522, "learning_rate": 2.6931278498442625e-07, "loss": 0.5605, "step": 7363 }, { "epoch": 3.578723404255319, "grad_norm": 0.07057763626888834, "learning_rate": 2.686932278158516e-07, "loss": 0.4986, "step": 7364 }, { "epoch": 3.579209726443769, "grad_norm": 0.06905809186338163, "learning_rate": 2.680743644396883e-07, "loss": 0.4849, "step": 7365 }, { "epoch": 3.579696048632219, "grad_norm": 0.07077645054507545, "learning_rate": 2.6745619494668473e-07, "loss": 0.5161, "step": 7366 }, { "epoch": 3.5801823708206686, "grad_norm": 0.07061450291663751, "learning_rate": 2.668387194274885e-07, "loss": 0.4961, "step": 7367 }, { "epoch": 3.5806686930091187, "grad_norm": 0.07331287074668448, "learning_rate": 2.662219379726455e-07, "loss": 0.5272, "step": 7368 }, { "epoch": 3.5811550151975684, "grad_norm": 0.07255059252702477, "learning_rate": 2.6560585067259947e-07, "loss": 0.491, "step": 7369 }, { "epoch": 3.581641337386018, "grad_norm": 0.06985575383706202, "learning_rate": 2.649904576176932e-07, "loss": 0.465, "step": 7370 }, { "epoch": 3.582127659574468, "grad_norm": 0.0720242260152669, "learning_rate": 2.64375758898166e-07, "loss": 0.4901, "step": 7371 }, { "epoch": 3.582613981762918, "grad_norm": 0.07171093475434703, "learning_rate": 2.637617546041582e-07, "loss": 0.5016, "step": 7372 }, { "epoch": 3.583100303951368, "grad_norm": 0.0726937568670949, "learning_rate": 2.631484448257049e-07, "loss": 0.5284, "step": 7373 }, { "epoch": 3.5835866261398177, "grad_norm": 0.07183658239706207, "learning_rate": 2.6253582965274194e-07, "loss": 0.5035, "step": 7374 }, { "epoch": 3.5840729483282674, "grad_norm": 0.07423235951045551, "learning_rate": 2.6192390917510193e-07, "loss": 0.5371, "step": 7375 }, { "epoch": 3.584559270516717, "grad_norm": 0.07046762590420802, "learning_rate": 2.613126834825169e-07, "loss": 0.5689, "step": 7376 }, { "epoch": 3.585045592705167, "grad_norm": 0.07174241814782681, "learning_rate": 2.6070215266461474e-07, "loss": 0.5288, "step": 7377 }, { "epoch": 3.585531914893617, "grad_norm": 0.07064555403281207, "learning_rate": 2.6009231681092375e-07, "loss": 0.5057, "step": 7378 }, { "epoch": 3.586018237082067, "grad_norm": 0.07192003560825794, "learning_rate": 2.5948317601086905e-07, "loss": 0.5292, "step": 7379 }, { "epoch": 3.5865045592705167, "grad_norm": 0.07499300581435787, "learning_rate": 2.588747303537742e-07, "loss": 0.532, "step": 7380 }, { "epoch": 3.5869908814589664, "grad_norm": 0.07102887886070022, "learning_rate": 2.582669799288612e-07, "loss": 0.5318, "step": 7381 }, { "epoch": 3.5874772036474165, "grad_norm": 0.07089444182256209, "learning_rate": 2.5765992482524984e-07, "loss": 0.4865, "step": 7382 }, { "epoch": 3.587963525835866, "grad_norm": 0.07022833005819736, "learning_rate": 2.570535651319578e-07, "loss": 0.5586, "step": 7383 }, { "epoch": 3.5884498480243163, "grad_norm": 0.07050662195952102, "learning_rate": 2.5644790093790063e-07, "loss": 0.525, "step": 7384 }, { "epoch": 3.588936170212766, "grad_norm": 0.07280986380759177, "learning_rate": 2.5584293233189227e-07, "loss": 0.5203, "step": 7385 }, { "epoch": 3.5894224924012157, "grad_norm": 0.07027483080377553, "learning_rate": 2.5523865940264405e-07, "loss": 0.4961, "step": 7386 }, { "epoch": 3.589908814589666, "grad_norm": 0.06943702667867642, "learning_rate": 2.5463508223876663e-07, "loss": 0.5115, "step": 7387 }, { "epoch": 3.5903951367781155, "grad_norm": 0.06891489153963594, "learning_rate": 2.540322009287671e-07, "loss": 0.4773, "step": 7388 }, { "epoch": 3.590881458966565, "grad_norm": 0.07136935747486889, "learning_rate": 2.5343001556105087e-07, "loss": 0.533, "step": 7389 }, { "epoch": 3.5913677811550153, "grad_norm": 0.0696238531507961, "learning_rate": 2.528285262239233e-07, "loss": 0.4942, "step": 7390 }, { "epoch": 3.591854103343465, "grad_norm": 0.06957558191341166, "learning_rate": 2.5222773300558333e-07, "loss": 0.4924, "step": 7391 }, { "epoch": 3.5923404255319147, "grad_norm": 0.07390134284820647, "learning_rate": 2.516276359941322e-07, "loss": 0.5317, "step": 7392 }, { "epoch": 3.592826747720365, "grad_norm": 0.07053198466740905, "learning_rate": 2.510282352775667e-07, "loss": 0.4903, "step": 7393 }, { "epoch": 3.5933130699088145, "grad_norm": 0.0721362443692563, "learning_rate": 2.5042953094378263e-07, "loss": 0.4803, "step": 7394 }, { "epoch": 3.5937993920972646, "grad_norm": 0.06991093912445948, "learning_rate": 2.4983152308057255e-07, "loss": 0.4927, "step": 7395 }, { "epoch": 3.5942857142857143, "grad_norm": 0.06969785853854706, "learning_rate": 2.49234211775628e-07, "loss": 0.5184, "step": 7396 }, { "epoch": 3.594772036474164, "grad_norm": 0.07196577209444105, "learning_rate": 2.486375971165378e-07, "loss": 0.4988, "step": 7397 }, { "epoch": 3.595258358662614, "grad_norm": 0.07109875877628512, "learning_rate": 2.480416791907886e-07, "loss": 0.5006, "step": 7398 }, { "epoch": 3.595744680851064, "grad_norm": 0.07247700603611107, "learning_rate": 2.474464580857644e-07, "loss": 0.5486, "step": 7399 }, { "epoch": 3.596231003039514, "grad_norm": 0.07269753622871465, "learning_rate": 2.468519338887493e-07, "loss": 0.5443, "step": 7400 }, { "epoch": 3.5967173252279636, "grad_norm": 0.06837364821142979, "learning_rate": 2.462581066869224e-07, "loss": 0.5018, "step": 7401 }, { "epoch": 3.5972036474164133, "grad_norm": 0.07631546900480853, "learning_rate": 2.456649765673619e-07, "loss": 0.5691, "step": 7402 }, { "epoch": 3.597689969604863, "grad_norm": 0.07000531815975064, "learning_rate": 2.4507254361704314e-07, "loss": 0.5129, "step": 7403 }, { "epoch": 3.598176291793313, "grad_norm": 0.06757217200901171, "learning_rate": 2.444808079228406e-07, "loss": 0.5149, "step": 7404 }, { "epoch": 3.598662613981763, "grad_norm": 0.07296253687726682, "learning_rate": 2.438897695715253e-07, "loss": 0.5365, "step": 7405 }, { "epoch": 3.599148936170213, "grad_norm": 0.07088652055812886, "learning_rate": 2.432994286497653e-07, "loss": 0.5009, "step": 7406 }, { "epoch": 3.5996352583586626, "grad_norm": 0.07235536573855218, "learning_rate": 2.427097852441285e-07, "loss": 0.5372, "step": 7407 }, { "epoch": 3.6001215805471123, "grad_norm": 0.07394500721827958, "learning_rate": 2.42120839441079e-07, "loss": 0.4965, "step": 7408 }, { "epoch": 3.6006079027355624, "grad_norm": 0.07160582200171706, "learning_rate": 2.415325913269795e-07, "loss": 0.5119, "step": 7409 }, { "epoch": 3.601094224924012, "grad_norm": 0.06897702814331691, "learning_rate": 2.4094504098808866e-07, "loss": 0.4893, "step": 7410 }, { "epoch": 3.6015805471124622, "grad_norm": 0.07247169442824791, "learning_rate": 2.403581885105655e-07, "loss": 0.5441, "step": 7411 }, { "epoch": 3.602066869300912, "grad_norm": 0.07183658560987477, "learning_rate": 2.397720339804649e-07, "loss": 0.5198, "step": 7412 }, { "epoch": 3.6025531914893616, "grad_norm": 0.07276708641966347, "learning_rate": 2.3918657748373875e-07, "loss": 0.5024, "step": 7413 }, { "epoch": 3.6030395136778113, "grad_norm": 0.07014591244813682, "learning_rate": 2.386018191062389e-07, "loss": 0.5102, "step": 7414 }, { "epoch": 3.6035258358662614, "grad_norm": 0.07062201830488386, "learning_rate": 2.3801775893371293e-07, "loss": 0.4936, "step": 7415 }, { "epoch": 3.604012158054711, "grad_norm": 0.071802947917923, "learning_rate": 2.3743439705180725e-07, "loss": 0.527, "step": 7416 }, { "epoch": 3.604498480243161, "grad_norm": 0.06784027054515912, "learning_rate": 2.368517335460635e-07, "loss": 0.4961, "step": 7417 }, { "epoch": 3.604984802431611, "grad_norm": 0.06887758033600011, "learning_rate": 2.362697685019244e-07, "loss": 0.4993, "step": 7418 }, { "epoch": 3.6054711246200606, "grad_norm": 0.0748083992245796, "learning_rate": 2.3568850200472838e-07, "loss": 0.5514, "step": 7419 }, { "epoch": 3.6059574468085107, "grad_norm": 0.07297176482559332, "learning_rate": 2.3510793413971167e-07, "loss": 0.4907, "step": 7420 }, { "epoch": 3.6064437689969604, "grad_norm": 0.0688853916093152, "learning_rate": 2.3452806499200675e-07, "loss": 0.5083, "step": 7421 }, { "epoch": 3.6069300911854105, "grad_norm": 0.07102867200205872, "learning_rate": 2.339488946466456e-07, "loss": 0.5431, "step": 7422 }, { "epoch": 3.60741641337386, "grad_norm": 0.07223226914649179, "learning_rate": 2.3337042318855695e-07, "loss": 0.5469, "step": 7423 }, { "epoch": 3.60790273556231, "grad_norm": 0.07277685605012142, "learning_rate": 2.3279265070256741e-07, "loss": 0.5139, "step": 7424 }, { "epoch": 3.60838905775076, "grad_norm": 0.07142077991011306, "learning_rate": 2.3221557727340026e-07, "loss": 0.5004, "step": 7425 }, { "epoch": 3.6088753799392097, "grad_norm": 0.0720272062001332, "learning_rate": 2.3163920298567677e-07, "loss": 0.5432, "step": 7426 }, { "epoch": 3.60936170212766, "grad_norm": 0.07274183132731583, "learning_rate": 2.3106352792391595e-07, "loss": 0.5265, "step": 7427 }, { "epoch": 3.6098480243161095, "grad_norm": 0.06988868757771198, "learning_rate": 2.3048855217253363e-07, "loss": 0.4977, "step": 7428 }, { "epoch": 3.610334346504559, "grad_norm": 0.07442318782605123, "learning_rate": 2.2991427581584402e-07, "loss": 0.5421, "step": 7429 }, { "epoch": 3.610820668693009, "grad_norm": 0.07098783879347895, "learning_rate": 2.293406989380581e-07, "loss": 0.5061, "step": 7430 }, { "epoch": 3.611306990881459, "grad_norm": 0.06958367767745825, "learning_rate": 2.2876782162328415e-07, "loss": 0.5026, "step": 7431 }, { "epoch": 3.6117933130699087, "grad_norm": 0.06951823437829387, "learning_rate": 2.281956439555283e-07, "loss": 0.526, "step": 7432 }, { "epoch": 3.612279635258359, "grad_norm": 0.06748836588914339, "learning_rate": 2.276241660186934e-07, "loss": 0.4738, "step": 7433 }, { "epoch": 3.6127659574468085, "grad_norm": 0.07240365452193252, "learning_rate": 2.2705338789658082e-07, "loss": 0.5397, "step": 7434 }, { "epoch": 3.613252279635258, "grad_norm": 0.0695118761076568, "learning_rate": 2.2648330967288857e-07, "loss": 0.5136, "step": 7435 }, { "epoch": 3.6137386018237083, "grad_norm": 0.07266416395887992, "learning_rate": 2.25913931431212e-07, "loss": 0.5085, "step": 7436 }, { "epoch": 3.614224924012158, "grad_norm": 0.07251594605067847, "learning_rate": 2.253452532550443e-07, "loss": 0.5412, "step": 7437 }, { "epoch": 3.614711246200608, "grad_norm": 0.07268244260928317, "learning_rate": 2.247772752277755e-07, "loss": 0.5122, "step": 7438 }, { "epoch": 3.615197568389058, "grad_norm": 0.07528286822987387, "learning_rate": 2.242099974326928e-07, "loss": 0.5839, "step": 7439 }, { "epoch": 3.6156838905775075, "grad_norm": 0.06895527780745686, "learning_rate": 2.2364341995298133e-07, "loss": 0.4772, "step": 7440 }, { "epoch": 3.616170212765957, "grad_norm": 0.07364367874106896, "learning_rate": 2.2307754287172302e-07, "loss": 0.5136, "step": 7441 }, { "epoch": 3.6166565349544073, "grad_norm": 0.07035932283324323, "learning_rate": 2.2251236627189753e-07, "loss": 0.5142, "step": 7442 }, { "epoch": 3.617142857142857, "grad_norm": 0.07032631320183147, "learning_rate": 2.2194789023638143e-07, "loss": 0.5046, "step": 7443 }, { "epoch": 3.617629179331307, "grad_norm": 0.0715205283156684, "learning_rate": 2.2138411484794953e-07, "loss": 0.4893, "step": 7444 }, { "epoch": 3.618115501519757, "grad_norm": 0.06917054097695331, "learning_rate": 2.2082104018927187e-07, "loss": 0.495, "step": 7445 }, { "epoch": 3.6186018237082065, "grad_norm": 0.07030850935382998, "learning_rate": 2.2025866634291736e-07, "loss": 0.5284, "step": 7446 }, { "epoch": 3.6190881458966566, "grad_norm": 0.07325364317153965, "learning_rate": 2.1969699339135232e-07, "loss": 0.4958, "step": 7447 }, { "epoch": 3.6195744680851063, "grad_norm": 0.07020402839254393, "learning_rate": 2.1913602141693914e-07, "loss": 0.4822, "step": 7448 }, { "epoch": 3.6200607902735564, "grad_norm": 0.06963284597459264, "learning_rate": 2.1857575050193757e-07, "loss": 0.4835, "step": 7449 }, { "epoch": 3.620547112462006, "grad_norm": 0.0691059181677648, "learning_rate": 2.1801618072850639e-07, "loss": 0.4757, "step": 7450 }, { "epoch": 3.621033434650456, "grad_norm": 0.1402888849727775, "learning_rate": 2.174573121786988e-07, "loss": 0.5087, "step": 7451 }, { "epoch": 3.621519756838906, "grad_norm": 0.07348022933877976, "learning_rate": 2.1689914493446706e-07, "loss": 0.518, "step": 7452 }, { "epoch": 3.6220060790273556, "grad_norm": 0.06882365623797035, "learning_rate": 2.1634167907766013e-07, "loss": 0.4951, "step": 7453 }, { "epoch": 3.6224924012158057, "grad_norm": 0.0705265521251137, "learning_rate": 2.1578491469002372e-07, "loss": 0.4737, "step": 7454 }, { "epoch": 3.6229787234042554, "grad_norm": 0.07046128277836229, "learning_rate": 2.1522885185320087e-07, "loss": 0.5104, "step": 7455 }, { "epoch": 3.623465045592705, "grad_norm": 0.07336965831753864, "learning_rate": 2.14673490648733e-07, "loss": 0.5429, "step": 7456 }, { "epoch": 3.623951367781155, "grad_norm": 0.0725408501616935, "learning_rate": 2.141188311580561e-07, "loss": 0.5264, "step": 7457 }, { "epoch": 3.624437689969605, "grad_norm": 0.07432858153177303, "learning_rate": 2.1356487346250565e-07, "loss": 0.5761, "step": 7458 }, { "epoch": 3.6249240121580546, "grad_norm": 0.07124357527045246, "learning_rate": 2.130116176433128e-07, "loss": 0.5113, "step": 7459 }, { "epoch": 3.6254103343465047, "grad_norm": 0.07204968916253275, "learning_rate": 2.1245906378160653e-07, "loss": 0.5435, "step": 7460 }, { "epoch": 3.6258966565349544, "grad_norm": 0.07539146703039709, "learning_rate": 2.1190721195841258e-07, "loss": 0.5377, "step": 7461 }, { "epoch": 3.626382978723404, "grad_norm": 0.07215788789355801, "learning_rate": 2.1135606225465343e-07, "loss": 0.5342, "step": 7462 }, { "epoch": 3.626869300911854, "grad_norm": 0.0712556716268439, "learning_rate": 2.1080561475114891e-07, "loss": 0.5063, "step": 7463 }, { "epoch": 3.627355623100304, "grad_norm": 0.06997387064185723, "learning_rate": 2.1025586952861608e-07, "loss": 0.5159, "step": 7464 }, { "epoch": 3.627841945288754, "grad_norm": 0.07428476347166349, "learning_rate": 2.0970682666766884e-07, "loss": 0.5314, "step": 7465 }, { "epoch": 3.6283282674772037, "grad_norm": 0.0723191926741368, "learning_rate": 2.091584862488183e-07, "loss": 0.5502, "step": 7466 }, { "epoch": 3.6288145896656534, "grad_norm": 0.07074671122403996, "learning_rate": 2.0861084835247237e-07, "loss": 0.5088, "step": 7467 }, { "epoch": 3.629300911854103, "grad_norm": 0.07326909417220616, "learning_rate": 2.0806391305893568e-07, "loss": 0.5288, "step": 7468 }, { "epoch": 3.629787234042553, "grad_norm": 0.06985561967981996, "learning_rate": 2.0751768044841027e-07, "loss": 0.4751, "step": 7469 }, { "epoch": 3.630273556231003, "grad_norm": 0.0699171773158737, "learning_rate": 2.0697215060099417e-07, "loss": 0.4986, "step": 7470 }, { "epoch": 3.630759878419453, "grad_norm": 0.06846680490778347, "learning_rate": 2.0642732359668294e-07, "loss": 0.4787, "step": 7471 }, { "epoch": 3.6312462006079027, "grad_norm": 0.0705745900938879, "learning_rate": 2.0588319951537095e-07, "loss": 0.5104, "step": 7472 }, { "epoch": 3.6317325227963524, "grad_norm": 0.06908864481648404, "learning_rate": 2.0533977843684716e-07, "loss": 0.4798, "step": 7473 }, { "epoch": 3.6322188449848025, "grad_norm": 0.0703322752196678, "learning_rate": 2.0479706044079784e-07, "loss": 0.5262, "step": 7474 }, { "epoch": 3.632705167173252, "grad_norm": 0.0686731730665516, "learning_rate": 2.0425504560680654e-07, "loss": 0.4865, "step": 7475 }, { "epoch": 3.6331914893617023, "grad_norm": 0.07202493190236539, "learning_rate": 2.03713734014353e-07, "loss": 0.5308, "step": 7476 }, { "epoch": 3.633677811550152, "grad_norm": 0.06979792559986302, "learning_rate": 2.0317312574281544e-07, "loss": 0.4934, "step": 7477 }, { "epoch": 3.6341641337386017, "grad_norm": 0.07473781976748568, "learning_rate": 2.0263322087146708e-07, "loss": 0.5192, "step": 7478 }, { "epoch": 3.634650455927052, "grad_norm": 0.0703706668541725, "learning_rate": 2.020940194794796e-07, "loss": 0.5048, "step": 7479 }, { "epoch": 3.6351367781155015, "grad_norm": 0.06992478243465769, "learning_rate": 2.015555216459203e-07, "loss": 0.5145, "step": 7480 }, { "epoch": 3.6356231003039516, "grad_norm": 0.07310334378802451, "learning_rate": 2.0101772744975324e-07, "loss": 0.4975, "step": 7481 }, { "epoch": 3.6361094224924013, "grad_norm": 0.06962887525056555, "learning_rate": 2.0048063696984088e-07, "loss": 0.4973, "step": 7482 }, { "epoch": 3.636595744680851, "grad_norm": 0.07252029821221445, "learning_rate": 1.9994425028494137e-07, "loss": 0.5351, "step": 7483 }, { "epoch": 3.6370820668693007, "grad_norm": 0.07624364882914925, "learning_rate": 1.9940856747370895e-07, "loss": 0.53, "step": 7484 }, { "epoch": 3.637568389057751, "grad_norm": 0.07283865369015942, "learning_rate": 1.988735886146953e-07, "loss": 0.5472, "step": 7485 }, { "epoch": 3.6380547112462005, "grad_norm": 0.06889612348561956, "learning_rate": 1.9833931378634985e-07, "loss": 0.4637, "step": 7486 }, { "epoch": 3.6385410334346506, "grad_norm": 0.06862095008992869, "learning_rate": 1.9780574306701715e-07, "loss": 0.5111, "step": 7487 }, { "epoch": 3.6390273556231003, "grad_norm": 0.07006805649849787, "learning_rate": 1.972728765349402e-07, "loss": 0.4975, "step": 7488 }, { "epoch": 3.63951367781155, "grad_norm": 0.11638624971168805, "learning_rate": 1.9674071426825647e-07, "loss": 0.5266, "step": 7489 }, { "epoch": 3.64, "grad_norm": 0.07362129423013457, "learning_rate": 1.96209256345003e-07, "loss": 0.5358, "step": 7490 }, { "epoch": 3.64048632218845, "grad_norm": 0.07234328561646522, "learning_rate": 1.9567850284311185e-07, "loss": 0.5348, "step": 7491 }, { "epoch": 3.6409726443769, "grad_norm": 0.07036638198076449, "learning_rate": 1.9514845384041081e-07, "loss": 0.5021, "step": 7492 }, { "epoch": 3.6414589665653496, "grad_norm": 0.07183510166582527, "learning_rate": 1.9461910941462657e-07, "loss": 0.5171, "step": 7493 }, { "epoch": 3.6419452887537993, "grad_norm": 0.07121910445789473, "learning_rate": 1.9409046964338152e-07, "loss": 0.5131, "step": 7494 }, { "epoch": 3.642431610942249, "grad_norm": 0.07123112984882467, "learning_rate": 1.9356253460419416e-07, "loss": 0.5231, "step": 7495 }, { "epoch": 3.642917933130699, "grad_norm": 0.07230851178721137, "learning_rate": 1.9303530437448036e-07, "loss": 0.5285, "step": 7496 }, { "epoch": 3.643404255319149, "grad_norm": 0.07180968000417191, "learning_rate": 1.9250877903155329e-07, "loss": 0.5605, "step": 7497 }, { "epoch": 3.643890577507599, "grad_norm": 0.07193875215347835, "learning_rate": 1.9198295865262063e-07, "loss": 0.5146, "step": 7498 }, { "epoch": 3.6443768996960486, "grad_norm": 0.07302790121597812, "learning_rate": 1.914578433147879e-07, "loss": 0.5255, "step": 7499 }, { "epoch": 3.6448632218844983, "grad_norm": 0.07130233395149034, "learning_rate": 1.9093343309505797e-07, "loss": 0.5055, "step": 7500 }, { "epoch": 3.6453495440729484, "grad_norm": 0.07203586544334373, "learning_rate": 1.9040972807032988e-07, "loss": 0.5238, "step": 7501 }, { "epoch": 3.645835866261398, "grad_norm": 0.0728670850843985, "learning_rate": 1.8988672831739828e-07, "loss": 0.5151, "step": 7502 }, { "epoch": 3.6463221884498482, "grad_norm": 0.07058333820381851, "learning_rate": 1.8936443391295578e-07, "loss": 0.484, "step": 7503 }, { "epoch": 3.646808510638298, "grad_norm": 0.06888119281513207, "learning_rate": 1.888428449335905e-07, "loss": 0.4772, "step": 7504 }, { "epoch": 3.6472948328267476, "grad_norm": 0.07005706663977893, "learning_rate": 1.883219614557874e-07, "loss": 0.4864, "step": 7505 }, { "epoch": 3.6477811550151977, "grad_norm": 0.07017465080421058, "learning_rate": 1.878017835559287e-07, "loss": 0.4854, "step": 7506 }, { "epoch": 3.6482674772036474, "grad_norm": 0.069891375391845, "learning_rate": 1.872823113102923e-07, "loss": 0.4784, "step": 7507 }, { "epoch": 3.6487537993920975, "grad_norm": 0.07324580049276005, "learning_rate": 1.867635447950522e-07, "loss": 0.5448, "step": 7508 }, { "epoch": 3.6492401215805472, "grad_norm": 0.07277742233402125, "learning_rate": 1.8624548408628152e-07, "loss": 0.5059, "step": 7509 }, { "epoch": 3.649726443768997, "grad_norm": 0.07001265792556749, "learning_rate": 1.857281292599461e-07, "loss": 0.5018, "step": 7510 }, { "epoch": 3.6502127659574466, "grad_norm": 0.06979377235499186, "learning_rate": 1.852114803919114e-07, "loss": 0.4886, "step": 7511 }, { "epoch": 3.6506990881458967, "grad_norm": 0.06877636540563002, "learning_rate": 1.846955375579379e-07, "loss": 0.4833, "step": 7512 }, { "epoch": 3.6511854103343464, "grad_norm": 0.07306061608472722, "learning_rate": 1.8418030083368178e-07, "loss": 0.5144, "step": 7513 }, { "epoch": 3.6516717325227965, "grad_norm": 0.06852282899311782, "learning_rate": 1.8366577029469701e-07, "loss": 0.5075, "step": 7514 }, { "epoch": 3.652158054711246, "grad_norm": 0.07112433008367616, "learning_rate": 1.8315194601643439e-07, "loss": 0.5191, "step": 7515 }, { "epoch": 3.652644376899696, "grad_norm": 0.06898192783108989, "learning_rate": 1.8263882807423972e-07, "loss": 0.4863, "step": 7516 }, { "epoch": 3.653130699088146, "grad_norm": 0.0690880864184972, "learning_rate": 1.8212641654335618e-07, "loss": 0.4798, "step": 7517 }, { "epoch": 3.6536170212765957, "grad_norm": 0.07184164267834733, "learning_rate": 1.8161471149892306e-07, "loss": 0.5303, "step": 7518 }, { "epoch": 3.654103343465046, "grad_norm": 0.0739491901871606, "learning_rate": 1.8110371301597596e-07, "loss": 0.5295, "step": 7519 }, { "epoch": 3.6545896656534955, "grad_norm": 0.07362011800370305, "learning_rate": 1.8059342116944711e-07, "loss": 0.5315, "step": 7520 }, { "epoch": 3.655075987841945, "grad_norm": 0.07119662997312755, "learning_rate": 1.8008383603416558e-07, "loss": 0.5241, "step": 7521 }, { "epoch": 3.655562310030395, "grad_norm": 0.07032375495360829, "learning_rate": 1.7957495768485543e-07, "loss": 0.5152, "step": 7522 }, { "epoch": 3.656048632218845, "grad_norm": 0.07334274070229832, "learning_rate": 1.7906678619613814e-07, "loss": 0.5285, "step": 7523 }, { "epoch": 3.6565349544072947, "grad_norm": 0.07324739680703293, "learning_rate": 1.7855932164253133e-07, "loss": 0.5604, "step": 7524 }, { "epoch": 3.657021276595745, "grad_norm": 0.07191865658351163, "learning_rate": 1.7805256409844873e-07, "loss": 0.5109, "step": 7525 }, { "epoch": 3.6575075987841945, "grad_norm": 0.06996314492438994, "learning_rate": 1.7754651363820042e-07, "loss": 0.5038, "step": 7526 }, { "epoch": 3.657993920972644, "grad_norm": 0.0702110192894247, "learning_rate": 1.7704117033599477e-07, "loss": 0.4843, "step": 7527 }, { "epoch": 3.6584802431610943, "grad_norm": 0.07133565464454686, "learning_rate": 1.7653653426593197e-07, "loss": 0.5104, "step": 7528 }, { "epoch": 3.658966565349544, "grad_norm": 0.07254986884328445, "learning_rate": 1.7603260550201284e-07, "loss": 0.5075, "step": 7529 }, { "epoch": 3.659452887537994, "grad_norm": 0.07234594342644482, "learning_rate": 1.7552938411813214e-07, "loss": 0.494, "step": 7530 }, { "epoch": 3.659939209726444, "grad_norm": 0.06997898847699532, "learning_rate": 1.750268701880814e-07, "loss": 0.5021, "step": 7531 }, { "epoch": 3.6604255319148935, "grad_norm": 0.06841732536213396, "learning_rate": 1.7452506378554945e-07, "loss": 0.5122, "step": 7532 }, { "epoch": 3.660911854103343, "grad_norm": 0.0715954332291871, "learning_rate": 1.7402396498411967e-07, "loss": 0.5223, "step": 7533 }, { "epoch": 3.6613981762917933, "grad_norm": 0.07156559770059086, "learning_rate": 1.7352357385727326e-07, "loss": 0.5058, "step": 7534 }, { "epoch": 3.661884498480243, "grad_norm": 0.07099284531232679, "learning_rate": 1.7302389047838597e-07, "loss": 0.4979, "step": 7535 }, { "epoch": 3.662370820668693, "grad_norm": 0.07162207060421072, "learning_rate": 1.7252491492073143e-07, "loss": 0.5191, "step": 7536 }, { "epoch": 3.662857142857143, "grad_norm": 0.07145515138662749, "learning_rate": 1.7202664725747885e-07, "loss": 0.523, "step": 7537 }, { "epoch": 3.6633434650455925, "grad_norm": 0.07041061927338907, "learning_rate": 1.715290875616926e-07, "loss": 0.484, "step": 7538 }, { "epoch": 3.6638297872340426, "grad_norm": 0.07289429657436966, "learning_rate": 1.7103223590633489e-07, "loss": 0.5221, "step": 7539 }, { "epoch": 3.6643161094224923, "grad_norm": 0.0698684944550393, "learning_rate": 1.705360923642635e-07, "loss": 0.5334, "step": 7540 }, { "epoch": 3.6648024316109424, "grad_norm": 0.06973663585020563, "learning_rate": 1.7004065700823192e-07, "loss": 0.4884, "step": 7541 }, { "epoch": 3.665288753799392, "grad_norm": 0.06965277173611224, "learning_rate": 1.6954592991088982e-07, "loss": 0.4983, "step": 7542 }, { "epoch": 3.665775075987842, "grad_norm": 0.0712242615242087, "learning_rate": 1.6905191114478415e-07, "loss": 0.5297, "step": 7543 }, { "epoch": 3.666261398176292, "grad_norm": 0.06993880782261867, "learning_rate": 1.6855860078235642e-07, "loss": 0.483, "step": 7544 }, { "epoch": 3.6667477203647416, "grad_norm": 0.07168190079168284, "learning_rate": 1.6806599889594488e-07, "loss": 0.5286, "step": 7545 }, { "epoch": 3.6672340425531917, "grad_norm": 0.0714318564216297, "learning_rate": 1.6757410555778454e-07, "loss": 0.5334, "step": 7546 }, { "epoch": 3.6677203647416414, "grad_norm": 0.06996352764558236, "learning_rate": 1.67082920840006e-07, "loss": 0.4922, "step": 7547 }, { "epoch": 3.668206686930091, "grad_norm": 0.07091207684774888, "learning_rate": 1.6659244481463553e-07, "loss": 0.4977, "step": 7548 }, { "epoch": 3.668693009118541, "grad_norm": 0.07038717321528706, "learning_rate": 1.661026775535962e-07, "loss": 0.4868, "step": 7549 }, { "epoch": 3.669179331306991, "grad_norm": 0.06978806215257613, "learning_rate": 1.6561361912870667e-07, "loss": 0.4914, "step": 7550 }, { "epoch": 3.6696656534954406, "grad_norm": 0.07232564978162159, "learning_rate": 1.6512526961168173e-07, "loss": 0.5275, "step": 7551 }, { "epoch": 3.6701519756838907, "grad_norm": 0.0695353219389521, "learning_rate": 1.646376290741325e-07, "loss": 0.5272, "step": 7552 }, { "epoch": 3.6706382978723404, "grad_norm": 0.07076372926681185, "learning_rate": 1.6415069758756564e-07, "loss": 0.5244, "step": 7553 }, { "epoch": 3.67112462006079, "grad_norm": 0.06975678337646281, "learning_rate": 1.636644752233846e-07, "loss": 0.4836, "step": 7554 }, { "epoch": 3.6716109422492402, "grad_norm": 0.07084517689200552, "learning_rate": 1.631789620528873e-07, "loss": 0.5405, "step": 7555 }, { "epoch": 3.67209726443769, "grad_norm": 0.06926883388737212, "learning_rate": 1.6269415814727018e-07, "loss": 0.4942, "step": 7556 }, { "epoch": 3.67258358662614, "grad_norm": 0.07095701886476784, "learning_rate": 1.6221006357762304e-07, "loss": 0.5147, "step": 7557 }, { "epoch": 3.6730699088145897, "grad_norm": 0.06857787415435455, "learning_rate": 1.6172667841493351e-07, "loss": 0.5008, "step": 7558 }, { "epoch": 3.6735562310030394, "grad_norm": 0.07206429092235508, "learning_rate": 1.6124400273008434e-07, "loss": 0.5125, "step": 7559 }, { "epoch": 3.674042553191489, "grad_norm": 0.07299348530193564, "learning_rate": 1.6076203659385503e-07, "loss": 0.5021, "step": 7560 }, { "epoch": 3.674528875379939, "grad_norm": 0.06916757741865318, "learning_rate": 1.6028078007691962e-07, "loss": 0.521, "step": 7561 }, { "epoch": 3.675015197568389, "grad_norm": 0.07063395884421529, "learning_rate": 1.598002332498483e-07, "loss": 0.5087, "step": 7562 }, { "epoch": 3.675501519756839, "grad_norm": 0.07063914656245275, "learning_rate": 1.5932039618310913e-07, "loss": 0.5236, "step": 7563 }, { "epoch": 3.6759878419452887, "grad_norm": 0.07278125313816183, "learning_rate": 1.588412689470642e-07, "loss": 0.5291, "step": 7564 }, { "epoch": 3.6764741641337384, "grad_norm": 0.06915412333874485, "learning_rate": 1.583628516119723e-07, "loss": 0.5126, "step": 7565 }, { "epoch": 3.6769604863221885, "grad_norm": 0.07255686904347249, "learning_rate": 1.5788514424798785e-07, "loss": 0.5161, "step": 7566 }, { "epoch": 3.677446808510638, "grad_norm": 0.07220331901678488, "learning_rate": 1.574081469251615e-07, "loss": 0.5014, "step": 7567 }, { "epoch": 3.6779331306990883, "grad_norm": 0.07088416458188823, "learning_rate": 1.5693185971343895e-07, "loss": 0.4972, "step": 7568 }, { "epoch": 3.678419452887538, "grad_norm": 0.07159320842660802, "learning_rate": 1.564562826826621e-07, "loss": 0.5075, "step": 7569 }, { "epoch": 3.6789057750759877, "grad_norm": 0.07133548899316397, "learning_rate": 1.5598141590256966e-07, "loss": 0.5155, "step": 7570 }, { "epoch": 3.679392097264438, "grad_norm": 0.07148643741355755, "learning_rate": 1.5550725944279476e-07, "loss": 0.4651, "step": 7571 }, { "epoch": 3.6798784194528875, "grad_norm": 0.07042599246655751, "learning_rate": 1.550338133728674e-07, "loss": 0.4801, "step": 7572 }, { "epoch": 3.6803647416413376, "grad_norm": 0.07010151777419237, "learning_rate": 1.5456107776221363e-07, "loss": 0.4825, "step": 7573 }, { "epoch": 3.6808510638297873, "grad_norm": 0.0692687248738914, "learning_rate": 1.5408905268015361e-07, "loss": 0.4857, "step": 7574 }, { "epoch": 3.681337386018237, "grad_norm": 0.07088841546605369, "learning_rate": 1.5361773819590585e-07, "loss": 0.4897, "step": 7575 }, { "epoch": 3.6818237082066867, "grad_norm": 0.0708516927536673, "learning_rate": 1.5314713437858174e-07, "loss": 0.5234, "step": 7576 }, { "epoch": 3.682310030395137, "grad_norm": 0.07051250862511367, "learning_rate": 1.5267724129719108e-07, "loss": 0.4981, "step": 7577 }, { "epoch": 3.6827963525835865, "grad_norm": 0.07274738439743421, "learning_rate": 1.5220805902063762e-07, "loss": 0.4884, "step": 7578 }, { "epoch": 3.6832826747720366, "grad_norm": 0.07226312187858681, "learning_rate": 1.5173958761772246e-07, "loss": 0.5242, "step": 7579 }, { "epoch": 3.6837689969604863, "grad_norm": 0.06949428643138196, "learning_rate": 1.5127182715714006e-07, "loss": 0.5074, "step": 7580 }, { "epoch": 3.684255319148936, "grad_norm": 0.07101417643479951, "learning_rate": 1.5080477770748392e-07, "loss": 0.5152, "step": 7581 }, { "epoch": 3.684741641337386, "grad_norm": 0.07034009047803332, "learning_rate": 1.503384393372409e-07, "loss": 0.4962, "step": 7582 }, { "epoch": 3.685227963525836, "grad_norm": 0.07060814694846111, "learning_rate": 1.4987281211479466e-07, "loss": 0.4946, "step": 7583 }, { "epoch": 3.685714285714286, "grad_norm": 0.07068001761701923, "learning_rate": 1.4940789610842332e-07, "loss": 0.5521, "step": 7584 }, { "epoch": 3.6862006079027356, "grad_norm": 0.07075723212084156, "learning_rate": 1.4894369138630182e-07, "loss": 0.4936, "step": 7585 }, { "epoch": 3.6866869300911853, "grad_norm": 0.07259529763174113, "learning_rate": 1.484801980165007e-07, "loss": 0.4844, "step": 7586 }, { "epoch": 3.687173252279635, "grad_norm": 0.0721453433478143, "learning_rate": 1.480174160669856e-07, "loss": 0.5082, "step": 7587 }, { "epoch": 3.687659574468085, "grad_norm": 0.0704804282711398, "learning_rate": 1.475553456056189e-07, "loss": 0.5209, "step": 7588 }, { "epoch": 3.688145896656535, "grad_norm": 0.06964632010285582, "learning_rate": 1.4709398670015752e-07, "loss": 0.4896, "step": 7589 }, { "epoch": 3.688632218844985, "grad_norm": 0.07000606698519893, "learning_rate": 1.4663333941825452e-07, "loss": 0.5161, "step": 7590 }, { "epoch": 3.6891185410334346, "grad_norm": 0.07251020833609347, "learning_rate": 1.461734038274587e-07, "loss": 0.4848, "step": 7591 }, { "epoch": 3.6896048632218843, "grad_norm": 0.07640300754390118, "learning_rate": 1.4571417999521442e-07, "loss": 0.5622, "step": 7592 }, { "epoch": 3.6900911854103344, "grad_norm": 0.07157233023252813, "learning_rate": 1.4525566798886115e-07, "loss": 0.5161, "step": 7593 }, { "epoch": 3.690577507598784, "grad_norm": 0.07105282903720113, "learning_rate": 1.4479786787563565e-07, "loss": 0.5088, "step": 7594 }, { "epoch": 3.6910638297872342, "grad_norm": 0.07691002610042437, "learning_rate": 1.4434077972266757e-07, "loss": 0.5577, "step": 7595 }, { "epoch": 3.691550151975684, "grad_norm": 0.07154039785140516, "learning_rate": 1.4388440359698496e-07, "loss": 0.5417, "step": 7596 }, { "epoch": 3.6920364741641336, "grad_norm": 0.07288494659992656, "learning_rate": 1.4342873956550928e-07, "loss": 0.5267, "step": 7597 }, { "epoch": 3.6925227963525837, "grad_norm": 0.07134174873389273, "learning_rate": 1.4297378769505876e-07, "loss": 0.5447, "step": 7598 }, { "epoch": 3.6930091185410334, "grad_norm": 0.07465082134896685, "learning_rate": 1.425195480523478e-07, "loss": 0.5342, "step": 7599 }, { "epoch": 3.6934954407294835, "grad_norm": 0.07066529622833347, "learning_rate": 1.4206602070398424e-07, "loss": 0.5055, "step": 7600 }, { "epoch": 3.6939817629179332, "grad_norm": 0.07340587027710287, "learning_rate": 1.4161320571647374e-07, "loss": 0.5202, "step": 7601 }, { "epoch": 3.694468085106383, "grad_norm": 0.07252197267251007, "learning_rate": 1.4116110315621546e-07, "loss": 0.5129, "step": 7602 }, { "epoch": 3.6949544072948326, "grad_norm": 0.07066841715628236, "learning_rate": 1.4070971308950577e-07, "loss": 0.4969, "step": 7603 }, { "epoch": 3.6954407294832827, "grad_norm": 0.07080447583813533, "learning_rate": 1.4025903558253673e-07, "loss": 0.5166, "step": 7604 }, { "epoch": 3.6959270516717324, "grad_norm": 0.06927407891020573, "learning_rate": 1.3980907070139328e-07, "loss": 0.4804, "step": 7605 }, { "epoch": 3.6964133738601825, "grad_norm": 0.07047311460358835, "learning_rate": 1.3935981851205815e-07, "loss": 0.5028, "step": 7606 }, { "epoch": 3.6968996960486322, "grad_norm": 0.0701806178313648, "learning_rate": 1.389112790804098e-07, "loss": 0.5085, "step": 7607 }, { "epoch": 3.697386018237082, "grad_norm": 0.07068318754242246, "learning_rate": 1.3846345247222115e-07, "loss": 0.4903, "step": 7608 }, { "epoch": 3.697872340425532, "grad_norm": 0.0732179315736856, "learning_rate": 1.3801633875316078e-07, "loss": 0.5198, "step": 7609 }, { "epoch": 3.6983586626139817, "grad_norm": 0.06900913217367231, "learning_rate": 1.3756993798879237e-07, "loss": 0.4831, "step": 7610 }, { "epoch": 3.698844984802432, "grad_norm": 0.06800958306976836, "learning_rate": 1.3712425024457633e-07, "loss": 0.4504, "step": 7611 }, { "epoch": 3.6993313069908815, "grad_norm": 0.07270882847618375, "learning_rate": 1.3667927558586756e-07, "loss": 0.4912, "step": 7612 }, { "epoch": 3.699817629179331, "grad_norm": 0.07359378423019014, "learning_rate": 1.3623501407791618e-07, "loss": 0.5063, "step": 7613 }, { "epoch": 3.700303951367781, "grad_norm": 0.06995003341319025, "learning_rate": 1.3579146578586832e-07, "loss": 0.4909, "step": 7614 }, { "epoch": 3.700790273556231, "grad_norm": 0.07064228266289245, "learning_rate": 1.3534863077476535e-07, "loss": 0.4971, "step": 7615 }, { "epoch": 3.7012765957446807, "grad_norm": 0.07338589569601295, "learning_rate": 1.3490650910954306e-07, "loss": 0.5741, "step": 7616 }, { "epoch": 3.701762917933131, "grad_norm": 0.06836310783173091, "learning_rate": 1.3446510085503516e-07, "loss": 0.4851, "step": 7617 }, { "epoch": 3.7022492401215805, "grad_norm": 0.07155419113637652, "learning_rate": 1.3402440607596821e-07, "loss": 0.5356, "step": 7618 }, { "epoch": 3.70273556231003, "grad_norm": 0.06912456092294525, "learning_rate": 1.335844248369661e-07, "loss": 0.5103, "step": 7619 }, { "epoch": 3.7032218844984803, "grad_norm": 0.07165302452954286, "learning_rate": 1.3314515720254552e-07, "loss": 0.5298, "step": 7620 }, { "epoch": 3.70370820668693, "grad_norm": 0.07241053022989817, "learning_rate": 1.3270660323712104e-07, "loss": 0.5231, "step": 7621 }, { "epoch": 3.70419452887538, "grad_norm": 0.07020504665779764, "learning_rate": 1.3226876300500125e-07, "loss": 0.5065, "step": 7622 }, { "epoch": 3.70468085106383, "grad_norm": 0.07436683994800043, "learning_rate": 1.318316365703909e-07, "loss": 0.554, "step": 7623 }, { "epoch": 3.7051671732522795, "grad_norm": 0.07107360024773648, "learning_rate": 1.3139522399738924e-07, "loss": 0.5351, "step": 7624 }, { "epoch": 3.7056534954407296, "grad_norm": 0.06897596353895076, "learning_rate": 1.3095952534999123e-07, "loss": 0.5001, "step": 7625 }, { "epoch": 3.7061398176291793, "grad_norm": 0.07009673119420902, "learning_rate": 1.3052454069208686e-07, "loss": 0.5154, "step": 7626 }, { "epoch": 3.7066261398176295, "grad_norm": 0.06937188232905657, "learning_rate": 1.3009027008746234e-07, "loss": 0.4803, "step": 7627 }, { "epoch": 3.707112462006079, "grad_norm": 0.07099904865956519, "learning_rate": 1.2965671359979838e-07, "loss": 0.4989, "step": 7628 }, { "epoch": 3.707598784194529, "grad_norm": 0.0746563104742717, "learning_rate": 1.2922387129267077e-07, "loss": 0.5355, "step": 7629 }, { "epoch": 3.7080851063829785, "grad_norm": 0.06922289823851557, "learning_rate": 1.28791743229551e-07, "loss": 0.4938, "step": 7630 }, { "epoch": 3.7085714285714286, "grad_norm": 0.07208477780138299, "learning_rate": 1.2836032947380616e-07, "loss": 0.5141, "step": 7631 }, { "epoch": 3.7090577507598783, "grad_norm": 0.06922684255046883, "learning_rate": 1.2792963008869786e-07, "loss": 0.4909, "step": 7632 }, { "epoch": 3.7095440729483284, "grad_norm": 0.07215478226688445, "learning_rate": 1.2749964513738277e-07, "loss": 0.5166, "step": 7633 }, { "epoch": 3.710030395136778, "grad_norm": 0.07173461218672605, "learning_rate": 1.2707037468291438e-07, "loss": 0.5329, "step": 7634 }, { "epoch": 3.710516717325228, "grad_norm": 0.07670294994184858, "learning_rate": 1.2664181878823955e-07, "loss": 0.5679, "step": 7635 }, { "epoch": 3.711003039513678, "grad_norm": 0.07347386059810658, "learning_rate": 1.2621397751620135e-07, "loss": 0.4993, "step": 7636 }, { "epoch": 3.7114893617021276, "grad_norm": 0.07283229973830455, "learning_rate": 1.257868509295379e-07, "loss": 0.5362, "step": 7637 }, { "epoch": 3.7119756838905777, "grad_norm": 0.0699449288709463, "learning_rate": 1.253604390908819e-07, "loss": 0.4791, "step": 7638 }, { "epoch": 3.7124620060790274, "grad_norm": 0.06875317816670751, "learning_rate": 1.249347420627628e-07, "loss": 0.4817, "step": 7639 }, { "epoch": 3.712948328267477, "grad_norm": 0.0708711370024746, "learning_rate": 1.2450975990760395e-07, "loss": 0.4902, "step": 7640 }, { "epoch": 3.713434650455927, "grad_norm": 0.07414615114078611, "learning_rate": 1.240854926877233e-07, "loss": 0.5418, "step": 7641 }, { "epoch": 3.713920972644377, "grad_norm": 0.07233836782519713, "learning_rate": 1.2366194046533608e-07, "loss": 0.5357, "step": 7642 }, { "epoch": 3.7144072948328266, "grad_norm": 0.06927638489188884, "learning_rate": 1.232391033025504e-07, "loss": 0.4693, "step": 7643 }, { "epoch": 3.7148936170212767, "grad_norm": 0.07170036375808124, "learning_rate": 1.228169812613711e-07, "loss": 0.4974, "step": 7644 }, { "epoch": 3.7153799392097264, "grad_norm": 0.0706144044845616, "learning_rate": 1.2239557440369754e-07, "loss": 0.5294, "step": 7645 }, { "epoch": 3.715866261398176, "grad_norm": 0.07164507648402191, "learning_rate": 1.219748827913242e-07, "loss": 0.5319, "step": 7646 }, { "epoch": 3.7163525835866262, "grad_norm": 0.07018422264423925, "learning_rate": 1.215549064859406e-07, "loss": 0.4821, "step": 7647 }, { "epoch": 3.716838905775076, "grad_norm": 0.07231904033091041, "learning_rate": 1.2113564554913137e-07, "loss": 0.5123, "step": 7648 }, { "epoch": 3.717325227963526, "grad_norm": 0.07318609230931034, "learning_rate": 1.2071710004237624e-07, "loss": 0.4861, "step": 7649 }, { "epoch": 3.7178115501519757, "grad_norm": 0.07023324809750443, "learning_rate": 1.2029927002705112e-07, "loss": 0.4837, "step": 7650 }, { "epoch": 3.7182978723404254, "grad_norm": 0.070244922239307, "learning_rate": 1.1988215556442474e-07, "loss": 0.501, "step": 7651 }, { "epoch": 3.7187841945288755, "grad_norm": 0.07211113681391308, "learning_rate": 1.1946575671566373e-07, "loss": 0.5224, "step": 7652 }, { "epoch": 3.7192705167173252, "grad_norm": 0.0711248489328205, "learning_rate": 1.1905007354182651e-07, "loss": 0.5709, "step": 7653 }, { "epoch": 3.7197568389057754, "grad_norm": 0.07233170031170742, "learning_rate": 1.186351061038693e-07, "loss": 0.5283, "step": 7654 }, { "epoch": 3.720243161094225, "grad_norm": 0.07077937210014604, "learning_rate": 1.1822085446264231e-07, "loss": 0.516, "step": 7655 }, { "epoch": 3.7207294832826747, "grad_norm": 0.07010215098590673, "learning_rate": 1.1780731867889084e-07, "loss": 0.4993, "step": 7656 }, { "epoch": 3.7212158054711244, "grad_norm": 0.07403837525571441, "learning_rate": 1.1739449881325471e-07, "loss": 0.5342, "step": 7657 }, { "epoch": 3.7217021276595745, "grad_norm": 0.07193425347874219, "learning_rate": 1.1698239492626995e-07, "loss": 0.5135, "step": 7658 }, { "epoch": 3.722188449848024, "grad_norm": 0.07186607821697964, "learning_rate": 1.1657100707836711e-07, "loss": 0.5167, "step": 7659 }, { "epoch": 3.7226747720364743, "grad_norm": 0.07210328771239458, "learning_rate": 1.1616033532987014e-07, "loss": 0.5126, "step": 7660 }, { "epoch": 3.723161094224924, "grad_norm": 0.07167477408992676, "learning_rate": 1.157503797410009e-07, "loss": 0.514, "step": 7661 }, { "epoch": 3.7236474164133737, "grad_norm": 0.07012051918891697, "learning_rate": 1.1534114037187404e-07, "loss": 0.4989, "step": 7662 }, { "epoch": 3.724133738601824, "grad_norm": 0.07242985684220683, "learning_rate": 1.1493261728249994e-07, "loss": 0.4999, "step": 7663 }, { "epoch": 3.7246200607902735, "grad_norm": 0.07247641802680536, "learning_rate": 1.1452481053278398e-07, "loss": 0.5159, "step": 7664 }, { "epoch": 3.7251063829787237, "grad_norm": 0.07137726377360303, "learning_rate": 1.1411772018252665e-07, "loss": 0.5309, "step": 7665 }, { "epoch": 3.7255927051671733, "grad_norm": 0.06982851917962289, "learning_rate": 1.1371134629142189e-07, "loss": 0.5013, "step": 7666 }, { "epoch": 3.726079027355623, "grad_norm": 0.07031351800497866, "learning_rate": 1.1330568891906202e-07, "loss": 0.4836, "step": 7667 }, { "epoch": 3.7265653495440727, "grad_norm": 0.07103266506018624, "learning_rate": 1.1290074812493001e-07, "loss": 0.5097, "step": 7668 }, { "epoch": 3.727051671732523, "grad_norm": 0.06974900788385342, "learning_rate": 1.1249652396840672e-07, "loss": 0.5025, "step": 7669 }, { "epoch": 3.7275379939209725, "grad_norm": 0.06901027156582124, "learning_rate": 1.1209301650876636e-07, "loss": 0.4599, "step": 7670 }, { "epoch": 3.7280243161094226, "grad_norm": 0.07093595557651035, "learning_rate": 1.1169022580517941e-07, "loss": 0.4878, "step": 7671 }, { "epoch": 3.7285106382978723, "grad_norm": 0.07052910320572137, "learning_rate": 1.1128815191671083e-07, "loss": 0.5008, "step": 7672 }, { "epoch": 3.728996960486322, "grad_norm": 0.0731877231163336, "learning_rate": 1.1088679490231957e-07, "loss": 0.5672, "step": 7673 }, { "epoch": 3.729483282674772, "grad_norm": 0.07339205740980259, "learning_rate": 1.1048615482086023e-07, "loss": 0.5442, "step": 7674 }, { "epoch": 3.729969604863222, "grad_norm": 0.06903168525592246, "learning_rate": 1.1008623173108191e-07, "loss": 0.4853, "step": 7675 }, { "epoch": 3.730455927051672, "grad_norm": 0.07523719574251578, "learning_rate": 1.0968702569162992e-07, "loss": 0.5342, "step": 7676 }, { "epoch": 3.7309422492401216, "grad_norm": 0.06692655233097024, "learning_rate": 1.092885367610419e-07, "loss": 0.4608, "step": 7677 }, { "epoch": 3.7314285714285713, "grad_norm": 0.07212617980149026, "learning_rate": 1.088907649977522e-07, "loss": 0.5513, "step": 7678 }, { "epoch": 3.731914893617021, "grad_norm": 0.07122131620923024, "learning_rate": 1.0849371046008971e-07, "loss": 0.5099, "step": 7679 }, { "epoch": 3.732401215805471, "grad_norm": 0.07405052838004117, "learning_rate": 1.0809737320627733e-07, "loss": 0.566, "step": 7680 }, { "epoch": 3.732887537993921, "grad_norm": 0.07119708603134343, "learning_rate": 1.0770175329443521e-07, "loss": 0.5187, "step": 7681 }, { "epoch": 3.733373860182371, "grad_norm": 0.07195301881303603, "learning_rate": 1.0730685078257418e-07, "loss": 0.5012, "step": 7682 }, { "epoch": 3.7338601823708206, "grad_norm": 0.07274869256330098, "learning_rate": 1.0691266572860348e-07, "loss": 0.5193, "step": 7683 }, { "epoch": 3.7343465045592703, "grad_norm": 0.0698769526640335, "learning_rate": 1.0651919819032574e-07, "loss": 0.5025, "step": 7684 }, { "epoch": 3.7348328267477204, "grad_norm": 0.07292602709683178, "learning_rate": 1.0612644822543871e-07, "loss": 0.5323, "step": 7685 }, { "epoch": 3.73531914893617, "grad_norm": 0.07202062094302504, "learning_rate": 1.0573441589153411e-07, "loss": 0.5249, "step": 7686 }, { "epoch": 3.7358054711246202, "grad_norm": 0.07070207502566973, "learning_rate": 1.0534310124609926e-07, "loss": 0.5278, "step": 7687 }, { "epoch": 3.73629179331307, "grad_norm": 0.07007999656390297, "learning_rate": 1.0495250434651604e-07, "loss": 0.4818, "step": 7688 }, { "epoch": 3.7367781155015196, "grad_norm": 0.06909397718244228, "learning_rate": 1.0456262525006089e-07, "loss": 0.5034, "step": 7689 }, { "epoch": 3.7372644376899697, "grad_norm": 0.07355070954577128, "learning_rate": 1.0417346401390582e-07, "loss": 0.5148, "step": 7690 }, { "epoch": 3.7377507598784194, "grad_norm": 0.06872264458556969, "learning_rate": 1.0378502069511631e-07, "loss": 0.4668, "step": 7691 }, { "epoch": 3.7382370820668696, "grad_norm": 0.0731657868460375, "learning_rate": 1.0339729535065346e-07, "loss": 0.4949, "step": 7692 }, { "epoch": 3.7387234042553192, "grad_norm": 0.07010725468949738, "learning_rate": 1.0301028803737234e-07, "loss": 0.4885, "step": 7693 }, { "epoch": 3.739209726443769, "grad_norm": 0.07383235823572544, "learning_rate": 1.0262399881202367e-07, "loss": 0.5329, "step": 7694 }, { "epoch": 3.7396960486322186, "grad_norm": 0.07215151996141322, "learning_rate": 1.022384277312527e-07, "loss": 0.5094, "step": 7695 }, { "epoch": 3.7401823708206687, "grad_norm": 0.07011532505657807, "learning_rate": 1.0185357485159808e-07, "loss": 0.5111, "step": 7696 }, { "epoch": 3.7406686930091184, "grad_norm": 0.07099063894681273, "learning_rate": 1.0146944022949467e-07, "loss": 0.5198, "step": 7697 }, { "epoch": 3.7411550151975685, "grad_norm": 0.07085483380424308, "learning_rate": 1.0108602392127131e-07, "loss": 0.5121, "step": 7698 }, { "epoch": 3.7416413373860182, "grad_norm": 0.07051419266290489, "learning_rate": 1.0070332598315135e-07, "loss": 0.4983, "step": 7699 }, { "epoch": 3.742127659574468, "grad_norm": 0.07167384562559563, "learning_rate": 1.003213464712538e-07, "loss": 0.5156, "step": 7700 }, { "epoch": 3.742613981762918, "grad_norm": 0.07129763955641986, "learning_rate": 9.994008544159106e-08, "loss": 0.5047, "step": 7701 }, { "epoch": 3.7431003039513677, "grad_norm": 0.07328466995093061, "learning_rate": 9.95595429500712e-08, "loss": 0.508, "step": 7702 }, { "epoch": 3.743586626139818, "grad_norm": 0.0726347452359639, "learning_rate": 9.917971905249568e-08, "loss": 0.5186, "step": 7703 }, { "epoch": 3.7440729483282675, "grad_norm": 0.0682033717409881, "learning_rate": 9.880061380456218e-08, "loss": 0.4771, "step": 7704 }, { "epoch": 3.744559270516717, "grad_norm": 0.07002158602692787, "learning_rate": 9.842222726186179e-08, "loss": 0.4932, "step": 7705 }, { "epoch": 3.745045592705167, "grad_norm": 0.07149011531806744, "learning_rate": 9.804455947988067e-08, "loss": 0.5126, "step": 7706 }, { "epoch": 3.745531914893617, "grad_norm": 0.07067131462924231, "learning_rate": 9.766761051399954e-08, "loss": 0.5029, "step": 7707 }, { "epoch": 3.7460182370820667, "grad_norm": 0.06864055007370284, "learning_rate": 9.729138041949359e-08, "loss": 0.4808, "step": 7708 }, { "epoch": 3.746504559270517, "grad_norm": 0.06946166489287424, "learning_rate": 9.691586925153262e-08, "loss": 0.4954, "step": 7709 }, { "epoch": 3.7469908814589665, "grad_norm": 0.072672556691609, "learning_rate": 9.654107706518145e-08, "loss": 0.536, "step": 7710 }, { "epoch": 3.7469908814589665, "eval_loss": 0.5693764686584473, "eval_runtime": 105.164, "eval_samples_per_second": 288.625, "eval_steps_per_second": 36.087, "step": 7710 }, { "epoch": 3.747477203647416, "grad_norm": 0.07401255269072891, "learning_rate": 9.616700391539947e-08, "loss": 0.5188, "step": 7711 }, { "epoch": 3.7479635258358663, "grad_norm": 0.0689452786677942, "learning_rate": 9.579364985703887e-08, "loss": 0.4875, "step": 7712 }, { "epoch": 3.748449848024316, "grad_norm": 0.0714686620646307, "learning_rate": 9.542101494484867e-08, "loss": 0.5053, "step": 7713 }, { "epoch": 3.748936170212766, "grad_norm": 0.07076403033714619, "learning_rate": 9.504909923347127e-08, "loss": 0.4964, "step": 7714 }, { "epoch": 3.749422492401216, "grad_norm": 0.07099573600275635, "learning_rate": 9.46779027774447e-08, "loss": 0.5118, "step": 7715 }, { "epoch": 3.7499088145896655, "grad_norm": 0.07492769120656283, "learning_rate": 9.430742563119932e-08, "loss": 0.5665, "step": 7716 }, { "epoch": 3.7503951367781156, "grad_norm": 0.07147937855029338, "learning_rate": 9.393766784906277e-08, "loss": 0.5052, "step": 7717 }, { "epoch": 3.7508814589665653, "grad_norm": 0.07053002047155855, "learning_rate": 9.356862948525447e-08, "loss": 0.5445, "step": 7718 }, { "epoch": 3.7513677811550155, "grad_norm": 0.0704034358557316, "learning_rate": 9.320031059389112e-08, "loss": 0.4982, "step": 7719 }, { "epoch": 3.751854103343465, "grad_norm": 0.07190381013695785, "learning_rate": 9.283271122898174e-08, "loss": 0.5225, "step": 7720 }, { "epoch": 3.752340425531915, "grad_norm": 0.07106024899789946, "learning_rate": 9.246583144443044e-08, "loss": 0.4826, "step": 7721 }, { "epoch": 3.7528267477203645, "grad_norm": 0.0701169228898378, "learning_rate": 9.209967129403585e-08, "loss": 0.5454, "step": 7722 }, { "epoch": 3.7533130699088146, "grad_norm": 0.07100833659957302, "learning_rate": 9.173423083149224e-08, "loss": 0.517, "step": 7723 }, { "epoch": 3.7537993920972643, "grad_norm": 0.07037772196538646, "learning_rate": 9.13695101103862e-08, "loss": 0.5088, "step": 7724 }, { "epoch": 3.7542857142857144, "grad_norm": 0.07140332964667107, "learning_rate": 9.100550918420048e-08, "loss": 0.5548, "step": 7725 }, { "epoch": 3.754772036474164, "grad_norm": 0.07118150260149636, "learning_rate": 9.064222810631185e-08, "loss": 0.5156, "step": 7726 }, { "epoch": 3.755258358662614, "grad_norm": 0.07054829159526961, "learning_rate": 9.027966692999046e-08, "loss": 0.5048, "step": 7727 }, { "epoch": 3.755744680851064, "grad_norm": 0.06991846164704252, "learning_rate": 8.991782570840269e-08, "loss": 0.4692, "step": 7728 }, { "epoch": 3.7562310030395136, "grad_norm": 0.07146371575995158, "learning_rate": 8.955670449460773e-08, "loss": 0.5028, "step": 7729 }, { "epoch": 3.7567173252279638, "grad_norm": 0.07024640452959402, "learning_rate": 8.919630334156049e-08, "loss": 0.5094, "step": 7730 }, { "epoch": 3.7572036474164134, "grad_norm": 0.07363743364009692, "learning_rate": 8.883662230210977e-08, "loss": 0.5137, "step": 7731 }, { "epoch": 3.757689969604863, "grad_norm": 0.06997767666772173, "learning_rate": 8.847766142899839e-08, "loss": 0.492, "step": 7732 }, { "epoch": 3.758176291793313, "grad_norm": 0.06853033038881279, "learning_rate": 8.811942077486369e-08, "loss": 0.4523, "step": 7733 }, { "epoch": 3.758662613981763, "grad_norm": 0.06960930169371204, "learning_rate": 8.776190039223753e-08, "loss": 0.486, "step": 7734 }, { "epoch": 3.7591489361702126, "grad_norm": 0.07245123958728712, "learning_rate": 8.740510033354688e-08, "loss": 0.5097, "step": 7735 }, { "epoch": 3.7596352583586627, "grad_norm": 0.06983026851201811, "learning_rate": 8.704902065111209e-08, "loss": 0.4933, "step": 7736 }, { "epoch": 3.7601215805471124, "grad_norm": 0.070011578391243, "learning_rate": 8.669366139714808e-08, "loss": 0.5029, "step": 7737 }, { "epoch": 3.760607902735562, "grad_norm": 0.07021901100753344, "learning_rate": 8.633902262376425e-08, "loss": 0.5155, "step": 7738 }, { "epoch": 3.7610942249240122, "grad_norm": 0.06963807861193787, "learning_rate": 8.598510438296459e-08, "loss": 0.5151, "step": 7739 }, { "epoch": 3.761580547112462, "grad_norm": 0.07407682065062528, "learning_rate": 8.563190672664701e-08, "loss": 0.5501, "step": 7740 }, { "epoch": 3.762066869300912, "grad_norm": 0.06807800433434286, "learning_rate": 8.527942970660396e-08, "loss": 0.4824, "step": 7741 }, { "epoch": 3.7625531914893617, "grad_norm": 0.07016065469569435, "learning_rate": 8.492767337452246e-08, "loss": 0.5088, "step": 7742 }, { "epoch": 3.7630395136778114, "grad_norm": 0.07151401098844945, "learning_rate": 8.457663778198288e-08, "loss": 0.5037, "step": 7743 }, { "epoch": 3.7635258358662615, "grad_norm": 0.07077814626567643, "learning_rate": 8.422632298046129e-08, "loss": 0.4832, "step": 7744 }, { "epoch": 3.7640121580547112, "grad_norm": 0.07310981759699837, "learning_rate": 8.387672902132715e-08, "loss": 0.5116, "step": 7745 }, { "epoch": 3.7644984802431614, "grad_norm": 0.07047883999217851, "learning_rate": 8.35278559558439e-08, "loss": 0.5164, "step": 7746 }, { "epoch": 3.764984802431611, "grad_norm": 0.07221396770439817, "learning_rate": 8.317970383517115e-08, "loss": 0.5245, "step": 7747 }, { "epoch": 3.7654711246200607, "grad_norm": 0.06839329811368865, "learning_rate": 8.283227271035976e-08, "loss": 0.4694, "step": 7748 }, { "epoch": 3.7659574468085104, "grad_norm": 0.07111116043817207, "learning_rate": 8.24855626323584e-08, "loss": 0.4883, "step": 7749 }, { "epoch": 3.7664437689969605, "grad_norm": 0.0728438452541302, "learning_rate": 8.213957365200642e-08, "loss": 0.5255, "step": 7750 }, { "epoch": 3.7669300911854102, "grad_norm": 0.07492742473737278, "learning_rate": 8.179430582004045e-08, "loss": 0.5658, "step": 7751 }, { "epoch": 3.7674164133738604, "grad_norm": 0.07001314783322304, "learning_rate": 8.144975918708941e-08, "loss": 0.4829, "step": 7752 }, { "epoch": 3.76790273556231, "grad_norm": 0.07112155282149944, "learning_rate": 8.110593380367737e-08, "loss": 0.5105, "step": 7753 }, { "epoch": 3.7683890577507597, "grad_norm": 0.07199049394533069, "learning_rate": 8.076282972022232e-08, "loss": 0.5365, "step": 7754 }, { "epoch": 3.76887537993921, "grad_norm": 0.07152711078433943, "learning_rate": 8.042044698703676e-08, "loss": 0.5094, "step": 7755 }, { "epoch": 3.7693617021276595, "grad_norm": 0.07118183193702346, "learning_rate": 8.007878565432669e-08, "loss": 0.4938, "step": 7756 }, { "epoch": 3.7698480243161097, "grad_norm": 0.07164547343704651, "learning_rate": 7.973784577219368e-08, "loss": 0.5395, "step": 7757 }, { "epoch": 3.7703343465045593, "grad_norm": 0.06961476049470836, "learning_rate": 7.939762739063217e-08, "loss": 0.4953, "step": 7758 }, { "epoch": 3.770820668693009, "grad_norm": 0.07411063978964855, "learning_rate": 7.905813055953227e-08, "loss": 0.5812, "step": 7759 }, { "epoch": 3.7713069908814587, "grad_norm": 0.07064286742619653, "learning_rate": 7.87193553286758e-08, "loss": 0.4927, "step": 7760 }, { "epoch": 3.771793313069909, "grad_norm": 0.07137804994590126, "learning_rate": 7.838130174774083e-08, "loss": 0.5152, "step": 7761 }, { "epoch": 3.7722796352583585, "grad_norm": 0.07220577117782458, "learning_rate": 7.804396986629936e-08, "loss": 0.5293, "step": 7762 }, { "epoch": 3.7727659574468086, "grad_norm": 0.06974146470799511, "learning_rate": 7.770735973381737e-08, "loss": 0.4895, "step": 7763 }, { "epoch": 3.7732522796352583, "grad_norm": 0.07161473468232411, "learning_rate": 7.737147139965484e-08, "loss": 0.5167, "step": 7764 }, { "epoch": 3.773738601823708, "grad_norm": 0.0711206025735908, "learning_rate": 7.703630491306568e-08, "loss": 0.5104, "step": 7765 }, { "epoch": 3.774224924012158, "grad_norm": 0.0698696650192937, "learning_rate": 7.670186032319837e-08, "loss": 0.4773, "step": 7766 }, { "epoch": 3.774711246200608, "grad_norm": 0.07112267845987022, "learning_rate": 7.636813767909534e-08, "loss": 0.5049, "step": 7767 }, { "epoch": 3.775197568389058, "grad_norm": 0.0713664405146949, "learning_rate": 7.603513702969412e-08, "loss": 0.5028, "step": 7768 }, { "epoch": 3.7756838905775076, "grad_norm": 0.07101275038554768, "learning_rate": 7.570285842382396e-08, "loss": 0.5044, "step": 7769 }, { "epoch": 3.7761702127659573, "grad_norm": 0.070824573826293, "learning_rate": 7.537130191021091e-08, "loss": 0.5069, "step": 7770 }, { "epoch": 3.7766565349544075, "grad_norm": 0.07073915403343078, "learning_rate": 7.50404675374733e-08, "loss": 0.5118, "step": 7771 }, { "epoch": 3.777142857142857, "grad_norm": 0.07173185365424199, "learning_rate": 7.471035535412508e-08, "loss": 0.5198, "step": 7772 }, { "epoch": 3.7776291793313073, "grad_norm": 0.07143915504987877, "learning_rate": 7.438096540857254e-08, "loss": 0.5048, "step": 7773 }, { "epoch": 3.778115501519757, "grad_norm": 0.07245258291915908, "learning_rate": 7.405229774911759e-08, "loss": 0.5317, "step": 7774 }, { "epoch": 3.7786018237082066, "grad_norm": 0.06975720862357525, "learning_rate": 7.372435242395504e-08, "loss": 0.5346, "step": 7775 }, { "epoch": 3.7790881458966563, "grad_norm": 0.07093710869503764, "learning_rate": 7.339712948117416e-08, "loss": 0.5128, "step": 7776 }, { "epoch": 3.7795744680851064, "grad_norm": 0.07019112109244323, "learning_rate": 7.307062896875938e-08, "loss": 0.5034, "step": 7777 }, { "epoch": 3.780060790273556, "grad_norm": 0.07214557791330724, "learning_rate": 7.274485093458794e-08, "loss": 0.531, "step": 7778 }, { "epoch": 3.7805471124620063, "grad_norm": 0.06988299892027762, "learning_rate": 7.241979542643162e-08, "loss": 0.4923, "step": 7779 }, { "epoch": 3.781033434650456, "grad_norm": 0.0717535516835555, "learning_rate": 7.209546249195509e-08, "loss": 0.4936, "step": 7780 }, { "epoch": 3.7815197568389056, "grad_norm": 0.07266876744376508, "learning_rate": 7.177185217871974e-08, "loss": 0.5355, "step": 7781 }, { "epoch": 3.7820060790273557, "grad_norm": 0.07301470070331494, "learning_rate": 7.144896453417816e-08, "loss": 0.5324, "step": 7782 }, { "epoch": 3.7824924012158054, "grad_norm": 0.06971067933149161, "learning_rate": 7.112679960567858e-08, "loss": 0.4911, "step": 7783 }, { "epoch": 3.7829787234042556, "grad_norm": 0.07032926182160719, "learning_rate": 7.080535744046268e-08, "loss": 0.4981, "step": 7784 }, { "epoch": 3.7834650455927052, "grad_norm": 0.07054086290073121, "learning_rate": 7.048463808566663e-08, "loss": 0.5135, "step": 7785 }, { "epoch": 3.783951367781155, "grad_norm": 0.06983713804253608, "learning_rate": 7.016464158832004e-08, "loss": 0.5183, "step": 7786 }, { "epoch": 3.7844376899696046, "grad_norm": 0.07259163726003327, "learning_rate": 6.984536799534702e-08, "loss": 0.5518, "step": 7787 }, { "epoch": 3.7849240121580547, "grad_norm": 0.07117251160452116, "learning_rate": 6.952681735356514e-08, "loss": 0.4973, "step": 7788 }, { "epoch": 3.7854103343465044, "grad_norm": 0.06994672228891526, "learning_rate": 6.920898970968593e-08, "loss": 0.5024, "step": 7789 }, { "epoch": 3.7858966565349546, "grad_norm": 0.07312686210672546, "learning_rate": 6.889188511031541e-08, "loss": 0.5304, "step": 7790 }, { "epoch": 3.7863829787234042, "grad_norm": 0.0693726335978675, "learning_rate": 6.857550360195364e-08, "loss": 0.4725, "step": 7791 }, { "epoch": 3.786869300911854, "grad_norm": 0.07297801900897251, "learning_rate": 6.82598452309946e-08, "loss": 0.5639, "step": 7792 }, { "epoch": 3.787355623100304, "grad_norm": 0.07171369918597217, "learning_rate": 6.794491004372516e-08, "loss": 0.5196, "step": 7793 }, { "epoch": 3.7878419452887537, "grad_norm": 0.07196317081973158, "learning_rate": 6.763069808632783e-08, "loss": 0.5369, "step": 7794 }, { "epoch": 3.788328267477204, "grad_norm": 0.07212672396374854, "learning_rate": 6.73172094048774e-08, "loss": 0.5524, "step": 7795 }, { "epoch": 3.7888145896656535, "grad_norm": 0.07046776091701304, "learning_rate": 6.700444404534434e-08, "loss": 0.5202, "step": 7796 }, { "epoch": 3.7893009118541032, "grad_norm": 0.07163163695006357, "learning_rate": 6.669240205359139e-08, "loss": 0.5084, "step": 7797 }, { "epoch": 3.7897872340425534, "grad_norm": 0.07037420173174773, "learning_rate": 6.638108347537587e-08, "loss": 0.5361, "step": 7798 }, { "epoch": 3.790273556231003, "grad_norm": 0.074410572555401, "learning_rate": 6.60704883563501e-08, "loss": 0.5407, "step": 7799 }, { "epoch": 3.7907598784194527, "grad_norm": 0.07194043298296843, "learning_rate": 6.576061674205825e-08, "loss": 0.5248, "step": 7800 }, { "epoch": 3.791246200607903, "grad_norm": 0.07103625618496813, "learning_rate": 6.54514686779406e-08, "loss": 0.54, "step": 7801 }, { "epoch": 3.7917325227963525, "grad_norm": 0.06970610957861578, "learning_rate": 6.514304420932927e-08, "loss": 0.534, "step": 7802 }, { "epoch": 3.792218844984802, "grad_norm": 0.0724173150267212, "learning_rate": 6.483534338145192e-08, "loss": 0.5317, "step": 7803 }, { "epoch": 3.7927051671732523, "grad_norm": 0.06920362971271889, "learning_rate": 6.452836623942859e-08, "loss": 0.4739, "step": 7804 }, { "epoch": 3.793191489361702, "grad_norm": 0.06861661899647022, "learning_rate": 6.422211282827384e-08, "loss": 0.4944, "step": 7805 }, { "epoch": 3.793677811550152, "grad_norm": 0.07376530336636636, "learning_rate": 6.391658319289729e-08, "loss": 0.5397, "step": 7806 }, { "epoch": 3.794164133738602, "grad_norm": 0.07159772618726348, "learning_rate": 6.361177737810087e-08, "loss": 0.469, "step": 7807 }, { "epoch": 3.7946504559270515, "grad_norm": 0.07187959489780896, "learning_rate": 6.330769542858106e-08, "loss": 0.4883, "step": 7808 }, { "epoch": 3.7951367781155017, "grad_norm": 0.0698058257982298, "learning_rate": 6.30043373889272e-08, "loss": 0.4727, "step": 7809 }, { "epoch": 3.7956231003039513, "grad_norm": 0.07125906445090661, "learning_rate": 6.270170330362479e-08, "loss": 0.5222, "step": 7810 }, { "epoch": 3.7961094224924015, "grad_norm": 0.072199820897027, "learning_rate": 6.239979321705003e-08, "loss": 0.5111, "step": 7811 }, { "epoch": 3.796595744680851, "grad_norm": 0.07012334199895957, "learning_rate": 6.209860717347638e-08, "loss": 0.4944, "step": 7812 }, { "epoch": 3.797082066869301, "grad_norm": 0.06898592212385408, "learning_rate": 6.179814521706739e-08, "loss": 0.4807, "step": 7813 }, { "epoch": 3.7975683890577505, "grad_norm": 0.0726591986935889, "learning_rate": 6.149840739188396e-08, "loss": 0.532, "step": 7814 }, { "epoch": 3.7980547112462006, "grad_norm": 0.07188334665145897, "learning_rate": 6.119939374187866e-08, "loss": 0.5231, "step": 7815 }, { "epoch": 3.7985410334346503, "grad_norm": 0.0717317819459501, "learning_rate": 6.090110431089813e-08, "loss": 0.4989, "step": 7816 }, { "epoch": 3.7990273556231005, "grad_norm": 0.07219236459886245, "learning_rate": 6.060353914268402e-08, "loss": 0.499, "step": 7817 }, { "epoch": 3.79951367781155, "grad_norm": 0.06886751728506314, "learning_rate": 6.030669828087033e-08, "loss": 0.4927, "step": 7818 }, { "epoch": 3.8, "grad_norm": 0.07159356727369592, "learning_rate": 6.0010581768985e-08, "loss": 0.5246, "step": 7819 }, { "epoch": 3.80048632218845, "grad_norm": 0.07076491383466604, "learning_rate": 5.971518965045054e-08, "loss": 0.4927, "step": 7820 }, { "epoch": 3.8009726443768996, "grad_norm": 0.07333703014917749, "learning_rate": 5.942052196858339e-08, "loss": 0.5141, "step": 7821 }, { "epoch": 3.8014589665653498, "grad_norm": 0.07402200485244595, "learning_rate": 5.9126578766592334e-08, "loss": 0.5749, "step": 7822 }, { "epoch": 3.8019452887537994, "grad_norm": 0.0717420115764388, "learning_rate": 5.8833360087581225e-08, "loss": 0.4986, "step": 7823 }, { "epoch": 3.802431610942249, "grad_norm": 0.06934498795118592, "learning_rate": 5.854086597454678e-08, "loss": 0.5089, "step": 7824 }, { "epoch": 3.802917933130699, "grad_norm": 0.0699841659199271, "learning_rate": 5.8249096470380793e-08, "loss": 0.5132, "step": 7825 }, { "epoch": 3.803404255319149, "grad_norm": 0.07320797365932691, "learning_rate": 5.7958051617867384e-08, "loss": 0.5461, "step": 7826 }, { "epoch": 3.8038905775075986, "grad_norm": 0.07109882012615379, "learning_rate": 5.7667731459685185e-08, "loss": 0.4809, "step": 7827 }, { "epoch": 3.8043768996960488, "grad_norm": 0.06913960629259028, "learning_rate": 5.737813603840625e-08, "loss": 0.5108, "step": 7828 }, { "epoch": 3.8048632218844984, "grad_norm": 0.07123382032328665, "learning_rate": 5.7089265396496617e-08, "loss": 0.5012, "step": 7829 }, { "epoch": 3.805349544072948, "grad_norm": 0.07139689541080747, "learning_rate": 5.680111957631518e-08, "loss": 0.5106, "step": 7830 }, { "epoch": 3.8058358662613982, "grad_norm": 0.07008085710962113, "learning_rate": 5.651369862011646e-08, "loss": 0.5001, "step": 7831 }, { "epoch": 3.806322188449848, "grad_norm": 0.07009846601560987, "learning_rate": 5.622700257004676e-08, "loss": 0.4833, "step": 7832 }, { "epoch": 3.806808510638298, "grad_norm": 0.0730541562075264, "learning_rate": 5.594103146814633e-08, "loss": 0.505, "step": 7833 }, { "epoch": 3.8072948328267477, "grad_norm": 0.06997168458391015, "learning_rate": 5.565578535635052e-08, "loss": 0.4949, "step": 7834 }, { "epoch": 3.8077811550151974, "grad_norm": 0.07041589109332315, "learning_rate": 5.537126427648698e-08, "loss": 0.4849, "step": 7835 }, { "epoch": 3.8082674772036476, "grad_norm": 0.06878899087449222, "learning_rate": 5.508746827027789e-08, "loss": 0.4664, "step": 7836 }, { "epoch": 3.8087537993920972, "grad_norm": 0.07209964303415166, "learning_rate": 5.480439737933774e-08, "loss": 0.5324, "step": 7837 }, { "epoch": 3.8092401215805474, "grad_norm": 0.072873171654013, "learning_rate": 5.4522051645176654e-08, "loss": 0.5115, "step": 7838 }, { "epoch": 3.809726443768997, "grad_norm": 0.06991811675142123, "learning_rate": 5.4240431109197075e-08, "loss": 0.4868, "step": 7839 }, { "epoch": 3.8102127659574467, "grad_norm": 0.07195009979048064, "learning_rate": 5.395953581269542e-08, "loss": 0.5366, "step": 7840 }, { "epoch": 3.8106990881458964, "grad_norm": 0.06984969421663202, "learning_rate": 5.367936579686206e-08, "loss": 0.4904, "step": 7841 }, { "epoch": 3.8111854103343465, "grad_norm": 0.07108010520164718, "learning_rate": 5.339992110278025e-08, "loss": 0.5182, "step": 7842 }, { "epoch": 3.8116717325227962, "grad_norm": 0.07001040453672337, "learning_rate": 5.3121201771427214e-08, "loss": 0.5125, "step": 7843 }, { "epoch": 3.8121580547112464, "grad_norm": 0.07098170843084955, "learning_rate": 5.284320784367525e-08, "loss": 0.5077, "step": 7844 }, { "epoch": 3.812644376899696, "grad_norm": 0.06947872438084872, "learning_rate": 5.2565939360287866e-08, "loss": 0.4798, "step": 7845 }, { "epoch": 3.8131306990881457, "grad_norm": 0.07032561420588579, "learning_rate": 5.2289396361923096e-08, "loss": 0.4718, "step": 7846 }, { "epoch": 3.813617021276596, "grad_norm": 0.07046750898414043, "learning_rate": 5.2013578889134054e-08, "loss": 0.5091, "step": 7847 }, { "epoch": 3.8141033434650455, "grad_norm": 0.07076459196141849, "learning_rate": 5.1738486982365055e-08, "loss": 0.5221, "step": 7848 }, { "epoch": 3.8145896656534957, "grad_norm": 0.07292765833751894, "learning_rate": 5.1464120681956055e-08, "loss": 0.5083, "step": 7849 }, { "epoch": 3.8150759878419453, "grad_norm": 0.0703473460462279, "learning_rate": 5.119048002813931e-08, "loss": 0.5159, "step": 7850 }, { "epoch": 3.815562310030395, "grad_norm": 0.07235388709190145, "learning_rate": 5.091756506104162e-08, "loss": 0.532, "step": 7851 }, { "epoch": 3.8160486322188447, "grad_norm": 0.07090107767816602, "learning_rate": 5.0645375820682075e-08, "loss": 0.5472, "step": 7852 }, { "epoch": 3.816534954407295, "grad_norm": 0.07253015436573712, "learning_rate": 5.0373912346974305e-08, "loss": 0.5107, "step": 7853 }, { "epoch": 3.8170212765957445, "grad_norm": 0.07138190665650147, "learning_rate": 5.010317467972592e-08, "loss": 0.5198, "step": 7854 }, { "epoch": 3.8175075987841947, "grad_norm": 0.07245824848976665, "learning_rate": 4.983316285863682e-08, "loss": 0.5122, "step": 7855 }, { "epoch": 3.8179939209726443, "grad_norm": 0.07105082772833467, "learning_rate": 4.9563876923302004e-08, "loss": 0.4913, "step": 7856 }, { "epoch": 3.818480243161094, "grad_norm": 0.07009525509163841, "learning_rate": 4.929531691320821e-08, "loss": 0.4898, "step": 7857 }, { "epoch": 3.818966565349544, "grad_norm": 0.07293765875685293, "learning_rate": 4.9027482867737286e-08, "loss": 0.5189, "step": 7858 }, { "epoch": 3.819452887537994, "grad_norm": 0.07079885810829864, "learning_rate": 4.876037482616447e-08, "loss": 0.5313, "step": 7859 }, { "epoch": 3.819939209726444, "grad_norm": 0.06927717991427378, "learning_rate": 4.849399282765732e-08, "loss": 0.4853, "step": 7860 }, { "epoch": 3.8204255319148936, "grad_norm": 0.06938708230031493, "learning_rate": 4.822833691127793e-08, "loss": 0.5033, "step": 7861 }, { "epoch": 3.8209118541033433, "grad_norm": 0.07075138864596536, "learning_rate": 4.79634071159818e-08, "loss": 0.5036, "step": 7862 }, { "epoch": 3.8213981762917935, "grad_norm": 0.06879710615572684, "learning_rate": 4.769920348061785e-08, "loss": 0.4961, "step": 7863 }, { "epoch": 3.821884498480243, "grad_norm": 0.07129116758488921, "learning_rate": 4.743572604392899e-08, "loss": 0.5013, "step": 7864 }, { "epoch": 3.8223708206686933, "grad_norm": 0.07063452560304825, "learning_rate": 4.717297484455041e-08, "loss": 0.5062, "step": 7865 }, { "epoch": 3.822857142857143, "grad_norm": 0.06790452136271805, "learning_rate": 4.691094992101242e-08, "loss": 0.4893, "step": 7866 }, { "epoch": 3.8233434650455926, "grad_norm": 0.06966108365059806, "learning_rate": 4.66496513117376e-08, "loss": 0.4884, "step": 7867 }, { "epoch": 3.8238297872340423, "grad_norm": 0.0700776471147457, "learning_rate": 4.6389079055041976e-08, "loss": 0.5226, "step": 7868 }, { "epoch": 3.8243161094224924, "grad_norm": 0.06897968533983888, "learning_rate": 4.612923318913609e-08, "loss": 0.4881, "step": 7869 }, { "epoch": 3.824802431610942, "grad_norm": 0.06955439364137804, "learning_rate": 4.5870113752123355e-08, "loss": 0.49, "step": 7870 }, { "epoch": 3.8252887537993923, "grad_norm": 0.07223171414025488, "learning_rate": 4.561172078200005e-08, "loss": 0.5569, "step": 7871 }, { "epoch": 3.825775075987842, "grad_norm": 0.07027894520467323, "learning_rate": 4.535405431665751e-08, "loss": 0.52, "step": 7872 }, { "epoch": 3.8262613981762916, "grad_norm": 0.07234696220495436, "learning_rate": 4.5097114393879426e-08, "loss": 0.5183, "step": 7873 }, { "epoch": 3.8267477203647418, "grad_norm": 0.06741978813203077, "learning_rate": 4.484090105134231e-08, "loss": 0.4679, "step": 7874 }, { "epoch": 3.8272340425531914, "grad_norm": 0.07253755459446587, "learning_rate": 4.458541432661778e-08, "loss": 0.5334, "step": 7875 }, { "epoch": 3.8277203647416416, "grad_norm": 0.0692470292194821, "learning_rate": 4.433065425716976e-08, "loss": 0.4898, "step": 7876 }, { "epoch": 3.8282066869300913, "grad_norm": 0.07073813658569714, "learning_rate": 4.407662088035613e-08, "loss": 0.5308, "step": 7877 }, { "epoch": 3.828693009118541, "grad_norm": 0.06900075105207301, "learning_rate": 4.382331423342767e-08, "loss": 0.4872, "step": 7878 }, { "epoch": 3.8291793313069906, "grad_norm": 0.06973538266640854, "learning_rate": 4.3570734353528545e-08, "loss": 0.4986, "step": 7879 }, { "epoch": 3.8296656534954407, "grad_norm": 0.07416474799652553, "learning_rate": 4.331888127769801e-08, "loss": 0.5374, "step": 7880 }, { "epoch": 3.8301519756838904, "grad_norm": 0.07118278628780612, "learning_rate": 4.3067755042866534e-08, "loss": 0.5398, "step": 7881 }, { "epoch": 3.8306382978723406, "grad_norm": 0.07059650921553912, "learning_rate": 4.28173556858591e-08, "loss": 0.4968, "step": 7882 }, { "epoch": 3.8311246200607902, "grad_norm": 0.071473825792252, "learning_rate": 4.256768324339356e-08, "loss": 0.4863, "step": 7883 }, { "epoch": 3.83161094224924, "grad_norm": 0.07224562663968982, "learning_rate": 4.231873775208228e-08, "loss": 0.503, "step": 7884 }, { "epoch": 3.83209726443769, "grad_norm": 0.0731941168944801, "learning_rate": 4.207051924842942e-08, "loss": 0.5283, "step": 7885 }, { "epoch": 3.8325835866261397, "grad_norm": 0.0725168508435478, "learning_rate": 4.182302776883418e-08, "loss": 0.5349, "step": 7886 }, { "epoch": 3.83306990881459, "grad_norm": 0.0721460699220282, "learning_rate": 4.157626334958809e-08, "loss": 0.5211, "step": 7887 }, { "epoch": 3.8335562310030395, "grad_norm": 0.07329234530379539, "learning_rate": 4.133022602687664e-08, "loss": 0.5072, "step": 7888 }, { "epoch": 3.8340425531914892, "grad_norm": 0.07433649136403024, "learning_rate": 4.108491583677765e-08, "loss": 0.5368, "step": 7889 }, { "epoch": 3.8345288753799394, "grad_norm": 0.06986081044042201, "learning_rate": 4.084033281526345e-08, "loss": 0.5185, "step": 7890 }, { "epoch": 3.835015197568389, "grad_norm": 0.07157927764885447, "learning_rate": 4.0596476998199795e-08, "loss": 0.523, "step": 7891 }, { "epoch": 3.835501519756839, "grad_norm": 0.07272545510780835, "learning_rate": 4.035334842134475e-08, "loss": 0.5138, "step": 7892 }, { "epoch": 3.835987841945289, "grad_norm": 0.07047869766337185, "learning_rate": 4.011094712035091e-08, "loss": 0.4675, "step": 7893 }, { "epoch": 3.8364741641337385, "grad_norm": 0.06939849308348055, "learning_rate": 3.986927313076372e-08, "loss": 0.4822, "step": 7894 }, { "epoch": 3.8369604863221882, "grad_norm": 0.07034797788341138, "learning_rate": 3.962832648802151e-08, "loss": 0.516, "step": 7895 }, { "epoch": 3.8374468085106384, "grad_norm": 0.07225809705980982, "learning_rate": 3.9388107227456007e-08, "loss": 0.494, "step": 7896 }, { "epoch": 3.837933130699088, "grad_norm": 0.07161172741771306, "learning_rate": 3.914861538429349e-08, "loss": 0.5001, "step": 7897 }, { "epoch": 3.838419452887538, "grad_norm": 0.06865559628080964, "learning_rate": 3.890985099365196e-08, "loss": 0.4913, "step": 7898 }, { "epoch": 3.838905775075988, "grad_norm": 0.07014735261654034, "learning_rate": 3.867181409054399e-08, "loss": 0.5233, "step": 7899 }, { "epoch": 3.8393920972644375, "grad_norm": 0.07052245799038553, "learning_rate": 3.8434504709874974e-08, "loss": 0.5304, "step": 7900 }, { "epoch": 3.8398784194528877, "grad_norm": 0.072591351918813, "learning_rate": 3.81979228864432e-08, "loss": 0.5101, "step": 7901 }, { "epoch": 3.8403647416413373, "grad_norm": 0.07023520616863106, "learning_rate": 3.7962068654941454e-08, "loss": 0.4865, "step": 7902 }, { "epoch": 3.8408510638297875, "grad_norm": 0.07089444509138243, "learning_rate": 3.772694204995431e-08, "loss": 0.5143, "step": 7903 }, { "epoch": 3.841337386018237, "grad_norm": 0.0706212951663669, "learning_rate": 3.74925431059614e-08, "loss": 0.5216, "step": 7904 }, { "epoch": 3.841823708206687, "grad_norm": 0.07240029730870431, "learning_rate": 3.725887185733357e-08, "loss": 0.5256, "step": 7905 }, { "epoch": 3.8423100303951365, "grad_norm": 0.06826607263517383, "learning_rate": 3.702592833833618e-08, "loss": 0.4897, "step": 7906 }, { "epoch": 3.8427963525835866, "grad_norm": 0.06889763208378237, "learning_rate": 3.679371258312858e-08, "loss": 0.5102, "step": 7907 }, { "epoch": 3.8432826747720363, "grad_norm": 0.07105502092173523, "learning_rate": 3.656222462576187e-08, "loss": 0.5044, "step": 7908 }, { "epoch": 3.8437689969604865, "grad_norm": 0.07254610173429767, "learning_rate": 3.6331464500181656e-08, "loss": 0.5155, "step": 7909 }, { "epoch": 3.844255319148936, "grad_norm": 0.07228866457659237, "learning_rate": 3.610143224022589e-08, "loss": 0.4897, "step": 7910 }, { "epoch": 3.844741641337386, "grad_norm": 0.07329058159601545, "learning_rate": 3.5872127879625904e-08, "loss": 0.5269, "step": 7911 }, { "epoch": 3.845227963525836, "grad_norm": 0.0728705956119226, "learning_rate": 3.5643551452007595e-08, "loss": 0.5007, "step": 7912 }, { "epoch": 3.8457142857142856, "grad_norm": 0.07100347098671603, "learning_rate": 3.5415702990888035e-08, "loss": 0.52, "step": 7913 }, { "epoch": 3.8462006079027358, "grad_norm": 0.07433461743446373, "learning_rate": 3.518858252967883e-08, "loss": 0.4843, "step": 7914 }, { "epoch": 3.8466869300911855, "grad_norm": 0.0697692931923992, "learning_rate": 3.496219010168556e-08, "loss": 0.4854, "step": 7915 }, { "epoch": 3.847173252279635, "grad_norm": 0.07156558640182122, "learning_rate": 3.473652574010444e-08, "loss": 0.5126, "step": 7916 }, { "epoch": 3.8476595744680853, "grad_norm": 0.06953625004607342, "learning_rate": 3.451158947802846e-08, "loss": 0.4859, "step": 7917 }, { "epoch": 3.848145896656535, "grad_norm": 0.07036746057962606, "learning_rate": 3.428738134844012e-08, "loss": 0.5032, "step": 7918 }, { "epoch": 3.848632218844985, "grad_norm": 0.07130335421097073, "learning_rate": 3.406390138421867e-08, "loss": 0.5113, "step": 7919 }, { "epoch": 3.8491185410334348, "grad_norm": 0.07251656070863585, "learning_rate": 3.384114961813345e-08, "loss": 0.4894, "step": 7920 }, { "epoch": 3.8496048632218844, "grad_norm": 0.07201203072271546, "learning_rate": 3.361912608284945e-08, "loss": 0.5111, "step": 7921 }, { "epoch": 3.850091185410334, "grad_norm": 0.07087311142096846, "learning_rate": 3.339783081092396e-08, "loss": 0.5162, "step": 7922 }, { "epoch": 3.8505775075987843, "grad_norm": 0.06882443704770187, "learning_rate": 3.317726383480657e-08, "loss": 0.4748, "step": 7923 }, { "epoch": 3.851063829787234, "grad_norm": 0.06999204770726007, "learning_rate": 3.295742518684198e-08, "loss": 0.5043, "step": 7924 }, { "epoch": 3.851550151975684, "grad_norm": 0.07062946953927692, "learning_rate": 3.273831489926604e-08, "loss": 0.5002, "step": 7925 }, { "epoch": 3.8520364741641338, "grad_norm": 0.06907333204617308, "learning_rate": 3.251993300420919e-08, "loss": 0.4989, "step": 7926 }, { "epoch": 3.8525227963525834, "grad_norm": 0.0701487432594494, "learning_rate": 3.2302279533695244e-08, "loss": 0.4863, "step": 7927 }, { "epoch": 3.8530091185410336, "grad_norm": 0.07109656714211889, "learning_rate": 3.208535451963979e-08, "loss": 0.5198, "step": 7928 }, { "epoch": 3.8534954407294832, "grad_norm": 0.06805048706923046, "learning_rate": 3.186915799385237e-08, "loss": 0.4735, "step": 7929 }, { "epoch": 3.8539817629179334, "grad_norm": 0.07020111510368054, "learning_rate": 3.165368998803597e-08, "loss": 0.513, "step": 7930 }, { "epoch": 3.854468085106383, "grad_norm": 0.06853839430597691, "learning_rate": 3.143895053378698e-08, "loss": 0.4913, "step": 7931 }, { "epoch": 3.8549544072948327, "grad_norm": 0.06873485436844724, "learning_rate": 3.12249396625941e-08, "loss": 0.4745, "step": 7932 }, { "epoch": 3.8554407294832824, "grad_norm": 0.07019950593056339, "learning_rate": 3.101165740584e-08, "loss": 0.4949, "step": 7933 }, { "epoch": 3.8559270516717326, "grad_norm": 0.06906572506719856, "learning_rate": 3.079910379479911e-08, "loss": 0.4945, "step": 7934 }, { "epoch": 3.8564133738601822, "grad_norm": 0.07110717184636539, "learning_rate": 3.0587278860640946e-08, "loss": 0.4886, "step": 7935 }, { "epoch": 3.8568996960486324, "grad_norm": 0.07156272785598021, "learning_rate": 3.037618263442676e-08, "loss": 0.5273, "step": 7936 }, { "epoch": 3.857386018237082, "grad_norm": 0.07229447303050426, "learning_rate": 3.016581514711181e-08, "loss": 0.5408, "step": 7937 }, { "epoch": 3.8578723404255317, "grad_norm": 0.07061412520549051, "learning_rate": 2.9956176429543626e-08, "loss": 0.5363, "step": 7938 }, { "epoch": 3.858358662613982, "grad_norm": 0.07075999479098197, "learning_rate": 2.9747266512463735e-08, "loss": 0.5112, "step": 7939 }, { "epoch": 3.8588449848024315, "grad_norm": 0.0685432427703031, "learning_rate": 2.9539085426505965e-08, "loss": 0.4642, "step": 7940 }, { "epoch": 3.8593313069908817, "grad_norm": 0.06990221476977632, "learning_rate": 2.9331633202198116e-08, "loss": 0.4987, "step": 7941 }, { "epoch": 3.8598176291793314, "grad_norm": 0.0700837523202798, "learning_rate": 2.912490986996086e-08, "loss": 0.4944, "step": 7942 }, { "epoch": 3.860303951367781, "grad_norm": 0.07189610924231331, "learning_rate": 2.8918915460107723e-08, "loss": 0.5121, "step": 7943 }, { "epoch": 3.860790273556231, "grad_norm": 0.06933396439094201, "learning_rate": 2.871365000284454e-08, "loss": 0.5046, "step": 7944 }, { "epoch": 3.861276595744681, "grad_norm": 0.0713305139064588, "learning_rate": 2.8509113528272238e-08, "loss": 0.5034, "step": 7945 }, { "epoch": 3.8617629179331305, "grad_norm": 0.07142600151912995, "learning_rate": 2.8305306066383487e-08, "loss": 0.4954, "step": 7946 }, { "epoch": 3.8622492401215807, "grad_norm": 0.06839717905794623, "learning_rate": 2.8102227647064385e-08, "loss": 0.4615, "step": 7947 }, { "epoch": 3.8627355623100303, "grad_norm": 0.07174380305517765, "learning_rate": 2.7899878300093886e-08, "loss": 0.4838, "step": 7948 }, { "epoch": 3.86322188449848, "grad_norm": 0.07251124745635402, "learning_rate": 2.769825805514381e-08, "loss": 0.5365, "step": 7949 }, { "epoch": 3.86370820668693, "grad_norm": 0.07144880923228115, "learning_rate": 2.7497366941780513e-08, "loss": 0.4932, "step": 7950 }, { "epoch": 3.86419452887538, "grad_norm": 0.07028412591632652, "learning_rate": 2.7297204989461536e-08, "loss": 0.5106, "step": 7951 }, { "epoch": 3.86468085106383, "grad_norm": 0.07157316307252287, "learning_rate": 2.7097772227538956e-08, "loss": 0.5275, "step": 7952 }, { "epoch": 3.8651671732522797, "grad_norm": 0.07322448346050967, "learning_rate": 2.689906868525716e-08, "loss": 0.5032, "step": 7953 }, { "epoch": 3.8656534954407293, "grad_norm": 0.06944357777896402, "learning_rate": 2.6701094391753392e-08, "loss": 0.4881, "step": 7954 }, { "epoch": 3.8661398176291795, "grad_norm": 0.0691630501171565, "learning_rate": 2.650384937605832e-08, "loss": 0.4879, "step": 7955 }, { "epoch": 3.866626139817629, "grad_norm": 0.07000903279732242, "learning_rate": 2.6307333667096036e-08, "loss": 0.517, "step": 7956 }, { "epoch": 3.8671124620060793, "grad_norm": 0.07332953420884539, "learning_rate": 2.6111547293683482e-08, "loss": 0.4955, "step": 7957 }, { "epoch": 3.867598784194529, "grad_norm": 0.06766470855443787, "learning_rate": 2.591649028453047e-08, "loss": 0.4765, "step": 7958 }, { "epoch": 3.8680851063829786, "grad_norm": 0.07102516840665797, "learning_rate": 2.5722162668239124e-08, "loss": 0.522, "step": 7959 }, { "epoch": 3.8685714285714283, "grad_norm": 0.07205109579088757, "learning_rate": 2.5528564473306648e-08, "loss": 0.5011, "step": 7960 }, { "epoch": 3.8690577507598785, "grad_norm": 0.07371491152142387, "learning_rate": 2.5335695728120336e-08, "loss": 0.5115, "step": 7961 }, { "epoch": 3.869544072948328, "grad_norm": 0.07108892884186348, "learning_rate": 2.514355646096367e-08, "loss": 0.5184, "step": 7962 }, { "epoch": 3.8700303951367783, "grad_norm": 0.07012381297719256, "learning_rate": 2.495214670001134e-08, "loss": 0.5127, "step": 7963 }, { "epoch": 3.870516717325228, "grad_norm": 0.06854326583201292, "learning_rate": 2.4761466473331443e-08, "loss": 0.4836, "step": 7964 }, { "epoch": 3.8710030395136776, "grad_norm": 0.0699264852923121, "learning_rate": 2.457151580888495e-08, "loss": 0.5022, "step": 7965 }, { "epoch": 3.8714893617021278, "grad_norm": 0.07154132930607603, "learning_rate": 2.438229473452569e-08, "loss": 0.5231, "step": 7966 }, { "epoch": 3.8719756838905774, "grad_norm": 0.0729996863070197, "learning_rate": 2.4193803278000916e-08, "loss": 0.534, "step": 7967 }, { "epoch": 3.8724620060790276, "grad_norm": 0.0698960523369294, "learning_rate": 2.4006041466950735e-08, "loss": 0.5167, "step": 7968 }, { "epoch": 3.8729483282674773, "grad_norm": 0.07313663359826762, "learning_rate": 2.3819009328908683e-08, "loss": 0.4901, "step": 7969 }, { "epoch": 3.873434650455927, "grad_norm": 0.07341818534513772, "learning_rate": 2.3632706891300593e-08, "loss": 0.5032, "step": 7970 }, { "epoch": 3.8739209726443766, "grad_norm": 0.06797034121251241, "learning_rate": 2.344713418144573e-08, "loss": 0.4978, "step": 7971 }, { "epoch": 3.8744072948328268, "grad_norm": 0.07139995622111268, "learning_rate": 2.326229122655621e-08, "loss": 0.5086, "step": 7972 }, { "epoch": 3.8748936170212764, "grad_norm": 0.06973763921383579, "learning_rate": 2.307817805373702e-08, "loss": 0.5161, "step": 7973 }, { "epoch": 3.8753799392097266, "grad_norm": 0.0732180694004753, "learning_rate": 2.2894794689986565e-08, "loss": 0.5179, "step": 7974 }, { "epoch": 3.8758662613981762, "grad_norm": 0.07347820833299802, "learning_rate": 2.2712141162195e-08, "loss": 0.5198, "step": 7975 }, { "epoch": 3.876352583586626, "grad_norm": 0.07075042268700264, "learning_rate": 2.2530217497147566e-08, "loss": 0.5171, "step": 7976 }, { "epoch": 3.876838905775076, "grad_norm": 0.07148612546164884, "learning_rate": 2.23490237215207e-08, "loss": 0.5312, "step": 7977 }, { "epoch": 3.8773252279635257, "grad_norm": 0.07246561919183483, "learning_rate": 2.216855986188482e-08, "loss": 0.5209, "step": 7978 }, { "epoch": 3.877811550151976, "grad_norm": 0.0709196590158568, "learning_rate": 2.1988825944702086e-08, "loss": 0.5072, "step": 7979 }, { "epoch": 3.8782978723404256, "grad_norm": 0.07221483609910441, "learning_rate": 2.1809821996329195e-08, "loss": 0.5416, "step": 7980 }, { "epoch": 3.8787841945288752, "grad_norm": 0.06911499768906816, "learning_rate": 2.1631548043014593e-08, "loss": 0.4929, "step": 7981 }, { "epoch": 3.8792705167173254, "grad_norm": 0.07057777768451894, "learning_rate": 2.1454004110900706e-08, "loss": 0.5078, "step": 7982 }, { "epoch": 3.879756838905775, "grad_norm": 0.07049499313249019, "learning_rate": 2.1277190226021706e-08, "loss": 0.5225, "step": 7983 }, { "epoch": 3.880243161094225, "grad_norm": 0.07039969486107699, "learning_rate": 2.1101106414306293e-08, "loss": 0.52, "step": 7984 }, { "epoch": 3.880729483282675, "grad_norm": 0.07118495342841619, "learning_rate": 2.092575270157382e-08, "loss": 0.5165, "step": 7985 }, { "epoch": 3.8812158054711245, "grad_norm": 0.07177304519647379, "learning_rate": 2.0751129113538715e-08, "loss": 0.5211, "step": 7986 }, { "epoch": 3.8817021276595742, "grad_norm": 0.07136027164333868, "learning_rate": 2.0577235675807717e-08, "loss": 0.5272, "step": 7987 }, { "epoch": 3.8821884498480244, "grad_norm": 0.07170350874886534, "learning_rate": 2.0404072413879318e-08, "loss": 0.5141, "step": 7988 }, { "epoch": 3.882674772036474, "grad_norm": 0.07200504812634104, "learning_rate": 2.0231639353147093e-08, "loss": 0.5256, "step": 7989 }, { "epoch": 3.883161094224924, "grad_norm": 0.07150320420962743, "learning_rate": 2.0059936518895816e-08, "loss": 0.4851, "step": 7990 }, { "epoch": 3.883647416413374, "grad_norm": 0.0723740199137926, "learning_rate": 1.988896393630424e-08, "loss": 0.4972, "step": 7991 }, { "epoch": 3.8841337386018235, "grad_norm": 0.07019376784195688, "learning_rate": 1.971872163044286e-08, "loss": 0.503, "step": 7992 }, { "epoch": 3.8846200607902737, "grad_norm": 0.06803617292223385, "learning_rate": 1.9549209626276156e-08, "loss": 0.5013, "step": 7993 }, { "epoch": 3.8851063829787233, "grad_norm": 0.07025752497536777, "learning_rate": 1.9380427948660906e-08, "loss": 0.4938, "step": 7994 }, { "epoch": 3.8855927051671735, "grad_norm": 0.07168717704944641, "learning_rate": 1.9212376622347318e-08, "loss": 0.5256, "step": 7995 }, { "epoch": 3.886079027355623, "grad_norm": 0.06975030982400551, "learning_rate": 1.9045055671978452e-08, "loss": 0.5085, "step": 7996 }, { "epoch": 3.886565349544073, "grad_norm": 0.07066849529866817, "learning_rate": 1.8878465122089683e-08, "loss": 0.4946, "step": 7997 }, { "epoch": 3.8870516717325225, "grad_norm": 0.07087872286839735, "learning_rate": 1.8712604997108696e-08, "loss": 0.5033, "step": 7998 }, { "epoch": 3.8875379939209727, "grad_norm": 0.07137933732484057, "learning_rate": 1.854747532135881e-08, "loss": 0.4957, "step": 7999 }, { "epoch": 3.8880243161094223, "grad_norm": 0.07321259547447465, "learning_rate": 1.8383076119053433e-08, "loss": 0.4922, "step": 8000 }, { "epoch": 3.8885106382978725, "grad_norm": 0.0709579965162484, "learning_rate": 1.821940741429995e-08, "loss": 0.4958, "step": 8001 }, { "epoch": 3.888996960486322, "grad_norm": 0.07289988721569056, "learning_rate": 1.805646923109805e-08, "loss": 0.5131, "step": 8002 }, { "epoch": 3.889483282674772, "grad_norm": 0.06970760540710476, "learning_rate": 1.7894261593341956e-08, "loss": 0.4883, "step": 8003 }, { "epoch": 3.889969604863222, "grad_norm": 0.07111830826865695, "learning_rate": 1.773278452481597e-08, "loss": 0.5106, "step": 8004 }, { "epoch": 3.8904559270516716, "grad_norm": 0.0694227570324697, "learning_rate": 1.7572038049200603e-08, "loss": 0.5001, "step": 8005 }, { "epoch": 3.8909422492401218, "grad_norm": 0.07346816846689401, "learning_rate": 1.741202219006588e-08, "loss": 0.5364, "step": 8006 }, { "epoch": 3.8914285714285715, "grad_norm": 0.07277358815957531, "learning_rate": 1.7252736970877483e-08, "loss": 0.5103, "step": 8007 }, { "epoch": 3.891914893617021, "grad_norm": 0.07317398562816137, "learning_rate": 1.7094182414992277e-08, "loss": 0.5296, "step": 8008 }, { "epoch": 3.8924012158054713, "grad_norm": 0.07220176697891172, "learning_rate": 1.693635854566056e-08, "loss": 0.526, "step": 8009 }, { "epoch": 3.892887537993921, "grad_norm": 0.07039717442309316, "learning_rate": 1.6779265386025478e-08, "loss": 0.5375, "step": 8010 }, { "epoch": 3.893373860182371, "grad_norm": 0.07012526973137066, "learning_rate": 1.6622902959123055e-08, "loss": 0.4978, "step": 8011 }, { "epoch": 3.8938601823708208, "grad_norm": 0.07111492182383376, "learning_rate": 1.6467271287881615e-08, "loss": 0.5017, "step": 8012 }, { "epoch": 3.8943465045592704, "grad_norm": 0.0712681043073196, "learning_rate": 1.63123703951229e-08, "loss": 0.4995, "step": 8013 }, { "epoch": 3.89483282674772, "grad_norm": 0.07170752214295342, "learning_rate": 1.615820030356208e-08, "loss": 0.536, "step": 8014 }, { "epoch": 3.8953191489361703, "grad_norm": 0.07188190801573636, "learning_rate": 1.6004761035805505e-08, "loss": 0.5392, "step": 8015 }, { "epoch": 3.89580547112462, "grad_norm": 0.07183102857139019, "learning_rate": 1.5852052614354074e-08, "loss": 0.5327, "step": 8016 }, { "epoch": 3.89629179331307, "grad_norm": 0.06906320866272496, "learning_rate": 1.5700075061600427e-08, "loss": 0.5085, "step": 8017 }, { "epoch": 3.8967781155015198, "grad_norm": 0.06958322538993796, "learning_rate": 1.554882839982952e-08, "loss": 0.5061, "step": 8018 }, { "epoch": 3.8972644376899694, "grad_norm": 0.07141640504236503, "learning_rate": 1.539831265122138e-08, "loss": 0.5157, "step": 8019 }, { "epoch": 3.8977507598784196, "grad_norm": 0.06949156172828688, "learning_rate": 1.5248527837846694e-08, "loss": 0.514, "step": 8020 }, { "epoch": 3.8982370820668693, "grad_norm": 0.06985149360430036, "learning_rate": 1.509947398167011e-08, "loss": 0.4768, "step": 8021 }, { "epoch": 3.8987234042553194, "grad_norm": 0.07116323573215827, "learning_rate": 1.4951151104548034e-08, "loss": 0.5086, "step": 8022 }, { "epoch": 3.899209726443769, "grad_norm": 0.07192095375157564, "learning_rate": 1.4803559228230291e-08, "loss": 0.5206, "step": 8023 }, { "epoch": 3.8996960486322187, "grad_norm": 0.07393147855346544, "learning_rate": 1.4656698374360678e-08, "loss": 0.5586, "step": 8024 }, { "epoch": 3.9001823708206684, "grad_norm": 0.07006074145980727, "learning_rate": 1.4510568564473082e-08, "loss": 0.5385, "step": 8025 }, { "epoch": 3.9006686930091186, "grad_norm": 0.07144684980170236, "learning_rate": 1.4365169819997582e-08, "loss": 0.518, "step": 8026 }, { "epoch": 3.9011550151975682, "grad_norm": 0.06967983310849515, "learning_rate": 1.422050216225379e-08, "loss": 0.5157, "step": 8027 }, { "epoch": 3.9016413373860184, "grad_norm": 0.07032373783419817, "learning_rate": 1.4076565612455851e-08, "loss": 0.5328, "step": 8028 }, { "epoch": 3.902127659574468, "grad_norm": 0.07510804876309583, "learning_rate": 1.3933360191710766e-08, "loss": 0.5361, "step": 8029 }, { "epoch": 3.9026139817629177, "grad_norm": 0.0721959049120463, "learning_rate": 1.379088592101785e-08, "loss": 0.5086, "step": 8030 }, { "epoch": 3.903100303951368, "grad_norm": 0.07070208058292794, "learning_rate": 1.3649142821269834e-08, "loss": 0.4771, "step": 8031 }, { "epoch": 3.9035866261398176, "grad_norm": 0.0733363413301437, "learning_rate": 1.350813091325065e-08, "loss": 0.5219, "step": 8032 }, { "epoch": 3.9040729483282677, "grad_norm": 0.07031034853924799, "learning_rate": 1.3367850217639312e-08, "loss": 0.4882, "step": 8033 }, { "epoch": 3.9045592705167174, "grad_norm": 0.06892490282880302, "learning_rate": 1.3228300755005474e-08, "loss": 0.5073, "step": 8034 }, { "epoch": 3.905045592705167, "grad_norm": 0.06996258561675435, "learning_rate": 1.308948254581277e-08, "loss": 0.5063, "step": 8035 }, { "epoch": 3.905531914893617, "grad_norm": 0.07088594932322743, "learning_rate": 1.2951395610417139e-08, "loss": 0.5099, "step": 8036 }, { "epoch": 3.906018237082067, "grad_norm": 0.07284858078289273, "learning_rate": 1.2814039969067938e-08, "loss": 0.5215, "step": 8037 }, { "epoch": 3.906504559270517, "grad_norm": 0.07099830970134939, "learning_rate": 1.2677415641906277e-08, "loss": 0.5, "step": 8038 }, { "epoch": 3.9069908814589667, "grad_norm": 0.06909582846937622, "learning_rate": 1.2541522648966686e-08, "loss": 0.5178, "step": 8039 }, { "epoch": 3.9074772036474164, "grad_norm": 0.07344312000094438, "learning_rate": 1.2406361010177115e-08, "loss": 0.5519, "step": 8040 }, { "epoch": 3.907963525835866, "grad_norm": 0.06989499221506916, "learning_rate": 1.2271930745356153e-08, "loss": 0.5127, "step": 8041 }, { "epoch": 3.908449848024316, "grad_norm": 0.07147983131290152, "learning_rate": 1.2138231874217477e-08, "loss": 0.5047, "step": 8042 }, { "epoch": 3.908936170212766, "grad_norm": 0.07270367242485364, "learning_rate": 1.2005264416365958e-08, "loss": 0.5125, "step": 8043 }, { "epoch": 3.909422492401216, "grad_norm": 0.07285102824140043, "learning_rate": 1.1873028391300445e-08, "loss": 0.498, "step": 8044 }, { "epoch": 3.9099088145896657, "grad_norm": 0.07164730193100352, "learning_rate": 1.1741523818410983e-08, "loss": 0.5126, "step": 8045 }, { "epoch": 3.9103951367781153, "grad_norm": 0.07075149131951955, "learning_rate": 1.161075071698159e-08, "loss": 0.5196, "step": 8046 }, { "epoch": 3.9108814589665655, "grad_norm": 0.07368281119650384, "learning_rate": 1.1480709106189148e-08, "loss": 0.5633, "step": 8047 }, { "epoch": 3.911367781155015, "grad_norm": 0.07108252270608832, "learning_rate": 1.1351399005101737e-08, "loss": 0.4856, "step": 8048 }, { "epoch": 3.9118541033434653, "grad_norm": 0.0693175439233018, "learning_rate": 1.1222820432681969e-08, "loss": 0.5013, "step": 8049 }, { "epoch": 3.912340425531915, "grad_norm": 0.07234073173869401, "learning_rate": 1.109497340778476e-08, "loss": 0.5089, "step": 8050 }, { "epoch": 3.9128267477203647, "grad_norm": 0.07302117425737847, "learning_rate": 1.0967857949156224e-08, "loss": 0.5466, "step": 8051 }, { "epoch": 3.9133130699088143, "grad_norm": 0.07303509271950777, "learning_rate": 1.0841474075437563e-08, "loss": 0.4849, "step": 8052 }, { "epoch": 3.9137993920972645, "grad_norm": 0.07076824660442604, "learning_rate": 1.071582180516062e-08, "loss": 0.5153, "step": 8053 }, { "epoch": 3.914285714285714, "grad_norm": 0.07109986978236102, "learning_rate": 1.0590901156751765e-08, "loss": 0.5131, "step": 8054 }, { "epoch": 3.9147720364741643, "grad_norm": 0.07131947830451511, "learning_rate": 1.0466712148528569e-08, "loss": 0.5143, "step": 8055 }, { "epoch": 3.915258358662614, "grad_norm": 0.07175059732026251, "learning_rate": 1.0343254798702018e-08, "loss": 0.5503, "step": 8056 }, { "epoch": 3.9157446808510636, "grad_norm": 0.07202828609471504, "learning_rate": 1.0220529125375967e-08, "loss": 0.512, "step": 8057 }, { "epoch": 3.9162310030395138, "grad_norm": 0.0707318762087733, "learning_rate": 1.0098535146547128e-08, "loss": 0.5097, "step": 8058 }, { "epoch": 3.9167173252279635, "grad_norm": 0.07093996689822704, "learning_rate": 9.977272880103418e-09, "loss": 0.5279, "step": 8059 }, { "epoch": 3.9172036474164136, "grad_norm": 0.07153232852592072, "learning_rate": 9.856742343827275e-09, "loss": 0.5079, "step": 8060 }, { "epoch": 3.9176899696048633, "grad_norm": 0.07001829844735431, "learning_rate": 9.736943555392897e-09, "loss": 0.5286, "step": 8061 }, { "epoch": 3.918176291793313, "grad_norm": 0.07266345529267103, "learning_rate": 9.617876532367897e-09, "loss": 0.5486, "step": 8062 }, { "epoch": 3.918662613981763, "grad_norm": 0.06941666160229705, "learning_rate": 9.499541292211645e-09, "loss": 0.4737, "step": 8063 }, { "epoch": 3.9191489361702128, "grad_norm": 0.0698667328209181, "learning_rate": 9.381937852276924e-09, "loss": 0.4918, "step": 8064 }, { "epoch": 3.919635258358663, "grad_norm": 0.0728594624611233, "learning_rate": 9.265066229808272e-09, "loss": 0.5466, "step": 8065 }, { "epoch": 3.9201215805471126, "grad_norm": 0.07212304246567539, "learning_rate": 9.148926441944762e-09, "loss": 0.5341, "step": 8066 }, { "epoch": 3.9206079027355623, "grad_norm": 0.06768923129651373, "learning_rate": 9.0335185057161e-09, "loss": 0.4758, "step": 8067 }, { "epoch": 3.921094224924012, "grad_norm": 0.06987244158761928, "learning_rate": 8.918842438045416e-09, "loss": 0.5126, "step": 8068 }, { "epoch": 3.921580547112462, "grad_norm": 0.07084128925372642, "learning_rate": 8.804898255749261e-09, "loss": 0.5078, "step": 8069 }, { "epoch": 3.9220668693009118, "grad_norm": 0.0716854976258303, "learning_rate": 8.691685975535935e-09, "loss": 0.511, "step": 8070 }, { "epoch": 3.922553191489362, "grad_norm": 0.07222527846272814, "learning_rate": 8.579205614006603e-09, "loss": 0.5096, "step": 8071 }, { "epoch": 3.9230395136778116, "grad_norm": 0.06996761532935165, "learning_rate": 8.467457187655847e-09, "loss": 0.5227, "step": 8072 }, { "epoch": 3.9235258358662612, "grad_norm": 0.07161724318413952, "learning_rate": 8.356440712869452e-09, "loss": 0.5348, "step": 8073 }, { "epoch": 3.9240121580547114, "grad_norm": 0.07413587454398333, "learning_rate": 8.246156205927725e-09, "loss": 0.5382, "step": 8074 }, { "epoch": 3.924498480243161, "grad_norm": 0.07044649136763231, "learning_rate": 8.13660368300162e-09, "loss": 0.5072, "step": 8075 }, { "epoch": 3.924984802431611, "grad_norm": 0.06826681416322335, "learning_rate": 8.027783160156622e-09, "loss": 0.4963, "step": 8076 }, { "epoch": 3.925471124620061, "grad_norm": 0.0721365217239518, "learning_rate": 7.919694653349408e-09, "loss": 0.5051, "step": 8077 }, { "epoch": 3.9259574468085106, "grad_norm": 0.07278202553001152, "learning_rate": 7.812338178430079e-09, "loss": 0.5021, "step": 8078 }, { "epoch": 3.9264437689969602, "grad_norm": 0.07225236612920691, "learning_rate": 7.705713751141041e-09, "loss": 0.5579, "step": 8079 }, { "epoch": 3.9269300911854104, "grad_norm": 0.07197941205032261, "learning_rate": 7.599821387118122e-09, "loss": 0.5131, "step": 8080 }, { "epoch": 3.92741641337386, "grad_norm": 0.07153841104866336, "learning_rate": 7.494661101889456e-09, "loss": 0.5068, "step": 8081 }, { "epoch": 3.92790273556231, "grad_norm": 0.07887686925940758, "learning_rate": 7.390232910874373e-09, "loss": 0.5372, "step": 8082 }, { "epoch": 3.92838905775076, "grad_norm": 0.06896797961653627, "learning_rate": 7.286536829386737e-09, "loss": 0.5092, "step": 8083 }, { "epoch": 3.9288753799392095, "grad_norm": 0.07192585453559809, "learning_rate": 7.183572872632716e-09, "loss": 0.4893, "step": 8084 }, { "epoch": 3.9293617021276597, "grad_norm": 0.07287838702336884, "learning_rate": 7.081341055710789e-09, "loss": 0.5513, "step": 8085 }, { "epoch": 3.9298480243161094, "grad_norm": 0.076278886647371, "learning_rate": 6.979841393611741e-09, "loss": 0.533, "step": 8086 }, { "epoch": 3.9303343465045595, "grad_norm": 0.07175553030354498, "learning_rate": 6.879073901219224e-09, "loss": 0.5127, "step": 8087 }, { "epoch": 3.930820668693009, "grad_norm": 0.07206795743866867, "learning_rate": 6.7790385933097505e-09, "loss": 0.5086, "step": 8088 }, { "epoch": 3.931306990881459, "grad_norm": 0.07291996651267474, "learning_rate": 6.679735484552696e-09, "loss": 0.5287, "step": 8089 }, { "epoch": 3.931793313069909, "grad_norm": 0.07403055741183005, "learning_rate": 6.581164589509192e-09, "loss": 0.5072, "step": 8090 }, { "epoch": 3.9322796352583587, "grad_norm": 0.06903281597139206, "learning_rate": 6.483325922634342e-09, "loss": 0.4806, "step": 8091 }, { "epoch": 3.9327659574468083, "grad_norm": 0.06834538252196153, "learning_rate": 6.386219498274449e-09, "loss": 0.5005, "step": 8092 }, { "epoch": 3.9332522796352585, "grad_norm": 0.07238842386730485, "learning_rate": 6.289845330669231e-09, "loss": 0.512, "step": 8093 }, { "epoch": 3.933738601823708, "grad_norm": 0.0742271470241622, "learning_rate": 6.194203433951274e-09, "loss": 0.5067, "step": 8094 }, { "epoch": 3.934224924012158, "grad_norm": 0.07062883187495671, "learning_rate": 6.099293822144359e-09, "loss": 0.5013, "step": 8095 }, { "epoch": 3.934711246200608, "grad_norm": 0.06990834319789539, "learning_rate": 6.005116509166797e-09, "loss": 0.5117, "step": 8096 }, { "epoch": 3.9351975683890577, "grad_norm": 0.07039004221223741, "learning_rate": 5.911671508828098e-09, "loss": 0.4815, "step": 8097 }, { "epoch": 3.935683890577508, "grad_norm": 0.07292652009321607, "learning_rate": 5.81895883483119e-09, "loss": 0.5244, "step": 8098 }, { "epoch": 3.9361702127659575, "grad_norm": 0.07155828797770042, "learning_rate": 5.726978500771307e-09, "loss": 0.5088, "step": 8099 }, { "epoch": 3.936656534954407, "grad_norm": 0.06892749606309138, "learning_rate": 5.635730520136551e-09, "loss": 0.4707, "step": 8100 }, { "epoch": 3.9371428571428573, "grad_norm": 0.07224752014891758, "learning_rate": 5.5452149063067724e-09, "loss": 0.4893, "step": 8101 }, { "epoch": 3.937629179331307, "grad_norm": 0.07286475931072105, "learning_rate": 5.4554316725558e-09, "loss": 0.5401, "step": 8102 }, { "epoch": 3.938115501519757, "grad_norm": 0.07169064040307217, "learning_rate": 5.366380832048657e-09, "loss": 0.5364, "step": 8103 }, { "epoch": 3.9386018237082068, "grad_norm": 0.06964149781236798, "learning_rate": 5.278062397844341e-09, "loss": 0.4997, "step": 8104 }, { "epoch": 3.9390881458966565, "grad_norm": 0.07368815291661669, "learning_rate": 5.190476382893051e-09, "loss": 0.5127, "step": 8105 }, { "epoch": 3.939574468085106, "grad_norm": 0.07056507845515898, "learning_rate": 5.103622800038399e-09, "loss": 0.5012, "step": 8106 }, { "epoch": 3.9400607902735563, "grad_norm": 0.06900093577242564, "learning_rate": 5.017501662016866e-09, "loss": 0.4918, "step": 8107 }, { "epoch": 3.940547112462006, "grad_norm": 0.07093634738506353, "learning_rate": 4.932112981456682e-09, "loss": 0.504, "step": 8108 }, { "epoch": 3.941033434650456, "grad_norm": 0.07323133370754058, "learning_rate": 4.847456770880055e-09, "loss": 0.5589, "step": 8109 }, { "epoch": 3.9415197568389058, "grad_norm": 0.07079831119733815, "learning_rate": 4.7635330426992755e-09, "loss": 0.4945, "step": 8110 }, { "epoch": 3.9420060790273554, "grad_norm": 0.07454331180430393, "learning_rate": 4.680341809222277e-09, "loss": 0.5673, "step": 8111 }, { "epoch": 3.9424924012158056, "grad_norm": 0.0722031174688914, "learning_rate": 4.597883082647636e-09, "loss": 0.5304, "step": 8112 }, { "epoch": 3.9429787234042553, "grad_norm": 0.07369599139790713, "learning_rate": 4.51615687506679e-09, "loss": 0.5489, "step": 8113 }, { "epoch": 3.9434650455927054, "grad_norm": 0.07018931189411005, "learning_rate": 4.435163198463488e-09, "loss": 0.5016, "step": 8114 }, { "epoch": 3.943951367781155, "grad_norm": 0.07264213074530183, "learning_rate": 4.354902064716005e-09, "loss": 0.5356, "step": 8115 }, { "epoch": 3.9444376899696048, "grad_norm": 0.06940034409598339, "learning_rate": 4.275373485592149e-09, "loss": 0.5014, "step": 8116 }, { "epoch": 3.9449240121580544, "grad_norm": 0.07212645682487613, "learning_rate": 4.196577472754815e-09, "loss": 0.5045, "step": 8117 }, { "epoch": 3.9454103343465046, "grad_norm": 0.07022424093777124, "learning_rate": 4.118514037758093e-09, "loss": 0.5059, "step": 8118 }, { "epoch": 3.9458966565349542, "grad_norm": 0.06994721478412266, "learning_rate": 4.041183192049492e-09, "loss": 0.5344, "step": 8119 }, { "epoch": 3.9463829787234044, "grad_norm": 0.07063191809187022, "learning_rate": 3.96458494696883e-09, "loss": 0.4856, "step": 8120 }, { "epoch": 3.946869300911854, "grad_norm": 0.07206067649205745, "learning_rate": 3.88871931374768e-09, "loss": 0.5336, "step": 8121 }, { "epoch": 3.9473556231003037, "grad_norm": 0.07007946834779166, "learning_rate": 3.81358630351103e-09, "loss": 0.5124, "step": 8122 }, { "epoch": 3.947841945288754, "grad_norm": 0.06979905687357034, "learning_rate": 3.739185927276734e-09, "loss": 0.4766, "step": 8123 }, { "epoch": 3.9483282674772036, "grad_norm": 0.07092051187454781, "learning_rate": 3.6655181959543984e-09, "loss": 0.4803, "step": 8124 }, { "epoch": 3.9488145896656537, "grad_norm": 0.06894058270608466, "learning_rate": 3.5925831203470484e-09, "loss": 0.4813, "step": 8125 }, { "epoch": 3.9493009118541034, "grad_norm": 0.06977231889346967, "learning_rate": 3.5203807111489074e-09, "loss": 0.4875, "step": 8126 }, { "epoch": 3.949787234042553, "grad_norm": 0.07458863074111872, "learning_rate": 3.4489109789487275e-09, "loss": 0.5366, "step": 8127 }, { "epoch": 3.950273556231003, "grad_norm": 0.0726611346904699, "learning_rate": 3.3781739342259033e-09, "loss": 0.5401, "step": 8128 }, { "epoch": 3.950759878419453, "grad_norm": 0.07202718490680439, "learning_rate": 3.3081695873532493e-09, "loss": 0.4994, "step": 8129 }, { "epoch": 3.951246200607903, "grad_norm": 0.0703655299161122, "learning_rate": 3.2388979485964422e-09, "loss": 0.5312, "step": 8130 }, { "epoch": 3.9517325227963527, "grad_norm": 0.0696733281534409, "learning_rate": 3.1703590281134676e-09, "loss": 0.4759, "step": 8131 }, { "epoch": 3.9522188449848024, "grad_norm": 0.0737254573011368, "learning_rate": 3.1025528359540644e-09, "loss": 0.5452, "step": 8132 }, { "epoch": 3.952705167173252, "grad_norm": 0.07274737439261085, "learning_rate": 3.0354793820625005e-09, "loss": 0.5384, "step": 8133 }, { "epoch": 3.953191489361702, "grad_norm": 0.07124987279119638, "learning_rate": 2.969138676273131e-09, "loss": 0.5191, "step": 8134 }, { "epoch": 3.953677811550152, "grad_norm": 0.07023365395484578, "learning_rate": 2.9035307283142857e-09, "loss": 0.5106, "step": 8135 }, { "epoch": 3.954164133738602, "grad_norm": 0.07356515817803144, "learning_rate": 2.8386555478071566e-09, "loss": 0.5179, "step": 8136 }, { "epoch": 3.9546504559270517, "grad_norm": 0.07240874014530504, "learning_rate": 2.77451314426469e-09, "loss": 0.5562, "step": 8137 }, { "epoch": 3.9551367781155014, "grad_norm": 0.06962325511307683, "learning_rate": 2.7111035270926956e-09, "loss": 0.471, "step": 8138 }, { "epoch": 3.9556231003039515, "grad_norm": 0.07136874549247091, "learning_rate": 2.6484267055892907e-09, "loss": 0.4866, "step": 8139 }, { "epoch": 3.956109422492401, "grad_norm": 0.07181745592666004, "learning_rate": 2.5864826889454574e-09, "loss": 0.5488, "step": 8140 }, { "epoch": 3.9565957446808513, "grad_norm": 0.07221214086171204, "learning_rate": 2.5252714862444848e-09, "loss": 0.5036, "step": 8141 }, { "epoch": 3.957082066869301, "grad_norm": 0.07003244553923746, "learning_rate": 2.4647931064625263e-09, "loss": 0.4885, "step": 8142 }, { "epoch": 3.9575683890577507, "grad_norm": 0.06790757620095221, "learning_rate": 2.4050475584680433e-09, "loss": 0.4919, "step": 8143 }, { "epoch": 3.9580547112462003, "grad_norm": 0.07005590309632957, "learning_rate": 2.3460348510212503e-09, "loss": 0.4911, "step": 8144 }, { "epoch": 3.9585410334346505, "grad_norm": 0.07187730644827803, "learning_rate": 2.2877549927768914e-09, "loss": 0.5415, "step": 8145 }, { "epoch": 3.9590273556231, "grad_norm": 0.0706836695120867, "learning_rate": 2.230207992280353e-09, "loss": 0.5088, "step": 8146 }, { "epoch": 3.9595136778115503, "grad_norm": 0.072387026294712, "learning_rate": 2.1733938579698853e-09, "loss": 0.5014, "step": 8147 }, { "epoch": 3.96, "grad_norm": 0.07115738281883845, "learning_rate": 2.117312598177712e-09, "loss": 0.5066, "step": 8148 }, { "epoch": 3.9604863221884496, "grad_norm": 0.07139689653704843, "learning_rate": 2.0619642211266998e-09, "loss": 0.5096, "step": 8149 }, { "epoch": 3.9609726443768998, "grad_norm": 0.0702955967905899, "learning_rate": 2.0073487349336894e-09, "loss": 0.5149, "step": 8150 }, { "epoch": 3.9614589665653495, "grad_norm": 0.07102713027669254, "learning_rate": 1.9534661476067195e-09, "loss": 0.5191, "step": 8151 }, { "epoch": 3.9619452887537996, "grad_norm": 0.07154412624725065, "learning_rate": 1.9003164670472474e-09, "loss": 0.5256, "step": 8152 }, { "epoch": 3.9624316109422493, "grad_norm": 0.0720294956571181, "learning_rate": 1.8478997010490384e-09, "loss": 0.5151, "step": 8153 }, { "epoch": 3.962917933130699, "grad_norm": 0.06862619438173168, "learning_rate": 1.796215857298722e-09, "loss": 0.4634, "step": 8154 }, { "epoch": 3.963404255319149, "grad_norm": 0.0764921404797082, "learning_rate": 1.7452649433752355e-09, "loss": 0.6147, "step": 8155 }, { "epoch": 3.9638905775075988, "grad_norm": 0.06866013650053804, "learning_rate": 1.6950469667492697e-09, "loss": 0.4623, "step": 8156 }, { "epoch": 3.964376899696049, "grad_norm": 0.07074925107637062, "learning_rate": 1.645561934785489e-09, "loss": 0.4919, "step": 8157 }, { "epoch": 3.9648632218844986, "grad_norm": 0.07026614510420989, "learning_rate": 1.596809854739756e-09, "loss": 0.5281, "step": 8158 }, { "epoch": 3.9653495440729483, "grad_norm": 0.06977210402333377, "learning_rate": 1.5487907337613517e-09, "loss": 0.5136, "step": 8159 }, { "epoch": 3.965835866261398, "grad_norm": 0.07021467126198487, "learning_rate": 1.5015045788918658e-09, "loss": 0.4989, "step": 8160 }, { "epoch": 3.966322188449848, "grad_norm": 0.07117600998553947, "learning_rate": 1.454951397064641e-09, "loss": 0.4763, "step": 8161 }, { "epoch": 3.9668085106382978, "grad_norm": 0.06970448755775487, "learning_rate": 1.409131195106439e-09, "loss": 0.4962, "step": 8162 }, { "epoch": 3.967294832826748, "grad_norm": 0.07306336551471798, "learning_rate": 1.3640439797368843e-09, "loss": 0.5095, "step": 8163 }, { "epoch": 3.9677811550151976, "grad_norm": 0.07252556711660253, "learning_rate": 1.3196897575668e-09, "loss": 0.496, "step": 8164 }, { "epoch": 3.9682674772036473, "grad_norm": 0.06935909114039966, "learning_rate": 1.2760685351004277e-09, "loss": 0.5007, "step": 8165 }, { "epoch": 3.9687537993920974, "grad_norm": 0.07090171749214573, "learning_rate": 1.2331803187343171e-09, "loss": 0.524, "step": 8166 }, { "epoch": 3.969240121580547, "grad_norm": 0.07406312878779753, "learning_rate": 1.1910251147573272e-09, "loss": 0.5413, "step": 8167 }, { "epoch": 3.969726443768997, "grad_norm": 0.07114174440793454, "learning_rate": 1.149602929351179e-09, "loss": 0.4991, "step": 8168 }, { "epoch": 3.970212765957447, "grad_norm": 0.07040133790442153, "learning_rate": 1.1089137685904583e-09, "loss": 0.5125, "step": 8169 }, { "epoch": 3.9706990881458966, "grad_norm": 0.06936154269524242, "learning_rate": 1.0689576384415035e-09, "loss": 0.5126, "step": 8170 }, { "epoch": 3.9711854103343462, "grad_norm": 0.0722400663600258, "learning_rate": 1.0297345447629615e-09, "loss": 0.5659, "step": 8171 }, { "epoch": 3.9716717325227964, "grad_norm": 0.06972781353985649, "learning_rate": 9.912444933068976e-10, "loss": 0.4755, "step": 8172 }, { "epoch": 3.972158054711246, "grad_norm": 0.07047353065042987, "learning_rate": 9.534874897171309e-10, "loss": 0.516, "step": 8173 }, { "epoch": 3.972644376899696, "grad_norm": 0.07302752596832238, "learning_rate": 9.164635395303434e-10, "loss": 0.5601, "step": 8174 }, { "epoch": 3.973130699088146, "grad_norm": 0.0689001322167617, "learning_rate": 8.801726481766359e-10, "loss": 0.5049, "step": 8175 }, { "epoch": 3.9736170212765956, "grad_norm": 0.07428377465526251, "learning_rate": 8.446148209761973e-10, "loss": 0.5063, "step": 8176 }, { "epoch": 3.9741033434650457, "grad_norm": 0.07128211857910462, "learning_rate": 8.09790063143745e-10, "loss": 0.5382, "step": 8177 }, { "epoch": 3.9745896656534954, "grad_norm": 0.07057616688384419, "learning_rate": 7.75698379786305e-10, "loss": 0.5177, "step": 8178 }, { "epoch": 3.9750759878419455, "grad_norm": 0.07196260975657118, "learning_rate": 7.423397759026563e-10, "loss": 0.5351, "step": 8179 }, { "epoch": 3.975562310030395, "grad_norm": 0.07434715626005581, "learning_rate": 7.097142563844417e-10, "loss": 0.5404, "step": 8180 }, { "epoch": 3.976048632218845, "grad_norm": 0.07069529803170822, "learning_rate": 6.778218260161673e-10, "loss": 0.4838, "step": 8181 }, { "epoch": 3.976534954407295, "grad_norm": 0.06927011428174207, "learning_rate": 6.466624894740925e-10, "loss": 0.494, "step": 8182 }, { "epoch": 3.9770212765957447, "grad_norm": 0.07496577366429516, "learning_rate": 6.162362513273401e-10, "loss": 0.5345, "step": 8183 }, { "epoch": 3.977507598784195, "grad_norm": 0.06954442783966105, "learning_rate": 5.865431160378964e-10, "loss": 0.4961, "step": 8184 }, { "epoch": 3.9779939209726445, "grad_norm": 0.0726533837987503, "learning_rate": 5.575830879600564e-10, "loss": 0.4996, "step": 8185 }, { "epoch": 3.978480243161094, "grad_norm": 0.07340734325706312, "learning_rate": 5.293561713398676e-10, "loss": 0.5008, "step": 8186 }, { "epoch": 3.978966565349544, "grad_norm": 0.07397886319056134, "learning_rate": 5.01862370317352e-10, "loss": 0.578, "step": 8187 }, { "epoch": 3.979452887537994, "grad_norm": 0.07252826850789273, "learning_rate": 4.751016889231741e-10, "loss": 0.541, "step": 8188 }, { "epoch": 3.9799392097264437, "grad_norm": 0.07267762781569143, "learning_rate": 4.490741310819724e-10, "loss": 0.5543, "step": 8189 }, { "epoch": 3.980425531914894, "grad_norm": 0.07150693103716799, "learning_rate": 4.237797006106936e-10, "loss": 0.5079, "step": 8190 }, { "epoch": 3.9809118541033435, "grad_norm": 0.07052437152147178, "learning_rate": 3.9921840121803777e-10, "loss": 0.5186, "step": 8191 }, { "epoch": 3.981398176291793, "grad_norm": 0.06880520378124522, "learning_rate": 3.753902365061235e-10, "loss": 0.4956, "step": 8192 }, { "epoch": 3.9818844984802433, "grad_norm": 0.07061848070283695, "learning_rate": 3.522952099682675e-10, "loss": 0.5027, "step": 8193 }, { "epoch": 3.982370820668693, "grad_norm": 0.07418091822139052, "learning_rate": 3.2993332499176024e-10, "loss": 0.5466, "step": 8194 }, { "epoch": 3.982857142857143, "grad_norm": 0.07098606588952133, "learning_rate": 3.083045848550903e-10, "loss": 0.5056, "step": 8195 }, { "epoch": 3.983343465045593, "grad_norm": 0.0726395717090971, "learning_rate": 2.8740899273071996e-10, "loss": 0.4973, "step": 8196 }, { "epoch": 3.9838297872340425, "grad_norm": 0.07550587967062494, "learning_rate": 2.672465516823097e-10, "loss": 0.5216, "step": 8197 }, { "epoch": 3.984316109422492, "grad_norm": 0.07121613661275214, "learning_rate": 2.478172646663835e-10, "loss": 0.4895, "step": 8198 }, { "epoch": 3.9848024316109423, "grad_norm": 0.07246843768241126, "learning_rate": 2.2912113453232854e-10, "loss": 0.5193, "step": 8199 }, { "epoch": 3.985288753799392, "grad_norm": 0.07212811789144231, "learning_rate": 2.1115816402128563e-10, "loss": 0.5295, "step": 8200 }, { "epoch": 3.985775075987842, "grad_norm": 0.0853453254585128, "learning_rate": 1.9392835576725888e-10, "loss": 0.533, "step": 8201 }, { "epoch": 3.9862613981762918, "grad_norm": 0.06961462277246325, "learning_rate": 1.7743171229711586e-10, "loss": 0.4879, "step": 8202 }, { "epoch": 3.9867477203647415, "grad_norm": 0.06941821680312064, "learning_rate": 1.6166823603058768e-10, "loss": 0.5001, "step": 8203 }, { "epoch": 3.9872340425531916, "grad_norm": 0.07211203081906382, "learning_rate": 1.466379292774933e-10, "loss": 0.5075, "step": 8204 }, { "epoch": 3.9877203647416413, "grad_norm": 0.07044030763022163, "learning_rate": 1.3234079424384593e-10, "loss": 0.5077, "step": 8205 }, { "epoch": 3.9882066869300914, "grad_norm": 0.07147018420974952, "learning_rate": 1.187768330246364e-10, "loss": 0.5169, "step": 8206 }, { "epoch": 3.988693009118541, "grad_norm": 0.06998522917390052, "learning_rate": 1.0594604760938431e-10, "loss": 0.5009, "step": 8207 }, { "epoch": 3.9891793313069908, "grad_norm": 0.07150452256496002, "learning_rate": 9.384843987936266e-11, "loss": 0.5144, "step": 8208 }, { "epoch": 3.989665653495441, "grad_norm": 0.07307341410450179, "learning_rate": 8.24840116092629e-11, "loss": 0.519, "step": 8209 }, { "epoch": 3.9901519756838906, "grad_norm": 0.06852716940560657, "learning_rate": 7.185276446441958e-11, "loss": 0.4841, "step": 8210 }, { "epoch": 3.9906382978723407, "grad_norm": 0.07036543224293386, "learning_rate": 6.195470000525116e-11, "loss": 0.5117, "step": 8211 }, { "epoch": 3.9911246200607904, "grad_norm": 0.07149663926051224, "learning_rate": 5.278981968170893e-11, "loss": 0.5038, "step": 8212 }, { "epoch": 3.99161094224924, "grad_norm": 0.07080408322503196, "learning_rate": 4.4358124838828064e-11, "loss": 0.5283, "step": 8213 }, { "epoch": 3.9920972644376898, "grad_norm": 0.07180663847748395, "learning_rate": 3.665961671228679e-11, "loss": 0.527, "step": 8214 }, { "epoch": 3.99258358662614, "grad_norm": 0.07133509728331337, "learning_rate": 2.969429643118193e-11, "loss": 0.5191, "step": 8215 }, { "epoch": 3.9930699088145896, "grad_norm": 0.07008770849451361, "learning_rate": 2.346216501691867e-11, "loss": 0.5078, "step": 8216 }, { "epoch": 3.9935562310030397, "grad_norm": 0.07014451915149213, "learning_rate": 1.796322338376566e-11, "loss": 0.5052, "step": 8217 }, { "epoch": 3.9940425531914894, "grad_norm": 0.07066078685559711, "learning_rate": 1.3197472337744822e-11, "loss": 0.5024, "step": 8218 }, { "epoch": 3.994528875379939, "grad_norm": 0.07517486681995703, "learning_rate": 9.164912577741547e-12, "loss": 0.5505, "step": 8219 }, { "epoch": 3.995015197568389, "grad_norm": 0.07108906354695735, "learning_rate": 5.865544694949599e-12, "loss": 0.4933, "step": 8220 }, { "epoch": 3.995501519756839, "grad_norm": 0.06956036331651641, "learning_rate": 3.299369172871103e-12, "loss": 0.5009, "step": 8221 }, { "epoch": 3.995987841945289, "grad_norm": 0.0706466963612353, "learning_rate": 1.4663863889818885e-12, "loss": 0.5074, "step": 8222 }, { "epoch": 3.9964741641337387, "grad_norm": 0.07142461159105022, "learning_rate": 3.665966108457042e-13, "loss": 0.4782, "step": 8223 }, { "epoch": 3.9969604863221884, "grad_norm": 0.07134949425623466, "learning_rate": 0.0, "loss": 0.5257, "step": 8224 }, { "epoch": 3.9969604863221884, "eval_loss": 0.5693754553794861, "eval_runtime": 104.9899, "eval_samples_per_second": 289.104, "eval_steps_per_second": 36.146, "step": 8224 }, { "epoch": 4.00048632218845, "grad_norm": 0.06987722920988068, "learning_rate": 5.008605527125408e-06, "loss": 0.5249, "step": 8225 }, { "epoch": 4.000972644376899, "grad_norm": 0.07304031658279857, "learning_rate": 5.007649358237405e-06, "loss": 0.4971, "step": 8226 }, { "epoch": 4.00145896656535, "grad_norm": 0.07087156742228275, "learning_rate": 5.006693189069661e-06, "loss": 0.4925, "step": 8227 }, { "epoch": 4.0019452887538, "grad_norm": 0.07299306342503452, "learning_rate": 5.005737019657147e-06, "loss": 0.5113, "step": 8228 }, { "epoch": 4.002431610942249, "grad_norm": 0.07088033929123744, "learning_rate": 5.004780850034825e-06, "loss": 0.5015, "step": 8229 }, { "epoch": 4.002917933130699, "grad_norm": 0.07625491773835517, "learning_rate": 5.003824680237666e-06, "loss": 0.5461, "step": 8230 }, { "epoch": 4.003404255319149, "grad_norm": 0.07148663232221743, "learning_rate": 5.002868510300636e-06, "loss": 0.4638, "step": 8231 }, { "epoch": 4.003890577507598, "grad_norm": 0.07442791255292215, "learning_rate": 5.001912340258703e-06, "loss": 0.5325, "step": 8232 }, { "epoch": 4.004376899696049, "grad_norm": 0.07460171298984732, "learning_rate": 5.000956170146836e-06, "loss": 0.5423, "step": 8233 }, { "epoch": 4.004863221884499, "grad_norm": 0.07751291052820865, "learning_rate": 5e-06, "loss": 0.5103, "step": 8234 }, { "epoch": 4.005349544072948, "grad_norm": 0.0711764615739941, "learning_rate": 4.999043829853165e-06, "loss": 0.5166, "step": 8235 }, { "epoch": 4.005835866261398, "grad_norm": 0.07327707870288805, "learning_rate": 4.9980876597412985e-06, "loss": 0.5106, "step": 8236 }, { "epoch": 4.006322188449848, "grad_norm": 0.07373016803088536, "learning_rate": 4.997131489699365e-06, "loss": 0.508, "step": 8237 }, { "epoch": 4.006808510638298, "grad_norm": 0.07611251044609917, "learning_rate": 4.996175319762336e-06, "loss": 0.5181, "step": 8238 }, { "epoch": 4.007294832826748, "grad_norm": 0.07780645532882732, "learning_rate": 4.995219149965176e-06, "loss": 0.5149, "step": 8239 }, { "epoch": 4.007781155015198, "grad_norm": 0.0726645716616451, "learning_rate": 4.994262980342856e-06, "loss": 0.5072, "step": 8240 }, { "epoch": 4.008267477203647, "grad_norm": 0.07179560202076597, "learning_rate": 4.993306810930339e-06, "loss": 0.4785, "step": 8241 }, { "epoch": 4.008753799392097, "grad_norm": 0.07201453349903571, "learning_rate": 4.9923506417625955e-06, "loss": 0.4783, "step": 8242 }, { "epoch": 4.0092401215805475, "grad_norm": 0.07540298746065632, "learning_rate": 4.9913944728745925e-06, "loss": 0.5193, "step": 8243 }, { "epoch": 4.009726443768997, "grad_norm": 0.07614780971146975, "learning_rate": 4.990438304301299e-06, "loss": 0.5054, "step": 8244 }, { "epoch": 4.010212765957447, "grad_norm": 0.07529273969300707, "learning_rate": 4.989482136077679e-06, "loss": 0.5327, "step": 8245 }, { "epoch": 4.010699088145897, "grad_norm": 0.0730295857152015, "learning_rate": 4.988525968238703e-06, "loss": 0.524, "step": 8246 }, { "epoch": 4.011185410334346, "grad_norm": 0.07171160164164063, "learning_rate": 4.987569800819337e-06, "loss": 0.5181, "step": 8247 }, { "epoch": 4.011671732522796, "grad_norm": 0.07076031014339057, "learning_rate": 4.986613633854551e-06, "loss": 0.4903, "step": 8248 }, { "epoch": 4.0121580547112465, "grad_norm": 0.07501363195329876, "learning_rate": 4.985657467379308e-06, "loss": 0.5273, "step": 8249 }, { "epoch": 4.012644376899696, "grad_norm": 0.07463473227894854, "learning_rate": 4.98470130142858e-06, "loss": 0.5223, "step": 8250 }, { "epoch": 4.013130699088146, "grad_norm": 0.07716483246374314, "learning_rate": 4.983745136037331e-06, "loss": 0.5228, "step": 8251 }, { "epoch": 4.013617021276596, "grad_norm": 0.07234220880101895, "learning_rate": 4.98278897124053e-06, "loss": 0.4782, "step": 8252 }, { "epoch": 4.014103343465045, "grad_norm": 0.07384480458318275, "learning_rate": 4.981832807073143e-06, "loss": 0.512, "step": 8253 }, { "epoch": 4.014589665653496, "grad_norm": 0.07981209815387338, "learning_rate": 4.980876643570142e-06, "loss": 0.5528, "step": 8254 }, { "epoch": 4.0150759878419455, "grad_norm": 0.07530918411483194, "learning_rate": 4.979920480766488e-06, "loss": 0.536, "step": 8255 }, { "epoch": 4.015562310030395, "grad_norm": 0.0753745968480417, "learning_rate": 4.978964318697152e-06, "loss": 0.4961, "step": 8256 }, { "epoch": 4.016048632218845, "grad_norm": 0.0731400284668495, "learning_rate": 4.978008157397099e-06, "loss": 0.4932, "step": 8257 }, { "epoch": 4.016534954407295, "grad_norm": 0.07608280575089299, "learning_rate": 4.977051996901301e-06, "loss": 0.5201, "step": 8258 }, { "epoch": 4.017021276595744, "grad_norm": 0.07348973407919926, "learning_rate": 4.9760958372447185e-06, "loss": 0.4889, "step": 8259 }, { "epoch": 4.017507598784195, "grad_norm": 0.07636240363913128, "learning_rate": 4.975139678462324e-06, "loss": 0.4906, "step": 8260 }, { "epoch": 4.0179939209726445, "grad_norm": 0.0722656852489509, "learning_rate": 4.974183520589082e-06, "loss": 0.4855, "step": 8261 }, { "epoch": 4.018480243161094, "grad_norm": 0.07498030476452544, "learning_rate": 4.973227363659959e-06, "loss": 0.5158, "step": 8262 }, { "epoch": 4.018966565349544, "grad_norm": 0.07167201556798897, "learning_rate": 4.9722712077099255e-06, "loss": 0.5102, "step": 8263 }, { "epoch": 4.019452887537994, "grad_norm": 0.07358820873873567, "learning_rate": 4.971315052773945e-06, "loss": 0.5262, "step": 8264 }, { "epoch": 4.019939209726444, "grad_norm": 0.07436902319990749, "learning_rate": 4.970358898886989e-06, "loss": 0.5025, "step": 8265 }, { "epoch": 4.020425531914894, "grad_norm": 0.07588409367566396, "learning_rate": 4.969402746084019e-06, "loss": 0.491, "step": 8266 }, { "epoch": 4.0209118541033435, "grad_norm": 0.07609855860591161, "learning_rate": 4.9684465944000045e-06, "loss": 0.5063, "step": 8267 }, { "epoch": 4.021398176291793, "grad_norm": 0.07346674002556423, "learning_rate": 4.967490443869913e-06, "loss": 0.4947, "step": 8268 }, { "epoch": 4.021884498480243, "grad_norm": 0.07487369400302217, "learning_rate": 4.966534294528711e-06, "loss": 0.5124, "step": 8269 }, { "epoch": 4.0223708206686934, "grad_norm": 0.07800721007168725, "learning_rate": 4.965578146411364e-06, "loss": 0.5565, "step": 8270 }, { "epoch": 4.022857142857143, "grad_norm": 0.07800763230764782, "learning_rate": 4.964621999552841e-06, "loss": 0.505, "step": 8271 }, { "epoch": 4.023343465045593, "grad_norm": 0.0724595747284464, "learning_rate": 4.963665853988106e-06, "loss": 0.4855, "step": 8272 }, { "epoch": 4.0238297872340425, "grad_norm": 0.07816626330895647, "learning_rate": 4.96270970975213e-06, "loss": 0.5275, "step": 8273 }, { "epoch": 4.024316109422492, "grad_norm": 0.07249627500905842, "learning_rate": 4.961753566879874e-06, "loss": 0.4942, "step": 8274 }, { "epoch": 4.024802431610942, "grad_norm": 0.07347928119639274, "learning_rate": 4.96079742540631e-06, "loss": 0.5019, "step": 8275 }, { "epoch": 4.025288753799392, "grad_norm": 0.07248405057913224, "learning_rate": 4.9598412853663994e-06, "loss": 0.5363, "step": 8276 }, { "epoch": 4.025775075987842, "grad_norm": 0.07196782351160874, "learning_rate": 4.958885146795113e-06, "loss": 0.4902, "step": 8277 }, { "epoch": 4.026261398176292, "grad_norm": 0.07526385761607707, "learning_rate": 4.957929009727414e-06, "loss": 0.4978, "step": 8278 }, { "epoch": 4.0267477203647415, "grad_norm": 0.07864048019359021, "learning_rate": 4.956972874198272e-06, "loss": 0.5152, "step": 8279 }, { "epoch": 4.027234042553191, "grad_norm": 0.07759430388828786, "learning_rate": 4.956016740242651e-06, "loss": 0.5286, "step": 8280 }, { "epoch": 4.027720364741642, "grad_norm": 0.07246746999491008, "learning_rate": 4.955060607895517e-06, "loss": 0.4609, "step": 8281 }, { "epoch": 4.028206686930091, "grad_norm": 0.07600816888442034, "learning_rate": 4.954104477191837e-06, "loss": 0.5145, "step": 8282 }, { "epoch": 4.028693009118541, "grad_norm": 0.07374485174005918, "learning_rate": 4.953148348166579e-06, "loss": 0.5126, "step": 8283 }, { "epoch": 4.029179331306991, "grad_norm": 0.07296946755934197, "learning_rate": 4.9521922208547045e-06, "loss": 0.4964, "step": 8284 }, { "epoch": 4.0296656534954405, "grad_norm": 0.07666621804184696, "learning_rate": 4.951236095291184e-06, "loss": 0.5196, "step": 8285 }, { "epoch": 4.03015197568389, "grad_norm": 0.07309108649474487, "learning_rate": 4.95027997151098e-06, "loss": 0.5099, "step": 8286 }, { "epoch": 4.030638297872341, "grad_norm": 0.07485528933237076, "learning_rate": 4.94932384954906e-06, "loss": 0.5074, "step": 8287 }, { "epoch": 4.03112462006079, "grad_norm": 0.07492133312897797, "learning_rate": 4.948367729440393e-06, "loss": 0.5202, "step": 8288 }, { "epoch": 4.03161094224924, "grad_norm": 0.0761830811916514, "learning_rate": 4.947411611219938e-06, "loss": 0.5384, "step": 8289 }, { "epoch": 4.03209726443769, "grad_norm": 0.07395306993404442, "learning_rate": 4.946455494922668e-06, "loss": 0.5199, "step": 8290 }, { "epoch": 4.0325835866261395, "grad_norm": 0.0757393358938198, "learning_rate": 4.945499380583541e-06, "loss": 0.5255, "step": 8291 }, { "epoch": 4.03306990881459, "grad_norm": 0.07465702270700343, "learning_rate": 4.944543268237529e-06, "loss": 0.4846, "step": 8292 }, { "epoch": 4.03355623100304, "grad_norm": 0.07416351978628313, "learning_rate": 4.943587157919593e-06, "loss": 0.5087, "step": 8293 }, { "epoch": 4.034042553191489, "grad_norm": 0.07499156015079488, "learning_rate": 4.9426310496647025e-06, "loss": 0.5032, "step": 8294 }, { "epoch": 4.034528875379939, "grad_norm": 0.07499909907130087, "learning_rate": 4.941674943507818e-06, "loss": 0.5284, "step": 8295 }, { "epoch": 4.035015197568389, "grad_norm": 0.07565107936721804, "learning_rate": 4.940718839483909e-06, "loss": 0.5297, "step": 8296 }, { "epoch": 4.0355015197568385, "grad_norm": 0.07551316318035235, "learning_rate": 4.939762737627938e-06, "loss": 0.4989, "step": 8297 }, { "epoch": 4.035987841945289, "grad_norm": 0.07292462208638406, "learning_rate": 4.9388066379748725e-06, "loss": 0.4991, "step": 8298 }, { "epoch": 4.036474164133739, "grad_norm": 0.07018192302429595, "learning_rate": 4.937850540559675e-06, "loss": 0.483, "step": 8299 }, { "epoch": 4.036960486322188, "grad_norm": 0.07552310193010787, "learning_rate": 4.936894445417312e-06, "loss": 0.5109, "step": 8300 }, { "epoch": 4.037446808510638, "grad_norm": 0.07770641866458226, "learning_rate": 4.935938352582747e-06, "loss": 0.5493, "step": 8301 }, { "epoch": 4.037933130699088, "grad_norm": 0.07626169461086708, "learning_rate": 4.934982262090947e-06, "loss": 0.5197, "step": 8302 }, { "epoch": 4.038419452887538, "grad_norm": 0.07029229678580952, "learning_rate": 4.9340261739768734e-06, "loss": 0.4666, "step": 8303 }, { "epoch": 4.038905775075988, "grad_norm": 0.07058071882128329, "learning_rate": 4.933070088275494e-06, "loss": 0.4927, "step": 8304 }, { "epoch": 4.039392097264438, "grad_norm": 0.07227584313506363, "learning_rate": 4.932114005021772e-06, "loss": 0.4763, "step": 8305 }, { "epoch": 4.039878419452887, "grad_norm": 0.07529344441574466, "learning_rate": 4.93115792425067e-06, "loss": 0.5112, "step": 8306 }, { "epoch": 4.040364741641337, "grad_norm": 0.07246324321490295, "learning_rate": 4.930201845997155e-06, "loss": 0.4803, "step": 8307 }, { "epoch": 4.040851063829788, "grad_norm": 0.07152337273850556, "learning_rate": 4.929245770296191e-06, "loss": 0.4806, "step": 8308 }, { "epoch": 4.041337386018237, "grad_norm": 0.07504907189175945, "learning_rate": 4.92828969718274e-06, "loss": 0.5339, "step": 8309 }, { "epoch": 4.041823708206687, "grad_norm": 0.07109371557920573, "learning_rate": 4.9273336266917685e-06, "loss": 0.4968, "step": 8310 }, { "epoch": 4.042310030395137, "grad_norm": 0.07384910355549405, "learning_rate": 4.926377558858238e-06, "loss": 0.5142, "step": 8311 }, { "epoch": 4.042796352583586, "grad_norm": 0.07911339434259716, "learning_rate": 4.9254214937171144e-06, "loss": 0.546, "step": 8312 }, { "epoch": 4.043282674772036, "grad_norm": 0.07433159137103462, "learning_rate": 4.92446543130336e-06, "loss": 0.5068, "step": 8313 }, { "epoch": 4.043768996960487, "grad_norm": 0.07489035554560758, "learning_rate": 4.923509371651939e-06, "loss": 0.5293, "step": 8314 }, { "epoch": 4.044255319148936, "grad_norm": 0.07449143827797121, "learning_rate": 4.922553314797817e-06, "loss": 0.4977, "step": 8315 }, { "epoch": 4.044741641337386, "grad_norm": 0.07086697161949952, "learning_rate": 4.921597260775954e-06, "loss": 0.4806, "step": 8316 }, { "epoch": 4.045227963525836, "grad_norm": 0.0741159875188485, "learning_rate": 4.920641209621315e-06, "loss": 0.5156, "step": 8317 }, { "epoch": 4.045714285714285, "grad_norm": 0.07376534038221326, "learning_rate": 4.919685161368862e-06, "loss": 0.5418, "step": 8318 }, { "epoch": 4.046200607902736, "grad_norm": 0.07487511790617565, "learning_rate": 4.9187291160535615e-06, "loss": 0.5286, "step": 8319 }, { "epoch": 4.046686930091186, "grad_norm": 0.0726235926555809, "learning_rate": 4.917773073710372e-06, "loss": 0.513, "step": 8320 }, { "epoch": 4.047173252279635, "grad_norm": 0.07426291642458527, "learning_rate": 4.916817034374259e-06, "loss": 0.5015, "step": 8321 }, { "epoch": 4.047659574468085, "grad_norm": 0.07311915002192036, "learning_rate": 4.915860998080184e-06, "loss": 0.5106, "step": 8322 }, { "epoch": 4.048145896656535, "grad_norm": 0.07358610562882019, "learning_rate": 4.914904964863113e-06, "loss": 0.4932, "step": 8323 }, { "epoch": 4.048632218844984, "grad_norm": 0.07442255063043904, "learning_rate": 4.913948934758004e-06, "loss": 0.5242, "step": 8324 }, { "epoch": 4.049118541033435, "grad_norm": 0.07359918688109728, "learning_rate": 4.912992907799823e-06, "loss": 0.5384, "step": 8325 }, { "epoch": 4.049604863221885, "grad_norm": 0.07026683099809078, "learning_rate": 4.912036884023529e-06, "loss": 0.5004, "step": 8326 }, { "epoch": 4.050091185410334, "grad_norm": 0.07135693577891172, "learning_rate": 4.9110808634640885e-06, "loss": 0.5033, "step": 8327 }, { "epoch": 4.050577507598784, "grad_norm": 0.0737125754595248, "learning_rate": 4.910124846156459e-06, "loss": 0.5023, "step": 8328 }, { "epoch": 4.051063829787234, "grad_norm": 0.07168587289438462, "learning_rate": 4.909168832135607e-06, "loss": 0.4884, "step": 8329 }, { "epoch": 4.051550151975684, "grad_norm": 0.07666287110516042, "learning_rate": 4.90821282143649e-06, "loss": 0.5043, "step": 8330 }, { "epoch": 4.052036474164134, "grad_norm": 0.07949253015617697, "learning_rate": 4.907256814094073e-06, "loss": 0.5916, "step": 8331 }, { "epoch": 4.052522796352584, "grad_norm": 0.07264451891553099, "learning_rate": 4.9063008101433156e-06, "loss": 0.4927, "step": 8332 }, { "epoch": 4.053009118541033, "grad_norm": 0.0753920070326215, "learning_rate": 4.905344809619182e-06, "loss": 0.5171, "step": 8333 }, { "epoch": 4.053495440729483, "grad_norm": 0.07289025039595012, "learning_rate": 4.904388812556629e-06, "loss": 0.4758, "step": 8334 }, { "epoch": 4.0539817629179336, "grad_norm": 0.07505849587045671, "learning_rate": 4.9034328189906226e-06, "loss": 0.525, "step": 8335 }, { "epoch": 4.054468085106383, "grad_norm": 0.07331274692768641, "learning_rate": 4.90247682895612e-06, "loss": 0.5023, "step": 8336 }, { "epoch": 4.054954407294833, "grad_norm": 0.07358955410934745, "learning_rate": 4.901520842488087e-06, "loss": 0.4829, "step": 8337 }, { "epoch": 4.055440729483283, "grad_norm": 0.07354169698942832, "learning_rate": 4.900564859621479e-06, "loss": 0.5028, "step": 8338 }, { "epoch": 4.055927051671732, "grad_norm": 0.0719302294944975, "learning_rate": 4.899608880391259e-06, "loss": 0.5086, "step": 8339 }, { "epoch": 4.056413373860182, "grad_norm": 0.07258150943863832, "learning_rate": 4.898652904832389e-06, "loss": 0.4974, "step": 8340 }, { "epoch": 4.0568996960486325, "grad_norm": 0.08029129607170804, "learning_rate": 4.897696932979827e-06, "loss": 0.4945, "step": 8341 }, { "epoch": 4.057386018237082, "grad_norm": 0.07864295424139797, "learning_rate": 4.896740964868537e-06, "loss": 0.5663, "step": 8342 }, { "epoch": 4.057872340425532, "grad_norm": 0.07183141716746466, "learning_rate": 4.895785000533475e-06, "loss": 0.5172, "step": 8343 }, { "epoch": 4.058358662613982, "grad_norm": 0.07077327935443194, "learning_rate": 4.894829040009606e-06, "loss": 0.4706, "step": 8344 }, { "epoch": 4.058844984802431, "grad_norm": 0.07213827545903248, "learning_rate": 4.8938730833318825e-06, "loss": 0.4723, "step": 8345 }, { "epoch": 4.059331306990882, "grad_norm": 0.07322353846768895, "learning_rate": 4.892917130535271e-06, "loss": 0.5266, "step": 8346 }, { "epoch": 4.0598176291793315, "grad_norm": 0.07160238873466572, "learning_rate": 4.891961181654727e-06, "loss": 0.5026, "step": 8347 }, { "epoch": 4.060303951367781, "grad_norm": 0.07510253043640047, "learning_rate": 4.8910052367252146e-06, "loss": 0.513, "step": 8348 }, { "epoch": 4.060790273556231, "grad_norm": 0.07480732881359131, "learning_rate": 4.890049295781687e-06, "loss": 0.5158, "step": 8349 }, { "epoch": 4.061276595744681, "grad_norm": 0.07687670361687345, "learning_rate": 4.889093358859108e-06, "loss": 0.5014, "step": 8350 }, { "epoch": 4.06176291793313, "grad_norm": 0.07379484040025416, "learning_rate": 4.888137425992435e-06, "loss": 0.51, "step": 8351 }, { "epoch": 4.062249240121581, "grad_norm": 0.07554029126287756, "learning_rate": 4.887181497216628e-06, "loss": 0.5162, "step": 8352 }, { "epoch": 4.0627355623100305, "grad_norm": 0.07341335294156111, "learning_rate": 4.886225572566644e-06, "loss": 0.5118, "step": 8353 }, { "epoch": 4.06322188449848, "grad_norm": 0.07361731471875785, "learning_rate": 4.885269652077444e-06, "loss": 0.4636, "step": 8354 }, { "epoch": 4.06370820668693, "grad_norm": 0.07365564250204958, "learning_rate": 4.8843137357839836e-06, "loss": 0.4767, "step": 8355 }, { "epoch": 4.06419452887538, "grad_norm": 0.0729357769344785, "learning_rate": 4.883357823721222e-06, "loss": 0.5179, "step": 8356 }, { "epoch": 4.06468085106383, "grad_norm": 0.07149240912487374, "learning_rate": 4.8824019159241175e-06, "loss": 0.5041, "step": 8357 }, { "epoch": 4.06516717325228, "grad_norm": 0.0712629026057834, "learning_rate": 4.8814460124276305e-06, "loss": 0.5091, "step": 8358 }, { "epoch": 4.0656534954407295, "grad_norm": 0.07826148458011131, "learning_rate": 4.880490113266715e-06, "loss": 0.5324, "step": 8359 }, { "epoch": 4.066139817629179, "grad_norm": 0.07596092969879549, "learning_rate": 4.879534218476331e-06, "loss": 0.5138, "step": 8360 }, { "epoch": 4.066626139817629, "grad_norm": 0.07451951136200978, "learning_rate": 4.878578328091434e-06, "loss": 0.5292, "step": 8361 }, { "epoch": 4.0671124620060795, "grad_norm": 0.0724650741277835, "learning_rate": 4.877622442146985e-06, "loss": 0.496, "step": 8362 }, { "epoch": 4.067598784194529, "grad_norm": 0.07628316436925109, "learning_rate": 4.876666560677937e-06, "loss": 0.5263, "step": 8363 }, { "epoch": 4.068085106382979, "grad_norm": 0.07563351111660888, "learning_rate": 4.87571068371925e-06, "loss": 0.5333, "step": 8364 }, { "epoch": 4.0685714285714285, "grad_norm": 0.07377569860097302, "learning_rate": 4.874754811305879e-06, "loss": 0.483, "step": 8365 }, { "epoch": 4.069057750759878, "grad_norm": 0.07739807928793731, "learning_rate": 4.873798943472781e-06, "loss": 0.5356, "step": 8366 }, { "epoch": 4.069544072948328, "grad_norm": 0.07166219387027353, "learning_rate": 4.872843080254915e-06, "loss": 0.4746, "step": 8367 }, { "epoch": 4.0700303951367784, "grad_norm": 0.07359154610133878, "learning_rate": 4.871887221687233e-06, "loss": 0.5065, "step": 8368 }, { "epoch": 4.070516717325228, "grad_norm": 0.07649912048346863, "learning_rate": 4.870931367804696e-06, "loss": 0.5399, "step": 8369 }, { "epoch": 4.071003039513678, "grad_norm": 0.07548857514330723, "learning_rate": 4.869975518642255e-06, "loss": 0.4921, "step": 8370 }, { "epoch": 4.0714893617021275, "grad_norm": 0.07468029251851437, "learning_rate": 4.86901967423487e-06, "loss": 0.5233, "step": 8371 }, { "epoch": 4.071975683890577, "grad_norm": 0.07799294894236818, "learning_rate": 4.868063834617494e-06, "loss": 0.5275, "step": 8372 }, { "epoch": 4.072462006079028, "grad_norm": 0.07864036649754841, "learning_rate": 4.867107999825085e-06, "loss": 0.4984, "step": 8373 }, { "epoch": 4.072948328267477, "grad_norm": 0.07821686722444757, "learning_rate": 4.866152169892595e-06, "loss": 0.5696, "step": 8374 }, { "epoch": 4.073434650455927, "grad_norm": 0.07417454247051666, "learning_rate": 4.865196344854982e-06, "loss": 0.5102, "step": 8375 }, { "epoch": 4.073920972644377, "grad_norm": 0.07668708019371931, "learning_rate": 4.864240524747199e-06, "loss": 0.5109, "step": 8376 }, { "epoch": 4.0744072948328265, "grad_norm": 0.08065066959386501, "learning_rate": 4.863284709604204e-06, "loss": 0.5123, "step": 8377 }, { "epoch": 4.074893617021276, "grad_norm": 0.07448139730342614, "learning_rate": 4.862328899460947e-06, "loss": 0.4595, "step": 8378 }, { "epoch": 4.075379939209727, "grad_norm": 0.07671842343736177, "learning_rate": 4.861373094352386e-06, "loss": 0.5419, "step": 8379 }, { "epoch": 4.075866261398176, "grad_norm": 0.07777131323591312, "learning_rate": 4.860417294313472e-06, "loss": 0.5249, "step": 8380 }, { "epoch": 4.076352583586626, "grad_norm": 0.07598653032454367, "learning_rate": 4.859461499379164e-06, "loss": 0.5469, "step": 8381 }, { "epoch": 4.076838905775076, "grad_norm": 0.07176166833320136, "learning_rate": 4.85850570958441e-06, "loss": 0.4888, "step": 8382 }, { "epoch": 4.0773252279635255, "grad_norm": 0.07276747438348917, "learning_rate": 4.857549924964169e-06, "loss": 0.4932, "step": 8383 }, { "epoch": 4.077811550151976, "grad_norm": 0.07463859499197374, "learning_rate": 4.856594145553389e-06, "loss": 0.5122, "step": 8384 }, { "epoch": 4.078297872340426, "grad_norm": 0.0806355385410711, "learning_rate": 4.855638371387029e-06, "loss": 0.5452, "step": 8385 }, { "epoch": 4.078784194528875, "grad_norm": 0.07315902574618881, "learning_rate": 4.854682602500037e-06, "loss": 0.5142, "step": 8386 }, { "epoch": 4.079270516717325, "grad_norm": 0.07143494080084337, "learning_rate": 4.853726838927371e-06, "loss": 0.494, "step": 8387 }, { "epoch": 4.079756838905775, "grad_norm": 0.07131911191310064, "learning_rate": 4.852771080703978e-06, "loss": 0.4896, "step": 8388 }, { "epoch": 4.080243161094225, "grad_norm": 0.07468581761156592, "learning_rate": 4.851815327864815e-06, "loss": 0.5266, "step": 8389 }, { "epoch": 4.080729483282675, "grad_norm": 0.07662789409288262, "learning_rate": 4.850859580444832e-06, "loss": 0.5094, "step": 8390 }, { "epoch": 4.081215805471125, "grad_norm": 0.07452174842149409, "learning_rate": 4.8499038384789816e-06, "loss": 0.5022, "step": 8391 }, { "epoch": 4.081702127659574, "grad_norm": 0.0751309596755392, "learning_rate": 4.848948102002218e-06, "loss": 0.503, "step": 8392 }, { "epoch": 4.082188449848024, "grad_norm": 0.0740922383507223, "learning_rate": 4.847992371049489e-06, "loss": 0.539, "step": 8393 }, { "epoch": 4.082674772036474, "grad_norm": 0.07585553411796839, "learning_rate": 4.847036645655749e-06, "loss": 0.5517, "step": 8394 }, { "epoch": 4.083161094224924, "grad_norm": 0.07299891860442781, "learning_rate": 4.846080925855947e-06, "loss": 0.5157, "step": 8395 }, { "epoch": 4.083647416413374, "grad_norm": 0.07350908093952695, "learning_rate": 4.845125211685039e-06, "loss": 0.5038, "step": 8396 }, { "epoch": 4.084133738601824, "grad_norm": 0.07247360758118054, "learning_rate": 4.844169503177969e-06, "loss": 0.4977, "step": 8397 }, { "epoch": 4.084620060790273, "grad_norm": 0.07567373547740953, "learning_rate": 4.843213800369694e-06, "loss": 0.5195, "step": 8398 }, { "epoch": 4.085106382978723, "grad_norm": 0.07444425012663346, "learning_rate": 4.842258103295159e-06, "loss": 0.5305, "step": 8399 }, { "epoch": 4.085592705167174, "grad_norm": 0.07598434185453064, "learning_rate": 4.841302411989318e-06, "loss": 0.5206, "step": 8400 }, { "epoch": 4.086079027355623, "grad_norm": 0.07677697022737197, "learning_rate": 4.840346726487119e-06, "loss": 0.5021, "step": 8401 }, { "epoch": 4.086565349544073, "grad_norm": 0.07728882643219065, "learning_rate": 4.839391046823514e-06, "loss": 0.5314, "step": 8402 }, { "epoch": 4.087051671732523, "grad_norm": 0.07358295326272597, "learning_rate": 4.83843537303345e-06, "loss": 0.4904, "step": 8403 }, { "epoch": 4.087537993920972, "grad_norm": 0.07593188535565605, "learning_rate": 4.837479705151878e-06, "loss": 0.5259, "step": 8404 }, { "epoch": 4.088024316109422, "grad_norm": 0.07411325100474116, "learning_rate": 4.8365240432137465e-06, "loss": 0.5289, "step": 8405 }, { "epoch": 4.088510638297873, "grad_norm": 0.07211279051929438, "learning_rate": 4.835568387254008e-06, "loss": 0.4695, "step": 8406 }, { "epoch": 4.088996960486322, "grad_norm": 0.07484746303545112, "learning_rate": 4.8346127373076044e-06, "loss": 0.5221, "step": 8407 }, { "epoch": 4.089483282674772, "grad_norm": 0.0744416300688764, "learning_rate": 4.833657093409491e-06, "loss": 0.5126, "step": 8408 }, { "epoch": 4.089969604863222, "grad_norm": 0.07269969955565858, "learning_rate": 4.832701455594612e-06, "loss": 0.5214, "step": 8409 }, { "epoch": 4.090455927051671, "grad_norm": 0.07335578739278328, "learning_rate": 4.831745823897917e-06, "loss": 0.5138, "step": 8410 }, { "epoch": 4.090942249240122, "grad_norm": 0.07635890634093613, "learning_rate": 4.8307901983543535e-06, "loss": 0.5171, "step": 8411 }, { "epoch": 4.091428571428572, "grad_norm": 0.0766818077182476, "learning_rate": 4.82983457899887e-06, "loss": 0.5347, "step": 8412 }, { "epoch": 4.091914893617021, "grad_norm": 0.07118819501345552, "learning_rate": 4.8288789658664125e-06, "loss": 0.4925, "step": 8413 }, { "epoch": 4.092401215805471, "grad_norm": 0.0786071043349153, "learning_rate": 4.827923358991929e-06, "loss": 0.5483, "step": 8414 }, { "epoch": 4.092887537993921, "grad_norm": 0.07650896870916511, "learning_rate": 4.826967758410366e-06, "loss": 0.5056, "step": 8415 }, { "epoch": 4.093373860182371, "grad_norm": 0.07461718401611261, "learning_rate": 4.826012164156673e-06, "loss": 0.4904, "step": 8416 }, { "epoch": 4.093860182370821, "grad_norm": 0.07544098451107405, "learning_rate": 4.825056576265791e-06, "loss": 0.4898, "step": 8417 }, { "epoch": 4.094346504559271, "grad_norm": 0.07519129722240067, "learning_rate": 4.824100994772671e-06, "loss": 0.5369, "step": 8418 }, { "epoch": 4.09483282674772, "grad_norm": 0.07516849338187878, "learning_rate": 4.8231454197122575e-06, "loss": 0.5237, "step": 8419 }, { "epoch": 4.09531914893617, "grad_norm": 0.07856254107428146, "learning_rate": 4.822189851119495e-06, "loss": 0.5007, "step": 8420 }, { "epoch": 4.09580547112462, "grad_norm": 0.07515755797835864, "learning_rate": 4.8212342890293335e-06, "loss": 0.5205, "step": 8421 }, { "epoch": 4.09629179331307, "grad_norm": 0.07631363036739496, "learning_rate": 4.820278733476713e-06, "loss": 0.5279, "step": 8422 }, { "epoch": 4.09677811550152, "grad_norm": 0.07539843259341894, "learning_rate": 4.819323184496582e-06, "loss": 0.5551, "step": 8423 }, { "epoch": 4.09726443768997, "grad_norm": 0.07663154193959248, "learning_rate": 4.818367642123883e-06, "loss": 0.5129, "step": 8424 }, { "epoch": 4.097750759878419, "grad_norm": 0.07131465105836605, "learning_rate": 4.817412106393563e-06, "loss": 0.5329, "step": 8425 }, { "epoch": 4.098237082066869, "grad_norm": 0.07602080319672558, "learning_rate": 4.816456577340564e-06, "loss": 0.4994, "step": 8426 }, { "epoch": 4.09872340425532, "grad_norm": 0.07559629619139957, "learning_rate": 4.815501054999834e-06, "loss": 0.512, "step": 8427 }, { "epoch": 4.099209726443769, "grad_norm": 0.07919439788068093, "learning_rate": 4.814545539406311e-06, "loss": 0.5187, "step": 8428 }, { "epoch": 4.099696048632219, "grad_norm": 0.07850853057203927, "learning_rate": 4.813590030594944e-06, "loss": 0.4908, "step": 8429 }, { "epoch": 4.100182370820669, "grad_norm": 0.07743809474402, "learning_rate": 4.812634528600673e-06, "loss": 0.5484, "step": 8430 }, { "epoch": 4.100668693009118, "grad_norm": 0.07361810116202264, "learning_rate": 4.8116790334584435e-06, "loss": 0.4627, "step": 8431 }, { "epoch": 4.101155015197568, "grad_norm": 0.07378348470796924, "learning_rate": 4.810723545203196e-06, "loss": 0.4913, "step": 8432 }, { "epoch": 4.1016413373860185, "grad_norm": 0.07241972587777123, "learning_rate": 4.809768063869875e-06, "loss": 0.5176, "step": 8433 }, { "epoch": 4.102127659574468, "grad_norm": 0.07457140313801962, "learning_rate": 4.8088125894934215e-06, "loss": 0.5097, "step": 8434 }, { "epoch": 4.102613981762918, "grad_norm": 0.07494365427789915, "learning_rate": 4.807857122108781e-06, "loss": 0.5018, "step": 8435 }, { "epoch": 4.103100303951368, "grad_norm": 0.07571627120749001, "learning_rate": 4.806901661750891e-06, "loss": 0.5162, "step": 8436 }, { "epoch": 4.103586626139817, "grad_norm": 0.07296338650363951, "learning_rate": 4.8059462084546965e-06, "loss": 0.5006, "step": 8437 }, { "epoch": 4.104072948328268, "grad_norm": 0.0766451506480599, "learning_rate": 4.804990762255135e-06, "loss": 0.5208, "step": 8438 }, { "epoch": 4.1045592705167175, "grad_norm": 0.07165932527790829, "learning_rate": 4.8040353231871515e-06, "loss": 0.4932, "step": 8439 }, { "epoch": 4.105045592705167, "grad_norm": 0.0756174238550916, "learning_rate": 4.803079891285684e-06, "loss": 0.5354, "step": 8440 }, { "epoch": 4.105531914893617, "grad_norm": 0.07355449036255178, "learning_rate": 4.8021244665856764e-06, "loss": 0.4884, "step": 8441 }, { "epoch": 4.106018237082067, "grad_norm": 0.07263658461320514, "learning_rate": 4.801169049122065e-06, "loss": 0.4791, "step": 8442 }, { "epoch": 4.106504559270517, "grad_norm": 0.07264491467987305, "learning_rate": 4.800213638929792e-06, "loss": 0.4948, "step": 8443 }, { "epoch": 4.106990881458967, "grad_norm": 0.075062382012485, "learning_rate": 4.799258236043797e-06, "loss": 0.512, "step": 8444 }, { "epoch": 4.1074772036474165, "grad_norm": 0.07701915905724878, "learning_rate": 4.798302840499019e-06, "loss": 0.5009, "step": 8445 }, { "epoch": 4.107963525835866, "grad_norm": 0.07432349917959506, "learning_rate": 4.7973474523304e-06, "loss": 0.4869, "step": 8446 }, { "epoch": 4.108449848024316, "grad_norm": 0.07604818737076773, "learning_rate": 4.796392071572875e-06, "loss": 0.5073, "step": 8447 }, { "epoch": 4.108936170212766, "grad_norm": 0.07918190398610037, "learning_rate": 4.795436698261386e-06, "loss": 0.5458, "step": 8448 }, { "epoch": 4.109422492401216, "grad_norm": 0.07241430394658066, "learning_rate": 4.794481332430868e-06, "loss": 0.5071, "step": 8449 }, { "epoch": 4.109908814589666, "grad_norm": 0.07333025741684206, "learning_rate": 4.793525974116262e-06, "loss": 0.5073, "step": 8450 }, { "epoch": 4.1103951367781155, "grad_norm": 0.07369458476874377, "learning_rate": 4.792570623352504e-06, "loss": 0.5086, "step": 8451 }, { "epoch": 4.110881458966565, "grad_norm": 0.07554204454969354, "learning_rate": 4.791615280174535e-06, "loss": 0.4789, "step": 8452 }, { "epoch": 4.111367781155015, "grad_norm": 0.07462955760059636, "learning_rate": 4.790659944617287e-06, "loss": 0.5258, "step": 8453 }, { "epoch": 4.1118541033434655, "grad_norm": 0.07663972165689943, "learning_rate": 4.789704616715701e-06, "loss": 0.515, "step": 8454 }, { "epoch": 4.112340425531915, "grad_norm": 0.07386731959863665, "learning_rate": 4.788749296504712e-06, "loss": 0.5451, "step": 8455 }, { "epoch": 4.112826747720365, "grad_norm": 0.07300364202219237, "learning_rate": 4.78779398401926e-06, "loss": 0.4853, "step": 8456 }, { "epoch": 4.1133130699088145, "grad_norm": 0.07480187749125185, "learning_rate": 4.786838679294275e-06, "loss": 0.5021, "step": 8457 }, { "epoch": 4.113799392097264, "grad_norm": 0.07316756874914099, "learning_rate": 4.785883382364698e-06, "loss": 0.5024, "step": 8458 }, { "epoch": 4.114285714285714, "grad_norm": 0.07575717063939928, "learning_rate": 4.7849280932654625e-06, "loss": 0.5187, "step": 8459 }, { "epoch": 4.1147720364741645, "grad_norm": 0.07309875939571202, "learning_rate": 4.783972812031506e-06, "loss": 0.4893, "step": 8460 }, { "epoch": 4.115258358662614, "grad_norm": 0.07232193939457907, "learning_rate": 4.783017538697759e-06, "loss": 0.5138, "step": 8461 }, { "epoch": 4.115744680851064, "grad_norm": 0.07912145505561217, "learning_rate": 4.782062273299163e-06, "loss": 0.5809, "step": 8462 }, { "epoch": 4.1162310030395135, "grad_norm": 0.07345369400214838, "learning_rate": 4.781107015870645e-06, "loss": 0.5109, "step": 8463 }, { "epoch": 4.116717325227963, "grad_norm": 0.07494664278290203, "learning_rate": 4.780151766447145e-06, "loss": 0.5151, "step": 8464 }, { "epoch": 4.117203647416414, "grad_norm": 0.07702789300248726, "learning_rate": 4.779196525063593e-06, "loss": 0.521, "step": 8465 }, { "epoch": 4.117689969604863, "grad_norm": 0.07609796343293225, "learning_rate": 4.778241291754927e-06, "loss": 0.5615, "step": 8466 }, { "epoch": 4.118176291793313, "grad_norm": 0.0733806476170799, "learning_rate": 4.777286066556075e-06, "loss": 0.4975, "step": 8467 }, { "epoch": 4.118662613981763, "grad_norm": 0.0760401841307999, "learning_rate": 4.776330849501974e-06, "loss": 0.4808, "step": 8468 }, { "epoch": 4.1191489361702125, "grad_norm": 0.07238702981342365, "learning_rate": 4.775375640627555e-06, "loss": 0.5182, "step": 8469 }, { "epoch": 4.119635258358662, "grad_norm": 0.07388456835847891, "learning_rate": 4.77442043996775e-06, "loss": 0.4877, "step": 8470 }, { "epoch": 4.120121580547113, "grad_norm": 0.07236446820570626, "learning_rate": 4.773465247557494e-06, "loss": 0.5089, "step": 8471 }, { "epoch": 4.120607902735562, "grad_norm": 0.0742892714944287, "learning_rate": 4.772510063431716e-06, "loss": 0.5402, "step": 8472 }, { "epoch": 4.121094224924012, "grad_norm": 0.07565318559592946, "learning_rate": 4.771554887625348e-06, "loss": 0.5211, "step": 8473 }, { "epoch": 4.121580547112462, "grad_norm": 0.0751694433449456, "learning_rate": 4.770599720173321e-06, "loss": 0.5369, "step": 8474 }, { "epoch": 4.1220668693009115, "grad_norm": 0.07160251049156026, "learning_rate": 4.769644561110569e-06, "loss": 0.4756, "step": 8475 }, { "epoch": 4.122553191489362, "grad_norm": 0.07554071580412855, "learning_rate": 4.768689410472018e-06, "loss": 0.5269, "step": 8476 }, { "epoch": 4.123039513677812, "grad_norm": 0.07538997699840858, "learning_rate": 4.767734268292602e-06, "loss": 0.4852, "step": 8477 }, { "epoch": 4.123525835866261, "grad_norm": 0.07561541940040853, "learning_rate": 4.766779134607247e-06, "loss": 0.5174, "step": 8478 }, { "epoch": 4.124012158054711, "grad_norm": 0.07414870272579187, "learning_rate": 4.765824009450887e-06, "loss": 0.4668, "step": 8479 }, { "epoch": 4.124498480243161, "grad_norm": 0.07277545413423886, "learning_rate": 4.764868892858447e-06, "loss": 0.4995, "step": 8480 }, { "epoch": 4.124984802431611, "grad_norm": 0.07313710546007884, "learning_rate": 4.7639137848648616e-06, "loss": 0.5396, "step": 8481 }, { "epoch": 4.125471124620061, "grad_norm": 0.07784819306949427, "learning_rate": 4.7629586855050535e-06, "loss": 0.5573, "step": 8482 }, { "epoch": 4.125957446808511, "grad_norm": 0.07469256148305614, "learning_rate": 4.762003594813955e-06, "loss": 0.508, "step": 8483 }, { "epoch": 4.12644376899696, "grad_norm": 0.07676605050562277, "learning_rate": 4.761048512826493e-06, "loss": 0.5045, "step": 8484 }, { "epoch": 4.12693009118541, "grad_norm": 0.07463859453319577, "learning_rate": 4.760093439577597e-06, "loss": 0.4884, "step": 8485 }, { "epoch": 4.12741641337386, "grad_norm": 0.07271760362583618, "learning_rate": 4.759138375102191e-06, "loss": 0.4833, "step": 8486 }, { "epoch": 4.12790273556231, "grad_norm": 0.07436945294964803, "learning_rate": 4.7581833194352044e-06, "loss": 0.5313, "step": 8487 }, { "epoch": 4.12838905775076, "grad_norm": 0.07424254588825265, "learning_rate": 4.757228272611563e-06, "loss": 0.5007, "step": 8488 }, { "epoch": 4.12887537993921, "grad_norm": 0.07725554441577545, "learning_rate": 4.756273234666196e-06, "loss": 0.523, "step": 8489 }, { "epoch": 4.129361702127659, "grad_norm": 0.07181531891643472, "learning_rate": 4.755318205634026e-06, "loss": 0.4995, "step": 8490 }, { "epoch": 4.129848024316109, "grad_norm": 0.07713948845313415, "learning_rate": 4.754363185549982e-06, "loss": 0.5479, "step": 8491 }, { "epoch": 4.13033434650456, "grad_norm": 0.07510118932034981, "learning_rate": 4.753408174448986e-06, "loss": 0.483, "step": 8492 }, { "epoch": 4.130820668693009, "grad_norm": 0.07445380695687027, "learning_rate": 4.752453172365966e-06, "loss": 0.5193, "step": 8493 }, { "epoch": 4.131306990881459, "grad_norm": 0.07161513378779802, "learning_rate": 4.751498179335845e-06, "loss": 0.5021, "step": 8494 }, { "epoch": 4.131793313069909, "grad_norm": 0.078423799652204, "learning_rate": 4.750543195393551e-06, "loss": 0.5504, "step": 8495 }, { "epoch": 4.132279635258358, "grad_norm": 0.07435634578442701, "learning_rate": 4.749588220574003e-06, "loss": 0.5083, "step": 8496 }, { "epoch": 4.132765957446808, "grad_norm": 0.07032029957204687, "learning_rate": 4.748633254912128e-06, "loss": 0.483, "step": 8497 }, { "epoch": 4.133252279635259, "grad_norm": 0.07547310162210427, "learning_rate": 4.747678298442849e-06, "loss": 0.5313, "step": 8498 }, { "epoch": 4.133738601823708, "grad_norm": 0.07476563714440676, "learning_rate": 4.746723351201089e-06, "loss": 0.5251, "step": 8499 }, { "epoch": 4.134224924012158, "grad_norm": 0.07530646207841561, "learning_rate": 4.745768413221774e-06, "loss": 0.4939, "step": 8500 }, { "epoch": 4.134711246200608, "grad_norm": 0.07281349082892166, "learning_rate": 4.74481348453982e-06, "loss": 0.5081, "step": 8501 }, { "epoch": 4.135197568389057, "grad_norm": 0.07535371882766576, "learning_rate": 4.7438585651901555e-06, "loss": 0.502, "step": 8502 }, { "epoch": 4.135683890577508, "grad_norm": 0.07629967520929797, "learning_rate": 4.742903655207698e-06, "loss": 0.5179, "step": 8503 }, { "epoch": 4.136170212765958, "grad_norm": 0.07330226439801589, "learning_rate": 4.741948754627372e-06, "loss": 0.5393, "step": 8504 }, { "epoch": 4.136656534954407, "grad_norm": 0.07745839454052919, "learning_rate": 4.740993863484095e-06, "loss": 0.5384, "step": 8505 }, { "epoch": 4.137142857142857, "grad_norm": 0.07630889573110095, "learning_rate": 4.740038981812793e-06, "loss": 0.5351, "step": 8506 }, { "epoch": 4.137629179331307, "grad_norm": 0.07542290662599023, "learning_rate": 4.739084109648382e-06, "loss": 0.5372, "step": 8507 }, { "epoch": 4.138115501519757, "grad_norm": 0.0726327719816919, "learning_rate": 4.738129247025783e-06, "loss": 0.4963, "step": 8508 }, { "epoch": 4.138601823708207, "grad_norm": 0.07515230661333981, "learning_rate": 4.737174393979916e-06, "loss": 0.5159, "step": 8509 }, { "epoch": 4.139088145896657, "grad_norm": 0.07593225769332512, "learning_rate": 4.736219550545704e-06, "loss": 0.5067, "step": 8510 }, { "epoch": 4.139574468085106, "grad_norm": 0.0747183198223034, "learning_rate": 4.7352647167580595e-06, "loss": 0.5113, "step": 8511 }, { "epoch": 4.140060790273556, "grad_norm": 0.0727444087033036, "learning_rate": 4.734309892651907e-06, "loss": 0.4833, "step": 8512 }, { "epoch": 4.140547112462006, "grad_norm": 0.07356653661906826, "learning_rate": 4.733355078262159e-06, "loss": 0.5049, "step": 8513 }, { "epoch": 4.141033434650456, "grad_norm": 0.07472178029173547, "learning_rate": 4.732400273623741e-06, "loss": 0.5262, "step": 8514 }, { "epoch": 4.141519756838906, "grad_norm": 0.07460977023180501, "learning_rate": 4.731445478771564e-06, "loss": 0.504, "step": 8515 }, { "epoch": 4.142006079027356, "grad_norm": 0.075974412267872, "learning_rate": 4.730490693740551e-06, "loss": 0.55, "step": 8516 }, { "epoch": 4.142492401215805, "grad_norm": 0.07420864525004155, "learning_rate": 4.729535918565612e-06, "loss": 0.4944, "step": 8517 }, { "epoch": 4.142978723404255, "grad_norm": 0.07515982342346939, "learning_rate": 4.728581153281669e-06, "loss": 0.5232, "step": 8518 }, { "epoch": 4.143465045592706, "grad_norm": 0.0755553574457653, "learning_rate": 4.7276263979236354e-06, "loss": 0.5073, "step": 8519 }, { "epoch": 4.143951367781155, "grad_norm": 0.07368947400406485, "learning_rate": 4.72667165252643e-06, "loss": 0.517, "step": 8520 }, { "epoch": 4.144437689969605, "grad_norm": 0.07707189428488803, "learning_rate": 4.725716917124965e-06, "loss": 0.543, "step": 8521 }, { "epoch": 4.144924012158055, "grad_norm": 0.0727757337017132, "learning_rate": 4.724762191754157e-06, "loss": 0.52, "step": 8522 }, { "epoch": 4.145410334346504, "grad_norm": 0.074262894151074, "learning_rate": 4.7238074764489215e-06, "loss": 0.5228, "step": 8523 }, { "epoch": 4.145896656534954, "grad_norm": 0.0737422218344668, "learning_rate": 4.722852771244171e-06, "loss": 0.5122, "step": 8524 }, { "epoch": 4.1463829787234046, "grad_norm": 0.07511797028927264, "learning_rate": 4.721898076174822e-06, "loss": 0.5274, "step": 8525 }, { "epoch": 4.146869300911854, "grad_norm": 0.07481646911294902, "learning_rate": 4.720943391275786e-06, "loss": 0.5212, "step": 8526 }, { "epoch": 4.147355623100304, "grad_norm": 0.07258703612897424, "learning_rate": 4.719988716581977e-06, "loss": 0.4877, "step": 8527 }, { "epoch": 4.147841945288754, "grad_norm": 0.0737324916919453, "learning_rate": 4.719034052128307e-06, "loss": 0.5307, "step": 8528 }, { "epoch": 4.148328267477203, "grad_norm": 0.07371562653864114, "learning_rate": 4.718079397949691e-06, "loss": 0.5297, "step": 8529 }, { "epoch": 4.148814589665654, "grad_norm": 0.07302499564175673, "learning_rate": 4.717124754081038e-06, "loss": 0.4943, "step": 8530 }, { "epoch": 4.1493009118541035, "grad_norm": 0.08540931841216334, "learning_rate": 4.716170120557264e-06, "loss": 0.5181, "step": 8531 }, { "epoch": 4.149787234042553, "grad_norm": 0.0810716942002274, "learning_rate": 4.715215497413275e-06, "loss": 0.6227, "step": 8532 }, { "epoch": 4.150273556231003, "grad_norm": 0.07473611648709737, "learning_rate": 4.714260884683985e-06, "loss": 0.5305, "step": 8533 }, { "epoch": 4.150759878419453, "grad_norm": 0.07410754546538102, "learning_rate": 4.713306282404303e-06, "loss": 0.5072, "step": 8534 }, { "epoch": 4.151246200607902, "grad_norm": 0.07339648917271148, "learning_rate": 4.712351690609144e-06, "loss": 0.496, "step": 8535 }, { "epoch": 4.151732522796353, "grad_norm": 0.07440001898343732, "learning_rate": 4.7113971093334115e-06, "loss": 0.5297, "step": 8536 }, { "epoch": 4.1522188449848025, "grad_norm": 0.07202434766563388, "learning_rate": 4.710442538612019e-06, "loss": 0.4916, "step": 8537 }, { "epoch": 4.152705167173252, "grad_norm": 0.07403508750837864, "learning_rate": 4.709487978479873e-06, "loss": 0.5012, "step": 8538 }, { "epoch": 4.153191489361702, "grad_norm": 0.07535790937446205, "learning_rate": 4.708533428971886e-06, "loss": 0.4904, "step": 8539 }, { "epoch": 4.153677811550152, "grad_norm": 0.07373613606725883, "learning_rate": 4.707578890122962e-06, "loss": 0.5074, "step": 8540 }, { "epoch": 4.154164133738602, "grad_norm": 0.07349977027162974, "learning_rate": 4.706624361968013e-06, "loss": 0.5182, "step": 8541 }, { "epoch": 4.154650455927052, "grad_norm": 0.0729252228155845, "learning_rate": 4.705669844541942e-06, "loss": 0.5277, "step": 8542 }, { "epoch": 4.1551367781155015, "grad_norm": 0.07633788585071823, "learning_rate": 4.70471533787966e-06, "loss": 0.5298, "step": 8543 }, { "epoch": 4.155623100303951, "grad_norm": 0.07596510423713741, "learning_rate": 4.7037608420160706e-06, "loss": 0.4977, "step": 8544 }, { "epoch": 4.156109422492401, "grad_norm": 0.07709053723604035, "learning_rate": 4.7028063569860834e-06, "loss": 0.5192, "step": 8545 }, { "epoch": 4.1565957446808515, "grad_norm": 0.07294504698514606, "learning_rate": 4.701851882824602e-06, "loss": 0.5213, "step": 8546 }, { "epoch": 4.157082066869301, "grad_norm": 0.0747635006898603, "learning_rate": 4.700897419566533e-06, "loss": 0.5091, "step": 8547 }, { "epoch": 4.157568389057751, "grad_norm": 0.07572329051297934, "learning_rate": 4.69994296724678e-06, "loss": 0.5038, "step": 8548 }, { "epoch": 4.1580547112462005, "grad_norm": 0.07659586356667382, "learning_rate": 4.6989885259002495e-06, "loss": 0.4816, "step": 8549 }, { "epoch": 4.15854103343465, "grad_norm": 0.07192192965669973, "learning_rate": 4.698034095561847e-06, "loss": 0.4949, "step": 8550 }, { "epoch": 4.1590273556231, "grad_norm": 0.07316210150314204, "learning_rate": 4.697079676266473e-06, "loss": 0.5311, "step": 8551 }, { "epoch": 4.1595136778115505, "grad_norm": 0.07450112052090677, "learning_rate": 4.696125268049034e-06, "loss": 0.5271, "step": 8552 }, { "epoch": 4.16, "grad_norm": 0.0758047351351251, "learning_rate": 4.695170870944431e-06, "loss": 0.5265, "step": 8553 }, { "epoch": 4.16048632218845, "grad_norm": 0.07634671296173454, "learning_rate": 4.69421648498757e-06, "loss": 0.5402, "step": 8554 }, { "epoch": 4.1609726443768995, "grad_norm": 0.07641618843784993, "learning_rate": 4.6932621102133486e-06, "loss": 0.5188, "step": 8555 }, { "epoch": 4.161458966565349, "grad_norm": 0.07579075965807226, "learning_rate": 4.692307746656673e-06, "loss": 0.5305, "step": 8556 }, { "epoch": 4.1619452887538, "grad_norm": 0.0756996815933246, "learning_rate": 4.691353394352442e-06, "loss": 0.5276, "step": 8557 }, { "epoch": 4.1624316109422494, "grad_norm": 0.07567878484857823, "learning_rate": 4.690399053335557e-06, "loss": 0.4895, "step": 8558 }, { "epoch": 4.162917933130699, "grad_norm": 0.0801301112291178, "learning_rate": 4.689444723640919e-06, "loss": 0.514, "step": 8559 }, { "epoch": 4.163404255319149, "grad_norm": 0.07705269609616539, "learning_rate": 4.688490405303431e-06, "loss": 0.5234, "step": 8560 }, { "epoch": 4.1638905775075985, "grad_norm": 0.07284394538072295, "learning_rate": 4.687536098357988e-06, "loss": 0.4976, "step": 8561 }, { "epoch": 4.164376899696048, "grad_norm": 0.07661810538307577, "learning_rate": 4.686581802839493e-06, "loss": 0.5272, "step": 8562 }, { "epoch": 4.164863221884499, "grad_norm": 0.07472252280571419, "learning_rate": 4.685627518782843e-06, "loss": 0.5017, "step": 8563 }, { "epoch": 4.165349544072948, "grad_norm": 0.07381353930310107, "learning_rate": 4.684673246222939e-06, "loss": 0.4664, "step": 8564 }, { "epoch": 4.165835866261398, "grad_norm": 0.07914523833047131, "learning_rate": 4.683718985194676e-06, "loss": 0.5235, "step": 8565 }, { "epoch": 4.166322188449848, "grad_norm": 0.07672838283438023, "learning_rate": 4.682764735732954e-06, "loss": 0.5326, "step": 8566 }, { "epoch": 4.1668085106382975, "grad_norm": 0.07437841536997172, "learning_rate": 4.6818104978726685e-06, "loss": 0.4994, "step": 8567 }, { "epoch": 4.167294832826748, "grad_norm": 0.07621161484387089, "learning_rate": 4.68085627164872e-06, "loss": 0.4885, "step": 8568 }, { "epoch": 4.167781155015198, "grad_norm": 0.07621089522380564, "learning_rate": 4.679902057096001e-06, "loss": 0.5465, "step": 8569 }, { "epoch": 4.168267477203647, "grad_norm": 0.07666603167624715, "learning_rate": 4.678947854249412e-06, "loss": 0.4928, "step": 8570 }, { "epoch": 4.168753799392097, "grad_norm": 0.0799760010047343, "learning_rate": 4.677993663143842e-06, "loss": 0.5241, "step": 8571 }, { "epoch": 4.169240121580547, "grad_norm": 0.07601145026836639, "learning_rate": 4.677039483814192e-06, "loss": 0.5095, "step": 8572 }, { "epoch": 4.169726443768997, "grad_norm": 0.07319976740421791, "learning_rate": 4.676085316295353e-06, "loss": 0.5138, "step": 8573 }, { "epoch": 4.170212765957447, "grad_norm": 0.07698212498465612, "learning_rate": 4.675131160622224e-06, "loss": 0.5646, "step": 8574 }, { "epoch": 4.170699088145897, "grad_norm": 0.07341667373466226, "learning_rate": 4.674177016829694e-06, "loss": 0.4891, "step": 8575 }, { "epoch": 4.171185410334346, "grad_norm": 0.07533218591699581, "learning_rate": 4.673222884952659e-06, "loss": 0.5116, "step": 8576 }, { "epoch": 4.171671732522796, "grad_norm": 0.08239102919120085, "learning_rate": 4.672268765026011e-06, "loss": 0.4947, "step": 8577 }, { "epoch": 4.172158054711246, "grad_norm": 0.07472168464902597, "learning_rate": 4.671314657084644e-06, "loss": 0.5217, "step": 8578 }, { "epoch": 4.172644376899696, "grad_norm": 0.07329272817616501, "learning_rate": 4.67036056116345e-06, "loss": 0.4965, "step": 8579 }, { "epoch": 4.173130699088146, "grad_norm": 0.07489677119824412, "learning_rate": 4.669406477297319e-06, "loss": 0.5244, "step": 8580 }, { "epoch": 4.173617021276596, "grad_norm": 0.07301657628825546, "learning_rate": 4.668452405521143e-06, "loss": 0.4933, "step": 8581 }, { "epoch": 4.174103343465045, "grad_norm": 0.07529606263515964, "learning_rate": 4.667498345869813e-06, "loss": 0.5154, "step": 8582 }, { "epoch": 4.174589665653495, "grad_norm": 0.07614329140654127, "learning_rate": 4.666544298378222e-06, "loss": 0.5293, "step": 8583 }, { "epoch": 4.175075987841946, "grad_norm": 0.07618148460829599, "learning_rate": 4.665590263081255e-06, "loss": 0.5267, "step": 8584 }, { "epoch": 4.175562310030395, "grad_norm": 0.07813183655327673, "learning_rate": 4.664636240013805e-06, "loss": 0.5503, "step": 8585 }, { "epoch": 4.176048632218845, "grad_norm": 0.07654839960026084, "learning_rate": 4.66368222921076e-06, "loss": 0.5229, "step": 8586 }, { "epoch": 4.176534954407295, "grad_norm": 0.07231874862337323, "learning_rate": 4.662728230707008e-06, "loss": 0.4937, "step": 8587 }, { "epoch": 4.177021276595744, "grad_norm": 0.07439836542775184, "learning_rate": 4.661774244537438e-06, "loss": 0.5114, "step": 8588 }, { "epoch": 4.177507598784194, "grad_norm": 0.07676921242630429, "learning_rate": 4.660820270736939e-06, "loss": 0.5544, "step": 8589 }, { "epoch": 4.177993920972645, "grad_norm": 0.07605215328553559, "learning_rate": 4.659866309340395e-06, "loss": 0.5112, "step": 8590 }, { "epoch": 4.178480243161094, "grad_norm": 0.07774803695600067, "learning_rate": 4.658912360382695e-06, "loss": 0.5232, "step": 8591 }, { "epoch": 4.178966565349544, "grad_norm": 0.07554711015557045, "learning_rate": 4.657958423898725e-06, "loss": 0.5176, "step": 8592 }, { "epoch": 4.179452887537994, "grad_norm": 0.07465462389494179, "learning_rate": 4.657004499923372e-06, "loss": 0.5186, "step": 8593 }, { "epoch": 4.179939209726443, "grad_norm": 0.07392814739112226, "learning_rate": 4.656050588491519e-06, "loss": 0.4966, "step": 8594 }, { "epoch": 4.180425531914894, "grad_norm": 0.07615430034639238, "learning_rate": 4.655096689638054e-06, "loss": 0.5308, "step": 8595 }, { "epoch": 4.180911854103344, "grad_norm": 0.07523723262167048, "learning_rate": 4.654142803397857e-06, "loss": 0.5111, "step": 8596 }, { "epoch": 4.181398176291793, "grad_norm": 0.07937715225630966, "learning_rate": 4.653188929805816e-06, "loss": 0.5421, "step": 8597 }, { "epoch": 4.181884498480243, "grad_norm": 0.07172274786130603, "learning_rate": 4.652235068896813e-06, "loss": 0.4731, "step": 8598 }, { "epoch": 4.182370820668693, "grad_norm": 0.07612235852400556, "learning_rate": 4.651281220705733e-06, "loss": 0.5438, "step": 8599 }, { "epoch": 4.182857142857143, "grad_norm": 0.07617463385113146, "learning_rate": 4.650327385267456e-06, "loss": 0.4968, "step": 8600 }, { "epoch": 4.183343465045593, "grad_norm": 0.07172675714949191, "learning_rate": 4.649373562616865e-06, "loss": 0.4865, "step": 8601 }, { "epoch": 4.183829787234043, "grad_norm": 0.07423280489502632, "learning_rate": 4.648419752788843e-06, "loss": 0.5256, "step": 8602 }, { "epoch": 4.184316109422492, "grad_norm": 0.07411568394547702, "learning_rate": 4.647465955818269e-06, "loss": 0.4976, "step": 8603 }, { "epoch": 4.184802431610942, "grad_norm": 0.07847448634992887, "learning_rate": 4.646512171740028e-06, "loss": 0.5421, "step": 8604 }, { "epoch": 4.185288753799392, "grad_norm": 0.07112772827262814, "learning_rate": 4.6455584005889944e-06, "loss": 0.4735, "step": 8605 }, { "epoch": 4.185775075987842, "grad_norm": 0.07307619662880278, "learning_rate": 4.644604642400053e-06, "loss": 0.5223, "step": 8606 }, { "epoch": 4.186261398176292, "grad_norm": 0.07167307445154907, "learning_rate": 4.64365089720808e-06, "loss": 0.5249, "step": 8607 }, { "epoch": 4.186747720364742, "grad_norm": 0.07590288612844606, "learning_rate": 4.6426971650479575e-06, "loss": 0.5363, "step": 8608 }, { "epoch": 4.187234042553191, "grad_norm": 0.08237896067364611, "learning_rate": 4.64174344595456e-06, "loss": 0.5004, "step": 8609 }, { "epoch": 4.187720364741641, "grad_norm": 0.0733909118603107, "learning_rate": 4.64078973996277e-06, "loss": 0.5028, "step": 8610 }, { "epoch": 4.188206686930092, "grad_norm": 0.07239892222658986, "learning_rate": 4.63983604710746e-06, "loss": 0.5034, "step": 8611 }, { "epoch": 4.188693009118541, "grad_norm": 0.07719001751921614, "learning_rate": 4.63888236742351e-06, "loss": 0.5535, "step": 8612 }, { "epoch": 4.189179331306991, "grad_norm": 0.07688731979910943, "learning_rate": 4.637928700945795e-06, "loss": 0.5995, "step": 8613 }, { "epoch": 4.189665653495441, "grad_norm": 0.07393772045710352, "learning_rate": 4.636975047709195e-06, "loss": 0.4729, "step": 8614 }, { "epoch": 4.19015197568389, "grad_norm": 0.07393831761603349, "learning_rate": 4.6360214077485785e-06, "loss": 0.5213, "step": 8615 }, { "epoch": 4.19063829787234, "grad_norm": 0.07389979504158992, "learning_rate": 4.635067781098827e-06, "loss": 0.5048, "step": 8616 }, { "epoch": 4.191124620060791, "grad_norm": 0.07357789263361195, "learning_rate": 4.634114167794811e-06, "loss": 0.4757, "step": 8617 }, { "epoch": 4.19161094224924, "grad_norm": 0.07837721310275758, "learning_rate": 4.633160567871408e-06, "loss": 0.5543, "step": 8618 }, { "epoch": 4.19209726443769, "grad_norm": 0.07691414190321177, "learning_rate": 4.632206981363488e-06, "loss": 0.5221, "step": 8619 }, { "epoch": 4.19258358662614, "grad_norm": 0.07204873070183852, "learning_rate": 4.631253408305927e-06, "loss": 0.4894, "step": 8620 }, { "epoch": 4.193069908814589, "grad_norm": 0.0744755189278179, "learning_rate": 4.630299848733595e-06, "loss": 0.5263, "step": 8621 }, { "epoch": 4.19355623100304, "grad_norm": 0.0733820070362056, "learning_rate": 4.629346302681367e-06, "loss": 0.5058, "step": 8622 }, { "epoch": 4.1940425531914896, "grad_norm": 0.07433033885422978, "learning_rate": 4.628392770184112e-06, "loss": 0.515, "step": 8623 }, { "epoch": 4.194528875379939, "grad_norm": 0.0733964033081725, "learning_rate": 4.627439251276704e-06, "loss": 0.4928, "step": 8624 }, { "epoch": 4.195015197568389, "grad_norm": 0.07711271776796749, "learning_rate": 4.626485745994009e-06, "loss": 0.5265, "step": 8625 }, { "epoch": 4.195501519756839, "grad_norm": 0.07271916532576288, "learning_rate": 4.6255322543709025e-06, "loss": 0.5148, "step": 8626 }, { "epoch": 4.195987841945289, "grad_norm": 0.07990990018849148, "learning_rate": 4.624578776442249e-06, "loss": 0.5292, "step": 8627 }, { "epoch": 4.196474164133739, "grad_norm": 0.0732324168012335, "learning_rate": 4.623625312242922e-06, "loss": 0.4914, "step": 8628 }, { "epoch": 4.1969604863221885, "grad_norm": 0.0744643730181638, "learning_rate": 4.622671861807788e-06, "loss": 0.5059, "step": 8629 }, { "epoch": 4.197446808510638, "grad_norm": 0.07275270708369055, "learning_rate": 4.621718425171716e-06, "loss": 0.5145, "step": 8630 }, { "epoch": 4.197933130699088, "grad_norm": 0.07414208509829934, "learning_rate": 4.620765002369573e-06, "loss": 0.5336, "step": 8631 }, { "epoch": 4.198419452887538, "grad_norm": 0.07464983337027688, "learning_rate": 4.619811593436224e-06, "loss": 0.5239, "step": 8632 }, { "epoch": 4.198905775075988, "grad_norm": 0.07314849326706202, "learning_rate": 4.618858198406541e-06, "loss": 0.4961, "step": 8633 }, { "epoch": 4.199392097264438, "grad_norm": 0.08339314949028541, "learning_rate": 4.6179048173153845e-06, "loss": 0.6021, "step": 8634 }, { "epoch": 4.1998784194528875, "grad_norm": 0.07671418208163941, "learning_rate": 4.616951450197624e-06, "loss": 0.5165, "step": 8635 }, { "epoch": 4.200364741641337, "grad_norm": 0.0763546290088519, "learning_rate": 4.6159980970881225e-06, "loss": 0.5216, "step": 8636 }, { "epoch": 4.200851063829787, "grad_norm": 0.07595293413864083, "learning_rate": 4.615044758021745e-06, "loss": 0.5361, "step": 8637 }, { "epoch": 4.2013373860182375, "grad_norm": 0.07565301608467517, "learning_rate": 4.614091433033354e-06, "loss": 0.5397, "step": 8638 }, { "epoch": 4.201823708206687, "grad_norm": 0.07574818133900997, "learning_rate": 4.613138122157817e-06, "loss": 0.5028, "step": 8639 }, { "epoch": 4.202310030395137, "grad_norm": 0.07393028628759328, "learning_rate": 4.612184825429994e-06, "loss": 0.4898, "step": 8640 }, { "epoch": 4.2027963525835865, "grad_norm": 0.07531791085120113, "learning_rate": 4.611231542884747e-06, "loss": 0.5237, "step": 8641 }, { "epoch": 4.203282674772036, "grad_norm": 0.07780320279590346, "learning_rate": 4.61027827455694e-06, "loss": 0.5362, "step": 8642 }, { "epoch": 4.203768996960486, "grad_norm": 0.07651307564738992, "learning_rate": 4.609325020481435e-06, "loss": 0.4989, "step": 8643 }, { "epoch": 4.2042553191489365, "grad_norm": 0.07050200957379826, "learning_rate": 4.6083717806930884e-06, "loss": 0.4672, "step": 8644 }, { "epoch": 4.204741641337386, "grad_norm": 0.07406671092151201, "learning_rate": 4.607418555226766e-06, "loss": 0.5103, "step": 8645 }, { "epoch": 4.205227963525836, "grad_norm": 0.07501517962427871, "learning_rate": 4.606465344117324e-06, "loss": 0.5056, "step": 8646 }, { "epoch": 4.2057142857142855, "grad_norm": 0.076291861748952, "learning_rate": 4.6055121473996245e-06, "loss": 0.521, "step": 8647 }, { "epoch": 4.206200607902735, "grad_norm": 0.07742683853975692, "learning_rate": 4.604558965108524e-06, "loss": 0.5258, "step": 8648 }, { "epoch": 4.206686930091186, "grad_norm": 0.07574776110495535, "learning_rate": 4.603605797278883e-06, "loss": 0.4965, "step": 8649 }, { "epoch": 4.2071732522796355, "grad_norm": 0.074181891208974, "learning_rate": 4.602652643945557e-06, "loss": 0.5075, "step": 8650 }, { "epoch": 4.207659574468085, "grad_norm": 0.07358544889370713, "learning_rate": 4.601699505143404e-06, "loss": 0.5043, "step": 8651 }, { "epoch": 4.208145896656535, "grad_norm": 0.07604478950482464, "learning_rate": 4.6007463809072815e-06, "loss": 0.5101, "step": 8652 }, { "epoch": 4.2086322188449845, "grad_norm": 0.07641742178833977, "learning_rate": 4.5997932712720435e-06, "loss": 0.4681, "step": 8653 }, { "epoch": 4.209118541033435, "grad_norm": 0.07259836123755714, "learning_rate": 4.598840176272551e-06, "loss": 0.4938, "step": 8654 }, { "epoch": 4.209604863221885, "grad_norm": 0.07715505768697828, "learning_rate": 4.597887095943653e-06, "loss": 0.5136, "step": 8655 }, { "epoch": 4.2100911854103344, "grad_norm": 0.07646212567090423, "learning_rate": 4.596934030320207e-06, "loss": 0.5383, "step": 8656 }, { "epoch": 4.210577507598784, "grad_norm": 0.07363603651140135, "learning_rate": 4.595980979437067e-06, "loss": 0.522, "step": 8657 }, { "epoch": 4.211063829787234, "grad_norm": 0.07864830286149185, "learning_rate": 4.595027943329087e-06, "loss": 0.5393, "step": 8658 }, { "epoch": 4.2115501519756835, "grad_norm": 0.0736310624462123, "learning_rate": 4.594074922031117e-06, "loss": 0.5039, "step": 8659 }, { "epoch": 4.212036474164134, "grad_norm": 0.07275290155325348, "learning_rate": 4.593121915578013e-06, "loss": 0.4986, "step": 8660 }, { "epoch": 4.212522796352584, "grad_norm": 0.07979793846762186, "learning_rate": 4.592168924004624e-06, "loss": 0.5679, "step": 8661 }, { "epoch": 4.213009118541033, "grad_norm": 0.07410067545416402, "learning_rate": 4.591215947345806e-06, "loss": 0.5025, "step": 8662 }, { "epoch": 4.213495440729483, "grad_norm": 0.07410159786805302, "learning_rate": 4.590262985636403e-06, "loss": 0.5363, "step": 8663 }, { "epoch": 4.213981762917933, "grad_norm": 0.0736693982797478, "learning_rate": 4.5893100389112715e-06, "loss": 0.5159, "step": 8664 }, { "epoch": 4.214468085106383, "grad_norm": 0.07565466476913871, "learning_rate": 4.588357107205256e-06, "loss": 0.5047, "step": 8665 }, { "epoch": 4.214954407294833, "grad_norm": 0.07469850226358561, "learning_rate": 4.5874041905532096e-06, "loss": 0.4879, "step": 8666 }, { "epoch": 4.215440729483283, "grad_norm": 0.07908504052529744, "learning_rate": 4.586451288989978e-06, "loss": 0.5188, "step": 8667 }, { "epoch": 4.215927051671732, "grad_norm": 0.0731847070001752, "learning_rate": 4.585498402550413e-06, "loss": 0.4949, "step": 8668 }, { "epoch": 4.216413373860182, "grad_norm": 0.07733456091230159, "learning_rate": 4.584545531269357e-06, "loss": 0.5365, "step": 8669 }, { "epoch": 4.216899696048632, "grad_norm": 0.07546241812109102, "learning_rate": 4.5835926751816626e-06, "loss": 0.507, "step": 8670 }, { "epoch": 4.217386018237082, "grad_norm": 0.07662160343646876, "learning_rate": 4.58263983432217e-06, "loss": 0.5178, "step": 8671 }, { "epoch": 4.217872340425532, "grad_norm": 0.0763430413315756, "learning_rate": 4.581687008725731e-06, "loss": 0.5076, "step": 8672 }, { "epoch": 4.218358662613982, "grad_norm": 0.07722427949859416, "learning_rate": 4.580734198427187e-06, "loss": 0.5294, "step": 8673 }, { "epoch": 4.218844984802431, "grad_norm": 0.07713551194320627, "learning_rate": 4.579781403461384e-06, "loss": 0.5154, "step": 8674 }, { "epoch": 4.219331306990881, "grad_norm": 0.07591098778000949, "learning_rate": 4.578828623863165e-06, "loss": 0.5126, "step": 8675 }, { "epoch": 4.219817629179332, "grad_norm": 0.07310728554999228, "learning_rate": 4.577875859667377e-06, "loss": 0.4926, "step": 8676 }, { "epoch": 4.220303951367781, "grad_norm": 0.07436779151687803, "learning_rate": 4.576923110908858e-06, "loss": 0.5004, "step": 8677 }, { "epoch": 4.220790273556231, "grad_norm": 0.07281434253732112, "learning_rate": 4.575970377622456e-06, "loss": 0.4987, "step": 8678 }, { "epoch": 4.221276595744681, "grad_norm": 0.07664552228304802, "learning_rate": 4.575017659843007e-06, "loss": 0.5183, "step": 8679 }, { "epoch": 4.22176291793313, "grad_norm": 0.072920975667414, "learning_rate": 4.574064957605356e-06, "loss": 0.4782, "step": 8680 }, { "epoch": 4.222249240121581, "grad_norm": 0.07819163668521517, "learning_rate": 4.573112270944343e-06, "loss": 0.538, "step": 8681 }, { "epoch": 4.222735562310031, "grad_norm": 0.07683564925643531, "learning_rate": 4.572159599894808e-06, "loss": 0.5092, "step": 8682 }, { "epoch": 4.22322188449848, "grad_norm": 0.07347147602483026, "learning_rate": 4.571206944491593e-06, "loss": 0.472, "step": 8683 }, { "epoch": 4.22370820668693, "grad_norm": 0.07718951275473292, "learning_rate": 4.570254304769532e-06, "loss": 0.5098, "step": 8684 }, { "epoch": 4.22419452887538, "grad_norm": 0.07679857193516876, "learning_rate": 4.569301680763468e-06, "loss": 0.5365, "step": 8685 }, { "epoch": 4.224680851063829, "grad_norm": 0.07382949547252148, "learning_rate": 4.568349072508236e-06, "loss": 0.4642, "step": 8686 }, { "epoch": 4.22516717325228, "grad_norm": 0.07938568871975125, "learning_rate": 4.567396480038677e-06, "loss": 0.4913, "step": 8687 }, { "epoch": 4.22565349544073, "grad_norm": 0.07793437345472343, "learning_rate": 4.566443903389622e-06, "loss": 0.5519, "step": 8688 }, { "epoch": 4.226139817629179, "grad_norm": 0.07377412301186179, "learning_rate": 4.565491342595914e-06, "loss": 0.4945, "step": 8689 }, { "epoch": 4.226626139817629, "grad_norm": 0.0776944029922295, "learning_rate": 4.564538797692382e-06, "loss": 0.5232, "step": 8690 }, { "epoch": 4.227112462006079, "grad_norm": 0.07285783945653469, "learning_rate": 4.5635862687138645e-06, "loss": 0.5288, "step": 8691 }, { "epoch": 4.227598784194529, "grad_norm": 0.07699902132673209, "learning_rate": 4.562633755695195e-06, "loss": 0.5189, "step": 8692 }, { "epoch": 4.228085106382979, "grad_norm": 0.07225996419098245, "learning_rate": 4.56168125867121e-06, "loss": 0.4827, "step": 8693 }, { "epoch": 4.228571428571429, "grad_norm": 0.074962130347158, "learning_rate": 4.5607287776767386e-06, "loss": 0.5291, "step": 8694 }, { "epoch": 4.229057750759878, "grad_norm": 0.07791887590340825, "learning_rate": 4.559776312746617e-06, "loss": 0.5312, "step": 8695 }, { "epoch": 4.229544072948328, "grad_norm": 0.07473625643536515, "learning_rate": 4.558823863915673e-06, "loss": 0.499, "step": 8696 }, { "epoch": 4.230030395136778, "grad_norm": 0.07681095331078629, "learning_rate": 4.557871431218744e-06, "loss": 0.5001, "step": 8697 }, { "epoch": 4.230516717325228, "grad_norm": 0.07648674349380324, "learning_rate": 4.556919014690655e-06, "loss": 0.5426, "step": 8698 }, { "epoch": 4.231003039513678, "grad_norm": 0.0744084001770638, "learning_rate": 4.55596661436624e-06, "loss": 0.4957, "step": 8699 }, { "epoch": 4.231489361702128, "grad_norm": 0.07402201557692092, "learning_rate": 4.555014230280327e-06, "loss": 0.5125, "step": 8700 }, { "epoch": 4.231975683890577, "grad_norm": 0.07381443245221855, "learning_rate": 4.554061862467748e-06, "loss": 0.4804, "step": 8701 }, { "epoch": 4.232462006079027, "grad_norm": 0.07550477766177831, "learning_rate": 4.553109510963327e-06, "loss": 0.5306, "step": 8702 }, { "epoch": 4.232948328267478, "grad_norm": 0.07152553495268849, "learning_rate": 4.552157175801896e-06, "loss": 0.4958, "step": 8703 }, { "epoch": 4.233434650455927, "grad_norm": 0.07581792328001027, "learning_rate": 4.551204857018278e-06, "loss": 0.5334, "step": 8704 }, { "epoch": 4.233920972644377, "grad_norm": 0.0746442794651795, "learning_rate": 4.550252554647303e-06, "loss": 0.4968, "step": 8705 }, { "epoch": 4.234407294832827, "grad_norm": 0.07383079482696556, "learning_rate": 4.549300268723798e-06, "loss": 0.5211, "step": 8706 }, { "epoch": 4.234893617021276, "grad_norm": 0.07446737507778915, "learning_rate": 4.548347999282584e-06, "loss": 0.5693, "step": 8707 }, { "epoch": 4.235379939209727, "grad_norm": 0.07482578397225607, "learning_rate": 4.547395746358493e-06, "loss": 0.4895, "step": 8708 }, { "epoch": 4.235866261398177, "grad_norm": 0.07475043530154361, "learning_rate": 4.5464435099863415e-06, "loss": 0.5035, "step": 8709 }, { "epoch": 4.236352583586626, "grad_norm": 0.07532848373433775, "learning_rate": 4.545491290200959e-06, "loss": 0.5104, "step": 8710 }, { "epoch": 4.236838905775076, "grad_norm": 0.07267218439532801, "learning_rate": 4.5445390870371656e-06, "loss": 0.5021, "step": 8711 }, { "epoch": 4.237325227963526, "grad_norm": 0.074723096238761, "learning_rate": 4.543586900529786e-06, "loss": 0.5081, "step": 8712 }, { "epoch": 4.237811550151975, "grad_norm": 0.07799118909340397, "learning_rate": 4.542634730713639e-06, "loss": 0.4959, "step": 8713 }, { "epoch": 4.238297872340426, "grad_norm": 0.07653889202887335, "learning_rate": 4.541682577623548e-06, "loss": 0.5333, "step": 8714 }, { "epoch": 4.238784194528876, "grad_norm": 0.07465745108764789, "learning_rate": 4.540730441294334e-06, "loss": 0.5114, "step": 8715 }, { "epoch": 4.239270516717325, "grad_norm": 0.07339665835885074, "learning_rate": 4.5397783217608174e-06, "loss": 0.4885, "step": 8716 }, { "epoch": 4.239756838905775, "grad_norm": 0.07407899126115443, "learning_rate": 4.538826219057815e-06, "loss": 0.5062, "step": 8717 }, { "epoch": 4.240243161094225, "grad_norm": 0.07435973308592166, "learning_rate": 4.537874133220149e-06, "loss": 0.4998, "step": 8718 }, { "epoch": 4.240729483282675, "grad_norm": 0.07397053380948075, "learning_rate": 4.536922064282634e-06, "loss": 0.5403, "step": 8719 }, { "epoch": 4.241215805471125, "grad_norm": 0.07490142171157223, "learning_rate": 4.53597001228009e-06, "loss": 0.5425, "step": 8720 }, { "epoch": 4.2417021276595746, "grad_norm": 0.07471311350176477, "learning_rate": 4.535017977247334e-06, "loss": 0.544, "step": 8721 }, { "epoch": 4.242188449848024, "grad_norm": 0.0734170697021857, "learning_rate": 4.534065959219182e-06, "loss": 0.5189, "step": 8722 }, { "epoch": 4.242674772036474, "grad_norm": 0.07742542844479222, "learning_rate": 4.533113958230449e-06, "loss": 0.5921, "step": 8723 }, { "epoch": 4.243161094224924, "grad_norm": 0.07183809436425274, "learning_rate": 4.532161974315951e-06, "loss": 0.498, "step": 8724 }, { "epoch": 4.243647416413374, "grad_norm": 0.0731772908572836, "learning_rate": 4.531210007510501e-06, "loss": 0.5048, "step": 8725 }, { "epoch": 4.244133738601824, "grad_norm": 0.07487899733392256, "learning_rate": 4.530258057848916e-06, "loss": 0.5182, "step": 8726 }, { "epoch": 4.2446200607902735, "grad_norm": 0.07252965677148214, "learning_rate": 4.5293061253660056e-06, "loss": 0.4875, "step": 8727 }, { "epoch": 4.245106382978723, "grad_norm": 0.07447330119276449, "learning_rate": 4.528354210096585e-06, "loss": 0.4833, "step": 8728 }, { "epoch": 4.245592705167173, "grad_norm": 0.07382969610481885, "learning_rate": 4.527402312075464e-06, "loss": 0.5043, "step": 8729 }, { "epoch": 4.2460790273556235, "grad_norm": 0.07723969282011674, "learning_rate": 4.526450431337457e-06, "loss": 0.5433, "step": 8730 }, { "epoch": 4.246565349544073, "grad_norm": 0.07191645882261344, "learning_rate": 4.525498567917371e-06, "loss": 0.4987, "step": 8731 }, { "epoch": 4.247051671732523, "grad_norm": 0.07519499527333512, "learning_rate": 4.524546721850018e-06, "loss": 0.5276, "step": 8732 }, { "epoch": 4.2475379939209725, "grad_norm": 0.07187429383530079, "learning_rate": 4.52359489317021e-06, "loss": 0.4977, "step": 8733 }, { "epoch": 4.248024316109422, "grad_norm": 0.07538453184585003, "learning_rate": 4.5226430819127504e-06, "loss": 0.5616, "step": 8734 }, { "epoch": 4.248510638297873, "grad_norm": 0.07299794767359329, "learning_rate": 4.521691288112451e-06, "loss": 0.5074, "step": 8735 }, { "epoch": 4.2489969604863225, "grad_norm": 0.07396174481122963, "learning_rate": 4.5207395118041185e-06, "loss": 0.5311, "step": 8736 }, { "epoch": 4.249483282674772, "grad_norm": 0.07418648976744513, "learning_rate": 4.519787753022561e-06, "loss": 0.5154, "step": 8737 }, { "epoch": 4.249969604863222, "grad_norm": 0.07324800322254649, "learning_rate": 4.518836011802582e-06, "loss": 0.5294, "step": 8738 }, { "epoch": 4.249969604863222, "eval_loss": 0.5709888339042664, "eval_runtime": 104.8517, "eval_samples_per_second": 289.485, "eval_steps_per_second": 36.194, "step": 8738 }, { "epoch": 4.2504559270516715, "grad_norm": 0.07259134379553166, "learning_rate": 4.517884288178989e-06, "loss": 0.5104, "step": 8739 }, { "epoch": 4.250942249240121, "grad_norm": 0.07803557469579345, "learning_rate": 4.516932582186586e-06, "loss": 0.5334, "step": 8740 }, { "epoch": 4.251428571428572, "grad_norm": 0.0725182000730815, "learning_rate": 4.51598089386018e-06, "loss": 0.4907, "step": 8741 }, { "epoch": 4.2519148936170215, "grad_norm": 0.10968620580813508, "learning_rate": 4.51502922323457e-06, "loss": 0.5457, "step": 8742 }, { "epoch": 4.252401215805471, "grad_norm": 0.0757296271890457, "learning_rate": 4.514077570344565e-06, "loss": 0.5319, "step": 8743 }, { "epoch": 4.252887537993921, "grad_norm": 0.07409161370369666, "learning_rate": 4.5131259352249616e-06, "loss": 0.5104, "step": 8744 }, { "epoch": 4.2533738601823705, "grad_norm": 0.07317896499004671, "learning_rate": 4.5121743179105635e-06, "loss": 0.4986, "step": 8745 }, { "epoch": 4.25386018237082, "grad_norm": 0.07426250639754535, "learning_rate": 4.5112227184361726e-06, "loss": 0.4983, "step": 8746 }, { "epoch": 4.254346504559271, "grad_norm": 0.07121806614551542, "learning_rate": 4.510271136836591e-06, "loss": 0.51, "step": 8747 }, { "epoch": 4.2548328267477205, "grad_norm": 0.07583702226295949, "learning_rate": 4.509319573146614e-06, "loss": 0.4834, "step": 8748 }, { "epoch": 4.25531914893617, "grad_norm": 0.07193527971193463, "learning_rate": 4.508368027401044e-06, "loss": 0.5152, "step": 8749 }, { "epoch": 4.25580547112462, "grad_norm": 0.07436967309549494, "learning_rate": 4.507416499634678e-06, "loss": 0.5234, "step": 8750 }, { "epoch": 4.2562917933130695, "grad_norm": 0.07303227563763393, "learning_rate": 4.506464989882316e-06, "loss": 0.5258, "step": 8751 }, { "epoch": 4.25677811550152, "grad_norm": 0.07717013075558937, "learning_rate": 4.505513498178752e-06, "loss": 0.5335, "step": 8752 }, { "epoch": 4.25726443768997, "grad_norm": 0.07454202332370188, "learning_rate": 4.504562024558785e-06, "loss": 0.5042, "step": 8753 }, { "epoch": 4.2577507598784194, "grad_norm": 0.07467668953758691, "learning_rate": 4.503610569057208e-06, "loss": 0.5121, "step": 8754 }, { "epoch": 4.258237082066869, "grad_norm": 0.07443851881122236, "learning_rate": 4.502659131708821e-06, "loss": 0.5473, "step": 8755 }, { "epoch": 4.258723404255319, "grad_norm": 0.07131672641325543, "learning_rate": 4.501707712548413e-06, "loss": 0.4888, "step": 8756 }, { "epoch": 4.259209726443769, "grad_norm": 0.07779720583889972, "learning_rate": 4.5007563116107825e-06, "loss": 0.4812, "step": 8757 }, { "epoch": 4.259696048632219, "grad_norm": 0.0723020858048172, "learning_rate": 4.499804928930719e-06, "loss": 0.5063, "step": 8758 }, { "epoch": 4.260182370820669, "grad_norm": 0.07597115797662181, "learning_rate": 4.498853564543015e-06, "loss": 0.5051, "step": 8759 }, { "epoch": 4.260668693009118, "grad_norm": 0.0744597841786799, "learning_rate": 4.497902218482466e-06, "loss": 0.4974, "step": 8760 }, { "epoch": 4.261155015197568, "grad_norm": 0.0748062920763063, "learning_rate": 4.49695089078386e-06, "loss": 0.5105, "step": 8761 }, { "epoch": 4.261641337386019, "grad_norm": 0.07448387237692716, "learning_rate": 4.4959995814819904e-06, "loss": 0.4842, "step": 8762 }, { "epoch": 4.262127659574468, "grad_norm": 0.07427814720623255, "learning_rate": 4.495048290611643e-06, "loss": 0.5229, "step": 8763 }, { "epoch": 4.262613981762918, "grad_norm": 0.07251018282983426, "learning_rate": 4.494097018207609e-06, "loss": 0.5038, "step": 8764 }, { "epoch": 4.263100303951368, "grad_norm": 0.07484050226843347, "learning_rate": 4.4931457643046775e-06, "loss": 0.5277, "step": 8765 }, { "epoch": 4.263586626139817, "grad_norm": 0.07465325259856306, "learning_rate": 4.492194528937637e-06, "loss": 0.509, "step": 8766 }, { "epoch": 4.264072948328267, "grad_norm": 0.07585190924487659, "learning_rate": 4.491243312141271e-06, "loss": 0.5262, "step": 8767 }, { "epoch": 4.264559270516718, "grad_norm": 0.0731946682333731, "learning_rate": 4.49029211395037e-06, "loss": 0.5228, "step": 8768 }, { "epoch": 4.265045592705167, "grad_norm": 0.07231915622254274, "learning_rate": 4.4893409343997165e-06, "loss": 0.4823, "step": 8769 }, { "epoch": 4.265531914893617, "grad_norm": 0.07028292122038556, "learning_rate": 4.488389773524099e-06, "loss": 0.5139, "step": 8770 }, { "epoch": 4.266018237082067, "grad_norm": 0.07470876903983961, "learning_rate": 4.487438631358298e-06, "loss": 0.5336, "step": 8771 }, { "epoch": 4.266504559270516, "grad_norm": 0.07280975141157943, "learning_rate": 4.4864875079371e-06, "loss": 0.5033, "step": 8772 }, { "epoch": 4.266990881458966, "grad_norm": 0.07333147533724117, "learning_rate": 4.485536403295287e-06, "loss": 0.521, "step": 8773 }, { "epoch": 4.267477203647417, "grad_norm": 0.07511462236629839, "learning_rate": 4.484585317467642e-06, "loss": 0.5329, "step": 8774 }, { "epoch": 4.267963525835866, "grad_norm": 0.07668881321204545, "learning_rate": 4.483634250488945e-06, "loss": 0.5172, "step": 8775 }, { "epoch": 4.268449848024316, "grad_norm": 0.07423731055110483, "learning_rate": 4.482683202393979e-06, "loss": 0.535, "step": 8776 }, { "epoch": 4.268936170212766, "grad_norm": 0.07317403272780562, "learning_rate": 4.481732173217523e-06, "loss": 0.5191, "step": 8777 }, { "epoch": 4.269422492401215, "grad_norm": 0.07654644360192758, "learning_rate": 4.480781162994356e-06, "loss": 0.533, "step": 8778 }, { "epoch": 4.269908814589666, "grad_norm": 0.07597300033638635, "learning_rate": 4.479830171759258e-06, "loss": 0.5141, "step": 8779 }, { "epoch": 4.270395136778116, "grad_norm": 0.0731460516462222, "learning_rate": 4.478879199547009e-06, "loss": 0.5284, "step": 8780 }, { "epoch": 4.270881458966565, "grad_norm": 0.07529553989918555, "learning_rate": 4.477928246392382e-06, "loss": 0.5136, "step": 8781 }, { "epoch": 4.271367781155015, "grad_norm": 0.07454572143905275, "learning_rate": 4.4769773123301586e-06, "loss": 0.5104, "step": 8782 }, { "epoch": 4.271854103343465, "grad_norm": 0.07470779552390609, "learning_rate": 4.47602639739511e-06, "loss": 0.5093, "step": 8783 }, { "epoch": 4.272340425531915, "grad_norm": 0.07558066268226521, "learning_rate": 4.475075501622014e-06, "loss": 0.5344, "step": 8784 }, { "epoch": 4.272826747720365, "grad_norm": 0.0729851124107236, "learning_rate": 4.474124625045647e-06, "loss": 0.5071, "step": 8785 }, { "epoch": 4.273313069908815, "grad_norm": 0.07738719736032744, "learning_rate": 4.47317376770078e-06, "loss": 0.5393, "step": 8786 }, { "epoch": 4.273799392097264, "grad_norm": 0.07464123587971279, "learning_rate": 4.47222292962219e-06, "loss": 0.5063, "step": 8787 }, { "epoch": 4.274285714285714, "grad_norm": 0.07743054088657754, "learning_rate": 4.471272110844646e-06, "loss": 0.4839, "step": 8788 }, { "epoch": 4.274772036474165, "grad_norm": 0.07481329572451914, "learning_rate": 4.47032131140292e-06, "loss": 0.5122, "step": 8789 }, { "epoch": 4.275258358662614, "grad_norm": 0.07132892533148658, "learning_rate": 4.469370531331784e-06, "loss": 0.5002, "step": 8790 }, { "epoch": 4.275744680851064, "grad_norm": 0.0723130685776087, "learning_rate": 4.4684197706660125e-06, "loss": 0.4732, "step": 8791 }, { "epoch": 4.276231003039514, "grad_norm": 0.0768637366148514, "learning_rate": 4.4674690294403676e-06, "loss": 0.5418, "step": 8792 }, { "epoch": 4.276717325227963, "grad_norm": 0.07682422531247371, "learning_rate": 4.466518307689624e-06, "loss": 0.4787, "step": 8793 }, { "epoch": 4.277203647416413, "grad_norm": 0.07394132920279146, "learning_rate": 4.465567605448547e-06, "loss": 0.5028, "step": 8794 }, { "epoch": 4.277689969604864, "grad_norm": 0.07479170102597361, "learning_rate": 4.4646169227519075e-06, "loss": 0.478, "step": 8795 }, { "epoch": 4.278176291793313, "grad_norm": 0.07208901257486133, "learning_rate": 4.463666259634469e-06, "loss": 0.512, "step": 8796 }, { "epoch": 4.278662613981763, "grad_norm": 0.07576886107125212, "learning_rate": 4.462715616131e-06, "loss": 0.5116, "step": 8797 }, { "epoch": 4.279148936170213, "grad_norm": 0.07659478748145507, "learning_rate": 4.461764992276264e-06, "loss": 0.5373, "step": 8798 }, { "epoch": 4.279635258358662, "grad_norm": 0.07590146217647921, "learning_rate": 4.460814388105027e-06, "loss": 0.5129, "step": 8799 }, { "epoch": 4.280121580547112, "grad_norm": 0.07245037718056008, "learning_rate": 4.459863803652052e-06, "loss": 0.5005, "step": 8800 }, { "epoch": 4.280607902735563, "grad_norm": 0.078890503219173, "learning_rate": 4.458913238952105e-06, "loss": 0.5514, "step": 8801 }, { "epoch": 4.281094224924012, "grad_norm": 0.07492741407117794, "learning_rate": 4.457962694039945e-06, "loss": 0.5171, "step": 8802 }, { "epoch": 4.281580547112462, "grad_norm": 0.07555930591964198, "learning_rate": 4.457012168950336e-06, "loss": 0.4818, "step": 8803 }, { "epoch": 4.282066869300912, "grad_norm": 0.0754832058900754, "learning_rate": 4.456061663718039e-06, "loss": 0.4948, "step": 8804 }, { "epoch": 4.282553191489361, "grad_norm": 0.07530560834202397, "learning_rate": 4.455111178377815e-06, "loss": 0.5411, "step": 8805 }, { "epoch": 4.283039513677812, "grad_norm": 0.07451560872676627, "learning_rate": 4.45416071296442e-06, "loss": 0.5386, "step": 8806 }, { "epoch": 4.283525835866262, "grad_norm": 0.07394584771918655, "learning_rate": 4.4532102675126185e-06, "loss": 0.5246, "step": 8807 }, { "epoch": 4.284012158054711, "grad_norm": 0.07491523655089026, "learning_rate": 4.452259842057164e-06, "loss": 0.5112, "step": 8808 }, { "epoch": 4.284498480243161, "grad_norm": 0.07220221987507201, "learning_rate": 4.451309436632818e-06, "loss": 0.5077, "step": 8809 }, { "epoch": 4.284984802431611, "grad_norm": 0.07658118758865776, "learning_rate": 4.450359051274332e-06, "loss": 0.5326, "step": 8810 }, { "epoch": 4.285471124620061, "grad_norm": 0.07797859669975359, "learning_rate": 4.449408686016467e-06, "loss": 0.594, "step": 8811 }, { "epoch": 4.285957446808511, "grad_norm": 0.0711988378757658, "learning_rate": 4.448458340893979e-06, "loss": 0.4862, "step": 8812 }, { "epoch": 4.286443768996961, "grad_norm": 0.07391444606359074, "learning_rate": 4.447508015941616e-06, "loss": 0.4448, "step": 8813 }, { "epoch": 4.28693009118541, "grad_norm": 0.07354575247285881, "learning_rate": 4.446557711194138e-06, "loss": 0.5024, "step": 8814 }, { "epoch": 4.28741641337386, "grad_norm": 0.07322822137937383, "learning_rate": 4.445607426686295e-06, "loss": 0.5132, "step": 8815 }, { "epoch": 4.2879027355623105, "grad_norm": 0.07224456244252213, "learning_rate": 4.444657162452842e-06, "loss": 0.464, "step": 8816 }, { "epoch": 4.28838905775076, "grad_norm": 0.07602056854868319, "learning_rate": 4.443706918528527e-06, "loss": 0.5102, "step": 8817 }, { "epoch": 4.28887537993921, "grad_norm": 0.07513831770111727, "learning_rate": 4.442756694948103e-06, "loss": 0.5173, "step": 8818 }, { "epoch": 4.2893617021276595, "grad_norm": 0.07056319942432344, "learning_rate": 4.441806491746319e-06, "loss": 0.4633, "step": 8819 }, { "epoch": 4.289848024316109, "grad_norm": 0.07639973987500419, "learning_rate": 4.440856308957928e-06, "loss": 0.5431, "step": 8820 }, { "epoch": 4.290334346504559, "grad_norm": 0.07381501319111135, "learning_rate": 4.439906146617674e-06, "loss": 0.5064, "step": 8821 }, { "epoch": 4.2908206686930095, "grad_norm": 0.07394732410366524, "learning_rate": 4.438956004760307e-06, "loss": 0.5459, "step": 8822 }, { "epoch": 4.291306990881459, "grad_norm": 0.0759200443771861, "learning_rate": 4.438005883420572e-06, "loss": 0.521, "step": 8823 }, { "epoch": 4.291793313069909, "grad_norm": 0.07572521699622785, "learning_rate": 4.437055782633221e-06, "loss": 0.5317, "step": 8824 }, { "epoch": 4.2922796352583585, "grad_norm": 0.07216002648877469, "learning_rate": 4.4361057024329926e-06, "loss": 0.4721, "step": 8825 }, { "epoch": 4.292765957446808, "grad_norm": 0.0730238582838513, "learning_rate": 4.435155642854637e-06, "loss": 0.5282, "step": 8826 }, { "epoch": 4.293252279635258, "grad_norm": 0.07549362622246844, "learning_rate": 4.434205603932895e-06, "loss": 0.4735, "step": 8827 }, { "epoch": 4.2937386018237085, "grad_norm": 0.07256035173566985, "learning_rate": 4.433255585702511e-06, "loss": 0.5368, "step": 8828 }, { "epoch": 4.294224924012158, "grad_norm": 0.07309291082885862, "learning_rate": 4.432305588198227e-06, "loss": 0.5233, "step": 8829 }, { "epoch": 4.294711246200608, "grad_norm": 0.07553199473785852, "learning_rate": 4.431355611454788e-06, "loss": 0.5143, "step": 8830 }, { "epoch": 4.2951975683890575, "grad_norm": 0.0749011396508686, "learning_rate": 4.43040565550693e-06, "loss": 0.4991, "step": 8831 }, { "epoch": 4.295683890577507, "grad_norm": 0.0743130066759057, "learning_rate": 4.429455720389397e-06, "loss": 0.5209, "step": 8832 }, { "epoch": 4.296170212765958, "grad_norm": 0.07351555334264333, "learning_rate": 4.428505806136927e-06, "loss": 0.4973, "step": 8833 }, { "epoch": 4.2966565349544075, "grad_norm": 0.07421232291265613, "learning_rate": 4.427555912784262e-06, "loss": 0.5108, "step": 8834 }, { "epoch": 4.297142857142857, "grad_norm": 0.07293126190840091, "learning_rate": 4.426606040366133e-06, "loss": 0.5062, "step": 8835 }, { "epoch": 4.297629179331307, "grad_norm": 0.07586631993850015, "learning_rate": 4.425656188917284e-06, "loss": 0.5331, "step": 8836 }, { "epoch": 4.2981155015197565, "grad_norm": 0.07402833946435487, "learning_rate": 4.42470635847245e-06, "loss": 0.5005, "step": 8837 }, { "epoch": 4.298601823708207, "grad_norm": 0.0744793687247343, "learning_rate": 4.423756549066364e-06, "loss": 0.5069, "step": 8838 }, { "epoch": 4.299088145896657, "grad_norm": 0.07410432112739444, "learning_rate": 4.422806760733764e-06, "loss": 0.5029, "step": 8839 }, { "epoch": 4.2995744680851065, "grad_norm": 0.0808500669230187, "learning_rate": 4.421856993509382e-06, "loss": 0.521, "step": 8840 }, { "epoch": 4.300060790273556, "grad_norm": 0.08017556864986378, "learning_rate": 4.420907247427954e-06, "loss": 0.5293, "step": 8841 }, { "epoch": 4.300547112462006, "grad_norm": 0.07343790461639645, "learning_rate": 4.419957522524209e-06, "loss": 0.4731, "step": 8842 }, { "epoch": 4.3010334346504555, "grad_norm": 0.07435980626915134, "learning_rate": 4.419007818832883e-06, "loss": 0.4867, "step": 8843 }, { "epoch": 4.301519756838906, "grad_norm": 0.07420557641036699, "learning_rate": 4.4180581363887024e-06, "loss": 0.536, "step": 8844 }, { "epoch": 4.302006079027356, "grad_norm": 0.07333483604257106, "learning_rate": 4.417108475226403e-06, "loss": 0.4959, "step": 8845 }, { "epoch": 4.3024924012158055, "grad_norm": 0.07692840014364763, "learning_rate": 4.41615883538071e-06, "loss": 0.5148, "step": 8846 }, { "epoch": 4.302978723404255, "grad_norm": 0.07503908909506307, "learning_rate": 4.415209216886354e-06, "loss": 0.5463, "step": 8847 }, { "epoch": 4.303465045592705, "grad_norm": 0.07222173315302174, "learning_rate": 4.414259619778062e-06, "loss": 0.5019, "step": 8848 }, { "epoch": 4.303951367781155, "grad_norm": 0.0748463928362814, "learning_rate": 4.413310044090563e-06, "loss": 0.5038, "step": 8849 }, { "epoch": 4.304437689969605, "grad_norm": 0.07246934839549578, "learning_rate": 4.412360489858581e-06, "loss": 0.5171, "step": 8850 }, { "epoch": 4.304924012158055, "grad_norm": 0.07532778899577577, "learning_rate": 4.4114109571168444e-06, "loss": 0.5284, "step": 8851 }, { "epoch": 4.305410334346504, "grad_norm": 0.07418476849790004, "learning_rate": 4.410461445900075e-06, "loss": 0.5427, "step": 8852 }, { "epoch": 4.305896656534954, "grad_norm": 0.0758987137726916, "learning_rate": 4.409511956242999e-06, "loss": 0.5599, "step": 8853 }, { "epoch": 4.306382978723404, "grad_norm": 0.07546546038024077, "learning_rate": 4.408562488180338e-06, "loss": 0.5226, "step": 8854 }, { "epoch": 4.306869300911854, "grad_norm": 0.07586808486344018, "learning_rate": 4.407613041746818e-06, "loss": 0.512, "step": 8855 }, { "epoch": 4.307355623100304, "grad_norm": 0.07588463291409742, "learning_rate": 4.406663616977156e-06, "loss": 0.5142, "step": 8856 }, { "epoch": 4.307841945288754, "grad_norm": 0.0746360354744008, "learning_rate": 4.405714213906075e-06, "loss": 0.5046, "step": 8857 }, { "epoch": 4.308328267477203, "grad_norm": 0.07636943917876411, "learning_rate": 4.404764832568296e-06, "loss": 0.5289, "step": 8858 }, { "epoch": 4.308814589665653, "grad_norm": 0.07787474247598106, "learning_rate": 4.403815472998539e-06, "loss": 0.5399, "step": 8859 }, { "epoch": 4.309300911854104, "grad_norm": 0.07916379328334402, "learning_rate": 4.402866135231518e-06, "loss": 0.5215, "step": 8860 }, { "epoch": 4.309787234042553, "grad_norm": 0.07177162684397294, "learning_rate": 4.401916819301956e-06, "loss": 0.4987, "step": 8861 }, { "epoch": 4.310273556231003, "grad_norm": 0.08212220367829835, "learning_rate": 4.400967525244565e-06, "loss": 0.5347, "step": 8862 }, { "epoch": 4.310759878419453, "grad_norm": 0.07157022332514935, "learning_rate": 4.400018253094065e-06, "loss": 0.4875, "step": 8863 }, { "epoch": 4.311246200607902, "grad_norm": 0.07344837469413698, "learning_rate": 4.399069002885171e-06, "loss": 0.5037, "step": 8864 }, { "epoch": 4.311732522796353, "grad_norm": 0.07500922821222847, "learning_rate": 4.398119774652596e-06, "loss": 0.517, "step": 8865 }, { "epoch": 4.312218844984803, "grad_norm": 0.07626514907512279, "learning_rate": 4.397170568431056e-06, "loss": 0.5396, "step": 8866 }, { "epoch": 4.312705167173252, "grad_norm": 0.07712850986800156, "learning_rate": 4.39622138425526e-06, "loss": 0.5436, "step": 8867 }, { "epoch": 4.313191489361702, "grad_norm": 0.0744705521369078, "learning_rate": 4.395272222159923e-06, "loss": 0.5247, "step": 8868 }, { "epoch": 4.313677811550152, "grad_norm": 0.07333172452682638, "learning_rate": 4.394323082179755e-06, "loss": 0.5175, "step": 8869 }, { "epoch": 4.314164133738601, "grad_norm": 0.07608902381385374, "learning_rate": 4.393373964349469e-06, "loss": 0.5087, "step": 8870 }, { "epoch": 4.314650455927052, "grad_norm": 0.07390671415557674, "learning_rate": 4.3924248687037705e-06, "loss": 0.5128, "step": 8871 }, { "epoch": 4.315136778115502, "grad_norm": 0.07595870352857209, "learning_rate": 4.391475795277371e-06, "loss": 0.5013, "step": 8872 }, { "epoch": 4.315623100303951, "grad_norm": 0.07485824968183762, "learning_rate": 4.390526744104978e-06, "loss": 0.5327, "step": 8873 }, { "epoch": 4.316109422492401, "grad_norm": 0.07543939663567027, "learning_rate": 4.389577715221301e-06, "loss": 0.532, "step": 8874 }, { "epoch": 4.316595744680851, "grad_norm": 0.07657137578278234, "learning_rate": 4.388628708661042e-06, "loss": 0.4964, "step": 8875 }, { "epoch": 4.317082066869301, "grad_norm": 0.07160964541451799, "learning_rate": 4.387679724458911e-06, "loss": 0.5087, "step": 8876 }, { "epoch": 4.317568389057751, "grad_norm": 0.0719238648155246, "learning_rate": 4.3867307626496085e-06, "loss": 0.5192, "step": 8877 }, { "epoch": 4.318054711246201, "grad_norm": 0.0710696258549487, "learning_rate": 4.385781823267841e-06, "loss": 0.4829, "step": 8878 }, { "epoch": 4.31854103343465, "grad_norm": 0.07172916800527629, "learning_rate": 4.384832906348311e-06, "loss": 0.4919, "step": 8879 }, { "epoch": 4.3190273556231, "grad_norm": 0.0750671578329595, "learning_rate": 4.383884011925723e-06, "loss": 0.5121, "step": 8880 }, { "epoch": 4.31951367781155, "grad_norm": 0.07255129956589293, "learning_rate": 4.382935140034775e-06, "loss": 0.4896, "step": 8881 }, { "epoch": 4.32, "grad_norm": 0.07452289204902597, "learning_rate": 4.38198629071017e-06, "loss": 0.5205, "step": 8882 }, { "epoch": 4.32048632218845, "grad_norm": 0.07469569549049279, "learning_rate": 4.3810374639866055e-06, "loss": 0.5344, "step": 8883 }, { "epoch": 4.3209726443769, "grad_norm": 0.07242040528457883, "learning_rate": 4.380088659898784e-06, "loss": 0.4895, "step": 8884 }, { "epoch": 4.321458966565349, "grad_norm": 0.07330868075861767, "learning_rate": 4.379139878481401e-06, "loss": 0.5048, "step": 8885 }, { "epoch": 4.321945288753799, "grad_norm": 0.07232087946460136, "learning_rate": 4.378191119769155e-06, "loss": 0.4647, "step": 8886 }, { "epoch": 4.32243161094225, "grad_norm": 0.07518700637971938, "learning_rate": 4.3772423837967415e-06, "loss": 0.5091, "step": 8887 }, { "epoch": 4.322917933130699, "grad_norm": 0.07534152298482906, "learning_rate": 4.3762936705988566e-06, "loss": 0.4925, "step": 8888 }, { "epoch": 4.323404255319149, "grad_norm": 0.07477823833745276, "learning_rate": 4.375344980210198e-06, "loss": 0.5331, "step": 8889 }, { "epoch": 4.323890577507599, "grad_norm": 0.07416128838319172, "learning_rate": 4.3743963126654555e-06, "loss": 0.5204, "step": 8890 }, { "epoch": 4.324376899696048, "grad_norm": 0.07471537661589851, "learning_rate": 4.373447667999326e-06, "loss": 0.5487, "step": 8891 }, { "epoch": 4.324863221884499, "grad_norm": 0.0755163967098278, "learning_rate": 4.372499046246497e-06, "loss": 0.518, "step": 8892 }, { "epoch": 4.325349544072949, "grad_norm": 0.07206053574538679, "learning_rate": 4.371550447441665e-06, "loss": 0.527, "step": 8893 }, { "epoch": 4.325835866261398, "grad_norm": 0.07172106928710374, "learning_rate": 4.370601871619517e-06, "loss": 0.466, "step": 8894 }, { "epoch": 4.326322188449848, "grad_norm": 0.07554363047926235, "learning_rate": 4.369653318814747e-06, "loss": 0.4929, "step": 8895 }, { "epoch": 4.326808510638298, "grad_norm": 0.07168538598565016, "learning_rate": 4.368704789062039e-06, "loss": 0.5249, "step": 8896 }, { "epoch": 4.327294832826747, "grad_norm": 0.07111451244903727, "learning_rate": 4.367756282396085e-06, "loss": 0.4887, "step": 8897 }, { "epoch": 4.327781155015198, "grad_norm": 0.07601813067434408, "learning_rate": 4.36680779885157e-06, "loss": 0.5241, "step": 8898 }, { "epoch": 4.328267477203648, "grad_norm": 0.07344152425421795, "learning_rate": 4.365859338463183e-06, "loss": 0.492, "step": 8899 }, { "epoch": 4.328753799392097, "grad_norm": 0.07234623875486963, "learning_rate": 4.364910901265607e-06, "loss": 0.4697, "step": 8900 }, { "epoch": 4.329240121580547, "grad_norm": 0.06980103112906447, "learning_rate": 4.363962487293528e-06, "loss": 0.4496, "step": 8901 }, { "epoch": 4.329726443768997, "grad_norm": 0.07795854916768889, "learning_rate": 4.3630140965816294e-06, "loss": 0.5255, "step": 8902 }, { "epoch": 4.330212765957447, "grad_norm": 0.0718304546288623, "learning_rate": 4.362065729164596e-06, "loss": 0.495, "step": 8903 }, { "epoch": 4.330699088145897, "grad_norm": 0.07424082195604742, "learning_rate": 4.3611173850771074e-06, "loss": 0.509, "step": 8904 }, { "epoch": 4.331185410334347, "grad_norm": 0.07410623741866641, "learning_rate": 4.360169064353848e-06, "loss": 0.5327, "step": 8905 }, { "epoch": 4.331671732522796, "grad_norm": 0.07232232562747383, "learning_rate": 4.359220767029495e-06, "loss": 0.4998, "step": 8906 }, { "epoch": 4.332158054711246, "grad_norm": 0.07230138257650034, "learning_rate": 4.35827249313873e-06, "loss": 0.4678, "step": 8907 }, { "epoch": 4.332644376899696, "grad_norm": 0.07729282519907332, "learning_rate": 4.357324242716231e-06, "loss": 0.5317, "step": 8908 }, { "epoch": 4.333130699088146, "grad_norm": 0.07657744428302635, "learning_rate": 4.356376015796678e-06, "loss": 0.5075, "step": 8909 }, { "epoch": 4.333617021276596, "grad_norm": 0.07766636248162703, "learning_rate": 4.355427812414745e-06, "loss": 0.5074, "step": 8910 }, { "epoch": 4.3341033434650456, "grad_norm": 0.0738651748799548, "learning_rate": 4.35447963260511e-06, "loss": 0.5013, "step": 8911 }, { "epoch": 4.334589665653495, "grad_norm": 0.07498968654727205, "learning_rate": 4.3535314764024475e-06, "loss": 0.505, "step": 8912 }, { "epoch": 4.335075987841945, "grad_norm": 0.07508829461148832, "learning_rate": 4.352583343841435e-06, "loss": 0.5594, "step": 8913 }, { "epoch": 4.3355623100303955, "grad_norm": 0.07542999148669198, "learning_rate": 4.351635234956741e-06, "loss": 0.5123, "step": 8914 }, { "epoch": 4.336048632218845, "grad_norm": 0.07253343119260115, "learning_rate": 4.350687149783042e-06, "loss": 0.5072, "step": 8915 }, { "epoch": 4.336534954407295, "grad_norm": 0.07381610424724026, "learning_rate": 4.34973908835501e-06, "loss": 0.5238, "step": 8916 }, { "epoch": 4.3370212765957445, "grad_norm": 0.07620926980982037, "learning_rate": 4.3487910507073124e-06, "loss": 0.5049, "step": 8917 }, { "epoch": 4.337507598784194, "grad_norm": 0.07303803359960734, "learning_rate": 4.347843036874625e-06, "loss": 0.4915, "step": 8918 }, { "epoch": 4.337993920972645, "grad_norm": 0.07296787195692302, "learning_rate": 4.346895046891612e-06, "loss": 0.5065, "step": 8919 }, { "epoch": 4.3384802431610945, "grad_norm": 0.07210109231810234, "learning_rate": 4.345947080792946e-06, "loss": 0.4655, "step": 8920 }, { "epoch": 4.338966565349544, "grad_norm": 0.07411960088034263, "learning_rate": 4.34499913861329e-06, "loss": 0.5232, "step": 8921 }, { "epoch": 4.339452887537994, "grad_norm": 0.07528378630595386, "learning_rate": 4.344051220387314e-06, "loss": 0.5348, "step": 8922 }, { "epoch": 4.3399392097264435, "grad_norm": 0.0785423273024407, "learning_rate": 4.343103326149682e-06, "loss": 0.5235, "step": 8923 }, { "epoch": 4.340425531914893, "grad_norm": 0.0752622483213002, "learning_rate": 4.342155455935063e-06, "loss": 0.4896, "step": 8924 }, { "epoch": 4.340911854103344, "grad_norm": 0.07493896221957322, "learning_rate": 4.341207609778114e-06, "loss": 0.5302, "step": 8925 }, { "epoch": 4.3413981762917935, "grad_norm": 0.0752287306881958, "learning_rate": 4.340259787713505e-06, "loss": 0.4989, "step": 8926 }, { "epoch": 4.341884498480243, "grad_norm": 0.07617569671450834, "learning_rate": 4.339311989775893e-06, "loss": 0.5205, "step": 8927 }, { "epoch": 4.342370820668693, "grad_norm": 0.07386382882538522, "learning_rate": 4.338364215999944e-06, "loss": 0.491, "step": 8928 }, { "epoch": 4.3428571428571425, "grad_norm": 0.07608096331178003, "learning_rate": 4.337416466420313e-06, "loss": 0.519, "step": 8929 }, { "epoch": 4.343343465045593, "grad_norm": 0.0739610071552224, "learning_rate": 4.3364687410716665e-06, "loss": 0.4859, "step": 8930 }, { "epoch": 4.343829787234043, "grad_norm": 0.07344625314616673, "learning_rate": 4.335521039988657e-06, "loss": 0.5178, "step": 8931 }, { "epoch": 4.3443161094224925, "grad_norm": 0.0764110206964944, "learning_rate": 4.334573363205946e-06, "loss": 0.519, "step": 8932 }, { "epoch": 4.344802431610942, "grad_norm": 0.07591205649450931, "learning_rate": 4.333625710758188e-06, "loss": 0.5117, "step": 8933 }, { "epoch": 4.345288753799392, "grad_norm": 0.07616932213990428, "learning_rate": 4.332678082680043e-06, "loss": 0.5639, "step": 8934 }, { "epoch": 4.3457750759878415, "grad_norm": 0.07522738759425052, "learning_rate": 4.331730479006162e-06, "loss": 0.5384, "step": 8935 }, { "epoch": 4.346261398176292, "grad_norm": 0.07399524553376609, "learning_rate": 4.330782899771201e-06, "loss": 0.4959, "step": 8936 }, { "epoch": 4.346747720364742, "grad_norm": 0.07395539774238039, "learning_rate": 4.329835345009813e-06, "loss": 0.5462, "step": 8937 }, { "epoch": 4.3472340425531915, "grad_norm": 0.076310336152694, "learning_rate": 4.328887814756653e-06, "loss": 0.5394, "step": 8938 }, { "epoch": 4.347720364741641, "grad_norm": 0.07363981733818392, "learning_rate": 4.327940309046368e-06, "loss": 0.5038, "step": 8939 }, { "epoch": 4.348206686930091, "grad_norm": 0.0720931778531534, "learning_rate": 4.326992827913613e-06, "loss": 0.5087, "step": 8940 }, { "epoch": 4.348693009118541, "grad_norm": 0.07350593493429489, "learning_rate": 4.326045371393034e-06, "loss": 0.5237, "step": 8941 }, { "epoch": 4.349179331306991, "grad_norm": 0.07323029716157982, "learning_rate": 4.3250979395192834e-06, "loss": 0.5078, "step": 8942 }, { "epoch": 4.349665653495441, "grad_norm": 0.07414676450484353, "learning_rate": 4.324150532327009e-06, "loss": 0.4922, "step": 8943 }, { "epoch": 4.3501519756838904, "grad_norm": 0.07132569584618621, "learning_rate": 4.323203149850855e-06, "loss": 0.4983, "step": 8944 }, { "epoch": 4.35063829787234, "grad_norm": 0.07358142836906066, "learning_rate": 4.322255792125471e-06, "loss": 0.5077, "step": 8945 }, { "epoch": 4.351124620060791, "grad_norm": 0.0735995225088054, "learning_rate": 4.3213084591854984e-06, "loss": 0.5218, "step": 8946 }, { "epoch": 4.35161094224924, "grad_norm": 0.07513396594071299, "learning_rate": 4.3203611510655845e-06, "loss": 0.5021, "step": 8947 }, { "epoch": 4.35209726443769, "grad_norm": 0.07529187866288382, "learning_rate": 4.319413867800372e-06, "loss": 0.5214, "step": 8948 }, { "epoch": 4.35258358662614, "grad_norm": 0.07381393236850418, "learning_rate": 4.318466609424505e-06, "loss": 0.4673, "step": 8949 }, { "epoch": 4.353069908814589, "grad_norm": 0.07677864929108318, "learning_rate": 4.317519375972622e-06, "loss": 0.5617, "step": 8950 }, { "epoch": 4.353556231003039, "grad_norm": 0.0740033176650963, "learning_rate": 4.316572167479366e-06, "loss": 0.4654, "step": 8951 }, { "epoch": 4.35404255319149, "grad_norm": 0.07532289408922148, "learning_rate": 4.315624983979375e-06, "loss": 0.5114, "step": 8952 }, { "epoch": 4.354528875379939, "grad_norm": 0.07369310003524242, "learning_rate": 4.314677825507293e-06, "loss": 0.502, "step": 8953 }, { "epoch": 4.355015197568389, "grad_norm": 0.077197710604692, "learning_rate": 4.313730692097751e-06, "loss": 0.5725, "step": 8954 }, { "epoch": 4.355501519756839, "grad_norm": 0.07713828472241255, "learning_rate": 4.31278358378539e-06, "loss": 0.5291, "step": 8955 }, { "epoch": 4.355987841945288, "grad_norm": 0.07558809496618385, "learning_rate": 4.311836500604846e-06, "loss": 0.5169, "step": 8956 }, { "epoch": 4.356474164133739, "grad_norm": 0.07539873407292505, "learning_rate": 4.310889442590755e-06, "loss": 0.5115, "step": 8957 }, { "epoch": 4.356960486322189, "grad_norm": 0.072277900988303, "learning_rate": 4.309942409777747e-06, "loss": 0.4769, "step": 8958 }, { "epoch": 4.357446808510638, "grad_norm": 0.07463996880449425, "learning_rate": 4.308995402200462e-06, "loss": 0.4942, "step": 8959 }, { "epoch": 4.357933130699088, "grad_norm": 0.07230046070225032, "learning_rate": 4.308048419893527e-06, "loss": 0.537, "step": 8960 }, { "epoch": 4.358419452887538, "grad_norm": 0.0745794181193096, "learning_rate": 4.307101462891576e-06, "loss": 0.5064, "step": 8961 }, { "epoch": 4.358905775075987, "grad_norm": 0.07383499078362998, "learning_rate": 4.306154531229239e-06, "loss": 0.5221, "step": 8962 }, { "epoch": 4.359392097264438, "grad_norm": 0.07717260718233843, "learning_rate": 4.305207624941148e-06, "loss": 0.5121, "step": 8963 }, { "epoch": 4.359878419452888, "grad_norm": 0.07366586505225742, "learning_rate": 4.304260744061928e-06, "loss": 0.5302, "step": 8964 }, { "epoch": 4.360364741641337, "grad_norm": 0.07665854726318598, "learning_rate": 4.303313888626208e-06, "loss": 0.4999, "step": 8965 }, { "epoch": 4.360851063829787, "grad_norm": 0.07451813847677896, "learning_rate": 4.302367058668617e-06, "loss": 0.521, "step": 8966 }, { "epoch": 4.361337386018237, "grad_norm": 0.0752899722033896, "learning_rate": 4.3014202542237785e-06, "loss": 0.5177, "step": 8967 }, { "epoch": 4.361823708206687, "grad_norm": 0.07849829279744301, "learning_rate": 4.3004734753263205e-06, "loss": 0.5551, "step": 8968 }, { "epoch": 4.362310030395137, "grad_norm": 0.07444937303631532, "learning_rate": 4.2995267220108634e-06, "loss": 0.5061, "step": 8969 }, { "epoch": 4.362796352583587, "grad_norm": 0.074694372403024, "learning_rate": 4.298579994312034e-06, "loss": 0.556, "step": 8970 }, { "epoch": 4.363282674772036, "grad_norm": 0.0717691317317463, "learning_rate": 4.2976332922644515e-06, "loss": 0.4737, "step": 8971 }, { "epoch": 4.363768996960486, "grad_norm": 0.07660514643130598, "learning_rate": 4.296686615902739e-06, "loss": 0.5236, "step": 8972 }, { "epoch": 4.364255319148937, "grad_norm": 0.07405669123126633, "learning_rate": 4.295739965261516e-06, "loss": 0.4998, "step": 8973 }, { "epoch": 4.364741641337386, "grad_norm": 0.07370248275717206, "learning_rate": 4.294793340375405e-06, "loss": 0.4899, "step": 8974 }, { "epoch": 4.365227963525836, "grad_norm": 0.07612564587026457, "learning_rate": 4.293846741279019e-06, "loss": 0.5209, "step": 8975 }, { "epoch": 4.365714285714286, "grad_norm": 0.07453807468611076, "learning_rate": 4.292900168006979e-06, "loss": 0.545, "step": 8976 }, { "epoch": 4.366200607902735, "grad_norm": 0.07406787688920971, "learning_rate": 4.291953620593902e-06, "loss": 0.5315, "step": 8977 }, { "epoch": 4.366686930091185, "grad_norm": 0.07537165161300213, "learning_rate": 4.291007099074403e-06, "loss": 0.478, "step": 8978 }, { "epoch": 4.367173252279636, "grad_norm": 0.0706841018736068, "learning_rate": 4.290060603483095e-06, "loss": 0.4843, "step": 8979 }, { "epoch": 4.367659574468085, "grad_norm": 0.07394689173604536, "learning_rate": 4.289114133854594e-06, "loss": 0.4929, "step": 8980 }, { "epoch": 4.368145896656535, "grad_norm": 0.07959312816235899, "learning_rate": 4.288167690223512e-06, "loss": 0.5236, "step": 8981 }, { "epoch": 4.368632218844985, "grad_norm": 0.07215387193545882, "learning_rate": 4.287221272624462e-06, "loss": 0.508, "step": 8982 }, { "epoch": 4.369118541033434, "grad_norm": 0.07349109961906883, "learning_rate": 4.286274881092053e-06, "loss": 0.5188, "step": 8983 }, { "epoch": 4.369604863221885, "grad_norm": 0.07671258711287796, "learning_rate": 4.285328515660897e-06, "loss": 0.4767, "step": 8984 }, { "epoch": 4.370091185410335, "grad_norm": 0.0745084888237647, "learning_rate": 4.2843821763656e-06, "loss": 0.4997, "step": 8985 }, { "epoch": 4.370577507598784, "grad_norm": 0.07575426080794644, "learning_rate": 4.283435863240773e-06, "loss": 0.515, "step": 8986 }, { "epoch": 4.371063829787234, "grad_norm": 0.07716065604557355, "learning_rate": 4.282489576321021e-06, "loss": 0.5146, "step": 8987 }, { "epoch": 4.371550151975684, "grad_norm": 0.07492100670539603, "learning_rate": 4.281543315640953e-06, "loss": 0.5419, "step": 8988 }, { "epoch": 4.372036474164133, "grad_norm": 0.07544483639828305, "learning_rate": 4.280597081235171e-06, "loss": 0.4932, "step": 8989 }, { "epoch": 4.372522796352584, "grad_norm": 0.07659431445285168, "learning_rate": 4.279650873138281e-06, "loss": 0.5369, "step": 8990 }, { "epoch": 4.373009118541034, "grad_norm": 0.07900993196104746, "learning_rate": 4.278704691384885e-06, "loss": 0.5497, "step": 8991 }, { "epoch": 4.373495440729483, "grad_norm": 0.07372706145496581, "learning_rate": 4.277758536009588e-06, "loss": 0.4863, "step": 8992 }, { "epoch": 4.373981762917933, "grad_norm": 0.07524489423047519, "learning_rate": 4.2768124070469875e-06, "loss": 0.5547, "step": 8993 }, { "epoch": 4.374468085106383, "grad_norm": 0.07436807266822268, "learning_rate": 4.2758663045316866e-06, "loss": 0.5287, "step": 8994 }, { "epoch": 4.374954407294833, "grad_norm": 0.07330239069204415, "learning_rate": 4.274920228498284e-06, "loss": 0.4791, "step": 8995 }, { "epoch": 4.375440729483283, "grad_norm": 0.07651491508045516, "learning_rate": 4.273974178981377e-06, "loss": 0.5091, "step": 8996 }, { "epoch": 4.375927051671733, "grad_norm": 0.07290540102598252, "learning_rate": 4.273028156015566e-06, "loss": 0.5051, "step": 8997 }, { "epoch": 4.376413373860182, "grad_norm": 0.0728792059861237, "learning_rate": 4.2720821596354444e-06, "loss": 0.4997, "step": 8998 }, { "epoch": 4.376899696048632, "grad_norm": 0.07491539302081295, "learning_rate": 4.271136189875611e-06, "loss": 0.5254, "step": 8999 }, { "epoch": 4.3773860182370825, "grad_norm": 0.07318635320704184, "learning_rate": 4.270190246770656e-06, "loss": 0.4871, "step": 9000 }, { "epoch": 4.377872340425532, "grad_norm": 0.07266173846733405, "learning_rate": 4.2692443303551755e-06, "loss": 0.5061, "step": 9001 }, { "epoch": 4.378358662613982, "grad_norm": 0.07563284034120024, "learning_rate": 4.268298440663762e-06, "loss": 0.5251, "step": 9002 }, { "epoch": 4.378844984802432, "grad_norm": 0.07510102872727847, "learning_rate": 4.267352577731008e-06, "loss": 0.5344, "step": 9003 }, { "epoch": 4.379331306990881, "grad_norm": 0.07619567204533358, "learning_rate": 4.266406741591502e-06, "loss": 0.5376, "step": 9004 }, { "epoch": 4.379817629179331, "grad_norm": 0.07965380077124826, "learning_rate": 4.2654609322798345e-06, "loss": 0.5328, "step": 9005 }, { "epoch": 4.3803039513677815, "grad_norm": 0.07503217431703178, "learning_rate": 4.264515149830595e-06, "loss": 0.5251, "step": 9006 }, { "epoch": 4.380790273556231, "grad_norm": 0.07270153742412923, "learning_rate": 4.263569394278371e-06, "loss": 0.5059, "step": 9007 }, { "epoch": 4.381276595744681, "grad_norm": 0.07763096972220519, "learning_rate": 4.262623665657748e-06, "loss": 0.5616, "step": 9008 }, { "epoch": 4.3817629179331306, "grad_norm": 0.07140433487202989, "learning_rate": 4.261677964003313e-06, "loss": 0.4779, "step": 9009 }, { "epoch": 4.38224924012158, "grad_norm": 0.07042517833143659, "learning_rate": 4.2607322893496495e-06, "loss": 0.4458, "step": 9010 }, { "epoch": 4.382735562310031, "grad_norm": 0.07833307615913485, "learning_rate": 4.259786641731344e-06, "loss": 0.5316, "step": 9011 }, { "epoch": 4.3832218844984805, "grad_norm": 0.07704773043950978, "learning_rate": 4.2588410211829755e-06, "loss": 0.5075, "step": 9012 }, { "epoch": 4.38370820668693, "grad_norm": 0.07293520348608629, "learning_rate": 4.257895427739129e-06, "loss": 0.5056, "step": 9013 }, { "epoch": 4.38419452887538, "grad_norm": 0.07649131670078893, "learning_rate": 4.256949861434382e-06, "loss": 0.5154, "step": 9014 }, { "epoch": 4.3846808510638295, "grad_norm": 0.07279869677145666, "learning_rate": 4.256004322303318e-06, "loss": 0.4945, "step": 9015 }, { "epoch": 4.385167173252279, "grad_norm": 0.07657900549816428, "learning_rate": 4.255058810380512e-06, "loss": 0.5043, "step": 9016 }, { "epoch": 4.38565349544073, "grad_norm": 0.07543383162759293, "learning_rate": 4.254113325700547e-06, "loss": 0.5435, "step": 9017 }, { "epoch": 4.3861398176291795, "grad_norm": 0.07496872333988061, "learning_rate": 4.253167868297993e-06, "loss": 0.5104, "step": 9018 }, { "epoch": 4.386626139817629, "grad_norm": 0.07456958186471606, "learning_rate": 4.25222243820743e-06, "loss": 0.526, "step": 9019 }, { "epoch": 4.387112462006079, "grad_norm": 0.07576880618808729, "learning_rate": 4.251277035463433e-06, "loss": 0.5437, "step": 9020 }, { "epoch": 4.3875987841945285, "grad_norm": 0.07671835050407548, "learning_rate": 4.250331660100574e-06, "loss": 0.5402, "step": 9021 }, { "epoch": 4.388085106382979, "grad_norm": 0.07370104775995952, "learning_rate": 4.24938631215343e-06, "loss": 0.4894, "step": 9022 }, { "epoch": 4.388571428571429, "grad_norm": 0.07142073350530018, "learning_rate": 4.248440991656566e-06, "loss": 0.4923, "step": 9023 }, { "epoch": 4.3890577507598785, "grad_norm": 0.07128914650917673, "learning_rate": 4.247495698644559e-06, "loss": 0.471, "step": 9024 }, { "epoch": 4.389544072948328, "grad_norm": 0.07455547258875524, "learning_rate": 4.246550433151973e-06, "loss": 0.4934, "step": 9025 }, { "epoch": 4.390030395136778, "grad_norm": 0.07402123021651788, "learning_rate": 4.245605195213383e-06, "loss": 0.4964, "step": 9026 }, { "epoch": 4.390516717325228, "grad_norm": 0.0723470715913622, "learning_rate": 4.244659984863352e-06, "loss": 0.5222, "step": 9027 }, { "epoch": 4.391003039513678, "grad_norm": 0.07452849188939858, "learning_rate": 4.24371480213645e-06, "loss": 0.5106, "step": 9028 }, { "epoch": 4.391489361702128, "grad_norm": 0.07590476558689858, "learning_rate": 4.24276964706724e-06, "loss": 0.5182, "step": 9029 }, { "epoch": 4.3919756838905775, "grad_norm": 0.07355395019148747, "learning_rate": 4.241824519690288e-06, "loss": 0.5071, "step": 9030 }, { "epoch": 4.392462006079027, "grad_norm": 0.07554176845373808, "learning_rate": 4.240879420040158e-06, "loss": 0.4862, "step": 9031 }, { "epoch": 4.392948328267477, "grad_norm": 0.0741586743298622, "learning_rate": 4.239934348151413e-06, "loss": 0.5015, "step": 9032 }, { "epoch": 4.393434650455927, "grad_norm": 0.07362941654653629, "learning_rate": 4.2389893040586136e-06, "loss": 0.4873, "step": 9033 }, { "epoch": 4.393920972644377, "grad_norm": 0.07226512902417866, "learning_rate": 4.238044287796322e-06, "loss": 0.4856, "step": 9034 }, { "epoch": 4.394407294832827, "grad_norm": 0.07377129558844482, "learning_rate": 4.237099299399095e-06, "loss": 0.522, "step": 9035 }, { "epoch": 4.3948936170212765, "grad_norm": 0.07360844929935485, "learning_rate": 4.236154338901496e-06, "loss": 0.5206, "step": 9036 }, { "epoch": 4.395379939209726, "grad_norm": 0.07534835206032656, "learning_rate": 4.235209406338078e-06, "loss": 0.5283, "step": 9037 }, { "epoch": 4.395866261398176, "grad_norm": 0.07715964041408652, "learning_rate": 4.234264501743401e-06, "loss": 0.5056, "step": 9038 }, { "epoch": 4.396352583586626, "grad_norm": 0.07436895408191456, "learning_rate": 4.233319625152017e-06, "loss": 0.5255, "step": 9039 }, { "epoch": 4.396838905775076, "grad_norm": 0.07375793144052216, "learning_rate": 4.232374776598483e-06, "loss": 0.5257, "step": 9040 }, { "epoch": 4.397325227963526, "grad_norm": 0.07441814047501576, "learning_rate": 4.231429956117353e-06, "loss": 0.5095, "step": 9041 }, { "epoch": 4.3978115501519754, "grad_norm": 0.07523545678108617, "learning_rate": 4.23048516374318e-06, "loss": 0.4825, "step": 9042 }, { "epoch": 4.398297872340425, "grad_norm": 0.07379752232221334, "learning_rate": 4.2295403995105114e-06, "loss": 0.5019, "step": 9043 }, { "epoch": 4.398784194528876, "grad_norm": 0.07290779688624605, "learning_rate": 4.228595663453902e-06, "loss": 0.4999, "step": 9044 }, { "epoch": 4.399270516717325, "grad_norm": 0.07228222378426806, "learning_rate": 4.227650955607898e-06, "loss": 0.46, "step": 9045 }, { "epoch": 4.399756838905775, "grad_norm": 0.08030212702038708, "learning_rate": 4.22670627600705e-06, "loss": 0.5914, "step": 9046 }, { "epoch": 4.400243161094225, "grad_norm": 0.07762499667310019, "learning_rate": 4.225761624685907e-06, "loss": 0.5488, "step": 9047 }, { "epoch": 4.400729483282674, "grad_norm": 0.07455248583218539, "learning_rate": 4.224817001679011e-06, "loss": 0.5573, "step": 9048 }, { "epoch": 4.401215805471125, "grad_norm": 0.07747893884142715, "learning_rate": 4.2238724070209106e-06, "loss": 0.5025, "step": 9049 }, { "epoch": 4.401702127659575, "grad_norm": 0.0731013291850829, "learning_rate": 4.222927840746147e-06, "loss": 0.4728, "step": 9050 }, { "epoch": 4.402188449848024, "grad_norm": 0.07551629917769796, "learning_rate": 4.221983302889268e-06, "loss": 0.4967, "step": 9051 }, { "epoch": 4.402674772036474, "grad_norm": 0.07470691787108497, "learning_rate": 4.2210387934848115e-06, "loss": 0.5216, "step": 9052 }, { "epoch": 4.403161094224924, "grad_norm": 0.0741162819899098, "learning_rate": 4.220094312567322e-06, "loss": 0.5122, "step": 9053 }, { "epoch": 4.403647416413374, "grad_norm": 0.0745271948599348, "learning_rate": 4.219149860171335e-06, "loss": 0.5226, "step": 9054 }, { "epoch": 4.404133738601824, "grad_norm": 0.07332281977617179, "learning_rate": 4.218205436331394e-06, "loss": 0.4995, "step": 9055 }, { "epoch": 4.404620060790274, "grad_norm": 0.0767985456368329, "learning_rate": 4.217261041082034e-06, "loss": 0.5438, "step": 9056 }, { "epoch": 4.405106382978723, "grad_norm": 0.07416760821963926, "learning_rate": 4.216316674457796e-06, "loss": 0.531, "step": 9057 }, { "epoch": 4.405592705167173, "grad_norm": 0.07336749337127642, "learning_rate": 4.215372336493211e-06, "loss": 0.4996, "step": 9058 }, { "epoch": 4.406079027355623, "grad_norm": 0.07550463856438298, "learning_rate": 4.214428027222816e-06, "loss": 0.4821, "step": 9059 }, { "epoch": 4.406565349544073, "grad_norm": 0.07507610205327063, "learning_rate": 4.2134837466811455e-06, "loss": 0.5056, "step": 9060 }, { "epoch": 4.407051671732523, "grad_norm": 0.07563393938130927, "learning_rate": 4.212539494902734e-06, "loss": 0.4904, "step": 9061 }, { "epoch": 4.407537993920973, "grad_norm": 0.07219387953873002, "learning_rate": 4.211595271922108e-06, "loss": 0.4859, "step": 9062 }, { "epoch": 4.408024316109422, "grad_norm": 0.07428949609653966, "learning_rate": 4.210651077773803e-06, "loss": 0.5054, "step": 9063 }, { "epoch": 4.408510638297872, "grad_norm": 0.07332929695079349, "learning_rate": 4.209706912492345e-06, "loss": 0.5305, "step": 9064 }, { "epoch": 4.408996960486322, "grad_norm": 0.07254212468718906, "learning_rate": 4.208762776112265e-06, "loss": 0.5064, "step": 9065 }, { "epoch": 4.409483282674772, "grad_norm": 0.07222729982152454, "learning_rate": 4.207818668668089e-06, "loss": 0.4754, "step": 9066 }, { "epoch": 4.409969604863222, "grad_norm": 0.07224255179255343, "learning_rate": 4.2068745901943465e-06, "loss": 0.4936, "step": 9067 }, { "epoch": 4.410455927051672, "grad_norm": 0.07698495376583506, "learning_rate": 4.205930540725558e-06, "loss": 0.5543, "step": 9068 }, { "epoch": 4.410942249240121, "grad_norm": 0.07485188419614182, "learning_rate": 4.204986520296251e-06, "loss": 0.4787, "step": 9069 }, { "epoch": 4.411428571428571, "grad_norm": 0.07241756838664043, "learning_rate": 4.204042528940948e-06, "loss": 0.481, "step": 9070 }, { "epoch": 4.411914893617022, "grad_norm": 0.07401808347233409, "learning_rate": 4.203098566694174e-06, "loss": 0.4906, "step": 9071 }, { "epoch": 4.412401215805471, "grad_norm": 0.07472563348904236, "learning_rate": 4.202154633590444e-06, "loss": 0.5118, "step": 9072 }, { "epoch": 4.412887537993921, "grad_norm": 0.07463649824438731, "learning_rate": 4.201210729664282e-06, "loss": 0.4953, "step": 9073 }, { "epoch": 4.413373860182371, "grad_norm": 0.07461440640088356, "learning_rate": 4.200266854950208e-06, "loss": 0.532, "step": 9074 }, { "epoch": 4.41386018237082, "grad_norm": 0.07872846831950941, "learning_rate": 4.1993230094827365e-06, "loss": 0.5259, "step": 9075 }, { "epoch": 4.414346504559271, "grad_norm": 0.0748588363741536, "learning_rate": 4.198379193296389e-06, "loss": 0.5273, "step": 9076 }, { "epoch": 4.414832826747721, "grad_norm": 0.07676513184315509, "learning_rate": 4.197435406425676e-06, "loss": 0.5221, "step": 9077 }, { "epoch": 4.41531914893617, "grad_norm": 0.07418360592519829, "learning_rate": 4.196491648905118e-06, "loss": 0.5148, "step": 9078 }, { "epoch": 4.41580547112462, "grad_norm": 0.07492311173844467, "learning_rate": 4.195547920769222e-06, "loss": 0.4938, "step": 9079 }, { "epoch": 4.41629179331307, "grad_norm": 0.07540831221269857, "learning_rate": 4.194604222052507e-06, "loss": 0.5545, "step": 9080 }, { "epoch": 4.41677811550152, "grad_norm": 0.07675563697465707, "learning_rate": 4.193660552789479e-06, "loss": 0.4857, "step": 9081 }, { "epoch": 4.41726443768997, "grad_norm": 0.07457433600622512, "learning_rate": 4.192716913014653e-06, "loss": 0.4946, "step": 9082 }, { "epoch": 4.41775075987842, "grad_norm": 0.07376700804617603, "learning_rate": 4.191773302762534e-06, "loss": 0.5152, "step": 9083 }, { "epoch": 4.418237082066869, "grad_norm": 0.07308860634821253, "learning_rate": 4.1908297220676345e-06, "loss": 0.5163, "step": 9084 }, { "epoch": 4.418723404255319, "grad_norm": 0.07214967881111042, "learning_rate": 4.189886170964458e-06, "loss": 0.4916, "step": 9085 }, { "epoch": 4.419209726443769, "grad_norm": 0.07577197842988086, "learning_rate": 4.188942649487514e-06, "loss": 0.556, "step": 9086 }, { "epoch": 4.419696048632219, "grad_norm": 0.07608259734300477, "learning_rate": 4.187999157671304e-06, "loss": 0.5152, "step": 9087 }, { "epoch": 4.420182370820669, "grad_norm": 0.0731186727929142, "learning_rate": 4.187055695550335e-06, "loss": 0.5331, "step": 9088 }, { "epoch": 4.420668693009119, "grad_norm": 0.07718537948198227, "learning_rate": 4.186112263159108e-06, "loss": 0.5044, "step": 9089 }, { "epoch": 4.421155015197568, "grad_norm": 0.07662218528622265, "learning_rate": 4.185168860532127e-06, "loss": 0.51, "step": 9090 }, { "epoch": 4.421641337386018, "grad_norm": 0.07394124837735269, "learning_rate": 4.184225487703888e-06, "loss": 0.5229, "step": 9091 }, { "epoch": 4.422127659574468, "grad_norm": 0.0719442143277997, "learning_rate": 4.183282144708897e-06, "loss": 0.5093, "step": 9092 }, { "epoch": 4.422613981762918, "grad_norm": 0.0761258445087453, "learning_rate": 4.182338831581646e-06, "loss": 0.4985, "step": 9093 }, { "epoch": 4.423100303951368, "grad_norm": 0.07419935530040028, "learning_rate": 4.181395548356636e-06, "loss": 0.4965, "step": 9094 }, { "epoch": 4.423586626139818, "grad_norm": 0.07374791817061643, "learning_rate": 4.180452295068363e-06, "loss": 0.5331, "step": 9095 }, { "epoch": 4.424072948328267, "grad_norm": 0.07711191890342523, "learning_rate": 4.179509071751323e-06, "loss": 0.5105, "step": 9096 }, { "epoch": 4.424559270516717, "grad_norm": 0.07513900914892778, "learning_rate": 4.1785658784400076e-06, "loss": 0.5275, "step": 9097 }, { "epoch": 4.4250455927051675, "grad_norm": 0.07296096693191595, "learning_rate": 4.177622715168911e-06, "loss": 0.5221, "step": 9098 }, { "epoch": 4.425531914893617, "grad_norm": 0.07262837212831806, "learning_rate": 4.176679581972526e-06, "loss": 0.487, "step": 9099 }, { "epoch": 4.426018237082067, "grad_norm": 0.07586383156011682, "learning_rate": 4.175736478885342e-06, "loss": 0.5459, "step": 9100 }, { "epoch": 4.426504559270517, "grad_norm": 0.07458208431354382, "learning_rate": 4.1747934059418514e-06, "loss": 0.5236, "step": 9101 }, { "epoch": 4.426990881458966, "grad_norm": 0.07112016482307837, "learning_rate": 4.173850363176539e-06, "loss": 0.479, "step": 9102 }, { "epoch": 4.427477203647417, "grad_norm": 0.07387071893359579, "learning_rate": 4.172907350623896e-06, "loss": 0.5217, "step": 9103 }, { "epoch": 4.4279635258358665, "grad_norm": 0.07329809776277209, "learning_rate": 4.171964368318404e-06, "loss": 0.4707, "step": 9104 }, { "epoch": 4.428449848024316, "grad_norm": 0.07696985784682245, "learning_rate": 4.171021416294555e-06, "loss": 0.5128, "step": 9105 }, { "epoch": 4.428936170212766, "grad_norm": 0.07378305836485363, "learning_rate": 4.170078494586826e-06, "loss": 0.4887, "step": 9106 }, { "epoch": 4.4294224924012155, "grad_norm": 0.0778186705889417, "learning_rate": 4.169135603229707e-06, "loss": 0.5055, "step": 9107 }, { "epoch": 4.429908814589666, "grad_norm": 0.07586478389881635, "learning_rate": 4.168192742257674e-06, "loss": 0.5213, "step": 9108 }, { "epoch": 4.430395136778116, "grad_norm": 0.07335612360719625, "learning_rate": 4.16724991170521e-06, "loss": 0.5303, "step": 9109 }, { "epoch": 4.4308814589665655, "grad_norm": 0.07657735512603027, "learning_rate": 4.166307111606795e-06, "loss": 0.5249, "step": 9110 }, { "epoch": 4.431367781155015, "grad_norm": 0.07205231498610169, "learning_rate": 4.1653643419969104e-06, "loss": 0.4908, "step": 9111 }, { "epoch": 4.431854103343465, "grad_norm": 0.07438732692377771, "learning_rate": 4.164421602910028e-06, "loss": 0.5031, "step": 9112 }, { "epoch": 4.4323404255319145, "grad_norm": 0.07292292153829716, "learning_rate": 4.1634788943806286e-06, "loss": 0.4933, "step": 9113 }, { "epoch": 4.432826747720365, "grad_norm": 0.07272489464759248, "learning_rate": 4.162536216443185e-06, "loss": 0.505, "step": 9114 }, { "epoch": 4.433313069908815, "grad_norm": 0.075104762108542, "learning_rate": 4.161593569132175e-06, "loss": 0.5465, "step": 9115 }, { "epoch": 4.4337993920972645, "grad_norm": 0.0707235435697312, "learning_rate": 4.1606509524820666e-06, "loss": 0.4663, "step": 9116 }, { "epoch": 4.434285714285714, "grad_norm": 0.0727043044237745, "learning_rate": 4.159708366527337e-06, "loss": 0.5259, "step": 9117 }, { "epoch": 4.434772036474164, "grad_norm": 0.07320888965153415, "learning_rate": 4.1587658113024505e-06, "loss": 0.5052, "step": 9118 }, { "epoch": 4.4352583586626135, "grad_norm": 0.07528755572832935, "learning_rate": 4.157823286841882e-06, "loss": 0.5339, "step": 9119 }, { "epoch": 4.435744680851064, "grad_norm": 0.07464732244225625, "learning_rate": 4.156880793180098e-06, "loss": 0.5035, "step": 9120 }, { "epoch": 4.436231003039514, "grad_norm": 0.0725134205794363, "learning_rate": 4.155938330351569e-06, "loss": 0.4872, "step": 9121 }, { "epoch": 4.4367173252279635, "grad_norm": 0.07390102078467314, "learning_rate": 4.154995898390756e-06, "loss": 0.5077, "step": 9122 }, { "epoch": 4.437203647416413, "grad_norm": 0.07638143174560055, "learning_rate": 4.1540534973321275e-06, "loss": 0.5516, "step": 9123 }, { "epoch": 4.437689969604863, "grad_norm": 0.07546406710573061, "learning_rate": 4.153111127210147e-06, "loss": 0.5159, "step": 9124 }, { "epoch": 4.438176291793313, "grad_norm": 0.07613944887188974, "learning_rate": 4.152168788059276e-06, "loss": 0.5202, "step": 9125 }, { "epoch": 4.438662613981763, "grad_norm": 0.07222537282958809, "learning_rate": 4.151226479913981e-06, "loss": 0.4715, "step": 9126 }, { "epoch": 4.439148936170213, "grad_norm": 0.07233969747422339, "learning_rate": 4.150284202808716e-06, "loss": 0.5035, "step": 9127 }, { "epoch": 4.4396352583586625, "grad_norm": 0.07434789901175949, "learning_rate": 4.149341956777945e-06, "loss": 0.4909, "step": 9128 }, { "epoch": 4.440121580547112, "grad_norm": 0.07462667558137118, "learning_rate": 4.148399741856125e-06, "loss": 0.5049, "step": 9129 }, { "epoch": 4.440607902735563, "grad_norm": 0.0764114471862437, "learning_rate": 4.1474575580777145e-06, "loss": 0.528, "step": 9130 }, { "epoch": 4.441094224924012, "grad_norm": 0.07302644598794808, "learning_rate": 4.1465154054771674e-06, "loss": 0.4914, "step": 9131 }, { "epoch": 4.441580547112462, "grad_norm": 0.07260935998242737, "learning_rate": 4.145573284088941e-06, "loss": 0.4985, "step": 9132 }, { "epoch": 4.442066869300912, "grad_norm": 0.07734936149770032, "learning_rate": 4.144631193947485e-06, "loss": 0.5558, "step": 9133 }, { "epoch": 4.4425531914893615, "grad_norm": 0.07487387208687873, "learning_rate": 4.143689135087257e-06, "loss": 0.5228, "step": 9134 }, { "epoch": 4.443039513677811, "grad_norm": 0.077470120227211, "learning_rate": 4.142747107542705e-06, "loss": 0.5251, "step": 9135 }, { "epoch": 4.443525835866262, "grad_norm": 0.0733020601161558, "learning_rate": 4.1418051113482825e-06, "loss": 0.5144, "step": 9136 }, { "epoch": 4.444012158054711, "grad_norm": 0.07715403605423381, "learning_rate": 4.1408631465384355e-06, "loss": 0.5324, "step": 9137 }, { "epoch": 4.444498480243161, "grad_norm": 0.07398415580005642, "learning_rate": 4.139921213147614e-06, "loss": 0.5243, "step": 9138 }, { "epoch": 4.444984802431611, "grad_norm": 0.0728185274545478, "learning_rate": 4.138979311210264e-06, "loss": 0.4566, "step": 9139 }, { "epoch": 4.44547112462006, "grad_norm": 0.07506445678451476, "learning_rate": 4.138037440760834e-06, "loss": 0.4988, "step": 9140 }, { "epoch": 4.445957446808511, "grad_norm": 0.08126597511347414, "learning_rate": 4.1370956018337635e-06, "loss": 0.5583, "step": 9141 }, { "epoch": 4.446443768996961, "grad_norm": 0.07539548085363766, "learning_rate": 4.136153794463501e-06, "loss": 0.5486, "step": 9142 }, { "epoch": 4.44693009118541, "grad_norm": 0.07510361004220807, "learning_rate": 4.135212018684485e-06, "loss": 0.5328, "step": 9143 }, { "epoch": 4.44741641337386, "grad_norm": 0.07523356817819044, "learning_rate": 4.13427027453116e-06, "loss": 0.5251, "step": 9144 }, { "epoch": 4.44790273556231, "grad_norm": 0.07719522285238851, "learning_rate": 4.133328562037962e-06, "loss": 0.5294, "step": 9145 }, { "epoch": 4.448389057750759, "grad_norm": 0.07383354379769586, "learning_rate": 4.132386881239336e-06, "loss": 0.4959, "step": 9146 }, { "epoch": 4.44887537993921, "grad_norm": 0.07520400574567981, "learning_rate": 4.131445232169713e-06, "loss": 0.528, "step": 9147 }, { "epoch": 4.44936170212766, "grad_norm": 0.07279881023644758, "learning_rate": 4.1305036148635334e-06, "loss": 0.4789, "step": 9148 }, { "epoch": 4.449848024316109, "grad_norm": 0.07578316685053149, "learning_rate": 4.12956202935523e-06, "loss": 0.5305, "step": 9149 }, { "epoch": 4.450334346504559, "grad_norm": 0.0745941043817119, "learning_rate": 4.1286204756792395e-06, "loss": 0.49, "step": 9150 }, { "epoch": 4.450820668693009, "grad_norm": 0.07800376845251361, "learning_rate": 4.127678953869996e-06, "loss": 0.5364, "step": 9151 }, { "epoch": 4.451306990881459, "grad_norm": 0.07433122862396811, "learning_rate": 4.126737463961927e-06, "loss": 0.5417, "step": 9152 }, { "epoch": 4.451793313069909, "grad_norm": 0.0738548815111225, "learning_rate": 4.125796005989468e-06, "loss": 0.5151, "step": 9153 }, { "epoch": 4.452279635258359, "grad_norm": 0.07632446306627214, "learning_rate": 4.124854579987043e-06, "loss": 0.515, "step": 9154 }, { "epoch": 4.452765957446808, "grad_norm": 0.07453589290986092, "learning_rate": 4.1239131859890875e-06, "loss": 0.4892, "step": 9155 }, { "epoch": 4.453252279635258, "grad_norm": 0.07465550229398378, "learning_rate": 4.122971824030022e-06, "loss": 0.5532, "step": 9156 }, { "epoch": 4.453738601823709, "grad_norm": 0.07225889514780721, "learning_rate": 4.122030494144278e-06, "loss": 0.4755, "step": 9157 }, { "epoch": 4.454224924012158, "grad_norm": 0.07262962775281033, "learning_rate": 4.121089196366274e-06, "loss": 0.4972, "step": 9158 }, { "epoch": 4.454711246200608, "grad_norm": 0.07292766334312963, "learning_rate": 4.12014793073044e-06, "loss": 0.5011, "step": 9159 }, { "epoch": 4.455197568389058, "grad_norm": 0.07464166631207429, "learning_rate": 4.119206697271195e-06, "loss": 0.5036, "step": 9160 }, { "epoch": 4.455683890577507, "grad_norm": 0.07520585648564442, "learning_rate": 4.118265496022963e-06, "loss": 0.5312, "step": 9161 }, { "epoch": 4.456170212765957, "grad_norm": 0.0734861239147741, "learning_rate": 4.1173243270201604e-06, "loss": 0.5059, "step": 9162 }, { "epoch": 4.456656534954408, "grad_norm": 0.07200911399975939, "learning_rate": 4.116383190297209e-06, "loss": 0.5331, "step": 9163 }, { "epoch": 4.457142857142857, "grad_norm": 0.07386988059053695, "learning_rate": 4.1154420858885245e-06, "loss": 0.5242, "step": 9164 }, { "epoch": 4.457629179331307, "grad_norm": 0.0721244639412503, "learning_rate": 4.1145010138285265e-06, "loss": 0.5026, "step": 9165 }, { "epoch": 4.458115501519757, "grad_norm": 0.07808444002853071, "learning_rate": 4.113559974151628e-06, "loss": 0.5318, "step": 9166 }, { "epoch": 4.458601823708206, "grad_norm": 0.07716373419170366, "learning_rate": 4.112618966892245e-06, "loss": 0.5349, "step": 9167 }, { "epoch": 4.459088145896657, "grad_norm": 0.07379194939909248, "learning_rate": 4.111677992084787e-06, "loss": 0.5036, "step": 9168 }, { "epoch": 4.459574468085107, "grad_norm": 0.07348873200685176, "learning_rate": 4.110737049763671e-06, "loss": 0.4799, "step": 9169 }, { "epoch": 4.460060790273556, "grad_norm": 0.07443480805290324, "learning_rate": 4.109796139963303e-06, "loss": 0.5147, "step": 9170 }, { "epoch": 4.460547112462006, "grad_norm": 0.07530280503401027, "learning_rate": 4.108855262718098e-06, "loss": 0.5107, "step": 9171 }, { "epoch": 4.461033434650456, "grad_norm": 0.07434427440289688, "learning_rate": 4.107914418062457e-06, "loss": 0.4979, "step": 9172 }, { "epoch": 4.461519756838905, "grad_norm": 0.07564531306472663, "learning_rate": 4.106973606030793e-06, "loss": 0.4851, "step": 9173 }, { "epoch": 4.462006079027356, "grad_norm": 0.07146423142950971, "learning_rate": 4.106032826657509e-06, "loss": 0.4865, "step": 9174 }, { "epoch": 4.462492401215806, "grad_norm": 0.07283468829760416, "learning_rate": 4.105092079977012e-06, "loss": 0.5039, "step": 9175 }, { "epoch": 4.462978723404255, "grad_norm": 0.07231614855905573, "learning_rate": 4.104151366023703e-06, "loss": 0.5062, "step": 9176 }, { "epoch": 4.463465045592705, "grad_norm": 0.07746994349434007, "learning_rate": 4.1032106848319856e-06, "loss": 0.5327, "step": 9177 }, { "epoch": 4.463951367781155, "grad_norm": 0.07449258666988649, "learning_rate": 4.102270036436261e-06, "loss": 0.4585, "step": 9178 }, { "epoch": 4.464437689969605, "grad_norm": 0.07664642111752314, "learning_rate": 4.101329420870929e-06, "loss": 0.5163, "step": 9179 }, { "epoch": 4.464924012158055, "grad_norm": 0.07225118597422747, "learning_rate": 4.100388838170389e-06, "loss": 0.495, "step": 9180 }, { "epoch": 4.465410334346505, "grad_norm": 0.07381462558186154, "learning_rate": 4.099448288369037e-06, "loss": 0.4948, "step": 9181 }, { "epoch": 4.465896656534954, "grad_norm": 0.07564861456899609, "learning_rate": 4.098507771501272e-06, "loss": 0.5184, "step": 9182 }, { "epoch": 4.466382978723404, "grad_norm": 0.07242232205109025, "learning_rate": 4.097567287601485e-06, "loss": 0.5441, "step": 9183 }, { "epoch": 4.4668693009118545, "grad_norm": 0.07713014278620212, "learning_rate": 4.096626836704074e-06, "loss": 0.5196, "step": 9184 }, { "epoch": 4.467355623100304, "grad_norm": 0.0759162521295131, "learning_rate": 4.095686418843429e-06, "loss": 0.527, "step": 9185 }, { "epoch": 4.467841945288754, "grad_norm": 0.07263596208204152, "learning_rate": 4.094746034053945e-06, "loss": 0.5117, "step": 9186 }, { "epoch": 4.468328267477204, "grad_norm": 0.07378193519869722, "learning_rate": 4.093805682370007e-06, "loss": 0.4877, "step": 9187 }, { "epoch": 4.468814589665653, "grad_norm": 0.0740858492601317, "learning_rate": 4.092865363826007e-06, "loss": 0.5236, "step": 9188 }, { "epoch": 4.469300911854103, "grad_norm": 0.07324258186699761, "learning_rate": 4.091925078456333e-06, "loss": 0.4921, "step": 9189 }, { "epoch": 4.4697872340425535, "grad_norm": 0.076402138535962, "learning_rate": 4.090984826295373e-06, "loss": 0.5216, "step": 9190 }, { "epoch": 4.470273556231003, "grad_norm": 0.07585259376944907, "learning_rate": 4.090044607377509e-06, "loss": 0.5514, "step": 9191 }, { "epoch": 4.470759878419453, "grad_norm": 0.07439737611996425, "learning_rate": 4.089104421737128e-06, "loss": 0.5286, "step": 9192 }, { "epoch": 4.471246200607903, "grad_norm": 0.07364013295426698, "learning_rate": 4.088164269408612e-06, "loss": 0.5298, "step": 9193 }, { "epoch": 4.471732522796352, "grad_norm": 0.0754632171597059, "learning_rate": 4.087224150426344e-06, "loss": 0.4934, "step": 9194 }, { "epoch": 4.472218844984803, "grad_norm": 0.07784454436577268, "learning_rate": 4.086284064824702e-06, "loss": 0.5239, "step": 9195 }, { "epoch": 4.4727051671732525, "grad_norm": 0.0759475669798295, "learning_rate": 4.085344012638067e-06, "loss": 0.556, "step": 9196 }, { "epoch": 4.473191489361702, "grad_norm": 0.07365999847006834, "learning_rate": 4.084403993900818e-06, "loss": 0.5159, "step": 9197 }, { "epoch": 4.473677811550152, "grad_norm": 0.07443198087071798, "learning_rate": 4.083464008647331e-06, "loss": 0.497, "step": 9198 }, { "epoch": 4.474164133738602, "grad_norm": 0.0735587635551236, "learning_rate": 4.0825240569119795e-06, "loss": 0.5115, "step": 9199 }, { "epoch": 4.474650455927051, "grad_norm": 0.0713216298603689, "learning_rate": 4.081584138729144e-06, "loss": 0.4819, "step": 9200 }, { "epoch": 4.475136778115502, "grad_norm": 0.07256181532494116, "learning_rate": 4.080644254133189e-06, "loss": 0.5194, "step": 9201 }, { "epoch": 4.4756231003039515, "grad_norm": 0.07280272785430154, "learning_rate": 4.0797044031584935e-06, "loss": 0.4993, "step": 9202 }, { "epoch": 4.476109422492401, "grad_norm": 0.07679057200614148, "learning_rate": 4.078764585839426e-06, "loss": 0.5227, "step": 9203 }, { "epoch": 4.476595744680851, "grad_norm": 0.07774980929133844, "learning_rate": 4.077824802210356e-06, "loss": 0.5296, "step": 9204 }, { "epoch": 4.4770820668693005, "grad_norm": 0.07598749068598204, "learning_rate": 4.076885052305654e-06, "loss": 0.5005, "step": 9205 }, { "epoch": 4.477568389057751, "grad_norm": 0.07414400475947454, "learning_rate": 4.075945336159682e-06, "loss": 0.4663, "step": 9206 }, { "epoch": 4.478054711246201, "grad_norm": 0.07712391417719938, "learning_rate": 4.07500565380681e-06, "loss": 0.5411, "step": 9207 }, { "epoch": 4.4785410334346505, "grad_norm": 0.07747465468414004, "learning_rate": 4.0740660052814e-06, "loss": 0.4983, "step": 9208 }, { "epoch": 4.4790273556231, "grad_norm": 0.07303760276416392, "learning_rate": 4.073126390617821e-06, "loss": 0.502, "step": 9209 }, { "epoch": 4.47951367781155, "grad_norm": 0.07784725598303321, "learning_rate": 4.0721868098504275e-06, "loss": 0.5367, "step": 9210 }, { "epoch": 4.48, "grad_norm": 0.07456733990797919, "learning_rate": 4.0712472630135865e-06, "loss": 0.5083, "step": 9211 }, { "epoch": 4.48048632218845, "grad_norm": 0.074913364572263, "learning_rate": 4.070307750141652e-06, "loss": 0.5359, "step": 9212 }, { "epoch": 4.4809726443769, "grad_norm": 0.07695317591547961, "learning_rate": 4.069368271268987e-06, "loss": 0.5217, "step": 9213 }, { "epoch": 4.4814589665653495, "grad_norm": 0.07521774877799299, "learning_rate": 4.068428826429946e-06, "loss": 0.5091, "step": 9214 }, { "epoch": 4.481945288753799, "grad_norm": 0.07308852502263749, "learning_rate": 4.067489415658889e-06, "loss": 0.5196, "step": 9215 }, { "epoch": 4.482431610942249, "grad_norm": 0.07787148826527195, "learning_rate": 4.066550038990165e-06, "loss": 0.5112, "step": 9216 }, { "epoch": 4.482917933130699, "grad_norm": 0.0778526721759418, "learning_rate": 4.065610696458131e-06, "loss": 0.5037, "step": 9217 }, { "epoch": 4.483404255319149, "grad_norm": 0.07498795125566858, "learning_rate": 4.064671388097138e-06, "loss": 0.5204, "step": 9218 }, { "epoch": 4.483890577507599, "grad_norm": 0.07753354247840392, "learning_rate": 4.063732113941539e-06, "loss": 0.5376, "step": 9219 }, { "epoch": 4.4843768996960485, "grad_norm": 0.0720535942705819, "learning_rate": 4.062792874025679e-06, "loss": 0.5175, "step": 9220 }, { "epoch": 4.484863221884498, "grad_norm": 0.07461435039785043, "learning_rate": 4.061853668383912e-06, "loss": 0.5132, "step": 9221 }, { "epoch": 4.485349544072949, "grad_norm": 0.07358080704390886, "learning_rate": 4.0609144970505805e-06, "loss": 0.5085, "step": 9222 }, { "epoch": 4.485835866261398, "grad_norm": 0.07570764144735304, "learning_rate": 4.059975360060035e-06, "loss": 0.5357, "step": 9223 }, { "epoch": 4.486322188449848, "grad_norm": 0.07383572852496767, "learning_rate": 4.059036257446614e-06, "loss": 0.479, "step": 9224 }, { "epoch": 4.486808510638298, "grad_norm": 0.07400357054456261, "learning_rate": 4.058097189244669e-06, "loss": 0.5237, "step": 9225 }, { "epoch": 4.4872948328267475, "grad_norm": 0.07417429522100236, "learning_rate": 4.0571581554885345e-06, "loss": 0.4767, "step": 9226 }, { "epoch": 4.487781155015197, "grad_norm": 0.07385172551776475, "learning_rate": 4.056219156212556e-06, "loss": 0.5152, "step": 9227 }, { "epoch": 4.488267477203648, "grad_norm": 0.07661381870005167, "learning_rate": 4.05528019145107e-06, "loss": 0.5163, "step": 9228 }, { "epoch": 4.488753799392097, "grad_norm": 0.07608187355579815, "learning_rate": 4.054341261238418e-06, "loss": 0.555, "step": 9229 }, { "epoch": 4.489240121580547, "grad_norm": 0.07300010804799278, "learning_rate": 4.053402365608936e-06, "loss": 0.513, "step": 9230 }, { "epoch": 4.489726443768997, "grad_norm": 0.0742954532691905, "learning_rate": 4.052463504596959e-06, "loss": 0.4973, "step": 9231 }, { "epoch": 4.4902127659574465, "grad_norm": 0.07468256668069106, "learning_rate": 4.051524678236822e-06, "loss": 0.5236, "step": 9232 }, { "epoch": 4.490699088145897, "grad_norm": 0.07388177110272309, "learning_rate": 4.050585886562858e-06, "loss": 0.5061, "step": 9233 }, { "epoch": 4.491185410334347, "grad_norm": 0.07505008850314789, "learning_rate": 4.0496471296094016e-06, "loss": 0.5538, "step": 9234 }, { "epoch": 4.491671732522796, "grad_norm": 0.07241490035379888, "learning_rate": 4.048708407410779e-06, "loss": 0.516, "step": 9235 }, { "epoch": 4.492158054711246, "grad_norm": 0.07481466981765436, "learning_rate": 4.047769720001323e-06, "loss": 0.5291, "step": 9236 }, { "epoch": 4.492644376899696, "grad_norm": 0.07696794365658906, "learning_rate": 4.046831067415361e-06, "loss": 0.5403, "step": 9237 }, { "epoch": 4.493130699088146, "grad_norm": 0.07390932000186048, "learning_rate": 4.045892449687221e-06, "loss": 0.4975, "step": 9238 }, { "epoch": 4.493617021276596, "grad_norm": 0.07272641882298463, "learning_rate": 4.044953866851226e-06, "loss": 0.5159, "step": 9239 }, { "epoch": 4.494103343465046, "grad_norm": 0.07721302557672421, "learning_rate": 4.044015318941705e-06, "loss": 0.5379, "step": 9240 }, { "epoch": 4.494589665653495, "grad_norm": 0.07207208181933604, "learning_rate": 4.043076805992974e-06, "loss": 0.495, "step": 9241 }, { "epoch": 4.495075987841945, "grad_norm": 0.07309043489005235, "learning_rate": 4.042138328039361e-06, "loss": 0.4869, "step": 9242 }, { "epoch": 4.495562310030395, "grad_norm": 0.07267955554950069, "learning_rate": 4.041199885115183e-06, "loss": 0.4976, "step": 9243 }, { "epoch": 4.496048632218845, "grad_norm": 0.07215341886535027, "learning_rate": 4.040261477254763e-06, "loss": 0.5073, "step": 9244 }, { "epoch": 4.496534954407295, "grad_norm": 0.07333871415941677, "learning_rate": 4.039323104492415e-06, "loss": 0.4911, "step": 9245 }, { "epoch": 4.497021276595745, "grad_norm": 0.07181224526645402, "learning_rate": 4.0383847668624584e-06, "loss": 0.5093, "step": 9246 }, { "epoch": 4.497507598784194, "grad_norm": 0.07234753626864866, "learning_rate": 4.037446464399207e-06, "loss": 0.4835, "step": 9247 }, { "epoch": 4.497993920972644, "grad_norm": 0.07227323987217513, "learning_rate": 4.036508197136978e-06, "loss": 0.5056, "step": 9248 }, { "epoch": 4.498480243161095, "grad_norm": 0.07422656757214123, "learning_rate": 4.03556996511008e-06, "loss": 0.5374, "step": 9249 }, { "epoch": 4.498966565349544, "grad_norm": 0.0744136777963956, "learning_rate": 4.034631768352828e-06, "loss": 0.5326, "step": 9250 }, { "epoch": 4.499452887537994, "grad_norm": 0.07492827930506817, "learning_rate": 4.0336936068995286e-06, "loss": 0.4969, "step": 9251 }, { "epoch": 4.499939209726444, "grad_norm": 0.07223839735174606, "learning_rate": 4.032755480784494e-06, "loss": 0.502, "step": 9252 }, { "epoch": 4.499939209726444, "eval_loss": 0.5691717267036438, "eval_runtime": 105.0958, "eval_samples_per_second": 288.813, "eval_steps_per_second": 36.11, "step": 9252 }, { "epoch": 4.500425531914893, "grad_norm": 0.07197295245632464, "learning_rate": 4.031817390042031e-06, "loss": 0.5173, "step": 9253 }, { "epoch": 4.500911854103343, "grad_norm": 0.0718166851303264, "learning_rate": 4.030879334706447e-06, "loss": 0.4802, "step": 9254 }, { "epoch": 4.501398176291794, "grad_norm": 0.07657006337369, "learning_rate": 4.0299413148120444e-06, "loss": 0.5236, "step": 9255 }, { "epoch": 4.501884498480243, "grad_norm": 0.07434252278578819, "learning_rate": 4.029003330393128e-06, "loss": 0.5149, "step": 9256 }, { "epoch": 4.502370820668693, "grad_norm": 0.07425606058979606, "learning_rate": 4.028065381484002e-06, "loss": 0.5242, "step": 9257 }, { "epoch": 4.502857142857143, "grad_norm": 0.07405844853871049, "learning_rate": 4.027127468118967e-06, "loss": 0.5132, "step": 9258 }, { "epoch": 4.503343465045592, "grad_norm": 0.07387535955120854, "learning_rate": 4.026189590332323e-06, "loss": 0.5079, "step": 9259 }, { "epoch": 4.503829787234043, "grad_norm": 0.07316425652515, "learning_rate": 4.0252517481583665e-06, "loss": 0.5006, "step": 9260 }, { "epoch": 4.504316109422493, "grad_norm": 0.07202150422096368, "learning_rate": 4.024313941631397e-06, "loss": 0.5001, "step": 9261 }, { "epoch": 4.504802431610942, "grad_norm": 0.07641129960121701, "learning_rate": 4.02337617078571e-06, "loss": 0.5077, "step": 9262 }, { "epoch": 4.505288753799392, "grad_norm": 0.07383331947767907, "learning_rate": 4.022438435655601e-06, "loss": 0.4902, "step": 9263 }, { "epoch": 4.505775075987842, "grad_norm": 0.07296262164701718, "learning_rate": 4.021500736275361e-06, "loss": 0.5149, "step": 9264 }, { "epoch": 4.506261398176292, "grad_norm": 0.07448444653916979, "learning_rate": 4.020563072679286e-06, "loss": 0.5326, "step": 9265 }, { "epoch": 4.506747720364742, "grad_norm": 0.07518831906037178, "learning_rate": 4.019625444901662e-06, "loss": 0.5261, "step": 9266 }, { "epoch": 4.507234042553192, "grad_norm": 0.07417316446330552, "learning_rate": 4.018687852976783e-06, "loss": 0.5186, "step": 9267 }, { "epoch": 4.507720364741641, "grad_norm": 0.0762097291840639, "learning_rate": 4.017750296938932e-06, "loss": 0.5009, "step": 9268 }, { "epoch": 4.508206686930091, "grad_norm": 0.07357603364539657, "learning_rate": 4.016812776822402e-06, "loss": 0.4909, "step": 9269 }, { "epoch": 4.508693009118541, "grad_norm": 0.07095681142857135, "learning_rate": 4.015875292661474e-06, "loss": 0.4836, "step": 9270 }, { "epoch": 4.509179331306991, "grad_norm": 0.07782831667284015, "learning_rate": 4.014937844490434e-06, "loss": 0.5203, "step": 9271 }, { "epoch": 4.509665653495441, "grad_norm": 0.07693551833551153, "learning_rate": 4.014000432343563e-06, "loss": 0.5138, "step": 9272 }, { "epoch": 4.510151975683891, "grad_norm": 0.07675372753590344, "learning_rate": 4.013063056255147e-06, "loss": 0.5203, "step": 9273 }, { "epoch": 4.51063829787234, "grad_norm": 0.07304918207798498, "learning_rate": 4.012125716259461e-06, "loss": 0.4991, "step": 9274 }, { "epoch": 4.51112462006079, "grad_norm": 0.07635546349615119, "learning_rate": 4.011188412390788e-06, "loss": 0.5158, "step": 9275 }, { "epoch": 4.51161094224924, "grad_norm": 0.07276277522100671, "learning_rate": 4.0102511446834025e-06, "loss": 0.5027, "step": 9276 }, { "epoch": 4.51209726443769, "grad_norm": 0.07252339477070273, "learning_rate": 4.009313913171584e-06, "loss": 0.4951, "step": 9277 }, { "epoch": 4.51258358662614, "grad_norm": 0.0737220120378319, "learning_rate": 4.0083767178896046e-06, "loss": 0.5125, "step": 9278 }, { "epoch": 4.51306990881459, "grad_norm": 0.07208568380818856, "learning_rate": 4.0074395588717406e-06, "loss": 0.5043, "step": 9279 }, { "epoch": 4.513556231003039, "grad_norm": 0.07384039444594008, "learning_rate": 4.0065024361522606e-06, "loss": 0.5052, "step": 9280 }, { "epoch": 4.514042553191489, "grad_norm": 0.07356824048246989, "learning_rate": 4.005565349765438e-06, "loss": 0.5011, "step": 9281 }, { "epoch": 4.5145288753799395, "grad_norm": 0.07471788020450786, "learning_rate": 4.004628299745544e-06, "loss": 0.5001, "step": 9282 }, { "epoch": 4.515015197568389, "grad_norm": 0.0710858693953974, "learning_rate": 4.0036912861268434e-06, "loss": 0.465, "step": 9283 }, { "epoch": 4.515501519756839, "grad_norm": 0.0720420701395051, "learning_rate": 4.002754308943608e-06, "loss": 0.4945, "step": 9284 }, { "epoch": 4.515987841945289, "grad_norm": 0.07574934255713653, "learning_rate": 4.001817368230098e-06, "loss": 0.522, "step": 9285 }, { "epoch": 4.516474164133738, "grad_norm": 0.07582312734836276, "learning_rate": 4.000880464020582e-06, "loss": 0.5324, "step": 9286 }, { "epoch": 4.516960486322189, "grad_norm": 0.07194231379733766, "learning_rate": 3.9999435963493195e-06, "loss": 0.473, "step": 9287 }, { "epoch": 4.5174468085106385, "grad_norm": 0.07297646560307082, "learning_rate": 3.999006765250576e-06, "loss": 0.5206, "step": 9288 }, { "epoch": 4.517933130699088, "grad_norm": 0.07835034276835257, "learning_rate": 3.998069970758609e-06, "loss": 0.5298, "step": 9289 }, { "epoch": 4.518419452887538, "grad_norm": 0.07519668264825975, "learning_rate": 3.997133212907679e-06, "loss": 0.5356, "step": 9290 }, { "epoch": 4.518905775075988, "grad_norm": 0.0746168240062158, "learning_rate": 3.996196491732041e-06, "loss": 0.5438, "step": 9291 }, { "epoch": 4.519392097264438, "grad_norm": 0.0732138021003351, "learning_rate": 3.995259807265956e-06, "loss": 0.4903, "step": 9292 }, { "epoch": 4.519878419452888, "grad_norm": 0.0773828109032807, "learning_rate": 3.994323159543675e-06, "loss": 0.5311, "step": 9293 }, { "epoch": 4.5203647416413375, "grad_norm": 0.07487883981499747, "learning_rate": 3.993386548599454e-06, "loss": 0.5258, "step": 9294 }, { "epoch": 4.520851063829787, "grad_norm": 0.07321492962793047, "learning_rate": 3.992449974467542e-06, "loss": 0.4825, "step": 9295 }, { "epoch": 4.521337386018237, "grad_norm": 0.07315931764974398, "learning_rate": 3.9915134371821936e-06, "loss": 0.5136, "step": 9296 }, { "epoch": 4.5218237082066866, "grad_norm": 0.07636731854830328, "learning_rate": 3.9905769367776564e-06, "loss": 0.5277, "step": 9297 }, { "epoch": 4.522310030395137, "grad_norm": 0.07386656265960556, "learning_rate": 3.989640473288181e-06, "loss": 0.5099, "step": 9298 }, { "epoch": 4.522796352583587, "grad_norm": 0.07308825735289265, "learning_rate": 3.988704046748011e-06, "loss": 0.5042, "step": 9299 }, { "epoch": 4.5232826747720365, "grad_norm": 0.07446430230279066, "learning_rate": 3.987767657191393e-06, "loss": 0.5118, "step": 9300 }, { "epoch": 4.523768996960486, "grad_norm": 0.07289579885509151, "learning_rate": 3.986831304652572e-06, "loss": 0.5113, "step": 9301 }, { "epoch": 4.524255319148936, "grad_norm": 0.07435811531917867, "learning_rate": 3.985894989165792e-06, "loss": 0.523, "step": 9302 }, { "epoch": 4.5247416413373855, "grad_norm": 0.07642410642259997, "learning_rate": 3.984958710765291e-06, "loss": 0.5088, "step": 9303 }, { "epoch": 4.525227963525836, "grad_norm": 0.07134414286383806, "learning_rate": 3.984022469485314e-06, "loss": 0.4856, "step": 9304 }, { "epoch": 4.525714285714286, "grad_norm": 0.0726318784517991, "learning_rate": 3.983086265360093e-06, "loss": 0.5243, "step": 9305 }, { "epoch": 4.5262006079027355, "grad_norm": 0.07451550069135994, "learning_rate": 3.982150098423871e-06, "loss": 0.5336, "step": 9306 }, { "epoch": 4.526686930091185, "grad_norm": 0.07111640653778921, "learning_rate": 3.981213968710882e-06, "loss": 0.4622, "step": 9307 }, { "epoch": 4.527173252279635, "grad_norm": 0.07378757736643685, "learning_rate": 3.9802778762553606e-06, "loss": 0.5067, "step": 9308 }, { "epoch": 4.527659574468085, "grad_norm": 0.07805891922187286, "learning_rate": 3.979341821091543e-06, "loss": 0.5494, "step": 9309 }, { "epoch": 4.528145896656535, "grad_norm": 0.07335835696619315, "learning_rate": 3.978405803253656e-06, "loss": 0.5614, "step": 9310 }, { "epoch": 4.528632218844985, "grad_norm": 0.07441263378442507, "learning_rate": 3.977469822775934e-06, "loss": 0.5322, "step": 9311 }, { "epoch": 4.5291185410334345, "grad_norm": 0.0755974508599259, "learning_rate": 3.976533879692604e-06, "loss": 0.5199, "step": 9312 }, { "epoch": 4.529604863221884, "grad_norm": 0.07268009756675874, "learning_rate": 3.975597974037898e-06, "loss": 0.5154, "step": 9313 }, { "epoch": 4.530091185410335, "grad_norm": 0.0746747605080762, "learning_rate": 3.974662105846036e-06, "loss": 0.5233, "step": 9314 }, { "epoch": 4.530577507598784, "grad_norm": 0.07389784926086358, "learning_rate": 3.97372627515125e-06, "loss": 0.5047, "step": 9315 }, { "epoch": 4.531063829787234, "grad_norm": 0.07424802971303267, "learning_rate": 3.972790481987757e-06, "loss": 0.5011, "step": 9316 }, { "epoch": 4.531550151975684, "grad_norm": 0.07441778022364974, "learning_rate": 3.971854726389786e-06, "loss": 0.5092, "step": 9317 }, { "epoch": 4.5320364741641335, "grad_norm": 0.07310846216832277, "learning_rate": 3.970919008391552e-06, "loss": 0.5119, "step": 9318 }, { "epoch": 4.532522796352584, "grad_norm": 0.07450421601150241, "learning_rate": 3.96998332802728e-06, "loss": 0.5388, "step": 9319 }, { "epoch": 4.533009118541034, "grad_norm": 0.07243285938287639, "learning_rate": 3.969047685331184e-06, "loss": 0.4677, "step": 9320 }, { "epoch": 4.533495440729483, "grad_norm": 0.07507835126735407, "learning_rate": 3.9681120803374824e-06, "loss": 0.4946, "step": 9321 }, { "epoch": 4.533981762917933, "grad_norm": 0.07417852424322498, "learning_rate": 3.967176513080391e-06, "loss": 0.5269, "step": 9322 }, { "epoch": 4.534468085106383, "grad_norm": 0.07639212451088119, "learning_rate": 3.9662409835941245e-06, "loss": 0.5236, "step": 9323 }, { "epoch": 4.5349544072948325, "grad_norm": 0.07449302434528304, "learning_rate": 3.965305491912894e-06, "loss": 0.4932, "step": 9324 }, { "epoch": 4.535440729483283, "grad_norm": 0.07451420604336527, "learning_rate": 3.964370038070912e-06, "loss": 0.5144, "step": 9325 }, { "epoch": 4.535927051671733, "grad_norm": 0.07382092089704145, "learning_rate": 3.963434622102387e-06, "loss": 0.5119, "step": 9326 }, { "epoch": 4.536413373860182, "grad_norm": 0.07692256117569182, "learning_rate": 3.962499244041532e-06, "loss": 0.4826, "step": 9327 }, { "epoch": 4.536899696048632, "grad_norm": 0.0741511798853068, "learning_rate": 3.961563903922549e-06, "loss": 0.5144, "step": 9328 }, { "epoch": 4.537386018237082, "grad_norm": 0.07228413136967098, "learning_rate": 3.960628601779645e-06, "loss": 0.4748, "step": 9329 }, { "epoch": 4.5378723404255314, "grad_norm": 0.0728412750705643, "learning_rate": 3.959693337647026e-06, "loss": 0.4964, "step": 9330 }, { "epoch": 4.538358662613982, "grad_norm": 0.07156280063182621, "learning_rate": 3.9587581115588955e-06, "loss": 0.4802, "step": 9331 }, { "epoch": 4.538844984802432, "grad_norm": 0.07676877097281809, "learning_rate": 3.957822923549452e-06, "loss": 0.5224, "step": 9332 }, { "epoch": 4.539331306990881, "grad_norm": 0.07512896871439582, "learning_rate": 3.956887773652898e-06, "loss": 0.5366, "step": 9333 }, { "epoch": 4.539817629179331, "grad_norm": 0.0720943316031756, "learning_rate": 3.9559526619034335e-06, "loss": 0.4974, "step": 9334 }, { "epoch": 4.540303951367781, "grad_norm": 0.07245508083669079, "learning_rate": 3.955017588335252e-06, "loss": 0.4963, "step": 9335 }, { "epoch": 4.540790273556231, "grad_norm": 0.0721791386805876, "learning_rate": 3.954082552982554e-06, "loss": 0.4793, "step": 9336 }, { "epoch": 4.541276595744681, "grad_norm": 0.07087622774420815, "learning_rate": 3.953147555879531e-06, "loss": 0.4854, "step": 9337 }, { "epoch": 4.541762917933131, "grad_norm": 0.07502285430543014, "learning_rate": 3.95221259706038e-06, "loss": 0.4769, "step": 9338 }, { "epoch": 4.54224924012158, "grad_norm": 0.07352153868419029, "learning_rate": 3.951277676559288e-06, "loss": 0.5038, "step": 9339 }, { "epoch": 4.54273556231003, "grad_norm": 0.07330248743611278, "learning_rate": 3.9503427944104486e-06, "loss": 0.4877, "step": 9340 }, { "epoch": 4.543221884498481, "grad_norm": 0.07850893172390568, "learning_rate": 3.949407950648049e-06, "loss": 0.5261, "step": 9341 }, { "epoch": 4.54370820668693, "grad_norm": 0.07176517894202601, "learning_rate": 3.94847314530628e-06, "loss": 0.483, "step": 9342 }, { "epoch": 4.54419452887538, "grad_norm": 0.07490126077222273, "learning_rate": 3.9475383784193245e-06, "loss": 0.5312, "step": 9343 }, { "epoch": 4.54468085106383, "grad_norm": 0.07237236937699397, "learning_rate": 3.94660365002137e-06, "loss": 0.4961, "step": 9344 }, { "epoch": 4.545167173252279, "grad_norm": 0.07569531788936606, "learning_rate": 3.945668960146597e-06, "loss": 0.5136, "step": 9345 }, { "epoch": 4.54565349544073, "grad_norm": 0.0735559775455133, "learning_rate": 3.944734308829189e-06, "loss": 0.5257, "step": 9346 }, { "epoch": 4.54613981762918, "grad_norm": 0.07403499294450266, "learning_rate": 3.943799696103327e-06, "loss": 0.4877, "step": 9347 }, { "epoch": 4.546626139817629, "grad_norm": 0.07269643030045872, "learning_rate": 3.942865122003192e-06, "loss": 0.5263, "step": 9348 }, { "epoch": 4.547112462006079, "grad_norm": 0.07516116892036966, "learning_rate": 3.941930586562957e-06, "loss": 0.5254, "step": 9349 }, { "epoch": 4.547598784194529, "grad_norm": 0.07314509438835426, "learning_rate": 3.940996089816803e-06, "loss": 0.4918, "step": 9350 }, { "epoch": 4.548085106382978, "grad_norm": 0.07614808936951598, "learning_rate": 3.940061631798901e-06, "loss": 0.5387, "step": 9351 }, { "epoch": 4.548571428571429, "grad_norm": 0.07448856312515807, "learning_rate": 3.939127212543429e-06, "loss": 0.5041, "step": 9352 }, { "epoch": 4.549057750759879, "grad_norm": 0.07081525501058379, "learning_rate": 3.938192832084555e-06, "loss": 0.4924, "step": 9353 }, { "epoch": 4.549544072948328, "grad_norm": 0.07251093752698896, "learning_rate": 3.937258490456453e-06, "loss": 0.5129, "step": 9354 }, { "epoch": 4.550030395136778, "grad_norm": 0.07530015707247262, "learning_rate": 3.936324187693289e-06, "loss": 0.508, "step": 9355 }, { "epoch": 4.550516717325228, "grad_norm": 0.07527067208010085, "learning_rate": 3.9353899238292355e-06, "loss": 0.5068, "step": 9356 }, { "epoch": 4.551003039513677, "grad_norm": 0.0764365355266283, "learning_rate": 3.934455698898454e-06, "loss": 0.542, "step": 9357 }, { "epoch": 4.551489361702128, "grad_norm": 0.07462172737486743, "learning_rate": 3.933521512935114e-06, "loss": 0.5154, "step": 9358 }, { "epoch": 4.551975683890578, "grad_norm": 0.07266794715742102, "learning_rate": 3.932587365973374e-06, "loss": 0.4933, "step": 9359 }, { "epoch": 4.552462006079027, "grad_norm": 0.07297169637157461, "learning_rate": 3.9316532580474e-06, "loss": 0.5194, "step": 9360 }, { "epoch": 4.552948328267477, "grad_norm": 0.07089586743175724, "learning_rate": 3.930719189191352e-06, "loss": 0.4788, "step": 9361 }, { "epoch": 4.553434650455927, "grad_norm": 0.07582381698166774, "learning_rate": 3.9297851594393874e-06, "loss": 0.5107, "step": 9362 }, { "epoch": 4.553920972644377, "grad_norm": 0.07816746455215838, "learning_rate": 3.928851168825669e-06, "loss": 0.5265, "step": 9363 }, { "epoch": 4.554407294832827, "grad_norm": 0.07538072594197215, "learning_rate": 3.927917217384347e-06, "loss": 0.5224, "step": 9364 }, { "epoch": 4.554893617021277, "grad_norm": 0.07630716360194935, "learning_rate": 3.926983305149581e-06, "loss": 0.5835, "step": 9365 }, { "epoch": 4.555379939209726, "grad_norm": 0.07608038192539752, "learning_rate": 3.926049432155522e-06, "loss": 0.5357, "step": 9366 }, { "epoch": 4.555866261398176, "grad_norm": 0.07340667982104773, "learning_rate": 3.925115598436325e-06, "loss": 0.4868, "step": 9367 }, { "epoch": 4.5563525835866265, "grad_norm": 0.07668947030122514, "learning_rate": 3.924181804026137e-06, "loss": 0.5325, "step": 9368 }, { "epoch": 4.556838905775076, "grad_norm": 0.07532392010784962, "learning_rate": 3.9232480489591104e-06, "loss": 0.5365, "step": 9369 }, { "epoch": 4.557325227963526, "grad_norm": 0.07408912273729372, "learning_rate": 3.92231433326939e-06, "loss": 0.5312, "step": 9370 }, { "epoch": 4.557811550151976, "grad_norm": 0.0735007120621975, "learning_rate": 3.921380656991127e-06, "loss": 0.509, "step": 9371 }, { "epoch": 4.558297872340425, "grad_norm": 0.07666412749504235, "learning_rate": 3.920447020158461e-06, "loss": 0.5612, "step": 9372 }, { "epoch": 4.558784194528876, "grad_norm": 0.07343465139650254, "learning_rate": 3.9195134228055395e-06, "loss": 0.4885, "step": 9373 }, { "epoch": 4.5592705167173255, "grad_norm": 0.07424544077073247, "learning_rate": 3.918579864966502e-06, "loss": 0.4863, "step": 9374 }, { "epoch": 4.559756838905775, "grad_norm": 0.07414750458564023, "learning_rate": 3.917646346675491e-06, "loss": 0.5235, "step": 9375 }, { "epoch": 4.560243161094225, "grad_norm": 0.07430105556109139, "learning_rate": 3.916712867966644e-06, "loss": 0.5371, "step": 9376 }, { "epoch": 4.560729483282675, "grad_norm": 0.07399558110506868, "learning_rate": 3.9157794288741e-06, "loss": 0.548, "step": 9377 }, { "epoch": 4.561215805471124, "grad_norm": 0.07213414500122718, "learning_rate": 3.914846029431995e-06, "loss": 0.4969, "step": 9378 }, { "epoch": 4.561702127659575, "grad_norm": 0.07664282878385731, "learning_rate": 3.9139126696744636e-06, "loss": 0.4737, "step": 9379 }, { "epoch": 4.5621884498480245, "grad_norm": 0.07618814863371207, "learning_rate": 3.912979349635638e-06, "loss": 0.5316, "step": 9380 }, { "epoch": 4.562674772036474, "grad_norm": 0.07192547278007334, "learning_rate": 3.912046069349654e-06, "loss": 0.4788, "step": 9381 }, { "epoch": 4.563161094224924, "grad_norm": 0.07321205343944717, "learning_rate": 3.911112828850637e-06, "loss": 0.5207, "step": 9382 }, { "epoch": 4.563647416413374, "grad_norm": 0.07286769804339066, "learning_rate": 3.91017962817272e-06, "loss": 0.4836, "step": 9383 }, { "epoch": 4.564133738601823, "grad_norm": 0.0731352906986514, "learning_rate": 3.909246467350028e-06, "loss": 0.5141, "step": 9384 }, { "epoch": 4.564620060790274, "grad_norm": 0.07651885023375475, "learning_rate": 3.9083133464166905e-06, "loss": 0.5258, "step": 9385 }, { "epoch": 4.5651063829787235, "grad_norm": 0.07347187739775783, "learning_rate": 3.907380265406827e-06, "loss": 0.5014, "step": 9386 }, { "epoch": 4.565592705167173, "grad_norm": 0.07484664211420576, "learning_rate": 3.906447224354565e-06, "loss": 0.512, "step": 9387 }, { "epoch": 4.566079027355623, "grad_norm": 0.07173261805579136, "learning_rate": 3.905514223294026e-06, "loss": 0.503, "step": 9388 }, { "epoch": 4.566565349544073, "grad_norm": 0.07359214952830676, "learning_rate": 3.9045812622593275e-06, "loss": 0.5521, "step": 9389 }, { "epoch": 4.567051671732523, "grad_norm": 0.07453135287178002, "learning_rate": 3.9036483412845905e-06, "loss": 0.5016, "step": 9390 }, { "epoch": 4.567537993920973, "grad_norm": 0.07684984731358946, "learning_rate": 3.902715460403931e-06, "loss": 0.5535, "step": 9391 }, { "epoch": 4.5680243161094225, "grad_norm": 0.07659117273950458, "learning_rate": 3.901782619651468e-06, "loss": 0.5244, "step": 9392 }, { "epoch": 4.568510638297872, "grad_norm": 0.07627677323635071, "learning_rate": 3.90084981906131e-06, "loss": 0.5242, "step": 9393 }, { "epoch": 4.568996960486322, "grad_norm": 0.07471246540561714, "learning_rate": 3.899917058667576e-06, "loss": 0.5101, "step": 9394 }, { "epoch": 4.569483282674772, "grad_norm": 0.0772917860316628, "learning_rate": 3.898984338504373e-06, "loss": 0.547, "step": 9395 }, { "epoch": 4.569969604863222, "grad_norm": 0.07418603674263063, "learning_rate": 3.8980516586058155e-06, "loss": 0.538, "step": 9396 }, { "epoch": 4.570455927051672, "grad_norm": 0.07591462600023226, "learning_rate": 3.897119019006008e-06, "loss": 0.5178, "step": 9397 }, { "epoch": 4.5709422492401215, "grad_norm": 0.07344926732978485, "learning_rate": 3.89618641973906e-06, "loss": 0.4887, "step": 9398 }, { "epoch": 4.571428571428571, "grad_norm": 0.07761402148302443, "learning_rate": 3.895253860839075e-06, "loss": 0.4971, "step": 9399 }, { "epoch": 4.571914893617022, "grad_norm": 0.0762308165676122, "learning_rate": 3.894321342340159e-06, "loss": 0.5318, "step": 9400 }, { "epoch": 4.572401215805471, "grad_norm": 0.07323678965678548, "learning_rate": 3.893388864276413e-06, "loss": 0.4804, "step": 9401 }, { "epoch": 4.572887537993921, "grad_norm": 0.07312112617763636, "learning_rate": 3.89245642668194e-06, "loss": 0.494, "step": 9402 }, { "epoch": 4.573373860182371, "grad_norm": 0.07625460373135336, "learning_rate": 3.891524029590837e-06, "loss": 0.5111, "step": 9403 }, { "epoch": 4.5738601823708205, "grad_norm": 0.07585508992838295, "learning_rate": 3.890591673037205e-06, "loss": 0.5231, "step": 9404 }, { "epoch": 4.57434650455927, "grad_norm": 0.07219676731221171, "learning_rate": 3.889659357055139e-06, "loss": 0.4965, "step": 9405 }, { "epoch": 4.574832826747721, "grad_norm": 0.07256806982886983, "learning_rate": 3.888727081678737e-06, "loss": 0.5171, "step": 9406 }, { "epoch": 4.57531914893617, "grad_norm": 0.07988977124293499, "learning_rate": 3.887794846942088e-06, "loss": 0.5757, "step": 9407 }, { "epoch": 4.57580547112462, "grad_norm": 0.07456679588377475, "learning_rate": 3.886862652879288e-06, "loss": 0.5075, "step": 9408 }, { "epoch": 4.57629179331307, "grad_norm": 0.07225183621512404, "learning_rate": 3.885930499524425e-06, "loss": 0.4974, "step": 9409 }, { "epoch": 4.5767781155015195, "grad_norm": 0.07213481894390628, "learning_rate": 3.884998386911592e-06, "loss": 0.4887, "step": 9410 }, { "epoch": 4.577264437689969, "grad_norm": 0.07093542674314501, "learning_rate": 3.884066315074872e-06, "loss": 0.491, "step": 9411 }, { "epoch": 4.57775075987842, "grad_norm": 0.07402843518613701, "learning_rate": 3.883134284048355e-06, "loss": 0.4961, "step": 9412 }, { "epoch": 4.578237082066869, "grad_norm": 0.07318893932406952, "learning_rate": 3.8822022938661255e-06, "loss": 0.5027, "step": 9413 }, { "epoch": 4.578723404255319, "grad_norm": 0.07828587395429795, "learning_rate": 3.881270344562264e-06, "loss": 0.5307, "step": 9414 }, { "epoch": 4.579209726443769, "grad_norm": 0.07021040892394347, "learning_rate": 3.880338436170857e-06, "loss": 0.4953, "step": 9415 }, { "epoch": 4.5796960486322185, "grad_norm": 0.07563537304206946, "learning_rate": 3.87940656872598e-06, "loss": 0.5271, "step": 9416 }, { "epoch": 4.580182370820669, "grad_norm": 0.07148901880726123, "learning_rate": 3.878474742261716e-06, "loss": 0.4921, "step": 9417 }, { "epoch": 4.580668693009119, "grad_norm": 0.0729408647462505, "learning_rate": 3.877542956812137e-06, "loss": 0.4971, "step": 9418 }, { "epoch": 4.581155015197568, "grad_norm": 0.07185942828665691, "learning_rate": 3.876611212411324e-06, "loss": 0.5068, "step": 9419 }, { "epoch": 4.581641337386018, "grad_norm": 0.07404036217980606, "learning_rate": 3.875679509093348e-06, "loss": 0.5066, "step": 9420 }, { "epoch": 4.582127659574468, "grad_norm": 0.07372009271917068, "learning_rate": 3.874747846892286e-06, "loss": 0.5542, "step": 9421 }, { "epoch": 4.582613981762918, "grad_norm": 0.07495766607464964, "learning_rate": 3.873816225842204e-06, "loss": 0.5278, "step": 9422 }, { "epoch": 4.583100303951368, "grad_norm": 0.07078148696778389, "learning_rate": 3.872884645977175e-06, "loss": 0.4882, "step": 9423 }, { "epoch": 4.583586626139818, "grad_norm": 0.07608058702230615, "learning_rate": 3.871953107331266e-06, "loss": 0.5244, "step": 9424 }, { "epoch": 4.584072948328267, "grad_norm": 0.078461127525936, "learning_rate": 3.871021609938547e-06, "loss": 0.5425, "step": 9425 }, { "epoch": 4.584559270516717, "grad_norm": 0.07404268375669316, "learning_rate": 3.870090153833077e-06, "loss": 0.514, "step": 9426 }, { "epoch": 4.585045592705168, "grad_norm": 0.07474617671772428, "learning_rate": 3.869158739048927e-06, "loss": 0.4974, "step": 9427 }, { "epoch": 4.585531914893617, "grad_norm": 0.07333944582933946, "learning_rate": 3.868227365620152e-06, "loss": 0.5435, "step": 9428 }, { "epoch": 4.586018237082067, "grad_norm": 0.07145503174283267, "learning_rate": 3.867296033580819e-06, "loss": 0.4742, "step": 9429 }, { "epoch": 4.586504559270517, "grad_norm": 0.07635005777735188, "learning_rate": 3.8663647429649824e-06, "loss": 0.5205, "step": 9430 }, { "epoch": 4.586990881458966, "grad_norm": 0.07282747294151719, "learning_rate": 3.865433493806705e-06, "loss": 0.5048, "step": 9431 }, { "epoch": 4.587477203647416, "grad_norm": 0.07608790871955881, "learning_rate": 3.864502286140038e-06, "loss": 0.4693, "step": 9432 }, { "epoch": 4.587963525835867, "grad_norm": 0.07747000345311038, "learning_rate": 3.8635711199990395e-06, "loss": 0.5343, "step": 9433 }, { "epoch": 4.588449848024316, "grad_norm": 0.07617283097140695, "learning_rate": 3.86263999541776e-06, "loss": 0.5176, "step": 9434 }, { "epoch": 4.588936170212766, "grad_norm": 0.07242281563116791, "learning_rate": 3.8617089124302546e-06, "loss": 0.4839, "step": 9435 }, { "epoch": 4.589422492401216, "grad_norm": 0.07376442049941472, "learning_rate": 3.86077787107057e-06, "loss": 0.4855, "step": 9436 }, { "epoch": 4.589908814589665, "grad_norm": 0.07177725289119948, "learning_rate": 3.8598468713727565e-06, "loss": 0.4884, "step": 9437 }, { "epoch": 4.590395136778115, "grad_norm": 0.07560285939052847, "learning_rate": 3.8589159133708616e-06, "loss": 0.5122, "step": 9438 }, { "epoch": 4.590881458966566, "grad_norm": 0.07901844861992932, "learning_rate": 3.857984997098928e-06, "loss": 0.4981, "step": 9439 }, { "epoch": 4.591367781155015, "grad_norm": 0.0760807200893336, "learning_rate": 3.857054122591004e-06, "loss": 0.5304, "step": 9440 }, { "epoch": 4.591854103343465, "grad_norm": 0.07452078713634845, "learning_rate": 3.856123289881129e-06, "loss": 0.5238, "step": 9441 }, { "epoch": 4.592340425531915, "grad_norm": 0.07525974081641466, "learning_rate": 3.855192499003347e-06, "loss": 0.5281, "step": 9442 }, { "epoch": 4.592826747720364, "grad_norm": 0.07539140432671773, "learning_rate": 3.854261749991693e-06, "loss": 0.5048, "step": 9443 }, { "epoch": 4.593313069908815, "grad_norm": 0.07750851584138688, "learning_rate": 3.8533310428802084e-06, "loss": 0.4981, "step": 9444 }, { "epoch": 4.593799392097265, "grad_norm": 0.07439655316628532, "learning_rate": 3.852400377702927e-06, "loss": 0.4947, "step": 9445 }, { "epoch": 4.594285714285714, "grad_norm": 0.07428810358142628, "learning_rate": 3.8514697544938885e-06, "loss": 0.4783, "step": 9446 }, { "epoch": 4.594772036474164, "grad_norm": 0.07332533353272915, "learning_rate": 3.85053917328712e-06, "loss": 0.5078, "step": 9447 }, { "epoch": 4.595258358662614, "grad_norm": 0.07650448476123038, "learning_rate": 3.849608634116657e-06, "loss": 0.5017, "step": 9448 }, { "epoch": 4.595744680851064, "grad_norm": 0.07211763738390814, "learning_rate": 3.848678137016528e-06, "loss": 0.4664, "step": 9449 }, { "epoch": 4.596231003039514, "grad_norm": 0.07484629803964395, "learning_rate": 3.8477476820207646e-06, "loss": 0.5244, "step": 9450 }, { "epoch": 4.596717325227964, "grad_norm": 0.07308498024444289, "learning_rate": 3.846817269163391e-06, "loss": 0.5293, "step": 9451 }, { "epoch": 4.597203647416413, "grad_norm": 0.07503528945239649, "learning_rate": 3.845886898478435e-06, "loss": 0.5008, "step": 9452 }, { "epoch": 4.597689969604863, "grad_norm": 0.07258533972228852, "learning_rate": 3.844956569999917e-06, "loss": 0.5253, "step": 9453 }, { "epoch": 4.5981762917933136, "grad_norm": 0.07456482892045778, "learning_rate": 3.8440262837618635e-06, "loss": 0.5363, "step": 9454 }, { "epoch": 4.598662613981763, "grad_norm": 0.07368446187337664, "learning_rate": 3.843096039798293e-06, "loss": 0.5005, "step": 9455 }, { "epoch": 4.599148936170213, "grad_norm": 0.07814073533624062, "learning_rate": 3.8421658381432275e-06, "loss": 0.5661, "step": 9456 }, { "epoch": 4.599635258358663, "grad_norm": 0.07601538028404108, "learning_rate": 3.841235678830682e-06, "loss": 0.5143, "step": 9457 }, { "epoch": 4.600121580547112, "grad_norm": 0.0721920364470967, "learning_rate": 3.840305561894675e-06, "loss": 0.5227, "step": 9458 }, { "epoch": 4.600607902735562, "grad_norm": 0.07459943025647815, "learning_rate": 3.839375487369219e-06, "loss": 0.5114, "step": 9459 }, { "epoch": 4.6010942249240125, "grad_norm": 0.0757330807119611, "learning_rate": 3.838445455288331e-06, "loss": 0.5064, "step": 9460 }, { "epoch": 4.601580547112462, "grad_norm": 0.07544646910788051, "learning_rate": 3.837515465686018e-06, "loss": 0.5312, "step": 9461 }, { "epoch": 4.602066869300912, "grad_norm": 0.07555236857900353, "learning_rate": 3.836585518596294e-06, "loss": 0.4915, "step": 9462 }, { "epoch": 4.602553191489362, "grad_norm": 0.077142049341222, "learning_rate": 3.835655614053165e-06, "loss": 0.5044, "step": 9463 }, { "epoch": 4.603039513677811, "grad_norm": 0.07314501582103763, "learning_rate": 3.83472575209064e-06, "loss": 0.507, "step": 9464 }, { "epoch": 4.603525835866261, "grad_norm": 0.07615035555205024, "learning_rate": 3.833795932742725e-06, "loss": 0.5202, "step": 9465 }, { "epoch": 4.6040121580547115, "grad_norm": 0.07462205267127738, "learning_rate": 3.83286615604342e-06, "loss": 0.518, "step": 9466 }, { "epoch": 4.604498480243161, "grad_norm": 0.07541329463386828, "learning_rate": 3.831936422026733e-06, "loss": 0.4938, "step": 9467 }, { "epoch": 4.604984802431611, "grad_norm": 0.07390541315198346, "learning_rate": 3.831006730726659e-06, "loss": 0.5035, "step": 9468 }, { "epoch": 4.605471124620061, "grad_norm": 0.0745351254956586, "learning_rate": 3.8300770821772015e-06, "loss": 0.4845, "step": 9469 }, { "epoch": 4.60595744680851, "grad_norm": 0.07554491562162251, "learning_rate": 3.8291474764123544e-06, "loss": 0.5214, "step": 9470 }, { "epoch": 4.606443768996961, "grad_norm": 0.07299659021121051, "learning_rate": 3.82821791346612e-06, "loss": 0.4669, "step": 9471 }, { "epoch": 4.6069300911854105, "grad_norm": 0.07422117763230204, "learning_rate": 3.827288393372486e-06, "loss": 0.4986, "step": 9472 }, { "epoch": 4.60741641337386, "grad_norm": 0.07423495880259527, "learning_rate": 3.826358916165448e-06, "loss": 0.5023, "step": 9473 }, { "epoch": 4.60790273556231, "grad_norm": 0.07123173110027327, "learning_rate": 3.825429481878999e-06, "loss": 0.485, "step": 9474 }, { "epoch": 4.60838905775076, "grad_norm": 0.0752127379143631, "learning_rate": 3.824500090547127e-06, "loss": 0.5205, "step": 9475 }, { "epoch": 4.60887537993921, "grad_norm": 0.07786879865888996, "learning_rate": 3.823570742203821e-06, "loss": 0.5764, "step": 9476 }, { "epoch": 4.60936170212766, "grad_norm": 0.07219552369975611, "learning_rate": 3.822641436883067e-06, "loss": 0.4678, "step": 9477 }, { "epoch": 4.6098480243161095, "grad_norm": 0.07564927326451448, "learning_rate": 3.8217121746188496e-06, "loss": 0.5271, "step": 9478 }, { "epoch": 4.610334346504559, "grad_norm": 0.07444991600551204, "learning_rate": 3.820782955445156e-06, "loss": 0.5105, "step": 9479 }, { "epoch": 4.610820668693009, "grad_norm": 0.07494161729830032, "learning_rate": 3.819853779395963e-06, "loss": 0.513, "step": 9480 }, { "epoch": 4.6113069908814595, "grad_norm": 0.07317616315481869, "learning_rate": 3.818924646505256e-06, "loss": 0.5227, "step": 9481 }, { "epoch": 4.611793313069909, "grad_norm": 0.07480035058675628, "learning_rate": 3.817995556807008e-06, "loss": 0.5204, "step": 9482 }, { "epoch": 4.612279635258359, "grad_norm": 0.07686659295049114, "learning_rate": 3.8170665103352015e-06, "loss": 0.5214, "step": 9483 }, { "epoch": 4.6127659574468085, "grad_norm": 0.07555703319714131, "learning_rate": 3.816137507123809e-06, "loss": 0.5031, "step": 9484 }, { "epoch": 4.613252279635258, "grad_norm": 0.07521412396946751, "learning_rate": 3.8152085472068074e-06, "loss": 0.5218, "step": 9485 }, { "epoch": 4.613738601823708, "grad_norm": 0.0743982844701979, "learning_rate": 3.8142796306181656e-06, "loss": 0.5322, "step": 9486 }, { "epoch": 4.614224924012158, "grad_norm": 0.0744369163396937, "learning_rate": 3.8133507573918575e-06, "loss": 0.5051, "step": 9487 }, { "epoch": 4.614711246200608, "grad_norm": 0.07193154401324867, "learning_rate": 3.8124219275618507e-06, "loss": 0.484, "step": 9488 }, { "epoch": 4.615197568389058, "grad_norm": 0.07481107910941633, "learning_rate": 3.811493141162115e-06, "loss": 0.4872, "step": 9489 }, { "epoch": 4.6156838905775075, "grad_norm": 0.07578475644720155, "learning_rate": 3.8105643982266137e-06, "loss": 0.5108, "step": 9490 }, { "epoch": 4.616170212765957, "grad_norm": 0.07497276982605097, "learning_rate": 3.8096356987893123e-06, "loss": 0.5185, "step": 9491 }, { "epoch": 4.616656534954407, "grad_norm": 0.07259339086909142, "learning_rate": 3.808707042884176e-06, "loss": 0.5006, "step": 9492 }, { "epoch": 4.617142857142857, "grad_norm": 0.07490271475208989, "learning_rate": 3.8077784305451628e-06, "loss": 0.5241, "step": 9493 }, { "epoch": 4.617629179331307, "grad_norm": 0.07878946988105262, "learning_rate": 3.806849861806235e-06, "loss": 0.5612, "step": 9494 }, { "epoch": 4.618115501519757, "grad_norm": 0.07225396036900433, "learning_rate": 3.8059213367013485e-06, "loss": 0.4998, "step": 9495 }, { "epoch": 4.6186018237082065, "grad_norm": 0.07332486475880097, "learning_rate": 3.804992855264464e-06, "loss": 0.486, "step": 9496 }, { "epoch": 4.619088145896656, "grad_norm": 0.0765174148555299, "learning_rate": 3.8040644175295304e-06, "loss": 0.5267, "step": 9497 }, { "epoch": 4.619574468085107, "grad_norm": 0.07567744715304449, "learning_rate": 3.8031360235305064e-06, "loss": 0.5532, "step": 9498 }, { "epoch": 4.620060790273556, "grad_norm": 0.07709301199733738, "learning_rate": 3.802207673301341e-06, "loss": 0.5125, "step": 9499 }, { "epoch": 4.620547112462006, "grad_norm": 0.07628850399117111, "learning_rate": 3.801279366875986e-06, "loss": 0.5311, "step": 9500 }, { "epoch": 4.621033434650456, "grad_norm": 0.07714273442330423, "learning_rate": 3.800351104288388e-06, "loss": 0.5068, "step": 9501 }, { "epoch": 4.6215197568389055, "grad_norm": 0.07295483953696705, "learning_rate": 3.7994228855724963e-06, "loss": 0.5164, "step": 9502 }, { "epoch": 4.622006079027356, "grad_norm": 0.07379443000323054, "learning_rate": 3.7984947107622536e-06, "loss": 0.5018, "step": 9503 }, { "epoch": 4.622492401215806, "grad_norm": 0.07213853633630675, "learning_rate": 3.797566579891607e-06, "loss": 0.5055, "step": 9504 }, { "epoch": 4.622978723404255, "grad_norm": 0.07396693713182244, "learning_rate": 3.7966384929944955e-06, "loss": 0.5012, "step": 9505 }, { "epoch": 4.623465045592705, "grad_norm": 0.07730228493107462, "learning_rate": 3.795710450104863e-06, "loss": 0.5316, "step": 9506 }, { "epoch": 4.623951367781155, "grad_norm": 0.07210203341271802, "learning_rate": 3.7947824512566443e-06, "loss": 0.5053, "step": 9507 }, { "epoch": 4.624437689969605, "grad_norm": 0.07471053322435454, "learning_rate": 3.79385449648378e-06, "loss": 0.5123, "step": 9508 }, { "epoch": 4.624924012158055, "grad_norm": 0.07514867551497609, "learning_rate": 3.7929265858202035e-06, "loss": 0.5271, "step": 9509 }, { "epoch": 4.625410334346505, "grad_norm": 0.07310418719063497, "learning_rate": 3.7919987192998526e-06, "loss": 0.5124, "step": 9510 }, { "epoch": 4.625896656534954, "grad_norm": 0.07580512671457157, "learning_rate": 3.791070896956655e-06, "loss": 0.5293, "step": 9511 }, { "epoch": 4.626382978723404, "grad_norm": 0.0766143247335217, "learning_rate": 3.7901431188245453e-06, "loss": 0.5075, "step": 9512 }, { "epoch": 4.626869300911854, "grad_norm": 0.0743828943028626, "learning_rate": 3.78921538493745e-06, "loss": 0.5132, "step": 9513 }, { "epoch": 4.6273556231003035, "grad_norm": 0.0759008034594148, "learning_rate": 3.7882876953293003e-06, "loss": 0.5191, "step": 9514 }, { "epoch": 4.627841945288754, "grad_norm": 0.07776551062692796, "learning_rate": 3.7873600500340178e-06, "loss": 0.5097, "step": 9515 }, { "epoch": 4.628328267477204, "grad_norm": 0.07306178262039249, "learning_rate": 3.7864324490855297e-06, "loss": 0.4821, "step": 9516 }, { "epoch": 4.628814589665653, "grad_norm": 0.07313781810878421, "learning_rate": 3.785504892517759e-06, "loss": 0.5076, "step": 9517 }, { "epoch": 4.629300911854103, "grad_norm": 0.07402287239839066, "learning_rate": 3.7845773803646247e-06, "loss": 0.5223, "step": 9518 }, { "epoch": 4.629787234042553, "grad_norm": 0.07046761284790008, "learning_rate": 3.7836499126600507e-06, "loss": 0.5048, "step": 9519 }, { "epoch": 4.630273556231003, "grad_norm": 0.07327775229916007, "learning_rate": 3.7827224894379494e-06, "loss": 0.4882, "step": 9520 }, { "epoch": 4.630759878419453, "grad_norm": 0.07440353791655598, "learning_rate": 3.781795110732242e-06, "loss": 0.4958, "step": 9521 }, { "epoch": 4.631246200607903, "grad_norm": 0.07320541664060844, "learning_rate": 3.780867776576839e-06, "loss": 0.5001, "step": 9522 }, { "epoch": 4.631732522796352, "grad_norm": 0.07616011703036604, "learning_rate": 3.7799404870056557e-06, "loss": 0.5149, "step": 9523 }, { "epoch": 4.632218844984802, "grad_norm": 0.07744333425585907, "learning_rate": 3.7790132420526026e-06, "loss": 0.5276, "step": 9524 }, { "epoch": 4.632705167173253, "grad_norm": 0.07566761043196567, "learning_rate": 3.7780860417515918e-06, "loss": 0.4892, "step": 9525 }, { "epoch": 4.633191489361702, "grad_norm": 0.07514088263625601, "learning_rate": 3.777158886136528e-06, "loss": 0.4967, "step": 9526 }, { "epoch": 4.633677811550152, "grad_norm": 0.07526882707414885, "learning_rate": 3.776231775241319e-06, "loss": 0.5281, "step": 9527 }, { "epoch": 4.634164133738602, "grad_norm": 0.07651193187251416, "learning_rate": 3.77530470909987e-06, "loss": 0.5375, "step": 9528 }, { "epoch": 4.634650455927051, "grad_norm": 0.07423840591000447, "learning_rate": 3.7743776877460864e-06, "loss": 0.496, "step": 9529 }, { "epoch": 4.635136778115502, "grad_norm": 0.073935256014923, "learning_rate": 3.7734507112138652e-06, "loss": 0.531, "step": 9530 }, { "epoch": 4.635623100303952, "grad_norm": 0.07194190894330107, "learning_rate": 3.7725237795371094e-06, "loss": 0.4954, "step": 9531 }, { "epoch": 4.636109422492401, "grad_norm": 0.07353560460273091, "learning_rate": 3.7715968927497167e-06, "loss": 0.5001, "step": 9532 }, { "epoch": 4.636595744680851, "grad_norm": 0.0746582790320707, "learning_rate": 3.770670050885585e-06, "loss": 0.4854, "step": 9533 }, { "epoch": 4.637082066869301, "grad_norm": 0.07372899887960643, "learning_rate": 3.769743253978606e-06, "loss": 0.5213, "step": 9534 }, { "epoch": 4.63756838905775, "grad_norm": 0.07212950839336363, "learning_rate": 3.7688165020626772e-06, "loss": 0.4936, "step": 9535 }, { "epoch": 4.638054711246201, "grad_norm": 0.07594817659848711, "learning_rate": 3.7678897951716863e-06, "loss": 0.5073, "step": 9536 }, { "epoch": 4.638541033434651, "grad_norm": 0.07476370620708332, "learning_rate": 3.766963133339526e-06, "loss": 0.5196, "step": 9537 }, { "epoch": 4.6390273556231, "grad_norm": 0.07584518566263565, "learning_rate": 3.7660365166000834e-06, "loss": 0.5392, "step": 9538 }, { "epoch": 4.63951367781155, "grad_norm": 0.07569146466071262, "learning_rate": 3.7651099449872485e-06, "loss": 0.5675, "step": 9539 }, { "epoch": 4.64, "grad_norm": 0.07259531974374066, "learning_rate": 3.7641834185349014e-06, "loss": 0.495, "step": 9540 }, { "epoch": 4.640486322188449, "grad_norm": 0.07503499095375923, "learning_rate": 3.7632569372769294e-06, "loss": 0.5112, "step": 9541 }, { "epoch": 4.6409726443769, "grad_norm": 0.07606440715202127, "learning_rate": 3.762330501247212e-06, "loss": 0.5251, "step": 9542 }, { "epoch": 4.64145896656535, "grad_norm": 0.07130470785763345, "learning_rate": 3.7614041104796307e-06, "loss": 0.496, "step": 9543 }, { "epoch": 4.641945288753799, "grad_norm": 0.07615671337157306, "learning_rate": 3.7604777650080654e-06, "loss": 0.5101, "step": 9544 }, { "epoch": 4.642431610942249, "grad_norm": 0.07192905081790707, "learning_rate": 3.7595514648663894e-06, "loss": 0.5223, "step": 9545 }, { "epoch": 4.642917933130699, "grad_norm": 0.07371448061533994, "learning_rate": 3.758625210088482e-06, "loss": 0.5033, "step": 9546 }, { "epoch": 4.643404255319149, "grad_norm": 0.07649761607323644, "learning_rate": 3.7576990007082125e-06, "loss": 0.5146, "step": 9547 }, { "epoch": 4.643890577507599, "grad_norm": 0.07057738478422075, "learning_rate": 3.7567728367594564e-06, "loss": 0.4798, "step": 9548 }, { "epoch": 4.644376899696049, "grad_norm": 0.07438195516742215, "learning_rate": 3.755846718276081e-06, "loss": 0.483, "step": 9549 }, { "epoch": 4.644863221884498, "grad_norm": 0.07330784264288097, "learning_rate": 3.7549206452919584e-06, "loss": 0.5313, "step": 9550 }, { "epoch": 4.645349544072948, "grad_norm": 0.07289138818300984, "learning_rate": 3.753994617840952e-06, "loss": 0.4888, "step": 9551 }, { "epoch": 4.6458358662613986, "grad_norm": 0.07465262810113721, "learning_rate": 3.753068635956929e-06, "loss": 0.5034, "step": 9552 }, { "epoch": 4.646322188449848, "grad_norm": 0.07542769098137869, "learning_rate": 3.7521426996737516e-06, "loss": 0.5433, "step": 9553 }, { "epoch": 4.646808510638298, "grad_norm": 0.0749693442702763, "learning_rate": 3.751216809025285e-06, "loss": 0.5346, "step": 9554 }, { "epoch": 4.647294832826748, "grad_norm": 0.07466763207742935, "learning_rate": 3.750290964045384e-06, "loss": 0.4994, "step": 9555 }, { "epoch": 4.647781155015197, "grad_norm": 0.07250374001258861, "learning_rate": 3.749365164767912e-06, "loss": 0.4926, "step": 9556 }, { "epoch": 4.648267477203648, "grad_norm": 0.07570652146570055, "learning_rate": 3.748439411226723e-06, "loss": 0.5569, "step": 9557 }, { "epoch": 4.6487537993920975, "grad_norm": 0.07421932023576838, "learning_rate": 3.7475137034556753e-06, "loss": 0.5148, "step": 9558 }, { "epoch": 4.649240121580547, "grad_norm": 0.07457992476791968, "learning_rate": 3.746588041488619e-06, "loss": 0.473, "step": 9559 }, { "epoch": 4.649726443768997, "grad_norm": 0.07497857139757647, "learning_rate": 3.7456624253594087e-06, "loss": 0.518, "step": 9560 }, { "epoch": 4.650212765957447, "grad_norm": 0.07494900953534446, "learning_rate": 3.7447368551018916e-06, "loss": 0.5081, "step": 9561 }, { "epoch": 4.650699088145896, "grad_norm": 0.07442428162605412, "learning_rate": 3.743811330749919e-06, "loss": 0.4999, "step": 9562 }, { "epoch": 4.651185410334347, "grad_norm": 0.07535008447958119, "learning_rate": 3.742885852337336e-06, "loss": 0.5459, "step": 9563 }, { "epoch": 4.6516717325227965, "grad_norm": 0.07228302431615818, "learning_rate": 3.741960419897991e-06, "loss": 0.5024, "step": 9564 }, { "epoch": 4.652158054711246, "grad_norm": 0.07455600044653893, "learning_rate": 3.7410350334657218e-06, "loss": 0.5034, "step": 9565 }, { "epoch": 4.652644376899696, "grad_norm": 0.07641868902654676, "learning_rate": 3.7401096930743753e-06, "loss": 0.5378, "step": 9566 }, { "epoch": 4.653130699088146, "grad_norm": 0.0732124516137757, "learning_rate": 3.739184398757788e-06, "loss": 0.5223, "step": 9567 }, { "epoch": 4.653617021276595, "grad_norm": 0.07282143747930755, "learning_rate": 3.738259150549803e-06, "loss": 0.5079, "step": 9568 }, { "epoch": 4.654103343465046, "grad_norm": 0.07124438143144253, "learning_rate": 3.737333948484251e-06, "loss": 0.4786, "step": 9569 }, { "epoch": 4.6545896656534955, "grad_norm": 0.07273540222266, "learning_rate": 3.736408792594971e-06, "loss": 0.4768, "step": 9570 }, { "epoch": 4.655075987841945, "grad_norm": 0.07673260424922278, "learning_rate": 3.735483682915796e-06, "loss": 0.5525, "step": 9571 }, { "epoch": 4.655562310030395, "grad_norm": 0.07600713076263813, "learning_rate": 3.7345586194805562e-06, "loss": 0.5128, "step": 9572 }, { "epoch": 4.656048632218845, "grad_norm": 0.07559240948015863, "learning_rate": 3.7336336023230853e-06, "loss": 0.5265, "step": 9573 }, { "epoch": 4.656534954407295, "grad_norm": 0.07284962303963942, "learning_rate": 3.7327086314772064e-06, "loss": 0.4991, "step": 9574 }, { "epoch": 4.657021276595745, "grad_norm": 0.0784734662661495, "learning_rate": 3.7317837069767505e-06, "loss": 0.5702, "step": 9575 }, { "epoch": 4.6575075987841945, "grad_norm": 0.07428019499799277, "learning_rate": 3.730858828855539e-06, "loss": 0.5273, "step": 9576 }, { "epoch": 4.657993920972644, "grad_norm": 0.07618090946263137, "learning_rate": 3.7299339971473973e-06, "loss": 0.5254, "step": 9577 }, { "epoch": 4.658480243161094, "grad_norm": 0.07280468127081419, "learning_rate": 3.7290092118861454e-06, "loss": 0.5074, "step": 9578 }, { "epoch": 4.6589665653495445, "grad_norm": 0.07329129289431018, "learning_rate": 3.7280844731056066e-06, "loss": 0.4986, "step": 9579 }, { "epoch": 4.659452887537994, "grad_norm": 0.07223412735139698, "learning_rate": 3.727159780839594e-06, "loss": 0.5047, "step": 9580 }, { "epoch": 4.659939209726444, "grad_norm": 0.07498428933515157, "learning_rate": 3.726235135121927e-06, "loss": 0.5122, "step": 9581 }, { "epoch": 4.6604255319148935, "grad_norm": 0.07291334164386905, "learning_rate": 3.72531053598642e-06, "loss": 0.4875, "step": 9582 }, { "epoch": 4.660911854103343, "grad_norm": 0.07706097745118637, "learning_rate": 3.724385983466887e-06, "loss": 0.5232, "step": 9583 }, { "epoch": 4.661398176291794, "grad_norm": 0.07339697845810081, "learning_rate": 3.7234614775971366e-06, "loss": 0.4765, "step": 9584 }, { "epoch": 4.661884498480243, "grad_norm": 0.0744146720615144, "learning_rate": 3.7225370184109814e-06, "loss": 0.5079, "step": 9585 }, { "epoch": 4.662370820668693, "grad_norm": 0.07077348494825894, "learning_rate": 3.7216126059422263e-06, "loss": 0.4756, "step": 9586 }, { "epoch": 4.662857142857143, "grad_norm": 0.07274179623828411, "learning_rate": 3.7206882402246796e-06, "loss": 0.4904, "step": 9587 }, { "epoch": 4.6633434650455925, "grad_norm": 0.07598361193116747, "learning_rate": 3.7197639212921445e-06, "loss": 0.5012, "step": 9588 }, { "epoch": 4.663829787234042, "grad_norm": 0.07176317379494655, "learning_rate": 3.7188396491784262e-06, "loss": 0.5031, "step": 9589 }, { "epoch": 4.664316109422493, "grad_norm": 0.07407781378437643, "learning_rate": 3.717915423917322e-06, "loss": 0.5285, "step": 9590 }, { "epoch": 4.664802431610942, "grad_norm": 0.07485449954704698, "learning_rate": 3.7169912455426348e-06, "loss": 0.5202, "step": 9591 }, { "epoch": 4.665288753799392, "grad_norm": 0.07247835616473004, "learning_rate": 3.716067114088159e-06, "loss": 0.5009, "step": 9592 }, { "epoch": 4.665775075987842, "grad_norm": 0.07392328061597725, "learning_rate": 3.7151430295876943e-06, "loss": 0.5076, "step": 9593 }, { "epoch": 4.6662613981762915, "grad_norm": 0.0758691485924479, "learning_rate": 3.7142189920750304e-06, "loss": 0.5576, "step": 9594 }, { "epoch": 4.666747720364741, "grad_norm": 0.07660410457253233, "learning_rate": 3.713295001583963e-06, "loss": 0.5115, "step": 9595 }, { "epoch": 4.667234042553192, "grad_norm": 0.07594373902212093, "learning_rate": 3.712371058148282e-06, "loss": 0.5166, "step": 9596 }, { "epoch": 4.667720364741641, "grad_norm": 0.07230989221290264, "learning_rate": 3.7114471618017756e-06, "loss": 0.5082, "step": 9597 }, { "epoch": 4.668206686930091, "grad_norm": 0.07928490318333174, "learning_rate": 3.710523312578235e-06, "loss": 0.5069, "step": 9598 }, { "epoch": 4.668693009118541, "grad_norm": 0.07396771480513312, "learning_rate": 3.709599510511439e-06, "loss": 0.5415, "step": 9599 }, { "epoch": 4.6691793313069905, "grad_norm": 0.07424907094946222, "learning_rate": 3.708675755635178e-06, "loss": 0.5169, "step": 9600 }, { "epoch": 4.669665653495441, "grad_norm": 0.0756231051950839, "learning_rate": 3.7077520479832296e-06, "loss": 0.506, "step": 9601 }, { "epoch": 4.670151975683891, "grad_norm": 0.07387786097719395, "learning_rate": 3.706828387589377e-06, "loss": 0.537, "step": 9602 }, { "epoch": 4.67063829787234, "grad_norm": 0.0763183024763555, "learning_rate": 3.705904774487396e-06, "loss": 0.5257, "step": 9603 }, { "epoch": 4.67112462006079, "grad_norm": 0.07472397518631525, "learning_rate": 3.704981208711068e-06, "loss": 0.5007, "step": 9604 }, { "epoch": 4.67161094224924, "grad_norm": 0.07492416020627674, "learning_rate": 3.7040576902941634e-06, "loss": 0.5256, "step": 9605 }, { "epoch": 4.67209726443769, "grad_norm": 0.07485880416766108, "learning_rate": 3.7031342192704588e-06, "loss": 0.5325, "step": 9606 }, { "epoch": 4.67258358662614, "grad_norm": 0.07158234307150661, "learning_rate": 3.7022107956737234e-06, "loss": 0.4915, "step": 9607 }, { "epoch": 4.67306990881459, "grad_norm": 0.07225528992868703, "learning_rate": 3.7012874195377315e-06, "loss": 0.529, "step": 9608 }, { "epoch": 4.673556231003039, "grad_norm": 0.07225706820737202, "learning_rate": 3.700364090896247e-06, "loss": 0.4774, "step": 9609 }, { "epoch": 4.674042553191489, "grad_norm": 0.07561806125101941, "learning_rate": 3.699440809783038e-06, "loss": 0.5288, "step": 9610 }, { "epoch": 4.67452887537994, "grad_norm": 0.07410462174182866, "learning_rate": 3.6985175762318694e-06, "loss": 0.549, "step": 9611 }, { "epoch": 4.675015197568389, "grad_norm": 0.07858269014990575, "learning_rate": 3.6975943902765064e-06, "loss": 0.5299, "step": 9612 }, { "epoch": 4.675501519756839, "grad_norm": 0.07303523938707543, "learning_rate": 3.6966712519507052e-06, "loss": 0.4974, "step": 9613 }, { "epoch": 4.675987841945289, "grad_norm": 0.0721016590337247, "learning_rate": 3.695748161288232e-06, "loss": 0.5052, "step": 9614 }, { "epoch": 4.676474164133738, "grad_norm": 0.07364381736590409, "learning_rate": 3.6948251183228377e-06, "loss": 0.4833, "step": 9615 }, { "epoch": 4.676960486322188, "grad_norm": 0.07653172132265926, "learning_rate": 3.693902123088284e-06, "loss": 0.5071, "step": 9616 }, { "epoch": 4.677446808510639, "grad_norm": 0.07286093668446615, "learning_rate": 3.692979175618321e-06, "loss": 0.519, "step": 9617 }, { "epoch": 4.677933130699088, "grad_norm": 0.07161945261191206, "learning_rate": 3.692056275946706e-06, "loss": 0.4954, "step": 9618 }, { "epoch": 4.678419452887538, "grad_norm": 0.07332748044486957, "learning_rate": 3.691133424107185e-06, "loss": 0.553, "step": 9619 }, { "epoch": 4.678905775075988, "grad_norm": 0.07468228670131369, "learning_rate": 3.6902106201335104e-06, "loss": 0.5327, "step": 9620 }, { "epoch": 4.679392097264437, "grad_norm": 0.07594165262276767, "learning_rate": 3.689287864059427e-06, "loss": 0.5462, "step": 9621 }, { "epoch": 4.679878419452887, "grad_norm": 0.07207785603150786, "learning_rate": 3.6883651559186822e-06, "loss": 0.5096, "step": 9622 }, { "epoch": 4.680364741641338, "grad_norm": 0.0727797728628278, "learning_rate": 3.6874424957450215e-06, "loss": 0.494, "step": 9623 }, { "epoch": 4.680851063829787, "grad_norm": 0.07667396172812461, "learning_rate": 3.686519883572184e-06, "loss": 0.5044, "step": 9624 }, { "epoch": 4.681337386018237, "grad_norm": 0.07316306043841397, "learning_rate": 3.6855973194339113e-06, "loss": 0.5144, "step": 9625 }, { "epoch": 4.681823708206687, "grad_norm": 0.07602396817093657, "learning_rate": 3.6846748033639402e-06, "loss": 0.5104, "step": 9626 }, { "epoch": 4.682310030395136, "grad_norm": 0.07441150851758274, "learning_rate": 3.683752335396012e-06, "loss": 0.5013, "step": 9627 }, { "epoch": 4.682796352583587, "grad_norm": 0.0743595482002386, "learning_rate": 3.682829915563857e-06, "loss": 0.5006, "step": 9628 }, { "epoch": 4.683282674772037, "grad_norm": 0.07746167588075516, "learning_rate": 3.681907543901212e-06, "loss": 0.5332, "step": 9629 }, { "epoch": 4.683768996960486, "grad_norm": 0.07446019977027796, "learning_rate": 3.6809852204418045e-06, "loss": 0.4915, "step": 9630 }, { "epoch": 4.684255319148936, "grad_norm": 0.07212699422511144, "learning_rate": 3.6800629452193683e-06, "loss": 0.4902, "step": 9631 }, { "epoch": 4.684741641337386, "grad_norm": 0.07138086466157413, "learning_rate": 3.6791407182676287e-06, "loss": 0.4808, "step": 9632 }, { "epoch": 4.685227963525836, "grad_norm": 0.07541732465208206, "learning_rate": 3.678218539620315e-06, "loss": 0.4978, "step": 9633 }, { "epoch": 4.685714285714286, "grad_norm": 0.07398786260987969, "learning_rate": 3.6772964093111486e-06, "loss": 0.4843, "step": 9634 }, { "epoch": 4.686200607902736, "grad_norm": 0.07873925613633227, "learning_rate": 3.676374327373854e-06, "loss": 0.5216, "step": 9635 }, { "epoch": 4.686686930091185, "grad_norm": 0.07280369958394933, "learning_rate": 3.67545229384215e-06, "loss": 0.5104, "step": 9636 }, { "epoch": 4.687173252279635, "grad_norm": 0.07148377818397378, "learning_rate": 3.67453030874976e-06, "loss": 0.4434, "step": 9637 }, { "epoch": 4.687659574468086, "grad_norm": 0.07630968900013077, "learning_rate": 3.6736083721303966e-06, "loss": 0.5507, "step": 9638 }, { "epoch": 4.688145896656535, "grad_norm": 0.07264716097839222, "learning_rate": 3.67268648401778e-06, "loss": 0.5066, "step": 9639 }, { "epoch": 4.688632218844985, "grad_norm": 0.0729846544579363, "learning_rate": 3.6717646444456196e-06, "loss": 0.5157, "step": 9640 }, { "epoch": 4.689118541033435, "grad_norm": 0.07558655287620303, "learning_rate": 3.6708428534476302e-06, "loss": 0.5394, "step": 9641 }, { "epoch": 4.689604863221884, "grad_norm": 0.07425848146701212, "learning_rate": 3.6699211110575206e-06, "loss": 0.5255, "step": 9642 }, { "epoch": 4.690091185410334, "grad_norm": 0.0720756011348175, "learning_rate": 3.6689994173090025e-06, "loss": 0.4843, "step": 9643 }, { "epoch": 4.690577507598785, "grad_norm": 0.07355597179477104, "learning_rate": 3.6680777722357787e-06, "loss": 0.5149, "step": 9644 }, { "epoch": 4.691063829787234, "grad_norm": 0.07375458795722081, "learning_rate": 3.6671561758715564e-06, "loss": 0.5016, "step": 9645 }, { "epoch": 4.691550151975684, "grad_norm": 0.0757619720418, "learning_rate": 3.6662346282500373e-06, "loss": 0.5217, "step": 9646 }, { "epoch": 4.692036474164134, "grad_norm": 0.07626029129190803, "learning_rate": 3.6653131294049236e-06, "loss": 0.5357, "step": 9647 }, { "epoch": 4.692522796352583, "grad_norm": 0.07318033886282783, "learning_rate": 3.6643916793699175e-06, "loss": 0.524, "step": 9648 }, { "epoch": 4.693009118541033, "grad_norm": 0.07355118118920227, "learning_rate": 3.6634702781787122e-06, "loss": 0.5034, "step": 9649 }, { "epoch": 4.6934954407294835, "grad_norm": 0.0712011619777786, "learning_rate": 3.662548925865008e-06, "loss": 0.4835, "step": 9650 }, { "epoch": 4.693981762917933, "grad_norm": 0.07329112172033672, "learning_rate": 3.6616276224624947e-06, "loss": 0.5079, "step": 9651 }, { "epoch": 4.694468085106383, "grad_norm": 0.07397830532987652, "learning_rate": 3.6607063680048706e-06, "loss": 0.5474, "step": 9652 }, { "epoch": 4.694954407294833, "grad_norm": 0.07241938745143882, "learning_rate": 3.6597851625258205e-06, "loss": 0.502, "step": 9653 }, { "epoch": 4.695440729483282, "grad_norm": 0.07393857469957417, "learning_rate": 3.658864006059038e-06, "loss": 0.5219, "step": 9654 }, { "epoch": 4.695927051671733, "grad_norm": 0.07075624955625197, "learning_rate": 3.657942898638206e-06, "loss": 0.4801, "step": 9655 }, { "epoch": 4.6964133738601825, "grad_norm": 0.0769859262197467, "learning_rate": 3.6570218402970124e-06, "loss": 0.543, "step": 9656 }, { "epoch": 4.696899696048632, "grad_norm": 0.07896110797264357, "learning_rate": 3.6561008310691405e-06, "loss": 0.5629, "step": 9657 }, { "epoch": 4.697386018237082, "grad_norm": 0.07898354688370803, "learning_rate": 3.655179870988273e-06, "loss": 0.5696, "step": 9658 }, { "epoch": 4.697872340425532, "grad_norm": 0.07524286310016723, "learning_rate": 3.654258960088087e-06, "loss": 0.524, "step": 9659 }, { "epoch": 4.698358662613982, "grad_norm": 0.077592247965106, "learning_rate": 3.6533380984022625e-06, "loss": 0.531, "step": 9660 }, { "epoch": 4.698844984802432, "grad_norm": 0.07357493558132225, "learning_rate": 3.6524172859644752e-06, "loss": 0.5061, "step": 9661 }, { "epoch": 4.6993313069908815, "grad_norm": 0.07438941113371288, "learning_rate": 3.651496522808402e-06, "loss": 0.5403, "step": 9662 }, { "epoch": 4.699817629179331, "grad_norm": 0.0710591043464617, "learning_rate": 3.650575808967711e-06, "loss": 0.507, "step": 9663 }, { "epoch": 4.700303951367781, "grad_norm": 0.07444801583242466, "learning_rate": 3.6496551444760773e-06, "loss": 0.4813, "step": 9664 }, { "epoch": 4.7007902735562315, "grad_norm": 0.07364107287050706, "learning_rate": 3.6487345293671673e-06, "loss": 0.522, "step": 9665 }, { "epoch": 4.701276595744681, "grad_norm": 0.07418207340608211, "learning_rate": 3.647813963674651e-06, "loss": 0.5324, "step": 9666 }, { "epoch": 4.701762917933131, "grad_norm": 0.07086389652058153, "learning_rate": 3.6468934474321916e-06, "loss": 0.4841, "step": 9667 }, { "epoch": 4.7022492401215805, "grad_norm": 0.07399552472498022, "learning_rate": 3.6459729806734544e-06, "loss": 0.5384, "step": 9668 }, { "epoch": 4.70273556231003, "grad_norm": 0.07131064160335929, "learning_rate": 3.6450525634320986e-06, "loss": 0.4815, "step": 9669 }, { "epoch": 4.70322188449848, "grad_norm": 0.0752254813076923, "learning_rate": 3.6441321957417874e-06, "loss": 0.4824, "step": 9670 }, { "epoch": 4.7037082066869305, "grad_norm": 0.07431840307812927, "learning_rate": 3.6432118776361767e-06, "loss": 0.5216, "step": 9671 }, { "epoch": 4.70419452887538, "grad_norm": 0.07594438156081466, "learning_rate": 3.642291609148927e-06, "loss": 0.506, "step": 9672 }, { "epoch": 4.70468085106383, "grad_norm": 0.07244197669566692, "learning_rate": 3.641371390313687e-06, "loss": 0.4997, "step": 9673 }, { "epoch": 4.7051671732522795, "grad_norm": 0.07507202720339307, "learning_rate": 3.6404512211641123e-06, "loss": 0.5336, "step": 9674 }, { "epoch": 4.705653495440729, "grad_norm": 0.07385520390171911, "learning_rate": 3.639531101733856e-06, "loss": 0.5421, "step": 9675 }, { "epoch": 4.706139817629179, "grad_norm": 0.07262138845245943, "learning_rate": 3.6386110320565636e-06, "loss": 0.5078, "step": 9676 }, { "epoch": 4.7066261398176295, "grad_norm": 0.07440605342391532, "learning_rate": 3.6376910121658867e-06, "loss": 0.5393, "step": 9677 }, { "epoch": 4.707112462006079, "grad_norm": 0.07486882996441581, "learning_rate": 3.636771042095466e-06, "loss": 0.5017, "step": 9678 }, { "epoch": 4.707598784194529, "grad_norm": 0.07302965695974867, "learning_rate": 3.6358511218789507e-06, "loss": 0.4882, "step": 9679 }, { "epoch": 4.7080851063829785, "grad_norm": 0.07416681013074236, "learning_rate": 3.6349312515499765e-06, "loss": 0.4822, "step": 9680 }, { "epoch": 4.708571428571428, "grad_norm": 0.07514982724806697, "learning_rate": 3.634011431142188e-06, "loss": 0.5242, "step": 9681 }, { "epoch": 4.709057750759879, "grad_norm": 0.07673511482193901, "learning_rate": 3.6330916606892208e-06, "loss": 0.5496, "step": 9682 }, { "epoch": 4.709544072948328, "grad_norm": 0.07297530854212485, "learning_rate": 3.6321719402247144e-06, "loss": 0.4859, "step": 9683 }, { "epoch": 4.710030395136778, "grad_norm": 0.0739553311762443, "learning_rate": 3.6312522697823004e-06, "loss": 0.5089, "step": 9684 }, { "epoch": 4.710516717325228, "grad_norm": 0.07191695130023956, "learning_rate": 3.630332649395614e-06, "loss": 0.4973, "step": 9685 }, { "epoch": 4.7110030395136775, "grad_norm": 0.07343181185032584, "learning_rate": 3.629413079098282e-06, "loss": 0.5336, "step": 9686 }, { "epoch": 4.711489361702128, "grad_norm": 0.07387737643779135, "learning_rate": 3.62849355892394e-06, "loss": 0.5037, "step": 9687 }, { "epoch": 4.711975683890578, "grad_norm": 0.07630022820497294, "learning_rate": 3.6275740889062095e-06, "loss": 0.5284, "step": 9688 }, { "epoch": 4.712462006079027, "grad_norm": 0.07309824966081761, "learning_rate": 3.6266546690787187e-06, "loss": 0.4959, "step": 9689 }, { "epoch": 4.712948328267477, "grad_norm": 0.0731185972075894, "learning_rate": 3.6257352994750895e-06, "loss": 0.4953, "step": 9690 }, { "epoch": 4.713434650455927, "grad_norm": 0.07552762648953919, "learning_rate": 3.624815980128947e-06, "loss": 0.489, "step": 9691 }, { "epoch": 4.713920972644377, "grad_norm": 0.0749982077456791, "learning_rate": 3.623896711073907e-06, "loss": 0.5144, "step": 9692 }, { "epoch": 4.714407294832827, "grad_norm": 0.07537583112118984, "learning_rate": 3.6229774923435913e-06, "loss": 0.5192, "step": 9693 }, { "epoch": 4.714893617021277, "grad_norm": 0.07466143128530704, "learning_rate": 3.622058323971612e-06, "loss": 0.5143, "step": 9694 }, { "epoch": 4.715379939209726, "grad_norm": 0.07397705973358144, "learning_rate": 3.6211392059915878e-06, "loss": 0.5007, "step": 9695 }, { "epoch": 4.715866261398176, "grad_norm": 0.0752079888287382, "learning_rate": 3.6202201384371275e-06, "loss": 0.5337, "step": 9696 }, { "epoch": 4.716352583586626, "grad_norm": 0.07629116753465477, "learning_rate": 3.619301121341846e-06, "loss": 0.5086, "step": 9697 }, { "epoch": 4.716838905775076, "grad_norm": 0.07270555575141048, "learning_rate": 3.6183821547393473e-06, "loss": 0.5093, "step": 9698 }, { "epoch": 4.717325227963526, "grad_norm": 0.07329952883677855, "learning_rate": 3.617463238663241e-06, "loss": 0.5054, "step": 9699 }, { "epoch": 4.717811550151976, "grad_norm": 0.07177485735913473, "learning_rate": 3.616544373147134e-06, "loss": 0.4863, "step": 9700 }, { "epoch": 4.718297872340425, "grad_norm": 0.07249512808292269, "learning_rate": 3.615625558224626e-06, "loss": 0.5161, "step": 9701 }, { "epoch": 4.718784194528875, "grad_norm": 0.0744781437103124, "learning_rate": 3.6147067939293225e-06, "loss": 0.497, "step": 9702 }, { "epoch": 4.719270516717325, "grad_norm": 0.07123421964173951, "learning_rate": 3.6137880802948187e-06, "loss": 0.4848, "step": 9703 }, { "epoch": 4.719756838905775, "grad_norm": 0.07591929736847168, "learning_rate": 3.612869417354716e-06, "loss": 0.5122, "step": 9704 }, { "epoch": 4.720243161094225, "grad_norm": 0.07436117337444799, "learning_rate": 3.6119508051426074e-06, "loss": 0.5124, "step": 9705 }, { "epoch": 4.720729483282675, "grad_norm": 0.07712767828042628, "learning_rate": 3.6110322436920907e-06, "loss": 0.5168, "step": 9706 }, { "epoch": 4.721215805471124, "grad_norm": 0.07308873909564961, "learning_rate": 3.610113733036754e-06, "loss": 0.4883, "step": 9707 }, { "epoch": 4.721702127659574, "grad_norm": 0.07182223928762813, "learning_rate": 3.6091952732101914e-06, "loss": 0.4941, "step": 9708 }, { "epoch": 4.722188449848025, "grad_norm": 0.07365357247373885, "learning_rate": 3.6082768642459874e-06, "loss": 0.5118, "step": 9709 }, { "epoch": 4.722674772036474, "grad_norm": 0.07410324829943231, "learning_rate": 3.6073585061777317e-06, "loss": 0.5324, "step": 9710 }, { "epoch": 4.723161094224924, "grad_norm": 0.07193067339448064, "learning_rate": 3.6064401990390073e-06, "loss": 0.4812, "step": 9711 }, { "epoch": 4.723647416413374, "grad_norm": 0.07651798257715141, "learning_rate": 3.6055219428634004e-06, "loss": 0.4877, "step": 9712 }, { "epoch": 4.724133738601823, "grad_norm": 0.07176851181679521, "learning_rate": 3.6046037376844874e-06, "loss": 0.4793, "step": 9713 }, { "epoch": 4.724620060790274, "grad_norm": 0.07150576438483817, "learning_rate": 3.60368558353585e-06, "loss": 0.4693, "step": 9714 }, { "epoch": 4.725106382978724, "grad_norm": 0.07283998278940824, "learning_rate": 3.6027674804510648e-06, "loss": 0.5048, "step": 9715 }, { "epoch": 4.725592705167173, "grad_norm": 0.07580647852892412, "learning_rate": 3.6018494284637096e-06, "loss": 0.4965, "step": 9716 }, { "epoch": 4.726079027355623, "grad_norm": 0.0752706878266174, "learning_rate": 3.6009314276073543e-06, "loss": 0.5062, "step": 9717 }, { "epoch": 4.726565349544073, "grad_norm": 0.07383080829912109, "learning_rate": 3.6000134779155727e-06, "loss": 0.4723, "step": 9718 }, { "epoch": 4.727051671732523, "grad_norm": 0.07044824552727634, "learning_rate": 3.5990955794219335e-06, "loss": 0.4937, "step": 9719 }, { "epoch": 4.727537993920973, "grad_norm": 0.07434810067840288, "learning_rate": 3.5981777321600077e-06, "loss": 0.5173, "step": 9720 }, { "epoch": 4.728024316109423, "grad_norm": 0.07265568359586524, "learning_rate": 3.5972599361633564e-06, "loss": 0.4718, "step": 9721 }, { "epoch": 4.728510638297872, "grad_norm": 0.07551258315662726, "learning_rate": 3.5963421914655492e-06, "loss": 0.5122, "step": 9722 }, { "epoch": 4.728996960486322, "grad_norm": 0.07652969660092714, "learning_rate": 3.595424498100144e-06, "loss": 0.493, "step": 9723 }, { "epoch": 4.729483282674772, "grad_norm": 0.07578458077270042, "learning_rate": 3.5945068561007037e-06, "loss": 0.5226, "step": 9724 }, { "epoch": 4.729969604863222, "grad_norm": 0.07364530256160033, "learning_rate": 3.593589265500784e-06, "loss": 0.4909, "step": 9725 }, { "epoch": 4.730455927051672, "grad_norm": 0.07222076382519117, "learning_rate": 3.5926717263339458e-06, "loss": 0.4647, "step": 9726 }, { "epoch": 4.730942249240122, "grad_norm": 0.07441978681154107, "learning_rate": 3.5917542386337427e-06, "loss": 0.5091, "step": 9727 }, { "epoch": 4.731428571428571, "grad_norm": 0.075908317255612, "learning_rate": 3.590836802433725e-06, "loss": 0.5296, "step": 9728 }, { "epoch": 4.731914893617021, "grad_norm": 0.07281737757326617, "learning_rate": 3.589919417767447e-06, "loss": 0.4821, "step": 9729 }, { "epoch": 4.732401215805471, "grad_norm": 0.07147181446720538, "learning_rate": 3.5890020846684557e-06, "loss": 0.4894, "step": 9730 }, { "epoch": 4.732887537993921, "grad_norm": 0.07405245263519245, "learning_rate": 3.5880848031703007e-06, "loss": 0.5141, "step": 9731 }, { "epoch": 4.733373860182371, "grad_norm": 0.07212471909025862, "learning_rate": 3.587167573306525e-06, "loss": 0.5183, "step": 9732 }, { "epoch": 4.733860182370821, "grad_norm": 0.07246583488246103, "learning_rate": 3.5862503951106738e-06, "loss": 0.5127, "step": 9733 }, { "epoch": 4.73434650455927, "grad_norm": 0.07313886099581665, "learning_rate": 3.585333268616286e-06, "loss": 0.4778, "step": 9734 }, { "epoch": 4.73483282674772, "grad_norm": 0.07713045774273816, "learning_rate": 3.5844161938569044e-06, "loss": 0.4919, "step": 9735 }, { "epoch": 4.735319148936171, "grad_norm": 0.07886834976454239, "learning_rate": 3.5834991708660648e-06, "loss": 0.5594, "step": 9736 }, { "epoch": 4.73580547112462, "grad_norm": 0.07335635439637181, "learning_rate": 3.5825821996773067e-06, "loss": 0.5094, "step": 9737 }, { "epoch": 4.73629179331307, "grad_norm": 0.07388011356129526, "learning_rate": 3.5816652803241593e-06, "loss": 0.5072, "step": 9738 }, { "epoch": 4.73677811550152, "grad_norm": 0.07403671210681402, "learning_rate": 3.5807484128401577e-06, "loss": 0.524, "step": 9739 }, { "epoch": 4.737264437689969, "grad_norm": 0.07226507872097664, "learning_rate": 3.5798315972588306e-06, "loss": 0.4812, "step": 9740 }, { "epoch": 4.73775075987842, "grad_norm": 0.07565678335867206, "learning_rate": 3.5789148336137085e-06, "loss": 0.5327, "step": 9741 }, { "epoch": 4.7382370820668696, "grad_norm": 0.07752654608280349, "learning_rate": 3.5779981219383153e-06, "loss": 0.5104, "step": 9742 }, { "epoch": 4.738723404255319, "grad_norm": 0.07694209300471293, "learning_rate": 3.5770814622661775e-06, "loss": 0.5811, "step": 9743 }, { "epoch": 4.739209726443769, "grad_norm": 0.07164510482494477, "learning_rate": 3.5761648546308163e-06, "loss": 0.4723, "step": 9744 }, { "epoch": 4.739696048632219, "grad_norm": 0.07344454350082696, "learning_rate": 3.5752482990657557e-06, "loss": 0.4851, "step": 9745 }, { "epoch": 4.740182370820669, "grad_norm": 0.0706854632627505, "learning_rate": 3.5743317956045093e-06, "loss": 0.472, "step": 9746 }, { "epoch": 4.740668693009119, "grad_norm": 0.0753541275425176, "learning_rate": 3.5734153442805993e-06, "loss": 0.5155, "step": 9747 }, { "epoch": 4.7411550151975685, "grad_norm": 0.07368347283064641, "learning_rate": 3.572498945127536e-06, "loss": 0.5103, "step": 9748 }, { "epoch": 4.741641337386018, "grad_norm": 0.07400782047150341, "learning_rate": 3.5715825981788353e-06, "loss": 0.5178, "step": 9749 }, { "epoch": 4.742127659574468, "grad_norm": 0.07580568293924207, "learning_rate": 3.570666303468008e-06, "loss": 0.5416, "step": 9750 }, { "epoch": 4.742613981762918, "grad_norm": 0.07589635897475323, "learning_rate": 3.569750061028565e-06, "loss": 0.5311, "step": 9751 }, { "epoch": 4.743100303951368, "grad_norm": 0.07146293716591269, "learning_rate": 3.56883387089401e-06, "loss": 0.4885, "step": 9752 }, { "epoch": 4.743586626139818, "grad_norm": 0.0754189832744774, "learning_rate": 3.567917733097851e-06, "loss": 0.5175, "step": 9753 }, { "epoch": 4.7440729483282675, "grad_norm": 0.07504084512620407, "learning_rate": 3.5670016476735916e-06, "loss": 0.5452, "step": 9754 }, { "epoch": 4.744559270516717, "grad_norm": 0.07493569225025028, "learning_rate": 3.5660856146547316e-06, "loss": 0.5081, "step": 9755 }, { "epoch": 4.745045592705167, "grad_norm": 0.07679996303387869, "learning_rate": 3.5651696340747747e-06, "loss": 0.4957, "step": 9756 }, { "epoch": 4.745531914893617, "grad_norm": 0.07730414505923569, "learning_rate": 3.5642537059672142e-06, "loss": 0.572, "step": 9757 }, { "epoch": 4.746018237082067, "grad_norm": 0.07264580644260071, "learning_rate": 3.5633378303655486e-06, "loss": 0.4766, "step": 9758 }, { "epoch": 4.746504559270517, "grad_norm": 0.07300710064794234, "learning_rate": 3.5624220073032707e-06, "loss": 0.4882, "step": 9759 }, { "epoch": 4.7469908814589665, "grad_norm": 0.0776537167036983, "learning_rate": 3.561506236813875e-06, "loss": 0.5465, "step": 9760 }, { "epoch": 4.747477203647416, "grad_norm": 0.07252783462957638, "learning_rate": 3.5605905189308477e-06, "loss": 0.504, "step": 9761 }, { "epoch": 4.747963525835866, "grad_norm": 0.07135799151487464, "learning_rate": 3.559674853687681e-06, "loss": 0.5117, "step": 9762 }, { "epoch": 4.7484498480243165, "grad_norm": 0.07352622976456395, "learning_rate": 3.5587592411178574e-06, "loss": 0.4992, "step": 9763 }, { "epoch": 4.748936170212766, "grad_norm": 0.0744112848498426, "learning_rate": 3.5578436812548637e-06, "loss": 0.5093, "step": 9764 }, { "epoch": 4.749422492401216, "grad_norm": 0.07623190672905851, "learning_rate": 3.5569281741321813e-06, "loss": 0.5214, "step": 9765 }, { "epoch": 4.7499088145896655, "grad_norm": 0.0789306543042129, "learning_rate": 3.556012719783293e-06, "loss": 0.5126, "step": 9766 }, { "epoch": 4.7499088145896655, "eval_loss": 0.567747950553894, "eval_runtime": 105.2197, "eval_samples_per_second": 288.472, "eval_steps_per_second": 36.067, "step": 9766 }, { "epoch": 4.750395136778115, "grad_norm": 0.07255644356136397, "learning_rate": 3.5550973182416736e-06, "loss": 0.5055, "step": 9767 }, { "epoch": 4.750881458966566, "grad_norm": 0.07793849841204106, "learning_rate": 3.554181969540803e-06, "loss": 0.525, "step": 9768 }, { "epoch": 4.7513677811550155, "grad_norm": 0.07258094189185788, "learning_rate": 3.553266673714153e-06, "loss": 0.5059, "step": 9769 }, { "epoch": 4.751854103343465, "grad_norm": 0.0746562524015995, "learning_rate": 3.5523514307952e-06, "loss": 0.5398, "step": 9770 }, { "epoch": 4.752340425531915, "grad_norm": 0.07320671991583819, "learning_rate": 3.551436240817412e-06, "loss": 0.4994, "step": 9771 }, { "epoch": 4.7528267477203645, "grad_norm": 0.0736358204626534, "learning_rate": 3.5505211038142597e-06, "loss": 0.4974, "step": 9772 }, { "epoch": 4.753313069908815, "grad_norm": 0.07470222006856667, "learning_rate": 3.5496060198192073e-06, "loss": 0.5204, "step": 9773 }, { "epoch": 4.753799392097265, "grad_norm": 0.07504460776172159, "learning_rate": 3.5486909888657227e-06, "loss": 0.5068, "step": 9774 }, { "epoch": 4.7542857142857144, "grad_norm": 0.07018892982883108, "learning_rate": 3.547776010987268e-06, "loss": 0.4837, "step": 9775 }, { "epoch": 4.754772036474164, "grad_norm": 0.07253018225378546, "learning_rate": 3.5468610862173054e-06, "loss": 0.543, "step": 9776 }, { "epoch": 4.755258358662614, "grad_norm": 0.07301560198511932, "learning_rate": 3.545946214589291e-06, "loss": 0.5117, "step": 9777 }, { "epoch": 4.7557446808510635, "grad_norm": 0.07409450356552133, "learning_rate": 3.5450313961366843e-06, "loss": 0.5161, "step": 9778 }, { "epoch": 4.756231003039513, "grad_norm": 0.07640159500494696, "learning_rate": 3.544116630892942e-06, "loss": 0.5302, "step": 9779 }, { "epoch": 4.756717325227964, "grad_norm": 0.07519060246872211, "learning_rate": 3.5432019188915147e-06, "loss": 0.4826, "step": 9780 }, { "epoch": 4.757203647416413, "grad_norm": 0.07609169050357863, "learning_rate": 3.5422872601658566e-06, "loss": 0.523, "step": 9781 }, { "epoch": 4.757689969604863, "grad_norm": 0.07171616715320253, "learning_rate": 3.541372654749414e-06, "loss": 0.4725, "step": 9782 }, { "epoch": 4.758176291793313, "grad_norm": 0.07603686940243717, "learning_rate": 3.5404581026756368e-06, "loss": 0.5247, "step": 9783 }, { "epoch": 4.7586626139817625, "grad_norm": 0.0715948939181546, "learning_rate": 3.539543603977969e-06, "loss": 0.4676, "step": 9784 }, { "epoch": 4.759148936170213, "grad_norm": 0.07195841178651871, "learning_rate": 3.5386291586898575e-06, "loss": 0.4959, "step": 9785 }, { "epoch": 4.759635258358663, "grad_norm": 0.07360118603985216, "learning_rate": 3.537714766844739e-06, "loss": 0.5236, "step": 9786 }, { "epoch": 4.760121580547112, "grad_norm": 0.0717758448362556, "learning_rate": 3.5368004284760584e-06, "loss": 0.5091, "step": 9787 }, { "epoch": 4.760607902735562, "grad_norm": 0.07450682832691077, "learning_rate": 3.5358861436172487e-06, "loss": 0.5256, "step": 9788 }, { "epoch": 4.761094224924012, "grad_norm": 0.07080516295258174, "learning_rate": 3.534971912301749e-06, "loss": 0.4751, "step": 9789 }, { "epoch": 4.761580547112462, "grad_norm": 0.07377008894900824, "learning_rate": 3.534057734562991e-06, "loss": 0.5109, "step": 9790 }, { "epoch": 4.762066869300912, "grad_norm": 0.07401104027256379, "learning_rate": 3.53314361043441e-06, "loss": 0.5056, "step": 9791 }, { "epoch": 4.762553191489362, "grad_norm": 0.07432988650059785, "learning_rate": 3.5322295399494307e-06, "loss": 0.4925, "step": 9792 }, { "epoch": 4.763039513677811, "grad_norm": 0.07239458482336424, "learning_rate": 3.5313155231414855e-06, "loss": 0.489, "step": 9793 }, { "epoch": 4.763525835866261, "grad_norm": 0.071325125694811, "learning_rate": 3.5304015600439977e-06, "loss": 0.5018, "step": 9794 }, { "epoch": 4.764012158054712, "grad_norm": 0.07683321340533981, "learning_rate": 3.5294876506903947e-06, "loss": 0.5081, "step": 9795 }, { "epoch": 4.764498480243161, "grad_norm": 0.07464038713559308, "learning_rate": 3.528573795114094e-06, "loss": 0.5063, "step": 9796 }, { "epoch": 4.764984802431611, "grad_norm": 0.07628768713775802, "learning_rate": 3.52765999334852e-06, "loss": 0.533, "step": 9797 }, { "epoch": 4.765471124620061, "grad_norm": 0.07375930236235786, "learning_rate": 3.526746245427087e-06, "loss": 0.5094, "step": 9798 }, { "epoch": 4.76595744680851, "grad_norm": 0.07586561658147456, "learning_rate": 3.5258325513832157e-06, "loss": 0.5501, "step": 9799 }, { "epoch": 4.766443768996961, "grad_norm": 0.07347675720691822, "learning_rate": 3.5249189112503156e-06, "loss": 0.5325, "step": 9800 }, { "epoch": 4.766930091185411, "grad_norm": 0.07470445954407519, "learning_rate": 3.5240053250618035e-06, "loss": 0.4977, "step": 9801 }, { "epoch": 4.76741641337386, "grad_norm": 0.07443680061146025, "learning_rate": 3.5230917928510844e-06, "loss": 0.4815, "step": 9802 }, { "epoch": 4.76790273556231, "grad_norm": 0.07463936270291485, "learning_rate": 3.522178314651571e-06, "loss": 0.4965, "step": 9803 }, { "epoch": 4.76838905775076, "grad_norm": 0.07388399009859531, "learning_rate": 3.5212648904966675e-06, "loss": 0.5146, "step": 9804 }, { "epoch": 4.768875379939209, "grad_norm": 0.07351295468656954, "learning_rate": 3.5203515204197774e-06, "loss": 0.4994, "step": 9805 }, { "epoch": 4.769361702127659, "grad_norm": 0.07552166616510825, "learning_rate": 3.519438204454307e-06, "loss": 0.4943, "step": 9806 }, { "epoch": 4.76984802431611, "grad_norm": 0.07269306158659065, "learning_rate": 3.5185249426336526e-06, "loss": 0.4896, "step": 9807 }, { "epoch": 4.770334346504559, "grad_norm": 0.07190776536994783, "learning_rate": 3.5176117349912153e-06, "loss": 0.4757, "step": 9808 }, { "epoch": 4.770820668693009, "grad_norm": 0.07425955366321417, "learning_rate": 3.516698581560388e-06, "loss": 0.5768, "step": 9809 }, { "epoch": 4.771306990881459, "grad_norm": 0.07757335723694997, "learning_rate": 3.5157854823745706e-06, "loss": 0.5572, "step": 9810 }, { "epoch": 4.771793313069908, "grad_norm": 0.07365121798165827, "learning_rate": 3.5148724374671504e-06, "loss": 0.514, "step": 9811 }, { "epoch": 4.772279635258359, "grad_norm": 0.07392207043245279, "learning_rate": 3.513959446871521e-06, "loss": 0.5189, "step": 9812 }, { "epoch": 4.772765957446809, "grad_norm": 0.07275929456702032, "learning_rate": 3.5130465106210683e-06, "loss": 0.4925, "step": 9813 }, { "epoch": 4.773252279635258, "grad_norm": 0.07493324808021674, "learning_rate": 3.5121336287491827e-06, "loss": 0.5239, "step": 9814 }, { "epoch": 4.773738601823708, "grad_norm": 0.07188830018941737, "learning_rate": 3.5112208012892434e-06, "loss": 0.5226, "step": 9815 }, { "epoch": 4.774224924012158, "grad_norm": 0.07188461585063607, "learning_rate": 3.510308028274638e-06, "loss": 0.5191, "step": 9816 }, { "epoch": 4.774711246200608, "grad_norm": 0.07750324293504426, "learning_rate": 3.5093953097387432e-06, "loss": 0.5421, "step": 9817 }, { "epoch": 4.775197568389058, "grad_norm": 0.07247154017826295, "learning_rate": 3.5084826457149403e-06, "loss": 0.4997, "step": 9818 }, { "epoch": 4.775683890577508, "grad_norm": 0.07593107666964263, "learning_rate": 3.5075700362366037e-06, "loss": 0.5221, "step": 9819 }, { "epoch": 4.776170212765957, "grad_norm": 0.0794232921091397, "learning_rate": 3.5066574813371107e-06, "loss": 0.5347, "step": 9820 }, { "epoch": 4.776656534954407, "grad_norm": 0.07369308489835924, "learning_rate": 3.5057449810498303e-06, "loss": 0.5143, "step": 9821 }, { "epoch": 4.777142857142858, "grad_norm": 0.07511724406723964, "learning_rate": 3.5048325354081355e-06, "loss": 0.5183, "step": 9822 }, { "epoch": 4.777629179331307, "grad_norm": 0.082368562252736, "learning_rate": 3.503920144445393e-06, "loss": 0.5434, "step": 9823 }, { "epoch": 4.778115501519757, "grad_norm": 0.0744682451149145, "learning_rate": 3.5030078081949727e-06, "loss": 0.5018, "step": 9824 }, { "epoch": 4.778601823708207, "grad_norm": 0.07579777787978553, "learning_rate": 3.5020955266902344e-06, "loss": 0.5132, "step": 9825 }, { "epoch": 4.779088145896656, "grad_norm": 0.07226973858210134, "learning_rate": 3.5011832999645466e-06, "loss": 0.5196, "step": 9826 }, { "epoch": 4.779574468085106, "grad_norm": 0.07178286219415975, "learning_rate": 3.5002711280512638e-06, "loss": 0.5039, "step": 9827 }, { "epoch": 4.780060790273557, "grad_norm": 0.07331009740163545, "learning_rate": 3.499359010983748e-06, "loss": 0.5182, "step": 9828 }, { "epoch": 4.780547112462006, "grad_norm": 0.07811334721069643, "learning_rate": 3.4984469487953537e-06, "loss": 0.5336, "step": 9829 }, { "epoch": 4.781033434650456, "grad_norm": 0.0746750737897446, "learning_rate": 3.497534941519437e-06, "loss": 0.5206, "step": 9830 }, { "epoch": 4.781519756838906, "grad_norm": 0.07919322016925144, "learning_rate": 3.496622989189352e-06, "loss": 0.5021, "step": 9831 }, { "epoch": 4.782006079027355, "grad_norm": 0.07441818594727875, "learning_rate": 3.4957110918384457e-06, "loss": 0.5299, "step": 9832 }, { "epoch": 4.782492401215805, "grad_norm": 0.07490565547441784, "learning_rate": 3.4947992495000693e-06, "loss": 0.4936, "step": 9833 }, { "epoch": 4.782978723404256, "grad_norm": 0.07493913993770857, "learning_rate": 3.4938874622075664e-06, "loss": 0.5459, "step": 9834 }, { "epoch": 4.783465045592705, "grad_norm": 0.07405899335629826, "learning_rate": 3.4929757299942856e-06, "loss": 0.502, "step": 9835 }, { "epoch": 4.783951367781155, "grad_norm": 0.0749874857172059, "learning_rate": 3.492064052893565e-06, "loss": 0.5163, "step": 9836 }, { "epoch": 4.784437689969605, "grad_norm": 0.06983182178092108, "learning_rate": 3.4911524309387486e-06, "loss": 0.4824, "step": 9837 }, { "epoch": 4.784924012158054, "grad_norm": 0.072479363122535, "learning_rate": 3.4902408641631712e-06, "loss": 0.541, "step": 9838 }, { "epoch": 4.785410334346505, "grad_norm": 0.07415528128373292, "learning_rate": 3.489329352600175e-06, "loss": 0.515, "step": 9839 }, { "epoch": 4.7858966565349546, "grad_norm": 0.07654566834136997, "learning_rate": 3.4884178962830873e-06, "loss": 0.5433, "step": 9840 }, { "epoch": 4.786382978723404, "grad_norm": 0.0722118979317469, "learning_rate": 3.4875064952452465e-06, "loss": 0.4841, "step": 9841 }, { "epoch": 4.786869300911854, "grad_norm": 0.07785931538963002, "learning_rate": 3.4865951495199777e-06, "loss": 0.5142, "step": 9842 }, { "epoch": 4.787355623100304, "grad_norm": 0.07249086345814795, "learning_rate": 3.4856838591406133e-06, "loss": 0.5045, "step": 9843 }, { "epoch": 4.787841945288754, "grad_norm": 0.07386509165721343, "learning_rate": 3.4847726241404773e-06, "loss": 0.5179, "step": 9844 }, { "epoch": 4.788328267477204, "grad_norm": 0.07409307035709214, "learning_rate": 3.4838614445528966e-06, "loss": 0.5143, "step": 9845 }, { "epoch": 4.7888145896656535, "grad_norm": 0.07588906375978266, "learning_rate": 3.4829503204111897e-06, "loss": 0.5586, "step": 9846 }, { "epoch": 4.789300911854103, "grad_norm": 0.07410715122236765, "learning_rate": 3.48203925174868e-06, "loss": 0.5044, "step": 9847 }, { "epoch": 4.789787234042553, "grad_norm": 0.07308231934404547, "learning_rate": 3.4811282385986835e-06, "loss": 0.489, "step": 9848 }, { "epoch": 4.7902735562310035, "grad_norm": 0.07379629850994263, "learning_rate": 3.480217280994519e-06, "loss": 0.5013, "step": 9849 }, { "epoch": 4.790759878419453, "grad_norm": 0.07192926801370358, "learning_rate": 3.479306378969497e-06, "loss": 0.5042, "step": 9850 }, { "epoch": 4.791246200607903, "grad_norm": 0.07385691051176226, "learning_rate": 3.478395532556933e-06, "loss": 0.5081, "step": 9851 }, { "epoch": 4.7917325227963525, "grad_norm": 0.071388404141872, "learning_rate": 3.4774847417901345e-06, "loss": 0.4972, "step": 9852 }, { "epoch": 4.792218844984802, "grad_norm": 0.07501240873261915, "learning_rate": 3.4765740067024133e-06, "loss": 0.5154, "step": 9853 }, { "epoch": 4.792705167173252, "grad_norm": 0.07316167562038231, "learning_rate": 3.47566332732707e-06, "loss": 0.5131, "step": 9854 }, { "epoch": 4.7931914893617025, "grad_norm": 0.07549758956830742, "learning_rate": 3.4747527036974137e-06, "loss": 0.5321, "step": 9855 }, { "epoch": 4.793677811550152, "grad_norm": 0.07558350175019174, "learning_rate": 3.4738421358467417e-06, "loss": 0.5369, "step": 9856 }, { "epoch": 4.794164133738602, "grad_norm": 0.07238987935246406, "learning_rate": 3.4729316238083564e-06, "loss": 0.5042, "step": 9857 }, { "epoch": 4.7946504559270515, "grad_norm": 0.07337581550885319, "learning_rate": 3.4720211676155564e-06, "loss": 0.4975, "step": 9858 }, { "epoch": 4.795136778115501, "grad_norm": 0.0758419292319453, "learning_rate": 3.4711107673016355e-06, "loss": 0.509, "step": 9859 }, { "epoch": 4.795623100303951, "grad_norm": 0.07232613874457919, "learning_rate": 3.47020042289989e-06, "loss": 0.494, "step": 9860 }, { "epoch": 4.7961094224924015, "grad_norm": 0.07219658888394759, "learning_rate": 3.4692901344436085e-06, "loss": 0.4864, "step": 9861 }, { "epoch": 4.796595744680851, "grad_norm": 0.07382802307391192, "learning_rate": 3.4683799019660834e-06, "loss": 0.5029, "step": 9862 }, { "epoch": 4.797082066869301, "grad_norm": 0.07516563350058748, "learning_rate": 3.4674697255005995e-06, "loss": 0.53, "step": 9863 }, { "epoch": 4.7975683890577505, "grad_norm": 0.0751437166769319, "learning_rate": 3.466559605080447e-06, "loss": 0.4872, "step": 9864 }, { "epoch": 4.7980547112462, "grad_norm": 0.07608862040643435, "learning_rate": 3.4656495407389033e-06, "loss": 0.5129, "step": 9865 }, { "epoch": 4.798541033434651, "grad_norm": 0.07379749351457024, "learning_rate": 3.464739532509256e-06, "loss": 0.5001, "step": 9866 }, { "epoch": 4.7990273556231005, "grad_norm": 0.07078433037537617, "learning_rate": 3.463829580424779e-06, "loss": 0.5016, "step": 9867 }, { "epoch": 4.79951367781155, "grad_norm": 0.0727552629446106, "learning_rate": 3.462919684518753e-06, "loss": 0.5163, "step": 9868 }, { "epoch": 4.8, "grad_norm": 0.07409056608308609, "learning_rate": 3.462009844824451e-06, "loss": 0.4951, "step": 9869 }, { "epoch": 4.8004863221884495, "grad_norm": 0.07443712380351969, "learning_rate": 3.461100061375151e-06, "loss": 0.5073, "step": 9870 }, { "epoch": 4.8009726443769, "grad_norm": 0.07532965553008598, "learning_rate": 3.460190334204118e-06, "loss": 0.5075, "step": 9871 }, { "epoch": 4.80145896656535, "grad_norm": 0.07509388942729486, "learning_rate": 3.459280663344625e-06, "loss": 0.5399, "step": 9872 }, { "epoch": 4.8019452887537994, "grad_norm": 0.07380387466634364, "learning_rate": 3.4583710488299375e-06, "loss": 0.5014, "step": 9873 }, { "epoch": 4.802431610942249, "grad_norm": 0.07467597097277383, "learning_rate": 3.4574614906933234e-06, "loss": 0.5446, "step": 9874 }, { "epoch": 4.802917933130699, "grad_norm": 0.07315069463562376, "learning_rate": 3.456551988968041e-06, "loss": 0.4979, "step": 9875 }, { "epoch": 4.803404255319149, "grad_norm": 0.07279755130071162, "learning_rate": 3.455642543687355e-06, "loss": 0.5275, "step": 9876 }, { "epoch": 4.803890577507599, "grad_norm": 0.07554319797421721, "learning_rate": 3.454733154884521e-06, "loss": 0.5181, "step": 9877 }, { "epoch": 4.804376899696049, "grad_norm": 0.07299003141026666, "learning_rate": 3.4538238225928e-06, "loss": 0.4791, "step": 9878 }, { "epoch": 4.804863221884498, "grad_norm": 0.07632411918381653, "learning_rate": 3.4529145468454427e-06, "loss": 0.512, "step": 9879 }, { "epoch": 4.805349544072948, "grad_norm": 0.07592163029083701, "learning_rate": 3.452005327675705e-06, "loss": 0.5312, "step": 9880 }, { "epoch": 4.805835866261398, "grad_norm": 0.07297920857642748, "learning_rate": 3.4510961651168328e-06, "loss": 0.4882, "step": 9881 }, { "epoch": 4.806322188449848, "grad_norm": 0.07433125933882768, "learning_rate": 3.4501870592020802e-06, "loss": 0.486, "step": 9882 }, { "epoch": 4.806808510638298, "grad_norm": 0.07266363866821993, "learning_rate": 3.4492780099646887e-06, "loss": 0.4901, "step": 9883 }, { "epoch": 4.807294832826748, "grad_norm": 0.07452999573080502, "learning_rate": 3.4483690174379055e-06, "loss": 0.5051, "step": 9884 }, { "epoch": 4.807781155015197, "grad_norm": 0.07581845063732459, "learning_rate": 3.447460081654974e-06, "loss": 0.5129, "step": 9885 }, { "epoch": 4.808267477203647, "grad_norm": 0.07281744585970004, "learning_rate": 3.446551202649131e-06, "loss": 0.506, "step": 9886 }, { "epoch": 4.808753799392097, "grad_norm": 0.07616365660500832, "learning_rate": 3.445642380453617e-06, "loss": 0.5056, "step": 9887 }, { "epoch": 4.809240121580547, "grad_norm": 0.07397139837369174, "learning_rate": 3.4447336151016663e-06, "loss": 0.4928, "step": 9888 }, { "epoch": 4.809726443768997, "grad_norm": 0.07742794319823819, "learning_rate": 3.4438249066265163e-06, "loss": 0.5408, "step": 9889 }, { "epoch": 4.810212765957447, "grad_norm": 0.07654845924557889, "learning_rate": 3.4429162550613937e-06, "loss": 0.4991, "step": 9890 }, { "epoch": 4.810699088145896, "grad_norm": 0.07482347257278676, "learning_rate": 3.4420076604395327e-06, "loss": 0.5311, "step": 9891 }, { "epoch": 4.811185410334346, "grad_norm": 0.07469645729465225, "learning_rate": 3.441099122794158e-06, "loss": 0.5222, "step": 9892 }, { "epoch": 4.811671732522797, "grad_norm": 0.0784919170775449, "learning_rate": 3.4401906421584996e-06, "loss": 0.5307, "step": 9893 }, { "epoch": 4.812158054711246, "grad_norm": 0.0754819640765405, "learning_rate": 3.4392822185657747e-06, "loss": 0.5187, "step": 9894 }, { "epoch": 4.812644376899696, "grad_norm": 0.07923219645377234, "learning_rate": 3.438373852049211e-06, "loss": 0.5752, "step": 9895 }, { "epoch": 4.813130699088146, "grad_norm": 0.0750010896940689, "learning_rate": 3.437465542642023e-06, "loss": 0.5538, "step": 9896 }, { "epoch": 4.813617021276595, "grad_norm": 0.07536977279665356, "learning_rate": 3.4365572903774304e-06, "loss": 0.5484, "step": 9897 }, { "epoch": 4.814103343465046, "grad_norm": 0.07369347055207746, "learning_rate": 3.4356490952886477e-06, "loss": 0.4911, "step": 9898 }, { "epoch": 4.814589665653496, "grad_norm": 0.07269023541954552, "learning_rate": 3.4347409574088896e-06, "loss": 0.4764, "step": 9899 }, { "epoch": 4.815075987841945, "grad_norm": 0.0740189798211945, "learning_rate": 3.433832876771365e-06, "loss": 0.5103, "step": 9900 }, { "epoch": 4.815562310030395, "grad_norm": 0.07318115419514643, "learning_rate": 3.432924853409283e-06, "loss": 0.5227, "step": 9901 }, { "epoch": 4.816048632218845, "grad_norm": 0.07064213584603708, "learning_rate": 3.432016887355851e-06, "loss": 0.4964, "step": 9902 }, { "epoch": 4.816534954407295, "grad_norm": 0.0709788647563823, "learning_rate": 3.431108978644276e-06, "loss": 0.4695, "step": 9903 }, { "epoch": 4.817021276595745, "grad_norm": 0.07366166983146442, "learning_rate": 3.430201127307756e-06, "loss": 0.503, "step": 9904 }, { "epoch": 4.817507598784195, "grad_norm": 0.07349084825580836, "learning_rate": 3.4292933333794955e-06, "loss": 0.5165, "step": 9905 }, { "epoch": 4.817993920972644, "grad_norm": 0.07310470058748092, "learning_rate": 3.428385596892689e-06, "loss": 0.5125, "step": 9906 }, { "epoch": 4.818480243161094, "grad_norm": 0.07388463356470455, "learning_rate": 3.427477917880539e-06, "loss": 0.5001, "step": 9907 }, { "epoch": 4.818966565349544, "grad_norm": 0.0727782842341365, "learning_rate": 3.426570296376233e-06, "loss": 0.5067, "step": 9908 }, { "epoch": 4.819452887537994, "grad_norm": 0.07376985977931319, "learning_rate": 3.4256627324129667e-06, "loss": 0.5136, "step": 9909 }, { "epoch": 4.819939209726444, "grad_norm": 0.07728120971890819, "learning_rate": 3.424755226023931e-06, "loss": 0.5267, "step": 9910 }, { "epoch": 4.820425531914894, "grad_norm": 0.07264030809979652, "learning_rate": 3.423847777242311e-06, "loss": 0.522, "step": 9911 }, { "epoch": 4.820911854103343, "grad_norm": 0.07501985726659798, "learning_rate": 3.4229403861012938e-06, "loss": 0.5038, "step": 9912 }, { "epoch": 4.821398176291793, "grad_norm": 0.07420533693071835, "learning_rate": 3.4220330526340627e-06, "loss": 0.4887, "step": 9913 }, { "epoch": 4.821884498480243, "grad_norm": 0.07390935663500048, "learning_rate": 3.4211257768738014e-06, "loss": 0.5466, "step": 9914 }, { "epoch": 4.822370820668693, "grad_norm": 0.07501816611662929, "learning_rate": 3.420218558853687e-06, "loss": 0.5299, "step": 9915 }, { "epoch": 4.822857142857143, "grad_norm": 0.07825381389850279, "learning_rate": 3.4193113986068975e-06, "loss": 0.5324, "step": 9916 }, { "epoch": 4.823343465045593, "grad_norm": 0.07472965993156623, "learning_rate": 3.4184042961666077e-06, "loss": 0.5493, "step": 9917 }, { "epoch": 4.823829787234042, "grad_norm": 0.07353954208778861, "learning_rate": 3.417497251565993e-06, "loss": 0.5147, "step": 9918 }, { "epoch": 4.824316109422492, "grad_norm": 0.07225236763041619, "learning_rate": 3.416590264838221e-06, "loss": 0.5115, "step": 9919 }, { "epoch": 4.824802431610943, "grad_norm": 0.07445816652460352, "learning_rate": 3.415683336016465e-06, "loss": 0.4974, "step": 9920 }, { "epoch": 4.825288753799392, "grad_norm": 0.07442185604207374, "learning_rate": 3.4147764651338867e-06, "loss": 0.5102, "step": 9921 }, { "epoch": 4.825775075987842, "grad_norm": 0.07508518707252185, "learning_rate": 3.4138696522236536e-06, "loss": 0.4982, "step": 9922 }, { "epoch": 4.826261398176292, "grad_norm": 0.07477285477857919, "learning_rate": 3.4129628973189276e-06, "loss": 0.5183, "step": 9923 }, { "epoch": 4.826747720364741, "grad_norm": 0.07287682275463758, "learning_rate": 3.412056200452871e-06, "loss": 0.5122, "step": 9924 }, { "epoch": 4.827234042553192, "grad_norm": 0.07563406363863467, "learning_rate": 3.41114956165864e-06, "loss": 0.504, "step": 9925 }, { "epoch": 4.827720364741642, "grad_norm": 0.07401860667094659, "learning_rate": 3.410242980969391e-06, "loss": 0.5006, "step": 9926 }, { "epoch": 4.828206686930091, "grad_norm": 0.07365967112844747, "learning_rate": 3.4093364584182776e-06, "loss": 0.5106, "step": 9927 }, { "epoch": 4.828693009118541, "grad_norm": 0.0737457879408845, "learning_rate": 3.4084299940384545e-06, "loss": 0.4901, "step": 9928 }, { "epoch": 4.829179331306991, "grad_norm": 0.08344830942181425, "learning_rate": 3.4075235878630687e-06, "loss": 0.5213, "step": 9929 }, { "epoch": 4.829665653495441, "grad_norm": 0.07286199829243596, "learning_rate": 3.4066172399252684e-06, "loss": 0.502, "step": 9930 }, { "epoch": 4.830151975683891, "grad_norm": 0.07326707000163565, "learning_rate": 3.4057109502581993e-06, "loss": 0.499, "step": 9931 }, { "epoch": 4.830638297872341, "grad_norm": 0.07373937864381767, "learning_rate": 3.404804718895007e-06, "loss": 0.5106, "step": 9932 }, { "epoch": 4.83112462006079, "grad_norm": 0.07384152959034693, "learning_rate": 3.403898545868829e-06, "loss": 0.5255, "step": 9933 }, { "epoch": 4.83161094224924, "grad_norm": 0.0764035663334682, "learning_rate": 3.402992431212808e-06, "loss": 0.557, "step": 9934 }, { "epoch": 4.83209726443769, "grad_norm": 0.07265171373181742, "learning_rate": 3.4020863749600775e-06, "loss": 0.4987, "step": 9935 }, { "epoch": 4.83258358662614, "grad_norm": 0.07304273683395976, "learning_rate": 3.401180377143774e-06, "loss": 0.5208, "step": 9936 }, { "epoch": 4.83306990881459, "grad_norm": 0.07492914260466464, "learning_rate": 3.4002744377970315e-06, "loss": 0.5012, "step": 9937 }, { "epoch": 4.8335562310030395, "grad_norm": 0.07084420139606204, "learning_rate": 3.399368556952979e-06, "loss": 0.4829, "step": 9938 }, { "epoch": 4.834042553191489, "grad_norm": 0.07476823603231, "learning_rate": 3.3984627346447474e-06, "loss": 0.5349, "step": 9939 }, { "epoch": 4.834528875379939, "grad_norm": 0.07410193222690933, "learning_rate": 3.397556970905459e-06, "loss": 0.491, "step": 9940 }, { "epoch": 4.835015197568389, "grad_norm": 0.0718510477340495, "learning_rate": 3.3966512657682417e-06, "loss": 0.4798, "step": 9941 }, { "epoch": 4.835501519756839, "grad_norm": 0.07290836216206803, "learning_rate": 3.3957456192662143e-06, "loss": 0.4878, "step": 9942 }, { "epoch": 4.835987841945289, "grad_norm": 0.07024778130214428, "learning_rate": 3.3948400314325007e-06, "loss": 0.4837, "step": 9943 }, { "epoch": 4.8364741641337385, "grad_norm": 0.0755056860751704, "learning_rate": 3.3939345023002146e-06, "loss": 0.5187, "step": 9944 }, { "epoch": 4.836960486322188, "grad_norm": 0.07450840446820256, "learning_rate": 3.3930290319024746e-06, "loss": 0.5228, "step": 9945 }, { "epoch": 4.837446808510638, "grad_norm": 0.0741644843374763, "learning_rate": 3.3921236202723916e-06, "loss": 0.5003, "step": 9946 }, { "epoch": 4.8379331306990885, "grad_norm": 0.07334184543332505, "learning_rate": 3.3912182674430805e-06, "loss": 0.4974, "step": 9947 }, { "epoch": 4.838419452887538, "grad_norm": 0.07505031434502361, "learning_rate": 3.390312973447646e-06, "loss": 0.5294, "step": 9948 }, { "epoch": 4.838905775075988, "grad_norm": 0.07455947563647232, "learning_rate": 3.3894077383192e-06, "loss": 0.5421, "step": 9949 }, { "epoch": 4.8393920972644375, "grad_norm": 0.07469136038362498, "learning_rate": 3.388502562090842e-06, "loss": 0.5113, "step": 9950 }, { "epoch": 4.839878419452887, "grad_norm": 0.07552243990510667, "learning_rate": 3.3875974447956795e-06, "loss": 0.5047, "step": 9951 }, { "epoch": 4.840364741641338, "grad_norm": 0.07188136474218193, "learning_rate": 3.386692386466809e-06, "loss": 0.5163, "step": 9952 }, { "epoch": 4.8408510638297875, "grad_norm": 0.0732656384061756, "learning_rate": 3.385787387137333e-06, "loss": 0.5086, "step": 9953 }, { "epoch": 4.841337386018237, "grad_norm": 0.0763368841400177, "learning_rate": 3.384882446840344e-06, "loss": 0.5165, "step": 9954 }, { "epoch": 4.841823708206687, "grad_norm": 0.07368715187595497, "learning_rate": 3.383977565608938e-06, "loss": 0.4859, "step": 9955 }, { "epoch": 4.8423100303951365, "grad_norm": 0.07317892548658124, "learning_rate": 3.3830727434762068e-06, "loss": 0.5391, "step": 9956 }, { "epoch": 4.842796352583587, "grad_norm": 0.0758572337108233, "learning_rate": 3.3821679804752413e-06, "loss": 0.5184, "step": 9957 }, { "epoch": 4.843282674772037, "grad_norm": 0.0761427260422004, "learning_rate": 3.3812632766391252e-06, "loss": 0.537, "step": 9958 }, { "epoch": 4.8437689969604865, "grad_norm": 0.07661617984394792, "learning_rate": 3.3803586320009497e-06, "loss": 0.5087, "step": 9959 }, { "epoch": 4.844255319148936, "grad_norm": 0.07160121442996414, "learning_rate": 3.379454046593792e-06, "loss": 0.4751, "step": 9960 }, { "epoch": 4.844741641337386, "grad_norm": 0.07837639786820297, "learning_rate": 3.3785495204507363e-06, "loss": 0.5335, "step": 9961 }, { "epoch": 4.8452279635258355, "grad_norm": 0.07353751947740553, "learning_rate": 3.3776450536048623e-06, "loss": 0.5259, "step": 9962 }, { "epoch": 4.845714285714286, "grad_norm": 0.07222211739228658, "learning_rate": 3.3767406460892447e-06, "loss": 0.5067, "step": 9963 }, { "epoch": 4.846200607902736, "grad_norm": 0.07401648117741538, "learning_rate": 3.375836297936961e-06, "loss": 0.5188, "step": 9964 }, { "epoch": 4.8466869300911855, "grad_norm": 0.07521469517492321, "learning_rate": 3.37493200918108e-06, "loss": 0.5273, "step": 9965 }, { "epoch": 4.847173252279635, "grad_norm": 0.07680043984100841, "learning_rate": 3.374027779854675e-06, "loss": 0.514, "step": 9966 }, { "epoch": 4.847659574468085, "grad_norm": 0.0748688396911167, "learning_rate": 3.3731236099908116e-06, "loss": 0.5461, "step": 9967 }, { "epoch": 4.8481458966565345, "grad_norm": 0.07385640871075642, "learning_rate": 3.372219499622559e-06, "loss": 0.4922, "step": 9968 }, { "epoch": 4.848632218844985, "grad_norm": 0.0725224523564961, "learning_rate": 3.3713154487829764e-06, "loss": 0.5096, "step": 9969 }, { "epoch": 4.849118541033435, "grad_norm": 0.07344292832818512, "learning_rate": 3.370411457505129e-06, "loss": 0.4741, "step": 9970 }, { "epoch": 4.849604863221884, "grad_norm": 0.07667379348288852, "learning_rate": 3.3695075258220745e-06, "loss": 0.5276, "step": 9971 }, { "epoch": 4.850091185410334, "grad_norm": 0.07469491580733274, "learning_rate": 3.368603653766872e-06, "loss": 0.498, "step": 9972 }, { "epoch": 4.850577507598784, "grad_norm": 0.07277299863912308, "learning_rate": 3.3676998413725726e-06, "loss": 0.5039, "step": 9973 }, { "epoch": 4.851063829787234, "grad_norm": 0.07420847236987306, "learning_rate": 3.366796088672234e-06, "loss": 0.5271, "step": 9974 }, { "epoch": 4.851550151975684, "grad_norm": 0.07321948161815856, "learning_rate": 3.3658923956989033e-06, "loss": 0.522, "step": 9975 }, { "epoch": 4.852036474164134, "grad_norm": 0.07750270866383223, "learning_rate": 3.3649887624856303e-06, "loss": 0.5136, "step": 9976 }, { "epoch": 4.852522796352583, "grad_norm": 0.0749579091940929, "learning_rate": 3.3640851890654596e-06, "loss": 0.5047, "step": 9977 }, { "epoch": 4.853009118541033, "grad_norm": 0.07777987185989435, "learning_rate": 3.36318167547144e-06, "loss": 0.5424, "step": 9978 }, { "epoch": 4.853495440729484, "grad_norm": 0.07359019254798982, "learning_rate": 3.3622782217366066e-06, "loss": 0.5341, "step": 9979 }, { "epoch": 4.853981762917933, "grad_norm": 0.07767641189602528, "learning_rate": 3.361374827894005e-06, "loss": 0.5248, "step": 9980 }, { "epoch": 4.854468085106383, "grad_norm": 0.07388300598841771, "learning_rate": 3.3604714939766693e-06, "loss": 0.5125, "step": 9981 }, { "epoch": 4.854954407294833, "grad_norm": 0.0780196347436397, "learning_rate": 3.3595682200176372e-06, "loss": 0.5228, "step": 9982 }, { "epoch": 4.855440729483282, "grad_norm": 0.07189098717799296, "learning_rate": 3.3586650060499394e-06, "loss": 0.4865, "step": 9983 }, { "epoch": 4.855927051671733, "grad_norm": 0.07504904649218191, "learning_rate": 3.357761852106608e-06, "loss": 0.5382, "step": 9984 }, { "epoch": 4.856413373860183, "grad_norm": 0.0700289614060224, "learning_rate": 3.3568587582206712e-06, "loss": 0.472, "step": 9985 }, { "epoch": 4.856899696048632, "grad_norm": 0.07678016258738653, "learning_rate": 3.3559557244251585e-06, "loss": 0.5427, "step": 9986 }, { "epoch": 4.857386018237082, "grad_norm": 0.07948920413560626, "learning_rate": 3.35505275075309e-06, "loss": 0.5394, "step": 9987 }, { "epoch": 4.857872340425532, "grad_norm": 0.07251650638918544, "learning_rate": 3.354149837237489e-06, "loss": 0.4996, "step": 9988 }, { "epoch": 4.858358662613981, "grad_norm": 0.07288686904849229, "learning_rate": 3.353246983911379e-06, "loss": 0.492, "step": 9989 }, { "epoch": 4.858844984802432, "grad_norm": 0.07658468624542433, "learning_rate": 3.3523441908077726e-06, "loss": 0.5027, "step": 9990 }, { "epoch": 4.859331306990882, "grad_norm": 0.07666243253355116, "learning_rate": 3.351441457959689e-06, "loss": 0.5885, "step": 9991 }, { "epoch": 4.859817629179331, "grad_norm": 0.07227232657200966, "learning_rate": 3.3505387854001387e-06, "loss": 0.4979, "step": 9992 }, { "epoch": 4.860303951367781, "grad_norm": 0.07414202713011281, "learning_rate": 3.3496361731621364e-06, "loss": 0.5188, "step": 9993 }, { "epoch": 4.860790273556231, "grad_norm": 0.07366606635805148, "learning_rate": 3.3487336212786875e-06, "loss": 0.5278, "step": 9994 }, { "epoch": 4.86127659574468, "grad_norm": 0.07473170751222145, "learning_rate": 3.3478311297828013e-06, "loss": 0.5329, "step": 9995 }, { "epoch": 4.861762917933131, "grad_norm": 0.07477726441339, "learning_rate": 3.3469286987074803e-06, "loss": 0.4896, "step": 9996 }, { "epoch": 4.862249240121581, "grad_norm": 0.07304589698054452, "learning_rate": 3.3460263280857295e-06, "loss": 0.4978, "step": 9997 }, { "epoch": 4.86273556231003, "grad_norm": 0.0744397921859017, "learning_rate": 3.345124017950545e-06, "loss": 0.5214, "step": 9998 }, { "epoch": 4.86322188449848, "grad_norm": 0.07216225407263793, "learning_rate": 3.3442217683349286e-06, "loss": 0.4717, "step": 9999 }, { "epoch": 4.86370820668693, "grad_norm": 0.07497180266407774, "learning_rate": 3.3433195792718732e-06, "loss": 0.4915, "step": 10000 }, { "epoch": 4.86419452887538, "grad_norm": 0.07261845506524202, "learning_rate": 3.342417450794375e-06, "loss": 0.5388, "step": 10001 }, { "epoch": 4.86468085106383, "grad_norm": 0.07211161284215672, "learning_rate": 3.341515382935423e-06, "loss": 0.4969, "step": 10002 }, { "epoch": 4.86516717325228, "grad_norm": 0.07261994916627902, "learning_rate": 3.340613375728008e-06, "loss": 0.4737, "step": 10003 }, { "epoch": 4.865653495440729, "grad_norm": 0.07693557724595707, "learning_rate": 3.3397114292051135e-06, "loss": 0.484, "step": 10004 }, { "epoch": 4.866139817629179, "grad_norm": 0.07412737687312042, "learning_rate": 3.338809543399728e-06, "loss": 0.4844, "step": 10005 }, { "epoch": 4.86662613981763, "grad_norm": 0.07381212469044743, "learning_rate": 3.3379077183448306e-06, "loss": 0.5247, "step": 10006 }, { "epoch": 4.867112462006079, "grad_norm": 0.07408225321789191, "learning_rate": 3.3370059540734058e-06, "loss": 0.4792, "step": 10007 }, { "epoch": 4.867598784194529, "grad_norm": 0.07350698283682915, "learning_rate": 3.336104250618426e-06, "loss": 0.5144, "step": 10008 }, { "epoch": 4.868085106382979, "grad_norm": 0.0768106247785902, "learning_rate": 3.3352026080128715e-06, "loss": 0.5378, "step": 10009 }, { "epoch": 4.868571428571428, "grad_norm": 0.07483959806061541, "learning_rate": 3.3343010262897125e-06, "loss": 0.5152, "step": 10010 }, { "epoch": 4.869057750759879, "grad_norm": 0.0775465361829277, "learning_rate": 3.3333995054819236e-06, "loss": 0.5261, "step": 10011 }, { "epoch": 4.869544072948329, "grad_norm": 0.07593471594999275, "learning_rate": 3.33249804562247e-06, "loss": 0.5387, "step": 10012 }, { "epoch": 4.870030395136778, "grad_norm": 0.07732332122445831, "learning_rate": 3.331596646744321e-06, "loss": 0.5125, "step": 10013 }, { "epoch": 4.870516717325228, "grad_norm": 0.07524045023730902, "learning_rate": 3.3306953088804417e-06, "loss": 0.4794, "step": 10014 }, { "epoch": 4.871003039513678, "grad_norm": 0.07469719399219478, "learning_rate": 3.3297940320637924e-06, "loss": 0.516, "step": 10015 }, { "epoch": 4.871489361702127, "grad_norm": 0.07351274318456377, "learning_rate": 3.3288928163273344e-06, "loss": 0.5227, "step": 10016 }, { "epoch": 4.871975683890578, "grad_norm": 0.07493342586633125, "learning_rate": 3.327991661704024e-06, "loss": 0.503, "step": 10017 }, { "epoch": 4.872462006079028, "grad_norm": 0.07695294657415475, "learning_rate": 3.327090568226821e-06, "loss": 0.5147, "step": 10018 }, { "epoch": 4.872948328267477, "grad_norm": 0.07758095855370004, "learning_rate": 3.326189535928674e-06, "loss": 0.5057, "step": 10019 }, { "epoch": 4.873434650455927, "grad_norm": 0.07446454933886645, "learning_rate": 3.325288564842537e-06, "loss": 0.5268, "step": 10020 }, { "epoch": 4.873920972644377, "grad_norm": 0.07250081048624252, "learning_rate": 3.3243876550013566e-06, "loss": 0.4942, "step": 10021 }, { "epoch": 4.874407294832826, "grad_norm": 0.07445212591946888, "learning_rate": 3.323486806438083e-06, "loss": 0.4922, "step": 10022 }, { "epoch": 4.874893617021277, "grad_norm": 0.07092277499178194, "learning_rate": 3.322586019185657e-06, "loss": 0.4628, "step": 10023 }, { "epoch": 4.875379939209727, "grad_norm": 0.07440357078776229, "learning_rate": 3.3216852932770228e-06, "loss": 0.5082, "step": 10024 }, { "epoch": 4.875866261398176, "grad_norm": 0.07487056505890616, "learning_rate": 3.3207846287451194e-06, "loss": 0.4967, "step": 10025 }, { "epoch": 4.876352583586626, "grad_norm": 0.07572685907517213, "learning_rate": 3.319884025622887e-06, "loss": 0.5155, "step": 10026 }, { "epoch": 4.876838905775076, "grad_norm": 0.07700505800149454, "learning_rate": 3.3189834839432565e-06, "loss": 0.5287, "step": 10027 }, { "epoch": 4.877325227963526, "grad_norm": 0.0725051245910781, "learning_rate": 3.3180830037391666e-06, "loss": 0.5011, "step": 10028 }, { "epoch": 4.877811550151976, "grad_norm": 0.07264478767315147, "learning_rate": 3.317182585043543e-06, "loss": 0.5021, "step": 10029 }, { "epoch": 4.878297872340426, "grad_norm": 0.07840865554245192, "learning_rate": 3.316282227889318e-06, "loss": 0.5373, "step": 10030 }, { "epoch": 4.878784194528875, "grad_norm": 0.07650653821435979, "learning_rate": 3.315381932309415e-06, "loss": 0.5178, "step": 10031 }, { "epoch": 4.879270516717325, "grad_norm": 0.07223226121659362, "learning_rate": 3.3144816983367634e-06, "loss": 0.5381, "step": 10032 }, { "epoch": 4.8797568389057755, "grad_norm": 0.07303058812566782, "learning_rate": 3.3135815260042792e-06, "loss": 0.5071, "step": 10033 }, { "epoch": 4.880243161094225, "grad_norm": 0.07416992883607613, "learning_rate": 3.3126814153448856e-06, "loss": 0.5213, "step": 10034 }, { "epoch": 4.880729483282675, "grad_norm": 0.07381039134191224, "learning_rate": 3.3117813663914984e-06, "loss": 0.5369, "step": 10035 }, { "epoch": 4.8812158054711245, "grad_norm": 0.07642139422645766, "learning_rate": 3.3108813791770356e-06, "loss": 0.5451, "step": 10036 }, { "epoch": 4.881702127659574, "grad_norm": 0.07432602863555783, "learning_rate": 3.309981453734406e-06, "loss": 0.4941, "step": 10037 }, { "epoch": 4.882188449848025, "grad_norm": 0.07503644792132684, "learning_rate": 3.3090815900965234e-06, "loss": 0.5437, "step": 10038 }, { "epoch": 4.8826747720364745, "grad_norm": 0.07358503992661927, "learning_rate": 3.3081817882962946e-06, "loss": 0.507, "step": 10039 }, { "epoch": 4.883161094224924, "grad_norm": 0.07534949257411974, "learning_rate": 3.307282048366627e-06, "loss": 0.5251, "step": 10040 }, { "epoch": 4.883647416413374, "grad_norm": 0.07273369234153167, "learning_rate": 3.306382370340425e-06, "loss": 0.4796, "step": 10041 }, { "epoch": 4.8841337386018235, "grad_norm": 0.07263800851275007, "learning_rate": 3.3054827542505874e-06, "loss": 0.5413, "step": 10042 }, { "epoch": 4.884620060790273, "grad_norm": 0.07294296729047788, "learning_rate": 3.304583200130017e-06, "loss": 0.5227, "step": 10043 }, { "epoch": 4.885106382978723, "grad_norm": 0.07403974302333012, "learning_rate": 3.303683708011608e-06, "loss": 0.5015, "step": 10044 }, { "epoch": 4.8855927051671735, "grad_norm": 0.07642653557864701, "learning_rate": 3.302784277928257e-06, "loss": 0.5215, "step": 10045 }, { "epoch": 4.886079027355623, "grad_norm": 0.07484165392494449, "learning_rate": 3.301884909912855e-06, "loss": 0.4965, "step": 10046 }, { "epoch": 4.886565349544073, "grad_norm": 0.07195841038166434, "learning_rate": 3.300985603998296e-06, "loss": 0.5064, "step": 10047 }, { "epoch": 4.8870516717325225, "grad_norm": 0.07043711799280249, "learning_rate": 3.3000863602174626e-06, "loss": 0.4924, "step": 10048 }, { "epoch": 4.887537993920972, "grad_norm": 0.07562753975290293, "learning_rate": 3.299187178603244e-06, "loss": 0.5416, "step": 10049 }, { "epoch": 4.888024316109423, "grad_norm": 0.07188691157298088, "learning_rate": 3.2982880591885227e-06, "loss": 0.5282, "step": 10050 }, { "epoch": 4.8885106382978725, "grad_norm": 0.07309477649351301, "learning_rate": 3.297389002006182e-06, "loss": 0.4851, "step": 10051 }, { "epoch": 4.888996960486322, "grad_norm": 0.07450735668268164, "learning_rate": 3.2964900070890973e-06, "loss": 0.5552, "step": 10052 }, { "epoch": 4.889483282674772, "grad_norm": 0.07433251156736057, "learning_rate": 3.2955910744701485e-06, "loss": 0.5469, "step": 10053 }, { "epoch": 4.8899696048632215, "grad_norm": 0.0742850319957969, "learning_rate": 3.294692204182207e-06, "loss": 0.513, "step": 10054 }, { "epoch": 4.890455927051672, "grad_norm": 0.07351676782551884, "learning_rate": 3.293793396258147e-06, "loss": 0.5207, "step": 10055 }, { "epoch": 4.890942249240122, "grad_norm": 0.07347130313129643, "learning_rate": 3.2928946507308367e-06, "loss": 0.4867, "step": 10056 }, { "epoch": 4.8914285714285715, "grad_norm": 0.07241237959784527, "learning_rate": 3.2919959676331464e-06, "loss": 0.5066, "step": 10057 }, { "epoch": 4.891914893617021, "grad_norm": 0.07529887467471853, "learning_rate": 3.291097346997938e-06, "loss": 0.5153, "step": 10058 }, { "epoch": 4.892401215805471, "grad_norm": 0.0770271100472494, "learning_rate": 3.2901987888580767e-06, "loss": 0.5277, "step": 10059 }, { "epoch": 4.892887537993921, "grad_norm": 0.0749990216451476, "learning_rate": 3.2893002932464215e-06, "loss": 0.5259, "step": 10060 }, { "epoch": 4.893373860182371, "grad_norm": 0.07684437899549906, "learning_rate": 3.288401860195834e-06, "loss": 0.5196, "step": 10061 }, { "epoch": 4.893860182370821, "grad_norm": 0.0754757202453132, "learning_rate": 3.2875034897391656e-06, "loss": 0.5005, "step": 10062 }, { "epoch": 4.8943465045592704, "grad_norm": 0.07354082339836521, "learning_rate": 3.2866051819092743e-06, "loss": 0.4975, "step": 10063 }, { "epoch": 4.89483282674772, "grad_norm": 0.07557988352935968, "learning_rate": 3.285706936739008e-06, "loss": 0.5254, "step": 10064 }, { "epoch": 4.895319148936171, "grad_norm": 0.07728608958968981, "learning_rate": 3.2848087542612204e-06, "loss": 0.5415, "step": 10065 }, { "epoch": 4.89580547112462, "grad_norm": 0.07377136328852381, "learning_rate": 3.2839106345087545e-06, "loss": 0.527, "step": 10066 }, { "epoch": 4.89629179331307, "grad_norm": 0.0730167069055218, "learning_rate": 3.283012577514456e-06, "loss": 0.494, "step": 10067 }, { "epoch": 4.89677811550152, "grad_norm": 0.07773378427797767, "learning_rate": 3.282114583311169e-06, "loss": 0.5595, "step": 10068 }, { "epoch": 4.897264437689969, "grad_norm": 0.07513939358252131, "learning_rate": 3.281216651931731e-06, "loss": 0.4905, "step": 10069 }, { "epoch": 4.897750759878419, "grad_norm": 0.0732380198628978, "learning_rate": 3.280318783408981e-06, "loss": 0.5069, "step": 10070 }, { "epoch": 4.898237082066869, "grad_norm": 0.07626974749959135, "learning_rate": 3.279420977775754e-06, "loss": 0.5334, "step": 10071 }, { "epoch": 4.898723404255319, "grad_norm": 0.07630598532652054, "learning_rate": 3.2785232350648854e-06, "loss": 0.534, "step": 10072 }, { "epoch": 4.899209726443769, "grad_norm": 0.07428312400051716, "learning_rate": 3.2776255553092024e-06, "loss": 0.513, "step": 10073 }, { "epoch": 4.899696048632219, "grad_norm": 0.07324659078790577, "learning_rate": 3.2767279385415364e-06, "loss": 0.5226, "step": 10074 }, { "epoch": 4.900182370820668, "grad_norm": 0.0739185759618749, "learning_rate": 3.2758303847947114e-06, "loss": 0.4917, "step": 10075 }, { "epoch": 4.900668693009118, "grad_norm": 0.07759010118045165, "learning_rate": 3.2749328941015545e-06, "loss": 0.5467, "step": 10076 }, { "epoch": 4.901155015197569, "grad_norm": 0.07798652395815736, "learning_rate": 3.2740354664948837e-06, "loss": 0.5237, "step": 10077 }, { "epoch": 4.901641337386018, "grad_norm": 0.07444816311336011, "learning_rate": 3.2731381020075204e-06, "loss": 0.5344, "step": 10078 }, { "epoch": 4.902127659574468, "grad_norm": 0.07338779078486492, "learning_rate": 3.2722408006722807e-06, "loss": 0.5008, "step": 10079 }, { "epoch": 4.902613981762918, "grad_norm": 0.07710639754871149, "learning_rate": 3.2713435625219813e-06, "loss": 0.5053, "step": 10080 }, { "epoch": 4.903100303951367, "grad_norm": 0.07568679094565293, "learning_rate": 3.2704463875894323e-06, "loss": 0.508, "step": 10081 }, { "epoch": 4.903586626139818, "grad_norm": 0.07462793941545953, "learning_rate": 3.2695492759074458e-06, "loss": 0.4915, "step": 10082 }, { "epoch": 4.904072948328268, "grad_norm": 0.07506896105135114, "learning_rate": 3.268652227508827e-06, "loss": 0.5358, "step": 10083 }, { "epoch": 4.904559270516717, "grad_norm": 0.07300692498025771, "learning_rate": 3.2677552424263836e-06, "loss": 0.5078, "step": 10084 }, { "epoch": 4.905045592705167, "grad_norm": 0.07199599761022066, "learning_rate": 3.2668583206929166e-06, "loss": 0.5135, "step": 10085 }, { "epoch": 4.905531914893617, "grad_norm": 0.07048441485612092, "learning_rate": 3.2659614623412305e-06, "loss": 0.4819, "step": 10086 }, { "epoch": 4.906018237082067, "grad_norm": 0.07472383236313225, "learning_rate": 3.2650646674041196e-06, "loss": 0.4994, "step": 10087 }, { "epoch": 4.906504559270517, "grad_norm": 0.07254862112445593, "learning_rate": 3.264167935914383e-06, "loss": 0.4966, "step": 10088 }, { "epoch": 4.906990881458967, "grad_norm": 0.07543274313869557, "learning_rate": 3.2632712679048127e-06, "loss": 0.5213, "step": 10089 }, { "epoch": 4.907477203647416, "grad_norm": 0.07401936546812483, "learning_rate": 3.2623746634082034e-06, "loss": 0.5354, "step": 10090 }, { "epoch": 4.907963525835866, "grad_norm": 0.07464742325826529, "learning_rate": 3.26147812245734e-06, "loss": 0.49, "step": 10091 }, { "epoch": 4.908449848024317, "grad_norm": 0.07238322343079291, "learning_rate": 3.2605816450850116e-06, "loss": 0.511, "step": 10092 }, { "epoch": 4.908936170212766, "grad_norm": 0.07503912469681893, "learning_rate": 3.259685231324003e-06, "loss": 0.53, "step": 10093 }, { "epoch": 4.909422492401216, "grad_norm": 0.0746899302485398, "learning_rate": 3.2587888812070956e-06, "loss": 0.5075, "step": 10094 }, { "epoch": 4.909908814589666, "grad_norm": 0.07299117729776937, "learning_rate": 3.2578925947670716e-06, "loss": 0.5068, "step": 10095 }, { "epoch": 4.910395136778115, "grad_norm": 0.07129649205902233, "learning_rate": 3.256996372036705e-06, "loss": 0.5028, "step": 10096 }, { "epoch": 4.910881458966565, "grad_norm": 0.07523870673689492, "learning_rate": 3.256100213048775e-06, "loss": 0.4752, "step": 10097 }, { "epoch": 4.911367781155015, "grad_norm": 0.07342438861814998, "learning_rate": 3.255204117836051e-06, "loss": 0.5293, "step": 10098 }, { "epoch": 4.911854103343465, "grad_norm": 0.0728246775435652, "learning_rate": 3.254308086431306e-06, "loss": 0.5136, "step": 10099 }, { "epoch": 4.912340425531915, "grad_norm": 0.07520794201256967, "learning_rate": 3.2534121188673064e-06, "loss": 0.5508, "step": 10100 }, { "epoch": 4.912826747720365, "grad_norm": 0.07010348019175214, "learning_rate": 3.25251621517682e-06, "loss": 0.457, "step": 10101 }, { "epoch": 4.913313069908814, "grad_norm": 0.07526788885435415, "learning_rate": 3.251620375392609e-06, "loss": 0.5425, "step": 10102 }, { "epoch": 4.913799392097264, "grad_norm": 0.0732285865228861, "learning_rate": 3.2507245995474353e-06, "loss": 0.5126, "step": 10103 }, { "epoch": 4.914285714285715, "grad_norm": 0.07363440283947675, "learning_rate": 3.249828887674057e-06, "loss": 0.5315, "step": 10104 }, { "epoch": 4.914772036474164, "grad_norm": 0.0735967361530456, "learning_rate": 3.248933239805233e-06, "loss": 0.5196, "step": 10105 }, { "epoch": 4.915258358662614, "grad_norm": 0.07490431315455255, "learning_rate": 3.2480376559737147e-06, "loss": 0.5221, "step": 10106 }, { "epoch": 4.915744680851064, "grad_norm": 0.07463721848508649, "learning_rate": 3.247142136212257e-06, "loss": 0.5291, "step": 10107 }, { "epoch": 4.916231003039513, "grad_norm": 0.07362034506764235, "learning_rate": 3.2462466805536058e-06, "loss": 0.5475, "step": 10108 }, { "epoch": 4.916717325227964, "grad_norm": 0.07352108763488108, "learning_rate": 3.245351289030511e-06, "loss": 0.5018, "step": 10109 }, { "epoch": 4.917203647416414, "grad_norm": 0.07171146209607958, "learning_rate": 3.244455961675716e-06, "loss": 0.4963, "step": 10110 }, { "epoch": 4.917689969604863, "grad_norm": 0.07248580847278054, "learning_rate": 3.243560698521966e-06, "loss": 0.5262, "step": 10111 }, { "epoch": 4.918176291793313, "grad_norm": 0.07073569024089672, "learning_rate": 3.2426654996019967e-06, "loss": 0.4937, "step": 10112 }, { "epoch": 4.918662613981763, "grad_norm": 0.07259405070735103, "learning_rate": 3.2417703649485504e-06, "loss": 0.504, "step": 10113 }, { "epoch": 4.919148936170213, "grad_norm": 0.0735226674894187, "learning_rate": 3.240875294594359e-06, "loss": 0.4856, "step": 10114 }, { "epoch": 4.919635258358663, "grad_norm": 0.07465883809508439, "learning_rate": 3.2399802885721597e-06, "loss": 0.5302, "step": 10115 }, { "epoch": 4.920121580547113, "grad_norm": 0.07843580298466458, "learning_rate": 3.2390853469146787e-06, "loss": 0.5714, "step": 10116 }, { "epoch": 4.920607902735562, "grad_norm": 0.0724506693349241, "learning_rate": 3.2381904696546474e-06, "loss": 0.489, "step": 10117 }, { "epoch": 4.921094224924012, "grad_norm": 0.07687938449118557, "learning_rate": 3.2372956568247905e-06, "loss": 0.5024, "step": 10118 }, { "epoch": 4.921580547112462, "grad_norm": 0.07127194609043487, "learning_rate": 3.236400908457832e-06, "loss": 0.516, "step": 10119 }, { "epoch": 4.922066869300912, "grad_norm": 0.07384623629316198, "learning_rate": 3.2355062245864953e-06, "loss": 0.4891, "step": 10120 }, { "epoch": 4.922553191489362, "grad_norm": 0.0752684506219215, "learning_rate": 3.234611605243496e-06, "loss": 0.5226, "step": 10121 }, { "epoch": 4.923039513677812, "grad_norm": 0.07285626149931103, "learning_rate": 3.2337170504615545e-06, "loss": 0.4961, "step": 10122 }, { "epoch": 4.923525835866261, "grad_norm": 0.07530193598218167, "learning_rate": 3.23282256027338e-06, "loss": 0.5083, "step": 10123 }, { "epoch": 4.924012158054711, "grad_norm": 0.07387518829505471, "learning_rate": 3.2319281347116895e-06, "loss": 0.5123, "step": 10124 }, { "epoch": 4.924498480243161, "grad_norm": 0.07462073428969465, "learning_rate": 3.231033773809189e-06, "loss": 0.5064, "step": 10125 }, { "epoch": 4.924984802431611, "grad_norm": 0.07607156429184778, "learning_rate": 3.230139477598588e-06, "loss": 0.5319, "step": 10126 }, { "epoch": 4.925471124620061, "grad_norm": 0.07402970795567006, "learning_rate": 3.22924524611259e-06, "loss": 0.5242, "step": 10127 }, { "epoch": 4.9259574468085106, "grad_norm": 0.07162298414438688, "learning_rate": 3.2283510793838977e-06, "loss": 0.5269, "step": 10128 }, { "epoch": 4.92644376899696, "grad_norm": 0.07636262016561529, "learning_rate": 3.2274569774452112e-06, "loss": 0.5219, "step": 10129 }, { "epoch": 4.92693009118541, "grad_norm": 0.07200005768216096, "learning_rate": 3.22656294032923e-06, "loss": 0.4801, "step": 10130 }, { "epoch": 4.9274164133738605, "grad_norm": 0.07878081794081868, "learning_rate": 3.225668968068645e-06, "loss": 0.5217, "step": 10131 }, { "epoch": 4.92790273556231, "grad_norm": 0.07688272859795572, "learning_rate": 3.224775060696154e-06, "loss": 0.4953, "step": 10132 }, { "epoch": 4.92838905775076, "grad_norm": 0.0750831097317653, "learning_rate": 3.2238812182444444e-06, "loss": 0.5262, "step": 10133 }, { "epoch": 4.9288753799392095, "grad_norm": 0.07767257636475636, "learning_rate": 3.222987440746207e-06, "loss": 0.5506, "step": 10134 }, { "epoch": 4.929361702127659, "grad_norm": 0.07504626140201265, "learning_rate": 3.2220937282341247e-06, "loss": 0.519, "step": 10135 }, { "epoch": 4.92984802431611, "grad_norm": 0.07438335940981164, "learning_rate": 3.221200080740885e-06, "loss": 0.5214, "step": 10136 }, { "epoch": 4.9303343465045595, "grad_norm": 0.07355184298357463, "learning_rate": 3.220306498299164e-06, "loss": 0.5164, "step": 10137 }, { "epoch": 4.930820668693009, "grad_norm": 0.07494156930823387, "learning_rate": 3.2194129809416437e-06, "loss": 0.5142, "step": 10138 }, { "epoch": 4.931306990881459, "grad_norm": 0.07293953779961893, "learning_rate": 3.2185195287009984e-06, "loss": 0.4997, "step": 10139 }, { "epoch": 4.9317933130699085, "grad_norm": 0.07183619574176704, "learning_rate": 3.217626141609906e-06, "loss": 0.4934, "step": 10140 }, { "epoch": 4.932279635258359, "grad_norm": 0.0761620888974068, "learning_rate": 3.2167328197010326e-06, "loss": 0.5443, "step": 10141 }, { "epoch": 4.932765957446809, "grad_norm": 0.07170636422325165, "learning_rate": 3.2158395630070514e-06, "loss": 0.5016, "step": 10142 }, { "epoch": 4.9332522796352585, "grad_norm": 0.0787232549881691, "learning_rate": 3.214946371560628e-06, "loss": 0.5542, "step": 10143 }, { "epoch": 4.933738601823708, "grad_norm": 0.07203952489052931, "learning_rate": 3.214053245394425e-06, "loss": 0.4853, "step": 10144 }, { "epoch": 4.934224924012158, "grad_norm": 0.07204912019497156, "learning_rate": 3.2131601845411096e-06, "loss": 0.4835, "step": 10145 }, { "epoch": 4.9347112462006075, "grad_norm": 0.0739552499606694, "learning_rate": 3.212267189033336e-06, "loss": 0.5126, "step": 10146 }, { "epoch": 4.935197568389058, "grad_norm": 0.07369019119675618, "learning_rate": 3.211374258903765e-06, "loss": 0.514, "step": 10147 }, { "epoch": 4.935683890577508, "grad_norm": 0.0716321018893341, "learning_rate": 3.2104813941850475e-06, "loss": 0.5125, "step": 10148 }, { "epoch": 4.9361702127659575, "grad_norm": 0.07250159981829861, "learning_rate": 3.2095885949098405e-06, "loss": 0.5018, "step": 10149 }, { "epoch": 4.936656534954407, "grad_norm": 0.07586676834909536, "learning_rate": 3.2086958611107906e-06, "loss": 0.5143, "step": 10150 }, { "epoch": 4.937142857142857, "grad_norm": 0.07368063094769385, "learning_rate": 3.207803192820549e-06, "loss": 0.5057, "step": 10151 }, { "epoch": 4.9376291793313065, "grad_norm": 0.07673472289939451, "learning_rate": 3.2069105900717566e-06, "loss": 0.5282, "step": 10152 }, { "epoch": 4.938115501519757, "grad_norm": 0.07544132688355634, "learning_rate": 3.2060180528970597e-06, "loss": 0.4961, "step": 10153 }, { "epoch": 4.938601823708207, "grad_norm": 0.07531378028171364, "learning_rate": 3.205125581329096e-06, "loss": 0.5171, "step": 10154 }, { "epoch": 4.9390881458966565, "grad_norm": 0.0755485059959871, "learning_rate": 3.2042331754005084e-06, "loss": 0.5226, "step": 10155 }, { "epoch": 4.939574468085106, "grad_norm": 0.07715451396575607, "learning_rate": 3.2033408351439265e-06, "loss": 0.5121, "step": 10156 }, { "epoch": 4.940060790273556, "grad_norm": 0.07711850605945271, "learning_rate": 3.202448560591988e-06, "loss": 0.4921, "step": 10157 }, { "epoch": 4.940547112462006, "grad_norm": 0.07394846259410856, "learning_rate": 3.2015563517773214e-06, "loss": 0.5395, "step": 10158 }, { "epoch": 4.941033434650456, "grad_norm": 0.07226930212961101, "learning_rate": 3.200664208732558e-06, "loss": 0.5134, "step": 10159 }, { "epoch": 4.941519756838906, "grad_norm": 0.07238613601785343, "learning_rate": 3.1997721314903195e-06, "loss": 0.5036, "step": 10160 }, { "epoch": 4.9420060790273554, "grad_norm": 0.07173746792858594, "learning_rate": 3.1988801200832344e-06, "loss": 0.4825, "step": 10161 }, { "epoch": 4.942492401215805, "grad_norm": 0.07365118290774321, "learning_rate": 3.19798817454392e-06, "loss": 0.4953, "step": 10162 }, { "epoch": 4.942978723404256, "grad_norm": 0.07306819499163708, "learning_rate": 3.1970962949049973e-06, "loss": 0.499, "step": 10163 }, { "epoch": 4.943465045592705, "grad_norm": 0.0735579642748732, "learning_rate": 3.196204481199081e-06, "loss": 0.524, "step": 10164 }, { "epoch": 4.943951367781155, "grad_norm": 0.07421169026414505, "learning_rate": 3.1953127334587887e-06, "loss": 0.5064, "step": 10165 }, { "epoch": 4.944437689969605, "grad_norm": 0.07408113074226359, "learning_rate": 3.194421051716727e-06, "loss": 0.5262, "step": 10166 }, { "epoch": 4.944924012158054, "grad_norm": 0.07443714560090055, "learning_rate": 3.1935294360055096e-06, "loss": 0.4973, "step": 10167 }, { "epoch": 4.945410334346505, "grad_norm": 0.07252831821766367, "learning_rate": 3.1926378863577403e-06, "loss": 0.4946, "step": 10168 }, { "epoch": 4.945896656534955, "grad_norm": 0.07617294902513563, "learning_rate": 3.1917464028060262e-06, "loss": 0.5361, "step": 10169 }, { "epoch": 4.946382978723404, "grad_norm": 0.07001175281472896, "learning_rate": 3.1908549853829664e-06, "loss": 0.474, "step": 10170 }, { "epoch": 4.946869300911854, "grad_norm": 0.07552514847825104, "learning_rate": 3.1899636341211604e-06, "loss": 0.5236, "step": 10171 }, { "epoch": 4.947355623100304, "grad_norm": 0.07545566733202758, "learning_rate": 3.189072349053209e-06, "loss": 0.5343, "step": 10172 }, { "epoch": 4.947841945288753, "grad_norm": 0.07408979041791526, "learning_rate": 3.1881811302117025e-06, "loss": 0.5412, "step": 10173 }, { "epoch": 4.948328267477204, "grad_norm": 0.0763076222792267, "learning_rate": 3.1872899776292382e-06, "loss": 0.5286, "step": 10174 }, { "epoch": 4.948814589665654, "grad_norm": 0.07303290464238386, "learning_rate": 3.1863988913384002e-06, "loss": 0.5049, "step": 10175 }, { "epoch": 4.949300911854103, "grad_norm": 0.07474930552380307, "learning_rate": 3.1855078713717815e-06, "loss": 0.5376, "step": 10176 }, { "epoch": 4.949787234042553, "grad_norm": 0.07125295908755164, "learning_rate": 3.1846169177619614e-06, "loss": 0.5, "step": 10177 }, { "epoch": 4.950273556231003, "grad_norm": 0.0734276806463007, "learning_rate": 3.1837260305415267e-06, "loss": 0.5499, "step": 10178 }, { "epoch": 4.950759878419452, "grad_norm": 0.07494445845930545, "learning_rate": 3.182835209743056e-06, "loss": 0.5237, "step": 10179 }, { "epoch": 4.951246200607903, "grad_norm": 0.07153233260783684, "learning_rate": 3.1819444553991287e-06, "loss": 0.4649, "step": 10180 }, { "epoch": 4.951732522796353, "grad_norm": 0.07315808378702987, "learning_rate": 3.181053767542316e-06, "loss": 0.4887, "step": 10181 }, { "epoch": 4.952218844984802, "grad_norm": 0.07158873673318761, "learning_rate": 3.180163146205195e-06, "loss": 0.5136, "step": 10182 }, { "epoch": 4.952705167173252, "grad_norm": 0.07326766036441759, "learning_rate": 3.1792725914203337e-06, "loss": 0.5262, "step": 10183 }, { "epoch": 4.953191489361702, "grad_norm": 0.07537848034634737, "learning_rate": 3.178382103220302e-06, "loss": 0.5084, "step": 10184 }, { "epoch": 4.953677811550152, "grad_norm": 0.07422608631054724, "learning_rate": 3.177491681637663e-06, "loss": 0.5131, "step": 10185 }, { "epoch": 4.954164133738602, "grad_norm": 0.07841547142015454, "learning_rate": 3.1766013267049827e-06, "loss": 0.5635, "step": 10186 }, { "epoch": 4.954650455927052, "grad_norm": 0.07758471451620878, "learning_rate": 3.175711038454819e-06, "loss": 0.4747, "step": 10187 }, { "epoch": 4.955136778115501, "grad_norm": 0.07439273861185047, "learning_rate": 3.1748208169197336e-06, "loss": 0.542, "step": 10188 }, { "epoch": 4.955623100303951, "grad_norm": 0.07556239145644843, "learning_rate": 3.1739306621322776e-06, "loss": 0.5165, "step": 10189 }, { "epoch": 4.956109422492402, "grad_norm": 0.07124557854570239, "learning_rate": 3.1730405741250093e-06, "loss": 0.4602, "step": 10190 }, { "epoch": 4.956595744680851, "grad_norm": 0.07202594429471039, "learning_rate": 3.172150552930475e-06, "loss": 0.4994, "step": 10191 }, { "epoch": 4.957082066869301, "grad_norm": 0.07486785958375644, "learning_rate": 3.171260598581227e-06, "loss": 0.5152, "step": 10192 }, { "epoch": 4.957568389057751, "grad_norm": 0.0745513198162049, "learning_rate": 3.170370711109808e-06, "loss": 0.5126, "step": 10193 }, { "epoch": 4.9580547112462, "grad_norm": 0.072016591137042, "learning_rate": 3.1694808905487658e-06, "loss": 0.4654, "step": 10194 }, { "epoch": 4.958541033434651, "grad_norm": 0.07376451329851894, "learning_rate": 3.1685911369306364e-06, "loss": 0.4923, "step": 10195 }, { "epoch": 4.959027355623101, "grad_norm": 0.07281783344501232, "learning_rate": 3.167701450287962e-06, "loss": 0.5043, "step": 10196 }, { "epoch": 4.95951367781155, "grad_norm": 0.07409012176544998, "learning_rate": 3.1668118306532786e-06, "loss": 0.5078, "step": 10197 }, { "epoch": 4.96, "grad_norm": 0.07624964578884641, "learning_rate": 3.165922278059118e-06, "loss": 0.5203, "step": 10198 }, { "epoch": 4.96048632218845, "grad_norm": 0.07382328500562757, "learning_rate": 3.1650327925380164e-06, "loss": 0.5086, "step": 10199 }, { "epoch": 4.960972644376899, "grad_norm": 0.07138799535388599, "learning_rate": 3.1641433741224957e-06, "loss": 0.4902, "step": 10200 }, { "epoch": 4.96145896656535, "grad_norm": 0.07360831497654938, "learning_rate": 3.1632540228450887e-06, "loss": 0.5264, "step": 10201 }, { "epoch": 4.9619452887538, "grad_norm": 0.07915917538342591, "learning_rate": 3.1623647387383143e-06, "loss": 0.5251, "step": 10202 }, { "epoch": 4.962431610942249, "grad_norm": 0.07461986944318749, "learning_rate": 3.161475521834697e-06, "loss": 0.5148, "step": 10203 }, { "epoch": 4.962917933130699, "grad_norm": 0.07463837093952712, "learning_rate": 3.160586372166755e-06, "loss": 0.5185, "step": 10204 }, { "epoch": 4.963404255319149, "grad_norm": 0.07439308370997685, "learning_rate": 3.1596972897670063e-06, "loss": 0.5157, "step": 10205 }, { "epoch": 4.963890577507598, "grad_norm": 0.07399661921538865, "learning_rate": 3.158808274667962e-06, "loss": 0.4994, "step": 10206 }, { "epoch": 4.964376899696049, "grad_norm": 0.07609952851204221, "learning_rate": 3.157919326902137e-06, "loss": 0.4967, "step": 10207 }, { "epoch": 4.964863221884499, "grad_norm": 0.07582631828125783, "learning_rate": 3.1570304465020374e-06, "loss": 0.5445, "step": 10208 }, { "epoch": 4.965349544072948, "grad_norm": 0.07468550093963576, "learning_rate": 3.1561416335001737e-06, "loss": 0.5207, "step": 10209 }, { "epoch": 4.965835866261398, "grad_norm": 0.07058795260886852, "learning_rate": 3.155252887929047e-06, "loss": 0.4739, "step": 10210 }, { "epoch": 4.966322188449848, "grad_norm": 0.07460639061899049, "learning_rate": 3.1543642098211606e-06, "loss": 0.5112, "step": 10211 }, { "epoch": 4.966808510638298, "grad_norm": 0.0717316701008223, "learning_rate": 3.1534755992090126e-06, "loss": 0.4785, "step": 10212 }, { "epoch": 4.967294832826748, "grad_norm": 0.07343705942991076, "learning_rate": 3.152587056125103e-06, "loss": 0.5223, "step": 10213 }, { "epoch": 4.967781155015198, "grad_norm": 0.07518200298946358, "learning_rate": 3.1516985806019225e-06, "loss": 0.5536, "step": 10214 }, { "epoch": 4.968267477203647, "grad_norm": 0.07279037354171893, "learning_rate": 3.150810172671966e-06, "loss": 0.4943, "step": 10215 }, { "epoch": 4.968753799392097, "grad_norm": 0.07516409804308415, "learning_rate": 3.1499218323677196e-06, "loss": 0.5121, "step": 10216 }, { "epoch": 4.9692401215805475, "grad_norm": 0.07365436465231501, "learning_rate": 3.149033559721674e-06, "loss": 0.5311, "step": 10217 }, { "epoch": 4.969726443768997, "grad_norm": 0.07452446780337373, "learning_rate": 3.14814535476631e-06, "loss": 0.507, "step": 10218 }, { "epoch": 4.970212765957447, "grad_norm": 0.07253757268717734, "learning_rate": 3.1472572175341145e-06, "loss": 0.49, "step": 10219 }, { "epoch": 4.970699088145897, "grad_norm": 0.07014285600943093, "learning_rate": 3.146369148057562e-06, "loss": 0.4777, "step": 10220 }, { "epoch": 4.971185410334346, "grad_norm": 0.07166862751340183, "learning_rate": 3.1454811463691334e-06, "loss": 0.4994, "step": 10221 }, { "epoch": 4.971671732522797, "grad_norm": 0.07294772702853751, "learning_rate": 3.1445932125013002e-06, "loss": 0.4909, "step": 10222 }, { "epoch": 4.9721580547112465, "grad_norm": 0.074094172505211, "learning_rate": 3.1437053464865363e-06, "loss": 0.5181, "step": 10223 }, { "epoch": 4.972644376899696, "grad_norm": 0.07493546783834686, "learning_rate": 3.142817548357313e-06, "loss": 0.4958, "step": 10224 }, { "epoch": 4.973130699088146, "grad_norm": 0.07312614116589011, "learning_rate": 3.1419298181460944e-06, "loss": 0.5213, "step": 10225 }, { "epoch": 4.9736170212765956, "grad_norm": 0.07346659318117896, "learning_rate": 3.141042155885348e-06, "loss": 0.5393, "step": 10226 }, { "epoch": 4.974103343465045, "grad_norm": 0.07307699657872137, "learning_rate": 3.1401545616075317e-06, "loss": 0.5085, "step": 10227 }, { "epoch": 4.974589665653496, "grad_norm": 0.07294189605911208, "learning_rate": 3.1392670353451114e-06, "loss": 0.5094, "step": 10228 }, { "epoch": 4.9750759878419455, "grad_norm": 0.07706253190101434, "learning_rate": 3.1383795771305386e-06, "loss": 0.5324, "step": 10229 }, { "epoch": 4.975562310030395, "grad_norm": 0.0769271314475107, "learning_rate": 3.137492186996273e-06, "loss": 0.535, "step": 10230 }, { "epoch": 4.976048632218845, "grad_norm": 0.07614367764655156, "learning_rate": 3.1366048649747617e-06, "loss": 0.5481, "step": 10231 }, { "epoch": 4.9765349544072945, "grad_norm": 0.0771138792937198, "learning_rate": 3.1357176110984578e-06, "loss": 0.536, "step": 10232 }, { "epoch": 4.977021276595744, "grad_norm": 0.0792233669774828, "learning_rate": 3.1348304253998074e-06, "loss": 0.5354, "step": 10233 }, { "epoch": 4.977507598784195, "grad_norm": 0.0708128986887633, "learning_rate": 3.133943307911257e-06, "loss": 0.4636, "step": 10234 }, { "epoch": 4.9779939209726445, "grad_norm": 0.07400958622695976, "learning_rate": 3.133056258665246e-06, "loss": 0.5052, "step": 10235 }, { "epoch": 4.978480243161094, "grad_norm": 0.070496052835644, "learning_rate": 3.132169277694217e-06, "loss": 0.5079, "step": 10236 }, { "epoch": 4.978966565349544, "grad_norm": 0.07565349747368681, "learning_rate": 3.1312823650306057e-06, "loss": 0.5169, "step": 10237 }, { "epoch": 4.9794528875379935, "grad_norm": 0.07214730059546492, "learning_rate": 3.130395520706848e-06, "loss": 0.5079, "step": 10238 }, { "epoch": 4.979939209726444, "grad_norm": 0.07300507813060274, "learning_rate": 3.1295087447553745e-06, "loss": 0.5026, "step": 10239 }, { "epoch": 4.980425531914894, "grad_norm": 0.07321225749951657, "learning_rate": 3.128622037208617e-06, "loss": 0.5162, "step": 10240 }, { "epoch": 4.9809118541033435, "grad_norm": 0.07221755145758249, "learning_rate": 3.1277353980990012e-06, "loss": 0.4863, "step": 10241 }, { "epoch": 4.981398176291793, "grad_norm": 0.07482020030019791, "learning_rate": 3.1268488274589526e-06, "loss": 0.5421, "step": 10242 }, { "epoch": 4.981884498480243, "grad_norm": 0.0730556729888338, "learning_rate": 3.1259623253208928e-06, "loss": 0.4942, "step": 10243 }, { "epoch": 4.982370820668693, "grad_norm": 0.07304073005168182, "learning_rate": 3.125075891717244e-06, "loss": 0.4978, "step": 10244 }, { "epoch": 4.982857142857143, "grad_norm": 0.07807132055053483, "learning_rate": 3.12418952668042e-06, "loss": 0.5137, "step": 10245 }, { "epoch": 4.983343465045593, "grad_norm": 0.07313450850260139, "learning_rate": 3.123303230242838e-06, "loss": 0.4758, "step": 10246 }, { "epoch": 4.9838297872340425, "grad_norm": 0.07123861478395989, "learning_rate": 3.122417002436908e-06, "loss": 0.4934, "step": 10247 }, { "epoch": 4.984316109422492, "grad_norm": 0.07408830345256412, "learning_rate": 3.1215308432950435e-06, "loss": 0.5336, "step": 10248 }, { "epoch": 4.984802431610943, "grad_norm": 0.07307369786628065, "learning_rate": 3.1206447528496477e-06, "loss": 0.5186, "step": 10249 }, { "epoch": 4.985288753799392, "grad_norm": 0.07068473962276077, "learning_rate": 3.1197587311331266e-06, "loss": 0.4555, "step": 10250 }, { "epoch": 4.985775075987842, "grad_norm": 0.07254233727661387, "learning_rate": 3.118872778177885e-06, "loss": 0.4922, "step": 10251 }, { "epoch": 4.986261398176292, "grad_norm": 0.07195916141458782, "learning_rate": 3.1179868940163187e-06, "loss": 0.4824, "step": 10252 }, { "epoch": 4.9867477203647415, "grad_norm": 0.07592802675085018, "learning_rate": 3.1171010786808286e-06, "loss": 0.5337, "step": 10253 }, { "epoch": 4.987234042553191, "grad_norm": 0.07340716911643105, "learning_rate": 3.116215332203806e-06, "loss": 0.5056, "step": 10254 }, { "epoch": 4.987720364741642, "grad_norm": 0.07485085680500117, "learning_rate": 3.115329654617647e-06, "loss": 0.5341, "step": 10255 }, { "epoch": 4.988206686930091, "grad_norm": 0.07423446292563808, "learning_rate": 3.1144440459547355e-06, "loss": 0.4985, "step": 10256 }, { "epoch": 4.988693009118541, "grad_norm": 0.071868463004274, "learning_rate": 3.113558506247464e-06, "loss": 0.4785, "step": 10257 }, { "epoch": 4.989179331306991, "grad_norm": 0.07450094166013828, "learning_rate": 3.112673035528213e-06, "loss": 0.5228, "step": 10258 }, { "epoch": 4.9896656534954404, "grad_norm": 0.07174762226281818, "learning_rate": 3.1117876338293697e-06, "loss": 0.5082, "step": 10259 }, { "epoch": 4.99015197568389, "grad_norm": 0.07311332853867696, "learning_rate": 3.110902301183307e-06, "loss": 0.493, "step": 10260 }, { "epoch": 4.990638297872341, "grad_norm": 0.074140761913313, "learning_rate": 3.110017037622408e-06, "loss": 0.498, "step": 10261 }, { "epoch": 4.99112462006079, "grad_norm": 0.07294891605603335, "learning_rate": 3.109131843179043e-06, "loss": 0.5275, "step": 10262 }, { "epoch": 4.99161094224924, "grad_norm": 0.07237190432044079, "learning_rate": 3.108246717885587e-06, "loss": 0.529, "step": 10263 }, { "epoch": 4.99209726443769, "grad_norm": 0.07622478406109423, "learning_rate": 3.107361661774406e-06, "loss": 0.5347, "step": 10264 }, { "epoch": 4.992583586626139, "grad_norm": 0.07292900283168355, "learning_rate": 3.1064766748778706e-06, "loss": 0.5103, "step": 10265 }, { "epoch": 4.99306990881459, "grad_norm": 0.07162934528368807, "learning_rate": 3.1055917572283423e-06, "loss": 0.5067, "step": 10266 }, { "epoch": 4.99355623100304, "grad_norm": 0.0772221657392243, "learning_rate": 3.104706908858186e-06, "loss": 0.558, "step": 10267 }, { "epoch": 4.994042553191489, "grad_norm": 0.07358215377524287, "learning_rate": 3.1038221297997574e-06, "loss": 0.5331, "step": 10268 }, { "epoch": 4.994528875379939, "grad_norm": 0.07254897051563992, "learning_rate": 3.1029374200854167e-06, "loss": 0.542, "step": 10269 }, { "epoch": 4.995015197568389, "grad_norm": 0.07197521975929773, "learning_rate": 3.102052779747514e-06, "loss": 0.4817, "step": 10270 }, { "epoch": 4.995501519756839, "grad_norm": 0.0726734972824184, "learning_rate": 3.101168208818405e-06, "loss": 0.4983, "step": 10271 }, { "epoch": 4.995987841945289, "grad_norm": 0.07549430565792285, "learning_rate": 3.100283707330436e-06, "loss": 0.5319, "step": 10272 }, { "epoch": 4.996474164133739, "grad_norm": 0.07407634278685511, "learning_rate": 3.099399275315957e-06, "loss": 0.519, "step": 10273 }, { "epoch": 4.996960486322188, "grad_norm": 0.07275511124006062, "learning_rate": 3.0985149128073083e-06, "loss": 0.4962, "step": 10274 }, { "epoch": 4.997446808510638, "grad_norm": 0.07493752613740207, "learning_rate": 3.097630619836833e-06, "loss": 0.5442, "step": 10275 }, { "epoch": 4.997933130699089, "grad_norm": 0.07215634510644309, "learning_rate": 3.096746396436871e-06, "loss": 0.5213, "step": 10276 }, { "epoch": 4.998419452887538, "grad_norm": 0.07142708886125362, "learning_rate": 3.0958622426397567e-06, "loss": 0.5065, "step": 10277 }, { "epoch": 4.998905775075988, "grad_norm": 0.07234281504213742, "learning_rate": 3.0949781584778284e-06, "loss": 0.4892, "step": 10278 }, { "epoch": 4.999392097264438, "grad_norm": 0.0747017809557893, "learning_rate": 3.094094143983411e-06, "loss": 0.5128, "step": 10279 }, { "epoch": 4.999878419452887, "grad_norm": 0.07223965839724003, "learning_rate": 3.0932101991888385e-06, "loss": 0.4924, "step": 10280 }, { "epoch": 4.999878419452887, "eval_loss": 0.5666021108627319, "eval_runtime": 105.1621, "eval_samples_per_second": 288.631, "eval_steps_per_second": 36.087, "step": 10280 } ], "logging_steps": 1, "max_steps": 16448, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 2056, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4684063684165632.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }